Class: PDF::Reader::PageState

Inherits:
Object
  • Object
show all
Defined in:
lib/pdf/reader/page_state.rb

Overview

Encapsulates the logic for tracking graphics state as the instructions for a single page are processed. Most of the public methods correspond directly to PDF operators.

Constant Summary collapse

# Initial values for the text-related entries of a graphics state.
#
# The hash is frozen so the shared default can never be mutated by accident.
# Code that needs a working copy takes a `dup` of it, and a dup of a frozen
# Hash is itself unfrozen.
DEFAULT_GRAPHICS_STATE =
{
  :char_spacing   => 0,
  :word_spacing   => 0,
  :h_scaling      => 1.0,
  :text_leading   => 0,
  :text_font      => nil,
  :text_font_size => 0,
  :text_mode      => 0,
  :text_rise      => 0,
  :text_knockout  => 0
}.freeze

Instance Method Summary collapse

Constructor Details

#initialize(page) ⇒ PageState

starting a new page : (untyped) -> void



27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
# File 'lib/pdf/reader/page_state.rb', line 27

# Set up state tracking for a single page.
#
# page - the page about to be processed; its cache, objects, fonts, xobjects
#        and color_spaces are all read here
def initialize(page)
  @page          = page
  @cache         = page.cache
  @objects       = page.objects
  # each resource stack starts out holding only the page-level resources
  @font_stack    = [build_fonts(page.fonts)]
  @xobject_stack = [page.xobjects]
  @cs_stack      = [page.color_spaces]
  # the graphics state stack starts with a private copy of the defaults
  @stack         = [DEFAULT_GRAPHICS_STATE.dup]
  state[:ctm]  = identity_matrix

  # These are only valid when inside a `BT` block and we re-initialize them on each
  # `BT`. However, we need the instance variables set so PDFs with the text operators
  # out of order don't trigger NoMethodError when these are nil
  @text_matrix      = identity_matrix
  @text_line_matrix = identity_matrix
end

Instance Method Details

#begin_text_objectObject

Text Object Operators



93
94
95
96
97
# File 'lib/pdf/reader/page_state.rb', line 93

# Start a new text object (the `BT` operator).
#
# Resets the text matrix and the text line matrix to the identity matrix and
# drops every value that was cached from the previous text matrix.
def begin_text_object
  @text_matrix      = identity_matrix
  @text_line_matrix = identity_matrix
  # The text matrix was just replaced, so the cached rendering matrix is as
  # stale as the cached font size. Every other method that replaces
  # @text_matrix clears both, so do the same here.
  @font_size = @text_rendering_matrix = nil # invalidate cached values
end

#clone_stateObject

This returns a deep clone of the current state, ensuring changes are kept separate from earlier states.

Marshal is used to round-trip the state through a string to easily perform the deep clone. Kinda hacky, but effective.



294
295
296
297
298
299
300
# File 'lib/pdf/reader/page_state.rb', line 294

# Build a fully independent (deep) copy of the state on top of the stack.
#
# Returns an empty Hash when the stack has nothing on it.
def clone_state
  return {} if @stack.empty?

  # dump to a string and load it straight back: every nested value is rebuilt,
  # so the copy shares nothing with the original
  serialized = Marshal.dump(@stack.last)
  Marshal.load(serialized)
end

#concatenate_matrix(a, b, c, d, e, f) ⇒ Object

update the current transformation matrix.

If the CTM is currently undefined, just store the new values.

If there’s an existing CTM, then multiply the existing matrix with the new matrix to form the updated matrix.



75
76
77
78
79
80
81
82
83
84
85
86
87
# File 'lib/pdf/reader/page_state.rb', line 75

# Update the current transformation matrix (CTM).
#
# a, b, c, d, e, f - the six components of the matrix being applied
#
# When no CTM is stored yet the new matrix is stored as-is. Otherwise the new
# matrix is multiplied by the existing CTM and the product becomes the CTM.
def concatenate_matrix(a, b, c, d, e, f)
  incoming = TransformationMatrix.new(a, b, c, d, e, f)
  existing = state[:ctm]

  state[:ctm] = if existing
                  incoming.multiply!(
                    existing.a, existing.b,
                    existing.c, existing.d,
                    existing.e, existing.f
                  )
                else
                  incoming
                end

  # font_size is derived from the text rendering matrix, so both cached values
  # have to be dropped together when the CTM changes
  @font_size = @text_rendering_matrix = nil # invalidate cached values
end

#ctm_transform(x, y) ⇒ Object

transform x and y co-ordinates from the current user space to the underlying device space.



230
231
232
233
234
235
# File 'lib/pdf/reader/page_state.rb', line 230

# Map a point from the current user space into device space using the CTM.
#
# x, y - the co-ordinates to transform
#
# Returns a two element Array: [transformed x, transformed y].
def ctm_transform(x, y)
  matrix = ctm
  device_x = (matrix.a * x) + (matrix.c * y) + (matrix.e)
  device_y = (matrix.b * x) + (matrix.d * y) + (matrix.f)
  [device_x, device_y]
end

#current_fontObject



255
256
257
# File 'lib/pdf/reader/page_state.rb', line 255

# The font registered under the label stored in the current state's
# :text_font entry, as resolved by find_font (nil when nothing matches).
def current_font
  label = state[:text_font]
  find_font(label)
end

#end_text_objectObject



99
100
101
# File 'lib/pdf/reader/page_state.rb', line 99

# Called when a text object ends.
#
# Deliberately a no-op: the text matrices are re-initialised by
# begin_text_object when the next text object starts, so there is nothing to
# tear down here.
def end_text_object
  # don't need to do anything
end

#find_color_space(label) ⇒ Object



266
267
268
269
270
271
# File 'lib/pdf/reader/page_state.rb', line 266

# Look up a colour space by its resource label.
#
# The dictionaries in @cs_stack are searched from first to last and the value
# from the first one that has the label as a key is returned.
#
# Returns nil when no dictionary contains the label.
def find_color_space(label)
  @cs_stack.each do |colorspaces|
    return colorspaces[label] if colorspaces.has_key?(label)
  end
  nil
end

#find_font(label) ⇒ Object



259
260
261
262
263
264
# File 'lib/pdf/reader/page_state.rb', line 259

# Look up a font by its resource label.
#
# The dictionaries in @font_stack are searched from first to last and the
# value from the first one that has the label as a key is returned.
#
# Returns nil when no dictionary contains the label.
def find_font(label)
  @font_stack.each do |fonts|
    return fonts[label] if fonts.has_key?(label)
  end
  nil
end

#find_xobject(label) ⇒ Object



273
274
275
276
277
278
# File 'lib/pdf/reader/page_state.rb', line 273

# Look up an XObject by its resource label.
#
# The dictionaries in @xobject_stack are searched from first to last and the
# value from the first one that has the label as a key is returned.
#
# Returns nil when no dictionary contains the label.
def find_xobject(label)
  @xobject_stack.each do |xobjects|
    return xobjects[label] if xobjects.has_key?(label)
  end
  nil
end

#font_sizeObject



120
121
122
123
124
125
126
# File 'lib/pdf/reader/page_state.rb', line 120

# The effective font size, measured as the vertical distance between the
# points (0,0) and (1,1) after they pass through trm_transform, rounded to
# 10 decimal places.
#
# The result is memoised in @font_size until something sets it back to nil.
def font_size
  return @font_size if @font_size

  origin_y = trm_transform(0, 0)[1]
  unit_y   = trm_transform(1, 1)[1]
  @font_size = (origin_y - unit_y).abs.round(10)
end

#invoke_xobject(label) ⇒ Object

XObjects

Raises:



201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
# File 'lib/pdf/reader/page_state.rb', line 201

# Invoke the XObject registered under label.
#
# label - the resource label of the XObject
#
# The graphics state is saved for the duration of the call and restored
# afterwards, even if the block raises. If the XObject has a :Matrix entry it
# is concatenated onto the CTM first.
#
# Yields (when a block is given) either a PDF::Reader::FormXObject wrapping
# the XObject, when its :Subtype is :Form, or the XObject itself. While a
# form's block runs, the form's fonts and xobjects sit at the front of the
# lookup stacks so they take priority over the enclosing resources.
#
# Returns whatever restore_graphics_state returns.
# Raises MalformedPDFError if no XObject with the label can be found.
def invoke_xobject(label)
  # Resolve the XObject before touching the graphics state stack, so a missing
  # XObject doesn't leave an extra saved state behind when we raise.
  xobject = find_xobject(label)
  raise MalformedPDFError, "XObject #{label} not found" if xobject.nil?

  save_graphics_state
  begin
    matrix = xobject.hash[:Matrix]
    concatenate_matrix(*matrix) if matrix

    if xobject.hash[:Subtype] == :Form
      form = PDF::Reader::FormXObject.new(@page, xobject, :cache => @cache)
      @font_stack.unshift(form.font_objects)
      @xobject_stack.unshift(form.xobjects)
      begin
        yield form if block_given?
      ensure
        # always drop the form's resources again, even when the block raised
        @font_stack.shift
        @xobject_stack.shift
      end
    else
      yield xobject if block_given?
    end
  ensure
    # keep the save/restore pair balanced on every exit path
    restored = restore_graphics_state
  end
  restored
end

#move_text_position(x, y) ⇒ Object

Text Positioning Operators



148
149
150
151
152
153
154
155
156
157
158
159
# File 'lib/pdf/reader/page_state.rb', line 148

# Offset the text line matrix by (x, y) and restart the text matrix from the
# resulting line matrix.
#
# x, y - the translation to apply
def move_text_position(x, y) # Td
  line = @text_line_matrix
  translation = TransformationMatrix.new(1, 0, 0, 1, x, y)

  # translation x current line matrix becomes the new line matrix
  @text_line_matrix = translation.multiply!(
    line.a, line.b,
    line.c, line.d,
    line.e, line.f
  )
  # the text matrix gets its own copy so later changes don't touch the line matrix
  @text_matrix = @text_line_matrix.dup
  @font_size = @text_rendering_matrix = nil # invalidate cached value
end

#move_text_position_and_set_leading(x, y) ⇒ Object

TD



161
162
163
164
# File 'lib/pdf/reader/page_state.rb', line 161

# Same as move_text_position, but first sets the text leading to the
# negation of y.
#
# x, y - the translation to apply
def move_text_position_and_set_leading(x, y) # TD
  leading = -1 * y
  set_text_leading(leading)
  move_text_position(x, y)
end

#move_to_next_line_and_show_text(str) ⇒ Object



188
189
190
# File 'lib/pdf/reader/page_state.rb', line 188

# Handle the ' operator. Only the move to the start of the next line is
# tracked here.
#
# str - the string operand; it is not used by this method
def move_to_next_line_and_show_text(str) # '
  move_to_start_of_next_line
end

#move_to_start_of_next_lineObject

T*



176
177
178
# File 'lib/pdf/reader/page_state.rb', line 176

# Move to the start of the next line: shifts the text position by zero
# horizontally and by the negated :text_leading of the current state
# vertically.
def move_to_start_of_next_line # T*
  leading = state[:text_leading]
  move_text_position(0, -leading)
end

#process_glyph_displacement(w0, tj, word_boundary) ⇒ Object

after each glyph is painted onto the page the text matrix must be modified. There’s no defined operator for this, but depending on the use case some receivers may need to mutate the state with this while walking a page.

NOTE: some of the variable names in this method are obscure because

they mirror variable names from the PDF spec

NOTE: see Section 9.4.4, PDF 32000-1:2008, pp 252

Arguments:

w0 - the glyph width in *text space*. This generally means the width

in glyph space should be divided by 1000 before being passed to
this function

tj - any kerning that should be applied to the text matrix before the

following glyph is painted. This is usually the numeric arguments
in the array passed to a TJ operator

word_boundary - a boolean indicating if a word boundary was just

reached. Depending on the current state extra space
may need to be added


324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
# File 'lib/pdf/reader/page_state.rb', line 324

# Advance the text matrix horizontally after a glyph has been painted.
#
# The short local names (fs, tc, tw, th, tx) mirror the variable names used
# by the PDF spec.
#
# w0            - the glyph width in text space
# tj            - kerning adjustment to apply; 0 for none
# word_boundary - truthy when a word boundary was just reached, in which case
#                 the state's word spacing is included
def process_glyph_displacement(w0, tj, word_boundary)
  fs = state[:text_font_size]
  tc = state[:char_spacing]
  tw = word_boundary ? state[:word_spacing] : 0
  th = state[:h_scaling]

  tx = if th == 1 && tj == 0 && tc == 0 && tw == 0
         # common case, kept separate to reduce Float allocations
         w0 * fs
       elsif tj != 0
         # a TJ adjustment is present: character/word spacing is left out
         (w0 - (tj/1000.0)) * fs * th
       else
         # glyph advance plus spacing, all multiplied by the horizontal scaling
         ((w0 * fs) + tc + tw) * th
       end

  # TODO: support ty > 0 (the vertical displacement is always 0 for now)
  shift = TransformationMatrix.new(1, 0, 0, 1, tx, 0)
  current = @text_matrix
  @text_matrix = shift.multiply!(
    current.a, current.b,
    current.c, current.d,
    current.e, current.f
  )
  @font_size = @text_rendering_matrix = nil # invalidate cached value
end

#restore_graphics_stateObject

Restore the state to the previous value on the stack.

: () -> untyped



60
61
62
# File 'lib/pdf/reader/page_state.rb', line 60

# Restore the state to the previous value on the stack.
#
# Returns the state that was removed from the top of the stack (nil when the
# stack was already empty).
def restore_graphics_state
  discarded = @stack.pop
  # A different state (and with it a different CTM) is now on top of the
  # stack, so values cached from the discarded state are stale.
  @font_size = @text_rendering_matrix = nil # invalidate cached values
  discarded
end

#save_graphics_stateObject

Clones the current graphics state and pushes it onto the top of the stack. Any changes that are subsequently made to the state can then be reversed by calling restore_graphics_state.

: () -> untyped



53
54
55
# File 'lib/pdf/reader/page_state.rb', line 53

# Push a deep copy of the current graphics state (see clone_state) onto the
# stack, so later changes can be undone with restore_graphics_state.
def save_graphics_state
  @stack << clone_state
end

#set_character_spacing(char_spacing) ⇒ Object

Text State Operators



107
108
109
# File 'lib/pdf/reader/page_state.rb', line 107

# Store the character spacing in the current graphics state.
#
# char_spacing - the new value for the state's :char_spacing entry
def set_character_spacing(char_spacing)
  state[:char_spacing] = char_spacing
end

#set_horizontal_text_scaling(h_scaling) ⇒ Object



111
112
113
# File 'lib/pdf/reader/page_state.rb', line 111

# Store the horizontal text scaling in the current graphics state.
#
# h_scaling - the scaling operand; it is divided by 100.0 before being stored,
#             so 100 is stored as 1.0
#
# Returns the stored (divided) value.
def set_horizontal_text_scaling(h_scaling)
  # Drop the cached rendering matrix and font size so they are rebuilt from
  # the new state on next use.
  # NOTE(review): text_rendering_matrix is defined outside this view; per
  # PDF 32000-1 section 9.4.4 the rendering matrix includes the horizontal
  # scaling -- confirm against the implementation.
  @font_size = @text_rendering_matrix = nil
  state[:h_scaling] = h_scaling / 100.0
end

#set_spacing_next_line_show_text(aw, ac, string) ⇒ Object



192
193
194
195
196
# File 'lib/pdf/reader/page_state.rb', line 192

# Handle the " operator: apply the word and character spacing operands, then
# delegate to move_to_next_line_and_show_text.
#
# aw     - word spacing, passed to set_word_spacing
# ac     - character spacing, passed to set_character_spacing
# string - passed through to move_to_next_line_and_show_text
def set_spacing_next_line_show_text(aw, ac, string) # "
  set_word_spacing(aw)
  set_character_spacing(ac)
  move_to_next_line_and_show_text(string)
end

#set_text_font_and_size(label, size) ⇒ Object



115
116
117
118
# File 'lib/pdf/reader/page_state.rb', line 115

# Select the current font and font size.
#
# label - the font's resource label, stored as :text_font (current_font
#         resolves it through find_font)
# size  - the font size, stored as :text_font_size
#
# Returns size.
def set_text_font_and_size(label, size)
  # Drop the cached font size and rendering matrix so they are rebuilt with
  # the new size on next use.
  # NOTE(review): text_rendering_matrix is defined outside this view; per
  # PDF 32000-1 section 9.4.4 the rendering matrix includes the font size --
  # confirm against the implementation.
  @font_size = @text_rendering_matrix = nil
  state[:text_font]      = label
  state[:text_font_size] = size
end

#set_text_leading(leading) ⇒ Object



128
129
130
# File 'lib/pdf/reader/page_state.rb', line 128

# Store the text leading in the current graphics state.
# move_to_start_of_next_line moves down by this amount.
#
# leading - the new value for the state's :text_leading entry
def set_text_leading(leading)
  state[:text_leading] = leading
end

#set_text_matrix_and_text_line_matrix(a, b, c, d, e, f) ⇒ Object

Tm



166
167
168
169
170
171
172
173
174
# File 'lib/pdf/reader/page_state.rb', line 166

# Replace both the text matrix and the text line matrix with the matrix
# described by the six operands.
#
# a, b, c, d, e, f - the components of the new matrix
def set_text_matrix_and_text_line_matrix(a, b, c, d, e, f) # Tm
  matrix = TransformationMatrix.new(a, b, c, d, e, f)
  @text_matrix      = matrix
  # the line matrix is a separate copy, so the two can change independently
  @text_line_matrix = matrix.dup
  @font_size = @text_rendering_matrix = nil # invalidate cached value
end

#set_text_rendering_mode(mode) ⇒ Object



132
133
134
# File 'lib/pdf/reader/page_state.rb', line 132

# Store the text rendering mode in the current graphics state.
#
# mode - the new value for the state's :text_mode entry
def set_text_rendering_mode(mode)
  state[:text_mode] = mode
end

#set_text_rise(rise) ⇒ Object



136
137
138
# File 'lib/pdf/reader/page_state.rb', line 136

# Store the text rise in the current graphics state.
#
# rise - the new value for the state's :text_rise entry
#
# Returns rise.
def set_text_rise(rise)
  # Drop the cached rendering matrix (and the font size derived from it) so
  # they are rebuilt from the new state on next use.
  # NOTE(review): text_rendering_matrix is defined outside this view; per
  # PDF 32000-1 section 9.4.4 the rendering matrix includes the text rise --
  # confirm against the implementation.
  @font_size = @text_rendering_matrix = nil
  state[:text_rise] = rise
end

#set_word_spacing(word_spacing) ⇒ Object



140
141
142
# File 'lib/pdf/reader/page_state.rb', line 140

# Store the word spacing in the current graphics state.
# process_glyph_displacement includes it when a word boundary is reached.
#
# word_spacing - the new value for the state's :word_spacing entry
def set_word_spacing(word_spacing)
  state[:word_spacing] = word_spacing
end

#show_text_with_positioning(params) ⇒ Object

Text Showing Operators



184
185
186
# File 'lib/pdf/reader/page_state.rb', line 184

# Handle the TJ operator. Currently a placeholder: the body is empty, so
# params is ignored and no state is changed.
#
# params - the operand passed to the operator (unused)
def show_text_with_positioning(params) # TJ
  # TODO record position changes in state here
end

#stack_depthObject

Returns the number of graphics states currently on the stack. (When save_graphics_state is called, a new copy of the current state is pushed onto the stack; that way any modifications to the state will be undone once restore_graphics_state is called.)



284
285
286
# File 'lib/pdf/reader/page_state.rb', line 284

# The number of graphics states currently held on the stack.
def stack_depth
  @stack.length
end

#trm_transform(x, y) ⇒ Object

transform x and y co-ordinates from the current text space to the underlying device space.

transforming (0,0) is a really common case, so optimise for it to avoid unnecessary object allocations



243
244
245
246
247
248
249
250
251
252
253
# File 'lib/pdf/reader/page_state.rb', line 243

# Map a point from the current text space into device space using the text
# rendering matrix.
#
# x, y - the co-ordinates to transform
#
# Returns a two element Array: [transformed x, transformed y].
def trm_transform(x, y)
  trm = text_rendering_matrix

  # (0,0) is by far the most common input and only needs the translation
  # components, so skip the arithmetic and its allocations
  return [trm.e, trm.f] if x == 0 && y == 0

  device_x = (trm.a * x) + (trm.c * y) + (trm.e)
  device_y = (trm.b * x) + (trm.d * y) + (trm.f)
  [device_x, device_y]
end