Class: PDF::Reader::PageReceiver

Inherits:
Object
  • Object
show all
Defined in:
lib/pdf/reader/page_receiver.rb

Overview

encapsulates logic for tracking graphics state as the instructions for a single page are processed. Most of the public methods correspond directly to PDF operators.

Direct Known Subclasses

PageTextReceiver

Constant Summary collapse

# Initial text-state values. #page= pushes a dup of this hash as the first
# entry of the graphics state stack, so the constant itself is not the hash
# that the set_* methods write to.
DEFAULT_GRAPHICS_STATE =
{
  :char_spacing   => 0,   # written by set_character_spacing
  :word_spacing   => 0,   # written by set_word_spacing
  :h_scaling      => 1.0, # a fraction: set_horizontal_text_scaling stores operand / 100.0
  :text_leading   => 0,   # written by set_text_leading
  :text_font      => nil, # font label; current_font resolves it through find_font
  :text_font_size => nil, # written by set_text_font_and_size
  :text_mode      => 0,   # written by set_text_rendering_mode
  :text_rise      => 0,   # written by set_text_rise
  :text_knockout  => 0,
  :tj             => 0    # accumulated by show_text_with_positioning, reset by process_glyph_displacement
}
# A single space character. Not referenced by any method shown on this page.
SPACE =
" "

Instance Method Summary collapse

Instance Method Details

#begin_text_objectObject

Text Object Operators



93
94
95
96
# File 'lib/pdf/reader/page_receiver.rb', line 93

# Start a text object: both the text matrix and the text line matrix are
# replaced with the result of a separate identity_matrix call.
def begin_text_object
  @text_matrix = identity_matrix()
  @text_line_matrix = identity_matrix()
end

#clone_stateObject

This returns a deep clone of the current state, ensuring changes are kept separate from earlier states.

Marshal is used to round-trip the state through a string to easily perform the deep clone. Kinda hacky, but effective.



310
311
312
313
314
315
316
# File 'lib/pdf/reader/page_receiver.rb', line 310

# Return a deep copy of the state on top of the stack, or an empty hash when
# the stack has no entries. The copy is made by serialising the state with
# Marshal and loading it back, so nested objects are duplicated as well.
def clone_state
  return {} if @stack.empty?

  serialised = Marshal.dump(@stack.last)
  Marshal.load(serialised)
end

#concatenate_matrix(a, b, c, d, e, f) ⇒ Object

update the current transformation matrix.

If the CTM is currently undefined, just store the new values.

If there’s an existing CTM, then multiply the existing matrix with the new matrix to form the updated matrix.



80
81
82
83
84
85
86
87
# File 'lib/pdf/reader/page_receiver.rb', line 80

# Update the current transformation matrix (CTM) held in the graphics state.
#
# When a CTM is already present it is left-multiplied in place by the six
# supplied values; otherwise a new TransformationMatrix is stored. Cached
# values are invalidated in both cases.
def concatenate_matrix(a, b, c, d, e, f)
  existing = state[:ctm]
  if existing
    existing.left_multiply!(a, b, c, d, e, f)
  else
    state[:ctm] = TransformationMatrix.new(a, b, c, d, e, f)
  end
  invalidate_cached_values
end

#ctm_transform(x, y) ⇒ Object

transform x and y co-ordinates from the current user space to the underlying device space.



246
247
248
249
250
251
# File 'lib/pdf/reader/page_receiver.rb', line 246

# Apply the current transformation matrix to the point (x, y).
#
# Returns a two-element array [x', y'] where
#   x' = a*x + c*y + e
#   y' = b*x + d*y + f
# using the a..f components read from ctm.
def ctm_transform(x, y)
  device_x = (ctm.a * x) + (ctm.c * y) + (ctm.e)
  device_y = (ctm.b * x) + (ctm.d * y) + (ctm.f)
  [device_x, device_y]
end

#current_fontObject



271
272
273
# File 'lib/pdf/reader/page_receiver.rb', line 271

# Look up the font whose label is stored under :text_font in the current
# graphics state. Returns whatever find_font returns for that label.
def current_font
  label = state[:text_font]
  find_font(label)
end

#end_text_objectObject



98
99
100
# File 'lib/pdf/reader/page_receiver.rb', line 98

# End of a text object. Nothing needs to be updated here, so this is a
# deliberate no-op that returns nil.
def end_text_object
  nil
end

#find_color_space(label) ⇒ Object



282
283
284
285
286
287
# File 'lib/pdf/reader/page_receiver.rb', line 282

# Search the colour space stack from first to last entry and return the value
# stored under +label+ in the first dictionary that has that key.
# Returns nil when no dictionary on the stack contains the label.
def find_color_space(label)
  @cs_stack.each do |colorspaces|
    return colorspaces[label] if colorspaces.has_key?(label)
  end
  nil
end

#find_font(label) ⇒ Object



275
276
277
278
279
280
# File 'lib/pdf/reader/page_receiver.rb', line 275

# Search the font stack from first to last entry and return the value stored
# under +label+ in the first dictionary that has that key.
# Returns nil when no dictionary on the stack contains the label.
def find_font(label)
  @font_stack.each do |fonts|
    return fonts[label] if fonts.has_key?(label)
  end
  nil
end

#find_xobject(label) ⇒ Object



289
290
291
292
293
294
# File 'lib/pdf/reader/page_receiver.rb', line 289

# Search the XObject stack from first to last entry and return the value
# stored under +label+ in the first dictionary that has that key.
# Returns nil when no dictionary on the stack contains the label.
def find_xobject(label)
  @xobject_stack.each do |xobjects|
    return xobjects[label] if xobjects.has_key?(label)
  end
  nil
end

#font_sizeObject



119
120
121
# File 'lib/pdf/reader/page_receiver.rb', line 119

# The font size recorded in the current graphics state (the value last
# stored under :text_font_size), or nil when none has been set.
def font_size
  current = state
  current[:text_font_size]
end

#glyph_width_scaling_factorObject



123
124
125
# File 'lib/pdf/reader/page_receiver.rb', line 123

# Scaling factor applied to glyph widths: the magnitude of the (a, b)
# components of the text rendering matrix.
#
# The result is memoised in @glyph_width_scaling_factor. The previous code
# used `|=` (bitwise/boolean OR-assign) instead of `||=`, so on a nil cache it
# evaluated `nil | number`, which is `true`, and never returned a number.
#
# NOTE(review): the cache is presumably cleared by invalidate_cached_values,
# which is not visible here - confirm.
def glyph_width_scaling_factor
  @glyph_width_scaling_factor ||= magnitude(text_rendering_matrix.a, text_rendering_matrix.b)
end

#invoke_xobject(label) ⇒ Object

XObjects

Raises:



217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
# File 'lib/pdf/reader/page_receiver.rb', line 217

# Invoke the XObject registered under +label+.
#
# The graphics state is saved for the duration of the call and restored
# afterwards. If the XObject dictionary has a :Matrix entry it is concatenated
# onto the CTM. For a :Form XObject a PDF::Reader::FormXObject is built, its
# fonts and xobjects are pushed onto the lookup stacks, and the form is
# yielded; any other XObject is yielded as-is. The block is optional.
#
# Returns the value of restore_graphics_state.
#
# Raises MalformedPDFError when no XObject with that label can be found.
#
# The lookup happens before the state is saved, and the stacks are unwound in
# ensure clauses, so neither a missing XObject nor an exception raised by the
# block leaves the graphics-state, font or xobject stacks unbalanced.
def invoke_xobject(label)
  xobject = find_xobject(label)
  raise MalformedPDFError, "XObject #{label} not found" if xobject.nil?

  save_graphics_state
  begin
    matrix = xobject.hash[:Matrix]
    concatenate_matrix(*matrix) if matrix

    if xobject.hash[:Subtype] == :Form
      form = PDF::Reader::FormXObject.new(@page, xobject, :cache => @cache)
      form_fonts    = form.font_objects
      form_xobjects = form.xobjects
      @font_stack.unshift(form_fonts)
      @xobject_stack.unshift(form_xobjects)
      begin
        yield form if block_given?
      ensure
        @font_stack.shift
        @xobject_stack.shift
      end
    else
      yield xobject if block_given?
    end
  ensure
    # keep the restored value so the method's return value is unchanged
    restored = restore_graphics_state
  end
  restored
end

#move_text_position(x, y) ⇒ Object

Text Positioning Operators



147
148
149
150
151
# File 'lib/pdf/reader/page_receiver.rb', line 147

# Td: shift the text line matrix by (x, y) in place, then make the text
# matrix a copy of the updated text line matrix and invalidate cached values.
def move_text_position(x, y) # Td
  line_matrix = @text_line_matrix
  line_matrix.displacement_left_multiply!(x, y)
  @text_matrix = line_matrix.dup
  invalidate_cached_values
end

#move_text_position_and_set_leading(x, y) ⇒ Object

TD



153
154
155
156
# File 'lib/pdf/reader/page_receiver.rb', line 153

# TD: set the text leading to the negation of y, then move the text
# position by (x, y).
def move_text_position_and_set_leading(x, y) # TD
  leading = -1 * y
  set_text_leading(leading)
  move_text_position(x, y)
end

#move_to_next_line_and_show_text(str) ⇒ Object



203
204
205
206
# File 'lib/pdf/reader/page_receiver.rb', line 203

# ' operator: move to the start of the next line, then show +str+.
# Returns whatever show_text returns.
def move_to_next_line_and_show_text(str) # '
  move_to_start_of_next_line()
  show_text(str)
end

#move_to_start_of_next_lineObject

T*



168
169
170
# File 'lib/pdf/reader/page_receiver.rb', line 168

# T*: move the text position down by the current text leading, i.e. by
# (0, -text_leading).
def move_to_start_of_next_line # T*
  leading = state[:text_leading]
  move_text_position(0, -leading)
end

#page=(page) ⇒ Object

starting a new page



27
28
29
30
31
32
33
34
35
36
37
38
# File 'lib/pdf/reader/page_receiver.rb', line 27

# Begin processing a new page.
#
# Records the page together with its cache and objects, seeds the font,
# xobject and colour space lookup stacks from the page's resources, resets
# the graphics state stack to a single copy of DEFAULT_GRAPHICS_STATE with an
# identity CTM, installs a do-nothing show-text callback and invalidates any
# cached values.
def page=(page)
  @page    = page
  @cache   = page.cache
  @objects = page.objects

  # resource lookup stacks, each starting with the page-level dictionary
  @font_stack    = [build_fonts(page.fonts)]
  @xobject_stack = [page.xobjects]
  @cs_stack      = [page.color_spaces]

  # a shallow copy, so writes to the state never touch the constant itself
  @stack = [DEFAULT_GRAPHICS_STATE.dup]
  @show_text_callback = lambda { |string, kerning| }
  state[:ctm] = identity_matrix
  invalidate_cached_values
end

#process_glyph(glyph_code) ⇒ Object

This function gets called for each character that gets drawn on the page. There are no



48
49
# File 'lib/pdf/reader/page_receiver.rb', line 48

# Hook invoked by show_text once for every glyph code that is shown.
# The default implementation does nothing and returns nil; subclasses
# override it to act on each glyph.
def process_glyph(glyph_code)
  nil
end

#process_glyph_displacement(glyph_code) ⇒ Object

after each glyph is painted onto the page the text matrix must be modified. There’s no defined operator for this, but depending on the use case some receivers may need to mutate the state with this while walking a page.

NOTE: some of the variable names in this method are obscure because

they mirror variable names from the PDF spec

NOTE: see Section 9.4.4, PDF 32000-1:2008, pp 252

Arguments:

glyph_code - self explanatory



332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
# File 'lib/pdf/reader/page_receiver.rb', line 332

# Advance the text matrix past the glyph that was just shown.
#
# The short local names (fs, tc, tw, tj, th, tx, ty) are kept because the
# documentation for this method says they mirror the PDF spec
# (Section 9.4.4, PDF 32000-1:2008).
#
# The pending TJ adjustment stored under :tj is consumed: it is read once and
# reset to 0 before the displacement is computed.
#
# NOTE(review): the horizontal/vertical choice below is keyed on
# state[:text_mode], which set_text_rendering_mode writes. Confirm that the
# rendering mode (rather than the font's writing mode) is really the intended
# switch here.
def process_glyph_displacement(glyph_code)
  fs = state[:text_font_size] # font size
  tc = state[:char_spacing]
  # word spacing only applies when the font reports the glyph as a space
  tw = current_font.is_space?(glyph_code) ? state[:word_spacing] : 0
  tj = state[:tj]
  state[:tj] = 0

  tx, ty =
    if state[:text_mode] == 0
      w0 = current_font.glyph_width(glyph_code)
      th = state[:h_scaling]
      [((w0-tj)*fs/1000.0 + tc + tw) * th, 0]
    else
      w1 = current_font.glyph_height(glyph_code)
      [0, (w1-tj)*fs/1000.0 + tc + tw]
    end

  @text_matrix.displacement_left_multiply!(tx, ty)
  invalidate_cached_values
end

#restore_graphics_stateObject

Restore the state to the previous value on the stack.



65
66
67
# File 'lib/pdf/reader/page_receiver.rb', line 65

# Discard the state on top of the stack, making the previously saved state
# current again. Returns the state that was removed (nil if the stack was
# already empty).
def restore_graphics_state
  @stack.pop()
end

#save_graphics_stateObject

Clones the current graphics state and pushes it onto the top of the stack. Any changes that are subsequently made to the state can then be reversed by calling restore_graphics_state.



59
60
61
# File 'lib/pdf/reader/page_receiver.rb', line 59

# Push a clone of the current state (as produced by clone_state) onto the
# stack, so later changes can be undone with restore_graphics_state.
# Returns the stack.
def save_graphics_state
  snapshot = clone_state
  @stack << snapshot
end

#set_character_spacing(char_spacing) ⇒ Object

Text State Operators



106
107
108
# File 'lib/pdf/reader/page_receiver.rb', line 106

# Store +char_spacing+ under :char_spacing in the current graphics state.
# Returns the stored value.
def set_character_spacing(char_spacing)
  current = state
  current[:char_spacing] = char_spacing
end

#set_horizontal_text_scaling(h_scaling) ⇒ Object



110
111
112
# File 'lib/pdf/reader/page_receiver.rb', line 110

# Store the horizontal scaling in the current graphics state. The operand is
# divided by 100.0 first, so the state holds a fraction (an operand of 100
# is stored as 1.0). Returns the stored fraction.
def set_horizontal_text_scaling(h_scaling)
  fraction = h_scaling / 100.0
  state[:h_scaling] = fraction
end

#set_spacing_next_line_show_text(aw, ac, string) ⇒ Object



208
209
210
211
212
# File 'lib/pdf/reader/page_receiver.rb', line 208

# " operator: set the word spacing to +aw+ and the character spacing to +ac+,
# then move to the next line and show +string+.
# Returns whatever move_to_next_line_and_show_text returns.
def set_spacing_next_line_show_text(aw, ac, string) # "
  set_word_spacing(aw)
  set_character_spacing(ac)

  move_to_next_line_and_show_text(string)
end

#set_text_font_and_size(label, size) ⇒ Object



114
115
116
117
# File 'lib/pdf/reader/page_receiver.rb', line 114

# Record the font label and the font size in the current graphics state
# (under :text_font and :text_font_size). Returns +size+.
def set_text_font_and_size(label, size)
  state[:text_font] = label
  state[:text_font_size] = size
end

#set_text_leading(leading) ⇒ Object



127
128
129
# File 'lib/pdf/reader/page_receiver.rb', line 127

# Store +leading+ under :text_leading in the current graphics state.
# Returns the stored value.
def set_text_leading(leading)
  current = state
  current[:text_leading] = leading
end

#set_text_matrix_and_text_line_matrix(a, b, c, d, e, f) ⇒ Object

Tm



158
159
160
161
162
163
164
165
166
# File 'lib/pdf/reader/page_receiver.rb', line 158

# Tm: replace the text matrix with a new TransformationMatrix built from the
# six operands, make the text line matrix a separate copy of it, and
# invalidate cached values.
def set_text_matrix_and_text_line_matrix(a, b, c, d, e, f) # Tm
  matrix = TransformationMatrix.new(a, b, c, d, e, f)
  @text_matrix      = matrix
  @text_line_matrix = matrix.dup
  invalidate_cached_values
end

#set_text_rendering_mode(mode) ⇒ Object



131
132
133
# File 'lib/pdf/reader/page_receiver.rb', line 131

# Store +mode+ under :text_mode in the current graphics state.
# Returns the stored value.
def set_text_rendering_mode(mode)
  current = state
  current[:text_mode] = mode
end

#set_text_rise(rise) ⇒ Object



135
136
137
# File 'lib/pdf/reader/page_receiver.rb', line 135

# Store +rise+ under :text_rise in the current graphics state.
# Returns the stored value.
def set_text_rise(rise)
  current = state
  current[:text_rise] = rise
end

#set_word_spacing(word_spacing) ⇒ Object



139
140
141
# File 'lib/pdf/reader/page_receiver.rb', line 139

# Store +word_spacing+ under :word_spacing in the current graphics state.
# Returns the stored value.
def set_word_spacing(word_spacing)
  current = state
  current[:word_spacing] = word_spacing
end

#show_text(string) ⇒ Object

Text Showing Operators



176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
# File 'lib/pdf/reader/page_receiver.rb', line 176

# Tj: show a string of text using the current font.
#
# The string is split into glyph codes with the font's unpack method. For
# every glyph code, process_glyph is called first and then
# process_glyph_displacement, so the next glyph starts from the updated
# position.
#
# Raises PDF::Reader::MalformedPDFError when there is no current font.
def show_text(string) # Tj (AWAY)
  # resolve the font once instead of walking the font stack twice
  font = current_font
  if font.nil?
    raise PDF::Reader::MalformedPDFError, "current font is invalid"
  end

  # plain each: the index supplied by each_with_index was never used
  font.unpack(string).each do |glyph_code|
    # Subclasses should implement this function
    # default implementation does nothing
    process_glyph(glyph_code)

    # apply the glyph displacement for the current glyph so the next
    # glyph will appear in the correct position
    process_glyph_displacement(glyph_code)
  end
end

#show_text_with_positioning(params) ⇒ Object

TJ [(A) 120 (WA) 20 (Y)]



192
193
194
195
196
197
198
199
200
201
# File 'lib/pdf/reader/page_receiver.rb', line 192

# TJ: show text with individual glyph positioning,
# e.g. [(A) 120 (WA) 20 (Y)].
#
# +params+ is an array mixing numbers and strings. Numeric entries are added
# to the pending adjustment stored under :tj in the graphics state; every
# other entry is passed to show_text.
#
# Integer is matched instead of Fixnum: Fixnum was deprecated in Ruby 2.4 and
# removed in Ruby 3.2, where referencing it raises NameError. Integer also
# covers every value Fixnum used to match.
def show_text_with_positioning(params) # TJ [(A) 120 (WA) 20 (Y)]
  params.each do |element|
    case element
    when Float, Integer
      state[:tj] += element
    else
      show_text(element)
    end
  end
end

#stack_depthObject

when save_graphics_state is called, we need to push a new copy of the current state onto the stack. That way any modifications to the state will be undone once restore_graphics_state is called.



300
301
302
# File 'lib/pdf/reader/page_receiver.rb', line 300

# Number of graphics states currently held on the stack.
def stack_depth
  @stack.length
end

#trm_transform(x, y) ⇒ Object

transform x and y co-ordinates from the current text space to the underlying device space.

transforming (0,0) is a really common case, so optimise for it to avoid unnecessary object allocations



259
260
261
262
263
264
265
266
267
268
269
# File 'lib/pdf/reader/page_receiver.rb', line 259

# Apply the text rendering matrix to the point (x, y) and return the result
# as a two-element array:
#   x' = a*x + c*y + e
#   y' = b*x + d*y + f
#
# Transforming the origin is the common case, so it is answered straight
# from the matrix's e and f components without doing any arithmetic.
def trm_transform(x, y)
  trm = text_rendering_matrix
  return [trm.e, trm.f] if x == 0 && y == 0

  transformed_x = (trm.a * x) + (trm.c * y) + (trm.e)
  transformed_y = (trm.b * x) + (trm.d * y) + (trm.f)
  [transformed_x, transformed_y]
end