Class: PDF::Reader::PageReceiver
- Inherits:
-
Object
- Object
- PDF::Reader::PageReceiver
- Defined in:
- lib/pdf/reader/page_receiver.rb
Overview
encapsulates logic for tracking graphics state as the instructions for a single page are processed. Most of the public methods correspond directly to PDF operators.
Direct Known Subclasses
Constant Summary collapse
- DEFAULT_GRAPHICS_STATE =
{ :char_spacing => 0, :word_spacing => 0, :h_scaling => 1.0, :text_leading => 0, :text_font => nil, :text_font_size => nil, :text_mode => 0, :text_rise => 0, :text_knockout => 0, :tj => 0 }
- SPACE =
" "
Instance Method Summary collapse
-
#begin_text_object ⇒ Object
Text Object Operators.
-
#clone_state ⇒ Object
This returns a deep clone of the current state, ensuring changes are kept separate from earlier states.
-
#concatenate_matrix(a, b, c, d, e, f) ⇒ Object
update the current transformation matrix.
-
#ctm_transform(x, y) ⇒ Object
transform x and y co-ordinates from the current user space to the underlying device space.
- #current_font ⇒ Object
- #end_text_object ⇒ Object
- #find_color_space(label) ⇒ Object
- #find_font(label) ⇒ Object
- #find_xobject(label) ⇒ Object
- #font_size ⇒ Object
- #glyph_width_scaling_factor ⇒ Object
-
#invoke_xobject(label) ⇒ Object
XObjects.
-
#move_text_position(x, y) ⇒ Object
Text Positioning Operators.
-
#move_text_position_and_set_leading(x, y) ⇒ Object
TD.
-
#move_to_next_line_and_show_text(str) ⇒ Object
' (move to the next line and show text).
-
#move_to_start_of_next_line ⇒ Object
T*.
-
#page=(page) ⇒ Object
starting a new page.
-
#process_glyph(glyph_code) ⇒ Object
This function gets called for each character that gets drawn on the page.
-
#process_glyph_displacement(glyph_code) ⇒ Object
after each glyph is painted onto the page the text matrix must be modified.
-
#restore_graphics_state ⇒ Object
Restore the state to the previous value on the stack.
-
#save_graphics_state ⇒ Object
Clones the current graphics state and pushes it onto the top of the stack.
-
#set_character_spacing(char_spacing) ⇒ Object
Text State Operators.
- #set_horizontal_text_scaling(h_scaling) ⇒ Object
-
#set_spacing_next_line_show_text(aw, ac, string) ⇒ Object
" (set word and character spacing, move to the next line and show text).
- #set_text_font_and_size(label, size) ⇒ Object
- #set_text_leading(leading) ⇒ Object
-
#set_text_matrix_and_text_line_matrix(a, b, c, d, e, f) ⇒ Object
Tm.
- #set_text_rendering_mode(mode) ⇒ Object
- #set_text_rise(rise) ⇒ Object
- #set_word_spacing(word_spacing) ⇒ Object
-
#show_text(string) ⇒ Object
Text Showing Operators.
-
#show_text_with_positioning(params) ⇒ Object
TJ [(A) 120 (WA) 20 (Y)].
-
#stack_depth ⇒ Object
Returns the current depth of the graphics state stack; when save_graphics_state is called, a new copy of the current state is pushed onto the stack.
-
#trm_transform(x, y) ⇒ Object
transform x and y co-ordinates from the current text space to the underlying device space.
Instance Method Details
#begin_text_object ⇒ Object
Text Object Operators
93 94 95 96 |
# File 'lib/pdf/reader/page_receiver.rb', line 93
#
# Starts a text object: the text matrix and the text line matrix are each
# reset to the value returned by a separate identity_matrix call.
def begin_text_object
  @text_matrix = identity_matrix
  @text_line_matrix = identity_matrix
end
#clone_state ⇒ Object
This returns a deep clone of the current state, ensuring changes are kept separate from earlier states.
Marshal is used to round-trip the state through a string to easily perform the deep clone. Kinda hacky, but effective.
310 311 312 313 314 315 316 |
# File 'lib/pdf/reader/page_receiver.rb', line 310
#
# Returns a deep copy of the state on top of @stack, or an empty Hash when
# the stack is empty. The copy is made by serialising the state with Marshal
# and loading it back, so nested objects are duplicated as well.
def clone_state
  return {} if @stack.empty?

  serialized = Marshal.dump(@stack.last)
  Marshal.load(serialized)
end
#concatenate_matrix(a, b, c, d, e, f) ⇒ Object
update the current transformation matrix.
If the CTM is currently undefined, just store the new values.
If there’s an existing CTM, then multiply the existing matrix with the new matrix to form the updated matrix.
80 81 82 83 84 85 86 87 |
# File 'lib/pdf/reader/page_receiver.rb', line 80
#
# Updates the current transformation matrix stored under state[:ctm].
#
# When a CTM is already present it is updated in place via left_multiply!
# with the six supplied values; otherwise a new TransformationMatrix built
# from those values is stored. Cached values are invalidated afterwards in
# both cases.
def concatenate_matrix(a, b, c, d, e, f)
  existing = state[:ctm]
  if existing
    existing.left_multiply!(a, b, c, d, e, f)
  else
    state[:ctm] = TransformationMatrix.new(a, b, c, d, e, f)
  end
  invalidate_cached_values
end
#ctm_transform(x, y) ⇒ Object
transform x and y co-ordinates from the current user space to the underlying device space.
246 247 248 249 250 251 |
# File 'lib/pdf/reader/page_receiver.rb', line 246
#
# Applies the current transformation matrix (as returned by ctm) to the
# point (x, y) and returns the result as a two element array [x', y'].
def ctm_transform(x, y)
  m = ctm
  device_x = (m.a * x) + (m.c * y) + m.e
  device_y = (m.b * x) + (m.d * y) + m.f
  [device_x, device_y]
end
#current_font ⇒ Object
271 272 273 |
# File 'lib/pdf/reader/page_receiver.rb', line 271
#
# Looks up the font whose label is stored in state[:text_font] and returns
# whatever find_font returns for it.
def current_font
  label = state[:text_font]
  find_font(label)
end
#end_text_object ⇒ Object
98 99 100 |
# File 'lib/pdf/reader/page_receiver.rb', line 98
#
# Ends a text object. Intentionally a no-op: nothing needs to be done here.
def end_text_object
end
#find_color_space(label) ⇒ Object
282 283 284 285 286 287 |
# File 'lib/pdf/reader/page_receiver.rb', line 282
#
# Searches @cs_stack from the front for the first dictionary that contains
# +label+ and returns the entry stored under that label. Returns nil when no
# dictionary on the stack has the label.
def find_color_space(label)
  dict = @cs_stack.find { |colorspaces| colorspaces.key?(label) }
  dict && dict[label]
end
#find_font(label) ⇒ Object
275 276 277 278 279 280 |
# File 'lib/pdf/reader/page_receiver.rb', line 275
#
# Searches @font_stack from the front for the first dictionary that contains
# +label+ and returns the entry stored under that label. Returns nil when no
# dictionary on the stack has the label.
def find_font(label)
  dict = @font_stack.find { |fonts| fonts.key?(label) }
  dict && dict[label]
end
#find_xobject(label) ⇒ Object
289 290 291 292 293 294 |
# File 'lib/pdf/reader/page_receiver.rb', line 289
#
# Searches @xobject_stack from the front for the first dictionary that
# contains +label+ and returns the entry stored under that label. Returns nil
# when no dictionary on the stack has the label.
def find_xobject(label)
  dict = @xobject_stack.find { |xobjects| xobjects.key?(label) }
  dict && dict[label]
end
#font_size ⇒ Object
119 120 121 |
# File 'lib/pdf/reader/page_receiver.rb', line 119
#
# Returns the value stored under :text_font_size in the current graphics
# state (written by set_text_font_and_size).
def font_size
  state[:text_font_size]
end
#glyph_width_scaling_factor ⇒ Object
123 124 125 |
# File 'lib/pdf/reader/page_receiver.rb', line 123
#
# Returns magnitude(a, b) for the a and b components of the text rendering
# matrix, memoised in @glyph_width_scaling_factor.
#
# `||=` is required for the memoisation: `|=` would evaluate `nil | value`
# on the first call and store `true` instead of the computed number.
# NOTE(review): presumably invalidate_cached_values clears this ivar when the
# matrices change - confirm.
def glyph_width_scaling_factor
  @glyph_width_scaling_factor ||= magnitude(text_rendering_matrix.a, text_rendering_matrix.b)
end
#invoke_xobject(label) ⇒ Object
XObjects
217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 |
# File 'lib/pdf/reader/page_receiver.rb', line 217
#
# Invokes the XObject registered under +label+.
#
# The graphics state is saved first and restored afterwards. If the XObject
# carries a :Matrix entry it is concatenated onto the CTM. For a :Form
# XObject a PDF::Reader::FormXObject is built, its fonts and xobjects are
# pushed onto the lookup stacks while the form is yielded, and popped again
# afterwards; any other XObject is yielded as-is.
#
# Raises MalformedPDFError when no XObject with that label can be found.
#
# The stacks are unwound in `ensure` clauses so that an exception raised by
# the block (or the missing-XObject error) cannot leave an extra graphics
# state or stale form resources behind.
def invoke_xobject(label)
  save_graphics_state
  begin
    xobject = find_xobject(label)
    raise MalformedPDFError, "XObject #{label} not found" if xobject.nil?

    matrix = xobject.hash[:Matrix]
    concatenate_matrix(*matrix) if matrix

    if xobject.hash[:Subtype] == :Form
      form = PDF::Reader::FormXObject.new(@page, xobject, :cache => @cache)
      @font_stack.unshift(form.font_objects)
      @xobject_stack.unshift(form.xobjects)
      begin
        yield form if block_given?
      ensure
        @font_stack.shift
        @xobject_stack.shift
      end
    else
      yield xobject if block_given?
    end
  ensure
    restored = restore_graphics_state
  end
  # keep returning the result of restore_graphics_state on the normal path
  restored
end
#move_text_position(x, y) ⇒ Object
Text Positioning Operators
147 148 149 150 151 |
# File 'lib/pdf/reader/page_receiver.rb', line 147
#
# Td: displaces the text line matrix by (x, y) in place, then makes the text
# matrix a copy (dup) of the updated text line matrix and invalidates cached
# values.
def move_text_position(x, y) # Td
  @text_line_matrix.displacement_left_multiply!(x, y)
  @text_matrix = @text_line_matrix.dup
  invalidate_cached_values
end
#move_text_position_and_set_leading(x, y) ⇒ Object
TD
153 154 155 156 |
# File 'lib/pdf/reader/page_receiver.rb', line 153
#
# TD: sets the text leading to the negated y offset and then moves the text
# position by (x, y).
def move_text_position_and_set_leading(x, y) # TD
  leading = -1 * y
  set_text_leading(leading)
  move_text_position(x, y)
end
#move_to_next_line_and_show_text(str) ⇒ Object
'
203 204 205 206 |
# File 'lib/pdf/reader/page_receiver.rb', line 203
#
# ' operator: moves to the start of the next line, then shows +str+.
def move_to_next_line_and_show_text(str) # '
  move_to_start_of_next_line
  show_text(str)
end
#move_to_start_of_next_line ⇒ Object
T*
168 169 170 |
# File 'lib/pdf/reader/page_receiver.rb', line 168
#
# T*: moves the text position by (0, -leading), where leading is the value
# stored under :text_leading in the current graphics state.
def move_to_start_of_next_line # T*
  leading = state[:text_leading]
  move_text_position(0, -leading)
end
#page=(page) ⇒ Object
starting a new page
27 28 29 30 31 32 33 34 35 36 37 38 |
# File 'lib/pdf/reader/page_receiver.rb', line 27
#
# Starts a new page: remembers the page together with its cache and objects,
# seeds the font, xobject and colour space lookup stacks from the page,
# resets the graphics state stack to a single copy of
# DEFAULT_GRAPHICS_STATE, installs a do-nothing show-text callback, sets the
# CTM to identity_matrix and invalidates cached values.
def page=(page)
  @page    = page
  @cache   = page.cache
  @objects = page.objects

  @font_stack    = [build_fonts(page.fonts)]
  @xobject_stack = [page.xobjects]
  @cs_stack      = [page.color_spaces]

  # the state stack must exist before state[:ctm] is assigned below
  @stack = [DEFAULT_GRAPHICS_STATE.dup]
  @show_text_callback = lambda { |string, kerning| }
  state[:ctm] = identity_matrix
  invalidate_cached_values
end
#process_glyph(glyph_code) ⇒ Object
This function gets called for each character that gets drawn on the page. The default implementation does nothing; subclasses should override it.
48 49 |
# File 'lib/pdf/reader/page_receiver.rb', line 48
#
# Hook invoked by show_text once per glyph code. The default implementation
# is empty; subclasses should override it to act on each glyph.
def process_glyph(glyph_code)
end
#process_glyph_displacement(glyph_code) ⇒ Object
after each glyph is painted onto the page the text matrix must be modified. There’s no defined operator for this, but depending on the use case some receivers may need to mutate the state with this while walking a page.
NOTE: some of the variable names in this method are obscure because
they mirror variable names from the PDF spec
NOTE: see Section 9.4.4, PDF 32000-1:2008, pp 252
Arguments:
glyph_code - self explanatory
332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 |
# File 'lib/pdf/reader/page_receiver.rb', line 332
#
# Advances the text matrix after a glyph has been painted.
#
# Variable names mirror the PDF spec (Section 9.4.4, PDF 32000-1:2008).
# The pending state[:tj] adjustment is read and reset to 0, a displacement
# (tx, ty) is computed from the glyph metric, font size, character spacing
# and (for space glyphs) word spacing, and the text matrix is displaced by it
# in place. Cached values are invalidated afterwards.
#
# NOTE(review): the horizontal/vertical branch keys on state[:text_mode],
# which is the value written by set_text_rendering_mode - confirm that the
# rendering mode (rather than a writing mode) is the intended switch.
def process_glyph_displacement(glyph_code)
  fs = state[:text_font_size] # font size
  tc = state[:char_spacing]
  tw = if current_font.is_space?(glyph_code)
         state[:word_spacing]
       else
         0
       end
  tj = state[:tj]
  state[:tj] = 0 # the pending adjustment is consumed by this glyph

  tx, ty = if state[:text_mode] == 0
             w0 = current_font.glyph_width(glyph_code)
             th = state[:h_scaling]
             [((w0 - tj) * fs / 1000.0 + tc + tw) * th, 0]
           else
             w1 = current_font.glyph_height(glyph_code)
             [0, (w1 - tj) * fs / 1000.0 + tc + tw]
           end

  @text_matrix.displacement_left_multiply!(tx, ty)
  invalidate_cached_values
end
#restore_graphics_state ⇒ Object
Restore the state to the previous value on the stack.
65 66 67 |
# File 'lib/pdf/reader/page_receiver.rb', line 65
#
# Removes the top entry from the graphics state stack and returns it, making
# the previous entry the current one.
def restore_graphics_state
  @stack.pop
end
#save_graphics_state ⇒ Object
Clones the current graphics state and pushes it onto the top of the stack. Any changes that are subsequently made to the state can then be reversed by calling restore_graphics_state.
59 60 61 |
# File 'lib/pdf/reader/page_receiver.rb', line 59
#
# Pushes the result of clone_state onto the graphics state stack, so later
# changes can be undone with restore_graphics_state.
def save_graphics_state
  @stack.push(clone_state)
end
#set_character_spacing(char_spacing) ⇒ Object
Text State Operators
106 107 108 |
# File 'lib/pdf/reader/page_receiver.rb', line 106
#
# Stores +char_spacing+ under :char_spacing in the current graphics state.
def set_character_spacing(char_spacing)
  state[:char_spacing] = char_spacing
end
#set_horizontal_text_scaling(h_scaling) ⇒ Object
110 111 112 |
# File 'lib/pdf/reader/page_receiver.rb', line 110
#
# Stores the horizontal scaling under :h_scaling in the current graphics
# state. The operand is divided by 100.0 first, so 100 is stored as 1.0.
def set_horizontal_text_scaling(h_scaling)
  factor = h_scaling / 100.0
  state[:h_scaling] = factor
end
#set_spacing_next_line_show_text(aw, ac, string) ⇒ Object
"
208 209 210 211 212 |
# File 'lib/pdf/reader/page_receiver.rb', line 208
#
# " operator: sets the word spacing to +aw+ and the character spacing to
# +ac+, then moves to the next line and shows +string+.
def set_spacing_next_line_show_text(aw, ac, string) # "
  set_word_spacing(aw)
  set_character_spacing(ac)
  move_to_next_line_and_show_text(string)
end
#set_text_font_and_size(label, size) ⇒ Object
114 115 116 117 |
# File 'lib/pdf/reader/page_receiver.rb', line 114
#
# Records the font label under :text_font and the size under :text_font_size
# in the current graphics state.
def set_text_font_and_size(label, size)
  state[:text_font] = label
  state[:text_font_size] = size
end
#set_text_leading(leading) ⇒ Object
127 128 129 |
# File 'lib/pdf/reader/page_receiver.rb', line 127
#
# Stores +leading+ under :text_leading in the current graphics state.
def set_text_leading(leading)
  state[:text_leading] = leading
end
#set_text_matrix_and_text_line_matrix(a, b, c, d, e, f) ⇒ Object
Tm
158 159 160 161 162 163 164 165 166 |
# File 'lib/pdf/reader/page_receiver.rb', line 158
#
# Tm: replaces the text matrix with a new TransformationMatrix built from the
# six operands, sets the text line matrix to a copy (dup) of it and
# invalidates cached values.
def set_text_matrix_and_text_line_matrix(a, b, c, d, e, f) # Tm
  matrix = TransformationMatrix.new(a, b, c, d, e, f)
  @text_matrix = matrix
  @text_line_matrix = matrix.dup
  invalidate_cached_values
end
#set_text_rendering_mode(mode) ⇒ Object
131 132 133 |
# File 'lib/pdf/reader/page_receiver.rb', line 131
#
# Stores +mode+ under :text_mode in the current graphics state.
def set_text_rendering_mode(mode)
  state[:text_mode] = mode
end
#set_text_rise(rise) ⇒ Object
135 136 137 |
# File 'lib/pdf/reader/page_receiver.rb', line 135
#
# Stores +rise+ under :text_rise in the current graphics state.
def set_text_rise(rise)
  state[:text_rise] = rise
end
#set_word_spacing(word_spacing) ⇒ Object
139 140 141 |
# File 'lib/pdf/reader/page_receiver.rb', line 139
#
# Stores +word_spacing+ under :word_spacing in the current graphics state.
def set_word_spacing(word_spacing)
  state[:word_spacing] = word_spacing
end
#show_text(string) ⇒ Object
Text Showing Operators
176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 |
# File 'lib/pdf/reader/page_receiver.rb', line 176
#
# Tj (AWAY): shows +string+ using the current font.
#
# The string is unpacked into glyph codes by the current font. For every
# glyph code, process_glyph is called first, followed by
# process_glyph_displacement.
#
# Raises PDF::Reader::MalformedPDFError when there is no current font.
def show_text(string) # Tj (AWAY)
  font = current_font
  raise PDF::Reader::MalformedPDFError, "current font is invalid" if font.nil?

  font.unpack(string).each do |glyph_code|
    # Subclasses should implement this function;
    # the default implementation does nothing
    process_glyph(glyph_code)

    # apply the glyph displacement for the current glyph so the next
    # glyph will appear in the correct position
    process_glyph_displacement(glyph_code)
  end
end
#show_text_with_positioning(params) ⇒ Object
TJ [(A) 120 (WA) 20 (Y)]
192 193 194 195 196 197 198 199 200 201 |
# File 'lib/pdf/reader/page_receiver.rb', line 192
#
# TJ [(A) 120 (WA) 20 (Y)]: walks the operand array in order. Numeric
# entries are added to the pending adjustment in state[:tj]; every other
# entry is passed to show_text.
#
# Integer is matched instead of Fixnum: Fixnum was folded into Integer in
# Ruby 2.4 and the constant was removed in Ruby 3.2, where referencing it
# raises NameError.
def show_text_with_positioning(params) # TJ [(A) 120 (WA) 20 (Y)]
  params.each do |e|
    case e
    when Float, Integer
      state[:tj] += e
    else
      show_text(e)
    end
  end
end
#stack_depth ⇒ Object
Returns the current depth of the graphics state stack. When save_graphics_state is called, a new copy of the current state is pushed onto the stack, so that any modifications to the state will be undone once restore_graphics_state is called.
300 301 302 |
# File 'lib/pdf/reader/page_receiver.rb', line 300
#
# Returns the number of entries currently on the graphics state stack.
def stack_depth
  @stack.length
end
#trm_transform(x, y) ⇒ Object
transform x and y co-ordinates from the current text space to the underlying device space.
transforming (0,0) is a really common case, so optimise for it to avoid unnecessary object allocations
259 260 261 262 263 264 265 266 267 268 269 |
# File 'lib/pdf/reader/page_receiver.rb', line 259
#
# Applies the text rendering matrix to the point (x, y) and returns the
# result as a two element array [x', y'].
#
# Transforming (0, 0) is a really common case, so it is answered directly
# from the e and f components to avoid unnecessary object allocations.
def trm_transform(x, y)
  trm = text_rendering_matrix
  return [trm.e, trm.f] if x == 0 && y == 0

  device_x = (trm.a * x) + (trm.c * y) + (trm.e)
  device_y = (trm.b * x) + (trm.d * y) + (trm.f)
  [device_x, device_y]
end