Class: PDF::Reader::PageTextReceiver

Inherits:
PageReceiver show all
Defined in:
lib/pdf/reader/page_text_receiver.rb

Overview

Builds a UTF-8 string of all the text on a single page by processing all the operators in a content stream.

Constant Summary

Constants inherited from PageReceiver

PDF::Reader::PageReceiver::DEFAULT_GRAPHICS_STATE, PDF::Reader::PageReceiver::SPACE

Instance Method Summary collapse

Methods inherited from PageReceiver

#begin_text_object, #clone_state, #concatenate_matrix, #ctm_transform, #current_font, #end_text_object, #find_color_space, #find_font, #find_xobject, #font_size, #glyph_width_scaling_factor, #move_text_position, #move_text_position_and_set_leading, #move_to_next_line_and_show_text, #move_to_start_of_next_line, #process_glyph_displacement, #restore_graphics_state, #save_graphics_state, #set_character_spacing, #set_horizontal_text_scaling, #set_spacing_next_line_show_text, #set_text_font_and_size, #set_text_leading, #set_text_matrix_and_text_line_matrix, #set_text_rendering_mode, #set_text_rise, #set_word_spacing, #show_text, #show_text_with_positioning, #stack_depth, #trm_transform

Instance Method Details

#content ⇒ Object



21
22
23
24
# File 'lib/pdf/reader/page_text_receiver.rb', line 21

# Returns the text collected for the current page as a single string.
#
# The accumulated text runs and the page's MediaBox are handed to PageLayout,
# and the result of its #to_s is returned. Nothing is written to standard
# output; callers receive the text only through the return value.
def content
  PageLayout.new(@characters, @mediabox).to_s
end

#invoke_xobject(label) ⇒ Object

XObjects



29
30
31
32
33
34
35
36
# File 'lib/pdf/reader/page_text_receiver.rb', line 29

# Handles an XObject invocation found in the content stream.
#
# Lookup is delegated to the parent implementation, which yields the XObject
# for +label+. Only form XObjects are processed here: the form is asked to
# walk its own content with this receiver. Any other kind of XObject is
# ignored.
def invoke_xobject(label)
  super(label) do |xobject|
    xobject.walk(self) if xobject.is_a?(PDF::Reader::FormXObject)
  end
end

#page=(page) ⇒ Object

starting a new page



15
16
17
18
19
# File 'lib/pdf/reader/page_text_receiver.rb', line 15

# Resets the per-page state when a new page begins.
#
# The parent implementation runs first; afterwards any previously collected
# text runs are discarded and the page's :MediaBox attribute is recorded for
# use when the page text is laid out.
def page=(page)
  super
  @characters = []
  attrs = page.attributes
  @mediabox = attrs[:MediaBox]
end

#process_glyph(glyph_code) ⇒ Object



38
39
40
41
42
43
44
45
46
47
# File 'lib/pdf/reader/page_text_receiver.rb', line 38

# Records a TextRun for a single glyph, unless the current font reports the
# glyph as a space.
#
# The run's position comes from the e/f components of the text rendering
# matrix. Its width is the font's glyph width (divided by 1000.0) scaled by
# the matrix's a component, and its size is the current text font size scaled
# by the matrix's d component.
def process_glyph(glyph_code)
  font = current_font
  return if font.is_space?(glyph_code)

  trm = text_rendering_matrix
  origin_x = trm.e
  origin_y = trm.f
  utf8 = font.to_utf8(glyph_code)
  # TODO: the width is only correct for upright, horizontally laid out
  #       characters (not sideways text), because only the matrix's a
  #       component is used for scaling.
  run_width = font.glyph_width(glyph_code) / 1000.0 * trm.a
  run_size = state[:text_font_size] * trm.d
  @characters << TextRun.new(origin_x, origin_y, run_width, run_size, utf8)
end