Class: PDF::Reader::PageTextReceiver
- Inherits:
-
Object
- Object
- PDF::Reader::PageTextReceiver
- Extended by:
- Forwardable
- Defined in:
- lib/pdf/reader/page_text_receiver.rb
Overview
Builds a UTF-8 string of all the text on a single page by processing all the operators in a content stream.
Constant Summary collapse
- SPACE =
" "
Instance Attribute Summary collapse
-
#options ⇒ Object
readonly
Returns the value of attribute options.
-
#state ⇒ Object
readonly
Returns the value of attribute state.
Instance Method Summary collapse
-
#content ⇒ Object
deprecated.
-
#invoke_xobject(label) ⇒ Object
XObjects.
-
#move_to_next_line_and_show_text(str) ⇒ Object
'.
-
#page=(page) ⇒ Object
starting a new page.
- #runs(opts = {}) ⇒ Object
-
#set_spacing_next_line_show_text(aw, ac, string) ⇒ Object
".
-
#show_text(string) ⇒ Object
Text Showing Operators.
-
#show_text_with_positioning(params) ⇒ Object
TJ [(A) 120 (WA) 20 (Y)].
Instance Attribute Details
#options ⇒ Object (readonly)
Returns the value of attribute options.
19 20 21 |
# File 'lib/pdf/reader/page_text_receiver.rb', line 19 def options @options end |
#state ⇒ Object (readonly)
Returns the value of attribute state.
19 20 21 |
# File 'lib/pdf/reader/page_text_receiver.rb', line 19 def state @state end |
Instance Method Details
#content ⇒ Object
deprecated
75 76 77 78 |
# File 'lib/pdf/reader/page_text_receiver.rb', line 75 def content mediabox = @page.rectangles[:MediaBox] PageLayout.new(runs, mediabox).to_s end |
#invoke_xobject(label) ⇒ Object
XObjects
114 115 116 117 118 119 120 121 |
# File 'lib/pdf/reader/page_text_receiver.rb', line 114 def invoke_xobject(label) @state.invoke_xobject(label) do |xobj| case xobj when PDF::Reader::FormXObject then xobj.walk(self) end end end |
#move_to_next_line_and_show_text(str) ⇒ Object
'
100 101 102 103 |
# File 'lib/pdf/reader/page_text_receiver.rb', line 100 def move_to_next_line_and_show_text(str) # ' @state.move_to_start_of_next_line show_text(str) end |
#page=(page) ⇒ Object
starting a new page
43 44 45 46 47 48 |
# File 'lib/pdf/reader/page_text_receiver.rb', line 43 def page=(page) @state = PageState.new(page) @page = page @content = [] @characters = [] end |
#runs(opts = {}) ⇒ Object
50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 |
# File 'lib/pdf/reader/page_text_receiver.rb', line 50 def runs(opts = {}) runs = @characters if rect = opts.fetch(:rect, @page.rectangles[:CropBox]) runs = BoundingRectangleRunsFilter.runs_within_rect(runs, rect) end if opts.fetch(:skip_zero_width, true) runs = ZeroWidthRunsFilter.exclude_zero_width_runs(runs) end if opts.fetch(:skip_overlapping, true) runs = OverlappingRunsFilter.exclude_redundant_runs(runs) end runs = NoTextFilter.exclude_empty_strings(runs) if opts.fetch(:merge, true) runs = merge_runs(runs) end runs end |
#set_spacing_next_line_show_text(aw, ac, string) ⇒ Object
"
105 106 107 108 109 |
# File 'lib/pdf/reader/page_text_receiver.rb', line 105 def set_spacing_next_line_show_text(aw, ac, string) # " @state.set_word_spacing(aw) @state.set_character_spacing(ac) move_to_next_line_and_show_text(string) end |
#show_text(string) ⇒ Object
Text Showing Operators
record text that is drawn on the page
84 85 86 |
# File 'lib/pdf/reader/page_text_receiver.rb', line 84 def show_text(string) # Tj (AWAY) internal_show_text(string) end |
#show_text_with_positioning(params) ⇒ Object
TJ [(A) 120 (WA) 20 (Y)]
88 89 90 91 92 93 94 95 96 97 98 |
# File 'lib/pdf/reader/page_text_receiver.rb', line 88 def show_text_with_positioning(params) # TJ [(A) 120 (WA) 20 (Y)] params.each do |arg| if arg.is_a?(String) internal_show_text(arg) elsif arg.is_a?(Numeric) @state.process_glyph_displacement(0, arg, false) else # skip it end end end |