Class: PDF::Reader::PageTextReceiver
- Inherits:
-
Object
- Object
- PDF::Reader::PageTextReceiver
- Extended by:
- Forwardable
- Defined in:
- lib/pdf/reader/page_text_receiver.rb
Overview
Builds a UTF-8 string of all the text on a single page by processing all the operators in a content stream.
Constant Summary collapse
- SPACE =
: String
" "
Instance Attribute Summary collapse
-
#options ⇒ Object
readonly
: untyped.
-
#state ⇒ Object
readonly
: untyped.
Instance Method Summary collapse
-
#content ⇒ Object
deprecated.
-
#invoke_xobject(label) ⇒ Object
XObjects.
-
#move_to_next_line_and_show_text(str) ⇒ Object
' operator: moves to the start of the next line and shows the text.
-
#page=(page) ⇒ Object
starting a new page.
- #runs(opts = {}) ⇒ Object
-
#set_spacing_next_line_show_text(aw, ac, string) ⇒ Object
" operator: sets word and character spacing, then moves to the next line and shows the text.
-
#show_text(string) ⇒ Object
Text Showing Operators.
-
#show_text_with_positioning(params) ⇒ Object
TJ [(A) 120 (WA) 20 (Y)].
Instance Attribute Details
#options ⇒ Object (readonly)
: untyped
23 24 25 |
# File 'lib/pdf/reader/page_text_receiver.rb', line 23 def options @options end |
#state ⇒ Object (readonly)
: untyped
20 21 22 |
# File 'lib/pdf/reader/page_text_receiver.rb', line 20 def state @state end |
Instance Method Details
#content ⇒ Object
deprecated
87 88 89 90 |
# File 'lib/pdf/reader/page_text_receiver.rb', line 87 def content mediabox = @page.rectangles[:MediaBox] PageLayout.new(runs, mediabox).to_s end |
#invoke_xobject(label) ⇒ Object
XObjects
126 127 128 129 130 131 132 133 |
# File 'lib/pdf/reader/page_text_receiver.rb', line 126 def invoke_xobject(label) @state.invoke_xobject(label) do |xobj| case xobj when PDF::Reader::FormXObject then xobj.walk(self) end end end |
#move_to_next_line_and_show_text(str) ⇒ Object
' operator: moves to the start of the next line and shows the text
112 113 114 115 |
# File 'lib/pdf/reader/page_text_receiver.rb', line 112 def move_to_next_line_and_show_text(str) # ' @state.move_to_start_of_next_line show_text(str) end |
#page=(page) ⇒ Object
starting a new page
47 48 49 50 51 52 |
# File 'lib/pdf/reader/page_text_receiver.rb', line 47 def page=(page) @state = PageState.new(page) @page = page @content = [] @characters = [] end |
#runs(opts = {}) ⇒ Object
54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 |
# File 'lib/pdf/reader/page_text_receiver.rb', line 54 def runs(opts = {}) runs = @characters if rect = opts.fetch(:rect, @page.rectangles[:CropBox]) runs = BoundingRectangleRunsFilter.runs_within_rect(runs, rect) end if opts.fetch(:skip_zero_width, true) runs = ZeroWidthRunsFilter.exclude_zero_width_runs(runs) end if opts.fetch(:skip_overlapping, true) runs = OverlappingRunsFilter.exclude_redundant_runs(runs) end runs = NoTextFilter.exclude_empty_strings(runs) if opts.fetch(:merge, true) runs = merge_runs(runs) end if (only_filter = opts.fetch(:only, nil)) runs = AdvancedTextRunFilter.only(runs, only_filter) end if (exclude_filter = opts.fetch(:exclude, nil)) runs = AdvancedTextRunFilter.exclude(runs, exclude_filter) end runs end |
#set_spacing_next_line_show_text(aw, ac, string) ⇒ Object
" operator: sets word and character spacing, then moves to the next line and shows the text
117 118 119 120 121 |
# File 'lib/pdf/reader/page_text_receiver.rb', line 117 def set_spacing_next_line_show_text(aw, ac, string) # " @state.set_word_spacing(aw) @state.set_character_spacing(ac) move_to_next_line_and_show_text(string) end |
#show_text(string) ⇒ Object
Text Showing Operators
record text that is drawn on the page
96 97 98 |
# File 'lib/pdf/reader/page_text_receiver.rb', line 96 def show_text(string) # Tj (AWAY) internal_show_text(string) end |
#show_text_with_positioning(params) ⇒ Object
TJ [(A) 120 (WA) 20 (Y)]
100 101 102 103 104 105 106 107 108 109 110 |
# File 'lib/pdf/reader/page_text_receiver.rb', line 100 def show_text_with_positioning(params) # TJ [(A) 120 (WA) 20 (Y)] params.each do |arg| if arg.is_a?(String) internal_show_text(arg) elsif arg.is_a?(Numeric) @state.process_glyph_displacement(0, arg, false) else # skip it end end end |