Class: PDF::Reader::TextReceiver
- Inherits:
-
Object
- Object
- PDF::Reader::TextReceiver
- Defined in:
- lib/pdf/reader/text_receiver.rb
Overview
An example receiver class that processes all text found in a PDF file. All text that is found will be printed to the IO object specified in the constructor.
Usage:
receiver = PDF::Reader::TextReceiver.new($stdout)
PDF::Reader.file("somefile.pdf", receiver)
DEPRECATED: this class was deprecated in version 0.11.0 and will
eventually be removed
Instance Method Summary collapse
-
#begin_document(root) ⇒ Object
Called when the document parsing begins.
-
#begin_page(info) ⇒ Object
Called when new page parsing begins.
- #begin_page_container(page) ⇒ Object
-
#begin_text_object ⇒ Object
PDF operator BT.
- #calculate_line_and_location(new_loc) ⇒ Object
-
#end_document ⇒ Object
Called when the document parsing ends.
-
#end_page ⇒ Object
Called when page parsing ends.
- #end_page_container ⇒ Object
-
#end_text_object ⇒ Object
PDF operator ET.
-
#initialize(main_receiver) ⇒ TextReceiver
constructor
Initialize with the library user’s receiver.
- #media_box_check(dict) ⇒ Object
-
#move_text_position(tx, ty) ⇒ Object
PDF operator Td.
-
#move_text_position_and_set_leading(tx, ty) ⇒ Object
PDF operator TD.
-
#move_to_next_line_and_show_text(string) ⇒ Object
PDF operator '.
-
#move_to_start_of_next_line ⇒ Object
PDF operator T*.
-
#set_character_spacing(n) ⇒ Object
PDF operator Tc.
-
#set_horizontal_text_scaling(n) ⇒ Object
PDF operator Tz.
-
#set_spacing_next_line_show_text(aw, ac, string) ⇒ Object
PDF operator ".
-
#set_text_leading(n) ⇒ Object
PDF operator TL.
-
#set_text_matrix_and_text_line_matrix(*args) ⇒ Object
PDF operator Tm.
-
#set_word_spacing(n) ⇒ Object
PDF operator Tw.
-
#show_text(string) ⇒ Object
PDF operator Tj.
-
#show_text_with_positioning(params) ⇒ Object
PDF operator TJ.
- #super_show_text(string) ⇒ Object
Constructor Details
#initialize(main_receiver) ⇒ TextReceiver
Initialize with the library user’s receiver
42 43 44 45 |
# File 'lib/pdf/reader/text_receiver.rb', line 42
#
# Initialize with the library user's receiver.
#
# main_receiver - object that #end_page appends each page's text to via <<.
def initialize(main_receiver)
  @main_receiver = main_receiver
  # Stack of page-size hashes; #begin_page and #begin_page_container push
  # onto it.
  @upper_corners = []
end
Instance Method Details
#begin_document(root) ⇒ Object
Called when the document parsing begins
48 49 50 |
# File 'lib/pdf/reader/text_receiver.rb', line 48
#
# Called when the document parsing begins.
#
# root - ignored by this receiver.
#
# Resets the stack of page-size hashes to an empty array.
def begin_document(root)
  @upper_corners = []
end
#begin_page(info) ⇒ Object
Called when new page parsing begins
66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 |
# File 'lib/pdf/reader/text_receiver.rb', line 66
#
# Called when new page parsing begins.
#
# info - the page dictionary; stored in @page and handed to #media_box_check.
#
# Resets every per-page instance variable: the text-state stack, the output
# line buffer, the current line/location and the location-to-line map.
def begin_page(info)
  @page = info

  initial_state = {
    :char_spacing  => 0,
    :word_spacing  => 0,
    :hori_scaling  => 100,
    :leading       => 0,
    :tj_adjustment => 0
  }
  @state = [initial_state]

  # Must be pushed before @smallest_y_loc is read from the top of the stack.
  @upper_corners << media_box_check(info)

  @output = []
  @line = 0
  @location = 0
  @displacement = {}
  @smallest_y_loc = @upper_corners.last[:ury]
  @written_to = false
end
#begin_page_container(page) ⇒ Object
57 58 59 |
# File 'lib/pdf/reader/text_receiver.rb', line 57
#
# Push the page size that applies inside this page container.
#
# page - the container's dictionary; handed to #media_box_check, which
#        falls back to the enclosing size when there is no :MediaBox entry.
def begin_page_container(page)
  corners = media_box_check(page)
  @upper_corners << corners
end
#begin_text_object ⇒ Object
PDF operator BT
94 95 96 |
# File 'lib/pdf/reader/text_receiver.rb', line 94
#
# PDF operator BT.
#
# Pushes a shallow copy of the current text state so that changes made inside
# the text object are dropped again by #end_text_object.
def begin_text_object
  snapshot = @state.last.dup
  @state << snapshot
end
#calculate_line_and_location(new_loc) ⇒ Object
236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 |
# File 'lib/pdf/reader/text_receiver.rb', line 236
#
# Map a text location onto an output line and make both current.
#
# new_loc - the new location value (a Numeric) as passed by the
#           text-positioning callbacks.
#
# Updates @displacement (location => output line index), @smallest_y_loc,
# @location and @line. Returns the new value of @line.
def calculate_line_and_location(new_loc)
  key = new_loc

  if @written_to
    unless @displacement.has_key?(key)
      if key < @location || key < @smallest_y_loc
        # Smaller than the current location, or smaller than anything seen so
        # far on this page: use the line after the current one.
        @displacement[key] = @line + 1
      else
        # Otherwise reuse the closest already-known location that is smaller
        # than the new one. When there is none, keep the new location itself;
        # letting the key become nil made the comparison below raise
        # NoMethodError.
        nearest = @displacement.keys.select { |known| known < key }.max
        key = nearest unless nearest.nil?
        @displacement[key] = 0 unless @displacement.has_key?(key)
      end
    end
  else
    # Nothing has been written on this page yet, so every location is line 0.
    @displacement[key] = 0
  end

  @smallest_y_loc = key if key < @smallest_y_loc
  @location = key
  @line = @displacement[key]
end
#end_document ⇒ Object
Called when the document parsing ends
53 54 55 |
# File 'lib/pdf/reader/text_receiver.rb', line 53
#
# Called when the document parsing ends.
#
# Empties the text-state stack. @state is only assigned in #begin_page, so it
# is still nil when a document ends without any page having begun; that case
# is a no-op instead of a NoMethodError.
def end_document
  @state.clear if @state
end
#end_page ⇒ Object
Called when page parsing ends
88 89 90 91 |
# File 'lib/pdf/reader/text_receiver.rb', line 88
#
# Called when page parsing ends.
#
# Joins the page's output lines with newlines, appends the result to the
# user's receiver with <<, and drops this page's entry from the page-size
# stack.
def end_page
  page_text = @output.join("\n")
  @main_receiver << page_text
  @upper_corners.pop
end
#end_page_container ⇒ Object
61 62 63 |
# File 'lib/pdf/reader/text_receiver.rb', line 61
#
# Drop the page size pushed by the matching #begin_page_container call.
# Returns the removed entry (nil when the stack is already empty).
def end_page_container
  @upper_corners.pop
end
#end_text_object ⇒ Object
PDF operator ET
99 100 101 |
# File 'lib/pdf/reader/text_receiver.rb', line 99
#
# PDF operator ET.
#
# Discards the text-state copy pushed by #begin_text_object and returns it.
def end_text_object
  @state.pop
end
#media_box_check(dict) ⇒ Object
224 225 226 227 228 229 230 231 232 233 234 |
# File 'lib/pdf/reader/text_receiver.rb', line 224
#
# Work out the page size that applies to +dict+.
#
# dict - a page or page-container dictionary that responds to has_key? and [].
#
# Starts from a copy of the enclosing size (the top of @upper_corners, or
# 0x0 when the stack is empty) and overrides it with the width and height of
# the :MediaBox entry when the dictionary has one.
#
# Returns a new Hash with :urx and :ury keys.
def media_box_check(dict)
  inherited = @upper_corners.last || { :urx => 0, :ury => 0 }
  corners = inherited.dup
  return corners unless dict.has_key?(:MediaBox)

  box = dict[:MediaBox]
  corners[:urx] = box[2] - box[0]
  corners[:ury] = box[3] - box[1]
  corners
end
#move_text_position(tx, ty) ⇒ Object
PDF operator Td
136 137 138 139 |
# File 'lib/pdf/reader/text_receiver.rb', line 136
#
# PDF operator Td.
#
# tx - horizontal offset; ignored by this receiver.
# ty - vertical offset, added to the current location.
def move_text_position(tx, ty)
  target = @location + ty
  calculate_line_and_location(target)
end
#move_text_position_and_set_leading(tx, ty) ⇒ Object
PDF operator TD
142 143 144 145 |
# File 'lib/pdf/reader/text_receiver.rb', line 142
#
# PDF operator TD.
#
# tx - horizontal offset, passed through to #move_text_position.
# ty - vertical offset; also stored as the text leading.
def move_text_position_and_set_leading(tx, ty)
  # The leading is stored as ty itself, not negated.
  set_text_leading(ty)
  move_text_position(tx, ty)
end
#move_to_next_line_and_show_text(string) ⇒ Object
PDF operator '
212 213 214 215 |
# File 'lib/pdf/reader/text_receiver.rb', line 212
#
# PDF operator ' (apostrophe).
#
# string - the text to show after moving to the start of the next line.
def move_to_next_line_and_show_text(string)
  move_to_start_of_next_line
  show_text(string)
end
#move_to_start_of_next_line ⇒ Object
PDF operator T*
131 132 133 |
# File 'lib/pdf/reader/text_receiver.rb', line 131
#
# PDF operator T*.
#
# Moves the text position vertically by the current text leading.
#
# NOTE(review): the PDF specification defines T* as "0 -Tl Td". The leading is
# passed on un-negated here, which agrees with the value TD stores (ty) but
# not with the value TL stores (n) -- confirm which convention is intended.
def move_to_start_of_next_line
  leading = @state.last[:leading]
  move_text_position(0, leading)
end
#set_character_spacing(n) ⇒ Object
PDF operator Tc
111 112 113 |
# File 'lib/pdf/reader/text_receiver.rb', line 111
#
# PDF operator Tc.
#
# n - the character spacing; stored in the current text state and returned.
def set_character_spacing(n)
  current = @state.last
  current[:char_spacing] = n
end
#set_horizontal_text_scaling(n) ⇒ Object
PDF operator Tz
121 122 123 |
# File 'lib/pdf/reader/text_receiver.rb', line 121
#
# PDF operator Tz.
#
# n - the horizontal scaling as a percentage (Integer or Float).
#
# Stores n divided by 100 in the current text state. The division is done in
# floating point: Integer division turned 50 into 0 and 150 into 1.
def set_horizontal_text_scaling(n)
  @state.last[:hori_scaling] = n / 100.0
end
#set_spacing_next_line_show_text(aw, ac, string) ⇒ Object
PDF operator "
218 219 220 221 222 |
# File 'lib/pdf/reader/text_receiver.rb', line 218
#
# PDF operator " (double quote).
#
# aw     - word spacing to set before showing the text.
# ac     - character spacing to set before showing the text.
# string - the text to show on the next line.
def set_spacing_next_line_show_text(aw, ac, string)
  set_word_spacing(aw)
  set_character_spacing(ac)
  move_to_next_line_and_show_text(string)
end
#set_text_leading(n) ⇒ Object
PDF operator TL
126 127 128 |
# File 'lib/pdf/reader/text_receiver.rb', line 126
#
# PDF operator TL.
#
# n - the text leading; stored in the current text state and returned.
def set_text_leading(n)
  current = @state.last
  current[:leading] = n
end
#set_text_matrix_and_text_line_matrix(*args) ⇒ Object
PDF operator Tm
104 105 106 107 108 |
# File 'lib/pdf/reader/text_receiver.rb', line 104
#
# PDF operator Tm.
#
# args - the six operands the PDF spec names a, b, c, d, e, f.
#
# Only the sixth operand (f) is used; it becomes the new text location.
def set_text_matrix_and_text_line_matrix(*args)
  vertical_offset = args[5]
  calculate_line_and_location(vertical_offset)
end
#set_word_spacing(n) ⇒ Object
PDF operator Tw
116 117 118 |
# File 'lib/pdf/reader/text_receiver.rb', line 116
#
# PDF operator Tw.
#
# n - the word spacing; stored in the current text state and returned.
def set_word_spacing(n)
  current = @state.last
  current[:word_spacing] = n
end
#show_text(string) ⇒ Object
PDF operator Tj
148 149 150 151 152 153 154 155 156 157 158 159 |
# File 'lib/pdf/reader/text_receiver.rb', line 148
#
# PDF operator Tj.
#
# string - the text to append to the current output line.
#
# When the pending TJ adjustment is below -1000, one space per 900 units of
# adjustment is inserted before the text. Marks the page as written to.
def show_text(string)
  buffer = (@output[@line] ||= "")
  adjustment = @state.last[:tj_adjustment]
  buffer << " " * (adjustment.abs / 900) if adjustment < -1000
  buffer << string
  @written_to = true
end
#show_text_with_positioning(params) ⇒ Object
PDF operator TJ
196 197 198 199 200 201 202 203 204 205 206 207 208 209 |
# File 'lib/pdf/reader/text_receiver.rb', line 196
#
# PDF operator TJ.
#
# params - an Array mixing numbers and strings. Each number becomes the
#          adjustment applied to the strings that follow it; every other
#          element is passed to #show_text.
#
# The adjustment in force before the call is restored afterwards.
def show_text_with_positioning(params)
  prev_adjustment = @state.last[:tj_adjustment]

  params.each do |p|
    case p
    # Integer instead of Fixnum: Fixnum was deprecated in Ruby 2.4 and removed
    # in 3.2, where referencing it raises NameError.
    when Float, Integer
      @state.last[:tj_adjustment] = p
    else
      show_text(p)
    end
  end

  @state.last[:tj_adjustment] = prev_adjustment
end
#super_show_text(string) ⇒ Object
160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 |
# File 'lib/pdf/reader/text_receiver.rb', line 160 def super_show_text (string) urx = @upper_corners.last[:urx]/TS_UNITS_PER_H_CHAR ury = @upper_corners.last[:ury]/TS_UNITS_PER_V_CHAR x = (@tm[2,0]/TS_UNITS_PER_H_CHAR).to_i y = (ury - (@tm[2,1]/TS_UNITS_PER_V_CHAR)).to_i #puts "rendering '#{string}' to #{x}x#{y}" place = (@output[y] ||= (" " * urx.to_i)) #puts "#{urx} #{place.size} #{string.size} #{x}" return if x+string.size >= urx string.split(//).each do |c| chars = 1 case c when " " chars += @state.last[:word_spacing].to_i place[x-1, chars] = (" " * chars) else chars += @state.last[:char_spacing].to_i chars -= (@state.last[:tj_adjustment]/1000).to_i if @state.last[:tj_adjustment] chars = 1 if chars < 1 place[x-1] = c place[x, chars-1] = (" " * (chars-1)) if chars > 1 end x += chars end @tm += Matrix.rows([[1, 0, 0], [0, 1, 0], [x*TS_UNITS_PER_H_CHAR, y*TS_UNITS_PER_V_CHAR, 1]]) end |