Class: PDF::Reader::PageTextReceiver

Inherits:
Object
  • Object
show all
Extended by:
Forwardable
Defined in:
lib/pdf/reader/page_text_receiver.rb

Overview

Builds a UTF-8 string of all the text on a single page by processing all the operators in a content stream.

Constant Summary collapse

SPACE =

: String

" "

Instance Attribute Summary collapse

Instance Method Summary collapse

Instance Attribute Details

#options ⇒ Object (readonly)

: untyped



23
24
25
# File 'lib/pdf/reader/page_text_receiver.rb', line 23

# Read-only accessor: returns the current value of @options.
# Where @options gets assigned is not shown in this excerpt.
def options
  @options
end

#state ⇒ Object (readonly)

: untyped



20
21
22
# File 'lib/pdf/reader/page_text_receiver.rb', line 20

# Read-only accessor: returns the current value of @state.
# In the code shown on this page, @state is replaced with a new PageState
# each time #page= is called.
def state
  @state
end

Instance Method Details

#content ⇒ Object

deprecated



87
88
89
90
# File 'lib/pdf/reader/page_text_receiver.rb', line 87

# Renders the text of the current page as one laid-out string.
#
# Collects the runs from #runs (default options), hands them to PageLayout
# together with the page's MediaBox rectangle, and returns the layout's
# #to_s result.
#
# @deprecated marked as deprecated in the generated documentation
# @return the value of PageLayout#to_s (presumably a String)
def content
  # The MediaBox is looked up before the runs are collected.
  layout_box = @page.rectangles[:MediaBox]
  layout = PageLayout.new(runs, layout_box)
  layout.to_s
end

#invoke_xobject(label) ⇒ Object

XObjects



126
127
128
129
130
131
132
133
# File 'lib/pdf/reader/page_text_receiver.rb', line 126

# Handles a named XObject being invoked in the content stream.
#
# The lookup is delegated to @state.invoke_xobject, which yields the resolved
# object to the block. Only PDF::Reader::FormXObject instances are acted on:
# they are walked with this receiver. Any other yielded object is ignored.
#
# @param label the XObject name, passed straight through to the state
# @return whatever @state.invoke_xobject returns
def invoke_xobject(label)
  @state.invoke_xobject(label) do |xobject|
    xobject.walk(self) if PDF::Reader::FormXObject === xobject
  end
end

#move_to_next_line_and_show_text(str) ⇒ Object



112
113
114
115
# File 'lib/pdf/reader/page_text_receiver.rb', line 112

# Moves the text state to the start of the next line and then shows +str+
# through #show_text. The trailing "'" comment on the def line names the
# content-stream operator this callback corresponds to.
def move_to_next_line_and_show_text(str) # '
  @state.move_to_start_of_next_line
  show_text(str)
end

#page=(page) ⇒ Object

starting a new page



47
48
49
50
51
52
# File 'lib/pdf/reader/page_text_receiver.rb', line 47

# Begins work on a new page: builds a fresh PageState for it, remembers the
# page itself, and resets the collected content and characters to empty
# arrays.
#
# @param page the page that is about to be processed
def page=(page)
  # The right-hand side is fully evaluated first, so a failure while building
  # the PageState leaves every instance variable untouched.
  @state, @page = PageState.new(page), page
  @content = []
  @characters = []
end

#runs(opts = {}) ⇒ Object



54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
# File 'lib/pdf/reader/page_text_receiver.rb', line 54

# Returns the text runs collected for the current page after passing them
# through a fixed pipeline of filters.
#
# Recognised keys in +opts+ (all optional):
#   :rect            - rectangle to clip to; defaults to the page's CropBox.
#                      A nil/false value disables clipping.
#   :skip_zero_width - drop zero-width runs (default true)
#   :skip_overlapping - drop redundant overlapping runs (default true)
#   :merge           - merge runs via merge_runs (default true)
#   :only            - when truthy, passed to AdvancedTextRunFilter.only
#   :exclude         - when truthy, passed to AdvancedTextRunFilter.exclude
#
# Empty-string runs are always removed via NoTextFilter, after the
# overlap step and before merging.
#
# @param opts [Hash] filter options as listed above
# @return the filtered (and possibly merged) collection of runs
def runs(opts = {})
  selected = @characters

  clip = opts.fetch(:rect, @page.rectangles[:CropBox])
  selected = BoundingRectangleRunsFilter.runs_within_rect(selected, clip) if clip

  selected = ZeroWidthRunsFilter.exclude_zero_width_runs(selected) if opts.fetch(:skip_zero_width, true)
  selected = OverlappingRunsFilter.exclude_redundant_runs(selected) if opts.fetch(:skip_overlapping, true)

  # Unconditional step: there is no option to keep empty strings.
  selected = NoTextFilter.exclude_empty_strings(selected)

  selected = merge_runs(selected) if opts.fetch(:merge, true)

  only_filter = opts.fetch(:only, nil)
  selected = AdvancedTextRunFilter.only(selected, only_filter) if only_filter

  exclude_filter = opts.fetch(:exclude, nil)
  selected = AdvancedTextRunFilter.exclude(selected, exclude_filter) if exclude_filter

  selected
end

#set_spacing_next_line_show_text(aw, ac, string) ⇒ Object



117
118
119
120
121
# File 'lib/pdf/reader/page_text_receiver.rb', line 117

# Sets the word spacing to +aw+ and the character spacing to +ac+ on the text
# state, then moves to the next line and shows +string+ through
# #move_to_next_line_and_show_text. The trailing '"' comment on the def line
# names the content-stream operator this callback corresponds to.
def set_spacing_next_line_show_text(aw, ac, string) # "
  @state.set_word_spacing(aw)
  @state.set_character_spacing(ac)
  move_to_next_line_and_show_text(string)
end

#show_text(string) ⇒ Object

Text Showing Operators

record text that is drawn on the page



96
97
98
# File 'lib/pdf/reader/page_text_receiver.rb', line 96

# Records a string of text drawn on the page by passing it to
# internal_show_text (defined outside this excerpt). The trailing comment on
# the def line shows a sample of the Tj operator this callback handles.
def show_text(string) # Tj (AWAY)
  internal_show_text(string)
end

#show_text_with_positioning(params) ⇒ Object

TJ [(A) 120 (WA) 20 (Y)]



100
101
102
103
104
105
106
107
108
109
110
# File 'lib/pdf/reader/page_text_receiver.rb', line 100

# Handles the TJ operator, whose operand array mixes strings with numeric
# adjustments, e.g. [(A) 120 (WA) 20 (Y)].
#
# Each String element is shown via internal_show_text; each Numeric element
# is forwarded to the text state as a glyph displacement (with a leading 0
# and a trailing false argument); elements of any other type are skipped.
#
# @param params the operand array of the TJ operator
# @return the result of params.each
def show_text_with_positioning(params) # TJ [(A) 120 (WA) 20 (Y)]
  params.each do |element|
    case element
    when String
      internal_show_text(element)
    when Numeric
      @state.process_glyph_displacement(0, element, false)
    end
    # Anything that is neither a String nor a Numeric is intentionally ignored.
  end
end