Method: PDF::Reader::Turtletext#text_in_region

Defined in:
lib/pdf/reader/turtletext.rb

#text_in_region(xmin, xmax, ymin, ymax, page = 1) ⇒ Object

Returns an array of text elements found within the x,y limits. x ranges from xmin (left of page) to xmax (right of page); y ranges from ymin (bottom of page) to ymax (top of page). Each line of text found is returned as an array element. Each line of text is an array of the separate text elements found on that line.

[["first line first text", "first line last text"],["second line text"]]


85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
# File 'lib/pdf/reader/turtletext.rb', line 85

# Collects the text elements of +page+ that fall inside a rectangular region.
#
# Both bounds are inclusive on each axis: an entry is kept when
# ymin <= y <= ymax and xmin <= x <= xmax.
#
# @param xmin [Numeric] lower x bound
# @param xmax [Numeric] upper x bound
# @param ymin [Numeric] lower y bound
# @param ymax [Numeric] upper y bound
# @param page [Integer] page passed through to +content+ (defaults to 1)
# @return [Array<Array>] one inner array per row that has at least one match,
#   in the order +content+ yields the rows; each inner array holds that row's
#   matching elements ordered by ascending x. Rows with no match are omitted.
#
# NOTE(review): assumes +content(page)+ yields (y, row) pairs whose rows in
# turn yield (x, element) pairs, e.g. a hash of hashes -- confirm against
# +content+.
def text_in_region(xmin, xmax, ymin, ymax, page = 1)
  content(page).each_with_object([]) do |(y, text_row), lines|
    next unless y >= ymin && y <= ymax

    hits = text_row.select { |x, _element| x >= xmin && x <= xmax }
    next if hits.empty?

    # Order the surviving entries left-to-right, then drop the x coordinate.
    ordered = hits.sort { |(xa, _), (xb, _)| xa <=> xb }
    lines << ordered.map { |_x, element| element }
  end
end