Class: Tabula::Line
- Inherits:
-
ZoneEntity
- Object
- java.awt.geom.Rectangle2D::Float
- ZoneEntity
- Tabula::Line
- Defined in:
- lib/tabula/entities/line.rb
Constant Summary collapse
- SPACE_RUN_MAX_LENGTH =
3
Instance Attribute Summary collapse
-
#index ⇒ Object
readonly
Returns the value of attribute index.
-
#text_elements ⇒ Object
Returns the value of attribute text_elements.
Attributes inherited from ZoneEntity
Instance Method Summary collapse
- #<<(t) ⇒ Object
-
#==(other) ⇒ Object
Used for testing; ignores everything about the text elements other than their stripped text.
-
#initialize(index = nil) ⇒ Line
constructor
A new instance of Line.
-
#remove_sequential_spaces!(seq_spaces_count = SPACE_RUN_MAX_LENGTH) ⇒ Object
Removes runs of the space character that are at least SPACE_RUN_MAX_LENGTH long. This should not change the dimensions of the container Line.
Methods inherited from ZoneEntity
#<=>, #inspect, #merge!, #points, #tlbr, #tlwh, #to_json
Constructor Details
#initialize(index = nil) ⇒ Line
Returns a new instance of Line.
8 9 10 11 |
# File 'lib/tabula/entities/line.rb', line 8
#
# Creates a Line that holds no text elements yet.
#
# @param index [Object, nil] value exposed through the read-only +index+
#   attribute; defaults to nil
def initialize(index = nil)
  @text_elements = []
  @index = index
end
Instance Attribute Details
#index ⇒ Object (readonly)
Returns the value of attribute index.
4 5 6 |
# File 'lib/tabula/entities/line.rb', line 4
#
# Reader for the +index+ attribute.
#
# @return [Object, nil] the value stored in @index
def index
  @index
end
#text_elements ⇒ Object
Returns the value of attribute text_elements.
3 4 5 |
# File 'lib/tabula/entities/line.rb', line 3
#
# Reader for the +text_elements+ attribute.
#
# @return [Object, nil] the value stored in @text_elements
def text_elements
  @text_elements
end
Instance Method Details
#<<(t) ⇒ Object
13 14 15 16 17 18 19 20 21 22 23 24 |
# File 'lib/tabula/entities/line.rb', line 13
#
# Appends +t+ to this line's text elements.
#
# The first element added defines the line's top/left/width/height; every
# later element is appended and then folded into the line via +merge!+.
#
# @param t [Object] element responding to +top+, +left+, +width+ and +height+
# @return [Object] +t.height+ for the first element, otherwise whatever
#   +merge!+ returns
def <<(t)
  # Line already has content: append and let merge! update the geometry.
  unless @text_elements.empty?
    text_elements << t
    return merge!(t)
  end

  # First element: the line simply takes over its geometry.
  @text_elements << t
  self.top = t.top
  self.left = t.left
  self.width = t.width
  self.height = t.height
end
#==(other) ⇒ Object
Used for testing; ignores everything about the text elements other than their stripped text.
75 76 77 78 79 80 81 82 83 |
# File 'lib/tabula/entities/line.rb', line 75
#
# Equality intended for tests: two lines are equal when their text elements
# are pairwise equal, after the shorter list has been padded with
# TextElement::EMPTY so both have the same length.
#
# Unlike the previous implementation, the padded arrays are kept in local
# variables and are never assigned back to either line, so comparing two
# lines no longer changes their +text_elements+.
#
# @param other [Object, nil] object to compare against
# @return [Boolean] false when +other+ is nil or has no +text_elements+;
#   otherwise whether every padded pair of elements is ==
def ==(other)
  return false if other.nil?
  # Anything that cannot supply text elements cannot be equal to a Line.
  return false unless other.respond_to?(:text_elements)

  # NOTE(review): Array#rpad is a project extension not visible here;
  # presumably it returns a right-padded array without modifying its
  # receiver -- confirm.
  mine = text_elements.rpad(TextElement::EMPTY, other.text_elements.size)
  theirs = other.text_elements.rpad(TextElement::EMPTY, mine.size)

  mine.zip(theirs).all? { |my, yours| my == yours }
end
#remove_sequential_spaces!(seq_spaces_count = SPACE_RUN_MAX_LENGTH) ⇒ Object
Removes runs of the space character that are at least SPACE_RUN_MAX_LENGTH long. This should not change the dimensions of the container Line.
29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 |
# File 'lib/tabula/entities/line.rb', line 29
#
# Splits the line's text chunks at long runs of space characters and drops
# the spaces that form those runs.
#
# A "long run" is a sequence of at least +seq_spaces_count+ consecutive
# elements whose +text+ is exactly ' '. Chunks without a long run are kept
# as the same objects; chunks with one are replaced by new TextChunks built
# from the elements between the runs. Shorter runs of spaces stay inside
# their chunk.
#
# The previous implementation accepted +seq_spaces_count+ but always compared
# against SPACE_RUN_MAX_LENGTH; the argument is now honoured.
#
# @param seq_spaces_count [Integer] minimum run length that is removed
#   (defaults to SPACE_RUN_MAX_LENGTH)
# @return [Line] self, with +text_elements+ replaced by the new chunk list
def remove_sequential_spaces!(seq_spaces_count = SPACE_RUN_MAX_LENGTH)
  self.text_elements = text_elements.reduce([]) do |memo, text_chunk|
    # Alternating [is_space, elements] groups of consecutive elements.
    runs = text_chunk.text_elements.chunk { |te| te.text == ' ' }.to_a
    long_run = ->(is_space, elements) { is_space && elements.size >= seq_spaces_count }

    # No long runs of spaces: keep the chunk exactly as it was.
    unless runs.any? { |is_space, elements| long_run.call(is_space, elements) }
      memo << text_chunk
      next memo
    end

    # Walk the runs once; every long run of spaces is skipped and forces the
    # next kept element to open a fresh TextChunk.
    start_new_chunk = true
    runs.each do |is_space, elements|
      if long_run.call(is_space, elements)
        start_new_chunk = true
        next
      end

      elements.each do |te|
        if start_new_chunk
          memo << TextChunk.create_from_text_element(te)
          start_new_chunk = false
        else
          memo.last << te
        end
      end
    end

    memo
  end

  self
end