Class: Tabula::Spreadsheet
- Inherits:
-
ZoneEntity
- Object
- java.awt.geom.Rectangle2D::Float
- ZoneEntity
- Tabula::Spreadsheet
- Includes:
- HasCells
- Defined in:
- lib/tabula/entities/spreadsheet.rb
Overview
Both should implement `cells`, `rows`, `cols`, and `extraction_method`.
Constant Summary
Constants included from HasCells
HasCells::ANOTHER_MAGIC_NUMBER
Instance Attribute Summary collapse
-
#cells ⇒ Object
Returns the value of attribute cells.
-
#cells_resolved ⇒ Object
Returns the value of attribute cells_resolved.
-
#extraction_method ⇒ Object
readonly
Returns the value of attribute extraction_method.
-
#horizontal_ruling_lines ⇒ Object
Returns the value of attribute horizontal_ruling_lines.
-
#page ⇒ Object
readonly
Returns the value of attribute page.
-
#vertical_ruling_lines ⇒ Object
Returns the value of attribute vertical_ruling_lines.
Attributes inherited from ZoneEntity
Instance Method Summary collapse
- #+(other) ⇒ Object
-
#cols(evaluate_cells = true) ⇒ Object
Call `cols` with `evaluate_cells` set to `false` to defer filling in the text of each cell, which can be computationally intensive.
- #fill_in_cells! ⇒ Object
-
#initialize(top, left, width, height, page, cells, vertical_ruling_lines, horizontal_ruling_lines) ⇒ Spreadsheet
constructor
Note: a former trailing `lines` parameter is commented out in the source signature (`#, lines)`).
-
#rows(evaluate_cells = true) ⇒ Object
Call `rows` with `evaluate_cells` set to `false` to defer filling in the text of each cell, which can be computationally intensive.
- #ruling_lines ⇒ Object
- #ruling_lines=(lines) ⇒ Object
- #to_a ⇒ Object
- #to_csv ⇒ Object
- #to_json(*a) ⇒ Object
- #to_tsv ⇒ Object
Methods included from HasCells
#add_spanning_cells!, #find_cells!, #find_spreadsheets_from_cells, #is_tabular?
Methods inherited from ZoneEntity
#<=>, #inspect, #merge!, #points, #tlbr
Constructor Details
#initialize(top, left, width, height, page, cells, vertical_ruling_lines, horizontal_ruling_lines) ⇒ Spreadsheet
Note: a former trailing `lines` parameter is commented out in the source signature (`#, lines)`).
12 13 14 15 16 17 18 19 |
# File 'lib/tabula/entities/spreadsheet.rb', line 12

# Creates a spreadsheet covering the given rectangle.
#
# The four geometry arguments are forwarded unchanged to the superclass
# constructor; everything else is stored on the instance. The extraction
# method is always the literal string "spreadsheet".
#
# @param top [Object] forwarded to +super+
# @param left [Object] forwarded to +super+
# @param width [Object] forwarded to +super+
# @param height [Object] forwarded to +super+
# @param page [Object] stored in +@page+
# @param cells [Object] stored in +@cells+
# @param vertical_ruling_lines [Object] stored in +@vertical_ruling_lines+
# @param horizontal_ruling_lines [Object] stored in +@horizontal_ruling_lines+
def initialize(top, left, width, height, page, cells, vertical_ruling_lines, horizontal_ruling_lines) # a trailing `lines` parameter used to follow
  super(top, left, width, height)

  @extraction_method = "spreadsheet"
  @page, @cells = page, cells
  @vertical_ruling_lines, @horizontal_ruling_lines = vertical_ruling_lines, horizontal_ruling_lines
end
Instance Attribute Details
#cells ⇒ Object
Returns the value of attribute cells.
9 10 11 |
# File 'lib/tabula/entities/spreadsheet.rb', line 9

# Reader for the +@cells+ instance variable.
#
# @return [Object] whatever is currently stored in +@cells+
def cells
  @cells
end
#cells_resolved ⇒ Object
Returns the value of attribute cells_resolved.
9 10 11 |
# File 'lib/tabula/entities/spreadsheet.rb', line 9

# Reader for the +@cells_resolved+ instance variable, the flag that
# +fill_in_cells!+ sets to +true+ before populating cell text.
#
# @return [Object] the current value of +@cells_resolved+
def cells_resolved
  @cells_resolved
end
#extraction_method ⇒ Object (readonly)
Returns the value of attribute extraction_method.
10 11 12 |
# File 'lib/tabula/entities/spreadsheet.rb', line 10

# Read-only accessor for the +@extraction_method+ instance variable
# (the constructor sets it to "spreadsheet").
#
# @return [Object] the current value of +@extraction_method+
def extraction_method
  @extraction_method
end
#horizontal_ruling_lines ⇒ Object
Returns the value of attribute horizontal_ruling_lines.
9 10 11 |
# File 'lib/tabula/entities/spreadsheet.rb', line 9

# Reader for the +@horizontal_ruling_lines+ instance variable.
#
# @return [Object] the current value of +@horizontal_ruling_lines+
def horizontal_ruling_lines
  @horizontal_ruling_lines
end
#page ⇒ Object (readonly)
Returns the value of attribute page.
10 11 12 |
# File 'lib/tabula/entities/spreadsheet.rb', line 10

# Read-only accessor for the +@page+ instance variable.
#
# @return [Object] the page object handed to the constructor
def page
  @page
end
#vertical_ruling_lines ⇒ Object
Returns the value of attribute vertical_ruling_lines.
9 10 11 |
# File 'lib/tabula/entities/spreadsheet.rb', line 9

# Reader for the +@vertical_ruling_lines+ instance variable.
#
# @return [Object] the current value of +@vertical_ruling_lines+
def vertical_ruling_lines
  @vertical_ruling_lines
end
Instance Method Details
#+(other) ⇒ Object
105 106 107 108 |
# File 'lib/tabula/entities/spreadsheet.rb', line 105

# Combines this spreadsheet with another one from the same page.
#
# The result is a new Spreadsheet holding the concatenation of both cell
# lists. Its geometry arguments and both ruling-line lists are passed as
# +nil+, exactly as before; only the cells and the page carry over.
#
# @param other [#page, #cells] the spreadsheet to append
# @return [Spreadsheet] a new spreadsheet with +@cells + other.cells+
# @raise [ArgumentError] if +other+ belongs to a different page
def +(other)
  # Same exception class as before; the message makes the failure diagnosable.
  raise ArgumentError, "cannot add spreadsheets from different pages" unless other.page == @page

  Spreadsheet.new(nil, nil, nil, nil, @page, @cells + other.cells, nil, nil)
end
#cols(evaluate_cells = true) ⇒ Object
Call `cols` with `evaluate_cells` set to `false` to defer filling in the text of each cell, which can be computationally intensive.
70 71 72 73 74 75 76 77 78 |
# File 'lib/tabula/entities/spreadsheet.rb', line 70

# Groups the cells into columns.
#
# Columns are keyed by each distinct +left+ value, in ascending order; within
# a column the cells are ordered by +top+.
#
# @param evaluate_cells [Boolean] when truthy, +fill_in_cells!+ runs first;
#   pass +false+ to defer that (it can be computationally intensive)
# @return [Array<Array>] one array of cells per distinct +left+ value
def cols(evaluate_cells=true)
  fill_in_cells! if evaluate_cells

  column_edges = cells.map { |cell| cell.left }.uniq.sort
  column_edges.map do |edge|
    members = cells.select { |cell| cell.left == edge }
    members.sort_by { |cell| cell.top }
  end
end
#fill_in_cells! ⇒ Object
30 31 32 33 34 35 36 37 |
# File 'lib/tabula/entities/spreadsheet.rb', line 30

# Populates +text_elements+ on every cell from the page, at most once.
#
# The +@cells_resolved+ flag is raised before the cells are visited, and any
# later call is a no-op while the flag stays truthy.
#
# @return [Object, nil] the result of iterating the cells on the first call,
#   +nil+ when the cells were already resolved
def fill_in_cells!
  return if @cells_resolved

  @cells_resolved = true
  cells.each { |cell| cell.text_elements = @page.get_cell_text(cell) }
end
#rows(evaluate_cells = true) ⇒ Object
Call `rows` with `evaluate_cells` set to `false` to defer filling in the text of each cell, which can be computationally intensive.
41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 |
# File 'lib/tabula/entities/spreadsheet.rb', line 41

# Groups the cells into rows.
#
# Rows are keyed by each distinct +top+ value, in ascending order; within a
# row the cells are ordered by +left+. When there are more than two rows and
# the first row has fewer distinct +left+ values than the second, zero-sized
# placeholder cells are appended to the first row for every +left+ present in
# the second row but absent from the first (an "empty corner", like in
# 01001523B_China.pdf). The first row is then re-sorted by +left+.
#
# TODO: support placeholders for "empty" cells in rows other than row 1, and in #cols
#
# @param evaluate_cells [Boolean] when truthy, +fill_in_cells!+ runs first;
#   pass +false+ to defer that (it can be computationally intensive)
# @return [Array<Array>] one array of cells per distinct +top+ value
def rows(evaluate_cells=true)
  fill_in_cells! if evaluate_cells

  row_edges = cells.map { |cell| cell.top }.uniq.sort
  grid = row_edges.map do |edge|
    cells.select { |cell| cell.top == edge }.sort_by { |cell| cell.left }
  end

  # Placeholder handling only applies to tables with more than two rows.
  return grid unless grid.size > 2

  header = grid[0]
  second = grid[1]
  if header.map { |cell| cell.left }.uniq.size < second.map { |cell| cell.left }.uniq.size
    gaps = second.map { |cell| cell.left } - header.map { |cell| cell.left }
    gaps.each do |gap_left|
      filler = Cell.new(header[0].top, gap_left, 0, 0)
      filler.placeholder = true
      header << filler
    end
  end
  header.sort_by! { |cell| cell.left }

  grid
end
#ruling_lines ⇒ Object
21 22 23 |
# File 'lib/tabula/entities/spreadsheet.rb', line 21

# All ruling lines of the spreadsheet: the vertical ones followed by the
# horizontal ones.
#
# @return [Object] the result of +@vertical_ruling_lines + @horizontal_ruling_lines+
def ruling_lines
  vertical = @vertical_ruling_lines
  horizontal = @horizontal_ruling_lines
  vertical + horizontal
end
#ruling_lines=(lines) ⇒ Object
25 26 27 28 |
# File 'lib/tabula/entities/spreadsheet.rb', line 25

# Replaces both ruling-line lists from a single mixed collection.
#
# Each line is kept only if it intersects this spreadsheet's rectangle;
# vertical lines go to +@vertical_ruling_lines+ and horizontal ones to
# +@horizontal_ruling_lines+.
#
# The intersection test is sent to the spreadsheet itself. The previous code
# sent it to +spr+, a name that is not defined on this class, so the setter
# raised NameError as soon as a line of the matching orientation was seen.
#
# @param lines [Enumerable] objects responding to +vertical?+ and
#   +horizontal?+ and accepted by +intersectsLine+
def ruling_lines=(lines)
  @vertical_ruling_lines = lines.select { |line| line.vertical? && intersectsLine(line) }
  @horizontal_ruling_lines = lines.select { |line| line.horizontal? && intersectsLine(line) }
end
#to_a ⇒ Object
80 81 82 83 |
# File 'lib/tabula/entities/spreadsheet.rb', line 80

# The spreadsheet as a nested array of cell text.
#
# Cell text is resolved first, then every row is mapped to the +text+ of its
# cells.
#
# @return [Array<Array>] one inner array per row, holding each cell's +text+
def to_a
  fill_in_cells!
  rows.map do |row_cells|
    row_cells.map { |cell| cell.text }
  end
end
#to_csv ⇒ Object
85 86 87 88 89 |
# File 'lib/tabula/entities/spreadsheet.rb', line 85

# Renders the rows through +Tabula::Writers.CSV+ into an in-memory buffer.
#
# @return [String] everything the writer emitted to the buffer
def to_csv
  StringIO.new.tap { |buffer| Tabula::Writers.CSV(rows, buffer) }.string
end
#to_json(*a) ⇒ Object
97 98 99 100 101 102 103 |
# File 'lib/tabula/entities/spreadsheet.rb', line 97

# Serialises the spreadsheet as a JSON object with three keys:
# +json_class+ (this object's class name), +extraction_method+ and +data+
# (the rows).
#
# @param a [Array] passed through to +Hash#to_json+
# @return [String] the JSON document
def to_json(*a)
  payload = {}
  payload['json_class'] = self.class.name
  payload['extraction_method'] = @extraction_method
  payload['data'] = rows
  payload.to_json(*a)
end