Class: Tabula::Table

Inherits:
Object
  • Object
show all
Defined in:
lib/tabula/entities/table.rb

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(line_count, separators) ⇒ Table

Returns a new instance of Table.



5
6
7
8
9
# File 'lib/tabula/entities/table.rb', line 5

# Sets up a table with +line_count+ freshly created, distinct Line objects.
#
# @param line_count number of Line instances to pre-allocate in @lines
# @param separators stored untouched in @separators
#
# The extraction method label starts out as "original".
def initialize(line_count, separators)
  @separators = separators
  # One new Line per index; an empty range simply yields an empty array.
  @lines = (0...line_count).map { Line.new }
  @extraction_method = "original"
end

Instance Attribute Details

#extraction_method ⇒ Object (readonly)

Returns the value of attribute extraction_method.



3
4
5
# File 'lib/tabula/entities/table.rb', line 3

# Reader for @extraction_method, the label describing how this table was
# produced. The constructor sets it to "original".
def extraction_method
  @extraction_method
end

#lines ⇒ Object

Returns the value of attribute lines.



4
5
6
# File 'lib/tabula/entities/table.rb', line 4

# Reader for @lines, the array of Line objects created by the constructor
# (one entry per table line).
def lines
  @lines
end

Class Method Details

.new_from_array(array_of_rows) ⇒ Object

Create a new Table object from an array of arrays representing a list of rows in a spreadsheet. Probably only used for testing.



47
48
49
50
51
52
53
54
55
# File 'lib/tabula/entities/table.rb', line 47

# Builds a Table from an array of rows, each row being an array of cell values.
# Probably only useful in tests.
#
# Every cell becomes a TextElement whose first two arguments are the row index
# and the column index, with both size arguments set to 1 and the cell value
# passed as the text argument. Rows are right-padded to a common width
# before the table is returned.
#
# @param array_of_rows [Array<Array>] cell values, one inner array per row
# @return [Table] the populated table, with extraction_method "testing"
def self.new_from_array(array_of_rows)
  t = Table.new(array_of_rows.size, [])
  # Inside a class method a bare @extraction_method would be stored on the
  # class object itself, leaving the new table labelled "original"; set it on
  # the instance instead.
  t.instance_variable_set(:@extraction_method, "testing")
  array_of_rows.each_with_index do |row, index|
    cells = row.each_with_index.map do |cell, inner_index|
      TextElement.new(index, inner_index, 1, 1, nil, nil, cell, nil)
    end
    t.lines[index].text_elements = cells
  end
  t.rpad!
  t
end

Instance Method Details

#==(other) ⇒ Object

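Used for testing; ignores separator locations (they’ll sometimes be nil/empty). Note that the comparison normalizes both tables in place: each table's @lines is left-stripped and then padded to the other's length.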



78
79
80
81
82
83
84
85
86
# File 'lib/tabula/entities/table.rb', line 78

# Test-oriented equality that ignores separator locations.
#
# Both tables are normalized in place before comparing: leading empty columns
# are stripped from each, then each @lines array is padded with nil to the
# other's length. The tables are equal when every pair of corresponding lines
# is equal.
#
# @param other the table to compare against (its @lines is overwritten too)
# @return [Boolean]
def ==(other)
  # Step 1: strip leading empty columns on both sides.
  @lines = lstrip_lines
  other.instance_variable_set(:@lines, other.lstrip_lines)

  # Step 2: pad to a common number of lines (self first, then other).
  @lines = lines.rpad(nil, other.lines.size)
  other.instance_variable_set(:@lines, other.lines.rpad(nil, lines.size))

  lines.zip(other.lines).all? { |mine, theirs| mine == theirs }
end

#add_text_element(text_element, i, j) ⇒ Object



11
12
13
14
15
16
17
18
19
20
# File 'lib/tabula/entities/table.rb', line 11

# Places +text_element+ at line +i+, column +j+.
#
# If the slot is already occupied, the existing element absorbs the new one
# via merge!; otherwise the new element is stored there.
#
# @param text_element the element to add
# @param i line index
# @param j column index within that line's text_elements
# @return the result of merge! when merging, otherwise +text_element+
def add_text_element(text_element, i, j)
  # Create the line on demand. Assigning past the end of @lines leaves nil
  # gaps at the skipped indices; `||=` lets a later call that targets such a
  # gap create its Line instead of failing on nil.
  line = (@lines[i] ||= Line.new)
  occupant = line.text_elements[j]
  if occupant
    occupant.merge!(text_element)
  else
    line.text_elements[j] = text_element
  end
end

#cols ⇒ Object



32
33
34
# File 'lib/tabula/entities/table.rb', line 32

# Column-major view of the table: the transpose of what #rows returns.
def cols
  row_major = rows
  row_major.transpose
end

#lstrip_lines ⇒ Object

For equality testing: return @lines stripped of leading columns of empty strings. Note that each line's text_elements is reassigned in place. TODO: write a method to strip all totally-empty columns (or not?)



59
60
61
62
63
64
65
66
67
68
69
70
71
72
# File 'lib/tabula/entities/table.rb', line 59

# For equality testing: removes the leading columns that are blank in every
# line and returns @lines.
#
# A cell counts as blank when it is nil or its text is empty. The number of
# columns removed is the smallest count of leading blank cells among the
# lines that contain at least one non-blank cell. Each affected line's
# text_elements is reassigned in place.
#
# Returns @lines untouched when it contains a nil line, when no column can be
# stripped, or when no line has any non-blank cell.
#
# @return [Array] @lines
def lstrip_lines
  return @lines if @lines.include?(nil)

  # Index of the first non-blank cell per line. Lines that are blank
  # throughout yield nil and are dropped, so they neither limit the strip
  # width nor break the min comparison.
  first_content_columns = @lines.map do |line|
    line.text_elements.index { |t| !(t.nil? || t.text.empty?) }
  end.compact

  strip_width = first_content_columns.min
  return @lines if strip_width.nil? || strip_width.zero?

  # drop (unlike a [k..-1] slice) yields [] rather than nil for a blank line
  # shorter than strip_width.
  @lines.each { |line| line.text_elements = line.text_elements.drop(strip_width) }
  @lines
end

#lstrip_lines! ⇒ Object



73
74
75
# File 'lib/tabula/entities/table.rb', line 73

# Runs lstrip_lines and stores whatever it returns back into @lines.
def lstrip_lines!
  stripped = lstrip_lines
  @lines = stripped
end

#rows ⇒ Object



36
37
38
39
40
41
42
43
# File 'lib/tabula/entities/table.rb', line 36

# Returns the table as an array of rows (arrays of TextElement).
#
# The table is first right-padded via rpad!. Any nil cell is then replaced, in
# place, by a TextElement with empty text. Rows are ordered by the largest
# +top+ value found among their cells, a nil +top+ counting as 0.
def rows
  rpad!
  filled = lines.map do |line|
    # map! mutates the line's own text_elements array and returns it.
    line.text_elements.map! do |cell|
      cell || TextElement.new(nil, nil, nil, nil, nil, nil, '', nil)
    end
  end
  filled.sort_by { |cells| cells.map { |cell| cell.top || 0 }.max }
end

#rpad! ⇒ Object



22
23
24
25
26
27
28
29
30
# File 'lib/tabula/entities/table.rb', line 22

# Right-pads every line, in place, with empty-text TextElements so that all
# lines end up with as many cells as the longest one. Returns +lines+.
def rpad!
  widest = lines.map { |line| line.text_elements.size }.max
  lines.each do |line|
    missing = widest - line.text_elements.size
    # One fresh TextElement per missing cell.
    padding = Array.new(missing) { TextElement.new(nil, nil, nil, nil, nil, nil, '', nil) }
    line.text_elements.concat(padding)
  end
end

#to_csv ⇒ Object



96
97
98
99
100
# File 'lib/tabula/entities/table.rb', line 96

# Renders #rows through Tabula::Writers.CSV into an in-memory buffer and
# returns the buffer's contents as a String.
def to_csv
  StringIO.new.tap { |buffer| Tabula::Writers.CSV(rows, buffer) }.string
end

#to_json(*a) ⇒ Object



88
89
90
91
92
93
94
# File 'lib/tabula/entities/table.rb', line 88

# Serializes the table as a JSON object with three keys: 'json_class' (this
# object's class name), 'extraction_method' and 'data' (the result of #rows).
#
# @param a arguments forwarded unchanged to Hash#to_json
def to_json(*a)
  payload = {}
  payload['json_class'] = self.class.name
  payload['extraction_method'] = @extraction_method
  payload['data'] = rows
  payload.to_json(*a)
end

#to_tsv ⇒ Object



102
103
104
105
106
# File 'lib/tabula/entities/table.rb', line 102

# Renders #rows through Tabula::Writers.TSV into an in-memory buffer and
# returns the buffer's contents as a String.
def to_tsv
  StringIO.new.tap { |buffer| Tabula::Writers.TSV(rows, buffer) }.string
end