Class: TestTableExtractor

Inherits:
Minitest::Test
  • Object
show all
Defined in:
lib/table_extractor.rb

Constant Summary collapse

# Pattern handed to TableExtractor.extract_tables as the `regexp:` argument by
# every test in this class. It matches a line consisting only of divider
# cells: optional leading spaces/tabs, an optional opening pipe, one cell of
# one-or-more hyphens with an optional colon on either side, any number of
# further pipe-separated cells of the same form, and an optional closing pipe.
@@regexp =
/^[ \t]*\|? *(?::?-+:?) *( *\| *(?::?-+:?) *)*\|? *$/

Instance Method Summary collapse

Instance Method Details

#test_inconsistent_columns ⇒ Object



120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
# File 'lib/table_extractor.rb', line 120

# Feeds two tables whose divider rows have a different number of cells than
# their header rows (two dividers under three headers, then three dividers
# under two headers) and asserts the reported column counts.
def test_inconsistent_columns
  first_table = [
    '| Species| Genus| Family',
    '|-|-',
    '| Pongo tapanuliensis| Pongo| Hominidae',
    '| | Histiophryne| Antennariidae'
  ]
  second_table = [
    '| Name| Species',
    '|-|-|-',
    '| Tapanuli Orangutan| Pongo tapanuliensis'
  ]
  # A single empty line sits between the two tables.
  input = first_table + [''] + second_table

  actual = TableExtractor.extract_tables(input, regexp: @@regexp)

  # number of columns determined from row of dividers
  assert_equal(
    [
      { rows: 4, columns: 2, start_index: 0 },
      { rows: 3, columns: 3, start_index: 5 }
    ],
    actual
  )
end

#test_indented_table ⇒ Object



79
80
81
82
83
84
85
86
87
88
89
# File 'lib/table_extractor.rb', line 79

# Every line of the table is prefixed with a tab and a space; the assertion
# expects one 4-row, 3-column table starting at index 0.
def test_indented_table
  input = [
    "\t | Species| Genus| Family",
    "\t |-|-|-",
    "\t | Pongo tapanuliensis| Pongo| Hominidae",
    "\t | | Histiophryne| Antennariidae"
  ]

  actual = TableExtractor.extract_tables(input, regexp: @@regexp)

  assert_equal([{ rows: 4, columns: 3, start_index: 0 }], actual)
end

#test_multiple_tables ⇒ Object



91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
# File 'lib/table_extractor.rb', line 91

# Two well-formed tables separated by one empty line; the assertion expects
# one entry per table, the second starting at index 5.
def test_multiple_tables
  species_table = [
    '| Species| Genus| Family',
    '|-|-|-',
    '| Pongo tapanuliensis| Pongo| Hominidae',
    '| | Histiophryne| Antennariidae'
  ]
  names_table = [
    '| Name| Species',
    '|-|-',
    '| Tapanuli Orangutan| Pongo tapanuliensis'
  ]
  input = species_table + [''] + names_table

  want_species = { rows: 4, columns: 3, start_index: 0 }
  want_names = { rows: 3, columns: 2, start_index: 5 }

  actual = TableExtractor.extract_tables(input, regexp: @@regexp)

  assert_equal([want_species, want_names], actual)
end

#test_no_tables ⇒ Object



110
111
112
113
114
115
116
117
118
# File 'lib/table_extractor.rb', line 110

# Plain text with no divider row; the assertion expects an empty result.
def test_no_tables
  input = ['This is a regular line.', 'Another regular line.']

  actual = TableExtractor.extract_tables(input, regexp: @@regexp)

  assert_equal([], actual)
end

#test_single_table ⇒ Object



67
68
69
70
71
72
73
74
75
76
77
# File 'lib/table_extractor.rb', line 67

# One table occupying the whole input; the assertion expects a single entry
# with 4 rows, 3 columns and start index 0.
def test_single_table
  input = [
    '| Species| Genus| Family',
    '|-|-|-',
    '| Pongo tapanuliensis| Pongo| Hominidae',
    '| | Histiophryne| Antennariidae'
  ]

  actual = TableExtractor.extract_tables(input, regexp: @@regexp)

  assert_equal([{ rows: 4, columns: 3, start_index: 0 }], actual)
end

#test_table_at_end_of_lines ⇒ Object



138
139
140
141
142
143
144
145
146
147
148
149
# File 'lib/table_extractor.rb', line 138

# A line of ordinary text followed by a table that runs to the last input
# line; the assertion expects the table to start at index 1.
def test_table_at_end_of_lines
  preamble = ['Some introductory text.']
  table = [
    '| Species| Genus| Family',
    '|-|-|-',
    '| Pongo tapanuliensis| Pongo| Hominidae',
    '| | Histiophryne| Antennariidae'
  ]

  actual = TableExtractor.extract_tables(preamble + table, regexp: @@regexp)

  assert_equal([{ rows: 4, columns: 3, start_index: 1 }], actual)
end

#test_table_with_colon_hyphens ⇒ Object



164
165
166
167
168
169
170
171
172
173
174
# File 'lib/table_extractor.rb', line 164

# The divider row uses colon-decorated cells (`:-:` and `:-`); the assertion
# expects the table to be reported with 4 rows and 3 columns.
def test_table_with_colon_hyphens
  input = [
    '| Name| Age| City',
    '|:-:|:-|:-:',
    '| John Doe| 30| New York',
    '| Jane Doe| 25| Los Angeles'
  ]

  actual = TableExtractor.extract_tables(input, regexp: @@regexp)

  assert_equal([{ rows: 4, columns: 3, start_index: 0 }], actual)
end

#test_table_without_starting_pipe ⇒ Object



151
152
153
154
155
156
157
158
159
160
161
162
# File 'lib/table_extractor.rb', line 151

# The header row has no leading pipe and is preceded by a line of ordinary
# text; the assertion expects the table to start at the header (index 1).
def test_table_without_starting_pipe
  preamble = ['Some introductory text.']
  table = [
    'Platform| Target Environment| Command',
    '|-|-|-',
    '| Pongo tapanuliensis| Pongo| Hominidae',
    '| | Histiophryne| Antennariidae'
  ]

  actual = TableExtractor.extract_tables(preamble + table, regexp: @@regexp)

  assert_equal([{ rows: 4, columns: 3, start_index: 1 }], actual)
end