Class: TableExtractor
Class Method Summary
-
.extract_tables(lines, regexp:) ⇒ Array<Hash>
Extract tables from an array of text lines formatted in Markdown style.
Class Method Details
.extract_tables(lines, regexp:) ⇒ Array<Hash>
Extract tables from an array of text lines formatted in Markdown style.
8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 |
# File 'lib/table_extractor.rb', line 8
#
# Extract tables from an array of text lines formatted in Markdown style.
#
# A table is recognised by its separator row: a line whose stripped text
# matches +regexp+. The line directly above the separator is taken as the
# header, and every following line containing a '|' is counted as a data row
# until a line without '|' (or the end of input) closes the table.
#
# @param lines [Array<String>] the text lines to scan
# @param regexp [Regexp] pattern tested against each stripped line to detect
#   a separator row
# @return [Array<Hash>] one hash per table, in order of appearance, with
#   +:rows+ (header + separator + data rows), +:columns+ (cells in the
#   separator row) and +:start_index+ (index of the header line in +lines+)
def self.extract_tables(lines, regexp:)
  tables = []
  inside_table = false
  table_start = nil
  row_count = 0
  column_count = 0

  # Records the table currently being tracked; reads the locals at call time.
  flush = lambda do
    tables << { rows: row_count, columns: column_count, start_index: table_start }
  end

  lines.each_with_index do |line, index|
    stripped = line.strip

    # A separator on the very first line has no header above it; accepting it
    # would produce start_index -1, which Ruby treats as "last element".
    if stripped.match?(regexp) && (inside_table || index.positive?)
      if inside_table
        # The previous line was counted as a data row of the running table,
        # but it is really the header of the table that starts here.
        row_count -= 1 if row_count > 2
        flush.call
      end

      # Always re-anchor: a table that follows another one directly must not
      # inherit the earlier table's start index.
      table_start = index - 1
      # Count cells on the stripped text with the optional outer pipes
      # removed, so trailing whitespace/newlines and pipe-less separators
      # ("---|---") do not skew the result. The -1 limit keeps empty cells.
      column_count = stripped.sub(/\A\|/, '').sub(/\|\z/, '').split('|', -1).count
      row_count = 2 # header row + separator row
      inside_table = true
    elsif inside_table && line.include?('|')
      row_count += 1
    elsif inside_table
      # First line without a pipe ends the table.
      flush.call
      inside_table = false
      table_start = nil
      row_count = 0
      column_count = 0
    end
  end

  # Handle the case where the table runs to the last line.
  flush.call if inside_table

  tables
end