Class: CSVJoin::Comparator
- Inherits: Object
- Inheritance chain: Object → CSVJoin::Comparator
- Defined in: lib/comparator.rb
Overview
Compare and join two tables
Instance Attribute Summary collapse
-
#columns ⇒ Object
Returns the value of attribute columns.
-
#data ⇒ Object
Returns the value of attribute data.
-
#headers ⇒ Object
Returns the value of attribute headers.
-
#input_col_sep ⇒ Object
Returns the value of attribute input_col_sep.
-
#rows ⇒ Object
Returns the value of attribute rows.
-
#weights ⇒ Object
Returns the value of attribute weights.
Instance Method Summary collapse
- #action_verbose(action) ⇒ Object
- #columns_to_compare(cols) ⇒ Object
- #compare(source1, source2) ⇒ Object
- #csv_to_talimer_rows(csv, side: 'undef') ⇒ Object
-
#initialize ⇒ Comparator
constructor
A new instance of Comparator.
- #intuit_col_sep(line) ⇒ Object
- #intuit_separator(file) ⇒ Object
- #joined_row(change) ⇒ Object
- #parse(data) ⇒ Object
- #parse_side(source, side: nil) ⇒ Object
- #prepare(source1, source2) ⇒ Object
- #prepare_rows(side: nil) ⇒ Object
-
#set_default_column_names ⇒ Object
By default, use the columns that have the same name in both tables.
Constructor Details
#initialize ⇒ Comparator
Returns a new instance of Comparator.
18 19 20 21 22 23 |
# File 'lib/comparator.rb', line 18
# Builds a comparator with empty storage for parsed data, prepared rows and
# blank filler rows, and a comma as the initial input column separator.
def initialize
  @data, @rows, @empty = [], [], []
  @input_col_sep = ","
end
Instance Attribute Details
#columns ⇒ Object
Returns the value of attribute columns.
14 15 16 |
# File 'lib/comparator.rb', line 14
# Reader for @columns, the list of column-name pairs used for comparison
# (assigned by columns_to_compare or set_default_column_names).
#
# @return [Array, nil] the current value of @columns
def columns
  @columns
end
#data ⇒ Object
Returns the value of attribute data.
15 16 17 |
# File 'lib/comparator.rb', line 15
# Reader for @data, which parse_side fills with the parsed CSV of each side.
#
# @return [Array] the current value of @data
def data
  @data
end
#headers ⇒ Object
Returns the value of attribute headers.
15 16 17 |
# File 'lib/comparator.rb', line 15
# Reader for @headers, the header row of the joined output (assigned by
# prepare).
#
# @return [Array, nil] the current value of @headers
def headers
  @headers
end
#input_col_sep ⇒ Object
Returns the value of attribute input_col_sep.
16 17 18 |
# File 'lib/comparator.rb', line 16
# Reader for @input_col_sep, the column separator used when parsing input and
# generating output.
#
# @return [String] the current value of @input_col_sep
def input_col_sep
  @input_col_sep
end
#rows ⇒ Object
Returns the value of attribute rows.
15 16 17 |
# File 'lib/comparator.rb', line 15
# Reader for @rows, which prepare_rows fills with the comparable rows of each
# side.
#
# @return [Array] the current value of @rows
def rows
  @rows
end
#weights ⇒ Object
Returns the value of attribute weights.
14 15 16 |
# File 'lib/comparator.rb', line 14
# Reader for @weights, the per-column weights assigned alongside @columns.
#
# @return [Array, nil] the current value of @weights
def weights
  @weights
end
Instance Method Details
#action_verbose(action) ⇒ Object
95 96 97 98 99 100 |
# File 'lib/comparator.rb', line 95
# Translates a one-character diff action into the marker written to the
# "diff" column of the joined output.
#
# @param action [#to_sym] one of "!", "-", "+", "="
# @return [String] the matching marker ("!==", "==>", "<==" or "===")
# @raise [RuntimeError] when the action is not one of the four known ones
def action_verbose(action)
  markers = { "!": "!==", "-": "==>", "+": "<==", "=": "===" }
  markers.fetch(action.to_sym) { raise "wrong action #{action}" }
end
#columns_to_compare(cols) ⇒ Object
124 125 126 127 128 129 130 131 132 |
# File 'lib/comparator.rb', line 124
# Parses a column specification such as "id=ID,name~title" into pairs of
# [left column, right column] and assigns the matching weights: 1 for the
# first pair, 0 for every other pair.
#
# @param cols [String] pairs written as "from=to" or "from~to", separated by
#   "," or ":"
# @return [Array<Integer>] the weights assigned to @weights
# @raise [ArgumentError] when +cols+ contains no "from=to" / "from~to" pair
def columns_to_compare(cols)
  pairs = cols.scan(/([^,:=~]+)(?:[=~])([^,:=~]+)/)
  # Fail before touching state: an empty @columns would otherwise suppress the
  # default column selection and the weights expression below would blow up
  # with an opaque "negative argument" error.
  raise ArgumentError, "no column pairs found in #{cols.inspect}" if pairs.empty?

  @columns = pairs
  @weights = [1] + [0] * (pairs.size - 1)
end
#compare(source1, source2) ⇒ Object
102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 |
# File 'lib/comparator.rb', line 102
# Parses both sources, diffs their prepared rows and renders the joined
# result as CSV text using the current input column separator.
#
# @param source1 [String] left-hand source, passed to prepare
# @param source2 [String] right-hand source, passed to prepare
# @return [String] CSV text: the header row followed by one joined row per
#   diff entry
def compare(source1, source2)
  prepare(source1, source2)

  changes = Diff::LCS.sdiff(@rows[LEFT], @rows[RIGHT], Diff::LCS::NoReplaceDiffCallbacks)

  CSV.generate(row_sep: "\n", col_sep: @input_col_sep) do |out|
    out << @headers
    changes.each { |change| out << joined_row(change) }
  end
end
#csv_to_talimer_rows(csv, side: 'undef') ⇒ Object
50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 |
# File 'lib/comparator.rb', line 50
# Wraps every row of a parsed CSV in a DataRow carrying the comparison
# columns for the given side, the weights and the side itself.
#
# @param csv [#map] parsed CSV whose rows respond to #headers and #fields
# @param side [Object] LEFT selects the first name of each column pair; any
#   other value selects the last
# @return [Array<DataRow>] one DataRow per CSV row, in input order
def csv_to_talimer_rows(csv, side: 'undef')
  row_columns = columns.map { |pair| side.eql?(LEFT) ? pair.first : pair.last }

  csv.map do |row|
    DataRow.new(row.headers, row.fields).tap do |data_row|
      data_row.columns = row_columns
      data_row.weights = weights
      data_row.side = side
    end
  end
end
#intuit_col_sep(line) ⇒ Object
25 26 27 28 29 |
# File 'lib/comparator.rb', line 25
# Guesses the column separator of a line by picking whichever of ",", ";" and
# tab occurs most often. Ties go to the earlier candidate in that order.
#
# @param line [String, nil] a line of CSV text
# @return [String] the guessed separator; "," when +line+ is nil
def intuit_col_sep(line)
  if line.nil?
    ","
  else
    candidates = [",", ";", "\t"]
    candidates.max_by { |separator| line.count(separator) }
  end
end
#intuit_separator(file) ⇒ Object
31 32 33 34 35 36 37 |
# File 'lib/comparator.rb', line 31
# Detects the column separator from the first line of +file+ and stores it in
# @input_col_sep. An empty file leaves @input_col_sep untouched.
#
# @param file [String] path of the file to inspect
# @return [String] +file+, unchanged
def intuit_separator(file)
  # Block form guarantees the handle is closed; only the first line is needed.
  File.open(file, encoding: 'bom|utf-8') do |io|
    first_line = io.gets
    @input_col_sep = intuit_col_sep(first_line) unless first_line.nil?
  end
  file
end
#joined_row(change) ⇒ Object
118 119 120 121 122 |
# File 'lib/comparator.rb', line 118
# Builds one output row for a diff entry: left fields, the verbose action
# marker, then right fields. A missing element on either side is replaced by
# that side's blank filler row.
#
# @param change [#old_element, #new_element, #action] a single diff entry
# @return [Array] the joined row
def joined_row(change)
  left_row =
    if change.old_element.nil?
      @empty[LEFT]
    else
      change.old_element.fields
    end
  right_row =
    if change.new_element.nil?
      @empty[RIGHT]
    else
      change.new_element.fields
    end
  marker = action_verbose(change.action)

  [*left_row, marker, *right_row]
end
#parse(data) ⇒ Object
39 40 41 42 43 44 45 46 47 48 |
# File 'lib/comparator.rb', line 39
# Parses +data+ into a CSV table with headers.
#
# When +data+ names an existing file, the column separator is detected first
# via intuit_separator and the file is read with the same BOM-aware UTF-8
# decoding that intuit_separator uses. Otherwise +data+ is treated as inline
# CSV text and parsed with the current @input_col_sep.
#
# @param data [String] path of a CSV file, or CSV text
# @return [CSV::Table] the parsed table
# @raise [RuntimeError] "Wrong CSV" when the table read from a file compares
#   equal to an empty array
def parse(data)
  return CSV.parse(data, headers: true, col_sep: @input_col_sep) unless File.exist? data

  intuit_separator(data)
  # Strip a leading byte-order mark so it cannot leak into the first header.
  csv = CSV.read(data, headers: true, col_sep: @input_col_sep, encoding: 'bom|utf-8')
  raise "Wrong CSV" if csv == []

  csv
end
#parse_side(source, side: nil) ⇒ Object
66 67 68 69 |
# File 'lib/comparator.rb', line 66
# Parses +source+ for one side and prepares that side's blank filler row: one
# empty string per header of the parsed table.
#
# @param source [String] passed to parse
# @param side [Object] index under which the results are stored in @data and
#   @empty
# @return [Array<String>] the blank filler row stored in @empty[side]
def parse_side(source, side: nil)
  table = parse(source)
  @data[side] = table
  @empty[side] = Array.new(@data[side].headers.size, '')
end
#prepare(source1, source2) ⇒ Object
75 76 77 78 79 80 81 82 83 84 85 |
# File 'lib/comparator.rb', line 75
# Parses both sources, falls back to the default comparison columns when none
# were chosen, builds the comparable rows of each side and assembles the
# header row of the joined output.
#
# @param source1 [String] left-hand source
# @param source2 [String] right-hand source
# @return [Array] the joined header row: left headers, "diff", right headers
def prepare(source1, source2)
  [[source1, LEFT], [source2, RIGHT]].each do |source, side|
    parse_side(source, side: side)
  end

  set_default_column_names

  [LEFT, RIGHT].each { |side| prepare_rows(side: side) }

  left_headers = @data[LEFT].headers
  right_headers = @data[RIGHT].headers
  @headers = [*left_headers, "diff", *right_headers]
end
#prepare_rows(side: nil) ⇒ Object
71 72 73 |
# File 'lib/comparator.rb', line 71
# Converts the parsed CSV of one side into comparable rows and stores them in
# @rows under that side.
#
# @param side [Object] index into @data and @rows
# @return [Array] the rows stored in @rows[side]
def prepare_rows(side: nil)
  parsed = @data[side]
  @rows[side] = csv_to_talimer_rows(parsed, side: side)
end
#set_default_column_names ⇒ Object
By default, use the columns that have the same name in both tables.
88 89 90 91 92 93 |
# File 'lib/comparator.rb', line 88
# When no comparison columns have been chosen yet, compares on every header
# present in both tables, pairing each name with itself and giving each pair
# weight 1. Does nothing when @columns is already set.
#
# @return [Array<Integer>, nil] the assigned weights, or nil when @columns was
#   already set
def set_default_column_names
  if @columns.nil?
    shared = @data[LEFT].headers & @data[RIGHT].headers
    @columns = shared.map { |name| [name, name] }
    @weights = Array.new(@columns.size, 1)
  end
end