Class: CSVJoin::Comparator

Inherits:
Object
  • Object
show all
Defined in:
lib/comparator.rb

Overview

Compare and join two tables

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize ⇒ Comparator

Returns a new instance of Comparator.



18
19
20
21
22
23
# File 'lib/comparator.rb', line 18

# Builds an empty comparator.
#
# @data, @rows and @empty each start as their own empty array, and the
# input column separator defaults to a comma.
def initialize
  @data, @rows, @empty = [], [], []
  @input_col_sep = ","
end

Instance Attribute Details

#columns ⇒ Object

Returns the value of attribute columns.



14
15
16
# File 'lib/comparator.rb', line 14

# Reader for @columns.
#
# @return [Array<Array>, nil] the [left, right] column pairs assigned by
#   columns_to_compare or set_default_column_names; nil until one of them runs
def columns
  @columns
end

#data ⇒ Object

Returns the value of attribute data.



15
16
17
# File 'lib/comparator.rb', line 15

# Reader for @data.
#
# @return [Array] container indexed by side; parse_side stores the parsed
#   table for each side here (starts out as an empty array)
def data
  @data
end

#headers ⇒ Object

Returns the value of attribute headers.



15
16
17
# File 'lib/comparator.rb', line 15

# Reader for @headers.
#
# @return [Array, nil] header row of the joined output as built by prepare:
#   the left headers, the literal "diff", then the right headers; nil before
#   prepare has run
def headers
  @headers
end

#input_col_sep ⇒ Object

Returns the value of attribute input_col_sep.



16
17
18
# File 'lib/comparator.rb', line 16

# Reader for @input_col_sep.
#
# @return [String] column separator passed to the CSV reader and writer;
#   "," after initialize, replaced by intuit_separator when a file is sniffed
def input_col_sep
  @input_col_sep
end

#rows ⇒ Object

Returns the value of attribute rows.



15
16
17
# File 'lib/comparator.rb', line 15

# Reader for @rows.
#
# @return [Array] container indexed by side; prepare_rows stores the list
#   produced by csv_to_talimer_rows for each side here
def rows
  @rows
end

#weights ⇒ Object

Returns the value of attribute weights.



14
15
16
# File 'lib/comparator.rb', line 14

# Reader for @weights.
#
# @return [Array<Integer>, nil] one weight per entry in @columns, assigned by
#   columns_to_compare or set_default_column_names; nil until one of them runs
def weights
  @weights
end

Instance Method Details

#action_verbose(action) ⇒ Object



95
96
97
98
99
100
# File 'lib/comparator.rb', line 95

# Translates a one-character diff action into its three-character marker.
#
# @param action [String, Symbol, #to_s] one of "!", "-", "+", "="
# @return [String] "!==", "==>", "<==" or "===" respectively
# @raise [RuntimeError] "wrong action ..." for any other value
def action_verbose(action)
  markers = { "!" => "!==", "-" => "==>", "+" => "<==", "=" => "===" }

  # Look up by #to_s rather than #to_sym: every object responds to #to_s, so
  # nil or numeric input yields the "wrong action" error below instead of an
  # unrelated NoMethodError.
  markers.fetch(action.to_s) { raise "wrong action #{action}" }
end

#columns_to_compare(cols) ⇒ Object



124
125
126
127
128
129
130
131
132
# File 'lib/comparator.rb', line 124

# Parses a column-mapping spec and stores it as the columns to compare on.
#
# The spec is a list of "left=right" or "left~right" pairs; names may not
# contain ",", ":", "=" or "~". The first pair gets weight 1 and every
# following pair gets weight 0.
#
# @param cols [String] mapping spec, e.g. "id=ID,name~title"
# @return [Array<Integer>] the weights that were stored in @weights
# @raise [ArgumentError] when the spec contains no pair; @columns and
#   @weights are left untouched in that case
def columns_to_compare(cols)
  pairs = cols.to_s.scan(/([^,:=~]+)(?:[=~])([^,:=~]+)/)

  # Validate before assigning anything: with zero pairs the weight list would
  # need a negative length, which used to surface as "negative argument"
  # after @columns had already been overwritten.
  raise ArgumentError, "no column pairs (left=right) found in #{cols.inspect}" if pairs.empty?

  @columns = pairs
  @weights = [1, *Array.new(pairs.size - 1, 0)]
end

#compare(source1, source2) ⇒ Object



102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
# File 'lib/comparator.rb', line 102

# Compares two CSV sources and renders the joined result as CSV text.
#
# Both sources are loaded through prepare, the two row lists are diffed with
# Diff::LCS.sdiff, and every change becomes one output row after the header.
#
# @param source1 [String] left source, handed to prepare
# @param source2 [String] right source, handed to prepare
# @return [String] CSV text using "\n" row endings and @input_col_sep
def compare(source1, source2)
  prepare(source1, source2)

  changes = Diff::LCS.sdiff(@rows[LEFT], @rows[RIGHT], Diff::LCS::NoReplaceDiffCallbacks)

  CSV.generate(row_sep: "\n", col_sep: @input_col_sep) do |out|
    out << @headers
    changes.each { |change| out << joined_row(change) }
  end
end

#csv_to_talimer_rows(csv, side: 'undef') ⇒ Object



50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
# File 'lib/comparator.rb', line 50

# Wraps every row of a parsed table in a DataRow configured for one side.
#
# For the LEFT side the first name of each column pair is used, otherwise
# the last one.
#
# @param csv [#each] parsed table whose rows respond to #headers and #fields
# @param side [Object] side marker stored on each DataRow
# @return [Array<DataRow>] one DataRow per input row, in input order
def csv_to_talimer_rows(csv, side: 'undef')
  picker = side.eql?(LEFT) ? :first : :last
  row_columns = columns.map(&picker)

  csv.map do |row|
    DataRow.new(row.headers, row.fields).tap do |data_row|
      data_row.columns = row_columns
      data_row.weights = weights
      data_row.side = side
    end
  end
end

#intuit_col_sep(line) ⇒ Object



25
26
27
28
29
# File 'lib/comparator.rb', line 25

# Guesses the column separator of a CSV line by counting candidates.
#
# Candidates are ",", ";" and tab; the most frequent one wins and ties go to
# the earliest candidate in that order.
#
# @param line [String, nil] a sample line, typically the header row
# @return [String] the guessed separator; "," when line is nil
def intuit_col_sep(line)
  candidates = [",", ";", "\t"]
  return candidates.first if line.nil?

  candidates.max_by { |separator| line.count(separator) }
end

#intuit_separator(file) ⇒ Object



31
32
33
34
35
36
37
# File 'lib/comparator.rb', line 31

# Sniffs the column separator from the first line of a file and stores it
# in @input_col_sep.
#
# The file is opened with encoding "bom|utf-8". An empty file leaves
# @input_col_sep unchanged.
#
# @param file [String] path of the file to inspect
# @return [String] the same path that was passed in
def intuit_separator(file)
  # Block form so the handle is closed as soon as the first line is read;
  # the bare File.open(...).each form never closed it.
  File.open(file, encoding: 'bom|utf-8') do |io|
    first_line = io.gets
    @input_col_sep = intuit_col_sep(first_line) unless first_line.nil?
  end
  file
end

#joined_row(change) ⇒ Object



118
119
120
121
122
# File 'lib/comparator.rb', line 118

# Builds one output row for a diff change: left fields, marker, right fields.
#
# When a side has no element, the blank filler row stored in @empty for that
# side is used in its place.
#
# @param change [#old_element, #new_element, #action] one entry of the diff
# @return [Array] left fields, the action_verbose marker, right fields
def joined_row(change)
  old_row = change.old_element
  left_fields = if old_row.nil?
                  @empty[LEFT]
                else
                  old_row.fields
                end

  new_row = change.new_element
  right_fields = if new_row.nil?
                   @empty[RIGHT]
                 else
                   new_row.fields
                 end

  left_fields + [action_verbose(change.action)] + right_fields
end

#parse(data) ⇒ Object



39
40
41
42
43
44
45
46
47
48
# File 'lib/comparator.rb', line 39

# Parses CSV input given either as a path to an existing file or as raw text.
#
# For an existing file the separator is sniffed first via intuit_separator
# and a table that compares equal to [] is rejected. Any other string is
# parsed as CSV text with the current @input_col_sep.
#
# @param data [String] file path or CSV text
# @return [CSV::Table] parsed table with the first row used as headers
# @raise [RuntimeError] "Wrong CSV" when a file yields no rows
def parse(data)
  return CSV.parse(data, headers: true, col_sep: @input_col_sep) unless File.exist?(data)

  intuit_separator(data)
  table = CSV.read(data, headers: true, col_sep: @input_col_sep)
  raise "Wrong CSV" if table == []

  table
end

#parse_side(source, side: nil) ⇒ Object



66
67
68
69
# File 'lib/comparator.rb', line 66

# Parses one source and records it, plus a matching blank row, for a side.
#
# @param source [String] input handed to parse
# @param side [Object] index under which the results are stored
# @return [Array<String>] the blank row: one empty string per header
def parse_side(source, side: nil)
  table = parse(source)
  @data[side] = table
  @empty[side] = Array.new(table.headers.size, '')
end

#prepare(source1, source2) ⇒ Object



75
76
77
78
79
80
81
82
83
84
85
# File 'lib/comparator.rb', line 75

# Loads both sources and derives everything compare needs from them.
#
# Order matters: both sides are parsed first, then the default column pairs
# are chosen, then the comparable rows are built, and finally the joined
# header row is assembled.
#
# @param source1 [String] left source
# @param source2 [String] right source
# @return [Array] the joined header row: left headers, "diff", right headers
def prepare(source1, source2)
  [[LEFT, source1], [RIGHT, source2]].each do |side, source|
    parse_side(source, side: side)
  end

  set_default_column_names

  [LEFT, RIGHT].each { |side| prepare_rows(side: side) }

  @headers = @data[LEFT].headers + ["diff"] + @data[RIGHT].headers
end

#prepare_rows(side: nil) ⇒ Object



71
72
73
# File 'lib/comparator.rb', line 71

# Converts the parsed table of one side into comparable rows and stores
# them in @rows under the same side.
#
# @param side [Object] index of the side to convert
# @return [Array] the rows produced by csv_to_talimer_rows
def prepare_rows(side: nil)
  table = @data[side]
  @rows[side] = csv_to_talimer_rows(table, side: side)
end

#set_default_column_names ⇒ Object

By default, use the columns that have the same name in both tables.



88
89
90
91
92
93
# File 'lib/comparator.rb', line 88

# Falls back to comparing on the headers that both tables share.
#
# Does nothing when @columns has already been assigned. Otherwise each shared
# header is paired with itself and every pair gets weight 1.
#
# @return [Array<Integer>, nil] the new weights, or nil when nothing changed
def set_default_column_names
  return unless @columns.nil?

  shared = @data[LEFT].headers & @data[RIGHT].headers
  @columns = shared.zip(shared)
  @weights = Array.new(shared.size, 1)
end