Module: DBX::Differ

Defined in:
lib/dbx/differ.rb

Class Method Summary collapse

Class Method Details

.create_diff_stats(diff_table, force: false) ⇒ Object



57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
# File 'lib/dbx/differ.rb', line 57

def create_diff_stats(diff_table, force: false)
  DBX.connection do |conn|
    diff_stats = "#{diff_table}_stats"
    conn.execute("DROP TABLE IF EXISTS #{diff_stats}") if force
    selects = conn.columns(diff_table).map do |column|
      header, type = column.name, column.type
      col = header[/(.*)_diff$/, 1]
      next unless col

      if column.sql_type == 'interval'
        %{SUM(#{header}) AS #{col}_sum}
      else
        case type
        when :decimal, :integer, :date, :datetime
          %{SUM(#{header}) AS #{col}_sum}
        else
          %{COUNT(#{header}) AS #{col}_count}
        end
      end
    end.compact.join(",\n")
    conn.execute(<<-SQL)
      CREATE TABLE #{diff_stats} AS
      SELECT
        #{selects}
      FROM #{diff_table}
    SQL
  end
end

.diff(table_a:, table_b:, force: false, using: ['id'], exclude_columns: nil, no_a_b: false) ⇒ Object

Compare ‘src_a` with `src_b`. Numeric types will be diffed by subtracting the values. Dates will contain difference by day. Datetime will contain difference by seconds. Everything else can only return a boolean true/false that it is different.

Parameters:

  • table (String)

    A Should be the initial state table.

  • table (String)

    B Should be newer than table A, but doesn’t have to be.

  • using (Array<String>) (defaults to: ['id'])

    is the join criteria between the 2 tables.

  • exclude_columns (Array<String>) (defaults to: nil)

    are excluded from the diff comparison.



16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
# File 'lib/dbx/differ.rb', line 16

def diff(table_a:, table_b:, force: false, using: ['id'], exclude_columns: nil, no_a_b: false)
  table_diff = "diff_#{table_a}_#{table_b}"
  exclude_columns ||= []
  using = using.map(&:downcase)
  DBX.info("Creating diff table #{table_diff}")
  DBX.connection do |conn|
    conn.execute("DROP TABLE IF EXISTS #{table_diff}") if force
    conn.execute(<<-SQL)
    CREATE TABLE #{table_diff} AS
    SELECT
      #{using.join(', ')},
      #{select_columns(table_a, exclude_columns: using + exclude_columns, no_a_b: no_a_b)}
    FROM #{table_a} AS a
    FULL OUTER JOIN #{table_b} b USING (#{using.join(',')})
    WHERE
      #{where_columns(table_a, exclude_columns: using + exclude_columns)}
    SQL
    DBX.info(conn.exec_query(<<-SQL).as_json)
    SELECT
      (SELECT COUNT(*) FROM #{table_a}) count_table_a,
      (SELECT COUNT(*) FROM #{table_b}) count_table_b,
      (SELECT COUNT(*) FROM #{table_diff}) diffs
    SQL
  end
  DBX.info("Creating diff stats: #{table_diff}")
  create_diff_stats(table_diff, force: force)

  DBX.info("Diff complete. Results details in:   #{table_diff}")
end

.import_and_diff(src_a:, src_b:, force: false, using: ['id'], exclude_columns: nil, no_a_b: false) ⇒ Object



46
47
48
49
50
51
52
53
54
55
# File 'lib/dbx/differ.rb', line 46

def import_and_diff(src_a:, src_b:, force: false, using: ['id'], exclude_columns: nil, no_a_b: false)
  DBX.info("Importing #{src_a}")
  table_a = DBX.import_table(src_a, force: force)


  DBX.info("Importing #{src_b}")
  table_b = DBX.import_table(src_b, force: force)

  diff(table_a: table_a, table_b: table_b, force: force, using: using, exclude_columns: exclude_columns, no_a_b: no_a_b)
end

.select_boolean(column, no_a_b: false) ⇒ Object



145
146
147
148
149
150
# File 'lib/dbx/differ.rb', line 145

def select_boolean(column, no_a_b: false)
  a = "a.#{column}"
  b = "b.#{column}"
  a_b = no_a_b ? '' : "#{a} AS #{column}_a, #{b} AS #{column}_b, "
  %(#{a_b}NULLIF(#{a} <> #{b}, FALSE) AS #{column}_diff)
end

.select_columns(table, exclude_columns: nil, no_a_b: false) ⇒ Object



86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
# File 'lib/dbx/differ.rb', line 86

def select_columns(table, exclude_columns: nil, no_a_b: false)
  exclude_columns ||= []
  DBX.connection do |conn|
    conn.columns(table).map do |column|
      header, type = column.name, column.type
      next if exclude_columns.include?(header)
      case type
      when :decimal, :integer
        select_difference(header, no_a_b: no_a_b)
      when :date
        select_difference_as_int(header, no_a_b: no_a_b)
      when :datetime
        select_difference_as_interval(header, no_a_b: no_a_b)
      else
        select_boolean(header, no_a_b: no_a_b)
      end
    end.compact.join(',')
  end
end

.select_difference(column, no_a_b: false) ⇒ Object



117
118
119
120
121
122
# File 'lib/dbx/differ.rb', line 117

def select_difference(column, no_a_b: false)
  a = "a.#{column}"
  b = "b.#{column}"
  a_b = no_a_b ? '' : "#{a} AS #{column}_a, #{b} AS #{column}_b, "
  %(#{a_b}(CASE WHEN #{a} = #{b} THEN NULL WHEN #{a} IS NULL THEN #{b} WHEN #{b} IS NULL THEN #{a} ELSE NULLIF(#{b} - #{a}, 0) END) AS #{column}_diff)
end

.select_difference_as_int(column, no_a_b: false) ⇒ Object



124
125
126
127
128
129
# File 'lib/dbx/differ.rb', line 124

def select_difference_as_int(column, no_a_b: false)
  a = "a.#{column}"
  b = "b.#{column}"
  a_b = no_a_b ? '' : "#{a} AS #{column}_a, #{b} AS #{column}_b, "
  %(#{a_b}(CASE WHEN #{a} = #{b} THEN NULL::bigint WHEN #{a} IS NULL THEN 1 WHEN #{b} IS NULL THEN -1 ELSE (#{b} - #{a}) END) AS #{column}_diff)
end

.select_difference_as_interval(column, no_a_b: false) ⇒ Object



131
132
133
134
135
136
# File 'lib/dbx/differ.rb', line 131

def select_difference_as_interval(column, no_a_b: false)
  a = "a.#{column}"
  b = "b.#{column}"
  a_b = no_a_b ? '' : "#{a} AS #{column}_a, #{b} AS #{column}_b, "
  %(#{a_b}(CASE WHEN #{a} = #{b} THEN NULL::interval WHEN #{a} IS NULL THEN '1 day'::interval WHEN #{b} IS NULL THEN '-1 day'::interval ELSE (#{b} - #{a})::interval END) AS #{column}_diff)
end

.select_difference_as_text(column, no_a_b: false) ⇒ Object



138
139
140
141
142
143
# File 'lib/dbx/differ.rb', line 138

def select_difference_as_text(column, no_a_b: false)
  a = "a.#{column}"
  b = "b.#{column}"
  a_b = no_a_b ? '' : "#{a} AS #{column}_a, #{b} AS #{column}_b, "
  %(#{a_b}(CASE WHEN #{a} = #{b} THEN NULL WHEN #{a} IS NULL THEN #{b}::text WHEN #{b} IS NULL THEN #{a}::text ELSE (#{b} - #{a})::text END) AS #{column}_diff)
end

.where_columns(table, exclude_columns: nil) ⇒ Object



106
107
108
109
110
111
112
113
114
115
# File 'lib/dbx/differ.rb', line 106

def where_columns(table, exclude_columns: nil)
  exclude_columns ||= []
  DBX.connection do |conn|
    conn.columns(table).map do |column|
      header, type = column.name, column.type
      next if exclude_columns.include?(header)
      %((a.#{header} <> b.#{header}))
    end
  end.compact.join('OR')
end