Module: DatabaseSanitizer

Defined in:
lib/database_sanitizer.rb,
lib/database_sanitizer/version.rb,
lib/database_sanitizer/transformers.rb

Defined Under Namespace

Classes: Destination, Source

Constant Summary

# Number of rows handled per SELECT/INSERT batch. Taken from the CHUNK_SIZE
# environment variable and converted with to_i; defaults to 1000 when the
# variable is not set.
CHUNK_SIZE = ENV.fetch('CHUNK_SIZE', '1000').to_i
# Version string of DatabaseSanitizer (lives in lib/database_sanitizer/version.rb).
VERSION =
'0.0.14'
# Registry of column sanitizers, keyed by the name given in a column comment
# (`sanitize: <name>`). Every entry is invoked as `transformer.(index, value)`
# where `index` is a row number and `value` is the original column value;
# the return value replaces the original in the exported row.
Transformers =
{
  # Replaces the local part with a zero-padded row index and keeps the domain.
  # A NULL e-mail stays NULL instead of raising NoMethodError on nil.split.
  'email' => ->(i, rec) { rec.nil? ? rec : "email#{i.to_s.rjust(5, '0')}@#{rec.split('@')[1]}" },
  # Replaces the value with NULL.
  'wipe' => proc { nil },
  # Replaces the value with the integer 0.
  'zero' => proc { 0 },
  # Replaces the value with an empty string.
  'empty_string' => proc { '' },
  # Replaces the value with a fixed placeholder name.
  'name' => proc { 'John Doe' },
  # Keeps the first three characters and fills the remainder with the
  # zero-padded row index, preserving the original length. NULL stays NULL.
  'phone_number' => ->(i, rec) { rec.nil? ? rec : "#{rec[0, 3]}#{i.to_s.rjust(rec.length - 3, '0')}" }
}

Class Method Summary

Class Method Details

.duplicate_schema(schema = nil) ⇒ Object



35
36
37
38
39
40
41
42
43
44
45
46
47
48
# File 'lib/database_sanitizer.rb', line 35

# Sets up the schema before rows are copied.
#
# When +schema+ is given it is treated as a path (resolved against the current
# working directory); the file's contents are executed as one statement on
# Destination.connection. When +schema+ is nil, the schema of Source.connection
# is dumped with ActiveRecord::SchemaDumper into an in-memory buffer and the
# resulting Ruby is evaluated.
# NOTE(review): which connection the evaluated schema.rb is loaded into is not
# visible here — confirm it targets the destination database.
#
# Returns whatever the block passed to suppress_messages yields back.
def duplicate_schema schema=nil
  unless schema.nil?
    puts 'Reading schema SQL...'
    sql = IO.read File.expand_path(schema, Dir.pwd)
    return ActiveRecord::Migration.suppress_messages { Destination.connection.execute sql }
  end

  dump = StringIO.new
  puts 'Dumping schema.rb...'
  ActiveRecord::SchemaDumper.dump(Source.connection, dump)
  puts 'Loading schema.rb...'
  # The evaluated text is produced by SchemaDumper just above, not read from input.
  ActiveRecord::Migration.suppress_messages { eval dump.string }
end

.export(opts = {}) ⇒ Object



61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
# File 'lib/database_sanitizer.rb', line 61

# Copies table contents from Source.connection to Destination.connection in
# CHUNK_SIZE-row batches.
#
# opts[:schema]  - forwarded to duplicate_schema.
# opts[:tables]  - table names to export; defaults to every table reported by
#                  the source connection.
# opts[:exclude] - table names removed from that list.
#
# Each batch is read with select_query and written with the statement built by
# insert_query. Batches that return no rows are skipped, so an empty statement
# is never sent to the destination.
def export opts={}
  src = Source.connection
  dest = Destination.connection
  duplicate_schema opts[:schema]
  tables = (opts[:tables] || src.tables.map(&:to_s)) - (opts[:exclude] || [])
  transformers = read_comments tables
  # Width used to right-align table names in the progress output.
  max_tbl_name_len = transformers.keys.map(&:length).max || 0

  tables.with_progress('Exporting').each do |table|
    # Quote with the connection that will run the statement: the source and the
    # destination may use different adapters with different quoting rules.
    src_q_table = src.quote_table_name table
    dest_q_table = dest.quote_table_name table
    s_table = table.to_sym

    get_chunks(src, table).times_with_progress(table.rjust(max_tbl_name_len)) do |chunk_i|
      result = src.exec_query(select_query(src_q_table, s_table, chunk_i * CHUNK_SIZE))
      next if result.rows.empty?
      # NOTE(review): the chunk offset is not forwarded to insert_query, so any
      # row numbering done there cannot account for earlier chunks.
      dest.exec_query(insert_query(dest_q_table, s_table, transformers, result))
    end
  end
end

.extract_order(comment) ⇒ Object



19
# File 'lib/database_sanitizer.rb', line 19

# Reads the `order_by: <word>` directive out of a comment string.
# Returns the captured word, or nil when +comment+ is nil/false or carries no
# such directive.
def extract_order comment
  return nil unless comment
  comment[/order_by: ?(\w+)/, 1]
end

.extract_transformer(comment) ⇒ Object



17
# File 'lib/database_sanitizer.rb', line 17

# Reads the `sanitize: <word>` directive out of a comment string.
# Returns the captured word, or nil when +comment+ is nil/false or carries no
# such directive.
def extract_transformer comment
  return nil unless comment
  comment[/sanitize: ?(\w+)/, 1]
end

.get_chunks(conn, table) ⇒ Object



50
51
52
53
54
55
56
57
58
59
# File 'lib/database_sanitizer.rb', line 50

# Returns how many CHUNK_SIZE-row batches are needed to read +table+ through
# +conn+ (always at least 1).
#
# On PostgreSQL the row count is first taken from the planner estimate in
# pg_class.reltuples, which avoids a full count. The exact `count(*)` is used
# instead when the adapter is not PostgreSQL, when the estimate query fails or
# returns no row, or when the estimate is not positive (0 or -1 can mean the
# table has never been analyzed, so it cannot be trusted).
def get_chunks conn, table
  row_count = nil

  if conn.adapter_name == 'PostgreSQL'
    estimate_sql = "SELECT reltuples::bigint FROM pg_class WHERE relname=#{conn.quote table}"
    estimate = begin
      conn.exec_query(estimate_sql)
    rescue StandardError
      nil # best effort: fall through to the exact count below
    end
    first_row = estimate && estimate.rows[0]
    estimated = first_row && first_row[0].to_i
    row_count = estimated if estimated && estimated > 0
  end

  unless row_count
    puts 'Counting...'
    count_sql = "SELECT count(*) FROM #{conn.quote_table_name table}"
    # The result must be kept: the original discarded it and then crashed on
    # `false.rows`.
    row_count = conn.exec_query(count_sql).rows[0][0].to_i
  end

  row_count / CHUNK_SIZE + 1
end

.insert_query(q_table, s_table, transformers, result) ⇒ Object



80
81
82
83
84
85
86
87
88
89
90
91
92
93
# File 'lib/database_sanitizer.rb', line 80

# Builds the SQL text that inserts every row of +result+ into +q_table+.
#
# q_table      - already-quoted destination table name.
# s_table      - table name as a Symbol; key into +transformers+.
# transformers - Hash of table Symbol => { column Symbol => callable or nil }.
# result       - query result responding to #columns and #rows.
# offset       - index of the first row of this batch within the whole table
#                (optional, defaults to 0). It is added to the row's position
#                in the batch to form the index handed to each transformer.
#                The original body referenced `offset` without defining it and
#                raised NameError whenever a column had a transformer.
#
# Returns a String of `INSERT ...; ` statements, one per row (empty when
# +result+ has no rows). Values are quoted with Destination.connection.
def insert_query q_table, s_table, transformers, result, offset = 0
  dest = Destination.connection
  cols = result.columns.map { |col| dest.quote_column_name col }.join ','
  ins_query_part = "INSERT INTO #{q_table} (#{cols}) VALUES ("
  # A table without an entry simply has no columns to transform.
  table_transformers = transformers[s_table] || {}
  ins_query = StringIO.new
  result.rows.each_with_index do |src_row, row_i|
    values = result.columns.each_with_index.map do |col, col_i|
      transformer = table_transformers[col.to_sym]
      value = src_row[col_i]
      dest.quote(transformer ? transformer.(offset + row_i, value) : value)
    end
    ins_query << ins_query_part << values.join(',') << '); '
  end
  ins_query.string
end

.order_clause(s_table) ⇒ Object



99
100
101
102
103
104
105
106
107
108
109
110
# File 'lib/database_sanitizer.rb', line 99

# Builds the ORDER BY clause used when paging through +s_table+.
#
# The column named by an `order_by:` directive in the table's comment wins;
# otherwise `id` is used when the table has such a column. Returns nil when
# neither applies, in which case the caller gets no explicit ordering.
def order_clause s_table
  src = Source.connection
  order_by = extract_order(src.retrieve_table_comment(s_table))
  if order_by
    # order_by names a column, so it is quoted as a column identifier rather
    # than as a table name.
    "ORDER BY #{src.quote_column_name(order_by)}"
  elsif src.column_exists? s_table, :id
    'ORDER BY id'
  end
end

.read_comments(tables) ⇒ Object



21
22
23
24
25
26
27
28
29
30
31
32
33
# File 'lib/database_sanitizer.rb', line 21

# Collects the transformer configured for every column of every given table.
#
# For each table, the column comments are fetched from Source.connection and
# scanned for a `sanitize:` directive via extract_transformer. The process is
# aborted with a message when a directive names a transformer that is not in
# Transformers.
#
# Returns a Hash of table Symbol => { column key => callable or nil }, where
# the column key is whatever retrieve_column_comments yields as the first
# element of each entry and nil marks a column without a directive.
def read_comments tables
  tables.each_with_object({}) do |table, by_table|
    table_sym = table.to_sym
    per_column = {}
    Source.connection.retrieve_column_comments(table_sym).each do |column|
      name = column[0]
      key = extract_transformer column[1]
      if !key.nil? && !Transformers.include?(key)
        abort "Transformer '#{key}' not found (#{table}.#{name})"
      end
      per_column[name] = key && Transformers[key]
    end
    by_table[table_sym] = per_column
  end
end

.select_query(q_table, s_table, offset) ⇒ Object



95
96
97
# File 'lib/database_sanitizer.rb', line 95

# Builds the SELECT statement for one batch of +q_table+: CHUNK_SIZE rows
# starting at +offset+, ordered by whatever order_clause returns for +s_table+.
# When order_clause returns nil its slot is left empty, which leaves two
# consecutive spaces in the statement.
def select_query q_table, s_table, offset
  ordering = order_clause(s_table)
  format('SELECT * FROM %s %s LIMIT %s OFFSET %s', q_table, ordering, CHUNK_SIZE, offset)
end