Class: DataSampler::TableSample

Inherits:
Object
  • Object
show all
Defined in:
lib/data_sampler/table_sample.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(connection, table_name, size = 1000) ⇒ TableSample

Returns a new instance of TableSample.



10
11
12
13
14
15
16
17
18
# File 'lib/data_sampler/table_sample.rb', line 10

# Sets up an empty, not-yet-sampled table sample.
#
# @param connection the database connection later used for quoting and queries
# @param table_name the name of the table this sample covers
# @param size the size handed to fetch_sample by #sample! (defaults to 1000)
def initialize(connection, table_name, size = 1000)
  # Caller-supplied configuration.
  @connection = connection
  @table_name = table_name
  @size = size

  # Nothing has been fetched yet.
  @sampled = false

  # Bookkeeping collections; each one gets its own Set instance.
  @sample = Set.new
  @pending_dependencies = Set.new
  @sampled_ids = Set.new
end

Instance Attribute Details

#pending_dependencies ⇒ Object (readonly)

Returns the value of attribute pending_dependencies.



8
9
10
# File 'lib/data_sampler/table_sample.rb', line 8

# Reader for the set of dependencies that have been recorded but not yet
# resolved. The constructor starts it as an empty Set, #add inserts into it,
# and #ensure_referential_integrity replaces it with a fresh Set before
# working through the previous contents.
#
# @return the current @pending_dependencies collection (not a copy)
def pending_dependencies
  @pending_dependencies
end

#table_name ⇒ Object (readonly)

Returns the value of attribute table_name.



7
8
9
# File 'lib/data_sampler/table_sample.rb', line 7

# Reader for the table name passed to the constructor. The same value is
# quoted with the connection and interpolated into the SQL built by #fulfil
# and #to_sql.
#
# @return the table name exactly as it was given to #initialize
def table_name
  @table_name
end

Instance Method Details

#add(row) ⇒ Object



50
51
52
53
54
55
56
57
58
59
60
# File 'lib/data_sampler/table_sample.rb', line 50

# Adds a row to the sample and records the dependencies it introduces.
#
# A row that is already part of the sample is ignored. For a new row, its
# 'id' value (when present) is remembered so that #fulfilled? can recognise
# it, and every dependency reported by dependencies_for is added to the
# pending set.
#
# @param row the row to add; it is indexed with the string key 'id'
# @return [Integer] how many dependencies were newly added to the pending
#   set. This is always an Integer, including when the row was already
#   sampled (0) and when the dependency lookup fails with
#   ActiveRecord::StatementInvalid, because callers add the result to a
#   running total.
def add(row)
  # Initialised first so the rescue clause below always has a count to return.
  newly_added = 0
  return newly_added unless @sample.add?(row)

  @sampled_ids.add(row['id']) if row['id']
  dependencies_for(row).each do |dep|
    newly_added += 1 if @pending_dependencies.add?(dep)
  end
  newly_added
rescue ActiveRecord::StatementInvalid
  # Don't choke on unknown table engines, such as Sphinx. Report whatever was
  # counted so far instead of nil, which would break the callers' arithmetic.
  newly_added
end

#ensure_referential_integrity(table_samples) ⇒ Object



62
63
64
65
66
67
68
69
70
71
# File 'lib/data_sampler/table_sample.rb', line 62

# Resolves every dependency that is currently pending by asking the sample
# of the table it refers to to fulfil it.
#
# The pending set is swapped for an empty one before the work starts, so
# the dependencies are processed from the detached snapshot.
#
# @param table_samples a collection indexed by table name whose entries
#   respond to #fulfil
# @return the sum of the values returned by the #fulfil calls (0 when
#   nothing was pending)
# @raise [RuntimeError] when table_samples has no entry for a dependency's
#   table
def ensure_referential_integrity(table_samples)
  outstanding, @pending_dependencies = @pending_dependencies, Set.new

  outstanding.reduce(0) do |total, dependency|
    target = table_samples[dependency.table_name]
    raise "Table sample for `#{dependency.table_name}` not found" unless target

    total + table_samples[dependency.table_name].fulfil(dependency)
  end
end

#fulfil(dependency) ⇒ Object



29
30
31
32
33
34
35
36
# File 'lib/data_sampler/table_sample.rb', line 29

# Makes sure the row a dependency points at is part of this sample.
#
# When the dependency is not already fulfilled, one matching row is selected
# from this table, with every column/value pair of dependency.keys combined
# by AND, and the row is handed to #add.
#
# @param dependency an object whose #keys yields column/value pairs
# @return 0 when the dependency was already fulfilled, otherwise whatever
#   #add returns for the fetched row
# @raise [RuntimeError] when no matching row exists
def fulfil(dependency)
  return 0 if fulfilled?(dependency)

  # Identifiers and values are quoted by the connection.
  conditions = dependency.keys.map do |column, value|
    "#{@connection.quote_column_name(column)} = #{@connection.quote(value)}"
  end
  query = "SELECT * FROM #{@connection.quote_table_name(@table_name)} WHERE #{conditions.join(' AND ')}"

  found = @connection.select_one(query)
  raise "Could not find #{dependency}" if found.nil?

  add(found)
end

#fulfilled?(dependency) ⇒ Boolean

Returns:

  • (Boolean)


38
39
40
41
42
43
44
45
46
47
48
# File 'lib/data_sampler/table_sample.rb', line 38

# Tells whether the row a dependency refers to has already been sampled.
#
# FIXME: Only checks id column
# Only a dependency with exactly one key, named 'id', whose value is among
# the sampled ids counts as fulfilled. Everything else is reported as
# unfulfilled.
#
# @param dependency an object whose #keys yields column/value pairs
# @return [Boolean]
def fulfilled?(dependency)
  columns = dependency.keys
  return false unless columns.values.size == 1

  columns.any? { |column, value| column == 'id' && @sampled_ids.include?(value) }
end

#sample! ⇒ Object



20
21
22
23
# File 'lib/data_sampler/table_sample.rb', line 20

# Returns the sample, calling fetch_sample with the configured size first
# when the @sampled flag is not set.
#
# @return the @sample collection
def sample!
  return @sample if @sampled

  fetch_sample(@size)
  @sample
end

#size ⇒ Object



25
26
27
# File 'lib/data_sampler/table_sample.rb', line 25

# Reports the sample size: the configured size while the @sampled flag is
# not set, and the number of rows held in @sample once it is.
#
# @return the configured or actual size
def size
  return @size unless @sampled

  @sample.size
end

#to_sql ⇒ Object



73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
# File 'lib/data_sampler/table_sample.rb', line 73

# Renders the sample as SQL: a comment line with the table name and row
# count, followed by multi-row INSERT statements.
#
# The column list is taken from the first row of the sample. Values of a
# column named 'password' (case-insensitive) are masked character by
# character with '*'. Masking works on a copy, so the rows stored in the
# sample are never modified. They are Hashes held in a Set, and changing
# them in place would alter their hash value and break set membership.
# Non-String password values (for example nil) are passed to the connection
# for quoting unchanged.
#
# @return [String] the SQL text; only the comment line when the sample is
#   empty
def to_sql
  ret = "-- #{@table_name}: #{@sample.count} rows\n"
  return ret if @sample.empty?

  quoted_table = @connection.quote_table_name(@table_name)
  quoted_cols = @sample.first.keys.map { |col| @connection.quote_column_name(col) }.join(',')

  # INSERT in batches of 1000
  @sample.each_slice(1000) do |rows|
    values = rows.map do |row|
      row.map do |field, val|
        # HACK: Brute attempt at not revealing sensitive data
        val = val.gsub(/./, '*') if val.is_a?(String) && field.to_s.downcase == 'password'
        @connection.quote(val)
      end.join(',')
    end.join('),(')
    ret << "INSERT INTO #{quoted_table} (#{quoted_cols}) VALUES (#{values});\n"
  end
  ret
end