Class: DataSampler::TableSample
- Inherits:
-
Object
- Object
- DataSampler::TableSample
- Defined in:
- lib/data_sampler/table_sample.rb
Instance Attribute Summary collapse
-
#pending_dependencies ⇒ Object
readonly
Returns the value of attribute pending_dependencies.
-
#table_name ⇒ Object
readonly
Returns the value of attribute table_name.
Instance Method Summary collapse
- #add(row) ⇒ Object
- #ensure_referential_integrity(table_samples) ⇒ Object
- #fulfil(dependency) ⇒ Object
- #fulfilled?(dependency) ⇒ Boolean
-
#initialize(connection, table_name, size = 1000) ⇒ TableSample
constructor
A new instance of TableSample.
- #sample! ⇒ Object
- #size ⇒ Object
- #to_sql ⇒ Object
Constructor Details
#initialize(connection, table_name, size = 1000) ⇒ TableSample
Returns a new instance of TableSample.
10 11 12 13 14 15 16 17 18 |
# File 'lib/data_sampler/table_sample.rb', line 10

# Builds an empty, not-yet-sampled container for a single table.
#
# @param connection [Object] database connection, kept for later queries and quoting
# @param table_name [Object] name of the table this sample belongs to
# @param size [Integer] stored as @size; defaults to 1000
def initialize(connection, table_name, size = 1000)
  @connection = connection
  @table_name = table_name
  @size = size

  # Nothing has been fetched yet.
  @sampled = false
  @sample = Set.new
  @sampled_ids = Set.new
  @pending_dependencies = Set.new
end
Instance Attribute Details
#pending_dependencies ⇒ Object (readonly)
Returns the value of attribute pending_dependencies.
8 9 10 |
# File 'lib/data_sampler/table_sample.rb', line 8

# Read-only accessor.
#
# @return [Object] the current value of @pending_dependencies
def pending_dependencies
  @pending_dependencies
end
#table_name ⇒ Object (readonly)
Returns the value of attribute table_name.
7 8 9 |
# File 'lib/data_sampler/table_sample.rb', line 7

# Read-only accessor.
#
# @return [Object] the current value of @table_name
def table_name
  @table_name
end
Instance Method Details
#add(row) ⇒ Object
50 51 52 53 54 55 56 57 58 59 60 |
# File 'lib/data_sampler/table_sample.rb', line 50

# Adds a row to the sample and queues any dependencies it introduces.
#
# @param row [Hash] the row to add; a truthy row['id'] is also remembered in @sampled_ids
# @return [Integer] how many dependencies were newly added to @pending_dependencies;
#   0 when the row was already part of the sample
def add(row)
  return 0 unless @sample.add?(row)

  @sampled_ids.add(row['id']) if row['id']

  newly_added = 0
  begin
    dependencies_for(row).each do |dep|
      newly_added += 1 if @pending_dependencies.add?(dep)
    end
  rescue ActiveRecord::StatementInvalid
    # Don't choke on unknown table engines, such as Sphinx.
    # Fall through so the caller still receives an Integer: callers add this
    # return value to a running total, and a nil here would raise TypeError.
  end
  newly_added
end
#ensure_referential_integrity(table_samples) ⇒ Object
62 63 64 65 66 67 68 69 70 71 |
# File 'lib/data_sampler/table_sample.rb', line 62

# Hands every pending dependency to the table sample that owns it.
#
# The pending set is swapped for a fresh empty Set before iterating, so
# @pending_dependencies no longer holds the dependencies being processed.
#
# @param table_samples [Hash] lookup from table name to an object responding to #fulfil
# @return [Integer] sum of the values returned by each #fulfil call
# @raise [RuntimeError] when no entry exists for a dependency's table name
def ensure_referential_integrity(table_samples)
  outstanding = @pending_dependencies
  @pending_dependencies = Set.new

  outstanding.inject(0) do |total, dependency|
    target = table_samples[dependency.table_name]
    raise "Table sample for `#{dependency.table_name}` not found" unless target

    total + target.fulfil(dependency)
  end
end
#fulfil(dependency) ⇒ Object
29 30 31 32 33 34 35 36 |
# File 'lib/data_sampler/table_sample.rb', line 29

# Makes sure the row a dependency points at is part of this sample.
#
# Builds a SELECT whose WHERE clause matches every column/value pair in
# dependency.keys, fetches one row through the connection and adds it.
#
# @param dependency [Object] responds to #keys with column => value pairs
# @return [Object] 0 when the dependency is already fulfilled, otherwise
#   whatever #add returns for the fetched row
# @raise [RuntimeError] when the query returns no row
def fulfil(dependency)
  return 0 if fulfilled?(dependency)

  conditions = dependency.keys.map do |col, val|
    "#{@connection.quote_column_name(col)} = #{@connection.quote(val)}"
  end
  sql = "SELECT * FROM #{@connection.quote_table_name(@table_name)} WHERE #{conditions.join(' AND ')}"

  row = @connection.select_one(sql)
  raise "Could not find #{dependency}" if row.nil?

  add(row)
end
#fulfilled?(dependency) ⇒ Boolean
38 39 40 41 42 43 44 45 46 47 48 |
# File 'lib/data_sampler/table_sample.rb', line 38

# Tells whether the row a dependency refers to has already been sampled.
#
# FIXME: Only checks id column
#
# @param dependency [Object] responds to #keys with column => value pairs
# @return [Boolean] true only when the dependency has exactly one key, that
#   key is 'id', and its value is present in @sampled_ids
def fulfilled?(dependency)
  lookup = dependency.keys
  return false unless lookup.values.size == 1

  lookup.any? { |column, value| column == 'id' && @sampled_ids.include?(value) }
end
#sample! ⇒ Object
20 21 22 23 |
# File 'lib/data_sampler/table_sample.rb', line 20

# Returns the sample, first calling fetch_sample(@size) if @sampled is not
# yet truthy.
#
# NOTE(review): fetch_sample is defined outside this view; presumably it
# fills @sample and sets @sampled. Confirm against its definition.
#
# @return [Object] the value of @sample
def sample!
  return @sample if @sampled

  fetch_sample(@size)
  @sample
end
#size ⇒ Object
25 26 27 |
# File 'lib/data_sampler/table_sample.rb', line 25

# Reports the sample size.
#
# @return [Object] @size while @sampled is falsy; afterwards the number of
#   entries actually held in @sample
def size
  return @size unless @sampled

  @sample.size
end
#to_sql ⇒ Object
73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 |
# File 'lib/data_sampler/table_sample.rb', line 73

# Renders the sample as SQL: a comment line with the table name and row
# count, followed by multi-row INSERT statements.
#
# The column list is taken from the keys of the first sampled row.
# Values in a column named 'password' (case-insensitive) are masked
# with one '*' per character matched by /./.
#
# Masking works on a copy of the value. The sampled rows are never mutated,
# so calling this method has no effect on @sample, and nil or frozen
# password values do not raise.
#
# @return [String] the generated SQL text
def to_sql
  sql = "-- #{@table_name}: #{@sample.count} rows\n"
  return sql if @sample.empty?

  quoted_table = @connection.quote_table_name(@table_name)
  quoted_cols = @sample.first.keys.map { |col| @connection.quote_column_name(col) }.join(',')

  # INSERT in batches of 1000
  @sample.each_slice(1000) do |rows|
    tuples = rows.map do |row|
      row.map do |field, val|
        # HACK: Brute attempt at not revealing sensitive data.
        # nil stays nil so it is still quoted as a NULL by the connection.
        val = val.to_s.gsub(/./, '*') if !val.nil? && field.to_s.downcase == 'password'
        @connection.quote(val)
      end.join(',')
    end
    sql << "INSERT INTO #{quoted_table} (#{quoted_cols}) VALUES (#{tuples.join('),(')});\n"
  end
  sql
end