Class: BridgeCache::Plugins::CSVDump
- Inherits: Object
- Defined in: app/lib/bridge_cache/plugins/csv_dump.rb
Constant Summary
- MAX_ROW_INTERVAL = 5000
Class Method Summary
- .bulk_import(iterator, model) ⇒ Object
- .condition_sql(klass, columns) ⇒ Object
This method generates SQL that looks like: (users.sis_id, users.email) IS DISTINCT FROM (EXCLUDED.sis_id, EXCLUDED.email).
- .dump_row(clazz, row) ⇒ Object
- .dump_rows(rows) ⇒ Object
- .dump_to_table(clazz, file_path) ⇒ Object
- .initialze_row(clazz, row) ⇒ Object
- .perform_bulk_import(klass, columns, rows) ⇒ Object
- .remove_bad_columns(clazz, row) ⇒ Object
Class Method Details
.bulk_import(iterator, model) ⇒ Object
# File 'app/lib/bridge_cache/plugins/csv_dump.rb', line 9

def self.bulk_import(iterator, model)
  ids = []
  rows = []
  klass = "BridgeCache::#{model.camelcase}".constantize
  csv_column_names = klass.csv_mapping.keys
  database_column_names = klass.csv_mapping.values

  iterator.each_row(model.pluralize) do |row|
    row = remove_bad_columns(klass, BridgeCache::Plugins::DataTransform.set_bridge_id(row).to_h)
    row = klass.format_import_row(row)
    rows << csv_column_names.map { |column| row[column] }
    ids << row['bridge_id'] if row['bridge_id'].present? # Some CSV's do not have an ID column

    if rows.length >= BridgeCache.batch_size
      perform_bulk_import(klass, database_column_names, rows)
      rows = []
    end
  end

  perform_bulk_import(klass, database_column_names, rows)
  ids
end
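A minimal usage sketch, assuming a BridgeCache::User model with the usual csv_mapping; StubIterator is a hypothetical stand-in for the real report iterator, which only needs to respond to each_row('users') and yield hash-like rows:

# Hypothetical stand-in; in real use the iterator wraps a Bridge data dump.
class StubIterator
  def initialize(rows)
    @rows = rows
  end

  def each_row(_table_name, &block)
    @rows.each(&block)
  end
end

iterator = StubIterator.new([{ 'id' => '1', 'name' => 'Example User' }])

# Rows are imported in batches of BridgeCache.batch_size; the returned array
# holds the bridge_id of every row that had one.
ids = BridgeCache::Plugins::CSVDump.bulk_import(iterator, 'user')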
.condition_sql(klass, columns) ⇒ Object
This method generates SQL that looks like: (users.sis_id, users.email) IS DISTINCT FROM (EXCLUDED.sis_id, EXCLUDED.email)
This prevents activerecord-import from setting the updated_at column for rows that haven't actually changed. This allows you to query for rows that have changed by doing something like:

started_at = Time.now
run_the_users_sync!
changed = User.where("updated_at >= ?", started_at)
# File 'app/lib/bridge_cache/plugins/csv_dump.rb', line 82

def self.condition_sql(klass, columns)
  columns_str = columns.map { |c| "#{klass.quoted_table_name}.#{c}" }.join(', ')
  excluded_str = columns.map { |c| "EXCLUDED.#{c}" }.join(', ')
  "(#{columns_str}) IS DISTINCT FROM (#{excluded_str})"
end
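For illustration only, with a hypothetical model whose table is users and two tracked columns, the generated condition would read:

BridgeCache::Plugins::CSVDump.condition_sql(BridgeCache::User, %w[sis_id email])
# => "(users.sis_id, users.email) IS DISTINCT FROM (EXCLUDED.sis_id, EXCLUDED.email)"
# (quoted_table_name may add adapter-specific quoting around the table name)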
.dump_row(clazz, row) ⇒ Object
# File 'app/lib/bridge_cache/plugins/csv_dump.rb', line 56

def self.dump_row(clazz, row)
  instance = initialze_row(clazz, row)
  dump_rows([instance])
end
.dump_rows(rows) ⇒ Object
# File 'app/lib/bridge_cache/plugins/csv_dump.rb', line 95

def self.dump_rows(rows)
  rows.each do |row|
    row.save! if row.changed?
  end
end
.dump_to_table(clazz, file_path) ⇒ Object
# File 'app/lib/bridge_cache/plugins/csv_dump.rb', line 33

def self.dump_to_table(clazz, file_path)
  count = 1
  total = 0
  rows = []

  CSV.foreach(file_path, headers: true) do |_row|
    total += 1
  end

  CSV.foreach(file_path, headers: true) do |row|
    rows << initialze_row(clazz, row) if count < MAX_ROW_INTERVAL

    if (count % MAX_ROW_INTERVAL).zero? || count == total
      dump_rows(rows)
      count = 0
      rows = []
    end

    count += 1
  end
end
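A short usage sketch, assuming a BridgeCache::User model and a CSV file whose headers match the Bridge export (including an id column):

require 'csv'

# Each row becomes a record via initialze_row; records are persisted with dump_rows
# in batches of MAX_ROW_INTERVAL.
BridgeCache::Plugins::CSVDump.dump_to_table(BridgeCache::User, '/tmp/users.csv')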
.initialze_row(clazz, row) ⇒ Object
# File 'app/lib/bridge_cache/plugins/csv_dump.rb', line 88

def self.initialze_row(clazz, row)
  instance = clazz.find_or_create_by(bridge_id: row['id'])
  instance.assign_attributes(remove_bad_columns(clazz, BridgeCache::Plugins::DataTransform.set_bridge_id(row).to_h))
  instance
end
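A hedged sketch of what a single row goes through, assuming a BridgeCache::User model; a plain hash stands in here for the CSV::Row the callers above pass:

row = { 'id' => '7', 'name' => 'Example User' }

# find_or_create_by ensures a record keyed by bridge_id exists; the remaining
# attributes are assigned but left unsaved, so dump_rows/dump_row decide when to persist.
instance = BridgeCache::Plugins::CSVDump.initialze_row(BridgeCache::User, row)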
.perform_bulk_import(klass, columns, rows) ⇒ Object
# File 'app/lib/bridge_cache/plugins/csv_dump.rb', line 61

def self.perform_bulk_import(klass, columns, rows)
  return if rows.empty?

  columns = columns.dup
  klass.import(columns, rows, validate: false,
                              on_duplicate_key_update: {
                                conflict_target: klass.unique_column_names,
                                condition: condition_sql(klass, columns),
                                columns: columns
                              })
end
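A hedged sketch of calling this directly (bulk_import is the normal entry point), assuming BridgeCache::User's unique_column_names is ['bridge_id'] and that both columns below exist on the table:

columns = %w[bridge_id name]
rows    = [[1, 'Example User'], [2, 'Other User']]

# activerecord-import issues one multi-row insert; on a bridge_id conflict the listed
# columns are updated only when condition_sql detects a real change, so untouched
# rows keep their updated_at.
BridgeCache::Plugins::CSVDump.perform_bulk_import(BridgeCache::User, columns, rows)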
.remove_bad_columns(clazz, row) ⇒ Object
# File 'app/lib/bridge_cache/plugins/csv_dump.rb', line 101

def self.remove_bad_columns(clazz, row)
  row.delete_if { |key, _value| !clazz.column_names.include?(key) }
end
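For example, assuming BridgeCache::User has bridge_id and name columns but nothing called avatar_url, only the recognized keys survive (the hash is mutated in place):

row = { 'bridge_id' => 42, 'name' => 'Example', 'avatar_url' => 'ignored' }
BridgeCache::Plugins::CSVDump.remove_bad_columns(BridgeCache::User, row)
# => { 'bridge_id' => 42, 'name' => 'Example' }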