Module: ActiveSanitization
- Defined in:
- lib/tasks/rake_tasks.rb,
lib/active_sanitization.rb,
lib/active_sanitization/version.rb
Defined Under Namespace
Classes: Configuration, RakeTasks, TempDatabaseConnection
Constant Summary
collapse
- VERSION =
"0.2.0"
Class Attribute Summary collapse
Class Method Summary
collapse
Class Attribute Details
.configuration ⇒ Object
Returns the value of attribute configuration.
9
10
11
|
# File 'lib/active_sanitization.rb', line 9
# Reader for the module-level configuration object.
#
# @return [Object] the current value of the +@configuration+ instance
#   variable (nil when nothing has assigned it yet)
def configuration
@configuration
end
|
Class Method Details
.clean_up_files(dump_file, compressed_dump_file) ⇒ Object
200
201
202
203
204
205
|
# File 'lib/active_sanitization.rb', line 200
# Deletes the plain and the gzipped dump file, logging each path before the
# attempt. A path that is not present on disk is logged but otherwise skipped.
#
# @param dump_file [String] path of the uncompressed dump
# @param compressed_dump_file [String] path of the gzipped dump
# @return [Integer, nil] result of the delete attempted for
#   +compressed_dump_file+ (nil when that file did not exist)
def self.clean_up_files(dump_file, compressed_dump_file)
  outcomes = [dump_file, compressed_dump_file].map do |path|
    log("Deleting #{path}")
    File.delete(path) if File.exist?(path)
  end
  outcomes.last
end
|
.clean_up_temp_db(temp_db) ⇒ Object
164
165
166
167
|
# File 'lib/active_sanitization.rb', line 164
# Drops the temporary database through the configured ActiveRecord
# connection, logging the database name first.
#
# @param temp_db [String] name of the database to drop; it is interpolated
#   into the SQL statement as-is
# @return [Object] whatever the connection's +execute+ returns
def self.clean_up_temp_db(temp_db)
  log("Dropping #{temp_db}")
  connection = configuration.active_record_connection
  drop_statement = "DROP DATABASE #{temp_db};"
  connection.execute(drop_statement)
end
|
.configure {|configuration| ... } ⇒ Object
12
13
14
15
|
# File 'lib/active_sanitization.rb', line 12
# Yields the module configuration to the caller's block, creating a fresh
# Configuration first when none has been set.
#
# @yieldparam configuration [Configuration] the object to populate
# @return [Object] the value of the block
def self.configure
  self.configuration = Configuration.new unless configuration
  yield configuration
end
|
.create_files ⇒ Object
142
143
144
145
146
147
148
|
# File 'lib/active_sanitization.rb', line 142
# Creates (or truncates) an empty dump file and an empty gzipped dump file in
# the "tmp" directory under the configured root.
#
# The files are opened with the block form of File.open so each handle is
# closed immediately instead of staying open until garbage collection.
#
# @return [Array<String>] +[dump_file, compressed_dump_file]+ paths
# @raise [SystemCallError] if a file cannot be created (for example when the
#   tmp directory does not exist)
def self.create_files
  dump_file = "#{File.join(configuration.root, "tmp")}/data.dump"
  compressed_dump_file = "#{dump_file}.gz"
  paths = [dump_file, compressed_dump_file]
  # "w+" creates the file when missing and truncates it otherwise.
  paths.each { |path| File.open(path, "w+") {} }
  paths
end
|
.duplicate_database ⇒ Object
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
|
# File 'lib/active_sanitization.rb', line 95
# Builds a "<database>_copy" database next to the configured one and fills it
# in two passes: the tables listed in +tables_to_sanitize+ are copied with
# their data, the tables listed in +tables_to_truncate+ are copied as schema
# only (--no-data). Finally a TempDatabaseConnection is established against
# the copy.
#
# The password is never written to the log or to an error message; only the
# command actually executed contains it.
#
# @return [Array] +[temp_db, temp_db_connection, temp_db_config]+
# @raise [RuntimeError] when either copy command exits with a non-zero status
def self.duplicate_database
  db_config = configuration.db_config
  connection = configuration.active_record_connection
  temp_db = "#{db_config['database']}_copy"

  log("Deleting temp DB if exists")
  connection.execute("DROP DATABASE IF EXISTS #{temp_db};")
  log("Creating temp DB")
  connection.execute("CREATE DATABASE #{temp_db}")

  log("Copying #{configuration.env} DB to temp DB")
  copy_tables_into_temp_db(temp_db, configuration.tables_to_sanitize.keys, false)
  copy_tables_into_temp_db(temp_db, configuration.tables_to_truncate.keys, true)

  # Work on a copy so the caller's configuration keeps pointing at the source DB.
  temp_db_config = db_config.dup
  temp_db_config['database'] = temp_db
  TempDatabaseConnection.establish_connection(temp_db_config)
  [temp_db, TempDatabaseConnection.connection, temp_db_config]
end

# Internal helper: logs (with the password masked) and runs one
# mysqldump-into-mysql copy, raising when the command does not exit with 0.
# NOTE(review): the status seen here is the one the shell reports for the
# whole pipeline; a failing mysqldump may go unnoticed unless the shell is
# configured otherwise -- confirm.
def self.copy_tables_into_temp_db(temp_db, tables, schema_only)
  log(temp_db_copy_command(temp_db, tables, schema_only, "XXXXXXXXX"))
  command = temp_db_copy_command(temp_db, tables, schema_only, configuration.db_config['password'])
  unless system(command)
    printable_config = configuration.db_config.reject { |key, _value| key.to_s == 'password' }
    raise "Failed to load DB #{printable_config} into temp DB #{temp_db}."
  end
  log("Temp DB created and populated")
end

# Internal helper: builds the shell pipeline that dumps +tables+ from the
# configured database and loads them into +temp_db+.
def self.temp_db_copy_command(temp_db, tables, schema_only, password)
  db_config = configuration.db_config
  credentials = "-h #{db_config['host']} -u #{db_config['username']} --password=#{password}"
  dump_flags = schema_only ? "--no-data " : ""
  "mysqldump #{credentials} #{dump_flags}#{db_config['database']} #{tables.join(' ')} | mysql #{credentials} -D #{temp_db}"
end
|
.export_temp_db_to_file(dump_file, temp_db_config, temp_db) ⇒ Object
207
208
209
210
211
212
213
214
215
216
|
# File 'lib/active_sanitization.rb', line 207
# Appends a mysqldump of +temp_db+ to +dump_file+ and logs whether the dump
# command succeeded. A failure is only logged; nothing is raised.
#
# @param dump_file [String] destination path (the shell appends with >>)
# @param temp_db_config [Hash] connection settings; the 'host', 'username'
#   and 'password' keys are read
# @param temp_db [String] name of the database to dump
# @return [Object, nil] the value of the success log call, or nil when the
#   dump command failed
def self.export_temp_db_to_file(dump_file, temp_db_config, temp_db)
  log("Dumping temp DB to #{dump_file}")
  credentials = "-h #{temp_db_config['host']} -u #{temp_db_config['username']} --password=#{temp_db_config['password']}"
  dumped = system("mysqldump #{credentials} #{temp_db} >> '#{dump_file}'")
  return log("Dump created") if dumped

  log("Failed to create dump")
  nil
end
|
.get_s3_bucket ⇒ Object
179
180
181
182
|
# File 'lib/active_sanitization.rb', line 179
# Looks up the configured S3 bucket through an Aws::S3::Resource built on the
# client returned by get_s3_client.
#
# @return [Object] the bucket object for +configuration.s3_bucket+
def self.get_s3_bucket
  Aws::S3::Resource.new(client: get_s3_client).bucket(configuration.s3_bucket)
end
|
.get_s3_client ⇒ Object
174
175
176
177
|
# File 'lib/active_sanitization.rb', line 174
# Builds an S3 client from the configured access key id, secret access key
# and bucket region.
#
# @return [Aws::S3::Client]
def self.get_s3_client
  settings = configuration
  Aws::S3::Client.new(
    credentials: Aws::Credentials.new(settings.aws_access_key_id, settings.aws_secret_access_key),
    region: settings.s3_bucket_region
  )
end
|
.gzip(dump_file) ⇒ Object
169
170
171
172
|
# File 'lib/active_sanitization.rb', line 169
# Compresses +dump_file+ by shelling out to the gzip command, after logging
# the path. The exit status is not inspected here.
#
# @param dump_file [String] path handed to gzip inside single quotes
# @return [Boolean, nil] whatever +system+ returns for the gzip command
def self.gzip(dump_file)
  log("Gzipping #{dump_file}")
  command = "gzip '#{dump_file}'"
  system(command)
end
|
.hash_diff(hash1, hash2) ⇒ Object
Returns a hash that represents the difference between two hashes.
hash_diff({1 => 2}, {1 => 2}) # => {}
hash_diff({1 => 2}, {1 => 3}) # => {1 => 2}
hash_diff({}, {1 => 2}) # => {1 => 2}
hash_diff({1 => 2, 3 => 4}, {1 => 2}) # => {3 => 4}
47
48
49
50
51
52
53
54
55
56
57
58
59
60
|
# File 'lib/active_sanitization.rb', line 47
# Returns a hash that represents the difference between two hashes.
#
# The result holds every pair of +hash1+ that +hash2+ does not contain with an
# equal value, plus every pair of +hash2+ whose key is absent from +hash1+.
# When both hashes have a key with different values, the value from +hash1+
# is reported.
#
#   hash_diff({1 => 2}, {1 => 2})         # => {}
#   hash_diff({1 => 2}, {1 => 3})         # => {1 => 2}
#   hash_diff({}, {1 => 2})               # => {1 => 2}
#   hash_diff({1 => 2, 3 => 4}, {1 => 2}) # => {3 => 4}
#
# @param hash1 [Hash]
# @param hash2 [Hash]
# @return [Hash] a new hash; neither argument is modified
def self.hash_diff(hash1, hash2)
  # key? is required in addition to ==: without it a nil value in hash1 would
  # compare equal to the nil returned for a key that hash2 does not have.
  changed_or_only_in_first = hash1.reject do |key, value|
    hash2.key?(key) && hash2[key] == value
  end
  only_in_second = hash2.reject { |key, _value| hash1.key?(key) }
  changed_or_only_in_first.merge(only_in_second)
end
|
.import_data(env = nil, timestamp = nil) ⇒ Object
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
|
# File 'lib/active_sanitization.rb', line 256
# Replaces the local database with a snapshot downloaded from S3.
#
# Steps: pick the snapshot timestamp (the greatest one found under
# "<app_name>/<env>/mysql" when none is given), dump the current local DB to
# tmp/local_data.dump, download "<prefix>/<timestamp>/data.dump.gz", run the
# db:drop and db:create rake tasks, then gunzip the download into mysql and
# delete the downloaded file.
#
# @param env [String, nil] environment segment of the S3 key; "production"
#   when nil
# @param timestamp [String, nil] snapshot to import; the latest one when nil
# @return [Object, nil] the value of the final log call, or nil when no
#   snapshot timestamp could be determined
# @raise [RuntimeError] when the local dump or the import command fails
def self.import_data(env = nil, timestamp = nil)
  env ||= "production"
  prefix = "#{configuration.app_name}/#{env}/mysql"
  bucket = get_s3_bucket
  timestamp = latest_snapshot_timestamp(bucket, prefix) if timestamp.nil?
  if timestamp.nil?
    log("No mysql snapshot for timestamp #{prefix}/#{timestamp}")
    return
  end

  log('WARNING: this rake task will dump your MySQL DB to a file, then wipe your DB before importing a snapshot')
  db_config = configuration.db_config
  tmp_dir = File.join(configuration.root, "tmp")

  # Keep a safety copy of the local data before it is wiped.
  local_dump_file = "#{tmp_dir}/local_data.dump"
  unless system("mysqldump -h #{db_config['host']} -u #{db_config['username']} --password=#{db_config['password']} #{db_config['database']} > '#{local_dump_file}'")
    raise "Failed to create a local DB dump. If a previous local dump exists, please delete it and try again."
  end
  log("Local DB dump stored in #{local_dump_file}")

  compressed_dump_file = "#{tmp_dir}/data.dump.gz"
  name = "#{prefix}/#{timestamp}/data.dump.gz"
  log("Downloading dump from bucket: #{configuration.s3_bucket}, path: #{name}")
  get_s3_client.get_object({ bucket: configuration.s3_bucket, key: name }, target: compressed_dump_file)

  log("Recreating your local DB")
  Rake::Task["db:drop"].invoke
  Rake::Task["db:create"].invoke

  log("Unzipping and importing data...")
  # NOTE(review): this command connects as root without host or password,
  # unlike the mysqldump above which uses db_config -- confirm this is intended.
  import_command = "gunzip < #{compressed_dump_file} | mysql -u root #{db_config['database']}"
  log(import_command)
  unless system(import_command)
    # The password is left out of the message so it does not end up in logs.
    printable_config = db_config.reject { |key, _value| key.to_s == 'password' }
    raise "Could not load #{compressed_dump_file} into DB #{printable_config}"
  end
  File.delete(compressed_dump_file) if File.exist?(compressed_dump_file)
  log('-- DONE --')
end

# Internal helper: returns the greatest "<prefix>/<timestamp>/" segment among
# the bucket's object keys, or nil when no key has that shape. Keys that do
# not match are ignored, and the prefix is matched literally.
def self.latest_snapshot_timestamp(bucket, prefix)
  pattern = %r{#{Regexp.escape(prefix)}/(.*)/}
  bucket.objects(prefix: prefix).map { |object| object.key[pattern, 1] }.compact.max
end
|
.is_dev_or_integration_env? ⇒ Boolean
218
219
220
|
# File 'lib/active_sanitization.rb', line 218
# Tells whether the configured environment is 'development' or 'integration'.
#
# @return [Boolean]
def self.is_dev_or_integration_env?
  %w[development integration].include?(configuration.env)
end
|
.log(output) ⇒ Object
62
63
64
65
66
|
# File 'lib/active_sanitization.rb', line 62
# Sends +output+ at info level to every configured logger. Nothing is logged
# when the configured environment is 'test'.
#
# @param output [Object] the message handed to each logger's +info+
# @return [Object, nil] the loggers collection, or nil in the 'test' environment
def self.log(output)
  return if configuration.env == 'test'

  configuration.loggers.each { |logger| logger.info(output) }
end
|
.pre_sanitization_checks ⇒ Object
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
|
# File 'lib/active_sanitization.rb', line 68
# Compares the tables and columns found in the database (minus
# +tables_to_ignore+) with the ones declared in +tables_to_sanitize+ and
# +tables_to_truncate+, column lists being compared in sorted order.
#
# @return [Hash] +{ pass: true }+ when both sides agree; otherwise
#   +{ pass: false, error: String }+ where the message lists, per differing
#   table, the columns that are not declared in the configuration
def self.pre_sanitization_checks
  connection = configuration.active_record_connection
  db_tables = connection.tables.each_with_object({}) do |table_name, columns_by_table|
    next if configuration.tables_to_ignore.include?(table_name)

    columns_by_table[table_name] = connection.columns(table_name).map { |column| column.name }.sort
  end

  declared = configuration.tables_to_sanitize.merge(configuration.tables_to_truncate)
  declared_sorted = declared.each_with_object({}) do |(table_name, columns), sorted|
    sorted[table_name] = columns.sort
  end

  table_difference = hash_diff(db_tables, declared_sorted)
  return { pass: true } if table_difference == {}

  column_difference = table_difference.each_with_object({}) do |(table_name, table_columns), unknown|
    unknown[table_name] = table_columns - declared[table_name].to_a
  end
  {
    pass: false,
    error: "The following tables or columns have been found in the #{configuration.env} DB but are not known to this script (#{column_difference}).\n Please update the active_sanitization config!"
  }
end
|
.sanitize_and_export_data ⇒ Object
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
|
# File 'lib/active_sanitization.rb', line 222
# Runs the whole export: pre-checks, dump, gzip and optional upload.
#
# In a development or integration environment the configured database is
# dumped directly. Otherwise the database is duplicated, the copy is
# sanitized and dumped, and the copy is dropped afterwards -- also when
# sanitizing or dumping raises, so no copy is left behind.
#
# The gzipped dump is uploaded when an S3 bucket and both AWS keys are
# configured; otherwise the local dump files are removed.
#
# @return [Object] the value of the final log call (the "-- DONE --" line, or
#   the pre-check error when the checks did not pass)
def self.sanitize_and_export_data
  checks = pre_sanitization_checks
  return log(checks[:error]) unless checks[:pass]

  dump_file, compressed_dump_file = create_files
  clean_up_files(dump_file, compressed_dump_file)

  if is_dev_or_integration_env?
    export_temp_db_to_file(dump_file, configuration.db_config, configuration.db_config["database"])
  else
    temp_db, temp_db_connection, temp_db_config = duplicate_database
    begin
      sanitize_tables(temp_db_connection)
      export_temp_db_to_file(dump_file, temp_db_config, temp_db)
    ensure
      # Drop the copy even on failure.
      clean_up_temp_db(temp_db)
    end
  end

  gzip(dump_file)
  if configuration.s3_bucket && configuration.aws_access_key_id && configuration.aws_secret_access_key
    upload(compressed_dump_file)
  else
    clean_up_files(dump_file, compressed_dump_file)
  end
  log("-- DONE --")
end
|
.sanitize_table(table, temp_db_connection) ⇒ Object
127
128
129
130
131
132
133
134
135
136
137
138
139
140
|
# File 'lib/active_sanitization.rb', line 127
# Overwrites sensitive columns of +table+ with values picked at random from
# the configured replacement lists, then runs the optional custom hook.
#
# For every column named in +sanitization_columns+ that the table has, each
# distinct current value is replaced by one sampled replacement, so equal
# source values receive the same replacement within a table. If
# +custom_sanitization+ responds to "sanitize_<table>", that method is called
# with the connection afterwards.
#
# Both the replacement and the matched value are quoted by the connection, so
# a replacement that contains a quote character cannot break the statement.
#
# @param table [String] table name, interpolated into the SQL as-is
# @param temp_db_connection [Object] connection to the temporary database;
#   must respond to +select_values+, +execute+ and +quote+
# @return [Object, nil] the hook's return value, or nil when there is no hook
def self.sanitize_table(table, temp_db_connection)
  table_columns = temp_db_connection.select_values("DESCRIBE #{table};")

  configuration.sanitization_columns.each do |column, replacements|
    next unless table_columns.include?(column)

    rows = temp_db_connection.execute("SELECT DISTINCT(#{column}) FROM #{table};")
    rows.collect { |data| data.first }.each do |value|
      # to_s keeps the replacement a quoted string literal, as it was before.
      replacement = temp_db_connection.quote(replacements.sample.to_s)
      current = temp_db_connection.quote(value)
      temp_db_connection.execute("UPDATE #{table} SET #{column}=#{replacement} WHERE #{column}=#{current};")
    end
  end

  hook = "sanitize_#{table}"
  custom = configuration.custom_sanitization
  custom.send(hook, temp_db_connection) if custom.respond_to?(hook)
end
|
.sanitize_tables(temp_db_connection) ⇒ Object
150
151
152
153
154
155
156
157
158
159
160
161
162
|
# File 'lib/active_sanitization.rb', line 150
# Empties every table listed in +tables_to_truncate+, then sanitizes every
# table listed in +tables_to_sanitize+, logging each step.
#
# @param temp_db_connection [Object] connection on which the TRUNCATE
#   statements run and which is handed to sanitize_table
# @return [Array] the names of the tables that were sanitized
def self.sanitize_tables(temp_db_connection)
  log("Processing TABLES_TO_TRUNCATE...")
  configuration.tables_to_truncate.each_key do |name|
    log("Truncating #{name}")
    temp_db_connection.execute("TRUNCATE #{name};")
  end

  log("Processing TABLES_TO_SANITIZE...")
  sanitized = configuration.tables_to_sanitize.keys
  sanitized.each do |name|
    log("Sanitizing #{name}")
    sanitize_table(name, temp_db_connection)
  end
end
|
.upload(compressed_dump_file) ⇒ Object
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
|
# File 'lib/active_sanitization.rb', line 184
# Uploads the gzipped dump to the configured S3 bucket under
# "<app_name>/<env>/mysql/<YYYYmmddHHMMSS>/<file name>" and deletes the local
# file once the upload call has returned.
#
# The file is opened with the block form of File.open so the handle is closed
# even when the upload raises; in that case the local file is kept.
#
# @param compressed_dump_file [String] path of the file to upload
# @return [Object] the S3 object the file was written to
def self.upload(compressed_dump_file)
  timestamp = Time.now.strftime('%Y%m%d%H%M%S')
  name = "#{configuration.app_name}/#{configuration.env}/mysql/#{timestamp}/#{File.basename(compressed_dump_file)}"
  log("Uploading to bucket: #{configuration.s3_bucket}, path: #{name}")

  obj = File.open(compressed_dump_file, 'r') do |file|
    get_s3_bucket.object(name).tap { |remote| remote.put(body: file) }
  end
  File.unlink(compressed_dump_file)
  obj
end
|