Class: RailsRedshiftReplicator::FileManager
- Inherits:
-
Object
- Object
- RailsRedshiftReplicator::FileManager
- Defined in:
- lib/rails_redshift_replicator/file_manager.rb
Instance Attribute Summary collapse
-
#exporter ⇒ Object
readonly
Returns the value of attribute exporter.
Class Method Summary collapse
-
.s3_file_key(source_table, file) ⇒ String
File location on s3.
Instance Method Summary collapse
- #bucket ⇒ Object
- #delete ⇒ Object
-
#file_key_in_format(file_name, format) ⇒ String
Returns the s3 key to be used.
- #files_without_base(files) ⇒ Object
-
#gzipped(file) ⇒ String
Returns the file name with every ".csv" replaced by ".gz".
-
#initialize(exporter = nil) ⇒ FileManager
constructor
A new instance of FileManager.
-
#local_file(name) ⇒ String
Path to the local export file.
-
#row_count_threshold(counts) ⇒ Integer
Number of lines per file.
- #s3_client ⇒ Object
- #s3_replication_files ⇒ Object
-
#split_file(name, record_count) ⇒ Object
Splits the CSV into a number of files determined by the number of Redshift Slices.
- #temp_file_name ⇒ Object
-
#upload_csv(files) ⇒ Object
Uploads the split CSV files.
- #upload_gzip(files) ⇒ Object
-
#write_csv(file_name, records) ⇒ Integer
Writes all results to one file for future splitting.
Constructor Details
#initialize(exporter = nil) ⇒ FileManager
Returns a new instance of FileManager.
20 21 22 |
# File 'lib/rails_redshift_replicator/file_manager.rb', line 20
# Creates a file manager, optionally bound to an exporter.
#
# @param exporter [Object, nil] stored as-is in @exporter
def initialize(exporter = nil)
  @exporter = exporter
end
Instance Attribute Details
#exporter ⇒ Object (readonly)
Returns the value of attribute exporter.
4 5 6 |
# File 'lib/rails_redshift_replicator/file_manager.rb', line 4
# Read-only accessor for the exporter.
#
# @return [Object, nil] the current value of @exporter
def exporter
  @exporter
end
Class Method Details
.s3_file_key(source_table, file) ⇒ String
File location on s3
16 17 18 |
# File 'lib/rails_redshift_replicator/file_manager.rb', line 16
# Builds the S3 key of a replication file.
#
# @param source_table [String] table name, used as the middle path segment
# @param file [String] file name, used as the last path segment
# @return [String] the configured :prefix, source_table and file joined with File.join
def self.s3_file_key(source_table, file)
  prefix = RailsRedshiftReplicator.s3_bucket_params[:prefix]
  File.join(prefix, source_table, file)
end
Instance Method Details
#bucket ⇒ Object
24 25 26 |
# File 'lib/rails_redshift_replicator/file_manager.rb', line 24
# Name of the S3 bucket used for replication files.
#
# @return [Object] the :bucket entry of RailsRedshiftReplicator.s3_bucket_params
def bucket
  bucket_params = RailsRedshiftReplicator.s3_bucket_params
  bucket_params[:bucket]
end
#delete ⇒ Object
28 29 30 31 32 33 34 35 36 |
# File 'lib/rails_redshift_replicator/file_manager.rb', line 28
# Deletes every object returned by #s3_replication_files from the bucket,
# logging one :deleting_file notice per key.
#
# NOTE(review): only the contents of the single list_objects response are
# processed; if that listing can be truncated, the remaining keys are left
# in place — confirm against the S3 client behaviour.
#
# @return [Object, nil] the listed contents, or nil when the response has none
def delete
  listed = s3_replication_files.contents
  return unless listed

  listed.each do |entry|
    RailsRedshiftReplicator.logger.info(
      I18n.t(:deleting_file, key: entry.key, scope: :rails_redshift_replicator)
    )
    s3_client.delete_object(bucket: bucket, key: entry.key)
  end
end
#file_key_in_format(file_name, format) ⇒ String
Returns the s3 key to be used
80 81 82 83 84 85 86 |
# File 'lib/rails_redshift_replicator/file_manager.rb', line 80
# Returns the S3 key to be used for a file in the given upload format.
#
# @param file_name [String] name of the exported file
# @param format [String] "gzip" selects the #gzipped name; any other value
#   keeps file_name unchanged
# @return [String] key built by .s3_file_key for the exporter's source table
def file_key_in_format(file_name, format)
  key_name = format == "gzip" ? gzipped(file_name) : file_name
  self.class.s3_file_key(exporter.source_table, key_name)
end
#files_without_base(files) ⇒ Object
111 112 113 |
# File 'lib/rails_redshift_replicator/file_manager.rb', line 111
# Filters out the files whose last dot-separated segment is "gz" or "csv",
# keeping everything else (for example pieces named "x.csv.aa").
#
# @param files [Array<String>] file names or paths
# @return [Array<String>] the entries that do not end in "gz" or "csv"
def files_without_base(files)
  files.select do |path|
    last_segment = path.split('.').last
    !%w(gz csv).include?(last_segment)
  end
end
#gzipped(file) ⇒ String
Returns the file name with every ".csv" replaced by ".gz"
90 91 92 |
# File 'lib/rails_redshift_replicator/file_manager.rb', line 90
# Derives the gzip file name for a CSV file name.
#
# Every occurrence of ".csv" in the string is replaced, not only a trailing
# one, so "users_1.csv.aa" becomes "users_1.gz.aa".
#
# @param file [String] file name or path
# @return [String] a new string with ".csv" replaced by ".gz"
def gzipped(file)
  csv_marker = ".csv"
  gzip_marker = ".gz"
  file.gsub(csv_marker, gzip_marker)
end
#local_file(name) ⇒ String
Path to the local export file
56 57 58 |
# File 'lib/rails_redshift_replicator/file_manager.rb', line 56
# Path to the local export file.
#
# The path is rebuilt on every call. The previous version cached the first
# result in @local_file regardless of `name`, so a later call with a
# different name silently returned the first file's path.
#
# @param name [String] file name inside the local replication directory
# @return [String] RailsRedshiftReplicator.local_replication_path and name
#   joined with "/"
def local_file(name)
  "#{RailsRedshiftReplicator.local_replication_path}/#{name}"
end
#row_count_threshold(counts) ⇒ Integer
Number of lines per file
73 74 75 |
# File 'lib/rails_redshift_replicator/file_manager.rb', line 73
# Number of lines per split file: the record count divided by the number of
# slices reported by the exporter's replication, rounded up.
#
# @param counts [#to_f] total number of records
# @return [Integer] ceiling of counts / slices
def row_count_threshold(counts)
  slices = exporter.replication.slices
  lines_per_file = counts.to_f / slices
  lines_per_file.ceil
end
#s3_client ⇒ Object
6 7 8 9 10 11 12 |
# File 'lib/rails_redshift_replicator/file_manager.rb', line 6
# Lazily builds the S3 client and reuses it on later calls.
#
# The region comes from RailsRedshiftReplicator.s3_bucket_params and the
# key/secret pair from RailsRedshiftReplicator.aws_credentials.
#
# @return [Aws::S3::Client] the client cached in @client
def s3_client
  return @client if @client

  @client = Aws::S3::Client.new(
    region: RailsRedshiftReplicator.s3_bucket_params[:region],
    access_key_id: RailsRedshiftReplicator.aws_credentials[:key],
    secret_access_key: RailsRedshiftReplicator.aws_credentials[:secret]
  )
end
#s3_replication_files ⇒ Object
38 39 40 |
# File 'lib/rails_redshift_replicator/file_manager.rb', line 38
# Lists the objects in the bucket whose keys start with the exporter's
# replication key.
#
# @return [Object] whatever s3_client.list_objects returns for that
#   bucket/prefix pair
def s3_replication_files
  client = s3_client
  target_bucket = bucket
  key_prefix = exporter.replication.key
  client.list_objects(bucket: target_bucket, prefix: key_prefix)
end
#split_file(name, record_count) ⇒ Object
This method requires a `split` executable and is compatible with both the macOS and Linux versions of it.
Splits the CSV into a number of files determined by the number of Redshift Slices
64 65 66 67 68 |
# File 'lib/rails_redshift_replicator/file_manager.rb', line 64
require "shellwords"

# Splits the local CSV into pieces of #row_count_threshold lines each by
# shelling out to RailsRedshiftReplicator.split_command. The pieces are named
# after the source file followed by "." and the suffix the split tool adds.
#
# Both path arguments are shell-escaped so that a replication path or file
# name containing spaces or shell metacharacters is passed through intact.
#
# @param name [String] file name, resolved through #local_file
# @param record_count [Integer] total number of records in the file
# @return [String] standard output of the split command
def split_file(name, record_count)
  counts = row_count_threshold(record_count)
  file_name = local_file(name)
  source = Shellwords.escape(file_name)
  # Trailing dot separates the original file name from the piece suffix.
  piece_prefix = Shellwords.escape("#{file_name}.")
  `#{RailsRedshiftReplicator.split_command} -l #{counts} #{source} #{piece_prefix}`
end
#temp_file_name ⇒ Object
42 43 44 |
# File 'lib/rails_redshift_replicator/file_manager.rb', line 42
# Builds a CSV file name from the exporter's source table and the current
# Unix timestamp in seconds.
#
# @return [String] "<source_table>_<epoch seconds>.csv"
def temp_file_name
  table = exporter.source_table
  timestamp = Time.now.to_i
  "#{table}_#{timestamp}.csv"
end
#upload_csv(files) ⇒ Object
Uploads the split CSV files
117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 |
# File 'lib/rails_redshift_replicator/file_manager.rb', line 117
# Uploads the split CSV files to S3 and then removes every local file.
#
# The file whose base name equals the base name of the exporter's
# replication key is not uploaded, but it is still removed locally.
# Each file is opened with a block so its handle is closed as soon as the
# upload call returns instead of staying open until garbage collection.
#
# @param files [Array<String>] local paths of the export file and its pieces
# @return [Array<String>] the files array
def upload_csv(files)
  files.each do |file|
    basename = File.basename(file)
    next if basename == File.basename(exporter.replication.key)

    key = self.class.s3_file_key(exporter.source_table, basename)
    RailsRedshiftReplicator.logger.info I18n.t(:uploading_notice,
                                               file: file,
                                               key: key,
                                               scope: :rails_redshift_replicator)
    File.open(file) do |body|
      s3_client.put_object(key: key, body: body, bucket: bucket)
    end
  end
  files.each { |f| FileUtils.rm f }
end
#upload_gzip(files) ⇒ Object
94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 |
# File 'lib/rails_redshift_replicator/file_manager.rb', line 94
require "shellwords"

# Gzips each split piece, uploads the compressed copy to S3 and then removes
# all local files (the given files and the gzip copies).
#
# Only the entries kept by #files_without_base are compressed and uploaded.
# Paths are shell-escaped before being placed in the gzip command so that
# spaces or shell metacharacters do not break the command or its redirect.
# The compressed file is opened in binary mode with a block, so the handle is
# closed as soon as the upload call returns.
#
# @param files [Array<String>] local paths of the export file and its pieces
# @return [Array<String>] the entries selected by #files_without_base
def upload_gzip(files)
  without_base = files_without_base(files)
  without_base.each do |file|
    basename = File.basename(file)
    gzip_file = gzipped(file)
    command = "#{RailsRedshiftReplicator.gzip_command} -c #{Shellwords.escape(file)} > #{Shellwords.escape(gzip_file)}"
    RailsRedshiftReplicator.logger.info I18n.t(:gzip_notice,
                                               file: file,
                                               gzip_file: gzip_file,
                                               command: command,
                                               scope: :rails_redshift_replicator)
    `#{command}`
    File.open(gzip_file, "rb") do |body|
      s3_client.put_object(
        key: self.class.s3_file_key(exporter.source_table, gzipped(basename)),
        body: body,
        bucket: bucket
      )
    end
  end
  files.each { |f| FileUtils.rm f }
  without_base.each { |f| FileUtils.rm gzipped(f) }
end
#write_csv(file_name, records) ⇒ Integer
Writes all results to one file for future splitting.
49 50 51 |
# File 'lib/rails_redshift_replicator/file_manager.rb', line 49
# Writes all results to one local file for future splitting.
#
# The write is delegated to the exporter's connection adapter; the target
# path is resolved through #local_file.
#
# @param file_name [String] name of the file to write
# @param records [Object] result set handed to the adapter unchanged
# @return [Object] whatever the adapter's write returns (documented upstream
#   as an Integer)
def write_csv(file_name, records)
  exporter.connection_adapter.write(local_file(file_name), records)
end