Class: RailsRedshiftReplicator::FileManager

Inherits:
Object
  • Object
show all
Defined in:
lib/rails_redshift_replicator/file_manager.rb

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(exporter = nil) ⇒ FileManager

Returns a new instance of FileManager.



20
21
22
# File 'lib/rails_redshift_replicator/file_manager.rb', line 20

# Builds a file manager, optionally bound to an exporter.
#
# @param exporter [Object, nil] stored untouched in @exporter
def initialize(exporter = nil)
  @exporter = exporter
end

Instance Attribute Details

#exporter ⇒ Object (readonly)

Returns the value of attribute exporter.



4
5
6
# File 'lib/rails_redshift_replicator/file_manager.rb', line 4

# Reader for the exporter held by this instance.
#
# @return [Object, nil] the current value of @exporter
def exporter
  @exporter
end

Class Method Details

.s3_file_key(source_table, file) ⇒ String

File location on s3

Returns:

  • (String)

    file location



16
17
18
# File 'lib/rails_redshift_replicator/file_manager.rb', line 16

# Builds the S3 object key for a replication file.
#
# @param source_table [String] table name, used as the middle path segment
# @param file [String] file name, used as the last path segment
# @return [String] the configured :prefix, the table and the file joined with File.join
def self.s3_file_key(source_table, file)
  prefix = RailsRedshiftReplicator.s3_bucket_params[:prefix]
  File.join(prefix, source_table, file)
end

Instance Method Details

#bucket ⇒ Object



24
25
26
# File 'lib/rails_redshift_replicator/file_manager.rb', line 24

# Name of the S3 bucket, read from the replicator's bucket settings.
#
# @return [Object] the value stored under :bucket in RailsRedshiftReplicator.s3_bucket_params
def bucket
  bucket_params = RailsRedshiftReplicator.s3_bucket_params
  bucket_params[:bucket]
end

#delete ⇒ Object



28
29
30
31
32
33
34
35
36
# File 'lib/rails_redshift_replicator/file_manager.rb', line 28

# Deletes from the bucket every object listed by #s3_replication_files,
# logging each key before it is removed.
#
# @return [Object, nil] the listed contents, or nil when the listing has none
def delete
  objects = s3_replication_files.contents
  return unless objects

  objects.each do |object|
    notice = I18n.t(:deleting_file, key: object.key, scope: :rails_redshift_replicator)
    RailsRedshiftReplicator.logger.info notice
    s3_client.delete_object(bucket: bucket, key: object.key)
  end
end

#file_key_in_format(file_name, format) ⇒ String

Returns the s3 key to be used

Returns:

  • (String)

    file key with extension



80
81
82
83
84
85
86
# File 'lib/rails_redshift_replicator/file_manager.rb', line 80

# Returns the S3 key for a file in the requested export format.
#
# @param file_name [String] name of the export file
# @param format [String] when equal to "gzip" the name is passed through #gzipped first;
#   any other value leaves the name unchanged
# @return [String] key built by .s3_file_key for the exporter's source table
def file_key_in_format(file_name, format)
  key_name = format == "gzip" ? gzipped(file_name) : file_name
  self.class.s3_file_key(exporter.source_table, key_name)
end

#files_without_base(files) ⇒ Object



111
112
113
# File 'lib/rails_redshift_replicator/file_manager.rb', line 111

# Filters out every path whose last dot-separated segment is "gz" or "csv".
#
# @param files [Array<String>] file paths
# @return [Array<String>] the paths that do not end in a gz or csv segment
def files_without_base(files)
  files.reject { |path| %w(gz csv).include?(path.split('.').last) }
end

#gzipped(file) ⇒ String

Rename file to use .gz extension

Returns:

  • (String)


90
91
92
# File 'lib/rails_redshift_replicator/file_manager.rb', line 90

# Derives the gzip name for a file by swapping ".csv" for ".gz".
#
# Every occurrence of ".csv" in the string is replaced, not only a trailing
# one, so "users.csv.aa" becomes "users.gz.aa".
#
# @param file [String] file name or path
# @return [String] a new string with each ".csv" replaced by ".gz"
def gzipped(file)
  file.gsub('.csv', '.gz')
end

#local_file(name) ⇒ String

Path to the local export file

Parameters:

  • name (String)

    file name

Returns:

  • (String)

    path to file



56
57
58
# File 'lib/rails_redshift_replicator/file_manager.rb', line 56

# Path to the local export file.
#
# Deliberately not memoized: the result depends on +name+, and caching it in
# a single instance variable would hand the first computed path back for
# every later call, whatever name was requested.
#
# @param name [String] file name
# @return [String] RailsRedshiftReplicator.local_replication_path and +name+ joined by "/"
def local_file(name)
  "#{RailsRedshiftReplicator.local_replication_path}/#{name}"
end

#row_count_threshold(counts) ⇒ Integer

Number of lines per file

Parameters:

  • counts (Integer)

    number of records

Returns:

  • (Integer)

    Number of lines per export file



73
74
75
# File 'lib/rails_redshift_replicator/file_manager.rb', line 73

# Number of lines each export file should hold.
#
# @param counts [Integer] number of records
# @return [Integer] +counts+ divided by the replication's slice count, rounded up
def row_count_threshold(counts)
  total = counts.to_f
  slices = exporter.replication.slices
  (total / slices).ceil
end

#s3_client ⇒ Object



6
7
8
9
10
11
12
# File 'lib/rails_redshift_replicator/file_manager.rb', line 6

# S3 client built from the replicator's region and credentials.
#
# The client is created on first use and cached in @client.
#
# @return [Aws::S3::Client]
def s3_client
  @client ||= begin
    bucket_params = RailsRedshiftReplicator.s3_bucket_params
    credentials = RailsRedshiftReplicator.aws_credentials
    Aws::S3::Client.new(
      region: bucket_params[:region],
      access_key_id: credentials[:key],
      secret_access_key: credentials[:secret]
    )
  end
end

#s3_replication_files ⇒ Object



38
39
40
# File 'lib/rails_redshift_replicator/file_manager.rb', line 38

# Lists the bucket objects whose key starts with the replication's key.
#
# NOTE(review): this is a single list_objects call; confirm whether the
# listing can be truncated and needs pagination.
#
# @return [Object] the response of s3_client.list_objects
def s3_replication_files
  listing_params = { bucket: bucket, prefix: exporter.replication.key }
  s3_client.list_objects(**listing_params)
end

#split_file(name, record_count) ⇒ Object

Note:

This method requires a `split` executable and is compatible with both the Mac and Linux versions of it.

Splits the CSV into a number of files determined by the number of Redshift Slices

Parameters:

  • name (String)

    file name

  • record_count (Integer)

    number of records in the file



64
65
66
67
68
# File 'lib/rails_redshift_replicator/file_manager.rb', line 64

# Splits the local export file into chunks by running the configured split command.
#
# Each chunk holds #row_count_threshold lines. The output prefix passed to
# the command is the source path followed by a dot.
#
# @param name [String] file name, resolved through #local_file
# @param record_count [Integer] number of records, passed to #row_count_threshold
# @return [String] standard output of the split command
def split_file(name, record_count)
  lines_per_file = row_count_threshold(record_count)
  source_path = local_file(name)
  command = "#{RailsRedshiftReplicator.split_command} -l #{lines_per_file} #{source_path} #{source_path}."
  `#{command}`
end

#temp_file_name ⇒ Object



42
43
44
# File 'lib/rails_redshift_replicator/file_manager.rb', line 42

# Name for a new export file: the source table plus the current Unix timestamp.
#
# @return [String] "<source_table>_<seconds since epoch>.csv"
def temp_file_name
  table = exporter.source_table
  timestamp = Time.now.to_i
  "#{table}_#{timestamp}.csv"
end

#upload_csv(files) ⇒ Object

Uploads the split CSV files

Parameters:

  • files (Array<String>)

    array of files paths to upload



117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
# File 'lib/rails_redshift_replicator/file_manager.rb', line 117

# Uploads the split CSV files to S3 and then removes every local file.
#
# A file whose basename equals the basename of the replication key is not
# uploaded, but it is still deleted locally at the end.
#
# @param files [Array<String>] paths of the files to upload
# @return [Array<String>] the +files+ array that was passed in
def upload_csv(files)
  files.each do |file|
    basename = File.basename(file)
    next if basename == File.basename(exporter.replication.key)

    # Built once so the log line and the upload always agree on the key.
    key = self.class.s3_file_key(exporter.source_table, basename)
    RailsRedshiftReplicator.logger.info I18n.t(:uploading_notice,
                                               file: file,
                                               key: key,
                                               scope: :rails_redshift_replicator)
    # Block form closes the handle as soon as the upload call returns, so no
    # descriptor is left open when the file is deleted below.
    File.open(file) do |io|
      s3_client.put_object(key: key, body: io, bucket: bucket)
    end
  end
  files.each { |f| FileUtils.rm f }
end

#upload_gzip(files) ⇒ Object



94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
# File 'lib/rails_redshift_replicator/file_manager.rb', line 94

# Gzips each split file, uploads the archive to S3, then removes the local
# originals and the archives.
#
# Only the paths kept by #files_without_base are compressed and uploaded;
# every path in +files+ is deleted locally afterwards.
#
# @param files [Array<String>] paths of the local export files
# @return [Array<String>] the paths that were compressed and uploaded
def upload_gzip(files)
  without_base = files_without_base(files)
  without_base.each do |file|
    basename = File.basename(file)
    gzip_file = gzipped(file)
    command = "#{RailsRedshiftReplicator.gzip_command} -c #{file} > #{gzip_file}"
    RailsRedshiftReplicator.logger.info I18n.t(:gzip_notice, file: file, gzip_file: gzip_file, command: command, scope: :rails_redshift_replicator)
    `#{command}`
    key = self.class.s3_file_key(exporter.source_table, gzipped(basename))
    # Block form closes the handle as soon as the upload call returns, so no
    # descriptor is left open when the archive is deleted below.
    File.open(gzip_file) do |io|
      s3_client.put_object(key: key, body: io, bucket: bucket)
    end
  end
  files.each { |f| FileUtils.rm f }
  without_base.each { |f| FileUtils.rm gzipped(f) }
end

#write_csv(file_name, records) ⇒ Integer

Writes all results to one file for future splitting.

Parameters:

  • file_name (String)

    name of the local export file

Returns:

  • (Integer)

    number of records to export.



49
50
51
# File 'lib/rails_redshift_replicator/file_manager.rb', line 49

# Writes all results to one local file for later splitting.
#
# @param file_name [String] name of the local export file, resolved through #local_file
# @param records [Object] handed unchanged to the connection adapter's write
# @return [Object] whatever the adapter's write returns
#   (documented as the number of records to export)
def write_csv(file_name, records)
  destination = local_file(file_name)
  exporter.connection_adapter.write(destination, records)
end