Module: RailsRedshiftReplicator

Defined in:
lib/rails_redshift_replicator.rb,
lib/rails_redshift_replicator/engine.rb,
lib/rails_redshift_replicator/deleter.rb,
lib/rails_redshift_replicator/rlogger.rb,
lib/rails_redshift_replicator/version.rb,
lib/rails_redshift_replicator/replicable.rb,
lib/rails_redshift_replicator/file_manager.rb,
lib/rails_redshift_replicator/tools/vacuum.rb,
lib/rails_redshift_replicator/tools/analyze.rb,
lib/rails_redshift_replicator/exporters/base.rb,
lib/rails_redshift_replicator/importers/base.rb,
lib/rails_redshift_replicator/adapters/mysql2.rb,
lib/rails_redshift_replicator/adapters/sqlite.rb,
lib/rails_redshift_replicator/model/extension.rb,
lib/rails_redshift_replicator/adapters/generic.rb,
app/models/rails_redshift_replicator/replication.rb,
lib/rails_redshift_replicator/adapters/postgresql.rb,
lib/rails_redshift_replicator/exporters/full_replicator.rb,
lib/rails_redshift_replicator/importers/full_replicator.rb,
app/helpers/rails_redshift_replicator/application_helper.rb,
lib/rails_redshift_replicator/exporters/timed_replicator.rb,
lib/rails_redshift_replicator/importers/timed_replicator.rb,
lib/generators/rails_redshift_replicator/install_generator.rb,
lib/rails_redshift_replicator/exporters/identity_replicator.rb,
lib/rails_redshift_replicator/importers/identity_replicator.rb,
app/controllers/rails_redshift_replicator/application_controller.rb

Defined Under Namespace

Modules: Adapters, ApplicationHelper, Exporters, Generators, Importers, Model, Tools

Classes: ApplicationController, Deleter, Engine, FileManager, RLogger, Replicable, Replication

Constant Summary collapse

VERSION =
"0.0.1"

Class Method Summary collapse

Class Method Details

.add_replicable(hash) ⇒ Object



126
127
128
129
# File 'lib/rails_redshift_replicator.rb', line 126

# Registers one or more replicables and logs the first table name at debug level.
#
# @param hash [Hash] replicable definitions keyed by table name
# @return [Hash] the replicables registry after the merge
def add_replicable(hash)
  message = I18n.t(:replicable_added, table_name: hash.keys.first, scope: :rails_redshift_replicator)
  logger.debug(message)
  RailsRedshiftReplicator.replicables.merge!(hash)
end

.analyze(*args) ⇒ Object



176
177
178
# File 'lib/rails_redshift_replicator.rb', line 176

# Builds a Tools::Analyze with the given arguments and runs it.
#
# @param args [Array] arguments forwarded unchanged to Tools::Analyze.new
# @return [Object] whatever Tools::Analyze#perform returns
def analyze(*args)
  analyzer = Tools::Analyze.new(*args)
  analyzer.perform
end

.base_exporter_types ⇒ Object

Lists the names of the base exporter types.



181
182
183
184
185
186
187
# File 'lib/rails_redshift_replicator.rb', line 181

# Names of the base exporter types.
#
# @return [Array<String>] a new array on every call
def base_exporter_types
  %w[identity_replicator timed_replicator full_replicator]
end

.check_args(tables) ⇒ Object



164
165
166
167
168
169
170
# File 'lib/rails_redshift_replicator.rb', line 164

# Ensures that at least one table was requested.
#
# @param tables [Array] the tables passed by the caller
# @return [nil] when tables is not an empty array
# @raise [StandardError] when tables is an empty array; the message is logged as an error first
def check_args(tables)
  return unless tables == []

  error_message = I18n.t(:must_specify_tables, scope: :rails_redshift_replicator)
  logger.error(error_message)
  raise StandardError, error_message
end

.connection ⇒ PG::Connection

Redshift connection

Returns:

  • (PG::Connection)


224
225
226
# File 'lib/rails_redshift_replicator.rb', line 224

# Memoized Redshift connection, opened with redshift_connection_params on first use.
#
# @return [PG::Connection]
def connection
  return @redshift if @redshift

  @redshift = PG.connect(redshift_connection_params)
end

.debug_mode=(value) ⇒ Object



112
113
114
115
# File 'lib/rails_redshift_replicator.rb', line 112

# Switches debug mode and adjusts the logger verbosity accordingly.
#
# Only the literal +true+ selects Logger::DEBUG; every other value selects Logger::WARN.
#
# @param value [Object] the new debug flag, stored as given
def debug_mode=(value)
  target = logger
  if value == true
    target.level = Logger::DEBUG
  else
    target.level = Logger::WARN
  end
  @@debug_mode = value
end

.define_defaults ⇒ Object (also known as: reload)

Note:

Useful for testing



34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
# File 'lib/rails_redshift_replicator.rb', line 34

# Resets every module-level setting to its default value.
#
# @return [nil]
def define_defaults
  # Registry of replicables.
  @@replicables = {}.with_indifferent_access

  # Logger writing to STDOUT, at WARN level by default.
  default_logger = RLogger.new(STDOUT)
  default_logger.level = Logger::WARN
  @@logger = default_logger

  # Redshift connection parameters, read from environment variables.
  @@redshift_connection_params = {
    host:     ENV['RRR_REDSHIFT_HOST'],
    dbname:   ENV['RRR_REDSHIFT_DATABASE'],
    port:     ENV['RRR_REDSHIFT_PORT'],
    user:     ENV['RRR_REDSHIFT_USER'],
    password: ENV['RRR_REDSHIFT_PASSWORD']
  }

  # Credentials for the AWS S3 replication bucket, read from environment variables.
  @@aws_credentials = {
    key:    ENV['RRR_AWS_ACCESS_KEY_ID'],
    secret: ENV['RRR_AWS_SECRET_ACCESS_KEY']
  }

  # AWS S3 replication bucket parameters.
  # The region falls back to US East (N. Virginia) when its environment variable is unset;
  # bucket and prefix come straight from the environment.
  @@s3_bucket_params = {
    region: ENV.fetch('RRR_REPLICATION_REGION', 'us-east-1'),
    bucket: ENV['RRR_REPLICATION_BUCKET'],
    prefix: ENV['RRR_REPLICATION_PREFIX']
  }

  # Options for the Redshift COPY command.
  # see [http://docs.aws.amazon.com/redshift/latest/dg/r_COPY.html]
  # Further keys may be added besides changing these ones.
  # Only the values are used on the copy commands, never the keys.
  # Set a default to nil to remove it.
  # @example:
  #   @@copy_options = {
  #     statupdate: nil,
  #   }
  @@copy_options = {
    statupdate:     'STATUPDATE TRUE',
    acceptinvchars: 'ACCEPTINVCHARS',
    empty:          'EMPTYASNULL',
    truncate:       'TRUNCATECOLUMNS'
  }

  # Slices available on the Redshift cluster; used to split export files. Defaults to 1.
  # see [http://docs.aws.amazon.com/redshift/latest/dg/t_splitting-data-files.html]
  @@redshift_slices = 1

  # Directory holding temporary replication files until they are uploaded to S3. Defaults to /tmp
  @@local_replication_path = '/tmp'

  # Command (or path to an executable) used to split files
  @@split_command = 'split'

  # Command (or path to an executable) used to gzip files
  @@gzip_command = 'gzip'

  # Debug mode outputs messages to STDOUT. Off by default
  @@debug_mode = false

  # How many replication records are kept in history. nil (the default) keeps the full history.
  @@history_cap = nil

  # Preferred format for the export file
  @@preferred_format = 'csv'

  # Maximum number of retries for a replication before cancelling and starting another
  @@max_retries = nil

  # Whether deletes are tracked and propagated to Redshift
  @@enable_delete_tracking = false

  # Whether exported files on S3 are deleted once imported
  @@delete_s3_file_after_import = true

  nil
end

.export(*tables) ⇒ Object

See Also:



153
154
155
156
# File 'lib/rails_redshift_replicator.rb', line 153

# Exports every requested replicable.
#
# @param tables [Array] table names, validated by check_args and resolved by tables_to_perform
# @return [Object] the collection of replicable definitions that was iterated
def export(*tables)
  check_args(tables)
  definitions = replicable_definitions(tables_to_perform(tables))
  definitions.each do |_name, replicable|
    replicable.export
  end
end

.history_cap=(value) ⇒ Object



117
118
119
# File 'lib/rails_redshift_replicator.rb', line 117

# Sets how many replication records are kept in history.
#
# A truthy value is raised to a minimum of 2; nil or false is stored unchanged.
#
# @param value [Integer, nil] requested cap
def history_cap=(value)
  @@history_cap = value ? [value, 2].max : value
end

.import(*tables) ⇒ Object

See Also:



159
160
161
162
# File 'lib/rails_redshift_replicator.rb', line 159

# Imports every requested replicable.
#
# @param tables [Array] table names, validated by check_args and resolved by tables_to_perform
# @return [Object] the collection of replicable definitions that was iterated
def import(*tables)
  check_args(tables)
  definitions = replicable_definitions(tables_to_perform(tables))
  definitions.each do |_name, replicable|
    replicable.import
  end
end

.reload_replicables ⇒ Object



131
132
133
134
135
136
# File 'lib/rails_redshift_replicator.rb', line 131

# Rebuilds every registered replicable from its own replication type and options.
#
# The registry is read through the module accessor (a bare `replicables = ...`
# would only create an empty local variable) and snapshotted with `to_a`, so
# re-registering entries through add_replicable never mutates the hash that is
# being iterated.
#
# @return [Hash] the replicables registry after the rebuild
def reload_replicables
  RailsRedshiftReplicator.replicables.to_a.each do |name, replicable|
    rebuilt = RailsRedshiftReplicator::Replicable.new(replicable.replication_type, replicable.options)
    add_replicable(name => rebuilt)
  end
  RailsRedshiftReplicator.replicables
end

.replicable_definitions(tables) ⇒ Object



201
202
203
# File 'lib/rails_redshift_replicator.rb', line 201

# Selects the registered replicables whose key matches one of the given tables.
#
# Keys and table names are compared as strings. The requested names are
# normalized once, before the registry is scanned, rather than once per key.
#
# @param tables [Array<String, Symbol>, String, Symbol, nil] table name(s) to look up
# @return [Hash] the matching subset of the replicables registry
def replicable_definitions(tables)
  wanted = Array(tables).map(&:to_s)
  RailsRedshiftReplicator.replicables.select { |name, _| wanted.include?(name.to_s) }
end

.replicable_tables ⇒ Array<String>

All replicable tables registered in RailsRedshiftReplicator, either from the model or directly.

Returns:

  • (Array<String>)

    tables



192
193
194
# File 'lib/rails_redshift_replicator.rb', line 192

# Keys of the replicables registry, as strings.
#
# @return [Array<String>] tables
def replicable_tables
  registry = RailsRedshiftReplicator.replicables
  registry.keys.map { |table| table.to_s }
end

.replicable_target_tables ⇒ Object



196
197
198
# File 'lib/rails_redshift_replicator.rb', line 196

# Collects the :target_table entry of every registered replicable.
#
# @return [Array] one entry per registered replicable, in registry order
def replicable_target_tables
  RailsRedshiftReplicator.replicables.each_value.map do |definition|
    definition[:target_table]
  end
end

.replicate(*tables) ⇒ Object

Performs full replication (export + import)

Examples:

Replicate user and publication models.

RailsRedshiftReplicator.replicate(:user, :publication)

Replicate all models

RailsRedshiftReplicator.replicate(:all)

Parameters:

  • tables (Array<Symbol>, Argument list)

    activerecord models to export or :all



144
145
146
147
148
149
150
# File 'lib/rails_redshift_replicator.rb', line 144

# Performs a full replication: each requested replicable is exported and then imported.
#
# @param tables [Array] table names, validated by check_args and resolved by tables_to_perform
# @return [Object] the collection of replicable definitions that was iterated
def replicate(*tables)
  check_args(tables)
  replicable_definitions(tables_to_perform(tables)).each do |_, replicable|
    # The export result is not needed here; import runs right after it for the same replicable.
    replicable.export
    replicable.import
  end
end

.setup {|_self| ... } ⇒ RedshiftReplicator

Yields:

  • (_self)

Yield Parameters:

Returns:

  • (RedshiftReplicator)


122
123
124
# File 'lib/rails_redshift_replicator.rb', line 122

# Configuration entry point: hands the receiver to the given block.
#
# @yield [_self] the object setup was called on
# @return [Object] the value returned by the block
def setup
  yield(self)
end

.tables_to_perform(tables) ⇒ Object

Returns the tables to process. Passing :all first selects every replicable table.



207
208
209
210
211
212
213
214
215
216
# File 'lib/rails_redshift_replicator.rb', line 207

# Resolves the requested tables against the replicable ones.
#
# When the first requested name is 'all', every replicable table is returned.
# Otherwise only the requested tables that are replicable are returned, and the
# remaining requested names are passed to warn_if_unreplicable.
#
# @param tables [Array<String, Symbol>, String, Symbol] requested table name(s)
# @return [Array<String>] tables to process
def tables_to_perform(tables)
  requested = Array(tables).map(&:to_s)
  return replicable_tables if requested.first == 'all'

  selected = replicable_tables & requested
  warn_if_unreplicable(requested - selected)
  selected
end

.vacuum(*args) ⇒ Object



172
173
174
# File 'lib/rails_redshift_replicator.rb', line 172

# Builds a Tools::Vacuum with the given arguments and runs it.
#
# @param args [Array] arguments forwarded unchanged to Tools::Vacuum.new
# @return [Object] whatever Tools::Vacuum#perform returns
def vacuum(*args)
  cleaner = Tools::Vacuum.new(*args)
  cleaner.perform
end

.warn_if_unreplicable(tables) ⇒ Object



218
219
220
# File 'lib/rails_redshift_replicator.rb', line 218

# Logs one warning per table that was requested but is not replicable.
#
# @param tables [Array<String>] names of the tables to warn about
# @return [Array<String>] the tables that were passed in
def warn_if_unreplicable(tables)
  tables.each do |table|
    message = I18n.t(:table_not_replicable, table_name: table, scope: :rails_redshift_replicator)
    logger.warn(message)
  end
end