Class: RedshiftETL::Extract::DatabaseFetcher

Inherits:
Object
  • Object
show all
Defined in:
lib/redshift_etl/extract/database_fetcher.rb

Overview

Copies data from a database in CSV format.

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(config) ⇒ DatabaseFetcher

Returns a new instance of DatabaseFetcher.



8
9
10
# File 'lib/redshift_etl/extract/database_fetcher.rb', line 8

# Builds a fetcher around the given configuration object.
#
# The object is stored as-is in +@config+; nothing is validated or connected
# here.
#
# @param config [Object] configuration for the extract; other methods of this
#   class call +table_name+, +columns+ and +connection_string+ on it
def initialize(config)
  @config = config
end

Instance Attribute Details

#config ⇒ Object (readonly)

Returns the value of attribute config.



7
8
9
# File 'lib/redshift_etl/extract/database_fetcher.rb', line 7

# Reader for the configuration object held in +@config+.
#
# @return [Object] the current value of +@config+
def config
  @config
end

Instance Method Details

#dataset_to_copy(db) ⇒ Object



33
34
35
36
37
38
39
40
# File 'lib/redshift_etl/extract/database_fetcher.rb', line 33

# Builds the dataset to be copied: the configured table, restricted to the
# configured columns.
#
# The table is looked up with <tt>db[config.table_name.to_sym]</tt> and the
# column list is taken from the keys of +config.columns+, each converted to a
# Symbol.
#
# @param db [Object] database handle that answers +[]+ with a table-like
#   object (presumably a Sequel::Database - confirm against the caller)
# @return [Object] whatever +select+ returns for the chosen columns
def dataset_to_copy(db)
  # TODO: truncate to 65k for varchar columns
  # TODO: maybe later, if citext, convert to lower(column)
  db[config.table_name.to_sym]
    .select(*config.columns.keys.map(&:to_sym))
end

#lines ⇒ Object



12
13
14
15
16
17
18
19
20
21
22
23
24
# File 'lib/redshift_etl/extract/database_fetcher.rb', line 12

# Streams the table described by the configuration as CSV text.
#
# Opens a connection through #sequel, builds the dataset with
# #dataset_to_copy and runs +copy_table+ on it with
# <tt>format: :csv, options: 'header'</tt>. Every chunk +copy_table+ yields is
# passed on to the caller's block (presumably one CSV line per chunk, with a
# header line first - confirm against the adapter).
#
# When no block is given an Enumerator over the same chunks is returned
# instead, so no connection is opened until the caller iterates. Previously
# this case connected, started the copy and then failed with LocalJumpError on
# the first chunk.
#
# @yieldparam line [Object] one chunk as yielded by +copy_table+
# @return [Enumerator] when called without a block
# @return [Object] otherwise, whatever #sequel returns
# @raise [StandardError] any error raised by the caller's block, after it has
#   been printed
def lines
  return enum_for(:lines) unless block_given?

  sequel do |db|
    db.copy_table(dataset_to_copy(db), format: :csv, options: 'header') do |line|
      begin
        yield line
      rescue => e # copy_table rescues errors, hard to debug without rescuing them ourselves.
        # Print the error and its backtrace before re-raising the same exception.
        puts e.inspect
        puts e.backtrace
        raise
      end
    end
  end
end

#sequel(&block) ⇒ Object



26
27
28
29
30
31
# File 'lib/redshift_etl/extract/database_fetcher.rb', line 26

# Opens a database connection for the duration of the given block.
#
# Connects with <tt>Sequel.connect(config.connection_string)</tt>, sends
# <tt>set time zone 'UTC'</tt> on the new handle, then calls the block with
# that handle.
#
# @yieldparam database [Object] the handle supplied by +Sequel.connect+
# @return [Object] whatever +Sequel.connect+ returns for its block form
def sequel(&block)
  Sequel.connect(config.connection_string) do |database|
    # Send the time zone statement before the caller's block runs.
    database << "set time zone 'UTC'"
    block.call(database)
  end
end