Class: ETL::Processor::BulkImportProcessor
- Defined in:
- lib/etl/processor/bulk_import_processor.rb
Overview
Processor which is used to bulk import data into a target database. The underlying ActiveRecord database driver must support the bulk_load
method.
Instance Attribute Summary collapse
-
#columns ⇒ Object
readonly
Array of symbols representing the column load order.
-
#field_enclosure ⇒ Object
The field enclosure (defaults to nil).
-
#field_separator ⇒ Object
The field separator (defaults to a comma).
-
#file ⇒ Object
readonly
The file to load from.
-
#line_separator ⇒ Object
The line separator (defaults to a newline).
-
#null_string ⇒ Object
The string that indicates a NULL (defaults to an empty string).
-
#table ⇒ Object
readonly
The table name.
-
#target ⇒ Object
readonly
The target database.
-
#truncate ⇒ Object
readonly
Set to true to truncate.
Instance Method Summary collapse
-
#initialize(control, configuration) ⇒ BulkImportProcessor
constructor
Initialize the processor.
-
#process ⇒ Object
Execute the processor.
- #table_name ⇒ Object
Constructor Details
#initialize(control, configuration) ⇒ BulkImportProcessor
Initialize the processor.
Configuration options:
-
:file
: The file to load data from -
:target
: The target database -
:table
: The table name -
:truncate
: Set to true to truncate before loading -
:columns
: The columns to load in the order they appear in the bulk data file -
:field_separator
: The field separator. Defaults to a comma -
:line_separator
: The line separator. Defaults to a newline -
:field_enclosure
: The field enclosure characters
39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 |
# File 'lib/etl/processor/bulk_import_processor.rb', line 39
#
# Initialize the processor.
#
# control       - control object; only control.file is read here, to resolve
#                 the data file relative to the control file's directory.
# configuration - Hash of options:
#   :file            - data file path, joined onto the control file's directory
#   :target          - the target database (required)
#   :table           - the table name (required)
#   :truncate        - set to true to truncate before loading (defaults to false)
#   :columns         - columns to load, in the order they appear in the file
#   :field_separator - the field separator (defaults to ",")
#   :line_separator  - the line separator (defaults to "\n")
#   :null_string     - the string that indicates a NULL (defaults to "")
#   :field_enclosure - the field enclosure characters (no default)
#
# Raises ControlError if :target or :table is not specified.
def initialize(control, configuration)
  # Bare super forwards control and configuration to the parent initializer.
  super

  @file = File.join(File.dirname(control.file), configuration[:file])
  @target = configuration[:target]
  @table = configuration[:table]
  # Plain || (not ||=) so the default is not written back into the caller's
  # configuration hash.
  @truncate = configuration[:truncate] || false
  @columns = configuration[:columns]
  @field_separator = configuration[:field_separator] || ','
  @line_separator = configuration[:line_separator] || "\n"
  @null_string = configuration[:null_string] || ""
  @field_enclosure = configuration[:field_enclosure]

  raise ControlError, "Target must be specified" unless @target
  raise ControlError, "Table must be specified" unless @table
end
Instance Attribute Details
#columns ⇒ Object (readonly)
Array of symbols representing the column load order
17 18 19 |
# File 'lib/etl/processor/bulk_import_processor.rb', line 17
#
# Returns the array of symbols representing the column load order (read-only).
def columns
  @columns
end
#field_enclosure ⇒ Object
The field enclosure (defaults to nil)
21 22 23 |
# File 'lib/etl/processor/bulk_import_processor.rb', line 21
#
# Returns the field enclosure (nil unless one was configured or assigned).
def field_enclosure
  @field_enclosure
end
#field_separator ⇒ Object
The field separator (defaults to a comma)
19 20 21 |
# File 'lib/etl/processor/bulk_import_processor.rb', line 19
#
# Returns the field separator.
def field_separator
  @field_separator
end
#file ⇒ Object (readonly)
The file to load from
9 10 11 |
# File 'lib/etl/processor/bulk_import_processor.rb', line 9
#
# Returns the file to load from (read-only).
def file
  @file
end
#line_separator ⇒ Object
The line separator (defaults to a newline)
23 24 25 |
# File 'lib/etl/processor/bulk_import_processor.rb', line 23
#
# Returns the line separator.
def line_separator
  @line_separator
end
#null_string ⇒ Object
The string that indicates a NULL (defaults to an empty string)
25 26 27 |
# File 'lib/etl/processor/bulk_import_processor.rb', line 25
#
# Returns the string that indicates a NULL.
def null_string
  @null_string
end
#table ⇒ Object (readonly)
The table name
13 14 15 |
# File 'lib/etl/processor/bulk_import_processor.rb', line 13
#
# Returns the table name (read-only).
def table
  @table
end
#target ⇒ Object (readonly)
The target database
11 12 13 |
# File 'lib/etl/processor/bulk_import_processor.rb', line 11
#
# Returns the target database (read-only).
def target
  @target
end
#truncate ⇒ Object (readonly)
Set to true to truncate
15 16 17 |
# File 'lib/etl/processor/bulk_import_processor.rb', line 15
#
# Returns the truncate flag; set to true to truncate (read-only).
def truncate
  @truncate
end
Instance Method Details
#process ⇒ Object
Execute the processor
56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 |
# File 'lib/etl/processor/bulk_import_processor.rb', line 56
#
# Execute the processor.
#
# Does nothing when ETL::Engine.skip_bulk_import is set or when the data
# file is empty. Otherwise, inside a conn.transaction block on the connection
# for +target+, truncates the table first if +truncate+ is set and then calls
# conn.bulk_load with the file, the table name and an options hash:
#
#   :columns - the column load order
#   :fields  - present only if at least one of the separator/enclosure/null
#              settings is set; carries :null_string, :delimited_by,
#              :enclosed_by and :terminated_by for whichever are set
def process
  return if ETL::Engine.skip_bulk_import
  return if File.size(file) == 0

  conn = ETL::Engine.connection(target)
  conn.transaction do
    conn.truncate(table_name) if truncate

    options = {}
    options[:columns] = columns
    if field_separator || field_enclosure || line_separator || null_string
      options[:fields] = {}
      options[:fields][:null_string] = null_string if null_string
      options[:fields][:delimited_by] = field_separator if field_separator
      options[:fields][:enclosed_by] = field_enclosure if field_enclosure
      options[:fields][:terminated_by] = line_separator if line_separator
    end

    conn.bulk_load(file, table_name, options)
  end
end