Class: ETL::Processor::EncodeProcessor

Inherits:
Processor show all
Defined in:
lib/etl/processor/encode_processor.rb

Overview

The encode processor uses Iconv to convert a file from one encoding (e.g. utf-8) to another (e.g. latin1), line by line.

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(control, configuration) ⇒ EncodeProcessor

Initialize the processor.

Configuration options:

  • :source_file: The file to load data from

  • :source_encoding: The source file encoding (e.g. ‘latin1’, ‘utf-8’), as supported by Iconv

  • :target_file: The file to write data to

  • :target_encoding: The target file encoding

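Raises:

  • ControlError: if :source_file or :target_file is missing, if both resolve to the same path, or if Iconv does not support the source or target encoding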



22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
# File 'lib/etl/processor/encode_processor.rb', line 22

# Initialize the processor.
#
# Configuration options:
# * <tt>:source_file</tt>: The file to load data from
# * <tt>:source_encoding</tt>: The source file encoding, as supported by Iconv
# * <tt>:target_file</tt>: The file to write data to
# * <tt>:target_encoding</tt>: The target file encoding
#
# Both file names are resolved relative to the directory of the control file.
#
# Raises ControlError when either file is missing from the configuration,
# when both resolve to the same path, or when Iconv rejects an encoding.
def initialize(control, configuration)
  super

  if configuration[:source_file].nil?
    raise ControlError, "Source file must be specified"
  end
  if configuration[:target_file].nil?
    raise ControlError, "Target file must be specified"
  end

  # Paths in the configuration are relative to the control file's directory.
  control_dir = File.dirname(control.file)
  @source_file = File.join(control_dir, configuration[:source_file])
  @source_encoding = configuration[:source_encoding]
  @target_file = File.join(control_dir, configuration[:target_file])
  @target_encoding = configuration[:target_encoding]

  if source_file == target_file
    raise ControlError, "Source and target file cannot currently point to the same file"
  end

  # Build the converter eagerly so an unsupported encoding is reported here
  # rather than when the processor is executed.
  begin
    @iconv = Iconv.new(target_encoding, source_encoding)
  rescue Iconv::InvalidEncoding
    raise ControlError, "Either the source encoding '#{source_encoding}' or the target encoding '#{target_encoding}' is not supported"
  end
end

Instance Attribute Details

#source_encoding ⇒ Object (readonly)

The source file encoding



11
12
13
# File 'lib/etl/processor/encode_processor.rb', line 11

# Reader for the source file encoding stored in @source_encoding.
def source_encoding
  @source_encoding
end

#source_file ⇒ Object (readonly)

The file to load from



7
8
9
# File 'lib/etl/processor/encode_processor.rb', line 7

# Reader for the path of the file to load from, stored in @source_file.
def source_file
  @source_file
end

#target_encoding ⇒ Object (readonly)

The target file encoding



13
14
15
# File 'lib/etl/processor/encode_processor.rb', line 13

# Reader for the target file encoding stored in @target_encoding.
def target_encoding
  @target_encoding
end

#target_file ⇒ Object (readonly)

The file to write to



9
10
11
# File 'lib/etl/processor/encode_processor.rb', line 9

# Reader for the path of the file to write to, stored in @target_file.
def target_file
  @target_file
end

Instance Method Details

#process ⇒ Object

Execute the processor



39
40
41
42
43
44
45
46
47
48
49
50
# File 'lib/etl/processor/encode_processor.rb', line 39

# Execute the processor.
#
# Reads source_file line by line, passes each line through the @iconv
# converter and appends the converted text to target_file, which is
# truncated or created first. Once the last line has been written the
# converter is reset with iconv(nil) and the bytes returned by that call
# are appended as well.
def process
  # operate line by line to handle large files without loading them in-memory
  # could be replaced by a system iconv call when available, for greater performance
  File.open(source_file) do |source|
    File.open(target_file, 'w') do |target|
      source.each_line do |line|
        target << @iconv.iconv(line)
      end
      # The converter carries shift state from one call to the next.
      # Passing nil returns the sequence that puts the output back into its
      # initial state and resets the converter, so stateful encodings are
      # terminated properly and a later run starts from a clean state.
      target << @iconv.iconv(nil)
    end
  end
end