Class: ETL::Parser::DelimitedParser

Inherits:

Parser

Object
Parser
ETL::Parser::DelimitedParser

show all

Defined in: lib/etl/parser/delimited_parser.rb

Overview

Parses delimited files

Defined Under Namespace

Classes: Field

Instance Attribute Summary

Attributes inherited from Parser

#options, #source

Instance Method Summary collapse

#each ⇒ Object

Returns each row.
#fields ⇒ Object

Get an array of defined fields.
#get_fields_names(file) ⇒ Object
#initialize(source, options = {}) ⇒ DelimitedParser constructor

Initializes the parser. Arguments: source — the Source object; options — a Hash of options for the parser (defaults to an empty hash).

Methods inherited from Parser

class_for_name

Constructor Details

#initialize(source, options = {}) ⇒ `DelimitedParser`

Initialize the parser

source: The Source object
options: Hash of options for the parser, defaults to an empty hash

# File 'lib/etl/parser/delimited_parser.rb', line 8

# Build a delimited-file parser.
# * source: The Source object
# * options: Hash of options for the parser, defaults to an empty hash
def initialize(source, options={})
  # Forward both arguments unchanged to the parent constructor.
  super(source, options)
  # Run this parser's own setup step (configure is defined outside this listing).
  configure
end

Instance Method Details

#each ⇒ `Object`

Returns each row.

# File 'lib/etl/parser/delimited_parser.rb', line 29

# Yields every data row of every file matching the parser's file glob.
# Each row is yielded as a Hash that maps field name => raw value.
#
# When no fields have been defined, they are obtained from get_fields_names
# for the file being read. For each file the first source.skip_lines lines
# are discarded before any row is yielded.
def each
  Dir.glob(file).each do |path|
    ETL::Engine.logger.debug "parsing #{path}"
    if fields.length.zero?
      ETL::Engine.logger.debug "no columns specified so reading names from first line of #{path}"
      @fields = get_fields_names(path)
    end

    # Both counters restart for every file matched by the glob.
    row_number = 0
    skipped = 0
    FasterCSV.foreach(path, options) do |values|
      if skipped < source.skip_lines
        ETL::Engine.logger.debug "skipping line"
        skipped += 1
        next
      end

      # row_number counts yielded rows only; skipped lines are not included.
      row_number += 1
      validate_row(values, row_number, path)

      # Pair each value with the field defined at the same position.
      record = {}
      values.each_with_index { |value, position| record[fields[position].name] = value }
      yield record
    end
  end
end

#fields ⇒ `Object`

Get an array of defined fields



57
58
59

# File 'lib/etl/parser/delimited_parser.rb', line 57

# Returns the array of defined fields, creating an empty array on first
# access so callers always get an Array back.
def fields
  @fields = [] unless @fields
  @fields
end

#get_fields_names(file) ⇒ `Object`

# File 'lib/etl/parser/delimited_parser.rb', line 13

# Builds the field list from the column names on the first line of +file+.
#
# The header line is parsed with the parser's own options, the same ones
# used to read the data rows, so a custom delimiter or quote character is
# honoured when the names are inferred.
#
# A column name that occurs more than once gets a suffix "_1", "_2", ...
# in order of appearance; a name that occurs once is left unchanged.
#
# * file: path of the file whose first line holds the column names
#
# Returns an Array of Field objects, one per column, each named by a Symbol.
# Raises EOFError (from IO#readline) when the file is empty.
#
# NOTE(review): assumes options does not switch FasterCSV into header mode,
# where parse would no longer return a plain array of rows -- confirm.
def get_fields_names(file)
  File.open(file) do |input|
    names = FasterCSV.parse(input.readline, options).first

    # Count every name once up front so duplicates are detected in a single
    # pass instead of rescanning the header for each column.
    totals = Hash.new(0)
    names.each { |name| totals[name] += 1 }

    # Running count per name: the 1-based occurrence index of the current column.
    seen = Hash.new(0)
    names.map do |name|
      seen[name] += 1
      suffix = totals[name] > 1 ? "_#{seen[name]}" : ""
      Field.new((name + suffix).to_sym)
    end
  end
end

Class: ETL::Parser::DelimitedParser

Overview

Defined Under Namespace

Instance Attribute Summary

Attributes inherited from Parser

Instance Method Summary collapse

Methods inherited from Parser

Constructor Details

#initialize(source, options = {}) ⇒ DelimitedParser

Instance Method Details

#each ⇒ Object

#fields ⇒ Object

#get_fields_names(file) ⇒ Object

#initialize(source, options = {}) ⇒ `DelimitedParser`

#each ⇒ `Object`

#fields ⇒ `Object`

#get_fields_names(file) ⇒ `Object`