Class: ETL::Parser::DelimitedParser

Inherits:
Parser
  • Object
show all
Defined in:
lib/etl/parser/delimited_parser.rb

Overview

Parses delimited files

Defined Under Namespace

Classes: Field

Instance Attribute Summary

Attributes inherited from Parser

#options, #source

Instance Method Summary collapse

Methods inherited from Parser

class_for_name

Constructor Details

#initialize(source, options = {}) ⇒ DelimitedParser

Initialize the parser

  • source: The Source object

  • options: Hash of options for the parser, defaults to an empty hash



8
9
10
11
# File 'lib/etl/parser/delimited_parser.rb', line 8

# Build a delimited-file parser.
#
# * source: the Source object this parser reads from
# * options: Hash of parser options; an empty Hash when omitted
#
# Hands both arguments to the superclass initializer and then calls
# +configure+.
def initialize(source, options = {})
  super(source, options)
  configure
end

Instance Method Details

#each ⇒ Object

Returns each row.



29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
# File 'lib/etl/parser/delimited_parser.rb', line 29

# Iterate over every row of every file matched by the +file+ glob pattern.
#
# For each matching path:
# * if no fields are defined yet, they are read from the first line of that
#   file via +get_fields_names+ and stored in @fields;
# * the first +source.skip_lines+ rows are discarded (a debug message is
#   logged for each one);
# * each remaining row is passed to +validate_row+ together with its 1-based
#   number (counted after skipping) and the path, then turned into a Hash
#   mapping field name => raw value and yielded to the caller's block.
def each
  Dir.glob(file).each do |path|
    ETL::Engine.logger.debug "parsing #{path}"
    if fields.empty?
      ETL::Engine.logger.debug "no columns specified so reading names from first line of #{path}"
      @fields = get_fields_names(path)
    end

    row_number = 0
    skipped = 0
    FasterCSV.foreach(path, options) do |values|
      if skipped < source.skip_lines
        ETL::Engine.logger.debug "skipping line"
        skipped += 1
      else
        row_number += 1
        validate_row(values, row_number, path)
        record = {}
        # Pair each value with the field defined at the same column position.
        values.each_with_index { |value, position| record[fields[position].name] = value }
        yield record
      end
    end
  end
end

#fields ⇒ Object

Get an array of defined fields



57
58
59
# File 'lib/etl/parser/delimited_parser.rb', line 57

# The fields defined for this parser. The backing @fields is set to an empty
# Array the first time it is found unset (nil or false), so callers always
# get an object they can append to.
def fields
  @fields = [] unless @fields
  @fields
end

#get_fields_names(file) ⇒ Object



13
14
15
16
17
18
19
20
21
22
23
24
25
26
# File 'lib/etl/parser/delimited_parser.rb', line 13

# Read the header (first line) of +file+ and build one Field per column.
#
# The header is parsed with the parser's +options+, the same hash that #each
# hands to FasterCSV.foreach, so a non-default setting such as :col_sep is
# applied to the header line as well as to the data rows.
#
# A column name that occurs more than once gets a 1-based "_N" suffix in order
# of appearance (name,name => :name_1, :name_2); unique names are unchanged.
#
# * file: path of the file whose first line holds the column names
#
# Returns an Array of Field objects, one per header column, each built from
# the column name as a Symbol. IO#readline raises EOFError if the file is
# empty.
def get_fields_names(file)
  File.open(file) do |input|
    header = FasterCSV.parse(input.readline, options).first

    # One pass to learn how often each name occurs in total ...
    totals = Hash.new(0)
    header.each { |name| totals[name] += 1 }

    # ... and a running count to number each duplicate as it is met.
    seen = Hash.new(0)
    header.map do |name|
      seen[name] += 1
      suffix = totals[name] > 1 ? "_#{seen[name]}" : ""
      Field.new((name + suffix).to_sym)
    end
  end
end