Class: ETL::Parser::CsvParser
- Defined in:
- lib/etl/parser/csv_parser.rb
Overview
Parses CSV files
Defined Under Namespace
Classes: Field
Instance Attribute Summary
Attributes inherited from Parser
Instance Method Summary collapse
-
#each ⇒ Object
Returns each row.
-
#fields ⇒ Object
Get an array of defined fields.
- #get_fields_names(file) ⇒ Object
-
#initialize(source, options = {}) ⇒ CsvParser
constructor
Initialize the parser.
- source: The Source object
- options: Hash of options for the parser, defaults to an empty hash.
Methods inherited from Parser
Constructor Details
#initialize(source, options = {}) ⇒ CsvParser
Initialize the parser.
- source: The Source object
- options: Hash of options for the parser, defaults to an empty hash
8 9 10 11 |
# File 'lib/etl/parser/csv_parser.rb', line 8
#
# Initialize the parser.
#
# * source: The Source object
# * options: Hash of options for the parser, defaults to an empty hash
def initialize(source, options = {})
  # Bare `super` forwards source and options unchanged to the inherited
  # Parser constructor.
  super
  # `configure` is not part of this listing; presumably inherited from
  # Parser or defined elsewhere in the class -- confirm.
  configure
end
Instance Method Details
#each ⇒ Object
Returns each row.
29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 |
# File 'lib/etl/parser/csv_parser.rb', line 29
#
# Returns each row.
#
# Expands the +file+ glob and reads every matching file with CSV.foreach,
# yielding one Hash per data row that maps each field name to the raw value
# found at the same column position.
#
# When no fields have been defined yet, the field names are taken from the
# first line of the file being parsed (see get_fields_names) and kept in
# @fields for the rest of the run.
#
# NOTE(review): reading the names from the first line does not by itself
# skip that line during iteration; this appears to rely on
# source.skip_lines covering the header -- confirm.
# NOTE(review): +options+ is handed to CSV.foreach as a positional Hash;
# on Ruby 3+ the CSV library expects keyword options -- confirm the
# targeted Ruby version before changing the call.
def each
  # The block parameter is named +path+ so it does not shadow the +file+
  # method that supplies the glob pattern.
  Dir.glob(file).each do |path|
    ETL::Engine.logger.debug "parsing #{path}"
    if fields.empty?
      ETL::Engine.logger.debug "no columns specified so reading names from first line of #{path}"
      @fields = get_fields_names(path)
    end

    line = 0          # number of data rows seen so far (skipped lines excluded)
    lines_skipped = 0 # leading lines dropped so far, up to source.skip_lines

    CSV.foreach(path, options) do |raw_row|
      if lines_skipped < source.skip_lines
        ETL::Engine.logger.debug "skipping line"
        lines_skipped += 1
        next
      end

      line += 1
      validate_row(raw_row, line, path)

      row = {}
      raw_row.each_with_index do |value, index|
        row[fields[index].name] = value
      end
      yield row
    end
  end
end
#fields ⇒ Object
Get an array of defined fields
57 58 59 |
# File 'lib/etl/parser/csv_parser.rb', line 57
#
# Get an array of defined fields.
#
# Lazily initializes @fields to an empty Array on first access and returns
# the same Array object on every later call.
def fields
  @fields ||= []
end
#get_fields_names(file) ⇒ Object
13 14 15 16 17 18 19 20 21 22 23 24 25 26 |
# File 'lib/etl/parser/csv_parser.rb', line 13
#
# Builds the field list from the first line of +file+.
#
# The first line is parsed as CSV using the parser +options+. Each column
# name becomes a Field whose name is the column name as a Symbol. A name
# that appears more than once gets a 1-based "_N" suffix for every one of
# its occurrences (e.g. a, b, a => :a_1, :b, :a_2); unique names are left
# untouched.
#
# Returns an Array of Field objects in column order.
#
# NOTE(review): a blank header cell is presumably parsed as nil, in which
# case the String concatenation below raises NoMethodError -- confirm
# whether blank headers need to be supported.
def get_fields_names(file)
  File.open(file) do |input|
    headers = CSV.parse(input.readline, options).first

    # First pass: how many times each header occurs in total.
    totals = Hash.new(0)
    headers.each { |header| totals[header] += 1 }

    # Second pass: running occurrence counter per header, used as suffix.
    seen = Hash.new(0)
    headers.map do |header|
      seen[header] += 1
      suffix = totals[header] > 1 ? "_#{seen[header]}" : ""
      Field.new((header + suffix).to_sym)
    end
  end
end