Module: DarwinCore::Ingester
Instance Attribute Summary collapse
-
#data ⇒ Object
readonly
Returns the value of attribute data.
-
#encoding ⇒ Object
readonly
Returns the value of attribute encoding.
-
#fields ⇒ Object
readonly
Returns the value of attribute fields.
-
#fields_separator ⇒ Object
readonly
Returns the value of attribute fields_separator.
-
#file_path ⇒ Object
readonly
Returns the value of attribute file_path.
-
#ignore_headers ⇒ Object
readonly
Returns the value of attribute ignore_headers.
-
#line_separator ⇒ Object
readonly
Returns the value of attribute line_separator.
-
#properties ⇒ Object
readonly
Returns the value of attribute properties.
-
#quote_character ⇒ Object
readonly
Returns the value of attribute quote_character.
-
#size ⇒ Object
readonly
Returns the value of attribute size.
Instance Method Summary collapse
Instance Attribute Details
#data ⇒ Object (readonly)
Returns the value of attribute data.
4 5 6 |
# File 'lib/dwc-archive/ingester.rb', line 4
# Returns the value of attribute data (read-only).
def data
  @data
end
#encoding ⇒ Object (readonly)
Returns the value of attribute encoding.
4 5 6 |
# File 'lib/dwc-archive/ingester.rb', line 4
# Returns the value of attribute encoding (read-only).
def encoding
  @encoding
end
#fields ⇒ Object (readonly)
Returns the value of attribute fields.
5 6 7 |
# File 'lib/dwc-archive/ingester.rb', line 5
# Returns the value of attribute fields (read-only).
def fields
  @fields
end
#fields_separator ⇒ Object (readonly)
Returns the value of attribute fields_separator.
4 5 6 |
# File 'lib/dwc-archive/ingester.rb', line 4
# Returns the value of attribute fields_separator (read-only).
def fields_separator
  @fields_separator
end
#file_path ⇒ Object (readonly)
Returns the value of attribute file_path.
5 6 7 |
# File 'lib/dwc-archive/ingester.rb', line 5
# Returns the value of attribute file_path (read-only).
def file_path
  @file_path
end
#ignore_headers ⇒ Object (readonly)
Returns the value of attribute ignore_headers.
5 6 7 |
# File 'lib/dwc-archive/ingester.rb', line 5
# Returns the value of attribute ignore_headers (read-only).
def ignore_headers
  @ignore_headers
end
#line_separator ⇒ Object (readonly)
Returns the value of attribute line_separator.
5 6 7 |
# File 'lib/dwc-archive/ingester.rb', line 5
# Returns the value of attribute line_separator (read-only).
def line_separator
  @line_separator
end
#properties ⇒ Object (readonly)
Returns the value of attribute properties.
4 5 6 |
# File 'lib/dwc-archive/ingester.rb', line 4
# Returns the value of attribute properties (read-only).
def properties
  @properties
end
#quote_character ⇒ Object (readonly)
Returns the value of attribute quote_character.
5 6 7 |
# File 'lib/dwc-archive/ingester.rb', line 5
# Returns the value of attribute quote_character (read-only).
def quote_character
  @quote_character
end
#size ⇒ Object (readonly)
Returns the value of attribute size.
4 5 6 |
# File 'lib/dwc-archive/ingester.rb', line 4
# Returns the value of attribute size (read-only).
def size
  @size
end
Instance Method Details
#read(batch_size = 10000) {|[res, errors]| ... } ⇒ Object
12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 |
# File 'lib/dwc-archive/ingester.rb', line 12
#
# Reads the CSV file at @file_path row by row. Each row is either handed to
# process_csv_row(res, errors, row) or, when it has fewer columns than the
# largest :index in @fields requires, appended to the errors list as-is.
#
# Progress is logged through DarwinCore.logger_write once per +batch_size+
# data rows. When a block is given, [res, errors] is yielded at each of those
# points and both lists are restarted; the remainder (possibly two empty
# arrays) is yielded once more after the last row.
#
# If @ignore_headers is truthy the first row is skipped and not counted.
# An empty (or nil) @quote_character is replaced by "\b" before parsing and
# stays that way on the instance afterwards.
#
# NOTE(review): this method reads @field_separator, while the documented
# attribute reader `fields_separator` returns @fields_separator. The
# assignment is not visible here -- confirm which name is actually set.
#
# @param batch_size [Integer] number of data rows per batch / progress message
# @yield [[res, errors]] a two-element array per batch (only if a block is given)
# @return [Array] [res, errors]; everything read when no block is given,
#   otherwise the final batch
def read(batch_size = 10000)
  DarwinCore.logger_write(@dwc.object_id, "Reading %s data" % name)
  res = []
  errors = []
  @quote_character = "\b" if @quote_character.to_s.empty?
  # One more than the highest configured column index.
  min_size = @fields.map { |f| f[:index].to_i }.max + 1
  # Block form closes the file even if parsing or the caller's block raises.
  File.open(@file_path) do |io|
    # Brace-less trailing hash: accepted as options on old Rubies and as
    # keyword arguments on Ruby 3+.
    csv = CSV.new(io, :col_sep => @field_separator, :quote_char => @quote_character)
    seen = 0 # data rows handled so far; the skipped header is not counted
    csv.each_with_index do |r, i|
      next if @ignore_headers && i == 0
      min_size > r.size ? errors << r : process_csv_row(res, errors, r)
      seen += 1
      next unless seen % batch_size == 0
      DarwinCore.logger_write(@dwc.object_id, "Ingested %s records from %s" % [seen, name])
      next unless block_given?
      yield [res, errors]
      res = []
      errors = []
    end
  end
  yield [res, errors] if block_given?
  [res, errors]
end