Module: DarwinCore::Ingester

Included in:
Core, Extension
Defined in:
lib/dwc_archive/ingester.rb

Overview

This module encapsulates the logic for reading a CSV file so that it can be shared by the classes that need this functionality.

Instance Attribute Summary collapse

Instance Method Summary collapse

Instance Attribute Details

#data ⇒ Object (readonly)

Returns the value of attribute data


6
7
8
# File 'lib/dwc_archive/ingester.rb', line 6

# Returns the current value of the @data instance variable.
def data
  @data
end

#encoding ⇒ Object (readonly)

Returns the value of attribute encoding


6
7
8
# File 'lib/dwc_archive/ingester.rb', line 6

# Returns the current value of the @encoding instance variable.
def encoding
  @encoding
end

#fields ⇒ Object (readonly)

Returns the value of attribute fields


7
8
9
# File 'lib/dwc_archive/ingester.rb', line 7

# Returns the current value of the @fields instance variable.
def fields
  @fields
end

#fields_separator ⇒ Object (readonly)

Returns the value of attribute fields_separator


6
7
8
# File 'lib/dwc_archive/ingester.rb', line 6

# Returns the current value of the @fields_separator instance variable.
def fields_separator
  @fields_separator
end

#file_path ⇒ Object (readonly)

Returns the value of attribute file_path


7
8
9
# File 'lib/dwc_archive/ingester.rb', line 7

# Returns the current value of the @file_path instance variable.
def file_path
  @file_path
end

#ignore_headers ⇒ Object (readonly)

Returns the value of attribute ignore_headers


7
8
9
# File 'lib/dwc_archive/ingester.rb', line 7

# Returns the current value of the @ignore_headers instance variable.
def ignore_headers
  @ignore_headers
end

#line_separator ⇒ Object (readonly)

Returns the value of attribute line_separator


7
8
9
# File 'lib/dwc_archive/ingester.rb', line 7

# Returns the current value of the @line_separator instance variable.
def line_separator
  @line_separator
end

#properties ⇒ Object (readonly)

Returns the value of attribute properties


6
7
8
# File 'lib/dwc_archive/ingester.rb', line 6

# Returns the current value of the @properties instance variable.
def properties
  @properties
end

#quote_character ⇒ Object (readonly)

Returns the value of attribute quote_character


7
8
9
# File 'lib/dwc_archive/ingester.rb', line 7

# Returns the current value of the @quote_character instance variable.
def quote_character
  @quote_character
end

#size ⇒ Object (readonly)

Returns the value of attribute size


6
7
8
# File 'lib/dwc_archive/ingester.rb', line 6

# Returns the current value of the @size instance variable.
def size
  @size
end

Instance Method Details

#read(batch_size = 10_000) {|[res, errors]| ... } ⇒ Object

Yields:

  • ([res, errors])

14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
# File 'lib/dwc_archive/ingester.rb', line 14

# Reads the CSV file at +@file_path+ row by row, collecting rows into a
# result list and an error list.
#
# A row with fewer cells than the highest +:index+ declared in +@fields+
# requires is appended to the error list as-is. Every other row is passed to
# +process_csv_row+ together with both lists (that helper is defined
# elsewhere; presumably it appends to one of them -- confirm there).
#
# When a block is given, the lists accumulated so far are yielded and then
# reset every time the zero-based row position is a non-zero multiple of
# +batch_size+, and the remaining rows are yielded once more at the end.
#
# @param batch_size [Integer] row-position interval at which progress is
#   logged and, when a block is given, a batch is yielded
# @yield [[res, errors]] a two-element array holding the rows accumulated
#   since the previous yield
# @return [Array] +[res, errors]+: all rows when no block is given, otherwise
#   only the rows accumulated since the last in-loop yield
def read(batch_size = 10_000)
  DarwinCore.logger_write(@dwc.object_id, "Reading #{name} data")
  res = []
  errors = []
  # NOTE(review): assumes define_csv_args returns a Hash of CSV options with
  # symbol keys, which is what the double splat below requires -- confirm.
  args = define_csv_args
  # Highest column index any field refers to, plus one. The -1 fallback keeps
  # the requirement at zero cells when no fields are declared.
  min_size = (@fields.map { |f| f[:index].to_i }.max || -1) + 1
  # The block form closes the file even if parsing raises, and File.open
  # never interprets the path as a command the way Kernel#open can.
  File.open(@file_path) do |io|
    # CSV options must be passed as keywords on Ruby 3+.
    CSV.new(io, **args).each_with_index do |r, i|
      next if @ignore_headers && i.zero?
      if min_size > r.size
        errors << r
      else
        process_csv_row(res, errors, r)
      end
      next if i.zero? || i % batch_size != 0
      DarwinCore.logger_write(@dwc.object_id,
                              format("Ingested %s records from %s",
                                     i, name))
      next unless block_given?
      yield [res, errors]
      res = []
      errors = []
    end
  end
  yield [res, errors] if block_given?
  [res, errors]
end