Class: DataShift::CsvLoader

Inherits:
LoaderBase show all
Includes:
FileLoader, Logging
Defined in:
lib/datashift/loaders/csv_loader.rb

Instance Attribute Summary

Attributes included from FileLoader

#file_name

Attributes inherited from LoaderBase

#binder, #doc_context, #file_name

Attributes included from Delimiters

#attribute_list_end, #attribute_list_start, #csv_delimiter, #key_value_sep, #text_delim

Instance Method Summary collapse

Methods included from Logging

#logdir, #logdir=, #logger, #verbose

Methods inherited from LoaderBase

#abort_on_failure?, #bind_headers, #configure_from, #load_object_class, #report, #reset, #run, #set_headers, #setup_load_class

Methods included from Querying

#find_or_new, #get_record_by, #get_record_by!, #search_for_record, where_field_and_values

Methods included from Delimiters

#column_delim, #column_delim=, #eol, #multi_assoc_delim, #multi_assoc_delim=, #multi_facet_delim, #multi_value_delim, #multi_value_delim=, #name_value_delim, #name_value_delim=, #setmulti_facet_delim

Constructor Details

#initialize ⇒ CsvLoader

Returns a new instance of CsvLoader.



18
19
20
# File 'lib/datashift/loaders/csv_loader.rb', line 18

# Builds a new CsvLoader. No CSV-specific state is set up here; the bare
# `super` hands construction over to the parent class initializer.
def initialize
  super
end

Instance Method Details

#perform_load(_options = {}) ⇒ Object

Options

[:allow_empty_rows]  : Default is to stop processing once we hit a completely empty row. Set this option to override that.
                       WARNING: may be slow, as it will process all rows as defined by Excel

[:dummy]           : Perform a dummy run - attempt to load everything but then roll back


30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
# File 'lib/datashift/loaders/csv_loader.rb', line 30

# Bulk-load the CSV file named by +file_name+ into the load object class.
#
# The whole file is read up front with CSV.read; the first row is treated as
# the header row and is bound to the load class via set_headers/bind_headers.
# Every remaining row is then processed inside a single
# load_object_class.transaction: each binding gets a node context which is
# processed, after which the doc context is asked to save and monitor progress.
#
# _options is currently unused by this method. Dummy-run mode is read from
# DataShift::Configuration.call.dummy_run; when set, ActiveRecord::Rollback is
# raised at the end of the transaction block so that no changes are kept.
#
# A column that raises while being processed is always logged. When
# doc_context.all_or_nothing? is true the remaining columns of that row are
# skipped; otherwise processing carries on with the next column.
#
# Raises RuntimeError when load_object is not available, and re-raises any
# StandardError escaping the transaction after printing it. +report+ is always
# called, whether or not the load succeeded.
def perform_load(_options = {})
  require 'csv'

  raise "Cannot load - failed to create a #{klass}" unless load_object

  logger.info "Starting bulk load from CSV : #{file_name}"

  # TODO: - can we abstract out what a 'parsed file' is - headers plus value of each node
  # so a common object can represent excel,csv etc
  # then  we can make load() more generic

  parsed_file = CSV.read(file_name)

  # assume headers are row 0
  header_idx = 0
  header_row = parsed_file.shift

  set_headers(DataShift::Headers.new(:csv, header_idx, header_row))

  # maps list of headers into suitable calls on the Active Record class
  bind_headers(headers)

  begin
    puts 'Dummy Run - Changes will be rolled back' if DataShift::Configuration.call.dummy_run

    load_object_class.transaction do
      logger.info "Processing #{parsed_file.size} rows"

      parsed_file.each_with_index do |row, row_idx|
        logger.info "Processing Row #{row_idx} : #{row}"

        # Iterate over the bindings, creating a context from data in the associated CSV column
        @binder.bindings.each_with_index do |method_binding, binding_idx|
          unless method_binding.valid?
            logger.warn("No binding was found for column (#{binding_idx}) [#{method_binding.pp}]")
            next
          end

          # If binding to a column, get the value from the cell (bindings can be to internal methods)
          value = method_binding.index ? row[method_binding.index] : nil

          # NOTE(review): the position within the bindings list is passed here, exactly as
          # before (the old inner block parameter shadowed the row index). Confirm whether
          # create_node_context actually expects the row index instead.
          context = doc_context.create_node_context(method_binding, binding_idx, value)

          logger.info "Processing Column #{method_binding.index} (#{method_binding.pp})"

          begin
            context.process
          rescue StandardError => column_error
            abort_row = doc_context.all_or_nothing?

            if abort_row
              logger.error('Complete Row aborted - All or nothing set and Current Column failed.')
            else
              # Previously this failure was swallowed without a trace; record it so a
              # partially loaded row can be diagnosed.
              logger.error("Failed to process Column #{method_binding.index} (#{method_binding.pp}) - continuing with next column")
            end

            logger.error(column_error.backtrace.first.inspect)
            logger.error(column_error.inspect)

            break if abort_row
          end
        end # end of each column(node)

        doc_context.save_and_monitor_progress

        doc_context.reset unless doc_context.node_context.next_update?
      end # all rows processed

      if DataShift::Configuration.call.dummy_run
        puts 'CSV loading stage done - Dummy run so Rolling Back.'
        raise ActiveRecord::Rollback # Don't actually create/upload to DB if we are doing dummy run
      end
    end # TRANSACTION N.B ActiveRecord::Rollback does not propagate outside of the containing transaction block
  rescue StandardError => e
    puts "ERROR: CSV loading failed : #{e.inspect}"
    raise # re-raise the exception currently being handled
  ensure
    report
  end

  puts 'CSV loading stage Complete.'
end