Module: DataShift::ExcelLoading

Includes:
ExcelBase
Included in:
ExcelLoader
Defined in:
lib/loaders/excel_loader.rb

Instance Attribute Summary collapse

Attributes included from ExcelBase

#excel_headers, #header_row_index

Instance Method Summary collapse

Methods included from ExcelBase

#ar_to_headers, #ar_to_xls, #ar_to_xls_cell, #ar_to_xls_row, #parse_headers, #sanitize_sheet_name

Instance Attribute Details

#contains_dataObject (readonly)

Currently struggling to determine the ‘end’ of data in a spreadsheet this reflects if current row had any data at all



30
31
32
# File 'lib/loaders/excel_loader.rb', line 30

def contains_data
  @contains_data
end

#excelObject

Returns the value of attribute excel.



26
27
28
# File 'lib/loaders/excel_loader.rb', line 26

def excel
  @excel
end

Instance Method Details

#perform_excel_load(file_name, options = {}) ⇒ Object

Options:

 [:dummy]           : Perform a dummy run - attempt to load everything but then roll back

 [:sheet_number]    : Default is 0. The index of the Excel Worksheet to use.
 [:header_row]      : Default is 0. Use alternative row as header definition.

Options passed through  to :  populate_method_mapper_from_headers

 [:mandatory]       : Array of mandatory column names
 [:force_inclusion] : Array of inbound column names to force into mapping
 [:include_all]     : Include all headers in processing - takes precedence of :force_inclusion
 [:strict]          : Raise exception when no mapping found for a column heading (non mandatory)


69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
# File 'lib/loaders/excel_loader.rb', line 69

def perform_excel_load( file_name, options = {} )

  raise MissingHeadersError, "Minimum row for Headers is 0 - passed #{options[:header_row]}" if(options[:header_row] && options[:header_row].to_i < 0)

  start_excel(file_name, options)

  begin
    puts "Dummy Run - Changes will be rolled back" if options[:dummy]

    load_object_class.transaction do

      @sheet.each_with_index do |row, i|

        current_row_idx = i
        @current_row = row

        next if(current_row_idx == header_row_index)

        # Excel num_rows seems to return all 'visible' rows, which appears to be greater than the actual data rows
        # (TODO - write spec to process .xls with a huge number of rows)
        #
        # This is rubbish but currently manually detect when actual data ends, this isn't very smart but
        # got no better idea than ending once we hit the first completely empty row
        break if(@current_row.nil? || @current_row.compact.empty?)

        logger.info "Processing Row #{current_row_idx} : #{@current_row}"

        @contains_data = false

        begin

          process_excel_row(row)

          # This is rubbish but currently have to manually detect when actual data ends,
          # no other way to detect when we hit the first completely empty row
          break unless(contains_data == true)

        rescue => e
          process_excel_failure(e, true)

          # don't forget to reset the load object
          new_load_object
          next
        end

        break unless(contains_data == true)

        # currently here as we can only identify the end of a speadsheet by first empty row
        @reporter.processed_object_count += 1

        # TODO - make optional -  all or nothing or carry on and dump out the exception list at end

        save_and_report

        # don't forget to reset the object or we'll update rather than create
        new_load_object

      end   # all rows processed

      if(options[:dummy])
        puts "Excel loading stage complete - Dummy run so Rolling Back."
        raise ActiveRecord::Rollback # Don't actually create/upload to DB if we are doing dummy run
      end

    end   # TRANSACTION N.B ActiveRecord::Rollback does not propagate outside of the containing transaction block

  rescue => e
    puts "ERROR: Excel loading failed : #{e.inspect}"
    raise e
  ensure
    report
  end

end

#process_excel_failure(e, delete_object = true) ⇒ Object



144
145
146
147
148
149
150
151
152
153
154
# File 'lib/loaders/excel_loader.rb', line 144

def process_excel_failure( e, delete_object = true)
  failure(@current_row, delete_object)

  if(verbose)
    puts "perform_excel_load failed in row [#{current_row_idx}] #{@current_row} - #{e.message} :"
    puts e.backtrace
  end

  logger.error  "perform_excel_load failed in row [#{current_row_idx}] #{@current_row} - #{e.message} :"
  logger.error e.backtrace.join("\n")
end

#process_excel_row(row) ⇒ Object



161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
# File 'lib/loaders/excel_loader.rb', line 161

def process_excel_row(row)

  # First assign any default values for columns
  process_defaults

  # TODO - Smart sorting of column processing order ....
  # Does not currently ensure mandatory columns (for valid?) processed first but model needs saving
  # before associations can be processed so user should ensure mandatory columns are prior to associations

  # as part of this we also attempt to save early, for example before assigning to
  # has_and_belongs_to associations which require the load_object has an id for the join table

  # Iterate over method_details, working on data out of associated Excel column
  @method_mapper.method_details.each_with_index do |method_detail, i|

    unless method_detail
      logger.warn("No method_detail found for column (#{i})")
      next # TODO populate unmapped with a real MethodDetail that is 'null' and create is_nil
    end

    logger.info "Processing Column #{method_detail.column_index} (#{method_detail.operator})"

    value = row[method_detail.column_index]

    @contains_data = true unless(value.nil? || value.to_s.empty?)

    process(method_detail, value)
  end

end

#start_excel(file_name, options = {}) ⇒ Object



32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
# File 'lib/loaders/excel_loader.rb', line 32

def start_excel( file_name, options = {} )

  @excel = Excel.new

  excel.open(file_name)

  puts "\n\n\nLoading from Excel file: #{file_name}"
  logger.info("\nStarting Load from Excel file: #{file_name}")

  sheet_number = options[:sheet_number] || 0

  @sheet = excel.worksheet( sheet_number )

  parse_headers(@sheet, options[:header_row])

  raise MissingHeadersError, "No headers found - Check Sheet #{@sheet} is complete and Row #{header_row_index} contains headers" if(excel_headers.empty?)

  # Create a method_mapper which maps list of headers into suitable calls on the Active Record class
  # For example if model has an attribute 'price' will map columns called Price or price or PRICE etc to this attribute
  populate_method_mapper_from_headers(excel_headers, options )

  reporter.reset
end

#value_at(row, column) ⇒ Object



157
158
159
# File 'lib/loaders/excel_loader.rb', line 157

def value_at(row, column)
  @excel[row, column]
end