Class: DataShift::LoaderBase

Inherits:
Object
  • Object
show all
Includes:
Logging, Querying
Defined in:
lib/loaders/loader_base.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Methods included from Querying

#get_record_by, #get_record_by!, #search_for_record

Methods included from Logging

#logdir, #logger

Constructor Details

#initialize(object_class, object = nil, options = {}) ⇒ LoaderBase

Setup loading

Options to drive building the method dictionary for a class, enabling headers to be mapped to operators on that class.

Options

:reload           : Force load of the method dictionary for object_class even if already loaded
:instance_methods : Include setter/delegate style instance methods for assignment, as well as AR columns
:verbose          : Verbose logging and to STDOUT


50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
# File 'lib/loaders/loader_base.rb', line 50

# Set up a loader for objects of type +object_class+.
#
# Chooses the populator, builds the MethodDictionary for the class when it is
# not yet available (or when :reload is requested), creates the method mapper
# and reporter, and finally calls reset(object).
#
# @param object_class [Class] class of the objects to be loaded
# @param object [Object, nil] passed straight to reset; when nil, reset builds a new load object
# @param options [Hash] loader options; the caller's hash is never modified
#   :populator        - String (constant name) or Class to instantiate instead of DataShift::Populator
#   :reload           - force the method dictionary to be rebuilt
#   :instance_methods - forwarded to MethodDictionary.find_operators
#   :verbose          - stored in @verbose
def initialize(object_class, object = nil, options = {})
  @load_object_class = object_class

  logger.info("Loading objects of type #{@load_object_class} (#{object})")

  populator_option = options[:populator]

  @populator = case populator_option
               when String then ::Object.const_get(populator_option).new
               when Class  then populator_option.new
               else DataShift::Populator.new
               end

  # Work on a private copy (dup rather than clone, clone can cause issues like
  # 'can't modify frozen hash') so the extract! below neither mutates the
  # caller's hash nor fails when the caller passed a frozen one.
  @config = options.dup

  # Gather names of all possible 'setter' methods on AR class (instance variables and associations)
  if( !MethodDictionary::for?(object_class) || @config[:reload] )
    # The dictionary-only options are removed from @config once they have been used
    meth_dict_opts = @config.extract!(:reload, :instance_methods)
    DataShift::MethodDictionary.find_operators( @load_object_class, meth_dict_opts)

    # Create dictionary of data on all possible 'setter' methods which can be used to
    # populate or integrate an object of type @load_object_class
    DataShift::MethodDictionary.build_method_details(@load_object_class)
  end

  @method_mapper = DataShift::MethodMapper.new

  @verbose = @config[:verbose]

  @current_row_idx = 0

  @headers = []

  @reporter = DataShift::Reporter.new

  reset(object)
end

Instance Attribute Details

#configObject

Returns the value of attribute config.



35
36
37
# File 'lib/loaders/loader_base.rb', line 35

# Loader options hash; initialize stores a copy of the supplied options here.
def config; @config; end

#current_row_idxObject

The inbound row/line number



28
29
30
# File 'lib/loaders/loader_base.rb', line 28

# The inbound row/line number; initialize starts it at 0.
def current_row_idx; @current_row_idx; end

#headersObject (readonly)

Returns the value of attribute headers.



23
24
25
# File 'lib/loaders/loader_base.rb', line 23

# Column headers most recently given to populate_method_mapper_from_headers
# (an empty Array straight after initialize).
def headers; @headers; end

#load_objectObject

Returns the value of attribute load_object.



30
31
32
# File 'lib/loaders/loader_base.rb', line 30

# The object currently being populated; set by reset and new_load_object.
def load_object; @load_object; end

#load_object_classObject

Returns the value of attribute load_object_class.



30
31
32
# File 'lib/loaders/loader_base.rb', line 30

# The class handed to initialize; new_load_object instantiates it.
def load_object_class; @load_object_class; end

#method_mapperObject

Returns the value of attribute method_mapper.



25
26
27
# File 'lib/loaders/loader_base.rb', line 25

# The DataShift::MethodMapper created in initialize.
def method_mapper; @method_mapper; end

#populatorObject

Returns the value of attribute populator.



33
34
35
# File 'lib/loaders/loader_base.rb', line 33

# The populator chosen in initialize (options[:populator] or DataShift::Populator).
def populator; @populator; end

#reporterObject

Returns the value of attribute reporter.



32
33
34
# File 'lib/loaders/loader_base.rb', line 32

# The DataShift::Reporter created in initialize; tracks loaded and failed objects.
def reporter; @reporter; end

#verboseObject

Returns the value of attribute verbose.



35
36
37
# File 'lib/loaders/loader_base.rb', line 35

# Verbose flag; initialize copies it from the :verbose option.
def verbose; @verbose; end

Instance Method Details

#abort_on_failure?Boolean

Returns:

  • (Boolean)


432
433
434
# File 'lib/loaders/loader_base.rb', line 432

# Whether the :abort_on_failure option is switched on.
#
# The option is compared as a String, so both true and 'true' count as set;
# any other value (including a missing key) does not.
#
# @return [Boolean]
def abort_on_failure?
  setting = @config[:abort_on_failure]
  setting.to_s == 'true'
end

#configure_from(yaml_file) ⇒ Object

Any Config under key ‘LoaderBase’ is merged over existing options - taking precedence.

Any Config under a key equal to the full name of the Loader class (e.g DataShift::SpreeEcom::ImageLoader) is merged over existing options - taking precedence.

Format :

  LoaderClass:
   option: value


233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
# File 'lib/loaders/loader_base.rb', line 233

# Merge loader options read from a YAML file (ERB is expanded first) into @config.
#
# Entries under the 'LoaderBase' key are merged first, then entries under the
# key equal to this loader's full class name; each merge takes precedence over
# what is already in @config. The populator is then configured from the same file.
#
# An empty file, or one whose top level is not a mapping, contributes no options.
#
# @param yaml_file [String] path of the YAML configuration file
def configure_from(yaml_file)

  logger.info("Reading Datashift loader config from: #{yaml_file.inspect}")

  # File.read rather than IO.read, so a path starting with '|' is never run as a command.
  # NOTE(review): the file is ERB-evaluated and YAML-loaded, so it must come from a trusted source.
  data = YAML::load( ERB.new( File.read(yaml_file) ).result )

  logger.info("Read Datashift config: #{data.inspect}")

  # An empty YAML document loads as false/nil and a scalar document as a non-Hash
  data = {} unless data.is_a?(Hash)

  [ 'LoaderBase', self.class.name ].each do |section|
    @config.merge!(data[section]) if(data[section])
  end

  @populator.configure_from(load_object_class, yaml_file)
  logger.info("Loader Options : #{@config.inspect}")
end

#failed_countObject



440
441
442
# File 'lib/loaders/loader_base.rb', line 440

# Number of objects the reporter has recorded as failed.
def failed_count
  failures = reporter.failed_objects
  failures.size
end

#failure(object = @load_object, rollback = false) ⇒ Object

Loading failed. Store the failed object and, if requested, roll back (destroy) the current load object. This covers the use case where the object was saved early but subsequent required columns failed to process, leaving the load object invalid.



381
382
383
384
385
386
387
388
389
390
391
# File 'lib/loaders/loader_base.rb', line 381

# Record +object+ as a failed load and optionally roll it back.
#
# Does nothing when +object+ is nil/false. Otherwise the object is added to the
# reporter's failed objects. When +rollback+ is set and the object responds to
# destroy and is already persisted, it is destroyed.
#
# NOTE(review): after a rollback a fresh instance of the object's class is only
# returned - @load_object itself is not replaced. Confirm that is intended.
#
# @param object [Object, nil] defaults to the current load object
# @param rollback [Boolean] destroy an already saved object
# @return [Object, nil] a new instance of the object's class after a rollback, otherwise nil
def failure(object = @load_object, rollback = false)
  return unless object

  @reporter.add_failed_object(object)

  return unless rollback && object.respond_to?('destroy') && !object.new_record?

  failed_class = object.class
  object.destroy
  failed_class.new
end

#find_and_process(column_name, data) ⇒ Object

Core API - Given a single free text column name from a file, search method mapper for associated operator on base object class.

If suitable association found, process row data and then assign to current load_object



208
209
210
211
212
213
214
215
216
217
218
219
220
# File 'lib/loaders/loader_base.rb', line 208

# Core API - look up the method detail for a free text column name on the
# load object class and, when one exists, hand it to process with +data+.
#
# When no method detail is found, a message is printed and added to the
# current load object's errors under :base.
#
# @param column_name [String] column heading from the inbound file
# @param data [Object] the cell value for that column
def find_and_process(column_name, data)
  unless MethodDictionary.for?(load_object_class)
    puts "WARNING: MethodDictionary empty for class #{load_object_class}"
  end

  detail = MethodDictionary.find_method_detail(load_object_class, column_name)

  return process(detail, data) if detail

  unmatched = "No matching method found for column #{column_name}"
  puts unmatched
  @load_object.errors.add(:base, unmatched)
end

#find_or_new(klass, condition_hash = {}) ⇒ Object



456
457
458
459
460
461
462
463
# File 'lib/loaders/loader_base.rb', line 456

# Return the first existing +klass+ record matching +condition_hash+, or a
# new unsaved instance when there is none.
#
# The matching records are cached in @records under +klass+ (the cache hash is
# created on first use). The lookup uses +where+, the same query interface
# used by process, instead of the removed find(:all, :conditions => ...) finder.
#
# @param klass [Class] model class responding to where and new
# @param condition_hash [Hash] attribute => value conditions
# @return [Object] an existing record or klass.new
def find_or_new( klass, condition_hash = {} )
  @records ||= {}
  @records[klass] = klass.where(condition_hash).to_a
  @records[klass].first || klass.new
end

#get_operator_and_data(inbound_data) ⇒ Object

Return the find_by (where) operator if specified; otherwise use the heading operator, i.e. a where operator embedded in the row takes precedence over the operator in the column heading.

Treat rest of the node as the value to use in the where clause e.g

price:0.99

Column headings will be used, if the row only contains data e.g

0.99

We leave it to caller to manage any other aspects or problems in ‘rest’



265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
# File 'lib/loaders/loader_base.rb', line 265

# Split an inbound cell entry into the where (find_by) operator and its data.
#
#   'price:0.99'  => ['price', '0.99']
#   '0.99'        => [operator from the column heading, '0.99']
#
# An operator embedded in the row takes precedence over the one in the column
# heading. Everything after the FIRST name/value delimiter is the data, so
# values that contain the delimiter themselves (e.g. 'url:http://host/x') are
# kept whole instead of being cut at the second delimiter.
#
# @param inbound_data [String] a single 'operator:value' or plain 'value' entry
# @return [Array] where_operator, data
def get_operator_and_data(inbound_data)

  where_operator, data = inbound_data.split(Delimiters::name_value_delim, 2)

  # With a limit, split keeps a trailing empty field ('price:' => ['price', '']);
  # treat it as missing, exactly as an entry without any value.
  data = nil if(data && data.empty?)

  md = @populator.current_method_detail

  # Find by operator embedded in row takes precedence over operator in column heading
  if(data.nil? && md.find_by_operator)
    if(where_operator.nil? || where_operator.empty?)
      # column completely empty - check for defaults
      data = md.find_by_value || Populator::header_default_data(md.operator)
    else
      # row contains single entry only, so that entry is the data ...
      data = where_operator
    end

    # ... and the operator comes from the header via the method detail
    where_operator = md.find_by_operator
  end

  logger.debug("LoaderBase - get_operator_and_data - [#{where_operator}] - [#{data}]")

  return where_operator, data
end

#headers_contain_mandatory?(mandatory_list) ⇒ Boolean

Check whether headers contains supplied list

Returns:

  • (Boolean)


446
447
448
# File 'lib/loaders/loader_base.rb', line 446

# Check whether every entry of +mandatory_list+ is present in the headers.
#
# @param mandatory_list [Array, Object, nil] one header or a list of headers
# @return [Boolean] true when nothing from the list is missing from @headers
def headers_contain_mandatory?(mandatory_list)
  absent = [*mandatory_list] - @headers
  absent.flatten.empty?
end

#loaded_countObject



436
437
438
# File 'lib/loaders/loader_base.rb', line 436

# Number of objects the reporter has recorded as loaded.
def loaded_count
  successes = reporter.loaded_objects
  successes.size
end

#missing_mandatory_headers(mandatory_list) ⇒ Object

Return the entries of the supplied mandatory list that are missing from the headers.



452
453
454
# File 'lib/loaders/loader_base.rb', line 452

# List the entries of +mandatory_list+ that are not present in the headers.
#
# @param mandatory_list [Array, Object, nil] one header or a list of headers
# @return [Array] the missing entries (empty when nothing is missing)
def missing_mandatory_headers(mandatory_list)
  absent = [*mandatory_list] - @headers
  absent.flatten
end

#new_load_objectObject



427
428
429
430
# File 'lib/loaders/loader_base.rb', line 427

# Replace the current load object with a fresh instance of the load object class.
#
# @return [Object] the new load object
def new_load_object
  fresh = @load_object_class.new
  @load_object = fresh
end

#optionsObject



38
# File 'lib/loaders/loader_base.rb', line 38

# The loader options hash; returns the same object as config.
def options
  @config
end

#perform_load(file_name, options = {}) ⇒ Object

Based on the file name, call the appropriate loading function. Currently supports:

Excel/Open Office files saved as .xls
CSV files

OPTIONS :

[:dummy]         : Perform a dummy run - attempt to load everything but then roll back

strict           : Raise an exception if any headers can't be mapped to an attribute/association
ignore           : List of column headers to ignore when building operator map
mandatory        : List of columns that must be present in headers

force_inclusion  : List of columns that do not map to any operator but should be included in processing.
                   This provides the opportunity for loaders to provide specific methods to handle these fields
                   when no direct operator is available on the model or its associations

Raises:



107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
# File 'lib/loaders/loader_base.rb', line 107

# Load +file_name+, dispatching on its extension (compared case-insensitively):
#
#   .xls => perform_excel_load
#   .csv => perform_csv_load
#
# @param file_name [String] path of the file to load
# @param options [Hash] passed through unchanged to the format specific loader
# @return [Object] whatever the format specific loader returns
# @raise [DataShift::BadFile] when the file does not exist
# @raise [DataShift::UnsupportedFileType] for any other extension
def perform_load( file_name, options = {} )

  # File.exist? - the deprecated alias File.exists? was removed in Ruby 3.2
  raise DataShift::BadFile, "Cannot load #{file_name} file not found." unless(File.exist?(file_name))

  logger.info("Perform Load Options:\n#{options.inspect}")

  ext = File.extname(file_name)

  # TODO - make more modular - these methods doing too much, for example move the object creation/reset
  # out of these perform... methods to make it easier to over ride that behaviour
  case ext.downcase
  when '.xls'
    perform_excel_load(file_name, options)
  when '.csv'
    perform_csv_load(file_name, options)
  else
    raise DataShift::UnsupportedFileType, "#{ext} files not supported - Try .csv or OpenOffice/Excel .xls"
  end
end

#populate_method_mapper_from_headers(headers, options = {}) ⇒ Object

Core API

Given a list of free text column names from a file, map all headers to a MethodDetail instance containing details on operator, look ups etc.

These are available through @method_mapper.method_details

Options:

[:strict]          : Raise an exception if any headers can't be mapped to an attribute/association
[:ignore]          : List of column headers to ignore when building operator map
[:mandatory]       : List of columns that must be present in headers

[:force_inclusion] : List of columns that do not map to any operator but should be included in processing.

   This provides the opportunity for :

   1) loaders to provide specific methods to handle these fields, when no direct operator
    is available on the model or its associations

   2) Handle delegated methods, i.e. no direct association but the method is on a model through its delegate

[:include_all]     : Include all headers in processing - takes precedence over :force_inclusion


153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
# File 'lib/loaders/loader_base.rb', line 153

# Core API - map a list of free text column names onto the load object class.
#
# Stores +headers+ in @headers and asks the method mapper to map them; the
# +options+ hash is passed on to the mapper unchanged.
#
# Options read here:
#   :strict    - when exactly true, unmapped headers raise instead of only being logged
#   :mandatory - list of columns the mapper must report as present
#
# @param headers [Array] column names from the inbound file
# @param options [Hash]
# @return [Object] the method mapper
# @raise [MappingDefinitionError] when mapping itself fails, or (strict only) headers are unmapped
# @raise [MissingMandatoryError] when a mandatory column is missing
def populate_method_mapper_from_headers(headers, options = {})
  @headers = headers

  required_columns = options[:mandatory] || []
  strict_mode = (options[:strict] == true)

  begin
    @method_mapper.map_inbound_headers_to_methods(load_object_class, @headers, options)
  rescue => mapping_error
    puts mapping_error.inspect, mapping_error.backtrace
    logger.error("Failed to map header row to set of database operators : #{mapping_error.inspect}")
    raise MappingDefinitionError, "Failed to map header row to set of database operators"
  end

  if !@method_mapper.missing_methods.empty?
    logger.warn("Following headings couldn't be mapped to #{load_object_class} \n#{@method_mapper.missing_methods.inspect}")
    if strict_mode
      raise MappingDefinitionError, "Missing mappings for columns : #{@method_mapper.missing_methods.join(",")}"
    end
  end

  if !required_columns.empty? && !@method_mapper.contains_mandatory?(required_columns)
    @method_mapper.missing_mandatory(required_columns).each do |column|
      puts "ERROR: Mandatory column missing - expected column '#{column}'"
    end
    raise MissingMandatoryError, "Mandatory columns missing  - please fix and retry."
  end

  @method_mapper
end

#process(method_detail, value) ⇒ Object

Process a value string from a column, assigning the value(s) to the correct association on @load_object. The method detail represents a column from a file and its correlated AR associations. The value string may contain multiple values for a collection association.



297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
# File 'lib/loaders/loader_base.rb', line 297

# Process a value string from a column and assign it to @load_object.
#
# The populator first prepares the data. For a has_many operator the value may
# hold several 'operator:value,value' groups; each value is looked up (or
# created) on the operator class and the found records are assigned through the
# populator. Any other operator type is a straight assignment by the populator.
#
# @param method_detail [Object] detail for the column (operator, operator class, find_by settings)
# @param value [Object] raw value from the inbound column
# @raise [RuntimeError] when a has_many entry yields no operator or no values
def process(method_detail, value)

  current_method_detail = method_detail

  # NOTE: current_attribute_hash is not used anywhere in this method
  current_value, current_attribute_hash = @populator.prepare_data(method_detail, value)

  # TODO - Move ALL of this into Populator properly
  if(current_method_detail.operator_for(:has_many))

    # Nothing is assigned for a has_many column without an operator class or without a value
    if(current_method_detail.operator_class && current_value)

      # there are times when we need to save early, for example before assigning to
      # has_and_belongs_to associations which require the load_object has an id for the join table

      save_if_new

      # A single column can contain multiple associations delimited by special char
      # Size:large|Colour:red,green,blue => ['Size:large', 'Colour:red,green,blue']
      columns = current_value.to_s.split( Delimiters::multi_assoc_delim )

      # Size:large|Colour:red,green,blue  =>
      #   find_by_size( 'large' )
      #   find_all_by_colour( ['red','green','blue'] )

      columns.each do |col_str|

        find_operator, col_values = get_operator_and_data( col_str )

        raise "Cannot perform DB find by #{find_operator}. Expected format key:value" unless(find_operator && col_values)

        find_by_values = col_values.split(Delimiters::multi_value_delim)

        # NOTE(review): get_operator_and_data may already have supplied find_by_value as the
        # data for an empty column, in which case it would be listed twice here - confirm
        find_by_values << current_method_detail.find_by_value if(current_method_detail.find_by_value)

        found_values = []

        #if(find_by_values.size() == 1)
        # logger.info("Find or create #{current_method_detail.operator_class} with #{find_operator} = #{find_by_values.inspect}")
        #  item = current_method_detail.operator_class.where(find_operator => find_by_values.first).first_or_create
        #else
        #  logger.info("Find #{current_method_detail.operator_class} with #{find_operator} = values #{find_by_values.inspect}")
        #  current_method_detail.operator_class.where(find_operator => find_by_values).all
        #end

        operator_class = current_method_detail.operator_class

        logger.info("Find #{current_method_detail.operator_class} with #{find_operator} = #{find_by_values.inspect}")

        # Look up each value individually, creating the record when it does not exist.
        # A failing lookup is only logged, so found_values can end up shorter than find_by_values.
        find_by_values.each do |v|
          begin
            found_values << operator_class.where(find_operator => v).first_or_create
          rescue => e
            logger.error(e.inspect)
            # TODO some way to define if this is a fatal error or not ?
          end
        end

        logger.info("Scan result #{found_values.inspect}")

        # Some lookups failed : record which keys are missing on the load object's errors
        unless(find_by_values.size == found_values.size)
          found = found_values.collect {|f| f.send(find_operator) }
          @load_object.errors.add( current_method_detail.operator, "Association with key(s) #{(find_by_values - found).inspect} NOT found")
          logger.error "Association [#{current_method_detail.operator}] with key(s) #{(find_by_values - found).inspect} NOT found - Not added."
          # Nothing at all was found for this group, so there is nothing to assign
          next if(found_values.empty?)
        end

        logger.info("Assigning #{found_values.inspect} (#{found_values.class})")

        # Lookup Assoc's Model done, now add the found value(s) to load model's collection
        @populator.prepare_and_assign(current_method_detail, @load_object, found_values)
      end # END HAS_MANY
    end
  else
    # Nice n simple straight assignment to a column variable
    #puts "INFO: LOADER BASE processing #{method_detail.name}"
    @populator.assign(load_object)
  end
end

#process_defaultsObject

TODO - Move code into Populator. Process columns with a default value specified.



184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
# File 'lib/loaders/loader_base.rb', line 184

# Apply every default value held by the populator to the current load object.
#
# A default whose name maps to a method detail is assigned through
# prepare_and_assign; otherwise a basic insistent_assignment is attempted and a
# failure there is logged rather than raised.
#
# TODO - Move code into Populator
def process_defaults
  @populator.default_values.each do |default_name, default_value|
    detail = MethodDictionary.find_method_detail(load_object_class, default_name)

    if detail
      logger.debug "Applying default value [#{default_name}] on (#{detail.operator})"
      @populator.prepare_and_assign(detail, load_object, default_value)
      next
    end

    logger.warn "No operator found for default [#{default_name}] trying basic assignment"
    begin
      @populator.insistent_assignment(load_object, default_value, default_name)
    rescue
      logger.error "Badly specified default - could not set #{default_name}(#{default_value})"
    end
  end
end

#reportObject



126
127
128
# File 'lib/loaders/loader_base.rb', line 126

# Delegate to the reporter's report and return its result.
def report; @reporter.report; end

#reset(object = nil) ⇒ Object

Reset the loader, including database object to be populated, and load counts



421
422
423
424
# File 'lib/loaders/loader_base.rb', line 421

# Reset the loader: set the object to be populated and reset the reporter.
#
# @param object [Object, nil] object to populate; when nil/false a new load object is created
# @return [Object] whatever the reporter's reset returns
def reset(object = nil)
  @load_object = object ? object : new_load_object
  @reporter.reset
end

#saveObject



405
406
407
408
409
410
411
412
413
414
415
416
417
# File 'lib/loaders/loader_base.rb', line 405

# Save the current load object.
#
# An exception raised by the object's save is logged (message and backtrace)
# and reported as a failed save instead of propagating.
#
# @return [Object, nil] nil when there is no load object, false when save raised,
#   otherwise whatever the load object's save returned
def save
  return unless @load_object

  puts "DEBUG: SAVING #{@load_object.class} : #{@load_object.inspect}" if verbose

  outcome = false
  begin
    outcome = @load_object.save
  rescue => save_error
    logger.error("Save Error : #{save_error.inspect} on #{@load_object.class}")
    logger.error(save_error.backtrace)
  end
  outcome
end

#save_and_reportObject



393
394
395
396
397
398
399
400
401
402
403
# File 'lib/loaders/loader_base.rb', line 393

# Save the current load object and record the outcome.
#
# On success the object is added to the reporter's loaded objects and the
# reporter's success_inbound_count is incremented. On failure, failure is
# called and the row plus the object's errors are logged.
def save_and_report
  if save
    logger.info("Successfully SAVED Object with ID #{load_object.id} for Row #{@current_row}")
    @reporter.add_loaded_object(@load_object)
    @reporter.success_inbound_count += 1
  else
    failure
    logger.error "Failed to save row (#{current_row_idx}) - [#{@current_row}]"
    logger.error load_object.errors.inspect if load_object
  end
end