Class: Bulkrax::Importer

Inherits:
ApplicationRecord show all
Includes:
ImporterExporterBehavior, StatusInfo
Defined in:
app/models/bulkrax/importer.rb

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Methods included from StatusInfo

#current_status, #failed?, #last_error, #status_at, #status_info, #succeeded?

Methods included from ImporterExporterBehavior

#increment_counters, #last_imported_at, #next_import_at, #parser, #parser_class

Instance Attribute Details

#current_run ⇒ Object



89
90
91
# File 'app/models/bulkrax/importer.rb', line 89

# Run record for the import currently in progress, memoised per instance.
#
# On first use a new run is created through the +importer_runs+ association.
# Its work total is the importer's +limit+ when one is set, otherwise the
# parser's own total; the collection total always comes from the parser.
#
# @return [Object] the run returned by +importer_runs.create!+
def current_run
  return @current_run if @current_run

  runs = self.importer_runs
  totals = {
    total_work_entries: self.limit || parser.total,
    total_collection_entries: parser.collections_total
  }
  @current_run = runs.create!(**totals)
end

#file ⇒ Object

Returns the value of attribute file.



24
25
26
# File 'app/models/bulkrax/importer.rb', line 24

# Reader for the +file+ attribute.
#
# @return [Object, nil] whatever is stored in +@file+ (nil when never assigned)
def file
  instance_variable_get(:@file)
end

#file_style ⇒ Object

Returns the value of attribute file_style.



24
25
26
# File 'app/models/bulkrax/importer.rb', line 24

# Reader for the +file_style+ attribute.
#
# @return [Object, nil] whatever is stored in +@file_style+ (nil when never assigned)
def file_style
  instance_variable_get(:@file_style)
end

#only_updates ⇒ Object

Returns the value of attribute only_updates.



24
25
26
# File 'app/models/bulkrax/importer.rb', line 24

# Reader for the +only_updates+ attribute.
#
# @return [Object, nil] whatever is stored in +@only_updates+ (nil when never assigned)
def only_updates
  instance_variable_get(:@only_updates)
end

Class Method Details

.frequency_enums ⇒ Object



69
70
71
72
73
74
# File 'app/models/bulkrax/importer.rb', line 69

# Label/value pairs for the available import frequencies.
#
# Values are ISO 8601 durations (https://en.wikipedia.org/wiki/ISO_8601#Durations):
# a leading 'P', then number + unit parts, with 'T' introducing the time units.
# Example: P1Y2M3W4DT5H6M7S is 1 year, 2 months, 3 weeks, 4 days, 5 hours,
# 6 minutes and 7 seconds. 'PT0S' (zero seconds) stands for "run once".
#
# @return [Array<Array<String>>] [label, duration] pairs, freshly built on every call
def self.frequency_enums
  labels = ['Daily', 'Monthly', 'Yearly', 'Once (on save)']
  durations = %w[P1D P1M P1Y PT0S]
  labels.zip(durations)
end

Instance Method Details

#errored_entries_csv_path ⇒ Object



145
146
147
# File 'app/models/bulkrax/importer.rb', line 145

# Location of the CSV of errored entries for this import, memoised per instance.
#
# The file sits directly under +Bulkrax.import_path+ and is named after
# +path_string+.
#
# @return [String] the joined path
def errored_entries_csv_path
  return @errored_entries_csv_path if @errored_entries_csv_path

  base_dir = Bulkrax.import_path
  @errored_entries_csv_path = File.join(base_dir, "import_#{path_string}_errored_entries.csv")
end

#frequency ⇒ Object



80
81
82
83
# File 'app/models/bulkrax/importer.rb', line 80

# The stored frequency wrapped in an ISO8601::Duration.
#
# When nothing is stored the zero-length duration "PT0S" is used instead.
#
# @return [ISO8601::Duration]
def frequency
  stored = self[:frequency]
  ISO8601::Duration.new(stored || "PT0S")
end

#frequency=(frequency) ⇒ Object



76
77
78
# File 'app/models/bulkrax/importer.rb', line 76

# Stores the frequency as the string form of an ISO8601::Duration.
#
# The value is passed through ISO8601::Duration first, so what is persisted is
# whatever that object's +to_s+ yields rather than the raw input.
#
# @param frequency [Object] value accepted by ISO8601::Duration.new
def frequency=(frequency)
  normalized = ISO8601::Duration.new(frequency).to_s
  self[:frequency] = normalized
end

#import_collections ⇒ Object



117
118
119
120
121
122
# File 'app/models/bulkrax/importer.rb', line 117

# Asks the parser to create the collections for this import.
#
# An unsaved importer is saved first, because statuses can only be recorded
# against a persisted record. Any StandardError raised along the way is handed
# to +status_info+ instead of propagating.
#
# @return [Object] the parser's result, or the result of +status_info+ on failure
def import_collections
  save if new_record?
  parser.create_collections
rescue StandardError => error
  status_info(error)
end

#import_metadata_format ⇒ Object

The format for metadata for the incoming import; corresponds to an Entry class



131
132
133
# File 'app/models/bulkrax/importer.rb', line 131

# The format for metadata for the incoming import; corresponds to an Entry class.
#
# @return [Array<Array<String>>] [label, entry class name] pairs
def import_metadata_format
  [['CSV', 'Bulkrax::CsvEntry'], ['RDF (N-Triples)', 'Bulkrax::RdfEntry']]
end

#import_works ⇒ Object



109
110
111
112
113
114
115
# File 'app/models/bulkrax/importer.rb', line 109

# Asks the parser to create the works for this import.
#
# An unsaved importer is saved first, because statuses can only be recorded
# against a persisted record. +only_updates+ is forced to +false+ when it is
# nil or false. Any StandardError raised along the way is handed to
# +status_info+ instead of propagating.
#
# @return [Object] the parser's result, or the result of +status_info+ on failure
def import_works
  save if new_record?
  self.only_updates = false unless self.only_updates
  parser.create_works
rescue StandardError => error
  status_info(error)
end

#importer_unzip_path ⇒ Object

If the import data is zipped, unzip it to this path



141
142
143
# File 'app/models/bulkrax/importer.rb', line 141

# Directory that zipped import data is unzipped into, memoised per instance.
#
# The directory sits directly under +Bulkrax.import_path+ and is named after
# +path_string+.
#
# @return [String] the joined path
def importer_unzip_path
  return @importer_unzip_path if @importer_unzip_path

  base_dir = Bulkrax.import_path
  @importer_unzip_path = File.join(base_dir, "import_#{path_string}")
end

#last_run ⇒ Object



93
94
95
# File 'app/models/bulkrax/importer.rb', line 93

# The last entry of +importer_runs+, memoised once a run has been found.
#
# A nil result is not cached, so the association is consulted again on the
# next call.
#
# @return [Object, nil] the last run, or nil when there is none
def last_run
  return @last_run if @last_run

  @last_run = self.importer_runs.last
end

#mapping ⇒ Object

If field_mapping is empty (blank or `[{}]`), set up a default mapping built from the parser's import_fields



51
52
53
54
55
56
57
58
59
60
61
62
63
# File 'app/models/bulkrax/importer.rb', line 51

# Field mapping used by this importer, memoised once a truthy value is found.
#
# * A +field_mapping+ that is neither blank nor the placeholder +[{}]+ is
#   returned as is.
# * Otherwise a default is built when the parser reports import fields (or the
#   mapping is the +[{}]+ placeholder): each non-nil import field is passed to
#   +Bulkrax.default_field_mapping+ and the resulting hashes are merged into
#   one HashWithIndifferentAccess.
# * Otherwise the result is nil, which is not cached.
#
# @return [Object, nil] the configured mapping, the derived default, or nil
def mapping
  return @mapping if @mapping

  customized = !(self.field_mapping.blank? || self.field_mapping == [{}])
  return @mapping = self.field_mapping if customized

  derivable = parser.import_fields.present? || self.field_mapping == [{}]
  return @mapping = nil unless derivable

  defaults = parser.import_fields.reject(&:nil?).map { |field| Bulkrax.default_field_mapping.call(field) }
  @mapping = ActiveSupport::HashWithIndifferentAccess.new(defaults.inject(:merge))
end

#parser_fields ⇒ Object



65
66
67
# File 'app/models/bulkrax/importer.rb', line 65

# The stored +parser_fields+ attribute, or an empty hash when nothing is stored.
#
# The fallback hash is a fresh object on every call and is not written back
# to the record.
#
# @return [Object] the stored value, or +{}+
def parser_fields
  stored = self[:parser_fields]
  return stored if stored

  {}
end

#path_string ⇒ Object



149
150
151
152
153
# File 'app/models/bulkrax/importer.rb', line 149

# Identifier used in import file and directory names.
#
# Normally "<id>_<created_at as YYYYmmddHHMMSS>_<id of the last importer run>".
# If building that raises a StandardError (for example when there is no run
# yet), the shorter "<id>_<timestamp>" form is used instead. An error raised
# while building the short form is not rescued.
#
# @return [String]
def path_string
  stamp_format = '%Y%m%d%H%M%S'
  begin
    "#{self.id}_#{self.created_at.strftime(stamp_format)}_#{self.importer_runs.last.id}"
  rescue StandardError
    "#{self.id}_#{self.created_at.strftime(stamp_format)}"
  end
end

#record_status ⇒ Object



35
36
37
38
39
40
41
42
43
44
45
46
47
48
# File 'app/models/bulkrax/importer.rb', line 35

# Records the final status of the current run on its importer.
#
# The run is re-read through +ImporterRun.find+ so the counters are fresh.
# Nothing is recorded while records are still enqueued. Otherwise:
# * no failed records: 'Complete'
# * failed records with +invalid_records+ present: a Bulkrax::ImportFailed
#   carrying the invalid records split into lines
# * failed records without +invalid_records+: 'Complete (with failures)'
#
# @return [Object, nil] the result of +status_info+, or nil while still processing
def record_status
  run = ImporterRun.find(current_run.id) # reload to get current counters
  return if run.enqueued_records.positive? # still processing

  outcome =
    if !run.failed_records.positive?
      'Complete'
    elsif run.invalid_records.present?
      Bulkrax::ImportFailed.new('Failed with Invalid Records', run.invalid_records.split("\n"))
    else
      'Complete (with failures)'
    end
  run.importer.status_info(outcome)
end

#replace_files ⇒ Object



101
102
103
# File 'app/models/bulkrax/importer.rb', line 101

# The 'replace_files' entry of +parser_fields+.
#
# @return [Object, nil] the stored value, or whatever +parser_fields+ yields for a missing key
def replace_files
  fields = self.parser_fields
  fields['replace_files']
end

#schedulable? ⇒ Boolean

Returns:

  • (Boolean)


85
86
87
# File 'app/models/bulkrax/importer.rb', line 85

# Whether this importer recurs, i.e. its frequency is not a zero-length duration.
#
# @return [Boolean] true when +frequency.to_seconds+ differs from 0
def schedulable?
  seconds = frequency.to_seconds
  seconds != 0
end

#seen ⇒ Object



97
98
99
# File 'app/models/bulkrax/importer.rb', line 97

# Per-instance scratch hash, created empty on first access and reused afterwards.
#
# @return [Hash] the same hash object on every call
def seen
  return @seen if @seen

  @seen = {}
end

#status ⇒ Object



27
28
29
30
31
32
33
# File 'app/models/bulkrax/importer.rb', line 27

# Status label for this importer.
#
# A validate-only importer always reports 'Validated'; every other importer
# defers to the inherited +status+ implementation.
#
# @return [Object] 'Validated' or the result of +super+
def status
  return 'Validated' if self.validate_only

  super
end

#unique_collection_identifier(id) ⇒ Object

TODO:
  • move to parser, as this is OAI specific

Prepend the base_url to ensure unique set identifiers



126
127
128
# File 'app/models/bulkrax/importer.rb', line 126

# Builds a collection identifier by prefixing +id+ with part of the base URL.
#
# The prefix is the third '/'-separated segment of +parser_fields['base_url']+,
# which for a URL of the form "scheme://host/..." is the host part.
# TODO: move to parser, as this is OAI specific.
#
# @param id [Object] the set/collection identifier to make unique
# @return [String] "<prefix>_<id>"
def unique_collection_identifier(id)
  host = self.parser_fields['base_url'].split('/')[2]
  "#{host}_#{id}"
end

#update_files ⇒ Object



105
106
107
# File 'app/models/bulkrax/importer.rb', line 105

# The 'update_files' entry of +parser_fields+.
#
# @return [Object, nil] the stored value, or whatever +parser_fields+ yields for a missing key
def update_files
  fields = self.parser_fields
  fields['update_files']
end