Class: Bulkrax::Importer

Inherits:
ApplicationRecord show all
Includes:
ImporterExporterBehavior, StatusInfo
Defined in:
app/models/bulkrax/importer.rb

Overview

rubocop:disable Metrics/ClassLength

Constant Summary collapse

# Object types that #import_objects hands to the parser when the caller names none.
DEFAULT_OBJECT_TYPES = ['collection', 'work', 'file_set', 'relationship'].freeze

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Methods included from StatusInfo

#current_status, #failed?, #last_error, #set_status_info, #skipped?, #status_at, #succeeded?

Methods included from ImporterExporterBehavior

#file?, #increment_counters, #key_without_numbers, #keys_without_numbers, #last_imported_at, #next_import_at, #parser, #parser_class, #zip?

Instance Attribute Details

#current_run(skip_counts: false) ⇒ Object



107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
# File 'app/models/bulkrax/importer.rb', line 107

# Returns the run record for this import, creating it on first use.
#
# A memoized run is returned as-is. Otherwise a new run is created through
# importer_runs.create!; unless the import is a zip file or +skip_counts+ is
# set, the run is then updated with the entry totals reported by the parser.
#
# @param skip_counts [Boolean] when true, the totals are not written to a new run
# @return [Object] the run stored in @current_run
def current_run(skip_counts: false)
  return @current_run if @current_run.present?

  @current_run = self.importer_runs.create!
  totals_wanted = !((file? && zip?) || skip_counts)

  if totals_wanted
    totals = {}
    # An explicit limit takes the place of the parser's work total.
    totals[:total_work_entries] = self.limit || parser.works_total
    totals[:total_collection_entries] = parser.collections_total
    totals[:total_file_set_entries] = parser.file_sets_total
    @current_run.update!(totals)
  end

  @current_run
end

#file ⇒ Object

Returns the value of attribute file.



24
25
26
# File 'app/models/bulkrax/importer.rb', line 24

# Plain reader for the +file+ attribute: returns whatever is stored in @file.
def file
  @file
end

#file_style ⇒ Object

Returns the value of attribute file_style.



24
25
26
# File 'app/models/bulkrax/importer.rb', line 24

# Plain reader for the +file_style+ attribute: returns whatever is stored in @file_style.
def file_style
  @file_style
end

#only_updates ⇒ Object

Returns the value of attribute only_updates.



24
25
26
# File 'app/models/bulkrax/importer.rb', line 24

# Plain reader for the +only_updates+ attribute: returns whatever is stored in @only_updates.
def only_updates
  @only_updates
end

Class Method Details

.frequency_enums ⇒ Object



87
88
89
90
91
92
# File 'app/models/bulkrax/importer.rb', line 87

# Lists the selectable import frequencies as [label, ISO 8601 duration] pairs.
#
# Durations follow ISO 8601 (https://en.wikipedia.org/wiki/ISO_8601#Durations):
# each starts with 'P' followed by number/unit parts, e.g. P1Y2M3W4DT5H6M7S is
# 1 Year, 2 Months, 3 Weeks, 4 Days, 5 Hours, 6 Minutes, 7 Seconds.
#
# @return [Array<Array<String>>] label/duration pairs
def self.frequency_enums
  labels = ['Daily', 'Monthly', 'Yearly', 'Once (on save)']
  durations = %w[P1D P1M P1Y PT0S]
  labels.zip(durations)
end

.safe_uri_filename(uri) ⇒ Object



27
28
29
30
31
32
33
34
# File 'app/models/bulkrax/importer.rb', line 27

# Works out a filename for the resource at +uri+.
#
# First choice is the "filename" value parsed from the content-disposition
# header of a HEAD request. If anything in that attempt raises, the basename of
# the URI string is used instead, and a random UUID when that basename is blank.
#
# @param uri [#to_s] location of the remote file
# @return [String] a filename
def self.safe_uri_filename(uri)
  head_response = Faraday.head(uri.to_s)
  disposition = head_response.headers['content-disposition']
  CGI.parse(disposition)["filename"].first.delete("\"")
rescue
  # Best-effort fallback: no usable header, so derive the name from the URI itself.
  fallback_name = File.basename(uri.to_s)
  fallback_name.delete!('/')
  fallback_name.presence || SecureRandom.uuid
end

Instance Method Details

#completed_statuses ⇒ Object



145
146
147
148
149
# File 'app/models/bulkrax/importer.rb', line 145

# Memoized query for statuses of this importer's entries whose status_message is 'Complete'.
#
# Starts from Bulkrax::Status.latest_by_statusable, eager-loads :statusable, and
# restricts to Bulkrax::Entry statusables whose ids belong to this importer.
#
# @return [Object] the result of the where call, cached in @completed_statuses
def completed_statuses
  return @completed_statuses if @completed_statuses

  latest = Bulkrax::Status.latest_by_statusable.includes(:statusable)
  @completed_statuses = latest.where(
    'bulkrax_statuses.statusable_id IN (?) AND bulkrax_statuses.statusable_type = ? AND status_message = ?',
    self.entries.pluck(:id),
    'Bulkrax::Entry',
    'Complete'
  )
end

#default_field_mapping ⇒ Object



73
74
75
76
77
78
79
80
81
# File 'app/models/bulkrax/importer.rb', line 73

# Builds the default mapping from the parser's import fields.
#
# When the parser reports no import fields (nil), the stored field_mapping is
# returned unchanged. Otherwise each non-nil field is passed to
# Bulkrax.default_field_mapping and the resulting hashes are merged into one
# HashWithIndifferentAccess.
#
# @return [Object] field_mapping, or the merged default mapping
def default_field_mapping
  return self.field_mapping if parser.import_fields.nil?

  per_field = parser.import_fields.reject(&:nil?).map { |field| Bulkrax.default_field_mapping.call(field) }
  ActiveSupport::HashWithIndifferentAccess.new(per_field.inject(:merge))
end

#errored_entries_csv_path ⇒ Object



244
245
246
# File 'app/models/bulkrax/importer.rb', line 244

# Memoized path of the errored-entries CSV: "import_<path_string>_errored_entries.csv"
# joined onto the parser's base_path.
#
# @return [String]
def errored_entries_csv_path
  return @errored_entries_csv_path if @errored_entries_csv_path

  base_dir = parser.base_path
  @errored_entries_csv_path = File.join(base_dir, "import_#{path_string}_errored_entries.csv")
end

#existing_entries? ⇒ Boolean

Returns:

  • (Boolean)


183
184
185
# File 'app/models/bulkrax/importer.rb', line 183

# Reports whether the parser's 'file_style' setting contains "Existing Entries".
#
# @return [Boolean] always true or false; false when 'file_style' is unset
def existing_entries?
  # match? answers the question without building a MatchData, and `|| false`
  # covers the nil that safe navigation yields when 'file_style' is missing.
  parser.parser_fields['file_style']&.match?(/Existing Entries/) || false
end

#failed_entries? ⇒ Boolean

Returns:

  • (Boolean)


128
129
130
# File 'app/models/bulkrax/importer.rb', line 128

# Reports whether the +failed+ scope of this importer's entries has any members.
#
# @return [Boolean]
def failed_entries?
  failed_scope = entries.failed
  failed_scope.any?
end

#failed_messages ⇒ Object



138
139
140
141
142
143
# File 'app/models/bulkrax/importer.rb', line 138

# Groups the failed statuses by error message.
#
# @return [Hash] each distinct error_message mapped to the ids of the statuses carrying it
def failed_messages
  failed_statuses
    .group_by(&:error_message)
    .transform_values { |statuses| statuses.map(&:id) }
end

#failed_statuses ⇒ Object



132
133
134
135
136
# File 'app/models/bulkrax/importer.rb', line 132

# Memoized query for statuses of this importer's entries whose status_message is 'Failed'.
#
# Starts from Bulkrax::Status.latest_by_statusable, eager-loads :statusable, and
# restricts to Bulkrax::Entry statusables whose ids belong to this importer.
#
# @return [Object] the result of the where call, cached in @failed_statuses
def failed_statuses
  return @failed_statuses if @failed_statuses

  latest = Bulkrax::Status.latest_by_statusable.includes(:statusable)
  @failed_statuses = latest.where(
    'bulkrax_statuses.statusable_id IN (?) AND bulkrax_statuses.statusable_type = ? AND status_message = ?',
    self.entries.pluck(:id),
    'Bulkrax::Entry',
    'Failed'
  )
end

#frequency ⇒ Object



98
99
100
101
# File 'app/models/bulkrax/importer.rb', line 98

# Wraps the stored frequency in an ISO8601::Duration.
# A missing value is treated as "PT0S".
#
# @return [ISO8601::Duration]
def frequency
  ISO8601::Duration.new(self[:frequency] || "PT0S")
end

#frequency=(frequency) ⇒ Object



94
95
96
# File 'app/models/bulkrax/importer.rb', line 94

# Stores +frequency+ after passing it through ISO8601::Duration, keeping the
# duration's string form in the :frequency attribute.
#
# @param frequency [Object] value accepted by ISO8601::Duration.new
def frequency=(frequency)
  normalized = ISO8601::Duration.new(frequency).to_s
  self[:frequency] = normalized
end

#import_collections ⇒ Object



191
192
193
# File 'app/models/bulkrax/importer.rb', line 191

# Runs #import_objects restricted to the 'collection' type.
#
# @return [Object] whatever #import_objects returns
def import_collections
  only_collections = ['collection']
  import_objects(only_collections)
end

#import_file_path ⇒ Object



155
156
157
# File 'app/models/bulkrax/importer.rb', line 155

# Reads the 'import_file_path' entry from parser_fields.
#
# @return [Object, nil] the stored value, nil when the key is absent
def import_file_path
  settings = parser_fields
  settings['import_file_path']
end

#import_file_sets ⇒ Object



195
196
197
# File 'app/models/bulkrax/importer.rb', line 195

# Runs #import_objects restricted to the 'file_set' type.
#
# @return [Object] whatever #import_objects returns
def import_file_sets
  only_file_sets = ['file_set']
  import_objects(only_file_sets)
end

#import_metadata_format ⇒ Object

The format for metadata for the incoming import; corresponds to an Entry class



230
231
232
# File 'app/models/bulkrax/importer.rb', line 230

# The format for metadata for the incoming import; corresponds to an Entry class.
#
# @return [Array<Array<String>>] [label, entry class name] pairs
def import_metadata_format
  [['CSV', 'Bulkrax::CsvEntry'], ['RDF (N-Triples)', 'Bulkrax::RdfEntry']]
end

#import_objects(types_array = nil) ⇒ Object



205
206
207
208
209
210
211
212
213
# File 'app/models/bulkrax/importer.rb', line 205

# Imports the given object types through the parser, then marks untouched entries as skipped.
#
# Entries are rebuilt when #existing_entries? is truthy and created otherwise.
# Any StandardError raised along the way is recorded with set_status_info
# instead of propagating.
#
# @param types_array [Array<String>, nil] types to import; DEFAULT_OBJECT_TYPES when nil
# @return [Object] result of mark_unseen_as_skipped, or of set_status_info on error
def import_objects(types_array = nil)
  self.only_updates ||= false
  save if new_record? # Object needs to be saved for statuses
  requested_types = types_array || DEFAULT_OBJECT_TYPES
  if existing_entries?
    parser.rebuild_entries(requested_types)
  else
    parser.create_objects(requested_types)
  end
  mark_unseen_as_skipped
rescue StandardError => error
  set_status_info(error)
end

#import_relationships ⇒ Object



199
200
201
# File 'app/models/bulkrax/importer.rb', line 199

# Runs #import_objects restricted to the 'relationship' type.
#
# @return [Object] whatever #import_objects returns
def import_relationships
  only_relationships = ['relationship']
  import_objects(only_relationships)
end

#import_works ⇒ Object



187
188
189
# File 'app/models/bulkrax/importer.rb', line 187

# Runs #import_objects restricted to the 'work' type.
#
# @return [Object] whatever #import_objects returns
def import_works
  only_works = ['work']
  import_objects(only_works)
end

#importer_unzip_path ⇒ Object

If the import data is zipped, unzip it to this path



240
241
242
# File 'app/models/bulkrax/importer.rb', line 240

# If the import data is zipped, unzip it to this path.
# Memoized: "import_<path_string>" joined onto the parser's base_path.
#
# @return [String]
def importer_unzip_path
  return @importer_unzip_path if @importer_unzip_path

  base_dir = parser.base_path
  @importer_unzip_path = File.join(base_dir, "import_#{path_string}")
end

#last_run ⇒ Object



124
125
126
# File 'app/models/bulkrax/importer.rb', line 124

# Memoized last element of importer_runs.
#
# @return [Object, nil] the cached run, or nil when there are no runs yet
def last_run
  return @last_run if @last_run

  @last_run = importer_runs.last
end

#mapping ⇒ Object

If field_mapping is empty, set up a default based on the export_properties



60
61
62
63
64
65
66
67
68
69
70
71
# File 'app/models/bulkrax/importer.rb', line 60

# Memoized field mapping for this importer.
#
# With a blank field_mapping (or one equal to [{}]) the default mapping is used,
# provided the parser lists import fields or field_mapping is exactly [{}];
# otherwise the result is nil. A non-blank field_mapping is merged over the
# default mapping, so its keys win.
#
# @return [Hash, nil]
def mapping
  return @mapping if @mapping

  @mapping =
    if self.field_mapping.blank? || self.field_mapping == [{}]
      default_field_mapping if parser.import_fields.present? || self.field_mapping == [{}]
    else
      default_field_mapping.merge(self.field_mapping)
    end
end

#mark_unseen_as_skipped ⇒ Object

After an import any entries we did not touch are skipped. They are not really pending, complete for the last run, or failed



217
218
219
220
221
# File 'app/models/bulkrax/importer.rb', line 217

# After an import, any entries we did not touch are skipped: they are not really
# pending, complete for the last run, or failed.
#
# Every entry whose identifier is not a key of #seen gets the status 'Skipped'.
#
# @return [Object] whatever find_each returns
def mark_unseen_as_skipped
  untouched = entries.where.not(identifier: seen.keys)
  untouched.find_each { |entry| entry.set_status_info('Skipped') }
end

#metadata_only? ⇒ Boolean

Returns:

  • (Boolean)


179
180
181
# File 'app/models/bulkrax/importer.rb', line 179

# Reports whether the parser's 'metadata_only' setting is exactly +true+.
# Any other value, including the string 'true' or a missing key, counts as false.
#
# @return [Boolean]
def metadata_only?
  parser.parser_fields['metadata_only'] == true
end

#original_file ⇒ Object



163
164
165
# File 'app/models/bulkrax/importer.rb', line 163

# Returns the import file path, but only when #original_file? is truthy.
#
# @return [Object, nil] import_file_path, or nil
def original_file
  return nil unless original_file?

  import_file_path
end

#original_file? ⇒ Boolean

Returns:

  • (Boolean)


159
160
161
# File 'app/models/bulkrax/importer.rb', line 159

# Checks that an import file path is set and that a file exists there.
#
# @return [Boolean, nil] the falsy path itself when none is set, otherwise File.exist? of the path
def original_file?
  candidate = import_file_path
  return candidate unless candidate

  File.exist?(candidate)
end

#parser_fields ⇒ Object



83
84
85
# File 'app/models/bulkrax/importer.rb', line 83

# Returns the stored :parser_fields attribute, or a new empty Hash when it is unset.
#
# @return [Object] the stored value, or {}
def parser_fields
  stored = self[:parser_fields]
  return stored if stored

  {}
end

#path_string ⇒ Object



248
249
250
251
252
# File 'app/models/bulkrax/importer.rb', line 248

# Builds the string used in import paths: "<id>_<created_at as YYYYmmddHHMMSS>_<last run id>".
# If building it raises (for instance when there is no last run), the run id
# part is left off.
#
# @return [String]
def path_string
  [self.id, self.created_at.strftime('%Y%m%d%H%M%S'), self.importer_runs.last.id].join('_')
rescue
  [self.id, self.created_at.strftime('%Y%m%d%H%M%S')].join('_')
end

#record_status ⇒ Object



44
45
46
47
48
49
50
51
52
53
54
55
56
57
# File 'app/models/bulkrax/importer.rb', line 44

# Records the overall outcome of the current run on its importer.
#
# Does nothing while records are still enqueued. Otherwise the importer status
# becomes 'Complete' when no records failed, a Bulkrax::ImportFailed carrying
# the invalid-record lines when there are failures plus invalid records, and
# 'Complete (with failures)' for failures without invalid records.
#
# @return [Object, nil] result of set_status_info, or nil while still processing
def record_status
  importer_run = ImporterRun.find(current_run.id) # make sure fresh
  return if importer_run.enqueued_records.positive? # still processing

  outcome =
    if !importer_run.failed_records.positive?
      'Complete'
    elsif importer_run.invalid_records.present?
      Bulkrax::ImportFailed.new('Failed with Invalid Records', importer_run.invalid_records.split("\n"))
    else
      'Complete (with failures)'
    end
  importer_run.importer.set_status_info(outcome)
end

#remove_and_rerun ⇒ Object



175
176
177
# File 'app/models/bulkrax/importer.rb', line 175

# Reads the 'remove_and_rerun' entry from parser_fields.
#
# @return [Object, nil] the stored value, nil when the key is absent
def remove_and_rerun
  settings = parser_fields
  settings['remove_and_rerun']
end

#replace_files ⇒ Object



167
168
169
# File 'app/models/bulkrax/importer.rb', line 167

# Reads the 'replace_files' entry from parser_fields.
#
# @return [Object, nil] the stored value, nil when the key is absent
def replace_files
  settings = parser_fields
  settings['replace_files']
end

#schedulable? ⇒ Boolean

Returns:

  • (Boolean)


103
104
105
# File 'app/models/bulkrax/importer.rb', line 103

# Reports whether the import frequency is a non-zero duration.
#
# @return [Boolean] false when frequency.to_seconds is zero
def schedulable?
  interval_seconds = frequency.to_seconds
  !interval_seconds.zero?
end

#seen ⇒ Object



151
152
153
# File 'app/models/bulkrax/importer.rb', line 151

# Lazily created Hash held in @seen. #mark_unseen_as_skipped treats its keys as
# the identifiers of entries that were touched.
#
# @return [Hash]
def seen
  @seen = {} unless @seen
  @seen
end

#status ⇒ Object



36
37
38
39
40
41
42
# File 'app/models/bulkrax/importer.rb', line 36

# Reports 'Validated' for validate-only importers and defers to the inherited
# status otherwise.
#
# @return [Object] 'Validated', or whatever super returns
def status
  return 'Validated' if validate_only

  super
end

#unique_collection_identifier(id) ⇒ Object

TODO:
  • move to parser, as this is OAI specific

Prepend the base_url to ensure unique set identifiers



225
226
227
# File 'app/models/bulkrax/importer.rb', line 225

# Prepend part of the base_url to +id+ to ensure unique set identifiers.
#
# The prefix is the third '/'-separated segment of parser_fields['base_url'],
# which for a URL like "http://host/path" is the host part.
#
# TODO: move to parser, as this is OAI specific
#
# @param id [Object] identifier to make unique
# @return [String] "<segment>_<id>"
def unique_collection_identifier(id)
  url_segments = self.parser_fields['base_url'].split('/')
  "#{url_segments[2]}_#{id}"
end

#update_files ⇒ Object



171
172
173
# File 'app/models/bulkrax/importer.rb', line 171

# Reads the 'update_files' entry from parser_fields.
#
# @return [Object, nil] the stored value, nil when the key is absent
def update_files
  settings = parser_fields
  settings['update_files']
end