Class: Bulkrax::CsvEntry

Inherits:
Entry show all
Defined in:
app/models/bulkrax/csv_entry.rb

Overview

TODO: We need to rework this class some to address the Metrics/ClassLength rubocop offense. We do too much in these entry classes. We need to extract the common logic from the various entry models into a module that can be shared between them.

Direct Known Subclasses

CsvCollectionEntry, CsvFileSetEntry

Defined Under Namespace

Modules: AttributeBuilderMethod Classes: CsvWrapper

Instance Attribute Summary

Attributes inherited from Entry

#all_attrs

Class Method Summary collapse

Instance Method Summary collapse

Methods inherited from Entry

#build, #exporter?, #fetch_field_mapping, #find_collection, #importer?, #last_run, parent_field, #source_identifier, #valid_system_id, #work_identifier

Methods included from HasLocalProcessing

#add_local

Methods included from StatusInfo

#current_status, #failed?, #last_error, #set_status_info, #status, #status_at, #succeeded?

Methods included from ExportBehavior

#build_for_exporter, #filename, #hyrax_record

Methods included from ImportBehavior

#active_id_for_authority?, #add_admin_set_id, #add_collections, #add_rights_statement, #add_user_to_permission_templates!, #add_visibility, #build_for_importer, #child_jobs, #factory, #factory_class, #override_rights_statement, #parent_jobs, #rights_statement, #sanitize_controlled_uri_value, #sanitize_controlled_uri_values!, #validate_value

Methods included from HasMatchers

#add_metadata, #excluded?, #field_supported?, #field_to, #get_object_name, #matched_metadata, #multiple?, #multiple_metadata, #set_parsed_data, #set_parsed_object_data, #single_metadata, #supported_bulkrax_fields

Class Method Details

.data_for_entry(data, _source_id, parser) ⇒ Object



59
60
61
62
63
64
65
66
67
68
69
# File 'app/models/bulkrax/csv_entry.rb', line 59

# Normalizes one CSV row into the raw-data Hash kept for an entry.
#
# @param data [CSV::Table, #to_h] a single row, or a table whose first row is used
# @param _source_id [Object] unused
# @param parser [Object] handed to parent_field to look up the parent column name
# @return [Hash] the row as a Hash, with :model and :parents added when applicable
def self.data_for_entry(data, _source_id, parser)
  # A multi-line table was passed in: only its first row is used.
  row = data.is_a?(CSV::Table) ? data.first : data
  # :model is copied separately so that it doesn't get mistranslated by to_h
  entry_data = row.to_h
  entry_data[:model] = row[:model] if row[:model].present?
  # When the parent column is not literally 'parents', mirror its value under
  # :parents as well - the parser needs it.
  # TODO: change to :parents
  parent_column = parent_field(parser)
  if entry_data.key?(parent_column.to_sym) && parent_column != 'parents'
    entry_data[:parents] = entry_data[parent_column.to_sym]
  end
  entry_data
end

.fields_from_data(data) ⇒ Object



12
13
14
# File 'app/models/bulkrax/csv_entry.rb', line 12

# Lists the distinct, non-nil header names of the given data.
#
# @param data [#headers] object exposing the CSV headers
# @return [Array] flattened headers with nils and duplicates removed
def self.fields_from_data(data)
  header_names = data.headers.flatten
  header_names.compact.uniq
end

.matcher_class ⇒ Object



338
339
340
# File 'app/models/bulkrax/csv_entry.rb', line 338

# Matcher class associated with this entry type.
#
# @return [Class] Bulkrax::CsvMatcher
def self.matcher_class
  Bulkrax::CsvMatcher
end

.read_data(path) ⇒ Object

There’s a risk that this reads the whole file into memory and could cause a memory leak.

Raises:

  • (StandardError)


19
20
21
22
23
24
25
26
27
28
29
# File 'app/models/bulkrax/csv_entry.rb', line 19

# Reads the CSV at +path+ and wraps the parsed result.
#
# Headers are enabled, each header is stripped and symbolized, and the file is
# read as UTF-8; anything returned by csv_read_data_options overrides these
# defaults. CSV.read loads the entire file at once.
#
# @param path [String] location of the CSV file
# @return [Object] a csv_wrapper_class instance built from the parsed table
# @raise [StandardError] when +path+ is blank
def self.read_data(path)
  raise StandardError, 'CSV path empty' if path.blank?

  symbolize_header = ->(h) { h.to_s.strip.to_sym }
  defaults = { headers: true, header_converters: symbolize_header, encoding: 'utf-8' }
  read_options = defaults.merge(csv_read_data_options)

  csv_wrapper_class.new(CSV.read(path, **read_options))
end

Instance Method Details

#add_file ⇒ Object



128
129
130
131
132
133
134
135
136
137
138
139
140
# File 'app/models/bulkrax/csv_entry.rb', line 128

# Fills parsed_metadata['file'] with a resolved path for every file named in
# the record.
#
# record['file'] may be a delimited String (split on
# Bulkrax.multi_value_element_split_on) or an Array. Blank names are dropped
# and spaces in a name become underscores before it goes through path_to_file.
#
# NOTE(review): the listing read `self.['file']`, which is not valid Ruby; the
# `parsed_metadata` attribute name was restored - confirm against
# app/models/bulkrax/csv_entry.rb.
#
# @return [Array<String>] the resolved file paths
# @raise [RuntimeError] from path_to_file when a named file cannot be found
def add_file
  self.parsed_metadata['file'] ||= []
  file_value = record['file']
  if file_value.is_a?(String)
    self.parsed_metadata['file'] = file_value.split(Bulkrax.multi_value_element_split_on)
  elsif file_value.is_a?(Array)
    self.parsed_metadata['file'] = file_value
  end
  self.parsed_metadata['file'] = self.parsed_metadata['file'].map do |f|
    next if f.blank?

    path_to_file(f.tr(' ', '_'))
  end.compact
end

#add_identifier ⇒ Object



95
96
97
# File 'app/models/bulkrax/csv_entry.rb', line 95

# Stores the record's source identifier, wrapped in an Array, under the
# work_identifier key of parsed_metadata.
#
# NOTE(review): the listing read `self.[work_identifier]`; the
# `parsed_metadata` attribute name was restored - confirm against the source file.
#
# @return [Array] the one-element Array that was stored
def add_identifier
  self.parsed_metadata[work_identifier] = [record[source_identifier]]
end

#add_ingested_metadata ⇒ Object



119
120
121
122
123
124
125
126
# File 'app/models/bulkrax/csv_entry.rb', line 119

# Passes every key/value pair of the record to add_metadata.
#
# A number embedded in the key (e.g. "creator_2") becomes a zero-based index;
# keys with no number, or with the number 0, pass a nil index.
#
# NOTE(review): the method name (taken from this page's heading) and the
# `add_metadata` call were missing from the listing and have been restored -
# confirm against the source file.
def add_ingested_metadata
  # we do not want to sort the values in the record before adding the metadata.
  # if we do, the factory_class will be set to the default_work_type for all values that come before "model" or "work type"
  record.each do |key, value|
    number = key[/\d+/].to_i
    index = number - 1 unless number.zero?
    add_metadata(key_without_numbers(key), value, index)
  end
end

#add_metadata_for_model ⇒ Object



105
106
107
108
109
110
111
112
113
114
115
116
117
# File 'app/models/bulkrax/csv_entry.rb', line 105

# Runs the extra metadata steps that depend on the factory class:
# collections, file sets (Bulkrax.file_model_class), or everything else.
#
# NOTE(review): the method name (from this page's heading) and the predicate
# after `importerexporter.` were missing from the listing. `metadata_only?` is
# the presumed original - confirm against the source file.
def add_metadata_for_model
  if defined?(::Collection) && factory_class == ::Collection
    add_collection_type_gid if defined?(::Hyrax)
    # add any additional collection metadata methods here
  elsif factory_class == Bulkrax.file_model_class
    validate_presence_of_filename!
    add_path_to_file
    validate_presence_of_parent!
  else
    add_file unless importerexporter.metadata_only?
    add_admin_set_id
  end
end

#build_export_metadata ⇒ Object



142
143
144
145
146
147
148
149
150
151
152
# File 'app/models/bulkrax/csv_entry.rb', line 142

# Rebuilds parsed_metadata from scratch for export, saves the entry, and
# returns the result.
#
# File metadata is skipped when the record is a Collection.
#
# NOTE(review): the method name, the `parsed_metadata` attribute and the four
# builder calls were missing from the listing. They were restored from the
# method headings on this page; the relative order of the relationship and
# mapping builders is presumed - confirm against the source file.
#
# @return [Hash] the rebuilt parsed_metadata
def build_export_metadata
  self.parsed_metadata = {}

  build_system_metadata
  build_files_metadata if defined?(Collection) && !hyrax_record.is_a?(Collection)
  build_relationship_metadata
  build_mapping_metadata
  self.save!

  self.parsed_metadata
end

#build_files_metadata ⇒ Object



163
164
165
166
167
168
169
170
171
172
173
174
# File 'app/models/bulkrax/csv_entry.rb', line 163

# Exports file names: thumbnails for works, file set file names otherwise.
#
# NOTE(review): the method name was missing from the listing and has been
# restored from this page's heading.
def build_files_metadata
  # attaching files to the FileSet row only so we don't have duplicates when importing to a new tenant
  if hyrax_record.work?
    build_thumbnail_files
  else
    file_mapping = key_for_export('file')
    file_sets = hyrax_record.file_set? ? Array.wrap(hyrax_record) : hyrax_record.file_sets
    filenames = map_file_sets(file_sets)

    # Join into one column only when the 'file' mapping configures a join.
    handle_join_on_export(file_mapping, filenames, mapping['file']&.[]('join')&.present?)
  end
end

#build_mapping_metadata ⇒ Object



226
227
228
229
230
231
232
233
234
# File 'app/models/bulkrax/csv_entry.rb', line 226

# Walks the field mapping and, for each entry that AttributeBuilderMethod
# resolves to a builder method name, calls that method with the key and value.
#
# NOTE(review): the method name was missing from the listing and has been
# restored from this page's heading.
def build_mapping_metadata
  mapping = fetch_field_mapping
  mapping.each do |key, value|
    method_name = AttributeBuilderMethod.for(key: key, value: value, entry: self)
    # No builder applies to this mapping entry.
    next unless method_name

    send(method_name, key, value)
  end
end

#build_metadata ⇒ Object



71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
# File 'app/models/bulkrax/csv_entry.rb', line 71

# Validates the record, then rebuilds parsed_metadata for import by running
# each metadata step in order.
#
# NOTE(review): the method name, the `parsed_metadata` attribute and two step
# calls were missing from the listing. `add_ingested_metadata` and
# `add_metadata_for_model` were restored from the method headings on this
# page; their positions are presumed - confirm against the source file.
#
# @return [Hash] the rebuilt parsed_metadata
# @raise [StandardError] from validate_record when the record is missing or incomplete
def build_metadata
  validate_record

  self.parsed_metadata = {}
  add_identifier
  establish_factory_class
  add_ingested_metadata
  # TODO(alishaevn): remove the collections stuff entirely and only reference collections via the new parents code
  add_collections
  add_visibility
  add_metadata_for_model
  add_rights_statement
  sanitize_controlled_uri_values!
  add_local

  self.parsed_metadata
end

#build_object(_key, value) ⇒ Object



236
237
238
239
240
241
242
243
244
# File 'app/models/bulkrax/csv_entry.rb', line 236

# Exports the object-valued property named by value['object'], if the record
# responds to it and it is not empty.
#
# NOTE(review): the call on the last line had lost its method name;
# `object_metadata` (a method documented on this page that takes the same
# argument) was restored - confirm against the source file.
#
# @param _key [Object] unused
# @param value [Hash] mapping configuration; 'object' names the property to read
def build_object(_key, value)
  return unless hyrax_record.respond_to?(value['object'])

  # respond_to? only reports public methods, so public_send is sufficient.
  data = hyrax_record.public_send(value['object'])
  return if data.empty?

  data = data.to_a if data.is_a?(ActiveTriples::Relation)
  object_metadata(Array.wrap(data))
end

#build_relationship_metadata ⇒ Object



176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
# File 'app/models/bulkrax/csv_entry.rb', line 176

# Exports parent and child relationship ids under their mapped column names.
#
# For each relationship key, the ids from every listed method the record
# responds to are collected, flattened and de-duplicated.
#
# NOTE(review): the method name was missing from the listing and has been
# restored from this page's heading.
def build_relationship_metadata
  # Includes all relationship methods for all exportable record types (works, Collections, FileSets)
  relationship_methods = {
    related_parents_parsed_mapping => %i[member_of_collection_ids member_of_work_ids in_work_ids],
    related_children_parsed_mapping => %i[member_collection_ids member_work_ids file_set_ids]
  }

  relationship_methods.each do |relationship_key, id_methods|
    next if relationship_key.blank?

    values = id_methods.select { |m| hyrax_record.respond_to?(m) }
                       .map { |m| hyrax_record.public_send(m) }
                       .flatten
                       .uniq
    next if values.blank?

    # NOTE(review): the join flag is read from the parents mapping even while
    # exporting children - confirm whether mapping[relationship_key] was intended.
    handle_join_on_export(relationship_key, values, mapping[related_parents_parsed_mapping]['join'].present?)
  end
end

#build_system_metadata ⇒ Object

Metadata required by Bulkrax for round-tripping



155
156
157
158
159
160
161
# File 'app/models/bulkrax/csv_entry.rb', line 155

# Metadata required by Bulkrax for round-tripping: the record id, its source
# identifier and its model name.
#
# NOTE(review): the method name (from this page's heading) and the
# `parsed_metadata` attribute were missing from the listing and have been
# restored - confirm against the source file.
def build_system_metadata
  self.parsed_metadata['id'] = hyrax_record.id
  source_id = hyrax_record.send(work_identifier)
  # A relation holds several values; only the first is exported.
  source_id = source_id.to_a.first if source_id.is_a?(ActiveTriples::Relation)
  self.parsed_metadata[source_identifier] = source_id
  self.parsed_metadata[key_for_export('model')] = hyrax_record.has_model.first
end

#build_thumbnail_files ⇒ Object



313
314
315
316
317
318
319
320
321
# File 'app/models/bulkrax/csv_entry.rb', line 313

# Exports the record's thumbnail file name(s) in numbered 'thumbnail_file'
# columns. Does nothing unless the importer/exporter has include_thumbnails set.
def build_thumbnail_files
  return unless importerexporter.include_thumbnails

  thumbnail_file_sets = Array.wrap(hyrax_record.thumbnail)
  thumbnail_names = map_file_sets(thumbnail_file_sets)

  # Thumbnails are never joined into a single column.
  handle_join_on_export('thumbnail_file', thumbnail_names, false)
end

#build_value(property_name, mapping_config) ⇒ Object



246
247
248
249
250
251
252
253
254
255
256
257
258
# File 'app/models/bulkrax/csv_entry.rb', line 246

# Exports one property of the record into parsed_metadata.
#
# When the mapping asks for a join, or the value is not Enumerable, the value
# is written to a single column; otherwise each element gets its own numbered
# column ("<key>_1", "<key>_2", ...).
#
# NOTE(review): the listing read `self.[...]`; the `parsed_metadata` attribute
# name was restored - confirm against the source file.
#
# @param property_name [String, Symbol] property to read from the record
# @param mapping_config [Hash] mapping configuration; 'join' selects the single-column form
def build_value(property_name, mapping_config)
  return unless hyrax_record.respond_to?(property_name.to_s)

  # respond_to? only reports public methods, so public_send is sufficient.
  data = hyrax_record.public_send(property_name.to_s)

  if mapping_config['join'] || !data.is_a?(Enumerable)
    self.parsed_metadata[key_for_export(property_name)] = prepare_export_data_with_join(data)
  else
    data.each_with_index do |d, i|
      self.parsed_metadata["#{key_for_export(property_name)}_#{i + 1}"] = prepare_export_data(d)
    end
  end
end

#collection_identifiers ⇒ Object



342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
# File 'app/models/bulkrax/csv_entry.rb', line 342

# Resolves the parent references in the record to identifiers of matching
# CsvCollectionEntry entries in the same importer/exporter.
#
# A non-empty result is memoized; an empty result is recomputed on each call.
#
# NOTE(review): the listing read `e.&.[](source_identifier)`, which is not
# valid Ruby. `raw_metadata` is the presumed missing attribute - confirm
# against the source file.
#
# @return [Array] identifiers of the matching collection entries (possibly empty)
# @raise [StandardError] when a reference matches more than one collection entry
def collection_identifiers
  return @collection_identifiers if @collection_identifiers.present?

  parent_field_mapping = self.class.parent_field(parser)
  return [] unless parent_field_mapping.present? && record[parent_field_mapping].present?

  identifiers = []
  split_references = record[parent_field_mapping].split(Bulkrax.multi_value_element_split_on)
  split_references.each do |c_reference|
    matching_collection_entries = importerexporter.entries.select do |e|
      (e.raw_metadata&.[](source_identifier) == c_reference) &&
        e.is_a?(CsvCollectionEntry)
    end
    raise ::StandardError, 'Only expected to find one matching entry' if matching_collection_entries.count > 1
    # nil (no match) is pushed here and removed by compact below.
    identifiers << matching_collection_entries.first&.identifier
  end
  @collection_identifiers = identifiers.compact.presence || []
end

#collections_created? ⇒ Boolean

Returns:

  • (Boolean)


361
362
363
364
# File 'app/models/bulkrax/csv_entry.rb', line 361

# Always answers true in this class.
#
# @return [Boolean] true
def collections_created?
  # TODO: look into if this method is still needed after new relationships code
  true
end

#establish_factory_class ⇒ Object



99
100
101
102
103
# File 'app/models/bulkrax/csv_entry.rb', line 99

# Adds the record's model value to the metadata under 'model', once for each
# of the parser's model field mappings that is present in the record.
#
# NOTE(review): the call inside the loop had lost its method name;
# `add_metadata` (listed on this page under HasMatchers) was restored -
# confirm against the source file.
def establish_factory_class
  parser.model_field_mappings.each do |key|
    add_metadata('model', record[key]) if record.key?(key)
  end
end

#find_collection_ids ⇒ Object



366
367
368
369
370
371
372
373
374
375
376
377
# File 'app/models/bulkrax/csv_entry.rb', line 366

# Returns collection_ids, first appending the ids of collections found for
# each collection identifier - unless collections_created? is true, in which
# case collection_ids is returned untouched.
#
# @return [Array] the entry's collection ids
def find_collection_ids
  return self.collection_ids if collections_created?

  references = collection_identifiers
  if references.present?
    references.each do |reference|
      collection = find_collection(reference)
      # Skip references that resolve to nothing or are already recorded.
      next if collection.blank? || self.collection_ids.include?(collection.id)

      self.collection_ids << collection.id
    end
  end

  self.collection_ids
end

#handle_join_on_export(key, values, join) ⇒ Object



323
324
325
326
327
328
329
330
331
332
# File 'app/models/bulkrax/csv_entry.rb', line 323

# Writes +values+ into parsed_metadata either joined in one column or spread
# over numbered columns.
#
# NOTE(review): the hash receiver was missing on three lines of the listing
# (`[key] = ...`, `.delete(key)`); `parsed_metadata` was restored - confirm
# against the source file.
#
# @param key [String] column name (base name when not joining)
# @param values [Array] values to export
# @param join [Boolean] truthy to join the values with Bulkrax.multi_value_element_join_on
def handle_join_on_export(key, values, join)
  if join
    parsed_metadata[key] = values.join(Bulkrax.multi_value_element_join_on)
  else
    values.each_with_index do |value, i|
      parsed_metadata["#{key}_#{i + 1}"] = value
    end
    # The numbered columns replace any un-numbered column of the same name.
    parsed_metadata.delete(key)
  end
end

#key_for_export(key) ⇒ Object

On export the key becomes the from and the from becomes the destination. It is the opposite of the import because we are moving data in the opposite direction. Metadata that does not have a specific Bulkrax entry is mapped to the key name, as matching keys coming in are mapped by the CSV parser automatically.



262
263
264
265
266
267
# File 'app/models/bulkrax/csv_entry.rb', line 262

# Translates an internal key into its export column name.
#
# The key is looked up in the mapping without its number; when a mapping
# exists, the first 'from' value is used, otherwise the key itself. Whatever
# was removed from the key to strip the number is appended again.
#
# @param key [String] internal key, possibly numbered
# @return [String] the export column name
def key_for_export(key)
  base_key = key_without_numbers(key)
  field_config = mapping[base_key]
  export_name = field_config ? field_config['from'].first : base_key
  # Bring the number back if there is one
  numbering = key.sub(base_key, '')
  "#{export_name}#{numbering}"
end

#object_metadata(data) ⇒ Object



285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
# File 'app/models/bulkrax/csv_entry.rb', line 285

# Flattens object-valued metadata into numbered export columns: "<key>_<n>"
# for scalar values and "<key>_<n>_<m>" for each element of an Array value.
#
# NOTE(review): the method name (from this page's heading) and the
# `parsed_metadata` attribute were missing from the listing and have been
# restored - confirm against the source file.
#
# SECURITY: each element of +data+ is passed to eval. Only call this with
# trusted data; see the TODO below.
#
# @param data [Array<String>] stringified Ruby Arrays/Hashes
def object_metadata(data)
  # NOTE: What is `d` in this case:
  #
  #  "[{\"single_object_first_name\"=>\"Fake\", \"single_object_last_name\"=>\"Fakerson\", \"single_object_position\"=>\"Leader, Jester, Queen\", \"single_object_language\"=>\"english\"}]"
  #
  # The above is a stringified version of a Ruby string.  Using eval is a very bad idea as it
  # will execute the value of `d` within the full Ruby interpreter context.
  #
  # TODO: Would it be possible to store this as a non-string?  Maybe the actual Ruby Array and Hash?
  data = data.map { |d| eval(d) }.flatten # rubocop:disable Security/Eval

  data.each_with_index do |obj, index|
    next if obj.nil?
    # allow the object_key to be valid whether it's a string or symbol
    obj = obj.with_indifferent_access

    obj.each_key do |key|
      if obj[key].is_a?(Array)
        obj[key].each_with_index do |nested_item, nested_index|
          self.parsed_metadata["#{key_for_export(key)}_#{index + 1}_#{nested_index + 1}"] = prepare_export_data(nested_item)
        end
      else
        self.parsed_metadata["#{key_for_export(key)}_#{index + 1}"] = prepare_export_data(obj[key])
      end
    end
  end
end

#path_to_file(file) ⇒ Object

If only filename is given, construct the path (/files/my_file)



380
381
382
383
384
385
386
387
# File 'app/models/bulkrax/csv_entry.rb', line 380

# Resolves +file+ to an existing path.
#
# A path that already exists is returned unchanged; otherwise the name is
# joined onto the parser's path_to_files directory.
#
# @param file [String] a full path or a bare file name
# @return [String] a path that exists on disk
# @raise [RuntimeError] when neither location exists
def path_to_file(file)
  # Already a usable path: hand it back untouched.
  return file if File.exist?(file)

  candidate = File.join(importerexporter.parser.path_to_files, file)
  raise "File #{candidate} does not exist" unless File.exist?(candidate)

  candidate
end

#prepare_export_data(datum) ⇒ Object



277
278
279
280
281
282
283
# File 'app/models/bulkrax/csv_entry.rb', line 277

# Converts a single value for export: an ActiveTriples::Resource becomes its
# URI string, anything else is returned unchanged.
#
# @param datum [Object] value to export
# @return [Object] the URI string for a resource, otherwise +datum+ itself
def prepare_export_data(datum)
  return datum unless datum.is_a?(ActiveTriples::Resource)

  datum.to_uri.to_s
end

#prepare_export_data_with_join(data) ⇒ Object



269
270
271
272
273
274
275
# File 'app/models/bulkrax/csv_entry.rb', line 269

# Renders +data+ as one export string.
#
# Non-Enumerable input is stringified as is; an empty collection gives "";
# otherwise each element goes through prepare_export_data and the results are
# joined with Bulkrax.multi_value_element_join_on.
#
# @param data [Object] a single value or a collection of values
# @return [String] the joined export value
def prepare_export_data_with_join(data)
  # Yes...it's possible we're asking to coerce a multi-value but only have a single value.
  return data.to_s unless data.is_a?(Enumerable)
  return "" if data.empty?

  prepared = data.map { |d| prepare_export_data(d) }
  prepared.join(Bulkrax.multi_value_element_join_on)
end

#record ⇒ Object



334
335
336
# File 'app/models/bulkrax/csv_entry.rb', line 334

# Memoized record backing this entry.
#
# NOTE(review): the right-hand side of the assignment was missing from the
# listing (`@record ||=` with nothing after it). `raw_metadata` is the
# presumed original - confirm against the source file.
def record
  @record ||= raw_metadata
end

#validate_record ⇒ Object

Raises:

  • (StandardError)


89
90
91
92
93
# File 'app/models/bulkrax/csv_entry.rb', line 89

# Ensures a record exists and that the parser finds all required elements in it.
#
# @return [nil] when the record is valid
# @raise [StandardError] when the record is nil, or when required elements
#   are missing (the message lists them)
def validate_record
  raise StandardError, 'Record not found' if record.nil?

  csv_parser = importerexporter.parser
  return if csv_parser.required_elements?(record)

  missing = csv_parser.missing_elements(record).join(', ')
  raise StandardError, "Missing required elements, missing element(s) are: #{missing}"
end