Class: Bulkrax::CsvEntry
- Inherits:
-
Entry
show all
- Defined in:
- app/models/bulkrax/csv_entry.rb
Overview
TODO: We need to rework this class some to address the Metrics/ClassLength rubocop offense. We do too much in these entry classes. We need to extract the common logic from the various entry models into a module that can be shared between them.
Defined Under Namespace
Modules: AttributeBuilderMethod
Classes: CsvWrapper
Instance Attribute Summary
Attributes inherited from Entry
#all_attrs
Class Method Summary
collapse
Instance Method Summary
collapse
Methods inherited from Entry
#build, #exporter?, #fetch_field_mapping, #find_collection, #importer?, #last_run, parent_field, #source_identifier, #valid_system_id, #work_identifier
#add_local
Methods included from StatusInfo
#current_status, #failed?, #last_error, #set_status_info, #status, #status_at, #succeeded?
#build_for_exporter, #filename, #hyrax_record
#active_id_for_authority?, #add_admin_set_id, #add_collections, #add_rights_statement, #add_user_to_permission_templates!, #add_visibility, #build_for_importer, #child_jobs, #factory, #factory_class, #override_rights_statement, #parent_jobs, #rights_statement, #sanitize_controlled_uri_value, #sanitize_controlled_uri_values!, #validate_value
#add_metadata, #excluded?, #field_supported?, #field_to, #get_object_name, #matched_metadata, #multiple?, #multiple_metadata, #set_parsed_data, #set_parsed_object_data, #single_metadata, #supported_bulkrax_fields
Class Method Details
.data_for_entry(data, _source_id, parser) ⇒ Object
59
60
61
62
63
64
65
66
67
68
69
|
# File 'app/models/bulkrax/csv_entry.rb', line 59
# Normalizes one CSV row into the raw metadata hash for an entry.
#
# @param data [CSV::Table, #to_h] a row, or a table whose first row is used
# @param _source_id [Object] unused
# @param parser [Object] passed to +parent_field+ to look up the parent column name
# @return [Hash] the row as a hash, with +:model+ set when the row has one and
#   +:parents+ copied from the parent column when that column is present and is
#   not already named 'parents'
def self.data_for_entry(data, _source_id, parser)
  row = data.is_a?(CSV::Table) ? data.first : data
  entry_data = row.to_h

  model = row[:model]
  entry_data[:model] = model if model.present?

  parent_key = parent_field(parser)
  if entry_data.key?(parent_key.to_sym) && parent_key != 'parents'
    entry_data[:parents] = entry_data[parent_key.to_sym]
  end

  entry_data
end
|
.fields_from_data(data) ⇒ Object
12
13
14
|
# File 'app/models/bulkrax/csv_entry.rb', line 12
# Lists the distinct, non-nil column headers of parsed CSV data.
#
# @param data [#headers] parsed CSV data exposing +headers+
#   (presumably the object returned by read_data — confirm against callers)
# @return [Array] flattened headers with nils and duplicates removed
def self.fields_from_data(data)
  data.headers.flatten.compact.uniq
end
|
.matcher_class ⇒ Object
338
339
340
|
# File 'app/models/bulkrax/csv_entry.rb', line 338
# @return [Class] the matcher class used for this entry type, Bulkrax::CsvMatcher
def self.matcher_class
  Bulkrax::CsvMatcher
end
|
.read_data(path) ⇒ Object
There’s a risk that this reads the whole file into memory and could cause a memory leak.
19
20
21
22
23
24
25
26
27
28
29
|
# File 'app/models/bulkrax/csv_entry.rb', line 19
# Reads the CSV at +path+ and wraps the parsed table.
#
# Headers are enabled, stripped of surrounding whitespace and symbolized; the
# file is read as UTF-8. Any of these defaults can be overridden by
# +csv_read_data_options+, which is merged last.
#
# @param path [String] location of the CSV file
# @return [Object] an instance of +csv_wrapper_class+ built from the parsed data
# @raise [StandardError] when +path+ is blank
def self.read_data(path)
  raise StandardError, 'CSV path empty' if path.blank?

  strip_and_symbolize = ->(h) { h.to_s.strip.to_sym }
  defaults = { headers: true, header_converters: strip_and_symbolize, encoding: 'utf-8' }
  table = CSV.read(path, **defaults.merge(csv_read_data_options))
  csv_wrapper_class.new(table)
end
|
Instance Method Details
#add_file ⇒ Object
128
129
130
131
132
133
134
135
136
137
138
139
140
|
# File 'app/models/bulkrax/csv_entry.rb', line 128
# Populates parsed_metadata['file'] with resolved file paths.
#
# A String in record['file'] is split on Bulkrax.multi_value_element_split_on;
# an Array is used as given. Blank names are dropped, spaces in names are
# replaced with underscores, and each name is resolved through path_to_file.
#
# @return [Array] the resolved paths
def add_file
  self.parsed_metadata['file'] ||= []

  source = record['file']
  case source
  when String then self.parsed_metadata['file'] = source.split(Bulkrax.multi_value_element_split_on)
  when Array then self.parsed_metadata['file'] = source
  end

  located = self.parsed_metadata['file'].reject(&:blank?).map { |name| path_to_file(name.tr(' ', '_')) }
  self.parsed_metadata['file'] = located.compact
end
|
#add_identifier ⇒ Object
95
96
97
|
# File 'app/models/bulkrax/csv_entry.rb', line 95
# Stores the record's source identifier value, wrapped in an array, under the
# work_identifier key of parsed_metadata.
#
# @return [Array] the single-element array that was stored
def add_identifier
  identifier_value = record[source_identifier]
  self.parsed_metadata[work_identifier] = [identifier_value]
end
|
119
120
121
122
123
124
125
126
|
# File 'app/models/bulkrax/csv_entry.rb', line 119
# Feeds every record column through add_metadata.
#
# The first run of digits in a column name is treated as a 1-based position and
# passed to add_metadata as a 0-based index; names without digits (or whose
# number is 0) pass nil. The name is passed without its numbers.
def add_ingested_metadata
  record.each do |key, value|
    number = key[/\d+/].to_i
    index = number.zero? ? nil : number - 1
    add_metadata(key_without_numbers(key), value, index)
  end
end
|
105
106
107
108
109
110
111
112
113
114
115
116
117
|
# File 'app/models/bulkrax/csv_entry.rb', line 105
# Adds the metadata that depends on which model this entry builds.
#
# - Collection (when ::Collection is defined): adds the collection type gid,
#   but only when ::Hyrax is defined.
# - Bulkrax.file_model_class: validates the filename, adds the file path and
#   validates the parent.
# - anything else: adds files (unless the importer is metadata-only) and the
#   admin set id.
def add_metadata_for_model
  target_class = factory_class
  collection_model = defined?(::Collection) && target_class == ::Collection

  if collection_model
    add_collection_type_gid if defined?(::Hyrax)
  elsif target_class == Bulkrax.file_model_class
    validate_presence_of_filename!
    add_path_to_file
    validate_presence_of_parent!
  else
    add_file unless importerexporter.metadata_only?
    add_admin_set_id
  end
end
|
142
143
144
145
146
147
148
149
150
151
152
|
# File 'app/models/bulkrax/csv_entry.rb', line 142
# Builds the export metadata for hyrax_record and persists this entry.
#
# Starts from an empty parsed_metadata hash and fills it in a fixed order:
# system metadata, file metadata, relationship metadata, then mapped fields.
#
# @return [Hash] the populated parsed_metadata
# @raise whatever save! raises when the entry cannot be saved
def build_export_metadata
self.parsed_metadata = {}
build_system_metadata
# File metadata is skipped for Collection records, and also skipped entirely
# when no Collection constant is defined.
build_files_metadata if defined?(Collection) && !hyrax_record.is_a?(Collection)
build_relationship_metadata
build_mapping_metadata
self.save!
self.parsed_metadata
end
|
163
164
165
166
167
168
169
170
171
172
173
174
|
# File 'app/models/bulkrax/csv_entry.rb', line 163
# Exports file information for hyrax_record.
#
# Works export only their thumbnail files. Any other record exports the
# filenames of its file sets (or of itself when it is a file set) under the
# export key for 'file', joined when the 'file' mapping has a 'join' setting.
def build_files_metadata
  return build_thumbnail_files if hyrax_record.work?

  export_key = key_for_export('file')
  sets = if hyrax_record.file_set?
           Array.wrap(hyrax_record)
         else
           hyrax_record.file_sets
         end
  names = map_file_sets(sets)
  join_requested = mapping['file']&.[]('join')&.present?
  handle_join_on_export(export_key, names, join_requested)
end
|
226
227
228
229
230
231
232
233
234
|
# File 'app/models/bulkrax/csv_entry.rb', line 226
# Exports every mapped field.
#
# For each entry of fetch_field_mapping, AttributeBuilderMethod.for picks the
# builder method name; fields for which it returns nothing are skipped,
# otherwise the builder is invoked with the field key and its mapping config.
def build_mapping_metadata
  fetch_field_mapping.each do |property, config|
    builder = AttributeBuilderMethod.for(key: property, value: config, entry: self)
    send(builder, property, config) if builder
  end
end
|
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
|
# File 'app/models/bulkrax/csv_entry.rb', line 71
# Builds the import metadata for this entry from its record.
#
# Validates the record first, then starts from an empty parsed_metadata hash
# and runs each step below in order.
#
# @return [Hash] the populated parsed_metadata
# @raise [StandardError] from validate_record when the record is missing or
#   lacks required elements
def build_metadata
validate_record
self.parsed_metadata = {}
add_identifier
establish_factory_class
add_ingested_metadata
add_collections
add_visibility
add_metadata_for_model
add_rights_statement
sanitize_controlled_uri_values!
add_local
self.parsed_metadata
end
|
#build_object(_key, value) ⇒ Object
236
237
238
239
240
241
242
243
244
|
# File 'app/models/bulkrax/csv_entry.rb', line 236
# Exports an object-valued property of hyrax_record.
#
# Does nothing when the record has no method named by value['object'] or when
# that method returns something empty. An ActiveTriples::Relation is converted
# to an array before being handed to object_metadata.
#
# @param _key [Object] unused
# @param value [Hash] mapping config; 'object' names the record method to read
def build_object(_key, value)
  accessor = value['object']
  return unless hyrax_record.respond_to?(accessor)

  data = hyrax_record.send(accessor)
  return if data.empty?

  entries = data.is_a?(ActiveTriples::Relation) ? data.to_a : data
  object_metadata(Array.wrap(entries))
end
|
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
|
# File 'app/models/bulkrax/csv_entry.rb', line 176
# Exports the parent and child relationship ids of hyrax_record.
#
# For each configured relationship column (parents, children) the ids are
# gathered from whichever of the listed methods the record responds to,
# de-duplicated, and written through handle_join_on_export. Relationships with
# a blank column name or no ids are skipped.
def build_relationship_metadata
  relationship_methods = {
    related_parents_parsed_mapping => %i[member_of_collection_ids member_of_work_ids in_work_ids],
    related_children_parsed_mapping => %i[member_collection_ids member_work_ids file_set_ids]
  }

  relationship_methods.each do |relationship_key, methods|
    next if relationship_key.blank?

    values = methods.select { |m| hyrax_record.respond_to?(m) }
                    .map { |m| hyrax_record.public_send(m) }
                    .flatten
                    .uniq
    next if values.blank?

    # Read the join setting from the mapping of the relationship being written
    # (not always the parents' mapping), and tolerate a missing mapping entry.
    relationship_mapping = mapping[relationship_key]
    join = relationship_mapping.present? && relationship_mapping['join'].present?
    handle_join_on_export(relationship_key, values, join)
  end
end
|
Metadata required by Bulkrax for round-tripping
155
156
157
158
159
160
161
|
# File 'app/models/bulkrax/csv_entry.rb', line 155
# Writes the identifying fields of hyrax_record into parsed_metadata: its id,
# its source identifier (first value when it is an ActiveTriples::Relation)
# and its first has_model value under the export key for 'model'.
def build_system_metadata
  exported = hyrax_record
  parsed_metadata['id'] = exported.id

  identifier = exported.send(work_identifier)
  identifier = identifier.to_a.first if identifier.is_a?(ActiveTriples::Relation)
  parsed_metadata[source_identifier] = identifier

  parsed_metadata[key_for_export('model')] = exported.has_model.first
end
|
#build_thumbnail_files ⇒ Object
313
314
315
316
317
318
319
320
321
|
# File 'app/models/bulkrax/csv_entry.rb', line 313
# Exports the thumbnail of hyrax_record as numbered 'thumbnail_file' columns
# (never joined). Skipped unless the exporter has include_thumbnails set.
def build_thumbnail_files
  return unless importerexporter.include_thumbnails

  thumbnails = Array.wrap(hyrax_record.thumbnail)
  handle_join_on_export('thumbnail_file', map_file_sets(thumbnails), false)
end
|
#build_value(property_name, mapping_config) ⇒ Object
246
247
248
249
250
251
252
253
254
255
256
257
258
|
# File 'app/models/bulkrax/csv_entry.rb', line 246
# Exports one property of hyrax_record.
#
# Enumerable values without a 'join' setting become numbered columns
# (key_1, key_2, ...); everything else is written to a single column using
# prepare_export_data_with_join. Properties the record does not respond to are
# ignored.
#
# @param property_name [String, Symbol] record method to read
# @param mapping_config [Hash] mapping config for the property
def build_value(property_name, mapping_config)
  accessor = property_name.to_s
  return unless hyrax_record.respond_to?(accessor)

  data = hyrax_record.send(accessor)
  if !mapping_config['join'] && data.is_a?(Enumerable)
    data.each_with_index do |datum, position|
      numbered_key = "#{key_for_export(property_name)}_#{position + 1}"
      self.parsed_metadata[numbered_key] = prepare_export_data(datum)
    end
  else
    self.parsed_metadata[key_for_export(property_name)] = prepare_export_data_with_join(data)
  end
end
|
#collection_identifiers ⇒ Object
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
|
# File 'app/models/bulkrax/csv_entry.rb', line 342
# Resolves the parent references of this record to collection entry identifiers.
#
# The parent column is split on Bulkrax.multi_value_element_split_on; each
# reference is matched against the importer's CsvCollectionEntry entries by
# source identifier. References without a match are dropped. A non-empty
# result is memoized.
#
# @return [Array] identifiers of the matching collection entries (may be empty)
# @raise [StandardError] when a reference matches more than one collection entry
def collection_identifiers
  return @collection_identifiers if @collection_identifiers.present?

  parent_column = self.class.parent_field(parser)
  return [] unless parent_column.present? && record[parent_column].present?

  references = record[parent_column].split(Bulkrax.multi_value_element_split_on)
  found = references.map do |reference|
    candidates = importerexporter.entries.select do |entry|
      (entry.raw_metadata&.[](source_identifier) == reference) && entry.is_a?(CsvCollectionEntry)
    end
    raise ::StandardError, 'Only expected to find one matching entry' if candidates.count > 1

    candidates.first&.identifier
  end

  @collection_identifiers = found.compact.presence || []
end
|
#collections_created? ⇒ Boolean
361
362
363
364
|
# File 'app/models/bulkrax/csv_entry.rb', line 361
# @return [Boolean] always true for this entry type
def collections_created?
  true
end
|
#establish_factory_class ⇒ Object
99
100
101
102
103
|
# File 'app/models/bulkrax/csv_entry.rb', line 99
# Records the model for this entry: for every model column name known to the
# parser that is present in the record, its value is added as 'model' metadata.
def establish_factory_class
  parser.model_field_mappings.each do |model_column|
    next unless record.key?(model_column)

    add_metadata('model', record[model_column])
  end
end
|
#find_collection_ids ⇒ Object
366
367
368
369
370
371
372
373
374
375
376
377
|
# File 'app/models/bulkrax/csv_entry.rb', line 366
# Returns the ids of the collections this entry belongs to.
#
# When collections_created? is true, collection_ids is returned as is.
# Otherwise each collection identifier is looked up with find_collection and
# its id is appended to collection_ids unless the collection is blank or its
# id is already present.
#
# @return [Array] collection_ids
def find_collection_ids
  return collection_ids if collections_created?

  if collection_identifiers.present?
    collection_identifiers.each do |identifier|
      collection = find_collection(identifier)
      next if collection.blank? || collection_ids.include?(collection.id)

      collection_ids << collection.id
    end
  end

  collection_ids
end
|
#handle_join_on_export(key, values, join) ⇒ Object
323
324
325
326
327
328
329
330
331
332
|
# File 'app/models/bulkrax/csv_entry.rb', line 323
# Writes +values+ into parsed_metadata under +key+.
#
# With +join+ truthy the values are joined with
# Bulkrax.multi_value_element_join_on into a single column. Otherwise each value
# gets its own numbered column (key_1, key_2, ...) and any unnumbered +key+
# column is removed.
#
# @param key [String] export column name
# @param values [Array] values to write
# @param join [Boolean] whether to write a single joined column
def handle_join_on_export(key, values, join)
  if join
    joined = values.join(Bulkrax.multi_value_element_join_on)
    return parsed_metadata[key] = joined
  end

  values.each_with_index do |entry, position|
    parsed_metadata["#{key}_#{position + 1}"] = entry
  end
  parsed_metadata.delete(key)
end
|
#key_for_export(key) ⇒ Object
On export the key becomes the from and the from becomes the destination. It is the opposite of the import because we are moving data in the opposite direction. Metadata that does not have a specific Bulkrax entry is mapped to the key name, as matching keys coming in are mapped by the CSV parser automatically.
262
263
264
265
266
267
|
# File 'app/models/bulkrax/csv_entry.rb', line 262
# Translates a property key into its export column name.
#
# The key is looked up in the mapping without its numbers; a mapped key exports
# under the first entry of its 'from' list, an unmapped key under its own name.
# Whatever was stripped from the key (e.g. a numeric suffix) is appended back.
#
# @param key [String] property key, possibly numbered
# @return [String] the export column name
def key_for_export(key)
  base = key_without_numbers(key)
  field_config = mapping[base]
  exported = field_config ? field_config['from'].first : base
  suffix = key.sub(base, '')
  "#{exported}#{suffix}"
end
|
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
|
# File 'app/models/bulkrax/csv_entry.rb', line 285
# Exports object-valued data as numbered columns.
#
# Each element of +data+ is evaluated as Ruby source and the results are
# flattened. For every non-nil result, each key becomes a column numbered by
# the object's position (key_1, key_2, ...); array values get a second,
# nested number (key_1_1, key_1_2, ...).
#
# NOTE(review): the elements are passed to +eval+, so this executes whatever
# Ruby the stored strings contain. Confirm the data can never be
# user-controlled, or replace eval with a safe parser.
#
# @param data [Array<String>] Ruby-source strings (presumably serialized hashes
#   or arrays of hashes — confirm against callers)
def object_metadata(data)
  data = data.map { |d| eval(d) }.flatten
  data.each_with_index do |obj, index|
    next if obj.nil?

    position = index + 1
    obj.with_indifferent_access.each do |key, value|
      if value.is_a?(Array)
        value.each_with_index do |nested_item, nested_index|
          self.parsed_metadata["#{key_for_export(key)}_#{position}_#{nested_index + 1}"] = prepare_export_data(nested_item)
        end
      else
        self.parsed_metadata["#{key_for_export(key)}_#{position}"] = prepare_export_data(value)
      end
    end
  end
end
|
#path_to_file(file) ⇒ Object
If only filename is given, construct the path (/files/my_file)
380
381
382
383
384
385
386
387
|
# File 'app/models/bulkrax/csv_entry.rb', line 380
# Resolves a file reference to an existing path.
#
# @param file [String] a path, or a filename relative to the parser's files directory
# @return [String] +file+ itself when it exists, otherwise +file+ joined onto
#   importerexporter.parser.path_to_files
# @raise [RuntimeError] when neither location exists
def path_to_file(file)
  return file if File.exist?(file)

  candidate = File.join(importerexporter.parser.path_to_files, file)
  raise "File #{candidate} does not exist" unless File.exist?(candidate)

  candidate
end
|
#prepare_export_data(datum) ⇒ Object
277
278
279
280
281
282
283
|
# File 'app/models/bulkrax/csv_entry.rb', line 277
# Converts a single value for export: an ActiveTriples::Resource becomes its
# URI string, anything else is returned unchanged.
#
# @param datum [Object] value to export
# @return [Object] the URI string or the original value
def prepare_export_data(datum)
  datum.is_a?(ActiveTriples::Resource) ? datum.to_uri.to_s : datum
end
|
#prepare_export_data_with_join(data) ⇒ Object
269
270
271
272
273
274
275
|
# File 'app/models/bulkrax/csv_entry.rb', line 269
# Converts a value to a single export string.
#
# Non-enumerable values are stringified. Enumerable values are converted item
# by item with prepare_export_data and joined with
# Bulkrax.multi_value_element_join_on; an empty enumerable yields "".
#
# @param data [Object] value to export
# @return [String]
def prepare_export_data_with_join(data)
  if data.is_a?(Enumerable)
    return "" if data.empty?

    prepared = data.map { |d| prepare_export_data(d) }
    prepared.join(Bulkrax.multi_value_element_join_on).to_s
  else
    data.to_s
  end
end
|
#record ⇒ Object
334
335
336
|
# File 'app/models/bulkrax/csv_entry.rb', line 334
# @return [Object] the entry's raw_metadata, memoized on first access
def record
  return @record if @record

  @record = raw_metadata
end
|
#validate_record ⇒ Object
89
90
91
92
93
|
# File 'app/models/bulkrax/csv_entry.rb', line 89
# Ensures the record exists and carries every element the parser requires.
#
# @return [nil]
# @raise [StandardError] when the record is nil, or when required elements are
#   missing (the message lists them)
def validate_record
  raise StandardError, 'Record not found' if record.nil?

  csv_parser = importerexporter.parser
  return if csv_parser.required_elements?(record)

  missing = csv_parser.missing_elements(record).join(', ')
  raise StandardError, "Missing required elements, missing element(s) are: #{missing}"
end
|