Class: Bulkrax::CsvEntry
- Inherits:
-
Entry
show all
- Defined in:
- app/models/bulkrax/csv_entry.rb
Overview
TODO: Rework this class to address the Metrics/ClassLength RuboCop offense. These entry classes do too much; the logic common to the various entry models should be extracted into a module that can be shared between them.
Defined Under Namespace
Modules: AttributeBuilderMethod
Classes: CsvWrapper
Instance Attribute Summary
Attributes inherited from Entry
#all_attrs
Class Method Summary
collapse
Instance Method Summary
collapse
Methods inherited from Entry
#build, #exporter?, #fetch_field_mapping, #find_collection, #importer?, #last_run, parent_field, #source_identifier, #valid_system_id, #work_identifier
#add_local
Methods included from StatusInfo
#current_status, #failed?, #last_error, #set_status_info, #status, #status_at, #succeeded?
#build_for_exporter, #filename, #hyrax_record
#active_id_for_authority?, #add_admin_set_id, #add_collections, #add_rights_statement, #add_user_to_permission_templates!, #add_visibility, #build_for_importer, #child_jobs, #factory, #factory_class, #override_rights_statement, #parent_jobs, #rights_statement, #sanitize_controlled_uri_value, #sanitize_controlled_uri_values!, #validate_value
#add_metadata, #excluded?, #field_supported?, #field_to, #get_object_name, #matched_metadata, #multiple?, #multiple_metadata, #set_parsed_data, #set_parsed_object_data, #single_metadata, #supported_bulkrax_fields
Class Method Details
.data_for_entry(data, _source_id, parser) ⇒ Object
59
60
61
62
63
64
65
66
67
68
69
|
# File 'app/models/bulkrax/csv_entry.rb', line 59
# Builds the raw metadata hash for a single CSV row.
#
# @param data [CSV::Table, #to_h] one row, or a table whose first row is used
# @param _source_id [Object] unused
# @param parser [Object] handed to `parent_field` to resolve the parent column name
# @return [Hash] the row as a hash, with :model re-copied when present and
#   :parents aliased from the configured parent column when that column has a different name
def self.data_for_entry(data, _source_id, parser)
  row = data.is_a?(CSV::Table) ? data.first : data
  raw_data = row.to_h

  model = row[:model]
  raw_data[:model] = model if model.present?

  parent_column = parent_field(parser)
  parent_key = parent_column.to_sym
  raw_data[:parents] = raw_data[parent_key] if raw_data.key?(parent_key) && parent_column != 'parents'

  raw_data
end
|
.fields_from_data(data) ⇒ Object
12
13
14
|
# File 'app/models/bulkrax/csv_entry.rb', line 12
# Lists the distinct, non-nil column headers of parsed CSV data.
#
# @param data [#headers] parsed CSV data that exposes its headers
#   (presumably the wrapper returned by `read_data` -- confirm against callers)
# @return [Array] flattened headers with nils and duplicates removed
def self.fields_from_data(data)
  # `data..flatten` had lost its method name; Ruby reads that as a Range and fails at runtime.
  data.headers.flatten.compact.uniq
end
|
.matcher_class ⇒ Object
340
341
342
|
# File 'app/models/bulkrax/csv_entry.rb', line 340
# Names the matcher class associated with this entry type.
#
# @return [Class] Bulkrax::CsvMatcher
def self.matcher_class
Bulkrax::CsvMatcher
end
|
.read_data(path) ⇒ Object
Note: this reads the whole file into memory at once, which can cause high memory usage for large CSV files.
19
20
21
22
23
24
25
26
27
28
29
|
# File 'app/models/bulkrax/csv_entry.rb', line 19
# Parses the CSV file at +path+ and wraps the resulting table.
#
# Headers are stripped and symbolized; anything returned by
# `csv_read_data_options` overrides the defaults below.
#
# @param path [String] location of the CSV file
# @return [Object] a `csv_wrapper_class` instance built from the parsed table
# @raise [StandardError] when +path+ is blank
def self.read_data(path)
  raise StandardError, 'CSV path empty' if path.blank?

  defaults = {
    headers: true,
    header_converters: ->(h) { h.to_s.strip.to_sym },
    encoding: 'utf-8'
  }
  # CSV.read loads the entire file before returning.
  table = CSV.read(path, **defaults.merge(csv_read_data_options))
  csv_wrapper_class.new(table)
end
|
Instance Method Details
#add_file ⇒ Object
130
131
132
133
134
135
136
137
138
139
140
141
142
|
# File 'app/models/bulkrax/csv_entry.rb', line 130
# Fills parsed_metadata['file'] with resolved file paths taken from record['file'].
#
# A String value is split on Bulkrax.multi_value_element_split_on, an Array is
# used as-is, and any other value leaves the existing list (or []) in place.
# Blank names are dropped; spaces in names become underscores before
# `path_to_file` resolves them.
#
# @return [Array] the resolved file paths
def add_file
  parsed_metadata['file'] ||= []

  raw_files = record['file']
  case raw_files
  when String
    parsed_metadata['file'] = raw_files.split(Bulkrax.multi_value_element_split_on)
  when Array
    parsed_metadata['file'] = raw_files
  end

  parsed_metadata['file'] = parsed_metadata['file']
                            .reject(&:blank?)
                            .map { |name| path_to_file(name.tr(' ', '_')) }
                            .compact
end
|
#add_identifier ⇒ Object
97
98
99
|
# File 'app/models/bulkrax/csv_entry.rb', line 97
# Copies the record's source identifier value into parsed_metadata under the
# work identifier key, wrapped in a single-element array.
def add_identifier
self.parsed_metadata[work_identifier] = [record[source_identifier]]
end
|
121
122
123
124
125
126
127
128
|
# File 'app/models/bulkrax/csv_entry.rb', line 121
# Passes every key/value pair of the record to `add_metadata`.
#
# The first run of digits in a key is treated as a 1-based position and is
# handed on as a 0-based index; keys without digits (or whose digits equal 0)
# get a nil index.
def add_ingested_metadata
  record.each do |key, value|
    position = key[/\d+/].to_i
    index = position.zero? ? nil : position - 1
    add_metadata(key_without_numbers(key), value, index)
  end
end
|
107
108
109
110
111
112
113
114
115
116
117
118
119
|
# File 'app/models/bulkrax/csv_entry.rb', line 107
# Adds the metadata that depends on which model this entry will create.
#
# - factory_class is ::Collection (when that constant is defined): adds the
#   collection type gid, but only if ::Hyrax is defined.
# - factory_class is Bulkrax.file_model_class: validates the filename, adds the
#   path to the file, then validates that a parent is present.
# - anything else: adds files (skipped for metadata-only runs) and the admin set id.
def add_metadata_for_model
if defined?(::Collection) && factory_class == ::Collection
add_collection_type_gid if defined?(::Hyrax)
elsif factory_class == Bulkrax.file_model_class
validate_presence_of_filename!
add_path_to_file
validate_presence_of_parent!
else
add_file unless importerexporter.metadata_only?
add_admin_set_id
end
end
|
144
145
146
147
148
149
150
151
152
153
154
|
# File 'app/models/bulkrax/csv_entry.rb', line 144
# Rebuilds parsed_metadata from scratch for export, saves the entry, and
# returns the resulting hash.
#
# @return [Hash] the freshly built parsed_metadata
def build_export_metadata
self.parsed_metadata = {}
build_system_metadata
# File metadata is only built when a Collection constant is defined and the record is not one.
build_files_metadata if defined?(Collection) && !hyrax_record.is_a?(Collection)
build_relationship_metadata
build_mapping_metadata
self.save!
self.parsed_metadata
end
|
165
166
167
168
169
170
171
172
173
174
175
176
|
# File 'app/models/bulkrax/csv_entry.rb', line 165
# Writes file-related export columns for the current record.
#
# Works delegate to `build_thumbnail_files`. Any other record exports the
# names of its file sets (or of itself, when it is a file set), joined or
# numbered according to the 'join' flag of the 'file' mapping.
def build_files_metadata
  return build_thumbnail_files if hyrax_record.work?

  export_key = key_for_export('file')
  sets = hyrax_record.file_set? ? Array.wrap(hyrax_record) : hyrax_record.file_sets
  names = map_file_sets(sets)
  join_names = mapping['file']&.[]('join')&.present?
  handle_join_on_export(export_key, names, join_names)
end
|
228
229
230
231
232
233
234
235
236
|
# File 'app/models/bulkrax/csv_entry.rb', line 228
# Runs the matching builder method for every entry of the field mapping.
#
# AttributeBuilderMethod.for chooses the method name for each key/value pair;
# pairs for which it returns nothing are skipped.
def build_mapping_metadata
  fetch_field_mapping.each do |property, config|
    builder = AttributeBuilderMethod.for(key: property, value: config, entry: self)
    send(builder, property, config) if builder
  end
end
|
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
|
# File 'app/models/bulkrax/csv_entry.rb', line 71
# Builds parsed_metadata for import from the raw record.
#
# Validates the record, resets parsed_metadata, then runs each builder step
# in the order listed below.
#
# @return [Hash] the populated parsed_metadata
# @raise [StandardError] from `validate_record` when the record is missing or incomplete
def build_metadata
validate_record
self.parsed_metadata = {}
add_identifier
establish_factory_class
add_ingested_metadata
add_collections
add_visibility
add_metadata_for_model
add_rights_statement
sanitize_controlled_uri_values!
add_local
self.parsed_metadata
end
|
#build_object(_key, value) ⇒ Object
238
239
240
241
242
243
244
245
246
|
# File 'app/models/bulkrax/csv_entry.rb', line 238
# Exports the nested objects named by value['object'] from the record.
#
# Does nothing when the record lacks that accessor or when it returns an
# empty collection; otherwise hands the data to `object_metadata` as an array.
#
# @param _key [Object] unused
# @param value [Hash] mapping configuration; 'object' names the accessor to call
def build_object(_key, value)
  accessor = value['object']
  return unless hyrax_record.respond_to?(accessor)

  objects = hyrax_record.send(accessor)
  return if objects.empty?

  objects = objects.to_a if objects.is_a?(ActiveTriples::Relation)
  object_metadata(Array.wrap(objects))
end
|
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
|
# File 'app/models/bulkrax/csv_entry.rb', line 178
# Exports parent and child relationship ids for the current record.
#
# For each relationship key (parents, children) the ids returned by every
# relationship method the record responds to are collected, flattened, and
# de-duplicated. They are then written out joined or numbered according to
# the 'join' flag of that relationship's own mapping entry.
def build_relationship_metadata
  relationship_methods = {
    related_parents_parsed_mapping => %i[member_of_collection_ids member_of_work_ids in_work_ids],
    related_children_parsed_mapping => %i[member_collection_ids member_work_ids file_set_ids]
  }

  relationship_methods.each do |relationship_key, methods|
    next if relationship_key.blank?

    values = methods.select { |m| hyrax_record.respond_to?(m) }
                    .map { |m| hyrax_record.public_send(m) }
                    .flatten
                    .uniq
    next if values.blank?

    # Read the join flag for THIS relationship (previously the parents mapping was
    # consulted for children too); `dig` tolerates a missing mapping entry.
    join = mapping.dig(relationship_key, 'join').present?
    handle_join_on_export(relationship_key, values, join)
  end
end
|
Metadata required by Bulkrax for round-tripping
157
158
159
160
161
162
163
|
# File 'app/models/bulkrax/csv_entry.rb', line 157
# Writes the record id, source identifier, and model name into parsed_metadata.
def build_system_metadata
self.parsed_metadata['id'] = hyrax_record.id
source_id = hyrax_record.send(work_identifier)
# A relation holds several values; only the first one is exported.
source_id = source_id.to_a.first if source_id.is_a?(ActiveTriples::Relation)
self.parsed_metadata[source_identifier] = source_id
self.parsed_metadata[key_for_export('model')] = hyrax_record.has_model.first
end
|
#build_thumbnail_files ⇒ Object
315
316
317
318
319
320
321
322
323
|
# File 'app/models/bulkrax/csv_entry.rb', line 315
# Exports the record's thumbnail file name(s) as numbered 'thumbnail_file'
# columns, but only when the exporter is configured to include thumbnails.
def build_thumbnail_files
  return unless importerexporter.include_thumbnails

  thumbnail_sets = Array.wrap(hyrax_record.thumbnail)
  handle_join_on_export('thumbnail_file', map_file_sets(thumbnail_sets), false)
end
|
#build_value(property_name, mapping_config) ⇒ Object
248
249
250
251
252
253
254
255
256
257
258
259
260
|
# File 'app/models/bulkrax/csv_entry.rb', line 248
# Exports one property of the record into parsed_metadata.
#
# Enumerable values without a 'join' setting are spread over numbered columns
# (key_1, key_2, ...); everything else is written to a single column.
#
# @param property_name [String, Symbol] accessor to read from the record
# @param mapping_config [Hash] mapping configuration; 'join' forces a single column
def build_value(property_name, mapping_config)
  accessor = property_name.to_s
  return unless hyrax_record.respond_to?(accessor)

  values = hyrax_record.send(accessor)
  numbered_columns = !mapping_config['join'] && values.is_a?(Enumerable)

  if numbered_columns
    values.each_with_index do |item, offset|
      parsed_metadata["#{key_for_export(property_name)}_#{offset + 1}"] = prepare_export_data(item)
    end
  else
    parsed_metadata[key_for_export(property_name)] = prepare_export_data_with_join(values)
  end
end
|
#collection_identifiers ⇒ Object
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
|
# File 'app/models/bulkrax/csv_entry.rb', line 344
# Resolves the parent references in the record to identifiers of collection
# entries that belong to the same importer.
#
# @return [Array] identifiers of the matching CsvCollectionEntry rows; [] when
#   the parent column is not configured, is blank, or nothing matches
# @raise [StandardError] when one reference matches more than one collection entry
def collection_identifiers
# Only a non-blank result is reused; an empty result is recomputed on each call.
return @collection_identifiers if @collection_identifiers.present?
parent_field_mapping = self.class.parent_field(parser)
return [] unless parent_field_mapping.present? && record[parent_field_mapping].present?
identifiers = []
split_references = record[parent_field_mapping].split(Bulkrax.multi_value_element_split_on)
split_references.each do |c_reference|
# Scans the importer's entries once per reference.
matching_collection_entries = importerexporter.entries.select do |e|
(e.raw_metadata&.[](source_identifier) == c_reference) &&
e.is_a?(CsvCollectionEntry)
end
raise ::StandardError, 'Only expected to find one matching entry' if matching_collection_entries.count > 1
identifiers << matching_collection_entries.first&.identifier
end
@collection_identifiers = identifiers.compact.presence || []
end
|
#collections_created? ⇒ Boolean
363
364
365
366
|
# File 'app/models/bulkrax/csv_entry.rb', line 363
# Always true for this entry type, so `find_collection_ids` returns the
# current collection_ids without performing any lookups.
#
# @return [Boolean] true
def collections_created?
true
end
|
#establish_factory_class ⇒ Object
101
102
103
104
105
|
# File 'app/models/bulkrax/csv_entry.rb', line 101
# Adds 'model' metadata from every model column the record actually contains.
#
# The candidate column names come from parser.model_field_mappings.
def establish_factory_class
  parser.model_field_mappings.each do |column|
    next unless record.key?(column)

    add_metadata('model', record[column])
  end
end
|
#find_collection_ids ⇒ Object
368
369
370
371
372
373
374
375
376
377
378
379
|
# File 'app/models/bulkrax/csv_entry.rb', line 368
# Returns the ids of the collections this entry belongs to.
#
# When `collections_created?` is true the current collection_ids are returned
# untouched. Otherwise each collection identifier is looked up with
# `find_collection` and the ids that were found and not yet recorded are appended.
#
# @return [Array] collection_ids
def find_collection_ids
  return collection_ids if collections_created?

  identifiers = collection_identifiers
  if identifiers.present?
    identifiers.each do |identifier|
      collection = find_collection(identifier)
      next if collection.blank? || collection_ids.include?(collection.id)

      collection_ids << collection.id
    end
  end

  collection_ids
end
|
#handle_join_on_export(key, values, join) ⇒ Object
325
326
327
328
329
330
331
332
333
334
|
# File 'app/models/bulkrax/csv_entry.rb', line 325
# Writes +values+ into parsed_metadata either as one joined column or as
# numbered columns.
#
# @param key [String] base column name
# @param values [Array] values to export
# @param join [Boolean] truthy: store one string joined with
#   Bulkrax.multi_value_element_join_on under +key+; falsy: store key_1, key_2, ...
#   and remove any plain +key+ column
def handle_join_on_export(key, values, join)
  unless join
    values.each_with_index do |value, offset|
      parsed_metadata["#{key}_#{offset + 1}"] = value
    end
    return parsed_metadata.delete(key)
  end

  parsed_metadata[key] = values.join(Bulkrax.multi_value_element_join_on)
end
|
#key_for_export(key) ⇒ Object
On export the key becomes the from and the from becomes the destination. It is the opposite of the import because we are moving data in the opposite direction. Metadata that does not have a specific Bulkrax entry is mapped to the key name, as matching keys coming in are mapped by the CSV parser automatically.
264
265
266
267
268
269
|
# File 'app/models/bulkrax/csv_entry.rb', line 264
# Translates an internal key into the column name used on export.
#
# The numeric suffix is set aside, the remaining key is swapped for the first
# 'from' value of its mapping (when a mapping exists), and the suffix is
# appended again.
#
# @param key [String] internal key, possibly carrying a numeric suffix
# @return [String] the export column name
def key_for_export(key)
  base = key_without_numbers(key)
  suffix = key.sub(base, '')
  field_config = mapping[base]
  export_base = field_config ? field_config['from'].first : base
  "#{export_base}#{suffix}"
end
|
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
|
# File 'app/models/bulkrax/csv_entry.rb', line 287
# Flattens nested object data into numbered parsed_metadata columns.
#
# Each element of +data+ is evaluated into a Ruby object and the results are
# flattened. Every key of every resulting object is exported as
# "<key>_<object position>", or "<key>_<object position>_<item position>" when
# the value is an Array. Positions are 1-based.
#
# @param data [Array] presumably strings holding serialized hashes -- confirm against callers
def object_metadata(data)
# NOTE(review): `eval` executes whatever code the stored value contains. If this
# data can originate from imported or user-editable metadata, replace it with a
# safe parser (e.g. JSON) -- flagged for follow-up, behaviour left unchanged.
data = data.map { |d| eval(d) }.flatten
data.each_with_index do |obj, index|
next if obj.nil?
obj = obj.with_indifferent_access
obj.each_key do |key|
if obj[key].is_a?(Array)
obj[key].each_with_index do |_nested_item, nested_index|
self.parsed_metadata["#{key_for_export(key)}_#{index + 1}_#{nested_index + 1}"] = prepare_export_data(obj[key][nested_index])
end
else
self.parsed_metadata["#{key_for_export(key)}_#{index + 1}"] = prepare_export_data(obj[key])
end
end
end
end
|
#path_to_file(file) ⇒ Object
If only filename is given, construct the path (/files/my_file)
382
383
384
385
386
387
388
389
|
# File 'app/models/bulkrax/csv_entry.rb', line 382
# Resolves +file+ to an existing path.
#
# @param file [String] a path that already exists, or a name relative to the
#   parser's path_to_files directory
# @return [String] +file+ itself when it exists, otherwise the joined path
# @raise [RuntimeError] when neither location exists
def path_to_file(file)
  return file if File.exist?(file)

  candidate = File.join(importerexporter.parser.path_to_files, file)
  raise "File #{candidate} does not exist" unless File.exist?(candidate)

  candidate
end
|
#prepare_export_data(datum) ⇒ Object
279
280
281
282
283
284
285
|
# File 'app/models/bulkrax/csv_entry.rb', line 279
# Converts a single value for export.
#
# @param datum [Object] value read from the record
# @return [Object] the URI string for an ActiveTriples::Resource, otherwise +datum+ unchanged
def prepare_export_data(datum)
  return datum unless datum.is_a?(ActiveTriples::Resource)

  datum.to_uri.to_s
end
|
#prepare_export_data_with_join(data) ⇒ Object
271
272
273
274
275
276
277
|
# File 'app/models/bulkrax/csv_entry.rb', line 271
# Converts a value to a single export string.
#
# @param data [Object] a scalar or an Enumerable of values
# @return [String] `to_s` of a non-Enumerable, "" for an empty Enumerable,
#   otherwise the prepared elements joined with Bulkrax.multi_value_element_join_on
def prepare_export_data_with_join(data)
  if !data.is_a?(Enumerable)
    data.to_s
  elsif data.empty?
    ""
  else
    prepared = data.map { |item| prepare_export_data(item) }
    prepared.join(Bulkrax.multi_value_element_join_on).to_s
  end
end
|
#record ⇒ Object
336
337
338
|
# File 'app/models/bulkrax/csv_entry.rb', line 336
# The row data this entry works from: raw_metadata, cached in @record after
# the first call that yields a truthy value.
def record
@record ||= raw_metadata
end
|
#validate_record ⇒ Object
89
90
91
92
93
94
95
|
# File 'app/models/bulkrax/csv_entry.rb', line 89
# Ensures the record exists and contains every element the parser requires.
#
# @return [nil] when the record is valid
# @raise [StandardError] 'Record not found' when the record is nil, or a
#   message listing the missing elements when required ones are absent
def validate_record
  raise StandardError, 'Record not found' if record.nil?

  active_parser = importerexporter.parser
  return if active_parser.required_elements?(record)

  missing = active_parser.missing_elements(record).join(', ')
  raise StandardError, "Missing required elements, missing element(s) are: #{missing}"
end
|