Class: Bulkrax::CsvEntry
- Inherits:
-
Entry
show all
- Defined in:
- app/models/bulkrax/csv_entry.rb
Overview
TODO: We need to rework this class some to address the Metrics/ClassLength rubocop offense. We do too much in these entry classes. We need to extract the common logic from the various entry models into a module that can be shared between them.
Defined Under Namespace
Modules: AttributeBuilderMethod
Classes: CsvWrapper
Instance Attribute Summary
Attributes inherited from Entry
#all_attrs
Class Method Summary
collapse
Instance Method Summary
collapse
Methods inherited from Entry
#build, #exporter?, #fetch_field_mapping, #find_collection, #importer?, #last_run, parent_field, #source_identifier, #work_identifier
#add_local
Methods included from StatusInfo
#current_status, #failed?, #last_error, #set_status_info, #skipped?, #status, #status_at, #succeeded?
#build_for_exporter, #filename, #hyrax_record
#active_id_for_authority?, #add_admin_set_id, #add_collections, #add_rights_statement, #add_user_to_permission_templates!, #add_visibility, #build_for_importer, #child_jobs, #factory, #factory_class, #override_rights_statement, #parent_jobs, #rights_statement, #sanitize_controlled_uri_value, #sanitize_controlled_uri_values!, #validate_value
#add_metadata, #excluded?, #field_supported?, #field_to, #fields_that_are_always_multiple, #fields_that_are_always_singular, #get_object_name, #matched_metadata, #multiple?, #multiple_metadata, #schema_form_definitions, #set_parsed_data, #set_parsed_object_data, #single_metadata, #supported_bulkrax_fields
Class Method Details
.data_for_entry(data, _source_id, parser) ⇒ Object
58
59
60
61
62
63
64
65
66
67
68
|
# File 'app/models/bulkrax/csv_entry.rb', line 58
# Builds the raw metadata hash for an entry from one row of CSV data.
#
# @param data [CSV::Table, #to_h] a table (only its first row is used) or a single row-like object
# @param _source_id [Object] unused by this implementation
# @param parser [Object] passed to +parent_field+ to resolve the parent column name
# @return [Hash] the row as a hash, with +:model+ and +:parents+ filled in when available
def self.data_for_entry(data, _source_id, parser)
  row = data.is_a?(CSV::Table) ? data.first : data
  raw_data = row.to_h

  model = row[:model]
  raw_data[:model] = model if model.present?

  # Mirror a custom parent column under :parents; when the column is already
  # called 'parents' there is nothing to copy.
  parent_column = parent_field(parser)
  if parent_column != 'parents' && raw_data.key?(parent_column.to_sym)
    raw_data[:parents] = raw_data[parent_column.to_sym]
  end

  raw_data
end
|
.fields_from_data(data) ⇒ Object
10
11
12
|
# File 'app/models/bulkrax/csv_entry.rb', line 10
# Lists the distinct, non-nil field names found in parsed CSV data.
#
# @param data [#headers] parsed CSV data exposing +headers+ (for example a CSV::Table)
# @return [Array] the flattened headers with nils and duplicates removed
def self.fields_from_data(data)
  # `data..flatten` parsed as a Range whose end was a bare, undefined `flatten`
  # call and raised NameError; the headers are what need flattening.
  data.headers.flatten.compact.uniq
end
|
.matcher_class ⇒ Object
342
343
344
|
# File 'app/models/bulkrax/csv_entry.rb', line 342
# Names the matcher class used with CSV entries.
#
# @return [Class] always Bulkrax::CsvMatcher
def self.matcher_class
Bulkrax::CsvMatcher
end
|
.read_data(path) ⇒ Object
There’s a risk that this reads the whole file into memory and could cause a memory leak. We strip any special characters out of the headers (looking at you, Excel).
18
19
20
21
22
23
24
25
26
27
28
|
# File 'app/models/bulkrax/csv_entry.rb', line 18
# Reads the CSV file at +path+, treating the first row as headers, and wraps the result.
#
# Headers are stripped of every character that is not a word character, digit,
# dot, space or hyphen, then trimmed and symbolized. Options returned by
# +csv_read_data_options+ override the defaults below.
#
# @param path [String] location of the CSV file
# @return [Object] an instance of +csv_wrapper_class+ built from the parsed table
# @raise [StandardError] when +path+ is blank
def self.read_data(path)
  raise StandardError, 'CSV path empty' if path.blank?

  clean_header = lambda do |header|
    header.to_s.gsub(/[^\w\d\. -]+/, '').strip.to_sym
  end
  defaults = { headers: true, header_converters: clean_header, encoding: 'utf-8' }

  table = CSV.read(path, **defaults.merge(csv_read_data_options))
  csv_wrapper_class.new(table)
end
|
Instance Method Details
#add_file ⇒ Object
129
130
131
132
133
134
135
136
137
138
139
140
141
|
# File 'app/models/bulkrax/csv_entry.rb', line 129
# Fills parsed_metadata['file'] with resolved file paths taken from the record's 'file' value.
#
# A String value is split on Bulkrax.multi_value_element_split_on, an Array is
# used as given, and anything else leaves the existing (or empty) list in place.
# Blank names are dropped; spaces in names become underscores before lookup.
#
# @return [Array] the resolved paths now stored under 'file'
def add_file
  parsed_metadata['file'] ||= []

  source = record['file']
  case source
  when String
    parsed_metadata['file'] = source.split(Bulkrax.multi_value_element_split_on)
  when Array
    parsed_metadata['file'] = source
  end

  resolved = parsed_metadata['file'].each_with_object([]) do |name, paths|
    next if name.blank?

    location = path_to_file(name.tr(' ', '_'))
    paths << location unless location.nil?
  end
  parsed_metadata['file'] = resolved
end
|
#add_identifier ⇒ Object
96
97
98
|
# File 'app/models/bulkrax/csv_entry.rb', line 96
# Stores the record's source identifier, wrapped in a one-element array,
# under the work identifier key of parsed_metadata.
#
# @return [Array] the single-element array that was stored
def add_identifier
  identifier_value = record[source_identifier]
  parsed_metadata[work_identifier] = [identifier_value]
end
|
120
121
122
123
124
125
126
127
|
# File 'app/models/bulkrax/csv_entry.rb', line 120
# Passes every column of the record to add_metadata.
#
# The first run of digits in a column name is read as a 1-based position and
# handed on as a 0-based index; names without digits (or with 0) pass nil.
def add_ingested_metadata
  record.each do |column, cell|
    ordinal = column[/\d+/].to_i
    position = ordinal.zero? ? nil : ordinal - 1
    add_metadata(key_without_numbers(column), cell, position)
  end
end
|
106
107
108
109
110
111
112
113
114
115
116
117
118
|
# File 'app/models/bulkrax/csv_entry.rb', line 106
# Adds the metadata that depends on which model the entry builds.
#
# * collection model: adds the collection type gid, only when ::Hyrax is defined
# * file model: checks the filename, adds the file path, then checks the parent
# * anything else: adds files (unless the import is metadata-only) and the admin set id
def add_metadata_for_model
  model_class = factory_class
  builds_collection = model_class.present? && model_class == Bulkrax.collection_model_class

  if builds_collection
    add_collection_type_gid if defined?(::Hyrax)
  elsif model_class == Bulkrax.file_model_class
    validate_presence_of_filename!
    add_path_to_file
    validate_presence_of_parent!
  else
    add_file unless importerexporter.metadata_only?
    add_admin_set_id
  end
end
|
143
144
145
146
147
148
149
150
151
152
153
|
# File 'app/models/bulkrax/csv_entry.rb', line 143
# Rebuilds parsed_metadata for export, saves the entry and returns the metadata.
#
# @return [Hash] the freshly built parsed_metadata
def build_export_metadata
  self.parsed_metadata = {}
  build_system_metadata

  # File columns are skipped when no collection model is configured or when
  # the record being exported is itself a collection.
  collection_class = Bulkrax.collection_model_class
  build_files_metadata unless collection_class.blank? || hyrax_record.is_a?(collection_class)

  build_relationship_metadata
  build_mapping_metadata
  save!
  parsed_metadata
end
|
167
168
169
170
171
172
173
174
175
176
177
178
|
# File 'app/models/bulkrax/csv_entry.rb', line 167
# Exports file names for the record.
#
# Works export only their thumbnail files (via build_thumbnail_files). Other
# records export the names of their file sets under the mapped 'file' key,
# joined or numbered depending on the 'file' mapping's 'join' setting.
def build_files_metadata
  return build_thumbnail_files if hyrax_record.work?

  export_key = key_for_export('file')
  sources = hyrax_record.file_set? ? Array.wrap(hyrax_record) : hyrax_record.file_sets
  names = map_file_sets(sources)
  joined = mapping['file']&.[]('join')&.present?
  handle_join_on_export(export_key, names, joined)
end
|
230
231
232
233
234
235
236
237
238
|
# File 'app/models/bulkrax/csv_entry.rb', line 230
# Walks the field mapping and, for each entry, calls the builder method that
# AttributeBuilderMethod selects. Entries with no builder are skipped.
def build_mapping_metadata
  fetch_field_mapping.each do |field, config|
    builder = AttributeBuilderMethod.for(key: field, value: config, entry: self)
    send(builder, field, config) if builder
  end
end
|
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
|
# File 'app/models/bulkrax/csv_entry.rb', line 70
# Builds parsed_metadata for an import from the raw record.
#
# The steps run in a fixed order: the record is validated first (which may
# raise), parsed_metadata is reset to an empty hash, and each step then adds
# to it.
#
# @return [Hash] the populated parsed_metadata
def build_metadata
validate_record
# Start from a clean slate so values from a previous build do not linger.
self.parsed_metadata = {}
add_identifier
establish_factory_class
add_ingested_metadata
add_collections
add_visibility
add_metadata_for_model
add_rights_statement
sanitize_controlled_uri_values!
add_local
self.parsed_metadata
end
|
#build_object(_key, value) ⇒ Object
240
241
242
243
244
245
246
247
248
|
# File 'app/models/bulkrax/csv_entry.rb', line 240
# Exports the data returned by the record method named in value['object'].
#
# Does nothing when the record does not respond to that method or when the
# returned data is empty. An ActiveTriples::Relation is converted to an array
# before being handed to object_metadata.
#
# @param _key [Object] unused
# @param value [Hash] mapping configuration; 'object' names the method to call
def build_object(_key, value)
  accessor = value['object']
  return unless hyrax_record.respond_to?(accessor)

  data = hyrax_record.send(accessor)
  return if data.empty?

  rows = data.is_a?(ActiveTriples::Relation) ? data.to_a : data
  object_metadata(Array.wrap(rows))
end
|
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
|
# File 'app/models/bulkrax/csv_entry.rb', line 180
# Exports the record's parent and child relationships.
#
# For each relationship key (parents, children) the ids returned by every
# listed method the record responds to are collected, flattened and
# de-duplicated, then written with handle_join_on_export. Keys that are blank
# and relationships with no ids are skipped.
def build_relationship_metadata
  relationship_methods = {
    related_parents_parsed_mapping => %i[member_of_collection_ids member_of_work_ids in_work_ids],
    related_children_parsed_mapping => %i[member_collection_ids member_work_ids file_set_ids]
  }

  relationship_methods.each do |relationship_key, methods|
    next if relationship_key.blank?

    supported = methods.select { |m| hyrax_record.respond_to?(m) }
    values = supported.map { |m| hyrax_record.public_send(m) }.flatten.uniq
    next if values.blank?

    # Read the join flag from this relationship's own mapping. Previously the
    # parents mapping decided it for children as well, and a missing mapping
    # entry raised NoMethodError.
    join = mapping[relationship_key]&.[]('join').present?
    handle_join_on_export(relationship_key, values, join)
  end
end
|
Metadata required by Bulkrax for round-tripping
156
157
158
159
160
161
162
163
164
165
|
# File 'app/models/bulkrax/csv_entry.rb', line 156
# Writes the record id, its source identifier and its model name into parsed_metadata.
#
# Only the first source identifier value is kept. The model name comes from
# to_rdf_representation when the record responds to it, otherwise from the
# first has_model value.
def build_system_metadata
  parsed_metadata['id'] = hyrax_record.id

  identifier = hyrax_record.send(work_identifier)
  identifier = identifier.to_a if identifier.is_a?(ActiveTriples::Relation)
  parsed_metadata[source_identifier] = Array.wrap(identifier).first

  model_name =
    if hyrax_record.respond_to?(:to_rdf_representation)
      hyrax_record.to_rdf_representation
    else
      hyrax_record.has_model.first
    end
  parsed_metadata[key_for_export('model')] = model_name
end
|
#build_thumbnail_files ⇒ Object
317
318
319
320
321
322
323
324
325
|
# File 'app/models/bulkrax/csv_entry.rb', line 317
# Exports the record's thumbnail file name(s) as numbered 'thumbnail_file'
# columns. Does nothing unless the exporter is set to include thumbnails.
def build_thumbnail_files
  return unless importerexporter.include_thumbnails

  thumbnails = Array.wrap(hyrax_record.thumbnail)
  handle_join_on_export('thumbnail_file', map_file_sets(thumbnails), false)
end
|
#build_value(property_name, mapping_config) ⇒ Object
250
251
252
253
254
255
256
257
258
259
260
261
262
|
# File 'app/models/bulkrax/csv_entry.rb', line 250
# Exports one property of the record into parsed_metadata.
#
# Enumerable values without a 'join' setting are spread over numbered keys
# (key_1, key_2, ...). Everything else is written to a single key using
# prepare_export_data_with_join. Properties the record does not respond to
# are ignored.
#
# @param property_name [String, Symbol] name of the record method to read
# @param mapping_config [Hash] mapping configuration; 'join' forces a single joined value
def build_value(property_name, mapping_config)
  reader = property_name.to_s
  return unless hyrax_record.respond_to?(reader)

  data = hyrax_record.send(reader)
  if !mapping_config['join'] && data.is_a?(Enumerable)
    data.each_with_index do |item, position|
      parsed_metadata["#{key_for_export(property_name)}_#{position + 1}"] = prepare_export_data(item)
    end
  else
    parsed_metadata[key_for_export(property_name)] = prepare_export_data_with_join(data)
  end
end
|
#collection_identifiers ⇒ Object
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
|
# File 'app/models/bulkrax/csv_entry.rb', line 346
# Resolves the record's parent references to identifiers of collection entries.
#
# The parent column is split on Bulkrax.multi_value_element_split_on. Each
# reference is matched against the importer's entries whose raw metadata
# source identifier equals it and which are CsvCollectionEntry instances.
# A non-empty result is cached in @collection_identifiers.
#
# @return [Array] identifiers of the matching entries, or [] when there are none
# @raise [StandardError] when one reference matches more than one entry
def collection_identifiers
  return @collection_identifiers if @collection_identifiers.present?

  parent_column = self.class.parent_field(parser)
  return [] unless parent_column.present? && record[parent_column].present?

  references = record[parent_column].split(Bulkrax.multi_value_element_split_on)
  resolved = references.map do |reference|
    candidates = importerexporter.entries.select do |entry|
      (entry.raw_metadata&.[](source_identifier) == reference) &&
        entry.is_a?(CsvCollectionEntry)
    end
    raise ::StandardError, 'Only expected to find one matching entry' if candidates.count > 1

    candidates.first&.identifier
  end

  @collection_identifiers = resolved.compact.presence || []
end
|
#collections_created? ⇒ Boolean
365
366
367
368
|
# File 'app/models/bulkrax/csv_entry.rb', line 365
# Reports whether the entry's collections have been created.
#
# @return [Boolean] always true in this class
def collections_created?
true
end
|
#establish_factory_class ⇒ Object
100
101
102
103
104
|
# File 'app/models/bulkrax/csv_entry.rb', line 100
# Adds 'model' metadata from every model column (as listed by the parser's
# model_field_mappings) that is present as a key in the record.
def establish_factory_class
  parser.model_field_mappings.each do |column|
    next unless record.key?(column)

    add_metadata('model', record[column])
  end
end
|
#find_collection_ids ⇒ Object
370
371
372
373
374
375
376
377
378
379
380
381
|
# File 'app/models/bulkrax/csv_entry.rb', line 370
# Returns the entry's collection ids.
#
# When collections_created? is true the current ids are returned untouched.
# Otherwise each collection identifier is looked up with find_collection and
# its id appended, skipping collections that are missing or already listed.
#
# @return [Array] the collection ids
def find_collection_ids
  return collection_ids if collections_created?

  if collection_identifiers.present?
    collection_identifiers.each do |identifier|
      collection = find_collection(identifier)
      next if collection.blank? || collection_ids.include?(collection.id)

      collection_ids << collection.id
    end
  end

  collection_ids
end
|
#handle_join_on_export(key, values, join) ⇒ Object
327
328
329
330
331
332
333
334
335
336
|
# File 'app/models/bulkrax/csv_entry.rb', line 327
# Writes +values+ into parsed_metadata either joined or numbered.
#
# @param key [String] export column name
# @param values [Array] values to write
# @param join [Boolean] truthy: store one value joined with
#   Bulkrax.multi_value_element_join_on under +key+; falsy: store each value
#   under "key_1", "key_2", ... and remove any plain +key+ entry
def handle_join_on_export(key, values, join)
  unless join
    values.each_with_index do |value, position|
      parsed_metadata["#{key}_#{position + 1}"] = value
    end
    return parsed_metadata.delete(key)
  end

  parsed_metadata[key] = values.join(Bulkrax.multi_value_element_join_on)
end
|
#key_for_export(key) ⇒ Object
On export the key becomes the from and the from becomes the destination; this is the opposite of import because the data is moving in the opposite direction. Metadata that does not have a specific Bulkrax entry is mapped to the key name, since matching keys coming in are mapped by the CSV parser automatically.
266
267
268
269
270
271
|
# File 'app/models/bulkrax/csv_entry.rb', line 266
# Translates an internal key into its export column name.
#
# The key is looked up without its numeric suffix. When a mapping exists, the
# first 'from' value replaces the base name; otherwise the base name is kept.
# Whatever key_without_numbers removed from the key is appended again.
#
# @param key [String] internal key, possibly numbered
# @return [String] the export column name
def key_for_export(key)
  base = key_without_numbers(key)
  entry = mapping[base]
  exported = entry ? entry['from'].first : base
  suffix = key.sub(base, '')
  "#{exported}#{suffix}"
end
|
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
|
# File 'app/models/bulkrax/csv_entry.rb', line 289
# Exports nested object data into numbered parsed_metadata keys.
#
# Each element of +data+ is evaluated as Ruby and the results are flattened.
# Every non-nil result is treated as a hash: array values are written to
# "key_N_M" and other values to "key_N", where N is the 1-based position of
# the object and M the 1-based position inside the array.
#
# NOTE(review): eval runs the stored strings as Ruby code. This is only safe
# if the stored data is trusted; confirm, or replace with a safe parser.
#
# @param data [Array<String>] serialized objects
def object_metadata(data)
  data = data.map { |d| eval(d) }.flatten
  data.each_with_index do |obj, index|
    next if obj.nil?

    row = index + 1
    obj.with_indifferent_access.each_pair do |key, entry|
      column = "#{key_for_export(key)}_#{row}"
      if entry.is_a?(Array)
        entry.each_with_index do |item, nested_index|
          parsed_metadata["#{column}_#{nested_index + 1}"] = prepare_export_data(item)
        end
      else
        parsed_metadata[column] = prepare_export_data(entry)
      end
    end
  end
end
|
#path_to_file(file) ⇒ Object
If only filename is given, construct the path (/files/my_file)
384
385
386
387
388
389
390
391
|
# File 'app/models/bulkrax/csv_entry.rb', line 384
# Resolves +file+ to an existing path.
#
# An existing path is returned as given; otherwise the name is looked up
# inside the parser's path_to_files directory.
#
# @param file [String] a path or bare file name
# @return [String] a path that exists on disk
# @raise [RuntimeError] when the file is found in neither location
def path_to_file(file)
  return file if File.exist?(file)

  candidate = File.join(importerexporter.parser.path_to_files, file)
  raise "File #{candidate} does not exist" unless File.exist?(candidate)

  candidate
end
|
#prepare_export_data(datum) ⇒ Object
281
282
283
284
285
286
287
|
# File 'app/models/bulkrax/csv_entry.rb', line 281
# Converts a single value for export: an ActiveTriples::Resource becomes its
# URI string, any other value is returned unchanged.
#
# @param datum [Object] value to export
# @return [Object] the URI string for resources, otherwise +datum+ itself
def prepare_export_data(datum)
  return datum unless datum.is_a?(ActiveTriples::Resource)

  datum.to_uri.to_s
end
|
#prepare_export_data_with_join(data) ⇒ Object
273
274
275
276
277
278
279
|
# File 'app/models/bulkrax/csv_entry.rb', line 273
# Produces a single export string from +data+.
#
# Non-enumerable values are stringified, empty collections give an empty
# string, and other collections are converted item by item and joined with
# Bulkrax.multi_value_element_join_on.
#
# @param data [Object] a scalar or an enumerable of values
# @return [String]
def prepare_export_data_with_join(data)
  if !data.is_a?(Enumerable)
    data.to_s
  elsif data.empty?
    ""
  else
    converted = data.map { |item| prepare_export_data(item) }
    converted.join(Bulkrax.multi_value_element_join_on).to_s
  end
end
|
#record ⇒ Object
338
339
340
|
# File 'app/models/bulkrax/csv_entry.rb', line 338
# The record this entry works from: raw_metadata, cached in @record once it
# has a truthy value.
#
# @return [Object] the cached raw metadata
def record
  return @record if @record

  @record = raw_metadata
end
|
#validate_record ⇒ Object
88
89
90
91
92
93
94
|
# File 'app/models/bulkrax/csv_entry.rb', line 88
# Checks that a record exists and that the parser finds all required elements in it.
#
# @return [nil]
# @raise [StandardError] when the record is nil, or when required elements
#   are missing (the message lists them)
def validate_record
  raise StandardError, 'Record not found' if record.nil?

  active_parser = importerexporter.parser
  return if active_parser.required_elements?(record)

  missing = active_parser.missing_elements(record).join(', ')
  raise StandardError, "Missing required elements, missing element(s) are: "\
                       "#{missing}"
end
|