Class: Bulkrax::CsvParser
Overview
rubocop:disable Metrics/ClassLength
Instance Attribute Summary collapse
#headers, #importerexporter
Class Method Summary
collapse
Instance Method Summary
collapse
#build_export_metadata, #build_for_exporter, #filename, #hyrax_record
#build_errored_entry_row, #setup_errored_entries_file, #write_errored_entries_file
#base_path, #exporter?, #find_or_create_entry, #generated_metadata_mapping, #get_field_mapping_hash_for, #import_file_path, #import_supported?, #importer?, #initialize, #invalid_record, #limit_reached?, #model_field_mappings, #new_entry, #parser_fields, #path_for_import, #perform_method, #record, #record_has_source_identifier, #related_children_parsed_mapping, #related_children_raw_mapping, #related_parents_parsed_mapping, #related_parents_raw_mapping, #required_elements, #source_identifier, #unzip, #visibility, #work_identifier, #write, #write_import_file, #zip
Instance Attribute Details
#collections ⇒ Object
56
57
58
59
|
# File 'app/parsers/bulkrax/csv_parser.rb', line 56
# Rows classified as collections. Lazily triggers #build_records the
# first time it is read.
# @return [Array<Hash>]
def collections
  return @collections unless @collections.nil?
  build_records
  @collections
end
|
#file_sets ⇒ Object
66
67
68
69
|
# File 'app/parsers/bulkrax/csv_parser.rb', line 66
# Rows classified as file sets. Lazily triggers #build_records the
# first time it is read.
# @return [Array<Hash>]
def file_sets
  return @file_sets unless @file_sets.nil?
  build_records
  @file_sets
end
|
#works ⇒ Object
61
62
63
64
|
# File 'app/parsers/bulkrax/csv_parser.rb', line 61
# Rows classified as works. Lazily triggers #build_records the
# first time it is read.
# @return [Array<Hash>]
def works
  return @works unless @works.nil?
  build_records
  @works
end
|
Class Method Details
.export_supported? ⇒ Boolean
10
11
12
|
# File 'app/parsers/bulkrax/csv_parser.rb', line 10
# Whether this parser supports exporting.
# @return [Boolean] always true for the CSV parser
def self.export_supported?
  true
end
|
Instance Method Details
#build_records ⇒ Object
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
|
# File 'app/parsers/bulkrax/csv_parser.rb', line 26
# Partition the CSV rows into @collections, @works and @file_sets based on
# the model column (any column named in model_field_mappings). A model of
# 'collection' or 'fileset' (case-insensitive) routes the row accordingly;
# anything else is a work. When no model column exists, every row is a work.
# NOTE: like the original, this assumes at least one record exists
# (records.first.keys would raise otherwise).
# @return [true]
def build_records
  @collections = []
  @works = []
  @file_sets = []

  # any? with a block replaces the original build-array-then-any? and the
  # ActiveSupport-only Symbol#in?; truth value is identical
  if model_field_mappings.any? { |mfm| records.first.keys.include?(mfm.to_sym) }
    # each, not map: we iterate purely for the side effect of bucketing rows
    records.each do |record|
      model_field_mappings.map(&:to_sym).each do |model_mapping|
        next unless record.key?(model_mapping)

        model = record[model_mapping].nil? ? "" : record[model_mapping].strip
        if model.casecmp('collection').zero?
          @collections << record
        elsif model.casecmp('fileset').zero?
          @file_sets << record
        else
          @works << record
        end
      end
    end
    @collections = @collections.flatten.compact.uniq
    @file_sets = @file_sets.flatten.compact.uniq
    @works = @works.flatten.compact.uniq
  else
    # no model column at all: treat every row as a work
    @works = records.flatten.compact.uniq
  end

  true
end
|
#collection_entry_class ⇒ Object
209
210
211
|
# File 'app/parsers/bulkrax/csv_parser.rb', line 209
# Entry class used for collection rows.
# @return [Class] Bulkrax::CsvCollectionEntry
def collection_entry_class
  CsvCollectionEntry
end
|
#collections_total ⇒ Object
71
72
73
|
# File 'app/parsers/bulkrax/csv_parser.rb', line 71
# Number of collection rows detected in the CSV.
# @return [Integer]
def collections_total
  collections.length
end
|
#create_collections ⇒ Object
116
117
118
|
# File 'app/parsers/bulkrax/csv_parser.rb', line 116
# Import only the collection rows from the CSV.
def create_collections
  create_objects(['collection'])
end
|
#create_entry_and_job(current_record, type) ⇒ Object
155
156
157
158
159
160
161
162
163
164
165
|
# File 'app/parsers/bulkrax/csv_parser.rb', line 155
# Find or build the importer entry for +current_record+ and enqueue the
# matching job. A truthy :delete column routes to Delete<Type>Job (which
# receives the entry and run objects); otherwise Import<Type>Job runs with
# their ids. perform_method decides perform_now vs. perform_later.
# @param current_record [Hash] one parsed CSV row
# @param type [String] 'collection', 'work' or 'file_set'
def create_entry_and_job(current_record, type)
  entry = find_or_create_entry(
    send("#{type}_entry_class"),
    current_record[source_identifier],
    'Bulkrax::Importer',
    current_record.to_h
  )

  if current_record[:delete].present?
    "Bulkrax::Delete#{type.camelize}Job".constantize.send(perform_method, entry, current_run)
  else
    "Bulkrax::Import#{type.camelize}Job".constantize.send(perform_method, entry.id, current_run.id)
  end
end
|
#create_file_sets ⇒ Object
124
125
126
|
# File 'app/parsers/bulkrax/csv_parser.rb', line 124
# Import only the file set rows from the CSV.
def create_file_sets
  create_objects(['file_set'])
end
|
#create_new_entries ⇒ Object
Also known as:
create_from_collection, create_from_importer, create_from_worktype, create_from_all
186
187
188
189
190
191
192
193
194
195
196
197
198
|
# File 'app/parsers/bulkrax/csv_parser.rb', line 186
# Create an exporter entry for every record selected for export and run an
# ExportWorkJob for each, accumulating the union of every entry's parsed
# metadata keys into self.headers for later use as CSV columns.
# Export errors are logged and skipped; the headers line only runs when the
# job returned an entry.
# (The attribute name `headers` was garbled in this copy and is restored
# from the upstream Bulkrax source.)
def create_new_entries
  current_records_for_export.each do |id, entry_class|
    new_entry = find_or_create_entry(entry_class, id, 'Bulkrax::Exporter')
    begin
      entry = ExportWorkJob.perform_now(new_entry.id, current_run.id)
    rescue StandardError => e
      Rails.logger.info("#{e.message} was detected during export")
    end
    self.headers |= entry.parsed_metadata.keys if entry
  end
end
|
#create_objects(types_array = nil) ⇒ Object
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
|
# File 'app/parsers/bulkrax/csv_parser.rb', line 132
# Iterate over the requested object types (default: collection, work,
# file_set, then relationship) and create an entry plus job per record.
# Relationships are not processed inline; a ScheduleRelationshipsJob is
# queued with a delay so the related objects exist first.
# @param types_array [Array<String>, nil] subset of types to process
# @return [true] on success; on any error, status info is recorded instead
def create_objects(types_array = nil)
  index = 0
  (types_array || %w[collection work file_set relationship]).each do |type|
    if type.eql?('relationship')
      # 5-minute delay gives the object-creation jobs a head start
      ScheduleRelationshipsJob.set(wait: 5.minutes).perform_later(importer_id: importerexporter.id)
      next
    end
    # e.g. send('works') / send('collections') / send('file_sets')
    send(type.pluralize).each do |current_record|
      next unless record_has_source_identifier(current_record, index)
      break if limit_reached?(limit, index)

      # track processed identifiers to avoid duplicate handling
      seen[current_record[source_identifier]] = true
      create_entry_and_job(current_record, type)
      increment_counters(index, "#{type}": true)
      index += 1
    end
    importer.record_status
  end
  true
rescue StandardError => e
  set_status_info(e)
end
|
#create_relationships ⇒ Object
128
129
130
|
# File 'app/parsers/bulkrax/csv_parser.rb', line 128
# Schedule relationship creation only (no object rows are imported).
def create_relationships
  create_objects(['relationship'])
end
|
#create_works ⇒ Object
120
121
122
|
# File 'app/parsers/bulkrax/csv_parser.rb', line 120
# Import only the work rows from the CSV.
def create_works
  create_objects(['work'])
end
|
#current_records_for_export ⇒ Object
179
180
181
182
183
184
|
# File 'app/parsers/bulkrax/csv_parser.rb', line 179
# Memoized set of records to export, scoped by the exporter's
# export_from setting (e.g. importer, collection, worktype, all).
def current_records_for_export
  @current_records_for_export ||= Bulkrax::ParserExportRecordSet.for(
    parser: self,
    export_from: importerexporter.export_from
  )
end
|
#entry_class ⇒ Object
Also known as:
work_entry_class
204
205
206
|
# File 'app/parsers/bulkrax/csv_parser.rb', line 204
# Default entry class for work rows (aliased as #work_entry_class).
# @return [Class] Bulkrax::CsvEntry
def entry_class
  CsvEntry
end
|
All possible column names
316
317
318
319
320
321
322
323
324
325
326
327
328
|
# File 'app/parsers/bulkrax/csv_parser.rb', line 316
# All possible column names for the exported CSV: the sorted metadata
# headers, minus access_control_id, with id, the source identifier and
# model prepended to preserve the established column order.
# (The identifier `headers` was stripped by the doc scrape everywhere in
# this method; restored from the upstream Bulkrax source.)
# @return [Array<String>]
def export_headers
  headers = sort_headers(self.headers)

  # access_control_id must never be exported
  headers.delete('access_control_id') if headers.include?('access_control_id')

  # prepended in reverse so the final order is: id, source id, model, ...
  headers.prepend('model')
  headers.prepend(source_identifier.to_s)
  headers.prepend('id')

  headers.uniq
end
|
#export_key_allowed(key) ⇒ Object
310
311
312
313
|
# File 'app/parsers/bulkrax/csv_parser.rb', line 310
# A key may be exported when the entry class supports the field and it is
# not the source identifier column (which export_headers prepends itself).
# @param key [String]
def export_key_allowed(key)
  new_entry(entry_class, 'Bulkrax::Exporter').field_supported?(key) &&
    key != source_identifier.to_s
end
|
#file_paths ⇒ Object
Retrieve file paths for [:file] mapping in records
and check all listed files exist.
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
|
# File 'app/parsers/bulkrax/csv_parser.rb', line 375
# Retrieve file paths for the [:file] mapping in records and verify every
# listed file exists on disk (spaces in names are mapped to underscores).
# The original had an invalid collapsed line `if File.exist?(file) file`
# (a newline lost in extraction); rewritten as a guard clause.
# @return [Array<String>] absolute paths; empty for metadata-only exports
# @raise [StandardError] when there are no records
# @raise [RuntimeError] when a referenced file is missing
def file_paths
  raise StandardError, 'No records were found' if records.blank?
  return [] if importerexporter.metadata_only?

  @file_paths ||= records.map do |r|
    file_mapping = Bulkrax.field_mappings.dig(self.class.to_s, 'file', :from)&.first&.to_sym || :file
    next if r[file_mapping].blank?

    r[file_mapping].split(Bulkrax.multi_value_element_split_on).map do |f|
      file = File.join(path_to_files, f.tr(' ', '_'))
      raise "File #{file} does not exist" unless File.exist?(file)

      file
    end
  end.flatten.compact.uniq
end
|
#file_set_entry_class ⇒ Object
213
214
215
|
# File 'app/parsers/bulkrax/csv_parser.rb', line 213
# Entry class used for file set rows.
# @return [Class] Bulkrax::CsvFileSetEntry
def file_set_entry_class
  CsvFileSetEntry
end
|
#file_sets_total ⇒ Object
79
80
81
|
# File 'app/parsers/bulkrax/csv_parser.rb', line 79
# Number of file set rows detected in the CSV.
# @return [Integer]
def file_sets_total
  file_sets.length
end
|
#import_fields ⇒ Object
We could use CsvEntry#fields_from_data(data) but that would mean re-reading the data
84
85
86
|
# File 'app/parsers/bulkrax/csv_parser.rb', line 84
# Union of every column name present across all records, memoized.
# (We could use CsvEntry#fields_from_data but that would re-read the data.)
# @return [Array<Symbol>]
def import_fields
  @import_fields ||= records.reduce(:merge).keys.compact.uniq
end
|
#missing_elements(record) ⇒ Object
92
93
94
95
96
97
98
99
100
101
102
103
|
# File 'app/parsers/bulkrax/csv_parser.rb', line 92
# Required elements that this record does not satisfy. A record satisfies
# an element when any of the element's mapped 'from' fields appears (with
# a non-blank value) among the record's keys, numeric suffixes ignored.
# @param record [Hash]
# @return [Array<String>] missing required element names
def missing_elements(record)
  present_keys = record.reject { |_, v| v.blank? }.keys.compact.uniq.map(&:to_s)
  keys_from_record = keys_without_numbers(present_keys)

  satisfied = []
  importerexporter.mapping.stringify_keys.each do |element, config|
    Array.wrap(config['from']).each do |from_field|
      satisfied << element if keys_from_record.include?(from_field)
    end
  end

  required_elements.map(&:to_s) - satisfied.uniq.map(&:to_s)
end
|
#object_names ⇒ Object
330
331
332
333
334
335
336
337
|
# File 'app/parsers/bulkrax/csv_parser.rb', line 330
# Distinct, non-nil 'object' values from the field mapping, memoized.
# Fixes a latent bug: the original `@object_names.uniq!&.delete(nil)` only
# deleted nil when uniq! actually removed duplicates — uniq! returns nil
# when there is nothing to deduplicate, so the safe-navigation skipped the
# delete and a nil could leak into the result.
# @return [Array<String>]
def object_names
  return @object_names if @object_names

  @object_names = mapping.values.map { |value| value['object'] }.uniq
  @object_names.delete(nil)
  @object_names
end
|
#path_to_files(**args) ⇒ Object
Retrieve the path where we expect to find the files
395
396
397
398
399
400
401
402
|
# File 'app/parsers/bulkrax/csv_parser.rb', line 395
# Path where we expect to find the binary files for this import: the
# 'files' directory next to the metadata file (or inside the unzip dir),
# optionally joined with +filename+. Memoized when no filename is given.
# @param args [Hash] supports :filename
# @return [String]
def path_to_files(**args)
  filename = args.fetch(:filename, '')
  return @path_to_files if filename.blank? && @path_to_files.present?

  base = zip? ? importer_unzip_path : File.dirname(import_file_path)
  @path_to_files = File.join(base, 'files', filename)
end
|
#records(_opts = {}) ⇒ Object
14
15
16
17
18
19
20
21
22
23
24
|
# File 'app/parsers/bulkrax/csv_parser.rb', line 14
# Read (and memoize) the parsed CSV rows for this import. For partial
# re-imports (only_updates) the corrected-entries file is read instead of
# the original upload. The row count is persisted on the importer so
# progress totals survive across background jobs.
# @param _opts [Hash] unused, kept for interface compatibility
# @return [Array<Hash>]
def records(_opts = {})
  return @records if @records.present?

  source_file = only_updates ? parser_fields['partial_import_file_path'] : import_file_path
  rows = entry_class.read_data(source_file)

  importer.parser_fields['total'] = rows.count
  importer.save

  @records = rows.map { |row| entry_class.data_for_entry(row, nil, self) }
end
|
#records_split_count ⇒ Object
238
239
240
|
# File 'app/parsers/bulkrax/csv_parser.rb', line 238
# Maximum number of rows written into a single exported CSV file before
# a new numbered folder/file is started.
# @return [Integer]
def records_split_count
  1000
end
|
#required_elements?(record) ⇒ Boolean
88
89
90
|
# File 'app/parsers/bulkrax/csv_parser.rb', line 88
# True when the record is missing none of the required mapped elements.
# @param record [Hash]
# @return [Boolean]
def required_elements?(record)
  missing_elements(record).blank?
end
|
#retrieve_cloud_files(files) ⇒ Object
TODO:
DownloadCloudFileJob before it starts
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
|
# File 'app/parsers/bulkrax/csv_parser.rb', line 245
# Download each cloud-hosted file into <path_for_import>/files, folding any
# auth_header into the request headers and replacing spaces in file names
# with underscores.
# TODO: notify DownloadCloudFileJob before it starts (original note).
# @param files [Hash] keyed collection of file descriptors
# @return [nil]
def retrieve_cloud_files(files)
  files_path = File.join(path_for_import, 'files')
  FileUtils.mkdir_p(files_path) unless File.exist?(files_path)

  files.each_value do |file|
    if file['auth_header'].present?
      file['headers'] ||= {}
      file['headers'].merge!(file['auth_header'])
    end
    target_file = File.join(files_path, file['file_name'].tr(' ', '_'))
    Bulkrax::DownloadCloudFileJob.perform_now(file, target_file)
  end

  nil
end
|
#setup_export_file(folder_count) ⇒ Object
in the parser as it is specific to the format
366
367
368
369
370
371
|
# File 'app/parsers/bulkrax/csv_parser.rb', line 366
# Build (and ensure the directory for) the path of the numbered export CSV.
# Lives in the parser because the file layout is format-specific.
# @param folder_count [Integer, String] 1-based export folder index
# @return [String] full path of the CSV to write
def setup_export_file(folder_count)
  dir = File.join(importerexporter.exporter_export_path, folder_count.to_s)
  FileUtils.mkdir_p(dir) unless File.exist?(dir)

  File.join(dir, "export_#{importerexporter.export_source}_from_#{importerexporter.export_from}_#{folder_count}.csv")
end
|
#sort_entries(entries) ⇒ Object
339
340
341
342
343
344
345
346
347
348
349
350
351
|
# File 'app/parsers/bulkrax/csv_parser.rb', line 339
# Order entries for export: works first ('0'), then collections ('1'),
# then file sets ('2'). sort_by is stable, so ties keep input order.
# @param entries [Array<#type>]
# @return [Array]
def sort_entries(entries)
  rank = {
    'Bulkrax::CsvCollectionEntry' => '1',
    'Bulkrax::CsvFileSetEntry' => '2'
  }
  entries.sort_by { |entry| rank.fetch(entry.type, '0') }
end
|
353
354
355
356
357
358
359
360
361
362
363
|
# File 'app/parsers/bulkrax/csv_parser.rb', line 353
# Sort metadata headers so columns belonging to the same mapped object stay
# grouped and numbered columns sort numerically: a header like
# creator_name_1 is keyed as creator_0001_name for sorting purposes.
# (Method name and parameter were stripped by the doc scrape; restored from
# the upstream Bulkrax source.)
# @param headers [Array<String>]
# @return [Array<String>] sorted copy
def sort_headers(headers)
  headers.sort_by do |item|
    number = item.match(/\d+/)&.[](0) || 0.to_s
    sort_number = number.rjust(4, "0")
    object_prefix = object_names.detect { |o| item.match(/^#{o}/) } || item
    remainder = item.gsub(/^#{object_prefix}_/, '').gsub(/_#{number}/, '')
    "#{object_prefix}_#{sort_number}_#{remainder}"
  end
end
|
#store_files(identifier, folder_count) ⇒ Object
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
|
# File 'app/parsers/bulkrax/csv_parser.rb', line 288
# Copy the binary files of the exported record (found by +identifier+) into
# <exporter_export_path>/<folder_count>/files. A file set record exports
# itself; a work exports its file sets, plus its thumbnail when configured.
# Silently returns when the record has been deleted from Fedora (Ldp::Gone).
# @param identifier [String]
# @param folder_count [String] folder name (stringified counter)
def store_files(identifier, folder_count)
  record = ActiveFedora::Base.find(identifier)
  return unless record
  file_sets = record.file_set? ? Array.wrap(record) : record.file_sets
  file_sets << record.thumbnail if exporter.include_thumbnails && record.thumbnail.present? && record.work?
  file_sets.each do |fs|
    path = File.join(exporter_export_path, folder_count, 'files')
    FileUtils.mkdir_p(path) unless File.exist? path
    file = filename(fs)
    next if file.blank? || fs.original_file.blank?
    # NOTE(review): Kernel#open on a URI relies on open-uri (required in
    # write_files); URI.open would be the clearer, safer spelling.
    io = open(fs.original_file.uri)
    File.open(File.join(path, file), 'wb') do |f|
      f.write(io.read)
      f.close
    end
  end
rescue Ldp::Gone
  return
end
|
#total ⇒ Object
TODO: figure out why using the version of this method that’s in the bagit parser breaks specs for the “if importer?” line
223
224
225
226
227
228
229
230
231
232
233
234
235
236
|
# File 'app/parsers/bulkrax/csv_parser.rb', line 223
# Total record count for the current run: the persisted CSV row count for
# imports, the (possibly limited) export record count for exports, and 0
# otherwise or on any error.
# @return [Integer]
def total
  computed =
    if importer?
      importer.parser_fields['total'] || 0
    elsif exporter?
      limit.to_i.zero? ? current_records_for_export.count : limit.to_i
    else
      0
    end
  @total = computed
rescue StandardError
  @total = 0
end
|
#valid_entry_types ⇒ Object
217
218
219
|
# File 'app/parsers/bulkrax/csv_parser.rb', line 217
# Entry class names (as strings) this parser recognizes when exporting.
# @return [Array<String>]
def valid_entry_types
  [collection_entry_class.to_s, file_set_entry_class.to_s, entry_class.to_s]
end
|
#valid_import? ⇒ Boolean
105
106
107
108
109
110
111
112
113
114
|
# File 'app/parsers/bulkrax/csv_parser.rb', line 105
# Validate the import: every required element must be satisfiable by at
# least one record, and the file paths must resolve.
# The partition/flatten/to_h line builds one merged record in which a
# truthy value wins for each key: pairs with nil/false values come first,
# so later truthy pairs overwrite them when to_h deduplicates keys.
# @return [Boolean] false (after recording status) when validation fails
def valid_import?
  compressed_record = records.flat_map(&:to_a).partition { |_, v| !v }.flatten(1).to_h
  error_alert = "Missing at least one required element, missing element(s) are: #{missing_elements(compressed_record).join(', ')}"
  raise StandardError, error_alert unless required_elements?(compressed_record)
  file_paths.is_a?(Array)
rescue StandardError => e
  set_status_info(e)
  false
end
|
#works_total ⇒ Object
75
76
77
|
# File 'app/parsers/bulkrax/csv_parser.rb', line 75
# Number of work rows detected in the CSV.
# @return [Integer]
def works_total
  works.length
end
|
#write_files ⇒ Object
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
|
# File 'app/parsers/bulkrax/csv_parser.rb', line 265
# Write the export CSV(s), splitting output into numbered folders of at
# most records_split_count rows, and copy each entry's binary files
# alongside unless this is a metadata-only export. The CSV header row is
# export_headers (the identifier was garbled in this copy; restored from
# the upstream Bulkrax source).
def write_files
  require 'open-uri'

  folder_count = 0
  sorted_entries = sort_entries(importerexporter.entries.uniq(&:identifier))
                   .select { |e| valid_entry_types.include?(e.type) }

  group_size = limit.to_i.zero? ? total : limit.to_i
  # NOTE(review): [0..group_size] takes group_size + 1 entries; kept as-is
  # to preserve existing behavior.
  sorted_entries[0..group_size].in_groups_of(records_split_count, false) do |group|
    folder_count += 1

    CSV.open(setup_export_file(folder_count), "w", headers: export_headers, write_headers: true) do |csv|
      group.each do |entry|
        csv << entry.parsed_metadata
        next if importerexporter.metadata_only? || entry.type == 'Bulkrax::CsvCollectionEntry'

        store_files(entry.identifier, folder_count.to_s)
      end
    end
  end
end
|
#write_partial_import_file(file) ⇒ Object
167
168
169
170
171
172
173
174
175
176
177
|
# File 'app/parsers/bulkrax/csv_parser.rb', line 167
# Move an uploaded corrected-entries CSV into the import directory, named
# after the original import file with a "_corrected_entries" suffix.
# @param file [#path] an uploaded file object
# @return [String] destination path of the partial import file
def write_partial_import_file(file)
  import_filename = import_file_path.split('/').last
  corrected_name = "#{File.basename(import_filename, '.csv')}_corrected_entries.csv"
  destination = File.join(path_for_import, corrected_name)

  FileUtils.mv(file.path, destination)
  destination
end
|