Class: Bulkrax::CsvEntry

Inherits:
Entry show all
Defined in:
app/models/bulkrax/csv_entry.rb

Direct Known Subclasses

CsvCollectionEntry

Instance Attribute Summary

Attributes inherited from Entry

#all_attrs

Class Method Summary collapse

Instance Method Summary collapse

Methods inherited from Entry

#build, #exporter?, #find_collection, #importer?, #last_run, #source_identifier, #valid_system_id, #work_identifier

Methods included from HasLocalProcessing

#add_local

Methods included from StatusInfo

#current_status, #failed?, #last_error, #status, #status_at, #status_info, #succeeded?

Methods included from ExportBehavior

#build_for_exporter, #filename, #hyrax_record, #write_files

Methods included from ImportBehavior

#add_admin_set_id, #add_collections, #add_rights_statement, #add_visibility, #build_for_importer, #factory, #factory_class, #override_rights_statement, #rights_statement

Methods included from HasMatchers

#add_metadata, #excluded?, #field_supported?, #field_to, #get_object_name, #matched_metadata, #multiple?, #multiple_metadata, #set_parsed_data, #single_metadata

Class Method Details

.children_fieldObject



39
40
41
# File 'app/models/bulkrax/csv_entry.rb', line 39

# Name of the field used for children.
#
# Looks up this class (by its string name) in
# Bulkrax.parent_child_field_mapping and falls back to 'children' when the
# lookup yields nil or false.
#
# @return [Object] the mapped value, or the String 'children'
def self.children_field
  configured = Bulkrax.parent_child_field_mapping[to_s]
  configured || 'children'
end

.collection_fieldObject



35
36
37
# File 'app/models/bulkrax/csv_entry.rb', line 35

# Name of the field used for collections.
#
# Looks up this class (by its string name) in
# Bulkrax.collection_field_mapping and falls back to 'collection' when the
# lookup yields nil or false.
#
# @return [Object] the mapped value, or the String 'collection'
def self.collection_field
  # Inside a class method `self.class` is `Class`, so the lookup must be keyed
  # by `to_s` (this class's own name), the same way children_field does it.
  Bulkrax.collection_field_mapping[to_s] || 'collection'
end

.data_for_entry(data, _source_id) ⇒ Object



22
23
24
25
26
27
28
29
30
31
32
33
# File 'app/models/bulkrax/csv_entry.rb', line 22

# Turns one CSV row into the hash stored for an entry.
#
# @param data [CSV::Table, #to_h] a row-like object, or a CSV::Table whose
#   first row is used
# @param _source_id [Object] unused
# @return [Hash] the row as a hash, with :model set from data[:model] and
#   :collection / :children mirrored from the mapped fields when those are
#   named differently
def self.data_for_entry(data, _source_id)
  # If a multi-line CSV data is passed, grab the first row
  data = data.first if data.is_a?(CSV::Table)
  # model has to be separated so that it doesn't get mistranslated by to_h
  raw_data = data.to_h
  raw_data[:model] = data[:model]

  # If the collection field mapping is not 'collection', add 'collection' - the parser needs it
  collection_key = collection_field.to_sym
  raw_data[:collection] = raw_data[collection_key] if collection_key != :collection && raw_data.key?(collection_key)

  # If the children field mapping is not 'children', add 'children' - the parser needs it
  # (the value must come from the children column, not the collection column)
  children_key = children_field.to_sym
  raw_data[:children] = raw_data[children_key] if children_key != :children && raw_data.key?(children_key)

  raw_data
end

.fields_from_data(data) ⇒ Object



9
10
11
# File 'app/models/bulkrax/csv_entry.rb', line 9

# Lists the distinct header names present in the given data.
#
# @param data [#headers] object whose #headers returns an array (possibly
#   nested and possibly containing nils)
# @return [Array] flattened headers with nils and duplicates removed
def self.fields_from_data(data)
  header_names = data.headers.flatten
  header_names.compact.uniq
end

.matcher_classObject



132
133
134
# File 'app/models/bulkrax/csv_entry.rb', line 132

# Matcher class associated with this entry type.
#
# @return [Class] Bulkrax::CsvMatcher
def self.matcher_class
  Bulkrax::CsvMatcher
end

.read_data(path) ⇒ Object

Note: this reads the whole file into memory at once, which can cause excessive memory use for large CSV files.

Raises:

  • (StandardError)


14
15
16
17
18
19
20
# File 'app/models/bulkrax/csv_entry.rb', line 14

# Reads the CSV file at +path+ in a single CSV.read call.
#
# Headers are enabled and converted to symbols; the file is read as UTF-8.
#
# @param path [String] location of the CSV file
# @return [CSV::Table] the parsed file
# @raise [StandardError] with 'CSV path empty' when path is blank
def self.read_data(path)
  raise StandardError, 'CSV path empty' if path.blank?

  csv_options = { headers: true, header_converters: :symbol, encoding: 'utf-8' }
  CSV.read(path, **csv_options)
end

Instance Method Details

#add_fileObject



72
73
74
75
76
77
78
79
80
# File 'app/models/bulkrax/csv_entry.rb', line 72

# Fills the 'file' entry of the parsed metadata from record['file'].
#
# A String value is split on ';' or '|' (surrounding whitespace dropped); an
# Array value is used as is; anything else leaves the existing list (or an
# empty list) in place. Every name then has its spaces replaced by
# underscores and is passed through path_to_file.
#
# NOTE(review): the attribute name was missing from this listing; it has been
# restored as `parsed_metadata` - confirm against the model.
#
# @return [Array] the values returned by path_to_file
def add_file
  self.parsed_metadata['file'] ||= []
  case record['file']
  when String
    self.parsed_metadata['file'] = record['file'].split(/\s*[;|]\s*/)
  when Array
    self.parsed_metadata['file'] = record['file']
  end
  self.parsed_metadata['file'] = self.parsed_metadata['file'].map { |f| path_to_file(f.tr(' ', '_')) }
end

#build_export_metadataObject



82
83
84
85
86
87
88
89
90
91
92
93
94
# File 'app/models/bulkrax/csv_entry.rb', line 82

# Builds the metadata hash used when exporting hyrax_record.
#
# Stores the record id, its work identifier value (under source_identifier),
# its first has_model value, the mapped fields, the joined ids from
# member_of_collection_ids and - unless the record is a Collection - the
# joined, non-blank file names of its file sets.
#
# NOTE(review): the method name, the attribute name (`parsed_metadata`) and
# the `build_mapping_metadata` call were missing from this listing and have
# been restored - confirm against the model.
#
# @return [Hash] the metadata hash that was built
def build_export_metadata
  # make_round_trippable
  self.parsed_metadata = {}
  self.parsed_metadata['id'] = hyrax_record.id
  self.parsed_metadata[source_identifier] = hyrax_record.send(work_identifier)
  self.parsed_metadata['model'] = hyrax_record.has_model.first
  build_mapping_metadata
  self.parsed_metadata['collections'] = hyrax_record.member_of_collection_ids.join('; ')
  unless hyrax_record.is_a?(Collection)
    # Resolve each file name once, then keep only the non-blank ones.
    file_names = hyrax_record.file_sets.map { |fs| filename(fs) }.select(&:present?)
    self.parsed_metadata['file'] = file_names.map(&:to_s).join('; ')
  end
  self.parsed_metadata
end

#build_mapping_metadataObject



96
97
98
99
100
101
102
103
104
105
106
107
108
# File 'app/models/bulkrax/csv_entry.rb', line 96

# Copies every mapped field that hyrax_record responds to into the metadata
# hash.
#
# Skipped keys: reserved properties that are not supported fields, the
# "model" key, and keys the record does not respond to. An
# ActiveTriples::Relation value is exported as its prepared members joined
# with '; ' (unless the mapping marks the field as excluded); any other value
# is exported through prepare_export_data.
#
# NOTE(review): the method name and the attribute name (`parsed_metadata`)
# were missing from this listing and have been restored - confirm against the
# model. The :excluded flag is only honoured for relation values, as in the
# original listing.
#
# @return [Object] whatever mapping.each returns
def build_mapping_metadata
  mapping.each do |key, value|
    next if Bulkrax.reserved_properties.include?(key) && !field_supported?(key)
    next if key == "model"
    next unless hyrax_record.respond_to?(key.to_s)

    data = hyrax_record.send(key.to_s)
    if data.is_a?(ActiveTriples::Relation)
      self.parsed_metadata[key] = data.map { |d| prepare_export_data(d) }.join('; ') unless value[:excluded]
    else
      self.parsed_metadata[key] = prepare_export_data(data)
    end
  end
end

#build_metadataObject

Raises:

  • (StandardError)


51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
# File 'app/models/bulkrax/csv_entry.rb', line 51

# Builds the metadata hash for an import from the entry's record.
#
# Every record key except 'collection' is passed to add_metadata with its
# number stripped; a number found in the key becomes a zero-based index
# (no number, or 0, gives a nil index). File, visibility, rights statement,
# admin set, collection and local data are added afterwards.
#
# NOTE(review): the method name, the attribute name (`parsed_metadata`) and
# the `add_metadata` call were missing from this listing and have been
# restored - confirm against the model.
#
# @return [Hash] the metadata hash that was built
# @raise [StandardError] when the record is nil or required elements are
#   missing
def build_metadata
  raise StandardError, 'Record not found' if record.nil?

  element_names = keys_without_numbers(record.keys)
  unless importerexporter.parser.required_elements?(element_names)
    raise StandardError, "Missing required elements, missing element(s) are: #{importerexporter.parser.missing_elements(element_names).join(', ')}"
  end

  self.parsed_metadata = {}
  self.parsed_metadata[work_identifier] = [record[source_identifier]]
  record.each do |key, value|
    next if key == 'collection'

    # "title_2" -> index 1; keys without a (non-zero) number get a nil index
    position = key[/\d+/].to_i
    index = position.zero? ? nil : position - 1
    add_metadata(key_without_numbers(key), value, index)
  end
  add_file
  add_visibility
  add_rights_statement
  add_admin_set_id
  add_collections
  add_local
  self.parsed_metadata
end

#collections_created?Boolean

Returns:

  • (Boolean)


136
137
138
139
# File 'app/models/bulkrax/csv_entry.rb', line 136

# Tells whether collection_ids already holds one id per collection listed in
# the record.
#
# The record's collection field is split on ':', ';' or '|'. A blank field
# counts as created.
#
# @return [Boolean]
def collections_created?
  listed = record[self.class.collection_field]
  return true if listed.blank?

  listed.split(/\s*[:;|]\s*/).length == collection_ids.length
end

#find_or_create_collection_idsObject



141
142
143
144
145
146
147
148
149
150
151
# File 'app/models/bulkrax/csv_entry.rb', line 141

# Resolves the collections named in the record and appends their ids to
# collection_ids.
#
# Returns straight away when collections_created? is true. Otherwise each
# name (split on ':', ';' or '|') is looked up with find_collection, and the
# id of every non-blank result not already present is added.
#
# @return [Object] collection_ids
def find_or_create_collection_ids
  return collection_ids if collections_created?

  valid_system_id(Collection)
  collection_names = record[self.class.collection_field]
  if collection_names.present?
    collection_names.split(/\s*[:;|]\s*/).each do |name|
      found = find_collection(name)
      next if found.blank? || collection_ids.include?(found.id)

      collection_ids << found.id
    end
  end
  collection_ids
end

#key_without_numbers(key) ⇒ Object



47
48
49
# File 'app/models/bulkrax/csv_entry.rb', line 47

# Strips numbering from a key: every "_<digits>" run is removed, then one
# leading "<digits>_" prefix.
#
# @param key [String]
# @return [String] a new string without the numbering
def key_without_numbers(key)
  without_suffixes = key.gsub(/_\d+/, '')
  without_suffixes.sub(/^\d+_/, '')
end

#keys_without_numbers(keys) ⇒ Object



43
44
45
# File 'app/models/bulkrax/csv_entry.rb', line 43

# Applies key_without_numbers to each of the given keys.
#
# @param keys [Enumerable<String>]
# @return [Array] the results, in the same order
def keys_without_numbers(keys)
  keys.map(&method(:key_without_numbers))
end

#make_round_trippableObject

For an existing, exported hyrax_record to be updated by a re-import, it needs a unique value in system_identifier, so the existing hyrax_record id is added to system_identifier.



121
122
123
124
125
126
# File 'app/models/bulkrax/csv_entry.rb', line 121

# Appends hyrax_record's id to the values of its work identifier field,
# writes the list back and saves the record.
#
# @return [Object] the result of hyrax_record.save
def make_round_trippable
  identifiers = hyrax_record.send(work_identifier.to_s).to_a
  identifiers.push(hyrax_record.id)
  writer = "#{work_identifier}="
  hyrax_record.send(writer, identifiers)
  hyrax_record.save
end

#path_to_file(file) ⇒ Object

If only filename is given, construct the path (/files/my_file)



154
155
156
157
158
159
160
161
# File 'app/models/bulkrax/csv_entry.rb', line 154

# Resolves a file reference to a path that exists.
#
# A reference that already exists is returned unchanged; otherwise it is
# joined onto the parser's path_to_files directory.
#
# @param file [String] a path or bare file name
# @return [String] a path for which File.exist? is true
# @raise [RuntimeError] when neither candidate exists
def path_to_file(file)
  # Nothing to do when the given reference already points at a file.
  return file if File.exist?(file)

  candidate = File.join(importerexporter.parser.path_to_files, file)
  raise "File #{candidate} does not exist" unless File.exist?(candidate)

  candidate
end

#prepare_export_data(datum) ⇒ Object



110
111
112
113
114
115
116
# File 'app/models/bulkrax/csv_entry.rb', line 110

# Converts a single value for export.
#
# @param datum [Object]
# @return [Object] the URI string for an ActiveTriples::Resource, otherwise
#   the value itself
def prepare_export_data(datum)
  return datum unless datum.is_a?(ActiveTriples::Resource)

  datum.to_uri.to_s
end

#recordObject



128
129
130
# File 'app/models/bulkrax/csv_entry.rb', line 128

# Memoized source record for this entry.
#
# NOTE(review): the right-hand side of the assignment was missing from this
# listing; it has been restored as `raw_metadata` - confirm against the model.
#
# @return [Object] the value first returned by raw_metadata (looked up again
#   while the cached value is nil or false)
def record
  @record ||= raw_metadata
end