Class: Bulkrax::ApplicationParser

Inherits:
Object
  • Object
show all
Defined in:
app/parsers/bulkrax/application_parser.rb

Direct Known Subclasses

BagitParser, CsvParser, OaiDcParser, XmlParser

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(importerexporter) ⇒ ApplicationParser

Returns a new instance of ApplicationParser.



26
27
28
# File 'app/parsers/bulkrax/application_parser.rb', line 26

# Builds a parser bound to the given importer or exporter.
#
# @param importerexporter [Object] stored as-is in @importerexporter
#   (presumably a Bulkrax::Importer or Bulkrax::Exporter, given what
#   #importer? and #exporter? test for — confirm against callers)
def initialize(importerexporter)
  @importerexporter = importerexporter
end

Instance Attribute Details

#importerexporter ⇒ Object Also known as: importer, exporter

Returns the value of attribute importerexporter.



5
6
7
# File 'app/parsers/bulkrax/application_parser.rb', line 5

# Reader for the object this parser was constructed with.
#
# @return [Object] the value currently held in @importerexporter
def importerexporter
  @importerexporter
end

Class Method Details

.export_supported? ⇒ Boolean

Returns:

  • (Boolean)


18
19
20
# File 'app/parsers/bulkrax/application_parser.rb', line 18

# Whether this parser class supports exporting.
# The base class always answers false.
# NOTE(review): parsers that can export presumably override this — confirm.
#
# @return [Boolean] false
def self.export_supported?
  false
end

.import_supported? ⇒ Boolean

Returns:

  • (Boolean)


22
23
24
# File 'app/parsers/bulkrax/application_parser.rb', line 22

# Whether this parser class supports importing.
# The base class always answers true.
#
# @return [Boolean] true
def self.import_supported?
  true
end

.parser_fields ⇒ Object



14
15
16
# File 'app/parsers/bulkrax/application_parser.rb', line 14

# Parser-specific fields declared by this parser class.
# The base class declares none and returns a fresh empty Hash on every call.
#
# @return [Hash] an empty Hash
def self.parser_fields
  {}
end

Instance Method Details

#collection_entry_class ⇒ Object

Raises:

  • (StandardError)


36
37
38
# File 'app/parsers/bulkrax/application_parser.rb', line 36

# Abstract hook: the base class has no collection entry class and always raises.
# NOTE(review): concrete parsers are presumably expected to override this — confirm.
#
# @raise [StandardError] always, with the message 'must be defined'
def collection_entry_class
  raise StandardError, 'must be defined'
end

#collections_total ⇒ Object



255
256
257
# File 'app/parsers/bulkrax/application_parser.rb', line 255

# Collection count reported by this parser; the base class always reports 0.
#
# @return [Integer] 0
def collections_total
  0
end

#create_collections ⇒ Object

Raises:

  • (StandardError)


74
75
76
# File 'app/parsers/bulkrax/application_parser.rb', line 74

# Abstract hook for creating collections.
#
# The base class provides no implementation: it raises when the parser is
# attached to an importer and does nothing otherwise.
#
# @return [nil] when #importer? is falsy
# @raise [StandardError] ('must be defined') when #importer? is truthy
def create_collections
  return unless importer?

  raise StandardError, 'must be defined'
end

#create_parent_child_relationships ⇒ Object

Optional, only used by certain parsers. Other parsers should override with a custom or empty method. Will be skipped unless the #record is a Hash.



105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
# File 'app/parsers/bulkrax/application_parser.rb', line 105

# Enqueues a ChildRelationshipsJob for every parent listed in #parents.
#
# Optional, only used by certain parsers. Other parsers should override with
# a custom or empty method.
#
# For each (parent identifier => child identifiers) pair, the matching entries
# are looked up through #entry_class, scoped to this importer. Because all
# entries have been created by the time this runs, an identifier that matches
# no entry means the source data is wrong. Such cases are logged and skipped
# so that one bad row does not prevent the remaining parents from being
# processed:
#   * parent entry not found      -> error logged, pair skipped
#   * no child entries found      -> error logged, pair skipped
#   * only some children found    -> error + warning logged, job enqueued
#                                    with the children that were found
#
# Any other StandardError is handed to #status_info.
#
# @return [Object] the result of iterating #parents, or the return value of
#   #status_info when an error was rescued
def create_parent_child_relationships
  # Single lookup shared by parents and children.
  find_entry = lambda do |identifier|
    entry_class.where(
      identifier: identifier,
      importerexporter_id: importerexporter.id,
      importerexporter_type: 'Bulkrax::Importer'
    ).first
  end

  parents.each do |key, value|
    parent = find_entry.call(key)
    if parent.nil?
      # Previously this fell through to `parent.id` and raised NoMethodError,
      # which aborted every remaining relationship.
      Rails.logger.error("Parent entry not found for identifier #{key}")
      next
    end

    children = value.map { |child| find_entry.call(child) }.compact.uniq

    if children.length != value.length
      Rails.logger.error("Expected #{value.length} children for parent entry #{parent.id}, found #{children.length}")
      # Nothing to relate for this parent; move on to the next one rather than
      # abandoning the whole loop.
      next if children.empty?
      Rails.logger.warn("Adding #{children.length} children to parent entry #{parent.id} (expected #{value.length})")
    end

    ChildRelationshipsJob.perform_later(parent.id, children.map(&:id), current_run.id)
  end
rescue StandardError => e
  status_info(e)
end

#create_works ⇒ Object

Raises:

  • (StandardError)


78
79
80
# File 'app/parsers/bulkrax/application_parser.rb', line 78

# Abstract hook for creating works.
#
# The base class provides no implementation: it raises when the parser is
# attached to an importer and does nothing otherwise.
#
# @return [nil] when #importer? is falsy
# @raise [StandardError] ('must be defined') when #importer? is truthy
def create_works
  return unless importer?

  raise StandardError, 'must be defined'
end

#entry_class ⇒ Object

Raises:

  • (StandardError)


31
32
33
# File 'app/parsers/bulkrax/application_parser.rb', line 31

# Abstract hook: the base class has no entry class and always raises.
# NOTE(review): concrete parsers are presumably expected to override this — confirm.
#
# @raise [StandardError] always, with the message 'must be defined'
def entry_class
  raise StandardError, 'must be defined'
end

#exporter? ⇒ Boolean

Returns:

  • (Boolean)


176
177
178
# File 'app/parsers/bulkrax/application_parser.rb', line 176

# Whether this parser is attached to an exporter.
#
# @return [Boolean] true when #importerexporter is a Bulkrax::Exporter
def exporter?
  importerexporter.is_a?(Bulkrax::Exporter)
end

#file? ⇒ Boolean

Is this a file?

Returns:

  • (Boolean)


274
275
276
# File 'app/parsers/bulkrax/application_parser.rb', line 274

# Is this a file?
#
# Looks up 'import_file_path' in #parser_fields and checks whether it names
# a regular file on disk.
#
# @return [Boolean, nil] nil when #parser_fields is nil or has no
#   'import_file_path'; otherwise the result of File.file?
def file?
  path = parser_fields&.[]('import_file_path')
  path && File.file?(path)
end

#find_or_create_entry(entryclass, identifier, type, raw_metadata = nil) ⇒ Object



231
232
233
234
235
236
237
238
239
240
# File 'app/parsers/bulkrax/application_parser.rb', line 231

# Finds the entry for this importer/exporter with the given identifier,
# creating it when none exists, then stores the supplied raw metadata on it.
#
# @param entryclass [Class] class queried with `where(...).first_or_create!`
# @param identifier [Object] value matched against the `identifier` column
# @param type [Object] value matched against `importerexporter_type`
# @param raw_metadata [Object, nil] assigned to the entry's `raw_metadata`
#   (nil overwrites any value an existing entry already had)
# @return [Object] the saved entry
# @raise whatever `first_or_create!` or `save!` raise on failure
def find_or_create_entry(entryclass, identifier, type, raw_metadata = nil)
  entry = entryclass.where(
    importerexporter_id: importerexporter.id,
    importerexporter_type: type,
    identifier: identifier
  ).first_or_create!
  entry.raw_metadata = raw_metadata
  entry.save!
  entry
end

#identifier_hash ⇒ Object

Raises:

  • (StandardError)


53
54
55
56
57
58
59
60
# File 'app/parsers/bulkrax/application_parser.rb', line 53

# The subset of the importer/exporter field mapping whose settings contain a
# "source_identifier" key. The selection is memoized in @identifier_hash; the
# uniqueness check runs on every call.
#
# @return [Hash] mapping entries that declare "source_identifier"
# @raise [StandardError] when more than one entry declares it
def identifier_hash
  declared = (@identifier_hash ||= importerexporter.mapping.select { |_field, settings| settings.key?("source_identifier") })
  if declared.length > 1
    raise StandardError, "more than one source_identifier declared: #{@identifier_hash.keys.join(', ')}"
  end

  declared
end

#import_file_path ⇒ Object

Path for the import



284
285
286
# File 'app/parsers/bulkrax/application_parser.rb', line 284

# Path for the import.
#
# Memoizes the result of #real_import_file_path (defined outside this
# listing) in @import_file_path; a nil/false result is not cached and is
# recomputed on the next call.
#
# @return [Object] whatever #real_import_file_path returned
def import_file_path
  @import_file_path ||= real_import_file_path
end

#importer? ⇒ Boolean

Returns:

  • (Boolean)


172
173
174
# File 'app/parsers/bulkrax/application_parser.rb', line 172

# Whether this parser is attached to an importer.
#
# @return [Boolean] true when #importerexporter is a Bulkrax::Importer
def importer?
  importerexporter.is_a?(Bulkrax::Importer)
end

#invalid_record(message) ⇒ Object

rubocop:disable Rails/SkipsModelValidations



207
208
209
210
211
212
213
# File 'app/parsers/bulkrax/application_parser.rb', line 207

# Records an invalid source record against the current run.
#
# Appends +message+ to the run's `invalid_records` text and saves it, then
# bumps `failed_records` and, while it is still positive, lowers
# `enqueued_records`.
#
# The counters are adjusted on a freshly loaded ImporterRun rather than on
# #current_run; it is loaded once and reused instead of being re-fetched for
# every counter operation.
#
# @param message [String] text appended verbatim to `invalid_records`
# @return [Object, nil] result of `decrement!`, or nil when
#   `enqueued_records` is already zero or below
def invalid_record(message)
  current_run.invalid_records ||= ""
  current_run.invalid_records += message
  current_run.save
  run = ImporterRun.find(current_run.id)
  run.increment!(:failed_records)
  run.decrement!(:enqueued_records) unless run.enqueued_records <= 0 # rubocop:disable Style/IdenticalConditionalBranches
end

#limit_reached?(limit, index) ⇒ boolean

Parameters:

  • limit (Integer)

    limit set on the importerexporter

  • index (Integer)

    index of current iteration

Returns:

  • (boolean)


183
184
185
186
# File 'app/parsers/bulkrax/application_parser.rb', line 183

# Tells whether iteration should stop because the configured limit was hit.
# A nil or zero limit means "no limit".
#
# @param limit [Integer, nil] limit set on the importerexporter
# @param index [Integer] index of current iteration
# @return [Boolean] true once index is at or past a non-zero limit
def limit_reached?(limit, index)
  unlimited = limit.nil? || limit.zero?
  !unlimited && index >= limit
end

#new_entry(entryclass, type) ⇒ Object



224
225
226
227
228
229
# File 'app/parsers/bulkrax/application_parser.rb', line 224

# Instantiates (without saving) an entry of the given class tied to this
# parser's importer/exporter.
#
# @param entryclass [Class] class to instantiate
# @param type [Object] value passed as `importerexporter_type`
# @return [Object] the new entryclass instance
def new_entry(entryclass, type)
  attributes = {
    importerexporter_id: importerexporter.id,
    importerexporter_type: type
  }
  entryclass.new(**attributes)
end

#parents ⇒ Object



138
139
140
# File 'app/parsers/bulkrax/application_parser.rb', line 138

# Memoized result of #setup_parents, cached in @parents.
# A nil/false result is not cached and is recomputed on the next call.
#
# @return [Object] whatever #setup_parents returned
def parents
  @parents ||= setup_parents
end

#path_for_import ⇒ Object

Path where we’ll store the import metadata and files

This is used for uploaded and cloud files.


96
97
98
99
100
# File 'app/parsers/bulkrax/application_parser.rb', line 96

# Path where we'll store the import metadata and files.
#
# Joins Bulkrax.import_path with the importer/exporter's path_string,
# creates the directory when nothing exists at that path yet, and returns it.
# The path is recomputed (and stored in @path_for_import) on every call.
#
# @return [String] the import directory path
def path_for_import
  @path_for_import = File.join(Bulkrax.import_path, importerexporter.path_string)
  return @path_for_import if File.exist?(@path_for_import)

  FileUtils.mkdir_p(@path_for_import)
  @path_for_import
end

#perform_method ⇒ Object



62
63
64
65
66
67
68
# File 'app/parsers/bulkrax/application_parser.rb', line 62

# Name of the job-dispatch method to use, chosen from #validate_only.
#
# @return [String] 'perform_now' when validate_only is truthy,
#   'perform_later' otherwise
def perform_method
  self.validate_only ? 'perform_now' : 'perform_later'
end

#record(identifier, _opts = {}) ⇒ Object

TODO:
  • review this method - is it ever used?



243
244
245
246
247
248
249
# File 'app/parsers/bulkrax/application_parser.rb', line 243

# Builds an entry for the identifier and caches it in @record.
#
# Note that the cache is not keyed by identifier: once a record has been
# built, later calls return it regardless of the identifier passed.
#
# @todo review this method - is it ever used?
# @param identifier [Object] passed to `entry_class.new` along with self
# @param _opts [Hash] unused
# @return [Object] the cached entry
def record(identifier, _opts = {})
  unless @record
    @record = entry_class.new(self, identifier)
    @record.build
  end
  @record
end

#record_has_source_identifier(record, index) ⇒ Object



193
194
195
196
197
198
199
200
201
202
203
204
# File 'app/parsers/bulkrax/application_parser.rb', line 193

# Ensures the record carries a value under #source_identifier.
#
# * value already present  -> true
# * blank, and Bulkrax.fill_in_blank_source_identifiers is configured ->
#   the callable is invoked with (self, index), its result is written into
#   the record and returned
# * blank, nothing configured -> the record is reported through
#   #invalid_record and false is returned
#
# @param record [#[], #[]=, #to_h] the source record
# @param index [Integer] passed to the fill-in callable
# @return [Object] true, false, or the generated identifier
def record_has_source_identifier(record, index)
  return true unless record[source_identifier].blank?

  generator = Bulkrax.fill_in_blank_source_identifiers
  if generator.present?
    record[source_identifier] = generator.call(self, index)
  else
    invalid_record("Missing #{source_identifier} for #{record.to_h}\n")
    false
  end
end

#records(_opts = {}) ⇒ Object

Raises:

  • (StandardError)


41
42
43
# File 'app/parsers/bulkrax/application_parser.rb', line 41

# Abstract hook: the base class has no record source and always raises.
# NOTE(review): concrete parsers are presumably expected to override this — confirm.
#
# @param _opts [Hash] unused by the base class
# @raise [StandardError] always, with the message 'must be defined'
def records(_opts = {})
  raise StandardError, 'must be defined'
end

#required_elements ⇒ Object

rubocop:enable Rails/SkipsModelValidations



216
217
218
219
220
221
222
# File 'app/parsers/bulkrax/application_parser.rb', line 216

# Names of the elements every record must provide.
#
# 'title' is always required; the #source_identifier is required too unless
# Bulkrax.fill_in_blank_source_identifiers is set.
#
# @return [Array] ['title'] or ['title', source_identifier]
def required_elements
  elements = ['title']
  elements << source_identifier unless Bulkrax.fill_in_blank_source_identifiers
  elements
end

#retrieve_cloud_files(files) ⇒ Object

Optional, define if using browse everything for file upload



83
# File 'app/parsers/bulkrax/application_parser.rb', line 83

# Optional, define if using browse everything for file upload.
# The base implementation ignores +files+ and returns nil.
#
# @param files [Object] unused by the base class
# @return [nil]
def retrieve_cloud_files(files); end

#setup_export_file ⇒ Object

Raises:

  • (StandardError)


164
165
166
# File 'app/parsers/bulkrax/application_parser.rb', line 164

# Abstract hook for preparing the export file.
#
# The base class provides no implementation: it raises when the parser is
# attached to an exporter and does nothing otherwise.
#
# @return [nil] when #exporter? is falsy
# @raise [StandardError] ('must be defined') when #exporter? is truthy
def setup_export_file
  return unless exporter?

  raise StandardError, 'must be defined'
end

#setup_parents ⇒ Object



142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
# File 'app/parsers/bulkrax/application_parser.rb', line 142

# Collects parent => children identifier lists from #records.
#
# Each record is converted with #to_h when it supports it; anything that is
# not a Hash afterwards is ignored. The :children value may be an Array or a
# String of identifiers separated by ':', ';' or '|' (surrounding whitespace
# is dropped). Records with blank children are skipped. When the same parent
# identifier appears more than once, the last record wins.
#
# @return [Hash, Array] Hash keyed by the record's #source_identifier value,
#   or an empty Array when no record declares children
def setup_parents
  children_by_parent = {}
  records.each do |record|
    row = record.respond_to?(:to_h) ? record.to_h : record
    next unless row.is_a?(Hash)

    kids = row[:children]
    kids = kids.split(/\s*[:;|]\s*/) if kids.is_a?(String)
    next if kids.blank?

    children_by_parent[row[source_identifier]] = kids
  end
  # Callers have historically received [] (not {}) when nothing was found.
  children_by_parent.empty? ? [] : children_by_parent
end

#source_identifier ⇒ Object



45
46
47
# File 'app/parsers/bulkrax/application_parser.rb', line 45

# Key under which source records carry their identifier.
#
# Taken from the first "from" entry of the first mapping in
# #identifier_hash, symbolized; falls back to :source_identifier when the
# mapping does not provide one. Memoized in @source_identifier.
#
# @return [Symbol]
def source_identifier
  return @source_identifier if @source_identifier

  mapped_from = identifier_hash.values.first&.[]("from")&.first&.to_sym
  @source_identifier = mapped_from || :source_identifier
end

#total ⇒ Object



251
252
253
# File 'app/parsers/bulkrax/application_parser.rb', line 251

# Record count reported by this parser; the base class always reports 0.
#
# @return [Integer] 0
def total
  0
end

#unzip(file_to_unzip) ⇒ Object



264
265
266
# File 'app/parsers/bulkrax/application_parser.rb', line 264

# Extracts the given archive into #importer_unzip_path using
# WillowSword::ZipPackage.
#
# @param file_to_unzip [Object] passed as the first argument to ZipPackage.new
# @return [Object] the result of ZipPackage#unzip_file
def unzip(file_to_unzip)
  package = WillowSword::ZipPackage.new(file_to_unzip, importer_unzip_path)
  package.unzip_file
end

#valid_import? ⇒ Boolean

Override to add specific validations

Returns:

  • (Boolean)


189
190
191
# File 'app/parsers/bulkrax/application_parser.rb', line 189

# Override to add specific validations.
# The base class performs no checks and always answers true.
#
# @return [Boolean] true
def valid_import?
  true
end

#visibility ⇒ Object



70
71
72
# File 'app/parsers/bulkrax/application_parser.rb', line 70

# Visibility setting read from the 'visibility' entry of #parser_fields,
# defaulting to 'open' when that entry is nil or false.
# Memoized in @visibility.
#
# @return [Object] the configured visibility, or 'open'
def visibility
  return @visibility if @visibility

  @visibility = self.parser_fields['visibility'] || 'open'
end

#work_identifier ⇒ Object



49
50
51
# File 'app/parsers/bulkrax/application_parser.rb', line 49

# Name of the mapped field that holds the source identifier.
#
# This is the first key of #identifier_hash, symbolized; falls back to
# :source when no mapping declares one. Memoized in @work_identifier.
#
# @return [Symbol]
def work_identifier
  return @work_identifier if @work_identifier

  mapped_key = identifier_hash.keys.first&.to_sym
  @work_identifier = mapped_key || :source
end

#write ⇒ Object



259
260
261
262
# File 'app/parsers/bulkrax/application_parser.rb', line 259

# Produces the export: calls #write_files first, then #zip.
#
# @return [Object] whatever #zip returns
def write
  write_files
  zip
end

#write_files ⇒ Object

Raises:

  • (StandardError)


168
169
170
# File 'app/parsers/bulkrax/application_parser.rb', line 168

# Abstract hook for writing the exported files.
#
# The base class provides no implementation: it raises when the parser is
# attached to an exporter and does nothing otherwise.
#
# @return [nil] when #exporter? is falsy
# @raise [StandardError] ('must be defined') when #exporter? is truthy
def write_files
  return unless exporter?

  raise StandardError, 'must be defined'
end

#write_import_file(file) ⇒ Object



85
86
87
88
89
90
91
92
# File 'app/parsers/bulkrax/application_parser.rb', line 85

# Moves an uploaded file into #path_for_import and returns its new location.
#
# @param file [#path, #original_filename] the uploaded file; `path` is where
#   it currently lives on disk, `original_filename` the name to store it under
# @return [String] destination path inside #path_for_import
def write_import_file(file)
  # original_filename is supplied by the uploader. Keep only its final path
  # component so a name such as "../../x" cannot place the file outside the
  # import directory.
  path = File.join(path_for_import, File.basename(file.original_filename))
  FileUtils.mv(file.path, path)
  path
end

#zip ⇒ Object



268
269
270
271
# File 'app/parsers/bulkrax/application_parser.rb', line 268

# Rebuilds the export archive: removes whatever exists at
# #exporter_export_zip_path, then zips #exporter_export_path into it using
# WillowSword::ZipPackage.
#
# @return [Object] the result of ZipPackage#create_zip
def zip
  FileUtils.rm_rf(exporter_export_zip_path)
  package = WillowSword::ZipPackage.new(exporter_export_path, exporter_export_zip_path)
  package.create_zip
end

#zip? ⇒ Boolean

Is this a zip file?

Returns:

  • (Boolean)


279
280
281
# File 'app/parsers/bulkrax/application_parser.rb', line 279

# Is this a zip file?
#
# Looks up 'import_file_path' in #parser_fields and asks MIME::Types whether
# the types registered for that file name include 'application/zip'.
#
# @return [Boolean, nil] nil when #parser_fields is nil or has no
#   'import_file_path'; otherwise the result of the MIME type check
def zip?
  path = parser_fields&.[]('import_file_path')
  path && MIME::Types.type_for(path).include?('application/zip')
end