Class: Bulkrax::BagitParser

Inherits:
ApplicationParser show all
Defined in:
app/parsers/bulkrax/bagit_parser.rb

Instance Attribute Summary

Attributes inherited from ApplicationParser

#importerexporter

Class Method Summary collapse

Instance Method Summary collapse

Methods inherited from ApplicationParser

#create_parent_child_relationships, #exporter?, #file?, #find_or_create_entry, #identifier_hash, #import_file_path, import_supported?, #importer?, #initialize, #invalid_record, #limit_reached?, #new_entry, #parents, parser_fields, #path_for_import, #perform_method, #record, #record_has_source_identifier, #required_elements, #setup_export_file, #setup_parents, #source_identifier, #unzip, #visibility, #work_identifier, #write, #write_files, #write_import_file, #zip, #zip?

Constructor Details

This class inherits a constructor from Bulkrax::ApplicationParser

Class Method Details

.export_supported? ⇒ Boolean

Returns:

  • (Boolean)


5
6
7
# File 'app/parsers/bulkrax/bagit_parser.rb', line 5

# Reports whether this parser can be used for exports.
#
# @return [Boolean] always false
# @todo will be supported
def self.export_supported?
  false # @todo will be supported
end

Instance Method Details

#collection_entry_class ⇒ Object



20
21
22
23
24
# File 'app/parsers/bulkrax/bagit_parser.rb', line 20

# Resolves the entry class used for collection records.
#
# The name is derived from the 'metadata_format' parser field by replacing
# every 'Entry' with 'CollectionEntry' and then constantizing the result.
# Any StandardError raised along the way (missing field, unknown constant,
# ...) falls back to Entry.
#
# @return [Object] the resolved constant, or Entry when resolution fails
def collection_entry_class
  format_name = parser_fields['metadata_format']
  collection_format_name = format_name.gsub('Entry', 'CollectionEntry')
  collection_format_name.constantize
rescue StandardError
  Entry
end

#collections ⇒ Object



85
86
87
# File 'app/parsers/bulkrax/bagit_parser.rb', line 85

# Lists the distinct collection names referenced by the records.
#
# Each record's :collection value is split on ';' or '|' (surrounding
# whitespace is dropped); records without a :collection value are skipped.
#
# @return [Array<String>] unique collection names in first-seen order
def collections
  names = records.flat_map do |record|
    raw_names = record[:collection]
    raw_names.split(/\s*[;|]\s*/) if raw_names.present?
  end
  names.compact.uniq
end

#collections_total ⇒ Object



89
90
91
# File 'app/parsers/bulkrax/bagit_parser.rb', line 89

# Counts the entries returned by #collections.
#
# @return [Integer] number of collections
def collections_total
  collections.length
end

#create_collections ⇒ Object

Find or create collections referenced by works. If the import data also contains records for these works, they will be updated during create works.



51
52
53
54
55
56
57
58
59
60
61
62
63
64
# File 'app/parsers/bulkrax/bagit_parser.rb', line 51

# Find or create the collections referenced by works.
# If the import data also contains records for these works, they will be
# updated during create works.
#
# For every non-blank name returned by #collections this builds a minimal
# metadata hash, finds or creates the matching entry, runs
# ImportWorkCollectionJob immediately and bumps the counters.
#
# @return [Array] the value of #collections (result of each_with_index)
def create_collections
  collections.each_with_index do |collection, index|
    next if collection.blank?
    # Minimal metadata for the collection: its name doubles as the title and
    # as the value stored under the work identifier key.
    metadata = {
      title: [collection],
      work_identifier => [collection],
      visibility: 'open',
      collection_type_gid: Hyrax::CollectionType.find_or_create_default_collection_type.gid
    }
    new_entry = find_or_create_entry(collection_entry_class, collection, 'Bulkrax::Importer', metadata)
    ImportWorkCollectionJob.perform_now(new_entry.id, current_run.id)
    increment_counters(index, true)
  end
end

#create_works ⇒ Object



66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
# File 'app/parsers/bulkrax/bagit_parser.rb', line 66

# Creates an entry for every record and dispatches the matching job.
#
# Records without a source identifier are skipped; iteration stops once
# limit_reached? reports true. Records with a present :delete value are sent
# to DeleteWorkJob, all others to ImportWorkJob, using the job method named by
# perform_method. Any StandardError is handed to status_info instead of
# being raised.
#
# @return [Object] the result of importer.record_status, or of status_info
#   when an error was rescued
def create_works
  records.each_with_index do |raw_record, position|
    next unless record_has_source_identifier(raw_record, position)
    break if limit_reached?(limit, position)

    identifier = raw_record[source_identifier]
    seen[identifier] = true
    entry = find_or_create_entry(entry_class, identifier, 'Bulkrax::Importer', raw_record)
    if raw_record[:delete].present?
      # Deletion jobs receive the objects themselves ...
      DeleteWorkJob.send(perform_method, entry, current_run)
    else
      # ... whereas import jobs receive only the ids.
      ImportWorkJob.send(perform_method, entry.id, current_run.id)
    end
    increment_counters(position)
  end
  importer.record_status
rescue StandardError => e
  status_info(e)
end

#entry_class ⇒ Object



16
17
18
# File 'app/parsers/bulkrax/bagit_parser.rb', line 16

# Resolves the entry class named by the 'metadata_format' parser field.
#
# @return [Object] the constant the configured name resolves to
def entry_class
  format_name = parser_fields['metadata_format']
  format_name.constantize
end

#import_fields ⇒ Object

Take a random sample of 10 metadata_paths and work out the import fields from that

Raises:

  • (StandardError)


27
28
29
30
31
32
# File 'app/parsers/bulkrax/bagit_parser.rb', line 27

# Take a random sample of 10 metadata_paths and work out the import fields
# from that.
#
# The result is memoized in @import_fields; the blank check on
# metadata_paths runs on every call, before the memoized value is consulted.
# `metadata_paths` is defined outside this excerpt.
#
# @return [Array] unique, non-nil field names found in the sampled files
# @raise [StandardError] when metadata_paths is blank
def import_fields
  raise StandardError, 'No metadata files were found' if metadata_paths.blank?
  @import_fields ||= metadata_paths.sample(10).map do |path|
    entry_class.fields_from_data(entry_class.read_data(path))
  end.flatten.compact.uniq
end

#records(_opts = {}) ⇒ Object

Assume a single metadata record per path. Create an Array of all metadata records, one per file.

Raises:

  • (StandardError)


36
37
38
39
40
41
42
43
44
45
46
# File 'app/parsers/bulkrax/bagit_parser.rb', line 36

# Assume a single metadata record per path.
# Create an Array of all metadata records, one per file.
#
# The result is memoized in @records; the blank check on bags runs on every
# call. Each record gets a :file entry holding the bag's files joined
# with '|'.
#
# NOTE(review): the method name in `path = metadata_path(bag)` was missing
# from the listing (it read `path = (bag)`); `metadata_path` is restored on
# the assumption that it returns the metadata file path for a bag and is
# defined outside this excerpt — confirm against the class.
#
# @param _opts [Hash] unused
# @return [Array] one record per bag
# @raise [StandardError] when bags is blank, or a bag has no metadata path
def records(_opts = {})
  raise StandardError, 'No BagIt records were found' if bags.blank?
  @records ||= bags.map do |bag|
    path = metadata_path(bag)
    raise StandardError, 'No metadata files were found' if path.blank?
    data = entry_class.read_data(path)
    data = entry_class.data_for_entry(data, source_identifier)
    data[:file] = bag.bag_files.join('|')
    data
  end
end

#required_elements?(keys) ⇒ Boolean

Returns:

  • (Boolean)


97
98
99
100
# File 'app/parsers/bulkrax/bagit_parser.rb', line 97

# Checks that every required element appears among the given keys.
#
# Keys are compared by their string form, so symbols and strings both match.
#
# @param keys [Array<#to_s>, nil] field names found in the import data
# @return [Boolean] true when all required elements are present; false when
#   keys is blank or any required element is missing
def required_elements?(keys)
  return false if keys.blank?
  # Stringify once instead of once per required element.
  key_names = keys.map(&:to_s)
  required_elements.all? { |element| key_names.include?(element) }
end

#retrieve_cloud_files(files) ⇒ Object

TODO:
  • investigate getting directory structure

TODO:
  • investigate using perform_later, and having the importer check for DownloadCloudFileJob before it starts



105
106
107
108
109
110
111
112
# File 'app/parsers/bulkrax/bagit_parser.rb', line 105

# Downloads the uploaded cloud file into the import directory.
#
# Only the entry under key '0' is used. Spaces in its 'file_name' are
# replaced with underscores to build the target path under path_for_import.
#
# @param files [Hash] cloud file descriptions keyed by index string
# @return [String, nil] the local target path, or nil when files['0'] is blank
# @todo investigate getting directory structure
# @todo investigate using perform_later, and having the importer check for
#   DownloadCloudFileJob before it starts
def retrieve_cloud_files(files)
  # There should only be one zip file for Bagit, so take the first.
  zip_info = files['0']
  return if zip_info.blank?

  sanitized_name = zip_info['file_name'].tr(' ', '_')
  target_file = File.join(path_for_import, sanitized_name)
  # Run the download now: the files must be in place before the importer runs.
  Bulkrax::DownloadCloudFileJob.perform_now(zip_info, target_file)
  target_file
end

#total ⇒ Object



93
94
95
# File 'app/parsers/bulkrax/bagit_parser.rb', line 93

# Counts the metadata paths.
#
# NOTE(review): the receiver was missing from the listing (it read `.count`);
# `metadata_paths` is restored on the assumption that it is the same helper
# named in the import_fields description — confirm against the class.
#
# @return [Integer] number of metadata paths
def total
  metadata_paths.count
end

#valid_import? ⇒ Boolean

Returns:

  • (Boolean)


9
10
11
12
13
14
# File 'app/parsers/bulkrax/bagit_parser.rb', line 9

# Checks whether the import data yields any import fields.
#
# Errors raised while computing the fields are passed to status_info rather
# than propagated.
#
# @return [Boolean, nil] true when import_fields is present, nil when it is
#   blank, false when a StandardError was rescued
def valid_import?
  fields = import_fields
  true if fields.present?
rescue StandardError => e
  status_info(e)
  false
end