Class: Bulkrax::OaiDcParser

Inherits:
ApplicationParser show all
Defined in:
app/parsers/bulkrax/oai_dc_parser.rb

Direct Known Subclasses

OaiQualifiedDcParser

Instance Attribute Summary collapse

Attributes inherited from ApplicationParser

#importerexporter

Instance Method Summary collapse

Methods inherited from ApplicationParser

#base_path, #create_file_sets, #create_objects, #create_relationships, export_supported?, #exporter?, #file_sets_total, #find_or_create_entry, #generated_metadata_mapping, #get_field_mapping_hash_for, #import_file_path, import_supported?, #importer?, #invalid_record, #limit_reached?, #model_field_mappings, #new_entry, parser_fields, #path_for_import, #perform_method, #record, #record_has_source_identifier, #related_children_parsed_mapping, #related_children_raw_mapping, #related_parents_parsed_mapping, #related_parents_raw_mapping, #required_elements, #retrieve_cloud_files, #setup_export_file, #source_identifier, #unzip, #valid_import?, #visibility, #work_identifier, #write, #write_files, #write_import_file, #zip

Constructor Details

#initialize(importerexporter) ⇒ OaiDcParser

Returns a new instance of OaiDcParser.



8
9
10
11
# File 'app/parsers/bulkrax/oai_dc_parser.rb', line 8

# Builds the parser and prepares the headers hash used by #client.
#
# @param importerexporter [Object] forwarded unchanged to the superclass
#   initializer; must respond to `user`, whose `email` is read here
def initialize(importerexporter)
  super
  # The importing user's email is stored under the :from key.
  @headers = { from: importerexporter.user.email }
end

Instance Attribute Details

#headers ⇒ Object

Returns the value of attribute headers.



5
6
7
# File 'app/parsers/bulkrax/oai_dc_parser.rb', line 5

# Reader for the headers hash assigned in #initialize.
#
# @return [Hash] the value of @headers (a hash with a :from key when set by
#   #initialize)
def headers
  @headers
end

Instance Method Details

#client ⇒ Object



13
14
15
16
17
18
19
# File 'app/parsers/bulkrax/oai_dc_parser.rb', line 13

# Returns the memoized OAI client, building it on first use from the
# importer's 'base_url' parser field, the parser's headers and the 'libxml'
# parser option.
#
# @return [OAI::Client]
# @raise [OAIError] whenever building the client raises a StandardError
def client
  return @client if @client

  endpoint = importerexporter.parser_fields['base_url']
  @client = OAI::Client.new(endpoint, headers: headers, parser: 'libxml')
rescue StandardError
  raise OAIError
end

#collection_entry_class ⇒ Object



29
30
31
# File 'app/parsers/bulkrax/oai_dc_parser.rb', line 29

# Entry class used for collections; #create_collections calls `where` on it.
#
# @return [Class] the OaiSetEntry constant
def collection_entry_class
  OaiSetEntry
end

#collection_name ⇒ Object



21
22
23
# File 'app/parsers/bulkrax/oai_dc_parser.rb', line 21

# Name of the OAI set to import, memoized after the first call.
#
# Falls back to 'all' when the 'set' parser field is missing, empty or
# whitespace-only. An empty string is truthy in Ruby, so a plain `||` would
# keep it as the set name; no set spec would then match and an empty `set`
# argument would be sent to the server.
#
# @return [String] the configured set, or 'all'
def collection_name
  @collection_name ||= begin
    requested_set = parser_fields['set']
    requested_set.to_s.strip.empty? ? 'all' : requested_set
  end
end

#collections ⇒ Object



112
113
114
# File 'app/parsers/bulkrax/oai_dc_parser.rb', line 112

# Memoized result of `list_sets`.
#
# NOTE(review): `list_sets` is not defined in this view; presumably it is
# delegated to the OAI client and returns the repository's sets — confirm.
#
# @return [Object] whatever `list_sets` returns; callers here use
#   `each_with_index` and `count` on it
def collections
  @collections ||= list_sets
end

#collections_total ⇒ Object



116
117
118
119
120
121
122
# File 'app/parsers/bulkrax/oai_dc_parser.rb', line 116

# Number of collections this import covers.
#
# @return [Integer] 1 when a single set is selected, otherwise the count of
#   all collections
def collections_total
  return 1 unless collection_name == 'all'

  collections.count
end

#create_collections ⇒ Object



66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
# File 'app/parsers/bulkrax/oai_dc_parser.rb', line 66

# Finds or creates one collection entry per selected OAI set and imports it
# synchronously.
#
# Every set is processed when #collection_name is 'all'; otherwise only the
# set whose spec equals #collection_name. Each entry's raw metadata holds an
# 'open' visibility, the default Hyrax collection type gid (only when Hyrax
# is loaded), the set name as title, and the unique collection identifier
# stored under the #work_identifier key.
def create_collections
  base_metadata = { visibility: 'open' }
  if defined?(::Hyrax)
    base_metadata[:collection_type_gid] = Hyrax::CollectionType.find_or_create_default_collection_type.gid
  end

  collections.each_with_index do |set, index|
    next unless collection_name == 'all' || collection_name == set.spec

    unique_collection_identifier = importerexporter.unique_collection_identifier(set.spec)
    # Build a fresh hash per set so entries never share one mutable object.
    metadata = base_metadata.merge(
      { title: [set.name], work_identifier => [unique_collection_identifier] }
    )

    new_entry = collection_entry_class.where(
      importerexporter: importerexporter,
      identifier: unique_collection_identifier,
      raw_metadata: metadata
    ).first_or_create!
    # perform now to ensure this gets created before work imports start
    ImportCollectionJob.perform_now(new_entry.id, importerexporter.current_run.id)
    increment_counters(index, collection: true)
  end
end

#create_works ⇒ Object



85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
# File 'app/parsers/bulkrax/oai_dc_parser.rb', line 85

# Walks the quick identifier listing and enqueues one job per record: a
# DeleteWorkJob for records flagged as deleted, an ImportWorkJob otherwise.
#
# Records with a blank source identifier get one from
# Bulkrax.fill_in_blank_source_identifiers when that is configured;
# otherwise they are reported through #invalid_record and skipped.
# Iteration stops as soon as #limit_reached? is true for the current index.
# NOTE(review): `full` presumably iterates across all result pages — confirm
# against the OAI client library.
def create_works
  identifier_list = records(quick: true)
  return if identifier_list.blank?

  identifier_list.full.each_with_index do |header, position|
    identifier = header.send(source_identifier)
    if identifier.blank?
      unless Bulkrax.fill_in_blank_source_identifiers.present?
        invalid_record("Missing #{source_identifier} for #{header.to_h}\n")
        next
      end
      identifier = Bulkrax.fill_in_blank_source_identifiers.call(self, position)
    end

    break if limit_reached?(limit, position)

    seen[identifier] = true
    entry = entry_class.where(importerexporter: importerexporter, identifier: identifier).first_or_create!
    if header.deleted?
      DeleteWorkJob.send(perform_method, entry, importerexporter.current_run)
    else
      ImportWorkJob.send(perform_method, entry.id, importerexporter.current_run.id)
    end
    increment_counters(position, work: true)
  end
  importer.record_status
end

#entry_class ⇒ Object



25
26
27
# File 'app/parsers/bulkrax/oai_dc_parser.rb', line 25

# Entry class used for works; #create_works calls `where` on it.
#
# @return [Class] the OaiDcEntry constant
def entry_class
  OaiDcEntry
end

#file_set_entry_class ⇒ Object



33
# File 'app/parsers/bulkrax/oai_dc_parser.rb', line 33

# Empty body, so this always returns nil.
# NOTE(review): presumably this parser has no file-set entry type — confirm.
def file_set_entry_class; end

#import_fields ⇒ Object

The set of fields available in the import data.



60
61
62
# File 'app/parsers/bulkrax/oai_dc_parser.rb', line 60

# The set of fields available in the import data.
#
# @return [Array<String>] a new array of the fifteen field names on each call
def import_fields
  %w[
    contributor coverage creator date description
    format identifier language publisher relation
    rights source subject title type
  ]
end

#records(opts = {}) ⇒ Object



35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
# File 'app/parsers/bulkrax/oai_dc_parser.rb', line 35

# Fetches records from the OAI endpoint.
#
# With `quick: true` only identifiers are listed (not memoized across calls);
# otherwise full records are listed and memoized in @records, so options
# passed on later calls are ignored once a result is cached.
#
# The request is limited to #collection_name unless that is 'all', and to
# records changed since the last import when `only_updates` is set and a
# previous import timestamp exists.
#
# @param opts [Hash] request options; :quick selects the identifier listing
#   and is never forwarded to the client. The hash is not modified.
# @return [Object, Array] the client's response, or an empty array when the
#   server answers "noRecordsMatch"
# @raise [OAI::Exception] for any other OAI error
def records(opts = {})
  # Work on a copy so the caller's hash is never mutated.
  query = opts.dup
  quick = query.delete(:quick)
  query[:metadata_prefix] ||= importerexporter.parser_fields['metadata_prefix']
  query[:set] = collection_name unless collection_name == 'all'

  last_import = importerexporter.last_imported_at
  query[:from] = last_import.strftime('%Y-%m-%d') if last_import && only_updates

  if quick
    begin
      @short_records = client.list_identifiers(query)
    rescue OAI::Exception => e
      return @short_records = [] if e.code == 'noRecordsMatch'
      raise
    end
  else
    begin
      # The configured metadata prefix always wins for full record listings.
      @records ||= client.list_records(query.merge(metadata_prefix: parser_fields['metadata_prefix']))
    rescue OAI::Exception => e
      return @records = [] if e.code == 'noRecordsMatch'
      raise
    end
  end
end

#total ⇒ Object



129
130
131
132
133
# File 'app/parsers/bulkrax/oai_dc_parser.rb', line 129

# Total number of records, read from the "completeListSize" attribute of the
# first resumptionToken element in the quick identifier listing. Memoized.
#
# Best effort: any StandardError along the way (including a response with no
# resumptionToken element) makes the total 0.
#
# @return [Integer]
def total
  @total ||= begin
    token = records(quick: true).doc.find('.//resumptionToken').to_a.first
    token.attributes['completeListSize'].to_i
  end
rescue StandardError
  @total = 0
end

#works_total ⇒ Object

TODO: change to differentiate between collection and work records when adding ability to import collection metadata



125
126
127
# File 'app/parsers/bulkrax/oai_dc_parser.rb', line 125

# Number of works in this import; currently the same value as #total.
#
# @return [Integer] whatever #total returns
def works_total
  total
end