Class: Bulkrax::XmlParser

Inherits:
ApplicationParser show all
Defined in:
app/parsers/bulkrax/xml_parser.rb

Instance Attribute Summary

Attributes inherited from ApplicationParser

#importerexporter

Instance Method Summary collapse

Methods inherited from ApplicationParser

#collections_total, #create_parent_child_relationships, export_supported?, #exporter?, #file?, #find_or_create_entry, #identifier_hash, #import_file_path, import_supported?, #importer?, #initialize, #invalid_record, #limit_reached?, #new_entry, #parents, parser_fields, #path_for_import, #perform_method, #record, #record_has_source_identifier, #required_elements, #retrieve_cloud_files, #setup_export_file, #setup_parents, #source_identifier, #unzip, #visibility, #work_identifier, #write, #write_files, #write_import_file, #zip, #zip?

Constructor Details

This class inherits a constructor from Bulkrax::ApplicationParser

Instance Method Details

#collection_entry_class ⇒ Object

TODO:

not yet supported



10
# File 'app/parsers/bulkrax/xml_parser.rb', line 10

# TODO: not yet supported.
# Placeholder with no behavior of its own.
#
# @return [nil] always
def collection_entry_class
  nil
end

#create_collections ⇒ Object

TODO:

not yet supported



13
# File 'app/parsers/bulkrax/xml_parser.rb', line 13

# TODO: not yet supported.
# Placeholder with no behavior of its own.
#
# @return [nil] always
def create_collections
  nil
end

#create_works ⇒ Object



80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
# File 'app/parsers/bulkrax/xml_parser.rb', line 80

# Walks every parsed record and dispatches one job per record.
#
# For each record that passes +record_has_source_identifier+:
# * stops iterating once the index reaches +limit+ (when a limit is set),
# * marks the record's source identifier in +seen+,
# * finds or creates its entry,
# * sends +perform_method+ to DeleteWorkJob when the record's :delete value
#   is present, otherwise to ImportWorkJob,
# * calls +increment_counters+ with the record's index.
#
# After the loop, returns the result of +importer.record_status+.
# Any StandardError raised along the way is handed to +status_info+
# instead of propagating.
def create_works
  records.each_with_index do |rec, position|
    # Records without a source identifier are skipped before the limit check.
    next unless record_has_source_identifier(rec, position)
    break unless limit.nil? || position < limit

    identifier = rec[source_identifier]
    seen[identifier] = true
    entry = find_or_create_entry(entry_class, identifier, 'Bulkrax::Importer', rec)
    if rec[:delete].present?
      # The delete job receives the entry and run objects themselves ...
      DeleteWorkJob.send(perform_method, entry, current_run)
    else
      # ... whereas the import job receives their ids.
      ImportWorkJob.send(perform_method, entry.id, current_run.id)
    end
    increment_counters(position)
  end
  importer.record_status
rescue StandardError => e
  status_info(e)
end

#entry_class ⇒ Object



5
6
7
# File 'app/parsers/bulkrax/xml_parser.rb', line 5

# The entry class this parser uses when reading data and building entries.
#
# @return [Class] Bulkrax::XmlEntry
def entry_class
  Bulkrax::XmlEntry
end

#file_paths ⇒ Object

Return all files in the import directory and sub-directories



55
56
57
58
59
60
61
62
63
64
# File 'app/parsers/bulkrax/xml_parser.rb', line 55

# Lists every regular file under the import location, recursively.
#
# When +file?+ is true the search starts from the directory containing
# +import_file_path+; otherwise +import_file_path+ itself is treated as
# the directory to search. The result is memoized in @file_paths.
#
# @return [Array<String>] paths of regular files (directories are excluded)
def file_paths
  @file_paths ||= begin
    base_dir = file? ? File.dirname(import_file_path) : import_file_path
    Dir.glob("#{base_dir}/**/*").select { |path| File.file?(path) }
  end
end

#import_fields ⇒ Object

TODO:

not yet supported



16
# File 'app/parsers/bulkrax/xml_parser.rb', line 16

# TODO: not yet supported.
# Placeholder with no behavior of its own.
#
# @return [nil] always
def import_fields
  nil
end

#metadata_paths ⇒ Object

If the import_file_path is an XML file, return that. Otherwise, return all XML files in the given folder.



68
69
70
71
72
73
74
75
76
77
78
# File 'app/parsers/bulkrax/xml_parser.rb', line 68

# Locates the XML metadata file(s) for this import.
#
# If +import_file_path+ is itself a file whose MIME type list includes
# 'application/xml', that single path is returned. Otherwise the result is
# every entry of +file_paths+ that is XML by MIME type and whose path
# contains "import_<importerexporter id>". Memoized in @metadata_paths.
#
# @return [Array<String>] paths of the XML metadata files
def metadata_paths
  @metadata_paths ||=
    if file? && MIME::Types.type_for(import_file_path).include?('application/xml')
      [import_file_path]
    else
      file_paths.select do |f|
        MIME::Types.type_for(f).include?('application/xml') &&
          f.include?("import_#{importerexporter.id}")
      end
    end
end

#record_element ⇒ Object



50
51
52
# File 'app/parsers/bulkrax/xml_parser.rb', line 50

# Looks up the 'record_element' entry of the parser fields.
#
# @return [Object, nil] whatever +parser_fields+ holds under 'record_element'
def record_element
  fields = parser_fields
  fields['record_element']
end

#records(_opts = {}) ⇒ Object

For multiple, we expect to find metadata for multiple works in the given metadata file(s). For single, we expect to find metadata for a single work in the given metadata file(s);

if the file contains more than one record, we take only the first

In either case there may be multiple metadata files returned by metadata_paths



31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
# File 'app/parsers/bulkrax/xml_parser.rb', line 31

# Builds the record data for every metadata file returned by
# +metadata_paths+. Memoized in @records.
#
# The behavior depends on parser_fields['import_type']:
# * 'multiple' - every element matching "//<record_element>" in each file
#   becomes a record; the combined list is compacted and flattened.
# * 'single'   - only the first matching element of each file is used; the
#   list is compacted (one record per file, so no flattening is needed).
# * anything else - no branch matches and the result is nil.
#
# @param _opts [Hash] unused
# @return [Array, nil] record data as produced by entry_class.data_for_entry
def records(_opts = {})
  @records ||=
    case parser_fields['import_type']
    when 'multiple'
      metadata_paths.flat_map do |md|
        # Retrieve all records in this file
        elements = entry_class.read_data(md).xpath("//#{record_element}")
        elements.map { |el| entry_class.data_for_entry(el, source_identifier) }
      end.compact.flatten # Flatten because an entry may itself be an array
    when 'single'
      metadata_paths.map do |md|
        data = entry_class.read_data(md).xpath("//#{record_element}").first # Take only the first record
        entry_class.data_for_entry(data, source_identifier)
      end.compact
    end
end

#total ⇒ Object



99
100
101
# File 'app/parsers/bulkrax/xml_parser.rb', line 99

# Number of records found by +records+.
#
# @return [Integer] the size of the record list
def total
  found = records
  found.size
end

#valid_import? ⇒ Boolean

Returns:

  • (Boolean)


18
19
20
21
22
23
24
25
# File 'app/parsers/bulkrax/xml_parser.rb', line 18

# Checks that the import has something to work with.
#
# Fails when +metadata_paths+ is blank ('No metadata files found') or when
# +records+ is blank ('No records found'). The failure is raised internally,
# passed to +status_info+, and reported to the caller as +false+ rather
# than as an exception.
#
# @return [Boolean] true when metadata files and records were both found
def valid_import?
  raise StandardError, 'No metadata files found' if metadata_paths.blank?
  raise StandardError, 'No records found' if records.blank?
  true
rescue StandardError => e
  status_info(e)
  false
end