Class: Bulkrax::XmlParser

Inherits:
ApplicationParser show all
Defined in:
app/parsers/bulkrax/xml_parser.rb

Instance Attribute Summary

Attributes inherited from ApplicationParser

#headers, #importerexporter

Instance Method Summary collapse

Methods inherited from ApplicationParser

#base_path, #calculate_type_delay, #collections_total, #create_entry_and_job, #create_objects, #create_relationships, #create_works, export_supported?, #exporter?, #file_sets_total, #find_or_create_entry, #generated_metadata_mapping, #get_field_mapping_hash_for, #import_file_path, import_supported?, #importer?, #initialize, #invalid_record, #limit_reached?, #model_field_mappings, #new_entry, parser_fields, #path_for_import, #perform_method, #rebuild_entries, #rebuild_entry_query, #record, #record_deleted?, #record_has_source_identifier, #record_raw_metadata, #record_remove_and_rerun?, #related_children_parsed_mapping, #related_children_raw_mapping, #related_parents_parsed_mapping, #related_parents_raw_mapping, #required_elements, #retrieve_cloud_files, #setup_export_file, #source_identifier, #untar, #unzip, #visibility, #work_entry_class, #work_identifier, #work_identifier_search_field, #write, #write_files, #write_import_file, #zip

Constructor Details

This class inherits a constructor from Bulkrax::ApplicationParser

Instance Method Details

#collection_entry_class ⇒ Object

TODO:

not yet supported



9
# File 'app/parsers/bulkrax/xml_parser.rb', line 9

# Placeholder with no behavior: the body is empty, so the result is nil.
#
# @todo not yet supported
# @return [nil]
def collection_entry_class
  nil
end

#collections ⇒ Object

Raises:

  • (NotImplementedError)


28
29
30
# File 'app/parsers/bulkrax/xml_parser.rb', line 28

# Not implemented in this parser: every call raises.
#
# @raise [NotImplementedError] always
def collections
  raise NotImplementedError
end

#create_collections ⇒ Object

TODO:

not yet supported

Raises:

  • (NotImplementedError)


12
13
14
# File 'app/parsers/bulkrax/xml_parser.rb', line 12

# Not implemented in this parser: every call raises.
#
# @todo not yet supported
# @raise [NotImplementedError] always
def create_collections
  raise NotImplementedError
end

#create_file_sets ⇒ Object

TODO:

not yet supported

Raises:

  • (NotImplementedError)


20
21
22
# File 'app/parsers/bulkrax/xml_parser.rb', line 20

# Not implemented in this parser: every call raises.
#
# @todo not yet supported
# @raise [NotImplementedError] always
def create_file_sets
  raise NotImplementedError
end

#entry_class ⇒ Object



4
5
6
# File 'app/parsers/bulkrax/xml_parser.rb', line 4

# The entry class used by this parser. #records calls read_data and
# data_for_entry on the returned class.
#
# @return [Class] Bulkrax::XmlEntry
def entry_class
  Bulkrax::XmlEntry
end

#file_paths ⇒ Object

Return all files in the import directory and sub-directories



81
82
83
84
85
86
87
88
89
90
# File 'app/parsers/bulkrax/xml_parser.rb', line 81

# Return all regular files in the import directory and its sub-directories.
#
# When the import path is a single file (file? is true), the search starts
# from the directory containing that file; otherwise it starts from the
# import path itself. The result is memoized in @file_paths.
#
# @return [Array<String>] paths of regular files; directories are excluded
def file_paths
  @file_paths ||= begin
    base_dir = file? ? File.dirname(import_file_path) : import_file_path
    # Hand the directory to glob via `base:` instead of interpolating it into
    # the pattern, so glob metacharacters in the path ([], {}, *, ?) are
    # matched literally rather than being interpreted as a pattern.
    Dir.glob('**/*', base: base_dir)
       .map { |relative| File.join(base_dir, relative) }
       .select { |path| File.file?(path) }
  end
end

#file_set_entry_class ⇒ Object

TODO:

not yet supported



17
# File 'app/parsers/bulkrax/xml_parser.rb', line 17

# Placeholder with no behavior: the body is empty, so the result is nil.
#
# @todo not yet supported
# @return [nil]
def file_set_entry_class
  nil
end

#file_sets ⇒ Object

Raises:

  • (NotImplementedError)


24
25
26
# File 'app/parsers/bulkrax/xml_parser.rb', line 24

# Not implemented in this parser: every call raises.
#
# @raise [NotImplementedError] always
def file_sets
  raise NotImplementedError
end

#good_file_type?(path) ⇒ Boolean

Returns:

  • (Boolean)


105
106
107
# File 'app/parsers/bulkrax/xml_parser.rb', line 105

# Decide whether a path looks like XML metadata this parser can read.
#
# A path qualifies when its extension is .xml, .xls or .xsd (compared
# case-insensitively), or when it is an existing file whose content Marcel
# reports as a type containing 'application/xml'.
#
# @param path [String] path to the candidate file
# @return [Boolean]
def good_file_type?(path)
  # NOTE(review): '.xls' is usually an Excel extension; possibly '.xsl' was
  # intended — confirm before changing the list.
  return true if %w[.xml .xls .xsd].include?(File.extname(path).downcase)
  return false unless File.file?(path)

  # Marcel only sniffs content when handed a Pathname or IO; a bare String is
  # treated as the data itself, so open the file and pass the IO instead.
  File.open(path, 'rb') { |io| ::Marcel::MimeType.for(io).include?('application/xml') }
end

#import_fields ⇒ Object

TODO:

not yet supported



42
# File 'app/parsers/bulkrax/xml_parser.rb', line 42

# Placeholder with no behavior: the body is empty, so the result is nil.
#
# @todo not yet supported
# @return [nil]
def import_fields
  nil
end

#metadata_paths ⇒ Object

If the import_file_path is an xml file, return that. Otherwise, return all xml files in the given folder.



94
95
96
97
98
99
100
101
102
103
# File 'app/parsers/bulkrax/xml_parser.rb', line 94

# Locate the metadata files for this import.
#
# If the import path is a single file of a good type, return just that file.
# Otherwise return every entry of #file_paths that has a good file type and
# whose path contains "import_<importerexporter.id>".
# The result is memoized in @metadata_paths.
#
# @return [Array<String>] metadata file paths
def metadata_paths
  @metadata_paths ||=
    if file? && good_file_type?(import_file_path)
      [import_file_path]
    else
      file_paths.select do |path|
        good_file_type?(path) && path.include?("import_#{importerexporter.id}")
      end
    end
end

#record_element ⇒ Object



76
77
78
# File 'app/parsers/bulkrax/xml_parser.rb', line 76

# The 'record_element' value from parser_fields. #records interpolates it
# into the XPath "//<record_element>" to locate records.
#
# @return [Object] whatever is stored under 'record_element'
def record_element
  parser_fields['record_element']
end

#records(_opts = {}) ⇒ Object

For multiple, we expect to find metadata for multiple works in the given metadata file(s). For single, we expect to find metadata for a single work in the given metadata file(s);

if the file contains more than one record, we take only the first.

In either case there may be multiple metadata files returned by metadata_paths



57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
# File 'app/parsers/bulkrax/xml_parser.rb', line 57

# Build the entry data for every record found in the metadata files.
#
# For import_type 'multiple', every element matching "//<record_element>" in
# each metadata file becomes a record. For import_type 'single', only the
# first matching element of each file is used. In either case there may be
# several metadata files returned by metadata_paths. Nil results from
# data_for_entry are dropped. The result is memoized in @records.
#
# @param _opts [Hash] unused
# @return [Array, nil] record data; nil when import_type is neither
#   'multiple' nor 'single'
def records(_opts = {})
  @records ||=
    case parser_fields['import_type']
    when 'multiple'
      all_records = metadata_paths.flat_map do |md|
        # Retrieve all records in this file
        elements = entry_class.read_data(md).xpath("//#{record_element}")
        elements.map { |el| entry_class.data_for_entry(el, source_identifier, self) }
      end
      # Flatten because we may have multiple records per array
      all_records.compact.flatten
    when 'single'
      metadata_paths.map do |md|
        data = entry_class.read_data(md).xpath("//#{record_element}").first # Take only the first record
        entry_class.data_for_entry(data, source_identifier, self)
      end.compact # No need to flatten because we take only the first record
    end
end

#total ⇒ Object



109
110
111
# File 'app/parsers/bulkrax/xml_parser.rb', line 109

# Number of records found, i.e. the size of #records.
#
# @return [Integer]
def total
  records.size
end

#valid_import? ⇒ Boolean

Returns:

  • (Boolean)


44
45
46
47
48
49
50
51
# File 'app/parsers/bulkrax/xml_parser.rb', line 44

# Check that there is something to import.
#
# The import is invalid when no metadata files or no records are found.
# Any StandardError raised while checking is handed to set_status_info and
# reported as an invalid import instead of being propagated.
#
# @return [Boolean] true when both metadata files and records were found
def valid_import?
  raise StandardError, 'No metadata files found' if metadata_paths.blank?
  raise StandardError, 'No records found' if records.blank?

  true
rescue StandardError => e
  set_status_info(e)
  false
end

#works ⇒ Object



32
33
34
# File 'app/parsers/bulkrax/xml_parser.rb', line 32

# Works are simply the parsed records: this returns #records unchanged.
#
# @return [Object] the result of #records
def works
  records
end

#works_total ⇒ Object

TODO: change to differentiate between collection and work records when adding ability to import collection metadata



37
38
39
# File 'app/parsers/bulkrax/xml_parser.rb', line 37

# Number of works; currently identical to #total.
#
# TODO: change to differentiate between collection and work records when
# adding ability to import collection metadata
#
# @return [Object] the result of #total
def works_total
  total
end