Class: Bulkrax::XmlParser
- Inherits:
-
ApplicationParser
- Object
- ApplicationParser
- Bulkrax::XmlParser
- Defined in:
- app/parsers/bulkrax/xml_parser.rb
Instance Attribute Summary
Attributes inherited from ApplicationParser
Instance Method Summary collapse
- #collection_entry_class ⇒ Object
- #create_collections ⇒ Object
- #create_file_sets ⇒ Object
- #create_works ⇒ Object
- #entry_class ⇒ Object
-
#file_paths ⇒ Object
Return all files in the import directory and sub-directories.
- #file_set_entry_class ⇒ Object
- #good_file_type?(path) ⇒ Boolean
- #import_fields ⇒ Object
-
#metadata_paths ⇒ Object
If the import_file_path is an xml file, return that. Otherwise, return all xml files in the given folder.
- #record_element ⇒ Object
-
#records(_opts = {}) ⇒ Object
For multiple, we expect to find metadata for multiple works in the given metadata file(s). For single, we expect to find metadata for a single work in the given metadata file(s); if the file contains more than one record, we take only the first. In either case there may be multiple metadata files returned by metadata_paths.
- #total ⇒ Object
- #valid_import? ⇒ Boolean
-
#works_total ⇒ Object
TODO: change to differentiate between collection and work records when adding ability to import collection metadata.
Methods inherited from ApplicationParser
#base_path, #collections_total, #create_objects, #create_relationships, export_supported?, #exporter?, #file_sets_total, #find_or_create_entry, #generated_metadata_mapping, #get_field_mapping_hash_for, #import_file_path, import_supported?, #importer?, #initialize, #invalid_record, #limit_reached?, #model_field_mappings, #new_entry, parser_fields, #path_for_import, #perform_method, #record, #record_has_source_identifier, #related_children_parsed_mapping, #related_children_raw_mapping, #related_parents_parsed_mapping, #related_parents_raw_mapping, #required_elements, #retrieve_cloud_files, #setup_export_file, #source_identifier, #unzip, #visibility, #work_identifier, #write, #write_files, #write_import_file, #zip
Constructor Details
This class inherits a constructor from Bulkrax::ApplicationParser
Instance Method Details
#collection_entry_class ⇒ Object
not yet supported
11 |
# File 'app/parsers/bulkrax/xml_parser.rb', line 11
# Collection entries are not yet supported by this parser.
#
# @return [nil] always nil (the method body is empty)
def collection_entry_class; end
#create_collections ⇒ Object
not yet supported
14 |
# File 'app/parsers/bulkrax/xml_parser.rb', line 14
# Creating collections is not yet supported by this parser.
#
# @return [nil] always nil (the method body is empty)
def create_collections; end
#create_file_sets ⇒ Object
not yet supported
20 |
# File 'app/parsers/bulkrax/xml_parser.rb', line 20
# Creating file sets is not yet supported by this parser.
#
# @return [nil] always nil (the method body is empty)
def create_file_sets; end
#create_works ⇒ Object
95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 |
# File 'app/parsers/bulkrax/xml_parser.rb', line 95
# Walks every parsed record, finds or creates its entry and dispatches either
# a delete job or an import job for it, then records the importer status.
#
# Records for which +record_has_source_identifier+ is falsy are skipped.
# Iteration stops as soon as +limit+ is set and the record index reaches it.
# Any StandardError raised along the way is handed to +set_status_info+
# instead of propagating.
#
# @return [Object] the result of +importer.record_status+, or of
#   +set_status_info+ when an error was rescued
def create_works
  records.each_with_index do |record, index|
    next unless record_has_source_identifier(record, index)
    break if !limit.nil? && index >= limit

    seen[record[source_identifier]] = true
    new_entry = find_or_create_entry(entry_class, record[source_identifier], 'Bulkrax::Importer', record)
    if record[:delete].present?
      # The delete job receives the entry and run objects themselves ...
      DeleteWorkJob.send(perform_method, new_entry, current_run)
    else
      # ... whereas the import job receives their ids.
      ImportWorkJob.send(perform_method, new_entry.id, current_run.id)
    end
    increment_counters(index, work: true)
  end
  importer.record_status
rescue StandardError => e
  set_status_info(e)
end
#entry_class ⇒ Object
6 7 8 |
# File 'app/parsers/bulkrax/xml_parser.rb', line 6
# The entry class used for records handled by this parser.
#
# @return [Class] Bulkrax::XmlEntry
def entry_class
  Bulkrax::XmlEntry
end
#file_paths ⇒ Object
Return all files in the import directory and sub-directories
67 68 69 70 71 72 73 74 75 76 |
# File 'app/parsers/bulkrax/xml_parser.rb', line 67
# Returns all files in the import directory and its sub-directories.
#
# When +file?+ is true the search is rooted at the directory containing
# +import_file_path+; otherwise +import_file_path+ itself is used as the
# root. Directories matched by the glob are filtered out. The result is
# memoized in +@file_paths+.
#
# @return [Array<String>] paths of the regular files found
def file_paths
  @file_paths ||= begin
    # Relative to the file, or in the supplied directory
    base_dir = file? ? File.dirname(import_file_path) : import_file_path
    Dir.glob("#{base_dir}/**/*").select { |f| File.file?(f) }
  end
end
#file_set_entry_class ⇒ Object
not yet supported
17 |
# File 'app/parsers/bulkrax/xml_parser.rb', line 17
# File set entries are not yet supported by this parser.
#
# @return [nil] always nil (the method body is empty)
def file_set_entry_class; end
#good_file_type?(path) ⇒ Boolean
91 92 93 |
# File 'app/parsers/bulkrax/xml_parser.rb', line 91
# Decides whether a path looks like an XML metadata file.
#
# A path is accepted when its extension is one of .xml, .xls or .xsd, or
# when the value returned by Marcel::MimeType.for includes 'application/xml'.
#
# NOTE(review): '.xls' is usually a spreadsheet extension; possibly '.xsl'
# was intended - confirm before changing.
# NOTE(review): +path+ is passed to Marcel as given; confirm whether Marcel
# inspects the file at that path or the string itself.
#
# @param path [String] the file path to check
# @return [Boolean] true when the path is considered an XML file
def good_file_type?(path)
  %w[.xml .xls .xsd].include?(File.extname(path)) ||
    ::Marcel::MimeType.for(path).include?('application/xml')
end
#import_fields ⇒ Object
not yet supported
28 |
# File 'app/parsers/bulkrax/xml_parser.rb', line 28
# Import fields are not yet supported by this parser.
#
# @return [nil] always nil (the method body is empty)
def import_fields; end
#metadata_paths ⇒ Object
If the import_file_path is an xml file, return that. Otherwise, return all xml files in the given folder.
80 81 82 83 84 85 86 87 88 89 |
# File 'app/parsers/bulkrax/xml_parser.rb', line 80
# Returns the metadata files to read records from.
#
# If +import_file_path+ is a file accepted by +good_file_type?+, that single
# path is returned. Otherwise every entry of +file_paths+ is kept when it is
# accepted by +good_file_type?+ and its path contains
# "import_<importerexporter.id>". The result is memoized in +@metadata_paths+.
#
# @return [Array<String>] paths of the metadata files
def metadata_paths
  @metadata_paths ||=
    if file? && good_file_type?(import_file_path)
      [import_file_path]
    else
      file_paths.select do |f|
        good_file_type?(f) && f.include?("import_#{importerexporter.id}")
      end
    end
end
#record_element ⇒ Object
62 63 64 |
# File 'app/parsers/bulkrax/xml_parser.rb', line 62
# The configured record element name, read from the 'record_element' key of
# +parser_fields+. It is interpolated into the XPath used by +records+.
#
# @return [Object, nil] the value stored under 'record_element'
def record_element
  parser_fields['record_element']
end
#records(_opts = {}) ⇒ Object
For multiple, we expect to find metadata for multiple works in the given metadata file(s). For single, we expect to find metadata for a single work in the given metadata file(s);
if the file contains more than one record, we take only the first.
In either case there may be multiple metadata files returned by metadata_paths.
43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 |
# File 'app/parsers/bulkrax/xml_parser.rb', line 43
# Builds the list of record data from every file in +metadata_paths+.
#
# For the 'multiple' import type, every element matching
# "//<record_element>" in each metadata file becomes a record. For the
# 'single' import type, only the first matching element of each file is
# used. In either case there may be several metadata files. nil results
# from +entry_class.data_for_entry+ are dropped.
#
# For any other import type the result is nil (and, being nil, it is not
# memoized in +@records+).
#
# NOTE(review): in 'single' mode a file with no matching element passes nil
# to +data_for_entry+ - confirm that it tolerates nil.
#
# @param _opts [Hash] unused
# @return [Array, nil] the record data, or nil for an unknown import type
def records(_opts = {})
  @records ||=
    case parser_fields['import_type']
    when 'multiple'
      # Retrieve all records of every file; flatten because we may have
      # multiple records per file
      metadata_paths.flat_map do |md|
        elements = entry_class.read_data(md).xpath("//#{record_element}")
        elements.map { |el| entry_class.data_for_entry(el, source_identifier, self) }
      end.compact.flatten
    when 'single'
      # No need to flatten because we take only the first record of each file
      metadata_paths.map do |md|
        data = entry_class.read_data(md).xpath("//#{record_element}").first
        entry_class.data_for_entry(data, source_identifier, self)
      end.compact
    end
end
#total ⇒ Object
114 115 116 |
# File 'app/parsers/bulkrax/xml_parser.rb', line 114
# The number of records found by +records+.
#
# @return [Integer] the size of +records+
def total
  records.size
end
#valid_import? ⇒ Boolean
30 31 32 33 34 35 36 37 |
# File 'app/parsers/bulkrax/xml_parser.rb', line 30
# Checks that the import has something to work with: at least one metadata
# file and at least one record.
#
# Failures (including any StandardError raised while computing the metadata
# paths or the records) are passed to +set_status_info+ and reported as
# false rather than raised to the caller.
#
# @return [Boolean] true when metadata files and records were found
def valid_import?
  raise StandardError, 'No metadata files found' if metadata_paths.blank?
  raise StandardError, 'No records found' if records.blank?

  true
rescue StandardError => e
  set_status_info(e)
  false
end
#works_total ⇒ Object
TODO: change to differentiate between collection and work records when adding ability to import collection metadata
23 24 25 |
# File 'app/parsers/bulkrax/xml_parser.rb', line 23
# The number of work records; currently simply delegates to +total+.
#
# TODO: change to differentiate between collection and work records when
# adding ability to import collection metadata.
#
# @return [Object] whatever +total+ returns
def works_total
  total
end