Class: RelatonNist::DataFetcher
- Inherits:
-
Object
- Object
- RelatonNist::DataFetcher
- Defined in:
- lib/relaton_nist/data_fetcher.rb
Constant Summary collapse
- URL =
"https://github.com/usnistgov/NIST-Tech-Pubs/releases/download/Sept2024/allrecords-MODS.xml"
Class Method Summary collapse
-
.fetch(output: "data", format: "yaml") ⇒ Object
Fetch all the documents from the dataset.
Instance Method Summary collapse
- #add_static_files ⇒ Object
-
#fetch ⇒ Object
Fetch all the documents from the dataset.
- #fetch_tech_pubs ⇒ Object
- #index ⇒ Object
-
#initialize(output, format) ⇒ DataFetcher
constructor
A new instance of DataFetcher.
- #output(bib) ⇒ Object
- #series ⇒ Object
-
#write_file(bib) ⇒ Object
Save document.
Constructor Details
#initialize(output, format) ⇒ DataFetcher
Returns a new instance of DataFetcher.
11 12 13 14 15 16 |
# File 'lib/relaton_nist/data_fetcher.rb', line 11
#
# Set up a fetcher that writes documents into +output+ using +format+.
#
# @param output [String] path stored as the output location
# @param format [String] output format name (e.g. "yaml" or "xml"); a
#   leading "bib" is removed to obtain the file extension
def initialize(output, format)
  @output = output
  @format = format
  # \A anchors at the start of the whole string; ^ would also match right
  # after an embedded newline and strip a "bib" that is not a real prefix.
  @ext = format.sub(/\Abib/, "")
  # Starts empty; holds whatever is appended to it after construction.
  @files = []
end
Class Method Details
.fetch(output: "data", format: "yaml") ⇒ Object
Fetch all the documents from the dataset
94 95 96 |
# File 'lib/relaton_nist/data_fetcher.rb', line 94
#
# Convenience entry point: build a fetcher and run it.
#
# @param output [String] passed to the constructor (default "data")
# @param format [String] passed to the constructor (default "yaml")
# @return [Object] the result of the instance-level #fetch
def self.fetch(output: "data", format: "yaml")
  fetcher = new(output, format)
  fetcher.fetch
end
Instance Method Details
#add_static_files ⇒ Object
80 81 82 83 84 85 86 |
# File 'lib/relaton_nist/data_fetcher.rb', line 80
#
# Register every YAML file found under ./static in the index.
#
# Each file is loaded, turned into a NistBibliographicItem, and indexed under
# the id of its first document identifier, pointing at the file's path.
def add_static_files
  Dir["./static/*.yaml"].each do |path|
    item = RelatonNist::NistBibliographicItem.from_hash(YAML.load_file(path))
    index.add_or_update item.docidentifier[0].id, path
  end
end
#fetch ⇒ Object
Fetch all the documents from the dataset
54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 |
# File 'lib/relaton_nist/data_fetcher.rb', line 54
#
# Run the whole fetch: prepare the output directory, remove previously
# generated files with the current extension, write the tech-pubs documents,
# register the static files, and save the index. Start time, stop time and
# elapsed seconds are printed to standard output.
def fetch # rubocop:disable Metrics/AbcSize,Metrics/MethodLength
  started_at = Time.now
  puts "Started at: #{started_at}"

  FileUtils.mkdir_p @output
  stale_files = Dir[File.join(@output, "*.#{@ext}")]
  FileUtils.rm stale_files

  fetch_tech_pubs
  add_static_files
  index.save

  finished_at = Time.now
  puts "Stopped at: #{finished_at}"
  elapsed = (finished_at - started_at).round
  puts "Done in: #{elapsed} sec."
  # rescue StandardError => e
  #   Util.error "#{e.message}\n#{e.backtrace[0..5].join("\n")}"
end
#fetch_tech_pubs ⇒ Object
72 73 74 75 76 77 78 |
# File 'lib/relaton_nist/data_fetcher.rb', line 72
#
# Open URL, parse the response as a MODS collection, and write one file per
# record in it.
def fetch_tech_pubs
  collection = LocMods::Collection.from_xml OpenURI.open_uri(URL)
  # docs.xpath(
  #   "/body/query/doi_record/report-paper/report-paper_metadata",
  # )
  collection.mods.each do |record|
    bib = ModsParser.new(record, series).parse
    write_file bib
  end
end
#index ⇒ Object
18 19 20 |
# File 'lib/relaton_nist/data_fetcher.rb', line 18
#
# Index for the :nist type backed by "index-v1.yaml"; looked up (or created)
# on first use and reused afterwards.
def index
  return @index if @index

  @index = Relaton::Index.find_or_create :nist, file: "index-v1.yaml"
end
#output(bib) ⇒ Object
43 44 45 46 47 48 49 |
# File 'lib/relaton_nist/data_fetcher.rb', line 43
#
# Serialize a document according to the configured format.
#
# @param bib [#to_hash, #to_xml] document to serialize; for any other format
#   it must respond publicly to "to_<format>"
# @return [Object] YAML text for "yaml", the result of
#   +to_xml(bibdata: true)+ for "xml", otherwise the result of "to_<format>"
# @raise [NoMethodError] when +bib+ has no public "to_<format>" method
def output(bib)
  case @format
  when "yaml" then bib.to_hash.to_yaml
  when "xml" then bib.to_xml bibdata: true
  else
    # The format name is supplied by the caller, so dispatch with public_send:
    # unlike send, it cannot reach private or protected methods.
    bib.public_send "to_#{@format}"
  end
end
#series ⇒ Object
22 23 24 |
# File 'lib/relaton_nist/data_fetcher.rb', line 22
#
# Parsed content of the "series.yaml" file located in the same directory as
# this source file. The file is read on first use and the result is reused.
#
# @return [Object] whatever YAML.load_file produces for series.yaml
def series
  # File.expand_path resolves the name relative to this file's directory
  # (__dir__) rather than the process's current working directory.
  @series ||= YAML.load_file File.expand_path("series.yaml", __dir__)
end
#write_file(bib) ⇒ Object
Save document
31 32 33 34 35 36 37 38 39 40 41 |
# File 'lib/relaton_nist/data_fetcher.rb', line 31
#
# Save a document to the output directory and record it in the index.
#
# The file name is the first document identifier with "/", whitespace, ":"
# and "." replaced by "_", upcased, and a leading "NIST_IR" rewritten to
# "NISTIR". A warning is logged when the same path was already produced
# during this run; the file is written (overwritten) regardless.
#
# @param bib [Object] document; must expose +docidentifier+ and be accepted
#   by #output
def write_file(bib) # rubocop:disable Metrics/AbcSize
  docid = bib.docidentifier[0].id
  basename = docid.gsub(%r{[/\s:.]}, "_").upcase.sub(/^NIST_IR/, "NISTIR")
  path = File.join(@output, "#{basename}.#{@ext}")

  if @files.include?(path)
    Util.warn "File #{path} exists. Docid: #{docid}"
    # warn "Link: #{bib.link.detect { |l| l.type == 'src' }.content}"
  else
    @files << path
  end

  index.add_or_update docid, path
  File.write path, output(bib), encoding: "UTF-8"
end