Class: RelatonIana::DataFetcher
- Inherits: Object
- Inheritance chain:
  - Object
  - RelatonIana::DataFetcher
- Defined in:
- lib/relaton_iana/data_fetcher.rb
Class Method Summary collapse
-
.fetch(output: "data", format: "yaml") ⇒ Object
Initialize fetcher and run fetch.
Instance Method Summary collapse
-
#fetch ⇒ Object
Parse documents.
-
#file_name(bib) ⇒ String
Generate file name.
-
#initialize(output, format) ⇒ DataFetcher
constructor
Data fetcher initializer.
- #parse(content) ⇒ Object
-
#save_doc(bib) ⇒ Object
Save document to file.
Constructor Details
#initialize(output, format) ⇒ DataFetcher
Data fetcher initializer
9 10 11 12 13 14 15 |
# File 'lib/relaton_iana/data_fetcher.rb', line 9
#
# Data fetcher initializer.
#
# @param output [String] value stored as @output
#   (presumably the output directory; confirm against callers)
# @param format [String] value stored as @format; the file extension
#   kept in @ext is this string with a leading "bib" removed
def initialize(output, format)
  @files = []
  @output = output
  @format = format
  @ext = format.sub(/^bib/, "")
  @index = Relaton::Index.find_or_create(:IANA, file: "index-v1.yaml")
end
Class Method Details
.fetch(output: "data", format: "yaml") ⇒ Object
Initialize fetcher and run fetch
23 24 25 26 27 28 29 30 31 |
# File 'lib/relaton_iana/data_fetcher.rb', line 23
#
# Initialize fetcher and run fetch.
#
# Prints the start time, creates the +output+ directory (including missing
# parents), runs a new fetcher, then prints the stop time and the elapsed
# time rounded to whole seconds.
#
# @param output [String] directory created before fetching and passed to the fetcher
# @param format [String] format passed to the fetcher
def self.fetch(output: "data", format: "yaml")
  started = Time.now
  puts "Started at: #{started}"
  FileUtils.mkdir_p(output)
  new(output, format).fetch
  finished = Time.now
  puts "Stopped at: #{finished}"
  elapsed = (finished - started).round
  puts "Done in: #{elapsed} sec."
end
Instance Method Details
#fetch ⇒ Object
Parse documents
36 37 38 39 40 41 42 43 44 |
# File 'lib/relaton_iana/data_fetcher.rb', line 36
#
# Parse documents.
#
# Reads every file matching "iana-registries/**/*.xml" (relative to the
# current working directory) as UTF-8 and hands it to #parse when its text
# contains "<registry". A StandardError raised while handling one file is
# reported through Util.error together with the file path, and processing
# continues with the next file. The index is saved once all files are done.
def fetch
  Dir["iana-registries/**/*.xml"].each do |file|
    content = File.read file, encoding: "UTF-8"
    parse(content) if content.include? "<registry"
  rescue StandardError => e
    # Report the failure for this file only; remaining files are still processed.
    Util.error "Error: #{e.message}. File: #{file}"
  end
  @index.save
end
#file_name(bib) ⇒ String
Generate file name
84 85 86 87 |
# File 'lib/relaton_iana/data_fetcher.rb', line 84
#
# Generate file name.
#
# The document number is downcased, every whitespace character, comma, colon
# and slash is replaced with an underscore, and runs of underscores are
# collapsed to a single one. The result is joined with @output and given the
# @ext extension.
#
# @param bib [#docnumber] document whose docnumber provides the base name
# @return [String] file path
def file_name(bib)
  name = bib.docnumber.downcase.gsub(%r{[\s,:/]}, "_").squeeze("_")
  File.join @output, "#{name}.#{@ext}"
end
#parse(content) ⇒ Object
46 47 48 49 50 51 52 |
# File 'lib/relaton_iana/data_fetcher.rb', line 46
#
# Parse one registry XML document and save the results.
#
# The root registry element is parsed and saved first; each registry element
# that is a direct child of the root is then parsed with the root document
# passed as the second argument, and saved as well.
#
# @param content [String] XML text
def parse(content)
  root = Nokogiri::XML(content).at("/xmlns:registry")
  parent = Parser.parse(root)
  save_doc(parent)
  root.xpath("./xmlns:registry").each do |child|
    save_doc(Parser.parse(child, parent))
  end
end
#save_doc(bib) ⇒ Object
Save document to file
59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 |
# File 'lib/relaton_iana/data_fetcher.rb', line 59
#
# Save document to file.
#
# Does nothing when +bib+ is nil/false. Otherwise the document is serialized
# according to @format ("xml" -> to_xml(bibdata: true), "yaml" ->
# to_hash.to_yaml, anything else -> the method named "to_<format>"), the
# target path is computed with #file_name, the docnumber/path pair is added
# to the index, and the content is written as UTF-8.
#
# @param bib [Object, nil] document to save; must respond to #docnumber and
#   the serializer selected by @format
def save_doc(bib) # rubocop:disable Metrics/MethodLength
  return unless bib

  content = case @format
            when "xml" then bib.to_xml(bibdata: true)
            when "yaml" then bib.to_hash.to_yaml
            else bib.send("to_#{@format}")
            end
  file = file_name(bib)
  if @files.include? file
    # A path produced earlier in this run is about to be overwritten.
    Util.warn "File #{file} already exists. Document: #{bib.docnumber}"
  else
    @files << file
  end
  @index.add_or_update bib.docnumber, file
  File.write file, content, encoding: "UTF-8"
end