Class: RelatonOasis::DataFetcher
- Inherits: Object
  - Object
  - RelatonOasis::DataFetcher
- Defined in: lib/relaton_oasis/data_fetcher.rb
Class Method Summary collapse
-
.fetch(output: "data", format: "yaml") ⇒ Object
Initialize fetcher and run fetch.
Instance Method Summary collapse
-
#fetch ⇒ Object
Fetch and save all the documents from OASIS.
-
#fetch_parts(item) ⇒ Object
Fetch and save parts of document.
-
#file_name(doc) ⇒ String
Generate file name.
-
#initialize(output, format) ⇒ DataFetcher
constructor
Initialize a new DataFetcher.
-
#save_doc(doc) ⇒ Object
Save document to file.
Constructor Details
#initialize(output, format) ⇒ DataFetcher
Initialize a new DataFetcher
9 10 11 12 13 14 15 16 |
# File 'lib/relaton_oasis/data_fetcher.rb', line 9
#
# Set up a fetcher: remember the output location and format, derive the file
# extension, and create the bookkeeping objects used while saving documents.
#
# @param output [String] directory path used when building output file names
# @param format [String] serialization format name; a leading "bib" or "rfc"
#   is removed to obtain the file extension
def initialize(output, format)
  @output = output
  @format = format
  @files = []
  @ext = format.sub(/^bib|^rfc/, "")
  @index = Index.new
  @index1 = Relaton::Index.find_or_create(:oasis, file: "index-v1.yaml")
end
Class Method Details
.fetch(output: "data", format: "yaml") ⇒ Object
Initialize fetcher and run fetch
24 25 26 27 28 29 30 31 32 |
# File 'lib/relaton_oasis/data_fetcher.rb', line 24
#
# Create the output directory, build a fetcher, run it, and print start time,
# stop time and elapsed seconds to stdout.
#
# @param output [String] directory to create (if missing) and write into
# @param format [String] format name handed to the new fetcher
# @return [nil] the result of the final `puts`
def self.fetch(output: "data", format: "yaml")
  started_at = Time.now
  # Wall-clock time is only displayed. The duration is measured on the
  # monotonic clock so that a system clock adjustment during the fetch
  # cannot produce a skewed or negative elapsed time.
  clock_start = Process.clock_gettime(Process::CLOCK_MONOTONIC)
  puts "Started at: #{started_at}"
  FileUtils.mkdir_p output
  new(output, format).fetch
  elapsed = Process.clock_gettime(Process::CLOCK_MONOTONIC) - clock_start
  puts "Stopped at: #{Time.now}"
  puts "Done in: #{elapsed.round} sec."
end
Instance Method Details
#fetch ⇒ Object
Fetch and save all the documents from OASIS
37 38 39 40 41 42 43 44 45 46 47 |
# File 'lib/relaton_oasis/data_fetcher.rb', line 37
#
# Download the OASIS standards page, then for every `details` element save the
# parsed document and its parts. Finally save both indexes.
#
# @return [Object] whatever `@index1.save` returns
def fetch
  agent = Mechanize.new
  page = agent.get "https://www.oasis-open.org/standards/"
  html = Nokogiri::HTML(page.body)
  # `each` rather than `map`: the block runs only for its side effects, so
  # collecting its results into an array would be wasted work.
  html.xpath("//details").each do |item|
    save_doc DataParser.new(item).parse
    fetch_parts item
  end
  @index.save
  @index1.save
end
#fetch_parts(item) ⇒ Object
Fetch and save parts of document
54 55 56 57 58 59 60 61 |
# File 'lib/relaton_oasis/data_fetcher.rb', line 54
#
# Look up the "cite as" paragraphs of an item and, when there is more than
# one, parse and save each of them as a separate document.
#
# @param item [#xpath] node searched with the XPath expression below
# @return [Object, nil] nil when one or no paragraphs match, otherwise the
#   matched collection
def fetch_parts(item)
  parts = item.xpath("./div/div/div[contains(@class, 'standard__grid--cite-as')]/p[strong or span/strong]")
  # One or zero matches means there are no separate parts to save.
  return if parts.size <= 1

  parts.each { |part| save_doc(DataPartParser.new(part).parse) }
end
#file_name(doc) ⇒ String
Generate file name
92 93 94 95 |
# File 'lib/relaton_oasis/data_fetcher.rb', line 92
#
# Build the output path for a document from its document number.
#
# Whitespace, commas, colons and slashes become underscores, runs of
# underscores are collapsed, and the result is upper-cased before the
# extension is appended.
#
# @param doc [#docnumber] document whose number names the file
# @return [String] path inside the output directory
def file_name(doc)
  sanitized = doc.docnumber.gsub(/[\s,:\/]/, "_")
  base = sanitized.squeeze("_").upcase
  File.join(@output, "#{base}.#{@ext}")
end
#save_doc(doc) ⇒ Object
Save document to file
68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 |
# File 'lib/relaton_oasis/data_fetcher.rb', line 68
#
# Serialize a document in the configured format and write it to its file.
#
# The first time a file name is seen it is recorded in `@files` and `@index`.
# A repeated file name triggers a warning instead. In both cases `@index1` is
# updated and the file is written.
#
# @param doc [Object] document responding to `docnumber` and to the
#   serializer for the configured format (`to_xml`, `to_hash`, or
#   `to_<format>`)
# @return [Object] the result of `File.write`
def save_doc(doc) # rubocop:disable Metrics/MethodLength
  # `public_send` (not `send`) for the dynamic case, so a format name can
  # only ever reach the document's public serializers.
  content = case @format
            when "xml" then doc.to_xml(bibdata: true)
            when "yaml" then doc.to_hash.to_yaml
            else doc.public_send("to_#{@format}")
            end
  file = file_name doc
  if @files.include? file
    Util.warn "File #{file} already exists. Document: #{doc.docnumber}"
  else
    @files << file
    @index[doc] = file
  end
  @index1.add_or_update doc.docnumber, file
  File.write file, content, encoding: "UTF-8"
end