Class: RelatonOgc::DataFetcher
- Inherits:
-
Object
- Object
- RelatonOgc::DataFetcher
- Includes:
- Utils
- Defined in:
- lib/relaton_ogc/data_fetcher.rb
Defined Under Namespace
Modules: Utils
Constant Summary
Constants included from Utils
Class Method Summary collapse
Instance Method Summary collapse
- #content(bib) ⇒ Object
- #fetch ⇒ Object
- #fetch_doc(hit) ⇒ Object
- #file_name(bib) ⇒ Object
- #index ⇒ Object
-
#initialize(output, format) ⇒ DataFetcher
constructor
Create DataFetcher instance.
-
#write_document(bib) ⇒ Object
Write the document to a file and register it in the index, recording duplicated identifiers instead of rewriting them.
Methods included from Utils
Constructor Details
#initialize(output, format) ⇒ DataFetcher
Create DataFetcher instance
48 49 50 51 52 53 54 55 |
# File 'lib/relaton_ogc/data_fetcher.rb', line 48
#
# Create DataFetcher instance.
#
# @param output [String] directory the fetcher works in; "etag.txt" is
#   located inside it
# @param format [String] output format name; the file extension is derived
#   from it by removing the first "bib" occurrence (e.g. "bibxml" -> "xml")
def initialize(output, format)
  @output, @format = output, format
  @etagfile = File.join(output, "etag.txt")
  @ext = format.sub("bib", "")
  # Identifiers already written, and identifiers that were seen more than once.
  @docids = []
  @dupids = Set.new
end
Class Method Details
.fetch(output: "data", format: "yaml") ⇒ Object
61 62 63 64 65 66 67 68 69 |
# File 'lib/relaton_ogc/data_fetcher.rb', line 61
#
# Create the output directory, run a fetcher over it and print the start
# time, the stop time and the elapsed whole seconds to standard output.
#
# @param output [String] directory to create and pass to the new fetcher
# @param format [String] format name passed to the new fetcher
# @return [nil] the result of the final `puts`
def self.fetch(output: "data", format: "yaml")
  started = Time.now
  puts "Started at: #{started}"

  FileUtils.mkdir_p(output)
  fetcher = new(output, format)
  fetcher.fetch

  finished = Time.now
  puts "Stopped at: #{finished}"
  elapsed = (finished - started).round
  puts "Done in: #{elapsed} sec."
end
Instance Method Details
#content(bib) ⇒ Object
110 111 112 113 114 115 116 |
# File 'lib/relaton_ogc/data_fetcher.rb', line 110
#
# Serialize a bibliographic item according to the configured format.
#
# @param bib [#to_xml, #to_hash, #to_bibxml] item to serialize
# @return [String, nil] the serialized item, or nil when @format is not
#   one of "xml", "yaml" or "bibxml"
def content(bib)
  if @format == "xml"
    bib.to_xml(bibdata: true)
  elsif @format == "yaml"
    bib.to_hash.to_yaml
  elsif @format == "bibxml"
    bib.to_bibxml
  end
end
#fetch ⇒ Object
71 72 73 74 75 76 77 78 79 |
# File 'lib/relaton_ogc/data_fetcher.rb', line 71
#
# Process every hit yielded by `get_data`, warn about duplicated
# identifiers, and save the index.
#
# The etag is stored only when every `fetch_doc` call returned a truthy
# value; a single falsy result (false or nil) leaves the etag untouched.
# The index is saved in either case.
#
# @return [Object] whatever `get_data` returns
def fetch
  get_data do |etag, json|
    failed = false
    json.each do |_, hit|
      failed = true unless fetch_doc(hit)
    end

    Util.warn "Duplicated documents: #{@dupids.to_a.join(', ')}" if @dupids.any?
    self.etag = etag unless failed
    index.save
  end
end
#fetch_doc(hit) ⇒ Object
81 82 83 84 85 86 87 88 89 90 91 |
# File 'lib/relaton_ogc/data_fetcher.rb', line 81
#
# Parse one hit and write the resulting document.
#
# Hits whose "type" is "CC" are skipped. Any StandardError raised while
# parsing or writing is logged through `Util.error` and not re-raised.
#
# @param hit [Hash] hit data; "type" and "identifier" keys are read here
# @return [Boolean] true when the hit was written or skipped, false when
#   an error was rescued
def fetch_doc(hit)
  # A skipped hit is not a failure. `fetch` treats any falsy result as an
  # error and then refuses to store the etag, so report success explicitly
  # instead of returning nil.
  return true if hit["type"] == "CC"

  bib = Scrapper.parse_page hit
  write_document bib
  true
rescue StandardError => e
  Util.error "Fetching document: #{hit['identifier']}\n" \
             "#{e.class} #{e.message}\n#{e.backtrace}"
  false
end
#file_name(bib) ⇒ Object
105 106 107 108 |
# File 'lib/relaton_ogc/data_fetcher.rb', line 105
#
# Build the output path for a bibliographic item.
#
# The first document identifier is upcased and every whitespace character,
# colon and dot in it is replaced with an underscore; the result is placed
# under @output with @ext as the extension.
#
# @param bib [#docidentifier] item whose first identifier names the file
# @return [String] path of the form "<output>/<NAME>.<ext>"
def file_name(bib)
  primary_id = bib.docidentifier[0].id
  stem = primary_id.upcase.gsub(/[\s:.]/, "_")
  basename = "#{stem}.#{@ext}"
  "#{@output}/#{basename}"
end
#index ⇒ Object
57 58 59 |
# File 'lib/relaton_ogc/data_fetcher.rb', line 57
#
# Return the index for :ogc backed by "index-v1.yaml", looking it up via
# `Relaton::Index.find_or_create` on first use and reusing the same object
# on later calls.
#
# @return [Object] the value returned by `Relaton::Index.find_or_create`
def index
  return @index if @index

  @index = Relaton::Index.find_or_create(:ogc, file: "index-v1.yaml")
end
#write_document(bib) ⇒ Object
Write the document to a file and register it in the index, recording duplicated identifiers instead of rewriting them.
93 94 95 96 97 98 99 100 101 102 103 |
# File 'lib/relaton_ogc/data_fetcher.rb', line 93
#
# Write a bibliographic item to its file and register it in the index.
#
# When the item's first identifier was already written by this fetcher,
# nothing is written; the identifier is recorded in @dupids instead.
#
# @param bib [#docidentifier] item to write
# @return [Integer, nil] the result of `File.write`, or nil for a duplicate
def write_document(bib)
  docid = bib.docidentifier[0].id

  if @docids.include?(docid)
    @dupids << docid
    nil
  else
    @docids << docid
    path = file_name(bib)
    index.add_or_update(docid, path)
    File.write(path, content(bib), encoding: "UTF-8")
  end
end