Class: RelatonIetf::DataFetcher
- Inherits:
-
Object
- Object
- RelatonIetf::DataFetcher
- Defined in:
- lib/relaton_ietf/data_fetcher.rb
Constant Summary collapse
- INDEX1 =
"index-v1".freeze
Class Method Summary collapse
-
.fetch(source, output: "data", format: "yaml") ⇒ Object
Initialize fetcher and run fetch.
Instance Method Summary collapse
- #add_to_index(entry, file) ⇒ Object
-
#create_series(ref, versions) ⇒ Object
Create unversioned bibliographic item.
-
#fetch ⇒ Object
Fetch documents.
-
#fetch_ieft_internet_drafts ⇒ Object
Fetches ietf-internet-drafts documents.
-
#fetch_ieft_rfcs ⇒ Object
Fetches ietf-rfc-entries documents.
-
#fetch_ieft_rfcsubseries ⇒ Object
Fetches ietf-rfcsubseries documents.
-
#file_name(entry) ⇒ String
Generate file name.
-
#initialize(source, output, format) ⇒ DataFetcher
constructor
Data fetcher initializer.
-
#read_doc(file) ⇒ RelatonIetf::IetfBibliographicItem
Read saved documents.
-
#rfc_index ⇒ Nokogiri::XML::Document
Get RFC index.
-
#save_doc(entry, check_duplicate: true) ⇒ Object
Save document to file.
-
#update_versions(versions) ⇒ Object
Updates I-D’s versions.
-
#version_relation(ref, type) ⇒ RelatonBib::DocumentRelation
Create bibitem relation.
Constructor Details
#initialize(source, output, format) ⇒ DataFetcher
Data fetcher initializer
18 19 20 21 22 23 24 25 |
# File 'lib/relaton_ietf/data_fetcher.rb', line 18
#
# Data fetcher initializer.
#
# @param source [String] name of the data source; #fetch compares it with
#   "ietf-rfcsubseries", "ietf-internet-drafts" and "ietf-rfc-entries"
# @param output [String] directory the documents are written to
# @param format [String] output format; the file extension is derived from it
def initialize(source, output, format)
  @source, @output, @format = source, output, format
  # The extension is the format name without a leading "bib" or "rfc".
  @ext = format.sub(/^bib|^rfc/, "")
  # Paths written so far; save_doc uses the list to report duplicates.
  @files = []
  index_file = "#{INDEX1}.yaml"
  @index = Relaton::Index.find_or_create(:IETF, file: index_file)
end
Class Method Details
.fetch(source, output: "data", format: "yaml") ⇒ Object
Initialize fetcher and run fetch
35 36 37 38 39 40 41 42 43 |
# File 'lib/relaton_ietf/data_fetcher.rb', line 35
#
# Initialize fetcher and run fetch. Prints the start time, the stop time and
# the elapsed whole seconds to standard output.
#
# @param source [String] name of the data source, passed to the constructor
# @param output [String] output directory; created when missing
# @param format [String] output format, passed to the constructor
def self.fetch(source, output: "data", format: "yaml")
  started_at = Time.now
  puts "Started at: #{started_at}"
  # mkdir_p is a no-op for an existing directory, so no existence check is needed.
  FileUtils.mkdir_p(output)
  new(source, output, format).fetch
  stopped_at = Time.now
  puts "Stopped at: #{stopped_at}"
  elapsed = (stopped_at - started_at).round
  puts "Done in: #{elapsed} sec."
end
Instance Method Details
#add_to_index(entry, file) ⇒ Object
210 211 212 213 214 |
# File 'lib/relaton_ietf/data_fetcher.rb', line 210
#
# Register a saved document in the index under its primary identifier,
# falling back to the first identifier when none is flagged as primary.
#
# An entry without any identifier is reported and skipped instead of raising
# NoMethodError on nil.
#
# @param entry [#docidentifier] document whose identifier becomes the index key
# @param file [String] path of the file the document was written to
def add_to_index(entry, file)
  identifiers = entry.docidentifier
  docid = identifiers.find(&:primary) || identifiers.first
  unless docid
    Util.warn "No document identifier for #{file}; not added to index"
    return
  end
  @index.add_or_update docid.id, file
end
#create_series(ref, versions) ⇒ Object
Create unversioned bibliographic item
120 121 122 123 124 125 126 127 128 129 130 131 |
# File 'lib/relaton_ietf/data_fetcher.rb', line 120
#
# Create unversioned bibliographic item.
#
# The item gets an "includes" relation for every version (ordered by the
# trailing version number) and copies title and abstract from the saved file
# of the highest version.
#
# The saved file is only parsed as YAML when the output format is "yaml".
# For any other format it is loaded through #read_doc, because this method
# is also reached for non-YAML formats and YAML.load_file cannot read them.
#
# @param ref [String] series reference without a version suffix
# @param versions [Array<String>] versioned references of the series
def create_series(ref, versions) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
  vs = versions.sort_by { |v| v.match(/\d+$/).to_s.to_i }
  fref = RelatonBib::FormattedRef.new content: ref
  docid = RelatonBib::DocumentIdentifier.new type: "Internet-Draft", id: ref, primary: true
  rel = vs.map { |v| version_relation v, "includes" }

  latest = File.join(@output, "#{vs.last}.#{@ext}")
  if @format == "yaml"
    last_v = HashConverter.hash_to_bib YAML.load_file(latest)
    title = last_v[:title]
    abstract = last_v[:abstract]
  else
    # NOTE(review): relies on the #title / #abstract readers of the parsed
    # item and on the constructor accepting their values - confirm against
    # relaton-bib.
    last_v = read_doc latest
    title = last_v.title
    abstract = last_v.abstract
  end

  bib = IetfBibliographicItem.new(
    title: title, abstract: abstract, formattedref: fref, docid: [docid], relation: rel
  )
  save_doc bib
end
#fetch ⇒ Object
Fetch documents
48 49 50 51 52 53 54 55 |
# File 'lib/relaton_ietf/data_fetcher.rb', line 48
#
# Fetch documents. Runs the fetch routine that matches the configured
# source, then saves the index. An unrecognised source runs no routine; the
# index is saved in either case.
def fetch
  handler = case @source
            when "ietf-rfcsubseries" then :fetch_ieft_rfcsubseries
            when "ietf-internet-drafts" then :fetch_ieft_internet_drafts
            when "ietf-rfc-entries" then :fetch_ieft_rfcs
            end
  send(handler) if handler
  @index.save
end
#fetch_ieft_internet_drafts ⇒ Object
Fetches ietf-internet-drafts documents
69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 |
# File 'lib/relaton_ietf/data_fetcher.rb', line 69
#
# Fetches ietf-internet-drafts documents.
#
# Parses every "bibxml-ids/*.xml" file and saves the result. For files whose
# name contains "D.draft-", the versioned reference is collected and the
# trailing digits of the file name are set as the document version. Once all
# files are saved, version relations are updated unless the output format is
# "bibxml".
def fetch_ieft_internet_drafts # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
  versions = []
  Dir["bibxml-ids/*.xml"].each do |path|
    name = File.basename(path, ".xml")
    ver = nil
    if name.include?("D.draft-")
      versions << name.sub(/^reference\.I-D\./, "").downcase
      ver = name[/(?<ver>\d+)$/, "ver"]
    end
    bib = BibXMLParser.parse(File.read(path, encoding: "UTF-8"))
    if ver
      # The version is written straight into the instance variable of the parsed item.
      bib.instance_variable_set :@version, [RelatonBib::BibliographicItem::Version.new(nil, ver)]
    end
    save_doc bib
  end
  update_versions(versions) if versions.any? && @format != "bibxml"
end
#fetch_ieft_rfcs ⇒ Object
Fetches ietf-rfc-entries documents
167 168 169 170 171 172 173 174 |
# File 'lib/relaton_ietf/data_fetcher.rb', line 167
#
# Fetches ietf-rfc-entries documents.
#
# Every "rfc-entry" element of the RFC index is parsed and saved. A failure
# on one entry is logged with the entry's doc-id, the exception message and
# the first six backtrace lines; processing then continues with the next entry.
def fetch_ieft_rfcs
  rfc_index.xpath("xmlns:rfc-entry").each do |doc|
    save_doc RfcEntry.parse(doc)
  rescue StandardError => e
    # Safe navigation keeps the handler itself from raising when doc-id is missing.
    doc_id = doc.at("./xmlns:doc-id")&.text
    Util.error "Error parsing #{doc_id}: #{e.message}\n" \
               "#{e.backtrace[0..5].join("\n")}"
  end
end
#fetch_ieft_rfcsubseries ⇒ Object
Fetches ietf-rfcsubseries documents
60 61 62 63 64 |
# File 'lib/relaton_ietf/data_fetcher.rb', line 60
#
# Fetches ietf-rfcsubseries documents: every bcp-entry, fyi-entry and
# std-entry element of the RFC index is parsed and saved.
def fetch_ieft_rfcsubseries
  entries = rfc_index.xpath("xmlns:bcp-entry|xmlns:fyi-entry|xmlns:std-entry")
  entries.each { |node| save_doc RfcIndexEntry.parse(node) }
end
#file_name(entry) ⇒ String
Generate file name
223 224 225 226 227 228 229 230 231 232 233 |
# File 'lib/relaton_ietf/data_fetcher.rb', line 223
#
# Generate file name.
#
# The name is built from the first available of: the id of the entry's
# "Internet-Draft" identifier, the entry's docnumber, the content of its
# formattedref. It is lower-cased for the "ietf-internet-drafts" source and
# upper-cased otherwise; whitespace, commas, colons and slashes become single
# underscores.
#
# NOTE(review): the case change uses downcase!/upcase!, so the string held by
# the entry is modified in place. Kept as is; confirm before changing, since
# later readers of the entry see the modified string.
#
# @param entry [#docnumber, #formattedref] document to name; #docidentifier
#   is consulted when the entry responds to it
# @return [String] path inside the output directory
def file_name(entry) # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
  draft = nil
  if entry.respond_to?(:docidentifier)
    draft = entry.docidentifier.find { |docid| docid.type == "Internet-Draft" }
  end
  id = draft&.id || entry.docnumber || entry.formattedref.content
  @source == "ietf-internet-drafts" ? id.downcase! : id.upcase!
  name = id.gsub(/[\s,:\/]/, "_").squeeze("_")
  File.join @output, "#{name}.#{@ext}"
end
#read_doc(file) ⇒ RelatonIetf::IetfBibliographicItem
Read saved documents
155 156 157 158 159 160 161 162 |
# File 'lib/relaton_ietf/data_fetcher.rb', line 155
#
# Read a saved document back, using the parser that matches the output
# format: XMLParser for "xml", YAML plus IetfBibliographicItem.from_hash for
# "yaml", BibXMLParser for everything else.
#
# @param file [String] path of the saved document
# @return [RelatonIetf::IetfBibliographicItem]
def read_doc(file)
  content = File.read(file, encoding: "UTF-8")
  return XMLParser.from_xml(content) if @format == "xml"
  return IetfBibliographicItem.from_hash(YAML.safe_load(content)) if @format == "yaml"

  BibXMLParser.parse(content)
end
#rfc_index ⇒ Nokogiri::XML::Document
Get RFC index
181 182 183 184 |
# File 'lib/relaton_ietf/data_fetcher.rb', line 181
#
# Get RFC index: downloads rfc-index.xml from rfc-editor.org and returns its
# root "rfc-index" element.
#
# @return [Nokogiri::XML::Document]
def rfc_index
  index_uri = URI("https://www.rfc-editor.org/rfc-index.xml")
  xml = Net::HTTP.get(index_uri)
  Nokogiri::XML(xml).at("/xmlns:rfc-index")
end
#save_doc(entry, check_duplicate: true) ⇒ Object
Save document to file
192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 |
# File 'lib/relaton_ietf/data_fetcher.rb', line 192
#
# Save document to file and add it to the index. Does nothing for a nil
# entry.
#
# With check_duplicate enabled, a path that was already written during this
# run is reported with a warning; the file is written regardless.
#
# @param entry [RelatonIetf::IetfBibliographicItem, nil] document to save
# @param check_duplicate [Boolean] track the path and warn on repeats
def save_doc(entry, check_duplicate: true) # rubocop:disable Metrics/MethodLength, Metrics/CyclomaticComplexity
  return unless entry

  # Serialise before computing the file name: file_name changes the case of
  # an identifier string of the entry in place.
  content = if @format == "xml" then entry.to_xml(bibdata: true)
            elsif @format == "yaml" then entry.to_hash.to_yaml
            else entry.send("to_#{@format}")
            end
  path = file_name(entry)
  if check_duplicate
    if @files.include?(path)
      Util.warn "File #{path} already exists. Document: #{entry.docnumber}"
    else
      @files << path
    end
  end
  File.write path, content, encoding: "UTF-8"
  add_to_index entry, path
end
#update_versions(versions) ⇒ Object
Updates I-D’s versions
91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 |
# File 'lib/relaton_ietf/data_fetcher.rb', line 91
#
# Updates I-D's versions.
#
# Walks the saved "<series>-NN.<ext>" files in the output directory. When a
# new series is reached, its unversioned item is created from the matching
# references in +versions+. Each saved version then gets an "updates"
# relation to its closest lower version and an "updatedBy" relation to its
# closest higher version, when such versions exist.
#
# The references of a series are matched against the whole string, so that
# "draft-foo-01-00" is not taken for a version of "draft-foo". They are
# sorted by version number so the closest lower/higher version does not
# depend on the order of +versions+. The directory listing is sorted and the
# series is taken from the file's base name, not from the full path.
#
# @param versions [Array<String>] versioned references, e.g. "draft-foo-01"
def update_versions(versions) # rubocop:disable Metrics/MethodLength, Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
  series = ""
  bib_versions = []
  Dir["#{@output}/*.#{@ext}"].sort.each do |file|
    match = /(?<series>draft-.+)-(?<ver>\d{2})\.#{@ext}$/.match File.basename(file)
    next unless match

    if series != match[:series]
      series = match[:series]
      bib_versions = versions.grep(/\A#{Regexp.quote series}-\d{2}\z/)
                             .sort_by { |ref| ref[/\d+$/].to_i }
      create_series series, bib_versions
    end

    current = match[:ver].to_i
    lv = bib_versions.select { |ref| ref[/\d+$/].to_i < current }
    hv = bib_versions.select { |ref| ref[/\d+$/].to_i > current }
    next if lv.empty? && hv.empty?

    bib = read_doc(file)
    bib.relation << version_relation(lv.last, "updates") if lv.any?
    bib.relation << version_relation(hv.first, "updatedBy") if hv.any?
    # The file was already recorded when first written, so skip the duplicate check.
    save_doc bib, check_duplicate: false
  end
end
#version_relation(ref, type) ⇒ RelatonBib::DocumentRelation
Create bibitem relation
141 142 143 144 145 146 |
# File 'lib/relaton_ietf/data_fetcher.rb', line 141
#
# Create bibitem relation: wraps a minimal bibliographic item (formatted
# reference plus a primary "Internet-Draft" identifier, both set to +ref+)
# in a relation of the given type.
#
# @param ref [String] reference of the related document
# @param type [String] relation type
# @return [RelatonBib::DocumentRelation]
def version_relation(ref, type)
  related = IetfBibliographicItem.new(
    formattedref: RelatonBib::FormattedRef.new(content: ref),
    docid: [RelatonBib::DocumentIdentifier.new(type: "Internet-Draft", id: ref, primary: true)]
  )
  RelatonBib::DocumentRelation.new(type: type, bibitem: related)
end