Class: RelatonIec::DataParser
- Inherits:
-
Object
- Object
- RelatonIec::DataParser
- Defined in:
- lib/relaton_iec/data_parser.rb
Constant Summary collapse
# Base URL of the IEC webstore; prefix of the publication, preview and
# relation-lookup URLs built by this class.
DOMAIN = "https://webstore.iec.ch"

# Names of the parser methods whose results are collected, keyed by the same
# name, into the arguments of the bibliographic item.
ATTRS = %i[
  docid structuredidentifier language script title doctype ics date
  contributor editorialgroup abstract copyright link relation
].freeze

# Publisher abbreviation => [organization name, organization URL].
ABBREVS = {
  "ISO" => ["International Organization for Standardization", "www.iso.org"],
  "IEC" => ["International Electrotechnical Commission", "www.iec.ch"],
  "IEEE" => ["Institute of Electrical and Electronics Engineers", "www.ieee.org"],
  "ASTM" => ["American Society of Testing Materials", "www.astm.org"],
  "CISPR" => ["International special committee on radio interference", "www.iec.ch"],
}.freeze

# Publication type code (the "stdType" field) => document type name.
# Frozen like the other constants so the shared table cannot be mutated.
DOCTYPES = {
  "IS" => "international-standard",
  "TR" => "technical-report",
  "TS" => "technical-specification",
  "PAS" => "publicly-available-specification",
  "SRD" => "system-reference-deliverable",
}.freeze
Instance Method Summary collapse
-
#abstract ⇒ Array<RelatonBib::FormattedString>
Parse abstract.
-
#contributor ⇒ Array<Hash>
Parse contributors.
- #copyright ⇒ Array<Hash>
-
#create_relations(doc) ⇒ Array<Hash>
Create relations.
-
#date ⇒ Array<RelatonBib::BibliographicDate>
Parse dates.
-
#docid ⇒ Array<RelatonBib::DocumentIdentifier>
Parse document identifiers.
-
#doctype ⇒ String
Parse document type.
-
#editorialgroup ⇒ Hash
Parse editorial group.
-
#ics ⇒ Array<RelatonIsoBib::Ics>
Fetch ICS.
-
#initialize(pub) ⇒ DataParser
constructor
Initialize new instance.
-
#lang_to_script(lang) ⇒ String
Detect script.
-
#language ⇒ Array<String>
Parse languages.
-
#link ⇒ Array<RelatonBib::TypedUri>
Parse links.
-
#parse ⇒ RelatonIec::IecBibliographicItem
Parse document.
-
#relation ⇒ Array<RelatonBib::DocumentRelation>
Parse relation.
-
#script ⇒ Array<String>
Parse scripts.
-
#structuredidentifier ⇒ RelatonIsoBib::StructuredIdentifier
Parse structured identifier.
-
#title ⇒ RelatonBib::TypedTitleStringCollection
Parse titles.
-
#urn_id ⇒ String
Extract URN ID from URN.
Constructor Details
#initialize(pub) ⇒ DataParser
Initialize new instance.
31 32 33 |
# File 'lib/relaton_iec/data_parser.rb', line 31
#
# Initialize new instance.
#
# @param pub [Hash] publication record; stored unchanged and read (by String
#   keys) by the parsing methods of this class
def initialize(pub)
  @pub = pub
end
Instance Method Details
#abstract ⇒ Array<RelatonBib::FormattedString>
Parse abstract.
146 147 148 149 150 151 152 153 |
# File 'lib/relaton_iec/data_parser.rb', line 146
#
# Parse abstract.
#
# @return [Array<RelatonBib::FormattedString>, nil] one formatted string per
#   entry of the "abstract" field, or nil when the field is absent
def abstract
  entries = @pub["abstract"]
  return if entries.nil?

  entries.map do |entry|
    lang = entry["lang"]
    RelatonBib::FormattedString.new(
      content: entry["content"], language: lang,
      script: lang_to_script(lang), format: entry["format"]
    )
  end
end
#contributor ⇒ Array<Hash>
Parse contributors.
205 206 207 208 209 210 211 |
# File 'lib/relaton_iec/data_parser.rb', line 205
#
# Parse contributors.
#
# The publisher abbreviations are the "/"-separated tokens of the reference
# text that precedes its first whitespace. An abbreviation that is not a key
# of ABBREVS yields an entity whose name and url are nil.
#
# @return [Array<Hash>] one publisher hash per abbreviation
def contributor
  publishers = @pub["reference"].sub(/\s.*/, "")
  publishers.split("/").map do |abbrev|
    name, url = ABBREVS[abbrev]
    entity = { name: name, url: url, abbreviation: abbrev }
    { entity: entity, role: [{ type: "publisher" }] }
  end
end
#copyright ⇒ Array<Hash>
156 157 158 159 160 161 162 163 164 165 166 167 |
# File 'lib/relaton_iec/data_parser.rb', line 156
#
# Parse copyright.
#
# The year is the four digits that follow a ":" in the reference; when the
# reference has none, the first four-digit run of "releaseDate" is used.
# Owners are the "/"-separated abbreviations preceding the first whitespace
# of the reference.
#
# @return [Array<Hash>] a single hash with :owner and :from, or an empty
#   array when no year can be found
def copyright # rubocop:disable Metrics/AbcSize
  reference = @pub["reference"]
  year = reference[/(?<=:)\d{4}/].to_s
  year = @pub["releaseDate"]&.match(/\d{4}/).to_s if year.empty?
  return [] if year.empty?

  owners = reference[/.*?(?=\s)/].to_s.split("/").map do |abbrev|
    name, url = ABBREVS[abbrev]
    { name: name, abbreviation: abbrev, url: url }
  end
  [{ owner: owners, from: year }]
end
#create_relations(doc) ⇒ Array<Hash>
Create relations.
274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 |
# File 'lib/relaton_iec/data_parser.rb', line 274
#
# Create relations.
#
# Every ROW element whose STATUS is neither "PREPARING" nor "PUBLISHED"
# becomes one relation. The relation type is the lower-cased status, except
# that "revised"/"replaced" map to "updates" and "withdrawn" to "obsoletes".
#
# @param doc [#xpath] parsed XML document (a Nokogiri document in #relation)
#
# @return [Array<RelatonBib::DocumentRelation>] relations, one per matching row
def create_relations(doc) # rubocop:disable Metrics/MethodLength
  type_by_status = {
    "revised" => "updates",
    "replaced" => "updates",
    "withdrawn" => "obsoletes",
  }
  rows = doc.xpath('//ROW[STATUS[.!="PREPARING" and .!="PUBLISHED"]]')
  rows.map do |row|
    status = row.at("STATUS").text.downcase
    name = row.at("FULL_NAME").text
    formattedref = RelatonBib::FormattedRef.new(content: name, format: "text/plain")
    identifier = RelatonBib::DocumentIdentifier.new(id: name, type: "IEC", primary: true)
    bibitem = IecBibliographicItem.new(formattedref: formattedref, docid: [identifier])
    # Statuses without a mapping are used as the relation type unchanged.
    RelatonBib::DocumentRelation.new(type: type_by_status.fetch(status, status), bibitem: bibitem)
  end
end
#date ⇒ Array<RelatonBib::BibliographicDate>
Parse dates.
187 188 189 190 191 192 193 194 195 196 197 198 |
# File 'lib/relaton_iec/data_parser.rb', line 187
#
# Parse dates.
#
# @return [Array<RelatonBib::BibliographicDate>] one date per date field
#   that is present in the record, in the order of the table below
def date
  # date type => record field holding its value
  fields = {
    "published" => "publicationDate",
    "stable-until" => "stabilityDate",
    "confirmed" => "confirmationDate",
    "obsoleted" => "dateOfWithdrawal",
  }
  fields.each_with_object([]) do |(type, field), dates|
    on = @pub[field]
    dates << RelatonBib::BibliographicDate.new(type: type, on: on) if on
  end
end
#docid ⇒ Array<RelatonBib::DocumentIdentifier>
Parse document identifiers.
54 55 56 57 58 59 |
# File 'lib/relaton_iec/data_parser.rb', line 54
#
# Parse document identifiers.
#
# @return [Array<RelatonBib::DocumentIdentifier>] the primary "IEC"
#   identifier taken from "reference", followed by a "URN" identifier built
#   from the first element of "urnAlt"
def docid
  primary = RelatonBib::DocumentIdentifier.new(
    id: @pub["reference"], type: "IEC", primary: true,
  )
  urn = RelatonBib::DocumentIdentifier.new(
    id: "urn:#{@pub['urnAlt'][0]}", type: "URN",
  )
  [primary, urn]
end
#doctype ⇒ String
Parse document type.
243 244 245 246 |
# File 'lib/relaton_iec/data_parser.rb', line 243
#
# Parse document type.
#
# @return [DocumentType] document type whose type is the DOCTYPES entry for
#   "stdType", or the lower-cased "stdType" when there is no entry
def doctype
  std_type = @pub["stdType"]
  type = DOCTYPES[std_type] || std_type.downcase
  DocumentType.new(type: type)
end
#editorialgroup ⇒ Hash
Parse editorial group.
128 129 130 131 132 133 134 135 136 137 138 139 |
# File 'lib/relaton_iec/data_parser.rb', line 128
#
# Parse editorial group.
#
# @return [Hash, nil] hash with a single technical committee built from the
#   committee reference, or nil when the record has no "committee"
def editorialgroup
  committee = @pub["committee"]
  return unless committee

  reference = committee["reference"]
  technical_committee = {
    name: reference,
    type: "technicalCommittee",
    # first run of digits in the reference, nil when it has none
    number: reference[/\d+/]&.to_i,
  }
  { technical_committee: [technical_committee] }
end
#ics ⇒ Array<RelatonIsoBib::Ics>
Fetch ICS.
174 175 176 177 178 179 180 |
# File 'lib/relaton_iec/data_parser.rb', line 174
#
# Fetch ICS.
#
# @return [Array<RelatonIsoBib::Ics>] one ICS per classification of type
#   "ICS"; empty when the record has no "classifications"
def ics
  classifications = @pub["classifications"]
  return [] unless classifications

  classifications.each_with_object([]) do |classification, list|
    next unless classification["type"] == "ICS"

    list << RelatonIsoBib::Ics.new(classification["value"])
  end
end
#lang_to_script(lang) ⇒ String
Detect script.
104 105 106 107 108 |
# File 'lib/relaton_iec/data_parser.rb', line 104
#
# Detect script.
#
# @param lang [String] language code
#
# @return [String, nil] "Latn" for "en", "fr" and "es"; nil for anything else
def lang_to_script(lang)
  "Latn" if %w[en fr es].include?(lang)
end
#language ⇒ Array<String>
Parse languages.
81 82 83 |
# File 'lib/relaton_iec/data_parser.rb', line 81
#
# Parse languages.
#
# @return [Array<String>] distinct "lang" values of the title entries, in
#   order of first appearance
def language
  langs = @pub["title"].map { |entry| entry["lang"] }
  langs.uniq
end
#link ⇒ Array<RelatonBib::TypedUri>
Parse links.
218 219 220 221 222 223 224 225 226 227 |
# File 'lib/relaton_iec/data_parser.rb', line 218
#
# Parse links.
#
# @return [Array<RelatonBib::TypedUri>] the "src" link to the publication
#   page, followed by one "obp" link per release item of type "PREVIEW"
def link
  source = RelatonBib::TypedUri.new(
    content: "#{DOMAIN}/publication/#{urn_id}", type: "src",
  )
  links = [source]
  RelatonBib.array(@pub["releaseItems"]).each do |item|
    next unless item["type"] == "PREVIEW"

    preview = "#{DOMAIN}/preview/#{item['contentRef']['fileName']}"
    links << RelatonBib::TypedUri.new(content: preview, type: "obp")
  end
  links
end
#parse ⇒ RelatonIec::IecBibliographicItem
Parse document.
40 41 42 43 44 45 46 47 |
# File 'lib/relaton_iec/data_parser.rb', line 40
#
# Parse document.
#
# Calls every parser method named in ATTRS and passes the results, together
# with status, edition, price code and place, to the bibliographic item.
#
# @return [RelatonIec::IecBibliographicItem] bibliographic item
def parse # rubocop:disable Metrics/AbcSize
  args = ATTRS.each_with_object({}) { |a, h| h[a] = send a }
  args[:docstatus] = RelatonBib::DocumentStatus.new stage: @pub["status"]
  args[:edition] = @pub["edition"]
  # A record without "priceInfo" gets a nil price code instead of raising
  # NoMethodError on nil.
  args[:price_code] = @pub.dig("priceInfo", "priceCode")
  args[:place] = ["Geneva"]
  IecBibliographicItem.new(**args)
end
#relation ⇒ Array<RelatonBib::DocumentRelation>
Parse relation.
253 254 255 256 257 258 259 260 261 262 263 264 265 |
# File 'lib/relaton_iec/data_parser.rb', line 253
#
# Parse relation.
#
# Requests the relation XML for this publication from the webstore and
# converts it with #create_relations. Any StandardError triggers a retry;
# the third consecutive failure is re-raised to the caller.
#
# @return [Array<RelatonBib::DocumentRelation>] relations
def relation # rubocop:disable Metrics/MethodLength
  attempts = 0
  begin
    url = "#{DOMAIN}/webstore/webstore.nsf/AjaxRequestXML?Openagent&url=#{urn_id}"
    response = Net::HTTP.get_response(URI(url))
    create_relations(Nokogiri::XML(response.body))
  rescue StandardError => e
    attempts += 1
    raise e if attempts >= 3

    retry
  end
end
#script ⇒ Array<String>
Parse scripts.
90 91 92 93 94 95 |
# File 'lib/relaton_iec/data_parser.rb', line 90
#
# Parse scripts.
#
# @return [Array<String>] distinct scripts of the document languages, in
#   order of first appearance; languages without a known script are skipped
def script
  scripts = language.map { |lang| lang_to_script(lang) }
  scripts.select { |scr| scr }.uniq
end
#structuredidentifier ⇒ RelatonIsoBib::StructuredIdentifier
Parse structured identifier.
66 67 68 69 70 71 72 73 74 |
# File 'lib/relaton_iec/data_parser.rb', line 66
#
# Parse structured identifier.
#
# The project number is the first word that follows whitespace in the
# reference; optional part and subpart follow it, each introduced by "-".
#
# @return [RelatonIsoBib::StructuredIdentifier] structured identifier
def structuredidentifier
  reference = @pub["reference"]
  parts = reference.match(
    /(?<=\s)(?<project>\w+)(?:-(?<part>\w*)(?:-(?<subpart>\w*))?)?/,
  )
  RelatonIsoBib::StructuredIdentifier.new(
    type: "IEC", id: reference, project_number: parts[:project],
    part: parts[:part], subpart: parts[:subpart]
  )
end
#title ⇒ RelatonBib::TypedTitleStringCollection
Parse titles.
115 116 117 118 119 120 121 |
# File 'lib/relaton_iec/data_parser.rb', line 115
#
# Parse titles.
#
# @return [RelatonBib::TypedTitleStringCollection] collection accumulated
#   from the typed titles created for every entry of the "title" field
def title
  titles = RelatonBib::TypedTitleStringCollection.new
  @pub["title"].each do |entry|
    lang = entry["lang"]
    titles += RelatonBib::TypedTitleString.from_string(
      entry["value"], lang, lang_to_script(lang)
    )
  end
  titles
end
#urn_id ⇒ String
Extract URN ID from URN.
234 235 236 |
# File 'lib/relaton_iec/data_parser.rb', line 234
#
# Extract URN ID from URN.
#
# @return [String, nil] last ":"-separated segment of the "urn" field; nil
#   when the field is an empty string
def urn_id
  segments = @pub["urn"].split(":")
  segments[-1]
end