Module: RelatonBib::BibXMLParser
Constant Summary collapse
- SERIESINFONAMES =
SeriesInfo names that should be saved as document identifiers in the Relaton model.
["DOI"].freeze
- RFCPREFIXES =
%w[RFC BCP FYI STD].freeze
- FLAVOR =
nil
- ORGNAMES =
{ "IEEE" => "Institute of Electrical and Electronics Engineers", "W3C" => "World Wide Web Consortium", "3GPP" => "3rd Generation Partnership Project", }.freeze
Instance Method Summary collapse
- #abstracts(ref) ⇒ Array<RelatonBib::FormattedString>
- #add_contact(conts, type, value) ⇒ Object
- #address(postal) ⇒ Object
- #affiliation(author) ⇒ Array<RelatonBib::Affiliation>
- #bib_item(**attrs) ⇒ RelatonBib::BibliographicItem
- #committee(wgr) ⇒ RelatonBib::TechnicalCommittee
- #contacts(addr) ⇒ Array<RelatonBib::Address, RelatonBib::Phone>
- #contributor_role(author) ⇒ Hash
- #contributors(reference) ⇒ Array<Hash>
-
#create_docid(id, ver) ⇒ Object
Create the primary document identifier from an ID string.
-
#dates(reference) ⇒ Array<RelatonBib::BibliographicDate>
Extract date from reference.
-
#docids(reference, ver) ⇒ Array<RelatonBib::DocumentIdentifier>
Extract document identifiers from reference.
- #docnumber(reference) ⇒ Object
- #doctype(anchor) ⇒ String
- #editorialgroup(reference) ⇒ RelatonBib::EditorialGroup?
- #fetch_rfc(reference, is_relation: false, url: nil, ver: nil) ⇒ RelatonBib::BibliographicItem
- #formattedref(reference) ⇒ RelatonBib::FormattedRef?
- #full_name(author, reference) ⇒ RelatonBib::FullName
- #id_to_pref_num(id) ⇒ Object
- #language(reference) ⇒ String
- #link(reference, url, ver) ⇒ Array<Hash>
- #localized_string(content, lang) ⇒ RelatonBib::LocalizedString?
- #month(mon) ⇒ Object
- #new_org(name, abbr) ⇒ RelatonBib::Organization
- #organization(contrib) ⇒ Array<Hash{Symbol=>RelatonBib::Organization, Symbol=>Array<String>}>
- #parse(bibxml, url: nil, is_relation: false, ver: nil) ⇒ Object
- #person(author, reference) ⇒ Array<Hash{Symbol=>RelatonBib::Person,Symbol=>Array<String>}>
-
#pubid_type(id) ⇒ String
Extract document identifier type from identifier.
- #relations(reference) ⇒ Hash
-
#series(reference) ⇒ Array<RelatonBib::Series>
Extract series from reference.
-
#status(reference) ⇒ RelatonBib::DocumentStatus
Extract status.
- #titles(reference) ⇒ Array<Hash>
Instance Method Details
#abstracts(ref) ⇒ Array<RelatonBib::FormattedString>
195 196 197 198 199 200 201 202 |
# File 'lib/relaton_bib/bibxml_parser.rb', line 195
#
# Build one formatted abstract per `front/abstract` element of the reference.
#
# @param ref [Nokogiri::XML::Element] reference element
# @return [Array<RelatonBib::FormattedString>]
def abstracts(ref)
  ref.xpath("./front/abstract").map do |abstract|
    # Rename <t>/</t> tags to <p>/</p>, dropping whitespace around them.
    paragraphs = abstract.children.to_s.gsub(/\s*(<\/?)t(>)\s*/, '\1p\2')
    # Flatten tabs/newlines to spaces and collapse runs of spaces.
    html = paragraphs.gsub(/[\t\n]/, " ").squeeze(" ")
    FormattedString.new(
      content: html, language: language(ref), script: "Latn", format: "text/html",
    )
  end
end
#add_contact(conts, type, value) ⇒ Object
318 319 320 |
# File 'lib/relaton_bib/bibxml_parser.rb', line 318
#
# Append a contact of the given type to +conts+ when the XML node exists.
#
# @param conts [Array] contact list, mutated in place
# @param type [String] contact type (e.g. "phone", "email", "uri")
# @param value [#text, nil] node holding the contact value
# @return [Array, nil] +conts+ when a contact was added, nil otherwise
def add_contact(conts, type, value)
  return unless value

  conts << Contact.new(type: type, value: value.text)
end
#address(postal) ⇒ Object
302 303 304 305 306 307 308 309 310 311 312 313 |
# File 'lib/relaton_bib/bibxml_parser.rb', line 302
#
# Convert a `postal` element into an Address.
#
# The street is taken from `postalLine`, falling back to `street`; when
# neither exists the street list is empty.
#
# @param postal [Nokogiri::XML::Element] postal element
# @return [RelatonBib::Address]
def address(postal) # rubocop:disable Metrics/CyclomaticComplexity
  street_node = postal.at("./postalLine") || postal.at("./street")
  Address.new(
    street: [street_node&.text].compact,
    city: postal.at("./city")&.text,
    postcode: postal.at("./code")&.text,
    country: postal.at("./country")&.text,
    state: postal.at("./region")&.text,
  )
end
#affiliation(author) ⇒ Array<RelatonBib::Affiliation>
263 264 265 266 267 268 269 |
# File 'lib/relaton_bib/bibxml_parser.rb', line 263
#
# Build the affiliation list from the author's `organization` child.
#
# @param author [Nokogiri::XML::Element] author element
# @return [Array<RelatonBib::Affiliation>] empty when the organization is
#   missing or has no text
def affiliation(author)
  o = author.at("./organization")
  return [] if o.nil? || o.text.empty?

  org = new_org o.text, o[:abbrev]
  [Affiliation.new(organization: org)]
end
#bib_item(**attrs) ⇒ RelatonBib::BibliographicItem
58 59 60 61 |
# File 'lib/relaton_bib/bibxml_parser.rb', line 58
#
# Instantiate the bibliographic item from the collected attributes.
#
# @param attrs [Hash] keyword attributes forwarded unchanged to the constructor
# @return [RelatonBib::BibliographicItem]
def bib_item(**attrs)
  # Disabled default kept from the original: attrs[:place] = ["Fremont, CA"]
  BibliographicItem.new(**attrs)
end
#committee(wgr) ⇒ RelatonBib::TechnicalCommittee
365 366 367 |
# File 'lib/relaton_bib/bibxml_parser.rb', line 365
#
# Wrap a work group in a technical committee.
#
# @param wgr [RelatonBib::WorkGroup] work group
# @return [RelatonBib::TechnicalCommittee]
def committee(wgr)
  TechnicalCommittee.new(wgr)
end
#contacts(addr) ⇒ Array<RelatonBib::Address, RelatonBib::Phone>
288 289 290 291 292 293 294 295 296 297 298 |
# File 'lib/relaton_bib/bibxml_parser.rb', line 288
#
# Collect the contacts found under an `address` element: the postal address
# first, then phone, email and uri, in that order.
#
# @param addr [Nokogiri::XML::Element, nil] address element
# @return [Array] contacts; empty when +addr+ is nil
def contacts(addr)
  return [] unless addr

  found = []
  postal = addr.at("./postal")
  found << address(postal) if postal
  %w[phone email uri].each do |kind|
    add_contact(found, kind, addr.at("./#{kind}"))
  end
  found
end
#contributor_role(author) ⇒ Hash
324 325 326 |
# File 'lib/relaton_bib/bibxml_parser.rb', line 324
#
# Build the role hash for a contributor.
#
# @param author [#[]] author element; its `role` attribute is read
# @return [Hash] `{ type: <role> }`, defaulting to "author" when no role is set
def contributor_role(author)
  { type: author[:role] || "author" }
end
#contributors(reference) ⇒ Array<Hash>
206 207 208 209 210 211 212 213 |
# File 'lib/relaton_bib/bibxml_parser.rb', line 206
#
# Convert every `front/author` element into a contributor hash.
#
# An author with a `fullname` or `surname` attribute is treated as a person;
# any other author is treated as an organization. Nil results are dropped.
#
# @param reference [Nokogiri::XML::Element] reference element
# @return [Array<Hash>]
def contributors(reference)
  entries = reference.xpath("./front/author").map do |contrib|
    named = contrib[:fullname] || contrib[:surname]
    named ? person(contrib, reference) : organization(contrib)
  end
  entries.compact
end
#create_docid(id, ver) ⇒ Object
Create the primary document identifier from an ID string.
112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 |
# File 'lib/relaton_bib/bibxml_parser.rb', line 112
#
# Create the primary document identifier from an ID string.
#
# - RFC-family prefixes (RFCPREFIXES): "<PREFIX> <number without leading zeros>".
# - "I-D" / "draft": "draft-<rest>", with a trailing two-digit version replaced
#   by +ver+ when given; the type is "Internet-Draft".
# - Anything else: "<prefix> <number>", or the raw id when no prefix is found.
#
# @param id [String] identifier taken from the reference
# @param ver [String, nil] version to substitute into Internet-Draft ids
# @return [RelatonBib::DocumentIdentifier]
def create_docid(id, ver) # rubocop:disable Metrics/MethodLength
  prefix, number = id_to_pref_num(id)
  case prefix
  when *RFCPREFIXES
    docid = "#{prefix} #{number.sub(/^-?0+/, '')}"
    kind = pubid_type(id)
  when "I-D", "draft"
    docid = "draft-#{number}"
    docid.sub!(/(?<=-)\d{2}$/, ver) if ver
    kind = "Internet-Draft"
  else
    docid = prefix ? "#{prefix} #{number}" : id
    kind = pubid_type(id)
  end
  DocumentIdentifier.new(type: kind, id: docid, primary: true)
end
#dates(reference) ⇒ Array<RelatonBib::BibliographicDate>
Extract date from reference.
342 343 344 345 346 347 348 349 350 351 |
# File 'lib/relaton_bib/bibxml_parser.rb', line 342
#
# Extract the publication date from the reference's `front/date` element.
#
# The date string is assembled as "YYYY", "YYYY-MM" or "YYYY-MM-DD" depending
# on which attributes are present. Empty attributes are treated like missing
# ones, and a day is only appended when a month is present, so the result
# never ends in a dangling "-" or takes the form "YYYY-DD".
#
# @param reference [Nokogiri::XML::Element] reference element
# @return [Array<RelatonBib::BibliographicDate>] a single "published" date,
#   or an empty array when there is no usable year
def dates(reference) # rubocop:disable Metrics/CyclomaticComplexity, Metrics/AbcSize
  date = reference.at "./front/date"
  return [] if date.nil? || date[:year].nil? || date[:year].empty?

  parts = [date[:year]]
  mon = date[:month]
  if mon && !mon.empty?
    parts << month(mon)
    day = date[:day]
    parts << day if day && !day.empty?
  end
  [BibliographicDate.new(type: "published", on: parts.join("-"))]
end
#docids(reference, ver) ⇒ Array<RelatonBib::DocumentIdentifier>
Extract document identifiers from reference
77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 |
# File 'lib/relaton_bib/bibxml_parser.rb', line 77
#
# Extract document identifiers from reference.
#
# The result is built in three steps:
# 1. The primary identifier: the Internet-Draft `seriesInfo` value (with its
#    trailing two-digit version replaced by +ver+ when given), otherwise an
#    identifier created from the first of `anchor`, `docName`, `number`.
# 2. One scoped identifier per present `anchor` / `docName` / `number`
#    attribute. For an `anchor` with an RFC-family prefix the value becomes
#    "<PREFIX><number without leading zeros>".
# 3. One identifier per `seriesInfo` whose name is listed in SERIESINFONAMES.
#
# @param reference [Nokogiri::XML::Element] reference element
# @param ver [String, nil] Internet-Draft version
# @return [Array<RelatonBib::DocumentIdentifier>]
def docids(reference, ver) # rubocop:disable Metrics/MethodLength,Metrics/CyclomaticComplexity,Metrics/PerceivedComplexity,Metrics/AbcSize
  ids = []
  draft = reference.at("./seriesInfo[@name='Internet-Draft']",
                       "./front/seriesInfo[@name='Internet-Draft']")
  if draft
    id = draft[:value]
    id.sub!(/(?<=-)\d{2}$/, ver) if ver
    ids << DocumentIdentifier.new(type: "Internet-Draft", id: id, primary: true)
  else
    id = reference[:anchor] || reference[:docName] || reference[:number]
    ids << create_docid(id, ver) if id
  end

  %w[anchor docName number].each do |atr|
    value = reference[atr]
    next unless value

    pref, num = id_to_pref_num value
    scoped = if atr == "anchor" && RFCPREFIXES.include?(pref)
               "#{pref}#{num.sub(/^-?0+/, '')}"
             else
               value
             end
    # NOTE(review): the type is derived from the primary id, not from this
    # attribute's value - confirm that this is intended.
    ids << DocumentIdentifier.new(id: scoped, type: pubid_type(id), scope: atr)
  end

  from_series = reference.xpath("./seriesInfo", "./front/seriesInfo").map do |info|
    next unless SERIESINFONAMES.include? info[:name]

    DocumentIdentifier.new(id: info[:value], type: info[:name])
  end
  ids + from_series.compact
end
#docnumber(reference) ⇒ Object
52 53 54 |
# File 'lib/relaton_bib/bibxml_parser.rb', line 52
#
# Derive the document number from the `anchor` attribute by removing a
# leading "<word characters>." prefix (e.g. "RFC.").
#
# @param reference [#[]] reference element
# @return [String, nil] nil when there is no anchor
def docnumber(reference)
  anchor = reference[:anchor]
  anchor&.sub(/^\w+\./, "")
end
#doctype(anchor) ⇒ String
395 396 397 398 399 400 401 |
# File 'lib/relaton_bib/bibxml_parser.rb', line 395
#
# Map a reference anchor to a document type.
#
# @param anchor [String, nil] value of the `anchor` attribute
# @return [String] "internet-draft" when the anchor contains "I-D", "ieee"
#   when it contains "IEEE", otherwise "rfc"
def doctype(anchor)
  return "internet-draft" if /I-D/.match?(anchor)
  return "ieee" if /IEEE/.match?(anchor)

  "rfc"
end
#editorialgroup(reference) ⇒ RelatonBib::EditorialGroup?
355 356 357 358 359 360 361 |
# File 'lib/relaton_bib/bibxml_parser.rb', line 355
#
# Build the editorial group from the `front/workgroup` elements.
#
# @param reference [Nokogiri::XML::Element] reference element
# @return [RelatonBib::EditorialGroup, nil] nil when there are no workgroups
def editorialgroup(reference)
  committees = reference.xpath("./front/workgroup").map do |node|
    committee(WorkGroup.new(name: node.text))
  end
  return unless committees.any?

  EditorialGroup.new(committees)
end
#fetch_rfc(reference, is_relation: false, url: nil, ver: nil) ⇒ RelatonBib::BibliographicItem
25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 |
# File 'lib/relaton_bib/bibxml_parser.rb', line 25
#
# Convert a reference (or referencegroup) element into a bibliographic item.
#
# @param reference [Nokogiri::XML::Element, nil] element to convert
# @param is_relation [Boolean] passed through to the item as `is_relation`
# @param url [String, nil] source URL, recorded as an "xml" link
# @param ver [String, nil] Internet-Draft version
# @return [RelatonBib::BibliographicItem, nil] nil when +reference+ is nil
def fetch_rfc(reference, is_relation: false, url: nil, ver: nil) # rubocop:disable Metrics/AbcSize,Metrics/MethodLength
  return unless reference

  # Disabled in the original: fetched: Date.today.to_s unless is_relation
  bib_item(
    is_relation: is_relation,
    docnumber: docnumber(reference),
    type: "standard",
    docid: docids(reference, ver),
    status: status(reference),
    language: [language(reference)],
    script: ["Latn"],
    link: link(reference, url, ver),
    title: titles(reference),
    formattedref: formattedref(reference),
    abstract: abstracts(reference),
    contributor: contributors(reference),
    relation: relations(reference),
    date: dates(reference),
    editorialgroup: editorialgroup(reference),
    series: series(reference),
    keyword: reference.xpath("front/keyword").map(&:text),
    doctype: doctype(reference[:anchor]),
  )
end
#formattedref(reference) ⇒ RelatonBib::FormattedRef?
182 183 184 185 186 187 188 189 190 191 |
# File 'lib/relaton_bib/bibxml_parser.rb', line 182
#
# Build a formatted reference for items that have no title, using the first
# present attribute among `anchor`, `docName` and `number`.
#
# @param reference [Nokogiri::XML::Element] reference element
# @return [RelatonBib::FormattedRef, nil] nil when a title exists or none of
#   the attributes is set
def formattedref(reference)
  return if reference.at "./front/title"

  cont = (reference[:anchor] || reference[:docName] || reference[:number])
  return unless cont

  FormattedRef.new(content: cont, language: language(reference), script: "Latn")
end
#full_name(author, reference) ⇒ RelatonBib::FullName
252 253 254 255 256 257 258 259 |
# File 'lib/relaton_bib/bibxml_parser.rb', line 252
#
# Build a person's full name from the author's `fullname`, `initials` and
# `surname` attributes, localized with the reference language.
#
# @param author [#[]] author element
# @param reference [Nokogiri::XML::Element] reference element (language source)
# @return [RelatonBib::FullName]
def full_name(author, reference)
  lang = language reference
  FullName.new(
    completename: localized_string(author[:fullname], lang),
    # compact drops the nil produced when there are no initials
    initial: [localized_string(author[:initials], lang)].compact,
    surname: localized_string(author[:surname], lang),
  )
end
#id_to_pref_num(id) ⇒ Object
128 129 130 131 |
# File 'lib/relaton_bib/bibxml_parser.rb', line 128
#
# Split an identifier into its prefix and the remainder.
#
# Recognised prefixes are "I-D", "draft", "3GPP", "W3C" or two or more
# capital letters, optionally followed by one of ".", "_" or "-".
#
# @param id [String, nil] identifier
# @return [Array(String, String), nil] `[prefix, rest]`, or nil when the
#   identifier does not match
def id_to_pref_num(id)
  matched = /^(?<pref>I-D|draft|3GPP|W3C|[A-Z]{2,})[._-]?(?<num>.+)/.match(id)
  return unless matched

  [matched[:pref], matched[:num]]
end
#language(reference) ⇒ String
65 66 67 |
# File 'lib/relaton_bib/bibxml_parser.rb', line 65
#
# Language of the reference, read from its `lang` attribute.
#
# @param reference [#[]] reference element
# @return [String] the `lang` attribute, or "en" when it is absent
def language(reference)
  lang = reference[:lang]
  lang || "en"
end
#link(reference, url, ver) ⇒ Array<Hash>
159 160 161 162 163 164 165 166 167 168 169 170 |
# File 'lib/relaton_bib/bibxml_parser.rb', line 159
#
# Collect links for the reference: the source URL ("xml"), the `target`
# attribute ("src") and, for anchors starting with "I-D", one link per
# `format` child. When +ver+ is given, the two-digit version preceding the
# file extension in each format target is replaced by it.
#
# @param reference [Nokogiri::XML::Element] reference element
# @param url [String, nil] URL the BibXML was fetched from
# @param ver [String, nil] Internet-Draft version
# @return [Array<Hash>] hashes with `:type` and `:content`
def link(reference, url, ver)
  links = []
  links << { type: "xml", content: url } if url
  target = reference[:target]
  links << { type: "src", content: target } if target
  return links unless /^I-D/.match? reference[:anchor]

  reference.xpath("format").each do |fmt|
    href = fmt[:target]
    href = href.sub(/(?<=-)\d{2}(?=\.)/, ver) if ver
    links << { type: fmt[:type], content: href }
  end
  links
end
#localized_string(content, lang) ⇒ RelatonBib::LocalizedString?
282 283 284 |
# File 'lib/relaton_bib/bibxml_parser.rb', line 282
#
# Wrap content in a localized string, skipping missing content.
#
# @param content [String, nil] text
# @param lang [String] language code
# @return [RelatonBib::LocalizedString, nil] nil when +content+ is nil
def localized_string(content, lang)
  return unless content

  LocalizedString.new(content, lang)
end
#month(mon) ⇒ Object
369 370 371 372 373 374 |
# File 'lib/relaton_bib/bibxml_parser.rb', line 369
#
# Convert a month given as a number or as a (possibly abbreviated) English
# month name into a two-digit month string.
#
# Numeric input is zero-padded so that it has the same "MM" shape as the
# result for month names. A name is matched against the first entry of
# Date::MONTHNAMES that contains it; when nothing matches the result is "00".
#
# @param mon [String] month number or name
# @return [String] two-digit month
def month(mon)
  return mon.rjust(2, "0") if /^\d+$/.match? mon

  Date::MONTHNAMES.index { |m| m&.include? mon }.to_s.rjust 2, "0"
end
#new_org(name, abbr) ⇒ RelatonBib::Organization
274 275 276 277 |
# File 'lib/relaton_bib/bibxml_parser.rb', line 274
#
# Create an organization from a name and an abbreviation.
#
# @param name [String] organization name
# @param abbr [String, nil] abbreviation
# @return [RelatonBib::Organization]
def new_org(name, abbr)
  # Former defaults, disabled in the original:
  # (name = "Internet Engineering Task Force", abbr = "IETF")
  Organization.new(name: name, abbreviation: abbr)
end
#organization(contrib) ⇒ Array<Hash{Symbol=>RelatonBib::Organization, Symbol=>Array<String>}>
233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 |
# File 'lib/relaton_bib/bibxml_parser.rb', line 233
#
# Convert an author element without a personal name into an organization
# contributor.
#
# The organization text is looked up in ORGNAMES so that known abbreviations
# are expanded to their full names. An author with a missing or empty
# `organization` child yields nil instead of raising; the caller compacts
# nil results away.
#
# @param contrib [Nokogiri::XML::Element] author element
# @return [Hash{Symbol=>RelatonBib::Organization, Symbol=>Array<Hash>}, nil]
def organization(contrib)
  org = contrib.at("./organization")
  return if org.nil? || org.text.empty?

  name = ORGNAMES[org.text] || org.text
  { entity: new_org(name, org[:abbrev]), role: [contributor_role(contrib)] }
end
#parse(bibxml, url: nil, is_relation: false, ver: nil) ⇒ Object
15 16 17 18 |
# File 'lib/relaton_bib/bibxml_parser.rb', line 15
#
# Parse a BibXML document and convert its root `referencegroup` or
# `reference` element into a bibliographic item.
#
# @param bibxml [String] BibXML content
# @param url [String, nil] URL the document was fetched from
# @param is_relation [Boolean] passed through to #fetch_rfc
# @param ver [String, nil] Internet-Draft version
# @return [RelatonBib::BibliographicItem, nil]
def parse(bibxml, url: nil, is_relation: false, ver: nil)
  root = Nokogiri::XML(bibxml).at("/referencegroup", "/reference")
  fetch_rfc(root, url: url, is_relation: is_relation, ver: ver)
end
#person(author, reference) ⇒ Array<Hash{Symbol=>RelatonBib::Person,Symbol=>Array<String>}>
218 219 220 221 222 223 224 225 226 227 228 |
# File 'lib/relaton_bib/bibxml_parser.rb', line 218
#
# Convert an author element into a person contributor.
#
# @param author [Nokogiri::XML::Element] author element
# @param reference [Nokogiri::XML::Element] reference element (language source)
# @return [Hash{Symbol=>RelatonBib::Person, Symbol=>Array<Hash>}] hash with
#   `:entity` (the person) and `:role`
def person(author, reference)
  entity = Person.new(
    name: full_name(author, reference),
    affiliation: affiliation(author),
    contact: contacts(author.at("./address")),
  )
  { entity: entity, role: [contributor_role(author)] }
end
#pubid_type(id) ⇒ String
Extract document identifier type from identifier
140 141 142 |
# File 'lib/relaton_bib/bibxml_parser.rb', line 140
#
# Extract document identifier type from identifier: the prefix found by
# #id_to_pref_num.
#
# @param id [String, nil] identifier
# @return [String, nil] nil when the identifier has no recognised prefix
def pubid_type(id)
  prefix, = id_to_pref_num(id)
  prefix
end
#relations(reference) ⇒ Hash
330 331 332 333 334 |
# File 'lib/relaton_bib/bibxml_parser.rb', line 330
#
# Convert nested `reference` children into "includes" relations.
#
# @param reference [Nokogiri::XML::Element] reference or referencegroup element
# @return [Array<Hash>] hashes with `:type` and `:bibitem`
def relations(reference)
  reference.xpath("reference").map do |child|
    item = fetch_rfc(child, is_relation: true)
    { type: "includes", bibitem: item }
  end
end
#series(reference) ⇒ Array<RelatonBib::Series>
Extract series from reference
382 383 384 385 386 387 388 389 390 391 |
# File 'lib/relaton_bib/bibxml_parser.rb', line 382
#
# Extract series from reference.
#
# Every `seriesInfo` element (direct child or under `front`) becomes a "main"
# series, except those whose name is listed in SERIESINFONAMES or that carry
# a `stream` or `status` attribute.
#
# @param reference [Nokogiri::XML::Element] reference element
# @return [Array<RelatonBib::Series>]
def series(reference)
  infos = reference.xpath("./seriesInfo", "./front/seriesInfo").reject do |info|
    SERIESINFONAMES.include?(info[:name]) || info[:stream] || info[:status]
  end
  infos.map do |info|
    title = TypedTitleString.new(
      content: info[:name], language: language(reference), script: "Latn",
    )
    Series.new(title: title, number: info[:value], type: "main")
  end
end
#status(reference) ⇒ RelatonBib::DocumentStatus
Extract status
150 151 152 153 |
# File 'lib/relaton_bib/bibxml_parser.rb', line 150
#
# Extract status from the first `seriesInfo` element that has a `status`
# attribute.
#
# XPath is case-sensitive, so the element is looked up under its camel-case
# name, both as a direct child and under `front`, matching the other
# `seriesInfo` queries in this parser. The previous all-lowercase path is
# kept as a last fallback so that input which matched before still matches.
#
# @param reference [Nokogiri::XML::Element] reference element
# @return [RelatonBib::DocumentStatus, nil] nil when no status is present
def status(reference)
  st = reference.at(
    "./seriesInfo[@status]", "./front/seriesInfo[@status]", "./seriesinfo[@status]",
  )
  DocumentStatus.new(stage: st[:status]) if st
end
#titles(reference) ⇒ Array<Hash>
174 175 176 177 178 |
# File 'lib/relaton_bib/bibxml_parser.rb', line 174
#
# Convert the `front/title` elements into title hashes.
#
# @param reference [Nokogiri::XML::Element] reference element
# @return [Array<Hash>] hashes with `:content`, `:language` and `:script`
def titles(reference)
  reference.xpath("./front/title").each_with_object([]) do |node, list|
    list << { content: node.text, language: language(reference), script: "Latn" }
  end
end