Module: RelatonBib::BibXMLParser
Constant Summary collapse
- SERIESINFONAMES =
SeriesInfo names whose values should be saved as document identifiers in the Relaton model.
["DOI", "Internet-Draft"].freeze
- FLAVOR =
nil
- ORGNAMES =
{ "IEEE" => "Institute of Electrical and Electronics Engineers", "W3C" => "World Wide Web Consortium", "3GPP" => "3rd Generation Partnership Project", }.freeze
Instance Method Summary collapse
- #abstracts(ref) ⇒ Array<RelatonBib::FormattedString>
- #add_contact(conts, type, value) ⇒ Object
- #address(postal) ⇒ Object
- #affiliation(author) ⇒ Array<RelatonBib::Affiliation>
- #bib_item(**attrs) ⇒ RelatonBib::BibliographicItem
- #committee(wgr) ⇒ RelatonBib::TechnicalCommittee
- #contacts(addr) ⇒ Array<RelatonBib::Address, RelatonBib::Phone>
- #contributor_role(author) ⇒ Hash
- #contributors(reference) ⇒ Array<Hash>
-
#dates(reference) ⇒ Array<RelatonBib::BibliographicDate>
Extract date from reference.
-
#docids(reference, ver) ⇒ Array<RelatonBib::DocumentIdentifier>
Extract document identifiers from reference.
- #docnumber(reference) ⇒ Object
- #doctype(anchor) ⇒ String
- #editorialgroup(reference) ⇒ RelatonBib::EditorialGroup?
- #fetch_rfc(reference, is_relation: false, url: nil, ver: nil) ⇒ RelatonBib::BibliographicItem
- #formattedref(reference) ⇒ RelatonBib::FormattedRef?
- #full_name(author, reference) ⇒ RelatonBib::FullName
- #language(reference) ⇒ String
- #link(reference, url, ver) ⇒ Array<Hash>
- #localized_string(content, lang) ⇒ RelatonBib::LocalizedString?
- #month(mon) ⇒ Object
- #new_org(name, abbr) ⇒ RelatonBib::Organization
- #organization(contrib) ⇒ Array<Hash{Symbol=>RelatonBib::Organization, Symbol=>Array<String>}>
- #parse(bibxml, url: nil, is_relation: false, ver: nil) ⇒ Object
- #person(author, reference) ⇒ Array<Hash{Symbol=>RelatonBib::Person,Symbol=>Array<String>}>
-
#pubid_type(id) ⇒ String
Extract document identifier type from identifier.
- #relations(reference) ⇒ Hash
-
#series(reference) ⇒ Array<RelatonBib::Series>
Extract series from reference.
-
#status(reference) ⇒ RelatonBib::DocumentStatus
Extract status.
- #titles(reference) ⇒ Array<Hash>
Instance Method Details
#abstracts(ref) ⇒ Array<RelatonBib::FormattedString>
162 163 164 165 166 167 168 169 |
# File 'lib/relaton_bib/bibxml_parser.rb', line 162

# Builds one formatted string per `./front/abstract` element of the reference.
#
# @param ref [#xpath, #[]] reference element
# @return [Array<RelatonBib::FormattedString>]
def abstracts(ref)
  ref.xpath("./front/abstract").map do |abstract|
    # Rename <t>/</t> tags to <p>/</p>, dropping the whitespace around them.
    html = abstract.children.to_s.gsub(/\s*(<\/?)t(>)\s*/, '\1p\2')
    # Turn tabs and newlines into spaces, then collapse repeated characters
    # that are spaces.
    html = html.gsub(/[\t\n]/, " ").squeeze(" ")
    FormattedString.new(
      content: html, language: language(ref), script: "Latn",
      format: "text/html"
    )
  end
end
#add_contact(conts, type, value) ⇒ Object
285 286 287 |
# File 'lib/relaton_bib/bibxml_parser.rb', line 285

# Appends a Contact built from the text of +value+ to +conts+.
# Does nothing (and returns nil) when +value+ is nil/false.
#
# @param conts [Array] collection the contact is appended to
# @param type [String] contact type, e.g. "phone", "email", "uri"
# @param value [#text, nil] element holding the contact value
# @return [Array, nil] +conts+ when a contact was added, otherwise nil
def add_contact(conts, type, value)
  return unless value

  conts << Contact.new(type: type, value: value.text)
end
#address(postal) ⇒ Object
269 270 271 272 273 274 275 276 277 278 279 280 |
# File 'lib/relaton_bib/bibxml_parser.rb', line 269

# Builds an Address from a postal element.
# The street is taken from `./postalLine`, falling back to `./street`;
# when neither exists the street list is empty.
#
# @param postal [#at] postal element
# @return [RelatonBib::Address]
def address(postal) # rubocop:disable Metrics/CyclomaticComplexity
  street_node = postal.at("./postalLine") || postal.at("./street")
  street = [street_node&.text].compact
  Address.new(
    street: street,
    city: postal.at("./city")&.text,
    postcode: postal.at("./code")&.text,
    country: postal.at("./country")&.text,
    state: postal.at("./region")&.text,
  )
end
#affiliation(author) ⇒ Array<RelatonBib::Affiliation>
230 231 232 233 234 235 236 |
# File 'lib/relaton_bib/bibxml_parser.rb', line 230

# Builds the affiliation list of an author from its `./organization` child.
#
# @param author [#at] author element
# @return [Array<RelatonBib::Affiliation>] empty when the organization
#   element is missing or has empty text
def affiliation(author)
  org_node = author.at("./organization")
  return [] if org_node.nil? || org_node.text.empty?

  org = new_org org_node.text, org_node[:abbrev]
  [Affiliation.new(organization: org)]
end
#bib_item(**attrs) ⇒ RelatonBib::BibliographicItem
57 58 59 60 |
# File 'lib/relaton_bib/bibxml_parser.rb', line 57

# Creates the bibliographic item from the collected attributes.
#
# @param attrs [Hash] keyword arguments forwarded unchanged to
#   BibliographicItem.new
# @return [RelatonBib::BibliographicItem]
def bib_item(**attrs)
  # attrs[:place] = ["Fremont, CA"]
  BibliographicItem.new(**attrs)
end
#committee(wgr) ⇒ RelatonBib::TechnicalCommittee
333 334 335 |
# File 'lib/relaton_bib/bibxml_parser.rb', line 333

# Wraps a work group in a technical committee.
#
# @param wgr [RelatonBib::WorkGroup]
# @return [RelatonBib::TechnicalCommittee]
def committee(wgr)
  TechnicalCommittee.new(wgr)
end
#contacts(addr) ⇒ Array<RelatonBib::Address, RelatonBib::Phone>
255 256 257 258 259 260 261 262 263 264 265 |
# File 'lib/relaton_bib/bibxml_parser.rb', line 255

# Collects the contacts found in an address element: the postal address
# first (when present), then phone, email and uri in that order.
#
# @param addr [#at, nil] address element
# @return [Array] empty when +addr+ is nil
def contacts(addr)
  return [] unless addr

  found = []
  postal = addr.at("./postal")
  found << address(postal) if postal
  %w[phone email uri].each do |kind|
    add_contact(found, kind, addr.at("./#{kind}"))
  end
  found
end
#contributor_role(author) ⇒ Hash
291 292 293 |
# File 'lib/relaton_bib/bibxml_parser.rb', line 291

# Builds the role hash of a contributor from its `role` attribute.
#
# @param author [#[]] author element
# @return [Hash] `{ type: <role> }`, with "author" as the default role
def contributor_role(author)
  { type: author[:role] || "author" }
end
#contributors(reference) ⇒ Array<Hash>
173 174 175 176 177 178 179 180 |
# File 'lib/relaton_bib/bibxml_parser.rb', line 173

# Maps every `./front/author` element to a contributor hash. Authors with a
# `fullname` or `surname` attribute are treated as persons, all others as
# organizations. Nil results are dropped.
#
# @param reference [#xpath] reference element
# @return [Array<Hash>]
def contributors(reference)
  entries = reference.xpath("./front/author").map do |contrib|
    named = contrib[:fullname] || contrib[:surname]
    named ? person(contrib, reference) : organization(contrib)
  end
  # persons(reference) + organizations(reference)
  entries.compact
end
#dates(reference) ⇒ Array<RelatonBib::BibliographicDate>
Extract date from reference.
309 310 311 312 313 314 315 316 317 318 319 |
# File 'lib/relaton_bib/bibxml_parser.rb', line 309

# Extract date from reference.
#
# Reads `./front/date` and assembles a "published" date string from its
# `year`, `month` and `day` attributes.
#
# @param reference [#at] reference element
# @return [Array<RelatonBib::BibliographicDate>] empty when there is no date
#   element or it carries no year
def dates(reference)
  date = reference.at "./front/date"
  year = date && date[:year]
  # Without a year nothing meaningful can be built (and `nil + String`
  # would raise).
  return [] if year.nil? || year.empty?

  d = year
  mon = date[:month]
  if mon && !mon.empty?
    d += "-#{month(mon)}"
    day = date[:day]
    # A day is only meaningful after a month; skip empty day attributes so
    # the string never ends in a dangling "-".
    d += "-#{day}" if day && !day.empty?
  end
  # date = Time.parse(d).strftime "%Y-%m-%d"
  [BibliographicDate.new(type: "published", on: d)]
  # rescue ArgumentError
  #   []
end
#docids(reference, ver) ⇒ Array<RelatonBib::DocumentIdentifier>
Extract document identifiers from reference
76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 |
# File 'lib/relaton_bib/bibxml_parser.rb', line 76

# Extract document identifiers from reference.
#
# Produces, in order:
# 1. a primary identifier derived from the first present attribute among
#    `anchor`, `docName`, `number`;
# 2. one scoped identifier per present `anchor` / `docName` / `number`
#    attribute;
# 3. one identifier per seriesInfo element whose name is listed in
#    SERIESINFONAMES.
#
# @param reference [#[], #xpath] reference element
# @param ver [String, nil] Internet-Draft version substituted for the
#   trailing two-digit version of Internet-Draft seriesInfo values
# @return [Array<RelatonBib::DocumentIdentifier>]
def docids(reference, ver) # rubocop:disable Metrics/MethodLength,Metrics/CyclomaticComplexity,Metrics/PerceivedComplexity,Metrics/AbcSize
  ids = []
  primary = reference["anchor"] || reference["docName"] || reference["number"]
  if primary
    pid = primary
    parts = /^(?<pref>I-D|3GPP|W3C|[A-Z]{2,})[._]?(?<num>.+)/.match(primary)
    if parts
      num = parts[:num]
      # Drop an optional leading dash and leading zeros for these prefixes.
      num = num.sub(/^-?0+/, "") if %w[RFC BCP FYI STD].include?(parts[:pref])
      pid = "#{parts[:pref]} #{num}"
    end
    ids << DocumentIdentifier.new(type: pubid_type(primary), id: pid)
  end

  %w[anchor docName number].each do |atr|
    next unless reference[atr]

    # The type is derived from the primary id, not from this attribute.
    ids << DocumentIdentifier.new(id: reference[atr],
                                  type: pubid_type(primary), scope: atr)
  end

  wanted = reference.xpath("./seriesInfo", "./front/seriesInfo").select do |si|
    SERIESINFONAMES.include? si[:name]
  end
  series_ids = wanted.map do |si|
    value = si[:value]
    value.sub!(/(?<=-)\d{2}$/, ver) if ver && si[:name] == "Internet-Draft"
    DocumentIdentifier.new(id: value, type: si[:name])
  end
  ids + series_ids
end
#docnumber(reference) ⇒ Object
51 52 53 |
# File 'lib/relaton_bib/bibxml_parser.rb', line 51

# Derives the document number from the `anchor` attribute by removing a
# leading run of word characters followed by a dot.
#
# @param reference [#[]] reference element
# @return [String, nil] nil when there is no anchor
def docnumber(reference)
  anchor = reference[:anchor]
  anchor&.sub(/^\w+\./, "")
end
#doctype(anchor) ⇒ String
363 364 365 366 367 368 369 |
# File 'lib/relaton_bib/bibxml_parser.rb', line 363

# Maps an anchor to a document type: "internet-draft" when it contains
# "I-D", "ieee" when it contains "IEEE", otherwise "rfc".
#
# @param anchor [String, nil]
# @return [String]
def doctype(anchor)
  # Regexp#=== is what `case/when` uses; it is simply false for nil.
  return "internet-draft" if /I-D/ === anchor
  return "ieee" if /IEEE/ === anchor

  "rfc"
end
#editorialgroup(reference) ⇒ RelatonBib::EditorialGroup?
323 324 325 326 327 328 329 |
# File 'lib/relaton_bib/bibxml_parser.rb', line 323

# Builds the editorial group from the `./front/workgroup` elements.
#
# @param reference [#xpath] reference element
# @return [RelatonBib::EditorialGroup, nil] nil when there is no workgroup
def editorialgroup(reference)
  committees = reference.xpath("./front/workgroup").map do |node|
    committee(WorkGroup.new(name: node.text))
  end
  EditorialGroup.new(committees) if committees.any?
end
#fetch_rfc(reference, is_relation: false, url: nil, ver: nil) ⇒ RelatonBib::BibliographicItem
24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 |
# File 'lib/relaton_bib/bibxml_parser.rb', line 24

# Converts a reference element into a bibliographic item by collecting
# every attribute through the dedicated extractor methods.
#
# @param reference [#[], #xpath, nil] reference element
# @param is_relation [Boolean] passed through as the item's :is_relation
# @param url [String, nil] passed to #link
# @param ver [String, nil] Internet-Draft version, passed to #docids and #link
# @return [RelatonBib::BibliographicItem, nil] nil when +reference+ is nil
def fetch_rfc(reference, is_relation: false, url: nil, ver: nil) # rubocop:disable Metrics/AbcSize,Metrics/MethodLength
  return unless reference

  # hash[:fetched] = Date.today.to_s unless is_relation
  bib_item(
    is_relation: is_relation,
    docnumber: docnumber(reference),
    type: "standard",
    docid: docids(reference, ver),
    status: status(reference),
    language: [language(reference)],
    script: ["Latn"],
    link: link(reference, url, ver),
    title: titles(reference),
    formattedref: formattedref(reference),
    abstract: abstracts(reference),
    contributor: contributors(reference),
    relation: relations(reference),
    date: dates(reference),
    editorialgroup: editorialgroup(reference),
    series: series(reference),
    keyword: reference.xpath("front/keyword").map(&:text),
    doctype: doctype(reference[:anchor]),
  )
end
#formattedref(reference) ⇒ RelatonBib::FormattedRef?
149 150 151 152 153 154 155 156 157 158 |
# File 'lib/relaton_bib/bibxml_parser.rb', line 149

# Builds a formatted reference for references that have no
# `./front/title`, using the first present attribute among `anchor`,
# `docName` and `number` as content.
#
# @param reference [#[], #at] reference element
# @return [RelatonBib::FormattedRef, nil] nil when a title exists or none
#   of the attributes is present
def formattedref(reference)
  return if reference.at "./front/title"

  cont = reference[:anchor] || reference[:docName] || reference[:number]
  return unless cont

  FormattedRef.new(content: cont, language: language(reference),
                   script: "Latn")
end
#full_name(author, reference) ⇒ RelatonBib::FullName
219 220 221 222 223 224 225 226 |
# File 'lib/relaton_bib/bibxml_parser.rb', line 219

# Builds a person's full name from the `fullname`, `initials` and `surname`
# attributes of an author element, localized with the reference language.
#
# @param author [#[]] author element
# @param reference [#[]] reference element the language is taken from
# @return [RelatonBib::FullName]
def full_name(author, reference)
  lang = language reference
  FullName.new(
    completename: localized_string(author[:fullname], lang),
    # compact drops the entry when the initials attribute is absent
    initial: [localized_string(author[:initials], lang)].compact,
    surname: localized_string(author[:surname], lang),
  )
end
#language(reference) ⇒ String
64 65 66 |
# File 'lib/relaton_bib/bibxml_parser.rb', line 64

# Returns the `lang` attribute of the reference, defaulting to "en".
#
# @param reference [#[]] reference element
# @return [String]
def language(reference)
  lang = reference[:lang]
  lang || "en"
end
#link(reference, url, ver) ⇒ Array<Hash>
126 127 128 129 130 131 132 133 134 135 136 137 |
# File 'lib/relaton_bib/bibxml_parser.rb', line 126

# Collects the links of a reference: the source XML URL (type "xml"), the
# `target` attribute (type "src") and, for anchors starting with "I-D",
# one link per `format` child.
#
# @param reference [#[], #xpath] reference element
# @param url [String, nil] URL the BibXML was fetched from
# @param ver [String, nil] when given, replaces the two-digit version that
#   follows a dash and precedes a dot in each format target
# @return [Array<Hash>] hashes with :type and :content keys
def link(reference, url, ver)
  links = []
  links << { type: "xml", content: url } if url
  links << { type: "src", content: reference[:target] } if reference[:target]
  return links unless /^I-D/.match? reference[:anchor]

  reference.xpath("format").each do |f|
    target = f[:target]
    # Guard against a format element without a target attribute, which
    # would otherwise raise NoMethodError when a version is requested.
    target = target.sub(/(?<=-)\d{2}(?=\.)/, ver) if ver && target
    links << { type: f[:type], content: target }
  end
  links
end
#localized_string(content, lang) ⇒ RelatonBib::LocalizedString?
249 250 251 |
# File 'lib/relaton_bib/bibxml_parser.rb', line 249

# Wraps +content+ in a LocalizedString for the given language.
#
# @param content [String, nil]
# @param lang [String]
# @return [RelatonBib::LocalizedString, nil] nil when +content+ is nil
def localized_string(content, lang)
  return unless content

  LocalizedString.new(content, lang)
end
#month(mon) ⇒ Object
337 338 339 340 341 342 |
# File 'lib/relaton_bib/bibxml_parser.rb', line 337

# Converts a month attribute into the month part of a date string.
#
# Purely numeric values are returned unchanged. Month names are looked up
# case-insensitively, either as the full English name or as the
# three-letter abbreviation (an optional trailing period is ignored), and
# returned as a zero-padded two-digit number.
#
# @param mon [String, nil] month as number or English name
# @return [String] "00" when the name is not recognised
def month(mon)
  # return 1 if !mon || mon.empty?
  return mon if /^\d+$/.match? mon

  wanted = mon.to_s.strip.downcase.chomp(".")
  # Index 0 of both constants is nil, so an index equals the month number.
  idx = Date::MONTHNAMES.index { |name| name && name.downcase == wanted } ||
        Date::ABBR_MONTHNAMES.index { |name| name && name.downcase == wanted }
  idx.to_s.rjust 2, "0"
end
#new_org(name, abbr) ⇒ RelatonBib::Organization
241 242 243 244 |
# File 'lib/relaton_bib/bibxml_parser.rb', line 241

# Creates an organization with the given name and abbreviation.
#
# @param name [String]
# @param abbr [String, nil]
# @return [RelatonBib::Organization]
def new_org(name, abbr)
  # (name = "Internet Engineering Task Force", abbr = "IETF")
  Organization.new(name: name, abbreviation: abbr)
end
#organization(contrib) ⇒ Array<Hash{Symbol=>RelatonBib::Organization, Symbol=>Array<String>}>
200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 |
# File 'lib/relaton_bib/bibxml_parser.rb', line 200

# Builds an organization contributor from an author element.
#
# The text of the `./organization` child is used as the name; names that
# are keys of ORGNAMES are replaced by the mapped full name.
#
# @param contrib [#at] author element
# @return [Hash{Symbol=>RelatonBib::Organization, Symbol=>Array<Hash>}, nil]
#   nil when the author has no organization element or its text is blank
def organization(contrib)
  # publisher = { entity: new_org, role: [type: "publisher"] }
  # orgs = reference.xpath("./seriesinfo").reduce([]) do |mem, si|
  #   next mem unless si[:stream]
  #   mem << { entity: new_org(si[:stream], nil), role: [type: "author"] }
  # end
  # orgs + reference.xpath(
  #   "front/author[not(@surname)][not(@fullname)]/organization",
  # ).map do |org|
  org = contrib.at("./organization")
  # Without a usable organization there is nothing to build; returning nil
  # avoids a NoMethodError on the missing element.
  return if org.nil? || org.text.strip.empty?

  name = ORGNAMES[org.text] || org.text
  { entity: new_org(name, org[:abbrev]), role: [contributor_role(contrib)] }
  # end
end
#parse(bibxml, url: nil, is_relation: false, ver: nil) ⇒ Object
14 15 16 17 |
# File 'lib/relaton_bib/bibxml_parser.rb', line 14

# Parses a BibXML string and converts its root `referencegroup` or
# `reference` element into a bibliographic item.
#
# @param bibxml [String] BibXML content
# @param url [String, nil] passed through to #fetch_rfc
# @param is_relation [Boolean] passed through to #fetch_rfc
# @param ver [String, nil] passed through to #fetch_rfc
# @return [Object] whatever #fetch_rfc returns for the root element
def parse(bibxml, url: nil, is_relation: false, ver: nil)
  doc = Nokogiri::XML(bibxml)
  root = doc.at("/referencegroup", "/reference")
  fetch_rfc(root, url: url, is_relation: is_relation, ver: ver)
end
#person(author, reference) ⇒ Array<Hash{Symbol=>RelatonBib::Person,Symbol=>Array<String>}>
185 186 187 188 189 190 191 192 193 194 195 |
# File 'lib/relaton_bib/bibxml_parser.rb', line 185

# Builds a person contributor from an author element: name, affiliation
# and the contacts found under `./address`.
#
# @param author [#[], #at] author element
# @param reference [#[]] reference element (used for the language)
# @return [Hash{Symbol=>RelatonBib::Person, Symbol=>Array<Hash>}]
def person(author, reference)
  # reference.xpath("./front/author[@surname]|./front/author[@fullname]")
  #   .map do |author|
  entity = Person.new(
    name: full_name(author, reference),
    affiliation: affiliation(author),
    contact: contacts(author.at("./address")),
  )
  { entity: entity, role: [contributor_role(author)] }
  # end
end
#pubid_type(id) ⇒ String
Extract document identifier type from identifier
106 107 108 109 |
# File 'lib/relaton_bib/bibxml_parser.rb', line 106

# Extract document identifier type from identifier.
#
# The type is a leading "3GPP", "W3C" or run of two or more capital
# letters, provided it is followed either by a dot and a capital letter
# or directly by a digit.
#
# @param id [String, nil] identifier
# @return [String, nil] nil when +id+ is nil or has no such prefix
def pubid_type(id)
  found = id&.match(/^(3GPP|W3C|[A-Z]{2,})(?:\.(?=[A-Z])|(?=\d))/)
  found[1] if found
end
#relations(reference) ⇒ Hash
297 298 299 300 301 |
# File 'lib/relaton_bib/bibxml_parser.rb', line 297

# Converts every direct `reference` child into an "includes" relation.
#
# @param reference [#xpath] reference (group) element
# @return [Array<Hash>] hashes with :type and :bibitem keys
def relations(reference)
  reference.xpath("reference").map do |ref|
    item = fetch_rfc(ref, is_relation: true)
    { type: "includes", bibitem: item }
  end
end
#series(reference) ⇒ Array<RelatonBib::Series>
Extract series from reference
350 351 352 353 354 355 356 357 358 359 |
# File 'lib/relaton_bib/bibxml_parser.rb', line 350

# Extract series from reference.
#
# Every seriesInfo element (direct child or under `front`) becomes a
# "main" series, except those whose name is listed in SERIESINFONAMES or
# that carry a `stream` or `status` attribute.
#
# @param reference [#xpath, #[]] reference element
# @return [Array<RelatonBib::Series>]
def series(reference)
  nodes = reference.xpath("./seriesInfo", "./front/seriesInfo").reject do |si|
    SERIESINFONAMES.include?(si[:name]) || si[:stream] || si[:status]
  end
  nodes.map do |si|
    title = TypedTitleString.new(
      content: si[:name], language: language(reference), script: "Latn",
    )
    Series.new(title: title, number: si[:value], type: "main")
  end
end
#status(reference) ⇒ RelatonBib::DocumentStatus
Extract status
117 118 119 120 |
# File 'lib/relaton_bib/bibxml_parser.rb', line 117

# Extract status.
#
# Looks for a seriesInfo element carrying a `status` attribute, using the
# same `./seriesInfo` and `./front/seriesInfo` locations the other
# extractors query. XPath names are case-sensitive, so the previously used
# lowercase `./seriesinfo` is kept only as a last fallback.
#
# @param reference [#at] reference element
# @return [RelatonBib::DocumentStatus, nil] nil when no element has a status
def status(reference)
  st = reference.at(
    "./seriesInfo[@status]",
    "./front/seriesInfo[@status]",
    "./seriesinfo[@status]",
  )
  DocumentStatus.new(stage: st[:status]) if st
end
#titles(reference) ⇒ Array<Hash>
141 142 143 144 145 |
# File 'lib/relaton_bib/bibxml_parser.rb', line 141

# Builds one title hash per `./front/title` element.
#
# @param reference [#xpath, #[]] reference element
# @return [Array<Hash>] hashes with :content, :language and :script keys
def titles(reference)
  reference.xpath("./front/title").map do |node|
    { content: node.text, language: language(reference), script: "Latn" }
  end
end