Module: RelatonBib::BibXMLParser
Constant Summary collapse
- SERIESINFONAMES =
["DOI", "Internet-Draft"].freeze
- FLAVOR =
nil
Instance Method Summary collapse
- #abstracts(ref) ⇒ Array<RelatonBib::FormattedString>
- #add_contact(conts, type, value) ⇒ Object
- #address(postal) ⇒ Object
- #affiliation(author) ⇒ Array<RelatonBib::Affiliation>
- #bib_item(**attrs) ⇒ RelatonBib::BibliographicItem
- #committee(wgr) ⇒ RelatonBib::TechnicalCommittee
- #contacts(addr) ⇒ Array<RelatonBib::Address, RelatonBib::Phone>
- #contributor_role(author) ⇒ Hash
- #contributors(reference) ⇒ Array<Hash>
-
#dates(reference) ⇒ Array<RelatonBib::BibliographicDate>
Extract date from reference.
-
#docids(reference, ver) ⇒ Array<RelatonBib::DocumentIdentifier>
Extract document identifiers from reference.
- #docnumber(reference) ⇒ Object
- #doctype(anchor) ⇒ String
- #editorialgroup(reference) ⇒ RelatonBib::EditorialGroup?
- #fetch_rfc(reference, is_relation: false, url: nil, ver: nil) ⇒ RelatonBib::BibliographicItem
- #formattedref(reference) ⇒ RelatonBib::FormattedRef?
- #full_name(author, reference) ⇒ RelatonBib::FullName
- #language(reference) ⇒ String
- #link(reference, url, ver) ⇒ Array<Hash>
- #localized_string(content, lang) ⇒ RelatonBib::LocalizedString?
- #month(mon) ⇒ Object
- #new_org(name, abbr) ⇒ RelatonBib::Organization
- #organization(contrib) ⇒ Array<Hash{Symbol=>RelatonBib::Organization, Symbol=>Array<String>}>
- #parse(bibxml, url: nil, is_relation: false, ver: nil) ⇒ Object
- #person(author, reference) ⇒ Array<Hash{Symbol=>RelatonBib::Person,Symbol=>Array<String>}>
- #relations(reference) ⇒ Array<Hash>
-
#series(reference) ⇒ Array<RelatonBib::Series>
Extract series from reference.
-
#status(reference) ⇒ RelatonBib::DocumentStatus
Extract status.
- #titles(reference) ⇒ Array<Hash>
Instance Method Details
#abstracts(ref) ⇒ Array<RelatonBib::FormattedString>
163 164 165 166 167 168 169 170 |
# File 'lib/relaton_bib/bibxml_parser.rb', line 163
#
# Builds one formatted string per `./front/abstract` element.
#
# The serialized child markup is normalised: `<t>`/`</t>` tags (and the
# whitespace around them) become `<p>`/`</p>`, tabs and newlines become
# spaces, and runs of spaces are squeezed into one.
#
# @param ref [#xpath] reference node
# @return [Array<RelatonBib::FormattedString>]
def abstracts(ref)
  ref.xpath("./front/abstract").map do |abstract|
    html = abstract.children.to_s
    html = html.gsub(/\s*(<\/?)t(>)\s*/, '\1p\2')
    html = html.gsub(/[\t\n]/, " ").squeeze(" ")
    FormattedString.new(
      content: html,
      language: language(ref),
      script: "Latn",
      format: "text/html",
    )
  end
end
#add_contact(conts, type, value) ⇒ Object
285 286 287 |
# File 'lib/relaton_bib/bibxml_parser.rb', line 285
#
# Appends a Contact built from +value+'s text to +conts+; does nothing
# when +value+ is nil/false.
#
# @param conts [Array] collection the contact is appended to
# @param type [String] contact type passed to Contact.new
# @param value [#text, nil] node holding the contact value
# @return [Array, nil] +conts+ when a contact was added, otherwise nil
def add_contact(conts, type, value)
  return unless value

  conts << Contact.new(type: type, value: value.text)
end
#address(postal) ⇒ Object
269 270 271 272 273 274 275 276 277 278 279 280 |
# File 'lib/relaton_bib/bibxml_parser.rb', line 269
#
# Builds an Address from a `postal` node. The street is taken from the
# first `postalLine` child, falling back to `street`; it is an empty array
# when neither element exists. Every other field is the text of the
# matching child element, or nil when that element is absent.
#
# @param postal [#at] postal node
# @return [RelatonBib::Address]
def address(postal) # rubocop:disable Metrics/CyclomaticComplexity
  text_at = ->(path) { postal.at(path)&.text }

  street_line = text_at.call("./postalLine") || text_at.call("./street")
  Address.new(
    street: street_line.nil? ? [] : [street_line],
    city: text_at.call("./city"),
    postcode: text_at.call("./code"),
    country: text_at.call("./country"),
    state: text_at.call("./region"),
  )
end
#affiliation(author) ⇒ Array<RelatonBib::Affiliation>
230 231 232 233 234 235 236 |
# File 'lib/relaton_bib/bibxml_parser.rb', line 230
#
# Builds the affiliation list for an author from its `organization` child.
#
# @param author [#at] author node
# @return [Array<RelatonBib::Affiliation>] empty when the author has no
#   `organization` element or its text is empty
def affiliation(author)
  o = author.at("./organization")
  return [] if o.nil? || o.text.empty?

  org = new_org o.text, o[:abbrev]
  [Affiliation.new(organization: org)]
end
#bib_item(**attrs) ⇒ RelatonBib::BibliographicItem
49 50 51 52 |
# File 'lib/relaton_bib/bibxml_parser.rb', line 49
#
# Instantiates the bibliographic item from the collected attributes.
#
# @param attrs [Hash] keyword attributes forwarded unchanged
# @return [RelatonBib::BibliographicItem]
def bib_item(**attrs)
  # A default place (attrs[:place] = ["Fremont, CA"]) is intentionally not set.
  item_class = BibliographicItem
  item_class.new(**attrs)
end
#committee(wgr) ⇒ RelatonBib::TechnicalCommittee
331 332 333 |
# File 'lib/relaton_bib/bibxml_parser.rb', line 331
#
# Wraps a work group in a technical committee.
#
# @param wgr [Object] work group passed positionally to TechnicalCommittee.new
# @return [RelatonBib::TechnicalCommittee]
def committee(wgr)
  TechnicalCommittee.new(wgr)
end
#contacts(addr) ⇒ Array<RelatonBib::Address, RelatonBib::Phone>
255 256 257 258 259 260 261 262 263 264 265 |
# File 'lib/relaton_bib/bibxml_parser.rb', line 255
#
# Collects the contact entries of an `address` node: the postal address
# first (when a `postal` child exists), then phone, email and uri, each
# only when the corresponding child element is present.
#
# @param addr [#at, nil] address node
# @return [Array] collected contacts; empty when +addr+ is nil
def contacts(addr)
  return [] unless addr

  postal = addr.at("./postal")
  found = postal ? [address(postal)] : []
  %w[phone email uri].each do |kind|
    add_contact(found, kind, addr.at("./#{kind}"))
  end
  found
end
#contributor_role(author) ⇒ Hash
291 292 293 |
# File 'lib/relaton_bib/bibxml_parser.rb', line 291
#
# Builds the role hash for a contributor.
#
# @param author [#[]] author node
# @return [Hash] `{ type: <role attribute> }`, defaulting to "author" when
#   the node has no `role` attribute
def contributor_role(author)
  { type: author[:role] || "author" }
end
#contributors(reference) ⇒ Array<Hash>
174 175 176 177 178 179 180 181 |
# File 'lib/relaton_bib/bibxml_parser.rb', line 174
#
# Maps every `./front/author` element to a contributor hash. Authors that
# carry a `fullname` or `surname` attribute are treated as persons, all
# others as organizations. Nil results are dropped.
#
# @param reference [#xpath] reference node
# @return [Array<Hash>]
def contributors(reference)
  entries = reference.xpath("./front/author").map do |contrib|
    named = contrib[:fullname] || contrib[:surname]
    named ? person(contrib, reference) : organization(contrib)
  end
  entries.compact
end
#dates(reference) ⇒ Array<RelatonBib::BibliographicDate>
Extract date from reference.
309 310 311 312 313 314 315 316 317 |
# File 'lib/relaton_bib/bibxml_parser.rb', line 309
#
# Extracts the publication date from `./front/date`.
#
# A missing month or day defaults to 1. An empty array is returned when the
# element or its `year` attribute is missing, so the documented Array
# contract always holds and Time.parse is never given a year-less string
# (which it would complete with the current year).
#
# @param reference [#at] reference node
# @return [Array<RelatonBib::BibliographicDate>] one "published" date, or
#   an empty array when no usable date is present or parsing fails
def dates(reference)
  date = reference.at "./front/date"
  year = date && date[:year]
  return [] if year.nil? || year.empty?

  d = [year, month(date[:month]), (date[:day] || 1)].compact.join "-"
  on = Time.parse(d).strftime "%Y-%m-%d"
  [BibliographicDate.new(type: "published", on: on)]
rescue ArgumentError
  # Time.parse rejects strings it cannot interpret; treat those as "no date".
  []
end
#docids(reference, ver) ⇒ Array<RelatonBib::DocumentIdentifier>
Extract document identifiers from reference
68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 |
# File 'lib/relaton_bib/bibxml_parser.rb', line 68
#
# Extracts document identifiers from a reference.
#
# Three groups of identifiers are produced, in this order:
# 1. a primary identifier derived from the first of the `anchor`, `docName`
#    or `number` attributes that is set ("<prefix> <number>" when a known
#    prefix is recognised, with leading zeros removed for RFC/BCP/FYI/STD);
# 2. one scoped identifier per `anchor`/`docName`/`number` attribute present;
# 3. one identifier per `seriesInfo` element whose name is listed in
#    SERIESINFONAMES; for "Internet-Draft" the trailing two-digit version is
#    replaced by +ver+ when given.
#
# @param reference [#[], #xpath] reference node
# @param ver [String, nil] Internet-Draft version to substitute
# @return [Array<RelatonBib::DocumentIdentifier>]
def docids(reference, ver) # rubocop:disable Metrics/MethodLength,Metrics/CyclomaticComplexity,Metrics/PerceivedComplexity,Metrics/AbcSize
  ids = []
  primary = reference["anchor"] || reference["docName"] || reference["number"]
  prefix_match = primary&.match(/^(3GPP|W3C|[A-Z]{2,})(?:\.(?=[A-Z])|(?=\d))/)
  type = self::FLAVOR || (prefix_match && prefix_match[1])

  if primary
    parts = /^(?<pref>I-D|3GPP|W3C|[A-Z]{2,})[._]?(?<num>.+)/.match(primary)
    pref = parts && parts[:pref]
    num = parts && parts[:num]
    num.sub!(/^-?0+/, "") if %w[RFC BCP FYI STD].include?(pref)
    pid = pref ? "#{pref} #{num}" : primary
    ids << DocumentIdentifier.new(type: type, id: pid)
  end

  %w[anchor docName number].each do |atr|
    value = reference[atr]
    ids << DocumentIdentifier.new(id: value, type: type, scope: atr) if value
  end

  series_infos = reference.xpath("./seriesInfo", "./front/seriesInfo")
  series_ids = series_infos.map do |si|
    next unless SERIESINFONAMES.include? si[:name]

    value = si[:value]
    value = value.sub(/(?<=-)\d{2}$/, ver) if ver && si[:name] == "Internet-Draft"
    DocumentIdentifier.new(id: value, type: si[:name])
  end
  ids + series_ids.compact
end
#docnumber(reference) ⇒ Object
43 44 45 |
# File 'lib/relaton_bib/bibxml_parser.rb', line 43
#
# Derives the document number from the `anchor` attribute by removing a
# leading "<word characters>." prefix, if there is one.
#
# @param reference [#[]] reference node
# @return [String, nil] nil when the reference has no anchor
def docnumber(reference)
  anchor = reference[:anchor]
  return if anchor.nil?

  anchor.sub(/^\w+\./, "")
end
#doctype(anchor) ⇒ String
361 362 363 364 365 366 367 |
# File 'lib/relaton_bib/bibxml_parser.rb', line 361
#
# Classifies a document by its anchor: anchors containing "I-D" are
# Internet-Drafts, anchors containing "IEEE" are IEEE documents, and
# everything else (including a nil anchor) is an RFC.
#
# @param anchor [String, nil]
# @return [String] "internet-draft", "ieee" or "rfc"
def doctype(anchor)
  return "internet-draft" if /I-D/.match?(anchor)
  return "ieee" if /IEEE/.match?(anchor)

  "rfc"
end
#editorialgroup(reference) ⇒ RelatonBib::EditorialGroup?
321 322 323 324 325 326 327 |
# File 'lib/relaton_bib/bibxml_parser.rb', line 321
#
# Builds the editorial group from the `./front/workgroup` elements, turning
# each element's text into a work group wrapped by #committee.
#
# @param reference [#xpath] reference node
# @return [RelatonBib::EditorialGroup, nil] nil when no committee was built
def editorialgroup(reference)
  committees = reference.xpath("./front/workgroup").map do |node|
    committee(WorkGroup.new(name: node.text))
  end
  return unless committees.any?

  EditorialGroup.new(committees)
end
#fetch_rfc(reference, is_relation: false, url: nil, ver: nil) ⇒ RelatonBib::BibliographicItem
16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 |
# File 'lib/relaton_bib/bibxml_parser.rb', line 16
#
# Converts a reference node into a bibliographic item by delegating each
# attribute to the dedicated extractor method and handing the result to
# #bib_item.
#
# @param reference [#[], #xpath, nil] reference (or reference group) node
# @param is_relation [Boolean] forwarded to the item as-is
# @param url [String, nil] source URL, forwarded to #link
# @param ver [String, nil] Internet-Draft version, forwarded to #docids
#   and #link
# @return [RelatonBib::BibliographicItem, nil] nil when +reference+ is nil
def fetch_rfc(reference, is_relation: false, url: nil, ver: nil) # rubocop:disable Metrics/AbcSize,Metrics/MethodLength
  return unless reference

  bib_item(
    is_relation: is_relation,
    docnumber: docnumber(reference),
    type: "standard",
    docid: docids(reference, ver),
    status: status(reference),
    language: [language(reference)],
    script: ["Latn"],
    link: link(reference, url, ver),
    title: titles(reference),
    formattedref: formattedref(reference),
    abstract: abstracts(reference),
    contributor: contributors(reference),
    relation: relations(reference),
    date: dates(reference),
    editorialgroup: editorialgroup(reference),
    series: series(reference),
    keyword: reference.xpath("front/keyword").map(&:text),
    doctype: doctype(reference[:anchor]),
  )
end
#formattedref(reference) ⇒ RelatonBib::FormattedRef?
150 151 152 153 154 155 156 157 158 159 |
# File 'lib/relaton_bib/bibxml_parser.rb', line 150
#
# Builds a formatted reference for references that have no title. The
# content is the first of the `anchor`, `docName` or `number` attributes
# that is set.
#
# @param reference [#at, #[]] reference node
# @return [RelatonBib::FormattedRef, nil] nil when the reference has a
#   `./front/title` element or none of the three attributes
def formattedref(reference)
  return if reference.at "./front/title"

  cont = reference[:anchor] || reference[:docName] || reference[:number]
  return unless cont

  FormattedRef.new(content: cont, language: language(reference), script: "Latn")
end
#full_name(author, reference) ⇒ RelatonBib::FullName
219 220 221 222 223 224 225 226 |
# File 'lib/relaton_bib/bibxml_parser.rb', line 219
#
# Builds a person's full name from the `fullname`, `initials` and `surname`
# attributes of an author node, localised in the reference's language.
#
# @param author [#[]] author node
# @param reference [#[]] reference node, used to determine the language
# @return [RelatonBib::FullName]
def full_name(author, reference)
  lang = language reference
  FullName.new(
    completename: localized_string(author[:fullname], lang),
    # compact drops the entry when the author has no initials
    initial: [localized_string(author[:initials], lang)].compact,
    surname: localized_string(author[:surname], lang),
  )
end
#language(reference) ⇒ String
56 57 58 |
# File 'lib/relaton_bib/bibxml_parser.rb', line 56
#
# Language of the reference, taken from its `lang` attribute.
#
# @param reference [#[]] reference node
# @return [String] the attribute value, or "en" when it is not set
def language(reference)
  declared = reference[:lang]
  declared || "en"
end
#link(reference, url, ver) ⇒ Array<Hash>
127 128 129 130 131 132 133 134 135 136 137 138 |
# File 'lib/relaton_bib/bibxml_parser.rb', line 127
#
# Collects the links of a reference: the XML source URL (type "xml"), the
# `target` attribute (type "src") and, for anchors starting with "I-D",
# one link per `format` child. When +ver+ is given, the two-digit version
# preceding the file extension in each format target is replaced by it.
#
# @param reference [#[], #xpath] reference node
# @param url [String, nil] URL the BibXML was fetched from
# @param ver [String, nil] Internet-Draft version to substitute
# @return [Array<Hash>] hashes with :type and :content keys
def link(reference, url, ver)
  links = []
  links << { type: "xml", content: url } if url

  target = reference[:target]
  links << { type: "src", content: target } if target

  return links unless /^I-D/.match? reference[:anchor]

  reference.xpath("format").each do |fmt|
    content = fmt[:target]
    content = content.sub(/(?<=-)\d{2}(?=\.)/, ver) if ver
    links << { type: fmt[:type], content: content }
  end
  links
end
#localized_string(content, lang) ⇒ RelatonBib::LocalizedString?
249 250 251 |
# File 'lib/relaton_bib/bibxml_parser.rb', line 249
#
# Wraps +content+ in a LocalizedString for the given language.
#
# @param content [String, nil]
# @param lang [String]
# @return [RelatonBib::LocalizedString, nil] nil when +content+ is nil/false
def localized_string(content, lang)
  return unless content

  LocalizedString.new(content, lang)
end
#month(mon) ⇒ Object
335 336 337 338 339 340 |
# File 'lib/relaton_bib/bibxml_parser.rb', line 335
#
# Normalises the `month` attribute of a date element.
#
# Month names are matched case-insensitively against both the full and the
# abbreviated English names, so "january" and "Jan" resolve like "January".
# An exact-match-only lookup would return nil for those spellings.
#
# @param mon [String, nil] month as a number or an English month name
# @return [Integer, String, nil] 1 when +mon+ is nil or empty; +mon+ itself
#   when it consists of digits only; the 1-based month index for a
#   recognised name; nil for anything else
def month(mon)
  return 1 if !mon || mon.empty?
  return mon if /^\d+$/.match? mon

  name = mon.strip.downcase
  [Date::MONTHNAMES, Date::ABBR_MONTHNAMES].each do |names|
    # Index 0 of both tables is nil, hence the guard on +n+.
    idx = names.index { |n| n && n.downcase == name }
    return idx if idx
  end
  nil
end
#new_org(name, abbr) ⇒ RelatonBib::Organization
241 242 243 244 |
# File 'lib/relaton_bib/bibxml_parser.rb', line 241
#
# Creates an organization with the given name and abbreviation, e.g.
# "Internet Engineering Task Force" / "IETF".
#
# @param name [String]
# @param abbr [String, nil]
# @return [RelatonBib::Organization]
def new_org(name, abbr)
  Organization.new(name: name, abbreviation: abbr)
end
#organization(contrib) ⇒ Array<Hash{Symbol=>RelatonBib::Organization, Symbol=>Array<String>}>
201 202 203 204 205 206 207 208 209 210 211 212 213 214 |
# File 'lib/relaton_bib/bibxml_parser.rb', line 201
#
# Builds the contributor hash for an author element that represents an
# organization.
#
# Returns nil when the element has no `organization` child or its text is
# empty, mirroring the check in #affiliation. #contributors compacts its
# result, so such authors are skipped rather than raising NoMethodError on
# a nil node.
#
# @param contrib [#at] author node
# @return [Hash{Symbol=>RelatonBib::Organization, Symbol=>Array<Hash>}, nil]
def organization(contrib)
  org = contrib.at("./organization")
  return if org.nil? || org.text.empty?

  { entity: new_org(org.text, org[:abbrev]), role: [contributor_role(contrib)] }
end
#parse(bibxml, url: nil, is_relation: false, ver: nil) ⇒ Object
6 7 8 9 |
# File 'lib/relaton_bib/bibxml_parser.rb', line 6
#
# Parses a BibXML document and converts its root `referencegroup` or
# `reference` element via #fetch_rfc.
#
# @param bibxml [String] BibXML content
# @param url [String, nil] forwarded to #fetch_rfc
# @param is_relation [Boolean] forwarded to #fetch_rfc
# @param ver [String, nil] forwarded to #fetch_rfc
# @return [Object] whatever #fetch_rfc returns for the root element
def parse(bibxml, url: nil, is_relation: false, ver: nil)
  root = Nokogiri::XML(bibxml).at("/referencegroup", "/reference")
  fetch_rfc(root, url: url, is_relation: is_relation, ver: ver)
end
#person(author, reference) ⇒ Array<Hash{Symbol=>RelatonBib::Person,Symbol=>Array<String>}>
186 187 188 189 190 191 192 193 194 195 196 |
# File 'lib/relaton_bib/bibxml_parser.rb', line 186
#
# Builds the contributor hash for an author element that represents a
# person: name, affiliation and the contacts found under its `address`
# child, together with the contributor role.
#
# @param author [#at, #[]] author node
# @param reference [#[]] reference node, used for the name's language
# @return [Hash{Symbol=>RelatonBib::Person, Symbol=>Array<Hash>}]
def person(author, reference)
  entity = Person.new(
    name: full_name(author, reference),
    affiliation: affiliation(author),
    contact: contacts(author.at("./address")),
  )
  { entity: entity, role: [contributor_role(author)] }
end
#relations(reference) ⇒ Array<Hash>
297 298 299 300 301 |
# File 'lib/relaton_bib/bibxml_parser.rb', line 297
#
# Converts the nested `reference` children (as found in a reference group)
# into "includes" relations.
#
# @param reference [#xpath] reference node
# @return [Array<Hash>] hashes with :type and :bibitem keys
def relations(reference)
  reference.xpath("reference").map do |member|
    included = fetch_rfc(member, is_relation: true)
    { type: "includes", bibitem: included }
  end
end
#series(reference) ⇒ Array<RelatonBib::Series>
Extract series form reference
348 349 350 351 352 353 354 355 356 357 |
# File 'lib/relaton_bib/bibxml_parser.rb', line 348
#
# Extracts series from the `seriesInfo` elements found directly under the
# reference or under its `front` element. Elements named "DOI" and elements
# carrying a `stream` or `status` attribute are skipped.
#
# @param reference [#xpath] reference node
# @return [Array<RelatonBib::Series>]
def series(reference)
  infos = reference.xpath("./seriesInfo", "./front/seriesInfo")
  relevant = infos.reject do |si|
    si[:name] == "DOI" || si[:stream] || si[:status]
  end
  relevant.map do |si|
    title = TypedTitleString.new(
      content: si[:name], language: language(reference), script: "Latn",
    )
    Series.new(title: title, number: si[:value], type: "main")
  end
end
#status(reference) ⇒ RelatonBib::DocumentStatus
Extract status
118 119 120 121 |
# File 'lib/relaton_bib/bibxml_parser.rb', line 118
#
# Extracts the document status from the first series-info element that has
# a `status` attribute.
#
# The lowercase `seriesinfo` path is tried first for backward
# compatibility. The camel-case `seriesInfo` paths match the spelling and
# locations queried by #series and #docids; XPath is case-sensitive, so
# without them the status on those elements is never found.
#
# @param reference [#at] reference node
# @return [RelatonBib::DocumentStatus, nil] nil when no element has a status
def status(reference)
  st = reference.at(
    "./seriesinfo[@status]",
    "./seriesInfo[@status]",
    "./front/seriesInfo[@status]",
  )
  DocumentStatus.new(stage: st[:status]) if st
end
#titles(reference) ⇒ Array<Hash>
142 143 144 145 146 |
# File 'lib/relaton_bib/bibxml_parser.rb', line 142
#
# Builds one title hash per `./front/title` element.
#
# @param reference [#xpath] reference node
# @return [Array<Hash>] hashes with :content, :language and :script keys
def titles(reference)
  reference.xpath("./front/title").map do |node|
    {
      content: node.text,
      language: language(reference),
      script: "Latn",
    }
  end
end