Module: RelatonBib::BibXMLParser
Constant Summary collapse
- SERIESINFONAMES =
["DOI", "Internet-Draft"].freeze
- FLAVOR =
nil
Instance Method Summary collapse
- #abstracts(ref) ⇒ Array<RelatonBib::FormattedString>
- #add_contact(conts, type, value) ⇒ Object
- #address(postal) ⇒ Object
- #affiliation(author) ⇒ Array<RelatonBib::Affiliation>
- #bib_item(**attrs) ⇒ RelatonBib::BibliographicItem
- #committee(wgr) ⇒ RelatonBib::TechnicalCommittee
- #contacts(addr) ⇒ Array<RelatonBib::Address, RelatonBib::Phone>
- #contributor_role(author) ⇒ Hash
- #contributors(reference) ⇒ Array<Hash>
-
#dates(reference) ⇒ Array<RelatonBib::BibliographicDate>
Extract date from reference.
-
#docids(reference, ver) ⇒ Array<RelatonBib::DocumentIdentifier>
Extract document identifiers from reference.
- #docnumber(reference) ⇒ Object
- #doctype(anchor) ⇒ String
- #editorialgroup(reference) ⇒ RelatonBib::EditorialGroup?
- #fetch_rfc(reference, is_relation: false, url: nil, ver: nil) ⇒ RelatonBib::BibliographicItem
- #formattedref(reference) ⇒ RelatonBib::FormattedRef?
- #full_name(author, reference) ⇒ RelatonBib::FullName
- #language(reference) ⇒ String
- #link(reference, url, ver) ⇒ Array<Hash>
- #localized_string(content, lang) ⇒ RelatonBib::LocalizedString?
- #month(mon) ⇒ Object
- #new_org(name, abbr) ⇒ RelatonBib::Organization
- #organization(contrib) ⇒ Array<Hash{Symbol=>RelatonBib::Organization, Symbol=>Array<String>}>
- #parse(bibxml, url: nil, is_relation: false, ver: nil) ⇒ Object
- #person(author, reference) ⇒ Array<Hash{Symbol=>RelatonBib::Person,Symbol=>Array<String>}>
- #relations(reference) ⇒ Hash
-
#series(reference) ⇒ Array<RelatonBib::Series>
Extract series from reference.
-
#status(reference) ⇒ RelatonBib::DocumentStatus
Extract status.
- #titles(reference) ⇒ Array<Hash>
Instance Method Details
#abstracts(ref) ⇒ Array<RelatonBib::FormattedString>
148 149 150 151 152 153 154 155 |
# File 'lib/relaton_bib/bibxml_parser.rb', line 148
#
# Build one abstract per `./front/abstract` element of the reference.
#
# The element's inner markup is kept, but BibXML `<t>` tags become HTML
# `<p>` tags (dropping whitespace around them), tabs/newlines become spaces,
# and runs of spaces are collapsed.
#
# @param ref [Nokogiri::XML::Element] reference element
# @return [Array<RelatonBib::FormattedString>]
def abstracts(ref)
  ref.xpath("./front/abstract").map do |node|
    markup = node.children.to_s
    markup = markup.gsub(/\s*(<\/?)t(>)\s*/, '\1p\2')
    markup = markup.gsub(/[\t\n]/, " ").squeeze(" ")
    FormattedString.new(
      content: markup,
      language: language(ref),
      script: "Latn",
      format: "text/html",
    )
  end
end
#add_contact(conts, type, value) ⇒ Object
270 271 272 |
# File 'lib/relaton_bib/bibxml_parser.rb', line 270
#
# Append a Contact built from the node's text to +conts+.
# Does nothing (and returns nil) when +value+ is nil.
#
# @param conts [Array] contact list, mutated in place
# @param type [String] contact type, e.g. "phone", "email", "uri"
# @param value [#text, nil] node holding the contact value
# @return [Array, nil] +conts+ when a contact was added, otherwise nil
def add_contact(conts, type, value)
  return unless value

  conts << Contact.new(type: type, value: value.text)
end
#address(postal) ⇒ Object
254 255 256 257 258 259 260 261 262 263 264 265 |
# File 'lib/relaton_bib/bibxml_parser.rb', line 254
#
# Convert a `<postal>` element into an Address.
#
# The street is taken from the first `./postalLine`, or from `./street` when
# there is no `postalLine`; the street list is empty when neither exists.
# City, postcode, country and state come from `./city`, `./code`,
# `./country` and `./region`, each nil when the element is absent.
#
# @param postal [Nokogiri::XML::Element] `<postal>` element
# @return [RelatonBib::Address]
def address(postal)
  text_at = ->(path) { postal.at(path)&.text }
  street_node = postal.at("./postalLine") || postal.at("./street")

  Address.new(
    street: [street_node&.text].compact,
    city: text_at.call("./city"),
    postcode: text_at.call("./code"),
    country: text_at.call("./country"),
    state: text_at.call("./region"),
  )
end
#affiliation(author) ⇒ Array<RelatonBib::Affiliation>
215 216 217 218 219 220 221 |
# File 'lib/relaton_bib/bibxml_parser.rb', line 215
#
# Build the affiliation list of an author from its `./organization` child.
#
# @param author [Nokogiri::XML::Element] `<author>` element
# @return [Array<RelatonBib::Affiliation>] a single affiliation, or an empty
#   array when the organization element is missing or has no text
def affiliation(author)
  o = author.at("./organization")
  return [] if o.nil? || o.text.empty?

  org = new_org o.text, o[:abbrev]
  [Affiliation.new(organization: org)]
end
#bib_item(**attrs) ⇒ RelatonBib::BibliographicItem
49 50 51 52 |
# File 'lib/relaton_bib/bibxml_parser.rb', line 49 def bib_item(**attrs) # attrs[:place] = ["Fremont, CA"] BibliographicItem.new(**attrs) end |
#committee(wgr) ⇒ RelatonBib::TechnicalCommittee
316 317 318 |
# File 'lib/relaton_bib/bibxml_parser.rb', line 316
#
# Wrap a work group in a technical committee.
#
# @param wgr [RelatonBib::WorkGroup] work group built by #editorialgroup
# @return [RelatonBib::TechnicalCommittee]
def committee(wgr)
  TechnicalCommittee.new(wgr)
end
#contacts(addr) ⇒ Array<RelatonBib::Address, RelatonBib::Phone>
240 241 242 243 244 245 246 247 248 249 250 |
# File 'lib/relaton_bib/bibxml_parser.rb', line 240
#
# Collect the contact entries of an `<address>` element: the postal address
# (when `./postal` exists) followed by phone, email and uri, in that order.
#
# @param addr [Nokogiri::XML::Element, nil] `<address>` element
# @return [Array] address and contact objects; empty when +addr+ is nil
def contacts(addr)
  return [] unless addr

  postal = addr.at("./postal")
  found = postal ? [address(postal)] : []

  [["phone", "./phone"], ["email", "./email"], ["uri", "./uri"]].each do |type, path|
    add_contact(found, type, addr.at(path))
  end
  found
end
#contributor_role(author) ⇒ Hash
276 277 278 |
# File 'lib/relaton_bib/bibxml_parser.rb', line 276
#
# Role description of a contributor, read from the `role` attribute.
#
# @param author [Nokogiri::XML::Element] `<author>` element
# @return [Hash] `{ type: <role> }`, with "author" as the role when the
#   attribute is absent
def contributor_role(author)
  { type: author[:role] || "author" }
end
#contributors(reference) ⇒ Array<Hash>
159 160 161 162 163 164 165 166 |
# File 'lib/relaton_bib/bibxml_parser.rb', line 159
#
# Turn every `./front/author` element into a contributor hash.
#
# An author with a `fullname` or `surname` attribute is treated as a person;
# any other author is treated as an organization. Nil results are dropped.
#
# @param reference [Nokogiri::XML::Element]
# @return [Array<Hash>]
def contributors(reference)
  entries = reference.xpath("./front/author").map do |contrib|
    is_person = contrib[:fullname] || contrib[:surname]
    is_person ? person(contrib, reference) : organization(contrib)
  end
  entries.compact
end
#dates(reference) ⇒ Array<RelatonBib::BibliographicDate>
Extract date from reference.
294 295 296 297 298 299 300 301 302 |
# File 'lib/relaton_bib/bibxml_parser.rb', line 294
#
# Extract the publication date from `./front/date`.
#
# The date is assembled from the `year`, `month` and `day` attributes (day
# defaults to 1, month is normalised by #month) and formatted as
# "YYYY-MM-DD".
#
# @param reference [Nokogiri::XML::Element]
# @return [Array<RelatonBib::BibliographicDate>] one "published" date, or an
#   empty array when there is no date element, no year, or the assembled
#   string cannot be parsed
def dates(reference)
  date = reference.at "./front/date"
  # Always return an Array, as documented. Without a year the joined string
  # would hold only month and day, so it is skipped rather than parsed.
  return [] if date.nil? || date[:year].to_s.empty?

  d = [date[:year], month(date[:month]), (date[:day] || 1)].compact.join "-"
  on = Time.parse(d).strftime "%Y-%m-%d"
  [BibliographicDate.new(type: "published", on: on)]
rescue ArgumentError
  []
end
#docids(reference, ver) ⇒ Array<RelatonBib::DocumentIdentifier>
Extract document identifiers from reference
68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 |
# File 'lib/relaton_bib/bibxml_parser.rb', line 68
#
# Extract document identifiers from reference.
#
# The primary identifier comes from the `seriesInfo` element (directly under
# the reference or under `front`) whose name equals FLAVOR, with scope
# "series". Without such an element it falls back to the first present of
# the `anchor`, `docName` and `number` attributes, scoped by the attribute
# name; its type is FLAVOR or, failing that, the leading publisher prefix
# matched in the identifier.
#
# After that, every `seriesInfo` whose name is listed in SERIESINFONAMES
# yields one more identifier. For "Internet-Draft" entries a trailing
# two-digit version is replaced by +ver+ when +ver+ is given.
#
# @param reference [Nokogiri::XML::Element]
# @param ver [String, nil] Internet-Draft version to substitute
# @return [Array<RelatonBib::DocumentIdentifier>]
def docids(reference, ver) # rubocop:disable Metrics/MethodLength,Metrics/CyclomaticComplexity,Metrics/PerceivedComplexity,Metrics/AbcSize
  flavor = self::FLAVOR
  flavored = reference.at("./seriesInfo[@name='#{flavor}']",
                          "./front/seriesInfo[@name='#{flavor}']")
  if flavored
    type = flavored[:name]
    id = flavored[:value]
    scope = "series"
  else
    source = %i[anchor docName number].find { |attr| reference[attr] }
    id = source && reference[source]
    scope = source&.to_s
    prefix = id&.match(/^(3GPP|W3C|[A-Z]{2,})(?:\.(?=[A-Z])|(?=\d))/)
    type = flavor || (prefix && prefix[1])
  end

  primary = []
  primary << DocumentIdentifier.new(type: type, id: id, scope: scope) if id

  from_series = reference.xpath("./seriesInfo", "./front/seriesInfo").map do |si|
    next unless SERIESINFONAMES.include? si[:name]

    value = si[:value]
    value.sub!(/(?<=-)\d{2}$/, ver) if ver && si[:name] == "Internet-Draft"
    DocumentIdentifier.new(id: value, type: si[:name])
  end
  primary + from_series.compact
end
#docnumber(reference) ⇒ Object
43 44 45 |
# File 'lib/relaton_bib/bibxml_parser.rb', line 43
#
# Document number derived from the `anchor` attribute by removing a leading
# "<word>." prefix (e.g. "RFC.8341" becomes "8341").
#
# @param reference [Nokogiri::XML::Element]
# @return [String, nil] nil when the reference has no anchor
def docnumber(reference)
  anchor = reference[:anchor]
  return if anchor.nil?

  anchor.sub(/^\w+\./, "")
end
#doctype(anchor) ⇒ String
346 347 348 349 350 351 352 |
# File 'lib/relaton_bib/bibxml_parser.rb', line 346
#
# Derive the document type from the anchor: "internet-draft" when it
# contains "I-D", "ieee" when it contains "IEEE", otherwise "rfc".
#
# @param anchor [String, nil] value of the reference's `anchor` attribute
# @return [String]
def doctype(anchor)
  known = [[/I-D/, "internet-draft"], [/IEEE/, "ieee"]]
  # `===` keeps case/when matching semantics (a nil anchor matches nothing).
  _, type = known.find { |pattern, _| pattern === anchor }
  type || "rfc"
end
#editorialgroup(reference) ⇒ RelatonBib::EditorialGroup?
306 307 308 309 310 311 312 |
# File 'lib/relaton_bib/bibxml_parser.rb', line 306
#
# Build the editorial group from the `./front/workgroup` elements; each
# element's text becomes a work group name wrapped by #committee.
#
# @param reference [Nokogiri::XML::Element]
# @return [RelatonBib::EditorialGroup, nil] nil when there is no workgroup
def editorialgroup(reference)
  committees = reference.xpath("./front/workgroup").map do |node|
    committee(WorkGroup.new(name: node.text))
  end
  return unless committees.any?

  EditorialGroup.new(committees)
end
#fetch_rfc(reference, is_relation: false, url: nil, ver: nil) ⇒ RelatonBib::BibliographicItem
16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 |
# File 'lib/relaton_bib/bibxml_parser.rb', line 16
#
# Convert a `<reference>` / `<referencegroup>` element into a bibliographic
# item by delegating each attribute to its extractor method and handing the
# result to #bib_item.
#
# @param reference [Nokogiri::XML::Element, nil]
# @param is_relation [Boolean] passed through as the item's `is_relation`
# @param url [String, nil] source URL, used by #link
# @param ver [String, nil] Internet-Draft version, used by #docids and #link
# @return [RelatonBib::BibliographicItem, nil] nil when +reference+ is nil
def fetch_rfc(reference, is_relation: false, url: nil, ver: nil) # rubocop:disable Metrics/AbcSize,Metrics/MethodLength
  return unless reference

  bib_item(
    is_relation: is_relation,
    docnumber: docnumber(reference),
    type: "standard",
    docid: docids(reference, ver),
    status: status(reference),
    language: [language(reference)],
    script: ["Latn"],
    link: link(reference, url, ver),
    title: titles(reference),
    formattedref: formattedref(reference),
    abstract: abstracts(reference),
    contributor: contributors(reference),
    relation: relations(reference),
    date: dates(reference),
    editorialgroup: editorialgroup(reference),
    series: series(reference),
    keyword: reference.xpath("front/keyword").map(&:text),
    doctype: doctype(reference[:anchor]),
  )
end
#formattedref(reference) ⇒ RelatonBib::FormattedRef?
135 136 137 138 139 140 141 142 143 144 |
# File 'lib/relaton_bib/bibxml_parser.rb', line 135
#
# Formatted reference used only when the reference has no `./front/title`.
# Its content is the first present of the `anchor`, `docName` and `number`
# attributes.
#
# @param reference [Nokogiri::XML::Element]
# @return [RelatonBib::FormattedRef, nil] nil when a title exists or none of
#   the three attributes is set
def formattedref(reference)
  return if reference.at("./front/title")

  ident = reference[:anchor] || reference[:docName] || reference[:number]
  return unless ident

  FormattedRef.new(content: ident, language: language(reference), script: "Latn")
end
#full_name(author, reference) ⇒ RelatonBib::FullName
204 205 206 207 208 209 210 211 |
# File 'lib/relaton_bib/bibxml_parser.rb', line 204
#
# Build a person's full name from the `fullname`, `initials` and `surname`
# attributes of an author, localized in the reference's language.
#
# @param author [Nokogiri::XML::Element] `<author>` element
# @param reference [Nokogiri::XML::Element] enclosing reference, used only
#   to determine the language
# @return [RelatonBib::FullName]
def full_name(author, reference)
  lang = language reference
  FullName.new(
    completename: localized_string(author[:fullname], lang),
    # compact drops the nil produced when there is no `initials` attribute
    initial: [localized_string(author[:initials], lang)].compact,
    surname: localized_string(author[:surname], lang),
  )
end
#language(reference) ⇒ String
56 57 58 |
# File 'lib/relaton_bib/bibxml_parser.rb', line 56
#
# Language of the reference, taken from its `lang` attribute.
#
# @param reference [Nokogiri::XML::Element]
# @return [String] the attribute value, or "en" when it is absent
def language(reference)
  declared = reference[:lang]
  declared || "en"
end
#link(reference, url, ver) ⇒ Array<Hash>
112 113 114 115 116 117 118 119 120 121 122 123 |
# File 'lib/relaton_bib/bibxml_parser.rb', line 112
#
# Collect the links of a reference: the source XML URL (type "xml"), the
# `target` attribute (type "src") and, for anchors starting with "I-D", one
# link per `format` child. When +ver+ is given, the two-digit version that
# precedes the file extension in a format target is replaced by it.
#
# @param reference [Nokogiri::XML::Element]
# @param url [String, nil] URL the BibXML was fetched from
# @param ver [String, nil] Internet-Draft version to substitute
# @return [Array<Hash>] hashes with `:type` and `:content`
def link(reference, url, ver)
  links = []
  links << { type: "xml", content: url } if url

  target = reference[:target]
  links << { type: "src", content: target } if target
  return links unless /^I-D/.match? reference[:anchor]

  format_links = reference.xpath("format").map do |f|
    content = ver ? f[:target].sub(/(?<=-)\d{2}(?=\.)/, ver) : f[:target]
    { type: f[:type], content: content }
  end
  links + format_links
end
#localized_string(content, lang) ⇒ RelatonBib::LocalizedString?
234 235 236 |
# File 'lib/relaton_bib/bibxml_parser.rb', line 234
#
# Wrap +content+ in a LocalizedString for the given language.
#
# @param content [String, nil]
# @param lang [String]
# @return [RelatonBib::LocalizedString, nil] nil when +content+ is nil
def localized_string(content, lang)
  return unless content

  LocalizedString.new(content, lang)
end
#month(mon) ⇒ Object
320 321 322 323 324 325 |
# File 'lib/relaton_bib/bibxml_parser.rb', line 320
#
# Normalise the `month` attribute of a `<date>` element.
#
# Accepts a numeric string (returned unchanged) or an English month name.
# Names are matched case-insensitively, and abbreviations of at least three
# letters ("Jan", "Sept", "Dec.") are accepted, so that such a month no
# longer comes back as nil.
#
# @param mon [String, nil] month attribute value
# @return [Integer, String, nil] 1 when +mon+ is nil or empty, +mon+ itself
#   when it is all digits, the 1-based month index for a recognised name,
#   nil for anything else
def month(mon)
  return 1 if !mon || mon.empty?
  return mon if /^\d+$/.match? mon

  name = mon.strip.downcase.chomp(".")
  # Shorter prefixes are ambiguous ("ma" is both March and May).
  return if name.length < 3

  # Date::MONTHNAMES[0] is nil, hence the guard inside the block.
  Date::MONTHNAMES.index { |m| m && m.downcase.start_with?(name) }
end
#new_org(name, abbr) ⇒ RelatonBib::Organization
226 227 228 229 |
# File 'lib/relaton_bib/bibxml_parser.rb', line 226
#
# Create an organization from a name and an abbreviation.
#
# @param name [String] organization name
# @param abbr [String, nil] organization abbreviation
# @return [RelatonBib::Organization]
def new_org(name, abbr)
  Organization.new(name: name, abbreviation: abbr)
end
#organization(contrib) ⇒ Array<Hash{Symbol=>RelatonBib::Organization, Symbol=>Array<String>}>
186 187 188 189 190 191 192 193 194 195 196 197 198 199 |
# File 'lib/relaton_bib/bibxml_parser.rb', line 186
#
# Build an organization contributor from an `<author>` element that carries
# no person name.
#
# Returns nil when the element has no `./organization` child or the
# organization has no text, instead of failing on a nil node. This uses the
# same guard as #affiliation, and #contributors already discards nil entries.
#
# @param contrib [Nokogiri::XML::Element] `<author>` element
# @return [Hash{Symbol=>RelatonBib::Organization, Symbol=>Array<Hash>}, nil]
#   `{ entity:, role: }`, or nil when there is no usable organization
def organization(contrib)
  org = contrib.at("./organization")
  return if org.nil? || org.text.empty?

  { entity: new_org(org.text, org[:abbrev]),
    role: [contributor_role(contrib)] }
end
#parse(bibxml, url: nil, is_relation: false, ver: nil) ⇒ Object
6 7 8 9 |
# File 'lib/relaton_bib/bibxml_parser.rb', line 6
#
# Parse a BibXML document and convert its root `referencegroup` or
# `reference` element into a bibliographic item.
#
# @param bibxml [String] BibXML content
# @param url [String, nil] URL the BibXML was fetched from
# @param is_relation [Boolean] forwarded to #fetch_rfc
# @param ver [String, nil] Internet-Draft version, forwarded to #fetch_rfc
# @return [RelatonBib::BibliographicItem, nil] whatever #fetch_rfc returns;
#   nil when neither root element is found
def parse(bibxml, url: nil, is_relation: false, ver: nil)
  root = Nokogiri::XML(bibxml).at("/referencegroup", "/reference")
  fetch_rfc(root, url: url, is_relation: is_relation, ver: ver)
end
#person(author, reference) ⇒ Array<Hash{Symbol=>RelatonBib::Person,Symbol=>Array<String>}>
171 172 173 174 175 176 177 178 179 180 181 |
# File 'lib/relaton_bib/bibxml_parser.rb', line 171
#
# Build a person contributor from an `<author>` element: name, affiliation
# and the contacts found under `./address`.
#
# @param author [Nokogiri::XML::Element] `<author>` element
# @param reference [Nokogiri::XML::Element] enclosing reference, used for
#   the language of the name
# @return [Hash{Symbol=>RelatonBib::Person, Symbol=>Array<Hash>}]
#   `{ entity:, role: }`
def person(author, reference)
  entity = Person.new(
    name: full_name(author, reference),
    affiliation: affiliation(author),
    contact: contacts(author.at("./address")),
  )
  { entity: entity, role: [contributor_role(author)] }
end
#relations(reference) ⇒ Hash
282 283 284 285 286 |
# File 'lib/relaton_bib/bibxml_parser.rb', line 282
#
# Describe each direct `reference` child as an "includes" relation whose
# bibitem is the child parsed by #fetch_rfc with `is_relation: true`.
#
# @param reference [Nokogiri::XML::Element]
# @return [Array<Hash>] hashes with `:type` and `:bibitem`
def relations(reference)
  members = reference.xpath("reference")
  members.map do |member|
    included = fetch_rfc(member, is_relation: true)
    { type: "includes", bibitem: included }
  end
end
#series(reference) ⇒ Array<RelatonBib::Series>
Extract series from reference
333 334 335 336 337 338 339 340 341 342 |
# File 'lib/relaton_bib/bibxml_parser.rb', line 333
#
# Extract series from reference.
#
# Every `seriesInfo` element (under the reference or under `front`) becomes
# a "main" series titled with its `name` and numbered with its `value`,
# except elements named "DOI" and elements carrying a `stream` or `status`
# attribute.
#
# @param reference [Nokogiri::XML::Element]
# @return [Array<RelatonBib::Series>]
def series(reference)
  infos = reference.xpath("./seriesInfo", "./front/seriesInfo")
  usable = infos.reject do |si|
    si[:name] == "DOI" || si[:stream] || si[:status]
  end

  usable.map do |si|
    title = TypedTitleString.new(
      content: si[:name], language: language(reference), script: "Latn",
    )
    Series.new(title: title, number: si[:value], type: "main")
  end
end
#status(reference) ⇒ RelatonBib::DocumentStatus
Extract status
103 104 105 106 |
# File 'lib/relaton_bib/bibxml_parser.rb', line 103
#
# Extract status.
#
# The status is read from the first series-info element that has a `status`
# attribute. XPath names are case-sensitive, so besides the lowercase
# `seriesinfo` spelling (kept for backward compatibility) the lookup covers
# `seriesInfo` under the reference and under `front`, the locations #docids
# and #series read.
#
# @param reference [Nokogiri::XML::Element]
# @return [RelatonBib::DocumentStatus, nil] nil when no element has a status
def status(reference)
  st = reference.at(
    "./seriesinfo[@status]",
    "./seriesInfo[@status]",
    "./front/seriesInfo[@status]",
  )
  DocumentStatus.new(stage: st[:status]) if st
end
#titles(reference) ⇒ Array<Hash>
127 128 129 130 131 |
# File 'lib/relaton_bib/bibxml_parser.rb', line 127
#
# Describe every `./front/title` element as a title hash in the reference's
# language with script "Latn".
#
# @param reference [Nokogiri::XML::Element]
# @return [Array<Hash>] hashes with `:content`, `:language` and `:script`
def titles(reference)
  title_nodes = reference.xpath("./front/title")
  title_nodes.map do |node|
    text = node.text
    { content: text, language: language(reference), script: "Latn" }
  end
end