Module: RelatonBib::BibXMLParser

Extended by:
BibXMLParser
Included in:
BibXMLParser
Defined in:
lib/relaton_bib/bibxml_parser.rb

Constant Summary collapse

SERIESINFONAMES =

Names of SeriesInfo entries that should be saved as document identifiers in the Relaton model.

["DOI"].freeze
RFCPREFIXES =
%w[RFC BCP FYI STD].freeze
FLAVOR =
nil
ORGNAMES =
{
  "IEEE" => "Institute of Electrical and Electronics Engineers",
  "W3C" => "World Wide Web Consortium",
  "3GPP" => "3rd Generation Partnership Project",
}.freeze

Instance Method Summary collapse

Instance Method Details

#abstracts(ref) ⇒ Array<RelatonBib::FormattedString>

Parameters:

  • ref (Nokogiri::XML::Element)

Returns:



205
206
207
208
209
210
211
212
# File 'lib/relaton_bib/bibxml_parser.rb', line 205

# Turn every <abstract> found under <front> into an HTML formatted string.
#
# <t> tags are renamed to <p> (swallowing the whitespace around them), tabs
# and newlines become spaces, and runs of spaces are collapsed to one.
#
# @param ref [Nokogiri::XML::Element] reference element
# @return [Array<RelatonBib::FormattedString>]
def abstracts(ref)
  ref.xpath("./front/abstract").map do |abstract|
    html = abstract.inner_html
    html = html.gsub(/\s*(<\/?)t(>)\s*/, '\1p\2')
    html = html.gsub(/[\t\n]/, " ")
    FormattedString.new(
      content: html.squeeze(" "),
      language: language(ref),
      script: "Latn",
      format: "text/html",
    )
  end
end

#add_contact(conts, type, value) ⇒ Object

Parameters:



334
335
336
# File 'lib/relaton_bib/bibxml_parser.rb', line 334

# Append a Contact built from the text of +value+ to +conts+.
# Does nothing when +value+ is nil.
#
# @param conts [Array] list being built
# @param type [String] contact type
# @param value [#text, nil] node whose text becomes the contact value
# @return [Array, nil] +conts+ when a contact was added, otherwise nil
def add_contact(conts, type, value)
  return unless value

  conts.push(Contact.new(type: type, value: value.text))
end

#address(postal) ⇒ Object

Parameters:

  • postal (Nokogiri::XML::Element)


320
321
322
323
324
325
326
327
328
329
# File 'lib/relaton_bib/bibxml_parser.rb', line 320

# Build an Address from a <postal> element.
#
# The street list holds the text of the first <postalLine> or <street>
# match, or is empty when neither exists.
#
# @param postal [Nokogiri::XML::Element]
# @return [RelatonBib::Address]
def address(postal)
  text_at = ->(path) { postal.at(path)&.text }
  Address.new(
    street: [text_at.call("./postalLine | ./street")].compact,
    city: text_at.call("./city"),
    postcode: text_at.call("./code"),
    country: text_at.call("./country"),
    state: text_at.call("./region"),
  )
end

#affiliation(author) ⇒ Array<RelatonBib::Affiliation>

Parameters:

  • author (Nokogiri::XML::Element)

Returns:



281
282
283
284
285
286
287
# File 'lib/relaton_bib/bibxml_parser.rb', line 281

# Build the affiliation list of an author from its <organization> child.
#
# @param author [Nokogiri::XML::Element]
# @return [Array<RelatonBib::Affiliation>] empty when the organization is
#   missing or has no text
def affiliation(author)
  org_node = author.at("./organization")
  return [] unless org_node

  org_name = org_node.text
  return [] if org_name.empty?

  [Affiliation.new(organization: new_org(org_name, org_node[:abbrev]))]
end

#bib_item(**attrs) ⇒ RelatonBib::BibliographicItem

Parameters:

  • attrs (Hash)

Returns:



68
69
70
71
# File 'lib/relaton_bib/bibxml_parser.rb', line 68

# Instantiate the bibliographic item from the collected attributes.
#
# @param attrs [Hash] keyword attributes forwarded unchanged
# @return [RelatonBib::BibliographicItem]
def bib_item(**attrs)
  BibliographicItem.new(**attrs)
end

#committee(wgr) ⇒ RelatonBib::TechnicalCommittee



381
382
383
# File 'lib/relaton_bib/bibxml_parser.rb', line 381

# Wrap a work group in a technical committee.
#
# @param wgr [RelatonBib::WorkGroup]
# @return [RelatonBib::TechnicalCommittee]
def committee(wgr)
  TechnicalCommittee.new(wgr)
end

#contacts(addr) ⇒ Array<RelatonBib::Address, RelatonBib::Contact>

Parameters:

  • addr (Nokogiri::XML::Element, nil)

Returns:



306
307
308
309
310
311
312
313
314
315
316
# File 'lib/relaton_bib/bibxml_parser.rb', line 306

# Collect the contact entries of an <address> element.
#
# A postal address is added only when <postal> has both a <city> and a
# <country>; phone, email and uri entries follow in that order.
#
# @param addr [Nokogiri::XML::Element, nil]
# @return [Array] empty when +addr+ is nil
def contacts(addr)
  return [] unless addr

  found = []
  postal = addr.at("./postal")
  found << address(postal) if postal && postal.at("./city") && postal.at("./country")
  [["phone", "./phone"], ["email", "./email"], ["uri", "./uri"]].each do |kind, path|
    add_contact(found, kind, addr.at(path))
  end
  found
end

#contributor_role(author) ⇒ Hash

Parameters:

  • author (Nokogiri::XML::Document)

Returns:

  • (Hash)


340
341
342
# File 'lib/relaton_bib/bibxml_parser.rb', line 340

# Describe the role of a contributor, falling back to "author" when the
# element has no role attribute.
#
# @param author [Nokogiri::XML::Element]
# @return [Hash] hash with a single :type key
def contributor_role(author)
  role = author[:role]
  { type: role || "author" }
end

#contributors(reference) ⇒ Array<Hash>

Parameters:

  • reference (Nokogiri::XML::Element)

Returns:



216
217
218
219
220
221
222
223
224
# File 'lib/relaton_bib/bibxml_parser.rb', line 216

# Build contributor hashes for every <author> under <front>.
#
# Each author is treated as a person first and as an organization second;
# authors that yield neither are left out.
#
# @param reference [Nokogiri::XML::Element]
# @return [Array<Hash>] hashes with :entity and :role keys
def contributors(reference)
  lang = language reference
  reference.xpath("./front/author").each_with_object([]) do |node, list|
    entity = person(node, lang) || organization(node)
    list << { entity: entity, role: [contributor_role(node)] } if entity
  end
end

#create_docid(id, ver) ⇒ Object

rubocop:disable Metrics/MethodLength



122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
# File 'lib/relaton_bib/bibxml_parser.rb', line 122

# Create the primary document identifier from a raw identifier string.
#
# * RFCPREFIXES prefix: "<prefix> <number>" with leading zeros dropped.
# * "I-D" or "draft" prefix: "draft-<rest>", typed "Internet-Draft"; when
#   +ver+ is given it replaces a trailing two-digit version.
# * anything else: "<prefix> <rest>", or the identifier itself when it has
#   no recognised prefix.
#
# @param id [String] raw identifier
# @param ver [String, nil] Internet Draft version
# @return [RelatonBib::DocumentIdentifier]
def create_docid(id, ver)
  pref, num = id_to_pref_num(id)
  pid, type =
    if RFCPREFIXES.include?(pref)
      ["#{pref} #{num.sub(/^-?0+/, '')}", pubid_type(id)]
    elsif %w[I-D draft].include?(pref)
      draft = "draft-#{num}"
      [ver ? draft.sub(/(?<=-)\d{2}$/, ver) : draft, "Internet-Draft"]
    else
      [pref ? "#{pref} #{num}" : id, pubid_type(id)]
    end
  DocumentIdentifier.new(type: type, id: pid, primary: true)
end

#dates(reference) ⇒ Array<RelatonBib::BibliographicDate>

Extract date from reference.

Parameters:

  • reference (Nokogiri::XML::Element)

Returns:



358
359
360
361
362
363
364
365
366
367
# File 'lib/relaton_bib/bibxml_parser.rb', line 358

# Extract the publication date from <front>/<date>.
#
# The date string is built from the most significant part down: the year,
# then the month (converted by #month), then the day. A day is only
# appended when a month is present, because "YYYY-DD" would be read as a
# year and a month. Empty attributes are treated like missing ones.
#
# @param reference [Nokogiri::XML::Element]
# @return [Array<RelatonBib::BibliographicDate>] a single "published" date,
#   or an empty array when there is no <date> or no year
def dates(reference)
  date = reference.at "./front/date"
  year = date && date[:year]
  return [] if year.nil? || year.empty?

  parts = [year]
  mon = date[:month]
  if mon && !mon.empty?
    parts << month(mon)
    day = date[:day]
    # skip an empty day attribute so the result never ends with a dangling "-"
    parts << day if day && !day.empty?
  end
  [BibliographicDate.new(type: "published", on: parts.join("-"))]
end

#docids(reference, ver) ⇒ Array<RelatonBib::DocumentIdentifier>

Extract document identifiers from reference

Parameters:

  • reference (Nokogiri::XML::Element)
  • ver (String, nil)

    Internet Draft version

Returns:



87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
# File 'lib/relaton_bib/bibxml_parser.rb', line 87

# Extract document identifiers from a BibXML reference.
#
# The result is assembled in three parts, in this order:
# 1. one primary identifier, taken from an "Internet-Draft" seriesInfo when
#    there is one, otherwise created from the first of the anchor, docName
#    and number attributes that is set;
# 2. one scoped identifier for each of the anchor, docName and number
#    attributes that is set;
# 3. one identifier for every seriesInfo whose name is in SERIESINFONAMES.
#
# @param reference [Nokogiri::XML::Element]
# @param ver [String, nil] Internet Draft version
# @return [Array<RelatonBib::DocumentIdentifier>]
def docids(reference, ver) # rubocop:disable Metrics/MethodLength,Metrics/CyclomaticComplexity,Metrics/PerceivedComplexity,Metrics/AbcSize
  ret = []
  # seriesInfo may sit directly under the reference or under <front>
  si = reference.at("./seriesInfo[@name='Internet-Draft']",
                    "./front/seriesInfo[@name='Internet-Draft']")
  if si
    id = si[:value]
    # replace a trailing two-digit suffix that follows a hyphen with ver
    id.sub!(/(?<=-)\d{2}$/, ver) if ver
    ret << DocumentIdentifier.new(type: "Internet-Draft", id: id, primary: true)
  else
    id = reference[:anchor] || reference[:docName] || reference[:number]
    ret << create_docid(id, ver) if id
  end

  # scoped identifiers: one per attribute that is present
  %w[anchor docName number].each do |atr|
    if reference[atr]
      pref, num = id_to_pref_num reference[atr]
      # an anchor with an RFCPREFIXES prefix is rewritten as prefix + number
      # (no separator, leading zeros dropped); other values are kept verbatim
      atrid = if atr == "anchor" && RFCPREFIXES.include?(pref)
                "#{pref}#{num.sub(/^-?0+/, '')}"
              else
                reference[atr]
              end
      # NOTE(review): the type is derived from `id` (the primary identifier
      # value chosen above), not from reference[atr] - confirm this is intended
      type = pubid_type id
      ret << DocumentIdentifier.new(id: atrid, type: type, scope: atr)
    end
  end

  # seriesInfo entries not listed in SERIESINFONAMES produce nil via `next`
  # and are removed by compact
  ret + reference.xpath("./seriesInfo", "./front/seriesInfo").map do |si|
    next unless SERIESINFONAMES.include? si[:name]

    id = si[:value]
    # id.sub!(/(?<=-)\d{2}$/, ver) if ver && si[:name] == "Internet-Draft"
    DocumentIdentifier.new(id: id, type: si[:name])
  end.compact
end

#docnumber(reference) ⇒ Object



62
63
64
# File 'lib/relaton_bib/bibxml_parser.rb', line 62

# Derive the document number from the anchor by removing a leading
# "<word characters>." prefix, if there is one.
#
# @param reference [Nokogiri::XML::Element]
# @return [String, nil] nil when the reference has no anchor
def docnumber(reference)
  anchor = reference[:anchor]
  return if anchor.nil?

  anchor.sub(/^\w+\./, "")
end

#doctype(anchor) ⇒ String

Parameters:

  • anchor (String)

Returns:

  • (String)


411
412
413
414
415
416
417
# File 'lib/relaton_bib/bibxml_parser.rb', line 411

# Map a reference anchor to a document type. "I-D" is checked before
# "IEEE"; anything else, including a missing anchor, is "rfc".
#
# @param anchor [String, nil]
# @return [String] "internet-draft", "ieee" or "rfc"
def doctype(anchor)
  if /I-D/.match?(anchor)
    "internet-draft"
  elsif /IEEE/.match?(anchor)
    "ieee"
  else
    "rfc"
  end
end

#editorialgroup(reference) ⇒ RelatonBib::EditorialGroup?

Parameters:

  • reference (Nokogiri::XML::Element)

Returns:



371
372
373
374
375
376
377
# File 'lib/relaton_bib/bibxml_parser.rb', line 371

# Build an editorial group from the <workgroup> elements under <front>.
#
# @param reference [Nokogiri::XML::Element]
# @return [RelatonBib::EditorialGroup, nil] nil when there is no work group
def editorialgroup(reference)
  committees = reference.xpath("./front/workgroup").map do |node|
    committee(WorkGroup.new(name: node.text))
  end
  return if committees.none?

  EditorialGroup.new(committees)
end

#fetch_rfc(reference, is_relation: false, url: nil, ver: nil) ⇒ RelatonBib::BibliographicItem

Parameters:

  • reference (Nokogiri::XML::Element, nil)
  • is_relation (Boolean) (defaults to: false)

    don’t add fetched date for relation if true

  • url (String, nil) (defaults to: nil)
  • ver (String, nil) (defaults to: nil)

    Internet Draft version

Returns:



35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
# File 'lib/relaton_bib/bibxml_parser.rb', line 35

# Convert a BibXML <reference> or <referencegroup> element into a
# bibliographic item by delegating each attribute to its extractor method.
#
# @param reference [Nokogiri::XML::Element, nil]
# @param is_relation [Boolean] passed through to the item
# @param url [String, nil] source URL, passed to #link
# @param ver [String, nil] Internet Draft version
# @return [RelatonBib::BibliographicItem, nil] nil when +reference+ is nil
def fetch_rfc(reference, is_relation: false, url: nil, ver: nil) # rubocop:disable Metrics/AbcSize,Metrics/MethodLength
  return unless reference

  bib_item(
    is_relation: is_relation,
    docnumber: docnumber(reference),
    type: "standard",
    docid: docids(reference, ver),
    status: status(reference),
    language: [language(reference)],
    script: ["Latn"],
    link: link(reference, url, ver),
    title: titles(reference),
    formattedref: formattedref(reference),
    abstract: abstracts(reference),
    contributor: contributors(reference),
    relation: relations(reference),
    date: dates(reference),
    editorialgroup: editorialgroup(reference),
    series: series(reference),
    keyword: reference.xpath("front/keyword").map(&:text),
    doctype: doctype(reference[:anchor]),
  )
end

#forename(initials, lang = nil, script = nil) ⇒ Array<RelatonBib::Forename>

Create forenames with initials

Parameters:

  • initials (String)

    initials

  • lang (String) (defaults to: nil)

    language

Returns:



271
272
273
274
275
276
277
# File 'lib/relaton_bib/bibxml_parser.rb', line 271

# Create one forename per initial.
#
# Initials are separated by a period (optionally followed by a hyphen
# and/or one whitespace character) or by a single whitespace character.
#
# @param initials [String, nil]
# @param lang [String, nil] language
# @param script [String, nil] script
# @return [Array<RelatonBib::Forename>] empty when +initials+ is nil
def forename(initials, lang = nil, script = nil)
  tokens = initials ? initials.split(/\.-?\s?|\s/) : []
  tokens.map do |token|
    Forename.new(initial: token, language: lang, script: script)
  end
end

#formattedref(reference) ⇒ RelatonBib::FormattedRef?

Parameters:

  • reference (Nokogiri::XML::Element)

Returns:



192
193
194
195
196
197
198
199
200
201
# File 'lib/relaton_bib/bibxml_parser.rb', line 192

# Build a formatted reference for references that have no title, using the
# first of the anchor, docName and number attributes that is set.
#
# @param reference [Nokogiri::XML::Element]
# @return [RelatonBib::FormattedRef, nil] nil when a title exists or none
#   of the three attributes is set
def formattedref(reference)
  return if reference.at("./front/title")

  cont = reference[:anchor] || reference[:docName] || reference[:number]
  return unless cont

  FormattedRef.new(content: cont, language: language(reference), script: "Latn")
end

#full_name(fname, sname, inits, lang) ⇒ RelatonBib::FullName

Parameters:

  • fname (String)

    full name

  • sname (String)

    surname

  • inits (String)

    initials

  • lang (String)

    language

Returns:



255
256
257
258
259
260
261
# File 'lib/relaton_bib/bibxml_parser.rb', line 255

# Assemble a person's full name from the author attributes.
#
# @param fname [String, nil] full name
# @param sname [String, nil] surname
# @param inits [String, nil] initials
# @param lang [String] language
# @return [RelatonBib::FullName]
def full_name(fname, sname, inits, lang)
  localized_inits = inits ? localized_string(inits, lang) : nil
  FullName.new(
    completename: localized_string(fname, lang),
    initials: localized_inits,
    forename: forename(inits, lang),
    surname: localized_string(sname, lang),
  )
end

#id_to_pref_num(id) ⇒ Object



138
139
140
141
# File 'lib/relaton_bib/bibxml_parser.rb', line 138

# Split an identifier into a known prefix and the remainder.
#
# Recognised prefixes are "I-D", "draft", "3GPP", "W3C" or at least two
# capital letters; one optional ".", "_" or "-" separator is dropped.
#
# @param id [String, nil]
# @return [Array<String>, nil] [prefix, remainder], or nil when the
#   identifier does not match
def id_to_pref_num(id)
  matched = /^(?<pref>I-D|draft|3GPP|W3C|[A-Z]{2,})[._-]?(?<num>.+)/.match(id)
  return unless matched

  [matched[:pref], matched[:num]]
end

#language(reference) ⇒ String

Parameters:

  • reference (Nokogiri::XML::Element)

Returns:

  • (String)


75
76
77
# File 'lib/relaton_bib/bibxml_parser.rb', line 75

# Language of the reference, read from its lang attribute; "en" when the
# attribute is absent.
#
# @param reference [Nokogiri::XML::Element]
# @return [String]
def language(reference)
  declared = reference[:lang]
  declared || "en"
end

#link(reference, url, ver) ⇒ Array<Hash>

Parameters:

  • reference (Nokogiri::XML::Element)
  • url (String)
  • ver (String, nil)

    Internet Draft version

Returns:



169
170
171
172
173
174
175
176
177
178
179
180
# File 'lib/relaton_bib/bibxml_parser.rb', line 169

# Collect the links of a reference.
#
# Adds an "xml" link for +url+, a "src" link for the target attribute and,
# for anchors starting with "I-D", one link per <format> child. When +ver+
# is given it replaces the two-digit version that precedes the file
# extension in each format target.
#
# @param reference [Nokogiri::XML::Element]
# @param url [String, nil] source URL
# @param ver [String, nil] Internet Draft version
# @return [Array<Hash>] hashes with :type and :content keys
def link(reference, url, ver)
  l = []
  l << { type: "xml", content: url } if url
  l << { type: "src", content: reference[:target] } if reference[:target]
  return l unless /^I-D/.match? reference[:anchor]

  reference.xpath("format").each_with_object(l) do |f, links|
    target = f[:target]
    # a <format> without a target attribute must not raise on nil.sub
    target = target.sub(/(?<=-)\d{2}(?=\.)/, ver) if ver && target
    links << { type: f[:type], content: target }
  end
end

#localized_string(content, lang, script = nil) ⇒ RelatonBib::LocalizedString?

Parameters:

  • content (String, nil)
  • lang (String, nil)
  • script (String, nil) (defaults to: nil)

Returns:



300
301
302
# File 'lib/relaton_bib/bibxml_parser.rb', line 300

# Wrap content in a localized string; nil content yields nil.
#
# @param content [String, nil]
# @param lang [String, nil]
# @param script [String, nil]
# @return [RelatonBib::LocalizedString, nil]
def localized_string(content, lang, script = nil)
  return unless content

  LocalizedString.new(content, lang, script)
end

#month(mon) ⇒ Object



385
386
387
388
389
390
# File 'lib/relaton_bib/bibxml_parser.rb', line 385

# Convert a month given by name into a two-digit number string.
#
# An all-digit value is returned unchanged. Otherwise the result is the
# index of the first Date::MONTHNAMES entry containing +mon+, left-padded
# with "0" to two characters ("00" when no month name contains it).
#
# @param mon [String] month number or (part of) an English month name
# @return [String]
def month(mon)
  numeric = /^\d+$/.match?(mon)
  return mon if numeric

  position = Date::MONTHNAMES.index { |name| name&.include?(mon) }
  position.to_s.rjust(2, "0")
end

#new_org(name, abbr = nil) ⇒ RelatonBib::Organization

Parameters:

  • name (String)
  • abbr (String, nil) (defaults to: nil)

Returns:



292
293
294
# File 'lib/relaton_bib/bibxml_parser.rb', line 292

# Instantiate an organization.
#
# @param name [String]
# @param abbr [String, nil] abbreviation
# @return [RelatonBib::Organization]
def new_org(name, abbr = nil)
  Organization.new(name: name, abbreviation: abbr)
end

#organization(contrib) ⇒ RelatonBib::Organization?

Parameters:

  • contrib (Nokogiri::XML::Element)

Returns:



239
240
241
242
243
244
245
246
247
248
# File 'lib/relaton_bib/bibxml_parser.rb', line 239

# Build an organization from the <organization> child of an author.
#
# The name is stripped of surrounding whitespace and expanded through
# ORGNAMES when it is a known short name.
#
# @param contrib [Nokogiri::XML::Element]
# @return [RelatonBib::Organization, nil] nil when the element is missing
#   or its stripped text is empty
def organization(contrib)
  node = contrib.at("./organization")
  stripped = node.text.strip if node
  return if stripped.nil? || stripped.empty?

  new_org(ORGNAMES.fetch(stripped, stripped), node[:abbrev])
end

#parse(bibxml, url: nil, is_relation: false, ver: nil) ⇒ RelatonBib::BibliographicItem?

Parse BibXML content

Parameters:

  • bibxml (String)

    content

  • url (String, nil) (defaults to: nil)

    source URL

  • is_relation (Boolean) (defaults to: false)

    true if the content is relation item

  • ver (String, nil) (defaults to: nil)

    version

Returns:

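  • (RelatonBib::BibliographicItem, nil)

    parsed item, or nil when the document has no referencegroup or reference root element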



25
26
27
28
# File 'lib/relaton_bib/bibxml_parser.rb', line 25

# Parse BibXML content into a bibliographic item.
#
# The root <referencegroup> or <reference> element is located and handed
# to #fetch_rfc together with the options.
#
# @param bibxml [String] BibXML content
# @param url [String, nil] source URL
# @param is_relation [Boolean] true if the content is a relation item
# @param ver [String, nil] version
# @return [RelatonBib::BibliographicItem, nil]
def parse(bibxml, url: nil, is_relation: false, ver: nil)
  root = Nokogiri::XML(bibxml).at("/referencegroup", "/reference")
  fetch_rfc(root, url: url, is_relation: is_relation, ver: ver)
end

#person(author, lang) ⇒ RelatonBib::Person?

Parameters:

  • author (Nokogiri::XML::Element)
  • lang (String)

Returns:



229
230
231
232
233
234
235
# File 'lib/relaton_bib/bibxml_parser.rb', line 229

# Build a person from an <author> element that carries a fullname or a
# surname attribute.
#
# @param author [Nokogiri::XML::Element]
# @param lang [String] language
# @return [RelatonBib::Person, nil] nil when neither attribute is set
def person(author, lang)
  fullname = author[:fullname]
  surname = author[:surname]
  return unless fullname || surname

  Person.new(
    name: full_name(fullname, surname, author[:initials], lang),
    affiliation: affiliation(author),
    contact: contacts(author.at("./address")),
  )
end

#pubid_type(id) ⇒ String

Extract document identifier type from identifier

Parameters:

  • id (String)

    identifier

Returns:

  • (String)


150
151
152
# File 'lib/relaton_bib/bibxml_parser.rb', line 150

# Extract the document identifier type (the recognised prefix) from an
# identifier.
#
# @param id [String] identifier
# @return [String, nil] nil when the identifier has no recognised prefix
def pubid_type(id)
  prefix, _number = id_to_pref_num(id)
  prefix
end

#relations(reference) ⇒ Array<Hash>

Parameters:

  • reference (Nokogiri::XML::Element)

Returns:

  • (Array<Hash>)


346
347
348
349
350
# File 'lib/relaton_bib/bibxml_parser.rb', line 346

# Build an "includes" relation for every direct <reference> child, each
# parsed as a relation item.
#
# @param reference [Nokogiri::XML::Element]
# @return [Array<Hash>] hashes with :type and :bibitem keys
def relations(reference)
  reference.xpath("reference").each_with_object([]) do |child, list|
    list << { type: "includes", bibitem: fetch_rfc(child, is_relation: true) }
  end
end

#series(reference) ⇒ Array<RelatonBib::Series>

Extract series from reference

Parameters:

  • reference (Nokogiri::XML::Element)

Returns:



398
399
400
401
402
403
404
405
406
407
# File 'lib/relaton_bib/bibxml_parser.rb', line 398

# Extract series from the seriesInfo elements of a reference.
#
# Entries whose name is in SERIESINFONAMES, or that carry a stream or
# status attribute, are left out.
#
# @param reference [Nokogiri::XML::Element]
# @return [Array<RelatonBib::Series>]
def series(reference)
  infos = reference.xpath("./seriesInfo", "./front/seriesInfo")
  kept = infos.reject do |si|
    SERIESINFONAMES.include?(si[:name]) || si[:stream] || si[:status]
  end
  kept.map do |si|
    title = TypedTitleString.new(
      content: si[:name], language: language(reference), script: "Latn",
    )
    Series.new(title: title, number: si[:value], type: "main")
  end
end

#status(reference) ⇒ RelatonBib::DocumentStatus

extract status

Parameters:

  • reference (Nokogiri::XML::Element)

Returns:



160
161
162
163
# File 'lib/relaton_bib/bibxml_parser.rb', line 160

# Extract the document status from a seriesInfo element that has a status
# attribute.
#
# XPath name tests are case-sensitive, so the element is looked up as
# "seriesInfo", the spelling the other extractors in this module use, both
# directly under the reference and under <front>. The former all-lowercase
# path is kept as a last alternative for backward compatibility.
#
# @param reference [Nokogiri::XML::Element]
# @return [RelatonBib::DocumentStatus, nil] nil when no seriesInfo carries
#   a status attribute
def status(reference)
  st = reference.at("./seriesInfo[@status]", "./front/seriesInfo[@status]",
                    "./seriesinfo[@status]")
  DocumentStatus.new(stage: st[:status]) if st
end

#titles(reference) ⇒ Array<Hash>

Parameters:

  • reference (Nokogiri::XML::Element)

Returns:



184
185
186
187
188
# File 'lib/relaton_bib/bibxml_parser.rb', line 184

# Collect the titles found under <front> as attribute hashes.
#
# @param reference [Nokogiri::XML::Element]
# @return [Array<Hash>] hashes with :content, :language and :script keys
def titles(reference)
  reference.xpath("./front/title").each_with_object([]) do |node, list|
    list << { content: node.text, language: language(reference), script: "Latn" }
  end
end