Module: RelatonBib::BibXMLParser

Extended by:
BibXMLParser
Included in:
BibXMLParser
Defined in:
lib/relaton_bib/bibxml_parser.rb

Constant Summary collapse

SERIESINFONAMES =

Names of SeriesInfo elements whose values should be saved as document identifiers in the Relaton model.

["DOI"].freeze
# Identifier prefixes that create_docid and docids treat specially: when an
# identifier starts with one of these, leading zeros are stripped from its
# number part.
RFCPREFIXES =
%w[RFC BCP FYI STD].freeze
# Not referenced by any method visible in this listing.
# NOTE(review): presumably a hook for flavor-specific parsers that extend
# this module - confirm against the callers.
FLAVOR =
nil
# Maps the text of an <organization> element (an abbreviation) to the full
# organization name used by #organization when building contributors.
ORGNAMES =
{
  "IEEE" => "Institute of Electrical and Electronics Engineers",
  "W3C" => "World Wide Web Consortium",
  "3GPP" => "3rd Generation Partnership Project",
}.freeze

Instance Method Summary collapse

Instance Method Details

#abstracts(ref) ⇒ Array<RelatonBib::FormattedString>

Parameters:

  • ref (Nokogiri::XML::Element)

Returns:



195
196
197
198
199
200
201
202
# File 'lib/relaton_bib/bibxml_parser.rb', line 195

# Converts every front/abstract element of the reference into a formatted
# string.
#
# BibXML <t> tags (and the whitespace around them) are rewritten as <p> tags,
# tabs and newlines become spaces, and runs of spaces are collapsed.
#
# @param ref [Nokogiri::XML::Element] reference element
# @return [Array<RelatonBib::FormattedString>] one entry per abstract
def abstracts(ref)
  ref.xpath("./front/abstract").map do |abstract|
    html = abstract.children.to_s
    html = html.gsub(/\s*(<\/?)t(>)\s*/, '\1p\2')
    html = html.gsub(/[\t\n]/, " ")
    FormattedString.new(
      content: html.squeeze(" "),
      language: language(ref),
      script: "Latn",
      format: "text/html",
    )
  end
end

#add_contact(conts, type, value) ⇒ Object

Parameters:



318
319
320
# File 'lib/relaton_bib/bibxml_parser.rb', line 318

# Appends a contact of the given type to the list when the XML node exists.
#
# @param conts [Array] contact list, modified in place
# @param type [String] contact type handed to Contact
# @param value [Nokogiri::XML::Element, nil] node whose text is the contact value
# @return [Array, nil] +conts+ after appending, or nil when +value+ is missing
def add_contact(conts, type, value)
  return unless value

  conts << Contact.new(type: type, value: value.text)
end

#address(postal) ⇒ Object

Parameters:

  • postal (Nokogiri::XML::Element)


302
303
304
305
306
307
308
309
310
311
312
313
# File 'lib/relaton_bib/bibxml_parser.rb', line 302

# Builds an Address from a postal element.
#
# The street comes from the first postalLine child or, failing that, the first
# street child; the remaining fields are read from city, code, country and
# region children and are nil when the child is absent.
#
# @param postal [Nokogiri::XML::Element] postal element
# @return [RelatonBib::Address]
def address(postal) # rubocop:disable Metrics/CyclomaticComplexity
  street_node = postal.at("./postalLine") || postal.at("./street")
  text_of = ->(path) { postal.at(path)&.text }
  Address.new(
    street: [street_node&.text].compact,
    city: text_of.call("./city"),
    postcode: text_of.call("./code"),
    country: text_of.call("./country"),
    state: text_of.call("./region"),
  )
end

#affiliation(author) ⇒ Array<RelatonBib::Affiliation>

Parameters:

  • author (Nokogiri::XML::Element)

Returns:



263
264
265
266
267
268
269
# File 'lib/relaton_bib/bibxml_parser.rb', line 263

# Builds the affiliation list for an author from its organization child.
#
# @param author [Nokogiri::XML::Element] author element
# @return [Array<RelatonBib::Affiliation>] a single affiliation, or an empty
#   array when the organization element is missing or has empty text
def affiliation(author)
  org_node = author.at("./organization")
  return [] unless org_node

  org_name = org_node.text
  return [] if org_name.empty?

  [Affiliation.new(organization: new_org(org_name, org_node[:abbrev]))]
end

#bib_item(**attrs) ⇒ RelatonBib::BibliographicItem

Parameters:

  • attrs (Hash)

Returns:



58
59
60
61
# File 'lib/relaton_bib/bibxml_parser.rb', line 58

# Instantiates the bibliographic item from the collected attributes.
#
# Kept as a separate method so the item construction has a single entry point
# (an earlier revision set attrs[:place] here; that line is disabled).
#
# @param attrs [Hash] keyword arguments forwarded to BibliographicItem.new
# @return [RelatonBib::BibliographicItem]
def bib_item(**attrs)
  BibliographicItem.new(**attrs)
end

#committee(wgr) ⇒ RelatonBib::TechnicalCommittee



365
366
367
# File 'lib/relaton_bib/bibxml_parser.rb', line 365

# Wraps a work group in a technical committee.
#
# @param wgr [RelatonBib::WorkGroup] work group (as built by #editorialgroup)
# @return [RelatonBib::TechnicalCommittee]
def committee(wgr)
  TechnicalCommittee.new(wgr)
end

#contacts(addr) ⇒ Array<RelatonBib::Address, RelatonBib::Contact>

Parameters:

  • addr (Nokogiri::XML::Element, nil)

Returns:



288
289
290
291
292
293
294
295
296
297
298
# File 'lib/relaton_bib/bibxml_parser.rb', line 288

# Collects the contact information found under an address element.
#
# The postal address (if any) comes first, followed by phone, email and uri
# contacts in that order; absent children are skipped.
#
# @param addr [Nokogiri::XML::Element, nil] address element
# @return [Array] address and contact objects; empty when +addr+ is nil
def contacts(addr)
  return [] unless addr

  found = []
  postal = addr.at("./postal")
  found << address(postal) if postal
  [["phone", "./phone"], ["email", "./email"], ["uri", "./uri"]].each do |kind, path|
    add_contact(found, kind, addr.at(path))
  end
  found
end

#contributor_role(author) ⇒ Hash

Parameters:

  • author (Nokogiri::XML::Element)

Returns:

  • (Hash)


324
325
326
# File 'lib/relaton_bib/bibxml_parser.rb', line 324

# Builds the role hash for a contributor.
#
# @param author [Nokogiri::XML::Element] author element
# @return [Hash] +{ type: ... }+ using the element's role attribute, or
#   "author" when the attribute is absent
def contributor_role(author)
  role = author[:role] || "author"
  { type: role }
end

#contributors(reference) ⇒ Array<Hash>

Parameters:

  • reference (Nokogiri::XML::Element)

Returns:



206
207
208
209
210
211
212
213
# File 'lib/relaton_bib/bibxml_parser.rb', line 206

# Builds the contributor list from the front/author elements.
#
# An author carrying a fullname or surname attribute is parsed as a person;
# any other author is parsed as an organization. Nil results are dropped.
#
# @param reference [Nokogiri::XML::Element] reference element
# @return [Array<Hash>] contributor hashes in document order
def contributors(reference)
  authors = reference.xpath("./front/author")
  entries = authors.map do |contrib|
    named = contrib[:fullname] || contrib[:surname]
    named ? person(contrib, reference) : organization(contrib)
  end
  entries.compact
end

#create_docid(id, ver) ⇒ Object

rubocop:disable Metrics/MethodLength



112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
# File 'lib/relaton_bib/bibxml_parser.rb', line 112

# Builds the primary document identifier from a raw identifier string.
#
# - RFC-family prefixes (RFCPREFIXES): "<prefix> <number>" with leading zeros
#   (and an optional leading dash) removed from the number.
# - "I-D" / "draft" prefixes: "draft-<rest>", with a trailing two-digit
#   version replaced by +ver+ when given; the type is "Internet-Draft".
# - any other recognized prefix: "<prefix> <rest>"; unrecognized ids are used
#   unchanged.
#
# @param id [String] raw identifier (anchor, docName or number attribute)
# @param ver [String, nil] Internet-Draft version
# @return [RelatonBib::DocumentIdentifier] identifier flagged as primary
def create_docid(id, ver) # rubocop:disable Metrics/MethodLength
  prefix, number = id_to_pref_num(id)
  draft = %w[I-D draft].include?(prefix)
  pid =
    if RFCPREFIXES.include?(prefix)
      "#{prefix} #{number.sub(/^-?0+/, '')}"
    elsif draft
      draft_id = "draft-#{number}"
      ver ? draft_id.sub(/(?<=-)\d{2}$/, ver) : draft_id
    elsif prefix
      "#{prefix} #{number}"
    else
      id
    end
  type = draft ? "Internet-Draft" : pubid_type(id)
  DocumentIdentifier.new(type: type, id: pid, primary: true)
end

#dates(reference) ⇒ Array<RelatonBib::BibliographicDate>

Extract date from reference.

Parameters:

  • reference (Nokogiri::XML::Element)

Returns:



342
343
344
345
346
347
348
349
350
351
# File 'lib/relaton_bib/bibxml_parser.rb', line 342

# Extracts the publication date from the front/date element.
#
# The date string is built as YYYY, YYYY-MM or YYYY-MM-DD. The day is only
# used when a month is present (a "YYYY-DD" string would be read as a
# year-month), empty attributes are ignored, and month and day are
# zero-padded to two digits.
#
# @param reference [Nokogiri::XML::Element] reference element
# @return [Array<RelatonBib::BibliographicDate>] one "published" date, or an
#   empty array when there is no date element or no year
def dates(reference) # rubocop:disable Metrics/CyclomaticComplexity, Metrics/AbcSize
  date = reference.at "./front/date"
  year = date && date[:year]
  return [] if year.nil? || year.empty?

  parts = [year]
  mon = date[:month]
  unless mon.nil? || mon.empty?
    # month may hand back a numeric month unpadded, so pad here as well.
    parts << month(mon).rjust(2, "0")
    day = date[:day]
    parts << day.rjust(2, "0") unless day.nil? || day.empty?
  end
  # date = Time.parse(d).strftime "%Y-%m-%d"
  [BibliographicDate.new(type: "published", on: parts.join("-"))]
end

#docids(reference, ver) ⇒ Array<RelatonBib::DocumentIdentifier>

Extract document identifiers from reference

Parameters:

  • reference (Nokogiri::XML::Element)
  • ver (String, nil)

    Internet Draft version

Returns:



77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
# File 'lib/relaton_bib/bibxml_parser.rb', line 77

# Extracts document identifiers from a reference.
#
# The result contains, in order:
# 1. the primary identifier - taken from the Internet-Draft seriesInfo value
#    (with its trailing two-digit version replaced by +ver+ when given) or,
#    without such a seriesInfo, built by create_docid from the first present
#    attribute among anchor, docName and number;
# 2. one scoped identifier for each present anchor/docName/number attribute
#    (an RFC-family anchor is normalized to "<prefix><number>" without leading
#    zeros); its type is derived from the primary id;
# 3. one identifier for each seriesInfo whose name is in SERIESINFONAMES.
#
# @param reference [Nokogiri::XML::Element] reference element
# @param ver [String, nil] Internet-Draft version
# @return [Array<RelatonBib::DocumentIdentifier>]
def docids(reference, ver) # rubocop:disable Metrics/MethodLength,Metrics/CyclomaticComplexity,Metrics/PerceivedComplexity,Metrics/AbcSize
  ids = []
  draft_si = reference.at("./seriesInfo[@name='Internet-Draft']",
                          "./front/seriesInfo[@name='Internet-Draft']")
  if draft_si
    id = draft_si[:value]
    id = id.sub(/(?<=-)\d{2}$/, ver) if ver
    ids << DocumentIdentifier.new(type: "Internet-Draft", id: id, primary: true)
  else
    id = reference[:anchor] || reference[:docName] || reference[:number]
    ids << create_docid(id, ver) if id
  end

  %w[anchor docName number].each do |atr|
    value = reference[atr]
    next unless value

    pref, num = id_to_pref_num value
    normalize = atr == "anchor" && RFCPREFIXES.include?(pref)
    atrid = normalize ? "#{pref}#{num.sub(/^-?0+/, '')}" : value
    # The type deliberately comes from the primary id, not from this attribute.
    ids << DocumentIdentifier.new(id: atrid, type: pubid_type(id), scope: atr)
  end

  saved = reference.xpath("./seriesInfo", "./front/seriesInfo").select do |node|
    SERIESINFONAMES.include? node[:name]
  end
  ids + saved.map { |node| DocumentIdentifier.new(id: node[:value], type: node[:name]) }
end

#docnumber(reference) ⇒ Object



52
53
54
# File 'lib/relaton_bib/bibxml_parser.rb', line 52

# Derives the document number from the anchor attribute by dropping a leading
# "<word characters>." prefix (e.g. "RFC.").
#
# @param reference [Nokogiri::XML::Element] reference element
# @return [String, nil] nil when the reference has no anchor
def docnumber(reference)
  anchor = reference[:anchor]
  return if anchor.nil?

  anchor.sub(/^\w+\./, "")
end

#doctype(anchor) ⇒ String

Parameters:

  • anchor (String)

Returns:

  • (String)


395
396
397
398
399
400
401
# File 'lib/relaton_bib/bibxml_parser.rb', line 395

# Maps a reference anchor to a document type.
#
# @param anchor [String, nil] anchor attribute of the reference
# @return [String] "internet-draft" when the anchor contains "I-D", "ieee"
#   when it contains "IEEE", otherwise "rfc"
def doctype(anchor)
  return "internet-draft" if /I-D/.match?(anchor)
  return "ieee" if /IEEE/.match?(anchor)

  "rfc"
end

#editorialgroup(reference) ⇒ RelatonBib::EditorialGroup?

Parameters:

  • reference (Nokogiri::XML::Element)

Returns:



355
356
357
358
359
360
361
# File 'lib/relaton_bib/bibxml_parser.rb', line 355

# Builds the editorial group from the front/workgroup elements.
#
# Each workgroup's text becomes a WorkGroup that is wrapped by #committee.
#
# @param reference [Nokogiri::XML::Element] reference element
# @return [RelatonBib::EditorialGroup, nil] nil when no committee was built
def editorialgroup(reference)
  committees = reference.xpath("./front/workgroup").map do |node|
    committee(WorkGroup.new(name: node.text))
  end
  return unless committees.any?

  EditorialGroup.new(committees)
end

#fetch_rfc(reference, is_relation: false, url: nil, ver: nil) ⇒ RelatonBib::BibliographicItem

Parameters:

  • reference (Nokogiri::XML::Element, nil)
  • is_relation (Boolean) (defaults to: false)

    don’t add fetched date for relation if true

  • url (String, nil) (defaults to: nil)
  • ver (String, nil) (defaults to: nil)

    Internet Draft version

Returns:



25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
# File 'lib/relaton_bib/bibxml_parser.rb', line 25

# Converts a reference (or referencegroup) element into a bibliographic item.
#
# Every attribute is produced by the matching helper of this module and the
# result is handed to #bib_item.
#
# @param reference [Nokogiri::XML::Element, nil] element to convert
# @param is_relation [Boolean] forwarded to the item as +is_relation+
# @param url [String, nil] URL recorded as the "xml" link
# @param ver [String, nil] Internet-Draft version
# @return [RelatonBib::BibliographicItem, nil] nil when +reference+ is missing
def fetch_rfc(reference, is_relation: false, url: nil, ver: nil) # rubocop:disable Metrics/AbcSize,Metrics/MethodLength
  return unless reference

  # hash[:fetched] = Date.today.to_s unless is_relation
  bib_item(
    is_relation: is_relation,
    docnumber: docnumber(reference),
    type: "standard",
    docid: docids(reference, ver),
    status: status(reference),
    language: [language(reference)],
    script: ["Latn"],
    link: link(reference, url, ver),
    title: titles(reference),
    formattedref: formattedref(reference),
    abstract: abstracts(reference),
    contributor: contributors(reference),
    relation: relations(reference),
    date: dates(reference),
    editorialgroup: editorialgroup(reference),
    series: series(reference),
    keyword: reference.xpath("front/keyword").map(&:text),
    doctype: doctype(reference[:anchor]),
  )
end

#formattedref(reference) ⇒ RelatonBib::FormattedRef?

Parameters:

  • reference (Nokogiri::XML::Element)

Returns:



182
183
184
185
186
187
188
189
190
191
# File 'lib/relaton_bib/bibxml_parser.rb', line 182

# Builds a formatted reference for references that have no title.
#
# The content is the first present attribute among anchor, docName and number.
#
# @param reference [Nokogiri::XML::Element] reference element
# @return [RelatonBib::FormattedRef, nil] nil when the reference has a
#   front/title element or none of the attributes is present
def formattedref(reference)
  return if reference.at("./front/title")

  content = reference[:anchor] || reference[:docName] || reference[:number]
  return unless content

  FormattedRef.new(content: content, language: language(reference), script: "Latn")
end

#full_name(author, reference) ⇒ RelatonBib::FullName

Parameters:

  • author (Nokogiri::XML::Element)
  • reference (Nokogiri::XML::Element)

Returns:



252
253
254
255
256
257
258
259
# File 'lib/relaton_bib/bibxml_parser.rb', line 252

# Builds a person's full name from the author's fullname, initials and
# surname attributes, localized with the reference language.
#
# @param author [Nokogiri::XML::Element] author element
# @param reference [Nokogiri::XML::Element] reference element (language source)
# @return [RelatonBib::FullName]
def full_name(author, reference)
  lang = language(reference)
  localize = ->(attr) { localized_string(author[attr], lang) }
  FullName.new(
    completename: localize.call(:fullname),
    initial: [localize.call(:initials)].compact,
    surname: localize.call(:surname),
  )
end

#id_to_pref_num(id) ⇒ Object



128
129
130
131
# File 'lib/relaton_bib/bibxml_parser.rb', line 128

# Splits an identifier into its prefix and the remainder.
#
# Recognized prefixes are "I-D", "draft", "3GPP", "W3C" or two or more capital
# letters; one optional ".", "_" or "-" separator is dropped.
#
# @param id [String, nil] identifier
# @return [Array(String, String), nil] [prefix, rest], or nil when the
#   identifier does not start with a recognized prefix
def id_to_pref_num(id)
  parsed = /^(?<pref>I-D|draft|3GPP|W3C|[A-Z]{2,})[._-]?(?<num>.+)/.match(id)
  return unless parsed

  [parsed[:pref], parsed[:num]]
end

#language(reference) ⇒ String

Parameters:

  • reference (Nokogiri::XML::Element)

Returns:

  • (String)


65
66
67
# File 'lib/relaton_bib/bibxml_parser.rb', line 65

# Reads the language of a reference.
#
# @param reference [Nokogiri::XML::Element] reference element
# @return [String] the lang attribute, or "en" when it is absent
def language(reference)
  lang = reference[:lang]
  lang || "en"
end

Parameters:

  • reference (Nokogiri::XML::Element)
  • url (String)
  • ver (String, nil)

    Internet Draft version

Returns:



159
160
161
162
163
164
165
166
167
168
169
170
# File 'lib/relaton_bib/bibxml_parser.rb', line 159

# Collects the links of a reference.
#
# The list holds an "xml" link for +url+, a "src" link for the target
# attribute and, for anchors starting with "I-D", one link per format child
# (typed by the format's type attribute). When +ver+ is given, a two-digit
# version preceded by "-" and followed by "." in a format target is replaced
# by it.
#
# @param reference [Nokogiri::XML::Element] reference element
# @param url [String, nil] URL of the BibXML source
# @param ver [String, nil] Internet-Draft version
# @return [Array<Hash>] hashes with :type and :content keys
def link(reference, url, ver)
  links = []
  links << { type: "xml", content: url } if url
  target = reference[:target]
  links << { type: "src", content: target } if target
  return links unless /^I-D/.match?(reference[:anchor])

  reference.xpath("format").each_with_object(links) do |fmt, acc|
    href = fmt[:target]
    href = href.sub(/(?<=-)\d{2}(?=\.)/, ver) if ver
    acc << { type: fmt[:type], content: href }
  end
end

#localized_string(content, lang) ⇒ RelatonBib::LocalizedString?

Parameters:

  • content (String, nil)
  • lang (String, nil)

Returns:



282
283
284
# File 'lib/relaton_bib/bibxml_parser.rb', line 282

# Wraps content in a localized string when content is present.
#
# @param content [String, nil] text to wrap
# @param lang [String, nil] language passed to LocalizedString
# @return [RelatonBib::LocalizedString, nil] nil when +content+ is missing
def localized_string(content, lang)
  return unless content

  LocalizedString.new(content, lang)
end

#month(mon) ⇒ Object



369
370
371
372
373
374
# File 'lib/relaton_bib/bibxml_parser.rb', line 369

# Converts a month given as a number or as an English name into a two-digit
# month string.
#
# Numeric input is zero-padded ("1" becomes "01"). Names are matched
# case-insensitively against Date::MONTHNAMES and may be abbreviated
# ("Dec", "october").
#
# @param mon [String, nil] month number or (partial) English month name
# @return [String] two-digit month; "00" when the input is blank or does not
#   match any month name
def month(mon)
  text = mon.to_s.strip
  return text.rjust(2, "0") if /\A\d+\z/.match? text

  wanted = text.downcase
  # An empty string is contained in every month name, so it must not be
  # looked up (it would wrongly resolve to January).
  idx = Date::MONTHNAMES.index { |m| m&.downcase&.include? wanted } unless wanted.empty?
  idx.to_s.rjust 2, "0"
end

#new_org(name, abbr) ⇒ RelatonBib::Organization

Parameters:

  • name (String)
  • abbr (String)

Returns:



274
275
276
277
# File 'lib/relaton_bib/bibxml_parser.rb', line 274

# Instantiates an organization.
#
# @param name [String] organization name
# @param abbr [String, nil] organization abbreviation
# @return [RelatonBib::Organization]
def new_org(name, abbr)
  Organization.new(name: name, abbreviation: abbr)
end

#organization(contrib) ⇒ Array<Hash{Symbol=>RelatonBib::Organization, Symbol=>Array<String>}>

Parameters:

  • contrib (Nokogiri::XML::Element)

Returns:



233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
# File 'lib/relaton_bib/bibxml_parser.rb', line 233

# Builds an organization contributor from an author element.
#
# The organization text is stripped and, when it is a key of ORGNAMES,
# replaced by the full organization name.
#
# @param contrib [Nokogiri::XML::Element] author element
# @return [Hash, nil] hash with :entity (organization) and :role keys; nil
#   when the author has no organization child or its text is blank (the
#   caller, #contributors, drops nil entries)
def organization(contrib)
  org = contrib.at("./organization")
  return unless org

  orgname = org.text.strip
  return if orgname.empty?

  name = ORGNAMES[orgname] || orgname
  { entity: new_org(name, org[:abbrev]), role: [contributor_role(contrib)] }
end

#parse(bibxml, url: nil, is_relation: false, ver: nil) ⇒ Object



15
16
17
18
# File 'lib/relaton_bib/bibxml_parser.rb', line 15

# Parses a BibXML document into a bibliographic item.
#
# The root referencegroup or reference element is located and handed to
# #fetch_rfc together with the options.
#
# @param bibxml [String] BibXML source
# @param url [String, nil] URL of the BibXML source
# @param is_relation [Boolean] forwarded to #fetch_rfc
# @param ver [String, nil] Internet-Draft version
# @return [RelatonBib::BibliographicItem, nil] nil when no root element matches
def parse(bibxml, url: nil, is_relation: false, ver: nil)
  root = Nokogiri::XML(bibxml).at("/referencegroup", "/reference")
  fetch_rfc(root, url: url, is_relation: is_relation, ver: ver)
end

#person(author, reference) ⇒ Array<Hash{Symbol=>RelatonBib::Person,Symbol=>Array<String>}>

Parameters:

  • author (Nokogiri::XML::Element)
  • reference (Nokogiri::XML::Element)

Returns:



218
219
220
221
222
223
224
225
226
227
228
# File 'lib/relaton_bib/bibxml_parser.rb', line 218

# Builds a person contributor from an author element.
#
# @param author [Nokogiri::XML::Element] author element
# @param reference [Nokogiri::XML::Element] reference element (language source)
# @return [Hash] hash with :entity (the person, including name, affiliation
#   and contacts read from the address child) and :role keys
def person(author, reference)
  name = full_name(author, reference)
  affiliations = affiliation(author)
  contact_list = contacts(author.at("./address"))
  entity = Person.new(name: name, affiliation: affiliations, contact: contact_list)
  { entity: entity, role: [contributor_role(author)] }
end

#pubid_type(id) ⇒ String

Extract document identifier type from identifier

Parameters:

  • id (String)

    identifier

Returns:

  • (String)


140
141
142
# File 'lib/relaton_bib/bibxml_parser.rb', line 140

# Extracts the document identifier type, i.e. the prefix recognized by
# #id_to_pref_num.
#
# @param id [String] identifier
# @return [String, nil] nil when the identifier has no recognized prefix
def pubid_type(id)
  prefix, = id_to_pref_num(id)
  prefix
end

#relations(reference) ⇒ Array<Hash>

Parameters:

  • reference (Nokogiri::XML::Element)

Returns:

  • (Array<Hash>)


330
331
332
333
334
# File 'lib/relaton_bib/bibxml_parser.rb', line 330

# Builds "includes" relations from the reference children of the element
# (used for referencegroup elements).
#
# Each child is converted with #fetch_rfc in relation mode.
#
# @param reference [Nokogiri::XML::Element] reference or referencegroup element
# @return [Array<Hash>] hashes with :type and :bibitem keys
def relations(reference)
  children = reference.xpath("reference")
  children.map do |child|
    item = fetch_rfc(child, is_relation: true)
    { type: "includes", bibitem: item }
  end
end

#series(reference) ⇒ Array<RelatonBib::Series>

Extract series from reference

Parameters:

  • reference (Nokogiri::XML::Element)

Returns:



382
383
384
385
386
387
388
389
390
391
# File 'lib/relaton_bib/bibxml_parser.rb', line 382

# Extracts series from the seriesInfo elements of a reference.
#
# A seriesInfo is skipped when its name is in SERIESINFONAMES (those become
# document identifiers) or when it has a stream or status attribute.
#
# @param reference [Nokogiri::XML::Element] reference element
# @return [Array<RelatonBib::Series>] "main" series titled by the seriesInfo
#   name and numbered by its value
def series(reference)
  nodes = reference.xpath("./seriesInfo", "./front/seriesInfo")
  kept = nodes.reject do |si|
    SERIESINFONAMES.include?(si[:name]) || si[:stream] || si[:status]
  end
  kept.map do |si|
    title = TypedTitleString.new(
      content: si[:name], language: language(reference), script: "Latn",
    )
    Series.new(title: title, number: si[:value], type: "main")
  end
end

#status(reference) ⇒ RelatonBib::DocumentStatus

extract status

Parameters:

  • reference (Nokogiri::XML::Element)

Returns:



150
151
152
153
# File 'lib/relaton_bib/bibxml_parser.rb', line 150

# Extracts the document status from a seriesInfo element that has a status
# attribute.
#
# XPath element names are case-sensitive and every other method of this
# module queries "seriesInfo", so that spelling is searched first, both
# directly under the reference and under front. The lowercase "seriesinfo"
# spelling is still accepted as a last resort for backward compatibility.
#
# @param reference [Nokogiri::XML::Element] reference element
# @return [RelatonBib::DocumentStatus, nil] nil when no seriesInfo has a status
def status(reference)
  st = reference.at(
    "./seriesInfo[@status]",
    "./front/seriesInfo[@status]",
    "./seriesinfo[@status]",
  )
  DocumentStatus.new(stage: st[:status]) if st
end

#titles(reference) ⇒ Array<Hash>

Parameters:

  • reference (Nokogiri::XML::Element)

Returns:



174
175
176
177
178
# File 'lib/relaton_bib/bibxml_parser.rb', line 174

# Extracts the titles of a reference from its front/title elements.
#
# @param reference [Nokogiri::XML::Element] reference element
# @return [Array<Hash>] hashes with :content (title text), :language and
#   :script ("Latn") keys
def titles(reference)
  title_nodes = reference.xpath("./front/title")
  title_nodes.each_with_object([]) do |node, collected|
    collected << { content: node.text, language: language(reference), script: "Latn" }
  end
end