Module: RelatonBib::BibXMLParser

Extended by:
BibXMLParser
Included in:
BibXMLParser
Defined in:
lib/relaton_bib/bibxml_parser.rb

Constant Summary collapse

SERIESINFONAMES =
["DOI", "Internet-Draft"].freeze
FLAVOR =
nil

Instance Method Summary collapse

Instance Method Details

#abstracts(ref) ⇒ Array<RelatonBib::FormattedString>

Parameters:

  • ref (Nokogiri::XML::Element)

Returns:



163
164
165
166
167
168
169
170
# File 'lib/relaton_bib/bibxml_parser.rb', line 163

# Builds one HTML-formatted string per `./front/abstract` element.
#
# @param ref [Nokogiri::XML::Element] reference element
# @return [Array<RelatonBib::FormattedString>]
def abstracts(ref)
  ref.xpath("./front/abstract").map do |abstract|
    markup = abstract.children.to_s
    # Rename <t>/</t> tags to <p>/</p>, swallowing whitespace around them.
    markup = markup.gsub(/\s*(<\/?)t(>)\s*/, '\1p\2')
    # Replace tabs/newlines with spaces, then collapse runs of spaces.
    markup = markup.gsub(/[\t\n]/, " ").squeeze(" ")
    FormattedString.new(
      content: markup, language: language(ref), script: "Latn",
      format: "text/html"
    )
  end
end

#add_contact(conts, type, value) ⇒ Object

Parameters:



285
286
287
# File 'lib/relaton_bib/bibxml_parser.rb', line 285

# Appends a Contact built from +value+'s text to +conts+, when +value+ is
# present.
#
# @param conts [Array] list being accumulated (mutated in place)
# @param type [String] contact type passed to Contact
# @param value [#text, nil] node supplying the contact value
# @return [Array, nil] +conts+ when a contact was added, otherwise nil
def add_contact(conts, type, value)
  return unless value

  contact = Contact.new(type: type, value: value.text)
  conts << contact
end

#address(postal) ⇒ Object

Parameters:

  • postal (Nokogiri::XML::Element)


269
270
271
272
273
274
275
276
277
278
279
280
# File 'lib/relaton_bib/bibxml_parser.rb', line 269

# Builds an Address from the children of a postal element.
#
# The street is taken from the first `./postalLine`, falling back to the
# first `./street`; it is an empty array when neither exists.
#
# @param postal [Nokogiri::XML::Element]
# @return [Address]
def address(postal) # rubocop:disable Metrics/CyclomaticComplexity
  street_node = postal.at("./postalLine") || postal.at("./street")
  text_at = ->(path) { postal.at(path)&.text }
  Address.new(
    street: [street_node&.text].compact,
    city: text_at.call("./city"),
    postcode: text_at.call("./code"),
    country: text_at.call("./country"),
    state: text_at.call("./region"),
  )
end

#affiliation(author) ⇒ Array<RelatonBib::Affiliation>

Parameters:

  • author (Nokogiri::XML::Element)

Returns:



230
231
232
233
234
235
236
# File 'lib/relaton_bib/bibxml_parser.rb', line 230

# Builds the affiliation list of an author from its `./organization` child.
#
# @param author [Nokogiri::XML::Element]
# @return [Array<RelatonBib::Affiliation>] empty when the organization
#   element is missing or has empty text
def affiliation(author)
  org_node = author.at("./organization")
  return [] unless org_node && !org_node.text.empty?

  organization = new_org(org_node.text, org_node[:abbrev])
  [Affiliation.new(organization: organization)]
end

#bib_item(**attrs) ⇒ RelatonBib::BibliographicItem

Parameters:

  • attrs (Hash)

Returns:



49
50
51
52
# File 'lib/relaton_bib/bibxml_parser.rb', line 49

# Instantiates the bibliographic item from the collected attributes.
# All keyword arguments are forwarded unchanged to BibliographicItem.
#
# @param attrs [Hash] bibliographic item attributes
# @return [RelatonBib::BibliographicItem]
def bib_item(**attrs)
  BibliographicItem.new(**attrs)
end

#committee(wgr) ⇒ RelatonBib::TechnicalCommittee



331
332
333
# File 'lib/relaton_bib/bibxml_parser.rb', line 331

# Wraps a work group in a TechnicalCommittee.
#
# @param wgr [Object] work group passed straight to TechnicalCommittee.new
# @return [RelatonBib::TechnicalCommittee]
def committee(wgr)
  TechnicalCommittee.new(wgr)
end

#contacts(addr) ⇒ Array<RelatonBib::Address, RelatonBib::Phone>

Parameters:

  • addr (Nokogiri::XML::Element, nil)

Returns:



255
256
257
258
259
260
261
262
263
264
265
# File 'lib/relaton_bib/bibxml_parser.rb', line 255

# Collects the contact entries found under an address element: the postal
# address first (if any), then phone, email and uri in that order.
#
# @param addr [Nokogiri::XML::Element, nil] address element
# @return [Array] contact entries; empty when +addr+ is nil
def contacts(addr)
  return [] unless addr

  found = []
  postal = addr.at("./postal")
  found << address(postal) if postal
  [["phone", "./phone"], ["email", "./email"], ["uri", "./uri"]].each do |type, path|
    add_contact(found, type, addr.at(path))
  end
  found
end

#contributor_role(author) ⇒ Hash

Parameters:

  • author (Nokogiri::XML::Element)

Returns:

  • (Hash)


291
292
293
# File 'lib/relaton_bib/bibxml_parser.rb', line 291

# Returns the role description of a contributor, taken from the element's
# `role` attribute and defaulting to "author".
#
# @param author [Nokogiri::XML::Element]
# @return [Hash] `{ type: <role> }`
def contributor_role(author)
  role = author[:role] || "author"
  { type: role }
end

#contributors(reference) ⇒ Array<Hash>

Parameters:

  • reference (Nokogiri::XML::Element)

Returns:



174
175
176
177
178
179
180
181
# File 'lib/relaton_bib/bibxml_parser.rb', line 174

# Maps every `./front/author` element to a contributor hash. Authors that
# carry a `fullname` or `surname` attribute are treated as persons, all
# others as organizations; nil results are dropped.
#
# @param reference [Nokogiri::XML::Element]
# @return [Array<Hash>]
def contributors(reference)
  entries = reference.xpath("./front/author").map do |contrib|
    named = contrib[:fullname] || contrib[:surname]
    named ? person(contrib, reference) : organization(contrib)
  end
  entries.compact
end

#dates(reference) ⇒ Array<RelatonBib::BibliographicDate>

Extract date from reference.

Parameters:

  • reference (Nokogiri::XML::Element)

Returns:



309
310
311
312
313
314
315
316
317
# File 'lib/relaton_bib/bibxml_parser.rb', line 309

# Extracts the publication date from `./front/date`.
#
# Missing month and day default to 1. An empty array is returned when there
# is no date element, when the year is missing or blank, or when the
# assembled date cannot be parsed.
#
# @param reference [Nokogiri::XML::Element]
# @return [Array<RelatonBib::BibliographicDate>] zero or one "published" date
def dates(reference)
  date = reference.at "./front/date"
  return [] unless date

  # Without a year the remaining parts would be parsed on their own and
  # yield a date that is not in the source.
  year = date[:year].to_s.strip
  return [] if year.empty?

  mon = month(date[:month])
  day = date[:day].to_s.strip
  day = 1 if day.empty?
  # An unrecognised month name makes the day meaningless; fall back to
  # January 1st instead of letting the day slide into the month position.
  parts = mon ? [year, mon, day] : [year, 1, 1]
  on = Time.parse(parts.join("-")).strftime "%Y-%m-%d"
  [BibliographicDate.new(type: "published", on: on)]
rescue ArgumentError
  []
end

#docids(reference, ver) ⇒ Array<RelatonBib::DocumentIdentifier>

Extract document identifiers from reference

Parameters:

  • reference (Nokogiri::XML::Element)
  • ver (String, nil)

    Internet Draft version

Returns:



68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
# File 'lib/relaton_bib/bibxml_parser.rb', line 68

# Extracts document identifiers from a reference.
#
# Three groups of identifiers are produced, in this order:
# 1. a primary identifier derived from the first present attribute among
#    `anchor`, `docName` and `number`;
# 2. one scoped identifier for each of those attributes that is present;
# 3. one identifier per `seriesInfo` element (direct child or under
#    `front`) whose name is listed in SERIESINFONAMES.
#
# @param reference [Nokogiri::XML::Element]
# @param ver [String, nil] Internet Draft version; when given it replaces a
#   trailing two-digit version in "Internet-Draft" seriesInfo values
# @return [Array<RelatonBib::DocumentIdentifier>]
def docids(reference, ver) # rubocop:disable Metrics/MethodLength,Metrics/CyclomaticComplexity,Metrics/PerceivedComplexity,Metrics/AbcSize
  ret = []
  # Legacy implementation kept for reference:
  # sfid = reference.at("./seriesInfo[@name='#{self::FLAVOR}']",
  #                     "./front/seriesInfo[@name='#{self::FLAVOR}']")
  # if sfid
  #   type = sfid[:name]
  #   id = sfid[:value]
  #   # scope = "series"
  # else # if self::FLAVOR
    # id, scope = if reference[:anchor] then [reference[:anchor], "anchor"]
    #             elsif reference[:docName] then [reference[:docName], "docName"]
    #             elsif reference[:number] then [reference[:number], "number"]
    #             end
  id = reference["anchor"] || reference["docName"] || reference["number"]
  # The type prefix must be followed by "." + capital letter, or directly by
  # a digit.
  type_match = id&.match(/^(3GPP|W3C|[A-Z]{2,})(?:\.(?=[A-Z])|(?=\d))/)
  # FLAVOR, when set on the receiver, takes precedence over the prefix
  # detected in the id.
  type = self::FLAVOR || (type_match && type_match[1])
  if id
    # Regexp literal on the left-hand side of =~ assigns the named captures
    # to the locals `pref` and `num` (both nil when there is no match).
    /^(?<pref>I-D|3GPP|W3C|[A-Z]{2,})[._]?(?<num>.+)/ =~ id
    # Drop an optional leading hyphen and leading zeros from the number of
    # RFC/BCP/FYI/STD ids.
    num.sub!(/^-?0+/, "") if %w[RFC BCP FYI STD].include?(pref)
    pid = pref ? "#{pref} #{num}" : id
    ret << DocumentIdentifier.new(type: type, id: pid)
  end
  # Keep each raw attribute value as well, scoped by the attribute name.
  %w[anchor docName number].each do |atr|
    if reference[atr]
      ret << DocumentIdentifier.new(id: reference[atr], type: type, scope: atr)
    end
  end
  # end
  # if id
  #   ret << DocumentIdentifier.new(type: type, id: id)
  #   ret << DocumentIdentifier.new(type: type, id: id, scope: scope) if scope
  # end
  # if (id = reference[:anchor])
  #   ret << DocumentIdentifier.new(type: "rfc-anchor", id: id)
  # end
  # The do/end block belongs to `map`; `compact` then removes the nils left
  # by `next` for seriesInfo names that are not in SERIESINFONAMES.
  ret + reference.xpath("./seriesInfo", "./front/seriesInfo").map do |si|
    next unless SERIESINFONAMES.include? si[:name]

    id = si[:value]
    # Swap the two digits after the last hyphen for the requested version.
    id.sub!(/(?<=-)\d{2}$/, ver) if ver && si[:name] == "Internet-Draft"
    DocumentIdentifier.new(id: id, type: si[:name])
  end.compact
end

#docnumber(reference) ⇒ Object



43
44
45
# File 'lib/relaton_bib/bibxml_parser.rb', line 43

# Derives the document number from the `anchor` attribute by removing a
# leading "<word characters>." prefix.
#
# @param reference [Nokogiri::XML::Element]
# @return [String, nil] nil when the reference has no anchor
def docnumber(reference)
  anchor = reference[:anchor]
  anchor&.sub(/^\w+\./, "")
end

#doctype(anchor) ⇒ String

Parameters:

  • anchor (String)

Returns:

  • (String)


361
362
363
364
365
366
367
# File 'lib/relaton_bib/bibxml_parser.rb', line 361

# Determines the document type from an anchor. "I-D" anywhere in the anchor
# wins over "IEEE"; anything else (including nil) is an RFC.
#
# @param anchor [String, nil]
# @return [String] "internet-draft", "ieee" or "rfc"
def doctype(anchor)
  return "internet-draft" if /I-D/ === anchor
  return "ieee" if /IEEE/ === anchor

  "rfc"
end

#editorialgroup(reference) ⇒ RelatonBib::EditorialGroup?

Parameters:

  • reference (Nokogiri::XML::Element)

Returns:



321
322
323
324
325
326
327
# File 'lib/relaton_bib/bibxml_parser.rb', line 321

# Builds the editorial group from the `./front/workgroup` elements.
#
# @param reference [Nokogiri::XML::Element]
# @return [RelatonBib::EditorialGroup, nil] nil when no committee was built
def editorialgroup(reference)
  committees = reference.xpath("./front/workgroup").map do |node|
    committee(WorkGroup.new(name: node.text))
  end
  return unless committees.any?

  EditorialGroup.new(committees)
end

#fetch_rfc(reference, is_relation: false, url: nil, ver: nil) ⇒ RelatonBib::BibliographicItem

Parameters:

  • reference (Nokogiri::XML::Element, nil)
  • is_relation (Boolean) (defaults to: false)

    don’t add fetched date for relation if true

  • url (String, nil) (defaults to: nil)
  • ver (String, nil) (defaults to: nil)

    Internet Draft version

Returns:



16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
# File 'lib/relaton_bib/bibxml_parser.rb', line 16

# Converts a reference (or referencegroup) element into a bibliographic
# item by gathering every attribute through the dedicated extractors and
# handing the result to #bib_item.
#
# @param reference [Nokogiri::XML::Element, nil]
# @param is_relation [Boolean] forwarded to the item as `is_relation`
# @param url [String, nil] forwarded to #link
# @param ver [String, nil] Internet Draft version, forwarded to #docids and
#   #link
# @return [RelatonBib::BibliographicItem, nil] nil when +reference+ is nil
def fetch_rfc(reference, is_relation: false, url: nil, ver: nil) # rubocop:disable Metrics/AbcSize,Metrics/MethodLength
  return unless reference

  bib_item(
    is_relation: is_relation,
    docnumber: docnumber(reference),
    type: "standard",
    docid: docids(reference, ver),
    status: status(reference),
    language: [language(reference)],
    script: ["Latn"],
    link: link(reference, url, ver),
    title: titles(reference),
    formattedref: formattedref(reference),
    abstract: abstracts(reference),
    contributor: contributors(reference),
    relation: relations(reference),
    date: dates(reference),
    editorialgroup: editorialgroup(reference),
    series: series(reference),
    keyword: reference.xpath("front/keyword").map(&:text),
    doctype: doctype(reference[:anchor]),
  )
end

#formattedref(reference) ⇒ RelatonBib::FormattedRef?

Parameters:

  • reference (Nokogiri::XML::Element)

Returns:



150
151
152
153
154
155
156
157
158
159
# File 'lib/relaton_bib/bibxml_parser.rb', line 150

# Builds a formatted reference for references that have no title, using the
# first present attribute among `anchor`, `docName` and `number`.
#
# @param reference [Nokogiri::XML::Element]
# @return [RelatonBib::FormattedRef, nil] nil when a `./front/title` exists
#   or none of the attributes is present
def formattedref(reference)
  return if reference.at("./front/title")

  ident = reference[:anchor] || reference[:docName] || reference[:number]
  return unless ident

  FormattedRef.new(content: ident, language: language(reference), script: "Latn")
end

#full_name(author, reference) ⇒ RelatonBib::FullName

Parameters:

  • author (Nokogiri::XML::Element)
  • reference (Nokogiri::XML::Element)

Returns:



219
220
221
222
223
224
225
226
# File 'lib/relaton_bib/bibxml_parser.rb', line 219

# Builds the full name of an author from its `fullname`, `initials` and
# `surname` attributes, localized with the language of the reference.
#
# @param author [Nokogiri::XML::Element]
# @param reference [Nokogiri::XML::Element]
# @return [RelatonBib::FullName]
def full_name(author, reference)
  lang = language(reference)
  localized = ->(attr) { localized_string(author[attr], lang) }
  FullName.new(
    completename: localized.call(:fullname),
    initial: [localized.call(:initials)].compact,
    surname: localized.call(:surname),
  )
end

#language(reference) ⇒ String

Parameters:

  • reference (Nokogiri::XML::Element)

Returns:

  • (String)


56
57
58
# File 'lib/relaton_bib/bibxml_parser.rb', line 56

# Returns the language of a reference from its `lang` attribute, defaulting
# to "en".
#
# @param reference [Nokogiri::XML::Element]
# @return [String]
def language(reference)
  lang = reference[:lang]
  lang || "en"
end

#link(reference, url, ver) ⇒ Array<Hash>

Parameters:

  • reference (Nokogiri::XML::Element)
  • url (String)
  • ver (String, nil)

    Internet Draft version

Returns:



127
128
129
130
131
132
133
134
135
136
137
138
# File 'lib/relaton_bib/bibxml_parser.rb', line 127

# Collects the links of a reference: the XML source URL (when given), the
# `target` attribute (when present) and, for anchors starting with "I-D",
# one link per `format` child.
#
# @param reference [Nokogiri::XML::Element]
# @param url [String, nil] URL of the BibXML document
# @param ver [String, nil] Internet Draft version; when given it replaces
#   the two-digit version preceding the extension in format targets
# @return [Array<Hash>] hashes with `:type` and `:content`
def link(reference, url, ver)
  links = []
  links << { type: "xml", content: url } if url
  target = reference[:target]
  links << { type: "src", content: target } if target
  return links unless /^I-D/.match?(reference[:anchor])

  reference.xpath("format").each_with_object(links) do |fmt, acc|
    href = fmt[:target]
    href = href.sub(/(?<=-)\d{2}(?=\.)/, ver) if ver
    acc << { type: fmt[:type], content: href }
  end
end

#localized_string(content, lang) ⇒ RelatonBib::LocalizedString?

Parameters:

  • content (String, nil)
  • lang (String, nil)

Returns:



249
250
251
# File 'lib/relaton_bib/bibxml_parser.rb', line 249

# Wraps +content+ in a LocalizedString, or returns nil when there is no
# content.
#
# @param content [String, nil]
# @param lang [String, nil]
# @return [RelatonBib::LocalizedString, nil]
def localized_string(content, lang)
  return unless content

  LocalizedString.new(content, lang)
end

#month(mon) ⇒ Object



335
336
337
338
339
340
# File 'lib/relaton_bib/bibxml_parser.rb', line 335

# Normalizes a month value taken from a `date` element.
#
# Numeric strings are returned as they are (without surrounding
# whitespace). English month names are matched case-insensitively and may
# be abbreviated to three or more letters, optionally followed by a period
# ("Mar", "Sept.", "december").
#
# @param mon [String, nil] month number or English month name
# @return [Integer, String, nil] 1 when +mon+ is missing or blank, the
#   numeric string itself, the month index (1-12) for a recognised name, or
#   nil when the name is not recognised
def month(mon)
  return 1 unless mon

  name = mon.to_s.strip
  return 1 if name.empty?
  return name if /\A\d+\z/.match?(name)

  key = name.downcase.chomp(".")
  # Two letters or fewer are ambiguous ("Ma" => March/May).
  return if key.length < 3

  # Date::MONTHNAMES[0] is nil, hence the safe navigation.
  Date::MONTHNAMES.index { |m| m&.downcase&.start_with?(key) }
end

#new_org(name, abbr) ⇒ RelatonBib::Organization

Parameters:

  • name (String)
  • abbr (String)

Returns:



241
242
243
244
# File 'lib/relaton_bib/bibxml_parser.rb', line 241

# Creates an Organization with the given name and abbreviation.
#
# @param name [String]
# @param abbr [String, nil]
# @return [RelatonBib::Organization]
def new_org(name, abbr)
  Organization.new(name: name, abbreviation: abbr)
end

#organization(contrib) ⇒ Array<Hash{Symbol=>RelatonBib::Organization, Symbol=>Array<String>}>

Parameters:

  • contrib (Nokogiri::XML::Element)

Returns:



201
202
203
204
205
206
207
208
209
210
211
212
213
214
# File 'lib/relaton_bib/bibxml_parser.rb', line 201

# Builds the contributor hash for an author that is an organization.
#
# @param contrib [Nokogiri::XML::Element] author element
# @return [Hash{Symbol=>RelatonBib::Organization, Symbol=>Array<Hash>}, nil]
#   `{ entity:, role: }`, or nil when the author has no `./organization`
#   child (the caller drops nil entries)
def organization(contrib)
  org = contrib.at("./organization")
  # An author without name attributes and without an organization element
  # carries nothing to build a contributor from.
  return unless org

  { entity: new_org(org.text, org[:abbrev]), role: [contributor_role(contrib)] }
end

#parse(bibxml, url: nil, is_relation: false, ver: nil) ⇒ Object



6
7
8
9
# File 'lib/relaton_bib/bibxml_parser.rb', line 6

# Parses a BibXML document and converts its root `referencegroup` or
# `reference` element into a bibliographic item.
#
# @param bibxml [String] BibXML content
# @param url [String, nil] forwarded to #fetch_rfc
# @param is_relation [Boolean] forwarded to #fetch_rfc
# @param ver [String, nil] Internet Draft version, forwarded to #fetch_rfc
# @return [Object] whatever #fetch_rfc returns for the root element
def parse(bibxml, url: nil, is_relation: false, ver: nil)
  root = Nokogiri::XML(bibxml).at("/referencegroup", "/reference")
  fetch_rfc(root, url: url, is_relation: is_relation, ver: ver)
end

#person(author, reference) ⇒ Array<Hash{Symbol=>RelatonBib::Person,Symbol=>Array<String>}>

Parameters:

  • author (Nokogiri::XML::Element)
  • reference (Nokogiri::XML::Element)

Returns:



186
187
188
189
190
191
192
193
194
195
196
# File 'lib/relaton_bib/bibxml_parser.rb', line 186

# Builds the contributor hash for an author that is a person.
#
# @param author [Nokogiri::XML::Element]
# @param reference [Nokogiri::XML::Element]
# @return [Hash{Symbol=>RelatonBib::Person, Symbol=>Array<Hash>}]
#   `{ entity:, role: }`
def person(author, reference)
  name = full_name(author, reference)
  affiliations = affiliation(author)
  contact_list = contacts(author.at("./address"))
  individual = Person.new(name: name, affiliation: affiliations, contact: contact_list)
  { entity: individual, role: [contributor_role(author)] }
end

#relations(reference) ⇒ Array<Hash>

Parameters:

  • reference (Nokogiri::XML::Element)

Returns:

  • (Array<Hash>)


297
298
299
300
301
# File 'lib/relaton_bib/bibxml_parser.rb', line 297

# Turns every nested `reference` child into an "includes" relation whose
# bibitem is produced by #fetch_rfc with `is_relation: true`.
#
# @param reference [Nokogiri::XML::Element]
# @return [Array<Hash>] hashes with `:type` and `:bibitem`
def relations(reference)
  included = reference.xpath("reference")
  included.map do |child|
    { type: "includes", bibitem: fetch_rfc(child, is_relation: true) }
  end
end

#series(reference) ⇒ Array<RelatonBib::Series>

Extract series from reference.

Parameters:

  • reference (Nokogiri::XML::Element)

Returns:



348
349
350
351
352
353
354
355
356
357
# File 'lib/relaton_bib/bibxml_parser.rb', line 348

# Extracts series from the `seriesInfo` elements of a reference (direct
# children and those under `front`). Elements named "DOI" and elements
# carrying a `stream` or `status` attribute are skipped.
#
# @param reference [Nokogiri::XML::Element]
# @return [Array<RelatonBib::Series>]
def series(reference)
  infos = reference.xpath("./seriesInfo", "./front/seriesInfo")
  kept = infos.reject { |si| si[:name] == "DOI" || si[:stream] || si[:status] }
  kept.map do |si|
    title = TypedTitleString.new(
      content: si[:name], language: language(reference), script: "Latn",
    )
    Series.new(title: title, number: si[:value], type: "main")
  end
end

#status(reference) ⇒ RelatonBib::DocumentStatus

Extract status from reference.

Parameters:

  • reference (Nokogiri::XML::Element)

Returns:



118
119
120
121
# File 'lib/relaton_bib/bibxml_parser.rb', line 118

# Extracts the document status from the first `seriesInfo` element that
# carries a `status` attribute.
#
# XPath names are case-sensitive, so the camel-case `seriesInfo` spelling
# queried by the other extractors is looked up here as well, both directly
# under the reference and under `front`. The lowercase `seriesinfo` path is
# kept first for backward compatibility.
#
# @param reference [Nokogiri::XML::Element]
# @return [RelatonBib::DocumentStatus, nil] nil when no element has a status
def status(reference)
  st = reference.at(
    "./seriesinfo[@status]",
    "./seriesInfo[@status]",
    "./front/seriesInfo[@status]",
  )
  DocumentStatus.new(stage: st[:status]) if st
end

#titles(reference) ⇒ Array<Hash>

Parameters:

  • reference (Nokogiri::XML::Element)

Returns:



142
143
144
145
146
# File 'lib/relaton_bib/bibxml_parser.rb', line 142

# Extracts the titles of a reference, one hash per `./front/title` element.
#
# @param reference [Nokogiri::XML::Element]
# @return [Array<Hash>] hashes with `:content`, `:language` and `:script`
def titles(reference)
  title_nodes = reference.xpath("./front/title")
  title_nodes.map do |node|
    { content: node.text, language: language(reference), script: "Latn" }
  end
end