Module: RelatonBib::BibXMLParser

Extended by:
BibXMLParser
Included in:
BibXMLParser
Defined in:
lib/relaton_bib/bibxml_parser.rb

Constant Summary collapse

SERIESINFONAMES =

Names of seriesInfo elements whose values are saved as document identifiers in the Relaton model.

["DOI", "Internet-Draft"].freeze
# Flavor marker for this parser; unset (nil) here.
# NOTE(review): presumably meant to be overridden by flavor-specific parsers — confirm.
FLAVOR = nil
# Maps organization abbreviations, as they appear in the text of an
# <organization> element, to the full organization name.
ORGNAMES = {
  "IEEE" => "Institute of Electrical and Electronics Engineers",
  "W3C" => "World Wide Web Consortium",
  "3GPP" => "3rd Generation Partnership Project",
}.freeze

Instance Method Summary collapse

Instance Method Details

#abstracts(ref) ⇒ Array<RelatonBib::FormattedString>

Parameters:

  • ref (Nokogiri::XML::Element)

Returns:



162
163
164
165
166
167
168
169
# File 'lib/relaton_bib/bibxml_parser.rb', line 162

# Builds one formatted string per +front/abstract+ element.
#
# @param ref [Nokogiri::XML::Element] reference element
# @return [Array<RelatonBib::FormattedString>] abstracts as "text/html"
def abstracts(ref)
  ref.xpath("./front/abstract").map do |abstract|
    # <t> paragraphs become <p> paragraphs; whitespace around the tags is dropped.
    html = abstract.children.to_s.gsub(/\s*(<\/?)t(>)\s*/, '\1p\2')
    # Tabs and newlines become spaces, then runs of spaces are collapsed.
    html = html.gsub(/[\t\n]/, " ").squeeze(" ")
    FormattedString.new(
      content: html,
      language: language(ref),
      script: "Latn",
      format: "text/html",
    )
  end
end

#add_contact(conts, type, value) ⇒ Object

Parameters:



285
286
287
# File 'lib/relaton_bib/bibxml_parser.rb', line 285

# Appends a contact built from +value+'s text to +conts+; does nothing when
# +value+ is missing.
#
# @param conts [Array] list that receives the new contact
# @param type [String] contact type, e.g. "phone", "email" or "uri"
# @param value [#text, nil] node holding the contact value
# @return [Array, nil] +conts+ when a contact was added, otherwise nil
def add_contact(conts, type, value)
  return unless value

  conts.push(Contact.new(type: type, value: value.text))
end

#address(postal) ⇒ Object

Parameters:

  • postal (Nokogiri::XML::Element)


269
270
271
272
273
274
275
276
277
278
279
280
# File 'lib/relaton_bib/bibxml_parser.rb', line 269

# Builds an address from a +postal+ element.
#
# The street is taken from the first +postalLine+ child, falling back to the
# first +street+ child; it is an empty list when neither exists.
#
# @param postal [Nokogiri::XML::Element] postal element
# @return [RelatonBib::Address]
def address(postal) # rubocop:disable Metrics/CyclomaticComplexity
  street_node = postal.at("./postalLine") || postal.at("./street")
  text_of = ->(path) { postal.at(path)&.text }
  Address.new(
    street: [street_node&.text].compact,
    city: text_of.call("./city"),
    postcode: text_of.call("./code"),
    country: text_of.call("./country"),
    state: text_of.call("./region"),
  )
end

#affiliation(author) ⇒ Array<RelatonBib::Affiliation>

Parameters:

  • author (Nokogiri::XML::Element)

Returns:



230
231
232
233
234
235
236
# File 'lib/relaton_bib/bibxml_parser.rb', line 230

# Builds the author's affiliation from its +organization+ child.
#
# @param author [Nokogiri::XML::Element] author element
# @return [Array<RelatonBib::Affiliation>] a single affiliation, or an empty
#   list when the organization is missing or has no text
def affiliation(author)
  org_node = author.at("./organization")
  return [] if org_node.nil?

  org_name = org_node.text
  return [] if org_name.empty?

  [Affiliation.new(organization: new_org(org_name, org_node[:abbrev]))]
end

#bib_item(**attrs) ⇒ RelatonBib::BibliographicItem

Parameters:

  • attrs (Hash)

Returns:



57
58
59
60
# File 'lib/relaton_bib/bibxml_parser.rb', line 57

# Instantiates the bibliographic item from the collected attributes.
#
# @param attrs [Hash] keyword attributes forwarded unchanged to the constructor
# @return [RelatonBib::BibliographicItem]
def bib_item(**attrs)
  # attrs[:place] = ["Fremont, CA"]
  item = BibliographicItem.new(**attrs)
  item
end

#committee(wgr) ⇒ RelatonBib::TechnicalCommittee



333
334
335
# File 'lib/relaton_bib/bibxml_parser.rb', line 333

# Wraps a work group in a technical committee.
#
# @param wgr [RelatonBib::WorkGroup] work group
# @return [RelatonBib::TechnicalCommittee]
def committee(wgr)
  TechnicalCommittee.new(wgr)
end

#contacts(addr) ⇒ Array<RelatonBib::Address, RelatonBib::Contact>

Parameters:

  • addr (Nokogiri::XML::Element, nil)

Returns:



255
256
257
258
259
260
261
262
263
264
265
# File 'lib/relaton_bib/bibxml_parser.rb', line 255

# Collects the contact details found under an +address+ element: the postal
# address first, then phone, email and uri, in that order.
#
# @param addr [Nokogiri::XML::Element, nil] address element
# @return [Array] collected contacts; empty when +addr+ is missing
def contacts(addr)
  return [] unless addr

  postal = addr.at("./postal")
  found = postal ? [address(postal)] : []
  [["phone", "./phone"], ["email", "./email"], ["uri", "./uri"]].each do |type, path|
    add_contact(found, type, addr.at(path))
  end
  found
end

#contributor_role(author) ⇒ Hash

Parameters:

  • author (Nokogiri::XML::Element)

Returns:

  • (Hash)


291
292
293
# File 'lib/relaton_bib/bibxml_parser.rb', line 291

# Derives the contributor role from the author's +role+ attribute.
#
# @param author [Nokogiri::XML::Element] author element
# @return [Hash] +{ type: role }+, where the role defaults to "author"
def contributor_role(author)
  role = author[:role]
  { type: role || "author" }
end

#contributors(reference) ⇒ Array<Hash>

Parameters:

  • reference (Nokogiri::XML::Element)

Returns:



173
174
175
176
177
178
179
180
# File 'lib/relaton_bib/bibxml_parser.rb', line 173

# Converts every +front/author+ element into a contributor entry.
#
# Authors carrying a +fullname+ or +surname+ attribute are treated as persons,
# all others as organizations. Nil entries are dropped.
#
# @param reference [Nokogiri::XML::Element] reference element
# @return [Array<Hash>] contributor hashes
def contributors(reference)
  entries = reference.xpath("./front/author").map do |contrib|
    is_person = contrib[:fullname] || contrib[:surname]
    is_person ? person(contrib, reference) : organization(contrib)
  end
  entries.compact
end

#dates(reference) ⇒ Array<RelatonBib::BibliographicDate>

Extract date from reference.

Parameters:

  • reference (Nokogiri::XML::Element)

Returns:



309
310
311
312
313
314
315
316
317
318
319
# File 'lib/relaton_bib/bibxml_parser.rb', line 309

# Extracts the publication date from the +front/date+ element.
#
# The date string is assembled as "YYYY", "YYYY-MM" or "YYYY-MM-DD",
# depending on which attributes are present and non-empty. A day is only
# appended when a month is present, so a malformed "YYYY-DD" is never built.
#
# @param reference [Nokogiri::XML::Element] reference element
# @return [Array<RelatonBib::BibliographicDate>] a single "published" date,
#   or an empty list when there is no date element or it has no year
def dates(reference)
  date = reference.at "./front/date"
  return [] unless date

  year = date[:year]
  # Without a year there is nothing meaningful to build a date from.
  return [] if year.nil? || year.empty?

  mon = date[:month]
  day = date[:day]
  d = year
  if mon && !mon.empty?
    d += "-#{month(mon)}"
    d += "-#{day}" if day && !day.empty?
  end
  [BibliographicDate.new(type: "published", on: d)]
end

#docids(reference, ver) ⇒ Array<RelatonBib::DocumentIdentifier>

Extract document identifiers from reference

Parameters:

  • reference (Nokogiri::XML::Element)
  • ver (String, nil)

    Internet Draft version

Returns:



76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
# File 'lib/relaton_bib/bibxml_parser.rb', line 76

# Extracts document identifiers from a reference.
#
# Three groups are produced, in this order:
# 1. a primary identifier derived from the first of the +anchor+, +docName+
#    and +number+ attributes that is present;
# 2. one scoped identifier per attribute of those three that is present;
# 3. one identifier per seriesInfo element whose name is in SERIESINFONAMES.
#
# @param reference [Nokogiri::XML::Element] reference element
# @param ver [String, nil] Internet Draft version; when given, it replaces
#   the two-digit version suffix of Internet-Draft identifiers
# @return [Array<RelatonBib::DocumentIdentifier>]
def docids(reference, ver) # rubocop:disable Metrics/MethodLength,Metrics/CyclomaticComplexity,Metrics/PerceivedComplexity,Metrics/AbcSize
  ids = []
  primary = reference["anchor"] || reference["docName"] || reference["number"]
  if primary
    parsed = /^(?<pref>I-D|3GPP|W3C|[A-Z]{2,})[._]?(?<num>.+)/.match(primary)
    pubid =
      if parsed
        number = parsed[:num]
        # Leading zeros (and a leading dash) are dropped for these series only.
        number = number.sub(/^-?0+/, "") if %w[RFC BCP FYI STD].include?(parsed[:pref])
        "#{parsed[:pref]} #{number}"
      else
        primary
      end
    ids << DocumentIdentifier.new(type: pubid_type(primary), id: pubid)
  end

  %w[anchor docName number].each do |atr|
    value = reference[atr]
    next unless value

    ids << DocumentIdentifier.new(id: value, type: pubid_type(primary), scope: atr)
  end

  reference.xpath("./seriesInfo", "./front/seriesInfo").each do |si|
    next unless SERIESINFONAMES.include? si[:name]

    value = si[:value]
    value = value.sub(/(?<=-)\d{2}$/, ver) if ver && si[:name] == "Internet-Draft"
    ids << DocumentIdentifier.new(id: value, type: si[:name])
  end
  ids
end

#docnumber(reference) ⇒ Object



51
52
53
# File 'lib/relaton_bib/bibxml_parser.rb', line 51

# Derives the document number from the +anchor+ attribute by removing a
# leading "<word characters>." prefix.
#
# @param reference [Nokogiri::XML::Element] reference element
# @return [String, nil] nil when the reference has no anchor
def docnumber(reference)
  anchor = reference[:anchor]
  return if anchor.nil?

  anchor.sub(/^\w+\./, "")
end

#doctype(anchor) ⇒ String

Parameters:

  • anchor (String)

Returns:

  • (String)


363
364
365
366
367
368
369
# File 'lib/relaton_bib/bibxml_parser.rb', line 363

# Maps a reference anchor to a document type.
#
# @param anchor [String, nil] reference anchor
# @return [String] "internet-draft" when the anchor contains "I-D", "ieee"
#   when it contains "IEEE", otherwise "rfc"
def doctype(anchor)
  return "internet-draft" if /I-D/.match?(anchor)
  return "ieee" if /IEEE/.match?(anchor)

  "rfc"
end

#editorialgroup(reference) ⇒ RelatonBib::EditorialGroup?

Parameters:

  • reference (Nokogiri::XML::Element)

Returns:



323
324
325
326
327
328
329
# File 'lib/relaton_bib/bibxml_parser.rb', line 323

# Builds the editorial group from the +front/workgroup+ elements.
#
# @param reference [Nokogiri::XML::Element] reference element
# @return [RelatonBib::EditorialGroup, nil] nil when no committee was built
def editorialgroup(reference)
  committees = reference.xpath("./front/workgroup").map do |node|
    committee(WorkGroup.new(name: node.text))
  end
  return unless committees.any?

  EditorialGroup.new(committees)
end

#fetch_rfc(reference, is_relation: false, url: nil, ver: nil) ⇒ RelatonBib::BibliographicItem

Parameters:

  • reference (Nokogiri::XML::Element, nil)
  • is_relation (Boolean) (defaults to: false)

    don’t add fetched date for relation if true

  • url (String, nil) (defaults to: nil)
  • ver (String, nil) (defaults to: nil)

    Internet Draft version

Returns:



24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
# File 'lib/relaton_bib/bibxml_parser.rb', line 24

# Converts a BibXML reference (or referencegroup) element into a
# bibliographic item by delegating each attribute to its extractor method.
#
# @param reference [Nokogiri::XML::Element, nil] element to convert
# @param is_relation [Boolean] passed through to the item as +is_relation+
# @param url [String, nil] URL passed to the link extractor
# @param ver [String, nil] Internet Draft version
# @return [RelatonBib::BibliographicItem, nil] nil when +reference+ is missing
def fetch_rfc(reference, is_relation: false, url: nil, ver: nil) # rubocop:disable Metrics/AbcSize,Metrics/MethodLength
  return unless reference

  # hash[:fetched] = Date.today.to_s unless is_relation
  bib_item(
    is_relation: is_relation,
    docnumber: docnumber(reference),
    type: "standard",
    docid: docids(reference, ver),
    status: status(reference),
    language: [language(reference)],
    script: ["Latn"],
    link: link(reference, url, ver),
    title: titles(reference),
    formattedref: formattedref(reference),
    abstract: abstracts(reference),
    contributor: contributors(reference),
    relation: relations(reference),
    date: dates(reference),
    editorialgroup: editorialgroup(reference),
    series: series(reference),
    keyword: reference.xpath("front/keyword").map(&:text),
    doctype: doctype(reference[:anchor]),
  )
end

#formattedref(reference) ⇒ RelatonBib::FormattedRef?

Parameters:

  • reference (Nokogiri::XML::Element)

Returns:



149
150
151
152
153
154
155
156
157
158
# File 'lib/relaton_bib/bibxml_parser.rb', line 149

# Builds a formatted reference for references that have no title.
#
# The content is the first of the +anchor+, +docName+ and +number+
# attributes that is present.
#
# @param reference [Nokogiri::XML::Element] reference element
# @return [RelatonBib::FormattedRef, nil] nil when the reference has a
#   +front/title+ or none of the three attributes
def formattedref(reference)
  return if reference.at "./front/title"

  cont = reference[:anchor] || reference[:docName] || reference[:number]
  return unless cont

  FormattedRef.new(content: cont, language: language(reference), script: "Latn")
end

#full_name(author, reference) ⇒ RelatonBib::FullName

Parameters:

  • author (Nokogiri::XML::Element)
  • reference (Nokogiri::XML::Element)

Returns:



219
220
221
222
223
224
225
226
# File 'lib/relaton_bib/bibxml_parser.rb', line 219

# Builds a person's full name from the author's +fullname+, +initials+ and
# +surname+ attributes, localized with the reference's language.
#
# @param author [Nokogiri::XML::Element] author element
# @param reference [Nokogiri::XML::Element] reference element
# @return [RelatonBib::FullName]
def full_name(author, reference)
  lang = language reference
  localize = ->(attr) { localized_string(author[attr], lang) }
  FullName.new(
    completename: localize.call(:fullname),
    initial: [localize.call(:initials)].compact,
    surname: localize.call(:surname),
  )
end

#language(reference) ⇒ String

Parameters:

  • reference (Nokogiri::XML::Element)

Returns:

  • (String)


64
65
66
# File 'lib/relaton_bib/bibxml_parser.rb', line 64

# Reads the reference's language from its +lang+ attribute.
#
# @param reference [Nokogiri::XML::Element] reference element
# @return [String] the attribute value, or "en" when it is absent
def language(reference)
  lang = reference[:lang]
  lang || "en"
end

#link(reference, url, ver) ⇒ Array<Hash>

Parameters:

  • reference (Nokogiri::XML::Element)
  • url (String)
  • ver (String, nil)

    Internet Draft version

Returns:



126
127
128
129
130
131
132
133
134
135
136
137
# File 'lib/relaton_bib/bibxml_parser.rb', line 126

# Collects the links of a reference.
#
# Adds an "xml" link for +url+ and a "src" link for the +target+ attribute
# when present. For anchors starting with "I-D", every +format+ child also
# contributes a link of its own type.
#
# @param reference [Nokogiri::XML::Element] reference element
# @param url [String, nil] URL of the XML source
# @param ver [String, nil] Internet Draft version; when given, it replaces
#   the two-digit version that precedes the file extension in format targets
# @return [Array<Hash>] hashes with +:type+ and +:content+ keys
def link(reference, url, ver)
  links = []
  links << { type: "xml", content: url } if url
  src = reference[:target]
  links << { type: "src", content: src } if src
  return links unless /^I-D/.match? reference[:anchor]

  reference.xpath("format").each_with_object(links) do |format, acc|
    target = format[:target]
    target = target.sub(/(?<=-)\d{2}(?=\.)/, ver) if ver
    acc << { type: format[:type], content: target }
  end
end

#localized_string(content, lang) ⇒ RelatonBib::LocalizedString?

Parameters:

  • content (String, nil)
  • lang (String, nil)

Returns:



249
250
251
# File 'lib/relaton_bib/bibxml_parser.rb', line 249

# Wraps +content+ in a localized string.
#
# @param content [String, nil] text to wrap
# @param lang [String, nil] language of the text
# @return [RelatonBib::LocalizedString, nil] nil when +content+ is missing
def localized_string(content, lang)
  return unless content

  LocalizedString.new(content, lang)
end

#month(mon) ⇒ Object



337
338
339
340
341
342
# File 'lib/relaton_bib/bibxml_parser.rb', line 337

# Converts a month given by name into its two-digit number.
#
# Purely numeric input is returned unchanged. Month names are matched
# case-insensitively, ignoring surrounding whitespace and a trailing period;
# any prefix of at least three letters of an English month name is accepted
# ("Jan", "Sept", "december"). Unrecognized input yields "00".
#
# @param mon [String, nil] month number or English month name
# @return [String] numeric input as given, otherwise a two-digit month number
def month(mon)
  # return 1 if !mon || mon.empty?
  return mon if /^\d+$/.match? mon

  wanted = mon.to_s.strip.chomp(".").downcase
  # Three letters are the shortest prefix that identifies a month uniquely.
  # Date::MONTHNAMES[0] is nil, hence the nil check.
  index = Date::MONTHNAMES.index do |name|
    name && wanted.length >= 3 && name.downcase.start_with?(wanted)
  end
  index.to_s.rjust 2, "0"
end

#new_org(name, abbr) ⇒ RelatonBib::Organization

Parameters:

  • name (String)
  • abbr (String)

Returns:



241
242
243
244
# File 'lib/relaton_bib/bibxml_parser.rb', line 241

# Instantiates an organization.
#
# @param name [String] organization name
# @param abbr [String, nil] organization abbreviation
# @return [RelatonBib::Organization]
def new_org(name, abbr)
  # (name = "Internet Engineering Task Force", abbr = "IETF")
  Organization.new(name: name, abbreviation: abbr)
end

#organization(contrib) ⇒ Hash{Symbol=>RelatonBib::Organization, Symbol=>Array<Hash>}

Parameters:

  • contrib (Nokogiri::XML::Element)

Returns:



200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
# File 'lib/relaton_bib/bibxml_parser.rb', line 200

# Builds an organization contributor from an +author+ element.
#
# The organization name is the stripped text of the +organization+ child;
# names listed in ORGNAMES are expanded to their full form.
#
# @param contrib [Nokogiri::XML::Element] author element
# @return [Hash, nil] +{ entity: organization, role: [role] }+, or nil when
#   the author has no +organization+ child or its text is blank
def organization(contrib)
  org = contrib.at("./organization")
  # An author without an organization would otherwise raise NoMethodError here.
  return unless org

  orgname = org.text.strip
  return if orgname.empty?

  name = ORGNAMES[orgname] || orgname
  { entity: new_org(name, org[:abbrev]), role: [contributor_role(contrib)] }
end

#parse(bibxml, url: nil, is_relation: false, ver: nil) ⇒ Object



14
15
16
17
# File 'lib/relaton_bib/bibxml_parser.rb', line 14

# Parses a BibXML string into a bibliographic item.
#
# The root +referencegroup+ or +reference+ element is located and handed to
# +fetch_rfc+ together with the options.
#
# @param bibxml [String] BibXML source
# @param url [String, nil] URL of the XML source
# @param is_relation [Boolean] passed through to +fetch_rfc+
# @param ver [String, nil] Internet Draft version
# @return [Object] whatever +fetch_rfc+ returns
def parse(bibxml, url: nil, is_relation: false, ver: nil)
  root = Nokogiri::XML(bibxml).at("/referencegroup", "/reference")
  fetch_rfc(root, url: url, is_relation: is_relation, ver: ver)
end

#person(author, reference) ⇒ Hash{Symbol=>RelatonBib::Person, Symbol=>Array<Hash>}

Parameters:

  • author (Nokogiri::XML::Element)
  • reference (Nokogiri::XML::Element)

Returns:



185
186
187
188
189
190
191
192
193
194
195
# File 'lib/relaton_bib/bibxml_parser.rb', line 185

# Builds a person contributor from an +author+ element.
#
# @param author [Nokogiri::XML::Element] author element
# @param reference [Nokogiri::XML::Element] reference element
# @return [Hash] +{ entity: person, role: [role] }+
def person(author, reference)
  name = full_name(author, reference)
  affiliations = affiliation(author)
  contact_list = contacts(author.at("./address"))
  entity = Person.new(name: name, affiliation: affiliations, contact: contact_list)
  { entity: entity, role: [contributor_role(author)] }
end

#pubid_type(id) ⇒ String

Extract document identifier type from identifier

Parameters:

  • id (String)

    identifier

Returns:

  • (String)


106
107
108
109
# File 'lib/relaton_bib/bibxml_parser.rb', line 106

# Extracts the document identifier type from the start of an identifier.
#
# The type is "3GPP", "W3C" or a run of two or more capital letters, and it
# must be followed either by a dot and a capital letter or by a digit.
#
# @param id [String, nil] identifier
# @return [String, nil] the type, or nil when +id+ is nil or does not match
def pubid_type(id)
  return if id.nil?

  found = id.match(/^(3GPP|W3C|[A-Z]{2,})(?:\.(?=[A-Z])|(?=\d))/)
  found[1] if found
end

#relations(reference) ⇒ Array<Hash>

Parameters:

  • reference (Nokogiri::XML::Element)

Returns:

  • (Array<Hash>)


297
298
299
300
301
# File 'lib/relaton_bib/bibxml_parser.rb', line 297

# Converts the nested +reference+ children into "includes" relations.
#
# @param reference [Nokogiri::XML::Element] reference or referencegroup element
# @return [Array<Hash>] one +{ type: "includes", bibitem: item }+ per child
def relations(reference)
  reference.xpath("reference").each_with_object([]) do |ref, list|
    item = fetch_rfc(ref, is_relation: true)
    list << { type: "includes", bibitem: item }
  end
end

#series(reference) ⇒ Array<RelatonBib::Series>

Extract series from reference

Parameters:

  • reference (Nokogiri::XML::Element)

Returns:



350
351
352
353
354
355
356
357
358
359
# File 'lib/relaton_bib/bibxml_parser.rb', line 350

def series(reference)
  reference.xpath("./seriesInfo", "./front/seriesInfo").map do |si|
    next if SERIESINFONAMES.include?(si[:name]) || si[:stream] || si[:status]

    t = TypedTitleString.new(
      content: si[:name], language: language(reference), script: "Latn",
    )
    Series.new(title: t, number: si[:value], type: "main")
  end.compact
end

#status(reference) ⇒ RelatonBib::DocumentStatus

extract status

Parameters:

  • reference (Nokogiri::XML::Element)

Returns:



117
118
119
120
# File 'lib/relaton_bib/bibxml_parser.rb', line 117

def status(reference)
  st = reference.at("./seriesinfo[@status]")
  DocumentStatus.new(stage: st[:status]) if st
end

#titles(reference) ⇒ Array<Hash>

Parameters:

  • reference (Nokogiri::XML::Element)

Returns:



141
142
143
144
145
# File 'lib/relaton_bib/bibxml_parser.rb', line 141

# Extracts the titles from the +front/title+ elements.
#
# @param reference [Nokogiri::XML::Element] reference element
# @return [Array<Hash>] hashes with +:content+, +:language+ and +:script+ keys
def titles(reference)
  reference.xpath("./front/title").each_with_object([]) do |title, list|
    list << { content: title.text, language: language(reference), script: "Latn" }
  end
end