Class: RelatonBipm::RawdataBipmMetrologia::ArticleParser

Inherits:
Object
  • Object
show all
Defined in:
lib/relaton_bipm/rawdata_bipm_metrologia/article_parser.rb

Constant Summary collapse

# Names of the bibliographic item attributes. `parse` calls the matching
# `parse_<attr>` method for each entry, in this order.
ATTRS = [
  :docid, :title, :contributor, :date, :copyright, :abstract,
  :relation, :series, :extent, :type, :doctype, :link
].freeze

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(doc, journal, volume, article) ⇒ ArticleParser

Initialize parser

Parameters:

  • doc (Nokogiri::XML::Document)

    XML document

  • journal (String)

    journal

  • volume (String)

    volume

  • article (String)

    article



27
28
29
30
31
32
33
# File 'lib/relaton_bipm/rawdata_bipm_metrologia/article_parser.rb', line 27

# Keep the identifier parts and look up the two XML elements the parse
# methods read from: the article root and its article-meta element.
#
# @param doc [Nokogiri::XML::Document] XML document
# @param journal [String] journal
# @param volume [String] volume
# @param article [String] article
def initialize(doc, journal, volume, article)
  @doc = doc.at("/article")
  @meta = doc.at("/article/front/article-meta")
  @journal = journal
  @volume = volume
  @article = article
end

Class Method Details

.parse(path) ⇒ RelatonBipm::BipmBibliographicItem

Create new parser and parse document

Parameters:

  • path (String)

    path to XML file

Returns:



13
14
15
16
17
# File 'lib/relaton_bipm/rawdata_bipm_metrologia/article_parser.rb', line 13

# Read the XML file, derive journal, volume and article from the name of the
# directory that contains it, and parse the document.
#
# The directory name is split on "_"; the first segment is ignored and the
# next three become journal, volume and article.
#
# @param path [String] path to XML file
# @return [RelatonBipm::BipmBibliographicItem]
def self.parse(path)
  xml = File.read(path, encoding: "UTF-8")
  doc = Nokogiri::XML(xml)
  dir_name = path.split("/")[-2]
  _prefix, journal, volume, article = dir_name.split("_")
  new(doc, journal, volume, article).parse
end

Instance Method Details

#affiliation(contrib) ⇒ Array<RelatonBib::Affiliation>

Parse affiliations

Parameters:

  • contrib (Nokogiri::XML::Element)

    contributor element

Returns:

  • (Array<RelatonBib::Affiliation>)

    array of affiliations



143
144
145
146
147
148
149
150
151
152
153
154
# File 'lib/relaton_bipm/rawdata_bipm_metrologia/article_parser.rb', line 143

# Parse the affiliations referenced by a contributor.
#
# Each `xref[@ref-type='aff']` of the contributor points (via `rid`) to an
# `aff` element in the article metadata; the node following its `label` is
# read as "<organization>, <city>, <country>".
#
# - A reference whose `aff` node cannot be found is skipped instead of
#   raising NoMethodError on nil.
# - When the text has fewer than three comma-separated parts there is no
#   reliable city/country, so the whole text is used as the organization
#   name (previously the name came out empty) and no address is attached.
#
# @param contrib [Nokogiri::XML::Element] contributor element
# @return [Array<RelatonBib::Affiliation>] array of affiliations
def affiliation(contrib) # rubocop:disable Metrics/AbcSize
  contrib.xpath("./xref[@ref-type='aff']").each_with_object([]) do |xref, affiliations|
    node = @meta.at("./contrib-group/aff[@id='#{xref[:rid]}']/label/following-sibling::node()")
    next unless node # dangling reference: nothing to parse

    text = node.text
    parts = text.split(", ")
    if parts.size < 3
      orgname = text
      address = []
    else
      # The last two parts are city and country; everything before is the name.
      orgname = parts[0..-3].join(", ")
      city, country = parts[-2..]
      address = [RelatonBib::Address.new(city: city, country: country)]
    end
    org = RelatonBib::Organization.new name: orgname, contact: address
    affiliations << RelatonBib::Affiliation.new(organization: org)
  end
end

#bibitem(date, type) ⇒ RelatonBipm::BipmBibliographicItem

Create bibitem

Parameters:

  • date (String)
  • type (String)

    date type

Returns:



275
276
277
278
279
280
# File 'lib/relaton_bipm/rawdata_bipm_metrologia/article_parser.rb', line 275

# Build the bibliographic item used as a manifestation of the article.
#
# The carrier is "online" for the "epub" date type and "print" for any
# other type.
#
# @param date [String] date
# @param type [String] date type
# @return [RelatonBipm::BipmBibliographicItem]
def bibitem(date, type)
  manifestation_date = RelatonBib::BibliographicDate.new(type: type, on: date)
  carrier =
    if type == "epub"
      "online"
    else
      "print"
    end
  BipmBibliographicItem.new(
    title: parse_title,
    date: [manifestation_date],
    medium: RelatonBib::Medium.new(carrier: carrier),
  )
end

#create_docid(id, type, primary = nil) ⇒ RelatonBib::DocumentIdentifier

Create document identifier

Parameters:

  • id (String)

    document id

  • type (String)

    id type

  • primary (Boolean, nil) (defaults to: nil)

    is primary id

Returns:

  • (RelatonBib::DocumentIdentifier)

    document identifier



93
94
95
# File 'lib/relaton_bipm/rawdata_bipm_metrologia/article_parser.rb', line 93

# Wrap an identifier string in a document identifier object.
#
# @param id [String] document id
# @param type [String] id type
# @param primary [Boolean, nil] is primary id
# @return [RelatonBib::DocumentIdentifier] document identifier
def create_docid(id, type, primary = nil)
  attrs = { id: id, type: type, primary: primary }
  RelatonBib::DocumentIdentifier.new(**attrs)
end

#create_organization(contrib) ⇒ Object



132
133
134
# File 'lib/relaton_bipm/rawdata_bipm_metrologia/article_parser.rb', line 132

# Build an organization named after the contributor's `collab` element.
#
# @param contrib [Nokogiri::XML::Element] contributor element
# @return [RelatonBib::Organization]
def create_organization(contrib)
  collab = contrib.at("./collab")
  RelatonBib::Organization.new(name: collab.text)
end

#create_person(contrib) ⇒ Object



125
126
127
128
129
130
# File 'lib/relaton_bipm/rawdata_bipm_metrologia/article_parser.rb', line 125

# Build a person from the contributor's `name` element.
#
# @param contrib [Nokogiri::XML::Element] contributor element
# @return [RelatonBib::Person, nil] nil when the contributor has no `name`
def create_person(contrib)
  name_node = contrib.at("./name")
  if name_node
    RelatonBib::Person.new(name: fullname(name_node), affiliation: affiliation(contrib))
  end
end

#date_part(date, type) ⇒ Object



217
218
219
220
221
222
# File 'lib/relaton_bipm/rawdata_bipm_metrologia/article_parser.rb', line 217

# Read one component of a date element as a zero-padded string.
#
# @param date [Nokogiri::XML::Element] date element
# @param type [String] name of the child element to read (e.g. "month")
# @return [String] the child's text left-padded with "0" to two characters,
#   or "01" when the child is missing or empty
def date_part(date, type)
  value = date.at("./#{type}")&.text.to_s
  value.empty? ? "01" : value.rjust(2, "0")
end

#dates {|date, type| ... } ⇒ Array<String, Object>

Parse date

Yields:

  • (date, type)

    date and type

Returns:

  • (Array<String, Object>)

    string date or whatever block returns



208
209
210
211
212
213
214
215
# File 'lib/relaton_bipm/rawdata_bipm_metrologia/article_parser.rb', line 208

# Turn every `pub-date` element of the article metadata into a
# "YYYY-MM-DD" string; missing month or day default to "01" (see date_part).
#
# @yield [date, type] date string and the element's `pub-type` attribute
# @return [Array<String, Object>] date strings, or whatever the block
#   returns for each date when a block is given
def dates
  @meta.xpath("./pub-date").map do |pub_date|
    month = date_part(pub_date, "month")
    day = date_part(pub_date, "day")
    year = pub_date.at("./year").text
    iso_date = [year, month, day].join("-")
    if block_given?
      yield(iso_date, pub_date[:"pub-type"])
    else
      iso_date
    end
  end
end

#forename(given_name) ⇒ Array<RelatonBib::Forename>

Parse forename

Parameters:

  • given_name (Nokogiri::XML::Element, nil)

    given-names element (its text is parsed; nil yields an empty array)

Returns:

  • (Array<RelatonBib::Forename>)

    array of forenames



176
177
178
179
180
181
182
183
184
185
186
187
188
189
# File 'lib/relaton_bipm/rawdata_bipm_metrologia/article_parser.rb', line 176

# Parse the text of a given-names element into forenames.
#
# A word of two or more characters becomes a forename; a single character
# that follows it (separated by whitespace) is stored as that forename's
# initial. A single-character word is itself an initial without a name.
#
# Compared with the ASCII-only `\w`, the `[[:word:]]` class is Unicode-aware,
# so names with non-ASCII letters are no longer cut at the first such letter.
# When a single-character word is followed by another single character
# (e.g. "J A"), both are kept as initials; previously the second one was
# captured and then discarded.
#
# @param given_name [#text, nil] given-names element
# @return [Array<RelatonBib::Forename>] array of forenames
def forename(given_name) # rubocop:disable Metrics/MethodLength
  return [] unless given_name

  build = lambda do |content, initial|
    RelatonBib::Forename.new(content: content, language: ["en"], script: ["Latn"], initial: initial)
  end
  given_name.text.scan(/([[:word:]]+)(?:\s([[:word:]])(?:\s|$))?/).flat_map do |nm, int|
    if nm.size == 1
      # `nm` is an initial itself; a trailing captured initial is a separate one.
      [nm, int].compact.map { |initial| build.call(nil, initial) }
    else
      [build.call(nm, int)]
    end
  end
end

#fullname(name) ⇒ RelatonBib::FullName

Create full name

Parameters:

  • name (Nokogiri::XML::Element)

    name element of a contributor

Returns:

  • (RelatonBib::FullName)

    full name



163
164
165
166
167
# File 'lib/relaton_bipm/rawdata_bipm_metrologia/article_parser.rb', line 163

# Build a full name from the `given-names` and `surname` children of a
# name element; whichever of the two is present is joined with a space.
#
# @param name [Nokogiri::XML::Element] name element
# @return [RelatonBib::FullName] full name
def fullname(name)
  nodes = [name.at("./given-names"), name.at("./surname")]
  joined = nodes.reject(&:nil?).map { |node| node.text }.join(" ")
  RelatonBib::FullName.new(
    completename: RelatonBib::LocalizedString.new(joined, "en", "Latn"),
  )
end

#journal_title ⇒ String

Parse journal title

Returns:

  • (String)

    journal title



80
81
82
# File 'lib/relaton_bipm/rawdata_bipm_metrologia/article_parser.rb', line 80

# Read the journal title from the journal metadata of the article.
#
# @return [String] journal title
def journal_title
  title_node = @doc.at("./front/journal-meta/journal-title-group/journal-title")
  title_node.text
end

#parse ⇒ RelatonBipm::BipmBibliographicItem

Create new document

Returns:



40
41
42
43
# File 'lib/relaton_bipm/rawdata_bipm_metrologia/article_parser.rb', line 40

# Build the bibliographic item by calling `parse_<attr>` for every
# attribute listed in ATTRS and passing the results as keyword arguments.
#
# @return [RelatonBipm::BipmBibliographicItem]
def parse
  attrs = ATTRS.each_with_object({}) do |attr, acc|
    acc[attr] = send("parse_#{attr}")
  end
  BipmBibliographicItem.new(**attrs)
end

#parse_abstract ⇒ Array<RelatonBib::FormattedString>

Parse abstract

Returns:

  • (Array<RelatonBib::FormattedString>)

    array of abstracts



248
249
250
251
252
253
254
# File 'lib/relaton_bipm/rawdata_bipm_metrologia/article_parser.rb', line 248

# Convert every `abstract` element of the article metadata into a formatted
# string holding the element's inner HTML.
#
# @return [Array<RelatonBib::FormattedString>] array of abstracts
def parse_abstract
  @meta.xpath("./abstract").map do |node|
    attrs = {
      content: node.inner_html,
      language: node[:"xml:lang"],
      script: ["Latn"],
      format: "text/html",
    }
    RelatonBib::FormattedString.new(**attrs)
  end
end

#parse_contributor ⇒ Array<RelatonBib::Contributor>

Parse contributor

Returns:

  • (Array<RelatonBib::Contributor>)

    array of contributors



118
119
120
121
122
123
# File 'lib/relaton_bipm/rawdata_bipm_metrologia/article_parser.rb', line 118

# Build one contribution per `contrib` element. The entity is a person when
# create_person returns one, otherwise an organization; the role type is the
# element's `contrib-type` attribute.
#
# @return [Array<RelatonBib::ContributionInfo>] array of contributors
def parse_contributor
  @meta.xpath("./contrib-group/contrib").map do |contrib|
    person = create_person(contrib)
    entity = person || create_organization(contrib)
    role = { type: contrib[:"contrib-type"] }
    RelatonBib::ContributionInfo.new(entity: entity, role: [role])
  end
end

#parse_copyright ⇒ Array<RelatonBib::CopyrightAssociation>

Parse copyright

Returns:

  • (Array<RelatonBib::CopyrightAssociation>)

    array of copyright associations



229
230
231
232
233
234
235
236
237
238
239
240
241
# File 'lib/relaton_bipm/rawdata_bipm_metrologia/article_parser.rb', line 229

# Parse copyright associations from the `permissions` elements.
#
# The copyright statement is split on " & " into holders; for each holder the
# first run of space-separated ASCII-letter words is taken as the
# organization name (this drops a leading copyright sign and year).
#
# Fixes over the previous version:
# - the character class is `[A-Za-z]`; the former `[A-z]` range also matched
#   `[`, `\`, `]`, `^`, `_` and the backtick, which sit between "Z" and "a";
# - a `permissions` element without a copyright statement, or whose statement
#   yields no owner name, is skipped instead of raising on nil.
#
# @return [Array<RelatonBib::CopyrightAssociation>] array of copyright
#   associations
def parse_copyright
  @meta.xpath("./permissions").each_with_object([]) do |permissions, associations|
    year = permissions.at("./copyright-year")
    statement = permissions.at("./copyright-statement")
    next unless year && statement

    owners = statement.text.split(" & ").map do |holder|
      name = holder[/[A-Za-z]+(?:\s[A-Za-z]+)*/]
      next unless name # holder contains no letters

      org = RelatonBib::Organization.new name: name
      RelatonBib::ContributionInfo.new(entity: org)
    end.compact
    next if owners.empty?

    associations << RelatonBib::CopyrightAssociation.new(owner: owners, from: year.text)
  end
end

#parse_date ⇒ Array<RelatonBib::BibliographicDate>

Parse date

Returns:

  • (Array<RelatonBib::BibliographicDate>)

    array of dates



196
197
198
199
# File 'lib/relaton_bipm/rawdata_bipm_metrologia/article_parser.rb', line 196

# Parse the publication date: the earliest of the article's pub-dates.
#
# Dates are "YYYY-MM-DD" strings (see #dates), so the lexicographic minimum
# is the earliest one. When the article has no pub-date at all an empty
# array is returned; previously `nil` was passed as `on:` in that case.
# NOTE(review): RelatonBib::BibliographicDate is expected to reject a nil
# `on:` - confirm against the relaton-bib version in use.
#
# @return [Array<RelatonBib::BibliographicDate>] array of dates
def parse_date
  on = dates.min
  return [] unless on

  [RelatonBib::BibliographicDate.new(type: "published", on: on)]
end

#parse_docid ⇒ Array<RelatonBib::DocumentIdentifier>

Parse docid

Returns:

  • (Array<RelatonBib::DocumentIdentifier>)

    array of document identifiers



50
51
52
53
54
55
56
57
# File 'lib/relaton_bipm/rawdata_bipm_metrologia/article_parser.rb', line 50

# Build the document identifiers: a primary "BIPM" identifier made of the
# journal title and the journal/volume/article string, followed by one
# identifier per DOI `article-id` element.
#
# @return [Array<RelatonBib::DocumentIdentifier>] array of document
#   identifiers
def parse_docid
  primary = create_docid("#{journal_title} #{volume_issue_article}", "BIPM", true)
  dois = @meta.xpath("./article-id[@pub-id-type='doi']").map do |node|
    create_docid(node.text, node["pub-id-type"])
  end
  [primary, *dois]
end

#parse_doctype ⇒ Object



318
319
320
# File 'lib/relaton_bipm/rawdata_bipm_metrologia/article_parser.rb', line 318

# Document type of the parsed item; always built with type "article".
#
# @return [DocumentType]
def parse_doctype
  DocumentType.new(type: "article")
end

#parse_extent ⇒ Array<RelatonBib::Extent>

Parse extent

Returns:

  • (Array<RelatonBib::Extent>)

    array of extents



299
300
301
302
303
304
305
306
307
308
309
310
311
312
# File 'lib/relaton_bipm/rawdata_bipm_metrologia/article_parser.rb', line 299

# Build one locality per `volume`, `issue` and `fpage` element of the article
# metadata. `fpage` becomes a "page" locality whose end is the text of the
# `lpage` element when there is one; the other localities have no end.
#
# @return [Array<RelatonBib::Locality>] array of localities
def parse_extent
  @meta.xpath("./volume|./issue|./fpage").map do |node|
    first_page = node.name == "fpage"
    kind = first_page ? "page" : node.name
    last = first_page ? @meta.at("./lpage")&.text : nil
    RelatonBib::Locality.new(kind, node.text, last)
  end
end


322
323
324
325
326
327
328
# File 'lib/relaton_bipm/rawdata_bipm_metrologia/article_parser.rb', line 322

# Build the links of the document: for every DOI `article-id` element the
# https://doi.org/ URL is emitted twice, once typed "src" and once "doi".
#
# @return [Array<RelatonBib::TypedUri>] array of links
def parse_link
  @meta.xpath("./article-id[@pub-id-type='doi']").flat_map do |doi|
    url = "https://doi.org/#{doi.text}"
    %w[src doi].map { |kind| RelatonBib::TypedUri.new(content: url, type: kind) }
  end
end

#parse_relation ⇒ Array<RelatonBib::DocumentRelation>

Parse relation

Returns:

  • (Array<RelatonBib::DocumentRelation>)

    array of document relations



261
262
263
264
265
# File 'lib/relaton_bipm/rawdata_bipm_metrologia/article_parser.rb', line 261

# Build one "hasManifestation" relation per publication date, each pointing
# to a bibliographic item created for that date and date type.
#
# @return [Array<RelatonBib::DocumentRelation>] array of document relations
def parse_relation
  dates do |date, pub_type|
    manifestation = bibitem(date, pub_type)
    RelatonBib::DocumentRelation.new(type: "hasManifestation", bibitem: manifestation)
  end
end

#parse_series ⇒ Array<RelatonBib::Series>

Parse series

Returns:

  • (Array<RelatonBib::Series>)

    array of series



287
288
289
290
291
292
# File 'lib/relaton_bipm/rawdata_bipm_metrologia/article_parser.rb', line 287

# Build the single series of the document, titled with the journal title.
#
# @return [Array<RelatonBib::Series>] array of series
def parse_series
  title_attrs = { content: journal_title, language: ["en"], script: ["Latn"] }
  series_title = RelatonBib::TypedTitleString.new(**title_attrs)
  [RelatonBib::Series.new(title: series_title)]
end

#parse_title ⇒ Array<RelatonBib::TypedTitleString>

Parse title

Returns:

  • (Array<RelatonBib::TypedTitleString>)

    array of title strings



102
103
104
105
106
107
108
109
110
111
# File 'lib/relaton_bipm/rawdata_bipm_metrologia/article_parser.rb', line 102

# Build a title for every non-empty `article-title` element. The format is
# "text/plain" when HTML-escaping the inner HTML leaves it unchanged (no
# markup or entities), otherwise "text/html".
#
# @return [Array<RelatonBib::TypedTitleString>] array of title strings
def parse_title
  @meta.xpath("./title-group/article-title").each_with_object([]) do |node, titles|
    next if node.text.empty?

    html = node.inner_html
    fmt = CGI.escapeHTML(html) == html ? "text/plain" : "text/html"
    titles << RelatonBib::TypedTitleString.new(
      content: html, language: node[:"xml:lang"], script: "Latn", format: fmt,
    )
  end
end

#parse_type ⇒ Object



314
315
316
# File 'lib/relaton_bipm/rawdata_bipm_metrologia/article_parser.rb', line 314

# Bibliographic item type.
#
# @return [String] always "article"
def parse_type
  "article"
end

#volume_issue_article ⇒ String

Build the journal, volume and article part of the document identifier

Returns:

  • (String)

    journal, volume and article joined by spaces (missing parts are skipped)



64
65
66
67
68
69
# File 'lib/relaton_bipm/rawdata_bipm_metrologia/article_parser.rb', line 64

# Join the journal, volume and article values given to the constructor with
# single spaces, leaving out any that are nil.
#
# @return [String] e.g. "29 6 373"
def volume_issue_article
  parts = [@journal, @volume, @article]
  parts.compact.join(" ")
end