Class: RelatonBipm::RawdataBipmMetrologia::ArticleParser
- Inherits:
-
Object
- Object
- RelatonBipm::RawdataBipmMetrologia::ArticleParser
- Defined in:
- lib/relaton_bipm/rawdata_bipm_metrologia/article_parser.rb
Constant Summary collapse
- ATTRS =
%i[docid title contributor date copyright abstract relation series extent type doctype link].freeze
Class Method Summary collapse
-
.parse(path) ⇒ RelatonBipm::BipmBibliographicItem
Create new parser and parse document.
Instance Method Summary collapse
-
#affiliation(contrib) ⇒ Array<RelatonBib::Affiliation>
Parse affiliations.
-
#bibitem(date, type) ⇒ RelatonBipm::BipmBibliographicItem
Create bibitem.
-
#create_docid(id, type, primary = nil) ⇒ RelatonBib::DocumentIdentifier
Create document identifier.
- #create_organization(contrib) ⇒ Object
- #create_person(contrib) ⇒ Object
- #date_part(date, type) ⇒ Object
-
#dates {|date, type| ... } ⇒ Array<String, Object>
Parse date.
-
#forename(given_name) ⇒ Array<RelatonBib::Forename>
Parse forename.
-
#fullname(name) ⇒ RelatonBib::FullName
Create full name.
-
#initialize(doc, journal, volume, article) ⇒ ArticleParser
constructor
Initialize parser.
-
#journal_title ⇒ String
Parse journal title.
-
#parse ⇒ RelatonBipm::BipmBibliographicItem
Create new document.
-
#parse_abstract ⇒ Array<RelatonBib::FormattedString>
Parse abstract.
-
#parse_contributor ⇒ Array<RelatonBib::Contributor>
Parse contributor.
-
#parse_copyright ⇒ Array<RelatonBib::CopyrightAssociation>
Parse copyright.
-
#parse_date ⇒ Array<RelatonBib::BibliographicDate>
Parse date.
-
#parse_docid ⇒ Array<RelatonBib::DocumentIdentifier>
Parse docid.
- #parse_doctype ⇒ Object
-
#parse_extent ⇒ Array<RelatonBib::Extent>
Parse extent.
- #parse_link ⇒ Object
-
#parse_relation ⇒ Array<RelatonBib::DocumentRelation>
Parse relation.
-
#parse_series ⇒ Array<RelatonBib::Series>
Parse series.
-
#parse_title ⇒ Array<RelatonBib::TypedTitleString>
Parse title.
- #parse_type ⇒ Object
-
#volume_issue_article ⇒ String
Join journal, volume and article into a single space-separated string.
Constructor Details
#initialize(doc, journal, volume, article) ⇒ ArticleParser
Initialize parser
27 28 29 30 31 32 33 |
# File 'lib/relaton_bipm/rawdata_bipm_metrologia/article_parser.rb', line 27

#
# Initialize parser.
#
# Keeps the `/article` root and the `/article/front/article-meta` node of
# the document, together with the journal, volume and article values given
# by the caller.
#
# @param doc [#at] parsed XML document, queried with absolute XPath
# @param journal [Object] stored unchanged in @journal
# @param volume [Object] stored unchanged in @volume
# @param article [Object] stored unchanged in @article
#
def initialize(doc, journal, volume, article)
  @doc = doc.at("/article")
  @meta = doc.at("/article/front/article-meta")
  @journal = journal
  @volume = volume
  @article = article
end
Class Method Details
.parse(path) ⇒ RelatonBipm::BipmBibliographicItem
Create new parser and parse document
13 14 15 16 17 |
# File 'lib/relaton_bipm/rawdata_bipm_metrologia/article_parser.rb', line 13

#
# Create new parser and parse document.
#
# The journal, volume and article values are taken from the name of the
# directory that contains the file: the name is split on "_" and the first
# segment is discarded.
#
# @param path [String] path to the XML file, using "/" as separator
#
# @return [RelatonBipm::BipmBibliographicItem] result of the instance #parse
#
def self.parse(path)
  content = File.read(path, encoding: "UTF-8")
  doc = Nokogiri::XML(content)
  parent_dir = path.split("/")[-2]
  journal, volume, article = parent_dir.split("_").drop(1)
  new(doc, journal, volume, article).parse
end
Instance Method Details
#affiliation(contrib) ⇒ Array<RelatonBib::Affiliation>
Parse affiliations
143 144 145 146 147 148 149 150 151 152 153 154 |
# File 'lib/relaton_bipm/rawdata_bipm_metrologia/article_parser.rb', line 143

#
# Parse affiliations.
#
# For every `xref[@ref-type='aff']` of the contributor, the node following
# the `<label>` of the referenced `<aff>` is read and split on ", ". When
# there are at least three parts, the last two are used as city and country
# and the rest as the organization name. Otherwise the whole text is used
# as the organization name and no address is created.
#
# References whose `<aff>` node cannot be found, or whose text is blank,
# are skipped instead of raising or producing an empty-named organization.
#
# @param contrib [#xpath] contributor element
#
# @return [Array<RelatonBib::Affiliation>] affiliations
#
def affiliation(contrib) # rubocop:disable Metrics/AbcSize
  contrib.xpath("./xref[@ref-type='aff']").each_with_object([]) do |x, affs|
    a = @meta.at("./contrib-group/aff[@id='#{x[:rid]}']/label/following-sibling::node()")
    next unless a # dangling reference or <aff> without a <label>

    text = a.text
    next if text.strip.empty?

    parts = text.split(", ")
    city = country = nil
    if parts.size >= 3
      orgname = parts[0..-3].join(", ")
      city, country = parts[-2..]
    else
      # Too few parts to tell name, city and country apart.
      orgname = text
    end
    address = []
    address << RelatonBib::Address.new(city: city, country: country) if city && country
    org = RelatonBib::Organization.new name: orgname, contact: address
    affs << RelatonBib::Affiliation.new(organization: org)
  end
end
#bibitem(date, type) ⇒ RelatonBipm::BipmBibliographicItem
Create bibitem
275 276 277 278 279 280 |
# File 'lib/relaton_bipm/rawdata_bipm_metrologia/article_parser.rb', line 275

#
# Create bibitem.
#
# The item carries the parsed titles, a single date of the given type and a
# medium whose carrier is "online" for type "epub" and "print" otherwise.
#
# @param date [String] value passed as `on:` of the bibliographic date
# @param type [String] date type
#
# @return [RelatonBipm::BipmBibliographicItem] bibitem
#
def bibitem(date, type)
  bib_date = RelatonBib::BibliographicDate.new(type: type, on: date)
  carrier =
    if type == "epub"
      "online"
    else
      "print"
    end
  medium = RelatonBib::Medium.new(carrier: carrier)
  BipmBibliographicItem.new(title: parse_title, date: [bib_date], medium: medium)
end
#create_docid(id, type, primary = nil) ⇒ RelatonBib::DocumentIdentifier
Create document identifier
93 94 95 |
# File 'lib/relaton_bipm/rawdata_bipm_metrologia/article_parser.rb', line 93

#
# Create document identifier.
#
# @param id [String] identifier text
# @param type [String] identifier type
# @param primary [Boolean, nil] forwarded as `primary:`; nil by default
#
# @return [RelatonBib::DocumentIdentifier] document identifier
#
def create_docid(id, type, primary = nil)
  RelatonBib::DocumentIdentifier.new(
    id: id,
    type: type,
    primary: primary,
  )
end
#create_organization(contrib) ⇒ Object
132 133 134 |
# File 'lib/relaton_bipm/rawdata_bipm_metrologia/article_parser.rb', line 132

#
# Create an organization named after the contributor's `<collab>` element.
#
# @param contrib [#at] contributor element; raises NoMethodError when it
#   has no `./collab` child, because `text` is then called on nil
#
# @return [RelatonBib::Organization] organization
#
def create_organization(contrib)
  collab_name = contrib.at("./collab").text
  RelatonBib::Organization.new(name: collab_name)
end
#create_person(contrib) ⇒ Object
125 126 127 128 129 130 |
# File 'lib/relaton_bipm/rawdata_bipm_metrologia/article_parser.rb', line 125

#
# Create a person from the contributor's `<name>` element.
#
# @param contrib [#at] contributor element
#
# @return [RelatonBib::Person, nil] person, or nil when the contributor has
#   no `./name` child
#
def create_person(contrib)
  name_node = contrib.at("./name")
  if name_node
    RelatonBib::Person.new(name: fullname(name_node), affiliation: affiliation(contrib))
  end
end
#date_part(date, type) ⇒ Object
217 218 219 220 221 222 |
# File 'lib/relaton_bipm/rawdata_bipm_metrologia/article_parser.rb', line 217

#
# Read one component of a date element as a string of at least two
# characters.
#
# @param date [#at] date element
# @param type [String] name of the child element to read
#
# @return [String] the child's text left-padded with "0" to width 2, or
#   "01" when the child is missing or its text is empty
#
def date_part(date, type)
  value = date.at("./#{type}")&.text
  value.to_s.empty? ? "01" : value.rjust(2, "0")
end
#dates {|date, type| ... } ⇒ Array<String, Object>
Parse date
208 209 210 211 212 213 214 215 |
# File 'lib/relaton_bipm/rawdata_bipm_metrologia/article_parser.rb', line 208

#
# Parse date.
#
# Builds a "year-month-day" string for every `./pub-date` element; month
# and day come from #date_part.
#
# @yield [date, type] optional; when given, the block result replaces the
#   date string in the returned array
# @yieldparam date [String] date string
# @yieldparam type [String, nil] value of the "pub-type" attribute
#
# @return [Array<String, Object>] date strings, or block results
#
def dates
  @meta.xpath("./pub-date").map do |pub_date|
    month = date_part(pub_date, "month")
    day = date_part(pub_date, "day")
    iso_date = [pub_date.at("./year").text, month, day].join("-")
    next iso_date unless block_given?

    yield(iso_date, pub_date[:"pub-type"])
  end
end
#forename(given_name) ⇒ Array<RelatonBib::Forename>
Parse forename
176 177 178 179 180 181 182 183 184 185 186 187 188 189 |
# File 'lib/relaton_bipm/rawdata_bipm_metrologia/article_parser.rb', line 176

#
# Parse forename.
#
# The text is scanned for runs of word characters, each optionally followed
# by a single-character token. A one-character run is treated as an initial
# (content is nil); otherwise the run is the name and the following
# single-character token, if any, is its initial.
#
# `[[:word:]]` is used instead of `\w`: in Ruby `\w` only matches ASCII
# word characters, so names with non-ASCII letters were cut or split.
#
# NOTE(review): for input such as "J A" the scan yields one pair ("J", "A")
# and the second initial is dropped; confirm whether that is intended.
#
# @param given_name [#text, nil] given-names element
#
# @return [Array<RelatonBib::Forename>] forenames, empty when given_name is nil
#
def forename(given_name) # rubocop:disable Metrics/MethodLength
  return [] unless given_name

  given_name.text.scan(/([[:word:]]+)(?:\s([[:word:]])(?:\s|$))?/).map do |nm, int|
    if nm.size == 1
      name = nil
      init = nm
    else
      name = nm
      init = int
    end
    RelatonBib::Forename.new(content: name, language: ["en"], script: ["Latn"], initial: init)
  end
end
#fullname(name) ⇒ RelatonBib::FullName
Create full name
163 164 165 166 167 |
# File 'lib/relaton_bipm/rawdata_bipm_metrologia/article_parser.rb', line 163

#
# Create full name.
#
# The complete name is the text of `./given-names` and `./surname`, in that
# order, joined by a space; a missing element is left out.
#
# @param name [#at] name element
#
# @return [RelatonBib::FullName] full name
#
def fullname(name)
  nodes = %w[./given-names ./surname].map { |xp| name.at(xp) }.compact
  joined = nodes.map(&:text).join(" ")
  RelatonBib::FullName.new(
    completename: RelatonBib::LocalizedString.new(joined, "en", "Latn"),
  )
end
#journal_title ⇒ String
Parse journal title
80 81 82 |
# File 'lib/relaton_bipm/rawdata_bipm_metrologia/article_parser.rb', line 80

#
# Parse journal title.
#
# @return [String] text of `front/journal-meta/journal-title-group/journal-title`
#   under the article root; raises NoMethodError when that element is absent
#
def journal_title
  title_node = @doc.at("./front/journal-meta/journal-title-group/journal-title")
  title_node.text
end
#parse ⇒ RelatonBipm::BipmBibliographicItem
Create new document
40 41 42 43 |
# File 'lib/relaton_bipm/rawdata_bipm_metrologia/article_parser.rb', line 40

#
# Create new document.
#
# Calls `parse_<attr>` for every entry of ATTRS and passes the results as
# keyword arguments to the bibliographic item.
#
# @return [RelatonBipm::BipmBibliographicItem] bibliographic item
#
def parse
  attrs = ATTRS.each_with_object({}) do |attr, collected|
    collected[attr] = send("parse_#{attr}")
  end
  BipmBibliographicItem.new(**attrs)
end
#parse_abstract ⇒ Array<RelatonBib::FormattedString>
Parse abstract
248 249 250 251 252 253 254 |
# File 'lib/relaton_bipm/rawdata_bipm_metrologia/article_parser.rb', line 248

#
# Parse abstract.
#
# Every `./abstract` element becomes one formatted string holding the
# element's inner HTML, its "xml:lang" attribute as language, script
# ["Latn"] and format "text/html".
#
# @return [Array<RelatonBib::FormattedString>] abstracts
#
def parse_abstract
  @meta.xpath("./abstract").map do |abstract|
    RelatonBib::FormattedString.new(
      content: abstract.inner_html,
      language: abstract[:"xml:lang"],
      script: ["Latn"],
      format: "text/html",
    )
  end
end
#parse_contributor ⇒ Array<RelatonBib::Contributor>
Parse contributor
118 119 120 121 122 123 |
# File 'lib/relaton_bipm/rawdata_bipm_metrologia/article_parser.rb', line 118

#
# Parse contributor.
#
# Each `./contrib-group/contrib` element becomes a person when it has a
# name, otherwise an organization. The "contrib-type" attribute is used as
# the role type.
#
# @return [Array<RelatonBib::ContributionInfo>] contributors
#
def parse_contributor
  @meta.xpath("./contrib-group/contrib").map do |contrib|
    entity = create_person(contrib) || create_organization(contrib)
    role = { type: contrib[:"contrib-type"] }
    RelatonBib::ContributionInfo.new(entity: entity, role: [role])
  end
end
#parse_copyright ⇒ Array<RelatonBib::CopyrightAssociation>
Parse copyright
229 230 231 232 233 234 235 236 237 238 239 240 241 |
# File 'lib/relaton_bipm/rawdata_bipm_metrologia/article_parser.rb', line 229

#
# Parse copyright.
#
# For every `./permissions` element that has both a `copyright-year` and a
# `copyright-statement`, the statement is split on " & " and the first run
# of space-separated ASCII-letter words in each segment becomes an owner
# organization.
#
# Differences from the previous implementation:
# - the character class is `[A-Za-z]`; the old `[A-z]` range also matched
#   `[`, `\`, `]`, `^`, `_` and the backtick;
# - permissions without a statement are skipped instead of raising;
# - segments without any letters no longer yield a nil-named organization,
#   and a permissions element left with no owner is skipped.
#
# @return [Array<RelatonBib::CopyrightAssociation>] copyrights
#
def parse_copyright
  @meta.xpath("./permissions").each_with_object([]) do |l, m|
    from = l.at("./copyright-year")
    statement = l.at("./copyright-statement")
    next unless from && statement

    owner = statement.text.split(" & ").each_with_object([]) do |c, owners|
      name = c[/[A-Za-z]+(?:\s[A-Za-z]+)*/]
      next unless name

      org = RelatonBib::Organization.new name: name
      owners << RelatonBib::ContributionInfo.new(entity: org)
    end
    next if owner.empty?

    m << RelatonBib::CopyrightAssociation.new(owner: owner, from: from.text)
  end
end
#parse_date ⇒ Array<RelatonBib::BibliographicDate>
Parse date
196 197 198 199 |
# File 'lib/relaton_bipm/rawdata_bipm_metrologia/article_parser.rb', line 196

#
# Parse date.
#
# Uses the smallest of the date strings returned by #dates as the
# "published" date.
#
# @return [Array<RelatonBib::BibliographicDate>] single-element array
#
def parse_date
  [RelatonBib::BibliographicDate.new(type: "published", on: dates.min)]
end
#parse_docid ⇒ Array<RelatonBib::DocumentIdentifier>
Parse docid
50 51 52 53 54 55 56 57 |
# File 'lib/relaton_bipm/rawdata_bipm_metrologia/article_parser.rb', line 50

#
# Parse docid.
#
# The first identifier is the primary "BIPM" one, built from the journal
# title followed by the result of #volume_issue_article. One identifier
# follows for every `article-id` whose "pub-id-type" is "doi".
#
# @return [Array<RelatonBib::DocumentIdentifier>] document identifiers
#
def parse_docid
  primary = create_docid("#{journal_title} #{volume_issue_article}", "BIPM", true)
  dois = @meta.xpath("./article-id[@pub-id-type='doi']").map do |node|
    create_docid(node.text, node["pub-id-type"])
  end
  [primary, *dois]
end
#parse_doctype ⇒ Object
318 319 320 |
# File 'lib/relaton_bipm/rawdata_bipm_metrologia/article_parser.rb', line 318

#
# Build the document type, which is always "article".
#
# @return [DocumentType] document type
#
def parse_doctype
  DocumentType.new(type: "article")
end
#parse_extent ⇒ Array<RelatonBib::Extent>
Parse extent
299 300 301 302 303 304 305 306 307 308 309 310 311 312 |
# File 'lib/relaton_bipm/rawdata_bipm_metrologia/article_parser.rb', line 299

#
# Parse extent.
#
# `./volume` and `./issue` elements become localities typed by the element
# name. A `./fpage` element becomes a "page" locality whose upper bound is
# the text of `./lpage`, when present.
#
# @return [Array<RelatonBib::Locality>] localities
#
def parse_extent
  @meta.xpath("./volume|./issue|./fpage").map do |node|
    if node.name == "fpage"
      last_page = @meta.at("./lpage")&.text
      RelatonBib::Locality.new("page", node.text, last_page)
    else
      RelatonBib::Locality.new(node.name, node.text, nil)
    end
  end
end
#parse_link ⇒ Object
322 323 324 325 326 327 328 |
# File 'lib/relaton_bipm/rawdata_bipm_metrologia/article_parser.rb', line 322

#
# Build links from DOI identifiers.
#
# Every `article-id` whose "pub-id-type" is "doi" yields two URIs with the
# same `https://doi.org/<doi>` content: one of type "src", then one of type
# "doi".
#
# @return [Array<RelatonBib::TypedUri>] links
#
def parse_link
  @meta.xpath("./article-id[@pub-id-type='doi']").flat_map do |doi|
    url = "https://doi.org/#{doi.text}"
    %w[src doi].map { |kind| RelatonBib::TypedUri.new(content: url, type: kind) }
  end
end
#parse_relation ⇒ Array<RelatonBib::DocumentRelation>
Parse relation
261 262 263 264 265 |
# File 'lib/relaton_bipm/rawdata_bipm_metrologia/article_parser.rb', line 261

#
# Parse relation.
#
# Creates one "hasManifestation" relation per date yielded by #dates; the
# related item is built by #bibitem from the date and its type.
#
# @return [Array<RelatonBib::DocumentRelation>] relations
#
def parse_relation
  dates do |on, pub_type|
    manifestation = bibitem(on, pub_type)
    RelatonBib::DocumentRelation.new(type: "hasManifestation", bibitem: manifestation)
  end
end
#parse_series ⇒ Array<RelatonBib::Series>
Parse series
287 288 289 290 291 292 |
# File 'lib/relaton_bipm/rawdata_bipm_metrologia/article_parser.rb', line 287

#
# Parse series.
#
# The only series is titled with the journal title, language ["en"] and
# script ["Latn"].
#
# @return [Array<RelatonBib::Series>] single-element array
#
def parse_series
  series_title = RelatonBib::TypedTitleString.new(
    content: journal_title,
    language: ["en"],
    script: ["Latn"],
  )
  [RelatonBib::Series.new(title: series_title)]
end
#parse_title ⇒ Array<RelatonBib::TypedTitleString>
Parse title
102 103 104 105 106 107 108 109 110 111 |
# File 'lib/relaton_bipm/rawdata_bipm_metrologia/article_parser.rb', line 102

#
# Parse title.
#
# Every `./title-group/article-title` element with non-empty text becomes a
# title. The format is "text/plain" when HTML-escaping the inner HTML leaves
# it unchanged, and "text/html" otherwise.
#
# @return [Array<RelatonBib::TypedTitleString>] titles
#
def parse_title
  @meta.xpath("./title-group/article-title").each_with_object([]) do |node, titles|
    next if node.text.empty?

    html = node.inner_html
    format =
      if CGI.escapeHTML(html) == html
        "text/plain"
      else
        "text/html"
      end
    titles << RelatonBib::TypedTitleString.new(
      content: html, language: node[:"xml:lang"], script: "Latn", format: format,
    )
  end
end
#parse_type ⇒ Object
314 315 316 |
# File 'lib/relaton_bipm/rawdata_bipm_metrologia/article_parser.rb', line 314

#
# Bibliographic item type, which is always "article".
#
# @return [String] "article"
#
def parse_type
  "article"
end
#volume_issue_article ⇒ String
Join journal, volume and article into a single space-separated string
64 65 66 67 68 69 |
# File 'lib/relaton_bipm/rawdata_bipm_metrologia/article_parser.rb', line 64

#
# Join the journal, volume and article values given to the constructor.
#
# @return [String] the non-nil values of @journal, @volume and @article,
#   in that order, separated by single spaces
#
def volume_issue_article
  components = [@journal, @volume, @article].reject(&:nil?)
  components.join(" ")
end