Class: RelatonIeee::DataParser
- Inherits:
-
Object
- Object
- RelatonIeee::DataParser
- Defined in:
- lib/relaton_ieee/data_parser.rb
Constant Summary collapse
- DATETYPES =
{ "OriginalPub" => "created", "ePub" => "published", "LastInspecUpd" => "updated" }.freeze
- ATTRS =
%i[ docnumber title date docid contributor abstract copyright docstatus relation link keyword ics editorialgroup standard_status standard_modified pubstatus holdstatus doctype ].freeze
Instance Attribute Summary collapse
-
#doc ⇒ Object
readonly
Returns the value of attribute doc.
-
#fetcher ⇒ Object
readonly
Returns the value of attribute fetcher.
Class Method Summary collapse
-
.parse(doc, fetcher) ⇒ RelatonIeee::IeeeBibliographicItem
Parse IEEE document.
Instance Method Summary collapse
-
#create_org(name, addr = []) ⇒ RelatonBib::Organization
Create organization.
-
#docnumber ⇒ String
Parse docnumber.
-
#initialize(doc, fetcher) ⇒ DataParser
constructor
Create RelatonIeee::DataParser instance.
-
#parse ⇒ RelatonIeee::IeeeBibliographicItem
Parse IEEE document.
-
#parse_abstract ⇒ Array<RelatonBib::FormattedString>
Parse abstract.
-
#parse_contributor ⇒ Array<RelatonBib::ContributionInfo>
Parse contributors.
-
#parse_copyright ⇒ Array<RelatonBib::CopyrightAssociation>
Parse copyright.
- #parse_country_city(address) ⇒ Object
-
#parse_date ⇒ Array<RelatonBib::BibliographicDate>
Parse date.
-
#parse_date_string(date) ⇒ String
Convert date string with month name to numeric date.
-
#parse_docid ⇒ Array<RelatonBib::DocumentIdentifier>
Parse identifiers.
- #parse_docnumber ⇒ Object
-
#parse_docstatus ⇒ RelatonIeee::DocumentStatus?
Parse status.
-
#parse_doctype ⇒ RelatonIeee::DocumentType
Parse doctype.
-
#parse_editorialgroup ⇒ RelatonIeee::EditorialGroup?
Parse editorialgroup.
-
#parse_holdstatus ⇒ String?
Parse holdstatus.
-
#parse_ics ⇒ Array<RelatonBib::ICS>
Parse ICS.
-
#parse_keyword ⇒ Array<String>
Parse keyword.
-
#parse_link ⇒ Array<RelatonBib::TypedUri>
Parse link.
-
#parse_pubstatus ⇒ String?
Parse pubstatus.
-
#parse_relation ⇒ RelatonBib::DocRelationCollection
Parse relation.
-
#parse_standard_modified ⇒ String?
Parse standard modifier.
-
#parse_standard_status ⇒ String?
Parse standard status.
-
#parse_title ⇒ Array<RelatonBib::TypedTitleString>
Parse title.
-
#pubid ⇒ RelatonIeee::RawbibIdParser
Create PubID.
Constructor Details
#initialize(doc, fetcher) ⇒ DataParser
Create RelatonIeee::DataParser instance
19 20 21 22 |
# File 'lib/relaton_ieee/data_parser.rb', line 19 def initialize(doc, fetcher) @doc = doc @fetcher = fetcher end |
Instance Attribute Details
#doc ⇒ Object (readonly)
Returns the value of attribute doc.
11 12 13 |
# File 'lib/relaton_ieee/data_parser.rb', line 11 def doc @doc end |
#fetcher ⇒ Object (readonly)
Returns the value of attribute fetcher.
11 12 13 |
# File 'lib/relaton_ieee/data_parser.rb', line 11 def fetcher @fetcher end |
Class Method Details
.parse(doc, fetcher) ⇒ RelatonIeee::IeeeBibliographicItem
Parse IEEE document
32 33 34 |
# File 'lib/relaton_ieee/data_parser.rb', line 32 def self.parse(doc, fetcher) new(doc, fetcher).parse end |
Instance Method Details
#create_org(name, addr = []) ⇒ RelatonBib::Organization
Create organization
182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 |
# File 'lib/relaton_ieee/data_parser.rb', line 182 def create_org(name, addr = []) # rubocop:disable Metrics/MethodLength case name when "IEEE" abbr = name n = "Institute of Electrical and Electronics Engineers" url = "http://www.ieee.org" when "ANSI" abbr = name n = "American National Standards Institute" url = "https://www.ansi.org" else n = name end RelatonBib::Organization.new( name: n, abbreviation: abbr, url: url, contact: addr, ) end |
#docnumber ⇒ String
Parse docnumber
143 144 145 |
# File 'lib/relaton_ieee/data_parser.rb', line 143 def docnumber @docnumber ||= pubid&.to_id # doc.at("./publicationinfo/stdnumber").text end |
#parse ⇒ RelatonIeee::IeeeBibliographicItem
Parse IEEE document
41 42 43 44 45 |
# File 'lib/relaton_ieee/data_parser.rb', line 41 def parse args = { type: "standard", language: ["en"], script: ["Latn"] } ATTRS.each { |attr| args[attr] = send("parse_#{attr}") } IeeeBibliographicItem.new(**args) end |
#parse_abstract ⇒ Array<RelatonBib::FormattedString>
Parse abstract
204 205 206 207 208 209 210 |
# File 'lib/relaton_ieee/data_parser.rb', line 204 def parse_abstract doc.xpath("./volume/article/articleinfo/abstract")[0...1].map do |a| RelatonBib::FormattedString.new( content: a.text, language: "en", script: "Latn", ) end end |
#parse_contributor ⇒ Array<RelatonBib::ContributionInfo>
Parse contributors
152 153 154 155 156 157 158 159 160 161 162 163 164 |
# File 'lib/relaton_ieee/data_parser.rb', line 152 def parse_contributor # rubocop:disable Metrics/AbcSize,Metrics/MethodLength doc.xpath("./publicationinfo/publisher").map do |contrib| n = contrib.at("./publishername").text addr = contrib.xpath("./address").each_with_object([]) do |adr, ob| city, country, state = parse_country_city adr next unless city && country ob << RelatonBib::Address.new(street: [], city: city, state: state, country: country) end e = create_org n, addr RelatonBib::ContributionInfo.new entity: e, role: [type: "publisher"] end end |
#parse_copyright ⇒ Array<RelatonBib::CopyrightAssociation>
Parse copyright
217 218 219 220 221 222 223 224 225 226 |
# File 'lib/relaton_ieee/data_parser.rb', line 217 def parse_copyright doc.xpath("./publicationinfo/copyrightgroup/copyright").map do |c| owner = c.at("./holder").text.split("/").map do |own| RelatonBib::ContributionInfo.new entity: create_org(own) end RelatonBib::CopyrightAssociation.new( owner: owner, from: c.at("./year").text, ) end end |
#parse_country_city(address) ⇒ Object
166 167 168 169 170 171 172 173 |
# File 'lib/relaton_ieee/data_parser.rb', line 166 def parse_country_city(address) city = address.at("./city") return unless city city, state = city.text.split(", ") country = address.at("./country")&.text || "USA" [city, country, state] end |
#parse_date ⇒ Array<RelatonBib::BibliographicDate>
Parse date
66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 |
# File 'lib/relaton_ieee/data_parser.rb', line 66 def parse_date # rubocop:disable Metrics/AbcSize,Metrics/CyclomaticComplexity,Metrics/MethodLength,Metrics/PerceivedComplexity dates = doc.xpath("./volume/article/articleinfo/date").map do |d| da = [d.at("./year").text] m = d.at("./month")&.text if m /^(?:(?<day>\d{1,2})\s)?(?<mon>\w+)/ =~ m month = Date::ABBR_MONTHNAMES.index(mon) || Date::MONTHNAMES.index(mon) || m da << month.to_s.rjust(2, "0") end day = d.at("./day")&.text || day da << day.rjust(2, "0") if day on = da.compact.join "-" RelatonBib::BibliographicDate.new type: DATETYPES[d[:datetype]], on: on end pad = doc.at("./publicationinfo/PubApprovalDate") if pad issued = parse_date_string pad.text dates << RelatonBib::BibliographicDate.new(type: "issued", on: issued) end dates end |
#parse_date_string(date) ⇒ String
Convert date string with month name to numeric date
95 96 97 98 99 100 |
# File 'lib/relaton_ieee/data_parser.rb', line 95 def parse_date_string(date) case date when /^\d{4}$/ then date when /^\d{1,2}\s\w+\.?\s\d{4}/ then Date.parse(date).to_s end end |
#parse_docid ⇒ Array<RelatonBib::DocumentIdentifier>
Parse identifiers
107 108 109 110 111 112 113 114 115 116 117 118 119 |
# File 'lib/relaton_ieee/data_parser.rb', line 107 def parse_docid # rubocop:disable Metrics/MethodLength ids = [ { id: pubid.to_s, type: "IEEE", primary: true }, { id: pubid.to_s(trademark: true), scope: "trademark", type: "IEEE", primary: true }, ] isbn = doc.at("./publicationinfo/isbn") ids << { id: isbn.text, type: "ISBN" } if isbn doi = doc.at("./volume/article/articleinfo/articledoi") ids << { id: doi.text, type: "DOI" } if doi ids.map do |dcid| RelatonBib::DocumentIdentifier.new(**dcid) end end |
#parse_docnumber ⇒ Object
134 135 136 |
# File 'lib/relaton_ieee/data_parser.rb', line 134 def parse_docnumber docnumber end |
#parse_docstatus ⇒ RelatonIeee::DocumentStatus?
Parse status
233 234 235 236 237 238 |
# File 'lib/relaton_ieee/data_parser.rb', line 233 def parse_docstatus st = parse_standard_modified return unless %w[Draft Approved Superseded Withdrawn].include?(st) DocumentStatus.new stage: st.downcase end |
#parse_doctype ⇒ RelatonIeee::DocumentType
Parse doctype
345 346 347 348 |
# File 'lib/relaton_ieee/data_parser.rb', line 345 def parse_doctype type = parse_standard_modified == "Redline" ? "redline" : "standard" DocumentType.new type: type end |
#parse_editorialgroup ⇒ RelatonIeee::EditorialGroup?
Parse editorialgroup
297 298 299 300 301 302 |
# File 'lib/relaton_ieee/data_parser.rb', line 297 def parse_editorialgroup committee = doc.xpath( "./publicationinfo/pubsponsoringcommitteeset/pubsponsoringcommittee", ).map &:text EditorialGroup.new committee: committee if committee.any? end |
#parse_holdstatus ⇒ String?
Pasrse holdstatus
336 337 338 |
# File 'lib/relaton_ieee/data_parser.rb', line 336 def parse_holdstatus doc.at("./publicationinfo/holdstatus")&.text end |
#parse_ics ⇒ Array<RelatonBib::ICS>
Parse ICS
286 287 288 289 290 |
# File 'lib/relaton_ieee/data_parser.rb', line 286 def parse_ics doc.xpath("./publicationinfo/icscodes/code_term").map do |ics| RelatonBib::ICS.new code: ics[:codenum], text: ics.text end end |
#parse_keyword ⇒ Array<String>
Parse keyword
275 276 277 278 279 |
# File 'lib/relaton_ieee/data_parser.rb', line 275 def parse_keyword doc.xpath( "./volume/article/articleinfo/keywordset/keyword/keywordterm", ).map &:text end |
#parse_link ⇒ Array<RelatonBib::TypedUri>
Parce link
263 264 265 266 267 268 |
# File 'lib/relaton_ieee/data_parser.rb', line 263 def parse_link doc.xpath("./volume/article/articleinfo/amsid").map do |id| l = "https://ieeexplore.ieee.org/document/#{id.text}" RelatonBib::TypedUri.new content: l, type: "src" end end |
#parse_pubstatus ⇒ String?
Parse pubstatus
327 328 329 |
# File 'lib/relaton_ieee/data_parser.rb', line 327 def parse_pubstatus doc.at("./publicationinfo/pubstatus")&.text end |
#parse_relation ⇒ RelatonBib::DocRelationCollection
Parse relation
245 246 247 248 249 250 251 252 253 254 255 256 |
# File 'lib/relaton_ieee/data_parser.rb', line 245 def parse_relation # rubocop:disable Metrics/AbcSize rels = [] doc.xpath("./publicationinfo/standard_relationship").each do |r| if (ref = fetcher.backrefs[r.text]) rel = fetcher.create_relation(r[:type], ref) rels << rel if rel elsif !"Inactive Date".include?(r) && docnumber fetcher.add_crossref(docnumber, r) end end RelatonBib::DocRelationCollection.new rels end |
#parse_standard_modified ⇒ String?
Parse standard modifier
318 319 320 |
# File 'lib/relaton_ieee/data_parser.rb', line 318 def parse_standard_modified doc.at("./publicationinfo/standardmodifierset/standard_modifier")&.text end |
#parse_standard_status ⇒ String?
Parse standard status
309 310 311 |
# File 'lib/relaton_ieee/data_parser.rb', line 309 def parse_standard_status doc.at("./publicationinfo/standard_status")&.text end |
#parse_title ⇒ Array<RelatonBib::TypedTitleString>
Parse title
52 53 54 55 56 57 58 59 |
# File 'lib/relaton_ieee/data_parser.rb', line 52 def parse_title t = [] content = CGI.unescapeHTML doc.at("./volume/article/title").text if content =~ /\A(.+)\s[-\u2014]\sredline\z/i t << RelatonBib::TypedTitleString.new(content: $1, type: "title-main") end t << RelatonBib::TypedTitleString.new(content: content, type: "main") end |
#pubid ⇒ RelatonIeee::RawbibIdParser
Create PubID
126 127 128 129 130 131 132 |
# File 'lib/relaton_ieee/data_parser.rb', line 126 def pubid @pubid ||= begin normtitle = doc.at("./normtitle").text stdnumber = doc.at("./publicationinfo/stdnumber")&.text RawbibIdParser.parse(normtitle, stdnumber) end end |