Class: RelatonIeee::DataParser

Inherits:
Object
  • Object
show all
Defined in:
lib/relaton_ieee/data_parser.rb

Constant Summary collapse

DATETYPES =
{ "OriginalPub" => "created", "ePub" => "published",
"LastInspecUpd" => "updated" }.freeze
ATTRS =
%i[
  docnumber title date docid contributor abstract copyright docstatus
  relation link keyword ics editorialgroup standard_status standard_modified
  pubstatus holdstatus doctype
].freeze

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(doc, fetcher) ⇒ DataParser

Create RelatonIeee::DataParser instance

Parameters:



19
20
21
22
# File 'lib/relaton_ieee/data_parser.rb', line 19

#
# Remember the two collaborators every parse_* method works with.
#
# @param doc [Object] document node; the parser queries it with +at+ and +xpath+
# @param fetcher [Object] collaborator consulted while relations are parsed
#   (+backrefs+, +create_relation+ and +add_crossref+ are called on it)
#
def initialize(doc, fetcher)
  @fetcher = fetcher
  @doc = doc
end

Instance Attribute Details

#docObject (readonly)

Returns the value of attribute doc.



11
12
13
# File 'lib/relaton_ieee/data_parser.rb', line 11

# Reader for the document handed to the constructor.
#
# @return [Object] the value stored in @doc
def doc
  @doc
end

#fetcherObject (readonly)

Returns the value of attribute fetcher.



11
12
13
# File 'lib/relaton_ieee/data_parser.rb', line 11

# Reader for the fetcher handed to the constructor.
#
# @return [Object] the value stored in @fetcher
def fetcher
  @fetcher
end

Class Method Details

.parse(doc, fetcher) ⇒ RelatonIeee::IeeeBibliographicItem

Parse IEEE document

Parameters:

Returns:



32
33
34
# File 'lib/relaton_ieee/data_parser.rb', line 32

#
# Convenience entry point: build a parser for +doc+ and run it.
#
# @param doc [Object] document node to parse
# @param fetcher [Object] fetcher passed through to the new parser
#
# @return [RelatonIeee::IeeeBibliographicItem] result of the instance-level #parse
#
def self.parse(doc, fetcher)
  parser = new(doc, fetcher)
  parser.parse
end

Instance Method Details

#create_org(name, addr = []) ⇒ RelatonBib::Organization

Create organization

Parameters:

  • name (String)

    organization’s name

  • addr (Array<Hash>) (defaults to: [])

    address

Returns:

  • (RelatonBib::Organization)


182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
# File 'lib/relaton_ieee/data_parser.rb', line 182

#
# Create organization. "IEEE" and "ANSI" are expanded to their full name and
# get an abbreviation and URL; any other name is used as given, with no
# abbreviation and no URL.
#
# @param name [String] organization's name
# @param addr [Array<Hash>] address
#
# @return [RelatonBib::Organization]
#
def create_org(name, addr = [])
  well_known = {
    "IEEE" => ["Institute of Electrical and Electronics Engineers", "http://www.ieee.org"],
    "ANSI" => ["American National Standards Institute", "https://www.ansi.org"],
  }
  full_name, url = well_known[name]
  # Only the well-known organizations carry an abbreviation (their short name).
  abbreviation = name if full_name
  RelatonBib::Organization.new(
    name: full_name || name, abbreviation: abbreviation, url: url, contact: addr,
  )
end

#docnumberString

Parse docnumber

Returns:

  • (String)

    PubID



143
144
145
# File 'lib/relaton_ieee/data_parser.rb', line 143

#
# Parse docnumber. The value is derived from #pubid (via +to_id+) and cached
# in @docnumber once it is truthy; it is nil when #pubid is nil.
#
# @return [String, nil] PubID
#
def docnumber
  return @docnumber if @docnumber

  @docnumber = pubid&.to_id
end

#parseRelatonIeee::IeeeBibliographicItem

Parse IEEE document



41
42
43
44
45
# File 'lib/relaton_ieee/data_parser.rb', line 41

#
# Parse IEEE document: start from the fixed type/language/script values and
# add one entry per name in ATTRS, each produced by the matching parse_<attr>
# method.
#
# @return [RelatonIeee::IeeeBibliographicItem]
#
def parse
  defaults = { type: "standard", language: ["en"], script: ["Latn"] }
  args = ATTRS.each_with_object(defaults) do |attr, memo|
    memo[attr] = send("parse_#{attr}")
  end
  IeeeBibliographicItem.new(**args)
end

#parse_abstractArray<RelatonBib::FormattedString>

Parse abstract

Returns:

  • (Array<RelatonBib::FormattedString>)


204
205
206
207
208
209
210
# File 'lib/relaton_ieee/data_parser.rb', line 204

#
# Parse abstract. Only the first abstract element is used, so the result has
# at most one entry.
#
# @return [Array<RelatonBib::FormattedString>]
#
def parse_abstract
  abstracts = doc.xpath("./volume/article/articleinfo/abstract")
  abstracts.first(1).map do |abstract|
    RelatonBib::FormattedString.new(content: abstract.text, language: "en", script: "Latn")
  end
end

#parse_contributorArray<RelatonBib::ContributionInfo>

Parse contributors

Returns:

  • (Array<RelatonBib::ContributionInfo>)


152
153
154
155
156
157
158
159
160
161
162
163
164
# File 'lib/relaton_ieee/data_parser.rb', line 152

#
# Parse contributors: one "publisher" contribution per publisher element.
# Addresses for which #parse_country_city yields no city or no country are
# left out.
#
# @return [Array<RelatonBib::ContributionInfo>]
#
def parse_contributor # rubocop:disable Metrics/MethodLength
  doc.xpath("./publicationinfo/publisher").map do |publisher|
    publisher_name = publisher.at("./publishername").text
    addresses = []
    publisher.xpath("./address").each do |address|
      city, country, state = parse_country_city address
      next unless city && country

      addresses << RelatonBib::Address.new(
        street: [], city: city, state: state, country: country,
      )
    end
    organization = create_org publisher_name, addresses
    RelatonBib::ContributionInfo.new entity: organization, role: [type: "publisher"]
  end
end

#parse_copyright ⇒ Array<RelatonBib::CopyrightAssociation>

Parse copyright

Returns:

  • (Array<RelatonBib::CopyrightAssociation>)


217
218
219
220
221
222
223
224
225
226
# File 'lib/relaton_ieee/data_parser.rb', line 217

#
# Parse copyright. The holder text may name several owners separated by "/";
# each becomes its own organization.
#
# @return [Array<RelatonBib::CopyrightAssociation>]
#
def parse_copyright
  doc.xpath("./publicationinfo/copyrightgroup/copyright").map do |copyright|
    holder_names = copyright.at("./holder").text.split("/")
    owners = holder_names.map do |holder_name|
      RelatonBib::ContributionInfo.new(entity: create_org(holder_name))
    end
    year = copyright.at("./year").text
    RelatonBib::CopyrightAssociation.new(owner: owners, from: year)
  end
end

#parse_country_city(address) ⇒ Object



166
167
168
169
170
171
172
173
# File 'lib/relaton_ieee/data_parser.rb', line 166

#
# Extract city, country and state from an address node. The city text is
# split on ", " into city and state; the country defaults to "USA" when the
# address has no country element.
#
# @param address [Object] address node queried with +at+
#
# @return [Array<String, nil>, nil] [city, country, state], or nil when the
#   address has no city element
#
def parse_country_city(address)
  city_node = address.at("./city")
  return unless city_node

  city_name, state = city_node.text.split(", ")
  [city_name, address.at("./country")&.text || "USA", state]
end

#parse_dateArray<RelatonBib::BibliographicDate>

Parse date

Returns:

  • (Array<RelatonBib::BibliographicDate>)


66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
# File 'lib/relaton_ieee/data_parser.rb', line 66

#
# Parse date. Each article date element becomes a date whose type is looked
# up in DATETYPES and whose value is "YYYY", "YYYY-MM" or "YYYY-MM-DD".
#
# The month text may be an English month name or abbreviation ("Dec",
# "June", "Sept."), optionally preceded by a day ("12 Dec"), or anything
# else starting with a word character (e.g. a number), which is used as is
# and zero-padded to two characters. A blank or unrecognisable month is
# skipped, and a day is only emitted together with a month.
#
# When a PubApprovalDate element is present and #parse_date_string can
# convert it, an additional "issued" date is appended.
#
# @return [Array<RelatonBib::BibliographicDate>]
#
def parse_date # rubocop:disable Metrics/AbcSize,Metrics/CyclomaticComplexity,Metrics/MethodLength,Metrics/PerceivedComplexity
  dates = doc.xpath("./volume/article/articleinfo/date").map do |d|
    da = [d.at("./year").text]
    m = d.at("./month")&.text&.strip
    md = /^(?:(?<day>\d{1,2})\s)?(?<mon>\w+)/.match(m.to_s)
    # Without a match there is no usable month. Looking up a nil name in
    # ABBR_MONTHNAMES would return index 0 (its first entry is nil) and
    # produce a bogus "00" month, so the month is left out instead.
    if md
      # Compare on the first three letters so that full names and dotted
      # abbreviations resolve to the same month number.
      month = Date::ABBR_MONTHNAMES.index(md[:mon][0, 3].capitalize) || m
      da << month.to_s.rjust(2, "0")
      # An explicit day element wins over a day embedded in the month text.
      day = d.at("./day")&.text || md[:day]
      da << day.rjust(2, "0") if day
    end
    RelatonBib::BibliographicDate.new type: DATETYPES[d[:datetype]], on: da.join("-")
  end
  pad = doc.at("./publicationinfo/PubApprovalDate")
  # parse_date_string returns nil for formats it does not recognise; skip the
  # issued date then rather than building a date without a value.
  issued = parse_date_string(pad.text) if pad
  dates << RelatonBib::BibliographicDate.new(type: "issued", on: issued) if issued
  dates
end

#parse_date_string(date) ⇒ String

Convert date string with month name to numeric date

Parameters:

  • date (String)

    source date

Returns:

  • (String)

    numeric date



95
96
97
98
99
100
# File 'lib/relaton_ieee/data_parser.rb', line 95

#
# Convert date string with month name to numeric date. A bare four-digit
# year is returned unchanged; a "day month year" string is run through
# Date.parse; anything else yields nil.
#
# @param date [String] source date
#
# @return [String, nil] numeric date
#
def parse_date_string(date)
  if /^\d{4}$/ === date
    date
  elsif /^\d{1,2}\s\w+\.?\s\d{4}/ === date
    Date.parse(date).to_s
  end
end

#parse_docidArray<RelatonBib::DocumentIdentifier>

Parse identifiers

Returns:

  • (Array<RelatonBib::DocumentIdentifier>)


107
108
109
110
111
112
113
114
115
116
117
118
119
# File 'lib/relaton_ieee/data_parser.rb', line 107

#
# Parse identifiers. When #pubid is available it contributes two primary
# "IEEE" identifiers (plain and trademark-scoped); ISBN and DOI identifiers
# are added when the corresponding elements exist.
#
# @return [Array<RelatonBib::DocumentIdentifier>]
#
def parse_docid # rubocop:disable Metrics/MethodLength
  ids = []
  # #docnumber already treats pubid as optional (pubid&.to_id). Without the
  # same guard here a nil pubid would yield an empty id and then raise on
  # nil.to_s(trademark: true).
  if pubid
    ids << { id: pubid.to_s, type: "IEEE", primary: true }
    ids << { id: pubid.to_s(trademark: true), scope: "trademark", type: "IEEE", primary: true }
  end
  isbn = doc.at("./publicationinfo/isbn")
  ids << { id: isbn.text, type: "ISBN" } if isbn
  doi = doc.at("./volume/article/articleinfo/articledoi")
  ids << { id: doi.text, type: "DOI" } if doi
  ids.map { |dcid| RelatonBib::DocumentIdentifier.new(**dcid) }
end

#parse_docnumberObject



134
135
136
# File 'lib/relaton_ieee/data_parser.rb', line 134

# Adapter that exposes #docnumber under the parse_<attr> naming scheme, so
# that #parse can obtain it through the :docnumber entry of ATTRS.
#
# @return [String, nil] whatever #docnumber returns
def parse_docnumber
  docnumber
end

#parse_docstatus ⇒ RelatonIeee::DocumentStatus?

Parse status

Returns:

  • (RelatonIeee::DocumentStatus, nil)


233
234
235
236
237
238
# File 'lib/relaton_ieee/data_parser.rb', line 233

#
# Parse status. The standard modifier is used as the stage (lower-cased) when
# it is one of Draft, Approved, Superseded or Withdrawn.
#
# @return [DocumentStatus, nil] nil for any other (or missing) modifier
#
def parse_docstatus
  modifier = parse_standard_modified
  stages = %w[Draft Approved Superseded Withdrawn]
  DocumentStatus.new(stage: modifier.downcase) if stages.include?(modifier)
end

#parse_doctypeString

Parse doctype

Returns:

  • (String)

    doctype



345
346
347
348
# File 'lib/relaton_ieee/data_parser.rb', line 345

#
# Parse doctype: "redline" when the standard modifier is exactly "Redline",
# otherwise "standard".
#
# @return [DocumentType] doctype
#
def parse_doctype
  redline = parse_standard_modified == "Redline"
  DocumentType.new(type: redline ? "redline" : "standard")
end

#parse_editorialgroupRelatonIeee::EditorialGroup?

Parse editorialgroup

Returns:



297
298
299
300
301
302
# File 'lib/relaton_ieee/data_parser.rb', line 297

#
# Parse editorialgroup from the sponsoring committee names.
#
# @return [EditorialGroup, nil] nil when no sponsoring committee is listed
#
def parse_editorialgroup
  path = "./publicationinfo/pubsponsoringcommitteeset/pubsponsoringcommittee"
  committees = doc.xpath(path).map(&:text)
  return unless committees.any?

  EditorialGroup.new(committee: committees)
end

#parse_holdstatusString?

Parse holdstatus

Returns:

  • (String, nil)

    holdstatus or nil



336
337
338
# File 'lib/relaton_ieee/data_parser.rb', line 336

#
# Parse holdstatus.
#
# @return [String, nil] text of the holdstatus element, or nil when absent
#
def parse_holdstatus
  node = doc.at("./publicationinfo/holdstatus")
  node&.text
end

#parse_icsArray<RelatonBib::ICS>

Parse ICS

Returns:

  • (Array<RelatonBib::ICS>)


286
287
288
289
290
# File 'lib/relaton_ieee/data_parser.rb', line 286

#
# Parse ICS: one entry per code_term element, taking the code from its
# +codenum+ attribute and the description from its text.
#
# @return [Array<RelatonBib::ICS>]
#
def parse_ics
  code_terms = doc.xpath("./publicationinfo/icscodes/code_term")
  code_terms.map { |term| RelatonBib::ICS.new(code: term[:codenum], text: term.text) }
end

#parse_keyword ⇒ Array<String>

Parse keyword

Returns:

  • (Array<String>)


275
276
277
278
279
# File 'lib/relaton_ieee/data_parser.rb', line 275

#
# Parse keyword: the text of every keywordterm element.
#
# @return [Array<String>]
#
def parse_keyword
  path = "./volume/article/articleinfo/keywordset/keyword/keywordterm"
  doc.xpath(path).map(&:text)
end

#parse_link ⇒ Array<RelatonBib::TypedUri>

Parse link

Returns:

  • (Array<RelatonBib::TypedUri>)


263
264
265
266
267
268
# File 'lib/relaton_ieee/data_parser.rb', line 263

#
# Parse link: every amsid element is turned into an IEEE Xplore document URL
# of type "src".
#
# @return [Array<RelatonBib::TypedUri>]
#
def parse_link
  amsids = doc.xpath("./volume/article/articleinfo/amsid")
  amsids.map do |amsid|
    RelatonBib::TypedUri.new(
      content: "https://ieeexplore.ieee.org/document/#{amsid.text}", type: "src",
    )
  end
end

#parse_pubstatusString?

Parse pubstatus

Returns:

  • (String, nil)

    pubstatus or nil



327
328
329
# File 'lib/relaton_ieee/data_parser.rb', line 327

#
# Parse pubstatus.
#
# @return [String, nil] text of the pubstatus element, or nil when absent
#
def parse_pubstatus
  node = doc.at("./publicationinfo/pubstatus")
  node&.text
end

#parse_relationRelatonBib::DocRelationCollection

Parse relation

Returns:

  • (RelatonBib::DocRelationCollection)


245
246
247
248
249
250
251
252
253
254
255
256
# File 'lib/relaton_ieee/data_parser.rb', line 245

#
# Parse relation. For every standard_relationship element:
#
# * if its text is a key of +fetcher.backrefs+, a relation is requested from
#   +fetcher.create_relation+ (using the element's +type+ attribute) and kept
#   when one is returned;
# * otherwise the element is registered with +fetcher.add_crossref+ under
#   this document's number, unless the element (as a string) is contained in
#   "Inactive Date" or no docnumber is available.
#
# NOTE(review): the "Inactive Date" test passes the element itself to
# String#include?, so it relies on the element converting implicitly to a
# String — confirm against the node class in use.
#
# @return [RelatonBib::DocRelationCollection]
#
def parse_relation # rubocop:disable Metrics/AbcSize
  relations = doc.xpath("./publicationinfo/standard_relationship").each_with_object([]) do |r, found|
    ref = fetcher.backrefs[r.text]
    if ref
      relation = fetcher.create_relation(r[:type], ref)
      found << relation if relation
    elsif !"Inactive Date".include?(r) && docnumber
      fetcher.add_crossref(docnumber, r)
    end
  end
  RelatonBib::DocRelationCollection.new relations
end

#parse_standard_modifiedString?

Parse standard modifier

Returns:

  • (String, nil)

    standard modifier or nil



318
319
320
# File 'lib/relaton_ieee/data_parser.rb', line 318

#
# Parse standard modifier.
#
# @return [String, nil] text of the first standard_modifier element, or nil
#   when there is none
#
def parse_standard_modified
  node = doc.at("./publicationinfo/standardmodifierset/standard_modifier")
  node&.text
end

#parse_standard_statusString?

Parse standard status

Returns:

  • (String, nil)

    standard status or nil



309
310
311
# File 'lib/relaton_ieee/data_parser.rb', line 309

#
# Parse standard status.
#
# @return [String, nil] text of the standard_status element, or nil when absent
#
def parse_standard_status
  node = doc.at("./publicationinfo/standard_status")
  node&.text
end

#parse_titleArray<RelatonBib::TypedTitleString>

Parse title

Returns:

  • (Array<RelatonBib::TypedTitleString>)


52
53
54
55
56
57
58
59
# File 'lib/relaton_ieee/data_parser.rb', line 52

#
# Parse title. The HTML-unescaped article title is always returned with type
# "main". When it ends in " - Redline" (hyphen or em dash, any letter case),
# the part before that suffix is additionally returned first, with type
# "title-main".
#
# @return [Array<RelatonBib::TypedTitleString>]
#
def parse_title
  content = CGI.unescapeHTML(doc.at("./volume/article/title").text)
  titles = []
  redline = /\A(.+)\s[-\u2014]\sredline\z/i.match(content)
  titles << RelatonBib::TypedTitleString.new(content: redline[1], type: "title-main") if redline
  titles << RelatonBib::TypedTitleString.new(content: content, type: "main")
end

#pubidRelatonIeee::RawbibIdParser

Create PubID

Returns:



126
127
128
129
130
131
132
# File 'lib/relaton_ieee/data_parser.rb', line 126

#
# Create PubID from the document's normtitle and, when present, its
# stdnumber. The result of RawbibIdParser.parse is cached in @pubid once it
# is truthy.
#
# @return [Object, nil] whatever RawbibIdParser.parse returns
#
def pubid
  return @pubid if @pubid

  normtitle = doc.at("./normtitle").text
  stdnumber = doc.at("./publicationinfo/stdnumber")&.text
  @pubid = RawbibIdParser.parse(normtitle, stdnumber)
end