Class: RelatonIec::DataParser

Inherits:
Object
  • Object
show all
Defined in:
lib/relaton_iec/data_parser.rb

Constant Summary collapse

# Base URL used to build publication, preview and relation-request links.
DOMAIN =
"https://webstore.iec.ch"
# Names of the parser methods that #parse calls (via `send`) to collect
# the bibliographic item attributes.
ATTRS =
%i[
  docid structuredidentifier language script title doctype
  ics date contributor editorialgroup abstract copyright link relation
].freeze
# Organization abbreviation => [full name, website].
ABBREVS =
{
  "ISO" => ["International Organization for Standardization", "www.iso.org"],
  "IEC" => ["International Electrotechnical Commission", "www.iec.ch"],
  "IEEE" => ["Institute of Electrical and Electronics Engineers", "www.ieee.org"],
  "ASTM" => ["American Society of Testing Materials", "www.astm.org"],
  "CISPR" => ["International special committee on radio interference", "www.iec.ch"],
}.freeze
# "stdType" code => document type name. Frozen like the other constant
# collections so the lookup table cannot be mutated at runtime.
DOCTYPES =
{
  "IS" => "international-standard",
  "TR" => "technical-report",
  "TS" => "technical-specification",
  "PAS" => "publicly-available-specification",
  "SRD" => "system-reference-deliverable",
}.freeze

Instance Method Summary collapse

Constructor Details

#initialize(pub) ⇒ DataParser

Initialize new instance.

Parameters:

  • pub (Hash)

    document data



31
32
33
# File 'lib/relaton_iec/data_parser.rb', line 31

# Store the raw document data for the parser methods to read.
#
# @param pub [Hash] document data
def initialize(pub)
  @pub = pub
end

Instance Method Details

#abstract ⇒ Array<RelatonBib::FormattedString>

Parse abstract.

Returns:

  • (Array<RelatonBib::FormattedString>)

    abstract



146
147
148
149
150
151
152
153
# File 'lib/relaton_iec/data_parser.rb', line 146

# Build one formatted string per entry of the "abstract" list.
#
# @return [Array<RelatonBib::FormattedString>, nil] abstracts, or nil when
#   the document data has no "abstract" value
def abstract
  entries = @pub["abstract"]
  return if entries.nil?

  entries.map do |entry|
    lang = entry["lang"]
    RelatonBib::FormattedString.new(
      content: entry["content"], language: lang,
      script: lang_to_script(lang), format: entry["format"]
    )
  end
end

#contributor ⇒ Array<Hash>

Parse contributors.

Returns:

  • (Array<Hash>)

    contributors



205
206
207
208
209
210
211
# File 'lib/relaton_iec/data_parser.rb', line 205

# Derive publishers from the leading, slash-separated part of the reference
# (everything before the first whitespace).
#
# @return [Array<Hash>] one publisher hash per abbreviation; name and url
#   are nil for abbreviations that are not listed in ABBREVS
def contributor
  prefix = @pub["reference"].sub(/\s.*/, "")
  prefix.split("/").map do |abbrev|
    org_name, site = ABBREVS[abbrev]
    organization = { name: org_name, url: site, abbreviation: abbrev }
    { entity: organization, role: [{ type: "publisher" }] }
  end
end

#copyright ⇒ Array<Hash>

Returns:

  • (Array<Hash>)


156
157
158
159
160
161
162
163
164
165
166
167
# File 'lib/relaton_iec/data_parser.rb', line 156

# Build the copyright entry. The year is the four digits following ":" in
# the reference, falling back to the first four digits of "releaseDate".
# Owners come from the slash-separated text before the first whitespace of
# the reference.
#
# @return [Array<Hash>] a single-element array, or [] when no year is found
def copyright # rubocop:disable Metrics/AbcSize
  reference = @pub["reference"]
  year = reference.match(/(?<=:)\d{4}/).to_s
  year = @pub["releaseDate"]&.match(/\d{4}/).to_s if year.empty?
  # `to_s` always yields a String, so an emptiness check is sufficient here.
  return [] if year.empty?

  owners = reference.match(/.*?(?=\s)/).to_s.split("/").map do |abbrev|
    org_name, site = ABBREVS[abbrev]
    { name: org_name, abbreviation: abbrev, url: site }
  end
  [{ owner: owners, from: year }]
end

#create_relations(doc) ⇒ Array<Hash>

Create relations.

Parameters:

  • doc (Nokogiri::XML::Document)

    XML document

Returns:

  • (Array<Hash>)

    relations



274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
# File 'lib/relaton_iec/data_parser.rb', line 274

# Turn every ROW whose STATUS is neither "PREPARING" nor "PUBLISHED" into a
# document relation.
#
# @param doc [Nokogiri::XML::Document] XML document
# @return [Array<RelatonBib::DocumentRelation>] relations
def create_relations(doc) # rubocop:disable Metrics/MethodLength
  rows = doc.xpath('//ROW[STATUS[.!="PREPARING" and .!="PUBLISHED"]]')
  rows.map do |row|
    status = row.at("STATUS").text.downcase
    # Known statuses map onto relation types; anything else passes through.
    relation_type =
      if %w[revised replaced].include?(status) then "updates"
      elsif status == "withdrawn" then "obsoletes"
      else status
      end
    full_name = row.at("FULL_NAME").text
    formatted = RelatonBib::FormattedRef.new content: full_name, format: "text/plain"
    identifier = RelatonBib::DocumentIdentifier.new(id: full_name, type: "IEC", primary: true)
    item = IecBibliographicItem.new(formattedref: formatted, docid: [identifier])
    RelatonBib::DocumentRelation.new type: relation_type, bibitem: item
  end
end

#date ⇒ Array<RelatonBib::BibliographicDate>

Parse dates.

Returns:

  • (Array<RelatonBib::BibliographicDate>)

    dates



187
188
189
190
191
192
193
194
195
196
197
198
# File 'lib/relaton_iec/data_parser.rb', line 187

# Collect a bibliographic date for every date field present in the data.
#
# @return [Array<RelatonBib::BibliographicDate>] dates, in the order of the
#   mapping below; fields with no value are skipped
def date
  fields = {
    "published" => "publicationDate",
    "stable-until" => "stabilityDate",
    "confirmed" => "confirmationDate",
    "obsoleted" => "dateOfWithdrawal",
  }
  fields.each_with_object([]) do |(type, key), dates|
    value = @pub[key]
    dates << RelatonBib::BibliographicDate.new(type: type, on: value) if value
  end
end

#docid ⇒ Array<RelatonBib::DocumentIdentifier>

Parse document identifiers.

Returns:

  • (Array<RelatonBib::DocumentIdentifier>)

    document identifiers



54
55
56
57
58
59
# File 'lib/relaton_iec/data_parser.rb', line 54

# Build the primary IEC identifier from the reference and a URN identifier
# from the first "urnAlt" value.
#
# @return [Array<RelatonBib::DocumentIdentifier>] document identifiers
def docid
  primary = RelatonBib::DocumentIdentifier.new(id: @pub["reference"], type: "IEC", primary: true)
  urn = RelatonBib::DocumentIdentifier.new(id: "urn:#{@pub['urnAlt'][0]}", type: "URN")
  [primary, urn]
end

#doctype ⇒ String

Parse document type.

Returns:

  • (String)

    document type



243
244
245
246
# File 'lib/relaton_iec/data_parser.rb', line 243

# Map the "stdType" code to a document type, falling back to the lower-cased
# code when it is not listed in DOCTYPES.
#
# @return [DocumentType] document type
def doctype
  code = @pub["stdType"]
  type_name = DOCTYPES.fetch(code) { code.downcase }
  DocumentType.new type: type_name
end

#editorialgroup ⇒ Hash

Parse editorial group.

Returns:

  • (Hash)

    editorial group



128
129
130
131
132
133
134
135
136
137
138
139
# File 'lib/relaton_iec/data_parser.rb', line 128

# Describe the technical committee named by the "committee" reference.
#
# @return [Hash, nil] editorial group, or nil when the data has no
#   "committee"; :number is the first run of digits in the reference as an
#   Integer, or nil when the reference has no digits
def editorialgroup
  committee = @pub["committee"]
  return unless committee

  reference = committee["reference"]
  digits = reference.match(/\d+/)
  committee_number = digits && digits.to_s.to_i
  technical_committee = { name: reference, type: "technicalCommittee", number: committee_number }
  { technical_committee: [technical_committee] }
end

#ics ⇒ Array<RelatonIsoBib::Ics>

Fetch ICS.

Returns:

  • (Array<RelatonIsoBib::Ics>)

    ics



174
175
176
177
178
179
180
# File 'lib/relaton_iec/data_parser.rb', line 174

# Build an ICS object for every classification whose type is "ICS".
#
# @return [Array<RelatonIsoBib::Ics>] ics; [] when the data has no
#   "classifications"
def ics
  classifications = @pub["classifications"]
  return [] unless classifications

  ics_entries = classifications.select { |entry| entry["type"] == "ICS" }
  ics_entries.map { |entry| RelatonIsoBib::Ics.new(entry["value"]) }
end

#lang_to_script(lang) ⇒ String

Detect script.

Parameters:

  • lang (String)

    language

Returns:

  • (String)

    script



104
105
106
107
108
# File 'lib/relaton_iec/data_parser.rb', line 104

# Detect the script for a language code.
#
# @param lang [String] language
# @return [String, nil] "Latn" for "en", "fr" and "es"; nil otherwise
def lang_to_script(lang)
  "Latn" if %w[en fr es].include?(lang)
end

#language ⇒ Array<String>

Parse languages.

Returns:

  • (Array<String>)

    languages



81
82
83
# File 'lib/relaton_iec/data_parser.rb', line 81

# Collect the distinct "lang" values of the title entries, in order of
# first appearance.
#
# @return [Array<String>] languages
def language
  langs = @pub["title"].map { |entry| entry["lang"] }
  langs.uniq
end

#link ⇒ Array<RelatonBib::TypedUri>

Parse links.

Returns:

  • (Array<RelatonBib::TypedUri>)

    links



218
219
220
221
222
223
224
225
226
227
# File 'lib/relaton_iec/data_parser.rb', line 218

# Build the "src" link to the publication page plus an "obp" link for each
# release item of type "PREVIEW".
#
# @return [Array<RelatonBib::TypedUri>] links
def link
  uris = [RelatonBib::TypedUri.new(content: "#{DOMAIN}/publication/#{urn_id}", type: "src")]
  RelatonBib.array(@pub["releaseItems"]).each do |item|
    next unless item["type"] == "PREVIEW"

    preview_url = "#{DOMAIN}/preview/#{item['contentRef']['fileName']}"
    uris << RelatonBib::TypedUri.new(content: preview_url, type: "obp")
  end
  uris
end

#parse ⇒ RelatonIec::IecBibliographicItem

Parse document.

Returns:

  • (RelatonIec::IecBibliographicItem)



40
41
42
43
44
45
46
47
# File 'lib/relaton_iec/data_parser.rb', line 40

# Parse the document data into a bibliographic item.
#
# Every attribute named in ATTRS is produced by the parser method of the
# same name; status, edition, price code and place are added on top.
#
# @return [IecBibliographicItem] bibliographic item
def parse # rubocop:disable Metrics/AbcSize
  args = ATTRS.each_with_object({}) { |a, h| h[a] = send a }
  args[:docstatus] = RelatonBib::DocumentStatus.new stage: @pub["status"]
  args[:edition] = @pub["edition"]
  # Records without "priceInfo" yield a nil price code instead of raising
  # NoMethodError on nil.
  price_info = @pub["priceInfo"]
  args[:price_code] = price_info && price_info["priceCode"]
  args[:place] = ["Geneva"]
  IecBibliographicItem.new(**args)
end

#relation ⇒ Array<RelatonBib::DocumentRelation>

Parse relation.

Returns:

  • (Array<RelatonBib::DocumentRelation>)

    relation



253
254
255
256
257
258
259
260
261
262
263
264
265
# File 'lib/relaton_iec/data_parser.rb', line 253

# Request the relation XML for this document from the webstore and convert
# it with #create_relations. Any StandardError triggers a retry; the third
# failure is re-raised to the caller.
#
# @return [Array<RelatonBib::DocumentRelation>] relation
def relation # rubocop:disable Metrics/MethodLength
  attempts = 0
  begin
    endpoint = URI "#{DOMAIN}/webstore/webstore.nsf/AjaxRequestXML?" \
                   "Openagent&url=#{urn_id}"
    response = Net::HTTP.get_response endpoint
    create_relations Nokogiri::XML(response.body)
  rescue StandardError => e
    attempts += 1
    raise e unless attempts < 3

    retry
  end
end

#script ⇒ Array<String>

Parse scripts.

Returns:

  • (Array<String>)

    scripts



90
91
92
93
94
95
# File 'lib/relaton_iec/data_parser.rb', line 90

# Collect the distinct scripts of the document languages, skipping
# languages for which no script is detected.
#
# @return [Array<String>] scripts
def script
  detected = language.map { |lang| lang_to_script(lang) }
  detected.compact.uniq
end

#structuredidentifier ⇒ RelatonIsoBib::StructuredIdentifier

Parse structured identifier.

Returns:

  • (RelatonIsoBib::StructuredIdentifier)

    structured identifier



66
67
68
69
70
71
72
73
74
# File 'lib/relaton_iec/data_parser.rb', line 66

# Split the reference into project number, part and subpart. The project
# number is the first word after whitespace; part and subpart are the
# optional "-"-separated words that follow it.
#
# @return [RelatonIsoBib::StructuredIdentifier] structured identifier
def structuredidentifier
  reference = @pub["reference"]
  pieces = reference.match(
    /(?<=\s)(?<project>\w+)(?:-(?<part>\w*)(?:-(?<subpart>\w*))?)?/,
  )
  RelatonIsoBib::StructuredIdentifier.new(
    type: "IEC", id: reference,
    project_number: pieces[:project], part: pieces[:part], subpart: pieces[:subpart]
  )
end

#title ⇒ RelatonBib::TypedTitleStringCollection

Parse titles.

Returns:

  • (RelatonBib::TypedTitleStringCollection)

    titles



115
116
117
118
119
120
121
# File 'lib/relaton_iec/data_parser.rb', line 115

# Build typed titles from each "title" entry and concatenate them into one
# collection.
#
# @return [RelatonBib::TypedTitleStringCollection] titles
def title
  titles = RelatonBib::TypedTitleStringCollection.new
  @pub["title"].each do |entry|
    lang = entry["lang"]
    titles += RelatonBib::TypedTitleString.from_string(
      entry["value"], lang, lang_to_script(lang)
    )
  end
  titles
end

#urn_id ⇒ String

Extract URN ID from URN.

Returns:

  • (String)

    URN ID



234
235
236
# File 'lib/relaton_iec/data_parser.rb', line 234

# Extract the URN ID: the text after the last ":" of the "urn" value.
#
# @return [String] URN ID
def urn_id
  segments = @pub["urn"].split(":")
  segments[-1]
end