Class: RelatonOasis::DataPartParser

Inherits:
Object
  • Object
show all
Includes:
DataParserUtils
Defined in:
lib/relaton_oasis/data_part_parser.rb

Overview

Parser for OASIS part document.

Instance Method Summary collapse

Methods included from DataParserUtils

#affiliation, #contact, #create_contribution_info, #create_person, #page, #parse_chairs, #parse_contributor, #parse_docid, #parse_doctype, #parse_editors, #parse_editors_from_text, #parse_errata, #parse_part, #parse_spec, #publisher_oasis, #retry_page

Constructor Details

#initialize(node) ⇒ DataPartParser

Initialize parser.

Parameters:

  • node (Nokogiri::HTML::Element)

    document node



11
12
13
# File 'lib/relaton_oasis/data_part_parser.rb', line 11

# Initialize parser.
#
# Stores the node in @node; the other methods of this parser run their
# XPath queries relative to it.
#
# @param node [Nokogiri::HTML::Element] document node
def initialize(node)
  @node = node
end

Instance Method Details



146
147
148
# File 'lib/relaton_oasis/data_part_parser.rb', line 146

# Anchor element that carries the link of this part document.
#
# Returns the first match of either an `a` child of the node or an `a`
# inside the first following-sibling `p`. A found element is cached in
# @link_node so repeated callers do not re-run the XPath query; a miss
# (nil) is not cached and is looked up again on the next call.
#
# @return [Object, nil] whatever `@node.at` yields for the query
#   (presumably a Nokogiri element), or nil when nothing matches
def link_node
  @link_node ||= @node.at("./a|./following-sibling::p[1]/a")
end

#parse ⇒ RelatonOasis::OasisBibliographicItem

Parse document.

Returns:



42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
# File 'lib/relaton_oasis/data_part_parser.rb', line 42

# Parse document.
#
# Collects the result of every `parse_*` step into one attribute set and
# builds the bibliographic item from it. Type, language and script are
# fixed to "standard", ["en"] and ["Latn"].
#
# @return [RelatonOasis::OasisBibliographicItem] bibliographic item
def parse # rubocop:disable Metrics/MethodLength
  item_class = RelatonOasis::OasisBibliographicItem
  attrs = {
    type: "standard",
    doctype: parse_doctype,
    title: parse_title,
    docid: parse_docid,
    link: parse_link,
    docnumber: parse_docnumber,
    date: parse_date,
    abstract: parse_abstract,
    language: ["en"],
    script: ["Latn"],
    editorialgroup: parse_editorialgroup,
    relation: parse_relation,
    contributor: parse_contributor,
    technology_area: parse_technology_area,
  }
  item_class.new(**attrs)
end

#parse_abstract ⇒ Object



103
104
105
106
107
108
# File 'lib/relaton_oasis/data_part_parser.rb', line 103

# Parse abstract.
#
# Reads the paragraphs selected by the XPath below (a `p` that has a
# preceding sibling `p` starting with "Abstract") from the document page,
# replaces runs of line breaks with a single space and strips the result.
#
# @return [Array<RelatonBib::FormattedString>] abstracts; empty when no
#   page is available
def parse_abstract
  # Same guard as parse_authorizer: page may be nil, in which case there is nothing to scan.
  return [] unless page

  page.xpath("//p[preceding-sibling::p[starts-with(., 'Abstract')]][1]").map do |para|
    content = para.text.gsub(/[\r\n]+/, " ").strip
    RelatonBib::FormattedString.new(content: content, language: "en", script: "Latn")
  end
end

#parse_authorizer ⇒ Object



135
136
137
138
139
140
141
142
143
144
# File 'lib/relaton_oasis/data_part_parser.rb', line 135

# Parse authorizing committees.
#
# Every link found in the paragraph selected by the XPath below (a `p`
# preceded by a sibling `p` starting with "Technical") becomes an
# organization whose contact is the link target, wrapped in a contribution
# with the "authorizer" role.
#
# @return [Array<RelatonBib::ContributionInfo>] contributions; empty when
#   no page is available
def parse_authorizer
  return [] unless page

  committee_links = page.xpath("//p[preceding-sibling::p[starts-with(., 'Technical')]][1]//a")
  committee_links.map do |link|
    uri = RelatonBib::Contact.new(type: "uri", value: link[:href])
    # Link text may be wrapped over several lines; collapse the breaks.
    committee_name = link.text.gsub(/[\r\n]+/, " ").strip
    committee = RelatonBib::Organization.new(name: committee_name, contact: [uri])
    RelatonBib::ContributionInfo.new(
      entity: committee,
      role: [{ type: "authorizer", description: ["Committee"] }],
    )
  end
end

#parse_date ⇒ Array<RelatonBib::BibliographicDate>

Parse date.

Returns:

  • (Array<RelatonBib::BibliographicDate>)

    bibliographic dates



98
99
100
101
# File 'lib/relaton_oasis/data_part_parser.rb', line 98

# Parse date.
#
# Takes the first "<day> <word> <year>" sequence found in the citation text
# (for example "01 May 2019") and turns it into an "issued" date.
#
# @return [Array<RelatonBib::BibliographicDate>] bibliographic dates; empty
#   when the citation text contains no such sequence
def parse_date
  found = text.match(/(?<on>\d{1,2}\s\w+\s\d{4})/)
  # No date in the citation: report "no dates" instead of handing nil to Date.parse.
  return [] unless found

  [RelatonBib::BibliographicDate.new(on: Date.parse(found[:on]).to_s, type: "issued")]
end

#parse_docnumber ⇒ String

Parse document number.

Returns:

  • (String)

    document number



75
76
77
78
79
80
81
82
# File 'lib/relaton_oasis/data_part_parser.rb', line 75

# Parse document number.
#
# Reads the citation label of the node, takes its first run of characters
# that are not square brackets, and passes it through parse_errata,
# parse_spec and parse_part in that order.
#
# @return [String] document number
def parse_docnumber
  label = @node.at("./span[@class='citationLabel']/strong|./strong|b/span")
  id = parse_errata(label.text[/[^\[\]]+/].to_s)
  # These part references would otherwise collide with the id of their root
  # document, so they are marked with a "-Pt" suffix.
  ambiguous = %w[CMIS-v1.1 DocBook-5.0 XACML-V3.0 mqtt-v3.1.1 OData-JSON-Format-v4.0].include?(id)
  id = "#{id}-Pt" if ambiguous
  parse_part(parse_spec(id))
end

#parse_editorialgroup ⇒ RelatonBib::EditorialGroup

Parse technical committee.

Returns:

  • (RelatonBib::EditorialGroup)

    technical committee



115
116
117
118
119
120
121
# File 'lib/relaton_oasis/data_part_parser.rb', line 115

# Parse technical committee.
#
# Every link found in the paragraph selected by the XPath below (a `p`
# preceded by a sibling `p` starting with "Technical") becomes a technical
# committee named after the stripped link text.
#
# @return [RelatonBib::EditorialGroup] technical committee; built from an
#   empty list when no page is available or the page has no such links
def parse_editorialgroup
  doc = page
  # parse_authorizer treats page as possibly nil; handle that here like a page without committee links.
  links = doc ? doc.xpath("//p[preceding-sibling::p[starts-with(., 'Technical')]][1]//a") : []
  committees = links.map do |link|
    workgroup = RelatonBib::WorkGroup.new(name: link.text.strip)
    RelatonBib::TechnicalCommittee.new(workgroup)
  end
  RelatonBib::EditorialGroup.new(committees)
end

Parse link.

Returns:

  • (Array<RelatonBib::TypedUri>)

    link



89
90
91
# File 'lib/relaton_oasis/data_part_parser.rb', line 89

# Parse link.
#
# Wraps the `href` of the anchor returned by link_node in a "src" typed URI.
#
# @return [Array<RelatonBib::TypedUri>] link; empty when the node has no
#   anchor or the anchor has no href
def parse_link
  anchor = link_node
  href = anchor && anchor[:href]
  # link_node is nil when the XPath lookup matches nothing; emit no link rather than fail on nil.
  return [] unless href

  [RelatonBib::TypedUri.new(type: "src", content: href)]
end

#parse_relation ⇒ Array<RelatonBib::DocumentRelation>

Parse relation.

Returns:

  • (Array<RelatonBib::DocumentRelation>)

    document relations



128
129
130
131
132
133
# File 'lib/relaton_oasis/data_part_parser.rb', line 128

# Parse relation.
#
# Runs a DataParser on the enclosing `details` ancestor of the node and
# uses the id of its first document identifier as the formatted reference
# of a "partOf" relation.
#
# @return [Array<RelatonBib::DocumentRelation>] document relations
def parse_relation
  root_node = @node.at("./ancestor::details")
  root_id = DataParser.new(root_node).parse_docid[0].id
  parent = RelatonOasis::OasisBibliographicItem.new(
    formattedref: RelatonBib::FormattedRef.new(content: root_id),
  )
  [RelatonBib::DocumentRelation.new(type: "partOf", bibitem: parent)]
end

#parse_technology_area ⇒ Array<String>

Parse technology area.

Returns:

  • (Array<String>)

    technology areas



155
156
157
# File 'lib/relaton_oasis/data_part_parser.rb', line 155

# Parse technology area.
#
# Delegates to the implementation further up the ancestor chain (via
# `super`), handing it the enclosing `details` ancestor of the node.
#
# @return [Array<String>] technology areas
def parse_technology_area
  details = @node.at("./ancestor::details")
  super(details)
end

#parse_title ⇒ Array<RelatonBib::TypedTitleString>

Parse title.

Returns:

  • (Array<RelatonBib::TypedTitleString>)

    title



66
67
68
# File 'lib/relaton_oasis/data_part_parser.rb', line 66

# Parse title.
#
# Wraps the string returned by #title in a single "main" title, marked as
# English in Latin script.
#
# @return [Array<RelatonBib::TypedTitleString>] title
def parse_title
  attrs = { type: "main", content: title, language: "en", script: "Latn" }
  [RelatonBib::TypedTitleString.new(**attrs)]
end

#text ⇒ Object



15
16
17
18
19
20
21
22
23
24
25
# File 'lib/relaton_oasis/data_part_parser.rb', line 15

# Citation text of the part, memoized in @text.
#
# When a text node follows the `strong` label (or the `span` wrapping it),
# the text of all nodes following that label is used; otherwise the text of
# the first following-sibling `p` that contains an `em` is used. The result
# is stripped.
#
# @return [String] citation text
def text
  @text ||= begin
    inline_probe = "./strong/following-sibling::text()|./span[strong]/following-sibling::text()"
    source_xpath = if @node.at(inline_probe)
                     "./strong/following-sibling::node()|./span[strong]/following-sibling::node()"
                   else
                     "./following-sibling::p[1][em]"
                   end
    @node.xpath(source_xpath).text.strip
  end
end

#title ⇒ Object



27
28
29
30
31
32
33
34
35
# File 'lib/relaton_oasis/data_part_parser.rb', line 27

# Title of the part, memoized in @title.
#
# Uses the text of the title element (a `span` with class "citationTitle" or
# "citeTitle", an `em` or an `i`) when the node has one. Otherwise the title
# is the part of the citation text that precedes the first " Edited" or the
# first "<day> <word> <year>" date, whichever comes first.
#
# @return [String] stripped title
# @raise [NoMethodError] when there is no title element and the citation
#   text contains neither "Edited" nor a date (no match to read from)
def title
  return @title if @title

  marked = @node.at("./span[@class='citationTitle' or @class='citeTitle']|./em|./i")
  raw = if marked
          marked.text
        else
          # The date alternative needs whitespace before the year to match
          # "01 May 2019" (same shape as in parse_date). The capture is lazy
          # so the title ends at the first boundary, not the last one.
          text.match(/(?<content>.+?)\s(?:Edited|\d{1,2}\s\w+\s\d{4})/)[:content]
        end
  @title = raw.strip
end