Module: RelatonOasis::DataParserUtils

Included in:
DataParser, DataPartParser
Defined in:
lib/relaton_oasis/data_parser_utils.rb

Overview

Common methods for document and part parsers.

Instance Method Summary

Instance Method Details

#affiliation(org) ⇒ Object



99
100
101
102
103
104
105
106
# File 'lib/relaton_oasis/data_parser_utils.rb', line 99

#
# Build the affiliation list for a person from an organization link.
#
# @param org [#[], #text, nil] link element; `org[:href]` becomes the
#   organization's "uri" contact and `org.text` its name
#
# @return [Array<RelatonBib::Affiliation>] one-element array, or an empty
#   array when no link is given
#
def affiliation(org)
  if org
    uri = RelatonBib::Contact.new(type: "uri", value: org[:href])
    # Runs of CR/LF inside the link text are collapsed into a single space.
    flat_name = org.text.gsub(/[\r\n]+/, " ")
    body = RelatonBib::Organization.new(name: flat_name, contact: [uri])
    [RelatonBib::Affiliation.new(organization: body)]
  else
    []
  end
end

#contact(email) ⇒ Object



93
94
95
96
97
# File 'lib/relaton_oasis/data_parser_utils.rb', line 93

#
# Build the contact list for a person from an e-mail link.
#
# @param email [#[], nil] link element whose `:href` holds the address
#
# @return [Array<RelatonBib::Contact>] one-element array with an "email"
#   contact, or an empty array when no link is given
#
def contact(email)
  if email
    # The value is the second colon-separated segment of the href
    # (for "mailto:user@host" that is "user@host"); nil when there is none.
    _scheme, address = email[:href].split(":")
    [RelatonBib::Contact.new(type: "email", value: address)]
  else
    []
  end
end

#create_contribution_info(person, type, description = []) ⇒ Object



77
78
79
80
81
82
83
# File 'lib/relaton_oasis/data_parser_utils.rb', line 77

#
# Build a contribution entry for a person described by a page element.
#
# The person's name is the element text up to the first "(". Links inside
# the element are classified by scheme rather than by position: the first
# `mailto:` link is the e-mail and the first other link is the organization.
# A purely positional assignment would record the organization URL as the
# e-mail whenever the e-mail link is missing or the links are reversed.
#
# @param person [#text, #xpath] element describing the person
# @param type [String] contributor role type
# @param description [Array<String>] role descriptions
#
# @return [RelatonBib::ContributionInfo] contribution with a single role
#
def create_contribution_info(person, type, description = [])
  name = person.text.match(/^[^(]+/).to_s.strip
  mail_links, other_links = person.xpath(".//a[@href]").partition do |link|
    link[:href].to_s.match?(/\Amailto:/i)
  end
  entity = create_person name, mail_links.first, other_links.first
  role = { type: type, description: description }
  RelatonBib::ContributionInfo.new(role: [role], entity: entity)
end

#create_person(name, email = nil, org = nil) ⇒ Object



85
86
87
88
89
90
91
# File 'lib/relaton_oasis/data_parser_utils.rb', line 85

#
# Build a person from a whitespace-separated full name.
#
# The last word is the surname and every preceding word becomes a forename,
# so middle names and initials are kept ("Paul C. Knight" has surname
# "Knight" and forenames "Paul" and "C."). A single-word name is kept as a
# forename with a nil surname.
#
# @param name [String] full name
# @param email [Object, nil] e-mail link passed to {#contact}
# @param org [Object, nil] organization link passed to {#affiliation}
#
# @return [RelatonBib::Person] person with English/Latin name parts
#
def create_person(name, email = nil, org = nil)
  first, *rest = name.split
  # With a single word `rest` is empty, so the surname stays nil.
  surname = rest.pop
  forenames = [first, *rest].map do |given|
    RelatonBib::Forename.new(content: given, language: ["en"], script: ["Latn"])
  end
  sn = RelatonBib::LocalizedString.new(surname, "en", "Latn")
  full_name = RelatonBib::FullName.new(surname: sn, forename: forenames)
  RelatonBib::Person.new(name: full_name, contact: contact(email), affiliation: affiliation(org))
end

#page ⇒ Object



31
32
33
34
35
36
37
38
39
40
# File 'lib/relaton_oasis/data_parser_utils.rb', line 31

#
# Fetch the linked HTML page once and cache the result.
#
# The outcome is cached even when it is nil (no link, a non-".html" link,
# a failed request or a non-200 response). Without that, the `defined?`
# guard never fires after a failure and every call repeats the download,
# including the delay and retries done by `retry_page`.
#
# @return [Object, nil] the response returned by `retry_page` when its code
#   is "200", otherwise nil
#
def page
  return @page if defined? @page

  @page = nil
  node = link_node
  href = node && node[:href]
  # A link without an href is treated like a missing link.
  return @page unless href&.match?(/\.html$/)

  agent = Mechanize.new
  # 404 is allowed so a missing page yields a response object to inspect.
  agent.agent.allowed_error_codes = [404]
  resp = retry_page(href, agent)
  @page = resp if resp && resp.code == "200"
  @page
end

#parse_chairs ⇒ Object



60
61
62
63
64
65
66
# File 'lib/relaton_oasis/data_parser_utils.rb', line 60

#
# Collect chairs from the fetched page.
#
# Selects the paragraphs that come after a paragraph starting with "Chair"
# and before a paragraph starting with "Editor". Each becomes an "editor"
# contribution with the role description "Chair".
#
# @return [Array] contributions built by `create_contribution_info`, or an
#   empty array when there is no page
#
def parse_chairs
  doc = page
  return [] unless doc

  chair_query =
    "//p[preceding-sibling::p[starts-with(., 'Chair')]][following-sibling::p[starts-with(., 'Editor')]]"
  doc.xpath(chair_query).map do |para|
    create_contribution_info(para, "editor", ["Chair"])
  end
end

#parse_contributor ⇒ Array<RelatonBib::ContributionInfo>

Parse contributor.

Returns:

  • (Array<RelatonBib::ContributionInfo>)

    contributors



9
10
11
# File 'lib/relaton_oasis/data_parser_utils.rb', line 9

#
# Parse contributor.
#
# Concatenates, in this order: the OASIS publisher entry, authorizers,
# chairs and editors.
#
# @return [Array<RelatonBib::ContributionInfo>] contributors
#
def parse_contributor
  [publisher_oasis, parse_authorizer, parse_chairs, parse_editors].inject(:+)
end

#parse_docid ⇒ Array<RelatonBib::DocumentIdentifier>

Parse document identifier.

Returns:

  • (Array<RelatonBib::DocumentIdentifier>)

    document identifier



161
162
163
164
# File 'lib/relaton_oasis/data_parser_utils.rb', line 161

#
# Parse document identifier.
#
# The identifier is the document number prefixed with "OASIS " and is
# flagged as primary.
#
# @return [Array<RelatonBib::DocumentIdentifier>] document identifier
#
def parse_docid
  identifier = RelatonBib::DocumentIdentifier.new(
    type: "OASIS", id: "OASIS #{parse_docnumber}", primary: true,
  )
  [identifier]
end

#parse_doctype ⇒ RelatonOasis::DocumentType

Parse document type.

Returns:

  • (RelatonOasis::DocumentType)

    document type



171
172
173
174
175
176
177
178
179
180
# File 'lib/relaton_oasis/data_parser_utils.rb', line 171

#
# Parse document type.
#
# The type is derived from phrases found in `text`; anything that matches
# none of them is a "standard".
#
# @return [RelatonOasis::DocumentType] document type
#
def parse_doctype
  label =
    case text
    when /OASIS Project Specification/ then "specification"
    when /Committee Specification/ then "specification"
    when /Technical Memorandum/ then "memorandum"
    when /Technical Resolution/ then "resolution"
    else
      "standard"
    end
  DocumentType.new(type: label)
end

#parse_editors ⇒ Object



68
69
70
71
72
73
74
75
# File 'lib/relaton_oasis/data_parser_utils.rb', line 68

#
# Collect editors, preferring the fetched page over the summary text.
#
# With a page, selects "Contributor"-class paragraphs located after a
# paragraph starting with "Editor" and before a "Title"-class paragraph.
# Without a page, falls back to `parse_editors_from_text`.
#
# @return [Array] "editor" contributions
#
def parse_editors
  doc = page
  return parse_editors_from_text unless doc

  editor_query = [
    "//p[contains(@class, 'Contributor')]",
    "[preceding-sibling::p[starts-with(., 'Editor')]]",
    "[following-sibling::p[contains(@class, 'Title')]]",
  ].join
  doc.xpath(editor_query).map { |para| create_contribution_info(para, "editor") }
end

#parse_editors_from_text ⇒ Object



23
24
25
26
27
28
29
# File 'lib/relaton_oasis/data_parser_utils.rb', line 23

#
# Extract editors from an "Edited by ..." phrase in `text`.
#
# The name list runs from "Edited by " up to the next period and is split
# on ", ", " and " and ", and ".
#
# @return [Array<RelatonBib::ContributionInfo>] "editor" contributions, or
#   an empty array when there is no text or no such phrase
#
def parse_editors_from_text
  source = text
  return [] unless source

  byline = source.match(/(?<=Edited\sby\s)[^.]+/).to_s
  byline.split(/,?\sand\s|,\s/).map do |editor_name|
    person = create_person(editor_name)
    RelatonBib::ContributionInfo.new(role: [{ type: "editor" }], entity: person)
  end
end

#parse_errata(id) ⇒ String

Parse document identifier errata.

Parameters:

  • id (String)

    document identifier

Returns:

  • (String)

    document identifier with errata if needed



146
147
148
149
150
151
152
153
154
# File 'lib/relaton_oasis/data_parser_utils.rb', line 146

#
# Parse document identifier errata.
#
# When the identifier already carries an errata marker ("errata" followed by
# digits, in any letter case), that marker's spelling is normalized to
# "Errata". The replacement targets exactly the marker found by the
# case-insensitive check, so "ERRATA01" is normalized too and an earlier
# unrelated "errata" substring is left alone. Otherwise the suffix is
# derived from `title`.
#
# @param id [String] document identifier
#
# @return [String] document identifier with errata if needed
#
def parse_errata(id)
  return id.sub(/errata(?=\d)/i, "Errata") if id.match?(/errata\d+/i)

  case title
  when /Plus\sErrata\s(\d+)/ then "#{id}-plus-Errata#{Regexp.last_match(1)}"
  when /Errata\s(\d+)/ then "#{id}-Errata#{Regexp.last_match(1)}"
  else id
  end
end

#parse_part(docid) ⇒ String

Parse document identifier part.

Parameters:

  • docid (String)

    document identifier

Returns:

  • (String)

    document identifier with part if needed



130
131
132
133
134
135
136
137
# File 'lib/relaton_oasis/data_parser_utils.rb', line 130

#
# Parse document identifier part.
#
# An identifier that already contains "Part<n>" or "Pt<n>" (any letter case)
# is returned untouched. Otherwise a "-Pt<n>" suffix is appended when
# `title` mentions "Part <n>".
#
# @param docid [String] document identifier
#
# @return [String] document identifier with part if needed
#
def parse_part(docid)
  already_tagged = docid.match?(/(?:Part|Pt)\d+/i)
  return docid if already_tagged

  suffix =
    case title
    when /Part\s(\d+)/ then "-Pt#{Regexp.last_match(1)}"
    end
  suffix ? "#{docid}#{suffix}" : docid
end

#parse_spec(num) ⇒ String

Parse document identifier specification.

Parameters:

  • num (String)

    document number

Returns:

  • (String)

    document identifier with specification if needed



115
116
117
118
119
120
121
# File 'lib/relaton_oasis/data_parser_utils.rb', line 115

#
# Parse document identifier specification.
#
# Appends "-PS<n>" when `text` mentions "OASIS Project Specification <n>",
# or "-CS<n>" when it mentions "Committee Specification <n>".
#
# @param num [String] document number
#
# @return [String] document identifier with specification if needed
#
def parse_spec(num)
  suffix =
    case text
    when /OASIS Project Specification (\d+)/ then "-PS#{Regexp.last_match(1)}"
    when /Committee Specification (\d+)/ then "-CS#{Regexp.last_match(1)}"
    end
  suffix ? "#{num}#{suffix}" : num
end

#parse_technology_area(node) ⇒ Array<String>

Parse technology area.

Returns:

  • (Array<String>)

    technology areas



187
188
189
190
191
# File 'lib/relaton_oasis/data_parser_utils.rb', line 187

#
# Parse technology area.
#
# Reads the link texts of the "technology-areas__list" list under the
# node's summary. Each text is stripped, its whitespace characters are
# turned into hyphens, and the first "development" is capitalized.
#
# @param node [#xpath] element to search from
#
# @return [Array<String>] technology areas
#
def parse_technology_area(node)
  links = node.xpath("./summary/div/div/ul[@class='technology-areas__list']/li/a")
  links.map do |link|
    label = link.text.strip
    label.gsub(/\s/, "-").sub("development", "Development")
  end
end

#publisher_oasis ⇒ Object



13
14
15
16
17
18
19
20
21
# File 'lib/relaton_oasis/data_parser_utils.rb', line 13

#
# Build the fixed contribution entry for OASIS itself.
#
# OASIS is listed with two roles: "authorizer" (described as "Standards
# Development Organization") and "publisher".
#
# @return [Array<RelatonBib::ContributionInfo>] one-element array
#
def publisher_oasis
  homepage = RelatonBib::Contact.new(type: "uri", value: "https://www.oasis-open.org/")
  oasis = RelatonBib::Organization.new(name: "OASIS", contact: [homepage])
  roles = [
    { type: "authorizer", description: ["Standards Development Organization"] },
    { type: "publisher" },
  ]
  [RelatonBib::ContributionInfo.new(entity: oasis, role: roles)]
end

#retry_page(url, agent, retries = 3) ⇒ Mechanize::Page?

Retry to get page.

Parameters:

  • url (String)

    page URL

  • agent (Mechanize)

    HTTP client

  • retries (Integer) (defaults to: 3)

    number of retries

Returns:

  • (Mechanize::Page, nil)

    page or nil



51
52
53
54
55
56
57
58
# File 'lib/relaton_oasis/data_parser_utils.rb', line 51

#
# Retry to get page.
#
# Every attempt is preceded by a one-second pause. Only connection timeouts
# (`Errno::ETIMEDOUT`, `Net::OpenTimeout`) trigger another attempt; once the
# attempts are used up the failure is reported through `Util.error`.
#
# @param url [String] page URL
# @param agent [Mechanize] HTTP client
# @param retries [Integer] number of attempts
#
# @return [Mechanize::Page, nil] page or nil
#
def retry_page(url, agent, retries = 3)
  begin
    sleep 1 # to avoid 429 error
    agent.get(url)
  rescue Errno::ETIMEDOUT, Net::OpenTimeout => e
    retries -= 1
    retry if retries.positive?

    Util.error "Failed to get page `#{url}`\n#{e.message}"
    nil
  end
end