Class: RelatonOasis::DataPartParser
- Inherits:
-
Object
- Object
- RelatonOasis::DataPartParser
- Includes:
- DataParserUtils
- Defined in:
- lib/relaton_oasis/data_part_parser.rb
Overview
Parser for OASIS part document.
Instance Method Summary collapse
-
#initialize(node) ⇒ DataPartParser
constructor
Initialize parser.
- #link_node ⇒ Object
-
#parse ⇒ RelatonOasis::OasisBibliographicItem
Parse document.
- #parse_abstract ⇒ Object
- #parse_authorizer ⇒ Object
-
#parse_date ⇒ Array<RelatonBib::BibliographicDate>
Parse date.
-
#parse_docnumber ⇒ String
Parse document number.
-
#parse_editorialgroup ⇒ RelatonBib::EditorialGroup
Parse technical committee.
-
#parse_link ⇒ Array<RelatonBib::TypedUri>
Parse link.
-
#parse_relation ⇒ Array<RelatonBib::DocumentRelation>
Parse relation.
-
#parse_technology_area ⇒ Array<String>
Parse technology area.
-
#parse_title ⇒ Array<RelatonBib::TypedTitleString>
Parse title.
- #text ⇒ Object
- #title ⇒ Object
Methods included from DataParserUtils
#affiliation, #contact, #create_contribution_info, #create_person, #page, #parse_chairs, #parse_contributor, #parse_docid, #parse_doctype, #parse_editors, #parse_editors_from_text, #parse_errata, #parse_part, #parse_spec, #publisher_oasis, #retry_page
Constructor Details
#initialize(node) ⇒ DataPartParser
Initialize parser.
11 12 13 |
# File 'lib/relaton_oasis/data_part_parser.rb', line 11
#
# Initialize parser.
#
# @param node the node describing the part document; kept in @node and
#   queried with #at / #xpath by the other methods of this parser
#
def initialize(node)
  @node = node
end
Instance Method Details
#link_node ⇒ Object
146 147 148 |
# File 'lib/relaton_oasis/data_part_parser.rb', line 146
#
# Locate the link element of the part: an <a> directly under the node or,
# failing that, an <a> inside the first following sibling <p>.
#
# The lookup result is cached in @link_node so repeated calls do not
# re-run the XPath query (a falsy result is not cached and is looked up
# again on the next call).
#
# @return [Object, nil] whatever @node.at returns for the XPath
#
def link_node
  @link_node ||= @node.at("./a|./following-sibling::p[1]/a")
end
#parse ⇒ RelatonOasis::OasisBibliographicItem
Parse document.
42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 |
# File 'lib/relaton_oasis/data_part_parser.rb', line 42
#
# Parse document.
#
# Collects every attribute through the individual parse_* methods (in the
# order listed below) and builds the bibliographic item from them.
#
# @return [RelatonOasis::OasisBibliographicItem] bibliographic item
#
def parse # rubocop:disable Metrics/MethodLength
  attrs = {
    type: "standard",
    doctype: parse_doctype,
    title: parse_title,
    docid: parse_docid,
    link: parse_link,
    docnumber: parse_docnumber,
    date: parse_date,
    abstract: parse_abstract,
    language: ["en"],
    script: ["Latn"],
    editorialgroup: parse_editorialgroup,
    relation: parse_relation,
    contributor: parse_contributor,
    technology_area: parse_technology_area,
  }
  RelatonOasis::OasisBibliographicItem.new(**attrs)
end
#parse_abstract ⇒ Object
103 104 105 106 107 108 |
# File 'lib/relaton_oasis/data_part_parser.rb', line 103
#
# Parse abstract.
#
# Reads the paragraph(s) selected by the XPath below (the first <p> that
# has a preceding sibling <p> starting with "Abstract") from the document
# page and turns each into a formatted string with line breaks collapsed
# to single spaces.
#
# @return [Array<RelatonBib::FormattedString>] abstracts; empty when no
#   page is available or nothing matches
#
def parse_abstract
  # `page` can be nil (parse_authorizer guards against it as well);
  # without a page there is nothing to read the abstract from.
  return [] unless page

  page.xpath("//p[preceding-sibling::p[starts-with(., 'Abstract')]][1]").map do |p|
    cnt = p.text.gsub(/[\r\n]+/, " ").strip
    RelatonBib::FormattedString.new(content: cnt, language: "en", script: "Latn")
  end
end
#parse_authorizer ⇒ Object
135 136 137 138 139 140 141 142 143 144 |
# File 'lib/relaton_oasis/data_part_parser.rb', line 135
#
# Parse authorizer.
#
# Every link found in the paragraph selected by the XPath below (the first
# <p> that has a preceding sibling <p> starting with "Technical") becomes
# an organization with the link target as its URI contact, contributing
# in the "authorizer" role.
#
# @return [Array<RelatonBib::ContributionInfo>] authorizers; empty when no
#   page is available or no link matches
#
def parse_authorizer
  return [] unless page

  page.xpath("//p[preceding-sibling::p[starts-with(., 'Technical')]][1]//a").map do |a|
    cnt = RelatonBib::Contact.new(type: "uri", value: a[:href])
    org = RelatonBib::Organization.new name: a.text.gsub(/[\r\n]+/, " ").strip, contact: [cnt]
    role = { type: "authorizer", description: ["Committee"] }
    RelatonBib::ContributionInfo.new entity: org, role: [role]
  end
end
#parse_date ⇒ Array<RelatonBib::BibliographicDate>
Parse date.
98 99 100 101 |
# File 'lib/relaton_oasis/data_part_parser.rb', line 98
#
# Parse date.
#
# Takes the first "<day> <word> <year>" sequence found in #text
# (e.g. "11 April 2019") and reports it as the "issued" date.
#
# @return [Array<RelatonBib::BibliographicDate>] one issued date, or an
#   empty array when the text contains no such sequence
#
def parse_date
  on = text[/\d{1,2}\s\w+\s\d{4}/]
  # Without this guard a text lacking a date would pass nil to Date.parse.
  return [] unless on

  [RelatonBib::BibliographicDate.new(on: Date.parse(on).to_s, type: "issued")]
end
#parse_docnumber ⇒ String
Parse document number.
75 76 77 78 79 80 81 82 |
# File 'lib/relaton_oasis/data_part_parser.rb', line 75
#
# Parse document number.
#
# Reads the citation label of the node, drops the surrounding square
# brackets and passes the result through parse_errata, parse_spec and
# parse_part.
#
# @return [String] document number
#
def parse_docnumber
  label = @node.at("./span[@class='citationLabel']/strong|./strong|b/span")
  id = parse_errata(label.text.match(/[^\[\]]+/).to_s)
  # some part references need to be added by "Pt" to be distinguishable from root doc
  ambiguous = %w[CMIS-v1.1 DocBook-5.0 XACML-V3.0 mqtt-v3.1.1 OData-JSON-Format-v4.0]
  id += "-Pt" if ambiguous.include?(id)
  parse_part(parse_spec(id))
end
#parse_editorialgroup ⇒ RelatonBib::EditorialGroup
Parse technical committee.
115 116 117 118 119 120 121 |
# File 'lib/relaton_oasis/data_part_parser.rb', line 115
#
# Parse technical committee.
#
# Each link in the paragraph selected by the XPath below (the first <p>
# that has a preceding sibling <p> starting with "Technical") is wrapped
# in a work group / technical committee named after the link text.
#
# @return [RelatonBib::EditorialGroup, nil] editorial group, or nil when
#   no page is available
#
def parse_editorialgroup
  # `page` can be nil (parse_authorizer guards against it as well);
  # without a page no committee can be determined.
  return unless page

  tc = page.xpath("//p[preceding-sibling::p[starts-with(., 'Technical')]][1]//a").map do |a|
    wg = RelatonBib::WorkGroup.new name: a.text.strip
    RelatonBib::TechnicalCommittee.new wg
  end
  RelatonBib::EditorialGroup.new tc
end
#parse_link ⇒ Array<RelatonBib::TypedUri>
Parse link.
89 90 91 |
# File 'lib/relaton_oasis/data_part_parser.rb', line 89
#
# Parse link.
#
# Wraps the href attribute of #link_node in a single URI of type "src".
#
# @return [Array<RelatonBib::TypedUri>] link
#
def parse_link
  src = RelatonBib::TypedUri.new(type: "src", content: link_node[:href])
  [src]
end
#parse_relation ⇒ Array<RelatonBib::DocumentRelation>
Parse relation.
128 129 130 131 132 133 |
# File 'lib/relaton_oasis/data_part_parser.rb', line 128
#
# Parse relation.
#
# Relates the part to the document described by its ancestor <details>
# element: that element is handed to a DataParser, and the id of the first
# document identifier it yields becomes the formatted reference of a
# "partOf" relation.
#
# @return [Array<RelatonBib::DocumentRelation>] relation
#
def parse_relation
  details = @node.at("./ancestor::details")
  parent_id = DataParser.new(details).parse_docid[0].id
  ref = RelatonBib::FormattedRef.new(content: parent_id)
  parent = RelatonOasis::OasisBibliographicItem.new(formattedref: ref)
  [RelatonBib::DocumentRelation.new(type: "partOf", bibitem: parent)]
end
#parse_technology_area ⇒ Array<String>
Parse technology area.
155 156 157 |
# File 'lib/relaton_oasis/data_part_parser.rb', line 155
#
# Parse technology area.
#
# Delegates to the inherited implementation, passing it the ancestor
# <details> element of the part node.
#
# @return [Array<String>] technology areas
#
def parse_technology_area
  details = @node.at("./ancestor::details")
  super(details)
end
#parse_title ⇒ Array<RelatonBib::TypedTitleString>
Parse title.
66 67 68 |
# File 'lib/relaton_oasis/data_part_parser.rb', line 66
#
# Parse title.
#
# Wraps the string returned by #title in a single "main" title.
#
# @return [Array<RelatonBib::TypedTitleString>] title
#
def parse_title
  main = RelatonBib::TypedTitleString.new(type: "main", content: title, language: "en", script: "Latn")
  [main]
end
#text ⇒ Object
15 16 17 18 19 20 21 22 23 24 25 |
# File 'lib/relaton_oasis/data_part_parser.rb', line 15
#
# Descriptive text of the part, cached in @text.
#
# When a text node follows the <strong> label (or the <span> wrapping it),
# the text of all nodes following that label is used; otherwise the text
# of the first following sibling <p> that contains an <em>.
#
# @return [String] stripped text
#
def text
  @text ||= begin
    nodes =
      if @node.at("./strong/following-sibling::text()|./span[strong]/following-sibling::text()")
        @node.xpath(
          "./strong/following-sibling::node()|./span[strong]/following-sibling::node()",
        )
      else
        @node.xpath("./following-sibling::p[1][em]")
      end
    nodes.text.strip
  end
end
#title ⇒ Object
27 28 29 30 31 32 33 34 35 |
# File 'lib/relaton_oasis/data_part_parser.rb', line 27
#
# Title of the part, cached in @title.
#
# Uses the text of a dedicated title element (citationTitle / citeTitle
# span, <em> or <i>) when the node has one. Otherwise the title is cut out
# of #text: everything before " Edited", or before the first
# "<day> <word> <year>" date, or the whole text when neither is present.
#
# @return [String] stripped title
#
def title
  return @title if @title

  t = @node.at("./span[@class='citationTitle' or @class='citeTitle']|./em|./i")
  @title = if t then t.text
           else
             # The first pattern is kept as-is so inputs it already handled
             # give the same result. Its date alternative has no whitespace
             # before the year and needs a two-digit day, so dates such as
             # "11 April 2019" fall through to the second pattern, which
             # uses the same date shape as parse_date.
             m = text.match(/(?<content>.+)\s(?:Edited|\d{2}\s\w+\d{4})/) ||
               text.match(/(?<content>.+?)\s\d{1,2}\s\w+\s\d{4}/)
             # With no delimiter at all, use the whole text rather than
             # calling [] on a nil match.
             m ? m[:content] : text
           end.strip
end