Module: RelatonOasis::DataParserUtils
- Included in:
- DataParser, DataPartParser
- Defined in:
- lib/relaton_oasis/data_parser_utils.rb
Overview
Common methods for document and part parsers.
Instance Method Summary collapse
- #affiliation(org) ⇒ Object
- #contact(email) ⇒ Object
- #create_contribution_info(person, type, description = []) ⇒ Object
- #create_person(name, email = nil, org = nil) ⇒ Object
- #page ⇒ Object
- #parse_chairs ⇒ Object
-
#parse_contributor ⇒ Array<RelatonBib::ContributionInfo>
Parse contributor.
-
#parse_docid ⇒ Array<RelatonBib::DocumentIdentifier>
Parse document identifier.
-
#parse_doctype ⇒ RelatonOasis::DocumentType
Parse document type.
- #parse_editors ⇒ Object
- #parse_editors_from_text ⇒ Object
-
#parse_errata(id) ⇒ String
Parse document identifier errata.
-
#parse_part(docid) ⇒ String
Parse document identifier part.
-
#parse_spec(num) ⇒ String
Parse document identifier specification.
-
#parse_technology_area(node) ⇒ Array<String>
Parse technology area.
- #publisher_oasis ⇒ Object
-
#retry_page(url, agent, retries = 3) ⇒ Mechanize::Page?
Retry to get page.
Instance Method Details
#affiliation(org) ⇒ Object
99 100 101 102 103 104 105 106 |
# File 'lib/relaton_oasis/data_parser_utils.rb', line 99

# Wrap an organization link into a one-element affiliation list.
#
# The organization name is the link text with every run of CR/LF characters
# replaced by a single space; the link's `:href` becomes a "uri" contact.
#
# @param org [#text, #[], nil] organization link node
# @return [Array<RelatonBib::Affiliation>] empty when +org+ is nil/false
def affiliation(org)
  return [] unless org

  uri = RelatonBib::Contact.new(type: "uri", value: org[:href])
  single_line_name = org.text.gsub(/[\r\n]+/, " ")
  [
    RelatonBib::Affiliation.new(
      organization: RelatonBib::Organization.new(name: single_line_name, contact: [uri]),
    ),
  ]
end
#contact(email) ⇒ Object
93 94 95 96 97 |
# File 'lib/relaton_oasis/data_parser_utils.rb', line 93

# Build the contact list for a person from an email link.
#
# The address is the second colon-separated segment of the link's `:href`
# (i.e. what follows the scheme in a `mailto:` style value).
#
# @param email [#[], nil] email link node
# @return [Array<RelatonBib::Contact>] empty when +email+ is nil/false
def contact(email)
  return [] unless email

  _scheme, address = email[:href].split(":")
  [RelatonBib::Contact.new(type: "email", value: address)]
end
#create_contribution_info(person, type, description = []) ⇒ Object
77 78 79 80 81 82 83 |
# File 'lib/relaton_oasis/data_parser_utils.rb', line 77

# Build a contribution record from a contributor paragraph.
#
# The person's name is the paragraph text up to the first "(" (stripped).
# Links inside the paragraph are classified by their href scheme rather than
# by position: the first `mailto:` link is the email and the first other link
# is the organization. This keeps an organization-only entry from having its
# URL recorded as an email address.
#
# @param person [#text, #xpath] paragraph node describing one contributor
# @param type [String] role type, e.g. "editor"
# @param description [Array<String>] role description
# @return [RelatonBib::ContributionInfo]
def create_contribution_info(person, type, description = [])
  name = person.text.match(/^[^(]+/).to_s.strip
  links = person.xpath(".//a[@href]")
  email = links.find { |a| a[:href].start_with?("mailto:") }
  org = links.find { |a| !a[:href].start_with?("mailto:") }
  entity = create_person name, email, org
  role = { type: type, description: description }
  RelatonBib::ContributionInfo.new(role: [role], entity: entity)
end
#create_person(name, email = nil, org = nil) ⇒ Object
85 86 87 88 89 90 91 |
# File 'lib/relaton_oasis/data_parser_utils.rb', line 85

# Build a person from a whitespace-separated name.
#
# The last token is used as the surname and every preceding token becomes a
# forename, so "Mary Ann Jones" keeps "Jones" as the surname and a
# single-token name is stored as a surname instead of leaving it nil.
# Two-token names produce exactly one forename and one surname.
#
# @param name [String] full name as it appears in the source text
# @param email [#[], nil] email link node, passed to #contact
# @param org [#text, #[], nil] organization link node, passed to #affiliation
# @return [RelatonBib::Person]
def create_person(name, email = nil, org = nil)
  *given, surname = name.split
  forenames = given.map do |part|
    RelatonBib::Forename.new(content: part, language: ["en"], script: ["Latn"])
  end
  sn = RelatonBib::LocalizedString.new(surname, "en", "Latn")
  full_name = RelatonBib::FullName.new(surname: sn, forename: forenames)
  RelatonBib::Person.new(name: full_name, contact: contact(email), affiliation: affiliation(org))
end
#page ⇒ Object
31 32 33 34 35 36 37 38 39 40 |
# File 'lib/relaton_oasis/data_parser_utils.rb', line 31

# Fetch (once) the HTML page the document link points to.
#
# The result is memoized in @page even when it is nil, so a missing link, a
# non-".html" link or a failed download is not retried on every call.
#
# @return [Mechanize::Page, nil] the page when the response code is "200",
#   otherwise nil
def page
  return @page if defined? @page

  @page = nil
  node = link_node
  href = node && node[:href]
  return @page unless href&.match?(/\.html$/)

  agent = Mechanize.new
  # 404 is whitelisted on the agent; the explicit "200" check below is what
  # decides whether the response is kept.
  agent.agent.allowed_error_codes = [404]
  resp = retry_page(href, agent)
  @page = resp if resp && resp.code == "200"
  @page
end
#parse_chairs ⇒ Object
60 61 62 63 64 65 66 |
# File 'lib/relaton_oasis/data_parser_utils.rb', line 60

# Collect the chairs listed on the document page.
#
# Selects the paragraphs that come after a sibling paragraph starting with
# "Chair" and before a sibling paragraph starting with "Editor"; each one is
# turned into an "editor" contribution described as "Chair".
#
# @return [Array<RelatonBib::ContributionInfo>] empty when there is no page
def parse_chairs
  return [] unless page

  chair_paragraphs = page.xpath(
    "//p[preceding-sibling::p[starts-with(., 'Chair')]][following-sibling::p[starts-with(., 'Editor')]]",
  )
  chair_paragraphs.map do |paragraph|
    create_contribution_info(paragraph, "editor", ["Chair"])
  end
end
#parse_contributor ⇒ Array<RelatonBib::ContributionInfo>
Parse contributor.
9 10 11 |
# File 'lib/relaton_oasis/data_parser_utils.rb', line 9

# Parse contributor.
#
# Concatenates the OASIS publisher entry, the chairs and the editors.
#
# @return [Array<RelatonBib::ContributionInfo>] contributors
def parse_contributor
  # NOTE(review): the published listing for this method read
  # `publisher_oasis + + parse_chairs + parse_editors`. The dangling `+`
  # would apply unary plus to an Array and raise; it suggests an operand was
  # lost when the page was extracted. Confirm against the upstream source.
  publisher_oasis + parse_chairs + parse_editors
end
#parse_docid ⇒ Array<RelatonBib::DocumentIdentifier>
Parse document identifier.
161 162 163 164 |
# File 'lib/relaton_oasis/data_parser_utils.rb', line 161

# Parse document identifier.
#
# The identifier is the document number prefixed with "OASIS " and is marked
# as the primary identifier of type "OASIS".
#
# @return [Array<RelatonBib::DocumentIdentifier>] one-element array
def parse_docid
  identifier = RelatonBib::DocumentIdentifier.new(
    type: "OASIS", id: "OASIS #{parse_docnumber}", primary: true,
  )
  [identifier]
end
#parse_doctype ⇒ RelatonOasis::DocumentType
Parse document type.
171 172 173 174 175 176 177 178 179 180 |
# File 'lib/relaton_oasis/data_parser_utils.rb', line 171

# Parse document type.
#
# The type is chosen by the first pattern found in the document text and
# defaults to "standard" when none matches.
#
# @return [RelatonOasis::DocumentType]
def parse_doctype
  rules = [
    [/OASIS Project Specification/, "specification"],
    [/Committee Specification/, "specification"],
    [/Technical Memorandum/, "memorandum"],
    [/Technical Resolution/, "resolution"],
  ]
  subject = text
  # `===` keeps the matching semantics of a `case`/`when` on the text.
  _pattern, kind = rules.find { |pattern, _| pattern === subject }
  DocumentType.new(type: kind || "standard")
end
#parse_editors ⇒ Object
68 69 70 71 72 73 74 75 |
# File 'lib/relaton_oasis/data_parser_utils.rb', line 68

# Collect the editors of the document.
#
# With a page available, editors are the "Contributor"-classed paragraphs
# located after a paragraph starting with "Editor" and before a paragraph
# whose class contains "Title". Without a page the editors are parsed from
# the document text instead.
#
# @return [Array<RelatonBib::ContributionInfo>]
def parse_editors
  return parse_editors_from_text unless page

  query = "//p[contains(@class, 'Contributor')][preceding-sibling::p[starts-with(., 'Editor')]]" \
          "[following-sibling::p[contains(@class, 'Title')]]"
  page.xpath(query).map { |paragraph| create_contribution_info(paragraph, "editor") }
end
#parse_editors_from_text ⇒ Object
23 24 25 26 27 28 29 |
# File 'lib/relaton_oasis/data_parser_utils.rb', line 23

# Extract editors from an "Edited by ..." phrase in the document text.
#
# The names are everything after "Edited by " up to the next period, split
# on ", ", " and " or ", and ".
#
# @return [Array<RelatonBib::ContributionInfo>] empty when there is no text
def parse_editors_from_text
  return [] unless text

  editor_names = text.match(/(?<=Edited\sby\s)[^.]+/).to_s.split(/,?\sand\s|,\s/)
  editor_names.map do |editor_name|
    RelatonBib::ContributionInfo.new(
      role: [{ type: "editor" }],
      entity: create_person(editor_name),
    )
  end
end
#parse_errata(id) ⇒ String
Parse document identifier errata.
146 147 148 149 150 151 152 153 154 |
# File 'lib/relaton_oasis/data_parser_utils.rb', line 146

# Parse document identifier errata.
#
# When the identifier already carries an "errataNN" marker (any case) the
# first lowercase "errata" is capitalised and nothing else is added.
# Otherwise the errata number is taken from the title.
#
# @param id [String] document identifier
# @return [String] identifier with the errata suffix, or +id+ unchanged
def parse_errata(id)
  return id.sub("errata", "Errata") if id.match?(/errata\d+/i)

  heading = title
  if (plus = /Plus\sErrata\s(\d+)/.match(heading))
    "#{id}-plus-Errata#{plus[1]}"
  elsif (plain = /Errata\s(\d+)/.match(heading))
    "#{id}-Errata#{plain[1]}"
  else
    id
  end
end
#parse_part(docid) ⇒ String
Parse document identifier part.
130 131 132 133 134 135 136 137 |
# File 'lib/relaton_oasis/data_parser_utils.rb', line 130

# Parse document identifier part.
#
# An identifier that already contains "PartNN" or "PtNN" (any case) is
# returned as is; otherwise a "-Pt<number>" suffix is added when the title
# mentions "Part <number>".
#
# @param docid [String] document identifier
# @return [String] identifier with the part suffix, or +docid+ unchanged
def parse_part(docid)
  return docid if docid.match?(/(?:Part|Pt)\d+/i)

  part = /Part\s(\d+)/.match(title)
  part ? "#{docid}-Pt#{part[1]}" : docid
end
#parse_spec(num) ⇒ String
Parse document identifier specification.
115 116 117 118 119 120 121 |
# File 'lib/relaton_oasis/data_parser_utils.rb', line 115

# Parse document identifier specification.
#
# Appends "-PS<number>" for an OASIS Project Specification or "-CS<number>"
# for a Committee Specification, using the number found in the document text.
#
# @param num [String] document number
# @return [String] number with the specification suffix, or +num+ unchanged
def parse_spec(num)
  body = text
  if (project = /OASIS Project Specification (\d+)/.match(body))
    "#{num}-PS#{project[1]}"
  elsif (committee = /Committee Specification (\d+)/.match(body))
    "#{num}-CS#{committee[1]}"
  else
    num
  end
end
#parse_technology_area(node) ⇒ Array<String>
Parse technology area.
187 188 189 190 191 |
# File 'lib/relaton_oasis/data_parser_utils.rb', line 187

# Parse technology area.
#
# Each link in the "technology-areas__list" is reduced to its stripped text
# with every whitespace character replaced by "-" and the first
# "development" capitalised.
#
# @param node [#xpath] document node
# @return [Array<String>] technology areas
def parse_technology_area(node)
  area_links = node.xpath("./summary/div/div/ul[@class='technology-areas__list']/li/a")
  area_links.map do |link|
    dashed = link.text.strip.gsub(/\s/, "-")
    dashed.sub("development", "Development")
  end
end
#publisher_oasis ⇒ Object
13 14 15 16 17 18 19 20 21 |
# File 'lib/relaton_oasis/data_parser_utils.rb', line 13

# Build the fixed contribution entry for OASIS itself.
#
# OASIS is recorded with two roles: "authorizer" (described as
# "Standards Development Organization") and "publisher".
#
# @return [Array<RelatonBib::ContributionInfo>] one-element array
def publisher_oasis
  homepage = RelatonBib::Contact.new(type: "uri", value: "https://www.oasis-open.org/")
  oasis = RelatonBib::Organization.new(name: "OASIS", contact: [homepage])
  roles = [
    { type: "authorizer", description: ["Standards Development Organization"] },
    { type: "publisher" },
  ]
  [RelatonBib::ContributionInfo.new(entity: oasis, role: roles)]
end
#retry_page(url, agent, retries = 3) ⇒ Mechanize::Page?
Retry to get page.
51 52 53 54 55 56 57 58 |
# File 'lib/relaton_oasis/data_parser_utils.rb', line 51

# Retry to get page.
#
# Each attempt is preceded by a one-second pause. Timeouts
# (Errno::ETIMEDOUT, Net::OpenTimeout) trigger another attempt until
# +retries+ attempts have been made; after the last failure the error is
# reported through Util.error and nil is returned. Other exceptions are not
# rescued here.
#
# @param url [String] page URL
# @param agent [#get] HTTP agent (a Mechanize instance in this module)
# @param retries [Integer] maximum number of attempts
# @return [Mechanize::Page, nil] the fetched page, or nil when every attempt timed out
def retry_page(url, agent, retries = 3)
  sleep 1 # to avoid 429 error
  agent.get url
rescue Errno::ETIMEDOUT, Net::OpenTimeout => e
  # `retry` re-runs the method body; `retries` keeps its decremented value.
  retry if (retries -= 1).positive?
  Util.error "Failed to get page `#{url}`\n#{e.message}"
  nil
end