Module: RelatonItu::DataParserR
Instance Method Summary collapse
- #fetch_abstract(doc) ⇒ Array<RelatonBib::FormattedString>
- #fetch_date(doc) ⇒ Array<RelatonBib::BibliographicDate>
- #fetch_docid(doc) ⇒ Array<RelatonBib::DocumentIdentifier>
- #fetch_doctype(type) ⇒ Object
- #fetch_link(url) ⇒ Array<RelatonBib::TypedUri>
- #fetch_status(doc) ⇒ RelatonBib::DocumentStatus?
- #fetch_title(doc) ⇒ Array<RelatonBib::TypedTitleString>
-
#parse(doc, url, type) ⇒ RelatonItu::ItuBibliographicItem
Parse ITU-R document.
- #parse_date(date, type) ⇒ RelatonBib::BibliographicDate
Instance Method Details
#fetch_abstract(doc) ⇒ Array<RelatonBib::FormattedString>
43 44 45 46 47 48 |
# File 'lib/relaton_itu/data_parser_r.rb', line 43
#
# Build abstracts from the "Observation" rows of the document page.
#
# Every cell selected by the XPath contributes one English/Latin-script
# formatted string; cells whose stripped text is empty are skipped.
#
# @param doc [#xpath] parsed page (presumably a Nokogiri document - confirm
#   against the caller)
# @return [Array<RelatonBib::FormattedString>] possibly empty
def fetch_abstract(doc)
  cells = doc.xpath('//h3[.="Observation"]/parent::td/following-sibling::td[2]')
  cells.each_with_object([]) do |cell, abstracts|
    text = cell.text.strip
    next if text.empty?

    abstracts << RelatonBib::FormattedString.new(content: text, language: "en", script: "Latn")
  end
end
#fetch_date(doc) ⇒ Array<RelatonBib::BibliographicDate>
52 53 54 55 56 57 58 59 60 61 62 63 64 |
# File 'lib/relaton_itu/data_parser_r.rb', line 52
#
# Collect the dates that can be read from the document page.
#
# Up to three dates are produced, in this order:
# * "confirmed" - from the first "Approval_Date" / "Approval date" /
#   "Approval year" row that is present;
# * "updated"   - from the "Version year" row;
# * "published" - a 19xx/20xx year that directly follows a hyphen in the
#   heading of the "idDocSetPropertiesWebPart" block.
#
# Each source is optional: a missing row or a missing heading simply
# contributes no date instead of raising.
#
# @param doc [#at] parsed page (presumably a Nokogiri document - confirm
#   against the caller)
# @return [Array<RelatonBib::BibliographicDate>] possibly empty
def fetch_date(doc)
  dates = []

  approval = doc.at('//h3[.="Approval_Date"]/parent::td/following-sibling::td[2]',
                    '//h3[.="Approval date"]/parent::td/following-sibling::td[2]',
                    '//h3[.="Approval year"]/parent::td/following-sibling::td[2]')
  dates << parse_date(approval.text, "confirmed") if approval

  version = doc.at('//h3[.="Version year"]/parent::td/following-sibling::td[2]')
  dates << parse_date(version.text, "updated") if version

  # Guard the heading the same way as the rows above: without it there is
  # nothing to extract a publication year from.
  heading = doc.at('//div[@id="idDocSetPropertiesWebPart"]/h2')
  year = heading.text.match(/(?<=-)(19|20)\d{2}/) if heading
  dates << parse_date(year.to_s, "published") if year

  dates
end
#fetch_docid(doc) ⇒ Array<RelatonBib::DocumentIdentifier>
25 26 27 28 29 30 31 32 |
# File 'lib/relaton_itu/data_parser_r.rb', line 25
#
# Derive the primary ITU-R identifier from the page heading.
#
# The heading of the "idDocSetPropertiesWebPart" block is expected to look
# like "R-<word>-<number>[-<1..3 digits>]..."; the captured number part is
# prefixed with "ITU-R ". For example the pattern turns
# "R-REC-BT.709-6-201506-I" into "ITU-R BT.709-6".
#
# NOTE(review): the optional "-\d{1,3}" suffix also accepts the first three
# digits of a longer number that directly follows the hyphen - confirm that
# headings always carry a short revision number.
#
# @param doc [#at] parsed page (presumably a Nokogiri document - confirm
#   against the caller)
# @return [Array<RelatonBib::DocumentIdentifier>] a single primary identifier
# @raise [NoMethodError] when the heading is absent or does not match the
#   pattern
def fetch_docid(doc)
  heading = doc.at('//div[@id="idDocSetPropertiesWebPart"]/h2').text
  number = heading.match(/^R-\w+-([^-]+(?:-\d{1,3})?)/)[1]
  identifier = RelatonBib::DocumentIdentifier.new(type: "ITU", id: "ITU-R #{number}", primary: true)
  [identifier]
end
#fetch_doctype(type) ⇒ Object
94 95 96 |
# File 'lib/relaton_itu/data_parser_r.rb', line 94
#
# Wrap a document type name in a DocumentType object.
#
# @param type [Object] value handed to DocumentType as its +type+ (presumably
#   a String such as a document type name - confirm against the caller)
# @return [Object] a new DocumentType, resolved from the enclosing namespace
def fetch_doctype(type)
  doctype = DocumentType.new(type: type)
  doctype
end
#fetch_link(url) ⇒ Array<RelatonBib::TypedUri>
81 82 83 |
# File 'lib/relaton_itu/data_parser_r.rb', line 81
#
# Wrap the document URL as a single "src" link.
#
# @param url [Object] value used as the link content (presumably the page
#   URL as a String - confirm against the caller)
# @return [Array<RelatonBib::TypedUri>] one-element array
def fetch_link(url)
  source = RelatonBib::TypedUri.new(type: "src", content: url)
  [source]
end
#fetch_status(doc) ⇒ RelatonBib::DocumentStatus?
87 88 89 90 91 92 |
# File 'lib/relaton_itu/data_parser_r.rb', line 87
#
# Read the document status from the "Status" row of the page.
#
# @param doc [#at] parsed page (presumably a Nokogiri document - confirm
#   against the caller)
# @return [RelatonBib::DocumentStatus, nil] status whose stage is the text
#   of the cell, or nil when the page has no "Status" row
def fetch_status(doc)
  cell = doc.at('//h3[.="Status"]/parent::td/following-sibling::td[2]')
  if cell
    RelatonBib::DocumentStatus.new(stage: cell.text)
  end
end
#fetch_title(doc) ⇒ Array<RelatonBib::TypedTitleString>
36 37 38 39 |
# File 'lib/relaton_itu/data_parser_r.rb', line 36
#
# Read the main title from the "Title" row of the page.
#
# @param doc [#at] parsed page (presumably a Nokogiri document - confirm
#   against the caller)
# @return [Array<RelatonBib::TypedTitleString>] a single "main" title in
#   English / Latin script
# @raise [NoMethodError] when the page has no "Title" row
def fetch_title(doc)
  cell = doc.at('//h3[.="Title"]/parent::td/following-sibling::td[2]')
  main = RelatonBib::TypedTitleString.new(
    type: "main", content: cell.text, language: "en", script: "Latn"
  )
  [main]
end
#parse(doc, url, type) ⇒ RelatonItu::ItuBibliographicItem
Parse ITU-R document.
14 15 16 17 18 19 20 21 |
# File 'lib/relaton_itu/data_parser_r.rb', line 14
#
# Parse ITU-R document.
#
# Gathers identifier, title, abstract, dates, link, status and doctype via
# the fetch_* helpers and assembles them into a bibliographic item. Language
# and script are fixed to English / Latin and the item type to "standard".
#
# @param doc [#at, #xpath] parsed page (presumably a Nokogiri document -
#   confirm against the caller)
# @param url [Object] page address, stored as the "src" link
# @param type [Object] document type, passed on to #fetch_doctype
# @return [RelatonItu::ItuBibliographicItem]
def parse(doc, url, type)
  # The helpers run in the same order as the keyword arguments below.
  docid = fetch_docid(doc)
  title = fetch_title(doc)
  abstract = fetch_abstract(doc)
  date = fetch_date(doc)
  link = fetch_link(url)
  docstatus = fetch_status(doc)
  doctype = fetch_doctype(type)

  RelatonItu::ItuBibliographicItem.new(
    docid: docid,
    title: title,
    abstract: abstract,
    date: date,
    language: ["en"],
    link: link,
    script: ["Latn"],
    docstatus: docstatus,
    type: "standard",
    doctype: doctype
  )
end
#parse_date(date, type) ⇒ RelatonBib::BibliographicDate
69 70 71 72 73 74 75 76 77 |
# File 'lib/relaton_itu/data_parser_r.rb', line 69
#
# Normalise a date string and wrap it in a bibliographic date.
#
# Recognised shapes, checked in this order:
# * six consecutive digits "YYYYMM" anywhere in the string -> "YYYY-MM";
# * "M/D/YYYY" (first number is taken as the month)        -> "YYYY-MM-DD",
#   with month and day zero-padded to two digits;
# * anything else is passed through unchanged (e.g. a bare year).
#
# @param date [String] raw date text
# @param type [String] date type, e.g. "confirmed", "updated", "published"
# @return [RelatonBib::BibliographicDate]
def parse_date(date, type)
  on =
    case date
    when /(\d{4})(\d{2})/
      "#{Regexp.last_match(1)}-#{Regexp.last_match(2)}"
    when %r{(\d{1,2})/(\d{1,2})/(\d{4})}
      month = Regexp.last_match(1).rjust(2, "0")
      day = Regexp.last_match(2).rjust(2, "0")
      # Pad single-digit parts so the result is a well-formed YYYY-MM-DD.
      "#{Regexp.last_match(3)}-#{month}-#{day}"
    else
      date
    end
  RelatonBib::BibliographicDate.new(type: type, on: on)
end