Module: RelatonItu::Scrapper
- Defined in:
- lib/relaton_itu/scrapper.rb
Overview
Scrapper: fetches the page for a search hit and builds an ItuBibliographicItem from its contents.
Constant Summary collapse
- ROMAN_MONTHS =
%w[I II III IV V VI VII VIII IX X XI XII].freeze
- TYPES =
{ "ISO" => "international-standard", "TS" => "technicalSpecification", "TR" => "technicalReport", "PAS" => "publiclyAvailableSpecification", "AWI" => "appruvedWorkItem", "CD" => "committeeDraft", "FDIS" => "finalDraftInternationalStandard", "NP" => "newProposal", "DIS" => "draftInternationalStandard", "WD" => "workingDraft", "R" => "recommendation", "Guide" => "guide", }.freeze
Class Method Summary collapse
-
.parse_page(hit, imp: false) ⇒ Hash
Parse page.
Class Method Details
.parse_page(hit, imp: false) ⇒ Hash
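Parse page. Fetches the page for the given hit and builds an ItuBibliographicItem from it. Returns nil when the response code is not "200", or when imp is true and the expected link is not found on the page. Note: the signature above says Hash, but the code shown below returns the ItuBibliographicItem instance (or nil).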
30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 |
# File 'lib/relaton_itu/scrapper.rb', line 30

#
# Parse page.
#
# Fetches the page for +hit+ through get_page and assembles an
# ItuBibliographicItem from the values the fetch_* helpers extract.
#
# @param hit [Object] search hit; it must respond to #hit and return
#   something indexable by :type and :code (the concrete class is not
#   visible from here -- confirm against the caller)
# @param imp [Boolean] when true, follow the link inside the span whose
#   id contains 'tab_ig_uc_rec' and parse the page it points to instead
#
# @return [ItuBibliographicItem, nil] nil when the first response code is
#   not "200", or when +imp+ is set and the link is absent
#
def parse_page(hit, imp: false) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
  page = get_page hit
  return unless page.code == "200"

  if imp
    imp_anchor = page.at "//span[contains(@id, 'tab_ig_uc_rec')]/a"
    return unless imp_anchor

    # Switch to the linked page; everything below is read from it.
    page = get_page hit, imp_anchor[:href].to_s
  end

  # Fetch edition.
  edition_xpath = "//table/tr/td[contains(@style,'color: white')]/span[contains(@id, 'Label8')]/b"
  edition_node = page.at(edition_xpath)
  edition = edition_node&.text

  docid = fetch_docid(page, hit)

  # Entries are listed in the same order the original passed them, so the
  # helper calls run in the same sequence.
  attrs = {
    id: fetch_id(docid),
    fetched: Date.today.to_s,
    type: "standard",
    docid: docid,
    edition: edition,
    language: ["en"],
    script: ["Latn"],
    title: fetch_titles(page),
    doctype: DocumentType.new(type: hit.hit[:type]),
    docstatus: fetch_status(page),
    ics: [], # fetch_ics(doc),
    date: fetch_dates(page),
    contributor: fetch_contributors(hit.hit[:code]),
    editorialgroup: fetch_workgroup(hit.hit[:code], page),
    abstract: fetch_abstract(page, hit),
    copyright: fetch_copyright(hit.hit[:code], page),
    link: fetch_link(page),
    relation: fetch_relations(page),
    place: ["Geneva"],
  }

  ItuBibliographicItem.new(**attrs)
end