Class: RelatonEtsi::DataFetcher

Inherits:
Object
  • Object
show all
Defined in:
lib/relaton_etsi/data_fetcher.rb

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(output, format) ⇒ DataFetcher

Initialize data fetcher.

Parameters:

  • output (String)

    output directory

  • format (String)

    output format (xml, bibxml, yaml)



9
10
11
12
13
# File 'lib/relaton_etsi/data_fetcher.rb', line 9

#
# Initialize data fetcher.
#
# @param [String] output output directory
# @param [String] format output format (xml, bibxml, yaml)
#
def initialize(output, format)
  @output = output
  @format = format
  # The file extension is the format without a leading "bib" ("bibxml" -> "xml").
  # \A anchors at the very start of the string; ^ would also match right after
  # a newline and could strip "bib" from the middle of a malformed value.
  @ext = format.sub(/\Abib/, "")
end

Class Method Details

.fetch(output: "data", format: "yaml") ⇒ Object



15
16
17
18
19
20
21
22
23
# File 'lib/relaton_etsi/data_fetcher.rb', line 15

#
# Run a complete fetch: make sure the output directory exists, build a
# fetcher, run it, and print start time, stop time and elapsed seconds.
#
# @param [String] output directory to write to (created if missing)
# @param [String] format output format handed to the constructor
#
def self.fetch(output: "data", format: "yaml")
  started_at = Time.now
  puts "Started at: #{started_at}"
  FileUtils.mkdir_p(output)
  fetcher = new(output, format)
  fetcher.fetch
  stopped_at = Time.now
  puts "Stopped at: #{stopped_at}"
  elapsed = (stopped_at - started_at).round
  puts "Done in: #{elapsed} sec."
end

Instance Method Details

#content(bib) ⇒ Object



48
49
50
51
52
53
54
# File 'lib/relaton_etsi/data_fetcher.rb', line 48

#
# Serialize a bibliographic item in the configured output format.
#
# @param [Object] bib item responding to the serializer of the selected format
#   (#to_xml, #to_hash or #to_<format>)
#
# @return [Object] whatever the selected serializer returns
#
# @raise [NoMethodError] if bib has no public serializer for the format
#
def content(bib)
  case @format
  when "xml" then bib.to_xml bibdata: true
  when "yaml" then bib.to_hash.to_yaml
  # Any other format is dispatched by name (e.g. "bibxml" -> #to_bibxml).
  # public_send keeps a caller-supplied format string from reaching private
  # methods of bib, which plain send would allow.
  else bib.public_send "to_#{@format}"
  end
end

#fetch ⇒ Object



29
30
31
32
33
34
35
36
37
38
39
# File 'lib/relaton_etsi/data_fetcher.rb', line 29

#
# Download the ETSI standards search export as CSV, parse every row with
# DataParser, save each resulting document and finally write the index.
#
def fetch
  now = Time.now
  # Close the search window at today's date. A fixed end date would silently
  # leave out every standard published after it.
  end_date = now.strftime("%Y-%m-%d")
  # NOTE(review): `x` looks like a millisecond timestamp used as a cache
  # buster by the ETSI search page - confirm against the site.
  stamp = (now.to_f * 1000).to_i
  url = "https://www.etsi.org/?option=com_standardssearch&view=data&format=csv&includeScope=1&page=1&search=&" \
        "title=1&etsiNumber=1&content=1&version=0&onApproval=1&published=1&withdrawn=1&historical=1&isCurrent=1&" \
        "superseded=1&startDate=1988-01-15&endDate=#{end_date}&harmonized=0&keyword=&TB=&stdType=&frequency=&" \
        "mandate=&collection=&sort=1&x=#{stamp}"
  csv = OpenURI.open_uri(url, &:read)
  # Lines matching "sep=;" are skipped (presumably the separator hint at the
  # top of the export - verify against a real download).
  CSV.parse(csv, headers: true, col_sep: ';', skip_lines: /sep=;/).each do |row|
    save DataParser.new(row).parse
  end
  index1.save
end

#index1 ⇒ Object



25
26
27
# File 'lib/relaton_etsi/data_fetcher.rb', line 25

#
# Lazily obtain the :etsi index and memoize it for later calls.
#
# @return [Object] the result of Relaton::Index.find_or_create, reused once set
#
def index1
  return @index1 if @index1

  @index1 = Relaton::Index.find_or_create(:etsi, file: Bibliography::INDEX_FILE)
end

#save(bib) ⇒ Object



41
42
43
44
45
46
# File 'lib/relaton_etsi/data_fetcher.rb', line 41

#
# Write a bibliographic item into the output directory and register the
# written file in the index under the item's first document identifier.
#
# @param [Object] bib item exposing #docidentifier; serialized via #content
#
def save(bib)
  id = bib.docidentifier.first.id
  # Build the file name from the identifier:
  # "/" becomes "-", whitespace and "." become "_", parentheses are dropped.
  basename = id.gsub(/\//, "-")
  basename = basename.gsub(/\s|\./, "_")
  basename = basename.gsub(/\(|\)/, "")
  path = File.join(@output, "#{basename}.#{@ext}")
  File.write(path, content(bib), encoding: "UTF-8")
  index1.add_or_update(id, path)
end