Class: RelatonIana::DataFetcher

Inherits:
Object
  • Object
show all
Defined in:
lib/relaton_iana/data_fetcher.rb

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(output, format) ⇒ DataFetcher

Data fetcher initializer

Parameters:

  • output (String)

    directory to save files

  • format (String)

    format of output files (xml, yaml, bibxml)



9
10
11
12
13
14
15
# File 'lib/relaton_iana/data_fetcher.rb', line 9

# Data fetcher initializer.
#
# @param output [String] directory to save files
# @param format [String] format of output files (xml, yaml, bibxml)
def initialize(output, format)
  @files = []
  @output = output
  @format = format
  # The file extension is the format without a leading "bib" ("bibxml" => "xml").
  @ext = format.sub(/^bib/, "")
  @index = Relaton::Index.find_or_create(:IANA, file: "index-v1.yaml")
end

Class Method Details

.fetch(output: "data", format: "yaml") ⇒ Object

Initialize fetcher and run fetch

Parameters:

  • output (String) (defaults to: "data")

    directory to save files, default: “data”

  • format (String) (defaults to: "yaml")

    format of output files (xml, yaml, bibxml), default: yaml



23
24
25
26
27
28
29
30
31
# File 'lib/relaton_iana/data_fetcher.rb', line 23

# Initialize fetcher and run fetch, printing the start time, the stop time and
# the elapsed whole seconds to standard output.
#
# @param output [String] directory to save files, default: "data"
# @param format [String] format of output files (xml, yaml, bibxml), default: yaml
def self.fetch(output: "data", format: "yaml")
  started_at = Time.now
  # Wall-clock time is only displayed; the duration is taken from the monotonic
  # clock so a system clock adjustment during the run cannot skew it.
  clock_start = Process.clock_gettime(Process::CLOCK_MONOTONIC)
  puts "Started at: #{started_at}"
  FileUtils.mkdir_p output
  new(output, format).fetch
  elapsed = Process.clock_gettime(Process::CLOCK_MONOTONIC) - clock_start
  puts "Stopped at: #{Time.now}"
  puts "Done in: #{elapsed.round} sec."
end

Instance Method Details

#fetch ⇒ Object

Parse documents



36
37
38
39
40
41
42
43
44
# File 'lib/relaton_iana/data_fetcher.rb', line 36

# Parse documents.
#
# Reads every XML file found under "iana-registries", hands the ones that
# contain a "<registry" tag to #parse, and saves the index afterwards.
# A failure on one file is reported through Util.error and does not stop
# the remaining files from being processed.
def fetch
  Dir.glob("iana-registries/**/*.xml").each do |path|
    begin
      xml = File.read(path, encoding: "UTF-8")
      next unless xml.include?("<registry")

      parse(xml)
    rescue StandardError => e
      Util.error "Error: #{e.message}. File: #{path}"
    end
  end
  @index.save
end

#file_name(bib) ⇒ String

Generate file name

Parameters:

Returns:

  • (String)

    file name



84
85
86
87
# File 'lib/relaton_iana/data_fetcher.rb', line 84

# Generate file name.
#
# The document number is lower-cased, every whitespace character, comma, colon
# and slash becomes an underscore, and runs of underscores collapse to one.
#
# @param bib [#docnumber] document whose number names the file
# @return [String] file name: output directory joined with "<name>.<ext>"
def file_name(bib)
  slug = bib.docnumber.downcase
  slug = slug.gsub(/[\s,:\/]/, "_")
  basename = "#{slug.squeeze("_")}.#{@ext}"
  File.join(@output, basename)
end

#parse(content) ⇒ Object



46
47
48
49
50
51
52
# File 'lib/relaton_iana/data_fetcher.rb', line 46

# Parse one registry file and save the documents it yields.
#
# The root registry element is parsed and saved first; each registry element
# directly beneath it is then parsed with the root document passed along, and
# saved as well.
#
# @param content [String] XML text of a registry file
def parse(content)
  root = Nokogiri::XML(content).at("/xmlns:registry")
  parent = Parser.parse(root)
  save_doc(parent)
  root.xpath("./xmlns:registry").each do |child|
    save_doc(Parser.parse(child, parent))
  end
end

#save_doc(bib) ⇒ Object

Save document to file

Parameters:



59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
# File 'lib/relaton_iana/data_fetcher.rb', line 59

# Save document to file and register it in the index.
#
# The document is serialized according to the configured format: "xml" and
# "yaml" have dedicated serializers, any other format is dispatched to the
# document's public `to_<format>` method. A warning is logged when the target
# file name was already produced during this run; the file is overwritten anyway.
#
# @param bib [#docnumber, nil] document to save; nothing is done when it is nil
# @return [Integer, nil] result of File.write, or nil when +bib+ is nil
def save_doc(bib) # rubocop:disable Metrics/MethodLength
  return unless bib

  # public_send keeps the format-derived call restricted to the public API.
  content = case @format
            when "xml" then bib.to_xml(bibdata: true)
            when "yaml" then bib.to_hash.to_yaml
            else bib.public_send("to_#{@format}")
            end
  file = file_name(bib)
  if @files.include? file
    Util.warn "File #{file} already exists. Document: #{bib.docnumber}"
  else
    @files << file
  end
  # Write before indexing: if the write raises, the index must not end up
  # pointing at a file that was never created.
  written = File.write file, content, encoding: "UTF-8"
  @index.add_or_update bib.docnumber, file
  written
end