Class: Relaton::Core::DataFetcher

Inherits:
Object
  • Object
show all
Defined in:
lib/relaton/core/data_fetcher.rb

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(output, format) ⇒ DataFetcher

Initialize fetcher

Parameters:

  • output (String)

    path to output directory

  • format (String)

    output format (yaml, xml, bibxml)



10
11
12
13
14
15
16
# File 'lib/relaton/core/data_fetcher.rb', line 10

# Initialize fetcher.
#
# @param output [String] path to output directory
# @param format [String] output format (yaml, xml, bibxml)
def initialize(output, format)
  @format = format
  @output = output
  # "bibxml" output is written with the plain "xml" file extension.
  @ext = format.sub("bibxml", "xml")
  @files = []
  @docs = []
end

Instance Attribute Details

#docs ⇒ Object

Returns the value of attribute docs.



3
4
5
# File 'lib/relaton/core/data_fetcher.rb', line 3

# Reader for the +@docs+ instance variable, which the constructor
# initializes to an empty Array.
#
# @return [Object] the value of attribute docs
def docs
  @docs
end

Class Method Details

.fetch(output: "data", format: "yaml") ⇒ Object

API method for external service



25
26
27
28
29
30
31
32
33
# File 'lib/relaton/core/data_fetcher.rb', line 25

# API method for external service.
#
# Creates the output directory, builds a fetcher instance and runs its
# #fetch, printing the start time, stop time and elapsed seconds to stdout.
#
# @param output [String] path to output directory
# @param format [String] output format handed to the fetcher instance
# @return [void]
def self.fetch(output: "data", format: "yaml")
  started_at = Time.now
  # Wall-clock time is printed for humans only; the duration is taken from
  # the monotonic clock so a system clock adjustment cannot skew it.
  started = Process.clock_gettime(Process::CLOCK_MONOTONIC)
  puts "Started at: #{started_at}"
  FileUtils.mkdir_p output
  new(output, format).fetch
  elapsed = Process.clock_gettime(Process::CLOCK_MONOTONIC) - started
  puts "Stopped at: #{Time.now}"
  puts "Done in: #{elapsed.round} sec."
end

.get_identifier_class ⇒ Object

Raises:

  • (NotImplementedError)


35
36
37
# File 'lib/relaton/core/data_fetcher.rb', line 35

# Hook that must be overridden to supply the identifier class; this base
# implementation always raises.
#
# @raise [NotImplementedError] always
def self.get_identifier_class
  # In a class method `self` is already the class; `self.class` would always
  # render as "Class" and hide which fetcher lacks the implementation.
  raise NotImplementedError, "#{self}.get_identifier_class method must be implemented"
end

Instance Method Details

#fetch ⇒ Object



39
40
41
42
43
# File 'lib/relaton/core/data_fetcher.rb', line 39

# Fetch documents from the active and obsolete publication URLs (the latter
# flagged as retired), then save the index.
#
# NOTE(review): ACTIVE_PUBS_URL, OBSOLETE_PUBS_URL and #fetch_docs are not
# defined in the visible part of this class - presumably provided
# elsewhere; confirm they resolve from here.
def fetch
  fetch_docs(ACTIVE_PUBS_URL)
  fetch_docs(OBSOLETE_PUBS_URL, retired: true)
  index.save
end

#get_output_file(bib) ⇒ String

Returns filename based on PubID identifier.

Parameters:

  • bib (RelatonBib::BibliographicItem)

Returns:

  • (String)

    filename based on PubID identifier



54
55
56
# File 'lib/relaton/core/data_fetcher.rb', line 54

# Build the output path for a bibliographic item from its first document
# identifier.
#
# @param bib [RelatonBib::BibliographicItem]
# @return [String] filename based on PubID identifier
def get_output_file(bib)
  # Runs of dots, whitespace and hyphens collapse into a single hyphen.
  basename = bib.docidentifier.first.id.gsub(/[.\s-]+/, '-')
  File.join(@output, "#{basename}.#{@ext}")
end

#index ⇒ Object



18
19
20
21
22
# File 'lib/relaton/core/data_fetcher.rb', line 18

# Memoized index for this fetcher, looked up or created through
# Relaton::Index using the concrete class's INDEX_TYPE, INDEX_FILE and
# identifier class.
#
# @return [Object] whatever Relaton::Index.find_or_create returns
def index
  @index ||= begin
    klass = self.class
    Relaton::Index.find_or_create(
      klass::INDEX_TYPE,
      file: klass::INDEX_FILE,
      pubid_class: klass.get_identifier_class,
    )
  end
end

#index_add_or_update(bib) ⇒ Object



84
85
86
87
# File 'lib/relaton/core/data_fetcher.rb', line 84

# Register the item in the index: its first document identifier is parsed
# with the identifier class and stored together with the item's output file
# path.
#
# @param bib [RelatonBib::BibliographicItem]
# @return [Object] result of the index's add_or_update
def index_add_or_update(bib)
  idx = index
  pubid = self.class.get_identifier_class.parse(bib.docidentifier.first.id)
  idx.add_or_update(pubid, get_output_file(bib))
end

#parse(doc) ⇒ RelatonBib

Parse hash and return RelatonBib

Parameters:

  • doc (Hash)

    document data

Returns:

  • (RelatonBib)

Raises:

  • (NotImplementedError)


48
49
50
# File 'lib/relaton/core/data_fetcher.rb', line 48

# Parse hash and return a bibliographic item. This base implementation only
# raises; the message names the receiver's class.
#
# @param doc [Hash] document data
# @return [RelatonBib]
# @raise [NotImplementedError] always
def parse(doc)
  message = "#{self.class}#parse method must be implemented"
  raise NotImplementedError, message
end

#parse_and_save(doc) ⇒ void

This method returns an undefined value.

Parse document and save to file

Parameters:

  • doc (Hash)

    document data

  • retired (Boolean)

    if true then document is retired



66
67
68
69
70
# File 'lib/relaton/core/data_fetcher.rb', line 66

# Parse a document, write the resulting item to its file and record it in
# the index.
#
# @param doc [Hash] document data
# @return [void]
def parse_and_save(doc)
  item = parse doc
  save_bib item
  index_add_or_update item
end

#save_bib(bib) ⇒ void

This method returns an undefined value.

Save bibitem to file

Parameters:

  • bib (RelatonBib::BibliographicItem)

    bibitem



79
80
81
82
# File 'lib/relaton/core/data_fetcher.rb', line 79

# Save bibitem to file: the serialized item is written as UTF-8 to the path
# given by #get_output_file.
#
# @param bib [RelatonBib::BibliographicItem] bibitem
# @return [void]
def save_bib(bib)
  File.write(get_output_file(bib), serialize(bib), encoding: "UTF-8")
end

#serialize(bib) ⇒ String

Serialize bibliographic item

Parameters:

  • bib (RelatonBib::BibliographicItem)

    bibliographic item to serialize

Returns:

  • (String)

    serialized bibliographic item



96
97
98
99
100
101
102
# File 'lib/relaton/core/data_fetcher.rb', line 96

# Serialize bibliographic item according to the configured output format.
#
# "yaml" and "xml" have dedicated branches; any other format is dispatched
# to the item's public +to_<format>+ method (e.g. "bibxml" calls to_bibxml).
#
# @param bib [RelatonBib::BibliographicItem] item to serialize
# @return [String] serialized bibliographic item
# @raise [NoMethodError] if the item has no public +to_<format>+ method
def serialize(bib)
  case @format
  when "yaml" then bib.to_hash.to_yaml
  when "xml" then bib.to_xml(bibdata: true)
  else
    # public_send keeps the dynamic dispatch inside the item's public API, so
    # a format string can never reach a private method.
    bib.public_send("to_#{@format}")
  end
end