Class: RelatonCcsds::DataFetcher

Inherits:
Object
  • Object
show all
Defined in:
lib/relaton_ccsds/data_fetcher.rb

Constant Summary collapse

# Query URL for the 'CCSDS Publications' list on public.ccsds.org.
# Asks for up to 1000 items ($top=1000), selects the listed fields and filters
# on Book Type: Blue, Magenta, Green, Orange and the two Yellow Book kinds.
# Passed to #fetch_docs by #fetch without the `retired` flag.
# NOTE(review): `Dcoument_x0020_Title` is spelled this way in both query URLs;
# presumably it mirrors the remote field name - confirm before "correcting" it.
# NOTE(review): the heredoc spans several lines, so the string contains line
# breaks - confirm the HTTP client tolerates them.
ACTIVE_PUBS_URL =
<<~URL.freeze
  https://public.ccsds.org/_api/web/lists/getbytitle('CCSDS%20Publications')/items?$top=1000&$select=Dcoument_x0020_Title,
  Document_x0020_Number,Book_x0020_Type,Issue_x0020_Number,calPublishedMonth,calPublishedYear,Description0,Working_x0020_Group,
  FileRef,ISO_x0020_Number,Patents,Extra_x0020_Link,Area,calActive,calHtmlColorCode&$filter=Book_x0020_Type%20eq%20%27Blue%20
  Book%27%20or%20Book_x0020_Type%20eq%20%27Magenta%20Book%27%20or%20Book_x0020_Type%20eq%20%27Green%20Book%27%20or%20
  Book_x0020_Type%20eq%20%27Orange%20Book%27%20or%20Book_x0020_Type%20eq%20%27Yellow%20Book%20-%20Reports%20and%20Records%27%20
  or%20Book_x0020_Type%20eq%20%27Yellow%20Book%20-%20CCSDS%20Normative%20Procedures%27
URL
# Query URL for the same 'CCSDS Publications' list, filtered on Book Type
# 'Silver Book' only. Asks for up to 1000 items ($top=1000).
# Passed to #fetch_docs by #fetch with `retired: true`.
# NOTE(review): `Dcoument_x0020_Title` is spelled this way in both query URLs;
# presumably it mirrors the remote field name - confirm before "correcting" it.
OBSOLETE_PUBS_URL =
<<~URL.freeze
  https://public.ccsds.org/_api/web/lists/getbytitle('CCSDS%20Publications')/items?$top=1000&$select=Dcoument_x0020_Title,
  Document_x0020_Number,Book_x0020_Type,Issue_x0020_Number,calPublishedMonth,calPublishedYear,Description0,Working_x0020_Group,
  FileRef,ISO_x0020_Number,Patents,Extra_x0020_Link,Area,calHtmlColorCode&$filter=Book_x0020_Type%20eq%20%27Silver%20Book%27
URL
# Matches a trailing " - <word> Translated" suffix (whitespace, dash,
# whitespace, one word, whitespace, "Translated" at end of line).
# Used on document identifiers to tell translated documents from others.
TRRGX =
/\s-\s\w+\sTranslated$/.freeze

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(output, format) ⇒ DataFetcher

Initialize fetcher

Parameters:

  • output (String)

    path to output directory

  • format (String)

    output format (yaml, xml, bibxml)



26
27
28
29
30
31
# File 'lib/relaton_ccsds/data_fetcher.rb', line 26

# Set up a fetcher that writes documents to +output+ in the given +format+.
#
# @param output [String] path to output directory
# @param format [String] output format (yaml, xml, bibxml)
def initialize(output, format)
  @output = output
  @format = format
  # File extension: "bibxml" is replaced by "xml", anything else is kept as is.
  @ext = format.sub("bibxml", "xml")
  # Paths of the files written so far (consulted by #save_bib).
  @files = []
end

Class Method Details

.fetch(output: "data", format: "yaml") ⇒ void

This method returns an undefined value.

Create fetcher instance and fetch data

Parameters:

  • output (String) (defaults to: "data")

    path to output directory (default: “data”)

  • format (String) (defaults to: "yaml")

    output format (yaml, xml, bibxml) (default: “yaml”)



53
54
55
56
57
58
59
60
61
# File 'lib/relaton_ccsds/data_fetcher.rb', line 53

# Create fetcher instance and fetch data, printing the start and stop
# timestamps and the elapsed time in seconds.
#
# @param output [String] path to output directory (default: "data")
# @param format [String] output format (yaml, xml, bibxml) (default: "yaml")
#
# @return [void]
def self.fetch(output: "data", format: "yaml")
  started_at = Time.now
  # The duration is taken from the monotonic clock so that a wall-clock
  # adjustment during the run cannot skew (or negate) the reported time.
  clock_start = Process.clock_gettime(Process::CLOCK_MONOTONIC)
  puts "Started at: #{started_at}"
  FileUtils.mkdir_p output
  new(output, format).fetch
  elapsed = Process.clock_gettime(Process::CLOCK_MONOTONIC) - clock_start
  puts "Stopped at: #{Time.now}"
  puts "Done in: #{elapsed.round} sec."
end

Instance Method Details

#agent ⇒ Object



33
34
35
36
37
38
39
# File 'lib/relaton_ccsds/data_fetcher.rb', line 33

# Lazily built HTTP agent, created once and reused on later calls.
#
# @return [Mechanize] agent whose request headers ask for
#   "application/json;odata=verbose"
def agent
  unless @agent
    @agent = Mechanize.new
    @agent.request_headers = { "Accept" => "application/json;odata=verbose" }
  end
  @agent
end

#content(bib) ⇒ String

Serialize bibliographic item

Parameters:

Returns:

  • (String)

    serialized bibliographic item



252
253
254
255
256
257
258
# File 'lib/relaton_ccsds/data_fetcher.rb', line 252

# Serialize a bibliographic item according to the configured format.
#
# "yaml" uses +to_hash.to_yaml+, "xml" uses +to_xml(bibdata: true)+ and any
# other format is dispatched to the item's +to_<format>+ method.
#
# @param bib [#to_hash, #to_xml] bibliographic item
#
# @return [String] serialized bibliographic item
def content(bib)
  return bib.to_hash.to_yaml if @format == "yaml"
  return bib.to_xml(bibdata: true) if @format == "xml"

  bib.send "to_#{@format}"
end

#create_instance_relation(bib, file) ⇒ void

This method returns an undefined value.

Create instance relation and save to file

Parameters:



204
205
206
207
208
209
210
# File 'lib/relaton_ccsds/data_fetcher.rb', line 204

# Link +bib+ with the document stored in +file+ and rewrite that file.
#
# +bib+ receives a "hasInstance" relation to the stored document, the stored
# document receives an "instanceOf" relation to +bib+, and the stored document
# is written back to +file+ using #content.
#
# NOTE(review): the file is always read with YAML.load_file while it is
# written in the configured format - presumably this only works when the
# format is "yaml"; confirm for xml/bibxml output.
#
# @param bib [#relation] bibliographic item being saved
# @param file [String] path to the stored document
#
# @return [void]
def create_instance_relation(bib, file)
  stored = BibliographicItem.from_hash(YAML.load_file(file))
  bib.relation << create_relation(stored, "hasInstance")
  stored.relation << create_relation(bib, "instanceOf")
  File.write(file, content(stored), encoding: "UTF-8")
end

#create_relation(bib, type) ⇒ RelatonBib::DocumentRelation

Create relation

Parameters:

Returns:

  • (RelatonBib::DocumentRelation)

    relation



220
221
222
223
224
# File 'lib/relaton_ccsds/data_fetcher.rb', line 220

# Build a relation of the given type that points at +bib+.
#
# The related item is a minimal BibliographicItem carrying +bib+'s document
# identifiers and a formatted reference made from the first identifier's id.
#
# @param bib [#docidentifier] bibliographic item to point at
# @param type [String] relation type
#
# @return [RelatonBib::DocumentRelation] relation
def create_relation(bib, type)
  identifiers = bib.docidentifier
  formatted = RelatonBib::FormattedRef.new(content: identifiers.first.id)
  target = BibliographicItem.new(docid: identifiers, formattedref: formatted)
  RelatonBib::DocumentRelation.new(type: type, bibitem: target)
end

#create_relations(bib, file) ⇒ void

This method returns an undefined value.

Create translation or instance relation and save to file

Parameters:



172
173
174
175
176
177
178
179
# File 'lib/relaton_ccsds/data_fetcher.rb', line 172

# Create a translation or instance relation between +bib+ and the document
# stored in +file+, then rewrite that file.
#
# The pair of relation types comes from #translation_relation_types applied
# to the stored document: the first type is added to +bib+ (pointing at the
# stored document), the second to the stored document (pointing at +bib+).
#
# NOTE(review): the file is always read with YAML.load_file while it is
# written in the configured format - presumably this only works when the
# format is "yaml"; confirm for xml/bibxml output.
#
# @param bib [#relation] bibliographic item being saved
# @param file [String] path to the stored document
#
# @return [void]
def create_relations(bib, file)
  stored = BibliographicItem.from_hash(YAML.load_file(file))
  bib_type, stored_type = translation_relation_types(stored)
  bib.relation << create_relation(stored, bib_type)
  stored.relation << create_relation(bib, stored_type)
  File.write(file, content(stored), encoding: "UTF-8")
end

#fetch ⇒ Object



63
64
65
66
67
# File 'lib/relaton_ccsds/data_fetcher.rb', line 63

# Fetch the active publications, then the obsolete ones (flagged as
# retired), and finally save the index.
#
# @return [Object] whatever +index.save+ returns
def fetch
  fetch_docs(ACTIVE_PUBS_URL)
  fetch_docs(OBSOLETE_PUBS_URL, retired: true)
  index.save
end

#fetch_docs(url, retired: false) ⇒ void

This method returns an undefined value.

Fetch documents from url

Parameters:

  • url (String)
  • retired (Boolean) (defaults to: false)

    if true, then fetch retired documents



77
78
79
80
81
82
83
# File 'lib/relaton_ccsds/data_fetcher.rb', line 77

# Fetch documents from url.
#
# The response body is parsed as JSON and the documents are read from
# its "d" -> "results" entry. Every document is handed to #parse_and_save
# together with the complete result list; the collected return values are
# kept in @array.
#
# @param url [String]
# @param retired [Boolean] if true, then fetch retired documents
#
# @return [void]
def fetch_docs(url, retired: false)
  response = agent.get(url)
  results = JSON.parse(response.body)["d"]["results"]
  @array = results.map { |doc| parse_and_save(doc, results, retired) }
end

#index ⇒ Object



41
42
43
# File 'lib/relaton_ccsds/data_fetcher.rb', line 41

# Index of fetched documents, looked up (or created) once and then reused.
#
# @return [Object] result of Relaton::Index.find_or_create for type "CCSDS"
#   and file "index-v1.yaml"
def index
  return @index if @index

  @index = Relaton::Index.find_or_create("CCSDS", file: "index-v1.yaml")
end

#merge_links(bib, file) ⇒ void

This method returns an undefined value.

Merge identical documents with different links (updates the given bibitem)

Parameters:



234
235
236
237
238
239
240
241
242
243
# File 'lib/relaton_ccsds/data_fetcher.rb', line 234

# Merge the first link of the document stored in +file+ into +bib+.
#
# Only the first link of each document is compared: when both have the same
# type nothing is changed, otherwise the stored document's first link is
# appended to +bib.link+. Either outcome is reported through Util.info.
#
# NOTE(review): the file is always read with YAML.load_file - presumably
# this only works when the output format is "yaml"; confirm.
#
# @param bib [#link] bibliographic item being saved (updated in place)
# @param file [String] path to the already written document
#
# @return [void]
def merge_links(bib, file) # rubocop:disable Metrics/AbcSize
  stored = BibliographicItem.from_hash(YAML.load_file(file))
  stored_link = stored.link[0]
  unless bib.link[0].type == stored_link.type
    Util.info "links are merged.", key: file
    return bib.link << stored_link
  end

  Util.info "links are the same.", key: file
  nil
end

#parse_and_save(doc, results, retired) ⇒ void

This method returns an undefined value.

Parse document and save to file

Parameters:

  • doc (Hash)

    document data

  • results (Array<Hash>)

    collection of documents

  • retired (Boolean)

    if true then document is retired



94
95
96
97
98
99
100
101
# File 'lib/relaton_ccsds/data_fetcher.rb', line 94

# Parse document and save to file.
#
# For a retired document a second parser is built with the freshly parsed
# item as its third argument; its result (named "predecessor" by the
# original code) is saved before the item itself.
#
# @param doc [Hash] document data
# @param results [Array<Hash>] collection of documents
# @param retired [Boolean] if true then document is retired
#
# @return [void]
def parse_and_save(doc, results, retired)
  bibitem = DataParser.new(doc, results).parse
  save_bib(DataParser.new(doc, results, bibitem).parse) if retired
  save_bib(bibitem)
end

#save_bib(bib) ⇒ void

This method returns an undefined value.

Save bibitem to file

Parameters:



110
111
112
113
114
115
116
117
118
119
120
121
# File 'lib/relaton_ccsds/data_fetcher.rb', line 110

# Save bibitem to file.
#
# Relations to already indexed documents are looked up first. The file name
# is the first document identifier with every run of dots, whitespace and
# dashes collapsed to a single "-", plus the configured extension. If the
# same path was already written during this run, links are merged before the
# file is overwritten. The document is finally registered in the index.
#
# @param bib [#docidentifier] bibliographic item
#
# @return [void]
def save_bib(bib)
  search_instance_translation(bib)
  id = bib.docidentifier.first.id
  basename = id.gsub(/[.\s-]+/, "-")
  file = File.join(@output, "#{basename}.#{@ext}")
  if @files.include?(file)
    Util.info "(#{file}) file already exists. Trying to merge links ..."
    merge_links(bib, file)
  else
    @files << file
  end
  File.write(file, content(bib), encoding: "UTF-8")
  index.add_or_update(id, file)
end

#search_instance_translation(bib) ⇒ void

This method returns an undefined value.

Search translation and instance relation

Parameters:



130
131
132
133
134
135
136
137
# File 'lib/relaton_ccsds/data_fetcher.rb', line 130

# Search translation and instance relation.
#
# When the first document identifier ends with the translation suffix
# (TRRGX), the suffix is stripped and related documents are searched via
# #search_relations; otherwise translations of the document are searched via
# #search_translations. The identifier itself is never modified.
#
# @param bib [#docidentifier] bibliographic item
#
# @return [void]
def search_instance_translation(bib)
  full_id = bib.docidentifier.first.id
  # `sub` returns a copy; it differs from the original only when the suffix matched.
  base_id = full_id.sub(TRRGX, "")
  if base_id == full_id
    search_translations base_id, bib
  else
    search_relations base_id, bib
  end
end

#search_relations(bibid, bib) ⇒ void

This method returns an undefined value.

Search instance or translation relation

Parameters:



147
148
149
150
151
152
153
154
# File 'lib/relaton_ccsds/data_fetcher.rb', line 147

# Search instance or translation relation.
#
# Walks the index and, for every row whose id (with the translation suffix
# removed) equals +bibid+ and whose full id differs from +bib+'s first
# identifier, creates the relations via #create_relations.
#
# @param bibid [String] identifier of +bib+ without the translation suffix
# @param bib [#docidentifier] bibliographic item (a translated document)
#
# @return [void]
def search_relations(bibid, bib)
  index.search do |row|
    same_base = row[:id].sub(TRRGX, "") == bibid
    # Skip unrelated rows and the row describing +bib+ itself.
    create_relations(bib, row[:file]) if same_base && row[:id] != bib.docidentifier.first.id
  end
end

#search_translations(bibid, bib) ⇒ Object



156
157
158
159
160
161
162
# File 'lib/relaton_ccsds/data_fetcher.rb', line 156

# Find translations of +bib+ in the index and link each of them to it.
#
# A row is a translation when its id starts with +bibid+ immediately
# followed by the translation suffix matched by TRRGX. Every such row is
# handed to #create_instance_relation.
#
# @param bibid [String] document identifier without a translation suffix
# @param bib [Object] bibliographic item passed on to
#   #create_instance_relation
#
# @return [Object] whatever +index.search+ returns
def search_translations(bibid, bib)
  # The identifier is matched literally: ids contain regex metacharacters
  # such as "." which would otherwise match any character (or, for "(" and
  # friends, make the pattern invalid). The pattern is built once rather than
  # once per index row.
  pattern = /^#{Regexp.escape(bibid)}#{TRRGX}/
  index.search do |row|
    create_instance_relation(bib, row[:file]) if row[:id].match?(pattern)
  end
end

#translation_relation_types(bib) ⇒ Array<String>

Translation or instance relation types

Parameters:

Returns:

  • (Array<String>)

    relation types



188
189
190
191
192
193
194
# File 'lib/relaton_ccsds/data_fetcher.rb', line 188

# Translation or instance relation types.
#
# For an item whose first identifier carries the translation suffix (TRRGX)
# both types are "hasTranslation"; otherwise the pair is
# "instanceOf" / "hasInstance".
#
# @param bib [#docidentifier] bibliographic item
#
# @return [Array<String>] relation types
def translation_relation_types(bib)
  return ["instanceOf", "hasInstance"] unless bib.docidentifier.first.id.match?(TRRGX)

  ["hasTranslation"] * 2
end