Class: Relaton3gpp::DataFetcher
- Inherits:
-
Object
- Object
- Relaton3gpp::DataFetcher
- Defined in:
- lib/relaton_3gpp/data_fetcher.rb
Constant Summary collapse
- CURRENT =
"current.yaml".freeze
Class Method Summary collapse
-
.fetch(source, output: "data", format: "yaml") ⇒ Object
Initialize fetcher and run fetch.
Instance Method Summary collapse
- #add_affiliation(contrib, affiliation) ⇒ Object
-
#add_contributor(bib1, bib2) ⇒ Object
rubocop:disable Metrics/MethodLength,Metrics/AbcSize.
-
#add_transposed_relation(bib1, bib2) ⇒ Relaton3gpp::BibliographicItem
Add transposed relation.
-
#check_transposed_date(bib, existed) ⇒ Array<Relaton3gpp::BibliographicItem, Boolean>
Check if date of one bibliographic item is transposed to another.
-
#fetch(renewal) ⇒ Object
Parse documents.
-
#file_name(bib) ⇒ String
Generate file name.
-
#get_file(renewal) ⇒ String?
Get file from FTP.
- #index ⇒ Object
-
#initialize(output, format) ⇒ DataFetcher
constructor
Data fetcher initializer.
-
#merge_duplication(bib, file) ⇒ Relaton3gpp::BibliographicItem?
Merge duplication.
-
#save_doc(bib) ⇒ Object
Save document to file.
- #serialise(bib) ⇒ Object
-
#transposed_relation(bib, existed) ⇒ Array<Relaton3gpp::BibliographicItem, Boolean>
If one of the bibliographic items has a date greater than the other, make it a relation.
-
#update_link(bib1, bib2) ⇒ Boolean
Update link in case one of bibliographic items has no link.
Constructor Details
#initialize(output, format) ⇒ DataFetcher
Data fetcher initializer
10 11 12 13 14 15 16 17 18 19 |
# File 'lib/relaton_3gpp/data_fetcher.rb', line 10

#
# Data fetcher initializer. Lazily loads the standard libraries the fetcher
# relies on and stores the output settings.
#
# @param [String] output directory the documents are written to
# @param [String] format output format name; a leading "bib" is stripped to
#   obtain the file extension
#
def initialize(output, format)
  %w[fileutils net/ftp csv].each { |lib| require lib }
  @files = []
  @output = output
  @format = format
  @ext = format.sub(/^bib/, "")
end
Class Method Details
.fetch(source, output: "data", format: "yaml") ⇒ Object
Initialize fetcher and run fetch
32 33 34 35 36 37 38 39 40 |
# File 'lib/relaton_3gpp/data_fetcher.rb', line 32

#
# Initialize fetcher and run fetch. Prints start/stop timestamps and the
# elapsed time in seconds.
#
# @param [String] source source name; "status-smg-3GPP-force" requests a
#   full renewal
# @param [String] output directory to save files, default: "data"
# @param [String] format format of output files, default: "yaml"
#
def self.fetch(source, output: "data", format: "yaml")
  # FileUtils is used below, before #initialize has had a chance to load it.
  require "fileutils"

  t1 = Time.now
  puts "Started at: #{t1}"
  FileUtils.mkdir_p output
  new(output, format).fetch(source == "status-smg-3GPP-force")
  t2 = Time.now
  puts "Stopped at: #{t2}"
  puts "Done in: #{(t2 - t1).round} sec."
end
Instance Method Details
#add_affiliation(contrib, affiliation) ⇒ Object
250 251 252 253 254 255 256 257 258 259 260 261 |
# File 'lib/relaton_3gpp/data_fetcher.rb', line 250

#
# Append to the contributor's entity every affiliation it does not have yet.
#
# @param [#entity] contrib contributor whose entity's affiliation list is extended
# @param [Array] affiliation candidate affiliations
#
# @return [Boolean] true if at least one affiliation was appended
#
def add_affiliation(contrib, affiliation)
  known = contrib.entity.affiliation
  affiliation.reduce(false) do |changed, candidate|
    next changed if known.include? candidate

    known << candidate
    true
  end
end
#add_contributor(bib1, bib2) ⇒ Object
rubocop:disable Metrics/MethodLength,Metrics/AbcSize
231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 |
# File 'lib/relaton_3gpp/data_fetcher.rb', line 231

#
# Copy non-organization contributors of bib2 into bib1. A contributor whose
# entity name already exists in bib1 only has its affiliations merged.
#
# @param [#contributor] bib1 item receiving contributors
# @param [#contributor] bib2 item providing contributors
#
# @return [Boolean] true if bib1 was modified
#
def add_contributor(bib1, bib2) # rubocop:disable Metrics/MethodLength,Metrics/AbcSize
  candidates = bib2.contributor.reject { |c| c.entity.is_a? RelatonBib::Organization }
  candidates.inject(false) do |changed, candidate|
    match = bib1.contributor.find { |c| c.entity.name == candidate.entity.name }
    if match
      # Always merge affiliations, then fold the result into the running flag.
      add_affiliation(match, candidate.entity.affiliation) || changed
    else
      bib1.contributor << candidate
      true
    end
  end
end
#add_transposed_relation(bib1, bib2) ⇒ Relaton3gpp::BibliographicItem
Add transposed relation
223 224 225 226 227 228 229 |
# File 'lib/relaton_3gpp/data_fetcher.rb', line 223

#
# Add transposed relation: move every relation of bib2 to bib1, reset bib2's
# relations to an empty collection, then attach bib2 to bib1 as an
# "adoptedAs" relation described as "equivalent".
#
# @param [Relaton3gpp::BibliographicItem] bib1 item that receives the relations
# @param [Relaton3gpp::BibliographicItem] bib2 item that becomes the related document
#
# @return [Object] result of appending the new relation to bib1.relation
#
def add_transposed_relation(bib1, bib2)
  bib2.relation.each do |inherited|
    bib1.relation << inherited
  end
  empty_relations = RelatonBib::DocRelationCollection.new([])
  bib2.instance_variable_set :@relation, empty_relations
  bib1.relation << RelatonBib::DocumentRelation.new(
    type: "adoptedAs",
    bibitem: bib2,
    description: RelatonBib::FormattedString.new(content: "equivalent"),
  )
end
#check_transposed_date(bib, existed) ⇒ Array<Relaton3gpp::BibliographicItem, Boolean>
Check if date of one bibliographic item is transposed to another
204 205 206 207 208 209 210 211 212 213 |
# File 'lib/relaton_3gpp/data_fetcher.rb', line 204

#
# Check if date of one bibliographic item is transposed to another. The item
# with the earlier first date receives the other one as a transposed relation.
#
# @param [Relaton3gpp::BibliographicItem] bib new bibliographic item
# @param [Relaton3gpp::BibliographicItem] existed existing bibliographic item
#
# @return [Array<Relaton3gpp::BibliographicItem, Boolean>] the earlier item,
#   the later item, and true; or [bib, existed, false] when neither first
#   date is earlier than the other
#
def check_transposed_date(bib, existed)
  new_on = bib.date[0].on
  old_on = existed.date[0].on

  if new_on < old_on
    earlier = bib
    later = existed
  elsif new_on > old_on
    earlier = existed
    later = bib
  else
    return [bib, existed, false]
  end

  add_transposed_relation earlier, later
  [earlier, later, true]
end
#fetch(renewal) ⇒ Object
Parse documents
47 48 49 50 51 52 53 54 55 56 57 58 59 60 |
# File 'lib/relaton_3gpp/data_fetcher.rb', line 47

#
# Parse documents. Downloads the CSV database, saves every parsed row as a
# document, then persists the download state and the index.
#
# @param [Boolean] renewal when true, previously generated files and the
#   index are cleared before parsing
#
def fetch(renewal) # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
  file = get_file renewal
  # NOTE(review): the 20 MB floor presumably rejects truncated downloads - confirm.
  return unless file && File.exist?(file) && File.size(file) > 20_000_000

  if renewal
    # FileUtils.rm_f does not expand wildcards, so the glob is resolved first.
    FileUtils.rm_f Dir.glob(File.join(@output, "*"))
    index.remove_all
  end
  # The block form closes the CSV file once every row has been processed.
  CSV.open(file, "r:bom|utf-8", headers: true) do |csv|
    csv.each { |row| save_doc Parser.parse(row) }
  end
  File.write CURRENT, @current.to_yaml, encoding: "UTF-8"
  index.save
end
#file_name(bib) ⇒ String
Generate file name
278 279 280 281 |
# File 'lib/relaton_3gpp/data_fetcher.rb', line 278

#
# Generate file name. Whitespace, commas, colons and slashes in the document
# number become underscores, runs of underscores are collapsed, and the
# result is upcased.
#
# @param [#docnumber] bib bibliographic item
#
# @return [String] path inside the output directory, with the format extension
#
def file_name(bib)
  slug = bib.docnumber.gsub(%r{[\s,:/]}, "_")
  basename = "#{slug.squeeze('_').upcase}.#{@ext}"
  File.join @output, basename
end
#get_file(renewal) ⇒ String?
Get file from FTP. If the file does not exist or has not changed, return nil
69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 |
# File 'lib/relaton_3gpp/data_fetcher.rb', line 69

#
# Get file from FTP. Loads the previous download state from CURRENT, then
# downloads the first CSV listed in the remote directory into the system
# temp directory. A read timeout is retried; after five failed attempts the
# error is re-raised.
#
# @param [Boolean] renewal when true, download even if the remote file name
#   and timestamp match the stored state
#
# @return [String, nil] path of the downloaded file, or nil if no CSV is
#   listed or the remote file is unchanged
#
# @raise [Net::ReadTimeout] if the fifth attempt also times out
#
def get_file(renewal) # rubocop:disable Metrics/MethodLength, Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
  require "tmpdir" # Dir.tmpdir comes from the tmpdir library, not core Ruby

  @current = YAML.load_file CURRENT if File.exist? CURRENT
  @current ||= {}
  n = 0
  begin
    ftp = Net::FTP.new("www.3gpp.org")
    begin
      ftp.resume = true
      ftp.login
      ftp.chdir "/Information/Databases/"
      file_path = ftp.list("*.csv").first
      return unless file_path

      # Listing entries are split as: date, time, <ignored>, file name.
      d, t, _, file = file_path.split
      dt = DateTime.strptime("#{d} #{t}", "%m-%d-%y %I:%M%p")
      if !renewal && file == @current["file"] && !@current["date"].empty? && dt == DateTime.parse(@current["date"])
        return
      end

      tmp_file = File.join Dir.tmpdir, "3gpp.csv"
      ftp.get(file, tmp_file)
    ensure
      # Close the connection on every exit path, including before a retry,
      # so repeated attempts do not accumulate open sockets.
      ftp.close
    end
  rescue Net::ReadTimeout
    n += 1
    retry if n < 5
    raise
  end
  @current["file"] = file
  @current["date"] = dt.to_s
  tmp_file
end
#index ⇒ Object
21 22 23 |
# File 'lib/relaton_3gpp/data_fetcher.rb', line 21

#
# Memoized index for the "3gpp" dataset, backed by "index-v1.yaml".
#
# @return [Object] the value returned by Relaton::Index.find_or_create
#
def index
  return @index if @index

  @index = Relaton::Index.find_or_create("3gpp", file: "index-v1.yaml")
end
#merge_duplication(bib, file) ⇒ Relaton3gpp::BibliographicItem?
Merge duplication
148 149 150 151 152 153 154 155 156 157 |
# File 'lib/relaton_3gpp/data_fetcher.rb', line 148

#
# Merge duplication: load the document already saved in +file+ and combine
# it with +bib+ (links, transposed relation, contributors).
#
# @param [Relaton3gpp::BibliographicItem] bib new bibliographic item
# @param [String] file path of the previously saved YAML document
#
# @return [Relaton3gpp::BibliographicItem, nil] the merged item, or nil if
#   none of the merge steps changed anything
#
def merge_duplication(bib, file)
  existed = BibliographicItem.from_hash YAML.load_file(file)
  # Every step runs unconditionally; each one may mutate the items.
  link_changed = update_link bib, existed
  primary, secondary, relation_changed = transposed_relation bib, existed
  contributor_changed = add_contributor(primary, secondary)
  primary if link_changed || relation_changed || contributor_changed
end
#save_doc(bib) ⇒ Object
Save document to file
125 126 127 128 129 130 131 132 133 134 135 136 137 138 |
# File 'lib/relaton_3gpp/data_fetcher.rb', line 125

#
# Save document to file. A document whose file name was already written in
# this run is merged with the saved one; if the merge changes nothing, a
# warning is logged and nothing is written.
#
# @param [Relaton3gpp::BibliographicItem, nil] bib bibliographic item; nil is ignored
#
def save_doc(bib) # rubocop:disable Metrics/MethodLength
  return unless bib

  path = file_name(bib)
  if @files.include? path
    doc = merge_duplication bib, path
    Util.warn "File #{path} already exists. Document: #{bib.docnumber}" if doc.nil?
  else
    doc = bib
    @files << path
    index.add_or_update doc.docnumber, path
  end
  File.write path, serialise(doc), encoding: "UTF-8" unless doc.nil?
end
#serialise(bib) ⇒ Object
263 264 265 266 267 268 269 |
# File 'lib/relaton_3gpp/data_fetcher.rb', line 263

#
# Serialise a bibliographic item according to the configured format.
#
# @param [Object] bib item responding to to_xml, to_hash or to_<format>
#
# @return [Object] XML with bibdata for "xml", YAML of the item's hash for
#   "yaml", otherwise the result of the item's to_<format> method
#
def serialise(bib)
  return bib.to_xml(bibdata: true) if @format == "xml"
  return bib.to_hash.to_yaml if @format == "yaml"

  bib.send("to_#{@format}")
end
#transposed_relation(bib, existed) ⇒ Array<Relaton3gpp::BibliographicItem, Boolean>
If one of the bibliographic items has a date greater than the other, make it a relation
187 188 189 190 191 192 193 |
# File 'lib/relaton_3gpp/data_fetcher.rb', line 187

#
# Decide which of two bibliographic items is the primary one based on
# their dates; when both are dated, the comparison is delegated to
# #check_transposed_date.
#
# @param [Relaton3gpp::BibliographicItem] bib new bibliographic item
# @param [Relaton3gpp::BibliographicItem] existed existing bibliographic item
#
# @return [Array<Relaton3gpp::BibliographicItem, Boolean>] primary item,
#   secondary item, and whether anything changed
#
def transposed_relation(bib, existed) # rubocop:disable Metrics/CyclomaticComplexity
  bib_dated = bib.date.any?
  existed_dated = existed.date.any?

  # Without a date on the existing item there is nothing to compare against.
  return [bib, existed, false] unless existed_dated
  # Only the existing item is dated: it takes the primary position.
  return [existed, bib, true] unless bib_dated

  check_transposed_date bib, existed
end
#update_link(bib1, bib2) ⇒ Boolean
Update link in case one of bibliographic items has no link
167 168 169 170 171 172 173 174 175 176 |
# File 'lib/relaton_3gpp/data_fetcher.rb', line 167

#
# Update link in case one of bibliographic items has no link: the item with
# links shares them with the item whose link list is empty.
#
# @param [Relaton3gpp::BibliographicItem] bib1
# @param [Relaton3gpp::BibliographicItem] bib2
#
# @return [Boolean] true if a link list was copied, false otherwise
#
def update_link(bib1, bib2)
  donor, receiver =
    if bib1.link.any? && bib2.link.empty? then [bib1, bib2]
    elsif bib1.link.empty? && bib2.link.any? then [bib2, bib1]
    end
  return false unless donor

  receiver.instance_variable_set(:@link, donor.link)
  true
end