Class: GHArchive::Downloader
- Inherits:
- Object
- Inheritance chain: Object → GHArchive::Downloader
- Includes:
- Utils
- Defined in:
- lib/gh-archive/downloader.rb
Instance Method Summary
collapse
Methods included from Utils
#each_time, #get_gha_filename, #read_gha_file, #read_gha_file_content
Constructor Details
#initialize(folder, decompress = false) ⇒ Downloader
Returns a new instance of Downloader.
7
8
9
10
11
12
13
14
15
|
# File 'lib/gh-archive/downloader.rb', line 7
# Builds a downloader that stores its files in +folder+.
#
# The folder is created when nothing exists at that path yet. Logging goes
# to STDERR until a different logger is assigned, and no retention limit is
# set initially (@max is nil).
#
# @param folder [String] path of the directory that receives the files
# @param decompress [Boolean] stored as-is in @decompress for later use by #download
# @raise [RuntimeError] if +folder+ exists but is not a directory
def initialize(folder, decompress = false)
  @logger = Logger.new(STDERR)
  @decompress = decompress
  @folder = folder
  @max = nil

  Dir.mkdir(@folder) unless FileTest.exist?(@folder)
  return if FileTest.directory?(@folder)

  raise "A file exist with the desired folder name #{folder}"
end
|
Instance Method Details
#download(from = Time.gm(2015, 1, 1), to = Time.now) ⇒ Object
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
|
# File 'lib/gh-archive/downloader.rb', line 26
# Downloads the GH Archive files for every time step between +from+ and +to+
# into the configured folder.
#
# For each time yielded by +each_time+ the remote file name is obtained from
# +get_gha_filename+ (both come from the included Utils module). A time whose
# target file already exists locally is skipped and is NOT yielded to the
# block.
#
# The payload is first written to "<target>.part" in binary mode and renamed
# to its final name only after the transfer completed. A failed or
# interrupted download therefore never leaves a truncated file that a later
# run would mistake for a finished one, and the gzip bytes are not subject to
# newline translation on platforms that distinguish text and binary mode.
#
# When a limit was set through #max, the oldest file downloaded during this
# call is deleted as soon as more than that many files have been fetched.
# The deletion happens before the block is invoked for the newest file.
#
# @param from [Time] first time step, passed through to +each_time+
# @param to [Time] end of the range, passed through to +each_time+
# @yield [filename] the remote file name (not the local path) after each
#   completed download
# @return [Object] whatever +each_time+ returns
# @raise any error raised while opening the URL or writing the file; the
#   partial file is removed before the error propagates
def download(from = Time.gm(2015, 1, 1), to = Time.now)
  archive = []
  each_time(from, to) do |current_time|
    filename = get_gha_filename(current_time)
    # Non-mutating gsub: works even if the helper hands back a frozen string.
    out_filename = @decompress ? filename.gsub(".json.gz", ".json") : filename
    target_file = File.join(@folder, out_filename)

    if FileTest.exist?(target_file)
      @logger.info("Skipping existing file for #{current_time}")
      next
    end
    @logger.info("Downloading file for #{current_time}")

    partial_file = "#{target_file}.part"
    begin
      # 'wb' because the undecompressed payload is raw gzip data.
      File.open(partial_file, 'wb') do |f|
        URI.open("http://data.gharchive.org/#{filename}") do |gz|
          f << (@decompress ? read_gha_file_content(gz) : gz.read)
        end
      end
      # Publish under the final name only once the content is complete.
      File.rename(partial_file, target_file)
    ensure
      # Still present only when the download did not finish.
      File.unlink(partial_file) if FileTest.exist?(partial_file)
    end

    archive << target_file
    if @max && archive.size > @max
      last = archive.shift
      @logger.info("Removing local file #{last}")
      File.unlink(last)
    end

    yield filename if block_given?
  end
end
|
#logger=(logger) ⇒ Object
22
23
24
|
# File 'lib/gh-archive/downloader.rb', line 22
# Replaces the logger used for progress messages.
#
# @param logger [Object] stored in @logger; #download calls +info+ on it
def logger=(logger)
  @logger = logger
end
|
#max(max) ⇒ Object
17
18
19
20
|
# File 'lib/gh-archive/downloader.rb', line 17
# Sets the retention limit consulted by #download and returns the receiver,
# so the call can be chained.
#
# @param max [Integer, nil] value stored in @max; nil leaves retention unlimited
# @return [self]
def max(max)
  @max = max
  self
end
|