Class: GHArchive::Downloader

Inherits:
Object
  • Object
show all
Includes:
Utils
Defined in:
lib/gh-archive/downloader.rb

Direct Known Subclasses

GHADownloader

Instance Method Summary collapse

Methods included from Utils

#each_time, #get_gha_filename, #read_gha_file, #read_gha_file_content

Constructor Details

#initialize(folder, decompress = false) ⇒ Downloader

Returns a new instance of Downloader.



7
8
9
10
11
12
13
14
15
# File 'lib/gh-archive/downloader.rb', line 7

# Prepares a downloader that keeps its files under +folder+, creating that
# directory when nothing exists at the path yet.
#
# @param folder [String] path of the directory used for downloaded files
# @param decompress [Boolean] stored as-is in @decompress (defaults to false)
# @raise [RuntimeError] if +folder+ exists but is not a directory
def initialize(folder, decompress = false)
    @folder = folder
    @decompress = decompress
    @max = nil
    @logger = Logger.new(STDERR)

    # Dir.mkdir creates a single level only; a missing parent makes it raise.
    Dir.mkdir(@folder) unless File.exist?(@folder)
    return if File.directory?(@folder)

    raise "A file exist with the desired folder name #{folder}"
end

Instance Method Details

#download(from = Time.gm(2015, 1, 1), to = Time.now) ⇒ Object



26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
# File 'lib/gh-archive/downloader.rb', line 26

# Downloads one archive file for every time step that each_time produces for
# the +from+..+to+ range, storing it in the configured folder. Files already
# present locally are skipped.
#
# When a limit was set through #max, the oldest file fetched by this call is
# deleted as soon as the number of files fetched by this call exceeds it.
#
# A download that fails or is interrupted does not leave an empty or partial
# file behind; otherwise a later run would treat that file as already
# downloaded and skip it forever.
#
# @param from [Time] start of the range (defaults to 2015-01-01 00:00 UTC)
# @param to [Time] end of the range (defaults to the current time)
# @yield [filename] invoked after each successful download
# @yieldparam filename [String] the remote archive name (the ".json.gz" name,
#   even when the local copy is stored decompressed)
# @return [Object] whatever each_time returns
def download(from = Time.gm(2015, 1, 1), to = Time.now)
    archive = []
    self.each_time(from, to) do |current_time|
        filename = self.get_gha_filename(current_time)
        out_filename = @decompress ? filename.gsub(".json.gz", ".json") : filename

        target_file = File.join(@folder, out_filename)
        if File.exist?(target_file)
            @logger.info("Skipping existing file for #{current_time}")
            next
        end
        @logger.info("Downloading file for #{current_time}")

        completed = false
        begin
            # Binary mode: the payload is gzip data unless @decompress is set, and
            # must never go through newline or encoding conversion.
            File.open(target_file, 'wb') do |f|
                URI.open("http://data.gharchive.org/#{filename}") do |gz|
                    if @decompress
                        f << self.read_gha_file_content(gz)
                    else
                        # Stream the body instead of loading the whole archive in memory.
                        IO.copy_stream(gz, f)
                    end
                end
            end
            completed = true
        ensure
            # Runs for errors and interrupts alike; the file is closed by now.
            File.unlink(target_file) if !completed && File.exist?(target_file)
        end
        archive << target_file

        if @max && archive.size > @max
            last = archive.shift
            @logger.info("Removing local file #{last}")
            File.unlink(last)
        end

        yield filename if block_given?
    end
end

#logger=(logger) ⇒ Object



22
23
24
# File 'lib/gh-archive/downloader.rb', line 22

# Replaces the logger this object writes its messages to.
#
# @param logger [Object] stored in @logger without any validation;
#   presumably a Logger-compatible object, since other methods call #info on it
def logger=(logger)
    @logger = logger
end

#max(max) ⇒ Object



17
18
19
20
# File 'lib/gh-archive/downloader.rb', line 17

# Sets the limit stored in @max, which #download compares against the number
# of files it has fetched before deleting the oldest one.
#
# @param max [Integer, nil] the limit; nil disables the check in #download
# @return [self] the receiver, so the call can be chained
def max(max)
    @max = max
    self
end