Class: GHArchive::Provider

Inherits:
Object
  • Object
show all
Includes:
Utils
Defined in:
lib/gh-archive/providers.rb

Direct Known Subclasses

GHAProvider, FolderProvider, OnlineProvider

Defined Under Namespace

Classes: GHAException

Instance Method Summary collapse

Methods included from Utils

#each_time, #get_gha_filename, #read_gha_file, #read_gha_file_content

Constructor Details

#initialize ⇒ Provider

Returns a new instance of Provider.



7
8
9
10
11
12
13
14
15
# File 'lib/gh-archive/providers.rb', line 7

# Sets up a provider with its default configuration:
# events are yielded as raw JSON-derived objects (+@use_json+ is true),
# no checkpoint file is configured, the include/exclude filters are empty,
# and log messages go to a Logger writing to STDOUT.
def initialize
    @use_json = true
    @checkpoint_name = nil

    # Filter tables, keyed by event field name (see #include and #exclude).
    @includes = {}
    @excludes = {}

    @logger = Logger.new(STDOUT)
end

Instance Method Details

#each(from = Time.gm(2015, 1, 1), to = Time.now) ⇒ Object



87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
# File 'lib/gh-archive/providers.rb', line 87

# Walks the archive between +from+ and +to+ and yields every event that
# passes the configured filters, together with the time slot it came from.
#
# The start time is first passed through #restore_checkpoint, so a saved
# checkpoint may move it forward. For every time slot produced by #each_time
# the checkpoint is updated, the events are fetched with #get, filtered and
# yielded. After the last slot the checkpoint is set to +to+.
#
# An event is skipped when, for any key registered through #include, the
# registered values do not contain +event[key]+, or when, for any key
# registered through #exclude, the registered values do contain it.
#
# @param from [Time] lower bound of the scan (defaults to 2015-01-01 UTC)
# @param to [Time] upper bound of the scan (defaults to the current time)
# @yield [event, current_time] the event as returned by #get, or the result
#   of GHArchive::Event.parse(event) once #parse_events has been called,
#   plus the time slot being scanned
# @return [Array<StandardError>] the exceptions, other than GHAException,
#   raised by #get; the corresponding time slots are skipped
def each(from = Time.gm(2015, 1, 1), to = Time.now)
    failures = []
    start = restore_checkpoint(from)

    each_time(start, to) do |slot|
        # The checkpoint is written before the slot is processed.
        update_checkpoint(slot)

        batch =
            begin
                get(slot)
            rescue GHAException => e
                # GHAException is only logged as a warning; it is not collected.
                @logger.warn(e.message)
                next
            rescue => e
                @logger.error("An exception occurred for #{slot}: #{e.message}")
                failures << e
                next
            end

        batch.each do |event|
            next if @includes.any? { |key, accepted| !accepted.include?(event[key]) }
            next if @excludes.any? { |key, rejected| rejected.include?(event[key]) }

            payload = @use_json ? event : GHArchive::Event.parse(event)
            yield payload, slot
        end

        @logger.info("Scanned #{slot}")

        # Empty the batch and run the garbage collector after every slot.
        batch.clear
        GC.start
    end

    update_checkpoint(to)

    failures
end

#exclude(**args) ⇒ Object



49
50
51
52
53
54
55
56
# File 'lib/gh-archive/providers.rb', line 49

# Registers values that cause an event to be skipped by #each.
#
# Each keyword names an event field (the key is stored as a String) and its
# value is appended to the list of rejected values for that field, so
# repeated calls accumulate.
#
# @param args [Hash] field name => value to reject
# @return [self] the provider, to allow call chaining
def exclude(**args)
    args.each_pair do |field, rejected|
        (@excludes[field.to_s] ||= []) << rejected
    end

    self
end

#get(date) ⇒ Object



36
37
38
# File 'lib/gh-archive/providers.rb', line 36

# Placeholder for the method that fetches the events of one time slot.
# This base implementation always fails.
#
# @param date [Object] the time slot to fetch (unused here)
# @raise [RuntimeError] always, with the message "Not implemented"
def get(date)
    raise RuntimeError, "Not implemented"
end

#include(**args) ⇒ Object



40
41
42
43
44
45
46
47
# File 'lib/gh-archive/providers.rb', line 40

# Registers values that an event must match to be yielded by #each.
#
# Each keyword names an event field (the key is stored as a String) and its
# value is appended to the list of accepted values for that field, so
# repeated calls accumulate.
#
# @param args [Hash] field name => value to accept
# @return [self] the provider, to allow call chaining
def include(**args)
    args.each_pair do |field, accepted|
        (@includes[field.to_s] ||= []) << accepted
    end

    self
end

#logger=(logger) ⇒ Object Also known as: use_logger



29
30
31
32
33
# File 'lib/gh-archive/providers.rb', line 29

# Replaces the logger used for progress, warning and error messages.
#
# @param logger [Object] the new logger; the visible code calls #info, #warn
#   and #error on it
# @return [self] the provider (only observable when the method is invoked
#   without assignment syntax, e.g. through an alias)
def logger=(logger)
    @logger = logger

    self
end

#parse_events ⇒ Object



23
24
25
26
27
# File 'lib/gh-archive/providers.rb', line 23

# Switches the provider to parsed output: from now on #each yields the result
# of GHArchive::Event.parse(event) instead of the event as returned by #get.
#
# @return [self] the provider, to allow call chaining
def parse_events
    @use_json = false

    self
end

#restore_checkpoint(from) ⇒ Object



58
59
60
61
62
63
64
65
66
67
68
69
70
71
# File 'lib/gh-archive/providers.rb', line 58

# Determines the time from which a scan should start, honoring a saved
# checkpoint when there is one.
#
# If no checkpoint file is configured, or the configured file does not exist,
# +from+ is returned unchanged. Otherwise the checkpoint is loaded, validated
# against +from+ and returned.
#
# @param from [Time] the requested start of the scan (any value comparable
#   with the stored checkpoint works)
# @return [Time] +from+, or the value stored in the checkpoint file
# @raise [RuntimeError] if the stored checkpoint is earlier than +from+
def restore_checkpoint(from)
    return from unless @checkpoint_name && FileTest.exist?(@checkpoint_name)

    # Note that this throws an exception if the file is not readable. This is the intended behavior.
    # As opposed to that, failing to save the checkpoint information just results in a warning on the log.
    #
    # The checkpoint is written in binary mode ("wb"), so it must be read back in binary mode too:
    # a text-mode read may translate line endings on some platforms and corrupt the Marshal data.
    #
    # NOTE(review): Marshal.load can instantiate arbitrary objects; the checkpoint file must be trusted.
    loaded_from = Marshal.load(File.binread(@checkpoint_name))
    if loaded_from < from
        raise "The loaded checkpoint (#{loaded_from}) occurs before the current from date (#{from})"
    end

    @logger.info("Valid checkpoint loaded. Restored execution from #{loaded_from}.")

    loaded_from
end

#update_checkpoint(current_time) ⇒ Object



73
74
75
76
77
78
79
80
81
82
83
84
85
# File 'lib/gh-archive/providers.rb', line 73

# Saves +current_time+ to the checkpoint file, if one has been configured.
#
# The value is serialized with Marshal and written in binary mode. Saving is
# best-effort: any StandardError raised while writing is turned into a
# warning on the logger instead of being propagated.
#
# @param current_time [Object] the value to store (a Time when called by #each)
# @return [Object, nil] nil when no checkpoint is configured; otherwise the
#   result of the write or of the logger call
def update_checkpoint(current_time)
    return unless @checkpoint_name

    File.open(@checkpoint_name, "wb") { |io| io.write(Marshal.dump(current_time)) }
rescue
    @logger.warn(
        "Unable to save the checkpoint at the specified location (#{File.expand_path(@checkpoint_name)})."
    )
end

#use_checkpoint(filename) ⇒ Object



17
18
19
20
21
# File 'lib/gh-archive/providers.rb', line 17

# Enables checkpointing by setting the file in which progress is stored.
# The file is read by #restore_checkpoint and written by #update_checkpoint.
#
# @param filename [String] path of the checkpoint file
# @return [self] the provider, to allow call chaining
def use_checkpoint(filename)
    @checkpoint_name = filename

    self
end