Class: Apollo::PlatformProgram
Constant Summary
collapse
- DEFAULT_OPTIONS =
{
:version => nil
}
Constants inherited
from BaseProgram
BaseProgram::CONFIG_DIR
Instance Attribute Summary
Attributes inherited from BaseProgram
#amqp, #config, #mongo, #options, #optparser
Instance Method Summary
collapse
Methods inherited from BaseProgram
get_config_path, #init_amqp, #init_mongo, #init_seeds, #init_seeds_crawlers, #load_config, #load_config_file, #load_configs, #parse_options, #request_exit, require_files
Constructor Details
Initializer - Constructor
63
64
65
66
67
|
# File 'lib/apollo_crawler/program/platform_program.rb', line 63
# Builds the program via the base initializer, then layers this class's
# DEFAULT_OPTIONS on top of the options hash.
#
# NOTE: Hash#update (alias of merge!) lets keys from DEFAULT_OPTIONS overwrite
# any same-named keys the base initializer already stored.
def initialize
  super
  self.options.update(DEFAULT_OPTIONS)
end
|
Instance Method Details
#enqueue_crawlers_urls(amqp, crawlers = Apollo::Crawler::BaseCrawler.subclasses, opts = {}) ⇒ Object
100
101
102
103
104
105
|
# File 'lib/apollo_crawler/program/platform_program.rb', line 100
# Schedules the start URL of every given crawler class.
#
# Each crawler class is instantiated once and its #url is handed to
# Apollo::Scheduler::BaseScheduler.schedule together with the class itself.
#
# @param amqp [Object] not used by this method
# @param crawlers [Enumerable<Class>] crawler classes; defaults to all
#   subclasses of Apollo::Crawler::BaseCrawler
# @param opts [Hash] not used by this method
# @return [Enumerable<Class>] the +crawlers+ collection that was iterated
def enqueue_crawlers_urls(amqp, crawlers=Apollo::Crawler::BaseCrawler.subclasses, opts={})
  crawlers.each do |crawler_class|
    Apollo::Scheduler::BaseScheduler.schedule(crawler_class.new.url, crawler_class)
  end
end
|
#init_agents(amqp, opts = {}) ⇒ Object
125
126
127
128
129
130
131
|
# File 'lib/apollo_crawler/program/platform_program.rb', line 125
# Announces agent start-up on stdout and runs the three agent initializers
# in a fixed order: crawlers, domainers, fetchers.
#
# @param amqp [Object] passed through to each initializer
# @param opts [Hash] passed through to each initializer
# @return [Object] the result of the last initializer (init_fetchers)
def init_agents(amqp, opts={})
  puts "Initializing agents"
  last_result = nil
  [:init_crawlers, :init_domainers, :init_fetchers].each do |initializer|
    last_result = send(initializer, amqp, opts)
  end
  last_result
end
|
#init_crawlers(amqp, opts = {}) ⇒ Object
107
108
109
110
|
# File 'lib/apollo_crawler/program/platform_program.rb', line 107
# Creates a single crawler agent bound to the given AMQP handle.
#
# The agent is configured from the program-wide options (self.options);
# the +opts+ argument is accepted but not consulted.
#
# @param amqp [Object] passed to Apollo::Agent::CrawlerAgent.new
# @param opts [Hash] not used by this method
# @return [Array] one-element array holding the new agent
def init_crawlers(amqp, opts={})
  [Apollo::Agent::CrawlerAgent.new(amqp, self.options)]
end
|
#init_domainers(amqp, opts = {}) ⇒ Object
112
113
114
115
|
# File 'lib/apollo_crawler/program/platform_program.rb', line 112
# Creates a single domainer agent bound to the given AMQP handle.
#
# The agent is configured from the program-wide options (self.options);
# the +opts+ argument is accepted but not consulted.
#
# @param amqp [Object] passed to Apollo::Agent::DomainerAgent.new
# @param opts [Hash] not used by this method
# @return [Array] one-element array holding the new agent
def init_domainers(amqp, opts={})
  [Apollo::Agent::DomainerAgent.new(amqp, self.options)]
end
|
#init_domains(opts = {}) ⇒ Object
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
|
# File 'lib/apollo_crawler/program/platform_program.rb', line 133
# Starts a background import of the domain ranking CSV, if it is present.
#
# The file is looked up at tmp/top-1m.csv, three directories above this
# source file. The resolved path is always printed to stdout. When the file
# is missing nothing is imported.
#
# Each CSV row is mapped to a hash with :rank (first column, as Integer via
# to_i) and :name (second column) and handed to
# Apollo::Helper::Mongo.csv_bulk_insert for Apollo::Model::Domain.
#
# @param opts [Hash] not used by this method
# @return [Integer, Thread] 0 when the CSV file does not exist, otherwise the
#   Thread running the import (it is not joined here)
def init_domains(opts={})
  path = File.join(File.dirname(__FILE__), "../../../tmp/top-1m.csv")
  puts path

  # File.exists? was a deprecated alias and is gone in Ruby 3.2+; File.exist?
  # works on every Ruby version.
  return 0 unless File.exist?(path)

  Thread.new do
    # 1000 and false are passed through unchanged; presumably batch size and a
    # flag of csv_bulk_insert -- confirm against Apollo::Helper::Mongo.
    Apollo::Helper::Mongo.csv_bulk_insert(path, Apollo::Model::Domain, 1000, false) do |row|
      { :rank => row[0].to_i, :name => row[1] }
    end
  end
end
|
#init_fetchers(amqp, opts = {}) ⇒ Object
117
118
119
120
121
122
123
|
# File 'lib/apollo_crawler/program/platform_program.rb', line 117
# Creates a fetcher agent and then enqueues the URLs of all known crawlers.
#
# The agent is configured from the program-wide options (self.options), while
# +opts+ is forwarded to enqueue_crawlers_urls.
#
# @param amqp [Object] passed to the agent and to enqueue_crawlers_urls
# @param opts [Hash] forwarded to enqueue_crawlers_urls
# @return [Object] whatever enqueue_crawlers_urls returns
def init_fetchers(amqp, opts={})
  # The agent is instantiated for its construction side effects; no reference
  # to it is kept once this method returns.
  Apollo::Agent::FetcherAgent.new(amqp, self.options)

  all_crawlers = Apollo::Crawler::BaseCrawler.subclasses
  enqueue_crawlers_urls(amqp, all_crawlers, opts)
end
|
#init_options ⇒ Object
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
|
# File 'lib/apollo_crawler/program/platform_program.rb', line 69
# Builds the command-line option parser and stores it in +optparser+.
#
# Recognised switches, in help order:
#   -h/--help                sets options[:show_help] = true
#   -e/--environment [NAME]  sets options[:env] to the given name
#   -d/--daemon              sets options[:daemon] = true
#   -v/--verbose             sets options[:verbose] = true
#   -V/--version             sets options[:version] = true
#
# The current options[:env] value is baked into the help text at build time.
#
# @return [OptionParser] the parser that was assigned to +optparser+
def init_options
  parser = OptionParser.new
  parser.banner = "Usage: apollo-platform [OPTIONS]"
  parser.separator ""
  parser.separator "Specific options:"

  # Registers a plain on/off switch that flips one options key to true.
  add_flag = lambda do |short, long, description, key|
    parser.on(short, long, description) { self.options[key] = true }
  end

  add_flag.call('-h', '--help', 'Display this screen', :show_help)

  parser.on('-e', '--environment [NAME]', "Environment used, default '#{options[:env]}'") do |name|
    self.options[:env] = name
  end

  add_flag.call('-d', '--daemon', 'Run Apollo Platform daemon', :daemon)
  add_flag.call('-v', '--verbose', 'Enable verbose output', :verbose)
  add_flag.call('-V', '--version', 'Show version info', :version)

  self.optparser = parser
end
|
#init_program(args) ⇒ Object
155
156
157
158
159
160
161
|
# File 'lib/apollo_crawler/program/platform_program.rb', line 155
# Runs the base program initialisation and, if that did not produce a result,
# starts the agents.
#
# @param args [Array] forwarded to the base implementation
# @return [Object, nil] the base implementation's non-nil result when there is
#   one (agents are then not started); otherwise nil after init_agents ran
def init_program(args)
  base_result = super(args)
  if base_result.nil?
    init_agents(self.amqp, self.options)
    nil
  else
    base_result
  end
end
|
#process_options(args) ⇒ Object
163
164
165
166
167
168
169
170
171
172
173
174
175
176
|
# File 'lib/apollo_crawler/program/platform_program.rb', line 163
# Handles the informational switches after option parsing.
#
# :version takes precedence over :show_help; only one of the two is printed.
#
# @param args [Array] not used by this method
# @return [Integer, nil] 0 after printing Apollo::VERSION or the option
#   parser's help text, nil when neither switch is set
def process_options(args)
  if self.options[:version]
    puts Apollo::VERSION
  elsif self.options[:show_help]
    puts optparser
  else
    return nil
  end

  0
end
|
#requeue_fetching_urls(opts = {}) ⇒ Object
178
179
180
181
182
183
184
185
186
|
# File 'lib/apollo_crawler/program/platform_program.rb', line 178
# Puts every queued URL that is still marked :fetching back into the :queued
# state and saves it.
#
# @param opts [Hash] when opts[:verbose] is truthy, each record is logged to
#   stdout before it is changed
# @return [Object] the collection returned by QueuedUrl.where (the receiver
#   of #each)
def requeue_fetching_urls(opts={})
  Apollo::Model::QueuedUrl.where(:state => :fetching).each do |stuck|
    # Log first so the printed record still shows its pre-change state.
    puts "Requeing '#{stuck.inspect}'" if opts[:verbose]
    stuck.state = :queued
    stuck.save
  end
end
|
#run(args = ARGV) ⇒ Object
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
|
# File 'lib/apollo_crawler/program/platform_program.rb', line 189
# Program entry point.
#
# Delegates to the base implementation first; a non-nil result from it is
# returned immediately. Otherwise the domain import is kicked off, URLs left
# in the :fetching state are requeued, and -- only when options[:daemon] is
# set -- an Apollo::Planner::SmartPlanner is run to obtain the exit code.
#
# @param args [Array<String>] command-line arguments, ARGV by default
# @return [Object] the base implementation's non-nil result, or whatever
#   request_exit returns for the computed exit code (0 unless the planner ran)
def run(args = ARGV)
  outcome = super(args)
  return outcome unless outcome.nil?

  init_domains
  requeue_fetching_urls(self.options)

  exit_code =
    if self.options[:daemon]
      planner = Apollo::Planner::SmartPlanner.new(self.amqp, self.mongo, self.options)
      planner.run(self.options)
    else
      0
    end

  request_exit(exit_code)
end
|