Class: Kabutops::Crawler
Direct Known Subclasses
Spider
Class Method Summary
collapse
Instance Method Summary
collapse
callbacks, notify
#append_features, #included
#storage
#logger
Class Method Details
.<<(resource) ⇒ Object
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
|
# File 'lib/kabutops/crawler.rb', line 46
# Queues a resource for crawling.
#
# Outside debug mode the resource must carry an :id or a :url; it is then
# handed to perform_async as a plain hash and that call's result is returned.
# In debug mode nothing is enqueued: the resource is only appended to
# params[:collection] and nil is returned.
#
# Raises RuntimeError when neither :id nor :url is present (non-debug only).
def <<(resource)
  unless debug
    identifier = resource[:id] || resource[:url]
    raise "url must be specified for resource" if identifier.nil?

    return perform_async(resource.to_hash)
  end

  # Kept as two statements on purpose: the list is re-read from params
  # after the default has been assigned, then appended to.
  params[:collection] ||= []
  params[:collection] << resource
  nil
end
|
.adapters ⇒ Object
22
23
24
|
# File 'lib/kabutops/crawler.rb', line 22
# Returns the list of adapters, creating an empty list on first access and
# handing back the same array on every later call.
def adapters
  @adapters = [] unless @adapters
  @adapters
end
|
.crawl(collection = nil) ⇒ Object
35
36
37
38
39
40
41
42
43
44
|
# File 'lib/kabutops/crawler.rb', line 35
# Seeds the crawl queue and marks the crawl as in progress.
#
# Does nothing (and returns nil) when storage[:status] is already set.
# Otherwise every resource from +collection+ -- falling back to
# params[:collection], then to an empty list -- is pushed through <<, and
# storage[:status] becomes :in_progress, which is also the return value.
def crawl(collection = nil)
  # One status lookup is enough: any non-nil status means there is nothing
  # to seed, so no separate (empty) :in_progress branch is needed.
  return unless storage[:status].nil?

  (collection || params[:collection] || []).each do |resource|
    self << resource
  end
  storage[:status] = :in_progress
end
|
.crawl!(collection = nil) ⇒ Object
30
31
32
33
|
# File 'lib/kabutops/crawler.rb', line 30
# Restarts a crawl: first calls reset!, then delegates to crawl with the
# given collection and returns whatever crawl returns.
def crawl!(collection = nil)
  reset!
  crawl collection
end
|
.reset! ⇒ Object
26
27
28
|
# File 'lib/kabutops/crawler.rb', line 26
# Clears the crawl status by writing nil to storage[:status].
def reset!
  store = storage
  store[:status] = nil
end
|
Instance Method Details
#<<(resource) ⇒ Object
88
89
90
|
# File 'lib/kabutops/crawler.rb', line 88
# Instance-level shortcut: forwards the resource to the << operator of this
# object's class and returns its result.
def <<(resource)
  owner = self.class
  owner << resource
end
|
#perform(resource) ⇒ Object
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
|
# File 'lib/kabutops/crawler.rb', line 63
# Crawls one resource and passes the fetched page to the adapters.
#
# Steps:
# 1. Wrap +resource+ in a Hashie::Mash.
# 2. Pick the adapters to feed. With params.skip_existing set, only adapters
#    whose find(resource) returns nil are kept; otherwise all are kept.
# 3. Fetch the page via crawl(resource); stop when it is nil.
# 4. Stop unless every :store_if callback result is truthy (a nil
#    notification result counts as "no objections").
# 5. Call process(resource, page) on each selected adapter.
#
# On a StandardError the error is logged (unless the class is in debug
# mode), the method sleeps for params[:wait] seconds (0 when unset) and the
# same error is re-raised.
def perform resource
  resource = Hashie::Mash.new(resource)

  configured = self.class.adapters
  adapters = configured.select do |adapter|
    params.skip_existing ? adapter.find(resource).nil? : true
  end

  # `select` always returns an Array, so a nil check can never skip anything.
  # Bail out only when adapters exist and every one of them was filtered out;
  # a crawler without any adapters still fetches the page, as it did before.
  return if adapters.empty? && !configured.empty?

  page = crawl(resource)
  return if page.nil?
  return unless (self.class.notify(:store_if, resource, page) || []).all?

  adapters.each do |adapter|
    adapter.process(resource, page)
  end
rescue StandardError => e
  # StandardError rather than Exception: interrupts and exit requests pass
  # straight through instead of being held up by the back-off sleep below.
  unless self.class.debug
    logger.error(e.message)
    logger.error(e.backtrace.join("\n"))
  end
  sleep params[:wait] || 0
  raise e
end
|