Class: Kabutops::Crawler
Class Attribute Summary collapse
Class Method Summary
collapse
Instance Method Summary
collapse
check_storage, storage, storage=
included
included
Class Attribute Details
.params ⇒ Object
Returns the value of attribute params.
9
10
11
|
# File 'lib/kabutops/crawler.rb', line 9
# Reader for the crawler's params.
#
# @return [Object] the current value of @params (nil when it was never assigned)
def params
  @params
end
|
Class Method Details
.<<(resource) ⇒ Object
45
46
47
|
# File 'lib/kabutops/crawler.rb', line 45
# Hands a single resource to perform_async.
#
# @param resource [Object] passed to perform_async unchanged
# @return [Object] whatever perform_async returns
def <<(resource)
  perform_async(resource)
end
|
.adapters ⇒ Object
31
32
33
|
# File 'lib/kabutops/crawler.rb', line 31
# Reader for the crawler's adapters.
#
# @return [Object] the current value of @adapters (nil when it was never assigned)
def adapters
  @adapters
end
|
.crawl!(collection = nil) ⇒ Object
35
36
37
38
39
40
41
42
43
|
# File 'lib/kabutops/crawler.rb', line 35
# Enqueues every resource of a collection through perform_async, but only
# while storage(:status) is :none; otherwise nothing happens.
#
# The collection is taken from the argument, falling back to
# @params[:collection] and finally to an empty array. It is also kept in
# @collection.
#
# @param collection [#each, nil] resources to enqueue; each one is indexed with
#   [:id]
# @return [Object, nil] the collection that was iterated, or nil when the
#   status was not :none
# @raise [RuntimeError] when a resource has a nil :id. Resources that precede
#   the offending one have already been handed to perform_async at that point.
def crawl!(collection = nil)
  return unless storage(:status) == :none

  @collection = collection || @params[:collection] || []
  @collection.each do |resource|
    # The guard is on :id, so the message names :id (it used to say "url").
    # NOTE(review): perform reads resource[:url], which is not validated here —
    # confirm whether :url should be required as well.
    raise "id must be specified" if resource[:id].nil?

    perform_async(resource)
  end
end
|
Instance Method Details
#<<(resource) ⇒ Object
66
67
68
|
# File 'lib/kabutops/crawler.rb', line 66
# Hands a single resource to the class-level perform_async.
#
# @param resource [Object] passed to self.class.perform_async unchanged
# @return [Object] whatever self.class.perform_async returns
def <<(resource)
  self.class.perform_async(resource)
end
|
#perform(resource) ⇒ Object
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
|
# File 'lib/kabutops/crawler.rb', line 50
# Processes one resource: fetches its page and feeds it to every adapter.
#
# Steps:
# 1. Rebuild the resource as a hash whose keys are symbols.
# 2. Obtain the page body through Cachy.cache_if, which receives
#    self.class.params[:cache] and the resource's :url; the block downloads
#    the body with a fresh Mechanize agent.
# 3. Parse the body with Nokogiri::HTML.
# 4. Call process(resource, page) on each of self.class.adapters.
#
# @param resource [#each_with_object] key/value pairs; keys must respond to to_sym
# @return [Object] the result of iterating self.class.adapters with each
def perform(resource)
  resource = resource.each_with_object({}) do |(key, value), symbolized|
    symbolized[key.to_sym] = value
  end
  url = resource[:url]

  body = Cachy.cache_if(self.class.params[:cache], url) do
    Mechanize.new.get(resource[:url]).body
  end
  document = Nokogiri::HTML(body)

  self.class.adapters.each { |adapter| adapter.process(resource, document) }
end
|