Class: Kabutops::Crawler

Inherits:
Object
  • Object
show all
Extended by:
Kabutops::CrawlerExtensions::PStoreStorage
Includes:
Kabutops::CrawlerExtensions::Callback, Kabutops::CrawlerExtensions::ElasticSearch
Defined in:
lib/kabutops/crawler.rb

Class Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Methods included from Kabutops::CrawlerExtensions::PStoreStorage

check_storage, storage, storage=

Methods included from Kabutops::CrawlerExtensions::Callback

included

Methods included from Kabutops::CrawlerExtensions::ElasticSearch

included

Class Attribute Details

.params ⇒ Object (readonly)

Returns the value of attribute params.



9
10
11
# File 'lib/kabutops/crawler.rb', line 9

# Reader for the crawler's parameters.
#
# Other methods on this page index the value with :cache and :collection,
# so it is presumably a Hash — confirm where @params is assigned.
#
# @return [Object] the current value of @params
def params
  @params
end

Class Method Details

.<<(resource) ⇒ Object



45
46
47
# File 'lib/kabutops/crawler.rb', line 45

# Hands a single resource to +perform_async+.
#
# @param resource [Object] forwarded untouched to +perform_async+
# @return [Object] whatever +perform_async+ returns
def <<(resource)
  perform_async(resource)
end

.adapters ⇒ Object



31
32
33
# File 'lib/kabutops/crawler.rb', line 31

# Reader for the crawler's adapters.
#
# #perform walks the returned value with +each+ and calls
# +process(resource, page)+ on every element.
#
# @return [Object] the current value of @adapters
def adapters
  @adapters
end

.crawl!(collection = nil) ⇒ Object



35
36
37
38
39
40
41
42
43
# File 'lib/kabutops/crawler.rb', line 35

# Passes every resource of a collection to +perform_async+, but only when
# +storage(:status)+ is :none.
#
# @param collection [Enumerable<Hash>, nil] resources to crawl; when nil, the
#   :collection entry of @params is used, and an empty list if that is
#   missing as well
# @return [Object, nil] the collection that was iterated, or nil when the
#   status was anything other than :none
# @raise [RuntimeError] if a resource has no :id or no :url
def crawl!(collection = nil)
  return unless storage(:status) == :none

  @collection = collection || @params[:collection] || []
  @collection.each do |resource|
    # Report the key that is really missing: this guard tests :id, so the
    # message must not talk about :url.
    raise "id must be specified" if resource[:id].nil?
    # #perform fetches resource[:url], so a missing URL is rejected here
    # instead of surfacing later inside the queued job.
    raise "url must be specified" if resource[:url].nil?

    perform_async(resource)
  end
end

Instance Method Details

#<<(resource) ⇒ Object



66
67
68
# File 'lib/kabutops/crawler.rb', line 66

# Instance-level shortcut that passes a resource to the class's
# +perform_async+.
#
# @param resource [Object] passed as-is to +perform_async+
# @return [Object] the result of +perform_async+
def <<(resource)
  worker_class = self.class
  worker_class.perform_async(resource)
end

#perform(resource) ⇒ Object



50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
# File 'lib/kabutops/crawler.rb', line 50

# Fetches the page for one resource and passes the parsed document to every
# adapter of the class.
#
# @param resource [Hash] resource attributes; keys are converted to symbols
#   before use, and :url is the address that gets fetched
# @return [Object] the result of iterating the class's adapters with +each+
def perform(resource)
  attributes = resource.each_with_object({}) do |(key, value), symbolized|
    symbolized[key.to_sym] = value
  end
  url = attributes[:url]

  # Cachy.cache_if receives the :cache param and the URL as its arguments;
  # the block is what performs the actual download.
  html = Cachy.cache_if(self.class.params[:cache], url) do
    # Proxy support is currently switched off:
    # agent.set_proxy(*self.class.params[:proxy])
    Mechanize.new.get(url).body
  end

  document = Nokogiri::HTML(html)

  self.class.adapters.each { |adapter| adapter.process(attributes, document) }
end