Class: Kabutops::Crawler

Class Method Summary collapse

Instance Method Summary collapse

Methods included from Extensions::CallbackSupport

callbacks, notify

Methods included from Extensions::Includable

#append_features, #included

Methods included from Kabutops::CrawlerExtensions::PStoreStorage

#storage

Methods included from Extensions::Logging

#logger

Class Method Details

.<<(resource) ⇒ Object



36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
# File 'lib/kabutops/crawler.rb', line 36

# Registers a resource with the crawler.
#
# When +debug+ is truthy the resource is only appended to
# +params[:collection]+ and nothing is scheduled. Otherwise the resource is
# identified by its +:id+ (falling back to +:url+) and handed to
# +perform_async+ the first time that identifier is seen.
#
# @param resource [#[], #to_hash] resource description; must provide +:id+ or +:url+
# @return [Object, nil] the resource when it was newly scheduled, +nil+ otherwise
# @raise [RuntimeError] when the resource has neither +:id+ nor +:url+
def <<(resource)
  if debug
    # Kept as two statements on purpose: the array is re-read from params
    # before appending instead of reusing the value of the assignment.
    params[:collection] ||= []
    params[:collection] << resource
    return
  end

  identifier = resource[:id] || resource[:url]
  @map ||= Hashie::Mash.new

  raise "url must be specified for resource" if identifier.nil?

  # A resource with this identifier is already in the map: nothing to do.
  return if @map[identifier]

  perform_async(resource.to_hash)
  @map[identifier] = resource
end

.adapters ⇒ Object



22
23
24
# File 'lib/kabutops/crawler.rb', line 22

# Lazily initialised list of adapters.
#
# @return [Array] the same array instance on every call
def adapters
  @adapters = [] unless @adapters
  @adapters
end

.crawl!(collection = nil) ⇒ Object



26
27
28
29
30
31
32
33
34
# File 'lib/kabutops/crawler.rb', line 26

# Feeds every resource of a collection to +<<+.
#
# Nothing is fed when +storage[:status]+ is already set. The collection
# defaults to +params[:collection]+, or to an empty list when that is
# missing as well.
#
# @param collection [#each, nil] resources to register
# @return [Object, nil] the iterated collection, or +nil+ when skipped
def crawl!(collection = nil)
  @map ||= Hashie::Mash.new
  return unless storage[:status].nil?

  resources = collection || params[:collection] || []
  resources.each { |resource| self << resource }
end

Instance Method Details

#<<(resource) ⇒ Object



71
72
73
# File 'lib/kabutops/crawler.rb', line 71

# Instance-level shortcut that forwards the resource to the class-level +<<+.
#
# @param resource [Object] resource passed through unchanged
# @return [Object] whatever the class-level +<<+ returns
def <<(resource)
  owner = self.class
  owner << resource
end

#perform(resource) ⇒ Object



57
58
59
60
61
62
63
64
65
66
67
68
69
# File 'lib/kabutops/crawler.rb', line 57

# Crawls a single resource and passes the result to every adapter.
#
# The resource is wrapped in a Hashie::Mash, fetched through +crawl+, and
# then handed together with the page to each adapter's +process+.
#
# On failure the error message and backtrace are logged, the worker sleeps
# for +params[:wait]+ seconds (0 when unset), and the error is re-raised.
# Only StandardError is intercepted, so interrupts and exit requests
# propagate immediately instead of being held up by the sleep.
#
# @param resource [Hash] resource attributes
# @return [Object] the result of iterating the class-level adapters
# @raise [StandardError] any error raised by +crawl+ or an adapter
def perform(resource)
  resource = Hashie::Mash.new(resource)
  page = crawl(resource)

  self.class.adapters.each do |adapter|
    adapter.process(resource, page)
  end
rescue StandardError => e
  logger.error(e.message)
  # The backtrace may be nil; Array() keeps the logging itself from failing.
  logger.error(Array(e.backtrace).join("\n"))
  sleep(params[:wait] || 0)
  raise
end