Class: Kabutops::Crawler

Direct Known Subclasses

Spider

Class Method Summary collapse

Instance Method Summary collapse

Methods included from Extensions::CallbackSupport

callbacks, notify

Methods included from Extensions::Includable

#append_features, #included

Methods included from Kabutops::CrawlerExtensions::PStoreStorage

#storage

Methods included from Extensions::Logging

#logger

Class Method Details

.<<(resource) ⇒ Object



46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
# File 'lib/kabutops/crawler.rb', line 46

# Queues a resource for crawling.
#
# Outside debug mode the resource must carry an +:id+ or a +:url+; it is then
# handed to +perform_async+ as a plain hash. In debug mode nothing is
# scheduled: the resource is only appended to +params[:collection]+, which is
# created on first use.
#
# @param resource [#[], #to_hash] description of the resource to crawl
# @return [Object, nil] whatever +perform_async+ returns, or nil in debug mode
# @raise [RuntimeError] when both +:id+ and +:url+ are nil
def <<(resource)
  unless debug
    identifier = resource[:id] || resource[:url]
    raise "url must be specified for resource" if identifier.nil?

    return perform_async(resource.to_hash)
  end

  # Kept as two statements on purpose: the collection is read back from
  # params before appending instead of reusing the value of the assignment.
  params[:collection] ||= []
  params[:collection] << resource
  nil
end

.adapters ⇒ Object



22
23
24
# File 'lib/kabutops/crawler.rb', line 22

# Returns the adapter list, creating an empty one on first access.
#
# @return [Array] the same array instance on every call
def adapters
  return @adapters if @adapters

  @adapters = []
end

.crawl(collection = nil) ⇒ Object



35
36
37
38
39
40
41
42
43
44
# File 'lib/kabutops/crawler.rb', line 35

# Starts a crawl unless a status is already recorded in +storage+.
#
# Every resource of +collection+ (falling back to +params[:collection]+, then
# to an empty list) is pushed through +<<+, after which the stored status is
# set to +:in_progress+. If any status is already present, nothing happens.
#
# @param collection [Enumerable, nil] resources to enqueue
# @return [Symbol, nil] +:in_progress+ when the crawl was started, else nil
def crawl(collection = nil)
  return unless storage[:status].nil?

  # NOTE(review): with debug enabled and no explicit collection, `<<` appends
  # to params[:collection] while it is being iterated here — confirm that
  # combination is never used.
  resources = collection || params[:collection] || []
  resources.each { |resource| self << resource }

  storage[:status] = :in_progress
end

.crawl!(collection = nil) ⇒ Object



30
31
32
33
# File 'lib/kabutops/crawler.rb', line 30

# Restarts crawling: calls +reset!+ first, then +crawl+ with the same argument.
#
# @param collection [Enumerable, nil] passed through unchanged to +crawl+
# @return [Object] whatever +crawl+ returns
def crawl!(collection = nil)
  reset!
  crawl(collection)
end

.reset! ⇒ Object



26
27
28
# File 'lib/kabutops/crawler.rb', line 26

# Clears the recorded crawl status by writing nil to +storage[:status]+.
#
# @return [nil]
def reset!
  storage[:status] = nil
end

Instance Method Details

#<<(resource) ⇒ Object



88
89
90
# File 'lib/kabutops/crawler.rb', line 88

# Instance-level shortcut: forwards the resource to the class-level +<<+.
#
# @param resource [Object] passed through unchanged
# @return [Object] whatever the class-level +<<+ returns
def <<(resource)
  self.class << resource
end

#perform(resource) ⇒ Object



63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
# File 'lib/kabutops/crawler.rb', line 63

# Crawls one resource and hands the resulting page to the selected adapters.
#
# Steps:
# 1. The resource is wrapped in a +Hashie::Mash+.
# 2. Adapters are taken from +self.class.adapters+; when
#    +params.skip_existing+ is truthy only adapters whose +find(resource)+
#    returns nil are kept.
# 3. +crawl(resource)+ fetches the page; a nil page ends processing.
# 4. The +:store_if+ callbacks are notified; processing continues only if all
#    returned values are truthy (a nil result counts as "no objections").
# 5. Each selected adapter receives +process(resource, page)+.
#
# On failure the error is logged (unless the class is in debug mode), the
# method sleeps for +params[:wait]+ seconds (0 when unset) and the error is
# re-raised.
#
# Only StandardError is intercepted: signals and exit requests propagate
# immediately instead of being logged as errors and delayed by the sleep.
#
# @param resource [Hash] resource description
# @return [Object, nil]
# @raise [StandardError] any error raised while crawling or processing
def perform(resource)
  resource = Hashie::Mash.new(resource)

  adapters = self.class.adapters.select do |adapter|
    params.skip_existing ? adapter.find(resource).nil? : true
  end

  # NOTE(review): `select` returns a collection, so this guard does not fire
  # for an empty selection. `empty?` may have been intended, but that would
  # stop crawling whenever no adapter is selected — confirm before changing.
  return if adapters.nil?

  page = crawl(resource)
  return if page.nil?
  return unless (self.class.notify(:store_if, resource, page) || []).all?

  adapters.each do |adapter|
    adapter.process(resource, page)
  end
rescue StandardError => e
  unless self.class.debug
    logger.error(e.message)
    logger.error(e.backtrace.join("\n"))
  end

  # Back off before handing the failure to the caller.
  sleep params[:wait] || 0
  raise
end