Module: Spiderman
- Extended by:
- ActiveSupport::Concern
- Defined in:
- lib/spiderman.rb,
lib/spiderman/runner.rb,
lib/spiderman/railtie.rb,
lib/spiderman/version.rb
Overview
Turn any class into a crawler by including this module.
Example:
class MySpider < ApplicationJob # Yup, you can define this in a job
  queue_as :crawler
  include Spiderman

  # Block for the start URL: hands the href of every matched link to the
  # :listing handler via process!.
  crawl "https://example.com/" do |response|
    response.css('.selector a').each do |a|
      process! a["href"], :listing
    end
  end

  # Handler named :listing: passes the matched images on to the :image
  # handler, then saves the selected content with the helper defined below.
  process :listing do |response|
    process! response.css('img'), :image
    save_the_thing response.css('.some_selector')
  end

  # Handler named :image.
  process :image do |response|
    # Do something with the image file
  end

  # Ordinary instance method; the :listing handler above calls it directly.
  def save_the_thing(thing)
    # logic here for saving the thing
  end
end
Defined Under Namespace
Constant Summary collapse
- VERSION =
"2.0.0"
Class Method Summary collapse
- .add(clazz) ⇒ Object
- .find(name) ⇒ Object
- .list ⇒ Object
- .run(crawler = nil) ⇒ Object
Instance Method Summary collapse
- #crawl! ⇒ Object
- #name ⇒ Object
- #perform(url, with = nil) ⇒ Object
- #process!(url, with = nil) ⇒ Object
Class Method Details
.add(clazz) ⇒ Object
118 119 120 |
# File 'lib/spiderman.rb', line 118
# Appends +clazz+ to the registry returned by +list+.
#
# @param clazz [Object] the crawler to register (presumably a class that
#   includes Spiderman; confirm against the caller)
# @return [Array] the registry after the append
def add(clazz)
  list << clazz
end
.find(name) ⇒ Object
114 115 116 |
# File 'lib/spiderman.rb', line 114
# Looks up a registered crawler by its short, underscored name.
#
# Each entry of +list+ is compared by taking its +name+, stripping the
# namespace (+demodulize+) and converting it to snake_case (+underscore+).
#
# @param name [String, Symbol] underscored crawler name, e.g. "my_spider"
# @return [Object, nil] the first matching crawler, or nil when none matches
def find(name)
  # Normalise once so Symbol names (:my_spider) match as well as Strings.
  wanted = name.to_s
  list.find { |crawler| crawler.name.demodulize.underscore == wanted }
end
.list ⇒ Object
105 106 107 |
# File 'lib/spiderman.rb', line 105
# Returns the registry of crawlers, creating an empty one on first access.
#
# @return [Array] the same array instance on every call
def list
  @list = [] unless @list
  @list
end
.run(crawler = nil) ⇒ Object
109 110 111 112 |
# File 'lib/spiderman.rb', line 109
# Runs one crawler, or every registered crawler when no name is given.
#
# @param crawler [String, nil] name passed to +find+; nil runs everything
#   returned by +list+
# @return [Array] the crawlers on which +crawl!+ was invoked
# @raise [ArgumentError] when a name is given but +find+ returns nothing
def run(crawler = nil)
  targets =
    if crawler
      found = find(crawler)
      # Fail with a clear message instead of a NoMethodError on nil.
      raise ArgumentError, "Unknown crawler: #{crawler.inspect}" unless found

      [found]
    else
      list
    end

  targets.each(&:crawl!)
end
Instance Method Details
#crawl! ⇒ Object
79 80 81 82 83 |
# File 'lib/spiderman.rb', line 79
# Starts a crawl by passing every URL in +crawler.urls+ to +process!+.
#
# @return [Object] whatever +crawler.urls.each+ returns
def crawl!
  crawler.urls.each { |start_url| process!(start_url) }
end
#name ⇒ Object
99 100 101 |
# File 'lib/spiderman.rb', line 99
# Returns this object's class name with any namespace removed.
#
# @return [String] the demodulized class name
def name
  class_name = self.class.name
  class_name.demodulize
end
#perform(url, with = nil) ⇒ Object
93 94 95 96 97 |
# File 'lib/spiderman.rb', line 93
# Requests +url+ and runs the matching handler against the response.
#
# The handler is looked up by +with+ when given, otherwise by +url+, and is
# evaluated in the context of this instance.
#
# @param url [Object] the URL to request
# @param with [Object, nil] handler key; falls back to +url+ when nil
# @return [Object] the handler's return value
def perform(url, with = nil)
  handler_key = with || url
  callback = crawler.handler_for(handler_key)
  page = crawler.request(url)
  instance_exec(page, &callback)
end
#process!(url, with = nil) ⇒ Object
85 86 87 88 89 90 91 |
# File 'lib/spiderman.rb', line 85
# Processes +url+ inline, or enqueues it when this object is an ActiveJob.
#
# @param url [Object] the URL to process; converted with +to_s+ only on the
#   ActiveJob path
# @param with [Object, nil] handler key forwarded unchanged
# @return [Object] the result of +perform+ or of +perform_later+
def process!(url, with = nil)
  # Outside ActiveJob there is no queue, so run the handler right away.
  return perform(url, with) unless defined?(ActiveJob) && is_a?(ActiveJob::Base)

  self.class.perform_later(url.to_s, with)
end