Class: CobwebCrawlHelper
Inherits: Object
Defined in: lib/cobweb_crawl_helper.rb
Overview
The CobwebCrawlHelper class gives easy access to information about a crawl, and provides the ability to stop a crawl that is in progress.
Constant Summary collapse
- BATCH_SIZE = 200
- FINISHED = "Finished"
- STARTING = "Starting"
- CANCELLED = "Cancelled"
Instance Attribute Summary collapse
-
#id ⇒ Object
Returns the value of attribute id.
Instance Method Summary collapse
- #destroy ⇒ Object
-
#initialize(data) ⇒ CobwebCrawlHelper
constructor
A new instance of CobwebCrawlHelper.
- #statistics ⇒ Object
- #status ⇒ Object
Constructor Details
#initialize(data) ⇒ CobwebCrawlHelper
Returns a new instance of CobwebCrawlHelper.
11 12 13 14 15 16 |
# File 'lib/cobweb_crawl_helper.rb', line 11 def initialize(data) @data = data # TAKING A LONG TIME TO RUN ON PRODUCTION BOX @stats = Stats.new(data) end |
Instance Attribute Details
#id ⇒ Object
Returns the value of attribute id.
4 5 6 |
# File 'lib/cobweb_crawl_helper.rb', line 4 def id @id end |
Instance Method Details
#destroy ⇒ Object
18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 |
# File 'lib/cobweb_crawl_helper.rb', line 18 def destroy = @data [:queue_name] = "cobweb_crawl_job" unless .has_key?(:queue_name) if RESQUE_INSTALLED [:processing_queue] = "CobwebJob" unless .has_key?(:processing_queue) [:crawl_finished_queue] = "CobwebFinishedJob" unless .has_key?(:crawl_finished_queue) end if SIDEKIQ_INSTALLED [:processing_queue] = "CrawlWorker" unless .has_key?(:processing_queue) [:crawl_finished_queue] = "CrawlFinishedWorker" unless .has_key?(:crawl_finished_queue) end # set status as cancelled now so that we don't enqueue any further pages self.statistics.end_crawl(@data, true) counter = 0 while(counter < 200) do break if self.statistics.get_status == CANCELLED sleep 1 counter += 1 end if [:queue_system] == :resque && RESQUE_INSTALLED position = Resque.size([:queue_name]) until position == 0 position-=BATCH_SIZE position = 0 if position < 0 job_items = Resque.peek([:queue_name], position, BATCH_SIZE) job_items.each do |item| if item["args"][0]["crawl_id"] == id # remove this job from the queue Resque.dequeue(CrawlJob, item["args"][0]) end end end end if [:queue_system] == :sidekiq && SIDEKIQ_INSTALLED queue = Sidekiq::Queue.new("crawl_worker") queue.each do |job| job.delete if job.args[0]["crawl_id"] == id end process_queue_name = Kernel.const_get([:processing_queue]).["queue"] queue = Sidekiq::Queue.new(process_queue_name) queue.each do |job| job.delete if job.args[0]["crawl_id"] == id end end if [:crawl_finished_queue] && [:queue_system] == :resque && RESQUE_INSTALLED additional_stats = {:crawl_id => id, :crawled_base_url => @stats.redis.get("crawled_base_url")} additional_stats[:redis_options] = @data[:redis_options] unless @data[:redis_options] == {} additional_stats[:source_id] = [:source_id] unless [:source_id].nil? 
Resque.enqueue([:crawl_finished_queue], @stats.get_statistics.merge(additional_stats)) end if [:crawl_finished_queue] && [:queue_system] == :sidekiq && SIDEKIQ_INSTALLED additional_stats = {:crawl_id => id, :crawled_base_url => @stats.redis.get("crawled_base_url")} additional_stats[:redis_options] = @data[:redis_options] unless @data[:redis_options] == {} additional_stats[:source_id] = [:source_id] unless [:source_id].nil? Kernel.const_get([:crawl_finished_queue]).perform_async(@stats.get_statistics.merge(additional_stats)) end end |
#statistics ⇒ Object
89 90 91 |
# File 'lib/cobweb_crawl_helper.rb', line 89 def statistics @stats end |
#status ⇒ Object
93 94 95 |
# File 'lib/cobweb_crawl_helper.rb', line 93 def status statistics.get_status end |