Class: JustCrawl::Register
- Inherits:
-
Object
- Object
- JustCrawl::Register
- Defined in:
- lib/just_crawl/register.rb
Defined Under Namespace
Classes: Result
Instance Method Summary collapse
- #add(pages) ⇒ Object
- #completed(page) ⇒ Object
- #error_pages ⇒ Object
- #errors? ⇒ Boolean
- #finished? ⇒ Boolean
-
#initialize ⇒ Register
constructor
A new instance of Register.
- #next_page ⇒ Object
- #no_links_found? ⇒ Boolean
- #processing_size ⇒ Object
- #retry(page) ⇒ Object
- #summarize ⇒ Object
Constructor Details
#initialize ⇒ Register
Returns a new instance of Register.
7 8 9 10 11 |
# File 'lib/just_crawl/register.rb', line 7
#
# Builds an empty register: three independent, initially empty sets that
# hold the pages waiting to be crawled, the pages currently being crawled
# and the pages already crawled.
def initialize
  @unprocessed, @processing, @processed = Array.new(3) { Set.new }
end
Instance Method Details
#add(pages) ⇒ Object
13 14 15 16 17 18 19 |
# File 'lib/just_crawl/register.rb', line 13
#
# Queues every page the register has not seen yet.
#
# Pages already present in the processed, processing or unprocessed sets
# are dropped. Each remaining page is announced on stdout when the global
# $verbose flag is truthy.
#
# @param pages [#to_set] candidate pages; each must respond to #url when
#   $verbose is set
# @return [Set] the unprocessed set after the new pages were merged in
def add(pages)
  already_known = [@processed, @processing, @unprocessed]
  fresh = already_known.reduce(pages.to_set) { |remaining, seen| remaining - seen }
  fresh.each { |candidate| puts " Adding #{candidate.url}" if $verbose }
  @unprocessed.merge(fresh)
end
#completed(page) ⇒ Object
36 37 38 39 |
# File 'lib/just_crawl/register.rb', line 36
#
# Records a page as crawled: it is added to the processed set and removed
# from the processing set.
#
# @param page [Object] the page that finished
# @return [Set] the processing set after the removal
def completed(page)
  @processed.add(page)
  @processing.delete(page)
end
#error_pages ⇒ Object
49 50 51 |
# File 'lib/just_crawl/register.rb', line 49
#
# Selects the processed pages whose #error is truthy.
#
# @return [Enumerable] the processed pages that report an error
def error_pages
  @processed.select { |page| page.error }
end
#errors? ⇒ Boolean
53 54 55 |
# File 'lib/just_crawl/register.rb', line 53
#
# Tells whether at least one processed page reports a truthy #error.
#
# Stops at the first failing page instead of collecting every failing page
# only to check that the collection is non-empty.
#
# @return [Boolean] true when any processed page has an error
def errors?
  @processed.any? { |page| page.error }
end
#finished? ⇒ Boolean
41 42 43 |
# File 'lib/just_crawl/register.rb', line 41
#
# Tells whether the crawl has nothing left to do: no page is waiting in the
# unprocessed set and none is in the processing set.
#
# @return [Boolean] true when both sets are empty
def finished?
  @unprocessed.empty? && @processing.empty?
end
#next_page ⇒ Object
21 22 23 24 25 26 27 28 29 |
# File 'lib/just_crawl/register.rb', line 21
#
# Hands out the next queued page and marks it as being processed.
#
# The first element of the unprocessed set is moved into the processing
# set. If the processing set then holds more pages than EM.threadpool_size,
# a warning is printed to stdout. When nothing is queued the register is
# left untouched and no warning is printed, because no page was dispatched.
#
# @return [Object, nil] the page to crawl next, or nil when the queue is empty
def next_page
  page = @unprocessed.first
  return unless page

  @unprocessed.delete(page)
  @processing << page

  in_flight = @processing.size
  pool_size = EM.threadpool_size
  if in_flight > pool_size
    puts "WARNING: #{in_flight} pages are being processed when EM threadpool only has #{pool_size} threads."
  end

  page
end
#no_links_found? ⇒ Boolean
68 69 70 |
# File 'lib/just_crawl/register.rb', line 68
#
# Tells whether fewer than two pages have been processed.
#
# @return [Boolean] true when the processed set holds zero or one page
def no_links_found?
  @processed.size < 2
end
#processing_size ⇒ Object
45 46 47 |
# File 'lib/just_crawl/register.rb', line 45
#
# Reports how many pages are currently in the processing set.
#
# @return [Integer] number of pages being processed
def processing_size
  @processing.length
end
#retry(page) ⇒ Object
31 32 33 34 |
# File 'lib/just_crawl/register.rb', line 31
#
# Puts a page back in the queue: it is added to the unprocessed set and
# removed from the processing set.
#
# @param page [Object] the page to crawl again
# @return [Set] the processing set after the removal
def retry(page)
  @unprocessed.add(page)
  @processing.delete(page)
end
#summarize ⇒ Object
57 58 59 60 61 62 63 64 65 66 |
# File 'lib/just_crawl/register.rb', line 57
#
# Prints a crawl summary to stdout.
#
# When #errors? is false, a single line with the number of processed pages
# is printed. Otherwise a heading is printed, followed by the string form
# of every page returned by #error_pages.
#
# @return [Object, nil] nil when there are no errors, otherwise whatever
#   iterating the error pages returns
def summarize
  unless errors?
    puts "\n#{@processed.size} pages crawled without errors."
    return
  end

  puts "\nPages with errors:"
  error_pages.each { |failed| puts failed.to_s }
end