Class: JustCrawl::Engine
- Inherits:
-
Object
- Object
- JustCrawl::Engine
- Defined in:
- lib/just_crawl/engine.rb
Constant Summary collapse
- DEFAULT_OPTIONS =
{ domain: '', start: ['/'], username: '', password: '', verbose: false, session_id: false }.freeze
- IGNORE =
[/#/, /mailto:/, /skype:/, /logout/, /javascript:/, %r{/xhr/}, /https:/, /\.pdf$/, /^$/, /tel:/].freeze
- VALID_RESPONSE_CODES =
[200, 302].freeze
- MAX_REDIRECTS =
3
- LINE_WIDTH =
78
Instance Attribute Summary collapse
-
#options ⇒ Object
readonly
Returns the value of attribute options.
Instance Method Summary collapse
- #errors? ⇒ Boolean
-
#initialize(caller_options = {}) ⇒ Engine
constructor
A new instance of Engine.
- #no_links_found? ⇒ Boolean
- #process_next ⇒ Object
- #run ⇒ Object
- #summarize ⇒ Object
Constructor Details
#initialize(caller_options = {}) ⇒ Engine
Returns a new instance of Engine.
20 21 22 23 24 25 26 27 28 |
# File 'lib/just_crawl/engine.rb', line 20 def initialize(caller_options = {}) @options = DEFAULT_OPTIONS.merge(caller_options) @authorization = Base64.encode64("#{options[:username]}:#{options[:password]}") @register = JustCrawl::Register.new start_pages = options[:start].to_a.map { |page| Page.new(@register, page, '/') } @register.add(start_pages) end |
Instance Attribute Details
#options ⇒ Object (readonly)
Returns the value of attribute options.
18 19 20 |
# File 'lib/just_crawl/engine.rb', line 18 def options @options end |
Instance Method Details
#errors? ⇒ Boolean
50 51 52 |
# File 'lib/just_crawl/engine.rb', line 50 def errors? @register.errors? end |
#no_links_found? ⇒ Boolean
54 55 56 |
# File 'lib/just_crawl/engine.rb', line 54 def no_links_found? @register.no_links_found? end |
#process_next ⇒ Object
36 37 38 39 40 41 42 43 44 |
# File 'lib/just_crawl/engine.rb', line 36 def process_next return if @register.processing_size >= EM.threadpool_size if @register.finished? EventMachine.stop elsif (page = @register.next_page) retrieve(page) process_next end end |
#run ⇒ Object
30 31 32 33 34 |
# File 'lib/just_crawl/engine.rb', line 30 def run EventMachine.run do process_next end end |
#summarize ⇒ Object
46 47 48 |
# File 'lib/just_crawl/engine.rb', line 46 def summarize @register.summarize end |