Class: JustCrawl::Engine

Inherits:
Object
  • Object
show all
Defined in:
lib/just_crawl/engine.rb

Constant Summary collapse

# Option values used for any key the caller leaves out; #initialize merges the
# caller's hash over this one.
DEFAULT_OPTIONS = {
  domain: '',
  start: ['/'],
  username: '',
  password: '',
  verbose: false,
  session_id: false
}.freeze

# Link patterns.
# NOTE(review): presumably a link matching any of these is skipped by the
# crawler -- the consumer is not visible on this page, confirm there.
IGNORE = [
  /#/,
  /mailto:/,
  /skype:/,
  /logout/,
  /javascript:/,
  %r{/xhr/},
  /https:/,
  /\.pdf$/,
  /^$/,
  /tel:/
].freeze

# NOTE(review): presumably the HTTP statuses treated as a successful fetch --
# confirm against the retrieval code.
VALID_RESPONSE_CODES = [200, 302].freeze

# NOTE(review): presumably the redirect-follow limit per request -- confirm
# against the retrieval code.
MAX_REDIRECTS = 3

# NOTE(review): presumably the column width of the printed report -- confirm
# against the reporting code.
LINE_WIDTH = 78

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(caller_options = {}) ⇒ Engine

Returns a new instance of Engine.



20
21
22
23
24
25
26
27
28
# File 'lib/just_crawl/engine.rb', line 20

# Builds an engine: resolves options, pre-computes the encoded credentials,
# creates the page register and queues the start pages in it.
#
# @param caller_options [Hash] overrides merged over DEFAULT_OPTIONS. +:start+
#   may be a single path or a list of paths; every start page is created with
#   '/' as its third Page argument.
def initialize(caller_options = {})
  @options = DEFAULT_OPTIONS.merge(caller_options)

  # strict_encode64 produces the token without any line feeds. encode64 would
  # append "\n" and wrap every 60 output characters, which corrupts the value
  # for longer credentials.
  # NOTE(review): presumably this token ends up in an HTTP Authorization
  # header -- the consumer is not visible here.
  credentials = "#{options[:username]}:#{options[:password]}"
  @authorization = Base64.strict_encode64(credentials)

  @register = JustCrawl::Register.new

  # Array() accepts a lone String as well as an Array or nil; String does not
  # respond to #to_a, so `start: '/path'` used to raise NoMethodError.
  start_pages = Array(options[:start]).map { |page| Page.new(@register, page, '/') }
  @register.add(start_pages)
end

Instance Attribute Details

#options ⇒ Object (readonly)

Returns the value of attribute options.



18
19
20
# File 'lib/just_crawl/engine.rb', line 18

# Read-only accessor for the engine's effective options.
#
# @return [Hash] the value of @options, which #initialize sets to
#   DEFAULT_OPTIONS merged with the caller-supplied options
def options
  @options
end

Instance Method Details

#errors? ⇒ Boolean

Returns:

  • (Boolean)


50
51
52
# File 'lib/just_crawl/engine.rb', line 50

# Forwards to the register's own #errors? predicate.
#
# @return [Boolean] whatever @register.errors? reports
def errors?
  @register.errors?
end

#no_links_found? ⇒ Boolean

Returns:

  • (Boolean)


54
55
56
# File 'lib/just_crawl/engine.rb', line 54

# Forwards to the register's own #no_links_found? predicate.
#
# @return [Boolean] whatever @register.no_links_found? reports
def no_links_found?
  @register.no_links_found?
end

#process_next ⇒ Object



36
37
38
39
40
41
42
43
44
# File 'lib/just_crawl/engine.rb', line 36

# Hands pages from the register to #retrieve, one after another, until one of
# the following holds:
#
# * the register reports at least EM.threadpool_size pages in processing,
# * the register is finished, in which case the reactor is stopped, or
# * the register has no next page to offer.
#
# @return [Object, nil] the result of EventMachine.stop when the register is
#   finished, otherwise nil
def process_next
  until @register.processing_size >= EM.threadpool_size
    return EventMachine.stop if @register.finished?

    upcoming = @register.next_page
    return unless upcoming

    retrieve(upcoming)
  end
end

#run ⇒ Object



30
31
32
33
34
# File 'lib/just_crawl/engine.rb', line 30

# Passes EventMachine.run a block that kicks off the crawl via #process_next.
#
# @return [Object] whatever EventMachine.run returns
def run
  EventMachine.run { process_next }
end

#summarize ⇒ Object



46
47
48
# File 'lib/just_crawl/engine.rb', line 46

# Forwards to the register's #summarize.
#
# @return [Object] whatever @register.summarize returns
def summarize
  @register.summarize
end