Class: HttpSpell::Spider

Inherits:
Object
  • Object
show all
Defined in:
lib/http_spell/spider.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(starting_point, included: nil, excluded: [], verbose: false, tracing: false) ⇒ Spider

Returns a new instance of Spider.



13
14
15
16
17
18
19
20
21
# File 'lib/http_spell/spider.rb', line 13

# Builds a spider whose crawl queue contains only the starting point.
#
# @param starting_point [String, URI] where the crawl begins; converted with
#   +URI()+ before being queued.
# @param included [Array<Regexp>, nil] patterns stored in +@included+. When
#   nil, a single pattern matching any string that begins with the literal
#   text of +starting_point+ is used. (How the patterns are applied is not
#   visible in this method; presumably +links+ filters with them.)
# @param excluded [Array] patterns stored in +@excluded+ as given.
# @param verbose [Boolean] stored in +@verbose+; +start+ reports newly queued
#   links when it is truthy.
# @param tracing [Boolean] stored in +@tracing+; +start+ prints backtraces of
#   rescued errors when it is truthy.
def initialize(starting_point, included: nil, excluded: [], verbose: false, tracing: false)
  @todo = [URI(starting_point)]
  @done = []
  # The starting point is escaped so that regexp metacharacters that are
  # common in URLs ('.', '?', '+') are matched literally; unescaped, a URL
  # with a query string would not even match itself. \A anchors at the very
  # start of the string rather than at the start of any line.
  @included = included || [/\A#{Regexp.escape(starting_point.to_s)}/]
  @excluded = excluded
  @verbose = verbose
  @tracing = tracing
end

Instance Attribute Details

#done ⇒ Object (readonly)

Returns the value of attribute done.



11
12
13
# File 'lib/http_spell/spider.rb', line 11

# Returns the list of URLs that have been processed so far.
#
# The list starts out empty in +initialize+; +start+ appends a URL to it once
# that URL's links have been extracted without an error.
#
# @return [Array] the +@done+ array itself (not a copy)
def done
  @done
end

#todo ⇒ Object (readonly)

Returns the value of attribute todo.



11
12
13
# File 'lib/http_spell/spider.rb', line 11

# Returns the queue of URLs that still have to be visited.
#
# +initialize+ seeds it with the starting point; +start+ pops URLs off its end
# and appends links it has not seen before.
#
# @return [Array] the +@todo+ array itself (not a copy)
def todo
  @todo
end

Instance Method Details

#start ⇒ Object



23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
# File 'lib/http_spell/spider.rb', line 23

# Works through the +todo+ queue until it is empty.
#
# Each iteration pops the most recently queued URL, hands it to +links+, marks
# it as done and queues every returned link that is neither done nor already
# queued.
#
# Errors raised by the caller's block are reported on stderr and do not affect
# the result. Any other StandardError raised while handling a URL is reported,
# the URL is skipped and the overall result becomes false.
#
# NOTE: a skipped URL is not added to +done+, so it is queued (and attempted)
# again if a page processed later links to it.
#
# @yield [u, d] the two values +links+ yields for the current URL, passed
#   through unchanged (presumably the URL and its content; +links+ is defined
#   elsewhere).
# @return [Boolean] true when no URL had to be skipped, false otherwise
def start
  success = true

  while todo.any?
    url = todo.pop

    begin
      extracted = links(url) do |u, d|
        yield u, d if block_given?
      rescue StandardError => callback_error
        # Exceptions are captured in named variables so the handlers do not
        # depend on the English library having been required.
        warn "Callback error for #{url}: #{callback_error}"
        warn callback_error.backtrace if @tracing
      end

      done.append(url)
      new_links = (extracted - done - todo).uniq

      if new_links.any?
        warn "Adding #{new_links.size} new links found at #{url}" if @verbose
        # new_links is already unique and disjoint from todo, so no further
        # de-duplication of the queue is needed.
        todo.concat(new_links)
      end
    rescue StandardError => e
      warn "Skipping #{url} because of #{e.message}"
      warn e.backtrace if @tracing
      success = false
    end
  end

  success
end