Class: Webspinne::Visit

Inherits:
Object
  • Object
show all
Defined in:
lib/webspinne/visit.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(uri) ⇒ Visit

Returns a new instance of Visit.



5
6
7
# File 'lib/webspinne/visit.rb', line 5

# Creates a visit for the site identified by +uri+.
#
# @param uri the address handed unchanged to Site.new (presumably a URL
#   string or URI object -- confirm against Site)
def initialize(uri)
  # Only the site is set up here; @agent and @index are assigned by #run.
  @site = Site.new(uri)
end

Instance Attribute Details

#agent ⇒ Object (readonly)

Returns the value of attribute agent.



3
4
5
# File 'lib/webspinne/visit.rb', line 3

# Reader for @agent, which #run sets to a new Mechanize instance.
#
# @return [Mechanize, nil] nil until #run has been called
def agent
  @agent
end

#index ⇒ Object (readonly)

Returns the value of attribute index.



3
4
5
# File 'lib/webspinne/visit.rb', line 3

# Reader for @index, which #run sets to a new Index instance.
#
# @return [Index, nil] nil until #run has been called
def index
  @index
end

#site ⇒ Object (readonly)

Returns the value of attribute site.



3
4
5
# File 'lib/webspinne/visit.rb', line 3

# Reader for @site, the Site built by the constructor from its +uri+ argument.
#
# @return [Site]
def site
  @site
end

Instance Method Details

#exec_visit(link) ⇒ Object



34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
# File 'lib/webspinne/visit.rb', line 34

# Fetches +link+ with the agent and queues every non-empty link target found
# on the fetched page via #plan_visit.
#
# The link is marked as visited before the request is made. A link on the
# page that cannot be processed (its +uri+ raises, or planning it raises) is
# reported and skipped; the remaining links of the page are still queued.
#
# @param link [#visited!, #uri] the link to fetch
# @return [Boolean] true when the fetched page exposes links; false when it
#   does not (e.g. an image) or when marking/fetching the link raised
def exec_visit(link)
  link.visited!
  page = agent.get(link.uri)

  # e.g. images have no links
  return false unless page.respond_to?(:links)

  page.links.each do |page_link|
    begin
      target = page_link.uri.to_s
      plan_visit(target) unless target.empty?
    rescue => e
      # One unusable link must not discard the rest of the page's links.
      puts e.class
    end
  end
  true
rescue => e
  puts e.class
  false
end

#plan_visit(uri) ⇒ Object



54
55
56
# File 'lib/webspinne/visit.rb', line 54

# Queues +uri+ for a later visit by appending a new Link to the index.
#
# @param uri the target to queue; it is passed both to Link.new and to
#   site.onsite?
# @return whatever +index <<+ returns
def plan_visit(uri)
  # Record with the link whether the site considers the target its own.
  onsite = site.onsite?(uri)
  index << Link.new(uri, onsite)
end

#run(max_follows) ⇒ Object



9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
# File 'lib/webspinne/visit.rb', line 9

# Crawls the site: starts from the site's own URI and visits at most
# +max_follows+ unvisited onsite links, printing one progress marker per
# visit ('.' when #exec_visit returns a truthy value, 'f' otherwise).
#
# A fresh Mechanize agent and a fresh Index are created on every call.
#
# @param max_follows [Integer] upper bound on the number of links to visit
# @return [Visit] self
def run(max_follows)
  @agent = Mechanize.new
  @index = Index.new

  puts "visiting #{site.uri} - following max. #{max_follows} links"

  plan_visit site.uri
  max_follows.times do
    link = index.next_unvisited_onsite_link
    # no more unvisited onsite links
    break unless link

    print(exec_visit(link) ? '.' : 'f')
  end

  puts

  self
end