Class: Magellan::Explorer

Inherits:
Object
  • Object
show all
Defined in:
lib/magellan/explorer.rb

Overview

:nodoc:

Constant Summary collapse

UNKNOWN_CONTENT =
"unknown"

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(urls, links) ⇒ Explorer

:nodoc:



8
9
10
11
# File 'lib/magellan/explorer.rb', line 8

# Keeps the urls that #explore iterates over and the links value that
# #explore_a hands to each fetched HTML document.
def initialize(urls, links) # :nodoc:
  @urls, @links = urls, links
end

Class Method Details

.create_result(url, destination_url, status_code, links, content_type) ⇒ Object

:nodoc:



41
42
43
# File 'lib/magellan/explorer.rb', line 41

# Builds a Result for one explored url. Every entry of +links+ is converted
# with +to_s+ first. Note that Result.new takes the status code as its first
# argument, ahead of the two urls.
def self.create_result(url, destination_url, status_code, links, content_type) # :nodoc:
  link_strings = links.map(&:to_s)
  Result.new(status_code, url, destination_url, link_strings, content_type)
end

Instance Method Details

#explore ⇒ Object

:nodoc:



13
14
15
16
17
18
19
# File 'lib/magellan/explorer.rb', line 13

# Starts one thread per entry in @urls, each running #explore_a for that url,
# and returns the threads' results in the same order as @urls.
def explore # :nodoc:
  # Spawn every thread before waiting on any of them.
  workers = @urls.map { |url| Thread.new { explore_a(url) } }
  # Thread#value joins the thread and returns what its block evaluated to.
  workers.map(&:value)
end

#explore_a(url) ⇒ Object

:nodoc:



21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
# File 'lib/magellan/explorer.rb', line 21

# Fetches +url+ with a fresh Mechanize agent and wraps the outcome in a result
# built by Explorer.create_result.
#
# A response whose content type begins with "text/html" reports the links
# returned by +links_to_other_documents(@links)+; any other response reports
# no links. A document that exposes no content type (or a nil one) is
# recorded as UNKNOWN_CONTENT.
#
# A Mechanize response-code error is reported with the error's response code,
# and a timeout is reported as "504". In both cases the requested url is also
# used as the destination url and the content type is UNKNOWN_CONTENT.
def explore_a(url) # :nodoc:
  agent = WWW::Mechanize.new
  agent.user_agent = "Ruby/#{RUBY_VERSION}"
  doc = agent.get(url)
  destination_url = doc.uri.to_s
  status_code = doc.code
  # Not every fetched document responds to content_type, so look it up once
  # and treat a missing value as nil.
  content_type = doc.respond_to?(:content_type) ? doc.content_type : nil
  #TODO: clean this up, this is very hacky, I would rather pass in a hpricot doc to create a result
  # String#start_with? is core Ruby, so this does not rely on the
  # ActiveSupport starts_with? alias; to_s keeps a nil content type from raising.
  links = content_type.to_s.start_with?("text/html") ? doc.links_to_other_documents(@links) : []
  Explorer.create_result(url, destination_url, status_code, links, content_type || UNKNOWN_CONTENT)
rescue WWW::Mechanize::ResponseCodeError => the_error
  Explorer.create_result(url, url, the_error.response_code, [], UNKNOWN_CONTENT)
rescue Timeout::Error
  Explorer.create_result(url, url, "504", [], UNKNOWN_CONTENT)
end