Class: Benchmark::HTTP::Spider

Inherits:
Object
  • Object
show all
Includes:
Async::Await
Defined in:
lib/benchmark/http/spider.rb

Instance Method Summary collapse

Constructor Details

#initialize(depth: nil, ignore: nil) ⇒ Spider

Returns a new instance of Spider.



38
39
40
41
# File 'lib/benchmark/http/spider.rb', line 38

# Create a spider, remembering its crawl options for later use.
#
# @param depth [Object, nil] Stored as `@depth` (presumably the maximum crawl depth; confirm against the code that reads it).
# @param ignore [Object, nil] Stored as `@ignore` (presumably a rule for skipping URLs; confirm against the code that reads it).
def initialize(depth: nil, ignore: nil)
	@depth, @ignore = depth, ignore
end

Instance Method Details



43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
# File 'lib/benchmark/http/spider.rb', line 43

# Find the links in a response body and yield those that stay on the same host.
#
# The body is parsed with `LinksFilter`. Every non-empty `href` is resolved against the page URL, or against the document's `<base href>` when one is present and can be resolved. A resolved URL is yielded only if it is an HTTP(S) URL whose host equals the host of `url`.
#
# @param url [URI] The URL the response was fetched from; it must support `+` and `host`.
# @param response [#read] The response; `read` must return the body to parse.
# @yield [full_url] Called once per same-host HTTP(S) link.
# @yieldparam full_url [URI::HTTP] The absolute URL of the link.
# @return [Array] The non-nil values returned by the block, in document order. Empty when the body could not be parsed.
def extract_links(url, response)
	body = response.read
	
	begin
		filter = LinksFilter.parse(body)
	rescue => error
		# An unparsable body simply yields no links; the failure is only logged.
		Console.logger.error(self) {error}
		return []
	end
	
	base = url
	
	if filter.base
		begin
			base = url + filter.base
		rescue ArgumentError, URI::InvalidURIError
			# A malformed <base href> must not abort extraction for the whole page.
			# Fall back to the page URL, which is also what browsers do.
			Console.logger.warn(self) {"Could not use base #{filter.base}, relative to #{url}!"}
		end
	end
	
	resolved = filter.links.map do |href|
		next if href.nil? || href.empty?
		
		begin
			full_url = base + href
			
			# Only follow HTTP(S) links that stay on the host being crawled.
			if full_url.host == url.host && full_url.kind_of?(URI::HTTP)
				yield full_url
			end
		rescue ArgumentError, URI::InvalidURIError
			Console.logger.warn(self) {"Could not fetch #{href}, relative to #{base}!"}
			next # Don't accumulate an item into the resulting array.
		end
	end
	
	# Skipped links and nil block results leave nil entries behind; drop them.
	resolved.compact
end