Class: Apollo::Crawler::SpiderCrawler
Instance Method Summary
collapse
Methods inherited from BaseCrawler
create_metadoc, #enqueue_url, #etl, fetch, #fetch_document, #initialize, name_re, #process_url, try_get_doc, try_get_url, #url_processed?
Instance Method Details
34
35
36
|
# File 'lib/apollo_crawler/crawler/spider_crawler.rb', line 34
# NOTE(review): the method name is missing after `def` in this listing (it
# appears to have been dropped when the page was extracted) — confirm the
# real name against lib/apollo_crawler/crawler/spider_crawler.rb, line 34.
#
# Returns an empty array regardless of input; the `doc` argument is not used.
def (doc)
[]
end
|
38
39
40
41
42
43
44
45
46
47
48
49
|
# File 'lib/apollo_crawler/crawler/spider_crawler.rb', line 38
# NOTE(review): the method name is missing after `def` in this listing (it
# appears to have been dropped when the page was extracted) — confirm the
# real name against lib/apollo_crawler/crawler/spider_crawler.rb, line 38.
#
# Builds a list of link entries from the anchors in `doc`: every node matched
# by the XPath "//a" has its 'href' passed, together with this crawler's #url,
# to BaseCrawler.try_get_url (presumably to resolve relative links against the
# base URL — confirm), and each result is wrapped as { :link => url }.
#
# @param doc [Object] must respond to #xpath; presumably a parsed HTML
#   document — TODO confirm the concrete type
# @return [Array] the mapped entries with duplicates removed via #uniq
def (doc)
res = doc.xpath("//a").map { |node|
# The result of try_get_url is stringified immediately.
url = BaseCrawler.try_get_url(self.url, node['href']).to_s
# NOTE(review): because of the #to_s above, `url` is presumably never nil
# here (nil.to_s is ""), so this guard looks unreachable and a failed lookup
# would surface as an empty-string link — verify what try_get_url returns.
# If the guard ever did fire, the bare `next` would leave a nil element in
# `res`, since map keeps the block's value.
next if url.nil?
{
:link => url
}
}
return res.uniq
end
|
#name ⇒ Object
26
27
28
|
# File 'lib/apollo_crawler/crawler/spider_crawler.rb', line 26
# Name identifying this crawler.
#
# @return [String] always "Spider"
def name
  "Spider"
end
|
#url ⇒ Object
30
31
32
|
# File 'lib/apollo_crawler/crawler/spider_crawler.rb', line 30
# URL this crawler reports as its own; the link-collecting method in this
# class passes it to BaseCrawler.try_get_url alongside each anchor's href.
#
# @return [String] always "http://www.wikipedia.org/"
def url
  "http://www.wikipedia.org/"
end
|