Class: Apollo::Crawler::XkcdCrawler
Constant Summary
collapse
- @@MATCHER_ITEM =
"//div[@id = 'comic']/img"
Instance Method Summary
collapse
Methods inherited from BaseCrawler
create_metadoc, #enqueue_url, #etl, fetch, #fetch_document, #initialize, name_re, #process_url, try_get_doc, try_get_url, #url_processed?
Instance Method Details
36
37
38
39
40
41
42
43
|
# File 'lib/apollo_crawler/crawler/xkcd_crawler.rb', line 36
# NOTE(review): the method name is missing from this listing -- `def (doc)`
# is not valid Ruby by itself. Presumably the identifier was dropped when the
# page was extracted; restore it from the file referenced above
# (lib/apollo_crawler/crawler/xkcd_crawler.rb, line 36) before using this code.
#
# Builds one hash per node matched in +doc+ by the @@MATCHER_ITEM XPath
# ("//div[@id = 'comic']/img").
#
# doc - object responding to #xpath (presumably a parsed HTML document --
#       TODO confirm against the caller).
#
# Returns the mapped collection of hashes, each with:
#   :text - the node's 'title' attribute
#   :link - the node's 'src' attribute resolved against self.url, as a String
def (doc)
# The assignment is the last expression, so its value is also the return value.
res = doc.xpath(@@MATCHER_ITEM).map { |node|
{
:text => node['title'],
# URI.join resolves a relative 'src' against the crawler's base URL.
:link => URI.join(self.url, node['src']).to_s
}
}
end
|
45
46
47
48
49
50
51
52
|
# File 'lib/apollo_crawler/crawler/xkcd_crawler.rb', line 45
# NOTE(review): the method name is missing from this listing -- `def (doc)`
# is not valid Ruby by itself. Presumably the identifier was dropped when the
# page was extracted; restore it from the file referenced above
# (lib/apollo_crawler/crawler/xkcd_crawler.rb, line 45) before using this code.
#
# Collects links from the <a accesskey="p"> anchors found under
# <ul class="comicNav"><li> in +doc+.
#
# doc - object responding to #xpath (presumably a parsed HTML document --
#       TODO confirm against the caller).
#
# Returns the de-duplicated collection of hashes, each with:
#   :link - the anchor's 'href' attribute resolved against self.url, as a String
def (doc)
# accesskey 'p' presumably marks the "previous comic" link -- verify against the site markup.
res = doc.xpath("//ul[@class = 'comicNav']/li/a[@accesskey = 'p']").map { |node|
{
:link => URI.join(self.url, node['href']).to_s
}
}
# Drop repeated entries when the same link is matched more than once.
res.uniq
end
|
#name ⇒ Object
28
29
30
|
# File 'lib/apollo_crawler/crawler/xkcd_crawler.rb', line 28
# Name of this crawler.
#
# Returns the String "Xkcd".
def name
  "Xkcd"
end
|
#url ⇒ Object
32
33
34
|
# File 'lib/apollo_crawler/crawler/xkcd_crawler.rb', line 32
# Base URL of this crawler; the link-building methods in this class pass it
# to URI.join to resolve relative addresses.
#
# Returns the String "http://xkcd.com/".
def url
  "http://xkcd.com/"
end
|