Class: Apollo::Crawler::StackoverflowCrawler
- Inherits: BaseCrawler
- Inheritance chain: Object → BaseCrawler → Apollo::Crawler::StackoverflowCrawler
- Defined in:
- lib/apollo_crawler/crawler/stackoverflow_crawler.rb
Constant Summary
collapse
- @@MATCHER_ITEM =
"//div[@class = 'summary']/h3/a"
Instance Method Summary
collapse
Methods inherited from BaseCrawler
create_metadoc, #enqueue_url, #etl, fetch, #fetch_document, #initialize, name_re, #process_url, try_get_doc, try_get_url, #url_processed?
Instance Method Details
36
37
38
39
40
41
42
43
44
45
46
47
48
|
# File 'lib/apollo_crawler/crawler/stackoverflow_crawler.rb', line 36
def (doc)
res = doc.xpath(@@MATCHER_ITEM).map { |node|
url = BaseCrawler.try_get_url(self.url, node['href']).to_s
next if url.nil?
{
:text => node.text,
:link => url
}
}
return res
end
|
50
51
52
53
54
55
56
57
58
59
60
61
|
# File 'lib/apollo_crawler/crawler/stackoverflow_crawler.rb', line 50
def (doc)
res = doc.xpath("(//div[@class = 'pager fl']/a)[last()]").map { |node|
url = BaseCrawler.try_get_url(self.url, node['href']).to_s
next if url.nil?
{
:link => url
}
}
return res.uniq
end
|
#name ⇒ Object
28
29
30
|
# File 'lib/apollo_crawler/crawler/stackoverflow_crawler.rb', line 28
# Returns the fixed label for this crawler.
#
# @return [String] always "Stackoverflow"
def name
  "Stackoverflow"
end
|
#url ⇒ Object
32
33
34
|
# File 'lib/apollo_crawler/crawler/stackoverflow_crawler.rb', line 32
# Returns the fixed URL string for this crawler. The extraction methods
# in this class pass it as the first argument to BaseCrawler.try_get_url.
#
# @return [String] always "http://stackoverflow.com/questions"
def url
  "http://stackoverflow.com/questions"
end
|