Class: HackerNews::Scraper
- Inherits: Object
  - Object
    - HackerNews::Scraper
- Defined in:
- lib/hacker_news/scraper.rb
Constant Summary collapse
- URL =
"news.ycombinator.com"
Class Method Summary collapse
Class Method Details
.articles(pages = 5) ⇒ Object
7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 |
# File 'lib/hacker_news/scraper.rb', line 7
# Scrapes up to +pages+ consecutive listing pages, starting at "/", and
# wraps every matched title link in an Item together with its rank.
#
# On each page the last link matched by the selector is treated as the
# link to the next page: it is removed from the article list and its href
# becomes the path of the following request.
# NOTE(review): this assumes the final "td.title > a:first-child" match is
# always the pagination link - confirm against the current markup.
#
# Paging stops early (returning what was collected so far) when a page
# yields no links at all or when the next-page link carries no href,
# instead of raising NoMethodError on nil.
#
# @param pages [Integer] maximum number of listing pages to fetch
# @return [Array<Item>] items in page order; empty when +pages+ is 0 or
#   the first page yields no links
def self.articles(pages = 5)
  pagination = "/"
  result = []

  pages.times do |page|
    response = Net::HTTP.start(URL, 80) do |http|
      http.get(pagination, "User-Agent" => "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/535.2 (KHTML, like Gecko) Chrome/15.0.872.0 Safari/535.2").body
    end

    links = Nokogiri::HTML(response).css("td.title > a:first-child")
    next_page = links.pop
    # Nothing matched (error page, changed markup, ...): nothing to collect
    # and nowhere to go next.
    break unless next_page

    # Rank is 1-based and continues across pages.
    # NOTE(review): the factor 30 presumably is the number of items per
    # listing page - confirm.
    links.each_with_index { |a, i| result << Item.new(a, (page * 30) + i + 1) }

    href = next_page[:href].to_s
    break if href.empty?

    # Net::HTTP#get needs an absolute path; relative hrefs such as "news2"
    # are anchored at the site root.
    pagination = href.start_with?("/") ? href : "/#{href}"
  end

  result
end
.comments(item_id) ⇒ Object
26 27 28 29 30 31 32 33 34 35 36 37 |
# File 'lib/hacker_news/scraper.rb', line 26
# Fetches the page "/item?id=<item_id>" and builds a Comment for every
# top-level comment found on it.
#
# Candidates are located through the "s.gif" images whose width attribute
# is "0"; the node five levels above such an image is taken as the comment
# container and is kept only when it contains a "span.comment font" node.
# NOTE(review): presumably the image is an indentation spacer and width 0
# marks an un-nested comment - confirm against the page markup.
#
# Images without a width attribute, or nested fewer than five levels deep,
# are skipped rather than raising NoMethodError.
#
# @param item_id [#to_s] id interpolated into the item path
# @return [Array<Comment>] one Comment per accepted container, in document
#   order; empty when nothing matches
def self.comments(item_id)
  response = Net::HTTP.start(URL, 80) do |http|
    http.get("/item?id=#{item_id}", "User-Agent" => "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/535.3 (KHTML, like Gecko) Chrome/15.0.872.0 Safari/535.2").body
  end
  doc = Nokogiri::HTML(response)

  spacers = doc.css('img[src="http://ycombinator.com/images/s.gif"]').select do |img|
    img["width"] == "0"
  end

  spacers.each_with_object([]) do |spacer, found|
    # Climb five ancestors, stopping quietly if the tree is shallower.
    container = 5.times.inject(spacer) { |node, _| node && node.parent }
    next unless container && container.css("span.comment font")[0]

    found << Comment.new(item_id, container)
  end
end