Module: SportHeadlines::Scraper
- Defined in:
- lib/sport_headlines/scraper.rb
Class Method Summary
Class Method Details
.scrape_article(article) ⇒ Object
33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 |
# File 'lib/sport_headlines/scraper.rb', line 33

# Downloads the page at +article.article_url+ and stores the text of its
# body paragraphs in +article.content+.
#
# The CSS selector and the separator written after each paragraph depend on
# +article.site.site_name+. For a site name that is not listed below the
# page is still downloaded, but the content is set to an empty string.
#
# @param article [#article_url, #site, #content=] the article to fill in
# @return [String] the text that was assigned to +article.content+
def self.scrape_article(article)
  # URI.open (provided by open-uri) instead of a bare `open`: Kernel#open
  # stopped opening URLs in Ruby 3.0, and on Rubies that support pipe-open
  # it runs a string starting with "|" as a shell command.
  doc = Nokogiri::HTML(URI.open(article.article_url))

  selector, separator =
    case article.site.site_name
    when "ESPN"              then [".article-body p", "\n\n"]
    when "Bleacher Report"   then [".article_body p", "\n"]
    when "Pro Football Talk" then [".post-body p", "\n\n"]
    end

  content = +""
  if selector
    doc.search(selector).each do |paragraph|
      # Every paragraph is written as: one leading space, text, separator.
      content << " " << paragraph.text << separator
    end
  end

  article.content = content
end
.scrape_site_headlines(site) ⇒ Object
3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 |
# File 'lib/sport_headlines/scraper.rb', line 3

# Downloads the page at +site.site_url+, clears the site's current articles
# and adds one SportHeadlines::Article per headline list item found.
#
# The selectors used depend on +site.site_name+:
# * "ESPN" - items under ".headlines li"; the link's href is appended to
#   +site.site_url+ to form the article URL.
# * "Bleacher Report" - items under ".headlineArticles li".
# * anything else - items under "#top-headlines li".
#
# NOTE(review): a list item without the expected anchor presumably makes
# the +attribute("href").value+ chain fail on nil - confirm against Nokogiri.
#
# @param site [#site_url, #site_name, #clear_articles, #add_article]
# @return [Object] whatever +each+ returns for the matched headline items
def self.scrape_site_headlines(site)
  # URI.open (provided by open-uri) instead of a bare `open`: Kernel#open
  # stopped opening URLs in Ruby 3.0, and on Rubies that support pipe-open
  # it runs a string starting with "|" as a shell command.
  doc = Nokogiri::HTML(URI.open(site.site_url))
  site.clear_articles

  case site.site_name
  when "ESPN"
    doc.search(".headlines li").each do |headline|
      link = headline.search("a")
      article = SportHeadlines::Article.new
      article.title = link.text
      # The href is joined onto the site URL by plain string concatenation.
      article.article_url = site.site_url + link.attribute("href").value
      site.add_article(article)
      article.site = site
    end
  when "Bleacher Report"
    doc.search(".headlineArticles li").each do |headline|
      article = SportHeadlines::Article.new
      article.title = headline.search("span.title").text
      article.article_url = headline.search("a.title").attribute("href").value
      site.add_article(article)
      article.site = site
    end
  else
    doc.search("#top-headlines li").each do |headline|
      article = SportHeadlines::Article.new
      article.title = headline.text
      article.article_url = headline.search("a").attribute("href").value
      site.add_article(article)
      article.site = site
    end
  end
end