Module: SportHeadlines::Scraper

Defined in:
lib/sport_headlines/scraper.rb

Class Method Summary collapse

Class Method Details

.scrape_article(article) ⇒ Object



33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
# File 'lib/sport_headlines/scraper.rb', line 33

# Downloads an article page and stores its paragraph text on the article.
#
# The CSS selector and the separator written after each paragraph are chosen
# from +article.site.site_name+. A site name that is not recognised leaves the
# content as an empty string.
#
# @param article [#article_url, #site, #content=] the article to populate
# @return [String] the text that was assigned to +article.content+
def self.scrape_article(article)
  # The URL can originate from a scraped href. Kernel#open would run a value
  # starting with "|" as a command, and it no longer accepts URLs on Ruby 3.0+,
  # so the page is always fetched through a parsed URI object.
  doc = Nokogiri::HTML(URI.parse(article.article_url).open)

  selector, separator =
    case article.site.site_name
    when "ESPN"              then [".article-body p", "\n\n"]
    when "Bleacher Report"   then [".article_body p", "\n"]
    when "Pro Football Talk" then [".post-body p", "\n\n"]
    end

  p_text = "".dup
  if selector
    # Every paragraph is indented by four spaces and followed by the
    # site-specific separator.
    doc.search(selector).each do |paragraph|
      p_text << "    " << paragraph.text << separator
    end
  end

  article.content = p_text
end

.scrape_site_headlines(site) ⇒ Object



3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
# File 'lib/sport_headlines/scraper.rb', line 3

# Rebuilds a site's article list from the headlines on its front page.
#
# The site's existing articles are cleared, then one SportHeadlines::Article
# is created per headline list item, added to the site and linked back to it.
# Selectors depend on +site.site_name+: "ESPN" and "Bleacher Report" have
# dedicated rules and every other name uses the "#top-headlines" layout.
# ESPN links are prefixed with +site.site_url+; other sites use the href as is.
#
# @param site [#site_name, #site_url, #clear_articles, #add_article] site to refresh
# @return [Object] the result of the final +each+ call over the matched items
def self.scrape_site_headlines(site)
  # Fetch through a parsed URI: Kernel#open treats a leading "|" as a command
  # and no longer accepts URLs on Ruby 3.0+.
  doc = Nokogiri::HTML(URI.parse(site.site_url).open)
  site.clear_articles

  item_selector, title_for, link_selector, url_prefix =
    case site.site_name
    when "ESPN"
      [".headlines li", ->(item) { item.search("a").text }, "a", site.site_url]
    when "Bleacher Report"
      [".headlineArticles li", ->(item) { item.search("span.title").text }, "a.title", nil]
    else
      ["#top-headlines li", ->(item) { item.text }, "a", nil]
    end

  doc.search(item_selector).each do |headline|
    # A list item without a matching link has no article to fetch, so skip it
    # rather than failing on nil after the old articles were already cleared.
    href = headline.search(link_selector).attribute("href")
    next unless href

    new_article = SportHeadlines::Article.new
    new_article.title = title_for.call(headline)
    new_article.article_url = "#{url_prefix}#{href.value}"
    site.add_article(new_article)
    new_article.site = site
  end
end