Class: Scraper

Inherits:
Object
  • Object
show all
Defined in:
lib/top_box/scraper.rb

Class Method Summary collapse

Class Method Details

.scrape_movie_list ⇒ Object


2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
# File 'lib/top_box/scraper.rb', line 2

# Scrapes the IMDb box-office chart page.
#
# Fetches https://www.imdb.com/chart/boxoffice/, collects four column lists
# from the document and pairs them up by position, one hash per title cell.
#
# @return [Array<Hash>] one hash per '.titleColumn' cell with the keys
#   +:url+ (the '/title/tt…' prefix of the link's href, e.g.
#   '/title/tt3104988'), +:title+, +:weeks_in_theater+ and +:total_gross+;
#   a value is nil when its column produced fewer entries than the title
#   column
def self.scrape_movie_list
  # URI.open is the explicit open-uri entry point; Kernel#open no longer
  # accepts URLs as of Ruby 3.0.
  doc = Nokogiri::HTML(URI.open('https://www.imdb.com/chart/boxoffice/'))

  titles = doc.css('tr .titleColumn').map { |cell| cell.text.strip }
  urls = doc.css('tr .titleColumn a').map do |link|
    href = link.attribute('href').value
    # Match the whole numeric id instead of assuming it is 7 digits long;
    # fall back to the fixed 16-character prefix for unexpected hrefs.
    href[%r{\A/title/tt\d+}] || href[0, 16]
  end
  weeks_in_theater = doc.css('.weeksColumn').map(&:text)
  total_gross = doc.css('.ratingColumn .secondaryInfo').map(&:text)

  # zip yields exactly one row per title and pads shorter columns with nil.
  titles.zip(urls, weeks_in_theater, total_gross).map do |title, url, weeks, gross|
    { url: url, title: title, weeks_in_theater: weeks, total_gross: gross }
  end
end

.scrape_movie_page(url) ⇒ Object

‘/title/tt3104988’


23
24
25
# File 'lib/top_box/scraper.rb', line 23

# Fetches and parses the IMDb page for a single title.
#
# @param url [String] site-relative path starting with '/', e.g.
#   '/title/tt3104988'; it is appended directly to 'https://www.imdb.com'
# @return [Object] whatever Nokogiri::HTML returns for the fetched page
def self.scrape_movie_page(url)
  # URI.open is the explicit open-uri entry point; Kernel#open no longer
  # accepts URLs as of Ruby 3.0.
  Nokogiri::HTML(URI.open('https://www.imdb.com' + url))
end

.scrape_review_page(url) ⇒ Object

‘/title/tt3104988/criticreviews?ref_=tt_ov_rt’


27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
# File 'lib/top_box/scraper.rb', line 27

# Scrapes the critic reviews listed on an IMDb "criticreviews" page.
#
# @param url [String] site-relative path, e.g.
#   '/title/tt3104988/criticreviews?ref_=tt_ov_rt'; a leading '/' is optional
# @return [Array<Hash>] one hash per '.critscore' element with the keys
#   +:score+, +:publication+, +:author+ and +:summary+; a field is nil when
#   the matching review does not contain it
def self.scrape_review_page(url)
  # The base already ends in '/', so drop any leading slash from the path to
  # avoid requesting 'https://www.imdb.com//title/...'.
  path = url.sub(%r{\A/+}, '')
  # URI.open is the explicit open-uri entry point; Kernel#open no longer
  # accepts URLs as of Ruby 3.0.
  doc = Nokogiri::HTML(URI.open('https://www.imdb.com/' + path))

  scores = doc.css('.critscore').map { |cell| cell.text.strip }
  # NOTE(review): assumes the n-th '.review' element belongs to the n-th
  # '.critscore' element -- confirm against the live page markup.
  reviews = doc.css('.review')

  scores.each_with_index.map do |score, index|
    review = reviews[index]
    # Some reviews have no author listed. Looking each field up inside its
    # own review keeps a missing author as nil for that review only, instead
    # of shifting every later author up by one position.
    {
      score: score,
      publication: review&.at_css('b span')&.text,
      author: review&.at_css('span span')&.text,
      summary: review&.at_css('div')&.text&.strip
    }
  end
end