Class: Scraper
- Inherits:
-
Object
- Object
- Scraper
- Defined in:
- lib/top_box/scraper.rb
Class Method Summary collapse
- .scrape_movie_list ⇒ Object
-
.scrape_movie_page(url) ⇒ Object
Takes a site-relative movie path, e.g. ‘/title/tt3104988’.
-
.scrape_review_page(url) ⇒ Object
Takes a site-relative critic-reviews path, e.g. ‘/title/tt3104988/criticreviews?ref_=tt_ov_rt’.
Class Method Details
.scrape_movie_list ⇒ Object
2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 |
# File 'lib/top_box/scraper.rb', line 2
#
# Scrapes the IMDb box-office chart and returns one attribute hash per
# title cell found on the page.
#
# @return [Array<Hash>] hashes with the keys :url, :title,
#   :weeks_in_theater and :total_gross. Values are matched up by position
#   in the page, so a column with fewer cells than there are titles yields
#   nil for the missing entries.
def self.scrape_movie_list
  # URI.open instead of Kernel#open: open-uri's Kernel#open hook for URLs
  # was deprecated in Ruby 2.7 and removed in Ruby 3.0.
  doc = Nokogiri::HTML(URI.open('https://www.imdb.com/chart/boxoffice/'))

  titles = doc.css('tr .titleColumn').map { |cell| cell.text.strip }
  urls = doc.css('tr .titleColumn a').map do |link|
    href = link.attribute('href').value
    # Keep only the '/title/tt<digits>' prefix (e.g. '/title/tt3104988').
    # A fixed 16-character slice truncates ids with more than seven digits;
    # it is kept only as a fallback for hrefs of an unexpected shape.
    href[%r{\A/title/tt\d+}] || href[0, 16]
  end
  weeks_in_theater = doc.css('.weeksColumn').map { |cell| cell.text }
  total_gross = doc.css('.ratingColumn .secondaryInfo').map { |cell| cell.text }

  titles.each_with_index.map do |title, i|
    {
      url: urls[i],
      title: title,
      weeks_in_theater: weeks_in_theater[i],
      total_gross: total_gross[i]
    }
  end
end
.scrape_movie_page(url) ⇒ Object
Takes a site-relative movie path, e.g. ‘/title/tt3104988’.
23 24 25 |
# File 'lib/top_box/scraper.rb', line 23
#
# Fetches a movie page from IMDb and returns the parsed document.
#
# @param url [String] site-relative path, e.g. '/title/tt3104988'; a
#   missing leading slash is tolerated.
# @return [Object] whatever Nokogiri::HTML returns for the fetched page
def self.scrape_movie_page(url)
  # Normalise to exactly one slash between host and path.
  path = url.sub(%r{\A/+}, '')
  # URI.open instead of Kernel#open: open-uri's Kernel#open hook for URLs
  # was deprecated in Ruby 2.7 and removed in Ruby 3.0.
  Nokogiri::HTML(URI.open("https://www.imdb.com/#{path}"))
end
.scrape_review_page(url) ⇒ Object
Takes a site-relative critic-reviews path, e.g. ‘/title/tt3104988/criticreviews?ref_=tt_ov_rt’.
27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 |
# File 'lib/top_box/scraper.rb', line 27
#
# Scrapes an IMDb critic-reviews page and returns one attribute hash per
# score found on the page.
#
# @param url [String] site-relative path, e.g.
#   '/title/tt3104988/criticreviews?ref_=tt_ov_rt'; accepted with or
#   without the leading slash.
# @return [Array<Hash>] hashes with the keys :score, :publication, :author
#   and :summary. Values are matched up by position in the page.
def self.scrape_review_page(url)
  # Strip leading slashes so the documented '/title/...' form does not
  # produce 'https://www.imdb.com//title/...'.
  path = url.sub(%r{\A/+}, '')
  # URI.open instead of Kernel#open: open-uri's Kernel#open hook for URLs
  # was deprecated in Ruby 2.7 and removed in Ruby 3.0.
  doc = Nokogiri::HTML(URI.open("https://www.imdb.com/#{path}"))

  scores = doc.css('.critscore').map { |node| node.text.strip }
  publications = doc.css('.review b span').map { |node| node.text }
  # Some reviews may not have authors listed, so this list can be shorter
  # than scores; positions past its end yield nil.
  # NOTE(review): a review without an author in the middle of the page
  # presumably shifts later authors onto the wrong review - confirm
  # against the page markup.
  authors = doc.css('.review span span').map { |node| node.text }
  summarys = doc.css('.review div').map { |node| node.text.strip }

  scores.each_with_index.map do |score, i|
    {
      score: score,
      publication: publications[i],
      author: authors[i],
      summary: summarys[i]
    }
  end
end