Class: ComingSoon::Scraper

Inherits:
Object
  • Object
show all
Defined in:
lib/coming_soon/scraper.rb

Instance Method Summary collapse

Instance Method Details

#scrape_details(soon) ⇒ Object



30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
# File 'lib/coming_soon/scraper.rb', line 30

# Fills in the synopsis of a single movie.
#
# Loads the movie's overview page (soon.url). When that page carries
# synopsis text and no READ FULL SYNOPSIS link, the text is used as the
# synopsis. In every other case -- including when the overview page cannot
# be loaded at all -- the work is delegated to #scrape_plotsummary.
#
# @param soon [#url, #synopsis=] movie record to complete
# @return [Object] the synopsis text assigned here, or whatever
#   #scrape_plotsummary returns
def scrape_details(soon)
	overview =
		begin
			# URI.open rather than Kernel#open: open-uri no longer hooks
			# Kernel#open for URLs on Ruby 3.0+, and Kernel#open would also
			# honour "|command" strings.
			Nokogiri::HTML(URI.open(soon.url)) # Uses the HTTP 'movieoverview' url
		rescue StandardError
			# Deliberate best effort: a failure here (e.g. a refused HTTP to
			# HTTPS redirect) just means the plot summary page is used instead.
			nil
		end

	return scrape_plotsummary(soon) if overview.nil?

	synopsis_label = overview.css("span#SynopsisTextLabel")

	if overview.css("a.movie-synopsis-link").empty? && synopsis_label.any?
		# If a READ FULL SYNOPSIS link is not present and any
		# text is available, use that text for the synopsis
		soon.synopsis = synopsis_label.text
	else
		scrape_plotsummary(soon)
	end
end

#scrape_movies ⇒ Object



3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
# File 'lib/coming_soon/scraper.rb', line 3

# Scrapes the Fandango "coming soon" listing and builds one
# ComingSoon::Movie per listed item, then completes each one through
# #scrape_details.
#
# The created movies are not collected or returned here.
# NOTE(review): presumably ComingSoon::Movie.new registers each instance
# itself -- confirm against the Movie class.
#
# @param limit [Integer] maximum number of listing items to scrape
#   (defaults to 20, the previously hard-coded cap)
# @return [nil]
def scrape_movies(limit: 20)
	# URI.open rather than Kernel#open: open-uri no longer hooks Kernel#open
	# for URLs on Ruby 3.0+.
	doc = Nokogiri::HTML(URI.open("http://www.fandango.com/moviescomingsoon"))

	# Each "li.visual-item" holds one movie:
	#   name:       a.visual-title (text)
	#   start_date: span (text)
	#   url:        a (href attribute)
	doc.css("li.visual-item").first(limit).each do |movie|
		soon = ComingSoon::Movie.new
		soon.name = movie.css("a.visual-title").text.strip
		soon.start_date = movie.css("span").text
		soon.url = movie.css("a").attribute("href").value

		scrape_details(soon)
	end

	# Explicit nil: the old loop returned nil after `break` but the node set
	# otherwise, so the return value was never a reliable result.
	nil
end

#scrape_plotsummary(soon) ⇒ Object



55
56
57
58
59
60
61
62
# File 'lib/coming_soon/scraper.rb', line 55

# Scrapes the synopsis from the movie's 'plotsummary' page.
#
# The page URL is derived from soon.url by replacing the first
# "movieoverview" with "plotsummary". This is also the fallback used when
# the overview page could not be loaded (e.g. after a failed HTTP to HTTPS
# redirect).
#
# @param soon [#url, #synopsis=] movie record to complete
# @return [String] the synopsis text that was assigned to soon.synopsis
def scrape_plotsummary(soon)
	plot_url = soon.url.sub(/movieoverview/, 'plotsummary')

	# URI.open rather than Kernel#open: open-uri no longer hooks Kernel#open
	# for URLs on Ruby 3.0+, and Kernel#open would also honour "|command"
	# strings.
	plot_page = Nokogiri::HTML(URI.open(plot_url))

	soon.synopsis = plot_page.css("p.subpage-descriptive-content").text
end