Class: BeerAdvocate::Scraper

Inherits:
Object
  • Object
show all
Defined in:
lib/beer_advocate/scraper.rb

Constant Summary collapse

# URL of BeerAdvocate's "popular" beer list; this is the page that
# scrape_list_page fetches.
BEER_ADVOCATE_URL =
"https://www.beeradvocate.com/lists/popular/"
# Accumulates one { url_symbol => details_hash } entry for every page handled
# by the scrape_*_page methods; exposed through the scraped_urls reader.
@@scraped_urls =
[]

Class Method Summary collapse

Class Method Details

.find_url(url) ⇒ Object



10
11
12
13
14
# File 'lib/beer_advocate/scraper.rb', line 10

# Looks up the cache entry recorded for a previously scraped URL.
#
# Entries in +scraped_urls+ are hashes keyed by the URL converted to a Symbol
# (that is how the scrape_*_page methods store them).
#
# @param url [String, Symbol] the URL to look up
# @return [Hash, nil] the matching +{ url_symbol => details }+ entry, or nil
#   when no entry has been recorded for +url+
def self.find_url(url)
  # Compare hash keys directly. The previous implementation parsed the output
  # of Array#inspect, which only matched symbols printed in quoted form and
  # broke on any character that inspect escapes.
  key = url.to_s.to_sym
  scraped_urls.find { |scraped_url_hash| scraped_url_hash.key?(key) }
end

.find_url_details(url) ⇒ Object



16
17
18
19
20
21
# File 'lib/beer_advocate/scraper.rb', line 16

# Returns the details hash that was cached for a previously scraped URL.
#
# Entries in +scraped_urls+ are hashes keyed by the URL converted to a Symbol
# (that is how the scrape_*_page methods store them).
#
# @param url [String, Symbol] the URL to look up
# @return [Hash, nil] the details stored for +url+, or nil when the URL has
#   not been scraped yet
def self.find_url_details(url)
  key = url.to_s.to_sym
  # Match on the key itself instead of parsing Array#inspect output.
  entry = scraped_urls.find { |scraped_url_hash| scraped_url_hash.key?(key) }
  # A cache miss yields nil rather than a NoMethodError from calling #[] on nil.
  entry && entry[key]
end

.scrape_brewery_page(brewery_url) ⇒ Object



82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
# File 'lib/beer_advocate/scraper.rb', line 82

# Scrapes a brewery page, records the result in +scraped_urls+ and returns it.
#
# @param brewery_url [String] absolute URL of the brewery page
# @return [Hash] details with the keys :type, :address, :phone_number and
#   :website
# @raise [NoMethodError] if the info box does not have the expected sections
#   (the parsing below indexes into the split text without nil checks)
def self.scrape_brewery_page(brewery_url)
  # URI.open (from open-uri) is used instead of Kernel#open: Kernel#open no
  # longer fetches URLs on Ruby 3.0+ and would run a shell command for input
  # starting with "|". The block form also closes the response IO.
  brewery_page = URI.open(brewery_url) { |html| Nokogiri::HTML(html) }

  info_box = brewery_page.css("div#info_box.break")
  # Turn <br> tags into newlines so the sections can be split on blank runs.
  info_box.css("br").each { |br| br.replace("\n") }
  info_array = info_box.text.split("\n\n\n\n")

  # NOTE(review): assumes the second section is "address\n\nphone | ...\nwebsite";
  # confirm against the live page markup.
  address, jumbled_phone_number = info_array[1].split("\n\n")

  brewery_hash = {
    type: info_array[0].strip,
    address: address,
    phone_number: jumbled_phone_number.split("|")[0].strip,
    website: jumbled_phone_number.split("\n")[1].strip
  }

  # Cache under the URL (as a Symbol) so find_url / find_url_details can
  # retrieve it later.
  scraped_urls << { brewery_url.to_sym => brewery_hash }

  brewery_hash
end

.scrape_list_page ⇒ Object



23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
# File 'lib/beer_advocate/scraper.rb', line 23

# Scrapes the list page at BEER_ADVOCATE_URL into one hash per table row.
#
# @return [Array<Hash>] one hash per beer with the keys :name, :brewery,
#   :style, :abv, :review_count, :score, :name_url, :brewery_url and
#   :style_url; empty when the page has fewer than three table rows
def self.scrape_list_page
  # URI.open (from open-uri) is used instead of Kernel#open, which no longer
  # fetches URLs on Ruby 3.0+. The block form also closes the response IO.
  beer_list_page = URI.open(BEER_ADVOCATE_URL) { |html| Nokogiri::HTML(html) }
  site_root = "http://www.beeradvocate.com"

  # The first two rows are skipped (presumably table headers - confirm against
  # the page). drop returns [] for a short table, where [2..-1] returned nil.
  beer_list_page.css("tr").drop(2).map do |beer|
    cells = beer.css("td")
    links = cells[1].css("a")
    name_link = links[0]
    brewery_link = links[1]
    style_link = links[2]

    # The style link's own href is not used as-is; only its first run of
    # digits (the style id) is kept to build the /beer/styles/ URL.
    style_id = style_link["href"].to_s[/\d+/]

    {
      name: name_link.text,
      brewery: brewery_link.text,
      style: style_link.text,
      abv: cells[1].css("span").text.split('|')[1].strip,
      review_count: cells[2].text,
      score: cells[3].text,
      name_url: "#{site_root}#{name_link["href"]}?sort=topr&start=0",
      brewery_url: "#{site_root}#{brewery_link["href"]}",
      style_url: "#{site_root}/beer/styles/#{style_id}"
    }
  end
end

.scrape_name_page(name_url) ⇒ Object



53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
# File 'lib/beer_advocate/scraper.rb', line 53

# Scrapes a beer's page, records the result in +scraped_urls+ and returns it.
#
# @param name_url [String] absolute URL of the beer page
# @return [Hash] details with the keys :pdev (stripped text of
#   +span.ba-pdev+) and :top_reviews (Array<String> of review texts that are
#   at least 40 characters long)
def self.scrape_name_page(name_url)
  # URI.open (from open-uri) is used instead of Kernel#open, which no longer
  # fetches URLs on Ruby 3.0+. The block form also closes the response IO.
  name_page = URI.open(name_url) { |html| Nokogiri::HTML(html) }

  review_texts = name_page.css("div#rating_fullview_container").map do |review|
    # Turn <br> tags into newlines so the body can be split off the header.
    review.css("br").each { |br| br.replace("\n") }
    review.text.split("\n\n")[1]
  end

  # Keep elements 1..9 (the first match is skipped, as before). The `|| []`
  # covers the case where nothing matched and the slice is nil; nil texts
  # are dropped together with the short ones instead of raising.
  top_reviews = (review_texts[1..9] || []).reject do |text|
    text.nil? || text.length < 40
  end

  name_hash = {
    pdev: name_page.css("span.ba-pdev").text.strip,
    top_reviews: top_reviews
  }

  # Cache under the URL (as a Symbol) so find_url / find_url_details can
  # retrieve it later.
  scraped_urls << { name_url.to_sym => name_hash }

  name_hash
end

.scrape_style_page(style_url) ⇒ Object



110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
# File 'lib/beer_advocate/scraper.rb', line 110

# Scrapes a beer-style page, records the result in +scraped_urls+ and
# returns it.
#
# @param style_url [String] absolute URL of the style page
# @return [Hash] details with the keys :description, :abv, :ibu and
#   :glassware
# @raise [NoMethodError] if the content box does not have the expected lines
#   (the parsing below indexes into the split text without nil checks)
def self.scrape_style_page(style_url)
  # URI.open (from open-uri) is used instead of Kernel#open, which no longer
  # fetches URLs on Ruby 3.0+. The block form also closes the response IO.
  style_page = URI.open(style_url) { |html| Nokogiri::HTML(html) }

  description_box_array = style_page.css("div#ba-content div").text.split("\n")
  # NOTE(review): assumes line 1 is the description and line 2 is
  # "abv | ibu | glassware"; confirm against the live page markup.
  abv, ibu, glassware = description_box_array[2].split("|")

  style_hash = {
    description: description_box_array[1].strip,
    abv: abv.strip,
    ibu: ibu.strip,
    glassware: glassware.strip
  }

  # Cache under the URL (as a Symbol) so find_url / find_url_details can
  # retrieve it later.
  scraped_urls << { style_url.to_sym => style_hash }

  style_hash
end

.scraped_urls ⇒ Object



6
7
8
# File 'lib/beer_advocate/scraper.rb', line 6

# Reader for the class-level cache of scrape results.
#
# Returns the underlying @@scraped_urls object itself, not a copy; the
# scrape_*_page methods append their { url_symbol => details } entries to it
# with <<.
#
# @return [Array<Hash>] every entry recorded so far
def self.scraped_urls
  @@scraped_urls
end