Class: BeerAdvocate::Scraper
- Inherits: Object
- Inheritance chain: Object → BeerAdvocate::Scraper
- Defined in:
- lib/beer_advocate/scraper.rb
Constant Summary collapse
- BEER_ADVOCATE_URL =
"https://www.beeradvocate.com/lists/popular/"
- @@scraped_urls =
[]
Class Method Summary collapse
- .find_url(url) ⇒ Object
- .find_url_details(url) ⇒ Object
- .scrape_brewery_page(brewery_url) ⇒ Object
- .scrape_list_page ⇒ Object
- .scrape_name_page(name_url) ⇒ Object
- .scrape_style_page(style_url) ⇒ Object
- .scraped_urls ⇒ Object
Class Method Details
.find_url(url) ⇒ Object
10 11 12 13 14 |
# File 'lib/beer_advocate/scraper.rb', line 10
#
# Looks up the cache entry recorded for +url+ in +scraped_urls+.
#
# Each cache entry is a Hash whose key is the scraped URL converted to a
# Symbol (see the scrape_* methods, which store `url.to_sym => details`).
# The lookup therefore compares Symbol keys directly rather than parsing
# the inspect output of the keys, which only worked for symbols that
# happen to be printed with surrounding double quotes.
#
# @param url [String, nil] the URL that was passed to a scrape_* method
# @return [Hash, nil] the whole `{ url_symbol => details }` entry, or nil
#   when the URL has not been scraped (or +url+ is nil)
def self.find_url(url)
  return nil if url.nil?

  wanted_key = url.to_sym
  scraped_urls.find { |scraped_url_hash| scraped_url_hash.key?(wanted_key) }
end
.find_url_details(url) ⇒ Object
16 17 18 19 20 21 |
# File 'lib/beer_advocate/scraper.rb', line 16
#
# Returns the details Hash that was cached for +url+ by one of the
# scrape_* methods.
#
# Cache entries in +scraped_urls+ have the shape `{ url.to_sym => details }`,
# so the entry is located by Symbol key instead of by string-parsing the
# inspect output of the keys. A URL that was never scraped yields nil
# rather than raising NoMethodError on nil.
#
# @param url [String, nil] the URL that was passed to a scrape_* method
# @return [Hash, nil] the cached details, or nil when nothing is cached
def self.find_url_details(url)
  return nil if url.nil?

  wanted_key = url.to_sym
  cached_entry = scraped_urls.find { |scraped_url_hash| scraped_url_hash.key?(wanted_key) }
  cached_entry[wanted_key] if cached_entry
end
.scrape_brewery_page(brewery_url) ⇒ Object
82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 |
# File 'lib/beer_advocate/scraper.rb', line 82
#
# Downloads a brewery page, extracts its basic details and records them
# in +scraped_urls+ under `brewery_url.to_sym`.
#
# The page is fetched with block-form URI.open: Kernel#open no longer
# opens URLs on Ruby 3.0+, and the block form closes the downloaded
# stream as soon as Nokogiri has parsed it.
#
# @param brewery_url [String] URL of the brewery page
# @return [Hash] with keys :type, :address, :phone_number and :website
# NOTE(review): the parsing assumes the "div#info_box.break" text splits
# into at least two "\n\n\n\n"-separated sections, the second holding
# address and phone/website parts; a different layout raises NoMethodError.
def self.scrape_brewery_page(brewery_url)
  brewery_page = URI.open(brewery_url) { |html| Nokogiri::HTML(html) }

  info_box = brewery_page.css("div#info_box.break")
  # Turn <br> tags into newlines so the text can be split on blank runs.
  info_box.css("br").each { |br| br.replace("\n") }
  info_array = info_box.text.split("\n\n\n\n")

  contact_parts = info_array[1].split("\n\n")
  jumbled_phone_number = contact_parts[1]

  brewery_hash = {}
  brewery_hash[:type] = info_array[0].strip
  brewery_hash[:address] = contact_parts[0]
  brewery_hash[:phone_number] = jumbled_phone_number.split("|")[0].strip
  brewery_hash[:website] = jumbled_phone_number.split("\n")[1].strip

  scraped_urls << { brewery_url.to_sym => brewery_hash }
  brewery_hash
end
.scrape_list_page ⇒ Object
23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 |
# File 'lib/beer_advocate/scraper.rb', line 23
#
# Downloads the list page at BEER_ADVOCATE_URL and builds one Hash per
# table row.
#
# The page is fetched with block-form URI.open: Kernel#open no longer
# opens URLs on Ruby 3.0+, and the block form closes the downloaded
# stream as soon as Nokogiri has parsed it.
#
# @return [Array<Hash>] one Hash per beer with keys :name, :brewery,
#   :style, :abv, :review_count, :score, :name_url, :brewery_url and
#   :style_url; empty when the page has no rows past the first two
def self.scrape_list_page
  beer_list_page = URI.open(BEER_ADVOCATE_URL) { |html| Nokogiri::HTML(html) }

  # The first two <tr> elements are skipped (presumably header rows --
  # confirm against the live page). Fall back to no rows if the slice is nil.
  rows = beer_list_page.css("tr")[2..-1] || []

  rows.map do |beer|
    cells = beer.css("td")
    links = cells[1].css("a")

    # The style link's href is not used as-is; only its first run of
    # digits (the style id) is kept to build the style URL.
    style_id = links[2]["href"].to_s[/\d+/]

    {
      name: links[0].text,
      brewery: links[1].text,
      style: links[2].text,
      # Text after the first "|" in the cell's <span>; empty when absent.
      abv: cells[1].css("span").text.split('|')[1].to_s.strip,
      review_count: cells[2].text,
      score: cells[3].text,
      name_url: "http://www.beeradvocate.com#{links[0]["href"]}?sort=topr&start=0",
      brewery_url: "http://www.beeradvocate.com#{links[1]["href"]}",
      style_url: "http://www.beeradvocate.com/beer/styles/#{style_id}"
    }
  end
end
.scrape_name_page(name_url) ⇒ Object
53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 |
# File 'lib/beer_advocate/scraper.rb', line 53
#
# Downloads a beer's page, extracts the "pdev" figure and a selection of
# review texts, and records them in +scraped_urls+ under `name_url.to_sym`.
#
# The page is fetched with block-form URI.open: Kernel#open no longer
# opens URLs on Ruby 3.0+, and the block form closes the downloaded
# stream as soon as Nokogiri has parsed it.
#
# @param name_url [String] URL of the beer page
# @return [Hash] with keys :pdev (String) and :top_reviews (Array<String>)
def self.scrape_name_page(name_url)
  name_page = URI.open(name_url) { |html| Nokogiri::HTML(html) }

  name_hash = {}
  name_hash[:pdev] = name_page.css("span.ba-pdev").text.strip

  review_texts = name_page.css("div#rating_fullview_container").map do |review|
    # Turn <br> tags into newlines so the review body can be split out.
    review.css("br").each { |br| br.replace("\n") }
    review.text.split("\n\n")[1]
  end

  # Keep entries 1..9 (the first entry is skipped, as before). The slice is
  # nil when no review containers were found, and an entry is nil when its
  # text has no "\n\n" separator; both cases are dropped instead of raising.
  candidates = (review_texts[1..9] || []).compact
  name_hash[:top_reviews] = candidates.reject { |review_text| review_text.length < 40 }

  scraped_urls << { name_url.to_sym => name_hash }
  name_hash
end
.scrape_style_page(style_url) ⇒ Object
110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 |
# File 'lib/beer_advocate/scraper.rb', line 110
#
# Downloads a beer-style page, extracts its description and details, and
# records them in +scraped_urls+ under `style_url.to_sym`.
#
# The page is fetched with block-form URI.open: Kernel#open no longer
# opens URLs on Ruby 3.0+, and the block form closes the downloaded
# stream as soon as Nokogiri has parsed it.
#
# @param style_url [String] URL of the style page
# @return [Hash] with keys :description, :abv, :ibu and :glassware
# NOTE(review): the parsing assumes the text of "div#ba-content div" has
# the description on its second line and three "|"-separated details on
# its third; a different layout raises NoMethodError.
def self.scrape_style_page(style_url)
  style_page = URI.open(style_url) { |html| Nokogiri::HTML(html) }

  description_box_array = style_page.css("div#ba-content div").text.split("\n")
  abv, ibu, glassware = description_box_array[2].split("|")

  style_hash = {}
  style_hash[:description] = description_box_array[1].strip
  style_hash[:abv] = abv.strip
  style_hash[:ibu] = ibu.strip
  style_hash[:glassware] = glassware.strip

  scraped_urls << { style_url.to_sym => style_hash }
  style_hash
end
.scraped_urls ⇒ Object
6 7 8 |
# File 'lib/beer_advocate/scraper.rb', line 6
#
# Accessor for the class-level cache of scraped pages.
#
# The scrape_* methods append one `{ url.to_sym => details }` Hash to this
# collection per page they fetch; find_url and find_url_details search it.
#
# @return [Array<Hash>] the object held in @@scraped_urls (not a copy)
def self.scraped_urls
  @@scraped_urls
end