Class: NPRBestBooks::Scraper
- Inherits:
-
Object
- Object
- NPRBestBooks::Scraper
- Defined in:
- lib/npr_best_books/scraper.rb
Overview
require 'rubygems' require 'nokogiri' require 'open-uri' require 'json'
Class Method Summary collapse
Class Method Details
.books ⇒ Object
8 9 10 11 12 13 14 15 |
# File 'lib/npr_best_books/scraper.rb', line 8
# Fetches the archived NPR "Best Books 2015" page and returns the book data
# embedded in its inline script.
#
# Prints "Loading books..." to stdout, downloads the page, picks the first
# <script> element whose text mentions "BOOKS", cleans that text with
# parse_data and parses the result as JSON.
#
# @return [Object] whatever JSON.parse yields for the cleaned script text
# @raise [RuntimeError] if no <script> element mentions "BOOKS"
# @raise [JSON::ParserError] if the cleaned script text is not valid JSON
def self.books
  puts "Loading books..."
  # URI.open (provided by open-uri) rather than Kernel#open: since Ruby 3.0
  # Kernel#open no longer opens URLs, and it treats strings starting with
  # "|" as commands to run.
  html = Nokogiri::HTML(URI.open("http://web.archive.org/web/20160622040558/http://apps.npr.org/best-books-2015/"))
  books_script = html.css("script").find { |s| s.children.text.include?("BOOKS") }
  # Fail with a descriptive message instead of a NoMethodError on nil.
  raise "No <script> element containing BOOKS was found on the page" unless books_script

  JSON.parse(parse_data(books_script.children.text))
end
.lookup_amazon(url) ⇒ Object
24 25 26 27 |
# File 'lib/npr_best_books/scraper.rb', line 24
# Downloads the page at +url+ and returns it parsed by Nokogiri.
#
# @param url [String] address of the page to fetch
# @return [Object] the document returned by Nokogiri::HTML
def self.lookup_amazon(url)
  # URI.open (provided by open-uri) rather than Kernel#open: since Ruby 3.0
  # Kernel#open no longer opens URLs, and it would execute a caller-supplied
  # string beginning with "|" as a shell command.
  Nokogiri::HTML(URI.open(url))
end
.parse_data(data) ⇒ Object
17 18 19 20 21 22 |
# File 'lib/npr_best_books/scraper.rb', line 17
# Strips the JavaScript wrapper from the inline script text so that only the
# array literal assigned to window.BOOKS is left.
#
# Three things are removed or rewritten:
# * the "window.BOOKS = " assignment prefix, together with any whitespace
#   immediately before it,
# * the ";" that terminates the array ("}];" becomes "}]"),
# * the "ANALYTICS.setupChartbeat();" call.
#
# @param data [String] text content of the script element
# @return [String] the cleaned text; whitespace that surrounded the removed
#   chartbeat call is left in place
def self.parse_data(data)
  # Match the assignment with flexible whitespace rather than one exact
  # spacing, so a differently indented page still yields a bare array.
  without_prefix = data.gsub(/\s*window\.BOOKS\s*=\s*\[\{/, "[{")
  without_terminator = without_prefix.gsub("}];", "}]")
  without_terminator.gsub("ANALYTICS.setupChartbeat();", "")
end