Class: NPRBestBooks::Scraper

Inherits:
Object
  • Object
show all
Defined in:
lib/npr_best_books/scraper.rb

Overview

require 'rubygems'; require 'nokogiri'; require 'open-uri'; require 'json'

Class Method Summary collapse

Class Method Details

.books ⇒ Object



8
9
10
11
12
13
14
15
# File 'lib/npr_best_books/scraper.rb', line 8

# Downloads the archived NPR "Best Books 2015" page and returns the book data
# embedded in one of its inline <script> elements.
#
# Prints "Loading books..." first, then fetches and parses the page with
# Nokogiri, takes the first <script> whose text contains "BOOKS", strips the
# surrounding JavaScript with parse_data, and feeds the remainder to JSON.parse.
#
# @return [Object] whatever JSON.parse yields for the cleaned script text
#   (presumably an Array of Hashes, one per book — depends on the page content)
# @raise [RuntimeError] if no <script> element mentions "BOOKS"
# @raise [JSON::ParserError] if the cleaned script text is not valid JSON
def self.books
  puts "Loading books..."
  # URI.open rather than Kernel#open: open-uri's Kernel#open patch for URLs was
  # deprecated in Ruby 2.7 and removed in Ruby 3.0.
  html = Nokogiri::HTML(URI.open("http://web.archive.org/web/20160622040558/http://apps.npr.org/best-books-2015/"))
  books_script = html.css("script").find { |s| s.children.text.include?("BOOKS") }
  # Fail with a descriptive message instead of a NoMethodError on nil.
  raise "No <script> containing BOOKS was found in the page" unless books_script

  JSON.parse(parse_data(books_script.children.text))
end

.lookup_amazon(url) ⇒ Object



24
25
26
27
# File 'lib/npr_best_books/scraper.rb', line 24

# Fetches the resource at +url+ and parses it as HTML with Nokogiri.
#
# @param url [String] address of the page to fetch (the method name suggests
#   an Amazon page, but any URL accepted by URI.open is handled the same way)
# @return [Object] the document object returned by Nokogiri::HTML
def self.lookup_amazon(url)
  # URI.open rather than Kernel#open: open-uri's Kernel#open patch for URLs was
  # deprecated in Ruby 2.7 and removed in Ruby 3.0.
  Nokogiri::HTML(URI.open(url))
end

.parse_data(data) ⇒ Object



17
18
19
20
21
22
# File 'lib/npr_best_books/scraper.rb', line 17

# Strips the JavaScript wrapper from the inline script text so that only the
# array literal assigned to window.BOOKS is left.
#
# Three literal replacements are applied in order, each to every occurrence:
# the "window.BOOKS = " assignment prefix (with its leading newline and
# four-space indent) is reduced to "[{", the "}];" terminator loses its
# semicolon, and the "ANALYTICS.setupChartbeat();" call is removed.
# Other whitespace is left untouched.
#
# @param data [String] raw text of the script element
# @return [String] a new string with the replacements applied
def self.parse_data(data)
  data
    .gsub("\n    window.BOOKS = [{", "[{")
    .gsub("}];", "}]")
    .gsub("ANALYTICS.setupChartbeat();", "")
end