Class: NytJourneys::Scraper

Inherits:
Object
  • Object
show all
Defined in:
lib/nyt_journeys/scraper.rb

Class Method Summary collapse

Class Method Details

.scrape_quotes(quotes_url = "http://www.quotationspage.com/search.php3?homesearch=journey&page=1") ⇒ Object

quote: quotations_page.css(“dt.quote”), .css(“a”).text

author: quotations_page.css("dd.author"), .css("a")[4].text


34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
# File 'lib/nyt_journeys/scraper.rb', line 34

# Scrapes quotations from a quotations search-results page and pairs each
# quotation with the author found at the same position on the page.
#
# @param quotes_url [String] address of the search-results page to scrape
# @return [Array<String>] one "quote ~ author" string per +dt.quote+ element,
#   in page order; the author part is empty when the page has no author entry
#   at the same position
# @raise [NoMethodError] if a +dd.author+ element holds fewer than five links
def self.scrape_quotes(quotes_url = "http://www.quotationspage.com/search.php3?homesearch=journey&page=1")
  # URI.open only ever treats its argument as a URL. Kernel#open would run a
  # "|command" string as a subprocess and no longer fetches URLs on Ruby 3.0+.
  # The block form closes the download as soon as it has been parsed.
  quotations_page = URI.open(quotes_url) { |io| Nokogiri::HTML(io) }

  quotes = quotations_page.css("dt.quote").map { |quote| quote.css("a").text }

  # NOTE(review): the author's name is read from the fifth link of each entry;
  # presumably the four links before it are icon links -- confirm against the
  # live page markup.
  authors = quotations_page.css("dd.author").map { |author| author.css("a")[4].text }

  # zip pads with nil when there are fewer authors than quotes, and nil
  # interpolates as an empty string.
  quotes.zip(authors).map { |quote, author| "#{quote} ~ #{author}" }
end

.scrape_type_detail_page(types_hash) ⇒ Object

trip_name: .css("h2.item-title").text; trip_url: .attribute("href").value



19
20
21
22
23
24
25
26
27
28
29
30
# File 'lib/nyt_journeys/scraper.rb', line 19

# Scrapes the listing page of one trip type and collects the trips on it.
#
# @param types_hash [Hash] a type record such as the ones built by
#   scrape_type_summary_page, with +:type_name+ and +:type_url+ entries; a
#   hash without those keys is read positionally (first value = type name,
#   second value = page URL)
# @return [Array<Hash>] one hash per +li.journey-list-item > a+ link, with
#   +:name+ (text of its +h2.item-title+), +:url+ (its +href+) and +:type+
#   (the type name taken from +types_hash+)
def self.scrape_type_detail_page(types_hash)
  # Look the fields up by key so the result does not depend on the order in
  # which the hash was built; fall back to the positional reading so callers
  # passing differently-keyed hashes keep working.
  type_name = types_hash.fetch(:type_name) { types_hash.values[0] }
  type_url = types_hash.fetch(:type_url) { types_hash.values[1] }

  # URI.open only ever treats its argument as a URL. Kernel#open would run a
  # "|command" string as a subprocess and no longer fetches URLs on Ruby 3.0+.
  # The block form closes the download as soon as it has been parsed.
  type_details = URI.open(type_url) { |io| Nokogiri::HTML(io) }

  type_details.css("li.journey-list-item > a").map do |trip|
    {
      name: trip.css("h2.item-title").text,
      url: trip.attribute("href").value,
      type: type_name
    }
  end
end

.scrape_type_summary_page(summary_url) ⇒ Object

type_name: .css(“h3.trip-type-list-title”).text type_url: .css(“a.view-all-link”).attribute(“href”).value



5
6
7
8
9
10
11
12
13
14
15
# File 'lib/nyt_journeys/scraper.rb', line 5

# Scrapes the trip-type summary page and collects every trip type listed.
#
# @param summary_url [String] address of the summary page to scrape
# @return [Array<Hash>] one hash per +div.trip-type-listings+ element, with
#   +:type_name+ (text of its +h3.trip-type-list-title+) and +:type_url+
#   (the +href+ of its +a.view-all-link+)
# @raise [NoMethodError] if a listing has no +a.view-all-link+ carrying an
#   +href+ attribute
def self.scrape_type_summary_page(summary_url)
  # URI.open only ever treats its argument as a URL. Kernel#open would run a
  # "|command" string as a subprocess and no longer fetches URLs on Ruby 3.0+.
  # The block form closes the download as soon as it has been parsed.
  type_summary = URI.open(summary_url) { |io| Nokogiri::HTML(io) }

  type_summary.css("div.trip-type-listings").map do |listing|
    {
      type_name: listing.css("h3.trip-type-list-title").text,
      type_url: listing.css("a.view-all-link").attribute("href").value
    }
  end
end