Class: NytJourneys::Scraper
- Inherits:
-
Object
- Object
- NytJourneys::Scraper
- Defined in:
- lib/nyt_journeys/scraper.rb
Class Method Summary collapse
-
.scrape_quotes(quotes_url = "http://www.quotationspage.com/search.php3?homesearch=journey&page=1") ⇒ Object
quote: quotations_page.css(“dt.quote”), .css(“a”).text author: quotations_page.css(“dd.author”), .css(“a”).text.
-
.scrape_type_detail_page(types_hash) ⇒ Object
trip_name: .css(“h2.item-title”).text trip_url: .css(“li.journey-list-item > a”).attribute(“href”).value.
-
.scrape_type_summary_page(summary_url) ⇒ Object
type_name: .css(“h3.trip-type-list-title”).text type_url: .css(“a.view-all-link”).attribute(“href”).value.
Class Method Details
.scrape_quotes(quotes_url = "http://www.quotationspage.com/search.php3?homesearch=journey&page=1") ⇒ Object
quote: quotations_page.css(“dt.quote”), .css(“a”).text
author: quotations_page.css("dd.author"), .css("a").text
34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 |
# File 'lib/nyt_journeys/scraper.rb', line 34 def self.scrape_quotes(quotes_url="http://www.quotationspage.com/search.php3?homesearch=journey&page=1") quotations_page = Nokogiri::HTML(open(quotes_url)) quotes = [] quotations_page.css("dt.quote").each do |quote| item = "#{quote.css("a").text}" quotes << item end authors = [] quotations_page.css("dd.author").each do |author| item = "#{author.css("a")[4].text}" authors << item end quotes.map.with_index do |quote, index| "#{quote} ~ #{authors[index]}" end end |
.scrape_type_detail_page(types_hash) ⇒ Object
trip_name: .css(“h2.item-title”).text trip_url: .css(“li.journey-list-item > a”).attribute(“href”).value
19 20 21 22 23 24 25 26 27 28 29 30 |
# File 'lib/nyt_journeys/scraper.rb', line 19 def self.scrape_type_detail_page(types_hash) type_details = Nokogiri::HTML(open(types_hash.values[1])) trips =[] type_details.css("li.journey-list-item > a").each do |trip| trips_hash = {} trips_hash[:name] = trip.css("h2.item-title").text trips_hash[:url] = trip.attribute("href").value trips_hash[:type] = types_hash.values[0] trips << trips_hash end trips # an array of trip hashes with name, url, and type properties end |
.scrape_type_summary_page(summary_url) ⇒ Object
type_name: .css(“h3.trip-type-list-title”).text type_url: .css(“a.view-all-link”).attribute(“href”).value
5 6 7 8 9 10 11 12 13 14 15 |
# File 'lib/nyt_journeys/scraper.rb', line 5 def self.scrape_type_summary_page(summary_url) type_summary = Nokogiri::HTML(open(summary_url)) types = [] type_summary.css("div.trip-type-listings").each do |type| type_hash = {} type_hash[:type_name] = type.css("h3.trip-type-list-title").text type_hash[:type_url] = type.css("a.view-all-link").attribute("href").value types << type_hash end types # an array of type hashes with type_name and type_url properties end |