Class: RainJackets::Scraper
- Inherits:
-
Object
- Object
- RainJackets::Scraper
- Defined in:
- lib/rain_jackets/scraper.rb
Class Method Summary collapse
-
.get_page ⇒ Object
Fetch the rain jacket review page with open-uri and parse it with Nokogiri::HTML, returning the parsed document.
-
.initialize_jacket_objects ⇒ Object
Instantiate jacket objects and assign attributes to each instance.
- .scrape_jackets_table ⇒ Object
Class Method Details
.get_page ⇒ Object
Fetch the rain jacket review page with open-uri and parse it with Nokogiri::HTML, returning the parsed document
5 6 7 |
# File 'lib/rain_jackets/scraper.rb', line 5
#
# Download the OutdoorGearLab women's rain jacket review page and hand the
# response to Nokogiri::HTML for parsing.
#
# URI.open is used instead of a bare `open` call: open-uri's Kernel#open
# support for URLs was deprecated in Ruby 2.7 and removed in Ruby 3.0, where
# a bare `open("https://...")` looks for a local file with that name and
# raises Errno::ENOENT. URI.open is available from Ruby 2.5 onward.
#
# NOTE(review): assumes open-uri is already required by this file, as the
# original bare `open` call depended on it too — confirm.
#
# @return [Object] whatever Nokogiri::HTML returns for the fetched page
def self.get_page
  Nokogiri::HTML(URI.open("https://www.outdoorgearlab.com/topics/clothing-womens/best-rain-jacket-womens"))
end
.initialize_jacket_objects ⇒ Object
Instantiate jacket objects and assign attributes to each instance
17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 |
# File 'lib/rain_jackets/scraper.rb', line 17 def self.initialize_jacket_objects all_jackets = [] # Determines which row you're on, hence which property you're trying to populate # Iterate through collection of table rows scrape_jackets_table.each_with_index do |tr_element, tr_index| # Scrape product Name and URL if tr_index == 0 # Grab the element that contains our desired data product_name_row = tr_element.css("div.compare_product_name") # Iterate through each of td element and instantiate new jacket product_name_row.each do |td_element| # Create a new Jacket instance out of each aray element # instantiating Jacket objects and giving each jacket object the correct attributes new_jacket = RainJackets::Jacket.new new_jacket.name = td_element.text #access text content inside an element scraped by Nokogiri new_jacket.url = "https://www.outdoorgearlab.com" + td_element.css("a").first.attributes["href"].value all_jackets << new_jacket end # Scrape Price elsif tr_index == 2 product_price_row = tr_element.css("td.compare_items span") # Iterate through each td_element (column) and # Populate each jacket's price attribute product_price_row.each_with_index do |td_element, td_index| td_value = td_element.text all_jackets[td_index].price = td_value # Price in string "$149.93" end # Scrape Overall Rating elsif tr_index == 3 = tr_element.css("div.rating_score") # Iterate through each rating_score (column) and # Populate each jacket's overall_rating attribute .each_with_index do |, | all_jackets[]. 
= .text #an integer end # Scrape Pros elsif tr_index == 5 pros_row = tr_element.css("td.compare_items").each_with_index do |td_element, td_index| td_value = td_element.text all_jackets[td_index].pros = td_value end # Scrape Cons elsif tr_index == 6 pros_row = tr_element.css("td.compare_items").each_with_index do |td_element, td_index| td_value = td_element.text all_jackets[td_index].cons = td_value end # Scrape Description elsif tr_index == 7 description_row = tr_element.css("td.compare_items").each_with_index do |td_element, td_index| td_value = td_element.text all_jackets[td_index].description = td_value end # Scrape rating categories if tr_index is between 9-14 elsif (9..14).include?(tr_index) tr_element.css("div.rating_score").each_with_index do |, | jacket = all_jackets[] = .text if tr_index == 9 jacket. = elsif tr_index == 10 jacket. = elsif tr_index == 11 jacket. = elsif tr_index == 12 jacket. = elsif tr_index == 13 jacket. = elsif tr_index == 14 jacket. = end end end end # Store all_jackets array in Jacket class variable @@all RainJackets::Jacket.all = all_jackets end |
.scrape_jackets_table ⇒ Object
9 10 11 12 13 14 |
# File 'lib/rain_jackets/scraper.rb', line 9
#
# Fetch the page via get_page and select every `tr` element nested under
# `div.content_table_xwide`.
#
# @return [Object] the result of the CSS query on the fetched page
#   (presumably a collection of row nodes — callers iterate it with
#   each_with_index)
def self.scrape_jackets_table
  page = get_page
  page.css("div.content_table_xwide tr")
end