Class: RainJackets::Scraper

Inherits:
Object
  • Object
show all
Defined in:
lib/rain_jackets/scraper.rb

Class Method Summary collapse

Class Method Details

.get_page ⇒ Object

Fetch the comparison page with open-uri and parse the response into a Nokogiri HTML document



5
6
7
# File 'lib/rain_jackets/scraper.rb', line 5

# Downloads the women's rain-jacket comparison page and hands the response
# to Nokogiri::HTML for parsing.
#
# URI.open is called explicitly: a bare `open(url)` is dispatched to
# Kernel#open, which stopped handling URLs in Ruby 3.0 even when open-uri
# is loaded, so it would try to open a local file with that name instead.
#
# @return [Object] whatever Nokogiri::HTML returns for the fetched page
def self.get_page
  Nokogiri::HTML(URI.open("https://www.outdoorgearlab.com/topics/clothing-womens/best-rain-jacket-womens"))
end

.initialize_jacket_objects ⇒ Object

Instantiate jacket objects and assign attributes to each instance



17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
# File 'lib/rain_jackets/scraper.rb', line 17

# Builds one RainJackets::Jacket per product column of the scraped comparison
# table and stores the resulting array through RainJackets::Jacket.all=.
#
# Rows from scrape_jackets_table are addressed purely by position:
# row 0 creates the jackets (name + URL); every row listed in
# `attribute_rows` fills one attribute on the jackets, matching cells to
# jackets by column index. Rows not listed are ignored.
#
# All values are stored as the cell's text, unconverted (prices and ratings
# included).
#
# @return [Array<RainJackets::Jacket>] the same array that was stored
# @raise [NoMethodError] if an attribute row yields more cells than row 0
#   produced jackets, or if a name cell contains no link with an href
def self.initialize_jacket_objects
  # Row index => [CSS selector for that row's cells, Jacket writer to call].
  attribute_rows = {
    2  => ["td.compare_items span", :price=],
    3  => ["div.rating_score", :overall_rating=],
    5  => ["td.compare_items", :pros=],
    6  => ["td.compare_items", :cons=],
    7  => ["td.compare_items", :description=],
    9  => ["div.rating_score", :water_resistance_rating=],
    10 => ["div.rating_score", :breathability_rating=],
    11 => ["div.rating_score", :comfort_rating=],
    12 => ["div.rating_score", :weight_rating=],
    13 => ["div.rating_score", :durability_rating=],
    14 => ["div.rating_score", :packed_size_rating=]
  }

  all_jackets = []

  scrape_jackets_table.each_with_index do |tr_element, tr_index|
    if tr_index.zero?
      # Header row: each product-name cell becomes a new jacket.
      tr_element.css("div.compare_product_name").each do |name_cell|
        jacket = RainJackets::Jacket.new
        jacket.name = name_cell.text
        # The href is site-relative, so prefix the host to get a full URL.
        jacket.url = "https://www.outdoorgearlab.com" + name_cell.css("a").first.attributes["href"].value
        all_jackets << jacket
      end
      next
    end

    selector, writer = attribute_rows[tr_index]
    next unless selector

    # Cell N in this row belongs to the jacket created from name cell N.
    tr_element.css(selector).each_with_index do |cell, column_index|
      all_jackets[column_index].public_send(writer, cell.text)
    end
  end

  # Hand the finished collection to the Jacket class.
  RainJackets::Jacket.all = all_jackets
end

.scrape_jackets_table ⇒ Object



9
10
11
12
13
14
# File 'lib/rain_jackets/scraper.rb', line 9

def self.scrape_jackets_table
  # Fetch the parsed page, then select every <tr> nested under a
  # div.content_table_xwide container. The result of that CSS query is
  # returned as-is; callers walk the rows by position.
  page = get_page
  page.css("div.content_table_xwide tr")
end