Class: AmazonWishList
- Inherits:
-
Object
- Object
- AmazonWishList
- Defined in:
- lib/amazon_wish_miner/amazon_wish_list.rb
Constant Summary collapse
- REVEAL_OPTIONS =
[:all, :purchased, :unpurchased].freeze
- SORT_OPTIONS =
{date_added: "date-added", title: 'universal-title', price_high: 'universal-price-desc', price_low: 'universal-price', date_updated: 'last-updated', priority: 'priority'}.freeze
Instance Attribute Summary collapse
-
#id ⇒ Object
Returns the value of attribute id.
-
#wishes ⇒ Object
Returns the value of attribute wishes.
Class Method Summary collapse
- .check_for_redirect(url) ⇒ Object
- .find_lek_from_response(response) ⇒ Object
- .get_all_wishlist_pages(url_without_qstring, query_params) ⇒ Object
-
.get_wishlist(amazon_list_id, reveal = :all, sort = :date_added, tld = 'com') ⇒ Object
def self.get_wishlist(amazon_list_id, reveal = :all, sort = :date_added, tld = 'com') raise "invalid reveal" unless REVEAL_OPTIONS.include?(reveal) raise "invalid sort" unless SORT_OPTIONS.
- .get_wishlist_page(url_without_qstring, query_params) ⇒ Object
- .page_query_string(query_params) ⇒ Object
-
.wishlist_attributes(amazon_list_id, reveal = :all, sort = :date_added, tld = 'com') ⇒ Object
The method below only sends one request, so it is less likely to be identified as being used by a scraper.
- .wishlist_from_url(url) ⇒ Object
Instance Method Summary collapse
-
#initialize(id, wishes) ⇒ AmazonWishList
constructor
A new instance of AmazonWishList.
Constructor Details
#initialize(id, wishes) ⇒ AmazonWishList
Returns a new instance of AmazonWishList.
10 11 12 13 |
# File 'lib/amazon_wish_miner/amazon_wish_list.rb', line 10

# Builds a wishlist object from its identifier and its wishes.
#
# @param id the wishlist identifier, stored as-is in @id
# @param wishes the wishes belonging to the list, stored as-is in @wishes
def initialize(id, wishes)
  @id, @wishes = id, wishes
end
Instance Attribute Details
#id ⇒ Object
Returns the value of attribute id.
3 4 5 |
# File 'lib/amazon_wish_miner/amazon_wish_list.rb', line 3 def id @id end |
#wishes ⇒ Object
Returns the value of attribute wishes.
3 4 5 |
# File 'lib/amazon_wish_miner/amazon_wish_list.rb', line 3 def wishes @wishes end |
Class Method Details
.check_for_redirect(url) ⇒ Object
92 93 94 95 96 97 98 99 100 101 102 103 104 |
# File 'lib/amazon_wish_miner/amazon_wish_list.rb', line 92

# Follows HTTP redirects by hand and returns the final URL.
#
# Each request is issued with automatic redirect handling disabled
# (max_redirects: 0), so a 3xx answer surfaces as an exception whose
# Location header becomes the next URL to try.
#
# @param url [String] the URL to start from
# @param redirect_limit [Integer] maximum number of redirects to follow
#   before giving up (guards against redirect cycles)
# @return [String] the first URL that answers without redirecting
# @raise [RestClient::ExceptionWithResponse] for any non-redirect failure
# @raise [RuntimeError] when more than redirect_limit redirects are seen
def self.check_for_redirect(url, redirect_limit = 10)
  redirects_followed = 0
  begin
    RestClient::Request.execute(method: :get, url: url, max_redirects: 0)
  rescue RestClient::ExceptionWithResponse => err
    # The exception may carry no response at all (presumably e.g. on
    # timeouts), and a redirect without a Location header cannot be followed.
    location = err.response && err.response.headers[:location]
    raise err unless location && err.response.code / 100 == 3
    raise "too many redirects" if redirects_followed >= redirect_limit

    redirects_followed += 1
    url = location
    retry
  end
  url
end
.find_lek_from_response(response) ⇒ Object
76 77 78 79 80 81 82 |
# File 'lib/amazon_wish_miner/amazon_wish_list.rb', line 76

# Extracts the "lastEvaluatedKey" ("lek") value from a wishlist page.
#
# As of the time of writing this, "lastEvaluatedKey", abbreviated as "lek",
# is used to keep track of what portions of the wishlist have already been
# loaded, and is sent in the query string of ajax calls to get the next page.
#
# @param response an object whose #body returns the page HTML as a String
# @return [String, nil] the text between the opening marker and the closing
#   marker
# @raise [RuntimeError] when the page does not contain the opening marker
def self.find_lek_from_response(response)
  after_marker = response.body.split('name="lastEvaluatedKey" value="')[1]
  # Fail with a clear message instead of calling #split on nil.
  raise "lastEvaluatedKey not found in response" unless after_marker

  after_marker.split('" class="lastEvaluatedKey"')[0]
end
.get_all_wishlist_pages(url_without_qstring, query_params) ⇒ Object
54 55 56 57 58 59 60 61 62 63 64 |
# File 'lib/amazon_wish_miner/amazon_wish_list.rb', line 54

# Fetches every page of a wishlist, one request per page.
#
# After each page the "lek" cursor found in the response is added to the
# query parameters so the next request asks for the following page.
#
# @param url_without_qstring [String] wishlist URL without a query string
# @param query_params [Hash] parameters for the first page (:reveal,
#   :sort_string); the hash itself is left untouched
# @return [Array] the responses in the order they were fetched
def self.get_all_wishlist_pages(url_without_qstring, query_params)
  # Work on a copy so setting :lek below does not modify the caller's hash.
  page_params = query_params.dup
  responses = []
  loop do
    response = get_wishlist_page(url_without_qstring, page_params)
    responses << response
    # As of the time this was written, this phrase appears only on the last page.
    return responses if response.body.include?("Find a gift")

    # Only :lek changes between pages; the rest of the params stay the same.
    page_params[:lek] = find_lek_from_response(response)
  end
end
.get_wishlist(amazon_list_id, reveal = :all, sort = :date_added, tld = 'com') ⇒ Object
def self.get_wishlist(amazon_list_id, reveal = :all, sort = :date_added, tld = 'com')
raise "invalid reveal" unless REVEAL_OPTIONS.include?(reveal)
raise "invalid sort" unless SORT_OPTIONS[sort]
query_params = {reveal: reveal.to_s, sort_string: SORT_OPTIONS[sort]}
# lek is nil for the first page
url_without_qstring = "http://www.amazon.#{tld}/hz/wishlist/ls/#{amazon_list_id}"
pages = self.get_all_wishlist_pages(url_without_qstring, query_params)
wishes = AmazonWish.parse_wishes_from_pages(pages)
AmazonWishList.new(amazon_list_id, wishes)
end
49 50 51 52 |
# File 'lib/amazon_wish_miner/amazon_wish_list.rb', line 49

# Builds an AmazonWishList for the given list id.
#
# The arguments are passed straight to wishlist_attributes; the attributes
# it returns are turned into wishes via AmazonWish.wishes_from_attributes.
#
# @param amazon_list_id the wishlist id
# @param reveal [Symbol] forwarded to wishlist_attributes
# @param sort [Symbol] forwarded to wishlist_attributes
# @param tld [String] forwarded to wishlist_attributes
# @return [AmazonWishList]
def self.get_wishlist(amazon_list_id, reveal = :all, sort = :date_added, tld = 'com')
  attributes = wishlist_attributes(amazon_list_id, reveal, sort, tld)
  AmazonWishList.new(amazon_list_id, AmazonWish.wishes_from_attributes(attributes))
end
.get_wishlist_page(url_without_qstring, query_params) ⇒ Object
66 67 68 69 |
# File 'lib/amazon_wish_miner/amazon_wish_list.rb', line 66

# Requests a single wishlist page.
#
# @param url_without_qstring [String] wishlist URL without a query string
# @param query_params [Hash] parameters handed to page_query_string
# @return the result of RestClient.get for the assembled URL
def self.get_wishlist_page(url_without_qstring, query_params)
  RestClient.get(url_without_qstring + self.page_query_string(query_params))
end
.page_query_string(query_params) ⇒ Object
71 72 73 74 |
# File 'lib/amazon_wish_miner/amazon_wish_list.rb', line 71

# Builds the query string for a wishlist page request.
#
# @param query_params [Hash] reads :reveal, :sort_string and, optionally, :lek
# @return [String] the query string including the leading "?"; the
#   lek/type/ajax parameters are appended only when :lek is set
def self.page_query_string(query_params)
  base = "?reveal=#{query_params[:reveal]}&layout=standard&sort=#{query_params[:sort_string]}"
  return base unless query_params[:lek]

  "#{base}&lek=#{query_params[:lek]}&type=wishlist&ajax=true"
end
.wishlist_attributes(amazon_list_id, reveal = :all, sort = :date_added, tld = 'com') ⇒ Object
The method below only sends one request, so it is less likely to be identified as being used by a scraper.
16 17 18 19 20 21 22 23 24 25 26 |
# File 'lib/amazon_wish_miner/amazon_wish_list.rb', line 16

# Downloads all pages of a wishlist and returns what
# AmazonWish.attributes_from_responses extracts from them.
#
# @param amazon_list_id the wishlist id, interpolated into the URL
# @param reveal [Symbol] must be one of REVEAL_OPTIONS
# @param sort [Symbol] must be a key of SORT_OPTIONS
# @param tld [String] top-level domain interpolated into the Amazon host name
# @return the result of AmazonWish.attributes_from_responses
# @raise [RuntimeError] "invalid reveal" or "invalid sort" for unknown options
def self.wishlist_attributes(amazon_list_id, reveal = :all, sort = :date_added, tld = 'com')
  raise "invalid reveal" unless REVEAL_OPTIONS.include?(reveal)

  sort_string = SORT_OPTIONS[sort]
  raise "invalid sort" unless sort_string

  # No :lek entry yet: lek is nil for the first page.
  first_page_params = {reveal: reveal.to_s, sort_string: sort_string}
  base_url = "http://www.amazon.#{tld}/hz/wishlist/ls/#{amazon_list_id}"
  AmazonWish.attributes_from_responses(self.get_all_wishlist_pages(base_url, first_page_params))
end
.wishlist_from_url(url) ⇒ Object
84 85 86 87 88 89 90 |
# File 'lib/amazon_wish_miner/amazon_wish_list.rb', line 84

# Loads a wishlist given a URL that leads to it.
#
# The URL is requested first and the final URI of that request is used, so
# the id is read from wherever the request ended up. The id is the first
# non-empty path segment after "/wishlist/" that is not "ls".
#
# @param url [String] a URL leading to a wishlist page
# @return the result of get_wishlist for the extracted id
# @raise [RuntimeError] "invalid wishlist url" when no id can be found
def self.wishlist_from_url(url)
  url = HTTParty.get(url).request.last_uri.to_s
  #url = check_for_redirect(url)

  # Ignore any query string or fragment so it cannot end up inside the id.
  path = url[/\A[^?#]*/]
  id_start = path.split('/wishlist/')[1]
  id = id_start && id_start.split('/').find { |str| !str.empty? && str != 'ls' }
  raise "invalid wishlist url" unless id

  get_wishlist(id)
end