Class: AmazonWishList

Inherits:
Object
  • Object
show all
Defined in:
lib/amazon_wish_miner/amazon_wish_list.rb

Constant Summary collapse

# Symbols accepted for the +reveal+ argument of the wishlist lookups.
REVEAL_OPTIONS = %i[all purchased unpurchased].freeze

# Maps each accepted +sort+ symbol to the string sent as the page's
# +sort+ query parameter.
SORT_OPTIONS = {
  date_added: 'date-added',
  title: 'universal-title',
  price_high: 'universal-price-desc',
  price_low: 'universal-price',
  date_updated: 'last-updated',
  priority: 'priority'
}.freeze

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(id, wishes) ⇒ AmazonWishList

Returns a new instance of AmazonWishList.



10
11
12
13
# File 'lib/amazon_wish_miner/amazon_wish_list.rb', line 10

# Stores the list id and its wishes on the new instance.
#
# @param id the value kept in @id
# @param wishes the value kept in @wishes
def initialize(id, wishes)
  @id = id
  @wishes = wishes
end

Instance Attribute Details

#id ⇒ Object

Returns the value of attribute id.



3
4
5
# File 'lib/amazon_wish_miner/amazon_wish_list.rb', line 3

# Reader for the @id instance variable.
#
# @return the current value of @id
def id
  @id
end

#wishes ⇒ Object

Returns the value of attribute wishes.



3
4
5
# File 'lib/amazon_wish_miner/amazon_wish_list.rb', line 3

# Reader for the @wishes instance variable.
#
# @return the current value of @wishes
def wishes
  @wishes
end

Class Method Details

.check_for_redirect(url) ⇒ Object



92
93
94
95
96
97
98
99
100
101
102
103
104
# File 'lib/amazon_wish_miner/amazon_wish_list.rb', line 92

# Resolves +url+ to the address it finally redirects to.
#
# Each request is made with automatic redirect following disabled, so a 3xx
# answer surfaces as an exception; the +Location+ header of that answer then
# becomes the next URL to try.
#
# @param url [String] the address to start from
# @return [String] the first URL in the chain that does not answer with a 3xx
# @raise [RestClient::ExceptionWithResponse] for a non-redirect error, for a
#   redirect without a Location header, or once the redirect budget is spent
def self.check_for_redirect(url)
  # Bound the number of hops so a redirect loop cannot spin forever.
  redirects_left = 10
  begin
    RestClient::Request.execute(method: :get, url: url, max_redirects: 0)
  rescue RestClient::ExceptionWithResponse => err
    # NOTE(review): err.response can presumably be nil for failures that never
    # received an HTTP answer (e.g. timeouts) - confirm against rest-client.
    response = err.response
    redirected = response && response.code / 100 == 3
    location = response.headers[:location] if redirected
    raise unless location && redirects_left > 0

    redirects_left -= 1
    url = location
    retry
  end
  url
end

.find_lek_from_response(response) ⇒ Object



76
77
78
79
80
81
82
# File 'lib/amazon_wish_miner/amazon_wish_list.rb', line 76

# Pulls the "lastEvaluatedKey" ("lek") value out of a wishlist page.
#
# At the time of writing, the page carries this key to record which portions
# of the wishlist have already been loaded; it is sent back in the query
# string of the ajax call that fetches the next page.
#
# @param response an object whose #body is the page markup
# @return [String] the text between the key's opening and closing markers
def self.find_lek_from_response(response)
  opening_marker = 'name="lastEvaluatedKey" value="'
  closing_marker = '" class="lastEvaluatedKey"'

  after_opening = response.body.split(opening_marker)[1]
  after_opening.split(closing_marker).first
end

.get_all_wishlist_pages(url_without_qstring, query_params) ⇒ Object



54
55
56
57
58
59
60
61
62
63
64
# File 'lib/amazon_wish_miner/amazon_wish_list.rb', line 54

# Fetches every page of a wishlist, carrying the "lastEvaluatedKey" found on
# each page into the request for the next one.
#
# @param url_without_qstring [String] wishlist URL without a query string
# @param query_params [Hash] parameters for the first page; left unmodified
# @return [Array] the page responses, in the order they were fetched
def self.get_all_wishlist_pages(url_without_qstring, query_params)
  # Work on a copy so the caller's hash never picks up a :lek entry.
  page_params = query_params.dup
  responses = []
  loop do
    response = get_wishlist_page(url_without_qstring, page_params)
    responses << response
    # as of the time this was written, this phrase appears only on the last page
    return responses if response.body.include?("Find a gift")

    # only :lek changes between pages; the other parameters stay the same
    page_params[:lek] = find_lek_from_response(response)
  end
end

.get_wishlist(amazon_list_id, reveal = :all, sort = :date_added, tld = 'com') ⇒ Object

# Downloads every page of a wishlist and wraps the parsed wishes.
#
# @param amazon_list_id the list id interpolated into the wishlist URL
# @param reveal [Symbol] must be one of REVEAL_OPTIONS
# @param sort [Symbol] must be a key of SORT_OPTIONS
# @param tld [String] suffix placed after "amazon." in the URL host
# @return [AmazonWishList] built from the list id and the parsed wishes
# @raise [RuntimeError] when +reveal+ or +sort+ is not a recognised option
def self.get_wishlist(amazon_list_id, reveal = :all, sort = :date_added, tld = 'com')
  raise "invalid reveal" unless REVEAL_OPTIONS.include?(reveal)
  raise "invalid sort" unless SORT_OPTIONS[sort]

  query_params = {reveal: reveal.to_s, sort_string: SORT_OPTIONS[sort]}
  # lek is nil for the first page
  url_without_qstring = "http://www.amazon.#{tld}/hz/wishlist/ls/#{amazon_list_id}"

  pages = get_all_wishlist_pages(url_without_qstring, query_params)
  wishes = AmazonWish.parse_wishes_from_pages(pages)
  AmazonWishList.new(amazon_list_id, wishes)
end


49
50
51
52
# File 'lib/amazon_wish_miner/amazon_wish_list.rb', line 49

# Builds an AmazonWishList for the given list id.
#
# The arguments are passed through to wishlist_attributes, and the result is
# converted into wishes by AmazonWish.wishes_from_attributes.
#
# @return [AmazonWishList] holding +amazon_list_id+ and the converted wishes
def self.get_wishlist(amazon_list_id, reveal = :all, sort = :date_added, tld = 'com')
  attributes = wishlist_attributes(amazon_list_id, reveal, sort, tld)
  AmazonWishList.new(amazon_list_id, AmazonWish.wishes_from_attributes(attributes))
end

.get_wishlist_page(url_without_qstring, query_params) ⇒ Object



66
67
68
69
# File 'lib/amazon_wish_miner/amazon_wish_list.rb', line 66

# Requests one wishlist page.
#
# @param url_without_qstring [String] wishlist URL without a query string
# @param query_params [Hash] handed to page_query_string to build the query
# @return whatever RestClient.get returns for the assembled URL
def self.get_wishlist_page(url_without_qstring, query_params)
  page_url = url_without_qstring + page_query_string(query_params)
  RestClient.get(page_url)
end

.page_query_string(query_params) ⇒ Object



71
72
73
74
# File 'lib/amazon_wish_miner/amazon_wish_list.rb', line 71

# Builds the query string for a wishlist page request.
#
# @param query_params [Hash] reads :reveal, :sort_string and, when present, :lek
# @return [String] the query string, starting with "?"; the lek, type and
#   ajax parameters are appended only when :lek is set
def self.page_query_string(query_params)
  base = "?reveal=#{query_params[:reveal]}&layout=standard&sort=#{query_params[:sort_string]}"
  lek = query_params[:lek]
  return base unless lek

  "#{base}&lek=#{lek}&type=wishlist&ajax=true"
end

.wishlist_attributes(amazon_list_id, reveal = :all, sort = :date_added, tld = 'com') ⇒ Object

The method below only sends one request, so it is less likely to be identified as being used by a scraper.



16
17
18
19
20
21
22
23
24
25
26
# File 'lib/amazon_wish_miner/amazon_wish_list.rb', line 16

# Fetches all pages of a wishlist and returns the attributes extracted from
# them by AmazonWish.attributes_from_responses.
#
# @param amazon_list_id the list id interpolated into the wishlist URL
# @param reveal [Symbol] must be one of REVEAL_OPTIONS
# @param sort [Symbol] must be a key of SORT_OPTIONS
# @param tld [String] suffix placed after "amazon." in the URL host
# @raise [RuntimeError] when +reveal+ or +sort+ is not a recognised option
def self.wishlist_attributes(amazon_list_id, reveal = :all, sort = :date_added, tld = 'com')
  raise "invalid reveal" unless REVEAL_OPTIONS.include?(reveal)

  sort_string = SORT_OPTIONS[sort]
  raise "invalid sort" unless sort_string

  # no :lek entry here: the first page is requested without one
  first_page_params = {reveal: reveal.to_s, sort_string: sort_string}
  list_url = "http://www.amazon.#{tld}/hz/wishlist/ls/#{amazon_list_id}"

  responses = get_all_wishlist_pages(list_url, first_page_params)
  AmazonWish.attributes_from_responses(responses)
end

.wishlist_from_url(url) ⇒ Object



84
85
86
87
88
89
90
# File 'lib/amazon_wish_miner/amazon_wish_list.rb', line 84

# Looks up a wishlist from a link to it.
#
# The link is requested first so that the id is read from the URL the request
# finally lands on. The id is the first path segment after "/wishlist/" that
# is neither empty nor "ls".
#
# NOTE: the list is fetched through get_wishlist(id), i.e. with that method's
# default tld; the host of +url+ is not taken into account.
#
# @param url [String] a link to a wishlist
# @return the result of get_wishlist for the extracted id
# @raise [ArgumentError] when no wishlist id can be found in the final URL
def self.wishlist_from_url(url)
  # Use only the path of the final URL so a query string or fragment cannot
  # end up inside the id.
  final_path = HTTParty.get(url).request.last_uri.path.to_s
  after_marker = final_path.split('/wishlist/', 2)[1]
  raise ArgumentError, "no wishlist id found in #{url}" unless after_marker

  id = after_marker.split('/').find { |segment| !segment.empty? && segment != 'ls' }
  raise ArgumentError, "no wishlist id found in #{url}" unless id

  get_wishlist(id)
end