Class: AmazonWish

Inherits:
Object
  • Object
show all
Defined in:
lib/amazon_wish_miner/amazon_wish.rb

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(asin, title) ⇒ AmazonWish

Returns a new instance of AmazonWish.



5
6
7
8
# File 'lib/amazon_wish_miner/amazon_wish.rb', line 5

# Stores the two attributes that describe a wish.
#
# @param asin value kept in @asin (exposed through the asin reader)
# @param title value kept in @title (exposed through the title reader)
def initialize(asin, title)
  @asin = asin
  @title = title
end

Instance Attribute Details

#asin ⇒ Object (readonly)

Returns the value of attribute asin.



3
4
5
# File 'lib/amazon_wish_miner/amazon_wish.rb', line 3

# Reader for the ASIN this wish was constructed with.
#
# @return the value stored in @asin
def asin
  @asin
end

#title ⇒ Object (readonly)

Returns the value of attribute title.



3
4
5
# File 'lib/amazon_wish_miner/amazon_wish.rb', line 3

# Reader for the title this wish was constructed with.
#
# @return the value stored in @title
def title
  @title
end

Class Method Details

.attributes_from_responses(page_responses) ⇒ Object



92
93
94
95
96
97
98
99
100
# File 'lib/amazon_wish_miner/amazon_wish.rb', line 92

# Collects a { title:, href: } hash for every wishlist entry found in the given pages.
#
# For each response, every `ul#g-items li` element is inspected and the `title` and `href`
# attributes of its first `.a-link-normal` element are recorded.
#
# Entries that have no such link, or whose link lacks either attribute, are skipped instead
# of raising NoMethodError on nil.
#
# @param page_responses [Enumerable] HTML sources accepted by Nokogiri::HTML
# @return [Array<Hash>] one hash per usable list item, in document order
def self.attributes_from_responses(page_responses)
  page_responses.each_with_object([]) do |pr, item_attrs|
    Nokogiri::HTML(pr).css('ul#g-items li').each do |li|
      link = li.css('.a-link-normal').first
      next if link.nil?

      title = link['title']
      href = link['href']
      next if title.nil? || href.nil?

      item_attrs << { title: title, href: href }
    end
  end
end

.draps_from_list_items(list_items) ⇒ Object



29
30
31
32
33
34
# File 'lib/amazon_wish_miner/amazon_wish.rb', line 29

# Maps each wishlist <li> to the external id parsed from its
# "data-reposition-action-params" attribute ("drap" for short).
#
# @param list_items [Enumerable] elements that expose attributes through #[]
# @return [Array] one external_id_from_drap result per list item, in order
def self.draps_from_list_items(list_items)
  list_items.map do |item|
    external_id_from_drap(item['data-reposition-action-params'])
  end
end

.external_id_from_drap(drap) ⇒ Object

The page refers to the ASIN as "itemExternalId".



36
37
38
39
40
41
42
43
# File 'lib/amazon_wish_miner/amazon_wish.rb', line 36

# Extracts the item id from a "data-reposition-action-params" string.
#
# The string is searched for an `"itemExternalId":"..."` pair wherever it appears, not only
# as the first key. From that value the text after the last ':' and before the first '|'
# is returned.
# NOTE(review): the value is presumed to look like "ASIN:B00XXXXXXX|MARKETPLACE" — confirm
# against a live wishlist page.
#
# @param drap [String] raw attribute value
# @return [String] the extracted id, without surrounding quotes or braces
# @raise [ArgumentError] when no non-empty itemExternalId value is present
def self.external_id_from_drap(drap) # the page refers to the ASIN as "itemExternalId"
  match = /"itemExternalId"\s*:\s*"([^"]*)"/.match(drap.to_s)
  # Drop any "scheme:" prefix first, then anything from the first '|' onwards.
  external_id = match ? match[1].split(':').last.to_s.split('|').first : nil
  if external_id.nil? || external_id.empty?
    raise ArgumentError, "no itemExternalId found in #{drap.inspect}"
  end

  external_id
end

.get_title_from_page(page) ⇒ Object



81
82
83
84
85
# File 'lib/amazon_wish_miner/amazon_wish.rb', line 81

# Reads a product title from a parsed product page.
#
# The stripped text of the span whose id ends in "roductTitle" is preferred. When that is
# empty, the `content` attribute of the first `<meta name="title">` element is used.
#
# @param page a parsed document responding to #css and #at_css
# @return [String, nil] the title, or nil when neither source is present
def self.get_title_from_page(page)
  # "roductTitle" is deliberate: CSS selectors are case sensitive, and the suffix match has
  # to capture e.g. both "productTitle" and "ebookProductTitle".
  span_title = page.css('span[id$="roductTitle"]').children.text.strip
  return span_title unless span_title.empty?

  # A missing meta tag or content attribute yields nil rather than NoMethodError on nil.
  meta = page.at_css('meta[name="title"]')
  meta && meta['content']
end

.item_from_asin(asin_arg) ⇒ Object



54
55
56
57
58
59
60
61
62
63
64
65
66
# File 'lib/amazon_wish_miner/amazon_wish.rb', line 54

# Downloads the product page for an ASIN and wraps the scraped title in an AmazonWish.
#
# @param asin_arg [String] appended to the 'https://www.amazon.com/dp/' prefix
# @return [AmazonWish] built from asin_arg and the title returned by get_title_from_page
def self.item_from_asin(asin_arg)
  page = Nokogiri::HTML(RestClient.get('https://www.amazon.com/dp/' + asin_arg))
  # NOTE: the "roductTitle" selector used for the title is not a typo; CSS selectors are
  # case sensitive, and we need to capture e.g. both "productTitle" and "ebookProductTitle".
  # TODO: parse prices, e.g. price = page.css('priceblock_ourprice')
  # TODO: get description parsing to work for different types of items,
  #   e.g. description = parse_feature_bullets(page.css('div#feature-bullets'))
  AmazonWish.new(asin_arg, get_title_from_page(page))
end

.item_from_attributes(attr_hash) ⇒ Object



72
73
74
75
# File 'lib/amazon_wish_miner/amazon_wish.rb', line 72

# Builds an AmazonWish from a { title:, href: } hash.
#
# The ASIN is taken from the path segment that follows "/dp/" in the href. This also works
# for absolute URLs, URLs with a leading slug, and hrefs with a query string directly after
# the id. When the href has no "/dp/" segment, the third '/'-separated piece is used as before.
#
# @param attr_hash [Hash] must provide :href (String) and :title
# @return [AmazonWish]
def self.item_from_attributes(attr_hash)
  href = attr_hash[:href]
  dp_match = %r{/dp/([^/?#]+)}.match(href)
  asin = dp_match ? dp_match[1] : href.split('/')[2]
  AmazonWish.new(asin, attr_hash[:title])
end

.list_items_from_response(page_responses) ⇒ Object



20
21
22
23
24
25
26
27
# File 'lib/amazon_wish_miner/amazon_wish.rb', line 20

# Gathers every `ul#g-items li` element from each page into one flat array.
#
# @param page_responses [Enumerable] HTML sources accepted by Nokogiri::HTML
# @return [Array] matched <li> elements, in page order and then document order
def self.list_items_from_response(page_responses)
  page_responses.flat_map do |raw_page|
    Nokogiri::HTML(raw_page).css('ul#g-items li').to_a
  end
end

.parse_feature_bullets(feature_bullets_div) ⇒ Object



77
78
79
# File 'lib/amazon_wish_miner/amazon_wish.rb', line 77

# Selects the `ul li` elements inside the given feature-bullets container.
#
# @param feature_bullets_div an element responding to #css
# @return whatever #css returns for the 'ul li' selector
def self.parse_feature_bullets(feature_bullets_div)
  feature_bullets_div.css('ul li')
end

.parse_wishes_from_pages(page_responses) ⇒ Object



14
15
16
17
18
# File 'lib/amazon_wish_miner/amazon_wish.rb', line 14

# Runs the full pipeline: page responses -> list items -> external ids -> wishes.
#
# @param page_responses passed unchanged to list_items_from_response
# @return the result of wishes_from_asins for the ids found on the pages
def self.parse_wishes_from_pages(page_responses)
  wishes_from_asins(
    draps_from_list_items(
      list_items_from_response(page_responses)
    )
  )
end

.trim_title(untrimmed_title) ⇒ Object



87
88
89
90
# File 'lib/amazon_wish_miner/amazon_wish.rb', line 87

# Strips leading and trailing characters for which TITLE_TRIMMER returns a truthy value.
#
# @param untrimmed_title [String]
# @return [String] the title with both ends trimmed
def self.trim_title(untrimmed_title)
  without_prefix = untrimmed_title.chars.drop_while(&TITLE_TRIMMER)
  # Reverse so the same predicate can trim the tail, then restore the original order.
  reversed_core = without_prefix.reverse.drop_while(&TITLE_TRIMMER)
  reversed_core.reverse.join
end

.wishes_from_asins(asins) ⇒ Object

Parses item info from the item's own URL rather than from the wishlist, which means that we can reuse the item-scraping method (item_from_asin) to scrape item info.



48
49
50
51
52
# File 'lib/amazon_wish_miner/amazon_wish.rb', line 48

# Builds one wish per ASIN by delegating to item_from_asin.
#
# @param asins [Enumerable] values passed one by one to item_from_asin
# @return [Array] the item_from_asin results, in input order
def self.wishes_from_asins(asins)
  asins.map { |asin| item_from_asin(asin) }
end

.wishes_from_attributes(attr_hash_array) ⇒ Object



68
69
70
# File 'lib/amazon_wish_miner/amazon_wish.rb', line 68

# Builds one wish per attribute hash by delegating to item_from_attributes.
#
# @param attr_hash_array [Enumerable] hashes passed one by one to item_from_attributes
# @return [Array] the item_from_attributes results, in input order
def self.wishes_from_attributes(attr_hash_array)
  attr_hash_array.map do |attrs|
    item_from_attributes(attrs)
  end
end

Instance Method Details

#url ⇒ Object



10
11
12
# File 'lib/amazon_wish_miner/amazon_wish.rb', line 10

# Product page address for this wish.
#
# @return [String] 'https://www.amazon.com/dp/' followed by the string form of @asin
def url
  'https://www.amazon.com/dp/' + @asin.to_s
end