Module: Amazon

Defined in:
lib/amazon-search.rb

Overview

Actions for running an Amazon search and collecting product data from the result pages.

Class Attribute Summary

Class Method Summary

Class Attribute Details

.image_url ⇒ Object

Returns the value of attribute image_url.



6
7
8
# File 'lib/amazon-search.rb', line 6

# Reads the class-level +image_url+ attribute.
#
# @return [Object, nil] whatever is currently stored in @image_url; nil when
#   it has never been assigned
def image_url
  instance_variable_get(:@image_url)
end

.price ⇒ Object

Returns the value of attribute price.



6
7
8
# File 'lib/amazon-search.rb', line 6

# Reads the class-level +price+ attribute.
#
# @return [Object, nil] whatever is currently stored in @price; nil when it
#   has never been assigned
def price
  instance_variable_get(:@price)
end

.product_num ⇒ Object

Returns the value of attribute product_num.



6
7
8
# File 'lib/amazon-search.rb', line 6

# Reads the class-level +product_num+ attribute.
#
# @return [Object, nil] whatever is currently stored in @product_num; nil
#   when it has never been assigned
def product_num
  instance_variable_get(:@product_num)
end

.product_url ⇒ Object

Returns the value of attribute product_url.



6
7
8
# File 'lib/amazon-search.rb', line 6

# Reads the class-level +product_url+ attribute.
#
# @return [Object, nil] whatever is currently stored in @product_url; nil
#   when it has never been assigned
def product_url
  instance_variable_get(:@product_url)
end

.products ⇒ Object

Returns the value of attribute products.



6
7
8
# File 'lib/amazon-search.rb', line 6

# Reads the class-level +products+ attribute.
#
# @return [Object, nil] whatever is currently stored in @products; nil when
#   it has never been assigned
def products
  instance_variable_get(:@products)
end

.reviews ⇒ Object

Returns the value of attribute reviews.



6
7
8
# File 'lib/amazon-search.rb', line 6

# Reads the class-level +reviews+ attribute.
#
# @return [Object, nil] whatever is currently stored in @reviews; nil when it
#   has never been assigned
def reviews
  instance_variable_get(:@reviews)
end

.seller ⇒ Object

Returns the value of attribute seller.



6
7
8
# File 'lib/amazon-search.rb', line 6

# Reads the class-level +seller+ attribute.
#
# @return [Object, nil] whatever is currently stored in @seller; nil when it
#   has never been assigned
def seller
  instance_variable_get(:@seller)
end

.stars ⇒ Object

Returns the value of attribute stars.



6
7
8
# File 'lib/amazon-search.rb', line 6

# Reads the class-level +stars+ attribute.
#
# @return [Object, nil] whatever is currently stored in @stars; nil when it
#   has never been assigned
def stars
  instance_variable_get(:@stars)
end

.title ⇒ Object

Returns the value of attribute title.



6
7
8
# File 'lib/amazon-search.rb', line 6

# Reads the class-level +title+ attribute.
#
# @return [Object, nil] whatever is currently stored in @title; nil when it
#   has never been assigned
def title
  instance_variable_get(:@title)
end

Class Method Details

.display_product ⇒ Object

Prints the details of the current product to the console.



94
95
96
97
98
99
100
101
102
103
# File 'lib/amazon-search.rb', line 94

# Writes the product currently held in the instance variables to STDOUT.
#
# The output is a 100-character dashed rule followed by one tab-aligned line
# each for @title, @seller, @price, @stars, @reviews, @image_href and @url.
#
# @return [nil]
def display_product
  report = [
    '--' * 50,
    "title: \t\t#{@title}",
    "seller: \t#{@seller}",
    "price: \t\t#{@price}",
    "stars: \t\t#{@stars}",
    "reviews: \t#{@reviews}",
    "image url: \t#{@image_href}",
    "product url: \t#{@url}"
  ]

  # IO#puts given an array writes each element on its own line.
  STDOUT.puts(report)
end

.examine_current_pagenum ⇒ Object

Reads the number of the current results page from the page's pagination marker.



43
44
45
46
47
48
49
# File 'lib/amazon-search.rb', line 43

# Reads the current page number out of @current_page.
#
# Searches for elements whose class list contains "pagnCur" and converts
# their combined text to an Integer with String#to_i, so missing or
# non-numeric text yields 0.
#
# @return [Integer] the value stored in @current_pagenum
def examine_current_pagenum
  current_marker = @current_page.search(
    '//*[contains(concat( " ", @class, " " ),
      concat( " ", "pagnCur", " " ))]'
  )

  # an Integer is needed for the page-number comparisons made elsewhere
  @current_pagenum = current_marker.text.to_i
end

.extract_product_data ⇒ Object

Extracts product data from the result items on the current page and stores it in the products hash.



106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
# File 'lib/amazon-search.rb', line 106

# Extracts product details from every result node in @current_divs.
#
# For each node the title, seller, price, star rating, review count, image
# source and product link are looked up with CSS selectors. Nodes without a
# title, price or star rating are skipped; every other node is printed with
# display_product and stored in $products under the running @product_num
# counter, which is then incremented.
#
# Fields that cannot be determined fall back as follows: @reviews and
# @seller become 'Unknown'; @image_href and @url become nil.
#
# @return [Object] @current_divs, as returned by #each
def extract_product_data
  # TODO: fix this global variable...

  # nokogiri syntax is needed when iterating...not mechanize!
  @current_divs.each do |html|
    title = html.at_css('.s-access-title')
    price = html.at_css('.s-price')
    stars = html.at_css('.a-icon-star')

    # No title is probably an ad; no price or stars is not a worthwhile item.
    # Skip just this node: `break` here would drop every remaining product
    # on the page.
    next if title.nil? || price.nil? || stars.nil?

    seller = html.at_css('.a-row > .a-spacing-none')
    reviews = html.at_css('span+ .a-text-normal')
    image = html.at_css('.s-access-image')
    link = html.at_css('.a-row > a')

    # extract text and set variables for display_product
    @title = title.text
    @price = price.text
    @stars = stars.text
    @image_href = image.nil? ? nil : image['src']
    @url = link.nil? ? nil : link['href']

    # movies sometimes have text in the review class, and the node may be
    # missing altogether
    @reviews =
      if !reviews.nil? && numeric?(reviews.text)
        reviews.text
      else
        'Unknown'
      end

    # sometimes seller is nil on movies, etc.
    @seller = seller.nil? ? 'Unknown' : seller.text

    # short pause between products, kept from the original throttle
    sleep(0.05)

    display_product

    # store extracted text in products hash; key is product count
    $products[@product_num] = {
      :title => @title,
      :price => @price,
      :stars => @stars,
      :reviews => @reviews,
      :image_href => @image_href,
      :url => @url,
      :seller => @seller
    }

    @product_num += 1 # ready for next product
  end
end

.find_form ⇒ Object

Loads the Amazon home page and finds the site search form.



31
32
33
34
# File 'lib/amazon-search.rb', line 31

# Fetches the Amazon home page and locates its search form.
#
# The fetched page is kept in @main_page; the form named 'site-search' is
# kept in @search_form.
#
# @return [Object] the value stored in @search_form
def find_form
  homepage = @agent.get('http://amazon.com')
  @main_page = homepage
  @search_form = homepage.form_with(:name => 'site-search')
end

.find_last_pagenum ⇒ Object

Finds the number of the last results page.



52
53
54
55
56
57
58
# File 'lib/amazon-search.rb', line 52

# Reads the last page number out of @current_page.
#
# Searches for elements whose class list contains "pagnDisabled" and
# converts their combined text to an Integer with String#to_i, so missing or
# non-numeric text yields 0.
#
# @return [Integer] the value stored in @last_pagenum
def find_last_pagenum
  disabled_marker = @current_page.search(
    '//*[contains(concat( " ", @class, " " ),
     concat( " ", "pagnDisabled", " " ))]'
  )

  # an Integer is needed for the page-number comparisons made elsewhere
  @last_pagenum = disabled_marker.text.to_i
end

.load_next_page ⇒ Object

Loads the next page of search results.



61
62
63
64
65
66
67
68
# File 'lib/amazon-search.rb', line 61

# Advances @current_page to the next page of search results.
#
# Refreshes @current_pagenum, looks up the "Next Page" link on the current
# page and follows it. When the current page is already the last one, or the
# page has no "Next Page" link, nothing is fetched and @current_page is left
# as it is. Previously both cases fell through to @next_page.uri, which
# either raised NoMethodError on nil or re-requested a stale page.
#
# The page returned by clicking the link is used directly, rather than
# requesting its URI a second time.
#
# @return [Object] the value of @current_page after the call
def load_next_page
  examine_current_pagenum

  # find next page link
  @next_page_link = @current_page.link_with :text => /Next Page/

  return @current_page if @current_pagenum == @last_pagenum
  return @current_page if @next_page_link.nil?

  @next_page = @next_page_link.click
  @current_page = @next_page
end

.numeric?(s) ⇒ Boolean

Checks whether a string (or other value) can be converted to a number.

Returns:

  • (Boolean)


89
90
91
# File 'lib/amazon-search.rb', line 89

# Reports whether +s+ can be converted by Kernel#Float.
#
# @param s [Object] the value to check, typically a String
# @return [Boolean] true when Float(s) succeeds; false when it raises any
#   StandardError (for example on nil or non-numeric text)
def numeric?(s)
  Float(s)
  true
rescue StandardError
  false
end

.scan ⇒ Object

Cycles through the search result pages, storing each page's product HTML and extracting its product data.



71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
# File 'lib/amazon-search.rb', line 71

# Walks the search result pages, collecting product data from each one.
#
# Resets @pages, determines the last page number, then for each page:
# refreshes @current_pagenum, selects the <li> elements whose id starts with
# "result" into @current_divs, records them in @pages under the current page
# number, and runs extract_product_data. load_next_page is called between
# pages but not after the final one.
#
# At least one page is always processed, so the results page that is already
# loaded is still scanned when find_last_pagenum yields 0.
#
# @return [nil] the result of the final puts
def scan
  @pages = {}

  find_last_pagenum

  page_count = [@last_pagenum, 1].max

  page_count.times do |index| # paginate until on last page.
    examine_current_pagenum

    @current_divs = @current_page.search('//li[starts-with(@id, "result")]')
    # keyed by the page number just read; the previous key, @page_num, was
    # never assigned, so every page overwrote the single nil entry
    @pages[@current_pagenum] = @current_divs # store page results

    extract_product_data
    load_next_page unless index == page_count - 1
  end
  puts "\n(scan complete.)"
end

.search(keywords) ⇒ Object

Main entry point: runs an Amazon search for the given keywords and returns the collected products.



10
11
12
13
14
15
16
17
18
# File 'lib/amazon-search.rb', line 10

# Runs a complete Amazon search for +keywords+.
#
# Stores the keywords in @keywords and then runs the pipeline steps in
# order: set_initial_values, set_agent, find_form, submit_form, scan.
#
# @param keywords [Object] the search terms typed into the search form
# @return [Object] the global $products collection
def search(keywords)
  @keywords = keywords

  pipeline = [:set_initial_values, :set_agent, :find_form, :submit_form, :scan]
  pipeline.each { |step| send(step) }

  $products
end

.set_agent ⇒ Object

Prepares the Mechanize agent.



26
27
28
# File 'lib/amazon-search.rb', line 26

# Creates the Mechanize agent used for all page requests.
#
# The agent's user-agent alias is set to 'Mac Safari' and the agent is kept
# in @agent.
#
# @return [Mechanize] the newly created agent
def set_agent
  @agent = Mechanize.new do |browser|
    browser.user_agent_alias = 'Mac Safari'
  end
end

.set_initial_values ⇒ Object



20
21
22
23
# File 'lib/amazon-search.rb', line 20

# Resets the state used to accumulate search results.
#
# Replaces the global $products with an empty Hash and sets the
# @product_num counter back to 0.
#
# @return [Integer] 0, the value assigned to @product_num
def set_initial_values
  $products = Hash.new
  @product_num = 0
end

.submit_form ⇒ Object

Fills in the Amazon search box with the keywords and submits the form.



37
38
39
40
# File 'lib/amazon-search.rb', line 37

# Fills in the search form and submits it.
#
# The field named 'field-keywords' on @search_form receives @keywords; the
# page returned by submitting the form through @agent becomes @current_page.
#
# @return [Object] the value stored in @current_page
def submit_form
  keyword_field = @search_form.field_with(:name => 'field-keywords')
  keyword_field.value = @keywords

  @current_page = @agent.submit(@search_form)
end