Class: Amazoned::Client

Inherits:
Object
  • Object
show all
Defined in:
lib/amazoned/client.rb

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(asin) ⇒ Client

Returns a new instance of Client.



6
7
8
# File 'lib/amazoned/client.rb', line 6

# Builds a client bound to a single product identifier.
#
# @param asin [Object] product identifier, stored as-is in @asin with no
#   validation or normalization (presumably an ASIN string; confirm with callers)
def initialize(asin)
  @asin = asin
end

Instance Attribute Details

#asin ⇒ Object (readonly)

Returns the value of attribute asin.



4
5
6
# File 'lib/amazoned/client.rb', line 4

# Reader for the identifier given to the constructor.
#
# @return [Object] the value currently held in @asin
def asin
  @asin
end

Class Method Details

.sleep_time(num_retries) ⇒ Object

Taken from the Stripe API client. Stripe uses jitter to smooth server load; we use it to obfuscate timing detection of our scraper bot. See github.com/stripe/stripe-ruby/blob/ec66c3f0f44274f885de8d13de5dce2657932121/lib/stripe/stripe_client.rb#L80



54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
# File 'lib/amazoned/client.rb', line 54

# Computes how many seconds to wait before the next retry.
#
# The delay starts at Amazoned.initial_network_retry_delay, doubles with every
# retry, is capped at Amazoned.max_network_retry_delay, is then scaled by a
# random factor in [0.5, 1.0), and is finally floored at the initial delay.
#
# @param num_retries [Integer] retry counter; a value of 1 yields the
#   un-doubled initial delay
# @return [Numeric] seconds to sleep
def self.sleep_time(num_retries)
  base_delay = Amazoned.initial_network_retry_delay

  # Exponential backoff, bounded above by the configured maximum.
  backoff = base_delay * (2**(num_retries - 1))
  capped = [backoff, Amazoned.max_network_retry_delay].min

  # Jitter: keep somewhere between half and all of the capped delay.
  jittered = capped * (0.5 * (1 + rand))

  # Jitter must never push the wait below the base delay.
  [base_delay, jittered].max
end

Instance Method Details

#call ⇒ Object



10
11
12
13
# File 'lib/amazoned/client.rb', line 10

# Fetches the product page and hands it to the parser.
#
# @return [Object] whatever Amazoned::Parser#call returns for the fetched page
def call
  Amazoned::Parser.new(get_product).call
end

#get_product(num_retries = 1) ⇒ Object



15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
# File 'lib/amazoned/client.rb', line 15

# Downloads the Amazon product page for this client's ASIN.
#
# A fresh Mechanize agent with a randomly chosen user agent alias is built for
# every attempt. When the page is detected as failed (see request_failed), the
# method sleeps for self.class.sleep_time(num_retries) and calls itself again,
# until num_retries exceeds Amazoned.max_network_retries; at that point the
# last page is passed to handle_failed_request!.
#
# @param num_retries [Integer] counter of the current attempt, starting at 1
# @return [Object] the page returned by Mechanize, or the result of
#   handle_failed_request! once retries are exhausted
# @raise [Amazoned::ProductNotFoundError] when Mechanize raises
#   Mechanize::ResponseCodeError for the request
def get_product(num_retries = 1)
  browser = Mechanize.new
  browser.html_parser = HtmlParser # Avoid encoding issues: https://stackoverflow.com/a/20666246/3448554
  # Spoof every request with one of the common user agents shipped with
  # Mechanize, as a way to hit fewer CAPTCHA walls.
  browser.user_agent_alias = [
    "Linux Firefox", "Linux Mozilla",
    "Mac Firefox", "Mac Mozilla", "Mac Safari",
    "Windows Chrome", "Windows IE 10", "Windows IE 11", "Windows Edge",
    "Windows Mozilla", "Windows Firefox"
  ].sample

  begin
    # GET the Amazon page addressed by the ASIN.
    page = browser.get("https://www.amazon.com/dp/#{asin}")
    return page unless request_failed(page)

    puts "Request failed!  Trying again..."
    # Out of retries: let the failure handler decide what to raise.
    return handle_failed_request!(page) if num_retries > Amazoned.max_network_retries

    # Otherwise back off and recurse, to be resilient against one-off failures.
    sleep self.class.sleep_time(num_retries)
    get_product(num_retries + 1)
  rescue Mechanize::ResponseCodeError
    raise Amazoned::ProductNotFoundError
  end
end

#handle_failed_request!(response) ⇒ Object



46
47
48
49
# File 'lib/amazoned/client.rb', line 46

# Raises when the given page is still Amazon's CAPTCHA wall, i.e. the scraper
# could not get past it.
#
# @param response [#xpath] page returned by the last request attempt
# @return [nil] when the page is not a CAPTCHA page
# @raise [Amazoned::BotDeniedAccessError] when the CAPTCHA text is present
def handle_failed_request!(response)
  # Delegate to request_failed so the CAPTCHA detection query lives in exactly
  # one place instead of being duplicated here.
  raise Amazoned::BotDeniedAccessError if request_failed(response)
end

#request_failed(response) ⇒ Object



41
42
43
44
# File 'lib/amazoned/client.rb', line 41

# Reports whether the page is Amazon's CAPTCHA wall.
#
# @param response [#xpath] page returned by the request
# @return [Boolean] true when a <p> element containing the CAPTCHA prompt
#   text is found, false otherwise
def request_failed(response)
  captcha_prompts = response.xpath('//p[contains(text(), "Sorry, we just need to make sure")]')
  captcha_prompts.any?
end