Class: KindleUtil::AmazonCrawler

Inherits:
Object
  • Object
show all
Defined in:
lib/kindle_util/amazon_crawler.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(user, pass, cache) ⇒ AmazonCrawler

Returns a new instance of AmazonCrawler.



8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
# File 'lib/kindle_util/amazon_crawler.rb', line 8

# Builds a crawler for one Amazon account and loads the list of owned items.
#
# The list is read from the JSON cache file ~/.kindle_util.json. It is fetched
# again through #fetch_ownership, and written back to that file, when the cache
# is missing, unreadable, empty, not a JSON array, or when +cache+ is falsy.
#
# @param user  value stored in @user (the sign-in form's e-mail field is set from it)
# @param pass  value stored in @pass (the sign-in form's password field is set from it)
# @param cache when falsy, the cached list is ignored and always re-fetched
# @return [AmazonCrawler]
def initialize(user, pass, cache)
  @user = user
  @pass = pass
  @cache_file = File.expand_path("~/.kindle_util.json")

  @agent = Mechanize.new do |agent|
    agent.user_agent_alias = 'Mac Safari'
    agent.follow_meta_refresh = true
    agent.redirect_ok = true
  end

  cached = begin
    JSON.parse(File.read(@cache_file))
  rescue StandardError
    # Deliberate best effort: a missing or corrupt cache just means "nothing cached".
    []
  end
  # The cache is always written from an Array below; any other JSON value
  # (null, a number, an object) is treated as an unusable cache.
  @owned_items = cached.is_a?(Array) ? cached : []

  if @owned_items.empty? || !cache
    @owned_items = fetch_ownership
    File.write(@cache_file, @owned_items.to_json)
  end
end

Instance Attribute Details

#owned_items ⇒ Object (readonly)

Returns the value of attribute owned_items.



6
7
8
# File 'lib/kindle_util/amazon_crawler.rb', line 6

# Reader for the @owned_items instance variable.
#
# @return [Object] whatever is currently stored in @owned_items
def owned_items
  @owned_items
end

Instance Method Details

#fetch_ownership ⇒ Object



52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
# File 'lib/kindle_util/amazon_crawler.rb', line 52

# Downloads the account's ownership list, 100 entries per request.
#
# Signs in first (#login is memoized, so repeated calls are cheap), then posts
# to the ownership endpoint with an increasing offset until the response's
# data.hasMore field converts to 0. Every page's data.items is passed through
# #unescape before being appended to the result.
#
# @return [Array] all items from every page, in the order received
def fetch_ownership
  # The endpoint lives under the account-management UI; make sure a session exists.
  login

  owned_items = []
  ownership_url = "https://www.amazon.com/gp/digital/fiona/manage/features/order-history/ajax/queryOwnership_refactored.html"

  offset = 0
  count = 100
  loop do
    logger.debug "Fetching ownership data offset=#{offset}, count=#{count}"
    ownership_data = @agent.post(ownership_url, "contentType" => "all",
                                                "randomizer" => rand(10_000_000_000_000),
                                                "count" => count,
                                                "offset" => offset)
    data = JSON.parse(ownership_data.body)
    owned_items.concat(unescape(data['data']['items']))

    break if data['data']['hasMore'].to_i == 0
    offset += count
  end
  logger.debug "Got data for #{owned_items.size} books"
  owned_items
end

#login ⇒ Object



26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
# File 'lib/kindle_util/amazon_crawler.rb', line 26

# Signs in through the Amazon web UI and returns the page that follows.
#
# Opens the home page, follows its "Manage Your Kindle" link to the sign-in
# page, fills the first form there with @user / @pass and submits it. The page
# returned by the submit is memoized in @manage_kindle_page, so later calls
# return it without making any request.
#
# @return [Object] the page returned by submitting the sign-in form
def login
  @manage_kindle_page ||= begin
    logger.debug "Logging into amazon web ui"
    home_page = @agent.get("http://www.amazon.com")
    logger.debug "Got home page: #{home_page.title.strip}"
    login_page = home_page.link_with(:text => "Manage Your Kindle").click
    logger.debug "Got sign in page: #{login_page.title.strip}"
    form = login_page.forms.first
    form.email = @user
    # Radio value "1" — presumably selects the "existing customer" option; confirm against the form.
    form['ap_signin_existing_radio'] = "1"
    form.password = @pass
    manage_page = form.submit
    logger.debug "Got manage page: #{manage_page.title.strip}"
    manage_page
  end
end

#reset_lpr(item) ⇒ Object



78
79
80
81
82
83
84
85
86
87
88
# File 'lib/kindle_util/amazon_crawler.rb', line 78

# Asks Amazon to reset the "last page read" marker for one item.
#
# Signs in first so that the agent's cookie jar holds the "session-id" cookie
# this request needs; without a prior request the cookie lookup returns nil.
# The response body uses single-quoted pseudo-JSON, so quotes are swapped
# before parsing. An "error" field in the response is logged.
#
# @param item [Hash] an owned item; only its 'asin' entry is read
# @return [Boolean] true when the response's 'data' field converts to 1
def reset_lpr(item)
  # #login is memoized, so this is a no-op once a session exists.
  login

  asin = item['asin']
  sid = @agent.cookies.find { |c| c.name == "session-id" }.value
  reset_lpr_url = "https://www.amazon.com/gp/digital/fiona/du/reset-lpr.html/ref=kinw_myk_lpr"
  logger.debug "Resetting last page read: asin=#{asin}, sid=#{sid}"
  response = @agent.post(reset_lpr_url, "asin" => asin, "sid" => sid)
  # NOTE(review): the quote swap breaks if a value itself contains an apostrophe.
  data = JSON.parse(response.body.gsub("'", '"'))
  logger.error "Failed to reset last page read for asin=#{asin}: #{data["error"]}" if data["error"]
  data['data'].to_i == 1
end

#unescape(data) ⇒ Object



43
44
45
46
47
48
49
50
# File 'lib/kindle_util/amazon_crawler.rb', line 43

# Recursively decodes HTML entities in every string of a nested structure.
#
# Strings are passed through CGI.unescapeHTML; arrays and hashes are rebuilt
# with each element, key and value unescaped in turn; any other value is
# returned unchanged.
#
# @param data [Object] a string, array, hash or any other value
# @return [Object] a structure of the same shape with strings unescaped
def unescape(data)
  case data
  when String
    CGI.unescapeHTML(data)
  when Hash
    data.map { |key, value| [unescape(key), unescape(value)] }.to_h
  when Array
    data.map { |element| unescape(element) }
  else
    data
  end
end