Class: Crown::Amazon::AsinExtractor
- Inherits:
-
Object
- Object
- Crown::Amazon::AsinExtractor
- Defined in:
- lib/crown/amazon.rb
Overview
——————————————————————- #
AsinExtractor
指定した URI の a タグ href 属性から Amazon の ASIN と思われる
情報を抽出するクラス.
——————————————————————- #
Instance Method Summary collapse
-
#get(uri) ⇒ Object
Fetches the page at the given URI and returns the Amazon ASINs found in the href attributes of its a tags.
-
#initialize(proxy_host = nil, proxy_port = nil) ⇒ AsinExtractor
constructor
A new instance of AsinExtractor.
Constructor Details
#initialize(proxy_host = nil, proxy_port = nil) ⇒ AsinExtractor
Returns a new instance of AsinExtractor.
53 54 55 56 |
# File 'lib/crown/amazon.rb', line 53
#
# Stores the optional HTTP proxy settings; #get passes them to Net::HTTP.new
# as the proxy address and port when it fetches a page.
#
# @param proxy_host [String, nil] proxy host name, or nil for a direct connection
# @param proxy_port [Integer, nil] proxy port, or nil
def initialize(proxy_host = nil, proxy_port = nil)
  @proxy_host = proxy_host
  @proxy_port = proxy_port
end
Instance Method Details
#get(uri) ⇒ Object
————————————————————— #
get
————————————————————— #
61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 |
# File 'lib/crown/amazon.rb', line 61
#
# Fetches the page at +uri+ and collects the ASINs referenced by its links.
#
# The page is requested with Crown::HTTP.get over a Net::HTTP session that
# uses the proxy settings given to the constructor. Every <a> element whose
# href points at one of the listed Amazon hosts is handed to get_asin
# (presumably returns the ASIN or nil -- defined elsewhere in this class),
# and each non-nil result is recorded once, in document order.
#
# @param uri [String] URL of the page to scan; surrounding whitespace is ignored
# @return [Array] unique get_asin results in order of first appearance; empty
#   when the response is nil or its status code is not 200
# @raise [URI::InvalidURIError] if +uri+ itself cannot be parsed
def get(uri)
  result = []
  page = URI.parse(uri.strip)

  # An empty request path is not a valid HTTP request target; ask for the root.
  path = page.path.to_s
  path = '/' if path.empty?
  path += "?#{page.query}" if page.query

  session = Net::HTTP.new(page.host, page.port, @proxy_host, @proxy_port)
  response = Crown::HTTP.get(session, path)
  return result if response.nil? || response.code.to_i != 200

  # \A/\z anchor the whole string; host names are case-insensitive.
  amazon_host = /\A(?:www\.)?amazon\.(?:com|ca|co\.uk|de|co\.jp|jp|fr|cn)\z/i
  # URI.encode was removed in Ruby 3.0; the RFC 2396 parser still offers escape.
  escaper = URI::RFC2396_Parser.new

  Nokogiri::HTML.parse(response.body).search('a').each do |node|
    href = node['href']
    next if href.nil?

    href = href.strip
    link = begin
      URI.parse(href)
    rescue URI::InvalidURIError
      begin
        # Retry once with unsafe characters percent-escaped.
        URI.parse(escaper.escape(href))
      rescue URI::InvalidURIError
        nil # still malformed: skip this link instead of aborting the scan
      end
    end
    next if link.nil? || link.host.nil? || link.path.nil?
    next unless link.host =~ amazon_host

    asin = get_asin(link.path, link.query)
    result.push(asin) if !asin.nil? && !result.include?(asin)
  end

  result
end