Module: ExternalUrl
- Defined in:
- lib/api_helpers/external_url.rb
Constant Summary collapse
- REQUEST_HEADERS =
{ 'User-Agent' => 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.11) Gecko/20071127 Firefox/2.0.0.11' }
Class Method Summary collapse
-
.fetch_response(url, limit = 10, debug = false) ⇒ Object
Returns a hash containing a :success flag. If true, the hash also has the :response (and thus response.body) and the :final_uri (e.g. after a redirect). If false, a :message is set, along with the :final_uri.
-
.just_page_content(page) ⇒ Object
Note: This method is only used now by the Validator, and is only suitable for the Validator.
Class Method Details
.fetch_response(url, limit = 10, debug = false) ⇒ Object
Returns a hash containing a :success flag. If true, the hash also has the :response (and thus response.body) and the :final_uri (e.g. after a redirect). If false, a :message is set, along with the :final_uri.
32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 |
# File 'lib/api_helpers/external_url.rb', line 32
#
# Fetches +url+ with an HTTP GET, following redirects recursively.
#
# url   - the URL to fetch; it is converted with #to_s before being parsed.
# limit - remaining redirect budget. Each redirect recurses with limit - 1;
#         when it reaches 0 the fetch is abandoned.
# debug - when true, traces each step with puts and, on success, writes the
#         raw body and the stripped page text via ExternalUrl.to_file.
#
# Returns a Hash that always carries :success, :response and :final_uri.
# :message is added when the redirect budget is exhausted, when
# invalid_uri(url) returns a truthy value, or when an error is rescued.
# A response that is neither a success nor a redirect yields
# :success => false with the response attached and no :message.
def self.fetch_response(url, limit = 10, debug = false)
  if limit == 0
    return {:success => false, :response => nil, :message => "Redirected too many times", :final_uri => url}
  end

  # invalid_uri is defined elsewhere; presumably it returns an error message
  # for a bad URL and nil/false otherwise -- verify against its definition.
  message = self.invalid_uri(url)
  if message
    return {:success => false, :response => nil, :message => message, :final_uri => url}
  end

  uri = URI.safe_parse(url.to_s)

  # NOTE(review): only the host is handed to Net::HTTP, so a non-default port
  # or an https scheme in +url+ does not appear to be honoured -- confirm.
  http_request = Net::HTTP.new(uri.host)
  puts "http request: #{http_request.inspect}" if debug

  # Everything after the first occurrence of the host becomes the request path.
  # NOTE(review): for a URL with an explicit port this presumably leaves
  # ":port/..." at the front of the path -- confirm.
  no_host_url = uri.to_s.gsub(/.*?#{uri.host}(.*)/, '\1')
  puts "http request to: #{no_host_url}" if debug

  # Adding user agent header helps some merchants feel more comfortable with our bot
  response = http_request.get(no_host_url, REQUEST_HEADERS)
  puts "http response: #{response.inspect}" if debug

  case response
  when Net::HTTPSuccess
    if debug
      puts "Success, final url: #{url}"
      ExternalUrl.to_file(url, response.body, "html")
      ExternalUrl.to_file(url, ExternalUrl.just_page_content(response.body), "txt")
    end
    {:success => true, :response => response, :final_uri => url}
  when Net::HTTPRedirection
    redirect_url = to_absolute_url(response['location'], url)
    puts "Redirecting to #{redirect_url}" if debug
    self.fetch_response(redirect_url, limit - 1, debug)
  else
    {:success => false, :response => response, :final_uri => url}
  end
# Rescue ordinary errors only, so that interrupts, SystemExit and
# NoMemoryError are no longer swallowed into a failure hash. Timeout::Error is
# listed explicitly because on old Ruby versions it does not descend from
# StandardError.
rescue StandardError, Timeout::Error => exp
  {:success => false, :response => nil, :message => exp.message, :final_uri => url}
end
.just_page_content(page) ⇒ Object
Note: This method is only used now by the Validator, and is only suitable for the Validator.
10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 |
# File 'lib/api_helpers/external_url.rb', line 10
#
# Note: This method is only used now by the Validator, and is only suitable
# for the Validator.
#
# Reduces an HTML page to rough text by removing, in order: HTML comments,
# <style> elements, all remaining tags, character entities, and runs of
# whitespace.
#
# page - a String of HTML.
#
# Returns a new String; +page+ itself is not modified.
def self.just_page_content(page)
  # A comment plus up to 40 following characters; the tail is inspected to
  # decide whether the comment sits just before a closing script tag.
  comments = /(<!--.*?-->)(.{1,40})/m
  nostyle = /<style.*?<\/style>/im
  notags = /<.*?>/im
  noentities = /&.*?;/
  noextrawhitespace = /(\s)+/im

  # Remove comments, unless inside of JavaScript (because frequently
  # JavaScript has good matches for model numbers, etc.)
  without_comments = page.gsub(comments) do
    match = Regexp.last_match
    comment = match[1]
    post = match[2]
    post.include?('</script') ? comment + post : post
  end

  # Each whitespace run collapses to its last whitespace character ('\1').
  without_comments.gsub(nostyle, ' ')
                  .gsub(notags, '')
                  .gsub(noentities, ' ')
                  .gsub(noextrawhitespace, '\1')
end