Class: HTTP::Browser
- Inherits:
- Object
- Inheritance hierarchy: Object → HTTP::Browser
- Defined in:
- lib/http_validator.rb
Instance Attribute Summary
-
#base_url ⇒ Object
Returns the value of attribute base_url.
-
#browser ⇒ Object
Returns the value of attribute browser.
-
#element_info ⇒ Object
Returns the value of attribute element_info.
-
#elements ⇒ Object
Returns the value of attribute elements.
-
#random_user_agent ⇒ Object
Returns the value of attribute random_user_agent.
Instance Method Summary
- #clear ⇒ Object
- #element_details(e) ⇒ Object
- #get_elements ⇒ Object
- #initialize(url = '') ⇒ Browser (constructor): returns a new instance of Browser.
Constructor Details
#initialize(url = '') ⇒ Browser
Returns a new instance of Browser.
29 30 31 32 33 34 35 36 37 38 39 40 41 |
# File 'lib/http_validator.rb', line 29
#
# Builds a browser around a Curl::Easy handle, configures it (redirect limit,
# cookies, default User-Agent) and resets the collected state via #clear.
#
# NOTE(review): the two cookie-related attribute names were lost in the
# rendered listing (it showed `@browser. = true` and
# `@browser. = 'cookies.txt'`, which is not valid Ruby). `enable_cookies` and
# `cookiejar` are reconstructed from the Curb API -- confirm against
# lib/http_validator.rb (the second one may have been `cookiefile`).
#
# @param url [String] base URL; stored as-is in @base_url
def initialize(url = '')
  # No throwaway rand call here: its result was discarded, and Ruby seeds
  # Kernel#rand on its own.
  @base_url = url
  @random_user_agent = false

  @browser = Curl::Easy.new
  @browser.max_redirects = 3
  @browser.enable_cookies = true
  @browser.cookiejar = 'cookies.txt'
  @browser.headers["User-Agent"] = "Mozilla/5.0 (X11; Linux i686) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.835.186 Safari/535.1"

  # Tag names scanned by #get_elements.
  @element_types = %w(script img link a)
  # Header keys that #element_details treats as "this element has moved".
  @moved_keys = ['HTTP/1.1 302 Moved Temporarily', 'HTTP/1.1 301 Moved Permanently']

  clear
end
Instance Attribute Details
#base_url ⇒ Object
Returns the value of attribute base_url.
28 29 30 |
# File 'lib/http_validator.rb', line 28
#
# Reader for the base URL.
#
# @return [Object] the current value of @base_url
def base_url
  @base_url
end
#browser ⇒ Object
Returns the value of attribute browser.
28 29 30 |
# File 'lib/http_validator.rb', line 28
#
# Reader for the underlying browser handle.
#
# @return [Object] the current value of @browser
def browser
  @browser
end
#element_info ⇒ Object
Returns the value of attribute element_info.
28 29 30 |
# File 'lib/http_validator.rb', line 28
#
# Reader for the per-element details collected so far.
#
# @return [Object] the current value of @element_info
def element_info
  @element_info
end
#elements ⇒ Object
Returns the value of attribute elements.
28 29 30 |
# File 'lib/http_validator.rb', line 28
#
# Reader for the list of collected element references.
#
# @return [Object] the current value of @elements
def elements
  @elements
end
#random_user_agent ⇒ Object
Returns the value of attribute random_user_agent.
28 29 30 |
# File 'lib/http_validator.rb', line 28
#
# Reader for the random-User-Agent flag.
#
# @return [Object] the current value of @random_user_agent
def random_user_agent
  @random_user_agent
end
Instance Method Details
#clear ⇒ Object
43 44 45 46 |
# File 'lib/http_validator.rb', line 43
#
# Resets the collected state: @element_info becomes an empty Hash and
# @elements an empty Array.
#
# @return [Array] the new, empty @elements array (value of the last assignment)
def clear
  @element_info = {}
  @elements = []
end
#element_details(e) ⇒ Object
48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 |
# File 'lib/http_validator.rb', line 48
#
# Requests one element through @browser and records its sizes.
#
# The request URL is +e+ itself when it starts with http:// or https://,
# otherwise +e+ appended to @base_url. Runs of slashes are collapsed (the
# scheme separator is restored) and surrounding whitespace is stripped.
#
# If the parsed response headers contain one of @moved_keys, the value of the
# 'Location' header is appended to @elements, +e+ is removed from @elements
# and @element_info, and nothing is recorded. Otherwise a Hash with
# :expected_size (Content-Length as Integer), :actual_size (body length) and
# :headers is appended to @element_info[e].
#
# A response without a Content-Length header or without a body is skipped
# (nothing recorded) instead of raising.
#
# @param e [String] absolute URL or path relative to @base_url; never modified
# @return [Array<Hash>, nil] the entries recorded for +e+, or nil when the
#   element moved or was skipped
def element_details(e)
  # Build the URL in a fresh string: +e+ is also used as the key in
  # @element_info and as an entry of @elements, so it must not be mutated.
  url = e =~ %r{\Ahttps?://} ? e : "#{@base_url.chomp('/')}/#{e}"
  url = url.gsub(%r{/{2,}}, '/').gsub(%r{(https?):/}, '\1://').strip
  @browser.url = url

  if @random_user_agent
    index = rand(Constants::USER_AGENTS.length)
    @browser.headers["User-Agent"] = Constants::USER_AGENTS[index].delete("\n")
  end

  @browser.perform
  headers = HTTP::Header.parse(@browser.header_str)

  if @moved_keys.any? { |key| headers.has_key?(key) }
    @elements << headers['Location']
    @elements.delete(e)
    @element_info.delete(e)
    return
  end

  content_length = headers['Content-Length']
  body = @browser.body_str
  # Without both pieces there is nothing meaningful to compare.
  return if content_length.nil? || body.nil?

  (@element_info[e] ||= []) << {:expected_size => content_length.to_s.strip.to_i,
                                :actual_size => body.length,
                                :headers => headers}
end
#get_elements ⇒ Object
87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 |
# File 'lib/http_validator.rb', line 87
#
# Fetches @base_url through @browser, parses the body with Nokogiri and
# appends to @elements the reference of every node whose tag is listed in
# @element_types: the +href+ attribute for +a+ and +link+, the +src+
# attribute for +img+ and +script+.
#
# Nodes that lack the relevant attribute contribute nothing. The attribute is
# looked up per node, so a node without it can no longer re-add the value of
# the node before it.
#
# @return [Array<String>] @element_types (the receiver of the outer +each+)
def get_elements
  @browser.url = @base_url
  @browser.perform
  doc = Nokogiri::HTML(@browser.body_str)

  @element_types.each do |elem|
    attr_name = case elem
                when 'a', 'link' then 'href'
                when 'img', 'script' then 'src'
                end
    # Tag types without a known reference attribute are ignored.
    next if attr_name.nil?

    doc.search("//#{elem}").each do |item|
      attribute = item.attributes[attr_name]
      next if attribute.nil?

      value = attribute.value
      @elements << value unless value.nil?
    end
  end
end