Class: HTTP::Browser

Inherits:
Object
  • Object
show all
Defined in:
lib/http_validator.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(url = '') ⇒ Browser

Returns a new instance of Browser.



29
30
31
32
33
34
35
36
37
38
39
40
41
# File 'lib/http_validator.rb', line 29

# Builds a browser around a Curl::Easy handle.
#
# The handle is configured to follow at most 3 redirects, to keep cookies in
# 'cookies.txt', and to send a fixed desktop Chrome User-Agent unless
# @random_user_agent is switched on later.
#
# @param url [String] base URL that relative element references are resolved
#   against and that #get_elements fetches
def initialize(url='')
  # NOTE: the original called rand(Time.now.tv_sec) here and discarded the
  # result, which has no effect (it looks like srand was intended). Ruby
  # already seeds its PRNG from the OS, so no manual seeding is done.
  @base_url = url
  @random_user_agent = false
  @browser = Curl::Easy.new
  @browser.max_redirects = 3
  @browser.enable_cookies = true
  @browser.cookiejar = 'cookies.txt'
  @browser.headers["User-Agent"] = "Mozilla/5.0 (X11; Linux i686) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.835.186 Safari/535.1"
  # Tags whose href/src references are collected by #get_elements.
  @element_types = %w(script img link a)
  # Header keys (status lines) that mark a response as a redirect.
  @moved_keys = ['HTTP/1.1 302 Moved Temporarily', 'HTTP/1.1 301 Moved Permanently']
  clear
end

Instance Attribute Details

#base_url ⇒ Object

Returns the value of attribute base_url.



28
29
30
# File 'lib/http_validator.rb', line 28

# Reader for @base_url.
#
# @return [Object] the current value of @base_url (nil if never assigned)
def base_url
  instance_variable_get(:@base_url)
end

#browser ⇒ Object

Returns the value of attribute browser.



28
29
30
# File 'lib/http_validator.rb', line 28

# Reader for @browser.
#
# @return [Object] the current value of @browser (nil if never assigned)
def browser
  instance_variable_get(:@browser)
end

#element_info ⇒ Object

Returns the value of attribute element_info.



28
29
30
# File 'lib/http_validator.rb', line 28

# Reader for @element_info.
#
# @return [Object] the current value of @element_info (nil if never assigned)
def element_info
  instance_variable_get(:@element_info)
end

#elements ⇒ Object

Returns the value of attribute elements.



28
29
30
# File 'lib/http_validator.rb', line 28

# Reader for @elements.
#
# @return [Object] the current value of @elements (nil if never assigned)
def elements
  instance_variable_get(:@elements)
end

#random_user_agent ⇒ Object

Returns the value of attribute random_user_agent.



28
29
30
# File 'lib/http_validator.rb', line 28

# Reader for @random_user_agent.
#
# @return [Object] the current value of @random_user_agent (nil if never assigned)
def random_user_agent
  instance_variable_get(:@random_user_agent)
end

Instance Method Details

#clear ⇒ Object



43
44
45
46
# File 'lib/http_validator.rb', line 43

# Discards everything collected so far by replacing @element_info with a new
# empty Hash and @elements with a new empty Array.
#
# @return [Array] the new, empty @elements array
def clear
  @element_info = Hash.new
  @elements = Array.new
end

#element_details(e) ⇒ Object



48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
# File 'lib/http_validator.rb', line 48

# Fetches one page element and records its advertised and received size.
#
# Relative references are resolved against @base_url. If the parsed response
# headers contain one of @moved_keys, the element is dropped from @elements
# and @element_info, the redirect target (when a Location header is present)
# is appended to @elements, and nothing is recorded. Otherwise a hash with
# :expected_size (the Content-Length header), :actual_size (bytes in the body)
# and :headers is appended to @element_info[e].
#
# @param e [String] absolute http(s) URL, or a path relative to @base_url
# @return [Array<Hash>, nil] the entries recorded for +e+, or nil when the
#   element was redirected or the response carried no Content-Length header
def element_details(e)
  # Build a fresh string instead of editing +e+ in place: +e+ belongs to the
  # caller and is also the key used in @elements / @element_info.
  reference = e.strip
  url = if reference =~ %r{\Ahttps?://}
          reference
        else
          "#{@base_url.strip.sub(%r{/\z}, '')}/#{reference}"
        end
  # Collapse doubled slashes, then restore the one the scheme separator needs.
  url = url.gsub('//', '/').gsub('http:/', 'http://').gsub('https:/', 'https://')

  @browser.url = url
  if @random_user_agent
    index = rand(Constants::USER_AGENTS.length)
    @browser.headers["User-Agent"] = Constants::USER_AGENTS[index].gsub(/\n/, '')
  end
  @browser.perform

  headers = HTTP::Header.parse(@browser.header_str)
  if @moved_keys.any? { |key| headers.has_key?(key) }
    location = headers['Location']
    # A redirect without a Location header gives nothing to follow.
    @elements << location unless location.nil?
    @elements.delete(e)
    @element_info.delete(e)
    return
  end

  # Without a Content-Length (e.g. a chunked response) there is no expected
  # size to compare against, so the element is skipped rather than raising.
  content_length = headers['Content-Length']
  return if content_length.nil?

  entry = {:expected_size => content_length.strip.to_i,
           # Content-Length is a byte count, so measure the body in bytes.
           :actual_size => @browser.body_str.to_s.bytesize,
           :headers => headers}
  (@element_info[e] ||= []) << entry
end

#get_elements ⇒ Object



87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
# File 'lib/http_validator.rb', line 87

# Fetches @base_url, parses the body as HTML and appends to @elements the
# href of every <a>/<link> and the src of every <img>/<script>, visiting the
# tag names in the order given by @element_types.
#
# Nodes that lack the relevant attribute are skipped.
#
# @return [Array<String>] @element_types (the receiver of the outer each)
def get_elements
  @browser.url = @base_url
  @browser.perform
  doc = Nokogiri::HTML(@browser.body_str)
  # Which attribute carries the reference for each supported tag.
  url_attributes = { 'a' => 'href', 'link' => 'href', 'img' => 'src', 'script' => 'src' }
  @element_types.each do |elem|
    attribute = url_attributes[elem]
    next if attribute.nil?

    doc.search("//#{elem}").each do |item|
      # Node#[] returns nil for a missing attribute, so each node is judged on
      # its own value and a previous node's reference is never re-added.
      reference = item[attribute]
      @elements << reference unless reference.nil?
    end
  end
end