Class: Saper::Browser

Inherits:
Object
  • Object
show all
Defined in:
lib/saper/core/browser.rb

Constant Summary collapse

# User-Agent strings keyed by a short symbolic name.
# The :saper entry embeds the library version (Saper::VERSION).
AGENTS = {
  ie6:     'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)',
  ie7:     'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; .NET CLR 1.1.4322; .NET CLR 2.0.50727)',
  ie8:     'Mozilla/5.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0; .NET CLR 1.1.4322; .NET CLR 2.0.50727)',
  ie9:     'Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0)',
  mozilla: 'Mozilla/5.0 (Windows; U; Windows NT 5.0; en-US; rv:1.4b) Gecko/20030516 Mozilla Firebird/0.6',
  safari:  'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_2) AppleWebKit/534.51.22 (KHTML, like Gecko) Version/5.1.1 Safari/534.51.22',
  iphone:  'Mozilla/5.0 (iPhone; U; CPU like Mac OS X; en) AppleWebKit/420+ (KHTML, like Gecko) Version/3.0 Mobile/1C28 Safari/419.3',
  ipad:    'Mozilla/5.0 (iPad; U; CPU OS 3_2 like Mac OS X; en-us) AppleWebKit/531.21.10 (KHTML, like Gecko) Version/4.0.4 Mobile/7B334b Safari/531.21.10',
  android: 'Mozilla/5.0 (Linux; U; Android 3.0; en-us; Xoom Build/HRI39) AppleWebKit/534.13 (KHTML, like Gecko) Version/4.0 Safari/534.13',
  saper:   'Mozilla/5.0 (compatible; Saper Ruby client %s)' % Saper::VERSION
}

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(options = {}) ⇒ Saper::Browser

Returns a new Browser instance.

Parameters:

  • options (Hash) (defaults to: {})

    a customizable set of options

Options Hash (options):

  • :agent (Symbol)

    User agent

  • :headers (Hash)

    Additional request headers

  • :logger (Logger)

    Logger instance



31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
# File 'lib/saper/core/browser.rb', line 31

# Returns a new Browser instance.
#
# Sets up the byte counters and request history, builds the underlying
# Mechanize agent, registers hooks that keep the sent/received counters
# up to date, and finally announces the new browser to the logger.
#
# @param options [Hash] a customizable set of options (not modified)
# @option options [Symbol] :agent user agent; falls back to :saper
# @option options [Hash] :headers additional request headers
# @option options [Logger] :logger logger instance; falls back to Saper::Logger.new
# @return [Saper::Browser]
def initialize(options = {})
  # Read the options instead of deleting them: the caller's hash must not be
  # mutated, and a frozen hash must be accepted.
  @agent    = options[:agent] || :saper
  @headers  = options[:headers]
  @logger   = options[:logger] || Saper::Logger.new
  @history  = []
  @received = 0
  @sent     = 0
  @mech = Mechanize.new do |a|
    a.robots                 = false
    a.user_agent             = agent
    a.request_headers        = headers
    # NOTE(review): presumably clears Mechanize's content-type specific
    # parsers so bodies are handed over unparsed - confirm against the
    # Mechanize pluggable parser documentation.
    a.pluggable_parser.csv   = nil
    a.pluggable_parser.html  = nil
    a.pluggable_parser.xhtml = nil
    a.pluggable_parser.xml   = nil
  end
  # The header contribution is the length of the inspected header hash, so
  # both counters are approximations. Bodies are measured with bytesize
  # because the counters are documented as bytes, not characters.
  @mech.pre_connect_hook do |_agent, req|
    @sent += req.to_hash.to_s.size
    @sent += (req.body.nil? ? 0 : req.body.bytesize)
  end
  @mech.post_connect_hook do |_agent, _uri, resp, body|
    @received += resp.to_hash.to_s.size
    @received += body.bytesize
  end
  @logger.new_browser(self)
end

Instance Attribute Details

#history ⇒ Object (readonly)

Array of requested URLs.



5
6
7
# File 'lib/saper/core/browser.rb', line 5

# Reader for the request history.
#
# @return [Object] the current value of @history (the array of requested URLs)
def history
  @history
end

#received ⇒ Object (readonly)

Approximate number of bytes received.



8
9
10
# File 'lib/saper/core/browser.rb', line 8

# Reader for the received-bytes counter.
#
# @return [Object] the current value of @received (approximate number of bytes received)
def received
  @received
end

#sent ⇒ Object (readonly)

Approximate number of bytes sent.



11
12
13
# File 'lib/saper/core/browser.rb', line 11

# Reader for the sent-bytes counter.
#
# @return [Object] the current value of @sent (approximate number of bytes sent)
def sent
  @sent
end

Instance Method Details

#agent ⇒ String

Returns User-Agent string used with requests.

Returns:

  • (String)


103
104
105
# File 'lib/saper/core/browser.rb', line 103

# Returns the User-Agent string used with requests.
#
# The configured agent is first looked up in AGENTS by its symbol form; when
# there is no such entry, the configured value itself is returned as a string.
#
# @return [String]
def agent
  known = AGENTS[@agent.to_sym]
  return known if known

  @agent.to_s
end

#get(url, query = {}) ⇒ Saper::Document

Performs a GET request and returns a Saper::Items::Document, or a Saper::Items::Nothing when Mechanize raises a ResponseCodeError.

Parameters:

  • url (String)

    URL to request

  • query (Hash) (defaults to: {})

    query options

Returns:

  • (Saper::Document)


74
75
76
77
78
79
80
81
# File 'lib/saper/core/browser.rb', line 74

# Performs a GET request and wraps the response in a document object.
#
# The request is logged and the URL is appended to the history before the
# request is issued, so URLs of failed requests are recorded as well.
#
# @param url [String] URL to request
# @param query [Hash] query options, forwarded to @mech.get
# @return [Saper::Items::Document] built from the response body, URI and header
# @return [Saper::Items::Nothing] when a Mechanize::ResponseCodeError is raised
def get(url, query = {})
  @logger.new_get_request(url)
  @history << url
  page = @mech.get(url, query)
  Saper::Items::Document.new(page.body, page.uri, page.header)
rescue Mechanize::ResponseCodeError
  # TODO: change to custom exception
  Saper::Items::Nothing.new
end

#headers ⇒ Hash

Returns additional request headers.

Returns:

  • (Hash)


66
67
68
# File 'lib/saper/core/browser.rb', line 66

# Returns additional request headers.
#
# The configured headers object is returned as-is when it responds to
# +to_hash+; anything else (including nil) yields a new empty hash.
#
# @return [Hash]
def headers
  return @headers if @headers.respond_to?(:to_hash)

  {}
end

#post(url, query = {}, headers = {}) ⇒ Saper::Document

Performs a POST request and returns a Saper::Items::Document, or a Saper::Items::Nothing when Mechanize raises a ResponseCodeError.

Parameters:

  • url (String)

    URL to request

  • query (Hash) (defaults to: {})

    payload

  • headers (Hash) (defaults to: {})

    additional request headers for this request

Returns:

  • (Saper::Document)


87
88
89
90
91
92
93
94
# File 'lib/saper/core/browser.rb', line 87

# Performs a POST request and wraps the response in a document object.
#
# The request is logged and the URL is appended to the history before the
# request is issued, so URLs of failed requests are recorded as well.
#
# @param url [String] URL to request
# @param query [Hash] payload, forwarded to @mech.post
# @param headers [Hash] extra headers for this request, forwarded to @mech.post
# @return [Saper::Items::Document] built from the response body, URI and header
# @return [Saper::Items::Nothing] when a Mechanize::ResponseCodeError is raised
def post(url, query = {}, headers = {})
  @logger.new_post_request(url)
  @history << url
  page = @mech.post(url, query, headers)
  Saper::Items::Document.new(page.body, page.uri, page.header)
rescue Mechanize::ResponseCodeError
  # TODO: change to custom exception
  Saper::Items::Nothing.new
end

#post_with_bearer_token(url, query, token) ⇒ Object

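Performs a POST request with an "Authorization: Bearer <token>" header added.

TODO: document parameters and return value.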


97
98
99
# File 'lib/saper/core/browser.rb', line 97

# Performs a POST request carrying a bearer-token Authorization header.
#
# @param url [String] URL to request
# @param query [Hash] payload, forwarded to #post
# @param token [String] token placed after "Bearer " in the Authorization header
# @return [Object] whatever #post returns
def post_with_bearer_token(url, query, token)
  auth_header = { "Authorization" => "Bearer %s" % token }
  post(url, query, auth_header)
end

#requests ⇒ Integer

Returns the number of HTTP requests.

Returns:

  • (Integer)


60
61
62
# File 'lib/saper/core/browser.rb', line 60

# Returns the number of HTTP requests, i.e. the number of entries currently
# held in the request history.
#
# @return [Integer]
def requests
  @history.length
end