Class: Mechanize

Inherits:
Object
  • Object
show all
Defined in:
lib/mechanize.rb,
lib/mechanize/file.rb,
lib/mechanize/form.rb,
lib/mechanize/page.rb,
lib/mechanize/util.rb,
lib/mechanize/chain.rb,
lib/mechanize/cookie.rb,
lib/mechanize/headers.rb,
lib/mechanize/history.rb,
lib/mechanize/inspect.rb,
lib/mechanize/page/base.rb,
lib/mechanize/page/link.rb,
lib/mechanize/page/meta.rb,
lib/mechanize/cookie_jar.rb,
lib/mechanize/file_saver.rb,
lib/mechanize/form/field.rb,
lib/mechanize/page/frame.rb,
lib/mechanize/page/image.rb,
lib/mechanize/page/label.rb,
lib/mechanize/form/button.rb,
lib/mechanize/form/option.rb,
lib/mechanize/monkey_patch.rb,
lib/mechanize/chain/handler.rb,
lib/mechanize/file_response.rb,
lib/mechanize/form/check_box.rb,
lib/mechanize/form/file_upload.rb,
lib/mechanize/form/select_list.rb,
lib/mechanize/form/image_button.rb,
lib/mechanize/form/radio_button.rb,
lib/mechanize/pluggable_parsers.rb,
lib/mechanize/chain/auth_headers.rb,
lib/mechanize/chain/ssl_resolver.rb,
lib/mechanize/chain/uri_resolver.rb,
lib/mechanize/content_type_error.rb,
lib/mechanize/response_code_error.rb,
lib/mechanize/chain/custom_headers.rb,
lib/mechanize/chain/header_resolver.rb,
lib/mechanize/chain/response_reader.rb,
lib/mechanize/chain/pre_connect_hook.rb,
lib/mechanize/chain/request_resolver.rb,
lib/mechanize/form/multi_select_list.rb,
lib/mechanize/chain/parameter_resolver.rb,
lib/mechanize/unsupported_scheme_error.rb,
lib/mechanize/chain/connection_resolver.rb,
lib/mechanize/chain/response_body_parser.rb,
lib/mechanize/chain/body_decoding_handler.rb,
lib/mechanize/redirect_limit_reached_error.rb,
lib/mechanize/chain/response_header_handler.rb,
lib/mechanize/redirect_not_get_or_head_error.rb

Overview

:stopdoc:

Defined Under Namespace

Modules: Handler

Classes: Chain, ContentTypeError, Cookie, CookieJar, File, FileResponse, FileSaver, Form, Headers, History, Page, PluggableParser, RedirectLimitReachedError, RedirectNotGetOrHeadError, ResponseCodeError, UnsupportedSchemeError, Util

Constant Summary collapse

VERSION =

The version of Mechanize you are using.

'1.0.0'
AGENT_ALIASES =

User Agent aliases

# Maps a short alias name to a full User-Agent string. user_agent_alias=
# looks the alias up here and assigns the matching string to user_agent.
{
  'Windows IE 6' => 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)',
  'Windows IE 7' => 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; .NET CLR 1.1.4322; .NET CLR 2.0.50727)',
  'Windows Mozilla' => 'Mozilla/5.0 (Windows; U; Windows NT 5.0; en-US; rv:1.4b) Gecko/20030516 Mozilla Firebird/0.6',
  'Mac Safari' => 'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_2; de-at) AppleWebKit/531.21.8 (KHTML, like Gecko) Version/4.0.4 Safari/531.21.10',
  'Mac FireFox' => 'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.6; en-US; rv:1.9.2) Gecko/20100115 Firefox/3.6',
  'Mac Mozilla' => 'Mozilla/5.0 (Macintosh; U; PPC Mac OS X Mach-O; en-US; rv:1.4a) Gecko/20030401',
  'Linux Mozilla' => 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.4) Gecko/20030624',
  'Linux Firefox' => 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.2.1) Gecko/20100122 firefox/3.6.1',
  'Linux Konqueror' => 'Mozilla/5.0 (compatible; Konqueror/3; Linux)',
  'iPhone' => 'Mozilla/5.0 (iPhone; U; CPU like Mac OS X; en) AppleWebKit/420+ (KHTML, like Gecko) Version/3.0 Mobile/1C28 Safari/419.3',
  # Default identity: initialize assigns this entry to @user_agent; the string
  # embeds the VERSION constant.
  'Mechanize' => "WWW-Mechanize/#{VERSION} (http://rubyforge.org/projects/mechanize/)"
}

Class Attribute Summary collapse

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize {|_self| ... } ⇒ Mechanize

Returns a new instance of Mechanize.

Yields:

  • (_self)

Yield Parameters:

  • _self (Mechanize)

    the object that the method was called on



116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
# File 'lib/mechanize.rb', line 116

# Builds an agent with default settings: an empty cookie jar and history, no
# credentials or proxy, redirects and gzip enabled, keep-alive on, and
# pass-through handlers for the http, https, relative and file schemes.
# If a block is given, the new agent is yielded to it as the last step so the
# caller can adjust any setting.
def initialize
  # attr_accessors
  @cookie_jar     = CookieJar.new
  # NOTE(review): the #log / #log= methods delegate to self.class.log, so this
  # instance variable does not appear to be read by them -- confirm.
  @log            = nil
  @open_timeout   = nil
  @read_timeout   = nil
  @user_agent     = AGENT_ALIASES['Mechanize']
  @watch_for_set  = nil
  @history_added  = nil
  @ca_file        = nil # OpenSSL server certificate file

  # callback for OpenSSL errors while verifying the server certificate
  # chain, can be used for debugging or to ignore errors by always
  # returning _true_
  @verify_callback = nil
  @cert           = nil # OpenSSL Certificate
  @key            = nil # OpenSSL Private Key
  @pass           = nil # OpenSSL Password
  @redirect_ok    = true # Should we follow redirects?
  @gzip_enabled   = true

  # attr_readers
  @history        = Mechanize::History.new
  @pluggable_parser = PluggableParser.new

  # Auth variables
  @user           = nil # Auth User
  @password       = nil # Auth Password
  @digest         = nil # DigestAuth Digest
  @auth_hash      = {}  # Keep track of urls for sending auth
  @request_headers= {}  # A hash of request headers to be used

  # Proxy settings
  @proxy_addr     = nil
  @proxy_pass     = nil
  @proxy_port     = nil
  @proxy_user     = nil

  @conditional_requests = true

  @follow_meta_refresh  = false
  @redirection_limit    = 20

  # Connection Cache & Keep alive
  @connection_cache = {}
  @keep_alive_time  = 300
  @keep_alive       = true

  # Looking up an unregistered scheme stores (and returns) a handler that
  # raises UnsupportedSchemeError for that scheme when it is called.
  @scheme_handlers  = Hash.new { |h,k|
    h[k] = lambda { |link, page|
      raise UnsupportedSchemeError.new(k)
    }
  }
  # The supported schemes all share one handler that returns the link unchanged.
  @scheme_handlers['http']      = lambda { |link, page| link }
  @scheme_handlers['https']     = @scheme_handlers['http']
  @scheme_handlers['relative']  = @scheme_handlers['http']
  @scheme_handlers['file']      = @scheme_handlers['http']

  @pre_connect_hook = Chain::PreConnectHook.new
  @post_connect_hook = Chain::PostConnectHook.new

  # Each instance starts from the class-level parser setting.
  @html_parser = self.class.html_parser

  yield self if block_given?
end

Class Attribute Details

.html_parserObject

Returns the value of attribute html_parser.



107
108
109
# File 'lib/mechanize.rb', line 107

# Class-level reader for the HTML parser setting. #initialize copies this
# value into every new instance, and .inherited passes it on to subclasses.
def html_parser
  @html_parser
end

.logObject

Returns the value of attribute log.



107
108
109
# File 'lib/mechanize.rb', line 107

# Class-level reader for the log object. The instance methods #log and #log=
# delegate to this class-level attribute.
def log
  @log
end

Instance Attribute Details

#ca_fileObject

Returns the value of attribute ca_file.



73
74
75
# File 'lib/mechanize.rb', line 73

# Returns the OpenSSL server certificate file setting (nil by default).
def ca_file
  @ca_file
end

#certObject

Returns the value of attribute cert.



75
76
77
# File 'lib/mechanize.rb', line 75

# Returns the OpenSSL certificate setting (nil by default).
def cert
  @cert
end

#conditional_requestsObject

Returns the value of attribute conditional_requests.



81
82
83
# File 'lib/mechanize.rb', line 81

# Returns the conditional-requests flag, which #initialize sets to true.
def conditional_requests
  @conditional_requests
end

#cookie_jar ⇒ Object

Returns the value of attribute cookie_jar.



69
70
71
# File 'lib/mechanize.rb', line 69

# Returns the cookie jar; #initialize creates it as a new CookieJar.
def cookie_jar
  @cookie_jar
end

#follow_meta_refreshObject

Returns the value of attribute follow_meta_refresh.



82
83
84
# File 'lib/mechanize.rb', line 82

# Returns the follow_meta_refresh flag, which #initialize sets to false.
# NOTE(review): presumably controls following <meta> refresh tags -- confirm.
def follow_meta_refresh
  @follow_meta_refresh
end

#gzip_enabledObject

Returns the value of attribute gzip_enabled.



78
79
80
# File 'lib/mechanize.rb', line 78

# Returns the gzip_enabled flag, which #initialize sets to true.
def gzip_enabled
  @gzip_enabled
end

#historyObject (readonly)

Returns the value of attribute history.



100
101
102
# File 'lib/mechanize.rb', line 100

# Returns the page history (read-only attribute); #initialize creates it as a
# new Mechanize::History.
def history
  @history
end

#history_addedObject

Returns the value of attribute history_added.



84
85
86
# File 'lib/mechanize.rb', line 84

# Returns the history_added setting (nil by default).
# NOTE(review): presumably a callback run when a page is added to the
# history -- confirm against add_to_history.
def history_added
  @history_added
end

#html_parserObject

The HTML parser to be used when parsing documents



98
99
100
# File 'lib/mechanize.rb', line 98

# The HTML parser to be used when parsing documents. #initialize seeds it
# from the class-level html_parser attribute.
def html_parser
  @html_parser
end

#keep_aliveObject

Returns the value of attribute keep_alive.



80
81
82
# File 'lib/mechanize.rb', line 80

# Returns the keep_alive flag, which #initialize sets to true.
def keep_alive
  @keep_alive
end

#keep_alive_timeObject

Returns the value of attribute keep_alive_time.



79
80
81
# File 'lib/mechanize.rb', line 79

# Returns the keep_alive_time setting, which #initialize sets to 300
# (presumably seconds -- TODO confirm).
def keep_alive_time
  @keep_alive_time
end

#keyObject

Returns the value of attribute key.



74
75
76
# File 'lib/mechanize.rb', line 74

# Returns the OpenSSL private key setting (nil by default).
def key
  @key
end

#open_timeoutObject

Returns the value of attribute open_timeout.



70
71
72
# File 'lib/mechanize.rb', line 70

# Returns the open_timeout setting (nil by default).
def open_timeout
  @open_timeout
end

#passObject

Returns the value of attribute pass.



76
77
78
# File 'lib/mechanize.rb', line 76

# Returns the OpenSSL password setting (nil by default).
def pass
  @pass
end

#pluggable_parserObject (readonly)

Returns the value of attribute pluggable_parser.



101
102
103
# File 'lib/mechanize.rb', line 101

# Returns the PluggableParser instance created in #initialize (read-only
# attribute).
def pluggable_parser
  @pluggable_parser
end

#proxy_addrObject (readonly)

Proxy settings



92
93
94
# File 'lib/mechanize.rb', line 92

# Returns the proxy address (read-only attribute); nil until #set_proxy is
# called.
def proxy_addr
  @proxy_addr
end

#proxy_passObject (readonly)

Returns the value of attribute proxy_pass.



93
94
95
# File 'lib/mechanize.rb', line 93

# Returns the proxy password (read-only attribute); nil until #set_proxy is
# called with one.
def proxy_pass
  @proxy_pass
end

#proxy_portObject (readonly)

Returns the value of attribute proxy_port.



94
95
96
# File 'lib/mechanize.rb', line 94

# Returns the proxy port (read-only attribute); nil until #set_proxy is
# called.
def proxy_port
  @proxy_port
end

#proxy_userObject (readonly)

Returns the value of attribute proxy_user.



95
96
97
# File 'lib/mechanize.rb', line 95

# Returns the proxy user name (read-only attribute); nil until #set_proxy is
# called with one.
def proxy_user
  @proxy_user
end

#read_timeoutObject

Returns the value of attribute read_timeout.



70
71
72
# File 'lib/mechanize.rb', line 70

# Returns the read_timeout setting (nil by default).
def read_timeout
  @read_timeout
end

#redirect_okObject Also known as: follow_redirect?

Returns the value of attribute redirect_ok.



77
78
79
# File 'lib/mechanize.rb', line 77

# Returns whether redirects should be followed (true by default). Also
# available as follow_redirect?.
def redirect_ok
  @redirect_ok
end

#redirection_limitObject

Returns the value of attribute redirection_limit.



86
87
88
# File 'lib/mechanize.rb', line 86

# Returns the redirection_limit setting, which #initialize sets to 20.
def redirection_limit
  @redirection_limit
end

#request_headersObject

A hash of custom request headers



89
90
91
# File 'lib/mechanize.rb', line 89

# A hash of custom request headers; starts out empty.
def request_headers
  @request_headers
end

#scheme_handlersObject

Returns the value of attribute scheme_handlers.



85
86
87
# File 'lib/mechanize.rb', line 85

# Returns the hash mapping a URI scheme name to a handler lambda taking
# (link, page). #initialize registers pass-through handlers for 'http',
# 'https', 'relative' and 'file'; any other key yields a handler that raises
# UnsupportedSchemeError.
def scheme_handlers
  @scheme_handlers
end

#user_agentObject

Returns the value of attribute user_agent.



71
72
73
# File 'lib/mechanize.rb', line 71

# Returns the User-Agent string; defaults to AGENT_ALIASES['Mechanize'].
def user_agent
  @user_agent
end

#verify_callbackObject

Returns the value of attribute verify_callback.



83
84
85
# File 'lib/mechanize.rb', line 83

# Returns the callback for OpenSSL errors raised while verifying the server
# certificate chain (nil by default). It can be used for debugging, or to
# ignore errors by always returning true.
def verify_callback
  @verify_callback
end

#watch_for_setObject

Returns the value of attribute watch_for_set.



72
73
74
# File 'lib/mechanize.rb', line 72

# Returns the watch_for_set setting (nil by default).
def watch_for_set
  @watch_for_set
end

Class Method Details

.inherited(child) ⇒ Object



109
110
111
112
113
# File 'lib/mechanize.rb', line 109

# Class-level hook that receives a new subclass as +child+. The subclass gets
# this class's html_parser and log unless it already has its own values.
def inherited(child)
  # ||= keeps any value the child already holds.
  child.html_parser ||= html_parser
  child.log ||= log
  # Hand over to the next inherited hook in the ancestor chain.
  super
end

Instance Method Details

#auth(user, password) ⇒ Object Also known as: basic_auth

Sets the user and password to be used for authentication.



213
214
215
216
# File 'lib/mechanize.rb', line 213

# Stores the credentials to be used for authentication in @user and
# @password. Also available as basic_auth.
# The return value is +password+ (the value of the last assignment).
def auth(user, password)
  @user       = user
  @password   = password
end

#backObject

Equivalent to the browser back button. Returns the most recent page visited.



320
321
322
# File 'lib/mechanize.rb', line 320

# Equivalent to the browser back button: removes the most recent entry from
# the history via @history.pop and returns it.
def back
  @history.pop
end

#click(link) ⇒ Object

Clicks the Mechanize::Link object passed in and returns the page fetched.



311
312
313
314
315
316
# File 'lib/mechanize.rb', line 311

# Clicks +link+ and returns the page fetched by #get.
#
# +link+ may be an object that responds to +href+ (and optionally +page+), or
# anything indexable by 'href' / 'src'. The referer is the link's own page
# when it can be obtained, otherwise the current page.
def click(link)
  # Best effort: any StandardError while asking for the page means "no referer".
  referer =
    begin
      link.page
    rescue
      nil
    end

  href =
    if link.respond_to?(:href)
      link.href
    else
      link['href'] || link['src']
    end

  get(:url => href, :referer => (referer || current_page))
end

#cookiesObject

Returns a list of cookies stored in the cookie jar.



208
209
210
# File 'lib/mechanize.rb', line 208

# Returns the contents of the cookie jar as an array (@cookie_jar.to_a).
def cookies
  @cookie_jar.to_a
end

#current_pageObject Also known as: page

Returns the current page loaded by Mechanize



408
409
410
# File 'lib/mechanize.rb', line 408

# Returns the current page, i.e. the last entry of the history. Also
# available as +page+.
def current_page
  @history.last
end

#delete(url, query_params = {}, options = {}) ⇒ Object

DELETE to url with query_params, and setting options:

delete('http://tenderlovemaking.com/', {'q' => 'foo'}, :headers => {})


280
281
282
283
284
# File 'lib/mechanize.rb', line 280

# DELETE to +url+ with +query_params+, and setting +options+:
#
#   delete('http://tenderlovemaking.com/', {'q' => 'foo'}, :headers => {})
#
# The request goes through #head with the verb forced to :delete. The
# resulting page is added to the history and returned.
def delete(url, query_params = {}, options = {})
  delete_options = options.merge(:verb => :delete)
  response = head(url, query_params, delete_options)
  add_to_history(response)
  response
end

#get(options, parameters = [], referer = nil) {|page| ... } ⇒ Object

Fetches the URL passed in and returns a page.

Yields:



220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
# File 'lib/mechanize.rb', line 220

# Fetches a URL and returns the resulting page, adding it to the history.
#
# Two calling conventions are accepted:
#
# * get(url, parameters = [], referer = nil) -- for backwards compatibility,
#   a second argument that does not respond to +each+ is taken as the referer.
# * get(:url => ..., :params => ..., :referer => ..., :headers => ...,
#   :verb => ...) -- :url is mandatory; ArgumentError is raised without it.
#
# When no referer is supplied, an absolute http(s) URL gets a blank page as
# referer, while anything else uses the current page (or a blank page when
# there is none). The page is yielded to the block, if one is given.
def get(options, parameters = [], referer = nil)
  verb    = :get
  headers = nil

  if options.is_a? Hash
    url = options[:url]
    raise ArgumentError.new("url must be specified") unless url
    parameters = options[:params] || []
    referer    = options[:referer]
    headers    = options[:headers]
    verb       = options[:verb] || verb
  else
    url = options
    unless parameters.respond_to?(:each) # FIXME: Remove this in 0.8.0
      referer    = parameters
      parameters = []
    end
  end

  unless referer
    # Only non-http URLs inherit the current page as their referer.
    referer = current_page unless url.to_s =~ /^http/
    referer ||= Page.new(nil, {'content-type'=>'text/html'})
  end

  # FIXME: Huge hack so that using a URI as a referer works.  I need to
  # refactor everything to pass around URIs but still support
  # Mechanize::Page#base
  unless referer.is_a?(Mechanize::File)
    referer_uri = referer.is_a?(String) ? URI.parse(referer) : referer
    referer = Page.new(referer_uri, {'content-type' => 'text/html'})
  end

  # fetch the page
  request = {
    :uri     => url,
    :referer => referer,
    :headers => headers || {},
    :verb    => verb,
    :params  => parameters
  }
  page = fetch_page(request)
  add_to_history(page)
  yield page if block_given?
  page
end

#get_file(url) ⇒ Object

Fetch a file and return the contents of the file.



305
306
307
# File 'lib/mechanize.rb', line 305

# Fetches +url+ with #get and returns the +body+ of the result.
def get_file(url)
  get(url).body
end

#head(url, query_params = {}, options = {}) {|page| ... } ⇒ Object

HEAD to url with query_params, and setting options:

head('http://tenderlovemaking.com/', {'q' => 'foo'}, :headers => {})

Yields:



291
292
293
294
295
296
297
298
299
300
301
302
# File 'lib/mechanize.rb', line 291

# HEAD to +url+ with +query_params+, and setting +options+:
#
#   head('http://tenderlovemaking.com/', {'q' => 'foo'}, :headers => {})
#
# Entries in +options+ override the defaults (:uri, :headers, :params and
# :verb). The fetched page is yielded to the block, if given, and returned.
# Unlike #get, this method does not add the page to the history itself.
def head(url, query_params = {}, options = {})
  defaults = {
    :uri     => url,
    :headers => {},
    :params  => query_params,
    :verb    => :head
  }

  # fetch the page
  page = fetch_page(defaults.merge(options))
  yield page if block_given?
  page
end

#logObject



185
# File 'lib/mechanize.rb', line 185

# Returns the log object held at class level (self.class.log).
def log
  self.class.log
end

#log=(l) ⇒ Object



184
# File 'lib/mechanize.rb', line 184

# Assigns the log object at class level (self.class.log), so the setting is
# not kept per instance.
def log=(l)
  self.class.log = l
end

#max_historyObject



183
# File 'lib/mechanize.rb', line 183

# Returns the maximum size configured on the history (@history.max_size).
def max_history
  @history.max_size
end

#max_history=(length) ⇒ Object



182
# File 'lib/mechanize.rb', line 182

# Sets the maximum size of the history (@history.max_size) to +length+.
def max_history=(length)
  @history.max_size = length
end

#post(url, query = {}, headers = {}) ⇒ Object

Posts to the given URL with the request entity. The request entity is specified by either a string, or a list of key-value pairs represented by a hash or an array of arrays.

Examples:

agent.post('http://example.com/', "foo" => "bar")

agent.post('http://example.com/', [ ["foo", "bar"] ])

agent.post('http://example.com/', "<message>hello</message>", 'Content-Type' => 'application/xml')


334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
# File 'lib/mechanize.rb', line 334

# Posts to +url+ with the request entity given in +query+.
#
# * A String is sent as-is through #request_with_entity with +headers+.
# * A hash, or an array of [key, value] pairs, is turned into a form
#   submission: IO values become file uploads (which switches the form to
#   multipart/form-data), and every other value becomes an ordinary field.
#
# Examples:
#
#   agent.post('http://example.com/', "foo" => "bar")
#   agent.post('http://example.com/', [ ["foo", "bar"] ])
#   agent.post('http://example.com/', "<message>hello</message>", 'Content-Type' => 'application/xml')
def post(url, query={}, headers={})
  return request_with_entity(:post, url, query, :headers => headers) if query.is_a?(String)

  # Create a fake form: a hash standing in for the form node, given a
  # +search+ method that never finds anything.
  node = {}
  def node.search(*args); []; end
  node['method']  = 'POST'
  node['enctype'] = 'application/x-www-form-urlencoded'

  form = Form.new(node)
  query.each do |k, v|
    unless v.is_a?(IO)
      form.fields << Form::Field.new({'name' => k.to_s}, v)
      next
    end

    form.enctype = 'multipart/form-data'
    upload = Form::FileUpload.new({'name' => k.to_s}, ::File.basename(v.path))
    upload.file_data = v.read
    form.file_uploads << upload
  end
  post_form(url, form, headers)
end

#post_connect_hooksObject



191
192
193
# File 'lib/mechanize.rb', line 191

# Returns the +hooks+ collection of the Chain::PostConnectHook created in
# #initialize.
def post_connect_hooks
  @post_connect_hook.hooks
end

#pre_connect_hooksObject



187
188
189
# File 'lib/mechanize.rb', line 187

# Returns the +hooks+ collection of the Chain::PreConnectHook created in
# #initialize.
def pre_connect_hooks
  @pre_connect_hook.hooks
end

#pretty_print(q) ⇒ Object



5
6
7
8
9
10
11
12
# File 'lib/mechanize/inspect.rb', line 5

# Pretty-printer hook: renders the agent as an object group containing its
# cookie jar followed by its current page. +q+ is the printer object, which
# must provide object_group, breakable and pp.
def pretty_print(q)
  q.object_group(self) do
    q.breakable
    q.pp(cookie_jar)
    q.breakable
    q.pp(current_page)
  end
end

#put(url, entity, options = {}) ⇒ Object

PUT to url with entity, and setting options:

put('http://tenderlovemaking.com/', 'new content', :headers => {'Content-Type' => 'text/plain'})


271
272
273
# File 'lib/mechanize.rb', line 271

# PUT to +url+ with +entity+, and setting +options+:
#
#   put('http://tenderlovemaking.com/', 'new content', :headers => {'Content-Type' => 'text/plain'})
#
# Delegates to #request_with_entity with the verb :put and returns its result.
def put(url, entity, options = {})
  request_with_entity(:put, url, entity, options)
end

#request_with_entity(verb, url, entity, options = {}) ⇒ Object



382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
# File 'lib/mechanize.rb', line 382

# Sends +entity+ as the body of a +verb+ request (e.g. :post or :put) to
# +url+, adds the resulting page to the history and returns it.
#
# +options+ may override :uri, :referer (defaults to the current page, or a
# blank page when there is none) and :headers. Caller-supplied headers are
# merged over the defaults 'Content-Type: application/octet-stream' and a
# computed 'Content-Length'.
def request_with_entity(verb, url, entity, options={})
  cur_page = current_page || Page.new( nil, {'content-type'=>'text/html'})

  options = {
    :uri      => url,
    :referer  => cur_page,
    :headers  => {},
  }.update(options)

  # Content-Length counts bytes, not characters. String#size returns the
  # character count, which is too small for multibyte text, so prefer
  # bytesize whenever the entity provides it.
  entity_length = entity.respond_to?(:bytesize) ? entity.bytesize : entity.size

  headers = {
    'Content-Type' => 'application/octet-stream',
    'Content-Length' => entity_length.to_s,
  }.update(options[:headers] || {}) # tolerate an explicit :headers => nil

  options.update({
                   :verb => verb,
                   :params => [entity],
                   :headers => headers,
                 })

  page = fetch_page(options)
  add_to_history(page)
  page
end

#set_proxy(addr, port, user = nil, pass = nil) ⇒ Object

Sets the proxy address, port, user, and password. addr should be a host, with no “http://”.



197
198
199
# File 'lib/mechanize.rb', line 197

# Sets the proxy address, port, user, and password in a single assignment.
# +user+ and +pass+ default to nil.
def set_proxy(addr, port, user = nil, pass = nil)
  @proxy_addr, @proxy_port, @proxy_user, @proxy_pass = addr, port, user, pass
end

#submit(form, button = nil, headers = {}) ⇒ Object

Submit a form with an optional button. Without a button:

page = agent.get('http://example.com')
agent.submit(page.forms.first)

With a button

agent.submit(page.forms.first, page.forms.first.buttons.first)


366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
# File 'lib/mechanize.rb', line 366

# Submits +form+, optionally as if +button+ had been pressed, sending the
# extra +headers+ along.
#
# Without a button:
#
#   page = agent.get('http://example.com')
#   agent.submit(page.forms.first)
#
# With a button:
#
#   agent.submit(page.forms.first, page.forms.first.buttons.first)
#
# POST forms go through post_form. GET forms go through #get, with any
# existing query string stripped from the action and replaced by the form's
# own query. Any other method raises a RuntimeError.
def submit(form, button=nil, headers={})
  form.add_button_to_query(button) if button

  http_method = form.method.upcase
  if http_method == 'POST'
    post_form(form.action, form, headers)
  elsif http_method == 'GET'
    action_without_query = form.action.gsub(/\?[^\?]*$/, '')
    get(:url     => action_without_query,
        :params  => form.build_query,
        :headers => headers,
        :referer => form.page)
  else
    raise "unsupported method: #{form.method.upcase}"
  end
end

#transactObject

Runs given block, then resets the page history as it was before. self is given as a parameter to the block. Returns the value of the block.



427
428
429
430
431
432
433
434
# File 'lib/mechanize.rb', line 427

# Runs the given block with +self+ as its argument and returns the block's
# value. The page history is replaced with a copy taken before the block ran,
# whether the block finishes normally or raises.
def transact
  snapshot = @history.dup
  begin
    return yield(self)
  ensure
    @history = snapshot
  end
end

#user_agent_alias=(al) ⇒ Object

Set the user agent for the Mechanize object. See AGENT_ALIASES



203
204
205
# File 'lib/mechanize.rb', line 203

# Sets the user agent from one of the names in AGENT_ALIASES.
# Raises a RuntimeError ("unknown agent alias") when +al+ is not a known
# alias; user_agent is left untouched in that case.
def user_agent_alias=(al)
  agent_string = AGENT_ALIASES[al]
  raise("unknown agent alias") unless agent_string
  self.user_agent = agent_string
end

#visited?(url) ⇒ Boolean

Returns whether or not a url has been visited

Returns:

  • (Boolean)


413
414
415
# File 'lib/mechanize.rb', line 413

# Returns true when #visited_page finds a page for +url+, false otherwise.
def visited?(url)
  visited_page(url).nil? ? false : true
end

#visited_page(url) ⇒ Object

Returns a visited page for the url passed in, otherwise nil



418
419
420
421
422
423
# File 'lib/mechanize.rb', line 418

# Returns the visited page for +url+ as reported by the history, otherwise
# nil. +url+ may also be an object responding to +href+ (such as a link), in
# which case its href is looked up. The target is passed through +resolve+
# before the history is queried.
def visited_page(url)
  target = url.respond_to?(:href) ? url.href : url
  @history.visited_page(resolve(target))
end