Class: Mechanize

Inherits:
Object
  • Object
show all
Defined in:
lib/mechanize.rb,
lib/mechanize/file.rb,
lib/mechanize/form.rb,
lib/mechanize/page.rb,
lib/mechanize/util.rb,
lib/mechanize/chain.rb,
lib/mechanize/cookie.rb,
lib/mechanize/headers.rb,
lib/mechanize/history.rb,
lib/mechanize/inspect.rb,
lib/mechanize/page/base.rb,
lib/mechanize/page/link.rb,
lib/mechanize/page/meta.rb,
lib/mechanize/cookie_jar.rb,
lib/mechanize/file_saver.rb,
lib/mechanize/form/field.rb,
lib/mechanize/page/frame.rb,
lib/mechanize/form/button.rb,
lib/mechanize/form/option.rb,
lib/mechanize/monkey_patch.rb,
lib/mechanize/chain/handler.rb,
lib/mechanize/file_response.rb,
lib/mechanize/page/encoding.rb,
lib/mechanize/form/check_box.rb,
lib/mechanize/form/file_upload.rb,
lib/mechanize/form/select_list.rb,
lib/mechanize/form/image_button.rb,
lib/mechanize/form/radio_button.rb,
lib/mechanize/pluggable_parsers.rb,
lib/mechanize/chain/auth_headers.rb,
lib/mechanize/chain/ssl_resolver.rb,
lib/mechanize/chain/uri_resolver.rb,
lib/mechanize/content_type_error.rb,
lib/mechanize/response_code_error.rb,
lib/mechanize/chain/custom_headers.rb,
lib/mechanize/chain/post_page_hook.rb,
lib/mechanize/chain/header_resolver.rb,
lib/mechanize/chain/response_reader.rb,
lib/mechanize/chain/pre_connect_hook.rb,
lib/mechanize/chain/request_resolver.rb,
lib/mechanize/form/multi_select_list.rb,
lib/mechanize/chain/parameter_resolver.rb,
lib/mechanize/unsupported_scheme_error.rb,
lib/mechanize/chain/connection_resolver.rb,
lib/mechanize/chain/response_body_parser.rb,
lib/mechanize/chain/body_decoding_handler.rb,
lib/mechanize/redirect_limit_reached_error.rb,
lib/mechanize/chain/response_header_handler.rb,
lib/mechanize/redirect_not_get_or_head_error.rb

Overview

:stopdoc:

Defined Under Namespace

Modules: Handler, PageEncoding Classes: Chain, ContentTypeError, Cookie, CookieJar, File, FileResponse, FileSaver, Form, Headers, History, Page, PluggableParser, RedirectLimitReachedError, RedirectNotGetOrHeadError, ResponseCodeError, UnsupportedSchemeError, Util

Constant Summary collapse

VERSION =

The version of Mechanize you are using.

'0.9.3'
AGENT_ALIASES =

User Agent aliases

# Maps a short alias name to the full User-Agent header string it stands
# for. #user_agent_alias= looks names up in this table, and #initialize
# uses the 'Mechanize' entry as the default user agent.
{
  'Windows IE 6' => 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)',
  'Windows IE 7' => 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; .NET CLR 1.1.4322; .NET CLR 2.0.50727)',
  'Windows Mozilla' => 'Mozilla/5.0 (Windows; U; Windows NT 5.0; en-US; rv:1.4b) Gecko/20030516 Mozilla Firebird/0.6',
  'Mac Safari' => 'Mozilla/5.0 (Macintosh; U; PPC Mac OS X; en) AppleWebKit/418 (KHTML, like Gecko) Safari/417.9.3',
  'Mac FireFox' => 'Mozilla/5.0 (Macintosh; U; PPC Mac OS X Mach-O; en-US; rv:1.8.0.3) Gecko/20060426 Firefox/1.5.0.3',
  'Mac Mozilla' => 'Mozilla/5.0 (Macintosh; U; PPC Mac OS X Mach-O; en-US; rv:1.4a) Gecko/20030401',
  'Linux Mozilla' => 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.4) Gecko/20030624',
  'Linux Konqueror' => 'Mozilla/5.0 (compatible; Konqueror/3; Linux)',
  'iPhone' => 'Mozilla/5.0 (iPhone; U; CPU like Mac OS X; en) AppleWebKit/420+ (KHTML, like Gecko) Version/3.0 Mobile/1C28 Safari/419.3',
  'Mechanize' => "WWW-Mechanize/#{VERSION} (http://rubyforge.org/projects/mechanize/)"
}

Class Attribute Summary collapse

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(params = {}) {|_self| ... } ⇒ Mechanize

Returns a new instance of Mechanize.

Yields:

  • (_self)

Yield Parameters:

  • _self (Mechanize)

    the object that the method was called on



103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
# File 'lib/mechanize.rb', line 103

# Builds a new agent: assigns every setting its default value, hands
# +params+ to set_init_param, and finally yields the agent to the optional
# block so the caller can adjust settings in place.
#
# params - Hash passed to set_init_param once the defaults are in place.
#          NOTE(review): set_init_param is not visible here; presumably it
#          overrides the defaults assigned below -- confirm.
#
# Yields the new agent (self) when a block is given.
def initialize(params = {})
  # attr_accessors
  @cookie_jar     = CookieJar.new
  # NOTE(review): #log and #log= delegate to self.class.log, so this
  # instance variable does not appear to back those accessors.
  @log            = nil
  @open_timeout   = nil
  @read_timeout   = nil
  @user_agent     = AGENT_ALIASES['Mechanize']
  @watch_for_set  = nil
  @history_added  = nil
  @ca_file        = nil # OpenSSL server certificate file

  # callback for OpenSSL errors while verifying the server certificate
  # chain, can be used for debugging or to ignore errors by always
  # returning _true_
  @verify_callback = nil
  @cert           = nil # OpenSSL Certificate
  @key            = nil # OpenSSL Private Key
  @pass           = nil # OpenSSL Password
  @redirect_ok    = true # Should we follow redirects?

  # attr_readers
  @history        = Mechanize::History.new
  @pluggable_parser = PluggableParser.new

  # Auth variables
  @user           = nil # Auth User
  @password       = nil # Auth Password
  @digest         = nil # DigestAuth Digest
  @auth_hash      = {}  # Keep track of urls for sending auth
  @request_headers= {}  # A hash of request headers to be used

  # Proxy settings
  @proxy_addr     = nil
  @proxy_pass     = nil
  @proxy_port     = nil
  @proxy_user     = nil

  @conditional_requests = true

  @follow_meta_refresh  = false
  @redirection_limit    = 20

  # Connection Cache & Keep alive
  @connection_cache = {}
  @keep_alive_time  = 300
  @keep_alive       = true

  @use_content_disposition = false

  # Scheme handlers are lambdas taking (link, page). Looking up a scheme
  # that has no handler stores, under that key, a lambda that raises
  # UnsupportedSchemeError when called.
  @scheme_handlers  = Hash.new { |h,k|
    h[k] = lambda { |link, page|
      raise UnsupportedSchemeError.new(k)
    }
  }
  # 'https', 'relative' and 'file' share the 'http' handler, which simply
  # returns the link it was given.
  @scheme_handlers['http']      = lambda { |link, page| link }
  @scheme_handlers['https']     = @scheme_handlers['http']
  @scheme_handlers['relative']  = @scheme_handlers['http']
  @scheme_handlers['file']      = @scheme_handlers['http']

  @pre_connect_hook = Chain::PreConnectHook.new
  @post_connect_hook = Chain::PostConnectHook.new

  @post_page_hook = Chain::PostPageHook.new

  # Each instance starts with the class-level parser setting.
  @html_parser = self.class.html_parser

  # Applied after all defaults so that +params+ is processed against a
  # fully initialised agent.
  set_init_param(params)

  yield self if block_given?
end

Class Attribute Details

.html_parserObject

Returns the value of attribute html_parser.



101
102
103
# File 'lib/mechanize.rb', line 101

# Class-level reader (this entry is listed under the class attributes):
# returns the @html_parser held by the receiver. #initialize copies this
# value into each new instance via self.class.html_parser.
def html_parser
  @html_parser
end

.logObject

Returns the value of attribute log.



101
102
103
# File 'lib/mechanize.rb', line 101

# Class-level reader (this entry is listed under the class attributes):
# returns the receiver's @log. The instance methods #log and #log= forward
# to self.class.log and self.class.log=.
def log
  @log
end

Instance Attribute Details

#ca_fileObject

Returns the value of attribute ca_file.



71
72
73
# File 'lib/mechanize.rb', line 71

# Returns @ca_file. #initialize defaults it to nil and describes it as the
# OpenSSL server certificate file.
def ca_file
  @ca_file
end

#certObject

Returns the value of attribute cert.



73
74
75
# File 'lib/mechanize.rb', line 73

# Returns @cert. #initialize defaults it to nil and describes it as the
# OpenSSL certificate.
def cert
  @cert
end

#conditional_requestsObject

Returns the value of attribute conditional_requests.



78
79
80
# File 'lib/mechanize.rb', line 78

# Returns @conditional_requests, which #initialize defaults to true.
# NOTE(review): presumably controls whether conditional HTTP requests are
# sent -- the code that consumes the flag is not shown here.
def conditional_requests
  @conditional_requests
end

#cookie_jar ⇒ Object

Returns the value of attribute cookie_jar.



67
68
69
# File 'lib/mechanize.rb', line 67

# Returns @cookie_jar, the CookieJar created in #initialize. #cookies
# exposes its contents via to_a.
def cookie_jar
  @cookie_jar
end

#follow_meta_refreshObject

Returns the value of attribute follow_meta_refresh.



79
80
81
# File 'lib/mechanize.rb', line 79

# Returns @follow_meta_refresh, which #initialize defaults to false.
def follow_meta_refresh
  @follow_meta_refresh
end

#historyObject (readonly)

Returns the value of attribute history.



94
95
96
# File 'lib/mechanize.rb', line 94

# Returns @history, the Mechanize::History created in #initialize. #back,
# #current_page, #max_history and #visited_page all operate on it.
def history
  @history
end

#history_addedObject

Returns the value of attribute history_added.



81
82
83
# File 'lib/mechanize.rb', line 81

# Returns @history_added, which #initialize defaults to nil.
# NOTE(review): presumably a callback invoked when a page is added to the
# history -- confirm against add_to_history, which is not shown here.
def history_added
  @history_added
end

#html_parserObject

The HTML parser to be used when parsing documents



89
90
91
# File 'lib/mechanize.rb', line 89

# Returns @html_parser, the HTML parser used for this agent. #initialize
# seeds it from self.class.html_parser.
def html_parser
  @html_parser
end

#keep_aliveObject

Returns the value of attribute keep_alive.



77
78
79
# File 'lib/mechanize.rb', line 77

# Returns @keep_alive, which #initialize defaults to true.
def keep_alive
  @keep_alive
end

#keep_alive_timeObject

Returns the value of attribute keep_alive_time.



76
77
78
# File 'lib/mechanize.rb', line 76

# Returns @keep_alive_time, which #initialize defaults to 300.
# NOTE(review): the unit is presumably seconds -- confirm.
def keep_alive_time
  @keep_alive_time
end

#keyObject

Returns the value of attribute key.



72
73
74
# File 'lib/mechanize.rb', line 72

# Returns @key. #initialize defaults it to nil and describes it as the
# OpenSSL private key.
def key
  @key
end

#open_timeoutObject

Returns the value of attribute open_timeout.



68
69
70
# File 'lib/mechanize.rb', line 68

# Returns @open_timeout, which #initialize defaults to nil.
def open_timeout
  @open_timeout
end

#passObject

Returns the value of attribute pass.



74
75
76
# File 'lib/mechanize.rb', line 74

# Returns @pass. #initialize defaults it to nil and describes it as the
# OpenSSL password.
def pass
  @pass
end

#pluggable_parserObject (readonly)

Returns the value of attribute pluggable_parser.



95
96
97
# File 'lib/mechanize.rb', line 95

# Returns @pluggable_parser, the PluggableParser created in #initialize.
def pluggable_parser
  @pluggable_parser
end

#post_page_hookObject (readonly)

Returns the value of attribute post_page_hook.



96
97
98
# File 'lib/mechanize.rb', line 96

# Returns @post_page_hook, the Chain::PostPageHook created in #initialize.
# #post_page_hooks returns that object's hooks.
def post_page_hook
  @post_page_hook
end

#read_timeoutObject

Returns the value of attribute read_timeout.



68
69
70
# File 'lib/mechanize.rb', line 68

# Returns @read_timeout, which #initialize defaults to nil.
def read_timeout
  @read_timeout
end

#redirect_okObject Also known as: follow_redirect?

Returns the value of attribute redirect_ok.



75
76
77
# File 'lib/mechanize.rb', line 75

# Returns @redirect_ok, the "should we follow redirects?" flag that
# #initialize defaults to true.
def redirect_ok
  @redirect_ok
end

#redirection_limitObject

Returns the value of attribute redirection_limit.



83
84
85
# File 'lib/mechanize.rb', line 83

# Returns @redirection_limit, which #initialize defaults to 20.
def redirection_limit
  @redirection_limit
end

#request_headersObject

A hash of custom request headers



86
87
88
# File 'lib/mechanize.rb', line 86

# Returns @request_headers, the hash of custom request headers.
# #initialize starts it out as an empty hash.
def request_headers
  @request_headers
end

#scheme_handlersObject

Returns the value of attribute scheme_handlers.



82
83
84
# File 'lib/mechanize.rb', line 82

# Returns @scheme_handlers, a hash keyed by scheme name whose values are
# lambdas taking (link, page). #initialize registers 'http', 'https',
# 'relative' and 'file'; looking up any other key yields a lambda that
# raises UnsupportedSchemeError.
def scheme_handlers
  @scheme_handlers
end

#use_content_dispositionObject

use Content-Disposition header or not



92
93
94
# File 'lib/mechanize.rb', line 92

# Returns @use_content_disposition (whether to use the Content-Disposition
# header), which #initialize defaults to false.
def use_content_disposition
  @use_content_disposition
end

#user_agentObject

Returns the value of attribute user_agent.



69
70
71
# File 'lib/mechanize.rb', line 69

# Returns @user_agent. #initialize defaults it to
# AGENT_ALIASES['Mechanize']; #user_agent_alias= replaces it with another
# entry from AGENT_ALIASES.
def user_agent
  @user_agent
end

#verify_callbackObject

Returns the value of attribute verify_callback.



80
81
82
# File 'lib/mechanize.rb', line 80

# Returns @verify_callback, which #initialize defaults to nil and describes
# as the callback for OpenSSL errors raised while verifying the server
# certificate chain.
def verify_callback
  @verify_callback
end

#watch_for_setObject

Returns the value of attribute watch_for_set.



70
71
72
# File 'lib/mechanize.rb', line 70

# Returns @watch_for_set, which #initialize defaults to nil.
# NOTE(review): its consumer is not shown here -- confirm what values it
# is expected to hold.
def watch_for_set
  @watch_for_set
end

Instance Method Details

#auth(user, password) ⇒ Object Also known as: basic_auth

Sets the user and password to be used for authentication.



209
210
211
212
# File 'lib/mechanize.rb', line 209

# Stores the credentials to be used for authentication in @user and
# @password.
#
# user     - value stored in @user
# password - value stored in @password
#
# Returns +password+ (the value of the last assignment).
def auth(user, password)
  @user       = user
  @password   = password
end

#backObject

Equivalent to the browser back button. Returns the most recent page visited.



312
313
314
# File 'lib/mechanize.rb', line 312

# Browser-style "back": removes the most recent entry from @history by
# calling pop on it and returns whatever pop returns.
def back
  @history.pop
end

#click(link) ⇒ Object

Clicks the Mechanize::Link object passed in and returns the page fetched.



303
304
305
306
307
308
# File 'lib/mechanize.rb', line 303

# Follows +link+ and returns the page fetched by #get.
#
# link - a link-like object. When it responds to +href+ that value is the
#        target; otherwise it is indexed with 'href' and then 'src'.
#
# The referer sent with the request is link.page when the link exposes a
# +page+ method that returns a non-nil value, and the current page
# otherwise.
def click(link)
  # Probe with respond_to? (as is done for href below) instead of a blanket
  # inline rescue, so genuine errors raised by link.page are not swallowed.
  referer = link.page if link.respond_to?(:page)
  href = link.respond_to?(:href) ? link.href : (link['href'] || link['src'])
  get(:url => href, :referer => (referer || current_page))
end

#cookiesObject

Returns a list of cookies stored in the cookie jar.



204
205
206
# File 'lib/mechanize.rb', line 204

# Returns the contents of @cookie_jar as produced by its to_a method.
def cookies
  @cookie_jar.to_a
end

#current_pageObject Also known as: page

Returns the current page loaded by Mechanize



400
401
402
# File 'lib/mechanize.rb', line 400

# Returns the last entry of @history, i.e. the page currently loaded.
# Callers such as #get and #request_with_entity guard the result with
# `|| Page.new(...)`, so it may be nil when nothing has been fetched yet.
def current_page
  @history.last
end

#delete(url, query_params = {}, options = {}) ⇒ Object

DELETE to url with query_params, and setting options:

delete('http://tenderlovemaking.com/', {'q' => 'foo'}, :headers => {})


272
273
274
275
276
# File 'lib/mechanize.rb', line 272

# Issues a DELETE request to +url+ and records the response in the history.
#
# url          - target of the request
# query_params - parameters passed through to #head
# options      - extra request options; :verb is always forced to :delete.
#                The caller's hash is not modified.
#
# The request itself is delegated to #head, whose option merging lets the
# :verb supplied here replace its :head default.
#
# Returns the page produced by #head.
def delete(url, query_params = {}, options = {})
  delete_options = options.merge(:verb => :delete)
  response = head(url, query_params, delete_options)
  add_to_history(response)
  response
end

#get(options, parameters = [], referer = nil) {|page| ... } ⇒ Object

Fetches the URL passed in and returns a page.

Yields:



216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
# File 'lib/mechanize.rb', line 216

# Fetches a URL, adds the resulting page to the history and returns it.
#
# Two calling conventions are accepted:
#
#   get(url, parameters = [], referer = nil)
#   get(:url => url, :params => [...], :referer => ..., :headers => {...})
#
# options    - either the URL itself or a Hash with :url (required),
#              :params, :referer and :headers.
# parameters - request parameters (positional form only). For backwards
#              compatibility, a second argument that does not respond to
#              +each+ is treated as the referer instead.
# referer    - a Mechanize::File (e.g. a Page), a String URL, or any other
#              object, which is handed to Page.new as the page's URI.
#
# Yields the fetched page when a block is given.
#
# Raises ArgumentError when the Hash form is used without :url.
def get(options, parameters = [], referer = nil)
  headers = nil
  if options.is_a?(Hash)
    url = options[:url]
    raise ArgumentError.new("url must be specified") unless url
    parameters = options[:params] || []
    referer    = options[:referer]
    headers    = options[:headers]
  else
    url = options
    unless parameters.respond_to?(:each) # FIXME: Remove this in 0.8.0
      referer = parameters
      parameters = []
    end
  end

  unless referer
    # Only a genuinely absolute http(s) URL may ignore the current page.
    # Anchoring on "scheme://" keeps relative paths that merely begin with
    # "http" (e.g. "httpdocs/index.html") resolvable against the current
    # page instead of a blank one.
    referer = current_page unless url.to_s =~ %r{\Ahttps?://}
    referer ||= Page.new(nil, {'content-type'=>'text/html'})
  end

  # FIXME: Huge hack so that using a URI as a referer works.  I need to
  # refactor everything to pass around URIs but still support
  # Mechanize::Page#base
  unless referer.is_a?(Mechanize::File)
    referer_uri = referer.is_a?(String) ? URI.parse(referer) : referer
    referer = Page.new(referer_uri, {'content-type' => 'text/html'})
  end

  # fetch the page
  page = fetch_page(:uri      => url,
                    :referer  => referer,
                    :headers  => headers || {},
                    :params   => parameters)
  add_to_history(page)
  yield page if block_given?
  page
end

#get_file(url) ⇒ Object

Fetch a file and return the contents of the file.



297
298
299
# File 'lib/mechanize.rb', line 297

# Fetches +url+ with #get and returns the body of the resulting page.
def get_file(url)
  fetched = get(url)
  fetched.body
end

#head(url, query_params = {}, options = {}) {|page| ... } ⇒ Object

HEAD to url with query_params, and setting options:

head('http://tenderlovemaking.com/', {'q' => 'foo'}, :headers => {})

Yields:



283
284
285
286
287
288
289
290
291
292
293
294
# File 'lib/mechanize.rb', line 283

# Issues a HEAD request to +url+ and returns the fetched page. The page is
# not added to the history by this method.
#
# url          - target of the request
# query_params - sent as :params
# options      - merged over the defaults, so any of :uri, :headers,
#                :params and :verb can be overridden by the caller
#
# Yields the fetched page when a block is given.
def head(url, query_params = {}, options = {})
  defaults = { :uri => url, :headers => {}, :params => query_params, :verb => :head }
  # fetch the page
  page = fetch_page(defaults.merge(options))
  yield page if block_given?
  page
end

#logObject



177
# File 'lib/mechanize.rb', line 177

# Returns the logger held at class level (self.class.log).
def log
  self.class.log
end

#log=(l) ⇒ Object



176
# File 'lib/mechanize.rb', line 176

# Sets the logger at class level (self.class.log=), so the value is
# visible through #log on whatever reads self.class.log.
def log=(l)
  self.class.log = l
end

#max_historyObject



175
# File 'lib/mechanize.rb', line 175

# Returns the max_size of @history.
def max_history
  @history.max_size
end

#max_history=(length) ⇒ Object



174
# File 'lib/mechanize.rb', line 174

# Sets the max_size of @history to +length+.
def max_history=(length)
  @history.max_size = length
end

#post(url, query = {}, headers = {}) ⇒ Object

Posts to the given URL with the request entity. The request entity is specified by either a string, or a list of key-value pairs represented by a hash or an array of arrays.

Examples:

agent.post('http://example.com/', "foo" => "bar")

agent.post('http://example.com/', [ ["foo", "bar"] ])

agent.post('http://example.com/', "<message>hello</message>", 'Content-Type' => 'application/xml')


326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
# File 'lib/mechanize.rb', line 326

# Posts to +url+.
#
# url     - target of the request
# query   - a String, sent verbatim through #request_with_entity, or an
#           enumerable of key/value pairs (Hash or array of arrays) that is
#           turned into a form and sent through post_form
# headers - extra request headers
#
# Values that are IO objects become file uploads and switch the form's
# enctype to 'multipart/form-data'; every other value becomes a plain field.
def post(url, query={}, headers={})
  # A raw String entity bypasses form construction entirely.
  return request_with_entity(:post, url, query, :headers => headers) if query.is_a?(String)

  # Create a fake form: a hash standing in for the form node, given a
  # +search+ method that always returns an empty list.
  fake_node = {
    'method'  => 'POST',
    'enctype' => 'application/x-www-form-urlencoded'
  }
  def fake_node.search(*args); []; end

  form = Form.new(fake_node)
  query.each do |name, value|
    unless value.is_a?(IO)
      form.fields << Form::Field.new(name.to_s, value)
      next
    end

    form.enctype = 'multipart/form-data'
    upload = Form::FileUpload.new(name.to_s, ::File.basename(value.path))
    upload.file_data = value.read
    form.file_uploads << upload
  end
  post_form(url, form, headers)
end

#post_connect_hooksObject



183
184
185
# File 'lib/mechanize.rb', line 183

# Returns the hooks of @post_connect_hook, the Chain::PostConnectHook
# created in #initialize.
def post_connect_hooks
  @post_connect_hook.hooks
end

#post_page_hooksObject



187
188
189
# File 'lib/mechanize.rb', line 187

# Returns the hooks of @post_page_hook, the Chain::PostPageHook created in
# #initialize.
def post_page_hooks
  @post_page_hook.hooks
end

#pre_connect_hooksObject



179
180
181
# File 'lib/mechanize.rb', line 179

# Returns the hooks of @pre_connect_hook, the Chain::PreConnectHook created
# in #initialize.
def pre_connect_hooks
  @pre_connect_hook.hooks
end

#pretty_print(q) ⇒ Object



5
6
7
8
9
10
11
12
# File 'lib/mechanize/inspect.rb', line 5

# Pretty-print support: inside an object group for this agent, emits the
# cookie jar followed by the current page, each preceded by a breakable.
#
# q - the pretty-printer object (must provide object_group, breakable, pp)
def pretty_print(q)
  q.object_group(self) do
    q.breakable
    q.pp(cookie_jar)
    q.breakable
    q.pp(current_page)
  end
end

#put(url, entity, options = {}) ⇒ Object

PUT to url with entity, and setting options:

put('http://tenderlovemaking.com/', 'new content', :headers => {'Content-Type' => 'text/plain'})


263
264
265
# File 'lib/mechanize.rb', line 263

# Sends +entity+ to +url+ with the :put verb by delegating to
# #request_with_entity, passing +options+ through unchanged. Returns
# whatever #request_with_entity returns (the fetched page).
def put(url, entity, options = {})
  request_with_entity(:put, url, entity, options)
end

#request_with_entity(verb, url, entity, options = {}) ⇒ Object



374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
# File 'lib/mechanize.rb', line 374

# Sends a request that carries +entity+ as its body, adds the response to
# the history and returns it.
#
# verb    - the HTTP verb to use (#put passes :put, #post passes :post)
# url     - target of the request
# entity  - the request body; it is sent as the single element of :params
# options - extra fetch options. :uri and :referer default to +url+ and the
#           current page (or a blank Page when there is none); :headers is
#           merged over the default Content-Type / Content-Length headers.
def request_with_entity(verb, url, entity, options={})
  cur_page = current_page || Page.new( nil, {'content-type'=>'text/html'})

  options = {
    :uri      => url,
    :referer  => cur_page,
    :headers  => {},
  }.update(options)

  # Content-Length counts octets, not characters: prefer bytesize so that
  # multibyte string entities are not under-reported. Entities without
  # bytesize fall back to size as before.
  entity_length = entity.respond_to?(:bytesize) ? entity.bytesize : entity.size

  headers = {
    'Content-Type' => 'application/octet-stream',
    'Content-Length' => entity_length.to_s,
  }.update(options[:headers] || {}) # tolerate an explicit :headers => nil

  options.update({
                   :verb => verb,
                   :params => [entity],
                   :headers => headers,
                 })

  page = fetch_page(options)
  add_to_history(page)
  page
end

#set_proxy(addr, port, user = nil, pass = nil) ⇒ Object

Sets the proxy address, port, user, and password. addr should be a host name, with no “http://” prefix.



193
194
195
# File 'lib/mechanize.rb', line 193

# Records the proxy settings in @proxy_addr, @proxy_port, @proxy_user and
# @proxy_pass.
#
# addr - proxy host
# port - proxy port
# user - optional proxy user name
# pass - optional proxy password
#
# Returns [addr, port, user, pass].
def set_proxy(addr, port, user = nil, pass = nil)
  @proxy_addr = addr
  @proxy_port = port
  @proxy_user = user
  @proxy_pass = pass
  [addr, port, user, pass]
end

#submit(form, button = nil, headers = {}) ⇒ Object

Submit a form with an optional button. Without a button:

page = agent.get('http://example.com')
agent.submit(page.forms.first)

With a button

agent.submit(page.forms.first, page.forms.first.buttons.first)


358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
# File 'lib/mechanize.rb', line 358

# Submits +form+, optionally registering +button+ with the form first.
#
# form    - the form to submit; its method (upper-cased) selects the path
# button  - optional button passed to form.add_button_to_query
# headers - extra request headers
#
# POST forms go through post_form. GET forms go through #get with the
# form's action stripped of its trailing query string, the form's built
# query as parameters and the form's page as referer.
#
# Raises RuntimeError for any other form method.
def submit(form, button=nil, headers={})
  form.add_button_to_query(button) if button

  verb = form.method.upcase
  return post_form(form.action, form, headers) if verb == 'POST'
  raise "unsupported method: #{verb}" unless verb == 'GET'

  action_without_query = form.action.gsub(/\?[^\?]*$/, '')
  get(:url      => action_without_query,
      :params   => form.build_query,
      :headers  => headers,
      :referer  => form.page)
end

#transactObject

Runs given block, then resets the page history as it was before. self is given as a parameter to the block. Returns the value of the block.



422
423
424
425
426
427
428
429
# File 'lib/mechanize.rb', line 422

# Runs the given block with the agent (self) as its argument and then puts
# the page history back the way it was, even when the block raises.
#
# The history is restored by re-assigning @history to a dup taken before
# the block ran.
#
# Returns the value of the block.
def transact
  saved_history = @history.dup
  begin
    yield(self)
  ensure
    @history = saved_history
  end
end

#user_agent_alias=(al) ⇒ Object

Set the user agent for the Mechanize object. See AGENT_ALIASES



199
200
201
# File 'lib/mechanize.rb', line 199

# Sets the user agent to the AGENT_ALIASES entry named +al+.
#
# Raises RuntimeError ("unknown agent alias") when AGENT_ALIASES has no
# truthy entry for +al+; the current user agent is left untouched then.
def user_agent_alias=(al)
  agent_string = AGENT_ALIASES[al]
  raise("unknown agent alias") unless agent_string
  self.user_agent = agent_string
end

#visited?(url) ⇒ Boolean

Returns whether or not a url has been visited

Returns:

  • (Boolean)


405
406
407
# File 'lib/mechanize.rb', line 405

# Returns true when #visited_page finds a page for +url+, false when it
# returns nil.
def visited?(url)
  found = visited_page(url)
  !found.nil?
end

#visited_page(url) ⇒ Object

Returns a visited page for the url passed in, otherwise nil. When url is an Integer it is used as an index into the history, so agent.visited_page(-1) is agent.history[-1].



411
412
413
414
415
416
417
418
# File 'lib/mechanize.rb', line 411

# Looks up a previously visited page.
#
# url - an Integer, used as an index into the history (so -1 is the most
#       recent entry); or a URL / link-like object. Objects responding to
#       +href+ are replaced by their href before the URL is resolved and
#       looked up in @history.
#
# Returns whatever the history lookup returns for that URL or index.
def visited_page(url)
  return history[url] if url.kind_of?(Integer)

  target = url.respond_to?(:href) ? url.href : url
  @history.visited_page(resolve(target))
end