Class: WWW::Mechanize

Inherits:
Object
  • Object
show all
Defined in:
lib/www/mechanize.rb,
lib/www/mechanize/file.rb,
lib/www/mechanize/form.rb,
lib/www/mechanize/page.rb,
lib/www/mechanize/util.rb,
lib/www/mechanize/chain.rb,
lib/www/mechanize/cookie.rb,
lib/www/mechanize/headers.rb,
lib/www/mechanize/history.rb,
lib/www/mechanize/inspect.rb,
lib/www/mechanize/page/base.rb,
lib/www/mechanize/page/link.rb,
lib/www/mechanize/page/meta.rb,
lib/www/mechanize/cookie_jar.rb,
lib/www/mechanize/file_saver.rb,
lib/www/mechanize/form/field.rb,
lib/www/mechanize/page/frame.rb,
lib/www/mechanize/form/button.rb,
lib/www/mechanize/form/option.rb,
lib/www/mechanize/monkey_patch.rb,
lib/www/mechanize/file_response.rb,
lib/www/mechanize/form/check_box.rb,
lib/www/mechanize/form/file_upload.rb,
lib/www/mechanize/form/select_list.rb,
lib/www/mechanize/form/image_button.rb,
lib/www/mechanize/form/radio_button.rb,
lib/www/mechanize/pluggable_parsers.rb,
lib/www/mechanize/chain/auth_headers.rb,
lib/www/mechanize/chain/ssl_resolver.rb,
lib/www/mechanize/chain/uri_resolver.rb,
lib/www/mechanize/content_type_error.rb,
lib/www/mechanize/response_code_error.rb,
lib/www/mechanize/chain/custom_headers.rb,
lib/www/mechanize/chain/header_resolver.rb,
lib/www/mechanize/chain/response_reader.rb,
lib/www/mechanize/chain/pre_connect_hook.rb,
lib/www/mechanize/chain/request_resolver.rb,
lib/www/mechanize/form/multi_select_list.rb,
lib/www/mechanize/chain/parameter_resolver.rb,
lib/www/mechanize/unsupported_scheme_error.rb,
lib/www/mechanize/chain/connection_resolver.rb,
lib/www/mechanize/chain/response_body_parser.rb,
lib/www/mechanize/chain/body_decoding_handler.rb,
lib/www/mechanize/redirect_limit_reached_error.rb,
lib/www/mechanize/chain/response_header_handler.rb,
lib/www/mechanize/redirect_not_get_or_head_error.rb

Overview

:stopdoc:

Defined Under Namespace

Classes: Chain, ContentTypeError, Cookie, CookieJar, File, FileResponse, FileSaver, Form, Headers, History, Page, PluggableParser, RedirectLimitReachedError, RedirectNotGetOrHeadError, ResponseCodeError, UnsupportedSchemeError, Util

Constant Summary collapse

VERSION =

The version of Mechanize you are using.

'0.9.3'
AGENT_ALIASES =

User Agent aliases

{
  'Windows IE 6' => 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)',
  'Windows IE 7' => 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; .NET CLR 1.1.4322; .NET CLR 2.0.50727)',
  'Windows Mozilla' => 'Mozilla/5.0 (Windows; U; Windows NT 5.0; en-US; rv:1.4b) Gecko/20030516 Mozilla Firebird/0.6',
  'Mac Safari' => 'Mozilla/5.0 (Macintosh; U; PPC Mac OS X; en) AppleWebKit/418 (KHTML, like Gecko) Safari/417.9.3',
  'Mac FireFox' => 'Mozilla/5.0 (Macintosh; U; PPC Mac OS X Mach-O; en-US; rv:1.8.0.3) Gecko/20060426 Firefox/1.5.0.3',
  'Mac Mozilla' => 'Mozilla/5.0 (Macintosh; U; PPC Mac OS X Mach-O; en-US; rv:1.4a) Gecko/20030401',
  'Linux Mozilla' => 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.4) Gecko/20030624',
  'Linux Konqueror' => 'Mozilla/5.0 (compatible; Konqueror/3; Linux)',
  'iPhone' => 'Mozilla/5.0 (iPhone; U; CPU like Mac OS X; en) AppleWebKit/420+ (KHTML, like Gecko) Version/3.0 Mobile/1C28 Safari/419.3',
  'Mechanize' => "WWW-Mechanize/#{VERSION} (http://rubyforge.org/projects/mechanize/)"
}

Class Attribute Summary collapse

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize {|_self| ... } ⇒ Mechanize

Returns a new instance of Mechanize.

Yields:

  • (_self)

Yield Parameters:



100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
# File 'lib/www/mechanize.rb', line 100

# Builds a new agent with its default configuration. When a block is given,
# the freshly configured agent is yielded to it so the caller can adjust
# settings before first use.
def initialize
  # Values exposed through attr_accessors
  @cookie_jar = CookieJar.new
  @log = nil
  @open_timeout = nil
  @read_timeout = nil
  @user_agent = AGENT_ALIASES['Mechanize']
  @watch_for_set = nil
  @history_added = nil
  @ca_file = nil # OpenSSL server certificate file

  # Callback invoked for OpenSSL errors raised while verifying the server
  # certificate chain. Useful for debugging, or for ignoring errors by
  # always returning _true_.
  @verify_callback = nil
  @cert = nil # OpenSSL Certificate
  @key = nil # OpenSSL Private Key
  @pass = nil # OpenSSL Password
  @redirect_ok = true # Should we follow redirects?

  # Values exposed through attr_readers
  @history = WWW::Mechanize::History.new
  @pluggable_parser = PluggableParser.new

  # Authentication state
  @user = nil # Auth User
  @password = nil # Auth Password
  @digest = nil # DigestAuth Digest
  @auth_hash = {} # Keep track of urls for sending auth
  @request_headers = {} # A hash of request headers to be used

  # Proxy configuration
  @proxy_addr = nil
  @proxy_pass = nil
  @proxy_port = nil
  @proxy_user = nil

  @conditional_requests = true

  @follow_meta_refresh = false
  @redirection_limit = 20

  # Connection cache and keep-alive settings
  @connection_cache = {}
  @keep_alive_time = 300
  @keep_alive = true

  # Looking up an unregistered scheme stores (and returns) a handler that
  # raises UnsupportedSchemeError for that scheme.
  @scheme_handlers = Hash.new do |handlers, scheme|
    handlers[scheme] = lambda { |link, page|
      raise UnsupportedSchemeError.new(scheme)
    }
  end

  # The supported schemes all share one handler that returns the link as-is.
  pass_through = lambda { |link, page| link }
  %w[http https relative file].each do |scheme|
    @scheme_handlers[scheme] = pass_through
  end

  @pre_connect_hook = Chain::PreConnectHook.new
  @post_connect_hook = Chain::PostConnectHook.new

  # Each instance starts with the parser configured on the class.
  @html_parser = self.class.html_parser

  yield self if block_given?
end

Class Attribute Details

.html_parserObject

Returns the value of attribute html_parser.



98
99
100
# File 'lib/www/mechanize.rb', line 98

# Class-level attribute reader (listed under "Class Attribute Details"):
# returns @html_parser. #initialize copies this value into each new instance
# via self.class.html_parser.
def html_parser
  @html_parser
end

.logObject

Returns the value of attribute log.



98
99
100
# File 'lib/www/mechanize.rb', line 98

# Class-level attribute reader (listed under "Class Attribute Details"):
# returns @log. The instance-level #log and #log= delegate to this
# class-level attribute through self.class.
def log
  @log
end

Instance Attribute Details

#ca_fileObject

Returns the value of attribute ca_file.



72
73
74
# File 'lib/www/mechanize.rb', line 72

# Returns @ca_file, the OpenSSL server certificate file.
# #initialize sets it to nil.
def ca_file
  @ca_file
end

#certObject

Returns the value of attribute cert.



74
75
76
# File 'lib/www/mechanize.rb', line 74

# Returns @cert, the OpenSSL certificate. #initialize sets it to nil.
def cert
  @cert
end

#conditional_requestsObject

Returns the value of attribute conditional_requests.



79
80
81
# File 'lib/www/mechanize.rb', line 79

# Returns @conditional_requests. #initialize sets it to true.
def conditional_requests
  @conditional_requests
end

#cookie_jarObject

Returns the value of attribute cookie_jar.



68
69
70
# File 'lib/www/mechanize.rb', line 68

# Returns @cookie_jar. #initialize sets it to a new CookieJar, and #cookies
# lists its contents via to_a.
def cookie_jar
  @cookie_jar
end

#follow_meta_refreshObject

Returns the value of attribute follow_meta_refresh.



80
81
82
# File 'lib/www/mechanize.rb', line 80

# Returns @follow_meta_refresh. #initialize sets it to false.
def follow_meta_refresh
  @follow_meta_refresh
end

#historyObject (readonly)

Returns the value of attribute history.



92
93
94
# File 'lib/www/mechanize.rb', line 92

# Returns @history (read-only attribute). #initialize sets it to a new
# WWW::Mechanize::History; #back, #current_page and #visited_page all
# operate on it.
def history
  @history
end

#history_addedObject

Returns the value of attribute history_added.



82
83
84
# File 'lib/www/mechanize.rb', line 82

# Returns @history_added. #initialize sets it to nil.
# NOTE(review): presumably a callback run when a page is added to the
# history -- confirm against add_to_history, which is not shown here.
def history_added
  @history_added
end

#html_parserObject

The HTML parser to be used when parsing documents



90
91
92
# File 'lib/www/mechanize.rb', line 90

# Returns @html_parser, the HTML parser to be used when parsing documents.
# #initialize seeds it from the class-level html_parser attribute.
def html_parser
  @html_parser
end

#keep_aliveObject

Returns the value of attribute keep_alive.



78
79
80
# File 'lib/www/mechanize.rb', line 78

# Returns @keep_alive. #initialize sets it to true.
def keep_alive
  @keep_alive
end

#keep_alive_timeObject

Returns the value of attribute keep_alive_time.



77
78
79
# File 'lib/www/mechanize.rb', line 77

# Returns @keep_alive_time. #initialize sets it to 300.
# NOTE(review): the unit is presumably seconds -- confirm against the
# connection handling code, which is not shown here.
def keep_alive_time
  @keep_alive_time
end

#keyObject

Returns the value of attribute key.



73
74
75
# File 'lib/www/mechanize.rb', line 73

# Returns @key, the OpenSSL private key. #initialize sets it to nil.
def key
  @key
end

#open_timeoutObject

Returns the value of attribute open_timeout.



69
70
71
# File 'lib/www/mechanize.rb', line 69

# Returns @open_timeout. #initialize sets it to nil.
def open_timeout
  @open_timeout
end

#passObject

Returns the value of attribute pass.



75
76
77
# File 'lib/www/mechanize.rb', line 75

# Returns @pass, the OpenSSL password. #initialize sets it to nil.
# This is distinct from @password, which #auth sets for authentication.
def pass
  @pass
end

#pluggable_parserObject (readonly)

Returns the value of attribute pluggable_parser.



93
94
95
# File 'lib/www/mechanize.rb', line 93

# Returns @pluggable_parser (read-only attribute). #initialize sets it to a
# new PluggableParser.
def pluggable_parser
  @pluggable_parser
end

#read_timeoutObject

Returns the value of attribute read_timeout.



69
70
71
# File 'lib/www/mechanize.rb', line 69

# Returns @read_timeout. #initialize sets it to nil.
def read_timeout
  @read_timeout
end

#redirect_okObject Also known as: follow_redirect?

Returns the value of attribute redirect_ok.



76
77
78
# File 'lib/www/mechanize.rb', line 76

# Returns @redirect_ok, the flag for whether redirects should be followed.
# #initialize sets it to true. Also known as follow_redirect?.
def redirect_ok
  @redirect_ok
end

#redirection_limitObject

Returns the value of attribute redirection_limit.



84
85
86
# File 'lib/www/mechanize.rb', line 84

# Returns @redirection_limit. #initialize sets it to 20.
def redirection_limit
  @redirection_limit
end

#request_headersObject

A hash of custom request headers



87
88
89
# File 'lib/www/mechanize.rb', line 87

# Returns @request_headers, a hash of custom request headers.
# #initialize sets it to an empty hash.
def request_headers
  @request_headers
end

#scheme_handlersObject

Returns the value of attribute scheme_handlers.



83
84
85
# File 'lib/www/mechanize.rb', line 83

# Returns @scheme_handlers, a hash mapping a scheme name to a lambda taking
# (link, page). #initialize registers a handler that returns the link
# unchanged for 'http', 'https', 'relative' and 'file'; looking up any other
# scheme installs a handler that raises UnsupportedSchemeError.
def scheme_handlers
  @scheme_handlers
end

#user_agentObject

Returns the value of attribute user_agent.



70
71
72
# File 'lib/www/mechanize.rb', line 70

# Returns @user_agent. #initialize sets it to AGENT_ALIASES['Mechanize'];
# #user_agent_alias= replaces it with another entry from AGENT_ALIASES.
def user_agent
  @user_agent
end

#verify_callbackObject

Returns the value of attribute verify_callback.



81
82
83
# File 'lib/www/mechanize.rb', line 81

# Returns @verify_callback, the callback for OpenSSL errors raised while
# verifying the server certificate chain. #initialize sets it to nil.
def verify_callback
  @verify_callback
end

#watch_for_setObject

Returns the value of attribute watch_for_set.



71
72
73
# File 'lib/www/mechanize.rb', line 71

# Returns @watch_for_set. #initialize sets it to nil.
def watch_for_set
  @watch_for_set
end

Instance Method Details

#auth(user, password) ⇒ Object Also known as: basic_auth

Sets the user and password to be used for authentication.



196
197
198
199
# File 'lib/www/mechanize.rb', line 196

# Stores the credentials to be used for authentication in @user and
# @password. Returns +password+ (the value of the last assignment).
# Also known as basic_auth.
def auth(user, password)
  @user = user
  @password = password
end

#backObject

Equivalent to the browser back button. Returns the most recent page visited.



299
300
301
# File 'lib/www/mechanize.rb', line 299

# Equivalent to the browser back button: removes the newest entry from
# @history by calling pop on it and returns whatever that call returns.
def back
  @history.pop
end

#click(link) ⇒ Object

Clicks the WWW::Mechanize::Link object passed in and returns the page fetched.



290
291
292
293
294
295
# File 'lib/www/mechanize.rb', line 290

# Follows +link+ and returns the page fetched by #get.
#
# +link+ may be an object responding to +href+, or anything indexable by
# 'href' / 'src' (the 'href' entry wins when both are present). The referer
# is link.page when that call succeeds and is truthy; otherwise the current
# page is used.
def click(link)
  # Not every link-like object knows its page; any StandardError raised by
  # the lookup is treated as "no page".
  origin =
    begin
      link.page
    rescue StandardError
      nil
    end

  target =
    if link.respond_to?(:href)
      link.href
    else
      link['href'] || link['src']
    end

  get(:url => target, :referer => (origin || current_page))
end

#cookiesObject

Returns a list of cookies stored in the cookie jar.



191
192
193
# File 'lib/www/mechanize.rb', line 191

# Returns a list of the cookies stored in the cookie jar, obtained by
# calling to_a on @cookie_jar.
def cookies
  @cookie_jar.to_a
end

#current_pageObject Also known as: page

Returns the current page loaded by Mechanize



387
388
389
# File 'lib/www/mechanize.rb', line 387

# Returns the current page loaded by Mechanize, i.e. the last entry of
# @history. Also known as page.
def current_page
  @history.last
end

#delete(url, query_params = {}, options = {}) ⇒ Object

DELETE to url with query_params, and setting options:

delete('http://tenderlovemaking.com/', {'q' => 'foo'}, :headers => {})


259
260
261
262
263
# File 'lib/www/mechanize.rb', line 259

# Sends a DELETE request to +url+ with +query_params+ and +options+.
#
# The request is issued through #head with the :verb option forced to
# :delete. The resulting page is added to the history and returned.
def delete(url, query_params = {}, options = {})
  delete_options = options.merge(:verb => :delete)
  response = head(url, query_params, delete_options)
  add_to_history(response)
  response
end

#get(options, parameters = [], referer = nil) {|page| ... } ⇒ Object

Fetches the URL passed in and returns a page.

Yields:



203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
# File 'lib/www/mechanize.rb', line 203

# Fetches a URL and returns the resulting page; the page is also added to
# the history and yielded to the block when one is given.
#
# Two calling conventions are accepted:
#   get(url, parameters = [], referer = nil)
#   get(:url => url, :params => [...], :referer => ..., :headers => {...})
#
# Raises ArgumentError when the hash form is used without a :url entry.
def get(options, parameters = [], referer = nil)
  headers = nil

  if options.is_a? Hash
    url = options[:url]
    raise ArgumentError.new("url must be specified") unless url
    parameters = options[:params] || []
    referer = options[:referer]
    headers = options[:headers]
  else
    url = options
    # FIXME: Remove this in 0.8.0
    # Legacy call style get(url, referer): a second argument that cannot be
    # iterated is taken to be the referer rather than the parameters.
    unless parameters.respond_to?(:each)
      referer = parameters
      parameters = []
    end
  end

  unless referer
    # URLs starting with "http" get a blank referer page; anything else
    # falls back to the current page when there is one.
    referer = current_page unless url.to_s =~ /^http/
    referer ||= Page.new(nil, {'content-type'=>'text/html'})
  end

  # FIXME: Huge hack so that using a URI as a referer works.  I need to
  # refactor everything to pass around URIs but still support
  # WWW::Mechanize::Page#base
  unless referer.is_a?(WWW::Mechanize::File)
    referer_uri = referer.is_a?(String) ? URI.parse(referer) : referer
    referer = Page.new(referer_uri, {'content-type' => 'text/html'})
  end

  # fetch the page
  fetched = fetch_page(:uri     => url,
                       :referer => referer,
                       :headers => headers || {},
                       :params  => parameters)
  add_to_history(fetched)
  yield fetched if block_given?
  fetched
end

#get_file(url) ⇒ Object

Fetch a file and return the contents of the file.



284
285
286
# File 'lib/www/mechanize.rb', line 284

# Fetches +url+ with #get and returns the body of the resulting page.
def get_file(url)
  fetched = get(url)
  fetched.body
end

#head(url, query_params = {}, options = {}) {|page| ... } ⇒ Object

HEAD to url with query_params, and setting options:

head('http://tenderlovemaking.com/', {'q' => 'foo'}, :headers => {})

Yields:



270
271
272
273
274
275
276
277
278
279
280
281
# File 'lib/www/mechanize.rb', line 270

# Sends a HEAD request to +url+ with +query_params+ and returns the page.
#
# +options+ is merged over the defaults (:uri, :headers, :params, :verb),
# so callers may override any of them -- #delete relies on this to change
# the verb. The page is yielded to the block when one is given. Unlike #get,
# this method does not add the page to the history itself.
def head(url, query_params = {}, options = {})
  defaults = {
    :uri     => url,
    :headers => {},
    :params  => query_params,
    :verb    => :head
  }
  # fetch the page
  response = fetch_page(defaults.merge(options))
  yield response if block_given?
  response
end

#logObject



168
# File 'lib/www/mechanize.rb', line 168

# Returns the logger, which is held on the class rather than on the
# individual agent.
def log
  self.class.log
end

#log=(l) ⇒ Object



167
# File 'lib/www/mechanize.rb', line 167

# Sets the logger. The value is stored on the class rather than on the
# individual agent.
def log=(l)
  self.class.log = l
end

#max_historyObject



166
# File 'lib/www/mechanize.rb', line 166

# Returns the maximum size of the page history (@history.max_size).
def max_history
  @history.max_size
end

#max_history=(length) ⇒ Object



165
# File 'lib/www/mechanize.rb', line 165

# Sets the maximum size of the page history (@history.max_size).
def max_history=(length)
  @history.max_size = length
end

#post(url, query = {}, headers = {}) ⇒ Object

Posts to the given URL with the request entity. The request entity is specified by either a string, or a list of key-value pairs represented by a hash or an array of arrays.

Examples:

agent.post('http://example.com/', "foo" => "bar")

agent.post('http://example.com/', [ ["foo", "bar"] ])

agent.post('http://example.com/', "<message>hello</message>", 'Content-Type' => 'application/xml')


313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
# File 'lib/www/mechanize.rb', line 313

# Posts to +url+ with the given request entity.
#
# A String +query+ is sent as-is through #request_with_entity. Anything
# else is iterated as key/value pairs (a hash or an array of arrays) and
# turned into a form that is submitted with #post_form: IO values become
# file uploads (switching the form to multipart/form-data), every other
# value becomes a plain field.
def post(url, query={}, headers={})
  return request_with_entity(:post, url, query, :headers => headers) if query.is_a?(String)

  # Create a fake form: a hash standing in for the form node, with the
  # +search+ method Form calls on it stubbed to find nothing.
  node = {}
  def node.search(*args)
    []
  end
  node['method'] = 'POST'
  node['enctype'] = 'application/x-www-form-urlencoded'

  form = Form.new(node)
  query.each do |name, value|
    unless value.is_a?(IO)
      form.fields << Form::Field.new(name.to_s, value)
      next
    end

    form.enctype = 'multipart/form-data'
    upload = Form::FileUpload.new(name.to_s, ::File.basename(value.path))
    upload.file_data = value.read
    form.file_uploads << upload
  end
  post_form(url, form, headers)
end

#post_connect_hooksObject



174
175
176
# File 'lib/www/mechanize.rb', line 174

# Returns the hooks held by @post_connect_hook, the Chain::PostConnectHook
# created in #initialize.
def post_connect_hooks
  @post_connect_hook.hooks
end

#pre_connect_hooksObject



170
171
172
# File 'lib/www/mechanize.rb', line 170

# Returns the hooks held by @pre_connect_hook, the Chain::PreConnectHook
# created in #initialize.
def pre_connect_hooks
  @pre_connect_hook.hooks
end

#pretty_print(q) ⇒ Object



6
7
8
9
10
11
12
13
# File 'lib/www/mechanize/inspect.rb', line 6

# Pretty-printing hook: renders the agent on printer +q+ as an object group
# containing its cookie jar followed by its current page.
def pretty_print(q)
  q.object_group(self) do
    q.breakable
    q.pp(cookie_jar)
    q.breakable
    q.pp(current_page)
  end
end

#put(url, entity, options = {}) ⇒ Object

PUT to url with entity, and setting options:

put('http://tenderlovemaking.com/', 'new content', :headers => {'Content-Type' => 'text/plain'})


250
251
252
# File 'lib/www/mechanize.rb', line 250

# PUTs +entity+ to +url+ by delegating to #request_with_entity with the
# :put verb; +options+ is passed through unchanged (e.g. :headers).
def put(url, entity, options = {})
  request_with_entity(:put, url, entity, options)
end

#request_with_entity(verb, url, entity, options = {}) ⇒ Object



361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
# File 'lib/www/mechanize.rb', line 361

# Sends +entity+ as the request body to +url+ using HTTP +verb+ (used by
# #put and by #post for String bodies). The resulting page is added to the
# history and returned.
#
# +options+ may override :uri, :referer and :headers. The referer defaults
# to the current page, or to a blank Page when nothing has been visited.
# Caller-supplied headers are merged over the default Content-Type
# (application/octet-stream) and the computed Content-Length.
def request_with_entity(verb, url, entity, options={})
  cur_page = current_page || Page.new( nil, {'content-type'=>'text/html'})

  options = {
    :uri      => url,
    :referer  => cur_page,
    :headers  => {},
  }.update(options)

  # Content-Length counts bytes. String#size counts characters on Ruby 1.9+,
  # which under-reports multibyte entities, so prefer bytesize when the
  # entity provides it.
  entity_length = entity.respond_to?(:bytesize) ? entity.bytesize : entity.size

  headers = {
    'Content-Type' => 'application/octet-stream',
    'Content-Length' => entity_length.to_s,
  }.update(options[:headers] || {}) # tolerate an explicit :headers => nil

  options.update({
    :verb => verb,
    :params => [entity],
    :headers => headers,
  })

  page = fetch_page(options)
  add_to_history(page)
  page
end

#set_proxy(addr, port, user = nil, pass = nil) ⇒ Object

Sets the proxy address, port, user, and password. addr should be a host, with no “http://” prefix.



180
181
182
# File 'lib/www/mechanize.rb', line 180

# Stores the proxy address, port, user and password. +addr+ should be a
# host, with no "http://". Returns the four values as an array.
def set_proxy(addr, port, user = nil, pass = nil)
  @proxy_addr = addr
  @proxy_port = port
  @proxy_user = user
  @proxy_pass = pass
  [addr, port, user, pass]
end

#submit(form, button = nil, headers = {}) ⇒ Object

Submit a form with an optional button. Without a button:

page = agent.get('http://example.com')
agent.submit(page.forms.first)

With a button

agent.submit(page.forms.first, page.forms.first.buttons.first)


345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
# File 'lib/www/mechanize.rb', line 345

# Submits +form+, optionally as if +button+ had been pressed, sending
# +headers+ along with the request.
#
# POST forms go through #post_form. GET forms go through #get, with any
# query string already on the form's action stripped and replaced by the
# form's own query. Any other method raises a RuntimeError.
def submit(form, button=nil, headers={})
  form.add_button_to_query(button) if button

  verb = form.method.upcase
  if verb == 'POST'
    post_form(form.action, form, headers)
  elsif verb == 'GET'
    # Drop everything from the last "?" onwards before appending the query.
    bare_action = form.action.gsub(/\?[^\?]*$/, '')
    get(:url     => bare_action,
        :params  => form.build_query,
        :headers => headers,
        :referer => form.page)
  else
    raise "unsupported method: #{verb}"
  end
end

#transactObject

Runs given block, then resets the page history as it was before. self is given as a parameter to the block. Returns the value of the block.



406
407
408
409
410
411
412
413
# File 'lib/www/mechanize.rb', line 406

# Runs the given block with self as its argument, then restores the page
# history to a copy taken before the block ran -- even when the block
# raises. Returns the value of the block.
def transact
  # Snapshot outside the begin/ensure so that a failure while copying never
  # overwrites @history.
  saved_history = @history.dup
  begin
    yield(self)
  ensure
    @history = saved_history
  end
end

#user_agent_alias=(al) ⇒ Object

Set the user agent for the Mechanize object. See AGENT_ALIASES



186
187
188
# File 'lib/www/mechanize.rb', line 186

# Sets the user agent to the AGENT_ALIASES entry named +al+.
# Raises a RuntimeError when no such alias exists.
def user_agent_alias=(al)
  agent_string = AGENT_ALIASES[al]
  raise("unknown agent alias") unless agent_string
  self.user_agent = agent_string
end

#visited?(url) ⇒ Boolean

Returns whether or not a url has been visited

Returns:

  • (Boolean)


392
393
394
# File 'lib/www/mechanize.rb', line 392

# Returns true when #visited_page finds a page for +url+, false otherwise.
def visited?(url)
  never_seen = visited_page(url).nil?
  !never_seen
end

#visited_page(url) ⇒ Object

Returns a visited page for the url passed in, otherwise nil



397
398
399
400
401
402
# File 'lib/www/mechanize.rb', line 397

# Returns the visited page for +url+, or nil when there is none.
# +url+ may be a URL or any object responding to +href+ (its href is used);
# the value is passed through #resolve before the history lookup.
def visited_page(url)
  target = url.respond_to?(:href) ? url.href : url
  @history.visited_page(resolve(target))
end