Class: WWW::Mechanize

Inherits:
Object
  • Object
show all
Defined in:
lib/www/mechanize.rb,
lib/www/mechanize/file.rb,
lib/www/mechanize/form.rb,
lib/www/mechanize/page.rb,
lib/www/mechanize/util.rb,
lib/www/mechanize/chain.rb,
lib/www/mechanize/cookie.rb,
lib/www/mechanize/headers.rb,
lib/www/mechanize/history.rb,
lib/www/mechanize/inspect.rb,
lib/www/mechanize/page/base.rb,
lib/www/mechanize/page/link.rb,
lib/www/mechanize/page/meta.rb,
lib/www/mechanize/cookie_jar.rb,
lib/www/mechanize/file_saver.rb,
lib/www/mechanize/form/field.rb,
lib/www/mechanize/page/frame.rb,
lib/www/mechanize/form/button.rb,
lib/www/mechanize/form/option.rb,
lib/www/mechanize/monkey_patch.rb,
lib/www/mechanize/file_response.rb,
lib/www/mechanize/form/check_box.rb,
lib/www/mechanize/form/file_upload.rb,
lib/www/mechanize/form/select_list.rb,
lib/www/mechanize/form/image_button.rb,
lib/www/mechanize/form/radio_button.rb,
lib/www/mechanize/pluggable_parsers.rb,
lib/www/mechanize/chain/auth_headers.rb,
lib/www/mechanize/chain/ssl_resolver.rb,
lib/www/mechanize/chain/uri_resolver.rb,
lib/www/mechanize/content_type_error.rb,
lib/www/mechanize/response_code_error.rb,
lib/www/mechanize/chain/custom_headers.rb,
lib/www/mechanize/chain/header_resolver.rb,
lib/www/mechanize/chain/response_reader.rb,
lib/www/mechanize/chain/pre_connect_hook.rb,
lib/www/mechanize/chain/request_resolver.rb,
lib/www/mechanize/form/multi_select_list.rb,
lib/www/mechanize/chain/parameter_resolver.rb,
lib/www/mechanize/unsupported_scheme_error.rb,
lib/www/mechanize/chain/connection_resolver.rb,
lib/www/mechanize/chain/response_body_parser.rb,
lib/www/mechanize/chain/body_decoding_handler.rb,
lib/www/mechanize/redirect_limit_reached_error.rb,
lib/www/mechanize/chain/response_header_handler.rb,
lib/www/mechanize/redirect_not_get_or_head_error.rb

Overview

:stopdoc:

Defined Under Namespace

Classes: Chain, ContentTypeError, Cookie, CookieJar, File, FileResponse, FileSaver, Form, Headers, History, Page, PluggableParser, RedirectLimitReachedError, RedirectNotGetOrHeadError, ResponseCodeError, UnsupportedSchemeError, Util

Constant Summary collapse

VERSION =

The version of Mechanize you are using.

'0.9.2'
AGENT_ALIASES =

User Agent aliases

{
  'Windows IE 6' => 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)',
  'Windows IE 7' => 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; .NET CLR 1.1.4322; .NET CLR 2.0.50727)',
  'Windows Mozilla' => 'Mozilla/5.0 (Windows; U; Windows NT 5.0; en-US; rv:1.4b) Gecko/20030516 Mozilla Firebird/0.6',
  'Mac Safari' => 'Mozilla/5.0 (Macintosh; U; PPC Mac OS X; en) AppleWebKit/418 (KHTML, like Gecko) Safari/417.9.3',
  'Mac FireFox' => 'Mozilla/5.0 (Macintosh; U; PPC Mac OS X Mach-O; en-US; rv:1.8.0.3) Gecko/20060426 Firefox/1.5.0.3',
  'Mac Mozilla' => 'Mozilla/5.0 (Macintosh; U; PPC Mac OS X Mach-O; en-US; rv:1.4a) Gecko/20030401',
  'Linux Mozilla' => 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.4) Gecko/20030624',
  'Linux Konqueror' => 'Mozilla/5.0 (compatible; Konqueror/3; Linux)',
  'iPhone' => 'Mozilla/5.0 (iPhone; U; CPU like Mac OS X; en) AppleWebKit/420+ (KHTML, like Gecko) Version/3.0 Mobile/1C28 Safari/419.3',
  'Mechanize' => "WWW-Mechanize/#{VERSION} (http://rubyforge.org/projects/mechanize/)"
}

Class Attribute Summary collapse

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize {|_self| ... } ⇒ Mechanize

Returns a new instance of Mechanize.

Yields:

  • (_self)

Yield Parameters:



96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
# File 'lib/www/mechanize.rb', line 96

# Builds a new agent with its default configuration, then yields the agent
# to the optional block so the caller can adjust settings.
def initialize
  # attr_accessors
  @cookie_jar      = CookieJar.new
  @log             = nil
  @open_timeout    = nil
  @read_timeout    = nil
  @user_agent      = AGENT_ALIASES['Mechanize']
  @watch_for_set   = nil
  @history_added   = nil
  @ca_file         = nil # OpenSSL server certificate file

  # Callback for OpenSSL errors raised while verifying the server
  # certificate chain; can be used for debugging, or to ignore errors by
  # always returning _true_.
  @verify_callback = nil
  @cert            = nil  # OpenSSL Certificate
  @key             = nil  # OpenSSL Private Key
  @pass            = nil  # OpenSSL Password
  @redirect_ok     = true # Should we follow redirects?

  # attr_readers
  @history          = WWW::Mechanize::History.new
  @pluggable_parser = PluggableParser.new

  # Auth variables
  @user      = nil # Auth User
  @password  = nil # Auth Password
  @digest    = nil # DigestAuth Digest
  @auth_hash = {}  # Keep track of urls for sending auth

  # Proxy settings (all unset until set_proxy is called)
  @proxy_addr = @proxy_pass = @proxy_port = @proxy_user = nil

  @conditional_requests = true

  @follow_meta_refresh = false
  @redirection_limit   = 20

  # Connection Cache & Keep alive
  @connection_cache = {}
  @keep_alive_time  = 300
  @keep_alive       = true

  # Looking up an unregistered scheme stores (and returns) a handler that
  # raises UnsupportedSchemeError for that scheme when it is called.
  @scheme_handlers = Hash.new do |handlers, scheme|
    handlers[scheme] = lambda { |link, page|
      raise UnsupportedSchemeError.new(scheme)
    }
  end

  # The built-in schemes all share one handler that returns the link as-is.
  pass_through = lambda { |link, page| link }
  %w[http https relative file].each do |scheme|
    @scheme_handlers[scheme] = pass_through
  end

  @pre_connect_hook  = Chain::PreConnectHook.new
  @post_connect_hook = Chain::PostConnectHook.new

  yield self if block_given?
end

Class Attribute Details

.html_parserObject

Returns the value of attribute html_parser.



94
95
96
# File 'lib/www/mechanize.rb', line 94

# Class-level reader (listed under "Class Attribute Details"): returns the
# receiver's @html_parser instance variable.
def html_parser
  @html_parser
end

.logObject

Returns the value of attribute log.



94
95
96
# File 'lib/www/mechanize.rb', line 94

# Class-level reader (listed under "Class Attribute Details"): returns the
# receiver's @log instance variable. The instance-level #log delegates to
# this via self.class.log.
def log
  @log
end

Instance Attribute Details

#ca_fileObject

Returns the value of attribute ca_file.



74
75
76
# File 'lib/www/mechanize.rb', line 74

# Returns @ca_file, the OpenSSL server certificate file (nil until set).
def ca_file
  @ca_file
end

#certObject

Returns the value of attribute cert.



76
77
78
# File 'lib/www/mechanize.rb', line 76

# Returns @cert, the OpenSSL certificate (nil until set).
def cert
  @cert
end

#conditional_requestsObject

Returns the value of attribute conditional_requests.



81
82
83
# File 'lib/www/mechanize.rb', line 81

# Returns @conditional_requests; the constructor initialises it to true.
def conditional_requests
  @conditional_requests
end

#cookie_jarObject

Returns the value of attribute cookie_jar.



70
71
72
# File 'lib/www/mechanize.rb', line 70

# Returns @cookie_jar; the constructor initialises it to a new CookieJar.
def cookie_jar
  @cookie_jar
end

#follow_meta_refreshObject

Returns the value of attribute follow_meta_refresh.



82
83
84
# File 'lib/www/mechanize.rb', line 82

# Returns @follow_meta_refresh; the constructor initialises it to false.
def follow_meta_refresh
  @follow_meta_refresh
end

#historyObject (readonly)

Returns the value of attribute history.



88
89
90
# File 'lib/www/mechanize.rb', line 88

# Read-only accessor for @history, the WWW::Mechanize::History created by
# the constructor.
def history
  @history
end

#history_addedObject

Returns the value of attribute history_added.



84
85
86
# File 'lib/www/mechanize.rb', line 84

# Returns @history_added (nil until set).
# NOTE(review): presumably a callback run when a page is added to the
# history -- confirm against add_to_history, which is not shown here.
def history_added
  @history_added
end

#keep_aliveObject

Returns the value of attribute keep_alive.



80
81
82
# File 'lib/www/mechanize.rb', line 80

# Returns @keep_alive; the constructor initialises it to true.
def keep_alive
  @keep_alive
end

#keep_alive_timeObject

Returns the value of attribute keep_alive_time.



79
80
81
# File 'lib/www/mechanize.rb', line 79

# Returns @keep_alive_time; the constructor initialises it to 300.
# NOTE(review): the unit is presumably seconds -- confirm where it is used.
def keep_alive_time
  @keep_alive_time
end

#keyObject

Returns the value of attribute key.



75
76
77
# File 'lib/www/mechanize.rb', line 75

# Returns @key, the OpenSSL private key (nil until set).
def key
  @key
end

#open_timeoutObject

Returns the value of attribute open_timeout.



71
72
73
# File 'lib/www/mechanize.rb', line 71

# Returns @open_timeout (nil until set).
def open_timeout
  @open_timeout
end

#passObject

Returns the value of attribute pass.



77
78
79
# File 'lib/www/mechanize.rb', line 77

# Returns @pass, the OpenSSL password (nil until set).
def pass
  @pass
end

#pluggable_parserObject (readonly)

Returns the value of attribute pluggable_parser.



89
90
91
# File 'lib/www/mechanize.rb', line 89

# Read-only accessor for @pluggable_parser, the PluggableParser created by
# the constructor.
def pluggable_parser
  @pluggable_parser
end

#read_timeoutObject

Returns the value of attribute read_timeout.



71
72
73
# File 'lib/www/mechanize.rb', line 71

# Returns @read_timeout (nil until set).
def read_timeout
  @read_timeout
end

#redirect_okObject Also known as: follow_redirect?

Returns the value of attribute redirect_ok.



78
79
80
# File 'lib/www/mechanize.rb', line 78

# Returns @redirect_ok, the flag for whether redirects should be followed
# (initialised to true). Also available as follow_redirect?.
def redirect_ok
  @redirect_ok
end

#redirection_limitObject

Returns the value of attribute redirection_limit.



86
87
88
# File 'lib/www/mechanize.rb', line 86

# Returns @redirection_limit; the constructor initialises it to 20.
def redirection_limit
  @redirection_limit
end

#scheme_handlersObject

Returns the value of attribute scheme_handlers.



85
86
87
# File 'lib/www/mechanize.rb', line 85

# Returns @scheme_handlers, a Hash keyed by scheme name whose values are
# lambdas taking (link, page). The constructor registers 'http', 'https',
# 'relative' and 'file'; looking up any other scheme stores a handler that
# raises UnsupportedSchemeError.
def scheme_handlers
  @scheme_handlers
end

#user_agentObject

Returns the value of attribute user_agent.



72
73
74
# File 'lib/www/mechanize.rb', line 72

# Returns @user_agent; the constructor initialises it to
# AGENT_ALIASES['Mechanize'].
def user_agent
  @user_agent
end

#verify_callbackObject

Returns the value of attribute verify_callback.



83
84
85
# File 'lib/www/mechanize.rb', line 83

# Returns @verify_callback, the callback for OpenSSL errors raised while
# verifying the server certificate chain (nil until set).
def verify_callback
  @verify_callback
end

#watch_for_setObject

Returns the value of attribute watch_for_set.



73
74
75
# File 'lib/www/mechanize.rb', line 73

# Returns @watch_for_set (nil until set).
def watch_for_set
  @watch_for_set
end

Instance Method Details

#auth(user, password) ⇒ Object Also known as: basic_auth

Sets the user and password to be used for authentication.



189
190
191
192
# File 'lib/www/mechanize.rb', line 189

# Stores the credentials to use for authentication in @user and @password.
# Also available as basic_auth. The return value is +password+, the value
# of the last assignment.
def auth(user, password)
  @user       = user
  @password   = password
end

#backObject

Equivalent to the browser back button. Returns the most recent page visited.



294
295
296
# File 'lib/www/mechanize.rb', line 294

# Browser-style "back": removes the last entry from @history and returns
# the result of History#pop.
def back
  @history.pop
end

#click(link) ⇒ Object

Clicks the WWW::Mechanize::Link object passed in and returns the page fetched.



285
286
287
288
289
290
# File 'lib/www/mechanize.rb', line 285

# Follows +link+ and returns the page fetched by #get.
#
# +link+ may be an object responding to #href, or anything indexable by
# 'href' / 'src'. The referer is link.page when that call succeeds and
# returns a truthy value; otherwise the current page is used.
def click(link)
  # Not every link-like object knows its page, so any StandardError raised
  # by link.page simply means "no referer from the link".
  referer = begin
    link.page
  rescue StandardError
    nil
  end

  href = if link.respond_to?(:href)
    link.href
  else
    link['href'] || link['src']
  end

  get(:url => href, :referer => (referer || current_page()))
end

#cookiesObject

Returns a list of cookies stored in the cookie jar.



184
185
186
# File 'lib/www/mechanize.rb', line 184

# Returns the contents of the cookie jar as produced by @cookie_jar.to_a.
def cookies
  @cookie_jar.to_a
end

#current_pageObject Also known as: page

Returns the current page loaded by Mechanize



350
351
352
# File 'lib/www/mechanize.rb', line 350

# Returns the last entry of @history, i.e. the page currently loaded.
# Also available as page.
def current_page
  @history.last
end

#delete(url, query_params = {}, options = {}) ⇒ Object

DELETE to url with query_params, and setting options:

delete('http://tenderlovemaking.com/', {'q' => 'foo'}, :headers => {})


254
255
256
257
258
# File 'lib/www/mechanize.rb', line 254

# Issues a DELETE request to +url+ with +query_params+ and +options+.
#
# The request goes through #head with :verb forced to :delete. The
# resulting page is added to the history and returned. +options+ is not
# modified.
def delete(url, query_params = {}, options = {})
  request_options = options.merge(:verb => :delete)
  response = head(url, query_params, request_options)
  add_to_history(response)
  response
end

#get(options, parameters = [], referer = nil) {|page| ... } ⇒ Object

Fetches the URL passed in and returns a page.

Yields:



196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
# File 'lib/www/mechanize.rb', line 196

# Fetches a URL and returns the page produced by fetch_page.
#
# Two calling conventions are accepted:
#
#   get(url, parameters = [], referer = nil)
#   get(:url => url, :params => [...], :referer => ..., :headers => {...})
#
# In the positional form, a second argument that does not respond to #each
# is treated as the referer (legacy signature).
#
# When no referer is supplied, an absolute http:// or https:// URL gets a
# blank placeholder page as referer; any other URL uses current_page (or a
# blank page when there is none).
#
# The fetched page is added to the history, yielded to the optional block,
# and returned.
#
# Raises ArgumentError when the options hash has no :url.
def get(options, parameters = [], referer = nil)
  if options.is_a?(Hash)
    url = options[:url]
    raise ArgumentError.new("url must be specified") unless url
    parameters = options[:params] || []
    referer    = options[:referer]
    headers    = options[:headers]
  else
    url = options
    unless parameters.respond_to?(:each) # FIXME: Remove this in 0.8.0
      referer = parameters
      parameters = []
    end
  end

  unless referer
    # Anchor on the whole string and require the "://" separator: the old
    # /^http/ test also matched relative paths that merely start with
    # "http" (e.g. "http_docs/index.html") and any later line of a
    # multi-line string, which dropped the current page as referer.
    if url.to_s =~ %r{\Ahttps?://}
      referer = Page.new(nil, {'content-type'=>'text/html'})
    else
      referer = current_page || Page.new(nil, {'content-type'=>'text/html'})
    end
  end

  # FIXME: Huge hack so that using a URI as a referer works.  I need to
  # refactor everything to pass around URIs but still support
  # WWW::Mechanize::Page#base
  unless referer.is_a?(WWW::Mechanize::File)
    referer = referer.is_a?(String) ?
      Page.new(URI.parse(referer), {'content-type' => 'text/html'}) :
      Page.new(referer, {'content-type' => 'text/html'})
  end

  # fetch the page
  page = fetch_page(  :uri      => url,
                      :referer  => referer,
                      :headers  => headers || {},
                      :params   => parameters
                   )
  add_to_history(page)
  yield page if block_given?
  page
end

#get_file(url) ⇒ Object

Fetch a file and return the contents of the file.



279
280
281
# File 'lib/www/mechanize.rb', line 279

# Fetches +url+ with #get and returns the body of the resulting page.
def get_file(url)
  page = get(url)
  page.body
end

#head(url, query_params = {}, options = {}) {|page| ... } ⇒ Object

HEAD to url with query_params, and setting options:

head('http://tenderlovemaking.com/', {'q' => 'foo'}, :headers => {})

Yields:



265
266
267
268
269
270
271
272
273
274
275
276
# File 'lib/www/mechanize.rb', line 265

# Issues a HEAD request to +url+ with +query_params+.
#
# The request is described by :uri, :headers, :params and :verb; entries
# in +options+ override these defaults (which is how #put and #delete
# switch the verb). The page returned by fetch_page is yielded to the
# optional block and then returned.
def head(url, query_params = {}, options = {})
  defaults = {
    :uri      => url,
    :headers  => {},
    :params   => query_params,
    :verb     => :head
  }
  page = fetch_page(defaults.merge(options))
  yield page if block_given?
  page
end

#logObject



161
# File 'lib/www/mechanize.rb', line 161

# Returns the logger held at class level (delegates to self.class.log).
def log
  self.class.log
end

#log=(l) ⇒ Object



160
# File 'lib/www/mechanize.rb', line 160

# Sets the logger at class level (delegates to self.class.log=).
def log=(l)
  self.class.log = l
end

#max_historyObject



159
# File 'lib/www/mechanize.rb', line 159

# Returns the maximum size of the page history (@history.max_size).
def max_history
  @history.max_size
end

#max_history=(length) ⇒ Object



158
# File 'lib/www/mechanize.rb', line 158

# Sets the maximum size of the page history (@history.max_size=).
def max_history=(length)
  @history.max_size = length
end

#post(url, query = {}) ⇒ Object

Posts to the given URL with the query parameters passed in. Query parameters can be passed as a hash, or as an array of arrays. Example:

agent.post('http://example.com/', "foo" => "bar")

or

agent.post('http://example.com/', [ ["foo", "bar"] ])


304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
# File 'lib/www/mechanize.rb', line 304

# Posts +query+ to +url+ and returns the result of post_form.
#
# +query+ is a Hash or an array of [name, value] pairs. A throw-away form
# is built from it: IO values become file uploads (and switch the form to
# multipart/form-data), every other value becomes a plain field.
def post(url, query={})
  # Stand-in for a parsed <form> node: a Hash of attributes that also
  # answers #search with no matches.
  node = {
    'method'  => 'POST',
    'enctype' => 'application/x-www-form-urlencoded'
  }
  def node.search(*args); []; end

  form = Form.new(node)
  query.each do |name, value|
    unless value.is_a?(IO)
      form.fields << Form::Field.new(name.to_s, value)
      next
    end

    form.enctype = 'multipart/form-data'
    upload = Form::FileUpload.new(name.to_s, ::File.basename(value.path))
    upload.file_data = value.read
    form.file_uploads << upload
  end
  post_form(url, form)
end

#post_connect_hooksObject



167
168
169
# File 'lib/www/mechanize.rb', line 167

# Returns the hooks collection of @post_connect_hook (the
# Chain::PostConnectHook created by the constructor).
def post_connect_hooks
  @post_connect_hook.hooks
end

#pre_connect_hooksObject



163
164
165
# File 'lib/www/mechanize.rb', line 163

# Returns the hooks collection of @pre_connect_hook (the
# Chain::PreConnectHook created by the constructor).
def pre_connect_hooks
  @pre_connect_hook.hooks
end

#pretty_print(q) ⇒ Object



6
7
8
9
10
11
12
13
# File 'lib/www/mechanize/inspect.rb', line 6

# Pretty-printer hook: inside an object group for this agent, emits the
# cookie jar and then the current page, each preceded by a breakable.
def pretty_print(q)
  q.object_group(self) do
    q.breakable
    q.pp(cookie_jar)
    q.breakable
    q.pp(current_page)
  end
end

#put(url, query_params = {}, options = {}) ⇒ Object

PUT to url with query_params, and setting options:

put('http://tenderlovemaking.com/', {'q' => 'foo'}, :headers => {})


243
244
245
246
247
# File 'lib/www/mechanize.rb', line 243

# Issues a PUT request to +url+ with +query_params+ and +options+.
#
# The request goes through #head with :verb forced to :put. The resulting
# page is added to the history and returned. +options+ is not modified.
def put(url, query_params = {}, options = {})
  request_options = options.merge(:verb => :put)
  response = head(url, query_params, request_options)
  add_to_history(response)
  response
end

#set_proxy(addr, port, user = nil, pass = nil) ⇒ Object

Sets the proxy address, port, user, and password. addr should be a host, with no “http://”.



173
174
175
# File 'lib/www/mechanize.rb', line 173

# Records the proxy settings in @proxy_addr, @proxy_port, @proxy_user and
# @proxy_pass. Returns the four values as an array, in that order.
def set_proxy(addr, port, user = nil, pass = nil)
  settings = [addr, port, user, pass]
  @proxy_addr, @proxy_port, @proxy_user, @proxy_pass = settings
  settings
end

#submit(form, button = nil, headers = {}) ⇒ Object

Submit a form with an optional button. Without a button:

page = agent.get('http://example.com')
agent.submit(page.forms.first)

With a button

agent.submit(page.forms.first, page.forms.first.buttons.first)


333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
# File 'lib/www/mechanize.rb', line 333

# Submits +form+, optionally registering +button+ as the pressed button.
#
# POST forms go to post_form with the form's action. GET forms go to #get
# with the action stripped of any trailing query string, the form's query
# as parameters and the form's page as referer. Any other method raises a
# RuntimeError.
def submit(form, button=nil, headers={})
  form.add_button_to_query(button) if button

  verb = form.method.upcase
  return post_form(form.action, form, headers) if verb == 'POST'
  raise "unsupported method: #{verb}" unless verb == 'GET'

  get(:url     => form.action.gsub(/\?[^\?]*$/, ''),
      :params  => form.build_query,
      :headers => headers,
      :referer => form.page)
end

#transactObject

Runs given block, then resets the page history as it was before. self is given as a parameter to the block. Returns the value of the block.



369
370
371
372
373
374
375
376
# File 'lib/www/mechanize.rb', line 369

# Runs the block with the agent as its argument and returns the block's
# value. Whatever the block does, including raising, @history is put back
# to a copy taken before the block ran.
def transact
  snapshot = @history.dup
  begin
    yield(self)
  ensure
    @history = snapshot
  end
end

#user_agent_alias=(al) ⇒ Object

Set the user agent for the Mechanize object. See AGENT_ALIASES



179
180
181
# File 'lib/www/mechanize.rb', line 179

# Sets the user agent string from one of the names in AGENT_ALIASES.
# Raises a RuntimeError when +al+ has no entry (user_agent is then left
# unchanged).
def user_agent_alias=(al)
  agent_string = AGENT_ALIASES[al]
  raise("unknown agent alias") unless agent_string
  self.user_agent = agent_string
end

#visited?(url) ⇒ Boolean

Returns whether or not a url has been visited

Returns:

  • (Boolean)


355
356
357
# File 'lib/www/mechanize.rb', line 355

# Returns true when #visited_page finds a page for +url+, false otherwise.
def visited?(url)
  page = visited_page(url)
  !page.nil?
end

#visited_page(url) ⇒ Object

Returns a visited page for the url passed in, otherwise nil



360
361
362
363
364
365
# File 'lib/www/mechanize.rb', line 360

# Looks up +url+ in the history and returns what History#visited_page
# returns for it. +url+ may be a link-like object responding to #href, in
# which case its href is used. The target is passed through resolve before
# the lookup.
def visited_page(url)
  target = url.respond_to?(:href) ? url.href : url
  @history.visited_page(resolve(target))
end