Class: Mechanize

Inherits:
Object
  • Object
show all
Defined in:
lib/mechanize.rb,
lib/mechanize/file.rb,
lib/mechanize/form.rb,
lib/mechanize/page.rb,
lib/mechanize/util.rb,
lib/mechanize/chain.rb,
lib/mechanize/cookie.rb,
lib/mechanize/headers.rb,
lib/mechanize/history.rb,
lib/mechanize/inspect.rb,
lib/mechanize/page/base.rb,
lib/mechanize/page/link.rb,
lib/mechanize/page/meta.rb,
lib/mechanize/cookie_jar.rb,
lib/mechanize/file_saver.rb,
lib/mechanize/form/field.rb,
lib/mechanize/page/frame.rb,
lib/mechanize/page/image.rb,
lib/mechanize/page/label.rb,
lib/mechanize/form/button.rb,
lib/mechanize/form/option.rb,
lib/mechanize/monkey_patch.rb,
lib/mechanize/chain/handler.rb,
lib/mechanize/file_response.rb,
lib/mechanize/form/check_box.rb,
lib/mechanize/form/file_upload.rb,
lib/mechanize/form/select_list.rb,
lib/mechanize/form/image_button.rb,
lib/mechanize/form/radio_button.rb,
lib/mechanize/pluggable_parsers.rb,
lib/mechanize/chain/auth_headers.rb,
lib/mechanize/chain/ssl_resolver.rb,
lib/mechanize/chain/uri_resolver.rb,
lib/mechanize/content_type_error.rb,
lib/mechanize/response_code_error.rb,
lib/mechanize/chain/custom_headers.rb,
lib/mechanize/chain/header_resolver.rb,
lib/mechanize/chain/response_reader.rb,
lib/mechanize/chain/pre_connect_hook.rb,
lib/mechanize/chain/request_resolver.rb,
lib/mechanize/form/multi_select_list.rb,
lib/mechanize/chain/parameter_resolver.rb,
lib/mechanize/unsupported_scheme_error.rb,
lib/mechanize/chain/connection_resolver.rb,
lib/mechanize/chain/response_body_parser.rb,
lib/mechanize/chain/body_decoding_handler.rb,
lib/mechanize/redirect_limit_reached_error.rb,
lib/mechanize/chain/response_header_handler.rb,
lib/mechanize/redirect_not_get_or_head_error.rb

Overview

:stopdoc:

Defined Under Namespace

Modules: Handler Classes: Chain, ContentTypeError, Cookie, CookieJar, File, FileResponse, FileSaver, Form, Headers, History, Page, PluggableParser, RedirectLimitReachedError, RedirectNotGetOrHeadError, ResponseCodeError, UnsupportedSchemeError, Util

Constant Summary collapse

VERSION =

The version of Mechanize you are using.

'0.9.3'
AGENT_ALIASES =

User Agent aliases

{
  'Windows IE 6' => 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)',
  'Windows IE 7' => 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; .NET CLR 1.1.4322; .NET CLR 2.0.50727)',
  'Windows Mozilla' => 'Mozilla/5.0 (Windows; U; Windows NT 5.0; en-US; rv:1.4b) Gecko/20030516 Mozilla Firebird/0.6',
  'Mac Safari' => 'Mozilla/5.0 (Macintosh; U; PPC Mac OS X; en) AppleWebKit/418 (KHTML, like Gecko) Safari/417.9.3',
  'Mac FireFox' => 'Mozilla/5.0 (Macintosh; U; PPC Mac OS X Mach-O; en-US; rv:1.8.0.3) Gecko/20060426 Firefox/1.5.0.3',
  'Mac Mozilla' => 'Mozilla/5.0 (Macintosh; U; PPC Mac OS X Mach-O; en-US; rv:1.4a) Gecko/20030401',
  'Linux Mozilla' => 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.4) Gecko/20030624',
  'Linux Konqueror' => 'Mozilla/5.0 (compatible; Konqueror/3; Linux)',
  'iPhone' => 'Mozilla/5.0 (iPhone; U; CPU like Mac OS X; en) AppleWebKit/420+ (KHTML, like Gecko) Version/3.0 Mobile/1C28 Safari/419.3',
  'Mechanize' => "WWW-Mechanize/#{VERSION} (http://rubyforge.org/projects/mechanize/)"
}

Class Attribute Summary collapse

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize {|_self| ... } ⇒ Mechanize

Returns a new instance of Mechanize.

Yields:

  • (_self)

Yield Parameters:

  • _self (Mechanize)

    the object that the method was called on



99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
# File 'lib/mechanize.rb', line 99

# Builds a new agent populated with default settings. When a block is
# given, the freshly configured agent is yielded to it so the caller can
# adjust any setting before first use.
def initialize
  # State exposed through attr_accessors
  @cookie_jar     = CookieJar.new
  @log            = nil
  @open_timeout   = nil
  @read_timeout   = nil
  @user_agent     = AGENT_ALIASES['Mechanize']
  @watch_for_set  = nil
  @history_added  = nil

  # OpenSSL settings
  @ca_file        = nil # server certificate file
  # Callback for OpenSSL errors raised while verifying the server
  # certificate chain; useful for debugging, or for ignoring errors by
  # always returning _true_.
  @verify_callback = nil
  @cert           = nil # certificate
  @key            = nil # private key
  @pass           = nil # password

  @redirect_ok    = true # follow redirects?

  # State exposed through attr_readers
  @history        = Mechanize::History.new
  @pluggable_parser = PluggableParser.new

  # Authentication
  @user           = nil # auth user
  @password       = nil # auth password
  @digest         = nil # DigestAuth digest
  @auth_hash      = {}  # tracks urls for which auth is sent
  @request_headers= {}  # request headers to be used

  # Proxy
  @proxy_addr     = nil
  @proxy_pass     = nil
  @proxy_port     = nil
  @proxy_user     = nil

  @conditional_requests = true

  @follow_meta_refresh  = false
  @redirection_limit    = 20

  # Connection cache & keep-alive
  @connection_cache = {}
  @keep_alive_time  = 300
  @keep_alive       = true

  # Looking up an unregistered scheme stores (and returns) a handler that
  # raises UnsupportedSchemeError when it is eventually called.
  @scheme_handlers = Hash.new do |handlers, scheme|
    handlers[scheme] = lambda do |link, page|
      raise UnsupportedSchemeError.new(scheme)
    end
  end
  # Supported schemes all share one handler that returns the link as-is.
  pass_through = lambda { |link, page| link }
  %w[http https relative file].each do |scheme|
    @scheme_handlers[scheme] = pass_through
  end

  @pre_connect_hook  = Chain::PreConnectHook.new
  @post_connect_hook = Chain::PostConnectHook.new

  # Each instance starts out with the class-wide parser.
  @html_parser = self.class.html_parser

  yield self if block_given?
end

Class Attribute Details

.html_parserObject

Returns the value of attribute html_parser.



97
98
99
# File 'lib/mechanize.rb', line 97

# Class-level reader: returns the parser stored on the class itself.
# #initialize copies this value into every new instance's @html_parser.
def html_parser
  @html_parser
end

.logObject

Returns the value of attribute log.



97
98
99
# File 'lib/mechanize.rb', line 97

# Class-level reader: returns the log object stored on the class. The
# instance-level #log and #log= methods delegate to this class attribute.
def log
  @log
end

Instance Attribute Details

#ca_fileObject

Returns the value of attribute ca_file.



71
72
73
# File 'lib/mechanize.rb', line 71

# Returns the OpenSSL server certificate file, or nil when none has been
# configured (the constructor's default).
def ca_file
  @ca_file
end

#certObject

Returns the value of attribute cert.



73
74
75
# File 'lib/mechanize.rb', line 73

# Returns the configured OpenSSL certificate; nil by default.
def cert
  @cert
end

#conditional_requestsObject

Returns the value of attribute conditional_requests.



78
79
80
# File 'lib/mechanize.rb', line 78

# Returns the conditional_requests flag, which the constructor sets to true.
# NOTE(review): presumably toggles conditional HTTP requests - the consumer
# of this flag is not visible here; confirm.
def conditional_requests
  @conditional_requests
end

#cookie_jarObject

Returns the value of attribute cookie_jar.



67
68
69
# File 'lib/mechanize.rb', line 67

# Returns the agent's cookie jar. The constructor creates it with
# CookieJar.new; #cookies exposes its contents via to_a.
def cookie_jar
  @cookie_jar
end

#follow_meta_refreshObject

Returns the value of attribute follow_meta_refresh.



79
80
81
# File 'lib/mechanize.rb', line 79

# Returns the follow_meta_refresh flag; the constructor sets it to false.
# NOTE(review): presumably controls following <meta> refresh tags - the
# code that reads it is not visible here; confirm.
def follow_meta_refresh
  @follow_meta_refresh
end

#historyObject (readonly)

Returns the value of attribute history.



91
92
93
# File 'lib/mechanize.rb', line 91

# Returns the page history (read-only attribute). The constructor creates
# it with Mechanize::History.new.
def history
  @history
end

#history_addedObject

Returns the value of attribute history_added.



81
82
83
# File 'lib/mechanize.rb', line 81

# Returns the history_added value; nil by default.
# NOTE(review): presumably a callback run when a page is added to the
# history - its use is not visible here; confirm.
def history_added
  @history_added
end

#html_parserObject

The HTML parser to be used when parsing documents



89
90
91
# File 'lib/mechanize.rb', line 89

# Returns the HTML parser used by this agent when parsing documents.
# Defaults to the class-level html_parser captured by the constructor.
def html_parser
  @html_parser
end

#keep_aliveObject

Returns the value of attribute keep_alive.



77
78
79
# File 'lib/mechanize.rb', line 77

# Returns the keep_alive flag from the "Connection Cache & Keep alive"
# settings; the constructor sets it to true.
def keep_alive
  @keep_alive
end

#keep_alive_timeObject

Returns the value of attribute keep_alive_time.



76
77
78
# File 'lib/mechanize.rb', line 76

# Returns the keep-alive time; the constructor sets it to 300.
# NOTE(review): the unit is presumably seconds - confirm against the
# connection code.
def keep_alive_time
  @keep_alive_time
end

#keyObject

Returns the value of attribute key.



72
73
74
# File 'lib/mechanize.rb', line 72

# Returns the configured OpenSSL private key; nil by default.
def key
  @key
end

#open_timeoutObject

Returns the value of attribute open_timeout.



68
69
70
# File 'lib/mechanize.rb', line 68

# Returns the configured open timeout; nil by default (set in the
# constructor).
def open_timeout
  @open_timeout
end

#passObject

Returns the value of attribute pass.



74
75
76
# File 'lib/mechanize.rb', line 74

# Returns the configured OpenSSL password; nil by default.
def pass
  @pass
end

#pluggable_parserObject (readonly)

Returns the value of attribute pluggable_parser.



92
93
94
# File 'lib/mechanize.rb', line 92

# Returns the agent's pluggable parser (read-only attribute). The
# constructor creates it with PluggableParser.new.
def pluggable_parser
  @pluggable_parser
end

#read_timeoutObject

Returns the value of attribute read_timeout.



68
69
70
# File 'lib/mechanize.rb', line 68

# Returns the configured read timeout; nil by default (set in the
# constructor).
def read_timeout
  @read_timeout
end

#redirect_okObject Also known as: follow_redirect?

Returns the value of attribute redirect_ok.



75
76
77
# File 'lib/mechanize.rb', line 75

# Returns whether redirects should be followed; true by default.
# Also available as follow_redirect?.
def redirect_ok
  @redirect_ok
end

#redirection_limitObject

Returns the value of attribute redirection_limit.



83
84
85
# File 'lib/mechanize.rb', line 83

# Returns the redirection limit; the constructor sets it to 20.
def redirection_limit
  @redirection_limit
end

#request_headersObject

A hash of custom request headers



86
87
88
# File 'lib/mechanize.rb', line 86

# Returns the hash of custom request headers; an empty hash by default.
def request_headers
  @request_headers
end

#scheme_handlersObject

Returns the value of attribute scheme_handlers.



82
83
84
# File 'lib/mechanize.rb', line 82

# Returns the hash mapping a scheme name to a lambda taking (link, page).
# The constructor registers 'http', 'https', 'relative' and 'file' with a
# handler that returns the link unchanged; looking up any other scheme
# yields a handler that raises UnsupportedSchemeError when called.
def scheme_handlers
  @scheme_handlers
end

#user_agentObject

Returns the value of attribute user_agent.



69
70
71
# File 'lib/mechanize.rb', line 69

# Returns the User-Agent string in use. Defaults to
# AGENT_ALIASES['Mechanize']; see #user_agent_alias= to pick another alias.
def user_agent
  @user_agent
end

#verify_callbackObject

Returns the value of attribute verify_callback.



80
81
82
# File 'lib/mechanize.rb', line 80

# Returns the callback for OpenSSL errors raised while verifying the server
# certificate chain; nil by default. It can be used for debugging, or to
# ignore errors by always returning true.
def verify_callback
  @verify_callback
end

#watch_for_setObject

Returns the value of attribute watch_for_set.



70
71
72
# File 'lib/mechanize.rb', line 70

# Returns the watch_for_set value; nil by default.
# NOTE(review): its purpose is not visible in this file - confirm against
# the code that reads it.
def watch_for_set
  @watch_for_set
end

Instance Method Details

#auth(user, password) ⇒ Object Also known as: basic_auth

Sets the user and password to be used for authentication.



195
196
197
198
# File 'lib/mechanize.rb', line 195

# Stores the credentials to be used for authentication.
#
# user     - the auth user, stored in @user.
# password - the auth password, stored in @password.
#
# Returns password (the value of the last assignment). Also available as
# basic_auth.
def auth(user, password)
  @user       = user
  @password   = password
end

#backObject

Equivalent to the browser back button. Returns the most recent page visited.



298
299
300
# File 'lib/mechanize.rb', line 298

# Equivalent to the browser back button: pops the most recent entry off
# the history and returns whatever @history.pop returns.
def back
  @history.pop
end

#click(link) ⇒ Object

Clicks the Mechanize::Link object passed in and returns the page fetched.



289
290
291
292
293
294
# File 'lib/mechanize.rb', line 289

# Follows +link+ and returns the page fetched by #get.
#
# link - an object responding to #href, or one that supports
#        link['href'] / link['src'] lookups.
#
# The referer is the page the link reports via #page; when that is
# unavailable the current page is used instead.
def click(link)
  # Any StandardError raised while asking the link for its page is treated
  # as "no originating page".
  source_page = begin
    link.page
  rescue StandardError
    nil
  end

  target = if link.respond_to?(:href)
             link.href
           else
             link['href'] || link['src']
           end

  get(:url => target, :referer => (source_page || current_page))
end

#cookiesObject

Returns a list of cookies stored in the cookie jar.



190
191
192
# File 'lib/mechanize.rb', line 190

# Returns the cookies stored in the cookie jar, as produced by calling
# to_a on the jar.
def cookies
  @cookie_jar.to_a
end

#current_pageObject Also known as: page

Returns the current page loaded by Mechanize



386
387
388
# File 'lib/mechanize.rb', line 386

# Returns the page currently loaded by the agent, i.e. the last entry in
# the history. Also available as page.
def current_page
  @history.last
end

#delete(url, query_params = {}, options = {}) ⇒ Object

DELETE to url with query_params, and setting options:

delete('http://tenderlovemaking.com/', {'q' => 'foo'}, :headers => {})


258
259
260
261
262
# File 'lib/mechanize.rb', line 258

# Sends a DELETE request to +url+ with +query_params+ and records the
# resulting page in the history.
#
# The request is issued through #head with the :verb option forced to
# :delete; the caller's +options+ hash is not modified.
#
# Returns the fetched page.
def delete(url, query_params = {}, options = {})
  delete_options = options.merge(:verb => :delete)
  response = head(url, query_params, delete_options)
  add_to_history(response)
  response
end

#get(options, parameters = [], referer = nil) {|page| ... } ⇒ Object

Fetches the URL passed in and returns a page.

Yields:



202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
# File 'lib/mechanize.rb', line 202

# Fetches a URL and returns the resulting page, adding it to the history.
#
# Two calling conventions are accepted:
#
#   get(url, parameters = [], referer = nil)
#   get(:url => url, :params => [...], :referer => ..., :headers => {...})
#
# In the positional form, a second argument that does not respond to
# #each is treated as the referer (legacy get(url, referer) usage).
#
# Raises ArgumentError when the hash form has no :url.
# Yields the fetched page when a block is given.
def get(options, parameters = [], referer = nil)
  if options.is_a?(Hash)
    url = options[:url]
    raise ArgumentError.new("url must be specified") unless url
    parameters = options[:params] || []
    referer    = options[:referer]
    headers    = options[:headers]
  else
    url = options
    unless parameters.respond_to?(:each) # FIXME: Remove this in 0.8.0
      referer    = parameters
      parameters = []
    end
  end

  # Without an explicit referer: URLs starting with "http" get a blank
  # page; anything else falls back to the current page, if there is one.
  unless referer
    referer = current_page unless url.to_s =~ /^http/
    referer ||= Page.new(nil, {'content-type'=>'text/html'})
  end

  # FIXME: Huge hack so that using a URI as a referer works.  I need to
  # refactor everything to pass around URIs but still support
  # Mechanize::Page#base
  unless referer.is_a?(Mechanize::File)
    referer_uri = referer.is_a?(String) ? URI.parse(referer) : referer
    referer = Page.new(referer_uri, {'content-type' => 'text/html'})
  end

  # fetch the page
  request = {
    :uri      => url,
    :referer  => referer,
    :headers  => headers || {},
    :params   => parameters
  }
  page = fetch_page(request)
  add_to_history(page)
  yield page if block_given?
  page
end

#get_file(url) ⇒ Object

Fetch a file and return the contents of the file.



283
284
285
# File 'lib/mechanize.rb', line 283

# Fetches +url+ through #get (so the page is also added to the history)
# and returns the body of the fetched page.
def get_file(url)
  get(url).body
end

#head(url, query_params = {}, options = {}) {|page| ... } ⇒ Object

HEAD to url with query_params, and setting options:

head('http://tenderlovemaking.com/', {'q' => 'foo'}, :headers => {})

Yields:



269
270
271
272
273
274
275
276
277
278
279
280
# File 'lib/mechanize.rb', line 269

# Sends a HEAD request to +url+ with +query_params+.
#
# +options+ is merged over the defaults (:uri, :headers, :params, :verb),
# so callers may override any of them - #delete relies on this to change
# the verb. The page is not added to the history here.
#
# Yields the fetched page when a block is given, and returns it.
def head(url, query_params = {}, options = {})
  defaults = {
    :uri      => url,
    :headers  => {},
    :params   => query_params,
    :verb     => :head
  }
  # fetch the page
  page = fetch_page(defaults.merge(options))
  yield page if block_given?
  page
end

#logObject



167
# File 'lib/mechanize.rb', line 167

# Returns the log object, which is stored on the class rather than on the
# instance.
def log; self.class.log end

#log=(l) ⇒ Object



166
# File 'lib/mechanize.rb', line 166

# Sets the log object on the class, so the assignment is visible through
# every instance's #log.
def log=(l); self.class.log = l end

#max_historyObject



165
# File 'lib/mechanize.rb', line 165

# Returns the max_size of the history.
def max_history; @history.max_size end

#max_history=(length) ⇒ Object



164
# File 'lib/mechanize.rb', line 164

# Sets the max_size of the history to +length+.
def max_history=(length); @history.max_size = length end

#post(url, query = {}, headers = {}) ⇒ Object

Posts to the given URL with the request entity. The request entity is specified by either a string, or a list of key-value pairs represented by a hash or an array of arrays.

Examples:

agent.post('http://example.com/', "foo" => "bar")

agent.post('http://example.com/', [ ["foo", "bar"] ])

agent.post('http://example.com/', "<message>hello</message>", 'Content-Type' => 'application/xml')


312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
# File 'lib/mechanize.rb', line 312

# Posts to +url+.
#
# query   - either a String, sent as-is as the request entity, or a list
#           of key/value pairs (a Hash or an array of two-element arrays).
#           Values that are IO objects are sent as file uploads and switch
#           the form encoding to multipart/form-data.
# headers - extra request headers.
#
# Returns the result of #request_with_entity (String query) or #post_form.
def post(url, query={}, headers={})
  # A raw string is handed over unchanged as the request entity.
  return request_with_entity(:post, url, query, :headers => headers) if query.is_a?(String)

  # Create a fake form: a plain hash stands in for the form node, with a
  # #search that finds nothing.
  fake_node = {
    'method'  => 'POST',
    'enctype' => 'application/x-www-form-urlencoded'
  }
  def fake_node.search(*args); []; end

  form = Form.new(fake_node)
  query.each do |name, value|
    unless value.is_a?(IO)
      form.fields << Form::Field.new(name.to_s, value)
      next
    end

    form.enctype = 'multipart/form-data'
    upload = Form::FileUpload.new(name.to_s, ::File.basename(value.path))
    upload.file_data = value.read
    form.file_uploads << upload
  end
  post_form(url, form, headers)
end

#post_connect_hooksObject



173
174
175
# File 'lib/mechanize.rb', line 173

# Returns the hooks held by the post-connect hook object that the
# constructor creates (Chain::PostConnectHook).
def post_connect_hooks
  @post_connect_hook.hooks
end

#pre_connect_hooksObject



169
170
171
# File 'lib/mechanize.rb', line 169

# Returns the hooks held by the pre-connect hook object that the
# constructor creates (Chain::PreConnectHook).
def pre_connect_hooks
  @pre_connect_hook.hooks
end

#pretty_print(q) ⇒ Object



5
6
7
8
9
10
11
12
# File 'lib/mechanize/inspect.rb', line 5

# Pretty-print support: renders the agent as an object group containing
# its cookie jar followed by its current page.
#
# q - the pretty-printer object driving the output.
def pretty_print(q)
  q.object_group(self) do
    q.breakable
    q.pp(cookie_jar)
    q.breakable
    q.pp(current_page)
  end
end

#put(url, entity, options = {}) ⇒ Object

PUT to url with entity, and setting options:

put('http://tenderlovemaking.com/', 'new content', :headers => {'Content-Type' => 'text/plain'})


249
250
251
# File 'lib/mechanize.rb', line 249

# Sends a PUT request to +url+ with +entity+ as the request body.
#
# options - passed straight through to #request_with_entity (for example
#           :headers => {'Content-Type' => 'text/plain'}).
#
# Returns the page returned by #request_with_entity.
def put(url, entity, options = {})
  request_with_entity(:put, url, entity, options)
end

#request_with_entity(verb, url, entity, options = {}) ⇒ Object



360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
# File 'lib/mechanize.rb', line 360

# Sends +entity+ as the request body to +url+ using the HTTP +verb+, and
# records the resulting page in the history.
#
# verb    - the request verb, e.g. :post or :put.
# url     - the target URL.
# entity  - the request body.
# options - overrides for :uri, :referer and :headers. Caller-supplied
#           headers take precedence over the defaults; a nil :headers value
#           is treated as "no extra headers".
#
# Default headers are Content-Type: application/octet-stream and a
# Content-Length equal to the entity's size in bytes.
#
# Returns the fetched page.
def request_with_entity(verb, url, entity, options={})
  cur_page = current_page || Page.new( nil, {'content-type'=>'text/html'})

  options = {
    :uri      => url,
    :referer  => cur_page,
    :headers  => {},
  }.update(options)

  # Content-Length must count bytes, not characters: for a multibyte string
  # #size under-reports the length. Fall back to #size for entities (or
  # Ruby versions) without #bytesize.
  entity_length = entity.respond_to?(:bytesize) ? entity.bytesize : entity.size

  headers = {
    'Content-Type' => 'application/octet-stream',
    'Content-Length' => entity_length.to_s,
  }.update(options[:headers] || {})

  options.update({
                   :verb => verb,
                   :params => [entity],
                   :headers => headers,
                 })

  page = fetch_page(options)
  add_to_history(page)
  page
end

#set_proxy(addr, port, user = nil, pass = nil) ⇒ Object

Sets the proxy address, port, user, and password. addr should be a host, with no “http://”.



179
180
181
# File 'lib/mechanize.rb', line 179

# Stores the proxy settings.
#
# addr - proxy host (a bare host name, without a scheme).
# port - proxy port.
# user - optional proxy user name.
# pass - optional proxy password.
#
# Returns [addr, port, user, pass].
def set_proxy(addr, port, user = nil, pass = nil)
  @proxy_addr = addr
  @proxy_port = port
  @proxy_user = user
  @proxy_pass = pass
  [addr, port, user, pass]
end

#submit(form, button = nil, headers = {}) ⇒ Object

Submit a form with an optional button. Without a button:

page = agent.get('http://example.com')
agent.submit(page.forms.first)

With a button

agent.submit(page.forms.first, page.forms.first.buttons.first)


344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
# File 'lib/mechanize.rb', line 344

# Submits +form+, optionally as if +button+ had been pressed.
#
# form    - the form to submit; its method decides how it is sent.
# button  - optional button added to the form's query before submitting.
# headers - extra request headers.
#
# POST forms go through #post_form. GET forms go through #get, with any
# trailing query string stripped from the form action and replaced by the
# form's own query.
#
# Raises RuntimeError for any other form method.
def submit(form, button=nil, headers={})
  form.add_button_to_query(button) if button

  verb = form.method.upcase
  case verb
  when 'POST'
    post_form(form.action, form, headers)
  when 'GET'
    action_without_query = form.action.gsub(/\?[^\?]*$/, '')
    get(:url      => action_without_query,
        :params   => form.build_query,
        :headers  => headers,
        :referer  => form.page)
  else
    raise "unsupported method: #{verb}"
  end
end

#transactObject

Runs given block, then resets the page history as it was before. self is given as a parameter to the block. Returns the value of the block.



405
406
407
408
409
410
411
412
# File 'lib/mechanize.rb', line 405

# Runs the given block with the agent as its argument, then puts the page
# history back to a copy taken before the block ran - even when the block
# raises.
#
# Returns the value of the block.
def transact
  # Taken outside the begin/ensure so that a failure while copying leaves
  # @history untouched.
  snapshot = @history.dup
  begin
    yield(self)
  ensure
    @history = snapshot
  end
end

#user_agent_alias=(al) ⇒ Object

Set the user agent for the Mechanize object. See AGENT_ALIASES



185
186
187
# File 'lib/mechanize.rb', line 185

# Sets the user agent from one of the names in AGENT_ALIASES.
#
# al - the alias name, e.g. 'Mac Safari'.
#
# Raises RuntimeError ("unknown agent alias") when +al+ is not a known
# alias; the current user agent is left unchanged in that case.
def user_agent_alias=(al)
  agent_string = AGENT_ALIASES[al]
  raise("unknown agent alias") unless agent_string
  self.user_agent = agent_string
end

#visited?(url) ⇒ Boolean

Returns whether or not a url has been visited

Returns:

  • (Boolean)


391
392
393
# File 'lib/mechanize.rb', line 391

# Returns true when #visited_page finds a page for +url+, false when it
# returns nil.
def visited?(url)
  ! visited_page(url).nil?
end

#visited_page(url) ⇒ Object

Returns a visited page for the url passed in, otherwise nil



396
397
398
399
400
401
# File 'lib/mechanize.rb', line 396

# Looks up a previously visited page in the history.
#
# url - a URL, or any object responding to #href (its href is used).
#
# Returns whatever the history reports for the resolved URL; per the
# method's contract that is the visited page, or nil when there is none.
def visited_page(url)
  target = url.respond_to?(:href) ? url.href : url
  @history.visited_page(resolve(target))
end