Class: Mechanize::Page
- Extended by:
- Forwardable, ElementMatcher
- Defined in:
- lib/mechanize/page.rb,
lib/mechanize/inspect.rb,
lib/mechanize/page/base.rb,
lib/mechanize/page/image.rb,
lib/mechanize/page/label.rb,
lib/mechanize/monkey_patch.rb
Overview
Defined Under Namespace
Classes: Base, Frame, Image, Label, Link, MetaRefresh
Instance Attribute Summary collapse
-
#encodings ⇒ Object
readonly
Possible encodings for this page based on HTTP headers and meta elements.
-
#mech ⇒ Object
Returns the value of attribute mech.
Attributes inherited from File
#body, #code, #filename, #response, #uri
Class Method Summary collapse
- .charset(content_type) ⇒ Object
-
.meta_charset(body) ⇒ Object
Retrieves all charsets from meta tags in body.
-
.meta_content_type(body) ⇒ Object
Retrieves the last content-type set by a meta tag in body.
- .response_header_charset(response) ⇒ Object
Instance Method Summary collapse
-
#base ⇒ Object
:method: bases_with(criteria).
-
#bases ⇒ Object
Return a list of all base tags.
-
#canonical_uri ⇒ Object
Return the canonical URI for the page if there is a link tag with rel="canonical".
-
#content_type ⇒ Object
Get the content type.
- #detected_encoding ⇒ Object
- #encoding ⇒ Object
- #encoding=(encoding) ⇒ Object
-
#encoding_error?(parser = nil) ⇒ Boolean
Return whether parser result has errors related to encoding or not.
-
#form ⇒ Object
:method: forms_with(criteria).
-
#forms ⇒ Object
Return a list of all form tags.
-
#frame ⇒ Object
:method: frames_with(criteria).
-
#frames ⇒ Object
Return a list of all frame tags.
-
#iframe ⇒ Object
:method: iframes_with(criteria).
-
#iframes ⇒ Object
Return a list of all iframe tags.
- #image_urls ⇒ Object
-
#images ⇒ Object
Return a list of all img tags.
-
#initialize(uri = nil, response = nil, body = nil, code = nil, mech = nil) ⇒ Page
constructor
A new instance of Page.
-
#labels ⇒ Object
Return a list of all label tags.
- #labels_hash ⇒ Object
-
#link ⇒ Object
:method: links_with(criteria).
-
#links ⇒ Object
Return a list of all link and area tags.
- #meta_charset ⇒ Object
-
#meta_refresh ⇒ Object
Return a list of all meta refresh elements.
- #parser ⇒ Object (also: #root)
- #pretty_print(q) ⇒ Object
- #reset ⇒ Object
- #response_header_charset ⇒ Object
- #title ⇒ Object
Methods included from ElementMatcher
Methods inherited from File
Constructor Details
#initialize(uri = nil, response = nil, body = nil, code = nil, mech = nil) ⇒ Page
Returns a new instance of Page.
23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 |
# File 'lib/mechanize/page.rb', line 23 def initialize(uri=nil, response=nil, body=nil, code=nil, mech=nil) raise Mechanize::ContentTypeError, response['content-type'] unless response['content-type'] =~ /^(text\/html)|(application\/xhtml\+xml)/i @meta_content_type = nil @encoding = nil @encodings = [nil] raise 'no' if mech and not Mechanize === mech @mech = mech reset @encodings << Mechanize::Util.detect_charset(body) if body @encodings.concat self.class.response_header_charset(response) if body # Force the encoding to be 8BIT so we can perform regular expressions. # We'll set it to the detected encoding later body.force_encoding 'ASCII-8BIT' if body.respond_to? :force_encoding @encodings.concat self.class.meta_charset body meta_content_type = self.class.meta_content_type body @meta_content_type = meta_content_type if meta_content_type end @encodings << mech.default_encoding if mech and mech.default_encoding super uri, response, body, code end |
Instance Attribute Details
#encodings ⇒ Object (readonly)
Possible encodings for this page based on HTTP headers and meta elements
21 22 23 |
# File 'lib/mechanize/page.rb', line 21 def encodings @encodings end |
#mech ⇒ Object
Returns the value of attribute mech.
16 17 18 |
# File 'lib/mechanize/page.rb', line 16 def mech @mech end |
Class Method Details
.charset(content_type) ⇒ Object
336 337 338 339 340 |
# File 'lib/mechanize/page.rb', line 336 def self.charset content_type charset = content_type[/charset=([^; ]+)/i, 1] return nil if charset == 'none' charset end |
.meta_charset(body) ⇒ Object
Retrieves all charsets from meta
tags in body
354 355 356 357 358 359 360 361 362 363 364 365 366 367 |
# File 'lib/mechanize/page.rb', line 354 def self.meta_charset body # HACK use .map body.scan(/<meta .*?>/i).map do |meta| if meta =~ /charset\s*=\s*(["'])?\s*(.+)\s*\1/i then $2 elsif meta =~ /http-equiv\s*=\s*(["'])?content-type\1/i then meta =~ /content=(["'])?(.*?)\1/i m_charset = charset $2 m_charset if m_charset end end.compact end |
.meta_content_type(body) ⇒ Object
Retrieves the last content-type
set by a meta
tag in body
372 373 374 375 376 377 378 379 380 381 382 |
# File 'lib/mechanize/page.rb', line 372 def self.meta_content_type body body.scan(/<meta .*?>/i).reverse.map do |meta| if meta =~ /http-equiv\s*=\s*(["'])?content-type\1/i then meta =~ /content=(["'])?(.*?)\1/i return $2 end end nil end |
.response_header_charset(response) ⇒ Object
342 343 344 345 346 347 348 349 |
# File 'lib/mechanize/page.rb', line 342 def self.response_header_charset response charsets = [] response.each do |header, value| next unless value =~ /charset/i charsets << charset(value) end charsets end |
Instance Method Details
#base ⇒ Object
:method: bases_with(criteria)
Find all base tags matching criteria. Example:
page.bases_with(:href => /foo/).each do |base|
puts base.href
end
217 |
# File 'lib/mechanize/page.rb', line 217 elements_with :base |
#bases ⇒ Object
Return a list of all base tags
288 289 290 291 |
# File 'lib/mechanize/page.rb', line 288 def bases @bases ||= search('base').map { |node| Base.new(node, @mech, self) } end |
#canonical_uri ⇒ Object
Return the canonical URI for the page if there is a link tag with rel="canonical".
143 144 145 146 147 148 149 150 151 |
# File 'lib/mechanize/page.rb', line 143 def canonical_uri link = at('link[@rel="canonical"][@href]') return unless link href = link['href'] URI href rescue URI::InvalidURIError URI Mechanize::Util.uri_escape href end |
#content_type ⇒ Object
Get the content type
154 155 156 |
# File 'lib/mechanize/page.rb', line 154 def content_type @meta_content_type || response['content-type'] end |
#detected_encoding ⇒ Object
71 72 73 |
# File 'lib/mechanize/page.rb', line 71 def detected_encoding Mechanize::Util.detect_charset(body) end |
#encoding ⇒ Object
91 92 93 |
# File 'lib/mechanize/page.rb', line 91 def encoding parser.respond_to?(:encoding) ? parser.encoding : nil end |
#encoding=(encoding) ⇒ Object
75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 |
# File 'lib/mechanize/page.rb', line 75 def encoding=(encoding) reset @encoding = encoding if @parser parser_encoding = @parser.encoding if (parser_encoding && parser_encoding.downcase) != (encoding && encoding.downcase) # lazy reinitialize the parser with the new encoding @parser = nil end end encoding end |
#encoding_error?(parser = nil) ⇒ Boolean
Return whether the parser result has errors related to encoding. A false result only means the parser reported no encoding errors; it does not mean the encoding is valid.
97 98 99 100 101 102 103 104 105 |
# File 'lib/mechanize/page.rb', line 97 def encoding_error?(parser=nil) parser = self.parser unless parser return false if parser.errors.empty? parser.errors.any? do |error| error.message =~ /(indicate\ encoding)| (Invalid\ char)| (input\ conversion\ failed)/x end end |
#form ⇒ Object
:method: forms_with(criteria)
Find all forms matching criteria. Example:
page.forms_with(:action => '/post/login.php').each do |f|
...
end
181 |
# File 'lib/mechanize/page.rb', line 181 elements_with :form |
#forms ⇒ Object
Return a list of all form tags
267 268 269 270 271 272 273 |
# File 'lib/mechanize/page.rb', line 267 def forms @forms ||= search('form').map do |html_form| form = Mechanize::Form.new(html_form, @mech, self) form.action ||= @uri.to_s form end end |
#frame ⇒ Object
:method: frames_with(criteria)
Find all frame tags matching criteria. Example:
page.frames_with(:src => /foo/).each do |frame|
p frame.src
end
235 |
# File 'lib/mechanize/page.rb', line 235 elements_with :frame |
#frames ⇒ Object
Return a list of all frame tags
295 296 297 298 |
# File 'lib/mechanize/page.rb', line 295 def frames @frames ||= search('frame').map { |node| Frame.new(node, @mech, self) } end |
#iframe ⇒ Object
:method: iframes_with(criteria)
Find all iframe tags matching criteria. Example:
page.iframes_with(:src => /foo/).each do |iframe|
p iframe.src
end
253 |
# File 'lib/mechanize/page.rb', line 253 elements_with :iframe |
#iframes ⇒ Object
Return a list of all iframe tags
302 303 304 305 |
# File 'lib/mechanize/page.rb', line 302 def iframes @iframes ||= search('iframe').map { |node| Frame.new(node, @mech, self) } end |
#image_urls ⇒ Object
314 315 316 |
# File 'lib/mechanize/page.rb', line 314 def image_urls @image_urls ||= images.map(&:url).uniq end |
#images ⇒ Object
Return a list of all img tags
309 310 311 312 |
# File 'lib/mechanize/page.rb', line 309 def images @images ||= search('img').map { |node| Image.new(node, self) } end |
#labels ⇒ Object
Return a list of all label tags
320 321 322 323 |
# File 'lib/mechanize/page.rb', line 320 def labels @labels ||= search('label').map { |node| Label.new(node, self) } end |
#labels_hash ⇒ Object
325 326 327 328 329 330 331 332 333 334 |
# File 'lib/mechanize/page.rb', line 325 def labels_hash unless @labels_hash hash = {} labels.each do |label| hash[label.node['for']] = label if label.for end @labels_hash = hash end return @labels_hash end |
#link ⇒ Object
:method: links_with(criteria)
Find all links matching criteria. Example:
page.links_with(:href => /foo/).each do |link|
puts link.href
end
199 |
# File 'lib/mechanize/page.rb', line 199 elements_with :link |
#links ⇒ Object
Return a list of all link and area tags
257 258 259 260 261 262 263 |
# File 'lib/mechanize/page.rb', line 257 def links @links ||= %w{ a area }.map do |tag| search(tag).map do |node| Link.new(node, @mech, self) end end.flatten end |
#meta_charset ⇒ Object
67 68 69 |
# File 'lib/mechanize/page.rb', line 67 def meta_charset self.class.meta_charset(body) end |
#meta_refresh ⇒ Object
Return a list of all meta refresh elements
278 279 280 281 282 283 284 |
# File 'lib/mechanize/page.rb', line 278 def meta_refresh query = @mech.follow_meta_refresh == :anywhere ? 'meta' : 'head > meta' @meta_refresh ||= search(query).map do |node| MetaRefresh.from_node node, self, uri end.compact end |
#parser ⇒ Object Also known as: root
107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 |
# File 'lib/mechanize/page.rb', line 107 def parser return @parser if @parser return nil unless @body if @encoding then @parser = @mech.html_parser.parse html_body, nil, @encoding elsif mech.force_default_encoding then @parser = @mech.html_parser.parse html_body, nil, @mech.default_encoding else @encodings.reverse_each do |encoding| @parser = @mech.html_parser.parse html_body, nil, encoding break unless encoding_error? @parser end end @parser end |
#pretty_print(q) ⇒ Object
15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 |
# File 'lib/mechanize/inspect.rb', line 15 def pretty_print(q) q.object_group(self) { q.breakable q.group(1, '{url', '}') {q.breakable; q.pp uri } q.breakable q.group(1, '{meta_refresh', '}') { meta_refresh.each { |link| q.breakable; q.pp link } } q.breakable q.group(1, '{title', '}') { q.breakable; q.pp title } q.breakable q.group(1, '{iframes', '}') { iframes.each { |link| q.breakable; q.pp link } } q.breakable q.group(1, '{frames', '}') { frames.each { |link| q.breakable; q.pp link } } q.breakable q.group(1, '{links', '}') { links.each { |link| q.breakable; q.pp link } } q.breakable q.group(1, '{forms', '}') { forms.each { |form| q.breakable; q.pp form } } } end |
#reset ⇒ Object
128 129 130 131 132 133 134 135 136 137 138 139 |
# File 'lib/mechanize/page.rb', line 128 def reset @bases = nil @forms = nil @frames = nil @iframes = nil @links = nil @labels = nil @labels_hash = nil @meta_refresh = nil @parser = nil @title = nil end |
#response_header_charset ⇒ Object
63 64 65 |
# File 'lib/mechanize/page.rb', line 63 def response_header_charset self.class.response_header_charset(response) end |
#title ⇒ Object
55 56 57 58 59 60 61 |
# File 'lib/mechanize/page.rb', line 55 def title @title ||= if doc = parser title = doc.search('title').inner_text title.empty? ? nil : title end end |