Class: Mechanize::Page
- Extended by:
- Forwardable, ElementMatcher
- Defined in:
- lib/mechanize/page.rb
Overview
Defined Under Namespace
Classes: Base, Frame, Image, Label, Link, MetaRefresh
Constant Summary
Constants included from Parser
Mechanize::Parser::SPECIAL_FILENAMES
Instance Attribute Summary collapse
-
#encodings ⇒ Object
readonly
Possible encodings for this page based on HTTP headers and meta elements.
-
#mech ⇒ Object
Returns the value of attribute mech.
Attributes inherited from File
Attributes included from Parser
Class Method Summary collapse
- .charset(content_type) ⇒ Object
-
.meta_charset(body) ⇒ Object
Retrieves all charsets from meta tags in body.
-
.meta_content_type(body) ⇒ Object
Retrieves the last content-type set by a meta tag in body.
- .response_header_charset(response) ⇒ Object
Instance Method Summary collapse
-
#base ⇒ Object
:method: bases_with(criteria).
-
#bases ⇒ Object
Return a list of all base tags.
-
#canonical_uri ⇒ Object
Return the canonical URI for the page if there is a link tag with rel="canonical" and an href attribute.
-
#content_type ⇒ Object
Get the content type.
- #detected_encoding ⇒ Object
- #encoding ⇒ Object
- #encoding=(encoding) ⇒ Object
-
#encoding_error?(parser = nil) ⇒ Boolean
Return whether parser result has errors related to encoding or not.
-
#form ⇒ Object
:method: forms_with(criteria).
-
#forms ⇒ Object
Return a list of all form tags.
-
#frame ⇒ Object
:method: frames_with(criteria).
-
#frames ⇒ Object
Return a list of all frame tags.
-
#iframe ⇒ Object
:method: iframes_with(criteria).
-
#iframes ⇒ Object
Return a list of all iframe tags.
- #image_urls ⇒ Object
-
#images ⇒ Object
Return a list of all img tags.
-
#initialize(uri = nil, response = nil, body = nil, code = nil, mech = nil) ⇒ Page
constructor
A new instance of Page.
-
#labels ⇒ Object
Return a list of all label tags.
- #labels_hash ⇒ Object
-
#link ⇒ Object
:method: links_with(criteria).
-
#links ⇒ Object
Return a list of all link and area tags.
- #meta_charset ⇒ Object
-
#meta_refresh ⇒ Object
Return a list of all meta refresh elements.
- #parser ⇒ Object (also: #root)
-
#pretty_print(q) ⇒ Object
:nodoc:.
- #reset ⇒ Object
- #response_header_charset ⇒ Object
- #title ⇒ Object
Methods included from ElementMatcher
Methods inherited from File
Methods included from Parser
#extract_filename, #fill_header, #find_free_name
Constructor Details
#initialize(uri = nil, response = nil, body = nil, code = nil, mech = nil) ⇒ Page
Returns a new instance of Page.
23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 |
# File 'lib/mechanize/page.rb', line 23
#
# Builds a Page and collects the candidate encodings for its body.
#
# uri, response, body and code are passed unchanged to the superclass
# initializer. mech, when given, must be a Mechanize instance.
#
# Raises Mechanize::ContentTypeError (with the offending content-type) when
# the response's content-type header does not match the HTML/XHTML pattern
# below, and RuntimeError ('no') when mech is not a Mechanize.
def initialize(uri=nil, response=nil, body=nil, code=nil, mech=nil)
  # NOTE(review): `^` binds only to the first alternative, so the XHTML type
  # is matched anywhere in the header value -- confirm this is intended.
  raise Mechanize::ContentTypeError, response['content-type'] unless
    response['content-type'] =~ /^(text\/html)|(application\/xhtml\+xml)/i

  @meta_content_type = nil
  @encoding = nil
  # nil is always the first candidate; #parser tries candidates last-to-first.
  @encodings = [nil]
  raise 'no' if mech and not Mechanize === mech
  @mech = mech

  reset

  @encodings << Mechanize::Util.detect_charset(body) if body

  @encodings.concat self.class.response_header_charset(response)

  if body
    # Force the encoding to be 8BIT so we can perform regular expressions.
    # We'll set it to the detected encoding later
    body.force_encoding 'ASCII-8BIT' if body.respond_to? :force_encoding

    @encodings.concat self.class.meta_charset body

    # A content-type declared by a meta tag overrides the header in
    # #content_type, but only when one was actually found.
    meta_content_type = self.class.meta_content_type body
    @meta_content_type = meta_content_type if meta_content_type
  end

  @encodings << mech.default_encoding if mech and mech.default_encoding

  super uri, response, body, code
end
Instance Attribute Details
#encodings ⇒ Object (readonly)
Possible encodings for this page based on HTTP headers and meta elements
21 22 23 |
# File 'lib/mechanize/page.rb', line 21
#
# Possible encodings for this page based on HTTP headers and meta elements.
# Returns the @encodings array as stored (not a copy).
def encodings
  @encodings
end
#mech ⇒ Object
Returns the value of attribute mech.
16 17 18 |
# File 'lib/mechanize/page.rb', line 16
#
# Returns the value of attribute mech (the @mech instance variable).
def mech
  @mech
end
Class Method Details
.charset(content_type) ⇒ Object
367 368 369 370 371 |
# File 'lib/mechanize/page.rb', line 367
#
# Extracts the charset parameter from a content-type value.
#
# content_type - a String such as 'text/html; charset=utf-8', or nil.
#
# Returns the charset name with any surrounding quotes removed, or nil when
# content_type is nil, has no charset parameter, or the charset is 'none'.
def self.charset(content_type)
  return nil unless content_type

  charset = content_type[/charset=([^; ]+)/i, 1]
  return nil unless charset

  # The parameter value may be a quoted-string (charset="utf-8"); the quotes
  # are not part of the encoding name.
  charset = charset.delete(%q("'))
  return nil if charset.empty? || charset == 'none'

  charset
end
.meta_charset(body) ⇒ Object
Retrieves all charsets from meta
tags in body
386 387 388 389 390 391 392 393 394 395 396 397 398 399 |
# File 'lib/mechanize/page.rb', line 386
#
# Retrieves all charsets from meta tags in body.
#
# body - a String of HTML.
#
# Returns an Array of charset names in document order. A tag contributes
# either its charset attribute or, for an http-equiv content-type tag, the
# charset parsed out of its content attribute; other tags contribute nothing.
def self.meta_charset body
  # HACK use .map
  body.scan(/<meta .*?>/i).map do |meta|
    if meta =~ /charset\s*=\s*(["'])?\s*(.+)\s*\1/i then
      $2
    elsif meta =~ /http-equiv\s*=\s*(["'])?content-type\1/i then
      meta =~ /content\s*=\s*(["'])?(.*?)\1/i

      # $2 is nil when the tag has no content attribute
      m_charset = charset $2 if $2

      m_charset if m_charset
    end
  end.compact
end
.meta_content_type(body) ⇒ Object
Retrieves the last content-type
set by a meta
tag in body
404 405 406 407 408 409 410 411 412 413 414 |
# File 'lib/mechanize/page.rb', line 404
#
# Retrieves the last content-type set by a meta tag in body.
#
# body - a String of HTML.
#
# Returns the content attribute of the last meta tag whose http-equiv is
# content-type (nil if that tag has no matching content attribute), or nil
# when body contains no such tag.
def self.meta_content_type body
  # Walk the tags backwards so the first hit is the last one in the document.
  body.scan(/<meta .*?>/i).reverse_each do |meta|
    if meta =~ /http-equiv\s*=\s*(["'])?content-type\1/i then
      meta =~ /content=(["'])?(.*?)\1/i

      return $2
    end
  end

  nil
end
.response_header_charset(response) ⇒ Object
373 374 375 376 377 378 379 380 381 |
# File 'lib/mechanize/page.rb', line 373
#
# Collects the charsets declared by the content-type header(s) of response.
#
# response - any object whose #each yields header name / value pairs.
#
# Returns an Array with one entry per content-type header that mentions
# "charset"; an entry is whatever .charset returns for that header value.
def self.response_header_charset response
  charsets = []

  response.each do |header, value|
    next unless header == 'content-type'
    next unless value =~ /charset/i

    charsets << charset(value)
  end

  charsets
end
Instance Method Details
#base ⇒ Object
:method: bases_with(criteria)
Find all base tags matching criteria. Example:
page.bases_with(:href => /foo/).each do |base|
puts base.href
end
248 |
# File 'lib/mechanize/page.rb', line 248
#
# Class-level macro call. NOTE(review): presumably supplied by ElementMatcher
# (which extends this class) and generating the base / bases_with finders
# documented for this entry -- confirm against ElementMatcher.
elements_with :base
#bases ⇒ Object
Return a list of all base tags
319 320 321 322 |
# File 'lib/mechanize/page.rb', line 319
#
# Return a list of all base tags. Each node found by search('base') is
# wrapped in a Base; the list is built on first use and cached in @bases.
def bases
  @bases ||=
    search('base').map { |node| Base.new(node, @mech, self) }
end
#canonical_uri ⇒ Object
Return the canonical URI for the page if there is a link tag with rel="canonical" and an href attribute.
174 175 176 177 178 179 180 181 182 |
# File 'lib/mechanize/page.rb', line 174
#
# Return the canonical URI for the page.
#
# Looks up the first <link rel="canonical" href="..."> element. Returns nil
# when there is none; otherwise returns its href parsed as a URI. If the raw
# href is not a valid URI it is escaped with Mechanize::Util.uri_escape and
# parsed again.
def canonical_uri
  link = at('link[@rel="canonical"][@href]')
  return unless link

  href = link['href']

  URI href
rescue URI::InvalidURIError
  URI Mechanize::Util.uri_escape href
end
#content_type ⇒ Object
Get the content type
185 186 187 |
# File 'lib/mechanize/page.rb', line 185
#
# Get the content type. A content-type recorded from a meta tag
# (@meta_content_type) takes precedence over the response's content-type
# header.
def content_type
  @meta_content_type || response['content-type']
end
#detected_encoding ⇒ Object
71 72 73 |
# File 'lib/mechanize/page.rb', line 71
#
# Returns the result of Mechanize::Util.detect_charset for this page's body.
def detected_encoding
  Mechanize::Util.detect_charset(body)
end
#encoding ⇒ Object
91 92 93 |
# File 'lib/mechanize/page.rb', line 91
#
# Returns the parser's encoding, or nil when the parser does not respond to
# #encoding (which includes the case where #parser returns nil).
def encoding
  parser.respond_to?(:encoding) ? parser.encoding : nil
end
#encoding=(encoding) ⇒ Object
75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 |
# File 'lib/mechanize/page.rb', line 75
#
# Sets the encoding to use the next time the page is parsed.
#
# All cached state is dropped via #reset, which also clears @parser, so the
# parser is lazily rebuilt with the new encoding on the next #parser call.
# (The former "compare with the current parser encoding" branch ran after
# #reset had already cleared @parser and could never execute.)
#
# Returns the encoding that was assigned.
def encoding=(encoding)
  reset

  @encoding = encoding
end
#encoding_error?(parser = nil) ⇒ Boolean
Return whether the parser result has errors related to encoding. A false result only means the parser reported no encoding errors; it does not mean the encoding is valid.
97 98 99 100 101 102 103 104 105 |
# File 'lib/mechanize/page.rb', line 97
#
# Return whether the parser result has errors related to encoding.
#
# parser - object exposing #errors (defaults to this page's #parser).
#
# Returns true when any error message matches one of the encoding-related
# patterns below, false otherwise. false only means no such error was
# reported, not that the encoding is valid.
def encoding_error?(parser=nil)
  parser = self.parser unless parser
  return false if parser.errors.empty?

  parser.errors.any? do |error|
    error.message =~ /(indicate\ encoding)|
                      (Invalid\ char)|
                      (input\ conversion\ failed)/x
  end
end
#form ⇒ Object
:method: forms_with(criteria)
Find all forms matching criteria. Example:
page.forms_with(:action => '/post/login.php').each do |f|
...
end
212 |
# File 'lib/mechanize/page.rb', line 212
#
# Class-level macro call. NOTE(review): presumably supplied by ElementMatcher
# (which extends this class) and generating the form / forms_with finders
# documented for this entry -- confirm against ElementMatcher.
elements_with :form
#forms ⇒ Object
Return a list of all form tags
298 299 300 301 302 303 304 |
# File 'lib/mechanize/page.rb', line 298
#
# Return a list of all form tags, each wrapped in a Mechanize::Form. A form
# without an action gets this page's URI as its action. The list is built on
# first use and cached in @forms.
def forms
  @forms ||= search('form').map do |html_form|
    form = Mechanize::Form.new(html_form, @mech, self)
    form.action ||= @uri.to_s
    form
  end
end
#frame ⇒ Object
:method: frames_with(criteria)
Find all frame tags matching criteria. Example:
page.frames_with(:src => /foo/).each do |frame|
p frame.src
end
266 |
# File 'lib/mechanize/page.rb', line 266
#
# Class-level macro call. NOTE(review): presumably supplied by ElementMatcher
# (which extends this class) and generating the frame / frames_with finders
# documented for this entry -- confirm against ElementMatcher.
elements_with :frame
#frames ⇒ Object
Return a list of all frame tags
326 327 328 329 |
# File 'lib/mechanize/page.rb', line 326
#
# Return a list of all frame tags, each wrapped in a Frame. The list is
# built on first use and cached in @frames.
def frames
  @frames ||=
    search('frame').map { |node| Frame.new(node, @mech, self) }
end
#iframe ⇒ Object
:method: iframes_with(criteria)
Find all iframe tags matching criteria. Example:
page.iframes_with(:src => /foo/).each do |iframe|
p iframe.src
end
284 |
# File 'lib/mechanize/page.rb', line 284
#
# Class-level macro call. NOTE(review): presumably supplied by ElementMatcher
# (which extends this class) and generating the iframe / iframes_with finders
# documented for this entry -- confirm against ElementMatcher.
elements_with :iframe
#iframes ⇒ Object
Return a list of all iframe tags
333 334 335 336 |
# File 'lib/mechanize/page.rb', line 333
#
# Return a list of all iframe tags. Each iframe node is wrapped in a Frame
# (the same wrapper class used by #frames). The list is built on first use
# and cached in @iframes.
def iframes
  @iframes ||=
    search('iframe').map { |node| Frame.new(node, @mech, self) }
end
#image_urls ⇒ Object
345 346 347 |
# File 'lib/mechanize/page.rb', line 345
#
# Returns the distinct values of #url across #images, in first-seen order.
# The list is built on first use and cached in @image_urls.
def image_urls
  @image_urls ||= images.map(&:url).uniq
end
#images ⇒ Object
Return a list of all img tags
340 341 342 343 |
# File 'lib/mechanize/page.rb', line 340
#
# Return a list of all img tags, each wrapped in an Image. The list is built
# on first use and cached in @images.
def images
  @images ||=
    search('img').map { |node| Image.new(node, self) }
end
#labels ⇒ Object
Return a list of all label tags
351 352 353 354 |
# File 'lib/mechanize/page.rb', line 351
#
# Return a list of all label tags, each wrapped in a Label. The list is
# built on first use and cached in @labels.
def labels
  @labels ||=
    search('label').map { |node| Label.new(node, self) }
end
#labels_hash ⇒ Object
356 357 358 359 360 361 362 363 364 365 |
# File 'lib/mechanize/page.rb', line 356
#
# Returns a Hash mapping each label's "for" attribute value to its Label.
# Labels whose #for is nil or false are skipped; when several labels share a
# "for" value the last one wins. The hash is built on first use and cached
# in @labels_hash.
def labels_hash
  @labels_hash ||= labels.each_with_object({}) do |label, hash|
    hash[label.node['for']] = label if label.for
  end
end
#link ⇒ Object
:method: links_with(criteria)
Find all links matching criteria. Example:
page.links_with(:href => /foo/).each do |link|
puts link.href
end
230 |
# File 'lib/mechanize/page.rb', line 230
#
# Class-level macro call. NOTE(review): presumably supplied by ElementMatcher
# (which extends this class) and generating the link / links_with finders
# documented for this entry -- confirm against ElementMatcher.
elements_with :link
#links ⇒ Object
Return a list of all link and area tags
288 289 290 291 292 293 294 |
# File 'lib/mechanize/page.rb', line 288
#
# Return a list of all link and area tags, each wrapped in a Link. All <a>
# elements come first, followed by all <area> elements. The list is built on
# first use and cached in @links.
def links
  @links ||= %w{ a area }.flat_map do |tag|
    search(tag).map { |node| Link.new(node, @mech, self) }
  end
end
#meta_charset ⇒ Object
67 68 69 |
# File 'lib/mechanize/page.rb', line 67
#
# Returns the charsets declared by meta tags in this page's body, by
# delegating to the class-level meta_charset(body).
def meta_charset
  self.class.meta_charset(body)
end
#meta_refresh ⇒ Object
Return a list of all meta refresh elements
309 310 311 312 313 314 315 |
# File 'lib/mechanize/page.rb', line 309
#
# Return a list of all meta refresh elements.
#
# Meta tags are searched anywhere in the document when the agent setting
# below is :anywhere, otherwise only directly under <head>. Nodes for which
# MetaRefresh.from_node returns nil are dropped. The list is built on first
# use and cached in @meta_refresh.
def meta_refresh
  # NOTE(review): the accessor name was lost in extraction; reconstructed as
  # follow_meta_refresh -- confirm against lib/mechanize/page.rb.
  query = @mech.follow_meta_refresh == :anywhere ? 'meta' : 'head > meta'

  @meta_refresh ||= search(query).map do |node|
    MetaRefresh.from_node node, self, uri
  end.compact
end
#parser ⇒ Object Also known as: root
107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 |
# File 'lib/mechanize/page.rb', line 107
#
# Returns the parsed document for this page, building and caching it in
# @parser on first use. Returns nil when there is no @body.
#
# Encoding selection, in order:
# 1. an explicitly assigned @encoding;
# 2. the agent's default encoding when force_default_encoding is set;
# 3. otherwise each candidate in @encodings, last to first, stopping at the
#    first parse that reports no encoding error (if every candidate reports
#    one, the parse from the last candidate tried is kept).
def parser
  return @parser if @parser
  return nil unless @body

  if @encoding then
    @parser = @mech.html_parser.parse html_body, nil, @encoding
  elsif mech.force_default_encoding then
    @parser = @mech.html_parser.parse html_body, nil, @mech.default_encoding
  else
    @encodings.reverse_each do |encoding|
      @parser = @mech.html_parser.parse html_body, nil, encoding

      break unless encoding_error? @parser
    end
  end

  @parser
end
#pretty_print(q) ⇒ Object
:nodoc:
128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 |
# File 'lib/mechanize/page.rb', line 128
#
# Pretty-printer hook: emits one group each for the page's url, meta_refresh
# entries, title, iframes, frames, links and forms, in that order, using the
# printer object q.
def pretty_print(q) # :nodoc:
  q.object_group(self) {
    q.breakable
    q.group(1, '{url', '}') {q.breakable; q.pp uri }
    q.breakable
    q.group(1, '{meta_refresh', '}') {
      meta_refresh.each { |link| q.breakable; q.pp link }
    }
    q.breakable
    q.group(1, '{title', '}') { q.breakable; q.pp title }
    q.breakable
    q.group(1, '{iframes', '}') {
      iframes.each { |link| q.breakable; q.pp link }
    }
    q.breakable
    q.group(1, '{frames', '}') {
      frames.each { |link| q.breakable; q.pp link }
    }
    q.breakable
    q.group(1, '{links', '}') {
      links.each { |link| q.breakable; q.pp link }
    }
    q.breakable
    q.group(1, '{forms', '}') {
      forms.each { |form| q.breakable; q.pp form }
    }
  }
end
#reset ⇒ Object
159 160 161 162 163 164 165 166 167 168 169 170 |
# File 'lib/mechanize/page.rb', line 159
#
# Drops every memoized value derived from the parsed document, plus the
# parser itself, so they are rebuilt on next access.
#
# @images and @image_urls are cleared too: #images and #image_urls memoize
# results built from the parser, so leaving them set would keep objects from
# the previous parse alive after the parser is rebuilt.
def reset
  @bases        = nil
  @forms        = nil
  @frames       = nil
  @iframes      = nil
  @images       = nil
  @image_urls   = nil
  @links        = nil
  @labels       = nil
  @labels_hash  = nil
  @meta_refresh = nil
  @parser       = nil
  @title        = nil
end
#response_header_charset ⇒ Object
63 64 65 |
# File 'lib/mechanize/page.rb', line 63
#
# Returns the charsets declared in this page's response headers, by
# delegating to the class-level response_header_charset(response).
def response_header_charset
  self.class.response_header_charset(response)
end
#title ⇒ Object
55 56 57 58 59 60 61 |
# File 'lib/mechanize/page.rb', line 55
#
# Returns the inner text of the document's title element(s), or nil when the
# page has no parser or the title text is empty. A non-nil result is cached
# in @title.
def title
  @title ||=
    if doc = parser
      title = doc.search('title').inner_text
      title.empty? ? nil : title
    end
end