Class: Mechanize::Page
- Extended by:
- Forwardable, ElementMatcher
- Defined in:
- lib/mechanize/page.rb
Overview
Defined Under Namespace
Classes: Base, Frame, Image, Label, Link, MetaRefresh
Constant Summary collapse
- DEFAULT_RESPONSE =
{ 'content-type' => 'text/html', }.freeze
Constants included from Parser
Mechanize::Parser::SPECIAL_FILENAMES
Instance Attribute Summary collapse
-
#encodings ⇒ Object
readonly
Possible encodings for this page based on HTTP headers and meta elements.
-
#mech ⇒ Object
Returns the value of attribute mech.
Attributes inherited from File
Attributes included from Parser
Class Method Summary collapse
- .charset(content_type) ⇒ Object (also: charset_from_content_type)
-
.meta_charset(body) ⇒ Object
Retrieves all charsets from meta tags in body.
-
.meta_content_type(body) ⇒ Object
Retrieves the last content-type set by a meta tag in body.
- .response_header_charset(response) ⇒ Object
Instance Method Summary collapse
-
#base ⇒ Object
:method: bases_with.
-
#bases ⇒ Object
Return a list of all base tags.
-
#canonical_uri ⇒ Object
Return the canonical URI for the page if there is a link tag with rel=“canonical”.
-
#content_type ⇒ Object
Get the content type.
- #detected_encoding ⇒ Object
- #encoding ⇒ Object
- #encoding=(encoding) ⇒ Object
-
#encoding_error?(parser = nil) ⇒ Boolean
Return whether the parser result has encoding-related errors.
-
#form ⇒ Object
:method: forms_with.
-
#forms ⇒ Object
Return a list of all form tags.
-
#frame ⇒ Object
:method: frames_with.
-
#frames ⇒ Object
Return a list of all frame tags.
-
#iframe ⇒ Object
:method: iframes_with.
-
#iframes ⇒ Object
Return a list of all iframe tags.
-
#image ⇒ Object
:method: images_with.
- #image_urls ⇒ Object
-
#images ⇒ Object
Return a list of all img tags.
-
#initialize(uri = nil, response = nil, body = nil, code = nil, mech = nil) ⇒ Page
constructor
A new instance of Page.
-
#labels ⇒ Object
Return a list of all label tags.
- #labels_hash ⇒ Object
-
#link ⇒ Object
:method: links_with.
-
#links ⇒ Object
Return a list of all link and area tags.
- #meta_charset ⇒ Object
-
#meta_refresh ⇒ Object
Return a list of all meta refresh elements.
-
#parser ⇒ Object
(also: #root)
:method: at_xpath.
-
#pretty_print(q) ⇒ Object
:nodoc:.
- #reset ⇒ Object
- #response_header_charset ⇒ Object
- #title ⇒ Object
Methods included from ElementMatcher
Methods inherited from File
Methods included from Parser
#extract_filename, #fill_header, #find_free_name
Constructor Details
#initialize(uri = nil, response = nil, body = nil, code = nil, mech = nil) ⇒ Page
Returns a new instance of Page.
28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 |
# File 'lib/mechanize/page.rb', line 28 def initialize(uri=nil, response=nil, body=nil, code=nil, mech=nil) response ||= DEFAULT_RESPONSE @meta_content_type = nil @encoding = nil @encodings = [nil] raise 'no' if mech and not Mechanize === mech @mech = mech reset @encodings << Mechanize::Util.detect_charset(body) if body @encodings.concat self.class.response_header_charset(response) if body @encodings.concat self.class.meta_charset body meta_content_type = self.class.meta_content_type body @meta_content_type = meta_content_type if meta_content_type end @encodings << mech.default_encoding if mech and mech.default_encoding super uri, response, body, code end |
Instance Attribute Details
#encodings ⇒ Object (readonly)
Possible encodings for this page based on HTTP headers and meta elements
26 27 28 |
# File 'lib/mechanize/page.rb', line 26 def encodings @encodings end |
#mech ⇒ Object
Returns the value of attribute mech.
21 22 23 |
# File 'lib/mechanize/page.rb', line 21 def mech @mech end |
Class Method Details
.charset(content_type) ⇒ Object Also known as: charset_from_content_type
576 577 578 579 580 |
# File 'lib/mechanize/page.rb', line 576 def charset content_type charset = content_type[/;(?:\s*,)?\s*charset\s*=\s*([^()<>@,;:\\\"\/\[\]?={}\s]+)/i, 1] return nil if charset == 'none' charset end |
.meta_charset(body) ⇒ Object
Retrieves all charsets from meta tags in body.
598 599 600 601 602 603 604 605 606 607 608 609 610 611 |
# File 'lib/mechanize/page.rb', line 598 def self.meta_charset body # HACK use .map body.scan(/<meta .*?>/i).map do |meta| if meta =~ /charset\s*=\s*(["'])?\s*(.+)\s*\1/i then $2 elsif meta =~ /http-equiv\s*=\s*(["'])?content-type\1/i then meta =~ /content\s*=\s*(["'])?(.*?)\1/i m_charset = charset $2 if $2 m_charset if m_charset end end.compact end |
.meta_content_type(body) ⇒ Object
Retrieves the last content-type set by a meta tag in body.
616 617 618 619 620 621 622 623 624 625 626 |
# File 'lib/mechanize/page.rb', line 616 def self.meta_content_type body body.scan(/<meta .*?>/i).reverse.map do |meta| if meta =~ /http-equiv\s*=\s*(["'])?content-type\1/i then meta =~ /content=(["'])?(.*?)\1/i return $2 end end nil end |
.response_header_charset(response) ⇒ Object
585 586 587 588 589 590 591 592 593 |
# File 'lib/mechanize/page.rb', line 585 def self.response_header_charset response charsets = [] response.each do |header, value| next unless header == 'content-type' next unless value =~ /charset/i charsets << charset(value) end charsets end |
Instance Method Details
#base ⇒ Object
:method: bases_with
:call-seq: bases_with(criteria)
Find all base tags matching criteria. See forms_with for details of criteria, where for “form(s)” read “base tag(s)”.
Example:
page.bases_with(href: /foo/).each do |base|
puts base.href
end
378 |
# File 'lib/mechanize/page.rb', line 378 elements_with :base |
#bases ⇒ Object
Return a list of all base tags
527 528 529 530 |
# File 'lib/mechanize/page.rb', line 527 def bases @bases ||= search('base').map { |node| Base.new(node, @mech, self) } end |
#canonical_uri ⇒ Object
Return the canonical URI for the page if there is a link tag with rel=“canonical”.
179 180 181 182 183 184 185 186 187 |
# File 'lib/mechanize/page.rb', line 179 def canonical_uri link = at('link[@rel="canonical"][@href]') return unless link href = link['href'] URI href rescue URI::InvalidURIError URI Mechanize::Util.uri_escape href end |
#content_type ⇒ Object
Get the content type
190 191 192 |
# File 'lib/mechanize/page.rb', line 190 def content_type @meta_content_type || response['content-type'] end |
#detected_encoding ⇒ Object
71 72 73 |
# File 'lib/mechanize/page.rb', line 71 def detected_encoding Mechanize::Util.detect_charset(body) end |
#encoding ⇒ Object
91 92 93 94 95 |
# File 'lib/mechanize/page.rb', line 91 def encoding parser.encoding rescue NoMethodError nil end |
#encoding=(encoding) ⇒ Object
75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 |
# File 'lib/mechanize/page.rb', line 75 def encoding=(encoding) reset @encoding = encoding if @parser parser_encoding = @parser.encoding if parser_encoding && encoding && parser_encoding.casecmp(encoding) != 0 # lazy reinitialize the parser with the new encoding @parser = nil end end encoding end |
#encoding_error?(parser = nil) ⇒ Boolean
Return whether the parser result has errors related to encoding. A false result only indicates that the parser reported no encoding errors, not that the encoding is valid.
99 100 101 102 103 104 105 106 107 108 |
# File 'lib/mechanize/page.rb', line 99 def encoding_error?(parser=nil) parser = self.parser unless parser return false if parser.errors.empty? parser.errors.any? do |error| error.message.scrub =~ /(indicate\ encoding)| (Invalid\ bytes)| (Invalid\ char)| (input\ conversion\ failed)/x end end |
#form ⇒ Object
:method: forms_with
:call-seq:
forms_with(name)
forms_with(name: name_matcher, id: id_matcher, class: class_matcher,
search: search_expression, xpath: xpath_expression, css: css_expression,
action: action_matcher, ...)
Find all forms matching criteria. If a string is given, it is taken as a name attribute value. If a hash is given, forms are narrowed by the key-value pairs as follows.
:id, :dom_id: selects forms with a #dom_id value that matches this value.
:class, :dom_class: selects forms with a #dom_class value that matches this value. Note that class attribute values are compared literally as strings, so forms_with(class: “a”) does not match a form with class=“a b”. Use forms_with(css: “form.a”) instead.
:search: only selects forms matching this selector expression.
:xpath: only selects forms matching this XPath expression.
:css: only selects forms matching this CSS selector expression.
:action, :method, etc.: narrows forms by a given attribute value using the === operator.
Example:
page.forms_with(css: '#content table.login_box form', method: /\APOST\z/i, ).each do |f|
...
end
301 |
# File 'lib/mechanize/page.rb', line 301 elements_with :form |
#forms ⇒ Object
Return a list of all form tags
506 507 508 509 510 511 512 |
# File 'lib/mechanize/page.rb', line 506 def forms @forms ||= search('form').map do |html_form| form = Mechanize::Form.new(html_form, @mech, self) form.action ||= @uri.to_s form end end |
#frame ⇒ Object
:method: frames_with
:call-seq: frames_with(criteria)
Find all frame tags matching criteria. See forms_with for details of criteria, where for “form(s)” read “frame tag(s)”.
Example:
page.frames_with(src: /foo/).each do |frame|
p frame.src
end
416 |
# File 'lib/mechanize/page.rb', line 416 elements_with :frame |
#frames ⇒ Object
Return a list of all frame tags
534 535 536 537 |
# File 'lib/mechanize/page.rb', line 534 def frames @frames ||= search('frame').map { |node| Frame.new(node, @mech, self) } end |
#iframe ⇒ Object
:method: iframes_with
:call-seq: iframes_with(criteria)
Find all iframe tags matching criteria. See forms_with for details of criteria, where for “form(s)” read “iframe tag(s)”.
Example:
page.iframes_with(src: /foo/).each do |iframe|
p iframe.src
end
454 |
# File 'lib/mechanize/page.rb', line 454 elements_with :iframe |
#iframes ⇒ Object
Return a list of all iframe tags
541 542 543 544 |
# File 'lib/mechanize/page.rb', line 541 def iframes @iframes ||= search('iframe').map { |node| Frame.new(node, @mech, self) } end |
#image ⇒ Object
:method: images_with
:call-seq: images_with(criteria)
Find all images matching criteria. See forms_with for details of criteria, where for “form(s)” read “image(s)”.
Example:
page.images_with(src: /jpg\Z/).each do |img|
img.fetch.save
end
492 |
# File 'lib/mechanize/page.rb', line 492 elements_with :image |
#image_urls ⇒ Object
553 554 555 |
# File 'lib/mechanize/page.rb', line 553 def image_urls @image_urls ||= images.map(&:url).uniq end |
#images ⇒ Object
Return a list of all img tags
548 549 550 551 |
# File 'lib/mechanize/page.rb', line 548 def images @images ||= search('img').map { |node| Image.new(node, self) } end |
#labels ⇒ Object
Return a list of all label tags
559 560 561 562 |
# File 'lib/mechanize/page.rb', line 559 def labels @labels ||= search('label').map { |node| Label.new(node, self) } end |
#labels_hash ⇒ Object
564 565 566 567 568 569 570 571 572 573 |
# File 'lib/mechanize/page.rb', line 564 def labels_hash unless @labels_hash hash = {} labels.each do |label| hash[label.node['for']] = label if label.for end @labels_hash = hash end return @labels_hash end |
#link ⇒ Object
:method: links_with
:call-seq:
links_with(criteria)
Find all links matching criteria. See forms_with for details of criteria, where for “form(s)” read “link(s)”.
Example:
page.links_with(href: /foo/).each do |link|
puts link.href
end
340 |
# File 'lib/mechanize/page.rb', line 340 elements_with :link |
#links ⇒ Object
Return a list of all link and area tags
496 497 498 499 500 501 502 |
# File 'lib/mechanize/page.rb', line 496 def links @links ||= %w{ a area }.map do |tag| search(tag).map do |node| Link.new(node, @mech, self) end end.flatten end |
#meta_charset ⇒ Object
67 68 69 |
# File 'lib/mechanize/page.rb', line 67 def meta_charset self.class.meta_charset(body) end |
#meta_refresh ⇒ Object
Return a list of all meta refresh elements
517 518 519 520 521 522 523 |
# File 'lib/mechanize/page.rb', line 517 def meta_refresh query = @mech.follow_meta_refresh == :anywhere ? 'meta' : 'head > meta' @meta_refresh ||= search(query).map do |node| MetaRefresh.from_node node, self end.compact end |
#parser ⇒ Object Also known as: root
:method: at_xpath
Shorthand for parser.at_xpath.
See also Nokogiri::XML::Node#at_xpath for details.
236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 |
# File 'lib/mechanize/page.rb', line 236 def parser return @parser if @parser return unless @body url = @uri && @uri.to_s if @encoding @parser = mech.html_parser.parse html_body, url, @encoding elsif mech.force_default_encoding @parser = mech.html_parser.parse html_body, url, @mech.default_encoding else @encodings.reverse_each do |encoding| @parser = mech.html_parser.parse html_body, url, encoding break unless encoding_error? @parser end end @parser end |
#pretty_print(q) ⇒ Object
:nodoc:
133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 |
# File 'lib/mechanize/page.rb', line 133 def pretty_print(q) # :nodoc: q.object_group(self) { q.breakable q.group(1, '{url', '}') {q.breakable; q.pp uri } q.breakable q.group(1, '{meta_refresh', '}') { meta_refresh.each { |link| q.breakable; q.pp link } } q.breakable q.group(1, '{title', '}') { q.breakable; q.pp title } q.breakable q.group(1, '{iframes', '}') { iframes.each { |link| q.breakable; q.pp link } } q.breakable q.group(1, '{frames', '}') { frames.each { |link| q.breakable; q.pp link } } q.breakable q.group(1, '{links', '}') { links.each { |link| q.breakable; q.pp link } } q.breakable q.group(1, '{forms', '}') { forms.each { |form| q.breakable; q.pp form } } } end |
#reset ⇒ Object
164 165 166 167 168 169 170 171 172 173 174 175 |
# File 'lib/mechanize/page.rb', line 164 def reset @bases = nil @forms = nil @frames = nil @iframes = nil @links = nil @labels = nil @labels_hash = nil @meta_refresh = nil @parser = nil @title = nil end |
#response_header_charset ⇒ Object
63 64 65 |
# File 'lib/mechanize/page.rb', line 63 def response_header_charset self.class.response_header_charset(response) end |
#title ⇒ Object
55 56 57 58 59 60 61 |
# File 'lib/mechanize/page.rb', line 55 def title @title ||= if doc = parser title = doc.xpath('string(((/html/head | /html | /head | /)/title)[1])').to_s title.empty? ? nil : title end end |