Class: WWW::Mechanize::Page

Inherits:
File
  • Object
show all
Extended by:
Forwardable
Defined in:
lib/www/mechanize/page.rb,
lib/www/mechanize/inspect.rb,
lib/www/mechanize/page/base.rb,
lib/www/mechanize/page/link.rb,
lib/www/mechanize/page/meta.rb,
lib/www/mechanize/page/frame.rb,
lib/www/mechanize/monkey_patch.rb

Overview

Synopsis

This class encapsulates an HTML page. If Mechanize finds a content type of ‘text/html’, this class will be instantiated and returned.

Example

# Load RubyGems and the mechanize library.
require 'rubygems'
require 'mechanize'

# Fetch a page through an agent; the trailing comment shows the class of
# the object that #get hands back for this URL.
agent = WWW::Mechanize.new
agent.get('http://google.com/').class  #=> WWW::Mechanize::Page

Defined Under Namespace

Classes: Base, Frame, Link, Meta

Instance Attribute Summary collapse

Attributes inherited from File

#body, #code, #filename, #response, #uri

Instance Method Summary collapse

Methods inherited from File

#save_as

Constructor Details

#initialize(uri = nil, response = nil, body = nil, code = nil, mech = nil) ⇒ Page

Returns a new instance of Page.



25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
# File 'lib/www/mechanize/page.rb', line 25

# Builds a Page from a fetched response and decides which character
# encoding will later be handed to the HTML parser.
#
# uri, response, body and code are forwarded unchanged to the superclass
# constructor. mech is stored in @mech only if @mech is not already set
# once the superclass constructor has run.
#
# Encoding selection, in order:
# 1. a charset=... parameter found in a response header value (the last
#    one seen wins; the literal value 'none' is ignored);
# 2. otherwise whatever Util.detect_charset(body) returns;
# 3. finally reset to nil when the markup contains a <meta ...charset...>
#    tag (presumably so the parser reads the encoding from the document
#    itself -- confirm against the parser in use).
#
# NOTE: response defaults to nil but is used unconditionally below, so
# calling this with a nil response raises NoMethodError. body, when given,
# is modified in place by force_encoding.
#
# Raises Mechanize::ContentTypeError unless the Content-Type header starts
# with text/html or application/xhtml+xml.
def initialize(uri=nil, response=nil, body=nil, code=nil, mech=nil)
  @encoding = nil

  method = response.respond_to?(:each_header) ? :each_header : :each
  response.send(method) do |_name, value|
    next unless value.respond_to?(:to_str)

    # Require an actual charset=... parameter. Header values that merely
    # mention the word (e.g. "Vary: Accept-Charset") are skipped, and
    # surrounding quotes or trailing parameters are not captured.
    charset = value.to_str[/charset\s*=\s*["']?([^\s;"',]+)/i, 1]
    next unless charset

    @encoding = charset unless charset == 'none'
  end

  # Force the encoding to be 8BIT so we can perform regular expressions.
  # We'll set it to the detected encoding later
  body.force_encoding('ASCII-8BIT') if defined?(Encoding) && body

  @encoding ||= Util.detect_charset(body)

  super(uri, response, body, code)
  @mech           ||= mech

  @encoding = nil if html_body =~ /<meta[^>]*charset[^>]*>/i

  # The alternation is grouped so the anchor applies to both media types;
  # ungrouped, only text/html was anchored and any header that merely
  # contained "application/xhtml+xml" was accepted.
  unless response['content-type'] =~ /\A(?:text\/html|application\/xhtml\+xml)/i
    raise Mechanize::ContentTypeError.new(response['content-type'])
  end
  @parser = @links = @forms = @meta = @bases = @frames = @iframes = nil
end

Instance Attribute Details

#mech ⇒ Object

Returns the value of attribute mech.



23
24
25
# File 'lib/www/mechanize/page.rb', line 23

# Returns the object stored in @mech. The constructor fills it from its
# +mech+ argument, and #parser asks it for its html_parser.
def mech
  @mech
end

Instance Method Details

#bases ⇒ Object



155
156
157
158
# File 'lib/www/mechanize/page.rb', line 155

# Wraps every node matched by search('base') in a Base object.
# The resulting array is built on first use and then reused.
def bases
  return @bases if @bases

  @bases = search('base').map do |element|
    Base.new(element, @mech, self)
  end
end

#content_type ⇒ Object

Get the content type



91
92
93
# File 'lib/www/mechanize/page.rb', line 91

# Returns the response's 'content-type' entry, exactly as
# response['content-type'] yields it (no parsing or normalisation here).
def content_type
  response['content-type']
end

#encoding ⇒ Object



71
72
73
# File 'lib/www/mechanize/page.rb', line 71

# Reports the encoding of the parsed document, or nil when the parser
# object does not offer an #encoding method (this includes a nil parser).
def encoding
  return nil unless parser.respond_to?(:encoding)

  parser.encoding
end

#encoding=(encoding) ⇒ Object



57
58
59
60
61
62
63
64
65
66
67
68
69
# File 'lib/www/mechanize/page.rb', line 57

# Sets the encoding to use the next time the document is parsed.
#
# If a document has already been parsed and it reports a different
# encoding (compared case-insensitively), the parsed document is dropped
# so that #parser rebuilds it lazily. The collections and title memoized
# from the old document are dropped with it, so they are rebuilt from the
# new parse instead of pointing at stale nodes.
#
# A parser object that does not respond to #encoding is left untouched,
# mirroring the respond_to? guard in the #encoding reader.
#
# Returns the encoding that was passed in.
def encoding=(encoding)
  @encoding = encoding

  if @parser && @parser.respond_to?(:encoding)
    parser_encoding = @parser.encoding
    if (parser_encoding && parser_encoding.downcase) != (encoding && encoding.downcase)
      # lazy reinitialize the parser with the new encoding
      @parser = nil
      @links = @forms = @meta = @bases = @frames = @iframes = @title = nil
    end
  end

  encoding
end

#forms ⇒ Object



133
134
135
136
137
138
139
# File 'lib/www/mechanize/page.rb', line 133

# Builds one Form per node matched by search('form'). A form that has no
# action gets this page's URI (as a string) as its action. The array is
# built on first use and then reused.
def forms
  return @forms if @forms

  @forms = search('form').map do |element|
    built = Form.new(element, @mech, self)
    built.action = @uri.to_s unless built.action
    built
  end
end

#frames ⇒ Object



160
161
162
163
# File 'lib/www/mechanize/page.rb', line 160

# Wraps every node matched by search('frame') in a Frame object.
# The resulting array is built on first use and then reused.
def frames
  return @frames if @frames

  @frames = search('frame').map do |element|
    Frame.new(element, @mech, self)
  end
end

#iframes ⇒ Object



165
166
167
168
# File 'lib/www/mechanize/page.rb', line 165

# Wraps every node matched by search('iframe') in a Frame object.
# The resulting array is built on first use and then reused.
def iframes
  return @iframes if @iframes

  @iframes = search('iframe').map do |element|
    Frame.new(element, @mech, self)
  end
end


125
126
127
128
129
130
131
# File 'lib/www/mechanize/page.rb', line 125

# Collects a Link for every <a> node, followed by a Link for every <area>
# node, into one flat array. Built on first use and then reused.
def links
  return @links if @links

  per_tag = %w{ a area }.map do |tag_name|
    search(tag_name).map { |element| Link.new(element, @mech, self) }
  end
  @links = per_tag.flatten
end

#meta ⇒ Object



141
142
143
144
145
146
147
148
149
150
151
152
153
# File 'lib/www/mechanize/page.rb', line 141

# Collects the results of Meta.parse for the page's refresh <meta> tags.
#
# Only nodes carrying both an http-equiv and a content attribute are
# considered, and of those only the ones whose http-equiv is "refresh"
# (any letter case). For each such node Meta.parse is given the content
# and this page's uri; inside its block the yielded delay and href are
# written back onto the node and a Meta is built from it. Whatever
# Meta.parse returns is kept, with nils removed. Built on first use and
# then reused.
def meta
  return @meta if @meta

  parsed = search('meta').map do |element|
    equiv = element['http-equiv']
    content = element['content']
    next unless equiv && content
    next unless equiv.downcase == 'refresh'

    Meta.parse(content, uri) do |delay, href|
      element['delay'] = delay
      element['href'] = href
      Meta.new(element, @mech, self)
    end
  end
  @meta = parsed.compact
end

#parser ⇒ Object Also known as: root



75
76
77
78
79
80
81
82
83
84
85
86
87
# File 'lib/www/mechanize/page.rb', line 75

# Returns the parsed document, parsing html_body on first use.
#
# Nothing is parsed unless both body and response are present. When the
# agent's html_parser is Nokogiri::HTML the stored @encoding is passed
# along to parse; any other parser is called with the markup only.
def parser
  return @parser if @parser
  return @parser unless body && response

  @parser =
    if mech.html_parser == Nokogiri::HTML
      mech.html_parser.parse(html_body, nil, @encoding)
    else
      mech.html_parser.parse(html_body)
    end
end

#pretty_print(q) ⇒ Object



16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
# File 'lib/www/mechanize/inspect.rb', line 16

# Pretty-printer hook: emits the page as an object group containing, in
# this order, the url, meta entries, title, iframes, frames, links and
# forms. Each section is its own group with a break before it; url and
# title print a single value, the others print one entry per element.
def pretty_print(q)
  # [opening text, reader to call, whether the reader yields a collection]
  sections = [
    ['{url',     :uri,     false],
    ['{meta',    :meta,    true],
    ['{title',   :title,   false],
    ['{iframes', :iframes, true],
    ['{frames',  :frames,  true],
    ['{links',   :links,   true],
    ['{forms',   :forms,   true]
  ]

  q.object_group(self) do
    sections.each do |opening, reader, collection|
      q.breakable
      q.group(1, opening, '}') do
        if collection
          __send__(reader).each do |entry|
            q.breakable
            q.pp entry
          end
        else
          q.breakable
          q.pp __send__(reader)
        end
      end
    end
  end
end

#title ⇒ Object



51
52
53
54
55
# File 'lib/www/mechanize/page.rb', line 51

# Returns the text of the page's <title>, or nil when there is no parsed
# document or the title text is empty.
#
# The title nodes are looked up once per computation (the text used to be
# fetched twice: once for the length check and once for the value). A
# non-empty title is memoized in @title; a missing title is recomputed on
# each call because nil is not retained by ||=.
def title
  @title ||= begin
    text = parser && search('title').inner_text
    text if text && text.length > 0
  end
end