Class: WWW::Mechanize::Page

Inherits:
File
  • Object
show all
Extended by:
Forwardable
Defined in:
lib/www/mechanize/page.rb,
lib/www/mechanize/inspect.rb,
lib/www/mechanize/page/base.rb,
lib/www/mechanize/page/link.rb,
lib/www/mechanize/page/meta.rb,
lib/www/mechanize/page/frame.rb,
lib/www/mechanize/monkey_patch.rb

Overview

Synopsis

This class encapsulates an HTML page. If Mechanize finds a content type of ‘text/html’, this class will be instantiated and returned.

Example

require 'rubygems'
require 'mechanize'

agent = WWW::Mechanize.new
agent.get('http://google.com/').class  #=> WWW::Mechanize::Page

Defined Under Namespace

Classes: Base, Frame, Link, Meta

Instance Attribute Summary collapse

Attributes inherited from File

#body, #code, #filename, #response, #uri

Instance Method Summary collapse

Methods inherited from File

#save_as

Constructor Details

#initialize(uri = nil, response = nil, body = nil, code = nil, mech = nil) ⇒ Page

Returns a new instance of Page.



26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
# File 'lib/www/mechanize/page.rb', line 26

# Builds a Page from an HTTP response that carries an HTML or XHTML body.
#
# uri      - forwarded unchanged to the superclass initializer.
# response - header container; it must respond to #each (yielding a header
#            name and value) and to #[]. The parameter defaults to nil, but
#            it is dereferenced unconditionally, so a nil response raises.
# body     - page body; passed through Util.to_native_charset when that
#            succeeds, otherwise used as given.
# code     - forwarded unchanged to the superclass initializer.
# mech     - stored in @mech unless the superclass initializer already set it.
#
# Raises Mechanize::ContentTypeError when the content-type header does not
# start with text/html or application/xhtml+xml.
def initialize(uri=nil, response=nil, body=nil, code=nil, mech=nil)
  @encoding = nil
  response.each do |_header, v|
    next unless v =~ /charset/i
    # Extract just the charset parameter: tolerate optional quotes and stop
    # at ';' so that trailing parameters are not mistaken for the charset.
    charset = v[/charset\s*=\s*["']?([^\s;"']+)/i, 1]
    @encoding = charset if charset
  end
  # No usable charset in the headers: fall back to whatever
  # Util.detect_charset derives from the body.
  @encoding ||= Util.detect_charset(body)
  begin
    body = Util.to_native_charset(body, @encoding)
  rescue StandardError
    # Best effort: if conversion fails, keep the body exactly as received.
  end

  super(uri, response, body, code)
  @mech ||= mech

  # Anchor the whole alternation; previously only text/html was anchored, so
  # any header merely containing "application/xhtml+xml" was accepted.
  unless response['content-type'] =~ %r{\A(?:text/html|application/xhtml\+xml)}i
    raise Mechanize::ContentTypeError.new(response['content-type'])
  end
  @parser = @links = @forms = @meta = @bases = @frames = @iframes = nil
end

Instance Attribute Details

#encoding ⇒ Object

Returns the value of attribute encoding.



24
25
26
# File 'lib/www/mechanize/page.rb', line 24

# Reader for the page's character encoding, as held in @encoding.
def encoding; @encoding; end

#mech ⇒ Object

Returns the value of attribute mech.



23
24
25
# File 'lib/www/mechanize/page.rb', line 23

# Reader for the value held in @mech (the object handed to the constructor
# as +mech+, unless the superclass had already set it).
def mech; @mech; end

Instance Method Details

#bases ⇒ Object



129
130
131
132
# File 'lib/www/mechanize/page.rb', line 129

# Returns one Base per <base> node found by #search, built on first use and
# cached in @bases afterwards.
def bases
  return @bases if @bases

  @bases = search('base').map do |base_node|
    Base.new(base_node, @mech, self)
  end
end

#content_type ⇒ Object

Get the content type



66
67
68
# File 'lib/www/mechanize/page.rb', line 66

# Returns the value stored under 'content-type' in the response.
def content_type
  headers = response
  headers['content-type']
end

#forms ⇒ Object



108
109
110
111
112
113
114
# File 'lib/www/mechanize/page.rb', line 108

# Returns one Form per <form> node found by #search, cached in @forms.
# A form that has no action of its own is given this page's URI as a string.
def forms
  return @forms if @forms

  @forms = search('form').map do |form_node|
    built = Form.new(form_node, @mech, self)
    built.action = @uri.to_s unless built.action
    built
  end
end

#frames ⇒ Object



134
135
136
137
# File 'lib/www/mechanize/page.rb', line 134

# Returns one Frame per <frame> node found by #search, built on first use
# and cached in @frames afterwards.
def frames
  return @frames if @frames

  @frames = search('frame').map do |frame_node|
    Frame.new(frame_node, @mech, self)
  end
end

#iframes ⇒ Object



139
140
141
142
# File 'lib/www/mechanize/page.rb', line 139

# Returns one Frame per <iframe> node found by #search, built on first use
# and cached in @iframes afterwards.
def iframes
  return @iframes if @iframes

  @iframes = search('iframe').map do |iframe_node|
    Frame.new(iframe_node, @mech, self)
  end
end


100
101
102
103
104
105
106
# File 'lib/www/mechanize/page.rb', line 100

# Returns a flat list of Link objects: first those built from every <a>
# node, then those built from every <area> node. Cached in @links.
def links
  return @links if @links

  per_tag = []
  %w{ a area }.each do |tag|
    per_tag << search(tag).map { |node| Link.new(node, @mech, self) }
  end
  @links = per_tag.flatten
end

#meta ⇒ Object



116
117
118
119
120
121
122
123
124
125
126
127
# File 'lib/www/mechanize/page.rb', line 116

# Returns a Meta for every <meta http-equiv="refresh"> node whose content
# has the form "<digits>; url=<target>". The target may be unquoted or
# wrapped in single or double quotes; it is written to the node's 'href'
# attribute before the Meta is built. Other meta nodes are skipped.
# The result is cached in @meta.
def meta
  @meta ||= search('meta').map do |node|
    equiv, content = node['http-equiv'], node['content']
    next unless equiv && content
    next unless equiv.downcase == 'refresh'
    # Strip an optional opening quote of either kind and stop at whitespace
    # or a quote, so url="..." no longer leaks the quotes into the href.
    next unless content =~ /^\d+\s*;\s*url\s*=\s*["']?([^\s"']+)/i
    node['href'] = $1
    Meta.new(node, @mech, self)
  end.compact
end

#parser ⇒ Object Also known as: root



49
50
51
52
53
54
55
56
57
58
59
60
61
62
# File 'lib/www/mechanize/page.rb', line 49

# Returns the parsed document for this page, parsing on first use and
# caching the result in @parser. Nothing is parsed unless both body and
# response are present. An empty body is replaced by '<html></html>'.
# When the configured html_parser is Nokogiri::HTML, a nil second argument
# and @encoding are passed to parse as well.
def parser
  return @parser if @parser
  return @parser unless body && response

  markup = body.length > 0 ? body : '<html></html>'
  parse_args = [markup]
  parse_args.push(nil, @encoding) if WWW::Mechanize.html_parser == Nokogiri::HTML
  @parser = Mechanize.html_parser.parse(*parse_args)
end

#pretty_print(q) ⇒ Object



16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
# File 'lib/www/mechanize/inspect.rb', line 16

# Pretty-prints the page through the printer +q+. Inside one object group
# it emits, in order, the groups {url, {meta, {title, {iframes, {frames,
# {links and {forms. Each group is preceded by a breakable, and each entry
# within a group is preceded by a breakable as well.
def pretty_print(q)
  # Opening label paired with a callable producing that group's entries;
  # single values (uri, title) are wrapped so all groups print the same way.
  sections = [
    ['{url',     lambda { [uri] }],
    ['{meta',    lambda { meta }],
    ['{title',   lambda { [title] }],
    ['{iframes', lambda { iframes }],
    ['{frames',  lambda { frames }],
    ['{links',   lambda { links }],
    ['{forms',   lambda { forms }]
  ]

  q.object_group(self) do
    sections.each do |opening, entries|
      q.breakable
      q.group(1, opening, '}') do
        entries.call.each do |entry|
          q.breakable
          q.pp entry
        end
      end
    end
  end
end

#title ⇒ Object



43
44
45
46
47
# File 'lib/www/mechanize/page.rb', line 43

# Returns the inner text of the page's <title>, or nil when there is no
# parser or the title text is empty. A non-nil result is cached in @title.
def title
  return @title if @title

  has_text = parser && search('title').inner_text.length > 0
  @title = has_text ? search('title').inner_text : nil
end