Class: Mechanize::Page

Inherits:
File
  • Object
show all
Extended by:
Forwardable
Defined in:
lib/mechanize/page.rb,
lib/mechanize/inspect.rb,
lib/mechanize/page/base.rb,
lib/mechanize/page/link.rb,
lib/mechanize/page/meta.rb,
lib/mechanize/page/frame.rb,
lib/mechanize/page/image.rb,
lib/mechanize/page/label.rb,
lib/mechanize/monkey_patch.rb

Overview

Synopsis

This class encapsulates an HTML page. If Mechanize finds a content type of ‘text/html’, this class will be instantiated and returned.

Example

require 'rubygems'
require 'mechanize'

# Fetch a document with a fresh agent and inspect the class of the result;
# for an HTML response the returned object is a Mechanize::Page.
agent = Mechanize.new
agent.get('http://google.com/').class  #=> Mechanize::Page

Defined Under Namespace

Classes: Base, Frame, Image, Label, Link, Meta

Instance Attribute Summary collapse

Attributes inherited from File

#body, #code, #filename, #response, #uri

Instance Method Summary collapse

Methods inherited from File

#save_as

Constructor Details

#initialize(uri = nil, response = nil, body = nil, code = nil, mech = nil) ⇒ Page

Returns a new instance of Page.



26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
# File 'lib/mechanize/page.rb', line 26

# Builds a page from the pieces of an HTTP response.
#
# uri, response, body and code are handed on to the superclass initializer;
# mech is remembered in @mech unless @mech is already set.
#
# The charset named in the response headers, if any, becomes the preferred
# encoding; otherwise Util.detect_charset(body) is consulted. When the
# document carries its own <meta ... charset ...> tag the stored encoding is
# cleared again.
#
# Raises Mechanize::ContentTypeError unless the content type starts with
# text/html or application/xhtml+xml.
def initialize(uri=nil, response=nil, body=nil, code=nil, mech=nil)
  # Every parameter defaults to nil, yet the header scan and the content-type
  # check below both need a header-like object. Fall back to a minimal HTML
  # header set so that a bare Page.new does not fail on nil.
  response ||= { 'content-type' => 'text/html' }
  @encoding = nil

  method = response.respond_to?(:each_header) ? :each_header : :each
  response.send(method) do |_header, v|
    next unless v =~ /charset/i
    # Media-type parameter names are case-insensitive, so "Charset=" must be
    # recognised too. A header that merely mentions charset without a value
    # must not wipe out an encoding found in an earlier header.
    encoding = v[/charset=([^; ]+)/i, 1]
    @encoding = encoding if encoding && encoding != 'none'
  end

  # Force the encoding to be 8BIT so we can perform regular expressions.
  # We'll set it to the detected encoding later
  body.force_encoding('ASCII-8BIT') if body && body.respond_to?(:force_encoding)

  @encoding ||= Util.detect_charset(body)

  super(uri, response, body, code)
  @mech           ||= mech

  @encoding = nil if html_body =~ /<meta[^>]*charset[^>]*>/i

  # The alternation is grouped so the anchor applies to both media types;
  # ungrouped, only text/html was anchored and the XHTML type matched
  # anywhere inside the header value.
  raise Mechanize::ContentTypeError.new(response['content-type']) unless
    response['content-type'] =~ %r{\A(?:text/html|application/xhtml\+xml)}i
  @parser = @links = @forms = @meta = @bases = @frames = @iframes = nil
end

Instance Attribute Details

#mechObject

Returns the value of attribute mech.



24
25
26
# File 'lib/mechanize/page.rb', line 24

# Reader for the mech attribute: hands back whatever is stored in @mech.
def mech
  return @mech
end

Instance Method Details

#basesObject

Return a list of all base tags



242
243
244
245
# File 'lib/mechanize/page.rb', line 242

# Wraps every node found by search('base') in a Base, passing along the
# agent and this page. The resulting array is cached in @bases.
def bases
  return @bases if @bases

  @bases = search('base').map do |base_node|
    Base.new(base_node, @mech, self)
  end
end

#content_typeObject

Get the content type



92
93
94
# File 'lib/mechanize/page.rb', line 92

# Looks up the 'content-type' entry of the response headers.
def content_type
  headers = response
  headers['content-type']
end

#encodingObject



72
73
74
# File 'lib/mechanize/page.rb', line 72

# Reports the encoding of the parsed document, or nil when the parser
# object does not expose an #encoding method.
def encoding
  return nil unless parser.respond_to?(:encoding)

  parser.encoding
end

#encoding=(encoding) ⇒ Object



58
59
60
61
62
63
64
65
66
67
68
69
70
# File 'lib/mechanize/page.rb', line 58

# Stores the encoding to use for this page and returns it.
#
# If a document has already been parsed and its encoding differs from the
# requested one (compared case-insensitively), the cached parser is dropped
# so that the next call to #parser builds it again with the new encoding.
def encoding=(encoding)
  @encoding = encoding

  if @parser
    # Parsers are pluggable and need not expose #encoding (the reader method
    # guards the same way); treat such a parser as having no known encoding
    # instead of raising NoMethodError.
    parser_encoding = @parser.respond_to?(:encoding) ? @parser.encoding : nil
    if (parser_encoding && parser_encoding.downcase) != (encoding && encoding.downcase)
      # lazy reinitialize the parser with the new encoding
      @parser = nil
    end
  end

  encoding
end

#formsObject

Return a list of all form tags



216
217
218
219
220
221
222
# File 'lib/mechanize/page.rb', line 216

# Builds a Form for every node found by search('form'). A form that has no
# action of its own is given this page's URI (as a string) as its action.
# The resulting array is cached in @forms.
def forms
  return @forms if @forms

  @forms = search('form').map do |form_node|
    built = Form.new(form_node, @mech, self)
    built.action = @uri.to_s unless built.action
    built
  end
end

#framesObject

Return a list of all frame tags



249
250
251
252
# File 'lib/mechanize/page.rb', line 249

# Wraps every node found by search('frame') in a Frame, passing along the
# agent and this page. The resulting array is cached in @frames.
def frames
  return @frames if @frames

  @frames = search('frame').map do |frame_node|
    Frame.new(frame_node, @mech, self)
  end
end

#iframesObject

Return a list of all iframe tags



256
257
258
259
# File 'lib/mechanize/page.rb', line 256

# Wraps every node found by search('iframe') in a Frame, passing along the
# agent and this page. The resulting array is cached in @iframes.
def iframes
  return @iframes if @iframes

  @iframes = search('iframe').map do |iframe_node|
    Frame.new(iframe_node, @mech, self)
  end
end

#image_urlsObject



268
269
270
# File 'lib/mechanize/page.rb', line 268

# Collects the url of every image on the page with duplicates removed.
# The resulting array is cached in @image_urls.
def image_urls
  return @image_urls if @image_urls

  all_urls = images.map { |image| image.url }
  @image_urls = all_urls.uniq
end

#imagesObject

Return a list of all img tags



263
264
265
266
# File 'lib/mechanize/page.rb', line 263

# Wraps every node found by search('img') in an Image tied to this page.
# The resulting array is cached in @images.
def images
  return @images if @images

  @images = search('img').map do |img_node|
    Image.new(img_node, self)
  end
end

#labelsObject

Return a list of all label tags



274
275
276
277
# File 'lib/mechanize/page.rb', line 274

# Wraps every node found by search('label') in a Label tied to this page.
# The resulting array is cached in @labels.
def labels
  return @labels if @labels

  @labels = search('label').map do |label_node|
    Label.new(label_node, self)
  end
end

#labels_hashObject



279
280
281
282
283
284
285
286
287
288
# File 'lib/mechanize/page.rb', line 279

# Indexes the page's labels by the value of their node's 'for' attribute.
# Labels whose #for is nil or false are left out. The hash is built once
# and cached in @labels_hash.
def labels_hash
  @labels_hash ||= labels.inject({}) do |by_target, label|
    by_target[label.node['for']] = label if label.for
    by_target
  end
end

#linksObject

Return a list of all links on the page, built from its a and area tags



206
207
208
209
210
211
212
# File 'lib/mechanize/page.rb', line 206

# Builds a Link for every node found by search('a') followed by every node
# found by search('area'), passing along the agent and this page. The
# flattened array is cached in @links.
def links
  return @links if @links

  per_tag = ['a', 'area'].map do |tag_name|
    search(tag_name).map { |element| Link.new(element, @mech, self) }
  end
  @links = per_tag.flatten
end

#metaObject

Return a list of all meta tags



226
227
228
229
230
231
232
233
234
235
236
237
238
# File 'lib/mechanize/page.rb', line 226

# Collects the refresh directives found in the document head.
#
# Each node from search('head > meta') that has both an 'http-equiv' and a
# 'content' attribute, and whose http-equiv is "refresh" (any case), has its
# content handed to Meta.parse together with the page uri. Inside the parse
# block the node is tagged with 'delay' and 'href' and wrapped in a Meta.
# Whatever Meta.parse returns is collected; nil entries are dropped and the
# array is cached in @meta.
def meta
  return @meta if @meta

  candidates = search('head > meta').map do |tag|
    equiv, content = tag['http-equiv'], tag['content']
    next unless equiv && content
    next unless equiv.downcase == 'refresh'

    Meta.parse(content, uri) do |delay, href|
      tag['delay'] = delay
      tag['href'] = href
      Meta.new(tag, @mech, self)
    end
  end
  @meta = candidates.compact
end

#parserObject Also known as: root



76
77
78
79
80
81
82
83
84
85
86
87
88
# File 'lib/mechanize/page.rb', line 76

# Returns the parsed document, building and caching it in @parser on first
# use. Nothing is parsed unless both body and response are present.
#
# When the agent's html_parser is Nokogiri::HTML the stored @encoding is
# passed along as the third argument; any other parser receives only the
# HTML body.
def parser
  return @parser if @parser
  return @parser unless body && response

  html_parser = mech.html_parser
  extra_args = html_parser == Nokogiri::HTML ? [nil, @encoding] : []
  @parser = html_parser.parse(html_body, *extra_args)
end

#pretty_print(q) ⇒ Object



15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
# File 'lib/mechanize/inspect.rb', line 15

# Pretty-printing hook. Emits, inside an object group for this page, one
# braced group each for url, meta, title, iframes, frames, links and forms,
# in that order, onto the printer q. Single values are printed directly;
# collections are printed one entry at a time.
def pretty_print(q)
  # [opening text, reader method, whether the reader returns a collection]
  sections = [
    ['{url',     :uri,     false],
    ['{meta',    :meta,    true],
    ['{title',   :title,   false],
    ['{iframes', :iframes, true],
    ['{frames',  :frames,  true],
    ['{links',   :links,   true],
    ['{forms',   :forms,   true]
  ]

  q.object_group(self) do
    sections.each do |opening, reader, collection|
      q.breakable
      q.group(1, opening, '}') do
        if collection
          __send__(reader).each do |entry|
            q.breakable
            q.pp entry
          end
        else
          q.breakable
          q.pp __send__(reader)
        end
      end
    end
  end
end

#titleObject



52
53
54
55
56
# File 'lib/mechanize/page.rb', line 52

# Returns the text of the document's title element, or nil when there is no
# parsed document or the title text is empty. A non-empty title is cached in
# @title.
def title
  @title ||= begin
    # Query the document once and reuse the text, rather than running the
    # same search a second time just to fetch the value that was measured.
    text = parser ? search('title').inner_text : nil
    text if text && text.length > 0
  end
end