Class: HTMLTree::Parser

Inherits:
HTML::StackingParser show all
Defined in:
lib/html/tree.rb

Constant Summary

Constants inherited from HTML::SGMLParser

HTML::SGMLParser::Attrfind, HTML::SGMLParser::Charref, HTML::SGMLParser::Commentclose, HTML::SGMLParser::Commentopen, HTML::SGMLParser::Endbracket, HTML::SGMLParser::Endtagfind, HTML::SGMLParser::Endtagopen, HTML::SGMLParser::Entitydefs, HTML::SGMLParser::Entityref, HTML::SGMLParser::Incomplete, HTML::SGMLParser::Interesting, HTML::SGMLParser::Special, HTML::SGMLParser::Starttagopen, HTML::SGMLParser::Tagfind

Instance Attribute Summary

Attributes inherited from HTML::SGMLParser

#src_range

Instance Method Summary collapse

Methods inherited from HTML::StackingParser

#feed, #last_tag, #parent_tag, #parse_file_named, #stack, #strip_whitespace=

Methods inherited from HTML::SGMLParser

#close, #feed, #finish_endtag, #finish_starttag, #get_source, #goahead, #handle_charref, #handle_data, #handle_data_range, #handle_endtag, #handle_entityref, #handle_starttag, #has_context, #parse_comment, #parse_endtag, #parse_special, #parse_starttag, #report_unbalanced, #set_range, #setliteral, #setnomoretags, #unknown_charref, #unknown_endtag, #unknown_entityref, #unknown_starttag

Constructor Details

#initialize(verbose = false, strip_white = true) ⇒ Parser

verbose

if true, will warn to $stderr on unknown tags/entities/characters, as well as on missing end tags and extra end tags.

strip_white

if true, remove all non-essential whitespace. Note that there are browser bugs that may cause this to change the appearance of HTML (even though it shouldn’t by the standard).



30
31
32
33
# File 'lib/html/tree.rb', line 30

# Build a parser and immediately put it into its freshly-reset state.
#
# verbose::     forwarded unchanged to the superclass constructor.
# strip_white:: forwarded unchanged to the superclass constructor.
def initialize(verbose = false, strip_white = true)
  # Forward both arguments explicitly; neither is reassigned, so this is
  # the same call a bare `super` would make.
  super(verbose, strip_white)
  reset
end

Instance Method Details

#html ⇒ Object

Return the <html> node, if any.



48
49
50
# File 'lib/html/tree.rb', line 48

# Return the <html> node, if any, by asking the root node for it.
# The result is whatever the root node's +html_node+ returns.
def html
  @rootNode.html_node
end

#reset ⇒ Object

Reset this parser so that it can parse a new document.



36
37
38
39
# File 'lib/html/tree.rb', line 36

# Reset this parser so that it can parse a new document.
#
# Runs the superclass reset first, then starts over with a brand-new
# Document that serves as both the root node and the current node.
def reset
  super
  fresh_document = Document.new
  @currentNode = fresh_document
  @rootNode = fresh_document
end

#tree ⇒ Object

Return the tree that was built. This will be an HTMLTree::Element that represents the whole document. The <html> node is a child of this.



43
44
45
# File 'lib/html/tree.rb', line 43

# Return the tree that was built: the root node held in @rootNode.
# The <html> node is a child of this root.
def tree
  @rootNode
end