Class: HTMLTree::XMLParser

Inherits:
HTML::StackingParser show all
Defined in:
lib/html/xmltree.rb

Constant Summary

Constants inherited from HTML::SGMLParser

HTML::SGMLParser::Attrfind, HTML::SGMLParser::Charref, HTML::SGMLParser::Commentclose, HTML::SGMLParser::Commentopen, HTML::SGMLParser::Endbracket, HTML::SGMLParser::Endtagfind, HTML::SGMLParser::Endtagopen, HTML::SGMLParser::Entitydefs, HTML::SGMLParser::Entityref, HTML::SGMLParser::Incomplete, HTML::SGMLParser::Interesting, HTML::SGMLParser::Special, HTML::SGMLParser::Starttagopen, HTML::SGMLParser::Tagfind

Instance Attribute Summary

Attributes inherited from HTML::SGMLParser

#src_range

Instance Method Summary collapse

Methods inherited from HTML::StackingParser

#feed, #last_tag, #parent_tag, #parse_file_named, #stack, #strip_whitespace=

Methods inherited from HTML::SGMLParser

#close, #feed, #finish_endtag, #finish_starttag, #get_source, #goahead, #handle_charref, #handle_data, #handle_data_range, #handle_endtag, #handle_entityref, #handle_starttag, #has_context, #parse_comment, #parse_endtag, #parse_special, #parse_starttag, #report_unbalanced, #set_range, #setliteral, #setnomoretags, #unknown_charref, #unknown_endtag, #unknown_entityref, #unknown_starttag

Constructor Details

#initialize(verbose = false, strip_white = true) ⇒ XMLParser

verbose

if true, will warn to $stderr on unknown tags/entities/characters, as well as on missing end tags and extra end tags.

strip_white

if true, remove all non-essential whitespace. Note that browser bugs may cause this to change the rendered appearance of the HTML (even though, by the standard, it shouldn’t).



42
43
44
45
# File 'lib/html/xmltree.rb', line 42

# Build a parser and put it into its initial, ready-to-parse state.
#
# verbose::     if true, warn to $stderr on unknown tags/entities/characters,
#               as well as on missing end tags and extra end tags.
# strip_white:: if true, remove all non-essential whitespace.
#
# Both arguments are handed to the parent class constructor unchanged;
# #reset is then called.
def initialize(verbose = false, strip_white = true)
  super(verbose, strip_white)
  reset
end

Instance Method Details

#document ⇒ Object

Return the document that was built. This will be an REXML::Document that represents the whole document. The <html> node is a child of this.



56
57
58
# File 'lib/html/xmltree.rb', line 56

# Return the document that was built, i.e. the object held in @rootNode.
# #reset assigns a fresh REXML::Document to @rootNode, so this is the
# REXML::Document representing the whole parsed document.
def document
  @rootNode
end

#html ⇒ Object

Return the <html> node, if any.



70
71
72
# File 'lib/html/xmltree.rb', line 70

# Return the <html> node, if any.
#
# Looks up the 'html' entry in the +elements+ collection of the document's
# root. Returns nil (instead of raising NoMethodError) when the document
# has no root, so callers can rely on the "if any" contract.
#
# NOTE(review): the lookup is performed relative to the root element, as in
# the original code -- confirm that this matches the tree shape the parser
# actually builds.
def html
  top = @rootNode.root
  # An empty document has no root; there is nothing to search in that case.
  top && top.elements['html']
end

#reset ⇒ Object

Reset this parser so that it can parse a new document.



48
49
50
51
# File 'lib/html/xmltree.rb', line 48

# Reset this parser so that it can parse a new document.
#
# Runs the parent class's reset first, then creates a new, empty
# REXML::Document and makes it both the current node and the root node.
def reset
  super
  @currentNode = REXML::Document.new
  @rootNode = @currentNode
end

#root ⇒ Object

Return the root of the document, if any.



65
66
67
# File 'lib/html/xmltree.rb', line 65

# Return the root of the document, if any.
#
# Delegates to the +root+ method of the object held in @rootNode and
# returns its result unchanged.
def root
  @rootNode.root
end

#tree ⇒ Object



60
61
62
# File 'lib/html/xmltree.rb', line 60

# Return the tree that was built. This simply calls #document, so the
# result is the same object that #document returns.
def tree
  document
end