Class: HTMLDeprecated::Document

Inherits:
Object
  • Object
show all
Defined in:
lib/rails/deprecated_sanitizer/html-scanner/html/document.rb

Overview

A top-level HTML document. You give it a body of text, and it will parse that text into a tree of nodes.

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(text, strict = false, xml = false) ⇒ Document

Create a new Document from the given text.



15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
# File 'lib/rails/deprecated_sanitizer/html-scanner/html/document.rb', line 15

# Builds the node tree for +text+.
#
# Tokens are pulled from a Tokenizer one at a time and turned into nodes via
# Node.parse. A stack of currently open nodes (seeded with the root) tracks
# nesting: every parsed node except a closing tag is appended to the children
# of the node on top of the stack, a tag that is not childless is pushed, and
# a closing tag pops the stack when its name matches the node on top.
#
# text   - the markup to parse.
# strict - forwarded to Node.parse; when true, a closing tag whose name does
#          not match the innermost open node raises instead of warning.
# xml    - forwarded to the node's childless? check when deciding whether a
#          tag is pushed onto the stack.
#
# Raises RuntimeError (strict only) on a mismatched closing tag.
def initialize(text, strict=false, xml=false)
  scanner = Tokenizer.new(text)
  @root = Node.new(nil)
  node_stack = [@root]

  while (token = scanner.next)
    parent = node_stack.last
    node = Node.parse(parent, scanner.line, scanner.position, token, strict)

    # closing is only read when the node is a tag, so tag? is checked first.
    is_tag = node.tag?
    closing_tag = is_tag && node.closing == :close

    # Closing tags are not stored in the tree; anything else is attached to
    # the innermost open node.
    parent.children << node unless closing_tag
    next unless is_tag

    if closing_tag && node_stack.length > 1
      if parent.name == node.name
        # An element closed without any children gets one empty text child.
        if parent.children.empty?
          parent.children << Text.new(parent, node.line, node.position, "")
        end
        node_stack.pop
      else
        # Excerpts start up to 20 characters before each tag, clamped to the
        # beginning of the text.
        open_start = [parent.position - 20, 0].max
        close_start = [node.position - 20, 0].max
        msg = <<EOF.strip
ignoring attempt to close #{node_stack.last.name} with #{node.name}
  opened at byte #{node_stack.last.position}, line #{node_stack.last.line}
  closed at byte #{node.position}, line #{node.line}
  attributes at open: #{node_stack.last.attributes.inspect}
  text around open: #{text[open_start,40].inspect}
  text around close: #{text[close_start,40].inspect}
EOF
        strict ? raise(msg) : warn(msg)
      end
    elsif !closing_tag && !node.childless?(xml)
      # The tag stays open: later nodes become its children.
      node_stack.push(node)
    end
  end
end

Instance Attribute Details

#root ⇒ Object (readonly)

The root of the parsed document.



12
13
14
# File 'lib/rails/deprecated_sanitizer/html-scanner/html/document.rb', line 12

# Reader for the document's root node.
#
# Returns the value of @root, which the constructor sets to a parentless
# Node before parsing begins.
def root
  @root
end

Instance Method Details

#find(conditions) ⇒ Object

Search the tree for (and return) the first node that matches the given conditions. The conditions are interpreted differently for different node types; see HTMLDeprecated::Text#find and HTMLDeprecated::Tag#find.



55
56
57
# File 'lib/rails/deprecated_sanitizer/html-scanner/html/document.rb', line 55

# Looks up a single node by delegating to the root node's own find.
#
# conditions - passed through unchanged to the root node.
#
# Returns whatever the root node's find returns.
def find(conditions)
  tree = @root
  tree.find(conditions)
end

#find_all(conditions) ⇒ Object

Search the tree for (and return) all nodes that match the given conditions. The conditions are interpreted differently for different node types; see HTMLDeprecated::Text#find and HTMLDeprecated::Tag#find.



62
63
64
# File 'lib/rails/deprecated_sanitizer/html-scanner/html/document.rb', line 62

# Collects matching nodes by delegating to the root node's own find_all.
#
# conditions - passed through unchanged to the root node.
#
# Returns whatever the root node's find_all returns.
def find_all(conditions)
  tree = @root
  tree.find_all(conditions)
end