Class: Nokogiri::XML::FragmentHandler

Inherits:
SAX::Document show all
Defined in:
lib/nokogiri/xml/fragment_handler.rb

Overview

:nodoc:

Constant Summary collapse

# Matches a colon-separated qualified name. Both groups are greedy, so the
# first capture is everything before the last colon and the second capture is
# everything after it. start_element treats capture 1 as the namespace prefix
# and capture 2 as the local element name.
QNAME_REGEX =
/(.*):(.*)/

Instance Method Summary collapse

Methods inherited from SAX::Document

#end_document, #end_element_namespace, #error, #start_document, #start_element_namespace, #warning, #xmldecl

Constructor Details

#initialize(node, original_html) ⇒ FragmentHandler

Returns a new instance of FragmentHandler.



6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
# File 'lib/nokogiri/xml/fragment_handler.rb', line 6

# Sets up the handler state for building a fragment underneath +node+.
#
# node          - the node new children are attached to; it seeds the stack
#                 and supplies the owning document via +node.document+.
# original_html - the source markup of the fragment as a String.
def initialize(node, original_html)
  @doc_started = false
  @document = node.document
  @stack = [node]
  @html_eh = node.kind_of?(HTML::DocumentFragment)

  # start_element() and characters() match against this string with regexes
  # anchored at start-of-line, whereas the intent is to anchor at
  # start-of-document only. Keeping just the text before the first newline
  # (after dropping leading whitespace) makes the two anchors equivalent.
  #
  # The approach was picked after benchmarking alternatives; see
  # http://gist.github.com/115936
  @original_html = original_html.lstrip.partition("\n").first
end

Instance Method Details

#cdata_block(string) ⇒ Object



63
64
65
# File 'lib/nokogiri/xml/fragment_handler.rb', line 63

# SAX callback for a CDATA section: wraps +string+ in a CDATA node created
# against the handler's document and appends it to the node currently on top
# of the stack.
def cdata_block(string)
  parent = @stack.last
  parent << CDATA.new(@document, string)
end

#characters(string) ⇒ Object



54
55
56
57
# File 'lib/nokogiri/xml/fragment_handler.rb', line 54

# SAX callback for character data.
#
# Marks the fragment as started when the stripped saved source text begins
# (after optional whitespace) with the stripped +string+, then always appends
# a Text node for +string+ to the node on top of the stack.
def characters(string)
  source = @original_html.strip
  leading_text = %r{^\s*#{Regexp.escape(string.strip)}}
  if source =~ leading_text
    @doc_started = true
  end

  parent = @stack.last
  parent << Text.new(string, @document)
end

#comment(string) ⇒ Object



59
60
61
# File 'lib/nokogiri/xml/fragment_handler.rb', line 59

# SAX callback for a comment: builds a Comment node for +string+ against the
# handler's document and appends it to the node currently on top of the stack.
def comment(string)
  parent = @stack.last
  parent << Comment.new(@document, string)
end

#end_element(name) ⇒ Object



67
68
69
70
# File 'lib/nokogiri/xml/fragment_handler.rb', line 67

# SAX callback for a closing tag.
#
# Pops the node on top of the stack when its name equals +name+ and returns
# that node; a closing tag that does not match the top of the stack is
# ignored and nil is returned.
def end_element(name)
  @stack.pop if @stack.last.name == name
end

#start_element(name, attrs = []) ⇒ Object



24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
# File 'lib/nokogiri/xml/fragment_handler.rb', line 24

# SAX callback for an opening tag.
#
# Elements are ignored until the saved first line of the original markup is
# seen to begin (after optional whitespace) with "<name"; the comparison is
# case-insensitive when the handler was built for an HTML fragment.
# NOTE(review): presumably this skips wrapper elements the parser adds around
# the fragment -- confirm against the caller.
#
# name  - the tag name as reported by the parser; may carry a "prefix:" part.
# attrs - flat Array of [key, value, key, value, ...]. A trailing key with no
#         value gets the empty string. The array is never modified.
#
# Returns nil while the fragment has not started, otherwise the node stack.
def start_element(name, attrs = [])
  opening_tag = if @html_eh
    %r{^\s*<#{Regexp.escape(name)}}i
  else
    %r{^\s*<#{Regexp.escape(name)}}
  end

  @doc_started = true if @original_html =~ opening_tag
  return unless @doc_started

  # A prefixed name can only be resolved when the document has a root to
  # carry namespace definitions; the prefix is stripped from the name even if
  # no definition with that prefix is found.
  ns = nil
  if @document.root
    match = name.match(QNAME_REGEX)
    if match
      prefix, name = match[1], match[2]
      ns = @document.root.namespace_definitions.detect { |definition|
        definition.prefix == prefix
      }
    end
  end

  node = Element.new(name, @document)

  # Pad a copy rather than appending to the caller's array, and walk the
  # pairs directly instead of splatting the whole list into Hash[]. Repeated
  # keys still end up with the last value given.
  pairs = attrs.length.even? ? attrs : attrs + [""]
  pairs.each_slice(2) do |key, value|
    node[key] = value
  end

  node.namespace = ns if ns

  @stack.last << node
  @stack << node
end