Class: HTML5::XMLParser

Inherits:
HTMLParser show all
Defined in:
lib/feed_tools/vendor/html5/lib/html5/liberalxmlparser.rb

Overview

Liberal XML parser.

Direct Known Subclasses

XHTMLParser

Instance Attribute Summary

Attributes inherited from HTMLParser

#errors, #first_start_tag, #inner_html, #insert_from_table, #last_phase, #phase, #phases, #tokenizer, #tree

Instance Method Summary collapse

Methods inherited from HTMLParser

#_, #_parse, parse, #parse, #parse_error, parse_fragment, #parse_fragment, #reset_insertion_mode

Constructor Details

#initialize(options = {}) ⇒ XMLParser

Returns a new instance of XMLParser.



22
23
24
25
# File 'lib/feed_tools/vendor/html5/lib/html5/liberalxmlparser.rb', line 22

# Sets up the parser through the superclass constructor, then installs an
# XmlRootPhase (built from this parser and its tree) as the :initial phase.
#
# options - Hash passed through unchanged to the superclass constructor.
def initialize(options = {})
  super(options)
  root_phase = XmlRootPhase.new(self, @tree)
  @phases[:initial] = root_phase
end

Instance Method Details

#normalize_token(token) ⇒ Object



27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
# File 'lib/feed_tools/vendor/html5/lib/html5/liberalxmlparser.rb', line 27

# Adjusts a token in place, depending on its :type, and returns it.
#
# * :StartTag / :EmptyTag - the attribute list in token[:data] is converted
#   to a Hash; when an attribute name is repeated the first occurrence wins.
#   An :EmptyTag is additionally passed to the current phase as a start tag
#   (the tokenizer's content model flag is restored afterwards) and is then
#   rewritten as an :EndTag token with no attributes.
# * :Characters - while the tokenizer's content model flag is :CDATA, the
#   entities &lt;, &gt; and &amp; are un-escaped.
# * :EndTag - "attributes-in-end-tag" is reported through parse_error, but
#   only when the token actually carries attributes.
# * :Comment - a comment shaped like "[CDATA[...]]" is turned into a
#   :Characters token holding the inner text.
#
# token - Hash with at least :type and :data (and :name for tags).
#
# Returns the same (mutated) token Hash.
def normalize_token(token)
  case token[:type]
  when :StartTag, :EmptyTag
    # Remove duplicate attributes and convert the pair list to a Hash, so
    # that [["x", "y"], ["x", "z"]] becomes {"x" => "y"}.  The list is
    # reversed first so the earliest occurrence is written last and wins.
    token[:data] = Hash[*token[:data].reverse.flatten]

    # For EmptyTags, process both a Start and an End tag
    if token[:type] == :EmptyTag
      # Keep the content model flag unchanged across the start-tag call.
      save = @tokenizer.content_model_flag
      @phase.processStartTag(token[:name], token[:data])
      @tokenizer.content_model_flag = save
      token[:data] = {}
      token[:type] = :EndTag
    end

  when :Characters
    # Un-escape character data while the tokenizer is in the CDATA content
    # model.  &amp; is handled last so "&amp;lt;" yields "&lt;", not "<".
    if @tokenizer.content_model_flag == :CDATA
      token[:data] = token[:data].
        gsub('&lt;', '<').gsub('&gt;', '>').gsub('&amp;', '&')
    end

  when :EndTag
    # An empty Array/Hash is truthy in Ruby, so test for emptiness
    # explicitly; otherwise every attribute-less end tag would be reported.
    attributes = token[:data]
    parse_error("attributes-in-end-tag") if attributes && !attributes.empty?

  when :Comment
    # Rescue CDATA from the comments
    if token[:data][0..6] == "[CDATA[" && token[:data][-2..-1] == "]]"
      token[:type] = :Characters
      token[:data] = token[:data][7...-2]
    end
  end

  token
end