Class: HTML5::XMLParser

# File 'lib/feed_tools/vendor/html5/lib/html5/liberalxmlparser.rb', line 22

# Runs the superclass constructor with the given options, then stores a
# new XmlRootPhase (built from this parser and @tree) under the :initial
# key of @phases.
#
# options - passed through unchanged to the superclass constructor.
def initialize(options = {})
  super(options)
  @phases[:initial] = XmlRootPhase.new(self, @tree)
end

Instance Method Details

#normalize_token(token) ⇒ `Object`

# File 'lib/feed_tools/vendor/html5/lib/html5/liberalxmlparser.rb', line 27

# Adjusts a tokenizer token in place, depending on its :type, and returns it.
#
# * :StartTag / :EmptyTag - the list of [name, value] attribute pairs in
#   token[:data] is converted to a Hash with duplicates removed. An
#   :EmptyTag is additionally dispatched to the current phase as a start
#   tag and then turned into an :EndTag token with no attributes.
# * :Characters - when the tokenizer's content model flag is :CDATA, the
#   entities &lt;, &gt; and &amp; are un-escaped.
# * :EndTag - a parse error is reported if the tag carries attributes.
# * :Comment - a comment of the form "[CDATA[...]]" is turned into a
#   :Characters token holding the inner text.
#
# token - Hash with at least the keys :type and :data (and :name for tags).
#
# Returns the same (mutated) token Hash.
def normalize_token(token)
  case token[:type]
  when :StartTag, :EmptyTag
    # Remove duplicate attributes and convert them to a Hash. Reversing
    # first makes the earliest occurrence of a name win, so
    # [["x", "y"], ["x", "z"]] becomes {"x" => "y"}.
    token[:data] = Hash[*token[:data].reverse.flatten]

    # For EmptyTags, process both a Start and an End tag: the start tag is
    # dispatched here and the token is handed back as the matching end tag.
    if token[:type] == :EmptyTag
      saved_flag = @tokenizer.content_model_flag
      @phase.processStartTag(token[:name], token[:data])
      # Put the content model flag back to what it was before the start
      # tag was processed.
      @tokenizer.content_model_flag = saved_flag
      token[:data] = {}
      token[:type] = :EndTag
    end

  when :Characters
    # Un-escape character data while the tokenizer is in the :CDATA
    # content model. '&amp;' is replaced last so that '&amp;lt;' yields
    # the literal text '&lt;' rather than '<'.
    if @tokenizer.content_model_flag == :CDATA
      token[:data] = token[:data].
        gsub('&lt;', '<').gsub('&gt;', '>').gsub('&amp;', '&')
    end

  when :EndTag
    # Empty collections are truthy in Ruby, so the attribute list has to
    # be checked for emptiness explicitly; otherwise every end tag with an
    # empty attribute list would be reported as an error.
    attributes = token[:data]
    parse_error("attributes-in-end-tag") if attributes && !attributes.empty?

  when :Comment
    # Rescue CDATA from the comments: "[CDATA[text]]" becomes the
    # character data "text".
    if token[:data][0..6] == "[CDATA[" && token[:data][-2..-1] == "]]"
      token[:type] = :Characters
      token[:data] = token[:data][7...-2]
    end
  end

  token
end

Class: HTML5::XMLParser

Overview

Direct Known Subclasses

Instance Attribute Summary

Attributes inherited from HTMLParser

Instance Method Summary collapse

Methods inherited from HTMLParser

Constructor Details

#initialize(options = {}) ⇒ XMLParser

Instance Method Details

#normalize_token(token) ⇒ Object

#initialize(options = {}) ⇒ `XMLParser`

#normalize_token(token) ⇒ `Object`