Class: HTML5::XMLParser
- Inherits:
- HTMLParser
- Inheritance hierarchy: Object → HTMLParser → HTML5::XMLParser
- Defined in:
- lib/html5/liberalxmlparser.rb
Overview
liberal XML parser
Direct Known Subclasses
Instance Attribute Summary
Attributes inherited from HTMLParser
#errors, #first_start_tag, #inner_html, #insert_from_table, #last_phase, #phase, #phases, #tokenizer, #tree
Instance Method Summary collapse
-
#initialize(options = {}) ⇒ XMLParser
constructor
A new instance of XMLParser.
- #normalize_token(token) ⇒ Object
Methods inherited from HTMLParser
#_, #_parse, parse, #parse, #parse_error, parse_fragment, #parse_fragment, #reset_insertion_mode
Constructor Details
#initialize(options = {}) ⇒ XMLParser
Returns a new instance of XMLParser.
22 23 24 25 |
# File 'lib/html5/liberalxmlparser.rb', line 22

# Creates a new XMLParser.
#
# The bare `super` forwards +options+ unchanged to the superclass
# constructor; afterwards the :initial entry of @phases is replaced with an
# XmlRootPhase bound to this parser and its @tree.
# NOTE(review): @phases and @tree are presumably set up by the superclass
# constructor -- it is not visible here, confirm.
#
# @param options [Hash] passed through to the superclass constructor
def initialize(options = {})
  super
  @phases[:initial] = XmlRootPhase.new(self, @tree)
end
Instance Method Details
#normalize_token(token) ⇒ Object
27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 |
# File 'lib/html5/liberalxmlparser.rb', line 27

# Normalizes a token in place, dispatching on token[:type]:
#
# * :StartTag / :EmptyTag -- the attribute pair list in token[:data] is
#   converted to a Hash; for a duplicated attribute name the first
#   occurrence wins. An :EmptyTag is additionally handed to the current
#   phase as a start tag and then rewritten into an attribute-less :EndTag.
# * :Characters -- when the tokenizer's content model flag is :CDATA, the
#   entities &lt;, &gt; and &amp; are un-escaped.
# * :EndTag -- a parse error is reported when the tag carries attributes.
# * :Comment -- a comment of the form "[CDATA[...]]" is turned into a
#   :Characters token holding the inner text.
#
# @param token [Hash] token with a :type key and a :data key (plus :name
#   for tags)
# @return [Hash] the same token object, possibly mutated
def normalize_token(token)
  case token[:type]
  when :StartTag, :EmptyTag
    # We need to remove the duplicate attributes and convert attributes
    # to a Hash so that [["x", "y"], ["x", "z"]] becomes {"x" => "y"}.
    # Reversing first lets the earliest occurrence overwrite later ones.
    token[:data] = Hash[*token[:data].reverse.flatten]

    # For EmptyTags, process both a Start and an End tag
    if token[:type] == :EmptyTag
      # Processing the start tag must not leak a content model change,
      # so the flag is saved and restored around the call.
      save = @tokenizer.content_model_flag
      @phase.processStartTag(token[:name], token[:data])
      @tokenizer.content_model_flag = save
      token[:data] = {}
      token[:type] = :EndTag
    end

  when :Characters
    # un-escape RCDATA_ELEMENTS (e.g. style, script)
    if @tokenizer.content_model_flag == :CDATA
      # &amp; is handled last so that "&amp;lt;" yields "&lt;", not "<".
      token[:data] = token[:data].
        gsub('&lt;', '<').gsub('&gt;', '>').gsub('&amp;', '&')
    end

  when :EndTag
    # An empty Array/Hash is truthy in Ruby, so test for actual content
    # rather than mere presence.
    attributes = token[:data]
    parse_error("attributes-in-end-tag") if attributes && !attributes.empty?

  when :Comment
    # Rescue CDATA from the comments
    if token[:data][0..6] == "[CDATA[" && token[:data][-2..-1] == "]]"
      token[:type] = :Characters
      token[:data] = token[:data][7...-2]
    end
  end

  token
end