Class: Volt::SandlebarsParser
- Defined in:
- lib/volt/server/html_parser/sandlebars_parser.rb
Overview
Parses HTML and bindings, based on ejohn.org/files/htmlparser.js.
Takes the HTML and a handler object on which the following methods are called as each construct is seen: comment, text, binding, start_tag, end_tag.
This is not a full html parser, but should cover most common cases.
Constant Summary collapse
- START_TAG =
regex matchers
/^<([-!\:A-Za-z0-9_]+)((?:\s+[\w\-]+(?:\s*=\s*(?:(?:"[^"]*")|(?:'[^']*')|[^>\s]+))?)*)\s*(\/?)>/
- END_TAG =
/^<\/([-!\:A-Za-z0-9_]+)[^>]*>/
- ATTRIBUTES =
/([-\:A-Za-z0-9_]+)(?:\s*=\s*(?:(?:"((?:\\.|[^"])*)")|(?:'((?:\\.|[^'])*)')|([^>\s]+)))?/
- BLOCK =
Types of elements
truth_hash(%w(a address applet blockquote button center dd del dir div dl dt fieldset form frameset hr iframe ins isindex li map menu noframes noscript object ol p pre script table tbody td tfoot th thead tr ul))
- EMPTY =
truth_hash(%w(area base basefont br col frame hr img input isindex link meta param embed))
- INLINE =
truth_hash(%w(abbr acronym applet b basefont bdo big br button cite code del dfn em font i iframe img input ins kbd label map object q s samp script select small span strike strong sub sup textarea tt u var))
- CLOSE_SELF =
truth_hash(%w(colgroup dd dt li options p td tfoot th thead tr))
- SPECIAL =
truth_hash(%w(script style))
- FILL_IN_ATTRIBUTES =
truth_hash(%w(checked compact declare defer disabled ismap multiple nohref noresize noshade nowrap readonly selected))
Class Method Summary collapse
Instance Method Summary collapse
- #end_tag(tag, tag_name) ⇒ Object
-
#initialize(html, handler, file_path = nil) ⇒ SandlebarsParser
constructor
A new instance of SandlebarsParser.
- #last ⇒ Object
- #parse ⇒ Object
- #raise_parse_error(error) ⇒ Object
- #special_tag(close_tag, body) ⇒ Object
-
#start_binding ⇒ Object
Finds the end of a binding.
- #start_tag(tag, tag_name, rest, unary) ⇒ Object
- #text(text) ⇒ Object
Constructor Details
#initialize(html, handler, file_path = nil) ⇒ SandlebarsParser
Returns a new instance of SandlebarsParser.
36 37 38 39 40 41 42 43 44 |
# File 'lib/volt/server/html_parser/sandlebars_parser.rb', line 36

# Sets up the parser state and immediately parses +html+, so the handler
# callbacks have already run by the time the constructor returns.
#
# @param html [String] the markup (including bindings) to parse
# @param handler [Object] object that receives the parse callbacks
# @param file_path [String, nil] optional path, appended to parse error
#   messages when present
def initialize(html, handler, file_path = nil)
  # Names of the tags that are currently open, outermost first.
  @stack = []
  @file_path = file_path
  @handler = handler
  @html = StringScanner.new(html)

  parse
end
Class Method Details
.truth_hash(array) ⇒ Object
15 16 17 18 19 20 |
# File 'lib/volt/server/html_parser/sandlebars_parser.rb', line 15

# Builds a lookup table in which every element of +array+ maps to +true+.
#
# @param array [Array] the keys to register
# @return [Hash] hash with one +true+ entry per distinct element
def self.truth_hash(array)
  array.each_with_object({}) { |key, lookup| lookup[key] = true }
end
Instance Method Details
#end_tag(tag, tag_name) ⇒ Object
194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 |
# File 'lib/volt/server/html_parser/sandlebars_parser.rb', line 194

# Closes open tags and reports each closed tag to the handler, innermost
# first.
#
# * With a nil +tag_name+ every tag that is still open is closed.
# * Otherwise the closest open tag with that name is closed together with
#   everything that was opened after it.
# * An end tag whose name is not on the stack is ignored, so a stray
#   closing tag no longer closes the whole document.
#
# @param tag [String, nil] the tag as passed by the caller; it is not used
#   to decide what gets closed
# @param tag_name [String, nil] name of the tag to close, or nil to close
#   everything
def end_tag(tag, tag_name)
  if tag_name
    # The stack only holds downcased names, so compare case-insensitively.
    new_size = @stack.rindex(tag_name.downcase)
    return unless new_size
  else
    # If no tag name is provided, close all the way up
    new_size = 0
  end

  if @handler.respond_to?(:end_tag)
    (@stack.size - 1).downto(new_size) do |index|
      @handler.end_tag(@stack[index])
    end
  end

  @stack = @stack[0...new_size]
end
#last ⇒ Object
46 47 48 |
# File 'lib/volt/server/html_parser/sandlebars_parser.rb', line 46

# Name of the innermost tag that is still open.
#
# @return [Object, nil] the top entry of the tag stack, or nil when no tag
#   is open
def last
  @stack[-1]
end
#parse ⇒ Object
50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 |
# File 'lib/volt/server/html_parser/sandlebars_parser.rb', line 50

# Consumes the whole input, dispatching comments, tags, escaped blocks,
# bindings and text to the matching helper, then closes every tag that is
# still open.
#
# Unterminated comments and unterminated script/style bodies run to the end
# of the input instead of failing on a nil scan result, and a "<" that does
# not start a tag or comment is passed through as text instead of silently
# ending the parse.
def parse
  loop do
    if last && SPECIAL[last]
      # In a script or style tag, just look for the first end
      close_tag = "</#{last}>"
      body = @html.scan_until(/#{Regexp.escape(close_tag)}/)

      unless body
        # Never closed: the remainder of the input is the body. The close
        # tag is appended because special_tag strips it from the end.
        body = @html.rest + close_tag
        @html.terminate
      end

      special_tag(close_tag, body)
    elsif @html.scan(/\<\!--/)
      # start comment
      comment = @html.scan_until(/--\>/)

      if comment
        # Drop the trailing "-->"
        comment = comment[0..-4]
      else
        # Never closed: the comment runs to the end of the input.
        comment = @html.rest
        @html.terminate
      end

      @handler.comment(comment) if @handler.respond_to?(:comment)
    elsif (tag = @html.scan(START_TAG))
      tag_name = @html[1]
      rest = @html[2]
      unary = @html[3]

      start_tag(tag, tag_name, rest, unary)
    elsif @html.scan(END_TAG)
      tag_name = @html[1]

      end_tag(tag_name, tag_name)
    elsif @html.scan(/\{\{\{(.*?)\}\}\}(?!\})/)
      # Anything between {{{ and }}} is escaped and not processed (treated
      # as text). The lookahead requires that no further "}" follows without
      # consuming the next character, so the scan position never has to be
      # rewound (the position is a byte offset and the next character may
      # be multibyte).
      text(@html[1])
    elsif @html.scan(/\{\{/)
      # We are in text mode and matched the start of a binding
      start_binding
    elsif (chunk = @html.scan(/\{/))
      # A single { outside of a binding
      text(chunk)
    elsif (chunk = @html.scan(/(?:[^\<\{]+)/))
      # matched text up until the next html tag
      text(chunk)
    elsif (chunk = @html.scan(/\</))
      # A "<" that does not begin a tag or a comment is plain text
      text(chunk)
    else
      # Nothing left
      break
    end
  end

  end_tag(nil, nil)
end
#raise_parse_error(error) ⇒ Object
133 134 135 136 137 138 139 140 |
# File 'lib/volt/server/html_parser/sandlebars_parser.rb', line 133

# Raises an HTMLParseError for +error+, annotated with a line number and,
# when a file path was given to the parser, that path.
#
# The line number is derived from the number of newlines in the text that
# precedes the scanner's most recent match.
#
# @param error [String] description of what went wrong
# @raise [HTMLParseError] always
def raise_parse_error(error)
  line_number = @html.pre_match.count("\n") + 1
  file_suffix = @file_path ? " of #{@file_path}" : ''

  raise HTMLParseError, error + " on line: #{line_number}" + file_suffix
end
#special_tag(close_tag, body) ⇒ Object
219 220 221 222 223 224 225 226 227 |
# File 'lib/volt/server/html_parser/sandlebars_parser.rb', line 219

# Emits the body of a script/style element as text and closes the element.
#
# @param close_tag [String] the closing tag that terminates +body+
# @param body [String] the scanned content, ending with +close_tag+
def special_tag(close_tag, body)
  # Cut the closing tag off the end of the scanned content.
  inner = body[0..-(close_tag.size + 1)]

  # Unwrap single-line comment and CDATA wrappers, keeping what they contain.
  unwrapped = inner.gsub(/\<\!--(.*?)--\>/, '\\1')
  unwrapped = unwrapped.gsub(/\<\!\[CDATA\[(.*?)\]\]\>/, '\\1')

  text(unwrapped)

  end_tag(last, last)
end
#start_binding ⇒ Object
Finds the end of a binding.
104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 |
# File 'lib/volt/server/html_parser/sandlebars_parser.rb', line 104

# Finds the end of a binding whose opening "{{" has already been consumed
# and hands the text between the braces to the handler's +binding+ callback.
#
# Nested "{{ ... }}" pairs are counted so that only the matching "}}" ends
# the binding. A newline or the end of the input before that point is
# reported through raise_parse_error.
def start_binding
  source = ''
  depth = 1

  # scan until we reach a {{ or }}
  loop do
    source << @html.scan_until(/(\{\{|\}\}|\n|\Z)/)

    case (token = @html[1])
    when '}}'
      # close
      depth -= 1
      break if depth == 0
    when '{{'
      # open more
      depth += 1
    else
      if token == "\n" || @html.eos?
        # Starting new tag, should be closed before this
        # or end of doc before closed binding
        raise_parse_error("unclosed binding: {#{source.strip}")
      else
        fail 'should not reach here'
      end
    end
  end

  # Drop the closing "}}" before handing the binding over.
  @handler.binding(source[0..-3]) if @handler.respond_to?(:binding)
end
#start_tag(tag, tag_name, rest, unary) ⇒ Object
142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 |
# File 'lib/volt/server/html_parser/sandlebars_parser.rb', line 142

# Handles an opening tag: applies the implicit-close rules, records the tag
# on the open-tag stack when it can have children, and reports it (with its
# attributes) to the handler.
#
# @param tag [String] the full text of the matched tag; only used to pass a
#   doctype through unchanged
# @param tag_name [String] the tag name as written; a leading ":" followed
#   by an uppercase letter marks a section tag
# @param rest [String] the attribute portion of the tag
# @param unary [String] the self-closing slash captured from the tag, blank
#   when absent (relies on +blank?+, which is not core Ruby and is presumably
#   supplied by the surrounding project)
def start_tag(tag, tag_name, rest, unary)
  # Must be decided before downcasing, since it depends on the capital letter.
  section_tag = tag_name[0] == ':' && tag_name[1] =~ /[A-Z]/

  tag_name = tag_name.downcase

  # handle doctype so we get it output exactly the same way
  if tag_name == '!doctype'
    @handler.text(tag) if @handler.respond_to?(:text)
    return
  end

  # Auto-close the last inline tag if we started a new block. The tag name
  # is passed in both positions so that only that element is closed rather
  # than every open element.
  end_tag(last, last) if BLOCK[tag_name] && last && INLINE[last]

  # Some tags close themselves when a new one of themselves is reached.
  # ex, a tr will close the previous tr
  end_tag(tag_name, tag_name) if CLOSE_SELF[tag_name] && last == tag_name

  unary = EMPTY[tag_name] || !unary.blank?

  # Section tag's are also unary
  @stack.push(tag_name) unless unary || section_tag

  return unless @handler.respond_to?(:start_tag)

  attributes = {}

  # Take the rest string and extract the attributes, filling in any
  # "fill in" attribute values if not provided.
  rest.scan(ATTRIBUTES).each do |match|
    name = match[0]

    # Double-quoted, single-quoted, then unquoted value; otherwise the
    # fill-in entry for the attribute, or an empty string.
    value = match[1] || match[2] || match[3] || FILL_IN_ATTRIBUTES[name] || ''

    attributes[name] = value
  end

  if section_tag
    @handler.start_section(tag_name, attributes, unary)
  else
    @handler.start_tag(tag_name, attributes, unary)
  end
end
#text(text) ⇒ Object
99 100 101 |
# File 'lib/volt/server/html_parser/sandlebars_parser.rb', line 99

# Forwards a run of text to the handler, provided the handler implements a
# +text+ callback.
#
# @param text [String] the text to forward
def text(text)
  return unless @handler.respond_to?(:text)

  @handler.text(text)
end