Class: Volt::SandlebarsParser

Inherits:
Object
  • Object
show all
Defined in:
lib/volt/server/html_parser/sandlebars_parser.rb

Overview

Parses HTML and bindings. Based on ejohn.org/files/htmlparser.js

Takes the HTML and a handler object. The following handler methods are called as each item is seen: comment, text, binding, start_tag, end_tag

This is not a full html parser, but should cover most common cases.

Constant Summary collapse

START_TAG =

regex matchers

/^<([-!\:A-Za-z0-9_]+)((?:\s+[\w\-]+(?:\s*=\s*(?:(?:"[^"]*")|(?:'[^']*')|[^>\s]+))?)*)\s*(\/?)>/
END_TAG =
/^<\/([-!\:A-Za-z0-9_]+)[^>]*>/
ATTRIBUTES =
/([-\:A-Za-z0-9_]+)(?:\s*=\s*(?:(?:"((?:\\.|[^"])*)")|(?:'((?:\\.|[^'])*)')|([^>\s]+)))?/
BLOCK =

Types of elements

truth_hash(%w(a address applet blockquote button center dd del dir div dl dt fieldset form frameset hr iframe ins isindex li map menu noframes noscript object ol p pre script table tbody td tfoot th thead tr ul))
EMPTY =
truth_hash(%w(area base basefont br col frame hr img input isindex link meta param embed))
INLINE =
truth_hash(%w(abbr acronym applet b basefont bdo big br button cite code del dfn em font i iframe img input ins kbd label map object q s samp script select small span strike strong sub sup textarea tt u var))
CLOSE_SELF =
truth_hash(%w(colgroup dd dt li options p td tfoot th thead tr))
SPECIAL =
truth_hash(%w(script style))
FILL_IN_ATTRIBUTES =
truth_hash(%w(checked compact declare defer disabled ismap multiple nohref noresize noshade nowrap readonly selected))

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(html, handler, file_path = nil) ⇒ SandlebarsParser

Returns a new instance of SandlebarsParser.



36
37
38
39
40
41
42
43
44
# File 'lib/volt/server/html_parser/sandlebars_parser.rb', line 36

# Builds a parser over +html+ and immediately parses it, reporting what it
# finds to +handler+.
#
# @param html [String] the markup to scan
# @param handler [Object] receives the parse callbacks
# @param file_path [String, nil] only used when building parse error messages
def initialize(html, handler, file_path = nil)
  @handler   = handler
  @file_path = file_path

  # Names of the tags that are currently open, innermost last.
  @stack = []
  @html  = StringScanner.new(html)

  parse
end

Class Method Details

.truth_hash(array) ⇒ Object



15
16
17
18
19
20
# File 'lib/volt/server/html_parser/sandlebars_parser.rb', line 15

# Builds a lookup table that maps every element of +array+ to +true+.
#
# @param array [Array] the keys to register
# @return [Hash] a hash with each element as a key and +true+ as its value
def self.truth_hash(array)
  array.each_with_object({}) { |key, lookup| lookup[key] = true }
end

Instance Method Details

#end_tag(tag, tag_name) ⇒ Object



194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
# File 'lib/volt/server/html_parser/sandlebars_parser.rb', line 194

# Closes open tags, notifying the handler (innermost first) for each tag that
# is popped off the stack.
#
# * With a +tag_name+, everything from the innermost open tag down to and
#   including the closest open tag of that name is closed.
# * With a +tag_name+ that is not currently open (a stray end tag, or the end
#   tag of a tag that was never pushed), nothing is closed.
# * With no +tag_name+, every open tag is closed.
#
# @param tag [String, nil] kept for interface compatibility; the lookup is
#   driven by +tag_name+ so that callers passing +nil+ here together with a
#   name still close only up to that name
# @param tag_name [String, nil] name of the tag to close, or nil to close all
def end_tag(tag, tag_name)
  if tag_name
    # Closest (innermost) open tag with this name, if any.
    new_size = @stack.rindex(tag_name)

    # Unmatched end tag: leave the open tags alone.
    return unless new_size
  else
    # If no tag name is provided, close all the way up
    new_size = 0
  end

  if @handler.respond_to?(:end_tag)
    (@stack.size - 1).downto(new_size) do |index|
      @handler.end_tag(@stack[index])
    end
  end

  @stack = @stack[0...new_size]
end

#last ⇒ Object



46
47
48
# File 'lib/volt/server/html_parser/sandlebars_parser.rb', line 46

# Name of the innermost tag that is still open, or nil when none is open.
def last
  @stack[-1]
end

#parse ⇒ Object



50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
# File 'lib/volt/server/html_parser/sandlebars_parser.rb', line 50

# Main scanning loop. Repeatedly matches the next piece of @html (special tag
# body, comment, start tag, end tag, escaped block, binding or text) and
# dispatches it, then closes any tags that are still open.
#
# Raises via raise_parse_error when a comment or a script/style tag is never
# closed.
def parse
  loop do
    if last && SPECIAL[last]
      # In a script or style tag, just look for the first end
      close_tag = "</#{last}>"

      # Checked with String#include? rather than a scanner method so the
      # scanner's last match (the start tag scanned on the previous pass) is
      # kept for raise_parse_error, which reads pre_match.
      raise_parse_error("unclosed #{last} tag") unless @html.rest.include?(close_tag)

      body = @html.scan_until(/#{close_tag}/)
      special_tag(close_tag, body)
    elsif @html.scan(/\<\!--/)
      # start comment
      # Same reasoning as above: keep the match for `<!--` intact so the
      # error can report a line number.
      raise_parse_error('unclosed comment') unless @html.rest.include?('-->')

      # Drop the trailing `-->` from what was scanned.
      comment = @html.scan_until(/--\>/)[0..-4]

      @handler.comment(comment) if @handler.respond_to?(:comment)
    elsif (tag = @html.scan(START_TAG))
      tag_name = @html[1]
      rest     = @html[2]
      unary    = @html[3]

      start_tag(tag, tag_name, rest, unary)
    elsif @html.scan(END_TAG)
      tag_name = @html[1]

      end_tag(tag_name, tag_name)
    elsif @html.scan(/\{\{\{(.*?)\}\}\}([^\}]|$)/)
      # Anything between {{{ and }}} is escaped and not processed (treated as text)
      escaped = @html[1]

      # The pattern consumes one look-ahead character after the closing }}}
      # (nothing when it matched `$` instead). Hand it back to the scanner.
      # StringScanner#pos is a byte offset, so rewind by the character's
      # byte size rather than by 1.
      @html.pos -= @html[2].bytesize

      text(escaped)
    elsif @html.scan(/\{\{/)
      # We are in text mode and matched the start of a binding
      start_binding
    elsif (chunk = @html.scan(/\{/))
      # A single { outside of a binding
      text(chunk)
    elsif (chunk = @html.scan(/(?:[^\<\{]+)/))
      # matched text up until the next html tag
      text(chunk)
    else
      # Nothing left
      # NOTE(review): input that matches none of the patterns above (for
      # example a `<` that does not start a tag or comment) also ends up
      # here, so the remainder of the document is dropped without an error.
      break
    end
  end

  end_tag(nil, nil)
end

#raise_parse_error(error) ⇒ Object



133
134
135
136
137
138
139
140
# File 'lib/volt/server/html_parser/sandlebars_parser.rb', line 133

# Raises an HTMLParseError whose message is +error+ followed by the line
# number of the scanner's last match and, when known, the file path.
#
# The line number is derived from the text preceding the scanner's most
# recent match, so a match must be available when this is called.
#
# @param error [String] description of what went wrong
# @raise [HTMLParseError] always
def raise_parse_error(error)
  newlines_before = @html.pre_match.count("\n")

  message = error + " on line: #{newlines_before + 1}"
  message = "#{message} of #{@file_path}" if @file_path

  fail HTMLParseError, message
end

#special_tag(close_tag, body) ⇒ Object



219
220
221
222
223
224
225
226
227
# File 'lib/volt/server/html_parser/sandlebars_parser.rb', line 219

# Handles the body of a script/style tag: strips the trailing close tag,
# unwraps single-line `<!-- -->` and `<![CDATA[ ]]>` wrappers, emits the
# result as text and then closes the currently open tag.
#
# @param close_tag [String] the literal close tag that ends +body+
# @param body [String] everything scanned up to and including the close tag
def special_tag(close_tag, body)
  # Cut the close tag off the end of what was scanned.
  contents = body[0..-(close_tag.size + 1)]

  # Keep what is inside comment / CDATA wrappers, dropping the markers.
  contents = contents.gsub(/\<\!--(.*?)--\>/, '\\1')
  contents = contents.gsub(/\<\!\[CDATA\[(.*?)\]\]\>/, '\\1')

  text(contents)

  open_tag = last
  end_tag(open_tag, open_tag)
end

#start_binding ⇒ Object

Finds the end of a binding



104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
# File 'lib/volt/server/html_parser/sandlebars_parser.rb', line 104

# Called right after an opening {{ has been consumed. Scans forward to the
# matching }} (nested {{ }} pairs are balanced) and hands the text in between
# to the handler's #binding, when the handler implements it.
#
# A newline or the end of the input before the binding is closed is reported
# through raise_parse_error.
def start_binding
  contents = ''.dup
  depth    = 1

  # scan until we reach a {{ or }}
  loop do
    contents << @html.scan_until(/(\{\{|\}\}|\n|\Z)/)
    delimiter = @html[1]

    case delimiter
    when '}}'
      # close
      depth -= 1
      break if depth == 0
    when '{{'
      # open more
      depth += 1
    else
      if delimiter == "\n" || @html.eos?
        # Starting new tag, should be closed before this
        # or end of doc before closed binding
        raise_parse_error("unclosed binding: {#{contents.strip}")
      else
        fail 'should not reach here'
      end
    end
  end

  # Drop the final closing }} from what was collected.
  contents = contents[0..-3]
  @handler.binding(contents) if @handler.respond_to?(:binding)
end

#start_tag(tag, tag_name, rest, unary) ⇒ Object



142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
# File 'lib/volt/server/html_parser/sandlebars_parser.rb', line 142

# Handles a start tag that was just scanned: auto-closes tags that the new
# tag implicitly ends, records the tag as open (unless it is unary or a
# section tag) and reports it to the handler together with its attributes.
#
# @param tag [String] the full text of the tag as it appeared in the source
# @param tag_name [String] the tag's name, in its original case
# @param rest [String] the attribute portion of the tag
# @param unary [String] the self-closing slash if one was present, else empty
def start_tag(tag, tag_name, rest, unary)
  # A name that starts with a colon followed by a capital letter marks a
  # section tag. Checked before downcasing, which would hide the capital.
  section_tag = tag_name[0] == ':' && tag_name[1] =~ /[A-Z]/

  tag_name = tag_name.downcase

  # handle doctype so we get it output exactly the same way
  if tag_name == '!doctype'
    @handler.text(tag) if @handler.respond_to?(:text)
    return
  end

  # Auto-close the last inline tag if we started a new block.
  # The name is passed in both positions so that only that tag is closed.
  end_tag(last, last) if BLOCK[tag_name] && last && INLINE[last]

  # Some tags close themselves when a new one of themselves is reached.
  # ex, a tr will close the previous tr
  end_tag(tag_name, tag_name) if CLOSE_SELF[tag_name] && last == tag_name

  unary = EMPTY[tag_name] || !unary.blank?

  # Section tags are also unary, so neither kind is tracked as open.
  @stack.push(tag_name) unless unary || section_tag

  return unless @handler.respond_to?(:start_tag)

  attributes = {}

  # Take the rest string and extract the attributes, filling in any
  # "fill in" attribute values if not provided.
  rest.scan(ATTRIBUTES).each do |match|
    name = match[0]

    # match[1..3] are the double-quoted, single-quoted and unquoted values.
    value = match[1] || match[2] || match[3] || FILL_IN_ATTRIBUTES[name] || ''

    attributes[name] = value
  end

  if section_tag
    @handler.start_section(tag_name, attributes, unary)
  else
    @handler.start_tag(tag_name, attributes, unary)
  end
end

#text(text) ⇒ Object



99
100
101
# File 'lib/volt/server/html_parser/sandlebars_parser.rb', line 99

# Forwards a run of text to the handler, when the handler implements #text.
#
# @param text [String] the text that was scanned
def text(text)
  return unless @handler.respond_to?(:text)

  @handler.text(text)
end