Class: Pegex::Parser
- Inherits:
- Object
- Inheritance hierarchy:
- Object
- Pegex::Parser
- Defined in:
- lib/pegex/parser.rb
Defined Under Namespace
Classes: PegexParseError
Instance Attribute Summary
-
#debug ⇒ Object
Returns the value of attribute debug.
-
#grammar ⇒ Object
Returns the value of attribute grammar.
-
#input ⇒ Object
Returns the value of attribute input.
-
#parent ⇒ Object
Returns the value of attribute parent.
-
#receiver ⇒ Object
Returns the value of attribute receiver.
-
#rule ⇒ Object
Returns the value of attribute rule.
Instance Method Summary
- #format_error(msg) ⇒ Object
-
#initialize {|_self| ... } ⇒ Parser
constructor
A new instance of Parser.
- #match_all(list, parent = nil) ⇒ Object
- #match_any(list, parent = nil) ⇒ Object
- #match_err(error, parent = nil) ⇒ Object
- #match_next(next_) ⇒ Object
- #match_next_with_sep(next_) ⇒ Object
- #match_ref(ref, parent) ⇒ Object
- #match_ref_trace(ref, parent) ⇒ Object
- #match_rgx(regexp, parent = nil) ⇒ Object
- #optimize_grammar(start) ⇒ Object
- #optimize_node(node) ⇒ Object
- #parse(input, start = nil) ⇒ Object
- #throw_error(msg) ⇒ Object
- #trace(action) ⇒ Object
Constructor Details
#initialize {|_self| ... } ⇒ Parser
Returns a new instance of Parser.
17 18 19 20 21 22 23 24 |
# File 'lib/pegex/parser.rb', line 17
#
# Puts a new parser into its starting state: both cursors at zero, grammar
# not yet optimized, errors raised rather than swallowed.
#
# Debug tracing is taken from the RUBY_PEGEX_DEBUG environment variable,
# then from the $PegexParserDebug global, and is otherwise false.
#
# @yield [_self] the parser itself, when a block is given
def initialize
  @position, @farthest = 0, 0
  @optimized = false
  @throw_on_error = true
  @debug = ENV['RUBY_PEGEX_DEBUG'] || $PegexParserDebug || false
  yield(self) if block_given?
end
Instance Attribute Details
#debug ⇒ Object
Returns the value of attribute debug.
15 16 17 |
# File 'lib/pegex/parser.rb', line 15
#
# Reader for the debug attribute.
#
# @return [Object] the current value of @debug
def debug
  return @debug
end
#grammar ⇒ Object
Returns the value of attribute grammar.
9 10 11 |
# File 'lib/pegex/parser.rb', line 9
#
# Reader for the grammar attribute.
#
# @return [Object] the current value of @grammar
def grammar
  return @grammar
end
#input ⇒ Object
Returns the value of attribute input.
11 12 13 |
# File 'lib/pegex/parser.rb', line 11
#
# Reader for the input attribute.
#
# @return [Object] the current value of @input
def input
  return @input
end
#parent ⇒ Object
Returns the value of attribute parent.
13 14 15 |
# File 'lib/pegex/parser.rb', line 13
#
# Reader for the parent attribute.
#
# @return [Object] the current value of @parent
def parent
  return @parent
end
#receiver ⇒ Object
Returns the value of attribute receiver.
10 11 12 |
# File 'lib/pegex/parser.rb', line 10
#
# Reader for the receiver attribute.
#
# @return [Object] the current value of @receiver
def receiver
  return @receiver
end
#rule ⇒ Object
Returns the value of attribute rule.
14 15 16 |
# File 'lib/pegex/parser.rb', line 14
#
# Reader for the rule attribute.
#
# @return [Object] the current value of @rule
def rule
  return @rule
end
Instance Method Details
#format_error(msg) ⇒ Object
258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 |
# File 'lib/pegex/parser.rb', line 258 def format_error msg buffer = @buffer position = @farthest real_pos = @position line = buffer[0, position].scan(/\n/).size + 1 column = position - (buffer.rindex("\n", position) || -1) pretext = @buffer[ position < 50 ? 0 : position - 50, position < 50 ? position : 50 ] context = @buffer[position, 50] pretext.gsub! /.*\n/m, '' context.gsub! /\n/, "\\n" return <<"..." Error parsing Pegex document: msg: #{msg} line: #{line} column: #{column} context: #{pretext}#{context} #{' ' * (pretext.length + 10)}^ position: #{position} (#{real_pos} pre-lookahead) ... end |
#match_all(list, parent = nil) ⇒ Object
192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 |
# File 'lib/pegex/parser.rb', line 192
#
# Matches every element of +list+ in order (a sequence).
#
# Results of elements flagged '+asr' or '-skip' are matched but not
# collected. On the first element that fails, @position is restored to where
# the sequence started, @farthest is raised to that point if it was lower,
# and false is returned.
#
# @param list [Array<Hash>] nodes handed one by one to match_next
# @param parent [Object] accepted but not used here
# @return [Array, false] the collected results, wrapped in one extra array
#   when more than one element contributed; false on failure
def match_all(list, parent = nil)
  start = @position
  collected = []
  kept = 0

  list.each do |elem|
    found = match_next(elem)
    unless found
      @position = start
      @farthest = start if start > @farthest
      return false
    end
    next if elem['+asr'] or elem['-skip']
    collected.concat(found)
    kept += 1
  end

  kept > 1 ? [collected] : collected
end
#match_any(list, parent = nil) ⇒ Object
211 212 213 214 215 216 217 218 |
# File 'lib/pegex/parser.rb', line 211
#
# Tries each element of +list+ in order (an alternation) and returns the
# result of the first one that match_next accepts.
#
# @param list [Array<Hash>] candidate nodes
# @param parent [Object] accepted but not used here
# @return [Object, false] the first truthy match_next result, or false when
#   no alternative matches
def match_any(list, parent = nil)
  list.each do |alternative|
    outcome = match_next(alternative)
    return outcome if outcome
  end
  false
end
#match_err(error, parent = nil) ⇒ Object
220 221 222 |
# File 'lib/pegex/parser.rb', line 220
#
# Handles an error node by delegating straight to throw_error.
#
# @param error [Object] message forwarded to throw_error
# @param parent [Object] accepted but not used here
# @return [Object] whatever throw_error returns
def match_err(error, parent = nil)
  throw_error error
end
#match_next(next_) ⇒ Object
123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 |
# File 'lib/pegex/parser.rb', line 123
#
# Applies one optimized grammar node, honouring its quantifier ('+min' /
# '+max', where a '+max' of 0 means "no upper bound") and its assertion flag
# ('+asr', where -1 inverts the outcome).
#
# Nodes carrying a '.sep' entry are handed to match_next_with_sep instead.
#
# @param next_ [Hash] node with 'rule', 'method', '+min', '+max', '+asr' and
#   optionally '.sep' / '-skip'
# @return [Array, false] the captures ([] when the node is flagged '-skip'),
#   or false when the node does not match
def match_next(next_)
  return match_next_with_sep(next_) if next_['.sep']

  rule      = next_['rule']
  matcher   = next_['method']
  min       = next_['+min']
  max       = next_['+max']
  assertion = next_['+asr']

  rewind_to = @position
  captures = []
  hits = 0

  while (got = matcher.call(rule, next_))
    # Assertions never consume input, so their rewind point stays put.
    rewind_to = @position unless assertion
    hits += 1
    captures.concat(got)
    break if max == 1
  end

  unless max == 1
    captures = [captures]
    @position = rewind_to
    @farthest = rewind_to if rewind_to > @farthest
  end

  in_range = hits >= min && (max == 0 || hits <= max)
  # A negative assertion (-1) flips the outcome.
  matched = in_range != (assertion == -1)

  if !matched || assertion
    @position = rewind_to
    @farthest = rewind_to if rewind_to > @farthest
  end

  return false unless matched
  next_['-skip'] ? [] : captures
end
#match_next_with_sep(next_) ⇒ Object
149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 |
# File 'lib/pegex/parser.rb', line 149
#
# Applies a repeated node whose repetitions are joined by a separator node
# (next_['.sep']): item, separator, item, separator, ...
#
# The loop stops when either the item or the separator fails to match. If it
# stopped right after a separator (as many separators as items) and the
# separator is not flagged '+eok', the cursor is rewound to just after the
# last item so the dangling separator is not consumed.
#
# Fix: the range check used to read
#   result = count >= min and (max == 0 or count <= max)
# Because `and` binds more loosely than `=`, only `count >= min` was stored
# and the '+max' bound was silently ignored. It is now written with
# `&&` / `||` so both bounds apply.
#
# @param next_ [Hash] node with 'rule', 'method', '+min', '+max', '.sep' and
#   optionally '-skip'; a '+max' of 0 means "no upper bound"
# @return [Array, false] the captures ([] when the node is flagged '-skip'),
#   or false when the repetition count is out of range
def match_next_with_sep(next_)
  rule, matcher, min, max, sep =
    next_.values_at('rule', 'method', '+min', '+max', '.sep')
  smax = sep['+max']
  position, match, count, scount = @position, [], 0, 0

  while (return_ = matcher.call(rule, next_))
    position = @position
    count += 1
    match.concat(return_)
    return_ = match_next(sep) or break
    # A repeating separator's result arrives wrapped in one extra array.
    match.concat(smax == 1 ? return_ : return_[0]) unless return_.empty?
    scount += 1
  end
  match = [match] if max != 1

  result = count >= min && (max == 0 || count <= max)

  # Ended on a separator with no item after it: give the separator back.
  if count == scount && !sep['+eok']
    @position = position
    @farthest = position if position > @farthest
  end

  return false unless result
  next_['-skip'] ? [] : match
end
#match_ref(ref, parent) ⇒ Object
172 173 174 175 176 177 178 179 |
# File 'lib/pegex/parser.rb', line 172
#
# Matches the rule named +ref+ from @tree and, when that rule has an
# 'action' callable attached, passes the first element of the match to it.
#
# @rule and @parent are set just before the action runs.
#
# @param ref [String] key of the rule in @tree
# @param parent [Object] stored in @parent for the duration of the action
# @return [Object, false] false when the rule does not match;
#   Pegex::Constant::Dummy when the rule has no action; otherwise the action
#   result wrapped in an array, unless it is Pegex::Constant::Null itself, in
#   which case it is returned unwrapped
def match_ref(ref, parent)
  rule = @tree[ref]
  match = match_next(rule)
  return false unless match

  action = rule['action']
  return Pegex::Constant::Dummy unless action

  @rule = ref
  @parent = parent
  outcome = action.call(match.first)
  outcome.equal?(Pegex::Constant::Null) ? outcome : [outcome]
end
#match_ref_trace(ref, parent) ⇒ Object
224 225 226 227 228 229 230 231 232 233 234 235 |
# File 'lib/pegex/parser.rb', line 224
#
# Debugging wrapper around match_ref: emits "try_<ref>" before the attempt
# and "got_<ref>" / "not_<ref>" after it via trace. Rules flagged '+asr' are
# matched without any tracing.
#
# @param ref [String] key of the rule in @tree
# @param parent [Object] forwarded to match_ref
# @return [Object] whatever match_ref returns
def match_ref_trace(ref, parent)
  traced = !@tree[ref]['+asr']
  trace("try_#{ref}") if traced
  outcome = match_ref(ref, parent)
  trace(outcome ? "got_#{ref}" : "not_#{ref}") if traced
  outcome
end
#match_rgx(regexp, parent = nil) ⇒ Object
181 182 183 184 185 186 187 188 189 190 |
# File 'lib/pegex/parser.rb', line 181
#
# Matches +regexp+ against the unread remainder of @buffer. On success the
# cursor advances by the length of the whole match and @farthest is raised
# if the cursor moved past it.
#
# @param regexp [Regexp] pattern to apply to the text from @position onward
# @param parent [Object] accepted but not used here
# @return [Array, false] the capture groups — wrapped in one extra array
#   when there is more than one group — or false when nothing matches
def match_rgx(regexp, parent = nil)
  remaining = @buffer[@position..-1]
  found = remaining.match(regexp)
  return false unless found

  @position += found[0].length
  @farthest = @position if @position > @farthest

  groups = found.captures
  groups.length > 1 ? [groups] : groups
end
#optimize_grammar(start) ⇒ Object
76 77 78 79 80 81 82 83 84 |
# File 'lib/pegex/parser.rb', line 76
#
# Runs optimize_node once over every non-String entry of @tree and then over
# a synthetic reference to the start rule. Later calls are no-ops because
# @optimized is set afterwards.
#
# @param start [String] name of the start rule
# @return [true, nil] true after optimizing, nil when already optimized
def optimize_grammar(start)
  return if @optimized

  @tree.each_value do |node|
    optimize_node(node) unless node.kind_of?(String)
  end
  optimize_node('.ref' => start)

  @optimized = true
end
#optimize_node(node) ⇒ Object
86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 |
# File 'lib/pegex/parser.rb', line 86
#
# Prepares one grammar node (in place) for matching:
#
# * finds which of '.ref', '.rgx', '.all', '.any', '.err', '.code' the node
#   carries and records it as 'rule' / 'kind' / 'method' (the bound
#   match_<kind> method); fails if the node carries none of them
# * fills in '+min', '+max' and '+asr' defaults and coerces the two bounds
#   with to_i
# * recurses into the children of 'any' / 'all' nodes and into '.sep'
# * for 'ref' nodes, attaches the receiver's got_<ref> (or, failing that,
#   gotrule) method as the referenced rule's 'action', and switches to
#   match_ref_trace when @debug is set
# * for 'rgx' nodes, compiles the pattern anchored with \A
#
# @param node [Hash] grammar node, mutated in place
def optimize_node(node)
  # 'xxx' is a sentinel: reaching it means no known kind key was present.
  %w[ref rgx all any err code xxx].each do |kind|
    fail if kind == 'xxx'
    payload = node[".#{kind}"]
    node['rule'] = payload
    next unless payload
    node['kind'] = kind
    node['method'] = method("match_#{kind}")
    break
  end

  # Only one bound given => the other defaults to 0; neither given => 1..1.
  min, max = node['+min'], node['+max']
  node['+min'] = (min || (max.nil? ? 1 : 0)).to_i
  node['+max'] = (max || (min.nil? ? 1 : 0)).to_i
  node['+asr'] = nil unless node['+asr']

  case node['kind']
  when 'any', 'all'
    node['rule'].each { |child| optimize_node(child) }
  when 'ref'
    ref = node['rule']
    rule = @tree[ref]
    if @receiver.respond_to? "got_#{ref}"
      rule['action'] = receiver.method("got_#{ref}")
    elsif receiver.respond_to? 'gotrule'
      rule['action'] = receiver.method('gotrule')
    end
    node['method'] = method('match_ref_trace') if @debug
  when 'rgx'
    node['rule'] = Regexp.new("\\A#{node['.rgx']}")
  end

  separator = node['.sep']
  optimize_node(separator) if separator
end
#parse(input, start = nil) ⇒ Object
26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 |
# File 'lib/pegex/parser.rb', line 26
#
# Parses +input+ with @grammar, reporting matches to @receiver.
#
# Changes from the previous version:
# * @farthest is now reset together with @position. It used to keep the
#   high-water mark of the previous parse, so a reused parser could report
#   (and index the buffer at) a stale error position.
# * The input is now closed in an `ensure`, so it is also closed when setup
#   or matching raises (for example a parse error raised via throw_error).
#
# @param input [Pegex::Input, Object] a Pegex::Input, or anything else,
#   which is wrapped in a new Pegex::Input through its `string=` writer
# @param start [String, nil] start rule; defaults to the tree's '+toprule'
#   entry, then to 'TOP' if the tree has such a rule
# @return [Object, nil] the first element of the final match (after the
#   receiver's `final` hook, if it has one); nil when the parse fails and
#   errors are not being raised
# @raise [RuntimeError] when there is no grammar, no start rule or no
#   receiver
# @raise [PegexParseError] via throw_error when the document does not parse
#   and @throw_on_error is set
def parse(input, start = nil)
  @position = 0
  @farthest = 0

  unless input.kind_of? Pegex::Input
    text = input
    input = Pegex::Input.new { |i| i.string = text }
  end
  @input = input
  @input.open unless @input.open?

  begin
    @buffer = @input.read
    @length = @buffer.length

    fail "No 'grammar'. Can't parse" unless @grammar
    @tree = @grammar.tree ||= @grammar.make_tree

    start_rule_ref = start ||
                     @tree['+toprule'] ||
                     (@tree['TOP'] ? 'TOP' : nil)
    fail "No starting rule for Pegex::Parser::parse" unless start_rule_ref

    optimize_grammar(start_rule_ref)

    fail "No 'receiver'. Can't parse" unless @receiver

    # XXX does ruby have problems with circular references?
    @receiver.parser = self

    if @receiver.respond_to? 'initial'
      @rule = start_rule_ref
      @parent = {}
      @receiver.initial
    end

    match = match_ref(start_rule_ref, {})
  ensure
    @input.close
  end

  # A match that leaves input unread is still a failure.
  if !match || @position < @length
    throw_error "Parse document failed for some reason"
    return
  end

  if @receiver.respond_to? 'final'
    @rule = start_rule_ref
    @parent = {}
    match = [@receiver.final(match.first)]
  end

  match.first
end
#throw_error(msg) ⇒ Object
252 253 254 255 256 |
# File 'lib/pegex/parser.rb', line 252
#
# Formats +msg+ with format_error, stores the report in @error, and raises
# it as a PegexParseError unless @throw_on_error is falsy.
#
# @param msg [Object] message passed to format_error
# @return [nil] when @throw_on_error is falsy
# @raise [PegexParseError] when @throw_on_error is truthy
def throw_error(msg)
  @error = format_error(msg)
  raise PegexParseError, @error if @throw_on_error
  nil
end
#trace(action) ⇒ Object
237 238 239 240 241 242 243 244 245 246 247 248 |
# File 'lib/pegex/parser.rb', line 237
#
# Writes one line of debug trace to $stderr: the current indentation, the
# action name left-justified to 30 columns and, for "try_" actions only, a
# snippet of the unread buffer with newlines shown as a literal \n.
#
# "try_" actions increase the indentation for the lines that follow; every
# other action decreases it before printing.
#
# @param action [String] trace label such as "try_foo", "got_foo", "not_foo"
def trace(action)
  entering = !action.match(/^try_/).nil?

  @indent ||= 0
  @indent -= 1 unless entering
  $stderr.print(' ' * @indent)
  @indent += 1 if entering

  snippet = @buffer[@position..-1]
  snippet = "#{snippet[0..30]}..." if snippet.length > 30
  snippet.gsub!(/\n/, "\\n")

  $stderr.printf("%-30s", action)
  $stderr.print(entering ? " >#{snippet}<\n" : "\n")
end