Class: Kanocc::EarleyParser

Inherits:
Object
  • Object
show all
Defined in:
lib/kanocc/earley.rb

Overview

Parser for Kanocc based on Earleys algorithm. For a description see: Alfred V. Aho, Jeffrey D. Ullman, The Theory of Parsing, Translation and Compiling, or try a web search engine of your choice with ‘Earley parsing’

Earley’s parser will parse according to any zcontext-free grammar using O(n*n*n) time and O(n*n) space, n being the length of input. If the grammar is unambigous time/space complexity is O(n*n)/O(n*n). As of yet (version 0.1) the implementation is surely not optimal, so time/space complexity is probably worse.

Christian Surlykke 2007.

Constant Summary collapse

ErrorRule =
GrammarRule.new(Error, [], nil)

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(kanocc, logger) ⇒ EarleyParser

Returns a new instance of EarleyParser.



44
45
46
47
# File 'lib/kanocc/earley.rb', line 44

def initialize(kanocc, logger)
  @kanocc = kanocc
  @logger = logger
end

Instance Attribute Details

#kanoccObject

Returns the value of attribute kanocc.



40
41
42
# File 'lib/kanocc/earley.rb', line 40

def kanocc
  @kanocc
end

#loggerObject

Returns the value of attribute logger.



40
41
42
# File 'lib/kanocc/earley.rb', line 40

def logger
  @logger
end

Instance Method Details

#all_derives_right(items) ⇒ Object



245
246
247
248
249
250
# File 'lib/kanocc/earley.rb', line 245

def all_derives_right(items)
  items.each do |item|
    return false unless item.rule.derives_right
  end
  return true
end

#find_error_itemsObject



167
168
169
170
171
172
173
174
# File 'lib/kanocc/earley.rb', line 167

def find_error_items
  for n in (@inputPos - 1).downto(0) do
    if @items.items_n_and_symbol_after_dot(n, Error).size > 0
      return n
    end
  end
  return nil
end

#find_highest(items, &expr) ⇒ Object



226
227
228
229
230
231
232
233
234
235
236
237
238
239
# File 'lib/kanocc/earley.rb', line 226

def find_highest(items, &expr)
  collect = []
  top_val = nil;
  items.each do |item|
    val = expr.call(item)
    if top_val == nil or top_val < val
      collect = [item]
      top_val = val
    elsif top_val == val
      collect << item
    end
  end
  return collect
end

#handle_errorObject



141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
# File 'lib/kanocc/earley.rb', line 141

def handle_error
  if j = find_error_items()
    @items.add(ErrorRule, 0, j, @inputPos - 1, -1)
    predict_and_complete(@inputPos - 1, true)
    if @logger
	  @logger.info("Items at #{@inputPos - 1} after error handling:\n" + 
            @items.items_at_n(@inputPos - 1).map {|item| item.inspect}.join("\n"))
	end
	scan
    predict_and_complete(@inputPos)
    if @logger
      @logger.info("Items at #{@inputPos} after error handling:\n" +
                   @items.items_at_n(@inputPos).map {|item| item.inspect}.join("\n"))
    end
  else
	expected_terminals =
	  @items.items_at_n(@inputPos - 1).map { |item| item.rule.rhs[item.dot]}.find_all do |gs|
 gs.is_a? String or (gs.is_a? Class and gs.ancestors.include?(Token))
	  end.uniq

	pos, length = @scanner.current_match.start_pos, @scanner.current_match.length
	offending_input = @scanner.input[pos, length].inspect
	raise ParseException.new(expected_terminals, offending_input, pos)
  end
end

#is_nonterminal?(symbol) ⇒ Boolean

Returns:

  • (Boolean)


252
253
254
# File 'lib/kanocc/earley.rb', line 252

def is_nonterminal?(symbol)
  symbol.respond_to?(:rules)
end

#make_parse(item, pos, prev_pos) ⇒ Object

FIXME Generates stack overflow when files are large.

15000-2000 inputsymbols with the calculator syntax.

Should be rewritten to something non-recursive



191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
# File 'lib/kanocc/earley.rb', line 191

def make_parse(item, pos, prev_pos)
  return if item.dot <= 0

  prev_item = @items.find(item.rule, item.dot - 1, item.j, prev_pos)
  prev_prev_pos = prev_item.rule.derives_right ? prev_item.prev_pos_min : prev_item.prev_pos_max
  
  if is_nonterminal?(item.symbol_before_dot)
    subitem, sub_prev_pos = pick_subitem(item.symbol_before_dot, pos, prev_pos)
    make_parse(prev_item, prev_pos, prev_prev_pos)
    make_parse(subitem, pos, sub_prev_pos)
    @kanocc.report_reduction(subitem.rule)
  else
    make_parse(prev_item, prev_pos, prev_prev_pos)
    symbol = item.symbol_before_dot
    @kanocc.report_token(@input_symbols[pos], symbol)
  end
end

#parse(scanner) ⇒ Object



59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
# File 'lib/kanocc/earley.rb', line 59

def parse(scanner)
  @scanner = scanner
  prepare
  
 while (@scanner.next_match!) do
    @inputPos += 1
    @input_symbols.push(scanner.current_match)
    @items.prepare_for_n(@inputPos)
    # scan, predict and complete until no more can be added

    scan

    predict_and_complete(@inputPos)

    if @logger
      @logger.info("\nItems at #{@inputPos}:\n" +
                   @input_symbols[@inputPos].inspect + "\n" +
                   @items.items_at_n(@inputPos).map{|item| " " + item.inspect}.join("\n") + "\n")
    end

    handle_error if @items.number_at_n(@inputPos) == 0
  end

  reduce
end

#pick_subitem(nonterminal, pos, prev_pos) ⇒ Object



209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
# File 'lib/kanocc/earley.rb', line 209

def pick_subitem(nonterminal, pos, prev_pos)
  #debugger
  items = @items.full_items_by_lhs_j_and_n(nonterminal, prev_pos, pos)

  raise "pick_subitem could not find any items" if items.size <= 0
  items = find_highest(items) {|item| precedence(item)}

  derives_right = all_derives_right(items)
  if derives_right
    items = find_highest(items) {|item| -item.prev_pos_min}
  else
    items = find_highest(items){|item| item.prev_pos_max}
  end

  return items[0], derives_right ? items[0].prev_pos_min : items[0].prev_pos_max
end

#precedence(item) ⇒ Object



241
242
243
# File 'lib/kanocc/earley.rb', line 241

def precedence(item)
  item.rule.precedence || 0
end

#predict_and_complete(pos, show = false) ⇒ Object

Predict: For any item of form [A -> a*Bb, j, n] and for all rules of form B -> c, add [B -> *c, n, n].

Complete: Given an item of form [A->X*, j, n], find all items of form [B -> a*Ab, i, j], and add [B -> aA*b, i, n].

Predict and complete until nothing further can be added.



120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
# File 'lib/kanocc/earley.rb', line 120

def predict_and_complete(pos, show=false)
  prev_size = 0
  while true do
    break if prev_size >= @items.number_at_n(pos)
    prev_size = @items.number_at_n(pos)
    @items.items_at_n(pos).each do |item|
      if item.dot >= item.rule.rhs.length
        # complete
        @items.items_n_and_symbol_after_dot(item.j, item.rule.lhs).each do |previtem|
          @items.add(previtem.rule, previtem.dot + 1, previtem.j, pos, item.j)
        end
      elsif item.rule.rhs[item.dot].respond_to?(:rules)
        # predict
        item.rule.rhs[item.dot].rules.each do |rule|
          @items.add(rule, 0, pos, pos, -1)
        end
      end
    end
  end
end

#prepareObject



85
86
87
88
89
90
91
92
93
94
95
96
97
98
# File 'lib/kanocc/earley.rb', line 85

def prepare
  @items = ItemSet.new
  @inputPos = 0
  @input_symbols = [nil]
  @recoveryPoints = []
  @start_symbol.rules.each do |rule|
    @items.add(rule, 0, 0, 0, -1)
  end
  predict_and_complete(0)
  if @logger
    @logger.info("\nItems at 0:\n" +
                 @items.items_at_n(0).map{|item| " " + item.inspect}.join("\n") + "\n")
  end
end

#reduceObject



176
177
178
179
180
181
182
183
184
185
186
# File 'lib/kanocc/earley.rb', line 176

def reduce
  item = @items.items_at_n(@inputPos).find do |item|
    @start_symbol == item.rule.lhs and item.dot == 1
  end
  if item
    # There is at most one of those
    make_parse(item, @inputPos, 0)
  else
    raise(KanoccException, "It didn't parse")
  end
end

#scanObject

Scan: At position n, for each terminal a in current match, and each item of form [A -> x*ay, i, n-1], add [A -> xa*y, i, n]



102
103
104
105
106
107
108
109
110
# File 'lib/kanocc/earley.rb', line 102

def scan

  @scanner.current_match.terminals.each do |terminal|
    @items.items_n_and_symbol_after_dot(@inputPos -1, terminal).each do |item|
       @items.add(item.rule, item.dot + 1, item.j, @inputPos,  @inputPos - 1)
    end
  end

end

#start_symbol=(start_symbol) ⇒ Object



49
50
51
52
53
54
55
56
# File 'lib/kanocc/earley.rb', line 49

def start_symbol=(start_symbol)   
  @start_symbol = Class.new(StartSymbol) do
    def self.to_s
     "S'"
    end
    rule(start_symbol)
  end 
end