Class: Dhaka::LexerSupport::DFA

Inherits:
StateMachine show all
Defined in:
lib/dhaka/lexer/dfa.rb

Overview

:nodoc:

Instance Attribute Summary

Attributes inherited from StateMachine

#start_state

Instance Method Summary collapse

Methods inherited from StateMachine

#to_dot

Constructor Details

#initialize(regex) ⇒ DFA

Returns a new instance of DFA.



26
27
28
29
30
31
32
33
34
35
36
37
38
39
# File 'lib/dhaka/lexer/dfa.rb', line 26

# Builds the state machine for +regex+.
#
# The regex string is tokenized and then parsed; a failure at either stage
# raises InvalidRegexException carrying a message that points at the
# offending spot. On success the follow sets are calculated on the parse
# result, and the superclass constructor is handed an ItemSet built from
# the parse result's +first+.
#
# @param regex [String] the regular expression source
# @raise [InvalidRegexException] if tokenizing or parsing reports an error
def initialize(regex)
  @regex = regex

  tokens = RegexTokenizer.tokenize(@regex)
  if tokens.has_error?
    raise InvalidRegexException, tokenize_error_message(tokens)
  end

  syntax_tree = RegexParser.parse(tokens)
  if syntax_tree.has_error?
    raise InvalidRegexException, parse_error_message(syntax_tree)
  end

  # Follow sets must be in place before the start key is handed to super.
  syntax_tree.calculate_follow_sets
  super(ItemSet.new(syntax_tree.first))
end

Instance Method Details

#dest_key_for(key, char) ⇒ Object



54
55
56
57
58
59
60
# File 'lib/dhaka/lexer/dfa.rb', line 54

# Computes the key reached from +key+ on +char+.
#
# Every position in +key+ whose character equals +char+ contributes its
# follow set; the union of those follow sets is returned in a fresh ItemSet
# (empty when no position matches).
#
# @param key [Enumerable] positions making up the current key
# @param char [Object] the transition character to follow
# @return [ItemSet] merged follow sets of the matching positions
def dest_key_for(key, char)
  key.each_with_object(ItemSet.new) do |position, destination|
    destination.merge(position.follow_set) if position.character == char
  end
end

#match(input) ⇒ Object



83
84
85
# File 'lib/dhaka/lexer/dfa.rb', line 83

# Matches +input+ against this machine by delegating to a new DFARun.
#
# @param input [Object] the input handed to DFARun
# @return [Object] whatever DFARun#match returns
def match(input)
  run = DFARun.new(self, input)
  run.match
end

#new_state_for_key(key) ⇒ Object



62
63
64
65
66
67
68
69
70
71
72
73
# File 'lib/dhaka/lexer/dfa.rb', line 62

# Creates the State that represents +key+.
#
# If any position in +key+ is accepting, the state is created with the
# action of the first such position (obtained via +action(@regex)+);
# otherwise it is created with no action argument. If any position is a
# checkpoint, a CheckpointAction for @regex is appended to the state's
# checkpoint actions.
#
# @param key [Enumerable] positions making up the key
# @return [State] the newly built state
def new_state_for_key(key)
  accepting_position = key.find { |position| position.accepting }

  state =
    if accepting_position
      State.new(self, accepting_position.action(@regex))
    else
      State.new(self)
    end

  state.checkpoint_actions << CheckpointAction.new(@regex) if key.any? { |position| position.checkpoint }
  state
end

#parse_error_message(parse_result) ⇒ Object



45
46
47
48
49
50
51
52
# File 'lib/dhaka/lexer/dfa.rb', line 45

# Builds the message for a regex that failed to parse.
#
# When the unexpected token is the end symbol, the regex simply stopped too
# early. Otherwise the message names the token and shows a copy of @regex
# with '>>>' inserted at the token's input position (@regex itself is not
# modified).
#
# @param parse_result [Object] the failed parse result, exposing +unexpected_token+
# @return [String] the error message
def parse_error_message(parse_result)
  token = parse_result.unexpected_token
  return "Unexpected end of regex." if token.symbol_name == END_SYMBOL_NAME

  marked_regex = @regex.dup.insert(token.input_position, '>>>')
  "Unexpected token #{token.symbol_name}: #{marked_regex}"
end

#tokenize_error_message(tokenize_result) ⇒ Object



41
42
43
# File 'lib/dhaka/lexer/dfa.rb', line 41

# Builds the message for a regex that failed to tokenize.
#
# The message names the character at the reported index and shows a copy of
# @regex with '>>>' inserted at that index (@regex itself is not modified).
#
# @param tokenize_result [Object] the failed result, exposing +unexpected_char_index+
# @return [String] the error message
def tokenize_error_message(tokenize_result)
  index = tokenize_result.unexpected_char_index
  offending_char = @regex[index].chr
  marked_regex = @regex.dup.insert(index, '>>>')
  "Invalid character #{offending_char}: #{marked_regex}"
end

#transition_characters(key) ⇒ Object



75
76
77
78
79
80
81
# File 'lib/dhaka/lexer/dfa.rb', line 75

# Collects the characters on which transitions leave +key+.
#
# Nodes that are accepting or checkpoint nodes are skipped; the characters
# of all remaining nodes are gathered into a Set.
#
# @param key [Enumerable] nodes making up the key
# @return [Set] characters of the non-accepting, non-checkpoint nodes
def transition_characters(key)
  consuming_nodes = key.reject { |node| node.accepting || node.checkpoint }
  Set.new(consuming_nodes) { |node| node.character }
end