Class: Kleene::DFA

Inherits:
Object
  • Object
show all
Defined in:
lib/kleene/dfa.rb

Overview

->(transition : DFATransition, token : Char, token_index : Int32) : Nil { … } alias DFATransitionCallback = Proc(DFATransition, Char, Int32, Nil)

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(start_state, alphabet = DEFAULT_ALPHABET, transitions = Hash.new, dfa_state_to_nfa_state_sets = Hash.new, transition_callbacks = nil, origin_nfa: nil) ⇒ DFA

Returns a new instance of DFA.



36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
# File 'lib/kleene/dfa.rb', line 36

def initialize(start_state, alphabet = DEFAULT_ALPHABET, transitions = Hash.new, dfa_state_to_nfa_state_sets = Hash.new, transition_callbacks = nil, origin_nfa: nil)
  @start_state = start_state
  @current_state = start_state
  @transitions = transitions
  @dfa_state_to_nfa_state_sets = dfa_state_to_nfa_state_sets

  @alphabet = alphabet + all_transitions.map(&:token)

  @states = reachable_states(@start_state)
  @final_states = Set.new

  @nfa_state_to_dfa_state_sets = Hash.new
  @dfa_state_to_nfa_state_sets.each do |dfa_state, nfa_state_set|
    nfa_state_set.each do |nfa_state|
      dfa_state_set = @nfa_state_to_dfa_state_sets[nfa_state] ||= Set.new
      dfa_state_set << dfa_state
    end
  end

  @transition_callbacks = transition_callbacks || Hash.new
  @transition_callbacks_per_destination_state = Hash.new

  @origin_nfa = origin_nfa

  update_final_states
  reset_current_state
end

Instance Attribute Details

#alphabetObject

: Set(Char)



22
23
24
# File 'lib/kleene/dfa.rb', line 22

def alphabet
  @alphabet
end

#current_stateObject

: State



25
26
27
# File 'lib/kleene/dfa.rb', line 25

def current_state
  @current_state
end

#dfa_state_to_nfa_state_setsObject

: Hash(State, Set(State)) # this map contains (dfa_state => nfa_state_set) pairs



28
29
30
# File 'lib/kleene/dfa.rb', line 28

def dfa_state_to_nfa_state_sets
  @dfa_state_to_nfa_state_sets
end

#final_statesObject

: Set(State)



27
28
29
# File 'lib/kleene/dfa.rb', line 27

def final_states
  @final_states
end

#nfa_state_to_dfa_state_setsObject

: Hash(State, Set(State)) # this map contains (nfa_state => dfa_state_set) pairs



29
30
31
# File 'lib/kleene/dfa.rb', line 29

def nfa_state_to_dfa_state_sets
  @nfa_state_to_dfa_state_sets
end

#start_stateObject

: State



24
25
26
# File 'lib/kleene/dfa.rb', line 24

def start_state
  @start_state
end

#statesObject

: Set(State)



23
24
25
# File 'lib/kleene/dfa.rb', line 23

def states
  @states
end

#transition_callbacksObject

: Hash(DFATransition, DFATransitionCallback)



30
31
32
# File 'lib/kleene/dfa.rb', line 30

def transition_callbacks
  @transition_callbacks
end

#transition_callbacks_per_destination_stateObject

: Hash(State, DFATransitionCallback)



31
32
33
# File 'lib/kleene/dfa.rb', line 31

def transition_callbacks_per_destination_state
  @transition_callbacks_per_destination_state
end

#transitionsObject

: Hash(State, Hash(Char, DFATransition))



26
27
28
# File 'lib/kleene/dfa.rb', line 26

def transitions
  @transitions
end

Instance Method Details

#accept?Boolean

Returns:

  • (Boolean)


175
176
177
# File 'lib/kleene/dfa.rb', line 175

def accept?
  @current_state.final?
end

#add_transition(token, from_state, to_state) ⇒ Object



128
129
130
131
132
133
134
# File 'lib/kleene/dfa.rb', line 128

def add_transition(token, from_state, to_state)
  @alphabet << token      # alphabet is a set, so there will be no duplications
  @states << to_state     # states is a set, so there will be no duplications (to_state should be the only new state)
  new_transition = DFATransition.new(token, from_state, to_state)
  @transitions[from_state][token] = new_transition
  new_transition
end

#all_transitionsObject

: Array(DFATransition)



76
77
78
# File 'lib/kleene/dfa.rb', line 76

def all_transitions() # : Array(DFATransition)
  transitions.flat_map {|state, char_transition_map| char_transition_map.values }
end

#clear_error_statesObject



72
73
74
# File 'lib/kleene/dfa.rb', line 72

def clear_error_states
  @error_states = nil
end

#deep_cloneObject

transition callbacks are not copied beacuse it is assumed that the state transition callbacks may be stateful and reference structures or states that only exist in ‘self`, but not the cloned copy.



93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
# File 'lib/kleene/dfa.rb', line 93

def deep_clone
  old_states = @states.to_a
  new_states = old_states.map(&:dup)
  state_mapping = old_states.zip(new_states).to_h
  transition_mapping = Hash.new
  new_transitions = transitions.map do |state, char_transition_map|
    [
      state_mapping[state],
      char_transition_map.map do |char, old_transition|
        new_transition = DFATransition.new(old_transition.token, state_mapping[old_transition.from], state_mapping[old_transition.to])
        transition_mapping[old_transition] = new_transition
        [char, new_transition]
      end.to_h
    ]
  end.to_h
  # new_transition_callbacks = transition_callbacks.map do |transition, callback|
  #   {
  #     transition_mapping[transition],
  #     callback
  #   }
  # end.to_h

  new_dfa_state_to_nfa_state_sets = dfa_state_to_nfa_state_sets.map {|dfa_state, nfa_state_set| [state_mapping[dfa_state], nfa_state_set] }.to_h

  DFA.new(state_mapping[@start_state], @alphabet.clone, new_transitions, new_dfa_state_to_nfa_state_sets, origin_nfa: origin_nfa).set_regex_pattern(regex_pattern)
end

#error?Boolean

Returns:

  • (Boolean)


179
180
181
# File 'lib/kleene/dfa.rb', line 179

def error?
  @current_state.error?
end

#error_statesObject



68
69
70
# File 'lib/kleene/dfa.rb', line 68

def error_states
  @error_states ||= @states.select {|s| s.error? }.to_set
end

#handle_token!(input_token, token_index) ⇒ Object

accept an input token and transition to the next state in the state machine



171
172
173
# File 'lib/kleene/dfa.rb', line 171

def handle_token!(input_token, token_index)
  @current_state = next_state(@current_state, input_token, token_index)
end

#match?(input) ⇒ Boolean

Returns:

  • (Boolean)


136
137
138
139
140
141
142
143
144
145
146
# File 'lib/kleene/dfa.rb', line 136

def match?(input)
  reset_current_state

  input.each_char.with_index do |char, index|
    handle_token!(char, index)
  end

  if accept?
    MatchRef.new(input, 0...input.size)
  end
end

#matches(input) ⇒ Object

Returns an array of matches found anywhere in the input string



164
165
166
167
168
# File 'lib/kleene/dfa.rb', line 164

def matches(input)
  (0...input.size).reduce([]) do |memo, offset|
    memo + matches_at_offset(input, offset)
  end
end

#matches_at_offset(input, input_start_offset) ⇒ Object

Returns an array of matches found in the input string, each of which begins at the offset input_start_offset



149
150
151
152
153
154
155
156
157
158
159
160
161
# File 'lib/kleene/dfa.rb', line 149

def matches_at_offset(input, input_start_offset)
  reset_current_state

  matches = []
  (input_start_offset...input.size).each do |offset|
    token = input[offset]
    handle_token!(token, offset)
    if accept?
      matches << MatchRef.new(input, input_start_offset..offset)
    end
  end
  matches
end

#minimize!Object

This is an implementation of the “Reducing a DFA to a Minimal DFA” algorithm presented here: web.cecs.pdx.edu/~harry/compilers/slides/LexicalPart4.pdf This implements Hopcroft’s algorithm as presented on page 142 of the first edition of the dragon book.



244
245
246
# File 'lib/kleene/dfa.rb', line 244

def minimize!
  # todo: I'll implement this when I need it
end

#next_state(from_state, input_token, token_index) ⇒ Object

this function transitions from state to state on an input token



197
198
199
200
201
202
203
204
205
# File 'lib/kleene/dfa.rb', line 197

def next_state(from_state, input_token, token_index)
  transition = @transitions[from_state][input_token] || raise("No DFA transition found. Input token #{input_token} not in DFA alphabet.")

  # invoke the relevant transition callback function
  transition_callbacks[transition].try {|callback_fn| callback_fn.call(transition, input_token, token_index) }
  transition_callbacks_per_destination_state[transition.to].try {|callback_fn| callback_fn.call(transition, input_token, token_index) }

  transition.to
end

#on_transition(transition, &blk) ⇒ Object



80
81
82
# File 'lib/kleene/dfa.rb', line 80

def on_transition(transition, &blk)
  @transition_callbacks[transition] = blk
end

#on_transition_to(state, &blk) ⇒ Object



84
85
86
# File 'lib/kleene/dfa.rb', line 84

def on_transition_to(state, &blk)
  @transition_callbacks_per_destination_state[state] = blk
end

#origin_nfaObject



64
65
66
# File 'lib/kleene/dfa.rb', line 64

def origin_nfa
  @origin_nfa || raise("This DFA was not created from an NFA, therefore it has no origin_nfa.")
end

#reachable_states(start_state) ⇒ Object

Returns a set of State objects which are reachable through any transition path from the DFA’s start_state.



208
209
210
211
212
213
214
215
216
217
218
# File 'lib/kleene/dfa.rb', line 208

def reachable_states(start_state)
  visited_states = Set.new()
  unvisited_states = Set[start_state]
  while !unvisited_states.empty?
    outbound_transitions = unvisited_states.flat_map {|state| @transitions[state].try(&:values) || Array.new }
    destination_states = outbound_transitions.map(&:to).to_set
    visited_states.merge(unvisited_states)         # add the unvisited states to the visited_states
    unvisited_states = destination_states - visited_states
  end
  visited_states
end

#regex_patternObject



253
254
255
# File 'lib/kleene/dfa.rb', line 253

def regex_pattern
  @regex_pattern || "<<empty>>"
end

#reset_current_stateObject



124
125
126
# File 'lib/kleene/dfa.rb', line 124

def reset_current_state
  @current_state = @start_state
end

#set_regex_pattern(pattern) ⇒ Object



248
249
250
251
# File 'lib/kleene/dfa.rb', line 248

def set_regex_pattern(pattern)
  @regex_pattern = pattern
  self
end

#shallow_cloneObject



88
89
90
# File 'lib/kleene/dfa.rb', line 88

def shallow_clone
  DFA.new(start_state, alphabet, transitions, dfa_state_to_nfa_state_sets, transition_callbacks, origin_nfa: origin_nfa).set_regex_pattern(regex_pattern)
end

#to_s(verbose = false) ⇒ Object

this is currently broken def to_nfa

dfa = self.deep_clone
NFA.new(dfa.start_state, dfa.alphabet.clone, dfa.transitions)
# todo: add all of this machine's transitions to the new machine
# @transitions.each {|t| nfa.add_transition(t.token, t.from, t.to) }
# nfa

end



229
230
231
232
233
234
235
236
237
238
239
240
# File 'lib/kleene/dfa.rb', line 229

def to_s(verbose = false)
  if verbose
    retval = states.map(&:to_s).join("\n")
    retval += "\n"
    all_transitions.each do |t|
      retval += "#{t.from.id} -> #{t.token} -> #{t.to.id}\n"
    end
    retval
  else
    regex_pattern
  end
end

#update_final_statesObject



120
121
122
# File 'lib/kleene/dfa.rb', line 120

def update_final_states
  @final_states = @states.select {|s| s.final? }.to_set
end