Class: Kleene::NFA
- Inherits:
-
Object
- Object
- Kleene::NFA
- Defined in:
- lib/kleene/nfa.rb
Instance Attribute Summary collapse
-
#alphabet ⇒ Object
: Set(Char).
-
#current_states ⇒ Object
: Set(State).
-
#final_states ⇒ Object
: Set(State).
-
#start_state ⇒ Object
: State.
-
#states ⇒ Object
: Set(State).
-
#transitions ⇒ Object
: Hash(State, Hash(Char, Set(NFATransition))).
Instance Method Summary collapse
- #accept? ⇒ Boolean
- #add_state(new_state) ⇒ Object
- #add_states(states) ⇒ Object
- #add_transition(token, from_state, to_state) ⇒ Object
-
#all_transitions ⇒ Object
: Array(NFATransition).
- #deep_clone ⇒ Object
-
#epsilon_closure(state_set) ⇒ Object
Determine the epsilon closure of the given state set That is, determine what states are reachable on an epsilon transition from the current state set (@current_states).
- #error_states ⇒ Object
- #graphviz ⇒ Object
-
#handle_token!(input_token) ⇒ Object
process another input token.
-
#initialize(start_state, alphabet = DEFAULT_ALPHABET, transitions = Hash.new, initial_states = nil) ⇒ NFA
constructor
A new instance of NFA.
-
#match?(input) ⇒ Boolean
: MatchRef?.
-
#matches(input) ⇒ Object
Returns an array of matches found anywhere in the input string.
-
#matches_at_offset(input, input_start_offset) ⇒ Object
Returns an array of matches found in the input string, each of which begins at the offset input_start_offset.
- #next_states(state_set, input_token) ⇒ Object
-
#reachable_states(start_state) ⇒ Object
Returns a set of State objects which are reachable through any transition path from the NFA’s start_state.
- #regex_pattern ⇒ Object
- #remove_state(state) ⇒ Object
- #reset_current_states ⇒ Object
- #set_regex_pattern(pattern) ⇒ Object
-
#to_dfa ⇒ Object
This implements the subset construction algorithm presented on page 118 of the first edition of the dragon book.
- #to_s(verbose = false) ⇒ Object
-
#transitions_from(state_set) ⇒ Object
def transitions_from(state) # : Set(NFATransition) @transitions&.values&.reduce {|memo, set_of_transisions| memo | set_of_transisions} || Set.new end.
- #update_final_states ⇒ Object
Constructor Details
#initialize(start_state, alphabet = DEFAULT_ALPHABET, transitions = Hash.new, initial_states = nil) ⇒ NFA
Returns a new instance of NFA.
33 34 35 36 37 38 39 40 41 42 43 44 45 |
# File 'lib/kleene/nfa.rb', line 33 def initialize(start_state, alphabet = DEFAULT_ALPHABET, transitions = Hash.new, initial_states = nil) @start_state = start_state @transitions = transitions @alphabet = alphabet + all_transitions.map(&:token) @states = initial_states || reachable_states(start_state) @current_states = Set.new @final_states = Set.new update_final_states reset_current_states end |
Instance Attribute Details
#alphabet ⇒ Object
: Set(Char)
25 26 27 |
# File 'lib/kleene/nfa.rb', line 25 def alphabet @alphabet end |
#current_states ⇒ Object
: Set(State)
29 30 31 |
# File 'lib/kleene/nfa.rb', line 29 def current_states @current_states end |
#final_states ⇒ Object
: Set(State)
30 31 32 |
# File 'lib/kleene/nfa.rb', line 30 def final_states @final_states end |
#start_state ⇒ Object
: State
27 28 29 |
# File 'lib/kleene/nfa.rb', line 27 def start_state @start_state end |
#states ⇒ Object
: Set(State)
26 27 28 |
# File 'lib/kleene/nfa.rb', line 26 def states @states end |
#transitions ⇒ Object
: Hash(State, Hash(Char, Set(NFATransition)))
28 29 30 |
# File 'lib/kleene/nfa.rb', line 28 def transitions @transitions end |
Instance Method Details
#accept? ⇒ Boolean
176 177 178 |
# File 'lib/kleene/nfa.rb', line 176 def accept? @current_states.any?(&:final?) end |
#add_state(new_state) ⇒ Object
97 98 99 |
# File 'lib/kleene/nfa.rb', line 97 def add_state(new_state) @states << new_state end |
#add_states(states) ⇒ Object
101 102 103 |
# File 'lib/kleene/nfa.rb', line 101 def add_states(states) @states.merge(states) end |
#add_transition(token, from_state, to_state) ⇒ Object
110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 |
# File 'lib/kleene/nfa.rb', line 110 def add_transition(token, from_state, to_state) # # make sure states EITHER have a single outbound epsilon transition OR non-epsilon outbound transitions; they can't have both # if token == NFATransition::Epsilon # # make sure from_state doesn't have any outbound non-epsilon transitions # raise "Error: Non-epsilon transitions are already present on #{from_state.to_s}! States may EITHER have a single outbound epsilon transision OR have outbound non-epsilon transitions, but not both." if transitions_from(from_state).any? {|t| !t.epsilon? } # else # # make sure from_state doesn't have any outbound epsilon transition # raise "Error: Epsilon transitions are already present on #{from_state.to_s}! States may EITHER have a single outbound epsilon transision OR have outbound non-epsilon transitions, but not both." if transitions_from(from_state).any?(&:epsilon?) # end @alphabet << token # alphabet is a set, so there will be no duplications @states << from_state @states << to_state new_transition = NFATransition.new(token, from_state, to_state) char_transition_map = @transitions[from_state] ||= Hash.new set_of_transisions = char_transition_map[token] ||= Set.new set_of_transisions << new_transition new_transition end |
#all_transitions ⇒ Object
: Array(NFATransition)
47 48 49 |
# File 'lib/kleene/nfa.rb', line 47 def all_transitions() # : Array(NFATransition) transitions.flat_map {|state, char_transition_map| char_transition_map.values.flat_map(&:to_a) } end |
#deep_clone ⇒ Object
66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 |
# File 'lib/kleene/nfa.rb', line 66 def deep_clone old_states = @states.to_a new_states = old_states.map(&:dup) state_mapping = old_states.zip(new_states).to_h new_transitions = transitions.map {|state, char_transition_map| [ state_mapping[state], char_transition_map.map {|char, set_of_transisions| [ char, set_of_transisions.map {|transition| NFATransition.new(transition.token, state_mapping[transition.from], state_mapping[transition.to])}.to_set ] }.to_h ] }.to_h NFA.new(state_mapping[@start_state], @alphabet.clone, new_transitions, new_states.to_set).set_regex_pattern(regex_pattern) end |
#epsilon_closure(state_set) ⇒ Object
Determine the epsilon closure of the given state set That is, determine what states are reachable on an epsilon transition from the current state set (@current_states). Returns a Set of State objects.
202 203 204 205 206 207 208 209 210 211 212 213 |
# File 'lib/kleene/nfa.rb', line 202 def epsilon_closure(state_set) # : Set(State) state_set = state_set.is_a?(State) ? Set[state_set] : state_set visited_states = Set.new() unvisited_states = state_set while !unvisited_states.empty? epsilon_transitions = unvisited_states.compact_map {|state| @transitions.dig(state, NFATransition::Epsilon) }.flat_map(&:to_a) destination_states = epsilon_transitions.map(&:to).to_set visited_states.merge(unvisited_states) # add the unvisited states to the visited_states unvisited_states = destination_states - visited_states end visited_states end |
#error_states ⇒ Object
93 94 95 |
# File 'lib/kleene/nfa.rb', line 93 def error_states @states.select(&:error?).to_set end |
#graphviz ⇒ Object
267 268 269 270 271 272 273 274 275 276 277 278 |
# File 'lib/kleene/nfa.rb', line 267 def graphviz retval = "digraph G { " all_transitions.each do |t| transition_label = t.epsilon? ? "ε" : t.token retval += "#{t.from.id} -> #{t.to.id} [label=\"#{transition_label}\"];" end @final_states.each do |s| retval += "#{s.id} [color=lightblue2, style=filled, shape=doublecircle];" end retval += " }" retval end |
#handle_token!(input_token) ⇒ Object
process another input token
172 173 174 |
# File 'lib/kleene/nfa.rb', line 172 def handle_token!(input_token) @current_states = next_states(@current_states, input_token) end |
#match?(input) ⇒ Boolean
: MatchRef?
154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 |
# File 'lib/kleene/nfa.rb', line 154 def match?(input) # : MatchRef? # puts "match?(\"#{input}\")" # puts self.to_s reset_current_states # puts @current_states.map(&:id) input.each_char.with_index do |char, index| # puts char handle_token!(char) # puts @current_states.map(&:id) end if accept? MatchRef.new(input, 0...input.size) end end |
#matches(input) ⇒ Object
Returns an array of matches found anywhere in the input string
148 149 150 151 152 |
# File 'lib/kleene/nfa.rb', line 148 def matches(input) (0...input.size).reduce([]) do |memo, offset| memo + matches_at_offset(input, offset) end end |
#matches_at_offset(input, input_start_offset) ⇒ Object
Returns an array of matches found in the input string, each of which begins at the offset input_start_offset
133 134 135 136 137 138 139 140 141 142 143 144 145 |
# File 'lib/kleene/nfa.rb', line 133 def matches_at_offset(input, input_start_offset) reset_current_states matches = [] (input_start_offset...input.size).each do |offset| token = input[offset] handle_token!(token) if accept? matches << MatchRef.new(input, input_start_offset..offset) end end matches end |
#next_states(state_set, input_token) ⇒ Object
180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 |
# File 'lib/kleene/nfa.rb', line 180 def next_states(state_set, input_token) # Retrieve a list of states in the epsilon closure of the given state set epsilon_reachable_states = epsilon_closure(state_set) # puts "epsilon_reachable_states = #{epsilon_reachable_states.map(&:id)}" # Build an array of outbound transitions from each state in the epsilon-closure # Filter the outbound transitions, selecting only those that accept the input we are given. outbound_transitions = epsilon_reachable_states.compact_map {|state| @transitions.dig(state, input_token) }.flat_map(&:to_a) # puts "outbound_transitions = #{outbound_transitions.inspect}" # Build an array of epsilon-closures of each transition's destination state. destination_state_epsilon_closures = outbound_transitions.map {|transition| epsilon_closure(transition.to) } # Union each of the epsilon-closures (each is a set) together to form a flat array of states in the epsilon-closure of all of our current states. next_states = destination_state_epsilon_closures.reduce {|combined_state_set, individual_state_set| combined_state_set.merge(individual_state_set) } next_states || Set.new end |
#reachable_states(start_state) ⇒ Object
Returns a set of State objects which are reachable through any transition path from the NFA’s start_state.
216 217 218 219 220 221 222 223 224 225 226 |
# File 'lib/kleene/nfa.rb', line 216 def reachable_states(start_state) visited_states = Set.new() unvisited_states = Set[start_state] while !unvisited_states.empty? outbound_transitions = unvisited_states.flat_map {|state| @transitions[state]&.values&.flat_map(&:to_a) || Array.new } destination_states = outbound_transitions.map(&:to).to_set visited_states.merge(unvisited_states) # add the unvisited states to the visited_states unvisited_states = destination_states - visited_states end visited_states end |
#regex_pattern ⇒ Object
299 300 301 |
# File 'lib/kleene/nfa.rb', line 299 def regex_pattern @regex_pattern || "<<empty>>" end |
#remove_state(state) ⇒ Object
105 106 107 108 |
# File 'lib/kleene/nfa.rb', line 105 def remove_state(state) raise "Unable to remove state from NFA: at least one transition leads to or from the state." if all_transitions.any? {|transition| transition.from == state || transition.to == state } @states.delete(state) end |
#reset_current_states ⇒ Object
89 90 91 |
# File 'lib/kleene/nfa.rb', line 89 def reset_current_states @current_states = epsilon_closure(@start_state) end |
#set_regex_pattern(pattern) ⇒ Object
294 295 296 297 |
# File 'lib/kleene/nfa.rb', line 294 def set_regex_pattern(pattern) @regex_pattern = pattern self end |
#to_dfa ⇒ Object
This implements the subset construction algorithm presented on page 118 of the first edition of the dragon book. I found a similar explanation at: web.cecs.pdx.edu/~harry/compilers/slides/LexicalPart3.pdf
230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 |
# File 'lib/kleene/nfa.rb', line 230 def to_dfa state_map = Hash.new # this map contains (nfa_state_set => dfa_state) pairs dfa_transitions = Hash.new dfa_alphabet = @alphabet - Set[NFATransition::Epsilon] visited_state_sets = Set.new() nfa_start_state_set = epsilon_closure(@start_state) unvisited_state_sets = Set[nfa_start_state_set] dfa_start_state = State.new(nfa_start_state_set.any?(&:final?), nfa_start_state_set.any?(&:error?)) state_map[nfa_start_state_set] = dfa_start_state until unvisited_state_sets.empty? # take one of the unvisited state sets state_set = unvisited_state_sets.first current_dfa_state = state_map[state_set] # Figure out the set of next-states for each token in the alphabet # Add each set of next-states to unvisited_state_sets dfa_alphabet.each do |token| next_nfa_state_set = next_states(state_set, token) unvisited_state_sets << next_nfa_state_set # this new DFA state, next_dfa_state, represents the next nfa state set, next_nfa_state_set next_dfa_state = state_map[next_nfa_state_set] ||= State.new(next_nfa_state_set.any?(&:final?), next_nfa_state_set.any?(&:error?)) char_transition_map = dfa_transitions[current_dfa_state] ||= Hash.new char_transition_map[token] = DFATransition.new(token, current_dfa_state, next_dfa_state) end visited_state_sets << state_set unvisited_state_sets = unvisited_state_sets - visited_state_sets end # `state_map.invert` is sufficient to convert from a (nfa_state_set => dfa_state) mapping to a (dfa_state => nfa_state_set) mapping, because the mappings are strictly one-to-one. DFA.new(state_map[nfa_start_state_set], dfa_alphabet, dfa_transitions, state_map.invert, origin_nfa: self).set_regex_pattern(regex_pattern) end |
#to_s(verbose = false) ⇒ Object
280 281 282 283 284 285 286 287 288 289 290 291 292 |
# File 'lib/kleene/nfa.rb', line 280 def to_s(verbose = false) if verbose retval = states.map(&:to_s).join("\n") retval += "\n" all_transitions.each do |t| transition_label = t.epsilon? ? "epsilon" : t.token retval += "#{t.from.id} -> #{transition_label} -> #{t.to.id}\n" end retval else regex_pattern end end |
#transitions_from(state_set) ⇒ Object
def transitions_from(state) # : Set(NFATransition)
@transitions[state]&.values&.reduce {|memo, set_of_transisions| memo | set_of_transisions} || Set.new
end
54 55 56 57 58 59 60 61 62 63 64 |
# File 'lib/kleene/nfa.rb', line 54 def transitions_from(state_set) # : Set(NFATransition) case state_set when State @transitions[state_set]&.values&.reduce {|memo, set_of_transisions| memo | set_of_transisions} || Set.new when Set state_set.map {|state| transitions_from(state) }.reduce {|memo, state_set| memo | state_set } else raise "boom" end end |
#update_final_states ⇒ Object
85 86 87 |
# File 'lib/kleene/nfa.rb', line 85 def update_final_states @final_states = @states.select { |s| s.final? }.to_set end |