Class: Kleene::OnlineDFA
- Inherits:
-
Object
- Object
- Kleene::OnlineDFA
- Includes:
- DSL
- Defined in:
- lib/kleene/online_dfa.rb
Instance Attribute Summary collapse
-
#composite_dfa ⇒ Object
: DFA.
-
#composite_nfa ⇒ Object
: NFA.
-
#dead_end_nfa_state_to_dead_end_nfa ⇒ Object
: Hash(State, NFA).
-
#dfa_to_index ⇒ Object
: Hash(DFA, Int32).
-
#machines_by_index ⇒ Object
: Hash(Int32, MachineTuple).
-
#nfa_to_index ⇒ Object
: Hash(NFA, Int32).
-
#nfa_with_dead_err_to_index ⇒ Object
: Hash(NFA, Int32).
-
#nfas_with_err_state ⇒ Object
readonly
Returns the value of attribute nfas_with_err_state.
Instance Method Summary collapse
-
#create_composite_nfa(nfas) ⇒ Object
create a composite NFA as the union of all the NFAs with epsilon transitions from every NFA state back to the union NFA’s start state.
-
#ingest(input, debug = false) ⇒ Object
#ingest(input) is the online-style matching interface.
-
#initialize(nfas) ⇒ OnlineDFA
constructor
A new instance of OnlineDFA.
-
#machines_from_dfa(dfa) ⇒ Object
: MachineTuple.
-
#machines_from_nfa(nfa) ⇒ Object
: MachineTuple.
-
#machines_from_nfa_with_dead_err(nfa_with_dead_err) ⇒ Object
: MachineTuple.
- #matches ⇒ Object
-
#reset ⇒ Object
: OnlineMatchTracker.
- #setup_callbacks(dfa) ⇒ Object
Methods included from DSL
#any, #append, #append!, #dot, #kleene, #literal, #optional, #plus, #range, #seq, #seq2, #union, #union!, #with_err, #with_err!, #with_err_dead_end, #with_err_dead_end!
Constructor Details
#initialize(nfas) ⇒ OnlineDFA
Returns a new instance of OnlineDFA.
29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 |
# File 'lib/kleene/online_dfa.rb', line 29 def initialize(nfas) composite_alphabet = nfas.reduce(Set.new) {|memo, nfa| memo | nfa.alphabet } @original_nfas = nfas @nfas_with_err_state = nfas.map {|nfa| with_err_dead_end(nfa, composite_alphabet) } # copy NFAs and add dead-end error states to each of them dfas = @original_nfas.map(&:to_dfa) @nfa_to_index = @original_nfas.map.with_index {|nfa, index| [nfa, index] }.to_h @nfa_with_dead_err_to_index = @nfas_with_err_state.map.with_index {|nfa, index| [nfa, index] }.to_h @dfa_to_index = dfas.map.with_index {|dfa, index| [dfa, index] }.to_h @machines_by_index = @original_nfas.zip(nfas_with_err_state, dfas).map.with_index {|tuple, index| nfa, nfa_with_dead_err, dfa = tuple; [index, MachineTuple.new(nfa, nfa_with_dead_err, dfa)] }.to_h # build a mapping of (state -> nfa) pairs that capture which nfa owns each state @dead_end_nfa_state_to_dead_end_nfa = Hash.new @nfas_with_err_state.each do |nfa_with_dead_err| nfa_with_dead_err.states.each do |state| @dead_end_nfa_state_to_dead_end_nfa[state] = nfa_with_dead_err end end # create a composite NFA as the union of all the NFAs with epsilon transitions from every NFA state back to the union NFA's start state @composite_nfa = create_composite_nfa(@nfas_with_err_state) @composite_dfa = @composite_nfa.to_dfa reset end |
Instance Attribute Details
#composite_dfa ⇒ Object
: DFA
22 23 24 |
# File 'lib/kleene/online_dfa.rb', line 22 def composite_dfa @composite_dfa end |
#composite_nfa ⇒ Object
: NFA
21 22 23 |
# File 'lib/kleene/online_dfa.rb', line 21 def composite_nfa @composite_nfa end |
#dead_end_nfa_state_to_dead_end_nfa ⇒ Object
: Hash(State, NFA)
20 21 22 |
# File 'lib/kleene/online_dfa.rb', line 20 def dead_end_nfa_state_to_dead_end_nfa @dead_end_nfa_state_to_dead_end_nfa end |
#dfa_to_index ⇒ Object
: Hash(DFA, Int32)
27 28 29 |
# File 'lib/kleene/online_dfa.rb', line 27 def dfa_to_index @dfa_to_index end |
#machines_by_index ⇒ Object
: Hash(Int32, MachineTuple)
24 25 26 |
# File 'lib/kleene/online_dfa.rb', line 24 def machines_by_index @machines_by_index end |
#nfa_to_index ⇒ Object
: Hash(NFA, Int32)
25 26 27 |
# File 'lib/kleene/online_dfa.rb', line 25 def nfa_to_index @nfa_to_index end |
#nfa_with_dead_err_to_index ⇒ Object
: Hash(NFA, Int32)
26 27 28 |
# File 'lib/kleene/online_dfa.rb', line 26 def nfa_with_dead_err_to_index @nfa_with_dead_err_to_index end |
#nfas_with_err_state ⇒ Object (readonly)
Returns the value of attribute nfas_with_err_state.
19 20 21 |
# File 'lib/kleene/online_dfa.rb', line 19 def nfas_with_err_state @nfas_with_err_state end |
Instance Method Details
#create_composite_nfa(nfas) ⇒ Object
create a composite NFA as the union of all the NFAs with epsilon transitions from every NFA state back to the union NFA’s start state
69 70 71 72 73 74 75 76 77 78 79 80 81 82 |
# File 'lib/kleene/online_dfa.rb', line 69 def create_composite_nfa(nfas) nfa = union!(nfas) # add epsilon transitions from all the states except the start state back to the start state nfa.states.each do |state| if state != nfa.start_state nfa.add_transition(NFATransition::Epsilon, state, nfa.start_state) end end nfa.update_final_states nfa end |
#ingest(input, debug = false) ⇒ Object
#ingest(input) is the online-style matching interface
92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 |
# File 'lib/kleene/online_dfa.rb', line 92 def ingest(input, debug = false) # : Hash(NFA, Array(MatchRef)) mt = @match_tracker start_index_of_input_fragment_in_buffer = @buffer.length input.each_char.with_index do |char, index| @active_composite_dfa.handle_token!(char, start_index_of_input_fragment_in_buffer + index) end @buffer << input start_index_to_nfas_that_may_match = mt.invert_candidate_match_start_positions mt.empty_matches.each do |nfa_with_dead_err, indices| original_nfa = machines_from_nfa_with_dead_err(nfa_with_dead_err).nfa indices.select {|index| index >= start_index_of_input_fragment_in_buffer }.each do |index| mt.add_match(original_nfa, MatchRef.new(@buffer, index...index)) end end input.each_char.with_index do |char, index| index_in_buffer = start_index_of_input_fragment_in_buffer + index @active_candidate_dfas.reject! do |active_dfa_tuple| dfa_clone, original_nfa, start_of_match_index = active_dfa_tuple dfa_clone.handle_token!(char, index_in_buffer) mt.add_match(original_nfa, MatchRef.new(@buffer, start_of_match_index..index_in_buffer)) if dfa_clone.accept? dfa_clone.error? end if nfas_with_dead_err = start_index_to_nfas_that_may_match[index_in_buffer] nfas_with_dead_err.each do |nfa_with_dead_err| machines = machines_from_nfa_with_dead_err(nfa_with_dead_err) original_nfa = machines.nfa dfa = machines.dfa dfa_clone = dfa.shallow_clone dfa_clone.handle_token!(char, index_in_buffer) mt.add_match(original_nfa, MatchRef.new(@buffer, index_in_buffer..index_in_buffer)) if dfa_clone.accept? @active_candidate_dfas << [dfa_clone, original_nfa, index_in_buffer] unless dfa_clone.error? end end end matches end |
#machines_from_dfa(dfa) ⇒ Object
: MachineTuple
64 65 66 |
# File 'lib/kleene/online_dfa.rb', line 64 def machines_from_dfa(dfa) # : MachineTuple machines_by_index[dfa_to_index[dfa]] end |
#machines_from_nfa(nfa) ⇒ Object
: MachineTuple
56 57 58 |
# File 'lib/kleene/online_dfa.rb', line 56 def machines_from_nfa(nfa) # : MachineTuple machines_by_index[nfa_to_index[nfa]] end |
#machines_from_nfa_with_dead_err(nfa_with_dead_err) ⇒ Object
: MachineTuple
60 61 62 |
# File 'lib/kleene/online_dfa.rb', line 60 def machines_from_nfa_with_dead_err(nfa_with_dead_err) # : MachineTuple machines_by_index[nfa_with_dead_err_to_index[nfa_with_dead_err]] end |
#matches ⇒ Object
142 143 144 |
# File 'lib/kleene/online_dfa.rb', line 142 def matches @match_tracker.matches end |
#reset ⇒ Object
: OnlineMatchTracker
84 85 86 87 88 89 |
# File 'lib/kleene/online_dfa.rb', line 84 def reset # : OnlineMatchTracker @active_composite_dfa = @composite_dfa.deep_clone @active_candidate_dfas = [] @match_tracker = setup_callbacks(@active_composite_dfa) @buffer = "" end |
#setup_callbacks(dfa) ⇒ Object
146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 |
# File 'lib/kleene/online_dfa.rb', line 146 def setup_callbacks(dfa) match_tracker = OnlineMatchTracker.new # 1. identify DFA states that correspond to successful match of first character of the NFAs epsilon_closure_of_nfa_start_state = composite_nfa.epsilon_closure(composite_nfa.start_state) nfa_states_that_correspond_to_successful_match_of_first_character_of_component_nfa = composite_nfa.transitions_from(epsilon_closure_of_nfa_start_state). reject {|transition| transition.epsilon? || transition.to.error? }. map(&:to).to_set dfa_states_that_correspond_to_successful_match_of_first_character_of_component_nfa = nfa_states_that_correspond_to_successful_match_of_first_character_of_component_nfa. compact_map {|nfa_state| dfa.nfa_state_to_dfa_state_sets[nfa_state] }. reduce(Set.new) {|memo, state_set| memo | state_set } dfa_state_to_dead_end_nfas_that_have_matched_their_first_character = Hash.new dfa_states_that_correspond_to_successful_match_of_first_character_of_component_nfa.each do |dfa_state| dfa_state_to_dead_end_nfas_that_have_matched_their_first_character[dfa_state] = dfa.dfa_state_to_nfa_state_sets[dfa_state]. select {|nfa_state| nfa_states_that_correspond_to_successful_match_of_first_character_of_component_nfa.includes?(nfa_state) }. compact_map do |nfa_state| dead_end_nfa_state_to_dead_end_nfa[nfa_state] unless nfa_state == composite_nfa.start_state # composite_nfa.start_state is not referenced in the dead_end_nfa_state_to_dead_end_nfa map end.to_set end # 2. identify DFA states that correspond to final states in the NFAs nfa_final_states = @nfas_with_err_state.map(&:final_states).reduce(Set.new) {|memo, state_set| memo | state_set } dfa_states_that_correspond_to_nfa_final_states = nfa_final_states.compact_map {|nfa_state| dfa.nfa_state_to_dfa_state_sets[nfa_state] }. reduce(Set.new) {|memo, state_set| memo | state_set } dead_end_nfas_that_have_transitioned_to_final_state = Hash.new dfa_states_that_correspond_to_nfa_final_states.each do |dfa_state| dead_end_nfas_that_have_transitioned_to_final_state[dfa_state] = dfa.dfa_state_to_nfa_state_sets[dfa_state]. select {|nfa_state| nfa_final_states.includes?(nfa_state) }. compact_map do |nfa_state| dead_end_nfa_state_to_dead_end_nfa[nfa_state] unless nfa_state == composite_nfa.start_state # composite_nfa.start_state is not referenced in the dead_end_nfa_state_to_dead_end_nfa map end.to_set end # 3. Identify DFA states that correspond to successful match without even having seen any characters. # These are cases where the NFA's start state is a final state or can reach a final state by following only epsilon transitions. nfa_final_states_that_are_epsilon_reachable_from_nfa_start_state = epsilon_closure_of_nfa_start_state.select(&:final?).to_set dfa_states_that_represent_both_start_states_and_final_states = nfa_final_states_that_are_epsilon_reachable_from_nfa_start_state. compact_map {|nfa_state| dfa.nfa_state_to_dfa_state_sets[nfa_state] }. reduce(Set.new) {|memo, state_set| memo | state_set } dfa_state_to_dead_end_nfas_that_have_matched_before_handling_any_characters = Hash.new dfa_states_that_represent_both_start_states_and_final_states.each do |dfa_state| dfa_state_to_dead_end_nfas_that_have_matched_before_handling_any_characters[dfa_state] = dfa.dfa_state_to_nfa_state_sets[dfa_state]. select {|nfa_state| nfa_final_states_that_are_epsilon_reachable_from_nfa_start_state.includes?(nfa_state) }. compact_map do |nfa_state| dead_end_nfa_state_to_dead_end_nfa[nfa_state] unless nfa_state == composite_nfa.start_state # composite_nfa.start_state is not referenced in the dead_end_nfa_state_to_dead_end_nfa map end.to_set end # set up call transition call backs, since the callbacks may only be defined once per state and transition # For (1): # Set up transition callbacks to push the index position of the start of a match of each NFA that has begun # to be matched on the transition to one of the states in (1) # For (2): # set up transition callbacks to push the index position of the end of a successful match onto the list # of successful matches for the NFA that matched # For (3): # set up transision callbacks to capture successful empty matches destination_dfa_states_for_callbacks = dfa_states_that_correspond_to_successful_match_of_first_character_of_component_nfa | dfa_states_that_correspond_to_nfa_final_states destination_dfa_states_for_callbacks.each do |dfa_state| dfa.on_transition_to(dfa_state) do |transition, token, token_index| destination_dfa_state = transition.to should_track_empty_match = dfa_states_that_represent_both_start_states_and_final_states.includes?(destination_dfa_state) should_track_start_of_candidate_match = should_track_empty_match || dfa_states_that_correspond_to_successful_match_of_first_character_of_component_nfa.includes?(destination_dfa_state) should_track_end_of_match = dfa_states_that_correspond_to_nfa_final_states.includes?(destination_dfa_state) if should_track_empty_match dfa_state_to_dead_end_nfas_that_have_matched_before_handling_any_characters[destination_dfa_state].each do |nfa_with_dead_end| match_tracker.add_empty_match(nfa_with_dead_end, token_index) end end if should_track_start_of_candidate_match nfas_that_matched_first_character = dfa_state_to_dead_end_nfas_that_have_matched_their_first_character[destination_dfa_state] || Set.new nfas_that_matched_empty_match = dfa_state_to_dead_end_nfas_that_have_matched_before_handling_any_characters[destination_dfa_state] || Set.new dead_end_nfas_that_are_starting_to_match = nfas_that_matched_first_character | nfas_that_matched_empty_match dead_end_nfas_that_are_starting_to_match.each do |nfa_with_dead_end| match_tracker.add_start_of_candidate_match(nfa_with_dead_end, token_index) end end if should_track_end_of_match dead_end_nfas_that_have_transitioned_to_final_state[destination_dfa_state].each do |nfa_with_dead_end| match_tracker.add_end_of_match(nfa_with_dead_end, token_index) end end end end match_tracker end |