Class: Kleene::BatchMultiMatchDFA
- Inherits:
-
MultiMatchDFA
- Object
- MultiMatchDFA
- Kleene::BatchMultiMatchDFA
- Defined in:
- lib/kleene/multi_match_dfa.rb
Instance Attribute Summary
Attributes inherited from MultiMatchDFA
#composite_dfa, #composite_nfa, #dead_end_nfa_state_to_dead_end_nfa, #dfa_to_index, #machines_by_index, #nfa_to_index, #nfa_with_dead_err_to_index, #nfas_with_err_state
Instance Method Summary collapse
-
#match_tracker(input) ⇒ Object
: BatchMatchTracker.
-
#matches(input) ⇒ Object
#matches(input) is the batch-style matching interface.
- #setup_callbacks(dfa) ⇒ Object
Methods inherited from MultiMatchDFA
#create_composite_nfa, #initialize, #machines_from_dfa, #machines_from_nfa, #machines_from_nfa_with_dead_err
Methods included from DSL
#any, #append, #append!, #dot, #kleene, #literal, #optional, #plus, #range, #seq, #seq2, #union, #union!, #with_err, #with_err!, #with_err_dead_end, #with_err_dead_end!
Constructor Details
This class inherits a constructor from Kleene::MultiMatchDFA
Instance Method Details
#match_tracker(input) ⇒ Object
: BatchMatchTracker
126 127 128 129 130 131 132 133 134 135 |
# File 'lib/kleene/multi_match_dfa.rb', line 126 def match_tracker(input) # : BatchMatchTracker dfa = @composite_dfa.deep_clone match_tracker = setup_callbacks(dfa) input.each_char.with_index do |char, index| dfa.handle_token!(char, index) end match_tracker end |
#matches(input) ⇒ Object
#matches(input) is the batch-style matching interface
84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 |
# File 'lib/kleene/multi_match_dfa.rb', line 84 def matches(input) # : Hash(NFA, Array(MatchRef)) mt = match_tracker(input) start_index_to_nfas_that_may_match = mt.invert_candidate_match_start_positions mt.empty_matches.each do |nfa_with_dead_err, indices| original_nfa = machines_from_nfa_with_dead_err(nfa_with_dead_err).nfa indices.each do |index| mt.add_match(original_nfa, MatchRef.new(input, index...index)) end end active_dfas = Array.new # the Int32 represents the start of match input.each_char.with_index do |char, index| active_dfas.reject! do |active_dfa_tuple| dfa_clone, original_nfa, start_of_match_index = active_dfa_tuple dfa_clone.handle_token!(char, index) mt.add_match(original_nfa, MatchRef.new(input, start_of_match_index..index)) if dfa_clone.accept? dfa_clone.error? end if nfas_with_dead_err = start_index_to_nfas_that_may_match[index] nfas_with_dead_err.each do |nfa_with_dead_err| machines = machines_from_nfa_with_dead_err(nfa_with_dead_err) original_nfa = machines.nfa dfa = machines.dfa dfa_clone = dfa.shallow_clone dfa_clone.handle_token!(char, index) mt.add_match(original_nfa, MatchRef.new(input, index..index)) if dfa_clone.accept? active_dfas << [dfa_clone, original_nfa, index] unless dfa_clone.error? end end end mt.matches end |
#setup_callbacks(dfa) ⇒ Object
137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 |
# File 'lib/kleene/multi_match_dfa.rb', line 137 def setup_callbacks(dfa) match_tracker = BatchMatchTracker.new # 1. identify DFA states that correspond to successful match of first character of the NFAs epsilon_closure_of_nfa_start_state = composite_nfa.epsilon_closure(composite_nfa.start_state) nfa_states_that_correspond_to_successful_match_of_first_character_of_component_nfa = composite_nfa.transitions_from(epsilon_closure_of_nfa_start_state). reject {|transition| transition.epsilon? || transition.to.error? }. map(&:to).to_set dfa_states_that_correspond_to_successful_match_of_first_character_of_component_nfa = nfa_states_that_correspond_to_successful_match_of_first_character_of_component_nfa. compact_map {|nfa_state| dfa.nfa_state_to_dfa_state_sets[nfa_state] }. reduce(Set.new) {|memo, state_set| memo | state_set } dfa_state_to_dead_end_nfas_that_have_matched_their_first_character = Hash.new dfa_states_that_correspond_to_successful_match_of_first_character_of_component_nfa.each do |dfa_state| dfa_state_to_dead_end_nfas_that_have_matched_their_first_character[dfa_state] = dfa.dfa_state_to_nfa_state_sets[dfa_state]. select {|nfa_state| nfa_states_that_correspond_to_successful_match_of_first_character_of_component_nfa.includes?(nfa_state) }. compact_map do |nfa_state| dead_end_nfa_state_to_dead_end_nfa[nfa_state] unless nfa_state == composite_nfa.start_state # composite_nfa.start_state is not referenced in the dead_end_nfa_state_to_dead_end_nfa map end.to_set end # 2. identify DFA states that correspond to final states in the NFAs nfa_final_states = @nfas_with_err_state.map(&:final_states).reduce(Set.new) {|memo, state_set| memo | state_set } dfa_states_that_correspond_to_nfa_final_states = nfa_final_states.compact_map {|nfa_state| dfa.nfa_state_to_dfa_state_sets[nfa_state] }. reduce(Set.new) {|memo, state_set| memo | state_set } dead_end_nfas_that_have_transitioned_to_final_state = Hash.new dfa_states_that_correspond_to_nfa_final_states.each do |dfa_state| dead_end_nfas_that_have_transitioned_to_final_state[dfa_state] = dfa.dfa_state_to_nfa_state_sets[dfa_state]. select {|nfa_state| nfa_final_states.includes?(nfa_state) }. compact_map do |nfa_state| dead_end_nfa_state_to_dead_end_nfa[nfa_state] unless nfa_state == composite_nfa.start_state # composite_nfa.start_state is not referenced in the dead_end_nfa_state_to_dead_end_nfa map end.to_set end # 3. Identify DFA states that correspond to successful match without even having seen any characters. # These are cases where the NFA's start state is a final state or can reach a final state by following only epsilon transitions. nfa_final_states_that_are_epsilon_reachable_from_nfa_start_state = epsilon_closure_of_nfa_start_state.select(&:final?).to_set dfa_states_that_represent_both_start_states_and_final_states = nfa_final_states_that_are_epsilon_reachable_from_nfa_start_state. compact_map {|nfa_state| dfa.nfa_state_to_dfa_state_sets[nfa_state] }. reduce(Set.new) {|memo, state_set| memo | state_set } dfa_state_to_dead_end_nfas_that_have_matched_before_handling_any_characters = Hash.new dfa_states_that_represent_both_start_states_and_final_states.each do |dfa_state| dfa_state_to_dead_end_nfas_that_have_matched_before_handling_any_characters[dfa_state] = dfa.dfa_state_to_nfa_state_sets[dfa_state]. select {|nfa_state| nfa_final_states_that_are_epsilon_reachable_from_nfa_start_state.includes?(nfa_state) }. compact_map do |nfa_state| dead_end_nfa_state_to_dead_end_nfa[nfa_state] unless nfa_state == composite_nfa.start_state # composite_nfa.start_state is not referenced in the dead_end_nfa_state_to_dead_end_nfa map end.to_set end # set up call transition call backs, since the callbacks may only be defined once per state and transition # For (1): # Set up transition callbacks to push the index position of the start of a match of each NFA that has begun # to be matched on the transition to one of the states in (1) # For (2): # set up transition callbacks to push the index position of the end of a successful match onto the list # of successful matches for the NFA that matched # For (3): # set up transision callbacks to capture successful empty matches destination_dfa_states_for_callbacks = dfa_states_that_correspond_to_successful_match_of_first_character_of_component_nfa | dfa_states_that_correspond_to_nfa_final_states destination_dfa_states_for_callbacks.each do |dfa_state| dfa.on_transition_to(dfa_state) do |transition, token, token_index| destination_dfa_state = transition.to should_track_empty_match = dfa_states_that_represent_both_start_states_and_final_states.includes?(destination_dfa_state) should_track_start_of_candidate_match = should_track_empty_match || dfa_states_that_correspond_to_successful_match_of_first_character_of_component_nfa.includes?(destination_dfa_state) should_track_end_of_match = dfa_states_that_correspond_to_nfa_final_states.includes?(destination_dfa_state) if should_track_empty_match dfa_state_to_dead_end_nfas_that_have_matched_before_handling_any_characters[destination_dfa_state].each do |nfa_with_dead_end| match_tracker.add_empty_match(nfa_with_dead_end, token_index) end end if should_track_start_of_candidate_match nfas_that_matched_first_character = dfa_state_to_dead_end_nfas_that_have_matched_their_first_character[destination_dfa_state] || Set.new nfas_that_matched_empty_match = dfa_state_to_dead_end_nfas_that_have_matched_before_handling_any_characters[destination_dfa_state] || Set.new dead_end_nfas_that_are_starting_to_match = nfas_that_matched_first_character | nfas_that_matched_empty_match dead_end_nfas_that_are_starting_to_match.each do |nfa_with_dead_end| match_tracker.add_start_of_candidate_match(nfa_with_dead_end, token_index) end end if should_track_end_of_match dead_end_nfas_that_have_transitioned_to_final_state[destination_dfa_state].each do |nfa_with_dead_end| match_tracker.add_end_of_match(nfa_with_dead_end, token_index) end end end end match_tracker end |