Class: Kleene::BatchMultiMatchDFA

Inherits:
MultiMatchDFA show all
Defined in:
lib/kleene/multi_match_dfa.rb

Instance Attribute Summary

Attributes inherited from MultiMatchDFA

#composite_dfa, #composite_nfa, #dead_end_nfa_state_to_dead_end_nfa, #dfa_to_index, #machines_by_index, #nfa_to_index, #nfa_with_dead_err_to_index, #nfas_with_err_state

Instance Method Summary collapse

Methods inherited from MultiMatchDFA

#create_composite_nfa, #initialize, #machines_from_dfa, #machines_from_nfa, #machines_from_nfa_with_dead_err

Methods included from DSL

#any, #append, #append!, #dot, #kleene, #literal, #optional, #plus, #range, #seq, #seq2, #union, #union!, #with_err, #with_err!, #with_err_dead_end, #with_err_dead_end!

Constructor Details

This class inherits a constructor from Kleene::MultiMatchDFA

Instance Method Details

#match_tracker(input) ⇒ Object

: BatchMatchTracker



126
127
128
129
130
131
132
133
134
135
# File 'lib/kleene/multi_match_dfa.rb', line 126

def match_tracker(input) # : BatchMatchTracker
  dfa = @composite_dfa.deep_clone
  match_tracker = setup_callbacks(dfa)

  input.each_char.with_index do |char, index|
    dfa.handle_token!(char, index)
  end

  match_tracker
end

#matches(input) ⇒ Object

#matches(input) is the batch-style matching interface



84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
# File 'lib/kleene/multi_match_dfa.rb', line 84

def matches(input) # : Hash(NFA, Array(MatchRef))
  mt = match_tracker(input)

  start_index_to_nfas_that_may_match = mt.invert_candidate_match_start_positions

  mt.empty_matches.each do |nfa_with_dead_err, indices|
    original_nfa = machines_from_nfa_with_dead_err(nfa_with_dead_err).nfa
    indices.each do |index|
      mt.add_match(original_nfa, MatchRef.new(input, index...index))
    end
  end

  active_dfas = Array.new    # the Int32 represents the start of match

  input.each_char.with_index do |char, index|
    active_dfas.reject! do |active_dfa_tuple|
      dfa_clone, original_nfa, start_of_match_index = active_dfa_tuple

      dfa_clone.handle_token!(char, index)
      mt.add_match(original_nfa, MatchRef.new(input, start_of_match_index..index)) if dfa_clone.accept?

      dfa_clone.error?
    end

    if nfas_with_dead_err = start_index_to_nfas_that_may_match[index]
      nfas_with_dead_err.each do |nfa_with_dead_err|
        machines = machines_from_nfa_with_dead_err(nfa_with_dead_err)
        original_nfa = machines.nfa
        dfa = machines.dfa
        dfa_clone = dfa.shallow_clone

        dfa_clone.handle_token!(char, index)
        mt.add_match(original_nfa, MatchRef.new(input, index..index)) if dfa_clone.accept?

        active_dfas << [dfa_clone, original_nfa, index] unless dfa_clone.error?
      end
    end
  end

  mt.matches
end

#setup_callbacks(dfa) ⇒ Object



137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
# File 'lib/kleene/multi_match_dfa.rb', line 137

def setup_callbacks(dfa)
  match_tracker = BatchMatchTracker.new

  # 1. identify DFA states that correspond to successful match of first character of the NFAs
  epsilon_closure_of_nfa_start_state = composite_nfa.epsilon_closure(composite_nfa.start_state)
  nfa_states_that_correspond_to_successful_match_of_first_character_of_component_nfa = composite_nfa.transitions_from(epsilon_closure_of_nfa_start_state).
                                                                                                     reject {|transition| transition.epsilon? || transition.to.error? }.
                                                                                                     map(&:to).to_set
  dfa_states_that_correspond_to_successful_match_of_first_character_of_component_nfa = nfa_states_that_correspond_to_successful_match_of_first_character_of_component_nfa.
                                                                                          compact_map {|nfa_state| dfa.nfa_state_to_dfa_state_sets[nfa_state] }.
                                                                                          reduce(Set.new) {|memo, state_set| memo | state_set }
  dfa_state_to_dead_end_nfas_that_have_matched_their_first_character = Hash.new
  dfa_states_that_correspond_to_successful_match_of_first_character_of_component_nfa.each do |dfa_state|
    dfa_state_to_dead_end_nfas_that_have_matched_their_first_character[dfa_state] = dfa.dfa_state_to_nfa_state_sets[dfa_state].
                                                                                        select {|nfa_state| nfa_states_that_correspond_to_successful_match_of_first_character_of_component_nfa.includes?(nfa_state) }.
                                                                                        compact_map do |nfa_state|
      dead_end_nfa_state_to_dead_end_nfa[nfa_state] unless nfa_state == composite_nfa.start_state    # composite_nfa.start_state is not referenced in the dead_end_nfa_state_to_dead_end_nfa map
    end.to_set
  end

  # 2. identify DFA states that correspond to final states in the NFAs
  nfa_final_states = @nfas_with_err_state.map(&:final_states).reduce(Set.new) {|memo, state_set| memo | state_set }
  dfa_states_that_correspond_to_nfa_final_states = nfa_final_states.compact_map {|nfa_state| dfa.nfa_state_to_dfa_state_sets[nfa_state] }.
                                                                    reduce(Set.new) {|memo, state_set| memo | state_set }
  dead_end_nfas_that_have_transitioned_to_final_state = Hash.new
  dfa_states_that_correspond_to_nfa_final_states.each do |dfa_state|
    dead_end_nfas_that_have_transitioned_to_final_state[dfa_state] = dfa.dfa_state_to_nfa_state_sets[dfa_state].
                                                                         select {|nfa_state| nfa_final_states.includes?(nfa_state) }.
                                                                         compact_map do |nfa_state|
      dead_end_nfa_state_to_dead_end_nfa[nfa_state] unless nfa_state == composite_nfa.start_state    # composite_nfa.start_state is not referenced in the dead_end_nfa_state_to_dead_end_nfa map
    end.to_set
  end

  # 3. Identify DFA states that correspond to successful match without even having seen any characters.
  #    These are cases where the NFA's start state is a final state or can reach a final state by following only epsilon transitions.
  nfa_final_states_that_are_epsilon_reachable_from_nfa_start_state = epsilon_closure_of_nfa_start_state.select(&:final?).to_set
  dfa_states_that_represent_both_start_states_and_final_states = nfa_final_states_that_are_epsilon_reachable_from_nfa_start_state.
                                                                    compact_map {|nfa_state| dfa.nfa_state_to_dfa_state_sets[nfa_state] }.
                                                                    reduce(Set.new) {|memo, state_set| memo | state_set }
  dfa_state_to_dead_end_nfas_that_have_matched_before_handling_any_characters = Hash.new
  dfa_states_that_represent_both_start_states_and_final_states.each do |dfa_state|
    dfa_state_to_dead_end_nfas_that_have_matched_before_handling_any_characters[dfa_state] = dfa.dfa_state_to_nfa_state_sets[dfa_state].
                                                                                                 select {|nfa_state| nfa_final_states_that_are_epsilon_reachable_from_nfa_start_state.includes?(nfa_state) }.
                                                                                                 compact_map do |nfa_state|
      dead_end_nfa_state_to_dead_end_nfa[nfa_state] unless nfa_state == composite_nfa.start_state    # composite_nfa.start_state is not referenced in the dead_end_nfa_state_to_dead_end_nfa map
    end.to_set
  end

  # set up call transition call backs, since the callbacks may only be defined once per state and transition
  # For (1):
  #    Set up transition callbacks to push the index position of the start of a match of each NFA that has begun
  #    to be matched on the transition to one of the states in (1)
  # For (2):
  #    set up transition callbacks to push the index position of the end of a successful match onto the list
  #    of successful matches for the NFA that matched
  # For (3):
  #    set up transision callbacks to capture successful empty matches
  destination_dfa_states_for_callbacks = dfa_states_that_correspond_to_successful_match_of_first_character_of_component_nfa | dfa_states_that_correspond_to_nfa_final_states
  destination_dfa_states_for_callbacks.each do |dfa_state|
    dfa.on_transition_to(dfa_state) do |transition, token, token_index|
      destination_dfa_state = transition.to

      should_track_empty_match = dfa_states_that_represent_both_start_states_and_final_states.includes?(destination_dfa_state)
      should_track_start_of_candidate_match = should_track_empty_match || dfa_states_that_correspond_to_successful_match_of_first_character_of_component_nfa.includes?(destination_dfa_state)
      should_track_end_of_match = dfa_states_that_correspond_to_nfa_final_states.includes?(destination_dfa_state)

      if should_track_empty_match
        dfa_state_to_dead_end_nfas_that_have_matched_before_handling_any_characters[destination_dfa_state].each do |nfa_with_dead_end|
          match_tracker.add_empty_match(nfa_with_dead_end, token_index)
        end
      end

      if should_track_start_of_candidate_match
        nfas_that_matched_first_character = dfa_state_to_dead_end_nfas_that_have_matched_their_first_character[destination_dfa_state] || Set.new
        nfas_that_matched_empty_match = dfa_state_to_dead_end_nfas_that_have_matched_before_handling_any_characters[destination_dfa_state] || Set.new
        dead_end_nfas_that_are_starting_to_match = nfas_that_matched_first_character | nfas_that_matched_empty_match
        dead_end_nfas_that_are_starting_to_match.each do |nfa_with_dead_end|
          match_tracker.add_start_of_candidate_match(nfa_with_dead_end, token_index)
        end
      end

      if should_track_end_of_match
        dead_end_nfas_that_have_transitioned_to_final_state[destination_dfa_state].each do |nfa_with_dead_end|
          match_tracker.add_end_of_match(nfa_with_dead_end, token_index)
        end
      end
    end
  end

  match_tracker
end