Class: Kleene::OnlineDFA

Inherits:
Object
  • Object
show all
Includes:
DSL
Defined in:
lib/kleene/online_dfa.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Methods included from DSL

#any, #append, #append!, #dot, #kleene, #literal, #optional, #plus, #range, #seq, #seq2, #union, #union!, #with_err, #with_err!, #with_err_dead_end, #with_err_dead_end!

Constructor Details

#initialize(nfas) ⇒ OnlineDFA

Returns a new instance of OnlineDFA.



29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
# File 'lib/kleene/online_dfa.rb', line 29

def initialize(nfas)
  composite_alphabet = nfas.reduce(Set.new) {|memo, nfa| memo | nfa.alphabet }

  @original_nfas = nfas
  @nfas_with_err_state = nfas.map {|nfa| with_err_dead_end(nfa, composite_alphabet) }     # copy NFAs and add dead-end error states to each of them
  dfas = @original_nfas.map(&:to_dfa)

  @nfa_to_index = @original_nfas.map.with_index {|nfa, index| [nfa, index] }.to_h
  @nfa_with_dead_err_to_index = @nfas_with_err_state.map.with_index {|nfa, index| [nfa, index] }.to_h
  @dfa_to_index = dfas.map.with_index {|dfa, index| [dfa, index] }.to_h
  @machines_by_index = @original_nfas.zip(nfas_with_err_state, dfas).map.with_index {|tuple, index| nfa, nfa_with_dead_err, dfa = tuple; [index, MachineTuple.new(nfa, nfa_with_dead_err, dfa)] }.to_h

  # build a mapping of (state -> nfa) pairs that capture which nfa owns each state
  @dead_end_nfa_state_to_dead_end_nfa = Hash.new
  @nfas_with_err_state.each do |nfa_with_dead_err|
    nfa_with_dead_err.states.each do |state|
      @dead_end_nfa_state_to_dead_end_nfa[state] = nfa_with_dead_err
    end
  end

  # create a composite NFA as the union of all the NFAs with epsilon transitions from every NFA state back to the union NFA's start state
  @composite_nfa = create_composite_nfa(@nfas_with_err_state)
  @composite_dfa = @composite_nfa.to_dfa

  reset
end

Instance Attribute Details

#composite_dfaObject

: DFA



22
23
24
# File 'lib/kleene/online_dfa.rb', line 22

def composite_dfa
  @composite_dfa
end

#composite_nfaObject

: NFA



21
22
23
# File 'lib/kleene/online_dfa.rb', line 21

def composite_nfa
  @composite_nfa
end

#dead_end_nfa_state_to_dead_end_nfaObject

: Hash(State, NFA)



20
21
22
# File 'lib/kleene/online_dfa.rb', line 20

def dead_end_nfa_state_to_dead_end_nfa
  @dead_end_nfa_state_to_dead_end_nfa
end

#dfa_to_indexObject

: Hash(DFA, Int32)



27
28
29
# File 'lib/kleene/online_dfa.rb', line 27

def dfa_to_index
  @dfa_to_index
end

#machines_by_indexObject

: Hash(Int32, MachineTuple)



24
25
26
# File 'lib/kleene/online_dfa.rb', line 24

def machines_by_index
  @machines_by_index
end

#nfa_to_indexObject

: Hash(NFA, Int32)



25
26
27
# File 'lib/kleene/online_dfa.rb', line 25

def nfa_to_index
  @nfa_to_index
end

#nfa_with_dead_err_to_indexObject

: Hash(NFA, Int32)



26
27
28
# File 'lib/kleene/online_dfa.rb', line 26

def nfa_with_dead_err_to_index
  @nfa_with_dead_err_to_index
end

#nfas_with_err_stateObject (readonly)

Returns the value of attribute nfas_with_err_state.



19
20
21
# File 'lib/kleene/online_dfa.rb', line 19

def nfas_with_err_state
  @nfas_with_err_state
end

Instance Method Details

#create_composite_nfa(nfas) ⇒ Object

create a composite NFA as the union of all the NFAs with epsilon transitions from every NFA state back to the union NFA’s start state



69
70
71
72
73
74
75
76
77
78
79
80
81
82
# File 'lib/kleene/online_dfa.rb', line 69

def create_composite_nfa(nfas)
  nfa = union!(nfas)

  # add epsilon transitions from all the states except the start state back to the start state
  nfa.states.each do |state|
    if state != nfa.start_state
      nfa.add_transition(NFATransition::Epsilon, state, nfa.start_state)
    end
  end

  nfa.update_final_states

  nfa
end

#ingest(input, debug = false) ⇒ Object

#ingest(input) is the online-style matching interface



92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
# File 'lib/kleene/online_dfa.rb', line 92

def ingest(input, debug = false) # : Hash(NFA, Array(MatchRef))
  mt = @match_tracker

  start_index_of_input_fragment_in_buffer = @buffer.length

  input.each_char.with_index do |char, index|
    @active_composite_dfa.handle_token!(char, start_index_of_input_fragment_in_buffer + index)
  end

  @buffer << input

  start_index_to_nfas_that_may_match = mt.invert_candidate_match_start_positions

  mt.empty_matches.each do |nfa_with_dead_err, indices|
    original_nfa = machines_from_nfa_with_dead_err(nfa_with_dead_err).nfa
    indices.select {|index| index >= start_index_of_input_fragment_in_buffer }.each do |index|
      mt.add_match(original_nfa, MatchRef.new(@buffer, index...index))
    end
  end

  input.each_char.with_index do |char, index|
    index_in_buffer = start_index_of_input_fragment_in_buffer + index

    @active_candidate_dfas.reject! do |active_dfa_tuple|
      dfa_clone, original_nfa, start_of_match_index = active_dfa_tuple

      dfa_clone.handle_token!(char, index_in_buffer)
      mt.add_match(original_nfa, MatchRef.new(@buffer, start_of_match_index..index_in_buffer)) if dfa_clone.accept?

      dfa_clone.error?
    end

    if nfas_with_dead_err = start_index_to_nfas_that_may_match[index_in_buffer]
      nfas_with_dead_err.each do |nfa_with_dead_err|
        machines = machines_from_nfa_with_dead_err(nfa_with_dead_err)
        original_nfa = machines.nfa
        dfa = machines.dfa
        dfa_clone = dfa.shallow_clone

        dfa_clone.handle_token!(char, index_in_buffer)
        mt.add_match(original_nfa, MatchRef.new(@buffer, index_in_buffer..index_in_buffer)) if dfa_clone.accept?

        @active_candidate_dfas << [dfa_clone, original_nfa, index_in_buffer] unless dfa_clone.error?
      end
    end
  end

  matches
end

#machines_from_dfa(dfa) ⇒ Object

: MachineTuple



64
65
66
# File 'lib/kleene/online_dfa.rb', line 64

def machines_from_dfa(dfa) # : MachineTuple
  machines_by_index[dfa_to_index[dfa]]
end

#machines_from_nfa(nfa) ⇒ Object

: MachineTuple



56
57
58
# File 'lib/kleene/online_dfa.rb', line 56

def machines_from_nfa(nfa) # : MachineTuple
  machines_by_index[nfa_to_index[nfa]]
end

#machines_from_nfa_with_dead_err(nfa_with_dead_err) ⇒ Object

: MachineTuple



60
61
62
# File 'lib/kleene/online_dfa.rb', line 60

def machines_from_nfa_with_dead_err(nfa_with_dead_err) # : MachineTuple
  machines_by_index[nfa_with_dead_err_to_index[nfa_with_dead_err]]
end

#matchesObject



142
143
144
# File 'lib/kleene/online_dfa.rb', line 142

def matches
  @match_tracker.matches
end

#resetObject

: OnlineMatchTracker



84
85
86
87
88
89
# File 'lib/kleene/online_dfa.rb', line 84

def reset # : OnlineMatchTracker
  @active_composite_dfa = @composite_dfa.deep_clone
  @active_candidate_dfas = []
  @match_tracker = setup_callbacks(@active_composite_dfa)
  @buffer = ""
end

#setup_callbacks(dfa) ⇒ Object



146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
# File 'lib/kleene/online_dfa.rb', line 146

def setup_callbacks(dfa)
  match_tracker = OnlineMatchTracker.new

  # 1. identify DFA states that correspond to successful match of first character of the NFAs
  epsilon_closure_of_nfa_start_state = composite_nfa.epsilon_closure(composite_nfa.start_state)
  nfa_states_that_correspond_to_successful_match_of_first_character_of_component_nfa = composite_nfa.transitions_from(epsilon_closure_of_nfa_start_state).
                                                                                                     reject {|transition| transition.epsilon? || transition.to.error? }.
                                                                                                     map(&:to).to_set
  dfa_states_that_correspond_to_successful_match_of_first_character_of_component_nfa = nfa_states_that_correspond_to_successful_match_of_first_character_of_component_nfa.
                                                                                          compact_map {|nfa_state| dfa.nfa_state_to_dfa_state_sets[nfa_state] }.
                                                                                          reduce(Set.new) {|memo, state_set| memo | state_set }
  dfa_state_to_dead_end_nfas_that_have_matched_their_first_character = Hash.new
  dfa_states_that_correspond_to_successful_match_of_first_character_of_component_nfa.each do |dfa_state|
    dfa_state_to_dead_end_nfas_that_have_matched_their_first_character[dfa_state] = dfa.dfa_state_to_nfa_state_sets[dfa_state].
                                                                                        select {|nfa_state| nfa_states_that_correspond_to_successful_match_of_first_character_of_component_nfa.includes?(nfa_state) }.
                                                                                        compact_map do |nfa_state|
      dead_end_nfa_state_to_dead_end_nfa[nfa_state] unless nfa_state == composite_nfa.start_state    # composite_nfa.start_state is not referenced in the dead_end_nfa_state_to_dead_end_nfa map
    end.to_set
  end

  # 2. identify DFA states that correspond to final states in the NFAs
  nfa_final_states = @nfas_with_err_state.map(&:final_states).reduce(Set.new) {|memo, state_set| memo | state_set }
  dfa_states_that_correspond_to_nfa_final_states = nfa_final_states.compact_map {|nfa_state| dfa.nfa_state_to_dfa_state_sets[nfa_state] }.
                                                                    reduce(Set.new) {|memo, state_set| memo | state_set }
  dead_end_nfas_that_have_transitioned_to_final_state = Hash.new
  dfa_states_that_correspond_to_nfa_final_states.each do |dfa_state|
    dead_end_nfas_that_have_transitioned_to_final_state[dfa_state] = dfa.dfa_state_to_nfa_state_sets[dfa_state].
                                                                         select {|nfa_state| nfa_final_states.includes?(nfa_state) }.
                                                                         compact_map do |nfa_state|
      dead_end_nfa_state_to_dead_end_nfa[nfa_state] unless nfa_state == composite_nfa.start_state    # composite_nfa.start_state is not referenced in the dead_end_nfa_state_to_dead_end_nfa map
    end.to_set
  end

  # 3. Identify DFA states that correspond to successful match without even having seen any characters.
  #    These are cases where the NFA's start state is a final state or can reach a final state by following only epsilon transitions.
  nfa_final_states_that_are_epsilon_reachable_from_nfa_start_state = epsilon_closure_of_nfa_start_state.select(&:final?).to_set
  dfa_states_that_represent_both_start_states_and_final_states = nfa_final_states_that_are_epsilon_reachable_from_nfa_start_state.
                                                                    compact_map {|nfa_state| dfa.nfa_state_to_dfa_state_sets[nfa_state] }.
                                                                    reduce(Set.new) {|memo, state_set| memo | state_set }
  dfa_state_to_dead_end_nfas_that_have_matched_before_handling_any_characters = Hash.new
  dfa_states_that_represent_both_start_states_and_final_states.each do |dfa_state|
    dfa_state_to_dead_end_nfas_that_have_matched_before_handling_any_characters[dfa_state] = dfa.dfa_state_to_nfa_state_sets[dfa_state].
                                                                                                 select {|nfa_state| nfa_final_states_that_are_epsilon_reachable_from_nfa_start_state.includes?(nfa_state) }.
                                                                                                 compact_map do |nfa_state|
      dead_end_nfa_state_to_dead_end_nfa[nfa_state] unless nfa_state == composite_nfa.start_state    # composite_nfa.start_state is not referenced in the dead_end_nfa_state_to_dead_end_nfa map
    end.to_set
  end

  # set up call transition call backs, since the callbacks may only be defined once per state and transition
  # For (1):
  #    Set up transition callbacks to push the index position of the start of a match of each NFA that has begun
  #    to be matched on the transition to one of the states in (1)
  # For (2):
  #    set up transition callbacks to push the index position of the end of a successful match onto the list
  #    of successful matches for the NFA that matched
  # For (3):
  #    set up transision callbacks to capture successful empty matches
  destination_dfa_states_for_callbacks = dfa_states_that_correspond_to_successful_match_of_first_character_of_component_nfa | dfa_states_that_correspond_to_nfa_final_states
  destination_dfa_states_for_callbacks.each do |dfa_state|
    dfa.on_transition_to(dfa_state) do |transition, token, token_index|
      destination_dfa_state = transition.to

      should_track_empty_match = dfa_states_that_represent_both_start_states_and_final_states.includes?(destination_dfa_state)
      should_track_start_of_candidate_match = should_track_empty_match || dfa_states_that_correspond_to_successful_match_of_first_character_of_component_nfa.includes?(destination_dfa_state)
      should_track_end_of_match = dfa_states_that_correspond_to_nfa_final_states.includes?(destination_dfa_state)

      if should_track_empty_match
        dfa_state_to_dead_end_nfas_that_have_matched_before_handling_any_characters[destination_dfa_state].each do |nfa_with_dead_end|
          match_tracker.add_empty_match(nfa_with_dead_end, token_index)
        end
      end

      if should_track_start_of_candidate_match
        nfas_that_matched_first_character = dfa_state_to_dead_end_nfas_that_have_matched_their_first_character[destination_dfa_state] || Set.new
        nfas_that_matched_empty_match = dfa_state_to_dead_end_nfas_that_have_matched_before_handling_any_characters[destination_dfa_state] || Set.new
        dead_end_nfas_that_are_starting_to_match = nfas_that_matched_first_character | nfas_that_matched_empty_match
        dead_end_nfas_that_are_starting_to_match.each do |nfa_with_dead_end|
          match_tracker.add_start_of_candidate_match(nfa_with_dead_end, token_index)
        end
      end

      if should_track_end_of_match
        dead_end_nfas_that_have_transitioned_to_final_state[destination_dfa_state].each do |nfa_with_dead_end|
          match_tracker.add_end_of_match(nfa_with_dead_end, token_index)
        end
      end
    end
  end

  match_tracker
end