Module: Kleene::DSL

Extended by:
DSL
Included in:
DSL, MultiMatchDFA, OnlineDFA
Defined in:
lib/kleene/dsl.rb

Instance Method Summary collapse

Instance Method Details

#any(token_collection, alphabet = DEFAULT_ALPHABET) ⇒ Object

two states: start state and final state structure: start state -> transitions for each token in the token collection -> final state



31
32
33
34
35
36
37
38
39
40
# File 'lib/kleene/dsl.rb', line 31

def any(token_collection, alphabet = DEFAULT_ALPHABET)
  start = State.new
  nfa = NFA.new(start, alphabet)
  final = State.new(true)
  token_collection.each {|token| nfa.add_transition(token, start, final) }
  nfa.update_final_states
  regex_pattern = "[#{token_collection.join("")}]"
  nfa.set_regex_pattern(regex_pattern)
  nfa
end

#append(a, b) ⇒ Object

Append b onto a Appending produces a machine that matches all the strings in machine a followed by all the strings in machine b. This differs from ‘seq` in that the composite machine’s final states are the union of machine a’s final states and machine b’s final states.



118
119
120
121
122
# File 'lib/kleene/dsl.rb', line 118

def append(a, b)
  a = a.deep_clone
  b = b.deep_clone
  append!(a, b)
end

#append!(a, b) ⇒ Object

Destructively append b onto a Appending produces a machine that matches all the strings in machine a followed by all the strings in machine b. This differs from ‘seq` in that the composite machine’s final states are the union of machine a’s final states and machine b’s final states.



127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
# File 'lib/kleene/dsl.rb', line 127

def append!(a, b)
  a.alphabet = a.alphabet | b.alphabet

  # add an epsilon transition from each final state of machine a to the start state of maachine b.
  a.final_states.each do |final_state|
    a.add_transition(NFATransition::Epsilon, final_state, b.start_state)
  end

  # add all of machine b's transitions to machine a
  b.all_transitions.each {|transition| a.add_transition(transition.token, transition.from, transition.to) }
  # a.final_states = a.final_states | b.final_states
  a.update_final_states

  a
end

#dot(alphabet = DEFAULT_ALPHABET) ⇒ Object

two states: start state and final state structure: start state -> transitions for every token in the alphabet -> final state



44
45
46
# File 'lib/kleene/dsl.rb', line 44

def dot(alphabet = DEFAULT_ALPHABET)
  any(alphabet, alphabet).set_regex_pattern(".")
end

#kleene(machine) ⇒ Object

Implements Kleene Star, as defined in the Ragel manual in section 2.5.6 of www.colm.net/files/ragel/ragel-guide-6.10.pdf: The machine resulting from the Kleene Star operator will match zero or more repetitions of the machine it is applied to. It creates a new start state and an additional final state. Epsilon transitions are drawn between the new start state and the old start state, between the new start state and the new final state, and between the final states of the machine and the new start state.



201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
# File 'lib/kleene/dsl.rb', line 201

def kleene(machine)
  machine = machine.deep_clone
  start = State.new
  final = State.new(true)

  nfa = NFA.new(start, machine.alphabet)
  nfa.add_states(machine.states)
  nfa.add_transition(NFATransition::Epsilon, start, final)
  nfa.add_transition(NFATransition::Epsilon, start, machine.start_state)
  machine.final_states.each do |final_state|
    nfa.add_transition(NFATransition::Epsilon, final_state, start)
    final_state.final = false
  end

  # add all of machine's transitions to the new machine
  (machine.all_transitions).each {|t| nfa.add_transition(t.token, t.from, t.to) }
  nfa.update_final_states

  nfa.set_regex_pattern("#{machine.regex_pattern}*")
end

#literal(token_stream, alphabet = DEFAULT_ALPHABET) ⇒ Object

given a string with N characters in it: N+1 states: start state and N other states structure: start state -> transition for first character in the string -> state for having observed first character in the string ->

transition for second character in the string -> state for having observed second character in the string ->
...
transition for last character in the string -> state for having observed last character in the string (marked final)


15
16
17
18
19
20
21
22
23
24
25
26
27
# File 'lib/kleene/dsl.rb', line 15

def literal(token_stream, alphabet = DEFAULT_ALPHABET)
  start = current_state = State.new
  nfa = NFA.new(start, alphabet)
  token_stream.each_char do |token|
    next_state = State.new
    nfa.add_transition(token, current_state, next_state)
    current_state = next_state
  end
  current_state.final = true
  nfa.update_final_states
  nfa.set_regex_pattern(token_stream)
  nfa
end

#optional(machine) ⇒ Object



226
227
228
229
# File 'lib/kleene/dsl.rb', line 226

def optional(machine)
  empty = NFA.new(State.new(true), machine.alphabet).set_regex_pattern("")
  union(machine, empty).set_regex_pattern("#{machine.regex_pattern}?")
end

#plus(machine) ⇒ Object



222
223
224
# File 'lib/kleene/dsl.rb', line 222

def plus(machine)
  seq(machine, kleene(machine)).set_regex_pattern("#{machine.regex_pattern}+")
end

#range(c_begin, c_end, alphabet = DEFAULT_ALPHABET) ⇒ Object

This implements a character class, and is specifically for use with matching strings



49
50
51
# File 'lib/kleene/dsl.rb', line 49

def range(c_begin, c_end, alphabet = DEFAULT_ALPHABET)
  any((c_begin..c_end).to_a, alphabet).set_regex_pattern("[#{c_begin}-#{c_end}]")
end

#seq(*nfas) ⇒ Object



143
144
145
# File 'lib/kleene/dsl.rb', line 143

def seq(*nfas)
  nfas.flatten.reduce {|memo_nfa, nfa| seq2(memo_nfa, nfa) }
end

#seq2(a, b) ⇒ Object

Implements concatenation, as defined in the Ragel manual in section 2.5.5 of www.colm.net/files/ragel/ragel-guide-6.10.pdf: Seq produces a machine that matches all the strings in machine ‘a` followed by all the strings in machine `b`. Seq draws epsilon transitions from the final states of thefirst machine to the start state of the second machine. The final states of the first machine lose their final state status, unless the start state of the second machine is final as well.



151
152
153
154
155
156
157
158
159
160
161
162
# File 'lib/kleene/dsl.rb', line 151

def seq2(a, b)
  a = a.deep_clone
  b = b.deep_clone

  a = append!(a, b)

  # make sure that b's final states are the only final states in a after we have appended b onto a
  a.states.each {|state| state.final = b.final_states.includes?(state) }
  a.update_final_states

  a.set_regex_pattern("#{a.regex_pattern}#{b.regex_pattern}")
end

#union(*nfas) ⇒ Object

Build a new machine consisting of a new start state with epsilon transitions to the start state of all the given NFAs in ‘nfas`. The resulting machine’s final states are the set of final states from all the NFAs in ‘nfas`.

Implements Union, as defined in the Ragel manual in section 2.5.1 of www.colm.net/files/ragel/ragel-guide-6.10.pdf: The union operation produces a machine that matches any string in machine one or machine two. The operation first creates a new start state. Epsilon transitions are drawn from the new start state to the start states of both input machines. The resulting machine has a final state setequivalent to the union of the final state sets of both input machines.



172
173
174
175
176
# File 'lib/kleene/dsl.rb', line 172

def union(*nfas)
  nfas.flatten!
  nfas = nfas.map(&:deep_clone)
  union!(nfas)
end

#union!(nfas) ⇒ Object

same as union, but doesn’t deep clone the constituent nfas



179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
# File 'lib/kleene/dsl.rb', line 179

def union!(nfas)
  start = State.new
  composite_alphabet = nfas.map(&:alphabet).reduce {|memo, alphabet| memo | alphabet }
  new_nfa = NFA.new(start, composite_alphabet)

  # add epsilon transitions from the start state of the new machine to the start state of machines a and b
  nfas.each do |nfa|
    new_nfa.add_states(nfa.states)
    new_nfa.add_transition(NFATransition::Epsilon, start, nfa.start_state)
    nfa.all_transitions.each {|t| new_nfa.add_transition(t.token, t.from, t.to) }
  end

  new_nfa.update_final_states

  new_nfa.set_regex_pattern("#{nfas.map(&:regex_pattern).join("|")}")
end

#with_err(nfa, alphabet = nfa.alphabet) ⇒ Object

always clones the given nfa and returns a new nfa with a non-final error state



56
57
58
# File 'lib/kleene/dsl.rb', line 56

def with_err(nfa, alphabet = nfa.alphabet)
  with_err!(nfa.deep_clone, alphabet)
end

#with_err!(nfa, alphabet = nfa.alphabet) ⇒ Object

adds and error state to the NFA, create error transitions from all non-error states to the error state on any unhandled token. the error state transitions to itself on any token.



62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
# File 'lib/kleene/dsl.rb', line 62

def with_err!(nfa, alphabet = nfa.alphabet)
  error_state = nfa.states.find(&:error?)
  return nfa if error_state

  error_state = State.new_error_state
  nfa.add_state(error_state)

  nfa.states.each do |state|
    tokens_on_outbound_transitions = nfa.transitions_from(state).map(&:token)
    missing_tokens = alphabet - tokens_on_outbound_transitions
    missing_tokens.each do |token|
      nfa.add_transition(token, state, error_state)
    end
  end

  nfa.remove_state(error_state) if nfa.all_transitions.none? {|transition| transition.from == error_state || transition.to == error_state }

  nfa.set_regex_pattern("/#{nfa.regex_pattern}/E")
end

#with_err_dead_end(nfa, alphabet = nfa.alphabet) ⇒ Object

always clones the given nfa and returns a new nfa with a non-final error state



83
84
85
# File 'lib/kleene/dsl.rb', line 83

def with_err_dead_end(nfa, alphabet = nfa.alphabet)
  with_err_dead_end!(nfa.deep_clone, alphabet)
end

#with_err_dead_end!(nfa, alphabet = nfa.alphabet) ⇒ Object

adds and error state to the NFA, create error transitions from all non-error states to the error state on any unhandled token. the error state doesn’t have any outbound transitions.



89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
# File 'lib/kleene/dsl.rb', line 89

def with_err_dead_end!(nfa, alphabet = nfa.alphabet)
  error_state = nfa.states.find(&:error?)
  return nfa if error_state

  error_state = State.new_error_state
  nfa.add_state(error_state)

  nfa.states.each do |state|
    unless state.error?
      tokens_on_outbound_transitions = nfa.transitions_from(state).map(&:token).to_set
      only_outbound_transition_is_epsilon_transition = tokens_on_outbound_transitions.size == 1 && tokens_on_outbound_transitions.first == NFATransition::Epsilon
      unless only_outbound_transition_is_epsilon_transition
        missing_tokens = (alphabet - Set[NFATransition::Epsilon]) - tokens_on_outbound_transitions
        missing_tokens.each do |token|
          nfa.add_transition(token, state, error_state)
        end
      end
    end
  end

  # remove the error state if it has no inbound or outbound transitions
  nfa.remove_state(error_state) if nfa.all_transitions.none? {|transition| transition.from == error_state || transition.to == error_state }

  nfa.set_regex_pattern("/#{nfa.regex_pattern}/DE")
end