Class: HMM::Classifier

Inherits:
Object
  • Object
show all
Defined in:
lib/hmm.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initializeClassifier

Member variables: pi – initial state distribution a – state transition probabilities b – state-conditional observation probabilities o_lex – index of observation labels q_lex – index of state labels debug – flag for verbose output to stdout train – a list of labelled sequences for supervised training



28
29
30
# File 'lib/hmm.rb', line 28

def initialize
	@o_lex, @q_lex, @train = [], [], []
end

Instance Attribute Details

#aObject

Returns the value of attribute a.



18
19
20
# File 'lib/hmm.rb', line 18

def a
  @a
end

#bObject

Returns the value of attribute b.



18
19
20
# File 'lib/hmm.rb', line 18

def b
  @b
end

#debugObject

Returns the value of attribute debug.



18
19
20
# File 'lib/hmm.rb', line 18

def debug
  @debug
end

#o_lexObject

Returns the value of attribute o_lex.



18
19
20
# File 'lib/hmm.rb', line 18

def o_lex
  @o_lex
end

#piObject

Returns the value of attribute pi.



18
19
20
# File 'lib/hmm.rb', line 18

def pi
  @pi
end

#q_lexObject

Returns the value of attribute q_lex.



18
19
20
# File 'lib/hmm.rb', line 18

def q_lex
  @q_lex
end

#trainObject

Returns the value of attribute train.



18
19
20
# File 'lib/hmm.rb', line 18

def train
  @train
end

Instance Method Details

#accuracy(o, q) ⇒ Object



382
383
384
385
386
387
388
389
390
# File 'lib/hmm.rb', line 382

def accuracy(o, q)
	# token level accuracy across a set of sequences
	correct, total = 0.0, 0.0
	o.length.times do |i|
		correct += (NArray.to_na(decode(o[i])).eq NArray.to_na(q[i])).sum
		total += o[i].length
	end
	correct/total
end

#add_to_train(o, q) ⇒ Object



32
33
34
35
36
# File 'lib/hmm.rb', line 32

def add_to_train(o, q)
	@o_lex |= o # add new tokens to indexed lexicon
	@q_lex |= q
	@train << Sequence.new(index(o, @o_lex), index(q, @q_lex))
end

#backward_probability(sequence) ⇒ Object



334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
# File 'lib/hmm.rb', line 334

def backward_probability(sequence)
	beta = NArray.float(sequence.length, q_lex.length).fill(-Infinity)
	
	beta[-1, true] = log(1)
	
	(sequence.length-2).downto(0) do |t|
		q_lex.each_index do |i|
			q_lex.each_index do |j|
				beta[t, i] = log_add([beta[t,i], log(@a[i, j]) \
					+ log(@b[j, index(sequence[t+1], o_lex)]) \
					+ beta[t+1, j]])
			end
		end
	end

	beta
end

#decode(o_sequence) ⇒ Object



352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
# File 'lib/hmm.rb', line 352

def decode(o_sequence)
	# Viterbi!  with log probability math to avoid underflow
	
	# encode observations
	o_sequence = index(o_sequence, @o_lex)
	
	# initialize.  skipping the 0 initialization for psi, as it's never used.
	# psi will have T-1 elements instead of T, allowing it
	# to control the backtrack iterator later.
	delta, psi = [log(pi)+log(b[true, o_sequence.shift])], []

	# recursive step
	o_sequence.each do |o|
		psi << argmax(delta.last+log(a))
		delta << (delta.last+log(a)).max(0)+log(b[true, o])
	end
	
	# initialize Q* with final state
	q_star = [delta.last.sort_index[-1]]
	
	# backtrack the optimal state sequence into Q*
	psi.reverse.each do |psi_t|
		q_star.unshift psi_t[q_star.first]
	end
	
	puts "delta:", exp(delta).inspect, "psi:", exp(psi).inspect if @debug
	
	return deindex(q_star, @q_lex)
end

#forward_probability(sequence) ⇒ Object



295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
# File 'lib/hmm.rb', line 295

def forward_probability(sequence)
	alpha = NArray.float(sequence.length, q_lex.length).fill(-Infinity)
	
	alpha[0, true] = log(@pi) + log(@b[true, index(sequence.first, o_lex)])
	
	sequence.each_with_index do |o, t|
		next if t==0
		q_lex.each_index do |i|
			q_lex.each_index do |j|
				alpha[t, i] = log_add([alpha[t, i], alpha[t-1, j]+log(@a[j, i])])
			end
			alpha[t, i] += log(b[i, index(o, o_lex)])
		end
	end
	alpha
end

#gamma(xi) ⇒ Object



280
281
282
283
284
285
286
287
288
289
290
291
292
293
# File 'lib/hmm.rb', line 280

def gamma(xi)
	gamma = NArray.float(xi.shape[0], xi.shape[1]).fill(-Infinity)
	
	0.upto gamma.shape[0] - 1 do |t|
		q_lex.each_index do |i|
			q_lex.each_index do |j|
				gamma[t, i] = log_add([gamma[t, i], xi[t, i, j]])
			end
		end
	end
	
	puts "Gamma: #{gamma.inspect}" if debug
	gamma
end

#likelihood(sequence) ⇒ Object



317
318
319
# File 'lib/hmm.rb', line 317

def likelihood(sequence)
	exp(log_likelihood(sequence))
end

#log_add(values) ⇒ Object



321
322
323
324
325
326
327
328
329
330
331
332
# File 'lib/hmm.rb', line 321

def log_add(values)
	x = values.max
	if x > -Infinity
		sum_diffs = 0
		values.each do |value|
			sum_diffs += Math::E**(value - x)
		end
		return x + log(sum_diffs)
	else
		return x
	end
end

#log_likelihood(sequence) ⇒ Object



312
313
314
315
# File 'lib/hmm.rb', line 312

def log_likelihood(sequence)
	alpha = forward_probability(sequence)
	log_add(alpha[-1, true])
end

#train_unsupervised(sequences, max_iterations = 10) ⇒ Object



135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
# File 'lib/hmm.rb', line 135

def train_unsupervised(sequences, max_iterations = 10)
	# initialize model parameters if we don't already have an estimate
	@pi ||= NArray.float(@q_lex.length).fill(1)/@q_lex.length			
	@a ||= NArray.float(@q_lex.length, @q_lex.length).fill(1)/@q_lex.length
	@b ||= NArray.float(@q_lex.length, @o_lex.length).fill(1)/@q_lex.length
	puts @pi.inspect, @a.inspect, @b.inspect if debug
	
	converged = false
	last_logprob = 0
	iteration = 0
	#max_iterations = 10 #1000 #kwargs.get('max_iterations', 1000)
	epsilon = 1e-6 # kwargs.get('convergence_logprob', 1e-6)
	
	max_iterations.times do |iteration|
		puts "iteration ##{iteration}" #if debug

		_A_numer = NArray.float(q_lex.length,q_lex.length).fill(-Infinity)
		_B_numer = NArray.float(q_lex.length, o_lex.length).fill(-Infinity)
		_A_denom = NArray.float(q_lex.length).fill(-Infinity)
		_B_denom = NArray.float(q_lex.length).fill(-Infinity)
		_Pi = NArray.float(q_lex.length)
		
		logprob = 0.0
		
		#logprob = last_logprob + 1 # take this out
		
		sequences.each do |sequence|
			# just in case, skip if sequence contains unrecognized tokens
			next unless (sequence-o_lex).empty?
			
			# compute forward and backward probabilities
			alpha = forward_probability(sequence)
			beta = backward_probability(sequence)
			lpk = log_add(alpha[-1, true]) #sum of last alphas. divide by this to get probs
			logprob += lpk
			
			local_A_numer = NArray.float(q_lex.length,q_lex.length).fill(-Infinity)
			local_B_numer = NArray.float(q_lex.length, o_lex.length).fill(-Infinity)
			local_A_denom = NArray.float(q_lex.length).fill(-Infinity)
			local_B_denom = NArray.float(q_lex.length).fill(-Infinity)
			local_Pi = NArray.float(q_lex.length)
			
			sequence.each_with_index do |o, t|
				o_next = index(sequence[t+1], o_lex) if t < sequence.length-1
				
				q_lex.each_index do |i|
					if t < sequence.length-1
						q_lex.each_index do |j|
							local_A_numer[i, j] =  \
								log_add([local_A_numer[i, j], \
								alpha[t, i] + \
									log(@a[i,j]) + \
									log(@b[j,o_next]) + \
									beta[t+1, j]])
						end
						local_A_denom[i] = log_add([local_A_denom[i],
									alpha[t, i] + beta[t, i]])
	
					else
						local_B_denom[i] = log_add([local_A_denom[i],
									alpha[t, i] + beta[t, i]])
					end
					local_B_numer[i, index(o,o_lex)] = log_add([local_B_numer[i, index(o, o_lex)],
						alpha[t, i] + beta[t, i]])

				end
				
				puts local_A_numer.inspect if debug
				
				q_lex.each_index do |i|
					q_lex.each_index do |j|
						_A_numer[i, j] = log_add([_A_numer[i, j],
							local_A_numer[i, j] - lpk])
					end
					o_lex.each_index do |k|	
						_B_numer[i, k] = log_add([_B_numer[i, k], local_B_numer[i, k] - lpk])
					end
					_A_denom[i] = log_add([_A_denom[i], local_A_denom[i] - lpk])
					_B_denom[i] = log_add([_B_denom[i], local_B_denom[i] - lpk])
				end
				
			end
		
			puts alpha.collect{|x| Math::E**x}.inspect if debug
		end		

		puts _A_denom.inspect if debug

		q_lex.each_index do |i|
			q_lex.each_index do |j|
				#puts 2**(_A_numer[i,j] - _A_denom[i]), _A_numer[i,j], _A_denom[i]
				@a[i, j] = Math::E**(_A_numer[i,j] - _A_denom[i])
			end
			o_lex.each_index do |k|
				@b[i, k] = Math::E**(_B_numer[i,k] - _B_denom[i])
			end
			# This comment appears in NLTK:
			# Rabiner says the priors don't need to be updated. I don't
			# believe him. FIXME
		end
			

		if iteration > 0 and (logprob - last_logprob).abs < epsilon
			puts "CONVERGED: #{(logprob - last_logprob).abs}" if debug
			puts "epsilon: #{epsilon}" if debug
			break
		end
		
		puts "LogProb: #{logprob}" #if debug
		
		last_logprob = logprob
	end
end

#train_unsupervised2(sequences) ⇒ Object



64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
# File 'lib/hmm.rb', line 64

def train_unsupervised2(sequences)
	# for debugging ONLY
	orig_sequences = sequences.clone
	sequences = [sequences.sum]
	
	# initialize model parameters if we don't already have an estimate
	@pi ||= NArray.float(@q_lex.length).fill(1)/@q_lex.length			
	@a ||= NArray.float(@q_lex.length, @q_lex.length).fill(1)/@q_lex.length
	@b ||= NArray.float(@q_lex.length, @o_lex.length).fill(1)/@q_lex.length
	puts @pi.inspect, @a.inspect, @b.inspect if debug
	
	max_iterations = 1 #1000 #kwargs.get('max_iterations', 1000)
	epsilon = 1e-6 # kwargs.get('convergence_logprob', 1e-6)
	
	max_iterations.times do |iteration|
		puts "iteration ##{iteration}" #if debug
		logprob = 0.0
		
		sequences.each do |sequence|
			# just in case, skip if sequence contains unrecognized tokens
			next unless (sequence-o_lex).empty?
			
			# compute forward and backward probabilities
			alpha = forward_probability(sequence)
			beta = backward_probability(sequence)
			lpk = log_add(alpha[-1, true]) #sum of last alphas. divide by this to get probs
			logprob += lpk
			
			xi = xi(sequence)
			gamma = gamma(xi)
			
			localA = NArray.float(q_lex.length,q_lex.length)
			localB = NArray.float(q_lex.length,o_lex.length)
			
			q_lex.each_index do |i|
				q_lex.each_index do |j|
					numA = -Infinity
					denomA = -Infinity
					sequence.each_index do |t|
						break if t >= sequence.length-1
						numA = log_add([numA, xi[t, i, j]])
						denomA = log_add([denomA, gamma[t, i]])
					end
					localA[i,j] = numA - denomA
				end
				
				o_lex.each_index do |k|
					numB = -Infinity
					denomB = -Infinity
					sequence.each_index do |t|
						break if t >= sequence.length-1
						denomB = log_add([denomB, gamma[t, i]])
						next unless k == index(sequence[t], o_lex)
						numB = log_add([numB, gamma[t, i]])
					end
					localB[i, k] = numB - denomB
				end
				
			end
			
			puts "LogProb: #{logprob}"
			
			@a = localA.collect{|x| Math::E**x}
			@b = localB.collect{|x| Math::E**x}
			#@pi = gamma[0, true] / gamma[0, true].sum
			
		end
	end
end

#xi(sequence) ⇒ Object



249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
# File 'lib/hmm.rb', line 249

def xi(sequence)
	xi = NArray.float(sequence.length-1, q_lex.length, q_lex.length)
	
	alpha = forward_probability(sequence)
	beta = backward_probability(sequence)
	
	0.upto sequence.length-2 do |t|
		denom = 0
		q_lex.each_index do |i|
			q_lex.each_index do |j|
				x = alpha[t, i] + log(@a[i,j]) + \
					log(@b[j,index(sequence[t+1], o_lex)]) + \
					beta[t+1, j]
				denom = log_add([denom, x])
			end
		end
		
		q_lex.each_index do |i|
			q_lex.each_index do |j|
				numer = alpha[t, i] + log(@a[i,j]) + \
					log(@b[j,index(sequence[t+1], o_lex)]) + \
					beta[t+1, j]
				xi[t, i, j] = numer - denom
			end
		end
	end
	
	puts "Xi: #{xi.inspect}" if debug
	xi
end