Class: TM::Candidate

Inherits:
Object
  • Object
show all
Defined in:
lib/nysol/dictionary.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(iterNo) ⇒ Candidate

Returns a new instance of Candidate.



220
221
222
223
224
# File 'lib/nysol/dictionary.rb', line 220

# Creates an empty candidate set.
#
# @param iterNo [Object] stored unchanged in @iterNo
def initialize(iterNo)
	@iterNo = iterNo
	@sCandidates = {} # simple entries
	@cCandidates = {} # complex entries
end

Instance Attribute Details

#cCandidates ⇒ Object (readonly)

Returns the value of attribute cCandidates.



217
218
219
# File 'lib/nysol/dictionary.rb', line 217

# Reader for @cCandidates, the Hash of complex-entry candidates
# created empty by the constructor.
def cCandidates
  @cCandidates
end

#iterNo ⇒ Object (readonly)

Returns the value of attribute iterNo.



218
219
220
# File 'lib/nysol/dictionary.rb', line 218

# Reader for @iterNo, the value that was passed to the constructor.
def iterNo
  @iterNo
end

#sCandidates ⇒ Object (readonly)

Returns the value of attribute sCandidates.



216
217
218
# File 'lib/nysol/dictionary.rb', line 216

# Reader for @sCandidates, the Hash of simple-entry candidates
# created empty by the constructor.
def sCandidates
  @sCandidates
end

Instance Method Details

#add(entry, pol) ⇒ Object

候補表現としてentryを極性polにて追加する



227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
# File 'lib/nysol/dictionary.rb', line 227

# Registers entry as a candidate expression observed with polarity pol.
#
# The candidate is keyed by entry.to_s. An entry whose scFlag equals 0 goes
# into the simple table (@sCandidates) as a SimpleEntryCand; any other entry
# goes into the complex table (@cCandidates) as a ComplexEntryCand. When the
# key is already present the existing candidate's countUp(pol) is called
# instead of creating a new one.
#
# @param entry [#to_s, #scFlag] the entry to register
# @param pol   [Object] polarity handed to the candidate's constructor or countUp
# @return [Object] the result of countUp, or the newly stored candidate
def add(entry,pol)
	name = entry.to_s
	simple = (entry.scFlag == 0)
	table = simple ? @sCandidates : @cCandidates

	if table.key?(name)
		table[name].countUp(pol)
	else
		table[name] = (simple ? SimpleEntryCand : ComplexEntryCand).new(entry, pol)
	end
end

#addCand(cand) ⇒ Object

Candidateを統合する(Multi process対応にて利用)



248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
# File 'lib/nysol/dictionary.rb', line 248

# Merges the counts of another Candidate into this one (the original note
# says this is used for multi-process support).
#
# Every candidate in cand is looked up here by the string form of its entry
# (not by the key it had in cand's hash). A missing candidate is first created
# with polarity 0, then the incoming posCount and negCount are added to it.
#
# @param cand [#sCandidates, #cCandidates] the candidate set to merge in
# @return [Hash] cand.cCandidates
def addCand(cand)
	# simple entries
	cand.sCandidates.each_value do |incoming|
		name = incoming.sEntry.to_s
		@sCandidates[name] = SimpleEntryCand.new(incoming.sEntry, 0) unless @sCandidates.key?(name)
		merged = @sCandidates[name]
		merged.posCount += incoming.posCount
		merged.negCount += incoming.negCount
	end

	# complex entries
	cand.cCandidates.each_value do |incoming|
		name = incoming.cEntry.to_s
		@cCandidates[name] = ComplexEntryCand.new(incoming.cEntry, 0) unless @cCandidates.key?(name)
		merged = @cCandidates[name]
		merged.posCount += incoming.posCount
		merged.negCount += incoming.negCount
	end
end

#estProb ⇒ Object

既に極性が判定されているエントリの誤用率を、その極性における推定誤用率として用いる



384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
# File 'lib/nysol/dictionary.rb', line 384

# Estimates the misuse rate of each polarity from the simple entries whose
# own polarity is already decided.
#
# For entries with polarity 1 the negative counts are the errors; for entries
# with polarity -1 the positive counts are. Each rate is errors / total count,
# or 0.0 when no entry of that polarity has a positive total.
#
# @return [Array<Float>] [positive misuse rate, negative misuse rate]
def estProb
	pos_err = pos_ttl = 0
	neg_err = neg_ttl = 0

	@sCandidates.each_value do |cand|
		known = cand.sEntry.polarity
		if known == 1
			pos_err += cand.negCount
			pos_ttl += cand.ttlCount
		elsif known == -1
			neg_err += cand.posCount
			neg_ttl += cand.ttlCount
		end
	end

	rate = lambda { |err, ttl| ttl > 0 ? err.to_f / ttl.to_f : 0.0 }
	[rate.call(pos_err, pos_ttl), rate.call(neg_err, neg_ttl)]
end

#evalConf(errProb, posSigLevel = 0.05, negSigLevel = 0.05) ⇒ Object

errProbを各極性における誤用率と仮定し、各エントリの極性出現数が有意(肯定極性はposSigLevel未満、否定極性はnegSigLevel未満)であればconfFlgをtrueとする。



330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
# File 'lib/nysol/dictionary.rb', line 330

# Sets conf / confFlg on every simple and complex candidate.
#
# * polarity 1:  conf = ProbDist#prob(posCount, ttlCount, errProb),
#                confFlg = conf < posSigLevel
# * polarity -1: conf = ProbDist#prob(negCount, ttlCount, errProb),
#                confFlg = conf < negSigLevel
# * otherwise:   confFlg = false, conf is left untouched
#
# Simple candidates are processed before complex ones, sharing one ProbDist.
#
# @param errProb [Object] passed unchanged as the third argument of ProbDist#prob
#   (per the original notes, the assumed misuse rate of a binomial
#   distribution -- confirm against ProbDist)
# @param posSigLevel [Numeric] threshold applied to candidates with polarity 1
# @param negSigLevel [Numeric] threshold applied to candidates with polarity -1
# @return [Hash] @cCandidates
def evalConf(errProb, posSigLevel=0.05,negSigLevel=0.05)
	dist = ProbDist.new

	assess = lambda do |cand|
		if cand.polarity == 1
			hits, level = cand.posCount, posSigLevel
		elsif cand.polarity == -1
			hits, level = cand.negCount, negSigLevel
		else
			cand.confFlg = false
			next
		end
		cand.conf = dist.prob(hits, cand.ttlCount, errProb)
		cand.confFlg = cand.conf < level
	end

	@sCandidates.each_value(&assess)
	@cCandidates.each_value(&assess)
end

#evalPol(th) ⇒ Object

一定割合(th)以上の極性を求める。posCount/(posCount+negCount)がth以上であれば+1、negCount/(posCount+negCount)がth以上であれば-1、その他は0とする。



307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
# File 'lib/nysol/dictionary.rb', line 307

# Decides the polarity of every simple and complex candidate from its counts.
#
# polarity becomes  1 when posCount / (posCount + negCount) >= th,
# otherwise        -1 when negCount / (posCount + negCount) >= th,
# otherwise         0. The positive test is tried first. The division is done
# in floating point, so a candidate with both counts at zero ends up at 0.
#
# @param th [Numeric] minimum share a polarity must reach
# @return [Hash] @cCandidates
def evalPol(th)
	decide = lambda do |cand|
		total = (cand.posCount + cand.negCount).to_f
		cand.polarity =
			if cand.posCount.to_f / total >= th
				1
			elsif cand.negCount.to_f / total >= th
				-1
			else
				0
			end
	end

	@sCandidates.each_value(&decide)
	@cCandidates.each_value(&decide)
end

#evalSupp(sSupp, cSupp) ⇒ Object

各エントリのttlCountがsupport(単純エントリはsSupp、複合エントリはcSupp)以上かどうかをsuppFlgにセットする(エントリ自体は削除しない)。



286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
# File 'lib/nysol/dictionary.rb', line 286

# Marks each candidate with whether it reaches the minimum support.
#
# suppFlg is set to true when ttlCount >= the threshold and false otherwise;
# no candidate is removed from either table.
#
# @param sSupp [Numeric] threshold for simple candidates
# @param cSupp [Numeric] threshold for complex candidates
# @return [Hash] @cCandidates
def evalSupp(sSupp, cSupp)
	@sCandidates.each_value { |cand| cand.suppFlg = (cand.ttlCount >= sSupp) }
	@cCandidates.each_value { |cand| cand.suppFlg = (cand.ttlCount >= cSupp) }
end

#setTotalCount(tbl) ⇒ Object

エントリ文字列をキーとした総件数表をttlCountにセットする



271
272
273
274
275
276
277
278
279
280
281
282
283
# File 'lib/nysol/dictionary.rb', line 271

# Copies total counts from a lookup table into every candidate.
#
# Each candidate's ttlCount is set to tbl[key], where key is the candidate's
# key in @sCandidates / @cCandidates (the entry string). Whatever tbl returns
# for a key is stored as-is, including nil for a key tbl does not have.
#
# @param tbl [#[]] table of total counts indexed by entry string
# @return [Hash] @cCandidates
def setTotalCount(tbl)
	# simple entries
	@sCandidates.each_pair { |name, simple| simple.ttlCount = tbl[name] }
	# complex entries
	@cCandidates.each_pair { |name, complex| complex.ttlCount = tbl[name] }
end

#show(sort = 0, fp = STDERR) ⇒ Object



476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
# File 'lib/nysol/dictionary.rb', line 476

# Prints every candidate by calling its own show(fp): all simple candidates
# first, then all complex ones, each group sorted independently.
#
# @param sort [Integer] ordering selector:
#   0     - ascending ttlCount
#   1     - descending posCount, ties broken by descending ttlCount
#   other - descending negCount, ties broken by descending ttlCount
# @param fp [Object] destination handed to each candidate's show (STDERR by default)
# @return [Array] the sorted complex candidates
def show(sort=0,fp=STDERR)
	# Pick the comparator once; the expressions are the ones used for both tables.
	ordering =
		if sort == 0
			lambda { |a, b| a.ttlCount <=> b.ttlCount }
		elsif sort == 1
			lambda { |a, b| (a.posCount <=> b.posCount)*(-2) + (a.ttlCount <=> b.ttlCount)*(-1) }
		else
			lambda { |a, b| (a.negCount <=> b.negCount)*(-2) + (a.ttlCount <=> b.ttlCount)*(-1) }
		end

	@sCandidates.values.sort(&ordering).each { |cand| cand.show(fp) }
	@cCandidates.values.sort(&ordering).each { |cand| cand.show(fp) }
end

#writeDic(dicName, chkFlg = true) ⇒ Object



409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
# File 'lib/nysol/dictionary.rb', line 409

# Writes the candidates to the file dicName as comma-separated text and
# returns the number of data rows written.
#
# The file is opened in "w" mode. A header line naming eight columns is
# written first, then one row per simple candidate, then one row per complex
# candidate. Each row ends with iterNo, posCount, negCount and ttlCount.
#
# When chkFlg is true, a candidate whose entry has a negative iterNo is
# skipped if
#   * its ttlCount is greater than 5000, or
#   * the entry's own polarity is 0 and the candidate fails any of:
#     suppFlg set, polarity non-zero, confFlg set, or
#   * (complex candidates only) its dPhrase string equals the dPhrase string
#     of a simple candidate already written by this call.
# Candidates whose entry iterNo is not negative are always written.
#
# @param dicName [String] path of the file to create or overwrite
# @param chkFlg [Boolean] apply the filters described above (default true)
# @return [Integer] number of candidate rows written (the header is not counted)
def writeDic(dicName, chkFlg=true)
	count=0
  File::open(dicName, "w"){|wfp|
		wfp.puts "用言句,格助詞句,格助詞,極性,iterNo,pos件数,neg件数,全件数"
		sOutput=Hash.new # records the dPhrase of every sEntry written, so a cEntry with the same phrase is not output again
		@sCandidates.each{|key,cand|
			iterNo=cand.sEntry.iterNo
#if cand.sEntry.iterNo < 0 then
#print "begin: "; cand.sEntry.dPhrase.writePhrase(STDOUT) ; puts "(#{iterNo})"
#end
			if chkFlg then
				if iterNo < 0 then  # entry that was not registered in the dictionary (registered entries are output unconditionally)
					next if cand.ttlCount > 5000
					#next if (cand.posCount+cand.negCount).to_f/cand.ttlCount.to_f > 0.5
					next if (not cand.suppFlg or cand.polarity==0 or not cand.confFlg) and cand.sEntry.polarity==0
				end
			end
			# A simple row: the phrase followed by three commas, leaving the second and third columns empty.
			cand.sEntry.dPhrase.writePhrase(wfp) ; wfp.print ",,,"
			# Polarity column: the entry's own polarity, or the candidate's polarity when the entry's is 0.
			if cand.sEntry.polarity==0 then
				wfp.print cand.polarity            ; wfp.print ","
			else
				wfp.print cand.sEntry.polarity     ; wfp.print ","
			end
			#iterNo=cand.sEntry.iterNo
#cand.sEntry.dPhrase.writePhrase(STDOUT)
#puts ": it0=#{iterNo}, #{@iterNo}"
			# An entry iterNo of -1 is written as this Candidate's own @iterNo.
			iterNo=@iterNo if iterNo==-1
#puts "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx #{@iterNo}, #{iterNo} "
#puts "it1=#{iterNo}, #{@iterNo}"
			wfp.print iterNo                     ; wfp.print ","
			wfp.print cand.posCount              ; wfp.print ","
			wfp.print cand.negCount              ; wfp.print ","
			wfp.print cand.ttlCount              ; wfp.print "\n"
			count+=1

			sOutput[cand.sEntry.dPhrase.to_s]=1
		}
		@cCandidates.each{|key,cand|
			iterNo=cand.cEntry.iterNo
			if chkFlg then
				if iterNo < 0 then  # entry that was not registered in the dictionary (registered entries are output unconditionally)
					next if cand.ttlCount > 5000
					#next if (cand.posCount+cand.negCount).to_f/cand.ttlCount.to_f > 0.5
					next if (not cand.suppFlg or cand.polarity==0 or not cand.confFlg) and cand.cEntry.polarity==0
					next if sOutput[cand.cEntry.dPhrase.to_s]!=nil # skip when the same phrase was already written as an sEntry
				end
			end
			# A complex row fills the first three columns from dPhrase and pPhrase.
			cand.cEntry.dPhrase.writePhrase(wfp)   ; wfp.print ","
			cand.cEntry.pPhrase.writeWord(wfp)     ; wfp.print ","
			cand.cEntry.pPhrase.writeParticle(wfp) ; wfp.print ","
			# Polarity column: the entry's own polarity, or the candidate's polarity when the entry's is 0.
			if cand.cEntry.polarity==0 then
				wfp.print cand.polarity              ; wfp.print ","
			else
				wfp.print cand.cEntry.polarity       ; wfp.print ","
			end
			iterNo=cand.cEntry.iterNo
			# An entry iterNo of -1 is written as this Candidate's own @iterNo.
			iterNo=@iterNo if iterNo==-1
			wfp.print iterNo                       ; wfp.print ","
			wfp.print cand.posCount                ; wfp.print ","
			wfp.print cand.negCount                ; wfp.print ","
			wfp.print cand.ttlCount                ; wfp.print "\n"
			count+=1
		}
	}
	return count
end