Class: TM::Token

Inherits:
Object
  • Object
show all
Defined in:
lib/nysol/token.rb

Overview

形態素を表すクラス(双方向リスト)

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(xmlToken, chunk) ⇒ Token

xmlTokenがnilの場合は、終端目的の空token(ダミー)として初期化する。(旧シグネチャ: def initialize(id=nil,nxt, prv, word=nil,class1=nil,class2=nil,class3=nil,class4=nil,form1=nil,form2=nil,chunk=nil))



33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
# File 'lib/nysol/token.rb', line 33

# Builds a token and links it to the end of the chunk's doubly linked
# token list.
#
# The new token's predecessor is the last element of +chunk.tokens+, or
# +chunk.dummy+ when that list is empty; its successor is always
# +chunk.dummy+ (the list terminator). The predecessor's +next+ is
# repointed to the new token. The constructor does not append the token
# to +chunk.tokens+ itself.
#
# @param xmlToken [#attribute, nil] source of the token attributes
#   (presumably an XML element -- confirm against the caller); +nil+
#   builds the terminal dummy token, whose id is nil and whose text
#   fields are all ""
# @param chunk [Object] owning chunk; must respond to +tokens+ and +dummy+
def initialize(xmlToken, chunk)
  # Doubly linked list wiring.
  siblings = chunk.tokens
  @prev = siblings.empty? ? chunk.dummy : siblings.last
  @next = chunk.dummy # terminator
  # @prev is nil when chunk.dummy is nil and no tokens exist yet.
  @prev.next = self unless @prev.nil?

  # Member variables.
  @chunk = chunk
  terminal = xmlToken.nil?
  # A nil id marks the dummy token (terminator of the doubly linked list).
  @id = terminal ? nil : xmlToken.attribute("id").to_s
  %w[word orgWord class1 class2 class3 class4 form1 form2].each do |field|
    text = terminal ? "" : xmlToken.attribute(field).to_s
    instance_variable_set("@#{field}", text)
  end
end

Instance Attribute Details

#chunk ⇒ Object (readonly)

このtokenが属するchunk



27
28
29
# File 'lib/nysol/token.rb', line 27

# The chunk this token belongs to, i.e. the +chunk+ argument that was
# passed to the constructor.
#
# @return [Object] the owning chunk
def chunk
  @chunk
end

#class1 ⇒ Object (readonly)

Returns the value of attribute class1.



21
22
23
# File 'lib/nysol/token.rb', line 21

# The "class1" attribute of the source XML token, converted with +to_s+.
# It is "" for the terminal dummy token (built from a nil xmlToken).
# Presumably the top-level part-of-speech category: declined? compares
# it with "動詞", "形容詞" and "名詞" -- confirm.
#
# @return [String]
def class1
  @class1
end

#class2 ⇒ Object (readonly)

Returns the value of attribute class2.



22
23
24
# File 'lib/nysol/token.rb', line 22

# The "class2" attribute of the source XML token, converted with +to_s+.
# It is "" for the terminal dummy token (built from a nil xmlToken).
# Presumably a part-of-speech subcategory: declined? compares it with
# "形容動詞語幹" and "サ変接続" -- confirm.
#
# @return [String]
def class2
  @class2
end

#class3 ⇒ Object (readonly)

Returns the value of attribute class3.



23
24
25
# File 'lib/nysol/token.rb', line 23

# The "class3" attribute of the source XML token, converted with +to_s+.
# It is "" for the terminal dummy token (built from a nil xmlToken).
#
# @return [String]
def class3
  @class3
end

#class4 ⇒ Object (readonly)

Returns the value of attribute class4.



24
25
26
# File 'lib/nysol/token.rb', line 24

# The "class4" attribute of the source XML token, converted with +to_s+.
# It is "" for the terminal dummy token (built from a nil xmlToken).
#
# @return [String]
def class4
  @class4
end

#form1 ⇒ Object (readonly)

Returns the value of attribute form1.



25
26
27
# File 'lib/nysol/token.rb', line 25

# The "form1" attribute of the source XML token, converted with +to_s+.
# It is "" for the terminal dummy token (built from a nil xmlToken).
# NOTE(review): presumably conjugation information -- confirm.
#
# @return [String]
def form1
  @form1
end

#form2 ⇒ Object (readonly)

Returns the value of attribute form2.



26
27
28
# File 'lib/nysol/token.rb', line 26

# The "form2" attribute of the source XML token, converted with +to_s+.
# It is "" for the terminal dummy token (built from a nil xmlToken).
# NOTE(review): presumably conjugation information -- confirm.
#
# @return [String]
def form2
  @form2
end

#id ⇒ Object (readonly)

tokenID(文字列)



18
19
20
# File 'lib/nysol/token.rb', line 18

# Token ID as a String, taken from the "id" attribute of the source XML
# token. It is nil for the terminal dummy token.
#
# @return [String, nil]
def id
  @id
end

#next ⇒ Object

次のtoken



28
29
30
# File 'lib/nysol/token.rb', line 28

# The next token in the doubly linked list. The constructor sets it to
# chunk.dummy (the terminator); the constructor of a later token
# repoints it through the +next=+ writer.
#
# @return [Object, nil] the following token, or the terminator
def next
  @next
end

#orgWord ⇒ Object (readonly)

オリジナル語



20
21
22
# File 'lib/nysol/token.rb', line 20

# The original word, taken from the "orgWord" attribute of the source
# XML token and converted with +to_s+. It is "" for the terminal dummy
# token.
#
# @return [String]
def orgWord
  @orgWord
end

#prev ⇒ Object

前のtoken



29
30
31
# File 'lib/nysol/token.rb', line 29

# The previous token in the doubly linked list. The constructor sets it
# to the last element of chunk.tokens, or to chunk.dummy when the chunk
# has no tokens yet.
#
# @return [Object, nil] the preceding token, or the terminator
def prev
  @prev
end

#word ⇒ Object (readonly)

原型語



19
20
21
# File 'lib/nysol/token.rb', line 19

# The base-form word, taken from the "word" attribute of the source XML
# token and converted with +to_s+. It is "" for the terminal dummy
# token; declined? relies on that to detect the end of the list.
#
# @return [String]
def word
  @word
end

Instance Method Details

#declined? ⇒ Boolean

用言かどうか判定

Returns:

  • (Boolean)


98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
# File 'lib/nysol/token.rb', line 98

# Tells whether this token is a declinable word (yougen).
#
# True for class1 "動詞" and "形容詞", and for class1 "名詞" whose class2
# is "形容動詞語幹". A "名詞" with class2 "サ変接続" also counts when the
# next token has an empty word, which is how the constructor initializes
# the terminal dummy token.
#
# @return [Boolean]
def declined?
  case class1
  when "動詞", "形容詞"
    true
  when "名詞"
    # A sahen-connection noun that ends the phrase (taigen-dome) is
    # treated as declinable.
    # 00000001,4,1,2,D,2,消費,消費,名詞,サ変接続,*,*,*,*
    # 00000001,4,1,2,D,3,者,者,名詞,接尾,一般,*,*,*
    # 00000001,4,1,2,D,4,心理,心理,名詞,一般,*,*,*,*
    # 00000001,4,1,2,D,5,も,も,助詞,係助詞,*,*,*,*
    # 00000001,4,2,-1,D,6,急降下,急降下,名詞,サ変接続,*,*,*,*
    # 00000001,4,2,-1,D,7,。,。,記号,句点,*,*,*,*
    class2 == "形容動詞語幹" || (class2 == "サ変接続" && self.next.word == "")
  else
    false
  end
end

#ignoreClass? ⇒ Boolean

Returns:

  • (Boolean)


114
115
116
117
118
119
120
121
122
# File 'lib/nysol/token.rb', line 114

# Tells whether this token should be ignored: either its class1 is one
# of the listed categories, or its word is the placeholder "*".
#
# @return [Boolean]
def ignoreClass?()
  skipped = ["連体詞", "接頭詞", "接続詞", "助詞", "助動詞", "感動詞", "記号", "フィラー", "その他", "未知語"]
  # Disabled check kept from the original (nouns of class2 "数" / "固有名詞"):
  #if class1=="名詞" then
  #	return true if class2=="数"
  #	return true if class2=="固有名詞"
  #end
  skipped.include?(class1) || word == "*"
end

#show(fp = STDERR) ⇒ Object



84
85
86
87
88
89
90
91
92
93
94
95
# File 'lib/nysol/token.rb', line 84

# Prints a one-line, human-readable dump of this token: its id, the id
# of its chunk, and every text field that is not "". The line starts
# with three tabs and ends with a newline.
#
# @param fp [IO] output stream; must respond to +print+ and +puts+
def show(fp=STDERR)
  fp.print "\t\t\tToken id=#{@id}(#{chunk.id})"
  fp.print " #{@word}" unless @word == ""
  fp.print "(#{@orgWord})" unless @orgWord == ""

  # The remaining fields share the " label=value" layout.
  labelled = [
    ["c1", @class1], ["c2", @class2], ["c3", @class3], ["c4", @class4],
    ["f1", @form1], ["f2", @form2]
  ]
  labelled.each do |label, value|
    fp.print " #{label}=#{value}" unless value == ""
  end
  fp.puts ""
end

#tokenCsvout(fp) ⇒ Object



68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
# File 'lib/nysol/token.rb', line 68

# Writes this token as one comma-separated line to +fp+.
#
# Column order: article id, sentence id, chunk id, token id, article
# date, word, orgWord, class1, class2, class3, class4, form1, form2.
# NOTE(review): values are written verbatim with no quoting or escaping,
# so a field that contains "," produces a malformed row -- confirm that
# consumers tolerate this.
#
# @param fp [IO] output stream; must respond to +print+
def tokenCsvout(fp)
  sentence = chunk.sentence
  article = sentence.article
  columns = [
    article.id, sentence.id, chunk.id, @id, article.date,
    @word, @orgWord, @class1, @class2, @class3, @class4, @form1, @form2
  ]
  fp.print "#{columns.map(&:to_s).join(',')}\n"
end