Class: Igo::WordDic
- Inherits:
- Object
- Object → Igo::WordDic
- Defined in:
- lib/igo/dictionary.rb
Instance Method Summary collapse
- #cost(word_id) ⇒ Object
-
#initialize(data_dir) ⇒ WordDic
constructor
- コンストラクタ
- data_dir: 辞書ファイルのディレクトリパス.
- #search(text, start, result) ⇒ Object
- #search_from_trie_id(trie_id, start, word_length, is_space, result) ⇒ Object
- #word_data(word_id) ⇒ Object
Constructor Details
#initialize(data_dir) ⇒ WordDic
コンストラクタ
- data_dir: 辞書ファイルのディレクトリパス
147 148 149 150 151 152 153 154 155 156 157 158 159 |
# File 'lib/igo/dictionary.rb', line 147

# Constructor. Loads the word dictionary from the files under +data_dir+:
# "word2id" (handed to Searcher), "word.dat", "word.ary.idx" and "word.inf".
#
# @param data_dir [String] path of the directory that holds the dictionary files
def initialize(data_dir)
  @trie    = Searcher.new(data_dir + "/word2id")
  @data    = FileMappedInputStream.get_string(data_dir + "/word.dat")
  @indices = FileMappedInputStream.get_int_array(data_dir + "/word.ary.idx")

  fmis = FileMappedInputStream.new(data_dir + "/word.inf")
  begin
    # The file is read as four consecutive arrays of word_count entries each:
    # one int array followed by three short arrays (4 + 2 + 2 + 2 bytes per word).
    word_count = fmis.size / (4 + 2 + 2 + 2)

    @data_offsets = fmis.get_int_array(word_count)   # start position of each word's feature data
    @left_ids     = fmis.get_short_array(word_count) # left context ID of each word
    @right_ids    = fmis.get_short_array(word_count) # right context ID of each word
    @costs        = fmis.get_short_array(word_count) # cost of each word
  ensure
    # Close the stream even when one of the reads above raises.
    fmis.close
  end
end
Instance Method Details
#cost(word_id) ⇒ Object
161 162 163 |
# File 'lib/igo/dictionary.rb', line 161

# Looks up the cost recorded for a word.
#
# @param word_id [Integer] index into the cost table loaded by the constructor
# @return [Object] the entry of @costs stored at +word_id+
def cost(word_id)
  @costs[word_id]
end
#search(text, start, result) ⇒ Object
165 166 167 168 169 170 171 172 173 174 175 176 177 |
# File 'lib/igo/dictionary.rb', line 165

# Collects a ViterbiNode for every dictionary word reported by the trie's
# common-prefix search over +text+ beginning at +start+.
#
# For each (start, offset, trie_id) triple the trie hands to the callback,
# every word id in @indices[trie_id] ... @indices[trie_id + 1] is pushed onto
# +result+ as a ViterbiNode whose last argument (is_space) is false.
#
# @param text   the text to search (passed through to the trie unchanged)
# @param start  position in +text+ at which the search begins
# @param result collection that receives the nodes via #push
# @return whatever @trie.each_common_prefix returns
def search(text, start, result)
  # Proc.new (not a lambda) is kept so the callback stays lenient about the
  # number of arguments the trie passes in.
  on_match = Proc.new do |match_start, offset, trie_id|
    first = @indices[trie_id]
    last  = @indices[trie_id + 1]

    (first...last).each do |word_id|
      result.push(ViterbiNode.new(word_id, match_start, offset,
                                  @left_ids[word_id], @right_ids[word_id], false))
    end
  end

  @trie.each_common_prefix(text, start, on_match)
end
#search_from_trie_id(trie_id, start, word_length, is_space, result) ⇒ Object
179 180 181 182 183 184 |
# File 'lib/igo/dictionary.rb', line 179

# Pushes a ViterbiNode onto +result+ for every word id in
# @indices[trie_id] ... @indices[trie_id + 1].
#
# @param trie_id     [Integer] index into @indices selecting the word id range
# @param start       start position handed to each ViterbiNode
# @param word_length length handed to each ViterbiNode
# @param is_space    flag handed to each ViterbiNode
# @param result      collection that receives the nodes via #push
def search_from_trie_id(trie_id, start, word_length, is_space, result)
  first = @indices[trie_id]
  last  = @indices[trie_id + 1]

  # Exclusive range: the entry at trie_id + 1 marks where the next id's words begin.
  (first...last).each do |word_id|
    result.push(ViterbiNode.new(word_id, start, word_length,
                                @left_ids[word_id], @right_ids[word_id], is_space))
  end
end
#word_data(word_id) ⇒ Object
186 187 188 |
# File 'lib/igo/dictionary.rb', line 186

# Returns the slice of @data that belongs to a word.
#
# The slice runs from @data_offsets[word_id] * 2 up to, but not including,
# @data_offsets[word_id + 1] * 2.
# NOTE(review): the doubling suggests offsets count 2-byte units — confirm
# against the dictionary builder.
#
# @param word_id [Integer] index into @data_offsets
# @return [String, nil] the word's data, or nil when the start lies outside @data
def word_data(word_id)
  first = @data_offsets[word_id] * 2
  last  = @data_offsets[word_id + 1] * 2

  # An exclusive range is used on purpose: with `first..last - 1` an end
  # offset of 0 turns into `..-1`, which selects everything up to the end of
  # the string instead of an empty slice.
  @data.slice(first...last)
end