Class: WordDic
- Inherits:
-
Object
- Object
- WordDic
- Defined in:
- lib/igo/dictionary.rb
Instance Method Summary collapse
- #cost(word_id) ⇒ Object
-
#initialize(data_dir) ⇒ WordDic
constructor
- コンストラクタ.
  data_dir: 辞書ファイルのディレクトリパス.
- #search(text, start, result) ⇒ Object
- #search_from_trie_id(trie_id, start, word_length, is_space, result) ⇒ Object
- #word_data(word_id) ⇒ Object
Constructor Details
#initialize(data_dir) ⇒ WordDic
コンストラクタ.
- data_dir: 辞書ファイルのディレクトリパス.
136 137 138 139 140 141 142 143 144 145 146 147 148 |
# File 'lib/igo/dictionary.rb', line 136
# Loads the word dictionary from the files found under +data_dir+.
#
# Four files are read: "word2id" (handed to Searcher), "word.dat" and
# "word.ary.idx" (read through FileMappedInputStream class helpers), and
# "word.inf", which supplies four parallel per-word arrays.
#
# @param data_dir [String] path of the directory containing the dictionary files
def initialize(data_dir)
  @trie    = Searcher.new(data_dir + "/word2id")
  @data    = FileMappedInputStream.get_string(data_dir + "/word.dat")
  @indices = FileMappedInputStream.get_int_array(data_dir + "/word.ary.idx")

  fmis = FileMappedInputStream.new(data_dir + "/word.inf")
  begin
    # The file size is divided by 4 + 2 + 2 + 2 to obtain the word count;
    # presumably one 4-byte int plus three 2-byte shorts per word -- TODO confirm
    # against FileMappedInputStream.
    word_count = fmis.size / (4 + 2 + 2 + 2)

    @data_offsets = fmis.get_int_array(word_count)    # start position of each word's feature data
    @left_ids     = fmis.get_short_array(word_count)  # left context ID of each word
    @right_ids    = fmis.get_short_array(word_count)  # right context ID of each word
    @costs        = fmis.get_short_array(word_count)  # cost of each word
  ensure
    # Close the stream even when one of the reads above raises.
    fmis.close
  end
end
Instance Method Details
#cost(word_id) ⇒ Object
150 151 152 |
# File 'lib/igo/dictionary.rb', line 150
# Looks up the cost recorded for a word.
#
# @param word_id [Integer] index into the @costs array
# @return [Object] the element of @costs at +word_id+
def cost(word_id)
  @costs[word_id]
end
#search(text, start, result) ⇒ Object
154 155 156 157 158 159 160 161 162 163 164 165 166 |
# File 'lib/igo/dictionary.rb', line 154
# Collects a ViterbiNode for every dictionary word reported by the trie's
# common-prefix search over +text+ beginning at +start+.
#
# For each match the trie reports, the word ids from indices[trie_id] up to
# (but not including) indices[trie_id + 1] are turned into nodes and pushed
# onto +result+. Nodes are always created with the last argument false.
#
# @param text [Object] text to search; passed through to the trie unchanged
# @param start [Integer] start position; passed through to the trie unchanged
# @param result [#push] collector that receives the created ViterbiNode objects
# @return [Object] whatever @trie.each_common_prefix returns
def search(text, start, result)
  indices   = @indices
  left_ids  = @left_ids
  right_ids = @right_ids

  # A non-lambda proc is kept on purpose so argument handling matches the
  # original Proc.new callback. The second callback argument is forwarded to
  # ViterbiNode in the slot search_from_trie_id calls word_length.
  on_match = proc do |match_start, offset, trie_id|
    (indices[trie_id]..(indices[trie_id + 1] - 1)).each do |word_id|
      result.push(ViterbiNode.new(word_id, match_start, offset, left_ids[word_id], right_ids[word_id], false))
    end
  end

  @trie.each_common_prefix(text, start, on_match)
end
#search_from_trie_id(trie_id, start, word_length, is_space, result) ⇒ Object
168 169 170 171 172 173 |
# File 'lib/igo/dictionary.rb', line 168
# Pushes one ViterbiNode onto +result+ for every word id registered under
# +trie_id+, i.e. the ids from @indices[trie_id] up to (but not including)
# @indices[trie_id + 1].
#
# @param trie_id [Integer] index into @indices selecting the word-id range
# @param start [Object] forwarded to ViterbiNode.new as its second argument
# @param word_length [Object] forwarded to ViterbiNode.new as its third argument
# @param is_space [Object] forwarded to ViterbiNode.new as its last argument
# @param result [#push] collector that receives the created nodes
# @return [Range] the range of word ids that was iterated
def search_from_trie_id(trie_id, start, word_length, is_space, result)
  (@indices[trie_id]..(@indices[trie_id + 1] - 1)).each do |word_id|
    result.push(ViterbiNode.new(word_id, start, word_length, @left_ids[word_id], @right_ids[word_id], is_space))
  end
end
#word_data(word_id) ⇒ Object
175 176 177 |
# File 'lib/igo/dictionary.rb', line 175
# Returns the portion of @data that belongs to a word.
#
# The span runs from the word's own offset to the next word's offset, both
# doubled before slicing. NOTE(review): the doubling suggests offsets count
# 2-unit characters while @data is indexed per unit -- confirm against
# FileMappedInputStream.get_string.
#
# @param word_id [Integer] index into @data_offsets; word_id + 1 must also be present
# @return [Object] the slice of @data for this word (empty when both offsets are equal)
def word_data(word_id)
  first = @data_offsets[word_id] * 2
  last  = @data_offsets[word_id + 1] * 2

  # Exclusive upper bound: an inclusive "last - 1" turns into -1 when last is 0,
  # which would select through the end of @data instead of nothing.
  @data.slice(first...last)
end