Class: Igo::WordDic

Inherits:
Object
  • Object
show all
Defined in:
lib/igo/dictionary.rb

Instance Method Summary collapse

Constructor Details

#initialize(data_dir) ⇒ WordDic

コンストラクタ

data_dir

辞書ファイルのディレクトリパス



142
143
144
145
146
147
148
149
150
151
152
153
154
# File 'lib/igo/dictionary.rb', line 142

# Loads the word dictionary from the files found under +data_dir+.
#
# Reads "word2id" (through Searcher), "word.dat", "word.ary.idx" and
# "word.inf", and keeps their contents in instance variables.
#
# data_dir:: path of the directory that holds the dictionary files
def initialize(data_dir)
  @trie = Searcher.new(data_dir + "/word2id")
  @data = FileMappedInputStream.get_string(data_dir + "/word.dat")
  @indices = FileMappedInputStream.get_int_array(data_dir + "/word.ary.idx")

  fmis = FileMappedInputStream.new(data_dir + "/word.inf")
  begin
    # One int array and three short arrays of equal length are read below;
    # the divisor presumably reflects 4-byte ints and 2-byte shorts
    # (confirm against FileMappedInputStream).
    word_count = fmis.size / (4 + 2 + 2 + 2)
    @data_offsets = fmis.get_int_array(word_count)   # start position of each word's feature data
    @left_ids     = fmis.get_short_array(word_count) # left context ID of each word
    @right_ids    = fmis.get_short_array(word_count) # right context ID of each word
    @costs        = fmis.get_short_array(word_count) # cost of each word
  ensure
    # Release the stream even when one of the reads above raises.
    fmis.close
  end
end

Instance Method Details

#cost(word_id) ⇒ Object



156
157
158
# File 'lib/igo/dictionary.rb', line 156

# Looks up the cost stored for a word.
#
# word_id:: index into the cost table loaded by the constructor
#
# Returns the entry of @costs at +word_id+.
def cost(word_id)
  @costs[word_id]
end

#search(text, start, result) ⇒ Object



160
161
162
163
164
165
166
167
168
169
170
171
172
# File 'lib/igo/dictionary.rb', line 160

# Collects a ViterbiNode for every dictionary word reported by the trie's
# common-prefix search over +text+ beginning at +start+.
#
# text::   input handed to the trie search unchanged
# start::  position handed to the trie search unchanged
# result:: collection that receives the nodes via #push
#
# Returns whatever @trie.each_common_prefix returns.
def search(text, start, result)
  # Snapshot the lookup tables in locals and use them consistently inside
  # the callback.
  indices   = @indices
  left_ids  = @left_ids
  right_ids = @right_ids

  # Kept as a non-lambda proc so argument handling stays as lenient as the
  # original Proc.new callback. The first block parameter is named
  # differently from the method's +start+ so it no longer shadows it.
  on_match = proc do |match_start, offset, trie_id|
    # Word ids for one trie id span indices[trie_id] up to, but not
    # including, indices[trie_id + 1].
    first_word_id = indices[trie_id]
    last_word_id  = indices[trie_id + 1] - 1

    (first_word_id..last_word_id).each do |word_id|
      result.push(ViterbiNode.new(word_id, match_start, offset, left_ids[word_id], right_ids[word_id], false))
    end
  end

  @trie.each_common_prefix(text, start, on_match)
end

#search_from_trie_id(trie_id, start, word_length, is_space, result) ⇒ Object



174
175
176
177
178
179
# File 'lib/igo/dictionary.rb', line 174

# Appends one ViterbiNode to +result+ for every word id registered under
# +trie_id+.
#
# trie_id::     index into @indices; the word ids run from @indices[trie_id]
#               up to, but not including, @indices[trie_id + 1]
# start::       passed through to each ViterbiNode
# word_length:: passed through to each ViterbiNode
# is_space::    passed through to each ViterbiNode
# result::      collection that receives the nodes via #push
def search_from_trie_id(trie_id, start, word_length, is_space, result)
  first_word_id = @indices[trie_id]
  last_word_id  = @indices[trie_id + 1] - 1

  (first_word_id..last_word_id).each do |word_id|
    node = ViterbiNode.new(word_id, start, word_length, @left_ids[word_id], @right_ids[word_id], is_space)
    result.push(node)
  end
end

#word_data(word_id) ⇒ Object



181
182
183
# File 'lib/igo/dictionary.rb', line 181

# Returns the portion of @data that belongs to +word_id+.
#
# The portion runs from the word's own offset up to, but not including, the
# offset stored for the following word id. Both offsets are doubled before
# slicing — presumably they count 2-byte units while @data is indexed per
# byte; confirm against FileMappedInputStream.get_string.
#
# word_id:: index into @data_offsets; @data_offsets[word_id + 1] must exist
def word_data(word_id)
  first = @data_offsets[word_id] * 2
  limit = @data_offsets[word_id + 1] * 2

  # Half-open range: an inclusive range ending at limit - 1 becomes 0..-1
  # for a zero-length entry at offset 0, which would select the whole
  # string instead of nothing.
  @data.slice(first...limit)
end