Class: WordDic

Inherits:
Object
  • Object
show all
Defined in:
lib/igo/dictionary.rb

Instance Method Summary

Constructor Details

#initialize(data_dir) ⇒ WordDic

コンストラクタ

data_dir

辞書ファイルのディレクトリパス



136
137
138
139
140
141
142
143
144
145
146
147
148
# File 'lib/igo/dictionary.rb', line 136

# Constructor. Loads the word dictionary files located under +data_dir+.
#
# Reads "word2id" (through Searcher), "word.dat", "word.ary.idx" and
# "word.inf" from that directory.
#
# @param data_dir path of the directory that holds the dictionary files
def initialize(data_dir)
  @trie = Searcher.new(data_dir + "/word2id")
  @data = FileMappedInputStream.get_string(data_dir + "/word.dat")
  @indices = FileMappedInputStream.get_int_array(data_dir + "/word.ary.idx")

  fmis = FileMappedInputStream.new(data_dir + "/word.inf")
  begin
    # word.inf is read as one int array followed by three short arrays, each
    # with one entry per word; the divisor presumably reflects 4-byte ints and
    # 2-byte shorts -- confirm against FileMappedInputStream.
    word_count = fmis.size / (4 + 2 + 2 + 2)
    @data_offsets = fmis.get_int_array(word_count)   # start position of each word's feature data
    @left_ids     = fmis.get_short_array(word_count) # left context ID of each word
    @right_ids    = fmis.get_short_array(word_count) # right context ID of each word
    @costs        = fmis.get_short_array(word_count) # cost of each word
  ensure
    # Release the stream even when one of the reads above raises.
    fmis.close
  end
end

Instance Method Details

#cost(word_id) ⇒ Object



150
151
152
# File 'lib/igo/dictionary.rb', line 150

# Looks up the cost recorded for a word.
#
# @param word_id index into the cost table loaded by the constructor
# @return the entry of @costs stored at +word_id+
def cost(word_id)
  @costs[word_id]
end

#search(text, start, result) ⇒ Object



154
155
156
157
158
159
160
161
162
163
164
165
166
# File 'lib/igo/dictionary.rb', line 154

# Runs a common-prefix lookup on the trie and pushes one ViterbiNode onto
# +result+ for every word id associated with each match.
#
# @param text   text handed to the trie's each_common_prefix
# @param start  start position handed to the trie's each_common_prefix
# @param result collection that receives the nodes via #push
# @return whatever @trie.each_common_prefix returns
def search(text, start, result)
  indices = @indices
  left_ids = @left_ids
  right_ids = @right_ids

  # The callback is passed as an ordinary Proc argument (not a block). Its
  # first parameter is named differently from the method's +start+ so the
  # outer parameter is not shadowed; the value supplied by the trie is the one
  # stored in each node.
  on_match = Proc.new do |match_start, offset, trie_id|
    # Word ids for a trie id span indices[trie_id] up to, but not including,
    # indices[trie_id + 1].
    last_word = indices[trie_id + 1] - 1

    (indices[trie_id]..last_word).each do |word_id|
      result.push(ViterbiNode.new(word_id, match_start, offset, left_ids[word_id], right_ids[word_id], false))
    end
  end

  @trie.each_common_prefix(text, start, on_match)
end

#search_from_trie_id(trie_id, start, word_length, is_space, result) ⇒ Object



168
169
170
171
172
173
# File 'lib/igo/dictionary.rb', line 168

# Pushes one ViterbiNode onto +result+ for every word id associated with
# +trie_id+, i.e. the ids from @indices[trie_id] up to, but not including,
# @indices[trie_id + 1].
#
# @param trie_id     trie id whose words are expanded
# @param start       start value stored in each node
# @param word_length length value stored in each node
# @param is_space    flag stored in each node
# @param result      collection that receives the nodes via #push
def search_from_trie_id(trie_id, start, word_length, is_space, result)
  last_word = @indices[trie_id + 1] - 1
  first_word = @indices[trie_id]

  (first_word..last_word).each do |word_id|
    node = ViterbiNode.new(word_id, start, word_length, @left_ids[word_id], @right_ids[word_id], is_space)
    result.push(node)
  end
end

#word_data(word_id) ⇒ Object



175
176
177
# File 'lib/igo/dictionary.rb', line 175

# Returns the part of @data that belongs to a word.
#
# The slice runs from twice the word's entry in @data_offsets up to (but not
# including) twice the following entry. The factor of two presumably converts
# offsets counted in 2-byte units into positions in @data -- confirm.
#
# @param word_id index of the word in @data_offsets
# @return the slice of @data for that word
def word_data(word_id)
  first = @data_offsets[word_id] * 2
  last = @data_offsets[word_id + 1] * 2 - 1
  @data.slice(first..last)
end