Method: Analects::Tokenizer#initialize

Defined in:
lib/analects/tokenizer.rb

#initialize(chars_dic = '/tmp/chars.dic', words_dic = '/tmp/words.dic') ⇒ Tokenizer

Returns a new instance of Tokenizer.



6
7
8
9
10
11
12
# File 'lib/analects/tokenizer.rb', line 6

# Sets up the segmenter's dictionaries, generating them first when needed.
#
# If either dictionary file is absent, both are (re)built via
# +create_dict_from_cedict+ before RMMSeg is configured to use them.
#
# @param chars_dic [String] path to the character dictionary file
# @param words_dic [String] path to the word dictionary file
def initialize(chars_dic = '/tmp/chars.dic', words_dic = '/tmp/words.dic')
  dictionaries_present = [chars_dic, words_dic].all? { |path| File.exist?(path) }
  create_dict_from_cedict(chars_dic, words_dic) unless dictionaries_present

  # An earlier form of this configuration was left disabled here:
  #   RMMSeg::Dictionary.dictionaries = [[:chars, chars_dic], [:words, words_dic]]
  #
  # NOTE(review): the boolean paired with each path is passed straight to
  # RMMSeg; presumably it marks whether the dictionary carries frequency
  # data -- confirm against the rmmseg documentation.
  RMMSeg::Config.dictionaries = [
    [chars_dic, true],
    [words_dic, false]
  ]
end