Class: PinYin::Backend::MMSeg

Inherits:
Object
  • Object
show all
Defined in:
lib/ruby-pinyin/backend/mmseg.rb

Instance Method Summary collapse

Constructor Details

#initialize ⇒ MMSeg

Returns a new instance of MMSeg.



9
10
11
12
13
14
15
# File 'lib/ruby-pinyin/backend/mmseg.rb', line 9

# Builds the backend: creates the Simple backend used by #romanize and points
# RMMSeg at the words dictionary shipped alongside this library.
#
# Note that this edits the dictionary list held by RMMSeg::Dictionary itself
# (not a per-instance copy) and then asks RMMSeg to load the dictionaries.
def initialize
  @simple = Simple.new

  dictionaries = RMMSeg::Dictionary.dictionaries
  # Drop every registered :words entry so the bundled file is the only one.
  dictionaries.delete_if { |(kind, _path)| kind == :words }
  # Path is resolved relative to this source file.
  dictionaries << [:words, File.expand_path('../../data/words.dic', __FILE__)]

  RMMSeg::Dictionary.load_dictionaries
end

Instance Method Details

#romanize(str, tone = nil, include_punctuations = false) ⇒ Object



17
18
19
20
21
22
23
24
25
26
# File 'lib/ruby-pinyin/backend/mmseg.rb', line 17

# Romanizes +str+ by combining the Simple backend's output with word-level
# readings derived from the segmented text.
#
# @param str [String, nil] text to romanize
# @param tone [Object] passed through unchanged to the Simple backend and to
#   +format+
# @param include_punctuations [Boolean] passed through unchanged to the
#   Simple backend
# @return [Array] an empty array when +str+ is nil/false or has zero length;
#   otherwise whatever +apply+ returns (+format+ and +apply+ are defined
#   outside this listing)
def romanize(str, tone=nil, include_punctuations=false)
  return [] if !str || str.length.zero?

  segments = segment(str)

  # Per-character baseline from the Simple backend.
  baseline = @simple.romanize(str, tone, include_punctuations)

  # Word-level readings, collapsed into a single flat list.
  overrides = segments.map { |word| format(word, tone) }
  overrides = overrides.flatten

  apply(baseline, overrides)
end

#segment(str) ⇒ Object



28
29
30
31
32
33
34
35
36
# File 'lib/ruby-pinyin/backend/mmseg.rb', line 28

# Splits +str+ into words using RMMSeg.
#
# @param str [String] text to segment
# @return [Array<String>] the text of each token, in the order RMMSeg yields
#   them, each tagged as UTF-8
def segment(str)
  tokenizer = RMMSeg::Algorithm.new(str)
  pieces = []

  # next_token returns a falsy value once the input is exhausted.
  while (current = tokenizer.next_token)
    # force_encoding relabels the token text in place; it does not transcode.
    pieces << current.text.force_encoding("UTF-8")
  end

  pieces
end