Class: PinYin::Backend::EZSeg

Inherits:
Object
  • Object
show all
Defined in:
lib/ruby-pinyin-ez/backend/ezseg.rb

Instance Method Summary collapse

Constructor Details

#initialize(override_files = []) ⇒ EZSeg

Returns a new instance of EZSeg.



5
6
7
# File 'lib/ruby-pinyin-ez/backend/ezseg.rb', line 5

# Creates the backend and stores the given override files in @override_files.
#
# @param override_files [Array, nil] value to store; nil (or false) is
#   replaced by a new empty Array
def initialize(override_files=[])
  @override_files = override_files
  @override_files ||= []
end

Instance Method Details

#romanize(str, tone = nil, include_punctuations = false) ⇒ Object



11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
# File 'lib/ruby-pinyin-ez/backend/ezseg.rb', line 11

# Converts +str+ into a list of romanized values.
#
# The input is split with +segment+; every character of every segment is then
# looked up in +codes+ by its upper-case hexadecimal code point.
#
# * A character with readings contributes one Value (built via +format+), or
#   an Array of Values when it has more than one reading.
# * A character without readings that is an ASCII letter, digit or whitespace
#   is appended to the previous entry when that entry is flagged as English;
#   otherwise it starts a new English Value (a lone space is dropped).
# * Any other character is kept only when +include_punctuations+ is true. It
#   is replaced by the hex-decoded +Punctuation+ entry when one exists, and is
#   appended to the previous entry (or to the result when there is none).
#
# @param str [String, nil] text to romanize
# @param tone [Object] passed through unchanged to +format+
# @param include_punctuations [Boolean] keep characters that have no reading
#   and are not ASCII alphanumerics/whitespace
# @return [Array] collected values in input order; +[]+ for nil or empty input
def romanize(str, tone=nil, include_punctuations=false)
  return [] unless str && str.length > 0

  res = []
  segment(str).each do |word|
    # Guard the segment itself. The previous code re-tested +str+ here, which
    # is always non-empty at this point, so a nil segment crashed on #unpack.
    next unless word && !word.empty?

    word.unpack('U*').each do |t|
      code = sprintf('%x', t).upcase
      readings = codes[code]

      if readings
        multiple_arr = readings.map { |one| Value.new(format([one], tone), false) }
        res << (multiple_arr.length > 1 ? multiple_arr : multiple_arr[0])
        next
      end

      val = [t].pack('U*')
      # Only plain ASCII letters, digits and whitespace are restored as-is;
      # special characters such as full-width symbols do not match.
      if val =~ /^[0-9a-zA-Z\s]*$/
        if res.last && res.last.respond_to?(:english?) && res.last.english?
          # Extend the current run of English text.
          res.last << Value.new(val, true)
        elsif val != ' '
          res << Value.new(val, true)
        end
      elsif include_punctuations
        val = [Punctuation[code]].pack('H*') if Punctuation.include?(code)
        (res.last || res) << Value.new(val, false)
      end
    end
  end
  res
end