Class: Pinyin
- Inherits: Object
- Inheritance chain: Object → Pinyin
- Defined in:
- lib/chinese_pinyin.rb
Constant Summary collapse
# Tone-marked spellings of each pinyin vowel, keyed by the plain vowel
# (+:v+ stands for ü). `translate` picks the entry at index `tone digit - 1`,
# so positions 0..3 carry the four marked tones and position 4 is the
# unmarked vowel. Frozen because it is shared, read-only lookup data.
TONE_MARK = {
  a: %w(ā á ǎ à a).freeze,
  o: %w(ō ó ǒ ò o).freeze,
  e: %w(ē é ě è e).freeze,
  i: %w(ī í ǐ ì i).freeze,
  u: %w(ū ú ǔ ù u).freeze,
  v: %w(ǖ ǘ ǚ ǜ ü).freeze
}.freeze
Class Attribute Summary collapse
-
.ruby1 ⇒ Object
Returns the value of attribute ruby1.
-
.table ⇒ Object
Returns the value of attribute table.
Class Method Summary collapse
Class Attribute Details
.ruby1 ⇒ Object
Returns the value of attribute ruby1.
19 20 21 |
# File 'lib/chinese_pinyin.rb', line 19

# Reader for the class-level +ruby1+ flag.
#
# On this page the flag is only assigned by `init_table`, which sets it to
# whether RUBY_VERSION starts with "1"; before that it is nil.
#
# @return [Boolean, nil] the current value of @ruby1
def ruby1
  @ruby1
end
.table ⇒ Object
Returns the value of attribute table.
18 19 20 |
# File 'lib/chinese_pinyin.rb', line 18

# Reader for the class-level character table.
#
# On this page the table is only assigned by `init_table`, which maps the
# first whitespace-separated field of each data-file line to the rest of
# that line; before that it is nil.
#
# @return [Hash, nil] the current value of @table
def table
  @table
end
Class Method Details
.init_table ⇒ Object
21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 |
# File 'lib/chinese_pinyin.rb', line 21

# Loads the character-to-pinyin table into @table, at most once.
#
# Each line of the data file is split on the first run of whitespace: the
# first field becomes the key and the remainder of the line (including its
# trailing newline) becomes the value. Also records in @ruby1 whether the
# interpreter is a 1.x Ruby.
#
# The table is only published to @table after the file has been read
# completely, so a failed load (e.g. missing data file) does not leave an
# empty table cached behind the `return if @table` guard.
#
# @return [void]
# @raise [SystemCallError] if the data file cannot be opened
def init_table
  return if @table

  # Ruby 2.0 and later default to UTF-8 encoding, so the newer UTF-8 table is
  # used there for better efficiency; Ruby 1.x keeps the older Mandarin.dat.
  @ruby1 = RUBY_VERSION.start_with?('1')
  datfile = @ruby1 ? 'Mandarin.dat' : 'pinyin-utf8.dat'
  path = File.join(File.dirname(__FILE__), "../data/#{datfile}")

  loaded = {}
  File.open(path, "r:UTF-8") do |io|
    io.each_line do |line|
      key, value = line.split(' ', 2)
      next if key.nil? # blank line: nothing to index
      loaded[key] = value
    end
  end
  @table = loaded
end
.init_word_table ⇒ Object
38 39 40 41 42 43 44 45 46 47 48 49 50 51 |
# File 'lib/chinese_pinyin.rb', line 38

# Loads the optional phrase table into @words_table, at most once.
#
# When the WORDS_FILE environment variable is set, each line of that file is
# split on the first '|': the left part becomes the key and the right part
# the value. Without WORDS_FILE the table is an empty hash.
#
# The file is read as UTF-8 (matching `init_table`) so keys compare equal to
# the UTF-8 strings `translate` looks up, and line endings are removed with
# `chomp` so CRLF files do not leave a stray "\r" in the values. The table is
# published only after a complete read, so a failed open is not cached.
#
# @return [void]
# @raise [SystemCallError] if WORDS_FILE is set but cannot be opened
def init_word_table
  return if @words_table

  words = {}
  words_file = ENV["WORDS_FILE"]
  if words_file
    File.open(words_file, "r:UTF-8") do |io|
      io.each_line do |line|
        key, value = line.chomp.split('|', 2)
        next if key.nil? # blank line: nothing to index
        words[key] = value
      end
    end
  end
  @words_table = words
end
.translate(chars, options = {}) ⇒ Object Also known as: t
53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 |
# File 'lib/chinese_pinyin.rb', line 53

# Converts +chars+ to pinyin.
#
# If the whole input is a key of the words table (see `init_word_table`),
# that entry is used: it is split on whitespace, downcased, optionally
# capitalized, stripped of trailing tone digits unless +:tone+ is in effect,
# and joined with the splitter. Tone marks are not applied on this path.
#
# Otherwise the input is converted character by character using the table
# from `init_table`. Characters found in the table contribute their first
# listed reading; ASCII letters and digits are copied through unchanged; any
# other character contributes a splitter unless the previous output element
# already is one. Newlines are not visited because the scan uses /./.
#
# @param chars [String] text to convert (re-encoded to UTF-8 first)
# @param options [Hash]
# @option options [String] :splitter (' ') separator placed between syllables
# @option options [Boolean] :tone (false) keep the trailing tone digit
# @option options [Boolean] :tonemarks (false) replace the tone digit with a
#   tone mark on one vowel (first match in the order a, o, e, i, u, v);
#   implies +:tone+
# @option options [Boolean] :camelcase (false) capitalize each syllable
# @yield [pinyin, index] optional; the block's result is appended instead of
#   the syllable, and no splitter is added after it. +index+ is the number of
#   output elements collected so far.
# @return [String] the converted text, with one trailing splitter removed
def translate(chars, options = {})
  chars     = chars.encode("UTF-8")
  splitter  = options.fetch(:splitter, ' ')
  tonemarks = options.fetch(:tonemarks, false)
  # Tone marks are derived from the tone digit, so :tonemarks must keep the
  # digit even when the caller also passes tone: false; otherwise the
  # tone-mark step would strip a real letter instead of the digit.
  tone      = options.fetch(:tone, false) || tonemarks
  camel     = options.fetch(:camelcase, false)

  init_word_table
  phrase = @words_table[chars]
  if phrase
    words = phrase.split.map(&:downcase)
    words.map!(&:capitalize) if camel
    # Drop a trailing ASCII tone digit from each syllable.
    words.map! { |word| word =~ /[0-9]\z/ ? word.chop : word } unless tone
    return words.join(splitter)
  end

  init_table
  results = []
  is_english = false

  chars.scan(/./).each do |char|
    # The Ruby 1.x table is keyed by upper-case hex code point, the UTF-8
    # table by the character itself.
    key = @ruby1 ? sprintf("%X", char.unpack("U").first) : char
    entry = @table[key]

    if entry
      # Separate a preceding run of non-table characters from this syllable.
      results << splitter if is_english
      is_english = false

      pinyin = entry.chomp.split(' ', 2)[0]
      pinyin.downcase! if @ruby1
      pinyin.chop! unless tone
      pinyin.capitalize! if camel

      if tonemarks
        tone_index = pinyin[-1].to_i
        pinyin = pinyin[0...-1]
        # tr! returns nil when nothing was replaced, so this marks exactly
        # the first vowel kind that occurs in the syllable.
        %w(a o e i u v).each do |vowel|
          break if pinyin.tr!(vowel, TONE_MARK[vowel.to_sym][tone_index - 1])
        end
      end

      if block_given?
        results << (yield pinyin, results.size)
      else
        results << pinyin << splitter
      end
    else
      if char =~ /[a-zA-Z0-9]/
        results << char
      elsif results.last != splitter
        results << splitter
      end
      is_english = true
    end
  end

  results.join('').chomp(splitter)
end