Module: HangulTools

Defined in:
lib/hangul_tools.rb,
lib/hangul_tools/version.rb

Overview

Defined Under Namespace

Modules: Version

Constant Summary collapse

# Romanized initial consonants, indexed by the 1-based lead number that
# .decompose returns. Index 0 is an unused placeholder; the nil at index 12
# is the silent initial (ㅇ), which contributes no letters.
LEADS =
[ nil, 'g', 'gg', 'n', 'd', 'dd', 'r', 'm', 'b', 'bb' ,'s', 'ss', nil, 'j', 'jj', 'ch', 'k', 't', 'p', 'h' ]
# Romanized final consonants, indexed by the 0-based tail number that
# .decompose returns. Index 0 (nil) means "no final consonant".
TAILS =
[ nil, 'g', 'gg', 'gs', 'n', 'nj', 'nh', 'd', 'l', 'lg', 'lm', 'lb', 'ls', 'lt', 'lp', 'lh', 'm', 'b', 'bs', 's', 'ss', 'ng', 'j', 'ch', 'k', 't', 'p', 'h' ]
# Romanized vowels per romanization system, indexed by the 1-based vowel
# number that .decompose returns (index 0 is an unused placeholder).
#
# The McCune-Reischauer entries for index 7 and index 15 were empty strings,
# which made those vowels disappear from the output; they are 'yŏ' and 'wŏ'.
#
# NOTE(review): the :revised table spells index 15 as 'weo' and index 20 as
# 'yi'; the official Revised Romanization appears to use 'wo' and 'ui' -
# confirm whether this deviation is intentional before changing it.
VOWELS =
{
  revised:           [ nil, 'a', 'ae', 'ya', 'yae', 'eo', 'e', 'yeo', 'ye', 'o', 'wa', 'wae', 'oe', 'yo', 'u', 'weo', 'we', 'wi', 'yu', 'eu', 'yi', 'i' ],
  mccune_reischauer: [ nil, 'a', 'ae', 'ya', 'yae', 'ŏ',  'e', 'yŏ', 'ye', 'o', 'wa', 'wae', 'oe', 'yo', 'u', 'wŏ', 'we', 'wi', 'yu', 'ŭ',  'ŭi', 'i' ]
}
# Per-system substring substitutions applied to the fully assembled
# romanization (see .romanize_with_system), in insertion order.
BLENDS =
{
  revised: {},
  mccune_reischauer: { "si" => "shi", "sy" => "shy", "swi" => "shwi" }
}

Class Method Summary collapse

Class Method Details

.decompose(text) ⇒ Object

It is assumed that `text` contains nothing but Hangul codepoints.



22
23
24
25
26
27
28
29
30
# File 'lib/hangul_tools.rb', line 22

# Splits each codepoint of +text+ into its three positional indices.
#
# The arithmetic treats every codepoint as an offset from 44032 (U+AC00)
# laid out in groups of 588 (leads), each holding 21 groups of 28 (tails).
# Nothing is validated: codepoints outside that layout still produce
# numbers, so callers are expected to pass hangul syllables only.
#
# @param text [String] string whose codepoints are decomposed
# @return [Array<Array(Integer, Integer, Integer)>] one
#   [lead, vowel, tail] triple per codepoint; lead and vowel are 1-based,
#   tail is 0-based (0 meaning "no tail")
def self.decompose(text)
  text.each_codepoint.map do |codepoint|
    # 588 = 21 * 28, so two nested divmods peel off lead, vowel and tail.
    lead_index, within_lead = (codepoint - 44032).divmod(588)
    vowel_index, tail_index = within_lead.divmod(28)

    [lead_index + 1, vowel_index + 1, tail_index]
  end
end

.matrices ⇒ Object



71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
# File 'lib/hangul_tools.rb', line 71

# Loads, parses and memoizes the matrices embedded after the +__END__+
# marker of this source file.
#
# The data section is a sequence of named sections: a line consisting of
# +name:+ opens a section, blank lines are skipped, and every other line is
# collected and handed to .parse_matrix once the section ends. The result is
# cached in +@matrices+, so the file is read at most once per process.
#
# @return [Hash{Symbol => Hash}] parsed matrix for each section name
def self.matrices
  @matrices ||= begin
    source = File.read(__FILE__).lines
    data_start = source.index("__END__\n") + 1

    tables = {}
    section = nil
    rows = nil

    source[data_start..-1].each do |row|
      case row
      when /^(\w+):$/
        header = Regexp.last_match(1)
        # A new header closes the section that was being collected.
        tables[section.to_sym] = parse_matrix(rows) if rows
        section = header
        rows = []
      when /^$/
        # Blank separator line: nothing to collect.
      else
        rows << row
      end
    end

    # Flush the final section, which has no following header to close it.
    tables[section.to_sym] = parse_matrix(rows) if rows
    tables
  end
end

.parse_matrix(lines) ⇒ Object



91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
# File 'lib/hangul_tools.rb', line 91

# Turns the text rows of one matrix into a nested lookup hash.
#
# The first line is the header: its first whitespace-separated token is
# discarded and the rest become column keys ('_' => nil, 'final' => :final,
# anything else stays a String). Every following line is a row whose first
# token is the row key ('initial' => :initial, 'voiced' => :voiced,
# '_' => nil, anything else stays a String) and whose remaining tokens are
# the cell values, with '_' standing for nil.
#
# @param lines [Array<String>] header line followed by the row lines
# @return [Hash{Object => Hash{Object => String, nil}}] row key => column
#   key => cell value; columns with no cell in a short row map to nil
def self.parse_matrix(lines)
  header, *rows = lines

  columns = header.split(/\s+/)[1..-1].map do |label|
    case label
    when '_' then nil
    when 'final' then :final
    else label
    end
  end

  rows.each_with_object({}) do |row, table|
    label, *cells = row.split(/\s+/)

    row_key =
      case label
      when 'initial' then :initial
      when 'voiced' then :voiced
      when '_' then nil
      else label
      end

    values = cells.map { |cell| cell == '_' ? nil : cell }

    table[row_key] = Hash[columns.zip(values)]
  end
end

.romanize(text, system: :revised, initial: :initial) ⇒ Object



5
6
7
8
9
10
11
12
13
14
15
16
# File 'lib/hangul_tools.rb', line 5

# Romanizes every run of hangul syllables (U+AC00..U+D7A3) in +text+ and
# passes all other characters through unchanged.
#
# The text is cut into alternating hangul / non-hangul chunks. Only a hangul
# chunk that is the very first chunk of the text is romanized with
# +initial+; every later hangul chunk is romanized with :voiced, even when
# it follows non-hangul text such as a space.
#
# @param text [String] text that may mix hangul with other characters
# @param system [Symbol] romanization system, forwarded to
#   .romanize_with_system
# @param initial [Symbol] context used for the first syllable when the text
#   starts with hangul
# @return [String] the text with each hangul run replaced by its romanization
def self.romanize(text, system: :revised, initial: :initial)
  chunks = text.scan(/[\uAC00-\uD7a3]+|[^\uAC00-\uD7a3]+/)

  chunks.each_with_index.map do |chunk, idx|
    # Non-hangul chunks are copied verbatim; no table lookup is needed.
    next chunk unless chunk =~ /[\uAC00-\uD7a3]/

    romanize_with_system(chunk, system, idx > 0 ? :voiced : initial)
  end.join
end

.romanize_with_system(text, system, voiced) ⇒ Object



32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
# File 'lib/hangul_tools.rb', line 32

def self.romanize_with_system(text, system, voiced)
  matrix = matrices[system]
  vowels = VOWELS[system]
  blends = BLENDS[system]

  syllables = decompose(text)
  phonemes = []

  syllables.each.with_index do |(lead, vowel, tail), idx|
    prior = (idx > 0) ? TAILS[syllables[idx-1][2].to_i] : voiced
    final = syllables[idx+1] ? false : true

    phonemes << (matrix[prior] || {})[LEADS[lead]]
    phonemes << vowels[vowel]

    if final
      phonemes << (matrix[TAILS[tail]] || {})[:final]
    end
  end

  result = phonemes.compact.join

  blends.each do |pattern, blend|
    result = result.gsub(pattern, blend)
  end

  result
end