Class: Gimchi

Inherits:
Object
  • Object
show all
Defined in:
lib/gimchi.rb,
lib/gimchi/char.rb,
lib/gimchi/patch_1.8.rb,
lib/gimchi/pronouncer.rb

Defined Under Namespace

Classes: Char, Pronouncer

Class Attribute Summary collapse

Class Method Summary collapse

Class Attribute Details

.chosungs ⇒ Object (readonly)

Returns the value of attribute chosungs.



12
13
14
# File 'lib/gimchi.rb', line 12

# Read-only accessor for the +chosungs+ class attribute.
#
# @return [Object] whatever is currently stored in @chosungs
def chosungs
  @chosungs
end

.jongsungs ⇒ Object (readonly)

Returns the value of attribute jongsungs.



12
13
14
# File 'lib/gimchi.rb', line 12

# Read-only accessor for the +jongsungs+ class attribute.
#
# @return [Object] whatever is currently stored in @jongsungs
def jongsungs
  @jongsungs
end

.jungsungs ⇒ Object (readonly)

Returns the value of attribute jungsungs.



12
13
14
# File 'lib/gimchi.rb', line 12

# Read-only accessor for the +jungsungs+ class attribute.
#
# @return [Object] whatever is currently stored in @jungsungs
def jungsungs
  @jungsungs
end

Class Method Details

.Char(ch) ⇒ Object



14
15
16
# File 'lib/gimchi.rb', line 14

# Wraps the given character in a Gimchi::Char.
#
# The object is built directly instead of going through +kchar+: that helper
# is marked deprecated, and a supported entry point should not stop working
# when the deprecated one is eventually removed.
#
# @param [String] ch Character to wrap
# @return [Gimchi::Char]
def Char ch
  Gimchi::Char.new(ch)
end

.chosung?(ch) ⇒ Boolean

Parameters:

  • ch (String)

Returns:

  • (Boolean)


46
47
48
# File 'lib/gimchi.rb', line 46

# Tells whether +ch+ is a member of @chosung_set.
#
# @param [String] ch
# @return [Boolean] result of the membership test on @chosung_set
def chosung?(ch)
  known = @chosung_set
  known.include?(ch)
end

.complete_korean_char?(ch) ⇒ Boolean

Checks if the given character is a “complete” Korean character. A “complete” Korean character must have a chosung and a jungsung, with an optional jongsung.

Parameters:

  • ch (String)

    A string of size 1

Returns:

  • (Boolean)

Raises:

  • (ArgumentError)


74
75
76
77
78
79
# File 'lib/gimchi.rb', line 74

# Checks whether the given character is a "complete" Korean character, i.e.
# its code point lies in the precomposed-syllable range U+AC00..U+D7A3.
#
# An empty string contains no character and therefore yields false. (The
# previous implementation answered true for it, because +all?+ is vacuously
# true on an empty list of code points.)
#
# @param [String] ch A string of size 1
# @return [Boolean]
# @raise [ArgumentError] if str_length reports more than one character
def complete_korean_char? ch
  raise ArgumentError, 'Lengthy input' if str_length(ch) > 1

  # Range of Korean characters in Unicode 2.0: AC00(가) ~ D7A3(힣)
  code = ch.unpack('U').first
  !code.nil? && code >= 0xAC00 && code <= 0xD7A3
end

.compose(chosung, jungsung = nil, jongsung = nil) ⇒ String

Compose 3 elements into a Korean character String

Parameters:

  • chosung (String)
  • jungsung (String) (defaults to: nil)
  • jongsung (String) (defaults to: nil)

Returns:

  • (String)


30
31
32
33
34
35
36
37
38
39
40
41
42
# File 'lib/gimchi.rb', line 30

# Composes up to three elements into a single Korean character String.
#
# * With neither chosung nor jungsung, returns an empty string.
# * With only one of the two, returns that element unchanged.
# * With both, computes the precomposed syllable code point as
#   0xAC00 + chosung_index * (21 * 28) + jungsung_index * 28 + jongsung_index
#   (the Unicode Hangul syllable composition formula: 21 medial vowels,
#   28 final slots including "no final").
#
# An element that is not found in its lookup list falls back to index 0.
#
# @param [String] chosung
# @param [String] jungsung
# @param [String] jongsung
# @return [String]
def compose chosung, jungsung = nil, jongsung = nil
  return "" if chosung.nil? && jungsung.nil?
  return chosung || jungsung unless chosung && jungsung

  n1 = chosungs.index(chosung) || 0
  n2 = jungsungs.index(jungsung) || 0
  # Slot 0 means "no jongsung", hence the leading nil.
  n3 = ([nil] + jongsungs).index(jongsung) || 0
  [0xAC00 + n1 * (21 * 28) + n2 * 28 + n3].pack('U')
end

.decompose(ch) ⇒ Array

Decompose a Korean character into 3 components

Parameters:

  • ch (String)

    Korean character

Returns:

  • (Array)


21
22
23
# File 'lib/gimchi.rb', line 21

# Decomposes a Korean character into its components by wrapping it in a
# Gimchi::Char and returning that object's +to_a+.
#
# The wrapper is built directly rather than via the deprecated +kchar+
# helper, so this method does not depend on a method slated for removal.
#
# @param [String] ch Korean character
# @return [Array] the result of Gimchi::Char#to_a
def decompose ch
  Gimchi::Char.new(ch).to_a
end

.jongsung?(ch) ⇒ Boolean

Parameters:

  • ch (String)

Returns:

  • (Boolean)


58
59
60
# File 'lib/gimchi.rb', line 58

# Tells whether +ch+ is a member of @jongsung_set.
#
# @param [String] ch
# @return [Boolean] result of the membership test on @jongsung_set
def jongsung?(ch)
  known = @jongsung_set
  known.include?(ch)
end

.jungsung?(ch) ⇒ Boolean

Parameters:

  • ch (String)

Returns:

  • (Boolean)


52
53
54
# File 'lib/gimchi.rb', line 52

# Tells whether +ch+ is a member of @jungsung_set.
#
# @param [String] ch
# @return [Boolean] result of the membership test on @jungsung_set
def jungsung?(ch)
  known = @jungsung_set
  known.include?(ch)
end

.kchar(ch) ⇒ Object

Deprecated.


83
84
85
# File 'lib/gimchi.rb', line 83

# Builds a Gimchi::Char for the given character.
#
# @deprecated
# @param [String] ch
# @return [Gimchi::Char]
def kchar(ch)
  wrapper_class = Gimchi::Char
  wrapper_class.new(ch)
end

.korean_char?(ch) ⇒ Boolean Also known as: kchar?

Checks if the given character is a Korean character.

Parameters:

  • ch (String)

    A string of size 1

Returns:

  • (Boolean)

Raises:

  • (ArgumentError)


64
65
66
67
68
# File 'lib/gimchi.rb', line 64

# Checks whether the given character is a Korean character: either a
# complete one (per complete_korean_char?) or a member of @all.
#
# @param [String] ch A string of size 1
# @return [Boolean]
# @raise [ArgumentError] if str_length reports more than one character
def korean_char?(ch)
  too_long = str_length(ch) > 1
  raise ArgumentError, 'Lengthy input' if too_long

  complete = complete_korean_char?(ch)
  complete || @all.include?(ch)
end

.pronounce(str, options = {}) ⇒ String

Returns the pronunciation of the given string containing Korean characters. Takes optional options hash.

Parameters:

  • str (String)

    Input string

  • options (Hash) (defaults to: {})

    Options

Options Hash (options):

  • each_char (Boolean)

    Each character of the string is pronounced respectively.

  • slur (Boolean)

    Strings separated by whitespaces are processed again as if they were contiguous.

  • number (Boolean)

    Numeric parts of the string are also pronounced in Korean.

  • except (Array)

    Allows you to skip certain transformations.

Returns:

  • (String)

    Output string



106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
# File 'lib/gimchi.rb', line 106

# Returns the pronunciation of the given string containing Korean characters.
#
# Caller-supplied options are merged over the defaults below, the string is
# passed through read_number when :number is set, and the work is then handed
# to @pronouncer's +pronounce!+ (invoked via +send+).
#
# @param [String] str Input string
# @param [Hash] options Options
# @option options [Boolean] :each_char (false)
# @option options [Boolean] :slur (false)
# @option options [Boolean] :number (true) Run the input through read_number first
# @option options [Array] :except ([])
# @option options [Boolean] :debug (false) Also return the second value
#   produced by the pronouncer
# @return [String] the result, or +[result, transforms]+ when :debug is set
def pronounce(str, options = {})
  defaults = {
    :each_char => false,
    :slur      => false,
    :number    => true,
    :except    => [],
    :debug     => false
  }
  options = defaults.merge(options)

  str = read_number(str) if options[:number]

  result, transforms = @pronouncer.send(:pronounce!, str, options)

  options[:debug] ? [result, transforms] : result
end

.read_number(str) ⇒ String

Reads numeric expressions the Korean way.

Parameters:

  • str (String, Numeric)

    Numeric type or String containing numeric expressions

Returns:

  • (String)

    Output string



90
91
92
93
94
# File 'lib/gimchi.rb', line 90

# Reads numeric expressions in the Korean way.
#
# The input is stringified, and every numeric expression found in it is
# replaced by the result of read_number_sub, which receives the matched
# number (capture 1) and the optional whitespace-plus-one-character that
# follows it (capture 5, nil when absent).
#
# @param [String, Numeric] str Numeric type or String containing numeric expressions
# @return [String] Output string
def read_number(str)
  numeric_expr = /(([+-]\s*)?[0-9,]*,*[0-9]+(\.[0-9]+(e[+-][0-9]+)?)?)(\s*.)?/
  str.to_s.gsub(numeric_expr) do
    found = Regexp.last_match
    read_number_sub(found[1], found[5])
  end
end

.romanize(str, options = {}) ⇒ String

Returns the romanization (alphabetical notation) of the given Korean string. See https://en.wikipedia.org/wiki/Korean_romanization

Parameters:

  • str (String)

    Input Korean string

  • options (Hash) (defaults to: {})

    Options

Options Hash (options):

  • as_pronounced (Boolean)

    If true, #pronounce is internally called before romanize

  • number (Boolean)

    Whether to read numeric expressions in the string

  • slur (Boolean)

    Same as :slur in #pronounce

Returns:

  • (String)

    Output string in Roman Alphabet

See Also:

  • Gimchi.pronounce


135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
# File 'lib/gimchi.rb', line 135

# Returns the romanization (alphabetical notation) of the given Korean string.
#
# The input is first run through pronounce, then scanned character by
# character: consecutive Korean characters are collected into a chunk and
# romanized together, while any other character is copied to the output
# unchanged. After each chunk, the post-substitution rules from the
# configuration are applied to the whole output built so far.
#
# @param [String] str Input Korean string
# @param [Hash] options Options
# @option options [Boolean] :as_pronounced (true) When false, pronounce is
#   called with :each_char => true
# @option options [Boolean] :number (true) Forwarded to pronounce
# @option options [Boolean] :slur (false) Forwarded to pronounce
# @return [String] Output string in Roman alphabet
def romanize(str, options = {})
  defaults = { :as_pronounced => true, :number => true, :slur => false }
  options = defaults.merge(options)

  table = @config[:romanization]
  post_subs = table[:post_substitution]
  # One lookup table per component position, in the order Char#to_a is
  # expected to yield them: 0 = chosung, 1 = jungsung, 2 = jongsung.
  tables = [:chosung, :jungsung, :jongsung].map { |part| table[part] }

  pronounced = pronounce(str,
    :each_char => !options[:as_pronounced],
    :number    => options[:number],
    :slur      => options[:slur],
    # Clause 1, addendum 1: 'ㅢ' is written 'ui' even when it is
    # pronounced as 'ㅣ'.
    :except    => %w[rule_5_3])
  # NOTE(review): the key is the empty string as written; confirm the chosung
  # table really defines it, otherwise this is nil and the strip below never fires.
  dash = tables[0][""]
  output = ""

  # Appends the romanization of one run of Korean characters to +output+ and
  # returns the whole output with the post-substitutions applied.
  flush = lambda do |chunk|
    syllables = chunk.each_char.map { |ch| kchar(ch) rescue ch }
    syllables.each do |syllable|
      syllable.to_a.each_with_index do |element, position|
        next if element.nil?

        roman = tables[position][element] || element
        # Drop the leading dash at the very start or right after whitespace.
        if roman[0, 1] == dash && (output.empty? || output[-1, 1] =~ /\s/)
          roman = roman[1..-1]
        end
        output += roman
      end
    end

    post_subs.keys.inject(output) do |text, pattern|
      text.gsub(pattern, post_subs[pattern])
    end
  end

  pending = ""
  pronounced.each_char do |char|
    if korean_char?(char)
      pending += char
      next
    end

    unless pending.empty?
      output = flush.call(pending)
      pending = ""
    end
    output += char
  end
  output = flush.call(pending) unless pending.empty?
  output
end