Module: Addressable::IDNA

Defined in:: lib/addressable/idna/pure.rb,
lib/addressable/idna/native.rb

Defined Under Namespace

Classes: PunycodeBadInput, PunycodeBigOutput, PunycodeOverflow

Constant Summary collapse

UNICODE_TABLE =

File.expand_path(
  File.join(File.dirname(__FILE__), '../../..', 'data/unicode.data')
)

ACE_PREFIX =

"xn--"

UTF8_REGEX =

/\A(?:
[\x09\x0A\x0D\x20-\x7E]               # ASCII
| [\xC2-\xDF][\x80-\xBF]              # non-overlong 2-byte
| \xE0[\xA0-\xBF][\x80-\xBF]          # excluding overlongs
| [\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}   # straight 3-byte
| \xED[\x80-\x9F][\x80-\xBF]          # excluding surrogates
| \xF0[\x90-\xBF][\x80-\xBF]{2}       # planes 1-3
| [\xF1-\xF3][\x80-\xBF]{3}           # planes 4-15
| \xF4[\x80-\x8F][\x80-\xBF]{2}       # plane 16
)*\z/mnx

UTF8_REGEX_MULTIBYTE =

/(?:
[\xC2-\xDF][\x80-\xBF]                # non-overlong 2-byte
| \xE0[\xA0-\xBF][\x80-\xBF]          # excluding overlongs
| [\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}   # straight 3-byte
| \xED[\x80-\x9F][\x80-\xBF]          # excluding surrogates
| \xF0[\x90-\xBF][\x80-\xBF]{2}       # planes 1-3
| [\xF1-\xF3][\x80-\xBF]{3}           # planes 4-15
| \xF4[\x80-\x8F][\x80-\xBF]{2}       # plane 16
)/mnx

HANGUL_SBASE =

0xac00

HANGUL_LBASE =

0x1100

HANGUL_LCOUNT =

HANGUL_VBASE =

0x1161

HANGUL_VCOUNT =

HANGUL_TBASE =

0x11a7

HANGUL_TCOUNT =

HANGUL_NCOUNT = 588

HANGUL_VCOUNT * HANGUL_TCOUNT

HANGUL_SCOUNT = 11172

HANGUL_LCOUNT * HANGUL_NCOUNT

UNICODE_DATA_COMBINING_CLASS =

UNICODE_DATA_EXCLUSION =

UNICODE_DATA_CANONICAL =

UNICODE_DATA_COMPATIBILITY =

UNICODE_DATA_UPPERCASE =

UNICODE_DATA_LOWERCASE =

UNICODE_DATA_TITLECASE =

UNICODE_DATA = This is a sparse Unicode table. Codepoints without entries are assumed to have the value: [0, 0, nil, nil, nil, nil, nil]

File.open(UNICODE_TABLE, "rb") do |file|
  Marshal.load(file.read)
end

COMPOSITION_TABLE =

{}

UNICODE_MAX_LENGTH =

ACE_MAX_LENGTH =

PUNYCODE_BASE =

PUNYCODE_TMIN =

PUNYCODE_TMAX =

PUNYCODE_SKEW =

PUNYCODE_DAMP =

PUNYCODE_INITIAL_BIAS =

PUNYCODE_INITIAL_N =

0x80

PUNYCODE_DELIMITER =

0x2D

PUNYCODE_MAXINT =

1 << 64

PUNYCODE_PRINT_ASCII =

"\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n" +
"\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n" +
" !\"\#$%&'()*+,-./" +
"0123456789:;<=>?" +
"@ABCDEFGHIJKLMNO" +
"PQRSTUVWXYZ[\\]^_" +
"`abcdefghijklmno" +
"pqrstuvwxyz{|}~\n"

Class Method Summary collapse

.lookup_unicode_combining_class(codepoint) ⇒ Object
.lookup_unicode_compatibility(codepoint) ⇒ Object
.lookup_unicode_composition(unpacked) ⇒ Object
.lookup_unicode_lowercase(codepoint) ⇒ Object
.punycode_adapt(delta, numpoints, firsttime) ⇒ Object

Bias adaptation method.
.punycode_basic?(codepoint) ⇒ Boolean
.punycode_decode(value) ⇒ Object
.punycode_decode_digit(codepoint) ⇒ Object

Returns the numeric value of a basic codepoint (for use in representing integers) in the range 0 to base - 1, or PUNYCODE_BASE if codepoint does not represent a value.
.punycode_delimiter?(codepoint) ⇒ Boolean
.punycode_encode(value) ⇒ Object
.punycode_encode_digit(d) ⇒ Object
.to_ascii(value) ⇒ Object

Converts from a Unicode internationalized domain name to an ASCII domain name as described in RFC 3490.
.to_unicode(value) ⇒ Object

Converts from an ASCII domain name to a Unicode internationalized domain name as described in RFC 3490.
.unicode_compose(unpacked) ⇒ Object
.unicode_compose_pair(ch_one, ch_two) ⇒ Object
.unicode_decompose(unpacked) ⇒ Object
.unicode_decompose_hangul(codepoint) ⇒ Object
.unicode_downcase(input) ⇒ String private

Unicode aware downcase method.
.unicode_normalize_kc(value) ⇒ Object

Unicode normalization form KC.
.unicode_sort_canonical(unpacked) ⇒ Object

Class Method Details

.lookup_unicode_combining_class(codepoint) ⇒ `Object`

# File 'lib/addressable/idna/pure.rb', line 274

# Looks up the combining class stored for a codepoint in UNICODE_DATA.
#
# @param codepoint [Integer] key into UNICODE_DATA
# @return the stored combining class, or 0 when the codepoint has no
#   entry or its entry holds no combining class
def self.lookup_unicode_combining_class(codepoint)
  entry = UNICODE_DATA[codepoint]
  return 0 unless entry

  entry[UNICODE_DATA_COMBINING_CLASS] || 0
end

.lookup_unicode_compatibility(codepoint) ⇒ `Object`

# File 'lib/addressable/idna/pure.rb', line 282

# Looks up the compatibility field stored for a codepoint in
# UNICODE_DATA.
#
# @param codepoint [Integer] key into UNICODE_DATA
# @return the stored compatibility value, or nil when the codepoint has
#   no entry (or the entry has no value in that slot)
def self.lookup_unicode_compatibility(codepoint)
  entry = UNICODE_DATA[codepoint]
  return nil unless entry

  entry[UNICODE_DATA_COMPATIBILITY]
end

.lookup_unicode_composition(unpacked) ⇒ `Object`



297
298
299

# File 'lib/addressable/idna/pure.rb', line 297

# Fetches the COMPOSITION_TABLE entry stored under the given key.
#
# @param unpacked the lookup key, used as-is (unicode_compose_pair
#   passes an Array of UTF-8 byte values)
# @return the table entry, or nil when the key is absent
def self.lookup_unicode_composition(unpacked)
  COMPOSITION_TABLE[unpacked]
end

.lookup_unicode_lowercase(codepoint) ⇒ `Object`

# File 'lib/addressable/idna/pure.rb', line 289

# Looks up the lowercase mapping stored for a codepoint in UNICODE_DATA.
#
# @param codepoint [Integer] key into UNICODE_DATA
# @return the stored lowercase value, or the codepoint itself when it
#   has no entry or its entry holds no lowercase value
def self.lookup_unicode_lowercase(codepoint)
  entry = UNICODE_DATA[codepoint]
  return codepoint unless entry

  entry[UNICODE_DATA_LOWERCASE] || codepoint
end

.punycode_adapt(delta, numpoints, firsttime) ⇒ `Object`

Bias adaptation method

# File 'lib/addressable/idna/pure.rb', line 641

# Bias adaptation method.
#
# Computes the next bias value from the delta that was just encoded or
# decoded.
#
# @param delta [Integer] the delta that was just processed
# @param numpoints [Integer] divisor applied to the scaled delta
# @param firsttime [Boolean] when true the delta is divided by
#   PUNYCODE_DAMP, otherwise it is halved
# @return [Integer] the adapted bias
def self.punycode_adapt(delta, numpoints, firsttime)
  scaled =
    if firsttime
      delta / PUNYCODE_DAMP
    else
      delta >> 1 # shift is a faster way of doing delta / 2
    end
  scaled += scaled / numpoints

  span = PUNYCODE_BASE - PUNYCODE_TMIN
  threshold = (span * PUNYCODE_TMAX) / 2

  offset = 0
  until scaled <= threshold
    scaled /= span
    offset += PUNYCODE_BASE
  end

  offset + (span + 1) * scaled / (scaled + PUNYCODE_SKEW)
end

.punycode_basic?(codepoint) ⇒ `Boolean`

Returns:

(Boolean)



609
610
611

# File 'lib/addressable/idna/pure.rb', line 609

# Tells whether a codepoint counts as a basic code point, i.e. is
# below 0x80.
#
# @param codepoint [Integer] the codepoint to test
# @return [Boolean] true when codepoint is less than 128
def self.punycode_basic?(codepoint)
  codepoint < 128 # 128 == 0x80
end

.punycode_decode(value) ⇒ `Object`

# File 'lib/addressable/idna/pure.rb', line 487

# Decodes a Punycode string into a string built from the decoded
# codepoints with pack("U*").
#
# Input bytes before the last delimiter are copied through as basic code
# points; the rest of the input is read as generalized variable-length
# integers that say which codepoint to insert and where.
#
# Raises PunycodeBigOutput when the input is longer than
# ACE_MAX_LENGTH * 2 bytes or the output would pass UNICODE_MAX_LENGTH
# codepoints, PunycodeBadInput when a byte is above 127, a digit is not
# valid, or the input ends in the middle of an integer, and
# PunycodeOverflow when an intermediate value would pass PUNYCODE_MAXINT.
def self.punycode_decode(punycode)
  input = []
  output = []

  # Reject oversized input before doing any work.
  if ACE_MAX_LENGTH * 2 < punycode.size
    raise PunycodeBigOutput, "Output would exceed the space provided."
  end
  # Copy the input bytes into an array, insisting on 7-bit values.
  punycode.each_byte do |c|
    unless c >= 0 && c <= 127
      raise PunycodeBadInput, "Input is invalid."
    end
    input.push(c)
  end

  input_length = input.length
  # One-element array standing in for the C in/out length pointer.
  output_length = [UNICODE_MAX_LENGTH]

  # Initialize the state
  n = PUNYCODE_INITIAL_N

  out = i = 0
  max_out = output_length[0]
  bias = PUNYCODE_INITIAL_BIAS

  # Handle the basic code points:  Let b be the number of input code
  # points before the last delimiter, or 0 if there is none, then
  # copy the first b code points to the output.

  b = 0
  input_length.times do |j|
    b = j if punycode_delimiter?(input[j])
  end
  if b > max_out
    raise PunycodeBigOutput, "Output would exceed the space provided."
  end

  b.times do |j|
    unless punycode_basic?(input[j])
      raise PunycodeBadInput, "Input is invalid."
    end
    output[out] = input[j]
    out+=1
  end

  # Main decoding loop:  Start just after the last delimiter if any
  # basic code points were copied; start at the beginning otherwise.

  in_ = b > 0 ? b + 1 : 0
  while in_ < input_length

    # in_ is the index of the next character to be consumed, and
    # out is the number of code points in the output array.

    # Decode a generalized variable-length integer into delta,
    # which gets added to i.  The overflow checking is easier
    # if we increase i as we go, then subtract off its starting
    # value at the end to obtain delta.

    oldi = i; w = 1; k = PUNYCODE_BASE
    while true
      # Running out of input in the middle of an integer is an error.
      if in_ >= input_length
        raise PunycodeBadInput, "Input is invalid."
      end
      digit = punycode_decode_digit(input[in_])
      in_+=1
      if digit >= PUNYCODE_BASE
        raise PunycodeBadInput, "Input is invalid."
      end
      if digit > (PUNYCODE_MAXINT - i) / w
        raise PunycodeOverflow, "Input needs wider integers to process."
      end
      i += digit * w
      # t is the threshold for this digit position: k - bias clamped
      # to the range PUNYCODE_TMIN..PUNYCODE_TMAX.
      t = (
        if k <= bias
          PUNYCODE_TMIN
        elsif k >= bias + PUNYCODE_TMAX
          PUNYCODE_TMAX
        else
          k - bias
        end
      )
      # A digit below the threshold is the last digit of this integer.
      break if digit < t
      if w > PUNYCODE_MAXINT / (PUNYCODE_BASE - t)
        raise PunycodeOverflow, "Input needs wider integers to process."
      end
      w *= PUNYCODE_BASE - t
      k += PUNYCODE_BASE
    end

    bias = punycode_adapt(i - oldi, out + 1, oldi == 0)

    # i was supposed to wrap around from out + 1 to 0,
    # incrementing n each time, so we'll fix that now:

    if i / (out + 1) > PUNYCODE_MAXINT - n
      raise PunycodeOverflow, "Input needs wider integers to process."
    end
    n += i / (out + 1)
    i %= out + 1

    # Insert n at position i of the output:

    # not needed for Punycode:
    # raise PUNYCODE_INVALID_INPUT if decode_digit(n) <= base
    if out >= max_out
      raise PunycodeBigOutput, "Output would exceed the space provided."
    end

    # Shift the tail of the output one slot to the right, the Ruby
    # counterpart of the C call:
    #memmove(output + i + 1, output + i, (out - i) * sizeof *output)
    output[i + 1, out - i] = output[i, out - i]
    output[i] = n
    i += 1

    out += 1
  end

  # Kept from the C port; the value is not read again in this method.
  output_length[0] = out

  output.pack("U*")
end

.punycode_decode_digit(codepoint) ⇒ `Object`

Returns the numeric value of a basic codepoint (for use in representing integers) in the range 0 to base - 1, or PUNYCODE_BASE if codepoint does not represent a value.

# File 'lib/addressable/idna/pure.rb', line 627

# Returns the numeric value of a basic codepoint (for use in
# representing integers) in the range 0 to base - 1, or PUNYCODE_BASE if
# codepoint does not represent a value.
#
# The C reference implementation tests `cp - 48 < 10` with unsigned
# arithmetic, so codepoints below the tested range wrap around and fail
# the test. Ruby integers do not wrap, so explicit ranges are used here;
# otherwise characters such as "-" or "@" would be accepted as digits
# (some of them with negative values).
#
# @param codepoint [Integer] the codepoint to convert
# @return [Integer] 0..25 for "A".."Z" and "a".."z", 26..35 for
#   "0".."9", PUNYCODE_BASE for anything else
def self.punycode_decode_digit(codepoint)
  case codepoint
  when 48..57  then codepoint - 22 # "0".."9" => 26..35
  when 65..90  then codepoint - 65 # "A".."Z" => 0..25
  when 97..122 then codepoint - 97 # "a".."z" => 0..25
  else PUNYCODE_BASE
  end
end

.punycode_delimiter?(codepoint) ⇒ `Boolean`

Returns:

(Boolean)



614
615
616

# File 'lib/addressable/idna/pure.rb', line 614

# Tells whether a codepoint is the Punycode delimiter.
#
# @param codepoint [Integer] the codepoint to test
# @return [Boolean] true when codepoint equals PUNYCODE_DELIMITER
def self.punycode_delimiter?(codepoint)
  PUNYCODE_DELIMITER == codepoint
end

.punycode_encode(value) ⇒ `Object`

# File 'lib/addressable/idna/pure.rb', line 367

# Encodes a string as Punycode.
#
# The input is unpacked into codepoints with unpack("U*"). Basic code
# points are copied to the output first, followed by a delimiter when
# there were any; each remaining codepoint is then written as a
# generalized variable-length integer.
#
# Raises PunycodeBigOutput when the output would pass ACE_MAX_LENGTH,
# PunycodeOverflow when a delta would pass PUNYCODE_MAXINT,
# PunycodeBadInput when an output value has no PUNYCODE_PRINT_ASCII
# entry, and a bare Exception when an output value is outside 0..127.
def self.punycode_encode(unicode)
  input = unicode.unpack("U*")
  # Fixed-size, zero-filled output buffer (mirrors the C implementation).
  output = [0] * (ACE_MAX_LENGTH + 1)
  input_length = input.size
  # One-element array standing in for the C in/out length pointer.
  output_length = [ACE_MAX_LENGTH]

  # Initialize the state
  n = PUNYCODE_INITIAL_N
  delta = out = 0
  max_out = output_length[0]
  bias = PUNYCODE_INITIAL_BIAS

  # Handle the basic code points:
  input_length.times do |j|
    if punycode_basic?(input[j])
      if max_out - out < 2
        raise PunycodeBigOutput,
          "Output would exceed the space provided."
      end
      output[out] = input[j]
      out += 1
    end
  end

  h = b = out

  # h is the number of code points that have been handled, b is the
  # number of basic code points, and out is the number of characters
  # that have been output.

  if b > 0
    output[out] = PUNYCODE_DELIMITER
    out += 1
  end

  # Main encoding loop:

  while h < input_length
    # All non-basic code points < n have been
    # handled already.  Find the next larger one:

    m = PUNYCODE_MAXINT
    input_length.times do |j|
      m = input[j] if (n...m) === input[j]
    end

    # Increase delta enough to advance the decoder's
    # <n,i> state to <m,0>, but guard against overflow:

    if m - n > (PUNYCODE_MAXINT - delta) / (h + 1)
      raise PunycodeOverflow, "Input needs wider integers to process."
    end
    delta += (m - n) * (h + 1)
    n = m

    input_length.times do |j|
      # Punycode does not need to check whether input[j] is basic:
      if input[j] < n
        delta += 1
        # NOTE(review): this looks like the C wraparound check; Ruby
        # integers do not wrap, so it may never fire here -- confirm.
        if delta == 0
          raise PunycodeOverflow,
            "Input needs wider integers to process."
        end
      end

      if input[j] == n
        # Represent delta as a generalized variable-length integer:

        q = delta; k = PUNYCODE_BASE
        while true
          if out >= max_out
            raise PunycodeBigOutput,
              "Output would exceed the space provided."
          end
          # t is the threshold for this digit position: k - bias
          # clamped to the range PUNYCODE_TMIN..PUNYCODE_TMAX.
          t = (
            if k <= bias
              PUNYCODE_TMIN
            elsif k >= bias + PUNYCODE_TMAX
              PUNYCODE_TMAX
            else
              k - bias
            end
          )
          # What is left of q fits in a final digit below the threshold.
          break if q < t
          output[out] =
            punycode_encode_digit(t + (q - t) % (PUNYCODE_BASE - t))
          out += 1
          q = (q - t) / (PUNYCODE_BASE - t)
          k += PUNYCODE_BASE
        end

        output[out] = punycode_encode_digit(q)
        out += 1
        bias = punycode_adapt(delta, h + 1, h == b)
        delta = 0
        h += 1
      end
    end

    delta += 1
    n += 1
  end

  # Kept from the C port; the value is not read again in this method.
  output_length[0] = out

  # Validate every value that was written to the output buffer.
  outlen = out
  outlen.times do |j|
    c = output[j]
    unless c >= 0 && c <= 127
      # NOTE(review): a bare Exception is not caught by a plain `rescue`,
      # which only handles StandardError.
      raise Exception, "Invalid output char."
    end
    unless PUNYCODE_PRINT_ASCII[c]
      raise PunycodeBadInput, "Input is invalid."
    end
  end

  # The inclusive range takes one slot past the written output; unwritten
  # slots still hold the initial 0, and trailing NULs are stripped.
  output[0..outlen].map { |x| x.chr }.join("").sub(/\0+\z/, "")
end

.punycode_encode_digit(d) ⇒ `Object`



619
620
621

# File 'lib/addressable/idna/pure.rb', line 619

# Maps a digit value to the codepoint of the character that represents
# it: values below 26 become lowercase letters starting at "a" (97),
# all other values are offset by 22 (so 26 becomes "0").
#
# @param d [Integer] the digit value
# @return [Integer] the codepoint of the encoded digit
def self.punycode_encode_digit(d)
  d < 26 ? d + 97 : d + 22
end

.to_ascii(value) ⇒ `Object`

Converts from a Unicode internationalized domain name to an ASCII domain name as described in RFC 3490.

# File 'lib/addressable/idna/pure.rb', line 66

# Converts from a Unicode internationalized domain name to an ASCII
# domain name as described in RFC 3490.
#
# Works on a binary-encoded copy of the input. When that copy is not
# valid UTF-8 or holds no multibyte sequence it is returned untouched.
# Otherwise the name is downcased and split on ".", and every label
# that holds a multibyte sequence is normalized (form KC), Punycode
# encoded and prefixed with ACE_PREFIX.
#
# @param input [String] the domain name to convert
# @return [String] the converted domain name
def self.to_ascii(input)
  input = input.dup
  if input.respond_to?(:force_encoding)
    input.force_encoding(Encoding::ASCII_8BIT)
  end
  return input unless input =~ UTF8_REGEX && input =~ UTF8_REGEX_MULTIBYTE

  labels = unicode_downcase(input).split('.').map do |label|
    if label.respond_to?(:force_encoding)
      label.force_encoding(Encoding::ASCII_8BIT)
    end
    needs_encoding = label =~ UTF8_REGEX && label =~ UTF8_REGEX_MULTIBYTE
    if needs_encoding
      ACE_PREFIX + punycode_encode(unicode_normalize_kc(label))
    else
      label
    end
  end
  labels.join('.')
end

.to_unicode(value) ⇒ `Object`

Converts from an ASCII domain name to a Unicode internationalized domain name as described in RFC 3490.

# File 'lib/addressable/idna/pure.rb', line 91

# Converts from an ASCII domain name to a Unicode internationalized
# domain name as described in RFC 3490.
#
# The name is split on "." and every label made of ACE_PREFIX followed
# by at least one character has that remainder Punycode-decoded. All
# other labels are kept as they are, including a bare ACE_PREFIX label
# with nothing after it (previously this passed nil to punycode_decode
# and crashed).
#
# @param input [String] the domain name to convert
# @return [String] the converted name, tagged as UTF-8 where the
#   string supports force_encoding
def self.to_unicode(input)
  # \A rather than ^ so only a prefix at the very start of the label
  # counts, not one that follows an embedded newline.
  ace_label = /\A#{ACE_PREFIX}(.+)/
  parts = input.split('.').map do |part|
    payload = part[ace_label, 1]
    payload ? punycode_decode(payload) : part
  end
  output = parts.join('.')
  if output.respond_to?(:force_encoding)
    output.force_encoding(Encoding::UTF_8)
  end
  output
end

.unicode_compose(unpacked) ⇒ `Object`

# File 'lib/addressable/idna/pure.rb', line 129

# Composes adjacent codepoints into their composite forms.
#
# Walks the codepoints left to right, keeping a current "starter". When
# the starter has combining class 0 and unicode_compose_pair knows a
# composite for <starter, next>, the composite becomes the new starter;
# otherwise the starter is emitted and the next codepoint takes over.
#
# The starter's combining class is tracked for every new starter. An
# earlier version assigned it to a misspelled variable, so the class of
# the very first codepoint governed the whole string and a leading
# combining mark disabled composition altogether.
#
# @param unpacked [Array<Integer>] codepoints to compose
# @return [Array<Integer>] the composed codepoints (the input array
#   itself when it is empty)
def self.unicode_compose(unpacked)
  return unpacked if unpacked.empty?

  composed = []
  starter = unpacked[0]
  starter_cc = lookup_unicode_combining_class(starter)

  unpacked[1..-1].each do |ch|
    # Only a codepoint with combining class 0 may absorb its successor.
    composite = starter_cc == 0 ? unicode_compose_pair(starter, ch) : nil
    if composite.nil?
      composed << starter
      starter = ch
    else
      starter = composite
    end
    starter_cc = lookup_unicode_combining_class(starter)
  end

  composed << starter
end

.unicode_compose_pair(ch_one, ch_two) ⇒ `Object`

# File 'lib/addressable/idna/pure.rb', line 157

# Returns the composite for a pair of codepoints, or nil when the pair
# does not compose.
#
# Hangul is handled arithmetically: a leading consonant plus a vowel
# gives an LV syllable, and an LV syllable plus a trailing consonant
# gives an LVT syllable. Every other pair is encoded as UTF-8 bytes and
# looked up through lookup_unicode_composition.
#
# The trailing-consonant test excludes HANGUL_TBASE itself: TBASE is one
# below the first trailing consonant, so accepting it would "compose" an
# LV syllable with U+11A7 by silently dropping U+11A7.
#
# @param ch_one [Integer] first codepoint of the pair
# @param ch_two [Integer] second codepoint of the pair
# @return the composite codepoint for Hangul pairs, otherwise whatever
#   lookup_unicode_composition returns for the pair (nil when unknown)
def self.unicode_compose_pair(ch_one, ch_two)
  if ch_one >= HANGUL_LBASE && ch_one < HANGUL_LBASE + HANGUL_LCOUNT &&
      ch_two >= HANGUL_VBASE && ch_two < HANGUL_VBASE + HANGUL_VCOUNT
    # Hangul L + V
    return HANGUL_SBASE + (
      (ch_one - HANGUL_LBASE) * HANGUL_VCOUNT + (ch_two - HANGUL_VBASE)
    ) * HANGUL_TCOUNT
  elsif ch_one >= HANGUL_SBASE &&
      ch_one < HANGUL_SBASE + HANGUL_SCOUNT &&
      (ch_one - HANGUL_SBASE) % HANGUL_TCOUNT == 0 &&
      ch_two > HANGUL_TBASE && ch_two < HANGUL_TBASE + HANGUL_TCOUNT
    # Hangul LV + T
    return ch_one + (ch_two - HANGUL_TBASE)
  end

  # Build the UTF-8 byte sequence of both codepoints by hand. Codepoints
  # of 0x80000000 and above contribute no bytes.
  bytes = []
  append_utf8 = lambda do |ch|
    if ch < 128
      bytes << ch
    elsif ch < 2048
      bytes << (ch >> 6 | 192)
      bytes << (ch & 63 | 128)
    elsif ch < 0x10000
      bytes << (ch >> 12 | 224)
      bytes << (ch >> 6 & 63 | 128)
      bytes << (ch & 63 | 128)
    elsif ch < 0x200000
      bytes << (ch >> 18 | 240)
      bytes << (ch >> 12 & 63 | 128)
      bytes << (ch >> 6 & 63 | 128)
      bytes << (ch & 63 | 128)
    elsif ch < 0x4000000
      bytes << (ch >> 24 | 248)
      bytes << (ch >> 18 & 63 | 128)
      bytes << (ch >> 12 & 63 | 128)
      bytes << (ch >> 6 & 63 | 128)
      bytes << (ch & 63 | 128)
    elsif ch < 0x80000000
      bytes << (ch >> 30 | 252)
      bytes << (ch >> 24 & 63 | 128)
      bytes << (ch >> 18 & 63 | 128)
      bytes << (ch >> 12 & 63 | 128)
      bytes << (ch >> 6 & 63 | 128)
      bytes << (ch & 63 | 128)
    end
  end

  append_utf8.call(ch_one)
  append_utf8.call(ch_two)

  lookup_unicode_composition(bytes)
end

.unicode_decompose(unpacked) ⇒ `Object`

# File 'lib/addressable/idna/pure.rb', line 236

# Decomposes a list of codepoints.
#
# Codepoints in the Hangul syllable range are split through
# unicode_decompose_hangul. Any other codepoint that has a compatibility
# mapping is replaced by the recursive decomposition of that mapping,
# and codepoints without a mapping are kept as they are.
#
# @param unpacked [Array<Integer>] codepoints to decompose
# @return [Array<Integer>] a new array with the decomposed codepoints
def self.unicode_decompose(unpacked)
  last_syllable = HANGUL_SBASE + HANGUL_SCOUNT - 1
  unpacked.each_with_object([]) do |codepoint, result|
    if codepoint.between?(HANGUL_SBASE, last_syllable)
      lead, vowel, trail = unicode_decompose_hangul(codepoint)
      result << lead
      result << vowel if vowel
      result << trail if trail
    elsif (mapping = lookup_unicode_compatibility(codepoint))
      result.concat(unicode_decompose(mapping.unpack("U*")))
    else
      result << codepoint
    end
  end
end

.unicode_decompose_hangul(codepoint) ⇒ `Object`

# File 'lib/addressable/idna/pure.rb', line 257

# Splits a Hangul syllable codepoint into its jamo parts.
#
# @param codepoint [Integer] the codepoint to split
# @return [Array] three elements: leading consonant, vowel and trailing
#   consonant. The trailing consonant is nil for syllables without one;
#   for a codepoint outside the syllable range the result is
#   [codepoint, nil, nil].
def self.unicode_decompose_hangul(codepoint)
  offset = codepoint - HANGUL_SBASE
  if offset < 0 || offset >= HANGUL_SCOUNT
    return [codepoint, nil, nil]
  end

  lead_index, within_lead = offset.divmod(HANGUL_NCOUNT)
  vowel_index = within_lead / HANGUL_TCOUNT
  trail_index = offset % HANGUL_TCOUNT

  # A trail index of 0 means the syllable has no trailing consonant.
  trail = trail_index == 0 ? nil : HANGUL_TBASE + trail_index
  [HANGUL_LBASE + lead_index, HANGUL_VBASE + vowel_index, trail]
end

.unicode_downcase(input) ⇒ `String`

This method is part of a private API. You should avoid using this method if possible, as it may be removed or be changed in the future.

Unicode aware downcase method.

Parameters:

input (String) —

The input string.

Returns:

(String) —

The downcased result.

# File 'lib/addressable/idna/pure.rb', line 122

# Unicode aware downcase method.
#
# Each codepoint of the input is replaced by the result of
# lookup_unicode_lowercase and the codepoints are packed back into a
# string.
#
# @api private
# @param input [String] the input string
# @return [String] the downcased result
def self.unicode_downcase(input)
  input.unpack("U*").map { |cp| lookup_unicode_lowercase(cp) }.pack("U*")
end

.unicode_normalize_kc(value) ⇒ `Object`

Unicode normalization form KC.

# File 'lib/addressable/idna/pure.rb', line 108

# Unicode normalization form KC.
#
# Runs the codepoints of the input through unicode_decompose,
# unicode_sort_canonical and unicode_compose, in that order, and packs
# the result back into a string.
#
# @param input [String] the string to normalize
# @return [String] the normalized string
def self.unicode_normalize_kc(input)
  decomposed = unicode_decompose(input.unpack("U*"))
  ordered = unicode_sort_canonical(decomposed)
  unicode_compose(ordered).pack("U*")
end

.unicode_sort_canonical(unpacked) ⇒ `Object`

# File 'lib/addressable/idna/pure.rb', line 212

# Reorders combining marks by combining class.
#
# Two neighbouring codepoints are swapped when both have a non-zero
# combining class and the left one's class is greater. After a swap the
# scan steps back one position so the moved codepoint is compared with
# its new left neighbour.
#
# @param unpacked [Array<Integer>] codepoints to reorder
# @return [Array<Integer>] a reordered copy; the argument is not
#   modified
def self.unicode_sort_canonical(unpacked)
  sorted = unpacked.dup
  return sorted if sorted.length < 2

  pos = 1
  while pos < sorted.length
    left_cc = lookup_unicode_combining_class(sorted[pos - 1])
    right_cc = lookup_unicode_combining_class(sorted[pos])
    if right_cc != 0 && left_cc != 0 && left_cc > right_cc
      sorted[pos - 1], sorted[pos] = sorted[pos], sorted[pos - 1]
      pos -= 1 if pos > 1
    else
      pos += 1
    end
  end
  sorted
end

Module: Addressable::IDNA

Defined Under Namespace

Constant Summary collapse

Class Method Summary collapse

Class Method Details

.lookup_unicode_combining_class(codepoint) ⇒ Object

.lookup_unicode_compatibility(codepoint) ⇒ Object

.lookup_unicode_composition(unpacked) ⇒ Object

.lookup_unicode_lowercase(codepoint) ⇒ Object

.punycode_adapt(delta, numpoints, firsttime) ⇒ Object

.punycode_basic?(codepoint) ⇒ Boolean

.punycode_decode(value) ⇒ Object

.punycode_decode_digit(codepoint) ⇒ Object

.punycode_delimiter?(codepoint) ⇒ Boolean

.punycode_encode(value) ⇒ Object

.punycode_encode_digit(d) ⇒ Object

.to_ascii(value) ⇒ Object

.to_unicode(value) ⇒ Object

.unicode_compose(unpacked) ⇒ Object

.unicode_compose_pair(ch_one, ch_two) ⇒ Object

.unicode_decompose(unpacked) ⇒ Object

.unicode_decompose_hangul(codepoint) ⇒ Object

.unicode_downcase(input) ⇒ String

.unicode_normalize_kc(value) ⇒ Object

.unicode_sort_canonical(unpacked) ⇒ Object

.lookup_unicode_combining_class(codepoint) ⇒ `Object`

.lookup_unicode_compatibility(codepoint) ⇒ `Object`

.lookup_unicode_composition(unpacked) ⇒ `Object`

.lookup_unicode_lowercase(codepoint) ⇒ `Object`

.punycode_adapt(delta, numpoints, firsttime) ⇒ `Object`

.punycode_basic?(codepoint) ⇒ `Boolean`

.punycode_decode(value) ⇒ `Object`

.punycode_decode_digit(codepoint) ⇒ `Object`

.punycode_delimiter?(codepoint) ⇒ `Boolean`

.punycode_encode(value) ⇒ `Object`

.punycode_encode_digit(d) ⇒ `Object`

.to_ascii(value) ⇒ `Object`

.to_unicode(value) ⇒ `Object`

.unicode_compose(unpacked) ⇒ `Object`

.unicode_compose_pair(ch_one, ch_two) ⇒ `Object`

.unicode_decompose(unpacked) ⇒ `Object`

.unicode_decompose_hangul(codepoint) ⇒ `Object`

.unicode_downcase(input) ⇒ `String`

.unicode_normalize_kc(value) ⇒ `Object`

.unicode_sort_canonical(unpacked) ⇒ `Object`