Module: UnicodeUtils::Impl

Defined in:: lib/unicode_utils/nfc.rb,
lib/unicode_utils/debug.rb,
lib/unicode_utils/each_word.rb,
lib/unicode_utils/read_cdata.rb,
lib/unicode_utils/conditional_casing.rb,
lib/unicode_utils/canonical_decomposition.rb,
lib/unicode_utils/compatibility_decomposition.rb,
lib/unicode_utils/hangul_syllable_decomposition.rb

Overview

:nodoc:

Defined Under Namespace

Modules: NFC Classes: AfterIConditionalCasing, AfterSoftDottedConditionalCasing, BeforeDotConditionalCasing, ConditionalCasing, FinalSigmaConditionalCasing, MoreAboveConditionalCasing, NotBeforeDotConditionalCasing

Constant Summary collapse

# Code points loaded from the "composition_exclusion_set" data file via
# Impl.read_code_point_set, i.e. a Hash in which every listed code point
# (Integer) maps to true.
COMPOSITION_EXCLUSION_SET =

Impl.read_code_point_set("composition_exclusion_set")

# Inverse of CANONICAL_DECOMPOSITION_MAP for two-element decompositions,
# organised as a two-level lookup: first element => { second element =>
# composite }. Decompositions of any other length are skipped.
CANONICAL_COMPOSITION_MAP =

CANONICAL_DECOMPOSITION_MAP.each_with_object({}) do |(composite, parts), table|
  next unless parts.length == 2
  by_second = (table[parts[0]] ||= {})
  by_second[parts[1]] = composite
end

# Column definitions for the debug table. Each key is a column heading and
# each value is a lambda that takes a code point (Integer) and returns the
# text to show in that column.
DEBUG_COLUMNS =

{
  "Char" => lambda { |cp|
    # A handful of control characters are shown as their escape sequence.
    escaped = {
      0x07 => '"\a"',
      0x08 => '"\b"',
      0x09 => '"\t"',
      0x0A => '"\n"',
      0x0D => '"\r"'
    }[cp]
    if escaped
      escaped
    elsif UnicodeUtils.graphic_char?(cp) && UnicodeUtils.char_display_width(cp) > 0
      # Only graphic characters that occupy display space are shown quoted.
      "\"#{cp.chr(Encoding::UTF_8)}\""
    else
      "N/A"
    end
  },
  "Ordinal" => lambda { |cp|
    # Upper-case hex, right-aligned in a field of 7 characters.
    hex = cp.to_s(16).upcase
    hex.rjust(7)
  },
  "Sid" => lambda { |cp| UnicodeUtils.sid(cp) },
  "General Category" => lambda { |cp|
    category = UnicodeUtils.general_category(cp)
    category.to_s
  },
  "UTF-8" => lambda { |cp|
    begin
      encoded = cp.chr(Encoding::UTF_8)
      encoded.each_byte.map { |byte| "%02X" % byte }.join(" ")
    rescue RangeError # surrogate code points are not valid in utf-8
      "N/A"
    end
  }
}

# Translates the integer codes 1..5 into East Asian Width symbols. The
# read_east_asian_width_* readers use it to convert the hex digit stored in
# the data files; read_east_asian_width_per_cp falls back to :Neutral for
# code points that are not listed.
EAST_ASIAN_WIDTH_SYMBOL_MAP =

{
  1 => :Ambiguous,
  2 => :Halfwidth,
  3 => :Wide,
  4 => :Fullwidth,
  5 => :Narrow
}.freeze

# Translates the integer codes 1..5 into name alias type symbols.
# read_name_aliases uses it to convert the single hex digit that precedes
# each alias in the data file.
NAME_ALIAS_TYPE_TO_SYMBOL_MAP =

{
  1 => :correction,
  2 => :control,
  3 => :alternate,
  4 => :figment,
  5 => :abbreviation
}.freeze

# Set-like Hash of language id symbols (each maps to true). Presumably these
# are the languages that have language-specific casing rules -- confirm
# against the callers. Frozen like the other lookup tables in this module so
# the shared constant cannot be modified by accident.
LANGS_WITH_RULES =

{:tr => true, :lt => true, :az => true}.freeze

# Conditional casing tables, one per case operation, each loaded by
# read_conditional_casings from its own data file. Per that reader, each
# table maps a code point (Integer) to a Hash of language id (Symbol, or nil
# when the record has no language) => conditional casing object.

# Table consulted by conditional_upcase_mapping.
CONDITIONAL_UPCASE_MAP =

read_conditional_casings("cond_uc_map")

# Table consulted by conditional_downcase_mapping.
CONDITIONAL_DOWNCASE_MAP =

read_conditional_casings("cond_lc_map")

# Table consulted by conditional_titlecase_mapping.
CONDITIONAL_TITLECASE_MAP =

read_conditional_casings("cond_tc_map")

Class Method Summary collapse

.append_hangul_syllable_decomposition(str, s) ⇒ Object
.append_recursive_canonical_decomposition_mapping(str, mapping) ⇒ Object
.append_recursive_compatibility_decomposition_mapping(str, cp) ⇒ Object
.column_widths(table) ⇒ Object
.composition(str) ⇒ Object
.conditional_downcase_mapping(cp, str, pos, language_id) ⇒ Object
.conditional_titlecase_mapping(cp, str, pos, language_id) ⇒ Object
.conditional_upcase_mapping(cp, str, pos, language_id) ⇒ Object
.open_cdata_file(filename, &block) ⇒ Object
.print_row(row, column_widths, io) ⇒ Object
.print_separator_row(column_widths, io) ⇒ Object
.print_table(table, io) ⇒ Object
.put_into_canonical_order(str) ⇒ Object
.read_code_point_map(filename) ⇒ Object
.read_code_point_set(filename) ⇒ Object
.read_combining_class_map ⇒ Object
.read_conditional_casings(filename) ⇒ Object
.read_east_asian_width_per_cp(filename) ⇒ Object
.read_east_asian_width_ranges(filename) ⇒ Object
.read_general_category_per_cp(filename) ⇒ Object
.read_general_category_ranges(filename) ⇒ Object
.read_hexdigit_map(filename) ⇒ Object

Read a map whose keys are code points (6 hexdigits, converted to integer) and whose values are single hexdigits (converted to integer).
.read_multivalued_map(filename) ⇒ Object
.read_name_aliases(filename) ⇒ Object
.read_names(filename) ⇒ Object
.read_range_to_hexdigit_list(filename) ⇒ Object

Returns a list (array) of pairs (two element Arrays) of Range (code points) and associated integer value.
.read_symbol_map(filename) ⇒ Object
.word_break?(cs, i) ⇒ Boolean

Class Method Details

.append_hangul_syllable_decomposition(str, s) ⇒ `Object`

# File 'lib/unicode_utils/hangul_syllable_decomposition.rb', line 20

# Appends the decomposition of the code point +s+ to +str+.
#
# When +s+ falls inside the block of 11172 precomposed Hangul syllables that
# starts at U+AC00, the leading consonant and vowel jamo are appended,
# followed by the trailing consonant jamo if the syllable has one. Any other
# code point is appended unchanged.
#
# str:: object that accepts code points via <<
# s:: code point (Integer)
def self.append_hangul_syllable_decomposition(str, s)
  syllable_base = 0xAC00
  leading_base = 0x1100
  vowel_base = 0x1161
  trailing_base = 0x11A7
  syllable_count = 11172
  trailing_count = 28
  # number of syllables that share one leading consonant (21 vowels each)
  per_leading = 21 * trailing_count

  offset = s - syllable_base
  return str << s unless offset >= 0 && offset < syllable_count

  leading_index, within_leading = offset.divmod(per_leading)
  vowel_index = within_leading / trailing_count
  trailing_index = offset % trailing_count
  str << (leading_base + leading_index) << (vowel_base + vowel_index)
  # index 0 means "no trailing consonant"
  str << (trailing_base + trailing_index) if trailing_index != 0
end

.append_recursive_canonical_decomposition_mapping(str, mapping) ⇒ `Object`

# File 'lib/unicode_utils/canonical_decomposition.rb', line 48

# Appends the fully expanded canonical decomposition of every code point in
# +mapping+ to +str+, in order.
#
# A code point that has an entry in CANONICAL_DECOMPOSITION_MAP is replaced
# by the recursive expansion of that entry; one without an entry is appended
# as is.
#
# str:: object that accepts code points via <<
# mapping:: enumerable of code points (Integers)
def self.append_recursive_canonical_decomposition_mapping(str, mapping)
  mapping.each do |code_point|
    nested = CANONICAL_DECOMPOSITION_MAP[code_point]
    next str << code_point unless nested
    append_recursive_canonical_decomposition_mapping(str, nested)
  end
end

.append_recursive_compatibility_decomposition_mapping(str, cp) ⇒ `Object`

# File 'lib/unicode_utils/compatibility_decomposition.rb', line 41

# Appends the fully expanded compatibility decomposition of +cp+ to +str+.
#
# The entry for +cp+ is taken from COMPATIBILITY_DECOMPOSITION_MAP, falling
# back to CANONICAL_DECOMPOSITION_MAP. Each code point of the entry is
# expanded recursively; a code point with no entry in either map is appended
# as is.
#
# str:: object that accepts code points via <<
# cp:: code point (Integer)
def self.append_recursive_compatibility_decomposition_mapping(str, cp)
  expansion = COMPATIBILITY_DECOMPOSITION_MAP[cp] || CANONICAL_DECOMPOSITION_MAP[cp]
  return str << cp unless expansion

  expansion.each do |part|
    append_recursive_compatibility_decomposition_mapping(str, part)
  end
end

.column_widths(table) ⇒ `Object`

# File 'lib/unicode_utils/debug.rb', line 89

# Returns an Array holding, for each column index, the largest display width
# (as reported by UnicodeUtils.display_width) of any cell in that column.
#
# table:: enumerable of rows, each row an enumerable of cell texts
def self.column_widths(table)
  table.each_with_object([]) do |cells, widest|
    cells.each_with_index do |text, column|
      width = UnicodeUtils.display_width(text)
      known = widest[column]
      # keep the running maximum; the first cell seen in a column always wins
      widest[column] = width unless known && known >= width
    end
  end
end

.composition(str) ⇒ `Object`

# File 'lib/unicode_utils/nfc.rb', line 46

# Returns a new string (in the encoding of +str+) in which composable code
# point sequences of +str+ have been replaced by their composites.
#
# The string is scanned once. The most recent starter (a code point whose
# entry in COMBINING_CLASS_MAP is 0) is held back in +last_starter+ so that
# following code points can still be combined with it:
#
# * Hangul: a leading consonant jamo followed by a vowel jamo becomes an LV
#   syllable, and an LV syllable followed by a trailing consonant jamo
#   becomes an LVT syllable (computed arithmetically).
# * Everything else: pairs are looked up in Impl::CANONICAL_COMPOSITION_MAP
#   and accepted only if Impl::NFC.primary_composite? approves the result.
#
# Non-starters that could not be combined are buffered and written out after
# the starter they follow.
#
# str:: string to compose
def self.composition(str)
  ### constants for hangul composition ###
  sbase = 0xAC00
  lbase = 0x1100
  vbase = 0x1161
  tbase = 0x11A7
  lcount = 19
  vcount = 21
  tcount = 28
  ncount = vcount * tcount
  scount = lcount * ncount
  ########################################

  String.new.force_encoding(str.encoding).tap do |res|
    last_starter = nil
    uncomposable_non_starters = []
    str.each_codepoint { |cp|
      if COMBINING_CLASS_MAP[cp] == 0 # starter?
        combined = false
        # Two starters can only combine when nothing uncomposable sits
        # between them.
        if last_starter && uncomposable_non_starters.empty?
          ### hangul ###
          # leading consonant + vowel => LV syllable
          lindex = last_starter - lbase
          if 0 <= lindex && lindex < lcount
            vindex = cp - vbase
            # The upper bound is exclusive: there are exactly vcount vowels.
            # Accepting vindex == vcount would turn vbase + vcount into a
            # syllable that belongs to the next leading consonant.
            if 0 <= vindex && vindex < vcount
              last_starter =
                sbase + (lindex * vcount + vindex) * tcount
              combined = true
            end
          end
          unless combined
            # LV syllable (no trailing consonant yet) + trailing consonant
            # => LVT syllable
            sindex = last_starter - sbase
            if 0 <= sindex && sindex < scount && (sindex % tcount) == 0
              tindex = cp - tbase
              if 0 <= tindex && tindex < tcount
                last_starter += tindex
                combined = true
              end
            end
          end
          ##############
          unless combined
            map = Impl::CANONICAL_COMPOSITION_MAP[last_starter]
            composition = map && map[cp]
            if composition && Impl::NFC.primary_composite?(composition)
              last_starter = composition
              combined = true
            end
          end
        end
        unless combined
          # Flush the previous starter with its leftover non-starters and
          # make cp the new pending starter.
          res << last_starter if last_starter
          uncomposable_non_starters.each { |nc| res << nc }
          uncomposable_non_starters.clear
          last_starter = cp
        end
      else
        last_non_starter = uncomposable_non_starters.last
        if last_non_starter && Impl::NFC.blocked?(last_non_starter, cp)
          uncomposable_non_starters << cp
        else
          # last_starter may still be nil here; the lookup then yields nil
          # and cp is buffered as uncomposable.
          map = Impl::CANONICAL_COMPOSITION_MAP[last_starter]
          composition = map && map[cp]
          if composition && Impl::NFC.primary_composite?(composition)
            last_starter = composition
          else
            uncomposable_non_starters << cp
          end
        end
      end
    }
    # Flush whatever is still pending at the end of the input.
    res << last_starter if last_starter
    uncomposable_non_starters.each { |nc| res << nc }
  end
end

.conditional_downcase_mapping(cp, str, pos, language_id) ⇒ `Object`

# File 'lib/unicode_utils/conditional_casing.rb', line 140

# Looks up a conditional downcase mapping for +cp+.
#
# The casing registered for +language_id+ in CONDITIONAL_DOWNCASE_MAP is
# preferred, with the entry stored under nil as fallback. Its mapping is
# returned only if its context_match? accepts +str+ and +pos+; in every other
# case the result is nil.
def self.conditional_downcase_mapping(cp, str, pos, language_id)
  per_language = CONDITIONAL_DOWNCASE_MAP[cp]
  return unless per_language

  rule = per_language[language_id] || per_language[nil]
  rule.mapping if rule && rule.context_match?(str, pos)
end

.conditional_titlecase_mapping(cp, str, pos, language_id) ⇒ `Object`

# File 'lib/unicode_utils/conditional_casing.rb', line 150

# Looks up a conditional titlecase mapping for +cp+.
#
# The casing registered for +language_id+ in CONDITIONAL_TITLECASE_MAP is
# preferred, with the entry stored under nil as fallback. Its mapping is
# returned only if its context_match? accepts +str+ and +pos+; in every other
# case the result is nil.
def self.conditional_titlecase_mapping(cp, str, pos, language_id)
  per_language = CONDITIONAL_TITLECASE_MAP[cp]
  return unless per_language

  rule = per_language[language_id] || per_language[nil]
  rule.mapping if rule && rule.context_match?(str, pos)
end

.conditional_upcase_mapping(cp, str, pos, language_id) ⇒ `Object`

# File 'lib/unicode_utils/conditional_casing.rb', line 130

# Looks up a conditional upcase mapping for +cp+.
#
# The casing registered for +language_id+ in CONDITIONAL_UPCASE_MAP is
# preferred, with the entry stored under nil as fallback. Its mapping is
# returned only if its context_match? accepts +str+ and +pos+; in every other
# case the result is nil.
def self.conditional_upcase_mapping(cp, str, pos, language_id)
  per_language = CONDITIONAL_UPCASE_MAP[cp]
  return unless per_language

  rule = per_language[language_id] || per_language[nil]
  rule.mapping if rule && rule.context_match?(str, pos)
end

.open_cdata_file(filename, &block) ⇒ `Object`



27
28
29

# File 'lib/unicode_utils/read_cdata.rb', line 27

# Opens the data file +filename+ located in CDATA_DIR for reading, with
# US-ASCII as external encoding and no internal encoding, and hands the
# given block on to File.open.
def self.open_cdata_file(filename, &block)
  path = File.join(CDATA_DIR, filename)
  File.open(path, "r:US-ASCII:-", &block)
end

.print_row(row, column_widths, io) ⇒ `Object`

# File 'lib/unicode_utils/debug.rb', line 101

# Prints one table row to +io+ and ends it with a newline.
#
# Every cell is preceded by a single space. Every cell except the last is
# padded with spaces up to its column's width (display widths come from
# UnicodeUtils.display_width), followed by one more space and a "|".
#
# row:: array of cell texts
# column_widths:: array of column widths, indexed like +row+
# io:: output object responding to print and puts
def self.print_row(row, column_widths, io)
  last_column = row.length - 1
  row.each_with_index do |text, column|
    io.print(" ")
    io.print(text)
    next if column == last_column

    padding = column_widths[column] - UnicodeUtils.display_width(text)
    io.print(" " * (padding + 1))
    io.print("|")
  end
  io.puts
end

.print_separator_row(column_widths, io) ⇒ `Object`

# File 'lib/unicode_utils/debug.rb', line 115

# Prints a horizontal separator line to +io+ and ends it with a newline.
#
# Each column contributes its width plus two dashes; neighbouring columns are
# joined by "+".
#
# column_widths:: array of column widths
# io:: output object responding to print and puts
def self.print_separator_row(column_widths, io)
  final_column = column_widths.length - 1
  column_widths.each_with_index do |width, column|
    io.print("-" * (width + 2))
    io.print("+") unless column == final_column
  end
  io.puts
end

.print_table(table, io) ⇒ `Object`

# File 'lib/unicode_utils/debug.rb', line 125

# Prints +table+ to +io+: the first row as header, then a separator line,
# then all remaining rows, and finally flushes +io+.
#
# Column widths are computed once with column_widths and shared by all rows.
#
# table:: array of rows, the first one being the header
# io:: output object handed to print_row / print_separator_row
def self.print_table(table, io)
  widths = column_widths(table)
  header = table[0]
  print_row(header, widths, io)
  print_separator_row(widths, io)
  table[1..-1].each do |body_row|
    print_row(body_row, widths, io)
  end
  io.flush
end

.put_into_canonical_order(str) ⇒ `Object`

# File 'lib/unicode_utils/canonical_decomposition.rb', line 59

# Returns +str+ with adjacent code points swapped wherever a code point with
# a non-zero combining class directly follows a code point with a greater
# combining class (classes are looked up in COMBINING_CLASS_MAP).
#
# If no such pair exists, +str+ itself is returned. Otherwise a new string in
# the encoding of +str+ is built by one pass of swaps and the method recurses
# on the result, so passes repeat until one finds nothing left to swap.
def self.put_into_canonical_order(str)
  # First pass: only detect whether any adjacent pair is out of order, so
  # that already ordered input is returned without building a copy.
  reorder_needed = false
  last_cp = nil
  last_cc = nil
  str.each_codepoint { |cp|
    cc = COMBINING_CLASS_MAP[cp]
    if last_cp && cc != 0 && last_cc > cc
      reorder_needed = true
      break
    end
    last_cp = cp
    last_cc = cc
  }
  return str unless reorder_needed
  # Second pass: copy into res, always holding the previous code point back
  # in last_cp so it can be written after the current one when they need to
  # be swapped.
  res = String.new.force_encoding(str.encoding)
  last_cp = nil
  last_cc = nil
  str.each_codepoint { |cp|
    cc = COMBINING_CLASS_MAP[cp]
    if last_cp
      if cc != 0 && last_cc > cc
        # Out of order: write the current code point first; clearing cp/cc
        # leaves nothing pending once last_cp has been written below.
        res << cp
        cp = nil
        cc = nil
      end
      res << last_cp
    end
    last_cp = cp
    last_cc = cc
  }
  # Write the code point that is still pending, if any.
  res << last_cp if last_cp
  # One pass only swaps neighbours; repeat until nothing is out of order.
  put_into_canonical_order(res)
end

.read_code_point_map(filename) ⇒ `Object`

# File 'lib/unicode_utils/read_cdata.rb', line 43

# Reads the data file +filename+ as a sequence of 12-character records, each
# consisting of two 6-digit hexadecimal numbers, and returns a Hash mapping
# the first number of every record to the second (both as Integers).
def self.read_code_point_map(filename)
  result = {}
  open_cdata_file(filename) do |input|
    # one reusable 6-byte read buffer
    chunk = ("x" * 6).force_encoding(Encoding::US_ASCII)
    while input.read(6, chunk)
      key = chunk.to_i(16)
      result[key] = input.read(6, chunk).to_i(16)
    end
  end
  result
end

.read_code_point_set(filename) ⇒ `Object`

# File 'lib/unicode_utils/read_cdata.rb', line 31

# Reads the data file +filename+ as a sequence of 6-digit hexadecimal code
# points and returns them as a set-like Hash in which every code point
# (Integer) maps to true.
def self.read_code_point_set(filename)
  members = {}
  open_cdata_file(filename) do |input|
    # one reusable 6-byte read buffer
    chunk = ("x" * 6).force_encoding(Encoding::US_ASCII)
    members[chunk.to_i(16)] = true while input.read(6, chunk)
  end
  members
end

.read_combining_class_map ⇒ `Object`

# File 'lib/unicode_utils/read_cdata.rb', line 101

# Reads the "combining_class_map" data file as a sequence of 8-character
# records (a 6-digit hexadecimal code point followed by a 2-digit hexadecimal
# value) and returns a Hash mapping each code point to its value, both as
# Integers.
def self.read_combining_class_map
  classes = {}
  open_cdata_file("combining_class_map") do |input|
    ascii = Encoding::US_ASCII
    cp_chunk = ("x" * 6).force_encoding(ascii)
    class_chunk = ("x" * 2).force_encoding(ascii)
    while input.read(6, cp_chunk)
      code_point = cp_chunk.to_i(16)
      classes[code_point] = input.read(2, class_chunk).to_i(16)
    end
  end
  classes
end

.read_conditional_casings(filename) ⇒ `Object`

# File 'lib/unicode_utils/read_cdata.rb', line 84

# Reads conditional casing records from the data file +filename+, one record
# per line with fields separated by ";":
#
#   code point (hex);mapping (comma separated hex);language id;context
#
# An empty language id is stored as nil. The context field, with underscores
# removed, selects the class Impl::<Context>ConditionalCasing; without a
# context field Impl::ConditionalCasing itself is used. The class is
# instantiated with the mapping (array of Integers).
#
# Returns a Hash: code point => { language id (Symbol or nil) => casing }.
def self.read_conditional_casings(filename)
  casings = {}
  open_cdata_file(filename) do |input|
    input.each_line do |line|
      fields = line.chomp.split(";")
      code_point = fields[0].to_i(16)
      replacement = fields[1].split(",").map { |hex| hex.to_i(16) }
      lang = fields[2].empty? ? nil : fields[2].to_sym
      context_name = fields[3] && fields[3].gsub('_', '')
      casing_class = Impl.const_get("#{context_name}ConditionalCasing")
      casing = casing_class.new(replacement)
      (casings[code_point] ||= {})[lang] = casing
    end
  end
  casings
end

.read_east_asian_width_per_cp(filename) ⇒ `Object`

# File 'lib/unicode_utils/read_cdata.rb', line 151

# Like read_hexdigit_map, but the hex digit of each record is translated to a
# symbol through EAST_ASIAN_WIDTH_SYMBOL_MAP.
#
# Returns a Hash mapping code points (Integers) to symbols; its default value
# for code points that are not listed is :Neutral.
def self.read_east_asian_width_per_cp(filename)
  widths = Hash.new(:Neutral)
  open_cdata_file(filename) do |input|
    ascii = Encoding::US_ASCII
    cp_chunk = ("x" * 6).force_encoding(ascii)
    digit_chunk = "x".force_encoding(ascii)
    while input.read(6, cp_chunk)
      code_point = cp_chunk.to_i(16)
      digit = input.read(1, digit_chunk).to_i(16)
      widths[code_point] = EAST_ASIAN_WIDTH_SYMBOL_MAP[digit]
    end
  end
  widths
end

.read_east_asian_width_ranges(filename) ⇒ `Object`

# File 'lib/unicode_utils/read_cdata.rb', line 167

# Reads +filename+ with read_range_to_hexdigit_list and replaces the integer
# value of every [range, value] pair, in place, by the corresponding symbol
# from EAST_ASIAN_WIDTH_SYMBOL_MAP. Returns the resulting list.
def self.read_east_asian_width_ranges(filename)
  ranges = read_range_to_hexdigit_list(filename)
  ranges.each do |entry|
    entry[1] = EAST_ASIAN_WIDTH_SYMBOL_MAP[entry[1]]
  end
  ranges
end

.read_general_category_per_cp(filename) ⇒ `Object`

# File 'lib/unicode_utils/read_cdata.rb', line 175

# Reads the data file +filename+ as a sequence of 8-character records (a
# 6-digit hexadecimal code point followed by a 2-character category name)
# and returns a Hash mapping each code point (Integer) to the category as a
# Symbol.
def self.read_general_category_per_cp(filename)
  categories = {}
  open_cdata_file(filename) do |input|
    ascii = Encoding::US_ASCII
    cp_chunk = ("x" * 6).force_encoding(ascii)
    category_chunk = ("x" * 2).force_encoding(ascii)
    while input.read(6, cp_chunk)
      code_point = cp_chunk.to_i(16)
      categories[code_point] = input.read(2, category_chunk).to_sym
    end
  end
  categories
end

.read_general_category_ranges(filename) ⇒ `Object`

# File 'lib/unicode_utils/read_cdata.rb', line 189

# Reads the data file +filename+ as a sequence of 14-character records (two
# 6-digit hexadecimal code points followed by a 2-character category name)
# and returns an Array of [Range, Symbol] pairs, where the Range spans the
# two code points inclusively.
def self.read_general_category_ranges(filename)
  ranges = []
  open_cdata_file(filename) do |input|
    ascii = Encoding::US_ASCII
    cp_chunk = ("x" * 6).force_encoding(ascii)
    category_chunk = ("x" * 2).force_encoding(ascii)
    while input.read(6, cp_chunk)
      first = cp_chunk.to_i(16)
      last = input.read(6, cp_chunk).to_i(16)
      category = input.read(2, category_chunk).to_sym
      ranges << [Range.new(first, last), category]
    end
  end
  ranges
end

.read_hexdigit_map(filename) ⇒ `Object`

Read a map whose keys are code points (6 hexdigits, converted to integer) and whose values are single hexdigits (converted to integer).

# File 'lib/unicode_utils/read_cdata.rb', line 118

# Reads the data file +filename+ as a sequence of 7-character records (a
# 6-digit hexadecimal code point followed by a single hexadecimal digit) and
# returns a Hash mapping each code point to the digit's value, both as
# Integers.
def self.read_hexdigit_map(filename)
  result = {}
  open_cdata_file(filename) do |input|
    ascii = Encoding::US_ASCII
    cp_chunk = ("x" * 6).force_encoding(ascii)
    digit_chunk = "x".force_encoding(ascii)
    while input.read(6, cp_chunk)
      code_point = cp_chunk.to_i(16)
      result[code_point] = input.read(1, digit_chunk).to_i(16)
    end
  end
  result
end

.read_multivalued_map(filename) ⇒ `Object`

# File 'lib/unicode_utils/read_cdata.rb', line 55

# Reads the data file +filename+ as a sequence of 6-character fields. Each
# record starts with a hexadecimal key, followed by any number of hexadecimal
# values, and is terminated by a field whose first byte is "x".
#
# Returns a Hash mapping each key to the Array of its values (all Integers).
def self.read_multivalued_map(filename)
  result = {}
  open_cdata_file(filename) do |input|
    # one reusable 6-byte read buffer
    chunk = ("x" * 6).force_encoding(Encoding::US_ASCII)
    while input.read(6, chunk)
      key = chunk.to_i(16)
      values = []
      # 120 is the byte value of "x", which marks the end of the record
      values << chunk.to_i(16) until input.read(6, chunk).getbyte(0) == 120
      result[key] = values
    end
  end
  result
end

.read_name_aliases(filename) ⇒ `Object`

# File 'lib/unicode_utils/read_cdata.rb', line 219

# Reads name alias records from the data file +filename+. A record consists
# of a 6-digit hexadecimal code point and a single hex digit giving the
# number of aliases; every alias is stored as one hex digit for its type
# (translated through NAME_ALIAS_TYPE_TO_SYMBOL_MAP), two hex digits for the
# length of its name, and the name itself.
#
# Returns a Hash mapping each code point (Integer) to a frozen Array of
# NameAlias objects built from frozen name strings.
def self.read_name_aliases(filename)
  result = {}
  open_cdata_file(filename) do |input|
    ascii = Encoding::US_ASCII
    cp_chunk = ("x" * 6).force_encoding(ascii)
    count_chunk = ("x" * 1).force_encoding(ascii)
    type_chunk = ("x" * 1).force_encoding(ascii)
    length_chunk = ("x" * 2).force_encoding(ascii)
    while input.read(6, cp_chunk)
      alias_count = input.read(1, count_chunk).to_i(16)
      aliases = Array.new(alias_count) do
        type = NAME_ALIAS_TYPE_TO_SYMBOL_MAP[input.read(1, type_chunk).to_i(16)]
        name_length = input.read(2, length_chunk).to_i(16)
        name = input.read(name_length)
        NameAlias.new(name.freeze, type)
      end
      result[cp_chunk.to_i(16)] = aliases.freeze
    end
  end
  result
end

.read_names(filename) ⇒ `Object`

# File 'lib/unicode_utils/read_cdata.rb', line 72

# Reads the data file +filename+, in which every line consists of a 6-digit
# hexadecimal code point immediately followed by a name, and returns a Hash
# mapping each code point (Integer) to its name with the line ending removed.
def self.read_names(filename)
  names = {}
  open_cdata_file(filename) do |input|
    # reusable 6-byte buffer for the code point prefix of each line
    chunk = ("x" * 6).force_encoding(Encoding::US_ASCII)
    while input.read(6, chunk)
      code_point = chunk.to_i(16)
      name = input.gets
      name.chomp!
      names[code_point] = name
    end
  end
  names
end

.read_range_to_hexdigit_list(filename) ⇒ `Object`

Returns a list (array) of pairs (two element Arrays) of Range (code points) and associated integer value.

# File 'lib/unicode_utils/read_cdata.rb', line 134

# Reads the data file +filename+ as a sequence of 13-character records (two
# 6-digit hexadecimal code points followed by a single hexadecimal digit).
#
# Returns an Array of two-element Arrays, each holding the inclusive Range
# between the two code points and the digit's value as Integer.
def self.read_range_to_hexdigit_list(filename)
  pairs = []
  open_cdata_file(filename) do |input|
    ascii = Encoding::US_ASCII
    cp_chunk = ("x" * 6).force_encoding(ascii)
    digit_chunk = "x".force_encoding(ascii)
    while input.read(6, cp_chunk)
      first = cp_chunk.to_i(16)
      last = input.read(6, cp_chunk).to_i(16)
      value = input.read(1, digit_chunk).to_i(16)
      pairs << [Range.new(first, last), value]
    end
  end
  pairs
end

.read_symbol_map(filename) ⇒ `Object`

# File 'lib/unicode_utils/read_cdata.rb', line 206

# Reads the data file +filename+, in which every line holds two names
# separated by ";", and returns a Hash mapping the first name to the second,
# both stripped of surrounding whitespace and converted to Symbols.
def self.read_symbol_map(filename)
  symbols = {}
  open_cdata_file(filename) do |input|
    input.each_line do |line|
      key, value = line.split(";")
      symbols[key.strip.to_sym] = value.strip.to_sym
    end
  end
  symbols
end

.word_break?(cs, i) ⇒ `Boolean`

Returns:

(Boolean)

# File 'lib/unicode_utils/each_word.rb', line 41

# Decides whether there is a word break between the elements at positions
# +i+ and +i+ + 1 of +cs+.
#
# +cs+ is indexed by position and yields small integer codes. The rule labels
# in the comments below (wb3 .. wb13c) suggest that these are word break
# property values and that the checks follow the word boundary rules of
# UAX #29, e.g. 0x0/0x1/0x2 for CR/LF/Newline and 0x3/0x4 for Extend/Format
# -- TODO confirm the exact code assignment against the code that builds
# +cs+.
#
# The rules are tested in order and the first one that matches determines the
# result.
#
# NOTE(review): the backward scans decrement their index without a lower
# bound, and i_1 starts at i0 - 1. If +cs+ is an Array, an index of -1 reads
# from the end of the array instead of yielding nil -- confirm that callers
# rule this out.
#
# Returns true if there is a break, false otherwise.
def self.word_break?(cs, i)
  # wb3
  cs_i = cs[i]
  i1 = i + 1
  cs_i1 = cs[i1]
  if cs_i == 0x0 && cs_i1 == 0x1
    return false
  end
  # wb3a
  if cs_i == 0x2 || cs_i == 0x0 || cs_i == 0x1
    return true
  end
  # wb3b
  if cs_i1 == 0x2 || cs_i1 == 0x0 || cs_i1 == 0x1
    return true
  end
  # wb5
  i0 = i
  # inline skip_l
  # (move i0 left past codes 0x3/0x4; ci0 is the first other code found)
  c = nil
  loop { c = cs[i0]; break unless c == 0x3 || c == 0x4; i0 -= 1 }
  ci0 = c
  if ci0 == 0x6 && cs_i1 == 0x6
    return false
  end
  # wb6
  i2 = i1 + 1
  # inline skip_r
  # (move i2 right past codes 0x3/0x4, starting after position i + 1)
  loop { c = cs[i2]; break unless c == 0x3 || c == 0x4; i2 += 1 }
  if ci0 == 0x6 && (cs_i1 == 0x7 || cs_i1 == 0x9) && cs[i2] == 0x6
    return false
  end
  # wb7
  i_1 = i0 - 1
  # inline skip_l
  # (move i_1 left past codes 0x3/0x4, starting before position i0)
  loop { c = cs[i_1]; break unless c == 0x3 || c == 0x4; i_1 -= 1 }
  if cs[i_1] == 0x6 && (ci0 == 0x7 || ci0 == 0x9) && cs_i1 == 0x6
    return false
  end
  # wb8
  if ci0 == 0xA && cs_i1 == 0xA
    return false
  end
  # wb9
  if ci0 == 0x6 && cs_i1 == 0xA
    return false
  end
  # wb10
  if ci0 == 0xA && cs_i1 == 0x6
    return false
  end
  # wb11
  if cs[i_1] == 0xA && (ci0 == 0x8 || ci0 == 0x9) && cs_i1 == 0xA
    return false
  end
  # wb12
  if ci0 == 0xA && (cs_i1 == 0x8 || cs_i1 == 0x9) && cs[i2] == 0xA
    return false
  end
  # wb13
  if ci0 == 0x5 && cs_i1 == 0x5
    return false
  end
  # wb13a
  if (ci0 == 0x6 || ci0 == 0xA || ci0 == 0x5 || ci0 == 0xB) && cs_i1 == 0xB
    return false
  end
  # wb13b
  if ci0 == 0xB && (cs_i1 == 0x6 || cs_i1 == 0xA || cs_i1 == 0x5)
    return false
  end
  # wb13c
  if ci0 == 0xC && cs_i1 == 0xC
    return false
  end
  # break unless next char is Extend/Format
  cs_i1 != 0x3 && cs_i1 != 0x4
end

Module: UnicodeUtils::Impl

Overview

Defined Under Namespace

Constant Summary collapse

Class Method Summary collapse

Class Method Details

.append_hangul_syllable_decomposition(str, s) ⇒ Object

.append_recursive_canonical_decomposition_mapping(str, mapping) ⇒ Object

.append_recursive_compatibility_decomposition_mapping(str, cp) ⇒ Object

.column_widths(table) ⇒ Object

.composition(str) ⇒ Object

.conditional_downcase_mapping(cp, str, pos, language_id) ⇒ Object

.conditional_titlecase_mapping(cp, str, pos, language_id) ⇒ Object

.conditional_upcase_mapping(cp, str, pos, language_id) ⇒ Object

.open_cdata_file(filename, &block) ⇒ Object

.print_row(row, column_widths, io) ⇒ Object

.print_separator_row(column_widths, io) ⇒ Object

.print_table(table, io) ⇒ Object

.put_into_canonical_order(str) ⇒ Object

.read_code_point_map(filename) ⇒ Object

.read_code_point_set(filename) ⇒ Object

.read_combining_class_map ⇒ Object

.read_conditional_casings(filename) ⇒ Object

.read_east_asian_width_per_cp(filename) ⇒ Object

.read_east_asian_width_ranges(filename) ⇒ Object

.read_general_category_per_cp(filename) ⇒ Object

.read_general_category_ranges(filename) ⇒ Object

.read_hexdigit_map(filename) ⇒ Object

.read_multivalued_map(filename) ⇒ Object

.read_name_aliases(filename) ⇒ Object

.read_names(filename) ⇒ Object

.read_range_to_hexdigit_list(filename) ⇒ Object

.read_symbol_map(filename) ⇒ Object

.word_break?(cs, i) ⇒ Boolean