Module: Unicode::Name

Defined in:
lib/unicode/name.rb,
lib/unicode/name/index.rb,
lib/unicode/name/constants.rb

Constant Summary collapse

HANGUL_START =
44032
HANGUL_END =
55203
HANGUL_MEDIAL_MAX =
588
HANGUL_FINAL_MAX =
28
VERSION =
"1.13.5"
UNICODE_VERSION =
"16.0.0"
DATA_DIRECTORY =
File.expand_path(File.dirname(__FILE__) + "/../../../data/").freeze
INDEX_FILENAME =
(DATA_DIRECTORY + "/name.marshal.gz").freeze

Class Method Summary collapse

Class Method Details

.aliases(char) ⇒ Object



45
46
47
48
49
# File 'lib/unicode/name.rb', line 45

# Looks up the alias entry stored in the index for a character.
#
# char - String; its first code point (decoded with unpack "U") is the key.
#
# Returns whatever INDEX[:ALIASES] holds for that code point, or nil when
# there is no entry.
def self.aliases(char)
  key = char.unpack("U").first
  # The index is loaded lazily on first use.
  require_relative "name/index" unless defined? ::Unicode::Name::INDEX
  alias_table = INDEX[:ALIASES]
  alias_table[key]
end

.correct(char) ⇒ Object



33
34
35
36
37
38
39
40
41
42
43
# File 'lib/unicode/name.rb', line 33

# Returns the corrected name of a character when the alias data carries a
# :correction list, otherwise the regular unicode_name.
#
# char - String; its first code point (decoded with unpack "U") is looked up.
#
# When several corrections are listed, the last one is returned.
def self.correct(char)
  codepoint = char.unpack("U").first
  # The index is loaded lazily on first use.
  require_relative "name/index" unless defined? ::Unicode::Name::INDEX

  entry = INDEX[:ALIASES][codepoint]
  corrections = entry && entry[:correction]
  latest = corrections && corrections[-1]
  return latest if latest

  unicode_name(char)
end

.hangul_decomposition(codepoint) ⇒ Object



83
84
85
86
87
88
89
# File 'lib/unicode/name.rb', line 83

# Builds the jamo part of a Hangul syllable name by splitting the code
# point's offset from HANGUL_START into initial, medial and final indices.
#
# codepoint - Integer code point. The caller is expected to pass a value in
#             HANGUL_START..HANGUL_END; no range check is made here.
#
# Returns a String made of the three INDEX[:JAMO] entries concatenated.
# Note: the index is not loaded here; INDEX must already be defined.
def self.hangul_decomposition(codepoint)
  offset = codepoint - HANGUL_START
  # HANGUL_MEDIAL_MAX is a multiple of HANGUL_FINAL_MAX, so the remainder of
  # the first division carries both the medial and the final index.
  initial, rest = offset.divmod(HANGUL_MEDIAL_MAX)
  medial, final = rest.divmod(HANGUL_FINAL_MAX)

  jamo = INDEX[:JAMO]
  lead  = jamo[:INITIAL][initial]
  vowel = jamo[:MEDIAL][medial]
  tail  = jamo[:FINAL][final]
  "#{lead}#{vowel}#{tail}"
end

.insert_words(raw_name) ⇒ Object



91
92
93
94
95
96
97
98
99
100
# File 'lib/unicode/name.rb', line 91

# Expands a compressed name from the index into its full text.
#
# Characters whose code point is below INDEX[:REPLACE_BASE] are copied
# through unchanged. Any other character stands for an entry of
# INDEX[:COMMON_WORDS] (position = code point - REPLACE_BASE) and is
# replaced by that word followed by a single space.
#
# raw_name - String as stored in INDEX[:NAMES].
#
# Returns the expanded String. A trailing line break is removed as before,
# and so is the single trailing space left behind when the last character
# was an expanded word (plain chomp never removed that space).
# Note: the index is not loaded here; INDEX must already be defined.
def self.insert_words(raw_name)
  # Both lookups are loop-invariant, so do them once.
  replace_base = INDEX[:REPLACE_BASE]
  common_words = INDEX[:COMMON_WORDS]

  expanded = raw_name.each_char.map do |char|
    word_position = char.ord - replace_base
    word_position < 0 ? char : "#{common_words[word_position]} "
  end.join

  expanded.chomp.chomp(" ")
end

.label(char) ⇒ Object



51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
# File 'lib/unicode/name.rb', line 51

# Builds a code point label such as "<control-0000>" for characters whose
# type, as reported by Unicode::Types.type, is Control, Private-use,
# Surrogate, Noncharacter or Reserved.
#
# char - String; its first code point (decoded with unpack "U") supplies the
#        hexadecimal part, written in upper case with at least four digits.
#
# Returns the label String, or nil for "Graphic", "Format" and any other
# type value.
def self.label(char)
  codepoint = char.unpack("U").first
  hex = format("%.4X", codepoint)
  # Both the name index and the unicode/types library are loaded lazily.
  require_relative "name/index" unless defined? ::Unicode::Name::INDEX
  require "unicode/types" unless defined? ::Unicode::Types

  label_kinds = {
    "Control"      => "control",
    "Private-use"  => "private-use",
    "Surrogate"    => "surrogate",
    "Noncharacter" => "noncharacter",
    "Reserved"     => "reserved"
  }
  kind = label_kinds[Unicode::Types.type(char)]
  kind && "<#{kind}-#{hex}>"
end

.readable(char) ⇒ Object



72
73
74
75
76
77
78
79
80
# File 'lib/unicode/name.rb', line 72

# Picks the most readable designation available for a character.
#
# Candidates are tried in this order and the first truthy one is returned:
#   1. correct(char)
#   2. the first entry of the :control, :figment, :alternate or
#      :abbreviation alias list (checked in that order)
#   3. label(char)
#
# char - String holding the character.
#
# Returns a String, or whatever label(char) returns when nothing else
# matched (which may be nil).
def self.readable(char)
  preferred = correct(char)
  return preferred if preferred

  entry = aliases(char)
  if entry
    %i[control figment alternate abbreviation].each do |kind|
      names = entry[kind]
      candidate = names && names[0]
      return candidate if candidate
    end
  end

  label(char)
end

.unicode_name(char) ⇒ Object Also known as: of

Named unicode_name rather than name so that it does not override Module#name.



11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
# File 'lib/unicode/name.rb', line 11

# Returns the name of a character, or nil when none can be derived.
#
# The lookup proceeds in three steps:
#   1. an entry in INDEX[:NAMES], expanded through insert_words
#   2. a match in INDEX[:CP_RANGES], giving the range's prefix followed by
#      the code point in upper-case hex (at least four digits)
#   3. a code point in HANGUL_START..HANGUL_END, giving "HANGUL SYLLABLE "
#      plus the result of hangul_decomposition
#
# char - String; its first code point (decoded with unpack "U") is used.
def self.unicode_name(char)
  codepoint = char.unpack("U").first
  # The index is loaded lazily on first use.
  require_relative "name/index" unless defined? ::Unicode::Name::INDEX

  stored = INDEX[:NAMES][codepoint]
  return insert_words(stored) if stored

  # Each prefix maps to a list of [first, last] code point pairs.
  INDEX[:CP_RANGES].each do |prefix, bounds_list|
    covered = bounds_list.any? { |bounds| codepoint >= bounds[0] && codepoint <= bounds[1] }
    return "%s%.4X" % [prefix, codepoint] if covered
  end

  return nil unless codepoint >= HANGUL_START && codepoint <= HANGUL_END

  "HANGUL SYLLABLE %s" % hangul_decomposition(codepoint)
end