Module: Unicode::Confusable

Defined in:
lib/unicode/confusable.rb,
lib/unicode/confusable/index.rb,
lib/unicode/confusable/constants.rb,
lib/unicode/confusable/ignorable.rb

Constant Summary

VERSION =
"1.12.0"
UNICODE_VERSION =
"16.0.0"
DATA_DIRECTORY =
File.expand_path(File.dirname(__FILE__) + "/../../../data/").freeze
INDEX_FILENAME =
(DATA_DIRECTORY + "/confusable.marshal.gz").freeze
# Flat, frozen list built from INDEX[:IGNORABLE]: an entry that is an Array is
# expanded through Range.new(*entry); any other entry is kept as a single item.
# flat_map concatenates the per-entry arrays in one pass instead of allocating
# a new accumulator array on every step.
IGNORABLE =
INDEX[:IGNORABLE].flat_map{|cur|
  [*(cur.is_a?(Array) ? Range.new(*cur) : cur)]
}.freeze

Class Method Summary

Class Method Details

.confusable?(string1, string2) ⇒ Boolean

Returns:

  • (Boolean)


9
10
11
# File 'lib/unicode/confusable.rb', line 9

# Tells whether two strings have the same skeleton.
#
# @param string1 [String] first string to compare
# @param string2 [String] second string to compare
# @return [Boolean] true when skeleton(string1) == skeleton(string2)
def self.confusable?(string1, string2)
  first_skeleton = skeleton(string1)
  second_skeleton = skeleton(string2)
  first_skeleton == second_skeleton
end

.list(char, partial_mapping_allowed = true) ⇒ Object



24
25
26
27
28
29
30
31
32
# File 'lib/unicode/confusable.rb', line 24

# Lists the characters whose INDEX[:CONFUSABLE] entry points at the first
# codepoint of +char+.
#
# @param char [String] only its first codepoint is looked up
# @param partial_mapping_allowed [Boolean] when truthy, also match entries
#   whose value is an Array that includes the codepoint; otherwise only
#   entries whose value equals the codepoint are matched
# @return [Array<String>] one single-character string per matching key
# @raise [ArgumentError] if +char+ has no codepoints
def self.list(char, partial_mapping_allowed = true)
  require_relative 'confusable/index' unless defined? ::Unicode::Confusable::INDEX
  target = char.codepoints.first
  raise ArgumentError, "no data given to Unicode::Confusable.list" unless target

  mapping = INDEX[:CONFUSABLE]
  if partial_mapping_allowed
    matching = mapping.select do |_source, prototype|
      prototype == target || (prototype.is_a?(Array) && prototype.include?(target))
    end
    matching.keys.map { |source| [source].pack("U*") }
  else
    exact = mapping.select { |_source, prototype| prototype == target }
    exact.keys.map { |source| [source].pack("U") }
  end
end

.skeleton(string) ⇒ Object



13
14
15
16
17
18
19
20
21
22
# File 'lib/unicode/confusable.rb', line 13

# Builds the skeleton of +string+ used for confusable comparison.
#
# Steps: NFD-normalize the input, skip every codepoint contained in IGNORABLE,
# replace each remaining codepoint by its INDEX[:CONFUSABLE] entry when one
# exists (an entry may be an Array of codepoints), then NFD-normalize the
# re-assembled string.
#
# @param string [String] text to reduce to its skeleton
# @return [String] the NFD-normalized skeleton
def self.skeleton(string)
  require_relative 'confusable/index' unless defined? ::Unicode::Confusable::INDEX
  decomposed = UnicodeNormalize.normalize(string, :nfd)

  prototypes = []
  decomposed.each_codepoint do |cp|
    next if IGNORABLE.include?(cp)

    # Fall back to the codepoint itself when it has no confusable mapping.
    prototypes << (INDEX[:CONFUSABLE][cp] || cp)
  end

  # Multi-codepoint mappings are Arrays, so flatten before packing to UTF-8.
  replaced = prototypes.flatten.compact.pack("U*")
  UnicodeNormalize.normalize(replaced, :nfd)
end