Module: Stringex::Unidecoder

Defined in:
lib/stringex/unidecoder.rb

Constant Summary collapse

CODEPOINTS =

Contains Unicode codepoints, loading as needed from YAML files

# Lazily populated cache: the first lookup of a group key parses the matching
# "<key>.yml" file from the unidecoder_data directory next to this source file
# and memoizes the parsed result under that key.
Hash.new do |cache, group|
  data_dir = File.join(File.expand_path(File.dirname(__FILE__)), "unidecoder_data")
  cache[group] = YAML.load_file(File.join(data_dir, "#{group}.yml"))
end
# Locale-specific transliterations. local_codepoint reads this as
# LOCAL_CODEPOINTS[locale][character]; it starts out empty.
# NOTE(review): presumably filled via localize_from -- confirm in the full file.
LOCAL_CODEPOINTS = {}

Class Method Summary collapse

Class Method Details

.decode(string) ⇒ Object

Returns string with its UTF-8 characters transliterated to ASCII ones

You’re probably better off just using the added String#to_ascii



16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
# File 'lib/stringex/unidecoder.rb', line 16

# Returns a copy of +string+ in which every character outside the 7-bit ASCII
# range has been replaced by its transliteration.
#
# For each such character a truthy localized override from local_codepoint
# wins; otherwise the value is read from the CODEPOINTS tables. Any
# StandardError raised during the table lookup yields "?" instead.
def decode(string)
  string.gsub(/[^\x00-\x7f]/u) do |character|
    local_codepoint(character) || begin
      ordinal = character.unpack("U").first
      CODEPOINTS[code_group(ordinal)][grouped_point(ordinal)]
    rescue
      # Hopefully a rare case.
      # TODO: surface this to the user in a way that can be reported upstream.
      "?"
    end
  end
end

.default_locale ⇒ Object

Returns default locale for localized transliterations. NOTE: Will set @locale as well.



72
73
74
75
# File 'lib/stringex/unidecoder.rb', line 72

# Returns the default locale, initializing it to "en" when none has been set.
#
# NOTE: this reader has a side effect -- every call also overwrites @locale
# with the default.
def default_locale
  @default_locale = "en" unless @default_locale
  @locale = @default_locale
end

.default_locale=(new_locale) ⇒ Object

Sets the default locale for localized transliterations. NOTE: Will set @locale as well.



78
79
80
81
82
# File 'lib/stringex/unidecoder.rb', line 78

# Stores +new_locale+ as the default locale and, since the active locale is
# expected to follow the new default, assigns it to @locale as well.
def default_locale=(new_locale)
  @locale = @default_locale = new_locale
end

.encode(codepoint) ⇒ Object

Returns character for the given Unicode codepoint



34
35
36
# File 'lib/stringex/unidecoder.rb', line 34

# Returns the character for the given Unicode codepoint.
#
# +codepoint+ is interpolated after a "0x" prefix and parsed as hexadecimal,
# so it is expected to be a string of hex digits such as "e9". Text that does
# not parse as hex evaluates to 0 and therefore produces "\u0000".
def encode(codepoint)
  ordinal = "0x#{codepoint}".hex
  [ordinal].pack("U")
end

.in_yaml_file(character) ⇒ Object

Returns string indicating which file (and line) contains the transliteration value for the character



40
41
42
43
# File 'lib/stringex/unidecoder.rb', line 40

# Returns a string such as "x00.yml (line 235)" naming the data file, and the
# line within it, that holds the transliteration for +character+.
#
# NOTE(review): the +2 presumably accounts for 1-based line numbers plus a
# leading YAML document marker -- confirm against the data files.
def in_yaml_file(character)
  ordinal = character.unpack("U").first
  file_name = "#{code_group(ordinal)}.yml"
  line_number = grouped_point(ordinal) + 2
  "#{file_name} (line #{line_number})"
end

.local_codepoint(codepoint) ⇒ Object

Returns the localized transliteration for a codepoint



85
86
87
88
# File 'lib/stringex/unidecoder.rb', line 85

# Returns the transliteration registered for +codepoint+ under the current
# locale, or a falsy value when there is none.
#
# The locale table is looked up by the locale exactly as given first, then by
# its Symbol/String counterpart, so :de and "de" find the same entries.
def local_codepoint(codepoint)
  current = locale
  table = LOCAL_CODEPOINTS[current]
  unless table
    counterpart = current.is_a?(Symbol) ? current.to_s : current.to_sym
    table = LOCAL_CODEPOINTS[counterpart]
  end
  table && table[codepoint]
end

.locale ⇒ Object

Returns locale for localized transliterations



56
57
58
59
60
61
62
63
64
# File 'lib/stringex/unidecoder.rb', line 56

# Returns the locale used for localized transliterations.
#
# An explicitly assigned @locale takes precedence; otherwise I18n.locale is
# used when the I18n constant is defined, and default_locale as a last resort.
def locale
  return @locale if @locale

  defined?(I18n) ? I18n.locale : default_locale
end

.locale=(new_locale) ⇒ Object

Sets locale for localized transliterations



67
68
69
# File 'lib/stringex/unidecoder.rb', line 67

# Sets the locale used for localized transliterations by storing +new_locale+
# in @locale. The default locale (@default_locale) is not touched.
def locale=(new_locale)
  @locale = new_locale
end

.localize_from(hash_or_path_to_file) ⇒ Object

Adds localized transliterations to Unidecoder



46
47
48
49
50
51
52
53
# File 'lib/stringex/unidecoder.rb', line 46

# Adds localized transliterations to Unidecoder.
#
# +hash_or_path_to_file+ is either a Hash, which is used as is, or anything
# else, which is treated as the path of a YAML file to parse. The resulting
# data is passed to verify_local_codepoints, whose return value is returned.
def localize_from(hash_or_path_to_file)
  localizations = hash_or_path_to_file
  localizations = YAML.load_file(localizations) unless localizations.is_a?(Hash)
  verify_local_codepoints localizations
end

.with_default_locale(&block) ⇒ Object

Runs a block with default locale



100
101
102
# File 'lib/stringex/unidecoder.rb', line 100

# Runs +block+ with the default locale active, then puts back the locale that
# was in effect before the call -- also when the block raises.
#
# Returns whatever with_locale returns.
def with_default_locale(&block)
  # default_locale assigns @locale as a side effect, so the caller's locale
  # has to be captured before default_locale is evaluated; otherwise the
  # "original" seen further down is already the default.
  original_locale = locale
  begin
    with_locale default_locale, &block
  ensure
    self.locale = original_locale
  end
end

.with_locale(new_locale, &block) ⇒ Object

Runs a block with a temporary locale setting, returning the locale to the original state when complete



91
92
93
94
95
96
97
# File 'lib/stringex/unidecoder.rb', line 91

# Runs +block+ with +new_locale+ as the active locale and restores the
# previously active locale afterwards, even when the block raises.
#
# Passing :default selects the default locale.
#
# Returns the locale that was restored.
def with_locale(new_locale, &block)
  # Capture the current locale first: default_locale assigns @locale as a
  # side effect, so resolving :default beforehand would lose the original.
  original_locale = locale
  new_locale = default_locale if new_locale == :default
  begin
    self.locale = new_locale
    block.call
  ensure
    self.locale = original_locale
  end
  original_locale
end