Class: Invoca::Utils::GuaranteedUTF8String

Inherits:
Object
  • Object
show all
Defined in:
lib/invoca/utils/guaranteed_utf8_string.rb

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(string) ⇒ GuaranteedUTF8String

Returns a new instance of GuaranteedUTF8String.



12
13
14
# File 'lib/invoca/utils/guaranteed_utf8_string.rb', line 12

# Builds the wrapper by running +string+ through the class-level
# normalize_string and storing the result in @to_string.
#
# @param string [Object] value handed unchanged to normalize_string
def initialize(string)
  normalizer = self.class
  @to_string = normalizer.normalize_string(string)
end

Instance Attribute Details

#to_string ⇒ Object (readonly) Also known as: to_s

Returns the value of attribute to_string.



10
11
12
# File 'lib/invoca/utils/guaranteed_utf8_string.rb', line 10

# Reader for the @to_string instance variable.
#
# @return [Object] whatever is currently stored in @to_string
def to_string
  @to_string
end

Class Method Details

.normalize_all_strings(value, **options) ⇒ Object

Walks a JSON-like document of hashes, arrays, and scalar values and normalizes every string it finds (including hash keys) to UTF-8.



48
49
50
51
52
53
54
55
56
57
58
59
60
61
# File 'lib/invoca/utils/guaranteed_utf8_string.rb', line 48

# Recursively rebuilds +value+, passing every String it contains through
# normalize_string.
#
# Hashes yield a new Hash whose keys and values have both been processed,
# Arrays yield a new Array of processed elements, and anything that is not a
# Hash, Array, or String is returned untouched.
#
# @param value [Object] the structure (or single value) to process
# @param options [Hash] keyword options forwarded as-is to normalize_string
# @return [Object] the processed counterpart of +value+
def normalize_all_strings(value, **options)
  case value
  when String then normalize_string(value, **options)
  when Array  then value.map { |element| normalize_all_strings(element, **options) }
  when Hash
    # Process the key before the value for each pair, then reassemble the hash.
    normalized_pairs = value.map do |key, member|
      [normalize_all_strings(key, **options), normalize_all_strings(member, **options)]
    end
    normalized_pairs.to_h
  else
    value
  end
end

.normalize_string(orig_string, normalize_utf16: true, normalize_cp1252: true, normalize_newlines: true, remove_utf8_bom: true, replace_unicode_beyond_ffff: true) ⇒ Object

Normalizes a string (or any object with a non-Kernel #to_s) to UTF-8; raises ArgumentError for anything else.



25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
# File 'lib/invoca/utils/guaranteed_utf8_string.rb', line 25

# Converts +orig_string+ to a fresh String tagged as UTF-8 and hands it to
# normalize_string_from_utf8 together with the normalization flags.
#
# The argument is never modified: its #to_s result is duplicated before the
# encoding is forced.
#
# @param orig_string [String, #to_s] a String, or an object whose #to_s is
#   defined somewhere other than Kernel
# @param normalize_utf16 [Boolean] forwarded to normalize_string_from_utf8
# @param normalize_cp1252 [Boolean] forwarded to normalize_string_from_utf8
# @param normalize_newlines [Boolean] forwarded to normalize_string_from_utf8
# @param remove_utf8_bom [Boolean] forwarded to normalize_string_from_utf8
# @param replace_unicode_beyond_ffff [Boolean] forwarded to normalize_string_from_utf8
# @return [Object] whatever normalize_string_from_utf8 returns
# @raise [ArgumentError] if +orig_string+ is not a String and only has the
#   Kernel-provided #to_s
def normalize_string(orig_string,
                     normalize_utf16:              true,
                     normalize_cp1252:             true,
                     normalize_newlines:           true,
                     remove_utf8_bom:              true,
                     replace_unicode_beyond_ffff:  true)
  # Kernel#to_s only produces an inspect-style dump, so it does not count as a
  # real string conversion.
  convertible = orig_string.is_a?(String) ||
                (orig_string.respond_to?(:to_s) && orig_string.method(:to_s).owner != Kernel)
  unless convertible
    raise ArgumentError, "must be passed a string or an object with a non-Kernel .to_s method but instead was #{orig_string.class} #{orig_string.inspect}"
  end

  # Work on a copy so the caller's object keeps its own encoding.
  utf8_tagged = orig_string.to_s.dup.force_encoding('UTF-8')

  normalize_string_from_utf8(
    utf8_tagged,
    normalize_utf16: normalize_utf16,
    normalize_cp1252: normalize_cp1252,
    normalize_newlines: normalize_newlines,
    remove_utf8_bom: remove_utf8_bom,
    replace_unicode_beyond_ffff: replace_unicode_beyond_ffff
  )
end