Module: DHC::FixInvalidEncodingConcern::ClassMethods

Defined in:
lib/dhc/concerns/dhc/fix_invalid_encoding_concern.rb

Instance Method Summary collapse

Instance Method Details

#fix_invalid_encoding(string) ⇒ Object

Forcefully fixes strings that contain non-UTF-8 encoding. Should none of the fix attempts be successful, an empty string is returned instead.



13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
# File 'lib/dhc/concerns/dhc/fix_invalid_encoding_concern.rb', line 13

# Forcefully coerces +string+ into a string that passes both
# +valid_encoding?+ and the +utf8?+ helper.
#
# Non-String arguments are handed back untouched. For Strings the work is
# done on a copy, so the caller's object is never mutated.
#
# The bytes are reinterpreted using a fixed list of guesses (ISO-8859-1
# first, then BINARY); any byte that cannot be converted is dropped. If the
# copy still fails the checks after every guess, an empty string is returned.
#
# NOTE(review): +utf8?+ is defined outside this block; presumably it reports
# whether the string's encoding is UTF-8 — confirm against its definition.
#
# @param string [Object] the value to sanitize
# @return [Object] the argument itself when it is not a String, otherwise a
#   repaired copy or an empty string
def fix_invalid_encoding(string)
  return string unless string.is_a?(String)

  fixed = string.dup
  drop_bad_bytes = { invalid: :replace, undef: :replace, replace: '' }

  # Guesses are tried in order; a guess is skipped as soon as the copy is
  # already healthy, so at most the necessary conversions are applied.
  %w[ISO-8859-1 BINARY].each do |assumed_encoding|
    next if fixed.valid_encoding? && utf8?(fixed)

    fixed.encode!('UTF-8', assumed_encoding, **drop_bad_bytes)
  end

  # Last resort: give up and hand back an empty string.
  fixed.valid_encoding? && utf8?(fixed) ? fixed : ''
end