Module: LHC::FixInvalidEncodingConcern::ClassMethods

Defined in:
lib/lhc/concerns/lhc/fix_invalid_encoding_concern.rb

Instance Method Summary collapse

Instance Method Details

#fix_invalid_encoding(string) ⇒ Object

Forcefully fixes strings that are not validly UTF-8 encoded. Should none of the fix attempts succeed, an empty string is returned instead.



13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
# File 'lib/lhc/concerns/lhc/fix_invalid_encoding_concern.rb', line 13

# Forcefully coerces a string into valid UTF-8.
#
# Non-String arguments are handed back untouched. For strings, a copy is
# transcoded (the argument itself is never modified): its bytes are first
# treated as ISO-8859-1, then as BINARY, and any byte that cannot be
# converted is dropped. If the copy still fails the check after both
# attempts, an empty string is returned.
#
# @param string [Object] the value to sanitize
# @return [Object] the argument itself when it is not a String, otherwise a
#   copy of it (transcoded if necessary) or an empty string
def fix_invalid_encoding(string)
  return string unless string.is_a?(String)

  # `utf8?` is a helper defined outside this method; it is only consulted
  # when the string's bytes are valid for its current encoding.
  broken = ->(candidate) { !(candidate.valid_encoding? && utf8?(candidate)) }
  drop_bad_bytes = { invalid: :replace, undef: :replace, replace: '' }

  fixed = string.dup

  # Source encodings to assume, in order; stop as soon as the copy is fine.
  %w[ISO-8859-1 BINARY].each do |assumed_encoding|
    break unless broken.call(fixed)

    fixed.encode!('UTF-8', assumed_encoding, **drop_bad_bytes)
  end

  # Nothing worked: give up and hand back an empty string.
  broken.call(fixed) ? "" : fixed
end