Module: Eco::Data::Files::Encoding

Includes:
Language::AuxiliarLogger
Included in:
ClassMethods, InstanceMethods
Defined in:
lib/eco/data/files/encoding.rb

Constant Summary collapse

BOM_BYTES =
[239, 187, 191]

Instance Attribute Summary

Attributes included from Language::AuxiliarLogger

#logger

Instance Method Summary collapse

Methods included from Language::AuxiliarLogger

#log

Instance Method Details

#encoding(path) ⇒ Object



48
49
50
# File 'lib/eco/data/files/encoding.rb', line 48

# Reports the encoding label of the file at `path`.
#
# @param path [String] path of the file to inspect.
# @return [String] `"bom"` when `has_bom?` is truthy for `path`, `"utf-8"` otherwise.
def encoding(path)
  return "bom" if has_bom?(path)

  "utf-8"
end

#file_empty?(path) ⇒ Boolean

Returns:

  • (Boolean)


68
69
70
71
# File 'lib/eco/data/files/encoding.rb', line 68

# Tells whether `path` has no content to read.
#
# @param path [String] path to check.
# @return [Boolean] `true` when `path` is not a regular file (missing,
#   a directory, ...) or when the file has zero size; `false` otherwise.
def file_empty?(path)
  !File.file?(path) || File.zero?(path)
end

#file_exists?(file) ⇒ Boolean

Returns:

  • (Boolean)


63
64
65
66
# File 'lib/eco/data/files/encoding.rb', line 63

# Checks whether `file` exists, either as given or after expanding it
# with `File.expand_path`.
#
# @param file [String, nil] path to check.
# @return [Boolean] `false` when `file` is `nil`/`false` or nothing exists
#   at either form of the path; `true` otherwise.
def file_exists?(file)
  return false unless file
  return true if File.exist?(file)

  # Only fall back to the expanded form when the raw path was not found.
  File.exist?(File.expand_path(file))
end

#get_file_content_with_encoding(file, encoding: nil) ⇒ Object



17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
# File 'lib/eco/data/files/encoding.rb', line 17

# Reads `file` and returns its content, transcoded to UTF-8 unless the
# effective encoding label already contains `'utf-8'`.
#
# @param file [String] path of the file to read.
# @param encoding [String, nil] encoding label to read the file with;
#   when omitted it is obtained from `scoped_encoding(file)`.
# @return [String, nil] the file content, without a leading BOM when one was
#   detected or declared; `nil` if no content was obtained.
def get_file_content_with_encoding(file, encoding: nil)
  encoding ||= scoped_encoding(file)

  # Only announce encodings other than plain utf-8.
  if encoding && encoding != 'utf-8'
    notice = "File encoding: '#{encoding}'"
    logger.debug(notice)
    puts notice
  end

  # A label such as 'bom|utf-8' declares a BOM even if the file lacks one.
  declared_bom = encoding&.split('|')&.first == 'bom'

  content =
    if has_bom?(file) || declared_bom
      encoding = 'utf-8'
      remove_bom(File.read(file, encoding: 'utf-8'))
    else
      File.read(file, encoding: encoding)
    end
  return nil unless content

  encoding.include?('utf-8') ? content : content.encode("utf-8")
end

#has_bom?(path) ⇒ Boolean

Returns:

  • (Boolean)


9
10
11
12
13
14
15
# File 'lib/eco/data/files/encoding.rb', line 9

# Tells whether the file at `path` starts with the bytes in `BOM_BYTES`.
#
# @param path [String, nil] path of the file to inspect.
# @return [Boolean] `false` when `path` is `nil`/`false` or `file_empty?`
#   reports the file as empty; otherwise whether its first three bytes
#   equal `BOM_BYTES`.
def has_bom?(path)
  return false unless path
  return false if file_empty?(path)

  # Binary mode so the leading bytes are read untouched.
  head = File.open(path, "rb") { |io| io.read(3) }
  head.bytes == BOM_BYTES
end

#remove_bom(content) ⇒ Object

Removes a leading UTF-8 byte order mark (BOM) from the content and tags the result as UTF-8. See https://stackoverflow.com/a/24916365/4352306



39
40
41
42
43
44
45
46
# File 'lib/eco/data/files/encoding.rb', line 39

# Strips a leading UTF-8 byte order mark (the bytes in `BOM_BYTES`) from
# `content`.
#
# Detection and removal both work at byte level, so the result does not
# depend on the encoding `content` is tagged with (a binary-tagged string
# that starts with the BOM bytes is handled as well).
#
# @param content [String] text that may start with a BOM.
# @return [String] `content` itself when it does not start with the BOM;
#   otherwise a new string without the first three bytes, tagged as UTF-8.
def remove_bom(content)
  # Inspect only the first three bytes instead of materialising every byte.
  return content unless content.byteslice(0, BOM_BYTES.size).bytes == BOM_BYTES

  content.byteslice(BOM_BYTES.size..-1).force_encoding('utf-8')
end

#scoped_encoding(path) ⇒ Object

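Returns the encoding label for the file at `path`: "bom|utf-8" when the file starts with a BOM, "utf-8" otherwise. Logs an error and returns nil when the file does not exist.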



53
54
55
56
57
58
59
60
61
# File 'lib/eco/data/files/encoding.rb', line 53

# Resolves the encoding label for the file at `path`.
#
# @param path [String] path of the file to inspect.
# @return [String, nil] `"bom|utf-8"` when `encoding(path)` reports `"bom"`,
#   otherwise whatever `encoding(path)` returns; `nil` (after logging an
#   error) when `file_exists?` is false for `path`.
def scoped_encoding(path)
  if file_exists?(path)
    detected = encoding(path)
    detected == "bom" ? "#{detected}|utf-8" : detected
  else
    logger.error("File does not exist: #{path}")
    nil
  end
end