Module: Eco::Data::Files::Encoding

Includes:
Language::AuxiliarLogger
Included in:
ClassMethods, InstanceMethods
Defined in:
lib/eco/data/files/encoding.rb

Constant Summary collapse

# Byte values (0xEF 0xBB 0xBF) of the UTF-8 byte order mark, as integers.
BOM_BYTES =
[239, 187, 191].freeze

Instance Attribute Summary

Attributes included from Language::AuxiliarLogger

#logger

Instance Method Summary collapse

Methods included from Language::AuxiliarLogger

#log

Instance Method Details

#encoding(path) ⇒ Object



52
53
54
# File 'lib/eco/data/files/encoding.rb', line 52

# Names the encoding detected for the file at +path+.
#
# @param path [String] location of the file to inspect.
# @return [String] "bom" when `has_bom?` reports a byte order mark,
#   "utf-8" otherwise.
def encoding(path)
  return "bom" if has_bom?(path)

  "utf-8"
end

#file_empty?(path) ⇒ Boolean

Returns:

  • (Boolean)


74
75
76
77
78
# File 'lib/eco/data/files/encoding.rb', line 74

# Tells whether +path+ holds no readable content.
#
# Anything that is not a regular file (missing path, directory, ...) is
# reported as empty; a regular file is empty when its size is zero.
#
# @param path [String] location to check.
# @return [Boolean]
def file_empty?(path)
  !File.file?(path) || File.zero?(path)
end

#file_exists?(file) ⇒ Boolean

Returns:

  • (Boolean)


68
69
70
71
72
# File 'lib/eco/data/files/encoding.rb', line 68

# Tells whether +file+ exists, either as given or after expanding it to an
# absolute path.
#
# @param file [String, nil] path to look for.
# @return [Boolean] false when +file+ is nil/false or cannot be found.
def file_exists?(file)
  return false if !file
  return true if File.exist?(file)

  # Only expand when the path was not found as given.
  File.exist?(File.expand_path(file))
end

#get_file_content_with_encoding(file, encoding: nil) ⇒ Object



17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
# File 'lib/eco/data/files/encoding.rb', line 17

# Reads +file+ and returns its content as a UTF-8 string.
#
# When no +encoding+ is given it is resolved through `scoped_encoding`.
# Any resolved encoding other than exactly 'utf-8' is reported both to the
# debug log and to standard output. Files that start with a byte order mark
# (or whose encoding string begins with 'bom') are read as UTF-8 and passed
# through `remove_bom`; everything else is read with the given encoding and
# transcoded to UTF-8 when the encoding name does not contain 'utf-8'.
#
# @param file [String] path of the file to read.
# @param encoding [String, nil] encoding name, optionally in 'bom|utf-8' form.
# @return [String, nil] the file content, or nil if the read yields no content.
def get_file_content_with_encoding(file, encoding: nil)
  encoding = scoped_encoding(file) unless encoding

  if encoding && encoding != 'utf-8'
    notice = "File encoding: '#{encoding}'"
    log(:debug) { notice }
    puts notice
  end

  # The part before the first '|' says whether a BOM was announced.
  bom_requested = encoding ? encoding.split('|').first == 'bom' : false

  if has_bom?(file) || bom_requested
    raw      = File.read(file, encoding: 'utf-8')
    content  = remove_bom(raw)
    encoding = 'utf-8'
  else
    content = File.read(file, encoding: encoding)
  end

  return if !content

  encoding.include?('utf-8') ? content : content.encode("utf-8")
end

#has_bom?(path) ⇒ Boolean

Returns:

  • (Boolean)


9
10
11
12
13
14
15
# File 'lib/eco/data/files/encoding.rb', line 9

# Tells whether the file at +path+ starts with the bytes in BOM_BYTES.
#
# @param path [String, nil] location of the file to inspect.
# @return [Boolean] false when +path+ is nil/false or `file_empty?` reports
#   the file as empty; otherwise whether its first three bytes equal BOM_BYTES.
def has_bom?(path)
  if path && !file_empty?(path)
    # Binary mode so the leading bytes are read untouched.
    File.open(path, "rb") { |io| io.read(3).bytes == BOM_BYTES }
  else
    false
  end
end

#remove_bom(content) ⇒ Object

Strips a leading UTF-8 byte order mark (BOM) from the content, if present, and returns the content as UTF-8. See https://stackoverflow.com/a/24916365/4352306



42
43
44
45
46
47
48
49
50
# File 'lib/eco/data/files/encoding.rb', line 42

# Strips a leading UTF-8 byte order mark (the bytes in BOM_BYTES) from
# +content+.
#
# Both the check and the removal work on raw bytes, so the method behaves
# the same whatever encoding +content+ is currently tagged with.
#
# @param content [String] text that may start with the BOM bytes.
# @return [String] +content+ itself when it does not start with the BOM;
#   otherwise a new string, tagged as UTF-8, without the first three bytes.
def remove_bom(content)
  # Look at the first three bytes only instead of materialising an array
  # with every byte of the (possibly large) content.
  return content unless content.byteslice(0, BOM_BYTES.size).bytes == BOM_BYTES

  # Cut by bytes: a character-level substitution with a UTF-8 pattern raises
  # Encoding::CompatibilityError when content is tagged with an incompatible
  # encoding (for example binary).
  content.byteslice(BOM_BYTES.size..-1).force_encoding('utf-8')
end

#scoped_encoding(path) ⇒ Object

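Returns the value to pass as the `encoding:` option when reading the file at `path`: `"bom|utf-8"` if the file starts with a UTF-8 BOM, `"utf-8"` otherwise, or `nil` (after logging an error) if the file does not exist.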



57
58
59
60
61
62
63
64
65
66
# File 'lib/eco/data/files/encoding.rb', line 57

# Resolves the encoding string to use when reading the file at +path+.
#
# @param path [String] location of the file.
# @return [String, nil] "bom|utf-8" when `encoding` reports "bom", the value
#   reported by `encoding` otherwise, or nil (after logging an error) when
#   the file does not exist.
def scoped_encoding(path)
  if file_exists?(path)
    detected = encoding(path)
    # A BOM marker is completed with the encoding that follows it.
    detected == "bom" ? "#{detected}|utf-8" : detected
  else
    log(:error) { "File does not exist: #{path}" }
    nil
  end
end