Module: Sprockets::EncodingUtils

Extended by:
EncodingUtils
Included in:
EncodingUtils
Defined in:
lib/sprockets/encoding_utils.rb

Overview

Internal: HTTP transport encoding and charset detecting related functions. Mixed into Environment.

Constant Summary collapse

CHARSET_DETECT =

Internal: Shorthand aliases for detector functions.

{}
BOM =

Internal: Mapping unicode encodings to byte order markers.

{
  Encoding::UTF_32LE => [0xFF, 0xFE, 0x00, 0x00],
  Encoding::UTF_32BE => [0x00, 0x00, 0xFE, 0xFF],
  Encoding::UTF_8    => [0xEF, 0xBB, 0xBF],
  Encoding::UTF_16LE => [0xFF, 0xFE],
  Encoding::UTF_16BE => [0xFE, 0xFF]
}
CHARSET_START =

Internal: @charset bytes

[0x40, 0x63, 0x68, 0x61, 0x72, 0x73, 0x65, 0x74, 0x20, 0x22]
CHARSET_SIZE =
CHARSET_START.size

Instance Method Summary collapse

Instance Method Details

#base64(str) ⇒ Object

Public: Use base64 to encode data.

str - String data

Returns an encoded String.

[View source]

73
74
75
# File 'lib/sprockets/encoding_utils.rb', line 73

# Public: Use base64 to encode data.
#
# str - String data
#
# Returns the strict (single-line, padded) base64 encoding of str as a String.
def base64(str)
  # The "m0" pack directive emits base64 without any line feeds.
  [str].pack('m0')
end

#charlock_detect(str) ⇒ Object

Internal: Use Charlock Holmes to detect encoding.

To enable this code path, require ‘charlock_holmes’

Returns encoded String.

[View source]

122
123
124
125
126
127
128
129
130
# File 'lib/sprockets/encoding_utils.rb', line 122

# Internal: Use Charlock Holmes to detect encoding.
#
# Does nothing unless CharlockHolmes::EncodingDetector is defined. When the
# detector reports an :encoding, str is re-tagged in place with it.
#
# str - String
#
# Returns the same String object that was passed in.
def charlock_detect(str)
  return str unless defined?(CharlockHolmes::EncodingDetector)

  detection = CharlockHolmes::EncodingDetector.detect(str)
  encoding_name = detection && detection[:encoding]

  # force_encoding mutates the receiver, so callers that ignore the return
  # value still observe the new encoding.
  str.force_encoding(encoding_name) if encoding_name

  str
end

#deflate(str) ⇒ Object

Public: Use deflate to compress data.

str - String data

Returns a compressed String

[View source]

19
20
21
22
23
24
25
26
27
28
# File 'lib/sprockets/encoding_utils.rb', line 19

# Public: Use deflate to compress data.
#
# str - String data
#
# Returns a compressed String
def deflate(str)
  deflater = Zlib::Deflate.new(
    Zlib::BEST_COMPRESSION,
    # Negative window bits request a raw deflate stream (no zlib header or
    # trailer), per zlib's deflateInit2 documentation.
    -Zlib::MAX_WBITS,
    Zlib::MAX_MEM_LEVEL,
    Zlib::DEFAULT_STRATEGY
  )

  begin
    deflater << str
    deflater.finish
  ensure
    # Release the native zlib state right away instead of waiting for GC,
    # including when feeding the input raises.
    deflater.close
  end
end

#detect(str) ⇒ Object

Public: Basic string detector.

Attempts to parse any Unicode BOM, then attempts Charlock detection, and finally falls back to the environment’s external encoding.

str - ASCII-8BIT encoded String

Returns encoded String.

[View source]

100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
# File 'lib/sprockets/encoding_utils.rb', line 100

# Public: Basic string detector.
#
# Tries, in order: a Unicode BOM, Charlock Holmes detection, and finally the
# environment's default external encoding.
#
# str - ASCII-8BIT encoded String
#
# Returns encoded String.
def detect(str)
  decoded = detect_unicode_bom(str)

  # Still binary after the BOM check: give Charlock a chance. It re-tags the
  # string in place, so its return value is not needed.
  charlock_detect(decoded) if decoded.encoding == Encoding::BINARY

  # Nothing recognised it: assume the environment's external encoding.
  decoded.force_encoding(Encoding.default_external) if decoded.encoding == Encoding::BINARY

  decoded
end

#detect_css(str) ⇒ Object

Public: Detect and strip @charset from CSS style sheet.

str - String.

Returns an encoded String.

[View source]

178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
# File 'lib/sprockets/encoding_utils.rb', line 178

# Public: Detect and strip @charset from CSS style sheet.
#
# A Unicode BOM is handled first. If a leading @charset declaration is then
# found, the string is tagged with the declared encoding and the declaration
# is removed. Anything still binary afterwards is treated as UTF-8.
#
# str - String.
#
# Returns an encoded String.
def detect_css(str)
  css = detect_unicode_bom(str)

  charset = scan_css_charset(css)
  if charset
    declared = Encoding.find(charset)

    # Work on a copy so the string handed to us keeps its @charset rule.
    css = css.dup
    css.force_encoding(declared)

    # Measure the declaration in the declared encoding so slice! drops
    # exactly that many characters from the front.
    declaration_size = "@charset \"#{charset}\";".encode(declared).size
    css.slice!(0, declaration_size)
  end

  # Fallback to UTF-8
  css.force_encoding(Encoding::UTF_8) if css.encoding == Encoding::BINARY

  css
end

#detect_html(str) ⇒ Object

Public: Detect charset from HTML document.

Attempts to parse any Unicode BOM, otherwise attempts Charlock detection, and finally falls back to the environment’s external encoding.

str - String.

Returns an encoded String.

[View source]

245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
# File 'lib/sprockets/encoding_utils.rb', line 245

# Public: Detect charset from HTML document.
#
# Tries a Unicode BOM first, then Charlock Holmes detection, and finally
# falls back to the environment's default external encoding.
#
# str - String.
#
# Returns an encoded String.
def detect_html(str)
  html = detect_unicode_bom(str)
  return html unless html.encoding == Encoding::BINARY

  # Charlock re-tags the string in place when it recognises the content.
  charlock_detect(html)
  return html unless html.encoding == Encoding::BINARY

  # Fallback to environment's external encoding (force_encoding returns html).
  html.force_encoding(Encoding.default_external)
end

#detect_unicode(str) ⇒ Object

Public: Detect Unicode string.

Attempts to parse Unicode BOM and falls back to UTF-8.

str - ASCII-8BIT encoded String

Returns encoded String.

[View source]

139
140
141
142
143
144
145
146
147
148
# File 'lib/sprockets/encoding_utils.rb', line 139

# Public: Detect Unicode string.
#
# Honors a Unicode BOM when one is present; a string that is still binary
# afterwards is tagged as UTF-8.
#
# str - ASCII-8BIT encoded String
#
# Returns encoded String.
def detect_unicode(str)
  decoded = detect_unicode_bom(str)
  decoded.force_encoding(Encoding::UTF_8) if decoded.encoding == Encoding::BINARY
  decoded
end

#detect_unicode_bom(str) ⇒ Object

Public: Detect and strip BOM from possible unicode string.

str - ASCII-8BIT encoded String

Returns UTF 8/16/32 encoded String without BOM or the original String if no BOM was present.

[View source]

157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
# File 'lib/sprockets/encoding_utils.rb', line 157

# Public: Detect and strip BOM from possible unicode string.
#
# str - ASCII-8BIT encoded String
#
# Returns a copy of str tagged with the matching BOM encoding and with the
# BOM bytes removed, or the original String itself if no BOM was present.
def detect_unicode_bom(str)
  # No BOM in the table is longer than four bytes.
  head = str.byteslice(0, 4).bytes

  # BOM is walked in definition order, so the first matching marker wins.
  match = BOM.find { |_encoding, marker| head[0, marker.size] == marker }
  return str unless match

  encoding, marker = match

  stripped = str.dup
  # Slice as binary so exactly marker.size bytes are removed.
  stripped.force_encoding(Encoding::BINARY)
  stripped.slice!(0, marker.size)
  stripped.force_encoding(encoding)
end

#gzip(str) ⇒ Object

Public: Use gzip to compress data.

str - String data

Returns a compressed String

[View source]

59
60
61
62
63
64
65
66
# File 'lib/sprockets/encoding_utils.rb', line 59

# Public: Use gzip to compress data.
#
# str - String data
#
# Returns a compressed String
def gzip(str)
  buffer = StringIO.new

  Zlib::GzipWriter.new(buffer, Zlib::BEST_COMPRESSION).tap do |writer|
    # A fixed mtime keeps the output independent of the current time.
    writer.mtime = 1
    writer << str
    writer.finish
  end

  buffer.string
end

#scan_css_charset(str) ⇒ Object

Internal: Scan binary CSS string for @charset encoding name.

str - ASCII-8BIT encoded String

Returns encoding String name or nil.

[View source]

208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
# File 'lib/sprockets/encoding_utils.rb', line 208

# Internal: Scan binary CSS string for @charset encoding name.
#
# Only the first line is examined. NUL bytes are skipped, and the remaining
# leading bytes must equal CHARSET_START; the bytes that follow, up to the
# next double quote, form the name.
#
# str - ASCII-8BIT encoded String
#
# Returns encoding String name or nil.
def scan_css_charset(str)
  collected = []
  seen = 0

  str.each_byte do |byte|
    # Halt on line breaks
    break if byte == 0x0A || byte == 0x0D

    # Skip NUL bytes (presumably so a UTF-16/32 encoded declaration still
    # matches the single-byte prefix; confirm against callers).
    next if byte == 0x0

    if seen == CHARSET_SIZE
      # The whole prefix has been read: give up unless it is the @charset
      # opener, otherwise start collecting the name from this byte.
      break unless collected == CHARSET_START
      collected = []
    elsif seen > CHARSET_SIZE && byte == 0x22
      # Closing double quote ends the name.
      return collected.pack('C*')
    end

    collected << byte
    seen += 1
  end

  nil
end

#unmarshaled_deflated(str, window_bits = -Zlib::MAX_WBITS) ⇒ Object

Internal: Unmarshal optionally deflated data.

Checks the leading marshal header to see if the bytes are uncompressed; otherwise inflates the data and unmarshals it.

str - Marshaled String. window_bits - Integer deflate window size. See Zlib::Inflate.new().

Returns unmarshaled Object or raises an Exception.

[View source]

39
40
41
42
43
44
45
46
47
48
49
50
51
52
# File 'lib/sprockets/encoding_utils.rb', line 39

# Internal: Unmarshal optionally deflated data.
#
# Checks the leading marshal header to see if the bytes are uncompressed;
# otherwise inflates the data and unmarshals the result. If inflating fails
# with Zlib::DataError the original bytes are unmarshaled as they are.
#
# str         - Marshaled String, optionally deflated
# window_bits - Integer deflate window size. See Zlib::Inflate.new()
#
# NOTE(review): Marshal.load can instantiate arbitrary objects, so str must
# come from a trusted source.
#
# Returns unmarshaled Object or raises an Exception.
def unmarshaled_deflated(str, window_bits = -Zlib::MAX_WBITS)
  # Inspect raw bytes rather than characters: compressed data is arbitrary
  # binary, and character access (str[0].ord) raises ArgumentError when the
  # string is tagged with a multibyte encoding and starts with an invalid byte.
  major = str.getbyte(0)
  minor = str.getbyte(1)

  marshaled =
    if major == Marshal::MAJOR_VERSION && minor && minor <= Marshal::MINOR_VERSION
      str
    else
      inflater = Zlib::Inflate.new(window_bits)
      begin
        inflater.inflate(str)
      rescue Zlib::DataError
        # Not deflate data after all; let Marshal.load decide.
        str
      ensure
        # Free the native zlib state without waiting for GC.
        inflater.close
      end
    end

  Marshal.load(marshaled)
end