Class: LogStash::Util::Charset

Inherits:
Object
  • Object
show all
Defined in:
lib/logstash/util/charset.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(charset) ⇒ Charset

Returns a new instance of Charset.



7
8
9
# File 'lib/logstash/util/charset.rb', line 7

# Creates a converter bound to a single declared source charset.
#
# charset - the encoding that incoming data is declared to be in. It is
#           stored as-is in @charset; #convert later passes it to
#           String#force_encoding and compares it against "UTF-8".
def initialize(charset)
  @charset = charset
end

Instance Attribute Details

#logger ⇒ Object

Returns the value of attribute logger.



6
7
8
# File 'lib/logstash/util/charset.rb', line 6

# Reader for the logger that #convert sends its warning to.
#
# Returns whatever is currently held in @logger (nil if nothing has been
# assigned; no assignment is visible in this snippet).
def logger
  @logger
end

Instance Method Details

#convert(data) ⇒ Object



11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
# File 'lib/logstash/util/charset.rb', line 11

# Normalizes +data+ to UTF-8, interpreting its bytes as being in the charset
# that was given to the constructor.
#
# NOTE: +data+ is re-tagged in place with String#force_encoding, so the
# caller's string is mutated (and a frozen string will raise).
#
# data - the String to convert.
#
# Returns a String:
#   * declared charset is not UTF-8: a transcoded UTF-8 copy, with invalid
#     or unmappable sequences replaced;
#   * declared charset is UTF-8 and the bytes are valid: +data+ itself;
#   * declared charset is UTF-8 but the bytes are invalid: an escaped,
#     printable rendering of the bytes (a warning is logged when a logger
#     has been set).
def convert(data)
  data.force_encoding(@charset)

  # Compare the resolved Encoding rather than the raw setting so that
  # spellings such as "utf-8" are recognised as UTF-8 too.
  if data.encoding != Encoding::UTF_8
    # The user has declared the character encoding of this data is
    # something other than UTF-8. Let's convert it (as cleanly as possible)
    # into UTF-8 so we can use it with JSON, etc.
    return data.encode("UTF-8", :invalid => :replace, :undef => :replace)
  end

  # Already valid UTF-8: nothing to do.
  return data if data.valid_encoding?

  # Some users don't know the charset of their logs or just don't know they
  # can set the charset setting.
  #
  # A silly hack to help convert some of the unknown bytes to
  # somewhat-readable escape codes. The [1..-2] is to trim the quotes
  # ruby puts on the value.
  escaped = data.inspect[1..-2]

  # The logger is optional; don't let a missing logger abort the conversion.
  if @logger
    @logger.warn("Received an event that has a different character encoding than you configured.", :text => escaped, :expected_charset => @charset)
  end

  return escaped
end