Class: ChupaText::UTF8Converter
- Inherits: Object
- Inheritance hierarchy: Object → ChupaText::UTF8Converter
- Defined in:
- lib/chupa-text/utf8-converter.rb
Instance Method Summary
- #convert ⇒ Object
- #initialize(string, max_size: nil) ⇒ UTF8Converter (constructor): A new instance of UTF8Converter.
Constructor Details
#initialize(string, max_size: nil) ⇒ UTF8Converter
Returns a new instance of UTF8Converter.
19 20 21 22 |
# File 'lib/chupa-text/utf8-converter.rb', line 19

# Remembers the text to convert and the optional size limit; no conversion
# work happens here.
#
# @param string [String] the text that #convert will read. #convert calls
#   `encoding`, `bytesize`, `byteslice` and `encode` on it.
# @param max_size [Integer, nil] optional limit, +nil+ when omitted.
#   #convert passes it to `byteslice` as a byte count in its last-resort
#   path.
def initialize(string, max_size: nil)
  @string = string
  @max_size = max_size
end
Instance Method Details
#convert ⇒ Object
24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 |
# File 'lib/chupa-text/utf8-converter.rb', line 24

# Converts @string to UTF-8, choosing a strategy from the encoding the
# string is tagged with:
#
# * UTF-8: never transcoded; only the leading BOM bytes reported by
#   `detect_bom` (if any) are sliced off.
# * ASCII-8BIT containing only ASCII bytes: handed to `truncate` as-is.
# * Any other tagged encoding: transcoded to UTF-8, with invalid or
#   unconvertible characters removed.
# * ASCII-8BIT with non-ASCII bytes: decoded with the BOM's encoding when
#   `detect_bom` reports one, otherwise with the result of
#   `guess_encoding`. When neither is available, the bytes are
#   reinterpreted as UTF-8 and cleaned up.
#
# `detect_bom`, `guess_encoding` and `truncate` are defined elsewhere in
# the class; `truncate` presumably enforces @max_size (confirm against its
# definition).
#
# @return [String] the converted text. Every path but the last returns
#   whatever `truncate` yields (assumed to be a String).
def convert
  # Shared by every transcoding call below: invalid and undefined
  # characters are replaced with the empty string, i.e. removed.
  lossy = {invalid: :replace, undef: :replace, replace: ""}
  source_encoding = @string.encoding

  if source_encoding == Encoding::UTF_8
    bom_size, _bom_encoding = detect_bom
    body = bom_size ? @string.byteslice(bom_size, @string.bytesize - bom_size) : @string
    return truncate(body)
  end

  unless source_encoding == Encoding::ASCII_8BIT
    return truncate(@string.encode(Encoding::UTF_8, **lossy))
  end

  # From here on the input is tagged as binary.
  return truncate(@string) if @string.ascii_only?

  bom_size, bom_encoding = detect_bom
  if bom_encoding
    payload = @string.byteslice(bom_size, @string.bytesize - bom_size)
    return truncate(payload.encode(Encoding::UTF_8, bom_encoding, **lossy))
  end

  guessed_encoding = guess_encoding
  if guessed_encoding
    return truncate(@string.encode(Encoding::UTF_8, guessed_encoding, **lossy))
  end

  # Last resort: treat the raw bytes as UTF-8. The byte limit is applied
  # before scrubbing, and `truncate` is not involved on this path.
  salvaged = @max_size ? @string.byteslice(0, @max_size) : @string.dup
  salvaged.force_encoding(Encoding::UTF_8)
  salvaged.scrub!("")
  # \p{Control} also matches tabs and newlines, so those are removed too.
  salvaged.gsub!(/\p{Control}+/, "")
  salvaged
end