Module: Mail::Utilities
Defined Under Namespace
Classes: BestEffortCharsetEncoder, StrictCharsetEncoder
Constant Summary collapse
- TO_CRLF_REGEX =
Regexp.new("(?<!\r)\n|\r(?!\n)")
Class Attribute Summary collapse
-
.charset_encoder ⇒ Object
Returns the value of attribute charset_encoder.
Class Method Summary collapse
- .b_value_decode(str) ⇒ Object
- .b_value_encode(str, encoding = nil) ⇒ Object
-
.binary_unsafe_to_crlf(string) ⇒ Object
:nodoc:.
-
.binary_unsafe_to_lf(string) ⇒ Object
:nodoc:.
- .bracket(str) ⇒ Object
- .decode_base64(str) ⇒ Object
- .decode_utf7(utf7) ⇒ Object
- .encode_base64(str) ⇒ Object
-
.encode_utf7(string) ⇒ Object
From Ruby stdlib Net::IMAP.
- .escape_bracket(str) ⇒ Object
-
.escape_paren(str) ⇒ Object
Escapes any unescaped parentheses in a string. This uses negative lookbehind, a Ruby 1.9.1 regexp feature.
- .get_constant(klass, string) ⇒ Object
- .has_constant?(klass, string) ⇒ Boolean
- .param_decode(str, encoding) ⇒ Object
- .param_encode(str) ⇒ Object
- .paren(str) ⇒ Object
-
.pick_encoding(charset) ⇒ Object
Pick a Ruby encoding corresponding to the message charset.
- .q_value_decode(str) ⇒ Object
- .q_value_encode(str, encoding = nil) ⇒ Object
-
.safe_for_line_ending_conversion?(string) ⇒ Boolean
:nodoc:.
- .string_byteslice(str, *args) ⇒ Object
-
.to_crlf(string) ⇒ Object
Convert line endings to \r\n unless the string is binary.
-
.to_lf(string) ⇒ Object
Convert line endings to \n unless the string is binary.
- .transcode_charset(str, from_encoding, to_encoding = Encoding::UTF_8) ⇒ Object
- .uri_parser ⇒ Object
Instance Method Summary collapse
-
#atom_safe?(str) ⇒ Boolean
Returns true if the string supplied is free from characters not allowed as an ATOM.
-
#blank?(value) ⇒ Boolean
Returns true if the object is considered blank.
-
#bracket(str) ⇒ Object
Wraps a string in angle brackets and escapes any that are in the string itself.
-
#capitalize_field(str) ⇒ Object
Capitalizes a string that is joined by hyphens correctly.
-
#constantize(str) ⇒ Object
Takes an underscored word and turns it into a class name.
-
#dasherize(str) ⇒ Object
Swaps out all underscores (_) for hyphens (-); good for turning a symbol into a field name string.
-
#dquote(str) ⇒ Object
Wraps supplied string in double quotes and applies \-escaping as necessary, unless it is already wrapped.
-
#escape_paren(str) ⇒ Object
Escapes parentheses in a string.
- #generate_message_id ⇒ Object
- #map_lines(str, &block) ⇒ Object
- #map_with_index(enum, &block) ⇒ Object
-
#match_to_s(obj1, obj2) ⇒ Object
Matches two objects with their to_s values case insensitively.
-
#paren(str) ⇒ Object
Wraps a string in parentheses and escapes any that are in the string itself.
-
#quote_atom(str) ⇒ Object
If the string supplied has ATOM unsafe characters in it, will return the string quoted in double quotes, otherwise returns the string unmodified.
-
#quote_phrase(str) ⇒ Object
If the string supplied has PHRASE unsafe characters in it, will return the string quoted in double quotes, otherwise returns the string unmodified.
-
#quote_token(str) ⇒ Object
If the string supplied has TOKEN unsafe characters in it, will return the string quoted in double quotes, otherwise returns the string unmodified.
-
#token_safe?(str) ⇒ Boolean
Returns true if the string supplied is free from characters not allowed as a TOKEN.
-
#unbracket(str) ⇒ Object
Unwraps a string from being wrapped in angle brackets.
-
#underscoreize(str) ⇒ Object
Swaps out all hyphens (-) for underscores (_); good for turning a field name into a string that can be used as a symbol.
-
#unescape(str) ⇒ Object
Removes any \-escaping.
-
#unparen(str) ⇒ Object
Unwraps a string from being wrapped in parentheses.
-
#unquote(str) ⇒ Object
Unwraps supplied string from inside double quotes and removes any \-escaping.
- #uri_escape(str) ⇒ Object
- #uri_parser ⇒ Object
- #uri_unescape(str) ⇒ Object
Class Attribute Details
.charset_encoder ⇒ Object
Returns the value of attribute charset_encoder.
334 335 336 |
# File 'lib/mail/utilities.rb', line 334 def charset_encoder @charset_encoder end |
Class Method Details
.b_value_decode(str) ⇒ Object
414 415 416 417 418 419 420 421 422 423 424 425 |
# File 'lib/mail/utilities.rb', line 414 def Utilities.b_value_decode(str) match = str.match(/\=\?(.+)?\?[Bb]\?(.*)\?\=/m) if match charset = match[1] str = Utilities.decode_base64(match[2]) str = charset_encoder.encode(str, charset) end transcode_to_scrubbed_utf8(str) rescue Encoding::UndefinedConversionError, ArgumentError, Encoding::ConverterNotFoundError, Encoding::InvalidByteSequenceError warn "WARNING: Encoding conversion failed #{$!}" str.dup.force_encoding(Encoding::UTF_8) end |
.b_value_encode(str, encoding = nil) ⇒ Object
409 410 411 412 |
# File 'lib/mail/utilities.rb', line 409 def Utilities.b_value_encode(str, encoding = nil) encoding = str.encoding.to_s [Utilities.encode_base64(str), encoding] end |
.binary_unsafe_to_crlf(string) ⇒ Object
:nodoc:
243 244 245 |
# File 'lib/mail/utilities.rb', line 243 def self.binary_unsafe_to_crlf(string) #:nodoc: string.gsub(TO_CRLF_REGEX, Constants::CRLF) end |
.binary_unsafe_to_lf(string) ⇒ Object
:nodoc:
233 234 235 |
# File 'lib/mail/utilities.rb', line 233 def self.binary_unsafe_to_lf(string) #:nodoc: string.gsub(/\r\n|\r/, Constants::LF) end |
.bracket(str) ⇒ Object
356 357 358 359 360 |
# File 'lib/mail/utilities.rb', line 356 def Utilities.bracket( str ) str = ::Mail::Utilities.unbracket( str ) str = escape_bracket( str ) '<' + str + '>' end |
.decode_base64(str) ⇒ Object
362 363 364 365 366 367 |
# File 'lib/mail/utilities.rb', line 362 def Utilities.decode_base64(str) if !str.end_with?("=") && str.length % 4 != 0 str = str.ljust((str.length + 3) & ~3, "=") end str.unpack1( 'm' ) end |
.decode_utf7(utf7) ⇒ Object
399 400 401 402 403 404 405 406 407 |
# File 'lib/mail/utilities.rb', line 399 def Utilities.decode_utf7(utf7) utf7.gsub(/&([^-]+)?-/n) do if $1 ($1.tr(",", "/") + "===").unpack1("m").encode(Encoding::UTF_8, Encoding::UTF_16BE) else "&" end end end |
.encode_base64(str) ⇒ Object
369 370 371 |
# File 'lib/mail/utilities.rb', line 369 def Utilities.encode_base64(str) [str].pack( 'm' ) end |
.encode_utf7(string) ⇒ Object
From Ruby stdlib Net::IMAP
388 389 390 391 392 393 394 395 396 397 |
# File 'lib/mail/utilities.rb', line 388 def Utilities.encode_utf7(string) string.gsub(/(&)|[^\x20-\x7e]+/) do if $1 "&-" else base64 = [$&.encode(Encoding::UTF_16BE)].pack("m0") "&" + base64.delete("=").tr("/", ",") + "-" end end.force_encoding(Encoding::ASCII_8BIT) end |
.escape_bracket(str) ⇒ Object
351 352 353 354 |
# File 'lib/mail/utilities.rb', line 351 def Utilities.escape_bracket( str ) re = /(?<!\\)([\<\>])/ # Only match unescaped brackets str.gsub(re) { |s| '\\' + s } end |
.escape_paren(str) ⇒ Object
Escapes any unescaped parentheses in a string. This uses negative lookbehind, a Ruby 1.9.1 regexp feature.
340 341 342 343 |
# File 'lib/mail/utilities.rb', line 340 def Utilities.escape_paren( str ) re = /(?<!\\)([\(\)])/ # Only match unescaped parens str.gsub(re) { |s| '\\' + s } end |
.get_constant(klass, string) ⇒ Object
377 378 379 |
# File 'lib/mail/utilities.rb', line 377 def Utilities.get_constant(klass, string) klass.const_get( string ) end |
.has_constant?(klass, string) ⇒ Boolean
373 374 375 |
# File 'lib/mail/utilities.rb', line 373 def Utilities.has_constant?(klass, string) klass.const_defined?( string, false ) end |
.param_decode(str, encoding) ⇒ Object
451 452 453 454 455 456 457 458 |
# File 'lib/mail/utilities.rb', line 451 def Utilities.param_decode(str, encoding) str = uri_parser.unescape(str) str = charset_encoder.encode(str, encoding) if encoding transcode_to_scrubbed_utf8(str) rescue Encoding::UndefinedConversionError, ArgumentError, Encoding::ConverterNotFoundError warn "WARNING: Encoding conversion failed #{$!}" str.dup.force_encoding(Encoding::UTF_8) end |
.param_encode(str) ⇒ Object
460 461 462 463 464 |
# File 'lib/mail/utilities.rb', line 460 def Utilities.param_encode(str) encoding = str.encoding.to_s.downcase language = Configuration.instance.param_encode_language "#{encoding}'#{language}'#{uri_parser.escape(str)}" end |
.paren(str) ⇒ Object
345 346 347 348 349 |
# File 'lib/mail/utilities.rb', line 345 def Utilities.paren( str ) str = ::Mail::Utilities.unparen( str ) str = escape_paren( str ) '(' + str + ')' end |
.pick_encoding(charset) ⇒ Object
Pick a Ruby encoding corresponding to the message charset. Most charsets have a Ruby encoding, but some need manual aliasing here.
TODO: add this as a test somewhere:
Encoding.list.map { |e| [e.to_s.upcase == pick_encoding(e.to_s.downcase.gsub("-", "")), e.to_s] }.select {|a,b| !b}
Encoding.list.map { |e| [e.to_s == pick_encoding(e.to_s), e.to_s] }.select {|a,b| !b}
476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 |
# File 'lib/mail/utilities.rb', line 476 def Utilities.pick_encoding(charset) charset = charset.to_s encoding = case charset.downcase # ISO-8859-8-I etc. http://en.wikipedia.org/wiki/ISO-8859-8-I when /^iso[-_]?8859-(\d+)(-i)?$/ "ISO-8859-#{$1}" # ISO-8859-15, ISO-2022-JP and alike when /^iso[-_]?(\d{4})-?(\w{1,2})$/ "ISO-#{$1}-#{$2}" # "ISO-2022-JP-KDDI" and alike when /^iso[-_]?(\d{4})-?(\w{1,2})-?(\w*)$/ "ISO-#{$1}-#{$2}-#{$3}" # UTF-8, UTF-32BE and alike when /^utf[\-_]?(\d{1,2})?(\w{1,2})$/ "UTF-#{$1}#{$2}".gsub(/\A(UTF-(?:16|32))\z/, '\\1BE') # Windows-1252 and alike when /^windows-?(.*)$/ "Windows-#{$1}" when '8bit' Encoding::ASCII_8BIT # alternatives/misspellings of us-ascii seen in the wild when /^iso[-_]?646(-us)?$/, 'us=ascii' Encoding::ASCII # Microsoft-specific alias for MACROMAN when 'macintosh' Encoding::MACROMAN # Microsoft-specific alias for CP949 (Korean) when 'ks_c_5601-1987' Encoding::CP949 # Wrongly written Shift_JIS (Japanese) when 'shift-jis' Encoding::Shift_JIS # GB2312 (Chinese charset) is a subset of GB18030 (its replacement) when 'gb2312' Encoding::GB18030 when 'cp-850' Encoding::CP850 when 'latin2' Encoding::ISO_8859_2 else charset end convert_to_encoding(encoding) end |
.q_value_decode(str) ⇒ Object
432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 |
# File 'lib/mail/utilities.rb', line 432 def Utilities.q_value_decode(str) match = str.match(/\=\?(.+)?\?[Qq]\?(.*)\?\=/m) if match charset = match[1] string = match[2].gsub(/_/, '=20') # Remove trailing = if it exists in a Q encoding string = string.sub(/\=$/, '') str = Encodings::QuotedPrintable.decode(string) str = charset_encoder.encode(str, charset) # We assume that binary strings hold utf-8 directly to work around # jruby/jruby#829 which subtly changes String#encode semantics. str.force_encoding(Encoding::UTF_8) if str.encoding == Encoding::ASCII_8BIT end transcode_to_scrubbed_utf8(str) rescue Encoding::UndefinedConversionError, ArgumentError, Encoding::ConverterNotFoundError warn "WARNING: Encoding conversion failed #{$!}" str.dup.force_encoding(Encoding::UTF_8) end |
.q_value_encode(str, encoding = nil) ⇒ Object
427 428 429 430 |
# File 'lib/mail/utilities.rb', line 427 def Utilities.q_value_encode(str, encoding = nil) encoding = str.encoding.to_s [Encodings::QuotedPrintable.encode(str), encoding] end |
.safe_for_line_ending_conversion?(string) ⇒ Boolean
:nodoc:
247 248 249 250 251 252 253 |
# File 'lib/mail/utilities.rb', line 247 def self.safe_for_line_ending_conversion?(string) #:nodoc: if string.encoding == Encoding::BINARY string.ascii_only? else string.valid_encoding? end end |
.string_byteslice(str, *args) ⇒ Object
536 537 538 |
# File 'lib/mail/utilities.rb', line 536 def Utilities.string_byteslice(str, *args) str.byteslice(*args) end |
.to_crlf(string) ⇒ Object
Convert line endings to \r\n unless the string is binary. Used for encoding 8bit and base64 Content-Transfer-Encoding and for convenience when parsing emails with \n line endings instead of the required \r\n.
269 270 271 272 273 274 275 276 |
# File 'lib/mail/utilities.rb', line 269 def self.to_crlf(string) string = string.to_s if safe_for_line_ending_conversion? string binary_unsafe_to_crlf string else string end end |
.to_lf(string) ⇒ Object
Convert line endings to \n unless the string is binary. Used for sendmail delivery and for decoding 8bit Content-Transfer-Encoding.
257 258 259 260 261 262 263 264 |
# File 'lib/mail/utilities.rb', line 257 def self.to_lf(string) string = string.to_s if safe_for_line_ending_conversion? string binary_unsafe_to_lf string else string end end |
.transcode_charset(str, from_encoding, to_encoding = Encoding::UTF_8) ⇒ Object
381 382 383 384 385 |
# File 'lib/mail/utilities.rb', line 381 def Utilities.transcode_charset(str, from_encoding, to_encoding = Encoding::UTF_8) to_encoding = Encoding.find(to_encoding) replacement_char = to_encoding == Encoding::UTF_8 ? '�' : '?' charset_encoder.encode(str.dup, from_encoding).encode(to_encoding, :undef => :replace, :invalid => :replace, :replace => replacement_char) end |
.uri_parser ⇒ Object
466 467 468 |
# File 'lib/mail/utilities.rb', line 466 def Utilities.uri_parser URI::DEFAULT_PARSER end |
Instance Method Details
#atom_safe?(str) ⇒ Boolean
Returns true if the string supplied is free from characters not allowed as an ATOM
11 12 13 |
# File 'lib/mail/utilities.rb', line 11 def atom_safe?( str ) not Constants::ATOM_UNSAFE === str end |
#blank?(value) ⇒ Boolean
Returns true if the object is considered blank. A blank includes things like '', ' ', nil, and arrays and hashes that have nothing in them.
This logic is mostly shared with ActiveSupport’s blank?
283 284 285 286 287 288 289 290 291 |
# File 'lib/mail/utilities.rb', line 283 def blank?(value) if value.kind_of?(NilClass) true elsif value.kind_of?(String) value !~ /\S/ else value.respond_to?(:empty?) ? value.empty? : !value end end |
#bracket(str) ⇒ Object
Wraps a string in angle brackets and escapes any that are in the string itself
Example:
bracket( 'This is a string' ) #=> '<This is a string>'
131 132 133 |
# File 'lib/mail/utilities.rb', line 131 def bracket( str ) Utilities.bracket( str ) end |
#capitalize_field(str) ⇒ Object
Capitalizes a string that is joined by hyphens correctly.
Example:
string = 'resent-from-field'
capitalize_field( string ) #=> 'Resent-From-Field'
188 189 190 |
# File 'lib/mail/utilities.rb', line 188 def capitalize_field( str ) str.to_s.split("-").map { |v| v.capitalize }.join("-") end |
#constantize(str) ⇒ Object
Takes an underscored word and turns it into a class name
Example:
constantize("hello") #=> "Hello"
constantize("hello-there") #=> "HelloThere"
constantize("hello-there-mate") #=> "HelloThereMate"
199 200 201 |
# File 'lib/mail/utilities.rb', line 199 def constantize( str ) str.to_s.split(/[-_]/).map { |v| v.capitalize }.to_s end |
#dasherize(str) ⇒ Object
Swaps out all underscores (_) for hyphens (-); good for turning a symbol into a field name string.
Example:
string = :resent_from_field
dasherize( string ) #=> 'resent-from-field'
210 211 212 |
# File 'lib/mail/utilities.rb', line 210 def dasherize( str ) str.to_s.tr(Constants::UNDERSCORE, Constants::HYPHEN) end |
#dquote(str) ⇒ Object
Wraps supplied string in double quotes and applies \-escaping as necessary, unless it is already wrapped.
Example:
string = 'This is a string'
dquote(string) #=> '"This is a string"'
string = 'This is "a string"'
dquote(string) #=> '"This is \"a string\""'
68 69 70 |
# File 'lib/mail/utilities.rb', line 68 def dquote( str ) '"' + unquote(str).gsub(/[\\"]/n) {|s| '\\' + s } + '"' end |
#escape_paren(str) ⇒ Object
Escapes parentheses in a string
Example:
str = 'This is (a) string'
escape_paren( str ) #=> 'This is \(a\) string'
155 156 157 |
# File 'lib/mail/utilities.rb', line 155 def escape_paren( str ) Utilities.escape_paren( str ) end |
#generate_message_id ⇒ Object
293 294 295 |
# File 'lib/mail/utilities.rb', line 293 def generate_message_id "<#{Mail.random_tag}@#{::Socket.gethostname}.mail>" end |
#map_lines(str, &block) ⇒ Object
225 226 227 |
# File 'lib/mail/utilities.rb', line 225 def map_lines( str, &block ) str.each_line.map(&block) end |
#map_with_index(enum, &block) ⇒ Object
229 230 231 |
# File 'lib/mail/utilities.rb', line 229 def map_with_index( enum, &block ) enum.each_with_index.map(&block) end |
#match_to_s(obj1, obj2) ⇒ Object
Matches two objects with their to_s values case insensitively
Example:
obj2 = "This_is_An_object"
obj1 = :this_IS_an_object
match_to_s( obj1, obj2 ) #=> true
178 179 180 |
# File 'lib/mail/utilities.rb', line 178 def match_to_s( obj1, obj2 ) obj1.to_s.casecmp(obj2.to_s) == 0 end |
#paren(str) ⇒ Object
Wraps a string in parentheses and escapes any that are in the string itself.
Example:
paren( 'This is a string' ) #=> '(This is a string)'
108 109 110 |
# File 'lib/mail/utilities.rb', line 108 def paren( str ) Utilities.paren( str ) end |
#quote_atom(str) ⇒ Object
If the string supplied has ATOM unsafe characters in it, will return the string quoted in double quotes, otherwise returns the string unmodified
17 18 19 |
# File 'lib/mail/utilities.rb', line 17 def quote_atom( str ) atom_safe?( str ) ? str : dquote(str) end |
#quote_phrase(str) ⇒ Object
If the string supplied has PHRASE unsafe characters in it, will return the string quoted in double quotes, otherwise returns the string unmodified
23 24 25 26 27 28 29 30 31 32 33 34 35 |
# File 'lib/mail/utilities.rb', line 23 def quote_phrase( str ) if str.respond_to?(:force_encoding) original_encoding = str.encoding ascii_str = str.to_s.dup.force_encoding('ASCII-8BIT') if Constants::PHRASE_UNSAFE === ascii_str dquote(ascii_str).force_encoding(original_encoding) else str end else Constants::PHRASE_UNSAFE === str ? dquote(str) : str end end |
#quote_token(str) ⇒ Object
If the string supplied has TOKEN unsafe characters in it, will return the string quoted in double quotes, otherwise returns the string unmodified
44 45 46 47 48 49 50 51 52 53 54 55 56 |
# File 'lib/mail/utilities.rb', line 44 def quote_token( str ) if str.respond_to?(:force_encoding) original_encoding = str.encoding ascii_str = str.to_s.dup.force_encoding('ASCII-8BIT') if token_safe?( ascii_str ) str else dquote(ascii_str).force_encoding(original_encoding) end else token_safe?( str ) ? str : dquote(str) end end |
#token_safe?(str) ⇒ Boolean
Returns true if the string supplied is free from characters not allowed as a TOKEN
38 39 40 |
# File 'lib/mail/utilities.rb', line 38 def token_safe?( str ) not Constants::TOKEN_UNSAFE === str end |
#unbracket(str) ⇒ Object
Unwraps a string from being wrapped in angle brackets
Example:
str = '<This is a string>'
unbracket( str ) #=> 'This is a string'
141 142 143 144 145 146 147 |
# File 'lib/mail/utilities.rb', line 141 def unbracket( str ) if str.start_with?('<') && str.end_with?('>') str.slice(1..-2) else str end end |
#underscoreize(str) ⇒ Object
Swaps out all hyphens (-) for underscores (_); good for turning a field name into a string that can be used as a symbol.
Example:
string = 'Resent-From-Field'
underscoreize ( string ) #=> 'resent_from_field'
221 222 223 |
# File 'lib/mail/utilities.rb', line 221 def underscoreize( str ) str.to_s.downcase.tr(Constants::HYPHEN, Constants::UNDERSCORE) end |
#unescape(str) ⇒ Object
Removes any \-escaping.
Example:
string = 'This is \"a string\"'
unescape(string) #=> 'This is "a string"'
string = '"This is \"a string\""'
unescape(string) #=> '"This is "a string""'
99 100 101 |
# File 'lib/mail/utilities.rb', line 99 def unescape( str ) str.gsub(/\\(.)/, '\1') end |
#unparen(str) ⇒ Object
Unwraps a string from being wrapped in parentheses
Example:
str = '(This is a string)'
unparen( str ) #=> 'This is a string'
118 119 120 121 122 123 124 |
# File 'lib/mail/utilities.rb', line 118 def unparen( str ) if str.start_with?('(') && str.end_with?(')') str.slice(1..-2) else str end end |
#unquote(str) ⇒ Object
Unwraps supplied string from inside double quotes and removes any \-escaping.
Example:
string = '"This is a string"'
unquote(string) #=> 'This is a string'
string = '"This is \"a string\""'
unquote(string) #=> 'This is "a string"'
82 83 84 85 86 87 88 |
# File 'lib/mail/utilities.rb', line 82 def unquote( str ) if str =~ /^"(.*?)"$/ unescape($1) else str end end |
#uri_escape(str) ⇒ Object
159 160 161 |
# File 'lib/mail/utilities.rb', line 159 def uri_escape( str ) uri_parser.escape(str) end |
#uri_parser ⇒ Object
167 168 169 |
# File 'lib/mail/utilities.rb', line 167 def uri_parser @uri_parser ||= URI.const_defined?(:DEFAULT_PARSER) ? URI::DEFAULT_PARSER : URI end |
#uri_unescape(str) ⇒ Object
163 164 165 |
# File 'lib/mail/utilities.rb', line 163 def uri_unescape( str ) uri_parser.unescape(str) end |