Module: TMail::TextUtils
- Included in:
- TMail, Address, Attachment, Decoder, Encoder, HeaderField, Mail
- Defined in:
- lib/tmail/utils.rb
Overview
Text Utils provides a namespace to define TOKENs, ATOMs, PHRASEs and CONTROL characters that are OK per RFC 2822.
It also provides methods you can call to determine whether a string is safe to use as an ATOM or a TOKEN without quoting.
Constant Summary collapse
- CONTROL_CHAR =
/[#{control}]/n
- ATOM_UNSAFE =
/[#{Regexp.quote aspecial}#{control}#{lwsp}]/n
- PHRASE_UNSAFE =
/[#{Regexp.quote aspecial}#{control}]/n
- TOKEN_UNSAFE =
/[#{Regexp.quote tspecial}#{control}#{lwsp}]/n
- ZONESTR_TABLE =
:stopdoc:
{ 'jst' => 9 * 60, 'eet' => 2 * 60, 'bst' => 1 * 60, 'met' => 1 * 60, 'gmt' => 0, 'utc' => 0, 'ut' => 0, 'nst' => -(3 * 60 + 30), 'ast' => -4 * 60, 'edt' => -4 * 60, 'est' => -5 * 60, 'cdt' => -5 * 60, 'cst' => -6 * 60, 'mdt' => -6 * 60, 'mst' => -7 * 60, 'pdt' => -7 * 60, 'pst' => -8 * 60, 'a' => -1 * 60, 'b' => -2 * 60, 'c' => -3 * 60, 'd' => -4 * 60, 'e' => -5 * 60, 'f' => -6 * 60, 'g' => -7 * 60, 'h' => -8 * 60, 'i' => -9 * 60, # j not use 'k' => -10 * 60, 'l' => -11 * 60, 'm' => -12 * 60, 'n' => 1 * 60, 'o' => 2 * 60, 'p' => 3 * 60, 'q' => 4 * 60, 'r' => 5 * 60, 's' => 6 * 60, 't' => 7 * 60, 'u' => 8 * 60, 'v' => 9 * 60, 'w' => 10 * 60, 'x' => 11 * 60, 'y' => 12 * 60, 'z' => 0 * 60 }
- WDAY =
:stopdoc:
%w( Sun Mon Tue Wed Thu Fri Sat TMailBUG )
- MONTH =
%w( TMailBUG Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec TMailBUG )
- MESSAGE_ID =
/<[^\@>]+\@[^>]+>/
- MIME_ENCODED =
/=\?[^\s?=]+\?[QB]\?[^\s?=]+\?=/i
- NKF_FLAGS =
{ 'EUC' => '-e -m', 'SJIS' => '-s -m' }
- RFC2231_ENCODED =
/\A(?:iso-2022-jp|euc-jp|shift_jis|us-ascii)?'[a-z]*'/in
Instance Method Summary collapse
-
#atom_safe?(str) ⇒ Boolean
Returns true if the string supplied is free from characters not allowed as an ATOM.
- #decode_params(hash) ⇒ Object
- #decode_RFC2231(str) ⇒ Object
-
#join_domain(arr) ⇒ Object
Provides a method to join a domain name by its parts and also makes it ATOM safe by quoting it as needed.
- #message_id?(str) ⇒ Boolean
- #mime_encoded?(str) ⇒ Boolean
-
#quote_atom(str) ⇒ Object
If the string supplied has ATOM unsafe characters in it, will return the string quoted in double quotes, otherwise returns the string unmodified.
- #quote_boundary ⇒ Object
-
#quote_phrase(str) ⇒ Object
If the string supplied has PHRASE unsafe characters in it, will return the string quoted in double quotes, otherwise returns the string unmodified.
-
#quote_token(str) ⇒ Object
If the string supplied has TOKEN unsafe characters in it, will return the string quoted in double quotes, otherwise returns the string unmodified.
-
#quote_unquoted_bencode ⇒ Object
AppleMail generates Content-Type parameters that contain illegal characters, such as name==?ISO-2022-JP?B?…=?=, so this quotes them.
-
#quote_unquoted_name ⇒ Object
AppleMail generates name=filename attributes in the content type that contain spaces.
- #time2str(tm) ⇒ Object
-
#timezone_string_to_unixtime(str) ⇒ Object
Takes a time zone string from an email and converts it to an offset from UTC in seconds.
- #to_kcode(str) ⇒ Object
-
#token_safe?(str) ⇒ Boolean
Returns true if the string supplied is free from characters not allowed as a TOKEN.
-
#unquote(str) ⇒ Object
Unwraps the supplied string from inside double quotes. Returns the unquoted string.
Instance Method Details
#atom_safe?(str) ⇒ Boolean
Returns true if the string supplied is free from characters not allowed as an ATOM
123 124 125 |
# File 'lib/tmail/utils.rb', line 123
#
# Tells whether +str+ can be used as an ATOM without quoting.
#
# str:: the value to test against ATOM_UNSAFE.
#
# Returns true when ATOM_UNSAFE does not match +str+, false when it does.
def atom_safe?(str)
  !(ATOM_UNSAFE === str)
end
#decode_params(hash) ⇒ Object
272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 |
# File 'lib/tmail/utils.rb', line 272
#
# Builds a new parameter hash from +hash+.
#
# A key ending in "*" or "*N*" is treated as one segment of an encoded
# value: its value is stored at position N (0 when no number is given)
# under the key with that suffix removed. Once every entry has been seen,
# the segments of each such key are joined and passed to decode_RFC2231.
# Every other value is passed through to_kcode.
#
# hash:: parameter name => raw value.
#
# Returns the new Hash; +hash+ itself is not modified.
def decode_params(hash)
  decoded = {}
  segments = {}

  hash.each do |key, value|
    m = /\*(?:(\d+)\*)?\z/.match(key)
    if m
      position = (m[1] || 0).to_i
      (segments[m.pre_match] ||= [])[position] = value
    else
      decoded[key] = to_kcode(value)
    end
  end

  # Segmented values are filled in after all plain values.
  segments.each do |name, parts|
    decoded[name] = decode_RFC2231(parts.join(''))
  end

  decoded
end
#decode_RFC2231(str) ⇒ Object
303 304 305 306 307 308 309 310 |
# File 'lib/tmail/utils.rb', line 303
#
# Decodes a parameter value that starts with the prefix matched by
# RFC2231_ENCODED.
#
# str:: the raw parameter value.
#
# Returns +str+ unchanged when the prefix is absent. Otherwise the text
# after the prefix has each "%XX" escape replaced by the corresponding
# character and is passed through to_kcode. If that raises, the text after
# the prefix is returned with the "%XX" escapes removed instead.
def decode_RFC2231(str)
  m = RFC2231_ENCODED.match(str)
  return str unless m

  payload = m.post_match
  begin
    to_kcode(payload.gsub(/%[\da-f]{2}/in) { |escape| escape[1, 2].hex.chr })
  rescue
    payload.gsub(/%[\da-f]{2}/in, "")
  end
end
#join_domain(arr) ⇒ Object
Provides a method to join a domain name by its parts and also makes it ATOM safe by quoting it as needed
169 170 171 172 173 174 175 176 177 |
# File 'lib/tmail/utils.rb', line 169
#
# Joins the parts of a domain name with "." after making each part
# ATOM safe.
#
# arr:: the domain parts, in order.
#
# A part that is wrapped in square brackets is kept as it is; every other
# part is passed through quote_atom.
#
# Returns the joined String.
def join_domain(arr)
  parts = arr.map do |label|
    /\A\[.*\]\z/ === label ? label : quote_atom(label)
  end
  parts.join('.')
end
#message_id?(str) ⇒ Boolean
260 261 262 |
# File 'lib/tmail/utils.rb', line 260
#
# Tells whether +str+ contains text shaped like a message id.
#
# str:: the value to test against MESSAGE_ID.
#
# Returns true when MESSAGE_ID matches somewhere in +str+, false otherwise.
def message_id?( str )
  MESSAGE_ID === str
end
#mime_encoded?(str) ⇒ Boolean
267 268 269 |
# File 'lib/tmail/utils.rb', line 267
#
# Tells whether +str+ contains a MIME encoded word.
#
# str:: the value to test against MIME_ENCODED.
#
# Returns true when MIME_ENCODED matches somewhere in +str+, false otherwise.
def mime_encoded?(str)
  MIME_ENCODED === str
end
#quote_atom(str) ⇒ Object
If the string supplied has ATOM unsafe characters in it, will return the string quoted in double quotes, otherwise returns the string unmodified
129 130 131 |
# File 'lib/tmail/utils.rb', line 129
#
# Quotes +str+ when it cannot be used as an ATOM as it stands.
#
# str:: the value to check against ATOM_UNSAFE.
#
# Returns dquote(str) when ATOM_UNSAFE matches +str+, otherwise +str+
# itself, unmodified.
def quote_atom(str)
  return str unless ATOM_UNSAFE === str

  dquote(str)
end
#quote_boundary ⇒ Object
312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 |
# File 'lib/tmail/utils.rb', line 312
#
# Makes sure the Content-Type boundary= parameter held in @body is quoted
# if it contains "/", "?" or "=" (such as the "=" in MS Outlook's boundary
# text), so those characters are escaped from the parser.
#
# Reads and may reassign @body. When @body is rewritten, a trailing line
# break after the boundary parameter (or after the text following it) is
# dropped.
#
# Returns the new @body when it was rewritten, nil otherwise.
def quote_boundary
  return unless @body =~ /^(.*)boundary=(.*)$/m

  preamble  = $1
  remainder = $2

  if remainder =~ /\A("[^"]*")(.*)\z/m
    # The value is already a quoted string: take it whole. Splitting at the
    # first ";" would cut a value such as "a=;b" (or one followed by
    # whitespace before the ";") in half and then wrap the pieces in a
    # second pair of quotes.
    boundary_text = $1
    post = $2.chomp
  elsif remainder =~ /\A(.*?)(;.*)\z/m
    # Unquoted value followed by further parameters.
    boundary_text = $1
    post = $2.chomp
  else
    # Unquoted value running to the end of the body.
    boundary_text = remainder.chomp
    post = ''
  end

  return unless boundary_text =~ /[\/\?\=]/

  boundary_text = "\"#{boundary_text}\"" unless boundary_text =~ /^".*?"$/
  @body = "#{preamble}boundary=#{boundary_text}#{post}"
end
#quote_phrase(str) ⇒ Object
If the string supplied has PHRASE unsafe characters in it, will return the string quoted in double quotes, otherwise returns the string unmodified
135 136 137 |
# File 'lib/tmail/utils.rb', line 135
#
# Quotes +str+ when it cannot be used as a PHRASE as it stands.
#
# str:: the value to check against PHRASE_UNSAFE.
#
# Returns dquote(str) when PHRASE_UNSAFE matches +str+, otherwise +str+
# itself, unmodified.
def quote_phrase(str)
  return str unless PHRASE_UNSAFE === str

  dquote(str)
end
#quote_token(str) ⇒ Object
If the string supplied has TOKEN unsafe characters in it, will return the string quoted in double quotes, otherwise returns the string unmodified
146 147 148 |
# File 'lib/tmail/utils.rb', line 146
#
# Quotes +str+ when it cannot be used as a TOKEN as it stands.
#
# str:: the value to check against TOKEN_UNSAFE.
#
# Returns dquote(str) when TOKEN_UNSAFE matches +str+, otherwise +str+
# itself, unmodified.
def quote_token(str)
  return str unless TOKEN_UNSAFE === str

  dquote(str)
end
#quote_unquoted_bencode ⇒ Object
AppleMail generates Content-Type parameters that contain illegal characters, such as:
name==?ISO-2022-JP?B?...=?=
so quote them. (This only handles values that fit on one line.)
336 337 338 339 340 341 342 343 344 |
# File 'lib/tmail/utils.rb', line 336
#
# Quotes unquoted encoded-word parameter values in @body, such as the
#   name==?ISO-2022-JP?B?...=?=
# that AppleMail generates.
#
# Every "; param=" that is directly followed by text starting with "=?" has
# that text passed through quote_token. The text ends at the first ";",
# CR, LF or space, or at the end of the body.
#
# Reassigns @body and returns the new value.
def quote_unquoted_bencode
  pattern = %r"(;\s+[-a-z]+=)(=\?.+?)([;\r\n ]|\z)"m
  @body = @body.gsub(pattern) {
    # head:          "; name="
    # should_quoted: "=?ISO-2022-JP?B?...=?="
    # tail:          the terminating character ("" at the end of the body)
    head, should_quoted, tail = Regexp.last_match.captures
    head + quote_token(should_quoted) + tail
  }
end
#quote_unquoted_name ⇒ Object
AppleMail generates name=filename attributes in the content type that contain spaces. These need to be quoted so that the TMail parser can handle them.
348 349 350 351 352 353 354 355 |
# File 'lib/tmail/utils.rb', line 348
#
# Quotes unquoted name= values in @body, such as the file names containing
# spaces that AppleMail generates.
#
# The run of word characters, whitespace and dots that directly follows
# "name=" is passed through quote_token; the rest of the body is kept as is.
#
# Reassigns @body and returns the new value.
def quote_unquoted_name
  pattern = %r|(name=)([\w\s.]+)(.*)|m
  @body = @body.gsub(pattern) {
    # head:          "name="
    # should_quoted: the word characters, whitespace and dots after it
    # tail:          everything that follows
    head, should_quoted, tail = Regexp.last_match.captures
    head + quote_token(should_quoted) + tail
  }
end
#time2str(tm) ⇒ Object
244 245 246 247 248 249 250 251 252 253 254 255 |
# File 'lib/tmail/utils.rb', line 244
#
# Formats +tm+ as a date-time string such as
#   "Wed, 1 Jan 2020 12:00:00 -0330"
#
# tm:: a Time (it must respond to wday, mday, month, year, hour, min, sec
#      and utc_offset).
#
# Returns the formatted String. The day and month names come from WDAY and
# MONTH.
def time2str( tm )
  # Take the zone offset from +tm+ itself so that it always agrees with the
  # hour and minute fields printed below, whatever the system time zone is.
  offset = tm.utc_offset

  # Split sign and magnitude before dividing: divmod rounds towards negative
  # infinity, so -03:30 would otherwise be printed as "-0430".
  sign = offset < 0 ? '-' : '+'
  hours, minutes = (offset.abs / 60).divmod(60)

  # DO NOT USE strftime: setlocale() breaks it
  sprintf('%s, %s %s %d %02d:%02d:%02d %s%02d%02d',
          WDAY[tm.wday], tm.mday, MONTH[tm.month],
          tm.year, tm.hour, tm.min, tm.sec,
          sign, hours, minutes)
end
#timezone_string_to_unixtime(str) ⇒ Object
Takes a time zone string from an email and converts it to an offset from UTC in seconds
228 229 230 231 232 233 234 235 236 237 |
# File 'lib/tmail/utils.rb', line 228
#
# Converts a time zone string from an email into an offset in seconds.
#
# str:: either a numeric zone such as "+0900" or "-0330", or a zone name
#       listed in ZONESTR_TABLE (looked up in lower case).
#
# Returns the offset as an Integer number of seconds, negative for zones
# written with a "-" sign.
#
# Raises SyntaxError when +str+ is neither numeric nor a known zone name.
def timezone_string_to_unixtime(str)
  m = /([\+\-])(\d\d?)(\d\d)/.match(str)

  unless m
    # Not numeric: the table holds offsets in minutes.
    min = ZONESTR_TABLE[str.downcase]
    raise SyntaxError, "wrong timezone format '#{str}'" unless min
    return min * 60
  end

  sec = (m[2].to_i * 60 + m[3].to_i) * 60
  m[1] == '-' ? -sec : sec
end
#to_kcode(str) ⇒ Object
296 297 298 299 |
# File 'lib/tmail/utils.rb', line 296
#
# Converts +str+ with NKF according to the current TMail.KCODE.
#
# str:: the text to convert.
#
# Returns +str+ unchanged when NKF_FLAGS has no entry for TMail.KCODE;
# otherwise returns the result of NKF.nkf called with the flags found there.
def to_kcode(str)
  flag = NKF_FLAGS[TMail.KCODE]
  return str unless flag

  NKF.nkf(flag, str)
end
#token_safe?(str) ⇒ Boolean
Returns true if the string supplied is free from characters not allowed as a TOKEN
140 141 142 |
# File 'lib/tmail/utils.rb', line 140
#
# Tells whether +str+ can be used as a TOKEN without quoting.
#
# str:: the value to test against TOKEN_UNSAFE.
#
# Returns true when TOKEN_UNSAFE does not match +str+, false when it does.
def token_safe?(str)
  !(TOKEN_UNSAFE === str)
end
#unquote(str) ⇒ Object
Unwraps the supplied string from inside double quotes. Returns the unquoted string
163 164 165 |
# File 'lib/tmail/utils.rb', line 163
#
# Unwraps +str+ from inside double quotes.
#
# str:: the possibly quoted value.
#
# Returns the text between the quotes when the whole of +str+ (ignoring one
# trailing newline) is wrapped in double quotes; otherwise returns +str+
# unchanged.
def unquote( str )
  # \A and \Z anchor to the whole string. The line anchors ^ and $ would
  # also accept a single quoted line inside a longer multi-line string and
  # return only that line's contents.
  str =~ /\A"(.*?)"\Z/m ? $1 : str
end