Class: EncodedWord
- Inherits:
-
Object
- Object
- EncodedWord
- Defined in:
- lib/encoded_word.rb
Instance Method Summary
- #decode ⇒ Object
- #decode_attachments(attachments) ⇒ Object
- #decode_subject(sub) ⇒ Object
- #format_date(engdate) ⇒ Object
- #getmail(line, at) ⇒ Object
-
#initialize(inputdir) ⇒ EncodedWord
constructor
A new instance of EncodedWord.
- #mime_decode(input, out_charset = 'utf-8') ⇒ Object
- #mysplit(line, sep = "\t") ⇒ Object
- #trim_emails(emails) ⇒ Object
Constructor Details
#initialize(inputdir) ⇒ EncodedWord
Returns a new instance of EncodedWord.
6 7 8 9 10 11 12 |
# File 'lib/encoded_word.rb', line 6
# Creates an EncodedWord bound to an input directory.
#
# When the platform defines File::ALT_SEPARATOR, every occurrence of it in
# +inputdir+ is replaced with File::SEPARATOR before the path is stored in
# @inputdir; otherwise the path is stored unchanged.
#
# @param inputdir [String] path of the input directory
def initialize(inputdir)
  alt_separator = File::ALT_SEPARATOR
  @inputdir = alt_separator ? inputdir.gsub(alt_separator, File::SEPARATOR) : inputdir
end
Instance Method Details
#decode ⇒ Object
14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 |
# File 'lib/encoded_word.rb', line 14
# Rewrites every "*.mlog" file found recursively under @inputdir into a
# sibling file named "<path>.plain".
#
# Each input line is transcoded to UTF-8 (undefined/invalid bytes replaced),
# split into tab-separated fields and written back out tab-joined with:
#   field 0 -> format_date, field 1 -> unchanged, field 2 -> decode_subject,
#   fields 3..5 -> trim_emails, field 6 -> appended unchanged unless the
#   line has exactly six fields.
# The path of each file is printed to stdout before it is processed.
#
# @return [Array<String>] the paths matched by the glob
def decode
  # `each`, not `select`: the block is run for its side effects only.
  Dir.glob(File.join(@inputdir, '**', '*.mlog')).each do |path|
    puts path
    File.open(path + '.plain', 'w') do |out|
      File.open(path) do |file|
        source_encoding = file.external_encoding.name
        # Iterate over the opened file handle. Calling each_line on the path
        # String would yield the file name itself instead of the log lines.
        file.each_line do |line|
          line = line.encode('utf-8', source_encoding, :undef => :replace, :invalid => :replace)
          parts = mysplit(line)
          newparts = [
            format_date(parts[0]),    # date
            parts[1],                 # message-id
            decode_subject(parts[2]), # subject
            trim_emails(parts[3]),    # from
            trim_emails(parts[4]),    # to
            trim_emails(parts[5])     # cc
          ]
          # NOTE(review): the attachment field is copied through undecoded;
          # confirm whether decode_attachments was meant to be applied here.
          newparts << parts[6] unless parts.length == 6 # attachment
          out.puts newparts.join("\t")
        end
      end
    end
  end
end
#decode_attachments(attachments) ⇒ Object
101 102 103 104 105 106 107 108 109 |
# File 'lib/encoded_word.rb', line 101
# Decodes a BEL ("\a") separated list of attachment names.
#
# The list is split with mysplit using "\a" as the separator, every element
# is passed through mime_decode, and the results are joined with "\a" again.
#
# @param attachments [String, nil] "\a"-separated attachment names
# @return [String] the decoded list, or '' when +attachments+ is nil or empty
def decode_attachments(attachments)
  return '' unless attachments && attachments.length > 0

  mysplit(attachments, "\a").map { |name| mime_decode(name) }.join("\a")
end
#decode_subject(sub) ⇒ Object
65 66 67 68 69 |
# File 'lib/encoded_word.rb', line 65
# Decodes a subject field: all BEL ("\a") characters are removed and the
# remaining text is handed to mime_decode.
#
# @param sub [String, nil] raw subject text
# @return [String] the result of mime_decode, or '' when +sub+ is nil or empty
def decode_subject(sub)
  return '' if !sub || sub.length.zero?

  mime_decode(sub.gsub("\a", ''))
end
#format_date(engdate) ⇒ Object
58 59 60 61 62 63 |
# File 'lib/encoded_word.rb', line 58
# Parses a date string and renders it as "YYYY/MM/DD HH:MM:SS" in a fixed
# UTC offset.
#
# @param engdate [String, nil] any date text DateTime.parse understands
# @param offset [String] UTC offset the result is shifted to before
#   formatting; defaults to '+0900', the value that used to be hard-coded
# @return [String] the formatted timestamp, or '' when +engdate+ is nil or empty
# @raise [ArgumentError] propagated from DateTime.parse for unparseable input
def format_date(engdate, offset = '+0900')
  return '' unless engdate && engdate.length > 0

  DateTime.parse(engdate).new_offset(offset).strftime('%Y/%m/%d %H:%M:%S')
end
#getmail(line, at) ⇒ Object
85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 |
# File 'lib/encoded_word.rb', line 85
# Extracts the address-like token surrounding the character at index +at+.
#
# Starting from +at+, the token is grown to the left while the preceding
# character matches /[\._a-zA-Z0-9-]/ and to the right while the following
# character matches /[\.a-zA-Z0-9-]/ (no underscore on the right-hand side).
#
# @param line [String] text containing the token
# @param at [Integer] index of the pivot character (the caller passes the
#   position of an '@')
# @return [String] the substring from the leftmost to the rightmost accepted
#   character, pivot included
def getmail(line, at)
  first = at
  first -= 1 while first > 0 && line[first - 1] =~ /[\._a-zA-Z0-9-]/

  last = at
  last += 1 while last < line.length - 1 && line[last + 1] =~ /[\.a-zA-Z0-9-]/

  line[first..last]
end
#mime_decode(input, out_charset = 'utf-8') ⇒ Object
111 112 113 114 115 116 117 118 119 120 121 |
# File 'lib/encoded_word.rb', line 111
# Replaces every MIME encoded-word ("=?charset?B|Q?payload?=") in +input+
# with its decoded text, transcoded to +out_charset+.
#
# Differences from a naive sub!/recursion approach:
# * +input+ is never modified, so frozen strings are accepted;
# * +out_charset+ is applied to every encoded-word, not only the first;
# * decoded text is not re-scanned, so output that happens to look like an
#   encoded-word is left alone;
# * in Q encoding an underscore is decoded as a space (RFC 2047 section 4.2).
#
# @param input [String, nil] text possibly containing encoded-words
# @param out_charset [String] target encoding name
# @return [String] decoded text; '' when +input+ is nil or empty; text
#   without encoded-words is returned unchanged
# @raise [Encoding::ConverterNotFoundError] when the charset named in an
#   encoded-word is unknown to Ruby
def mime_decode(input, out_charset = 'utf-8')
  return '' unless input && input.length > 0

  input.gsub(/=\?([A-Za-z0-9_-]+)\?([BQbq])\?([^\?]+)\?=/) do
    match = Regexp.last_match
    charset = match[1]
    payload = match[3]
    raw =
      if match[2].upcase == 'B'
        payload.unpack('m*').first # base64
      else
        # "_" stands for a space in Q encoding; a literal underscore is "=5F".
        payload.tr('_', ' ').unpack('M*').first # quoted-printable
      end
    raw.encode(out_charset, charset, :undef => :replace, :invalid => :replace)
  end
end
#mysplit(line, sep = "\t") ⇒ Object
38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 |
# File 'lib/encoded_word.rb', line 38
# Splits +line+ on every occurrence of +sep+, keeping empty fields
# (including a trailing one).
#
# The separator argument is honoured (it is not fixed to a tab), and the
# scan resumes after the whole separator, so multi-character separators work.
#
# @param line [String, nil] text to split
# @param sep [String] literal separator, tab by default
# @return [Array<String>] the fields; [] when +line+ is nil or empty
def mysplit(line, sep = "\t")
  return [] unless line
  return [] unless line.length > 0
  # An empty separator matches at every position and would never advance.
  return [line] if sep.empty?

  parts = []
  start = 0
  while (hit = line.index(sep, start))
    parts << line[start...hit]
    start = hit + sep.length
  end
  parts << line[start..-1]
  parts
end
#trim_emails(emails) ⇒ Object
71 72 73 74 75 76 77 78 79 80 81 82 83 |
# File 'lib/encoded_word.rb', line 71
# Collects one token per '@' found in +emails+ and joins them with BEL ("\a").
#
# For every '@', scanning left to right, getmail is called with the text and
# the index of that '@'; whatever it returns is appended to the result list.
#
# @param emails [String, nil] raw address field
# @return [String] the getmail results joined by "\a"; '' when +emails+ is
#   nil, empty, or contains no '@'
def trim_emails(emails)
  return '' if !emails || emails.length.zero?

  found = []
  at = emails.index('@', 0)
  while at
    found << getmail(emails, at)
    at = emails.index('@', at + 1)
  end
  found.join("\a")
end