Class: EncodedWord

Inherits:
Object
  • Object
show all
Defined in:
lib/encoded_word.rb

Instance Method Summary collapse

Constructor Details

#initialize(inputdir) ⇒ EncodedWord

Returns a new instance of EncodedWord.



6
7
8
9
10
11
12
# File 'lib/encoded_word.rb', line 6

# Remembers the directory that will later be scanned for log files.
#
# On platforms that define File::ALT_SEPARATOR, every occurrence of it in
# the path is replaced with File::SEPARATOR; elsewhere the path is stored
# untouched.
#
# @param inputdir [String] directory path to store in @inputdir
def initialize(inputdir)
    alt = File::ALT_SEPARATOR
    @inputdir = alt ? inputdir.gsub(alt, File::SEPARATOR) : inputdir
end

Instance Method Details

#decode ⇒ Object



14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
# File 'lib/encoded_word.rb', line 14

# Rewrites every "*.mlog" file found (recursively) under @inputdir into a
# sibling "<name>.plain" file, printing each source path as it is processed.
#
# Each input line is transcoded to UTF-8, split into tab-separated fields and
# written back tab-separated after per-field processing:
#   0 date, 1 message-id, 2 subject, 3 from, 4 to, 5 cc, 6 attachments.
# The attachment column is only emitted when the line does not have exactly
# six fields.
#
# @return [Array<String>] the paths matched by the glob
def decode
    Dir.glob(File.join(@inputdir, '**', '*.mlog')).each do |f|
        puts f
        File.open(f + '.plain', 'w') do |out|
            File.open(f) do |file|
                # Read from the opened file handle; iterating `f` would walk
                # the filename string instead of the log contents.
                file.each_line do |line|
                    line = line.encode('utf-8', file.external_encoding.name,
                        :undef=>:replace, :invalid=>:replace)
                    parts = mysplit(line)
                    newparts = []
                    newparts << format_date(parts[0]) #date
                    newparts << parts[1] #message-id
                    newparts << decode_subject(parts[2]) #subject
                    newparts << trim_emails(parts[3]) #from
                    newparts << trim_emails(parts[4]) #to
                    newparts << trim_emails(parts[5]) #cc
                    newparts << decode_attachments(parts[6]) unless parts.length == 6 #attachment
                    out.puts newparts.join("\t")
                end
            end
        end
    end
end

#decode_attachments(attachments) ⇒ Object



101
102
103
104
105
106
107
108
109
# File 'lib/encoded_word.rb', line 101

# Decodes each piece of an attachment field.
#
# The field is handed to mysplit with "\a" as the separator argument, every
# resulting piece goes through mime_decode, and the decoded pieces are joined
# back together with "\a".
#
# @param attachments [String, nil] raw attachment field
# @return [String] decoded field, or '' when the input is nil or empty
def decode_attachments(attachments)
    return '' if !attachments || attachments.length.zero?

    mysplit(attachments, "\a").map { |piece| mime_decode(piece) }.join("\a")
end

#decode_subject(sub) ⇒ Object



65
66
67
68
69
# File 'lib/encoded_word.rb', line 65

# Decodes a subject field.
#
# All "\a" characters are removed from a copy of the subject and the result
# is passed through mime_decode.
#
# @param sub [String, nil] raw subject field
# @return [String] decoded subject, or '' when the input is nil or empty
def decode_subject(sub)
    return '' if !sub || sub.length.zero?

    mime_decode(sub.gsub("\a", ''))
end

#format_date(engdate) ⇒ Object



58
59
60
61
62
63
# File 'lib/encoded_word.rb', line 58

# Reformats a date string as "YYYY/MM/DD HH:MM:SS" at UTC offset +09:00.
#
# The input is parsed with DateTime.parse, shifted to the +0900 offset and
# then formatted.
#
# @param engdate [String, nil] date text accepted by DateTime.parse
# @return [String] formatted timestamp, or '' when the input is nil or empty
def format_date(engdate)
    return '' if !engdate || engdate.length.zero?

    DateTime.parse(engdate).new_offset('+0900').strftime("%Y/%m/%d %H:%M:%S")
end

#getmail(line, at) ⇒ Object



85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
# File 'lib/encoded_word.rb', line 85

# Extracts the address-like token surrounding the character at index +at+.
#
# Scans left from at-1 while characters match [._a-zA-Z0-9-] and right from
# at+1 while characters match [.a-zA-Z0-9-] (no underscore on the right),
# then returns the slice between the two stopping points.
#
# @param line [String] text containing the address
# @param at [Integer] index of the '@' character within +line+
# @return [String] the extracted slice of +line+
def getmail(line, at)
    # First index to the left whose character is not allowed; nil if the
    # scan reaches the start of the string.
    left_stop = (at - 1).downto(0).find { |i| !(line[i] =~ /[\._a-zA-Z0-9-]/) }
    # First index to the right whose character is not allowed; line[line.length]
    # is nil, so the end of the string always stops the scan.
    right_stop = (at + 1).upto(line.length).find { |i| !(line[i] =~ /[\.a-zA-Z0-9-]/) }

    first = left_stop ? left_stop + 1 : 0
    last = right_stop ? right_stop - 1 : line.length - 1
    line[first..last]
end

#mime_decode(input, out_charset = 'utf-8') ⇒ Object



111
112
113
114
115
116
117
118
119
120
121
# File 'lib/encoded_word.rb', line 111

# Decodes every MIME encoded-word ("=?charset?B|Q?text?=") found in +input+.
#
# "B" words are base64-decoded and "Q" words are quoted-printable-decoded;
# the resulting bytes are then transcoded from the word's charset to
# +out_charset+, replacing undefined or invalid characters.
#
# The input string is never modified (so frozen strings are accepted), and
# +out_charset+ is applied to every encoded word, not only the first one.
#
# @param input [String, nil] text that may contain encoded-words
# @param out_charset [String] name of the target encoding
# @return [String] decoded text, or '' when the input is nil or empty
def mime_decode(input, out_charset = 'utf-8')
    return '' unless input && input.length > 0

    unpack_formats = { 'B' => 'm*', 'Q' => 'M*' }
    # gsub works on a copy and handles all encoded-words in a single pass.
    input.gsub(/=\?([A-Za-z0-9_-]+)\?([BQbq])\?([^\?]+)\?=/) do
        match = Regexp.last_match
        charset = match[1]
        enc = match[2].upcase
        word = match[3]
        # In Q-encoding an underscore stands for a space (RFC 2047 4.2);
        # a literal underscore is transmitted as "=5F".
        word = word.tr('_', ' ') if enc == 'Q'
        raw = word.unpack(unpack_formats[enc]).first
        raw.encode(out_charset, charset, :undef=>:replace, :invalid=>:replace)
    end
end

#mysplit(line, sep = "\t") ⇒ Object



38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
# File 'lib/encoded_word.rb', line 38

# Splits +line+ on every occurrence of +sep+, keeping empty fields
# (including a trailing empty field when the line ends with the separator).
#
# The search is a plain substring search, so +sep+ is never interpreted as a
# pattern and may be longer than one character.
#
# @param line [String, nil] text to split
# @param sep [String] separator string (defaults to a tab)
# @return [Array<String>] the fields; [] when +line+ is nil or empty, and
#   [line] when +sep+ is nil or empty
def mysplit(line, sep = "\t")
    return [] unless line
    return [] unless line.length > 0
    # An empty separator would match at every position and never advance.
    return [line] if sep.nil? || sep.empty?

    parts = []
    start = 0
    while (hit = line.index(sep, start))
        parts << line[start...hit]
        start = hit + sep.length
    end
    parts << line[start..-1]
    parts
end

#trim_emails(emails) ⇒ Object



71
72
73
74
75
76
77
78
79
80
81
82
83
# File 'lib/encoded_word.rb', line 71

# Reduces an address field to the bare addresses it contains.
#
# Every '@' in the field is located in order, getmail is called with the
# field and that index, and the results are joined with "\a".
#
# @param emails [String, nil] raw address field
# @return [String] "\a"-joined results, or '' when the input is nil or empty
def trim_emails(emails)
    return '' if !emails || emails.length.zero?

    found = []
    cursor = 0
    while (at = emails.index('@', cursor))
        found << getmail(emails, at)
        # Resume the search just past the '@' that was handled.
        cursor = at + 1
    end
    found.join("\a")
end