Class: Diagtool::MaskUtils

Inherits:
Object
  • Object
show all
Defined in:
lib/fluent/diagtool/maskutils.rb

Instance Method Summary collapse

Constructor Details

#initialize(conf, log_level) ⇒ MaskUtils

Returns a new instance of MaskUtils.



25
26
27
28
29
30
31
32
33
34
# File 'lib/fluent/diagtool/maskutils.rb', line 25

# Builds a masking helper.
#
# @param conf [Hash] configuration; the :words entry is stored as the list
#   consulted by #is_words? and the :seed entry as the prefix hashed together
#   with every masked value
# @param log_level [Object] level handed to the STDOUT logger
def initialize(conf, log_level)
  @words = conf[:words]
  @hash_seed = conf[:seed]
  # Identifiers (file / line / chunk) of the token currently being processed.
  @id = {}
  # Auto-vivifying nested hash: looking up a missing key at any depth
  # creates another hash with the same default behaviour.
  @masklog = Hash.new { |hash, key| hash[key] = Hash.new(&hash.default_proc) }

  log_format = proc do |severity, datetime, _progname, msg|
    "#{datetime}: [Maskutils] [#{severity}] #{msg}\n"
  end
  @logger = Logger.new(STDOUT, level: log_level, formatter: log_format)
  @logger.debug("Initialize Maskutils: sanitized word = #{conf[:words]}")
end

Instance Method Details

#export_masklog(output_file) ⇒ Object



317
318
319
320
321
322
# File 'lib/fluent/diagtool/maskutils.rb', line 317

# Writes the accumulated mask log to +output_file+ as pretty-printed JSON
# followed by a newline. An existing file is overwritten.
#
# @param output_file [String] path of the file to write
def export_masklog(output_file)
  # Serialize first so a serialization error does not truncate the target.
  serialized = JSON.pretty_generate(@masklog)
  File.open(output_file, 'w') { |out| out.puts(serialized) }
end

#is_fqdn?(str) ⇒ Boolean

Returns:

  • (Boolean)


268
269
270
271
272
# File 'lib/fluent/diagtool/maskutils.rb', line 268

# Tells whether +str+ looks like a fully qualified domain name: at least two
# dot-terminated labels (letters, digits, inner hyphens) followed by a final
# label made of two or more letters (inner hyphens allowed).
#
# @param str [String] candidate text
# @return [Boolean]
def is_fqdn?(str)
  fqdn_pattern = /^\b(([a-zA-Z0-9]|[a-zA-Z0-9][a-zA-Z0-9\-]*[a-zA-Z0-9])\.){2,}([A-Za-z]|[A-Za-z][A-Za-z\-]*[A-Za-z]){2,}$/
  match_position = (str =~ fqdn_pattern)
  !match_position.nil?
end

#is_ipv4?(str) ⇒ Boolean

Returns:

  • (Boolean)


264
265
266
# File 'lib/fluent/diagtool/maskutils.rb', line 264

# Tells whether +str+ has the shape of a dotted-quad IPv4 address: four
# groups of one to three digits separated by dots. The numeric range of the
# groups is not checked.
#
# @param str [String] candidate text
# @return [Boolean]
def is_ipv4?(str)
  dotted_quad = /^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$/
  (str =~ dotted_quad) ? true : false
end

#is_words?(str) ⇒ Boolean

Returns:

  • (Boolean)


274
275
276
277
278
279
280
281
282
283
# File 'lib/fluent/diagtool/maskutils.rb', line 274

# Tells whether +str+ is exactly equal to one of the configured words.
#
# @param str [String] candidate text
# @return [Boolean]
def is_words?(str)
  @words.any? { |word| str == word }
end

#mask_colon_pattern(str) ⇒ Object



234
235
236
237
238
239
240
241
242
243
244
245
246
247
# File 'lib/fluent/diagtool/maskutils.rb', line 234

# Masks the first ':'-separated chunk of +str+ that #mask_ipv4_fqdn_words
# recognises (e.g. the address in "<address>:<port>").
#
# The token is split with a negative limit so that empty trailing fields are
# kept; joining the chunks again therefore reproduces every colon of the
# input, including one or more trailing colons.
#
# NOTE: only the first matching chunk is replaced; later chunks are left
# untouched.
#
# @param str [String] token containing ':'
# @return [Array(Boolean, String)] whether a chunk was masked, and the
#   resulting token (equal to the input when nothing was masked)
def mask_colon_pattern(str)
  chunks = str.split(':', -1)
  chunks.each_with_index do |chunk, idx|
    masked, _chunk, chunk_mask = mask_ipv4_fqdn_words(chunk)
    next unless masked

    chunks[idx] = chunk_mask
    return true, chunks.join(':')
  end
  return false, chunks.join(':')
end

#mask_direct_pattern(str) ⇒ Object



160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
# File 'lib/fluent/diagtool/maskutils.rb', line 160

# Masks a token that carries no URL / '=' / ':' / '/' separator.
#
# If the token contains '>' (checked first) or ']', every occurrence of that
# character is removed before matching and a single one is appended again
# after the masked value.
#
# @param str [String] token to inspect
# @return [Array(Boolean, String)] whether the token was masked, and the
#   masked token (the unchanged input when nothing was masked)
def mask_direct_pattern(str)
  closer = ['>', ']'].find { |char| str.include?(char) }
  candidate = closer ? str.gsub(closer, '') : str
  is_mask, _chunk, chunk_mask = mask_ipv4_fqdn_words(candidate)
  return [false, str] unless is_mask

  # The masked value must be returned for the bracketed forms as well;
  # interpolating a nil closer contributes nothing.
  return true, "#{chunk_mask}#{closer}"
end

#mask_equal_pattern(str) ⇒ Object



220
221
222
223
224
225
226
227
228
229
230
231
232
# File 'lib/fluent/diagtool/maskutils.rb', line 220

# Masks the first '='-separated chunk of +str+ that #mask_ipv4_fqdn_words
# recognises (e.g. "host=<address>" or "bind=<address>").
#
# The token is split with a negative limit so that empty trailing fields are
# kept; joining the chunks again therefore reproduces every '=' of the input,
# including trailing ones.
#
# NOTE: only the first matching chunk is replaced; later chunks are left
# untouched.
#
# @param str [String] token containing '='
# @return [Array(Boolean, String)] whether a chunk was masked, and the
#   resulting token (equal to the input when nothing was masked)
def mask_equal_pattern(str)
  chunks = str.split('=', -1)
  chunks.each_with_index do |chunk, idx|
    masked, _chunk, chunk_mask = mask_ipv4_fqdn_words(chunk)
    next unless masked

    chunks[idx] = chunk_mask
    return true, chunks.join('=')
  end
  return false, chunks.join('=')
end

#mask_ipv4_fqdn_words(str) ⇒ Object



285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
# File 'lib/fluent/diagtool/maskutils.rb', line 285

# Classifies +str+ (after stripping quote characters, optionally preceded by
# a backslash) as an IPv4 address, an FQDN or a configured word, in that
# order, and replaces it with "<type>_<MD5 of seed + value>".
#
# A successful replacement is recorded through #put_masklog.
#
# @param str [Object] value to inspect; converted with #to_s
# @return [Array(Boolean, String, String)] whether the value was masked, the
#   value that was examined (unquoted when masked, the original text
#   otherwise) and the masked value (the original text when not masked)
def mask_ipv4_fqdn_words(str)
  text = str.to_s
  unquoted = text.gsub(/\\\"|\'|\"|\\\'/, '')

  mtype =
    if is_ipv4?(unquoted)
      'IPv4'
    elsif is_fqdn?(unquoted)
      'FQDN'
    elsif is_words?(unquoted)
      'Word'
    end
  return [false, text, text] unless mtype

  masked = mtype + '_' + Digest::MD5.hexdigest(@hash_seed + unquoted)
  put_masklog(unquoted, masked)
  [true, unquoted, masked]
end

#mask_slash_pattern(str) ⇒ Object



249
250
251
252
253
254
255
256
257
258
259
260
261
262
# File 'lib/fluent/diagtool/maskutils.rb', line 249

# Masks the first '/'-separated chunk of +str+ that #mask_ipv4_fqdn_words
# recognises.
#
# The token is split with a negative limit so that empty leading and trailing
# fields are kept; joining the chunks again therefore reproduces every slash
# of the input. No other character is appended to the result.
#
# NOTE: only the first matching chunk is replaced; later chunks are left
# untouched.
#
# @param str [String] token containing '/'
# @return [Array(Boolean, String)] whether a chunk was masked, and the
#   resulting token (equal to the input when nothing was masked)
def mask_slash_pattern(str)
  chunks = str.split('/', -1)
  chunks.each_with_index do |chunk, idx|
    masked, _chunk, chunk_mask = mask_ipv4_fqdn_words(chunk)
    next unless masked

    chunks[idx] = chunk_mask
    return true, chunks.join('/')
  end
  return false, chunks.join('/')
end

#mask_tdlog(input_file, clean) ⇒ Object



36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
# File 'lib/fluent/diagtool/maskutils.rb', line 36

# Masks a plain-text log file line by line and writes the result to
# "<input_file>.mask".
#
# For every line the current file name and zero-based line number are stored
# in @id before the line is passed to #mask_tdlog_inspector.
#
# @param input_file [String] path of the log file to mask
# @param clean [Boolean] the input file is deleted afterwards only when this
#   is exactly +true+
# @return [String] path of the masked file
def mask_tdlog(input_file, clean)
  masked_file = input_file + '.mask'
  # Block form guarantees the output handle is closed even if reading or
  # masking a line raises.
  File.open(masked_file, 'w') do |out|
    File.foreach(input_file).with_index do |line, line_id|
      # Re-interpret the line as binary and drop every byte that has no
      # UTF-8 mapping (marked as temporary by the original author).
      line = line.encode('utf-8', 'binary', :invalid => :replace, :undef => :replace, :replace => '')   # temporary
      @id[:fid] = input_file
      @id[:lid] = line_id
      out.puts(mask_tdlog_inspector(line))
    end
  end
  FileUtils.rm(input_file) if clean == true
  masked_file
end

#mask_tdlog_gz(input_file, clean) ⇒ Object



52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
# File 'lib/fluent/diagtool/maskutils.rb', line 52

# Masks a gzip-compressed log file line by line and writes the (uncompressed)
# result to "<input_file>.mask".
#
# The file is decompressed with the external +gunzip+ command. The command is
# started from an argument vector, without a shell, so the file name is passed
# verbatim even when it contains spaces or shell metacharacters. The
# decompressed text is taken from the command's standard output; no temporary
# file is created.
#
# A non-zero gunzip exit status is reported through the logger; whatever
# output was produced is still masked and written.
#
# @param input_file [String] path of the .gz log file to mask
# @param clean [Boolean] the input file is deleted afterwards only when this
#   is exactly +true+
# @return [String] path of the masked file
# @raise [Errno::ENOENT] if the gunzip executable cannot be started
def mask_tdlog_gz(input_file, clean)
  masked_file = input_file + '.mask'
  gunzipped, gunzip_err, status = Open3.capture3('gunzip', '--keep', '-c', input_file)
  @logger.warn("gunzip failed for #{input_file}: #{gunzip_err.strip}") unless status.success?

  # Block form guarantees the output handle is closed even if masking raises.
  File.open(masked_file, 'w') do |out|
    gunzipped.each_line.with_index do |line, line_id|
      @id[:fid] = input_file
      @id[:lid] = line_id
      out.puts(mask_tdlog_inspector(line))
    end
  end
  FileUtils.rm(input_file) if clean == true
  masked_file
end

#mask_tdlog_inspector(line) ⇒ Object



70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
# File 'lib/fluent/diagtool/maskutils.rb', line 70

# Masks one log line.
#
# The line is split on single whitespace characters; every resulting token is
# inspected, and the tokens are joined again with a single space. A token
# that contains ',' is further split on commas (keeping empty fields, so
# leading and trailing commas survive) and each piece is inspected on its own.
#
# While a token is processed, @id[:cid] holds its index ("<i>", or
# "<i>-<n>" for the n-th comma-separated piece of token i).
#
# @param line [String] raw log line
# @return [String] the line with recognised values replaced
def mask_tdlog_inspector(line)
  @logger.debug("Input Line: #{line.chomp}")
  tokens = line.split(/\s/)
  @logger.debug("Splitted Line: #{tokens}")

  contents = tokens.each_with_index.map do |token, i|
    @logger.debug("Splitted Line #{i}: #{token}")
    @id[:cid] = i.to_s
    if token.include?(',')
      pieces = token.split(',', -1).each_with_index.map do |piece, cnt|
        @id[:cid] = i.to_s + '-' + cnt.to_s
        mask_tdlog_token(piece)
      end
      pieces.join(',')
    else
      mask_tdlog_token(token)
    end
  end

  line_masked = contents.join(' ')
  @logger.debug("Masked Line: #{line_masked}")
  line_masked
end

# Routes a single token to the pattern masker selected by the first separator
# it contains ('://', '=', ':', '/', or none) and returns the masked token,
# or the token itself when the masker reports no match.
private def mask_tdlog_token(token)
  label, result =
    if token.include?('://') ## Mask <http/dRuby>://<address:ip/hostname>:<port>
      ['URL', mask_url_pattern(token)]
    elsif token.include?('=')
      ['Equal', mask_equal_pattern(token)]
    elsif token.include?(':') ## Mask <address:ip/hostname>:<port>
      ['Colon', mask_colon_pattern(token)]
    elsif token.include?('/')
      ['Slash', mask_slash_pattern(token)]
    else
      ['Direct', mask_direct_pattern(token)]
    end
  is_mask, masked_contents = result
  return token unless is_mask

  @logger.debug("   #{label} Pattern Detected: #{token} -> #{masked_contents}")
  masked_contents
end

#mask_url_pattern(str) ⇒ Object



177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
# File 'lib/fluent/diagtool/maskutils.rb', line 177

# Masks the first recognisable value inside a URL-like token such as
# "<scheme>://<address>:<port>".
#
# The token is split on '://' and every part is split on ':'; each segment is
# offered to #mask_ipv4_fqdn_words until one is masked. A segment containing
# ']' (checked first) or '>' has every occurrence of that character removed
# before matching, and a single one is appended again after the masked value.
#
# Both splits keep empty fields, so joining the pieces again reproduces every
# separator of the input, and tokens that consist only of separators (for
# example "://") are handled without error.
#
# NOTE: only the first matching segment is replaced; later segments are left
# untouched.
#
# @param str [String] token containing '://'
# @return [Array(Boolean, String)] whether a segment was masked, and the
#   resulting token (equal to the input when nothing was masked)
def mask_url_pattern(str)
  url_parts = str.split('://', -1)
  url_parts.each_with_index do |part, part_idx|
    segments = part.split(':', -1)
    segments.each_with_index do |segment, seg_idx|
      closer = [']', '>'].find { |char| segment.include?(char) }
      candidate = closer ? segment.gsub(closer, '') : segment
      masked, _chunk, chunk_mask = mask_ipv4_fqdn_words(candidate)
      next unless masked

      # Interpolating a nil closer contributes nothing.
      segments[seg_idx] = "#{chunk_mask}#{closer}"
      url_parts[part_idx] = segments.join(':')
      return true, url_parts.join('://')
    end
  end
  return false, url_parts.join('://')
end

#put_masklog(str, str_mask) ⇒ Object



311
312
313
314
315
# File 'lib/fluent/diagtool/maskutils.rb', line 311

# Records one replacement in the mask log, filed under the current file id
# and a "Line<lid>-<cid>" key built from the identifiers held in @id.
#
# @param str [String] original value
# @param str_mask [String] value it was replaced with
# @return [String] +str_mask+
def put_masklog(str, str_mask)
  file_id, line_id, chunk_id = @id.values_at(:fid, :lid, :cid)
  entry = @masklog[file_id]["Line#{line_id}-#{chunk_id}"]
  entry['original'] = str
  entry['mask'] = str_mask
end