Class: Fluent::Plugin::SanitizerFilter

Inherits:
Filter
  • Object
show all
Defined in:
lib/fluent/plugin/filter_sanitizer.rb

Instance Method Summary collapse

Instance Method Details

#configure(conf) ⇒ Object



51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
# File 'lib/fluent/plugin/filter_sanitizer.rb', line 51

# Reads the hashing options and the rule list and prepares the state used by #filter.
#
# Sets @salt (empty string when `hash_salt` is absent), @hash_scheme and
# @sanitize_func, a callable returning the hex digest of salt + input.
# Any scheme other than sha1/sha256/sha384/sha512 (including nil) falls back
# to MD5. Each entry of @rules is validated and flattened into an array that
# is appended to @sanitizerules in the order #filter destructures it.
#
# @param conf [#[]] configuration object; `hash_salt` and `hash_scheme` are read from it
# @raise [Fluent::ConfigError] when a rule has no keys, when pattern_ipv4 or
#   pattern_fqdn is something other than true/false/unset, or when
#   pattern_regex is not a Regexp
def configure(conf)
  super
  @salt = conf['hash_salt']
  @salt = "" if @salt.nil?
  @hash_scheme = conf['hash_scheme']
  @sanitize_func =
    case @hash_scheme
    when "sha1"
      Proc.new { |str| Digest::SHA1.hexdigest(@salt + str) }
    when "sha256"
      Proc.new { |str| Digest::SHA256.hexdigest(@salt + str) }
    when "sha384"
      Proc.new { |str| Digest::SHA384.hexdigest(@salt + str) }
    when "sha512"
      Proc.new { |str| Digest::SHA512.hexdigest(@salt + str) }
    else
      Proc.new { |str| Digest::MD5.hexdigest(@salt + str) }
    end

  # The previous check (`x || !x`) was always true, so the error below could
  # never fire. nil stays accepted so an unset option keeps working.
  allowed_flags = [true, false, nil]

  @sanitizerules = []
  @rules.each do |rule|
    if rule.keys.empty?
      raise Fluent::ConfigError, "You need to specify at least one key in rule statement."
    end
    keys = rule.keys

    pattern_ipv4 = rule.pattern_ipv4
    unless allowed_flags.include?(pattern_ipv4)
      raise Fluent::ConfigError, "true or false is available for pattern_ipv4 option."
    end

    pattern_fqdn = rule.pattern_fqdn
    unless allowed_flags.include?(pattern_fqdn)
      raise Fluent::ConfigError, "true or false is available for pattern_fqdn option."
    end

    unless rule.pattern_regex.class == Regexp
      raise Fluent::ConfigError, "Your need to specify Regexp for pattern_regex option."
    end
    pattern_regex = rule.pattern_regex
    regex_capture_group = rule.regex_capture_group

    pattern_keywords = rule.pattern_keywords

    regex_prefix = rule.pattern_regex_prefix
    keywords_prefix = rule.pattern_keywords_prefix

    @sanitizerules.push([keys, pattern_ipv4, pattern_fqdn, pattern_regex, regex_capture_group, pattern_keywords, regex_prefix, keywords_prefix])
  end
end

#filter(tag, time, record) ⇒ Object



106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
# File 'lib/fluent/plugin/filter_sanitizer.rb', line 106

# Applies every entry of @sanitizerules to `record` in place and returns it.
#
# For each key of each rule the enabled maskers run in a fixed order:
# IPv4, FQDN, regex, keywords. A masker only runs when the field currently
# holds a truthy value. Any StandardError raised while processing a key is
# logged as a warning and the remaining maskers for that key are skipped.
#
# @param tag [Object] unused here
# @param time [Object] unused here
# @param record [Object] event record, read and written through a record accessor
# @return [Object] the same record object, possibly modified
def filter(tag, time, record)
  @sanitizerules.each do |rule|
    keys, mask_ipv4, mask_fqdn, regex, capture_group, keywords, regex_prefix, keywords_prefix = rule

    keys.each do |key|
      field = record_accessor_create("$." + key.to_s)
      begin
        field.set(record, sanitize_ipv4_val(field.call(record).to_s)) if mask_ipv4 && field.call(record)
        field.set(record, sanitize_fqdn_val(field.call(record).to_s)) if mask_fqdn && field.call(record)

        # "(?-mix:^$)" is /^$/.to_s; a rule carrying that pattern is treated
        # as "no regex configured" (presumably the option's default - confirm
        # against the config_param declaration).
        if !regex.to_s.eql?("(?-mix:^$)") && field.call(record)
          masked =
            if capture_group.empty?
              sanitize_regex_val(field.call(record), regex_prefix, regex)
            else
              sanitize_regex_val_capture(field.call(record), regex_prefix, regex, capture_group)
            end
          field.set(record, masked)
        end

        if !keywords.empty? && field.call(record)
          field.set(record, sanitize_keywords_val(field.call(record).to_s, keywords, keywords_prefix))
        end
      rescue => e
        log.warn "Skipping this key", error_class: e.class, error: e.message
      end
    end
  end

  record
end

#include_fqdn?(str) ⇒ Boolean

Returns:

  • (Boolean)


148
149
150
# File 'lib/fluent/plugin/filter_sanitizer.rb', line 148

# Tells whether some line of `str` contains an FQDN-looking token: at least
# two dot-terminated labels followed by an alphabetic label of two or more
# characters. `^`/`$` are line anchors, so any single line may match.
#
# @param str [String]
# @return [Boolean]
def include_fqdn?(str)
  fqdn_somewhere = /^.*\b(([a-zA-Z0-9]|[a-zA-Z0-9][a-zA-Z0-9\-]*[a-zA-Z0-9])\.){2,}([A-Za-z]|[A-Za-z][A-Za-z\-]*[A-Za-z]){2,}.*$/
  str.match?(fqdn_somewhere)
end

#include_ipv4?(str) ⇒ Boolean

Returns:

  • (Boolean)


136
137
138
# File 'lib/fluent/plugin/filter_sanitizer.rb', line 136

# Tells whether some line of `str` contains four dot-separated groups of one
# to three digits. The numeric range of each group is not checked.
#
# @param str [String]
# @return [Boolean]
def include_ipv4?(str)
  ipv4_somewhere = /^.*\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}.*$/
  str.match?(ipv4_somewhere)
end

#is_fqdn?(str) ⇒ Boolean

Returns:

  • (Boolean)


152
153
154
# File 'lib/fluent/plugin/filter_sanitizer.rb', line 152

# Tells whether a whole line of `str` is an FQDN-looking token (two or more
# dot-terminated labels plus an alphabetic final label of at least two
# characters). `^`/`$` anchor per line, not per string.
#
# @param str [String]
# @return [Boolean]
def is_fqdn?(str)
  fqdn_only = /^\b(([a-zA-Z0-9]|[a-zA-Z0-9][a-zA-Z0-9\-]*[a-zA-Z0-9])\.){2,}([A-Za-z]|[A-Za-z][A-Za-z\-]*[A-Za-z]){2,}$/
  str.match?(fqdn_only)
end

#is_fqdn_port?(str) ⇒ Boolean

Returns:

  • (Boolean)


156
157
158
# File 'lib/fluent/plugin/filter_sanitizer.rb', line 156

# Tells whether a whole line of `str` is an FQDN-looking token immediately
# followed by ":" and one to five digits.
#
# @param str [String]
# @return [Boolean]
def is_fqdn_port?(str)
  fqdn_with_port = /^\b(([a-zA-Z0-9]|[a-zA-Z0-9][a-zA-Z0-9\-]*[a-zA-Z0-9])\.){2,}([A-Za-z]|[A-Za-z][A-Za-z\-]*[A-Za-z]){2,}:[0-9]{1,5}$/
  str.match?(fqdn_with_port)
end

#is_ipv4?(str) ⇒ Boolean

Returns:

  • (Boolean)


140
141
142
# File 'lib/fluent/plugin/filter_sanitizer.rb', line 140

# Tells whether a whole line of `str` is exactly four dot-separated groups
# of one to three digits. The numeric range of each group is not checked.
#
# @param str [String]
# @return [Boolean]
def is_ipv4?(str)
  ipv4_only = /^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$/
  str.match?(ipv4_only)
end

#is_ipv4_port?(str) ⇒ Boolean

Returns:

  • (Boolean)


144
145
146
# File 'lib/fluent/plugin/filter_sanitizer.rb', line 144

# Tells whether a whole line of `str` is a dotted quad (digits only, range
# unchecked) immediately followed by ":" and one to five digits.
#
# @param str [String]
# @return [Boolean]
def is_ipv4_port?(str)
  ipv4_with_port = /^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}:[0-9]{1,5}$/
  str.match?(ipv4_with_port)
end

#is_url?(str) ⇒ Boolean

Returns:

  • (Boolean)


160
161
162
# File 'lib/fluent/plugin/filter_sanitizer.rb', line 160

# Tells whether a line of `str` starts with a scheme of two or more
# alphanumeric characters followed by "://".
#
# @param str [String]
# @return [Boolean]
def is_url?(str)
  scheme_prefix = /^[a-zA-Z0-9]{2,}:\/\/.*$/
  str.match?(scheme_prefix)
end

#sanitize_fqdn(str) ⇒ Object



172
173
174
# File 'lib/fluent/plugin/filter_sanitizer.rb', line 172

# Replaces a host name with "FQDN_" followed by whatever @sanitize_func
# returns for it.
#
# @param str [String] value handed to @sanitize_func
# @return [String]
def sanitize_fqdn(str)
  digest = @sanitize_func.call(str)
  "FQDN_" + digest
end

#sanitize_fqdn_port(str) ⇒ Object



234
235
236
237
238
239
240
241
# File 'lib/fluent/plugin/filter_sanitizer.rb', line 234

# Splits `str` on ":" and masks every piece that is_fqdn? accepts, leaving
# the other pieces (for example the port) untouched, then re-joins with ":".
# Trailing empty pieces are dropped by String#split.
#
# @param str [String]
# @return [String]
def sanitize_fqdn_port(str)
  pieces = str.split(":").map do |piece|
    is_fqdn?(piece) ? sanitize_fqdn(piece) : piece
  end
  pieces.join(":")
end

#sanitize_fqdn_url(str) ⇒ Object



263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
# File 'lib/fluent/plugin/filter_sanitizer.rb', line 263

# Masks host names inside a URL-like string.
#
# The string is split on "://"; each part is either masked directly or, when
# it contains "/", split on "/" so every path segment is checked on its own.
# A segment is masked when it is a bare FQDN or an FQDN with a port.
#
# @param str [String]
# @return [String] the parts re-joined with "/" and "://"
def sanitize_fqdn_url(str)
  mask_host = lambda do |piece|
    piece = sanitize_fqdn(piece) if is_fqdn?(piece)
    piece = sanitize_fqdn_port(piece) if is_fqdn_port?(piece)
    piece
  end

  sections = str.split("://").map do |section|
    if section.include?("/")
      section.split("/").map(&mask_host).join("/")
    else
      mask_host.call(section)
    end
  end
  sections.join("://")
end

#sanitize_fqdn_val(v) ⇒ Object



317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
# File 'lib/fluent/plugin/filter_sanitizer.rb', line 317

# Masks every FQDN-looking token of a field value.
#
# When `v` contains a comma it is tokenised on ","; otherwise on whitespace.
# Quotation marks are stripped from every token before inspection. Tokens
# that contain an FQDN are masked as URL, bare host or host:port. The result
# is re-joined with "," or a single space respectively; only the
# whitespace-separated form emits a debug line when the value changed.
#
# @param v [String]
# @return [String]
def sanitize_fqdn_val(v)
  comma_separated = v.include?(",")
  tokens = comma_separated ? v.split(",") : v.split

  masked = tokens.map do |token|
    token = subtract_quotations(token)
    next token unless include_fqdn?(token)

    if is_url?(token)
      sanitize_fqdn_url(token)
    else
      token = sanitize_fqdn(token) if is_fqdn?(token)
      token = sanitize_fqdn_port(token) if is_fqdn_port?(token)
      token
    end
  end

  return masked.join(",") if comma_separated

  $log.debug "[pattern_fqdn] sanitize '#{v}' to '#{masked.join(" ")}'" if v != masked.join(" ")
  masked.join(" ")
end

#sanitize_ipv4(str) ⇒ Object



168
169
170
# File 'lib/fluent/plugin/filter_sanitizer.rb', line 168

# Replaces an address with "IPv4_" followed by whatever @sanitize_func
# returns for it.
#
# @param str [String] value handed to @sanitize_func
# @return [String]
def sanitize_ipv4(str)
  digest = @sanitize_func.call(str)
  "IPv4_" + digest
end

#sanitize_ipv4_port(str) ⇒ Object



225
226
227
228
229
230
231
232
# File 'lib/fluent/plugin/filter_sanitizer.rb', line 225

# Splits `str` on ":" and masks every piece that is_ipv4? accepts, leaving
# the other pieces (for example the port) untouched, then re-joins with ":".
# Trailing empty pieces are dropped by String#split.
#
# @param str [String]
# @return [String]
def sanitize_ipv4_port(str)
  pieces = str.split(":").map do |piece|
    is_ipv4?(piece) ? sanitize_ipv4(piece) : piece
  end
  pieces.join(":")
end

#sanitize_ipv4_url(str) ⇒ Object



243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
# File 'lib/fluent/plugin/filter_sanitizer.rb', line 243

# Masks IPv4 addresses inside a URL-like string.
#
# The string is split on "://"; each part is either masked directly or, when
# it contains "/", split on "/" so every path segment is checked on its own.
# A segment is masked when it is a bare address or an address with a port.
#
# @param str [String]
# @return [String] the parts re-joined with "/" and "://"
def sanitize_ipv4_url(str)
  ip_url = []
  str.split("://").each do |s|
    if s.include?("/")
      url_slash = []
      s.split("/").each do |ss|
        ss = sanitize_ipv4(ss) if is_ipv4?(ss)
        ss = sanitize_ipv4_port(ss) if is_ipv4_port?(ss)
        url_slash.push(ss)
      end
      s = url_slash.join("/")
    else
      # A URL without a path ("http://10.0.0.1") lands here. The bare-address
      # check was previously a duplicated port check, so such hosts were
      # left unmasked.
      s = sanitize_ipv4(s) if is_ipv4?(s)
      s = sanitize_ipv4_port(s) if is_ipv4_port?(s)
    end
    ip_url.push(s)
  end
  ip_url.join("://")
end

#sanitize_ipv4_val(v) ⇒ Object



283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
# File 'lib/fluent/plugin/filter_sanitizer.rb', line 283

# Masks every IPv4-looking token of a field value.
#
# When `v` contains a comma it is tokenised on ","; otherwise on whitespace.
# Quotation marks are stripped from every token before inspection. Tokens
# that contain an address are masked as URL, bare address or address:port.
# The result is re-joined with "," or a single space respectively; only the
# whitespace-separated form emits a debug line when the value changed.
#
# @param v [String]
# @return [String]
def sanitize_ipv4_val(v)
  comma_separated = v.include?(",")
  tokens = comma_separated ? v.split(",") : v.split

  masked = tokens.map do |token|
    token = subtract_quotations(token)
    next token unless include_ipv4?(token)

    if is_url?(token)
      sanitize_ipv4_url(token)
    else
      token = sanitize_ipv4(token) if is_ipv4?(token)
      token = sanitize_ipv4_port(token) if is_ipv4_port?(token)
      token
    end
  end

  return masked.join(",") if comma_separated

  $log.debug "[pattern_ipv4] sanitize '#{v}' to '#{masked.join(" ")}'" if v != masked.join(" ")
  masked.join(" ")
end

#sanitize_keyword(str, prefix) ⇒ Object



221
222
223
# File 'lib/fluent/plugin/filter_sanitizer.rb', line 221

# Builds the mask for a keyword: `prefix`, an underscore, then whatever
# @sanitize_func returns for `str`.
#
# @param str [String] value handed to @sanitize_func
# @param prefix [String]
# @return [String]
def sanitize_keyword(str, prefix)
  label = prefix + "_"
  label + @sanitize_func.call(str)
end

#sanitize_keywords_val(v, keywords, prefix) ⇒ Object



363
364
365
366
367
368
369
370
371
372
373
374
# File 'lib/fluent/plugin/filter_sanitizer.rb', line 363

# Masks every whitespace-separated word of `v` that appears in `keywords`.
# Words are re-joined with a single space, and a debug line is written when
# the resulting text differs from the input.
#
# @param v [String]
# @param keywords [#include?] collection the words are looked up in
# @param prefix [String] passed through to sanitize_keyword
# @return [String]
def sanitize_keywords_val(v, keywords, prefix)
  words = v.split.map do |word|
    keywords.include?(word) ? sanitize_keyword(word, prefix) : word
  end
  $log.debug "[pattern_keywords] sanitize '#{v}' to '#{words.join(" ")}'" if v != words.join(" ")
  words.join(" ")
end

#sanitize_regex(str, prefix, regex) ⇒ Object



182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
# File 'lib/fluent/plugin/filter_sanitizer.rb', line 182

# Masks the parts of `str` matched by `regex`.
#
# * No match: `str` is returned unchanged and a debug line is written.
# * Any capture group that did not participate (nil): the whole value is
#   replaced by a single mask.
# * Otherwise every matched/captured fragment is replaced, wherever it
#   occurs literally in the value, by a mask.
#
# The mask is `prefix`, "_" and the digest of the entire current value (not
# of the fragment), so later fragments are hashed after earlier replacements.
# Empty fragments are skipped: substituting "" would insert the mask between
# every character of the value.
#
# @param str [String]
# @param prefix [String]
# @param regex [Regexp]
# @return [String]
def sanitize_regex(str, prefix, regex)
  pattern = Regexp.new(regex)
  unless str =~ pattern
    $log.debug "[pattern_regex] #{str} does not match given regex #{regex}. skip this rule."
    return str
  end

  fragments = str.scan(pattern).flatten
  return prefix + "_" + @sanitize_func.call(str) if fragments.any?(&:nil?)

  fragments.each do |fragment|
    next if fragment.empty?

    mask = prefix + "_" + @sanitize_func.call(str)
    str = str.gsub(fragment, mask)
  end
  str
end

#sanitize_regex_capture(str, prefix, regex, capture_group) ⇒ Object



201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
# File 'lib/fluent/plugin/filter_sanitizer.rb', line 201

# Masks captured fragments of `str` when `regex` matches and declares a
# named group called `capture_group`.
#
# * No match, or the regex has no group with that name: `str` is returned
#   unchanged and a debug line is written.
# * Otherwise every non-empty capture returned by String#scan is replaced,
#   wherever it occurs literally in the value, by `prefix`, "_" and the
#   digest of the entire current value.
#
# Captures that are nil (optional group not taken) or empty are skipped;
# passing them to gsub would raise TypeError or insert the mask between
# every character.
#
# NOTE(review): `capture_group` is only checked for existence; the captures
# of all groups reported by scan are masked, not just the named one.
# Confirm whether that is intended.
#
# @param str [String]
# @param prefix [String]
# @param regex [Regexp]
# @param capture_group [String] name looked up among the regex's group names
# @return [String]
def sanitize_regex_capture(str, prefix, regex, capture_group)
  pattern = Regexp.new(regex)
  unless str =~ pattern
    $log.debug "[pattern_regex] #{str} does not match given regex #{regex}. Skip this rule."
    return str
  end

  unless pattern.names.include?(capture_group)
    $log.debug "[pattern_regex] regex pattern matched but capture group '#{capture_group}' does not exist. Skip this rule."
    return str
  end

  str.scan(pattern).flatten.each do |fragment|
    next if fragment.nil? || fragment.empty?

    mask = prefix + "_" + @sanitize_func.call(str)
    str = str.gsub(fragment, mask)
  end
  str
end

#sanitize_regex_val(v, prefix, regex) ⇒ Object



351
352
353
354
355
# File 'lib/fluent/plugin/filter_sanitizer.rb', line 351

# Runs sanitize_regex on a field value and writes a debug line when the
# value was changed.
#
# @param v [Object] field value, passed to sanitize_regex as is
# @param prefix [String]
# @param regex [Regexp]
# @return [Object] whatever sanitize_regex returned
def sanitize_regex_val(v, prefix, regex)
  masked = sanitize_regex(v, prefix, regex)
  unless v == masked
    $log.debug "[pattern_regex] sanitize '#{v}' to '#{masked}'"
  end
  masked
end

#sanitize_regex_val_capture(v, prefix, regex, capture_group) ⇒ Object



357
358
359
360
361
# File 'lib/fluent/plugin/filter_sanitizer.rb', line 357

# Runs sanitize_regex_capture on a field value and writes a debug line when
# the value was changed.
#
# @param v [Object] field value, passed to sanitize_regex_capture as is
# @param prefix [String]
# @param regex [Regexp]
# @param capture_group [String]
# @return [Object] whatever sanitize_regex_capture returned
def sanitize_regex_val_capture(v, prefix, regex, capture_group)
  masked = sanitize_regex_capture(v, prefix, regex, capture_group)
  unless v == masked
    $log.debug "[pattern_regex] sanitize '#{v}' to '#{masked}'"
  end
  masked
end

#sanitize_val(str, prefix) ⇒ Object



176
177
178
179
180
# File 'lib/fluent/plugin/filter_sanitizer.rb', line 176

# Masks a whole value: `prefix`, an underscore, then whatever @sanitize_func
# returns for `str`. A debug line (tagged "[pattern_regex]") is written when
# the mask differs from the input.
#
# @param str [String]
# @param prefix [String]
# @return [String]
def sanitize_val(str, prefix)
  label = prefix + "_"
  masked = label + @sanitize_func.call(str)
  unless str == masked
    $log.debug "[pattern_regex] sanitize '#{str}' to '#{masked}'"
  end
  masked
end

#subtract_quotations(str) ⇒ Object



164
165
166
# File 'lib/fluent/plugin/filter_sanitizer.rb', line 164

# Returns a copy of `str` with quotation marks removed: single and double
# quotes, plus the backslash in front of a backslash-escaped quote. Other
# backslashes are kept.
#
# @param str [String]
# @return [String]
def subtract_quotations(str)
  quote_marks = /\\\"|\'|\"|\\\'/
  str.gsub(quote_marks, '')
end