Class: Fluent::Plugin::SyslogParser

Inherits:
Parser show all
Defined in:
lib/fluent/plugin/parser_syslog.rb

Direct Known Subclasses

Compat::TextParser::SyslogParser

Constant Summary collapse

REGEXP =

TODO: Remove these regexps, since they are no longer needed; they are kept only for compatibility for now. Taken from the existing TextParser pattern.

/^(?<time>[^ ]*\s*[^ ]* [^ ]*) (?<host>[^ ]*) (?<ident>[^ :\[]*)(?:\[(?<pid>[0-9]+)\])?(?:[^\:]*\:)? *(?<message>.*)$/
REGEXP_WITH_PRI =

From in_syslog default pattern

/^\<(?<pri>[0-9]+)\>(?<time>[^ ]* {1,2}[^ ]* [^ ]*) (?<host>[^ ]*) (?<ident>[^ :\[]*)(?:\[(?<pid>[0-9]+)\])?(?:[^\:]*\:)? *(?<message>.*)$/
# Pattern source (a String, not a Regexp) describing an RFC5424 line from the
# timestamp onwards. It is substituted for `%s` in the two Regexp.new calls
# below.
REGEXP_RFC5424 =
<<~'EOS'.chomp
  (?<time>[^ ]+) (?<host>[!-~]{1,255}) (?<ident>[!-~]{1,48}) (?<pid>[!-~]{1,128}) (?<msgid>[!-~]{1,32}) (?<extradata>(?:\-|(?:\[.*?(?<!\\)\])+))(?: (?<message>.+))?
EOS
# REGEXP_RFC5424 anchored to the whole string (\A ... \z), compiled with
# Regexp::MULTILINE.
REGEXP_RFC5424_NO_PRI =
Regexp.new(<<~'EOS'.chomp % REGEXP_RFC5424, Regexp::MULTILINE)
  \A%s\z
EOS
# Same as REGEXP_RFC5424_NO_PRI, but preceded by `<pri>` (1-3 digits) and a
# 1-3 digit version number that does not start with 0.
REGEXP_RFC5424_WITH_PRI =
Regexp.new(<<~'EOS'.chomp % REGEXP_RFC5424, Regexp::MULTILINE)
  \A<(?<pri>[0-9]{1,3})\>[1-9]\d{0,2} %s\z
EOS
# Prefix test used by #parse_auto: a line matching this is handed to the
# RFC5424 parsers, anything else to the RFC3164 parsers.
REGEXP_DETECT_RFC5424 =
/^\<[0-9]{1,3}\>[1-9]\d{0,2}/
# RFC3164 fields that follow the timestamp: host, ident, optional [pid] and
# message. #configure stores it in @regexp3164 and #parse_rfc3164_regex applies
# it from the position just after the timestamp.
RFC3164_WITHOUT_TIME_AND_PRI_REGEXP =
/(?<host>[^ ]*) (?<ident>[^ :\[]*)(?:\[(?<pid>[0-9]+)\])?(?:[^\:]*\:)? *(?<message>.*)$/
# Named groups of the regexp above; passed to #parse_plain as capture_list.
RFC3164_CAPTURES =
RFC3164_WITHOUT_TIME_AND_PRI_REGEXP.names.freeze
# Leading `<pri>` check used by #parse_rfc3164_regex.
RFC3164_PRI_REGEXP =
/^<(?<pri>[0-9]{1,3})>/
# RFC5424 fields that follow the timestamp (host, ident, pid, msgid,
# extradata and optional message), anchored at the end of the string.
# #configure stores it in @regexp5424 for #parse_rfc5424_regex.
RFC5424_WITHOUT_TIME_AND_PRI_REGEXP =
/(?<host>[!-~]{1,255}) (?<ident>[!-~]{1,48}) (?<pid>[!-~]{1,128}) (?<msgid>[!-~]{1,32}) (?<extradata>(?:\-|(?:\[.*?(?<!\\)\])+))(?: (?<message>.+))?\z/m
# Named groups of the regexp above; passed to #parse_plain as capture_list.
RFC5424_CAPTURES =
RFC5424_WITHOUT_TIME_AND_PRI_REGEXP.names.freeze
# Leading `<pri>`, 1-3 version digits and one whitespace character.
# #parse_rfc5424_regex continues parsing at the end of this match.
RFC5424_PRI_REGEXP =
/^<(?<pri>\d{1,3})>\d\d{0,2}\s/
# Field separator the hand-written parsers scan for.
SPLIT_CHAR =
' '.freeze
# Placeholder for an absent value; #parse_rfc5424 compares the first character
# of the structured-data field against it.
NILVALUE =
'-'.freeze

Constants inherited from Parser

Parser::AVAILABLE_PARSER_VALUE_TYPES, Parser::PARSER_TYPES, Parser::TRUTHY_VALUES

Constants included from Configurable

Configurable::CONFIG_TYPE_REGISTRY

Instance Attribute Summary

Attributes inherited from Parser

#type_converters

Attributes inherited from Base

#under_plugin_development

Instance Method Summary collapse

Methods inherited from Parser

#build_type_converters, #call, #convert_values, #implement?, #parse_io, #parse_partial_data, #parse_time, #parse_with_timeout, #parser_type, #start, #stop, #string_like_null

Methods included from TimeMixin::Parser

included, #time_parser_create

Methods included from OwnedByMixin

#log, #owner, #owner=

Methods inherited from Base

#acquire_worker_lock, #after_shutdown, #after_shutdown?, #after_start, #after_started?, #before_shutdown, #before_shutdown?, #called_in_test?, #close, #closed?, #configured?, #context_router, #context_router=, #fluentd_worker_id, #get_lock_path, #has_router?, #inspect, #multi_workers_ready?, #plugin_root_dir, #reloadable_plugin?, #shutdown, #shutdown?, #start, #started?, #stop, #stopped?, #string_safe_encoding, #terminate, #terminated?

Methods included from SystemConfig::Mixin

#system_config, #system_config_override

Methods included from Configurable

#config, #configure_proxy_generate, #configured_section_create, included, lookup_type, register_type

Constructor Details

#initialize ⇒ SyslogParser

Returns a new instance of SyslogParser.



63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
# File 'lib/fluent/plugin/parser_syslog.rb', line 63

# Builds a parser whose format-specific state is still unset.
#
# Everything except the mutex starts out as nil/false here; #configure and the
# setup_time_parser_* methods fill these fields in later.
def initialize
  super

  # Guards the time parsers in the regexp-based parse methods.
  @mutex = Mutex.new

  # Regexps selected by #configure and the "regexp engine" flag.
  @regexp, @regexp3164, @regexp5424 = nil, nil, nil
  @regexp_parser = nil

  # Per-format time parsers and the number of space-separated timestamp parts.
  @time_parser_rfc3164, @time_parser_rfc5424 = nil, nil
  @space_count_rfc3164, @space_count_rfc5424 = nil, nil

  # Fast-path switches set by the setup_time_parser_* methods.
  @skip_space_count_rfc3164, @skip_space_count_rfc5424 = false, false

  # Fallback parser tried when the configured RFC5424 time format fails.
  @time_parser_rfc5424_without_subseconds = nil
end

Instance Method Details

#configure(conf) ⇒ Object



79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
# File 'lib/fluent/plugin/parser_syslog.rb', line 79

# Applies the configuration and wires #parse to the implementation matching
# @message_format (:rfc3164, :rfc5424 or :auto) and @parser_engine.
#
# Side effects visible here: #parse is re-aliased on this instance's singleton
# class, the time parsers are created through setup_time_parser_3164/5424,
# and @regexp (exposed via #patterns) is set to the regexp of the chosen
# format (nil for :auto).
#
# @param conf [Object] configuration element; only `has_key?('time_format')`
#   is called on it in this method
def configure(conf)
  super

  @regexp_parser = (@parser_engine == :regexp)

  @regexp =
    case @message_format
    when :rfc3164
      impl = @regexp_parser ? :parse_rfc3164_regex : :parse_rfc3164
      singleton_class.send(:alias_method, :parse, impl)
      setup_time_parser_3164(@time_format)
      RFC3164_WITHOUT_TIME_AND_PRI_REGEXP
    when :rfc5424
      impl = @regexp_parser ? :parse_rfc5424_regex : :parse_rfc5424
      singleton_class.send(:alias_method, :parse, impl)
      # An explicit time_format in conf wins; otherwise use the RFC5424 default.
      @time_format = @rfc5424_time_format unless conf.has_key?('time_format')
      setup_time_parser_5424(@time_format)
      RFC5424_WITHOUT_TIME_AND_PRI_REGEXP
    when :auto
      singleton_class.send(:alias_method, :parse, :parse_auto)
      # Both formats may show up, so both time parsers are prepared.
      setup_time_parser_3164(@time_format)
      setup_time_parser_5424(@rfc5424_time_format)
      nil
    end

  return unless @regexp_parser

  # The regexp engine needs both patterns because :auto can pick either one.
  @regexp3164 = RFC3164_WITHOUT_TIME_AND_PRI_REGEXP
  @regexp5424 = RFC5424_WITHOUT_TIME_AND_PRI_REGEXP
end

#parse(text) ⇒ Object



144
145
146
# File 'lib/fluent/plugin/parser_syslog.rb', line 144

# Placeholder entry point.
#
# #configure re-aliases `parse` on the instance to one of the parse_* methods,
# so this body only runs on an instance that was never configured.
#
# @param text [String] ignored by this placeholder
# @return [nil]
def parse(text)
  nil
end

#parse_auto(text, &block) ⇒ Object



148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
# File 'lib/fluent/plugin/parser_syslog.rb', line 148

# Detects the syslog flavour of +text+ and forwards it to the matching parser.
#
# A line matching REGEXP_DETECT_RFC5424 goes to an RFC5424 parser, anything
# else to an RFC3164 parser; @regexp_parser selects the *_regex variant.
#
# @param text [String] raw syslog line
# @param block [Proc] forwarded unchanged to the selected parser
def parse_auto(text, &block)
  rfc5424 = REGEXP_DETECT_RFC5424.match?(text)

  if @regexp_parser
    rfc5424 ? parse_rfc5424_regex(text, &block) : parse_rfc3164_regex(text, &block)
  else
    rfc5424 ? parse_rfc5424(text, &block) : parse_rfc3164(text, &block)
  end
end

#parse_plain(re, time, text, idx, record, capture_list) {|time, record| ... } ⇒ Object

Parameters:

  • time (EventTime)
  • idx (Integer)

    note: this argument is needed to avoid string creation

  • record (Hash)
  • capture_list (Array)

    for performance

Yields:

  • (time, record)


246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
# File 'lib/fluent/plugin/parser_syslog.rb', line 246

# Matches +re+ against +text+ starting at +idx+ and copies the named captures
# listed in +capture_list+ into +record+.
#
# Captures that are nil, or whose lookup raises, are skipped. The "message"
# capture has its trailing line terminator removed in place.
#
# @param re [Regexp] pattern to apply
# @param time [Object, nil] already-parsed event time; when nil and
#   @estimate_current_event is set, Fluent::EventTime.now is used instead
# @param text [String] full input line
# @param idx [Integer] offset to start matching at (avoids creating a substring)
# @param record [Hash] record to fill; mutated
# @param capture_list [Array<String>] capture names to copy (precomputed for performance)
# @yield [time, record] the result, or (nil, nil) when +re+ does not match
def parse_plain(re, time, text, idx, record, capture_list, &block)
  matched = re.match(text, idx)
  unless matched
    yield nil, nil
    return
  end

  capture_list.each do |key|
    captured = begin
                 matched[key]
               rescue StandardError
                 nil
               end
    next unless captured

    captured.chomp! if key == "message"
    record[key] = captured
  end

  time ||= Fluent::EventTime.now if @estimate_current_event

  yield time, record
end

#parse_rfc3164(text) {|time, record| ... } ⇒ Object

Yields:

  • (time, record)


272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
# File 'lib/fluent/plugin/parser_syslog.rb', line 272

# Parses one RFC3164 (BSD syslog) line by scanning for separators instead of
# running a regexp.
#
# Expected layout: `[<pri>]TIMESTAMP HOST [IDENT[[PID]]:] MESSAGE`.
# Reads @with_priority, @skip_space_count_rfc3164, @space_count_rfc3164,
# @support_colonless_ident, @keep_time_key and @time_parser_rfc3164.
#
# Malformed input (missing/empty priority, truncated timestamp, no host
# separator) is reported by yielding (nil, nil) rather than by raising.
#
# @param text [String] raw syslog line
# @yield [time, record] the value returned by @time_parser_rfc3164.parse and
#   the record Hash ('host', 'message', optionally 'pri', 'ident', 'pid',
#   'time'); (nil, nil) when the header cannot be split
def parse_rfc3164(text, &block)
  pri = nil
  cursor = 0
  if @with_priority
    unless text.start_with?('<'.freeze)
      yield nil, nil
      return
    end
    i = text.index('>'.freeze, 1)
    # No closing '>' at all, or an empty "<>".
    if i.nil? || i < 2
      yield nil, nil
      return
    end
    pri = text.slice(1, i - 1).to_i
    cursor = i + 1
  end

  if @skip_space_count_rfc3164
    # header part
    time_size = 15 # skip Mmm dd hh:mm:ss
    time_end = text[cursor + time_size]
    if time_end == SPLIT_CHAR
      time_str = text.slice(cursor, time_size)
      cursor += 16 # time + ' '
    elsif time_end == '.'.freeze
      # support subsecond time: the timestamp runs up to the next separator,
      # searched from the end of the fixed-size part (relative to cursor).
      i = text.index(SPLIT_CHAR, cursor + time_size)
      if i.nil?
        yield nil, nil
        return
      end
      time_str = text.slice(cursor, i - cursor)
      cursor = i + 1
    else
      yield nil, nil
      return
    end
  else
    # Custom time format: walk over as many space-separated parts as the
    # format has, tolerating (and remembering) runs of repeated spaces.
    i = cursor - 1
    sq = false
    @space_count_rfc3164.times do
      while text[i + 1] == SPLIT_CHAR
        sq = true
        i += 1
      end
      i = text.index(SPLIT_CHAR, i + 1)
      break if i.nil? # line ends inside the timestamp
    end
    if i.nil?
      yield nil, nil
      return
    end

    time_str = text.slice(cursor, i - cursor)
    # Collapse repeated spaces (e.g. "Feb  8") before handing it to the parser.
    time_str = time_str.squeeze(SPLIT_CHAR) if sq
    cursor = i + 1
  end

  i = text.index(SPLIT_CHAR, cursor)
  if i.nil?
    yield nil, nil
    return
  end
  host_size = i - cursor
  host = text.slice(cursor, host_size)
  cursor += host_size + 1

  record = {'host' => host}
  record['pri'] = pri if pri

  i = text.index(SPLIT_CHAR, cursor)

  # message part
  msg = if i.nil? # for 'only non-space content case'
          text.slice(cursor, text.bytesize)
        elsif text[i - 1] == ':'.freeze
          # "ident:" or "ident[pid]:" tag. Only treat it as the latter when the
          # opening bracket really belongs to this token.
          left_bracket_pos = text[i - 2] == ']'.freeze ? text.index('['.freeze, cursor) : nil
          if left_bracket_pos && left_bracket_pos < i
            record['ident'] = text.slice(cursor, left_bracket_pos - cursor)
            record['pid'] = text.slice(left_bracket_pos + 1, i - left_bracket_pos - 3) # remove '[' / ']:'
          else
            record['ident'] = text.slice(cursor, i - cursor - 1) # remove ':'
          end
          text.slice(i + 1, text.bytesize)
        elsif @support_colonless_ident
          left_bracket_pos = text[i - 1] == ']'.freeze ? text.index('['.freeze, cursor) : nil
          if left_bracket_pos && left_bracket_pos < i
            record['ident'] = text.slice(cursor, left_bracket_pos - cursor)
            record['pid'] = text.slice(left_bracket_pos + 1, i - left_bracket_pos - 2) # remove '[' / ']'
          else
            record['ident'] = text.slice(cursor, i - cursor)
          end
          text.slice(i + 1, text.bytesize)
        else
          # No recognisable tag: everything after the host is the message.
          text.slice(cursor, text.bytesize)
        end
  msg.chomp!
  record['message'] = msg

  time = @time_parser_rfc3164.parse(time_str)
  record['time'] = time_str if @keep_time_key

  yield time, record
end

#parse_rfc3164_regex(text, &block) ⇒ Object



166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
# File 'lib/fluent/plugin/parser_syslog.rb', line 166

# Parses one RFC3164 line with the regexp engine.
#
# The optional `<pri>` prefix and the timestamp are cut off by hand; the rest
# of the line is matched by #parse_plain using @regexp3164.
#
# @param text [String] raw syslog line
# @param block [Proc] receives (time, record), or (nil, nil) when the
#   priority prefix is required but missing
def parse_rfc3164_regex(text, &block)
  record = {}
  idx = 0

  if @with_priority
    unless RFC3164_PRI_REGEXP.match?(text)
      yield(nil, nil)
      return
    end

    gt_pos = text.index('>')
    # `<` is skipped; the trailing `>` in the slice is ignored by to_i.
    record['pri'] = text[1..gt_pos].to_i
    idx = gt_pos + 1
  end

  # Walk over the space-separated parts of the timestamp, noting whether any
  # separator was a run of several spaces.
  pos = idx - 1
  squeezed = false
  @space_count_rfc3164.times do
    while text[pos + 1] == SPLIT_CHAR
      squeezed = true
      pos += 1
    end

    pos = text.index(SPLIT_CHAR, pos + 1)
  end

  time_str = text.slice(idx, pos - idx)
  time_str = time_str.squeeze(SPLIT_CHAR) if squeezed

  time = @mutex.synchronize { @time_parser_rfc3164.parse(time_str) }
  record['time'] = time_str if @keep_time_key

  parse_plain(@regexp3164, time, text, pos + 1, record, RFC3164_CAPTURES, &block)
end

#parse_rfc5424(text) {|time, record| ... } ⇒ Object

Yields:

  • (time, record)


374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
# File 'lib/fluent/plugin/parser_syslog.rb', line 374

# Parses one RFC5424 line by scanning for separators instead of running a
# regexp.
#
# Expected layout:
# `[<pri>VERSION ]TIMESTAMP HOST IDENT PID MSGID EXTRADATA[ MESSAGE]`.
# Reads @with_priority, @skip_space_count_rfc5424, @space_count_rfc5424,
# @keep_time_key, @time_parser_rfc5424 and
# @time_parser_rfc5424_without_subseconds.
#
# Malformed input (missing/empty priority, line ending inside the header) is
# reported by yielding (nil, nil) rather than by raising.
#
# @param text [String] raw syslog line
# @yield [time, record] the parsed time and the record Hash ('host', 'ident',
#   'pid', 'msgid', 'extradata', optionally 'pri', 'message', 'time');
#   (nil, nil) when the header cannot be split
def parse_rfc5424(text, &block)
  pri = nil
  cursor = 0
  if @with_priority
    unless text.start_with?('<'.freeze)
      yield nil, nil
      return
    end
    i = text.index('>'.freeze, 1)
    # No closing '>' at all, or an empty "<>".
    if i.nil? || i < 2
      yield nil, nil
      return
    end
    pri = text.slice(1, i - 1).to_i
    # Skip whatever sits between '>' and the first separator (the version).
    i = text.index(SPLIT_CHAR, i)
    if i.nil?
      yield nil, nil
      return
    end
    cursor = i + 1
  end

  # timestamp part
  if @skip_space_count_rfc5424
    # Time format has no spaces: the timestamp is a single token.
    i = text.index(SPLIT_CHAR, cursor)
    if i.nil?
      yield nil, nil
      return
    end
    time_str = text.slice(cursor, i - cursor)
    cursor = i + 1
  else
    # Time format contains spaces: walk over as many space-separated parts as
    # the format has, tolerating (and remembering) runs of repeated spaces.
    i = cursor - 1
    sq = false
    @space_count_rfc5424.times do
      while text[i + 1] == SPLIT_CHAR
        sq = true
        i += 1
      end
      i = text.index(SPLIT_CHAR, i + 1)
      break if i.nil? # line ends inside the timestamp
    end
    if i.nil?
      yield nil, nil
      return
    end

    time_str = text.slice(cursor, i - cursor)
    # Collapse repeated spaces before handing the string to the time parser.
    time_str = time_str.squeeze(SPLIT_CHAR) if sq
    cursor = i + 1
  end

  # Repeat same code for the performance

  # host part
  i = text.index(SPLIT_CHAR, cursor)
  unless i
    yield nil, nil
    return
  end
  slice_size = i - cursor
  host = text.slice(cursor, slice_size)
  cursor += slice_size + 1

  # ident part
  i = text.index(SPLIT_CHAR, cursor)
  unless i
    yield nil, nil
    return
  end
  slice_size = i - cursor
  ident = text.slice(cursor, slice_size)
  cursor += slice_size + 1

  # pid part
  i = text.index(SPLIT_CHAR, cursor)
  unless i
    yield nil, nil
    return
  end
  slice_size = i - cursor
  pid = text.slice(cursor, slice_size)
  cursor += slice_size + 1

  # msgid part
  i = text.index(SPLIT_CHAR, cursor)
  unless i
    yield nil, nil
    return
  end
  slice_size = i - cursor
  msgid = text.slice(cursor, slice_size)
  cursor += slice_size + 1

  record = {'host' => host, 'ident' => ident, 'pid' => pid, 'msgid' => msgid}
  record['pri'] = pri if pri

  # extradata part
  ed_start = text[cursor]
  if ed_start == NILVALUE
    record['extradata'] = NILVALUE
    cursor += 1
  else
    start = cursor
    # The structured data ends at the first "] " after its start.
    i = text.index('] '.freeze, cursor)
    extradata = if i
                  diff = i + 1 - start # calculate ']' position
                  cursor += diff
                  text.slice(start, diff)
                else  # No message part case
                  cursor = text.bytesize
                  text.slice(start, cursor)
                end
    # Drop escaping backslashes (an empty replacement makes tr! delete them).
    extradata.tr!("\\".freeze, ''.freeze)
    record['extradata'] = extradata
  end

  # message part
  if cursor != text.bytesize
    msg = text.slice(cursor + 1, text.bytesize)
    # bytesize can exceed the character count on multibyte input, in which
    # case there is nothing left to slice and msg is nil.
    if msg
      msg.chomp!
      record['message'] = msg
    end
  end

  time = begin
           @time_parser_rfc5424.parse(time_str)
         rescue Fluent::TimeParser::TimeParseError
           # Retry with the format that has no sub-second part.
           @time_parser_rfc5424_without_subseconds.parse(time_str)
         end
  record['time'] = time_str if @keep_time_key

  yield time, record
end

#parse_rfc5424_regex(text, &block) ⇒ Object



201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
# File 'lib/fluent/plugin/parser_syslog.rb', line 201

# Parses one RFC5424 line with the regexp engine.
#
# The optional `<pri>VERSION ` prefix and the timestamp are cut off by hand;
# the rest of the line is matched by #parse_plain using @regexp5424.
#
# @param text [String] raw syslog line
# @param block [Proc] receives (time, record), or (nil, nil) when the
#   priority prefix is required but missing
def parse_rfc5424_regex(text, &block)
  record = {}
  idx = 0

  if @with_priority
    pri_match = RFC5424_PRI_REGEXP.match(text)
    unless pri_match
      yield(nil, nil)
      return
    end

    record['pri'] = pri_match['pri'].to_i
    # Continue right after "<pri>VERSION ".
    idx = pri_match.end(0)
  end

  # Walk over the space-separated parts of the timestamp, noting whether any
  # separator was a run of several spaces.
  pos = idx - 1
  squeezed = false
  @space_count_rfc5424.times do
    while text[pos + 1] == SPLIT_CHAR
      squeezed = true
      pos += 1
    end

    pos = text.index(SPLIT_CHAR, pos + 1)
  end

  time_str = text.slice(idx, pos - idx)
  time_str = time_str.squeeze(SPLIT_CHAR) if squeezed

  time = @mutex.synchronize do
    begin
      @time_parser_rfc5424.parse(time_str)
    rescue Fluent::TimeParser::TimeParseError => e
      # Configured format failed; log it and retry without sub-seconds.
      log.trace(e)
      @time_parser_rfc5424_without_subseconds.parse(time_str)
    end
  end

  record['time'] = time_str if @keep_time_key

  parse_plain(@regexp5424, time, text, pos + 1, record, RFC5424_CAPTURES, &block)
end

#patterns ⇒ Object

this method is for tests



140
141
142
# File 'lib/fluent/plugin/parser_syslog.rb', line 140

# Exposes the regexp and time format currently held by the parser.
# Intended for tests.
#
# @return [Hash] 'format' => @regexp, 'time_format' => @time_format
def patterns
  summary = {}
  summary['format'] = @regexp
  summary['time_format'] = @time_format
  summary
end

#setup_time_parser_3164(time_fmt) ⇒ Object



124
125
126
127
128
129
130
# File 'lib/fluent/plugin/parser_syslog.rb', line 124

# Prepares RFC3164 timestamp handling for +time_fmt+.
#
# Creates the time parser, enables the fixed-width fast path of
# #parse_rfc3164 for the two standard formats, and records how many
# space-separated parts a timestamp in this format has.
#
# @param time_fmt [String] strptime-style time format
# @return [Integer] the stored number of space-separated parts
def setup_time_parser_3164(time_fmt)
  @time_parser_rfc3164 = time_parser_create(format: time_fmt)

  case time_fmt
  when '%b %d %H:%M:%S', '%b %d %H:%M:%S.%N'
    # Only ever switched on here; other formats leave the flag untouched.
    @skip_space_count_rfc3164 = true
  end

  @space_count_rfc3164 = time_fmt.squeeze(' ').count(' ') + 1
end

#setup_time_parser_5424(time_fmt) ⇒ Object



132
133
134
135
136
137
# File 'lib/fluent/plugin/parser_syslog.rb', line 132

# Prepares RFC5424 timestamp handling for +time_fmt+.
#
# Creates the primary time parser plus a fallback without sub-seconds, and
# records whether the format is a single token and how many space-separated
# parts it has.
#
# @param time_fmt [String] strptime-style time format
# @return [Integer] the stored number of space-separated parts
def setup_time_parser_5424(time_fmt)
  @time_parser_rfc5424 = time_parser_create(format: time_fmt)
  @time_parser_rfc5424_without_subseconds = time_parser_create(format: "%Y-%m-%dT%H:%M:%S%z")

  separators = time_fmt.squeeze(' ').count(' ')
  # No space in the format means the timestamp ends at the first separator.
  @skip_space_count_rfc5424 = separators.zero?
  @space_count_rfc5424 = separators + 1
end