Class: Fluent::Plugin::CSVParser

Inherits:
Parser show all
Defined in:
lib/fluent/plugin/parser_csv.rb

Direct Known Subclasses

Compat::TextParser::CSVParser

Constant Summary

Constants inherited from Parser

Parser::AVAILABLE_PARSER_VALUE_TYPES, Parser::PARSER_TYPES, Parser::TRUTHY_VALUES

Constants included from Configurable

Configurable::CONFIG_TYPE_REGISTRY

Instance Attribute Summary

Attributes inherited from Parser

#type_converters

Attributes inherited from Base

#under_plugin_development

Instance Method Summary collapse

Methods inherited from Parser

#build_type_converters, #call, #convert_values, #implement?, #initialize, #parse_io, #parse_partial_data, #parse_time, #parse_with_timeout, #parser_type, #start, #stop, #string_like_null

Methods included from TimeMixin::Parser

included, #time_parser_create

Methods included from OwnedByMixin

#log, #owner, #owner=

Methods inherited from Base

#acquire_worker_lock, #after_shutdown, #after_shutdown?, #after_start, #after_started?, #before_shutdown, #before_shutdown?, #called_in_test?, #close, #closed?, #configured?, #context_router, #context_router=, #fluentd_worker_id, #get_lock_path, #has_router?, #initialize, #inspect, #multi_workers_ready?, #plugin_root_dir, #reloadable_plugin?, #shutdown, #shutdown?, #start, #started?, #stop, #stopped?, #string_safe_encoding, #terminate, #terminated?

Methods included from SystemConfig::Mixin

#system_config, #system_config_override

Methods included from Configurable

#config, #configure_proxy_generate, #configured_section_create, included, #initialize, lookup_type, register_type

Constructor Details

This class inherits a constructor from Fluent::Plugin::Parser

Instance Method Details

#configure(conf) ⇒ Object



33
34
35
36
37
38
39
40
41
42
43
44
45
46
# File 'lib/fluent/plugin/parser_csv.rb', line 33

# Applies the configuration and, when the fast engine is selected, swaps in
# the lightweight line parser.
#
# The inherited +configure+ runs first. If +@parser_engine+ is +:fast+, the
# quote character and the pattern matching a doubled quote character are
# set up, and +parse+ is overridden on this instance only so that it
# dispatches to +parse_fast+.
#
# @param conf the configuration object, forwarded unchanged to +super+
def configure(conf)
  super

  return unless @parser_engine == :fast

  @quote_char = '"'
  @escape_pattern = Regexp.compile(@quote_char * 2)

  # Defined on the singleton class, so other instances keep the CSV-based parse.
  define_singleton_method(:parse, method(:parse_fast))
end

#parse(text) {|time, record| ... } ⇒ Object

Yields:

  • (time, record)


48
49
50
51
52
53
# File 'lib/fluent/plugin/parser_csv.rb', line 48

# Parses a single CSV line with the standard CSV library and yields the
# resulting event.
#
# The line is split using +@delimiter+ as the column separator, the values
# are paired positionally with +@keys+, and the resulting hash is handed to
# +parse_time+ and +convert_values+.
#
# @param text [String] one CSV-formatted line
# @yield [time, record] the two values returned by +convert_values+
def parse(text, &block)
  fields = CSV.parse_line(text, col_sep: @delimiter)
  row = @keys.zip(fields).to_h
  event_time = parse_time(row)
  time, record = convert_values(event_time, row)
  yield time, record
end

#parse_fast(text) {|time, record| ... } ⇒ Object

Yields:

  • (time, record)


55
56
57
58
59
# File 'lib/fluent/plugin/parser_csv.rb', line 55

# Parses a single CSV line with the hand-rolled splitter and yields the
# resulting event.
#
# The hash built by +parse_fast_internal+ is handed to +parse_time+ and
# +convert_values+, exactly as +parse+ does with the CSV-library result.
#
# @param text [String] one CSV-formatted line
# @yield [time, record] the two values returned by +convert_values+
def parse_fast(text, &block)
  fields = parse_fast_internal(text)
  event_time = parse_time(fields)
  time, record = convert_values(event_time, fields)
  yield time, record
end

#parse_fast_internal(text) ⇒ Object

CSV.parse_line is too slow because it initializes many objects, and the CSV module doesn't provide an efficient method for parsing a single line. This method avoids the overhead of CSV.parse_line for typical patterns.



64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
# File 'lib/fluent/plugin/parser_csv.rb', line 64

# Splits one CSV line into a record hash without going through
# CSV.parse_line.
#
# The line is split on +@delimiter+ and each piece is classified by how many
# +@quote_char+ characters it contains:
#
# * 0 quotes - taken as is; an empty piece becomes +nil+.
# * 1 quote  - if the piece starts with the quote, following pieces are
#   merged back (re-joined with the delimiter) until one ends with the
#   quote, and the outer characters are stripped.
# * 2 quotes - surrounding quotes are stripped when the piece both starts
#   and ends with the quote.
# * more     - surrounding quotes are stripped as above, then every match of
#   +@escape_pattern+ is replaced by a single quote character.
#
# Limitations visible in this implementation: a field merged from several
# pieces is not passed through +@escape_pattern+, any piece ending in the
# quote character closes the merge, and the i-th field is stored under
# +@keys[i]+ without checking that such a key exists.
#
# @param text [String] one CSV line, optionally newline-terminated; it is
#   not modified
# @return [Hash] fields keyed by +@keys+ in order; empty for an empty line
def parse_fast_internal(text)
  record = {}
  # Work on a chomped copy: chomp! would alter the caller's string and
  # raises FrozenError when the input is frozen.
  text = text.chomp

  return record if text.empty?

  # A limit of -1 keeps trailing empty columns.
  columns = text.split(@delimiter, -1)
  num_columns = columns.size
  i = 0 # index of the next key in @keys
  j = 0 # index of the current piece in columns
  # Per the original author's note, while is used because it is faster than
  # each_with_index.
  while j < num_columns
    column = columns[j]

    case column.count(@quote_char)
    when 0
      if column.empty?
        column = nil
      end
    when 1
      if column.start_with?(@quote_char)
        # The delimiter occurred inside a quoted field: collect the pieces
        # up to the one carrying the closing quote.
        to_merge = [column]
        j += 1
        while j < num_columns
          merged_col = columns[j]
          to_merge << merged_col
          break if merged_col.end_with?(@quote_char)
          j += 1
        end
        # Drop the first and last character (the surrounding quotes).
        column = to_merge.join(@delimiter)[1..-2]
      end
    when 2
      if column.start_with?(@quote_char) && column.end_with?(@quote_char)
        column = column[1..-2]
      end
    else
      if column.start_with?(@quote_char) && column.end_with?(@quote_char)
        column = column[1..-2]
      end
      # Collapse escaped (doubled) quotes into a single quote character.
      column.gsub!(@escape_pattern, @quote_char)
    end

    record[@keys[i]] = column
    j += 1
    i += 1
  end
  record
end