Class: Fluent::Plugin::XmlParser

Inherits:
Parser
  • Object
show all
Defined in:
lib/fluent/plugin/parser_xml.rb

Constant Summary collapse

# Lower-case string forms that #convert treats as boolean true.
# Frozen so the shared list cannot be mutated at runtime.
TRUTHY_VALUES =
%w[true yes 1].freeze

Instance Method Summary collapse

Instance Method Details

#configure(config) ⇒ Object



29
30
31
32
33
34
# File 'lib/fluent/plugin/parser_xml.rb', line 29

# Configures the plugin and builds the time parser that #parse uses to turn
# the extracted time field into an event time.
#
# @param config the plugin configuration; passed on unchanged to the parent
#   class by the bare `super` call
def configure(config)
  # Bare `super` forwards `config` to the parent implementation.
  super

  # Create the time parser from @time_format. @time_format is not assigned in
  # this method; presumably the parent's configure populates it — TODO confirm.
  @time_parser = Fluent::TimeParser.new(@time_format)
end

#convert(v, t) ⇒ Object



76
77
78
79
80
81
82
83
84
85
86
87
88
89
# File 'lib/fluent/plugin/parser_xml.rb', line 76

# Casts an extracted value to the type named by +t+.
#
# @param v [Object] the raw value (a String when it comes from an XML attribute)
# @param t [String, nil] one of 'bool', 'float', 'integer' or 'string'
# @return [Object] the converted value; +v+ itself when +t+ is not a known type
def convert(v, t)
  case t
  # A value is true only if its lower-cased string form is in TRUTHY_VALUES.
  when 'bool'    then TRUTHY_VALUES.include?(v.to_s.downcase)
  when 'float'   then v.to_f
  when 'integer' then v.to_i
  when 'string'  then v.to_s
  else v
  end
end

#deep_each_pair(hash, parents = []) ⇒ Object



102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
# File 'lib/fluent/plugin/parser_xml.rb', line 102

# Walks a (possibly nested) hash of field definitions and yields every leaf.
#
# A leaf is an Array value. Arrays with more than two entries are treated as
# several candidate element XPaths followed by one shared attribute name, and
# each candidate is yielded separately as +[element_xpath, attribute]+.
# Shorter arrays are yielded unchanged. Values that are neither a Hash nor an
# Array are skipped.
#
# @param hash [Hash] field name => Array leaf, or a nested Hash of the same shape
# @param parents [Array] keys of the enclosing hashes, outermost first; this
#   array is never modified
# @yieldparam key [Object] the leaf's key
# @yieldparam xpath [Array] the leaf definition
# @yieldparam parents [Array] keys leading to the leaf
# @return [Hash] +hash+
def deep_each_pair(hash, parents = [], &block)
  hash.each_pair do |key, value|
    case value
    when Hash
      # Build a fresh path per level rather than push/pop on the caller's
      # array, so it stays intact even if the block raises or it is frozen.
      deep_each_pair(value, [*parents, key], &block)
    when Array
      if value.size > 2
        # The last entry is the attribute name; only the entries before it are
        # element XPaths. It must not be yielded as an XPath itself.
        *element_xpaths, attribute = value
        element_xpaths.each { |element_xpath| yield(key, [element_xpath, attribute], parents) }
      else
        yield(key, value, parents)
      end
    end
  end
end

#get_field_value(doc, xpath) ⇒ Object



91
92
93
94
95
96
97
98
99
100
# File 'lib/fluent/plugin/parser_xml.rb', line 91

# Looks up one attribute of the first element matched by an XPath.
#
# @param doc [#xpath] the document to query (#parse passes the Nokogiri document)
# @param xpath [Array] pair of +[element_xpath, attribute_name]+
# @return [Object, nil] the attribute value, or nil when nothing matches, the
#   attribute is absent, or the lookup raises
def get_field_value(doc, xpath)
  element = doc.xpath(xpath[0])&.first

  # "No match" is an expected outcome, so handle it explicitly instead of
  # relying on a raised-and-rescued error.
  element[xpath[1]] unless element.nil?
rescue StandardError
  # Deliberately best-effort: a malformed XPath or a nil definition means the
  # field simply has no value.
  nil
end

#parse(text) ⇒ Object



36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
# File 'lib/fluent/plugin/parser_xml.rb', line 36

# Parses one XML document into a time and a (possibly nested) record.
#
# Every leaf of @xpath is looked up in the document, optionally converted to
# the type found at the same position in @xpath_types, and stored in the
# record under its parent keys (record["a"]["b"]["c"]). Fields without a value
# are left out.
#
# @param text [String] the XML source
# @yieldparam time the value returned by @time_parser for the @time_xpath
#   field, or nil if parsing failed
# @yieldparam record [Hash, nil] the extracted fields as plain hashes, or nil
#   if parsing failed
def parse(text)
  time, record =
    begin
      # Open the XML document
      doc = Nokogiri.XML(text)
      doc.remove_namespaces!

      # Create the time value
      event_time = @time_parser.parse(get_field_value(doc, @time_xpath))

      # Plain hashes only: a default-proc hash would insert keys whenever a
      # consumer of the record merely reads a missing one.
      fields = {}

      # Recursively parse XPath to handle nested structures
      deep_each_pair(@xpath) do |k, xpath, parents|
        # Retrieve the field value from the XPath; ignore fields with no value
        value = get_field_value(doc, xpath)
        next if value.nil?

        # Convert the value when a type is configured for this field
        type = @xpath_types.dig(*parents, k) unless @xpath_types.nil?
        value = convert(value, type) unless type.nil?
        next if value.nil?

        # Create each missing intermediate hash on the way down so fields
        # nested at any depth can be stored.
        parents.inject(fields) { |node, parent| node[parent] ||= {} }[k] = value
      end

      [event_time, fields]
    rescue StandardError
      # Any failure while reading the document makes the event unparseable.
      [nil, nil]
    end

  # Yield outside the rescue: an error raised by the caller's block must
  # propagate instead of triggering a second yield with (nil, nil).
  yield(time, record)
end