Class: Sensu::Plugins::Prometheus::Metrics

Inherits:
Object
  • Object
show all
Includes:
Checks, Utils::Log
Defined in:
lib/sensu/plugins/prometheus/metrics.rb

Overview

Represents the queries that will be fired against Prometheus to collect information about monitored resources. Ideally, every public method on this class will be available as a final check.

Constant Summary

Constants included from Checks

Checks::VERSION

Instance Method Summary collapse

Methods included from Checks

#above, #below, #equals, #evaluate

Methods included from Utils::Log

#log, log

Constructor Details

#initialize(prometheus_client) ⇒ Metrics

Returns a new instance of Metrics.



14
15
16
# File 'lib/sensu/plugins/prometheus/metrics.rb', line 14

# Stores the client that every check method in this class uses to talk to
# Prometheus.
#
# @param prometheus_client [Object] client object; the methods of this class
#   call `query` and `percent_query_free` on it.
def initialize(prometheus_client)
  @client = prometheus_client
end

Instance Method Details

#custom(cfg) ⇒ Object

Executes the query given in the check’s configuration and makes no modifications to the value.



20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
# File 'lib/sensu/plugins/prometheus/metrics.rb', line 20

# Runs the query found in the check configuration and reports every result
# with its value left exactly as the client returned it.
#
# @param cfg [Hash] check configuration; `cfg['query']` is passed to the client.
# @return [Array<Hash>] one `{ 'source' => ..., 'value' => ... }` entry per result.
def custom(cfg)
  @client.query(cfg['query']).map do |entry|
    labels = entry['metric']
    # The `app` label wins whenever the key exists; otherwise use `instance`.
    origin = labels.key?('app') ? labels['app'] : labels['instance']

    { 'source' => origin, 'value' => entry['value'][1] }
  end
end

#disk(cfg) ⇒ Object

Query percentage of mountpoint total disk space size compared with available.



39
40
41
42
43
44
45
46
47
# File 'lib/sensu/plugins/prometheus/metrics.rb', line 39

# Builds the client's "percent free" query for the filesystem size/available
# series of a single mountpoint and hands the results to `prepare_metrics`.
#
# @param cfg [Hash] check configuration; reads `cfg['mount']` and the optional
#   `cfg['name']` (when absent the name comes from `nice_disk_name`).
# @return [Object] whatever `prepare_metrics` returns for "disk_<name>".
def disk(cfg)
  selector = %(mountpoint="#{cfg['mount']}")
  label = cfg['name'] || nice_disk_name(cfg['mount'])

  size_series = "node_filesystem_size{#{selector}}"
  avail_series = "node_filesystem_avail{#{selector}}"
  expression = @client.percent_query_free(size_series, avail_series)

  prepare_metrics("disk_#{label}", @client.query(expression))
end

#disk_all(cfg) ⇒ Object

Query percentage of free space on file-systems, ignoring by default `tmpfs` or the regexp configured on check.



51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
# File 'lib/sensu/plugins/prometheus/metrics.rb', line 51

# Runs the client's "percent free" query for both disk space and inodes over
# every filesystem whose `fstype` does not match the ignore expression, and
# turns each result into a ready-made check entry.
#
# @param cfg [Hash] check configuration; `cfg['ignore_fs']` overrides the
#   default ignore expression `tmpfs`.
# @return [Array<Hash>] entries with 'output', 'name', 'value' and 'source'.
def disk_all(cfg)
  excluded = cfg['ignore_fs'] || 'tmpfs'
  selector = %(fstype!~"#{excluded}")

  # Each row: first series, second series, short kind used in names and output.
  series = [
    %w[node_filesystem_size node_filesystem_avail disk],
    %w[node_filesystem_files node_filesystem_files_free inode]
  ]

  series.flat_map do |first_series, second_series, kind|
    expression = @client.percent_query_free(
      "#{first_series}{#{selector}}",
      "#{second_series}{#{selector}}"
    )

    @client.query(expression).map do |entry|
      labels = entry['metric']
      host = labels['instance']
      mount = labels['mountpoint']
      pretty = nice_disk_name(mount)
      # The reported value is truncated to an integer.
      pct = entry['value'][1].to_i

      {
        'output' => "#{kind.capitalize}: #{mount}, Usage: #{pct}% |#{kind}=#{pct}",
        'name' => "#{kind}_#{pretty}",
        'value' => pct,
        'source' => host
      }
    end
  end
end

#inode(cfg) ⇒ Object

Query percentage of free inodes on check’s configured mountpoint.



88
89
90
91
92
93
94
95
96
# File 'lib/sensu/plugins/prometheus/metrics.rb', line 88

# Builds the client's "percent free" query for the inode total/free series of
# a single mountpoint and hands the results to `prepare_metrics`.
#
# @param cfg [Hash] check configuration; reads `cfg['mount']` and the optional
#   `cfg['name']` (when absent the name comes from `nice_disk_name`).
# @return [Object] whatever `prepare_metrics` returns for "inode_<name>".
def inode(cfg)
  selector = %(mountpoint="#{cfg['mount']}")
  label = cfg['name'] || nice_disk_name(cfg['mount'])

  files_series = "node_filesystem_files{#{selector}}"
  files_free_series = "node_filesystem_files_free{#{selector}}"
  expression = @client.percent_query_free(files_series, files_free_series)

  prepare_metrics("inode_#{label}", @client.query(expression))
end

#load_per_cluster(cfg) ⇒ Object

Calculates the load of an entire cluster.



180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
# File 'lib/sensu/plugins/prometheus/metrics.rb', line 180

# Calculates the load of an entire cluster: the sum of `node_load5` over the
# job divided by the count of `node_cpu{mode="system"}` series of that job.
#
# @param cfg [Hash] check configuration; `cfg['cluster']` is used as the
#   Prometheus `job` label and `cfg['source']` is copied into the result.
# @return [Array<Hash>] a single `{ 'source', 'value', 'name' }` entry whose
#   value is rounded to two decimals, or an empty array when the query
#   returned no result.
def load_per_cluster(cfg)
  cluster = cfg['cluster']
  source = cfg['source']
  query = format(
    'sum(node_load5{job="%s"})/count(node_cpu{mode="system",job="%s"})',
    cluster,
    cluster
  )

  result = @client.query(query).first
  # An empty result set would otherwise blow up on `nil['value']`; report
  # nothing instead, like the other cluster-wide checks do.
  if result.nil?
    log.debug(
      "[load_per_cluster] no data for cluster: '#{cluster}', source: '#{source}'"
    )
    return []
  end

  value = result['value'][1].to_f.round(2)
  log.debug(
    "[load_per_cluster] value: '#{value}', source: '#{source}'"
  )

  [{ 'source' => source, 'value' => value, 'name' => "#{cluster}_load" }]
end

#load_per_cluster_minus_n(cfg) ⇒ Object

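Returns a single metric entry, with the sum of the total load on cluster divided by the total amount of CPUs that remain after discounting `minus_n` nodes (each counted as the average number of CPUs per node).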



200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
# File 'lib/sensu/plugins/prometheus/metrics.rb', line 200

# Calculates the cluster load as if `minus_n` nodes were missing: the summed
# `node_load5` is divided by the CPU count minus `minus_n` times the average
# CPU count per node.
#
# @param cfg [Hash] check configuration; `cfg['cluster']` is used as the
#   Prometheus `job` label, `cfg['minus_n']` is formatted into the query as an
#   integer, and `cfg['source']` is copied into the result.
# @return [Array<Hash>] a single `{ 'source', 'value', 'name' }` entry whose
#   value is rounded to two decimals, or an empty array when the query
#   returned no result.
def load_per_cluster_minus_n(cfg)
  cluster = cfg['cluster']
  minus_n = cfg['minus_n']
  source = cfg['source']
  sum_load = "sum(node_load5{job=\"#{cluster}\"})"
  total_cpus = "count(node_cpu{mode=\"system\",job=\"#{cluster}\"})"
  total_nodes = "count(node_load5{job=\"#{cluster}\"})"

  # load / (cpus - (cpus / nodes) * n)
  query = format(
    '%s/(%s-(%s/%s)*%d)',
    sum_load, total_cpus, total_cpus, total_nodes, minus_n
  )

  result = @client.query(query).first
  # An empty result set would otherwise blow up on `nil['value']`; report
  # nothing instead, like the other cluster-wide checks do.
  if result.nil?
    log.debug(
      "[load_per_cluster_minus_n] no data for cluster: '#{cluster}', source: '#{source}'"
    )
    return []
  end

  value = result['value'][1].to_f.round(2)
  log.debug(
    "[load_per_cluster_minus_n] value: '#{value}', source: '#{source}'"
  )

  [{ 'source' => source, 'value' => value, 'name' => "#{cluster}_load_minus_n" }]
end

#load_per_cpu(_) ⇒ Object

Current load per CPU.



222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
# File 'lib/sensu/plugins/prometheus/metrics.rb', line 222

# Current load per CPU: each instance's `node_load5` (rounded to two
# decimals) divided by the number of `node_cpu{mode="system"}` series counted
# for that same instance.
#
# Instances that report a load but have no positive CPU count are skipped;
# dividing by a missing count would otherwise yield Infinity or NaN.
#
# @param _ [Object] unused check configuration.
# @return [Array<Hash>] one `{ 'source', 'value', 'name' => 'load' }` entry
#   per instance that has both a load sample and a CPU count.
def load_per_cpu(_)
  cpu_per_source = {}
  @client.query(
    '(count(node_cpu{mode="system"})by(instance))'
  ).each do |result|
    source = result['metric']['instance']
    cpu_per_source[source] = result['value'][1]
  end

  metrics = []
  @client.query('node_load5').each do |result|
    source = result['metric']['instance']
    # `nil.to_f` is 0.0, so a missing count is caught by the same guard as zero.
    cpus = cpu_per_source[source].to_f
    unless cpus > 0
      log.debug(
        "[load_per_cpu] skipping source without CPU count: '#{source}'"
      )
      next
    end

    value = result['value'][1].to_f.round(2)
    load_on_cpu = value / cpus
    log.debug(
      "[load_per_cpu] value: '#{load_on_cpu}', source: '#{source}'"
    )
    metrics << {
      'source' => source,
      'value' => load_on_cpu,
      'name' => 'load'
    }
  end
  metrics
end

#memory(_) ⇒ Object

Query the percentage free memory.



153
154
155
156
157
158
159
# File 'lib/sensu/plugins/prometheus/metrics.rb', line 153

# Runs the client's "percent free" query over the total and available memory
# series and hands the results to `prepare_metrics` under the name "memory".
#
# @param _ [Object] unused check configuration.
# @return [Object] whatever `prepare_metrics` returns.
def memory(_)
  expression = @client.percent_query_free('node_memory_MemTotal', 'node_memory_MemAvailable')
  samples = @client.query(expression)
  prepare_metrics('memory', samples)
end

#memory_per_cluster(cfg) ⇒ Object

Percentage free memory cluster wide.



162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
# File 'lib/sensu/plugins/prometheus/metrics.rb', line 162

# Runs the client's "percent free" query over the summed total and available
# memory of every node in one job, reporting each result rounded to two
# decimals.
#
# @param cfg [Hash] check configuration; `cfg['cluster']` is used as the
#   Prometheus `job` label and `cfg['source']` is copied into each entry.
# @return [Array<Hash>] one `{ 'source', 'value', 'name' }` entry per result;
#   empty when the query returns nothing.
def memory_per_cluster(cfg)
  cluster = cfg['cluster']
  origin = cfg['source']
  job_selector = %({job="#{cluster}"})

  expression = @client.percent_query_free(
    "sum(node_memory_MemTotal#{job_selector})",
    "sum(node_memory_MemAvailable#{job_selector})"
  )

  @client.query(expression).map do |entry|
    pct = entry['value'][1].to_f.round(2)
    log.debug("[memory_per_cluster] value: '#{pct}', source: '#{origin}'")
    { 'source' => origin, 'value' => pct, 'name' => "#{cluster}_memory" }
  end
end

#predict_disk_all(cfg) ⇒ Object

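Composes a query that, based on a sample window (the last 24 hours by default), predicts which disks will run out of space within the configured number of days.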



99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
# File 'lib/sensu/plugins/prometheus/metrics.rb', line 99

# Asks Prometheus, via `predict_linear` over `node_filesystem_avail`, which
# filesystems are expected to drop below zero available space within the
# configured number of days, and folds the answer into one check entry.
#
# @param cfg [Hash] check configuration. Reads `cfg['days']` (converted with
#   `to_i`), `cfg['filter']` (inserted right after the metric name),
#   `cfg['sample_size']` (range vector, default '24h'), `cfg['exit_code']`
#   (status when disks are found, default 1) and `cfg['source']`.
# @return [Array<Hash>] a single entry with 'status', 'output', 'name' and
#   'source'; status is 0 when no disk is returned by the query.
def predict_disk_all(cfg)
  days = cfg['days'].to_i
  expression = format(
    'predict_linear(node_filesystem_avail%s[%s], %i) < 0',
    cfg['filter'] || {},
    cfg['sample_size'] || '24h',
    days * 86_400
  )
  failure_status = cfg['exit_code'] || 1

  affected = @client.query(expression).map do |entry|
    labels = entry['metric']
    "#{labels['instance']}:#{labels['mountpoint']}"
  end

  status, output =
    if affected.empty?
      [0, "No disks are predicted to run out of space in the next #{days} days"]
    else
      listing = affected.join(',')
      [failure_status.to_i,
       "Disks predicted to run out of space in the next #{days} days: #{listing}"]
    end

  [{ 'status' => status,
     'output' => output,
     'name' => 'predict_disk_all',
     'source' => cfg['source'] }]
end

#service(cfg) ⇒ Object

Service metrics will contain its “state” as “value”.



133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
# File 'lib/sensu/plugins/prometheus/metrics.rb', line 133

# Queries `node_systemd_unit_state` for one unit/state pair and annotates
# every prepared metric with a status (from `equals`) and an output line.
#
# @param cfg [Hash] check configuration; reads `cfg['name']`, plus optional
#   `cfg['state']` (default 'active') and `cfg['state_required']` (default 1).
# @return [Array] the metrics from `prepare_metrics`, each with 'status' and
#   'output' set.
def service(cfg)
  settings = { 'state' => 'active', 'state_required' => 1 }.merge(cfg)
  unit = settings['name']
  state = settings['state']

  expression = format("node_systemd_unit_state{name='%s',state='%s'}", unit, state)
  prepared = prepare_metrics("service_#{unit}", @client.query(expression))

  prepared.map do |metric|
    metric['status'] = equals(metric['value'], settings['state_required'])
    metric['output'] = "Service: #{unit} (#{state}=#{metric['value']})"
    metric
  end
end