Class: Bosh::Monitor::Plugins::Resurrector

Inherits:
Base
  • Object
show all
Includes:
HttpRequestHelper
Defined in:
lib/bosh/monitor/plugins/resurrector.rb

Instance Attribute Summary collapse

Attributes inherited from Base

#event_kinds, #logger, #options

Instance Method Summary collapse

Methods included from HttpRequestHelper

#send_http_get_request, #send_http_post_request, #send_http_put_request, #send_http_request

Methods inherited from Base

#validate_options

Constructor Details

#initialize(options = {}) ⇒ Resurrector

Returns a new instance of Resurrector.



11
12
13
14
15
16
17
18
19
20
# File 'lib/bosh/monitor/plugins/resurrector.rb', line 11

# Builds the plugin from its configuration hash.
#
# Reads the 'director' entry out of @options (presumably populated by the
# superclass initializer -- confirm against Base) and keeps both the parsed
# endpoint URI and the raw director options for later requests.
#
# @param options [Hash] plugin options; must contain a 'director' entry
#   whose 'endpoint' value is handed to URI()
# @raise [ArgumentError] when no 'director' entry is configured
def initialize(options={})
  super(options)
  director = @options['director']
  # The comma is required: `raise ArgumentError 'msg'` is parsed as a call to
  # a method named ArgumentError and fails with NoMethodError instead.
  raise ArgumentError, 'director options not set' unless director

  @url              = URI(director['endpoint'])
  @director_options = director
  @processor        = Bhm.event_processor
  @alert_tracker    = ResurrectorHelper::AlertTracker.new(@options)
end

Instance Attribute Details

#url ⇒ Object (readonly)

Returns the value of attribute url.



9
10
11
# File 'lib/bosh/monitor/plugins/resurrector.rb', line 9

# Read-only accessor for the @url instance variable, which #initialize
# assigns from URI(director['endpoint']).
#
# @return [Object] the current value of @url
def url
  @url
end

Instance Method Details

#process(alert) ⇒ Object



31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
# File 'lib/bosh/monitor/plugins/resurrector.rb', line 31

# Handles a single alert and, when it identifies an unresponsive instance,
# asks the director to scan and fix that instance.
#
# Flow:
# 1. Alerts lacking 'deployment', 'job' or 'instance_id' are logged and ignored.
# 2. The alert is recorded with the alert tracker.
# 3. If director_info yields nothing, an error is logged and processing stops.
# 4. If the tracker reports a meltdown for the deployment, a severity-1 alert
#    is emitted through the event processor and no request is sent.
# 5. Otherwise a PUT to /deployments/<deployment>/scan_and_fix is issued.
#
# @param alert [#attributes, #created_at] the alert to process
# @return [Object] the result of the last logger / HTTP call, or nil when the
#   director is not responding
def process(alert)
  deployment = alert.attributes['deployment']
  job = alert.attributes['job']
  id = alert.attributes['instance_id']

  # only when the agent times out do we add deployment, job & id to the alert
  # attributes, so this won't trigger a recreate for other types of alerts
  unless deployment && job && id
    return logger.warn("(Resurrector) event did not have deployment, job and id: #{alert}")
  end

  agent_key = ResurrectorHelper::JobInstanceKey.new(deployment, job, id)
  @alert_tracker.record(agent_key, alert.created_at)

  # Look the director info up once and reuse it for the auth header below.
  info = director_info
  unless info
    logger.error("(Resurrector) director is not responding with the status")
    return
  end

  if @alert_tracker.melting_down?(deployment)
    # freak out
    ts = Time.now.to_i
    @processor.process(:alert,
                       severity: 1,
                       source: "HM plugin resurrector",
                       title: "We are in meltdown.",
                       created_at: ts)

    logger.error("(Resurrector) we are in meltdown.")
  else
    # The request is only built on the path that actually sends it, so no
    # auth work happens while in meltdown.
    request = {
        head: {
            'Content-Type' => 'application/json',
            'authorization' => auth_provider(info).auth_header
        },
        body: Yajl::Encoder.encode({'jobs' => {job => [id]}})
    }

    # Work on a copy: assigning the path on @url itself would leave the
    # object exposed by the `url` reader pointing at the last deployment.
    scan_and_fix_url = @url.dup
    scan_and_fix_url.path = "/deployments/#{deployment}/scan_and_fix"

    # queue instead, and only queue if it isn't already in the queue
    # what if we can't keep up with the failure rate?
    # - maybe not, maybe the meltdown detection takes care of the rate issue
    logger.warn("(Resurrector) notifying director to recreate unresponsive VM: #{deployment} #{job}/#{id}")

    send_http_put_request(scan_and_fix_url.to_s, request)
  end
end

#run ⇒ Object



22
23
24
25
26
27
28
29
# File 'lib/bosh/monitor/plugins/resurrector.rb', line 22

# Announces that the plugin is active, provided the EventMachine reactor is
# already running.
#
# @return [false, Object] false when the reactor is not running; otherwise
#   whatever the logger's info call returns
def run
  if EM.reactor_running?
    logger.info("Resurrector is running...")
  else
    logger.error("Resurrector plugin can only be started when event loop is running")
    false
  end
end