Class: AlertAgent

Inherits:
Object
  • Object
show all
Defined in:
lib/alert_agent.rb

Constant Summary collapse

QUEUE_NAME =
'failed-cloud-monitors'
FAIL =
'fail'.freeze
OK =
'ok'.freeze

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(mq) ⇒ AlertAgent

Returns a new instance of AlertAgent.



20
21
22
23
24
25
26
27
28
29
30
# File 'lib/alert_agent.rb', line 20

# Creates the agent, opens a Redis client and subscribes to the alert queue.
#
# Every delivered message is handed to #process_alerts together with its
# headers. The subscription is opened with :ack => true and the only ack
# visible here happens inside #process_alerts, so a message that is skipped
# while AMQP is closing is never acknowledged by this code.
#
# @param mq [Object] queue handle; must respond to +queue(name)+
#   (presumably an AMQP channel -- confirm against the caller)
def initialize(mq)
  @redis = Redis.new
  @mq = mq
  alert_queue = mq.queue(QUEUE_NAME)
  alert_queue.subscribe(:ack => true) do |headers, message|
    # Shutting down: only log, do not process (and therefore do not ack).
    next Utils::LOG.info("(ignoring message, will be redelivered later)") if AMQP.closing?

    process_alerts(message, headers)
  end
end

Instance Attribute Details

#mq ⇒ Object

Returns the value of attribute mq.



16
17
18
# File 'lib/alert_agent.rb', line 16

# Reader for the @mq instance variable, which the constructor sets to the
# queue handle it was given.
#
# @return [Object] the current value of @mq
def mq
  @mq
end

#redis ⇒ Object

Returns the value of attribute redis.



16
17
18
# File 'lib/alert_agent.rb', line 16

# Reader for the @redis instance variable. The constructor assigns it a
# Redis client, and #get_state / #set_state replace it with a fresh client
# when an operation raises RuntimeError.
#
# @return [Object] the current value of @redis
def redis
  @redis
end

Instance Method Details

#format_alert(server) ⇒ Object



80
81
82
83
84
85
86
87
88
89
90
91
92
# File 'lib/alert_agent.rb', line 80

# Builds the alert payload for a single server report.
#
# @param server [Hash] reads the 'timestamp', 'host' and 'result' keys;
#   'result' is enumerated as (service, status) pairs
# @return [Hash] { 'message' => 'alert', 'data' => {...} } where 'data' holds
#   'Time', 'Severity' ('FAILURE' if any status is 'down', else 'OKAY'),
#   'Plugin' and 'Type' (both "services"), 'Host' and 'alert_message'
#   (the sorted "service: status" pairs joined with ", ")
def format_alert(server)
  data = {}
  data['Time'] = server['timestamp'].to_s

  any_down = server['result'].any? { |_service, status| status == 'down' }
  data['Severity'] = if any_down
                       'FAILURE'
                     else
                       'OKAY'
                     end

  # 'Plugin' and 'Type' deliberately share one string object and are
  # inserted in this order, matching the chained assignment they replace.
  kind = "services"
  data['Plugin'] = kind
  data['Type'] = kind

  data['Host'] = server['host']

  summary = server['result'].sort.map { |service, status| "#{service}: #{status}" }
  data['alert_message'] = summary.join(", ")

  { 'message' => 'alert', 'data' => data }
end

#get_state(server) ⇒ Object



71
72
73
74
75
76
77
78
# File 'lib/alert_agent.rb', line 71

# Reads the last recorded state for a server from Redis, keyed by host.
#
# On RuntimeError the Redis client is replaced with a fresh one and the read
# is retried, up to 10 attempts in total. The retry is scoped to an inner
# begin block on purpose: a `retry` in a method-level rescue restarts the
# whole method body, which would reset the attempt counter on every pass and
# loop forever while Redis stays unavailable.
#
# @param server [Hash] reads the 'host' key
# @return [Object, nil] the stored value for the host, or nil when nothing is
#   stored or every attempt failed
def get_state(server)
  tries = 0
  begin
    @redis[server['host']]
  rescue RuntimeError => e
    @redis = Redis.new
    tries += 1
    retry if tries < 10
    # Best effort: give up quietly for the caller, but leave a trace.
    Utils::LOG.error "redis read failed for #{server['host']} after #{tries} attempts: #{e.message}"
    nil
  end
end

#process_alert(server) ⇒ Object



110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
# File 'lib/alert_agent.rb', line 110

# Sends one alert for a server through both reporting channels and logs it.
#
# The environment label is 'production' when server['api'] is exactly
# 'https://cloud.engineyard.com', otherwise 'staging'.
#
# @param server [Hash] reads 'api', 'timestamp', 'host', 'result', 'token'
# @return [Object] whatever the final Utils::LOG.info call returns
def process_alert(server)
  env = server['api'] == 'https://cloud.engineyard.com' ? 'production' : 'staging'

  report_to_irc(server, env)
  report_to_awsm(server)

  logger = Utils::LOG
  logger.info '*' * 50
  %w[timestamp host result].each { |field| logger.info server[field] }
  # Kept as the last statement so the method's return value is unchanged.
  logger.info server['token']
end

#process_alerts(message, headers) ⇒ Object



32
33
34
35
36
37
38
# File 'lib/alert_agent.rb', line 32

# Handles one queue message: parses it, runs #process_server for each listed
# server, then acknowledges the message.
#
# Each server entry is merged with the alert-level 'env_id' and 'api' values
# before being processed. The ack is only reached if every server was
# processed without raising.
#
# NOTE(review): the parsed value must respond to +servers+, +env_id+ and
# +api+; a plain Hash from JSON.parse would not -- confirm how the payload is
# deserialized.
#
# @param message [String] JSON-encoded alert
# @param headers [Object] delivery headers; must respond to +ack+
def process_alerts(message, headers)
  parsed = JSON.parse(message)
  parsed.servers.each do |entry|
    extras = { 'env_id' => parsed.env_id, 'api' => parsed.api }
    process_server(entry.merge(extras))
  end
  headers.ack
end

#process_server(server) ⇒ Object

Expects a server hash of the form:

{ "host" => host, "result" => result,
  "timestamp" => Time.now.to_i,
  'token' => token }


43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
# File 'lib/alert_agent.rb', line 43

# Compares a server's reported health with its stored state and raises an
# alert only on a transition.
#
# The target state is FAIL when any entry in server['result'] has status
# 'down', otherwise OK. If the stored state already equals the target nothing
# happens; otherwise the new state is stored and #process_alert is called.
#
# @param server [Hash] reads 'result' here; passed on to #get_state,
#   #set_state and #process_alert
# @return [Object, nil] the result of #process_alert on a transition, else nil
def process_server(server)
  down_services = server['result'].select { |_service, status| status == 'down' }
  previous = get_state(server)
  target = down_services.empty? ? OK : FAIL
  return if previous == target

  set_state(server, target)
  process_alert(server)
end

#report_to_awsm(server) ⇒ Object



102
103
104
105
106
107
108
# File 'lib/alert_agent.rb', line 102

# Posts the formatted alert to the server's API endpoint at
# "/reporting/<token>" and logs the outcome.
#
# A RestClient::RequestFailed is logged and swallowed; any other exception
# propagates to the caller. The handler reports the rescued exception itself,
# since interpolating names that are not defined in this method would raise
# NameError from inside the rescue and hide the real failure.
#
# @param server [Hash] reads 'api' and 'token'; passed to #format_alert
def report_to_awsm(server)
  rest = RestClient::Resource.new(server['api'])
  rest["/reporting/#{server['token']}"].post(format_alert(server), {"Accept" => "application/json"})
  Utils::LOG.info "http report to awsm successful"
rescue RestClient::RequestFailed => e
  Utils::LOG.error "http report failed!: #{e.class}:#{e.message}"
end

#report_to_irc(server, env) ⇒ Object



94
95
96
97
98
99
100
# File 'lib/alert_agent.rb', line 94

# Writes a two-line alert summary to a TCP listener on localhost:5678
# (presumably an IRC relay, going by the method name -- confirm).
#
# Best effort: any StandardError is logged and swallowed. The socket is closed
# in an ensure block so a failed write does not leak the connection.
#
# @param server [Hash] reads 'host', 'role', 'env_id' and 'result'
# @param env [String] environment label placed at the start of the message
def report_to_irc(server, env)
  socket = ::TCPSocket.new('localhost', 5678)
  begin
    socket.write "[#{env}] #{server['host']} [#{server['role']}:#{server['env_id']}]\n#{server['result'].inspect}\n"
  ensure
    socket.close
  end
rescue => e
  Utils::LOG.error e.message
end

#set_state(server, state) ⇒ Object



62
63
64
65
66
67
68
69
# File 'lib/alert_agent.rb', line 62

# Stores the given state for a server in Redis, keyed by host.
#
# On RuntimeError the Redis client is replaced with a fresh one and the write
# is retried, up to 10 attempts in total. The retry is scoped to an inner
# begin block on purpose: a `retry` in a method-level rescue restarts the
# whole method body, which would reset the attempt counter on every pass and
# loop forever while Redis stays unavailable.
#
# @param server [Hash] reads the 'host' key
# @param state [Object] value to store (callers in this class pass OK or FAIL)
# @return [Object, nil] +state+ on success, nil when every attempt failed
def set_state(server, state)
  tries = 0
  begin
    @redis[server['host']] = state
  rescue RuntimeError => e
    @redis = Redis.new
    tries += 1
    retry if tries < 10
    # Best effort: give up quietly for the caller, but leave a trace.
    Utils::LOG.error "redis write failed for #{server['host']} after #{tries} attempts: #{e.message}"
    nil
  end
end