22import click
33
44from tabulate import tabulate
5+ from alertaclient .models .heartbeat import MAX_LATENCY
56
67
78@click .command ('heartbeats' , short_help = 'List heartbeats' )
9+ @click .option ('--alert' , is_flag = True , help = 'Send alerts on stale or slow heartbeats' )
10+ @click .option ('--severity' , '-s' , metavar = 'SEVERITY' , default = 'major' , help = 'Set the severity for stale heartbeat alerts' )
811@click .option ('--purge' , is_flag = True , help = 'Delete stale heartbeats' )
912@click .pass_obj
10- def cli (obj , purge ):
13+ def cli (obj , alert , severity , purge ):
1114 """List heartbeats."""
1215 client = obj ['client' ]
1316 timezone = obj ['timezone' ]
@@ -18,8 +21,59 @@ def cli(obj, purge):
1821 heartbeats = client .get_heartbeats ()
1922 click .echo (tabulate ([h .tabular (timezone ) for h in heartbeats ], headers = headers , tablefmt = obj ['output' ]))
2023
21- expired = [hb for hb in heartbeats if hb .status == 'expired ' ]
24+ not_ok = [hb for hb in heartbeats if hb .status != 'ok ' ]
2225 if purge :
23- with click .progressbar (expired , label = 'Purging {} heartbeats' .format (len (expired ))) as bar :
26+ with click .progressbar (not_ok , label = 'Purging {} heartbeats' .format (len (not_ok ))) as bar :
2427 for b in bar :
2528 client .delete_heartbeat (b .id )
29+
30+ elif alert :
31+ with click .progressbar (heartbeats , label = 'Alerting {} heartbeats' .format (len (heartbeats ))) as bar :
32+ for b in bar :
33+ params = dict (filter (lambda a : len (a ) == 2 , map (lambda a : a .split (':' ), b .tags )))
34+ environment = params .get ('environment' , 'Production' )
35+ group = params .get ('group' , 'System' )
36+ tags = list (filter (lambda a : not a .startswith ('environment:' ) and not a .startswith ('group:' ), b .tags ))
37+
38+ if b .status == 'expired' : # aka. "stale"
39+ client .send_alert (
40+ resource = b .origin ,
41+ event = 'HeartbeatFail' ,
42+ correlate = ['HeartbeatFail' , 'HeartbeatSlow' , 'HeartbeatOK' ],
43+ group = group ,
44+ environment = environment ,
45+ service = ['Alerta' ],
46+ severity = severity ,
47+ value = '{}' .format (b .since ),
48+ text = 'Heartbeat not received in {} seconds' .format (b .timeout ),
49+ tags = tags ,
50+ type = 'heartbeatAlert'
51+ )
52+ elif b .status == 'slow' :
53+ client .send_alert (
54+ resource = b .origin ,
55+ event = 'HeartbeatSlow' ,
56+ correlate = ['HeartbeatFail' , 'HeartbeatSlow' , 'HeartbeatOK' ],
57+ group = group ,
58+ environment = environment ,
59+ service = ['Alerta' ],
60+ severity = severity ,
61+ value = '{}ms' .format (b .latency ),
62+ text = 'Heartbeat took more than {}ms to be processed' .format (MAX_LATENCY ),
63+ tags = tags ,
64+ type = 'heartbeatAlert'
65+ )
66+ else :
67+ client .send_alert (
68+ resource = b .origin ,
69+ event = 'HeartbeatOK' ,
70+ correlate = ['HeartbeatFail' , 'HeartbeatSlow' , 'HeartbeatOK' ],
71+ group = group ,
72+ environment = environment ,
73+ service = ['Alerta' ],
74+ severity = 'normal' ,
75+ value = '' ,
76+ text = 'Heartbeat OK' ,
77+ tags = tags ,
78+ type = 'heartbeatAlert'
79+ )
0 commit comments