-
Notifications
You must be signed in to change notification settings - Fork 24
/
icinga.go
215 lines (187 loc) · 6.75 KB
/
icinga.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
/*
* Authors:
* Simon Gerber <[email protected]>
*
* License:
* Copyright (c) 2019, VSHN AG, <[email protected]>
* Licensed under "BSD 3-Clause". See LICENSE file.
*/
package webhook
import (
"crypto/sha256"
"fmt"
"regexp"
"sort"
"strings"
"time"
"github.com/prometheus/alertmanager/template"
"github.com/vshn/go-icinga2-client/icinga2"
"github.com/vshn/signalilo/config"
)
// serviceNamePattern describes the service names accepted by
// validateServiceName: 1 to 128 characters drawn from ASCII letters, digits
// and the punctuation `-+_.:,`. It is compiled once at package initialisation
// instead of on every validation call; a compiled *regexp.Regexp is safe for
// concurrent use.
var serviceNamePattern = regexp.MustCompile(`^[-+_.:,a-zA-Z0-9]{1,128}$`)

// validateServiceName reports whether the computed service name matches the
// constraints given by the Icinga configuration (see serviceNamePattern).
// The empty string and names longer than 128 characters are rejected.
func validateServiceName(serviceName string) bool {
	return serviceNamePattern.MatchString(serviceName)
}
// mapToStableString renders a map of alert labels as a single string of
// "key:value " pairs (each pair followed by one space), ordered by key.
// Because the keys are sorted, the same label set always yields the same
// string regardless of map iteration order.
//
// The "severity" label is left out, so the result does not change when only
// the severity of an alert changes.
func mapToStableString(data map[string]string) string {
	names := make([]string, 0, len(data))
	for name := range data {
		if name == "severity" {
			continue
		}
		names = append(names, name)
	}
	sort.Strings(names)

	var out strings.Builder
	for _, name := range names {
		fmt.Fprintf(&out, "%v:%v ", name, data[name])
	}
	return out.String()
}
// computeServiceName derives the internal Icinga2 service name for an alert.
//
// The name has the form "<alertname>_<hash>", where <hash> is the first
// 8 bytes (16 hex characters) of a SHA-256 digest over the bridge UUID from
// the configuration followed by the stable string form of the alert's labels
// (see mapToStableString). If the alert has no "alertname" label the prefix
// is empty and this is logged at verbosity level 2.
//
// An error is returned when the resulting name is rejected by
// validateServiceName. The data parameter is not used.
func computeServiceName(
	data template.Data,
	alert template.Alert,
	c config.Configuration) (string, error) {
	logger := c.GetLogger()

	digest := sha256.New()
	// Mixing in the bridge UUID keeps this instance from accidentally
	// touching services that belong to another instance.
	_, _ = digest.Write([]byte(c.GetConfig().UUID))
	_, _ = digest.Write([]byte(mapToStableString(alert.Labels)))
	// Truncating to 8 bytes yields 16 hex characters.
	labelhash := fmt.Sprintf("%x", digest.Sum(nil)[:8])

	alertName := alert.Labels["alertname"]
	if alertName == "" {
		logger.V(2).Infof("alert doesn't have label 'alertname', just using %v as service name", labelhash)
	}

	serviceName := alertName + "_" + labelhash
	if !validateServiceName(serviceName) {
		return "", fmt.Errorf("Service name '%v' doesn't match icinga2 constraints", serviceName)
	}
	return serviceName, nil
}
// computeDisplayName returns the "human-readable" display name used for the
// Icinga2 service: the value of the alert's "alertname" label, or the empty
// string if that label is absent. The returned error is always nil and the
// data parameter is not used.
func computeDisplayName(data template.Data, alert template.Alert) (string, error) {
	displayName := alert.Labels["alertname"]
	return displayName, nil
}
// severityToExitStatus translates an alert's status and severity label into
// an exit status which Icinga2 understands.
//
//   - status "firing": the lower-cased severity is looked up in
//     severityLevels; a severity without an entry yields 3 ("UNKNOWN").
//   - status "resolved": always 0, marking the service as normal.
//   - any other status: 3 ("UNKNOWN").
func severityToExitStatus(status string, severity string, severityLevels map[string]int) int {
	const unknown = 3

	switch status {
	case "firing":
		if level, found := severityLevels[strings.ToLower(severity)]; found {
			return level
		}
		return unknown
	case "resolved":
		return 0
	default:
		return unknown
	}
}
// createServiceData assembles the icinga2.Service object describing an alert.
//
// Service variables start with "bridge_uuid" and "keep_for" from the
// configuration and are then extended, in this order, with the alert's labels
// (prefix "label_"), its annotations (prefix "annotation_") and the
// configured static service variables. Notes, notes URL and action URL are
// taken from the "description" annotation, the "runbook_url" annotation and
// the alert's generator URL respectively.
//
// A heartbeatInterval greater than zero marks the service as a heartbeat:
// "dummy_text" and "dummy_state" are set from the "message" annotation and
// status, check and retry intervals become the heartbeat interval plus 10%,
// and active checks are enabled regardless of the configuration.
func createServiceData(hostname string,
	serviceName string,
	displayName string,
	alert template.Alert,
	status int,
	heartbeatInterval time.Duration,
	c config.Configuration) icinga2.Service {
	logger := c.GetLogger()
	cfg := c.GetConfig()

	// Defaults first, then alert-derived and static variables on top.
	vars := icinga2.Vars{
		"bridge_uuid": cfg.UUID,
		"keep_for":    cfg.KeepFor,
	}
	vars = mapIcingaVariables(vars, alert.Labels, "label_", logger)
	vars = mapIcingaVariables(vars, alert.Annotations, "annotation_", logger)
	vars = addStaticIcingaVariables(vars, cfg.StaticServiceVars, logger)

	checkSeconds := cfg.ChecksInterval.Seconds()
	serviceData := icinga2.Service{
		Name:               serviceName,
		DisplayName:        displayName,
		HostName:           hostname,
		CheckCommand:       cfg.CheckCommand,
		EnableActiveChecks: cfg.ActiveChecks,
		Notes:              alert.Annotations["description"],
		Vars:               vars,
		ActionURL:          alert.GeneratorURL,
		NotesURL:           alert.Annotations["runbook_url"],
		CheckInterval:      checkSeconds,
		RetryInterval:      checkSeconds,
		// Soft states are usually unnecessary in Icinga because
		// Prometheus/Alertmanager already handle grace periods; the
		// setting defaults to 1 but stays tunable for other use cases.
		MaxCheckAttempts: float64(cfg.MaxCheckAttempts),
		Templates:        cfg.IcingaConfig.Templates,
	}

	// Anything that is not a heartbeat is complete at this point.
	if heartbeatInterval <= 0 {
		return serviceData
	}

	logger.Infof("Creating alert as heartbeat with check interval %v", heartbeatInterval)
	// Text reported for a missed heartbeat comes from the message annotation.
	serviceData.Vars["dummy_text"] = alert.Annotations["message"]
	// Exit status reported for a missed heartbeat follows the alert's severity.
	serviceData.Vars["dummy_state"] = status
	// Pad the requested interval by 10% to tolerate some network latency
	// before check results arrive.
	paddedSeconds := heartbeatInterval.Seconds() * 1.1
	serviceData.CheckInterval = paddedSeconds
	serviceData.RetryInterval = paddedSeconds
	// Heartbeat checks always need active checks.
	serviceData.EnableActiveChecks = true

	return serviceData
}
// updateOrCreateService updates or creates the Icinga2 service object that
// corresponds to the given alert.
//
// Alerts carrying a "heartbeat" label are treated as heartbeats: the label
// value is parsed as a duration (a parse failure is returned as an error),
// and resolved heartbeats are skipped entirely.
//
// The service is looked up by its full name. If the lookup succeeds the
// service is updated; otherwise it is created, but only when the computed
// exit status is greater than zero. When nothing is processed or created an
// empty icinga2.Service and a nil error are returned. When the update or
// create call fails, the service data is returned together with that error.
func updateOrCreateService(icinga icinga2.Client,
	hostname string,
	serviceName string,
	displayName string,
	alert template.Alert,
	c config.Configuration) (icinga2.Service, error) {
	logger := c.GetLogger()

	// A zero interval means "not a heartbeat".
	var heartbeatInterval time.Duration
	if rawInterval, isHeartbeat := alert.Labels["heartbeat"]; isHeartbeat {
		if alert.Status == "resolved" {
			logger.Infof("Not processing resolved heartbeat for %v", serviceName)
			return icinga2.Service{}, nil
		}
		parsed, parseErr := time.ParseDuration(rawInterval)
		if parseErr != nil {
			return icinga2.Service{}, fmt.Errorf("Unable to parse heartbeat interval: %v", parseErr)
		}
		heartbeatInterval = parsed
	}

	status := severityToExitStatus(alert.Status, alert.Labels["severity"], c.GetConfig().MergedSeverityLevels)
	serviceData := createServiceData(hostname, serviceName, displayName, alert, status, heartbeatInterval, c)

	existing, lookupErr := icinga.GetService(serviceData.FullName())

	// A successful lookup selects the update path; any lookup error is
	// treated as "service does not exist yet".
	switch {
	case lookupErr == nil:
		logger.Infof("updating service: %+v\n", existing.Name)
		// Templates must be dropped for an existing service, otherwise the
		// update fails with: Attribute 'templates' could not be set: Error:
		// Attribute cannot be modified.
		serviceData.Templates = nil
		if updateErr := icinga.UpdateService(serviceData); updateErr != nil {
			return serviceData, updateErr
		}
	case status > 0:
		logger.Infof("creating service: %+v with templates: %v\n", serviceName, serviceData.Templates)
		if createErr := icinga.CreateService(serviceData); createErr != nil {
			return serviceData, createErr
		}
	default:
		logger.Infof("Not creating service %v; status = %v", serviceName, status)
		return icinga2.Service{}, nil
	}

	return serviceData, nil
}