-
Notifications
You must be signed in to change notification settings - Fork 27
/
events.go
322 lines (302 loc) · 15.8 KB
/
events.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
// Unless explicitly stated otherwise all files in this repository are licensed
// under the Apache License Version 2.0.
// This product includes software developed at Datadog (https://www.datadoghq.com/).
// Copyright 2024 Datadog, Inc.
package v1beta1
import (
corev1 "k8s.io/api/core/v1"
)
const (
EventOnTargetTemplate string = "Failing probably caused by disruption %s: "
SourceDisruptionComponent string = "disruption-controller"
SourceDisruptionCronComponent string = "disruptioncron-controller"
)
// EventCategory identifies the kind of resource an event is attached to.
type EventCategory string

// Known event categories.
const (
	// TargetEvent marks events that are only attached to a target.
	TargetEvent EventCategory = "TargetEvent"

	// DisruptionEvent marks events that are only attached to the disruption.
	DisruptionEvent EventCategory = "DisruptionEvent"

	// DisruptionCronEvent marks events that are only attached to the
	// disruption cron.
	DisruptionCronEvent EventCategory = "DisruptionCronEvent"

	// ChaosPodEvent marks events that are only attached to a chaos pod.
	ChaosPodEvent EventCategory = "ChaosPodEvent"
)
// EventReason is the string that identifies an event.
type EventReason string

// MatchEventReason reports whether the Reason of the provided Kubernetes
// event is exactly equal to this reason.
func (r EventReason) MatchEventReason(e corev1.Event) bool {
	return e.Reason == string(r)
}
// Event describes one kind of event sent out by the controller: its
// Kubernetes event type, its reason, the message templates to use depending
// on where the event is attached, and its category.
type Event struct {
	// Type is the Kubernetes event type: Warning or Normal.
	Type string
	// Reason is the short identifier of the event.
	Reason EventReason
	// OnTargetTemplateMessage is the template message to attach to the
	// target resource (pod or node). Empty if the event should not be sent
	// to a target.
	OnTargetTemplateMessage string
	// OnDisruptionTemplateMessage is the template message to attach to the
	// disruption for a single occurrence. It is kept separate from the
	// aggregated message so that it can include more information. Empty if
	// the event should not be sent on a disruption.
	OnDisruptionTemplateMessage string
	// OnDisruptionTemplateAggMessage is the aggregated variant of
	// OnDisruptionTemplateMessage. Empty when the event has no aggregated
	// form.
	OnDisruptionTemplateAggMessage string
	// Category is one of TargetEvent, DisruptionEvent, DisruptionCronEvent
	// or ChaosPodEvent.
	Category EventCategory
}
// Complete list of the reasons of the events sent out by the controller.
// Each reason below is also a key of the Events map.
const (
	// --- Targeted pods ---

	// Warning events
	EventTargetPodWarningState                      EventReason = "TargetPodInWarningState"
	EventTargetContainerWarningState                EventReason = "TargetPodContainersInWarningState"
	EventTargetLivenessProbeChange                  EventReason = "TargetPodLivenessProbe"
	EventTargetTooManyRestarts                      EventReason = "TargetPodTooManyRestarts"
	EventTargetReadinessProbeChangeBeforeDisruption EventReason = "TargetReadinessProbeChangeBeforeDisruption"
	// Normal events
	EventTargetPodRecoveredState                    EventReason = "RecoveredWarningStateInTargetPod"
	EventTargetReadinessProbeChangeDuringDisruption EventReason = "ReadinessProbeChangeDuringDisruption"

	// --- Targeted nodes ---

	// Warning events
	EventTargetNodeMemPressureState        EventReason = "TargetNodeUnderMemoryPressure"
	EventTargetNodeDiskPressureState       EventReason = "TargetNodeUnderDiskPressure"
	EventTargetNodeUnavailableNetworkState EventReason = "TargetNodeUnavailableNetwork"
	EventTargetNodeWarningState            EventReason = "TargetNodeInWarningState"
	// Normal events
	EventTargetNodeRecoveredState EventReason = "RecoveredWarningStateInTargetNode"

	// --- Disruptions ---

	// Warning events
	// (EventDisruptionNoMoreValidTargets is listed here but is registered
	// with a Normal type in Events.)
	EventEmptyDisruption                EventReason = "EmptyDisruption"
	EventDisruptionCreationFailed       EventReason = "CreateFailed"
	EventDisruptionStuckOnRemoval       EventReason = "StuckOnRemoval"
	EventInvalidDisruptionLabelSelector EventReason = "InvalidLabelSelector"
	EventDisruptionNoMoreValidTargets   EventReason = "NoMoreTargets"
	EventDisruptionNoTargetsFound       EventReason = "NoTargetsFound"
	EventInvalidSpecDisruption          EventReason = "InvalidSpec"
	// Normal events
	EventDisruptionChaosPodCreated EventReason = "ChaosPodCreated"
	EventDisruptionFinished        EventReason = "Finished"
	EventDisruptionCreated         EventReason = "Created"
	EventDisruptionDurationOver    EventReason = "DurationOver"
	EventDisruptionGCOver          EventReason = "GCOver"
	EventDisrupted                 EventReason = "Disrupted"

	// --- DisruptionCrons ---

	EventDisruptionCronCreated EventReason = "DisruptionCronCreated"
	EventDisruptionCronUpdated EventReason = "DisruptionCronUpdated"
	EventDisruptionCronDeleted EventReason = "DisruptionCronDeleted"

	// --- Injection (chaos pods) ---

	// Warning events
	EventChaosPodFailedState EventReason = "ChaosPodWarningState"
)
// Events is the registry of every event the controller can send, keyed by
// reason. Each entry carries the Kubernetes event type, the message templates
// (which may contain fmt verbs to be filled by the sender) and the category
// of the event. IsTargetEvent looks events up in this map by reason.
//
// The map is a package-level variable; nothing in this file mutates it after
// initialization.
var Events = map[EventReason]Event{
	// Targeted pods related events.
	EventTargetPodWarningState: {
		Type:                           corev1.EventTypeWarning,
		Reason:                         EventTargetPodWarningState,
		OnDisruptionTemplateMessage:    "Targeted pod %s is failing",
		OnDisruptionTemplateAggMessage: "Targeted pod(s) are failing",
		OnTargetTemplateMessage:        EventOnTargetTemplate + "pod is failing",
		Category:                       TargetEvent,
	},
	EventTargetContainerWarningState: {
		Type:                           corev1.EventTypeWarning,
		Reason:                         EventTargetContainerWarningState,
		OnDisruptionTemplateMessage:    "Container on targeted pod %s is failing",
		OnDisruptionTemplateAggMessage: "Containers on targeted pod(s) are failing",
		OnTargetTemplateMessage:        EventOnTargetTemplate + "containers on pod are failing",
		Category:                       TargetEvent,
	},
	EventTargetLivenessProbeChange: {
		Type:                           corev1.EventTypeWarning,
		Reason:                         EventTargetLivenessProbeChange,
		OnDisruptionTemplateMessage:    "Liveness probe on targeted pod %s is failing",
		OnDisruptionTemplateAggMessage: "Liveness probe(s) on targeted pod(s) are failing",
		OnTargetTemplateMessage:        EventOnTargetTemplate + "liveness probes on pod are failing",
		Category:                       TargetEvent,
	},
	// The two readiness probe events share the same messages; only the type
	// differs (Normal during the disruption, Warning before it).
	EventTargetReadinessProbeChangeDuringDisruption: {
		Type:                           corev1.EventTypeNormal,
		Reason:                         EventTargetReadinessProbeChangeDuringDisruption,
		OnDisruptionTemplateMessage:    "Readiness probe on targeted pod %s is failing",
		OnDisruptionTemplateAggMessage: "Readiness probes on targeted pod(s) are failing",
		OnTargetTemplateMessage:        EventOnTargetTemplate + "readiness probes on pod are failing",
		Category:                       TargetEvent,
	},
	EventTargetReadinessProbeChangeBeforeDisruption: {
		Type:                           corev1.EventTypeWarning,
		Reason:                         EventTargetReadinessProbeChangeBeforeDisruption,
		OnDisruptionTemplateMessage:    "Readiness probe on targeted pod %s is failing",
		OnDisruptionTemplateAggMessage: "Readiness probes on targeted pod(s) are failing",
		OnTargetTemplateMessage:        EventOnTargetTemplate + "readiness probes on pod are failing",
		Category:                       TargetEvent,
	},
	EventTargetTooManyRestarts: {
		Type:                           corev1.EventTypeWarning,
		Reason:                         EventTargetTooManyRestarts,
		OnDisruptionTemplateMessage:    "Targeted pod %s has restarted too many times",
		OnDisruptionTemplateAggMessage: "Targeted pod(s) have restarted too many times",
		OnTargetTemplateMessage:        EventOnTargetTemplate + "pod has restarted too many times",
		Category:                       TargetEvent,
	},
	EventTargetPodRecoveredState: {
		Type:                           corev1.EventTypeNormal,
		Reason:                         EventTargetPodRecoveredState,
		OnDisruptionTemplateMessage:    "Targeted pod %s seems to have recovered",
		OnDisruptionTemplateAggMessage: "Targeted pod(s) seem to have recovered",
		OnTargetTemplateMessage:        "pod seems to have recovered from the disruption %s failure",
		Category:                       TargetEvent,
	},

	// Targeted nodes related events.
	EventTargetNodeMemPressureState: {
		Type:                           corev1.EventTypeWarning,
		Reason:                         EventTargetNodeMemPressureState,
		OnDisruptionTemplateMessage:    "Targeted node %s is under memory pressure",
		OnDisruptionTemplateAggMessage: "Targeted node(s) are under memory pressure",
		OnTargetTemplateMessage:        EventOnTargetTemplate + "node is under memory pressure",
		Category:                       TargetEvent,
	},
	EventTargetNodeDiskPressureState: {
		Type:                           corev1.EventTypeWarning,
		Reason:                         EventTargetNodeDiskPressureState,
		OnDisruptionTemplateMessage:    "Targeted node %s is under disk pressure",
		OnDisruptionTemplateAggMessage: "Targeted node(s) are under disk pressure",
		OnTargetTemplateMessage:        EventOnTargetTemplate + "node is under disk pressure",
		Category:                       TargetEvent,
	},
	EventTargetNodeUnavailableNetworkState: {
		Type:                           corev1.EventTypeWarning,
		Reason:                         EventTargetNodeUnavailableNetworkState,
		OnDisruptionTemplateMessage:    "Targeted node %s network is unavailable",
		OnDisruptionTemplateAggMessage: "Targeted node(s) network are unavailable",
		OnTargetTemplateMessage:        EventOnTargetTemplate + "node network is unavailable",
		Category:                       TargetEvent,
	},
	EventTargetNodeWarningState: {
		Type:                           corev1.EventTypeWarning,
		Reason:                         EventTargetNodeWarningState,
		OnDisruptionTemplateMessage:    "Targeted node %s is not ready",
		OnDisruptionTemplateAggMessage: "Targeted node(s) are not ready",
		OnTargetTemplateMessage:        EventOnTargetTemplate + "node is not ready",
		Category:                       TargetEvent,
	},
	EventTargetNodeRecoveredState: {
		Type:                           corev1.EventTypeNormal,
		Reason:                         EventTargetNodeRecoveredState,
		OnDisruptionTemplateMessage:    "Targeted node %s seems to have recovered",
		OnDisruptionTemplateAggMessage: "Targeted node(s) seem to have recovered",
		OnTargetTemplateMessage:        "Node seems to have recovered from the disruption %s failure",
		Category:                       TargetEvent,
	},

	// Disruption related events.
	EventDisruptionDurationOver: {
		Type:                        corev1.EventTypeNormal,
		Reason:                      EventDisruptionDurationOver,
		OnDisruptionTemplateMessage: "The disruption has lived longer than its specified duration, and will be deleted in %s.",
		Category:                    DisruptionEvent,
	},
	EventDisruptionGCOver: {
		Type:                        corev1.EventTypeNormal,
		Reason:                      EventDisruptionGCOver,
		OnDisruptionTemplateMessage: "The disruption has lived %s longer than its specified duration, and will now be deleted.",
		Category:                    DisruptionEvent,
	},
	EventEmptyDisruption: {
		Type:                        corev1.EventTypeWarning,
		Reason:                      EventEmptyDisruption,
		OnDisruptionTemplateMessage: "No disruption recognized for \"%s\" therefore no disruption applied.",
		Category:                    DisruptionEvent,
	},
	EventDisruptionCreationFailed: {
		Type:                        corev1.EventTypeWarning,
		Reason:                      EventDisruptionCreationFailed,
		OnDisruptionTemplateMessage: "Injection pod for disruption \"%s\" failed to be created",
		Category:                    DisruptionEvent,
	},
	EventDisruptionStuckOnRemoval: {
		Type:                        corev1.EventTypeWarning,
		Reason:                      EventDisruptionStuckOnRemoval,
		OnDisruptionTemplateMessage: "Instance is stuck on removal because of chaos pods not being able to terminate correctly, please check pods logs before manually removing their finalizer. https://github.com/DataDog/chaos-controller/blob/main/docs/faq.md",
		Category:                    DisruptionEvent,
	},
	EventInvalidDisruptionLabelSelector: {
		Type:                        corev1.EventTypeWarning,
		Reason:                      EventInvalidDisruptionLabelSelector,
		OnDisruptionTemplateMessage: "%s. No targets will be selected.",
		Category:                    DisruptionEvent,
	},
	EventDisruptionNoMoreValidTargets: {
		Type:                        corev1.EventTypeNormal,
		Reason:                      EventDisruptionNoMoreValidTargets,
		OnDisruptionTemplateMessage: "No more targets found for injection for this disruption (either ignored or already targeted by another disruption)",
		Category:                    DisruptionEvent,
	},
	EventDisruptionNoTargetsFound: {
		Type:                        corev1.EventTypeWarning,
		Reason:                      EventDisruptionNoTargetsFound,
		OnDisruptionTemplateMessage: "The given label selector did not return any targets. Please ensure that both the selector and the count are correct (should be either a percentage or an integer greater than 0).",
		Category:                    DisruptionEvent,
	},
	EventInvalidSpecDisruption: {
		Type:                        corev1.EventTypeWarning,
		Reason:                      EventInvalidSpecDisruption,
		OnDisruptionTemplateMessage: "%s",
		Category:                    DisruptionEvent,
	},
	EventDisruptionChaosPodCreated: {
		Type:                        corev1.EventTypeNormal,
		Reason:                      EventDisruptionChaosPodCreated,
		OnDisruptionTemplateMessage: "Created disruption injection pod for \"%s\"",
		Category:                    DisruptionEvent,
	},
	EventDisruptionCreated: {
		Type:                        corev1.EventTypeNormal,
		Reason:                      EventDisruptionCreated,
		OnDisruptionTemplateMessage: "Disruption created",
		Category:                    DisruptionEvent,
	},

	// DisruptionCron related events.
	EventDisruptionCronCreated: {
		Type:                        corev1.EventTypeNormal,
		Reason:                      EventDisruptionCronCreated,
		OnDisruptionTemplateMessage: "DisruptionCron created",
		Category:                    DisruptionCronEvent,
	},
	EventDisruptionCronUpdated: {
		Type:                        corev1.EventTypeNormal,
		Reason:                      EventDisruptionCronUpdated,
		OnDisruptionTemplateMessage: "DisruptionCron updated",
		Category:                    DisruptionCronEvent,
	},
	EventDisruptionCronDeleted: {
		Type:                        corev1.EventTypeNormal,
		Reason:                      EventDisruptionCronDeleted,
		OnDisruptionTemplateMessage: "DisruptionCron deleted",
		Category:                    DisruptionCronEvent,
	},
	EventDisruptionFinished: {
		Type:   corev1.EventTypeNormal,
		Reason: EventDisruptionFinished,
		// NOTE(review): "DisruptionEvent finished" may be a leftover of a
		// rename ("Disruption finished"); kept as-is, confirm before changing.
		OnDisruptionTemplateMessage: "DisruptionEvent finished",
		Category:                    DisruptionEvent,
	},
	EventDisrupted: {
		Type:   corev1.EventTypeNormal,
		Reason: EventDisrupted,
		// NOTE(review): this is the only DisruptionEvent entry that defines an
		// on-target message instead of an on-disruption one; confirm the
		// category is intended.
		OnTargetTemplateMessage: "Pod %s from disruption %s targeted this resource for injection",
		Category:                DisruptionEvent,
	},

	// Injection related events.
	EventChaosPodFailedState: {
		Type:                           corev1.EventTypeWarning,
		Reason:                         EventChaosPodFailedState,
		OnDisruptionTemplateMessage:    "Chaos pod %s is not ready. Phase: '%s'. Reason: '%s'",
		OnDisruptionTemplateAggMessage: "Chaos pod(s) are not ready",
		Category:                       ChaosPodEvent,
	},
}
// IsNotifiableEvent reports whether the event can be broadcast to our
// notifiers, i.e. whether its source component is either
// SourceDisruptionComponent or SourceDisruptionCronComponent.
func IsNotifiableEvent(event corev1.Event) bool {
	switch event.Source.Component {
	case SourceDisruptionComponent, SourceDisruptionCronComponent:
		return true
	default:
		return false
	}
}
// IsRecoveryEvent reports whether the reason of the event is one of the
// recovery reasons: EventTargetPodRecoveredState or
// EventTargetNodeRecoveredState.
func IsRecoveryEvent(event corev1.Event) bool {
	if EventTargetPodRecoveredState.MatchEventReason(event) {
		return true
	}

	return EventTargetNodeRecoveredState.MatchEventReason(event)
}
// IsDisruptionCompletionEvent reports whether the reason of the event is one
// of EventDisruptionFinished, EventDisruptionDurationOver or
// EventDisruptionGCOver.
func IsDisruptionCompletionEvent(event corev1.Event) bool {
	if EventDisruptionFinished.MatchEventReason(event) {
		return true
	}

	if EventDisruptionDurationOver.MatchEventReason(event) {
		return true
	}

	return EventDisruptionGCOver.MatchEventReason(event)
}
// IsTargetEvent reports whether the event was emitted by
// SourceDisruptionComponent and its reason is registered in Events with the
// TargetEvent category. Events with an unregistered reason are never target
// events.
func IsTargetEvent(event corev1.Event) bool {
	if event.Source.Component != SourceDisruptionComponent {
		return false
	}

	if registered, found := Events[EventReason(event.Reason)]; found {
		return registered.Category == TargetEvent
	}

	return false
}
// GetEventReason returns the Reason of the given Kubernetes event converted
// to an EventReason. No check is made that the reason is registered in
// Events.
func GetEventReason(event corev1.Event) EventReason {
	reason := event.Reason

	return EventReason(reason)
}