
Commit 55ffc32

Ignore static pods in drain, better logging
1 parent 5fa63fb commit 55ffc32

File tree

2 files changed: +54 -12 lines changed


controllers/node_force_drain_controller.go

Lines changed: 33 additions & 8 deletions
@@ -105,7 +105,13 @@ func (r *NodeForceDrainReconciler) Reconcile(ctx context.Context, req ctrl.Reque
 		}
 		drainingNodes = append(drainingNodes, node)
 	}
-	l.Info("Draining nodes", "nodes", drainingNodes)
+	if len(drainingNodes) > 0 {
+		nodeNames := make([]string, 0, len(drainingNodes))
+		for _, node := range drainingNodes {
+			nodeNames = append(nodeNames, node.Name)
+		}
+		l.Info("Found draining nodes", "nodes", nodeNames)
+	}
 
 	// Update the last observed node drains for nodes that started draining.
 	statusChanged := false
@@ -136,7 +142,7 @@ func (r *NodeForceDrainReconciler) Reconcile(ctx context.Context, req ctrl.Reque
 			if timeUntilNextDrain == 0 || timeUntilDrain < timeUntilNextDrain {
 				timeUntilNextDrain = timeUntilDrain
 			}
-			l.Info("Node is still in grace period", "node", node.Name, "lastObservedNodeDrain", ld)
+			l.Info("Node is still in grace period", "node", node.Name, "lastObservedNodeDrain", ld, "nodeDrainGracePeriod", fd.Spec.NodeDrainGracePeriod, "timeUntilDrain", timeUntilDrain)
 			continue
 		}
 		l.Info("Node is out of grace period", "node", node.Name, "lastObservedNodeDrain", ld, "nodeDrainGracePeriod", fd.Spec.NodeDrainGracePeriod)
@@ -247,7 +253,7 @@ func (r *NodeForceDrainReconciler) forceDrainNode(ctx context.Context, node core
 		if pod.DeletionTimestamp != nil {
 			continue
 		}
-		l.Info("Deleting pod", "pod", pod.Name)
+		l.Info("Deleting pod", "pod", pod.Name, "podNamespace", pod.Namespace)
 		attemptedDeletion = true
 		if err := r.Delete(ctx, &pod); err != nil {
 			deletionErrs = append(deletionErrs, err)
@@ -271,6 +277,7 @@ func (r *NodeForceDrainReconciler) forceDeletePodsOnNode(ctx context.Context, no
 	var timeUntilNextForceDelete time.Duration
 	deletionErrs := make([]error, 0, len(pods))
 	for _, pod := range pods {
+		l := l.WithValues("pod", pod.Name, "podNamespace", pod.Namespace)
 		if pod.DeletionTimestamp == nil {
 			continue
 		}
@@ -281,11 +288,11 @@ func (r *NodeForceDrainReconciler) forceDeletePodsOnNode(ctx context.Context, no
 			if timeUntilNextForceDelete == 0 || timeUntilForceDelete < timeUntilNextForceDelete {
 				timeUntilNextForceDelete = timeUntilForceDelete
 			}
-			l.Info("Pod is still in grace period", "pod", pod.Name, "deletionTimestamp", pod.DeletionTimestamp, "gracePeriod", gracePeriod)
+			l.Info("Pod is still in grace period", "deletionTimestamp", pod.DeletionTimestamp, "gracePeriod", gracePeriod, "timeUntilForceDelete", timeUntilForceDelete)
 			continue
 		}
 
-		l.Info("Force deleting pod", "pod", pod.Name)
+		l.Info("Force deleting pod")
 		if err := r.Delete(ctx, &pod, &client.DeleteOptions{
 			GracePeriodSeconds: ptr.To(int64(0)),
 		}); err != nil {
@@ -309,13 +316,18 @@ func (r *NodeForceDrainReconciler) getDeletionCandidatePodsForNode(ctx context.C
 
 	filteredPods := make([]corev1.Pod, 0, len(pods.Items))
 	for _, pod := range pods.Items {
+		l := l.WithValues("pod", pod.Name, "podNamespace", pod.Namespace)
 		controlledByActiveDaemonSet, err := r.podIsControlledByExistingDaemonSet(ctx, pod)
 		if err != nil {
-			l.Error(err, "Failed to check if pod is controlled by active DaemonSet", "pod", pod.Name)
+			l.Error(err, "Failed to check if pod is controlled by active DaemonSet")
 			continue
 		}
 		if controlledByActiveDaemonSet {
-			l.Info("Pod is controlled by active DaemonSet. Skipping", "pod", pod.Name)
+			l.Info("Pod is controlled by active DaemonSet. Skipping")
+			continue
+		}
+		if r.podIsStatic(pod) {
+			l.Info("Pod is static. Skipping")
 			continue
 		}
 
@@ -325,6 +337,19 @@ func (r *NodeForceDrainReconciler) getDeletionCandidatePodsForNode(ctx context.C
 	return filteredPods, nil
 }
 
+// podIsStatic returns true if the pod is a static pod.
+// https://kubernetes.io/docs/tasks/configure-pod-container/static-pod/
+// Such pods are directly managed by the kubelet and should not be deleted.
+// The check is based on the pod's controller being the node itself.
+func (r *NodeForceDrainReconciler) podIsStatic(pod corev1.Pod) bool {
+	controllerRef := metav1.GetControllerOf(&pod)
+	if controllerRef == nil {
+		return false
+	}
+
+	return controllerRef.APIVersion == "v1" && controllerRef.Kind == "Node"
+}
+
 // podIsControlledByExistingDaemonSet returns true if the pod is controlled by an existing DaemonSet.
 // This is determined by checking if the pod's controller is a DaemonSet and if the DaemonSet exists in the API.
 func (r *NodeForceDrainReconciler) podIsControlledByExistingDaemonSet(ctx context.Context, pod corev1.Pod) (bool, error) {
@@ -345,7 +370,7 @@ func (r *NodeForceDrainReconciler) podIsControlledByExistingDaemonSet(ctx contex
 	// Edge case: Pod was orphaned
 	// See https://github.com/kubernetes/kubectl/blob/442e3d141a35703b7637f41339b9f73cad005c47/pkg/drain/filters.go#L174
 	if apierrors.IsNotFound(err) {
-		l.Info("No daemon set found for pod", "daemonSet", controllerRef.Name, "pod", pod.Name, "namespace", pod.Namespace)
+		l.Info("No daemon set found for pod", "daemonSet", controllerRef.Name, "pod", pod.Name, "podNamespace", pod.Namespace)
 		return false, nil
 	}
 	return false, fmt.Errorf("failed to get DaemonSet %s: %w", controllerRef.Name, err)
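
The logging changes above follow a common logr idiom: shadow the logger with a pod-scoped one at the top of the loop, so later calls such as l.Info("Force deleting pod") no longer repeat the pod keys by hand. A minimal sketch of that pattern; the stdr backend and pod names here are purely illustrative, not part of the commit (the controller itself receives its logger from controller-runtime):

package main

import (
	"log"
	"os"

	"github.com/go-logr/stdr"
)

func main() {
	// Illustrative root logger; any logr.Logger implementation works.
	l := stdr.New(log.New(os.Stdout, "", 0))

	for _, name := range []string{"pod-a", "pod-b"} {
		// Shadow l with a pod-scoped logger, as the commit does in
		// forceDeletePodsOnNode and getDeletionCandidatePodsForNode.
		l := l.WithValues("pod", name, "podNamespace", "default")
		l.Info("Force deleting pod") // "pod" and "podNamespace" are appended automatically
	}
}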

controllers/node_force_drain_controller_test.go

Lines changed: 21 additions & 4 deletions
@@ -226,7 +226,7 @@ func Test_NodeForceDrainReconciler_Reconcile_E2E(t *testing.T) {
 	})
 }
 
-func Test_NodeForceDrainReconciler_Reconcile_DrainIgnoreActiveDaemonsSets(t *testing.T) {
+func Test_NodeForceDrainReconciler_Reconcile_DrainIgnoreActiveDaemonsSetsStaticPods(t *testing.T) {
 	ctx := log.IntoContext(context.Background(), testr.New(t))
 
 	clock := mockClock{now: time.Date(2022, time.April, 4, 8, 0, 0, 0, time.Local)}
@@ -310,6 +310,23 @@ func Test_NodeForceDrainReconciler_Reconcile_DrainIgnoreActiveDaemonsSets(t *tes
 			NodeName: drainingNode.Name,
 		},
 	}
+	staticPod := &corev1.Pod{
+		ObjectMeta: metav1.ObjectMeta{
+			Name:      "static-pod",
+			Namespace: "default",
+			OwnerReferences: []metav1.OwnerReference{
+				{
+					APIVersion: "v1",
+					Kind:       "Node",
+					Name:       drainingNode.Name,
+					Controller: ptr.To(true),
+				},
+			},
+		},
+		Spec: corev1.PodSpec{
+			NodeName: drainingNode.Name,
+		},
+	}
 
 	forceDrain := &managedupgradev1beta1.NodeForceDrain{
 		ObjectMeta: metav1.ObjectMeta{
@@ -334,7 +351,7 @@ func Test_NodeForceDrainReconciler_Reconcile_DrainIgnoreActiveDaemonsSets(t *tes
 	}
 
 	cli := controllerClient(t, forceDrain, drainingNode,
-		podWithNoController, podWithDeploymentController, ownerDaemonSet, podWithActiveDaemonSetController, podWithInvalidDaemonSetController)
+		podWithNoController, podWithDeploymentController, ownerDaemonSet, podWithActiveDaemonSetController, podWithInvalidDaemonSetController, staticPod)
 
 	subject := &NodeForceDrainReconciler{
 		Client: cli,
@@ -346,7 +363,7 @@ func Test_NodeForceDrainReconciler_Reconcile_DrainIgnoreActiveDaemonsSets(t *tes
 
 	var pods corev1.PodList
 	require.NoError(t, cli.List(ctx, &pods, client.InNamespace("default")))
-	require.Len(t, pods.Items, 4, "precondition: all testing pods should be present")
+	require.Len(t, pods.Items, 5, "precondition: all testing pods should be present")
 
 	_, err := subject.Reconcile(ctx, requestForObject(forceDrain))
 	require.NoError(t, err)
@@ -357,7 +374,7 @@ func Test_NodeForceDrainReconciler_Reconcile_DrainIgnoreActiveDaemonsSets(t *tes
 	for _, pod := range podsAfterDrain.Items {
 		podNames = append(podNames, pod.Name)
 	}
-	require.ElementsMatch(t, podNames, []string{"pod-with-active-daemonset-controller"}, "the pod with an active DaemonSet controller should not have been ignored")
+	require.ElementsMatch(t, podNames, []string{"pod-with-active-daemonset-controller", "static-pod"}, "the pod with an active DaemonSet controller and the static pod should be left alone")
 }
 
 func Test_NodeForceDrainReconciler_Reconcile_MaxIntervalDuringActiveDrain(t *testing.T) {
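
For context on the fixture above: the kubelet publishes each static pod to the API as a mirror pod whose controlling owner reference is the Node object itself, which is exactly what the new podIsStatic check looks for. A standalone sketch under that assumption; the pod and node names are hypothetical:

package main

import (
	"fmt"

	corev1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/utils/ptr"
)

// Same logic as the commit's podIsStatic: a pod controlled by the Node
// object is a kubelet-managed mirror pod and must not be force-drained.
func podIsStatic(pod corev1.Pod) bool {
	controllerRef := metav1.GetControllerOf(&pod)
	if controllerRef == nil {
		return false
	}
	return controllerRef.APIVersion == "v1" && controllerRef.Kind == "Node"
}

func main() {
	pod := corev1.Pod{
		ObjectMeta: metav1.ObjectMeta{
			Name: "static-pod", // mirrors the test fixture
			OwnerReferences: []metav1.OwnerReference{{
				APIVersion: "v1",
				Kind:       "Node",
				Name:       "draining-node", // hypothetical node name
				Controller: ptr.To(true),
			}},
		},
	}
	fmt.Println(podIsStatic(pod)) // true: the drain skips this pod
}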
