From 3b2090c5c049e1adca1d689312b47dcd15dbbc3a Mon Sep 17 00:00:00 2001 From: James Munson Date: Mon, 19 Aug 2024 10:38:12 -0600 Subject: [PATCH] fix(manager): fix logic for when RWX workload is restarted after node failure Signed-off-by: James Munson --- controller/kubernetes_pod_controller.go | 37 ++++++++++++++++++------- 1 file changed, 27 insertions(+), 10 deletions(-) diff --git a/controller/kubernetes_pod_controller.go b/controller/kubernetes_pod_controller.go index cc91d5cd96..6adffb08e3 100644 --- a/controller/kubernetes_pod_controller.go +++ b/controller/kubernetes_pod_controller.go @@ -209,19 +209,12 @@ func (kc *KubernetesPodController) handleWorkloadPodDeletionIfCSIPluginPodIsDown return nil } - storageNetworkSetting, err := kc.ds.GetSettingWithAutoFillingRO(types.SettingNameStorageNetwork) - if err != nil { - log.WithError(err).Warnf("%s. Failed to get setting %v", logAbort, types.SettingNameStorageNetwork) - return nil - } - - storageNetworkForRWXVolumeEnabled, err := kc.ds.GetSettingAsBool(types.SettingNameStorageNetworkForRWXVolumeEnabled) + isStorageNetworkForRWXVolume, err := kc.isStorageNetworkForRWXVolume() if err != nil { - log.WithError(err).Warnf("%s. Failed to get setting %v", logAbort, types.SettingNameStorageNetworkForRWXVolumeEnabled) + log.WithError(err).Warnf("%s. Failed to check isStorageNetwork", logAbort) return nil } - - if !types.IsStorageNetworkForRWXVolume(storageNetworkSetting, storageNetworkForRWXVolumeEnabled) { + if !isStorageNetworkForRWXVolume { return nil } @@ -329,6 +322,20 @@ func (kc *KubernetesPodController) handleWorkloadPodDeletionIfCSIPluginPodIsDown return nil } +func (kc *KubernetesPodController) isStorageNetworkForRWXVolume() (bool, error) { + storageNetworkSetting, err := kc.ds.GetSettingWithAutoFillingRO(types.SettingNameStorageNetwork) + if err != nil { + return false, errors.Wrapf(err, "Failed to get setting %v", types.SettingNameStorageNetwork) + } + + storageNetworkForRWXVolumeEnabled, err := kc.ds.GetSettingAsBool(types.SettingNameStorageNetworkForRWXVolumeEnabled) + if err != nil { + return false, errors.Wrapf(err, "Failed to get setting %v", types.SettingNameStorageNetworkForRWXVolumeEnabled) + } + + return types.IsStorageNetworkForRWXVolume(storageNetworkSetting, storageNetworkForRWXVolumeEnabled), nil +} + // handlePodDeletionIfNodeDown determines whether we are allowed to forcefully delete a pod // from a failed node based on the users chosen NodeDownPodDeletionPolicy. // This is necessary because Kubernetes never forcefully deletes pods on a down node, @@ -476,6 +483,11 @@ func (kc *KubernetesPodController) handlePodDeletionIfVolumeRequestRemount(pod * return err } + isStorageNetworkForRWXVolume, err := kc.isStorageNetworkForRWXVolume() + if err != nil { + kc.logger.WithError(err).Warn("Failed to check isStorageNetwork, assuming not") + } + // Only delete pod which has startTime < vol.Status.RemountRequestAt AND timeNow > vol.Status.RemountRequestAt + delayDuration // The delayDuration is to make sure that we don't repeatedly delete the pod too fast // when vol.Status.RemountRequestAt is updated too quickly by volumeController @@ -493,6 +505,11 @@ func (kc *KubernetesPodController) handlePodDeletionIfVolumeRequestRemount(pod * if vol.Status.RemountRequestedAt == "" { continue } + + if isRegularRWXVolume(vol) && !isStorageNetworkForRWXVolume { + continue + } + remountRequestedAt, err := time.Parse(time.RFC3339, vol.Status.RemountRequestedAt) if err != nil { return err