From 4a8792640bba3f19d8ff3fe0fecc058d5a24a36b Mon Sep 17 00:00:00 2001 From: Andrey Smirnov Date: Thu, 2 Dec 2021 23:10:24 +0300 Subject: [PATCH] fix: check for server power state when in use Sometimes, if a power-off event is immediately followed by server allocation, IPMI might "lie" about the power status of the server: the server is still being powered off, while it reports a power-on status. In this case Sidero might fail to power on an allocated Server. Work around that by periodically re-checking the power state while the server is in use. Signed-off-by: Andrey Smirnov (cherry picked from commit 274ae33fc7c3b4b8f5b517914f730a4db3a9840a) --- app/sidero-controller-manager/controllers/server_controller.go | 3 ++- app/sidero-controller-manager/pkg/constants/constants.go | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/app/sidero-controller-manager/controllers/server_controller.go b/app/sidero-controller-manager/controllers/server_controller.go index 21cb68b31..2afec9b05 100644 --- a/app/sidero-controller-manager/controllers/server_controller.go +++ b/app/sidero-controller-manager/controllers/server_controller.go @@ -216,7 +216,8 @@ func (r *ServerReconciler) Reconcile(req ctrl.Request) (ctrl.Result, error) { } } - return f(true, ctrl.Result{}) + // keep checking power state from time to time, as sometimes IPMI lies about the power state + return f(true, ctrl.Result{RequeueAfter: constants.PowerCheckPeriod}) case !s.Status.InUse && !s.Status.IsClean: // when server is set to PXE boot to be wiped, ConditionPowerCycle is set to mark server // as power cycled to avoid duplicate reboot attempts from subsequent Reconciles diff --git a/app/sidero-controller-manager/pkg/constants/constants.go b/app/sidero-controller-manager/pkg/constants/constants.go index ca05cfee8..c10135404 100644 --- a/app/sidero-controller-manager/pkg/constants/constants.go +++ b/app/sidero-controller-manager/pkg/constants/constants.go @@ -14,6 +14,7 @@ const ( InitrdAsset = "initramfs.xz" DefaultRequeueAfter = time.Second * 20 
+ PowerCheckPeriod = 5 * time.Minute DefaultServerRebootTimeout = time.Minute * 20 )