From b2e40709c30e48e8699efdca505fb0f69d97e587 Mon Sep 17 00:00:00 2001 From: Shahriyar Jalayeri Date: Wed, 22 Jan 2025 14:04:10 +0200 Subject: [PATCH] Allow device to have multiple reasons for being in maintenance mode This change allows the device to have multiple reasons for being in maintenance mode. This is useful when multiple conditions are met that require the device to be in maintenance mode. For example, if the TPM is in error and the device disk is full, the device can be in maintenance mode for both reasons. Clearing one of the reasons will not take the device out of maintenance mode if there are other reasons for it to be in maintenance mode. Signed-off-by: Shahriyar Jalayeri --- pkg/pillar/cmd/nodeagent/handletimers.go | 5 +- pkg/pillar/cmd/nodeagent/nodeagent.go | 66 +++++++++++------------- pkg/pillar/cmd/zedagent/parseconfig.go | 12 ++--- pkg/pillar/cmd/zedagent/zedagent.go | 8 +-- pkg/pillar/types/zedagenttypes.go | 31 ++++++----- 5 files changed, 61 insertions(+), 61 deletions(-) diff --git a/pkg/pillar/cmd/nodeagent/handletimers.go b/pkg/pillar/cmd/nodeagent/handletimers.go index eb566a5d04..58f4804950 100644 --- a/pkg/pillar/cmd/nodeagent/handletimers.go +++ b/pkg/pillar/cmd/nodeagent/handletimers.go @@ -131,12 +131,9 @@ func handleRebootOnVaultLocked(ctxPtr *nodeagentContext) { scheduleNodeOperation(ctxPtr, errStr, types.BootReasonVaultFailure, types.DeviceOperationReboot) } else { - log.Noticef("Setting %s", - types.MaintenanceModeReasonVaultLockedUp) // there is no image update in progress, this happened after a normal // reboot. enter maintenance mode - ctxPtr.maintMode = true - ctxPtr.maintModeReason = types.MaintenanceModeReasonVaultLockedUp + setMaintenanceModeReason(ctxPtr, types.MaintenanceModeReasonVaultLockedUp, "handleRebootOnVaultLocked") publishNodeAgentStatus(ctxPtr) } } else { diff --git a/pkg/pillar/cmd/nodeagent/nodeagent.go b/pkg/pillar/cmd/nodeagent/nodeagent.go index 2f4f97c615..6b1450edc1 100644 --- a/pkg/pillar/cmd/nodeagent/nodeagent.go +++ b/pkg/pillar/cmd/nodeagent/nodeagent.go @@ -780,14 +780,8 @@ func handleVaultStatusImpl(ctxArg interface{}, key string, if vault.ConversionComplete { ctx.vaultOperational = types.TS_ENABLED // Do we need to clear maintenance? - if ctx.maintMode && - ctx.maintModeReason == types.MaintenanceModeReasonVaultLockedUp { - log.Noticef("Clearing %s", - types.MaintenanceModeReasonVaultLockedUp) - ctx.maintMode = false - ctx.maintModeReason = types.MaintenanceModeReasonNone - publishNodeAgentStatus(ctx) - } + maybeClearMaintenanceModeReason(ctx, types.MaintenanceModeReasonVaultLockedUp, "handleVaultStatusImpl") + publishNodeAgentStatus(ctx) } else { ctx.vaultOperational = types.TS_NONE } @@ -811,7 +805,6 @@ func handleVolumeMgrStatusImpl(ctxArg interface{}, key string, ctx := ctxArg.(*nodeagentContext) vms := statusArg.(types.VolumeMgrStatus) - changed := false // This RemainingSpace takes into account the space reserved for // /persist/newlog plus the percentage/minimum reserved for the rest // of EVE-OS. Thus it can never go negative, but zero means that @@ -819,26 +812,12 @@ func handleVolumeMgrStatusImpl(ctxArg interface{}, key string, // a tiny app instance. if vms.RemainingSpace == 0 { log.Warnf("MaintenanceMode due to no remaining diskspace") - // Do not overwrite a vault maintenance mode - if !ctx.maintMode { - log.Noticef("Setting %s", - types.MaintenanceModeReasonNoDiskSpace) - ctx.maintModeReason = types.MaintenanceModeReasonNoDiskSpace - ctx.maintMode = true - changed = true - } + // Add to maintenance mode reasons + setMaintenanceModeReason(ctx, types.MaintenanceModeReasonNoDiskSpace, "handleVolumeMgrStatusImpl") + publishNodeAgentStatus(ctx) } else { // Do we need to clear maintenance? - if ctx.maintMode && - ctx.maintModeReason == types.MaintenanceModeReasonNoDiskSpace { - log.Noticef("Clearing %s", - types.MaintenanceModeReasonNoDiskSpace) - ctx.maintMode = false - ctx.maintModeReason = types.MaintenanceModeReasonNone - changed = true - } - } - if changed { + maybeClearMaintenanceModeReason(ctx, types.MaintenanceModeReasonNoDiskSpace, "handleVolumeMgrStatusImpl") publishNodeAgentStatus(ctx) } } @@ -880,16 +859,31 @@ func handleTpmStatusImpl(ctxArg interface{}, key string, if tpm.Status == types.MaintenanceModeReasonTpmEncFailure { log.Errorf("handleTpmStatusImpl: TPM manager reported TPM error : %s", tpm.Error) - log.Noticef("Setting %s", types.MaintenanceModeReasonTpmEncFailure) - ctx.maintMode = true - ctx.maintModeReason = types.MaintenanceModeReasonTpmEncFailure + setMaintenanceModeReason(ctx, types.MaintenanceModeReasonTpmEncFailure, "handleTpmStatusImpl") publishNodeAgentStatus(ctx) } else { - if ctx.maintMode && ctx.maintModeReason == types.MaintenanceModeReasonTpmEncFailure { - log.Noticef("Clearing %s", types.MaintenanceModeReasonTpmEncFailure) - ctx.maintMode = false - ctx.maintModeReason = types.MaintenanceModeReasonNone - publishNodeAgentStatus(ctx) - } + maybeClearMaintenanceModeReason(ctx, types.MaintenanceModeReasonTpmEncFailure, "handleTpmStatusImpl") + publishNodeAgentStatus(ctx) + } +} + +func maybeClearMaintenanceModeReason(ctx *nodeagentContext, reason types.MaintenanceModeReason, caller string) { + if ctx.maintModeReason&reason == reason { + clearMaintenanceModeReason(ctx, reason, caller) + } +} + +func setMaintenanceModeReason(ctx *nodeagentContext, reason types.MaintenanceModeReason, caller string) { + log.Noticef("%s setting %s", caller, reason) + ctx.maintModeReason |= reason + ctx.maintMode = true +} + +func clearMaintenanceModeReason(ctx *nodeagentContext, reason types.MaintenanceModeReason, caller string) { + log.Noticef("%s clearing %s", caller, reason) + ctx.maintModeReason &^= reason + if ctx.maintModeReason == types.MaintenanceModeReasonNone { + log.Noticef("%s : No reason to be in maintenance mode, clearing maintenance mode", caller) + ctx.maintMode = false } } diff --git a/pkg/pillar/cmd/zedagent/parseconfig.go b/pkg/pillar/cmd/zedagent/parseconfig.go index 5ee30a01c8..e18fed6b1e 100644 --- a/pkg/pillar/cmd/zedagent/parseconfig.go +++ b/pkg/pillar/cmd/zedagent/parseconfig.go @@ -89,7 +89,7 @@ func parseConfig(getconfigCtx *getconfigContext, config *zconfig.EdgeDevConfig, // Did MaintenanceMode change? if ctx.apiMaintenanceMode != config.MaintenanceMode { ctx.apiMaintenanceMode = config.MaintenanceMode - mergeMaintenanceMode(ctx) + mergeMaintenanceMode(ctx, "parseConfig") } // Did the ForceFallbackCounter change? If so we publish for @@ -2733,7 +2733,7 @@ func parseConfigItems(ctx *getconfigContext, config *zconfig.EdgeDevConfig, newMaintenanceMode := newGlobalConfig.GlobalValueTriState(types.MaintenanceMode) if oldMaintenanceMode != newMaintenanceMode { ctx.zedagentCtx.gcpMaintenanceMode = newMaintenanceMode - mergeMaintenanceMode(ctx.zedagentCtx) + mergeMaintenanceMode(ctx.zedagentCtx, "parseConfigItems") } pub := ctx.zedagentCtx.pubGlobalConfig @@ -2749,7 +2749,7 @@ func parseConfigItems(ctx *getconfigContext, config *zconfig.EdgeDevConfig, // mergeMaintenanceMode handles the configItem override (unless NONE) // and the API setting -func mergeMaintenanceMode(ctx *zedagentContext) { +func mergeMaintenanceMode(ctx *zedagentContext, caller string) { switch ctx.gcpMaintenanceMode { case types.TS_ENABLED: // Overrides everything, and sets maintenance mode @@ -2765,13 +2765,13 @@ func mergeMaintenanceMode(ctx *zedagentContext) { if ctx.apiMaintenanceMode { // set reason as user requested ctx.maintModeReason = types.MaintenanceModeReasonUserRequested - } else if ctx.localMaintenanceMode { + } else if ctx.maintModeReason != ctx.localMaintModeReason { // set reason to reflect exact local reason ctx.maintModeReason = ctx.localMaintModeReason } } - log.Noticef("Changed maintenanceMode to %t, with reason as %s, considering {%v, %v, %v}", - ctx.maintenanceMode, ctx.maintModeReason.String(), ctx.gcpMaintenanceMode, + log.Noticef("%s changed maintenanceMode to %t, with reason as %s, considering {%v, %v, %v}", + caller, ctx.maintenanceMode, ctx.maintModeReason.String(), ctx.gcpMaintenanceMode, ctx.apiMaintenanceMode, ctx.localMaintenanceMode) } diff --git a/pkg/pillar/cmd/zedagent/zedagent.go b/pkg/pillar/cmd/zedagent/zedagent.go index 70e88b7a70..246526c0a8 100644 --- a/pkg/pillar/cmd/zedagent/zedagent.go +++ b/pkg/pillar/cmd/zedagent/zedagent.go @@ -2395,7 +2395,7 @@ func handleGlobalConfigImpl(ctxArg interface{}, key string, ctx.globalConfig = *gcp ctx.GCInitialized = true ctx.gcpMaintenanceMode = gcp.GlobalValueTriState(types.MaintenanceMode) - mergeMaintenanceMode(ctx) + mergeMaintenanceMode(ctx, "handleGlobalConfigImpl") reinitNetdumper(ctx) } @@ -2520,10 +2520,12 @@ func handleNodeAgentStatusImpl(ctxArg interface{}, key string, if status.DevicePoweroff { handleDeviceOperation(ctx, types.DeviceOperationPoweroff) } - if ctx.localMaintenanceMode != status.LocalMaintenanceMode { + if ctx.localMaintenanceMode != status.LocalMaintenanceMode || + ctx.localMaintModeReason != status.LocalMaintenanceModeReason { ctx.localMaintenanceMode = status.LocalMaintenanceMode ctx.localMaintModeReason = status.LocalMaintenanceModeReason - mergeMaintenanceMode(ctx) + + mergeMaintenanceMode(ctx, "handleNodeAgentStatusImpl") } if naHasRealChange(*getconfigCtx.NodeAgentStatus, status) { diff --git a/pkg/pillar/types/zedagenttypes.go b/pkg/pillar/types/zedagenttypes.go index fa6bfa86f9..4f7faa096b 100644 --- a/pkg/pillar/types/zedagenttypes.go +++ b/pkg/pillar/types/zedagenttypes.go @@ -401,20 +401,27 @@ const ( // String returns the verbose equivalent of MaintenanceModeReason code func (mmr MaintenanceModeReason) String() string { - switch mmr { - case MaintenanceModeReasonNone: + reason := []string{} + if mmr == MaintenanceModeReasonNone { return "MaintenanceModeReasonNone" - case MaintenanceModeReasonUserRequested: - return "MaintenanceModeReasonUserRequested" - case MaintenanceModeReasonVaultLockedUp: - return "MaintenanceModeReasonVaultLockedUp" - case MaintenanceModeReasonNoDiskSpace: - return "MaintenanceModeReasonNoDiskSpace" - case MaintenanceModeReasonTpmEncFailure: - return "MaintenanceModeReasonTpmEncFailure" - default: - return fmt.Sprintf("Unknown MaintenanceModeReason %d", mmr) } + if (mmr & MaintenanceModeReasonUserRequested) == MaintenanceModeReasonUserRequested { + reason = append(reason, "MaintenanceModeReasonUserRequested") + } + if (mmr & MaintenanceModeReasonVaultLockedUp) == MaintenanceModeReasonVaultLockedUp { + reason = append(reason, "MaintenanceModeReasonVaultLockedUp") + } + if (mmr & MaintenanceModeReasonNoDiskSpace) == MaintenanceModeReasonNoDiskSpace { + reason = append(reason, "MaintenanceModeReasonNoDiskSpace") + } + if (mmr & MaintenanceModeReasonTpmEncFailure) == MaintenanceModeReasonTpmEncFailure { + reason = append(reason, "MaintenanceModeReasonTpmEncFailure") + } + if len(reason) == 0 { + return "Unknown MaintenanceModeReason" + } + + return strings.Join(reason, "|") } // NodeAgentStatus :