Track replica mode transition times with replicaTransitionTimeMap (backport #2685) #2695

Merged 3 commits on Mar 30, 2024
21 changes: 20 additions & 1 deletion controller/engine_controller.go
@@ -345,6 +345,7 @@ func (ec *EngineController) syncEngine(key string) (err error) {
if engine.Status.CurrentState != longhorn.InstanceStateRunning {
engine.Status.Endpoint = ""
engine.Status.ReplicaModeMap = nil
engine.Status.ReplicaTransitionTimeMap = nil
}
return nil
}
@@ -870,6 +871,7 @@ func (m *EngineMonitor) refresh(engine *longhorn.Engine) error {
}

currentReplicaModeMap := map[string]longhorn.ReplicaMode{}
currentReplicaTransitionTimeMap := map[string]string{}
for url, r := range replicaURLModeMap {
addr := engineapi.GetAddressFromBackendReplicaURL(url)
replica, exists := addressReplicaMap[addr]
@@ -887,22 +889,38 @@ func (m *EngineMonitor) refresh(engine *longhorn.Engine) error {

currentReplicaModeMap[replica] = r.Mode

- if engine.Status.ReplicaModeMap != nil {
if engine.Status.ReplicaModeMap == nil {
// We are constructing the ReplicaModeMap for the first time. Construct the ReplicaTransitionTimeMap
// alongside it.
currentReplicaTransitionTimeMap[replica] = util.Now()
} else {
if r.Mode != engine.Status.ReplicaModeMap[replica] {
switch r.Mode {
case longhorn.ReplicaModeERR:
m.eventRecorder.Eventf(engine, corev1.EventTypeWarning, constant.EventReasonFaulted, "Detected replica %v (%v) in error", replica, addr)
currentReplicaTransitionTimeMap[replica] = util.Now()
case longhorn.ReplicaModeWO:
m.eventRecorder.Eventf(engine, corev1.EventTypeNormal, constant.EventReasonRebuilding, "Detected rebuilding replica %v (%v)", replica, addr)
currentReplicaTransitionTimeMap[replica] = util.Now()
case longhorn.ReplicaModeRW:
m.eventRecorder.Eventf(engine, corev1.EventTypeNormal, constant.EventReasonRebuilt, "Detected replica %v (%v) has been rebuilt", replica, addr)
currentReplicaTransitionTimeMap[replica] = util.Now()
default:
m.logger.Errorf("Invalid engine replica mode %v", r.Mode)
}
} else {
oldTime, ok := engine.Status.ReplicaTransitionTimeMap[replica]
if !ok {
m.logger.Errorf("BUG: Replica %v (%v) was previously in mode %v but transition time was not recorded", replica, addr, engine.Status.ReplicaModeMap[replica])
currentReplicaTransitionTimeMap[replica] = util.Now()
} else {
currentReplicaTransitionTimeMap[replica] = oldTime
}
}
}
}
engine.Status.ReplicaModeMap = currentReplicaModeMap
engine.Status.ReplicaTransitionTimeMap = currentReplicaTransitionTimeMap

snapshots, err := engineClientProxy.SnapshotList(engine)
if err != nil {
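The refresh logic above stamps a new time only when a replica first appears in the mode map or changes mode, and carries the previous timestamp forward otherwise; replicas that drop out of the mode map simply fall out of the transition map too, since both are rebuilt from scratch each refresh. A minimal, self-contained sketch of that carry-forward rule, with plain string maps standing in for the Longhorn types (the function and variable names here are illustrative, not from the codebase):

```go
package main

import (
	"fmt"
	"time"
)

// transitionTimes mirrors the carry-forward rule above. prevModes may be nil
// (first construction); prevTimes holds the previously recorded stamps.
func transitionTimes(prevModes, newModes, prevTimes map[string]string, now string) map[string]string {
	next := map[string]string{}
	for replica, mode := range newModes {
		switch {
		case prevModes == nil:
			next[replica] = now // first construction: stamp everything
		case mode != prevModes[replica]:
			next[replica] = now // mode changed (or replica is new): record the transition
		default:
			if old, ok := prevTimes[replica]; ok {
				next[replica] = old // unchanged mode: keep the old stamp
			} else {
				next[replica] = now // defensive, mirrors the BUG log above
			}
		}
	}
	return next
}

func main() {
	t0 := time.Date(2024, 3, 30, 10, 0, 0, 0, time.UTC).Format(time.RFC3339)
	times := transitionTimes(nil, map[string]string{"r1": "RW"}, nil, t0)
	fmt.Println(times) // r1 stamped at t0

	t1 := time.Date(2024, 3, 30, 10, 1, 0, 0, time.UTC).Format(time.RFC3339)
	times = transitionTimes(map[string]string{"r1": "RW"},
		map[string]string{"r1": "RW", "r2": "WO"}, times, t1)
	fmt.Println(times) // r1 keeps t0; new replica r2 is stamped at t1
}
```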
@@ -1987,6 +2005,7 @@ func (ec *EngineController) Upgrade(e *longhorn.Engine, log *logrus.Entry) (err
e.Status.CurrentReplicaAddressMap = e.Spec.UpgradedReplicaAddressMap
// reset ReplicaModeMap to reflect the new replicas
e.Status.ReplicaModeMap = nil
e.Status.ReplicaTransitionTimeMap = nil
e.Status.RestoreStatus = nil
e.Status.RebuildStatus = nil
return nil
13 changes: 12 additions & 1 deletion controller/volume_controller.go
@@ -695,7 +695,18 @@ func (c *VolumeController) ReconcileEngineReplicaState(v *longhorn.Volume, es ma
r.Spec.RebuildRetryCount = 0
}
// Set LastHealthyAt to record the last time this replica became RW in an engine.
- r.Spec.LastHealthyAt = now
if transitionTime, ok := e.Status.ReplicaTransitionTimeMap[rName]; !ok {
log.Errorf("BUG: Replica %v is in mode %v but transition time was not recorded", r.Name, mode)
r.Spec.LastHealthyAt = now
} else {
after, err := util.TimestampAfterTimestamp(transitionTime, r.Spec.LastHealthyAt)
if err != nil {
log.Errorf("Failed to check if replica %v transitioned to mode %v after it was last healthy", r.Name, mode)
}
if after || err != nil {
r.Spec.LastHealthyAt = now
}
}
healthyCount++
}
}
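util.TimestampAfterTimestamp is called here but not shown in this diff. Assuming it parses two RFC3339 timestamps and reports whether the first is strictly after the second, a sketch of that contract (signature and error behavior are inferred from this call site, not the real implementation):

```go
package main

import (
	"fmt"
	"time"
)

// timestampAfterTimestamp reports whether timestamp1 is strictly after
// timestamp2, both in RFC3339 form. Note that an empty LastHealthyAt fails
// to parse, which matches the call site's "if after || err != nil" fallback
// to refreshing the field.
func timestampAfterTimestamp(timestamp1, timestamp2 string) (bool, error) {
	t1, err := time.Parse(time.RFC3339, timestamp1)
	if err != nil {
		return false, fmt.Errorf("failed to parse %q: %w", timestamp1, err)
	}
	t2, err := time.Parse(time.RFC3339, timestamp2)
	if err != nil {
		return false, fmt.Errorf("failed to parse %q: %w", timestamp2, err)
	}
	return t1.After(t2), nil
}

func main() {
	// LastHealthyAt is refreshed only if the replica transitioned to RW
	// after it was last recorded healthy (or if the comparison errors).
	after, err := timestampAfterTimestamp("2024-03-30T10:01:00Z", "2024-03-30T10:00:00Z")
	fmt.Println(after, err) // true <nil>
}
```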
1 change: 1 addition & 0 deletions datastore/longhorn.go
@@ -2996,6 +2996,7 @@ func (s *DataStore) ResetMonitoringEngineStatus(e *longhorn.Engine) (*longhorn.E
e.Status.Endpoint = ""
e.Status.LastRestoredBackup = ""
e.Status.ReplicaModeMap = nil
e.Status.ReplicaTransitionTimeMap = nil
e.Status.RestoreStatus = nil
e.Status.PurgeStatus = nil
e.Status.RebuildStatus = nil
13 changes: 9 additions & 4 deletions k8s/crds.yaml
@@ -1490,6 +1490,11 @@ spec:
type: string
nullable: true
type: object
replicaTransitionTimeMap:
additionalProperties:
type: string
description: ReplicaTransitionTimeMap records the time a replica in ReplicaModeMap transitions from one mode to another (or from not being in the ReplicaModeMap to being in it). This information is sometimes required by other controllers (e.g. the volume controller uses it to determine the correct value for replica.Spec.lastHealthyAt).
type: object
restoreStatus:
additionalProperties:
properties:
@@ -2452,18 +2457,18 @@ spec:
evictionRequested:
type: boolean
failedAt:
- description: FailedAt is set when a running replica fails or when a running engine is unable to use a replica for any reason. FailedAt indicates the time the failure occurred. When FailedAt is set, a replica is likely to have useful (though possibly stale) data. A replica with FailedAt set must be rebuilt from a non-failed replica (or it can be used in a salvage if all replicas are failed). FailedAt is cleared before a rebuild or salvage.
description: FailedAt is set when a running replica fails or when a running engine is unable to use a replica for any reason. FailedAt indicates the time the failure occurred. When FailedAt is set, a replica is likely to have useful (though possibly stale) data. A replica with FailedAt set must be rebuilt from a non-failed replica (or it can be used in a salvage if all replicas are failed). FailedAt is cleared before a rebuild or salvage. FailedAt may be later than the corresponding entry in an engine's replicaTransitionTimeMap because it is set when the volume controller acknowledges the change.
type: string
hardNodeAffinity:
type: string
healthyAt:
- description: HealthyAt is set the first time a replica becomes read/write in an engine after creation or rebuild. HealthyAt indicates the time the last successful rebuild occurred. When HealthyAt is set, a replica is likely to have useful (though possibly stale) data. HealthyAt is cleared before a rebuild.
description: HealthyAt is set the first time a replica becomes read/write in an engine after creation or rebuild. HealthyAt indicates the time the last successful rebuild occurred. When HealthyAt is set, a replica is likely to have useful (though possibly stale) data. HealthyAt is cleared before a rebuild. HealthyAt may be later than the corresponding entry in an engine's replicaTransitionTimeMap because it is set when the volume controller acknowledges the change.
type: string
lastFailedAt:
- description: LastFailedAt is always set at the same time as FailedAt. Unlike FailedAt, LastFailedAt is never cleared. LastFailedAt is not a reliable indicator of the state of a replica's data. For example, a replica with LastFailedAt may already be healthy and in use again. However, because it is never cleared, it can be compared to LastHealthyAt to help prevent dangerous replica deletion in some corner cases.
description: LastFailedAt is always set at the same time as FailedAt. Unlike FailedAt, LastFailedAt is never cleared. LastFailedAt is not a reliable indicator of the state of a replica's data. For example, a replica with LastFailedAt may already be healthy and in use again. However, because it is never cleared, it can be compared to LastHealthyAt to help prevent dangerous replica deletion in some corner cases. LastFailedAt may be later than the corresponding entry in an engine's replicaTransitionTimeMap because it is set when the volume controller acknowledges the change.
type: string
lastHealthyAt:
- description: LastHealthyAt is set every time a replica becomes read/write in an engine. Unlike HealthyAt, LastHealthyAt is never cleared. LastHealthyAt is not a reliable indicator of the state of a replica's data. For example, a replica with LastHealthyAt set may be in the middle of a rebuild. However, because it is never cleared, it can be compared to LastFailedAt to help prevent dangerous replica deletion in some corner cases.
description: LastHealthyAt is set every time a replica becomes read/write in an engine. Unlike HealthyAt, LastHealthyAt is never cleared. LastHealthyAt is not a reliable indicator of the state of a replica's data. For example, a replica with LastHealthyAt set may be in the middle of a rebuild. However, because it is never cleared, it can be compared to LastFailedAt to help prevent dangerous replica deletion in some corner cases. LastHealthyAt may be later than the corresponding entry in an engine's replicaTransitionTimeMap because it is set when the volume controller acknowledges the change.
type: string
logRequested:
type: boolean
5 changes: 5 additions & 0 deletions k8s/pkg/apis/longhorn/v1beta2/engine.go
@@ -153,6 +153,11 @@ type EngineStatus struct {
// +nullable
ReplicaModeMap map[string]ReplicaMode `json:"replicaModeMap"`
// +optional
// ReplicaTransitionTimeMap records the time a replica in ReplicaModeMap transitions from one mode to another (or
// from not being in the ReplicaModeMap to being in it). This information is sometimes required by other controllers
// (e.g. the volume controller uses it to determine the correct value for replica.Spec.lastHealthyAt).
ReplicaTransitionTimeMap map[string]string `json:"replicaTransitionTimeMap"`
// +optional
Endpoint string `json:"endpoint"`
// +optional
LastRestoredBackup string `json:"lastRestoredBackup"`
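With the field in EngineStatus, any typed client can read it alongside ReplicaModeMap. A hedged example of consuming the new field; the namespace, engine name, and kubeconfig wiring are placeholders, not values from this PR:

```go
package main

import (
	"context"
	"fmt"

	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/client-go/tools/clientcmd"

	lhclientset "github.com/longhorn/longhorn-manager/k8s/pkg/client/clientset/versioned"
)

func main() {
	// Placeholder wiring: load whatever kubeconfig is at the default location.
	config, err := clientcmd.BuildConfigFromFlags("", clientcmd.RecommendedHomeFile)
	if err != nil {
		panic(err)
	}
	client, err := lhclientset.NewForConfig(config)
	if err != nil {
		panic(err)
	}
	// "longhorn-system" and "example-engine" are placeholder names.
	engine, err := client.LonghornV1beta2().Engines("longhorn-system").Get(
		context.TODO(), "example-engine", metav1.GetOptions{})
	if err != nil {
		panic(err)
	}
	// Report each replica's current mode and when it entered that mode.
	for replica, mode := range engine.Status.ReplicaModeMap {
		fmt.Printf("replica %v: mode %v since %v\n",
			replica, mode, engine.Status.ReplicaTransitionTimeMap[replica])
	}
}
```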
16 changes: 12 additions & 4 deletions k8s/pkg/apis/longhorn/v1beta2/replica.go
@@ -27,25 +27,33 @@ type ReplicaSpec struct {
// +optional
// HealthyAt is set the first time a replica becomes read/write in an engine after creation or rebuild. HealthyAt
// indicates the time the last successful rebuild occurred. When HealthyAt is set, a replica is likely to have
- // useful (though possibly stale) data. HealthyAt is cleared before a rebuild.
// useful (though possibly stale) data. HealthyAt is cleared before a rebuild. HealthyAt may be later than the
// corresponding entry in an engine's replicaTransitionTimeMap because it is set when the volume controller
// acknowledges the change.
HealthyAt string `json:"healthyAt"`
// +optional
// LastHealthyAt is set every time a replica becomes read/write in an engine. Unlike HealthyAt, LastHealthyAt is
// never cleared. LastHealthyAt is not a reliable indicator of the state of a replica's data. For example, a
// replica with LastHealthyAt set may be in the middle of a rebuild. However, because it is never cleared, it can be
- // compared to LastFailedAt to help prevent dangerous replica deletion in some corner cases.
// compared to LastFailedAt to help prevent dangerous replica deletion in some corner cases. LastHealthyAt may be
// later than the corresponding entry in an engine's replicaTransitionTimeMap because it is set when the volume
// controller acknowledges the change.
LastHealthyAt string `json:"lastHealthyAt"`
// +optional
// FailedAt is set when a running replica fails or when a running engine is unable to use a replica for any reason.
// FailedAt indicates the time the failure occurred. When FailedAt is set, a replica is likely to have useful
// (though possibly stale) data. A replica with FailedAt set must be rebuilt from a non-failed replica (or it can
- // be used in a salvage if all replicas are failed). FailedAt is cleared before a rebuild or salvage.
// be used in a salvage if all replicas are failed). FailedAt is cleared before a rebuild or salvage. FailedAt may
// be later than the corresponding entry in an engine's replicaTransitionTimeMap because it is set when the volume
// controller acknowledges the change.
FailedAt string `json:"failedAt"`
// +optional
// LastFailedAt is always set at the same time as FailedAt. Unlike FailedAt, LastFailedAt is never cleared.
// LastFailedAt is not a reliable indicator of the state of a replica's data. For example, a replica with
// LastFailedAt may already be healthy and in use again. However, because it is never cleared, it can be compared to
- // LastHealthyAt to help prevent dangerous replica deletion in some corner cases.
// LastHealthyAt to help prevent dangerous replica deletion in some corner cases. LastFailedAt may be later than the
// corresponding entry in an engine's replicaTransitionTimeMap because it is set when the volume controller
// acknowledges the change.
LastFailedAt string `json:"lastFailedAt"`
// +optional
DiskID string `json:"diskID"`
7 changes: 7 additions & 0 deletions k8s/pkg/apis/longhorn/v1beta2/zz_generated.deepcopy.go

Some generated files are not rendered by default.

7 changes: 7 additions & 0 deletions upgrade/upgrade.go
@@ -26,6 +26,7 @@ import (
"github.com/longhorn/longhorn-manager/upgrade/v14xto150"
"github.com/longhorn/longhorn-manager/upgrade/v151to152"
"github.com/longhorn/longhorn-manager/upgrade/v153to154"
"github.com/longhorn/longhorn-manager/upgrade/v154to155"
"github.com/longhorn/longhorn-manager/upgrade/v1beta1"

longhorn "github.com/longhorn/longhorn-manager/k8s/pkg/apis/longhorn/v1beta2"
@@ -261,6 +262,12 @@ func doResourceUpgrade(namespace string, lhClient *lhclientset.Clientset, kubeCl
return err
}
}
if semver.Compare(lhVersionBeforeUpgrade, "v1.5.5") < 0 {
logrus.Info("Walking through the resource status upgrade path v1.5.4 to v1.5.5")
if err := v154to155.UpgradeResourcesStatus(namespace, lhClient, kubeClient, resourceMaps); err != nil {
return err
}
}
if err := upgradeutil.UpdateResourcesStatus(namespace, lhClient, resourceMaps); err != nil {
return err
}
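The new gate follows the same pattern as the earlier steps: semver.Compare (from golang.org/x/mod/semver) returns a negative value when the first version is lower, so a cluster jumping several versions walks every applicable upgrade path once, in order. A small illustration of the gating:

```go
package main

import (
	"fmt"

	"golang.org/x/mod/semver"
)

func main() {
	// A cluster previously on v1.5.3 must run both the v1.5.4 and v1.5.5 paths,
	// but skips paths it has already passed.
	lhVersionBeforeUpgrade := "v1.5.3"
	for _, target := range []string{"v1.5.2", "v1.5.4", "v1.5.5"} {
		if semver.Compare(lhVersionBeforeUpgrade, target) < 0 {
			fmt.Printf("would walk the upgrade path to %v\n", target)
		}
	}
	// Output:
	// would walk the upgrade path to v1.5.4
	// would walk the upgrade path to v1.5.5
}
```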
20 changes: 20 additions & 0 deletions upgrade/util/util.go
@@ -1000,6 +1000,8 @@ func UpdateResourcesStatus(namespace string, lhClient *lhclientset.Clientset, re
switch resourceKind {
case types.LonghornKindNode:
err = updateNodesStatus(namespace, lhClient, resourceMap.(map[string]*longhorn.Node))
case types.LonghornKindEngine:
err = updateEngineStatus(namespace, lhClient, resourceMap.(map[string]*longhorn.Engine))
default:
return fmt.Errorf("resource kind %v is not able to updated", resourceKind)
}
@@ -1030,3 +1032,21 @@ func updateNodesStatus(namespace string, lhClient *lhclientset.Clientset, nodes
}
return nil
}

func updateEngineStatus(namespace string, lhClient *lhclientset.Clientset, engines map[string]*longhorn.Engine) error {
existingEngineList, err := lhClient.LonghornV1beta2().Engines(namespace).List(context.TODO(), metav1.ListOptions{})
if err != nil {
return err
}
for _, existingEngine := range existingEngineList.Items {
engine, ok := engines[existingEngine.Name]
if !ok {
continue
}

if _, err = lhClient.LonghornV1beta2().Engines(namespace).UpdateStatus(context.TODO(), engine, metav1.UpdateOptions{}); err != nil {
return err
}
}
return nil
}
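updateEngineStatus writes back only the engines present in the provided cache, so upgrade steps can mutate cached objects and rely on this single flush pass. A stripped-down illustration of that pattern, with simplified stand-in types (not the real Longhorn or client-go ones):

```go
package main

import "fmt"

// engine is a simplified stand-in for *longhorn.Engine.
type engine struct {
	name                     string
	replicaTransitionTimeMap map[string]string
}

// flush mimics updateEngineStatus: it walks the engines that exist in the
// cluster and pushes a status update only for those an upgrade step touched
// (i.e. those present in the provided cache).
func flush(existing []string, cache map[string]*engine) {
	for _, name := range existing {
		e, ok := cache[name]
		if !ok {
			continue // untouched engines are skipped
		}
		fmt.Printf("UpdateStatus(%s): %v\n", e.name, e.replicaTransitionTimeMap)
	}
}

func main() {
	cache := map[string]*engine{
		"e1": {name: "e1", replicaTransitionTimeMap: map[string]string{"r1": "2024-03-30T10:00:00Z"}},
	}
	// e2 exists in the cluster but was never cached, so it is not updated.
	flush([]string{"e1", "e2"}, cache)
}
```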
55 changes: 55 additions & 0 deletions upgrade/v154to155/upgrade.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
package v154to155

import (
"github.com/pkg/errors"
apierrors "k8s.io/apimachinery/pkg/api/errors"
clientset "k8s.io/client-go/kubernetes"

lhclientset "github.com/longhorn/longhorn-manager/k8s/pkg/client/clientset/versioned"
upgradeutil "github.com/longhorn/longhorn-manager/upgrade/util"
"github.com/longhorn/longhorn-manager/util"
)

const (
upgradeLogPrefix = "upgrade from v1.5.4 to v1.5.5: "
)

func UpgradeResources(namespace string, lhClient *lhclientset.Clientset, kubeClient *clientset.Clientset, resourceMaps map[string]interface{}) error {
// Currently there are no resources to upgrade. See UpgradeResourcesStatus -> upgradeEngineStatus or previous
// Longhorn versions for examples.
return nil
}

func UpgradeResourcesStatus(namespace string, lhClient *lhclientset.Clientset, kubeClient *clientset.Clientset, resourceMaps map[string]interface{}) error {
// We will probably need to upgrade other resource status as well. See upgradeEngineStatus or previous Longhorn
// versions for examples.
return upgradeEngineStatus(namespace, lhClient, resourceMaps)
}

func upgradeEngineStatus(namespace string, lhClient *lhclientset.Clientset, resourceMaps map[string]interface{}) (err error) {
defer func() {
err = errors.Wrapf(err, upgradeLogPrefix+"upgrade engines failed")
}()

engineMap, err := upgradeutil.ListAndUpdateEnginesInProvidedCache(namespace, lhClient, resourceMaps)
if err != nil {
if apierrors.IsNotFound(err) {
return nil
}
return errors.Wrapf(err, "failed to list all existing Longhorn engines during the engine status upgrade")
}

for _, e := range engineMap {
if e.Status.ReplicaTransitionTimeMap == nil {
e.Status.ReplicaTransitionTimeMap = map[string]string{}
}
for replicaName := range e.Status.ReplicaModeMap {
// We don't have any historical information to rely on, so start at the time of the upgrade.
if _, ok := e.Status.ReplicaTransitionTimeMap[replicaName]; !ok {
e.Status.ReplicaTransitionTimeMap[replicaName] = util.Now()
}
}
}

return nil
}
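Because the loop backfills only missing entries, running the migration more than once is harmless. A quick standalone check of that idempotence, with plain maps in place of the engine status and a fixed stamp in place of util.Now():

```go
package main

import "fmt"

func main() {
	replicaModeMap := map[string]string{"r1": "RW", "r2": "WO"}
	// r1 already has a recorded transition; r2 predates the upgrade.
	replicaTransitionTimeMap := map[string]string{"r1": "2024-03-29T08:00:00Z"}
	now := "2024-03-30T10:00:00Z" // fixed stamp in place of util.Now()

	// The same backfill loop as upgradeEngineStatus above; running it twice
	// changes nothing because existing entries are never overwritten.
	for i := 0; i < 2; i++ {
		for replicaName := range replicaModeMap {
			if _, ok := replicaTransitionTimeMap[replicaName]; !ok {
				replicaTransitionTimeMap[replicaName] = now
			}
		}
	}
	fmt.Println(replicaTransitionTimeMap)
	// map[r1:2024-03-29T08:00:00Z r2:2024-03-30T10:00:00Z]
}
```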