From 3fe202d7c1f922417bf3e9f26aa680ab993ce5f9 Mon Sep 17 00:00:00 2001 From: Phan Le Date: Wed, 24 Jul 2024 12:00:18 -0700 Subject: [PATCH] If volume is delinquent, switch owner of volume/engine/replica to share manager CR's owner Without the fix, volume/engine/replica continue to wait for the share manager pod to be scheduled (i.e., pod.Spec.NodeName is non empty) to set ownerID to the same pod's node. However, because we don't want to use pod's imformers, when the share manager pod is scheduled, volume/engine/controller might not catch that event and continue to wait. This introduce up to 30s delay and behavioral inconsistency Also the > 30s delay in share manager pod recreation is destroying the RWX fast failover's original goal longhorn-6205 Signed-off-by: Phan Le (cherry picked from commit 18ac5dc6f64396a60b5a8511c864fb831cf5030c) --- controller/engine_controller.go | 19 ++++++++++++------- controller/replica_controller.go | 19 ++++++++++++------- controller/volume_controller.go | 19 ++++++++++++------- 3 files changed, 36 insertions(+), 21 deletions(-) diff --git a/controller/engine_controller.go b/controller/engine_controller.go index 952ca238b8..e3b48d6711 100644 --- a/controller/engine_controller.go +++ b/controller/engine_controller.go @@ -2296,22 +2296,27 @@ func (ec *EngineController) isResponsibleFor(e *longhorn.Engine, defaultEngineIm err = errors.Wrap(err, "error while checking isResponsibleFor") }() - // If a regular RWX is delinquent, try to switch ownership quickly to the node of the newly created share-manager pod - isDelinquent, err := ec.ds.IsNodeDelinquent(e.Status.OwnerID, e.Spec.VolumeName) + // If a regular RWX is delinquent, try to switch ownership quickly to the owner node of the share manager CR + isOwnerNodeDelinquent, err := ec.ds.IsNodeDelinquent(e.Status.OwnerID, e.Spec.VolumeName) if err != nil { return false, err } - if isDelinquent { - pod, err := ec.ds.GetPodRO(ec.namespace, types.GetShareManagerPodNameFromShareManagerName(e.Spec.VolumeName)) + isSpecNodeDelinquent, err := ec.ds.IsNodeDelinquent(e.Spec.NodeID, e.Spec.VolumeName) + if err != nil { + return false, err + } + preferredOwnerID := e.Spec.NodeID + if isOwnerNodeDelinquent || isSpecNodeDelinquent { + sm, err := ec.ds.GetShareManager(e.Spec.VolumeName) if err != nil && !apierrors.IsNotFound(err) { return false, err } - if pod != nil && ec.controllerID == pod.Spec.NodeName { - return true, nil + if sm != nil { + preferredOwnerID = sm.Status.OwnerID } } - isResponsible := isControllerResponsibleFor(ec.controllerID, ec.ds, e.Name, e.Spec.NodeID, e.Status.OwnerID) + isResponsible := isControllerResponsibleFor(ec.controllerID, ec.ds, e.Name, preferredOwnerID, e.Status.OwnerID) // The engine is not running, the owner node doesn't need to have e.Status.CurrentImage // Fall back to the default logic where we pick a running node to be the owner diff --git a/controller/replica_controller.go b/controller/replica_controller.go index 23d68b7153..d08263e8bd 100644 --- a/controller/replica_controller.go +++ b/controller/replica_controller.go @@ -918,22 +918,27 @@ func (rc *ReplicaController) enqueueAllRebuildingReplicaOnCurrentNode() { } func (rc *ReplicaController) isResponsibleFor(r *longhorn.Replica) (bool, error) { - // If a regular RWX is delinquent, try to switch ownership quickly to the node of the newly created share-manager pod - isDelinquent, err := rc.ds.IsNodeDelinquent(r.Status.OwnerID, r.Spec.VolumeName) + // If a regular RWX is delinquent, try to switch ownership quickly to the owner node of the share manager CR + isOwnerNodeDelinquent, err := rc.ds.IsNodeDelinquent(r.Status.OwnerID, r.Spec.VolumeName) if err != nil { return false, err } - if isDelinquent { - pod, err := rc.ds.GetPodRO(rc.namespace, types.GetShareManagerPodNameFromShareManagerName(r.Spec.VolumeName)) + isSpecNodeDelinquent, err := rc.ds.IsNodeDelinquent(r.Spec.NodeID, r.Spec.VolumeName) + if err != nil { + return false, err + } + preferredOwnerID := r.Spec.NodeID + if isOwnerNodeDelinquent || isSpecNodeDelinquent { + sm, err := rc.ds.GetShareManager(r.Spec.VolumeName) if err != nil && !apierrors.IsNotFound(err) { return false, err } - if pod != nil && rc.controllerID == pod.Spec.NodeName { - return true, nil + if sm != nil { + preferredOwnerID = sm.Status.OwnerID } } - return isControllerResponsibleFor(rc.controllerID, rc.ds, r.Name, r.Spec.NodeID, r.Status.OwnerID), nil + return isControllerResponsibleFor(rc.controllerID, rc.ds, r.Name, preferredOwnerID, r.Status.OwnerID), nil } func hasMatchingReplica(replica *longhorn.Replica, replicas map[string]*longhorn.Replica) bool { diff --git a/controller/volume_controller.go b/controller/volume_controller.go index b675f8fc6b..7e1758a802 100644 --- a/controller/volume_controller.go +++ b/controller/volume_controller.go @@ -4362,22 +4362,27 @@ func (c *VolumeController) isResponsibleFor(v *longhorn.Volume, defaultEngineIma err = errors.Wrap(err, "error while checking isResponsibleFor") }() - // If a regular RWX is delinquent, try to switch ownership quickly to the node of the newly created share-manager pod - isDelinquent, err := c.ds.IsNodeDelinquent(v.Status.OwnerID, v.Name) + // If a regular RWX is delinquent, try to switch ownership quickly to the owner node of the share manager CR + isOwnerNodeDelinquent, err := c.ds.IsNodeDelinquent(v.Status.OwnerID, v.Name) if err != nil { return false, err } - if isDelinquent { - pod, err := c.ds.GetPodRO(v.Namespace, types.GetShareManagerPodNameFromShareManagerName(v.Name)) + isSpecNodeDelinquent, err := c.ds.IsNodeDelinquent(v.Spec.NodeID, v.Name) + if err != nil { + return false, err + } + preferredOwnerID := v.Spec.NodeID + if isOwnerNodeDelinquent || isSpecNodeDelinquent { + sm, err := c.ds.GetShareManager(v.Name) if err != nil && !apierrors.IsNotFound(err) { return false, err } - if pod != nil && c.controllerID == pod.Spec.NodeName { - return true, nil + if sm != nil { + preferredOwnerID = sm.Status.OwnerID } } - isResponsible := isControllerResponsibleFor(c.controllerID, c.ds, v.Name, v.Spec.NodeID, v.Status.OwnerID) + isResponsible := isControllerResponsibleFor(c.controllerID, c.ds, v.Name, preferredOwnerID, v.Status.OwnerID) if types.IsDataEngineV1(v.Spec.DataEngine) { readyNodesWithDefaultEI, err := c.ds.ListReadyNodesContainingEngineImageRO(defaultEngineImage)