From 36506438ca6ccdbb7f5e159d3782875062153107 Mon Sep 17 00:00:00 2001 From: Shuo Wu Date: Tue, 21 May 2024 17:01:21 -0700 Subject: [PATCH] scheduler: Disable failed replica reuse for SPDK Longhorn 7199 Signed-off-by: Shuo Wu --- controller/volume_controller.go | 3 +++ scheduler/replica_scheduler.go | 8 ++++++++ 2 files changed, 11 insertions(+) diff --git a/controller/volume_controller.go b/controller/volume_controller.go index ca4b22dadb..e5d3c16e7f 100644 --- a/controller/volume_controller.go +++ b/controller/volume_controller.go @@ -4836,6 +4836,9 @@ func (c *VolumeController) shouldCleanUpFailedReplica(v *longhorn.Volume, r *lon log.Warnf("Replica %v failed to rebuild too many times", r.Name) return true } + if types.IsDataEngineV2(v.Spec.DataEngine) { + return true + } // Failed too long ago to be useful during a rebuild. if v.Spec.StaleReplicaTimeout > 0 && util.TimestampAfterTimeout(r.Spec.FailedAt, time.Duration(v.Spec.StaleReplicaTimeout)*time.Minute) { diff --git a/scheduler/replica_scheduler.go b/scheduler/replica_scheduler.go index b1c1687814..0fce0ea4cc 100644 --- a/scheduler/replica_scheduler.go +++ b/scheduler/replica_scheduler.go @@ -566,6 +566,10 @@ func filterActiveReplicas(replicas map[string]*longhorn.Replica) map[string]*lon } func (rcs *ReplicaScheduler) CheckAndReuseFailedReplica(replicas map[string]*longhorn.Replica, volume *longhorn.Volume, hardNodeAffinity string) (*longhorn.Replica, error) { + if types.IsDataEngineV2(volume.Spec.DataEngine) { + return nil, nil + } + replicas = filterActiveReplicas(replicas) allNodesInfo, err := rcs.getNodeInfo() @@ -654,6 +658,10 @@ func (rcs *ReplicaScheduler) RequireNewReplica(replicas map[string]*longhorn.Rep return 0 } + if types.IsDataEngineV2(volume.Spec.DataEngine) { + return 0 + } + timeUntilNext, timeOfNext, err := rcs.timeToReplacementReplica(volume) if err != nil { msg := "Failed to get time until replica replacement, will directly replenish a new replica"