Skip to content

Commit

Permalink
Simplify logic in timeToReplacementReplica
Browse files (browse the repository at this point in the history)
Longhorn 8043

Signed-off-by: Eric Weber <eric.weber@suse.com>
(cherry picked from commit e685946)
  • Loading branch information
ejweber authored and mergify[bot] committed Mar 7, 2024
1 parent 7da2a91 commit 7d306bd
Showing 1 changed file with 18 additions and 20 deletions.
38 changes: 18 additions & 20 deletions scheduler/replica_scheduler.go
Original file line number Diff line number Diff line change
Expand Up @@ -194,11 +194,15 @@ func (rcs *ReplicaScheduler) getDiskCandidates(nodeInfo map[string]*longhorn.Nod
diskSoftAntiAffinity = volume.Spec.ReplicaDiskSoftAntiAffinity == longhorn.ReplicaDiskSoftAntiAffinityEnabled
}

timeToReplacementReplica, _, err := rcs.timeToReplacementReplica(volume)
if err != nil {
err = errors.Wrap(err, "failed to get time until replica replacement")
multiError.Append(util.NewMultiError(err.Error()))
return map[string]*Disk{}, multiError
creatingNewReplicasForReplenishment := false
if volume.Status.Robustness == longhorn.VolumeRobustnessDegraded {
timeToReplacementReplica, _, err := rcs.timeToReplacementReplica(volume)
if err != nil {
err = errors.Wrap(err, "failed to get time until replica replacement")
multiError.Append(util.NewMultiError(err.Error()))
return map[string]*Disk{}, multiError
}
creatingNewReplicasForReplenishment = timeToReplacementReplica == 0
}

>>>>>>> 65ceb37c (Refine scheduling behavior with failed replicas)
Expand All @@ -218,7 +222,7 @@ func (rcs *ReplicaScheduler) getDiskCandidates(nodeInfo map[string]*longhorn.Nod
}

usedNodes, usedZones, onlyEvictingNodes, onlyEvictingZones := getCurrentNodesAndZones(replicas, nodeInfo,
ignoreFailedReplicas, timeToReplacementReplica == 0)
ignoreFailedReplicas, creatingNewReplicasForReplenishment)

allowEmptyNodeSelectorVolume, err := rcs.ds.GetSettingAsBool(types.SettingNameAllowEmptyNodeSelectorVolume)
if err != nil {
Expand Down Expand Up @@ -581,7 +585,9 @@ func (rcs *ReplicaScheduler) RequireNewReplica(replicas map[string]*longhorn.Rep
logrus.WithError(err).Errorf(msg)
}
if timeUntilNext > 0 {
logrus.Infof("Replica replenishment is delayed until %v", timeOfNext)
// Adding another second to the checkBackDuration to avoid clock skew.
timeUntilNext = timeUntilNext + time.Second
logrus.Infof("Replica replenishment is delayed until %v", timeOfNext.Add(time.Second))
}
return timeUntilNext
}
Expand Down Expand Up @@ -906,16 +912,9 @@ func getCurrentNodesAndZones(replicas map[string]*longhorn.Replica, nodeInfo map
=======

// timeToReplacementReplica returns the amount of time until Longhorn should create a new replica for a degraded volume,
// even if there are potentially reusable failed replicas. It returns:
// - -time.Duration if there is no need for a replacement replica,
// - 0 if a replacement replica is needed right now (replica-replenishment-wait-interval has elapsed),
// - +time.Duration if a replacement replica will be needed (replica-replenishment-wait-interval has not elapsed).
// even if there are potentially reusable failed replicas. It returns 0 if replica-replenishment-wait-interval has
// elapsed and a new replica is needed right now.
func (rcs *ReplicaScheduler) timeToReplacementReplica(volume *longhorn.Volume) (time.Duration, time.Time, error) {
if volume.Status.Robustness != longhorn.VolumeRobustnessDegraded {
// No replacement replica is needed.
return -1, time.Time{}, nil
}

settingValue, err := rcs.ds.GetSettingAsInt(types.SettingNameReplicaReplenishmentWaitInterval)
if err != nil {
err = errors.Wrapf(err, "failed to get setting ReplicaReplenishmentWaitInterval")
Expand All @@ -930,13 +929,12 @@ func (rcs *ReplicaScheduler) timeToReplacementReplica(volume *longhorn.Volume) (
}

now := rcs.nowHandler()
if now.After(lastDegradedAt.Add(waitInterval)) {
timeOfNext := lastDegradedAt.Add(waitInterval)
if now.After(timeOfNext) {
// A replacement replica is needed now.
return 0, time.Time{}, nil
}

timeOfNext := lastDegradedAt.Add(waitInterval)
// Adding 1 more second to the check back interval to avoid clock skew
return timeOfNext.Sub(now) + time.Second, timeOfNext, nil
return timeOfNext.Sub(now), timeOfNext, nil
}
>>>>>>> 65ceb37c (Refine scheduling behavior with failed replicas)

0 comments on commit 7d306bd

Please sign in to comment.