Skip to content

Commit

Permalink
Simplify logic in timeToReplacementReplica
Browse files (browse the repository at this point in the history)
Longhorn 8043

Signed-off-by: Eric Weber <eric.weber@suse.com>
  • Loading branch information
ejweber committed Mar 6, 2024
1 parent bb06506 commit e282109
Showing 1 changed file with 18 additions and 20 deletions.
38 changes: 18 additions & 20 deletions scheduler/replica_scheduler.go
Original file line number Diff line number Diff line change
Expand Up @@ -207,11 +207,15 @@ func (rcs *ReplicaScheduler) getDiskCandidates(nodeInfo map[string]*longhorn.Nod
diskSoftAntiAffinity = volume.Spec.ReplicaDiskSoftAntiAffinity == longhorn.ReplicaDiskSoftAntiAffinityEnabled
}

timeToReplacementReplica, _, err := rcs.timeToReplacementReplica(volume)
if err != nil {
err = errors.Wrap(err, "failed to get time until replica replacement")
multiError.Append(util.NewMultiError(err.Error()))
return map[string]*Disk{}, multiError
creatingNewReplicasForReplenishment := false
if volume.Status.Robustness == longhorn.VolumeRobustnessDegraded {
timeToReplacementReplica, _, err := rcs.timeToReplacementReplica(volume)
if err != nil {
err = errors.Wrap(err, "failed to get time until replica replacement")
multiError.Append(util.NewMultiError(err.Error()))
return map[string]*Disk{}, multiError
}
creatingNewReplicasForReplenishment = timeToReplacementReplica == 0
}

getDiskCandidatesFromNodes := func(nodes map[string]*longhorn.Node) (diskCandidates map[string]*Disk, multiError util.MultiError) {
Expand All @@ -230,7 +234,7 @@ func (rcs *ReplicaScheduler) getDiskCandidates(nodeInfo map[string]*longhorn.Nod
}

usedNodes, usedZones, onlyEvictingNodes, onlyEvictingZones := getCurrentNodesAndZones(replicas, nodeInfo,
ignoreFailedReplicas, timeToReplacementReplica == 0)
ignoreFailedReplicas, creatingNewReplicasForReplenishment)

allowEmptyNodeSelectorVolume, err := rcs.ds.GetSettingAsBool(types.SettingNameAllowEmptyNodeSelectorVolume)
if err != nil {
Expand Down Expand Up @@ -606,7 +610,9 @@ func (rcs *ReplicaScheduler) RequireNewReplica(replicas map[string]*longhorn.Rep
logrus.WithError(err).Errorf(msg)
}
if timeUntilNext > 0 {
logrus.Infof("Replica replenishment is delayed until %v", timeOfNext)
// Add one second of padding to timeUntilNext (the check-back duration) to tolerate clock skew.
timeUntilNext = timeUntilNext + time.Second
logrus.Infof("Replica replenishment is delayed until %v", timeOfNext.Add(time.Second))
}
return timeUntilNext
}
Expand Down Expand Up @@ -917,16 +923,9 @@ func getCurrentNodesAndZones(replicas map[string]*longhorn.Replica, nodeInfo map
}

// timeToReplacementReplica returns the amount of time until Longhorn should create a new replica for a degraded volume,
// even if there are potentially reusable failed replicas. It returns:
// - -time.Duration if there is no need for a replacement replica,
// - 0 if a replacement replica is needed right now (replica-replenishment-wait-interval has elapsed),
// - +time.Duration if a replacement replica will be needed (replica-replenishment-wait-interval has not elapsed).
// even if there are potentially reusable failed replicas. It returns 0 if replica-replenishment-wait-interval has
// elapsed and a new replica is needed right now.
func (rcs *ReplicaScheduler) timeToReplacementReplica(volume *longhorn.Volume) (time.Duration, time.Time, error) {
if volume.Status.Robustness != longhorn.VolumeRobustnessDegraded {
// No replacement replica is needed.
return -1, time.Time{}, nil
}

settingValue, err := rcs.ds.GetSettingAsInt(types.SettingNameReplicaReplenishmentWaitInterval)
if err != nil {
err = errors.Wrapf(err, "failed to get setting ReplicaReplenishmentWaitInterval")
Expand All @@ -941,12 +940,11 @@ func (rcs *ReplicaScheduler) timeToReplacementReplica(volume *longhorn.Volume) (
}

now := rcs.nowHandler()
if now.After(lastDegradedAt.Add(waitInterval)) {
timeOfNext := lastDegradedAt.Add(waitInterval)
if now.After(timeOfNext) {
// A replacement replica is needed now.
return 0, time.Time{}, nil
}

timeOfNext := lastDegradedAt.Add(waitInterval)
// Adding 1 more second to the check back interval to avoid clock skew
return timeOfNext.Sub(now) + time.Second, timeOfNext, nil
return timeOfNext.Sub(now), timeOfNext, nil
}

0 comments on commit e282109

Please sign in to comment.