Skip to content

Commit

Permalink
Further clarify scheduling logic for evicting replicas
Browse files (browse the repository at this point in the history)
Signed-off-by: Eric Weber <eric.weber@suse.com>
  • Loading branch information
ejweber committed Aug 11, 2023
1 parent 943e88c commit d7e23a9
Showing 1 changed file with 14 additions and 26 deletions.
40 changes: 14 additions & 26 deletions scheduler/replica_scheduler.go
Original file line number Diff line number Diff line change
Expand Up @@ -196,8 +196,8 @@ func (rcs *ReplicaScheduler) getDiskCandidates(nodeInfo map[string]*longhorn.Nod

usedNodes := map[string]*longhorn.Node{}
usedZones := map[string]bool{}
onlyEvictingNodes := map[string]bool{}
onlyEvictingZones := map[string]bool{}
replicasCountPerNode := map[string]int{}
// Get current nodes and zones
for _, r := range replicas {
if r.Spec.NodeID != "" && r.DeletionTimestamp == nil && r.Spec.FailedAt == "" {
Expand All @@ -207,35 +207,16 @@ func (rcs *ReplicaScheduler) getDiskCandidates(nodeInfo map[string]*longhorn.Nod
// one zone.
usedZones[node.Status.Zone] = true
if r.Status.EvictionRequested {
onlyEvictingNodes[node.Name] = true
onlyEvictingZones[node.Status.Zone] = true
} else {
onlyEvictingNodes[node.Name] = false
onlyEvictingZones[node.Status.Zone] = false
}
replicasCountPerNode[r.Spec.NodeID] = replicasCountPerNode[r.Spec.NodeID] + 1
}
}
}

// filterNodesWithLessThanTwoReplicas returns a new map containing only the
// entries of nodes whose count in replicasCountPerNode is below 2. The input
// map is not modified.
//
// replicasCountPerNode is a captured variable from the enclosing function. A
// node name with no entry there reads as 0 and is therefore kept.
// NOTE(review): the counts are keyed by r.Spec.NodeID while the lookup here
// uses the key of nodes; this assumes both are the same node name - confirm.
filterNodesWithLessThanTwoReplicas := func(nodes map[string]*longhorn.Node) map[string]*longhorn.Node {
result := map[string]*longhorn.Node{}
for nodeName, node := range nodes {
// Keep nodes that currently have zero or one counted replica.
if replicasCountPerNode[nodeName] < 2 {
result[nodeName] = node
}
}
return result
}

// filterNodesInOnlyEvictingZones returns a new map containing only the
// entries of nodes whose node.Status.Zone is marked true in
// onlyEvictingZones. The input map is not modified.
//
// onlyEvictingZones is a captured variable from the enclosing function. A
// zone with no entry there reads as false, so its nodes are dropped.
// NOTE(review): the replica loop earlier in the function assigns true for a
// replica with EvictionRequested and false otherwise, so the value stored for
// a zone appears to reflect the last replica processed for it - confirm that
// this matches the intended "only evicting" meaning.
filterNodesInOnlyEvictingZones := func(nodes map[string]*longhorn.Node) map[string]*longhorn.Node {
result := map[string]*longhorn.Node{}
for nodeName, node := range nodes {
// Keep the node only when its zone is flagged true.
if onlyEvictingZones[node.Status.Zone] {
result[nodeName] = node
}
}
return result
}

allowEmptyNodeSelectorVolume, err := rcs.ds.GetSettingAsBool(types.SettingNameAllowEmptyNodeSelectorVolume)
if err != nil {
err = errors.Wrapf(err, "failed to get %v setting", types.SettingNameAllowEmptyNodeSelectorVolume)
Expand All @@ -244,8 +225,9 @@ func (rcs *ReplicaScheduler) getDiskCandidates(nodeInfo map[string]*longhorn.Nod
}

unusedNodes := map[string]*longhorn.Node{}
unusedNodesInUnusedZones := map[string]*longhorn.Node{} // By definition, these nodes are also unused.
nodesWithEvictingReplicas := getNodesWithEvictingReplicas(replicas, nodeInfo)
unusedNodesAfterEviction := map[string]*longhorn.Node{}
unusedNodesInUnusedZones := map[string]*longhorn.Node{}
unusedNodesInUnusedZonesAfterEviction := map[string]*longhorn.Node{}

for nodeName, node := range nodeInfo {
// Filter Nodes. If the Nodes don't match the tags, don't bother marking them as candidates.
Expand All @@ -255,6 +237,12 @@ func (rcs *ReplicaScheduler) getDiskCandidates(nodeInfo map[string]*longhorn.Nod
if _, ok := usedNodes[nodeName]; !ok {
unusedNodes[nodeName] = node
}
if onlyEvictingNodes[nodeName] {
unusedNodesAfterEviction[nodeName] = node
if onlyEvictingZones[node.Status.Zone] {
unusedNodesInUnusedZonesAfterEviction[nodeName] = node
}
}
if _, ok := usedZones[node.Status.Zone]; !ok {
unusedNodesInUnusedZones[nodeName] = node
}
Expand All @@ -273,7 +261,7 @@ func (rcs *ReplicaScheduler) getDiskCandidates(nodeInfo map[string]*longhorn.Nod
fallthrough
// Same as the above. If we cannot schedule two replicas in the same zone, we cannot schedule them on the same node.
case !zoneSoftAntiAffinity && nodeSoftAntiAffinity:
diskCandidates, errors = getDiskCandidatesFromNodes(filterNodesWithLessThanTwoReplicas(filterNodesInOnlyEvictingZones(nodesWithEvictingReplicas)))
diskCandidates, errors = getDiskCandidatesFromNodes(unusedNodesInUnusedZonesAfterEviction)
if len(diskCandidates) > 0 {
return diskCandidates, nil
}
Expand All @@ -284,7 +272,7 @@ func (rcs *ReplicaScheduler) getDiskCandidates(nodeInfo map[string]*longhorn.Nod
return diskCandidates, nil
}
multiError.Append(errors)
diskCandidates, errors = getDiskCandidatesFromNodes(filterNodesWithLessThanTwoReplicas(nodesWithEvictingReplicas))
diskCandidates, errors = getDiskCandidatesFromNodes(unusedNodesAfterEviction)
if len(diskCandidates) > 0 {
return diskCandidates, nil
}
Expand Down

0 comments on commit d7e23a9

Please sign in to comment.