From 04f8b25e45225844af79583047a48d637181e836 Mon Sep 17 00:00:00 2001 From: hc-github-team-nomad-core <82989552+hc-github-team-nomad-core@users.noreply.github.com> Date: Tue, 2 Apr 2024 04:48:22 -0500 Subject: [PATCH] Backport of [gh-19729] Fix logic for updating terminal allocs on clients with max client disconnect into release/1.6.x (#20249) * [gh-19729] Fix logic for updating terminal allocs on clients with max client disconnect (#20181) Only ignore allocs on terminal states that are updated --------- Co-authored-by: Tim Gross * fix: update test --------- Co-authored-by: Juana De La Cuesta Co-authored-by: Tim Gross Co-authored-by: Juanadelacuesta <8647634+Juanadelacuesta@users.noreply.github.com> --- scheduler/reconcile_test.go | 17 +++++---- scheduler/reconcile_util.go | 4 +-- scheduler/reconcile_util_test.go | 62 ++++++++++++++++---------------- 3 files changed, 43 insertions(+), 40 deletions(-) diff --git a/scheduler/reconcile_test.go b/scheduler/reconcile_test.go index 0db33d65ac2..7b66f694cc8 100644 --- a/scheduler/reconcile_test.go +++ b/scheduler/reconcile_test.go @@ -5448,11 +5448,11 @@ func TestReconciler_Disconnected_Client(t *testing.T) { disconnectedAllocStates: disconnectAllocState, isBatch: true, expected: &resultExpectation{ - place: 2, + place: 0, desiredTGUpdates: map[string]*structs.DesiredUpdates{ "web": { Ignore: 2, - Place: 2, + Place: 0, }, }, }, @@ -5468,10 +5468,13 @@ func TestReconciler_Disconnected_Client(t *testing.T) { disconnectedAllocStates: disconnectAllocState, expected: &resultExpectation{ reconnectUpdates: 2, - stop: 0, + place: 2, + stop: 2, desiredTGUpdates: map[string]*structs.DesiredUpdates{ "web": { - Ignore: 5, + Stop: 2, + Ignore: 3, + Place: 2, }, }, }, @@ -5610,13 +5613,13 @@ func TestReconciler_Disconnected_Client(t *testing.T) { disconnectedAllocStates: disconnectAllocState, shouldStopOnDisconnectedNode: true, expected: &resultExpectation{ - stop: 2, + stop: 4, place: 2, desiredTGUpdates: map[string]*structs.DesiredUpdates{ "web": { - Stop: 2, + Stop: 4, Place: 2, - Ignore: 5, + Ignore: 3, }, }, }, diff --git a/scheduler/reconcile_util.go b/scheduler/reconcile_util.go index 7cf7c3fb14c..5287c66bcde 100644 --- a/scheduler/reconcile_util.go +++ b/scheduler/reconcile_util.go @@ -300,9 +300,9 @@ func (a allocSet) filterByTainted(taintedNodes map[string]*structs.Node, serverS } if alloc.TerminalStatus() && !reconnect { - // Terminal allocs, if supportsDisconnectedClient and not reconnect, + // Server-terminal allocs, if supportsDisconnectedClient and not reconnect, // are probably stopped replacements and should be ignored - if supportsDisconnectedClients { + if supportsDisconnectedClients && alloc.ServerTerminalStatus() { ignore[alloc.ID] = alloc continue } diff --git a/scheduler/reconcile_util_test.go b/scheduler/reconcile_util_test.go index 600b9b4918a..0e4f1d49f85 100644 --- a/scheduler/reconcile_util_test.go +++ b/scheduler/reconcile_util_test.go @@ -379,10 +379,10 @@ func TestAllocSet_filterByTainted(t *testing.T) { taintedNodes: nodes, skipNilNodeTest: false, all: allocSet{ - // Allocs on reconnected nodes that are complete are ignored - "ignored-reconnect-complete": { - ID: "ignored-reconnect-complete", - Name: "ignored-reconnect-complete", + // Allocs on reconnected nodes that are complete need to be updated to stop + "untainted-reconnect-complete": { + ID: "untainted-reconnect-complete", + Name: "untainted-reconnect-complete", ClientStatus: structs.AllocClientStatusComplete, DesiredStatus: structs.AllocDesiredStatusRun, Job: testJob, @@ -413,10 +413,10 @@ func TestAllocSet_filterByTainted(t *testing.T) { TaskGroup: "web", AllocStates: unknownAllocState, }, - // Replacement allocs that are complete are ignored - "ignored-reconnect-complete-replacement": { - ID: "ignored-reconnect-complete-replacement", - Name: "ignored-reconnect-complete", + // Replacement allocs that are complete need to be updated + "untainted-reconnect-complete-replacement": { + ID: "untainted-reconnect-complete-replacement", + Name: "untainted-reconnect-complete", ClientStatus: structs.AllocClientStatusComplete, DesiredStatus: structs.AllocDesiredStatusRun, Job: testJob, @@ -449,7 +449,29 @@ func TestAllocSet_filterByTainted(t *testing.T) { PreviousAllocation: "untainted-reconnect-lost", }, }, - untainted: allocSet{}, + untainted: allocSet{ + "untainted-reconnect-complete": { + ID: "untainted-reconnect-complete", + Name: "untainted-reconnect-complete", + ClientStatus: structs.AllocClientStatusComplete, + DesiredStatus: structs.AllocDesiredStatusRun, + Job: testJob, + NodeID: "normal", + TaskGroup: "web", + AllocStates: unknownAllocState, + }, + "untainted-reconnect-complete-replacement": { + ID: "untainted-reconnect-complete-replacement", + Name: "untainted-reconnect-complete", + ClientStatus: structs.AllocClientStatusComplete, + DesiredStatus: structs.AllocDesiredStatusRun, + Job: testJob, + NodeID: "normal", + TaskGroup: "web", + AllocStates: unknownAllocState, + PreviousAllocation: "untainted-reconnect-complete", + }, + }, migrate: allocSet{}, disconnecting: allocSet{}, reconnecting: allocSet{ @@ -465,17 +487,6 @@ func TestAllocSet_filterByTainted(t *testing.T) { }, }, ignore: allocSet{ - - "ignored-reconnect-complete": { - ID: "ignored-reconnect-complete", - Name: "ignored-reconnect-complete", - ClientStatus: structs.AllocClientStatusComplete, - DesiredStatus: structs.AllocDesiredStatusRun, - Job: testJob, - NodeID: "normal", - TaskGroup: "web", - AllocStates: unknownAllocState, - }, "ignored-reconnect-lost": { ID: "ignored-reconnect-lost", Name: "ignored-reconnect-lost", @@ -486,17 +497,6 @@ func TestAllocSet_filterByTainted(t *testing.T) { TaskGroup: "web", AllocStates: unknownAllocState, }, - "ignored-reconnect-complete-replacement": { - ID: "ignored-reconnect-complete-replacement", - Name: "ignored-reconnect-complete", - ClientStatus: structs.AllocClientStatusComplete, - DesiredStatus: structs.AllocDesiredStatusRun, - Job: testJob, - NodeID: "normal", - TaskGroup: "web", - AllocStates: unknownAllocState, - PreviousAllocation: "untainted-reconnect-complete", - }, "ignored-reconnect-failed-replacement": { ID: "ignored-reconnect-failed-replacement", Name: "ignored-reconnect-failed",