Skip to content

Commit

Permalink
Backport of [gh-19729] Fix logic for updating terminal allocs on clie…
Browse files Browse the repository at this point in the history
…nts with max client disconnect into release/1.7.x (#20248)

* [gh-19729] Fix logic for updating terminal allocs on clients with max client disconnect (#20181)

Only ignore allocs on terminal states that are updated
---------

Co-authored-by: Tim Gross <tgross@hashicorp.com>

* [gh-19729] Fix logic for updating terminal allocs on clients with max client disconnect (#20181)

* fix: update test

---------

Co-authored-by: Juana De La Cuesta <juanita.delacuestamorales@hashicorp.com>
Co-authored-by: Tim Gross <tgross@hashicorp.com>
Co-authored-by: Juanadelacuesta <8647634+Juanadelacuesta@users.noreply.github.com>
  • Loading branch information
4 people authored Apr 2, 2024
1 parent a1918e2 commit fe20def
Show file tree
Hide file tree
Showing 3 changed files with 50 additions and 52 deletions.
28 changes: 15 additions & 13 deletions scheduler/reconcile_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -940,7 +940,7 @@ func TestReconciler_LostNode_PreventRescheduleOnLost(t *testing.T) {
maxClientDisconnect: pointer.Of(10 * time.Second),
PreventRescheduleOnLost: false,
reschedulePolicy: disabledReschedulePolicy,
expectPlace: 2,
expectPlace: 1,
expectStop: 1,
expectIgnore: 4,
expectDisconnect: 1,
Expand All @@ -951,13 +951,12 @@ func TestReconciler_LostNode_PreventRescheduleOnLost(t *testing.T) {
maxClientDisconnect: pointer.Of(10 * time.Second),
PreventRescheduleOnLost: true,
reschedulePolicy: disabledReschedulePolicy,
expectPlace: 1, // This behaviour needs to be verified
expectPlace: 0,
expectStop: 0,
expectIgnore: 5,
expectDisconnect: 2,
allocStatus: structs.AllocClientStatusUnknown,
},

{
name: "PreventRescheduleOnLost off, MaxClientDisconnect off, Reschedule on",
maxClientDisconnect: nil,
Expand Down Expand Up @@ -991,8 +990,8 @@ func TestReconciler_LostNode_PreventRescheduleOnLost(t *testing.T) {
Attempts: 1,
},
expectPlace: 3,
expectStop: 1,
expectIgnore: 3,
expectStop: 2,
expectIgnore: 2,
expectDisconnect: 1,
allocStatus: structs.AllocClientStatusLost,
},
Expand Down Expand Up @@ -5447,7 +5446,7 @@ func TestReconciler_Disconnected_Client(t *testing.T) {
},
},
{
name: "ignore-reconnect-completed",
name: "update-reconnect-completed",
allocCount: 2,
replace: false,
disconnectedAllocCount: 2,
Expand All @@ -5456,11 +5455,11 @@ func TestReconciler_Disconnected_Client(t *testing.T) {
disconnectedAllocStates: disconnectAllocState,
isBatch: true,
expected: &resultExpectation{
place: 2,
place: 0,
desiredTGUpdates: map[string]*structs.DesiredUpdates{
"web": {
Ignore: 2,
Place: 2,
Place: 0,
},
},
},
Expand All @@ -5476,10 +5475,13 @@ func TestReconciler_Disconnected_Client(t *testing.T) {
disconnectedAllocStates: disconnectAllocState,
expected: &resultExpectation{
reconnectUpdates: 2,
stop: 0,
stop: 2,
place: 2,
desiredTGUpdates: map[string]*structs.DesiredUpdates{
"web": {
Ignore: 5,
Ignore: 3,
Place: 2,
Stop: 2,
},
},
},
Expand Down Expand Up @@ -5617,13 +5619,13 @@ func TestReconciler_Disconnected_Client(t *testing.T) {
disconnectedAllocStates: disconnectAllocState,
shouldStopOnDisconnectedNode: true,
expected: &resultExpectation{
stop: 2,
stop: 4,
place: 2,
desiredTGUpdates: map[string]*structs.DesiredUpdates{
"web": {
Stop: 2,
Stop: 4,
Place: 2,
Ignore: 5,
Ignore: 3,
},
},
},
Expand Down
4 changes: 2 additions & 2 deletions scheduler/reconcile_util.go
Original file line number Diff line number Diff line change
Expand Up @@ -293,9 +293,9 @@ func (a allocSet) filterByTainted(taintedNodes map[string]*structs.Node, serverS
}

if alloc.TerminalStatus() && !reconnect {
// Terminal allocs, if supportsDisconnectedClient and not reconnect,
// Server-terminal allocs, if supportsDisconnectedClient and not reconnect,
// are probably stopped replacements and should be ignored
if supportsDisconnectedClients {
if supportsDisconnectedClients && alloc.ServerTerminalStatus() {
ignore[alloc.ID] = alloc
continue
}
Expand Down
70 changes: 33 additions & 37 deletions scheduler/reconcile_util_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,6 @@ import (
)

func TestAllocSet_filterByTainted(t *testing.T) {
ci.Parallel(t)

nodes := map[string]*structs.Node{
"draining": {
ID: "draining",
Expand Down Expand Up @@ -77,7 +75,7 @@ func TestAllocSet_filterByTainted(t *testing.T) {
},
}

type testCase struct {
testCases := []struct {
name string
all allocSet
taintedNodes map[string]*structs.Node
Expand All @@ -93,10 +91,7 @@ func TestAllocSet_filterByTainted(t *testing.T) {
reconnecting allocSet
ignore allocSet
expiring allocSet
}

testCases := []testCase{
// These two cases test that we maintain parity with pre-disconnected-clients behavior.
}{ // These two cases test that we maintain parity with pre-disconnected-clients behavior.
{
name: "lost-client",
supportsDisconnectedClients: false,
Expand Down Expand Up @@ -394,10 +389,10 @@ func TestAllocSet_filterByTainted(t *testing.T) {
taintedNodes: nodes,
skipNilNodeTest: false,
all: allocSet{
// Allocs on reconnected nodes that are complete are ignored
"ignored-reconnect-complete": {
ID: "ignored-reconnect-complete",
Name: "ignored-reconnect-complete",
// Allocs on reconnected nodes that are complete need to be updated to stop
"untainted-reconnect-complete": {
ID: "untainted-reconnect-complete",
Name: "untainted-reconnect-complete",
ClientStatus: structs.AllocClientStatusComplete,
DesiredStatus: structs.AllocDesiredStatusRun,
Job: testJob,
Expand Down Expand Up @@ -428,10 +423,10 @@ func TestAllocSet_filterByTainted(t *testing.T) {
TaskGroup: "web",
AllocStates: unknownAllocState,
},
// Replacement allocs that are complete are ignored
"ignored-reconnect-complete-replacement": {
ID: "ignored-reconnect-complete-replacement",
Name: "ignored-reconnect-complete",
// Replacement allocs that are complete need to be updated
"untainted-reconnect-complete-replacement": {
ID: "untainted-reconnect-complete-replacement",
Name: "untainted-reconnect-complete",
ClientStatus: structs.AllocClientStatusComplete,
DesiredStatus: structs.AllocDesiredStatusRun,
Job: testJob,
Expand Down Expand Up @@ -464,7 +459,29 @@ func TestAllocSet_filterByTainted(t *testing.T) {
PreviousAllocation: "untainted-reconnect-lost",
},
},
untainted: allocSet{},
untainted: allocSet{
"untainted-reconnect-complete": {
ID: "untainted-reconnect-complete",
Name: "untainted-reconnect-complete",
ClientStatus: structs.AllocClientStatusComplete,
DesiredStatus: structs.AllocDesiredStatusRun,
Job: testJob,
NodeID: "normal",
TaskGroup: "web",
AllocStates: unknownAllocState,
},
"untainted-reconnect-complete-replacement": {
ID: "untainted-reconnect-complete-replacement",
Name: "untainted-reconnect-complete",
ClientStatus: structs.AllocClientStatusComplete,
DesiredStatus: structs.AllocDesiredStatusRun,
Job: testJob,
NodeID: "normal",
TaskGroup: "web",
AllocStates: unknownAllocState,
PreviousAllocation: "untainted-reconnect-complete",
},
},
migrate: allocSet{},
disconnecting: allocSet{},
reconnecting: allocSet{
Expand All @@ -480,16 +497,6 @@ func TestAllocSet_filterByTainted(t *testing.T) {
},
},
ignore: allocSet{
"ignored-reconnect-complete": {
ID: "ignored-reconnect-complete",
Name: "ignored-reconnect-complete",
ClientStatus: structs.AllocClientStatusComplete,
DesiredStatus: structs.AllocDesiredStatusRun,
Job: testJob,
NodeID: "normal",
TaskGroup: "web",
AllocStates: unknownAllocState,
},
"ignored-reconnect-lost": {
ID: "ignored-reconnect-lost",
Name: "ignored-reconnect-lost",
Expand All @@ -500,17 +507,6 @@ func TestAllocSet_filterByTainted(t *testing.T) {
TaskGroup: "web",
AllocStates: unknownAllocState,
},
"ignored-reconnect-complete-replacement": {
ID: "ignored-reconnect-complete-replacement",
Name: "ignored-reconnect-complete",
ClientStatus: structs.AllocClientStatusComplete,
DesiredStatus: structs.AllocDesiredStatusRun,
Job: testJob,
NodeID: "normal",
TaskGroup: "web",
AllocStates: unknownAllocState,
PreviousAllocation: "untainted-reconnect-complete",
},
"ignored-reconnect-failed-replacement": {
ID: "ignored-reconnect-failed-replacement",
Name: "ignored-reconnect-failed",
Expand Down

0 comments on commit fe20def

Please sign in to comment.