-
Notifications
You must be signed in to change notification settings - Fork 4
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
feat(zbchaos): query current cluster topology #459
Changes from 1 commit
d01034d
f7c3b9f
ebd49d5
c076b40
ffd0a20
85f9a2d
7168ed4
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -99,29 +99,55 @@ func waitForChange(flags *Flags) error { | |
if err != nil { | ||
return err | ||
} | ||
if topology.LastChange != nil && topology.LastChange.Id == int64(flags.changeId) { | ||
if topology.LastChange.Status == "COMPLETED" { | ||
internal.LogInfo("Change %d completed successfully", flags.changeId) | ||
return nil | ||
} else { | ||
return fmt.Errorf("change %d failed with status %s", flags.changeId, topology.LastChange.Status) | ||
} | ||
} else if topology.LastChange != nil && topology.LastChange.Id > int64(flags.changeId) { | ||
changeStatus := describeChangeStatus(topology, int64(flags.changeId)) | ||
switch changeStatus { | ||
case ChangeStatusCompleted: | ||
internal.LogInfo("Change %d completed successfully", flags.changeId) | ||
return nil | ||
case ChangeStatusFailed: | ||
internal.LogInfo("Change %d failed with status %s", flags.changeId, topology.LastChange.Status) | ||
return fmt.Errorf("change %d failed with status %s", flags.changeId, topology.LastChange.Status) | ||
case ChangeStatusOutdated: | ||
internal.LogInfo("Change %d is outdated but was most likely completed successfully, latest change is %d", flags.changeId, topology.LastChange.Id) | ||
return nil | ||
} else if topology.PendingChange != nil && topology.PendingChange.Id == int64(flags.changeId) { | ||
case ChangeStatusPending: | ||
competed := len(topology.PendingChange.Completed) | ||
pending := len(topology.PendingChange.Pending) | ||
total := competed + pending | ||
internal.LogInfo("Change %d is %s with %d/%d operations complete", flags.changeId, topology.PendingChange.Status, competed, total) | ||
} else { | ||
case ChangeStatusUnknown: | ||
internal.LogInfo("Change %d not yet started", flags.changeId) | ||
} | ||
|
||
time.Sleep(5 * time.Second) | ||
} | ||
} | ||
|
||
type ChangeStatus string | ||
|
||
const ( | ||
ChangeStatusOutdated ChangeStatus = "OUTDATED" | ||
ChangeStatusCompleted ChangeStatus = "COMPLETED" | ||
ChangeStatusFailed ChangeStatus = "FAILED" | ||
ChangeStatusPending ChangeStatus = "PENDING" | ||
ChangeStatusUnknown ChangeStatus = "UNKNOWN" | ||
) | ||
|
||
func describeChangeStatus(topology *CurrentTopology, changeId int64) ChangeStatus { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🤔 Feels like it makes sense to have this type of status enum inside the returned topology? 🤔 There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Our API doesn't offer an endpoint to query "what's the status of change X". If we would offer that, the endpoint would return a similar enum as we have defined here, yeah. |
||
if topology.LastChange != nil && topology.LastChange.Id == changeId { | ||
if topology.LastChange.Status == "COMPLETED" { | ||
return ChangeStatusCompleted | ||
} else { | ||
return ChangeStatusFailed | ||
} | ||
} else if topology.LastChange != nil && topology.LastChange.Id > changeId { | ||
return ChangeStatusOutdated | ||
} else if topology.PendingChange != nil && topology.PendingChange.Id == changeId { | ||
return ChangeStatusPending | ||
} else { | ||
return ChangeStatusUnknown | ||
} | ||
} | ||
|
||
func queryTopology(port int) (*CurrentTopology, error) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🔧 Would be great if you could add a IT for it There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I've added a minimal one now. None of the other commands have integration tests so I hope I did it in a way that is acceptable: ffd0a20 There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. |
||
url := fmt.Sprintf("http://localhost:%d/actuator/cluster", port) | ||
resp, err := http.Get(url) | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,107 @@ | ||
// Copyright 2023 Camunda Services GmbH | ||
// | ||
// Licensed under the Apache License, Version 2.0 (the "License"); | ||
// you may not use this file except in compliance with the License. | ||
// You may obtain a copy of the License at | ||
// | ||
// http://www.apache.org/licenses/LICENSE-2.0 | ||
// | ||
// Unless required by applicable law or agreed to in writing, software | ||
// distributed under the License is distributed on an "AS IS" BASIS, | ||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
// See the License for the specific language governing permissions and | ||
// limitations under the License. | ||
|
||
package cmd | ||
|
||
import ( | ||
"testing" | ||
|
||
"github.com/stretchr/testify/assert" | ||
) | ||
|
||
func Test_DescribeChangeStatusWithPending(t *testing.T) { | ||
// given | ||
topology := CurrentTopology{ | ||
Version: 1, | ||
Brokers: []BrokerState{}, | ||
LastChange: &LastChange{ | ||
Id: 2, | ||
Status: "COMPLETED", | ||
StartedAt: "2021-09-01T12:00:00.000Z", | ||
CompletedAt: "2021-09-01T12:00:00.000Z", | ||
}, | ||
PendingChange: &TopologyChange{ | ||
Id: 3, | ||
Status: "IN_PROGRESS", | ||
StartedAt: "2021-09-01T12:00:00.000Z", | ||
CompletedAt: "2021-09-01T12:00:00.000Z", | ||
InternalVersion: 1, | ||
Completed: []Operation{ | ||
{ | ||
Operation: "ADD", | ||
BrokerId: 1, | ||
}, | ||
}, | ||
Pending: []Operation{ | ||
{ | ||
Operation: "ADD_BROKER", | ||
BrokerId: 2, | ||
}, | ||
}, | ||
}, | ||
} | ||
|
||
// then | ||
assert.Equal(t, ChangeStatusPending, describeChangeStatus(&topology, 3)) | ||
assert.Equal(t, ChangeStatusCompleted, describeChangeStatus(&topology, 2)) | ||
assert.Equal(t, ChangeStatusOutdated, describeChangeStatus(&topology, 1)) | ||
assert.Equal(t, ChangeStatusUnknown, describeChangeStatus(&topology, 4)) | ||
} | ||
|
||
func Test_DescribeChangeStatusWithoutChanges(t *testing.T) { | ||
// given | ||
topology := CurrentTopology{ | ||
Version: 1, | ||
Brokers: []BrokerState{}, | ||
LastChange: nil, | ||
PendingChange: nil, | ||
} | ||
|
||
// then | ||
assert.Equal(t, ChangeStatusUnknown, describeChangeStatus(&topology, 1)) | ||
assert.Equal(t, ChangeStatusUnknown, describeChangeStatus(&topology, 2)) | ||
} | ||
|
||
func Test_DescribeChangeStatusWithoutCompleted(t *testing.T) { | ||
// given | ||
topology := CurrentTopology{ | ||
Version: 1, | ||
Brokers: []BrokerState{}, | ||
LastChange: nil, | ||
PendingChange: &TopologyChange{ | ||
Id: 3, | ||
Status: "IN_PROGRESS", | ||
StartedAt: "2021-09-01T12:00:00.000Z", | ||
CompletedAt: "2021-09-01T12:00:00.000Z", | ||
InternalVersion: 1, | ||
Completed: []Operation{ | ||
{ | ||
Operation: "ADD", | ||
BrokerId: 1, | ||
}, | ||
}, | ||
Pending: []Operation{ | ||
{ | ||
Operation: "ADD_BROKER", | ||
BrokerId: 2, | ||
}, | ||
}, | ||
}, | ||
} | ||
|
||
// then | ||
assert.Equal(t, ChangeStatusUnknown, describeChangeStatus(&topology, 1)) | ||
assert.Equal(t, ChangeStatusPending, describeChangeStatus(&topology, 3)) | ||
assert.Equal(t, ChangeStatusUnknown, describeChangeStatus(&topology, 4)) | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
🔧 I would add some maximum time out or something, this looks to me like it could potentially loop forever? I know we assume the change will eventually complete, but still... 😅
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes we should definitely fail at some point.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yeah, good point 👍 Picking a good value is tricky though, I'd expect scaling to take anywhere from 5 minutes to >30 minutes..
Using that in an automated chaos test is problematic anyway because we'll run into job timeouts... 🤔
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
W00t 30 minutes 😆 😅
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I've set a timeout of 25 minutes now