Skip to content

Commit ff769e2

Browse files
committed
[Feat] Support StormService pause rollout in upgrade
* Update stormservice golang client * Improve the test coverage * Refactor the API to support manual resume * improve the canary features * Leave e2e test to future PRs * fix lint and verify issues * Polish the canary status * Simplify the canary status fields * Final patch Signed-off-by: Jiaxin Shan <seedjeffwan@gmail.com>
1 parent 8244e51 commit ff769e2

File tree

25 files changed

+3789
-47
lines changed

25 files changed

+3789
-47
lines changed

Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ AIBRIX_IMAGES := $(foreach img,$(IMAGES),$(AIBRIX_CONTAINER_REGISTRY_NAMESPACE)/
1414
IMG ?= ${AIBRIX_CONTAINER_REGISTRY_NAMESPACE}/controller-manager:${IMAGE_TAG}
1515

1616
# ENVTEST_K8S_VERSION refers to the version of kubebuilder assets to be downloaded by envtest binary.
17-
ENVTEST_K8S_VERSION = 1.29.0
17+
ENVTEST_K8S_VERSION = 1.30.0
1818

1919
# Get the currently used golang install path (in GOPATH/bin, unless GOBIN is set)
2020
ifeq (,$(shell go env GOBIN))

api/orchestration/v1alpha1/stormservice_types.go

Lines changed: 154 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,9 @@ limitations under the License.
1717
package v1alpha1
1818

1919
import (
20+
"strconv"
21+
"time"
22+
2023
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
2124
"k8s.io/apimachinery/pkg/util/intstr"
2225
)
@@ -119,6 +122,10 @@ type StormServiceStatus struct {
119122

120123
// The label selector information of the pods belonging to the StormService object.
121124
ScalingTargetSelector string `json:"scalingTargetSelector,omitempty"`
125+
126+
// CanaryStatus tracks the progress of canary deployments.
127+
// +optional
128+
CanaryStatus *CanaryStatus `json:"canaryStatus,omitempty"`
122129
}
123130

124131
// These are valid conditions of a stormService.
@@ -146,6 +153,10 @@ type StormServiceUpdateStrategy struct {
146153

147154
// +optional
148155
MaxSurge *intstr.IntOrString `json:"maxSurge,omitempty" protobuf:"bytes,2,opt,name=maxSurge"`
156+
157+
// Canary defines the canary deployment strategy for gradual rollouts.
158+
// +optional
159+
Canary *CanaryUpdateStrategy `json:"canary,omitempty"`
149160
}
150161

151162
// +enum
@@ -185,6 +196,149 @@ type StormServiceList struct {
185196
Items []StormService `json:"items"`
186197
}
187198

199+
// CanaryUpdateStrategy defines the canary deployment configuration.
200+
type CanaryUpdateStrategy struct {
201+
// Steps defines the sequence of canary deployment steps.
202+
Steps []CanaryStep `json:"steps,omitempty"`
203+
}
204+
205+
// CanaryStep defines a single step in the canary deployment process.
// Both fields are optional pointers, so a step may set a weight, a pause, or both.
206+
type CanaryStep struct {
207+
// SetWeight defines the percentage of traffic/replicas to route to the new version.
208+
// +kubebuilder:validation:Minimum=0
209+
// +kubebuilder:validation:Maximum=100
210+
// +optional
211+
SetWeight *int32 `json:"setWeight,omitempty"`
212+
213+
// Pause defines a pause in the canary deployment.
214+
// +optional
215+
Pause *PauseStep `json:"pause,omitempty"`
216+
}
217+
218+
// PauseStep defines pause behavior in canary deployments.
219+
type PauseStep struct {
220+
// Duration specifies how long to pause.
221+
// - String: "30s", "5m", etc. (parsed as time.Duration); a bare integer
//   string such as "30" is also accepted and read as seconds
222+
// - Int: seconds as integer
223+
// - nil: manual pause requiring user intervention
224+
// Resume a manual pause by setting duration to "0" or 0.
225+
// +optional
226+
Duration *intstr.IntOrString `json:"duration,omitempty"`
227+
}
228+
229+
// DurationSeconds converts the pause duration to seconds
230+
// Returns:
231+
// - >= 0: pause duration in seconds
232+
// - 0: manual pause (nil duration) or resume (duration "0"/0)
233+
// - -1: invalid duration string
234+
func (p *PauseStep) DurationSeconds() int32 {
235+
if p.Duration == nil {
236+
return 0 // Manual pause
237+
}
238+
239+
if p.Duration.Type == intstr.String {
240+
// Try parsing as integer first
241+
if s, err := strconv.ParseInt(p.Duration.StrVal, 10, 32); err == nil {
242+
return int32(s)
243+
}
244+
// Try parsing as duration string
245+
if d, err := time.ParseDuration(p.Duration.StrVal); err == nil {
246+
return int32(d.Seconds())
247+
}
248+
return -1 // Invalid string
249+
}
250+
251+
return p.Duration.IntVal
252+
}
253+
254+
// IsManualPause returns true if this is a manual pause (nil duration)
255+
func (p *PauseStep) IsManualPause() bool {
256+
return p.Duration == nil
257+
}
258+
259+
// IsResume returns true if this represents a resume action (duration 0 or "0")
260+
func (p *PauseStep) IsResume() bool {
261+
if p.Duration == nil {
262+
return false
263+
}
264+
return p.DurationSeconds() == 0
265+
}
266+
267+
// CanaryStatus tracks the progress of a canary deployment.
268+
type CanaryStatus struct {
269+
// CurrentStep is the index of the current step in the canary deployment.
// Because of omitempty, a value of 0 is left out of the serialized status.
270+
// +optional
271+
CurrentStep int32 `json:"currentStep,omitempty"`
272+
273+
// PauseConditions indicates the reasons why the canary deployment is paused.
274+
// When paused, the first pause condition's StartTime indicates when the pause began.
275+
// +optional
276+
PauseConditions []PauseCondition `json:"pauseConditions,omitempty"`
277+
278+
// NOTE: Removed StableRevision and CanaryRevision fields.
279+
// Use status.CurrentRevision for the stable revision.
280+
// Use status.UpdateRevision for the canary revision.
281+
282+
// Phase indicates the current phase of the canary deployment.
283+
// +optional
284+
Phase CanaryPhase `json:"phase,omitempty"`
285+
286+
// NOTE: Removed CanaryReplicas and StableReplicas fields for replica mode.
287+
// Use status.UpdatedReplicas for the canary replica count.
288+
// Calculate stable replicas as: status.Replicas - status.UpdatedReplicas.
289+
290+
// RoleCanaryCounts tracks per-role canary pod counts (pooled mode).
// NOTE(review): presumably keyed by role name; confirm against the controller.
291+
// TODO(jiaxin): use top level status instead once the separate PR is merged.
292+
// +optional
293+
RoleCanaryCounts map[string]int32 `json:"roleCanaryCounts,omitempty"`
294+
295+
// TotalCanaryPods is the total number of canary pods across all roles (pooled mode).
296+
// +optional
297+
TotalCanaryPods int32 `json:"totalCanaryPods,omitempty"`
298+
299+
// AbortedAt indicates when the canary deployment was aborted.
300+
// +optional
301+
AbortedAt *metav1.Time `json:"abortedAt,omitempty"`
302+
303+
// Message provides details about the current canary state.
304+
// +optional
305+
Message string `json:"message,omitempty"`
306+
}
307+
308+
// CanaryPhase represents the phase of a canary deployment.
309+
// +enum
310+
type CanaryPhase string
311+
312+
const (
313+
// CanaryPhaseInitializing indicates the canary deployment is starting.
314+
CanaryPhaseInitializing CanaryPhase = "Initializing"
315+
// CanaryPhaseProgressing indicates the canary deployment is progressing through steps.
316+
CanaryPhaseProgressing CanaryPhase = "Progressing"
317+
// CanaryPhasePaused indicates the canary deployment is paused.
318+
CanaryPhasePaused CanaryPhase = "Paused"
319+
// CanaryPhaseCompleted indicates the canary deployment has completed successfully.
320+
CanaryPhaseCompleted CanaryPhase = "Completed"
321+
// CanaryPhaseAborted indicates the canary deployment was aborted/rolled back.
322+
CanaryPhaseAborted CanaryPhase = "Aborted"
323+
)
324+
325+
// PauseReason represents the reason for a pause condition.
326+
// +enum
327+
type PauseReason string
328+
329+
const (
330+
// PauseReasonCanaryPauseStep indicates a pause at a canary step.
331+
PauseReasonCanaryPauseStep PauseReason = "CanaryPauseStep"
332+
)
333+
334+
// PauseCondition represents a pause condition in the canary deployment.
// Neither field's JSON tag carries omitempty, so both are always serialized.
335+
type PauseCondition struct {
336+
// Reason indicates why the canary deployment was paused.
337+
Reason PauseReason `json:"reason"`
338+
// StartTime is when the pause condition was added.
339+
StartTime metav1.Time `json:"startTime"`
340+
}
341+
188342
// init registers the StormService and StormServiceList types with SchemeBuilder.
func init() {
189343
SchemeBuilder.Register(&StormService{}, &StormServiceList{})
190344
}

api/orchestration/v1alpha1/zz_generated.deepcopy.go

Lines changed: 126 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)