From 25f282cb76d05b0f1696f02333fb719c08cc26d0 Mon Sep 17 00:00:00 2001 From: Wang Zhenyu Date: Sun, 19 Oct 2025 21:45:04 +0800 Subject: [PATCH 1/4] feat: integration test for podAutoScaler Signed-off-by: Wang Zhenyu --- .../controller/podautoscaler_test.go | 1282 +++++++++++++++++ 1 file changed, 1282 insertions(+) create mode 100644 test/integration/controller/podautoscaler_test.go diff --git a/test/integration/controller/podautoscaler_test.go b/test/integration/controller/podautoscaler_test.go new file mode 100644 index 000000000..bb1bb0b4f --- /dev/null +++ b/test/integration/controller/podautoscaler_test.go @@ -0,0 +1,1282 @@ +/* +Copyright 2025 The Aibrix Team. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package controller + +import ( + "context" + "time" + + "github.com/onsi/ginkgo/v2" + "github.com/onsi/gomega" + appsv1 "k8s.io/api/apps/v1" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/utils/ptr" + "sigs.k8s.io/controller-runtime/pkg/client" + + autoscalingv1alpha1 "github.com/vllm-project/aibrix/api/autoscaling/v1alpha1" + orchestrationapi "github.com/vllm-project/aibrix/api/orchestration/v1alpha1" + "github.com/vllm-project/aibrix/test/utils/validation" + "github.com/vllm-project/aibrix/test/utils/wrapper" +) + +// Condition type constants from controller +const ( + ConditionReady = "Ready" + ConditionValidSpec = "ValidSpec" + ConditionConflict = "MutilPodAutoscalerConflict" + ConditionScalingActive = "ScalingActive" + ConditionAbleToScale = "AbleToScale" + + ReasonAsExpected = "AsExpected" + ReasonInvalidScalingStrategy = "InvalidScalingStrategy" + ReasonInvalidBounds = "InvalidBounds" + ReasonMissingTargetRef = "MissingScaleTargetRef" + ReasonMetricsConfigError = "MetricsConfigError" +) + +var _ = ginkgo.Describe("PodAutoscaler controller test", func() { + var ns *corev1.Namespace + + // update represents a test step: optional mutation + validation + type update struct { + updateFunc func(pa *autoscalingv1alpha1.PodAutoscaler) + checkFunc func(context.Context, client.Client, *autoscalingv1alpha1.PodAutoscaler) + } + + ginkgo.BeforeEach(func() { + ns = &corev1.Namespace{ + ObjectMeta: metav1.ObjectMeta{ + GenerateName: "test-pa-", + }, + } + gomega.Expect(k8sClient.Create(ctx, ns)).To(gomega.Succeed()) + // Ensure namespace is fully created + gomega.Eventually(func() error { + return k8sClient.Get(ctx, client.ObjectKeyFromObject(ns), ns) + }, time.Second*3).Should(gomega.Succeed()) + }) + + ginkgo.AfterEach(func() { + gomega.Expect(k8sClient.Delete(ctx, ns)).To(gomega.Succeed()) + }) + + // testValidatingCase defines a test case with initial setup and a series of updates + type testValidatingCase struct { + 
makePodAutoscaler func() *autoscalingv1alpha1.PodAutoscaler + updates []*update + } + + // Helper: creates a deployment for testing + createDeployment := func(name, namespace string, replicas int32) *appsv1.Deployment { + deployment := &appsv1.Deployment{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + Namespace: namespace, + }, + Spec: appsv1.DeploymentSpec{ + Replicas: &replicas, + Selector: &metav1.LabelSelector{ + MatchLabels: map[string]string{ + "app": name, + }, + }, + Template: corev1.PodTemplateSpec{ + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{ + "app": name, + }, + }, + Spec: corev1.PodSpec{ + Containers: []corev1.Container{ + { + Name: "nginx", + Image: "nginx:latest", + }, + }, + }, + }, + }, + } + gomega.Expect(k8sClient.Create(ctx, deployment)).To(gomega.Succeed()) + return deployment + } + + ginkgo.DescribeTable("test PodAutoscaler creation and reconciliation", + func(tc *testValidatingCase) { + pa := tc.makePodAutoscaler() + for _, upd := range tc.updates { + if upd.updateFunc != nil { + upd.updateFunc(pa) + } + + // Run validation check directly (no need to fetch if PA is deleted) + if upd.checkFunc != nil { + upd.checkFunc(ctx, k8sClient, pa) + } + } + }, + + // ========================================================================= + // HPA Strategy - Resource Lifecycle Management + // ========================================================================= + + ginkgo.Entry("HPA Strategy - Create PA → HPA Created", + &testValidatingCase{ + makePodAutoscaler: func() *autoscalingv1alpha1.PodAutoscaler { + return wrapper.MakePodAutoscaler("pa-hpa-create"). + Namespace(ns.Name). + ScalingStrategy(autoscalingv1alpha1.HPA). + MinReplicas(1). + MaxReplicas(5). + ScaleTargetRefWithKind("Deployment", "apps/v1", "test-deployment"). + MetricSource(wrapper.MakeMetricSourcePod(autoscalingv1alpha1.HTTP, "8080", "/metrics", "requests_per_second", "100")). 
+ Obj() + }, + updates: []*update{ + { + updateFunc: func(pa *autoscalingv1alpha1.PodAutoscaler) { + // Create deployment first + createDeployment("test-deployment", ns.Name, 2) + // Create PodAutoscaler + gomega.Expect(k8sClient.Create(ctx, pa)).To(gomega.Succeed()) + }, + checkFunc: func(ctx context.Context, k8sClient client.Client, pa *autoscalingv1alpha1.PodAutoscaler) { + // Validate HPA is created + hpa := validation.WaitForHPACreated(ctx, k8sClient, ns.Name, pa.Name+"-hpa") + gomega.Expect(hpa).ToNot(gomega.BeNil()) + + // Validate HPA OwnerReference + validation.ValidateHPAOwnerReference(hpa, pa.Name, "PodAutoscaler") + + // Validate HPA Spec + validation.ValidateHPASpec(hpa, 1, 5) + validation.ValidateHPAScaleTargetRef(hpa, "Deployment", "test-deployment") + }, + }, + }, + }, + ), + + ginkgo.Entry("HPA Strategy - Update PA → HPA Synced", + &testValidatingCase{ + makePodAutoscaler: func() *autoscalingv1alpha1.PodAutoscaler { + return wrapper.MakePodAutoscaler("pa-hpa-update"). + Namespace(ns.Name). + ScalingStrategy(autoscalingv1alpha1.HPA). + MinReplicas(1). + MaxReplicas(5). + ScaleTargetRefWithKind("Deployment", "apps/v1", "test-deployment-2"). + MetricSource(wrapper.MakeMetricSourcePod(autoscalingv1alpha1.HTTP, "8080", "/metrics", "requests_per_second", "100")). 
+ Obj() + }, + updates: []*update{ + { + updateFunc: func(pa *autoscalingv1alpha1.PodAutoscaler) { + createDeployment("test-deployment-2", ns.Name, 2) + gomega.Expect(k8sClient.Create(ctx, pa)).To(gomega.Succeed()) + }, + checkFunc: func(ctx context.Context, k8sClient client.Client, pa *autoscalingv1alpha1.PodAutoscaler) { + // Wait for HPA creation + validation.WaitForHPACreated(ctx, k8sClient, ns.Name, pa.Name+"-hpa") + }, + }, + { + updateFunc: func(pa *autoscalingv1alpha1.PodAutoscaler) { + // Update PodAutoscaler spec + time.Sleep(time.Second * 3) // Wait for initial reconcile + fetched := validation.GetPodAutoscaler(ctx, k8sClient, pa) + minReplicas := int32(2) + fetched.Spec.MinReplicas = &minReplicas + fetched.Spec.MaxReplicas = 10 + gomega.Expect(k8sClient.Update(ctx, fetched)).To(gomega.Succeed()) + time.Sleep(time.Second * 3) // Wait for update to propagate + }, + checkFunc: func(ctx context.Context, k8sClient client.Client, pa *autoscalingv1alpha1.PodAutoscaler) { + // Validate HPA is updated with more relaxed timing + gomega.Eventually(func(g gomega.Gomega) { + hpa := validation.GetHPA(ctx, k8sClient, ns.Name, pa.Name+"-hpa") + validation.ValidateHPASpec(hpa, 2, 10) + }, time.Second*15, time.Second*1).Should(gomega.Succeed()) + }, + }, + }, + }, + ), + + ginkgo.Entry("HPA Strategy - Delete PA → HPA Deleted (Cascade)", + &testValidatingCase{ + makePodAutoscaler: func() *autoscalingv1alpha1.PodAutoscaler { + return wrapper.MakePodAutoscaler("pa-hpa-delete"). + Namespace(ns.Name). + ScalingStrategy(autoscalingv1alpha1.HPA). + MinReplicas(1). + MaxReplicas(5). + ScaleTargetRefWithKind("Deployment", "apps/v1", "test-deployment-3"). + MetricSource(wrapper.MakeMetricSourcePod(autoscalingv1alpha1.HTTP, "8080", "/metrics", "requests_per_second", "100")). 
+ Obj() + }, + updates: []*update{ + { + updateFunc: func(pa *autoscalingv1alpha1.PodAutoscaler) { + createDeployment("test-deployment-3", ns.Name, 2) + gomega.Expect(k8sClient.Create(ctx, pa)).To(gomega.Succeed()) + }, + checkFunc: func(ctx context.Context, k8sClient client.Client, pa *autoscalingv1alpha1.PodAutoscaler) { + // Wait for HPA creation + validation.WaitForHPACreated(ctx, k8sClient, ns.Name, pa.Name+"-hpa") + }, + }, + { + updateFunc: func(pa *autoscalingv1alpha1.PodAutoscaler) { + // Wait for initial reconcile + time.Sleep(time.Second * 3) + // Delete PodAutoscaler + gomega.Expect(k8sClient.Delete(ctx, pa)).To(gomega.Succeed()) + }, + checkFunc: func(ctx context.Context, k8sClient client.Client, pa *autoscalingv1alpha1.PodAutoscaler) { + // Validate PA is deleted + validation.WaitForPodAutoscalerDeleted(ctx, k8sClient, pa) + // Note: In envtest, HPA cascade deletion via OwnerReference doesn't work + // because garbage collector controller is not running. In real K8s, + // the HPA would be automatically deleted due to OwnerReference. + // We already verified OwnerReference is set correctly in the creation test. + }, + }, + }, + }, + ), + + // ========================================================================= + // Spec Validation Logic + // ========================================================================= + + ginkgo.Entry("Spec Validation - Invalid ScaleTargetRef (empty name)", + &testValidatingCase{ + makePodAutoscaler: func() *autoscalingv1alpha1.PodAutoscaler { + return wrapper.MakePodAutoscaler("pa-invalid-ref"). + Namespace(ns.Name). + ScalingStrategy(autoscalingv1alpha1.HPA). + MinReplicas(1). + MaxReplicas(5). + ScaleTargetRefWithKind("Deployment", "apps/v1", ""). // Empty name + MetricSource(wrapper.MakeMetricSourcePod(autoscalingv1alpha1.HTTP, "8080", "/metrics", "requests_per_second", "100")). 
+ Obj() + }, + updates: []*update{ + { + updateFunc: func(pa *autoscalingv1alpha1.PodAutoscaler) { + gomega.Expect(k8sClient.Create(ctx, pa)).To(gomega.Succeed()) + // Wait for controller to reconcile + time.Sleep(time.Second * 2) + }, + checkFunc: func(ctx context.Context, k8sClient client.Client, pa *autoscalingv1alpha1.PodAutoscaler) { + // Validate ValidSpec condition is False + validation.WaitForPodAutoscalerConditionWithReason( + ctx, k8sClient, pa, + ConditionValidSpec, metav1.ConditionFalse, + ReasonMissingTargetRef, + ) + }, + }, + }, + }, + ), + + ginkgo.Entry("Spec Validation - Invalid Replica Bounds (min > max)", + &testValidatingCase{ + makePodAutoscaler: func() *autoscalingv1alpha1.PodAutoscaler { + return wrapper.MakePodAutoscaler("pa-invalid-bounds"). + Namespace(ns.Name). + ScalingStrategy(autoscalingv1alpha1.HPA). + MinReplicas(5). // min > max + MaxReplicas(3). + ScaleTargetRefWithKind("Deployment", "apps/v1", "test-deployment-4"). + MetricSource(wrapper.MakeMetricSourcePod(autoscalingv1alpha1.HTTP, "8080", "/metrics", "requests_per_second", "100")). + Obj() + }, + updates: []*update{ + { + updateFunc: func(pa *autoscalingv1alpha1.PodAutoscaler) { + createDeployment("test-deployment-4", ns.Name, 2) + gomega.Expect(k8sClient.Create(ctx, pa)).To(gomega.Succeed()) + // Wait for controller to reconcile + time.Sleep(time.Second * 2) + }, + checkFunc: func(ctx context.Context, k8sClient client.Client, pa *autoscalingv1alpha1.PodAutoscaler) { + // Validate ValidSpec condition is False + validation.WaitForPodAutoscalerConditionWithReason( + ctx, k8sClient, pa, + ConditionValidSpec, metav1.ConditionFalse, + ReasonInvalidBounds, + ) + }, + }, + }, + }, + ), + + // Note: Invalid ScalingStrategy test is skipped because CRD-level validation + // prevents invalid values from being created in the first place. + + // Note: Empty MetricsSources test is skipped because CRD-level validation + // prevents empty metricsSources from being created (minItems=1). 
+ + ginkgo.Entry("Spec Validation - Valid Spec", + &testValidatingCase{ + makePodAutoscaler: func() *autoscalingv1alpha1.PodAutoscaler { + return wrapper.MakePodAutoscaler("pa-valid-spec"). + Namespace(ns.Name). + ScalingStrategy(autoscalingv1alpha1.HPA). + MinReplicas(1). + MaxReplicas(5). + ScaleTargetRefWithKind("Deployment", "apps/v1", "test-deployment-7"). + MetricSource(wrapper.MakeMetricSourcePod(autoscalingv1alpha1.HTTP, "8080", "/metrics", "requests_per_second", "100")). + Obj() + }, + updates: []*update{ + { + updateFunc: func(pa *autoscalingv1alpha1.PodAutoscaler) { + createDeployment("test-deployment-7", ns.Name, 2) + gomega.Expect(k8sClient.Create(ctx, pa)).To(gomega.Succeed()) + // Wait for controller to reconcile + time.Sleep(time.Second * 2) + }, + checkFunc: func(ctx context.Context, k8sClient client.Client, pa *autoscalingv1alpha1.PodAutoscaler) { + // Validate ValidSpec condition is True + validation.WaitForPodAutoscalerConditionWithReason( + ctx, k8sClient, pa, + ConditionValidSpec, metav1.ConditionTrue, + ReasonAsExpected, + ) + }, + }, + }, + }, + ), + + // ========================================================================= + // Conflict Detection Mechanism + // ========================================================================= + + ginkgo.Entry("Conflict Detection - Two PAs target same Deployment", + &testValidatingCase{ + makePodAutoscaler: func() *autoscalingv1alpha1.PodAutoscaler { + return wrapper.MakePodAutoscaler("pa-conflict-1"). + Namespace(ns.Name). + ScalingStrategy(autoscalingv1alpha1.HPA). + MinReplicas(1). + MaxReplicas(5). + ScaleTargetRefWithKind("Deployment", "apps/v1", "shared-deployment"). + MetricSource(wrapper.MakeMetricSourcePod(autoscalingv1alpha1.HTTP, "8080", "/metrics", "requests_per_second", "100")). 
+ Obj() + }, + updates: []*update{ + { + updateFunc: func(pa *autoscalingv1alpha1.PodAutoscaler) { + // Create deployment + createDeployment("shared-deployment", ns.Name, 2) + // Create first PA + gomega.Expect(k8sClient.Create(ctx, pa)).To(gomega.Succeed()) + time.Sleep(time.Second * 2) + }, + checkFunc: func(ctx context.Context, k8sClient client.Client, pa *autoscalingv1alpha1.PodAutoscaler) { + // PA1 should not have conflict + fetched := validation.GetPodAutoscaler(ctx, k8sClient, pa) + validation.ValidatePodAutoscalerConditionNotExists(fetched, ConditionConflict) + }, + }, + { + updateFunc: func(pa *autoscalingv1alpha1.PodAutoscaler) { + // Create second PA targeting the same deployment + pa2 := wrapper.MakePodAutoscaler("pa-conflict-2"). + Namespace(ns.Name). + ScalingStrategy(autoscalingv1alpha1.HPA). + MinReplicas(1). + MaxReplicas(10). + ScaleTargetRefWithKind("Deployment", "apps/v1", "shared-deployment"). + MetricSource(wrapper.MakeMetricSourcePod(autoscalingv1alpha1.HTTP, "8080", "/metrics", "requests_per_second", "100")). 
+ Obj() + gomega.Expect(k8sClient.Create(ctx, pa2)).To(gomega.Succeed()) + time.Sleep(time.Second * 2) + }, + checkFunc: func(ctx context.Context, k8sClient client.Client, pa *autoscalingv1alpha1.PodAutoscaler) { + // PA2 should have conflict condition with Status=False (conflict detected) + pa2 := &autoscalingv1alpha1.PodAutoscaler{} + gomega.Eventually(func(g gomega.Gomega) { + err := k8sClient.Get(ctx, client.ObjectKey{Namespace: ns.Name, Name: "pa-conflict-2"}, pa2) + g.Expect(err).ToNot(gomega.HaveOccurred()) + validation.ValidatePodAutoscalerConditionExists(pa2, ConditionConflict) + // When there's a conflict, Status=False (conflict exists) + validation.ValidatePodAutoscalerCondition(pa2, ConditionConflict, metav1.ConditionFalse, "") + }, time.Second*10, time.Millisecond*250).Should(gomega.Succeed()) + }, + }, + }, + }, + ), + + ginkgo.Entry("Conflict Resolution - Delete first PA", + &testValidatingCase{ + makePodAutoscaler: func() *autoscalingv1alpha1.PodAutoscaler { + return wrapper.MakePodAutoscaler("pa-resolve-1"). + Namespace(ns.Name). + ScalingStrategy(autoscalingv1alpha1.HPA). + MinReplicas(1). + MaxReplicas(5). + ScaleTargetRefWithKind("Deployment", "apps/v1", "resolve-deployment"). + MetricSource(wrapper.MakeMetricSourcePod(autoscalingv1alpha1.HTTP, "8080", "/metrics", "requests_per_second", "100")). + Obj() + }, + updates: []*update{ + { + updateFunc: func(pa *autoscalingv1alpha1.PodAutoscaler) { + // Create deployment and two PAs + createDeployment("resolve-deployment", ns.Name, 2) + gomega.Expect(k8sClient.Create(ctx, pa)).To(gomega.Succeed()) + + pa2 := wrapper.MakePodAutoscaler("pa-resolve-2"). + Namespace(ns.Name). + ScalingStrategy(autoscalingv1alpha1.HPA). + MinReplicas(1). + MaxReplicas(10). + ScaleTargetRefWithKind("Deployment", "apps/v1", "resolve-deployment"). + MetricSource(wrapper.MakeMetricSourcePod(autoscalingv1alpha1.HTTP, "8080", "/metrics", "requests_per_second", "100")). 
+ Obj() + gomega.Expect(k8sClient.Create(ctx, pa2)).To(gomega.Succeed()) + time.Sleep(time.Second * 2) + }, + checkFunc: func(ctx context.Context, k8sClient client.Client, pa *autoscalingv1alpha1.PodAutoscaler) { + // Verify PA2 has conflict + pa2 := &autoscalingv1alpha1.PodAutoscaler{} + err := k8sClient.Get(ctx, client.ObjectKey{Namespace: ns.Name, Name: "pa-resolve-2"}, pa2) + gomega.Expect(err).ToNot(gomega.HaveOccurred()) + validation.ValidatePodAutoscalerConditionExists(pa2, ConditionConflict) + }, + }, + { + updateFunc: func(pa *autoscalingv1alpha1.PodAutoscaler) { + // Delete first PA + gomega.Expect(k8sClient.Delete(ctx, pa)).To(gomega.Succeed()) + time.Sleep(time.Second * 2) + + // Manually trigger PA2 reconcile by updating it (no-op update) + // This forces the controller to re-check the conflict status + pa2 := &autoscalingv1alpha1.PodAutoscaler{} + err := k8sClient.Get(ctx, client.ObjectKey{Namespace: ns.Name, Name: "pa-resolve-2"}, pa2) + gomega.Expect(err).ToNot(gomega.HaveOccurred()) + + // Add an annotation to trigger reconcile + if pa2.Annotations == nil { + pa2.Annotations = make(map[string]string) + } + pa2.Annotations["test.aibrix.ai/force-reconcile"] = time.Now().Format(time.RFC3339) + gomega.Expect(k8sClient.Update(ctx, pa2)).To(gomega.Succeed()) + time.Sleep(time.Second * 3) + }, + checkFunc: func(ctx context.Context, k8sClient client.Client, pa *autoscalingv1alpha1.PodAutoscaler) { + // PA2 conflict should be resolved - condition should be removed + pa2 := &autoscalingv1alpha1.PodAutoscaler{} + gomega.Eventually(func(g gomega.Gomega) { + err := k8sClient.Get(ctx, client.ObjectKey{Namespace: ns.Name, Name: "pa-resolve-2"}, pa2) + g.Expect(err).ToNot(gomega.HaveOccurred()) + // After conflict resolution, the conflict condition should be removed + validation.ValidatePodAutoscalerConditionNotExists(pa2, ConditionConflict) + }, time.Second*15, time.Second*1).Should(gomega.Succeed()) + }, + }, + }, + }, + ), + + // 
========================================================================= + // Status and Condition Management + // ========================================================================= + + ginkgo.Entry("Status Management - DesiredScale and ActualScale", + &testValidatingCase{ + makePodAutoscaler: func() *autoscalingv1alpha1.PodAutoscaler { + return wrapper.MakePodAutoscaler("pa-status-scale"). + Namespace(ns.Name). + ScalingStrategy(autoscalingv1alpha1.HPA). + MinReplicas(1). + MaxReplicas(5). + ScaleTargetRefWithKind("Deployment", "apps/v1", "status-deployment"). + MetricSource(wrapper.MakeMetricSourcePod(autoscalingv1alpha1.HTTP, "8080", "/metrics", "requests_per_second", "100")). + Obj() + }, + updates: []*update{ + { + updateFunc: func(pa *autoscalingv1alpha1.PodAutoscaler) { + // Create deployment with 2 replicas + createDeployment("status-deployment", ns.Name, 2) + gomega.Expect(k8sClient.Create(ctx, pa)).To(gomega.Succeed()) + time.Sleep(time.Second * 2) + }, + checkFunc: func(ctx context.Context, k8sClient client.Client, pa *autoscalingv1alpha1.PodAutoscaler) { + // Verify status is updated + gomega.Eventually(func(g gomega.Gomega) { + fetched := validation.GetPodAutoscaler(ctx, k8sClient, pa) + // ActualScale should reflect deployment replicas + g.Expect(fetched.Status.ActualScale).To(gomega.BeNumerically(">=", 0)) + // DesiredScale should be set + g.Expect(fetched.Status.DesiredScale).To(gomega.BeNumerically(">=", 0)) + }, time.Second*10, time.Millisecond*250).Should(gomega.Succeed()) + }, + }, + }, + }, + ), + + ginkgo.Entry("Condition Management - AbleToScale", + &testValidatingCase{ + makePodAutoscaler: func() *autoscalingv1alpha1.PodAutoscaler { + return wrapper.MakePodAutoscaler("pa-condition-able"). + Namespace(ns.Name). + ScalingStrategy(autoscalingv1alpha1.HPA). + MinReplicas(1). + MaxReplicas(5). + ScaleTargetRefWithKind("Deployment", "apps/v1", "condition-deployment"). 
+ MetricSource(wrapper.MakeMetricSourcePod(autoscalingv1alpha1.HTTP, "8080", "/metrics", "requests_per_second", "100")). + Obj() + }, + updates: []*update{ + { + updateFunc: func(pa *autoscalingv1alpha1.PodAutoscaler) { + createDeployment("condition-deployment", ns.Name, 2) + gomega.Expect(k8sClient.Create(ctx, pa)).To(gomega.Succeed()) + time.Sleep(time.Second * 2) + }, + checkFunc: func(ctx context.Context, k8sClient client.Client, pa *autoscalingv1alpha1.PodAutoscaler) { + // Verify AbleToScale condition exists and is True + validation.WaitForPodAutoscalerCondition( + ctx, k8sClient, pa, + ConditionAbleToScale, metav1.ConditionTrue, + ) + }, + }, + }, + }, + ), + + ginkgo.Entry("Condition Management - Ready condition transitions", + &testValidatingCase{ + makePodAutoscaler: func() *autoscalingv1alpha1.PodAutoscaler { + return wrapper.MakePodAutoscaler("pa-condition-ready"). + Namespace(ns.Name). + ScalingStrategy(autoscalingv1alpha1.HPA). + MinReplicas(1). + MaxReplicas(5). + ScaleTargetRefWithKind("Deployment", "apps/v1", "ready-deployment"). + MetricSource(wrapper.MakeMetricSourcePod(autoscalingv1alpha1.HTTP, "8080", "/metrics", "requests_per_second", "100")). 
+ Obj() + }, + updates: []*update{ + { + updateFunc: func(pa *autoscalingv1alpha1.PodAutoscaler) { + createDeployment("ready-deployment", ns.Name, 2) + gomega.Expect(k8sClient.Create(ctx, pa)).To(gomega.Succeed()) + time.Sleep(time.Second * 2) + }, + checkFunc: func(ctx context.Context, k8sClient client.Client, pa *autoscalingv1alpha1.PodAutoscaler) { + // Verify all basic conditions exist + gomega.Eventually(func(g gomega.Gomega) { + fetched := validation.GetPodAutoscaler(ctx, k8sClient, pa) + validation.ValidatePodAutoscalerConditionExists(fetched, ConditionReady) + validation.ValidatePodAutoscalerConditionExists(fetched, ConditionValidSpec) + validation.ValidatePodAutoscalerConditionExists(fetched, ConditionAbleToScale) + validation.ValidatePodAutoscalerConditionExists(fetched, ConditionScalingActive) + }, time.Second*10, time.Millisecond*250).Should(gomega.Succeed()) + }, + }, + }, + }, + ), + + // ========================================================================= + // Scale Target Management + // ========================================================================= + + ginkgo.Entry("Scale Target - Deployment scaling", + &testValidatingCase{ + makePodAutoscaler: func() *autoscalingv1alpha1.PodAutoscaler { + return wrapper.MakePodAutoscaler("pa-scale-deployment"). + Namespace(ns.Name). + ScalingStrategy(autoscalingv1alpha1.HPA). + MinReplicas(1). + MaxReplicas(10). + ScaleTargetRefWithKind("Deployment", "apps/v1", "scale-test-deployment"). + MetricSource(wrapper.MakeMetricSourcePod(autoscalingv1alpha1.HTTP, "8080", "/metrics", "requests_per_second", "100")). 
+ Obj() + }, + updates: []*update{ + { + updateFunc: func(pa *autoscalingv1alpha1.PodAutoscaler) { + // Create deployment + createDeployment("scale-test-deployment", ns.Name, 3) + gomega.Expect(k8sClient.Create(ctx, pa)).To(gomega.Succeed()) + time.Sleep(time.Second * 2) + }, + checkFunc: func(ctx context.Context, k8sClient client.Client, pa *autoscalingv1alpha1.PodAutoscaler) { + // Verify PA can get current replicas from deployment + gomega.Eventually(func(g gomega.Gomega) { + fetched := validation.GetPodAutoscaler(ctx, k8sClient, pa) + // ActualScale should reflect deployment's replicas + g.Expect(fetched.Status.ActualScale).To(gomega.BeNumerically(">=", 0)) + // For HPA strategy, HPA should be created + validation.WaitForHPACreated(ctx, k8sClient, ns.Name, pa.Name+"-hpa") + }, time.Second*10, time.Millisecond*250).Should(gomega.Succeed()) + }, + }, + }, + }, + ), + + ginkgo.Entry("Scale Target - Target Resource Not Found", + &testValidatingCase{ + makePodAutoscaler: func() *autoscalingv1alpha1.PodAutoscaler { + return wrapper.MakePodAutoscaler("pa-target-notfound"). + Namespace(ns.Name). + ScalingStrategy(autoscalingv1alpha1.HPA). + MinReplicas(1). + MaxReplicas(5). + ScaleTargetRefWithKind("Deployment", "apps/v1", "nonexistent-deployment"). + MetricSource(wrapper.MakeMetricSourcePod(autoscalingv1alpha1.HTTP, "8080", "/metrics", "requests_per_second", "100")). 
+ Obj() + }, + updates: []*update{ + { + updateFunc: func(pa *autoscalingv1alpha1.PodAutoscaler) { + // Don't create the deployment - test missing target + gomega.Expect(k8sClient.Create(ctx, pa)).To(gomega.Succeed()) + time.Sleep(time.Second * 2) + }, + checkFunc: func(ctx context.Context, k8sClient client.Client, pa *autoscalingv1alpha1.PodAutoscaler) { + // Should not crash, ValidSpec should be True (spec itself is valid) + gomega.Eventually(func(g gomega.Gomega) { + fetched := validation.GetPodAutoscaler(ctx, k8sClient, pa) + // Spec validation should pass (the spec is syntactically correct) + validation.ValidatePodAutoscalerCondition(fetched, ConditionValidSpec, metav1.ConditionTrue, "") + // Controller handles missing target gracefully + // HPA will be created even if target doesn't exist (K8s HPA behavior) + }, time.Second*10, time.Millisecond*250).Should(gomega.Succeed()) + }, + }, + }, + }, + ), + + // ========================================================================= + // Boundary Enforcement + // ========================================================================= + + ginkgo.Entry("Boundary Enforcement - maxReplicas enforced in HPA", + &testValidatingCase{ + makePodAutoscaler: func() *autoscalingv1alpha1.PodAutoscaler { + return wrapper.MakePodAutoscaler("pa-boundary-max"). + Namespace(ns.Name). + ScalingStrategy(autoscalingv1alpha1.HPA). + MinReplicas(1). + MaxReplicas(5). + ScaleTargetRefWithKind("Deployment", "apps/v1", "boundary-deployment"). + MetricSource(wrapper.MakeMetricSourcePod(autoscalingv1alpha1.HTTP, "8080", "/metrics", "requests_per_second", "100")). 
+ Obj() + }, + updates: []*update{ + { + updateFunc: func(pa *autoscalingv1alpha1.PodAutoscaler) { + // Create deployment with many replicas + createDeployment("boundary-deployment", ns.Name, 8) + gomega.Expect(k8sClient.Create(ctx, pa)).To(gomega.Succeed()) + time.Sleep(time.Second * 2) + }, + checkFunc: func(ctx context.Context, k8sClient client.Client, pa *autoscalingv1alpha1.PodAutoscaler) { + // HPA should be created with maxReplicas=5 + hpa := validation.WaitForHPACreated(ctx, k8sClient, ns.Name, pa.Name+"-hpa") + validation.ValidateHPASpec(hpa, 1, 5) + // HPA will enforce the max boundary + }, + }, + }, + }, + ), + + ginkgo.Entry("Boundary Enforcement - minReplicas enforced in HPA", + &testValidatingCase{ + makePodAutoscaler: func() *autoscalingv1alpha1.PodAutoscaler { + return wrapper.MakePodAutoscaler("pa-boundary-min"). + Namespace(ns.Name). + ScalingStrategy(autoscalingv1alpha1.HPA). + MinReplicas(3). + MaxReplicas(10). + ScaleTargetRefWithKind("Deployment", "apps/v1", "boundary-min-deployment"). + MetricSource(wrapper.MakeMetricSourcePod(autoscalingv1alpha1.HTTP, "8080", "/metrics", "requests_per_second", "100")). + Obj() + }, + updates: []*update{ + { + updateFunc: func(pa *autoscalingv1alpha1.PodAutoscaler) { + // Create deployment with few replicas + createDeployment("boundary-min-deployment", ns.Name, 1) + gomega.Expect(k8sClient.Create(ctx, pa)).To(gomega.Succeed()) + time.Sleep(time.Second * 2) + }, + checkFunc: func(ctx context.Context, k8sClient client.Client, pa *autoscalingv1alpha1.PodAutoscaler) { + // HPA should be created with minReplicas=3 + hpa := validation.WaitForHPACreated(ctx, k8sClient, ns.Name, pa.Name+"-hpa") + validation.ValidateHPASpec(hpa, 3, 10) + }, + }, + }, + }, + ), + + ginkgo.Entry("Boundary Enforcement - minReplicas=0 in PA spec", + &testValidatingCase{ + makePodAutoscaler: func() *autoscalingv1alpha1.PodAutoscaler { + return wrapper.MakePodAutoscaler("pa-boundary-zero"). + Namespace(ns.Name). 
+ ScalingStrategy(autoscalingv1alpha1.HPA). + MinReplicas(0). // Set minReplicas=0 in PA + MaxReplicas(5). + ScaleTargetRefWithKind("Deployment", "apps/v1", "boundary-zero-deployment"). + MetricSource(wrapper.MakeMetricSourcePod(autoscalingv1alpha1.HTTP, "8080", "/metrics", "requests_per_second", "100")). + Obj() + }, + updates: []*update{ + { + updateFunc: func(pa *autoscalingv1alpha1.PodAutoscaler) { + createDeployment("boundary-zero-deployment", ns.Name, 1) + gomega.Expect(k8sClient.Create(ctx, pa)).To(gomega.Succeed()) + time.Sleep(time.Second * 2) + }, + checkFunc: func(ctx context.Context, k8sClient client.Client, pa *autoscalingv1alpha1.PodAutoscaler) { + // Verify PA spec has minReplicas=0 + fetched := validation.GetPodAutoscaler(ctx, k8sClient, pa) + gomega.Expect(fetched.Spec.MinReplicas).ToNot(gomega.BeNil()) + gomega.Expect(*fetched.Spec.MinReplicas).To(gomega.Equal(int32(0))) + // HPA will not have minReplicas set (uses default 1) when PA minReplicas=0 + // This is controller design: only sets HPA minReplicas when PA minReplicas > 0 + hpa := validation.WaitForHPACreated(ctx, k8sClient, ns.Name, pa.Name+"-hpa") + // HPA minReplicas will be nil or 1 (K8s default) + if hpa.Spec.MinReplicas != nil { + gomega.Expect(*hpa.Spec.MinReplicas).To(gomega.BeNumerically(">=", 1)) + } + }, + }, + }, + }, + ), + + // ========================================================================= + // Scaling History Management + // ========================================================================= + + ginkgo.Entry("ScalingHistory - Basic history tracking in HPA mode", + &testValidatingCase{ + makePodAutoscaler: func() *autoscalingv1alpha1.PodAutoscaler { + return wrapper.MakePodAutoscaler("pa-history"). + Namespace(ns.Name). + ScalingStrategy(autoscalingv1alpha1.HPA). + MinReplicas(1). + MaxReplicas(10). + ScaleTargetRefWithKind("Deployment", "apps/v1", "history-deployment"). 
+ MetricSource(wrapper.MakeMetricSourcePod(autoscalingv1alpha1.HTTP, "8080", "/metrics", "requests_per_second", "100")). + Obj() + }, + updates: []*update{ + { + updateFunc: func(pa *autoscalingv1alpha1.PodAutoscaler) { + createDeployment("history-deployment", ns.Name, 2) + gomega.Expect(k8sClient.Create(ctx, pa)).To(gomega.Succeed()) + time.Sleep(time.Second * 3) + }, + checkFunc: func(ctx context.Context, k8sClient client.Client, pa *autoscalingv1alpha1.PodAutoscaler) { + // Verify PA is created + fetched := validation.GetPodAutoscaler(ctx, k8sClient, pa) + // In HPA mode, ScalingHistory is managed by HPA and may be empty + // Just verify the field exists and is within limits + if fetched.Status.ScalingHistory != nil { + // maxScalingHistorySize = 5 + gomega.Expect(len(fetched.Status.ScalingHistory)).To(gomega.BeNumerically("<=", 5)) + } + // Main validation: PA has valid conditions + validation.ValidatePodAutoscalerConditionExists(fetched, ConditionValidSpec) + }, + }, + }, + }, + ), + + // ========================================================================= + // StormService Scaling + // ========================================================================= + + ginkgo.Entry("StormService Scaling - Replica Mode (scale entire StormService)", + &testValidatingCase{ + makePodAutoscaler: func() *autoscalingv1alpha1.PodAutoscaler { + return wrapper.MakePodAutoscaler("pa-ss-replica"). + Namespace(ns.Name). + ScalingStrategy(autoscalingv1alpha1.HPA). + MinReplicas(1). + MaxReplicas(10). + ScaleTargetRefWithKind("StormService", "orchestration.aibrix.ai/v1alpha1", "test-stormservice"). + MetricSource(wrapper.MakeMetricSourcePod(autoscalingv1alpha1.HTTP, "8080", "/metrics", "requests_per_second", "100")). 
+ Obj() + }, + updates: []*update{ + { + updateFunc: func(pa *autoscalingv1alpha1.PodAutoscaler) { + // Create StormService with 2 roles + matchLabel := map[string]string{"app": "test-vllm"} + podTemplate := corev1.PodTemplateSpec{ + ObjectMeta: metav1.ObjectMeta{ + Labels: matchLabel, + }, + Spec: corev1.PodSpec{ + Containers: []corev1.Container{ + { + Name: "vllm-container", + Image: "vllm/vllm-openai:latest", + }, + }, + }, + } + roleSetSpec := &orchestrationapi.RoleSetSpec{ + Roles: []orchestrationapi.RoleSpec{ + { + Name: "prefill", + Replicas: ptr.To(int32(2)), + Template: podTemplate, + Stateful: false, + }, + { + Name: "decode", + Replicas: ptr.To(int32(1)), + Template: podTemplate, + Stateful: false, + }, + }, + } + ss := wrapper.MakeStormService("test-stormservice"). + Namespace(ns.Name). + Replicas(ptr.To(int32(2))). + Selector(metav1.SetAsLabelSelector(matchLabel)). + UpdateStrategyType(orchestrationapi.RollingUpdateStormServiceStrategyType). + RoleSetTemplateMeta(metav1.ObjectMeta{Labels: matchLabel}, roleSetSpec). 
+ Obj() + gomega.Expect(k8sClient.Create(ctx, ss)).To(gomega.Succeed()) + + // Create PA + gomega.Expect(k8sClient.Create(ctx, pa)).To(gomega.Succeed()) + time.Sleep(time.Second * 3) + }, + checkFunc: func(ctx context.Context, k8sClient client.Client, pa *autoscalingv1alpha1.PodAutoscaler) { + // Verify PA is created and HPA is created + gomega.Eventually(func(g gomega.Gomega) { + fetched := validation.GetPodAutoscaler(ctx, k8sClient, pa) + // ValidSpec should be True + validation.ValidatePodAutoscalerCondition(fetched, ConditionValidSpec, metav1.ConditionTrue, "") + // HPA should be created for StormService + validation.WaitForHPACreated(ctx, k8sClient, ns.Name, pa.Name+"-hpa") + }, time.Second*15, time.Millisecond*500).Should(gomega.Succeed()) + }, + }, + }, + }, + ), + + ginkgo.Entry("StormService Scaling - Role-Level with SubTargetSelector", + &testValidatingCase{ + makePodAutoscaler: func() *autoscalingv1alpha1.PodAutoscaler { + return wrapper.MakePodAutoscaler("pa-ss-role"). + Namespace(ns.Name). + ScalingStrategy(autoscalingv1alpha1.KPA). + MinReplicas(1). + MaxReplicas(10). + ScaleTargetRefWithKind("StormService", "orchestration.aibrix.ai/v1alpha1", "test-stormservice-role"). + SubTargetSelector("prefill"). // Only scale "prefill" role + MetricSource(wrapper.MakeMetricSourcePod(autoscalingv1alpha1.HTTP, "8080", "/metrics", "gpu_cache_usage_perc", "0.7")). 
+ Obj() + }, + updates: []*update{ + { + updateFunc: func(pa *autoscalingv1alpha1.PodAutoscaler) { + // Create StormService with prefill and decode roles + matchLabel := map[string]string{"app": "test-vllm-role"} + podTemplate := corev1.PodTemplateSpec{ + ObjectMeta: metav1.ObjectMeta{ + Labels: matchLabel, + }, + Spec: corev1.PodSpec{ + Containers: []corev1.Container{ + { + Name: "vllm-container", + Image: "vllm/vllm-openai:latest", + }, + }, + }, + } + roleSetSpec := &orchestrationapi.RoleSetSpec{ + Roles: []orchestrationapi.RoleSpec{ + { + Name: "prefill", + Replicas: ptr.To(int32(3)), + Template: podTemplate, + Stateful: false, + }, + { + Name: "decode", + Replicas: ptr.To(int32(2)), + Template: podTemplate, + Stateful: false, + }, + }, + } + ss := wrapper.MakeStormService("test-stormservice-role"). + Namespace(ns.Name). + Replicas(ptr.To(int32(2))). + Selector(metav1.SetAsLabelSelector(matchLabel)). + UpdateStrategyType(orchestrationapi.RollingUpdateStormServiceStrategyType). + RoleSetTemplateMeta(metav1.ObjectMeta{Labels: matchLabel}, roleSetSpec). 
+ Obj() + gomega.Expect(k8sClient.Create(ctx, ss)).To(gomega.Succeed()) + + // Create PA targeting only "prefill" role + gomega.Expect(k8sClient.Create(ctx, pa)).To(gomega.Succeed()) + time.Sleep(time.Second * 3) + }, + checkFunc: func(ctx context.Context, k8sClient client.Client, pa *autoscalingv1alpha1.PodAutoscaler) { + // Verify PA is created with role-level targeting + gomega.Eventually(func(g gomega.Gomega) { + fetched := validation.GetPodAutoscaler(ctx, k8sClient, pa) + // ValidSpec should be True + validation.ValidatePodAutoscalerCondition(fetched, ConditionValidSpec, metav1.ConditionTrue, "") + // SubTargetSelector should be set + g.Expect(fetched.Spec.SubTargetSelector).ToNot(gomega.BeNil()) + g.Expect(fetched.Spec.SubTargetSelector.RoleName).To(gomega.Equal("prefill")) + // AbleToScale should eventually be True + validation.ValidatePodAutoscalerConditionExists(fetched, ConditionAbleToScale) + }, time.Second*15, time.Millisecond*500).Should(gomega.Succeed()) + }, + }, + }, + }, + ), + + ginkgo.Entry("StormService Scaling - Role-Level Conflict Detection", + &testValidatingCase{ + makePodAutoscaler: func() *autoscalingv1alpha1.PodAutoscaler { + // This will be PA2 (created second) + return wrapper.MakePodAutoscaler("pa-ss-conflict-2"). + Namespace(ns.Name). + ScalingStrategy(autoscalingv1alpha1.KPA). + MinReplicas(1). + MaxReplicas(10). + ScaleTargetRefWithKind("StormService", "orchestration.aibrix.ai/v1alpha1", "test-stormservice-conflict"). + SubTargetSelector("prefill"). // Same role as PA1 + MetricSource(wrapper.MakeMetricSourcePod(autoscalingv1alpha1.HTTP, "8080", "/metrics", "gpu_cache_usage_perc", "0.7")). 
+ Obj() + }, + updates: []*update{ + { + updateFunc: func(pa *autoscalingv1alpha1.PodAutoscaler) { + // Create StormService + matchLabel := map[string]string{"app": "test-vllm-conflict"} + podTemplate := corev1.PodTemplateSpec{ + ObjectMeta: metav1.ObjectMeta{ + Labels: matchLabel, + }, + Spec: corev1.PodSpec{ + Containers: []corev1.Container{ + { + Name: "vllm-container", + Image: "vllm/vllm-openai:latest", + }, + }, + }, + } + roleSetSpec := &orchestrationapi.RoleSetSpec{ + Roles: []orchestrationapi.RoleSpec{ + { + Name: "prefill", + Replicas: ptr.To(int32(3)), + Template: podTemplate, + Stateful: false, + }, + { + Name: "decode", + Replicas: ptr.To(int32(2)), + Template: podTemplate, + Stateful: false, + }, + }, + } + ss := wrapper.MakeStormService("test-stormservice-conflict"). + Namespace(ns.Name). + Replicas(ptr.To(int32(2))). + Selector(metav1.SetAsLabelSelector(matchLabel)). + UpdateStrategyType(orchestrationapi.RollingUpdateStormServiceStrategyType). + RoleSetTemplateMeta(metav1.ObjectMeta{Labels: matchLabel}, roleSetSpec). + Obj() + gomega.Expect(k8sClient.Create(ctx, ss)).To(gomega.Succeed()) + + // Create PA1 first (targeting same SS and same role) + pa1 := wrapper.MakePodAutoscaler("pa-ss-conflict-1"). + Namespace(ns.Name). + ScalingStrategy(autoscalingv1alpha1.KPA). + MinReplicas(1). + MaxReplicas(10). + ScaleTargetRefWithKind("StormService", "orchestration.aibrix.ai/v1alpha1", "test-stormservice-conflict"). + SubTargetSelector("prefill"). // Same role + MetricSource(wrapper.MakeMetricSourcePod(autoscalingv1alpha1.HTTP, "8080", "/metrics", "gpu_cache_usage_perc", "0.7")). 
+ Obj() + gomega.Expect(k8sClient.Create(ctx, pa1)).To(gomega.Succeed()) + time.Sleep(time.Second * 2) + + // Create PA2 (should have conflict) + gomega.Expect(k8sClient.Create(ctx, pa)).To(gomega.Succeed()) + time.Sleep(time.Second * 3) + }, + checkFunc: func(ctx context.Context, k8sClient client.Client, pa *autoscalingv1alpha1.PodAutoscaler) { + // Verify PA2 has conflict condition + gomega.Eventually(func(g gomega.Gomega) { + fetched := validation.GetPodAutoscaler(ctx, k8sClient, pa) + // Should have Conflict condition with Status=False (meaning conflict exists) + validation.ValidatePodAutoscalerCondition(fetched, ConditionConflict, metav1.ConditionFalse, "") + // AbleToScale should be False due to conflict + validation.ValidatePodAutoscalerCondition(fetched, ConditionAbleToScale, metav1.ConditionFalse, "") + }, time.Second*15, time.Millisecond*500).Should(gomega.Succeed()) + }, + }, + }, + }, + ), + + // ========================================================================= + // Annotation-Based Configuration + // ========================================================================= + + ginkgo.Entry("Annotation - Scale up cooldown annotation", + &testValidatingCase{ + makePodAutoscaler: func() *autoscalingv1alpha1.PodAutoscaler { + annotations := map[string]string{ + "kpa.autoscaling.aibrix.ai/scale-up-cooldown": "30s", + } + return wrapper.MakePodAutoscaler("pa-cooldown"). + Namespace(ns.Name). + ScalingStrategy(autoscalingv1alpha1.KPA). + MinReplicas(1). + MaxReplicas(10). + Annotations(annotations). + ScaleTargetRefWithKind("Deployment", "apps/v1", "cooldown-deployment"). + MetricSource(wrapper.MakeMetricSourcePod(autoscalingv1alpha1.HTTP, "8080", "/metrics", "gpu_cache_usage_perc", "0.5")). 
+ Obj() + }, + updates: []*update{ + { + updateFunc: func(pa *autoscalingv1alpha1.PodAutoscaler) { + createDeployment("cooldown-deployment", ns.Name, 2) + gomega.Expect(k8sClient.Create(ctx, pa)).To(gomega.Succeed()) + time.Sleep(time.Second * 2) + }, + checkFunc: func(ctx context.Context, k8sClient client.Client, pa *autoscalingv1alpha1.PodAutoscaler) { + // Verify PA is created with cooldown annotation + fetched := validation.GetPodAutoscaler(ctx, k8sClient, pa) + gomega.Expect(fetched.Annotations).To(gomega.HaveKey("kpa.autoscaling.aibrix.ai/scale-up-cooldown")) + gomega.Expect(fetched.Annotations["kpa.autoscaling.aibrix.ai/scale-up-cooldown"]).To(gomega.Equal("30s")) + validation.ValidatePodAutoscalerCondition(fetched, ConditionValidSpec, metav1.ConditionTrue, "") + }, + }, + }, + }, + ), + + ginkgo.Entry("Annotation - Scale down delay annotation", + &testValidatingCase{ + makePodAutoscaler: func() *autoscalingv1alpha1.PodAutoscaler { + annotations := map[string]string{ + "kpa.autoscaling.aibrix.ai/scale-down-delay": "3m", + } + return wrapper.MakePodAutoscaler("pa-scale-down-delay"). + Namespace(ns.Name). + ScalingStrategy(autoscalingv1alpha1.KPA). + MinReplicas(1). + MaxReplicas(10). + Annotations(annotations). + ScaleTargetRefWithKind("Deployment", "apps/v1", "delay-deployment"). + MetricSource(wrapper.MakeMetricSourcePod(autoscalingv1alpha1.HTTP, "8080", "/metrics", "gpu_cache_usage_perc", "0.5")). 
+ Obj() + }, + updates: []*update{ + { + updateFunc: func(pa *autoscalingv1alpha1.PodAutoscaler) { + createDeployment("delay-deployment", ns.Name, 5) + gomega.Expect(k8sClient.Create(ctx, pa)).To(gomega.Succeed()) + time.Sleep(time.Second * 2) + }, + checkFunc: func(ctx context.Context, k8sClient client.Client, pa *autoscalingv1alpha1.PodAutoscaler) { + // Verify annotation is preserved + fetched := validation.GetPodAutoscaler(ctx, k8sClient, pa) + gomega.Expect(fetched.Annotations).To(gomega.HaveKey("kpa.autoscaling.aibrix.ai/scale-down-delay")) + validation.ValidatePodAutoscalerCondition(fetched, ConditionValidSpec, metav1.ConditionTrue, "") + }, + }, + }, + }, + ), + + ginkgo.Entry("Annotation - Multiple KPA annotations", + &testValidatingCase{ + makePodAutoscaler: func() *autoscalingv1alpha1.PodAutoscaler { + annotations := map[string]string{ + "kpa.autoscaling.aibrix.ai/panic-threshold": "200", + "kpa.autoscaling.aibrix.ai/panic-window": "10s", + "kpa.autoscaling.aibrix.ai/stable-window": "60s", + "kpa.autoscaling.aibrix.ai/scale-up-cooldown": "30s", + "kpa.autoscaling.aibrix.ai/scale-down-delay": "180s", + "kpa.autoscaling.aibrix.ai/tolerance": "0.1", + } + return wrapper.MakePodAutoscaler("pa-annotations"). + Namespace(ns.Name). + ScalingStrategy(autoscalingv1alpha1.KPA). + MinReplicas(1). + MaxReplicas(10). + Annotations(annotations). + ScaleTargetRefWithKind("Deployment", "apps/v1", "annotations-deployment"). + MetricSource(wrapper.MakeMetricSourcePod(autoscalingv1alpha1.HTTP, "8080", "/metrics", "gpu_cache_usage_perc", "0.5")). 
+ Obj() + }, + updates: []*update{ + { + updateFunc: func(pa *autoscalingv1alpha1.PodAutoscaler) { + createDeployment("annotations-deployment", ns.Name, 3) + gomega.Expect(k8sClient.Create(ctx, pa)).To(gomega.Succeed()) + time.Sleep(time.Second * 2) + }, + checkFunc: func(ctx context.Context, k8sClient client.Client, pa *autoscalingv1alpha1.PodAutoscaler) { + // Verify all annotations are preserved + fetched := validation.GetPodAutoscaler(ctx, k8sClient, pa) + gomega.Expect(fetched.Annotations).To(gomega.HaveKey("kpa.autoscaling.aibrix.ai/panic-threshold")) + gomega.Expect(fetched.Annotations["kpa.autoscaling.aibrix.ai/panic-threshold"]).To(gomega.Equal("200")) + gomega.Expect(fetched.Annotations).To(gomega.HaveKey("kpa.autoscaling.aibrix.ai/tolerance")) + gomega.Expect(fetched.Annotations["kpa.autoscaling.aibrix.ai/tolerance"]).To(gomega.Equal("0.1")) + validation.ValidatePodAutoscalerCondition(fetched, ConditionValidSpec, metav1.ConditionTrue, "") + }, + }, + }, + }, + ), + + // ========================================================================= + // Advanced Scenarios + // ========================================================================= + + ginkgo.Entry("Advanced - Update PA spec and verify reconciliation", + &testValidatingCase{ + makePodAutoscaler: func() *autoscalingv1alpha1.PodAutoscaler { + return wrapper.MakePodAutoscaler("pa-update-spec"). + Namespace(ns.Name). + ScalingStrategy(autoscalingv1alpha1.KPA). + MinReplicas(1). + MaxReplicas(5). + ScaleTargetRefWithKind("Deployment", "apps/v1", "update-spec-deployment"). + MetricSource(wrapper.MakeMetricSourcePod(autoscalingv1alpha1.HTTP, "8080", "/metrics", "cpu_usage", "0.7")). 
+ Obj() + }, + updates: []*update{ + { + updateFunc: func(pa *autoscalingv1alpha1.PodAutoscaler) { + createDeployment("update-spec-deployment", ns.Name, 2) + gomega.Expect(k8sClient.Create(ctx, pa)).To(gomega.Succeed()) + time.Sleep(time.Second * 2) + }, + checkFunc: func(ctx context.Context, k8sClient client.Client, pa *autoscalingv1alpha1.PodAutoscaler) { + fetched := validation.GetPodAutoscaler(ctx, k8sClient, pa) + validation.ValidatePodAutoscalerCondition(fetched, ConditionValidSpec, metav1.ConditionTrue, "") + gomega.Expect(fetched.Spec.MaxReplicas).To(gomega.Equal(int32(5))) + }, + }, + { + updateFunc: func(pa *autoscalingv1alpha1.PodAutoscaler) { + // Update spec: change maxReplicas + fetched := validation.GetPodAutoscaler(ctx, k8sClient, pa) + fetched.Spec.MaxReplicas = 10 + gomega.Expect(k8sClient.Update(ctx, fetched)).To(gomega.Succeed()) + time.Sleep(time.Second * 2) + }, + checkFunc: func(ctx context.Context, k8sClient client.Client, pa *autoscalingv1alpha1.PodAutoscaler) { + // Verify update is applied and reconciled + gomega.Eventually(func(g gomega.Gomega) { + fetched := validation.GetPodAutoscaler(ctx, k8sClient, pa) + g.Expect(fetched.Spec.MaxReplicas).To(gomega.Equal(int32(10))) + validation.ValidatePodAutoscalerCondition(fetched, ConditionValidSpec, metav1.ConditionTrue, "") + }, time.Second*10, time.Millisecond*250).Should(gomega.Succeed()) + }, + }, + }, + }, + ), + + ginkgo.Entry("Advanced - Multiple rapid updates to spec", + &testValidatingCase{ + makePodAutoscaler: func() *autoscalingv1alpha1.PodAutoscaler { + return wrapper.MakePodAutoscaler("pa-rapid-updates"). + Namespace(ns.Name). + ScalingStrategy(autoscalingv1alpha1.HPA). + MinReplicas(1). + MaxReplicas(5). + ScaleTargetRefWithKind("Deployment", "apps/v1", "rapid-deployment"). + MetricSource(wrapper.MakeMetricSourcePod(autoscalingv1alpha1.HTTP, "8080", "/metrics", "requests_per_second", "100")). 
+ Obj() + }, + updates: []*update{ + { + updateFunc: func(pa *autoscalingv1alpha1.PodAutoscaler) { + createDeployment("rapid-deployment", ns.Name, 2) + gomega.Expect(k8sClient.Create(ctx, pa)).To(gomega.Succeed()) + time.Sleep(time.Second * 2) + }, + checkFunc: func(ctx context.Context, k8sClient client.Client, pa *autoscalingv1alpha1.PodAutoscaler) { + validation.ValidatePodAutoscalerConditionExists(validation.GetPodAutoscaler(ctx, k8sClient, pa), ConditionValidSpec) + }, + }, + { + updateFunc: func(pa *autoscalingv1alpha1.PodAutoscaler) { + // Rapid updates: change maxReplicas multiple times + for i := 0; i < 3; i++ { + fetched := validation.GetPodAutoscaler(ctx, k8sClient, pa) + fetched.Spec.MaxReplicas = int32(5 + i*2) + gomega.Expect(k8sClient.Update(ctx, fetched)).To(gomega.Succeed()) + time.Sleep(time.Millisecond * 500) + } + time.Sleep(time.Second * 2) + }, + checkFunc: func(ctx context.Context, k8sClient client.Client, pa *autoscalingv1alpha1.PodAutoscaler) { + // Eventually consistent: final maxReplicas should be 9 (5 + 2*2) + gomega.Eventually(func(g gomega.Gomega) { + fetched := validation.GetPodAutoscaler(ctx, k8sClient, pa) + g.Expect(fetched.Spec.MaxReplicas).To(gomega.Equal(int32(9))) + validation.ValidatePodAutoscalerCondition(fetched, ConditionValidSpec, metav1.ConditionTrue, "") + }, time.Second*15, time.Second*1).Should(gomega.Succeed()) + }, + }, + }, + }, + ), + ) +}) From e91c393e7934850f569c3df194934c7ce9abc075 Mon Sep 17 00:00:00 2001 From: Wang Zhenyu Date: Sun, 19 Oct 2025 21:45:37 +0800 Subject: [PATCH 2/4] feat: integration test for podAutoScaler Signed-off-by: Wang Zhenyu --- test/utils/validation/hpa.go | 114 +++++++++++++ test/utils/validation/podautoscaler.go | 215 +++++++++++++++++++++++++ test/utils/wrapper/podautoscaler.go | 170 +++++++++++++++++++ 3 files changed, 499 insertions(+) create mode 100644 test/utils/validation/hpa.go create mode 100644 test/utils/validation/podautoscaler.go create mode 100644 
test/utils/wrapper/podautoscaler.go
diff --git a/test/utils/validation/hpa.go b/test/utils/validation/hpa.go
new file mode 100644
index 000000000..afa4f4021
--- /dev/null
+++ b/test/utils/validation/hpa.go
@@ -0,0 +1,114 @@
+/*
+Copyright 2025 The Aibrix Team.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package validation
+
+import (
+	"context"
+	"time"
+
+	"github.com/onsi/gomega"
+	autoscalingv2 "k8s.io/api/autoscaling/v2"
+	"k8s.io/apimachinery/pkg/types"
+	"sigs.k8s.io/controller-runtime/pkg/client"
+)
+
+// WaitForHPACreated waits for an HPA to be created and returns it.
+//
+// It calls k8sClient.Get for namespace/name every 250ms for up to 10s. Each
+// attempt is asserted through the Gomega instance handed to the poll
+// function, so a failed Get is retried; if no attempt succeeds within the
+// timeout the surrounding gomega.Eventually assertion fails.
+func WaitForHPACreated(ctx context.Context,
+	k8sClient client.Client,
+	namespace, name string) *autoscalingv2.HorizontalPodAutoscaler {
+
+	// Declared outside the poll function so the object filled in by the
+	// successful Get is the one returned to the caller.
+	hpa := &autoscalingv2.HorizontalPodAutoscaler{}
+	gomega.Eventually(func(g gomega.Gomega) {
+		err := k8sClient.Get(ctx, types.NamespacedName{
+			Namespace: namespace,
+			Name:      name,
+		}, hpa)
+		g.Expect(err).ToNot(gomega.HaveOccurred())
+	}, time.Second*10, time.Millisecond*250).Should(gomega.Succeed())
+
+	return hpa
+}
+
+// WaitForHPADeleted waits for an HPA to be deleted.
+func WaitForHPADeleted(ctx context.Context,
+	k8sClient client.Client,
+	namespace, name string) {
+
+	// Polls every 250ms for up to 10s.
+	gomega.Eventually(func(g gomega.Gomega) {
+		hpa := &autoscalingv2.HorizontalPodAutoscaler{}
+		err := k8sClient.Get(ctx, types.NamespacedName{
+			Namespace: namespace,
+			Name:      name,
+		}, hpa)
+		// The Get has to fail, and NotFound is the only accepted failure:
+		// client.IgnoreNotFound maps NotFound to nil and returns any other
+		// error unchanged, which then fails the Succeed() check below.
+		g.Expect(err).To(gomega.HaveOccurred())
+		g.Expect(client.IgnoreNotFound(err)).To(gomega.Succeed())
+	}, time.Second*10, time.Millisecond*250).Should(gomega.Succeed())
+}
+
+// ValidateHPAOwnerReference validates the HPA's OwnerReference.
+//
+// It asserts that hpa has exactly one owner reference, that the reference's
+// Name and Kind equal expectedOwnerName and expectedOwnerKind, and that its
+// Controller pointer is non-nil and true. All assertions go through the
+// package-level gomega.Expect.
+func ValidateHPAOwnerReference(hpa *autoscalingv2.HorizontalPodAutoscaler,
+	expectedOwnerName string,
+	expectedOwnerKind string) {
+
+	gomega.Expect(hpa.OwnerReferences).To(gomega.HaveLen(1),
+		"HPA should have exactly one owner reference")
+
+	// NOTE(review): the index below (and the *ownerRef.Controller dereference
+	// further down) assume a failed Expect stops execution — confirm the
+	// registered fail handler aborts the test.
+	ownerRef := hpa.OwnerReferences[0]
+	gomega.Expect(ownerRef.Name).To(gomega.Equal(expectedOwnerName),
+		"Owner name should be %s", expectedOwnerName)
+	gomega.Expect(ownerRef.Kind).To(gomega.Equal(expectedOwnerKind),
+		"Owner kind should be %s", expectedOwnerKind)
+	gomega.Expect(ownerRef.Controller).ToNot(gomega.BeNil(),
+		"Controller field should not be nil")
+	gomega.Expect(*ownerRef.Controller).To(gomega.BeTrue(),
+		"Controller field should be true")
+}
+
+// ValidateHPASpec validates the HPA spec fields.
+//
+// It asserts that hpa.Spec.MinReplicas is set and equals expectedMinReplicas
+// and that hpa.Spec.MaxReplicas equals expectedMaxReplicas. An HPA whose
+// MinReplicas is nil fails the first assertion.
+func ValidateHPASpec(hpa *autoscalingv2.HorizontalPodAutoscaler,
+	expectedMinReplicas, expectedMaxReplicas int32) {
+
+	gomega.Expect(hpa.Spec.MinReplicas).ToNot(gomega.BeNil(),
+		"HPA MinReplicas should not be nil")
+	gomega.Expect(*hpa.Spec.MinReplicas).To(gomega.Equal(expectedMinReplicas),
+		"HPA MinReplicas should be %d", expectedMinReplicas)
+	gomega.Expect(hpa.Spec.MaxReplicas).To(gomega.Equal(expectedMaxReplicas),
+		"HPA MaxReplicas should be %d", expectedMaxReplicas)
+}
+
+// ValidateHPAScaleTargetRef validates the HPA's scale target reference.
+func ValidateHPAScaleTargetRef(hpa *autoscalingv2.HorizontalPodAutoscaler,
+	expectedKind, expectedName string) {
+
+	// Only Kind and Name of the target reference are compared here.
+	gomega.Expect(hpa.Spec.ScaleTargetRef.Kind).To(gomega.Equal(expectedKind),
+		"HPA ScaleTargetRef kind should be %s", expectedKind)
+	gomega.Expect(hpa.Spec.ScaleTargetRef.Name).To(gomega.Equal(expectedName),
+		"HPA ScaleTargetRef name should be %s", expectedName)
+}
+
+// GetHPA fetches an HPA by namespace and name.
+//
+// It performs a single k8sClient.Get (no polling) and asserts with the
+// package-level gomega.Expect that the call succeeded, then returns the
+// populated object.
+func GetHPA(ctx context.Context,
+	k8sClient client.Client,
+	namespace, name string) *autoscalingv2.HorizontalPodAutoscaler {
+
+	hpa := &autoscalingv2.HorizontalPodAutoscaler{}
+	gomega.Expect(k8sClient.Get(ctx, types.NamespacedName{
+		Namespace: namespace,
+		Name:      name,
+	}, hpa)).To(gomega.Succeed())
+	return hpa
+}
diff --git a/test/utils/validation/podautoscaler.go b/test/utils/validation/podautoscaler.go
new file mode 100644
index 000000000..7aee5d17a
--- /dev/null
+++ b/test/utils/validation/podautoscaler.go
@@ -0,0 +1,215 @@
+/*
+Copyright 2025 The Aibrix Team.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package validation
+
+import (
+	"context"
+	"time"
+
+	"github.com/onsi/gomega"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"sigs.k8s.io/controller-runtime/pkg/client"
+
+	autoscalingv1alpha1 "github.com/vllm-project/aibrix/api/autoscaling/v1alpha1"
+)
+
+// ValidatePodAutoscalerSpec validates the spec fields of a PodAutoscaler.
+func ValidatePodAutoscalerSpec(pa *autoscalingv1alpha1.PodAutoscaler,
+	expectedMin, expectedMax int32,
+	expectedStrategy autoscalingv1alpha1.ScalingStrategyType) {
+
+	// MinReplicas is a pointer in the spec, so it is checked for nil before
+	// being dereferenced; MaxReplicas and ScalingStrategy are compared directly.
+	gomega.Expect(pa.Spec.MinReplicas).ToNot(gomega.BeNil(), "MinReplicas should not be nil")
+	gomega.Expect(*pa.Spec.MinReplicas).To(gomega.Equal(expectedMin), "MinReplicas should match expected value")
+	gomega.Expect(pa.Spec.MaxReplicas).To(gomega.Equal(expectedMax), "MaxReplicas should match expected value")
+	gomega.Expect(pa.Spec.ScalingStrategy).To(gomega.Equal(expectedStrategy), "ScalingStrategy should match expected value")
+}
+
+// ValidatePodAutoscalerCondition validates a specific condition in a PodAutoscaler.
+//
+// It looks up the first entry of pa.Status.Conditions whose Type equals
+// conditionType and asserts that it exists and that its Status equals
+// expectedStatus. The Reason is compared with expectedReason only when
+// expectedReason is non-empty.
+//
+// Assertions go through the package-level gomega.Expect, not a per-poll
+// Gomega instance.
+// NOTE(review): when this is called from inside a
+// gomega.Eventually(func(g gomega.Gomega) {...}) body, a failed assertion is
+// presumably reported immediately instead of being retried — confirm against
+// the Gomega documentation for Eventually.
+func ValidatePodAutoscalerCondition(pa *autoscalingv1alpha1.PodAutoscaler,
+	conditionType string,
+	expectedStatus metav1.ConditionStatus,
+	expectedReason string) {
+
+	var found *metav1.Condition
+	for i := range pa.Status.Conditions {
+		if pa.Status.Conditions[i].Type == conditionType {
+			// Point at the slice element itself rather than at a loop copy.
+			found = &pa.Status.Conditions[i]
+			break
+		}
+	}
+
+	gomega.Expect(found).ToNot(gomega.BeNil(),
+		"condition %s should exist", conditionType)
+	gomega.Expect(found.Status).To(gomega.Equal(expectedStatus),
+		"condition %s status should be %s", conditionType, expectedStatus)
+	// An empty expectedReason means "do not check the reason".
+	if expectedReason != "" {
+		gomega.Expect(found.Reason).To(gomega.Equal(expectedReason),
+			"condition %s reason should be %s", conditionType, expectedReason)
+	}
+}
+
+// ValidatePodAutoscalerConditionExists validates that a condition exists.
+//
+// Only presence of a condition with the given Type is asserted; its Status
+// and Reason are not inspected.
+func ValidatePodAutoscalerConditionExists(pa *autoscalingv1alpha1.PodAutoscaler,
+	conditionType string) {
+
+	var found *metav1.Condition
+	for i := range pa.Status.Conditions {
+		if pa.Status.Conditions[i].Type == conditionType {
+			found = &pa.Status.Conditions[i]
+			break
+		}
+	}
+
+	gomega.Expect(found).ToNot(gomega.BeNil(),
+		"condition %s should exist", conditionType)
+}
+
+// ValidatePodAutoscalerConditionNotExists validates that a condition does not exist.
+func ValidatePodAutoscalerConditionNotExists(pa *autoscalingv1alpha1.PodAutoscaler,
+	conditionType string) {
+
+	// Same lookup as the other condition helpers; here a match is the failure.
+	var found *metav1.Condition
+	for i := range pa.Status.Conditions {
+		if pa.Status.Conditions[i].Type == conditionType {
+			found = &pa.Status.Conditions[i]
+			break
+		}
+	}
+
+	gomega.Expect(found).To(gomega.BeNil(),
+		"condition %s should not exist", conditionType)
+}
+
+// ValidatePodAutoscalerScaling validates the scaling status (desiredScale and actualScale).
+//
+// It re-fetches pa (by its namespace/name) every 250ms for up to 10s and
+// succeeds once Status.DesiredScale equals expectedDesired and
+// Status.ActualScale equals expectedActual in the same fetch. Every check in
+// the poll uses the Gomega instance supplied by Eventually, so a mismatch is
+// retried until the timeout.
+func ValidatePodAutoscalerScaling(ctx context.Context,
+	k8sClient client.Client,
+	pa *autoscalingv1alpha1.PodAutoscaler,
+	expectedDesired, expectedActual int32) {
+
+	gomega.Eventually(func(g gomega.Gomega) {
+		// A fresh object per attempt so no state leaks between polls.
+		fetched := &autoscalingv1alpha1.PodAutoscaler{}
+		err := k8sClient.Get(ctx, client.ObjectKeyFromObject(pa), fetched)
+		g.Expect(err).ToNot(gomega.HaveOccurred())
+
+		g.Expect(fetched.Status.DesiredScale).To(gomega.Equal(expectedDesired),
+			"DesiredScale should be %d", expectedDesired)
+		g.Expect(fetched.Status.ActualScale).To(gomega.Equal(expectedActual),
+			"ActualScale should be %d", expectedActual)
+	}, time.Second*10, time.Millisecond*250).Should(gomega.Succeed())
+}
+
+// ValidatePodAutoscalerScalingEventually validates scaling status and waits for eventual consistency.
+func ValidatePodAutoscalerScalingEventually(ctx context.Context,
+	k8sClient client.Client,
+	pa *autoscalingv1alpha1.PodAutoscaler,
+	expectedDesired, expectedActual int32) {
+
+	// This helper performs exactly the polling check implemented by
+	// ValidatePodAutoscalerScaling (same 10s timeout, same 250ms interval, same
+	// assertions), so delegate instead of keeping a second copy of the body
+	// that could drift.
+	ValidatePodAutoscalerScaling(ctx, k8sClient, pa, expectedDesired, expectedActual)
+}
+
+// ValidateScalingHistory validates the scalingHistory in status.
+//
+// It asserts that pa.Status.ScalingHistory has exactly expectedCount entries.
+// When checkLatest is non-nil and the history is non-empty, checkLatest is
+// called with the last element of the slice.
+// NOTE(review): "latest" therefore assumes the history is stored oldest-first
+// — confirm against the controller.
+func ValidateScalingHistory(pa *autoscalingv1alpha1.PodAutoscaler,
+	expectedCount int,
+	checkLatest func(autoscalingv1alpha1.ScalingDecision)) {
+
+	gomega.Expect(len(pa.Status.ScalingHistory)).To(gomega.Equal(expectedCount),
+		"ScalingHistory should have %d entries", expectedCount)
+
+	if checkLatest != nil && len(pa.Status.ScalingHistory) > 0 {
+		latest := pa.Status.ScalingHistory[len(pa.Status.ScalingHistory)-1]
+		checkLatest(latest)
+	}
+}
+
+// WaitForPodAutoscalerCondition waits for a specific condition to reach expected status.
+//
+// It re-fetches pa every 250ms for up to 10s and succeeds once a condition of
+// type conditionType is present with Status equal to expectedStatus; the
+// condition's Reason is not checked. All checks inside the poll go through
+// the Gomega instance that Eventually passes in, so a condition that is still
+// missing or still has another status fails only that attempt and is retried.
+func WaitForPodAutoscalerCondition(ctx context.Context,
+	k8sClient client.Client,
+	pa *autoscalingv1alpha1.PodAutoscaler,
+	conditionType string,
+	expectedStatus metav1.ConditionStatus) {
+
+	gomega.Eventually(func(g gomega.Gomega) {
+		fetched := &autoscalingv1alpha1.PodAutoscaler{}
+		err := k8sClient.Get(ctx, client.ObjectKeyFromObject(pa), fetched)
+		g.Expect(err).ToNot(gomega.HaveOccurred())
+
+		// Do not call ValidatePodAutoscalerCondition here: it asserts with the
+		// package-level gomega.Expect, which Eventually cannot intercept, so
+		// the first not-yet-true poll would fail the test instead of retrying.
+		var found *metav1.Condition
+		for i := range fetched.Status.Conditions {
+			if fetched.Status.Conditions[i].Type == conditionType {
+				found = &fetched.Status.Conditions[i]
+				break
+			}
+		}
+		// A failed g.Expect ends this attempt, so found is non-nil below.
+		g.Expect(found).ToNot(gomega.BeNil(),
+			"condition %s should exist", conditionType)
+		g.Expect(found.Status).To(gomega.Equal(expectedStatus),
+			"condition %s status should be %s", conditionType, expectedStatus)
+	}, time.Second*10, time.Millisecond*250).Should(gomega.Succeed())
+}
+
+// WaitForPodAutoscalerConditionWithReason waits for a condition with specific reason.
+func WaitForPodAutoscalerConditionWithReason(ctx context.Context,
+	k8sClient client.Client,
+	pa *autoscalingv1alpha1.PodAutoscaler,
+	conditionType string,
+	expectedStatus metav1.ConditionStatus,
+	expectedReason string) {
+
+	// Polls every 250ms for up to 10s. Every check uses the Gomega instance
+	// supplied by Eventually so that a condition which is still missing, or
+	// still carries another status/reason, fails only the current attempt and
+	// is retried.
+	gomega.Eventually(func(g gomega.Gomega) {
+		fetched := &autoscalingv1alpha1.PodAutoscaler{}
+		err := k8sClient.Get(ctx, client.ObjectKeyFromObject(pa), fetched)
+		g.Expect(err).ToNot(gomega.HaveOccurred())
+
+		// Do not call ValidatePodAutoscalerCondition here: it asserts with the
+		// package-level gomega.Expect, which Eventually cannot intercept, so
+		// the first not-yet-true poll would fail the test instead of retrying.
+		var found *metav1.Condition
+		for i := range fetched.Status.Conditions {
+			if fetched.Status.Conditions[i].Type == conditionType {
+				found = &fetched.Status.Conditions[i]
+				break
+			}
+		}
+		// A failed g.Expect ends this attempt, so found is non-nil below.
+		g.Expect(found).ToNot(gomega.BeNil(),
+			"condition %s should exist", conditionType)
+		g.Expect(found.Status).To(gomega.Equal(expectedStatus),
+			"condition %s status should be %s", conditionType, expectedStatus)
+		// As in ValidatePodAutoscalerCondition, an empty expectedReason means
+		// "do not check the reason".
+		if expectedReason != "" {
+			g.Expect(found.Reason).To(gomega.Equal(expectedReason),
+				"condition %s reason should be %s", conditionType, expectedReason)
+		}
+	}, time.Second*10, time.Millisecond*250).Should(gomega.Succeed())
+}
+
+// WaitForPodAutoscalerCreated waits for PodAutoscaler to be created and returns the latest version.
+//
+// It polls k8sClient.Get for pa's namespace/name every 250ms for up to 5s and
+// returns the object filled in by the successful Get.
+func WaitForPodAutoscalerCreated(ctx context.Context,
+	k8sClient client.Client,
+	pa *autoscalingv1alpha1.PodAutoscaler) *autoscalingv1alpha1.PodAutoscaler {
+
+	// Declared outside the poll function so the fetched state can be returned.
+	fetched := &autoscalingv1alpha1.PodAutoscaler{}
+	gomega.Eventually(func(g gomega.Gomega) {
+		err := k8sClient.Get(ctx, client.ObjectKeyFromObject(pa), fetched)
+		g.Expect(err).ToNot(gomega.HaveOccurred())
+	}, time.Second*5, time.Millisecond*250).Should(gomega.Succeed())
+
+	return fetched
+}
+
+// WaitForPodAutoscalerDeleted waits for PodAutoscaler to be deleted.
+//
+// It polls every 250ms for up to 10s until k8sClient.Get for pa's
+// namespace/name fails with a NotFound error.
+func WaitForPodAutoscalerDeleted(ctx context.Context,
+	k8sClient client.Client,
+	pa *autoscalingv1alpha1.PodAutoscaler) {
+
+	gomega.Eventually(func(g gomega.Gomega) {
+		fetched := &autoscalingv1alpha1.PodAutoscaler{}
+		err := k8sClient.Get(ctx, client.ObjectKeyFromObject(pa), fetched)
+		// The Get has to fail, and NotFound is the only accepted failure:
+		// client.IgnoreNotFound maps NotFound to nil and returns any other
+		// error unchanged.
+		g.Expect(err).To(gomega.HaveOccurred())
+		g.Expect(client.IgnoreNotFound(err)).To(gomega.Succeed())
+	}, time.Second*10, time.Millisecond*250).Should(gomega.Succeed())
+}
+
+// GetPodAutoscaler fetches the latest version of a PodAutoscaler.
+func GetPodAutoscaler(ctx context.Context,
+	k8sClient client.Client,
+	pa *autoscalingv1alpha1.PodAutoscaler) *autoscalingv1alpha1.PodAutoscaler {
+
+	// Single Get (no polling) into a fresh object keyed by pa's
+	// namespace/name; pa itself is not modified. A failed Get is reported
+	// through the package-level gomega.Expect.
+	fetched := &autoscalingv1alpha1.PodAutoscaler{}
+	gomega.Expect(k8sClient.Get(ctx, client.ObjectKeyFromObject(pa), fetched)).To(gomega.Succeed())
+	return fetched
+}
diff --git a/test/utils/wrapper/podautoscaler.go b/test/utils/wrapper/podautoscaler.go
new file mode 100644
index 000000000..1f64ef832
--- /dev/null
+++ b/test/utils/wrapper/podautoscaler.go
@@ -0,0 +1,170 @@
+/*
+Copyright 2025 The Aibrix Team.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package wrapper
+
+import (
+	corev1 "k8s.io/api/core/v1"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+
+	autoscalingv1alpha1 "github.com/vllm-project/aibrix/api/autoscaling/v1alpha1"
+)
+
+// PodAutoscalerWrapper wraps PodAutoscaler to provide a fluent API for test construction.
+//
+// The PodAutoscaler is embedded by value; the builder methods on
+// *PodAutoscalerWrapper mutate that embedded object in place and return the
+// same wrapper so calls can be chained.
+type PodAutoscalerWrapper struct {
+	autoscalingv1alpha1.PodAutoscaler
+}
+
+// MakePodAutoscaler creates a new PodAutoscalerWrapper with the given name.
+//
+// Only ObjectMeta.Name is populated; every other field keeps its zero value
+// until a builder method sets it.
+func MakePodAutoscaler(name string) *PodAutoscalerWrapper {
+	return &PodAutoscalerWrapper{
+		PodAutoscaler: autoscalingv1alpha1.PodAutoscaler{
+			ObjectMeta: metav1.ObjectMeta{
+				Name: name,
+			},
+		},
+	}
+}
+
+// Obj returns the pointer to the underlying PodAutoscaler object.
+//
+// The pointer refers to the object embedded in the wrapper, not to a copy, so
+// later builder calls on w are visible through it.
+func (w *PodAutoscalerWrapper) Obj() *autoscalingv1alpha1.PodAutoscaler {
+	return &w.PodAutoscaler
+}
+
+// Namespace sets the namespace of the PodAutoscaler.
+func (w *PodAutoscalerWrapper) Namespace(namespace string) *PodAutoscalerWrapper {
+	// Spelled through the embedded PodAutoscaler: on the wrapper, the selector
+	// w.Namespace resolves to this method rather than to the metadata field.
+	w.PodAutoscaler.Namespace = namespace
+	return w
+}
+
+// ScalingStrategy sets the scaling strategy (HPA, KPA, or APA).
+func (w *PodAutoscalerWrapper) ScalingStrategy(strategy autoscalingv1alpha1.ScalingStrategyType) *PodAutoscalerWrapper {
+	w.Spec.ScalingStrategy = strategy
+	return w
+}
+
+// MinReplicas sets the minimum number of replicas.
+//
+// Spec.MinReplicas is a pointer; it is set to the address of this call's own
+// copy of the argument, so an explicit 0 is stored as a non-nil pointer to 0.
+// NOTE(review): the parameter name shadows the predeclared min function
+// (Go 1.21+) inside this method — harmless here, but linters may flag it.
+func (w *PodAutoscalerWrapper) MinReplicas(min int32) *PodAutoscalerWrapper {
+	w.Spec.MinReplicas = &min
+	return w
+}
+
+// MaxReplicas sets the maximum number of replicas.
+// NOTE(review): the parameter name shadows the predeclared max function
+// (Go 1.21+) inside this method — harmless here, but linters may flag it.
+func (w *PodAutoscalerWrapper) MaxReplicas(max int32) *PodAutoscalerWrapper {
+	w.Spec.MaxReplicas = max
+	return w
+}
+
+// ScaleTargetRef sets the target resource to scale.
+//
+// The given reference replaces the whole Spec.ScaleTargetRef value.
+func (w *PodAutoscalerWrapper) ScaleTargetRef(ref corev1.ObjectReference) *PodAutoscalerWrapper {
+	w.Spec.ScaleTargetRef = ref
+	return w
+}
+
+// ScaleTargetRefWithKind sets the target resource with kind, apiVersion, and name.
+//
+// Note the argument order: kind first, then apiVersion, then name. A new
+// ObjectReference is stored, so fields not listed here (for example its
+// Namespace) are left at their zero value.
+func (w *PodAutoscalerWrapper) ScaleTargetRefWithKind(kind, apiVersion, name string) *PodAutoscalerWrapper {
+	w.Spec.ScaleTargetRef = corev1.ObjectReference{
+		Kind:       kind,
+		APIVersion: apiVersion,
+		Name:       name,
+	}
+	return w
+}
+
+// SubTargetSelector sets the sub-target selector (e.g., for role-level scaling).
+//
+// A new selector carrying only RoleName is stored, replacing any previous one.
+func (w *PodAutoscalerWrapper) SubTargetSelector(roleName string) *PodAutoscalerWrapper {
+	w.Spec.SubTargetSelector = &autoscalingv1alpha1.SubTargetSelector{
+		RoleName: roleName,
+	}
+	return w
+}
+
+// MetricSource sets a single metric source (replaces any existing).
+func (w *PodAutoscalerWrapper) MetricSource(source autoscalingv1alpha1.MetricSource) *PodAutoscalerWrapper {
+	// Assigns a fresh one-element slice rather than appending.
+	w.Spec.MetricsSources = []autoscalingv1alpha1.MetricSource{source}
+	return w
+}
+
+// AddMetricSource adds a metric source to the list.
+func (w *PodAutoscalerWrapper) AddMetricSource(source autoscalingv1alpha1.MetricSource) *PodAutoscalerWrapper {
+	spec := &w.Spec
+	spec.MetricsSources = append(spec.MetricsSources, source)
+	return w
+}
+
+// Annotations merges the given annotations into the PodAutoscaler's
+// annotations. The annotation map is created on first use; keys that already
+// exist are overwritten and all other existing entries are kept.
+func (w *PodAutoscalerWrapper) Annotations(annotations map[string]string) *PodAutoscalerWrapper {
+	// Reached through the embedded PodAutoscaler because the wrapper's own
+	// Annotations method shadows the promoted field.
+	target := &w.PodAutoscaler
+	if target.Annotations == nil {
+		target.Annotations = map[string]string{}
+	}
+	for key, value := range annotations {
+		target.Annotations[key] = value
+	}
+	return w
+}
+
+// Labels merges the given labels into the PodAutoscaler's labels. The label
+// map is created on first use; keys that already exist are overwritten and all
+// other existing entries are kept.
+func (w *PodAutoscalerWrapper) Labels(labels map[string]string) *PodAutoscalerWrapper {
+	// Reached through the embedded PodAutoscaler because the wrapper's own
+	// Labels method shadows the promoted field.
+	target := &w.PodAutoscaler
+	if target.Labels == nil {
+		target.Labels = map[string]string{}
+	}
+	for key, value := range labels {
+		target.Labels[key] = value
+	}
+	return w
+}
+
+// MakeMetricSourcePod builds a POD-type metric source from the given protocol,
+// port, path, target metric name, and target value.
+func MakeMetricSourcePod(
+	protocolType autoscalingv1alpha1.ProtocolType,
+	port, path, targetMetric, targetValue string,
+) autoscalingv1alpha1.MetricSource {
+	src := autoscalingv1alpha1.MetricSource{MetricSourceType: autoscalingv1alpha1.POD}
+	src.ProtocolType = protocolType
+	src.Port = port
+	src.Path = path
+	src.TargetMetric = targetMetric
+	src.TargetValue = targetValue
+	return src
+}
+
+// MakeMetricSourceResource builds a RESOURCE-type metric source (e.g., CPU,
+// memory) carrying only the target metric name and target value.
+func MakeMetricSourceResource(targetMetric, targetValue string) autoscalingv1alpha1.MetricSource {
+	src := autoscalingv1alpha1.MetricSource{MetricSourceType: autoscalingv1alpha1.RESOURCE}
+	src.TargetMetric = targetMetric
+	src.TargetValue = targetValue
+	return src
+}
+
+// MakeMetricSourceExternal creates an EXTERNAL-type metric source.
+func MakeMetricSourceExternal(protocolType autoscalingv1alpha1.ProtocolType, endpoint, path, targetMetric, targetValue string) autoscalingv1alpha1.MetricSource { + return autoscalingv1alpha1.MetricSource{ + MetricSourceType: autoscalingv1alpha1.EXTERNAL, + ProtocolType: protocolType, + Endpoint: endpoint, + Path: path, + TargetMetric: targetMetric, + TargetValue: targetValue, + } +} + +// MakeMetricSourceCustom creates a CUSTOM-type metric source. +func MakeMetricSourceCustom(targetMetric, targetValue string) autoscalingv1alpha1.MetricSource { + return autoscalingv1alpha1.MetricSource{ + MetricSourceType: autoscalingv1alpha1.CUSTOM, + TargetMetric: targetMetric, + TargetValue: targetValue, + } +} + From 9f9eafbde407d394776bbdaabd988631f6923f77 Mon Sep 17 00:00:00 2001 From: Wang Zhenyu Date: Mon, 20 Oct 2025 09:50:13 +0800 Subject: [PATCH 3/4] fix: correct typo, add Eventually, delete duplicate Func Signed-off-by: Wang Zhenyu --- .../podautoscaler/podautoscaler_controller.go | 2 +- .../controller/podautoscaler_test.go | 78 +++++++++++-------- test/utils/validation/podautoscaler.go | 18 ----- 3 files changed, 48 insertions(+), 50 deletions(-) diff --git a/pkg/controller/podautoscaler/podautoscaler_controller.go b/pkg/controller/podautoscaler/podautoscaler_controller.go index 29cca1adf..bd72d1cb4 100644 --- a/pkg/controller/podautoscaler/podautoscaler_controller.go +++ b/pkg/controller/podautoscaler/podautoscaler_controller.go @@ -74,7 +74,7 @@ const RayClusterFleet = "RayClusterFleet" const ( ConditionReady = "Ready" ConditionValidSpec = "ValidSpec" - ConditionConflict = "MutilPodAutoscalerConflict" + ConditionConflict = "MultiPodAutoscalerConflict" ConditionScalingActive = "ScalingActive" ConditionAbleToScale = "AbleToScale" diff --git a/test/integration/controller/podautoscaler_test.go b/test/integration/controller/podautoscaler_test.go index bb1bb0b4f..8fa5b1214 100644 --- a/test/integration/controller/podautoscaler_test.go +++ 
b/test/integration/controller/podautoscaler_test.go @@ -38,7 +38,7 @@ import ( const ( ConditionReady = "Ready" ConditionValidSpec = "ValidSpec" - ConditionConflict = "MutilPodAutoscalerConflict" + ConditionConflict = "MultiPodAutoscalerConflict" ConditionScalingActive = "ScalingActive" ConditionAbleToScale = "AbleToScale" @@ -197,14 +197,16 @@ var _ = ginkgo.Describe("PodAutoscaler controller test", func() { }, { updateFunc: func(pa *autoscalingv1alpha1.PodAutoscaler) { - // Update PodAutoscaler spec - time.Sleep(time.Second * 3) // Wait for initial reconcile - fetched := validation.GetPodAutoscaler(ctx, k8sClient, pa) - minReplicas := int32(2) - fetched.Spec.MinReplicas = &minReplicas - fetched.Spec.MaxReplicas = 10 - gomega.Expect(k8sClient.Update(ctx, fetched)).To(gomega.Succeed()) - time.Sleep(time.Second * 3) // Wait for update to propagate + // Update PodAutoscaler spec with retry for race conditions + gomega.Eventually(func() error { + fetched := validation.GetPodAutoscaler(ctx, k8sClient, pa) + minReplicas := int32(2) + fetched.Spec.MinReplicas = &minReplicas + fetched.Spec.MaxReplicas = 10 + return k8sClient.Update(ctx, fetched) + }, time.Second*10, time.Millisecond*250).Should(gomega.Succeed()) + // Give controller time to reconcile the updated PA and sync HPA + time.Sleep(time.Second * 2) }, checkFunc: func(ctx context.Context, k8sClient client.Client, pa *autoscalingv1alpha1.PodAutoscaler) { // Validate HPA is updated with more relaxed timing @@ -472,21 +474,33 @@ var _ = ginkgo.Describe("PodAutoscaler controller test", func() { updateFunc: func(pa *autoscalingv1alpha1.PodAutoscaler) { // Delete first PA gomega.Expect(k8sClient.Delete(ctx, pa)).To(gomega.Succeed()) + + // Wait for deletion to complete + gomega.Eventually(func() error { + temp := &autoscalingv1alpha1.PodAutoscaler{} + return k8sClient.Get(ctx, client.ObjectKeyFromObject(pa), temp) + }, time.Second*10, time.Millisecond*250).ShouldNot(gomega.Succeed()) + + // Give controller time to 
process the deletion event and update caches time.Sleep(time.Second * 2) - // Manually trigger PA2 reconcile by updating it (no-op update) - // This forces the controller to re-check the conflict status - pa2 := &autoscalingv1alpha1.PodAutoscaler{} - err := k8sClient.Get(ctx, client.ObjectKey{Namespace: ns.Name, Name: "pa-resolve-2"}, pa2) - gomega.Expect(err).ToNot(gomega.HaveOccurred()) + // Manually trigger PA2 reconcile by updating it with retry for race conditions + gomega.Eventually(func() error { + pa2 := &autoscalingv1alpha1.PodAutoscaler{} + err := k8sClient.Get(ctx, client.ObjectKey{Namespace: ns.Name, Name: "pa-resolve-2"}, pa2) + if err != nil { + return err + } + // Add an annotation to trigger reconcile + if pa2.Annotations == nil { + pa2.Annotations = make(map[string]string) + } + pa2.Annotations["test.aibrix.ai/force-reconcile"] = time.Now().Format(time.RFC3339) + return k8sClient.Update(ctx, pa2) + }, time.Second*10, time.Millisecond*250).Should(gomega.Succeed()) - // Add an annotation to trigger reconcile - if pa2.Annotations == nil { - pa2.Annotations = make(map[string]string) - } - pa2.Annotations["test.aibrix.ai/force-reconcile"] = time.Now().Format(time.RFC3339) - gomega.Expect(k8sClient.Update(ctx, pa2)).To(gomega.Succeed()) - time.Sleep(time.Second * 3) + // Give controller time to reconcile PA2 after annotation update + time.Sleep(time.Second * 1) }, checkFunc: func(ctx context.Context, k8sClient client.Client, pa *autoscalingv1alpha1.PodAutoscaler) { // PA2 conflict should be resolved - condition should be removed @@ -1213,11 +1227,12 @@ var _ = ginkgo.Describe("PodAutoscaler controller test", func() { }, { updateFunc: func(pa *autoscalingv1alpha1.PodAutoscaler) { - // Update spec: change maxReplicas - fetched := validation.GetPodAutoscaler(ctx, k8sClient, pa) - fetched.Spec.MaxReplicas = 10 - gomega.Expect(k8sClient.Update(ctx, fetched)).To(gomega.Succeed()) - time.Sleep(time.Second * 2) + // Update spec: change maxReplicas with retry for 
race conditions + gomega.Eventually(func() error { + fetched := validation.GetPodAutoscaler(ctx, k8sClient, pa) + fetched.Spec.MaxReplicas = 10 + return k8sClient.Update(ctx, fetched) + }, time.Second*10, time.Millisecond*250).Should(gomega.Succeed()) }, checkFunc: func(ctx context.Context, k8sClient client.Client, pa *autoscalingv1alpha1.PodAutoscaler) { // Verify update is applied and reconciled @@ -1259,12 +1274,13 @@ var _ = ginkgo.Describe("PodAutoscaler controller test", func() { updateFunc: func(pa *autoscalingv1alpha1.PodAutoscaler) { // Rapid updates: change maxReplicas multiple times for i := 0; i < 3; i++ { - fetched := validation.GetPodAutoscaler(ctx, k8sClient, pa) - fetched.Spec.MaxReplicas = int32(5 + i*2) - gomega.Expect(k8sClient.Update(ctx, fetched)).To(gomega.Succeed()) - time.Sleep(time.Millisecond * 500) + maxReplicas := int32(5 + i*2) + gomega.Eventually(func() error { + fetched := validation.GetPodAutoscaler(ctx, k8sClient, pa) + fetched.Spec.MaxReplicas = maxReplicas + return k8sClient.Update(ctx, fetched) + }, time.Second*5, time.Millisecond*100).Should(gomega.Succeed()) } - time.Sleep(time.Second * 2) }, checkFunc: func(ctx context.Context, k8sClient client.Client, pa *autoscalingv1alpha1.PodAutoscaler) { // Eventually consistent: final maxReplicas should be 9 (5 + 2*2) diff --git a/test/utils/validation/podautoscaler.go b/test/utils/validation/podautoscaler.go index 7aee5d17a..8366dd09b 100644 --- a/test/utils/validation/podautoscaler.go +++ b/test/utils/validation/podautoscaler.go @@ -112,24 +112,6 @@ func ValidatePodAutoscalerScaling(ctx context.Context, }, time.Second*10, time.Millisecond*250).Should(gomega.Succeed()) } -// ValidatePodAutoscalerScalingEventually validates scaling status and waits for eventual consistency. 
-func ValidatePodAutoscalerScalingEventually(ctx context.Context, - k8sClient client.Client, - pa *autoscalingv1alpha1.PodAutoscaler, - expectedDesired, expectedActual int32) { - - gomega.Eventually(func(g gomega.Gomega) { - fetched := &autoscalingv1alpha1.PodAutoscaler{} - err := k8sClient.Get(ctx, client.ObjectKeyFromObject(pa), fetched) - g.Expect(err).ToNot(gomega.HaveOccurred()) - - g.Expect(fetched.Status.DesiredScale).To(gomega.Equal(expectedDesired), - "DesiredScale should be %d", expectedDesired) - g.Expect(fetched.Status.ActualScale).To(gomega.Equal(expectedActual), - "ActualScale should be %d", expectedActual) - }, time.Second*10, time.Millisecond*250).Should(gomega.Succeed()) -} - // ValidateScalingHistory validates the scalingHistory in status. func ValidateScalingHistory(pa *autoscalingv1alpha1.PodAutoscaler, expectedCount int, From 7eba63656db3a420d08be59e6aae36e56bf96587 Mon Sep 17 00:00:00 2001 From: Wang Zhenyu Date: Tue, 21 Oct 2025 09:45:08 +0800 Subject: [PATCH 4/4] fix: pass lint check Signed-off-by: Wang Zhenyu --- .../controller/podautoscaler_test.go | 396 ++++++++---------- test/utils/validation/podautoscaler.go | 3 +- test/utils/wrapper/podautoscaler.go | 11 +- 3 files changed, 188 insertions(+), 222 deletions(-) diff --git a/test/integration/controller/podautoscaler_test.go b/test/integration/controller/podautoscaler_test.go index 8fa5b1214..e6adb8238 100644 --- a/test/integration/controller/podautoscaler_test.go +++ b/test/integration/controller/podautoscaler_test.go @@ -116,6 +116,120 @@ var _ = ginkgo.Describe("PodAutoscaler controller test", func() { return deployment } + // Helper: creates a StormService with two roles (prefill and decode) + createStormService := func(name, namespace string, labelKey, labelValue string, + prefillReplicas, decodeReplicas int32) *orchestrationapi.StormService { + matchLabel := map[string]string{labelKey: labelValue} + podTemplate := corev1.PodTemplateSpec{ + ObjectMeta: metav1.ObjectMeta{ + Labels: 
matchLabel, + }, + Spec: corev1.PodSpec{ + Containers: []corev1.Container{ + { + Name: "vllm-container", + Image: "vllm/vllm-openai:latest", + }, + }, + }, + } + roleSetSpec := &orchestrationapi.RoleSetSpec{ + Roles: []orchestrationapi.RoleSpec{ + { + Name: "prefill", + Replicas: ptr.To(prefillReplicas), + Template: podTemplate, + Stateful: false, + }, + { + Name: "decode", + Replicas: ptr.To(decodeReplicas), + Template: podTemplate, + Stateful: false, + }, + }, + } + ss := wrapper.MakeStormService(name). + Namespace(namespace). + Replicas(ptr.To(int32(2))). + Selector(metav1.SetAsLabelSelector(matchLabel)). + UpdateStrategyType(orchestrationapi.RollingUpdateStormServiceStrategyType). + RoleSetTemplateMeta(metav1.ObjectMeta{Labels: matchLabel}, roleSetSpec). + Obj() + gomega.Expect(k8sClient.Create(ctx, ss)).To(gomega.Succeed()) + return ss + } + + // Helper: creates a test case for boundary enforcement with similar structure + makeBoundaryTestCase := func(name, deploymentName string, min, max int32, + deploymentReplicas int32) *testValidatingCase { + return &testValidatingCase{ + makePodAutoscaler: func() *autoscalingv1alpha1.PodAutoscaler { + return wrapper.MakePodAutoscaler(name). + Namespace(ns.Name). + ScalingStrategy(autoscalingv1alpha1.HPA). + MinReplicas(min). + MaxReplicas(max). + ScaleTargetRefWithKind("Deployment", "apps/v1", deploymentName). + MetricSource(wrapper.MakeMetricSourcePod( + autoscalingv1alpha1.HTTP, "8080", "/metrics", + "requests_per_second", "100")). 
+ Obj() + }, + updates: []*update{ + { + updateFunc: func(pa *autoscalingv1alpha1.PodAutoscaler) { + createDeployment(deploymentName, ns.Name, deploymentReplicas) + gomega.Expect(k8sClient.Create(ctx, pa)).To(gomega.Succeed()) + time.Sleep(time.Second * 2) + }, + checkFunc: func(ctx context.Context, k8sClient client.Client, + pa *autoscalingv1alpha1.PodAutoscaler) { + hpa := validation.WaitForHPACreated(ctx, k8sClient, ns.Name, pa.Name+"-hpa") + validation.ValidateHPASpec(hpa, min, max) + }, + }, + }, + } + } + + // Helper: creates a test case for spec validation with similar structure + makeSpecValidationTestCase := func(name, deploymentName string, min, max int32, + expectedCondition string, expectedStatus metav1.ConditionStatus, + expectedReason string) *testValidatingCase { + return &testValidatingCase{ + makePodAutoscaler: func() *autoscalingv1alpha1.PodAutoscaler { + return wrapper.MakePodAutoscaler(name). + Namespace(ns.Name). + ScalingStrategy(autoscalingv1alpha1.HPA). + MinReplicas(min). + MaxReplicas(max). + ScaleTargetRefWithKind("Deployment", "apps/v1", deploymentName). + MetricSource(wrapper.MakeMetricSourcePod( + autoscalingv1alpha1.HTTP, "8080", "/metrics", + "requests_per_second", "100")). + Obj() + }, + updates: []*update{ + { + updateFunc: func(pa *autoscalingv1alpha1.PodAutoscaler) { + createDeployment(deploymentName, ns.Name, 2) + gomega.Expect(k8sClient.Create(ctx, pa)).To(gomega.Succeed()) + time.Sleep(time.Second * 2) + }, + checkFunc: func(ctx context.Context, k8sClient client.Client, + pa *autoscalingv1alpha1.PodAutoscaler) { + validation.WaitForPodAutoscalerConditionWithReason( + ctx, k8sClient, pa, + expectedCondition, expectedStatus, + expectedReason, + ) + }, + }, + }, + } + } + ginkgo.DescribeTable("test PodAutoscaler creation and reconciliation", func(tc *testValidatingCase) { pa := tc.makePodAutoscaler() @@ -144,7 +258,8 @@ var _ = ginkgo.Describe("PodAutoscaler controller test", func() { MinReplicas(1). MaxReplicas(5). 
ScaleTargetRefWithKind("Deployment", "apps/v1", "test-deployment"). - MetricSource(wrapper.MakeMetricSourcePod(autoscalingv1alpha1.HTTP, "8080", "/metrics", "requests_per_second", "100")). + MetricSource(wrapper.MakeMetricSourcePod( + autoscalingv1alpha1.HTTP, "8080", "/metrics", "requests_per_second", "100")). Obj() }, updates: []*update{ @@ -181,7 +296,8 @@ var _ = ginkgo.Describe("PodAutoscaler controller test", func() { MinReplicas(1). MaxReplicas(5). ScaleTargetRefWithKind("Deployment", "apps/v1", "test-deployment-2"). - MetricSource(wrapper.MakeMetricSourcePod(autoscalingv1alpha1.HTTP, "8080", "/metrics", "requests_per_second", "100")). + MetricSource(wrapper.MakeMetricSourcePod( + autoscalingv1alpha1.HTTP, "8080", "/metrics", "requests_per_second", "100")). Obj() }, updates: []*update{ @@ -229,7 +345,8 @@ var _ = ginkgo.Describe("PodAutoscaler controller test", func() { MinReplicas(1). MaxReplicas(5). ScaleTargetRefWithKind("Deployment", "apps/v1", "test-deployment-3"). - MetricSource(wrapper.MakeMetricSourcePod(autoscalingv1alpha1.HTTP, "8080", "/metrics", "requests_per_second", "100")). + MetricSource(wrapper.MakeMetricSourcePod( + autoscalingv1alpha1.HTTP, "8080", "/metrics", "requests_per_second", "100")). Obj() }, updates: []*update{ @@ -276,7 +393,8 @@ var _ = ginkgo.Describe("PodAutoscaler controller test", func() { MinReplicas(1). MaxReplicas(5). ScaleTargetRefWithKind("Deployment", "apps/v1", ""). // Empty name - MetricSource(wrapper.MakeMetricSourcePod(autoscalingv1alpha1.HTTP, "8080", "/metrics", "requests_per_second", "100")). + MetricSource(wrapper.MakeMetricSourcePod( + autoscalingv1alpha1.HTTP, "8080", "/metrics", "requests_per_second", "100")). 
Obj() }, updates: []*update{ @@ -300,36 +418,11 @@ var _ = ginkgo.Describe("PodAutoscaler controller test", func() { ), ginkgo.Entry("Spec Validation - Invalid Replica Bounds (min > max)", - &testValidatingCase{ - makePodAutoscaler: func() *autoscalingv1alpha1.PodAutoscaler { - return wrapper.MakePodAutoscaler("pa-invalid-bounds"). - Namespace(ns.Name). - ScalingStrategy(autoscalingv1alpha1.HPA). - MinReplicas(5). // min > max - MaxReplicas(3). - ScaleTargetRefWithKind("Deployment", "apps/v1", "test-deployment-4"). - MetricSource(wrapper.MakeMetricSourcePod(autoscalingv1alpha1.HTTP, "8080", "/metrics", "requests_per_second", "100")). - Obj() - }, - updates: []*update{ - { - updateFunc: func(pa *autoscalingv1alpha1.PodAutoscaler) { - createDeployment("test-deployment-4", ns.Name, 2) - gomega.Expect(k8sClient.Create(ctx, pa)).To(gomega.Succeed()) - // Wait for controller to reconcile - time.Sleep(time.Second * 2) - }, - checkFunc: func(ctx context.Context, k8sClient client.Client, pa *autoscalingv1alpha1.PodAutoscaler) { - // Validate ValidSpec condition is False - validation.WaitForPodAutoscalerConditionWithReason( - ctx, k8sClient, pa, - ConditionValidSpec, metav1.ConditionFalse, - ReasonInvalidBounds, - ) - }, - }, - }, - }, + makeSpecValidationTestCase( + "pa-invalid-bounds", "test-deployment-4", + 5, 3, // min > max + ConditionValidSpec, metav1.ConditionFalse, ReasonInvalidBounds, + ), ), // Note: Invalid ScalingStrategy test is skipped because CRD-level validation @@ -339,36 +432,11 @@ var _ = ginkgo.Describe("PodAutoscaler controller test", func() { // prevents empty metricsSources from being created (minItems=1). ginkgo.Entry("Spec Validation - Valid Spec", - &testValidatingCase{ - makePodAutoscaler: func() *autoscalingv1alpha1.PodAutoscaler { - return wrapper.MakePodAutoscaler("pa-valid-spec"). - Namespace(ns.Name). - ScalingStrategy(autoscalingv1alpha1.HPA). - MinReplicas(1). - MaxReplicas(5). 
- ScaleTargetRefWithKind("Deployment", "apps/v1", "test-deployment-7"). - MetricSource(wrapper.MakeMetricSourcePod(autoscalingv1alpha1.HTTP, "8080", "/metrics", "requests_per_second", "100")). - Obj() - }, - updates: []*update{ - { - updateFunc: func(pa *autoscalingv1alpha1.PodAutoscaler) { - createDeployment("test-deployment-7", ns.Name, 2) - gomega.Expect(k8sClient.Create(ctx, pa)).To(gomega.Succeed()) - // Wait for controller to reconcile - time.Sleep(time.Second * 2) - }, - checkFunc: func(ctx context.Context, k8sClient client.Client, pa *autoscalingv1alpha1.PodAutoscaler) { - // Validate ValidSpec condition is True - validation.WaitForPodAutoscalerConditionWithReason( - ctx, k8sClient, pa, - ConditionValidSpec, metav1.ConditionTrue, - ReasonAsExpected, - ) - }, - }, - }, - }, + makeSpecValidationTestCase( + "pa-valid-spec", "test-deployment-7", + 1, 5, + ConditionValidSpec, metav1.ConditionTrue, ReasonAsExpected, + ), ), // ========================================================================= @@ -384,7 +452,8 @@ var _ = ginkgo.Describe("PodAutoscaler controller test", func() { MinReplicas(1). MaxReplicas(5). ScaleTargetRefWithKind("Deployment", "apps/v1", "shared-deployment"). - MetricSource(wrapper.MakeMetricSourcePod(autoscalingv1alpha1.HTTP, "8080", "/metrics", "requests_per_second", "100")). + MetricSource(wrapper.MakeMetricSourcePod( + autoscalingv1alpha1.HTTP, "8080", "/metrics", "requests_per_second", "100")). Obj() }, updates: []*update{ @@ -411,7 +480,8 @@ var _ = ginkgo.Describe("PodAutoscaler controller test", func() { MinReplicas(1). MaxReplicas(10). ScaleTargetRefWithKind("Deployment", "apps/v1", "shared-deployment"). - MetricSource(wrapper.MakeMetricSourcePod(autoscalingv1alpha1.HTTP, "8080", "/metrics", "requests_per_second", "100")). + MetricSource(wrapper.MakeMetricSourcePod( + autoscalingv1alpha1.HTTP, "8080", "/metrics", "requests_per_second", "100")). 
Obj() gomega.Expect(k8sClient.Create(ctx, pa2)).To(gomega.Succeed()) time.Sleep(time.Second * 2) @@ -441,7 +511,8 @@ var _ = ginkgo.Describe("PodAutoscaler controller test", func() { MinReplicas(1). MaxReplicas(5). ScaleTargetRefWithKind("Deployment", "apps/v1", "resolve-deployment"). - MetricSource(wrapper.MakeMetricSourcePod(autoscalingv1alpha1.HTTP, "8080", "/metrics", "requests_per_second", "100")). + MetricSource(wrapper.MakeMetricSourcePod( + autoscalingv1alpha1.HTTP, "8080", "/metrics", "requests_per_second", "100")). Obj() }, updates: []*update{ @@ -457,7 +528,8 @@ var _ = ginkgo.Describe("PodAutoscaler controller test", func() { MinReplicas(1). MaxReplicas(10). ScaleTargetRefWithKind("Deployment", "apps/v1", "resolve-deployment"). - MetricSource(wrapper.MakeMetricSourcePod(autoscalingv1alpha1.HTTP, "8080", "/metrics", "requests_per_second", "100")). + MetricSource(wrapper.MakeMetricSourcePod( + autoscalingv1alpha1.HTTP, "8080", "/metrics", "requests_per_second", "100")). Obj() gomega.Expect(k8sClient.Create(ctx, pa2)).To(gomega.Succeed()) time.Sleep(time.Second * 2) @@ -530,7 +602,8 @@ var _ = ginkgo.Describe("PodAutoscaler controller test", func() { MinReplicas(1). MaxReplicas(5). ScaleTargetRefWithKind("Deployment", "apps/v1", "status-deployment"). - MetricSource(wrapper.MakeMetricSourcePod(autoscalingv1alpha1.HTTP, "8080", "/metrics", "requests_per_second", "100")). + MetricSource(wrapper.MakeMetricSourcePod( + autoscalingv1alpha1.HTTP, "8080", "/metrics", "requests_per_second", "100")). Obj() }, updates: []*update{ @@ -565,7 +638,8 @@ var _ = ginkgo.Describe("PodAutoscaler controller test", func() { MinReplicas(1). MaxReplicas(5). ScaleTargetRefWithKind("Deployment", "apps/v1", "condition-deployment"). - MetricSource(wrapper.MakeMetricSourcePod(autoscalingv1alpha1.HTTP, "8080", "/metrics", "requests_per_second", "100")). + MetricSource(wrapper.MakeMetricSourcePod( + autoscalingv1alpha1.HTTP, "8080", "/metrics", "requests_per_second", "100")). 
Obj() }, updates: []*update{ @@ -596,7 +670,8 @@ var _ = ginkgo.Describe("PodAutoscaler controller test", func() { MinReplicas(1). MaxReplicas(5). ScaleTargetRefWithKind("Deployment", "apps/v1", "ready-deployment"). - MetricSource(wrapper.MakeMetricSourcePod(autoscalingv1alpha1.HTTP, "8080", "/metrics", "requests_per_second", "100")). + MetricSource(wrapper.MakeMetricSourcePod( + autoscalingv1alpha1.HTTP, "8080", "/metrics", "requests_per_second", "100")). Obj() }, updates: []*update{ @@ -634,7 +709,8 @@ var _ = ginkgo.Describe("PodAutoscaler controller test", func() { MinReplicas(1). MaxReplicas(10). ScaleTargetRefWithKind("Deployment", "apps/v1", "scale-test-deployment"). - MetricSource(wrapper.MakeMetricSourcePod(autoscalingv1alpha1.HTTP, "8080", "/metrics", "requests_per_second", "100")). + MetricSource(wrapper.MakeMetricSourcePod( + autoscalingv1alpha1.HTTP, "8080", "/metrics", "requests_per_second", "100")). Obj() }, updates: []*update{ @@ -669,7 +745,8 @@ var _ = ginkgo.Describe("PodAutoscaler controller test", func() { MinReplicas(1). MaxReplicas(5). ScaleTargetRefWithKind("Deployment", "apps/v1", "nonexistent-deployment"). - MetricSource(wrapper.MakeMetricSourcePod(autoscalingv1alpha1.HTTP, "8080", "/metrics", "requests_per_second", "100")). + MetricSource(wrapper.MakeMetricSourcePod( + autoscalingv1alpha1.HTTP, "8080", "/metrics", "requests_per_second", "100")). Obj() }, updates: []*update{ @@ -699,64 +776,11 @@ var _ = ginkgo.Describe("PodAutoscaler controller test", func() { // ========================================================================= ginkgo.Entry("Boundary Enforcement - maxReplicas enforced in HPA", - &testValidatingCase{ - makePodAutoscaler: func() *autoscalingv1alpha1.PodAutoscaler { - return wrapper.MakePodAutoscaler("pa-boundary-max"). - Namespace(ns.Name). - ScalingStrategy(autoscalingv1alpha1.HPA). - MinReplicas(1). - MaxReplicas(5). - ScaleTargetRefWithKind("Deployment", "apps/v1", "boundary-deployment"). 
- MetricSource(wrapper.MakeMetricSourcePod(autoscalingv1alpha1.HTTP, "8080", "/metrics", "requests_per_second", "100")). - Obj() - }, - updates: []*update{ - { - updateFunc: func(pa *autoscalingv1alpha1.PodAutoscaler) { - // Create deployment with many replicas - createDeployment("boundary-deployment", ns.Name, 8) - gomega.Expect(k8sClient.Create(ctx, pa)).To(gomega.Succeed()) - time.Sleep(time.Second * 2) - }, - checkFunc: func(ctx context.Context, k8sClient client.Client, pa *autoscalingv1alpha1.PodAutoscaler) { - // HPA should be created with maxReplicas=5 - hpa := validation.WaitForHPACreated(ctx, k8sClient, ns.Name, pa.Name+"-hpa") - validation.ValidateHPASpec(hpa, 1, 5) - // HPA will enforce the max boundary - }, - }, - }, - }, + makeBoundaryTestCase("pa-boundary-max", "boundary-deployment", 1, 5, 8), ), ginkgo.Entry("Boundary Enforcement - minReplicas enforced in HPA", - &testValidatingCase{ - makePodAutoscaler: func() *autoscalingv1alpha1.PodAutoscaler { - return wrapper.MakePodAutoscaler("pa-boundary-min"). - Namespace(ns.Name). - ScalingStrategy(autoscalingv1alpha1.HPA). - MinReplicas(3). - MaxReplicas(10). - ScaleTargetRefWithKind("Deployment", "apps/v1", "boundary-min-deployment"). - MetricSource(wrapper.MakeMetricSourcePod(autoscalingv1alpha1.HTTP, "8080", "/metrics", "requests_per_second", "100")). 
- Obj() - }, - updates: []*update{ - { - updateFunc: func(pa *autoscalingv1alpha1.PodAutoscaler) { - // Create deployment with few replicas - createDeployment("boundary-min-deployment", ns.Name, 1) - gomega.Expect(k8sClient.Create(ctx, pa)).To(gomega.Succeed()) - time.Sleep(time.Second * 2) - }, - checkFunc: func(ctx context.Context, k8sClient client.Client, pa *autoscalingv1alpha1.PodAutoscaler) { - // HPA should be created with minReplicas=3 - hpa := validation.WaitForHPACreated(ctx, k8sClient, ns.Name, pa.Name+"-hpa") - validation.ValidateHPASpec(hpa, 3, 10) - }, - }, - }, - }, + makeBoundaryTestCase("pa-boundary-min", "boundary-min-deployment", 3, 10, 1), ), ginkgo.Entry("Boundary Enforcement - minReplicas=0 in PA spec", @@ -768,7 +792,8 @@ var _ = ginkgo.Describe("PodAutoscaler controller test", func() { MinReplicas(0). // Set minReplicas=0 in PA MaxReplicas(5). ScaleTargetRefWithKind("Deployment", "apps/v1", "boundary-zero-deployment"). - MetricSource(wrapper.MakeMetricSourcePod(autoscalingv1alpha1.HTTP, "8080", "/metrics", "requests_per_second", "100")). + MetricSource(wrapper.MakeMetricSourcePod( + autoscalingv1alpha1.HTTP, "8080", "/metrics", "requests_per_second", "100")). Obj() }, updates: []*update{ @@ -809,7 +834,8 @@ var _ = ginkgo.Describe("PodAutoscaler controller test", func() { MinReplicas(1). MaxReplicas(10). ScaleTargetRefWithKind("Deployment", "apps/v1", "history-deployment"). - MetricSource(wrapper.MakeMetricSourcePod(autoscalingv1alpha1.HTTP, "8080", "/metrics", "requests_per_second", "100")). + MetricSource(wrapper.MakeMetricSourcePod( + autoscalingv1alpha1.HTTP, "8080", "/metrics", "requests_per_second", "100")). Obj() }, updates: []*update{ @@ -849,52 +875,15 @@ var _ = ginkgo.Describe("PodAutoscaler controller test", func() { MinReplicas(1). MaxReplicas(10). ScaleTargetRefWithKind("StormService", "orchestration.aibrix.ai/v1alpha1", "test-stormservice"). 
- MetricSource(wrapper.MakeMetricSourcePod(autoscalingv1alpha1.HTTP, "8080", "/metrics", "requests_per_second", "100")). + MetricSource(wrapper.MakeMetricSourcePod( + autoscalingv1alpha1.HTTP, "8080", "/metrics", "requests_per_second", "100")). Obj() }, updates: []*update{ { updateFunc: func(pa *autoscalingv1alpha1.PodAutoscaler) { // Create StormService with 2 roles - matchLabel := map[string]string{"app": "test-vllm"} - podTemplate := corev1.PodTemplateSpec{ - ObjectMeta: metav1.ObjectMeta{ - Labels: matchLabel, - }, - Spec: corev1.PodSpec{ - Containers: []corev1.Container{ - { - Name: "vllm-container", - Image: "vllm/vllm-openai:latest", - }, - }, - }, - } - roleSetSpec := &orchestrationapi.RoleSetSpec{ - Roles: []orchestrationapi.RoleSpec{ - { - Name: "prefill", - Replicas: ptr.To(int32(2)), - Template: podTemplate, - Stateful: false, - }, - { - Name: "decode", - Replicas: ptr.To(int32(1)), - Template: podTemplate, - Stateful: false, - }, - }, - } - ss := wrapper.MakeStormService("test-stormservice"). - Namespace(ns.Name). - Replicas(ptr.To(int32(2))). - Selector(metav1.SetAsLabelSelector(matchLabel)). - UpdateStrategyType(orchestrationapi.RollingUpdateStormServiceStrategyType). - RoleSetTemplateMeta(metav1.ObjectMeta{Labels: matchLabel}, roleSetSpec). - Obj() - gomega.Expect(k8sClient.Create(ctx, ss)).To(gomega.Succeed()) - + createStormService("test-stormservice", ns.Name, "app", "test-vllm", 2, 1) // Create PA gomega.Expect(k8sClient.Create(ctx, pa)).To(gomega.Succeed()) time.Sleep(time.Second * 3) @@ -924,52 +913,15 @@ var _ = ginkgo.Describe("PodAutoscaler controller test", func() { MaxReplicas(10). ScaleTargetRefWithKind("StormService", "orchestration.aibrix.ai/v1alpha1", "test-stormservice-role"). SubTargetSelector("prefill"). // Only scale "prefill" role - MetricSource(wrapper.MakeMetricSourcePod(autoscalingv1alpha1.HTTP, "8080", "/metrics", "gpu_cache_usage_perc", "0.7")). 
+ MetricSource(wrapper.MakeMetricSourcePod( + autoscalingv1alpha1.HTTP, "8080", "/metrics", "gpu_cache_usage_perc", "0.7")). Obj() }, updates: []*update{ { updateFunc: func(pa *autoscalingv1alpha1.PodAutoscaler) { // Create StormService with prefill and decode roles - matchLabel := map[string]string{"app": "test-vllm-role"} - podTemplate := corev1.PodTemplateSpec{ - ObjectMeta: metav1.ObjectMeta{ - Labels: matchLabel, - }, - Spec: corev1.PodSpec{ - Containers: []corev1.Container{ - { - Name: "vllm-container", - Image: "vllm/vllm-openai:latest", - }, - }, - }, - } - roleSetSpec := &orchestrationapi.RoleSetSpec{ - Roles: []orchestrationapi.RoleSpec{ - { - Name: "prefill", - Replicas: ptr.To(int32(3)), - Template: podTemplate, - Stateful: false, - }, - { - Name: "decode", - Replicas: ptr.To(int32(2)), - Template: podTemplate, - Stateful: false, - }, - }, - } - ss := wrapper.MakeStormService("test-stormservice-role"). - Namespace(ns.Name). - Replicas(ptr.To(int32(2))). - Selector(metav1.SetAsLabelSelector(matchLabel)). - UpdateStrategyType(orchestrationapi.RollingUpdateStormServiceStrategyType). - RoleSetTemplateMeta(metav1.ObjectMeta{Labels: matchLabel}, roleSetSpec). - Obj() - gomega.Expect(k8sClient.Create(ctx, ss)).To(gomega.Succeed()) - + createStormService("test-stormservice-role", ns.Name, "app", "test-vllm-role", 3, 2) // Create PA targeting only "prefill" role gomega.Expect(k8sClient.Create(ctx, pa)).To(gomega.Succeed()) time.Sleep(time.Second * 3) @@ -1003,7 +955,8 @@ var _ = ginkgo.Describe("PodAutoscaler controller test", func() { MaxReplicas(10). ScaleTargetRefWithKind("StormService", "orchestration.aibrix.ai/v1alpha1", "test-stormservice-conflict"). SubTargetSelector("prefill"). // Same role as PA1 - MetricSource(wrapper.MakeMetricSourcePod(autoscalingv1alpha1.HTTP, "8080", "/metrics", "gpu_cache_usage_perc", "0.7")). + MetricSource(wrapper.MakeMetricSourcePod( + autoscalingv1alpha1.HTTP, "8080", "/metrics", "gpu_cache_usage_perc", "0.7")). 
Obj() }, updates: []*update{ @@ -1057,7 +1010,8 @@ var _ = ginkgo.Describe("PodAutoscaler controller test", func() { MaxReplicas(10). ScaleTargetRefWithKind("StormService", "orchestration.aibrix.ai/v1alpha1", "test-stormservice-conflict"). SubTargetSelector("prefill"). // Same role - MetricSource(wrapper.MakeMetricSourcePod(autoscalingv1alpha1.HTTP, "8080", "/metrics", "gpu_cache_usage_perc", "0.7")). + MetricSource(wrapper.MakeMetricSourcePod( + autoscalingv1alpha1.HTTP, "8080", "/metrics", "gpu_cache_usage_perc", "0.7")). Obj() gomega.Expect(k8sClient.Create(ctx, pa1)).To(gomega.Succeed()) time.Sleep(time.Second * 2) @@ -1098,7 +1052,8 @@ var _ = ginkgo.Describe("PodAutoscaler controller test", func() { MaxReplicas(10). Annotations(annotations). ScaleTargetRefWithKind("Deployment", "apps/v1", "cooldown-deployment"). - MetricSource(wrapper.MakeMetricSourcePod(autoscalingv1alpha1.HTTP, "8080", "/metrics", "gpu_cache_usage_perc", "0.5")). + MetricSource(wrapper.MakeMetricSourcePod( + autoscalingv1alpha1.HTTP, "8080", "/metrics", "gpu_cache_usage_perc", "0.5")). Obj() }, updates: []*update{ @@ -1133,7 +1088,8 @@ var _ = ginkgo.Describe("PodAutoscaler controller test", func() { MaxReplicas(10). Annotations(annotations). ScaleTargetRefWithKind("Deployment", "apps/v1", "delay-deployment"). - MetricSource(wrapper.MakeMetricSourcePod(autoscalingv1alpha1.HTTP, "8080", "/metrics", "gpu_cache_usage_perc", "0.5")). + MetricSource(wrapper.MakeMetricSourcePod( + autoscalingv1alpha1.HTTP, "8080", "/metrics", "gpu_cache_usage_perc", "0.5")). Obj() }, updates: []*update{ @@ -1172,7 +1128,8 @@ var _ = ginkgo.Describe("PodAutoscaler controller test", func() { MaxReplicas(10). Annotations(annotations). ScaleTargetRefWithKind("Deployment", "apps/v1", "annotations-deployment"). - MetricSource(wrapper.MakeMetricSourcePod(autoscalingv1alpha1.HTTP, "8080", "/metrics", "gpu_cache_usage_perc", "0.5")). 
+ MetricSource(wrapper.MakeMetricSourcePod( + autoscalingv1alpha1.HTTP, "8080", "/metrics", "gpu_cache_usage_perc", "0.5")). Obj() }, updates: []*update{ @@ -1209,7 +1166,8 @@ var _ = ginkgo.Describe("PodAutoscaler controller test", func() { MinReplicas(1). MaxReplicas(5). ScaleTargetRefWithKind("Deployment", "apps/v1", "update-spec-deployment"). - MetricSource(wrapper.MakeMetricSourcePod(autoscalingv1alpha1.HTTP, "8080", "/metrics", "cpu_usage", "0.7")). + MetricSource(wrapper.MakeMetricSourcePod( + autoscalingv1alpha1.HTTP, "8080", "/metrics", "cpu_usage", "0.7")). Obj() }, updates: []*update{ @@ -1256,7 +1214,8 @@ var _ = ginkgo.Describe("PodAutoscaler controller test", func() { MinReplicas(1). MaxReplicas(5). ScaleTargetRefWithKind("Deployment", "apps/v1", "rapid-deployment"). - MetricSource(wrapper.MakeMetricSourcePod(autoscalingv1alpha1.HTTP, "8080", "/metrics", "requests_per_second", "100")). + MetricSource(wrapper.MakeMetricSourcePod( + autoscalingv1alpha1.HTTP, "8080", "/metrics", "requests_per_second", "100")). 
Obj() }, updates: []*update{ @@ -1267,7 +1226,8 @@ var _ = ginkgo.Describe("PodAutoscaler controller test", func() { time.Sleep(time.Second * 2) }, checkFunc: func(ctx context.Context, k8sClient client.Client, pa *autoscalingv1alpha1.PodAutoscaler) { - validation.ValidatePodAutoscalerConditionExists(validation.GetPodAutoscaler(ctx, k8sClient, pa), ConditionValidSpec) + fetched := validation.GetPodAutoscaler(ctx, k8sClient, pa) + validation.ValidatePodAutoscalerConditionExists(fetched, ConditionValidSpec) }, }, { diff --git a/test/utils/validation/podautoscaler.go b/test/utils/validation/podautoscaler.go index 8366dd09b..1937234ce 100644 --- a/test/utils/validation/podautoscaler.go +++ b/test/utils/validation/podautoscaler.go @@ -35,7 +35,8 @@ func ValidatePodAutoscalerSpec(pa *autoscalingv1alpha1.PodAutoscaler, gomega.Expect(pa.Spec.MinReplicas).ToNot(gomega.BeNil(), "MinReplicas should not be nil") gomega.Expect(*pa.Spec.MinReplicas).To(gomega.Equal(expectedMin), "MinReplicas should match expected value") gomega.Expect(pa.Spec.MaxReplicas).To(gomega.Equal(expectedMax), "MaxReplicas should match expected value") - gomega.Expect(pa.Spec.ScalingStrategy).To(gomega.Equal(expectedStrategy), "ScalingStrategy should match expected value") + gomega.Expect(pa.Spec.ScalingStrategy).To( + gomega.Equal(expectedStrategy), "ScalingStrategy should match expected value") } // ValidatePodAutoscalerCondition validates a specific condition in a PodAutoscaler. diff --git a/test/utils/wrapper/podautoscaler.go b/test/utils/wrapper/podautoscaler.go index 1f64ef832..5590d5808 100644 --- a/test/utils/wrapper/podautoscaler.go +++ b/test/utils/wrapper/podautoscaler.go @@ -127,7 +127,10 @@ func (w *PodAutoscalerWrapper) Labels(labels map[string]string) *PodAutoscalerWr } // MakeMetricSourcePod creates a POD-type metric source. 
-func MakeMetricSourcePod(protocolType autoscalingv1alpha1.ProtocolType, port, path, targetMetric, targetValue string) autoscalingv1alpha1.MetricSource { +func MakeMetricSourcePod( + protocolType autoscalingv1alpha1.ProtocolType, + port, path, targetMetric, targetValue string, +) autoscalingv1alpha1.MetricSource { return autoscalingv1alpha1.MetricSource{ MetricSourceType: autoscalingv1alpha1.POD, ProtocolType: protocolType, @@ -148,7 +151,10 @@ func MakeMetricSourceResource(targetMetric, targetValue string) autoscalingv1alp } // MakeMetricSourceExternal creates an EXTERNAL-type metric source. -func MakeMetricSourceExternal(protocolType autoscalingv1alpha1.ProtocolType, endpoint, path, targetMetric, targetValue string) autoscalingv1alpha1.MetricSource { +func MakeMetricSourceExternal( + protocolType autoscalingv1alpha1.ProtocolType, + endpoint, path, targetMetric, targetValue string, +) autoscalingv1alpha1.MetricSource { return autoscalingv1alpha1.MetricSource{ MetricSourceType: autoscalingv1alpha1.EXTERNAL, ProtocolType: protocolType, @@ -167,4 +173,3 @@ func MakeMetricSourceCustom(targetMetric, targetValue string) autoscalingv1alpha TargetValue: targetValue, } } -