From 25f282cb76d05b0f1696f02333fb719c08cc26d0 Mon Sep 17 00:00:00 2001
From: Wang Zhenyu <ts-zhenyu.b.wang@rakuten.com>
Date: Sun, 19 Oct 2025 21:45:04 +0800
Subject: [PATCH 1/4] feat: integration test for podAutoScaler

Signed-off-by: Wang Zhenyu <ts-zhenyu.b.wang@rakuten.com>
---
 .../controller/podautoscaler_test.go          | 1282 +++++++++++++++++
 1 file changed, 1282 insertions(+)
 create mode 100644 test/integration/controller/podautoscaler_test.go

diff --git a/test/integration/controller/podautoscaler_test.go b/test/integration/controller/podautoscaler_test.go
new file mode 100644
index 000000000..bb1bb0b4f
--- /dev/null
+++ b/test/integration/controller/podautoscaler_test.go
@@ -0,0 +1,1282 @@
+/*
+Copyright 2025 The Aibrix Team.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+	http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package controller
+
+import (
+	"context"
+	"time"
+
+	"github.com/onsi/ginkgo/v2"
+	"github.com/onsi/gomega"
+	appsv1 "k8s.io/api/apps/v1"
+	corev1 "k8s.io/api/core/v1"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/utils/ptr"
+	"sigs.k8s.io/controller-runtime/pkg/client"
+
+	autoscalingv1alpha1 "github.com/vllm-project/aibrix/api/autoscaling/v1alpha1"
+	orchestrationapi "github.com/vllm-project/aibrix/api/orchestration/v1alpha1"
+	"github.com/vllm-project/aibrix/test/utils/validation"
+	"github.com/vllm-project/aibrix/test/utils/wrapper"
+)
+
+// Condition types and reasons asserted by the specs below; per the original note they mirror the controller's constants.
+const (
+	ConditionReady         = "Ready"
+	ConditionValidSpec     = "ValidSpec"
+	ConditionConflict      = "MutilPodAutoscalerConflict" // NOTE(review): "Mutil" presumably matches the controller's spelling — confirm before changing
+	ConditionScalingActive = "ScalingActive"
+	ConditionAbleToScale   = "AbleToScale"
+
+	ReasonAsExpected             = "AsExpected"
+	ReasonInvalidScalingStrategy = "InvalidScalingStrategy"
+	ReasonInvalidBounds          = "InvalidBounds"
+	ReasonMissingTargetRef       = "MissingScaleTargetRef"
+	ReasonMetricsConfigError     = "MetricsConfigError"
+)
+
+var _ = ginkgo.Describe("PodAutoscaler controller test", func() {
+	var ns *corev1.Namespace
+
+	// update is one step of a table entry: updateFunc (if non-nil) runs first, then checkFunc (if non-nil)
+	type update struct {
+		updateFunc func(pa *autoscalingv1alpha1.PodAutoscaler)
+		checkFunc  func(context.Context, client.Client, *autoscalingv1alpha1.PodAutoscaler)
+	}
+
+	ginkgo.BeforeEach(func() {
+		// GenerateName lets the API server choose the final namespace name.
+		meta := metav1.ObjectMeta{GenerateName: "test-pa-"}
+		ns = &corev1.Namespace{ObjectMeta: meta}
+		gomega.Expect(k8sClient.Create(ctx, ns)).To(gomega.Succeed())
+
+		// Poll (up to 3s) until the namespace can be read back through the client.
+		readBack := func() error {
+			return k8sClient.Get(ctx, client.ObjectKeyFromObject(ns), ns)
+		}
+		gomega.Eventually(readBack, time.Second*3).Should(gomega.Succeed())
+	})
+
+	ginkgo.AfterEach(func() {
+		gomega.Expect(k8sClient.Delete(ctx, ns)).To(gomega.Succeed()) // delete the namespace created in BeforeEach
+	})
+
+	// testValidatingCase is one table entry: makePodAutoscaler builds the PA object, then updates run in order against it
+	type testValidatingCase struct {
+		makePodAutoscaler func() *autoscalingv1alpha1.PodAutoscaler
+		updates           []*update
+	}
+
+	// Helper: builds a single-container nginx Deployment labeled app=<name>, creates it via k8sClient, and returns it
+	createDeployment := func(name, namespace string, replicas int32) *appsv1.Deployment {
+		podSpec := corev1.PodSpec{
+			Containers: []corev1.Container{
+				{
+					Name:  "nginx",
+					Image: "nginx:latest",
+				},
+			},
+		}
+		podTemplate := corev1.PodTemplateSpec{
+			ObjectMeta: metav1.ObjectMeta{
+				Labels: map[string]string{
+					"app": name,
+				},
+			},
+			Spec: podSpec,
+		}
+		deploy := &appsv1.Deployment{
+			ObjectMeta: metav1.ObjectMeta{
+				Name:      name,
+				Namespace: namespace,
+			},
+			Spec: appsv1.DeploymentSpec{
+				Replicas: &replicas,
+				Selector: &metav1.LabelSelector{
+					MatchLabels: map[string]string{"app": name},
+				},
+				Template: podTemplate,
+			},
+		}
+		gomega.Expect(k8sClient.Create(ctx, deploy)).To(gomega.Succeed())
+		return deploy
+	}
+
+	ginkgo.DescribeTable("test PodAutoscaler creation and reconciliation",
+		func(tc *testValidatingCase) {
+			pa := tc.makePodAutoscaler()
+			for _, step := range tc.updates {
+				if mutate := step.updateFunc; mutate != nil {
+					mutate(pa)
+				}
+
+				// Checks receive the same local pa; it is not re-fetched from the API server here
+				if verify := step.checkFunc; verify != nil {
+					verify(ctx, k8sClient, pa)
+				}
+			}
+		},
+
+		// =========================================================================
+		// HPA Strategy - Resource Lifecycle Management
+		// =========================================================================
+
+		ginkgo.Entry("HPA Strategy - Create PA → HPA Created",
+			&testValidatingCase{
+				makePodAutoscaler: func() *autoscalingv1alpha1.PodAutoscaler {
+					return wrapper.MakePodAutoscaler("pa-hpa-create").
+						Namespace(ns.Name).
+						ScalingStrategy(autoscalingv1alpha1.HPA).
+						MinReplicas(1).
+						MaxReplicas(5).
+						ScaleTargetRefWithKind("Deployment", "apps/v1", "test-deployment").
+						MetricSource(wrapper.MakeMetricSourcePod(autoscalingv1alpha1.HTTP, "8080", "/metrics", "requests_per_second", "100")).
+						Obj()
+				},
+				updates: []*update{
+					{
+						updateFunc: func(pa *autoscalingv1alpha1.PodAutoscaler) {
+							// Create the scale target before the PA that references it
+							createDeployment("test-deployment", ns.Name, 2)
+							// Create PodAutoscaler
+							gomega.Expect(k8sClient.Create(ctx, pa)).To(gomega.Succeed())
+						},
+						checkFunc: func(ctx context.Context, k8sClient client.Client, pa *autoscalingv1alpha1.PodAutoscaler) {
+							// The HPA is looked up by the name "<pa name>-hpa" in the test namespace
+							hpa := validation.WaitForHPACreated(ctx, k8sClient, ns.Name, pa.Name+"-hpa")
+							gomega.Expect(hpa).ToNot(gomega.BeNil())
+
+							// Validate HPA OwnerReference
+							validation.ValidateHPAOwnerReference(hpa, pa.Name, "PodAutoscaler")
+
+							// Expected bounds (1, 5) and target name mirror the PA built above
+							validation.ValidateHPASpec(hpa, 1, 5)
+							validation.ValidateHPAScaleTargetRef(hpa, "Deployment", "test-deployment")
+						},
+					},
+				},
+			},
+		),
+
+		ginkgo.Entry("HPA Strategy - Update PA → HPA Synced",
+			&testValidatingCase{
+				makePodAutoscaler: func() *autoscalingv1alpha1.PodAutoscaler {
+					return wrapper.MakePodAutoscaler("pa-hpa-update").
+						Namespace(ns.Name).
+						ScalingStrategy(autoscalingv1alpha1.HPA).
+						MinReplicas(1).
+						MaxReplicas(5).
+						ScaleTargetRefWithKind("Deployment", "apps/v1", "test-deployment-2").
+						MetricSource(wrapper.MakeMetricSourcePod(autoscalingv1alpha1.HTTP, "8080", "/metrics", "requests_per_second", "100")).
+						Obj()
+				},
+				updates: []*update{
+					{
+						updateFunc: func(pa *autoscalingv1alpha1.PodAutoscaler) {
+							createDeployment("test-deployment-2", ns.Name, 2)
+							gomega.Expect(k8sClient.Create(ctx, pa)).To(gomega.Succeed())
+						},
+						checkFunc: func(ctx context.Context, k8sClient client.Client, pa *autoscalingv1alpha1.PodAutoscaler) {
+							// Wait for HPA creation
+							validation.WaitForHPACreated(ctx, k8sClient, ns.Name, pa.Name+"-hpa")
+						},
+					},
+					{
+						updateFunc: func(pa *autoscalingv1alpha1.PodAutoscaler) {
+							// Read-modify-write inside Eventually: a stale resourceVersion is retried instead of relying on fixed sleeps
+							gomega.Eventually(func(g gomega.Gomega) {
+								fetched := &autoscalingv1alpha1.PodAutoscaler{}
+								g.Expect(k8sClient.Get(ctx, client.ObjectKeyFromObject(pa), fetched)).To(gomega.Succeed())
+								fetched.Spec.MinReplicas, fetched.Spec.MaxReplicas = ptr.To(int32(2)), 10
+								g.Expect(k8sClient.Update(ctx, fetched)).To(gomega.Succeed())
+							}, time.Second*10, time.Millisecond*250).Should(gomega.Succeed())
+						},
+						checkFunc: func(ctx context.Context, k8sClient client.Client, pa *autoscalingv1alpha1.PodAutoscaler) {
+							// Poll until the HPA shows the new bounds (2, 10); only failures reported through g are retried
+							gomega.Eventually(func(g gomega.Gomega) {
+								hpa := validation.GetHPA(ctx, k8sClient, ns.Name, pa.Name+"-hpa")
+								g.Expect(hpa.Spec.MinReplicas).To(gomega.HaveValue(gomega.BeEquivalentTo(2)))
+								g.Expect(hpa.Spec.MaxReplicas).To(gomega.BeEquivalentTo(10))
+							}, time.Second*15, time.Second*1).Should(gomega.Succeed())
+						},
+					},
+				},
+			},
+		),
+
+		ginkgo.Entry("HPA Strategy - Delete PA → HPA Deleted (Cascade)",
+			&testValidatingCase{
+				makePodAutoscaler: func() *autoscalingv1alpha1.PodAutoscaler {
+					return wrapper.MakePodAutoscaler("pa-hpa-delete").
+						Namespace(ns.Name).
+						ScalingStrategy(autoscalingv1alpha1.HPA).
+						MinReplicas(1).
+						MaxReplicas(5).
+						ScaleTargetRefWithKind("Deployment", "apps/v1", "test-deployment-3").
+						MetricSource(wrapper.MakeMetricSourcePod(autoscalingv1alpha1.HTTP, "8080", "/metrics", "requests_per_second", "100")).
+						Obj()
+				},
+				updates: []*update{
+					{
+						updateFunc: func(pa *autoscalingv1alpha1.PodAutoscaler) {
+							createDeployment("test-deployment-3", ns.Name, 2)
+							gomega.Expect(k8sClient.Create(ctx, pa)).To(gomega.Succeed())
+						},
+						checkFunc: func(ctx context.Context, k8sClient client.Client, pa *autoscalingv1alpha1.PodAutoscaler) {
+							// Wait for HPA creation
+							validation.WaitForHPACreated(ctx, k8sClient, ns.Name, pa.Name+"-hpa")
+						},
+					},
+					{
+						updateFunc: func(pa *autoscalingv1alpha1.PodAutoscaler) {
+							// Fixed 3s pause intended to let the initial reconcile settle before deleting
+							time.Sleep(time.Second * 3)
+							// Delete PodAutoscaler
+							gomega.Expect(k8sClient.Delete(ctx, pa)).To(gomega.Succeed())
+						},
+						checkFunc: func(ctx context.Context, k8sClient client.Client, pa *autoscalingv1alpha1.PodAutoscaler) {
+							// Only the PA's deletion is asserted here; the HPA itself is not checked (see note below)
+							validation.WaitForPodAutoscalerDeleted(ctx, k8sClient, pa)
+							// Note: In envtest, HPA cascade deletion via OwnerReference doesn't work
+							// because garbage collector controller is not running. In real K8s,
+							// the HPA would be automatically deleted due to OwnerReference.
+							// We already verified OwnerReference is set correctly in the creation test.
+						},
+					},
+				},
+			},
+		),
+
+		// =========================================================================
+		// Spec Validation Logic
+		// =========================================================================
+
+		ginkgo.Entry("Spec Validation - Invalid ScaleTargetRef (empty name)",
+			&testValidatingCase{
+				makePodAutoscaler: func() *autoscalingv1alpha1.PodAutoscaler {
+					return wrapper.MakePodAutoscaler("pa-invalid-ref").
+						Namespace(ns.Name).
+						ScalingStrategy(autoscalingv1alpha1.HPA).
+						MinReplicas(1).
+						MaxReplicas(5).
+						ScaleTargetRefWithKind("Deployment", "apps/v1", ""). // Empty name
+						MetricSource(wrapper.MakeMetricSourcePod(autoscalingv1alpha1.HTTP, "8080", "/metrics", "requests_per_second", "100")).
+						Obj()
+				},
+				updates: []*update{
+					{
+						updateFunc: func(pa *autoscalingv1alpha1.PodAutoscaler) {
+							gomega.Expect(k8sClient.Create(ctx, pa)).To(gomega.Succeed())
+							// Fixed pause for the controller to reconcile. NOTE(review): likely redundant if the WaitFor helper below polls — confirm
+							time.Sleep(time.Second * 2)
+						},
+						checkFunc: func(ctx context.Context, k8sClient client.Client, pa *autoscalingv1alpha1.PodAutoscaler) {
+							// Expect ValidSpec=False with reason "MissingScaleTargetRef"
+							validation.WaitForPodAutoscalerConditionWithReason(
+								ctx, k8sClient, pa,
+								ConditionValidSpec, metav1.ConditionFalse,
+								ReasonMissingTargetRef,
+							)
+						},
+					},
+				},
+			},
+		),
+
+		ginkgo.Entry("Spec Validation - Invalid Replica Bounds (min > max)",
+			&testValidatingCase{
+				makePodAutoscaler: func() *autoscalingv1alpha1.PodAutoscaler {
+					return wrapper.MakePodAutoscaler("pa-invalid-bounds").
+						Namespace(ns.Name).
+						ScalingStrategy(autoscalingv1alpha1.HPA).
+						MinReplicas(5). // min > max
+						MaxReplicas(3).
+						ScaleTargetRefWithKind("Deployment", "apps/v1", "test-deployment-4").
+						MetricSource(wrapper.MakeMetricSourcePod(autoscalingv1alpha1.HTTP, "8080", "/metrics", "requests_per_second", "100")).
+						Obj()
+				},
+				updates: []*update{
+					{
+						updateFunc: func(pa *autoscalingv1alpha1.PodAutoscaler) {
+							createDeployment("test-deployment-4", ns.Name, 2)
+							gomega.Expect(k8sClient.Create(ctx, pa)).To(gomega.Succeed())
+							// Fixed pause for the controller to reconcile. NOTE(review): likely redundant if the WaitFor helper below polls — confirm
+							time.Sleep(time.Second * 2)
+						},
+						checkFunc: func(ctx context.Context, k8sClient client.Client, pa *autoscalingv1alpha1.PodAutoscaler) {
+							// Expect ValidSpec=False with reason "InvalidBounds"
+							validation.WaitForPodAutoscalerConditionWithReason(
+								ctx, k8sClient, pa,
+								ConditionValidSpec, metav1.ConditionFalse,
+								ReasonInvalidBounds,
+							)
+						},
+					},
+				},
+			},
+		),
+
+		// Note: Invalid ScalingStrategy test is skipped because CRD-level validation
+		// prevents invalid values from being created in the first place.
+
+		// Note: Empty MetricsSources test is skipped because CRD-level validation
+		// prevents empty metricsSources from being created (minItems=1).
+
+		ginkgo.Entry("Spec Validation - Valid Spec",
+			&testValidatingCase{
+				makePodAutoscaler: func() *autoscalingv1alpha1.PodAutoscaler {
+					return wrapper.MakePodAutoscaler("pa-valid-spec").
+						Namespace(ns.Name).
+						ScalingStrategy(autoscalingv1alpha1.HPA).
+						MinReplicas(1).
+						MaxReplicas(5).
+						ScaleTargetRefWithKind("Deployment", "apps/v1", "test-deployment-7").
+						MetricSource(wrapper.MakeMetricSourcePod(autoscalingv1alpha1.HTTP, "8080", "/metrics", "requests_per_second", "100")).
+						Obj()
+				},
+				updates: []*update{
+					{
+						updateFunc: func(pa *autoscalingv1alpha1.PodAutoscaler) {
+							createDeployment("test-deployment-7", ns.Name, 2)
+							gomega.Expect(k8sClient.Create(ctx, pa)).To(gomega.Succeed())
+							// Fixed pause for the controller to reconcile. NOTE(review): likely redundant if the WaitFor helper below polls — confirm
+							time.Sleep(time.Second * 2)
+						},
+						checkFunc: func(ctx context.Context, k8sClient client.Client, pa *autoscalingv1alpha1.PodAutoscaler) {
+							// Expect ValidSpec=True with reason "AsExpected"
+							validation.WaitForPodAutoscalerConditionWithReason(
+								ctx, k8sClient, pa,
+								ConditionValidSpec, metav1.ConditionTrue,
+								ReasonAsExpected,
+							)
+						},
+					},
+				},
+			},
+		),
+
+		// =========================================================================
+		// Conflict Detection Mechanism
+		// =========================================================================
+
+		ginkgo.Entry("Conflict Detection - Two PAs target same Deployment",
+			&testValidatingCase{
+				makePodAutoscaler: func() *autoscalingv1alpha1.PodAutoscaler {
+					return wrapper.MakePodAutoscaler("pa-conflict-1").
+						Namespace(ns.Name).
+						ScalingStrategy(autoscalingv1alpha1.HPA).
+						MinReplicas(1).
+						MaxReplicas(5).
+						ScaleTargetRefWithKind("Deployment", "apps/v1", "shared-deployment").
+						MetricSource(wrapper.MakeMetricSourcePod(autoscalingv1alpha1.HTTP, "8080", "/metrics", "requests_per_second", "100")).
+						Obj()
+				},
+				updates: []*update{
+					{
+						updateFunc: func(pa *autoscalingv1alpha1.PodAutoscaler) {
+							// Create deployment
+							createDeployment("shared-deployment", ns.Name, 2)
+							// Create first PA
+							gomega.Expect(k8sClient.Create(ctx, pa)).To(gomega.Succeed())
+							time.Sleep(time.Second * 2)
+						},
+						checkFunc: func(ctx context.Context, k8sClient client.Client, pa *autoscalingv1alpha1.PodAutoscaler) {
+							// PA1 should not have conflict (one-shot negative check after the pause above)
+							fetched := validation.GetPodAutoscaler(ctx, k8sClient, pa)
+							validation.ValidatePodAutoscalerConditionNotExists(fetched, ConditionConflict)
+						},
+					},
+					{
+						updateFunc: func(pa *autoscalingv1alpha1.PodAutoscaler) {
+							// Create second PA targeting the same deployment
+							pa2 := wrapper.MakePodAutoscaler("pa-conflict-2").
+								Namespace(ns.Name).
+								ScalingStrategy(autoscalingv1alpha1.HPA).
+								MinReplicas(1).
+								MaxReplicas(10).
+								ScaleTargetRefWithKind("Deployment", "apps/v1", "shared-deployment").
+								MetricSource(wrapper.MakeMetricSourcePod(autoscalingv1alpha1.HTTP, "8080", "/metrics", "requests_per_second", "100")).
+								Obj()
+							gomega.Expect(k8sClient.Create(ctx, pa2)).To(gomega.Succeed())
+							time.Sleep(time.Second * 2)
+						},
+						checkFunc: func(ctx context.Context, k8sClient client.Client, pa *autoscalingv1alpha1.PodAutoscaler) {
+							// PA2 should have conflict condition with Status=False (conflict detected).
+							// Assertions go through g so an unmet poll is retried instead of aborting the spec.
+							gomega.Eventually(func(g gomega.Gomega) {
+								pa2 := &autoscalingv1alpha1.PodAutoscaler{}
+								g.Expect(k8sClient.Get(ctx, client.ObjectKey{Namespace: ns.Name, Name: "pa-conflict-2"}, pa2)).To(gomega.Succeed())
+								g.Expect(pa2.Status.Conditions).To(gomega.ContainElement(gomega.And(
+									gomega.HaveField("Type", ConditionConflict), gomega.HaveField("Status", metav1.ConditionFalse),
+								)))
+							}, time.Second*10, time.Millisecond*250).Should(gomega.Succeed())
+						},
+					},
+				},
+			},
+		),
+
+		ginkgo.Entry("Conflict Resolution - Delete first PA",
+			&testValidatingCase{
+				makePodAutoscaler: func() *autoscalingv1alpha1.PodAutoscaler {
+					return wrapper.MakePodAutoscaler("pa-resolve-1").
+						Namespace(ns.Name).
+						ScalingStrategy(autoscalingv1alpha1.HPA).
+						MinReplicas(1).
+						MaxReplicas(5).
+						ScaleTargetRefWithKind("Deployment", "apps/v1", "resolve-deployment").
+						MetricSource(wrapper.MakeMetricSourcePod(autoscalingv1alpha1.HTTP, "8080", "/metrics", "requests_per_second", "100")).
+						Obj()
+				},
+				updates: []*update{
+					{
+						updateFunc: func(pa *autoscalingv1alpha1.PodAutoscaler) {
+							// Create deployment and two PAs
+							createDeployment("resolve-deployment", ns.Name, 2)
+							gomega.Expect(k8sClient.Create(ctx, pa)).To(gomega.Succeed())
+
+							pa2 := wrapper.MakePodAutoscaler("pa-resolve-2").
+								Namespace(ns.Name).
+								ScalingStrategy(autoscalingv1alpha1.HPA).
+								MinReplicas(1).
+								MaxReplicas(10).
+								ScaleTargetRefWithKind("Deployment", "apps/v1", "resolve-deployment").
+								MetricSource(wrapper.MakeMetricSourcePod(autoscalingv1alpha1.HTTP, "8080", "/metrics", "requests_per_second", "100")).
+								Obj()
+							gomega.Expect(k8sClient.Create(ctx, pa2)).To(gomega.Succeed())
+						},
+						checkFunc: func(ctx context.Context, k8sClient client.Client, pa *autoscalingv1alpha1.PodAutoscaler) {
+							// Verify PA2 has conflict; polled through g so a slow reconcile is retried rather than raced with a fixed sleep
+							gomega.Eventually(func(g gomega.Gomega) {
+								pa2 := &autoscalingv1alpha1.PodAutoscaler{}
+								g.Expect(k8sClient.Get(ctx, client.ObjectKey{Namespace: ns.Name, Name: "pa-resolve-2"}, pa2)).To(gomega.Succeed())
+								g.Expect(pa2.Status.Conditions).To(gomega.ContainElement(gomega.HaveField("Type", ConditionConflict)))
+							}, time.Second*10, time.Millisecond*250).Should(gomega.Succeed())
+						},
+					},
+					{
+						updateFunc: func(pa *autoscalingv1alpha1.PodAutoscaler) {
+							// Delete first PA
+							gomega.Expect(k8sClient.Delete(ctx, pa)).To(gomega.Succeed())
+							time.Sleep(time.Second * 2)
+
+							// Manually trigger PA2 reconcile by updating it (no-op update)
+							// This forces the controller to re-check the conflict status
+							pa2 := &autoscalingv1alpha1.PodAutoscaler{}
+							err := k8sClient.Get(ctx, client.ObjectKey{Namespace: ns.Name, Name: "pa-resolve-2"}, pa2)
+							gomega.Expect(err).ToNot(gomega.HaveOccurred())
+
+							// Add an annotation to trigger reconcile
+							if pa2.Annotations == nil {
+								pa2.Annotations = make(map[string]string)
+							}
+							pa2.Annotations["test.aibrix.ai/force-reconcile"] = time.Now().Format(time.RFC3339)
+							gomega.Expect(k8sClient.Update(ctx, pa2)).To(gomega.Succeed())
+							time.Sleep(time.Second * 3)
+						},
+						checkFunc: func(ctx context.Context, k8sClient client.Client, pa *autoscalingv1alpha1.PodAutoscaler) {
+							// PA2 conflict should be resolved - condition should be removed.
+							// Assert through g: a one-shot helper assertion inside Eventually would
+							// abort the spec on the first unmet poll instead of being retried.
+							gomega.Eventually(func(g gomega.Gomega) {
+								pa2 := &autoscalingv1alpha1.PodAutoscaler{}
+								g.Expect(k8sClient.Get(ctx, client.ObjectKey{Namespace: ns.Name, Name: "pa-resolve-2"}, pa2)).To(gomega.Succeed())
+								g.Expect(pa2.Status.Conditions).ToNot(gomega.ContainElement(gomega.HaveField("Type", ConditionConflict)))
+							}, time.Second*15, time.Second*1).Should(gomega.Succeed())
+						},
+					},
+				},
+			},
+		),
+
+		// =========================================================================
+		// Status and Condition Management
+		// =========================================================================
+
+		ginkgo.Entry("Status Management - DesiredScale and ActualScale",
+			&testValidatingCase{
+				makePodAutoscaler: func() *autoscalingv1alpha1.PodAutoscaler {
+					return wrapper.MakePodAutoscaler("pa-status-scale").
+						Namespace(ns.Name).
+						ScalingStrategy(autoscalingv1alpha1.HPA).
+						MinReplicas(1).
+						MaxReplicas(5).
+						ScaleTargetRefWithKind("Deployment", "apps/v1", "status-deployment").
+						MetricSource(wrapper.MakeMetricSourcePod(autoscalingv1alpha1.HTTP, "8080", "/metrics", "requests_per_second", "100")).
+						Obj()
+				},
+				updates: []*update{
+					{
+						updateFunc: func(pa *autoscalingv1alpha1.PodAutoscaler) {
+							// Create deployment with 2 replicas
+							createDeployment("status-deployment", ns.Name, 2)
+							gomega.Expect(k8sClient.Create(ctx, pa)).To(gomega.Succeed())
+							time.Sleep(time.Second * 2)
+						},
+						checkFunc: func(ctx context.Context, k8sClient client.Client, pa *autoscalingv1alpha1.PodAutoscaler) {
+							// Poll the PA status for up to 10s
+							gomega.Eventually(func(g gomega.Gomega) {
+								fetched := validation.GetPodAutoscaler(ctx, k8sClient, pa)
+								// Only asserts non-negative. NOTE(review): this does not pin the deployment's replica count (2) — confirm the intended value
+								g.Expect(fetched.Status.ActualScale).To(gomega.BeNumerically(">=", 0))
+								// Only asserts non-negative. NOTE(review): passes for an unset (zero) DesiredScale as well
+								g.Expect(fetched.Status.DesiredScale).To(gomega.BeNumerically(">=", 0))
+							}, time.Second*10, time.Millisecond*250).Should(gomega.Succeed())
+						},
+					},
+				},
+			},
+		),
+
+		ginkgo.Entry("Condition Management - AbleToScale",
+			&testValidatingCase{
+				makePodAutoscaler: func() *autoscalingv1alpha1.PodAutoscaler {
+					return wrapper.MakePodAutoscaler("pa-condition-able").
+						Namespace(ns.Name).
+						ScalingStrategy(autoscalingv1alpha1.HPA).
+						MinReplicas(1).
+						MaxReplicas(5).
+						ScaleTargetRefWithKind("Deployment", "apps/v1", "condition-deployment").
+						MetricSource(wrapper.MakeMetricSourcePod(autoscalingv1alpha1.HTTP, "8080", "/metrics", "requests_per_second", "100")).
+						Obj()
+				},
+				updates: []*update{
+					{
+						updateFunc: func(pa *autoscalingv1alpha1.PodAutoscaler) {
+							createDeployment("condition-deployment", ns.Name, 2)
+							gomega.Expect(k8sClient.Create(ctx, pa)).To(gomega.Succeed())
+							time.Sleep(time.Second * 2) // NOTE(review): likely redundant if the WaitFor helper below polls — confirm
+						},
+						checkFunc: func(ctx context.Context, k8sClient client.Client, pa *autoscalingv1alpha1.PodAutoscaler) {
+							// Expect the AbleToScale condition with Status=True (reason is not checked here)
+							validation.WaitForPodAutoscalerCondition(
+								ctx, k8sClient, pa,
+								ConditionAbleToScale, metav1.ConditionTrue,
+							)
+						},
+					},
+				},
+			},
+		),
+
+		ginkgo.Entry("Condition Management - Ready condition transitions",
+			&testValidatingCase{
+				makePodAutoscaler: func() *autoscalingv1alpha1.PodAutoscaler {
+					return wrapper.MakePodAutoscaler("pa-condition-ready").
+						Namespace(ns.Name).
+						ScalingStrategy(autoscalingv1alpha1.HPA).
+						MinReplicas(1).
+						MaxReplicas(5).
+						ScaleTargetRefWithKind("Deployment", "apps/v1", "ready-deployment").
+						MetricSource(wrapper.MakeMetricSourcePod(autoscalingv1alpha1.HTTP, "8080", "/metrics", "requests_per_second", "100")).
+						Obj()
+				},
+				updates: []*update{
+					{
+						updateFunc: func(pa *autoscalingv1alpha1.PodAutoscaler) {
+							createDeployment("ready-deployment", ns.Name, 2)
+							gomega.Expect(k8sClient.Create(ctx, pa)).To(gomega.Succeed())
+							time.Sleep(time.Second * 2)
+						},
+						checkFunc: func(ctx context.Context, k8sClient client.Client, pa *autoscalingv1alpha1.PodAutoscaler) {
+							// Verify all basic conditions exist; asserted through g so a missing condition is retried, not fatal
+							gomega.Eventually(func(g gomega.Gomega) {
+								fetched := &autoscalingv1alpha1.PodAutoscaler{}
+								g.Expect(k8sClient.Get(ctx, client.ObjectKeyFromObject(pa), fetched)).To(gomega.Succeed())
+								for _, condType := range []string{ConditionReady, ConditionValidSpec, ConditionAbleToScale, ConditionScalingActive} {
+									g.Expect(fetched.Status.Conditions).To(gomega.ContainElement(gomega.HaveField("Type", condType)))
+								}
+							}, time.Second*10, time.Millisecond*250).Should(gomega.Succeed())
+						},
+					},
+				},
+			},
+		),
+
+		// =========================================================================
+		// Scale Target Management
+		// =========================================================================
+
+		ginkgo.Entry("Scale Target - Deployment scaling",
+			&testValidatingCase{
+				makePodAutoscaler: func() *autoscalingv1alpha1.PodAutoscaler {
+					return wrapper.MakePodAutoscaler("pa-scale-deployment").
+						Namespace(ns.Name).
+						ScalingStrategy(autoscalingv1alpha1.HPA).
+						MinReplicas(1).
+						MaxReplicas(10).
+						ScaleTargetRefWithKind("Deployment", "apps/v1", "scale-test-deployment").
+						MetricSource(wrapper.MakeMetricSourcePod(autoscalingv1alpha1.HTTP, "8080", "/metrics", "requests_per_second", "100")).
+						Obj()
+				},
+				updates: []*update{
+					{
+						updateFunc: func(pa *autoscalingv1alpha1.PodAutoscaler) {
+							// Create deployment
+							createDeployment("scale-test-deployment", ns.Name, 3)
+							gomega.Expect(k8sClient.Create(ctx, pa)).To(gomega.Succeed())
+							time.Sleep(time.Second * 2)
+						},
+						checkFunc: func(ctx context.Context, k8sClient client.Client, pa *autoscalingv1alpha1.PodAutoscaler) {
+							// For HPA strategy, HPA should be created; wait once here instead of nesting the wait in the polled closure
+							validation.WaitForHPACreated(ctx, k8sClient, ns.Name, pa.Name+"-hpa")
+							// Verify the PA status is readable; NOTE(review): ">= 0" does not pin the deployment's replica count
+							gomega.Eventually(func(g gomega.Gomega) {
+								fetched := &autoscalingv1alpha1.PodAutoscaler{}
+								g.Expect(k8sClient.Get(ctx, client.ObjectKeyFromObject(pa), fetched)).To(gomega.Succeed())
+								g.Expect(fetched.Status.ActualScale).To(gomega.BeNumerically(">=", 0))
+							}, time.Second*10, time.Millisecond*250).Should(gomega.Succeed())
+						},
+					},
+				},
+			},
+		),
+
+		ginkgo.Entry("Scale Target - Target Resource Not Found",
+			&testValidatingCase{
+				makePodAutoscaler: func() *autoscalingv1alpha1.PodAutoscaler {
+					return wrapper.MakePodAutoscaler("pa-target-notfound").
+						Namespace(ns.Name).
+						ScalingStrategy(autoscalingv1alpha1.HPA).
+						MinReplicas(1).
+						MaxReplicas(5).
+						ScaleTargetRefWithKind("Deployment", "apps/v1", "nonexistent-deployment").
+						MetricSource(wrapper.MakeMetricSourcePod(autoscalingv1alpha1.HTTP, "8080", "/metrics", "requests_per_second", "100")).
+						Obj()
+				},
+				updates: []*update{
+					{
+						updateFunc: func(pa *autoscalingv1alpha1.PodAutoscaler) {
+							// Don't create the deployment - test missing target
+							gomega.Expect(k8sClient.Create(ctx, pa)).To(gomega.Succeed())
+							time.Sleep(time.Second * 2)
+						},
+						checkFunc: func(ctx context.Context, k8sClient client.Client, pa *autoscalingv1alpha1.PodAutoscaler) {
+							// Should not crash, ValidSpec should be True (spec itself is valid).
+							// Use the polling helper (as the AbleToScale entry does) rather than
+							// calling a one-shot Validate* helper inside Eventually, where a failed
+							// assertion would abort the spec instead of being retried.
+							validation.WaitForPodAutoscalerCondition(
+								ctx, k8sClient, pa,
+								ConditionValidSpec, metav1.ConditionTrue,
+							)
+						},
+					},
+				},
+			},
+		),
+
+		// =========================================================================
+		// Boundary Enforcement
+		// =========================================================================
+
+		ginkgo.Entry("Boundary Enforcement - maxReplicas enforced in HPA",
+			&testValidatingCase{
+				makePodAutoscaler: func() *autoscalingv1alpha1.PodAutoscaler {
+					return wrapper.MakePodAutoscaler("pa-boundary-max").
+						Namespace(ns.Name).
+						ScalingStrategy(autoscalingv1alpha1.HPA).
+						MinReplicas(1).
+						MaxReplicas(5).
+						ScaleTargetRefWithKind("Deployment", "apps/v1", "boundary-deployment").
+						MetricSource(wrapper.MakeMetricSourcePod(autoscalingv1alpha1.HTTP, "8080", "/metrics", "requests_per_second", "100")).
+						Obj()
+				},
+				updates: []*update{
+					{
+						updateFunc: func(pa *autoscalingv1alpha1.PodAutoscaler) {
+							// Create deployment with 8 replicas, above the PA's maxReplicas of 5
+							createDeployment("boundary-deployment", ns.Name, 8)
+							gomega.Expect(k8sClient.Create(ctx, pa)).To(gomega.Succeed())
+							time.Sleep(time.Second * 2)
+						},
+						checkFunc: func(ctx context.Context, k8sClient client.Client, pa *autoscalingv1alpha1.PodAutoscaler) {
+							// HPA should be created with maxReplicas=5
+							hpa := validation.WaitForHPACreated(ctx, k8sClient, ns.Name, pa.Name+"-hpa")
+							validation.ValidateHPASpec(hpa, 1, 5)
+							// Only the HPA spec bounds are asserted; actual scaling of the deployment is not observed here
+						},
+					},
+				},
+			},
+		),
+
+		ginkgo.Entry("Boundary Enforcement - minReplicas enforced in HPA",
+			&testValidatingCase{
+				makePodAutoscaler: func() *autoscalingv1alpha1.PodAutoscaler {
+					return wrapper.MakePodAutoscaler("pa-boundary-min").
+						Namespace(ns.Name).
+						ScalingStrategy(autoscalingv1alpha1.HPA).
+						MinReplicas(3).
+						MaxReplicas(10).
+						ScaleTargetRefWithKind("Deployment", "apps/v1", "boundary-min-deployment").
+						MetricSource(wrapper.MakeMetricSourcePod(autoscalingv1alpha1.HTTP, "8080", "/metrics", "requests_per_second", "100")).
+						Obj()
+				},
+				updates: []*update{
+					{
+						updateFunc: func(pa *autoscalingv1alpha1.PodAutoscaler) {
+							// Create deployment with 1 replica, below the PA's minReplicas of 3
+							createDeployment("boundary-min-deployment", ns.Name, 1)
+							gomega.Expect(k8sClient.Create(ctx, pa)).To(gomega.Succeed())
+							time.Sleep(time.Second * 2)
+						},
+						checkFunc: func(ctx context.Context, k8sClient client.Client, pa *autoscalingv1alpha1.PodAutoscaler) {
+							// HPA should be created with minReplicas=3 and maxReplicas=10
+							hpa := validation.WaitForHPACreated(ctx, k8sClient, ns.Name, pa.Name+"-hpa")
+							validation.ValidateHPASpec(hpa, 3, 10)
+						},
+					},
+				},
+			},
+		),
+
+		ginkgo.Entry("Boundary Enforcement - minReplicas=0 in PA spec",
+			&testValidatingCase{
+				makePodAutoscaler: func() *autoscalingv1alpha1.PodAutoscaler {
+					return wrapper.MakePodAutoscaler("pa-boundary-zero").
+						Namespace(ns.Name).
+						ScalingStrategy(autoscalingv1alpha1.HPA).
+						MinReplicas(0). // Set minReplicas=0 in PA
+						MaxReplicas(5).
+						ScaleTargetRefWithKind("Deployment", "apps/v1", "boundary-zero-deployment").
+						MetricSource(wrapper.MakeMetricSourcePod(autoscalingv1alpha1.HTTP, "8080", "/metrics", "requests_per_second", "100")).
+						Obj()
+				},
+				updates: []*update{
+					{
+						updateFunc: func(pa *autoscalingv1alpha1.PodAutoscaler) {
+							createDeployment("boundary-zero-deployment", ns.Name, 1)
+							gomega.Expect(k8sClient.Create(ctx, pa)).To(gomega.Succeed())
+							time.Sleep(time.Second * 2)
+						},
+						checkFunc: func(ctx context.Context, k8sClient client.Client, pa *autoscalingv1alpha1.PodAutoscaler) {
+							// Verify the stored PA spec still has an explicit minReplicas=0 (non-nil pointer to 0)
+							fetched := validation.GetPodAutoscaler(ctx, k8sClient, pa)
+							gomega.Expect(fetched.Spec.MinReplicas).ToNot(gomega.BeNil())
+							gomega.Expect(*fetched.Spec.MinReplicas).To(gomega.Equal(int32(0)))
+							// HPA will not have minReplicas set (uses default 1) when PA minReplicas=0
+							// This is controller design: only sets HPA minReplicas when PA minReplicas > 0
+							hpa := validation.WaitForHPACreated(ctx, k8sClient, ns.Name, pa.Name+"-hpa")
+							// A nil HPA minReplicas is accepted; when set, it must be at least 1
+							if hpa.Spec.MinReplicas != nil {
+								gomega.Expect(*hpa.Spec.MinReplicas).To(gomega.BeNumerically(">=", 1))
+							}
+						},
+					},
+				},
+			},
+		),
+
+		// =========================================================================
+		// Scaling History Management
+		// =========================================================================
+
+		ginkgo.Entry("ScalingHistory - Basic history tracking in HPA mode",
+			&testValidatingCase{
+				makePodAutoscaler: func() *autoscalingv1alpha1.PodAutoscaler {
+					return wrapper.MakePodAutoscaler("pa-history").
+						Namespace(ns.Name).
+						ScalingStrategy(autoscalingv1alpha1.HPA).
+						MinReplicas(1).
+						MaxReplicas(10).
+						ScaleTargetRefWithKind("Deployment", "apps/v1", "history-deployment").
+						MetricSource(wrapper.MakeMetricSourcePod(autoscalingv1alpha1.HTTP, "8080", "/metrics", "requests_per_second", "100")).
+						Obj()
+				},
+				updates: []*update{
+					{
+						updateFunc: func(pa *autoscalingv1alpha1.PodAutoscaler) {
+							createDeployment("history-deployment", ns.Name, 2)
+							gomega.Expect(k8sClient.Create(ctx, pa)).To(gomega.Succeed())
+							time.Sleep(time.Second * 3)
+						},
+						checkFunc: func(ctx context.Context, k8sClient client.Client, pa *autoscalingv1alpha1.PodAutoscaler) {
+							// Fetch the stored PA once; both checks below read from it.
+							current := validation.GetPodAutoscaler(ctx, k8sClient, pa)
+							// The history may legitimately be empty in HPA mode, so only its upper
+							// bound is asserted (the test author notes maxScalingHistorySize = 5).
+							if history := current.Status.ScalingHistory; history != nil {
+								// Same bound as above, checked against the local slice.
+								gomega.Expect(len(history)).To(gomega.BeNumerically("<=", 5))
+							}
+							// The primary assertion: a ValidSpec condition has been published.
+							validation.ValidatePodAutoscalerConditionExists(current, ConditionValidSpec)
+						},
+					},
+				},
+			},
+		),
+
+		// =========================================================================
+		// StormService Scaling
+		// =========================================================================
+
+		ginkgo.Entry("StormService Scaling - Replica Mode (scale entire StormService)",
+			&testValidatingCase{
+				makePodAutoscaler: func() *autoscalingv1alpha1.PodAutoscaler {
+					return wrapper.MakePodAutoscaler("pa-ss-replica").
+						Namespace(ns.Name).
+						ScalingStrategy(autoscalingv1alpha1.HPA).
+						MinReplicas(1).
+						MaxReplicas(10).
+						ScaleTargetRefWithKind("StormService", "orchestration.aibrix.ai/v1alpha1", "test-stormservice").
+						MetricSource(wrapper.MakeMetricSourcePod(autoscalingv1alpha1.HTTP, "8080", "/metrics", "requests_per_second", "100")).
+						Obj()
+				},
+				updates: []*update{
+					{
+						updateFunc: func(pa *autoscalingv1alpha1.PodAutoscaler) {
+							// Build a StormService with two roles (prefill, decode) sharing one pod template.
+							matchLabel := map[string]string{"app": "test-vllm"}
+							podTemplate := corev1.PodTemplateSpec{
+								ObjectMeta: metav1.ObjectMeta{
+									Labels: matchLabel,
+								},
+								Spec: corev1.PodSpec{
+									Containers: []corev1.Container{
+										{
+											Name:  "vllm-container",
+											Image: "vllm/vllm-openai:latest",
+										},
+									},
+								},
+							}
+							roleSetSpec := &orchestrationapi.RoleSetSpec{
+								Roles: []orchestrationapi.RoleSpec{
+									{
+										Name:     "prefill",
+										Replicas: ptr.To(int32(2)),
+										Template: podTemplate,
+										Stateful: false,
+									},
+									{
+										Name:     "decode",
+										Replicas: ptr.To(int32(1)),
+										Template: podTemplate,
+										Stateful: false,
+									},
+								},
+							}
+							ss := wrapper.MakeStormService("test-stormservice").
+								Namespace(ns.Name).
+								Replicas(ptr.To(int32(2))).
+								Selector(metav1.SetAsLabelSelector(matchLabel)).
+								UpdateStrategyType(orchestrationapi.RollingUpdateStormServiceStrategyType).
+								RoleSetTemplateMeta(metav1.ObjectMeta{Labels: matchLabel}, roleSetSpec).
+								Obj()
+							gomega.Expect(k8sClient.Create(ctx, ss)).To(gomega.Succeed())
+
+							// Create the PA; it targets the StormService as a whole (no SubTargetSelector).
+							gomega.Expect(k8sClient.Create(ctx, pa)).To(gomega.Succeed())
+							time.Sleep(time.Second * 3)
+						},
+						checkFunc: func(ctx context.Context, k8sClient client.Client, pa *autoscalingv1alpha1.PodAutoscaler) {
+							// Assert ValidSpec=True via g so an unready condition is retried, not fatal on first poll.
+							gomega.Eventually(func(g gomega.Gomega) {
+								fetched := validation.GetPodAutoscaler(ctx, k8sClient, pa)
+								g.Expect(fetched.Status.Conditions).To(gomega.ContainElement(gomega.And(
+									gomega.HaveField("Type", ConditionValidSpec), gomega.HaveField("Status", metav1.ConditionTrue))))
+							}, time.Second*15, time.Millisecond*500).Should(gomega.Succeed())
+							// The HPA helper polls on its own, so it runs outside the Eventually above.
+							validation.WaitForHPACreated(ctx, k8sClient, ns.Name, pa.Name+"-hpa")
+						},
+					},
+				},
+			},
+		),
+
+		ginkgo.Entry("StormService Scaling - Role-Level with SubTargetSelector",
+			&testValidatingCase{
+				makePodAutoscaler: func() *autoscalingv1alpha1.PodAutoscaler {
+					return wrapper.MakePodAutoscaler("pa-ss-role").
+						Namespace(ns.Name).
+						ScalingStrategy(autoscalingv1alpha1.KPA).
+						MinReplicas(1).
+						MaxReplicas(10).
+						ScaleTargetRefWithKind("StormService", "orchestration.aibrix.ai/v1alpha1", "test-stormservice-role").
+						SubTargetSelector("prefill"). // Only scale "prefill" role
+						MetricSource(wrapper.MakeMetricSourcePod(autoscalingv1alpha1.HTTP, "8080", "/metrics", "gpu_cache_usage_perc", "0.7")).
+						Obj()
+				},
+				updates: []*update{
+					{
+						updateFunc: func(pa *autoscalingv1alpha1.PodAutoscaler) {
+							// Build a StormService with prefill and decode roles sharing one pod template.
+							matchLabel := map[string]string{"app": "test-vllm-role"}
+							podTemplate := corev1.PodTemplateSpec{
+								ObjectMeta: metav1.ObjectMeta{
+									Labels: matchLabel,
+								},
+								Spec: corev1.PodSpec{
+									Containers: []corev1.Container{
+										{
+											Name:  "vllm-container",
+											Image: "vllm/vllm-openai:latest",
+										},
+									},
+								},
+							}
+							roleSetSpec := &orchestrationapi.RoleSetSpec{
+								Roles: []orchestrationapi.RoleSpec{
+									{
+										Name:     "prefill",
+										Replicas: ptr.To(int32(3)),
+										Template: podTemplate,
+										Stateful: false,
+									},
+									{
+										Name:     "decode",
+										Replicas: ptr.To(int32(2)),
+										Template: podTemplate,
+										Stateful: false,
+									},
+								},
+							}
+							ss := wrapper.MakeStormService("test-stormservice-role").
+								Namespace(ns.Name).
+								Replicas(ptr.To(int32(2))).
+								Selector(metav1.SetAsLabelSelector(matchLabel)).
+								UpdateStrategyType(orchestrationapi.RollingUpdateStormServiceStrategyType).
+								RoleSetTemplateMeta(metav1.ObjectMeta{Labels: matchLabel}, roleSetSpec).
+								Obj()
+							gomega.Expect(k8sClient.Create(ctx, ss)).To(gomega.Succeed())
+
+							// Create PA targeting only "prefill" role
+							gomega.Expect(k8sClient.Create(ctx, pa)).To(gomega.Succeed())
+							time.Sleep(time.Second * 3)
+						},
+						checkFunc: func(ctx context.Context, k8sClient client.Client, pa *autoscalingv1alpha1.PodAutoscaler) {
+							// Every check goes through g so Eventually retries until the controller catches up.
+							gomega.Eventually(func(g gomega.Gomega) {
+								fetched := validation.GetPodAutoscaler(ctx, k8sClient, pa)
+								g.Expect(fetched.Status.Conditions).To(gomega.ContainElement(gomega.And(
+									gomega.HaveField("Type", ConditionValidSpec), gomega.HaveField("Status", metav1.ConditionTrue))))
+								// The stored spec must keep the role-level selector for "prefill".
+								g.Expect(fetched.Spec.SubTargetSelector).ToNot(gomega.BeNil())
+								g.Expect(fetched.Spec.SubTargetSelector.RoleName).To(gomega.Equal("prefill"))
+								// An AbleToScale condition must be published; its status is not constrained here.
+								g.Expect(fetched.Status.Conditions).To(gomega.ContainElement(gomega.HaveField("Type", ConditionAbleToScale)))
+							}, time.Second*15, time.Millisecond*500).Should(gomega.Succeed())
+						},
+					},
+				},
+			},
+		),
+
+		ginkgo.Entry("StormService Scaling - Role-Level Conflict Detection",
+			&testValidatingCase{
+				makePodAutoscaler: func() *autoscalingv1alpha1.PodAutoscaler {
+					// This will be PA2 (created second)
+					return wrapper.MakePodAutoscaler("pa-ss-conflict-2").
+						Namespace(ns.Name).
+						ScalingStrategy(autoscalingv1alpha1.KPA).
+						MinReplicas(1).
+						MaxReplicas(10).
+						ScaleTargetRefWithKind("StormService", "orchestration.aibrix.ai/v1alpha1", "test-stormservice-conflict").
+						SubTargetSelector("prefill"). // Same role as PA1
+						MetricSource(wrapper.MakeMetricSourcePod(autoscalingv1alpha1.HTTP, "8080", "/metrics", "gpu_cache_usage_perc", "0.7")).
+						Obj()
+				},
+				updates: []*update{
+					{
+						updateFunc: func(pa *autoscalingv1alpha1.PodAutoscaler) {
+							// Build the StormService that both PodAutoscalers will target.
+							matchLabel := map[string]string{"app": "test-vllm-conflict"}
+							podTemplate := corev1.PodTemplateSpec{
+								ObjectMeta: metav1.ObjectMeta{
+									Labels: matchLabel,
+								},
+								Spec: corev1.PodSpec{
+									Containers: []corev1.Container{
+										{
+											Name:  "vllm-container",
+											Image: "vllm/vllm-openai:latest",
+										},
+									},
+								},
+							}
+							roleSetSpec := &orchestrationapi.RoleSetSpec{
+								Roles: []orchestrationapi.RoleSpec{
+									{
+										Name:     "prefill",
+										Replicas: ptr.To(int32(3)),
+										Template: podTemplate,
+										Stateful: false,
+									},
+									{
+										Name:     "decode",
+										Replicas: ptr.To(int32(2)),
+										Template: podTemplate,
+										Stateful: false,
+									},
+								},
+							}
+							ss := wrapper.MakeStormService("test-stormservice-conflict").
+								Namespace(ns.Name).
+								Replicas(ptr.To(int32(2))).
+								Selector(metav1.SetAsLabelSelector(matchLabel)).
+								UpdateStrategyType(orchestrationapi.RollingUpdateStormServiceStrategyType).
+								RoleSetTemplateMeta(metav1.ObjectMeta{Labels: matchLabel}, roleSetSpec).
+								Obj()
+							gomega.Expect(k8sClient.Create(ctx, ss)).To(gomega.Succeed())
+
+							// Create PA1 first (targeting same SS and same role)
+							pa1 := wrapper.MakePodAutoscaler("pa-ss-conflict-1").
+								Namespace(ns.Name).
+								ScalingStrategy(autoscalingv1alpha1.KPA).
+								MinReplicas(1).
+								MaxReplicas(10).
+								ScaleTargetRefWithKind("StormService", "orchestration.aibrix.ai/v1alpha1", "test-stormservice-conflict").
+								SubTargetSelector("prefill"). // Same role
+								MetricSource(wrapper.MakeMetricSourcePod(autoscalingv1alpha1.HTTP, "8080", "/metrics", "gpu_cache_usage_perc", "0.7")).
+								Obj()
+							gomega.Expect(k8sClient.Create(ctx, pa1)).To(gomega.Succeed())
+							time.Sleep(time.Second * 2)
+
+							// Create PA2 (should have conflict)
+							gomega.Expect(k8sClient.Create(ctx, pa)).To(gomega.Succeed())
+							time.Sleep(time.Second * 3)
+						},
+						checkFunc: func(ctx context.Context, k8sClient client.Client, pa *autoscalingv1alpha1.PodAutoscaler) {
+							// PA2 must report Conflict=False (conflict present) and AbleToScale=False; g makes polls retry.
+							gomega.Eventually(func(g gomega.Gomega) {
+								fetched := validation.GetPodAutoscaler(ctx, k8sClient, pa)
+								g.Expect(fetched.Status.Conditions).To(gomega.ContainElement(gomega.And(
+									gomega.HaveField("Type", ConditionConflict), gomega.HaveField("Status", metav1.ConditionFalse))))
+								g.Expect(fetched.Status.Conditions).To(gomega.ContainElement(gomega.And(
+									gomega.HaveField("Type", ConditionAbleToScale), gomega.HaveField("Status", metav1.ConditionFalse))))
+							}, time.Second*15, time.Millisecond*500).Should(gomega.Succeed())
+						},
+					},
+				},
+			},
+		),
+
+		// =========================================================================
+		// Annotation-Based Configuration
+		// =========================================================================
+
+		ginkgo.Entry("Annotation - Scale up cooldown annotation",
+			&testValidatingCase{
+				makePodAutoscaler: func() *autoscalingv1alpha1.PodAutoscaler {
+					return wrapper.MakePodAutoscaler("pa-cooldown").
+						Namespace(ns.Name).
+						ScalingStrategy(autoscalingv1alpha1.KPA).
+						MinReplicas(1).
+						MaxReplicas(10).
+						// A single KPA tuning annotation; the check below reads it back.
+						Annotations(map[string]string{
+							"kpa.autoscaling.aibrix.ai/scale-up-cooldown": "30s",
+						}).
+						ScaleTargetRefWithKind("Deployment", "apps/v1", "cooldown-deployment").
+						MetricSource(wrapper.MakeMetricSourcePod(autoscalingv1alpha1.HTTP, "8080", "/metrics", "gpu_cache_usage_perc", "0.5")).
+						Obj()
+				},
+				updates: []*update{
+					{
+						updateFunc: func(pa *autoscalingv1alpha1.PodAutoscaler) {
+							createDeployment("cooldown-deployment", ns.Name, 2)
+							gomega.Expect(k8sClient.Create(ctx, pa)).To(gomega.Succeed())
+							time.Sleep(time.Second * 2)
+						},
+						checkFunc: func(ctx context.Context, k8sClient client.Client, pa *autoscalingv1alpha1.PodAutoscaler) {
+							// The stored PA must still carry the annotation value and report ValidSpec=True.
+							stored := validation.GetPodAutoscaler(ctx, k8sClient, pa)
+							gomega.Expect(stored.Annotations).To(gomega.HaveKey("kpa.autoscaling.aibrix.ai/scale-up-cooldown"))
+							gomega.Expect(stored.Annotations["kpa.autoscaling.aibrix.ai/scale-up-cooldown"]).To(gomega.Equal("30s"))
+							validation.ValidatePodAutoscalerCondition(stored, ConditionValidSpec, metav1.ConditionTrue, "")
+						},
+					},
+				},
+			},
+		),
+
+		ginkgo.Entry("Annotation - Scale down delay annotation",
+			&testValidatingCase{
+				makePodAutoscaler: func() *autoscalingv1alpha1.PodAutoscaler {
+					return wrapper.MakePodAutoscaler("pa-scale-down-delay").
+						Namespace(ns.Name).
+						ScalingStrategy(autoscalingv1alpha1.KPA).
+						MinReplicas(1).
+						MaxReplicas(10).
+						// A single KPA tuning annotation; the check below looks for its key.
+						Annotations(map[string]string{
+							"kpa.autoscaling.aibrix.ai/scale-down-delay": "3m",
+						}).
+						ScaleTargetRefWithKind("Deployment", "apps/v1", "delay-deployment").
+						MetricSource(wrapper.MakeMetricSourcePod(autoscalingv1alpha1.HTTP, "8080", "/metrics", "gpu_cache_usage_perc", "0.5")).
+						Obj()
+				},
+				updates: []*update{
+					{
+						updateFunc: func(pa *autoscalingv1alpha1.PodAutoscaler) {
+							createDeployment("delay-deployment", ns.Name, 5)
+							gomega.Expect(k8sClient.Create(ctx, pa)).To(gomega.Succeed())
+							time.Sleep(time.Second * 2)
+						},
+						checkFunc: func(ctx context.Context, k8sClient client.Client, pa *autoscalingv1alpha1.PodAutoscaler) {
+							// Only the key's presence is asserted here, together with ValidSpec=True.
+							stored := validation.GetPodAutoscaler(ctx, k8sClient, pa)
+							gomega.Expect(stored.Annotations).To(gomega.HaveKey("kpa.autoscaling.aibrix.ai/scale-down-delay"))
+							validation.ValidatePodAutoscalerCondition(stored, ConditionValidSpec, metav1.ConditionTrue, "")
+						},
+					},
+				},
+			},
+		),
+
+		ginkgo.Entry("Annotation - Multiple KPA annotations",
+			&testValidatingCase{
+				makePodAutoscaler: func() *autoscalingv1alpha1.PodAutoscaler {
+					kpaTuning := map[string]string{
+						"kpa.autoscaling.aibrix.ai/panic-threshold":   "200",
+						"kpa.autoscaling.aibrix.ai/panic-window":      "10s",
+						"kpa.autoscaling.aibrix.ai/stable-window":     "60s",
+						"kpa.autoscaling.aibrix.ai/scale-up-cooldown": "30s",
+						"kpa.autoscaling.aibrix.ai/scale-down-delay":  "180s",
+						"kpa.autoscaling.aibrix.ai/tolerance":         "0.1",
+					}
+					return wrapper.MakePodAutoscaler("pa-annotations").
+						Namespace(ns.Name).
+						ScalingStrategy(autoscalingv1alpha1.KPA).
+						MinReplicas(1).
+						MaxReplicas(10).
+						Annotations(kpaTuning).
+						ScaleTargetRefWithKind("Deployment", "apps/v1", "annotations-deployment").
+						MetricSource(wrapper.MakeMetricSourcePod(autoscalingv1alpha1.HTTP, "8080", "/metrics", "gpu_cache_usage_perc", "0.5")).
+						Obj()
+				},
+				updates: []*update{
+					{
+						updateFunc: func(pa *autoscalingv1alpha1.PodAutoscaler) {
+							createDeployment("annotations-deployment", ns.Name, 3)
+							gomega.Expect(k8sClient.Create(ctx, pa)).To(gomega.Succeed())
+							time.Sleep(time.Second * 2)
+						},
+						checkFunc: func(ctx context.Context, k8sClient client.Client, pa *autoscalingv1alpha1.PodAutoscaler) {
+							// Spot-check two of the six annotations (panic-threshold, tolerance) plus ValidSpec=True.
+							stored := validation.GetPodAutoscaler(ctx, k8sClient, pa)
+							gomega.Expect(stored.Annotations).To(gomega.HaveKey("kpa.autoscaling.aibrix.ai/panic-threshold"))
+							gomega.Expect(stored.Annotations["kpa.autoscaling.aibrix.ai/panic-threshold"]).To(gomega.Equal("200"))
+							gomega.Expect(stored.Annotations).To(gomega.HaveKey("kpa.autoscaling.aibrix.ai/tolerance"))
+							gomega.Expect(stored.Annotations["kpa.autoscaling.aibrix.ai/tolerance"]).To(gomega.Equal("0.1"))
+							validation.ValidatePodAutoscalerCondition(stored, ConditionValidSpec, metav1.ConditionTrue, "")
+						},
+					},
+				},
+			},
+		),
+
+		// =========================================================================
+		// Advanced Scenarios
+		// =========================================================================
+
+		ginkgo.Entry("Advanced - Update PA spec and verify reconciliation",
+			&testValidatingCase{
+				makePodAutoscaler: func() *autoscalingv1alpha1.PodAutoscaler {
+					return wrapper.MakePodAutoscaler("pa-update-spec").
+						Namespace(ns.Name).
+						ScalingStrategy(autoscalingv1alpha1.KPA).
+						MinReplicas(1).
+						MaxReplicas(5).
+						ScaleTargetRefWithKind("Deployment", "apps/v1", "update-spec-deployment").
+						MetricSource(wrapper.MakeMetricSourcePod(autoscalingv1alpha1.HTTP, "8080", "/metrics", "cpu_usage", "0.7")).
+						Obj()
+				},
+				updates: []*update{
+					{
+						updateFunc: func(pa *autoscalingv1alpha1.PodAutoscaler) {
+							createDeployment("update-spec-deployment", ns.Name, 2)
+							gomega.Expect(k8sClient.Create(ctx, pa)).To(gomega.Succeed())
+							time.Sleep(time.Second * 2)
+						},
+						checkFunc: func(ctx context.Context, k8sClient client.Client, pa *autoscalingv1alpha1.PodAutoscaler) {
+							fetched := validation.GetPodAutoscaler(ctx, k8sClient, pa)
+							validation.ValidatePodAutoscalerCondition(fetched, ConditionValidSpec, metav1.ConditionTrue, "")
+							gomega.Expect(fetched.Spec.MaxReplicas).To(gomega.Equal(int32(5)))
+						},
+					},
+					{
+						updateFunc: func(pa *autoscalingv1alpha1.PodAutoscaler) {
+							fetched := validation.GetPodAutoscaler(ctx, k8sClient, pa)
+							fetched.Spec.MaxReplicas = 10 // update spec: raise maxReplicas from 5 to 10
+							gomega.Expect(k8sClient.Update(ctx, fetched)).To(gomega.Succeed())
+							time.Sleep(time.Second * 2)
+						},
+						checkFunc: func(ctx context.Context, k8sClient client.Client, pa *autoscalingv1alpha1.PodAutoscaler) {
+							// Spec and status are both asserted via g so Eventually can retry a failed poll.
+							gomega.Eventually(func(g gomega.Gomega) {
+								fetched := validation.GetPodAutoscaler(ctx, k8sClient, pa)
+								g.Expect(fetched.Spec.MaxReplicas).To(gomega.Equal(int32(10)))
+								g.Expect(fetched.Status.Conditions).To(gomega.ContainElement(gomega.And(
+									gomega.HaveField("Type", ConditionValidSpec), gomega.HaveField("Status", metav1.ConditionTrue))))
+							}, time.Second*10, time.Millisecond*250).Should(gomega.Succeed())
+						},
+					},
+				},
+			},
+		),
+
+		ginkgo.Entry("Advanced - Multiple rapid updates to spec",
+			&testValidatingCase{
+				makePodAutoscaler: func() *autoscalingv1alpha1.PodAutoscaler {
+					return wrapper.MakePodAutoscaler("pa-rapid-updates").
+						Namespace(ns.Name).
+						ScalingStrategy(autoscalingv1alpha1.HPA).
+						MinReplicas(1).
+						MaxReplicas(5).
+						ScaleTargetRefWithKind("Deployment", "apps/v1", "rapid-deployment").
+						MetricSource(wrapper.MakeMetricSourcePod(autoscalingv1alpha1.HTTP, "8080", "/metrics", "requests_per_second", "100")).
+						Obj()
+				},
+				updates: []*update{
+					{
+						updateFunc: func(pa *autoscalingv1alpha1.PodAutoscaler) {
+							createDeployment("rapid-deployment", ns.Name, 2)
+							gomega.Expect(k8sClient.Create(ctx, pa)).To(gomega.Succeed())
+							time.Sleep(time.Second * 2)
+						},
+						checkFunc: func(ctx context.Context, k8sClient client.Client, pa *autoscalingv1alpha1.PodAutoscaler) {
+							validation.ValidatePodAutoscalerConditionExists(validation.GetPodAutoscaler(ctx, k8sClient, pa), ConditionValidSpec)
+						},
+					},
+					{
+						updateFunc: func(pa *autoscalingv1alpha1.PodAutoscaler) {
+							for i := 0; i < 3; i++ { // rapid updates: maxReplicas becomes 5, 7, then 9
+								fetched := validation.GetPodAutoscaler(ctx, k8sClient, pa)
+								fetched.Spec.MaxReplicas = int32(5 + i*2)
+								gomega.Expect(k8sClient.Update(ctx, fetched)).To(gomega.Succeed())
+								time.Sleep(time.Millisecond * 500)
+							}
+							time.Sleep(time.Second * 2)
+						},
+						checkFunc: func(ctx context.Context, k8sClient client.Client, pa *autoscalingv1alpha1.PodAutoscaler) {
+							// Final maxReplicas should be 9 (5 + 2*2); all checks use g so Eventually can retry.
+							gomega.Eventually(func(g gomega.Gomega) {
+								fetched := validation.GetPodAutoscaler(ctx, k8sClient, pa)
+								g.Expect(fetched.Spec.MaxReplicas).To(gomega.Equal(int32(9)))
+								g.Expect(fetched.Status.Conditions).To(gomega.ContainElement(gomega.And(
+									gomega.HaveField("Type", ConditionValidSpec), gomega.HaveField("Status", metav1.ConditionTrue))))
+							}, time.Second*15, time.Second*1).Should(gomega.Succeed())
+						},
+					},
+				},
+			},
+		),
+	)
+})

From e91c393e7934850f569c3df194934c7ce9abc075 Mon Sep 17 00:00:00 2001
From: Wang Zhenyu <ts-zhenyu.b.wang@rakuten.com>
Date: Sun, 19 Oct 2025 21:45:37 +0800
Subject: [PATCH 2/4] feat: integration test for podAutoScaler

Signed-off-by: Wang Zhenyu <ts-zhenyu.b.wang@rakuten.com>
---
 test/utils/validation/hpa.go           | 114 +++++++++++++
 test/utils/validation/podautoscaler.go | 215 +++++++++++++++++++++++++
 test/utils/wrapper/podautoscaler.go    | 170 +++++++++++++++++++
 3 files changed, 499 insertions(+)
 create mode 100644 test/utils/validation/hpa.go
 create mode 100644 test/utils/validation/podautoscaler.go
 create mode 100644 test/utils/wrapper/podautoscaler.go

diff --git a/test/utils/validation/hpa.go b/test/utils/validation/hpa.go
new file mode 100644
index 000000000..afa4f4021
--- /dev/null
+++ b/test/utils/validation/hpa.go
@@ -0,0 +1,114 @@
+/*
+Copyright 2025 The Aibrix Team.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package validation
+
+import (
+	"context"
+	"time"
+
+	"github.com/onsi/gomega"
+	autoscalingv2 "k8s.io/api/autoscaling/v2"
+	"k8s.io/apimachinery/pkg/types"
+	"sigs.k8s.io/controller-runtime/pkg/client"
+)
+
+// WaitForHPACreated polls (250ms interval, 10s timeout) until the HPA
+// namespace/name can be fetched, then returns the fetched object.
+func WaitForHPACreated(ctx context.Context,
+	k8sClient client.Client,
+	namespace, name string) *autoscalingv2.HorizontalPodAutoscaler {
+
+	key := types.NamespacedName{Namespace: namespace, Name: name}
+	found := &autoscalingv2.HorizontalPodAutoscaler{}
+	// The same object is reused across polls; the last Get fills it in.
+	poll := func(g gomega.Gomega) {
+		g.Expect(k8sClient.Get(ctx, key, found)).ToNot(gomega.HaveOccurred())
+	}
+	gomega.Eventually(poll, time.Second*10, time.Millisecond*250).Should(gomega.Succeed())
+
+	return found
+}
+
+// WaitForHPADeleted polls (250ms interval, 10s timeout) until fetching the
+// HPA namespace/name fails with an error that IgnoreNotFound filters out.
+func WaitForHPADeleted(ctx context.Context,
+	k8sClient client.Client,
+	namespace, name string) {
+
+	key := types.NamespacedName{Namespace: namespace, Name: name}
+	gone := func(g gomega.Gomega) {
+		var probe autoscalingv2.HorizontalPodAutoscaler
+		getErr := k8sClient.Get(ctx, key, &probe)
+		g.Expect(getErr).To(gomega.HaveOccurred())
+		g.Expect(client.IgnoreNotFound(getErr)).To(gomega.Succeed())
+	}
+	gomega.Eventually(gone, time.Second*10, time.Millisecond*250).Should(gomega.Succeed())
+}
+
+// ValidateHPAOwnerReference asserts that hpa has exactly one owner reference
+// and that it names expectedOwnerName / expectedOwnerKind with Controller=true.
+func ValidateHPAOwnerReference(hpa *autoscalingv2.HorizontalPodAutoscaler,
+	expectedOwnerName string,
+	expectedOwnerKind string) {
+
+	refs := hpa.OwnerReferences
+	gomega.Expect(refs).To(gomega.HaveLen(1), "HPA should have exactly one owner reference")
+
+	owner := refs[0]
+	gomega.Expect(owner.Name).To(gomega.Equal(expectedOwnerName),
+		"Owner name should be %s", expectedOwnerName)
+	gomega.Expect(owner.Kind).To(gomega.Equal(expectedOwnerKind),
+		"Owner kind should be %s", expectedOwnerKind)
+	// Controller is a pointer: assert it is set before dereferencing it.
+	gomega.Expect(owner.Controller).ToNot(gomega.BeNil(), "Controller field should not be nil")
+	gomega.Expect(*owner.Controller).To(gomega.BeTrue(), "Controller field should be true")
+}
+
+// ValidateHPASpec asserts that hpa.Spec.MinReplicas is set and that both replica bounds match the expected values.
+func ValidateHPASpec(hpa *autoscalingv2.HorizontalPodAutoscaler,
+	expectedMinReplicas, expectedMaxReplicas int32) {
+
+	spec := hpa.Spec
+	gomega.Expect(spec.MinReplicas).ToNot(gomega.BeNil(), "HPA MinReplicas should not be nil")
+	gomega.Expect(*spec.MinReplicas).To(gomega.Equal(expectedMinReplicas),
+		"HPA MinReplicas should be %d", expectedMinReplicas)
+	gomega.Expect(spec.MaxReplicas).To(gomega.Equal(expectedMaxReplicas),
+		"HPA MaxReplicas should be %d", expectedMaxReplicas)
+}
+
+// ValidateHPAScaleTargetRef asserts that hpa.Spec.ScaleTargetRef has the
+// expected Kind and Name.
+func ValidateHPAScaleTargetRef(hpa *autoscalingv2.HorizontalPodAutoscaler,
+	expectedKind, expectedName string) {
+
+	target := hpa.Spec.ScaleTargetRef
+	gomega.Expect(target.Kind).To(gomega.Equal(expectedKind), "HPA ScaleTargetRef kind should be %s", expectedKind)
+	gomega.Expect(target.Name).To(gomega.Equal(expectedName), "HPA ScaleTargetRef name should be %s", expectedName)
+}
+
+// GetHPA fetches the HPA namespace/name with a single Get (no polling),
+// asserts that the call succeeded, and returns the fetched object.
+func GetHPA(ctx context.Context,
+	k8sClient client.Client,
+	namespace, name string) *autoscalingv2.HorizontalPodAutoscaler {
+
+	key := types.NamespacedName{Namespace: namespace, Name: name}
+	result := new(autoscalingv2.HorizontalPodAutoscaler)
+	getErr := k8sClient.Get(ctx, key, result)
+	gomega.Expect(getErr).To(gomega.Succeed())
+	return result
+}
diff --git a/test/utils/validation/podautoscaler.go b/test/utils/validation/podautoscaler.go
new file mode 100644
index 000000000..7aee5d17a
--- /dev/null
+++ b/test/utils/validation/podautoscaler.go
@@ -0,0 +1,215 @@
+/*
+Copyright 2025 The Aibrix Team.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package validation
+
+import (
+	"context"
+	"time"
+
+	"github.com/onsi/gomega"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"sigs.k8s.io/controller-runtime/pkg/client"
+
+	autoscalingv1alpha1 "github.com/vllm-project/aibrix/api/autoscaling/v1alpha1"
+)
+
+// ValidatePodAutoscalerSpec asserts MinReplicas (must be non-nil), MaxReplicas and ScalingStrategy of pa.Spec.
+func ValidatePodAutoscalerSpec(pa *autoscalingv1alpha1.PodAutoscaler,
+	expectedMin, expectedMax int32, expectedStrategy autoscalingv1alpha1.ScalingStrategyType) {
+
+	spec := &pa.Spec
+	gomega.Expect(spec.MinReplicas).ToNot(gomega.BeNil(), "MinReplicas should not be nil")
+	gomega.Expect(*spec.MinReplicas).To(gomega.Equal(expectedMin), "MinReplicas should match expected value")
+	gomega.Expect(spec.MaxReplicas).To(gomega.Equal(expectedMax), "MaxReplicas should match expected value")
+	gomega.Expect(spec.ScalingStrategy).To(gomega.Equal(expectedStrategy), "ScalingStrategy should match expected value")
+}
+
+// ValidatePodAutoscalerCondition asserts that the first condition of conditionType on pa exists
+// and has expectedStatus; its Reason is compared only when expectedReason is non-empty.
+func ValidatePodAutoscalerCondition(pa *autoscalingv1alpha1.PodAutoscaler,
+	conditionType string,
+	expectedStatus metav1.ConditionStatus,
+	expectedReason string) {
+
+	var cond *metav1.Condition
+	for idx := 0; idx < len(pa.Status.Conditions) && cond == nil; idx++ {
+		if c := &pa.Status.Conditions[idx]; c.Type == conditionType {
+			cond = c
+		}
+	}
+
+	gomega.Expect(cond).ToNot(gomega.BeNil(), "condition %s should exist", conditionType)
+	gomega.Expect(cond.Status).To(gomega.Equal(expectedStatus),
+		"condition %s status should be %s", conditionType, expectedStatus)
+	if expectedReason == "" {
+		return
+	}
+	gomega.Expect(cond.Reason).To(gomega.Equal(expectedReason),
+		"condition %s reason should be %s", conditionType, expectedReason)
+}
+
+// ValidatePodAutoscalerConditionExists asserts that pa.Status.Conditions has an entry whose Type is conditionType.
+func ValidatePodAutoscalerConditionExists(pa *autoscalingv1alpha1.PodAutoscaler,
+	conditionType string) {
+
+	var match *metav1.Condition
+	for pos := range pa.Status.Conditions {
+		if pa.Status.Conditions[pos].Type != conditionType {
+			continue
+		}
+		match = &pa.Status.Conditions[pos]
+		break
+	}
+
+	gomega.Expect(match).ToNot(gomega.BeNil(), "condition %s should exist", conditionType)
+}
+
+// ValidatePodAutoscalerConditionNotExists asserts that no entry of
+// pa.Status.Conditions has Type equal to conditionType.
+func ValidatePodAutoscalerConditionNotExists(pa *autoscalingv1alpha1.PodAutoscaler,
+	conditionType string) {
+
+	var hit *metav1.Condition
+	for n := 0; n < len(pa.Status.Conditions) && hit == nil; n++ {
+		if candidate := &pa.Status.Conditions[n]; candidate.Type == conditionType {
+			hit = candidate
+		}
+	}
+
+	gomega.Expect(hit).To(gomega.BeNil(),
+		"condition %s should not exist", conditionType)
+}
+
+// ValidatePodAutoscalerScaling polls (250ms interval, 10s timeout) until the
+// stored PodAutoscaler reports the expected Status.DesiredScale and Status.ActualScale.
+func ValidatePodAutoscalerScaling(ctx context.Context,
+	k8sClient client.Client,
+	pa *autoscalingv1alpha1.PodAutoscaler,
+	expectedDesired, expectedActual int32) {
+
+	key := client.ObjectKeyFromObject(pa)
+	check := func(g gomega.Gomega) {
+		var current autoscalingv1alpha1.PodAutoscaler
+		g.Expect(k8sClient.Get(ctx, key, &current)).ToNot(gomega.HaveOccurred())
+		status := current.Status
+		g.Expect(status.DesiredScale).To(gomega.Equal(expectedDesired), "DesiredScale should be %d", expectedDesired)
+		g.Expect(status.ActualScale).To(gomega.Equal(expectedActual), "ActualScale should be %d", expectedActual)
+	}
+	gomega.Eventually(check, time.Second*10, time.Millisecond*250).Should(gomega.Succeed())
+}
+
+// ValidatePodAutoscalerScalingEventually waits for the PodAutoscaler status to
+// report expectedDesired as DesiredScale and expectedActual as ActualScale.
+//
+// ValidatePodAutoscalerScaling already polls with Eventually (10s timeout,
+// 250ms interval), so this helper delegates to it instead of carrying a
+// second copy of the same polling loop. It is kept so existing callers of
+// either name continue to compile and behave the same way.
+//
+// ctx and k8sClient are used to re-fetch the object on every poll; pa only
+// supplies the object key (namespace/name).
+func ValidatePodAutoscalerScalingEventually(ctx context.Context,
+	k8sClient client.Client,
+	pa *autoscalingv1alpha1.PodAutoscaler,
+	expectedDesired, expectedActual int32) {
+
+	ValidatePodAutoscalerScaling(ctx, k8sClient, pa, expectedDesired, expectedActual)
+}
+
+// ValidateScalingHistory validates the scalingHistory in status.
+func ValidateScalingHistory(pa *autoscalingv1alpha1.PodAutoscaler,
+	expectedCount int,
+	checkLatest func(autoscalingv1alpha1.ScalingDecision)) {
+	// HaveLen reports the actual entries on mismatch instead of two bare integers.
+	history := pa.Status.ScalingHistory
+	gomega.Expect(history).To(gomega.HaveLen(expectedCount),
+		"ScalingHistory should have %d entries", expectedCount)
+	// checkLatest is optional; it only ever sees the last entry, and is skipped when empty.
+	if checkLatest != nil && len(history) > 0 {
+		checkLatest(history[len(history)-1])
+	}
+}
+
+// WaitForPodAutoscalerCondition polls until conditionType is present with expectedStatus.
+// Every check goes through the polling Gomega g, so a condition that has not been written
+// yet is retried until the 10s timeout instead of failing the spec on the first poll.
+func WaitForPodAutoscalerCondition(ctx context.Context,
+	k8sClient client.Client,
+	pa *autoscalingv1alpha1.PodAutoscaler,
+	conditionType string,
+	expectedStatus metav1.ConditionStatus) {
+	gomega.Eventually(func(g gomega.Gomega) {
+		fetched := &autoscalingv1alpha1.PodAutoscaler{}
+		g.Expect(k8sClient.Get(ctx, client.ObjectKeyFromObject(pa), fetched)).To(gomega.Succeed())
+		g.Expect(fetched.Status.Conditions).To(gomega.ContainElement(gomega.And(
+			gomega.HaveField("Type", conditionType), gomega.HaveField("Status", expectedStatus))))
+	}, time.Second*10, time.Millisecond*250).Should(gomega.Succeed())
+}
+
+// WaitForPodAutoscalerConditionWithReason polls until conditionType is present with
+// expectedStatus and, when expectedReason is non-empty, exactly that reason. Every check goes
+// through the polling Gomega g so an unmet condition is retried rather than failing at once.
+func WaitForPodAutoscalerConditionWithReason(ctx context.Context, k8sClient client.Client,
+	pa *autoscalingv1alpha1.PodAutoscaler, conditionType string,
+	expectedStatus metav1.ConditionStatus, expectedReason string) {
+	gomega.Eventually(func(g gomega.Gomega) {
+		fetched := &autoscalingv1alpha1.PodAutoscaler{}
+		g.Expect(k8sClient.Get(ctx, client.ObjectKeyFromObject(pa), fetched)).To(gomega.Succeed())
+		want := gomega.And(gomega.HaveField("Type", conditionType), gomega.HaveField("Status", expectedStatus))
+		if expectedReason != "" {
+			want = gomega.And(want, gomega.HaveField("Reason", expectedReason))
+		}
+		g.Expect(fetched.Status.Conditions).To(gomega.ContainElement(want))
+	}, time.Second*10, time.Millisecond*250).Should(gomega.Succeed())
+}
+
+// WaitForPodAutoscalerCreated waits for PodAutoscaler to be created and returns the latest version.
+func WaitForPodAutoscalerCreated(ctx context.Context,
+	k8sClient client.Client,
+	pa *autoscalingv1alpha1.PodAutoscaler) *autoscalingv1alpha1.PodAutoscaler {
+
+	fetched := &autoscalingv1alpha1.PodAutoscaler{}
+	gomega.Eventually(func(g gomega.Gomega) {
+		err := k8sClient.Get(ctx, client.ObjectKeyFromObject(pa), fetched)
+		g.Expect(err).ToNot(gomega.HaveOccurred())
+	}, time.Second*5, time.Millisecond*250).Should(gomega.Succeed())
+
+	return fetched
+}
+
+// WaitForPodAutoscalerDeleted waits for PodAutoscaler to be deleted.
+func WaitForPodAutoscalerDeleted(ctx context.Context,
+	k8sClient client.Client,
+	pa *autoscalingv1alpha1.PodAutoscaler) {
+
+	gomega.Eventually(func(g gomega.Gomega) {
+		fetched := &autoscalingv1alpha1.PodAutoscaler{}
+		err := k8sClient.Get(ctx, client.ObjectKeyFromObject(pa), fetched)
+		g.Expect(err).To(gomega.HaveOccurred())
+		g.Expect(client.IgnoreNotFound(err)).To(gomega.Succeed())
+	}, time.Second*10, time.Millisecond*250).Should(gomega.Succeed())
+}
+
+// GetPodAutoscaler fetches the latest version of a PodAutoscaler.
+func GetPodAutoscaler(ctx context.Context,
+	k8sClient client.Client,
+	pa *autoscalingv1alpha1.PodAutoscaler) *autoscalingv1alpha1.PodAutoscaler {
+
+	fetched := &autoscalingv1alpha1.PodAutoscaler{}
+	gomega.Expect(k8sClient.Get(ctx, client.ObjectKeyFromObject(pa), fetched)).To(gomega.Succeed())
+	return fetched
+}
diff --git a/test/utils/wrapper/podautoscaler.go b/test/utils/wrapper/podautoscaler.go
new file mode 100644
index 000000000..1f64ef832
--- /dev/null
+++ b/test/utils/wrapper/podautoscaler.go
@@ -0,0 +1,170 @@
+/*
+Copyright 2025 The Aibrix Team.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package wrapper
+
+import (
+	corev1 "k8s.io/api/core/v1"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+
+	autoscalingv1alpha1 "github.com/vllm-project/aibrix/api/autoscaling/v1alpha1"
+)
+
+// PodAutoscalerWrapper wraps PodAutoscaler to provide a fluent API for test construction.
+type PodAutoscalerWrapper struct {
+	autoscalingv1alpha1.PodAutoscaler
+}
+
+// MakePodAutoscaler creates a new PodAutoscalerWrapper with the given name.
+func MakePodAutoscaler(name string) *PodAutoscalerWrapper {
+	return &PodAutoscalerWrapper{
+		PodAutoscaler: autoscalingv1alpha1.PodAutoscaler{
+			ObjectMeta: metav1.ObjectMeta{
+				Name: name,
+			},
+		},
+	}
+}
+
+// Obj returns the pointer to the underlying PodAutoscaler object.
+func (w *PodAutoscalerWrapper) Obj() *autoscalingv1alpha1.PodAutoscaler {
+	return &w.PodAutoscaler
+}
+
+// Namespace sets the namespace of the PodAutoscaler.
+func (w *PodAutoscalerWrapper) Namespace(namespace string) *PodAutoscalerWrapper {
+	w.PodAutoscaler.Namespace = namespace
+	return w
+}
+
+// ScalingStrategy sets the scaling strategy (HPA, KPA, or APA).
+func (w *PodAutoscalerWrapper) ScalingStrategy(strategy autoscalingv1alpha1.ScalingStrategyType) *PodAutoscalerWrapper {
+	w.Spec.ScalingStrategy = strategy
+	return w
+}
+
+// MinReplicas sets the minimum number of replicas.
+func (w *PodAutoscalerWrapper) MinReplicas(min int32) *PodAutoscalerWrapper {
+	w.Spec.MinReplicas = &min
+	return w
+}
+
+// MaxReplicas sets the maximum number of replicas.
+func (w *PodAutoscalerWrapper) MaxReplicas(max int32) *PodAutoscalerWrapper {
+	w.Spec.MaxReplicas = max
+	return w
+}
+
+// ScaleTargetRef sets the target resource to scale.
+func (w *PodAutoscalerWrapper) ScaleTargetRef(ref corev1.ObjectReference) *PodAutoscalerWrapper {
+	w.Spec.ScaleTargetRef = ref
+	return w
+}
+
+// ScaleTargetRefWithKind sets the target resource with kind, apiVersion, and name.
+func (w *PodAutoscalerWrapper) ScaleTargetRefWithKind(kind, apiVersion, name string) *PodAutoscalerWrapper {
+	w.Spec.ScaleTargetRef = corev1.ObjectReference{
+		Kind:       kind,
+		APIVersion: apiVersion,
+		Name:       name,
+	}
+	return w
+}
+
+// SubTargetSelector sets the sub-target selector (e.g., for role-level scaling).
+func (w *PodAutoscalerWrapper) SubTargetSelector(roleName string) *PodAutoscalerWrapper {
+	w.Spec.SubTargetSelector = &autoscalingv1alpha1.SubTargetSelector{
+		RoleName: roleName,
+	}
+	return w
+}
+
+// MetricSource sets a single metric source (replaces any existing).
+func (w *PodAutoscalerWrapper) MetricSource(source autoscalingv1alpha1.MetricSource) *PodAutoscalerWrapper {
+	w.Spec.MetricsSources = []autoscalingv1alpha1.MetricSource{source}
+	return w
+}
+
+// AddMetricSource adds a metric source to the list.
+func (w *PodAutoscalerWrapper) AddMetricSource(source autoscalingv1alpha1.MetricSource) *PodAutoscalerWrapper {
+	w.Spec.MetricsSources = append(w.Spec.MetricsSources, source)
+	return w
+}
+
+// Annotations merges the given annotations into any already set, overwriting matching keys.
+func (w *PodAutoscalerWrapper) Annotations(annotations map[string]string) *PodAutoscalerWrapper {
+	if w.PodAutoscaler.Annotations == nil {
+		w.PodAutoscaler.Annotations = make(map[string]string)
+	}
+	for k, v := range annotations {
+		w.PodAutoscaler.Annotations[k] = v
+	}
+	return w
+}
+
+// Labels merges the given labels into any already set, overwriting matching keys.
+func (w *PodAutoscalerWrapper) Labels(labels map[string]string) *PodAutoscalerWrapper {
+	if w.PodAutoscaler.Labels == nil {
+		w.PodAutoscaler.Labels = make(map[string]string)
+	}
+	for k, v := range labels {
+		w.PodAutoscaler.Labels[k] = v
+	}
+	return w
+}
+
+// MakeMetricSourcePod creates a POD-type metric source.
+func MakeMetricSourcePod(protocolType autoscalingv1alpha1.ProtocolType, port, path, targetMetric, targetValue string) autoscalingv1alpha1.MetricSource {
+	return autoscalingv1alpha1.MetricSource{
+		MetricSourceType: autoscalingv1alpha1.POD,
+		ProtocolType:     protocolType,
+		Port:             port,
+		Path:             path,
+		TargetMetric:     targetMetric,
+		TargetValue:      targetValue,
+	}
+}
+
+// MakeMetricSourceResource creates a RESOURCE-type metric source (e.g., CPU, memory).
+func MakeMetricSourceResource(targetMetric, targetValue string) autoscalingv1alpha1.MetricSource {
+	return autoscalingv1alpha1.MetricSource{
+		MetricSourceType: autoscalingv1alpha1.RESOURCE,
+		TargetMetric:     targetMetric,
+		TargetValue:      targetValue,
+	}
+}
+
+// MakeMetricSourceExternal creates an EXTERNAL-type metric source.
+func MakeMetricSourceExternal(protocolType autoscalingv1alpha1.ProtocolType, endpoint, path, targetMetric, targetValue string) autoscalingv1alpha1.MetricSource {
+	return autoscalingv1alpha1.MetricSource{
+		MetricSourceType: autoscalingv1alpha1.EXTERNAL,
+		ProtocolType:     protocolType,
+		Endpoint:         endpoint,
+		Path:             path,
+		TargetMetric:     targetMetric,
+		TargetValue:      targetValue,
+	}
+}
+
+// MakeMetricSourceCustom creates a CUSTOM-type metric source.
+func MakeMetricSourceCustom(targetMetric, targetValue string) autoscalingv1alpha1.MetricSource {
+	return autoscalingv1alpha1.MetricSource{
+		MetricSourceType: autoscalingv1alpha1.CUSTOM,
+		TargetMetric:     targetMetric,
+		TargetValue:      targetValue,
+	}
+}
+

From 9f9eafbde407d394776bbdaabd988631f6923f77 Mon Sep 17 00:00:00 2001
From: Wang Zhenyu <ts-zhenyu.b.wang@rakuten.com>
Date: Mon, 20 Oct 2025 09:50:13 +0800
Subject: [PATCH 3/4] fix: correct typo, add Eventually, delete duplicate Func

Signed-off-by: Wang Zhenyu <ts-zhenyu.b.wang@rakuten.com>
---
 .../podautoscaler/podautoscaler_controller.go |  2 +-
 .../controller/podautoscaler_test.go          | 78 +++++++++++--------
 test/utils/validation/podautoscaler.go        | 18 -----
 3 files changed, 48 insertions(+), 50 deletions(-)

diff --git a/pkg/controller/podautoscaler/podautoscaler_controller.go b/pkg/controller/podautoscaler/podautoscaler_controller.go
index 29cca1adf..bd72d1cb4 100644
--- a/pkg/controller/podautoscaler/podautoscaler_controller.go
+++ b/pkg/controller/podautoscaler/podautoscaler_controller.go
@@ -74,7 +74,7 @@ const RayClusterFleet = "RayClusterFleet"
 const (
 	ConditionReady         = "Ready"
 	ConditionValidSpec     = "ValidSpec"
-	ConditionConflict      = "MutilPodAutoscalerConflict"
+	ConditionConflict      = "MultiPodAutoscalerConflict"
 	ConditionScalingActive = "ScalingActive"
 	ConditionAbleToScale   = "AbleToScale"
 
diff --git a/test/integration/controller/podautoscaler_test.go b/test/integration/controller/podautoscaler_test.go
index bb1bb0b4f..8fa5b1214 100644
--- a/test/integration/controller/podautoscaler_test.go
+++ b/test/integration/controller/podautoscaler_test.go
@@ -38,7 +38,7 @@ import (
 const (
 	ConditionReady         = "Ready"
 	ConditionValidSpec     = "ValidSpec"
-	ConditionConflict      = "MutilPodAutoscalerConflict"
+	ConditionConflict      = "MultiPodAutoscalerConflict"
 	ConditionScalingActive = "ScalingActive"
 	ConditionAbleToScale   = "AbleToScale"
 
@@ -197,14 +197,16 @@ var _ = ginkgo.Describe("PodAutoscaler controller test", func() {
 					},
 					{
 						updateFunc: func(pa *autoscalingv1alpha1.PodAutoscaler) {
-							// Update PodAutoscaler spec
-							time.Sleep(time.Second * 3) // Wait for initial reconcile
-							fetched := validation.GetPodAutoscaler(ctx, k8sClient, pa)
-							minReplicas := int32(2)
-							fetched.Spec.MinReplicas = &minReplicas
-							fetched.Spec.MaxReplicas = 10
-							gomega.Expect(k8sClient.Update(ctx, fetched)).To(gomega.Succeed())
-							time.Sleep(time.Second * 3) // Wait for update to propagate
+							// Update PodAutoscaler spec with retry for race conditions
+							gomega.Eventually(func() error {
+								fetched := validation.GetPodAutoscaler(ctx, k8sClient, pa)
+								minReplicas := int32(2)
+								fetched.Spec.MinReplicas = &minReplicas
+								fetched.Spec.MaxReplicas = 10
+								return k8sClient.Update(ctx, fetched)
+							}, time.Second*10, time.Millisecond*250).Should(gomega.Succeed())
+							// Give controller time to reconcile the updated PA and sync HPA
+							time.Sleep(time.Second * 2)
 						},
 						checkFunc: func(ctx context.Context, k8sClient client.Client, pa *autoscalingv1alpha1.PodAutoscaler) {
 							// Validate HPA is updated with more relaxed timing
@@ -472,21 +474,33 @@ var _ = ginkgo.Describe("PodAutoscaler controller test", func() {
 						updateFunc: func(pa *autoscalingv1alpha1.PodAutoscaler) {
 							// Delete first PA
 							gomega.Expect(k8sClient.Delete(ctx, pa)).To(gomega.Succeed())
+
+							// Wait for deletion to complete
+							gomega.Eventually(func() error {
+								temp := &autoscalingv1alpha1.PodAutoscaler{}
+								return k8sClient.Get(ctx, client.ObjectKeyFromObject(pa), temp)
+							}, time.Second*10, time.Millisecond*250).ShouldNot(gomega.Succeed())
+
+							// Give controller time to process the deletion event and update caches
 							time.Sleep(time.Second * 2)
 
-							// Manually trigger PA2 reconcile by updating it (no-op update)
-							// This forces the controller to re-check the conflict status
-							pa2 := &autoscalingv1alpha1.PodAutoscaler{}
-							err := k8sClient.Get(ctx, client.ObjectKey{Namespace: ns.Name, Name: "pa-resolve-2"}, pa2)
-							gomega.Expect(err).ToNot(gomega.HaveOccurred())
+							// Manually trigger PA2 reconcile by updating it with retry for race conditions
+							gomega.Eventually(func() error {
+								pa2 := &autoscalingv1alpha1.PodAutoscaler{}
+								err := k8sClient.Get(ctx, client.ObjectKey{Namespace: ns.Name, Name: "pa-resolve-2"}, pa2)
+								if err != nil {
+									return err
+								}
+								// Add an annotation to trigger reconcile
+								if pa2.Annotations == nil {
+									pa2.Annotations = make(map[string]string)
+								}
+								pa2.Annotations["test.aibrix.ai/force-reconcile"] = time.Now().Format(time.RFC3339)
+								return k8sClient.Update(ctx, pa2)
+							}, time.Second*10, time.Millisecond*250).Should(gomega.Succeed())
 
-							// Add an annotation to trigger reconcile
-							if pa2.Annotations == nil {
-								pa2.Annotations = make(map[string]string)
-							}
-							pa2.Annotations["test.aibrix.ai/force-reconcile"] = time.Now().Format(time.RFC3339)
-							gomega.Expect(k8sClient.Update(ctx, pa2)).To(gomega.Succeed())
-							time.Sleep(time.Second * 3)
+							// Give controller time to reconcile PA2 after annotation update
+							time.Sleep(time.Second * 1)
 						},
 						checkFunc: func(ctx context.Context, k8sClient client.Client, pa *autoscalingv1alpha1.PodAutoscaler) {
 							// PA2 conflict should be resolved - condition should be removed
@@ -1213,11 +1227,12 @@ var _ = ginkgo.Describe("PodAutoscaler controller test", func() {
 					},
 					{
 						updateFunc: func(pa *autoscalingv1alpha1.PodAutoscaler) {
-							// Update spec: change maxReplicas
-							fetched := validation.GetPodAutoscaler(ctx, k8sClient, pa)
-							fetched.Spec.MaxReplicas = 10
-							gomega.Expect(k8sClient.Update(ctx, fetched)).To(gomega.Succeed())
-							time.Sleep(time.Second * 2)
+							// Update spec: change maxReplicas with retry for race conditions
+							gomega.Eventually(func() error {
+								fetched := validation.GetPodAutoscaler(ctx, k8sClient, pa)
+								fetched.Spec.MaxReplicas = 10
+								return k8sClient.Update(ctx, fetched)
+							}, time.Second*10, time.Millisecond*250).Should(gomega.Succeed())
 						},
 						checkFunc: func(ctx context.Context, k8sClient client.Client, pa *autoscalingv1alpha1.PodAutoscaler) {
 							// Verify update is applied and reconciled
@@ -1259,12 +1274,13 @@ var _ = ginkgo.Describe("PodAutoscaler controller test", func() {
 						updateFunc: func(pa *autoscalingv1alpha1.PodAutoscaler) {
 							// Rapid updates: change maxReplicas multiple times
 							for i := 0; i < 3; i++ {
-								fetched := validation.GetPodAutoscaler(ctx, k8sClient, pa)
-								fetched.Spec.MaxReplicas = int32(5 + i*2)
-								gomega.Expect(k8sClient.Update(ctx, fetched)).To(gomega.Succeed())
-								time.Sleep(time.Millisecond * 500)
+								maxReplicas := int32(5 + i*2)
+								gomega.Eventually(func() error {
+									fetched := validation.GetPodAutoscaler(ctx, k8sClient, pa)
+									fetched.Spec.MaxReplicas = maxReplicas
+									return k8sClient.Update(ctx, fetched)
+								}, time.Second*5, time.Millisecond*100).Should(gomega.Succeed())
 							}
-							time.Sleep(time.Second * 2)
 						},
 						checkFunc: func(ctx context.Context, k8sClient client.Client, pa *autoscalingv1alpha1.PodAutoscaler) {
 							// Eventually consistent: final maxReplicas should be 9 (5 + 2*2)
diff --git a/test/utils/validation/podautoscaler.go b/test/utils/validation/podautoscaler.go
index 7aee5d17a..8366dd09b 100644
--- a/test/utils/validation/podautoscaler.go
+++ b/test/utils/validation/podautoscaler.go
@@ -112,24 +112,6 @@ func ValidatePodAutoscalerScaling(ctx context.Context,
 	}, time.Second*10, time.Millisecond*250).Should(gomega.Succeed())
 }
 
-// ValidatePodAutoscalerScalingEventually validates scaling status and waits for eventual consistency.
-func ValidatePodAutoscalerScalingEventually(ctx context.Context,
-	k8sClient client.Client,
-	pa *autoscalingv1alpha1.PodAutoscaler,
-	expectedDesired, expectedActual int32) {
-
-	gomega.Eventually(func(g gomega.Gomega) {
-		fetched := &autoscalingv1alpha1.PodAutoscaler{}
-		err := k8sClient.Get(ctx, client.ObjectKeyFromObject(pa), fetched)
-		g.Expect(err).ToNot(gomega.HaveOccurred())
-
-		g.Expect(fetched.Status.DesiredScale).To(gomega.Equal(expectedDesired),
-			"DesiredScale should be %d", expectedDesired)
-		g.Expect(fetched.Status.ActualScale).To(gomega.Equal(expectedActual),
-			"ActualScale should be %d", expectedActual)
-	}, time.Second*10, time.Millisecond*250).Should(gomega.Succeed())
-}
-
 // ValidateScalingHistory validates the scalingHistory in status.
 func ValidateScalingHistory(pa *autoscalingv1alpha1.PodAutoscaler,
 	expectedCount int,

From 7eba63656db3a420d08be59e6aae36e56bf96587 Mon Sep 17 00:00:00 2001
From: Wang Zhenyu <ts-zhenyu.b.wang@rakuten.com>
Date: Tue, 21 Oct 2025 09:45:08 +0800
Subject: [PATCH 4/4] fix: pass lint check

Signed-off-by: Wang Zhenyu <ts-zhenyu.b.wang@rakuten.com>
---
 .../controller/podautoscaler_test.go          | 396 ++++++++----------
 test/utils/validation/podautoscaler.go        |   3 +-
 test/utils/wrapper/podautoscaler.go           |  11 +-
 3 files changed, 188 insertions(+), 222 deletions(-)

diff --git a/test/integration/controller/podautoscaler_test.go b/test/integration/controller/podautoscaler_test.go
index 8fa5b1214..e6adb8238 100644
--- a/test/integration/controller/podautoscaler_test.go
+++ b/test/integration/controller/podautoscaler_test.go
@@ -116,6 +116,120 @@ var _ = ginkgo.Describe("PodAutoscaler controller test", func() {
 		return deployment
 	}
 
+	// Helper: creates a StormService with two roles (prefill and decode)
+	createStormService := func(name, namespace string, labelKey, labelValue string,
+		prefillReplicas, decodeReplicas int32) *orchestrationapi.StormService {
+		matchLabel := map[string]string{labelKey: labelValue}
+		podTemplate := corev1.PodTemplateSpec{
+			ObjectMeta: metav1.ObjectMeta{
+				Labels: matchLabel,
+			},
+			Spec: corev1.PodSpec{
+				Containers: []corev1.Container{
+					{
+						Name:  "vllm-container",
+						Image: "vllm/vllm-openai:latest",
+					},
+				},
+			},
+		}
+		roleSetSpec := &orchestrationapi.RoleSetSpec{
+			Roles: []orchestrationapi.RoleSpec{
+				{
+					Name:     "prefill",
+					Replicas: ptr.To(prefillReplicas),
+					Template: podTemplate,
+					Stateful: false,
+				},
+				{
+					Name:     "decode",
+					Replicas: ptr.To(decodeReplicas),
+					Template: podTemplate,
+					Stateful: false,
+				},
+			},
+		}
+		ss := wrapper.MakeStormService(name).
+			Namespace(namespace).
+			Replicas(ptr.To(int32(2))).
+			Selector(metav1.SetAsLabelSelector(matchLabel)).
+			UpdateStrategyType(orchestrationapi.RollingUpdateStormServiceStrategyType).
+			RoleSetTemplateMeta(metav1.ObjectMeta{Labels: matchLabel}, roleSetSpec).
+			Obj()
+		gomega.Expect(k8sClient.Create(ctx, ss)).To(gomega.Succeed())
+		return ss
+	}
+
+	// Helper: creates a test case for boundary enforcement with similar structure
+	makeBoundaryTestCase := func(name, deploymentName string, min, max int32,
+		deploymentReplicas int32) *testValidatingCase {
+		return &testValidatingCase{
+			makePodAutoscaler: func() *autoscalingv1alpha1.PodAutoscaler {
+				return wrapper.MakePodAutoscaler(name).
+					Namespace(ns.Name).
+					ScalingStrategy(autoscalingv1alpha1.HPA).
+					MinReplicas(min).
+					MaxReplicas(max).
+					ScaleTargetRefWithKind("Deployment", "apps/v1", deploymentName).
+					MetricSource(wrapper.MakeMetricSourcePod(
+						autoscalingv1alpha1.HTTP, "8080", "/metrics",
+						"requests_per_second", "100")).
+					Obj()
+			},
+			updates: []*update{
+				{
+					updateFunc: func(pa *autoscalingv1alpha1.PodAutoscaler) {
+						createDeployment(deploymentName, ns.Name, deploymentReplicas)
+						gomega.Expect(k8sClient.Create(ctx, pa)).To(gomega.Succeed())
+						time.Sleep(time.Second * 2)
+					},
+					checkFunc: func(ctx context.Context, k8sClient client.Client,
+						pa *autoscalingv1alpha1.PodAutoscaler) {
+						hpa := validation.WaitForHPACreated(ctx, k8sClient, ns.Name, pa.Name+"-hpa")
+						validation.ValidateHPASpec(hpa, min, max)
+					},
+				},
+			},
+		}
+	}
+
+	// Helper: creates a test case for spec validation with similar structure
+	makeSpecValidationTestCase := func(name, deploymentName string, min, max int32,
+		expectedCondition string, expectedStatus metav1.ConditionStatus,
+		expectedReason string) *testValidatingCase {
+		return &testValidatingCase{
+			makePodAutoscaler: func() *autoscalingv1alpha1.PodAutoscaler {
+				return wrapper.MakePodAutoscaler(name).
+					Namespace(ns.Name).
+					ScalingStrategy(autoscalingv1alpha1.HPA).
+					MinReplicas(min).
+					MaxReplicas(max).
+					ScaleTargetRefWithKind("Deployment", "apps/v1", deploymentName).
+					MetricSource(wrapper.MakeMetricSourcePod(
+						autoscalingv1alpha1.HTTP, "8080", "/metrics",
+						"requests_per_second", "100")).
+					Obj()
+			},
+			updates: []*update{
+				{
+					updateFunc: func(pa *autoscalingv1alpha1.PodAutoscaler) {
+						createDeployment(deploymentName, ns.Name, 2)
+						gomega.Expect(k8sClient.Create(ctx, pa)).To(gomega.Succeed())
+						time.Sleep(time.Second * 2)
+					},
+					checkFunc: func(ctx context.Context, k8sClient client.Client,
+						pa *autoscalingv1alpha1.PodAutoscaler) {
+						validation.WaitForPodAutoscalerConditionWithReason(
+							ctx, k8sClient, pa,
+							expectedCondition, expectedStatus,
+							expectedReason,
+						)
+					},
+				},
+			},
+		}
+	}
+
 	ginkgo.DescribeTable("test PodAutoscaler creation and reconciliation",
 		func(tc *testValidatingCase) {
 			pa := tc.makePodAutoscaler()
@@ -144,7 +258,8 @@ var _ = ginkgo.Describe("PodAutoscaler controller test", func() {
 						MinReplicas(1).
 						MaxReplicas(5).
 						ScaleTargetRefWithKind("Deployment", "apps/v1", "test-deployment").
-						MetricSource(wrapper.MakeMetricSourcePod(autoscalingv1alpha1.HTTP, "8080", "/metrics", "requests_per_second", "100")).
+						MetricSource(wrapper.MakeMetricSourcePod(
+							autoscalingv1alpha1.HTTP, "8080", "/metrics", "requests_per_second", "100")).
 						Obj()
 				},
 				updates: []*update{
@@ -181,7 +296,8 @@ var _ = ginkgo.Describe("PodAutoscaler controller test", func() {
 						MinReplicas(1).
 						MaxReplicas(5).
 						ScaleTargetRefWithKind("Deployment", "apps/v1", "test-deployment-2").
-						MetricSource(wrapper.MakeMetricSourcePod(autoscalingv1alpha1.HTTP, "8080", "/metrics", "requests_per_second", "100")).
+						MetricSource(wrapper.MakeMetricSourcePod(
+							autoscalingv1alpha1.HTTP, "8080", "/metrics", "requests_per_second", "100")).
 						Obj()
 				},
 				updates: []*update{
@@ -229,7 +345,8 @@ var _ = ginkgo.Describe("PodAutoscaler controller test", func() {
 						MinReplicas(1).
 						MaxReplicas(5).
 						ScaleTargetRefWithKind("Deployment", "apps/v1", "test-deployment-3").
-						MetricSource(wrapper.MakeMetricSourcePod(autoscalingv1alpha1.HTTP, "8080", "/metrics", "requests_per_second", "100")).
+						MetricSource(wrapper.MakeMetricSourcePod(
+							autoscalingv1alpha1.HTTP, "8080", "/metrics", "requests_per_second", "100")).
 						Obj()
 				},
 				updates: []*update{
@@ -276,7 +393,8 @@ var _ = ginkgo.Describe("PodAutoscaler controller test", func() {
 						MinReplicas(1).
 						MaxReplicas(5).
 						ScaleTargetRefWithKind("Deployment", "apps/v1", ""). // Empty name
-						MetricSource(wrapper.MakeMetricSourcePod(autoscalingv1alpha1.HTTP, "8080", "/metrics", "requests_per_second", "100")).
+						MetricSource(wrapper.MakeMetricSourcePod(
+							autoscalingv1alpha1.HTTP, "8080", "/metrics", "requests_per_second", "100")).
 						Obj()
 				},
 				updates: []*update{
@@ -300,36 +418,11 @@ var _ = ginkgo.Describe("PodAutoscaler controller test", func() {
 		),
 
 		ginkgo.Entry("Spec Validation - Invalid Replica Bounds (min > max)",
-			&testValidatingCase{
-				makePodAutoscaler: func() *autoscalingv1alpha1.PodAutoscaler {
-					return wrapper.MakePodAutoscaler("pa-invalid-bounds").
-						Namespace(ns.Name).
-						ScalingStrategy(autoscalingv1alpha1.HPA).
-						MinReplicas(5). // min > max
-						MaxReplicas(3).
-						ScaleTargetRefWithKind("Deployment", "apps/v1", "test-deployment-4").
-						MetricSource(wrapper.MakeMetricSourcePod(autoscalingv1alpha1.HTTP, "8080", "/metrics", "requests_per_second", "100")).
-						Obj()
-				},
-				updates: []*update{
-					{
-						updateFunc: func(pa *autoscalingv1alpha1.PodAutoscaler) {
-							createDeployment("test-deployment-4", ns.Name, 2)
-							gomega.Expect(k8sClient.Create(ctx, pa)).To(gomega.Succeed())
-							// Wait for controller to reconcile
-							time.Sleep(time.Second * 2)
-						},
-						checkFunc: func(ctx context.Context, k8sClient client.Client, pa *autoscalingv1alpha1.PodAutoscaler) {
-							// Validate ValidSpec condition is False
-							validation.WaitForPodAutoscalerConditionWithReason(
-								ctx, k8sClient, pa,
-								ConditionValidSpec, metav1.ConditionFalse,
-								ReasonInvalidBounds,
-							)
-						},
-					},
-				},
-			},
+			makeSpecValidationTestCase(
+				"pa-invalid-bounds", "test-deployment-4",
+				5, 3, // min > max
+				ConditionValidSpec, metav1.ConditionFalse, ReasonInvalidBounds,
+			),
 		),
 
 		// Note: Invalid ScalingStrategy test is skipped because CRD-level validation
@@ -339,36 +432,11 @@ var _ = ginkgo.Describe("PodAutoscaler controller test", func() {
 		// prevents empty metricsSources from being created (minItems=1).
 
 		ginkgo.Entry("Spec Validation - Valid Spec",
-			&testValidatingCase{
-				makePodAutoscaler: func() *autoscalingv1alpha1.PodAutoscaler {
-					return wrapper.MakePodAutoscaler("pa-valid-spec").
-						Namespace(ns.Name).
-						ScalingStrategy(autoscalingv1alpha1.HPA).
-						MinReplicas(1).
-						MaxReplicas(5).
-						ScaleTargetRefWithKind("Deployment", "apps/v1", "test-deployment-7").
-						MetricSource(wrapper.MakeMetricSourcePod(autoscalingv1alpha1.HTTP, "8080", "/metrics", "requests_per_second", "100")).
-						Obj()
-				},
-				updates: []*update{
-					{
-						updateFunc: func(pa *autoscalingv1alpha1.PodAutoscaler) {
-							createDeployment("test-deployment-7", ns.Name, 2)
-							gomega.Expect(k8sClient.Create(ctx, pa)).To(gomega.Succeed())
-							// Wait for controller to reconcile
-							time.Sleep(time.Second * 2)
-						},
-						checkFunc: func(ctx context.Context, k8sClient client.Client, pa *autoscalingv1alpha1.PodAutoscaler) {
-							// Validate ValidSpec condition is True
-							validation.WaitForPodAutoscalerConditionWithReason(
-								ctx, k8sClient, pa,
-								ConditionValidSpec, metav1.ConditionTrue,
-								ReasonAsExpected,
-							)
-						},
-					},
-				},
-			},
+			makeSpecValidationTestCase(
+				"pa-valid-spec", "test-deployment-7",
+				1, 5,
+				ConditionValidSpec, metav1.ConditionTrue, ReasonAsExpected,
+			),
 		),
 
 		// =========================================================================
@@ -384,7 +452,8 @@ var _ = ginkgo.Describe("PodAutoscaler controller test", func() {
 						MinReplicas(1).
 						MaxReplicas(5).
 						ScaleTargetRefWithKind("Deployment", "apps/v1", "shared-deployment").
-						MetricSource(wrapper.MakeMetricSourcePod(autoscalingv1alpha1.HTTP, "8080", "/metrics", "requests_per_second", "100")).
+						MetricSource(wrapper.MakeMetricSourcePod(
+							autoscalingv1alpha1.HTTP, "8080", "/metrics", "requests_per_second", "100")).
 						Obj()
 				},
 				updates: []*update{
@@ -411,7 +480,8 @@ var _ = ginkgo.Describe("PodAutoscaler controller test", func() {
 								MinReplicas(1).
 								MaxReplicas(10).
 								ScaleTargetRefWithKind("Deployment", "apps/v1", "shared-deployment").
-								MetricSource(wrapper.MakeMetricSourcePod(autoscalingv1alpha1.HTTP, "8080", "/metrics", "requests_per_second", "100")).
+								MetricSource(wrapper.MakeMetricSourcePod(
+									autoscalingv1alpha1.HTTP, "8080", "/metrics", "requests_per_second", "100")).
 								Obj()
 							gomega.Expect(k8sClient.Create(ctx, pa2)).To(gomega.Succeed())
 							time.Sleep(time.Second * 2)
@@ -441,7 +511,8 @@ var _ = ginkgo.Describe("PodAutoscaler controller test", func() {
 						MinReplicas(1).
 						MaxReplicas(5).
 						ScaleTargetRefWithKind("Deployment", "apps/v1", "resolve-deployment").
-						MetricSource(wrapper.MakeMetricSourcePod(autoscalingv1alpha1.HTTP, "8080", "/metrics", "requests_per_second", "100")).
+						MetricSource(wrapper.MakeMetricSourcePod(
+							autoscalingv1alpha1.HTTP, "8080", "/metrics", "requests_per_second", "100")).
 						Obj()
 				},
 				updates: []*update{
@@ -457,7 +528,8 @@ var _ = ginkgo.Describe("PodAutoscaler controller test", func() {
 								MinReplicas(1).
 								MaxReplicas(10).
 								ScaleTargetRefWithKind("Deployment", "apps/v1", "resolve-deployment").
-								MetricSource(wrapper.MakeMetricSourcePod(autoscalingv1alpha1.HTTP, "8080", "/metrics", "requests_per_second", "100")).
+								MetricSource(wrapper.MakeMetricSourcePod(
+									autoscalingv1alpha1.HTTP, "8080", "/metrics", "requests_per_second", "100")).
 								Obj()
 							gomega.Expect(k8sClient.Create(ctx, pa2)).To(gomega.Succeed())
 							time.Sleep(time.Second * 2)
@@ -530,7 +602,8 @@ var _ = ginkgo.Describe("PodAutoscaler controller test", func() {
 						MinReplicas(1).
 						MaxReplicas(5).
 						ScaleTargetRefWithKind("Deployment", "apps/v1", "status-deployment").
-						MetricSource(wrapper.MakeMetricSourcePod(autoscalingv1alpha1.HTTP, "8080", "/metrics", "requests_per_second", "100")).
+						MetricSource(wrapper.MakeMetricSourcePod(
+							autoscalingv1alpha1.HTTP, "8080", "/metrics", "requests_per_second", "100")).
 						Obj()
 				},
 				updates: []*update{
@@ -565,7 +638,8 @@ var _ = ginkgo.Describe("PodAutoscaler controller test", func() {
 						MinReplicas(1).
 						MaxReplicas(5).
 						ScaleTargetRefWithKind("Deployment", "apps/v1", "condition-deployment").
-						MetricSource(wrapper.MakeMetricSourcePod(autoscalingv1alpha1.HTTP, "8080", "/metrics", "requests_per_second", "100")).
+						MetricSource(wrapper.MakeMetricSourcePod(
+							autoscalingv1alpha1.HTTP, "8080", "/metrics", "requests_per_second", "100")).
 						Obj()
 				},
 				updates: []*update{
@@ -596,7 +670,8 @@ var _ = ginkgo.Describe("PodAutoscaler controller test", func() {
 						MinReplicas(1).
 						MaxReplicas(5).
 						ScaleTargetRefWithKind("Deployment", "apps/v1", "ready-deployment").
-						MetricSource(wrapper.MakeMetricSourcePod(autoscalingv1alpha1.HTTP, "8080", "/metrics", "requests_per_second", "100")).
+						MetricSource(wrapper.MakeMetricSourcePod(
+							autoscalingv1alpha1.HTTP, "8080", "/metrics", "requests_per_second", "100")).
 						Obj()
 				},
 				updates: []*update{
@@ -634,7 +709,8 @@ var _ = ginkgo.Describe("PodAutoscaler controller test", func() {
 						MinReplicas(1).
 						MaxReplicas(10).
 						ScaleTargetRefWithKind("Deployment", "apps/v1", "scale-test-deployment").
-						MetricSource(wrapper.MakeMetricSourcePod(autoscalingv1alpha1.HTTP, "8080", "/metrics", "requests_per_second", "100")).
+						MetricSource(wrapper.MakeMetricSourcePod(
+							autoscalingv1alpha1.HTTP, "8080", "/metrics", "requests_per_second", "100")).
 						Obj()
 				},
 				updates: []*update{
@@ -669,7 +745,8 @@ var _ = ginkgo.Describe("PodAutoscaler controller test", func() {
 						MinReplicas(1).
 						MaxReplicas(5).
 						ScaleTargetRefWithKind("Deployment", "apps/v1", "nonexistent-deployment").
-						MetricSource(wrapper.MakeMetricSourcePod(autoscalingv1alpha1.HTTP, "8080", "/metrics", "requests_per_second", "100")).
+						MetricSource(wrapper.MakeMetricSourcePod(
+							autoscalingv1alpha1.HTTP, "8080", "/metrics", "requests_per_second", "100")).
 						Obj()
 				},
 				updates: []*update{
@@ -699,64 +776,11 @@ var _ = ginkgo.Describe("PodAutoscaler controller test", func() {
 		// =========================================================================
 
 		ginkgo.Entry("Boundary Enforcement - maxReplicas enforced in HPA",
-			&testValidatingCase{
-				makePodAutoscaler: func() *autoscalingv1alpha1.PodAutoscaler {
-					return wrapper.MakePodAutoscaler("pa-boundary-max").
-						Namespace(ns.Name).
-						ScalingStrategy(autoscalingv1alpha1.HPA).
-						MinReplicas(1).
-						MaxReplicas(5).
-						ScaleTargetRefWithKind("Deployment", "apps/v1", "boundary-deployment").
-						MetricSource(wrapper.MakeMetricSourcePod(autoscalingv1alpha1.HTTP, "8080", "/metrics", "requests_per_second", "100")).
-						Obj()
-				},
-				updates: []*update{
-					{
-						updateFunc: func(pa *autoscalingv1alpha1.PodAutoscaler) {
-							// Create deployment with many replicas
-							createDeployment("boundary-deployment", ns.Name, 8)
-							gomega.Expect(k8sClient.Create(ctx, pa)).To(gomega.Succeed())
-							time.Sleep(time.Second * 2)
-						},
-						checkFunc: func(ctx context.Context, k8sClient client.Client, pa *autoscalingv1alpha1.PodAutoscaler) {
-							// HPA should be created with maxReplicas=5
-							hpa := validation.WaitForHPACreated(ctx, k8sClient, ns.Name, pa.Name+"-hpa")
-							validation.ValidateHPASpec(hpa, 1, 5)
-							// HPA will enforce the max boundary
-						},
-					},
-				},
-			},
+			makeBoundaryTestCase("pa-boundary-max", "boundary-deployment", 1, 5, 8),
 		),
 
 		ginkgo.Entry("Boundary Enforcement - minReplicas enforced in HPA",
-			&testValidatingCase{
-				makePodAutoscaler: func() *autoscalingv1alpha1.PodAutoscaler {
-					return wrapper.MakePodAutoscaler("pa-boundary-min").
-						Namespace(ns.Name).
-						ScalingStrategy(autoscalingv1alpha1.HPA).
-						MinReplicas(3).
-						MaxReplicas(10).
-						ScaleTargetRefWithKind("Deployment", "apps/v1", "boundary-min-deployment").
-						MetricSource(wrapper.MakeMetricSourcePod(autoscalingv1alpha1.HTTP, "8080", "/metrics", "requests_per_second", "100")).
-						Obj()
-				},
-				updates: []*update{
-					{
-						updateFunc: func(pa *autoscalingv1alpha1.PodAutoscaler) {
-							// Create deployment with few replicas
-							createDeployment("boundary-min-deployment", ns.Name, 1)
-							gomega.Expect(k8sClient.Create(ctx, pa)).To(gomega.Succeed())
-							time.Sleep(time.Second * 2)
-						},
-						checkFunc: func(ctx context.Context, k8sClient client.Client, pa *autoscalingv1alpha1.PodAutoscaler) {
-							// HPA should be created with minReplicas=3
-							hpa := validation.WaitForHPACreated(ctx, k8sClient, ns.Name, pa.Name+"-hpa")
-							validation.ValidateHPASpec(hpa, 3, 10)
-						},
-					},
-				},
-			},
+			makeBoundaryTestCase("pa-boundary-min", "boundary-min-deployment", 3, 10, 1),
 		),
 
 		ginkgo.Entry("Boundary Enforcement - minReplicas=0 in PA spec",
@@ -768,7 +792,8 @@ var _ = ginkgo.Describe("PodAutoscaler controller test", func() {
 						MinReplicas(0). // Set minReplicas=0 in PA
 						MaxReplicas(5).
 						ScaleTargetRefWithKind("Deployment", "apps/v1", "boundary-zero-deployment").
-						MetricSource(wrapper.MakeMetricSourcePod(autoscalingv1alpha1.HTTP, "8080", "/metrics", "requests_per_second", "100")).
+						MetricSource(wrapper.MakeMetricSourcePod(
+							autoscalingv1alpha1.HTTP, "8080", "/metrics", "requests_per_second", "100")).
 						Obj()
 				},
 				updates: []*update{
@@ -809,7 +834,8 @@ var _ = ginkgo.Describe("PodAutoscaler controller test", func() {
 						MinReplicas(1).
 						MaxReplicas(10).
 						ScaleTargetRefWithKind("Deployment", "apps/v1", "history-deployment").
-						MetricSource(wrapper.MakeMetricSourcePod(autoscalingv1alpha1.HTTP, "8080", "/metrics", "requests_per_second", "100")).
+						MetricSource(wrapper.MakeMetricSourcePod(
+							autoscalingv1alpha1.HTTP, "8080", "/metrics", "requests_per_second", "100")).
 						Obj()
 				},
 				updates: []*update{
@@ -849,52 +875,15 @@ var _ = ginkgo.Describe("PodAutoscaler controller test", func() {
 						MinReplicas(1).
 						MaxReplicas(10).
 						ScaleTargetRefWithKind("StormService", "orchestration.aibrix.ai/v1alpha1", "test-stormservice").
-						MetricSource(wrapper.MakeMetricSourcePod(autoscalingv1alpha1.HTTP, "8080", "/metrics", "requests_per_second", "100")).
+						MetricSource(wrapper.MakeMetricSourcePod(
+							autoscalingv1alpha1.HTTP, "8080", "/metrics", "requests_per_second", "100")).
 						Obj()
 				},
 				updates: []*update{
 					{
 						updateFunc: func(pa *autoscalingv1alpha1.PodAutoscaler) {
 							// Create StormService with 2 roles
-							matchLabel := map[string]string{"app": "test-vllm"}
-							podTemplate := corev1.PodTemplateSpec{
-								ObjectMeta: metav1.ObjectMeta{
-									Labels: matchLabel,
-								},
-								Spec: corev1.PodSpec{
-									Containers: []corev1.Container{
-										{
-											Name:  "vllm-container",
-											Image: "vllm/vllm-openai:latest",
-										},
-									},
-								},
-							}
-							roleSetSpec := &orchestrationapi.RoleSetSpec{
-								Roles: []orchestrationapi.RoleSpec{
-									{
-										Name:     "prefill",
-										Replicas: ptr.To(int32(2)),
-										Template: podTemplate,
-										Stateful: false,
-									},
-									{
-										Name:     "decode",
-										Replicas: ptr.To(int32(1)),
-										Template: podTemplate,
-										Stateful: false,
-									},
-								},
-							}
-							ss := wrapper.MakeStormService("test-stormservice").
-								Namespace(ns.Name).
-								Replicas(ptr.To(int32(2))).
-								Selector(metav1.SetAsLabelSelector(matchLabel)).
-								UpdateStrategyType(orchestrationapi.RollingUpdateStormServiceStrategyType).
-								RoleSetTemplateMeta(metav1.ObjectMeta{Labels: matchLabel}, roleSetSpec).
-								Obj()
-							gomega.Expect(k8sClient.Create(ctx, ss)).To(gomega.Succeed())
-
+							createStormService("test-stormservice", ns.Name, "app", "test-vllm", 2, 1)
 							// Create PA
 							gomega.Expect(k8sClient.Create(ctx, pa)).To(gomega.Succeed())
 							time.Sleep(time.Second * 3)
@@ -924,52 +913,15 @@ var _ = ginkgo.Describe("PodAutoscaler controller test", func() {
 						MaxReplicas(10).
 						ScaleTargetRefWithKind("StormService", "orchestration.aibrix.ai/v1alpha1", "test-stormservice-role").
 						SubTargetSelector("prefill"). // Only scale "prefill" role
-						MetricSource(wrapper.MakeMetricSourcePod(autoscalingv1alpha1.HTTP, "8080", "/metrics", "gpu_cache_usage_perc", "0.7")).
+						MetricSource(wrapper.MakeMetricSourcePod(
+							autoscalingv1alpha1.HTTP, "8080", "/metrics", "gpu_cache_usage_perc", "0.7")).
 						Obj()
 				},
 				updates: []*update{
 					{
 						updateFunc: func(pa *autoscalingv1alpha1.PodAutoscaler) {
 							// Create StormService with prefill and decode roles
-							matchLabel := map[string]string{"app": "test-vllm-role"}
-							podTemplate := corev1.PodTemplateSpec{
-								ObjectMeta: metav1.ObjectMeta{
-									Labels: matchLabel,
-								},
-								Spec: corev1.PodSpec{
-									Containers: []corev1.Container{
-										{
-											Name:  "vllm-container",
-											Image: "vllm/vllm-openai:latest",
-										},
-									},
-								},
-							}
-							roleSetSpec := &orchestrationapi.RoleSetSpec{
-								Roles: []orchestrationapi.RoleSpec{
-									{
-										Name:     "prefill",
-										Replicas: ptr.To(int32(3)),
-										Template: podTemplate,
-										Stateful: false,
-									},
-									{
-										Name:     "decode",
-										Replicas: ptr.To(int32(2)),
-										Template: podTemplate,
-										Stateful: false,
-									},
-								},
-							}
-							ss := wrapper.MakeStormService("test-stormservice-role").
-								Namespace(ns.Name).
-								Replicas(ptr.To(int32(2))).
-								Selector(metav1.SetAsLabelSelector(matchLabel)).
-								UpdateStrategyType(orchestrationapi.RollingUpdateStormServiceStrategyType).
-								RoleSetTemplateMeta(metav1.ObjectMeta{Labels: matchLabel}, roleSetSpec).
-								Obj()
-							gomega.Expect(k8sClient.Create(ctx, ss)).To(gomega.Succeed())
-
+							createStormService("test-stormservice-role", ns.Name, "app", "test-vllm-role", 3, 2)
 							// Create PA targeting only "prefill" role
 							gomega.Expect(k8sClient.Create(ctx, pa)).To(gomega.Succeed())
 							time.Sleep(time.Second * 3)
@@ -1003,7 +955,8 @@ var _ = ginkgo.Describe("PodAutoscaler controller test", func() {
 						MaxReplicas(10).
 						ScaleTargetRefWithKind("StormService", "orchestration.aibrix.ai/v1alpha1", "test-stormservice-conflict").
 						SubTargetSelector("prefill"). // Same role as PA1
-						MetricSource(wrapper.MakeMetricSourcePod(autoscalingv1alpha1.HTTP, "8080", "/metrics", "gpu_cache_usage_perc", "0.7")).
+						MetricSource(wrapper.MakeMetricSourcePod(
+							autoscalingv1alpha1.HTTP, "8080", "/metrics", "gpu_cache_usage_perc", "0.7")).
 						Obj()
 				},
 				updates: []*update{
@@ -1057,7 +1010,8 @@ var _ = ginkgo.Describe("PodAutoscaler controller test", func() {
 								MaxReplicas(10).
 								ScaleTargetRefWithKind("StormService", "orchestration.aibrix.ai/v1alpha1", "test-stormservice-conflict").
 								SubTargetSelector("prefill"). // Same role
-								MetricSource(wrapper.MakeMetricSourcePod(autoscalingv1alpha1.HTTP, "8080", "/metrics", "gpu_cache_usage_perc", "0.7")).
+								MetricSource(wrapper.MakeMetricSourcePod(
+									autoscalingv1alpha1.HTTP, "8080", "/metrics", "gpu_cache_usage_perc", "0.7")).
 								Obj()
 							gomega.Expect(k8sClient.Create(ctx, pa1)).To(gomega.Succeed())
 							time.Sleep(time.Second * 2)
@@ -1098,7 +1052,8 @@ var _ = ginkgo.Describe("PodAutoscaler controller test", func() {
 						MaxReplicas(10).
 						Annotations(annotations).
 						ScaleTargetRefWithKind("Deployment", "apps/v1", "cooldown-deployment").
-						MetricSource(wrapper.MakeMetricSourcePod(autoscalingv1alpha1.HTTP, "8080", "/metrics", "gpu_cache_usage_perc", "0.5")).
+						MetricSource(wrapper.MakeMetricSourcePod(
+							autoscalingv1alpha1.HTTP, "8080", "/metrics", "gpu_cache_usage_perc", "0.5")).
 						Obj()
 				},
 				updates: []*update{
@@ -1133,7 +1088,8 @@ var _ = ginkgo.Describe("PodAutoscaler controller test", func() {
 						MaxReplicas(10).
 						Annotations(annotations).
 						ScaleTargetRefWithKind("Deployment", "apps/v1", "delay-deployment").
-						MetricSource(wrapper.MakeMetricSourcePod(autoscalingv1alpha1.HTTP, "8080", "/metrics", "gpu_cache_usage_perc", "0.5")).
+						MetricSource(wrapper.MakeMetricSourcePod(
+							autoscalingv1alpha1.HTTP, "8080", "/metrics", "gpu_cache_usage_perc", "0.5")).
 						Obj()
 				},
 				updates: []*update{
@@ -1172,7 +1128,8 @@ var _ = ginkgo.Describe("PodAutoscaler controller test", func() {
 						MaxReplicas(10).
 						Annotations(annotations).
 						ScaleTargetRefWithKind("Deployment", "apps/v1", "annotations-deployment").
-						MetricSource(wrapper.MakeMetricSourcePod(autoscalingv1alpha1.HTTP, "8080", "/metrics", "gpu_cache_usage_perc", "0.5")).
+						MetricSource(wrapper.MakeMetricSourcePod(
+							autoscalingv1alpha1.HTTP, "8080", "/metrics", "gpu_cache_usage_perc", "0.5")).
 						Obj()
 				},
 				updates: []*update{
@@ -1209,7 +1166,8 @@ var _ = ginkgo.Describe("PodAutoscaler controller test", func() {
 						MinReplicas(1).
 						MaxReplicas(5).
 						ScaleTargetRefWithKind("Deployment", "apps/v1", "update-spec-deployment").
-						MetricSource(wrapper.MakeMetricSourcePod(autoscalingv1alpha1.HTTP, "8080", "/metrics", "cpu_usage", "0.7")).
+						MetricSource(wrapper.MakeMetricSourcePod(
+							autoscalingv1alpha1.HTTP, "8080", "/metrics", "cpu_usage", "0.7")).
 						Obj()
 				},
 				updates: []*update{
@@ -1256,7 +1214,8 @@ var _ = ginkgo.Describe("PodAutoscaler controller test", func() {
 						MinReplicas(1).
 						MaxReplicas(5).
 						ScaleTargetRefWithKind("Deployment", "apps/v1", "rapid-deployment").
-						MetricSource(wrapper.MakeMetricSourcePod(autoscalingv1alpha1.HTTP, "8080", "/metrics", "requests_per_second", "100")).
+						MetricSource(wrapper.MakeMetricSourcePod(
+							autoscalingv1alpha1.HTTP, "8080", "/metrics", "requests_per_second", "100")).
 						Obj()
 				},
 				updates: []*update{
@@ -1267,7 +1226,8 @@ var _ = ginkgo.Describe("PodAutoscaler controller test", func() {
 							time.Sleep(time.Second * 2)
 						},
 						checkFunc: func(ctx context.Context, k8sClient client.Client, pa *autoscalingv1alpha1.PodAutoscaler) {
-							validation.ValidatePodAutoscalerConditionExists(validation.GetPodAutoscaler(ctx, k8sClient, pa), ConditionValidSpec)
+							fetched := validation.GetPodAutoscaler(ctx, k8sClient, pa)
+							validation.ValidatePodAutoscalerConditionExists(fetched, ConditionValidSpec)
 						},
 					},
 					{
diff --git a/test/utils/validation/podautoscaler.go b/test/utils/validation/podautoscaler.go
index 8366dd09b..1937234ce 100644
--- a/test/utils/validation/podautoscaler.go
+++ b/test/utils/validation/podautoscaler.go
@@ -35,7 +35,8 @@ func ValidatePodAutoscalerSpec(pa *autoscalingv1alpha1.PodAutoscaler,
 	gomega.Expect(pa.Spec.MinReplicas).ToNot(gomega.BeNil(), "MinReplicas should not be nil")
 	gomega.Expect(*pa.Spec.MinReplicas).To(gomega.Equal(expectedMin), "MinReplicas should match expected value")
 	gomega.Expect(pa.Spec.MaxReplicas).To(gomega.Equal(expectedMax), "MaxReplicas should match expected value")
-	gomega.Expect(pa.Spec.ScalingStrategy).To(gomega.Equal(expectedStrategy), "ScalingStrategy should match expected value")
+	gomega.Expect(pa.Spec.ScalingStrategy).To(
+		gomega.Equal(expectedStrategy), "ScalingStrategy should match expected value")
 }
 
 // ValidatePodAutoscalerCondition validates a specific condition in a PodAutoscaler.
diff --git a/test/utils/wrapper/podautoscaler.go b/test/utils/wrapper/podautoscaler.go
index 1f64ef832..5590d5808 100644
--- a/test/utils/wrapper/podautoscaler.go
+++ b/test/utils/wrapper/podautoscaler.go
@@ -127,7 +127,10 @@ func (w *PodAutoscalerWrapper) Labels(labels map[string]string) *PodAutoscalerWr
 }
 
 // MakeMetricSourcePod creates a POD-type metric source.
-func MakeMetricSourcePod(protocolType autoscalingv1alpha1.ProtocolType, port, path, targetMetric, targetValue string) autoscalingv1alpha1.MetricSource {
+func MakeMetricSourcePod(
+	protocolType autoscalingv1alpha1.ProtocolType,
+	port, path, targetMetric, targetValue string,
+) autoscalingv1alpha1.MetricSource {
 	return autoscalingv1alpha1.MetricSource{
 		MetricSourceType: autoscalingv1alpha1.POD,
 		ProtocolType:     protocolType,
@@ -148,7 +151,10 @@ func MakeMetricSourceResource(targetMetric, targetValue string) autoscalingv1alp
 }
 
 // MakeMetricSourceExternal creates an EXTERNAL-type metric source.
-func MakeMetricSourceExternal(protocolType autoscalingv1alpha1.ProtocolType, endpoint, path, targetMetric, targetValue string) autoscalingv1alpha1.MetricSource {
+func MakeMetricSourceExternal(
+	protocolType autoscalingv1alpha1.ProtocolType,
+	endpoint, path, targetMetric, targetValue string,
+) autoscalingv1alpha1.MetricSource {
 	return autoscalingv1alpha1.MetricSource{
 		MetricSourceType: autoscalingv1alpha1.EXTERNAL,
 		ProtocolType:     protocolType,
@@ -167,4 +173,3 @@ func MakeMetricSourceCustom(targetMetric, targetValue string) autoscalingv1alpha
 		TargetValue:      targetValue,
 	}
 }
-