Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix deployment status propagation when scaling from zero #15550

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 17 additions & 0 deletions pkg/apis/autoscaling/v1alpha1/pa_lifecycle.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ var podCondSet = apis.NewLivingConditionSet(
PodAutoscalerConditionActive,
PodAutoscalerConditionScaleTargetInitialized,
PodAutoscalerConditionSKSReady,
PodAutoscalerConditionScaleTargetScaled,
)

// GetConditionSet retrieves the condition set for this resource. Implements the KRShaped interface.
Expand Down Expand Up @@ -215,6 +216,12 @@ func (pas *PodAutoscalerStatus) MarkScaleTargetInitialized() {
podCondSet.Manage(pas).MarkTrue(PodAutoscalerConditionScaleTargetInitialized)
}

// ScaleTargetNotScaled returns true if the PodAutoscaler's scale target has failed
// to scaled.
func (pas *PodAutoscalerStatus) ScaleTargetNotScaled() bool {
return pas.GetCondition(PodAutoscalerConditionScaleTargetScaled).IsFalse()
}

// MarkSKSReady marks the PA condition denoting that SKS is ready.
func (pas *PodAutoscalerStatus) MarkSKSReady() {
podCondSet.Manage(pas).MarkTrue(PodAutoscalerConditionSKSReady)
Expand Down Expand Up @@ -250,6 +257,16 @@ func (pas *PodAutoscalerStatus) MarkInactive(reason, message string) {
podCondSet.Manage(pas).MarkFalse(PodAutoscalerConditionActive, reason, message)
}

// MarkWithNoScaleFailures marks the PA as free of scale failures.
func (pas *PodAutoscalerStatus) MarkWithNoScaleFailures() {
podCondSet.Manage(pas).MarkTrue(PodAutoscalerConditionScaleTargetScaled)
}

// MarkWithScaleFailures marks the PA as failed due to scale failures.
func (pas *PodAutoscalerStatus) MarkWithScaleFailures(reason, message string) {
podCondSet.Manage(pas).MarkFalse(PodAutoscalerConditionScaleTargetScaled, reason, message)
}

// MarkResourceNotOwned changes the "Active" condition to false to reflect that the
// resource of the given kind and name has already been created, and we do not own it.
func (pas *PodAutoscalerStatus) MarkResourceNotOwned(kind, name string) {
Expand Down
2 changes: 2 additions & 0 deletions pkg/apis/autoscaling/v1alpha1/pa_lifecycle_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1057,6 +1057,7 @@ func TestTypicalFlow(t *testing.T) {
// When we see traffic, mark ourselves active.
r.MarkActive()
r.MarkScaleTargetInitialized()
r.MarkWithNoScaleFailures()
apistest.CheckConditionSucceeded(r, PodAutoscalerConditionScaleTargetInitialized, t)
apistest.CheckConditionSucceeded(r, PodAutoscalerConditionSKSReady, t)
apistest.CheckConditionSucceeded(r, PodAutoscalerConditionActive, t)
Expand All @@ -1065,6 +1066,7 @@ func TestTypicalFlow(t *testing.T) {
// Check idempotency.
r.MarkActive()
r.MarkScaleTargetInitialized()
r.MarkWithNoScaleFailures()
r.MarkSKSReady()
apistest.CheckConditionSucceeded(r, PodAutoscalerConditionScaleTargetInitialized, t)
apistest.CheckConditionSucceeded(r, PodAutoscalerConditionActive, t)
Expand Down
5 changes: 5 additions & 0 deletions pkg/apis/autoscaling/v1alpha1/pa_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,11 @@ const (
PodAutoscalerConditionActive apis.ConditionType = "Active"
// PodAutoscalerConditionSKSReady is set when SKS is ready.
PodAutoscalerConditionSKSReady = "SKSReady"
// PodAutoscalerConditionScaleTargetScaled is set when scaling the revision is successful.
// There are cases where there are pod failures during scaling eg. from zero but
// K8s does not properly propagate the failure to the deployment. In those cases
// progressdeadline is not enough.
PodAutoscalerConditionScaleTargetScaled apis.ConditionType = "ScaleTargetScaled"
)

// PodAutoscalerStatus communicates the observed state of the PodAutoscaler (from the controller).
Expand Down
7 changes: 7 additions & 0 deletions pkg/apis/serving/v1/revision_lifecycle.go
Original file line number Diff line number Diff line change
Expand Up @@ -200,6 +200,13 @@ func (rs *RevisionStatus) PropagateAutoscalerStatus(ps *autoscalingv1alpha1.PodA
// that implies that |service.endpoints| > 0.
rs.MarkResourcesAvailableTrue()
rs.MarkContainerHealthyTrue()

// Mark resource unavailable if we are scaling back to zero, but we never achieved the required scale
// and deployment status was not updated properly by K8s. For example due to an image pull error.
if ps.ScaleTargetNotScaled() {
condScaled := ps.GetCondition(autoscalingv1alpha1.PodAutoscalerConditionScaleTargetScaled)
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We could set ContainerHealthyFalse here too but we need #15503

rs.MarkResourcesAvailableFalse(condScaled.Reason, condScaled.Message)
}
}

// Mark resource unavailable if we don't have a Service Name and the deployment is ready
Expand Down
9 changes: 7 additions & 2 deletions pkg/reconciler/autoscaling/kpa/kpa.go
Original file line number Diff line number Diff line change
Expand Up @@ -114,10 +114,15 @@ func (c *Reconciler) ReconcileKind(ctx context.Context, pa *autoscalingv1alpha1.
if err := c.ReconcileMetric(ctx, pa, resolveScrapeTarget(ctx, pa)); err != nil {
return fmt.Errorf("error reconciling Metric: %w", err)
}
podCounter := resourceutil.NewPodAccessor(c.podsLister, pa.Namespace, pa.Labels[serving.RevisionLabelKey])

if err != nil {
return fmt.Errorf("error getting a pod for the revision: %w", err)
}

// Get the appropriate current scale from the metric, and right size
// the scaleTargetRef based on it.
want, err := c.scaler.scale(ctx, pa, sks, decider.Status.DesiredScale)
want, err := c.scaler.scale(ctx, pa, sks, decider.Status.DesiredScale, &podCounter)
if err != nil {
return fmt.Errorf("error scaling target: %w", err)
}
Expand Down Expand Up @@ -145,7 +150,6 @@ func (c *Reconciler) ReconcileKind(ctx context.Context, pa *autoscalingv1alpha1.
}

// Compare the desired and observed resources to determine our situation.
podCounter := resourceutil.NewPodAccessor(c.podsLister, pa.Namespace, pa.Labels[serving.RevisionLabelKey])
ready, notReady, pending, terminating, err := podCounter.PodCountsByState()
if err != nil {
return fmt.Errorf("error getting pod counts: %w", err)
Expand Down Expand Up @@ -270,6 +274,7 @@ func computeActiveCondition(ctx context.Context, pa *autoscalingv1alpha1.PodAuto
minReady := activeThreshold(ctx, pa)
if pc.ready >= minReady && pa.Status.ServiceName != "" {
pa.Status.MarkScaleTargetInitialized()
pa.Status.MarkWithNoScaleFailures()
}

switch {
Expand Down
46 changes: 23 additions & 23 deletions pkg/reconciler/autoscaling/kpa/kpa_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -294,7 +294,7 @@ func TestReconcile(t *testing.T) {
}
activeKPAMinScale := func(g, w int32) *autoscalingv1alpha1.PodAutoscaler {
return kpa(
testNamespace, testRevision, WithPASKSReady, WithTraffic, markScaleTargetInitialized,
testNamespace, testRevision, WithPASKSReady, WithTraffic, markScaleTargetInitialized, WithScaleTargetNoScaleFailures,
withScales(g, w), WithReachabilityReachable,
withMinScale(defaultScale), WithPAStatusService(testRevision), WithPAMetricsService(privateSvc),
WithObservedGeneration(1),
Expand Down Expand Up @@ -332,7 +332,7 @@ func TestReconcile(t *testing.T) {
Name: "steady state",
Key: key,
Objects: []runtime.Object{
kpa(testNamespace, testRevision, WithPASKSReady, WithTraffic,
kpa(testNamespace, testRevision, WithPASKSReady, WithTraffic, WithScaleTargetNoScaleFailures,
markScaleTargetInitialized, WithPAMetricsService(privateSvc),
withScales(1, defaultScale), WithPAStatusService(testRevision), WithObservedGeneration(1)),
defaultSKS,
Expand All @@ -358,12 +358,12 @@ func TestReconcile(t *testing.T) {
},
WantStatusUpdates: []clientgotesting.UpdateActionImpl{{
Object: kpa(testNamespace, testRevision, WithTraffic,
markScaleTargetInitialized, WithPASKSReady,
markScaleTargetInitialized, WithPASKSReady, WithScaleTargetNoScaleFailures,
WithPAMetricsService(privateSvc), withScales(1, defaultScale),
WithPAStatusService(testRevision), WithObservedGeneration(1)),
}, {
Object: kpa(testNamespace, testRevision, WithTraffic,
markScaleTargetInitialized, WithPASKSReady,
markScaleTargetInitialized, WithPASKSReady, WithScaleTargetNoScaleFailures,
WithPAMetricsService(privateSvc), withScales(1, defaultScale),
WithPAStatusService(testRevision), WithObservedGeneration(1)),
}},
Expand Down Expand Up @@ -410,11 +410,11 @@ func TestReconcile(t *testing.T) {
Name: "create metric",
Key: key,
Objects: []runtime.Object{
kpa(testNamespace, testRevision, WithTraffic, markScaleTargetInitialized,
kpa(testNamespace, testRevision, WithTraffic, markScaleTargetInitialized, WithScaleTargetNoScaleFailures,
withScales(1, defaultScale), WithPAStatusService(testRevision)),
defaultSKS, defaultDeployment, defaultReady},
WantStatusUpdates: []clientgotesting.UpdateActionImpl{{
Object: kpa(testNamespace, testRevision, WithTraffic, markScaleTargetInitialized,
Object: kpa(testNamespace, testRevision, WithTraffic, markScaleTargetInitialized, WithScaleTargetNoScaleFailures,
withScales(1, defaultScale), WithPASKSReady, WithPAStatusService(testRevision),
WithPAMetricsService(privateSvc), WithObservedGeneration(1)),
}},
Expand All @@ -425,7 +425,7 @@ func TestReconcile(t *testing.T) {
Name: "scale up deployment",
Key: key,
Objects: []runtime.Object{
kpa(testNamespace, testRevision, WithPASKSReady, WithTraffic, markScaleTargetInitialized,
kpa(testNamespace, testRevision, WithPASKSReady, WithTraffic, markScaleTargetInitialized, WithScaleTargetNoScaleFailures,
WithPAMetricsService(privateSvc), withScales(1, defaultScale), WithPAStatusService(testRevision),
WithObservedGeneration(1)),
defaultSKS,
Expand Down Expand Up @@ -495,7 +495,7 @@ func TestReconcile(t *testing.T) {
preciseReady...),
WantStatusUpdates: []clientgotesting.UpdateActionImpl{{
Object: kpa(testNamespace, testRevision, withScales(defaultScale, defaultScale),
WithPASKSNotReady(""), WithTraffic, markScaleTargetInitialized,
WithPASKSNotReady(""), WithTraffic, markScaleTargetInitialized, WithScaleTargetNoScaleFailures,
WithPAMetricsService(privateSvc), WithPAStatusService(testRevision), WithObservedGeneration(1)),
}},
}, {
Expand Down Expand Up @@ -555,7 +555,7 @@ func TestReconcile(t *testing.T) {
defaultDeployment, defaultReady},
WantStatusUpdates: []clientgotesting.UpdateActionImpl{{
Object: kpa(testNamespace, testRevision, WithPASKSReady,
WithTraffic, markScaleTargetInitialized, withMinScale(2), WithPAMetricsService(privateSvc),
WithTraffic, markScaleTargetInitialized, WithScaleTargetNoScaleFailures, withMinScale(2), WithPAMetricsService(privateSvc),
withScales(1, defaultScale), WithPAStatusService(testRevision), WithReachabilityUnreachable,
WithObservedGeneration(1)),
}},
Expand All @@ -571,7 +571,7 @@ func TestReconcile(t *testing.T) {
}, makeReadyPods(2, testNamespace, testRevision)...),
WantStatusUpdates: []clientgotesting.UpdateActionImpl{{
Object: kpa(testNamespace, testRevision, WithPASKSReady,
WithTraffic, markScaleTargetInitialized, withMinScale(2), WithPAMetricsService(privateSvc),
WithTraffic, markScaleTargetInitialized, WithScaleTargetNoScaleFailures, withMinScale(2), WithPAMetricsService(privateSvc),
withScales(2, defaultScale), WithPAStatusService(testRevision), WithReachabilityReachable,
WithObservedGeneration(1)),
}},
Expand All @@ -587,7 +587,7 @@ func TestReconcile(t *testing.T) {
}, makeReadyPods(2, testNamespace, testRevision)...),
WantStatusUpdates: []clientgotesting.UpdateActionImpl{{
Object: kpa(testNamespace, testRevision, WithPASKSReady,
WithTraffic, markScaleTargetInitialized, withMinScale(2), WithPAMetricsService(privateSvc),
WithTraffic, markScaleTargetInitialized, WithScaleTargetNoScaleFailures, withMinScale(2), WithPAMetricsService(privateSvc),
withScales(2, defaultScale), WithPAStatusService(testRevision), WithReachabilityUnknown,
WithObservedGeneration(1)),
}},
Expand Down Expand Up @@ -749,7 +749,7 @@ func TestReconcile(t *testing.T) {
metric(testNamespace, testRevision),
deploy(testNamespace, testRevision), defaultReady},
WantStatusUpdates: []clientgotesting.UpdateActionImpl{{
Object: kpa(testNamespace, testRevision, markScaleTargetInitialized, withScales(1, 0),
Object: kpa(testNamespace, testRevision, markScaleTargetInitialized, WithScaleTargetNoScaleFailures, withScales(1, 0),
WithPASKSReady, WithPAMetricsService(privateSvc),
WithNoTraffic(noTrafficReason, "The target is not receiving traffic."),
WithPAStatusService(testRevision), WithPAMetricsService(privateSvc),
Expand All @@ -765,7 +765,7 @@ func TestReconcile(t *testing.T) {
decider(testNamespace, testRevision, 0 /* desiredScale */, 0 /* ebc */)),
Objects: []runtime.Object{
kpa(testNamespace, testRevision, WithPASKSReady, WithTraffic,
markScaleTargetInitialized, withScales(1, 1),
markScaleTargetInitialized, WithScaleTargetNoScaleFailures, withScales(1, 1),
WithPAStatusService(testRevision), WithPAMetricsService(privateSvc), WithObservedGeneration(1)),
defaultSKS,
metric(testNamespace, testRevision),
Expand All @@ -783,7 +783,7 @@ func TestReconcile(t *testing.T) {
metric(testNamespace, testRevision),
deploy(testNamespace, testRevision), defaultReady},
WantStatusUpdates: []clientgotesting.UpdateActionImpl{{
Object: kpa(testNamespace, testRevision, markScaleTargetInitialized, WithPASKSReady, WithPAMetricsService(privateSvc),
Object: kpa(testNamespace, testRevision, markScaleTargetInitialized, WithScaleTargetNoScaleFailures, WithPASKSReady, WithPAMetricsService(privateSvc),
WithNoTraffic("TimedOut", "The target could not be activated."), withScales(1, 0),
WithPAStatusService(testRevision), WithPAMetricsService(privateSvc),
WithObservedGeneration(1)),
Expand Down Expand Up @@ -859,7 +859,7 @@ func TestReconcile(t *testing.T) {
minScalePatch,
},
WantStatusUpdates: []clientgotesting.UpdateActionImpl{{
Object: activatingKPAMinScale(underscale, markScaleTargetInitialized, WithPASKSReady),
Object: activatingKPAMinScale(underscale, markScaleTargetInitialized, WithScaleTargetNoScaleFailures, WithPASKSReady),
}},
}, {
// Scale to `minScale` and mark PA "active"
Expand Down Expand Up @@ -954,7 +954,7 @@ func TestReconcile(t *testing.T) {
-42 /* ebc */)),
Objects: []runtime.Object{
kpa(testNamespace, testRevision, WithPASKSReady, WithTraffic,
markScaleTargetInitialized, WithPAMetricsService(privateSvc),
markScaleTargetInitialized, WithScaleTargetNoScaleFailures, WithPAMetricsService(privateSvc),
withScales(1, defaultScale), WithPAStatusService(testRevision), WithObservedGeneration(1)),
defaultProxySKS,
metric(testNamespace, testRevision),
Expand All @@ -966,7 +966,7 @@ func TestReconcile(t *testing.T) {
decider(testNamespace, testRevision, defaultScale, /* desiredScale */
-18 /* ebc */)),
Objects: []runtime.Object{
kpa(testNamespace, testRevision, WithPASKSReady, WithTraffic, markScaleTargetInitialized,
kpa(testNamespace, testRevision, WithPASKSReady, WithTraffic, markScaleTargetInitialized, WithScaleTargetNoScaleFailures,
WithPAMetricsService(privateSvc), withScales(1, defaultScale),
WithPAStatusService(testRevision), WithObservedGeneration(1)),
sks(testNamespace, testRevision, WithDeployRef(deployName), WithSKSReady,
Expand All @@ -984,7 +984,7 @@ func TestReconcile(t *testing.T) {
decider(testNamespace, testRevision, defaultScale, /* desiredScale */
1 /* ebc */)),
Objects: []runtime.Object{
kpa(testNamespace, testRevision, WithPASKSReady, WithTraffic, markScaleTargetInitialized,
kpa(testNamespace, testRevision, WithPASKSReady, WithTraffic, markScaleTargetInitialized, WithScaleTargetNoScaleFailures,
WithPAMetricsService(privateSvc), withScales(1, defaultScale),
WithPAStatusService(testRevision), WithObservedGeneration(1)),
defaultProxySKS,
Expand Down Expand Up @@ -1038,7 +1038,7 @@ func TestReconcile(t *testing.T) {
}, makeReadyPods(20, testNamespace, testRevision)...),
WantStatusUpdates: []clientgotesting.UpdateActionImpl{{
Object: kpa(testNamespace, testRevision, WithPASKSReady, WithTraffic,
markScaleTargetInitialized, withScales(20, 20), withInitialScale(20), WithReachabilityReachable,
markScaleTargetInitialized, WithScaleTargetNoScaleFailures, withScales(20, 20), withInitialScale(20), WithReachabilityReachable,
WithPAStatusService(testRevision), WithPAMetricsService(privateSvc), WithObservedGeneration(1),
),
}},
Expand All @@ -1064,7 +1064,7 @@ func TestReconcile(t *testing.T) {
}),
},
WantStatusUpdates: []clientgotesting.UpdateActionImpl{{
Object: kpa(testNamespace, testRevision, markScaleTargetInitialized,
Object: kpa(testNamespace, testRevision, markScaleTargetInitialized, WithScaleTargetNoScaleFailures,
WithNoTraffic(noTrafficReason, "The target is not receiving traffic."),
withScales(0, -1), WithReachabilityReachable,
WithPAMetricsService(privateSvc), WithObservedGeneration(1),
Expand Down Expand Up @@ -1134,7 +1134,7 @@ func TestReconcile(t *testing.T) {
}),
}, makeReadyPods(2, testNamespace, testRevision)...),
WantStatusUpdates: []clientgotesting.UpdateActionImpl{{
Object: kpa(testNamespace, testRevision, WithTraffic, WithPASKSReady, markScaleTargetInitialized,
Object: kpa(testNamespace, testRevision, WithTraffic, WithPASKSReady, markScaleTargetInitialized, WithScaleTargetNoScaleFailures,
withScales(2, 2), WithReachabilityReachable, WithPAStatusService(testRevision),
WithPAMetricsService(privateSvc), WithObservedGeneration(1),
),
Expand All @@ -1146,7 +1146,7 @@ func TestReconcile(t *testing.T) {
decider(testNamespace, testRevision, defaultScale, /* desiredScale */
1 /* ebc */)),
Objects: []runtime.Object{
kpa(testNamespace, testRevision, WithPASKSReady, WithTraffic, markScaleTargetInitialized,
kpa(testNamespace, testRevision, WithPASKSReady, WithTraffic, markScaleTargetInitialized, WithScaleTargetNoScaleFailures,
WithPAMetricsService(privateSvc), withScales(1, defaultScale),
WithPAStatusService(testRevision), WithObservedGeneration(1)),
defaultProxySKS,
Expand All @@ -1161,7 +1161,7 @@ func TestReconcile(t *testing.T) {
decider(testNamespace, testRevision, defaultScale, /* desiredScale */
1 /* ebc */)),
Objects: []runtime.Object{
kpa(testNamespace, testRevision, WithPASKSReady, WithTraffic, markScaleTargetInitialized,
kpa(testNamespace, testRevision, WithPASKSReady, WithTraffic, markScaleTargetInitialized, WithScaleTargetNoScaleFailures,
WithPAMetricsService(privateSvc), withScales(1, defaultScale),
WithPAStatusService(testRevision), WithObservedGeneration(1)),
defaultSKS,
Expand Down
Loading
Loading