add tests to validate uptime of services during user pod deletion
izabelacg committed May 30, 2024
1 parent 0b363c0 commit 30bd7e4
Showing 1 changed file with 164 additions and 0 deletions.
164 changes: 164 additions & 0 deletions test/ha/resilience_test.go
@@ -0,0 +1,164 @@
//go:build e2e
// +build e2e

/*
Copyright 2024 The Knative Authors

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package ha

import (
"context"
"strconv"
"sync"
"testing"

v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/watch"
"knative.dev/serving/pkg/apis/autoscaling"
"knative.dev/serving/pkg/apis/serving"
rtesting "knative.dev/serving/pkg/testing/v1"
"knative.dev/serving/test"
"knative.dev/serving/test/e2e"
v1test "knative.dev/serving/test/v1"
)

const minimumNumberOfReplicas = 2

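// TestActivatorNotInRequestPath validates that a service stays available while its user
// pods are deleted when the Activator is not in the request path (target-burst-capacity: 0).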
func TestActivatorNotInRequestPath(t *testing.T) {
clients := e2e.Setup(t)
ctx := context.Background()

// Create the service that we will continually probe during the disruption scenario.
names, resources := createPizzaPlanetService(t,
rtesting.WithConfigAnnotations(map[string]string{
autoscaling.MinScaleAnnotationKey: strconv.Itoa(minimumNumberOfReplicas), // Make sure we don't scale to zero during the test.
autoscaling.TargetBurstCapacityKey: "0", // The Activator is only added to the request path during scale from zero scenarios.
}),
)
test.EnsureTearDown(t, clients, &names)

testUptimeDuringUserPodDeletion(t, ctx, clients, names, resources)
}

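// TestActivatorInRequestPathAlways validates that a service stays available while its user
// pods are deleted when all requests go through the Activator (target-burst-capacity: -1).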
func TestActivatorInRequestPathAlways(t *testing.T) {
clients := e2e.Setup(t)
ctx := context.Background()

// Create the service that we will continually probe during the disruption scenario.
names, resources := createPizzaPlanetService(t,
rtesting.WithConfigAnnotations(map[string]string{
autoscaling.MinScaleAnnotationKey: strconv.Itoa(minimumNumberOfReplicas), // Make sure we don't scale to zero during the test.
autoscaling.TargetBurstCapacityKey: "-1", // Make sure all requests go through the activator.
}),
)
test.EnsureTearDown(t, clients, &names)

testUptimeDuringUserPodDeletion(t, ctx, clients, names, resources)
}

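// TestActivatorInRequestPathPossibly validates that a service stays available while its user
// pods are deleted when the Activator may or may not be in the request path (target-burst-capacity: 1).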
func TestActivatorInRequestPathPossibly(t *testing.T) {
clients := e2e.Setup(t)
ctx := context.Background()

// Create the service that we will continually probe during the disruption scenario.
names, resources := createPizzaPlanetService(t,
rtesting.WithConfigAnnotations(map[string]string{
autoscaling.MinScaleAnnotationKey: strconv.Itoa(minimumNumberOfReplicas), // Make sure we don't scale to zero during the test.
autoscaling.TargetBurstCapacityKey: "1", // The Activator may be in the path, depending on the revision scale and load.
}),
)
test.EnsureTearDown(t, clients, &names)

testUptimeDuringUserPodDeletion(t, ctx, clients, names, resources)
}

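// testUptimeDuringUserPodDeletion continually probes the given service while its user pods
// are deleted, waits until the deletions are observed, and asserts that all probes succeeded
// (SLO of 1) and that the expected number of user pods is present again.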
func testUptimeDuringUserPodDeletion(t *testing.T, ctx context.Context, clients *test.Clients, names test.ResourceNames, resources *v1test.ResourceObjects) {
t.Log("Starting prober")
prober := test.NewProberManager(t.Logf, clients, minProbes, test.AddRootCAtoTransport(context.Background(), t.Logf, clients, test.ServingFlags.HTTPS))

prober.Spawn(resources.Service.Status.URL.URL())
defer assertSLO(t, prober, 1)

// Get the user pods backing the service.
selector := labels.SelectorFromSet(labels.Set{
serving.ServiceLabelKey: names.Service,
})
pods, err := clients.KubeClient.CoreV1().Pods(test.ServingFlags.TestNamespace).List(ctx, metav1.ListOptions{LabelSelector: selector.String()})
if err != nil {
t.Fatalf("Unable to get pods: %v", err)
}

if len(pods.Items) != minimumNumberOfReplicas {
t.Fatalf("Expected to have %d user pod(s) running, but found %d.", minimumNumberOfReplicas, len(pods.Items))
}

t.Log("Watching user pods")
var wg sync.WaitGroup
wg.Add(2)
go watchPodEvents(t, ctx, clients, &wg, selector, pods.Items[0].Name)
go watchPodEvents(t, ctx, clients, &wg, selector, pods.Items[1].Name)

// Delete first user pod
err = clients.KubeClient.CoreV1().Pods(test.ServingFlags.TestNamespace).Delete(ctx, pods.Items[0].Name, metav1.DeleteOptions{})
if err != nil {
t.Fatalf("Unable to delete pod: %v", err)
}

// Delete second user pod
err = clients.KubeClient.CoreV1().Pods(test.ServingFlags.TestNamespace).Delete(ctx, pods.Items[1].Name, metav1.DeleteOptions{})
if err != nil {
t.Fatalf("Unable to delete pod: %v", err)
}

// Wait until both watchers have observed the deletion of their target pod.
wg.Wait()

newPods, err := clients.KubeClient.CoreV1().Pods(test.ServingFlags.TestNamespace).List(ctx, metav1.ListOptions{LabelSelector: selector.String()})
if err != nil {
t.Fatalf("Unable to get pods: %v", err)
}

if len(newPods.Items) != minimumNumberOfReplicas {
t.Errorf("Expected to have %d user pod(s) running, but found %d.", minimumNumberOfReplicas, len(newPods.Items))
}
}

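// watchPodEvents watches pods matching the selector and returns, signalling the WaitGroup,
// once a Deleted event is observed for targetPod or the watch channel is closed.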
func watchPodEvents(t *testing.T, ctx context.Context, clients *test.Clients, wg *sync.WaitGroup, selector labels.Selector, targetPod string) {
defer wg.Done()

watcher, err := clients.KubeClient.CoreV1().Pods(test.ServingFlags.TestNamespace).Watch(ctx, metav1.ListOptions{
LabelSelector: selector.String(),
Watch: true,
})
if err != nil {
// Don't call t.Fatalf from a non-test goroutine; report the error and return.
t.Errorf("Unable to watch pods: %v", err)
return
}

podEventsChan := watcher.ResultChan()
defer watcher.Stop()

for event := range podEventsChan {
pod, ok := event.Object.(*v1.Pod)
if !ok {
continue
}
t.Logf("Pod %s received event: %s", pod.Name, event.Type)
if event.Type == watch.Deleted && pod.Name == targetPod {
t.Logf("Pod %s deleted from node %s", pod.Name, pod.Spec.NodeName)
break
}
}
}
