feat: add workload test case for external tests

jpculp · jpculp · commit c35d8a75533c · 2025-11-20T19:54:19.000-08:00
Signed-off-by: Patrick J.P. Culp <jpculp@amazon.com>
diff --git a/test/cases/workload/main_test.go b/test/cases/workload/main_test.go
@@ -0,0 +1,45 @@
+//go:build e2e
+
+package workload
+
+import (
+	"context"
+	"flag"
+	"log"
+	"os"
+	"os/signal"
+	"testing"
+
+	"sigs.k8s.io/e2e-framework/pkg/env"
+	"sigs.k8s.io/e2e-framework/pkg/envconf"
+)
+
+var (
+	testenv                 env.Environment // shared e2e environment; assigned in TestMain
+	workloadTestCommand     *string         // -workloadTestCommand: command for workload test
+	workloadTestImage       *string         // -workloadTestImage: image for workload test
+	workloadTestName        *string         // -workloadTestName: job name, defaults to "workload-test"
+	workloadTestAccelerator *string         // -workloadTestAccelerator: "neuron", "nvidia" or empty
+)
+
+// TestMain defines the workload flags before envconf.NewFromFlags builds the
+// config, then runs the suite in an environment cancelled on os.Interrupt.
+func TestMain(m *testing.M) {
+	workloadTestCommand = flag.String("workloadTestCommand", "", "command for workload test")
+	workloadTestImage = flag.String("workloadTestImage", "", "image for workload test")
+	workloadTestName = flag.String("workloadTestName", "workload-test", "name for workload test")
+	workloadTestAccelerator = flag.String("workloadTestAccelerator", "", "accelerator for workload test: neuron, nvidia")
+	cfg, err := envconf.NewFromFlags()
+	if err != nil {
+		log.Fatalf("failed to initialize test environment: %v", err)
+	}
+	ctx, cancel := signal.NotifyContext(context.Background(), os.Interrupt)
+	testenv = env.NewWithConfig(cfg).WithContext(ctx)
+	testenv.Setup(func(ctx context.Context, config *envconf.Config) (context.Context, error) {
+		log.Println("Starting workload test suite...")
+		return ctx, nil
+	})
+	code := testenv.Run(m)
+	cancel() // os.Exit skips deferred calls, so stop signal delivery explicitly
+	os.Exit(code)
+}
diff --git a/test/cases/workload/workload_test.go b/test/cases/workload/workload_test.go
@@ -0,0 +1,148 @@
+//go:build e2e
+
+package workload
+
+import (
+	"context"
+	"testing"
+	"time"
+
+	fwext "github.com/aws/aws-k8s-tester/internal/e2e"
+	batchv1 "k8s.io/api/batch/v1"
+	corev1 "k8s.io/api/core/v1"
+	"k8s.io/apimachinery/pkg/api/resource"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"sigs.k8s.io/e2e-framework/klient/wait"
+	"sigs.k8s.io/e2e-framework/pkg/envconf"
+	"sigs.k8s.io/e2e-framework/pkg/features"
+)
+
+func createWorkloadJob(name, image, command, accelerator string) *batchv1.Job {
+	backoffLimit := int32(4)
+	job := &batchv1.Job{
+		ObjectMeta: metav1.ObjectMeta{
+			Name:      name,
+			Namespace: corev1.NamespaceDefault,
+			Labels:    map[string]string{"app": name},
+		},
+		Spec: batchv1.JobSpec{
+			BackoffLimit: &backoffLimit,
+			Template: corev1.PodTemplateSpec{
+				ObjectMeta: metav1.ObjectMeta{
+					Labels: map[string]string{"app": name},
+				},
+				Spec: corev1.PodSpec{
+					RestartPolicy: corev1.RestartPolicyNever,
+					Containers: []corev1.Container{
+						{
+							Name:            name,
+							Image:           image,
+							Command:         []string{"/bin/bash", "-c"},
+							Args:            []string{command},
+							ImagePullPolicy: corev1.PullAlways,
+						},
+					},
+				},
+			},
+		},
+	}
+
+	switch accelerator {
+	case "neuron":
+		job.Spec.Template.Spec.Containers[0].Resources = corev1.ResourceRequirements{
+			Limits: corev1.ResourceList{
+				"aws.amazon.com/neuron": resource.MustParse("1"),
+			},
+			Requests: corev1.ResourceList{
+				"aws.amazon.com/neuron": resource.MustParse("1"),
+			},
+		}
+	case "nvidia":
+		job.Spec.Template.Spec.Containers[0].Resources = corev1.ResourceRequirements{
+			Limits: corev1.ResourceList{
+				"nvidia.com/gpu": resource.MustParse("1"),
+			},
+			Requests: corev1.ResourceList{
+				"nvidia.com/gpu": resource.MustParse("1"),
+			},
+		}
+	}
+
+	return job
+}
+
+// TestWorkload runs the command given by the workloadTest* flags as a Job and
+// passes when that Job succeeds. Missing name, command or image, or an unknown
+// accelerator, fail the test before anything is created. Teardown prints the
+// job's logs and deletes the job.
+func TestWorkload(t *testing.T) {
+	name, accel := *workloadTestName, *workloadTestAccelerator
+	switch {
+	case name == "":
+		t.Fatal("workloadTestName must be set to run the test")
+	case *workloadTestCommand == "":
+		t.Fatal("workloadTestCommand must be set to run the test")
+	case *workloadTestImage == "":
+		t.Fatal("workloadTestImage must be set to run the test")
+	}
+	switch accel {
+	case "", "neuron", "nvidia": // empty means no accelerator
+	default:
+		t.Fatalf("Invalid acceleration type '%s', must be 'neuron' or 'nvidia'", accel)
+	}
+
+	builder := features.New(name).WithLabel("suite", "workload")
+	hardware := map[string]string{"neuron": "neuron", "nvidia": "gpu"}
+	if hw, ok := hardware[accel]; ok {
+		builder = builder.WithLabel("hardware", hw)
+	}
+
+	// jobRef returns a name-only Job used to address the created job afterwards.
+	jobRef := func() *batchv1.Job {
+		return &batchv1.Job{
+			ObjectMeta: metav1.ObjectMeta{Name: name, Namespace: corev1.NamespaceDefault},
+		}
+	}
+
+	submit := func(ctx context.Context, t *testing.T, cfg *envconf.Config) context.Context {
+		job := createWorkloadJob(name, *workloadTestImage, *workloadTestCommand, accel)
+		if accel == "" {
+			t.Logf("Creating %s job", name)
+		} else {
+			t.Logf("Creating %s job with %s acceleration", name, accel)
+		}
+		if err := cfg.Client().Resources().Create(ctx, job); err != nil {
+			t.Fatal(err)
+		}
+		t.Logf("%s job created successfully", name)
+		return ctx
+	}
+
+	awaitSuccess := func(ctx context.Context, t *testing.T, cfg *envconf.Config) context.Context {
+		t.Logf("Waiting for %s job to complete", name)
+		succeeded := fwext.NewConditionExtension(cfg.Client().Resources()).JobSucceeded(jobRef())
+		if err := wait.For(succeeded, wait.WithContext(ctx), wait.WithTimeout(20*time.Minute)); err != nil {
+			t.Fatal(err)
+		}
+		return ctx
+	}
+
+	// cleanup still prints and deletes after a log-retrieval error; errors are reported with t.Error.
+	cleanup := func(ctx context.Context, t *testing.T, cfg *envconf.Config) context.Context {
+		output, err := fwext.GetJobLogs(cfg.Client().RESTConfig(), jobRef())
+		if err != nil {
+			t.Error(err)
+		}
+		t.Logf("Test log for %s:", name)
+		t.Log(output)
+		background := metav1.DeletePropagationBackground
+		withBackground := func(do *metav1.DeleteOptions) { do.PropagationPolicy = &background }
+		if err := cfg.Client().Resources().Delete(ctx, jobRef(), withBackground); err != nil {
+			t.Error(err)
+		}
+		return ctx
+	}
+
+	workload := builder.Setup(submit).Assess("Job succeeds", awaitSuccess).Teardown(cleanup).Feature()
+	testenv.Test(t, workload)
+}