diff --git a/README.md b/README.md
index f1af5cc..5adb0bd 100644
--- a/README.md
+++ b/README.md
@@ -86,10 +86,10 @@ jaeger-agent-4k845 jaeger-agent 100Mi 100Mi 2022-11-11 21:06:3
 
 ### Development
 
-If you wish to force some `OOMKilled` for testing purposes, you can use [`oomer`](https://github.com/jdockerty/oomer)
+If you wish to force some pods to be `OOMKilled` for testing purposes, you can use [`oomer`](https://github.com/jdockerty/oomer)
 or simply run
 
-    kubectl apply -f https://github.com/jdockerty/oomer/blob/main/oomer.yaml
+    kubectl apply -f https://raw.githubusercontent.com/jdockerty/oomer/main/oomer.yaml
 
 This will create the `oomkilled` namespace and a `Deployment` with pods that continually exit with code `137`,
 in order to be picked up by `oomd`.
diff --git a/cmd/plugin/cli/root.go b/cmd/plugin/cli/root.go
index 49d3209..508e11a 100644
--- a/cmd/plugin/cli/root.go
+++ b/cmd/plugin/cli/root.go
@@ -34,7 +34,6 @@ var (
 	// Only 'time' is supported currently.
 	sortField string
 
-	// Formatting for table output, similar to other kubectl commands.
 	t = tabwriter.NewWriter(os.Stdout, 10, 1, 5, ' ', 0)
 )
 
diff --git a/pkg/plugin/plugin.go b/pkg/plugin/plugin.go
index 3c147b4..e9168a7 100644
--- a/pkg/plugin/plugin.go
+++ b/pkg/plugin/plugin.go
@@ -59,6 +59,20 @@ func getK8sClientAndConfig(configFlags *genericclioptions.ConfigFlags) (*kuberne
 	return clientset, config, nil
 }
 
+// getPodSpecIndex is a helper function that returns the index of a container
+// within the containers list of the pod specification. This index is used
+// because the ordering of the containerStatuses field is not guaranteed to
+// match the ordering of the pod spec.
+func getPodSpecIndex(name string, pod v1.Pod) (int, error) {
+
+	for i, c := range pod.Spec.Containers {
+		if name == c.Name {
+			return i, nil
+		}
+	}
+	return -1, fmt.Errorf("unable to retrieve pod spec index for %s", name)
+}
+
 // GetNamespace will retrieve the current namespace from the provided namespace or kubeconfig file of the caller
 // or handle the return of the all namespaces shortcut when the flag is set.
 func GetNamespace(configFlags *genericclioptions.ConfigFlags, all bool, givenNamespace string) (string, error) {
@@ -105,13 +119,12 @@ func BuildTerminatedPodsInfo(client *kubernetes.Clientset, namespace string) (Te
 		return nil, fmt.Errorf("failed to list pods: %w", err)
 	}
 
-	terminatedPods := TerminatedPodsFilter(pods.Items)
-
 	var terminatedPodsInfo []TerminatedPodInfo
-	for _, pod := range terminatedPods {
+	terminatedPods := TerminatedPodsFilter(pods.Items)
 
-		for i, containerStatus := range pod.Status.ContainerStatuses {
+	for _, pod := range terminatedPods {
+		for _, containerStatus := range pod.Status.ContainerStatuses {
 			// Not every container within the pod will be in a terminated state, we skip these ones.
 			// This also means we can use the relevant index to directly access the container,
@@ -120,8 +133,13 @@ func BuildTerminatedPodsInfo(client *kubernetes.Clientset, namespace string) (Te
 				continue
 			}
 
-			containerStartTime := pod.Status.ContainerStatuses[i].LastTerminationState.Terminated.StartedAt.String()
-			containerTerminatedTime := pod.Status.ContainerStatuses[i].LastTerminationState.Terminated.FinishedAt
+			containerStartTime := containerStatus.LastTerminationState.Terminated.StartedAt.String()
+			containerTerminatedTime := containerStatus.LastTerminationState.Terminated.FinishedAt
+
+			podSpecIndex, err := getPodSpecIndex(containerStatus.Name, pod)
+			if err != nil {
+				return nil, err
+			}
 
 			// Build our terminated pod info struct
 			info := TerminatedPodInfo{
@@ -131,11 +149,10 @@ func BuildTerminatedPodsInfo(client *kubernetes.Clientset, namespace string) (Te
 				terminatedTime: containerTerminatedTime.Time,
 				TerminatedTime: containerTerminatedTime.String(),
 				Memory: MemoryInfo{
-					Limit:   pod.Spec.Containers[i].Resources.Limits.Memory().String(),
-					Request: pod.Spec.Containers[i].Resources.Requests.Memory().String(),
+					Limit:   pod.Spec.Containers[podSpecIndex].Resources.Limits.Memory().String(),
+					Request: pod.Spec.Containers[podSpecIndex].Resources.Requests.Memory().String(),
 				},
 			}
-
 			// TODO: Since we know all pods here have been in the "terminated state", can we
 			// achieve this same result in an elegant way?
 			terminatedPodsInfo = append(terminatedPodsInfo, info)
diff --git a/pkg/plugin/plugin_test.go b/pkg/plugin/plugin_test.go
index 2590b09..3187aa9 100644
--- a/pkg/plugin/plugin_test.go
+++ b/pkg/plugin/plugin_test.go
@@ -1,15 +1,23 @@
 package plugin
 
 import (
+	"bufio"
+	"fmt"
+	"io"
+	"net/http"
 	"os"
 	"testing"
 	"time"
 
 	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/suite"
+	appsv1 "k8s.io/api/apps/v1"
 	v1 "k8s.io/api/core/v1"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	k8syaml "k8s.io/apimachinery/pkg/util/yaml"
 	"k8s.io/cli-runtime/pkg/genericclioptions"
 	"k8s.io/kubectl/pkg/cmd"
+	"k8s.io/kubectl/pkg/scheme"
 )
 
 var KubernetesConfigFlags = genericclioptions.NewConfigFlags(false)
@@ -19,23 +27,15 @@ const (
 	// The namespace to use with tests that do not require access to a working cluster.
 	testNamespace = "test-namespace"
 
-	// The namespace defined for use with the `oomer` manifest.
-	integrationTestNamespace = "oomkilled"
-
 	// A manifest which contains the `oomkilled` namespace and `oomer` deployment for testing purposes.
 	forceOOMKilledManifest = "https://raw.githubusercontent.com/jdockerty/oomer/main/oomer.yaml"
 )
 
-func setupIntegrationTestDependencies(t *testing.T) {
+type RequiresClusterTests struct {
+	suite.Suite
 
-	err := applyOrDeleteOOMKilledManifest(false)
-	if err != nil {
-		t.Fatalf("unable to apply OOMKilled manifest: %s", err)
-	}
-	defer applyOrDeleteOOMKilledManifest(true)
-
-	t.Log("Waiting 20 seconds for pods to start being OOMKilled...")
-	time.Sleep(20 * time.Second)
+	// The namespace defined for use with the `oomer` manifest.
+	IntegrationTestNamespace string
 }
 
 // applyOrDeleteOOMKilledManifest is a rather hacky way to utilise `kubectl` within
@@ -56,21 +56,110 @@ func applyOrDeleteOOMKilledManifest(runDelete bool) error {
 	return nil
 }
 
+// getMemoryRequestAndLimitFromDeploymentManifest is a helper function for retrieving the
+// string representation of a container's memory request and limit, such as "128Mi", from
+// a deployment manifest.
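+// The reader may contain multiple YAML documents; the values are taken from the first
+// Deployment that is found and are returned in (request, limit) order.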
+func getMemoryRequestAndLimitFromDeploymentManifest(r io.Reader, containerIndex int) (string, string, error) {
+
+	multir := k8syaml.NewYAMLReader(bufio.NewReader(r))
+
+	for {
+
+		buf, err := multir.Read()
+		if err != nil {
+			if err == io.EOF {
+				break
+			}
+			return "", "", err
+		}
+
+		obj, gvk, err := scheme.Codecs.UniversalDeserializer().Decode(buf, nil, nil)
+		if err != nil {
+			return "", "", err
+		}
+
+		switch gvk.Kind {
+		case "Deployment":
+			d := obj.(*appsv1.Deployment)
+			request := d.Spec.Template.Spec.Containers[containerIndex].Resources.Requests["memory"]
+			limit := d.Spec.Template.Spec.Containers[containerIndex].Resources.Limits["memory"]
+			return request.String(), limit.String(), nil
+
+		}
+	}
+
+	return "", "", fmt.Errorf("unable to get Kind=Deployment from manifest")
+}
+
+func (rc *RequiresClusterTests) SetupTest() {
+	rc.IntegrationTestNamespace = "oomkilled"
+}
+
+func (rc *RequiresClusterTests) SetupSuite() {
+	err := applyOrDeleteOOMKilledManifest(false)
+	if err != nil {
+		rc.T().Fatalf("unable to apply OOMKilled manifest: %s", err)
+	}
+
+	rc.T().Log("Waiting 30 seconds for pods to start being OOMKilled...")
+	time.Sleep(30 * time.Second)
+}
+
+// TearDownSuite runs the deletion process against the Namespace and Deployment
+// created as part of the testing process.
+func (rc *RequiresClusterTests) TearDownSuite() {
+	applyOrDeleteOOMKilledManifest(true)
+}
+
 // TestRunPlugin tests against an initialised cluster with OOMKilled pods that
 // the plugin's functionality works as expected.
-func TestRunPlugin(t *testing.T) {
+func (rc *RequiresClusterTests) TestRunPlugin() {
+	pods, err := Run(KubernetesConfigFlags, rc.IntegrationTestNamespace)
+	assert.Nil(rc.T(), err)
 
-	if testing.Short() {
-		t.Skipf("skipping %s which requires running cluster", t.Name())
-	}
+	assert.Greater(rc.T(), len(pods), 0, "expected number of failed pods to be greater than 0, got %d", len(pods))
+}
 
-	setupIntegrationTestDependencies(t)
+func (rc *RequiresClusterTests) TestCorrectResources() {
+	res, err := http.Get(forceOOMKilledManifest)
+	if err != nil {
+		// TODO: Wrap these calls in a retry, as they rely on external factors which can
+		// fail transiently; for example, an issue with GitHub here would cause an error
+		// when fetching the manifest.
+		rc.T().Skipf("Skipping %s as the GET request for the manifest failed", rc.T().Name())
+	}
+	defer res.Body.Close()
+
+	// We can hardcode containerIndex 0 here: the oomer manifest is controlled by the author,
+	// so the container of interest is known to be at the first index.
+	knownIndex := 0
+	manifestReq, manifestLim, err := getMemoryRequestAndLimitFromDeploymentManifest(res.Body, knownIndex)
+	assert.Nil(rc.T(), err) // We don't skip here on failure: if the manifest was fetched, it should contain a Deployment.
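+
+	// Run the plugin against the namespace created by the oomer manifest and compare the
+	// memory request and limit it reports with the values taken from the manifest itself.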
+	pods, _ := Run(KubernetesConfigFlags, rc.IntegrationTestNamespace)
+
+	podMemoryRequest := pods[knownIndex].Pod.Spec.Containers[knownIndex].Resources.Requests["memory"]
+	podMemoryLimit := pods[knownIndex].Pod.Spec.Containers[knownIndex].Resources.Limits["memory"]
+
+	assert.Equal(rc.T(), podMemoryRequest.String(), manifestReq)
+	assert.Equal(rc.T(), podMemoryLimit.String(), manifestLim)
+	rc.T().Logf(
+		"\nMemory:\n\tManifest Request: %s\n\tManifest Limit: %s\n\tPod Request: %s\n\tPod Limit: %s\n",
+		manifestReq,
+		manifestLim,
+		&podMemoryRequest,
+		&podMemoryLimit,
+	)
 
-	pods, err := Run(KubernetesConfigFlags, integrationTestNamespace)
-	assert.Nil(t, err)
+}
 
-	assert.Greater(t, len(pods), 0, "expected number of failed pods to be greater than 0, got %d", len(pods))
+func TestRequiresClusterSuite(t *testing.T) {
+	if testing.Short() {
+		t.Skipf("skipping %s which requires running cluster", t.Name())
+	}
+	suite.Run(t, new(RequiresClusterTests))
 }
 
 func TestGetNamespace(t *testing.T) {