diff --git a/CONTRIBUTORS.md b/CONTRIBUTORS.md index ea292ef..9a1884f 100644 --- a/CONTRIBUTORS.md +++ b/CONTRIBUTORS.md @@ -1 +1,2 @@ - [Chris Hirsch](mailto:chris@base2technology.com) +- [Lee Smith](mailto:lee.smith@fundingcircle.com) diff --git a/README.md b/README.md index 9749b3b..1304eec 100644 --- a/README.md +++ b/README.md @@ -10,7 +10,7 @@ Once the contents of the `PVC` have been validated, the check `Job`, the init `J Container resource requests are set to `15 millicores` of CPU and `20Mi` units of memory and use the Alpine image `alpine:3.11` for the `Job` and a default of `1Gi` for the `PVC`. If the environment variable `CHECK_STORAGE_PVC_SIZE` is set then the value of that will be used instead of the default. -By default, the nodes of the cluster will be discovered and only those nodes that are `untainted`, in a `Ready` state and not in the role of `master` will be used. If node(s) need to be `ignored` for whatever reason, then the environment variable `CHECK_STORAGE_IGNORED_CHECK_NODES` should be used a space or comma separated list of nodes should be supplied. If `auto-discovery` is not desired, the environment variable `CHECK_STORAGE_ALLOWED_CHECK_NODES` can be used and a space or comma separated list of nodes that should be checked needs to be supplied. If `CHECK_STORAGE_ALLOWED_CHECK_NODES` is supplied and a node in that list matches a node in the ignored (`CHECK_STORAGE_IGNORED_CHECK_NODES`) list then that node will be ignored. +By default, the nodes of the cluster will be discovered and only those nodes that are `untainted` (or whose taints are all listed in `CHECK_TOLERATIONS`), in a `Ready` state and not in the role of `master` will be used. If node(s) need to be `ignored` for whatever reason, then the environment variable `CHECK_STORAGE_IGNORED_CHECK_NODES` should be set to a space- or comma-separated list of those nodes. 
If `auto-discovery` is not desired, the environment variable `CHECK_STORAGE_ALLOWED_CHECK_NODES` can be set to a space- or comma-separated list of the nodes that should be checked. If `CHECK_STORAGE_ALLOWED_CHECK_NODES` is supplied and a node in that list matches a node in the ignored (`CHECK_STORAGE_IGNORED_CHECK_NODES`) list then that node will be ignored. By default, the storage check `Job` and initialize storage check `Job` will use Alpine's `alpine:3.11` image. If a different image is desired, use the environment variable `CHECK_STORAGE_IMAGE` or `CHECK_STORAGE_INIT_IMAGE` depending on which image should be changed. @@ -33,7 +33,7 @@ This check follows the list of actions in order during the run of the check: 1. Looks for old storage check job, storage init job, and PVC belonging to this check and cleans them up. 2. Creates a PVC in the namespace and waits for the PVC to be ready. 3. Creates a storage init configuration, applies it to the namespace, and waits for the storage init job to come up and initialize the PVC with known data. -4. Determine which nodes in the cluster are going to run the storage check by auto-discovery or a list supplied nodes via the `CHECK_STORAGE_IGNORED_CHECK_NODES` and `CHECK_STORAGE_ALLOWED_CHECK_NODES` environment variables. +4. Determine which nodes in the cluster are going to run the storage check, either by auto-discovery or from a list of nodes supplied via the `CHECK_STORAGE_IGNORED_CHECK_NODES` and `CHECK_STORAGE_ALLOWED_CHECK_NODES` environment variables. Tainted nodes are included only if every one of their taints is tolerated via `CHECK_TOLERATIONS`. 5. For each node that needs a check, creates a storage check configuration, applies it to the namespace, and waits for the storage check job to start and validate the contents of storage on each desired node. 6. Tear everything down once completed. 
@@ -55,6 +55,7 @@ This check follows the list of actions in order during the run of the check: - `CHECK_POD_CPU_LIMIT`: Check pod deployment CPU limit value. Calculated in decimal SI units `(75 = 75m cpu)`. - `CHECK_POD_MEM_REQUEST`: Check pod deployment memory request value. Calculated in binary SI units `(20 * 1024^2 = 20Mi memory)`. - `CHECK_POD_MEM_LIMIT`: Check pod deployment memory limit value. Calculated in binary SI units `(75 * 1024^2 = 75Mi memory)`. +- `CHECK_TOLERATIONS`: Check pod tolerations of node taints, as a comma separated list in the format `key=value:effect,key=value:effect`. `effect` must be `NoSchedule` or `PreferNoSchedule`. By default no taints are tolerated. - `ADDITIONAL_ENV_VARS`: Comma separated list of `key=value` variables passed into the pod's containers. - `SHUTDOWN_GRACE_PERIOD`: Amount of time in seconds the shutdown will allow itself to clean up after an interrupt signal (default=`30s`). - `DEBUG`: Verbose debug logging. diff --git a/cmd/storage-check/input.go b/cmd/storage-check/input.go index 8f59f18..8d10094 100644 --- a/cmd/storage-check/input.go +++ b/cmd/storage-check/input.go @@ -19,6 +19,7 @@ import ( kh "github.com/Comcast/kuberhealthy/v2/pkg/checks/external/checkclient" log "github.com/sirupsen/logrus" + corev1 "k8s.io/api/core/v1" ) // parseDebugSettings parses debug settings and fatals on errors. 
@@ -219,4 +220,45 @@ func parseInputValues() { shutdownGracePeriod = duration log.Infoln("Parsed SHUTDOWN_GRACE_PERIOD:", shutdownGracePeriod) } + + // Parse CHECK_TOLERATIONS in the format "key=value:effect,key=value:effect" + if len(tolerationsEnv) > 0 { + tolerationSpecs := strings.Split(tolerationsEnv, ",") + for _, spec := range tolerationSpecs { + parts := strings.Split(spec, ":") + if len(parts) != 2 { + log.Fatalf("Error: invalid toleration specification: %s", spec) + } + + keyValue := parts[0] + effect := parts[1] + + keyValueParts := strings.Split(strings.TrimSpace(keyValue), "=") + if len(keyValueParts) != 2 { + log.Fatalf("Error: invalid key-value specification: %s", keyValue) + } + + key := keyValueParts[0] + value := keyValueParts[1] + + var taintEffect corev1.TaintEffect + switch strings.TrimSpace(effect) { + case "NoSchedule": + taintEffect = corev1.TaintEffectNoSchedule + case "PreferNoSchedule": + taintEffect = corev1.TaintEffectPreferNoSchedule + default: + log.Fatalf("Error: unknown effect value: %s", effect) + } + + toleration := corev1.Toleration{ + Key: key, + Operator: "Equal", + Value: value, + Effect: taintEffect, + } + + tolerations = append(tolerations, toleration) + } + } } diff --git a/cmd/storage-check/main.go b/cmd/storage-check/main.go index 53e6dab..fc4d395 100644 --- a/cmd/storage-check/main.go +++ b/cmd/storage-check/main.go @@ -21,6 +21,7 @@ import ( kh "github.com/Comcast/kuberhealthy/v2/pkg/checks/external/checkclient" "github.com/Comcast/kuberhealthy/v2/pkg/kubeClient" log "github.com/sirupsen/logrus" + corev1 "k8s.io/api/core/v1" "k8s.io/client-go/kubernetes" ) @@ -110,6 +111,9 @@ var ( shutdownGracePeriodEnv = os.Getenv("SHUTDOWN_GRACE_PERIOD") shutdownGracePeriod time.Duration + tolerationsEnv = os.Getenv("CHECK_TOLERATIONS") + tolerations []corev1.Toleration + // Time object used for the check. 
now time.Time
diff --git a/cmd/storage-check/run_check.go b/cmd/storage-check/run_check.go
index f6dd65a..ac0653a 100644
--- a/cmd/storage-check/run_check.go
+++ b/cmd/storage-check/run_check.go
@@ -14,6 +14,7 @@ import (
 	"context"
 	"fmt"
 	"os"
+	"reflect"
 	"strings"
 	"time"
 
@@ -28,7 +29,6 @@ type Node struct {
 	schedulable bool
 	override    bool
 	status      v1.NodeStatus
-	effect      v1.TaintEffect
 }
 
 // runStorageCheck sets up a storage PVC, a storage init and storage check and applies it to the cluster.
@@ -169,19 +169,17 @@ func runStorageCheck() {
 		node.name = n.Name
 		node.status = n.Status
 
-		// TODO Need to work through more logic to see if this should be configurable
 		if len(n.Spec.Taints) > 0 {
-			// By defalt, only schedule the storage checks on untained (nodes that are Ready and not masters) nodes
-			for _, t := range n.Spec.Taints {
-				log.Debugln("t.Effect=", t.Effect)
-				log.Debugln("t.Key=", t.Key)
-				log.Debugln("t.Value=", t.Value)
-				log.Infoln("Adding node ", n.Name, " which is tainted as ", t.Effect, " NOT be schduled for check")
-				node.effect = t.Effect
-				node.schedulable = false
+			// By default, only schedule the storage checks on untainted nodes
+			node.schedulable = toleratesAllTaints(tolerations, n.Spec.Taints)
+
+			status := "be"
+			if !node.schedulable {
+				status = "NOT be"
 			}
+			log.Printf("Adding node %s with taints %s to %s scheduled for check", n.Name, formatTaints(n.Spec.Taints), status)
 		} else {
-			log.Infoln("Adding untainted node ", n.Name, " to be schduled for check")
+			log.Infoln("Adding untainted node ", n.Name, " to be scheduled for check")
 			node.schedulable = true
 		}
 		checkNodes[node.name] = node
@@ -370,3 +368,42 @@ func cleanUpOrphanedResources(ctx context.Context) chan error {
 
 	return cleanUpChan
 }
+
+// toleratesAllTaints reports whether every taint in nodeTaints is covered by one of
+// the given tolerations. A taint counts as covered only when some toleration is exactly
+// the Equal-operator toleration with the same key, value and effect; the comparison is
+// a reflect.DeepEqual, so a toleration with any other field set never matches.
+// A node without taints is always tolerated.
+func toleratesAllTaints(tolerations []v1.Toleration, nodeTaints []v1.Taint) bool {
+	for _, nodeTaint := range nodeTaints {
+		// Build the single toleration that would cover this taint once, not once per comparison.
+		want := v1.Toleration{
+			Key:      nodeTaint.Key,
+			Value:    nodeTaint.Value,
+			Operator: v1.TolerationOpEqual,
+			Effect:   nodeTaint.Effect,
+		}
+
+		tolerated := false
+		for _, toleration := range tolerations {
+			if reflect.DeepEqual(toleration, want) {
+				tolerated = true
+				break
+			}
+		}
+		if !tolerated {
+			return false
+		}
+	}
+	return true
+}
+
+// formatTaints renders taints as a comma separated list of "key=value:effect" entries.
+// It returns an empty string when taints is empty.
+func formatTaints(taints []v1.Taint) string {
+	taintStrings := make([]string, 0, len(taints))
+	for _, taint := range taints {
+		taintStrings = append(taintStrings, fmt.Sprintf("%s=%s:%s", taint.Key, taint.Value, taint.Effect))
+	}
+	return strings.Join(taintStrings, ",")
+}
diff --git a/cmd/storage-check/storage.go b/cmd/storage-check/storage.go
index 26a22a3..a83bbc9 100644
--- a/cmd/storage-check/storage.go
+++ b/cmd/storage-check/storage.go
@@ -141,6 +141,7 @@ func initializeStorageConfig(jobName string, pvcName string) *batchv1.Job {
 						Name:         "data",
 						VolumeSource: corev1.VolumeSource{PersistentVolumeClaim: pvc},
 					}},
+					Tolerations: tolerations,
 				},
 			},
 		}