diff --git a/config/core/configmaps/deployment.yaml b/config/core/configmaps/deployment.yaml index a0dd039b611c..b8c70ab39da8 100644 --- a/config/core/configmaps/deployment.yaml +++ b/config/core/configmaps/deployment.yaml @@ -22,7 +22,7 @@ metadata: app.kubernetes.io/component: controller app.kubernetes.io/version: devel annotations: - knative.dev/example-checksum: "ed77183a" + knative.dev/example-checksum: "e2f637c6" data: # This is the Go import path for the binary that is containerized # and substituted here. @@ -91,3 +91,20 @@ data: # Sets rootCA for the queue proxy - used by QPOptions # If omitted, or empty, no rootCA is added to the golang rootCAs queue-sidecar-rootca: "" + + # If set, it automatically configures pod anti-affinity requirements for all Knative services. + # It employs the `preferredDuringSchedulingIgnoredDuringExecution` weighted pod affinity term, + # aligning with the Knative revision label. It yields the configuration below in all workloads' deployments: + # ` + # affinity: + # podAntiAffinity: + # preferredDuringSchedulingIgnoredDuringExecution: + # - podAffinityTerm: + # topologyKey: kubernetes.io/hostname + # labelSelector: + # matchLabels: + # serving.knative.dev/revision: {{revision-name}} + # weight: 100 + # ` + # This may be "none" or "prefer-spread-revision-over-nodes" (default) + # default-affinity-type: "prefer-spread-revision-over-nodes" diff --git a/pkg/deployment/config.go b/pkg/deployment/config.go index d35cc814111b..4bb78d33056d 100644 --- a/pkg/deployment/config.go +++ b/pkg/deployment/config.go @@ -24,7 +24,6 @@ import ( corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/resource" "k8s.io/apimachinery/pkg/util/sets" - cm "knative.dev/pkg/configmap" ) @@ -68,6 +67,9 @@ const ( // qpoptions queueSidecarTokenAudiencesKey = "queue-sidecar-token-audiences" queueSidecarRooCAKey = "queue-sidecar-rootca" + + defaultAffinityTypeKey = "default-affinity-type" + defaultAffinityTypeValue = PreferSpreadRevisionOverNodes ) var ( @@ -103,6 +105,7 @@ func defaultConfig() *Config { DigestResolutionTimeout: digestResolutionTimeoutDefault, RegistriesSkippingTagResolving: sets.New("kind.local", "ko.local", "dev.local"), QueueSidecarCPURequest: &QueueSidecarCPURequestDefault, + DefaultAffinityType: defaultAffinityTypeValue, } // The following code is needed for ConfigMap testing. // defaultConfig must match the example in deployment.yaml which includes: `queue-sidecar-token-audiences: ""` @@ -164,6 +167,14 @@ func NewConfigFromMap(configMap map[string]string) (*Config, error) { return nil, fmt.Errorf("digest-resolution-timeout cannot be a non-positive duration, was %v", nc.DigestResolutionTimeout) } + if affinity, ok := configMap[defaultAffinityTypeKey]; ok { + switch opt := AffinityType(affinity); opt { + case None, PreferSpreadRevisionOverNodes: + nc.DefaultAffinityType = opt + default: + return nil, fmt.Errorf("unsupported %s value: %q", defaultAffinityTypeKey, affinity) + } + } return nc, nil } @@ -172,6 +183,17 @@ func NewConfigFromConfigMap(config *corev1.ConfigMap) (*Config, error) { return NewConfigFromMap(config.Data) } +// AffinityType specifies which affinity requirements will be automatically applied to the PodSpec of all Knative services. +type AffinityType string + +const ( + // None is used for deactivating affinity configuration for user workloads. + None AffinityType = "none" + + // PreferSpreadRevisionOverNodes is used to set pod anti-affinity requirements for user workloads. + PreferSpreadRevisionOverNodes AffinityType = "prefer-spread-revision-over-nodes" +) + // Config includes the configurations for the controller. type Config struct { // QueueSidecarImage is the name of the image used for the queue sidecar @@ -214,4 +236,8 @@ type Config struct { // QueueSidecarRootCA is a root certificate to be trusted by the queue proxy sidecar qpoptions. QueueSidecarRootCA string + + // DefaultAffinityType is a string that controls what affinity rules will be automatically + // applied to the PodSpec of all Knative services. + DefaultAffinityType AffinityType } diff --git a/pkg/deployment/config_test.go b/pkg/deployment/config_test.go index 014042545a3c..b4965126d5fd 100644 --- a/pkg/deployment/config_test.go +++ b/pkg/deployment/config_test.go @@ -81,6 +81,64 @@ func TestControllerConfiguration(t *testing.T) { wantConfig *Config data map[string]string }{{ + name: "controller configuration with no default affinity type specified", + wantConfig: &Config{ + RegistriesSkippingTagResolving: sets.New("kind.local", "ko.local", "dev.local"), + DigestResolutionTimeout: digestResolutionTimeoutDefault, + QueueSidecarImage: defaultSidecarImage, + QueueSidecarCPURequest: &QueueSidecarCPURequestDefault, + QueueSidecarTokenAudiences: sets.New(""), + ProgressDeadline: ProgressDeadlineDefault, + DefaultAffinityType: defaultAffinityTypeValue, + }, + data: map[string]string{ + QueueSidecarImageKey: defaultSidecarImage, + }, + }, { + name: "controller configuration with empty string set for the default affinity type", + wantErr: true, + data: map[string]string{ + QueueSidecarImageKey: defaultSidecarImage, + defaultAffinityTypeKey: "", + }, + }, { + name: "controller configuration with unsupported value for default affinity type", + wantErr: true, + data: map[string]string{ + QueueSidecarImageKey: defaultSidecarImage, + defaultAffinityTypeKey: "coconut", + }, + }, { + name: "controller configuration with the default affinity type set", + wantConfig: &Config{ + RegistriesSkippingTagResolving: sets.New("kind.local", "ko.local", "dev.local"), + DigestResolutionTimeout: digestResolutionTimeoutDefault, + QueueSidecarImage: defaultSidecarImage, + QueueSidecarCPURequest: &QueueSidecarCPURequestDefault, + QueueSidecarTokenAudiences: sets.New(""), + ProgressDeadline: ProgressDeadlineDefault, + DefaultAffinityType: defaultAffinityTypeValue, + }, + data: map[string]string{ + QueueSidecarImageKey: defaultSidecarImage, + defaultAffinityTypeKey: string(PreferSpreadRevisionOverNodes), + }, + }, { + name: "controller configuration with default affinity type deactivated", + wantConfig: &Config{ + RegistriesSkippingTagResolving: sets.New("kind.local", "ko.local", "dev.local"), + DigestResolutionTimeout: digestResolutionTimeoutDefault, + QueueSidecarImage: defaultSidecarImage, + QueueSidecarCPURequest: &QueueSidecarCPURequestDefault, + QueueSidecarTokenAudiences: sets.New(""), + ProgressDeadline: ProgressDeadlineDefault, + DefaultAffinityType: None, + }, + data: map[string]string{ + QueueSidecarImageKey: defaultSidecarImage, + defaultAffinityTypeKey: string(None), + }, + }, { name: "controller configuration with bad registries", wantConfig: &Config{ RegistriesSkippingTagResolving: sets.New("ko.local", ""), @@ -89,6 +147,7 @@ func TestControllerConfiguration(t *testing.T) { QueueSidecarCPURequest: &QueueSidecarCPURequestDefault, QueueSidecarTokenAudiences: sets.New("foo", "bar", "boo-srv"), ProgressDeadline: ProgressDeadlineDefault, + DefaultAffinityType: defaultAffinityTypeValue, }, data: map[string]string{ QueueSidecarImageKey: defaultSidecarImage, @@ -104,6 +163,7 @@ func TestControllerConfiguration(t *testing.T) { QueueSidecarCPURequest: &QueueSidecarCPURequestDefault, QueueSidecarTokenAudiences: sets.New(""), ProgressDeadline: 444 * time.Second, + DefaultAffinityType: defaultAffinityTypeValue, }, data: map[string]string{ QueueSidecarImageKey: defaultSidecarImage, @@ -118,6 +178,7 @@ func TestControllerConfiguration(t *testing.T) { QueueSidecarCPURequest: &QueueSidecarCPURequestDefault, QueueSidecarTokenAudiences: sets.New(""), ProgressDeadline: ProgressDeadlineDefault, + DefaultAffinityType: defaultAffinityTypeValue, }, data: map[string]string{ QueueSidecarImageKey: defaultSidecarImage, @@ -132,6 +193,7 @@ func TestControllerConfiguration(t *testing.T) { QueueSidecarCPURequest: &QueueSidecarCPURequestDefault, QueueSidecarTokenAudiences: sets.New(""), ProgressDeadline: ProgressDeadlineDefault, + DefaultAffinityType: defaultAffinityTypeValue, }, data: map[string]string{ QueueSidecarImageKey: defaultSidecarImage, @@ -151,6 +213,7 @@ func TestControllerConfiguration(t *testing.T) { QueueSidecarMemoryLimit: quantity("654m"), QueueSidecarEphemeralStorageLimit: quantity("321M"), QueueSidecarTokenAudiences: sets.New(""), + DefaultAffinityType: defaultAffinityTypeValue, }, data: map[string]string{ QueueSidecarImageKey: defaultSidecarImage, @@ -227,6 +290,7 @@ func TestControllerConfiguration(t *testing.T) { QueueSidecarEphemeralStorageRequest: quantity("9M"), QueueSidecarEphemeralStorageLimit: quantity("10M"), QueueSidecarTokenAudiences: sets.New(""), + DefaultAffinityType: defaultAffinityTypeValue, }, }, { name: "newer key case takes priority", @@ -268,6 +332,7 @@ func TestControllerConfiguration(t *testing.T) { QueueSidecarEphemeralStorageRequest: quantity("20M"), QueueSidecarEphemeralStorageLimit: quantity("21M"), QueueSidecarTokenAudiences: sets.New("foo"), + DefaultAffinityType: defaultAffinityTypeValue, }, }} diff --git a/pkg/reconciler/revision/resources/deploy.go b/pkg/reconciler/revision/resources/deploy.go index 2df44c1bb5a4..d17cfe830b54 100644 --- a/pkg/reconciler/revision/resources/deploy.go +++ b/pkg/reconciler/revision/resources/deploy.go @@ -39,6 +39,7 @@ import ( "k8s.io/apimachinery/pkg/util/intstr" apiconfig "knative.dev/serving/pkg/apis/config" + deploymentconfig "knative.dev/serving/pkg/deployment" ) const certVolumeName = "server-certs" @@ -150,6 +151,22 @@ func rewriteUserLivenessProbe(p *corev1.Probe, userPort int) { } } +func makePreferSpreadRevisionOverNodes(revisionLabelValue string) *corev1.PodAntiAffinity { + return &corev1.PodAntiAffinity{ + PreferredDuringSchedulingIgnoredDuringExecution: []corev1.WeightedPodAffinityTerm{{ + Weight: 100, + PodAffinityTerm: corev1.PodAffinityTerm{ + TopologyKey: corev1.LabelHostname, + LabelSelector: &metav1.LabelSelector{ + MatchLabels: map[string]string{ + serving.RevisionLabelKey: revisionLabelValue, + }, + }, + }, + }}, + } +} + func makePodSpec(rev *v1.Revision, cfg *config.Config) (*corev1.PodSpec, error) { queueContainer, err := makeQueueContainer(rev, cfg) tokenVolume := varTokenVolume.DeepCopy() @@ -210,6 +227,10 @@ func makePodSpec(rev *v1.Revision, cfg *config.Config) (*corev1.PodSpec, error) } } + if cfg.Deployment.DefaultAffinityType == deploymentconfig.PreferSpreadRevisionOverNodes && rev.Spec.Affinity == nil { + podSpec.Affinity = &corev1.Affinity{PodAntiAffinity: makePreferSpreadRevisionOverNodes(rev.Name)} + } + return podSpec, nil } diff --git a/pkg/reconciler/revision/resources/deploy_test.go b/pkg/reconciler/revision/resources/deploy_test.go index c9fd07b69fa9..bf1038130499 100644 --- a/pkg/reconciler/revision/resources/deploy_test.go +++ b/pkg/reconciler/revision/resources/deploy_test.go @@ -205,6 +205,31 @@ var ( EnableServiceLinks: ptr.Bool(false), } + defaultPodAntiAffinityRules = &corev1.PodAntiAffinity{ + PreferredDuringSchedulingIgnoredDuringExecution: []corev1.WeightedPodAffinityTerm{{ + Weight: 100, + PodAffinityTerm: corev1.PodAffinityTerm{ + TopologyKey: "kubernetes.io/hostname", + LabelSelector: &metav1.LabelSelector{ + MatchLabels: map[string]string{ + "serving.knative.dev/revision": "bar", + }, + }, + }, + }}, + } + + userDefinedPodAntiAffinityRules = &corev1.PodAntiAffinity{ + RequiredDuringSchedulingIgnoredDuringExecution: []corev1.PodAffinityTerm{{ + TopologyKey: "kubernetes.io/hostname", + LabelSelector: &metav1.LabelSelector{ + MatchLabels: map[string]string{ + "serving.knative.dev/revision": "bar", + }, + }, + }}, + } + maxUnavailable = intstr.FromInt(0) defaultDeployment = &appsv1.Deployment{ ObjectMeta: metav1.ObjectMeta{ @@ -1409,6 +1434,98 @@ func TestMakePodSpec(t *testing.T) { withEnvVar("SERVING_READINESS_PROBE", `[{"httpGet":{"path":"/","port":8080,"host":"127.0.0.1","scheme":"HTTP"}},{"httpGet":{"path":"/","port":8090,"host":"127.0.0.1","scheme":"HTTP"}}]`), ), }), + }, { + name: "with default affinity type set", + rev: revision("bar", "foo", + withContainers([]corev1.Container{{ + Name: servingContainerName, + Image: "busybox", + ReadinessProbe: withTCPReadinessProbe(v1.DefaultUserPort), + }}), + WithContainerStatuses([]v1.ContainerStatus{{ + ImageDigest: "busybox@sha256:deadbeef", + }}), + ), + fc: apicfg.Features{ + PodSpecAffinity: apicfg.Disabled, + }, + dc: deployment.Config{ + DefaultAffinityType: deployment.PreferSpreadRevisionOverNodes, + }, + want: podSpec( + []corev1.Container{ + servingContainer(func(container *corev1.Container) { + container.Image = "busybox@sha256:deadbeef" + }), + queueContainer(), + }, + func(p *corev1.PodSpec) { + p.Affinity = &corev1.Affinity{ + PodAntiAffinity: defaultPodAntiAffinityRules, + } + }, + ), + }, { + name: "with default affinity type deactivated", + rev: revision("bar", "foo", + withContainers([]corev1.Container{{ + Name: servingContainerName, + Image: "busybox", + ReadinessProbe: withTCPReadinessProbe(v1.DefaultUserPort), + }}), + WithContainerStatuses([]v1.ContainerStatus{{ + ImageDigest: "busybox@sha256:deadbeef", + }}), + ), + fc: apicfg.Features{ + PodSpecAffinity: apicfg.Disabled, + }, + dc: deployment.Config{ + DefaultAffinityType: deployment.None, + }, + want: podSpec( + []corev1.Container{ + servingContainer(func(container *corev1.Container) { + container.Image = "busybox@sha256:deadbeef" + }), + queueContainer(), + }, + ), + }, { + name: "with affinity rules set by both the user and the operator", + rev: revision("bar", "foo", + withContainers([]corev1.Container{{ + Name: servingContainerName, + Image: "busybox", + ReadinessProbe: withTCPReadinessProbe(v1.DefaultUserPort), + }}), + WithContainerStatuses([]v1.ContainerStatus{{ + ImageDigest: "busybox@sha256:deadbeef", + }}), + func(r *v1.Revision) { + r.Spec.Affinity = &corev1.Affinity{ + PodAntiAffinity: userDefinedPodAntiAffinityRules, + } + }), + fc: apicfg.Features{ + PodSpecAffinity: apicfg.Enabled, + }, + dc: deployment.Config{ + DefaultAffinityType: deployment.PreferSpreadRevisionOverNodes, + }, + want: podSpec( + []corev1.Container{ + servingContainer(func(container *corev1.Container) { + container.Image = "busybox@sha256:deadbeef" + }), + queueContainer(), + }, + func(p *corev1.PodSpec) { + p.Affinity = &corev1.Affinity{ + PodAntiAffinity: userDefinedPodAntiAffinityRules, + } + }, + ), }} for _, test := range tests {