Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions apis/multiarch/v1beta1/groupversion_info.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,5 +37,6 @@ var (

// Resource and kind names of the API types served by this group/version.
const (
	// ClusterPodPlacementConfigResource is the lowercase, plural resource name of
	// the ClusterPodPlacementConfig type, e.g. for use in RBAC policy rules.
	ClusterPodPlacementConfigResource = "clusterpodplacementconfigs"
	// ClusterPodPlacementConfigKind is the kind name of the ClusterPodPlacementConfig type.
	ClusterPodPlacementConfigKind = "ClusterPodPlacementConfig"
	// PodPlacementConfigResource is the lowercase, plural resource name of the
	// PodPlacementConfig type, e.g. for use in RBAC policy rules.
	PodPlacementConfigResource = "podplacementconfigs"
	// ENoExecEventKind is the kind name of the ENoExecEvent type.
	ENoExecEventKind = "ENoExecEvent"
	// ENoExecEventResource is the lowercase, plural resource name of the ENoExecEvent type.
	ENoExecEventResource = "enoexecevents"
)
5 changes: 5 additions & 0 deletions controllers/operator/podplacement_objects.go
Original file line number Diff line number Diff line change
Expand Up @@ -240,6 +240,11 @@ func buildClusterRoleController() *rbacv1.ClusterRole {
Resources: []string{v1beta1.ClusterPodPlacementConfigResource},
Verbs: []string{LIST, WATCH, GET},
},
{
APIGroups: []string{v1beta1.GroupVersion.Group},
Resources: []string{v1beta1.PodPlacementConfigResource},
Verbs: []string{LIST, WATCH, GET},
},
{
APIGroups: []string{""},
Resources: []string{"configmaps", "secrets"},
Expand Down
27 changes: 16 additions & 11 deletions controllers/podplacement/events.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,16 +11,21 @@ const (
ArchitectureAwareSchedulingGateRemovalFailure = "ArchAwareSchedGateRemovalFailed"
ArchitectureAwareSchedulingGateRemovalSuccess = "ArchAwareSchedGateRemovalSuccess"
NoSupportedArchitecturesFound = "NoSupportedArchitecturesFound"
ArchitecturePreferredAffinityDuplicates = "ArchAwarePreferredAffinityDuplicates"

SchedulingGateAddedMsg = "Successfully gated with the " + utils.SchedulingGateName + " scheduling gate"
SchedulingGateRemovalSuccessMsg = "Successfully removed the " + utils.SchedulingGateName + " scheduling gate"
SchedulingGateRemovalFailureMsg = "Failed to remove the scheduling gate \"" + utils.SchedulingGateName + "\""
ArchitecturePredicatesConflictMsg = "All the scheduling predicates already include architecture-specific constraints"
ArchitecturePredicateSetupMsg = "Set the supported architectures to "
ArchitecturePreferredPredicateSetupMsg = "Set the architecture preferences in the nodeAffinity"
ArchitecturePreferredPredicateSkippedMsg = "The node affinity already includes architecture preferences"
ImageArchitectureInspectionErrorMsg = "Failed to retrieve the supported architectures: "
NoSupportedArchitecturesFoundMsg = "Pod cannot be scheduled due to incompatible image architectures; container images have no supported architectures in common"
ArchitectureAwareGatedPodIgnoredMsg = "The gated pod has been modified and is no longer eligible for architecture-aware scheduling"
ImageInspectionErrorMaxRetriesMsg = "Failed to retrieve the supported architectures after multiple retries"
SchedulingGateAddedMsg = "Successfully gated with the " + utils.SchedulingGateName + " scheduling gate"
SchedulingGateRemovalSuccessMsg = "Successfully removed the " + utils.SchedulingGateName + " scheduling gate"
SchedulingGateRemovalFailureMsg = "Failed to remove the scheduling gate \"" + utils.SchedulingGateName + "\""
ArchitecturePredicatesConflictMsg = "All the scheduling predicates already include architecture-specific constraints"
ArchitecturePredicateSetupMsg = "Set the supported architectures to "

ArchitecturePreferredPredicateSetupMsg = "Applied all architecture preferences from configuration"
ArchitecturePreferredAffinityWithDuplicatesMsg = "Applied some architecture preferences from configuration; others were already set"
ArchitecturePreferredAffinityAllDuplicatesMsg = "Skipped all architecture preferences from configuration; all were already set"
ArchitecturePreferredPredicateSkippedMsg = "Skipped configuration; no architecture preferences were provided"

ImageArchitectureInspectionErrorMsg = "Failed to retrieve the supported architectures: "
NoSupportedArchitecturesFoundMsg = "Pod cannot be scheduled due to incompatible image architectures; container images have no supported architectures in common"
ArchitectureAwareGatedPodIgnoredMsg = "The gated pod has been modified and is no longer eligible for architecture-aware scheduling"
ImageInspectionErrorMaxRetriesMsg = "Failed to retrieve the supported architectures after multiple retries"
)
91 changes: 70 additions & 21 deletions controllers/podplacement/pod_model.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ import (
ctrllog "sigs.k8s.io/controller-runtime/pkg/log"

"github.com/openshift/multiarch-tuning-operator/apis/multiarch/common"
"github.com/openshift/multiarch-tuning-operator/apis/multiarch/common/plugins"
"github.com/openshift/multiarch-tuning-operator/apis/multiarch/v1beta1"
"github.com/openshift/multiarch-tuning-operator/controllers/podplacement/metrics"
"github.com/openshift/multiarch-tuning-operator/pkg/image"
Expand Down Expand Up @@ -166,12 +167,9 @@ func (pod *Pod) setRequiredArchNodeAffinity(requirement corev1.NodeSelectorRequi
}

// SetPreferredArchNodeAffinity sets the node affinity for the pod to the preferences given in the ClusterPodPlacementConfig.
func (pod *Pod) SetPreferredArchNodeAffinity(cppc *v1beta1.ClusterPodPlacementConfig) {
// Prevent overriding of user-provided kubernetes.io/arch preferred affinities or overwriting previously set preferred affinity
if pod.isPreferredAffinityConfiguredForArchitecture() {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hi @AnnaZivkovic, why are we removing this check? Should we ignore Pods if the user has already set a Preferred Affinity for architecture?
If the intention is to cover both local PPC and CPPC, could we introduce a third option to distinguish whether the Preferred Affinity was user-defined or modified by our code? For example, we might rely on the label multiarch.openshift.io/preferred-node-affinity — treat Pods without this label (or with it not set) differently from Pods that have the value set.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@lwan-wanglin that is a good idea! I'll add a value to denote how the label got set.

This check will need to be removed or modified because it cannot tell whether the preferred node affinity was set up by the CPPC or by the PPC. With the original check, the CPPC preferred affinity would not be considered at all if the PPC had set anything. Though I now think removing it is the wrong move. I'll give it a more fine-grained approach.

return
}

// The configSource parameter identifies which configuration is setting the preferences (e.g., "ClusterPodPlacementConfig" or "PodPlacementConfig/my-ppc").
func (pod *Pod) SetPreferredArchNodeAffinity(nodeAffinity *plugins.NodeAffinityScoring, configSource string) {
log := ctrllog.FromContext(pod.Ctx())
if pod.Spec.Affinity == nil {
pod.Spec.Affinity = &corev1.Affinity{}
}
Expand All @@ -184,27 +182,78 @@ func (pod *Pod) SetPreferredArchNodeAffinity(cppc *v1beta1.ClusterPodPlacementCo
pod.Spec.Affinity.NodeAffinity.PreferredDuringSchedulingIgnoredDuringExecution = []corev1.PreferredSchedulingTerm{}
}

for _, nodeAffinityScoringPlatformTerm := range cppc.Spec.Plugins.NodeAffinityScoring.Platforms {
preferredSchedulingTerm := corev1.PreferredSchedulingTerm{
Weight: nodeAffinityScoringPlatformTerm.Weight,
Preference: corev1.NodeSelectorTerm{
MatchExpressions: []corev1.NodeSelectorRequirement{
{
Key: utils.ArchLabel,
Operator: corev1.NodeSelectorOpIn,
Values: []string{nodeAffinityScoringPlatformTerm.Architecture},
seenArchitectures := pod.getExistingPreferredArchitectures()
var preferredSchedulingTerms []corev1.PreferredSchedulingTerm
var skippedArchitectures []string
for _, nodeAffinityScoringPlatformTerm := range nodeAffinity.Platforms {
if !seenArchitectures[nodeAffinityScoringPlatformTerm.Architecture] {
preferredSchedulingTerm := corev1.PreferredSchedulingTerm{
Weight: nodeAffinityScoringPlatformTerm.Weight,
Preference: corev1.NodeSelectorTerm{
MatchExpressions: []corev1.NodeSelectorRequirement{
{
Key: utils.ArchLabel,
Operator: corev1.NodeSelectorOpIn,
Values: []string{nodeAffinityScoringPlatformTerm.Architecture},
},
},
},
},
}
preferredSchedulingTerms = append(preferredSchedulingTerms, preferredSchedulingTerm)
seenArchitectures[nodeAffinityScoringPlatformTerm.Architecture] = true
} else {
skippedArchitectures = append(skippedArchitectures, nodeAffinityScoringPlatformTerm.Architecture)
log.Info("Preferred affinity for pod is already set", "Architecture", nodeAffinityScoringPlatformTerm.Architecture, "Weight", nodeAffinityScoringPlatformTerm.Weight, "Pod.Name", pod.Name, "Pod.Namespace", pod.Namespace, "ConfigSource", configSource)
}
}

if preferredSchedulingTerms != nil {
pod.Spec.Affinity.NodeAffinity.PreferredDuringSchedulingIgnoredDuringExecution = append(
pod.Spec.Affinity.NodeAffinity.PreferredDuringSchedulingIgnoredDuringExecution, preferredSchedulingTerm)
pod.Spec.Affinity.NodeAffinity.PreferredDuringSchedulingIgnoredDuringExecution, preferredSchedulingTerms...)
pod.EnsureLabel(utils.PreferredNodeAffinityLabel, utils.NodeAffinityLabelValueSet)
}
switch {
// Case 1: All architectures from this config were successfully added (no duplicates)
case preferredSchedulingTerms != nil && skippedArchitectures == nil:
pod.PublishEvent(corev1.EventTypeNormal, ArchitectureAwareNodeAffinitySet, fmt.Sprintf("%s source: %s", ArchitecturePreferredPredicateSetupMsg, configSource))
log.V(2).Info("Applied all architecture preferences from configuration", "ConfigSource", configSource)

// Case 2: Some architectures were added, but some were skipped due to duplicates
case preferredSchedulingTerms != nil && skippedArchitectures != nil:
pod.EnsureLabel(utils.PreferredNodeAffinitySourceLabel, utils.LabelValueSetWithDuplicates)
pod.PublishEvent(corev1.EventTypeNormal, ArchitectureAwareNodeAffinitySet, fmt.Sprintf("%s source: %s, skipped: %s", ArchitecturePreferredAffinityWithDuplicatesMsg, configSource, strings.Join(skippedArchitectures, ", ")))
log.V(2).Info("Applied some architecture preferences from configuration", "ConfigSource", configSource, "SkippedArchitectures", skippedArchitectures)

// Case 3: All architectures from this config were already set
case preferredSchedulingTerms == nil && skippedArchitectures != nil:
pod.EnsureLabel(utils.PreferredNodeAffinitySourceLabel, utils.LabelValueSetWithDuplicates)
pod.PublishEvent(corev1.EventTypeNormal, ArchitecturePreferredAffinityDuplicates, fmt.Sprintf("%s source: %s, architectures: %s", ArchitecturePreferredAffinityAllDuplicatesMsg, configSource, strings.Join(skippedArchitectures, ", ")))
log.V(2).Info("All architectures from configuration were already set", "ConfigSource", configSource, "SkippedArchitectures", skippedArchitectures)

// Case 4: No architectures were provided in the config
default:
log.V(2).Info("No architecture preferences provided in configuration", "ConfigSource", configSource)
}
}

// if the nodeSelectorTerms were patched at least once, we set the nodeAffinity label to the set value, to keep
// track of the fact that the nodeAffinity was patched by the operator.
pod.EnsureLabel(utils.PreferredNodeAffinityLabel, utils.NodeAffinityLabelValueSet)
pod.PublishEvent(corev1.EventTypeNormal, ArchitectureAwareNodeAffinitySet, ArchitecturePreferredPredicateSetupMsg)
// getExistingPreferredArchitectures returns the set of architectures that are
// already referenced by the pod's preferred node affinity terms.
//
// Every value of every match expression keyed on utils.ArchLabel is recorded,
// regardless of the expression's operator. The returned map is never nil; it is
// empty when the pod has no affinity or no node affinity.
func (pod *Pod) getExistingPreferredArchitectures() map[string]bool {
	existing := map[string]bool{}

	affinity := pod.Spec.Affinity
	if affinity == nil || affinity.NodeAffinity == nil {
		return existing
	}

	preferredTerms := affinity.NodeAffinity.PreferredDuringSchedulingIgnoredDuringExecution
	for i := range preferredTerms {
		for _, requirement := range preferredTerms[i].Preference.MatchExpressions {
			// Only expressions on the architecture label are relevant.
			if requirement.Key != utils.ArchLabel {
				continue
			}
			for _, arch := range requirement.Values {
				existing[arch] = true
			}
		}
	}
	return existing
}

func (pod *Pod) getArchitecturePredicate(pullSecretDataList [][]byte) (corev1.NodeSelectorRequirement, error) {
Expand Down
71 changes: 64 additions & 7 deletions controllers/podplacement/pod_model_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -545,11 +545,67 @@ func TestPod_SetPreferredArchNodeAffinityWithCPPC(t *testing.T) {
imageInspectionCache = fake.FacadeSingleton()
pod := newPod(tt.pod, ctx, nil)
g := NewGomegaWithT(t)
pod.SetPreferredArchNodeAffinity(
NewClusterPodPlacementConfig().
WithName(common.SingletonResourceObjectName).
WithNodeAffinityScoring(true).
WithNodeAffinityScoringTerm(utils.ArchitectureAmd64, 1).Build())
cppc := NewClusterPodPlacementConfig().
WithName(common.SingletonResourceObjectName).
WithNodeAffinityScoring(true).
WithNodeAffinityScoringTerm(utils.ArchitectureAmd64, 1).Build()
pod.SetPreferredArchNodeAffinity(cppc.Spec.Plugins.NodeAffinityScoring, v1beta1.ClusterPodPlacementConfigKind)
g.Expect(pod.Spec.Affinity).Should(Equal(tt.want.Spec.Affinity))
imageInspectionCache = mmoimage.FacadeSingleton()
})
}
}

func TestPod_SetPreferredArchNodeAffinityPPC(t *testing.T) {
tests := []struct {
name string
pod *v1.Pod
want *v1.Pod
}{
{
name: "pod with no predefined preferred affinity",
pod: NewPod().WithContainersImages(fake.SingleArchAmd64Image).Build(),
want: NewPod().WithContainersImages(fake.SingleArchAmd64Image).WithPreferredDuringSchedulingIgnoredDuringExecution(
NewPreferredSchedulingTerm().WithArchitecture(utils.ArchitectureAmd64).WithWeight(1).Build(),
).Build(),
},
{
name: "pod with predefined preferred node affinity",
pod: NewPod().WithContainersImages(fake.SingleArchAmd64Image).WithPreferredDuringSchedulingIgnoredDuringExecution(
NewPreferredSchedulingTerm().WithCustomKeyValue("foo", "bar").WithWeight(50).Build(),
).Build(),
want: NewPod().WithContainersImages(fake.SingleArchAmd64Image).WithPreferredDuringSchedulingIgnoredDuringExecution(
NewPreferredSchedulingTerm().WithCustomKeyValue("foo", "bar").WithWeight(50).Build(),
NewPreferredSchedulingTerm().WithArchitecture(utils.ArchitectureAmd64).WithWeight(1).Build(),
).Build(),
},
{
name: "pod with predefined preferred node affinity with arch label set",
pod: NewPod().WithContainersImages(fake.SingleArchAmd64Image).WithPreferredDuringSchedulingIgnoredDuringExecution(
NewPreferredSchedulingTerm().WithArchitecture(utils.ArchitectureAmd64).WithWeight(30).Build(),
).Build(),
want: NewPod().WithContainersImages(fake.SingleArchAmd64Image).WithPreferredDuringSchedulingIgnoredDuringExecution(
NewPreferredSchedulingTerm().WithArchitecture(utils.ArchitectureAmd64).WithWeight(30).Build(),
).Build(),
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
imageInspectionCache = fake.FacadeSingleton()
pod := newPod(tt.pod, ctx, nil)
g := NewGomegaWithT(t)
ppc := NewPodPlacementConfig().
WithName("test-high-priority").
WithNodeAffinityScoring(true).
WithPriority(10).
WithNodeAffinityScoringTerm(utils.ArchitectureAmd64, 1).Build()
pod.SetPreferredArchNodeAffinity(ppc.Spec.Plugins.NodeAffinityScoring, "PodPlacementConfig/test-high-priority")
ppc = NewPodPlacementConfig().
WithName("test-low-priority").
WithNodeAffinityScoring(true).
WithPriority(5).
WithNodeAffinityScoringTerm(utils.ArchitectureAmd64, 5).Build()
pod.SetPreferredArchNodeAffinity(ppc.Spec.Plugins.NodeAffinityScoring, "PodPlacementConfig/test-low-priority")
g.Expect(pod.Spec.Affinity).Should(Equal(tt.want.Spec.Affinity))
imageInspectionCache = mmoimage.FacadeSingleton()
})
Expand All @@ -573,7 +629,7 @@ func TestPod_SetPreferredArchNodeAffinity(t *testing.T) {
imageInspectionCache = fake.FacadeSingleton()
pod := newPod(tt.pod, ctx, nil)
g := NewGomegaWithT(t)
pod.SetPreferredArchNodeAffinity(&v1beta1.ClusterPodPlacementConfig{
cppc := &v1beta1.ClusterPodPlacementConfig{
ObjectMeta: metav1.ObjectMeta{
Name: "cluster",
},
Expand All @@ -587,7 +643,8 @@ func TestPod_SetPreferredArchNodeAffinity(t *testing.T) {
},
},
},
})
}
pod.SetPreferredArchNodeAffinity(cppc.Spec.Plugins.NodeAffinityScoring, v1beta1.ClusterPodPlacementConfigKind)
g.Expect(pod.Spec.Affinity).Should(Equal(tt.want.Spec.Affinity))
imageInspectionCache = mmoimage.FacadeSingleton()
})
Expand Down
67 changes: 59 additions & 8 deletions controllers/podplacement/pod_reconciler.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,22 +20,24 @@ import (
"context"
"fmt"
runtime2 "runtime"
"sort"
"time"

"github.com/openshift/multiarch-tuning-operator/apis/multiarch/common"
multiarchv1beta1 "github.com/openshift/multiarch-tuning-operator/apis/multiarch/v1beta1"
"github.com/openshift/multiarch-tuning-operator/controllers/podplacement/metrics"
"github.com/openshift/multiarch-tuning-operator/pkg/informers/clusterpodplacementconfig"
"github.com/openshift/multiarch-tuning-operator/pkg/utils"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/client-go/kubernetes"
"k8s.io/client-go/tools/record"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/client"
ctrl2 "sigs.k8s.io/controller-runtime/pkg/controller"
ctrllog "sigs.k8s.io/controller-runtime/pkg/log"

"github.com/openshift/multiarch-tuning-operator/apis/multiarch/common"
"github.com/openshift/multiarch-tuning-operator/controllers/podplacement/metrics"
"github.com/openshift/multiarch-tuning-operator/pkg/informers/clusterpodplacementconfig"
"github.com/openshift/multiarch-tuning-operator/pkg/utils"
)

// PodReconciler reconciles a Pod object
Expand Down Expand Up @@ -117,8 +119,10 @@ func (r *PodReconciler) processPod(ctx context.Context, pod *Pod) {
return
}

r.applyPodPlacementConfigs(ctx, pod)

if cppc != nil && cppc.PluginsEnabled(common.NodeAffinityScoringPluginName) {
pod.SetPreferredArchNodeAffinity(cppc)
pod.SetPreferredArchNodeAffinity(cppc.Spec.Plugins.NodeAffinityScoring, multiarchv1beta1.ClusterPodPlacementConfigKind)
}

// Prepare the requirement for the node affinity.
Expand All @@ -141,15 +145,62 @@ func (r *PodReconciler) processPod(ctx context.Context, pod *Pod) {
// If the pod has been processed successfully or the max retries have been reached, remove the scheduling gate.
if err == nil || pod.maxRetries() {
if pod.Labels[utils.PreferredNodeAffinityLabel] == utils.LabelValueNotSet {
pod.PublishEvent(corev1.EventTypeNormal, ArchitectureAwareNodeAffinitySet,
ArchitecturePreferredPredicateSkippedMsg)
if pod.Labels[utils.LabelValueSetWithDuplicates] == utils.LabelValueSetWithDuplicates {
pod.PublishEvent(corev1.EventTypeNormal, ArchitectureAwareNodeAffinitySet,
ArchitecturePreferredAffinityAllDuplicatesMsg)
log.V(2).Info("All provided preferred node affinity was already set.")
} else {
pod.PublishEvent(corev1.EventTypeNormal, ArchitectureAwareNodeAffinitySet,
ArchitecturePreferredPredicateSkippedMsg)
log.V(2).Info("No preferred node affinity was set")
}
}

log.V(1).Info("Removing the scheduling gate from pod.")
pod.RemoveSchedulingGate()
}
}

// applyPodPlacementConfigs applies the NodeAffinityScoring preferences of the
// PodPlacementConfigs living in the pod's namespace to the pod.
//
// The configurations are processed by descending Spec.Priority; configurations
// with the same priority are processed by ascending name, so that the outcome is
// deterministic across reconciliations (the order of a List result is not
// guaranteed, and the first configuration that mentions an architecture wins).
// Configurations with the plugin disabled, or whose label selector does not match
// the pod, are skipped. Errors are reported through pod.handleError: a failure to
// list aborts the whole step, an invalid label selector only skips that
// configuration.
func (r *PodReconciler) applyPodPlacementConfigs(ctx context.Context, pod *Pod) {
	log := ctrllog.FromContext(ctx).WithName("PodPlacementConfig")

	// List existing PodPlacementConfigs in the same namespace
	ppcList := &multiarchv1beta1.PodPlacementConfigList{}
	if err := r.List(ctx, ppcList, client.InNamespace(pod.Namespace)); err != nil {
		pod.handleError(err, "failed to list existing PodPlacementConfigs in namespace")
		return
	}

	// Sort the configurations by descending priority. sort.Slice is not stable,
	// so ties are broken explicitly on the (namespace-unique) object name.
	items := ppcList.Items
	sort.Slice(items, func(i, j int) bool {
		if items[i].Spec.Priority != items[j].Spec.Priority {
			return items[i].Spec.Priority > items[j].Spec.Priority
		}
		return items[i].Name < items[j].Name
	})

	// For each namespace-scoped configuration, check selector and apply.
	// Iterate by index to avoid copying every PodPlacementConfig.
	for i := range items {
		ppc := &items[i]
		log.V(1).Info("Processing PodPlacementConfig", "namespace", ppc.Namespace, "name", ppc.Name)

		// check if plugin is enabled
		if !ppc.PluginsEnabled(common.NodeAffinityScoringPluginName) {
			log.V(1).Info("Skipping PodPlacementConfig NodeAffinityScoring disabled", "namespace", ppc.Namespace, "name", ppc.Name)
			continue
		}

		selector, err := metav1.LabelSelectorAsSelector(ppc.Spec.LabelSelector)
		if err != nil {
			pod.handleError(err, "Invalid label selector in PodPlacementConfig")
			continue
		}

		// A selector equal to labels.Nothing() is treated as "matches every pod".
		// NOTE(review): LabelSelectorAsSelector is documented to return
		// labels.Nothing() for a nil LabelSelector, so this presumably covers
		// configurations without a selector — confirm this is the intended meaning.
		if selector != labels.Nothing() && !selector.Matches(labels.Set(pod.Labels)) {
			continue
		}

		log.Info("Applying namespace-scoped config", "PodPlacementConfig", ppc.Name)
		// Apply the configuration, checking for overlaps
		configSource := fmt.Sprintf("%s-%s", multiarchv1beta1.PodPlacementConfigResource, ppc.Name)
		pod.SetPreferredArchNodeAffinity(ppc.Spec.Plugins.NodeAffinityScoring, configSource)
	}
}

// pullSecretDataList returns the list of secrets data for the given pod given its imagePullSecrets field
func (r *PodReconciler) pullSecretDataList(ctx context.Context, pod *Pod) ([][]byte, error) {
log := ctrllog.FromContext(ctx)
Expand Down
Loading