Skip to content

Commit

Permalink
feat: add podAntiAffinity to MOFED
Browse files Browse the repository at this point in the history
- Add podAntiAffinity to MOFED
- Add unit test to check PodAntiAffinity rendering
- Add unit test to check NodeSelector rendering
- Extract methods in state_ofed for better readability

Signed-off-by: Fred Rolland <frolland@nvidia.com>
  • Loading branch information
rollandf committed Dec 18, 2023
1 parent 3f67c24 commit 31f75e1
Show file tree
Hide file tree
Showing 3 changed files with 187 additions and 40 deletions.
8 changes: 8 additions & 0 deletions manifests/state-ofed-driver/0050_ofed-driver-ds.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,14 @@ spec:
- key: nvidia.com/gpu
operator: Exists
effect: NoSchedule
affinity:
podAntiAffinity:
requiredDuringSchedulingIgnoredDuringExecution:
- labelSelector:
matchExpressions:
- key: nvidia.com/ofed-driver
operator: Exists
topologyKey: kubernetes.io/hostname
serviceAccountName: ofed-driver
hostNetwork: true
{{- if .CrSpec.ImagePullSecrets }}
Expand Down
106 changes: 66 additions & 40 deletions pkg/state/state_ofed.go
Original file line number Diff line number Diff line change
Expand Up @@ -397,56 +397,23 @@ func (s *stateOFED) getManifestObjects(
}
nodeAttr := attrs[0].Attributes

if cr.Spec.OFEDDriver.StartupProbe == nil {
cr.Spec.OFEDDriver.StartupProbe = &mellanoxv1alpha1.PodProbeSpec{
InitialDelaySeconds: 10,
PeriodSeconds: 10,
}
}

if cr.Spec.OFEDDriver.LivenessProbe == nil {
cr.Spec.OFEDDriver.LivenessProbe = &mellanoxv1alpha1.PodProbeSpec{
InitialDelaySeconds: 30,
PeriodSeconds: 30,
}
}

if cr.Spec.OFEDDriver.ReadinessProbe == nil {
cr.Spec.OFEDDriver.ReadinessProbe = &mellanoxv1alpha1.PodProbeSpec{
InitialDelaySeconds: 10,
PeriodSeconds: 30,
}
}
setProbesDefaults(cr)

// Update MOFED Env variables with defaults for the cluster
cr.Spec.OFEDDriver.Env = s.mergeWithDefaultEnvs(cr.Spec.OFEDDriver.Env, nodeAttr)

additionalVolMounts := additionalVolumeMounts{}
osname := nodeAttr[nodeinfo.AttrTypeOSName]
// set any custom ssl key/certificate configuration provided
if cr.Spec.OFEDDriver.CertConfig != nil && cr.Spec.OFEDDriver.CertConfig.Name != "" {
destinationDir, err := getCertConfigPath(osname)
if err != nil {
return nil, fmt.Errorf("failed to get destination directory for custom TLS certificates config: %v", err)
}

err = s.handleAdditionalMounts(ctx, &additionalVolMounts, cr.Spec.OFEDDriver.CertConfig.Name, destinationDir)
if err != nil {
return nil, fmt.Errorf("failed to mount volumes for custom TLS certificates: %v", err)
}
err := s.handleCertConfig(ctx, cr, osname, additionalVolMounts)
if err != nil {
return nil, err
}

// set any custom repo configuration provided
if cr.Spec.OFEDDriver.RepoConfig != nil && cr.Spec.OFEDDriver.RepoConfig.Name != "" {
destinationDir, err := getRepoConfigPath(osname)
if err != nil {
return nil, fmt.Errorf("failed to get destination directory for custom repo config: %v", err)
}

err = s.handleAdditionalMounts(ctx, &additionalVolMounts, cr.Spec.OFEDDriver.RepoConfig.Name, destinationDir)
if err != nil {
return nil, fmt.Errorf("failed to mount volumes for custom repositories configuration: %v", err)
}
err = s.handleRepoConfig(ctx, cr, osname, additionalVolMounts)
if err != nil {
return nil, err
}

renderData := &ofedManifestRenderData{
Expand Down Expand Up @@ -695,3 +662,62 @@ func (e envVarsWithGet) Get(name string) *v1.EnvVar {

return nil
}

// setProbesDefaults populates NicClusterPolicy CR with default Probe values
// if not provided by user
func setProbesDefaults(cr *mellanoxv1alpha1.NicClusterPolicy) {
if cr.Spec.OFEDDriver.StartupProbe == nil {
cr.Spec.OFEDDriver.StartupProbe = &mellanoxv1alpha1.PodProbeSpec{
InitialDelaySeconds: 10,
PeriodSeconds: 10,
}
}

if cr.Spec.OFEDDriver.LivenessProbe == nil {
cr.Spec.OFEDDriver.LivenessProbe = &mellanoxv1alpha1.PodProbeSpec{
InitialDelaySeconds: 30,
PeriodSeconds: 30,
}
}

if cr.Spec.OFEDDriver.ReadinessProbe == nil {
cr.Spec.OFEDDriver.ReadinessProbe = &mellanoxv1alpha1.PodProbeSpec{
InitialDelaySeconds: 10,
PeriodSeconds: 30,
}
}
}

// handleCertConfig handles additional mounts required for Certificates if specified
func (s *stateOFED) handleCertConfig(
ctx context.Context, cr *mellanoxv1alpha1.NicClusterPolicy, osname string, mounts additionalVolumeMounts) error {
if cr.Spec.OFEDDriver.CertConfig != nil && cr.Spec.OFEDDriver.CertConfig.Name != "" {
destinationDir, err := getCertConfigPath(osname)
if err != nil {
return fmt.Errorf("failed to get destination directory for custom TLS certificates config: %v", err)
}

err = s.handleAdditionalMounts(ctx, &mounts, cr.Spec.OFEDDriver.CertConfig.Name, destinationDir)
if err != nil {
return fmt.Errorf("failed to mount volumes for custom TLS certificates: %v", err)
}
}
return nil
}

// handleRepoConfig handles additional mounts required for custom repo if specified
func (s *stateOFED) handleRepoConfig(
ctx context.Context, cr *mellanoxv1alpha1.NicClusterPolicy, osname string, mounts additionalVolumeMounts) error {
if cr.Spec.OFEDDriver.RepoConfig != nil && cr.Spec.OFEDDriver.RepoConfig.Name != "" {
destinationDir, err := getRepoConfigPath(osname)
if err != nil {
return fmt.Errorf("failed to get destination directory for custom repo config: %v", err)
}

err = s.handleAdditionalMounts(ctx, &mounts, cr.Spec.OFEDDriver.RepoConfig.Name, destinationDir)
if err != nil {
return fmt.Errorf("failed to mount volumes for custom repositories configuration: %v", err)
}
}
return nil
}
113 changes: 113 additions & 0 deletions pkg/state/state_ofed_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,16 +17,24 @@
package state

import (
"context"
"strings"

"k8s.io/apimachinery/pkg/runtime"

. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"

osconfigv1 "github.com/openshift/api/config/v1"
appsv1 "k8s.io/api/apps/v1"
v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"

"github.com/Mellanox/network-operator/api/v1alpha1"
"github.com/Mellanox/network-operator/pkg/nodeinfo"
"github.com/Mellanox/network-operator/pkg/render"
"github.com/Mellanox/network-operator/pkg/testing/mocks"
"github.com/Mellanox/network-operator/pkg/utils"
)

const (
Expand All @@ -35,13 +43,17 @@ const (
testClusterWideNoProxy = "no-proxy-cluster-wide"
testNicPolicyHTTPProxy = "http-policy"
testNicPolicyNoProxy = "no-proxy-policy"
osName = "ubuntu"
osVer = "22.04"
)

var _ = Describe("MOFED state test", func() {
var stateOfed stateOFED
var ctx context.Context

BeforeEach(func() {
stateOfed = stateOFED{}
ctx = context.Background()
})

Context("getMofedDriverImageName", func() {
Expand Down Expand Up @@ -206,4 +218,105 @@ var _ = Describe("MOFED state test", func() {
[]v1.EnvVar{{Name: "Foo", Value: "Bar"}}),
Entry("Ubuntu 23.04", "ubuntu", "23.04", []v1.EnvVar{}, []v1.EnvVar{}),
)

Context("Render Manifests", func() {
It("Should Render Mofed DaemonSet", func() {
client := mocks.ControllerRuntimeClient{}
manifestBaseDir := "../../manifests/state-ofed-driver"
scheme := runtime.NewScheme()

files, err := utils.GetFilesWithSuffix(manifestBaseDir, render.ManifestFileSuffix...)
Expect(err).NotTo(HaveOccurred())
renderer := render.NewRenderer(files)

ofedState := stateOFED{
stateSkel: stateSkel{
name: stateOFEDName,
description: stateOFEDDescription,
client: &client,
scheme: scheme,
renderer: renderer,
},
}
cr := &v1alpha1.NicClusterPolicy{}
cr.Name = "nic-cluster-policy"
cr.Spec.OFEDDriver = &v1alpha1.OFEDDriverSpec{
ImageSpec: v1alpha1.ImageSpec{
Image: "mofed",
Repository: "nvcr.io/mellanox",
Version: "23.10-0.5.5.0",
},
}
By("Creating NodeProvider with 1 Node")
infoProvider := nodeinfo.NewProvider([]*v1.Node{
getNode("node1"),
})
objs, err := ofedState.getManifestObjects(ctx, cr, infoProvider, &dummyProvider{})
Expect(err).NotTo(HaveOccurred())
// Expect 4 objects: DaemonSet, Service Account, Role, RoleBinding
Expect(len(objs)).To(Equal(4))
By("Verify DaemonSet")
for _, obj := range objs {
if obj.GetKind() != "DaemonSet" {
continue
}
ds := appsv1.DaemonSet{}
err = runtime.DefaultUnstructuredConverter.FromUnstructured(obj.Object, &ds)
Expect(err).NotTo(HaveOccurred())
Expect(ds.Name).To(Equal("mofed-ubuntu22.04-ds"))
verifyDSNodeSelector(ds.Spec.Template.Spec.NodeSelector)
verifyPodAntiInfinity(ds.Spec.Template.Spec.Affinity)
}
})
})
})

func verifyPodAntiInfinity(affinity *v1.Affinity) {
By("Verify PodAntiInfinity")
Expect(affinity).NotTo(BeNil())
expected := v1.Affinity{
PodAntiAffinity: &v1.PodAntiAffinity{
RequiredDuringSchedulingIgnoredDuringExecution: []v1.PodAffinityTerm{
{
LabelSelector: &metav1.LabelSelector{
MatchExpressions: []metav1.LabelSelectorRequirement{
{
Key: "nvidia.com/ofed-driver",
Operator: metav1.LabelSelectorOpExists,
},
},
},
TopologyKey: "kubernetes.io/hostname",
},
},
},
}
Expect(*affinity).To(BeEquivalentTo(expected))
}

func verifyDSNodeSelector(selector map[string]string) {
By("Verify NodeSelector")
nsMellanox, ok := selector["feature.node.kubernetes.io/pci-15b3.present"]
Expect(ok).To(BeTrue())
Expect(nsMellanox).To(Equal("true"))
nsOsName, ok := selector["feature.node.kubernetes.io/system-os_release.ID"]
Expect(ok).To(BeTrue())
Expect(nsOsName).To(Equal(osName))
nsOsVer, ok := selector["feature.node.kubernetes.io/system-os_release.VERSION_ID"]
Expect(ok).To(BeTrue())
Expect(nsOsVer).To(Equal(osVer))
}

func getNode(name string) *v1.Node {
return &v1.Node{
ObjectMeta: metav1.ObjectMeta{
Name: name,
Labels: map[string]string{
nodeinfo.NodeLabelMlnxNIC: "true",
nodeinfo.NodeLabelOSName: osName,
nodeinfo.NodeLabelOSVer: osVer,
nodeinfo.NodeLabelCPUArch: "amd64",
},
},
}
}

0 comments on commit 31f75e1

Please sign in to comment.