From a274a0445597d4c68b1079dd21c99cbc30e21700 Mon Sep 17 00:00:00 2001 From: Fred Rolland Date: Wed, 22 Nov 2023 18:12:15 +0200 Subject: [PATCH] feat: Support OFED with DTK In Openshift, in order to OFED containter to be able to download and compile the needed Kernel files, it is required to install a cluster-wide entitlement. This requirement is not user friendly. In order to avoid this, a container image with the needed files is available in Openshift distributions. This image is called DriverToolKit aka DTK. By using this container as a side-car to MOFED container, the modules can be compiled without entitlement. Changes: API: - NicClusterPolicy CRD: add bool 'useOcpDriverToolkit' under 'ofedDriver'. Default is 'true' - Helm : add support to 'useOcpDriverToolkit' OFED state: - In case of OCP and 'useOcpDriverToolkit' is true, find DTK image based on NFD label of node. - If available, add to MOFED DS a DTK container, change entrypoint logic. Signed-off-by: Fred Rolland --- api/v1alpha1/nicclusterpolicy_types.go | 4 + .../mellanox.com_nicclusterpolicies.yaml | 5 + config/rbac/role.yaml | 8 ++ controllers/nicclusterpolicy_controller.go | 1 + deployment/network-operator/README.md | 1 + .../crds/mellanox.com_nicclusterpolicies.yaml | 5 + ...anox.com_v1alpha1_nicclusterpolicy_cr.yaml | 1 + .../network-operator/templates/role.yaml | 8 ++ deployment/network-operator/values.yaml | 1 + hack/templates/values/values.template | 1 + main.go | 2 + .../0050_ofed-driver-ds.yaml | 36 ++++++ pkg/nodeinfo/attributes.go | 4 + pkg/state/state_ofed.go | 42 +++++++ pkg/state/state_ofed_test.go | 104 +++++++++++++++++- 15 files changed, 222 insertions(+), 1 deletion(-) diff --git a/api/v1alpha1/nicclusterpolicy_types.go b/api/v1alpha1/nicclusterpolicy_types.go index c56eeb6b9..deeecf051 100644 --- a/api/v1alpha1/nicclusterpolicy_types.go +++ b/api/v1alpha1/nicclusterpolicy_types.go @@ -106,6 +106,10 @@ type OFEDDriverSpec struct { // +kubebuilder:default:=300 // +kubebuilder:validation:Minimum:=0 TerminationGracePeriodSeconds int64 `json:"terminationGracePeriodSeconds,omitempty"` + // UseOCPDriverToolkit indicates if DriverToolkit image should be used on OpenShift to build and install driver modules + // +kubebuilder:default:=true + // +optional + UseOCPDriverToolkit bool `json:"useOcpDriverToolkit"` } // DriverUpgradePolicySpec describes policy configuration for automatic upgrades diff --git a/config/crd/bases/mellanox.com_nicclusterpolicies.yaml b/config/crd/bases/mellanox.com_nicclusterpolicies.yaml index 32459b336..c3bc4e444 100644 --- a/config/crd/bases/mellanox.com_nicclusterpolicies.yaml +++ b/config/crd/bases/mellanox.com_nicclusterpolicies.yaml @@ -719,6 +719,11 @@ spec: type: integer type: object type: object + useOcpDriverToolkit: + default: true + description: UseOCPDriverToolkit indicates if DriverToolkit image + should be used on OpenShift to build and install driver modules + type: boolean version: pattern: '[a-zA-Z0-9\.-]+' type: string diff --git a/config/rbac/role.yaml b/config/rbac/role.yaml index 90cdb476a..1dab46119 100644 --- a/config/rbac/role.yaml +++ b/config/rbac/role.yaml @@ -180,6 +180,14 @@ rules: - create - patch - update +- apiGroups: + - image.openshift.io + resources: + - imagestreams + verbs: + - get + - list + - watch - apiGroups: - k8s.cni.cncf.io resources: diff --git a/controllers/nicclusterpolicy_controller.go b/controllers/nicclusterpolicy_controller.go index dbad39fee..32bfe08d2 100644 --- a/controllers/nicclusterpolicy_controller.go +++ b/controllers/nicclusterpolicy_controller.go @@ -82,6 +82,7 @@ type NicClusterPolicyReconciler struct { // +kubebuilder:rbac:groups=nv-ipam.nvidia.com,resources=ippools/status,verbs=get;update;patch; // +kubebuilder:rbac:groups=cert-manager.io,resources=issuers;certificates,verbs=get;list;watch;create;update;patch;delete // +kubebuilder:rbac:groups=admissionregistration.k8s.io,resources=validatingwebhookconfigurations,verbs=get;list;watch;create;update;patch;delete +// +kubebuilder:rbac:groups=image.openshift.io,resources=imagestreams,verbs=get;list;watch // Reconcile is part of the main kubernetes reconciliation loop which aims to // move the current state of the cluster closer to the desired state. diff --git a/deployment/network-operator/README.md b/deployment/network-operator/README.md index 36ea8b1bc..7644f9746 100644 --- a/deployment/network-operator/README.md +++ b/deployment/network-operator/README.md @@ -447,6 +447,7 @@ containerResources: | `ofedDriver.upgradePolicy.waitForCompletion.podSelector` | string | not set | specifies a label selector for the pods to wait for completion before starting the driver upgrade | | `ofedDriver.upgradePolicy.waitForCompletion.timeoutSeconds` | int | not set | specify the length of time in seconds to wait before giving up for workload to finish, zero means infinite | | `ofedDriver.containerResources` | [] | not set | Optional [resource requests and limits](#container-resources) for the `mofed-container` container | +| `ofedDriver.useOcpDriverToolkit` | bool | `true` | In OpenShift, use Driver Toolkit image to compile OFED drivers | #### RDMA Device Plugin diff --git a/deployment/network-operator/crds/mellanox.com_nicclusterpolicies.yaml b/deployment/network-operator/crds/mellanox.com_nicclusterpolicies.yaml index 32459b336..c3bc4e444 100644 --- a/deployment/network-operator/crds/mellanox.com_nicclusterpolicies.yaml +++ b/deployment/network-operator/crds/mellanox.com_nicclusterpolicies.yaml @@ -719,6 +719,11 @@ spec: type: integer type: object type: object + useOcpDriverToolkit: + default: true + description: UseOCPDriverToolkit indicates if DriverToolkit image + should be used on OpenShift to build and install driver modules + type: boolean version: pattern: '[a-zA-Z0-9\.-]+' type: string diff --git a/deployment/network-operator/templates/mellanox.com_v1alpha1_nicclusterpolicy_cr.yaml b/deployment/network-operator/templates/mellanox.com_v1alpha1_nicclusterpolicy_cr.yaml index d47b03e92..de75374ea 100644 --- a/deployment/network-operator/templates/mellanox.com_v1alpha1_nicclusterpolicy_cr.yaml +++ b/deployment/network-operator/templates/mellanox.com_v1alpha1_nicclusterpolicy_cr.yaml @@ -32,6 +32,7 @@ spec: image: {{ .Values.ofedDriver.image }} repository: {{ .Values.ofedDriver.repository }} version: {{ .Values.ofedDriver.version }} + useOcpDriverToolkit: {{ .Values.ofedDriver.useOcpDriverToolkit }} {{- if .Values.ofedDriver.env }} env: {{ toYaml .Values.ofedDriver.env | nindent 6 }} diff --git a/deployment/network-operator/templates/role.yaml b/deployment/network-operator/templates/role.yaml index 0352e5c68..b46053b00 100644 --- a/deployment/network-operator/templates/role.yaml +++ b/deployment/network-operator/templates/role.yaml @@ -195,6 +195,14 @@ rules: - create - patch - update +- apiGroups: + - image.openshift.io + resources: + - imagestreams + verbs: + - get + - list + - watch - apiGroups: - k8s.cni.cncf.io resources: diff --git a/deployment/network-operator/values.yaml b/deployment/network-operator/values.yaml index cf1102f49..4c203f670 100644 --- a/deployment/network-operator/values.yaml +++ b/deployment/network-operator/values.yaml @@ -226,6 +226,7 @@ ofedDriver: # podSelector: "app=myapp" # specify the length of time in seconds to wait before giving up for workload to finish, zero means infinite # timeoutSeconds: 300 + useOcpDriverToolkit: true rdmaSharedDevicePlugin: deploy: true diff --git a/hack/templates/values/values.template b/hack/templates/values/values.template index d3d510fe4..8fb677fc9 100644 --- a/hack/templates/values/values.template +++ b/hack/templates/values/values.template @@ -226,6 +226,7 @@ ofedDriver: # podSelector: "app=myapp" # specify the length of time in seconds to wait before giving up for workload to finish, zero means infinite # timeoutSeconds: 300 + useOcpDriverToolkit: true rdmaSharedDevicePlugin: deploy: true diff --git a/main.go b/main.go index 3c347dee3..32eb3d567 100644 --- a/main.go +++ b/main.go @@ -25,6 +25,7 @@ import ( "github.com/NVIDIA/k8s-operator-libs/pkg/upgrade" netattdefv1 "github.com/k8snetworkplumbingwg/network-attachment-definition-client/pkg/apis/k8s.cni.cncf.io/v1" osconfigv1 "github.com/openshift/api/config/v1" + imagev1 "github.com/openshift/api/image/v1" "k8s.io/apimachinery/pkg/runtime" utilruntime "k8s.io/apimachinery/pkg/util/runtime" clientgoscheme "k8s.io/client-go/kubernetes/scheme" @@ -59,6 +60,7 @@ func init() { utilruntime.Must(mellanoxcomv1alpha1.AddToScheme(scheme)) utilruntime.Must(netattdefv1.AddToScheme(scheme)) utilruntime.Must(osconfigv1.AddToScheme(scheme)) + utilruntime.Must(imagev1.AddToScheme(scheme)) // +kubebuilder:scaffold:scheme } diff --git a/manifests/state-ofed-driver/0050_ofed-driver-ds.yaml b/manifests/state-ofed-driver/0050_ofed-driver-ds.yaml index 958b36c80..a932a8b9b 100644 --- a/manifests/state-ofed-driver/0050_ofed-driver-ds.yaml +++ b/manifests/state-ofed-driver/0050_ofed-driver-ds.yaml @@ -80,6 +80,10 @@ spec: - image: {{ .RuntimeSpec.MOFEDImageName }} imagePullPolicy: IfNotPresent name: mofed-container + {{- if .RuntimeSpec.UseDtk }} + command: ["ocp_dtk_entrypoint"] + args: ["nv-fs-ctr-run-with-dtk"] + {{- end }} securityContext: privileged: true seLinuxOptions: @@ -112,6 +116,10 @@ spec: readOnly: {{ .ReadOnly }} {{- end }} {{- end }} + {{- if .RuntimeSpec.UseDtk }} + - name: shared-doca-driver-toolkit + mountPath: /mnt/shared-doca-driver-toolkit + {{- end}} {{- with index .RuntimeSpec.ContainerResources "mofed-container" }} resources: {{- if .Requests }} @@ -146,6 +154,30 @@ spec: initialDelaySeconds: {{ .CrSpec.ReadinessProbe.InitialDelaySeconds }} failureThreshold: 1 periodSeconds: {{ .CrSpec.ReadinessProbe.PeriodSeconds }} + {{- if .RuntimeSpec.UseDtk }} + - image: {{ .RuntimeSpec.DtkImageName }} + imagePullPolicy: IfNotPresent + name: openshift-driver-toolkit-ctr + command: [bash, -xc] + args: ["until [ -f /mnt/shared-doca-driver-toolkit/dir_prepared ]; do echo Waiting for doca-driver container to prepare the shared directory ...; sleep 10; done; exec /mnt/shared-doca-driver-toolkit/ocp_dtk_entrypoint dtk-build-driver"] + {{- if .CrSpec.Env }} + env: + {{- range .CrSpec.Env }} + {{ . | yaml | nindentPrefix 14 "- " }} + {{- end }} + {{- end }} + volumeMounts: + {{- if.AdditionalVolumeMounts.VolumeMounts }} + {{- range .AdditionalVolumeMounts.VolumeMounts }} + - name: {{ .Name }} + mountPath: {{ .MountPath }} + subPath: {{ .SubPath }} + readOnly: {{ .ReadOnly }} + {{- end }} + {{- end }} + - name: shared-doca-driver-toolkit + mountPath: /mnt/shared-doca-driver-toolkit + {{- end }} # unloading OFED modules can take more time than default terminationGracePeriod (30 sec) terminationGracePeriodSeconds: {{ .CrSpec.TerminationGracePeriodSeconds }} volumes: @@ -176,6 +208,10 @@ spec: {{ . | yaml | nindentPrefix 14 "- " }} {{- end }} {{- end }} + {{- if .RuntimeSpec.UseDtk }} + - name: shared-doca-driver-toolkit + emptyDir: {} + {{- end }} nodeSelector: feature.node.kubernetes.io/pci-15b3.present: "true" feature.node.kubernetes.io/system-os_release.ID: {{ .RuntimeSpec.OSName }} diff --git a/pkg/nodeinfo/attributes.go b/pkg/nodeinfo/attributes.go index 597a716ec..8c937ca9b 100644 --- a/pkg/nodeinfo/attributes.go +++ b/pkg/nodeinfo/attributes.go @@ -37,6 +37,7 @@ const ( NodeLabelNvGPU = "nvidia.com/gpu.present" NodeLabelWaitOFED = "network.nvidia.com/operator.mofed.wait" NodeLabelCudaVersionMajor = "nvidia.com/cuda.driver.major" + NodeLabelOSTreeVersion = "feature.node.kubernetes.io/system-os_release.OSTREE_VERSION" ) // AttributeType categorizes Attributes of the host. @@ -51,6 +52,7 @@ const ( AttrTypeOSVer // optional attrs AttrTypeCudaVersionMajor + AttrTypeOSTreeVersion OptionalAttrsStart = AttrTypeCudaVersionMajor ) @@ -66,6 +68,8 @@ var attrToLabel = []string{ NodeLabelOSVer, // AttrTypeCudaVersionMajor NodeLabelCudaVersionMajor, + // AttrTypeOSTreeVersion + NodeLabelOSTreeVersion, } // NodeAttributes provides attributes of a specific node diff --git a/pkg/state/state_ofed.go b/pkg/state/state_ofed.go index 59919e8e0..2c346dbd3 100644 --- a/pkg/state/state_ofed.go +++ b/pkg/state/state_ofed.go @@ -28,6 +28,7 @@ import ( "github.com/NVIDIA/k8s-operator-libs/pkg/upgrade" "github.com/go-logr/logr" osconfigv1 "github.com/openshift/api/config/v1" + apiimagev1 "github.com/openshift/api/image/v1" "github.com/pkg/errors" appsv1 "k8s.io/api/apps/v1" v1 "k8s.io/api/core/v1" @@ -156,6 +157,8 @@ type ofedRuntimeSpec struct { // is true if cluster type is Openshift IsOpenshift bool ContainerResources ContainerResourcesMap + UseDtk bool + DtkImageName string } type ofedManifestRenderData struct { @@ -410,6 +413,19 @@ func (s *stateOFED) GetManifestObjects( } nodeAttr := attrs[0].Attributes + useDtk := clusterInfo.IsOpenshift() && cr.Spec.OFEDDriver.UseOCPDriverToolkit + var dtkImageName string + if useDtk { + if err := s.checkAttributesExist(attrs[0], nodeinfo.AttrTypeOSTreeVersion); err != nil { + return nil, err + } + dtk, err := s.getOCPDriverToolkitImage(ctx, nodeAttr[nodeinfo.AttrTypeOSTreeVersion]) + if err != nil { + return nil, fmt.Errorf("failed to get OpenShift DTK image : %v", err) + } + dtkImageName = dtk + } + setProbesDefaults(cr) // Update MOFED Env variables with defaults for the cluster @@ -441,6 +457,8 @@ func (s *stateOFED) GetManifestObjects( config.FromEnv().State.OFEDState.InitContainerImage), IsOpenshift: clusterInfo.IsOpenshift(), ContainerResources: createContainerResourcesMap(cr.Spec.OFEDDriver.ContainerResources), + UseDtk: useDtk, + DtkImageName: dtkImageName, }, Tolerations: cr.Spec.Tolerations, NodeAffinity: cr.Spec.NodeAffinity, @@ -735,3 +753,27 @@ func (s *stateOFED) handleRepoConfig( } return nil } + +// getOCPDriverToolkitImage gets the DTK ImageStream and return the DTK image according to RHCOS version +func (s *stateOFED) getOCPDriverToolkitImage(ctx context.Context, rhcosVersion string) (string, error) { + reqLogger := log.FromContext(ctx) + dtkImageStream := &apiimagev1.ImageStream{} + name := "driver-toolkit" + namespace := "openshift" + err := s.client.Get(ctx, types.NamespacedName{Namespace: namespace, Name: name}, dtkImageStream) + if err != nil { + reqLogger.Error(err, "Couldn't get the driver-toolkit imagestream") + return "", err + } + rhcosDriverToolkitImages := make(map[string]string) + reqLogger.Info("ocpDriverToolkitImages: driver-toolkit imagestream found") + for _, tag := range dtkImageStream.Spec.Tags { + rhcosDriverToolkitImages[tag.Name] = tag.From.Name + } + + image, ok := rhcosDriverToolkitImages[rhcosVersion] + if !ok { + return "", fmt.Errorf("failed to find DTK image for RHCOS version: %v", rhcosVersion) + } + return image, nil +} diff --git a/pkg/state/state_ofed_test.go b/pkg/state/state_ofed_test.go index bff66e865..109572b4f 100644 --- a/pkg/state/state_ofed_test.go +++ b/pkg/state/state_ofed_test.go @@ -26,11 +26,14 @@ import ( . "github.com/onsi/gomega" osconfigv1 "github.com/openshift/api/config/v1" + apiimagev1 "github.com/openshift/api/image/v1" appsv1 "k8s.io/api/apps/v1" v1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "sigs.k8s.io/controller-runtime/pkg/client/fake" "github.com/Mellanox/network-operator/api/v1alpha1" + "github.com/Mellanox/network-operator/pkg/clustertype" "github.com/Mellanox/network-operator/pkg/nodeinfo" "github.com/Mellanox/network-operator/pkg/render" "github.com/Mellanox/network-operator/pkg/testing/mocks" @@ -45,8 +48,24 @@ const ( testNicPolicyNoProxy = "no-proxy-policy" osName = "ubuntu" osVer = "22.04" + rhcosOsTree = "414.92.202311061957-0" ) +type openShiftClusterProvider struct { +} + +func (d *openShiftClusterProvider) GetClusterType() clustertype.Type { + return clustertype.Openshift +} + +func (d *openShiftClusterProvider) IsKubernetes() bool { + return false +} + +func (d *openShiftClusterProvider) IsOpenshift() bool { + return true +} + var _ = Describe("MOFED state test", func() { var stateOfed stateOFED var ctx context.Context @@ -255,7 +274,7 @@ var _ = Describe("MOFED state test", func() { })) objs, err := ofedState.GetManifestObjects(ctx, cr, catalog, testLogger) Expect(err).NotTo(HaveOccurred()) - // Expect 4 objects: DaemonSet, Service Account, Role, RoleBinding + // Expect 4 objects: DaemonSet, Service Account, ClusterRole, ClusterRoleBinding Expect(len(objs)).To(Equal(4)) By("Verify DaemonSet") for _, obj := range objs { @@ -271,6 +290,89 @@ var _ = Describe("MOFED state test", func() { } }) }) + Context("Render Manifests DTK", func() { + It("Should Render DaemonSet with DTK", func() { + dtkImageName := "quay.io/openshift-release-dev/ocp-v4.0-art-dev:414" + dtkImageStream := &apiimagev1.ImageStream{ + TypeMeta: metav1.TypeMeta{ + Kind: "ImageStream", + APIVersion: "image/v1", + }, + ObjectMeta: metav1.ObjectMeta{ + Name: "driver-toolkit", + Namespace: "openshift", + }, + Spec: apiimagev1.ImageStreamSpec{ + Tags: []apiimagev1.TagReference{ + { + Name: rhcosOsTree, + From: &v1.ObjectReference{ + Kind: "DockerImage", + Name: dtkImageName, + }, + }, + }, + }, + } + scheme := runtime.NewScheme() + Expect(apiimagev1.AddToScheme(scheme)).NotTo(HaveOccurred()) + client := fake.NewClientBuilder().WithScheme(scheme).WithObjects(dtkImageStream).Build() + manifestBaseDir := "../../manifests/state-ofed-driver" + + files, err := utils.GetFilesWithSuffix(manifestBaseDir, render.ManifestFileSuffix...) + Expect(err).NotTo(HaveOccurred()) + renderer := render.NewRenderer(files) + + ofedState := stateOFED{ + stateSkel: stateSkel{ + name: stateOFEDName, + description: stateOFEDDescription, + client: client, + scheme: scheme, + renderer: renderer, + }, + } + cr := &v1alpha1.NicClusterPolicy{} + cr.Name = "nic-cluster-policy" + cr.Spec.OFEDDriver = &v1alpha1.OFEDDriverSpec{ + UseOCPDriverToolkit: true, + ImageSpec: v1alpha1.ImageSpec{ + Image: "mofed", + Repository: "nvcr.io/mellanox", + Version: "23.10-0.5.5.0", + }, + } + By("Creating NodeProvider with 1 Node with RHCOS OS TREE label") + node := getNode("node1") + node.Labels[nodeinfo.NodeLabelOSTreeVersion] = rhcosOsTree + infoProvider := nodeinfo.NewProvider([]*v1.Node{ + node, + }) + catalog := NewInfoCatalog() + catalog.Add(InfoTypeClusterType, &openShiftClusterProvider{}) + catalog.Add(InfoTypeNodeInfo, infoProvider) + objs, err := ofedState.GetManifestObjects(ctx, cr, catalog, testLogger) + Expect(err).NotTo(HaveOccurred()) + // Expect 6 object due to OpenShift: DaemonSet, Service Account, ClusterRole, ClusterRoleBinding + // Role, RoleBinding + Expect(len(objs)).To(Equal(6)) + By("Verify DaemonSet with DTK") + for _, obj := range objs { + if obj.GetKind() != "DaemonSet" { + continue + } + ds := appsv1.DaemonSet{} + err = runtime.DefaultUnstructuredConverter.FromUnstructured(obj.Object, &ds) + Expect(err).NotTo(HaveOccurred()) + By("Verify DaemonSet NodeSelector") + verifyDSNodeSelector(ds.Spec.Template.Spec.NodeSelector) + By("Verify DTK container image") + Expect(len(ds.Spec.Template.Spec.Containers)).To(Equal(2)) + dtkContainer := ds.Spec.Template.Spec.Containers[1] + Expect(dtkContainer.Image).To(Equal(dtkImageName)) + } + }) + }) }) func verifyPodAntiInfinity(affinity *v1.Affinity) {