From e8e8f6c91f30ee40bf4e0e65a87eeed3b819aa66 Mon Sep 17 00:00:00 2001 From: Fred Rolland Date: Wed, 22 Nov 2023 18:12:15 +0200 Subject: [PATCH] feat: Support OFED with DTK In Openshift, in order to OFED containter to be able to download and compile the needed Kernel files, it is required to install a cluster-wide entitlement. This requirement is not user friendly. In order to avoid this, a container image with the needed files is available in Openshift distributions. This image is called DriverToolKit aka DTK. By using this container as a side-car to MOFED container, the modules can be compiled without entitlement. Changes: API: - DTK is 'true' by default, and can be changed by env variable in the Operator Deployment OFED state: - In case of OCP and 'useOcpDriverToolkit' is true, find DTK image based on NFD label of node. - If available, add to MOFED DS a DTK container, change entrypoint logic. Signed-off-by: Fred Rolland --- config/manager/manager.yaml | 2 + config/rbac/role.yaml | 8 ++ controllers/nicclusterpolicy_controller.go | 1 + .../network-operator/templates/operator.yaml | 2 + .../network-operator/templates/role.yaml | 8 ++ main.go | 9 +- .../0050_ofed-driver-ds.yaml | 39 +++++++ pkg/nodeinfo/attributes.go | 4 + pkg/state/state_ofed.go | 46 ++++++++ pkg/state/state_ofed_test.go | 108 +++++++++++++++++- pkg/staticconfig/static_config.go | 3 +- 11 files changed, 227 insertions(+), 3 deletions(-) diff --git a/config/manager/manager.yaml b/config/manager/manager.yaml index 931374f40..d3c76f1f2 100644 --- a/config/manager/manager.yaml +++ b/config/manager/manager.yaml @@ -68,6 +68,8 @@ spec: fieldPath: metadata.namespace - name: ENABLE_WEBHOOKS value: "false" + - name: USE_DTK + value: "true" securityContext: allowPrivilegeEscalation: false livenessProbe: diff --git a/config/rbac/role.yaml b/config/rbac/role.yaml index 90cdb476a..1dab46119 100644 --- a/config/rbac/role.yaml +++ b/config/rbac/role.yaml @@ -180,6 +180,14 @@ rules: - create - patch - update +- apiGroups: + - image.openshift.io + resources: + - imagestreams + verbs: + - get + - list + - watch - apiGroups: - k8s.cni.cncf.io resources: diff --git a/controllers/nicclusterpolicy_controller.go b/controllers/nicclusterpolicy_controller.go index dbad39fee..32bfe08d2 100644 --- a/controllers/nicclusterpolicy_controller.go +++ b/controllers/nicclusterpolicy_controller.go @@ -82,6 +82,7 @@ type NicClusterPolicyReconciler struct { // +kubebuilder:rbac:groups=nv-ipam.nvidia.com,resources=ippools/status,verbs=get;update;patch; // +kubebuilder:rbac:groups=cert-manager.io,resources=issuers;certificates,verbs=get;list;watch;create;update;patch;delete // +kubebuilder:rbac:groups=admissionregistration.k8s.io,resources=validatingwebhookconfigurations,verbs=get;list;watch;create;update;patch;delete +// +kubebuilder:rbac:groups=image.openshift.io,resources=imagestreams,verbs=get;list;watch // Reconcile is part of the main kubernetes reconciliation loop which aims to // move the current state of the cluster closer to the desired state. diff --git a/deployment/network-operator/templates/operator.yaml b/deployment/network-operator/templates/operator.yaml index 07ccb460c..81493fe11 100644 --- a/deployment/network-operator/templates/operator.yaml +++ b/deployment/network-operator/templates/operator.yaml @@ -78,6 +78,8 @@ spec: value: "network-operator" - name: ENABLE_WEBHOOKS value: "{{ .Values.operator.admissionController.enabled }}" + - name: USE_DTK + value: "true" {{- if .Values.operator.cniBinDirectory }} - name: CNI_BIN_DIR value: "{{ .Values.operator.cniBinDirectory }}" diff --git a/deployment/network-operator/templates/role.yaml b/deployment/network-operator/templates/role.yaml index 0352e5c68..b46053b00 100644 --- a/deployment/network-operator/templates/role.yaml +++ b/deployment/network-operator/templates/role.yaml @@ -195,6 +195,14 @@ rules: - create - patch - update +- apiGroups: + - image.openshift.io + resources: + - imagestreams + verbs: + - get + - list + - watch - apiGroups: - k8s.cni.cncf.io resources: diff --git a/main.go b/main.go index 3c347dee3..b7da95e3a 100644 --- a/main.go +++ b/main.go @@ -25,6 +25,7 @@ import ( "github.com/NVIDIA/k8s-operator-libs/pkg/upgrade" netattdefv1 "github.com/k8snetworkplumbingwg/network-attachment-definition-client/pkg/apis/k8s.cni.cncf.io/v1" osconfigv1 "github.com/openshift/api/config/v1" + imagev1 "github.com/openshift/api/image/v1" "k8s.io/apimachinery/pkg/runtime" utilruntime "k8s.io/apimachinery/pkg/util/runtime" clientgoscheme "k8s.io/client-go/kubernetes/scheme" @@ -59,6 +60,7 @@ func init() { utilruntime.Must(mellanoxcomv1alpha1.AddToScheme(scheme)) utilruntime.Must(netattdefv1.AddToScheme(scheme)) utilruntime.Must(osconfigv1.AddToScheme(scheme)) + utilruntime.Must(imagev1.AddToScheme(scheme)) // +kubebuilder:scaffold:scheme } @@ -84,7 +86,12 @@ func setupCRDControllers(ctx context.Context, c client.Client, mgr ctrl.Manager) clusterTypeProvider, err := clustertype.NewProvider(ctx, c) cniBinDir := os.Getenv("CNI_BIN_DIR") - staticInfoProvider := staticconfig.NewProvider(staticconfig.StaticConfig{CniBinDirectory: cniBinDir}) + useDTK := true + if os.Getenv("USE_DTK") == "false" { + useDTK = false + } + staticInfoProvider := staticconfig.NewProvider( + staticconfig.StaticConfig{CniBinDirectory: cniBinDir, UseOcpDriverToolkit: useDTK}) if err != nil { setupLog.Error(err, "unable to create cluster type provider") diff --git a/manifests/state-ofed-driver/0050_ofed-driver-ds.yaml b/manifests/state-ofed-driver/0050_ofed-driver-ds.yaml index 958b36c80..f68a8f070 100644 --- a/manifests/state-ofed-driver/0050_ofed-driver-ds.yaml +++ b/manifests/state-ofed-driver/0050_ofed-driver-ds.yaml @@ -80,6 +80,10 @@ spec: - image: {{ .RuntimeSpec.MOFEDImageName }} imagePullPolicy: IfNotPresent name: mofed-container + {{- if .RuntimeSpec.UseDtk }} + command: ["ocp_dtk_entrypoint"] + args: ["nv-fs-ctr-run-with-dtk"] + {{- end }} securityContext: privileged: true seLinuxOptions: @@ -112,6 +116,10 @@ spec: readOnly: {{ .ReadOnly }} {{- end }} {{- end }} + {{- if .RuntimeSpec.UseDtk }} + - name: shared-doca-driver-toolkit + mountPath: /mnt/shared-doca-driver-toolkit + {{- end}} {{- with index .RuntimeSpec.ContainerResources "mofed-container" }} resources: {{- if .Requests }} @@ -146,6 +154,33 @@ spec: initialDelaySeconds: {{ .CrSpec.ReadinessProbe.InitialDelaySeconds }} failureThreshold: 1 periodSeconds: {{ .CrSpec.ReadinessProbe.PeriodSeconds }} + {{- if .RuntimeSpec.UseDtk }} + - image: {{ .RuntimeSpec.DtkImageName }} + imagePullPolicy: IfNotPresent + name: openshift-driver-toolkit-ctr + command: [bash, -xc] + args: ["until [ -f /mnt/shared-doca-driver-toolkit/dir_prepared ]; do echo Waiting for doca-driver container to prepare the shared directory ...; sleep 10; done; exec /mnt/shared-doca-driver-toolkit/ocp_dtk_entrypoint dtk-build-driver"] + {{- if .CrSpec.Env }} + env: + {{- range .CrSpec.Env }} + {{ . | yaml | nindentPrefix 14 "- " }} + {{- end }} + {{- end }} + volumeMounts: + - name: shared-doca-driver-toolkit + mountPath: /mnt/shared-doca-driver-toolkit + {{- with index .RuntimeSpec.ContainerResources "openshift-driver-toolkit-ctr" }} + resources: + {{- if .Requests }} + requests: + {{ .Requests | yaml | nindent 14}} + {{- end }} + {{- if .Limits }} + limits: + {{ .Limits | yaml | nindent 14}} + {{- end }} + {{- end }} + {{- end }} # unloading OFED modules can take more time than default terminationGracePeriod (30 sec) terminationGracePeriodSeconds: {{ .CrSpec.TerminationGracePeriodSeconds }} volumes: @@ -176,6 +211,10 @@ spec: {{ . | yaml | nindentPrefix 14 "- " }} {{- end }} {{- end }} + {{- if .RuntimeSpec.UseDtk }} + - name: shared-doca-driver-toolkit + emptyDir: {} + {{- end }} nodeSelector: feature.node.kubernetes.io/pci-15b3.present: "true" feature.node.kubernetes.io/system-os_release.ID: {{ .RuntimeSpec.OSName }} diff --git a/pkg/nodeinfo/attributes.go b/pkg/nodeinfo/attributes.go index 597a716ec..8c937ca9b 100644 --- a/pkg/nodeinfo/attributes.go +++ b/pkg/nodeinfo/attributes.go @@ -37,6 +37,7 @@ const ( NodeLabelNvGPU = "nvidia.com/gpu.present" NodeLabelWaitOFED = "network.nvidia.com/operator.mofed.wait" NodeLabelCudaVersionMajor = "nvidia.com/cuda.driver.major" + NodeLabelOSTreeVersion = "feature.node.kubernetes.io/system-os_release.OSTREE_VERSION" ) // AttributeType categorizes Attributes of the host. @@ -51,6 +52,7 @@ const ( AttrTypeOSVer // optional attrs AttrTypeCudaVersionMajor + AttrTypeOSTreeVersion OptionalAttrsStart = AttrTypeCudaVersionMajor ) @@ -66,6 +68,8 @@ var attrToLabel = []string{ NodeLabelOSVer, // AttrTypeCudaVersionMajor NodeLabelCudaVersionMajor, + // AttrTypeOSTreeVersion + NodeLabelOSTreeVersion, } // NodeAttributes provides attributes of a specific node diff --git a/pkg/state/state_ofed.go b/pkg/state/state_ofed.go index 59919e8e0..1d55e3a31 100644 --- a/pkg/state/state_ofed.go +++ b/pkg/state/state_ofed.go @@ -28,6 +28,7 @@ import ( "github.com/NVIDIA/k8s-operator-libs/pkg/upgrade" "github.com/go-logr/logr" osconfigv1 "github.com/openshift/api/config/v1" + apiimagev1 "github.com/openshift/api/image/v1" "github.com/pkg/errors" appsv1 "k8s.io/api/apps/v1" v1 "k8s.io/api/core/v1" @@ -156,6 +157,8 @@ type ofedRuntimeSpec struct { // is true if cluster type is Openshift IsOpenshift bool ContainerResources ContainerResourcesMap + UseDtk bool + DtkImageName string } type ofedManifestRenderData struct { @@ -394,6 +397,10 @@ func (s *stateOFED) GetManifestObjects( if clusterInfo == nil { return nil, errors.New("clusterInfo provider required") } + staticConfig := catalog.GetStaticConfigProvider() + if staticConfig == nil { + return nil, errors.New("staticConfig provider required") + } attrs := nodeInfo.GetNodesAttributes( nodeinfo.NewNodeLabelFilterBuilder().WithLabel(nodeinfo.NodeLabelMlnxNIC, "true").Build()) if len(attrs) == 0 { @@ -410,6 +417,19 @@ func (s *stateOFED) GetManifestObjects( } nodeAttr := attrs[0].Attributes + useDtk := clusterInfo.IsOpenshift() && staticConfig.GetStaticConfig().UseOcpDriverToolkit + var dtkImageName string + if useDtk { + if err := s.checkAttributesExist(attrs[0], nodeinfo.AttrTypeOSTreeVersion); err != nil { + return nil, err + } + dtk, err := s.getOCPDriverToolkitImage(ctx, nodeAttr[nodeinfo.AttrTypeOSTreeVersion]) + if err != nil { + return nil, fmt.Errorf("failed to get OpenShift DTK image : %v", err) + } + dtkImageName = dtk + } + setProbesDefaults(cr) // Update MOFED Env variables with defaults for the cluster @@ -441,6 +461,8 @@ func (s *stateOFED) GetManifestObjects( config.FromEnv().State.OFEDState.InitContainerImage), IsOpenshift: clusterInfo.IsOpenshift(), ContainerResources: createContainerResourcesMap(cr.Spec.OFEDDriver.ContainerResources), + UseDtk: useDtk, + DtkImageName: dtkImageName, }, Tolerations: cr.Spec.Tolerations, NodeAffinity: cr.Spec.NodeAffinity, @@ -735,3 +757,27 @@ func (s *stateOFED) handleRepoConfig( } return nil } + +// getOCPDriverToolkitImage gets the DTK ImageStream and return the DTK image according to OSTREE version +func (s *stateOFED) getOCPDriverToolkitImage(ctx context.Context, ostreeVersion string) (string, error) { + reqLogger := log.FromContext(ctx) + dtkImageStream := &apiimagev1.ImageStream{} + name := "driver-toolkit" + namespace := "openshift" + err := s.client.Get(ctx, types.NamespacedName{Namespace: namespace, Name: name}, dtkImageStream) + if err != nil { + reqLogger.Error(err, "Couldn't get the driver-toolkit imagestream") + return "", err + } + rhcosDriverToolkitImages := make(map[string]string) + reqLogger.Info("ocpDriverToolkitImages: driver-toolkit imagestream found") + for _, tag := range dtkImageStream.Spec.Tags { + rhcosDriverToolkitImages[tag.Name] = tag.From.Name + } + + image, ok := rhcosDriverToolkitImages[ostreeVersion] + if !ok { + return "", fmt.Errorf("failed to find DTK image for RHCOS version: %v", ostreeVersion) + } + return image, nil +} diff --git a/pkg/state/state_ofed_test.go b/pkg/state/state_ofed_test.go index bff66e865..6930c0f2b 100644 --- a/pkg/state/state_ofed_test.go +++ b/pkg/state/state_ofed_test.go @@ -26,13 +26,17 @@ import ( . "github.com/onsi/gomega" osconfigv1 "github.com/openshift/api/config/v1" + apiimagev1 "github.com/openshift/api/image/v1" appsv1 "k8s.io/api/apps/v1" v1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "sigs.k8s.io/controller-runtime/pkg/client/fake" "github.com/Mellanox/network-operator/api/v1alpha1" + "github.com/Mellanox/network-operator/pkg/clustertype" "github.com/Mellanox/network-operator/pkg/nodeinfo" "github.com/Mellanox/network-operator/pkg/render" + "github.com/Mellanox/network-operator/pkg/staticconfig" "github.com/Mellanox/network-operator/pkg/testing/mocks" "github.com/Mellanox/network-operator/pkg/utils" ) @@ -45,8 +49,24 @@ const ( testNicPolicyNoProxy = "no-proxy-policy" osName = "ubuntu" osVer = "22.04" + rhcosOsTree = "414.92.202311061957-0" ) +type openShiftClusterProvider struct { +} + +func (d *openShiftClusterProvider) GetClusterType() clustertype.Type { + return clustertype.Openshift +} + +func (d *openShiftClusterProvider) IsKubernetes() bool { + return false +} + +func (d *openShiftClusterProvider) IsOpenshift() bool { + return true +} + var _ = Describe("MOFED state test", func() { var stateOfed stateOFED var ctx context.Context @@ -249,13 +269,15 @@ var _ = Describe("MOFED state test", func() { } By("Creating NodeProvider with 1 Node") catalog := NewInfoCatalog() + staticInfoProvider := staticconfig.NewProvider(staticconfig.StaticConfig{UseOcpDriverToolkit: true}) + catalog.Add(InfoTypeStaticConfig, staticInfoProvider) catalog.Add(InfoTypeClusterType, &dummyProvider{}) catalog.Add(InfoTypeNodeInfo, nodeinfo.NewProvider([]*v1.Node{ getNode("node1"), })) objs, err := ofedState.GetManifestObjects(ctx, cr, catalog, testLogger) Expect(err).NotTo(HaveOccurred()) - // Expect 4 objects: DaemonSet, Service Account, Role, RoleBinding + // Expect 4 objects: DaemonSet, Service Account, ClusterRole, ClusterRoleBinding Expect(len(objs)).To(Equal(4)) By("Verify DaemonSet") for _, obj := range objs { @@ -271,6 +293,90 @@ var _ = Describe("MOFED state test", func() { } }) }) + Context("Render Manifests DTK", func() { + It("Should Render DaemonSet with DTK", func() { + dtkImageName := "quay.io/openshift-release-dev/ocp-v4.0-art-dev:414" + dtkImageStream := &apiimagev1.ImageStream{ + TypeMeta: metav1.TypeMeta{ + Kind: "ImageStream", + APIVersion: "image/v1", + }, + ObjectMeta: metav1.ObjectMeta{ + Name: "driver-toolkit", + Namespace: "openshift", + }, + Spec: apiimagev1.ImageStreamSpec{ + Tags: []apiimagev1.TagReference{ + { + Name: rhcosOsTree, + From: &v1.ObjectReference{ + Kind: "DockerImage", + Name: dtkImageName, + }, + }, + }, + }, + } + scheme := runtime.NewScheme() + Expect(apiimagev1.AddToScheme(scheme)).NotTo(HaveOccurred()) + client := fake.NewClientBuilder().WithScheme(scheme).WithObjects(dtkImageStream).Build() + manifestBaseDir := "../../manifests/state-ofed-driver" + + files, err := utils.GetFilesWithSuffix(manifestBaseDir, render.ManifestFileSuffix...) + Expect(err).NotTo(HaveOccurred()) + renderer := render.NewRenderer(files) + + ofedState := stateOFED{ + stateSkel: stateSkel{ + name: stateOFEDName, + description: stateOFEDDescription, + client: client, + scheme: scheme, + renderer: renderer, + }, + } + cr := &v1alpha1.NicClusterPolicy{} + cr.Name = "nic-cluster-policy" + cr.Spec.OFEDDriver = &v1alpha1.OFEDDriverSpec{ + ImageSpec: v1alpha1.ImageSpec{ + Image: "mofed", + Repository: "nvcr.io/mellanox", + Version: "23.10-0.5.5.0", + }, + } + By("Creating NodeProvider with 1 Node with RHCOS OS TREE label") + node := getNode("node1") + node.Labels[nodeinfo.NodeLabelOSTreeVersion] = rhcosOsTree + infoProvider := nodeinfo.NewProvider([]*v1.Node{ + node, + }) + catalog := NewInfoCatalog() + staticInfoProvider := staticconfig.NewProvider(staticconfig.StaticConfig{UseOcpDriverToolkit: true}) + catalog.Add(InfoTypeStaticConfig, staticInfoProvider) + catalog.Add(InfoTypeClusterType, &openShiftClusterProvider{}) + catalog.Add(InfoTypeNodeInfo, infoProvider) + objs, err := ofedState.GetManifestObjects(ctx, cr, catalog, testLogger) + Expect(err).NotTo(HaveOccurred()) + // Expect 6 object due to OpenShift: DaemonSet, Service Account, ClusterRole, ClusterRoleBinding + // Role, RoleBinding + Expect(len(objs)).To(Equal(6)) + By("Verify DaemonSet with DTK") + for _, obj := range objs { + if obj.GetKind() != "DaemonSet" { + continue + } + ds := appsv1.DaemonSet{} + err = runtime.DefaultUnstructuredConverter.FromUnstructured(obj.Object, &ds) + Expect(err).NotTo(HaveOccurred()) + By("Verify DaemonSet NodeSelector") + verifyDSNodeSelector(ds.Spec.Template.Spec.NodeSelector) + By("Verify DTK container image") + Expect(len(ds.Spec.Template.Spec.Containers)).To(Equal(2)) + dtkContainer := ds.Spec.Template.Spec.Containers[1] + Expect(dtkContainer.Image).To(Equal(dtkImageName)) + } + }) + }) }) func verifyPodAntiInfinity(affinity *v1.Affinity) { diff --git a/pkg/staticconfig/static_config.go b/pkg/staticconfig/static_config.go index a7b9211e3..44bb6e671 100644 --- a/pkg/staticconfig/static_config.go +++ b/pkg/staticconfig/static_config.go @@ -23,7 +23,8 @@ package staticconfig // StaticConfig holds static config for the operator. type StaticConfig struct { - CniBinDirectory string + CniBinDirectory string + UseOcpDriverToolkit bool } // Provider provides static cluster attributes