Skip to content

Commit

Permalink
feat: Support OFED with DTK
Browse files Browse the repository at this point in the history
In OpenShift, a cluster-wide entitlement must be installed
for the OFED container to be able to download and compile
the needed kernel files.

This requirement is not user friendly.

To avoid this, OpenShift distributions ship a container image
that already contains the needed files.
This image is called the Driver Toolkit (DTK).

By using this container as a sidecar to the MOFED container,
the modules can be compiled without an entitlement.

Changes:
API:
 - NicClusterPolicy CRD: add bool 'useOcpDriverToolkit'
   under 'ofedDriver'. Default is 'true'
 - Helm : add support to 'useOcpDriverToolkit'

OFED state:
 - When running on OCP and 'useOcpDriverToolkit' is true,
   find the DTK image based on the node's NFD label.
 - If available, add a DTK container to the MOFED DS and
   change the entrypoint logic.

Signed-off-by: Fred Rolland <frolland@nvidia.com>
  • Loading branch information
rollandf committed Dec 25, 2023
1 parent 49e9bea commit a274a04
Show file tree
Hide file tree
Showing 15 changed files with 222 additions and 1 deletion.
4 changes: 4 additions & 0 deletions api/v1alpha1/nicclusterpolicy_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,10 @@ type OFEDDriverSpec struct {
// +kubebuilder:default:=300
// +kubebuilder:validation:Minimum:=0
TerminationGracePeriodSeconds int64 `json:"terminationGracePeriodSeconds,omitempty"`
// UseOCPDriverToolkit indicates if DriverToolkit image should be used on OpenShift to build and install driver modules
// +kubebuilder:default:=true
// +optional
UseOCPDriverToolkit bool `json:"useOcpDriverToolkit"`
}

// DriverUpgradePolicySpec describes policy configuration for automatic upgrades
Expand Down
5 changes: 5 additions & 0 deletions config/crd/bases/mellanox.com_nicclusterpolicies.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -719,6 +719,11 @@ spec:
type: integer
type: object
type: object
useOcpDriverToolkit:
default: true
description: UseOCPDriverToolkit indicates if DriverToolkit image
should be used on OpenShift to build and install driver modules
type: boolean
version:
pattern: '[a-zA-Z0-9\.-]+'
type: string
Expand Down
8 changes: 8 additions & 0 deletions config/rbac/role.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,14 @@ rules:
- create
- patch
- update
- apiGroups:
- image.openshift.io
resources:
- imagestreams
verbs:
- get
- list
- watch
- apiGroups:
- k8s.cni.cncf.io
resources:
Expand Down
1 change: 1 addition & 0 deletions controllers/nicclusterpolicy_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@ type NicClusterPolicyReconciler struct {
// +kubebuilder:rbac:groups=nv-ipam.nvidia.com,resources=ippools/status,verbs=get;update;patch;
// +kubebuilder:rbac:groups=cert-manager.io,resources=issuers;certificates,verbs=get;list;watch;create;update;patch;delete
// +kubebuilder:rbac:groups=admissionregistration.k8s.io,resources=validatingwebhookconfigurations,verbs=get;list;watch;create;update;patch;delete
// +kubebuilder:rbac:groups=image.openshift.io,resources=imagestreams,verbs=get;list;watch

// Reconcile is part of the main kubernetes reconciliation loop which aims to
// move the current state of the cluster closer to the desired state.
Expand Down
1 change: 1 addition & 0 deletions deployment/network-operator/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -447,6 +447,7 @@ containerResources:
| `ofedDriver.upgradePolicy.waitForCompletion.podSelector` | string | not set | specifies a label selector for the pods to wait for completion before starting the driver upgrade |
| `ofedDriver.upgradePolicy.waitForCompletion.timeoutSeconds` | int | not set | specify the length of time in seconds to wait before giving up for workload to finish, zero means infinite |
| `ofedDriver.containerResources` | [] | not set | Optional [resource requests and limits](#container-resources) for the `mofed-container` container |
| `ofedDriver.useOcpDriverToolkit` | bool | `true` | In OpenShift, use Driver Toolkit image to compile OFED drivers |

#### RDMA Device Plugin

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -719,6 +719,11 @@ spec:
type: integer
type: object
type: object
useOcpDriverToolkit:
default: true
description: UseOCPDriverToolkit indicates if DriverToolkit image
should be used on OpenShift to build and install driver modules
type: boolean
version:
pattern: '[a-zA-Z0-9\.-]+'
type: string
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ spec:
image: {{ .Values.ofedDriver.image }}
repository: {{ .Values.ofedDriver.repository }}
version: {{ .Values.ofedDriver.version }}
useOcpDriverToolkit: {{ .Values.ofedDriver.useOcpDriverToolkit }}
{{- if .Values.ofedDriver.env }}
env:
{{ toYaml .Values.ofedDriver.env | nindent 6 }}
Expand Down
8 changes: 8 additions & 0 deletions deployment/network-operator/templates/role.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -195,6 +195,14 @@ rules:
- create
- patch
- update
- apiGroups:
- image.openshift.io
resources:
- imagestreams
verbs:
- get
- list
- watch
- apiGroups:
- k8s.cni.cncf.io
resources:
Expand Down
1 change: 1 addition & 0 deletions deployment/network-operator/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -226,6 +226,7 @@ ofedDriver:
# podSelector: "app=myapp"
# specify the length of time in seconds to wait before giving up for workload to finish, zero means infinite
# timeoutSeconds: 300
useOcpDriverToolkit: true

rdmaSharedDevicePlugin:
deploy: true
Expand Down
1 change: 1 addition & 0 deletions hack/templates/values/values.template
Original file line number Diff line number Diff line change
Expand Up @@ -226,6 +226,7 @@ ofedDriver:
# podSelector: "app=myapp"
# specify the length of time in seconds to wait before giving up for workload to finish, zero means infinite
# timeoutSeconds: 300
useOcpDriverToolkit: true

rdmaSharedDevicePlugin:
deploy: true
Expand Down
2 changes: 2 additions & 0 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ import (
"github.com/NVIDIA/k8s-operator-libs/pkg/upgrade"
netattdefv1 "github.com/k8snetworkplumbingwg/network-attachment-definition-client/pkg/apis/k8s.cni.cncf.io/v1"
osconfigv1 "github.com/openshift/api/config/v1"
imagev1 "github.com/openshift/api/image/v1"
"k8s.io/apimachinery/pkg/runtime"
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
clientgoscheme "k8s.io/client-go/kubernetes/scheme"
Expand Down Expand Up @@ -59,6 +60,7 @@ func init() {
utilruntime.Must(mellanoxcomv1alpha1.AddToScheme(scheme))
utilruntime.Must(netattdefv1.AddToScheme(scheme))
utilruntime.Must(osconfigv1.AddToScheme(scheme))
utilruntime.Must(imagev1.AddToScheme(scheme))
// +kubebuilder:scaffold:scheme
}

Expand Down
36 changes: 36 additions & 0 deletions manifests/state-ofed-driver/0050_ofed-driver-ds.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,10 @@ spec:
- image: {{ .RuntimeSpec.MOFEDImageName }}
imagePullPolicy: IfNotPresent
name: mofed-container
{{- if .RuntimeSpec.UseDtk }}
command: ["ocp_dtk_entrypoint"]
args: ["nv-fs-ctr-run-with-dtk"]
{{- end }}
securityContext:
privileged: true
seLinuxOptions:
Expand Down Expand Up @@ -112,6 +116,10 @@ spec:
readOnly: {{ .ReadOnly }}
{{- end }}
{{- end }}
{{- if .RuntimeSpec.UseDtk }}
- name: shared-doca-driver-toolkit
mountPath: /mnt/shared-doca-driver-toolkit
{{- end}}
{{- with index .RuntimeSpec.ContainerResources "mofed-container" }}
resources:
{{- if .Requests }}
Expand Down Expand Up @@ -146,6 +154,30 @@ spec:
initialDelaySeconds: {{ .CrSpec.ReadinessProbe.InitialDelaySeconds }}
failureThreshold: 1
periodSeconds: {{ .CrSpec.ReadinessProbe.PeriodSeconds }}
{{- if .RuntimeSpec.UseDtk }}
- image: {{ .RuntimeSpec.DtkImageName }}
imagePullPolicy: IfNotPresent
name: openshift-driver-toolkit-ctr
command: [bash, -xc]
args: ["until [ -f /mnt/shared-doca-driver-toolkit/dir_prepared ]; do echo Waiting for doca-driver container to prepare the shared directory ...; sleep 10; done; exec /mnt/shared-doca-driver-toolkit/ocp_dtk_entrypoint dtk-build-driver"]
{{- if .CrSpec.Env }}
env:
{{- range .CrSpec.Env }}
{{ . | yaml | nindentPrefix 14 "- " }}
{{- end }}
{{- end }}
volumeMounts:
{{- if.AdditionalVolumeMounts.VolumeMounts }}
{{- range .AdditionalVolumeMounts.VolumeMounts }}
- name: {{ .Name }}
mountPath: {{ .MountPath }}
subPath: {{ .SubPath }}
readOnly: {{ .ReadOnly }}
{{- end }}
{{- end }}
- name: shared-doca-driver-toolkit
mountPath: /mnt/shared-doca-driver-toolkit
{{- end }}
# unloading OFED modules can take more time than default terminationGracePeriod (30 sec)
terminationGracePeriodSeconds: {{ .CrSpec.TerminationGracePeriodSeconds }}
volumes:
Expand Down Expand Up @@ -176,6 +208,10 @@ spec:
{{ . | yaml | nindentPrefix 14 "- " }}
{{- end }}
{{- end }}
{{- if .RuntimeSpec.UseDtk }}
- name: shared-doca-driver-toolkit
emptyDir: {}
{{- end }}
nodeSelector:
feature.node.kubernetes.io/pci-15b3.present: "true"
feature.node.kubernetes.io/system-os_release.ID: {{ .RuntimeSpec.OSName }}
Expand Down
4 changes: 4 additions & 0 deletions pkg/nodeinfo/attributes.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ const (
NodeLabelNvGPU = "nvidia.com/gpu.present"
NodeLabelWaitOFED = "network.nvidia.com/operator.mofed.wait"
NodeLabelCudaVersionMajor = "nvidia.com/cuda.driver.major"
NodeLabelOSTreeVersion = "feature.node.kubernetes.io/system-os_release.OSTREE_VERSION"
)

// AttributeType categorizes Attributes of the host.
Expand All @@ -51,6 +52,7 @@ const (
AttrTypeOSVer
// optional attrs
AttrTypeCudaVersionMajor
AttrTypeOSTreeVersion

OptionalAttrsStart = AttrTypeCudaVersionMajor
)
Expand All @@ -66,6 +68,8 @@ var attrToLabel = []string{
NodeLabelOSVer,
// AttrTypeCudaVersionMajor
NodeLabelCudaVersionMajor,
// AttrTypeOSTreeVersion
NodeLabelOSTreeVersion,
}

// NodeAttributes provides attributes of a specific node
Expand Down
42 changes: 42 additions & 0 deletions pkg/state/state_ofed.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ import (
"github.com/NVIDIA/k8s-operator-libs/pkg/upgrade"
"github.com/go-logr/logr"
osconfigv1 "github.com/openshift/api/config/v1"
apiimagev1 "github.com/openshift/api/image/v1"
"github.com/pkg/errors"
appsv1 "k8s.io/api/apps/v1"
v1 "k8s.io/api/core/v1"
Expand Down Expand Up @@ -156,6 +157,8 @@ type ofedRuntimeSpec struct {
// is true if cluster type is Openshift
IsOpenshift bool
ContainerResources ContainerResourcesMap
UseDtk bool
DtkImageName string
}

type ofedManifestRenderData struct {
Expand Down Expand Up @@ -410,6 +413,19 @@ func (s *stateOFED) GetManifestObjects(
}
nodeAttr := attrs[0].Attributes

useDtk := clusterInfo.IsOpenshift() && cr.Spec.OFEDDriver.UseOCPDriverToolkit
var dtkImageName string
if useDtk {
if err := s.checkAttributesExist(attrs[0], nodeinfo.AttrTypeOSTreeVersion); err != nil {
return nil, err
}
dtk, err := s.getOCPDriverToolkitImage(ctx, nodeAttr[nodeinfo.AttrTypeOSTreeVersion])
if err != nil {
return nil, fmt.Errorf("failed to get OpenShift DTK image : %v", err)
}
dtkImageName = dtk
}

setProbesDefaults(cr)

// Update MOFED Env variables with defaults for the cluster
Expand Down Expand Up @@ -441,6 +457,8 @@ func (s *stateOFED) GetManifestObjects(
config.FromEnv().State.OFEDState.InitContainerImage),
IsOpenshift: clusterInfo.IsOpenshift(),
ContainerResources: createContainerResourcesMap(cr.Spec.OFEDDriver.ContainerResources),
UseDtk: useDtk,
DtkImageName: dtkImageName,
},
Tolerations: cr.Spec.Tolerations,
NodeAffinity: cr.Spec.NodeAffinity,
Expand Down Expand Up @@ -735,3 +753,27 @@ func (s *stateOFED) handleRepoConfig(
}
return nil
}

// getOCPDriverToolkitImage returns the OpenShift Driver Toolkit (DTK) image that matches
// the given RHCOS version.
//
// It fetches the "driver-toolkit" ImageStream from the "openshift" namespace and scans its
// spec tags for one whose name equals rhcosVersion, returning the name of the object that
// tag points to. If several tags share the name, the last usable one wins.
//
// An error is returned when the ImageStream cannot be retrieved, or when no tag named
// rhcosVersion carries a non-empty source reference.
func (s *stateOFED) getOCPDriverToolkitImage(ctx context.Context, rhcosVersion string) (string, error) {
	const (
		dtkImageStreamName      = "driver-toolkit"
		dtkImageStreamNamespace = "openshift"
	)

	reqLogger := log.FromContext(ctx)
	dtkImageStream := &apiimagev1.ImageStream{}
	key := types.NamespacedName{Namespace: dtkImageStreamNamespace, Name: dtkImageStreamName}
	if err := s.client.Get(ctx, key, dtkImageStream); err != nil {
		reqLogger.Error(err, "Couldn't get the driver-toolkit imagestream")
		return "", fmt.Errorf("getting imagestream %s/%s: %w", dtkImageStreamNamespace, dtkImageStreamName, err)
	}
	reqLogger.Info("ocpDriverToolkitImages: driver-toolkit imagestream found")

	var image string
	tags := dtkImageStream.Spec.Tags
	for i := range tags {
		tag := &tags[i]
		if tag.Name != rhcosVersion {
			continue
		}
		// From is an optional pointer on a tag reference; dereferencing it blindly
		// would panic on tags that have no source. An empty name is equally unusable
		// as a container image, so such tags are skipped.
		if tag.From == nil || tag.From.Name == "" {
			continue
		}
		image = tag.From.Name
	}

	if image == "" {
		return "", fmt.Errorf("failed to find DTK image for RHCOS version: %v", rhcosVersion)
	}
	return image, nil
}
Loading

0 comments on commit a274a04

Please sign in to comment.