Skip to content

Commit

Permalink
feat: Support OFED with DTK
Browse files Browse the repository at this point in the history
In OpenShift, for the OFED container to be able
to download and compile the needed kernel files, a
cluster-wide entitlement must be installed.

This requirement is not user friendly.

To avoid this, a container image with the needed files
is available in OpenShift distributions.
This image is called the Driver Toolkit (DTK).

By using this container as a sidecar to the MOFED container,
the modules can be compiled without an entitlement.

Changes:
API:
 - DTK is 'true' by default, and can be changed by env
   variable in the Operator Deployment

OFED state:
 - On OCP, when 'useOcpDriverToolkit' is true,
   find the DTK image based on the node's NFD label.
 - If available, add a DTK container to the MOFED DaemonSet
   and change the entrypoint logic.

Signed-off-by: Fred Rolland <frolland@nvidia.com>
  • Loading branch information
rollandf committed Dec 25, 2023
1 parent 49e9bea commit e8e8f6c
Show file tree
Hide file tree
Showing 11 changed files with 227 additions and 3 deletions.
2 changes: 2 additions & 0 deletions config/manager/manager.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,8 @@ spec:
fieldPath: metadata.namespace
- name: ENABLE_WEBHOOKS
value: "false"
- name: USE_DTK
value: "true"
securityContext:
allowPrivilegeEscalation: false
livenessProbe:
Expand Down
8 changes: 8 additions & 0 deletions config/rbac/role.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,14 @@ rules:
- create
- patch
- update
- apiGroups:
- image.openshift.io
resources:
- imagestreams
verbs:
- get
- list
- watch
- apiGroups:
- k8s.cni.cncf.io
resources:
Expand Down
1 change: 1 addition & 0 deletions controllers/nicclusterpolicy_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@ type NicClusterPolicyReconciler struct {
// +kubebuilder:rbac:groups=nv-ipam.nvidia.com,resources=ippools/status,verbs=get;update;patch;
// +kubebuilder:rbac:groups=cert-manager.io,resources=issuers;certificates,verbs=get;list;watch;create;update;patch;delete
// +kubebuilder:rbac:groups=admissionregistration.k8s.io,resources=validatingwebhookconfigurations,verbs=get;list;watch;create;update;patch;delete
// +kubebuilder:rbac:groups=image.openshift.io,resources=imagestreams,verbs=get;list;watch

// Reconcile is part of the main kubernetes reconciliation loop which aims to
// move the current state of the cluster closer to the desired state.
Expand Down
2 changes: 2 additions & 0 deletions deployment/network-operator/templates/operator.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,8 @@ spec:
value: "network-operator"
- name: ENABLE_WEBHOOKS
value: "{{ .Values.operator.admissionController.enabled }}"
- name: USE_DTK
value: "true"
{{- if .Values.operator.cniBinDirectory }}
- name: CNI_BIN_DIR
value: "{{ .Values.operator.cniBinDirectory }}"
Expand Down
8 changes: 8 additions & 0 deletions deployment/network-operator/templates/role.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -195,6 +195,14 @@ rules:
- create
- patch
- update
- apiGroups:
- image.openshift.io
resources:
- imagestreams
verbs:
- get
- list
- watch
- apiGroups:
- k8s.cni.cncf.io
resources:
Expand Down
9 changes: 8 additions & 1 deletion main.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ import (
"github.com/NVIDIA/k8s-operator-libs/pkg/upgrade"
netattdefv1 "github.com/k8snetworkplumbingwg/network-attachment-definition-client/pkg/apis/k8s.cni.cncf.io/v1"
osconfigv1 "github.com/openshift/api/config/v1"
imagev1 "github.com/openshift/api/image/v1"
"k8s.io/apimachinery/pkg/runtime"
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
clientgoscheme "k8s.io/client-go/kubernetes/scheme"
Expand Down Expand Up @@ -59,6 +60,7 @@ func init() {
utilruntime.Must(mellanoxcomv1alpha1.AddToScheme(scheme))
utilruntime.Must(netattdefv1.AddToScheme(scheme))
utilruntime.Must(osconfigv1.AddToScheme(scheme))
utilruntime.Must(imagev1.AddToScheme(scheme))
// +kubebuilder:scaffold:scheme
}

Expand All @@ -84,7 +86,12 @@ func setupCRDControllers(ctx context.Context, c client.Client, mgr ctrl.Manager)
clusterTypeProvider, err := clustertype.NewProvider(ctx, c)

cniBinDir := os.Getenv("CNI_BIN_DIR")
staticInfoProvider := staticconfig.NewProvider(staticconfig.StaticConfig{CniBinDirectory: cniBinDir})
useDTK := true
if os.Getenv("USE_DTK") == "false" {
useDTK = false
}
staticInfoProvider := staticconfig.NewProvider(
staticconfig.StaticConfig{CniBinDirectory: cniBinDir, UseOcpDriverToolkit: useDTK})

if err != nil {
setupLog.Error(err, "unable to create cluster type provider")
Expand Down
39 changes: 39 additions & 0 deletions manifests/state-ofed-driver/0050_ofed-driver-ds.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,10 @@ spec:
- image: {{ .RuntimeSpec.MOFEDImageName }}
imagePullPolicy: IfNotPresent
name: mofed-container
{{- if .RuntimeSpec.UseDtk }}
command: ["ocp_dtk_entrypoint"]
args: ["nv-fs-ctr-run-with-dtk"]
{{- end }}
securityContext:
privileged: true
seLinuxOptions:
Expand Down Expand Up @@ -112,6 +116,10 @@ spec:
readOnly: {{ .ReadOnly }}
{{- end }}
{{- end }}
{{- if .RuntimeSpec.UseDtk }}
- name: shared-doca-driver-toolkit
mountPath: /mnt/shared-doca-driver-toolkit
{{- end}}
{{- with index .RuntimeSpec.ContainerResources "mofed-container" }}
resources:
{{- if .Requests }}
Expand Down Expand Up @@ -146,6 +154,33 @@ spec:
initialDelaySeconds: {{ .CrSpec.ReadinessProbe.InitialDelaySeconds }}
failureThreshold: 1
periodSeconds: {{ .CrSpec.ReadinessProbe.PeriodSeconds }}
{{- if .RuntimeSpec.UseDtk }}
- image: {{ .RuntimeSpec.DtkImageName }}
imagePullPolicy: IfNotPresent
name: openshift-driver-toolkit-ctr
command: [bash, -xc]
args: ["until [ -f /mnt/shared-doca-driver-toolkit/dir_prepared ]; do echo Waiting for doca-driver container to prepare the shared directory ...; sleep 10; done; exec /mnt/shared-doca-driver-toolkit/ocp_dtk_entrypoint dtk-build-driver"]
{{- if .CrSpec.Env }}
env:
{{- range .CrSpec.Env }}
{{ . | yaml | nindentPrefix 14 "- " }}
{{- end }}
{{- end }}
volumeMounts:
- name: shared-doca-driver-toolkit
mountPath: /mnt/shared-doca-driver-toolkit
{{- with index .RuntimeSpec.ContainerResources "openshift-driver-toolkit-ctr" }}
resources:
{{- if .Requests }}
requests:
{{ .Requests | yaml | nindent 14}}
{{- end }}
{{- if .Limits }}
limits:
{{ .Limits | yaml | nindent 14}}
{{- end }}
{{- end }}
{{- end }}
# unloading OFED modules can take more time than default terminationGracePeriod (30 sec)
terminationGracePeriodSeconds: {{ .CrSpec.TerminationGracePeriodSeconds }}
volumes:
Expand Down Expand Up @@ -176,6 +211,10 @@ spec:
{{ . | yaml | nindentPrefix 14 "- " }}
{{- end }}
{{- end }}
{{- if .RuntimeSpec.UseDtk }}
- name: shared-doca-driver-toolkit
emptyDir: {}
{{- end }}
nodeSelector:
feature.node.kubernetes.io/pci-15b3.present: "true"
feature.node.kubernetes.io/system-os_release.ID: {{ .RuntimeSpec.OSName }}
Expand Down
4 changes: 4 additions & 0 deletions pkg/nodeinfo/attributes.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ const (
NodeLabelNvGPU = "nvidia.com/gpu.present"
NodeLabelWaitOFED = "network.nvidia.com/operator.mofed.wait"
NodeLabelCudaVersionMajor = "nvidia.com/cuda.driver.major"
NodeLabelOSTreeVersion = "feature.node.kubernetes.io/system-os_release.OSTREE_VERSION"
)

// AttributeType categorizes Attributes of the host.
Expand All @@ -51,6 +52,7 @@ const (
AttrTypeOSVer
// optional attrs
AttrTypeCudaVersionMajor
AttrTypeOSTreeVersion

OptionalAttrsStart = AttrTypeCudaVersionMajor
)
Expand All @@ -66,6 +68,8 @@ var attrToLabel = []string{
NodeLabelOSVer,
// AttrTypeCudaVersionMajor
NodeLabelCudaVersionMajor,
// AttrTypeOSTreeVersion
NodeLabelOSTreeVersion,
}

// NodeAttributes provides attributes of a specific node
Expand Down
46 changes: 46 additions & 0 deletions pkg/state/state_ofed.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ import (
"github.com/NVIDIA/k8s-operator-libs/pkg/upgrade"
"github.com/go-logr/logr"
osconfigv1 "github.com/openshift/api/config/v1"
apiimagev1 "github.com/openshift/api/image/v1"
"github.com/pkg/errors"
appsv1 "k8s.io/api/apps/v1"
v1 "k8s.io/api/core/v1"
Expand Down Expand Up @@ -156,6 +157,8 @@ type ofedRuntimeSpec struct {
// is true if cluster type is Openshift
IsOpenshift bool
ContainerResources ContainerResourcesMap
UseDtk bool
DtkImageName string
}

type ofedManifestRenderData struct {
Expand Down Expand Up @@ -394,6 +397,10 @@ func (s *stateOFED) GetManifestObjects(
if clusterInfo == nil {
return nil, errors.New("clusterInfo provider required")
}
staticConfig := catalog.GetStaticConfigProvider()
if staticConfig == nil {
return nil, errors.New("staticConfig provider required")
}
attrs := nodeInfo.GetNodesAttributes(
nodeinfo.NewNodeLabelFilterBuilder().WithLabel(nodeinfo.NodeLabelMlnxNIC, "true").Build())
if len(attrs) == 0 {
Expand All @@ -410,6 +417,19 @@ func (s *stateOFED) GetManifestObjects(
}
nodeAttr := attrs[0].Attributes

useDtk := clusterInfo.IsOpenshift() && staticConfig.GetStaticConfig().UseOcpDriverToolkit
var dtkImageName string
if useDtk {
if err := s.checkAttributesExist(attrs[0], nodeinfo.AttrTypeOSTreeVersion); err != nil {
return nil, err
}
dtk, err := s.getOCPDriverToolkitImage(ctx, nodeAttr[nodeinfo.AttrTypeOSTreeVersion])
if err != nil {
return nil, fmt.Errorf("failed to get OpenShift DTK image : %v", err)
}
dtkImageName = dtk
}

setProbesDefaults(cr)

// Update MOFED Env variables with defaults for the cluster
Expand Down Expand Up @@ -441,6 +461,8 @@ func (s *stateOFED) GetManifestObjects(
config.FromEnv().State.OFEDState.InitContainerImage),
IsOpenshift: clusterInfo.IsOpenshift(),
ContainerResources: createContainerResourcesMap(cr.Spec.OFEDDriver.ContainerResources),
UseDtk: useDtk,
DtkImageName: dtkImageName,
},
Tolerations: cr.Spec.Tolerations,
NodeAffinity: cr.Spec.NodeAffinity,
Expand Down Expand Up @@ -735,3 +757,27 @@ func (s *stateOFED) handleRepoConfig(
}
return nil
}

// getOCPDriverToolkitImage fetches the "driver-toolkit" ImageStream from the
// "openshift" namespace and returns the image reference of the tag whose name
// equals ostreeVersion.
//
// An error is returned when the ImageStream cannot be retrieved, when no tag
// matches ostreeVersion, or when the matching tag carries no image reference.
func (s *stateOFED) getOCPDriverToolkitImage(ctx context.Context, ostreeVersion string) (string, error) {
	reqLogger := log.FromContext(ctx)
	dtkImageStream := &apiimagev1.ImageStream{}
	name := "driver-toolkit"
	namespace := "openshift"
	err := s.client.Get(ctx, types.NamespacedName{Namespace: namespace, Name: name}, dtkImageStream)
	if err != nil {
		reqLogger.Error(err, "Couldn't get the driver-toolkit imagestream")
		return "", err
	}
	reqLogger.Info("ocpDriverToolkitImages: driver-toolkit imagestream found")

	tags := dtkImageStream.Spec.Tags
	for i := range tags {
		tag := &tags[i]
		if tag.Name != ostreeVersion {
			continue
		}
		// From is an optional pointer in the ImageStream API; a tag without it
		// (or with an empty name) cannot be used as a container image.
		if tag.From == nil || tag.From.Name == "" {
			return "", fmt.Errorf("DTK image stream tag %q has no image reference", ostreeVersion)
		}
		return tag.From.Name, nil
	}
	return "", fmt.Errorf("failed to find DTK image for RHCOS version: %v", ostreeVersion)
}
Loading

0 comments on commit e8e8f6c

Please sign in to comment.