diff --git a/api/v1/slurmcluster_types.go b/api/v1/slurmcluster_types.go index 87cfd470..276af14e 100644 --- a/api/v1/slurmcluster_types.go +++ b/api/v1/slurmcluster_types.go @@ -83,11 +83,11 @@ type SlurmClusterSpec struct { // SlurmConfig represents the Slurm configuration in slurm.conf. Not all options are supported. // // +kubebuilder:validation:Optional - // +kubebuilder:default={defMemPerNode: 1228800, defCpuPerGPU: 16, completeWait: 5, debugFlags: "Cgroup,CPU_Bind,Gres,JobComp,Priority,Script,SelectType,Steps,TraceJobs", taskPluginParam: "Verbose", maxJobCount: 10000, minJobAge: 86400} + // +kubebuilder:default={defMemPerNode: 1228800, defCpuPerGPU: 16, completeWait: 5, debugFlags: "Cgroup,CPU_Bind,Gres,JobComp,Priority,Script,SelectType,Steps,TraceJobs", taskPluginParam: "", maxJobCount: 10000, minJobAge: 86400} SlurmConfig SlurmConfig `json:"slurmConfig,omitempty"` // Generate and set default AppArmor profile for the Slurm worker and login nodes. The Security Profiles Operator must be installed. // - // +kubebuilder:default=true + // +kubebuilder:default=false UseDefaultAppArmorProfile bool `json:"useDefaultAppArmorProfile,omitempty"` } @@ -117,8 +117,8 @@ type SlurmConfig struct { // Additional parameters for the task plugin // // +kubebuilder:validation:Optional - // +kubebuilder:default="Verbose" - // +kubebuilder:validation:Pattern="^((None|Cores|Sockets|Threads|SlurmdOffSpec|OOMKillStep|Verbose|Autobind)(,)?)+$" + // +kubebuilder:default="" + // +kubebuilder:validation:Pattern="^(|((None|Cores|Sockets|Threads|SlurmdOffSpec|OOMKillStep|Verbose|Autobind)(,)?)+)$" TaskPluginParam *string `json:"taskPluginParam,omitempty"` // Keep N last jobs in controller memory // @@ -237,7 +237,7 @@ type NCCLBenchmark struct { // FailedJobsHistoryLimit defines the number of failed finished jobs to retain // // +kubebuilder:validation:Optional - // +kubebuilder:default=3 + // +kubebuilder:default=16 FailedJobsHistoryLimit int32 `json:"failedJobsHistoryLimit,omitempty"` // Image defines the nccl container image @@ -894,11 +894,30 @@ type NodeVolumeJailSubMount struct { // +kubebuilder:validation:Required MountPath string `json:"mountPath"` + // SubPath points to a specific entry inside the volume. + // Corresponds to the subPath field in the K8s volumeMount structure. + // See official docs for details: https://kubernetes.io/docs/concepts/storage/volumes/#using-subpath + // + // +kubebuilder:validation:Optional + // +kubebuilder:default="" + SubPath string `json:"subPath"` + + // ReadOnly defines whether the mount point should be read-only + // + // +kubebuilder:validation:Optional + // +kubebuilder:default=false + ReadOnly bool `json:"readOnly"` + // VolumeSourceName defines the name of the volume source for the sub-mount. // Must correspond to the name of one of [VolumeSource] // - // +kubebuilder:validation:Required - VolumeSourceName string `json:"volumeSourceName"` + // +kubebuilder:validation:Optional + VolumeSourceName *string `json:"volumeSourceName"` + + // VolumeClaimTemplateSpec defines the [corev1.PersistentVolumeClaim] template specification + // + // +kubebuilder:validation:Optional + VolumeClaimTemplateSpec *corev1.PersistentVolumeClaimSpec `json:"volumeClaimTemplateSpec,omitempty"` } type Telemetry struct { diff --git a/api/v1/zz_generated.deepcopy.go b/api/v1/zz_generated.deepcopy.go index 0c3111db..202b17de 100644 --- a/api/v1/zz_generated.deepcopy.go +++ b/api/v1/zz_generated.deepcopy.go @@ -364,6 +364,16 @@ func (in *NodeVolume) DeepCopy() *NodeVolume { // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *NodeVolumeJailSubMount) DeepCopyInto(out *NodeVolumeJailSubMount) { *out = *in + if in.VolumeSourceName != nil { + in, out := &in.VolumeSourceName, &out.VolumeSourceName + *out = new(string) + **out = **in + } + if in.VolumeClaimTemplateSpec != nil { + in, out := &in.VolumeClaimTemplateSpec, &out.VolumeClaimTemplateSpec + *out = new(corev1.PersistentVolumeClaimSpec) + (*in).DeepCopyInto(*out) + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new NodeVolumeJailSubMount. @@ -820,7 +830,9 @@ func (in *SlurmNodeLoginVolumes) DeepCopyInto(out *SlurmNodeLoginVolumes) { if in.JailSubMounts != nil { in, out := &in.JailSubMounts, &out.JailSubMounts *out = make([]NodeVolumeJailSubMount, len(*in)) - copy(*out, *in) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } } } @@ -861,7 +873,9 @@ func (in *SlurmNodeWorkerVolumes) DeepCopyInto(out *SlurmNodeWorkerVolumes) { if in.JailSubMounts != nil { in, out := &in.JailSubMounts, &out.JailSubMounts *out = make([]NodeVolumeJailSubMount, len(*in)) - copy(*out, *in) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } } if in.SharedMemorySize != nil { in, out := &in.SharedMemorySize, &out.SharedMemorySize diff --git a/config/crd/bases/slurm.nebius.ai_slurmclusters.yaml b/config/crd/bases/slurm.nebius.ai_slurmclusters.yaml index f4da9701..f1aa0f70 100644 --- a/config/crd/bases/slurm.nebius.ai_slurmclusters.yaml +++ b/config/crd/bases/slurm.nebius.ai_slurmclusters.yaml @@ -1140,7 +1140,7 @@ spec: scheduled type: boolean failedJobsHistoryLimit: - default: 3 + default: 16 description: FailedJobsHistoryLimit defines the number of failed finished jobs to retain format: int32 @@ -1486,7 +1486,7 @@ spec: defMemPerNode: 1228800 maxJobCount: 10000 minJobAge: 86400 - taskPluginParam: Verbose + taskPluginParam: "" description: SlurmConfig represents the Slurm configuration in slurm.conf. Not all options are supported. properties: @@ -1525,9 +1525,9 @@ spec: format: int32 type: integer taskPluginParam: - default: Verbose + default: "" description: Additional parameters for the task plugin - pattern: ^((None|Cores|Sockets|Threads|SlurmdOffSpec|OOMKillStep|Verbose|Autobind)(,)?)+$ + pattern: ^(|((None|Cores|Sockets|Threads|SlurmdOffSpec|OOMKillStep|Verbose|Autobind)(,)?)+)$ type: string type: object slurmNodes: @@ -3661,6 +3661,221 @@ spec: name: description: Name defines the name of the sub-mount type: string + readOnly: + default: false + description: ReadOnly defines whether the mount + point should be read-only + type: boolean + subPath: + default: "" + description: |- + SubPath points to a specific entry inside the volume. + Corresponds to the subPath field in the K8s volumeMount structure. + See official docs for details: https://kubernetes.io/docs/concepts/storage/volumes/#using-subpath + type: string + volumeClaimTemplateSpec: + description: VolumeClaimTemplateSpec defines the + [corev1.PersistentVolumeClaim] template specification + properties: + accessModes: + description: |- + accessModes contains the desired access modes the volume should have. + More info: https://kubernetes.io/docs/concepts/storage/persistent-volumes#access-modes-1 + items: + type: string + type: array + x-kubernetes-list-type: atomic + dataSource: + description: |- + dataSource field can be used to specify either: + * An existing VolumeSnapshot object (snapshot.storage.k8s.io/VolumeSnapshot) + * An existing PVC (PersistentVolumeClaim) + If the provisioner or an external controller can support the specified data source, + it will create a new volume based on the contents of the specified data source. + When the AnyVolumeDataSource feature gate is enabled, dataSource contents will be copied to dataSourceRef, + and dataSourceRef contents will be copied to dataSource when dataSourceRef.namespace is not specified. + If the namespace is specified, then dataSourceRef will not be copied to dataSource. + properties: + apiGroup: + description: |- + APIGroup is the group for the resource being referenced. + If APIGroup is not specified, the specified Kind must be in the core API group. + For any other third-party types, APIGroup is required. + type: string + kind: + description: Kind is the type of resource + being referenced + type: string + name: + description: Name is the name of resource + being referenced + type: string + required: + - kind + - name + type: object + x-kubernetes-map-type: atomic + dataSourceRef: + description: |- + dataSourceRef specifies the object from which to populate the volume with data, if a non-empty + volume is desired. This may be any object from a non-empty API group (non + core object) or a PersistentVolumeClaim object. + When this field is specified, volume binding will only succeed if the type of + the specified object matches some installed volume populator or dynamic + provisioner. + This field will replace the functionality of the dataSource field and as such + if both fields are non-empty, they must have the same value. For backwards + compatibility, when namespace isn't specified in dataSourceRef, + both fields (dataSource and dataSourceRef) will be set to the same + value automatically if one of them is empty and the other is non-empty. + When namespace is specified in dataSourceRef, + dataSource isn't set to the same value and must be empty. + There are three important differences between dataSource and dataSourceRef: + * While dataSource only allows two specific types of objects, dataSourceRef + allows any non-core object, as well as PersistentVolumeClaim objects. + * While dataSource ignores disallowed values (dropping them), dataSourceRef + preserves all values, and generates an error if a disallowed value is + specified. + * While dataSource only allows local objects, dataSourceRef allows objects + in any namespaces. + (Beta) Using this field requires the AnyVolumeDataSource feature gate to be enabled. + (Alpha) Using the namespace field of dataSourceRef requires the CrossNamespaceVolumeDataSource feature gate to be enabled. + properties: + apiGroup: + description: |- + APIGroup is the group for the resource being referenced. + If APIGroup is not specified, the specified Kind must be in the core API group. + For any other third-party types, APIGroup is required. + type: string + kind: + description: Kind is the type of resource + being referenced + type: string + name: + description: Name is the name of resource + being referenced + type: string + namespace: + description: |- + Namespace is the namespace of resource being referenced + Note that when a namespace is specified, a gateway.networking.k8s.io/ReferenceGrant object is required in the referent namespace to allow that namespace's owner to accept the reference. See the ReferenceGrant documentation for details. + (Alpha) This field requires the CrossNamespaceVolumeDataSource feature gate to be enabled. + type: string + required: + - kind + - name + type: object + resources: + description: |- + resources represents the minimum resources the volume should have. + If RecoverVolumeExpansionFailure feature is enabled users are allowed to specify resource requirements + that are lower than previous value but must still be higher than capacity recorded in the + status field of the claim. + More info: https://kubernetes.io/docs/concepts/storage/persistent-volumes#resources + properties: + limits: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: |- + Limits describes the maximum amount of compute resources allowed. + More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ + type: object + requests: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: |- + Requests describes the minimum amount of compute resources required. + If Requests is omitted for a container, it defaults to Limits if that is explicitly specified, + otherwise to an implementation-defined value. Requests cannot exceed Limits. + More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ + type: object + type: object + selector: + description: selector is a label query over + volumes to consider for binding. + properties: + matchExpressions: + description: matchExpressions is a list + of label selector requirements. The requirements + are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label key + that the selector applies to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + storageClassName: + description: |- + storageClassName is the name of the StorageClass required by the claim. + More info: https://kubernetes.io/docs/concepts/storage/persistent-volumes#class-1 + type: string + volumeAttributesClassName: + description: |- + volumeAttributesClassName may be used to set the VolumeAttributesClass used by this claim. + If specified, the CSI driver will create or update the volume with the attributes defined + in the corresponding VolumeAttributesClass. This has a different purpose than storageClassName, + it can be changed after the claim is created. An empty string value means that no VolumeAttributesClass + will be applied to the claim but it's not allowed to reset this field to empty string once it is set. + If unspecified and the PersistentVolumeClaim is unbound, the default VolumeAttributesClass + will be set by the persistentvolume controller if it exists. + If the resource referred to by volumeAttributesClass does not exist, this PersistentVolumeClaim will be + set to a Pending state, as reflected by the modifyVolumeStatus field, until such as a resource + exists. + More info: https://kubernetes.io/docs/concepts/storage/volume-attributes-classes/ + (Beta) Using this field requires the VolumeAttributesClass feature gate to be enabled (off by default). + type: string + volumeMode: + description: |- + volumeMode defines what type of volume is required by the claim. + Value of Filesystem is implied when not included in claim spec. + type: string + volumeName: + description: volumeName is the binding reference + to the PersistentVolume backing this claim. + type: string + type: object volumeSourceName: description: |- VolumeSourceName defines the name of the volume source for the sub-mount. @@ -3669,7 +3884,6 @@ spec: required: - mountPath - name - - volumeSourceName type: object type: array required: @@ -4098,6 +4312,221 @@ spec: name: description: Name defines the name of the sub-mount type: string + readOnly: + default: false + description: ReadOnly defines whether the mount + point should be read-only + type: boolean + subPath: + default: "" + description: |- + SubPath points to a specific entry inside the volume. + Corresponds to the subPath field in the K8s volumeMount structure. + See official docs for details: https://kubernetes.io/docs/concepts/storage/volumes/#using-subpath + type: string + volumeClaimTemplateSpec: + description: VolumeClaimTemplateSpec defines the + [corev1.PersistentVolumeClaim] template specification + properties: + accessModes: + description: |- + accessModes contains the desired access modes the volume should have. + More info: https://kubernetes.io/docs/concepts/storage/persistent-volumes#access-modes-1 + items: + type: string + type: array + x-kubernetes-list-type: atomic + dataSource: + description: |- + dataSource field can be used to specify either: + * An existing VolumeSnapshot object (snapshot.storage.k8s.io/VolumeSnapshot) + * An existing PVC (PersistentVolumeClaim) + If the provisioner or an external controller can support the specified data source, + it will create a new volume based on the contents of the specified data source. + When the AnyVolumeDataSource feature gate is enabled, dataSource contents will be copied to dataSourceRef, + and dataSourceRef contents will be copied to dataSource when dataSourceRef.namespace is not specified. + If the namespace is specified, then dataSourceRef will not be copied to dataSource. + properties: + apiGroup: + description: |- + APIGroup is the group for the resource being referenced. + If APIGroup is not specified, the specified Kind must be in the core API group. + For any other third-party types, APIGroup is required. + type: string + kind: + description: Kind is the type of resource + being referenced + type: string + name: + description: Name is the name of resource + being referenced + type: string + required: + - kind + - name + type: object + x-kubernetes-map-type: atomic + dataSourceRef: + description: |- + dataSourceRef specifies the object from which to populate the volume with data, if a non-empty + volume is desired. This may be any object from a non-empty API group (non + core object) or a PersistentVolumeClaim object. + When this field is specified, volume binding will only succeed if the type of + the specified object matches some installed volume populator or dynamic + provisioner. + This field will replace the functionality of the dataSource field and as such + if both fields are non-empty, they must have the same value. For backwards + compatibility, when namespace isn't specified in dataSourceRef, + both fields (dataSource and dataSourceRef) will be set to the same + value automatically if one of them is empty and the other is non-empty. + When namespace is specified in dataSourceRef, + dataSource isn't set to the same value and must be empty. + There are three important differences between dataSource and dataSourceRef: + * While dataSource only allows two specific types of objects, dataSourceRef + allows any non-core object, as well as PersistentVolumeClaim objects. + * While dataSource ignores disallowed values (dropping them), dataSourceRef + preserves all values, and generates an error if a disallowed value is + specified. + * While dataSource only allows local objects, dataSourceRef allows objects + in any namespaces. + (Beta) Using this field requires the AnyVolumeDataSource feature gate to be enabled. + (Alpha) Using the namespace field of dataSourceRef requires the CrossNamespaceVolumeDataSource feature gate to be enabled. + properties: + apiGroup: + description: |- + APIGroup is the group for the resource being referenced. + If APIGroup is not specified, the specified Kind must be in the core API group. + For any other third-party types, APIGroup is required. + type: string + kind: + description: Kind is the type of resource + being referenced + type: string + name: + description: Name is the name of resource + being referenced + type: string + namespace: + description: |- + Namespace is the namespace of resource being referenced + Note that when a namespace is specified, a gateway.networking.k8s.io/ReferenceGrant object is required in the referent namespace to allow that namespace's owner to accept the reference. See the ReferenceGrant documentation for details. + (Alpha) This field requires the CrossNamespaceVolumeDataSource feature gate to be enabled. + type: string + required: + - kind + - name + type: object + resources: + description: |- + resources represents the minimum resources the volume should have. + If RecoverVolumeExpansionFailure feature is enabled users are allowed to specify resource requirements + that are lower than previous value but must still be higher than capacity recorded in the + status field of the claim. + More info: https://kubernetes.io/docs/concepts/storage/persistent-volumes#resources + properties: + limits: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: |- + Limits describes the maximum amount of compute resources allowed. + More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ + type: object + requests: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: |- + Requests describes the minimum amount of compute resources required. + If Requests is omitted for a container, it defaults to Limits if that is explicitly specified, + otherwise to an implementation-defined value. Requests cannot exceed Limits. + More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ + type: object + type: object + selector: + description: selector is a label query over + volumes to consider for binding. + properties: + matchExpressions: + description: matchExpressions is a list + of label selector requirements. The requirements + are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label key + that the selector applies to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + storageClassName: + description: |- + storageClassName is the name of the StorageClass required by the claim. + More info: https://kubernetes.io/docs/concepts/storage/persistent-volumes#class-1 + type: string + volumeAttributesClassName: + description: |- + volumeAttributesClassName may be used to set the VolumeAttributesClass used by this claim. + If specified, the CSI driver will create or update the volume with the attributes defined + in the corresponding VolumeAttributesClass. This has a different purpose than storageClassName, + it can be changed after the claim is created. An empty string value means that no VolumeAttributesClass + will be applied to the claim but it's not allowed to reset this field to empty string once it is set. + If unspecified and the PersistentVolumeClaim is unbound, the default VolumeAttributesClass + will be set by the persistentvolume controller if it exists. + If the resource referred to by volumeAttributesClass does not exist, this PersistentVolumeClaim will be + set to a Pending state, as reflected by the modifyVolumeStatus field, until such as a resource + exists. + More info: https://kubernetes.io/docs/concepts/storage/volume-attributes-classes/ + (Beta) Using this field requires the VolumeAttributesClass feature gate to be enabled (off by default). + type: string + volumeMode: + description: |- + volumeMode defines what type of volume is required by the claim. + Value of Filesystem is implied when not included in claim spec. + type: string + volumeName: + description: volumeName is the binding reference + to the PersistentVolume backing this claim. + type: string + type: object volumeSourceName: description: |- VolumeSourceName defines the name of the volume source for the sub-mount. @@ -4106,7 +4535,6 @@ spec: required: - mountPath - name - - volumeSourceName type: object type: array sharedMemorySize: @@ -4416,7 +4844,7 @@ spec: type: object type: object useDefaultAppArmorProfile: - default: true + default: false description: Generate and set default AppArmor profile for the Slurm worker and login nodes. The Security Profiles Operator must be installed. type: boolean diff --git a/helm/slurm-cluster/values.yaml b/helm/slurm-cluster/values.yaml index 298065fb..d39d4bb8 100644 --- a/helm/slurm-cluster/values.yaml +++ b/helm/slurm-cluster/values.yaml @@ -2,7 +2,7 @@ clusterName: "slurm1" # Additional annotations for the cluster annotations: {} # Add appArmor profile to the cluster -useDefaultAppArmorProfile: true +useDefaultAppArmorProfile: false # Maintenance defines the maintenance window for the cluster. # It can have the following values: # - none: No maintenance is performed. The cluster operates normally. @@ -98,7 +98,7 @@ periodicChecks: # CronJob timeout in seconds. By default, equals to 30 min activeDeadlineSeconds: 1800 # Number of successful finished jobs to retain - successfulJobsHistoryLimit: 3 + successfulJobsHistoryLimit: 24 # Number of failed finished jobs to retain failedJobsHistoryLimit: 3 # NCCL test settings @@ -317,6 +317,8 @@ slurmNodes: # jailSubMounts: # - name: "mlcommons-sd-bench-data" # mountPath: "/mlperf-sd" + # subPath: "" + # readOnly: false # volumeSourceName: "mlperf-sd" login: size: 2 diff --git a/helm/soperator-crds/templates/slurmcluster-crd.yaml b/helm/soperator-crds/templates/slurmcluster-crd.yaml index 4ff50f1d..c44e2779 100644 --- a/helm/soperator-crds/templates/slurmcluster-crd.yaml +++ b/helm/soperator-crds/templates/slurmcluster-crd.yaml @@ -1139,7 +1139,7 @@ spec: scheduled type: boolean failedJobsHistoryLimit: - default: 3 + default: 16 description: FailedJobsHistoryLimit defines the number of failed finished jobs to retain format: int32 @@ -1485,7 +1485,7 @@ spec: defMemPerNode: 1228800 maxJobCount: 10000 minJobAge: 86400 - taskPluginParam: Verbose + taskPluginParam: "" description: SlurmConfig represents the Slurm configuration in slurm.conf. Not all options are supported. properties: @@ -1524,9 +1524,9 @@ spec: format: int32 type: integer taskPluginParam: - default: Verbose + default: "" description: Additional parameters for the task plugin - pattern: ^((None|Cores|Sockets|Threads|SlurmdOffSpec|OOMKillStep|Verbose|Autobind)(,)?)+$ + pattern: ^(|((None|Cores|Sockets|Threads|SlurmdOffSpec|OOMKillStep|Verbose|Autobind)(,)?)+)$ type: string type: object slurmNodes: @@ -3660,6 +3660,221 @@ spec: name: description: Name defines the name of the sub-mount type: string + readOnly: + default: false + description: ReadOnly defines whether the mount + point should be read-only + type: boolean + subPath: + default: "" + description: |- + SubPath points to a specific entry inside the volume. + Corresponds to the subPath field in the K8s volumeMount structure. + See official docs for details: https://kubernetes.io/docs/concepts/storage/volumes/#using-subpath + type: string + volumeClaimTemplateSpec: + description: VolumeClaimTemplateSpec defines the + [corev1.PersistentVolumeClaim] template specification + properties: + accessModes: + description: |- + accessModes contains the desired access modes the volume should have. + More info: https://kubernetes.io/docs/concepts/storage/persistent-volumes#access-modes-1 + items: + type: string + type: array + x-kubernetes-list-type: atomic + dataSource: + description: |- + dataSource field can be used to specify either: + * An existing VolumeSnapshot object (snapshot.storage.k8s.io/VolumeSnapshot) + * An existing PVC (PersistentVolumeClaim) + If the provisioner or an external controller can support the specified data source, + it will create a new volume based on the contents of the specified data source. + When the AnyVolumeDataSource feature gate is enabled, dataSource contents will be copied to dataSourceRef, + and dataSourceRef contents will be copied to dataSource when dataSourceRef.namespace is not specified. + If the namespace is specified, then dataSourceRef will not be copied to dataSource. + properties: + apiGroup: + description: |- + APIGroup is the group for the resource being referenced. + If APIGroup is not specified, the specified Kind must be in the core API group. + For any other third-party types, APIGroup is required. + type: string + kind: + description: Kind is the type of resource + being referenced + type: string + name: + description: Name is the name of resource + being referenced + type: string + required: + - kind + - name + type: object + x-kubernetes-map-type: atomic + dataSourceRef: + description: |- + dataSourceRef specifies the object from which to populate the volume with data, if a non-empty + volume is desired. This may be any object from a non-empty API group (non + core object) or a PersistentVolumeClaim object. + When this field is specified, volume binding will only succeed if the type of + the specified object matches some installed volume populator or dynamic + provisioner. + This field will replace the functionality of the dataSource field and as such + if both fields are non-empty, they must have the same value. For backwards + compatibility, when namespace isn't specified in dataSourceRef, + both fields (dataSource and dataSourceRef) will be set to the same + value automatically if one of them is empty and the other is non-empty. + When namespace is specified in dataSourceRef, + dataSource isn't set to the same value and must be empty. + There are three important differences between dataSource and dataSourceRef: + * While dataSource only allows two specific types of objects, dataSourceRef + allows any non-core object, as well as PersistentVolumeClaim objects. + * While dataSource ignores disallowed values (dropping them), dataSourceRef + preserves all values, and generates an error if a disallowed value is + specified. + * While dataSource only allows local objects, dataSourceRef allows objects + in any namespaces. + (Beta) Using this field requires the AnyVolumeDataSource feature gate to be enabled. + (Alpha) Using the namespace field of dataSourceRef requires the CrossNamespaceVolumeDataSource feature gate to be enabled. + properties: + apiGroup: + description: |- + APIGroup is the group for the resource being referenced. + If APIGroup is not specified, the specified Kind must be in the core API group. + For any other third-party types, APIGroup is required. + type: string + kind: + description: Kind is the type of resource + being referenced + type: string + name: + description: Name is the name of resource + being referenced + type: string + namespace: + description: |- + Namespace is the namespace of resource being referenced + Note that when a namespace is specified, a gateway.networking.k8s.io/ReferenceGrant object is required in the referent namespace to allow that namespace's owner to accept the reference. See the ReferenceGrant documentation for details. + (Alpha) This field requires the CrossNamespaceVolumeDataSource feature gate to be enabled. + type: string + required: + - kind + - name + type: object + resources: + description: |- + resources represents the minimum resources the volume should have. + If RecoverVolumeExpansionFailure feature is enabled users are allowed to specify resource requirements + that are lower than previous value but must still be higher than capacity recorded in the + status field of the claim. + More info: https://kubernetes.io/docs/concepts/storage/persistent-volumes#resources + properties: + limits: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: |- + Limits describes the maximum amount of compute resources allowed. + More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ + type: object + requests: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: |- + Requests describes the minimum amount of compute resources required. + If Requests is omitted for a container, it defaults to Limits if that is explicitly specified, + otherwise to an implementation-defined value. Requests cannot exceed Limits. + More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ + type: object + type: object + selector: + description: selector is a label query over + volumes to consider for binding. + properties: + matchExpressions: + description: matchExpressions is a list + of label selector requirements. The requirements + are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label key + that the selector applies to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + storageClassName: + description: |- + storageClassName is the name of the StorageClass required by the claim. + More info: https://kubernetes.io/docs/concepts/storage/persistent-volumes#class-1 + type: string + volumeAttributesClassName: + description: |- + volumeAttributesClassName may be used to set the VolumeAttributesClass used by this claim. + If specified, the CSI driver will create or update the volume with the attributes defined + in the corresponding VolumeAttributesClass. This has a different purpose than storageClassName, + it can be changed after the claim is created. An empty string value means that no VolumeAttributesClass + will be applied to the claim but it's not allowed to reset this field to empty string once it is set. + If unspecified and the PersistentVolumeClaim is unbound, the default VolumeAttributesClass + will be set by the persistentvolume controller if it exists. + If the resource referred to by volumeAttributesClass does not exist, this PersistentVolumeClaim will be + set to a Pending state, as reflected by the modifyVolumeStatus field, until such as a resource + exists. + More info: https://kubernetes.io/docs/concepts/storage/volume-attributes-classes/ + (Beta) Using this field requires the VolumeAttributesClass feature gate to be enabled (off by default). + type: string + volumeMode: + description: |- + volumeMode defines what type of volume is required by the claim. + Value of Filesystem is implied when not included in claim spec. + type: string + volumeName: + description: volumeName is the binding reference + to the PersistentVolume backing this claim. + type: string + type: object volumeSourceName: description: |- VolumeSourceName defines the name of the volume source for the sub-mount. @@ -3668,7 +3883,6 @@ spec: required: - mountPath - name - - volumeSourceName type: object type: array required: @@ -4097,6 +4311,221 @@ spec: name: description: Name defines the name of the sub-mount type: string + readOnly: + default: false + description: ReadOnly defines whether the mount + point should be read-only + type: boolean + subPath: + default: "" + description: |- + SubPath points to a specific entry inside the volume. + Corresponds to the subPath field in the K8s volumeMount structure. + See official docs for details: https://kubernetes.io/docs/concepts/storage/volumes/#using-subpath + type: string + volumeClaimTemplateSpec: + description: VolumeClaimTemplateSpec defines the + [corev1.PersistentVolumeClaim] template specification + properties: + accessModes: + description: |- + accessModes contains the desired access modes the volume should have. + More info: https://kubernetes.io/docs/concepts/storage/persistent-volumes#access-modes-1 + items: + type: string + type: array + x-kubernetes-list-type: atomic + dataSource: + description: |- + dataSource field can be used to specify either: + * An existing VolumeSnapshot object (snapshot.storage.k8s.io/VolumeSnapshot) + * An existing PVC (PersistentVolumeClaim) + If the provisioner or an external controller can support the specified data source, + it will create a new volume based on the contents of the specified data source. + When the AnyVolumeDataSource feature gate is enabled, dataSource contents will be copied to dataSourceRef, + and dataSourceRef contents will be copied to dataSource when dataSourceRef.namespace is not specified. + If the namespace is specified, then dataSourceRef will not be copied to dataSource. + properties: + apiGroup: + description: |- + APIGroup is the group for the resource being referenced. + If APIGroup is not specified, the specified Kind must be in the core API group. + For any other third-party types, APIGroup is required. + type: string + kind: + description: Kind is the type of resource + being referenced + type: string + name: + description: Name is the name of resource + being referenced + type: string + required: + - kind + - name + type: object + x-kubernetes-map-type: atomic + dataSourceRef: + description: |- + dataSourceRef specifies the object from which to populate the volume with data, if a non-empty + volume is desired. This may be any object from a non-empty API group (non + core object) or a PersistentVolumeClaim object. + When this field is specified, volume binding will only succeed if the type of + the specified object matches some installed volume populator or dynamic + provisioner. + This field will replace the functionality of the dataSource field and as such + if both fields are non-empty, they must have the same value. For backwards + compatibility, when namespace isn't specified in dataSourceRef, + both fields (dataSource and dataSourceRef) will be set to the same + value automatically if one of them is empty and the other is non-empty. + When namespace is specified in dataSourceRef, + dataSource isn't set to the same value and must be empty. + There are three important differences between dataSource and dataSourceRef: + * While dataSource only allows two specific types of objects, dataSourceRef + allows any non-core object, as well as PersistentVolumeClaim objects. + * While dataSource ignores disallowed values (dropping them), dataSourceRef + preserves all values, and generates an error if a disallowed value is + specified. + * While dataSource only allows local objects, dataSourceRef allows objects + in any namespaces. + (Beta) Using this field requires the AnyVolumeDataSource feature gate to be enabled. + (Alpha) Using the namespace field of dataSourceRef requires the CrossNamespaceVolumeDataSource feature gate to be enabled. + properties: + apiGroup: + description: |- + APIGroup is the group for the resource being referenced. + If APIGroup is not specified, the specified Kind must be in the core API group. + For any other third-party types, APIGroup is required. + type: string + kind: + description: Kind is the type of resource + being referenced + type: string + name: + description: Name is the name of resource + being referenced + type: string + namespace: + description: |- + Namespace is the namespace of resource being referenced + Note that when a namespace is specified, a gateway.networking.k8s.io/ReferenceGrant object is required in the referent namespace to allow that namespace's owner to accept the reference. See the ReferenceGrant documentation for details. + (Alpha) This field requires the CrossNamespaceVolumeDataSource feature gate to be enabled. + type: string + required: + - kind + - name + type: object + resources: + description: |- + resources represents the minimum resources the volume should have. + If RecoverVolumeExpansionFailure feature is enabled users are allowed to specify resource requirements + that are lower than previous value but must still be higher than capacity recorded in the + status field of the claim. + More info: https://kubernetes.io/docs/concepts/storage/persistent-volumes#resources + properties: + limits: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: |- + Limits describes the maximum amount of compute resources allowed. + More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ + type: object + requests: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: |- + Requests describes the minimum amount of compute resources required. + If Requests is omitted for a container, it defaults to Limits if that is explicitly specified, + otherwise to an implementation-defined value. Requests cannot exceed Limits. + More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ + type: object + type: object + selector: + description: selector is a label query over + volumes to consider for binding. + properties: + matchExpressions: + description: matchExpressions is a list + of label selector requirements. The requirements + are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label key + that the selector applies to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + storageClassName: + description: |- + storageClassName is the name of the StorageClass required by the claim. + More info: https://kubernetes.io/docs/concepts/storage/persistent-volumes#class-1 + type: string + volumeAttributesClassName: + description: |- + volumeAttributesClassName may be used to set the VolumeAttributesClass used by this claim. + If specified, the CSI driver will create or update the volume with the attributes defined + in the corresponding VolumeAttributesClass. This has a different purpose than storageClassName, + it can be changed after the claim is created. An empty string value means that no VolumeAttributesClass + will be applied to the claim but it's not allowed to reset this field to empty string once it is set. + If unspecified and the PersistentVolumeClaim is unbound, the default VolumeAttributesClass + will be set by the persistentvolume controller if it exists. + If the resource referred to by volumeAttributesClass does not exist, this PersistentVolumeClaim will be + set to a Pending state, as reflected by the modifyVolumeStatus field, until such as a resource + exists. + More info: https://kubernetes.io/docs/concepts/storage/volume-attributes-classes/ + (Beta) Using this field requires the VolumeAttributesClass feature gate to be enabled (off by default). + type: string + volumeMode: + description: |- + volumeMode defines what type of volume is required by the claim. + Value of Filesystem is implied when not included in claim spec. + type: string + volumeName: + description: volumeName is the binding reference + to the PersistentVolume backing this claim. + type: string + type: object volumeSourceName: description: |- VolumeSourceName defines the name of the volume source for the sub-mount. @@ -4105,7 +4534,6 @@ spec: required: - mountPath - name - - volumeSourceName type: object type: array sharedMemorySize: @@ -4415,7 +4843,7 @@ spec: type: object type: object useDefaultAppArmorProfile: - default: true + default: false description: Generate and set default AppArmor profile for the Slurm worker and login nodes. The Security Profiles Operator must be installed. type: boolean diff --git a/helm/soperator/crds/slurmcluster-crd.yaml b/helm/soperator/crds/slurmcluster-crd.yaml index 4ff50f1d..c44e2779 100644 --- a/helm/soperator/crds/slurmcluster-crd.yaml +++ b/helm/soperator/crds/slurmcluster-crd.yaml @@ -1139,7 +1139,7 @@ spec: scheduled type: boolean failedJobsHistoryLimit: - default: 3 + default: 16 description: FailedJobsHistoryLimit defines the number of failed finished jobs to retain format: int32 @@ -1485,7 +1485,7 @@ spec: defMemPerNode: 1228800 maxJobCount: 10000 minJobAge: 86400 - taskPluginParam: Verbose + taskPluginParam: "" description: SlurmConfig represents the Slurm configuration in slurm.conf. Not all options are supported. properties: @@ -1524,9 +1524,9 @@ spec: format: int32 type: integer taskPluginParam: - default: Verbose + default: "" description: Additional parameters for the task plugin - pattern: ^((None|Cores|Sockets|Threads|SlurmdOffSpec|OOMKillStep|Verbose|Autobind)(,)?)+$ + pattern: ^(|((None|Cores|Sockets|Threads|SlurmdOffSpec|OOMKillStep|Verbose|Autobind)(,)?)+)$ type: string type: object slurmNodes: @@ -3660,6 +3660,221 @@ spec: name: description: Name defines the name of the sub-mount type: string + readOnly: + default: false + description: ReadOnly defines whether the mount + point should be read-only + type: boolean + subPath: + default: "" + description: |- + SubPath points to a specific entry inside the volume. + Corresponds to the subPath field in the K8s volumeMount structure. + See official docs for details: https://kubernetes.io/docs/concepts/storage/volumes/#using-subpath + type: string + volumeClaimTemplateSpec: + description: VolumeClaimTemplateSpec defines the + [corev1.PersistentVolumeClaim] template specification + properties: + accessModes: + description: |- + accessModes contains the desired access modes the volume should have. + More info: https://kubernetes.io/docs/concepts/storage/persistent-volumes#access-modes-1 + items: + type: string + type: array + x-kubernetes-list-type: atomic + dataSource: + description: |- + dataSource field can be used to specify either: + * An existing VolumeSnapshot object (snapshot.storage.k8s.io/VolumeSnapshot) + * An existing PVC (PersistentVolumeClaim) + If the provisioner or an external controller can support the specified data source, + it will create a new volume based on the contents of the specified data source. + When the AnyVolumeDataSource feature gate is enabled, dataSource contents will be copied to dataSourceRef, + and dataSourceRef contents will be copied to dataSource when dataSourceRef.namespace is not specified. + If the namespace is specified, then dataSourceRef will not be copied to dataSource. + properties: + apiGroup: + description: |- + APIGroup is the group for the resource being referenced. + If APIGroup is not specified, the specified Kind must be in the core API group. + For any other third-party types, APIGroup is required. + type: string + kind: + description: Kind is the type of resource + being referenced + type: string + name: + description: Name is the name of resource + being referenced + type: string + required: + - kind + - name + type: object + x-kubernetes-map-type: atomic + dataSourceRef: + description: |- + dataSourceRef specifies the object from which to populate the volume with data, if a non-empty + volume is desired. This may be any object from a non-empty API group (non + core object) or a PersistentVolumeClaim object. + When this field is specified, volume binding will only succeed if the type of + the specified object matches some installed volume populator or dynamic + provisioner. + This field will replace the functionality of the dataSource field and as such + if both fields are non-empty, they must have the same value. For backwards + compatibility, when namespace isn't specified in dataSourceRef, + both fields (dataSource and dataSourceRef) will be set to the same + value automatically if one of them is empty and the other is non-empty. + When namespace is specified in dataSourceRef, + dataSource isn't set to the same value and must be empty. + There are three important differences between dataSource and dataSourceRef: + * While dataSource only allows two specific types of objects, dataSourceRef + allows any non-core object, as well as PersistentVolumeClaim objects. + * While dataSource ignores disallowed values (dropping them), dataSourceRef + preserves all values, and generates an error if a disallowed value is + specified. + * While dataSource only allows local objects, dataSourceRef allows objects + in any namespaces. + (Beta) Using this field requires the AnyVolumeDataSource feature gate to be enabled. + (Alpha) Using the namespace field of dataSourceRef requires the CrossNamespaceVolumeDataSource feature gate to be enabled. + properties: + apiGroup: + description: |- + APIGroup is the group for the resource being referenced. + If APIGroup is not specified, the specified Kind must be in the core API group. + For any other third-party types, APIGroup is required. + type: string + kind: + description: Kind is the type of resource + being referenced + type: string + name: + description: Name is the name of resource + being referenced + type: string + namespace: + description: |- + Namespace is the namespace of resource being referenced + Note that when a namespace is specified, a gateway.networking.k8s.io/ReferenceGrant object is required in the referent namespace to allow that namespace's owner to accept the reference. See the ReferenceGrant documentation for details. + (Alpha) This field requires the CrossNamespaceVolumeDataSource feature gate to be enabled. + type: string + required: + - kind + - name + type: object + resources: + description: |- + resources represents the minimum resources the volume should have. + If RecoverVolumeExpansionFailure feature is enabled users are allowed to specify resource requirements + that are lower than previous value but must still be higher than capacity recorded in the + status field of the claim. + More info: https://kubernetes.io/docs/concepts/storage/persistent-volumes#resources + properties: + limits: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: |- + Limits describes the maximum amount of compute resources allowed. + More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ + type: object + requests: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: |- + Requests describes the minimum amount of compute resources required. + If Requests is omitted for a container, it defaults to Limits if that is explicitly specified, + otherwise to an implementation-defined value. Requests cannot exceed Limits. + More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ + type: object + type: object + selector: + description: selector is a label query over + volumes to consider for binding. + properties: + matchExpressions: + description: matchExpressions is a list + of label selector requirements. The requirements + are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label key + that the selector applies to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + storageClassName: + description: |- + storageClassName is the name of the StorageClass required by the claim. + More info: https://kubernetes.io/docs/concepts/storage/persistent-volumes#class-1 + type: string + volumeAttributesClassName: + description: |- + volumeAttributesClassName may be used to set the VolumeAttributesClass used by this claim. + If specified, the CSI driver will create or update the volume with the attributes defined + in the corresponding VolumeAttributesClass. This has a different purpose than storageClassName, + it can be changed after the claim is created. An empty string value means that no VolumeAttributesClass + will be applied to the claim but it's not allowed to reset this field to empty string once it is set. + If unspecified and the PersistentVolumeClaim is unbound, the default VolumeAttributesClass + will be set by the persistentvolume controller if it exists. + If the resource referred to by volumeAttributesClass does not exist, this PersistentVolumeClaim will be + set to a Pending state, as reflected by the modifyVolumeStatus field, until such as a resource + exists. + More info: https://kubernetes.io/docs/concepts/storage/volume-attributes-classes/ + (Beta) Using this field requires the VolumeAttributesClass feature gate to be enabled (off by default). + type: string + volumeMode: + description: |- + volumeMode defines what type of volume is required by the claim. + Value of Filesystem is implied when not included in claim spec. + type: string + volumeName: + description: volumeName is the binding reference + to the PersistentVolume backing this claim. + type: string + type: object volumeSourceName: description: |- VolumeSourceName defines the name of the volume source for the sub-mount. @@ -3668,7 +3883,6 @@ spec: required: - mountPath - name - - volumeSourceName type: object type: array required: @@ -4097,6 +4311,221 @@ spec: name: description: Name defines the name of the sub-mount type: string + readOnly: + default: false + description: ReadOnly defines whether the mount + point should be read-only + type: boolean + subPath: + default: "" + description: |- + SubPath points to a specific entry inside the volume. + Corresponds to the subPath field in the K8s volumeMount structure. + See official docs for details: https://kubernetes.io/docs/concepts/storage/volumes/#using-subpath + type: string + volumeClaimTemplateSpec: + description: VolumeClaimTemplateSpec defines the + [corev1.PersistentVolumeClaim] template specification + properties: + accessModes: + description: |- + accessModes contains the desired access modes the volume should have. + More info: https://kubernetes.io/docs/concepts/storage/persistent-volumes#access-modes-1 + items: + type: string + type: array + x-kubernetes-list-type: atomic + dataSource: + description: |- + dataSource field can be used to specify either: + * An existing VolumeSnapshot object (snapshot.storage.k8s.io/VolumeSnapshot) + * An existing PVC (PersistentVolumeClaim) + If the provisioner or an external controller can support the specified data source, + it will create a new volume based on the contents of the specified data source. + When the AnyVolumeDataSource feature gate is enabled, dataSource contents will be copied to dataSourceRef, + and dataSourceRef contents will be copied to dataSource when dataSourceRef.namespace is not specified. + If the namespace is specified, then dataSourceRef will not be copied to dataSource. + properties: + apiGroup: + description: |- + APIGroup is the group for the resource being referenced. + If APIGroup is not specified, the specified Kind must be in the core API group. + For any other third-party types, APIGroup is required. + type: string + kind: + description: Kind is the type of resource + being referenced + type: string + name: + description: Name is the name of resource + being referenced + type: string + required: + - kind + - name + type: object + x-kubernetes-map-type: atomic + dataSourceRef: + description: |- + dataSourceRef specifies the object from which to populate the volume with data, if a non-empty + volume is desired. This may be any object from a non-empty API group (non + core object) or a PersistentVolumeClaim object. + When this field is specified, volume binding will only succeed if the type of + the specified object matches some installed volume populator or dynamic + provisioner. + This field will replace the functionality of the dataSource field and as such + if both fields are non-empty, they must have the same value. For backwards + compatibility, when namespace isn't specified in dataSourceRef, + both fields (dataSource and dataSourceRef) will be set to the same + value automatically if one of them is empty and the other is non-empty. + When namespace is specified in dataSourceRef, + dataSource isn't set to the same value and must be empty. + There are three important differences between dataSource and dataSourceRef: + * While dataSource only allows two specific types of objects, dataSourceRef + allows any non-core object, as well as PersistentVolumeClaim objects. + * While dataSource ignores disallowed values (dropping them), dataSourceRef + preserves all values, and generates an error if a disallowed value is + specified. + * While dataSource only allows local objects, dataSourceRef allows objects + in any namespaces. + (Beta) Using this field requires the AnyVolumeDataSource feature gate to be enabled. + (Alpha) Using the namespace field of dataSourceRef requires the CrossNamespaceVolumeDataSource feature gate to be enabled. + properties: + apiGroup: + description: |- + APIGroup is the group for the resource being referenced. + If APIGroup is not specified, the specified Kind must be in the core API group. + For any other third-party types, APIGroup is required. + type: string + kind: + description: Kind is the type of resource + being referenced + type: string + name: + description: Name is the name of resource + being referenced + type: string + namespace: + description: |- + Namespace is the namespace of resource being referenced + Note that when a namespace is specified, a gateway.networking.k8s.io/ReferenceGrant object is required in the referent namespace to allow that namespace's owner to accept the reference. See the ReferenceGrant documentation for details. + (Alpha) This field requires the CrossNamespaceVolumeDataSource feature gate to be enabled. + type: string + required: + - kind + - name + type: object + resources: + description: |- + resources represents the minimum resources the volume should have. + If RecoverVolumeExpansionFailure feature is enabled users are allowed to specify resource requirements + that are lower than previous value but must still be higher than capacity recorded in the + status field of the claim. + More info: https://kubernetes.io/docs/concepts/storage/persistent-volumes#resources + properties: + limits: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: |- + Limits describes the maximum amount of compute resources allowed. + More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ + type: object + requests: + additionalProperties: + anyOf: + - type: integer + - type: string + pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ + x-kubernetes-int-or-string: true + description: |- + Requests describes the minimum amount of compute resources required. + If Requests is omitted for a container, it defaults to Limits if that is explicitly specified, + otherwise to an implementation-defined value. Requests cannot exceed Limits. + More info: https://kubernetes.io/docs/concepts/configuration/manage-resources-containers/ + type: object + type: object + selector: + description: selector is a label query over + volumes to consider for binding. + properties: + matchExpressions: + description: matchExpressions is a list + of label selector requirements. The requirements + are ANDed. + items: + description: |- + A label selector requirement is a selector that contains values, a key, and an operator that + relates the key and values. + properties: + key: + description: key is the label key + that the selector applies to. + type: string + operator: + description: |- + operator represents a key's relationship to a set of values. + Valid operators are In, NotIn, Exists and DoesNotExist. + type: string + values: + description: |- + values is an array of string values. If the operator is In or NotIn, + the values array must be non-empty. If the operator is Exists or DoesNotExist, + the values array must be empty. This array is replaced during a strategic + merge patch. + items: + type: string + type: array + x-kubernetes-list-type: atomic + required: + - key + - operator + type: object + type: array + x-kubernetes-list-type: atomic + matchLabels: + additionalProperties: + type: string + description: |- + matchLabels is a map of {key,value} pairs. A single {key,value} in the matchLabels + map is equivalent to an element of matchExpressions, whose key field is "key", the + operator is "In", and the values array contains only "value". The requirements are ANDed. + type: object + type: object + x-kubernetes-map-type: atomic + storageClassName: + description: |- + storageClassName is the name of the StorageClass required by the claim. + More info: https://kubernetes.io/docs/concepts/storage/persistent-volumes#class-1 + type: string + volumeAttributesClassName: + description: |- + volumeAttributesClassName may be used to set the VolumeAttributesClass used by this claim. + If specified, the CSI driver will create or update the volume with the attributes defined + in the corresponding VolumeAttributesClass. This has a different purpose than storageClassName, + it can be changed after the claim is created. An empty string value means that no VolumeAttributesClass + will be applied to the claim but it's not allowed to reset this field to empty string once it is set. + If unspecified and the PersistentVolumeClaim is unbound, the default VolumeAttributesClass + will be set by the persistentvolume controller if it exists. + If the resource referred to by volumeAttributesClass does not exist, this PersistentVolumeClaim will be + set to a Pending state, as reflected by the modifyVolumeStatus field, until such as a resource + exists. + More info: https://kubernetes.io/docs/concepts/storage/volume-attributes-classes/ + (Beta) Using this field requires the VolumeAttributesClass feature gate to be enabled (off by default). + type: string + volumeMode: + description: |- + volumeMode defines what type of volume is required by the claim. + Value of Filesystem is implied when not included in claim spec. + type: string + volumeName: + description: volumeName is the binding reference + to the PersistentVolume backing this claim. + type: string + type: object volumeSourceName: description: |- VolumeSourceName defines the name of the volume source for the sub-mount. @@ -4105,7 +4534,6 @@ spec: required: - mountPath - name - - volumeSourceName type: object type: array sharedMemorySize: @@ -4415,7 +4843,7 @@ spec: type: object type: object useDefaultAppArmorProfile: - default: true + default: false description: Generate and set default AppArmor profile for the Slurm worker and login nodes. The Security Profiles Operator must be installed. type: boolean diff --git a/images/common/scripts/complement_jail.sh b/images/common/scripts/complement_jail.sh index e9f9332e..1c895bfe 100755 --- a/images/common/scripts/complement_jail.sh +++ b/images/common/scripts/complement_jail.sh @@ -66,32 +66,32 @@ pushd "${jaildir}" if [ -n "$worker" ] && [ "$SLURM_CLUSTER_TYPE" = "gpu" ]; then echo "Run nvidia-container-cli to propagate NVIDIA drivers, CUDA, NVML and other GPU-related stuff to the jail" - nvidia-container-cli \ - --user \ - --debug=/dev/stderr \ - --no-pivot \ - configure \ - --no-cgroups \ - --ldconfig="@$(command -v ldconfig.real || command -v ldconfig)" \ - --device=all \ - --utility \ - --compute \ - "${jaildir}" + flock etc/complement_jail_nvidia_container_cli.lock -c " + nvidia-container-cli \ + --user \ + --debug=/dev/stderr \ + --no-pivot \ + configure \ + --no-cgroups \ + --ldconfig=\"@$(command -v ldconfig.real || command -v ldconfig)\" \ + --device=all \ + --utility \ + --compute \ + \"${jaildir}\" + " touch "etc/gpu_libs_installed.flag" fi - if [ -n "$worker" ]; then - echo "Bind-mount enroot data directory because if should be node-local" - mount --bind /usr/share/enroot/enroot-data usr/share/enroot/enroot-data - fi + echo "Bind-mount enroot data directory because it should be node-local" + mount --bind /usr/share/enroot/enroot-data usr/share/enroot/enroot-data if ! getcap usr/bin/enroot-mksquashovlfs | grep -q 'cap_sys_admin+pe'; then echo "Set capabilities for enroot-mksquashovlfs to run containers without privileges" - setcap cap_sys_admin+pe usr/bin/enroot-mksquashovlfs + flock etc/complement_jail_setcap_enroot_mksquashovlfs.lock -c "setcap cap_sys_admin+pe usr/bin/enroot-mksquashovlfs" fi if ! getcap usr/bin/enroot-aufs2ovlfs | grep -q 'cap_sys_admin,cap_mknod+pe'; then echo "Set capabilities for enroot-aufs2ovlfs to run containers without privileges" - setcap cap_sys_admin,cap_mknod+pe usr/bin/enroot-aufs2ovlfs + flock etc/complement_jail_setcap_enroot_aufs2ovlfs.lock -c "setcap cap_sys_admin,cap_mknod+pe usr/bin/enroot-aufs2ovlfs" fi echo "Bind-mount slurm configs" @@ -118,6 +118,6 @@ pushd "${jaildir}" if [ -n "$worker" ]; then echo "Update linker cache inside the jail" - chroot "${jaildir}" /usr/sbin/ldconfig + flock etc/complement_jail_ldconfig.lock -c "chroot \"${jaildir}\" /usr/sbin/ldconfig" fi popd diff --git a/images/jail/gpubench/main.go b/images/jail/gpubench/main.go index 1c0890fc..8b09d7b8 100644 --- a/images/jail/gpubench/main.go +++ b/images/jail/gpubench/main.go @@ -176,7 +176,7 @@ func main() { succeed := 0 log.WithField("avg_bandwidth", avgBandwidth).Info(fmt.Sprintf("Avg bus bandwidth: %f", avgBandwidth)) messageReason := fmt.Sprintf( - "The GPU benchmark ended with an unsatisfactory result for the NCCL test all_reduce_perf: Avg bus bandwidth=%f, min=%f", + "Soperator healthcheck: NCCL test all_reduce_perf: Avg bus bandwidth=%fGB/s, min=%fGB/s", avgBandwidth, *limit, ) diff --git a/images/jail/jail.dockerfile b/images/jail/jail.dockerfile index 04b21480..135bd1fd 100644 --- a/images/jail/jail.dockerfile +++ b/images/jail/jail.dockerfile @@ -78,7 +78,9 @@ RUN apt update && \ unzip \ rsync \ numactl \ - htop + htop \ + rdma-core \ + ibverbs-utils # Install python COPY common/scripts/install_python.sh /opt/bin/ @@ -200,16 +202,27 @@ RUN chmod 644 /etc/passwd /etc/group && chown 0:0 /etc/passwd /etc/group && \ chmod 640 /etc/shadow /etc/gshadow && chown 0:42 /etc/shadow /etc/gshadow && \ chmod 440 /etc/sudoers && chown 0:0 /etc/sudoers -# Adjust the default $HOME directory content -RUN cd /etc/skel && \ - mkdir -m 755 .slurm && \ - touch .slurm/defaults && \ - chmod 644 .slurm/defaults && \ - cp -r /etc/skel/.slurm /root/ +# Setup the default $HOME directory content +RUN rm -rf -- /etc/skel/..?* /etc/skel/.[!.]* /etc/skel/* +COPY jail/skel/ /etc/skel/ +RUN chmod 755 /etc/skel/.slurm && \ + chmod 644 /etc/skel/.slurm/defaults && \ + chmod 644 /etc/skel/.bash_logout && \ + chmod 644 /etc/skel/.bashrc && \ + chmod 644 /etc/skel/.profile + +# Use the same /etc/skel content for /root +RUN rm -rf -- /root/..?* /root/.[!.]* /root/* && \ + cp -a /etc/skel/. /root/ # Copy createuser utility script COPY jail/scripts/createuser.sh /usr/bin/createuser RUN chmod +x /usr/bin/createuser +# Replace SSH "message of the day" scripts +RUN rm -rf /etc/update-motd.d/* +COPY jail/motd/ /etc/update-motd.d/ +RUN chmod +x /etc/update-motd.d/* + # Update linker cache RUN ldconfig diff --git a/images/jail/motd/00-welcome b/images/jail/motd/00-welcome new file mode 100644 index 00000000..690b37aa --- /dev/null +++ b/images/jail/motd/00-welcome @@ -0,0 +1,13 @@ +#!/bin/sh + +[ -r /etc/lsb-release ] && . /etc/lsb-release + +if [ -z "$DISTRIB_DESCRIPTION" ] && [ -x /usr/bin/lsb_release ]; then + DISTRIB_DESCRIPTION=$(lsb_release -s -d) +fi + +CPU_ARCH="$(uname -m)" +HOSTNAME="$(hostname)" + +printf "Welcome to Soperator cluster \n\n" +printf "You are on node %s (%s %s) \n" "${HOSTNAME}" "${DISTRIB_DESCRIPTION}" "${CPU_ARCH}" diff --git a/images/jail/motd/10-system-info b/images/jail/motd/10-system-info new file mode 100644 index 00000000..104b6ef5 --- /dev/null +++ b/images/jail/motd/10-system-info @@ -0,0 +1,25 @@ +#!/bin/sh + +printf "\nSystem information as of %s:\n" "$(/bin/date)" + +LOAD_AVG_1M=$(awk '{print $1}' /proc/loadavg) +CPU_CORES=$(nproc) +CPU_LOAD=$(awk -v load="$LOAD_AVG_1M" -v cores="$CPU_CORES" 'BEGIN { printf "%.1f", (load / cores) * 100 }') +printf " CPU load: %s" "${CPU_LOAD}%" + +TOTAL_MEM=$(awk '/^MemTotal:/ {print $2}' /proc/meminfo) +AVAIL_MEM=$(awk '/^MemAvailable:/ {print $2}' /proc/meminfo) +USED_MEM=$((TOTAL_MEM - AVAIL_MEM)) +MEM_USAGE=$(awk -v used="$USED_MEM" -v total="$TOTAL_MEM" 'BEGIN { printf "%.1f", (used / total) * 100 }') +printf "\tMemory usage: %s" "${MEM_USAGE}%" + +NUM_PROCESSES=$(ps -e --no-headers | wc -l) +printf "\tProcesses: %s\n" "${NUM_PROCESSES}" + +DISK_USAGE_CMD=" + findmnt -o SIZE,USE%,TARGET --types virtiofs,tmpfs,nfs4,overlay | grep -vE \"/dev|/usr/lib|/secrets\" \ + | sed 's/SIZE/Size/' | sed 's/USE%/Use%/' | sed 's/TARGET/Directory/' \ + | sed 's/^/ /' +" +printf " Disk usage:\n" +chroot /mnt/jail /bin/sh -c "${DISK_USAGE_CMD}" 2>/dev/null diff --git a/images/jail/motd/20-slurm-stats b/images/jail/motd/20-slurm-stats new file mode 100644 index 00000000..abe2dbec --- /dev/null +++ b/images/jail/motd/20-slurm-stats @@ -0,0 +1,26 @@ +#!/bin/sh + +SLURM_STATS_CMD=" + CONTROLLERS=\"\$(scontrol ping 2>/dev/null)\" + CONTROLLERS_EXITCODE=\"\$?\" + + if [ \"\${CONTROLLERS_EXITCODE}\" = \"0\" ]; then + printf \"\\nSlurm nodes:\\n\" + sinfo --Format=PartitionName:12,CPUs:7,Memory:10,Gres:37,Nodes:8,NodeList:26,StateLong:8,Reason:50 2>/dev/null | sed 's/^/ /' + printf \"\\n\" + + QUEUE=\$(squeue --Format=JobID:12,Partition:12,Name:24,UserName:16,State:10,TimeUsed:9,NumNodes:8,ReasonList:50 2>/dev/null | awk 'NR == 1 || \$4 != \"root\"') + QUEUE_LINES=\$(printf \"%s\" \"\${QUEUE}\" | grep -c '^') + if [ \"\${QUEUE_LINES}\" -le 1 ]; then + printf \"No user jobs in the queue\\n\" + else + printf \"Job queue:\\n\" + printf \"%s\\n\" \"\${QUEUE}\" | sed 's/^/ /' + fi + else + printf \"Slurm controllers:\\n\" + echo \"\${CONTROLLERS}\" | sed 's/^/ /' + fi +" + +chroot /mnt/jail /bin/sh -c "${SLURM_STATS_CMD}" diff --git a/images/jail/motd/30-ssh-users b/images/jail/motd/30-ssh-users new file mode 100644 index 00000000..7edd7932 --- /dev/null +++ b/images/jail/motd/30-ssh-users @@ -0,0 +1,10 @@ +#!/bin/sh + +USER="$(whoami)" +LOGGED_IN_USERS=$(last -F | awk '/still logged in/ {print $1, $5, $6, $7, $8}' | grep -vE "^${USER}") +if [ -z "$LOGGED_IN_USERS" ]; then + printf "\nNo other users are currently logged in \n" +else + printf "\nOther users currently logged in: \n" + printf "%s\n" "$LOGGED_IN_USERS" | awk '{printf " * %s - since %s %s %s %s\n", $1, $2, $3, $4, $5}' +fi diff --git a/images/jail/skel/.bash_logout b/images/jail/skel/.bash_logout new file mode 100644 index 00000000..de4f5f75 --- /dev/null +++ b/images/jail/skel/.bash_logout @@ -0,0 +1,7 @@ +# ~/.bash_logout: executed by bash(1) when login shell exits. + +# when leaving the console clear the screen to increase privacy + +if [ "$SHLVL" = 1 ]; then + [ -x /usr/bin/clear_console ] && /usr/bin/clear_console -q +fi diff --git a/images/jail/skel/.bashrc b/images/jail/skel/.bashrc new file mode 100644 index 00000000..4c2b2395 --- /dev/null +++ b/images/jail/skel/.bashrc @@ -0,0 +1,122 @@ +# ~/.bashrc: executed by bash(1) for non-login shells. +# see /usr/share/doc/bash/examples/startup-files (in the package bash-doc) +# for examples + +# If not running interactively, don't do anything +case $- in + *i*) ;; + *) return;; +esac + +# don't put duplicate lines or lines starting with space in the history. +# See bash(1) for more options +HISTCONTROL=ignoreboth + +# append to the history file, don't overwrite it +shopt -s histappend + +# for setting history length see HISTSIZE and HISTFILESIZE in bash(1) +HISTSIZE=10000 +HISTFILESIZE=20000 + +# check the window size after each command and, if necessary, +# update the values of LINES and COLUMNS. +shopt -s checkwinsize + +# If set, the pattern "**" used in a pathname expansion context will +# match all files and zero or more directories and subdirectories. +#shopt -s globstar + +# make less more friendly for non-text input files, see lesspipe(1) +[ -x /usr/bin/lesspipe ] && eval "$(SHELL=/bin/sh lesspipe)" + +# set variable identifying the chroot you work in (used in the prompt below) +if [ -z "${debian_chroot:-}" ] && [ -r /etc/debian_chroot ]; then + debian_chroot=$(cat /etc/debian_chroot) +fi + +# set a fancy prompt (non-color, unless we know we "want" color) +case "$TERM" in + xterm-color|*-256color) color_prompt=yes;; +esac + +# uncomment for a colored prompt, if the terminal has the capability; turned +# off by default to not distract the user: the focus in a terminal window +# should be on the output of commands, not on the prompt +#force_color_prompt=yes + +if [ -n "$force_color_prompt" ]; then + if [ -x /usr/bin/tput ] && tput setaf 1 >&/dev/null; then + # We have color support; assume it's compliant with Ecma-48 + # (ISO/IEC-6429). (Lack of such support is extremely rare, and such + # a case would tend to support setf rather than setaf.) + color_prompt=yes + else + color_prompt= + fi +fi + +if [ "$color_prompt" = yes ]; then + if [ $UID != 0 ]; then + PS1='${debian_chroot:+($debian_chroot)}\[\033[01;32m\]\u@\h\[\033[00m\]:\[\033[01;34m\]\w\[\033[00m\]\$ ' + else + # For user root, use the same prompt but colored in red + PS1='${debian_chroot:+($debian_chroot)}\[\033[01;31m\]\u@\h\[\033[00m\]:\[\033[01;34m\]\w\[\033[00m\]\$ ' + fi +else + PS1='${debian_chroot:+($debian_chroot)}\u@\h:\w\$ ' +fi +unset color_prompt force_color_prompt + +# If this is an xterm set the title to user@host:dir +case "$TERM" in +xterm*|rxvt*) + PS1="\[\e]0;${debian_chroot:+($debian_chroot)}\u@\h: \w\a\]$PS1" + ;; +*) + ;; +esac + +# enable color support of ls and also add handy aliases +if [ -x /usr/bin/dircolors ]; then + test -r ~/.dircolors && eval "$(dircolors -b ~/.dircolors)" || eval "$(dircolors -b)" + alias ls='ls --color=auto' + #alias dir='dir --color=auto' + #alias vdir='vdir --color=auto' + + alias grep='grep --color=auto' + alias fgrep='fgrep --color=auto' + alias egrep='egrep --color=auto' +fi + +# colored GCC warnings and errors +#export GCC_COLORS='error=01;31:warning=01;35:note=01;36:caret=01;32:locus=01:quote=01' + +# some more ls aliases +alias ll='ls -alF' +alias la='ls -A' +alias l='ls -CF' + +# Add an "alert" alias for long running commands. Use like so: +# sleep 10; alert +alias alert='notify-send --urgency=low -i "$([ $? = 0 ] && echo terminal || echo error)" "$(history|tail -n1|sed -e '\''s/^\s*[0-9]\+\s*//;s/[;&|]\s*alert$//'\'')"' + +# Alias definitions. +# You may want to put all your additions into a separate file like +# ~/.bash_aliases, instead of adding them here directly. +# See /usr/share/doc/bash-doc/examples in the bash-doc package. + +if [ -f ~/.bash_aliases ]; then + . ~/.bash_aliases +fi + +# enable programmable completion features (you don't need to enable +# this, if it's already enabled in /etc/bash.bashrc and /etc/profile +# sources /etc/bash.bashrc). +if ! shopt -oq posix; then + if [ -f /usr/share/bash-completion/bash_completion ]; then + . /usr/share/bash-completion/bash_completion + elif [ -f /etc/bash_completion ]; then + . /etc/bash_completion + fi +fi diff --git a/images/jail/skel/.profile b/images/jail/skel/.profile new file mode 100644 index 00000000..d89ea5a6 --- /dev/null +++ b/images/jail/skel/.profile @@ -0,0 +1,27 @@ +# ~/.profile: executed by the command interpreter for login shells. +# This file is not read by bash(1), if ~/.bash_profile or ~/.bash_login +# exists. +# see /usr/share/doc/bash/examples/startup-files for examples. +# the files are located in the bash-doc package. + +# the default umask is set in /etc/profile; for setting the umask +# for ssh logins, install and configure the libpam-umask package. +#umask 022 + +# if running bash +if [ -n "$BASH_VERSION" ]; then + # include .bashrc if it exists + if [ -f "$HOME/.bashrc" ]; then + . "$HOME/.bashrc" + fi +fi + +# set PATH so it includes user's private bin if it exists +if [ -d "$HOME/bin" ] ; then + PATH="$HOME/bin:$PATH" +fi + +# set PATH so it includes user's private bin if it exists +if [ -d "$HOME/.local/bin" ] ; then + PATH="$HOME/.local/bin:$PATH" +fi diff --git a/images/jail/skel/.slurm/defaults b/images/jail/skel/.slurm/defaults new file mode 100644 index 00000000..b581b530 --- /dev/null +++ b/images/jail/skel/.slurm/defaults @@ -0,0 +1 @@ +cpu-bind=verbose diff --git a/images/login/sshd.dockerfile b/images/login/sshd.dockerfile index c9ffce7d..5ad27edc 100644 --- a/images/login/sshd.dockerfile +++ b/images/login/sshd.dockerfile @@ -54,6 +54,10 @@ RUN mkdir -p /usr/src/dummy && \ gcc -shared -o libdummy.so dummy.c && \ cp libdummy.so /lib/x86_64-linux-gnu/ +# Create node-local directories for enroot runtime data +RUN mkdir -p -m 777 /usr/share/enroot/enroot-data && \ + mkdir -p -m 755 /run/enroot + # Copy script for complementing jail filesystem in runtime COPY common/scripts/complement_jail.sh /opt/bin/slurm/ RUN chmod +x /opt/bin/slurm/complement_jail.sh @@ -65,6 +69,9 @@ RUN ldconfig RUN rm /etc/passwd* /etc/group* /etc/shadow* /etc/gshadow* RUN rm -rf /home +# Delete SSH "message of the day" scripts because they will be linked from jail +RUN rm -rf /etc/update-motd.d + # Expose the port used for accessing sshd EXPOSE 22 diff --git a/images/login/sshd_entrypoint.sh b/images/login/sshd_entrypoint.sh index 4bc81e36..90da1629 100755 --- a/images/login/sshd_entrypoint.sh +++ b/images/login/sshd_entrypoint.sh @@ -9,6 +9,9 @@ ln -s /mnt/jail/etc/shadow /etc/shadow ln -s /mnt/jail/etc/gshadow /etc/gshadow chown -h 0:42 /etc/{shadow,gshadow} +echo "Link SSH \"message of the day\" scripts from jail" +ln -s /mnt/jail/etc/update-motd.d /etc/update-motd.d + echo "Link home from jail to use SSH keys from there" ln -s /mnt/jail/home /home diff --git a/images/populate_jail/populate_jail_entrypoint.sh b/images/populate_jail/populate_jail_entrypoint.sh index 6a7888ea..a5626bc5 100755 --- a/images/populate_jail/populate_jail_entrypoint.sh +++ b/images/populate_jail/populate_jail_entrypoint.sh @@ -11,10 +11,10 @@ if [ "$OVERWRITE" != "1" ] && [ -d /mnt/jail/dev ]; then fi echo "Delete everything from jail directory" -rm -rf /mnt/jail/* +rm -rf -- /mnt/jail/..?* /mnt/jail/.[!.]* /mnt/jail/* echo "Rclone and rsync jail rootfs into jail directory" -rclone copy /jail /mnt/jail --progress --transfers="$(nproc)" --links +rclone copy /jail /mnt/jail --progress --transfers="$(( $(nproc) * 2 ))" --links rsync --verbose --archive --one-file-system --xattrs --numeric-ids --sparse --acls --hard-links /jail/ /mnt/jail/ echo "Set permissions for jail directory" diff --git a/images/worker/slurmd.dockerfile b/images/worker/slurmd.dockerfile index 00494aea..5d20a042 100644 --- a/images/worker/slurmd.dockerfile +++ b/images/worker/slurmd.dockerfile @@ -49,7 +49,9 @@ RUN apt-get update && \ libdrm-dev \ sudo \ supervisor \ - openssh-server + openssh-server \ + rdma-core \ + ibverbs-utils # Install PMIx COPY common/scripts/install_pmix.sh /opt/bin/ @@ -120,6 +122,9 @@ RUN ldconfig RUN rm /etc/passwd* /etc/group* /etc/shadow* /etc/gshadow* RUN rm -rf /home +# Delete SSH "message of the day" scripts because they aren't needed on worker nodes +RUN rm -rf /etc/update-motd.d/* + # Expose the port used for accessing slurmd EXPOSE 6818 diff --git a/internal/consts/sshd.go b/internal/consts/sshd.go index bf67dfd1..6c0e2874 100644 --- a/internal/consts/sshd.go +++ b/internal/consts/sshd.go @@ -3,9 +3,9 @@ package consts // https://linux.die.net/man/5/sshd_config const ( - SSHDClientAliveInterval = "9000" // 30 minute - SSHDClientAliveCountMax = "10" - SSHDMaxStartups = "10:30:60" - SSHDLoginGraceTime = "9000" + SSHDClientAliveInterval = "3600" // 1 hour + SSHDClientAliveCountMax = "5" + SSHDMaxStartups = "100:50:300" + SSHDLoginGraceTime = "120" SSHDMaxAuthTries = "4" ) diff --git a/internal/render/common/configmap.go b/internal/render/common/configmap.go index e73166b3..9ac13bce 100644 --- a/internal/render/common/configmap.go +++ b/internal/render/common/configmap.go @@ -97,10 +97,11 @@ func generateSlurmConfig(cluster *values.SlurmCluster) renderutils.ConfigFile { res.AddProperty("HealthCheckNodeState", "ANY") res.AddComment("") res.AddProperty("InactiveLimit", 0) - res.AddProperty("KillWait", 30) - res.AddProperty("SlurmctldTimeout", 120) - res.AddProperty("SlurmdTimeout", 300) - res.AddProperty("Waittime", 0) + res.AddProperty("KillWait", 180) + res.AddProperty("UnkillableStepTimeout", 600) + res.AddProperty("SlurmctldTimeout", 30) + res.AddProperty("SlurmdTimeout", 180) + res.AddProperty("WaitTime", 0) res.AddComment("") res.AddComment("SCHEDULING") res.AddProperty("SchedulerType", "sched/backfill") @@ -344,7 +345,6 @@ func RenderDefaultConfigMapSSHDConfigs( func generateDefaultSshdConfig(cluster *values.SlurmCluster) renderutils.ConfigFile { res := &renderutils.MultilineStringConfig{} - res.AddLine("LogLevel INFO") res.AddLine(fmt.Sprintf("Port %d", cluster.NodeLogin.ContainerSshd.Port)) res.AddLine("PermitRootLogin yes") res.AddLine("PasswordAuthentication no") @@ -363,6 +363,10 @@ func generateDefaultSshdConfig(cluster *values.SlurmCluster) renderutils.ConfigF res.AddLine("MaxStartups " + consts.SSHDMaxStartups) res.AddLine("LoginGraceTime " + consts.SSHDLoginGraceTime) res.AddLine("MaxAuthTries " + consts.SSHDMaxAuthTries) + res.AddLine("LogLevel DEBUG3") + res.AddLine("") + res.AddLine("Match User *") + res.AddLine(" LogLevel INFO") return res } diff --git a/internal/render/common/volume.go b/internal/render/common/volume.go index f6831ee2..1a16ca5b 100644 --- a/internal/render/common/volume.go +++ b/internal/render/common/volume.go @@ -206,6 +206,8 @@ func RenderVolumeMountJailSubMount(subMount slurmv1.NodeVolumeJailSubMount) core return corev1.VolumeMount{ Name: subMount.Name, MountPath: path.Join(consts.VolumeMountPathJailUpper, subMount.MountPath), + SubPath: subMount.SubPath, + ReadOnly: subMount.ReadOnly, } } diff --git a/internal/render/login/volume.go b/internal/render/login/volume.go index f74c927e..fce94182 100644 --- a/internal/render/login/volume.go +++ b/internal/render/login/volume.go @@ -42,10 +42,19 @@ func renderVolumesAndClaimTemplateSpecs( // Jail sub-mounts for _, subMount := range login.JailSubMounts { - volumes = append( - volumes, - common.RenderVolumeFromSource(volumeSources, subMount.VolumeSourceName, subMount.Name), - ) + if v, s, err := common.AddVolumeOrSpec( + subMount.VolumeSourceName, + func(sourceName string) corev1.Volume { + return common.RenderVolumeFromSource(volumeSources, *subMount.VolumeSourceName, subMount.Name) + }, + subMount.VolumeClaimTemplateSpec, + subMount.Name, + ); err != nil { + return nil, nil, err + } else { + volumes = append(volumes, v...) + pvcTemplateSpecs = append(pvcTemplateSpecs, s...) + } } return volumes, pvcTemplateSpecs, nil diff --git a/internal/render/worker/volume.go b/internal/render/worker/volume.go index 0a03b317..83884732 100644 --- a/internal/render/worker/volume.go +++ b/internal/render/worker/volume.go @@ -73,10 +73,19 @@ func renderVolumesAndClaimTemplateSpecs( // Jail sub-mounts for _, subMount := range worker.JailSubMounts { - volumes = append( - volumes, - common.RenderVolumeFromSource(volumeSources, subMount.VolumeSourceName, subMount.Name), - ) + if v, s, err := common.AddVolumeOrSpec( + subMount.VolumeSourceName, + func(sourceName string) corev1.Volume { + return common.RenderVolumeFromSource(volumeSources, *subMount.VolumeSourceName, subMount.Name) + }, + subMount.VolumeClaimTemplateSpec, + subMount.Name, + ); err != nil { + return nil, nil, err + } else { + volumes = append(volumes, v...) + pvcTemplateSpecs = append(pvcTemplateSpecs, s...) + } } return volumes, pvcTemplateSpecs, nil diff --git a/internal/values/validate.go b/internal/values/validate.go index a463f544..7440c10f 100644 --- a/internal/values/validate.go +++ b/internal/values/validate.go @@ -107,11 +107,11 @@ func (c *SlurmCluster) Validate(ctx context.Context) error { } // worker jail sub-mounts for _, subMount := range c.NodeWorker.JailSubMounts { - volumeSourceNamesRaw = append(volumeSourceNamesRaw, &subMount.VolumeSourceName) + volumeSourceNamesRaw = append(volumeSourceNamesRaw, subMount.VolumeSourceName) } // login jail sub-mounts for _, subMount := range c.NodeLogin.JailSubMounts { - volumeSourceNamesRaw = append(volumeSourceNamesRaw, &subMount.VolumeSourceName) + volumeSourceNamesRaw = append(volumeSourceNamesRaw, subMount.VolumeSourceName) } for _, volumeSourceName := range volumeSourceNamesRaw { if volumeSourceName == nil {