values.yaml

################################################################################
## Ingress configuration
## ---------------------

# HostName of the setup for ingress configuration
hostname: 

################################################################################
## BastionSSH configuration
## ------------------------

# bastionImage is the docker image of the bastion pod (main container)
bastionImage: "landerlini/bastion:v0.1a"

# bastionPort is the *internal* port to configure the sshd server
bastionPort: 22

# bastionNodePort is the *external* nodePort to configure the bastion service.
# It is the port used to access the bastion from outside.
bastionNodePort: 30022

# bastionAdminPublicKey is the public RSA key of an administrator 
bastionAdminPublicKey: 


################################################################################
## NFS server configuration
## ------------------------
## Important. You *must* define manually on which node to setup the platform 
## storage, by setting the label storage=master. For example, to use a master 
## volume on the node "vnode-0.localdomain", you can issue:
## ```
## kubectl label nodes vnode-0.localdomain storage=master
## ```

# nfsServerAddress is the fixed-IP address of the NFS server (avoid DNS)
# Important! If modified, the Helm Chart of JupyterHub must be kept in-sync.
nfsServerAddress: "10.96.0.2"

# nfsServerImage is the docker image of the NFS server
nfsServerImage: k8s.gcr.io/volume-nfs:0.8

# nfsServerInternalMountPath is the mount point *inside* the NFS server image 
nfsServerInternalMountPath: /exports

# nfsServerHostPath is the hostPath where the nfsServer store shared data
nfsServerHostPath: /user-data/nfs 

# nfsServerNodeSelector is the nodeSelector used to attach the NFS server
# to a node with dedicated storage resources.
nfsServerNodeSelector:
  storage: master

# nfsServerTolerations define the tolerations of the NFS server
nfsServerTolerations:
  - key: "node-role.kubernetes.io/control-plane"
    operator: "Exists"
    effect: "NoSchedule"

################################################################################
## Accelerators (GPUs)
## -------------------

# gpuMigStrategy indicate how kubernetes allocates MIG partitions of GPUs
# https://docs.nvidia.com/datacenter/cloud-native/kubernetes/latest/index.html
gpuMigStrategy: single

# GPU-equipped nodes must be marked with a label with same key as gpuAffinityLabel
gpuAffinityLabel: accelerator

# GPU-equipped nodes may have node taints to avoid CPU workloads to spoil resources.
# If such taints are set, the nVidia device-plugin must tolerate them.
gpuNodeTaints: 
  - nvidia.com/gpu

# acceleratorKnownModels is the "database" of all the GPU and FPGA models known 
# to the cluster. Knowing a model does not imply having it allocatable, but 
# models not listed here won't be usable even if made allocatable by Kubernetes.
acceleratorKnownModels:
  - name: cpu-only
    description: None
    node_selector:
      accelerator: none
    memory_gb: 0
    preference_weight: 100
    type: 

  - name: mig-1g.10gb
    description: nVidia A100 1g MIG partition (10 GB)
    node_selector:
      accelerator: a100
    memory_gb: 10
    preference_weight: 90
    type: gpu
    extended_resource: nvidia.com/gpu

  - name: t4
    description: nVidia T4 (16 GB)
    node_selector:
      accelerator: t4
    memory_gb: 16
    preference_weight: 80
    type: gpu
    extended_resource: nvidia.com/gpu

  - name: rtx5000
    description: nVidia RTX5000 (32 GB)
    node_selector:
      accelerator: rtx5000
    memory_gb: 32
    preference_weight: 70
    type: gpu
    extended_resource: nvidia.com/gpu

  - name: mig-2g.20gb
    description: nVidia A100 2g MIG partition (20 GB)
    node_selector:
      accelerator: a100
    memory_gb: 20
    preference_weight: 60
    type: gpu
    extended_resource: nvidia.com/gpu

  - name: mig-3g.30gb
    description: nVidia A100 3g MIG partition (30 GB)
    node_selector:
      accelerator: a100
    memory_gb: 30
    preference_weight: 50
    type: gpu
    extended_resource: nvidia.com/gpu

  - name: mig-4g.40gb
    description: nVidia A100 4g MIG partition (40 GB)
    node_selector:
      accelerator: a100
    memory_gb: 40
    preference_weight: 30
    type: gpu
    extended_resource: nvidia.com/gpu

  - name: a100
    description: nVidia A100 (80 GB)
    node_selector:
      accelerator: a100
    memory_gb: 80
    preference_weight: 20
    type: gpu
    extended_resource: nvidia.com/gpu


################################################################################
## Virtual Kubelet Dispatcher configuration
## ----------------------------------------

# vkdEnables enable the configuration of the virtual kubelet dispatcher 
# components, including minio and kaniko
vkdEnabled: true

vkdNamespace: vkd

# vkdSidecarImage is the docker image used as vkd server in a sidecar container
vkdSidecarImage: "harbor.cloud.infn.it/testbed-dm/vkd-dev:v0.0"

# vdkSidecarName is the name of the sidecar container
vkdSidecarName: vkd

# vkdAdminServiceAccount is the name of the service account that can submit jobs
vkdAdminUserGroup: vkd-batch-admin

# vkdAdminUserGroup is the JupyterHub system=true Admin Group to modify queues and quotas
vkdAdminUserGroup: vkd

# vkdPort is the port of the vkd container 
vkdPort: 8000

# vkdMinioUrl is the URL of the single-user Minio service to snapshot the filesystem before
# at submission time.
vkdMinioUrl: minio-singleuser:9000

# vkdImageBranch is the branch of the github repository the docker image downloads for CD
vkdImageBranch: newdm  #TODO put this back to "main"

# vkdMinioServerTolerations is the hostPath location used to store S3 data
# with snapshots of the filesystem to be used in batch jobs
vkdMinioServerHostPath: /user-data/minio

# nfsServerNodeSelector is the nodeSelector used to attach the NFS server
# to a node with dedicated storage resources.
vkdMinioServerNodeSelector:
  storage: master

# nfsServerTolerations define the tolerations of the NFS server
vkdMinioServerTolerations:
  - key: "node-role.kubernetes.io/control-plane"
    operator: "Exists"
    effect: "NoSchedule"

vkdPriorityClasses:
  - name: "lowest"
    value: -100
  - name: "lower"
    value: -80
  - name: "higher"
    value: -50
  - name: "highest"
    value: -20
  - name: "fire"
    value: -10
    preemptionPolicy: PreemptLowerPriority
    description: |
      Fire priority can kill other jobs to get resources. 
      Can waste CPU resources. Should be used upon authorization.

################################################################################
## Static website configuration (docs/blog ...)
## --------------------------------------------

# wwwEnabled enables a static web site service (based on nginx) configuring the
# related ingress. The website must be available via NFS and can be developed 
# from JupyterLab.
wwwEnabled: true

# wwwPrefix is the url prefix mapped by nginx-ingress to the nginx-server
wwwPrefix: /docs

# wwwPathToStaticWebsite is the path, relative to the served NFS volume, 
# where the static website is stored.
# Important: the `wwwPrefix` must be reproduced as a directory-tree structure 
# in the filesystem.
wwwPathToStaticWebsite: /www


################################################################################
## Ancillary JupyterHub configurtions
## ----------------------------------

jhubIamServer: https://iam.cloud.infn.it/
jhubAllowedIamGroups:
 - ml-infn-platform
 - ml-infn-users

jhubAdminIamGroups:
  - ml-infn

jhubIamClientId: 
jhubIamClientSecret: 
jhubCryptKey: 


jhubDatabaseLocalPath: /var/local
jhubDebug: false
jhubStartTimeout: 20
jhubStartupScript: /envs/setup.sh
rbac:
  create: true
  roleName: hub

jhubHomeDirectoryName: home
jhubSplashMessage: Welcome to the AI_INFN Platform!
jhubLabImages: 
  AI_INFN Multi-environment setup: harbor.cloud.infn.it/testbed-dm/ai-infn:0.1-pre12
  AI_INFN Multi-environment setup (stable): harbor.cloud.infn.it/testbed-dm/ai-infn:0.1-pre9


################################################################################
## JupyterHub Helm chart configuration
jupyterhub:
  ingress:
    enabled: false

  scheduling:
    userScheduler:
      enabled: false
    podPriority:
      enabled: false
    userPlaceholder:
      enabled: false

  cull:
    enabled: true
    timeout: 3600
    every: 600

  proxy:
    chp:
      networkPolicy:
        enabled: false
    secretToken: #<PUT HERE A RANDOM TOKEN openssl rand -hex 32>

  singleuser:
    networkPolicy:
          enabled: false

  hub:
    image:
      name: quay.io/jupyterhub/k8s-hub
      tag: '3.2.1'

    cookieSecret: #<PUT HERE A RANDOM TOKEN openssl rand -hex 32>

    networkPolicy:
      enabled: false

    db:
      type: sqlite-pvc
      upgrade: true
      pvc:
        accessModes:
          - ReadWriteOnce
        storage: 1Gi
        storageClassName: local-storage

    # JupyterHub should run on the master node
    tolerations:
      - key: "node-role.kubernetes.io/control-plane"
        operator: "Exists"
        effect: "NoSchedule"


    extraVolumes:
      - name: jhub-config
        configMap:
          name: jhub-config
          items:
           - key: customconfig.py
             path: customconfig.py
           - key: spawn_form.jinja2.html
             path: spawn_form.jinja2.html
           - key: envs-setup.sh
             path: envs-setup.sh

      - name: shared
        nfs:
          server: "10.96.0.2"
          path: "/"

    extraVolumeMounts:
      - name: jhub-config
        mountPath: /usr/local/etc/jupyterhub/jupyterhub_config.d/

      - name: shared
        mountPath: /nfs-shared
 

    extraEnv:
      OAUTH_CALLBACK_URL: 
        valueFrom: 
          configMapKeyRef:
            name: jhub-env
            key: oauthCallback

      OAUTH_ENDPOINT: 
        valueFrom: 
          configMapKeyRef:
            name: jhub-env
            key: oauthEndpoint

      OAUTH_GROUPS: 
        valueFrom: 
          configMapKeyRef:
            name: jhub-env
            key: oauthGroups

      OAUTH_ADMIN_GROUPS: 
        valueFrom: 
          configMapKeyRef:
            name: jhub-env
            key: oauthAdminGroups

      IAM_CLIENT_ID: 
        valueFrom: 
          configMapKeyRef:
            name: jhub-env
            key: oauthIamClientId

      IAM_CLIENT_SECRET: 
        valueFrom: 
          configMapKeyRef:
            name: jhub-env
            key: oauthIamClientSecret

      JUPYTERHUB_CRYPT_KEY: 
        valueFrom: 
          configMapKeyRef:
            name: jhub-env
            key: jupyterhubCryptKey

      NFS_SERVER_ADDRESS:
        valueFrom:
          configMapKeyRef:
            name: jhub-env
            key: nfsServerAddress

      NFS_MOUNT_POINT:
        valueFrom:
          configMapKeyRef:
            name: jhub-env
            key: nfsMountPoint

      START_TIMEOUT:
        valueFrom: 
          configMapKeyRef:
            name: jhub-env
            key: jupyterStartTimeout

      STARTUP_SCRIPT:
        valueFrom: 
          configMapKeyRef:
            name: jhub-env
            key: startupScript

      DEBUG:
        valueFrom: 
          configMapKeyRef:
            name: jhub-env
            key: debug

      HOME_NAME: 
        valueFrom: 
          configMapKeyRef:
            name: jhub-env
            key: homeName

      SSH_PORT: 
        valueFrom: 
          configMapKeyRef:
            name: jhub-env
            key: sshPort

      JHUB_NAMESPACE:
        valueFrom: 
          configMapKeyRef:
            name: jhub-env
            key: jhubNamespace

      DEFAULT_SPLASH_MESSAGE: 
        valueFrom: 
          configMapKeyRef:
            name: jhub-env
            key: defaultSplashMessage

      DEFAULT_JLAB_IMAGES: 
        valueFrom: 
          configMapKeyRef:
            name: jhub-env
            key: defaultJlabImages

      GPU_MODEL_DESCRIPTION:
        valueFrom: 
          configMapKeyRef:
            name: jhub-env
            key: gpuModelDescription

      CONFIGMAP_MOUNT_PATH:
        valueFrom: 
          configMapKeyRef:
            name: jhub-env
            key: configmapMountPath

      ENABLE_VKD:
        valueFrom: 
          configMapKeyRef:
            name: jhub-env
            key: enableVkd
      
      VKD_SIDECAR_IMAGE:
        valueFrom: 
          configMapKeyRef:
            name: jhub-env
            key: vkdSidecarImage
      
      VKD_SIDECAR_NAME:
        valueFrom: 
          configMapKeyRef:
            name: jhub-env
            key: vkdSidecarName
      
      VKD_ADMIN_SERVICE_ACCOUNT:
        valueFrom: 
          configMapKeyRef:
            name: jhub-env
            key: vkdAdminServiceAccount
      
      VKD_ADMIN_USER_GROUP:
        valueFrom: 
          configMapKeyRef:
            name: jhub-env
            key: vkdAdminUserGroup
      
      VKD_PORT:
        valueFrom: 
          configMapKeyRef:
            name: jhub-env
            key: vkdPort
      
      VKD_MINIO_URL:
        valueFrom: 
          configMapKeyRef:
            name: jhub-env
            key: vkdMinioUrl
      
      VKD_IMAGE_BRANCH:
        valueFrom: 
          configMapKeyRef:
            name: jhub-env
            key: vkdImageBranch