Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
74 changes: 62 additions & 12 deletions components/renku_data_services/crc/api.spec.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -550,7 +550,7 @@ paths:
content:
"application/json":
schema:
$ref: "#/components/schemas/K8sLabelList"
$ref: "#/components/schemas/Tolerations"
default:
$ref: "#/components/responses/Error"
tags:
Expand Down Expand Up @@ -1075,7 +1075,7 @@ components:
default:
$ref: "#/components/schemas/DefaultFlag"
tolerations:
$ref: "#/components/schemas/K8sLabelList"
$ref: "#/components/schemas/Tolerations"
node_affinities:
$ref: "#/components/schemas/NodeAffinityList"
required: ["cpu", "memory", "gpu", "max_storage", "name", "default", "default_storage"]
Expand Down Expand Up @@ -1106,7 +1106,7 @@ components:
default:
$ref: "#/components/schemas/DefaultFlagPatch"
tolerations:
$ref: "#/components/schemas/K8sLabelList"
$ref: "#/components/schemas/Tolerations"
node_affinities:
$ref: "#/components/schemas/NodeAffinityList"
example:
Expand All @@ -1133,7 +1133,7 @@ components:
default:
$ref: "#/components/schemas/DefaultFlagPatch"
tolerations:
$ref: "#/components/schemas/K8sLabelList"
$ref: "#/components/schemas/Tolerations"
node_affinities:
$ref: "#/components/schemas/NodeAffinityList"
required: ["id"]
Expand Down Expand Up @@ -1161,7 +1161,7 @@ components:
default:
$ref: "#/components/schemas/DefaultFlag"
tolerations:
$ref: "#/components/schemas/K8sLabelList"
$ref: "#/components/schemas/Tolerations"
node_affinities:
$ref: "#/components/schemas/NodeAffinityList"
required: ["cpu", "memory", "gpu", "max_storage", "name", "id", "default", "default_storage"]
Expand Down Expand Up @@ -1197,7 +1197,7 @@ components:
matching:
type: boolean
tolerations:
$ref: "#/components/schemas/K8sLabelList"
$ref: "#/components/schemas/Tolerations"
node_affinities:
$ref: "#/components/schemas/NodeAffinityList"
required: ["cpu", "memory", "gpu", "max_storage", "name", "id", "default", "default_storage"]
Expand Down Expand Up @@ -1800,15 +1800,65 @@ components:
description: A threshold in seconds after which a session gets culled/deleted (0 means no threshold)
minimum: 0
maximum: 2147483647
K8sLabelList:
Tolerations:
type: array
description: A list of k8s labels used for tolerations
description: |
A list of toleration items used for pod scheduling in Kubernetes.
items:
$ref: "#/components/schemas/K8sLabel"
example: ["test-label-1"]
uniqueItems: true
$ref: "#/components/schemas/Toleration"
default: []
minItems: 0
# Schema for a single toleration entry; it is the item type referenced by the `Tolerations` array schema.
# No `required` list is declared, so every property below is optional.
Toleration:
type: object
description: |
A toleration item.
The pod this Toleration is attached to tolerates any taint
that matches the triple <key,value,effect>
using the matching operator <operator>.
properties:
# NOTE(review): `effect` is typed as a plain string; the allowed values named in its
# description are not enforced by an `enum` in this schema - confirm they are validated downstream.
effect:
description: |-
Effect indicates the taint effect to match. Empty means match all taint effects.
When specified, allowed values are NoSchedule, PreferNoSchedule and NoExecute.
type: string
key:
description: |-
Key is the taint key that the toleration applies to. Empty means match all taint keys.
If the key is empty, operator must be Exists; this combination means to match all values and all keys.
type: string
# NOTE(review): `operator` is likewise an unconstrained string, and the "Defaults to Equal"
# wording is not backed by a `default:` entry here - presumably applied by Kubernetes; confirm.
operator:
description: |-
Operator represents a key's relationship to the value.
Valid operators are Exists and Equal. Defaults to Equal.
Exists is equivalent to wildcard for value, so that a pod can
tolerate all taints of a particular category.
type: string
tolerationSeconds:
description: |-
TolerationSeconds represents the period of time the toleration (which must be
of effect NoExecute, otherwise this field is ignored) tolerates the taint. By default,
it is not set, which means tolerate the taint forever (do not evict). Zero and
negative values will be treated as 0 (evict immediately) by the system.
format: int64
type: integer
value:
description: |-
Value is the taint value the toleration matches to.
If the operator is Exists, the value should be empty, otherwise just a regular string.
type: string
# The example sets effect/key/operator/value and leaves `tolerationSeconds` unset.
example:
effect: "NoSchedule"
key: "renku.io/dedicated"
operator: "Equal"
value: "user"
# K8sLabelList:
# type: array
# description: A list of k8s labels used for tolerations
# items:
# $ref: "#/components/schemas/K8sLabel"
# example: ["test-label-1"]
# uniqueItems: true
# default: []
# minItems: 0
K8sLabel:
type: string
description: A valid K8s label
Expand Down
60 changes: 31 additions & 29 deletions components/renku_data_services/crc/apispec.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# generated by datamodel-codegen:
# filename: api.spec.yaml
# timestamp: 2025-10-13T09:06:36+00:00
# timestamp: 2025-10-31T14:40:52+00:00

from __future__ import annotations

Expand Down Expand Up @@ -88,14 +88,26 @@ class RemoteConfigurationFirecrestPatch(BaseAPISpec):
)


class K8sLabel(RootModel[str]):
root: str = Field(
...,
description="A valid K8s label",
examples=["some-label-1"],
max_length=63,
min_length=3,
pattern="^[a-z0-9A-Z][a-z0-9A-Z-_./]*[a-z0-9A-Z]$",
# API model for one toleration item, mirroring the `Toleration` schema in api.spec.yaml.
# This module is generated by datamodel-codegen (see the file header), so change the spec
# and regenerate rather than editing the class by hand.
# Every field is optional and defaults to None.
# NOTE(review): `effect` and `operator` are unconstrained strings at this layer; core.py
# re-validates the dumped items through k8s_api.TolerationsField - confirm that step
# rejects values outside the sets named in the descriptions.
class Toleration(BaseAPISpec):
effect: Optional[str] = Field(
None,
description="Effect indicates the taint effect to match. Empty means match all taint effects.\nWhen specified, allowed values are NoSchedule, PreferNoSchedule and NoExecute.",
)
key: Optional[str] = Field(
None,
description="Key is the taint key that the toleration applies to. Empty means match all taint keys.\nIf the key is empty, operator must be Exists; this combination means to match all values and all keys.",
)
operator: Optional[str] = Field(
None,
description="Operator represents a key's relationship to the value.\nValid operators are Exists and Equal. Defaults to Equal.\nExists is equivalent to wildcard for value, so that a pod can\ntolerate all taints of a particular category.",
)
# camelCase on purpose: the attribute name mirrors the `tolerationSeconds` property of the spec.
tolerationSeconds: Optional[int] = Field(
None,
description="TolerationSeconds represents the period of time the toleration (which must be\nof effect NoExecute, otherwise this field is ignored) tolerates the taint. By default,\nit is not set, which means tolerate the taint forever (do not evict). Zero and\nnegative values will be treated as 0 (evict immediately) by the system.",
)
value: Optional[str] = Field(
None,
description="Value is the taint value the toleration matches to.\nIf the operator is Exists, the value should be empty, otherwise just a regular string.",
)


Expand Down Expand Up @@ -485,11 +497,9 @@ class ResourceClass(BaseAPISpec):
description="A default selection for resource classes or resource pools",
examples=[False],
)
tolerations: Optional[List[K8sLabel]] = Field(
tolerations: Optional[List[Toleration]] = Field(
None,
description="A list of k8s labels used for tolerations",
examples=[["test-label-1"]],
min_length=0,
description="A list of toleration items used for pod scheduling in Kubernetes.\n",
)
node_affinities: Optional[List[NodeAffinity]] = Field(
None,
Expand Down Expand Up @@ -541,11 +551,9 @@ class ResourceClassPatch(BaseAPISpec):
description="A default selection for resource classes or resource pools",
examples=[False],
)
tolerations: Optional[List[K8sLabel]] = Field(
tolerations: Optional[List[Toleration]] = Field(
None,
description="A list of k8s labels used for tolerations",
examples=[["test-label-1"]],
min_length=0,
description="A list of toleration items used for pod scheduling in Kubernetes.\n",
)
node_affinities: Optional[List[NodeAffinity]] = Field(
None,
Expand Down Expand Up @@ -603,11 +611,9 @@ class ResourceClassPatchWithId(BaseAPISpec):
description="A default selection for resource classes or resource pools",
examples=[False],
)
tolerations: Optional[List[K8sLabel]] = Field(
tolerations: Optional[List[Toleration]] = Field(
None,
description="A list of k8s labels used for tolerations",
examples=[["test-label-1"]],
min_length=0,
description="A list of toleration items used for pod scheduling in Kubernetes.\n",
)
node_affinities: Optional[List[NodeAffinity]] = Field(
None,
Expand Down Expand Up @@ -663,11 +669,9 @@ class ResourceClassWithId(BaseAPISpec):
description="A default selection for resource classes or resource pools",
examples=[False],
)
tolerations: Optional[List[K8sLabel]] = Field(
tolerations: Optional[List[Toleration]] = Field(
None,
description="A list of k8s labels used for tolerations",
examples=[["test-label-1"]],
min_length=0,
description="A list of toleration items used for pod scheduling in Kubernetes.\n",
)
node_affinities: Optional[List[NodeAffinity]] = Field(
None,
Expand Down Expand Up @@ -724,11 +728,9 @@ class ResourceClassWithIdFiltered(BaseAPISpec):
examples=[False],
)
matching: Optional[bool] = None
tolerations: Optional[List[K8sLabel]] = Field(
tolerations: Optional[List[Toleration]] = Field(
None,
description="A list of k8s labels used for tolerations",
examples=[["test-label-1"]],
min_length=0,
description="A list of toleration items used for pod scheduling in Kubernetes.\n",
)
node_affinities: Optional[List[NodeAffinity]] = Field(
None,
Expand Down
20 changes: 17 additions & 3 deletions components/renku_data_services/crc/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@
from renku_data_services.base_models import RESET, ResetType
from renku_data_services.crc import apispec, models
from renku_data_services.errors import errors
from renku_data_services.k8s.pod_scheduling import api as k8s_api
from renku_data_services.k8s.pod_scheduling import models as k8s_models
from renku_data_services.k8s.pod_scheduling import transforms as k8s_transforms


def validate_quota(body: apispec.QuotaWithOptionalId) -> models.UnsavedQuota:
Expand Down Expand Up @@ -43,7 +46,11 @@ def validate_resource_class(body: apispec.ResourceClass) -> models.UnsavedResour
),
key=lambda x: (x.key, x.required_during_scheduling),
)
tolerations = sorted(t.root for t in body.tolerations or [])
api_tolerations = k8s_api.TolerationsField.model_validate(
[tol.model_dump(mode="json") for tol in body.tolerations or []]
)
tolerations = k8s_transforms.transform_tolerations(body=api_tolerations) or []
tolerations = sorted(tolerations, key=k8s_models.Toleration.sort_key)
return models.UnsavedResourceClass(
name=body.name,
cpu=body.cpu,
Expand Down Expand Up @@ -91,9 +98,16 @@ def validate_resource_class_patch_or_put(
),
key=lambda x: (x.key, x.required_during_scheduling),
)
tolerations: list[str] | None = [] if method == "PUT" else None
# tolerations: list[str] | None = [] if method == "PUT" else None
# if body.tolerations:
# tolerations = sorted(t.root for t in body.tolerations or [])
tolerations: list[k8s_models.Toleration] | None = [] if method == "PUT" else None
if body.tolerations:
tolerations = sorted(t.root for t in body.tolerations or [])
api_tolerations = k8s_api.TolerationsField.model_validate(
[tol.model_dump(mode="json") for tol in body.tolerations]
)
tolerations = k8s_transforms.transform_tolerations(body=api_tolerations) or []
tolerations = sorted(tolerations, key=k8s_models.Toleration.sort_key)
if rc_id:
return models.ResourceClassPatchWithId(
id=rc_id,
Expand Down
45 changes: 31 additions & 14 deletions components/renku_data_services/crc/db.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@

import renku_data_services.base_models as base_models
from renku_data_services import errors
from renku_data_services.app_config import logging
from renku_data_services.base_models import RESET
from renku_data_services.crc import models
from renku_data_services.crc import orm as schemas
Expand All @@ -29,6 +30,8 @@
from renku_data_services.k8s.db import QuotaRepository
from renku_data_services.users.db import UserRepo

logger = logging.getLogger(__name__)


class _Base:
def __init__(self, session_maker: Callable[..., AsyncSession], quotas_repo: QuotaRepository) -> None:
Expand Down Expand Up @@ -338,8 +341,9 @@ async def get_classes(
# NOTE: The line below ensures that the right users can access the right resources, do not remove.
stmt = _classes_user_access_control(api_user, stmt)

res = await session.execute(stmt)
orms = res.scalars().all()
res = await session.scalars(stmt)
orms = res.all()
logger.warning(f"Classes = {[orm.dump() for orm in orms]}")
return [orm.dump() for orm in orms]

async def get_resource_class(self, api_user: base_models.APIUser, id: int) -> models.ResourceClass:
Expand Down Expand Up @@ -586,18 +590,31 @@ async def update_resource_class(
cls.node_affinities.remove(existing_affinity)

if update.tolerations is not None:
existing_tolerations: dict[str, schemas.TolerationORM] = {tol.key: tol for tol in cls.tolerations}
new_tolerations: dict[str, schemas.TolerationORM] = {
tol: schemas.TolerationORM(key=tol) for tol in update.tolerations
}
for new_tol_key, new_tol in new_tolerations.items():
if new_tol_key not in existing_tolerations:
# CREATE a brand new toleration
cls.tolerations.append(new_tol)
# REMOVE a toleration
for existing_tol_key, existing_tol in existing_tolerations.items():
if existing_tol_key not in new_tolerations:
cls.tolerations.remove(existing_tol)
# existing_tolerations: dict[str, schemas.TolerationORM] = {tol.key: tol for tol in cls.tolerations}
# new_tolerations: dict[str, schemas.TolerationORM] = {
# tol: schemas.TolerationORM(key=tol) for tol in update.tolerations
# }
# for new_tol_key, new_tol in new_tolerations.items():
# if new_tol_key not in existing_tolerations:
# # CREATE a brand new toleration
# cls.tolerations.append(new_tol)
# # REMOVE a toleration
# for existing_tol_key, existing_tol in existing_tolerations.items():
# if existing_tol_key not in new_tolerations:
# cls.tolerations.remove(existing_tol)

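# NOTE: the stored tolerations are replaced positionally: existing rows are overwritten in order, then surplus new items are appended or surplus old rows are dropped.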
existing_tolerations = list(cls.new_tolerations)
for existing_tol, new_tol in zip(existing_tolerations, update.tolerations, strict=False):
existing_tol.contents = new_tol.to_dict()

if len(update.tolerations) > len(existing_tolerations):
# Add new tolerations
for new_tol in update.tolerations[len(existing_tolerations) :]:
cls.new_tolerations.append(schemas.NewTolerationORM.from_model(new_tol))
elif len(update.tolerations) < len(existing_tolerations):
# Remove old tolerations
cls.new_tolerations = cls.new_tolerations[: len(update.tolerations)]

# NOTE: do we need to perform this check?
if cls.resource_pool is None:
Expand Down
10 changes: 7 additions & 3 deletions components/renku_data_services/crc/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from renku_data_services import errors
from renku_data_services.base_models import ResetType
from renku_data_services.k8s.constants import ClusterId
from renku_data_services.k8s.pod_scheduling import models as k8s_models
from renku_data_services.notebooks.cr_amalthea_session import TlsSecret


Expand Down Expand Up @@ -88,7 +89,8 @@ class UnsavedResourceClass(ResourcesCompareMixin):
default: bool = False
default_storage: int = 1
node_affinities: list[NodeAffinity] = field(default_factory=list)
tolerations: list[str] = field(default_factory=list)
# tolerations: list[str] = field(default_factory=list)
tolerations: list[k8s_models.Toleration] = field(default_factory=list)


@dataclass(frozen=True, eq=True, kw_only=True)
Expand All @@ -105,7 +107,8 @@ class ResourceClass(ResourcesCompareMixin):
default_storage: int = 1
matching: Optional[bool] = None
node_affinities: list[NodeAffinity] = field(default_factory=list)
tolerations: list[str] = field(default_factory=list)
# tolerations: list[str] = field(default_factory=list)
tolerations: list[k8s_models.Toleration] = field(default_factory=list)
quota: str | None = None


Expand All @@ -121,7 +124,8 @@ class ResourceClassPatch:
default: bool | None = None
default_storage: int | None = None
node_affinities: list[NodeAffinity] | None = None
tolerations: list[str] | None = None
# tolerations: list[str] | None = None
tolerations: list[k8s_models.Toleration] | None = None


@dataclass(frozen=True, eq=True, kw_only=True)
Expand Down
Loading
Loading