|
5 | 5 | import os |
6 | 6 | import random |
7 | 7 | import string |
8 | | -from collections.abc import AsyncIterator, Sequence |
| 8 | +from collections.abc import AsyncIterator, Mapping, Sequence |
9 | 9 | from datetime import timedelta |
10 | 10 | from pathlib import PurePosixPath |
11 | 11 | from typing import Protocol, TypeVar, cast |
|
20 | 20 |
|
21 | 21 | import renku_data_services.notebooks.image_check as ic |
22 | 22 | from renku_data_services.app_config import logging |
23 | | -from renku_data_services.base_models import AnonymousAPIUser, APIUser, AuthenticatedAPIUser |
| 23 | +from renku_data_services.base_models import RESET, AnonymousAPIUser, APIUser, AuthenticatedAPIUser, ResetType |
24 | 24 | from renku_data_services.base_models.metrics import MetricsService |
25 | 25 | from renku_data_services.connected_services.db import ConnectedServicesRepository |
26 | 26 | from renku_data_services.crc.db import ClusterRepository, ResourcePoolRepository |
|
54 | 54 | Authentication, |
55 | 55 | AuthenticationType, |
56 | 56 | Culling, |
| 57 | + CullingPatch, |
57 | 58 | DataSource, |
58 | 59 | ExtraContainer, |
59 | 60 | ExtraVolume, |
|
69 | 70 | Requests, |
70 | 71 | RequestsStr, |
71 | 72 | Resources, |
| 73 | + ResourcesPatch, |
72 | 74 | SecretAsVolume, |
73 | 75 | SecretAsVolumeItem, |
74 | 76 | Session, |
|
91 | 93 | ) |
92 | 94 | from renku_data_services.notebooks.utils import ( |
93 | 95 | node_affinity_from_resource_class, |
| 96 | + node_affinity_patch_from_resource_class, |
94 | 97 | tolerations_from_resource_class, |
95 | 98 | ) |
96 | 99 | from renku_data_services.project.db import ProjectRepository, ProjectSessionSecretRepository |
@@ -462,6 +465,21 @@ async def request_session_secret_creation( |
462 | 465 | ) |
463 | 466 |
|
464 | 467 |
|
def resources_patch_from_resource_class(resource_class: ResourceClass) -> ResourcesPatch:
    """Convert the resource class to a k8s resources patch.

    The requests and limits computed by ``resources_from_resource_class`` are overlaid
    on defaults that map the memory, cpu and GPU keys to ``RESET``. A key that the
    resource class does not define is therefore sent as ``RESET`` instead of being
    left out of the patch.

    When the resource class yields no requests (or no limits) at all, i.e. the value
    is ``None`` or empty, the whole field is set to ``RESET``.

    Args:
        resource_class: The resource class the session is being switched to.

    Returns:
        The ``ResourcesPatch`` built from the requests and limits described above.
    """
    gpu_name = GpuKind.NVIDIA.value + "/gpu"
    resources = resources_from_resource_class(resource_class)
    # Both names start out as RESET so they are always bound. Previously they were only
    # assigned inside the truthiness checks below, so a missing or empty requests/limits
    # value raised UnboundLocalError at the return statement.
    # NOTE(review): presumably RESET tells the patch to clear the field - confirm
    # against the ResetType definition.
    requests: Mapping[str, Requests | RequestsStr | ResetType] | ResetType = RESET
    limits: Mapping[str, Limits | LimitsStr | ResetType] | ResetType = RESET
    default_requests = {"memory": RESET, "cpu": RESET, gpu_name: RESET}
    default_limits = {"memory": RESET, "cpu": RESET, gpu_name: RESET}
    if resources.requests:
        # Values from the resource class win over the RESET defaults.
        requests = {**default_requests, **resources.requests}
    if resources.limits:
        limits = {**default_limits, **resources.limits}
    return ResourcesPatch(requests=requests, limits=limits)
| 481 | + |
| 482 | + |
465 | 483 | def resources_from_resource_class(resource_class: ResourceClass) -> Resources: |
466 | 484 | """Convert the resource class to a k8s resources spec.""" |
467 | 485 | requests: dict[str, Requests | RequestsStr] = { |
@@ -528,6 +546,31 @@ def get_culling( |
528 | 546 | ) |
529 | 547 |
|
530 | 548 |
|
def get_culling_patch(
    user: AuthenticatedAPIUser | AnonymousAPIUser, resource_pool: ResourcePool, nb_config: NotebooksConfig
) -> CullingPatch:
    """Get the patch for the culling durations of a session.

    Every culling duration in the patch starts out as ``RESET``. Each one is then
    replaced by the value computed by ``get_culling`` for the given user, resource
    pool and configuration, but only when that value is truthy.
    """
    duration_fields = (
        "maxAge",
        "maxFailedDuration",
        "maxHibernatedDuration",
        "maxIdleDuration",
        "maxStartingDuration",
    )
    computed = get_culling(user, resource_pool, nb_config)
    culling_patch = CullingPatch(
        maxAge=RESET,
        maxFailedDuration=RESET,
        maxHibernatedDuration=RESET,
        maxIdleDuration=RESET,
        maxStartingDuration=RESET,
    )
    for field_name in duration_fields:
        duration = getattr(computed, field_name)
        # Falsy durations (e.g. None) keep the RESET placeholder set above.
        if duration:
            setattr(culling_patch, field_name, duration)
    return culling_patch
| 572 | + |
| 573 | + |
531 | 574 | async def __requires_image_pull_secret(nb_config: NotebooksConfig, image: str, internal_gitlab_user: APIUser) -> bool: |
532 | 575 | """Determines if an image requires a pull secret based on its visibility and their GitLab access token.""" |
533 | 576 |
|
@@ -1030,29 +1073,39 @@ async def patch_session( |
1030 | 1073 | ) |
1031 | 1074 | ) |
1032 | 1075 | rp = await rp_repo.get_resource_pool_from_class(user, body.resource_class_id) |
| 1076 | + try: |
| 1077 | + old_rp = await rp_repo.get_resource_pool_from_class(user, session.resource_class_id()) |
| 1078 | + except (errors.MissingResourceError, errors.UnauthorizedError, errors.ForbiddenError): |
| 1079 | + old_rp = None |
1033 | 1080 | rc = rp.get_resource_class(body.resource_class_id) |
1034 | 1081 | if not rc: |
1035 | 1082 | raise errors.MissingResourceError( |
1036 | 1083 | message=f"The resource class you requested with ID {body.resource_class_id} does not exist" |
1037 | 1084 | ) |
1038 | | - # TODO: reject session classes which change the cluster |
| 1085 | + if old_rp is not None and rp.cluster != old_rp.cluster: |
| 1086 | + raise errors.ValidationError(message="Changing resource pools with different clusters is not allowed.") |
1039 | 1087 | if not patch.metadata: |
1040 | 1088 | patch.metadata = AmaltheaSessionV1Alpha1MetadataPatch() |
1041 | | - # Patch the resource class ID in the annotations |
| 1089 | + # Patch the resource pool and class ID in the annotations |
| 1090 | + patch.metadata.annotations = {"renku.io/resource_pool_id": str(rp.id)} |
1042 | 1091 | patch.metadata.annotations = {"renku.io/resource_class_id": str(body.resource_class_id)} |
1043 | 1092 | if not patch.spec.session: |
1044 | 1093 | patch.spec.session = AmaltheaSessionV1Alpha1SpecSessionPatch() |
1045 | | - patch.spec.session.resources = resources_from_resource_class(rc) |
| 1094 | + patch.spec.session.resources = resources_patch_from_resource_class(rc) |
1046 | 1095 | # Tolerations |
1047 | 1096 | tolerations = tolerations_from_resource_class(rc, nb_config.sessions.tolerations_model) |
1048 | 1097 | patch.spec.tolerations = tolerations |
1049 | 1098 | # Affinities |
1050 | | - patch.spec.affinity = node_affinity_from_resource_class(rc, nb_config.sessions.affinity_model) |
| 1099 | + patch.spec.affinity = node_affinity_patch_from_resource_class(rc, nb_config.sessions.affinity_model) |
1051 | 1100 | # Priority class (if a quota is being used) |
1052 | | - patch.spec.priorityClassName = rc.quota |
1053 | | - patch.spec.culling = get_culling(user, rp, nb_config) |
| 1101 | + if rc.quota is None: |
| 1102 | + patch.spec.priorityClassName = RESET |
| 1103 | + patch.spec.culling = get_culling_patch(user, rp, nb_config) |
| 1104 | + # Service account name |
1054 | 1105 | if rp.cluster is not None: |
1055 | | - patch.spec.service_account_name = rp.cluster.service_account_name |
| 1106 | + patch.spec.service_account_name = ( |
| 1107 | + rp.cluster.service_account_name if rp.cluster.service_account_name is not None else RESET |
| 1108 | + ) |
1056 | 1109 |
|
1057 | 1110 | # If the session is being hibernated we do not need to patch anything else that is |
1058 | 1111 | # not specifically called for in the request body, we can refresh things when the user resumes. |
@@ -1126,6 +1179,8 @@ async def patch_session( |
1126 | 1179 | if image_pull_secret: |
1127 | 1180 | session_extras.concat(SessionExtraResources(secrets=[image_pull_secret])) |
1128 | 1181 | patch.spec.imagePullSecrets = [ImagePullSecret(name=image_pull_secret.name, adopt=image_pull_secret.adopt)] |
| 1182 | + else: |
| 1183 | + patch.spec.imagePullSecrets = RESET |
1129 | 1184 |
|
1130 | 1185 | # Construct session patch |
1131 | 1186 | patch.spec.extraContainers = _make_patch_spec_list( |
|
0 commit comments