From f346542db04b0cebf93578db14e5bab4b797e5da Mon Sep 17 00:00:00 2001 From: Naved Ansari Date: Tue, 6 Aug 2024 13:35:38 -0400 Subject: [PATCH 1/2] Set minimum size of a container This prevents people from explicitly setting memory or cpu request to 0 for their containers. While such pods will be the first to be kicked out when the cluster is low on resources, they will continue to run for free at other times. With this in place the scheduler will reject the container from running with an appropriate message to the user. --- k8s/base/limits.json | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/k8s/base/limits.json b/k8s/base/limits.json index 78c9d29..4ba8c90 100644 --- a/k8s/base/limits.json +++ b/k8s/base/limits.json @@ -10,6 +10,10 @@ "cpu": "1", "memory": "512Mi", "nvidia.com/gpu": "0" + }, + "min": { + "cpu": "125m", + "memory": "256Mi" } } ] From 73b793146ff0fa26246ac433f9216268ff8b0afb Mon Sep 17 00:00:00 2001 From: Naved Ansari Date: Tue, 6 Aug 2024 13:43:03 -0400 Subject: [PATCH 2/2] Update the default limit and request for containers This sets it such that the CPU to Memory ratio aligns with our CPU SU definition of 1 CPU to 4 Gi of Memory. Note that I lowered the CPU limits and request instead of only increasing the memory. --- k8s/base/limits.json | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/k8s/base/limits.json b/k8s/base/limits.json index 4ba8c90..03f3292 100644 --- a/k8s/base/limits.json +++ b/k8s/base/limits.json @@ -2,13 +2,13 @@ { "type": "Container", "default": { - "cpu": "2", - "memory": "1024Mi", + "cpu": "1", + "memory": "4096Mi", "nvidia.com/gpu": "0" }, "defaultRequest": { - "cpu": "1", - "memory": "512Mi", + "cpu": "500m", + "memory": "2048Mi", "nvidia.com/gpu": "0" }, "min": {