Skip to content

Commit

Permalink
Merge pull request #3572 from yuvipanda/ds-requests
Browse files Browse the repository at this point in the history
Include initContainers when calculating pod overhead
  • Loading branch information
yuvipanda authored Jan 5, 2024
2 parents 0c3180f + e295b8f commit 25ed0db
Show file tree
Hide file tree
Showing 4 changed files with 105 additions and 50 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -64,23 +64,50 @@ def get_daemon_sets_requests():
info = []
for ds in daemon_sets:
name = ds["metadata"]["name"]
req_mem = req_cpu = lim_mem = lim_cpu = 0
# From https://kubernetes.io/docs/concepts/workloads/pods/init-containers/#resource-sharing-within-containers
# > - The highest of any particular resource request or limit defined on
# > all init containers is the effective init request/limit. If any
# > resource has no resource limit specified this is considered as the
# > highest limit.
# > - The Pod's effective request/limit for a resource is the higher of:
# > - the sum of all app containers request/limit for a resource
# > - the effective init request/limit for a resource
#
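# So we have to calculate the effective request / limit of the init containers and
# the summed request / limit of the app containers separately, and take the higher
# of the two as the pod's effective request / limit.
# NOTE(review): the initContainers loop below adds values up with `+=`, while the
# docs quoted above use the highest value of any single init container - confirm.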

container_req_mem = (
container_req_cpu
) = container_lim_mem = container_lim_cpu = 0
init_container_req_mem = (
init_container_req_cpu
) = init_container_lim_mem = init_container_lim_cpu = 0

for c in ds["spec"]["template"]["spec"]["containers"]:
resources = c.get("resources", {})
requests = resources.get("requests", {})
limits = resources.get("limits", {})
req_mem += parse_quantity(requests.get("memory", 0))
lim_mem += parse_quantity(limits.get("memory", 0))
req_cpu += parse_quantity(requests.get("cpu", 0))
lim_cpu += parse_quantity(limits.get("cpu", 0))
container_req_mem += parse_quantity(requests.get("memory", 0))
container_lim_mem += parse_quantity(limits.get("memory", 0))
container_req_cpu += parse_quantity(requests.get("cpu", 0))
container_lim_cpu += parse_quantity(limits.get("cpu", 0))

for c in ds["spec"]["template"]["spec"].get("initContainers", []):
resources = c.get("resources", {})
requests = resources.get("requests", {})
limits = resources.get("limits", {})
init_container_req_mem += parse_quantity(requests.get("memory", 0))
init_container_lim_mem += parse_quantity(limits.get("memory", 0))
init_container_req_cpu += parse_quantity(requests.get("cpu", 0))
init_container_lim_cpu += parse_quantity(limits.get("cpu", 0))

info.append(
{
"name": name,
"cpu_request": float(req_cpu),
"cpu_limit": float(lim_cpu),
"memory_request": int(req_mem),
"memory_limit": int(lim_mem),
"cpu_request": float(max(container_req_cpu, init_container_req_cpu)),
"cpu_limit": float(max(container_lim_cpu, init_container_lim_cpu)),
"memory_request": int(max(container_req_mem, init_container_req_mem)),
"memory_limit": int(max(container_lim_mem, init_container_lim_mem)),
}
)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
gke:
2i2c:
requesting_daemon_sets: calico-node,fluentbit-gke,gke-metadata-server,gke-metrics-agent,ip-masq-agent,netd,pdcsi-node,support-cryptnono,support-prometheus-node-exporter
other_daemon_sets: ""
other_daemon_sets: binder-staging-dind,binder-staging-image-cleaner,imagebuilding-demo-binderhub-service-docker-api
cpu_requests: 344m
memory_requests: 596Mi
k8s_version: v1.27.4-gke.900
Expand All @@ -31,7 +31,7 @@ gke:
other_daemon_sets: ""
cpu_requests: 344m
memory_requests: 596Mi
k8s_version: v1.27.4-gke.900
k8s_version: v1.27.7-gke.1056000
awi-ciroh:
requesting_daemon_sets: calico-node,fluentbit-gke,gke-metadata-server,gke-metrics-agent,ip-masq-agent,netd,pdcsi-node,support-cryptnono,support-prometheus-node-exporter
other_daemon_sets: ""
Expand All @@ -43,25 +43,25 @@ gke:
other_daemon_sets: ""
cpu_requests: 344m
memory_requests: 596Mi
k8s_version: v1.27.4-gke.900
k8s_version: v1.27.7-gke.1056000
catalystproject-latam:
requesting_daemon_sets: calico-node,fluentbit-gke,gke-metadata-server,ip-masq-agent,netd,pdcsi-node,support-cryptnono,support-prometheus-node-exporter
other_daemon_sets: ""
cpu_requests: 338m
memory_requests: 496Mi
k8s_version: v1.27.3-gke.100
k8s_version: v1.27.7-gke.1056000
cloudbank:
requesting_daemon_sets: calico-node,fluentbit-gke,gke-metadata-server,gke-metrics-agent,ip-masq-agent,pdcsi-node,support-cryptnono,support-prometheus-node-exporter
other_daemon_sets: continuous-image-puller,continuous-image-puller,continuous-image-puller,netd
cpu_requests: 342m
memory_requests: 566Mi
k8s_version: v1.26.5-gke.2100
requesting_daemon_sets: calico-node,fluentbit-gke,gke-metadata-server,gke-metrics-agent,ip-masq-agent,netd,pdcsi-node,support-cryptnono,support-prometheus-node-exporter
other_daemon_sets: ""
cpu_requests: 344m
memory_requests: 596Mi
k8s_version: v1.27.5-gke.200
hhmi:
requesting_daemon_sets: fluentbit-gke,gke-metadata-server,netd,pdcsi-node,support-cryptnono,support-prometheus-node-exporter
other_daemon_sets: ""
cpu_requests: 228m
memory_requests: 480Mi
k8s_version: v1.27.3-gke.100
k8s_version: v1.27.7-gke.1056000
leap:
requesting_daemon_sets: calico-node,fluentbit-gke,gke-metadata-server,gke-metrics-agent,ip-masq-agent,netd,pdcsi-node,support-cryptnono,support-prometheus-node-exporter
other_daemon_sets: ""
Expand All @@ -81,88 +81,94 @@ gke:
memory_requests: 580Mi
k8s_version: v1.27.4-gke.900
pangeo-hubs:
requesting_daemon_sets: calico-node,fluentbit-gke,gke-metadata-server,gke-metrics-agent,ip-masq-agent,pdcsi-node,support-cryptnono,support-prometheus-node-exporter
other_daemon_sets: netd
cpu_requests: 342m
memory_requests: 566Mi
k8s_version: v1.26.5-gke.2100
requesting_daemon_sets: calico-node,fluentbit-gke,gke-metadata-server,gke-metrics-agent,ip-masq-agent,netd,pdcsi-node,support-cryptnono,support-prometheus-node-exporter
other_daemon_sets: ""
cpu_requests: 344m
memory_requests: 596Mi
k8s_version: v1.27.5-gke.200
qcl:
requesting_daemon_sets: calico-node,fluentbit-gke,gke-metadata-server,ip-masq-agent,netd,pdcsi-node,support-cryptnono,support-prometheus-node-exporter
other_daemon_sets: ""
cpu_requests: 338m
memory_requests: 496Mi
k8s_version: v1.27.4-gke.900
k8s_version: v1.27.7-gke.1056000
eks:
2i2c-aws-us:
requesting_daemon_sets: aws-node,ebs-csi-node,kube-proxy,support-cryptnono,support-prometheus-node-exporter
other_daemon_sets: ""
cpu_requests: 170m
memory_requests: 250Mi
k8s_version: v1.25.12-eks-2d98532
k8s_version: v1.27.8-eks-8cb36c9
catalystproject-africa:
requesting_daemon_sets: aws-node,ebs-csi-node,kube-proxy,support-cryptnono,support-prometheus-node-exporter
other_daemon_sets: ""
cpu_requests: 170m
memory_requests: 250Mi
k8s_version: v1.27.4-eks-2d98532
k8s_version: v1.27.8-eks-8cb36c9
gridsst:
requesting_daemon_sets: aws-node,ebs-csi-node,kube-proxy,support-cryptnono,support-prometheus-node-exporter
other_daemon_sets: ""
cpu_requests: 170m
memory_requests: 250Mi
k8s_version: v1.25.12-eks-2d98532
k8s_version: v1.27.8-eks-8cb36c9
jupyter-meets-the-earth:
requesting_daemon_sets: aws-node,ebs-csi-node,kube-proxy,support-cryptnono,support-prometheus-node-exporter
other_daemon_sets: ""
cpu_requests: 170m
memory_requests: 250Mi
k8s_version: v1.25.12-eks-2d98532
k8s_version: v1.27.8-eks-8cb36c9
nasa-cryo:
requesting_daemon_sets: aws-node,ebs-csi-node,kube-proxy,support-cryptnono,support-prometheus-node-exporter
other_daemon_sets: ""
cpu_requests: 170m
memory_requests: 250Mi
k8s_version: v1.25.12-eks-2d98532
k8s_version: v1.27.8-eks-8cb36c9
nasa-esdis:
requesting_daemon_sets: aws-node,ebs-csi-node,kube-proxy,support-cryptnono,support-prometheus-node-exporter
other_daemon_sets: ""
cpu_requests: 170m
memory_requests: 250Mi
k8s_version: v1.27.8-eks-8cb36c9
nasa-ghg:
requesting_daemon_sets: aws-node,ebs-csi-node,kube-proxy,support-cryptnono,support-prometheus-node-exporter
other_daemon_sets: ""
cpu_requests: 170m
memory_requests: 250Mi
k8s_version: v1.27.4-eks-2d98532
k8s_version: v1.27.8-eks-8cb36c9
nasa-veda:
requesting_daemon_sets: aws-node,ebs-csi-node,kube-proxy,support-cryptnono,support-prometheus-node-exporter
other_daemon_sets: ""
cpu_requests: 170m
memory_requests: 250Mi
k8s_version: v1.25.12-eks-2d98532
k8s_version: v1.27.8-eks-8cb36c9
openscapes:
requesting_daemon_sets: aws-node,ebs-csi-node,kube-proxy,support-cryptnono,support-prometheus-node-exporter
other_daemon_sets: ""
cpu_requests: 170m
memory_requests: 250Mi
k8s_version: v1.24.16-eks-2d98532
k8s_version: v1.27.8-eks-8cb36c9
smithsonian:
requesting_daemon_sets: aws-node,ebs-csi-node,kube-proxy,support-cryptnono,support-prometheus-node-exporter
other_daemon_sets: ""
cpu_requests: 170m
memory_requests: 250Mi
k8s_version: v1.25.12-eks-2d98532
k8s_version: v1.27.8-eks-8cb36c9
ubc-eoas:
requesting_daemon_sets: aws-node,ebs-csi-node,kube-proxy,support-cryptnono,support-prometheus-node-exporter
other_daemon_sets: ""
cpu_requests: 170m
memory_requests: 250Mi
k8s_version: v1.24.17-eks-f8587cb
k8s_version: v1.27.8-eks-8cb36c9
victor:
requesting_daemon_sets: aws-node,ebs-csi-node,kube-proxy,support-cryptnono,support-prometheus-node-exporter
other_daemon_sets: ""
cpu_requests: 170m
memory_requests: 250Mi
k8s_version: v1.25.12-eks-2d98532
k8s_version: v1.27.8-eks-8cb36c9
aks:
utoronto:
requesting_daemon_sets: cloud-node-manager,csi-azuredisk-node,csi-azurefile-node,kube-proxy,support-cryptnono,support-prometheus-node-exporter
other_daemon_sets: calico-node,continuous-image-puller,continuous-image-puller,continuous-image-puller,continuous-image-puller
other_daemon_sets: calico-node
cpu_requests: 226m
memory_requests: 300Mi
k8s_version: v1.26.3
Original file line number Diff line number Diff line change
Expand Up @@ -55,12 +55,12 @@
"memory": 130451771392
},
"measured_overhead": {
"cpu": 0.165,
"memory": 157286400
"cpu": 0.17,
"memory": 262144000
},
"available": {
"cpu": 15.725,
"memory": 130294484992
"cpu": 15.72,
"memory": 130189627392
}
},
"n2-highmem-32": {
Expand Down
40 changes: 31 additions & 9 deletions deployer/commands/generate/resource_allocation/update_nodeinfo.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,20 +106,42 @@ def get_node_capacity_info(instance_type: str):
mem_available = mem_allocatable

for p in pods:
mem_request = 0
cpu_request = 0
# Iterate through all the containers in the pod, and count the memory & cpu requests
# they make. We don't count initContainers' requests as they don't overlap with the
# container requests at any point.
# From https://kubernetes.io/docs/concepts/workloads/pods/init-containers/#resource-sharing-within-containers
# > - The highest of any particular resource request or limit defined on
# > all init containers is the effective init request/limit. If any
# > resource has no resource limit specified this is considered as the
# > highest limit.
# > - The Pod's effective request/limit for a resource is the higher of:
# > - the sum of all app containers request/limit for a resource
# > - the effective init request/limit for a resource
#
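# So we have to calculate the effective request of the init containers and the
# summed request of the app containers separately, and take the higher of the two
# as the pod's effective request.
# NOTE(review): the initContainers loop below adds values up with `+=`, while the
# docs quoted above use the highest value of any single init container - confirm.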
container_cpu_request = container_mem_request = 0
init_container_cpu_request = init_container_mem_request = 0

for c in p["spec"]["containers"]:
mem_request += parse_quantity(
container_mem_request += parse_quantity(
c.get("resources", {}).get("requests", {}).get("memory", "0")
)
container_cpu_request += parse_quantity(
c.get("resources", {}).get("requests", {}).get("cpu", "0")
)

for c in p["spec"].get("initContainers", []):
init_container_mem_request += parse_quantity(
c.get("resources", {}).get("requests", {}).get("memory", "0")
)
cpu_request += parse_quantity(
init_container_cpu_request += parse_quantity(
c.get("resources", {}).get("requests", {}).get("cpu", "0")
)
cpu_available -= cpu_request
mem_available -= mem_request

print(
p["metadata"]["name"],
max(init_container_mem_request, container_mem_request),
)
cpu_available -= max(container_cpu_request, init_container_cpu_request)
mem_available -= max(container_mem_request, init_container_mem_request)

return {
# CPU units are in fractions, while memory units are bytes
Expand Down

0 comments on commit 25ed0db

Please sign in to comment.