Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add script to generate resource allocation (nodeshare) choices #3030

Merged
merged 12 commits into from
Oct 24, 2023
5 changes: 4 additions & 1 deletion deployer/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,10 @@
import deployer.commands.grafana.tokens # noqa: F401
import deployer.commands.validate.config # noqa: F401
import deployer.keys.decrypt_age # noqa: F401
from deployer.cli_app import app
import deployer.resource_allocation.generate_choices # noqa: F401
import deployer.resource_allocation.update_nodeinfo # noqa: F401

from .cli_app import app


def main():
Expand Down
Empty file.
130 changes: 130 additions & 0 deletions deployer/resource_allocation/generate_choices.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
import json
import sys
from enum import Enum
from pathlib import Path

import typer
from ruamel.yaml import YAML

from ..cli_app import app

# ruamel.yaml handler in round-trip ("rt") mode; used by the CLI command in this
# module to dump the generated choices to stdout.
yaml = YAML(typ="rt")

# Directory containing this module. The node capacity data file
# (node-capacity-info.json) is opened relative to this path.
HERE = Path(__file__).parent


class ResourceAllocationStrategies(str, Enum):
    """
    Names of the strategies available for generating resource allocation choices.

    Members are also plain strings (via the `str` mixin), so a member compares
    equal to its value.
    """

    # Memory is halved between successive choices; see proportional_memory_strategy
    PROPORTIONAL_MEMORY_STRATEGY = "proportional-memory-strategy"


def proportional_memory_strategy(
    instance_type: str, nodeinfo: dict, num_allocations: int
) -> dict:
    """
    Generate choices for resource allocation based on proportional changes to memory

    Used primarily in research cases where:
    1. Workloads are more memory constrained than CPU constrained
    2. End users can be expected to select appropriate amount of memory they need for a given
       workload, either by their own intrinsic knowledge or instructed by an instructor.

    It features:
    1. No memory overcommit at all, as end users are expected to ask for as much memory as
       they need.
    2. CPU *guarantees* are proportional to amount of memory guarantee - the more memory you
       ask for, the more CPU you are guaranteed. This allows end users to pick resources purely
       based on memory only, simplifying the mental model. Also allows for maximum packing of
       user pods onto a node, as we will *not* run out of CPU on a node before running out of
       memory.
    3. No CPU limit tighter than the node itself, as CPU is a more flexible resource. Every
       choice gets the node's entire available CPU as its limit, and the CPU guarantee will
       ensure that users will not be starved of CPU.
    4. Each choice the user can make approximately has half as many resources as the next largest
       choice, with the largest being a full node. This offers a decent compromise - if you pick
       the largest option, you will most likely have to wait for a full node spawn, while smaller
       options are much more likely to be shared.

    Args:
        instance_type: Instance type name, used as the value of the
            `node.kubernetes.io/instance-type` node selector of every choice.
        nodeinfo: Capacity information for this instance type. Only
            `nodeinfo["available"]["memory"]` (in bytes) and
            `nodeinfo["available"]["cpu"]` are read.
        num_allocations: Number of choices to generate. Must be at least 1.

    Returns:
        A dict mapping a slug (`mem_<size>`, derived from the displayed memory size) to a
        choice with `display_name` and `kubespawner_override` keys. Choices are ordered from
        smallest to largest, and the smallest one is marked with `"default": True`.

    Raises:
        ValueError: If `num_allocations` is less than 1.
    """
    # With no choices generated there is nothing to mark as default, so fail early with a
    # clear message instead of an IndexError further down.
    if num_allocations < 1:
        raise ValueError(
            f"num_allocations must be at least 1, got {num_allocations}"
        )

    # We operate on *available* memory, which already accounts for system components (like kubelet & systemd)
    # as well as daemonsets we run on every node. This represents the resources that are available
    # for user pods.

    # FIXME: Add some more wiggle room here
    available_node_mem = nodeinfo["available"]["memory"]
    available_node_cpu = nodeinfo["available"]["cpu"]

    # We always start from the top, and provide a choice that takes up the whole node.
    mem_limit = available_node_mem

    choices = {}
    for _ in range(num_allocations):
        # CPU guarantee is proportional to the memory limit for this particular choice.
        # This makes sure we utilize all the memory on a node all the time.
        cpu_guarantee = (mem_limit / available_node_mem) * available_node_cpu

        # Memory is in bytes, let's convert it to GB to display
        mem_display = f"{mem_limit / 1024 / 1024 / 1024:.1f}"
        display_name = f"{mem_display} GB RAM, upto {available_node_cpu} CPUs"

        choice = {
            "display_name": display_name,
            "kubespawner_override": {
                # Guarantee and Limit are the same - this strategy has no oversubscription
                "mem_guarantee": int(mem_limit),
                "mem_limit": int(mem_limit),
                "cpu_guarantee": cpu_guarantee,
                # CPU limit is set to entire available CPU of the node, making sure no single
                # user can starve the node of critical kubelet / systemd resources.
                # Leaving it unset sets it to same as guarantee, which we do not want.
                "cpu_limit": available_node_cpu,
                # Explicitly set node_selector here, so the output can be easily combined
                # multiple times, with multiple instance types
                "node_selector": {"node.kubernetes.io/instance-type": instance_type},
            },
        }

        # Use the amount of RAM made available as a slug, to allow combining choices from
        # multiple instance types in the same profile. This does mean you can not have
        # the same RAM allocation from multiple node selectors. But that's a feature, not a bug.
        choices[f"mem_{mem_display.replace('.', '_')}"] = choice

        # Halve the mem_limit for the next choice
        mem_limit = mem_limit / 2

    # Reverse the choices so the smallest one is first
    choices = dict(reversed(choices.items()))

    # Make the smallest choice the default explicitly
    smallest_slug = next(iter(choices))
    choices[smallest_slug]["default"] = True

    return choices


@app.command()
def generate_resource_allocation_choices(
    instance_type: str = typer.Argument(
        ..., help="Instance type to generate Resource Allocation options for"
    ),
    # min=1: zero or negative counts produce no choices at all, so reject them
    # at the CLI boundary with a usage error rather than failing later.
    num_allocations: int = typer.Option(
        5, min=1, help="Number of choices to generate"
    ),
    strategy: ResourceAllocationStrategies = typer.Option(
        ResourceAllocationStrategies.PROPORTIONAL_MEMORY_STRATEGY,
        help="Strategy to use for generating resource allocation choices",
    ),
):
    """
    Generate resource allocation choices for an instance type and print them as YAML

    Capacity information is looked up in node-capacity-info.json, located next to
    this module. If the instance type is not present there, a message is written
    to stderr and the process exits with status 1.
    """
    with open(HERE / "node-capacity-info.json", encoding="utf-8") as f:
        nodeinfo = json.load(f)

    if instance_type not in nodeinfo:
        print(
            f"Capacity information about {instance_type} not available", file=sys.stderr
        )
        print("TODO: Provide information on how to update it", file=sys.stderr)
        sys.exit(1)

    # Call appropriate function based on what strategy we want to use
    if strategy == ResourceAllocationStrategies.PROPORTIONAL_MEMORY_STRATEGY:
        choices = proportional_memory_strategy(
            instance_type, nodeinfo[instance_type], num_allocations
        )
    else:
        raise ValueError(f"Strategy {strategy} is not currently supported")

    # Emit the generated choices on stdout so they can be pasted into config
    yaml.dump(choices, sys.stdout)
102 changes: 102 additions & 0 deletions deployer/resource_allocation/node-capacity-info.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
{
"r5.xlarge": {
"capacity": {
"cpu": 4.0,
"memory": 33186611200
},
"allocatable": {
"cpu": 3.92,
"memory": 32145375232
},
"measured_overhead": {
"cpu": 0.17,
"memory": 262144000
},
"available": {
"cpu": 3.75,
"memory": 31883231232
}
},
"r5.16xlarge": {
"capacity": {
"cpu": 64.0,
"memory": 535146246144
},
"available": {
"cpu": 63.6,
"memory": 526011052032
}
},
"n2-highmem-4": {
"capacity": {
"cpu": 4.0,
"memory": 33672949760
},
"allocatable": {
"cpu": 3.92,
"memory": 29786927104
},
"measured_overhead": {
"cpu": 0.435,
"memory": 488636416
},
"available": {
"cpu": 3.485,
"memory": 29298290688
}
},
"r5.4xlarge": {
"capacity": {
"cpu": 16.0,
"memory": 133545017344
},
"allocatable": {
"cpu": 15.89,
"memory": 130473738240
},
"measured_overhead": {
"cpu": 0.17,
"memory": 262144000
},
"available": {
"cpu": 15.72,
"memory": 130211594240
}
},
"n2-highmem-32": {
"capacity": {
"cpu": 32.0,
"memory": 270473359360
},
"allocatable": {
"cpu": 31.85,
"memory": 257783492608
},
"measured_overhead": {
"cpu": 0.426,
"memory": 457179136
},
"available": {
"cpu": 31.424,
"memory": 257326313472
}
},
"n1-highmem-4": {
"capacity": {
"cpu": 4.0,
"memory": 27328200704
},
"allocatable": {
"cpu": 3.92,
"memory": 23829102592
},
"measured_overhead": {
"cpu": 0.441,
"memory": 593494016
},
"available": {
"cpu": 3.479,
"memory": 23235608576
}
}
}
Loading