Skip to content

Commit

Permalink
better drain handling
Browse files (browse the repository at this point in the history)
  • Loading branch information
albertz committed Dec 9, 2024
1 parent 909b1d0 commit 1c56e31
Showing 1 changed file with 6 additions and 2 deletions.
8 changes: 6 additions & 2 deletions users/zeyer/slurm_tools/report_gpus.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ def main():
total = defaultdict(int)
alloc = defaultdict(int)
reserved = defaultdict(int)
+ drain = defaultdict(int)
down = defaultdict(int)
for _, node_info in nodes_info.items():
for partition in node_info.get("Partitions", "").split(","):
Expand All @@ -53,7 +54,9 @@ def main():
key = (count_arg, partition)
if "RESERVED" in state_flags:
reserved[key] += node_total_count
- elif state in {"ALLOCATED", "IDLE", "MIXED"} and "DRAIN" not in state_flags:
+ elif "DRAIN" in state_flags:
+ drain[key] += node_total_count
+ elif state in {"ALLOCATED", "IDLE", "MIXED"}:
total[key] += node_total_count
alloc[key] += node_alloc_count
else:
Expand All @@ -63,10 +66,11 @@ def main():
total_ = total[key]
alloc_ = alloc[key]
reserved_ = reserved[key]
+ drain_ = drain[key]
down_ = down[key]
print(
f"Count {key}: {alloc_}/{total_} used, {total_ - alloc_}/{total_} free,"
- f" {reserved_} reserved, {down_} down"
+ f" {drain_} drain, {reserved_} reserved, {down_} down"
)


Expand Down

0 comments on commit 1c56e31

Please sign in to comment.