From 0f04a198141c0cc0a6e6f76cb89f49813225edab Mon Sep 17 00:00:00 2001 From: Ashkan Mirzaee Date: Wed, 8 Dec 2021 16:48:50 -0600 Subject: [PATCH] Add a new report option to sbox --- README.md | 3 ++- bin/sbox | 31 +++++++++++++++++++++++++++---- docs/_static/docs-generator.sh | 4 ++-- docs/sbox.rst | 6 ++++-- share/man/man1/interactive.1 | 3 ++- share/man/man1/sbox.1 | 14 ++++++++++---- 6 files changed, 47 insertions(+), 14 deletions(-) diff --git a/README.md b/README.md index f8e2016..97a5edd 100644 --- a/README.md +++ b/README.md @@ -53,7 +53,8 @@ Sbox is a toolbox for Slurm that provides information about users' accounts and - `--whodat`: Show users informations by UID. It uses `ldapsearch` command and returns empty output if the cluster does not use LDAP. - `--whodat2`: Show users informations by name. It uses `ldapsearch`command and returns empty output if the cluster does not use LDAP. - `--agent`: Start, stop and list user's ssh-agents on the current host. It requires one of the start/stop/list options as an argument. Use `ssh -o StrictHostKeyChecking=no` to disable asking for host key acceptances. -- `--report`: Show the cluster utilization report. It uses `sinfo` and `squeue` commands. +- `--report`: Show current cluster utilization based on the running jobs. It uses the Slurm `sinfo` and `squeue` commands. +- `--report2`: Show cluster utilization based on ended jobs in the last day. It uses the Slurm `sacct` command.
**Examples** diff --git a/bin/sbox b/bin/sbox index 46505ad..49f21e3 100755 --- a/bin/sbox +++ b/bin/sbox @@ -11,6 +11,7 @@ import os import re import sys import json +import time import pathlib import argparse @@ -44,10 +45,10 @@ parser.add_argument('-p', '--partition', action = 'store_true', help = 'show par parser.add_argument('-u', '--user', default = user, help = 'user id', metavar = 'UID') parser.add_argument('-v', '--version', action = 'version', version = '%(prog)s 1.2') parser.add_argument('--eff', type = int, help = 'show efficiency of a job', metavar = 'JOBID') -parser.add_argument('--history', choices = ['day','week','month','year'], help = 'show jobs history for last day/week/month/year') +parser.add_argument('--history', choices = ['day','week','month','year'], help = 'show jobs history') parser.add_argument('--pending', action = 'store_true', help = 'show pending jobs') parser.add_argument('--running', action = 'store_true', help = 'show running jobs') -parser.add_argument('--cancel', help = 'cancel jobs by a single id or a comma separated list of ids', metavar = 'JOBID') +parser.add_argument('--cancel', help = 'cancel jobs by job id', metavar = 'JOBID') parser.add_argument('--qos', action = 'store_true', help = 'show quality of services') parser.add_argument('--quota', action = 'store_true', help = 'show quotas') parser.add_argument('--ncpu', action = 'store_true', help = 'show number of available cpus') @@ -58,8 +59,9 @@ parser.add_argument('--reserve', action = 'store_true', help = 'show reservation parser.add_argument('--topusage', action = 'store_true', help = 'show top usage users') parser.add_argument('--whodat', help = 'show users informations by uid', metavar = 'UID') parser.add_argument('--whodat2', help = 'show users informations by name', metavar = 'UNAME') -parser.add_argument('--agent', choices = ['start','stop','list'], help = 'start/stop/list ssh-agents on the current host') -parser.add_argument('--report', action = 'store_true', help 
= 'show the cluster utilization report') +parser.add_argument('--agent', choices = ['start','stop','list'], help = 'start/stop/list ssh-agents on a host') +parser.add_argument('--report', action = 'store_true', help = 'show current cluster utilization') +parser.add_argument('--report2', action = 'store_true', help = 'show cluster utilization in last 24 hours') args = parser.parse_args() if len(sys.argv) == 1: @@ -392,3 +394,24 @@ if args.report: gpu_util=`(date +"%F"| tr "\n" " " ; echo "{g} gpu" | tr "\n" " " ; sinfo -p {g} -o %n,%G | grep -Po '(?<=:)\d' | awk '{{ sum1 += $1 }} END {{ print sum1 }}' | tr "\n" " " ; squeue -O jobid,partition,gres,state,username | grep RUNNING | grep -i {g} | awk '{{ print $3 }}' | awk 'BEGIN {{ FS=":" }} {{ total+=$2 }} END {{ print total }}')` (echo $gpu_util | tr "\n" " " ; if [ -z `echo $gpu_util | awk '{{ print $5 }}'` ]; then echo "0 0"; else echo `echo $gpu_util | awk '{{ print $5/$4 }}'`; fi) | tr " " "," """) + +if args.report2: + if len(part_cpu) == 0: + print('No CPU partition found in the config file!') + sys.exit(1) + + print('date,partition,total_corehour,allocation_corehour,utilization') + for c in part_cpu: + tocpu = os.popen(f"sinfo --partition {c} --Node --format %C | cut --delimiter '/' --fields 4 | awk '{{ sum1 += $1 }} END {{ print sum1 }}'").read().strip() + data_ = os.popen(f"sacct --partition {c} --allusers --parsable2 --noheader --allocation --duplicates --format partition,start,end,ncpus --state CANCELLED,COMPLETED,FAILED,NODE_FAIL,PREEMPTED,TIMEOUT --starttime $(date --date='1 day ago' +'%Y-%m-%d-%H:%M') --endtime $(date +'%Y-%m-%d-%H:%M')").read().strip().split("\n") + if len(data_[0]) > 0: + tmcpu = [] + for i in data_: + rw_ = i.split('|') + tm1 = time.strptime(rw_[1], '%Y-%m-%dT%H:%M:%S') + tm2 = time.strptime(rw_[2], '%Y-%m-%dT%H:%M:%S') + dif = time.mktime(tm2) - time.mktime(tm1) # running time per second + tmcpu.append(int(rw_[3]) * int(dif)) + else: + tmcpu = [0] + print(time.strftime('%Y-%m-%d'),
c, round(int(tocpu)*24), round(sum(tmcpu)/3600), round(sum(tmcpu)/(int(tocpu)*86400),2), sep = ',') diff --git a/docs/_static/docs-generator.sh b/docs/_static/docs-generator.sh index 36ccdcf..5ba7d34 100644 --- a/docs/_static/docs-generator.sh +++ b/docs/_static/docs-generator.sh @@ -4,10 +4,10 @@ ## Create an env for Sphinx and Pandoc #conda create -n sphinx -c conda-forge sphinx pandoc -conda activate sphinx +source activate sphinx ## Build the local html for preview at ../_build/html/ -make -C ../ html +#make -C ../ html ## RST Docs awk "/## Sbox/,/## Quick install/" ../../README.md | head -n -1 > sbox.md diff --git a/docs/sbox.rst b/docs/sbox.rst index 9457756..3cb6435 100644 --- a/docs/sbox.rst +++ b/docs/sbox.rst @@ -76,8 +76,10 @@ Command line options host. It requires one of the start/stop/list options as an argument. Use ``ssh -o StrictHostKeyChecking=no`` to disable asking for host key acceptances. -- ``--report``: Show the cluster utilization report. It uses ``sinfo`` - and ``squeue`` commands. +- ``--report``: Show current cluster utilization based on the running + jobs. It uses the Slurm ``sinfo`` and ``squeue`` commands. +- ``--report2``: Show cluster utilization based on ended jobs in the last + day. It uses the Slurm ``sacct`` command.
**Examples** diff --git a/share/man/man1/interactive.1 b/share/man/man1/interactive.1 index e0ff6d9..bca6e6d 100644 --- a/share/man/man1/interactive.1 +++ b/share/man/man1/interactive.1 @@ -1,7 +1,8 @@ -.TH SBOX "1" "November 2021" "SBOX 1.2" +.TH SBOX "1" "December 2021" "SBOX 1.2" .SH NAME interactive \- an alias for using cluster interactively .SH SYNOPSIS +interactive [-h] [-a] [-n] [-N] [-p] [-t] [-k] [-e] [-E] [-l] [-m] [-g] [{jupyter}] .br .SH DESCRIPTION .PP diff --git a/share/man/man1/sbox.1 b/share/man/man1/sbox.1 index 2c9073e..1fd2e3d 100644 --- a/share/man/man1/sbox.1 +++ b/share/man/man1/sbox.1 @@ -1,8 +1,8 @@ -.TH SBOX "1" "November 2021" "SBOX 1.2" +.TH SBOX "1" "December 2021" "SBOX 1.2" .SH NAME sbox \- a simple toolbox for Slurm .SH SYNOPSIS -sbox [-h] [-a] [-f] [-g] [-q] [-j JOBID] [-c] [-p] [-u UID] [-v] [--eff JOBID] [--history {day,week,month,year}] [--pending] [--running] [--cancel JOBID] [--qos] [--quota] [--ncpu] [--ngpu] [--gpu] [--license] [--reserve] [--topusage] [--whodat UID] [--whodat2 UNAME] [--agent {start,stop,list}] [--report] +sbox [-h] [-a] [-f] [-g] [-q] [-j JOBID] [-c] [-p] [-u UID] [-v] [--eff JOBID] [--history {day,week,month,year}] [--pending] [--running] [--cancel JOBID] [--qos] [--quota] [--ncpu] [--ngpu] [--gpu] [--license] [--reserve] [--topusage] [--whodat UID] [--whodat2 UNAME] [--agent {start,stop,list}] [--report] [--report2] .br .SH DESCRIPTION .PP @@ -145,8 +145,14 @@ Use ssh -o StrictHostKeyChecking=no to disable asking for host key acceptances. .TP .B --report -Show the cluster utilization report. -It uses sinfo and squeue commands. +Show current cluster utilization based on the +running jobs. +It uses the Slurm sinfo and squeue commands. +.TP +.B --report2 +Show cluster utilization based on ended jobs in +the last day. +It uses the Slurm sacct command. .PP Examples .PP