Skip to content

Commit

Permalink
Fix GT Phoenix runners
Browse files Browse the repository at this point in the history
  • Loading branch information
henryleberre committed Jan 14, 2024
1 parent ed8dd7a commit 726d993
Show file tree
Hide file tree
Showing 3 changed files with 91 additions and 13 deletions.
63 changes: 63 additions & 0 deletions .github/workflows/phoenix/submit.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
#!/bin/bash

set -e

usage() {
echo "Usage: $0 [script.sh] [cpu|gpu]"
}

if [ ! -z "$1" ]; then
sbatch_script_contents=`cat $1`
else
usage
exit 1
fi

sbatch_cpu_opts="\
#SBATCH -p cpu-small # partition
#SBATCH --ntasks-per-node=24 # Number of cores per node required
#SBATCH --mem-per-cpu=2G # Memory per core\
"

sbatch_gpu_opts="\
#SBATCH -CV100-16GB
#SBATCH -G2\
"

if [ "$2" == "cpu" ]; then
sbatch_device_opts="$sbatch_cpu_opts"
elif [ "$2" == "gpu" ]; then
sbatch_device_opts="$sbatch_gpu_opts"
else
usage
exit 1
fi

job_slug="`basename "$1" | sed 's/\.sh$//' | sed 's/[^a-zA-Z0-9]/-/g'`-$2"

sbatch <<EOT
#!/bin/bash
#SBATCH -Jshb-$job_slug # Job name
#SBATCH --account=gts-sbryngelson3 # charge account
#SBATCH -N1 # Number of nodes required
$sbatch_device_opts
#SBATCH -t 04:00:00 # Duration of the job (Ex: 15 mins)
#SBATCH -q embers # QOS Name
#SBATCH -o$job_slug.out # Combined output and error messages file
#SBATCH -W # Do not exit until the submitted job terminates.
set -e
set -x
cd "\$SLURM_SUBMIT_DIR"
echo "Running in $(pwd):"
job_slug="$job_slug"
job_device="$2"
. ./mfc.sh load -c p -m $2
$sbatch_script_contents
EOT

20 changes: 20 additions & 0 deletions .github/workflows/phoenix/test.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
#!/bin/bash

build_opts=""
if [ "$job_device" == "gpu" ]; then
build_opts="--gpu"
fi

./mfc.sh build -j 8 $build_opts

n_test_threads=8

if [ "$job_device" == "gpu" ]; then
gpu_count=$(nvidia-smi -L | wc -l) # number of GPUs on node
gpu_ids=$(seq -s ' ' 0 $(($gpu_count-1))) # 0,1,2,...,gpu_count-1
device_opts="-g $gpu_ids"
n_test_threads=`expr $gpu_count \* 2`
fi

./mfc.sh test -a -j $n_test_threads $device_opts -- -c phoenix

21 changes: 8 additions & 13 deletions .github/workflows/ci.yml → .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -114,18 +114,13 @@ jobs:
- name: Clone
uses: actions/checkout@v3

- name: Build
run: |
. ./mfc.sh load -c p -m gpu
./mfc.sh build -j 2 $(if [ '${{ matrix.device }}' == 'gpu' ]; then echo '--gpu'; fi)
- name: Build & Test
run: bash .github/workflows/phoenix/submit.sh .github/workflows/phoenix/test.sh ${{ matrix.device }}

- name: Test
run: |
. ./mfc.sh load -c p -m gpu
mv misc/run-phoenix-release-${{ matrix.device }}.sh ./
sbatch run-phoenix-release-${{ matrix.device }}.sh
- name: Archive Logs
uses: actions/upload-artifact@v3
if: always()
with:
name: logs
path: test-${{ matrix.device }}.out

- name: Print
if: always()
run: |
cat test.out

0 comments on commit 726d993

Please sign in to comment.