Skip to content

Commit

Permalink
#49: GitHub CI Benchmarking
Browse files Browse the repository at this point in the history
  • Loading branch information
henryleberre committed Dec 15, 2023
1 parent 371c51a commit e4f861e
Show file tree
Hide file tree
Showing 14 changed files with 269 additions and 115 deletions.
66 changes: 66 additions & 0 deletions .github/workflows/bench.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
name: 'Benchmark'

# Benchmarks the pull request against master on the Phoenix self-hosted
# runners (once per device in the matrix) and posts the comparison produced
# by compare.py as a comment on the pull request.
on:
  pull_request:
    paths:
      - '**.f90'
      - '**.fpp'
      - '**.py'
      - '**.yml'
      - 'mfc.sh'
      - 'CMakeLists.txt'
      - 'requirements.txt'

jobs:
  self:
    name: Georgia Tech | Phoenix (NVHPC)
    if: github.repository == 'MFlowCode/MFC'
    strategy:
      matrix:
        device: ['cpu', 'gpu']
    runs-on:
      group: phoenix
      labels: self-hosted
    steps:
      - name: Clone - PR
        uses: actions/checkout@v3

      # Results are moved to the home directory so that they survive the
      # second checkout below, which replaces the workspace contents.
      - name: Bench - PR
        run: |
          bash .github/workflows/phoenix/submit.sh .github/workflows/phoenix/bench.sh ${{ matrix.device }}
          mv bench-${{ matrix.device }}.out ~/bench-${{ matrix.device }}-pr.out
          mv bench-${{ matrix.device }}.yaml ~/bench-${{ matrix.device }}-pr.yaml

      # NOTE(review): master is cloned from the henryleberre/MFC fork even
      # though this job only runs for MFlowCode/MFC -- confirm this is intended.
      - name: Clone - Master
        uses: actions/checkout@v3
        with:
          repository: henryleberre/MFC
          ref: master

      - name: Bench - Master
        run: |
          bash .github/workflows/phoenix/submit.sh .github/workflows/phoenix/bench.sh ${{ matrix.device }}
          mv bench-${{ matrix.device }}.out ~/bench-${{ matrix.device }}-master.out
          mv bench-${{ matrix.device }}.yaml ~/bench-${{ matrix.device }}-master.yaml

      - name: Post Comment
        env:
          # On `pull_request` events the comments endpoint lives under
          # `pull_request`; `github.event.issue` is only populated for
          # issue / issue_comment events and would expand to an empty string.
          COMMENTS_URL: ${{ github.event.pull_request.comments_url }}
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        run: |
          PR_COMMENT=$(python3 .github/workflows/phoenix/compare.py ~/bench-${{ matrix.device }}-master.yaml ~/bench-${{ matrix.device }}-pr.yaml)
          echo "Posting comment on PR @ $COMMENTS_URL:"
          echo "$PR_COMMENT"
          # The comment is multi-line Markdown: JSON-encode it instead of
          # splicing it into a string, so newlines and quotes are escaped.
          PAYLOAD=$(printf '%s' "$PR_COMMENT" | python3 -c 'import json, sys; print(json.dumps({"body": sys.stdin.read()}))')
          curl \
            -X POST \
            "$COMMENTS_URL" \
            -H "Content-Type: application/json" \
            -H "Authorization: token $GITHUB_TOKEN" \
            --data "$PAYLOAD"

      - name: Archive Logs
        uses: actions/upload-artifact@v3
        if: always()
        with:
          name: logs-${{ matrix.device }}
          path: |
            ~/bench-${{ matrix.device }}-*
2 changes: 1 addition & 1 deletion .github/workflows/docker.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ jobs:
password: ${{ secrets.DOCKER_PASSWORD }}

- name: Build & Publish thereto
uses: docker/build-push-action@v4
uses: docker/build-push-action@v3
with:
file: toolchain/Dockerfile
push: true
Expand Down
11 changes: 11 additions & 0 deletions .github/workflows/phoenix/bench.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
#!/bin/bash

# Job body for the benchmark run. Reads `job_device` and `job_slug`, which
# the submitting wrapper is expected to define before this text executes.

# Rank count used unless the GPU branch below overrides it.
rank_count=12

if [[ "$job_device" == "gpu" ]]; then
    # One rank per GPU reported by nvidia-smi on this node.
    rank_count=$(nvidia-smi -L | wc -l)
    last_gpu=$((rank_count - 1))
    # Space-separated GPU ids: 0 1 2 ... last_gpu
    gpu_list=$(seq -s ' ' 0 "$last_gpu")
    device_opts="--gpu -g $gpu_list"
fi

# $device_opts is deliberately unquoted so it splits into separate arguments
# (and vanishes entirely when it is unset on the CPU path).
./mfc.sh bench "$job_slug.yaml" -j "$(nproc)" -b mpirun $device_opts -n $rank_count
51 changes: 51 additions & 0 deletions .github/workflows/phoenix/compare.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
#!/usr/bin/env python3
"""Compare two benchmark result files and print a Markdown summary.

Usage: compare.py MASTER PR

Both arguments are paths to YAML files holding a top-level ``cases`` list.
Each case is read for its ``name``, ``pre_process`` and ``simulation``
entries; the latter two are treated as run times in seconds (the report
prints them with an ``s`` suffix).
"""

import argparse


def load_cases(filepath):
    """Return the cases stored in ``filepath`` as a dict keyed by case name."""
    # Imported lazily so the pure comparison helpers below stay importable
    # (e.g. from unit tests) on machines without PyYAML installed.
    import yaml

    # Context manager: the original left the file handle open.
    with open(filepath) as stream:
        return {case["name"]: case for case in yaml.safe_load(stream)["cases"]}


def compute_speedups(master, pr):
    """Return per-case speedups of ``pr`` over ``master``.

    Only cases present in both mappings are considered. A speedup is the
    master time divided by the PR time, so values above 1 mean the PR is
    faster and values below 1 mean it is slower.
    """
    return {
        name: {
            "pre_process": master[name]["pre_process"] / pr[name]["pre_process"],
            "simulation": master[name]["simulation"] / pr[name]["simulation"],
        }
        for name in master.keys() & pr.keys()
    }


def build_report(master, pr):
    """Return the Markdown report comparing ``pr`` against ``master``.

    The report starts with a warning listing cases found in only one of the
    two inputs (if any), followed by the average simulation speedup and a
    per-case table. When no case is common to both inputs, a short notice
    replaces the table instead of dividing by zero.
    """
    lines = []

    # Sorted so the output is stable from run to run.
    missing_cases = sorted(master.keys() ^ pr.keys())
    if missing_cases:
        lines.append("**Warning:** The following cases are **missing** from master or this PR:\n")
        lines.extend(f" - {name}." for name in missing_cases)
        lines.append("")

    speedups = compute_speedups(master, pr)
    if not speedups:
        lines.append("**[Benchmark Results]** No benchmark cases are common to Master and this PR.")
        return "\n".join(lines)

    avg_speedup = sum(entry["simulation"] for entry in speedups.values()) / len(speedups)

    lines.append(f"**[Benchmark Results]** Compared to Master, this PR's `simulation` is on average **~{avg_speedup:0.2f}x faster**.")
    lines.append("| **Case** | **Master** | **PR** | **Speedup** |")
    lines.append("| -------- | ---------- | ------ | ----------- |")

    for name in sorted(speedups):
        lines.append(f"| {name} | {master[name]['simulation']:0.2f}s | {pr[name]['simulation']:0.2f}s | {speedups[name]['simulation']:0.2f}x |")

    return "\n".join(lines)


def main():
    """Parse the command line and print the comparison report."""
    parser = argparse.ArgumentParser()
    parser.add_argument('master', metavar="MASTER", type=str)
    parser.add_argument('pr', metavar="PR", type=str)

    args = parser.parse_args()

    print(build_report(load_cases(args.master), load_cases(args.pr)))


if __name__ == "__main__":
    main()
59 changes: 59 additions & 0 deletions .github/workflows/phoenix/submit.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
#!/bin/bash

# Wraps the contents of a shell script in a SLURM batch header and submits it
# with sbatch, blocking until the job terminates (-W).
#
# Arguments:
#   $1  path to the script whose contents become the body of the job
#   $2  target device, either "cpu" or "gpu"
#
# Exits with status 1 (after printing the usage) when an argument is missing
# or invalid, or when the script cannot be read.

usage() {
    echo "Usage: $0 [script.sh] [cpu|gpu]"
}

# Require a readable script: submitting with a missing file would otherwise
# queue a job whose body is empty.
if [ -n "$1" ] && [ -r "$1" ]; then
    sbatch_script_contents=$(cat "$1")
else
    usage >&2
    exit 1
fi

sbatch_cpu_opts="\
#SBATCH --ntasks-per-node=12            # Number of cores per node required
#SBATCH --mem-per-cpu=2G                # Memory per core\
"

sbatch_gpu_opts="\
#SBATCH -CV100-16GB
#SBATCH -G2\
"

if [ "$2" == "cpu" ]; then
    sbatch_device_opts="$sbatch_cpu_opts"
elif [ "$2" == "gpu" ]; then
    sbatch_device_opts="$sbatch_gpu_opts"
else
    usage >&2
    exit 1
fi

# e.g. ".github/workflows/phoenix/bench.sh" + "gpu" -> "bench-gpu"
job_slug="$(basename "$1" | sed -e 's/\.sh$//' -e 's/[^a-zA-Z0-9]/-/g')-$2"

# The heredoc is unquoted on purpose: unescaped variables are expanded now, at
# submission time, while escaped ones (\$...) are evaluated by the job itself.
sbatch <<EOT
#!/bin/bash
#SBATCH -Jshb-$job_slug                 # Job name
#SBATCH --account=gts-sbryngelson3      # charge account
#SBATCH -N1                             # Number of nodes required
$sbatch_device_opts
#SBATCH -t 04:00:00                     # Maximum duration of the job (4 hours)
#SBATCH -q embers                       # QOS Name
#SBATCH -o$job_slug.out                 # Combined output and error messages file
#SBATCH -W                              # Do not exit until the submitted job terminates.
set -e
set -x
cd "\$SLURM_SUBMIT_DIR"
echo "Running in \$(pwd):"
job_slug="$job_slug"
job_device="$2"
. ./mfc.sh load -c p -m $2
$sbatch_script_contents
EOT
EOT
19 changes: 19 additions & 0 deletions .github/workflows/phoenix/test.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
#!/bin/bash

# Job body that builds MFC and then runs its test suite. Reads `job_device`,
# which the submitting wrapper is expected to define before this text runs.

# Extra build flag, only set for GPU builds.
gpu_build_flag=""
if [[ "$job_device" == "gpu" ]]; then
    gpu_build_flag="--gpu"
fi

./mfc.sh build -j "$(nproc)" $gpu_build_flag

# One test thread per core unless the GPU branch below overrides it.
test_jobs=$(nproc)

if [[ "$job_device" == "gpu" ]]; then
    # Number of GPUs reported by nvidia-smi on this node.
    n_gpus=$(nvidia-smi -L | wc -l)
    # Space-separated GPU ids: 0 1 2 ... n_gpus-1
    gpu_list=$(seq -s ' ' 0 $((n_gpus - 1)))
    device_opts="-g $gpu_list"
    # Two test threads per GPU.
    test_jobs=$(expr $n_gpus \* 2)
fi

# $device_opts is deliberately unquoted so it splits into separate arguments
# (and vanishes entirely when it is unset on the CPU path).
./mfc.sh test -a -b mpirun -j $test_jobs $device_opts
21 changes: 7 additions & 14 deletions .github/workflows/ci.yml → .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -114,19 +114,12 @@ jobs:
- name: Clone
uses: actions/checkout@v3

- name: Build
run: |
. ./mfc.sh load -c p -m gpu
./mfc.sh build -j 2 $(if [ '${{ matrix.device }}' == 'gpu' ]; then echo '--gpu'; fi)
- name: Build & Test
run: bash .github/workflows/phoenix/submit.sh .github/workflows/phoenix/test.sh ${{ matrix.device }}

- name: Test
run: |
. ./mfc.sh load -c p -m gpu
mv misc/run-phoenix-release-${{ matrix.device }}.sh ./
sbatch run-phoenix-release-${{ matrix.device }}.sh
- name: Print
- name: Archive Logs
uses: actions/upload-artifact@v3
if: always()
run: |
cat test.out
with:
name: logs
path: test-${{ matrix.device }}.out
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
<img src="https://zenodo.org/badge/doi/10.1016/j.cpc.2020.107396.svg" />
</a>
<a href="https://github.com/MFlowCode/MFC/actions">
<img src="https://github.com/MFlowCode/MFC/actions/workflows/ci.yml/badge.svg" />
<img src="https://github.com/MFlowCode/MFC/actions/workflows/test.yml/badge.svg" />
</a>
<a href="https://lbesson.mit-license.org/">
<img src="https://img.shields.io/badge/License-MIT-blue.svg" />
Expand Down
16 changes: 0 additions & 16 deletions misc/run-phoenix-release-cpu.sh

This file was deleted.

24 changes: 0 additions & 24 deletions misc/run-phoenix-release-gpu.sh

This file was deleted.

3 changes: 3 additions & 0 deletions toolchain/bench.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# List of benchmark cases. Each entry gives the case a name, the path of its
# case file, and a list of extra arguments.
# NOTE(review): presumably `args` is passed to the case script by the
# `./mfc.sh bench` implementation -- confirm against the toolchain code.
- name: 1D_bubblescreen
  path: examples/1D_bubblescreen/case.py
  args: []
4 changes: 3 additions & 1 deletion toolchain/mfc/args.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,9 @@ def add_common_arguments(p, mask = None):
run.add_argument("--wait", action="store_true", default=False, help="(Batch) Wait for the job to finish.")

# === BENCH ===
add_common_arguments(bench, "t")
add_common_arguments(bench, "tjgn")
bench.add_argument("output", metavar="OUTPUT", default=None, type=str, help="Path to the YAML output file to write the results to.")
bench.add_argument(metavar="FORWARDED", default=[], dest='forwarded', nargs=argparse.REMAINDER, help="Arguments to forward to the ./mfc.sh run invocations.")

# === COUNT ===
add_common_arguments(count, "g")
Expand Down
Loading

0 comments on commit e4f861e

Please sign in to comment.