Skip to content

Commit

Permalink
#49: GitHub CI Benchmarking
Browse files Browse the repository at this point in the history
  • Loading branch information
henryleberre committed Dec 15, 2023
1 parent 371c51a commit 00cc18b
Show file tree
Hide file tree
Showing 13 changed files with 172 additions and 111 deletions.
37 changes: 37 additions & 0 deletions .github/workflows/bench.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
# Benchmark workflow: for each device type, submits
# .github/workflows/phoenix/bench.sh through .github/workflows/phoenix/submit.sh
# on the self-hosted "phoenix" runner group, then prints the resulting log.
name: Benchmark

on:
  # Pushes only trigger a run when they touch one of these paths.
  push:
    paths:
      - '**.f90'
      - '**.fpp'
      - '**.py'
      - '**.yml'
      - mfc.sh
      - CMakeLists.txt
      - requirements.txt

  pull_request:

  workflow_dispatch:

jobs:
  self:
    name: 'Georgia Tech | Phoenix (NVHPC)'
    # Only run inside the MFlowCode/MFC repository.
    if: github.repository == 'MFlowCode/MFC'
    strategy:
      matrix:
        device:
          - cpu
          - gpu
    runs-on:
      group: phoenix
      labels: self-hosted
    steps:
      - name: Clone
        uses: actions/checkout@v4

      - name: Bench
        run: bash .github/workflows/phoenix/submit.sh .github/workflows/phoenix/bench.sh ${{ matrix.device }}

      # Runs even when an earlier step failed, so the log is always shown.
      - name: Print
        if: always()
        run: cat bench-${{ matrix.device }}.out
2 changes: 1 addition & 1 deletion .github/workflows/docker.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ jobs:
runs-on: ubuntu-latest

steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4

- name: Login to Docker Hub
uses: docker/login-action@v2
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/docs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ jobs:
runs-on: ubuntu-latest

steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4

# We build doxygen from source because of
# https://github.com/doxygen/doxygen/issues/9016
Expand Down
3 changes: 3 additions & 0 deletions .github/workflows/phoenix/bench.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
#!/bin/bash

# Benchmark payload for the Phoenix job. Expects the variable job_slug to be
# set by whoever runs this text; results go to the YAML file "<job_slug>.yaml".
./mfc.sh bench "${job_slug}.yaml" -j "$(nproc)" -b mpirun
58 changes: 58 additions & 0 deletions .github/workflows/phoenix/submit.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
#!/bin/bash

# Wraps a job script in a Slurm batch header and submits it with sbatch.
#
# Usage: submit.sh [script.sh] [cpu|gpu]
#   script.sh  file whose contents become the body of the batch job
#   cpu|gpu    selects the device-specific #SBATCH options and the mode handed
#              to "./mfc.sh load"
#
# The job name and log file are derived from <job_slug>: the script's basename
# without ".sh", with every non-alphanumeric character replaced by "-",
# followed by "-<device>". The log is written to "<job_slug>.out".
#
# Because the batch header contains "#SBATCH -W", sbatch does not return until
# the job terminates; sbatch is the last command, so its exit status becomes
# the exit status of this script.

usage() {
    echo "Usage: $0 [script.sh] [cpu|gpu]"
}

if [ -z "$1" ]; then
    usage
    exit 1
fi

# Stop here rather than submitting a job with an empty body when the script
# cannot be read (cat reports the reason on stderr).
if ! sbatch_script_contents=$(cat "$1"); then
    exit 1
fi

sbatch_cpu_opts="#SBATCH --ntasks-per-node=12 # Number of cores per node required
#SBATCH --mem-per-cpu=2G # Memory per core"

sbatch_gpu_opts="#SBATCH -CV100-16GB
#SBATCH -G2"

case "$2" in
    cpu) sbatch_device_opts="$sbatch_cpu_opts" ;;
    gpu) sbatch_device_opts="$sbatch_gpu_opts" ;;
    *)
        usage
        exit 1
        ;;
esac

job_slug="$(basename "$1" | sed 's/\.sh$//' | sed 's/[^a-zA-Z0-9]/-/g')-$2"

# The heredoc is unquoted on purpose: $job_slug, $2, $sbatch_device_opts and
# $sbatch_script_contents are filled in now, while the backslash-escaped
# expansions are left for the job itself to evaluate on the compute node.
sbatch <<EOT
#!/bin/bash
#SBATCH -Jshb-$job_slug # Job name
#SBATCH --account=gts-sbryngelson3 # charge account
#SBATCH -N1 # Number of nodes required
$sbatch_device_opts
#SBATCH -t 04:00:00 # Duration of the job (Ex: 15 mins)
#SBATCH -q embers # QOS Name
#SBATCH -o$job_slug.out # Combined output and error messages file
#SBATCH -W # Do not exit until the submitted job terminates.
set -x
cd "\$SLURM_SUBMIT_DIR"
echo "Running in \$(pwd):"
job_slug="$job_slug"
job_device="$2"
. ./mfc.sh load -c p -m $2
$sbatch_script_contents
EOT
9 changes: 9 additions & 0 deletions .github/workflows/phoenix/test.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
#!/bin/bash

# Test payload for the Phoenix job: invokes "./mfc.sh test", adding GPU
# options when the variable job_device is "gpu".

if [[ "$job_device" == "gpu" ]]; then
    # Number of GPUs on the node (one line of "nvidia-smi -L" output each).
    n_gpus=$(nvidia-smi -L | wc -l)
    # Space-separated device ids: 0 1 2 ... n_gpus-1
    ids=$(seq -s ' ' 0 $((n_gpus - 1)))
    device_opts="--gpu -g $ids"
fi

# device_opts stays unquoted so that it splits into separate arguments and
# disappears entirely when it was never set.
./mfc.sh test -a -b mpirun -j "$(nproc)" $device_opts
15 changes: 5 additions & 10 deletions .github/workflows/ci.yml → .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ jobs:
runs-on: ${{ matrix.os }}-latest
steps:
- name: Clone
uses: actions/checkout@v3
uses: actions/checkout@v4

- name: Setup MacOS
if: matrix.os == 'macos'
Expand Down Expand Up @@ -95,7 +95,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Clone
uses: actions/checkout@v3
uses: actions/checkout@v4

- name: Test
run: sudo ./mfc.sh docker ./mfc.sh test -j $(nproc) -a
Expand All @@ -112,21 +112,16 @@ jobs:
labels: self-hosted
steps:
- name: Clone
uses: actions/checkout@v3
uses: actions/checkout@v4

- name: Build
run: |
. ./mfc.sh load -c p -m gpu
./mfc.sh build -j 2 $(if [ '${{ matrix.device }}' == 'gpu' ]; then echo '--gpu'; fi)
- name: Test
run: |
. ./mfc.sh load -c p -m gpu
mv misc/run-phoenix-release-${{ matrix.device }}.sh ./
sbatch run-phoenix-release-${{ matrix.device }}.sh
run: bash .github/workflows/phoenix/submit.sh .github/workflows/phoenix/test.sh ${{ matrix.device }}

- name: Print
if: always()
run: |
cat test.out
run: cat test-${{ matrix.device }}.out
16 changes: 0 additions & 16 deletions misc/run-phoenix-release-cpu.sh

This file was deleted.

24 changes: 0 additions & 24 deletions misc/run-phoenix-release-gpu.sh

This file was deleted.

7 changes: 7 additions & 0 deletions toolchain/bench.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Benchmark case list. Each entry provides the three fields of a benchmark
# case: a name, the path of its case file, and a list of extra arguments.
- name: 1D_bubblescreen
  path: examples/1D_bubblescreen/case.py
  args: []

- name: 1D_kapilashocktube
  path: examples/1D_kapilashocktube/case.py
  args: []
4 changes: 3 additions & 1 deletion toolchain/mfc/args.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,9 @@ def add_common_arguments(p, mask = None):
run.add_argument("--wait", action="store_true", default=False, help="(Batch) Wait for the job to finish.")

# === BENCH ===
add_common_arguments(bench, "t")
add_common_arguments(bench, "tjgn")
bench.add_argument("output", metavar="OUTPUT", default=None, type=str, help="Path to the YAML output file to write the results to.")
bench.add_argument(metavar="FORWARDED", default=[], dest='forwarded', nargs=argparse.REMAINDER, help="Arguments to forward to the ./mfc.sh run invocations.")

# === COUNT ===
add_common_arguments(count, "g")
Expand Down
95 changes: 43 additions & 52 deletions toolchain/mfc/bench.py
Original file line number Diff line number Diff line change
@@ -1,62 +1,53 @@
import os, json, time, typing, datetime, subprocess

import rich.table
import sys, time, subprocess, dataclasses

from .printer import cons
from .state import ARG
from .state import ARG, CFG
from .build import PRE_PROCESS, SIMULATION, build_targets
from .common import system, MFC_SUBDIR
from .common import system, MFC_BENCH_FILEPATH, file_load_yaml, file_dump_yaml
from . import sched


@dataclasses.dataclass
class BenchCase:
    """A single benchmark case: its label, its case file, and the extra
    arguments appended to its ``./mfc.sh run`` command line."""

    # Label shown in the per-case progress line.
    name: str
    # Case file handed to the ./mfc.sh build and run invocations.
    path: str
    # Extra arguments appended to the ./mfc.sh run invocation.
    args: list[str]


# bench(): runs the benchmark cases and records how long each one takes.
# NOTE(review): this span is a diff rendering with its indentation stripped. It
# interleaves two implementations of the function body: an earlier one built
# around a hard-coded CASES list, __worker, sched and bench.json, and a later
# one built around BenchCase, MFC_BENCH_FILEPATH and file_dump_yaml. Which of
# the opening statements belong to which version cannot be told from here.
def bench():
build_targets([PRE_PROCESS, SIMULATION])

cons.print()
cons.print("[bold]Benchmarking [magenta]simulation[/magenta]:[/bold]")
cons.indent()

# --- Implementation based on a hard-coded case list (presumably the lines
# --- removed by this commit: it uses rich.table, json, os and MFC_SUBDIR).
CASES = ["1D_bubblescreen", "1D_exercise_WENO", "1D_kapilashocktube"]
RESULTS = []

table = rich.table.Table(show_lines=False, show_edge=False)
table.add_column("Case")
table.add_column("(Simulation) Runtime (s)")

# Runs pre_process without timing it, then times the simulation run with
# time.monotonic() and records the runtime in RESULTS and in the table.
def __worker(case: str, devices: typing.Set[int]):
nonlocal RESULTS

system(["./mfc.sh", "run", f"examples/{case}/case.py", "--no-build", "-t", "pre_process"], stdout=subprocess.DEVNULL)
start = time.monotonic()
system(["./mfc.sh", "run", f"examples/{case}/case.py", "--no-build", "-t", "simulation"], stdout=subprocess.DEVNULL)
end = time.monotonic()
runtime = datetime.timedelta(seconds=end - start).total_seconds()

RESULTS.append({
"name": f"Simulation: {case}",
"unit": "seconds",
"value": runtime
})

table.add_row(case, str(runtime))

# One scheduler task per case.
tasks: typing.List[sched.Task] = [
sched.Task(1, __worker, [ case ], 1) for case in CASES
]

cons.print()
# Thread count: the job count, capped by the number of GPUs when "gpu" is set,
# and forced to 1 when case optimization is requested.
nThreads = min(ARG('jobs'), len(ARG('gpus'))) if ARG("gpu") else ARG('jobs')
if ARG('case_optimization'):
nThreads = 1

sched.sched(tasks, nThreads, ARG("gpus"))
cons.print()
cons.unindent()
cons.print("[bold]Benchmark Results:[/bold]")
cons.print()
cons.raw.print(table)
cons.print()

# Dump the collected results as JSON to bench.json under MFC_SUBDIR.
filepath = os.path.join(MFC_SUBDIR, "bench.json")
with open(filepath, "w") as f:
json.dump(RESULTS, f)

cons.print(f"[bold green]✓[/bold green] Saved results to [magenta]{filepath}[/magenta].")
# --- Implementation based on the YAML case file (presumably the lines added
# --- by this commit).
# Load the case list from MFC_BENCH_FILEPATH; each entry's keys are passed as
# BenchCase fields.
CASES = [ BenchCase(**case) for case in file_load_yaml(MFC_BENCH_FILEPATH) ]

# Append the "forwarded" command-line arguments to every case's own arguments.
for case in CASES:
case.args = case.args + ARG("forwarded")

cons.print(f"Found [magenta]{len(CASES)}[/magenta] cases.")

# Result document: the invocation arguments and the CFG() fields as metadata,
# plus one entry per case appended below.
results = {
"metadata": {
"invocation": sys.argv[1:],
"lock": dataclasses.asdict(CFG())
},
"cases": [],
}

for i, case in enumerate(CASES):
# NOTE(review): the zfill width is len(CASES) // 10 + 1, which equals the
# digit count of len(CASES) only below 20 cases (e.g. 100 cases pad to 11
# characters); len(str(len(CASES))) would give the exact width.
cons.print(f"{str(i+1).zfill(len(CASES) // 10 + 1)}/{len(CASES)}: {case.name} @ [bold]{case.path}[/bold]")
# Build pre_process and simulation for this case with --case-optimization;
# stdout is pointed at DEVNULL.
system(["./mfc.sh", "build", "--targets", "pre_process", "simulation", "--case-optimization", "--input", case.path], stdout=subprocess.DEVNULL)

# Start from the case's own fields; the timings are added next to them.
case_results = dataclasses.asdict(case)

# Time each target's run with time.time() and store the elapsed seconds under
# the target's name.
for target in [PRE_PROCESS, SIMULATION]:
start = time.time()
system(["./mfc.sh", "run", case.path, "--targets", target.name, "--case-optimization", *case.args], stdout=subprocess.DEVNULL)
case_results[target.name] = time.time() - start

results["cases"].append(case_results)

# Write everything to the path given as the "output" argument.
file_dump_yaml(ARG("output"), results)

cons.unindent()
11 changes: 5 additions & 6 deletions toolchain/mfc/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,11 @@
from os.path import abspath, normpath, dirname, realpath


MFC_ROOTDIR = normpath(f"{dirname(realpath(__file__))}/../..")
MFC_TESTDIR = abspath(f"{MFC_ROOTDIR}/tests")
MFC_SUBDIR = abspath(f"{MFC_ROOTDIR}/build")
MFC_DEV_FILEPATH = abspath(f"{MFC_ROOTDIR}/toolchain/mfc.dev.yaml")
MFC_USER_FILEPATH = abspath(f"{MFC_ROOTDIR}/defaults.yaml")
MFC_LOCK_FILEPATH = abspath(f"{MFC_SUBDIR}/lock.yaml")
MFC_ROOTDIR = normpath(f"{dirname(realpath(__file__))}/../..")
MFC_TESTDIR = abspath(f"{MFC_ROOTDIR}/tests")
MFC_SUBDIR = abspath(f"{MFC_ROOTDIR}/build")
MFC_LOCK_FILEPATH = abspath(f"{MFC_SUBDIR}/lock.yaml")
MFC_BENCH_FILEPATH = abspath(f"{MFC_ROOTDIR}/toolchain/bench.yaml")

MFC_LOGO = f"""
.=++*: -+*+=.
Expand Down

0 comments on commit 00cc18b

Please sign in to comment.