Skip to content

Commit

Permalink
Handshake: Benchmarking (#316)
Browse files Browse the repository at this point in the history
Co-authored-by: Spencer Bryngelson <shb@gatech.edu>
  • Loading branch information
henryleberre and sbryngelson authored Jan 26, 2024
1 parent 1d27af6 commit 7d71dfa
Show file tree
Hide file tree
Showing 15 changed files with 321 additions and 120 deletions.
52 changes: 52 additions & 0 deletions .github/workflows/bench.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
# Benchmarks each pull request against master on the Phoenix runner group,
# once per device kind, and prints a comparison of the two result files.
name: 'Benchmark'

on:
  pull_request:

jobs:
  self:
    name: Georgia Tech | Phoenix (NVHPC)
    if: github.repository == 'MFlowCode/MFC'
    strategy:
      matrix:
        device: ['cpu', 'gpu']
    runs-on:
      group: phoenix
      labels: gt
    steps:
      - name: Clone - PR
        uses: actions/checkout@v3
        with:
          path: pr

      - name: Clone - Master
        uses: actions/checkout@v3
        with:
          repository: MFlowCode/MFC
          ref: master
          path: master

      - name: Bench (Master v. PR)
        run: |
          (cd pr     && bash .github/workflows/phoenix/submit.sh .github/workflows/phoenix/bench.sh ${{ matrix.device }}) &
          (cd master && bash .github/workflows/phoenix/submit.sh .github/workflows/phoenix/bench.sh ${{ matrix.device }}) &
          # Always reap both background jobs, then fail the step if either
          # one failed ("wait %1 && wait %2" abandons job 2 when job 1 fails).
          status=0
          wait %1 || status=$?
          wait %2 || status=$?
          exit $status

      - name: Generate Comment
        run: |
          # Both checkouts live in sub-directories (pr/ and master/), so
          # mfc.sh has to be invoked from inside one of them.
          COMMENT_MSG=$(cd pr && ./mfc.sh bench_diff ../master/bench-${{ matrix.device }}.yaml ../pr/bench-${{ matrix.device }}.yaml)
          # Multi-line values must use the delimiter syntax in $GITHUB_ENV.
          {
            echo "COMMENT_MSG<<MFC_BENCH_EOF"
            echo "$COMMENT_MSG"
            echo "MFC_BENCH_EOF"
          } >> "$GITHUB_ENV"

      - name: Post Comment
        run: echo "$COMMENT_MSG"

      - name: Archive Logs
        uses: actions/upload-artifact@v3
        if: always()
        with:
          name: logs-${{ matrix.device }}
          path: |
            pr/bench-${{ matrix.device }}.*
            pr/build/benchmarks/*
            master/bench-${{ matrix.device }}.*
            master/build/benchmarks/*
11 changes: 11 additions & 0 deletions .github/workflows/phoenix/bench.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
#!/bin/bash

# Runs the MFC benchmark suite for one device kind on the current node.
#
# Reads two variables from the environment:
#   job_device - "gpu" to benchmark on every GPU of the node; any other
#                value runs the CPU benchmark with 4 ranks.
#   job_slug   - base name of the YAML results file ("$job_slug.yaml").
# NOTE(review): presumably both are set by submit.sh - confirm.

n_ranks=4
device_opts=()

if [ "$job_device" == "gpu" ]; then
    n_ranks=$(nvidia-smi -L | wc -l)            # number of GPUs on node

    # Without this guard, zero GPUs would yield "-n 0" and an empty "-g".
    if [ "$n_ranks" -lt 1 ]; then
        echo "bench.sh: nvidia-smi did not report any GPU." >&2
        exit 1
    fi

    gpu_ids=$(seq -s ' ' 0 $((n_ranks - 1)))    # "0 1 2 ... gpu_count-1"

    # $gpu_ids is deliberately unquoted: each id is its own argument to -g.
    device_opts=(--gpu -g $gpu_ids)
fi

./mfc.sh bench -j "$(nproc)" -o "$job_slug.yaml" -- -c phoenix "${device_opts[@]}" -n "$n_ranks"
4 changes: 4 additions & 0 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,10 @@ jobs:
- name: Build & Test
run: bash .github/workflows/phoenix/submit.sh .github/workflows/phoenix/test.sh ${{ matrix.device }}

- name: Print Logs
if: always()
run: cat test-${{ matrix.device }}.out

- name: Archive Logs
uses: actions/upload-artifact@v3
if: always()
Expand Down
6 changes: 6 additions & 0 deletions toolchain/bench.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
- slug: 3D_shockdroplet
path: examples/3D_shockdroplet/case.py
args: []
- slug: 3D_turb_mixing
path: examples/3D_turb_mixing/case.py
args: []
6 changes: 3 additions & 3 deletions toolchain/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,9 +47,9 @@ def __checks():


def __run():
{"test": test.test, "run": run.run, "build": build.build,
"clean": build.clean, "bench": bench.bench, "count": count.count,
"packer": packer.packer, "count_diff": count.count_diff
{"test": test.test, "run": run.run, "build": build.build,
"clean": build.clean, "bench": bench.bench, "count": count.count,
"packer": packer.packer, "count_diff": count.count_diff, "bench_diff": bench.diff
}[ARG("command")]()


Expand Down
30 changes: 19 additions & 11 deletions toolchain/mfc/args.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,14 +18,15 @@ def parse(config):
)

parsers = parser.add_subparsers(dest="command")
run = parsers.add_parser(name="run", help="Run a case with MFC.", formatter_class=argparse.ArgumentDefaultsHelpFormatter)
test = parsers.add_parser(name="test", help="Run MFC's test suite.", formatter_class=argparse.ArgumentDefaultsHelpFormatter)
build = parsers.add_parser(name="build", help="Build MFC and its dependencies.", formatter_class=argparse.ArgumentDefaultsHelpFormatter)
clean = parsers.add_parser(name="clean", help="Clean build artifacts.", formatter_class=argparse.ArgumentDefaultsHelpFormatter)
bench = parsers.add_parser(name="bench", help="Benchmark MFC (for CI).", formatter_class=argparse.ArgumentDefaultsHelpFormatter)
count = parsers.add_parser(name="count", help="Count LOC in MFC.", formatter_class=argparse.ArgumentDefaultsHelpFormatter)
count_diff = parsers.add_parser(name="count_diff", help="Count LOC in MFC.", formatter_class=argparse.ArgumentDefaultsHelpFormatter)
packer = parsers.add_parser(name="packer", help="Packer utility (pack/unpack/compare)", formatter_class=argparse.ArgumentDefaultsHelpFormatter)
run = parsers.add_parser(name="run", help="Run a case with MFC.", formatter_class=argparse.ArgumentDefaultsHelpFormatter)
test = parsers.add_parser(name="test", help="Run MFC's test suite.", formatter_class=argparse.ArgumentDefaultsHelpFormatter)
build = parsers.add_parser(name="build", help="Build MFC and its dependencies.", formatter_class=argparse.ArgumentDefaultsHelpFormatter)
clean = parsers.add_parser(name="clean", help="Clean build artifacts.", formatter_class=argparse.ArgumentDefaultsHelpFormatter)
bench = parsers.add_parser(name="bench", help="Benchmark MFC (for CI).", formatter_class=argparse.ArgumentDefaultsHelpFormatter)
bench_diff = parsers.add_parser(name="bench_diff", help="Compare MFC Benchmarks (for CI).", formatter_class=argparse.ArgumentDefaultsHelpFormatter)
count = parsers.add_parser(name="count", help="Count LOC in MFC.", formatter_class=argparse.ArgumentDefaultsHelpFormatter)
count_diff = parsers.add_parser(name="count_diff", help="Count LOC in MFC.", formatter_class=argparse.ArgumentDefaultsHelpFormatter)
packer = parsers.add_parser(name="packer", help="Packer utility (pack/unpack/compare)", formatter_class=argparse.ArgumentDefaultsHelpFormatter)

packers = packer.add_subparsers(dest="packer")
pack = packers.add_parser(name="pack", help="Pack a case into a single file.", formatter_class=argparse.ArgumentDefaultsHelpFormatter)
Expand Down Expand Up @@ -120,9 +121,17 @@ def add_common_arguments(p, mask = None):
run.add_argument("--wait", action="store_true", default=False, help="(Batch) Wait for the job to finish.")
run.add_argument("-f", "--flags", metavar="FLAGS", dest="--", nargs=argparse.REMAINDER, type=str, default=[], help="Arguments to forward to the MPI invocation.")
run.add_argument("-c", "--computer", metavar="COMPUTER", type=str, default="default", help=f"(Batch) Path to a custom submission file template or one of {format_list_to_string(list(get_baked_templates().keys()))}.")
run.add_argument("-o", "--output-summary", metavar="OUTPUT", type=str, default=None, help="Output file (YAML) for summary.")

# === BENCH ===
add_common_arguments(bench, "t")
add_common_arguments(bench)
bench.add_argument("-o", "--output", metavar="OUTPUT", default=None, type=str, required="True", help="Path to the YAML output file to write the results to.")
bench.add_argument(metavar="FORWARDED", default=[], dest='--', nargs="*", help="Arguments to forward to the ./mfc.sh run invocations.")

# === BENCH_DIFF ===
add_common_arguments(bench_diff, "t")
bench_diff.add_argument("lhs", metavar="LHS", type=str, help="Path to a benchmark result YAML file.")
bench_diff.add_argument("rhs", metavar="RHS", type=str, help="Path to a benchmark result YAML file.")

# === COUNT ===
add_common_arguments(count, "g")
Expand All @@ -135,8 +144,7 @@ def add_common_arguments(p, mask = None):

# Add default arguments of other subparsers
for name, parser in [("run", run), ("test", test), ("build", build),
("clean", clean), ("bench", bench), ("count", count),
("count_diff", count_diff)]:
("clean", clean), ("count", count), ("count_diff", count_diff)]:
if args["command"] == name:
continue

Expand Down
116 changes: 113 additions & 3 deletions toolchain/mfc/bench.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,114 @@
from .common import MFCException
import os, sys, uuid, subprocess, dataclasses

def bench():
raise MFCException("Benchmarks are currently disabled.")
import rich.table

from .printer import cons
from .state import ARG, CFG
from .build import get_targets, DEFAULT_TARGETS
from .common import system, MFC_BENCH_FILEPATH, MFC_SUBDIR, format_list_to_string
from .common import file_load_yaml, file_dump_yaml, create_directory


@dataclasses.dataclass
class BenchCase:
    """One benchmark case, as described by an entry of the benchmark YAML file.

    Instances are created with ``BenchCase(**entry)``, so the field names
    must match the keys used in that file.
    """

    # Short identifier of the case; names its log/summary files and is the
    # key under which its results are stored.
    slug: str
    # Path to the case file that is handed to ``./mfc.sh run``.
    path: str
    # Extra command-line arguments for the run. Optional, so that entries
    # without additional arguments may omit the key entirely.
    args: list[str] = dataclasses.field(default_factory=list)


def bench(targets = None):
    """Run every benchmark case and gather the results into one YAML file.

    Each case listed in ``MFC_BENCH_FILEPATH`` is executed through
    ``./mfc.sh run --case-optimization`` with its stdout and stderr redirected
    to a per-case log file. The summary file requested from that run is then
    loaded and stored, next to the case description, under the case's slug.
    The aggregated results are written to the path given by ``ARG("output")``.

    Args:
        targets: Targets to benchmark, in whatever form ``get_targets``
            accepts. Defaults to ``ARG("targets")`` when ``None``.
    """
    if targets is None:
        targets = ARG("targets")

    targets      = get_targets(targets)
    target_names = [t.name for t in targets]

    # A short random suffix keeps the outputs of separate invocations apart.
    bench_dirpath = os.path.join(MFC_SUBDIR, "benchmarks", str(uuid.uuid4())[:4])
    create_directory(bench_dirpath)

    cons.print()
    # Report the targets that are actually benchmarked, which differ from
    # ARG('targets') when the caller passed an explicit list.
    cons.print(f"[bold]Benchmarking {format_list_to_string(target_names, 'magenta')} ([magenta]{os.path.relpath(bench_dirpath)}[/magenta]):[/bold]")
    cons.indent()
    cons.print()

    cases = [ BenchCase(**case) for case in file_load_yaml(MFC_BENCH_FILEPATH) ]

    for case in cases:
        # Forwarded arguments (after "--") apply to every case.
        case.args = case.args + ARG("--")
        case.path = os.path.abspath(case.path)

    results = {
        "metadata": {
            "invocation": sys.argv[1:],
            "lock":       dataclasses.asdict(CFG())
        },
        "cases": {},
    }

    # Pad the running index to the width of the total case count.
    index_width = len(str(len(cases)))

    for i, case in enumerate(cases):
        summary_filepath = os.path.join(bench_dirpath, f"{case.slug}.yaml")
        log_filepath     = os.path.join(bench_dirpath, f"{case.slug}.out")

        cons.print(f"{str(i+1).zfill(index_width)}/{len(cases)}: {case.slug} @ [bold]{os.path.relpath(case.path)}[/bold]")
        cons.indent()
        try:
            cons.print()
            cons.print(f"> Log: [bold]{os.path.relpath(log_filepath)}[/bold]")
            cons.print(f"> Summary: [bold]{os.path.relpath(summary_filepath)}[/bold]")

            with open(log_filepath, "w") as log_file:
                system(
                    ["./mfc.sh", "run", case.path, "--case-optimization"] +
                    ["--targets"] + target_names +
                    ["--output-summary", summary_filepath] +
                    case.args,
                    stdout=log_file,
                    stderr=subprocess.STDOUT)

            results["cases"][case.slug] = {
                "description":    dataclasses.asdict(case),
                "output_summary": file_load_yaml(summary_filepath),
            }
        finally:
            # Undo this case's indent so the levels do not pile up from one
            # case to the next.
            cons.unindent()

    file_dump_yaml(ARG("output"), results)

    cons.print(f"Wrote results to [bold magenta]{os.path.relpath(ARG('output'))}[/bold magenta].")

    cons.unindent()


def diff():
    """Print a table comparing the timings of two benchmark result files.

    The files are read from the paths given by ``ARG("lhs")`` and
    ``ARG("rhs")``. For every case present in both files, and for every
    target that has an entry in both output summaries, the ratio
    ``lhs / rhs`` is shown (how many times slower lhs is than rhs); missing
    or unusable entries are shown as ``N/A``.

    Exits with status 1 when the metadata of the two files differ.
    """
    lhs, rhs = file_load_yaml(ARG("lhs")), file_load_yaml(ARG("rhs"))

    cons.print(f"[bold]Comparing Benchmarks: [magenta]{os.path.relpath(ARG('lhs'))}[/magenta] is x times slower than [magenta]{os.path.relpath(ARG('rhs'))}[/magenta].[/bold]")

    if lhs["metadata"] != rhs["metadata"]:
        cons.print("[bold yellow]Warning[/bold yellow]: Metadata of lhs and rhs are not equal.")
        # sys.exit rather than quit(): the latter is only installed by the
        # site module and is meant for interactive use.
        sys.exit(1)

    lhs_slugs, rhs_slugs = set(lhs["cases"]), set(rhs["cases"])
    slugs = lhs_slugs & rhs_slugs

    # Warn whenever the two case sets differ, including when one of them is
    # a strict subset of the other.
    if lhs_slugs != rhs_slugs:
        cons.print("[bold yellow]Warning[/bold yellow]: Cases of lhs and rhs are not equal.")
        cons.print(f"[bold yellow]lhs: {lhs_slugs - slugs}[/bold yellow]")
        cons.print(f"[bold yellow]rhs: {rhs_slugs - slugs}[/bold yellow]")
        cons.print(f"[bold yellow]Using intersection: {slugs}[/bold yellow]")

    table = rich.table.Table(show_header=True, box=rich.table.box.SIMPLE)
    table.add_column("[bold]Case[/bold]",         justify="left")
    table.add_column("[bold]Pre Process[/bold]",  justify="right")
    table.add_column("[bold]Simulation[/bold]",   justify="right")
    table.add_column("[bold]Post Process[/bold]", justify="right")

    # NOTE(review): assumes DEFAULT_TARGETS holds exactly the three targets
    # matching the columns above, in run order - confirm.
    ordered_targets = sorted(DEFAULT_TARGETS, key=lambda t: t.runOrder)

    # Sorted so that the row order is reproducible between invocations.
    for slug in sorted(slugs):
        lhs_summary = lhs["cases"][slug]["output_summary"]
        rhs_summary = rhs["cases"][slug]["output_summary"]

        speedups = ['N/A', 'N/A', 'N/A']

        for i, target in enumerate(ordered_targets):
            if target.name not in lhs_summary or target.name not in rhs_summary:
                continue

            try:
                ratio = lhs_summary[target.name] / rhs_summary[target.name]
            except (TypeError, ZeroDivisionError):
                # A zero or non-numeric timing cannot be compared; keep N/A
                # for this cell instead of aborting the whole table.
                continue

            speedups[i] = f"{ratio:.2f}x"

        table.add_row(f"[magenta]{slug}[/magenta]", *speedups)

    cons.raw.print(table)
16 changes: 5 additions & 11 deletions toolchain/mfc/run/case_dicts.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
from .. import common
from ..state import ARG


Expand Down Expand Up @@ -150,16 +149,11 @@


def get_input_dict_keys(target_name: str) -> list:
result = None
if target_name == "pre_process":
result = PRE_PROCESS.copy()
if target_name == "simulation":
result = SIMULATION.copy()
if target_name == "post_process":
result = POST_PROCESS.copy()

if result is None:
raise common.MFCException(f"[INPUT DICTS] Target {target_name} doesn't have an input dict.")
result = {
"pre_process" : PRE_PROCESS,
"simulation" : SIMULATION,
"post_process" : POST_PROCESS
}.get(target_name, {}).copy()

if not ARG("case_optimization") or target_name != "simulation":
return result
Expand Down
20 changes: 12 additions & 8 deletions toolchain/mfc/run/run.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,16 @@
import re, os, typing
import re, os, sys, typing, dataclasses

from glob import glob

from mako.lookup import TemplateLookup
from mako.template import Template

from ..build import get_targets, build
from ..build import get_targets, build, REQUIRED_TARGETS
from ..printer import cons
from ..state import ARG, ARGS
from ..state import ARG, ARGS, CFG
from ..common import MFCException, isspace, file_read, does_command_exist
from ..common import MFC_TEMPLATEDIR, file_write, system, MFC_ROOTDIR
from ..common import format_list_to_string
from ..common import format_list_to_string, file_dump_yaml

from . import queues, input

Expand Down Expand Up @@ -84,12 +84,11 @@ def __generate_job_script(targets):
env['CUDA_VISIBLE_DEVICES'] = ','.join([str(_) for _ in ARG('gpus')])

content = __get_template().render(
**ARGS(),
**{**ARGS(), 'targets': targets},
ARG=ARG,
env=env,
rootdir=MFC_ROOTDIR,
MFC_ROOTDIR=MFC_ROOTDIR,
qsystem=queues.get_system(),
binpaths=[target.get_install_binpath() for target in targets],
profiler=__profiler_prepend(),
)

Expand Down Expand Up @@ -119,7 +118,7 @@ def __execute_job_script(qsystem: queues.QueueSystem):


def run(targets = None):
targets = get_targets(targets or ARG("targets"))
targets = get_targets(list(REQUIRED_TARGETS) + (targets or ARG("targets")))

build(targets)

Expand All @@ -134,4 +133,9 @@ def run(targets = None):
__generate_input_files(targets)

if not ARG("dry_run"):
if ARG("output_summary") is not None:
file_dump_yaml(ARG("output_summary"), {
"invocation": sys.argv[1:],
"lock": dataclasses.asdict(CFG())
})
__execute_job_script(qsystem)
Loading

0 comments on commit 7d71dfa

Please sign in to comment.