From a805d23f88c408de1690eb06e6b197d6c09be3f5 Mon Sep 17 00:00:00 2001 From: Mike Campbell Date: Fri, 5 Nov 2021 17:38:14 -0500 Subject: [PATCH 1/6] Update driver for https://github.com/illinois-ceesd/mirgecom/pull/522 --- isolator.py | 65 ++++++++++++++++++++++++++++++----------------------- 1 file changed, 37 insertions(+), 28 deletions(-) diff --git a/isolator.py b/isolator.py index 924fdbd..ae9f9ab 100644 --- a/isolator.py +++ b/isolator.py @@ -75,7 +75,11 @@ ) from mirgecom.restart import write_restart_file from mirgecom.io import make_init_message -from mirgecom.mpi import mpi_entry_point +from mirgecom.mpi import ( + MPILikeDistributedContext, + NoMPIDistributedContext, + mpi_entry_point +) import pyopencl.tools as cl_tools from mirgecom.integrators import (rk4_step, lsrk54_step, lsrk144_step, euler_step) @@ -603,27 +607,26 @@ def __call__(self, x_vec, *, eos, **kwargs): momentum=mom, species_mass=specmass) -@mpi_entry_point -def main(ctx_factory=cl.create_some_context, restart_filename=None, - use_profiling=False, use_logmgr=True, user_input_file=None, - actx_class=PyOpenCLArrayContext, casename=None): - """Drive the Y0 example.""" +def main(ctx_factory=cl.create_some_context, dist_ctx=None, use_logmgr=True, + use_profiling=False, user_input_file=None, casename=None, + restart_filename=None, actx_class=PyOpenCLArrayContext): + """Y2-Isolator driver""" cl_ctx = ctx_factory() - from mpi4py import MPI - comm = MPI.COMM_WORLD - rank = comm.Get_rank() - nparts = comm.Get_size() - - from mirgecom.simutil import global_reduce as _global_reduce - global_reduce = partial(_global_reduce, comm=comm) - if casename is None: - casename = "mirgecom" + casename = "y2-isolator" - # logging and profiling + if dist_ctx is None: + dist_ctx = NoMPIDistributedContext() + assert isinstance(dist_ctx, MPILikeDistributedContext) + rank = dist_ctx.rank # logging and profiling + nparts = dist_ctx.size + + from mirgecom.simutil import global_reduce as _global_reduce + global_reduce = partial(_global_reduce, comm=dist_ctx.comm) + logmgr = initialize_logmgr(use_logmgr, - filename=f"{casename}.sqlite", mode="wo", mpi_comm=comm) + filename=f"{casename}.sqlite", mode="wo", mpi_comm=dist_ctx.comm) if use_profiling: queue = cl.CommandQueue(cl_ctx, @@ -671,7 +674,7 @@ def main(ctx_factory=cl.create_some_context, restart_filename=None, if rank == 0: with open(user_input_file) as f: input_data = yaml.load(f, Loader=yaml.FullLoader) - input_data = comm.bcast(input_data, root=0) + input_data = dist_ctx.comm.bcast(input_data, root=0) try: nviz = int(input_data["nviz"]) except KeyError: @@ -862,8 +865,8 @@ def main(ctx_factory=cl.create_some_context, restart_filename=None, from numpy import loadtxt geometry_bottom = loadtxt("nozzleBottom.dat", comments="#", unpack=False) geometry_top = loadtxt("nozzleTop.dat", comments="#", unpack=False) - geometry_bottom = comm.bcast(geometry_bottom, root=0) - geometry_top = comm.bcast(geometry_top, root=0) + geometry_bottom = dist_ctx.comm.bcast(geometry_bottom, root=0) + geometry_top = dist_ctx.comm.bcast(geometry_top, root=0) # parameters to adjust the shape of the initialization vel_sigma = 2000 @@ -933,14 +936,15 @@ def main(ctx_factory=cl.create_some_context, restart_filename=None, assert restart_data["nparts"] == nparts else: # generate the grid from scratch - local_mesh, global_nelements = generate_and_distribute_mesh(comm, get_mesh) + local_mesh, global_nelements = \ + generate_and_distribute_mesh(dist_ctx.comm, get_mesh) local_nelements = local_mesh.nelements if rank == 0: logging.info("Making discretization") discr = EagerDGDiscretization( - actx, local_mesh, order=order, mpi_communicator=comm + actx, local_mesh, order=order, mpi_communicator=dist_ctx.comm ) if rank == 0: logging.info("Done making discretization") @@ -1006,7 +1010,7 @@ def main(ctx_factory=cl.create_some_context, restart_filename=None, actx, local_mesh, order=restart_order, - mpi_communicator=comm) + mpi_communicator=dist_ctx.comm) from meshmode.discretization.connection import make_same_mesh_connection connection = make_same_mesh_connection( actx, @@ -1098,7 +1102,7 @@ def my_write_restart(step, t, state): "global_nelements": global_nelements, "num_parts": nparts } - write_restart_file(actx, restart_data, restart_fname, comm) + write_restart_file(actx, restart_data, restart_fname, dist_ctx.comm) def my_health_check(dv): health_error = False @@ -1326,6 +1330,7 @@ def my_rhs(t, state): import argparse parser = argparse.ArgumentParser( description="MIRGE-Com Isentropic Nozzle Driver") + parser.add_argument("--mpi", action="store_true", help="run with MPI"), parser.add_argument("-r", "--restart_file", type=ascii, dest="restart_file", nargs="?", action="store", help="simulation restart file") parser.add_argument("-i", "--input_file", type=ascii, dest="input_file", @@ -1371,9 +1376,13 @@ def my_rhs(t, state): else: print("No user input file, using default values") - print(f"Running {sys.argv[0]}\n") - main(restart_filename=restart_filename, user_input_file=input_file, - use_profiling=args.profile, use_logmgr=args.log, - actx_class=actx_class, casename=casename) + if args.mpi: + main_func = mpi_entry_point(main) + else: + main_func = main print(f"Running {sys.argv[0]}\n") + + main_func(restart_filename=restart_filename, user_input_file=input_file, + use_profiling=args.profile, use_logmgr=args.log, + actx_class=actx_class, casename=casename) # vim: foldmethod=marker From 120c72b0f743851e284cb292059e31606a96bfb4 Mon Sep 17 00:00:00 2001 From: Michael Campbell Date: Fri, 5 Nov 2021 17:53:07 -0500 Subject: [PATCH 2/6] Update build script to hit Matts fork, fix syntax err. --- buildMirge.sh | 7 ++++--- isolator.py | 3 ++- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/buildMirge.sh b/buildMirge.sh index 424fe0e..2255b66 100755 --- a/buildMirge.sh +++ b/buildMirge.sh @@ -1,7 +1,8 @@ #!/bin/bash # default branch for building mirgecom for this driver -mirge_branch="y1-production" +mirge_branch="y1-production-examples-optional-mpi" +mirge_fork="majosm" # conda environment name conda_env="mirgeDriver.Y2isolator" @@ -98,11 +99,11 @@ else if [ -z ${CONDA_PATH+x} ]; then echo "CONDA_PATH unset, installing new conda with emirge" echo "./install.sh --env-name=${conda_env} ${git_method} --branch=${mirge_branch}" - ./install.sh --env-name=${conda_env} ${git_method} --branch=${mirge_branch} + ./install.sh --env-name=${conda_env} ${git_method} --fork=${mirge_fork} --branch=${mirge_branch} else echo "Using existing Conda installation, ${CONDA_PATH}" echo "./install.sh --conda-prefix=$CONDA_PATH --env-name=${conda_env} ${git_method} --branch=${mirge_branch}" - ./install.sh --conda-prefix=$CONDA_PATH --env-name=${conda_env} ${git_method} --branch=${mirge_branch} + ./install.sh --conda-prefix=$CONDA_PATH --env-name=${conda_env} ${git_method} --fork=${mirge_fork} --branch=${mirge_branch} fi fi diff --git a/isolator.py b/isolator.py index ae9f9ab..da15d18 100644 --- a/isolator.py +++ b/isolator.py @@ -1379,7 +1379,8 @@ def my_rhs(t, state): if args.mpi: main_func = mpi_entry_point(main) else: - main_func = main print(f"Running {sys.argv[0]}\n") + main_func = main + print(f"Running {sys.argv[0]}\n") main_func(restart_filename=restart_filename, user_input_file=input_file, use_profiling=args.profile, use_logmgr=args.log, From ea37b32c1f827374229bf5b673834c52aa04c8de Mon Sep 17 00:00:00 2001 From: Michael Campbell Date: Fri, 5 Nov 2021 17:56:14 -0500 Subject: [PATCH 3/6] Placate flake8. --- isolator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/isolator.py b/isolator.py index da15d18..3303a63 100644 --- a/isolator.py +++ b/isolator.py @@ -624,7 +624,7 @@ def main(ctx_factory=cl.create_some_context, dist_ctx=None, use_logmgr=True, from mirgecom.simutil import global_reduce as _global_reduce global_reduce = partial(_global_reduce, comm=dist_ctx.comm) - + logmgr = initialize_logmgr(use_logmgr, filename=f"{casename}.sqlite", mode="wo", mpi_comm=dist_ctx.comm) From 1eea6173be073b132cc26c6fa5b38cbc606ec68d Mon Sep 17 00:00:00 2001 From: Michael Campbell Date: Fri, 5 Nov 2021 18:07:19 -0500 Subject: [PATCH 4/6] Catch MPI calls for when MPI is active. --- isolator.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/isolator.py b/isolator.py index 3303a63..05252a4 100644 --- a/isolator.py +++ b/isolator.py @@ -674,7 +674,8 @@ def main(ctx_factory=cl.create_some_context, dist_ctx=None, use_logmgr=True, if rank == 0: with open(user_input_file) as f: input_data = yaml.load(f, Loader=yaml.FullLoader) - input_data = dist_ctx.comm.bcast(input_data, root=0) + if nparts > 1: + input_data = dist_ctx.comm.bcast(input_data, root=0) try: nviz = int(input_data["nviz"]) except KeyError: @@ -865,8 +866,9 @@ def main(ctx_factory=cl.create_some_context, dist_ctx=None, use_logmgr=True, from numpy import loadtxt geometry_bottom = loadtxt("nozzleBottom.dat", comments="#", unpack=False) geometry_top = loadtxt("nozzleTop.dat", comments="#", unpack=False) - geometry_bottom = dist_ctx.comm.bcast(geometry_bottom, root=0) - geometry_top = dist_ctx.comm.bcast(geometry_top, root=0) + if nparts > 1: + geometry_bottom = dist_ctx.comm.bcast(geometry_bottom, root=0) + geometry_top = dist_ctx.comm.bcast(geometry_top, root=0) # parameters to adjust the shape of the initialization vel_sigma = 2000 From 48c526e6cfd0518058ff6097939d55d7d53c720b Mon Sep 17 00:00:00 2001 From: Michael Campbell Date: Fri, 5 Nov 2021 18:25:32 -0500 Subject: [PATCH 5/6] Make health check return an integer i guess? --- isolator.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/isolator.py b/isolator.py index 05252a4..1a1384e 100644 --- a/isolator.py +++ b/isolator.py @@ -1107,15 +1107,15 @@ def my_write_restart(step, t, state): write_restart_file(actx, restart_data, restart_fname, dist_ctx.comm) def my_health_check(dv): - health_error = False + health_error = 0 if check_naninf_local(discr, "vol", dv.pressure): - health_error = True + health_error = 1 logger.info(f"{rank=}: NANs/Infs in pressure data.") if global_reduce(check_range_local(discr, "vol", dv.pressure, health_pres_min, health_pres_max), op="lor"): - health_error = True + health_error = 1 p_min = actx.to_numpy(nodal_min(discr, "vol", dv.pressure)) p_max = actx.to_numpy(nodal_max(discr, "vol", dv.pressure)) logger.info(f"Pressure range violation ({p_min=}, {p_max=})") @@ -1238,7 +1238,7 @@ def my_pre_step(step, t, dt, state): if do_health: dv = eos.dependent_vars(state) - health_errors = global_reduce(my_health_check(dv), op="lor") + health_errors = global_reduce(my_health_check(dv), op="max") if health_errors: if rank == 0: logger.warning("Fluid solution failed health check.") From 5eb90e93ef2e6e89afd76267430e648cd5fde116 Mon Sep 17 00:00:00 2001 From: Michael Campbell Date: Fri, 5 Nov 2021 18:51:30 -0500 Subject: [PATCH 6/6] Tell me what ye do. --- buildMirge.sh | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/buildMirge.sh b/buildMirge.sh index 2255b66..991b8d6 100755 --- a/buildMirge.sh +++ b/buildMirge.sh @@ -97,13 +97,15 @@ else cd emirge if [ -z ${CONDA_PATH+x} ]; then - echo "CONDA_PATH unset, installing new conda with emirge" - echo "./install.sh --env-name=${conda_env} ${git_method} --branch=${mirge_branch}" - ./install.sh --env-name=${conda_env} ${git_method} --fork=${mirge_fork} --branch=${mirge_branch} + echo "CONDA_PATH unset, installing new conda with emirge" + set -x + ./install.sh --env-name=${conda_env} ${git_method} --fork=${mirge_fork} --branch=${mirge_branch} + set +x else echo "Using existing Conda installation, ${CONDA_PATH}" - echo "./install.sh --conda-prefix=$CONDA_PATH --env-name=${conda_env} ${git_method} --branch=${mirge_branch}" + set -x ./install.sh --conda-prefix=$CONDA_PATH --env-name=${conda_env} ${git_method} --fork=${mirge_fork} --branch=${mirge_branch} + set +x fi fi