diff --git a/docs/documentation/running.md b/docs/documentation/running.md
index 566866f7d..ceaa4a0f1 100644
--- a/docs/documentation/running.md
+++ b/docs/documentation/running.md
@@ -17,8 +17,8 @@ several supercomputer clusters, both interactively and through batch submission.
 >
 > Adding a new template file or modifying an existing one will most likely be required if:
 > - You are on a cluster that does not have a template yet.
-> - Your cluster is configured with SLURM and but fails when interactive jobs are
->   launched with `mpirun`.
+> - Your cluster is configured with SLURM but interactive job launches fail when
+>   using `srun`. You might need to invoke `mpirun` instead.
 > - Something in the existing default or computer template file is incompatible with
 >   your system or does not provide a feature you need.
 >
diff --git a/misc/run-phoenix-release-cpu.sh b/misc/run-phoenix-release-cpu.sh
index 185d95865..aea1b80a4 100644
--- a/misc/run-phoenix-release-cpu.sh
+++ b/misc/run-phoenix-release-cpu.sh
@@ -12,4 +12,4 @@ cd "$SLURM_SUBMIT_DIR"
 echo "Running in $(pwd):"
 
 . ./mfc.sh load -c p -m gpu
-./mfc.sh test -j $(nproc) -a -- -b mpirun
+./mfc.sh test -j $(nproc) -a -- -c phoenix
diff --git a/misc/run-phoenix-release-gpu.sh b/misc/run-phoenix-release-gpu.sh
index 45f16eb81..b2f3e4c1f 100644
--- a/misc/run-phoenix-release-gpu.sh
+++ b/misc/run-phoenix-release-gpu.sh
@@ -19,4 +19,4 @@ set -x
 gpu_count=$(nvidia-smi -L | wc -l)        # number of GPUs on node
 gpu_ids=$(seq -s ' ' 0 $(($gpu_count-1))) # 0,1,2,...,gpu_count-1
 
-./mfc.sh test -a -j 2 --gpu -g $gpu_ids -- -b mpirun
+./mfc.sh test -a -j 2 --gpu -g $gpu_ids -- -c phoenix
diff --git a/toolchain/mfc/args.py b/toolchain/mfc/args.py
index 1855d0105..0eb1db3ee 100644
--- a/toolchain/mfc/args.py
+++ b/toolchain/mfc/args.py
@@ -101,7 +101,6 @@ def add_common_arguments(p, mask = None):
     run.add_argument("input", metavar="INPUT", type=str, help="Input file to run.")
     run.add_argument("arguments", metavar="ARGUMENTS", nargs="*", type=str, default=[], help="Additional positional arguments to pass to the case file.")
     run.add_argument("-e", "--engine", choices=["interactive", "batch"], type=str, default="interactive", help="Job execution/submission engine choice.")
-    run.add_argument("--output-summary", type=str, default=None, help="(Interactive) Output a YAML summary file.")
     run.add_argument("-p", "--partition", metavar="PARTITION", type=str, default="", help="(Batch) Partition for job submission.")
     run.add_argument("-q", "--quality_of_service", metavar="QOS", type=str, default="", help="(Batch) Quality of Service for job submission.")
     run.add_argument("-N", "--nodes", metavar="NODES", type=int, default=1, help="(Batch) Number of nodes.")
@@ -111,13 +110,14 @@ def add_common_arguments(p, mask = None):
     run.add_argument("-@", "--email", metavar="EMAIL", type=str, default="", help="(Batch) Email for job notification.")
     run.add_argument("-#", "--name", metavar="NAME", type=str, default="MFC", help="(Batch) Job name.")
     run.add_argument("-s", "--scratch", action="store_true", default=False, help="Build from scratch.")
+    run.add_argument("-b", "--binary", choices=["mpirun", "jsrun", "srun", "mpiexec"], type=str, default=None, help="(Interactive) Override MPI execution binary")
     run.add_argument("--ncu", nargs=argparse.REMAINDER, type=str, help="Profile with NVIDIA Nsight Compute.")
     run.add_argument("--nsys", nargs=argparse.REMAINDER, type=str, help="Profile with NVIDIA Nsight Systems.")
     run.add_argument( "--dry-run", action="store_true", default=False, help="(Batch) Run without submitting batch file.")
     run.add_argument("--case-optimization", action="store_true", default=False, help="(GPU Optimization) Compile MFC targets with some case parameters hard-coded.")
     run.add_argument( "--no-build", action="store_true", default=False, help="(Testing) Do not rebuild MFC.")
     run.add_argument("--wait", action="store_true", default=False, help="(Batch) Wait for the job to finish.")
-    run.add_argument("-f", "--flags", metavar="FLAGS", dest="--", nargs=argparse.REMAINDER, type=str, default=[], help="(Interactive) Arguments to forward to the MPI invocation.")
+    run.add_argument("-f", "--flags", metavar="FLAGS", dest="--", nargs=argparse.REMAINDER, type=str, default=[], help="Arguments to forward to the MPI invocation.")
     run.add_argument("-c", "--computer", metavar="COMPUTER", type=str, default="default", help=f"(Batch) Path to a custom submission file template or one of {format_list_to_string(list(get_baked_templates().keys()))}.")
 
     # === BENCH ===
@@ -154,6 +154,9 @@ def add_common_arguments(p, mask = None):
     if args["command"] == "build":
         if (args["input"] is not None) ^ args["case_optimization"] :
             raise MFCException("./mfc.sh build's --case-optimization and --input must be used together.")
+    if args["command"] == "run":
+        if args["binary"] is not None and args["engine"] != "interactive":
+            raise MFCException("./mfc.sh run's --binary can only be used with --engine=interactive.")
 
     # Input files to absolute paths
     for e in ["input", "input1", "input2"]:
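For orientation, a hypothetical command-line sketch (the case file name is a placeholder, not taken from this patch) of how the new `-b/--binary` option and the now engine-agnostic `-f/--flags` option combine; the check added to args.py rejects `--binary` whenever the engine is not interactive:

    # Interactive run: force srun instead of the auto-detected launcher and
    # forward an extra flag to the MPI invocation.
    ./mfc.sh run my_case.py -b srun -f --exclusive

    # Batch run: --binary would be rejected here, so the launcher choice
    # comes from the submission template selected with -c/--computer.
    ./mfc.sh run my_case.py -e batch -c phoenix -N 2
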
help="(Batch) Run without submitting batch file.") run.add_argument("--case-optimization", action="store_true", default=False, help="(GPU Optimization) Compile MFC targets with some case parameters hard-coded.") run.add_argument( "--no-build", action="store_true", default=False, help="(Testing) Do not rebuild MFC.") run.add_argument("--wait", action="store_true", default=False, help="(Batch) Wait for the job to finish.") - run.add_argument("-f", "--flags", metavar="FLAGS", dest="--", nargs=argparse.REMAINDER, type=str, default=[], help="(Interactive) Arguments to forward to the MPI invocation.") + run.add_argument("-f", "--flags", metavar="FLAGS", dest="--", nargs=argparse.REMAINDER, type=str, default=[], help="Arguments to forward to the MPI invocation.") run.add_argument("-c", "--computer", metavar="COMPUTER", type=str, default="default", help=f"(Batch) Path to a custom submission file template or one of {format_list_to_string(list(get_baked_templates().keys()))}.") # === BENCH === @@ -154,6 +154,9 @@ def add_common_arguments(p, mask = None): if args["command"] == "build": if (args["input"] is not None) ^ args["case_optimization"] : raise MFCException("./mfc.sh build's --case-optimization and --input must be used together.") + if args["command"] == "run": + if args["binary"] is not None and args["engine"] != "interactive": + raise MFCException("./mfc.sh run's --binary can only be used with --engine=interactive.") # Input files to absolute paths for e in ["input", "input1", "input2"]: diff --git a/toolchain/mfc/run/run.py b/toolchain/mfc/run/run.py index 8c4bb7348..9b81121d8 100644 --- a/toolchain/mfc/run/run.py +++ b/toolchain/mfc/run/run.py @@ -88,6 +88,7 @@ def __generate_job_script(targets): ARG=ARG, env=env, rootdir=MFC_ROOTDIR, + qsystem=queues.get_system(), binpaths=[target.get_install_binpath() for target in targets], profiler=__profiler_prepend(), ) diff --git a/toolchain/templates/bridges2.mako b/toolchain/templates/bridges2.mako index 203a8c3e6..34d718716 100644 --- a/toolchain/templates/bridges2.mako +++ b/toolchain/templates/bridges2.mako @@ -27,27 +27,30 @@ <%include file="prologue.mako"/> -echo -e ":) Loading modules:\n" +ok ":) Loading modules:\n" cd "${rootdir}" . ./mfc.sh load -c b -m ${'g' if gpu else 'c'} cd - > /dev/null echo % for binpath in binpaths: - echo -e ":) Running ${binpath.split('/')[-1]}:\n" + ok -e ":) Running ${binpath.split('/')[-1]}:\n" % if not mpi: ${' '.join([f"'{x}'" for x in profiler ])} "${binpath}" % else: - mpirun -np ${nodes*tasks_per_node} \ - ${' '.join([f"'{x}'" for x in profiler ])} \ - "${binpath}" + ${' '.join([f"'{x}'" for x in profiler ])} \ + mpirun -np ${nodes*tasks_per_node} \ + ${' '.join([f"'{x}'" for x in ARG('--') ])} \ + "${binpath}" % endif % if engine == 'interactive': code=$? if [ $code -ne 0 ]; then - echo -e "\n:( $MAGENTA${binpath}$COLOR_RESET failed with exit code $MAGENTA$code$COLOR_RESET.\n" + echo + error ":( $MAGENTA${binpath}$COLOR_RESET failed with exit code $MAGENTA$code$COLOR_RESET." + echo exit 1 fi % endif diff --git a/toolchain/templates/default.mako b/toolchain/templates/default.mako index 32cf69f3e..9917e0517 100644 --- a/toolchain/templates/default.mako +++ b/toolchain/templates/default.mako @@ -12,38 +12,55 @@ warn "This is the$MAGENTA default$COLOR_RESET template." warn "It is not intended to support all systems and execution engines." warn "Please use a different template via the $MAGENTA--computer$COLOR_RESET option." 
diff --git a/toolchain/templates/default.mako b/toolchain/templates/default.mako
index 32cf69f3e..9917e0517 100644
--- a/toolchain/templates/default.mako
+++ b/toolchain/templates/default.mako
@@ -12,38 +12,55 @@
 warn "This is the$MAGENTA default$COLOR_RESET template."
 warn "It is not intended to support all systems and execution engines."
 warn "Please use a different template via the $MAGENTA--computer$COLOR_RESET option."
-echo
+
+% if mpi:
+    # Find a suitable MPI launcher and store it in the variable "binary".
+    for binary in ${binary or ''} jsrun srun mpirun mpiexec; do
+        if command -v $binary > /dev/null; then
+            break
+        fi
+    done
+
+    if ! command -v $binary > /dev/null; then
+        error ":( Could not find a suitable MPI launcher.\n"
+        exit 1
+    else
+        ok ":) Selected MPI launcher $MAGENTA$binary$COLOR_RESET. Use$MAGENTA --binary$COLOR_RESET to override."
+    fi
+% endif
 
 % for binpath in binpaths:
-    echo -e ":) Running $MAGENTA${binpath}$COLOR_RESET:\n"
+    ok ":) Running $MAGENTA${binpath}$COLOR_RESET:\n"
 
     % if not mpi:
         ${' '.join([f"'{x}'" for x in profiler ])} "${binpath}"
     % else:
-        if command -v jsrun > /dev/null; then
-            jsrun --nrs ${tasks_per_node*nodes}          \
-                  --cpu_per_rs 1                         \
-                  --gpu_per_rs ${1 if gpu else 0}        \
-                  --tasks_per_rs 1                       \
-                  ${' '.join([f"'{x}'" for x in profiler ])} \
-                  "${binpath}"
-        elif command -v srun > /dev/null; then
-            srun --ntasks-per-node ${tasks_per_node}     \
-                 ${' '.join([f"'{x}'" for x in profiler ])} \
-                 "${binpath}"
-        elif command -v mpirun > /dev/null; then
-            mpirun -np ${nodes*tasks_per_node}           \
-                   ${' '.join([f"'{x}'" for x in profiler ])} \
-                   "${binpath}"
-        else
-            echo -e "\n:( Could not find a suitable MPI launcher.\n"
-            exit 1
+        if [ "$binary" == "jsrun" ]; then
+            ${' '.join([f"'{x}'" for x in profiler ])}   \
+                jsrun --nrs ${tasks_per_node*nodes}      \
+                      --cpu_per_rs 1                     \
+                      --gpu_per_rs ${1 if gpu else 0}    \
+                      --tasks_per_rs 1                   \
+                      ${' '.join([f"'{x}'" for x in ARG('--') ])} \
+                      "${binpath}"
+        elif [ "$binary" == "srun" ]; then
+            ${' '.join([f"'{x}'" for x in profiler ])}   \
+                srun --ntasks-per-node ${tasks_per_node} \
+                     ${' '.join([f"'{x}'" for x in ARG('--') ])} \
+                     "${binpath}"
+        elif [ "$binary" == "mpirun" ] || [ "$binary" == "mpiexec" ]; then
+            ${' '.join([f"'{x}'" for x in profiler ])}   \
+                $binary -np ${nodes*tasks_per_node}      \
+                        ${' '.join([f"'{x}'" for x in ARG('--') ])} \
+                        "${binpath}"
         fi
     % endif
 
     code=$?
     if [ $code -ne 0 ]; then
-        echo -e "\n:( $MAGENTA${binpath}$COLOR_RESET failed with exit code $MAGENTA$code$COLOR_RESET.\n"
+        echo
+        error ":( $MAGENTA${binpath}$COLOR_RESET failed with exit code $MAGENTA$code$COLOR_RESET."
+        echo
         exit 1
     fi
 
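The detection block above reads more naturally outside of Mako. Below is a minimal standalone sketch of the same idea (variable names are illustrative, and it deliberately records the match in a second variable instead of re-testing the last candidate): try the user-requested launcher first, then fall back to the first launcher found on $PATH.

    #!/bin/bash
    requested=""    # e.g. "srun" to mimic --binary srun
    for candidate in $requested jsrun srun mpirun mpiexec; do
        if command -v "$candidate" > /dev/null; then
            launcher=$candidate
            break
        fi
    done

    if [ -z "${launcher:-}" ]; then
        echo "Could not find a suitable MPI launcher." >&2
        exit 1
    fi
    echo "Selected MPI launcher: $launcher"
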
diff --git a/toolchain/templates/include/prologue.mako b/toolchain/templates/include/prologue.mako
index 32dabe3b9..aa9962042 100644
--- a/toolchain/templates/include/prologue.mako
+++ b/toolchain/templates/include/prologue.mako
@@ -13,11 +13,12 @@ TABLE_HEADER="+-----------------------------------------------------------------
 TABLE_FOOTER="+-----------------------------------------------------------------------------------------------------------+ \\n"
 TABLE_TITLE_FORMAT="| %-105s |\\n"
 TABLE_CONTENT=$(cat <<-END
-$(printf "$TABLE_FORMAT_LINE" "Start-time"    "$(date +%T)"                    "Start-date"    "$(date +%T)")
-$(printf "$TABLE_FORMAT_LINE" "Partition"     "${partition}"                   "Walltime"      "${walltime}")
-$(printf "$TABLE_FORMAT_LINE" "Account"       "${account}"                     "Nodes"         "${nodes}")
-$(printf "$TABLE_FORMAT_LINE" "Job Name"      "${name}"                        "Engine"        "${engine}")
-$(printf "$TABLE_FORMAT_LINE" "Queue System"  "{qsystem.name}"                 "Email"         "${email}")
+$(printf "$TABLE_FORMAT_LINE" "Start-time"    "$(date +%T)"                    "Start-date"    "$(date +%F)")
+$(printf "$TABLE_FORMAT_LINE" "Partition"     "${partition or 'N/A'}"          "Walltime"      "${walltime}")
+$(printf "$TABLE_FORMAT_LINE" "Account"       "${account or 'N/A'}"            "Nodes"         "${nodes}")
+$(printf "$TABLE_FORMAT_LINE" "Job Name"      "${name}"                        "Engine"        "${engine}")
+$(printf "$TABLE_FORMAT_LINE" "QoS"           "${quality_of_service or 'N/A'}" "Binary"        "${binary or 'N/A'}")
+$(printf "$TABLE_FORMAT_LINE" "Queue System"  "${qsystem.name}"                "Email"         "${email or 'N/A'}")
 END
 )
 
diff --git a/toolchain/templates/phoenix.mako b/toolchain/templates/phoenix.mako
index 495c96a1e..80c70f97e 100644
--- a/toolchain/templates/phoenix.mako
+++ b/toolchain/templates/phoenix.mako
@@ -27,25 +27,31 @@
 
 <%include file="prologue.mako"/>
 
-echo -e ":) Loading modules:\n"
-cd "${rootdir}" && . ./mfc.sh load -c p -m ${'g' if gpu else 'c'} && cd -
+ok ":) Loading modules:\n"
+cd "${rootdir}"
+. ./mfc.sh load -c p -m ${'g' if gpu else 'c'}
+cd - > /dev/null
 echo
 
 % for binpath in binpaths:
-    echo -e ":) Running ${binpath.split('/')[-1]}:\n"
+    ok ":) Running ${binpath.split('/')[-1]}:\n"
 
     % if not mpi:
        ${' '.join([f"'{x}'" for x in profiler ])} "${binpath}"
     % else:
-        mpirun -np ${nodes*tasks_per_node}                \
-               ${' '.join([f"'{x}'" for x in profiler ])} \
-               "${binpath}"
+        ${' '.join([f"'{x}'" for x in profiler ])}        \
+            mpirun -np ${nodes*tasks_per_node}            \
+                   --bind-to none                         \
+                   ${' '.join([f"'{x}'" for x in ARG('--') ])} \
+                   "${binpath}"
     % endif
 
     % if engine == 'interactive':
        code=$?
        if [ $code -ne 0 ]; then
-            echo -e "\n:( $MAGENTA${binpath}$COLOR_RESET failed with exit code $MAGENTA$code$COLOR_RESET.\n"
+            echo
+            error ":( $MAGENTA${binpath}$COLOR_RESET failed with exit code $MAGENTA$code$COLOR_RESET."
+            echo
             exit 1
        fi
    % endif
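To make the arithmetic concrete, an illustrative rendering (not emitted verbatim by the patch; the install path is a placeholder) of the Phoenix launch for 2 nodes with 24 tasks per node, no profiler, and no extra -f/--flags arguments:

    mpirun -np 48         \
           --bind-to none \
           "/path/to/mfc/build/install/bin/simulation"
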
diff --git a/toolchain/templates/summit.mako b/toolchain/templates/summit.mako
index cce6b7062..a9a888c41 100644
--- a/toolchain/templates/summit.mako
+++ b/toolchain/templates/summit.mako
@@ -12,30 +12,35 @@
 
 <%include file="prologue.mako"/>
 
-echo -e ":) Loading modules:\n"
-cd "${rootdir}" && . ./mfc.sh load -c s -m ${'g' if gpu else 'c'} && cd -
+ok ":) Loading modules:\n"
+cd "${rootdir}"
+. ./mfc.sh load -c s -m ${'g' if gpu else 'c'}
+cd - > /dev/null
 echo
 
 % for binpath in binpaths:
-    echo -e ":) Running ${binpath}:\n"
+    ok ":) Running ${binpath}:\n"
 
     % if not mpi:
        ${' '.join([f"'{x}'" for x in profiler ])} "${binpath}"
     % else:
-        jsrun                                         \
-            ${'--smpiargs="-gpu"' if gpu else ''}     \
-            --nrs ${tasks_per_node*nodes}             \
-            --cpu_per_rs 1                            \
-            --gpu_per_rs ${1 if gpu else 0}           \
-            --tasks_per_rs 1                          \
-            ${' '.join([f"'{x}'" for x in profiler ])} \
-            "${binpath}"
+        ${' '.join([f"'{x}'" for x in profiler ])}    \
+            jsrun                                     \
+                ${'--smpiargs="-gpu"' if gpu else ''} \
+                --nrs ${tasks_per_node*nodes}         \
+                --cpu_per_rs 1                        \
+                --gpu_per_rs ${1 if gpu else 0}       \
+                --tasks_per_rs 1                      \
+                ${' '.join([f"'{x}'" for x in ARG('--') ])} \
+                "${binpath}"
     % endif
 
     % if engine == 'interactive':
        code=$?
        if [ $code -ne 0 ]; then
-            echo -e "\n:( $MAGENTA${binpath}$COLOR_RESET failed with exit code $MAGENTA$code$COLOR_RESET.\n"
+            echo
+            error ":( $MAGENTA${binpath}$COLOR_RESET failed with exit code $MAGENTA$code$COLOR_RESET."
+            echo
             exit 1
        fi
    % endif
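For the Summit template, the resource-set math in an illustrative rendering (binary path is a placeholder) of a GPU run on 2 nodes with 6 tasks per node, i.e. one rank, one core, and one GPU per resource set:

    jsrun                     \
        --smpiargs="-gpu"     \
        --nrs 12              \
        --cpu_per_rs 1        \
        --gpu_per_rs 1        \
        --tasks_per_rs 1      \
        "/path/to/mfc/build/install/bin/simulation"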