Skip to content

Commit

Permalink
Merge pull request #341 from glotzerlab/update-greatlakes-guidance
Browse files Browse the repository at this point in the history
Recommend srun instead of mpirun on great lakes.
  • Loading branch information
joaander authored Nov 2, 2023
2 parents 186ecb9 + 753c4e2 commit d1592d2
Show file tree
Hide file tree
Showing 3 changed files with 16 additions and 12 deletions.
16 changes: 12 additions & 4 deletions doc/clusters/greatlakes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -33,23 +33,31 @@ Use the following commands in your job scripts or interactively to execute softw
Serial (or multithreaded) CPU jobs::

module load gcc/10.3.0 openmpi/4.1.6 singularity
mpirun -n 1 singularity exec --bind /scratch,/gpfs \
srun -u --export=ALL -n 1 singularity exec --bind /scratch,/gpfs \
/scratch/your-account_root/your-account/$USER/software.sif command arguments

Single GPU jobs::

module load gcc/10.3.0 openmpi/4.1.6 singularity
mpirun -n 1 singularity exec --bind /scratch,/gpfs --nv \
srun -u --export=ALL -n 1 singularity exec --bind /scratch,/gpfs --nv \
/scratch/your-account_root/your-account/$USER/software.sif command arguments

MPI parallel CPU jobs::

module load gcc/10.3.0 openmpi/4.1.6 singularity
mpirun singularity exec --bind /scratch,/gpfs \
srun -u --export=ALL singularity exec --bind /scratch,/gpfs \
/scratch/your-account_root/your-account/$USER/software.sif command arguments

MPI parallel GPU jobs::

module load gcc/10.3.0 openmpi/4.1.6 singularity
mpirun singularity exec --bind /scratch,/gpfs --nv \
srun -u --export=ALL singularity exec --bind /scratch,/gpfs --nv \
/scratch/your-account_root/your-account/$USER/software.sif command arguments

.. important::

Invoke parallel jobs with ``srun -u --export=ALL`` to ensure that tasks are distributed properly
across the requested resources (``mpirun`` oversubscribes resources in some cases). ``--export=ALL``
should be the default behavior, but testing showed that it is not applied unless the option is
given explicitly. The ``-u`` option ensures that stdout and stderr are written to the output file
immediately. Without ``-u``, ``srun`` on Great Lakes tends to buffer output until the job completes.
8 changes: 3 additions & 5 deletions docker/greatlakes/test/job-cpu.sh
Original file line number Diff line number Diff line change
Expand Up @@ -7,17 +7,15 @@
#SBATCH --export=ALL
#SBATCH -t 0:10:00

export PMIX_MCA_psec=^munge

module load gcc/10.3.0 openmpi/4.1.6 singularity

set -x
cd $SLURM_SUBMIT_DIR

mpirun -n 1 singularity exec --bind /scratch,/gpfs software.sif python3 serial-cpu.py
srun -u --export=ALL -n 1 singularity exec --bind /scratch,/gpfs software.sif python3 serial-cpu.py

mpirun singularity exec --bind /scratch,/gpfs software.sif python3 mpi-cpu.py
srun -u --export=ALL singularity exec --bind /scratch,/gpfs software.sif python3 mpi-cpu.py

mpirun singularity exec --bind /scratch,/gpfs software.sif /opt/osu-micro-benchmarks/libexec/osu-micro-benchmarks/mpi/pt2pt/osu_bibw
srun -u --export=ALL singularity exec --bind /scratch,/gpfs software.sif /opt/osu-micro-benchmarks/libexec/osu-micro-benchmarks/mpi/pt2pt/osu_bibw

echo "Tests complete."
4 changes: 1 addition & 3 deletions docker/greatlakes/test/job-gpu.sh
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,11 @@
#SBATCH --export=ALL
#SBATCH -t 0:10:00

export PMIX_MCA_psec=^munge

module load gcc/10.3.0 openmpi/4.1.6 singularity

set -x
cd $SLURM_SUBMIT_DIR

mpirun -n 1 singularity exec --bind /scratch,/gpfs --nv software.sif python3 serial-gpu.py
srun -u --export=ALL -n 1 singularity exec --bind /scratch,/gpfs --nv software.sif python3 serial-gpu.py

echo "Tests complete."

0 comments on commit d1592d2

Please sign in to comment.