Skip to content

Commit

Permalink
Merge pull request #335 from glotzerlab/frontier-nvme
Browse files Browse the repository at this point in the history
Frontier: Increase PMPI_Init timeout, allow NVME and use Python 3.10.
  • Loading branch information
joaander authored Oct 18, 2023
2 parents 2dc49a7 + b8a9c53 commit 76d44c8
Show file tree
Hide file tree
Showing 3 changed files with 90 additions and 26 deletions.
30 changes: 22 additions & 8 deletions doc/clusters/frontier.rst
Original file line number Diff line number Diff line change
Expand Up @@ -17,26 +17,40 @@ If you already have a clone, update it::
$ cd software
$ git pull origin trunk

Per OLCF policies, you should install your software in NFS under ``/ccs/proj/``. For example,
set the installation root directory to ``/ccs/proj/{your-project}/software/${USER}``.
You should install your software in NFS under ``/ccs/proj/``. For example,
set the installation root directory to ``/ccs/proj/{your-project}/software/frontier/${USER}``.

Build the software environment and install it into the root::

$ script/frontier/install.sh /ccs/proj/{your-project}/software/${USER}
$ script/frontier/install.sh /ccs/proj/{your-project}/software/frontier/${USER}
... compiling software will take several minutes ...

Activate the environment with::

$ source /ccs/proj/{your-project}/software/${USER}/environment.sh
$ source /ccs/proj/{your-project}/software/frontier/${USER}/environment.sh

The environment is a `python3 venv <https://docs.python.org/3/library/venv.html>`_. You may extend
it with additional python packages using ``python3 -m pip install``::

$ source /ccs/proj/{your-project}/software/${USER}/environment.sh
$ source /ccs/proj/{your-project}/software/frontier/${USER}/environment.sh
$ python3 -m pip install package

Use the following commands in your job scripts or interactively to execute software inside the
container::
Importing Python packages from this environment will be *very* slow in jobs that run on a large
number of nodes. To improve performance, generate a tar file of the environment and store it on
Orion (repeat this step every time you update the environment or install packages with ``pip``)::

$ /ccs/proj/{your-project}/software/frontier/${USER}/generate-tar-cache.sh \
${MEMBERWORK}/{your-project}/software.tar

Use the following commands in your job scripts (or interactively with ``salloc``) to extract the
tar cache onto each node's NVMe storage and execute software from there::

#SBATCH -C nvme

export GLOTZERLAB_SOFTWARE_ROOT=/mnt/bb/${USER}/software
srun --ntasks-per-node 1 mkdir ${GLOTZERLAB_SOFTWARE_ROOT}
srun --ntasks-per-node 1 tar --directory ${GLOTZERLAB_SOFTWARE_ROOT} -xpf \
${MEMBERWORK}/{your-project}/software.tar
source ${GLOTZERLAB_SOFTWARE_ROOT}/variables.sh

source /ccs/proj/{your-project}/software/${USER}/environment.sh
srun {srun options} command arguments
43 changes: 34 additions & 9 deletions script/frontier/install.sh
Original file line number Diff line number Diff line change
Expand Up @@ -16,16 +16,16 @@ ROOT=$(realpath $1)
echo "Installing glotzerlab-software to $ROOT"
module purge
module load PrgEnv-gnu
module load cray-python/3.9.13.1
module load cray-python/3.10.10
python3 -m venv $ROOT

cat >$ROOT/environment.sh << EOL
cat >$ROOT/variables.sh << EOL
module purge
module load PrgEnv-gnu
module load cmake/3.23.2
module load git/2.36.1
module load rocm/5.4.3
module load cray-python/3.9.13.1
module load cray-python/3.10.10
module load hdf5/1.14.0
module load ninja/1.10.2
module load tmux/3.2a
Expand All @@ -34,12 +34,13 @@ module unload darshan-runtime
# The cray-mpich module does not provide this, it is needed to build mpi4py from source.
export MPICC=\$CRAY_MPICH_DIR/bin/mpicc
export LD_LIBRARY_PATH=$ROOT/lib:\$LD_LIBRARY_PATH
export PATH=$ROOT/bin:\$PATH
export CPATH=$ROOT/include
export LIBRARY_PATH=$ROOT/lib
export VIRTUAL_ENV=$ROOT
export CMAKE_PREFIX_PATH=$ROOT
export LD_LIBRARY_PATH=\$GLOTZERLAB_SOFTWARE_ROOT/lib:\$LD_LIBRARY_PATH
export PATH=\$GLOTZERLAB_SOFTWARE_ROOT/bin:\$PATH
export CPATH=\$GLOTZERLAB_SOFTWARE_ROOT/include
export LIBRARY_PATH=\$GLOTZERLAB_SOFTWARE_ROOT/lib
export VIRTUAL_ENV=\$GLOTZERLAB_SOFTWARE_ROOT
export CMAKE_PREFIX_PATH=\$GLOTZERLAB_SOFTWARE_ROOT
export PYTHONPATH=\$(\${GLOTZERLAB_SOFTWARE_ROOT}/bin/python -c 'import site; print(site.getsitepackages()[0])')
export CC=\$GCC_PATH/bin/gcc
export CXX=\$GCC_PATH/bin/g++
Expand All @@ -49,7 +50,31 @@ export ROCM_HOME=\$OLCF_ROCM_ROOT
export HCC_AMDGPU_TARGET=gfx90a
export PYTHONUNBUFFERED=1
# work around PMI_Init mmap sync errors
export PMI_MMAP_SYNC_WAIT_TIME=1800
EOL

cat >$ROOT/environment.sh << EOL
export GLOTZERLAB_SOFTWARE_ROOT=$ROOT
source \$GLOTZERLAB_SOFTWARE_ROOT/variables.sh
EOL

cat >$ROOT/generate-tar-cache.sh << EOL
#! /usr/bin/bash
usage="\$(basename "\$0") output-file -- Generate a tar cache file."
if [[ \$# -lt 1 || \$# -gt 1 || \$1 == "-h" ]]
then
echo "\$usage"
exit 0
fi
DEST=\$(realpath \$1)
tar --directory $ROOT --exclude software.tar -cf \$DEST .
EOL
chmod ug+x $ROOT/generate-tar-cache.sh

source $ROOT/environment.sh

Expand Down
43 changes: 34 additions & 9 deletions template/frontier.jinja
Original file line number Diff line number Diff line change
Expand Up @@ -16,16 +16,16 @@ ROOT=$(realpath $1)
echo "Installing glotzerlab-software to $ROOT"
module purge
module load PrgEnv-gnu
module load cray-python/3.9.13.1
module load cray-python/3.10.10
python3 -m venv $ROOT

cat >$ROOT/environment.sh << EOL
cat >$ROOT/variables.sh << EOL
module purge
module load PrgEnv-gnu
module load cmake/3.23.2
module load git/2.36.1
module load rocm/5.4.3
module load cray-python/3.9.13.1
module load cray-python/3.10.10
module load hdf5/1.14.0
module load ninja/1.10.2
module load tmux/3.2a
Expand All @@ -34,12 +34,13 @@ module unload darshan-runtime
# The cray-mpich module does not provide this, it is needed to build mpi4py from source.
export MPICC=\$CRAY_MPICH_DIR/bin/mpicc
export LD_LIBRARY_PATH=$ROOT/lib:\$LD_LIBRARY_PATH
export PATH=$ROOT/bin:\$PATH
export CPATH=$ROOT/include
export LIBRARY_PATH=$ROOT/lib
export VIRTUAL_ENV=$ROOT
export CMAKE_PREFIX_PATH=$ROOT
export LD_LIBRARY_PATH=\$GLOTZERLAB_SOFTWARE_ROOT/lib:\$LD_LIBRARY_PATH
export PATH=\$GLOTZERLAB_SOFTWARE_ROOT/bin:\$PATH
export CPATH=\$GLOTZERLAB_SOFTWARE_ROOT/include
export LIBRARY_PATH=\$GLOTZERLAB_SOFTWARE_ROOT/lib
export VIRTUAL_ENV=\$GLOTZERLAB_SOFTWARE_ROOT
export CMAKE_PREFIX_PATH=\$GLOTZERLAB_SOFTWARE_ROOT
export PYTHONPATH=\$(\${GLOTZERLAB_SOFTWARE_ROOT}/bin/python -c 'import site; print(site.getsitepackages()[0])')
export CC=\$GCC_PATH/bin/gcc
export CXX=\$GCC_PATH/bin/g++
Expand All @@ -49,7 +50,31 @@ export ROCM_HOME=\$OLCF_ROCM_ROOT
export HCC_AMDGPU_TARGET=gfx90a
export PYTHONUNBUFFERED=1
# work around PMI_Init mmap sync errors
export PMI_MMAP_SYNC_WAIT_TIME=1800
EOL

cat >$ROOT/environment.sh << EOL
export GLOTZERLAB_SOFTWARE_ROOT=$ROOT
source \$GLOTZERLAB_SOFTWARE_ROOT/variables.sh
EOL

cat >$ROOT/generate-tar-cache.sh << EOL
#! /usr/bin/bash
usage="\$(basename "\$0") output-file -- Generate a tar cache file."
if [[ \$# -lt 1 || \$# -gt 1 || \$1 == "-h" ]]
then
echo "\$usage"
exit 0
fi
DEST=\$(realpath \$1)
tar --directory $ROOT --exclude software.tar -cf \$DEST .
EOL
chmod ug+x $ROOT/generate-tar-cache.sh

source $ROOT/environment.sh

Expand Down

0 comments on commit 76d44c8

Please sign in to comment.