# Patch summary (commentary only; patch tools skip text that precedes the first diff header).
#   * doc/clusters/frontier.rst: the install root moves to software/frontier/${USER}; documents
#     generate-tar-cache.sh and how a job script unpacks the tar cache into
#     /mnt/bb/${USER}/software and sources variables.sh from there.
#   * script/frontier/install.sh and template/frontier.jinja (same hunks in both files):
#     cray-python 3.9.13.1 -> 3.10.10; the module/variable setup is written to variables.sh and
#     refers to GLOTZERLAB_SOFTWARE_ROOT instead of a hard-coded $ROOT; environment.sh becomes a
#     wrapper that exports GLOTZERLAB_SOFTWARE_ROOT=$ROOT and sources variables.sh;
#     generate-tar-cache.sh is generated and made executable.
#   * The generated generate-tar-cache.sh quotes its path expansions and exits with status 1 on
#     a wrong argument count (status 0 only when -h is given).
diff --git a/doc/clusters/frontier.rst b/doc/clusters/frontier.rst index fb723d95..2694d5e5 100644 --- a/doc/clusters/frontier.rst +++ b/doc/clusters/frontier.rst @@ -17,26 +17,40 @@ If you already have a clone, update it:: $ cd software $ git pull origin trunk -Per OLCF policies, you should install your software in NFS under ``/ccs/proj/``. For example, -set the installation root directory to ``/ccs/proj/{your-project}/software/${USER}``. +You should install your software in NFS under ``/ccs/proj/``. For example, +set the installation root directory to ``/ccs/proj/{your-project}/software/frontier/${USER}``. Build the software environment and install it into the root:: - $ script/frontier/install.sh /ccs/proj/{your-project}/software/${USER} + $ script/frontier/install.sh /ccs/proj/{your-project}/software/frontier/${USER} ... compiling software will take several minutes ... Activate the environment with:: - $ source /ccs/proj/{your-project}/software/${USER}/environment.sh + $ source /ccs/proj/{your-project}/software/frontier/${USER}/environment.sh The environment is a `python3 venv <https://docs.python.org/3/library/venv.html>`_. You may extend it with additional python packages using ``python3 -m pip install``:: - $ source /ccs/proj/{your-project}/software/${USER}/environment.sh + $ source /ccs/proj/{your-project}/software/frontier/${USER}/environment.sh $ python3 -m pip install package -Use the following commands in your job scripts or interactively to execute software inside the -container:: +Importing Python packages from this environment will be *very* slow with large node count jobs. 
+To improve performance, generate a tar file with the environment and store it on Orion (repeat this +step after you update the environment or install packages with ``pip``):: + + $ /ccs/proj/{your-project}/software/frontier/${USER}/generate-tar-cache.sh \ + ${MEMBERWORK}/{your-project}/software.tar + +Use the following commands in your job scripts (or interactively with ``salloc``) to load the cache +into NVMe and execute software from there:: + + #SBATCH -C nvme + + export GLOTZERLAB_SOFTWARE_ROOT=/mnt/bb/${USER}/software + srun --ntasks-per-node 1 mkdir -p ${GLOTZERLAB_SOFTWARE_ROOT} + srun --ntasks-per-node 1 tar --directory ${GLOTZERLAB_SOFTWARE_ROOT} -xpf \ + ${MEMBERWORK}/{your-project}/software.tar + source ${GLOTZERLAB_SOFTWARE_ROOT}/variables.sh - source /ccs/proj/{your-project}/software/${USER}/environment.sh srun {srun options} command arguments diff --git a/script/frontier/install.sh b/script/frontier/install.sh index ad5d1e18..7f6300c9 100755 --- a/script/frontier/install.sh +++ b/script/frontier/install.sh @@ -16,16 +16,16 @@ ROOT=$(realpath $1) echo "Installing glotzerlab-software to $ROOT" module purge module load PrgEnv-gnu -module load cray-python/3.9.13.1 +module load cray-python/3.10.10 python3 -m venv $ROOT -cat >$ROOT/environment.sh << EOL +cat >$ROOT/variables.sh << EOL module purge module load PrgEnv-gnu module load cmake/3.23.2 module load git/2.36.1 module load rocm/5.4.3 -module load cray-python/3.9.13.1 +module load cray-python/3.10.10 module load hdf5/1.14.0 module load ninja/1.10.2 module load tmux/3.2a @@ -34,12 +34,13 @@ module unload darshan-runtime # The cray-mpich module does not provide this, it is needed to build mpi4py from source. 
export MPICC=\$CRAY_MPICH_DIR/bin/mpicc -export LD_LIBRARY_PATH=$ROOT/lib:\$LD_LIBRARY_PATH -export PATH=$ROOT/bin:\$PATH -export CPATH=$ROOT/include -export LIBRARY_PATH=$ROOT/lib -export VIRTUAL_ENV=$ROOT -export CMAKE_PREFIX_PATH=$ROOT +export LD_LIBRARY_PATH=\$GLOTZERLAB_SOFTWARE_ROOT/lib:\$LD_LIBRARY_PATH +export PATH=\$GLOTZERLAB_SOFTWARE_ROOT/bin:\$PATH +export CPATH=\$GLOTZERLAB_SOFTWARE_ROOT/include +export LIBRARY_PATH=\$GLOTZERLAB_SOFTWARE_ROOT/lib +export VIRTUAL_ENV=\$GLOTZERLAB_SOFTWARE_ROOT +export CMAKE_PREFIX_PATH=\$GLOTZERLAB_SOFTWARE_ROOT +export PYTHONPATH=\$(\${GLOTZERLAB_SOFTWARE_ROOT}/bin/python -c 'import site; print(site.getsitepackages()[0])') export CC=\$GCC_PATH/bin/gcc export CXX=\$GCC_PATH/bin/g++ @@ -49,7 +50,31 @@ export ROCM_HOME=\$OLCF_ROCM_ROOT export HCC_AMDGPU_TARGET=gfx90a export PYTHONUNBUFFERED=1 + +# work around PMI_Init mmap sync errors +export PMI_MMAP_SYNC_WAIT_TIME=1800 +EOL + +cat >$ROOT/environment.sh << EOL +export GLOTZERLAB_SOFTWARE_ROOT=$ROOT +source \$GLOTZERLAB_SOFTWARE_ROOT/variables.sh +EOL + +cat >$ROOT/generate-tar-cache.sh << EOL +#! /usr/bin/bash +usage="\$(basename "\$0") output-file -- Generate a tar cache file." + +if [[ \$# -ne 1 || \$1 == "-h" ]] +then + echo "\$usage" + exit \$(( \$# != 1 )) +fi + +DEST=\$(realpath "\$1") + +tar --directory "$ROOT" --exclude software.tar -cf "\$DEST" . 
EOL +chmod ug+x $ROOT/generate-tar-cache.sh source $ROOT/environment.sh diff --git a/template/frontier.jinja b/template/frontier.jinja index df97f883..5f061b2f 100644 --- a/template/frontier.jinja +++ b/template/frontier.jinja @@ -16,16 +16,16 @@ ROOT=$(realpath $1) echo "Installing glotzerlab-software to $ROOT" module purge module load PrgEnv-gnu -module load cray-python/3.9.13.1 +module load cray-python/3.10.10 python3 -m venv $ROOT -cat >$ROOT/environment.sh << EOL +cat >$ROOT/variables.sh << EOL module purge module load PrgEnv-gnu module load cmake/3.23.2 module load git/2.36.1 module load rocm/5.4.3 -module load cray-python/3.9.13.1 +module load cray-python/3.10.10 module load hdf5/1.14.0 module load ninja/1.10.2 module load tmux/3.2a @@ -34,12 +34,13 @@ module unload darshan-runtime # The cray-mpich module does not provide this, it is needed to build mpi4py from source. export MPICC=\$CRAY_MPICH_DIR/bin/mpicc -export LD_LIBRARY_PATH=$ROOT/lib:\$LD_LIBRARY_PATH -export PATH=$ROOT/bin:\$PATH -export CPATH=$ROOT/include -export LIBRARY_PATH=$ROOT/lib -export VIRTUAL_ENV=$ROOT -export CMAKE_PREFIX_PATH=$ROOT +export LD_LIBRARY_PATH=\$GLOTZERLAB_SOFTWARE_ROOT/lib:\$LD_LIBRARY_PATH +export PATH=\$GLOTZERLAB_SOFTWARE_ROOT/bin:\$PATH +export CPATH=\$GLOTZERLAB_SOFTWARE_ROOT/include +export LIBRARY_PATH=\$GLOTZERLAB_SOFTWARE_ROOT/lib +export VIRTUAL_ENV=\$GLOTZERLAB_SOFTWARE_ROOT +export CMAKE_PREFIX_PATH=\$GLOTZERLAB_SOFTWARE_ROOT +export PYTHONPATH=\$(\${GLOTZERLAB_SOFTWARE_ROOT}/bin/python -c 'import site; print(site.getsitepackages()[0])') export CC=\$GCC_PATH/bin/gcc export CXX=\$GCC_PATH/bin/g++ @@ -49,7 +50,31 @@ export ROCM_HOME=\$OLCF_ROCM_ROOT export HCC_AMDGPU_TARGET=gfx90a export PYTHONUNBUFFERED=1 + +# work around PMI_Init mmap sync errors +export PMI_MMAP_SYNC_WAIT_TIME=1800 +EOL + +cat >$ROOT/environment.sh << EOL +export GLOTZERLAB_SOFTWARE_ROOT=$ROOT +source \$GLOTZERLAB_SOFTWARE_ROOT/variables.sh +EOL + +cat >$ROOT/generate-tar-cache.sh << EOL +#! 
/usr/bin/bash +usage="\$(basename "\$0") output-file -- Generate a tar cache file." + +if [[ \$# -ne 1 || \$1 == "-h" ]] +then + echo "\$usage" + exit \$(( \$# != 1 )) +fi + +DEST=\$(realpath "\$1") + +tar --directory "$ROOT" --exclude software.tar -cf "\$DEST" . EOL +chmod ug+x $ROOT/generate-tar-cache.sh source $ROOT/environment.sh