From ae4e71603b73351475c625659264e55717a6b481 Mon Sep 17 00:00:00 2001 From: Xylar Asay-Davis Date: Thu, 18 Jan 2024 05:12:10 -0600 Subject: [PATCH 1/3] Split spack environments into soft and lib This allows the software to be built with spack using different compilers and/or MPI libraries than are used for the libraries and E3SM component builds. --- deploy/bootstrap.py | 138 +++++++++++++++++++++++++++++++++++--------- 1 file changed, 111 insertions(+), 27 deletions(-) diff --git a/deploy/bootstrap.py b/deploy/bootstrap.py index dd4300c95..2e3003b01 100755 --- a/deploy/bootstrap.py +++ b/deploy/bootstrap.py @@ -400,13 +400,102 @@ def get_env_vars(machine, compiler, mpilib): return env_vars -def build_spack_env(config, update_spack, machine, compiler, mpi, # noqa: C901 - spack_env, spack_base, spack_template_path, env_vars, - tmpdir, logger): +def build_spack_soft_env(config, update_spack, machine, env_type, # noqa: C901 + polaris_version, source_path, spack_base, + spack_template_path, tmpdir): + + if not config.has_option('deploy', 'software_compiler'): + return None + + compiler = config.get('deploy', 'software_compiler') + if not config.has_option('deploy', f'mpi_{compiler}'): + raise ValueError(f'Machine config file for {machine} is missing ' + f'mpi_{compiler}, the MPI library for the software ' + f'compiler.') + mpi = config.get('deploy', f'mpi_{compiler}') + + if machine is not None: + spack_base = get_spack_base(spack_base, config) + + if spack_base is None: + return None + + if env_type == 'dev': + ver = version.parse(polaris_version) + release_version = '.'.join(str(vr) for vr in ver.release) + spack_env = f'dev_polaris_soft_{release_version}' + elif env_type == 'test_release': + spack_env = f'test_polaris_soft_{polaris_version}' + else: + spack_env = f'polaris_soft_{polaris_version}' + + spack_env = spack_env.replace('.', '_') + + build_dir = f'deploy_tmp/build_soft_{machine}' + + try: + shutil.rmtree(build_dir) + except OSError: + pass + try: 
+ os.makedirs(build_dir) + except FileExistsError: + pass + + os.chdir(build_dir) + + esmf = config.get('deploy', 'esmf') + + spack_branch_base = f'{spack_base}/{spack_env}' + + specs = list() + + e3sm_hdf5_netcdf = config.getboolean('deploy', 'use_e3sm_hdf5_netcdf') + if not e3sm_hdf5_netcdf: + hdf5 = config.get('deploy', 'hdf5') + netcdf_c = config.get('deploy', 'netcdf_c') + netcdf_fortran = config.get('deploy', 'netcdf_fortran') + specs.extend([ + f'"hdf5@{hdf5}+cxx+fortran+hl+mpi+shared"', + f'"netcdf-c@{netcdf_c}+mpi~parallel-netcdf"', + f'"netcdf-fortran@{netcdf_fortran}"']) + + if esmf != 'None': + specs.append(f'"esmf@{esmf}+mpi+netcdf~pnetcdf~external-parallelio"') + + yaml_template: str | None = None + template_path = f'{spack_template_path}/{machine}_{compiler}_{mpi}.yaml' + if os.path.exists(template_path): + yaml_template = template_path + + if machine is not None: + here = os.path.abspath(os.path.dirname(__file__)) + machine_config = os.path.join(here, '..', 'polaris', 'machines', + f'{machine}.cfg') + else: + machine_config = None + + if update_spack: + make_spack_env(spack_path=spack_branch_base, env_name=spack_env, + spack_specs=specs, compiler=compiler, mpi=mpi, + machine=machine, config_file=machine_config, + include_e3sm_hdf5_netcdf=e3sm_hdf5_netcdf, + yaml_template=yaml_template, tmpdir=tmpdir) + + spack_view = f'{spack_branch_base}/var/spack/environments/' \ + f'{spack_env}/.spack-env/view' + + os.chdir(source_path) + + return spack_view + + +def build_spack_libs_env(config, update_spack, machine, compiler, # noqa: C901 + mpi, spack_env, spack_base, spack_template_path, + env_vars, tmpdir, logger): albany = config.get('deploy', 'albany') cmake = config.get('deploy', 'cmake') - esmf = config.get('deploy', 'esmf') lapack = config.get('deploy', 'lapack') petsc = config.get('deploy', 'petsc') scorpio = config.get('deploy', 'scorpio') @@ -430,8 +519,6 @@ def build_spack_env(config, update_spack, machine, compiler, mpi, # noqa: C901 
f'"netcdf-fortran@{netcdf_fortran}"', f'"parallel-netcdf@{pnetcdf}+cxx+fortran"']) - if esmf != 'None': - specs.append(f'"esmf@{esmf}+mpi+netcdf~pnetcdf~external-parallelio"') if lapack != 'None': specs.append(f'"netlib-lapack@{lapack}"') include_e3sm_lapack = False @@ -440,19 +527,10 @@ def build_spack_env(config, update_spack, machine, compiler, mpi, # noqa: C901 if petsc != 'None': specs.append(f'"petsc@{petsc}+mpi+batch"') - custom_spack = '' if scorpio != 'None': specs.append( f'"scorpio' f'@{scorpio}+pnetcdf~timing+internal-timing~tools+malloc"') - # make sure scorpio, not esmf, libraries are linked - lib_path = f'{spack_branch_base}/var/spack/environments/' \ - f'{spack_env}/.spack-env/view/lib' - scorpio_lib_path = '$(spack find --format "{prefix}" scorpio)' - custom_spack = \ - f'{custom_spack}' \ - f'ln -sfn {scorpio_lib_path}/lib/libpioc.a {lib_path}\n' \ - f'ln -sfn {scorpio_lib_path}/lib/libpiof.a {lib_path}\n' if albany != 'None': specs.append(f'"albany@{albany}+mpas"') @@ -475,16 +553,8 @@ def build_spack_env(config, update_spack, machine, compiler, mpi, # noqa: C901 machine=machine, config_file=machine_config, include_e3sm_lapack=include_e3sm_lapack, include_e3sm_hdf5_netcdf=e3sm_hdf5_netcdf, - yaml_template=yaml_template, tmpdir=tmpdir, - custom_spack=custom_spack) - - # remove ESMC/ESMF include files that interfere with MPAS time keeping - include_path = f'{spack_branch_base}/var/spack/environments/' \ - f'{spack_env}/.spack-env/view/include' - for prefix in ['ESMC', 'esmf']: - files = glob.glob(os.path.join(include_path, f'{prefix}*')) - for filename in files: - os.remove(filename) + yaml_template=yaml_template, tmpdir=tmpdir) + set_ld_library_path(spack_branch_base, spack_env, logger) spack_script = get_spack_script( @@ -531,7 +601,7 @@ def build_spack_env(config, update_spack, machine, compiler, mpi, # noqa: C901 f'export PETSC={spack_view}\n' \ f'export USE_PETSC=true\n' - return spack_branch_base, spack_script, env_vars + return spack_script, 
env_vars def set_ld_library_path(spack_branch_base, spack_env, logger): @@ -954,6 +1024,10 @@ def main(): # noqa: C901 permissions_dirs = [] activ_path = None + soft_spack_view = build_spack_soft_env( + config, args.update_spack, machine, env_type, polaris_version, + source_path, args.spack_base, spack_template_path, args.tmpdir) + for compiler, mpi in zip(compilers, mpis): python, recreate, conda_mpi, activ_suffix, env_suffix, \ @@ -1011,7 +1085,8 @@ def main(): # noqa: C901 if compiler is not None: env_vars = get_env_vars(machine, compiler, mpi) if spack_base is not None: - _, spack_script, env_vars = build_spack_env( + + spack_script, env_vars = build_spack_libs_env( config, args.update_spack, machine, compiler, mpi, spack_env, spack_base, spack_template_path, env_vars, args.tmpdir, logger) @@ -1024,6 +1099,14 @@ def main(): # noqa: C901 f'{env_vars}' \ f'export PIO={conda_env_path}\n' \ f'export OPENMP_INCLUDE=-I"{conda_env_path}/include"\n' + + if soft_spack_view is None: + raise ValueError('A software compiler or a spack base was not ' + 'defined so required software was not ' + 'installed with spack.') + env_vars = f'{env_vars}' \ + f'export PATH="{soft_spack_view}/bin:$PATH"\n' + else: env_vars = '' @@ -1067,6 +1150,7 @@ def main(): # noqa: C901 if args.update_spack or env_type != 'dev': # we need to update permissions on shared stuff + update_permissions(config, env_type, activ_path, permissions_dirs) From 75e45a19f7379b73d643f546446c7b72ebd4ff1f Mon Sep 17 00:00:00 2001 From: Xylar Asay-Davis Date: Thu, 18 Jan 2024 05:14:46 -0600 Subject: [PATCH 2/3] Add software compilers to supported machines --- polaris/machines/anvil.cfg | 3 +++ polaris/machines/chicoma-cpu.cfg | 3 +++ polaris/machines/chrysalis.cfg | 3 +++ polaris/machines/compy.cfg | 3 +++ polaris/machines/morpheus.cfg | 3 +++ polaris/machines/pm-cpu.cfg | 3 +++ 6 files changed, 18 insertions(+) diff --git a/polaris/machines/anvil.cfg b/polaris/machines/anvil.cfg index 304f40315..43abd9d18 100644 
--- a/polaris/machines/anvil.cfg +++ b/polaris/machines/anvil.cfg @@ -15,6 +15,9 @@ polaris_envs = /lcrc/soft/climate/polaris/anvil/base # the compiler set to use for system libraries and MPAS builds compiler = intel +# the compiler to use to build software (e.g. ESMF and MOAB) with spack +software_compiler = intel + # the system MPI library to use for intel compiler mpi_intel = impi diff --git a/polaris/machines/chicoma-cpu.cfg b/polaris/machines/chicoma-cpu.cfg index 18bab1e80..26a71e909 100644 --- a/polaris/machines/chicoma-cpu.cfg +++ b/polaris/machines/chicoma-cpu.cfg @@ -15,6 +15,9 @@ polaris_envs = /usr/projects/e3sm/polaris/chicoma-cpu/conda/base # the compiler set to use for system libraries and MPAS builds compiler = gnu +# the compiler to use to build software (e.g. ESMF and MOAB) with spack +software_compiler = gnu + # the system MPI library to use for gnu compiler mpi_gnu = mpich diff --git a/polaris/machines/chrysalis.cfg b/polaris/machines/chrysalis.cfg index c28f89292..26cbc8f6e 100644 --- a/polaris/machines/chrysalis.cfg +++ b/polaris/machines/chrysalis.cfg @@ -15,6 +15,9 @@ polaris_envs = /lcrc/soft/climate/polaris/chrysalis/base # the compiler set to use for system libraries and MPAS builds compiler = intel +# the compiler to use to build software (e.g. ESMF and MOAB) with spack +software_compiler = intel + # the system MPI library to use for intel compiler mpi_intel = openmpi diff --git a/polaris/machines/compy.cfg b/polaris/machines/compy.cfg index 33f24de0f..0fc702c3e 100644 --- a/polaris/machines/compy.cfg +++ b/polaris/machines/compy.cfg @@ -15,6 +15,9 @@ polaris_envs = /share/apps/E3SM/conda_envs/polaris/conda/base # the compiler set to use for system libraries and MPAS builds compiler = intel +# the compiler to use to build software (e.g. 
ESMF and MOAB) with spack +software_compiler = intel + # the system MPI library to use for intel compiler mpi_intel = impi diff --git a/polaris/machines/morpheus.cfg b/polaris/machines/morpheus.cfg index 0a3bc51b5..c16c0ec52 100644 --- a/polaris/machines/morpheus.cfg +++ b/polaris/machines/morpheus.cfg @@ -42,6 +42,9 @@ polaris_envs = /home/xylar/data/polaris_envs # the compiler set to use for system libraries and MPAS builds compiler = gnu +# the compiler to use to build software (e.g. ESMF and MOAB) with spack +software_compiler = gnu + # the system MPI library to use for gnu compiler mpi_gnu = openmpi diff --git a/polaris/machines/pm-cpu.cfg b/polaris/machines/pm-cpu.cfg index baa1ac7ed..744a6ca65 100644 --- a/polaris/machines/pm-cpu.cfg +++ b/polaris/machines/pm-cpu.cfg @@ -15,6 +15,9 @@ polaris_envs = /global/common/software/e3sm/polaris/pm-cpu/conda/base # the compiler set to use for system libraries and MPAS builds compiler = gnu +# the compiler to use to build software (e.g. ESMF and MOAB) with spack +software_compiler = gnu + # the system MPI library to use for gnu compiler mpi_gnu = mpich From 277dfd596b7491845f71f10b134d21f7d1412c8d Mon Sep 17 00:00:00 2001 From: Xylar Asay-Davis Date: Thu, 18 Jan 2024 05:46:11 -0600 Subject: [PATCH 3/3] Update the spack deployment docs --- docs/developers_guide/deploying_spack.md | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/docs/developers_guide/deploying_spack.md b/docs/developers_guide/deploying_spack.md index 55ffdbf59..0245299b2 100644 --- a/docs/developers_guide/deploying_spack.md +++ b/docs/developers_guide/deploying_spack.md @@ -21,15 +21,29 @@ re-deploy shared spack environments on each supported machine. 
### Spack -Spack is for libraries used by MPAS and tools that need system MPI: +Spack is used to build libraries used by E3SM components and tools that need +system MPI: - ESMF +- MOAB - SCORPIO +- Metis +- Parmetis +- Trilinos - Albany - PETSc - Netlib LAPACK -When we update the versions of any of these libraries in Polaris, we also need +We build one spack environment for tools (e.g. ESMF and MOAB) and another for +libraries. This allows us to build the tools with one set of compilers and +MPI libraries and the libraries with another. This is sometimes necessary, +since ESMF, MOAB and/or their dependencies can't always be built or don't +run correctly with all compiler and MPI combinations. For example, we have +experienced problems running ESMF built with intel compilers on Perlmutter. +We are also not able to build ESMF or the Eigen dependency of MOAB using +`nvidiagpu` compilers. + +When we update the versions of any of these packages in Polaris, we also need to bump the Polaris version (typically either the major or the minor version) and then re-deploy shared spack environments on each supported machine. @@ -37,8 +51,8 @@ and then re-deploy shared spack environments on each supported machine. Conda (via conda-forge) is used for python packages and related dependencies that don’t need system MPI. Conda environments aren’t shared between -developers because the polaris you’re developing is part of the conda -environment. +developers because the polaris python package you’re developing is part of the +conda environment. When we update the constraints on conda dependencies, we also need to bump the Polaris alpha, beta or rc version. We do not need to re-deploy spack @@ -67,6 +81,7 @@ These config options are shared across packages including: - E3SM_Diags - zppy - polaris +- compass - E3SM-Unified Polaris uses these config options to know how to make a job script, where to