diff --git a/ci/distributed.yml b/ci/distributed.yml index 916afe1734..4b4038d047 100644 --- a/ci/distributed.yml +++ b/ci/distributed.yml @@ -38,34 +38,41 @@ build_distributed_baseimage_aarch64: DOCKERFILE: ci/docker/checkout_mpi.Dockerfile DOCKER_BUILD_ARGS: '["PYVERSION=$PYVERSION", "BASE_IMAGE=${BASE_IMAGE_${PYVERSION_PREFIX}}", "VENV=${UV_PROJECT_ENVIRONMENT}"]' PERSIST_IMAGE_NAME: $CSCS_REGISTRY_PATH/public/$ARCH/icon4py/icon4py-ci:$CI_COMMIT_SHA-$UV_PROJECT_ENVIRONMENT-$PYVERSION-mpi - USE_MPI: NO - SLURM_MPI_TYPE: pmix - PMIX_MCA_psec: native - PMIX_MCA_gds: "^shmem2" -.build_distributed_cpu: +.build_distributed: extends: [.build_distributed_template] variables: UV_PROJECT_ENVIRONMENT: venv_dist -build_distributed_cpu: +build_distributed: stage: image - extends: [.container-builder-cscs-gh200, .build_distributed_cpu] + extends: [.container-builder-cscs-gh200, .build_distributed] needs: [build_distributed_baseimage_aarch64] .test_template_distributed: timeout: 8h image: $CSCS_REGISTRY_PATH/public/$ARCH/icon4py/icon4py-ci:$CI_COMMIT_SHA-$UV_PROJECT_ENVIRONMENT-$PYVERSION-mpi - extends: [.container-runner-santis-gh200, .build_distributed_cpu] - needs: [build_distributed_cpu] + extends: [.container-runner-santis-gh200, .build_distributed] + needs: [build_distributed] variables: SLURM_JOB_NUM_NODES: 1 SLURM_CPU_BIND: 'verbose' SLURM_NTASKS: 4 + SLURM_GPUS_PER_TASK: 1 ICON4PY_TEST_DATA_PATH: "/icon4py/testdata" ICON4PY_ENABLE_GRID_DOWNLOAD: false ICON4PY_ENABLE_TESTDATA_DOWNLOAD: false CSCS_ADDITIONAL_MOUNTS: '["/capstor/store/cscs/userlab/cwci02/icon4py/ci/testdata:$ICON4PY_TEST_DATA_PATH"]' + # Do not use libfabric from the host system. Libfabric with slingshot + # support is built into the container image. + USE_MPI: NO + # Use libfabric slingshot (cxi) provider and recommended settings from + # https://docs.cscs.ch/software/communication/openmpi. + SLURM_MPI_TYPE: pmix + PMIX_MCA_psec: native + FI_PROVIDER: cxi + OMPI_MCA_pml: cm + OMPI_MCA_mtl: ofi .test_distributed_aarch64: stage: test @@ -80,14 +87,17 @@ build_distributed_cpu: parallel: matrix: - COMPONENT: [atmosphere/diffusion, atmosphere/dycore, common] - BACKEND: [embedded, gtfn_cpu, dace_cpu] + BACKEND: [embedded, gtfn_cpu, dace_cpu, dace_gpu, gtfn_gpu] rules: - if: $COMPONENT == 'atmosphere/diffusion' variables: SLURM_TIMELIMIT: '00:05:00' - - if: $COMPONENT == 'atmosphere/dycore' && $BACKEND == 'dace_cpu' + - if: $COMPONENT == 'atmosphere/dycore' && ($BACKEND == 'dace_cpu' || $BACKEND == 'dace_gpu') + variables: + SLURM_TIMELIMIT: '00:30:00' + - if: $COMPONENT == 'common' && $BACKEND == 'dace_gpu' variables: - SLURM_TIMELIMIT: '00:20:00' + SLURM_TIMELIMIT: '00:45:00' - if: $COMPONENT == 'atmosphere/dycore' variables: SLURM_TIMELIMIT: '00:15:00' diff --git a/ci/docker/base_mpi.Dockerfile b/ci/docker/base_mpi.Dockerfile index 3fcdb21297..a600b4ff1c 100644 --- a/ci/docker/base_mpi.Dockerfile +++ b/ci/docker/base_mpi.Dockerfile @@ -1,27 +1,124 @@ -FROM ubuntu:25.04 +FROM ubuntu:25.10 ENV LANG C.UTF-8 ENV LC_ALL C.UTF-8 ARG DEBIAN_FRONTEND=noninteractive -RUN apt-get update -qq && apt-get install -qq -y --no-install-recommends \ - strace \ - build-essential \ - tar \ - wget \ - curl \ - libboost-dev \ - libnuma-dev \ - libopenmpi-dev \ - ca-certificates \ - libssl-dev \ - autoconf \ - automake \ - libtool \ - pkg-config \ - libreadline-dev \ - git && \ +RUN apt-get update && \ + apt-get install -y --no-install-recommends \ + autoconf \ + automake \ + build-essential \ + ca-certificates \ + curl \ + git \ + libboost-dev \ + libconfig-dev \ + libcurl4-openssl-dev \ + libfuse-dev \ + libjson-c-dev \ + libnl-3-dev \ + libnuma-dev \ + libreadline-dev \ + libsensors-dev \ + libssl-dev \ + libtool \ + libuv1-dev \ + libyaml-dev \ + nvidia-cuda-dev \ + nvidia-cuda-toolkit \ + nvidia-cuda-toolkit-gcc \ + pkg-config \ + python3 \ + strace \ + tar \ + wget && \ rm -rf /var/lib/apt/lists/* +ENV CC=/usr/bin/cuda-gcc +ENV CXX=/usr/bin/cuda-g++ +ENV CUDAHOSTCXX=/usr/bin/cuda-g++ + +# Install OpenMPI configured with libfabric, libcxi, and gdrcopy support for use +# on Alps. This is based on examples in +# https://github.com/eth-cscs/cray-network-stack. +ARG gdrcopy_version=2.5.1 +RUN set -eux; \ + git clone --depth 1 --branch "v${gdrcopy_version}" https://github.com/NVIDIA/gdrcopy.git; \ + cd gdrcopy; \ + make lib -j"$(nproc)" lib_install; \ + cd /; \ + rm -rf /gdrcopy; \ + ldconfig + +ARG cassini_headers_version=release/shs-13.0.0 +RUN set -eux; \ + git clone --depth 1 --branch "${cassini_headers_version}" https://github.com/HewlettPackard/shs-cassini-headers.git; \ + cd shs-cassini-headers; \ + cp -r include/* /usr/include/; \ + cp -r share/* /usr/share/; \ + rm -rf /shs-cassini-headers + +ARG cxi_driver_version=release/shs-13.0.0 +RUN set -eux; \ + git clone --depth 1 --branch "${cxi_driver_version}" https://github.com/HewlettPackard/shs-cxi-driver.git; \ + cd shs-cxi-driver; \ + cp -r include/* /usr/include/; \ + rm -rf /shs-cxi-driver + +ARG libcxi_version=release/shs-13.0.0 +RUN set -eux; \ + git clone --depth 1 --branch "${libcxi_version}" https://github.com/HewlettPackard/shs-libcxi.git; \ + cd shs-libcxi; \ + ./autogen.sh; \ + ./configure \ + --with-cuda; \ + make -j"$(nproc)" install; \ + cd /; \ + rm -rf /shs-libcxi; \ + ldconfig + +ARG xpmem_version=0d0bad4e1d07b38d53ecc8f20786bb1328c446da +RUN set -eux; \ + git clone https://github.com/hpc/xpmem.git; \ + cd xpmem; \ + git checkout "${xpmem_version}"; \ + ./autogen.sh; \ + ./configure --disable-kernel-module; \ + make -j"$(nproc)" install; \ + cd /; \ + rm -rf /xpmem; \ + ldconfig + +# NOTE: xpmem is not found correctly without setting the prefix explicitly in +# --enable-xpmem +ARG libfabric_version=v2.4.0 +RUN set -eux; \ + git clone --depth 1 --branch "${libfabric_version}" https://github.com/ofiwg/libfabric.git; \ + cd libfabric; \ + ./autogen.sh; \ + ./configure \ + --with-cuda \ + --enable-xpmem=/usr \ + --enable-tcp \ + --enable-cxi; \ + make -j"$(nproc)" install; \ + cd /; \ + rm -rf /libfabric; \ + ldconfig + +ARG openmpi_version=5.0.9 +RUN set -eux; \ + curl -fsSL "https://download.open-mpi.org/release/open-mpi/v5.0/openmpi-${openmpi_version}.tar.gz" -o /tmp/ompi.tar.gz; \ + tar -C /tmp -xzf /tmp/ompi.tar.gz; \ + cd "/tmp/openmpi-${openmpi_version}"; \ + ./configure \ + --with-ofi \ + --with-cuda=/usr; \ + make -j"$(nproc)" install; \ + cd /; \ + rm -rf "/tmp/openmpi-${openmpi_version}" /tmp/ompi.tar.gz; \ + ldconfig + # Install uv: https://docs.astral.sh/uv/guides/integration/docker COPY --from=ghcr.io/astral-sh/uv:0.9.24@sha256:816fdce3387ed2142e37d2e56e1b1b97ccc1ea87731ba199dc8a25c04e4997c5 /uv /uvx /bin/ diff --git a/ci/docker/checkout_mpi.Dockerfile b/ci/docker/checkout_mpi.Dockerfile index c229d6c374..01e26702b4 100644 --- a/ci/docker/checkout_mpi.Dockerfile +++ b/ci/docker/checkout_mpi.Dockerfile @@ -7,5 +7,9 @@ WORKDIR /icon4py ARG PYVERSION ARG VENV ENV UV_PROJECT_ENVIRONMENT=$VENV -ENV MPI4PY_BUILD_BACKEND="scikit-build-core" -RUN uv sync --extra distributed --python=$PYVERSION +ENV MPI4PY_BUILD_BACKEND=scikit-build-core +ENV GHEX_USE_GPU=ON +ENV GHEX_GPU_TYPE=NVIDIA +ENV GHEX_GPU_ARCH=90 +ENV GHEX_TRANSPORT_BACKEND=MPI +RUN uv sync --extra all --extra cuda12 --python=$PYVERSION diff --git a/model/common/src/icon4py/model/common/grid/utils.py b/model/common/src/icon4py/model/common/grid/utils.py index 39b48c9dd5..dbb3d69449 100644 --- a/model/common/src/icon4py/model/common/grid/utils.py +++ b/model/common/src/icon4py/model/common/grid/utils.py @@ -5,21 +5,20 @@ # # Please, refer to the LICENSE file in the root directory. # SPDX-License-Identifier: BSD-3-Clause -from types import ModuleType import numpy as np from icon4py.model.common.grid import gridfile -def revert_repeated_index_to_invalid(offset: np.ndarray, array_ns: ModuleType): +def revert_repeated_index_to_invalid(offset: np.ndarray): num_elements = offset.shape[0] for i in range(num_elements): # convert repeated indices back into -1 - for val in array_ns.flip(offset[i, :]): - if array_ns.count_nonzero(val == offset[i, :]) > 1: - unique_values, counts = array_ns.unique(offset[i, :], return_counts=True) + for val in np.flip(offset[i, :]): + if np.count_nonzero(val == offset[i, :]) > 1: + unique_values, counts = np.unique(offset[i, :], return_counts=True) rep_values = unique_values[counts > 1] - rep_indices = array_ns.where(array_ns.isin(offset[i, :], rep_values))[0] + rep_indices = np.where(np.isin(offset[i, :], rep_values))[0] offset[i, rep_indices[1:]] = gridfile.GridFile.INVALID_INDEX return offset diff --git a/model/common/tests/common/decomposition/mpi_tests/test_mpi_decomposition.py b/model/common/tests/common/decomposition/mpi_tests/test_mpi_decomposition.py index 0f0d4c59f1..ae6a6f5f39 100644 --- a/model/common/tests/common/decomposition/mpi_tests/test_mpi_decomposition.py +++ b/model/common/tests/common/decomposition/mpi_tests/test_mpi_decomposition.py @@ -279,6 +279,7 @@ def test_exchange_on_dummy_data( @pytest.mark.mpi @pytest.mark.datatest +@pytest.mark.embedded_only @pytest.mark.parametrize("processor_props", [False], indirect=True) def test_halo_exchange_for_sparse_field( interpolation_savepoint: serialbox.InterpolationSavepoint, diff --git a/model/testing/src/icon4py/model/testing/serialbox.py b/model/testing/src/icon4py/model/testing/serialbox.py index 9cdb912e2d..ac7d409a6c 100644 --- a/model/testing/src/icon4py/model/testing/serialbox.py +++ b/model/testing/src/icon4py/model/testing/serialbox.py @@ -72,7 +72,7 @@ def wrapper(self, *args, **kwargs): # as a workaround for the lack of support for optional fields in gt4py. shp = (1,) * len(dims) return gtx.as_field( - dims, np.zeros(shp, dtype=dtype), allocator=self.backend + dims, self.xp.zeros(shp, dtype=dtype), allocator=self.backend ) else: return None @@ -503,9 +503,8 @@ def construct_icon_grid( def potentially_revert_icon_index_transformation(ar): return ar else: - potentially_revert_icon_index_transformation = functools.partial( - grid_utils.revert_repeated_index_to_invalid, - array_ns=data_alloc.import_array_ns(backend), + potentially_revert_icon_index_transformation = ( + grid_utils.revert_repeated_index_to_invalid ) c2e2c = self.c2e2c() diff --git a/pyproject.toml b/pyproject.toml index e349356eb7..df2c6e3d98 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -361,7 +361,7 @@ url = 'https://gridtools.github.io/pypi/' [tool.uv.sources] dace = {index = "gridtools"} -ghex = {git = "https://github.com/msimberg/GHEX.git", branch = "async-mpi"} +ghex = {git = "https://github.com/philip-paul-mueller/GHEX.git", branch = "phimuell__async-mpi-2"} # gt4py = {git = "https://github.com/GridTools/gt4py", branch = "main"} # gt4py = {index = "test.pypi"} icon4py-atmosphere-advection = {workspace = true} diff --git a/uv.lock b/uv.lock index f5641ba1e4..aca8ec23cc 100644 --- a/uv.lock +++ b/uv.lock @@ -1362,7 +1362,7 @@ wheels = [ [[package]] name = "ghex" version = "0.4.1" -source = { git = "https://github.com/msimberg/GHEX.git?branch=async-mpi#6d896166994cedbcfc50da1873239a5edb212e3f" } +source = { git = "https://github.com/philip-paul-mueller/GHEX.git?branch=phimuell__async-mpi-2#80c0650fdae40bdd40e0435e5687267bada4cdd2" } dependencies = [ { name = "mpi4py" }, { name = "numpy" }, @@ -1887,7 +1887,7 @@ requires-dist = [ { name = "cupy-cuda12x", marker = "extra == 'cuda12'", specifier = ">=13.0" }, { name = "dace", specifier = "==43!2026.1.21", index = "https://gridtools.github.io/pypi/" }, { name = "datashader", marker = "extra == 'io'", specifier = ">=0.16.1" }, - { name = "ghex", marker = "extra == 'distributed'", git = "https://github.com/msimberg/GHEX.git?branch=async-mpi" }, + { name = "ghex", marker = "extra == 'distributed'", git = "https://github.com/philip-paul-mueller/GHEX.git?branch=phimuell__async-mpi-2" }, { name = "gt4py", specifier = "==1.1.3" }, { name = "gt4py", extras = ["cuda11"], marker = "extra == 'cuda11'" }, { name = "gt4py", extras = ["cuda12"], marker = "extra == 'cuda12'" },