Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
32 commits
Select commit Hold shift + click to select a range
1fd6389
Attempt to add cuda support to distributed ci pipeline
msimberg Jan 28, 2026
cbb1891
Add cuda12 extra
msimberg Jan 28, 2026
bbb151c
Add nvidia-cuda-toolkit
msimberg Jan 28, 2026
b9be7fb
Revert "refactor: testing infrastructure (#1002)"
msimberg Jan 29, 2026
731283a
Use cxi hook in ci
msimberg Jan 29, 2026
ea2b3aa
Try mpich
msimberg Jan 29, 2026
8f04d36
Reduce tests
msimberg Jan 29, 2026
9f96b70
Try using manually built openmpi
msimberg Jan 29, 2026
9fce9b5
Debugging
msimberg Jan 30, 2026
c6a767e
Remove debug prints
msimberg Jan 30, 2026
0b9d26b
Merge remote-tracking branch 'origin/main' into distributed-tests-dac…
msimberg Jan 30, 2026
adb1ee6
Unrevert test download changes
msimberg Jan 30, 2026
b0321e7
Numpy/cupy issues
msimberg Jan 30, 2026
c62979c
Enable shm, lnx, xpmem support in libfabric
msimberg Jan 30, 2026
b4071d0
Linting
msimberg Jan 30, 2026
6eb3d8d
Enable GPU support for GHEX
msimberg Jan 30, 2026
28b1b1b
Set appropriate gcc for cuda
msimberg Feb 1, 2026
73a5b5b
Explicitly set OpenMPI settings
msimberg Feb 1, 2026
d8e90e4
Don't dlopen cuda and gdrcopy
msimberg Feb 3, 2026
67cfdb5
Update comments and clean up options
msimberg Feb 3, 2026
c81af9e
Try ubuntu lts release for distributed ci
msimberg Feb 3, 2026
790612a
Set gpu binding through SLURM_GPUS_PER_TASK
msimberg Feb 3, 2026
64482e8
Enable all tests again
msimberg Feb 3, 2026
b3eef3a
Clean up names in distributed.yml
msimberg Feb 3, 2026
d6f71d6
Update base image to ubuntu 25.10
msimberg Feb 3, 2026
7b68f7b
Merge remote-tracking branch 'origin/main' into distributed-tests-dac…
msimberg Feb 4, 2026
518bbde
Mark distributed compute_geofac_div test embedded only, like single-r…
msimberg Feb 4, 2026
c1eed7f
Use philip's async-mpi branch (fixes gpu buffer stride computation)
msimberg Feb 4, 2026
d08b60c
Increase time limit for distributed dace tests
msimberg Feb 4, 2026
148850c
Increase time limit for distributed dace_gpu common tests
msimberg Feb 4, 2026
c6d0042
Merge branch 'main' into distributed-tests-dace-gpu
jcanton Feb 5, 2026
0c727f5
sorry2
jcanton Feb 5, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 22 additions & 12 deletions ci/distributed.yml
Original file line number Diff line number Diff line change
Expand Up @@ -38,34 +38,41 @@ build_distributed_baseimage_aarch64:
DOCKERFILE: ci/docker/checkout_mpi.Dockerfile
DOCKER_BUILD_ARGS: '["PYVERSION=$PYVERSION", "BASE_IMAGE=${BASE_IMAGE_${PYVERSION_PREFIX}}", "VENV=${UV_PROJECT_ENVIRONMENT}"]'
PERSIST_IMAGE_NAME: $CSCS_REGISTRY_PATH/public/$ARCH/icon4py/icon4py-ci:$CI_COMMIT_SHA-$UV_PROJECT_ENVIRONMENT-$PYVERSION-mpi
USE_MPI: NO
SLURM_MPI_TYPE: pmix
PMIX_MCA_psec: native
PMIX_MCA_gds: "^shmem2"

.build_distributed_cpu:
.build_distributed:
extends: [.build_distributed_template]
variables:
UV_PROJECT_ENVIRONMENT: venv_dist

build_distributed_cpu:
build_distributed:
stage: image
extends: [.container-builder-cscs-gh200, .build_distributed_cpu]
extends: [.container-builder-cscs-gh200, .build_distributed]
needs: [build_distributed_baseimage_aarch64]

.test_template_distributed:
timeout: 8h
image: $CSCS_REGISTRY_PATH/public/$ARCH/icon4py/icon4py-ci:$CI_COMMIT_SHA-$UV_PROJECT_ENVIRONMENT-$PYVERSION-mpi
extends: [.container-runner-santis-gh200, .build_distributed_cpu]
needs: [build_distributed_cpu]
extends: [.container-runner-santis-gh200, .build_distributed]
needs: [build_distributed]
variables:
SLURM_JOB_NUM_NODES: 1
SLURM_CPU_BIND: 'verbose'
SLURM_NTASKS: 4
SLURM_GPUS_PER_TASK: 1
ICON4PY_TEST_DATA_PATH: "/icon4py/testdata"
ICON4PY_ENABLE_GRID_DOWNLOAD: false
ICON4PY_ENABLE_TESTDATA_DOWNLOAD: false
CSCS_ADDITIONAL_MOUNTS: '["/capstor/store/cscs/userlab/cwci02/icon4py/ci/testdata:$ICON4PY_TEST_DATA_PATH"]'
# Do not use libfabric from the host system. Libfabric with slingshot
# support is built into the container image.
USE_MPI: NO
# Use libfabric slingshot (cxi) provider and recommended settings from
# https://docs.cscs.ch/software/communication/openmpi.
SLURM_MPI_TYPE: pmix
PMIX_MCA_psec: native
FI_PROVIDER: cxi
OMPI_MCA_pml: cm
OMPI_MCA_mtl: ofi

.test_distributed_aarch64:
stage: test
Expand All @@ -80,14 +87,17 @@ build_distributed_cpu:
parallel:
matrix:
- COMPONENT: [atmosphere/diffusion, atmosphere/dycore, common]
BACKEND: [embedded, gtfn_cpu, dace_cpu]
BACKEND: [embedded, gtfn_cpu, dace_cpu, dace_gpu, gtfn_gpu]
rules:
- if: $COMPONENT == 'atmosphere/diffusion'
variables:
SLURM_TIMELIMIT: '00:05:00'
- if: $COMPONENT == 'atmosphere/dycore' && $BACKEND == 'dace_cpu'
- if: $COMPONENT == 'atmosphere/dycore' && ($BACKEND == 'dace_cpu' || $BACKEND == 'dace_gpu')
variables:
SLURM_TIMELIMIT: '00:30:00'
- if: $COMPONENT == 'common' && $BACKEND == 'dace_gpu'
variables:
SLURM_TIMELIMIT: '00:20:00'
SLURM_TIMELIMIT: '00:45:00'
- if: $COMPONENT == 'atmosphere/dycore'
variables:
SLURM_TIMELIMIT: '00:15:00'
Expand Down
133 changes: 115 additions & 18 deletions ci/docker/base_mpi.Dockerfile
Original file line number Diff line number Diff line change
@@ -1,27 +1,124 @@
FROM ubuntu:25.04
FROM ubuntu:25.10

ENV LANG C.UTF-8
ENV LC_ALL C.UTF-8

ARG DEBIAN_FRONTEND=noninteractive
RUN apt-get update -qq && apt-get install -qq -y --no-install-recommends \
strace \
build-essential \
tar \
wget \
curl \
libboost-dev \
libnuma-dev \
libopenmpi-dev \
ca-certificates \
libssl-dev \
autoconf \
automake \
libtool \
pkg-config \
libreadline-dev \
git && \
RUN apt-get update && \
apt-get install -y --no-install-recommends \
autoconf \
automake \
build-essential \
ca-certificates \
curl \
git \
libboost-dev \
libconfig-dev \
libcurl4-openssl-dev \
libfuse-dev \
libjson-c-dev \
libnl-3-dev \
libnuma-dev \
libreadline-dev \
libsensors-dev \
libssl-dev \
libtool \
libuv1-dev \
libyaml-dev \
nvidia-cuda-dev \
nvidia-cuda-toolkit \
nvidia-cuda-toolkit-gcc \
pkg-config \
python3 \
strace \
tar \
wget && \
rm -rf /var/lib/apt/lists/*

ENV CC=/usr/bin/cuda-gcc
ENV CXX=/usr/bin/cuda-g++
ENV CUDAHOSTCXX=/usr/bin/cuda-g++

# Install OpenMPI configured with libfabric, libcxi, and gdrcopy support for use
# on Alps. This is based on examples in
# https://github.com/eth-cscs/cray-network-stack.
ARG gdrcopy_version=2.5.1
RUN set -eux; \
git clone --depth 1 --branch "v${gdrcopy_version}" https://github.com/NVIDIA/gdrcopy.git; \
cd gdrcopy; \
make lib -j"$(nproc)" lib_install; \
cd /; \
rm -rf /gdrcopy; \
ldconfig

ARG cassini_headers_version=release/shs-13.0.0
RUN set -eux; \
git clone --depth 1 --branch "${cassini_headers_version}" https://github.com/HewlettPackard/shs-cassini-headers.git; \
cd shs-cassini-headers; \
cp -r include/* /usr/include/; \
cp -r share/* /usr/share/; \
rm -rf /shs-cassini-headers

ARG cxi_driver_version=release/shs-13.0.0
RUN set -eux; \
git clone --depth 1 --branch "${cxi_driver_version}" https://github.com/HewlettPackard/shs-cxi-driver.git; \
cd shs-cxi-driver; \
cp -r include/* /usr/include/; \
rm -rf /shs-cxi-driver

ARG libcxi_version=release/shs-13.0.0
RUN set -eux; \
git clone --depth 1 --branch "${libcxi_version}" https://github.com/HewlettPackard/shs-libcxi.git; \
cd shs-libcxi; \
./autogen.sh; \
./configure \
--with-cuda; \
make -j"$(nproc)" install; \
cd /; \
rm -rf /shs-libcxi; \
ldconfig

ARG xpmem_version=0d0bad4e1d07b38d53ecc8f20786bb1328c446da
RUN set -eux; \
git clone https://github.com/hpc/xpmem.git; \
cd xpmem; \
git checkout "${xpmem_version}"; \
./autogen.sh; \
./configure --disable-kernel-module; \
make -j"$(nproc)" install; \
cd /; \
rm -rf /xpmem; \
ldconfig

# NOTE: libfabric's configure does not find xpmem unless the install prefix is
# passed explicitly, i.e. --enable-xpmem=<prefix>.
ARG libfabric_version=v2.4.0
RUN set -eux; \
git clone --depth 1 --branch "${libfabric_version}" https://github.com/ofiwg/libfabric.git; \
cd libfabric; \
./autogen.sh; \
./configure \
--with-cuda \
--enable-xpmem=/usr \
--enable-tcp \
--enable-cxi; \
make -j"$(nproc)" install; \
cd /; \
rm -rf /libfabric; \
ldconfig

ARG openmpi_version=5.0.9
RUN set -eux; \
curl -fsSL "https://download.open-mpi.org/release/open-mpi/v5.0/openmpi-${openmpi_version}.tar.gz" -o /tmp/ompi.tar.gz; \
tar -C /tmp -xzf /tmp/ompi.tar.gz; \
cd "/tmp/openmpi-${openmpi_version}"; \
./configure \
--with-ofi \
--with-cuda=/usr; \
make -j"$(nproc)" install; \
cd /; \
rm -rf "/tmp/openmpi-${openmpi_version}" /tmp/ompi.tar.gz; \
ldconfig

# Install uv: https://docs.astral.sh/uv/guides/integration/docker
COPY --from=ghcr.io/astral-sh/uv:0.9.24@sha256:816fdce3387ed2142e37d2e56e1b1b97ccc1ea87731ba199dc8a25c04e4997c5 /uv /uvx /bin/
8 changes: 6 additions & 2 deletions ci/docker/checkout_mpi.Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -7,5 +7,9 @@ WORKDIR /icon4py
ARG PYVERSION
ARG VENV
ENV UV_PROJECT_ENVIRONMENT=$VENV
ENV MPI4PY_BUILD_BACKEND="scikit-build-core"
RUN uv sync --extra distributed --python=$PYVERSION
ENV MPI4PY_BUILD_BACKEND=scikit-build-core
ENV GHEX_USE_GPU=ON
ENV GHEX_GPU_TYPE=NVIDIA
ENV GHEX_GPU_ARCH=90
ENV GHEX_TRANSPORT_BACKEND=MPI
RUN uv sync --extra all --extra cuda12 --python=$PYVERSION
11 changes: 5 additions & 6 deletions model/common/src/icon4py/model/common/grid/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,21 +5,20 @@
#
# Please, refer to the LICENSE file in the root directory.
# SPDX-License-Identifier: BSD-3-Clause
from types import ModuleType

import numpy as np

from icon4py.model.common.grid import gridfile


def revert_repeated_index_to_invalid(offset: np.ndarray, array_ns: ModuleType):
def revert_repeated_index_to_invalid(offset: np.ndarray):
num_elements = offset.shape[0]
for i in range(num_elements):
# convert repeated indices back into -1
for val in array_ns.flip(offset[i, :]):
if array_ns.count_nonzero(val == offset[i, :]) > 1:
unique_values, counts = array_ns.unique(offset[i, :], return_counts=True)
for val in np.flip(offset[i, :]):
if np.count_nonzero(val == offset[i, :]) > 1:
unique_values, counts = np.unique(offset[i, :], return_counts=True)
rep_values = unique_values[counts > 1]
rep_indices = array_ns.where(array_ns.isin(offset[i, :], rep_values))[0]
rep_indices = np.where(np.isin(offset[i, :], rep_values))[0]
offset[i, rep_indices[1:]] = gridfile.GridFile.INVALID_INDEX
return offset
Original file line number Diff line number Diff line change
Expand Up @@ -279,6 +279,7 @@ def test_exchange_on_dummy_data(

@pytest.mark.mpi
@pytest.mark.datatest
@pytest.mark.embedded_only
@pytest.mark.parametrize("processor_props", [False], indirect=True)
def test_halo_exchange_for_sparse_field(
interpolation_savepoint: serialbox.InterpolationSavepoint,
Expand Down
7 changes: 3 additions & 4 deletions model/testing/src/icon4py/model/testing/serialbox.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ def wrapper(self, *args, **kwargs):
# as a workaround for the lack of support for optional fields in gt4py.
shp = (1,) * len(dims)
return gtx.as_field(
dims, np.zeros(shp, dtype=dtype), allocator=self.backend
dims, self.xp.zeros(shp, dtype=dtype), allocator=self.backend
)
else:
return None
Expand Down Expand Up @@ -503,9 +503,8 @@ def construct_icon_grid(
def potentially_revert_icon_index_transformation(ar):
return ar
else:
potentially_revert_icon_index_transformation = functools.partial(
grid_utils.revert_repeated_index_to_invalid,
array_ns=data_alloc.import_array_ns(backend),
potentially_revert_icon_index_transformation = (
grid_utils.revert_repeated_index_to_invalid
)

c2e2c = self.c2e2c()
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -361,7 +361,7 @@ url = 'https://gridtools.github.io/pypi/'

[tool.uv.sources]
dace = {index = "gridtools"}
ghex = {git = "https://github.com/msimberg/GHEX.git", branch = "async-mpi"}
ghex = {git = "https://github.com/philip-paul-mueller/GHEX.git", branch = "phimuell__async-mpi-2"}
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is updated because ghex-org/GHEX#190 contains a bugfix for how strides are computed for GPU buffers. The tests fail with both GHEX master and the async-mpi branch. We should get ghex-org/GHEX#190 merged ASAP so that we can use GHEX master here.

# gt4py = {git = "https://github.com/GridTools/gt4py", branch = "main"}
# gt4py = {index = "test.pypi"}
icon4py-atmosphere-advection = {workspace = true}
Expand Down
4 changes: 2 additions & 2 deletions uv.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.