From 00066e9321c64d9b74a3902c431fe5a15ec564c2 Mon Sep 17 00:00:00 2001 From: Dante Gama Dessavre Date: Thu, 20 Nov 2025 11:33:48 -0600 Subject: [PATCH 1/9] FEA first commit for ARM CUDA wheels --- .github/runs-on.yml | 4 ++ .github/workflows/cuda13.yml | 43 ++++++++++++-- .github/workflows/main.yml | 29 ++++++++- doc/contrib/ci.rst | 21 +++++++ doc/install.rst | 7 ++- ops/pipeline/build-cuda-arm64.sh | 75 ++++++++++++++++++++++++ ops/pipeline/build-cuda13.sh | 18 +++++- ops/pipeline/test-python-wheel-cuda13.sh | 15 ++++- ops/pipeline/test-python-wheel-impl.sh | 7 ++- ops/pipeline/test-python-wheel.sh | 4 +- ops/script/release_artifacts.py | 1 + 11 files changed, 211 insertions(+), 13 deletions(-) create mode 100755 ops/pipeline/build-cuda-arm64.sh diff --git a/.github/runs-on.yml b/.github/runs-on.yml index 1d97b8c5de21..b005093420d8 100644 --- a/.github/runs-on.yml +++ b/.github/runs-on.yml @@ -34,6 +34,10 @@ runners: cpu: 16 family: ["c6g", "c7g"] image: linux-arm64 + linux-arm64-gpu: + family: ["g5g.xlarge"] + image: linux-arm64 + spot: "false" windows-gpu: family: ["g4dn.2xlarge"] image: windows-amd64 diff --git a/.github/workflows/cuda13.yml b/.github/workflows/cuda13.yml index 5ea448f25cce..b6f33291d178 100644 --- a/.github/workflows/cuda13.yml +++ b/.github/workflows/cuda13.yml @@ -36,6 +36,29 @@ jobs: --s3-bucket ${{ env.RUNS_ON_S3_BUCKET_CACHE }} \ --prefix cache/${{ github.run_id }}/build-cuda13 \ build/testxgboost python-package/dist/*.whl + + build-cuda13-arm64: + name: Build CUDA 13 (ARM64) + runs-on: + - runs-on=${{ github.run_id }} + - runner=linux-arm64-cpu + - tag=cuda13-build-cuda13-arm64 + steps: + # Restart Docker daemon so that it recognizes the ephemeral disks + - run: sudo systemctl restart docker + - uses: actions/checkout@v4 + with: + submodules: "true" + - name: Log into Docker registry (AWS ECR) + run: bash ops/pipeline/login-docker-registry.sh + - run: | + bash ops/pipeline/build-cuda13.sh + - name: Stash files + run: | + python3 ops/pipeline/manage-artifacts.py upload \ + --s3-bucket ${{ env.RUNS_ON_S3_BUCKET_CACHE }} \ + --prefix cache/${{ github.run_id }}/build-cuda13-arm64 \ + python-package/dist/*.whl test-cpp-cuda13: name: Google Test (C++) with CUDA 13 needs: [build-cuda13] @@ -62,12 +85,22 @@ jobs: - run: | bash ops/pipeline/test-cpp-cuda13.sh test-python-cuda13: - name: Run Python tests with CUDA 13 - needs: [build-cuda13] + name: Run Python tests with CUDA 13 (${{ matrix.description }}) + needs: [build-cuda13, build-cuda13-arm64] runs-on: - runs-on=${{ github.run_id }} - - runner=linux-amd64-gpu - - tag=cuda13-test-python-cuda13 + - runner=${{ matrix.runner }} + - tag=cuda13-test-python-cuda13-${{ matrix.description }} + strategy: + fail-fast: false + matrix: + include: + - description: amd64 + runner: linux-amd64-gpu + artifact_from: build-cuda13 + - description: arm64 + runner: linux-arm64-gpu + artifact_from: build-cuda13-arm64 steps: # Restart Docker daemon so that it recognizes the ephemeral disks - run: sudo systemctl restart docker @@ -80,7 +113,7 @@ jobs: run: | python3 ops/pipeline/manage-artifacts.py download \ --s3-bucket ${{ env.RUNS_ON_S3_BUCKET_CACHE }} \ - --prefix cache/${{ github.run_id }}/build-cuda13 \ + --prefix cache/${{ github.run_id }}/${{ matrix.artifact_from }} \ --dest-dir wheelhouse \ *.whl - name: Run Python tests diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 11fb4ff0a7df..5f7aa9aa95dd 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -94,6 +94,28 @@ jobs: bash ops/pipeline/build-cuda.sh \ xgb-ci.gpu_build_rockylinux8_dev_ver enable-rmm + build-cuda-arm64: + name: Build CUDA + manylinux_2_28_aarch64 wheel + runs-on: + - runs-on=${{ github.run_id }} + - runner=linux-arm64-cpu + - tag=main-build-cuda-arm64 + steps: + # Restart Docker daemon so that it recognizes the ephemeral disks + - run: sudo systemctl restart docker + - uses: actions/checkout@v4 + with: + submodules: "true" + - name: Log into Docker registry (AWS ECR) + run: bash ops/pipeline/login-docker-registry.sh + - run: bash ops/pipeline/build-cuda-arm64.sh + - name: Stash files + run: | + python3 ops/pipeline/manage-artifacts.py upload \ + --s3-bucket ${{ env.RUNS_ON_S3_BUCKET_CACHE }} \ + --prefix cache/${{ github.run_id }}/build-cuda-arm64 \ + python-package/dist/*.whl + build-python-wheels-arm64: name: Build manylinux_2_28_aarch64 wheel runs-on: @@ -211,7 +233,7 @@ jobs: test-python-wheel: name: Run Python tests (${{ matrix.description }}) - needs: [build-cuda, build-python-wheels-arm64] + needs: [build-cuda, build-cuda-arm64, build-python-wheels-arm64] runs-on: - runs-on - runner=${{ matrix.runner }} @@ -242,6 +264,11 @@ jobs: suite: cpu-arm64 runner: linux-arm64-cpu artifact_from: build-python-wheels-arm64 + - description: gpu-arm64 + image_repo: xgb-ci.gpu_aarch64 + suite: gpu-arm64 + runner: linux-arm64-gpu + artifact_from: build-cuda-arm64 steps: # Restart Docker daemon so that it recognizes the ephemeral disks - run: sudo systemctl restart docker diff --git a/doc/contrib/ci.rst b/doc/contrib/ci.rst index 964d833e4756..1e9319b1593d 100644 --- a/doc/contrib/ci.rst +++ b/doc/contrib/ci.rst @@ -198,6 +198,15 @@ Examples: useful tasks for local development --image-uri ${DOCKER_REGISTRY}/xgb-ci.gpu_build_rockylinux8:main \ -- ops/pipeline/build-cuda-impl.sh +* Build XGBoost with GPU support on Linux ARM64 + + .. code-block:: bash + + export DOCKER_REGISTRY=492475357299.dkr.ecr.us-west-2.amazonaws.com + python3 ops/docker_run.py \ + --image-uri ${DOCKER_REGISTRY}/xgb-ci.gpu_build_rockylinux8_aarch64:main \ + -- ops/pipeline/build-cuda-impl.sh + * Run Python tests .. code-block:: bash @@ -217,6 +226,16 @@ Examples: useful tasks for local development --use-gpus \ -- ops/pipeline/test-python-wheel-impl.sh gpu +* Run Python tests with GPU algorithm on Linux ARM64 + + .. code-block:: bash + + export DOCKER_REGISTRY=492475357299.dkr.ecr.us-west-2.amazonaws.com + python3 ops/docker_run.py \ + --image-uri ${DOCKER_REGISTRY}/xgb-ci.gpu_aarch64:main \ + --use-gpus \ + -- ops/pipeline/test-python-wheel-impl.sh gpu-arm64 + * Run Python tests with GPU algorithm, with multiple GPUs .. code-block:: bash @@ -287,6 +306,8 @@ To opt into self-hosted runners (enabled by RunsOn), we use the following specia - tag=[unique tag that uniquely identifies the job in the GH Action workflow] where the runner is defined in ``.github/runs-on.yml``. +For CUDA-enabled ARM64 builds and tests we rely on the ``linux-arm64-gpu`` runner, +which provisions a Graviton + NVIDIA GPU instance. =================================================================== The Lay of the Land: how CI pipelines are organized in the codebase diff --git a/doc/install.rst b/doc/install.rst index 7fcea0d3b68c..ea466c624acf 100644 --- a/doc/install.rst +++ b/doc/install.rst @@ -67,7 +67,7 @@ Capabilities of binary wheels for each platform: +=====================+=========+======================+ | Linux x86_64 | |tick| | |tick| | +---------------------+---------+----------------------+ -| Linux aarch64 | |cross| | |cross| | +| Linux aarch64 | |tick| | |cross| | +---------------------+---------+----------------------+ | MacOS x86_64 | |cross| | |cross| | +---------------------+---------+----------------------+ @@ -76,6 +76,11 @@ Capabilities of binary wheels for each platform: | Windows | |tick| | |cross| | +---------------------+---------+----------------------+ +Linux aarch64 wheels now ship with CUDA support, so ``pip install xgboost`` on +modern Jetson or Graviton machines provides the same GPU functionality as the +Linux x86_64 wheel. Multi-node and multi-GPU training remain experimental on +ARM64 at this time. + Minimal installation (CPU-only) ******************************* The default installation with ``pip`` will install the full XGBoost package, including the support for the GPU algorithms and federated learning. diff --git a/ops/pipeline/build-cuda-arm64.sh b/ops/pipeline/build-cuda-arm64.sh new file mode 100755 index 000000000000..f6078cba5298 --- /dev/null +++ b/ops/pipeline/build-cuda-arm64.sh @@ -0,0 +1,75 @@ +#!/bin/bash +## Build XGBoost with CUDA for Linux ARM64 + +set -euo pipefail + +if [[ -z "${GITHUB_SHA:-}" ]] +then + echo "Make sure to set environment variable GITHUB_SHA" + exit 1 +fi + +IMAGE_REPO="xgb-ci.gpu_build_rockylinux8_aarch64" +export USE_FEDERATED=1 +export USE_RMM=0 + +source ops/pipeline/classify-git-branch.sh +source ops/pipeline/get-docker-registry-details.sh +source ops/pipeline/get-image-tag.sh + +WHEEL_TAG=manylinux_2_28_aarch64 +BUILD_IMAGE_URI="${DOCKER_REGISTRY_URL}/${IMAGE_REPO}:${IMAGE_TAG}" +MANYLINUX_IMAGE_URI="${DOCKER_REGISTRY_URL}/xgb-ci.${WHEEL_TAG}:${IMAGE_TAG}" + +echo "--- Build with CUDA (ARM64)" + +if [[ ($is_pull_request == 1) || ($is_release_branch == 0) ]] +then + export BUILD_ONLY_SM75=1 +else + export BUILD_ONLY_SM75=0 +fi + +set -x + +python3 ops/docker_run.py \ + --image-uri ${BUILD_IMAGE_URI} \ + --run-args='-e BUILD_ONLY_SM75 -e USE_RMM -e USE_FEDERATED' \ + -- ops/pipeline/build-cuda-impl.sh + +echo "--- Audit binary wheel to ensure it's compliant with ${WHEEL_TAG} standard" +python3 ops/docker_run.py \ + --image-uri ${MANYLINUX_IMAGE_URI} \ + -- auditwheel repair --only-plat \ + --plat ${WHEEL_TAG} python-package/dist/*.whl +python3 -m wheel tags --python-tag py3 --abi-tag none --platform ${WHEEL_TAG} --remove \ + wheelhouse/*.whl +mv -v wheelhouse/*.whl python-package/dist/ +if ! unzip -l ./python-package/dist/*.whl | grep libgomp > /dev/null; then + echo "error: libgomp.so was not vendored in the wheel" + exit -1 +fi + +# Check size of wheel +pydistcheck --config python-package/pyproject.toml python-package/dist/*.whl + +echo "--- Generate meta info" +python3 ops/script/format_wheel_meta.py \ + --wheel-path python-package/dist/*.whl \ + --commit-hash ${GITHUB_SHA} \ + --platform-tag ${WHEEL_TAG} \ + --meta-path python-package/dist/ + +echo "--- Upload Python wheel" +if [[ ($is_pull_request == 0) && ($is_release_branch == 1) ]] +then + python3 ops/pipeline/manage-artifacts.py upload \ + --s3-bucket xgboost-nightly-builds \ + --prefix ${BRANCH_NAME}/${GITHUB_SHA} --make-public \ + python-package/dist/*.whl + python3 ops/pipeline/manage-artifacts.py upload \ + --s3-bucket xgboost-nightly-builds \ + --prefix ${BRANCH_NAME} --make-public \ + python-package/dist/meta.json +fi + diff --git a/ops/pipeline/build-cuda13.sh b/ops/pipeline/build-cuda13.sh index 8e24e8147b70..36caec14ae31 100755 --- a/ops/pipeline/build-cuda13.sh +++ b/ops/pipeline/build-cuda13.sh @@ -9,15 +9,29 @@ then exit 1 fi -IMAGE_REPO="xgb-ci.gpu_build_cuda13_rockylinux8" export USE_RMM=0 export USE_FEDERATED=0 +ARCH=$(uname -m) +case "${ARCH}" in + x86_64) + IMAGE_REPO="xgb-ci.gpu_build_cuda13_rockylinux8" + WHEEL_TAG=manylinux_2_28_x86_64 + ;; + aarch64) + IMAGE_REPO="xgb-ci.gpu_build_cuda13_rockylinux8_aarch64" + WHEEL_TAG=manylinux_2_28_aarch64 + ;; + *) + echo "Unsupported architecture: ${ARCH}" + exit 1 + ;; +esac + source ops/pipeline/classify-git-branch.sh source ops/pipeline/get-docker-registry-details.sh source ops/pipeline/get-image-tag.sh -WHEEL_TAG=manylinux_2_28_x86_64 BUILD_IMAGE_URI="${DOCKER_REGISTRY_URL}/${IMAGE_REPO}:${IMAGE_TAG}" MANYLINUX_IMAGE_URI="${DOCKER_REGISTRY_URL}/xgb-ci.${WHEEL_TAG}:${IMAGE_TAG}" diff --git a/ops/pipeline/test-python-wheel-cuda13.sh b/ops/pipeline/test-python-wheel-cuda13.sh index 279411779927..495fe5672aa5 100755 --- a/ops/pipeline/test-python-wheel-cuda13.sh +++ b/ops/pipeline/test-python-wheel-cuda13.sh @@ -6,7 +6,20 @@ set -euo pipefail source ops/pipeline/get-docker-registry-details.sh source ops/pipeline/get-image-tag.sh -IMAGE_REPO="xgb-ci.gpu_build_cuda13_rockylinux8" +ARCH=$(uname -m) +case "${ARCH}" in + x86_64) + IMAGE_REPO="xgb-ci.gpu_build_cuda13_rockylinux8" + ;; + aarch64) + IMAGE_REPO="xgb-ci.gpu_build_cuda13_rockylinux8_aarch64" + ;; + *) + echo "Unsupported architecture: ${ARCH}" + exit 1 + ;; +esac + IMAGE_URI="${DOCKER_REGISTRY_URL}/${IMAGE_REPO}:${IMAGE_TAG}" set -x diff --git a/ops/pipeline/test-python-wheel-impl.sh b/ops/pipeline/test-python-wheel-impl.sh index 5c24e31210d2..88270e85bf5a 100755 --- a/ops/pipeline/test-python-wheel-impl.sh +++ b/ops/pipeline/test-python-wheel-impl.sh @@ -13,7 +13,7 @@ suite="$1" # Cannot set -u before Conda env activation case "$suite" in - gpu|mgpu) + gpu|mgpu|gpu-arm64) source activate gpu_test ;; cpu) @@ -42,6 +42,11 @@ case "$suite" in python -c 'from cupy.cuda import jitify; jitify._init_module()' pytest -v -s -rxXs --durations=0 -m 'not mgpu' tests/python-gpu ;; + gpu-arm64) + echo "-- Run Python tests, using a single GPU (ARM64)" + python -c 'from cupy.cuda import jitify; jitify._init_module()' + pytest -v -s -rxXs --durations=0 -m 'not mgpu' tests/python-gpu + ;; mgpu) echo "-- Run Python tests, using multiple GPUs" python -c 'from cupy.cuda import jitify; jitify._init_module()' diff --git a/ops/pipeline/test-python-wheel.sh b/ops/pipeline/test-python-wheel.sh index 9ccdc42042d5..bc83504f2fba 100755 --- a/ops/pipeline/test-python-wheel.sh +++ b/ops/pipeline/test-python-wheel.sh @@ -5,14 +5,14 @@ set -euo pipefail if [[ "$#" -lt 2 ]] then - echo "Usage: $0 {gpu|mgpu|cpu|cpu-arm64} [image_repo]" + echo "Usage: $0 {gpu|mgpu|gpu-arm64|cpu|cpu-arm64} [image_repo]" exit 1 fi suite="$1" image_repo="$2" -if [[ "$suite" == "gpu" || "$suite" == "mgpu" ]] +if [[ "$suite" == "gpu" || "$suite" == "mgpu" || "$suite" == "gpu-arm64" ]] then gpu_option="--use-gpus" else diff --git a/ops/script/release_artifacts.py b/ops/script/release_artifacts.py index ef05a71420ac..a26f5c3ba449 100644 --- a/ops/script/release_artifacts.py +++ b/ops/script/release_artifacts.py @@ -154,6 +154,7 @@ def download_python_wheels(branch: str, commit_hash: str, outdir: Path) -> None: ] cu13_platforms = [ "manylinux_2_28_x86_64", + "manylinux_2_28_aarch64", ] minimal_platforms = [ "win_amd64", From 0df024ff4ff48758edc478c01012625f4c9ab4b8 Mon Sep 17 00:00:00 2001 From: Hyunsu Cho Date: Tue, 9 Dec 2025 20:26:12 -0800 Subject: [PATCH 2/9] Enable building CUDA 12 wheel on ARM64 --- .github/workflows/main.yml | 57 ++++++++++------------- ops/pipeline/build-cuda.sh | 39 ++++++++++------ ops/pipeline/build-python-wheels-arm64.sh | 51 -------------------- 3 files changed, 48 insertions(+), 99 deletions(-) delete mode 100755 ops/pipeline/build-python-wheels-arm64.sh diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 19cb064e22d7..e44b29a10161 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -31,11 +31,21 @@ jobs: - run: bash ops/pipeline/build-cpu.sh build-cuda: - name: Build CUDA + manylinux_2_28_x86_64 wheel + name: Build CUDA + manylinux_2_28_${{ matrix.arch }} wheel runs-on: - runs-on=${{ github.run_id }} - - runner=linux-amd64-cpu - - tag=main-build-cuda + - runner=${{ matrix.runner }} + - tag=main-build-cuda-${{ matrix.arch }} + strategy: + fail-fast: false + matrix: + include: + - arch: aarch64 + runner: linux-arm64-cpu + image_repo: xgb-ci.gpu_build_rockylinux8_aarch64 + - arch: x86_64 + runner: linux-amd64-cpu + image_repo: xgb-ci.gpu_build_rockylinux8 steps: # Restart Docker daemon so that it recognizes the ephemeral disks - run: sudo systemctl restart docker @@ -45,12 +55,12 @@ jobs: - name: Log into Docker registry (AWS ECR) run: bash ops/pipeline/login-docker-registry.sh - run: | - bash ops/pipeline/build-cuda.sh xgb-ci.gpu_build_rockylinux8 disable-rmm + bash ops/pipeline/build-cuda.sh ${{ matrix.image_repo }} ${{ matrix.arch }} disable-rmm - name: Stash files run: | python3 ops/pipeline/manage-artifacts.py upload \ --s3-bucket ${{ env.RUNS_ON_S3_BUCKET_CACHE }} \ - --prefix cache/${{ github.run_id }}/build-cuda \ + --prefix cache/${{ github.run_id }}/build-cuda-${{ matrix.arch }} \ build/testxgboost python-package/dist/*.whl build-cuda-with-rmm: @@ -76,28 +86,6 @@ jobs: --prefix cache/${{ github.run_id }}/build-cuda-with-rmm \ build/testxgboost - build-python-wheels-arm64: - name: Build manylinux_2_28_aarch64 wheel - runs-on: - - runs-on=${{ github.run_id }} - - runner=linux-arm64-cpu - - tag=build-python-wheels-arm64 - steps: - # Restart Docker daemon so that it recognizes the ephemeral disks - - run: sudo systemctl restart docker - - uses: actions/checkout@v4 - with: - submodules: "true" - - name: Log into Docker registry (AWS ECR) - run: bash ops/pipeline/login-docker-registry.sh - - run: bash ops/pipeline/build-python-wheels-arm64.sh - - name: Stash files - run: | - python3 ops/pipeline/manage-artifacts.py upload \ - --s3-bucket ${{ env.RUNS_ON_S3_BUCKET_CACHE }} \ - --prefix cache/${{ github.run_id }}/build-python-wheels-arm64 \ - python-package/dist/*.whl - build-python-wheels-cpu: name: Build CPU wheel for ${{ matrix.manylinux_target }}_${{ matrix.arch }} runs-on: @@ -166,7 +154,10 @@ jobs: include: - suite: gpu runner: linux-amd64-gpu - artifact_from: build-cuda + artifact_from: build-cuda-x86_64 + - suite: gpu + runner: linux-arm64-gpu + artifact_from: build-cuda-aarch64 - suite: gpu-rmm runner: linux-amd64-gpu artifact_from: build-cuda-with-rmm @@ -208,27 +199,27 @@ jobs: image_repo: xgb-ci.gpu suite: gpu runner: linux-amd64-gpu - artifact_from: build-cuda + artifact_from: build-cuda-x86_64 - description: multiple-gpu image_repo: xgb-ci.gpu suite: mgpu runner: linux-amd64-mgpu - artifact_from: build-cuda + artifact_from: build-cuda-x86_64 - description: cpu-amd64 image_repo: xgb-ci.cpu suite: cpu runner: linux-amd64-cpu - artifact_from: build-cuda + artifact_from: build-cuda-x86_64 - description: cpu-arm64 image_repo: xgb-ci.manylinux_2_28_aarch64 suite: cpu-arm64 runner: linux-arm64-cpu - artifact_from: build-python-wheels-arm64 + artifact_from: build-cuda-aarch64 - description: gpu-arm64 image_repo: xgb-ci.gpu_aarch64 suite: gpu-arm64 runner: linux-arm64-gpu - artifact_from: build-cuda-arm64 + artifact_from: build-cuda-aarch64 steps: # Restart Docker daemon so that it recognizes the ephemeral disks - run: sudo systemctl restart docker diff --git a/ops/pipeline/build-cuda.sh b/ops/pipeline/build-cuda.sh index 3458719bf090..08f135d85f0a 100755 --- a/ops/pipeline/build-cuda.sh +++ b/ops/pipeline/build-cuda.sh @@ -9,13 +9,14 @@ then exit 1 fi -if [[ "$#" -lt 2 ]] +if [[ "$#" -lt 3 ]] then - echo "Usage: $0 [image_repo] {enable-rmm,disable-rmm}" + echo "Usage: $0 [image_repo] {x86_64,aarch64} {enable-rmm,disable-rmm}" exit 2 fi image_repo="$1" -rmm_flag="$2" +arch="$2" +rmm_flag="$3" export USE_FEDERATED=1 # Validate RMM flag @@ -36,7 +37,7 @@ source ops/pipeline/classify-git-branch.sh source ops/pipeline/get-docker-registry-details.sh source ops/pipeline/get-image-tag.sh -WHEEL_TAG=manylinux_2_28_x86_64 +WHEEL_TAG=manylinux_2_28_${ARCH} BUILD_IMAGE_URI="${DOCKER_REGISTRY_URL}/${image_repo}:${IMAGE_TAG}" MANYLINUX_IMAGE_URI="${DOCKER_REGISTRY_URL}/xgb-ci.${WHEEL_TAG}:${IMAGE_TAG}" @@ -74,13 +75,17 @@ pydistcheck --config python-package/pyproject.toml python-package/dist/*.whl if [[ $USE_RMM == 0 ]] then - # Generate the meta info which includes xgboost version and the commit info - echo "--- Generate meta info" - python3 ops/script/format_wheel_meta.py \ - --wheel-path python-package/dist/*.whl \ - --commit-hash ${GITHUB_SHA} \ - --platform-tag ${WHEEL_TAG} \ - --meta-path python-package/dist/ + if [[ $ARCH == "x86_64" ]] + then + # Generate the meta info which includes xgboost version and the commit info + # TODO(hcho3): Generate meta.json that contains both x86_64 and aarch64 wheels + echo "--- Generate meta info" + python3 ops/script/format_wheel_meta.py \ + --wheel-path python-package/dist/*.whl \ + --commit-hash ${GITHUB_SHA} \ + --platform-tag ${WHEEL_TAG} \ + --meta-path python-package/dist/ + fi echo "--- Upload Python wheel" if [[ ($is_pull_request == 0) && ($is_release_branch == 1) ]] @@ -89,9 +94,13 @@ then --s3-bucket xgboost-nightly-builds \ --prefix ${BRANCH_NAME}/${GITHUB_SHA} --make-public \ python-package/dist/*.whl - python3 ops/pipeline/manage-artifacts.py upload \ - --s3-bucket xgboost-nightly-builds \ - --prefix ${BRANCH_NAME} --make-public \ - python-package/dist/meta.json + + if [[ $ARCH == "x86_64" ]] + then + python3 ops/pipeline/manage-artifacts.py upload \ + --s3-bucket xgboost-nightly-builds \ + --prefix ${BRANCH_NAME} --make-public \ + python-package/dist/meta.json + fi fi fi diff --git a/ops/pipeline/build-python-wheels-arm64.sh b/ops/pipeline/build-python-wheels-arm64.sh deleted file mode 100755 index ff38ceee13de..000000000000 --- a/ops/pipeline/build-python-wheels-arm64.sh +++ /dev/null @@ -1,51 +0,0 @@ -#!/bin/bash -## Build and test XGBoost with ARM64 CPU (no GPU, no federated learning) - -set -euo pipefail - -if [[ -z "${GITHUB_SHA:-}" ]] -then - echo "Make sure to set environment variable GITHUB_SHA" - exit 1 -fi - -source ops/pipeline/classify-git-branch.sh -source ops/pipeline/get-docker-registry-details.sh -source ops/pipeline/get-image-tag.sh - -WHEEL_TAG=manylinux_2_28_aarch64 -IMAGE_URI=${DOCKER_REGISTRY_URL}/xgb-ci.${WHEEL_TAG}:${IMAGE_TAG} - -echo "--- Build CPU code targeting ARM64" -set -x - -python3 ops/script/pypi_variants.py --use-suffix=na --require-nccl-dep=na -python3 ops/docker_run.py \ - --image-uri ${IMAGE_URI} \ - -- ops/pipeline/build-python-wheels-arm64-impl.sh - -echo "--- Audit binary wheel to ensure it's compliant with ${WHEEL_TAG} standard" -python3 ops/docker_run.py \ - --image-uri ${IMAGE_URI} \ - -- auditwheel repair --only-plat \ - --plat ${WHEEL_TAG} python-package/dist/*.whl -python3 -m wheel tags --python-tag py3 --abi-tag none --platform ${WHEEL_TAG} --remove \ - wheelhouse/*.whl -mv -v wheelhouse/*.whl python-package/dist/ - -if ! unzip -l ./python-package/dist/*.whl | grep libgomp > /dev/null; then - echo "error: libgomp.so was not vendored in the wheel" - exit -1 -fi - -# Check size of wheel -pydistcheck --config python-package/pyproject.toml python-package/dist/*.whl - -echo "--- Upload Python wheel" -if [[ ($is_pull_request == 0) && ($is_release_branch == 1) ]] -then - python3 ops/pipeline/manage-artifacts.py upload \ - --s3-bucket xgboost-nightly-builds \ - --prefix ${BRANCH_NAME}/${GITHUB_SHA} --make-public \ - python-package/dist/*.whl -fi From 5e711fcd657b8ab30c63ef0673587b6b9c1c2e62 Mon Sep 17 00:00:00 2001 From: Hyunsu Cho Date: Tue, 9 Dec 2025 20:29:41 -0800 Subject: [PATCH 3/9] fix --- .github/workflows/main.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index e44b29a10161..daf07bbb008e 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -184,7 +184,7 @@ jobs: test-python-wheel: name: Run Python tests (${{ matrix.description }}) - needs: [build-cuda, build-cuda-arm64, build-python-wheels-arm64] + needs: [build-cuda] runs-on: - runs-on - runner=${{ matrix.runner }} From c4a2aadb9010314cbf35c47fa59d24d1f6ddc73a Mon Sep 17 00:00:00 2001 From: Hyunsu Cho Date: Tue, 9 Dec 2025 20:31:28 -0800 Subject: [PATCH 4/9] typo in script --- ops/pipeline/build-cuda.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ops/pipeline/build-cuda.sh b/ops/pipeline/build-cuda.sh index 08f135d85f0a..d9aeb3e708e1 100755 --- a/ops/pipeline/build-cuda.sh +++ b/ops/pipeline/build-cuda.sh @@ -37,7 +37,7 @@ source ops/pipeline/classify-git-branch.sh source ops/pipeline/get-docker-registry-details.sh source ops/pipeline/get-image-tag.sh -WHEEL_TAG=manylinux_2_28_${ARCH} +WHEEL_TAG=manylinux_2_28_${arch} BUILD_IMAGE_URI="${DOCKER_REGISTRY_URL}/${image_repo}:${IMAGE_TAG}" MANYLINUX_IMAGE_URI="${DOCKER_REGISTRY_URL}/xgb-ci.${WHEEL_TAG}:${IMAGE_TAG}" @@ -75,7 +75,7 @@ pydistcheck --config python-package/pyproject.toml python-package/dist/*.whl if [[ $USE_RMM == 0 ]] then - if [[ $ARCH == "x86_64" ]] + if [[ $arch == "x86_64" ]] then # Generate the meta info which includes xgboost version and the commit info # TODO(hcho3): Generate meta.json that contains both x86_64 and aarch64 wheels @@ -95,7 +95,7 @@ then --prefix ${BRANCH_NAME}/${GITHUB_SHA} --make-public \ python-package/dist/*.whl - if [[ $ARCH == "x86_64" ]] + if [[ $arch == "x86_64" ]] then python3 ops/pipeline/manage-artifacts.py upload \ --s3-bucket xgboost-nightly-builds \ From 41cc339e28107a6d167aa139e1998661eb018a08 Mon Sep 17 00:00:00 2001 From: Hyunsu Cho Date: Tue, 9 Dec 2025 20:49:01 -0800 Subject: [PATCH 5/9] Use matrix to de-duplicate jobs for x86_64, aarch64 --- .github/workflows/cuda13.yml | 81 ++++++++++++------------ .github/workflows/main.yml | 2 +- ops/pipeline/build-cuda13.sh | 24 +++---- ops/pipeline/test-cpp-cuda13.sh | 10 ++- ops/pipeline/test-python-wheel-cuda13.sh | 23 +++---- 5 files changed, 65 insertions(+), 75 deletions(-) diff --git a/.github/workflows/cuda13.yml b/.github/workflows/cuda13.yml index b6f33291d178..57ca9059b284 100644 --- a/.github/workflows/cuda13.yml +++ b/.github/workflows/cuda13.yml @@ -15,11 +15,21 @@ env: jobs: build-cuda13: - name: Build CUDA 13 + name: Build CUDA 13 wheel for ${{ matrix.arch }} runs-on: - runs-on=${{ github.run_id }} - - runner=linux-amd64-cpu - - tag=cuda13-build-cuda13 + - runner=${{ matrix.runner }} + - tag=cuda13-build-cuda13-${{ matrix.arch }} + strategy: + fail-fast: false + matrix: + include: + - arch: aarch64 + runner: linux-arm64-cpu + image_repo: xgb-ci.gpu_build_cuda13_rockylinux8_aarch64 + - arch: x86_64 + runner: linux-amd64-cpu + image_repo: xgb-ci.gpu_build_cuda13_rockylinux8 steps: # Restart Docker daemon so that it recognizes the ephemeral disks - run: sudo systemctl restart docker @@ -29,43 +39,31 @@ jobs: - name: Log into Docker registry (AWS ECR) run: bash ops/pipeline/login-docker-registry.sh - run: | - bash ops/pipeline/build-cuda13.sh + bash ops/pipeline/build-cuda13.sh ${{ matrix.image_repo }} ${{ matrix.arch }} - name: Stash files run: | python3 ops/pipeline/manage-artifacts.py upload \ --s3-bucket ${{ env.RUNS_ON_S3_BUCKET_CACHE }} \ - --prefix cache/${{ github.run_id }}/build-cuda13 \ + --prefix cache/${{ github.run_id }}/build-cuda13-${{ matrix.arch }} \ build/testxgboost python-package/dist/*.whl - build-cuda13-arm64: - name: Build CUDA 13 (ARM64) - runs-on: - - runs-on=${{ github.run_id }} - - runner=linux-arm64-cpu - - tag=cuda13-build-cuda13-arm64 - steps: - # Restart Docker daemon so that it recognizes the ephemeral disks - - run: sudo systemctl restart docker - - uses: actions/checkout@v4 - with: - submodules: "true" - - name: Log into Docker registry (AWS ECR) - run: bash ops/pipeline/login-docker-registry.sh - - run: | - bash ops/pipeline/build-cuda13.sh - - name: Stash files - run: | - python3 ops/pipeline/manage-artifacts.py upload \ - --s3-bucket ${{ env.RUNS_ON_S3_BUCKET_CACHE }} \ - --prefix cache/${{ github.run_id }}/build-cuda13-arm64 \ - python-package/dist/*.whl test-cpp-cuda13: - name: Google Test (C++) with CUDA 13 + name: Google Test (C++) with CUDA 13, arch ${{ matrix.arch }} needs: [build-cuda13] runs-on: - runs-on=${{ github.run_id }} - - runner=linux-amd64-gpu - - tag=cuda13-test-cpp-cuda13 + - runner=${{ matrix.runner }} + - tag=cuda13-test-cpp-cuda13-${{ matrix.arch }} + strategy: + fail-fast: false + matrix: + include: + - arch: aarch64 + runner: linux-arm64-gpu + image_repo: xgb-ci.gpu_build_cuda13_rockylinux8_aarch64 + - arch: x86_64 + runner: linux-amd64-gpu + image_repo: xgb-ci.gpu_build_cuda13_rockylinux8 steps: # Restart Docker daemon so that it recognizes the ephemeral disks - run: sudo systemctl restart docker @@ -78,29 +76,30 @@ jobs: run: | python3 ops/pipeline/manage-artifacts.py download \ --s3-bucket ${{ env.RUNS_ON_S3_BUCKET_CACHE }} \ - --prefix cache/${{ github.run_id }}/build-cuda13 \ + --prefix cache/${{ github.run_id }}/build-cuda13-${{ matrix.arch }} \ --dest-dir build \ testxgboost chmod +x build/testxgboost - run: | - bash ops/pipeline/test-cpp-cuda13.sh + bash ops/pipeline/test-cpp-cuda13.sh ${{ matrix.image_repo }} + test-python-cuda13: - name: Run Python tests with CUDA 13 (${{ matrix.description }}) - needs: [build-cuda13, build-cuda13-arm64] + name: Run Python tests with CUDA 13, arch ${{ matrix.arch }} + needs: [build-cuda13] runs-on: - runs-on=${{ github.run_id }} - runner=${{ matrix.runner }} - - tag=cuda13-test-python-cuda13-${{ matrix.description }} + - tag=cuda13-test-python-cuda13-${{ matrix.arch }} strategy: fail-fast: false matrix: include: - - description: amd64 + - arch: x86_64 runner: linux-amd64-gpu - artifact_from: build-cuda13 - - description: arm64 + image_repo: xgb-ci.gpu_build_cuda13_rockylinux8 + - arch: aarch64 runner: linux-arm64-gpu - artifact_from: build-cuda13-arm64 + image_repo: xgb-ci.gpu_build_cuda13_rockylinux8_aarch64 steps: # Restart Docker daemon so that it recognizes the ephemeral disks - run: sudo systemctl restart docker @@ -113,8 +112,8 @@ jobs: run: | python3 ops/pipeline/manage-artifacts.py download \ --s3-bucket ${{ env.RUNS_ON_S3_BUCKET_CACHE }} \ - --prefix cache/${{ github.run_id }}/${{ matrix.artifact_from }} \ + --prefix cache/${{ github.run_id }}/build-cuda13-${{ matrix.arch }} \ --dest-dir wheelhouse \ *.whl - name: Run Python tests - run: bash ops/pipeline/test-python-wheel-cuda13.sh + run: bash ops/pipeline/test-python-wheel-cuda13.sh ${{ matrix.image_repo }} diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index daf07bbb008e..ec6e996ba581 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -78,7 +78,7 @@ jobs: - name: Log into Docker registry (AWS ECR) run: bash ops/pipeline/login-docker-registry.sh - run: | - bash ops/pipeline/build-cuda.sh xgb-ci.gpu_build_rockylinux8 enable-rmm + bash ops/pipeline/build-cuda.sh xgb-ci.gpu_build_rockylinux8 x86_64 enable-rmm - name: Stash files run: | python3 ops/pipeline/manage-artifacts.py upload \ diff --git a/ops/pipeline/build-cuda13.sh b/ops/pipeline/build-cuda13.sh index 36caec14ae31..03049d06bba0 100755 --- a/ops/pipeline/build-cuda13.sh +++ b/ops/pipeline/build-cuda13.sh @@ -9,29 +9,21 @@ then exit 1 fi +if [[ "$#" -lt 2 ]] +then + echo "Usage: $0 [image_repo] {x86_64,aarch64}" + exit 2 +fi +image_repo="$1" +arch="$2" export USE_RMM=0 export USE_FEDERATED=0 -ARCH=$(uname -m) -case "${ARCH}" in - x86_64) - IMAGE_REPO="xgb-ci.gpu_build_cuda13_rockylinux8" - WHEEL_TAG=manylinux_2_28_x86_64 - ;; - aarch64) - IMAGE_REPO="xgb-ci.gpu_build_cuda13_rockylinux8_aarch64" - WHEEL_TAG=manylinux_2_28_aarch64 - ;; - *) - echo "Unsupported architecture: ${ARCH}" - exit 1 - ;; -esac - source ops/pipeline/classify-git-branch.sh source ops/pipeline/get-docker-registry-details.sh source ops/pipeline/get-image-tag.sh +WHEEL_TAG=manylinux_2_28_${arch} BUILD_IMAGE_URI="${DOCKER_REGISTRY_URL}/${IMAGE_REPO}:${IMAGE_TAG}" MANYLINUX_IMAGE_URI="${DOCKER_REGISTRY_URL}/xgb-ci.${WHEEL_TAG}:${IMAGE_TAG}" diff --git a/ops/pipeline/test-cpp-cuda13.sh b/ops/pipeline/test-cpp-cuda13.sh index 2ccd7bea6abc..165ea1b1109e 100755 --- a/ops/pipeline/test-cpp-cuda13.sh +++ b/ops/pipeline/test-cpp-cuda13.sh @@ -2,11 +2,17 @@ set -euox pipefail +if [[ "$#" -lt 1 ]] +then + echo "Usage: $0 [image_repo]" + exit 2 +fi +image_repo="$1" + source ops/pipeline/get-docker-registry-details.sh source ops/pipeline/get-image-tag.sh -IMAGE_REPO='xgb-ci.gpu_build_cuda13_rockylinux8' -IMAGE_URI=${DOCKER_REGISTRY_URL}/${IMAGE_REPO}:${IMAGE_TAG} +IMAGE_URI=${DOCKER_REGISTRY_URL}/${image_repo}:${IMAGE_TAG} echo "--- Run Google Tests, using a single GPU, CUDA 13" python3 ops/docker_run.py --image-uri ${IMAGE_URI} --use-gpus \ diff --git a/ops/pipeline/test-python-wheel-cuda13.sh b/ops/pipeline/test-python-wheel-cuda13.sh index 495fe5672aa5..fd76515f8d05 100755 --- a/ops/pipeline/test-python-wheel-cuda13.sh +++ b/ops/pipeline/test-python-wheel-cuda13.sh @@ -3,24 +3,17 @@ set -euo pipefail +if [[ "$#" -lt 1 ]] +then + echo "Usage: $0 [image_repo]" + exit 2 +fi +image_repo="$1" + source ops/pipeline/get-docker-registry-details.sh source ops/pipeline/get-image-tag.sh -ARCH=$(uname -m) -case "${ARCH}" in - x86_64) - IMAGE_REPO="xgb-ci.gpu_build_cuda13_rockylinux8" - ;; - aarch64) - IMAGE_REPO="xgb-ci.gpu_build_cuda13_rockylinux8_aarch64" - ;; - *) - echo "Unsupported architecture: ${ARCH}" - exit 1 - ;; -esac - -IMAGE_URI="${DOCKER_REGISTRY_URL}/${IMAGE_REPO}:${IMAGE_TAG}" +IMAGE_URI="${DOCKER_REGISTRY_URL}/${image_repo}:${IMAGE_TAG}" set -x python3 ops/docker_run.py --image-uri "${IMAGE_URI}" --use-gpus \ From 88646e58e4565cbd332d75533f5c5326ca6c1ede Mon Sep 17 00:00:00 2001 From: Hyunsu Cho Date: Tue, 9 Dec 2025 20:50:26 -0800 Subject: [PATCH 6/9] Removed unused build-cuda-arm64.sh --- ops/pipeline/build-cuda-arm64.sh | 75 -------------------------------- 1 file changed, 75 deletions(-) delete mode 100755 ops/pipeline/build-cuda-arm64.sh diff --git a/ops/pipeline/build-cuda-arm64.sh b/ops/pipeline/build-cuda-arm64.sh deleted file mode 100755 index f6078cba5298..000000000000 --- a/ops/pipeline/build-cuda-arm64.sh +++ /dev/null @@ -1,75 +0,0 @@ -#!/bin/bash -## Build XGBoost with CUDA for Linux ARM64 - -set -euo pipefail - -if [[ -z "${GITHUB_SHA:-}" ]] -then - echo "Make sure to set environment variable GITHUB_SHA" - exit 1 -fi - -IMAGE_REPO="xgb-ci.gpu_build_rockylinux8_aarch64" -export USE_FEDERATED=1 -export USE_RMM=0 - -source ops/pipeline/classify-git-branch.sh -source ops/pipeline/get-docker-registry-details.sh -source ops/pipeline/get-image-tag.sh - -WHEEL_TAG=manylinux_2_28_aarch64 -BUILD_IMAGE_URI="${DOCKER_REGISTRY_URL}/${IMAGE_REPO}:${IMAGE_TAG}" -MANYLINUX_IMAGE_URI="${DOCKER_REGISTRY_URL}/xgb-ci.${WHEEL_TAG}:${IMAGE_TAG}" - -echo "--- Build with CUDA (ARM64)" - -if [[ ($is_pull_request == 1) || ($is_release_branch == 0) ]] -then - export BUILD_ONLY_SM75=1 -else - export BUILD_ONLY_SM75=0 -fi - -set -x - -python3 ops/docker_run.py \ - --image-uri ${BUILD_IMAGE_URI} \ - --run-args='-e BUILD_ONLY_SM75 -e USE_RMM -e USE_FEDERATED' \ - -- ops/pipeline/build-cuda-impl.sh - -echo "--- Audit binary wheel to ensure it's compliant with ${WHEEL_TAG} standard" -python3 ops/docker_run.py \ - --image-uri ${MANYLINUX_IMAGE_URI} \ - -- auditwheel repair --only-plat \ - --plat ${WHEEL_TAG} python-package/dist/*.whl -python3 -m wheel tags --python-tag py3 --abi-tag none --platform ${WHEEL_TAG} --remove \ - wheelhouse/*.whl -mv -v wheelhouse/*.whl python-package/dist/ -if ! unzip -l ./python-package/dist/*.whl | grep libgomp > /dev/null; then - echo "error: libgomp.so was not vendored in the wheel" - exit -1 -fi - -# Check size of wheel -pydistcheck --config python-package/pyproject.toml python-package/dist/*.whl - -echo "--- Generate meta info" -python3 ops/script/format_wheel_meta.py \ - --wheel-path python-package/dist/*.whl \ - --commit-hash ${GITHUB_SHA} \ - --platform-tag ${WHEEL_TAG} \ - --meta-path python-package/dist/ - -echo "--- Upload Python wheel" -if [[ ($is_pull_request == 0) && ($is_release_branch == 1) ]] -then - python3 ops/pipeline/manage-artifacts.py upload \ - --s3-bucket xgboost-nightly-builds \ - --prefix ${BRANCH_NAME}/${GITHUB_SHA} --make-public \ - python-package/dist/*.whl - python3 ops/pipeline/manage-artifacts.py upload \ - --s3-bucket xgboost-nightly-builds \ - --prefix ${BRANCH_NAME} --make-public \ - python-package/dist/meta.json -fi - From 7ce0ce3b0a3ba8b4882a6eebf41a28057e080be7 Mon Sep 17 00:00:00 2001 From: Hyunsu Cho Date: Tue, 9 Dec 2025 20:51:53 -0800 Subject: [PATCH 7/9] typo --- ops/pipeline/build-cuda13.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ops/pipeline/build-cuda13.sh b/ops/pipeline/build-cuda13.sh index 03049d06bba0..bd312b7c48b0 100755 --- a/ops/pipeline/build-cuda13.sh +++ b/ops/pipeline/build-cuda13.sh @@ -24,7 +24,7 @@ source ops/pipeline/get-docker-registry-details.sh source ops/pipeline/get-image-tag.sh WHEEL_TAG=manylinux_2_28_${arch} -BUILD_IMAGE_URI="${DOCKER_REGISTRY_URL}/${IMAGE_REPO}:${IMAGE_TAG}" +BUILD_IMAGE_URI="${DOCKER_REGISTRY_URL}/${image_repo}:${IMAGE_TAG}" MANYLINUX_IMAGE_URI="${DOCKER_REGISTRY_URL}/xgb-ci.${WHEEL_TAG}:${IMAGE_TAG}" echo "--- Build with CUDA" From 9dfc5ef17d32872f6c59e13a0d82cef28e26ca6c Mon Sep 17 00:00:00 2001 From: Hyunsu Cho Date: Tue, 9 Dec 2025 21:07:41 -0800 Subject: [PATCH 8/9] Use correct artifact for mgpu test --- .github/workflows/main.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index ec6e996ba581..c7d40f2d52a9 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -163,7 +163,7 @@ jobs: artifact_from: build-cuda-with-rmm - suite: mgpu runner: linux-amd64-mgpu - artifact_from: build-cuda + artifact_from: build-cuda-x86_64 steps: # Restart Docker daemon so that it recognizes the ephemeral disks - run: sudo systemctl restart docker From 7194274637f825253de782dcfe8be92978544f88 Mon Sep 17 00:00:00 2001 From: Hyunsu Cho Date: Tue, 9 Dec 2025 21:12:19 -0800 Subject: [PATCH 9/9] Use correct Docker image for arm64 gtest --- .github/workflows/main.yml | 6 +++++- ops/pipeline/test-cpp-gpu.sh | 9 +++++---- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index c7d40f2d52a9..7ba762aeea38 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -154,15 +154,19 @@ jobs: include: - suite: gpu runner: linux-amd64-gpu + image_repo: xgb-ci.gpu artifact_from: build-cuda-x86_64 - suite: gpu runner: linux-arm64-gpu + image_repo: xgb-ci.gpu_aarch64 artifact_from: build-cuda-aarch64 - suite: gpu-rmm runner: linux-amd64-gpu + image_repo: xgb-ci.gpu artifact_from: build-cuda-with-rmm - suite: mgpu runner: linux-amd64-mgpu + image_repo: xgb-ci.gpu artifact_from: build-cuda-x86_64 steps: # Restart Docker daemon so that it recognizes the ephemeral disks @@ -180,7 +184,7 @@ jobs: --dest-dir build \ testxgboost chmod +x build/testxgboost - - run: bash ops/pipeline/test-cpp-gpu.sh ${{ matrix.suite }} + - run: bash ops/pipeline/test-cpp-gpu.sh ${{ matrix.image_repo }} ${{ matrix.suite }} test-python-wheel: name: Run Python tests (${{ matrix.description }}) diff --git a/ops/pipeline/test-cpp-gpu.sh b/ops/pipeline/test-cpp-gpu.sh index 3f3992828cef..39810a11b0f9 100755 --- a/ops/pipeline/test-cpp-gpu.sh +++ b/ops/pipeline/test-cpp-gpu.sh @@ -2,17 +2,18 @@ set -euox pipefail -if [[ "$#" -lt 1 ]] +if [[ "$#" -lt 2 ]] then - echo "Usage: $0 {gpu,gpu-rmm,mgpu}" + echo "Usage: $0 [image_repo] {gpu,gpu-rmm,mgpu}" exit 1 fi -suite=$1 +image_repo=$1 +suite=$2 source ops/pipeline/get-docker-registry-details.sh source ops/pipeline/get-image-tag.sh -IMAGE_URI=${DOCKER_REGISTRY_URL}/xgb-ci.gpu:${IMAGE_TAG} +IMAGE_URI=${DOCKER_REGISTRY_URL}/${image_repo}:${IMAGE_TAG} case "${suite}" in gpu)