From 31aa3369d4056813ddda532161e717e571c44ce9 Mon Sep 17 00:00:00 2001 From: Sandeep Datta <128171450+sandeepd-nv@users.noreply.github.com> Date: Tue, 26 Sep 2023 18:24:09 +0530 Subject: [PATCH 01/18] GH artifacts based CI (#1043) --- .github/actions/download-artifacts/action.yml | 48 ++++++ .github/workflows/ci-gh.yml | 11 +- .github/workflows/gh-build-and-test.yml | 88 +++++++++-- .github/workflows/gh-build.yml | 148 ++++++++---------- .github/workflows/gh-cleanup.yml | 43 ----- .github/workflows/gh-test.yml | 91 +++++++++++ cmake/versions.json | 2 +- conda/conda-build/meta.yaml | 4 +- continuous_integration/Dockerfile | 44 ------ continuous_integration/dot-gitconfig | 3 + .../home/coder/.local/bin/build-cunumeric-all | 17 -- .../scripts/build-cunumeric-all | 44 ++++++ .../bin => scripts}/build-cunumeric-conda | 19 ++- .../bin => scripts}/build-cunumeric-cpp | 1 - .../bin => scripts}/build-cunumeric-wheel | 0 continuous_integration/scripts/entrypoint | 46 ++++++ continuous_integration/scripts/test-cunumeric | 61 ++++++++ 17 files changed, 458 insertions(+), 212 deletions(-) create mode 100644 .github/actions/download-artifacts/action.yml delete mode 100644 .github/workflows/gh-cleanup.yml create mode 100644 .github/workflows/gh-test.yml delete mode 100644 continuous_integration/Dockerfile create mode 100644 continuous_integration/dot-gitconfig delete mode 100644 continuous_integration/home/coder/.local/bin/build-cunumeric-all create mode 100755 continuous_integration/scripts/build-cunumeric-all rename continuous_integration/{home/coder/.local/bin => scripts}/build-cunumeric-conda (86%) rename continuous_integration/{home/coder/.local/bin => scripts}/build-cunumeric-cpp (93%) rename continuous_integration/{home/coder/.local/bin => scripts}/build-cunumeric-wheel (100%) create mode 100755 continuous_integration/scripts/entrypoint create mode 100755 continuous_integration/scripts/test-cunumeric diff --git a/.github/actions/download-artifacts/action.yml b/.github/actions/download-artifacts/action.yml new file mode 100644 index 0000000000..e8019b1b19 --- /dev/null +++ b/.github/actions/download-artifacts/action.yml @@ -0,0 +1,48 @@ +name: setup-legate-conda + +description: Download dependencies (artifacts) + +inputs: + device: {type: string, required: true} + git_sha: {type: string, required: true} + +runs: + using: composite + steps: + + - id: cache + name: Cache conda artifacts + uses: actions/cache@v3 + with: + key: "nv-legate/legate.core@${{ inputs.git_sha }}-${{ inputs.device }}" + path: .artifacts + + - if: steps.cache.outputs.cache-hit != 'true' + name: Download conda artifacts + uses: dawidd6/action-download-artifact@v2 + with: + path: .artifacts-dl + repo: nv-legate/legate.core + commit: ${{ inputs.git_sha }} + workflow_conclusion: success + workflow: "ci-gh-${{ inputs.device }}-build-and-test.yml" + name: "legate.core-${{ inputs.device }}-[0-9a-z]{40}" + name_is_regexp: true + + - if: steps.cache.outputs.cache-hit != 'true' + name: Move conda artifacts into cached dir + shell: bash --noprofile --norc -xeo pipefail {0} + run: | + mkdir -p .artifacts; + find .artifacts-dl/legate.core-${{ inputs.device }}-*/ \ + -maxdepth 2 -type d -name legate_core -exec mv {} .artifacts/ \; + find .artifacts-dl/legate.core-${{ inputs.device }}-*/ \ + -maxdepth 2 -type f -name "environment*.yaml" -exec mv {} .artifacts/ \; + + - name: Copy and change cache dir ownership + shell: bash --noprofile --norc -xeo pipefail {0} + run: | + # Copy and change directory ownership + cp -ar .artifacts 
/home/coder/.artifacts; + chown -R coder:coder /home/coder/.artifacts; + ls -R /home/coder/.artifacts diff --git a/.github/workflows/ci-gh.yml b/.github/workflows/ci-gh.yml index f1aafdd595..ffb77c10e8 100644 --- a/.github/workflows/ci-gh.yml +++ b/.github/workflows/ci-gh.yml @@ -16,10 +16,13 @@ jobs: fail-fast: false matrix: include: - - {build-target: cpu} - - {build-target: gpu} + - device: "gpu" + image: "rapidsai/devcontainers:23.06-cpp-mambaforge-ubuntu22.04" + + - device: "cpu" + image: "rapidsai/devcontainers:23.06-cpp-mambaforge-ubuntu22.04" uses: ./.github/workflows/gh-build-and-test.yml with: - build-target: ${{ matrix.build-target }} - sha: ${{ github.sha }} + device: ${{ matrix.device }} + image: ${{ matrix.image }} diff --git a/.github/workflows/gh-build-and-test.yml b/.github/workflows/gh-build-and-test.yml index f297b97618..e0e87899da 100644 --- a/.github/workflows/gh-build-and-test.yml +++ b/.github/workflows/gh-build-and-test.yml @@ -1,32 +1,92 @@ on: workflow_call: inputs: - build-target: - required: true + image: type: string - sha: required: true + device: type: string + required: true + jobs: build: - name: "Build cunumeric (with ${{ inputs.build-target }} legate) on GH" + name: "Build cunumeric (with ${{ inputs.device }} legate) on GH" uses: ./.github/workflows/gh-build.yml with: - build-target: ${{ inputs.build-target }} - # Ref: https://docs.rapids.ai/resources/github-actions/#cpu-labels for `linux-amd64-cpu4` - runs-on: ${{ github.repository_owner == 'nv-legate' && 'linux-amd64-cpu4' || 'ubuntu-latest' }} - sha: ${{ inputs.sha }} + device: ${{ inputs.device }} + image: ${{ inputs.image }} + runs-on: ${{ github.repository_owner == 'nv-legate' && 'linux-amd64-32cpu' || 'ubuntu-latest' }} - cleanup: + test: needs: - build + strategy: + fail-fast: false + matrix: + include: + - name: 1 CPU test + options: test --cpus 1 --unit --debug + runner: ${{ inputs.device == 'gpu' && 'linux-amd64-gpu-v100-latest-1' || 'linux-amd64-cpu4' }} + has-gpu: false + enabled: true + + - name: 2 CPUs test + options: test --cpus 2 --debug + runner: ${{ inputs.device == 'gpu' && 'linux-amd64-gpu-v100-latest-1' || 'linux-amd64-cpu8' }} + has-gpu: false + enabled: true + + - name: GPU test + options: test --use cuda --gpus 1 --debug + runner: linux-amd64-gpu-v100-latest-1 + has-gpu: true + enabled: ${{ inputs.device == 'gpu' }} + + - name: 2 GPUs test + options: test --use cuda --gpus 2 --debug + runner: linux-amd64-2gpu + has-gpu: true + enabled: ${{ inputs.device == 'gpu' }} + + - name: OpenMP test + options: test --use openmp --omps 1 --ompthreads 2 --debug + runner: ${{ inputs.device == 'gpu' && 'linux-amd64-gpu-v100-latest-1' || 'linux-amd64-32cpu' }} + has-gpu: ${{ inputs.device == 'gpu' }} + enabled: false + + - name: 2 NUMA OpenMPs test + options: test --use openmp --omps 2 --ompthreads 2 --numamem 2048 --debug + runner: ${{ inputs.device == 'gpu' && 'linux-amd64-gpu-v100-latest-1' || 'linux-amd64-32cpu' }} + has-gpu: ${{ inputs.device == 'gpu' }} + enabled: false + + - name: Eager execution test + options: test --use eager --debug + runner: ${{ inputs.device == 'gpu' && 'linux-amd64-gpu-v100-latest-1' || 'linux-amd64-cpu4' }} + has-gpu: ${{ inputs.device == 'gpu' }} + enabled: true + + - name: mypy + options: mypy + runner: linux-amd64-cpu4 + has-gpu: false + enabled: true + + - name: documentation + options: docs + runner: linux-amd64-32cpu + has-gpu: false + enabled: ${{ inputs.device == 'gpu' }} - # This ensures the cleanup job runs even if previous jobs fail or the workflow is 
cancelled. - if: always() uses: - ./.github/workflows/gh-cleanup.yml + ./.github/workflows/gh-test.yml with: - build-target: ${{ inputs.build-target }} - sha: ${{ inputs.sha }} + name: ${{ matrix.name }} + device: ${{ inputs.device }} + image: ${{ inputs.image }} + runs-on: ${{ matrix.runner }} + has-gpu: ${{ matrix.has-gpu }} + test-options: ${{ matrix.options }} + enabled: ${{ matrix.enabled }} diff --git a/.github/workflows/gh-build.yml b/.github/workflows/gh-build.yml index c84ac0b9a8..308b5f78c2 100644 --- a/.github/workflows/gh-build.yml +++ b/.github/workflows/gh-build.yml @@ -1,123 +1,101 @@ -name: Build cunumeric on GH +name: Build on: workflow_call: inputs: - build-target: - required: true + image: type: string - runs-on: + required: true + device: required: true type: string - sha: + runs-on: required: true type: string -env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - BASE_IMAGE: rapidsai/devcontainers:23.06-cpp-cuda11.8-mambaforge-ubuntu22.04 - IMAGE_NAME_LEGATE: legate.core-${{ inputs.build-target }} - IMAGE_NAME_CUNUMERIC: cunumeric-${{ inputs.build-target }} - USE_CUDA: ${{ (inputs.build-target == 'cpu' && 'OFF') || 'ON' }} - jobs: build: - name: build-${{ inputs.build-target }}-sub-workflow + name: build-${{ inputs.device }}-sub-workflow permissions: id-token: write # This is required for configure-aws-credentials contents: read # This is required for actions/checkout - packages: write # This is required to push docker image to ghcr.io - + runs-on: ${{ inputs.runs-on }} - steps: - - name: Checkout legate.core - uses: actions/checkout@v3 - with: - repository: nv-legate/legate.core - fetch-depth: 0 - path: legate + container: + options: -u root + image: "${{ inputs.image }}" + env: + CUDA_VERSION: "12.0" + CUDA_VERSION_MAJOR: "12" + CUDA_VERSION_MINOR: "0" + SCCACHE_REGION: "us-east-2" + SCCACHE_BUCKET: "rapids-sccache-devs" + SCCACHE_S3_KEY_PREFIX: "legate-cunumeric-dev" + USE_CUDA: "${{ inputs.device == 'gpu' && 'ON' || 'OFF' }}" + GH_TOKEN: "${{ env.GH_TOKEN }}" + GITHUB_TOKEN: "${{ env.GITHUB_TOKEN }}" + VAULT_HOST: "${{ github.repository_owner != 'nv-legate' && 'https://vault.ops.k8s.rapids.ai' || '' }}" + defaults: + run: + shell: su coder {0} + working-directory: /home/coder + steps: - name: Checkout cunumeric (= this repo) uses: actions/checkout@v3 with: fetch-depth: 0 path: cunumeric + persist-credentials: false - - if: github.repository_owner == 'nv-legate' - name: Get AWS credentials for sccache bucket - uses: aws-actions/configure-aws-credentials@v2 - with: - aws-region: us-east-2 - role-duration-seconds: 28800 # 8 hours - role-to-assume: arn:aws:iam::279114543810:role/gha-oidc-nv-legate - - - name: Docker system prune + - name: Dump environment run: | - docker version - docker system prune --all --force + env - - name: Build legate.core using docker build + - name: Copy source folder run: | - echo BUILD_TARGET: ${{ inputs.build-target }} - echo USE_CUDA: ${{ env.USE_CUDA }} - - export LEGATE_SHA=$(cat cunumeric/cmake/versions.json | jq -r '.packages.legate_core.git_tag') - echo "Checking out LEGATE_SHA: ${LEGATE_SHA}" - git -C legate checkout $LEGATE_SHA - - IMAGE_TAG_LEGATE=${{ env.IMAGE_NAME_LEGATE }}:${{ inputs.sha }} - - chmod +x legate/continuous_integration/build-docker-image - legate/continuous_integration/build-docker-image \ - --base-image "$BASE_IMAGE" \ - --image-tag "$IMAGE_TAG_LEGATE" \ - --source-dir legate - - - name: Build cunumeric using docker build - run: | - IMAGE_TAG_CUNUMERIC=${{ env.IMAGE_NAME_CUNUMERIC }}:${{ inputs.sha }} - 
IMAGE_TAG_LEGATE=${{ env.IMAGE_NAME_LEGATE }}:${{ inputs.sha }} - - legate/continuous_integration/build-docker-image \ - --base-image "$IMAGE_TAG_LEGATE" \ - --image-tag "$IMAGE_TAG_CUNUMERIC" \ - --source-dir cunumeric - - - name: Dump docker history of image before upload + set -x + pwd + cp -r $GITHUB_WORKSPACE/cunumeric . + chown -R coder:coder cunumeric; + ls -R + + - name: Copy .gitconfig + run: cp ~/cunumeric/continuous_integration/dot-gitconfig ~/.gitconfig + + - id: legate_core_info + name: Read legate.core SHA + shell: bash --noprofile --norc -xeo pipefail {0} run: | - IMAGE_TAG=${{ env.IMAGE_NAME_CUNUMERIC }}:${{ inputs.sha }} - docker history $IMAGE_TAG - - - name: Log in to container image registry - run: echo "${{ secrets.GITHUB_TOKEN }}" | docker login ghcr.io -u $ --password-stdin - - - name: Push cunumeric image - run: | - IMAGE_TAG=${{ env.IMAGE_NAME_CUNUMERIC }}:${{ inputs.sha }} - - IMAGE_ID=ghcr.io/${{ github.repository_owner }} + git_tag="$(jq -r '.packages.legate_core.git_tag' cunumeric/cmake/versions.json)"; - # Change all uppercase to lowercase - IMAGE_ID=$(echo $IMAGE_ID | tr '[A-Z]' '[a-z]') + echo "git_tag=$git_tag" | tee -a "${GITHUB_OUTPUT}"; - IMAGE_ID=$IMAGE_ID/$IMAGE_TAG + - name: Download dependencies (artifacts) + uses: ./cunumeric/.github/actions/download-artifacts + with: + device: "${{ inputs.device }}" + git_sha: "${{ steps.legate_core_info.outputs.git_tag }}" - docker tag $IMAGE_TAG $IMAGE_ID - docker push $IMAGE_ID + - if: github.repository_owner == 'nv-legate' + name: Get AWS credentials for sccache bucket + uses: aws-actions/configure-aws-credentials@v2 + with: + aws-region: us-east-2 + role-duration-seconds: 28800 # 8 hours + role-to-assume: arn:aws:iam::279114543810:role/gha-oidc-nv-legate - - name: Copy artifacts back to the host + - name: Build cunumeric run: | - IMAGE_TAG=${{ env.IMAGE_NAME_CUNUMERIC }}:${{ inputs.sha }} - mkdir -p artifacts - docker run -v "$(pwd)/artifacts:/home/coder/.artifacts" --rm -t $IMAGE_TAG copy-artifacts - - - name: Display structure of workdir - run: ls -R + export PATH="/home/coder/cunumeric/continuous_integration/scripts:$PATH" + build-cunumeric-all - name: Upload build artifacts uses: actions/upload-artifact@v3 with: - name: "cunumeric-${{ inputs.build-target }}-${{ inputs.sha }}" - path: artifacts + name: "cunumeric-${{ inputs.device }}-${{ github.sha }}" + path: | + /tmp/out + /tmp/conda-build diff --git a/.github/workflows/gh-cleanup.yml b/.github/workflows/gh-cleanup.yml deleted file mode 100644 index 6451c401c3..0000000000 --- a/.github/workflows/gh-cleanup.yml +++ /dev/null @@ -1,43 +0,0 @@ -name: Clean up - -on: - workflow_call: - inputs: - build-target: - required: true - type: string - sha: - required: true - type: string - -env: - IMAGE_NAME: cunumeric-${{ inputs.build-target }} - -jobs: - cleanup: - permissions: - packages: write - - runs-on: ubuntu-latest - - steps: - - name: Delete docker image - run: | - set -xeuo pipefail - - PACKAGE_NAME=${{ env.IMAGE_NAME }} - PACKAGE_VERSION_ID=$( - curl -L \ - -H "Accept: application/vnd.github+json" \ - -H "Authorization: Bearer ${{ github.token }}"\ - -H "X-GitHub-Api-Version: 2022-11-28" \ - https://api.github.com/orgs/${{ github.repository_owner }}/packages/container/$PACKAGE_NAME/versions | - jq '.[] | select(.metadata.container.tags[] == "${{ inputs.sha }}") | .id' - - ) - - curl -L \ - -X DELETE \ - -H "Accept: application/vnd.github+json" \ - -H "Authorization: Bearer ${{ github.token }}"\ - -H "X-GitHub-Api-Version: 2022-11-28" \ - 
https://api.github.com/orgs/${{ github.repository_owner }}/packages/container/$PACKAGE_NAME/versions/$PACKAGE_VERSION_ID diff --git a/.github/workflows/gh-test.yml b/.github/workflows/gh-test.yml new file mode 100644 index 0000000000..675f27e9ba --- /dev/null +++ b/.github/workflows/gh-test.yml @@ -0,0 +1,91 @@ +name: Test cunumeric on GH + +on: + workflow_call: + inputs: + name: + required: true + type: string + image: + type: string + required: true + device: + required: true + type: string + runs-on: + required: true + type: string + has-gpu: + required: true + type: boolean + description: "The runner has GPU(s)." + test-options: + required: true + type: string + enabled: + required: true + type: boolean + +env: + build_artifact_name: "cunumeric-${{ inputs.device }}-${{ github.sha }}" + +jobs: + test: + name: ${{ inputs.name }} + if: inputs.enabled && github.repository_owner == 'nv-legate' + runs-on: ${{ inputs.runs-on }} + + container: + options: -u root + image: "${{ inputs.image }}" + env: + # CUDA_VERSION: "${{ inputs.CUDA }}" + NVIDIA_VISIBLE_DEVICES: ${{ env.NVIDIA_VISIBLE_DEVICES }} + + defaults: + run: + shell: su coder {0} + working-directory: /home/coder + + steps: + - if: inputs.has-gpu + name: Run nvidia-smi to make sure GPU is working + run: nvidia-smi + + - name: Install numactl + run: | + export DEBIAN_FRONTEND=noninteractive && \ + sudo apt-get update && \ + sudo apt-get install -y numactl + + - name: Checkout cunumeric + uses: actions/checkout@v3 + with: + fetch-depth: 0 + path: cunumeric + persist-credentials: false + + - name: Copy source folder + run: | + set -x + pwd + cp -r $GITHUB_WORKSPACE/cunumeric . + chown -R coder:coder cunumeric; + ls -R + + - name: Download build artifacts + uses: actions/download-artifact@v3 + with: + name: ${{ env.build_artifact_name }} + path: /home/coder/.artifacts + + - name: Run cunumeric test / analysis + shell: su coder {0} + run: | + set -x + sudo chown -R coder:coder /home/coder/.artifacts + + export PATH="/home/coder/cunumeric/continuous_integration/scripts:$PATH" + + set -eo pipefail + test-cunumeric ${{ inputs.test-options }} diff --git a/cmake/versions.json b/cmake/versions.json index d1ae134aba..7a98894dd5 100644 --- a/cmake/versions.json +++ b/cmake/versions.json @@ -5,7 +5,7 @@ "git_url" : "https://github.com/nv-legate/legate.core.git", "git_shallow": false, "always_download": false, - "git_tag" : "14cca04834095553e4d88f503dc4cd35e4072212" + "git_tag" : "06b0e4d7fded0b4207fd8b4ba34c330333ee3543" } } } diff --git a/conda/conda-build/meta.yaml b/conda/conda-build/meta.yaml index 81569e3a24..c652d931bf 100644 --- a/conda/conda-build/meta.yaml +++ b/conda/conda-build/meta.yaml @@ -10,7 +10,7 @@ ## The placeholder version is strictly for making two-pass conda build process. ## It should not be used for any other purpose, and this is not a default version. 
{% set placeholder_version = '0.0.0.dev' %} -{% set default_cuda_version = '11.8' %} +{% set default_cuda_version = '12.0' %} {% set cuda_version='.'.join(environ.get('CUDA', default_cuda_version).split('.')[:2]) %} {% set cuda_major=cuda_version.split('.')[0]|int %} {% set py_version=environ.get('CONDA_PY', '') %} @@ -138,7 +138,7 @@ requirements: - cuda-version >={{ cuda_version }},<{{ cuda_major+1 }} - cutensor >=1.3 =*_* - libcublas - - libcusolver =11.4.1.48-0 + - libcusolver >=11.4.1.48-0 - libcufft {% endif %} - opt_einsum >=3.3 diff --git a/continuous_integration/Dockerfile b/continuous_integration/Dockerfile deleted file mode 100644 index 4e6478d143..0000000000 --- a/continuous_integration/Dockerfile +++ /dev/null @@ -1,44 +0,0 @@ -ARG BASE_IMAGE -FROM ${BASE_IMAGE} as stage0 - -COPY --chown=coder:coder continuous_integration/home/coder/.local/bin/* /home/coder/.local/bin/ -COPY --chown=coder:coder . /home/coder/cunumeric - -RUN chmod a+x /home/coder/.local/bin/* - -#--------------------------------------------------- -FROM stage0 as setup - -USER coder -WORKDIR /home/coder - -RUN set -x && . conda-utils && \ - get_yaml_and_make_conda_env && \ - install_legate_core_with_war - -#--------------------------------------------------- -FROM setup as build -USER coder -WORKDIR /home/coder - -ARG GITHUB_TOKEN -ENV GITHUB_TOKEN=${GITHUB_TOKEN} -ARG AWS_SESSION_TOKEN -ENV AWS_SESSION_TOKEN=${AWS_SESSION_TOKEN} -ARG AWS_ACCESS_KEY_ID -ENV AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID} -ARG AWS_SECRET_ACCESS_KEY -ENV AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY} - -COPY --chown=coder:coder .creds /run/secrets - -RUN entrypoint build-cunumeric-all - -#--------------------------------------------------- -FROM stage0 as final -USER coder -WORKDIR /home/coder - -COPY --from=build --chown=coder:coder /tmp/out /tmp/out -COPY --from=build --chown=coder:coder /tmp/conda-build /tmp/conda-build -COPY --from=build --chown=coder:coder /tmp/env_yaml /tmp/env_yaml diff --git a/continuous_integration/dot-gitconfig b/continuous_integration/dot-gitconfig new file mode 100644 index 0000000000..91ac79c701 --- /dev/null +++ b/continuous_integration/dot-gitconfig @@ -0,0 +1,3 @@ +[user] + email = users.noreply.github.com + name = anon \ No newline at end of file diff --git a/continuous_integration/home/coder/.local/bin/build-cunumeric-all b/continuous_integration/home/coder/.local/bin/build-cunumeric-all deleted file mode 100644 index 62c6da0d7e..0000000000 --- a/continuous_integration/home/coder/.local/bin/build-cunumeric-all +++ /dev/null @@ -1,17 +0,0 @@ -#!/usr/bin/env bash - - -build_cunumeric_all() { - set -x - cd ~/; - - conda info - - set -euo pipefail; - - build-cunumeric-cpp; - build-cunumeric-wheel; - build-cunumeric-conda; -} - -(build_cunumeric_all "$@"); diff --git a/continuous_integration/scripts/build-cunumeric-all b/continuous_integration/scripts/build-cunumeric-all new file mode 100755 index 0000000000..bcdbf62ec5 --- /dev/null +++ b/continuous_integration/scripts/build-cunumeric-all @@ -0,0 +1,44 @@ +#!/usr/bin/env bash + +setup_env() { + yaml_file=$(find ~/.artifacts -name "environment*.yaml" | head -n 1) + + [ "${USE_CUDA:-}" = "ON" ] && + echo " - libcublas-dev" >> "${yaml_file}" && + echo " - libcufft-dev" >> "${yaml_file}" && + echo " - libcurand-dev" >> "${yaml_file}" && + echo " - libcusolver-dev" >> "${yaml_file}"; + + echo "YAML file..." 
+ cat "${yaml_file}" + + mkdir -p /tmp/out; + + cp "${yaml_file}" /tmp/out + + mamba env create -n legate -f "$yaml_file" + + mamba uninstall -yn legate numpy + + mamba install -yn legate -c ~/.artifacts/legate_core -c conda-forge -c nvidia legate-core + + mamba activate legate +} + +build_cunumeric_all() { + set -xeo pipefail + + setup_env; + cd ~/cunumeric; + conda info; + + set -xeuo pipefail; + printf "\n\n\n\n********* BUILDING CUNUMERIC CPP *********\n" + build-cunumeric-cpp; + printf "\n\n\n\n********* BUILDING CUNUMERIC WHEEL *********\n" + build-cunumeric-wheel; + printf "\n\n\n\n********* BUILDING CUNUMERIC CONDA *********\n" + build-cunumeric-conda; +} + +(build_cunumeric_all "$@"); diff --git a/continuous_integration/home/coder/.local/bin/build-cunumeric-conda b/continuous_integration/scripts/build-cunumeric-conda similarity index 86% rename from continuous_integration/home/coder/.local/bin/build-cunumeric-conda rename to continuous_integration/scripts/build-cunumeric-conda index 0be424252d..ee4efefcb2 100755 --- a/continuous_integration/home/coder/.local/bin/build-cunumeric-conda +++ b/continuous_integration/scripts/build-cunumeric-conda @@ -9,6 +9,9 @@ build_cunumeric_conda_package() { python_version="$(python3 --version 2>&1 | cut -d' ' -f2 | cut -d'.' -f3 --complement)"; fi + mkdir -p /tmp/conda-build /tmp/out + cp -r ~/.artifacts/legate_core /tmp/conda-build/ + local conda_build_args=(); conda_build_args+=(--override-channels); conda_build_args+=(-c conda-forge); @@ -34,7 +37,21 @@ build_cunumeric_conda_package() { # Synthesize new cunumeric conda-build build.sh script - cat <> ~/cunumeric/conda/conda-build/conda_build_config.yaml + cat < ~/cunumeric/conda/conda-build/conda_build_config.yaml +gpu_enabled: + - "${GPU_ENABLED}" + +python: + - "${python_version}" + +numpy_version: + - ">=1.22" + +cmake_version: + - ">=3.20.1,!=3.23.0" + +use_local_path: + - "true" numpy: - 1.22 diff --git a/continuous_integration/home/coder/.local/bin/build-cunumeric-cpp b/continuous_integration/scripts/build-cunumeric-cpp similarity index 93% rename from continuous_integration/home/coder/.local/bin/build-cunumeric-cpp rename to continuous_integration/scripts/build-cunumeric-cpp index 83f6dcd8c7..fd08ceac2f 100755 --- a/continuous_integration/home/coder/.local/bin/build-cunumeric-cpp +++ b/continuous_integration/scripts/build-cunumeric-cpp @@ -9,7 +9,6 @@ build_cunumeric_cpp() { cmake_args+=(-DBUILD_MARCH=${BUILD_MARCH:-haswell}); cmake_args+=(-DCMAKE_BUILD_TYPE=Release); cmake_args+=(-DCMAKE_CUDA_ARCHITECTURES=RAPIDS); - cmake_args+=(-Dlegate_core_ROOT=$HOME/legate/build); cmake_args+=(-DCMAKE_BUILD_PARALLEL_LEVEL=${JOBS:-$(nproc --ignore=1)}); cmake_args+=(${@}); diff --git a/continuous_integration/home/coder/.local/bin/build-cunumeric-wheel b/continuous_integration/scripts/build-cunumeric-wheel similarity index 100% rename from continuous_integration/home/coder/.local/bin/build-cunumeric-wheel rename to continuous_integration/scripts/build-cunumeric-wheel diff --git a/continuous_integration/scripts/entrypoint b/continuous_integration/scripts/entrypoint new file mode 100755 index 0000000000..298fc1c7a1 --- /dev/null +++ b/continuous_integration/scripts/entrypoint @@ -0,0 +1,46 @@ +#!/usr/bin/env bash + +sccache_stop_server_and_show_stats() { + sccache --stop-server || true && sccache --show-stats; +} + +init_devcontainer() { + # disable xtrace and history + local xtrace_enabled=$(echo "${SHELLOPTS:-}" | grep -q 'xtrace'; echo $?); + local history_enabled=$(echo "${SHELLOPTS:-}" | grep -q 
'history'; echo $?); + { set +xo history; } 2>/dev/null; + eval "export $(find /run/secrets/ -type f -exec bash -c 'echo ${0/\/run\/secrets\//}=$(<${0})' {} \;)"; + if [ "${history_enabled}" -eq "0" ]; then { set -o history; } 2>/dev/null; fi; + if [ "${xtrace_enabled}" -eq "0" ]; then { set -o xtrace; } 2>/dev/null; fi; + + set -xeo pipefail + + . devcontainer-utils-post-attach-command; + + sleep 10; + . devcontainer-utils-vault-s3-test; + . devcontainer-utils-vault-s3-export 0; +} + +entrypoint() { + set -x + + mkdir -p /home/coder/.cache; + + local secrets_dir=/run/secrets + + if [ -d "$secrets_dir" ] && [ "$(ls -A $secrets_dir)" ]; then + init_devcontainer + else + sccache_stop_server_and_show_stats + fi + + echo AWS_REGION=${AWS_REGION:-} + echo AWS_SESSION_TOKEN=${AWS_SESSION_TOKEN:-} + echo AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID:-} + echo AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY:-} + + exec "$@"; +} + +entrypoint "$@"; diff --git a/continuous_integration/scripts/test-cunumeric b/continuous_integration/scripts/test-cunumeric new file mode 100755 index 0000000000..ca57b42e97 --- /dev/null +++ b/continuous_integration/scripts/test-cunumeric @@ -0,0 +1,61 @@ +#!/usr/bin/env bash + +setup_env() { + mamba create -yn legate -c ~/.artifacts/conda-build/legate_core -c ~/.artifacts/conda-build/cunumeric -c conda-forge -c "nvidia/label/cuda-12.0.0" legate-core cunumeric +} + +setup_test_env() { + mamba install -y "clang-tools>=8" "clang>=8" colorama coverage mock pre-commit pytest-cov pytest-lazy-fixture pytest-mock pytest types-docutils pynvml + + pip install tifffile +} + +setup_docs_env() { + mamba install -y pandoc doxygen + pip install ipython jinja2 "markdown<3.4.0" "pydata-sphinx-theme>=0.13" myst-parser nbsphinx sphinx-copybutton "sphinx>=4.4.0" + +} + +setup_mypy_env() { + mamba install -y "mypy>=0.961" jinja2 nbsphinx sphinx-copybutton "sphinx>=4.4.0" types-docutils +} + +test-cunumeric() { + set -xeo pipefail + + setup_env; + + set +u + mamba activate legate; + conda info; + + cd ~/cunumeric; + + case "$1" in + "test") + echo "Executing tests..." + shift; + setup_test_env; + ./test.py --verbose "$@" + ;; + "mypy") + echo "Installing and executing mypy..." + shift; + setup_mypy_env; + mypy cunumeric + ;; + "docs") + echo "Building docs..." + shift; + setup_docs_env; + cd docs/cunumeric + make clean html + ;; + *) + echo "Invalid command: $1" + return 1 + ;; + esac +} + +(test-cunumeric "$@"); \ No newline at end of file From c813d99ec910d395b96d4b2b9c70aad0f2a66b00 Mon Sep 17 00:00:00 2001 From: Sandeep Datta <128171450+sandeepd-nv@users.noreply.github.com> Date: Wed, 27 Sep 2023 21:47:50 +0530 Subject: [PATCH 02/18] Enable OpenMP tests in CI. (#1051) * Enable OpenMP tests in CI. * Removed ci.yml. 
--- .github/workflows/ci.yml | 136 ------------------------ .github/workflows/gh-build-and-test.yml | 4 +- 2 files changed, 2 insertions(+), 138 deletions(-) delete mode 100644 .github/workflows/ci.yml diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml deleted file mode 100644 index c147cd17da..0000000000 --- a/.github/workflows/ci.yml +++ /dev/null @@ -1,136 +0,0 @@ -name: Build cunumeric -on: - push: - branches-ignore: - - gh-pages # deployment target branch (this workflow should not exist on that branch anyway) - pull_request: - branches-ignore: - - gh-pages # deployment target branch (this workflow should not exist on that branch anyway) - schedule: - # * is a special character in YAML so you have to quote this string - - cron: '0 */6 * * *' -env: - COMMIT: ${{ github.event.pull_request.head.sha || github.sha }} - PROJECT: github-cunumeric-ci - REF: ${{ github.event.pull_request.head.ref || github.ref }} - BASE_REF: ${{ github.event.pull_request.base.ref || github.ref }} - EVENT_NAME: ${{ github.event_name }} - LABEL: ${{ github.event.pull_request.head.label }} - # Prevent output buffering - PYTHONUNBUFFERED: 1 -jobs: - build: - if: ${{ github.repository == 'nv-legate/cunumeric' }} - runs-on: self-hosted - steps: - - name: Dump GitHub context - env: - GITHUB_CONTEXT: ${{ toJSON(github) }} - run: echo "$GITHUB_CONTEXT" - - name: Dump job context - env: - JOB_CONTEXT: ${{ toJSON(job) }} - run: echo "$JOB_CONTEXT" - - name: Dump steps context - env: - STEPS_CONTEXT: ${{ toJSON(steps) }} - run: echo "$STEPS_CONTEXT" - - name: Dump runner context - env: - RUNNER_CONTEXT: ${{ toJSON(runner) }} - run: echo "$RUNNER_CONTEXT" - - name: Dump strategy context - env: - STRATEGY_CONTEXT: ${{ toJSON(strategy) }} - run: echo "$STRATEGY_CONTEXT" - - name: Dump matrix context - env: - MATRIX_CONTEXT: ${{ toJSON(matrix) }} - run: echo "$MATRIX_CONTEXT" - - name: Run CI build - run: | - /data/github-runner/legate-bin/setup.sh - cd legate-ci/github-ci/cunumeric - rm -rf ngc-artifacts || true - ./build-separate.sh > ${COMMIT}-build.log 2>&1 - - name: Process Output - run: | - cd legate-ci/github-ci/cunumeric - cat *artifacts/*/* - if: always() - - name: Upload Build Log - if: always() - uses: actions/upload-artifact@v3 - with: - name: build-log - path: ./**/${{ env.COMMIT }}-build.log.gpg - test: - if: ${{ github.repository == 'nv-legate/cunumeric' }} - runs-on: self-hosted - needs: build - strategy: - fail-fast: false - matrix: - include: - - {name: 1 CPU test, options: --cpus 1 --unit --debug, log: cpu} - - {name: 2 CPUs test, options: --cpus 2 --debug, log: cpus} - - {name: GPU test, options: --use cuda --gpus 1 --debug, log: gpu} - - {name: 2 GPUs test, options: --use cuda --gpus 2 --debug, log: gpus} - - {name: OpenMP test, options: --use openmp --omps 1 --ompthreads 2 --debug, log: omp} - - {name: 2 NUMA OpenMPs test, options: --use openmp --omps 2 --ompthreads 2 --numamem 2048 --debug, log: omps} - - {name: Eager execution test, options: --use eager --debug, log: eager} - - {name: mypy, options: mypy, log: mypy} - - {name: documentation, options: docs, log: docs} - name: ${{ matrix.name }} - steps: - - name: Dump GitHub context - env: - GITHUB_CONTEXT: ${{ toJSON(github) }} - run: echo "$GITHUB_CONTEXT" - - name: Dump job context - env: - JOB_CONTEXT: ${{ toJSON(job) }} - run: echo "$JOB_CONTEXT" - - name: Dump steps context - env: - STEPS_CONTEXT: ${{ toJSON(steps) }} - run: echo "$STEPS_CONTEXT" - - name: Dump runner context - env: - RUNNER_CONTEXT: ${{ toJSON(runner) }} - run: echo 
"$RUNNER_CONTEXT" - - name: Dump strategy context - env: - STRATEGY_CONTEXT: ${{ toJSON(strategy) }} - run: echo "$STRATEGY_CONTEXT" - - name: Dump matrix context - env: - MATRIX_CONTEXT: ${{ toJSON(matrix) }} - run: echo "$MATRIX_CONTEXT" - - name: Prepare - run: | - /data/github-runner/legate-bin/setup.sh - cd legate-ci/github-ci/cunumeric - if [[ ! -d ngc-artifacts ]] - then - mkdir ngc-artifacts - else - rm -rf ngc-artifacts/* - fi - - name: Test - run: | - cd legate-ci/github-ci/cunumeric - [[ "${{ matrix.name }}" == "Eager"* ]] && export PYTHONFAULTHANDLER=1 - ./test.sh ${{ matrix.options }} > ${COMMIT}-test-${{ matrix.log }}.log 2>&1 - - name: Process output - if: always() - run: | - cd legate-ci/github-ci/cunumeric - /data/github-runner/legate-bin/encrypt.sh ${COMMIT}-test-${{ matrix.log }}.log - cat *artifacts/*/* - - name: Upload Log - if: always() - uses: actions/upload-artifact@v3 - with: - name: test-${{ matrix.log }}-log - path: ./**/${{ env.COMMIT }}-test-${{ matrix.log }}.log.gpg diff --git a/.github/workflows/gh-build-and-test.yml b/.github/workflows/gh-build-and-test.yml index e0e87899da..3766a07ee6 100644 --- a/.github/workflows/gh-build-and-test.yml +++ b/.github/workflows/gh-build-and-test.yml @@ -54,13 +54,13 @@ jobs: options: test --use openmp --omps 1 --ompthreads 2 --debug runner: ${{ inputs.device == 'gpu' && 'linux-amd64-gpu-v100-latest-1' || 'linux-amd64-32cpu' }} has-gpu: ${{ inputs.device == 'gpu' }} - enabled: false + enabled: true - name: 2 NUMA OpenMPs test options: test --use openmp --omps 2 --ompthreads 2 --numamem 2048 --debug runner: ${{ inputs.device == 'gpu' && 'linux-amd64-gpu-v100-latest-1' || 'linux-amd64-32cpu' }} has-gpu: ${{ inputs.device == 'gpu' }} - enabled: false + enabled: true - name: Eager execution test options: test --use eager --debug From 45e76b26baadf332c4a8036773f2968e08dd8189 Mon Sep 17 00:00:00 2001 From: Marcin Zalewski Date: Fri, 29 Sep 2023 08:51:18 -0700 Subject: [PATCH 03/18] Update version to 23.11 (#1052) * Update version * Update legion version --- CMakeLists.txt | 2 +- cmake/versions.json | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 68dfe83f6f..5f1c6a8581 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -55,7 +55,7 @@ include(rapids-cuda) include(rapids-export) include(rapids-find) -set(cunumeric_version 23.09.00) +set(cunumeric_version 23.11.00) # For now we want the optimization flags to match on both normal make and cmake # builds so we override the cmake defaults here for release, this changes diff --git a/cmake/versions.json b/cmake/versions.json index 7a98894dd5..b99da26cb4 100644 --- a/cmake/versions.json +++ b/cmake/versions.json @@ -1,11 +1,11 @@ { "packages" : { "legate_core" : { - "version": "23.09.00", + "version": "23.11.00", "git_url" : "https://github.com/nv-legate/legate.core.git", "git_shallow": false, "always_download": false, - "git_tag" : "06b0e4d7fded0b4207fd8b4ba34c330333ee3543" + "git_tag" : "8997f997be02936304b3ac23fe785f1de7a3424b" } } } From 233ef08480187deb2b5f013b7c1f89341ae454e9 Mon Sep 17 00:00:00 2001 From: Irina Demeshko Date: Tue, 3 Oct 2023 14:30:41 -0700 Subject: [PATCH 04/18] replacing set with OrderedSet to avoid control-replication violations (#1054) replacing set with OrderedSet to avoid control-replication violations --- cunumeric/_ufunc/ufunc.py | 3 ++- cunumeric/array.py | 10 +++++----- cunumeric/coverage.py | 5 +++-- cunumeric/deferred.py | 17 +++++++++-------- cunumeric/utils.py | 15 +++++++++++---- 
tests/integration/test_einsum.py | 13 ++++++++----- tests/integration/utils/contractions.py | 8 ++++++-- 7 files changed, 44 insertions(+), 27 deletions(-) diff --git a/cunumeric/_ufunc/ufunc.py b/cunumeric/_ufunc/ufunc.py index 3079f32616..11800e53f2 100644 --- a/cunumeric/_ufunc/ufunc.py +++ b/cunumeric/_ufunc/ufunc.py @@ -17,6 +17,7 @@ from typing import TYPE_CHECKING, Any, Dict, Sequence, Union import numpy as np +from legate.core.utils import OrderedSet from ..array import check_writeable, convert_to_cunumeric_ndarray, ndarray from ..config import BinaryOpCode, UnaryOpCode, UnaryRedCode @@ -552,7 +553,7 @@ def _find_common_type( arrs: Sequence[ndarray], orig_args: Sequence[Any] ) -> np.dtype[Any]: all_ndarray = all(isinstance(arg, ndarray) for arg in orig_args) - unique_dtypes = set(arr.dtype for arr in arrs) + unique_dtypes = OrderedSet(arr.dtype for arr in arrs) # If all operands are ndarrays and they all have the same dtype, # we already know the common dtype if len(unique_dtypes) == 1 and all_ndarray: diff --git a/cunumeric/array.py b/cunumeric/array.py index 9a172305c1..dd7079fa30 100644 --- a/cunumeric/array.py +++ b/cunumeric/array.py @@ -25,7 +25,6 @@ Literal, Optional, Sequence, - Set, TypeVar, Union, cast, @@ -33,6 +32,7 @@ import numpy as np from legate.core import Array, Field +from legate.core.utils import OrderedSet from numpy.core.multiarray import ( # type: ignore [attr-defined] normalize_axis_index, ) @@ -90,7 +90,7 @@ def add_boilerplate( parameter (if present), to cuNumeric ndarrays. * Convert the special "where" parameter (if present) to a valid predicate. """ - keys = set(array_params) + keys = OrderedSet(array_params) assert len(keys) == len(array_params) def decorator(func: Callable[P, R]) -> Callable[P, R]: @@ -100,11 +100,11 @@ def decorator(func: Callable[P, R]) -> Callable[P, R]: # For each parameter specified by name, also consider the case where # it's passed as a positional parameter. - indices: Set[int] = set() + indices: OrderedSet[int] = OrderedSet() where_idx: Optional[int] = None out_idx: Optional[int] = None params = signature(func).parameters - extra = keys - set(params) + extra = keys - OrderedSet(params) assert len(extra) == 0, f"unknown parameter(s): {extra}" for idx, param in enumerate(params): if param == "where": @@ -2435,7 +2435,7 @@ def _diag_helper( else: assert axes is not None N = len(axes) - if len(axes) != len(set(axes)): + if len(axes) != len(OrderedSet(axes)): raise ValueError( "axes passed to _diag_helper should be all different" ) diff --git a/cunumeric/coverage.py b/cunumeric/coverage.py index 55f74d238d..a8e57285f5 100644 --- a/cunumeric/coverage.py +++ b/cunumeric/coverage.py @@ -36,6 +36,7 @@ ) from legate.core import track_provenance +from legate.core.utils import OrderedSet from typing_extensions import Protocol from .runtime import runtime @@ -62,7 +63,7 @@ def filter_namespace( omit_names: Optional[Container[str]] = None, omit_types: tuple[type, ...] 
= (), ) -> dict[str, Any]: - omit_names = omit_names or set() + omit_names = omit_names or OrderedSet() return { attr: value for attr, value in ns.items() @@ -330,7 +331,7 @@ def clone_class( """ class_name = f"{origin_class.__module__}.{origin_class.__name__}" - clean_omit_names = set() if omit_names is None else omit_names + clean_omit_names = OrderedSet() if omit_names is None else omit_names def _clone_class(cls: type) -> type: missing = filter_namespace( diff --git a/cunumeric/deferred.py b/cunumeric/deferred.py index 5cbea74bfb..6a56f65dbe 100644 --- a/cunumeric/deferred.py +++ b/cunumeric/deferred.py @@ -36,6 +36,7 @@ import legate.core.types as ty import numpy as np from legate.core import Annotation, Future, ReductionOp, Store +from legate.core.utils import OrderedSet from numpy.core.numeric import ( # type: ignore [attr-defined] normalize_axis_tuple, ) @@ -96,7 +97,7 @@ def auto_convert( """ Converts all named parameters to DeferredArrays. """ - keys = set(thunk_params) + keys = OrderedSet(thunk_params) assert len(keys) == len(thunk_params) def decorator(func: Callable[P, R]) -> Callable[P, R]: @@ -107,7 +108,7 @@ def decorator(func: Callable[P, R]) -> Callable[P, R]: # For each parameter specified by name, also consider the case where # it's passed as a positional parameter. params = signature(func).parameters - extra = keys - set(params) + extra = keys - OrderedSet(params) assert len(extra) == 0, f"unknown parameter(s): {extra}" indices = {idx for (idx, param) in enumerate(params) if param in keys} @@ -1429,7 +1430,7 @@ def fft( task.add_scalar_arg(kind.type_id, ty.int32) task.add_scalar_arg(direction.value, ty.int32) task.add_scalar_arg( - len(set(axes)) != len(axes) + len(OrderedSet(axes)) != len(axes) or len(axes) != input.ndim or tuple(axes) != tuple(sorted(axes)), ty.bool_, @@ -1437,8 +1438,8 @@ def fft( for ax in axes: task.add_scalar_arg(ax, ty.int64) - if input.ndim > len(set(axes)): - task.add_broadcast(input, axes=set(axes)) + if input.ndim > len(OrderedSet(axes)): + task.add_broadcast(input, axes=OrderedSet(axes)) else: task.add_broadcast(input) task.add_constraint(p_output == p_input) @@ -1502,9 +1503,9 @@ def contract( # Sanity checks # no duplicate modes within an array - assert len(lhs_modes) == len(set(lhs_modes)) - assert len(rhs1_modes) == len(set(rhs1_modes)) - assert len(rhs2_modes) == len(set(rhs2_modes)) + assert len(lhs_modes) == len(OrderedSet(lhs_modes)) + assert len(rhs1_modes) == len(OrderedSet(rhs1_modes)) + assert len(rhs2_modes) == len(OrderedSet(rhs2_modes)) # no singleton modes mode_counts: Counter[str] = Counter() mode_counts.update(lhs_modes) diff --git a/cunumeric/utils.py b/cunumeric/utils.py index 55a9b8c1e8..7071545238 100644 --- a/cunumeric/utils.py +++ b/cunumeric/utils.py @@ -22,6 +22,7 @@ import legate.core.types as ty import numpy as np +from legate.core.utils import OrderedSet from .types import NdShape @@ -194,8 +195,8 @@ def check_axes(a_axes: Axes, b_axes: Axes) -> None: len(a_axes) != len(b_axes) or len(a_axes) > a_ndim or len(b_axes) > b_ndim - or len(a_axes) != len(set(a_axes)) - or len(b_axes) != len(set(b_axes)) + or len(a_axes) != len(OrderedSet(a_axes)) + or len(b_axes) != len(OrderedSet(b_axes)) or any(ax < 0 for ax in a_axes) or any(ax < 0 for ax in b_axes) or any(ax >= a_ndim for ax in a_axes) @@ -211,8 +212,14 @@ def check_axes(a_axes: Axes, b_axes: Axes) -> None: b_modes = list(ascii_uppercase[:b_ndim]) for a_i, b_i in zip(a_axes, b_axes): b_modes[b_i] = a_modes[a_i] - a_out = [a_modes[a_i] for a_i in 
sorted(set(range(a_ndim)) - set(a_axes))] - b_out = [b_modes[b_i] for b_i in sorted(set(range(b_ndim)) - set(b_axes))] + a_out = [ + a_modes[a_i] + for a_i in sorted(OrderedSet(range(a_ndim)) - OrderedSet(a_axes)) + ] + b_out = [ + b_modes[b_i] + for b_i in sorted(OrderedSet(range(b_ndim)) - OrderedSet(b_axes)) + ] return (a_modes, b_modes, a_out + b_out) diff --git a/tests/integration/test_einsum.py b/tests/integration/test_einsum.py index 96492b7239..e482e8cf09 100644 --- a/tests/integration/test_einsum.py +++ b/tests/integration/test_einsum.py @@ -19,6 +19,7 @@ import numpy as np import pytest +from legate.core.utils import OrderedSet from utils.comparisons import allclose from utils.generators import mk_0to1_array, permutes_to @@ -54,8 +55,8 @@ def gen_operand( return # If we've hit the limit on distinct modes, only use modes # appearing on the same operand - if len(op) == dim_lim - 1 and len(set(op)) >= mode_lim: - for m in sorted(set(op)): + if len(op) == dim_lim - 1 and len(OrderedSet(op)) >= mode_lim: + for m in sorted(OrderedSet(op)): op.append(m) yield from gen_operand(used_modes, dim_lim, mode_lim, op) op.pop() @@ -82,7 +83,7 @@ def gen_expr( if opers is None: opers = [] if cache is None: - cache = set() + cache = OrderedSet() # The goal here is to avoid producing duplicate expressions, up to # reordering of operands and alpha-renaming, e.g. the following # are considered equivalent (for the purposes of testing): @@ -108,7 +109,9 @@ def gen_expr( dim_lim = len(opers[-1]) if len(opers) > 0 else MAX_OPERAND_DIM # Between operands of the same length, put those with the most distinct # modes first. - mode_lim = len(set(opers[-1])) if len(opers) > 0 else MAX_OPERAND_DIM + mode_lim = ( + len(OrderedSet(opers[-1])) if len(opers) > 0 else MAX_OPERAND_DIM + ) for op in gen_operand(used_modes, dim_lim, mode_lim): opers.append(op) yield from gen_expr(opers, cache) @@ -187,7 +190,7 @@ def mk_input_that_broadcasts_to(lib, tgt_shape): # just one of them to 1. Consider the operation 'aab->ab': (10,10,11), # (10,10,1), (1,1,11), (1,1,1) are all acceptable input shapes, but # (1,10,11) is not. 
- tgt_sizes = list(sorted(set(tgt_shape))) + tgt_sizes = list(sorted(OrderedSet(tgt_shape))) res = [] for mask in product([True, False], repeat=len(tgt_sizes)): tgt2src_size = { diff --git a/tests/integration/utils/contractions.py b/tests/integration/utils/contractions.py index 46886f1165..c590adbe46 100644 --- a/tests/integration/utils/contractions.py +++ b/tests/integration/utils/contractions.py @@ -15,6 +15,7 @@ import numpy as np from legate.core import LEGATE_MAX_DIM +from legate.core.utils import OrderedSet import cunumeric as num @@ -38,7 +39,7 @@ def gen_output_default(lib, modes, a, b): def gen_shapes(a_modes, b_modes): yield ((5,) * len(a_modes), (5,) * len(b_modes)) - for mode_to_squeeze in set(a_modes + b_modes): + for mode_to_squeeze in OrderedSet(a_modes + b_modes): a_shape = tuple((1 if m == mode_to_squeeze else 5) for m in a_modes) b_shape = tuple((1 if m == mode_to_squeeze else 5) for m in b_modes) yield (a_shape, b_shape) @@ -104,7 +105,10 @@ def gen_output_of_various_types(lib, modes, a, b): def _test(name, modes, operation, gen_inputs, gen_output=None, **kwargs): (a_modes, b_modes, out_modes) = modes - if len(set(a_modes) | set(b_modes) | set(out_modes)) > LEGATE_MAX_DIM: + if ( + len(OrderedSet(a_modes) | OrderedSet(b_modes) | OrderedSet(out_modes)) + > LEGATE_MAX_DIM + ): # Total number of distinct modes can't exceed maximum Legion dimension, # because we may need to promote arrays so that one includes all modes. return From 2e520817283f63fcce3aec20fa74d0259cf9edc4 Mon Sep 17 00:00:00 2001 From: Bryan Van de Ven Date: Wed, 4 Oct 2023 08:24:59 -0700 Subject: [PATCH 05/18] Satisfy new flake8 type check rules (#1056) --- cunumeric/array.py | 4 ++-- cunumeric/module.py | 4 ++-- cunumeric/utils.py | 6 +++--- tests/integration/test_prod.py | 4 ++-- tests/integration/test_reduction.py | 4 ++-- tests/integration/test_squeeze.py | 4 ++-- tests/unit/cunumeric/test_coverage.py | 4 ++-- 7 files changed, 15 insertions(+), 15 deletions(-) diff --git a/cunumeric/array.py b/cunumeric/array.py index dd7079fa30..0176798bce 100644 --- a/cunumeric/array.py +++ b/cunumeric/array.py @@ -2554,7 +2554,7 @@ def diagonal( raise ValueError("extract can be true only for Ndim >=2") axes = None else: - if type(axis1) == int and type(axis2) == int: + if isinstance(axis1, int) and isinstance(axis2, int): if axes is not None: raise ValueError( "Either axis1/axis2 or axes must be supplied" @@ -3102,7 +3102,7 @@ def mean( Multiple GPUs, Multiple CPUs """ - if axis is not None and type(axis) != int: + if axis is not None and not isinstance(axis, int): raise NotImplementedError( "cunumeric.mean only supports int types for " "'axis' currently" diff --git a/cunumeric/module.py b/cunumeric/module.py index c676dc02c5..e8d933da65 100644 --- a/cunumeric/module.py +++ b/cunumeric/module.py @@ -4760,7 +4760,7 @@ def einsum_path( """ computed_operands = [convert_to_cunumeric_ndarray(op) for op in operands] memory_limit = _builtin_max(op.size for op in computed_operands) - if type(optimize) == tuple: + if isinstance(optimize, tuple): if len(optimize) != 2: raise ValueError("einsum_path expects optimize tuples of size 2") optimize, memory_limit = optimize @@ -4771,7 +4771,7 @@ def einsum_path( elif optimize in ["greedy", "optimal"]: pass elif ( - type(optimize) == list + isinstance(optimize, list) and len(optimize) > 1 and optimize[0] == "einsum_path" ): diff --git a/cunumeric/utils.py b/cunumeric/utils.py index 7071545238..93e45fb740 100644 --- a/cunumeric/utils.py +++ b/cunumeric/utils.py @@ -234,11 +234,11 @@ 
def deep_apply(obj: Any, func: Callable[[Any], Any]) -> Any: primarily meant to be used for arguments of NumPy API calls, which shouldn't nest their arrays very deep. """ - if type(obj) == list: + if isinstance(obj, list): return [deep_apply(x, func) for x in obj] - elif type(obj) == tuple: + elif isinstance(obj, tuple): return tuple(deep_apply(x, func) for x in obj) - elif type(obj) == dict: + elif isinstance(obj, dict): return {k: deep_apply(v, func) for k, v in obj.items()} else: return func(obj) diff --git a/tests/integration/test_prod.py b/tests/integration/test_prod.py index 8b627ecd12..ab0f4def8f 100644 --- a/tests/integration/test_prod.py +++ b/tests/integration/test_prod.py @@ -263,7 +263,7 @@ def test_out_axis(self, size): ndim = arr_np.ndim for axis in range(-ndim + 1, ndim, 1): out_shape = () - if type(size) == tuple: + if isinstance(size, tuple): out_shape_list = list(size) del out_shape_list[axis] out_shape = tuple(out_shape_list) @@ -283,7 +283,7 @@ def test_out_axis_dtype(self, size): ndim = arr_np.ndim for axis in range(-ndim + 1, ndim, 1): out_shape = () - if type(size) == tuple: + if isinstance(size, tuple): out_shape_list = list(size) del out_shape_list[axis] out_shape = tuple(out_shape_list) diff --git a/tests/integration/test_reduction.py b/tests/integration/test_reduction.py index 58f133a1ff..a7a89a6af0 100644 --- a/tests/integration/test_reduction.py +++ b/tests/integration/test_reduction.py @@ -219,7 +219,7 @@ def test_out_axis(self, size): ndim = arr_np.ndim for axis in range(-ndim + 1, ndim, 1): out_shape = () - if type(size) == tuple: + if isinstance(size, tuple): out_shape_list = list(size) del out_shape_list[axis] out_shape = tuple(out_shape_list) @@ -239,7 +239,7 @@ def test_out_axis_dtype(self, size): ndim = arr_np.ndim for axis in range(-ndim + 1, ndim, 1): out_shape = () - if type(size) == tuple: + if isinstance(size, tuple): out_shape_list = list(size) del out_shape_list[axis] out_shape = tuple(out_shape_list) diff --git a/tests/integration/test_squeeze.py b/tests/integration/test_squeeze.py index 84ac8be2e0..14c2fda0d1 100644 --- a/tests/integration/test_squeeze.py +++ b/tests/integration/test_squeeze.py @@ -125,7 +125,7 @@ def test_array_basic(size): @pytest.mark.parametrize( - "size", (s for s in SIZES if type(s) == tuple if 1 in s), ids=str + "size", (s for s in SIZES if isinstance(s, tuple) if 1 in s), ids=str ) def test_num_axis(size): a = np.random.randint(low=-10, high=10, size=size) @@ -139,7 +139,7 @@ def test_num_axis(size): @pytest.mark.parametrize( - "size", (s for s in SIZES if type(s) == tuple if 1 in s), ids=str + "size", (s for s in SIZES if isinstance(s, tuple) if 1 in s), ids=str ) def test_array_axis(size): a = np.random.randint(low=-10, high=10, size=size) diff --git a/tests/unit/cunumeric/test_coverage.py b/tests/unit/cunumeric/test_coverage.py index 0ce089e094..ca683b51c5 100644 --- a/tests/unit/cunumeric/test_coverage.py +++ b/tests/unit/cunumeric/test_coverage.py @@ -388,8 +388,8 @@ def __array_prepare__(self): return "I am now ready" def foo(self, other): - assert type(self) == _Orig_ndarray - assert type(other) == _Orig_ndarray + assert type(self) == _Orig_ndarray # noqa + assert type(other) == _Orig_ndarray # noqa return "original foo" def bar(self, other): From 74490ef5f6ef44c1eb3adf39f90fdf09912fb1e1 Mon Sep 17 00:00:00 2001 From: Manolis Papadakis Date: Wed, 4 Oct 2023 09:32:52 -0700 Subject: [PATCH 06/18] Inline boolean operators in NumPy are bitwise, not logical (#1057) * Inline boolean operators in NumPy are bitwise, not 
logical * Add tests for inline operators --- cunumeric/array.py | 28 +++++------ tests/integration/test_binary_ufunc.py | 69 +++++++++++++++++++------- tests/integration/test_unary_ufunc.py | 53 ++++++++++++-------- 3 files changed, 97 insertions(+), 53 deletions(-) diff --git a/cunumeric/array.py b/cunumeric/array.py index 0176798bce..91ad41dde5 100644 --- a/cunumeric/array.py +++ b/cunumeric/array.py @@ -817,9 +817,9 @@ def __and__(self, rhs: Any) -> ndarray: Multiple GPUs, Multiple CPUs """ - from ._ufunc import logical_and + from ._ufunc import bitwise_and - return logical_and(self, rhs) + return bitwise_and(self, rhs) def __array__( self, dtype: Union[np.dtype[Any], None] = None @@ -1073,9 +1073,9 @@ def __iand__(self, rhs: Any) -> ndarray: Multiple GPUs, Multiple CPUs """ - from ._ufunc import logical_and + from ._ufunc import bitwise_and - return logical_and(self, rhs, out=self) + return bitwise_and(self, rhs, out=self) def __idiv__(self, rhs: Any) -> ndarray: """a.__idiv__(value, /) @@ -1186,9 +1186,9 @@ def __ior__(self, rhs: Any) -> ndarray: Multiple GPUs, Multiple CPUs """ - from ._ufunc import logical_or + from ._ufunc import bitwise_or - return logical_or(self, rhs, out=self) + return bitwise_or(self, rhs, out=self) def __ipow__(self, rhs: float) -> ndarray: """a.__ipow__(/) @@ -1260,9 +1260,9 @@ def __ixor__(self, rhs: Any) -> ndarray: Multiple GPUs, Multiple CPUs """ - from ._ufunc import logical_xor + from ._ufunc import bitwise_xor - return logical_xor(self, rhs, out=self) + return bitwise_xor(self, rhs, out=self) def __le__(self, rhs: Any) -> ndarray: """a.__le__(value, /) @@ -1416,9 +1416,9 @@ def __or__(self, rhs: Any) -> ndarray: Multiple GPUs, Multiple CPUs """ - from ._ufunc import logical_or + from ._ufunc import bitwise_or - return logical_or(self, rhs) + return bitwise_or(self, rhs) def __pos__(self) -> ndarray: """a.__pos__(value, /) @@ -1473,9 +1473,9 @@ def __rand__(self, lhs: Any) -> ndarray: Multiple GPUs, Multiple CPUs """ - from ._ufunc import logical_and + from ._ufunc import bitwise_and - return logical_and(lhs, self) + return bitwise_and(lhs, self) def __rdiv__(self, lhs: Any) -> ndarray: """a.__rdiv__(value, /) @@ -1584,9 +1584,9 @@ def __ror__(self, lhs: Any) -> ndarray: Multiple GPUs, Multiple CPUs """ - from ._ufunc import logical_or + from ._ufunc import bitwise_or - return logical_or(lhs, self) + return bitwise_or(lhs, self) def __rpow__(self, lhs: Any) -> ndarray: """__rpow__(value, /) diff --git a/tests/integration/test_binary_ufunc.py b/tests/integration/test_binary_ufunc.py index 4d2a9b7db7..a6acef0494 100644 --- a/tests/integration/test_binary_ufunc.py +++ b/tests/integration/test_binary_ufunc.py @@ -41,32 +41,53 @@ def check_result(op, in_np, out_np, out_num): def check_ops(ops, in_np, out_dtype="D"): + in_num = tuple(num.array(arr) for arr in in_np) + for op in ops: - op_np = getattr(np, op) - op_num = getattr(num, op) + if op.isidentifier(): + op_np = getattr(np, op) + op_num = getattr(num, op) + assert op_np.nout == 1 + + out_np = op_np(*in_np) + out_num = op_num(*in_num) + + check_result(op, in_np, out_np, out_num) - assert op_np.nout == 1 + out_np = np.empty(out_np.shape, dtype=out_dtype) + out_num = num.empty(out_num.shape, dtype=out_dtype) + op_np(*in_np, out=out_np) + op_num(*in_num, out=out_num) - in_num = tuple(num.array(arr) for arr in in_np) + check_result(op, in_np, out_np, out_num) - out_np = op_np(*in_np) - out_num = op_num(*in_num) + # Ask cuNumeric to produce outputs to NumPy ndarrays + out_num = np.empty(out_np.shape, 
dtype=out_dtype) + op_num(*in_num, out=out_num) - check_result(op, in_np, out_np, out_num) + check_result(op, in_np, out_np, out_num) + + else: + # Doing it this way instead of invoking the dunders directly, to + # avoid having to select the right version, __add__ vs __radd__, + # when one isn't supported, e.g. for scalar.__add__(array) - out_np = np.empty(out_np.shape, dtype=out_dtype) - out_num = num.empty(out_num.shape, dtype=out_dtype) + out_np = eval(f"in_np[0] {op} in_np[1]") + out_num = eval(f"in_num[0] {op} in_num[1]") - op_np(*in_np, out=out_np) - op_num(*in_num, out=out_num) + check_result(op, in_np, out_np, out_num) - check_result(op, in_np, out_np, out_num) + out_np = np.ones_like(out_np) + out_num = num.ones_like(out_num) + exec(f"out_np {op}= in_np[0]") + exec(f"out_num {op}= in_num[0]") - # Ask cuNumeric to produce outputs to NumPy ndarrays - out_num = np.ones(out_np.shape, dtype=out_dtype) - op_num(*in_num, out_num) + check_result(op, in_np, out_np, out_num) - check_result(op, in_np, out_np, out_num) + out_num = np.ones_like(out_np) + exec(f"out_num {op}= in_num[0]") + + check_result(op, in_np, out_np, out_num) def test_all(): @@ -74,8 +95,14 @@ def test_all(): # for some boring inputs. For some of these, we will want to # test corner cases in the future. + # TODO: matmul, @ + # Math operations ops = [ + "*", + "+", + "-", + "/", "add", # "divmod", "equal", @@ -121,6 +148,7 @@ def test_all(): check_ops(ops, (scalar1, scalar2)) ops = [ + "//", "arctan2", "copysign", "floor_divide", @@ -142,6 +170,7 @@ def test_all(): check_ops(ops, (scalar1, scalar2)) ops = [ + "**", "power", "float_power", ] @@ -159,6 +188,7 @@ def test_all(): check_ops(ops, (scalars[3], scalars[0])) ops = [ + "%", "remainder", ] @@ -173,12 +203,17 @@ def test_all(): check_ops(ops, (scalar1, scalar2)) ops = [ + "&", + "<<", + ">>", + "^", + "|", "bitwise_and", "bitwise_or", "bitwise_xor", "gcd", - "left_shift", "lcm", + "left_shift", "right_shift", ] diff --git a/tests/integration/test_unary_ufunc.py b/tests/integration/test_unary_ufunc.py index c1deefe853..9d0021613c 100644 --- a/tests/integration/test_unary_ufunc.py +++ b/tests/integration/test_unary_ufunc.py @@ -61,39 +61,46 @@ def check_result(op, in_np, out_np, out_num, **isclose_kwargs): def check_op(op, in_np, out_dtype="d", **check_kwargs): - op_np = getattr(np, op) - op_num = getattr(num, op) + in_num = num.array(in_np) - assert op_np.nout == 1 + if op.isidentifier(): + op_np = getattr(np, op) + op_num = getattr(num, op) - in_num = num.array(in_np) + assert op_np.nout == 1 + + out_np = op_np(in_np) + out_num = op_num(in_num) - out_np = op_np(in_np) - out_num = op_num(in_num) + assert check_result(op, in_np, out_np, out_num, **check_kwargs) - assert check_result(op, in_np, out_np, out_num, **check_kwargs) + out_np = np.empty(out_np.shape, dtype=out_dtype) + out_num = num.empty(out_num.shape, dtype=out_dtype) - out_np = np.empty(out_np.shape, dtype=out_dtype) - out_num = num.empty(out_num.shape, dtype=out_dtype) + op_np(in_np, out=out_np) + op_num(in_num, out=out_num) - op_np(in_np, out=out_np) - op_num(in_num, out=out_num) + assert check_result(op, in_np, out_np, out_num, **check_kwargs) - assert check_result(op, in_np, out_np, out_num, **check_kwargs) + out_np = np.empty(out_np.shape, dtype=out_dtype) + out_num = num.empty(out_num.shape, dtype=out_dtype) - out_np = np.empty(out_np.shape, dtype=out_dtype) - out_num = num.empty(out_num.shape, dtype=out_dtype) + op_np(in_np, out_np) + op_num(in_num, out_num) - op_np(in_np, out_np) - op_num(in_num, 
out_num) + assert check_result(op, in_np, out_np, out_num, **check_kwargs) - assert check_result(op, in_np, out_np, out_num, **check_kwargs) + # Ask cuNumeric to produce outputs to NumPy ndarrays + out_num = np.ones(out_np.shape, dtype=out_dtype) + op_num(in_num, out_num) - # Ask cuNumeric to produce outputs to NumPy ndarrays - out_num = np.ones(out_np.shape, dtype=out_dtype) - op_num(in_num, out_num) + assert check_result(op, in_np, out_np, out_num, **check_kwargs) + + else: + out_np = eval(f"{op} in_np") + out_num = eval(f"{op} in_num") - assert check_result(op, in_np, out_np, out_num, **check_kwargs) + assert check_result(op, in_np, out_np, out_num, **check_kwargs) def check_ops(ops, in_np, out_dtype="d"): @@ -155,6 +162,8 @@ def check_math_ops(op, **kwargs): # Math operations math_ops = ( + "+", + "-", "absolute", "conjugate", "exp", @@ -283,7 +292,7 @@ def test_arc_hyp_trig_ops(op): check_op(op, np.array(np.random.uniform(low=1, high=5))) -bit_ops = ("invert",) +bit_ops = ("invert", "~") @pytest.mark.parametrize("op", bit_ops) From 0645bc7330c5896c8b767303735a997448b42d1e Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 4 Oct 2023 18:04:20 -0700 Subject: [PATCH 07/18] [pre-commit.ci] pre-commit autoupdate (#1055) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit updates: - [github.com/psf/black: 23.3.0 → 23.9.1](https://github.com/psf/black/compare/23.3.0...23.9.1) - [github.com/PyCQA/flake8: 6.0.0 → 6.1.0](https://github.com/PyCQA/flake8/compare/6.0.0...6.1.0) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Manolis Papadakis --- .pre-commit-config.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 1b637e8ae7..03cfc8b1c8 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -11,11 +11,11 @@ repos: hooks: - id: isort - repo: https://github.com/psf/black - rev: 23.3.0 + rev: 23.9.1 hooks: - id: black - repo: https://github.com/PyCQA/flake8 - rev: 6.0.0 + rev: 6.1.0 hooks: - id: flake8 - repo: https://github.com/pre-commit/mirrors-clang-format From 8138188a8bcccfdd3558f1133580b971a978688a Mon Sep 17 00:00:00 2001 From: Jeremy Date: Thu, 19 Oct 2023 12:04:09 -0700 Subject: [PATCH 08/18] Added variance as a unary reduction (#593) * added variance as a unary reduction * fix variance eager implementation * build fixes * Added more tests. * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Work-around (consistent) for 1D array. * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Fix for 1D arrays masquerading as Nd. * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Added relevant comment for 1D array branch. * Tests for keepdims. * Clean-up. * Fix for test_mean.py. * Dox fix: added var entry in RST file. * Put ignore directive back. * Fixed doc. * More dox fixes. * Fix the check for the cases that trigger use of VARIANCE * Addressed minor review comments on dox. * Commit fixes necessary for 1475898 to work * Addressed changes on np.square(). * Addressed changes on where arg. * Addressed changes on module.py var doc string. * Addressed changes on axis signature in var(). 
--------- Co-authored-by: Andrei Schaffer Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Andrei Schaffer <37386037+aschaffer@users.noreply.github.com> Co-authored-by: Manolis Papadakis Co-authored-by: Manolis Papadakis --- cunumeric/array.py | 151 +++++++++++-- cunumeric/config.py | 4 + cunumeric/deferred.py | 4 + cunumeric/eager.py | 20 ++ cunumeric/module.py | 73 +++++++ cunumeric/utils.py | 9 +- docs/cunumeric/source/api/ndarray.rst | 2 +- docs/cunumeric/source/api/statistics.rst | 1 + src/cunumeric/cunumeric_c.h | 2 + .../unary/scalar_unary_red_template.inl | 12 +- src/cunumeric/unary/unary_red_util.h | 52 +++++ tests/integration/test_stats.py | 205 ++++++++++++++++++ 12 files changed, 507 insertions(+), 28 deletions(-) create mode 100644 tests/integration/test_stats.py diff --git a/cunumeric/array.py b/cunumeric/array.py index 91ad41dde5..a219e416ac 100644 --- a/cunumeric/array.py +++ b/cunumeric/array.py @@ -54,7 +54,13 @@ from .coverage import FALLBACK_WARNING, clone_class, is_implemented from .runtime import runtime from .types import NdShape -from .utils import deep_apply, dot_modes, to_core_dtype +from .utils import ( + calculate_volume, + deep_apply, + dot_modes, + to_core_dtype, + tuple_pop, +) if TYPE_CHECKING: from pathlib import Path @@ -3079,12 +3085,40 @@ def max( where=where, ) + def _summation_dtype( + self, dtype: Optional[np.dtype[Any]] + ) -> np.dtype[Any]: + # Pick our dtype if it wasn't picked yet + if dtype is None: + if self.dtype.kind != "f" and self.dtype.kind != "c": + return np.dtype(np.float64) + else: + return self.dtype + return dtype + + def _normalize_summation( + self, sum_array: Any, axis: Any, dtype: np.dtype[Any], ddof: int = 0 + ) -> None: + if axis is None: + divisor = reduce(lambda x, y: x * y, self.shape, 1) - ddof + else: + divisor = self.shape[axis] - ddof + + # Divide by the number of things in the collapsed dimensions + # Pick the right kinds of division based on the dtype + if dtype.kind == "f" or dtype.kind == "c": + sum_array.__itruediv__( + np.array(divisor, dtype=sum_array.dtype), + ) + else: + sum_array.__ifloordiv__(np.array(divisor, dtype=sum_array.dtype)) + @add_boilerplate() def mean( self, axis: Any = None, - dtype: Union[np.dtype[Any], None] = None, - out: Union[ndarray, None] = None, + dtype: Optional[np.dtype[Any]] = None, + out: Optional[ndarray] = None, keepdims: bool = False, ) -> ndarray: """a.mean(axis=None, dtype=None, out=None, keepdims=False) @@ -3105,14 +3139,11 @@ def mean( if axis is not None and not isinstance(axis, int): raise NotImplementedError( "cunumeric.mean only supports int types for " - "'axis' currently" + "`axis` currently" ) - # Pick our dtype if it wasn't picked yet - if dtype is None: - if self.dtype.kind != "f" and self.dtype.kind != "c": - dtype = np.dtype(np.float64) - else: - dtype = self.dtype + + dtype = self._summation_dtype(dtype) + # Do the sum if out is not None and out.dtype == dtype: sum_array = self.sum( @@ -3127,18 +3158,9 @@ def mean( dtype=dtype, keepdims=keepdims, ) - if axis is None: - divisor = reduce(lambda x, y: x * y, self.shape, 1) - else: - divisor = self.shape[axis] - # Divide by the number of things in the collapsed dimensions - # Pick the right kinds of division based on the dtype - if dtype.kind == "f" or dtype.kind == "c": - sum_array.__itruediv__( - np.array(divisor, dtype=sum_array.dtype), - ) - else: - sum_array.__ifloordiv__(np.array(divisor, dtype=sum_array.dtype)) + + self._normalize_summation(sum_array, axis, 
dtype) + + # Convert to the output we didn't already put it there + if out is not None and sum_array is not out: + assert out.dtype != sum_array.dtype @@ -3147,6 +3169,91 @@ def mean( else: return sum_array + @add_boilerplate() + def var( + self, + axis: Optional[Union[int, tuple[int, ...]]] = None, + dtype: Optional[np.dtype[Any]] = None, + out: Optional[ndarray] = None, + ddof: int = 0, + keepdims: bool = False, + *, + where: Union[bool, ndarray] = True, + ) -> ndarray: + """a.var(axis=None, dtype=None, out=None, ddof=0, keepdims=False) + + Returns the variance of the array elements along the given axis. + + Refer to :func:`cunumeric.var` for full documentation. + + See Also + -------- + cunumeric.var : equivalent function + + Availability + -------- + Multiple GPUs, Multiple CPUs + + """ + if axis is not None and not isinstance(axis, int): + raise NotImplementedError( + "cunumeric.var only supports int types for `axis` currently" + ) + + # this could be computed as a single pass through the array + # by computing both <x^2> and <x> and then computing <x^2> - <x>^2. + # this would take the difference of two large numbers and is unstable; + # the mean needs to be computed first and the variance computed + # directly as <(x-mu)^2>, which then requires two passes through the + # data to first compute the mean and then compute the variance + # see https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance + # TODO(https://github.com/nv-legate/cunumeric/issues/590) + + dtype = self._summation_dtype(dtype) + # calculate the mean, but keep the dimensions so that the + # mean can be broadcast against the original array + mu = self.mean(axis=axis, dtype=dtype, keepdims=True) + + # 1D arrays (or equivalent) should benefit from this unary reduction: + # + if axis is None or calculate_volume(tuple_pop(self.shape, axis)) == 1: + # this is a scalar reduction and we can optimize this as a single + # pass through a scalar reduction + result = self._perform_unary_reduction( + UnaryRedCode.VARIANCE, + self, + axis=axis, + dtype=dtype, + out=out, + keepdims=keepdims, + where=where, + args=(mu,), + ) + else: + # TODO(https://github.com/nv-legate/cunumeric/issues/591) + # there isn't really support for generic binary reductions + # right now; all of the current binary reductions are boolean + # reductions like allclose. To implement this in a single pass + # would require a variant of einsum/dot that produces + # (self-mu)*(self-mu) rather than self*mu.
For now, we have to + # compute delta = self-mu in a first pass and then compute + # delta*delta in second pass + delta = self - mu + + result = self._perform_unary_reduction( + UnaryRedCode.SUM_SQUARES, + delta, + axis=axis, + dtype=dtype, + out=out, + keepdims=keepdims, + where=where, + ) + + self._normalize_summation(result, axis=axis, dtype=dtype, ddof=ddof) + + return result + @add_boilerplate() def min( self, diff --git a/cunumeric/config.py b/cunumeric/config.py index 6c5bbbb184..bdea334a16 100644 --- a/cunumeric/config.py +++ b/cunumeric/config.py @@ -187,6 +187,8 @@ class _CunumericSharedLib: CUNUMERIC_RED_NANSUM: int CUNUMERIC_RED_PROD: int CUNUMERIC_RED_SUM: int + CUNUMERIC_RED_SUM_SQUARES: int + CUNUMERIC_RED_VARIANCE: int CUNUMERIC_REPEAT: int CUNUMERIC_SCALAR_UNARY_RED: int CUNUMERIC_SCAN_GLOBAL: int @@ -452,6 +454,8 @@ class UnaryRedCode(IntEnum): NANSUM = _cunumeric.CUNUMERIC_RED_NANSUM PROD = _cunumeric.CUNUMERIC_RED_PROD SUM = _cunumeric.CUNUMERIC_RED_SUM + SUM_SQUARES = _cunumeric.CUNUMERIC_RED_SUM_SQUARES + VARIANCE = _cunumeric.CUNUMERIC_RED_VARIANCE # Match these to CuNumericBinaryOpCode in cunumeric_c.h diff --git a/cunumeric/deferred.py b/cunumeric/deferred.py index 6a56f65dbe..a67d9912d2 100644 --- a/cunumeric/deferred.py +++ b/cunumeric/deferred.py @@ -159,6 +159,8 @@ def __init__( _UNARY_RED_TO_REDUCTION_OPS: Dict[int, int] = { UnaryRedCode.SUM: ReductionOp.ADD, + UnaryRedCode.SUM_SQUARES: ReductionOp.ADD, + UnaryRedCode.VARIANCE: ReductionOp.ADD, UnaryRedCode.PROD: ReductionOp.MUL, UnaryRedCode.MAX: ReductionOp.MAX, UnaryRedCode.MIN: ReductionOp.MIN, @@ -209,6 +211,8 @@ def min_identity( _UNARY_RED_IDENTITIES: Dict[UnaryRedCode, Callable[[Any], Any]] = { UnaryRedCode.SUM: lambda _: 0, + UnaryRedCode.SUM_SQUARES: lambda _: 0, + UnaryRedCode.VARIANCE: lambda _: 0, UnaryRedCode.PROD: lambda _: 1, UnaryRedCode.MIN: min_identity, UnaryRedCode.MAX: max_identity, diff --git a/cunumeric/eager.py b/cunumeric/eager.py index 680f1b5a17..63284eb942 100644 --- a/cunumeric/eager.py +++ b/cunumeric/eager.py @@ -1524,6 +1524,26 @@ def unary_reduction( else where.array, **kws, ) + elif op == UnaryRedCode.SUM_SQUARES: + squared = np.square(rhs.array) + np.sum( + squared, + out=self.array, + axis=orig_axis, + where=where, + keepdims=keepdims, + ) + elif op == UnaryRedCode.VARIANCE: + (mu,) = args + centered = np.subtract(rhs.array, mu) + squares = np.square(centered) + np.sum( + squares, + axis=orig_axis, + where=where, + keepdims=keepdims, + out=self.array, + ) elif op == UnaryRedCode.CONTAINS: self.array.fill(args[0] in rhs.array) elif op == UnaryRedCode.COUNT_NONZERO: diff --git a/cunumeric/module.py b/cunumeric/module.py index e8d933da65..e2bbc78f7a 100644 --- a/cunumeric/module.py +++ b/cunumeric/module.py @@ -7061,6 +7061,79 @@ def mean( return a.mean(axis=axis, dtype=dtype, out=out, keepdims=keepdims) +@add_boilerplate("a") +def var( + a: ndarray, + axis: Optional[Union[int, tuple[int, ...]]] = None, + dtype: Optional[np.dtype[Any]] = None, + out: Optional[ndarray] = None, + ddof: int = 0, + keepdims: bool = False, + *, + where: Union[bool, ndarray] = True, +) -> ndarray: + """ + Compute the variance along the specified axis. + + Returns the variance of the array elements, a measure of the spread of + a distribution. The variance is computed for the flattened array + by default, otherwise over the specified axis. + + Parameters + ---------- + a : array_like + Array containing numbers whose variance is desired. If `a` is not an + array, a conversion is attempted. 
+ axis : None or int or tuple[int], optional + Axis or axes along which the variance is computed. The default is to + compute the variance of the flattened array. + + If this is a tuple of ints, a variance is performed over multiple axes, + instead of a single axis or all the axes as before. + dtype : data-type, optional + Type to use in computing the variance. For arrays of integer type + the default is float64; for arrays of float types + it is the same as the array type. + out : ndarray, optional + Alternate output array in which to place the result. It must have the + same shape as the expected output, but the type is cast if necessary. + ddof : int, optional + “Delta Degrees of Freedom”: the divisor used in the calculation is + N - ddof, where N represents the number of elements. By default + ddof is zero. + keepdims : bool, optional + If this is set to True, the axes which are reduced are left + in the result as dimensions with size one. With this option, + the result will broadcast correctly against the input array. + where : array_like of bool, optional + A boolean array which is broadcasted to match the dimensions of array, + and selects elements to include in the reduction. + + Returns + ------- + m : ndarray, see dtype parameter above + If `out=None`, returns a new array of the same dtype as above + containing the variance values, otherwise a reference to the output + array is returned. + + See Also + -------- + numpy.var + + Availability + -------- + Multiple GPUs, Multiple CPUs + """ + return a.var( + axis=axis, + dtype=dtype, + out=out, + ddof=ddof, + keepdims=keepdims, + where=where, + ) + + # Histograms diff --git a/cunumeric/utils.py b/cunumeric/utils.py index 93e45fb740..8c2d701401 100644 --- a/cunumeric/utils.py +++ b/cunumeric/utils.py @@ -18,7 +18,7 @@ from functools import reduce from string import ascii_lowercase, ascii_uppercase from types import FrameType -from typing import Any, Callable, List, Sequence, Tuple, Union +from typing import Any, Callable, List, Sequence, Tuple, TypeVar, Union import legate.core.types as ty import numpy as np @@ -108,6 +108,13 @@ def calculate_volume(shape: NdShape) -> int: return reduce(lambda x, y: x * y, shape) +T = TypeVar("T") + + +def tuple_pop(tup: Tuple[T, ...], index: int) -> Tuple[T, ...]: + return tup[:index] + tup[index + 1 :] + + Modes = Tuple[List[str], List[str], List[str]] diff --git a/docs/cunumeric/source/api/ndarray.rst b/docs/cunumeric/source/api/ndarray.rst index afdd1406f8..aca3b9ce0e 100644 --- a/docs/cunumeric/source/api/ndarray.rst +++ b/docs/cunumeric/source/api/ndarray.rst @@ -158,7 +158,7 @@ Calculation ndarray.sum ndarray.cumsum ndarray.mean - .. ndarray.var + ndarray.var .. 
ndarray.std ndarray.prod ndarray.cumprod diff --git a/docs/cunumeric/source/api/statistics.rst b/docs/cunumeric/source/api/statistics.rst index 7d844d7887..9227c93aef 100644 --- a/docs/cunumeric/source/api/statistics.rst +++ b/docs/cunumeric/source/api/statistics.rst @@ -10,6 +10,7 @@ Averages and variances :toctree: generated/ mean + var Histograms diff --git a/src/cunumeric/cunumeric_c.h b/src/cunumeric/cunumeric_c.h index 74c05fcd2a..b5b3928355 100644 --- a/src/cunumeric/cunumeric_c.h +++ b/src/cunumeric/cunumeric_c.h @@ -150,6 +150,8 @@ enum CuNumericUnaryRedCode { CUNUMERIC_RED_NANSUM, CUNUMERIC_RED_PROD, CUNUMERIC_RED_SUM, + CUNUMERIC_RED_SUM_SQUARES, + CUNUMERIC_RED_VARIANCE }; // Match these to BinaryOpCode in config.py diff --git a/src/cunumeric/unary/scalar_unary_red_template.inl b/src/cunumeric/unary/scalar_unary_red_template.inl index 9f073c716c..3ca19e8a7e 100644 --- a/src/cunumeric/unary/scalar_unary_red_template.inl +++ b/src/cunumeric/unary/scalar_unary_red_template.inl @@ -46,6 +46,7 @@ struct ScalarUnaryRed { Point origin; Point shape; RHS to_find; + RHS mu; bool dense; struct DenseReduction {}; @@ -61,6 +62,7 @@ struct ScalarUnaryRed { out = args.out.reduce_accessor(); if constexpr (OP_CODE == UnaryRedCode::CONTAINS) { to_find = args.args[0].scalar(); } + if constexpr (OP_CODE == UnaryRedCode::VARIANCE) { mu = args.args[0].scalar(); } #ifndef LEGATE_BOUNDS_CHECKS // Check to see if this is dense or not @@ -79,6 +81,8 @@ struct ScalarUnaryRed { OP_CODE == UnaryRedCode::NANARGMAX || OP_CODE == UnaryRedCode::NANARGMIN) { auto p = pitches.unflatten(idx, origin); OP::template fold(lhs, OP::convert(p, shape, identity, inptr[idx])); + } else if constexpr (OP_CODE == UnaryRedCode::VARIANCE) { + OP::template fold(lhs, OP::convert(inptr[idx] - mu, identity)); } else { OP::template fold(lhs, OP::convert(inptr[idx], identity)); } @@ -86,15 +90,15 @@ struct ScalarUnaryRed { __CUDA_HD__ void operator()(LHS& lhs, size_t idx, LHS identity, SparseReduction) const noexcept { + auto p = pitches.unflatten(idx, origin); if constexpr (OP_CODE == UnaryRedCode::CONTAINS) { - auto point = pitches.unflatten(idx, origin); - if (in[point] == to_find) { lhs = true; } + if (in[p] == to_find) { lhs = true; } } else if constexpr (OP_CODE == UnaryRedCode::ARGMAX || OP_CODE == UnaryRedCode::ARGMIN || OP_CODE == UnaryRedCode::NANARGMAX || OP_CODE == UnaryRedCode::NANARGMIN) { - auto p = pitches.unflatten(idx, origin); OP::template fold(lhs, OP::convert(p, shape, identity, in[p])); + } else if constexpr (OP_CODE == UnaryRedCode::VARIANCE) { + OP::template fold(lhs, OP::convert(in[p] - mu, identity)); } else { - auto p = pitches.unflatten(idx, origin); OP::template fold(lhs, OP::convert(in[p], identity)); } } diff --git a/src/cunumeric/unary/unary_red_util.h b/src/cunumeric/unary/unary_red_util.h index 34d92710bf..e822e40b45 100644 --- a/src/cunumeric/unary/unary_red_util.h +++ b/src/cunumeric/unary/unary_red_util.h @@ -40,6 +40,8 @@ enum class UnaryRedCode : int { NANSUM = CUNUMERIC_RED_NANSUM, PROD = CUNUMERIC_RED_PROD, SUM = CUNUMERIC_RED_SUM, + SUM_SQUARES = CUNUMERIC_RED_SUM_SQUARES, + VARIANCE = CUNUMERIC_RED_VARIANCE }; template @@ -89,6 +91,10 @@ constexpr decltype(auto) op_dispatch(UnaryRedCode op_code, Functor f, Fnargs&&.. 
return f.template operator()(std::forward(args)...); case UnaryRedCode::SUM: return f.template operator()(std::forward(args)...); + case UnaryRedCode::SUM_SQUARES: + return f.template operator()(std::forward(args)...); + case UnaryRedCode::VARIANCE: + return f.template operator()(std::forward(args)...); default: break; } assert(false); @@ -264,6 +270,52 @@ struct UnaryRedOp { __CUDA_HD__ static VAL convert(const RHS& rhs, const VAL) { return rhs; } }; +template +struct UnaryRedOp { + static constexpr bool valid = true; + + using RHS = legate::legate_type_of; + using VAL = RHS; + using OP = Legion::SumReduction; + + template + __CUDA_HD__ static void fold(VAL& a, VAL b) + { + OP::template fold(a, b); + } + + template + __CUDA_HD__ static VAL convert(const Legion::Point&, int32_t, const VAL, const RHS& rhs) + { + return rhs * rhs; + } + + __CUDA_HD__ static VAL convert(const RHS& rhs, const VAL) { return rhs * rhs; } +}; + +template +struct UnaryRedOp { + static constexpr bool valid = true; + + using RHS = legate::legate_type_of; + using VAL = RHS; + using OP = Legion::SumReduction; + + template + __CUDA_HD__ static void fold(VAL& a, VAL b) + { + OP::template fold(a, b); + } + + template + __CUDA_HD__ static VAL convert(const Legion::Point&, int32_t, const VAL, const RHS& rhs) + { + return rhs * rhs; + } + + __CUDA_HD__ static VAL convert(const RHS& rhs, const VAL) { return rhs * rhs; } +}; + template struct UnaryRedOp { static constexpr bool valid = !legate::is_complex::value; diff --git a/tests/integration/test_stats.py b/tests/integration/test_stats.py new file mode 100644 index 0000000000..dfa1b0fa33 --- /dev/null +++ b/tests/integration/test_stats.py @@ -0,0 +1,205 @@ +# Copyright 2022 NVIDIA Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import functools + +import numpy as np +import pytest +from utils.comparisons import allclose + +import cunumeric as num + +np.random.seed(143) + + +def check_result(in_np, out_np, out_num, **isclose_kwargs): + if in_np.dtype == "e" or out_np.dtype == "e": + # The mantissa is only 10 bits, 2**-10 ~= 10^(-4) + # Gives 1e-3 as rtol to provide extra rounding error. 
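[Editor's note] A brief aside on the half-precision tolerance logic that begins here (the `f16_rtol` assignment continues just below). This is a small NumPy check of the arithmetic behind it, not part of the patch:

```python
import numpy as np

# float16 carries a 10-bit mantissa, so its machine epsilon is
# 2**-10 ~= 9.8e-4. A reduction over many elements accumulates several
# such rounding errors, hence the looser relative tolerance used below.
assert np.finfo(np.float16).eps == 2.0**-10
print(np.finfo(np.float16).eps)  # 0.000977
```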
+ f16_rtol = 1e-2 + rtol = isclose_kwargs.setdefault("rtol", f16_rtol) + # make sure we aren't trying to fp16 compare with less precision + assert rtol >= f16_rtol + + if "negative_test" in isclose_kwargs: + is_negative_test = isclose_kwargs["negative_test"] + else: + is_negative_test = False + + result = ( + allclose(out_np, out_num, **isclose_kwargs) + and out_np.dtype == out_num.dtype + ) + if not result and not is_negative_test: + print("cunumeric failed the test") + print("Input:") + print(in_np) + print(f"dtype: {in_np.dtype}") + print("NumPy output:") + print(out_np) + print(f"dtype: {out_np.dtype}") + print("cuNumeric output:") + print(out_num) + print(f"dtype: {out_num.dtype}") + return result + + +def check_op(op_np, op_num, in_np, out_dtype, **check_kwargs): + in_num = num.array(in_np) + + out_np = op_np(in_np) + out_num = op_num(in_num) + + assert check_result(in_np, out_np, out_num, **check_kwargs) + + out_np = np.empty(out_np.shape, dtype=out_dtype) + out_num = num.empty(out_num.shape, dtype=out_dtype) + + op_np(in_np, out=out_np) + op_num(in_num, out=out_num) + + assert check_result(in_np, out_np, out_num, **check_kwargs) + + +def get_op_input( + shape=(4, 5), + a_min=None, + a_max=None, + randint=False, + offset=None, + astype=None, + out_dtype="d", + replace_zero=None, + **check_kwargs, +): + if randint: + assert a_min is not None + assert a_max is not None + in_np = np.random.randint(a_min, a_max, size=shape) + else: + in_np = np.random.randn(*shape) + if offset is not None: + in_np = in_np + offset + if a_min is not None: + in_np = np.maximum(a_min, in_np) + if a_max is not None: + in_np = np.minimum(a_max, in_np) + if astype is not None: + in_np = in_np.astype(astype) + + if replace_zero is not None: + in_np[in_np == 0] = replace_zero + + # converts to a scalar if shape is (1,) + if in_np.ndim == 1 and in_np.shape[0] == 1: + in_np = in_np[0] + + return in_np + + +dtypes = ( + "e", + "f", + "d", +) + + +@pytest.mark.parametrize("dtype", dtypes) +@pytest.mark.parametrize("ddof", [0, 1]) +@pytest.mark.parametrize("axis", [None, 0, 1]) +@pytest.mark.parametrize("keepdims", [False, True]) +def test_var_default_shape(dtype, ddof, axis, keepdims): + np_in = get_op_input(astype=dtype) + + op_np = functools.partial(np.var, ddof=ddof, axis=axis, keepdims=keepdims) + op_num = functools.partial( + num.var, ddof=ddof, axis=axis, keepdims=keepdims + ) + + check_op(op_np, op_num, np_in, dtype) + + +@pytest.mark.parametrize("dtype", dtypes) +@pytest.mark.parametrize("ddof", [0, 1]) +@pytest.mark.parametrize("axis", [None, 0, 1, 2]) +@pytest.mark.parametrize("shape", [(10,), (4, 5), (2, 3, 4)]) +def test_var_w_shape(dtype, ddof, axis, shape): + np_in = get_op_input(astype=dtype, shape=shape) + + if axis is not None and axis >= len(shape): + axis = None + + op_np = functools.partial(np.var, ddof=ddof, axis=axis) + op_num = functools.partial(num.var, ddof=ddof, axis=axis) + + check_op(op_np, op_num, np_in, dtype) + + +@pytest.mark.parametrize("dtype", dtypes) +@pytest.mark.parametrize("ddof", [0, 1]) +@pytest.mark.parametrize( + "axis", + [ + None, + ], +) +@pytest.mark.parametrize( + "shape", + [ + (10, 1), + ], +) +def test_var_corners(dtype, ddof, axis, shape): + np_in = get_op_input(astype=dtype, shape=shape) + + if axis is not None and axis >= len(shape): + axis = None + + op_np = functools.partial(np.var, ddof=ddof, axis=axis) + op_num = functools.partial(num.var, ddof=ddof, axis=axis) + + check_op(op_np, op_num, np_in, dtype) + + +@pytest.mark.xfail 
+@pytest.mark.parametrize("dtype", dtypes) +@pytest.mark.parametrize("ddof", [0, 1]) +@pytest.mark.parametrize( + "axis", + [ + None, + ], +) +@pytest.mark.parametrize( + "shape", + [ + (1,), + ], +) +def test_var_xfail(dtype, ddof, axis, shape): + np_in = get_op_input(astype=dtype, shape=shape) + + op_np = functools.partial(np.var, ddof=ddof, axis=axis) + op_num = functools.partial(num.var, ddof=ddof, axis=axis) + + check_op(op_np, op_num, np_in, dtype, negative_test=True) + + +if __name__ == "__main__": + import sys + + np.random.seed(12345) + + sys.exit(pytest.main(sys.argv)) From 2666c08bbdb2ad5f83876ac8ed5c61bbe01af712 Mon Sep 17 00:00:00 2001 From: Bryan Van de Ven Date: Thu, 26 Oct 2023 16:33:34 -0700 Subject: [PATCH 09/18] Create bug_report.yml (#1062) --- .github/ISSUE_TEMPLATE/bug_report.yml | 97 +++++++++++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 .github/ISSUE_TEMPLATE/bug_report.yml diff --git a/.github/ISSUE_TEMPLATE/bug_report.yml b/.github/ISSUE_TEMPLATE/bug_report.yml new file mode 100644 index 0000000000..8bf716ed17 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bug_report.yml @@ -0,0 +1,97 @@ +name: Bug report +description: Submit a bug report +title: "[BUG] " +labels: TRIAGE +body: + - type: markdown + attributes: + value: "# Bug report" + - type: markdown + attributes: + value: Thank you for reporting a bug and helping us improve Cunumeric! + - type: markdown + attributes: + value: > + Please fill out all of the required information. + - type: markdown + attributes: + value: | + --- + ## Environment information + - type: textarea + id: legate_issue + attributes: + label: Software versions + description: >- + Run `legate-issue` and paste the output here. + placeholder: | + Python : 3.10.11 | packaged by conda-forge | (main, May 10 2023, 18:58:44) [GCC 11.3.0] + Platform : Linux-5.14.0-1042-oem-x86_64-with-glibc2.31 + Legion : v23.11.00.dev-16-g2499f878 + Legate : 23.11.00.dev+17.gb7b50313 + Cunumeric : (ImportError: cannot import name 'LogicalArray' from 'legate.core') + Numpy : 1.24.4 + Scipy : 1.10.1 + Numba : (not installed) + CTK package : cuda-version-11.8-h70ddcb2_2 (conda-forge) + GPU Driver : 515.65.01 + GPU Devices : + GPU 0: Quadro RTX 8000 + GPU 1: Quadro RTX 8000 + validations: + required: true + - type: input + id: jupyter + attributes: + label: Jupyter notebook / Jupyter Lab version + description: >- + Please supply if the issue you are reporting is related to Jupyter + notebook or Jupyter Lab. + validations: + required: false + - type: markdown + attributes: + value: | + ## Issue details + - type: textarea + id: expected-behavior + attributes: + label: Expected behavior + description: What did you expect to happen? + validations: + required: true + - type: textarea + id: observed-behavior + attributes: + label: Observed behavior + description: What did actually happen? + validations: + required: true + - type: markdown + attributes: + value: | + ## Directions to reproduce + - type: textarea + id: example + attributes: + label: Example code or instructions + description: > + Please provide detailed instructions to reproduce the issue. Ideally this includes a + [Complete, minimal, self-contained example code](https://stackoverflow.com/help/minimal-reproducible-example) + given here or as a link to code in another repository. 
+ render: Python + validations: + required: true + - type: markdown + attributes: + value: | + ## Additional information + - type: textarea + id: traceback-console + attributes: + label: Stack traceback or browser console output + description: > + Add any error messages or logs that might be helpful in reproducing and + identifying the bug, for example a Python stack traceback. + validations: + required: false From 3fdc85600b171a3b83ca45b0746efb3080684237 Mon Sep 17 00:00:00 2001 From: Manolis Papadakis Date: Thu, 2 Nov 2023 15:24:31 -0700 Subject: [PATCH 10/18] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 3ed163b571..78924b360a 100644 --- a/README.md +++ b/README.md @@ -119,7 +119,7 @@ with cuNumeric going forward: new features to cuNumeric. * We plan to add support for sharded file I/O for loading and storing large data sets that could never be loaded on a single node. - Initially this will begin with native support for [h5py](https://www.h5py.org/) + Initially this will begin with native support for hdf5 and zarr, but will grow to accommodate other formats needed by our lighthouse applications. * Strong scaling: while cuNumeric is currently implemented in a way that From 586a005ab6bb1536af456a735071cf320351167b Mon Sep 17 00:00:00 2001 From: Manolis Papadakis Date: Fri, 3 Nov 2023 09:50:19 -0700 Subject: [PATCH 11/18] Fix #1065 (#1067) * Fix #1065 * Bump legate.core commit --- cmake/versions.json | 2 +- cunumeric/eager.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/cmake/versions.json b/cmake/versions.json index b99da26cb4..93a1d80010 100644 --- a/cmake/versions.json +++ b/cmake/versions.json @@ -5,7 +5,7 @@ "git_url" : "https://github.com/nv-legate/legate.core.git", "git_shallow": false, "always_download": false, - "git_tag" : "8997f997be02936304b3ac23fe785f1de7a3424b" + "git_tag" : "a4b5430ebb2c52e3f8da8f27534bc0db8826b804" } } } diff --git a/cunumeric/eager.py b/cunumeric/eager.py index 63284eb942..26fc980168 100644 --- a/cunumeric/eager.py +++ b/cunumeric/eager.py @@ -1615,7 +1615,7 @@ def where(self, rhs1: Any, rhs2: Any, rhs3: Any) -> None: if self.deferred is not None: self.deferred.where(rhs1, rhs2, rhs3) else: - self.array[:] = np.where(rhs1.array, rhs2.array, rhs3.array) + self.array[...] = np.where(rhs1.array, rhs2.array, rhs3.array) def argwhere(self) -> NumPyThunk: if self.deferred is not None: From f8c94f022947c72de121f38a6301ec322c4c11bb Mon Sep 17 00:00:00 2001 From: Manolis Papadakis Date: Mon, 6 Nov 2023 15:24:19 -0800 Subject: [PATCH 12/18] Suggest using mamba over conda (#1068) --- README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 78924b360a..7516331ff0 100644 --- a/README.md +++ b/README.md @@ -40,15 +40,15 @@ If you have questions, please contact us at legate(at)nvidia.com. cuNumeric is available [on conda](https://anaconda.org/legate/cunumeric): ``` -conda install -c nvidia -c conda-forge -c legate cunumeric +mamba install -c nvidia -c conda-forge -c legate cunumeric ``` Only linux-64 packages are available at the moment. The default package contains GPU support, and is compatible with CUDA >= 11.8 (CUDA driver version >= r520), and Volta or later GPU architectures. There are -also CPU-only packages available, and will be automatically selected by `conda` -when installing on a machine without GPUs. 
+also CPU-only packages available, and will be automatically selected when +installing on a machine without GPUs. See the build instructions at https://nv-legate.github.io/cunumeric for details about building cuNumeric from source. From a010564e568648f8fe2c8f7b9aadf5f0a9bcca59 Mon Sep 17 00:00:00 2001 From: Manolis Papadakis Date: Mon, 6 Nov 2023 16:02:04 -0800 Subject: [PATCH 13/18] Missing CUDA libs are now included in main conda env get script (#1058) * CUDA libraries now included in main conda env generation script * Bump legate.core ref to commit with updated generate-conda-envs --- continuous_integration/scripts/build-cunumeric-all | 6 ------ 1 file changed, 6 deletions(-) diff --git a/continuous_integration/scripts/build-cunumeric-all b/continuous_integration/scripts/build-cunumeric-all index bcdbf62ec5..66f5ccb6e0 100755 --- a/continuous_integration/scripts/build-cunumeric-all +++ b/continuous_integration/scripts/build-cunumeric-all @@ -3,12 +3,6 @@ setup_env() { yaml_file=$(find ~/.artifacts -name "environment*.yaml" | head -n 1) - [ "${USE_CUDA:-}" = "ON" ] && - echo " - libcublas-dev" >> "${yaml_file}" && - echo " - libcufft-dev" >> "${yaml_file}" && - echo " - libcurand-dev" >> "${yaml_file}" && - echo " - libcusolver-dev" >> "${yaml_file}"; - echo "YAML file..." cat "${yaml_file}" From fce99c0316f1311f9468ff593b6e7a1a4df5a237 Mon Sep 17 00:00:00 2001 From: Manolis Papadakis Date: Mon, 6 Nov 2023 16:02:31 -0800 Subject: [PATCH 14/18] Typos (#1063) --- scripts/util/build-caching.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/scripts/util/build-caching.sh b/scripts/util/build-caching.sh index 70de985d32..9fb4c1b4a4 100755 --- a/scripts/util/build-caching.sh +++ b/scripts/util/build-caching.sh @@ -7,9 +7,9 @@ if [[ -n "$(which sccache)" ]]; then CMAKE_CUDA_COMPILER_LAUNCHER="${CMAKE_CUDA_COMPILER_LAUNCHER:-$(which sccache)}"; elif [[ -n "$(which ccache)" ]]; then # Use ccache if installed - CMAKE_C_COMPILER_LAUNCHER="${CMAKE_C_COMPILER_LAUNCHER:-$(which cache)}"; - CMAKE_CXX_COMPILER_LAUNCHER="${CMAKE_CXX_COMPILER_LAUNCHER:-$(which cache)}"; - CMAKE_CUDA_COMPILER_LAUNCHER="${CMAKE_CUDA_COMPILER_LAUNCHER:-$(which cache)}"; + CMAKE_C_COMPILER_LAUNCHER="${CMAKE_C_COMPILER_LAUNCHER:-$(which ccache)}"; + CMAKE_CXX_COMPILER_LAUNCHER="${CMAKE_CXX_COMPILER_LAUNCHER:-$(which ccache)}"; + CMAKE_CUDA_COMPILER_LAUNCHER="${CMAKE_CUDA_COMPILER_LAUNCHER:-$(which ccache)}"; fi export CMAKE_C_COMPILER_LAUNCHER="$CMAKE_C_COMPILER_LAUNCHER" From 8c67416c95583ca29e9a712e1cb2060ec57fd565 Mon Sep 17 00:00:00 2001 From: Manolis Papadakis Date: Tue, 7 Nov 2023 10:08:05 -0800 Subject: [PATCH 15/18] Fixes #1069, #1070 (#1072) * Find handling of optimize=True in einsum * Use einsum path optimizer by default cuNumeric can only contract two arrays at a time, so the naive input-order contraction path can easily result in huge intermediates. 
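[Editor's note] To make the contraction-order point concrete, here is a small NumPy illustration of how a greedy contraction path avoids a large intermediate; the shapes are made up for illustration, and cuNumeric exposes the same `einsum`/`einsum_path` interface referenced in this patch:

```python
import numpy as np

x = np.ones((1000, 2))
y = np.ones((2, 1000))
z = np.ones((1000, 1))

# Contracting in input order would materialize the (1000, 1000) product
# x @ y before collapsing it against z; the greedy optimizer typically
# contracts y with z first, keeping the largest intermediate tiny.
path, report = np.einsum_path("ij,jk,kl->il", x, y, z, optimize="greedy")
print(path)    # e.g. ['einsum_path', (1, 2), (0, 1)]
print(report)  # includes the largest intermediate and FLOP estimates
```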
* Bump legate.core git hash --- cmake/versions.json | 2 +- cunumeric/module.py | 13 ++++++++----- tests/integration/test_einsum.py | 4 +--- 3 files changed, 10 insertions(+), 9 deletions(-) diff --git a/cmake/versions.json b/cmake/versions.json index 93a1d80010..43d60fa5e1 100644 --- a/cmake/versions.json +++ b/cmake/versions.json @@ -5,7 +5,7 @@ "git_url" : "https://github.com/nv-legate/legate.core.git", "git_shallow": false, "always_download": false, - "git_tag" : "a4b5430ebb2c52e3f8da8f27534bc0db8826b804" + "git_tag" : "6fa0acc9dcfa89be2702f1de6c045bc262f752b1" } } } diff --git a/cunumeric/module.py b/cunumeric/module.py index e2bbc78f7a..47c4dea90e 100644 --- a/cunumeric/module.py +++ b/cunumeric/module.py @@ -4587,7 +4587,7 @@ def einsum( out: Optional[ndarray] = None, dtype: Optional[np.dtype[Any]] = None, casting: CastingKind = "safe", - optimize: Union[bool, str] = False, + optimize: Union[bool, Literal["greedy", "optimal"]] = True, ) -> ndarray: """ Evaluates the Einstein summation convention on the operands. @@ -4628,9 +4628,10 @@ def einsum( Default is 'safe'. optimize : ``{False, True, 'greedy', 'optimal'}``, optional - Controls if intermediate optimization should occur. No optimization - will occur if False. Uses opt_einsum to find an optimized contraction - plan if True. + Controls if intermediate optimization should occur. If False then + arrays will be contracted in input order, one at a time. True (the + default) will use the 'greedy' algorithm. See ``cunumeric.einsum_path`` + for more information on the available optimization algorithms. Returns ------- @@ -4654,7 +4655,9 @@ def einsum( if out is not None: out = convert_to_cunumeric_ndarray(out, share=True) - if not optimize: + if optimize is True: + optimize = "greedy" + elif optimize is False: optimize = NullOptimizer() # This call normalizes the expression (adds the output part if it's diff --git a/tests/integration/test_einsum.py b/tests/integration/test_einsum.py index e482e8cf09..4fcdd2402f 100644 --- a/tests/integration/test_einsum.py +++ b/tests/integration/test_einsum.py @@ -272,7 +272,7 @@ def test_cast(expr, dtype): False, "optimal", "greedy", - pytest.param(True, marks=pytest.mark.xfail), + True, ], ) def test_optimize(optimize): @@ -282,8 +282,6 @@ def test_optimize(optimize): np_res = np.einsum("ik,kj->ij", a, b, optimize=optimize) num_res = num.einsum("ik,kj->ij", a, b, optimize=optimize) assert allclose(np_res, num_res) - # when optimize=True, cunumeric raises - # TypeError: 'bool' object is not iterable def test_expr_opposite(): From b66e2ecdf7d245fd9b754b6f4ef6d42c7ed6681d Mon Sep 17 00:00:00 2001 From: Jeremy Date: Tue, 7 Nov 2023 23:42:26 -0800 Subject: [PATCH 16/18] Add batched cholesky implementation and tests (#1029) * add batched cholesky implementation and tests * missing files * fix correctness issues in transpose lower implementation * address PR comments * remove print statements * address more PR comments * test fixes * remove outdated comment * Add missing "throws exception" annotation --------- Co-authored-by: Manolis Papadakis --- cunumeric/config.py | 2 + cunumeric/linalg/cholesky.py | 40 ++++- cunumeric/linalg/linalg.py | 4 - cunumeric_cpp.cmake | 3 + src/cunumeric/cunumeric_c.h | 1 + src/cunumeric/mapper.cc | 19 +++ src/cunumeric/matrix/batched_cholesky.cc | 85 ++++++++++ src/cunumeric/matrix/batched_cholesky.cu | 111 +++++++++++++ src/cunumeric/matrix/batched_cholesky.h | 38 +++++ src/cunumeric/matrix/batched_cholesky_omp.cc | 83 ++++++++++ .../matrix/batched_cholesky_template.inl | 147 
++++++++++++++++++ src/cunumeric/matrix/potrf.cc | 72 ++++----- src/cunumeric/matrix/potrf.cu | 55 ++++--- src/cunumeric/matrix/potrf_omp.cc | 72 ++++----- src/cunumeric/matrix/potrf_template.inl | 20 +++ tests/integration/test_cholesky.py | 49 +++++- tests/unit/cunumeric/test_config.py | 1 + 17 files changed, 692 insertions(+), 110 deletions(-) create mode 100644 src/cunumeric/matrix/batched_cholesky.cc create mode 100644 src/cunumeric/matrix/batched_cholesky.cu create mode 100644 src/cunumeric/matrix/batched_cholesky.h create mode 100644 src/cunumeric/matrix/batched_cholesky_omp.cc create mode 100644 src/cunumeric/matrix/batched_cholesky_template.inl diff --git a/cunumeric/config.py b/cunumeric/config.py index bdea334a16..635544bd86 100644 --- a/cunumeric/config.py +++ b/cunumeric/config.py @@ -32,6 +32,7 @@ class _CunumericSharedLib: CUNUMERIC_ADVANCED_INDEXING: int CUNUMERIC_ARANGE: int CUNUMERIC_ARGWHERE: int + CUNUMERIC_BATCHED_CHOLESKY: int CUNUMERIC_BINARY_OP: int CUNUMERIC_BINARY_RED: int CUNUMERIC_BINCOUNT: int @@ -333,6 +334,7 @@ class CuNumericOpCode(IntEnum): ADVANCED_INDEXING = _cunumeric.CUNUMERIC_ADVANCED_INDEXING ARANGE = _cunumeric.CUNUMERIC_ARANGE ARGWHERE = _cunumeric.CUNUMERIC_ARGWHERE + BATCHED_CHOLESKY = _cunumeric.CUNUMERIC_BATCHED_CHOLESKY BINARY_OP = _cunumeric.CUNUMERIC_BINARY_OP BINARY_RED = _cunumeric.CUNUMERIC_BINARY_RED BINCOUNT = _cunumeric.CUNUMERIC_BINCOUNT diff --git a/cunumeric/linalg/cholesky.py b/cunumeric/linalg/cholesky.py index 9bba033619..4ff4fe2127 100644 --- a/cunumeric/linalg/cholesky.py +++ b/cunumeric/linalg/cholesky.py @@ -1,4 +1,4 @@ -# Copyright 2021-2022 NVIDIA Corporation +# Copyright 2023 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -202,11 +202,47 @@ def tril(context: Context, p_output: StorePartition, n: int) -> None: task.execute() +def _batched_cholesky(output: DeferredArray, input: DeferredArray) -> None: + # the only feasible implementation for right now is that + # each cholesky submatrix fits on a single proc. We will have + # wildly varying memory available depending on the system. + # Just use a fixed cutoff to provide some sensible warning. 
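[Editor's note] The code comment continues just below with a TODO. For orientation, a quick back-of-the-envelope check of the 32768 cutoff mentioned above, plus the batched semantics in NumPy terms; this snippet is illustrative only and not part of the patch:

```python
import numpy as np

# Back-of-the-envelope check of the warning threshold mentioned above:
# a single n x n float64 submatrix at n = 32768 already occupies 8 GiB.
n = 32768
print(n * n * 8 / 2**30)  # 8.0 (GiB)

# Batched Cholesky semantics: each trailing square submatrix of a
# stacked input is factored independently into its lower-triangular
# factor, matching NumPy's stacked-array behavior.
batch = np.stack([np.eye(3) * (i + 1) for i in range(4)])  # shape (4, 3, 3)
lower = np.linalg.cholesky(batch)                          # shape (4, 3, 3)
assert np.allclose(lower[2] @ lower[2].T, batch[2])
```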
+ # TODO: find a better way to inform the user dims are too big + context: Context = output.context + task = context.create_auto_task(CuNumericOpCode.BATCHED_CHOLESKY) + task.add_input(input.base) + task.add_output(output.base) + ndim = input.base.ndim + task.add_broadcast(input.base, (ndim - 2, ndim - 1)) + task.add_broadcast(output.base, (ndim - 2, ndim - 1)) + task.add_alignment(input.base, output.base) + task.throws_exception(LinAlgError) + task.execute() + + def cholesky( output: DeferredArray, input: DeferredArray, no_tril: bool ) -> None: runtime = output.runtime - context = output.context + context: Context = output.context + if len(input.base.shape) > 2: + if no_tril: + raise NotImplementedError( + "batched cholesky expects to only " + "produce the lower triangular matrix" + ) + size = input.base.shape[-1] + # Choose 32768 as dimension cutoff for warning + # so that for float64 anything larger than + # 8 GiB produces a warning + if size > 32768: + runtime.warn( + "batched cholesky is only valid" + " when the square submatrices fit" + f" on a single proc, n > {size} may be too large", + category=UserWarning, + ) + return _batched_cholesky(output, input) if runtime.num_procs == 1: transpose_copy_single(context, input.base, output.base) diff --git a/cunumeric/linalg/linalg.py b/cunumeric/linalg/linalg.py index f3f7eb9fb8..d1c0498b2e 100644 --- a/cunumeric/linalg/linalg.py +++ b/cunumeric/linalg/linalg.py @@ -82,10 +82,6 @@ def cholesky(a: ndarray) -> ndarray: elif shape[-1] != shape[-2]: raise ValueError("Last 2 dimensions of the array must be square") - if len(shape) > 2: - raise NotImplementedError( - "cuNumeric needs to support stacked 2d arrays" - ) return _cholesky(a) diff --git a/cunumeric_cpp.cmake b/cunumeric_cpp.cmake index 4270962bae..f7feee620e 100644 --- a/cunumeric_cpp.cmake +++ b/cunumeric_cpp.cmake @@ -143,6 +143,7 @@ list(APPEND cunumeric_SOURCES src/cunumeric/index/putmask.cc src/cunumeric/item/read.cc src/cunumeric/item/write.cc + src/cunumeric/matrix/batched_cholesky.cc src/cunumeric/matrix/contract.cc src/cunumeric/matrix/diag.cc src/cunumeric/matrix/gemm.cc @@ -195,6 +196,7 @@ if(Legion_USE_OpenMP) src/cunumeric/index/repeat_omp.cc src/cunumeric/index/wrap_omp.cc src/cunumeric/index/zip_omp.cc + src/cunumeric/matrix/batched_cholesky_omp.cc src/cunumeric/matrix/contract_omp.cc src/cunumeric/matrix/diag_omp.cc src/cunumeric/matrix/gemm_omp.cc @@ -245,6 +247,7 @@ if(Legion_USE_CUDA) src/cunumeric/index/putmask.cu src/cunumeric/item/read.cu src/cunumeric/item/write.cu + src/cunumeric/matrix/batched_cholesky.cu src/cunumeric/matrix/contract.cu src/cunumeric/matrix/diag.cu src/cunumeric/matrix/gemm.cu diff --git a/src/cunumeric/cunumeric_c.h b/src/cunumeric/cunumeric_c.h index b5b3928355..99d9bea191 100644 --- a/src/cunumeric/cunumeric_c.h +++ b/src/cunumeric/cunumeric_c.h @@ -29,6 +29,7 @@ enum CuNumericOpCode { CUNUMERIC_ADVANCED_INDEXING, CUNUMERIC_ARANGE, CUNUMERIC_ARGWHERE, + CUNUMERIC_BATCHED_CHOLESKY, CUNUMERIC_BINARY_OP, CUNUMERIC_BINARY_RED, CUNUMERIC_BINCOUNT, diff --git a/src/cunumeric/mapper.cc b/src/cunumeric/mapper.cc index 247ded4fdd..ba7114e45f 100644 --- a/src/cunumeric/mapper.cc +++ b/src/cunumeric/mapper.cc @@ -145,6 +145,25 @@ std::vector CuNumericMapper::store_mappings( } return std::move(mappings); } + // CHANGE: If this code is changed, make sure all layouts are + // consistent with those assumed in batched_cholesky.cu, etc + case CUNUMERIC_BATCHED_CHOLESKY: { + std::vector mappings; + auto& inputs = task.inputs(); + auto& outputs = 
task.outputs(); + mappings.reserve(inputs.size() + outputs.size()); + for (auto& input : inputs) { + mappings.push_back(StoreMapping::default_mapping(input, options.front())); + mappings.back().policy.exact = true; + mappings.back().policy.ordering.set_c_order(); + } + for (auto& output : outputs) { + mappings.push_back(StoreMapping::default_mapping(output, options.front())); + mappings.back().policy.exact = true; + mappings.back().policy.ordering.set_c_order(); + } + return std::move(mappings); + } case CUNUMERIC_TRILU: { if (task.scalars().size() == 2) return {}; // If we're here, this task was the post-processing for Cholesky. diff --git a/src/cunumeric/matrix/batched_cholesky.cc b/src/cunumeric/matrix/batched_cholesky.cc new file mode 100644 index 0000000000..30dbe3c53d --- /dev/null +++ b/src/cunumeric/matrix/batched_cholesky.cc @@ -0,0 +1,85 @@ +/* Copyright 2023 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +#include "cunumeric/matrix/batched_cholesky.h" +#include "cunumeric/cunumeric.h" +#include "cunumeric/matrix/batched_cholesky_template.inl" + +#include +#include +#include + +namespace cunumeric { + +using namespace legate; + +template <> +void CopyBlockImpl::operator()(void* dst, const void* src, size_t size) +{ + ::memcpy(dst, src, size); +} + +template +struct BatchedTransposeImplBody { + using VAL = legate_type_of; + + static constexpr int tile_size = 64; + + void operator()(VAL* out, int n) const + { + VAL tile[tile_size][tile_size]; + int nblocks = (n + tile_size - 1) / tile_size; + + for (int rb = 0; rb < nblocks; ++rb) { + for (int cb = 0; cb < nblocks; ++cb) { + int r_start = rb * tile_size; + int r_stop = std::min(r_start + tile_size, n); + int c_start = cb * tile_size; + int c_stop = std::min(c_start + tile_size, n); + for (int r = r_start, tr = 0; r < r_stop; ++r, ++tr) { + for (int c = c_start, tc = 0; c < c_stop; ++c, ++tc) { + if (r <= c) { + tile[tr][tc] = out[r * n + c]; + } else { + tile[tr][tc] = 0; + } + } + } + for (int r = c_start, tr = 0; r < c_stop; ++r, ++tr) { + for (int c = r_start, tc = 0; c < r_stop; ++c, ++tc) { out[r * n + c] = tile[tc][tr]; } + } + } + } + } +}; + +/*static*/ void BatchedCholeskyTask::cpu_variant(TaskContext& context) +{ +#ifdef LEGATE_USE_OPENMP + openblas_set_num_threads(1); // make sure this isn't overzealous +#endif + batched_cholesky_task_context_dispatch(context); +} + +namespace // unnamed +{ +static void __attribute__((constructor)) register_tasks(void) +{ + BatchedCholeskyTask::register_variants(); +} +} // namespace + +} // namespace cunumeric diff --git a/src/cunumeric/matrix/batched_cholesky.cu b/src/cunumeric/matrix/batched_cholesky.cu new file mode 100644 index 0000000000..26fe3058f7 --- /dev/null +++ b/src/cunumeric/matrix/batched_cholesky.cu @@ -0,0 +1,111 @@ +/* Copyright 2023 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +#include "cunumeric/matrix/batched_cholesky.h" +#include "cunumeric/matrix/potrf.h" +#include "cunumeric/matrix/batched_cholesky_template.inl" + +#include "cunumeric/cuda_help.h" + +namespace cunumeric { + +using namespace legate; + +#define TILE_DIM 32 +#define BLOCK_ROWS 8 + +template <> +void CopyBlockImpl::operator()(void* dst, const void* src, size_t size) +{ + cudaMemcpyAsync(dst, src, size, cudaMemcpyDeviceToDevice, get_cached_stream()); +} + +template +__global__ static void __launch_bounds__((TILE_DIM * BLOCK_ROWS), MIN_CTAS_PER_SM) + transpose_2d_lower(VAL* out, int n) +{ + __shared__ VAL tile[TILE_DIM][TILE_DIM + 1 /*avoid bank conflicts*/]; + + // The y dim is fast-moving index for coalescing + auto r_block = blockIdx.x * TILE_DIM; + auto c_block = blockIdx.y * TILE_DIM; + auto r = blockIdx.x * TILE_DIM + threadIdx.x; + auto c = blockIdx.y * TILE_DIM + threadIdx.y; + auto stride = BLOCK_ROWS; + // The tile coordinates + auto tr = threadIdx.x; + auto tc = threadIdx.y; + auto offset = r * n + c; + + // only execute across the upper diagonal + // a single thread block will store the upper diagonal block into + // a temp shared memory then set the block to zeros + if (c_block >= r_block) { +#pragma unroll + for (int i = 0; i < TILE_DIM; i += BLOCK_ROWS, offset += stride) { + if (r < n && (c + i) < n) { + if (r <= (c + i)) { + tile[tr][tc + i] = out[offset]; + // clear the upper diagonal entry + out[offset] = 0; + } else { + tile[tr][tc + i] = 0; + } + } + } + + // Make sure all the data is in shared memory + __syncthreads(); + + // Transpose the global coordinates, keep y the fast-moving index + r = blockIdx.y * TILE_DIM + threadIdx.x; + c = blockIdx.x * TILE_DIM + threadIdx.y; + offset = r * n + c; + +#pragma unroll + for (int i = 0; i < TILE_DIM; i += BLOCK_ROWS, offset += stride) { + if (r < n && (c + i) < n) { + if (r >= (c + i)) { out[offset] = tile[tc + i][tr]; } + } + } + } +} + +template +struct BatchedTransposeImplBody { + using VAL = legate_type_of; + + void operator()(VAL* out, int n) const + { + const dim3 blocks((n + TILE_DIM - 1) / TILE_DIM, (n + TILE_DIM - 1) / TILE_DIM, 1); + const dim3 threads(TILE_DIM, BLOCK_ROWS, 1); + + auto stream = get_cached_stream(); + + // CUDA Potrf produces the full matrix, we only want + // the lower diagonal + transpose_2d_lower<<>>(out, n); + + CHECK_CUDA_STREAM(stream); + } +}; + +/*static*/ void BatchedCholeskyTask::gpu_variant(TaskContext& context) +{ + batched_cholesky_task_context_dispatch(context); +} + +} // namespace cunumeric diff --git a/src/cunumeric/matrix/batched_cholesky.h b/src/cunumeric/matrix/batched_cholesky.h new file mode 100644 index 0000000000..fceba2a9f9 --- /dev/null +++ b/src/cunumeric/matrix/batched_cholesky.h @@ -0,0 +1,38 @@ +/* Copyright 2021-2022 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +#pragma once + +#include "cunumeric/cunumeric.h" +#include "cunumeric/cunumeric_c.h" + +namespace cunumeric { + +class BatchedCholeskyTask : public CuNumericTask { + public: + static const int TASK_ID = CUNUMERIC_BATCHED_CHOLESKY; + + public: + static void cpu_variant(legate::TaskContext& context); +#ifdef LEGATE_USE_OPENMP + static void omp_variant(legate::TaskContext& context); +#endif +#ifdef LEGATE_USE_CUDA + static void gpu_variant(legate::TaskContext& context); +#endif +}; + +} // namespace cunumeric diff --git a/src/cunumeric/matrix/batched_cholesky_omp.cc b/src/cunumeric/matrix/batched_cholesky_omp.cc new file mode 100644 index 0000000000..84b311ff25 --- /dev/null +++ b/src/cunumeric/matrix/batched_cholesky_omp.cc @@ -0,0 +1,83 @@ +/* Copyright 2023 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + */ + +#include "cunumeric/cunumeric.h" +#include "cunumeric/matrix/batched_cholesky.h" +#include "cunumeric/matrix/batched_cholesky_template.inl" + +#include +#include +#include + +namespace cunumeric { + +using namespace legate; + +template <> +void CopyBlockImpl::operator()(void* dst, const void* src, size_t n) +{ + ::memcpy(dst, src, n); +} + +template +struct BatchedTransposeImplBody { + using VAL = legate_type_of; + + static constexpr int tile_size = 64; + + void operator()(VAL* out, int n) const + { + int nblocks = (n + tile_size - 1) / tile_size; + +#pragma omp parallel for + for (int rb = 0; rb < nblocks; ++rb) { + // only loop the upper diagonal + // transpose the elements that are there and + // zero out the elements after reading them + for (int cb = rb; cb < nblocks; ++cb) { + VAL tile[tile_size][tile_size]; + int r_start = rb * tile_size; + int r_stop = std::min(r_start + tile_size, n); + int c_start = cb * tile_size; + int c_stop = std::min(c_start + tile_size, n); + + for (int r = r_start, tr = 0; r < r_stop; ++r, ++tr) { + for (int c = c_start, tc = 0; c < c_stop; ++c, ++tc) { + if (r <= c) { + auto offset = r * n + c; + tile[tr][tc] = out[offset]; + out[offset] = 0; + } else { + tile[tr][tc] = 0; + } + } + } + + for (int r = c_start, tr = 0; r < c_stop; ++r, ++tr) { + for (int c = r_start, tc = 0; c < r_stop; ++c, ++tc) { out[r * n + c] = tile[tc][tr]; } + } + } + } + } +}; + +/*static*/ void BatchedCholeskyTask::omp_variant(TaskContext& context) +{ + openblas_set_num_threads(omp_get_max_threads()); + batched_cholesky_task_context_dispatch(context); +} + +} // namespace cunumeric diff --git a/src/cunumeric/matrix/batched_cholesky_template.inl b/src/cunumeric/matrix/batched_cholesky_template.inl new file mode 100644 index 0000000000..8d266e3f06 --- /dev/null +++ b/src/cunumeric/matrix/batched_cholesky_template.inl @@ -0,0 +1,147 @@ +/* Copyright 2023 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + */ + +#pragma once + +// Useful for IDEs +#include +#include "cunumeric/cunumeric.h" +#include "cunumeric/matrix/batched_cholesky.h" +#include "cunumeric/matrix/potrf_template.inl" +#include "cunumeric/matrix/transpose_template.inl" +#include "cunumeric/pitches.h" + +namespace cunumeric { + +using namespace legate; + +template +struct BatchedCholeskyImplBody { + template + void operator()(T* array, int32_t m, int32_t n) + { + PotrfImplBody()(array, m, n); + } +}; + +template +struct CopyBlockImpl { + void operator()(void* dst, const void* src, size_t n); +}; + +template +struct BatchedTransposeImplBody { + using VAL = legate_type_of; + + void operator()(VAL* array, int32_t n); +}; + +template +struct _cholesky_supported { + static constexpr bool value = CODE == Type::Code::FLOAT64 || CODE == Type::Code::FLOAT32 || + CODE == Type::Code::COMPLEX64 || CODE == Type::Code::COMPLEX128; +}; + +template +struct BatchedCholeskyImpl { + template + void operator()(Array& input_array, Array& output_array) const + { + using VAL = legate_type_of; + + auto shape = input_array.shape(); + if (shape != output_array.shape()) { + throw legate::TaskException( + "Batched cholesky is not supported when input/output shapes differ"); + } + + Pitches pitches; + size_t volume = pitches.flatten(shape); + + if (volume == 0) return; + + auto ncols = shape.hi[DIM - 1] - shape.lo[DIM - 1] + 1; + + size_t in_strides[DIM]; + size_t out_strides[DIM]; + + auto input = input_array.read_accessor(shape).ptr(shape, in_strides); + if (in_strides[DIM - 2] != ncols || in_strides[DIM - 1] != 1) { + throw legate::TaskException( + "Bad input accessor in batched cholesky, last two dimensions must be non-transformed and " + "dense with stride == 1"); + } + + auto output = output_array.write_accessor(shape).ptr(shape, out_strides); + if (out_strides[DIM - 2] != ncols || out_strides[DIM - 1] != 1) { + throw legate::TaskException( + "Bad output accessor in batched cholesky, last two dimensions must be non-transformed and " + "dense with stride == 1"); + } + + if (shape.empty()) return; + + int num_blocks = 1; + for (int i = 0; i < (DIM - 2); ++i) { num_blocks *= (shape.hi[i] - shape.lo[i] + 1); } + + auto m = static_cast(shape.hi[DIM - 2] - shape.lo[DIM - 2] + 1); + auto n = static_cast(shape.hi[DIM - 1] - shape.lo[DIM - 1] + 1); + assert(m > 0 && n > 0); + + auto block_stride = m * n; + + for (int i = 0; i < num_blocks; ++i) { + if constexpr (_cholesky_supported::value) { + CopyBlockImpl()(output, input, sizeof(VAL) * block_stride); + PotrfImplBody()(output, m, n); + // Implicit assumption here about the cholesky code created. + // We assume the output has C layout, but each subblock + // will be generated in Fortran layout. Transpose the Fortran + // subblock into C layout. + // CHANGE: If this code is changed, please make sure all changes + // are consistent with those found in mapper.cc. 
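[Editor's note] The loop body resumes immediately below. As a conceptual reference for what the per-block loop in this template does (copy the block, factorize it, keep only the lower triangle in C layout), here is a minimal NumPy sketch; the function name and structure are illustrative, not the actual C++ implementation:

```python
import numpy as np


def batched_cholesky_sketch(a):
    # Conceptual model of the per-block loop: for every trailing 2-D
    # square block, copy it, factor it, and keep only the lower triangle
    # in the row-major (C-order) output. The real kernel additionally
    # transposes the Fortran-layout factor back into C layout.
    a = np.ascontiguousarray(a)
    out = np.empty_like(a)
    m, n = a.shape[-2], a.shape[-1]
    flat_in = a.reshape(-1, m, n)
    flat_out = out.reshape(-1, m, n)
    for i in range(flat_in.shape[0]):
        block = flat_in[i].copy()           # analogous to CopyBlockImpl
        factor = np.linalg.cholesky(block)  # analogous to PotrfImplBody
        flat_out[i] = np.tril(factor)       # zero everything above the diagonal
    return out


stack = np.stack([np.eye(2) * (i + 2) for i in range(3)])
assert np.allclose(batched_cholesky_sketch(stack), np.linalg.cholesky(stack))
```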
+ BatchedTransposeImplBody()(output, n); + input += block_stride; + output += block_stride; + } + } + } +}; + +template +static void batched_cholesky_task_context_dispatch(TaskContext& context) +{ + auto& batched_input = context.inputs()[0]; + auto& batched_output = context.outputs()[0]; + if (batched_input.code() != batched_output.code()) { + throw legate::TaskException( + "batched cholesky is not yet supported when input/output types differ"); + } + if (batched_input.dim() != batched_output.dim()) { + throw legate::TaskException("input/output have different dims in batched cholesky"); + } + if (batched_input.dim() <= 2) { + throw legate::TaskException( + "internal error: batched cholesky input does not have more than 2 dims"); + } + double_dispatch(batched_input.dim(), + batched_input.code(), + BatchedCholeskyImpl{}, + batched_input, + batched_output); +} + +} // namespace cunumeric diff --git a/src/cunumeric/matrix/potrf.cc b/src/cunumeric/matrix/potrf.cc index 02ae062461..46ed58b6a3 100644 --- a/src/cunumeric/matrix/potrf.cc +++ b/src/cunumeric/matrix/potrf.cc @@ -25,48 +25,48 @@ namespace cunumeric { using namespace legate; template <> -struct PotrfImplBody { - void operator()(float* array, int32_t m, int32_t n) - { - char uplo = 'L'; - int32_t info = 0; - LAPACK_spotrf(&uplo, &n, array, &m, &info); - if (info != 0) throw legate::TaskException("Matrix is not positive definite"); - } -}; +void PotrfImplBody::operator()(float* array, + int32_t m, + int32_t n) +{ + char uplo = 'L'; + int32_t info = 0; + LAPACK_spotrf(&uplo, &n, array, &m, &info); + if (info != 0) throw legate::TaskException("Matrix is not positive definite"); +} template <> -struct PotrfImplBody { - void operator()(double* array, int32_t m, int32_t n) - { - char uplo = 'L'; - int32_t info = 0; - LAPACK_dpotrf(&uplo, &n, array, &m, &info); - if (info != 0) throw legate::TaskException("Matrix is not positive definite"); - } -}; +void PotrfImplBody::operator()(double* array, + int32_t m, + int32_t n) +{ + char uplo = 'L'; + int32_t info = 0; + LAPACK_dpotrf(&uplo, &n, array, &m, &info); + if (info != 0) throw legate::TaskException("Matrix is not positive definite"); +} template <> -struct PotrfImplBody { - void operator()(complex* array, int32_t m, int32_t n) - { - char uplo = 'L'; - int32_t info = 0; - LAPACK_cpotrf(&uplo, &n, reinterpret_cast<__complex__ float*>(array), &m, &info); - if (info != 0) throw legate::TaskException("Matrix is not positive definite"); - } -}; +void PotrfImplBody::operator()(complex* array, + int32_t m, + int32_t n) +{ + char uplo = 'L'; + int32_t info = 0; + LAPACK_cpotrf(&uplo, &n, reinterpret_cast<__complex__ float*>(array), &m, &info); + if (info != 0) throw legate::TaskException("Matrix is not positive definite"); +} template <> -struct PotrfImplBody { - void operator()(complex* array, int32_t m, int32_t n) - { - char uplo = 'L'; - int32_t info = 0; - LAPACK_zpotrf(&uplo, &n, reinterpret_cast<__complex__ double*>(array), &m, &info); - if (info != 0) throw legate::TaskException("Matrix is not positive definite"); - } -}; +void PotrfImplBody::operator()(complex* array, + int32_t m, + int32_t n) +{ + char uplo = 'L'; + int32_t info = 0; + LAPACK_zpotrf(&uplo, &n, reinterpret_cast<__complex__ double*>(array), &m, &info); + if (info != 0) throw legate::TaskException("Matrix is not positive definite"); +} /*static*/ void PotrfTask::cpu_variant(TaskContext& context) { diff --git a/src/cunumeric/matrix/potrf.cu b/src/cunumeric/matrix/potrf.cu index 68616525f5..8f13a5168c 100644 --- 
diff --git a/src/cunumeric/matrix/potrf.cu b/src/cunumeric/matrix/potrf.cu
index 68616525f5..8f13a5168c 100644
--- a/src/cunumeric/matrix/potrf.cu
+++ b/src/cunumeric/matrix/potrf.cu
@@ -49,41 +49,38 @@ static inline void potrf_template(
 }
 
 template <>
-struct PotrfImplBody<VariantKind::GPU, Type::Code::FLOAT32> {
-  void operator()(float* array, int32_t m, int32_t n)
-  {
-    potrf_template(cusolverDnSpotrf_bufferSize, cusolverDnSpotrf, array, m, n);
-  }
-};
+void PotrfImplBody<VariantKind::GPU, Type::Code::FLOAT32>::operator()(float* array,
+                                                                      int32_t m,
+                                                                      int32_t n)
+{
+  potrf_template(cusolverDnSpotrf_bufferSize, cusolverDnSpotrf, array, m, n);
+}
 
 template <>
-struct PotrfImplBody<VariantKind::GPU, Type::Code::FLOAT64> {
-  void operator()(double* array, int32_t m, int32_t n)
-  {
-    potrf_template(cusolverDnDpotrf_bufferSize, cusolverDnDpotrf, array, m, n);
-  }
-};
+void PotrfImplBody<VariantKind::GPU, Type::Code::FLOAT64>::operator()(double* array,
+                                                                      int32_t m,
+                                                                      int32_t n)
+{
+  potrf_template(cusolverDnDpotrf_bufferSize, cusolverDnDpotrf, array, m, n);
+}
 
 template <>
-struct PotrfImplBody<VariantKind::GPU, Type::Code::COMPLEX64> {
-  void operator()(complex<float>* array, int32_t m, int32_t n)
-  {
-    potrf_template(
-      cusolverDnCpotrf_bufferSize, cusolverDnCpotrf, reinterpret_cast<cuComplex*>(array), m, n);
-  }
-};
+void PotrfImplBody<VariantKind::GPU, Type::Code::COMPLEX64>::operator()(complex<float>* array,
+                                                                        int32_t m,
+                                                                        int32_t n)
+{
+  potrf_template(
+    cusolverDnCpotrf_bufferSize, cusolverDnCpotrf, reinterpret_cast<cuComplex*>(array), m, n);
+}
 
 template <>
-struct PotrfImplBody<VariantKind::GPU, Type::Code::COMPLEX128> {
-  void operator()(complex<double>* array, int32_t m, int32_t n)
-  {
-    potrf_template(cusolverDnZpotrf_bufferSize,
-                   cusolverDnZpotrf,
-                   reinterpret_cast<cuDoubleComplex*>(array),
-                   m,
-                   n);
-  }
-};
+void PotrfImplBody<VariantKind::GPU, Type::Code::COMPLEX128>::operator()(complex<double>* array,
+                                                                         int32_t m,
+                                                                         int32_t n)
+{
+  potrf_template(
+    cusolverDnZpotrf_bufferSize, cusolverDnZpotrf, reinterpret_cast<cuDoubleComplex*>(array), m, n);
+}
 
 /*static*/ void PotrfTask::gpu_variant(TaskContext& context)
 {
diff --git a/src/cunumeric/matrix/potrf_omp.cc b/src/cunumeric/matrix/potrf_omp.cc
index d26143a6f2..36b32968d1 100644
--- a/src/cunumeric/matrix/potrf_omp.cc
+++ b/src/cunumeric/matrix/potrf_omp.cc
@@ -26,48 +26,48 @@ namespace cunumeric {
 using namespace legate;
 
 template <>
-struct PotrfImplBody<VariantKind::OMP, Type::Code::FLOAT32> {
-  void operator()(float* array, int32_t m, int32_t n)
-  {
-    char uplo = 'L';
-    int32_t info = 0;
-    LAPACK_spotrf(&uplo, &n, array, &m, &info);
-    if (info != 0) throw legate::TaskException("Matrix is not positive definite");
-  }
-};
+void PotrfImplBody<VariantKind::OMP, Type::Code::FLOAT32>::operator()(float* array,
+                                                                      int32_t m,
+                                                                      int32_t n)
+{
+  char uplo = 'L';
+  int32_t info = 0;
+  LAPACK_spotrf(&uplo, &n, array, &m, &info);
+  if (info != 0) throw legate::TaskException("Matrix is not positive definite");
+}
 
 template <>
-struct PotrfImplBody<VariantKind::OMP, Type::Code::FLOAT64> {
-  void operator()(double* array, int32_t m, int32_t n)
-  {
-    char uplo = 'L';
-    int32_t info = 0;
-    LAPACK_dpotrf(&uplo, &n, array, &m, &info);
-    if (info != 0) throw legate::TaskException("Matrix is not positive definite");
-  }
-};
+void PotrfImplBody<VariantKind::OMP, Type::Code::FLOAT64>::operator()(double* array,
+                                                                      int32_t m,
+                                                                      int32_t n)
+{
+  char uplo = 'L';
+  int32_t info = 0;
+  LAPACK_dpotrf(&uplo, &n, array, &m, &info);
+  if (info != 0) throw legate::TaskException("Matrix is not positive definite");
+}
 
 template <>
-struct PotrfImplBody<VariantKind::OMP, Type::Code::COMPLEX64> {
-  void operator()(complex<float>* array, int32_t m, int32_t n)
-  {
-    char uplo = 'L';
-    int32_t info = 0;
-    LAPACK_cpotrf(&uplo, &n, reinterpret_cast<__complex__ float*>(array), &m, &info);
-    if (info != 0) throw legate::TaskException("Matrix is not positive definite");
-  }
-};
+void PotrfImplBody<VariantKind::OMP, Type::Code::COMPLEX64>::operator()(complex<float>* array,
+                                                                        int32_t m,
+                                                                        int32_t n)
+{
+  char uplo = 'L';
+  int32_t info = 0;
+  LAPACK_cpotrf(&uplo, &n, reinterpret_cast<__complex__ float*>(array), &m, &info);
+  if (info != 0) throw legate::TaskException("Matrix is not positive definite");
+}
 
 template <>
-struct PotrfImplBody<VariantKind::OMP, Type::Code::COMPLEX128> {
-  void operator()(complex<double>* array, int32_t m, int32_t n)
-  {
-    char uplo = 'L';
-    int32_t info = 0;
-    LAPACK_zpotrf(&uplo, &n, reinterpret_cast<__complex__ double*>(array), &m, &info);
-    if (info != 0) throw legate::TaskException("Matrix is not positive definite");
-  }
-};
+void PotrfImplBody<VariantKind::OMP, Type::Code::COMPLEX128>::operator()(complex<double>* array,
+                                                                         int32_t m,
+                                                                         int32_t n)
+{
+  char uplo = 'L';
+  int32_t info = 0;
+  LAPACK_zpotrf(&uplo, &n, reinterpret_cast<__complex__ double*>(array), &m, &info);
+  if (info != 0) throw legate::TaskException("Matrix is not positive definite");
+}
 
 /*static*/ void PotrfTask::omp_variant(TaskContext& context)
 {
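Note (illustration only): the COMPLEX64/COMPLEX128 specializations above use the same lower-triangular convention as the real ones; for complex inputs the factorization being computed is A = L @ L^H with A Hermitian positive definite. A small NumPy-only reminder of that convention, not taken from the patch:

import numpy as np

rng = np.random.default_rng(0)
n = 4
m = rng.standard_normal((n, n)) + 1j * rng.standard_normal((n, n))
a = m @ m.conj().T + n * np.eye(n)        # Hermitian positive definite
l = np.linalg.cholesky(a)
assert np.allclose(l @ l.conj().T, a)     # A = L @ L^H (conjugate transpose, not plain L @ L.T)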
diff --git a/src/cunumeric/matrix/potrf_template.inl b/src/cunumeric/matrix/potrf_template.inl
index 55c782ad05..7e42521897 100644
--- a/src/cunumeric/matrix/potrf_template.inl
+++ b/src/cunumeric/matrix/potrf_template.inl
@@ -26,6 +26,26 @@ using namespace legate;
 template <VariantKind KIND, Type::Code CODE>
 struct PotrfImplBody;
 
+template <VariantKind KIND>
+struct PotrfImplBody<KIND, Type::Code::FLOAT32> {
+  void operator()(float* array, int32_t m, int32_t n);
+};
+
+template <VariantKind KIND>
+struct PotrfImplBody<KIND, Type::Code::FLOAT64> {
+  void operator()(double* array, int32_t m, int32_t n);
+};
+
+template <VariantKind KIND>
+struct PotrfImplBody<KIND, Type::Code::COMPLEX64> {
+  void operator()(complex<float>* array, int32_t m, int32_t n);
+};
+
+template <VariantKind KIND>
+struct PotrfImplBody<KIND, Type::Code::COMPLEX128> {
+  void operator()(complex<double>* array, int32_t m, int32_t n);
+};
+
 template <Type::Code CODE>
 struct support_potrf : std::false_type {};
 template <>
diff --git a/tests/integration/test_cholesky.py b/tests/integration/test_cholesky.py
index 91edbaa7ea..5b2659a160 100644
--- a/tests/integration/test_cholesky.py
+++ b/tests/integration/test_cholesky.py
@@ -1,4 +1,4 @@
-# Copyright 2021-2022 NVIDIA Corporation
+# Copyright 2023 NVIDIA Corporation
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -56,10 +56,14 @@ def test_diagonal(): assert allclose(b**2.0, a) +def _get_real_symm_posdef(n): + a = num.random.rand(n, n) + return a + a.T + num.eye(n) * n + + @pytest.mark.parametrize("n", SIZES) def test_real(n): - a = num.random.rand(n, n) - b = a + a.T + num.eye(n) * n + b = _get_real_symm_posdef(n) c = num.linalg.cholesky(b) c_np = np.linalg.cholesky(b.__array__()) assert allclose(c, c_np) @@ -80,6 +84,45 @@ def test_complex(n): assert allclose(c, c_np) +@pytest.mark.parametrize("n", SIZES) +def test_batched_3d(n): + batch = 4 + a = _get_real_symm_posdef(n) + np_a = a.__array__() + a_batched = num.einsum("i,jk->ijk", np.arange(batch) + 1, a) + test_c = num.linalg.cholesky(a_batched) + for i in range(batch): + correct = np.linalg.cholesky(np_a * (i + 1)) + test = test_c[i, :] + assert allclose(correct, test) + + +def test_batched_empty(): + batch = 4 + a = _get_real_symm_posdef(8) + a_batched = num.einsum("i,jk->ijk", np.arange(batch) + 1, a) + a_sliced = a_batched[0:0, :, :] + empty = num.linalg.cholesky(a_sliced) + assert empty.shape == a_sliced.shape + + +@pytest.mark.parametrize("n", SIZES) +def test_batched_4d(n): + batch = 2 + a = _get_real_symm_posdef(n) + np_a = a.__array__() + + outer = np.einsum("i,j->ij", np.arange(batch) + 1, np.arange(batch) + 1) + + a_batched = num.einsum("ij,kl->ijkl", outer, a) + test_c = num.linalg.cholesky(a_batched) + for i in range(batch): + for j in range(batch): + correct = np.linalg.cholesky(np_a * (i + 1) * (j + 1)) + test = test_c[i, j, :] + assert allclose(correct, test) + + if __name__ == "__main__": import sys diff --git a/tests/unit/cunumeric/test_config.py b/tests/unit/cunumeric/test_config.py index 5e85ccfde5..6f8f43df5c 100644 --- a/tests/unit/cunumeric/test_config.py +++ b/tests/unit/cunumeric/test_config.py @@ -117,6 +117,7 @@ def test_CuNumericOpCode() -> None: "ADVANCED_INDEXING", "ARANGE", "ARGWHERE", + "BATCHED_CHOLESKY", "BINARY_OP", "BINARY_RED", "BINCOUNT", From 98a73bd44a91419377de8774a47733ce5c630847 Mon Sep 17 00:00:00 2001 From: Manolis Papadakis Date: Wed, 8 Nov 2023 14:12:57 -0800 Subject: [PATCH 17/18] Remove negative test that's now passing (#1073) --- tests/integration/test_cholesky.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/tests/integration/test_cholesky.py b/tests/integration/test_cholesky.py index 5b2659a160..c4b52754b0 100644 --- a/tests/integration/test_cholesky.py +++ b/tests/integration/test_cholesky.py @@ -35,12 +35,6 @@ def test_array_negative_1dim(): num.linalg.cholesky(arr) -def test_array_negative_3dim(): - arr = num.random.randint(0, 9, size=(3, 3, 3)) - with pytest.raises(NotImplementedError): - num.linalg.cholesky(arr) - - def test_array_negative(): arr = num.random.randint(0, 9, size=(3, 2, 3)) expected_exc = ValueError From 6ffdc4c5220e85379446c6066549c0b7c109aa40 Mon Sep 17 00:00:00 2001 From: Irina Demeshko Date: Wed, 8 Nov 2023 15:11:19 -0800 Subject: [PATCH 18/18] adding v23.11 to documentation (#1074) --- docs/cunumeric/source/versions.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/cunumeric/source/versions.rst b/docs/cunumeric/source/versions.rst index 4a21cc9ef9..1760786d8e 100644 --- a/docs/cunumeric/source/versions.rst +++ b/docs/cunumeric/source/versions.rst @@ -11,3 +11,4 @@ Versions 23.03 23.07 23.09 + 23.11
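Note (illustration only, mirroring test_batched_3d above): with this series applied, stacked inputs are expected to behave roughly as follows, using the same num/np aliases as the tests.

import numpy as np
import cunumeric as num

batch, n = 4, 8
a = num.random.rand(n, n)
b = a + a.T + num.eye(n) * n                                # symmetric positive definite block
stacked = num.einsum("i,jk->ijk", np.arange(batch) + 1, b)  # shape (batch, n, n)
l = num.linalg.cholesky(stacked)                            # factorizes every 2-D sub-block
np_b = b.__array__()
for i in range(batch):
    assert np.allclose(l[i].__array__(), np.linalg.cholesky(np_b * (i + 1)))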