From 65ad815e62433a5ac15446392c10ae54a61093bf Mon Sep 17 00:00:00 2001 From: Kyle Edwards Date: Wed, 22 Jan 2025 17:56:05 -0500 Subject: [PATCH 01/19] Run spark-rapids-jni CI --- .github/workflows/pr.yaml | 4 +++- .github/workflows/spark-rapids-jni.yaml | 20 ++++++++++++++++++++ 2 files changed, 23 insertions(+), 1 deletion(-) create mode 100644 .github/workflows/spark-rapids-jni.yaml diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index e955b8f1f80..45c090a0f8a 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -126,7 +126,7 @@ jobs: uses: rapidsai/shared-workflows/.github/workflows/checks.yaml@branch-25.02 with: enable_check_generated_files: false - ignored_pr_jobs: "telemetry-summarize" + ignored_pr_jobs: "telemetry-summarize spark-rapids-jni" conda-cpp-build: needs: checks secrets: inherit @@ -339,6 +339,8 @@ jobs: node_type: cpu4 build_type: pull-request run_script: "ci/cudf_pandas_scripts/pandas-tests/diff.sh" + spark-rapids-jni: + uses: ./.github/workflows/spark-rapids-jni.yaml telemetry-summarize: # This job must use a self-hosted runner to record telemetry traces. diff --git a/.github/workflows/spark-rapids-jni.yaml b/.github/workflows/spark-rapids-jni.yaml new file mode 100644 index 00000000000..1ec518915cd --- /dev/null +++ b/.github/workflows/spark-rapids-jni.yaml @@ -0,0 +1,20 @@ +name: spark-rapids-jni + +on: + workflow_dispatch: + +jobs: + spark-rapids-jni-build: + runs-on: linux-amd64-cpu8 + container: + image: ubuntu:24.04 # TODO + steps: + - uses: actions/checkout@v4 + with: + repository: NVIDIA/spark-rapids-jni + submodules: recursive + - uses: actions/checkout@v4 + with: + path: thirdparty/cudf + - run: | + git status From e6a044ce0b76fcc1ee256523d19affd6ba53cd80 Mon Sep 17 00:00:00 2001 From: Kyle Edwards Date: Wed, 22 Jan 2025 18:05:23 -0500 Subject: [PATCH 02/19] Oops --- .github/workflows/spark-rapids-jni.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/spark-rapids-jni.yaml b/.github/workflows/spark-rapids-jni.yaml index 1ec518915cd..b9b71174b78 100644 --- a/.github/workflows/spark-rapids-jni.yaml +++ b/.github/workflows/spark-rapids-jni.yaml @@ -1,7 +1,7 @@ name: spark-rapids-jni on: - workflow_dispatch: + workflow_call: jobs: spark-rapids-jni-build: From 0ec1b10874898c2369c3ce6c53702136dcf55a23 Mon Sep 17 00:00:00 2001 From: Kyle Edwards Date: Wed, 22 Jan 2025 18:06:59 -0500 Subject: [PATCH 03/19] Container --- .github/workflows/spark-rapids-jni.yaml | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/.github/workflows/spark-rapids-jni.yaml b/.github/workflows/spark-rapids-jni.yaml index b9b71174b78..d96c3cb74e6 100644 --- a/.github/workflows/spark-rapids-jni.yaml +++ b/.github/workflows/spark-rapids-jni.yaml @@ -5,9 +5,7 @@ on: jobs: spark-rapids-jni-build: - runs-on: linux-amd64-cpu8 - container: - image: ubuntu:24.04 # TODO + runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 with: From 1e5c19fc4248d113a3f903eae00715058eee8b30 Mon Sep 17 00:00:00 2001 From: Kyle Edwards Date: Wed, 22 Jan 2025 18:08:27 -0500 Subject: [PATCH 04/19] Diff --- .github/workflows/spark-rapids-jni.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/spark-rapids-jni.yaml b/.github/workflows/spark-rapids-jni.yaml index d96c3cb74e6..1d3517294ff 100644 --- a/.github/workflows/spark-rapids-jni.yaml +++ b/.github/workflows/spark-rapids-jni.yaml @@ -16,3 +16,4 @@ jobs: path: thirdparty/cudf - run: | git status + git diff thirdparty/cudf From bf0c0da58c153b1f770b8ef0d9d21d593598a917 Mon Sep 17 00:00:00 2001 From: Kyle Edwards Date: Thu, 23 Jan 2025 09:30:20 -0500 Subject: [PATCH 05/19] Use temporary image from Spark team --- .github/workflows/spark-rapids-jni.yaml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/spark-rapids-jni.yaml b/.github/workflows/spark-rapids-jni.yaml index 1d3517294ff..004ed27fbf1 100644 --- a/.github/workflows/spark-rapids-jni.yaml +++ b/.github/workflows/spark-rapids-jni.yaml @@ -5,7 +5,9 @@ on: jobs: spark-rapids-jni-build: - runs-on: ubuntu-latest + runs-on: linux-amd64-cpu8 + container: + image: peixinl198/spark-rapids-jni:rockylinux8-cuda12.2.0 steps: - uses: actions/checkout@v4 with: From e122f1c06c806474af05aced89424720e93f8014 Mon Sep 17 00:00:00 2001 From: Kyle Edwards Date: Thu, 23 Jan 2025 09:42:08 -0500 Subject: [PATCH 06/19] Debug --- .github/workflows/spark-rapids-jni.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/spark-rapids-jni.yaml b/.github/workflows/spark-rapids-jni.yaml index 004ed27fbf1..d4be1df2360 100644 --- a/.github/workflows/spark-rapids-jni.yaml +++ b/.github/workflows/spark-rapids-jni.yaml @@ -17,5 +17,6 @@ jobs: with: path: thirdparty/cudf - run: | + id git status git diff thirdparty/cudf From 36c0d399d410936da6e7dd6e53a7780f012f7a70 Mon Sep 17 00:00:00 2001 From: Kyle Edwards Date: Thu, 23 Jan 2025 09:46:22 -0500 Subject: [PATCH 07/19] More debugging --- .github/workflows/spark-rapids-jni.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/spark-rapids-jni.yaml b/.github/workflows/spark-rapids-jni.yaml index d4be1df2360..187881211d4 100644 --- a/.github/workflows/spark-rapids-jni.yaml +++ b/.github/workflows/spark-rapids-jni.yaml @@ -18,5 +18,7 @@ jobs: path: thirdparty/cudf - run: | id + pwd + ls -l git status git diff thirdparty/cudf From 500f43fd9a354dd225787b93e0f38a0cb123fe1b Mon Sep 17 00:00:00 2001 From: Kyle Edwards Date: Thu, 23 Jan 2025 09:50:38 -0500 Subject: [PATCH 08/19] More listing --- .github/workflows/spark-rapids-jni.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/spark-rapids-jni.yaml b/.github/workflows/spark-rapids-jni.yaml index 187881211d4..b205d901146 100644 --- a/.github/workflows/spark-rapids-jni.yaml +++ b/.github/workflows/spark-rapids-jni.yaml @@ -19,6 +19,6 @@ jobs: - run: | id pwd - ls -l + ls -laR git status git diff thirdparty/cudf From d0c755d4443f448fad72ce16016bb3b7c198f4f5 Mon Sep 17 00:00:00 2001 From: Kyle Edwards Date: Thu, 23 Jan 2025 10:01:08 -0500 Subject: [PATCH 09/19] Safe --- .github/workflows/spark-rapids-jni.yaml | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/.github/workflows/spark-rapids-jni.yaml b/.github/workflows/spark-rapids-jni.yaml index b205d901146..d13a736885a 100644 --- a/.github/workflows/spark-rapids-jni.yaml +++ b/.github/workflows/spark-rapids-jni.yaml @@ -17,8 +17,6 @@ jobs: with: path: thirdparty/cudf - run: | - id - pwd - ls -laR + git config --global safe.directory $(pwd) git status git diff thirdparty/cudf From cd2356629359fa567b5ab755cc717a0b54efb359 Mon Sep 17 00:00:00 2001 From: Kyle Edwards Date: Thu, 23 Jan 2025 10:13:06 -0500 Subject: [PATCH 10/19] Build --- .github/workflows/spark-rapids-jni.yaml | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/.github/workflows/spark-rapids-jni.yaml b/.github/workflows/spark-rapids-jni.yaml index d13a736885a..0d4598e548c 100644 --- a/.github/workflows/spark-rapids-jni.yaml +++ b/.github/workflows/spark-rapids-jni.yaml @@ -17,6 +17,4 @@ jobs: with: path: thirdparty/cudf - run: | - git config --global safe.directory $(pwd) - git status - git diff thirdparty/cudf + GPU_ARCHS=89-real LIBCUDF_DEPENDENCY_MODE=latest scl enable gcc-toolset-11 build/buildcpp.sh From 040600ff96a5d469076d78d2cb29c6b5bc4b7cfa Mon Sep 17 00:00:00 2001 From: Kyle Edwards Date: Thu, 23 Jan 2025 10:25:08 -0500 Subject: [PATCH 11/19] mkdir --- .github/workflows/spark-rapids-jni.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/spark-rapids-jni.yaml b/.github/workflows/spark-rapids-jni.yaml index 0d4598e548c..93f7e4a8904 100644 --- a/.github/workflows/spark-rapids-jni.yaml +++ b/.github/workflows/spark-rapids-jni.yaml @@ -17,4 +17,5 @@ jobs: with: path: thirdparty/cudf - run: | + mkdir target GPU_ARCHS=89-real LIBCUDF_DEPENDENCY_MODE=latest scl enable gcc-toolset-11 build/buildcpp.sh From 8390b003ea29e7348d1b3d8806e4b817a61fa102 Mon Sep 17 00:00:00 2001 From: Kyle Edwards Date: Thu, 23 Jan 2025 10:29:10 -0500 Subject: [PATCH 12/19] Set name --- .github/workflows/spark-rapids-jni.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/spark-rapids-jni.yaml b/.github/workflows/spark-rapids-jni.yaml index 93f7e4a8904..a5f1f9b1dc5 100644 --- a/.github/workflows/spark-rapids-jni.yaml +++ b/.github/workflows/spark-rapids-jni.yaml @@ -16,6 +16,7 @@ jobs: - uses: actions/checkout@v4 with: path: thirdparty/cudf - - run: | + - name: "Build spark-rapids-jni" + run: | mkdir target GPU_ARCHS=89-real LIBCUDF_DEPENDENCY_MODE=latest scl enable gcc-toolset-11 build/buildcpp.sh From 662593169f6cc15271c66f663e1bd3f0a1543329 Mon Sep 17 00:00:00 2001 From: Kyle Edwards Date: Thu, 23 Jan 2025 11:43:43 -0500 Subject: [PATCH 13/19] Deliberately introduce build error for spark-rapids-jni --- cpp/src/io/utilities/base64_utilities.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/cpp/src/io/utilities/base64_utilities.cpp b/cpp/src/io/utilities/base64_utilities.cpp index 00fc54f9883..a20b5198d19 100644 --- a/cpp/src/io/utilities/base64_utilities.cpp +++ b/cpp/src/io/utilities/base64_utilities.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2024, NVIDIA CORPORATION. + * Copyright (c) 2024-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -58,6 +58,8 @@ // altered: applying clang-format for libcudf on this file. +#error "This error is for spark-rapids-jni" + #include "base64_utilities.hpp" #include From 95d63fd97fa59953c6ba70cc9c894723f08cdf7b Mon Sep 17 00:00:00 2001 From: Kyle Edwards Date: Thu, 23 Jan 2025 11:59:45 -0500 Subject: [PATCH 14/19] Revert "Deliberately introduce build error for spark-rapids-jni" This reverts commit 662593169f6cc15271c66f663e1bd3f0a1543329. --- cpp/src/io/utilities/base64_utilities.cpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/cpp/src/io/utilities/base64_utilities.cpp b/cpp/src/io/utilities/base64_utilities.cpp index a20b5198d19..00fc54f9883 100644 --- a/cpp/src/io/utilities/base64_utilities.cpp +++ b/cpp/src/io/utilities/base64_utilities.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2024-2025, NVIDIA CORPORATION. + * Copyright (c) 2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -58,8 +58,6 @@ // altered: applying clang-format for libcudf on this file. -#error "This error is for spark-rapids-jni" - #include "base64_utilities.hpp" #include From 01ca64e218b3f4055308f67f95c6e2cce24b8259 Mon Sep 17 00:00:00 2001 From: Kyle Edwards Date: Thu, 30 Jan 2025 12:48:15 -0500 Subject: [PATCH 15/19] Use official spark-rapids-jni CI image --- .github/workflows/spark-rapids-jni.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/spark-rapids-jni.yaml b/.github/workflows/spark-rapids-jni.yaml index a5f1f9b1dc5..8e0d5ccb37f 100644 --- a/.github/workflows/spark-rapids-jni.yaml +++ b/.github/workflows/spark-rapids-jni.yaml @@ -7,7 +7,7 @@ jobs: spark-rapids-jni-build: runs-on: linux-amd64-cpu8 container: - image: peixinl198/spark-rapids-jni:rockylinux8-cuda12.2.0 + image: rapidsai/ci-spark-rapids-jni:rockylinux8-cuda12.2.0 steps: - uses: actions/checkout@v4 with: From d263f97f388d2c9928f2c4f0bb1077653d8dce92 Mon Sep 17 00:00:00 2001 From: Kyle Edwards Date: Thu, 30 Jan 2025 12:50:36 -0500 Subject: [PATCH 16/19] Review feedback --- .github/workflows/pr.yaml | 2 ++ .github/workflows/spark-rapids-jni.yaml | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index 45c090a0f8a..b9da7a054cc 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -340,7 +340,9 @@ jobs: build_type: pull-request run_script: "ci/cudf_pandas_scripts/pandas-tests/diff.sh" spark-rapids-jni: + needs: changed-files uses: ./.github/workflows/spark-rapids-jni.yaml + if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_java telemetry-summarize: # This job must use a self-hosted runner to record telemetry traces. diff --git a/.github/workflows/spark-rapids-jni.yaml b/.github/workflows/spark-rapids-jni.yaml index 8e0d5ccb37f..097e97df8c5 100644 --- a/.github/workflows/spark-rapids-jni.yaml +++ b/.github/workflows/spark-rapids-jni.yaml @@ -19,4 +19,4 @@ jobs: - name: "Build spark-rapids-jni" run: | mkdir target - GPU_ARCHS=89-real LIBCUDF_DEPENDENCY_MODE=latest scl enable gcc-toolset-11 build/buildcpp.sh + GPU_ARCHS=90 LIBCUDF_DEPENDENCY_MODE=latest USE_GDS=on scl enable gcc-toolset-11 build/buildcpp.sh From bd31192107025a498efc213092f6631a941ae1a5 Mon Sep 17 00:00:00 2001 From: Kyle Edwards Date: Thu, 30 Jan 2025 14:02:59 -0500 Subject: [PATCH 17/19] Add test_spark condition --- .github/workflows/pr.yaml | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index b9da7a054cc..9562c786e80 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -120,6 +120,16 @@ jobs: - '!img/**' - '!java/**' - '!notebooks/**' + test_spark: + - '**' + - '!.devcontainer/**' + - '!CONTRIBUTING.md' + - '!README.md' + - '!ci/cudf_pandas_scripts/**' + - '!docs/**' + - '!img/**' + - '!notebooks/**' + - '!python/**' checks: secrets: inherit needs: telemetry-setup @@ -342,7 +352,7 @@ jobs: spark-rapids-jni: needs: changed-files uses: ./.github/workflows/spark-rapids-jni.yaml - if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_java + if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_spark telemetry-summarize: # This job must use a self-hosted runner to record telemetry traces. From 4c63457a0bc5aaa5383d2dbdfc1e6742223d720b Mon Sep 17 00:00:00 2001 From: Kyle Edwards Date: Fri, 31 Jan 2025 09:57:56 -0500 Subject: [PATCH 18/19] Re-run CI From f8d97b58f9abef1113a8a2930b054a6eaeb0e6b7 Mon Sep 17 00:00:00 2001 From: Kyle Edwards Date: Tue, 4 Feb 2025 09:41:36 -0500 Subject: [PATCH 19/19] Use test_java --- .github/workflows/pr.yaml | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index fbd59cbcaa0..cf94c91b172 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -120,16 +120,6 @@ jobs: - '!img/**' - '!java/**' - '!notebooks/**' - test_spark: - - '**' - - '!.devcontainer/**' - - '!CONTRIBUTING.md' - - '!README.md' - - '!ci/cudf_pandas_scripts/**' - - '!docs/**' - - '!img/**' - - '!notebooks/**' - - '!python/**' checks: secrets: inherit needs: telemetry-setup @@ -355,7 +345,7 @@ jobs: spark-rapids-jni: needs: changed-files uses: ./.github/workflows/spark-rapids-jni.yaml - if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_spark + if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_java telemetry-summarize: # This job must use a self-hosted runner to record telemetry traces.