From daa7b417cfe265b3862537db5cc12fe62d1d7bb3 Mon Sep 17 00:00:00 2001 From: PHILO-HE Date: Fri, 29 Dec 2023 14:27:15 +0800 Subject: [PATCH] Initial commit --- .github/workflows/velox_be.yml | 239 ++++++++++-------- README.md | 2 +- dev/buildbundle-veloxbe.sh | 1 + tools/gluten-te/centos/build.sh | 2 +- tools/gluten-te/centos/buildenv.sh | 4 +- tools/gluten-te/centos/defaults.conf | 32 ++- .../centos/gha/gha-checkout/checkout.sh | 2 +- .../centos/gha/gha-checkout/clean.sh | 2 +- .../gluten-te/centos/gha/gha-checkout/exec.sh | 2 +- tools/gluten-te/github_action/build.sh | 90 +++++++ tools/gluten-te/github_action/buildenv.sh | 89 +++++++ tools/gluten-te/github_action/cbash-build.sh | 59 +++++ .../gluten-te/github_action/centos-7-deps.sh | 49 ++++ .../gluten-te/github_action/centos-8-deps.sh | 37 +++ tools/gluten-te/github_action/checkout.sh | 41 +++ tools/gluten-te/github_action/clean.sh | 25 ++ tools/gluten-te/github_action/defaults.conf | 100 ++++++++ .../gluten-te/github_action/dockerfile-build | 83 ++++++ .../github_action/dockerfile-buildenv-centos | 75 ++++++ .../github_action/dockerfile-buildenv-ubuntu | 119 +++++++++ tools/gluten-te/github_action/exec.sh | 28 ++ tools/gluten-te/github_action/scripts/cmd.sh | 18 ++ tools/gluten-te/github_action/scripts/env.sh | 24 ++ tools/gluten-te/github_action/scripts/init.sh | 25 ++ .../github_action/scripts/set-login-env.sh | 18 ++ tools/gluten-te/ubuntu/defaults.conf | 34 ++- .../ubuntu/gha/gha-checkout/checkout.sh | 2 +- .../ubuntu/gha/gha-checkout/clean.sh | 2 +- .../gluten-te/ubuntu/gha/gha-checkout/exec.sh | 2 +- 29 files changed, 1069 insertions(+), 137 deletions(-) create mode 100755 tools/gluten-te/github_action/build.sh create mode 100755 tools/gluten-te/github_action/buildenv.sh create mode 100755 tools/gluten-te/github_action/cbash-build.sh create mode 100755 tools/gluten-te/github_action/centos-7-deps.sh create mode 100755 tools/gluten-te/github_action/centos-8-deps.sh create mode 100755 
tools/gluten-te/github_action/checkout.sh create mode 100755 tools/gluten-te/github_action/clean.sh create mode 100755 tools/gluten-te/github_action/defaults.conf create mode 100755 tools/gluten-te/github_action/dockerfile-build create mode 100755 tools/gluten-te/github_action/dockerfile-buildenv-centos create mode 100755 tools/gluten-te/github_action/dockerfile-buildenv-ubuntu create mode 100755 tools/gluten-te/github_action/exec.sh create mode 100755 tools/gluten-te/github_action/scripts/cmd.sh create mode 100755 tools/gluten-te/github_action/scripts/env.sh create mode 100755 tools/gluten-te/github_action/scripts/init.sh create mode 100755 tools/gluten-te/github_action/scripts/set-login-env.sh diff --git a/.github/workflows/velox_be.yml b/.github/workflows/velox_be.yml index 603d2e6d2a3d..4fadc6838f7b 100644 --- a/.github/workflows/velox_be.yml +++ b/.github/workflows/velox_be.yml @@ -12,6 +12,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
+# test cache name: Velox backend @@ -42,81 +43,86 @@ concurrency: group: ${{ github.repository }}-${{ github.head_ref || github.sha }}-${{ github.workflow }} cancel-in-progress: true -jobs: +env: + PATH_TO_CHECKOUT: ./tools/gluten-te +jobs: ubuntu2004-test-spark32: runs-on: velox-self-hosted + env: + OS_IMAGE: ubuntu + OS_VERSION: 20.04 steps: - uses: actions/checkout@v4 - name: Setup docker container run: | - docker run --rm --init --privileged --ulimit nofile=65536:65536 --ulimit core=-1 --security-opt seccomp=unconfined \ - -v $PWD:/opt/gluten --name ubuntu2004-test-$GITHUB_RUN_ID -e NUM_THREADS=30 -detach 10.0.2.4:5000/gluten-dev/ubuntu:20.04 \ - 'cd /opt/gluten && sleep 14400' + $PATH_TO_CHECKOUT/$OS_IMAGE/gha/gha-checkout/checkout.sh - name: Build Gluten velox third party run: | - docker exec ubuntu2004-test-$GITHUB_RUN_ID bash -c ' + $PATH_TO_CHECKOUT/$OS_IMAGE/gha/gha-checkout/exec.sh ' cd /opt/gluten/ep/build-velox/src && \ ./get_velox.sh --velox_home=/opt/velox && \ ./build_velox.sh --velox_home=/opt/velox --enable_ep_cache=ON --build_tests=ON' - name: Build Gluten CPP library run: | - docker exec ubuntu2004-test-$GITHUB_RUN_ID bash -c ' + $PATH_TO_CHECKOUT/$OS_IMAGE/gha/gha-checkout/exec.sh ' cd /opt/gluten/cpp && \ ./compile.sh --build_velox_backend=ON --velox_home=/opt/velox --build_tests=ON --build_examples=ON --build_benchmarks=ON' - name: Run CPP unit test run: | - docker exec ubuntu2004-test-$GITHUB_RUN_ID bash -c 'cd /opt/gluten/cpp/build && \ + $PATH_TO_CHECKOUT/$OS_IMAGE/gha/gha-checkout/exec.sh 'cd /opt/gluten/cpp/build && \ ctest -V' - name: Run HBM CPP unit test run: | - docker exec ubuntu2004-test-$GITHUB_RUN_ID bash -c 'cd /opt/gluten/cpp/build && \ + $PATH_TO_CHECKOUT/$OS_IMAGE/gha/gha-checkout/exec.sh 'cd /opt/gluten/cpp/build && \ cmake -DBUILD_TESTS=ON -DENABLE_HBM=ON .. && \ cmake --build . 
--target hbw_allocator_test -- -j && \ ctest -V -R TestHbw' - name: Build and run unit test for Spark 3.2.2 (other tests) run: | - docker exec ubuntu2004-test-$GITHUB_RUN_ID bash -c ' + $PATH_TO_CHECKOUT/$OS_IMAGE/gha/gha-checkout/exec.sh ' cd /opt/gluten && \ mvn clean install -Pspark-3.2 -Pspark-ut -Pbackends-velox -Prss -DargLine="-Dspark.test.home=/opt/spark322" -DtagsToExclude=org.apache.spark.tags.ExtendedSQLTest,io.glutenproject.tags.UDFTest,io.glutenproject.tags.SkipTestTags && \ mvn test -Pspark-3.2 -Pbackends-velox -DtagsToExclude=None -DtagsToInclude=io.glutenproject.tags.UDFTest' # Cpp micro benchmarks will use generated files from unit test in backends-velox module. - name: Run micro benchmarks run: | - docker exec ubuntu2004-test-$GITHUB_RUN_ID bash -c 'cd /opt/gluten/cpp/build/velox/benchmarks && \ + $PATH_TO_CHECKOUT/$OS_IMAGE/gha/gha-checkout/exec.sh 'cd /opt/gluten/cpp/build/velox/benchmarks && \ ./generic_benchmark --with-shuffle --threads 1 --iterations 1' - name: Exit docker container if: ${{ always() }} run: | - docker stop ubuntu2004-test-$GITHUB_RUN_ID || true + $PATH_TO_CHECKOUT/$OS_IMAGE/gha/gha-checkout/clean.sh + ubuntu2004-test-spark32-slow: runs-on: velox-self-hosted + env: + OS_IMAGE: ubuntu + OS_VERSION: 20.04 steps: - uses: actions/checkout@v4 - name: Setup docker container run: | - docker run --rm --init --privileged --ulimit nofile=65536:65536 --ulimit core=-1 --security-opt seccomp=unconfined \ - -v $PWD:/opt/gluten --name ubuntu2004-test-slow-$GITHUB_RUN_ID -e NUM_THREADS=30 -detach 10.0.2.4:5000/gluten-dev/ubuntu:20.04 \ - 'cd /opt/gluten && sleep 14400' + $PATH_TO_CHECKOUT/$OS_IMAGE/gha/gha-checkout/checkout.sh - name: Build Gluten velox third party run: | - docker exec ubuntu2004-test-slow-$GITHUB_RUN_ID bash -c ' + $PATH_TO_CHECKOUT/$OS_IMAGE/gha/gha-checkout/exec.sh ' cd /opt/gluten/ep/build-velox/src && \ ./get_velox.sh --velox_home=/opt/velox && \ ./build_velox.sh --velox_home=/opt/velox --enable_ep_cache=ON' - name: 
Build Gluten CPP library run: | - docker exec ubuntu2004-test-slow-$GITHUB_RUN_ID bash -c ' + $PATH_TO_CHECKOUT/$OS_IMAGE/gha/gha-checkout/exec.sh ' cd /opt/gluten/cpp && \ ./compile.sh --build_velox_backend=ON --velox_home=/opt/velox' - name: Build and run unit test for Spark 3.2.2(slow tests) run: | - docker exec ubuntu2004-test-slow-$GITHUB_RUN_ID bash -c ' + $PATH_TO_CHECKOUT/$OS_IMAGE/gha/gha-checkout/exec.sh ' cd /opt/gluten && \ mvn clean install -Pspark-3.2 -Pspark-ut -Pbackends-velox -Prss -Piceberg -Pdelta -DargLine="-Dspark.test.home=/opt/spark322" -DtagsToInclude=org.apache.spark.tags.ExtendedSQLTest' - name: TPC-H SF1.0 && TPC-DS SF1.0 Parquet local spark3.2 run: | - docker exec ubuntu2004-test-slow-$GITHUB_RUN_ID bash -c 'cd /opt/gluten/tools/gluten-it && \ + $PATH_TO_CHECKOUT/$OS_IMAGE/gha/gha-checkout/exec.sh 'cd /opt/gluten/tools/gluten-it && \ mvn clean install -Pspark-3.2 \ && GLUTEN_IT_JVM_ARGS=-Xmx5G sbin/gluten-it.sh queries-compare \ --local --preset=velox --benchmark-type=h --error-on-memleak --disable-aqe --off-heap-size=10g -s=1.0 --threads=16 --iterations=1 \ @@ -125,34 +131,36 @@ jobs: - name: Exit docker container if: ${{ always() }} run: | - docker stop ubuntu2004-test-slow-$GITHUB_RUN_ID || true + $PATH_TO_CHECKOUT/$OS_IMAGE/gha/gha-checkout/clean.sh + ubuntu2004-test-spark33-slow: runs-on: velox-self-hosted + env: + OS_IMAGE: ubuntu + OS_VERSION: 20.04 steps: - uses: actions/checkout@v4 - name: Setup docker container run: | - docker run --rm --init --privileged --ulimit nofile=65536:65536 --ulimit core=-1 --security-opt seccomp=unconfined \ - -v $PWD:/opt/gluten --name ubuntu2004-test-spark33-slow-$GITHUB_RUN_ID -e NUM_THREADS=30 -detach 10.0.2.4:5000/gluten-dev/ubuntu:20.04 \ - 'cd /opt/gluten && sleep 14400' + $PATH_TO_CHECKOUT/$OS_IMAGE/gha/gha-checkout/checkout.sh - name: Build Gluten velox third party run: | - docker exec ubuntu2004-test-spark33-slow-$GITHUB_RUN_ID bash -l -c ' + 
$PATH_TO_CHECKOUT/$OS_IMAGE/gha/gha-checkout/exec.sh ' cd /opt/gluten/ep/build-velox/src && \ ./get_velox.sh --velox_home=/opt/velox && \ ./build_velox.sh --velox_home=/opt/velox --enable_ep_cache=ON' - name: Build Gluten CPP library run: | - docker exec ubuntu2004-test-spark33-slow-$GITHUB_RUN_ID bash -l -c ' + $PATH_TO_CHECKOUT/$OS_IMAGE/gha/gha-checkout/exec.sh ' cd /opt/gluten/cpp && \ ./compile.sh --build_velox_backend=ON --velox_home=/opt/velox' - name: Build and Run unit test for Spark 3.3.1(slow tests) run: | - docker exec ubuntu2004-test-spark33-slow-$GITHUB_RUN_ID bash -l -c 'cd /opt/gluten && \ + $PATH_TO_CHECKOUT/$OS_IMAGE/gha/gha-checkout/exec.sh 'cd /opt/gluten && \ mvn clean install -Pspark-3.3 -Pbackends-velox -Prss -Piceberg -Pdelta -Pspark-ut -DargLine="-Dspark.test.home=/opt/spark331" -DtagsToInclude=org.apache.spark.tags.ExtendedSQLTest' - name: TPC-H SF1.0 && TPC-DS SF1.0 Parquet local spark3.3 run: | - docker exec ubuntu2004-test-spark33-slow-$GITHUB_RUN_ID bash -l -c 'cd /opt/gluten/tools/gluten-it && \ + $PATH_TO_CHECKOUT/$OS_IMAGE/gha/gha-checkout/exec.sh 'cd /opt/gluten/tools/gluten-it && \ mvn clean install -Pspark-3.3 \ && GLUTEN_IT_JVM_ARGS=-Xmx5G sbin/gluten-it.sh queries-compare \ --local --preset=velox --benchmark-type=h --error-on-memleak --disable-aqe --off-heap-size=10g -s=1.0 --threads=16 --iterations=1 \ @@ -161,64 +169,67 @@ jobs: - name: Exit docker container if: ${{ always() }} run: | - docker stop ubuntu2004-test-spark33-slow-$GITHUB_RUN_ID || true + $PATH_TO_CHECKOUT/$OS_IMAGE/gha/gha-checkout/clean.sh + ubuntu2004-test-spark33: runs-on: velox-self-hosted + env: + OS_IMAGE: ubuntu + OS_VERSION: 20.04 steps: - uses: actions/checkout@v4 - name: Setup docker container run: | - docker run --rm --init --privileged --ulimit nofile=65536:65536 --ulimit core=-1 --security-opt seccomp=unconfined \ - -v $PWD:/opt/gluten --name ubuntu2004-test-spark33-$GITHUB_RUN_ID -e NUM_THREADS=30 -detach 10.0.2.4:5000/gluten-dev/ubuntu:20.04 \ - 
'cd /opt/gluten && sleep 14400' + $PATH_TO_CHECKOUT/$OS_IMAGE/gha/gha-checkout/checkout.sh - name: Build Gluten velox third party run: | - docker exec ubuntu2004-test-spark33-$GITHUB_RUN_ID bash -c ' + $PATH_TO_CHECKOUT/$OS_IMAGE/gha/gha-checkout/exec.sh ' cd /opt/gluten/ep/build-velox/src && \ ./get_velox.sh --velox_home=/opt/velox && \ ./build_velox.sh --velox_home=/opt/velox --enable_ep_cache=ON' - name: Build Gluten CPP library run: | - docker exec ubuntu2004-test-spark33-$GITHUB_RUN_ID bash -c ' + $PATH_TO_CHECKOUT/$OS_IMAGE/gha/gha-checkout/exec.sh ' cd /opt/gluten/cpp && \ ./compile.sh --build_velox_backend=ON --velox_home=/opt/velox --build_examples=ON' - name: Build and Run unit test for Spark 3.3.1(other tests) run: | - docker exec ubuntu2004-test-spark33-$GITHUB_RUN_ID bash -c 'cd /opt/gluten && \ + $PATH_TO_CHECKOUT/$OS_IMAGE/gha/gha-checkout/exec.sh 'cd /opt/gluten && \ mvn clean install -Pspark-3.3 -Pbackends-velox -Prss -Piceberg -Pdelta -Pspark-ut -DargLine="-Dspark.test.home=/opt/spark331" -DtagsToExclude=org.apache.spark.tags.ExtendedSQLTest,io.glutenproject.tags.UDFTest,io.glutenproject.tags.SkipTestTags && \ mvn test -Pspark-3.3 -Pbackends-velox -DtagsToExclude=None -DtagsToInclude=io.glutenproject.tags.UDFTest' - name: Exit docker container if: ${{ always() }} run: | - docker stop ubuntu2004-test-spark33-$GITHUB_RUN_ID || true + $PATH_TO_CHECKOUT/$OS_IMAGE/gha/gha-checkout/clean.sh ubuntu2004-test-spark34-slow: runs-on: velox-self-hosted + env: + OS_IMAGE: ubuntu + OS_VERSION: 20.04 steps: - uses: actions/checkout@v4 - name: Setup docker container run: | - docker run --rm --init --privileged --ulimit nofile=65536:65536 --ulimit core=-1 --security-opt seccomp=unconfined \ - -v $PWD:/opt/gluten --name ubuntu2004-test-spark34-slow-$GITHUB_RUN_ID -e NUM_THREADS=30 -detach 10.0.2.4:5000/gluten-dev/ubuntu:20.04 \ - 'cd /opt/gluten && sleep 14400' + $PATH_TO_CHECKOUT/$OS_IMAGE/gha/gha-checkout/checkout.sh - name: Build Gluten velox third party run: | 
- docker exec ubuntu2004-test-spark34-slow-$GITHUB_RUN_ID bash -l -c ' + $PATH_TO_CHECKOUT/$OS_IMAGE/gha/gha-checkout/exec.sh ' cd /opt/gluten/ep/build-velox/src && \ ./get_velox.sh --velox_home=/opt/velox && \ ./build_velox.sh --velox_home=/opt/velox --enable_ep_cache=ON' - name: Build Gluten CPP library run: | - docker exec ubuntu2004-test-spark34-slow-$GITHUB_RUN_ID bash -l -c ' + $PATH_TO_CHECKOUT/$OS_IMAGE/gha/gha-checkout/exec.sh ' cd /opt/gluten/cpp && \ ./compile.sh --build_velox_backend=ON --velox_home=/opt/velox ' - name: Build and Run unit test for Spark 3.4.1(slow tests) run: | - docker exec ubuntu2004-test-spark34-slow-$GITHUB_RUN_ID bash -l -c 'cd /opt/gluten && \ + $PATH_TO_CHECKOUT/$OS_IMAGE/gha/gha-checkout/exec.sh 'cd /opt/gluten && \ mvn clean install -Pspark-3.4 -Pbackends-velox -Prss -Piceberg -Pdelta -Pspark-ut -DargLine="-Dspark.test.home=/opt/spark331" -DtagsToInclude=org.apache.spark.tags.ExtendedSQLTest' - name: TPC-H SF1.0 && TPC-DS SF1.0 Parquet local spark3.4 run: | - docker exec ubuntu2004-test-spark34-slow-$GITHUB_RUN_ID bash -l -c 'cd /opt/gluten/tools/gluten-it && \ + $PATH_TO_CHECKOUT/$OS_IMAGE/gha/gha-checkout/exec.sh 'cd /opt/gluten/tools/gluten-it && \ mvn clean install -Pspark-3.4 \ && GLUTEN_IT_JVM_ARGS=-Xmx5G sbin/gluten-it.sh queries-compare \ --local --preset=velox --benchmark-type=h --error-on-memleak --disable-aqe --off-heap-size=10g -s=1.0 --threads=16 --iterations=1 \ @@ -227,64 +238,68 @@ jobs: - name: Exit docker container if: ${{ always() }} run: | - docker stop ubuntu2004-test-spark34-slow-$GITHUB_RUN_ID || true + $PATH_TO_CHECKOUT/$OS_IMAGE/gha/gha-checkout/clean.sh + ubuntu2004-test-spark34: runs-on: velox-self-hosted + env: + OS_IMAGE: ubuntu + OS_VERSION: 20.04 steps: - uses: actions/checkout@v4 - name: Setup docker container run: | - docker run --rm --init --privileged --ulimit nofile=65536:65536 --ulimit core=-1 --security-opt seccomp=unconfined \ - -v $PWD:/opt/gluten --name 
ubuntu2004-test-spark34-$GITHUB_RUN_ID -e NUM_THREADS=30 -detach 10.0.2.4:5000/gluten-dev/ubuntu:20.04 \ - 'cd /opt/gluten && sleep 14400' + $PATH_TO_CHECKOUT/$OS_IMAGE/gha/gha-checkout/checkout.sh - name: Build Gluten velox third party run: | - docker exec ubuntu2004-test-spark34-$GITHUB_RUN_ID bash -c ' + $PATH_TO_CHECKOUT/$OS_IMAGE/gha/gha-checkout/exec.sh ' cd /opt/gluten/ep/build-velox/src && \ ./get_velox.sh --velox_home=/opt/velox && \ ./build_velox.sh --velox_home=/opt/velox --enable_ep_cache=ON' - name: Build Gluten CPP library run: | - docker exec ubuntu2004-test-spark34-$GITHUB_RUN_ID bash -c ' + $PATH_TO_CHECKOUT/$OS_IMAGE/gha/gha-checkout/exec.sh ' cd /opt/gluten/cpp && \ ./compile.sh --build_velox_backend=ON --velox_home=/opt/velox --build_examples=ON' - name: Build and Run unit test for Spark 3.4.1(other tests) run: | - docker exec ubuntu2004-test-spark34-$GITHUB_RUN_ID bash -c 'cd /opt/gluten && \ + $PATH_TO_CHECKOUT/$OS_IMAGE/gha/gha-checkout/exec.sh 'cd /opt/gluten && \ mvn clean install -Pspark-3.4 -Pbackends-velox -Prss -Piceberg -Pdelta -Pspark-ut -DargLine="-Dspark.test.home=/opt/spark331" -DtagsToExclude=org.apache.spark.tags.ExtendedSQLTest,io.glutenproject.tags.UDFTest,io.glutenproject.tags.SkipTestTags && \ mvn test -Pspark-3.4 -Pbackends-velox -DtagsToExclude=None -DtagsToInclude=io.glutenproject.tags.UDFTest' - name: Exit docker container if: ${{ always() }} run: | - docker stop ubuntu2004-test-spark34-$GITHUB_RUN_ID || true + $PATH_TO_CHECKOUT/$OS_IMAGE/gha/gha-checkout/clean.sh + ubuntu2204-test: runs-on: velox-self-hosted + env: + OS_IMAGE: ubuntu + OS_VERSION: 22.04 steps: - uses: actions/checkout@v4 - name: Setup docker container run: | - docker run --rm --init --privileged --ulimit nofile=65536:65536 --ulimit core=-1 --security-opt seccomp=unconfined \ - -v $PWD:/opt/gluten --name ubuntu2204-test-$GITHUB_RUN_ID -e NUM_THREADS=30 -detach 10.0.2.4:5000/gluten-dev/ubuntu:22.04 \ - 'cd /opt/gluten && sleep 14400' + 
$PATH_TO_CHECKOUT/$OS_IMAGE/gha/gha-checkout/checkout.sh - name: Build Gluten velox third party run: | - docker exec ubuntu2204-test-$GITHUB_RUN_ID bash -c ' + $PATH_TO_CHECKOUT/$OS_IMAGE/gha/gha-checkout/exec.sh ' cd /opt/gluten/ep/build-velox/src && \ ./get_velox.sh --velox_home=/opt/velox --enable_hdfs=ON --enable_s3=ON --enable_gcs=ON --enable_abfs=OFF && \ ./build_velox.sh --velox_home=/opt/velox --enable_ep_cache=ON --enable_hdfs=ON --enable_s3=ON --enable_gcs=ON --enable_abfs=OFF' - name: Build Gluten CPP library run: | - docker exec ubuntu2204-test-$GITHUB_RUN_ID bash -c ' + $PATH_TO_CHECKOUT/$OS_IMAGE/gha/gha-checkout/exec.sh ' cd /opt/gluten/cpp && \ ./compile.sh --build_velox_backend=ON --velox_home=/opt/velox --enable_hdfs=ON --enable_s3=ON --enable_gcs=ON --enable_abfs=OFF' - name: Build for Spark 3.2.2 run: | - docker exec ubuntu2204-test-$GITHUB_RUN_ID bash -c ' + $PATH_TO_CHECKOUT/$OS_IMAGE/gha/gha-checkout/exec.sh ' cd /opt/gluten && \ mvn clean install -Pspark-3.2 -Pbackends-velox -Prss -Piceberg -Pdelta -DskipTests' - name: TPC-H SF1.0 && TPC-DS SF10.0 Parquet local spark3.2 run: | - docker exec ubuntu2204-test-$GITHUB_RUN_ID bash -c 'cd /opt/gluten/tools/gluten-it && \ + $PATH_TO_CHECKOUT/$OS_IMAGE/gha/gha-checkout/exec.sh 'cd /opt/gluten/tools/gluten-it && \ mvn clean install -Pspark-3.2 \ && GLUTEN_IT_JVM_ARGS=-Xmx5G sbin/gluten-it.sh queries-compare \ --local --preset=velox --benchmark-type=h --error-on-memleak --disable-aqe --off-heap-size=10g -s=1.0 --threads=16 --iterations=1 \ @@ -292,7 +307,7 @@ jobs: --local --preset=velox --benchmark-type=ds --error-on-memleak --off-heap-size=40g -s=10.0 --threads=32 --iterations=1' - name: TPC-H SF1.0 && TPC-DS SF10.0 Parquet local spark3.2 with Celeborn run: | - docker exec ubuntu2204-test-$GITHUB_RUN_ID bash -c \ + $PATH_TO_CHECKOUT/$OS_IMAGE/gha/gha-checkout/exec.sh \ 'mv /opt/apache-celeborn-0.3.0-incubating-bin/conf/celeborn-env.sh.template 
/opt/apache-celeborn-0.3.0-incubating-bin/conf/celeborn-env.sh && \ echo -e "CELEBORN_MASTER_MEMORY=4g\nCELEBORN_WORKER_MEMORY=4g\nCELEBORN_WORKER_OFFHEAP_MEMORY=8g" > /opt/apache-celeborn-0.3.0-incubating-bin/conf/celeborn-env.sh && \ echo -e "celeborn.worker.commitFiles.threads 128\nceleborn.worker.sortPartition.threads 64" > /opt/apache-celeborn-0.3.0-incubating-bin/conf/celeborn-defaults.conf \ @@ -306,12 +321,12 @@ jobs: && bash /opt/apache-celeborn-0.3.0-incubating-bin/sbin/stop-master.sh' - name: Build for Spark 3.3.1 run: | - docker exec ubuntu2204-test-$GITHUB_RUN_ID bash -c ' + $PATH_TO_CHECKOUT/$OS_IMAGE/gha/gha-checkout/exec.sh ' cd /opt/gluten && \ mvn clean install -Pspark-3.3 -Pbackends-velox -Prss -Piceberg -Pdelta -DskipTests' - name: TPC-H SF1.0 && TPC-DS SF10.0 Parquet local spark3.3 run: | - docker exec ubuntu2204-test-$GITHUB_RUN_ID bash -c 'cd /opt/gluten/tools/gluten-it && \ + $PATH_TO_CHECKOUT/$OS_IMAGE/gha/gha-checkout/exec.sh 'cd /opt/gluten/tools/gluten-it && \ mvn clean install -Pspark-3.3 \ && GLUTEN_IT_JVM_ARGS=-Xmx5G sbin/gluten-it.sh queries-compare \ --local --preset=velox --benchmark-type=h --error-on-memleak --disable-aqe --off-heap-size=10g -s=1.0 --threads=16 --iterations=1 \ @@ -319,12 +334,12 @@ jobs: --local --preset=velox --benchmark-type=ds --error-on-memleak --off-heap-size=30g -s=10.0 --threads=32 --iterations=1' - name: Build for Spark 3.4.1 run: | - docker exec ubuntu2204-test-$GITHUB_RUN_ID bash -c ' + $PATH_TO_CHECKOUT/$OS_IMAGE/gha/gha-checkout/exec.sh ' cd /opt/gluten && \ mvn clean install -Pspark-3.4 -Pbackends-velox -Prss -Piceberg -Pdelta -DskipTests' - name: TPC-H SF1.0 && TPC-DS SF10.0 Parquet local spark3.4 run: | - docker exec ubuntu2204-test-$GITHUB_RUN_ID bash -c 'cd /opt/gluten/tools/gluten-it && \ + $PATH_TO_CHECKOUT/$OS_IMAGE/gha/gha-checkout/exec.sh 'cd /opt/gluten/tools/gluten-it && \ mvn clean install -Pspark-3.4 \ && GLUTEN_IT_JVM_ARGS=-Xmx5G sbin/gluten-it.sh queries-compare \ --local 
--preset=velox --benchmark-type=h --error-on-memleak --disable-aqe --off-heap-size=10g -s=1.0 --threads=16 --iterations=1 \ @@ -333,19 +348,21 @@ jobs: - name: Exit docker container if: ${{ always() }} run: | - docker stop ubuntu2204-test-$GITHUB_RUN_ID || true + $PATH_TO_CHECKOUT/$OS_IMAGE/gha/gha-checkout/clean.sh + centos8-test: runs-on: velox-self-hosted + env: + OS_IMAGE: centos + OS_VERSION: 8 steps: - uses: actions/checkout@v4 - name: Setup docker container run: | - docker run --rm --init --privileged --ulimit nofile=65536:65536 --ulimit core=-1 --security-opt seccomp=unconfined \ - -v $PWD:/opt/gluten --name centos8-test-$GITHUB_RUN_ID -e NUM_THREADS=30 -detach 10.0.2.4:5000/gluten-dev/centos:8 \ - bash -c 'cd /opt/gluten && sleep 14400' + $PATH_TO_CHECKOUT/$OS_IMAGE/gha/gha-checkout/checkout.sh - name: Build Gluten velox third party run: | - docker exec centos8-test-$GITHUB_RUN_ID bash -c ' + $PATH_TO_CHECKOUT/$OS_IMAGE/gha/gha-checkout/exec.sh ' source /env.sh && \ sudo yum -y install patch && \ cd /opt/gluten/ep/build-velox/src && \ @@ -353,18 +370,18 @@ jobs: ./build_velox.sh --velox_home=/opt/velox --enable_ep_cache=ON --enable_hdfs=ON --enable_s3=ON --enable_gcs=ON --enable_abfs=OFF' - name: Build Gluten CPP library run: | - docker exec centos8-test-$GITHUB_RUN_ID bash -c ' + $PATH_TO_CHECKOUT/$OS_IMAGE/gha/gha-checkout/exec.sh ' source /env.sh && \ cd /opt/gluten/cpp && \ ./compile.sh --build_velox_backend=ON --velox_home=/opt/velox --enable_hdfs=ON --enable_s3=ON --enable_gcs=ON --enable_abfs=OFF' - name: Build for Spark 3.2.2 run: | - docker exec centos8-test-$GITHUB_RUN_ID bash -c ' + $PATH_TO_CHECKOUT/$OS_IMAGE/gha/gha-checkout/exec.sh ' cd /opt/gluten && \ mvn clean install -Pspark-3.2 -Pbackends-velox -Prss -Piceberg -Pdelta -DskipTests' - name: TPC-H SF1.0 && TPC-DS SF30.0 Parquet local spark3.2 run: | - docker exec centos8-test-$GITHUB_RUN_ID bash -c 'cd /opt/gluten/tools/gluten-it && \ + $PATH_TO_CHECKOUT/$OS_IMAGE/gha/gha-checkout/exec.sh 
'cd /opt/gluten/tools/gluten-it && \ mvn clean install -Pspark-3.2 \ && GLUTEN_IT_JVM_ARGS=-Xmx5G sbin/gluten-it.sh queries-compare \ --local --preset=velox --benchmark-type=h --error-on-memleak --disable-aqe --off-heap-size=10g -s=1.0 --threads=16 --iterations=1 \ @@ -372,7 +389,7 @@ jobs: --local --preset=velox --benchmark-type=ds --error-on-memleak --off-heap-size=50g -s=30.0 --threads=32 --iterations=1' - name: TPC-H SF1.0 && TPC-DS SF30.0 Parquet local spark3.2 random kill tasks run: | - docker exec centos8-test-$GITHUB_RUN_ID bash -c 'cd /opt/gluten/tools/gluten-it && \ + $PATH_TO_CHECKOUT/$OS_IMAGE/gha/gha-checkout/exec.sh 'cd /opt/gluten/tools/gluten-it && \ mvn clean install -Pspark-3.2 \ && GLUTEN_IT_JVM_ARGS=-Xmx5G sbin/gluten-it.sh queries \ --local --preset=velox --benchmark-type=h --error-on-memleak --disable-aqe --off-heap-size=10g -s=1.0 --threads=16 --iterations=1 --skip-data-gen --random-kill-tasks \ @@ -381,41 +398,39 @@ jobs: - name: Exit docker container if: ${{ always() }} run: | - docker stop centos8-test-$GITHUB_RUN_ID || true - + $PATH_TO_CHECKOUT/$OS_IMAGE/gha/gha-checkout/clean.sh + centos7-test: runs-on: velox-self-hosted + env: + OS_IMAGE: centos + OS_VERSION: 7 steps: - uses: actions/checkout@v4 - name: Setup docker container run: | - EXTRA_DOCKER_OPTIONS="--name centos7-test-$GITHUB_RUN_ID -e NUM_THREADS=30 --detach" \ - NON_INTERACTIVE=ON \ - MOUNT_MAVEN_CACHE=OFF \ - OS_IMAGE=centos:7 \ - OS_VERSION=7 \ - tools/gluten-te/centos/cbash-mount.sh sleep 14400 + $PATH_TO_CHECKOUT/$OS_IMAGE/gha/gha-checkout/checkout.sh - name: Build Gluten velox third party run: | - docker exec centos7-test-$GITHUB_RUN_ID bash -c ' + $PATH_TO_CHECKOUT/$OS_IMAGE/gha/gha-checkout/exec.sh ' yum -y install epel-release centos-release-scl patch sudo && \ cd /opt/gluten/ep/build-velox/src && \ ./get_velox.sh --velox_home=/opt/velox --enable_hdfs=ON --enable_s3=ON --enable_gcs=ON --enable_abfs=OFF && \ ./build_velox.sh --velox_home=/opt/velox --enable_ep_cache=ON 
--enable_s3=ON --enable_gcs=ON --enable_abfs=OFF --enable_hdfs=ON' - name: Build Gluten CPP library run: | - docker exec centos7-test-$GITHUB_RUN_ID bash -c ' + $PATH_TO_CHECKOUT/$OS_IMAGE/gha/gha-checkout/exec.sh ' cd /opt/gluten/cpp && \ source /opt/rh/devtoolset-9/enable && \ ./compile.sh --build_velox_backend=ON --velox_home=/opt/velox --enable_hdfs=ON --enable_s3=ON --enable_gcs=ON --enable_abfs=OFF' - name: Build for Spark 3.2.2 run: | - docker exec centos7-test-$GITHUB_RUN_ID bash -c ' + $PATH_TO_CHECKOUT/$OS_IMAGE/gha/gha-checkout/exec.sh ' cd /opt/gluten && \ mvn clean install -Pspark-3.2 -Pbackends-velox -Prss -Piceberg -Pdelta -DskipTests' - name: TPC-H SF1.0 && TPC-DS SF30.0 Parquet local spark3.2 run: | - docker exec centos7-test-$GITHUB_RUN_ID bash -c 'cd /opt/gluten/tools/gluten-it && \ + $PATH_TO_CHECKOUT/$OS_IMAGE/gha/gha-checkout/exec.sh 'cd /opt/gluten/tools/gluten-it && \ mvn clean install -Pspark-3.2 \ && GLUTEN_IT_JVM_ARGS=-Xmx5G sbin/gluten-it.sh queries-compare \ --local --preset=velox --benchmark-type=h --error-on-memleak --disable-aqe --off-heap-size=10g -s=1.0 --threads=16 --iterations=1 \ @@ -423,7 +438,7 @@ jobs: --local --preset=velox --benchmark-type=ds --error-on-memleak --off-heap-size=50g -s=30.0 --threads=32 --iterations=1' - name: TPC-DS SF30.0 Parquet local spark3.2 Q67/Q95 low memory, memory isolation off run: | - docker exec centos7-test-$GITHUB_RUN_ID bash -c 'cd /opt/gluten/tools/gluten-it && \ + $PATH_TO_CHECKOUT/$OS_IMAGE/gha/gha-checkout/exec.sh 'cd /opt/gluten/tools/gluten-it && \ mvn clean install -Pspark-3.2 \ && GLUTEN_IT_JVM_ARGS=-Xmx50G sbin/gluten-it.sh parameterized \ --local --preset=velox --benchmark-type=ds --error-on-memleak --queries=q67,q95 -s=30.0 --threads=12 --shuffle-partitions=72 --iterations=1 \ @@ -435,7 +450,7 @@ jobs: -d=OVER_ACQUIRE:0.5,spark.gluten.memory.overAcquiredMemoryRatio=0.5' - name: (To be fixed) TPC-DS SF30.0 Parquet local spark3.2 Q67/Q95 low memory, memory isolation on run: | - docker 
exec centos7-test-$GITHUB_RUN_ID bash -c 'cd /opt/gluten/tools/gluten-it && \ + $PATH_TO_CHECKOUT/$OS_IMAGE/gha/gha-checkout/exec.sh 'cd /opt/gluten/tools/gluten-it && \ mvn clean install -Pspark-3.2 \ && GLUTEN_IT_JVM_ARGS=-Xmx50G sbin/gluten-it.sh parameterized \ --local --preset=velox --benchmark-type=ds --error-on-memleak --queries=q67,q95 -s=30.0 --threads=12 --shuffle-partitions=72 --iterations=1 \ @@ -447,7 +462,7 @@ jobs: -d=OVER_ACQUIRE:0.5,spark.gluten.memory.overAcquiredMemoryRatio=0.5' || true - name: TPC-DS SF30.0 Parquet local spark3.2 Q23A/Q23B low memory run: | - docker exec centos7-test-$GITHUB_RUN_ID bash -c 'cd /opt/gluten/tools/gluten-it && \ + $PATH_TO_CHECKOUT/$OS_IMAGE/gha/gha-checkout/exec.sh 'cd /opt/gluten/tools/gluten-it && \ GLUTEN_IT_JVM_ARGS=-Xmx50G sbin/gluten-it.sh parameterized \ --local --preset=velox --benchmark-type=ds --error-on-memleak --queries=q23a,q23b -s=30.0 --threads=12 --shuffle-partitions=72 --iterations=1 \ --skip-data-gen -m=OffHeapExecutionMemory \ @@ -459,7 +474,7 @@ jobs: -d=PARTIAL_MODE:FLUSHED,spark.gluten.sql.columnar.backend.velox.maxPartialAggregationMemoryRatio=0.05,spark.gluten.sql.columnar.backend.velox.maxExtendedPartialAggregationMemoryRatio=0.1,spark.gluten.sql.columnar.backend.velox.abandonPartialAggregationMinPct=100,spark.gluten.sql.columnar.backend.velox.abandonPartialAggregationMinRows=0' - name: (To be fixed) TPC-DS SF30.0 Parquet local spark3.2 Q97 low memory # The case currently causes crash with "free: invalid size". 
run: | - docker exec centos7-test-$GITHUB_RUN_ID bash -c 'cd /opt/gluten/tools/gluten-it && \ + $PATH_TO_CHECKOUT/$OS_IMAGE/gha/gha-checkout/exec.sh 'cd /opt/gluten/tools/gluten-it && \ GLUTEN_IT_JVM_ARGS=-Xmx50G sbin/gluten-it.sh parameterized \ --local --preset=velox --benchmark-type=ds --error-on-memleak --queries=q97 -s=30.0 --threads=12 --shuffle-partitions=72 --iterations=1 \ --skip-data-gen -m=OffHeapExecutionMemory \ @@ -470,37 +485,38 @@ jobs: - name: Exit docker container if: ${{ always() }} run: | - docker stop centos7-test-$GITHUB_RUN_ID || true + $PATH_TO_CHECKOUT/$OS_IMAGE/gha/gha-checkout/clean.sh + static-build-test: runs-on: velox-self-hosted steps: - uses: actions/checkout@v4 - - name: Setup docker container - run: | - docker run --rm --init --privileged --ulimit nofile=65536:65536 --ulimit core=-1 --security-opt seccomp=unconfined \ - -v $PWD:/opt/gluten --name static-build-test-$GITHUB_RUN_ID -e NUM_THREADS=30 -detach 10.0.2.4:5000/gluten-dev/centos:7 \ - bash -c 'cd /opt/gluten && sleep 14400' - - name: Build Gluten CPP library - run: | - docker exec -i static-build-test-$GITHUB_RUN_ID bash -c ' - source /env.sh && \ - sudo yum -y install patch && \ - cd /opt/gluten && \ - sudo -E ./dev/vcpkg/setup-build-depends.sh && \ - source ./dev/vcpkg/env.sh && \ - ./dev/builddeps-veloxbe.sh --enable_vcpkg=ON --build_tests=ON --build_benchmarks=ON --enable_s3=ON --enable_gcs=ON --enable_hdfs=ON --enable_abfs=OFF' - - name: Build for Spark 3.2.2 - run: | - docker exec static-build-test-$GITHUB_RUN_ID bash -c ' - cd /opt/gluten && \ - mvn clean install -Pspark-3.2 -Pbackends-velox -Prss -Piceberg -Pdelta -DskipTests && \ - cd /opt/gluten/tools/gluten-it && \ - mvn clean install -Pspark-3.2' + # - name: Setup docker container + # run: | + # docker run --rm --init --privileged --ulimit nofile=65536:65536 --ulimit core=-1 --security-opt seccomp=unconfined \ + # -v $PWD:/opt/gluten --name static-build-test-$GITHUB_RUN_ID -e NUM_THREADS=30 -detach 
10.0.0.25:5000/gluten-te/gluten-build-centos:7 \ + # bash -c 'cd /opt/gluten && sleep 14400' + # - name: Build Gluten CPP library + # run: | + # docker exec -i static-build-test-$GITHUB_RUN_ID bash -c ' + # source /env.sh && \ + # sudo yum -y install patch && \ + # cd /opt/gluten && \ + # sudo -E ./dev/vcpkg/setup-build-depends.sh && \ + # source ./dev/vcpkg/env.sh && \ + # ./dev/builddeps-veloxbe.sh --enable_vcpkg=ON --build_tests=ON --build_benchmarks=ON --enable_s3=ON --enable_gcs=ON --enable_hdfs=ON --enable_abfs=OFF' + # - name: Build for Spark 3.2.2 + # run: | + # docker exec static-build-test-$GITHUB_RUN_ID bash -c ' + # cd /opt/gluten && \ + # mvn clean install -Pspark-3.2 -Pbackends-velox -Prss -Piceberg -Pdelta -DskipTests && \ + # cd /opt/gluten/tools/gluten-it && \ + # mvn clean install -Pspark-3.2' - name: TPC-H SF1.0 && TPC-DS SF1.0 Parquet local spark3.2 (centos 8) run: | docker run --rm --init --privileged --ulimit nofile=65536:65536 --ulimit core=-1 --security-opt seccomp=unconfined \ - -v $PWD:/opt/gluten --name static-build-test-$GITHUB_RUN_ID-tpc -e NUM_THREADS=30 10.0.2.4:5000/gluten-dev/centos:8 \ + -v $PWD:/opt/gluten --name static-build-test-$GITHUB_RUN_ID-tpc -e NUM_THREADS=30 10.0.0.25:5000/gluten-te/gluten-build-centos:8 \ bash -c 'cd /opt/gluten/tools/gluten-it \ && GLUTEN_IT_JVM_ARGS=-Xmx5G sbin/gluten-it.sh queries-compare \ --local --preset=velox --benchmark-type=h --error-on-memleak --disable-aqe --off-heap-size=10g -s=1.0 --threads=16 --iterations=1 \ @@ -509,7 +525,7 @@ jobs: - name: TPC-H SF1.0 && TPC-DS SF1.0 Parquet local spark3.2 (ubuntu 20.04) run: | docker run --rm --init --privileged --ulimit nofile=65536:65536 --ulimit core=-1 --security-opt seccomp=unconfined \ - -v $PWD:/opt/gluten --name static-build-test-$GITHUB_RUN_ID-tpc -e NUM_THREADS=30 10.0.2.4:5000/gluten-dev/ubuntu:20.04 \ + -v $PWD:/opt/gluten --name static-build-test-$GITHUB_RUN_ID-tpc -e NUM_THREADS=30 10.0.0.25:5000/gluten-te/gluten-build-ubuntu:20.04 \ 'cd 
/opt/gluten/tools/gluten-it \ && GLUTEN_IT_JVM_ARGS=-Xmx5G sbin/gluten-it.sh queries-compare \ --local --preset=velox --benchmark-type=h --error-on-memleak --disable-aqe --off-heap-size=10g -s=1.0 --threads=16 --iterations=1 \ @@ -518,7 +534,7 @@ jobs: - name: TPC-H SF1.0 && TPC-DS SF1.0 Parquet local spark3.2 (ubuntu 22.04) run: | docker run --rm --init --privileged --ulimit nofile=65536:65536 --ulimit core=-1 --security-opt seccomp=unconfined \ - -v $PWD:/opt/gluten --name static-build-test-$GITHUB_RUN_ID-tpc -e NUM_THREADS=30 10.0.2.4:5000/gluten-dev/ubuntu:22.04 \ + -v $PWD:/opt/gluten --name static-build-test-$GITHUB_RUN_ID-tpc -e NUM_THREADS=30 10.0.0.25:5000/gluten-te/gluten-build-ubuntu:22.04 \ 'cd /opt/gluten/tools/gluten-it \ && GLUTEN_IT_JVM_ARGS=-Xmx5G sbin/gluten-it.sh queries-compare \ --local --preset=velox --benchmark-type=h --error-on-memleak --disable-aqe --off-heap-size=10g -s=1.0 --threads=16 --iterations=1 \ @@ -528,3 +544,4 @@ jobs: if: ${{ always() }} run: | docker stop static-build-test-$GITHUB_RUN_ID || true + diff --git a/README.md b/README.md index 8bdb2b2564c4..12178ab0442d 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ *This project is still under active development now, and doesn't have a stable release. Welcome to evaluate it.* # 1 Introduction - +test ## 1.1 Problem Statement Apache Spark is a stable, mature project that has been developed for many years. It is one of the best frameworks to scale out for processing petabyte-scale datasets. However, the Spark community has had to address performance challenges that require various optimizations over time. As a key optimization in Spark 2.0, Whole Stage Code Generation is introduced to replace Volcano Model, which achieves 2x speedup. Henceforth, most optimizations are at query plan level. Single operator's performance almost stops growing. 
diff --git a/dev/buildbundle-veloxbe.sh b/dev/buildbundle-veloxbe.sh index 3bfd6994a556..b6c2ec88c3a5 100755 --- a/dev/buildbundle-veloxbe.sh +++ b/dev/buildbundle-veloxbe.sh @@ -1,4 +1,5 @@ #!/bin/bash +# test2 BASEDIR=$(dirname $0) source "$BASEDIR/builddeps-veloxbe.sh" diff --git a/tools/gluten-te/centos/build.sh b/tools/gluten-te/centos/build.sh index c4d1cc5eb464..e70da87cf70e 100755 --- a/tools/gluten-te/centos/build.sh +++ b/tools/gluten-te/centos/build.sh @@ -54,7 +54,7 @@ BUILD_BACKEND_TYPE=${BUILD_BACKEND_TYPE:-$DEFAULT_BUILD_BACKEND_TYPE} # Build will result in this image DOCKER_TARGET_IMAGE_BUILD=${DOCKER_TARGET_IMAGE_BUILD:-$DEFAULT_DOCKER_TARGET_IMAGE_BUILD} -DOCKER_TARGET_IMAGE_BUILD_WITH_OS_IMAGE="$DOCKER_TARGET_IMAGE_BUILD-$OS_IMAGE" +DOCKER_TARGET_IMAGE_BUILD_WITH_OS_IMAGE="$DOCKER_TARGET_IMAGE_BUILD-$OS_IMAGE:$OS_VERSION" ## diff --git a/tools/gluten-te/centos/buildenv.sh b/tools/gluten-te/centos/buildenv.sh index bd51192cf1f1..19986cc7262f 100755 --- a/tools/gluten-te/centos/buildenv.sh +++ b/tools/gluten-te/centos/buildenv.sh @@ -42,14 +42,12 @@ TIMEZONE=${TIMEZONE:-$DEFAULT_TIMEZONE} # Set operating system OS_IMAGE=${OS_IMAGE:-$DEFAULT_OS_IMAGE} - -# Set os version OS_VERSION=${OS_VERSION:-$DEFAULT_OS_VERSION} # Build will result in this image DOCKER_TARGET_IMAGE_BUILDENV=${DOCKER_TARGET_IMAGE_BUILDENV:-$DEFAULT_DOCKER_TARGET_IMAGE_BUILDENV} -DOCKER_TARGET_IMAGE_BUILDENV_WITH_OS_IMAGE="$DOCKER_TARGET_IMAGE_BUILDENV-$OS_IMAGE" +DOCKER_TARGET_IMAGE_BUILDENV_WITH_OS_IMAGE="$DOCKER_TARGET_IMAGE_BUILDENV-$OS_IMAGE:$OS_VERSION" if [ "$USE_ALI_MAVEN_MIRROR" == "ON" ] then diff --git a/tools/gluten-te/centos/defaults.conf b/tools/gluten-te/centos/defaults.conf index 1cdc10f5ebe6..e71744695886 100755 --- a/tools/gluten-te/centos/defaults.conf +++ b/tools/gluten-te/centos/defaults.conf @@ -11,7 +11,8 @@ DEFAULT_NON_INTERACTIVE=OFF DEFAULT_PRESERVE_CONTAINER=OFF # The codes will be used in build 
-DEFAULT_GLUTEN_REPO=https://github.com/oap-project/gluten.git +# for 10.0.0.25 test at PHILO-HE/gluten +DEFAULT_GLUTEN_REPO=https://github.com/PHILO-HE/gluten.git DEFAULT_GLUTEN_BRANCH=main # Create debug build @@ -21,17 +22,28 @@ DEFAULT_DEBUG_BUILD=OFF DEFAULT_BUILD_BACKEND_TYPE=velox # HTTP proxy -DEFAULT_HTTP_PROXY_HOST= -DEFAULT_HTTP_PROXY_PORT= +# If http proxy is http://child-prc.intel.com:913, write it as +# DEFAULT_HTTP_PROXY_HOST=child-prc.intel.com +# DEFAULT_HTTP_PROXY_PORT=913 +# do not write http:// +# DEFAULT_HTTP_PROXY_HOST= +# DEFAULT_HTTP_PROXY_PORT= +# for 10.0.0.25 test +DEFAULT_HTTP_PROXY_HOST=child-prc.intel.com +DEFAULT_HTTP_PROXY_PORT=913 # If on, use maven mirror settings for PRC's network environment -DEFAULT_USE_ALI_MAVEN_MIRROR=OFF +# DEFAULT_USE_ALI_MAVEN_MIRROR=OFF +# for 10.0.0.25 test +DEFAULT_USE_ALI_MAVEN_MIRROR=ON # Base operator system image used in build scripts. -DEFAULT_OS_IMAGE=centos:8 +# DEFAULT_OS_IMAGE=ubuntu or centos +DEFAULT_OS_IMAGE= # Version ID of os image -DEFAULT_OS_VERSION=8 +# DEFAULT_OS_VERSION=20.04 or 8 +DEFAULT_OS_VERSION= # Set timezone name DEFAULT_TIMEZONE=Asia/Shanghai @@ -43,10 +55,14 @@ DEFAULT_DOCKER_TARGET_IMAGE_BUILDENV=gluten-te/gluten-buildenv DEFAULT_DOCKER_TARGET_IMAGE_BUILD=gluten-te/gluten-build # Docker registry used to pull pre-built images to speed-up builds -DEFAULT_DOCKER_CACHE_REGISTRY= +# DEFAULT_DOCKER_CACHE_REGISTRY= +# for 10.0.0.25 test +DEFAULT_DOCKER_CACHE_REGISTRY=10.0.0.25:5000 # Docker registry to push pre-built images -DEFAULT_DOCKER_PUSH_REGISTRY= +# DEFAULT_DOCKER_PUSH_REGISTRY= +# for 10.0.0.25 test +DEFAULT_DOCKER_PUSH_REGISTRY=10.0.0.25:5000 ## For cbash.sh diff --git a/tools/gluten-te/centos/gha/gha-checkout/checkout.sh b/tools/gluten-te/centos/gha/gha-checkout/checkout.sh index 15c28f5849d0..415fcf1f2a16 100755 --- a/tools/gluten-te/centos/gha/gha-checkout/checkout.sh +++ b/tools/gluten-te/centos/gha/gha-checkout/checkout.sh @@ -26,7 +26,7 @@ then exit 1 fi -export 
EXTRA_DOCKER_OPTIONS="$EXTRA_DOCKER_OPTIONS --name gha-checkout-$GITHUB_RUN_ID --detach -v $BASEDIR/scripts:/opt/scripts" +export EXTRA_DOCKER_OPTIONS="$EXTRA_DOCKER_OPTIONS --name gha-checkout-$GITHUB_JOB-$GITHUB_RUN_ID --detach -v $BASEDIR/scripts:/opt/scripts" export NON_INTERACTIVE=ON $BASEDIR/../../cbash-build.sh 'sleep 14400' diff --git a/tools/gluten-te/centos/gha/gha-checkout/clean.sh b/tools/gluten-te/centos/gha/gha-checkout/clean.sh index f67f6bf0058b..29a97351f517 100755 --- a/tools/gluten-te/centos/gha/gha-checkout/clean.sh +++ b/tools/gluten-te/centos/gha/gha-checkout/clean.sh @@ -22,4 +22,4 @@ then exit 1 fi -docker stop gha-checkout-$GITHUB_RUN_ID || true +docker stop gha-checkout-$GITHUB_JOB-$GITHUB_RUN_ID || true diff --git a/tools/gluten-te/centos/gha/gha-checkout/exec.sh b/tools/gluten-te/centos/gha/gha-checkout/exec.sh index 1497ab6f2edf..3d0b9fe5f79c 100755 --- a/tools/gluten-te/centos/gha/gha-checkout/exec.sh +++ b/tools/gluten-te/centos/gha/gha-checkout/exec.sh @@ -25,4 +25,4 @@ then exit 1 fi -docker exec gha-checkout-$GITHUB_RUN_ID bash -c "cd /opt/gluten && $BASH_ARGS" +docker exec gha-checkout-$GITHUB_JOB-$GITHUB_RUN_ID bash -c "cd /opt/gluten && $BASH_ARGS" diff --git a/tools/gluten-te/github_action/build.sh b/tools/gluten-te/github_action/build.sh new file mode 100755 index 000000000000..e70da87cf70e --- /dev/null +++ b/tools/gluten-te/github_action/build.sh @@ -0,0 +1,90 @@ +#!/bin/bash +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set -ex + +BASEDIR=$(dirname "$0") + +source "$BASEDIR/buildenv.sh" + +## Debug build flags + +# Create debug build +DEBUG_BUILD=${DEBUG_BUILD:-$DEFAULT_DEBUG_BUILD} + +if [ -n "$JDK_DEBUG_BUILD" ] +then + echo "Do not set JDK_DEBUG_BUILD manually!" +fi + +if [ -n "$GLUTEN_DEBUG_BUILD" ] +then + echo "Do not set GLUTEN_DEBUG_BUILD manually!" +fi + +if [ "$DEBUG_BUILD" == "ON" ] +then + JDK_DEBUG_BUILD=OFF + GLUTEN_DEBUG_BUILD=ON +else + JDK_DEBUG_BUILD=OFF + GLUTEN_DEBUG_BUILD=OFF +fi + +# The branches used to prepare dependencies +CACHE_GLUTEN_REPO=${CACHE_GLUTEN_REPO:-$DEFAULT_GLUTEN_REPO} +CACHE_GLUTEN_BRANCH=${CACHE_GLUTEN_BRANCH:-$DEFAULT_GLUTEN_BRANCH} + +# Backend type +BUILD_BACKEND_TYPE=${BUILD_BACKEND_TYPE:-$DEFAULT_BUILD_BACKEND_TYPE} + +# Build will result in this image +DOCKER_TARGET_IMAGE_BUILD=${DOCKER_TARGET_IMAGE_BUILD:-$DEFAULT_DOCKER_TARGET_IMAGE_BUILD} + +DOCKER_TARGET_IMAGE_BUILD_WITH_OS_IMAGE="$DOCKER_TARGET_IMAGE_BUILD-$OS_IMAGE:$OS_VERSION" + +## + +BUILD_DOCKER_BUILD_ARGS= + +BUILD_DOCKER_BUILD_ARGS="$BUILD_DOCKER_BUILD_ARGS --ulimit nofile=8192:8192" +BUILD_DOCKER_BUILD_ARGS="$BUILD_DOCKER_BUILD_ARGS --build-arg BUILDKIT_INLINE_CACHE=1" +BUILD_DOCKER_BUILD_ARGS="$BUILD_DOCKER_BUILD_ARGS --build-arg DOCKER_TARGET_IMAGE_BUILDENV_WITH_OS_IMAGE=$DOCKER_TARGET_IMAGE_BUILDENV_WITH_OS_IMAGE" +BUILD_DOCKER_BUILD_ARGS="$BUILD_DOCKER_BUILD_ARGS --build-arg JDK_DEBUG_BUILD=$JDK_DEBUG_BUILD" +BUILD_DOCKER_BUILD_ARGS="$BUILD_DOCKER_BUILD_ARGS --build-arg GLUTEN_DEBUG_BUILD=$GLUTEN_DEBUG_BUILD" +BUILD_DOCKER_BUILD_ARGS="$BUILD_DOCKER_BUILD_ARGS
--build-arg CACHE_GLUTEN_REPO=$CACHE_GLUTEN_REPO" +BUILD_DOCKER_BUILD_ARGS="$BUILD_DOCKER_BUILD_ARGS --build-arg CACHE_GLUTEN_BRANCH=$CACHE_GLUTEN_BRANCH" +BUILD_DOCKER_BUILD_ARGS="$BUILD_DOCKER_BUILD_ARGS --build-arg BUILD_BACKEND_TYPE=$BUILD_BACKEND_TYPE" +BUILD_DOCKER_BUILD_ARGS="$BUILD_DOCKER_BUILD_ARGS -f $BASEDIR/dockerfile-build" +BUILD_DOCKER_BUILD_ARGS="$BUILD_DOCKER_BUILD_ARGS --target gluten-build" +BUILD_DOCKER_BUILD_ARGS="$BUILD_DOCKER_BUILD_ARGS -t $DOCKER_TARGET_IMAGE_BUILD_WITH_OS_IMAGE" + +if [ -n "$DOCKER_CACHE_REGISTRY" ] +then + BUILD_DOCKER_BUILD_ARGS="$BUILD_DOCKER_BUILD_ARGS --cache-from $DOCKER_CACHE_REGISTRY/$DOCKER_TARGET_IMAGE_BUILD_WITH_OS_IMAGE" +fi + +BUILD_DOCKER_BUILD_ARGS="$BUILD_DOCKER_BUILD_ARGS $BASEDIR" + +docker build $BUILD_DOCKER_BUILD_ARGS + +if [ -n "$DOCKER_PUSH_REGISTRY" ] +then + docker tag "$DOCKER_TARGET_IMAGE_BUILD_WITH_OS_IMAGE" "$DOCKER_PUSH_REGISTRY/$DOCKER_TARGET_IMAGE_BUILD_WITH_OS_IMAGE" + docker push "$DOCKER_PUSH_REGISTRY/$DOCKER_TARGET_IMAGE_BUILD_WITH_OS_IMAGE" +fi + +# EOF diff --git a/tools/gluten-te/github_action/buildenv.sh b/tools/gluten-te/github_action/buildenv.sh new file mode 100755 index 000000000000..19986cc7262f --- /dev/null +++ b/tools/gluten-te/github_action/buildenv.sh @@ -0,0 +1,89 @@ +#!/bin/bash +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +set -ex + +BASEDIR=$(dirname $0) + +source "$BASEDIR/defaults.conf" + +# Enable buildkit +export DOCKER_BUILDKIT=1 +export BUILDKIT_PROGRESS=plain + +# Docker registry used to pull pre-built images to speed-up builds +DOCKER_CACHE_REGISTRY=${DOCKER_CACHE_REGISTRY:-$DEFAULT_DOCKER_CACHE_REGISTRY} + +# Docker registry to push pre-built images +DOCKER_PUSH_REGISTRY=${DOCKER_PUSH_REGISTRY:-$DEFAULT_DOCKER_PUSH_REGISTRY} + +# HTTP proxy +HTTP_PROXY_HOST=${HTTP_PROXY_HOST:-$DEFAULT_HTTP_PROXY_HOST} +HTTP_PROXY_PORT=${HTTP_PROXY_PORT:-$DEFAULT_HTTP_PROXY_PORT} + +# If on, use maven mirror settings for PRC's network environment +USE_ALI_MAVEN_MIRROR=${USE_ALI_MAVEN_MIRROR:-$DEFAULT_USE_ALI_MAVEN_MIRROR} + +# Set timezone name +TIMEZONE=${TIMEZONE:-$DEFAULT_TIMEZONE} + +# Set operating system +OS_IMAGE=${OS_IMAGE:-$DEFAULT_OS_IMAGE} +OS_VERSION=${OS_VERSION:-$DEFAULT_OS_VERSION} + +# Build will result in this image +DOCKER_TARGET_IMAGE_BUILDENV=${DOCKER_TARGET_IMAGE_BUILDENV:-$DEFAULT_DOCKER_TARGET_IMAGE_BUILDENV} + +DOCKER_TARGET_IMAGE_BUILDENV_WITH_OS_IMAGE="$DOCKER_TARGET_IMAGE_BUILDENV-$OS_IMAGE:$OS_VERSION" + +if [ "$USE_ALI_MAVEN_MIRROR" == "ON" ] +then + MAVEN_MIRROR_URL='https://maven.aliyun.com/repository/public' +else + MAVEN_MIRROR_URL= +fi + +## + +BUILDENV_DOCKER_BUILD_ARGS= + +BUILDENV_DOCKER_BUILD_ARGS="$BUILDENV_DOCKER_BUILD_ARGS --ulimit nofile=8192:8192" +BUILDENV_DOCKER_BUILD_ARGS="$BUILDENV_DOCKER_BUILD_ARGS --build-arg BUILDKIT_INLINE_CACHE=1" +BUILDENV_DOCKER_BUILD_ARGS="$BUILDENV_DOCKER_BUILD_ARGS --build-arg TIMEZONE=$TIMEZONE" +BUILDENV_DOCKER_BUILD_ARGS="$BUILDENV_DOCKER_BUILD_ARGS --build-arg OS_IMAGE=$OS_IMAGE --build-arg OS_VERSION=$OS_VERSION" +BUILDENV_DOCKER_BUILD_ARGS="$BUILDENV_DOCKER_BUILD_ARGS --build-arg MAVEN_MIRROR_URL=$MAVEN_MIRROR_URL" +BUILDENV_DOCKER_BUILD_ARGS="$BUILDENV_DOCKER_BUILD_ARGS --build-arg 
HTTP_PROXY_HOST=$HTTP_PROXY_HOST" +BUILDENV_DOCKER_BUILD_ARGS="$BUILDENV_DOCKER_BUILD_ARGS --build-arg HTTP_PROXY_PORT=$HTTP_PROXY_PORT" +BUILDENV_DOCKER_BUILD_ARGS="$BUILDENV_DOCKER_BUILD_ARGS -f $BASEDIR/dockerfile-buildenv" +BUILDENV_DOCKER_BUILD_ARGS="$BUILDENV_DOCKER_BUILD_ARGS --target gluten-buildenv" +BUILDENV_DOCKER_BUILD_ARGS="$BUILDENV_DOCKER_BUILD_ARGS -t $DOCKER_TARGET_IMAGE_BUILDENV_WITH_OS_IMAGE" + +if [ -n "$DOCKER_CACHE_REGISTRY" ] +then + BUILDENV_DOCKER_BUILD_ARGS="$BUILDENV_DOCKER_BUILD_ARGS --cache-from $DOCKER_CACHE_REGISTRY/$DOCKER_TARGET_IMAGE_BUILDENV_WITH_OS_IMAGE" +fi + +BUILDENV_DOCKER_BUILD_ARGS="$BUILDENV_DOCKER_BUILD_ARGS $BASEDIR" + +docker build $BUILDENV_DOCKER_BUILD_ARGS + +if [ -n "$DOCKER_PUSH_REGISTRY" ] +then + docker tag "$DOCKER_TARGET_IMAGE_BUILDENV_WITH_OS_IMAGE" "$DOCKER_PUSH_REGISTRY/$DOCKER_TARGET_IMAGE_BUILDENV_WITH_OS_IMAGE" + docker push "$DOCKER_PUSH_REGISTRY/$DOCKER_TARGET_IMAGE_BUILDENV_WITH_OS_IMAGE" +fi + +# EOF diff --git a/tools/gluten-te/github_action/cbash-build.sh b/tools/gluten-te/github_action/cbash-build.sh new file mode 100755 index 000000000000..b47f6635c839 --- /dev/null +++ b/tools/gluten-te/github_action/cbash-build.sh @@ -0,0 +1,59 @@ +#!/bin/bash +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +set -ex + +BASEDIR=$(dirname $0) + +source "$BASEDIR/build.sh" + +# Non-interactive during docker run +NON_INTERACTIVE=${NON_INTERACTIVE:-$DEFAULT_NON_INTERACTIVE} + +# Do not remove stopped docker container +PRESERVE_CONTAINER=${PRESERVE_CONTAINER:-$DEFAULT_PRESERVE_CONTAINER} + +# Docker options +EXTRA_DOCKER_OPTIONS=${EXTRA_DOCKER_OPTIONS:-$DEFAULT_EXTRA_DOCKER_OPTIONS} + +# Whether to mount Maven cache +MOUNT_MAVEN_CACHE=${MOUNT_MAVEN_CACHE:-$DEFAULT_MOUNT_MAVEN_CACHE} + +CBASH_DOCKER_RUN_ARGS= +if [ "$NON_INTERACTIVE" != "ON" ] +then + CBASH_DOCKER_RUN_ARGS="$CBASH_DOCKER_RUN_ARGS -it" +fi +if [ "$PRESERVE_CONTAINER" != "ON" ] +then + CBASH_DOCKER_RUN_ARGS="$CBASH_DOCKER_RUN_ARGS --rm" +fi +CBASH_DOCKER_RUN_ARGS="$CBASH_DOCKER_RUN_ARGS --init" +CBASH_DOCKER_RUN_ARGS="$CBASH_DOCKER_RUN_ARGS --privileged" +CBASH_DOCKER_RUN_ARGS="$CBASH_DOCKER_RUN_ARGS --ulimit nofile=65536:65536" +CBASH_DOCKER_RUN_ARGS="$CBASH_DOCKER_RUN_ARGS --ulimit core=-1" +CBASH_DOCKER_RUN_ARGS="$CBASH_DOCKER_RUN_ARGS --security-opt seccomp=unconfined" +if [ "$MOUNT_MAVEN_CACHE" == "ON" ] +then + CBASH_DOCKER_RUN_ARGS="$CBASH_DOCKER_RUN_ARGS -v $HOME/.m2/repository:/root/.m2/repository" +fi +CBASH_DOCKER_RUN_ARGS="$CBASH_DOCKER_RUN_ARGS -v $HOME/.ccache:/root/.ccache" +CBASH_DOCKER_RUN_ARGS="$CBASH_DOCKER_RUN_ARGS $EXTRA_DOCKER_OPTIONS" + +CBASH_BASH_ARGS="$*" +BASH_ARGS="$CBASH_BASH_ARGS" + +docker run $CBASH_DOCKER_RUN_ARGS $DOCKER_TARGET_IMAGE_BUILD_WITH_OS_IMAGE bash -c "cd /opt/gluten && $BASH_ARGS" diff --git a/tools/gluten-te/github_action/centos-7-deps.sh b/tools/gluten-te/github_action/centos-7-deps.sh new file mode 100755 index 000000000000..4971efc94511 --- /dev/null +++ b/tools/gluten-te/github_action/centos-7-deps.sh @@ -0,0 +1,49 @@ +#! /bin/sh +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. 
See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set -ex + +#CENTOS_MIRROR_URL=https://mirrors.edge.kernel.org/centos +#CENTOS_MIRROR_GPGKEY="${CENTOS_MIRROR_URL}/RPM-GPG-KEY-CentOS-7" +# +#cp /etc/yum.repos.d/CentOS-Base.repo /tmp/CentOS-Base.repo +#sed -i "/^mirrorlist/d;s/^\#baseurl=/baseurl=/" /tmp/CentOS-Base.repo +#sed -i "s|^gpgkey=.*$|gpgkey=${CENTOS_MIRROR_GPGKEY}|" /tmp/CentOS-Base.repo +#sed -i "s|http://mirror.centos.org/centos|${CENTOS_MIRROR_URL}|" /tmp/CentOS-Base.repo +#rm /etc/yum.repos.d/* +#mv /tmp/CentOS-Base.repo /etc/yum.repos.d/ + +sed -e 's|^mirrorlist=|#mirrorlist=|g' \ + -e 's|^#baseurl=http://mirror.centos.org/centos|baseurl=https://mirrors.ustc.edu.cn/centos|g' \ + -i.bak \ + /etc/yum.repos.d/CentOS-Base.repo + +# Disable fastestmirror +sed -i "s/enabled=1/enabled=0/" /etc/yum/pluginconf.d/fastestmirror.conf + +yum -y install epel-release centos-release-scl +yum -y install \ + git \ + dnf \ + cmake3 \ + ccache \ + devtoolset-9 \ + java-1.8.0-openjdk \ + java-1.8.0-openjdk-devel \ + ninja-build \ + wget + +ln -s /usr/bin/cmake3 /usr/local/bin/cmake diff --git a/tools/gluten-te/github_action/centos-8-deps.sh b/tools/gluten-te/github_action/centos-8-deps.sh new file mode 100755 index 000000000000..f76db6df5e7d --- /dev/null +++ b/tools/gluten-te/github_action/centos-8-deps.sh @@ -0,0 +1,37 @@ +#! 
/bin/sh +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set -ex + +sed -i -e "s|mirrorlist=|#mirrorlist=|g" /etc/yum.repos.d/CentOS-* +# The connection to vault.centos.org in CI is unstable +# sed -i -e "s|#baseurl=http://mirror.centos.org|baseurl=http://vault.centos.org|g" /etc/yum.repos.d/CentOS-* +minorver=8.5.2111 +sed -i -e \ + "s|^#baseurl=http://mirror.centos.org/\$contentdir/\$releasever|baseurl=https://mirrors.aliyun.com/centos-vault/$minorver|g" \ + /etc/yum.repos.d/CentOS-*.repo + +dnf install -y epel-release sudo +yum -y update && yum clean all && yum install -y dnf-plugins-core +yum config-manager --set-enabled powertools +dnf --enablerepo=powertools install -y ninja-build +dnf --enablerepo=powertools install -y libdwarf-devel +dnf install -y --setopt=install_weak_deps=False ccache gcc-toolset-9 git wget which libevent-devel \ + openssl-devel re2-devel libzstd-devel lz4-devel double-conversion-devel \ + curl-devel cmake libicu-devel + +yum -y update && yum clean all && yum install -y java-1.8.0-openjdk-devel patch +dnf -y install gcc-toolset-9-gcc gcc-toolset-9-gcc-c++ diff --git a/tools/gluten-te/github_action/checkout.sh b/tools/gluten-te/github_action/checkout.sh new file mode 100755 index 000000000000..62788ec3fa86 --- 
/dev/null +++ b/tools/gluten-te/github_action/checkout.sh @@ -0,0 +1,41 @@ +#!/bin/bash +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set -ex + +BASEDIR=$(readlink -f $(dirname $0)) + +source "$BASEDIR/defaults.conf" + +if [ -z "$GITHUB_RUN_ID" ] +then + echo "Unable to parse GITHUB_RUN_ID." 
+ exit 1 +fi + +export EXTRA_DOCKER_OPTIONS="$EXTRA_DOCKER_OPTIONS --name gha-checkout-$GITHUB_JOB-$GITHUB_RUN_ID --detach -v $BASEDIR/scripts:/opt/scripts" +export NON_INTERACTIVE=ON + +$BASEDIR/cbash-build.sh 'sleep 14400' + +# The target branches +TARGET_GLUTEN_REPO=${TARGET_GLUTEN_REPO:-$DEFAULT_GLUTEN_REPO} +FALLBACK_GLUTEN_BRANCH=${FALLBACK_GLUTEN_BRANCH:-$DEFAULT_GLUTEN_BRANCH} +FALLBACK_GLUTEN_COMMIT="$(git ls-remote $TARGET_GLUTEN_REPO $FALLBACK_GLUTEN_BRANCH | awk '{print $1;}')" + +TARGET_GLUTEN_COMMIT="${GITHUB_SHA:-$FALLBACK_GLUTEN_COMMIT}" + +$BASEDIR/exec.sh "/opt/scripts/init.sh $TARGET_GLUTEN_REPO $TARGET_GLUTEN_COMMIT" diff --git a/tools/gluten-te/github_action/clean.sh b/tools/gluten-te/github_action/clean.sh new file mode 100755 index 000000000000..29a97351f517 --- /dev/null +++ b/tools/gluten-te/github_action/clean.sh @@ -0,0 +1,25 @@ +#!/bin/bash +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +set -ex + +if [ -z "$GITHUB_RUN_ID" ] +then + echo "Unable to parse GITHUB_RUN_ID." 
+ exit 1 +fi + +docker stop gha-checkout-$GITHUB_JOB-$GITHUB_RUN_ID || true diff --git a/tools/gluten-te/github_action/defaults.conf b/tools/gluten-te/github_action/defaults.conf new file mode 100755 index 000000000000..2be72954b251 --- /dev/null +++ b/tools/gluten-te/github_action/defaults.conf @@ -0,0 +1,100 @@ +#!/bin/bash + +set -ex + +## For basic scripts + +# Non-interactive during docker run +DEFAULT_NON_INTERACTIVE=OFF + +# Do not remove stopped docker container +DEFAULT_PRESERVE_CONTAINER=OFF + +# The source repository and branch used in the build +# for 10.0.0.25 test at PHILO-HE/gluten +DEFAULT_GLUTEN_REPO=https://github.com/PHILO-HE/gluten.git +DEFAULT_GLUTEN_BRANCH=main + +# Create debug build +DEFAULT_DEBUG_BUILD=OFF + +# Backend type (velox) +DEFAULT_BUILD_BACKEND_TYPE=velox + +# HTTP proxy +# If http proxy is http://child-prc.intel.com:913, write it as +# DEFAULT_HTTP_PROXY_HOST=child-prc.intel.com +# DEFAULT_HTTP_PROXY_PORT=913 +# do not write http:// +# DEFAULT_HTTP_PROXY_HOST= +# DEFAULT_HTTP_PROXY_PORT= +# for 10.0.0.25 test +DEFAULT_HTTP_PROXY_HOST=child-prc.intel.com +DEFAULT_HTTP_PROXY_PORT=913 + +# If on, use maven mirror settings for PRC's network environment +# DEFAULT_USE_ALI_MAVEN_MIRROR=OFF +# for 10.0.0.25 test +DEFAULT_USE_ALI_MAVEN_MIRROR=ON + +# Base operating system image used in build scripts.
+# DEFAULT_OS_IMAGE=ubuntu or centos +DEFAULT_OS_IMAGE= + +# Version ID of os image +# DEFAULT_OS_VERSION=20.04 or 8 +DEFAULT_OS_VERSION= + +# Set timezone name +DEFAULT_TIMEZONE=Asia/Shanghai + +# Build will result in this image +DEFAULT_DOCKER_TARGET_IMAGE_BUILDENV=gluten-te/gluten-buildenv + +# Build will result in this image +DEFAULT_DOCKER_TARGET_IMAGE_BUILD=gluten-te/gluten-build + +# Docker registry used to pull pre-built images to speed-up builds +# DEFAULT_DOCKER_CACHE_REGISTRY= +# for 10.0.0.25 test +DEFAULT_DOCKER_CACHE_REGISTRY=10.0.0.25:5000 + +# Docker registry to push pre-built images +# DEFAULT_DOCKER_PUSH_REGISTRY= +# for 10.0.0.25 test +DEFAULT_DOCKER_PUSH_REGISTRY=10.0.0.25:5000 + +## For tpc.sh + +# Java options +DEFAULT_EXTRA_JAVA_OPTIONS="-Xmx2G" + +# Run GDB. +DEFAULT_RUN_GDB=OFF + +# Run GDB server. +DEFAULT_RUN_GDB_SERVER=OFF + +# GDB server bind port +DEFAULT_GDB_SERVER_PORT=2345 + +# Run JVM jdwp server. +DEFAULT_RUN_JDWP_SERVER=OFF + +# JVM jdwp bind port +DEFAULT_JDWP_SERVER_PORT=5005 + +# Docker options +DEFAULT_EXTRA_DOCKER_OPTIONS="--network bridge" + +# Build will result in this image +DEFAULT_DOCKER_TARGET_IMAGE_TPC=gluten-te/gluten-tpc +DEFAULT_DOCKER_TARGET_IMAGE_TPC_GDB=gluten-te/gluten-tpc-gdb +DEFAULT_DOCKER_TARGET_IMAGE_TPC_GDB_SERVER=gluten-te/gluten-tpc-gdb-server + +## For cbash.sh + +# Whether to mount Maven cache +DEFAULT_MOUNT_MAVEN_CACHE=OFF + +# EOF diff --git a/tools/gluten-te/github_action/dockerfile-build b/tools/gluten-te/github_action/dockerfile-build new file mode 100755 index 000000000000..4bcfbb90f656 --- /dev/null +++ b/tools/gluten-te/github_action/dockerfile-build @@ -0,0 +1,83 @@ +ARG DOCKER_TARGET_IMAGE_BUILDENV_WITH_OS_IMAGE + +FROM $DOCKER_TARGET_IMAGE_BUILDENV_WITH_OS_IMAGE AS gluten-build +MAINTAINER Hongze Zhang + +# Whether debug build is enabled +ARG JDK_DEBUG_BUILD +ARG GLUTEN_DEBUG_BUILD +RUN echo "JDK debug build is [$JDK_DEBUG_BUILD]!" +RUN echo "Gluten debug build is [$GLUTEN_DEBUG_BUILD]!" 
+ +# If JDK debug is on, remove the distro JDK/Maven and install a fastdebug JDK 8 plus Maven 3.6.3 +RUN if [ "$JDK_DEBUG_BUILD" == "ON" ]; \ + then \ + apt-get update; \ + DEBIAN_FRONTEND=noninteractive apt-get remove -y openjdk-8-jdk; \ + DEBIAN_FRONTEND=noninteractive apt-get remove -y maven; \ + mkdir -p /opt/jdk/ \ + && mkdir -p /opt/maven/ \ + && cd /opt/jdk/ \ + && wget https://builds.shipilev.net/openjdk-jdk8/openjdk-jdk8-linux-x86_64-server-fastdebug-gcc8-glibc2.28.tar.xz \ + && tar -xvf openjdk-jdk8-linux-x86_64-server-fastdebug-gcc8-glibc2.28.tar.xz \ + && rm -f openjdk-jdk8-linux-x86_64-server-fastdebug-gcc8-glibc2.28.tar.xz \ + && cd /opt/maven/ \ + && wget https://dlcdn.apache.org/maven/maven-3/3.6.3/binaries/apache-maven-3.6.3-bin.tar.gz \ + && tar -xvf apache-maven-3.6.3-bin.tar.gz \ + && rm -f apache-maven-3.6.3-bin.tar.gz \ + && cp -rs /opt/jdk/j2sdk-image/bin/* /usr/local/bin/ \ + && cp -rs /opt/maven/apache-maven-3.6.3/bin/mvn /usr/local/bin/ \ + && echo "JAVA_HOME=/opt/jdk/j2sdk-image" > ~/.mavenrc; \ + fi + +# These branches are mainly for pre-downloading dependencies to speed-up builds. +# Thus it should not be required to change these values every time when the build branch +# is changed.
+ARG CACHE_GLUTEN_REPO +ARG CACHE_GLUTEN_BRANCH + +RUN test -n "$CACHE_GLUTEN_REPO" || (echo "CACHE_GLUTEN_REPO not set" && false) +RUN test -n "$CACHE_GLUTEN_BRANCH" || (echo "CACHE_GLUTEN_BRANCH not set" && false) + +RUN cd /opt/ \ + && git clone $CACHE_GLUTEN_REPO -b $CACHE_GLUTEN_BRANCH gluten + +# Set ccache size +RUN ccache -M 128G +RUN ccache -s + +# Default Gluten Maven build options (empty as of now) +ENV GLUTEN_MAVEN_OPTIONS= +#RUN set-login-env "GLUTEN_MAVEN_OPTIONS=" + +ARG BUILD_BACKEND_TYPE + +RUN test -n "$BUILD_BACKEND_TYPE" || (echo "BUILD_BACKEND_TYPE not set" && false) + +RUN if [ "$BUILD_BACKEND_TYPE" == "velox" ]; \ + then \ + if [ "$GLUTEN_DEBUG_BUILD" == "ON" ]; then GLUTEN_BUILD_TYPE="Debug"; else GLUTEN_BUILD_TYPE="Release"; fi; \ + DEPS_INSTALL_SCRIPT="source /env.sh && bash /opt/gluten/dev/builddeps-veloxbe.sh \ + --enable_hdfs=ON --enable_s3=ON --enable_gcs=ON \ + --build_type=$GLUTEN_BUILD_TYPE --enable_ep_cache=ON"; \ + EXTRA_MAVEN_OPTIONS="-Pspark-3.2 \ + -Pbackends-velox \ + -Prss \ + -DskipTests \ + -Dscalastyle.skip=true \ + -Dcheckstyle.skip=true"; \ + else \ + echo "Unrecognizable backend type: $BUILD_BACKEND_TYPE"; \ + exit 1; \ + fi \ + && echo $EXTRA_MAVEN_OPTIONS > ~/.gluten-mvn-options \ + && echo $DEPS_INSTALL_SCRIPT > ~/.gluten-deps-install-script + +# Prebuild Gluten +RUN EXTRA_MAVEN_OPTIONS=$(cat ~/.gluten-mvn-options) \ + DEPS_INSTALL_SCRIPT=$(cat ~/.gluten-deps-install-script) \ + && cd /opt/gluten \ + && bash -c "$DEPS_INSTALL_SCRIPT" \ + && bash -c "mvn clean install $GLUTEN_MAVEN_OPTIONS $EXTRA_MAVEN_OPTIONS" + +# EOF diff --git a/tools/gluten-te/github_action/dockerfile-buildenv-centos b/tools/gluten-te/github_action/dockerfile-buildenv-centos new file mode 100755 index 000000000000..b0428dbf13ce --- /dev/null +++ b/tools/gluten-te/github_action/dockerfile-buildenv-centos @@ -0,0 +1,75 @@ +ARG OS_IMAGE + +FROM $OS_IMAGE AS gluten-buildenv +MAINTAINER Hongze Zhang + +SHELL ["/bin/bash", "-c"] + +# REQUIRED PROXYS: 
WGET, GIT, MAVEN (also Maven mirror) +ARG HTTP_PROXY_HOST +ARG HTTP_PROXY_PORT + +ENV http_proxy=${HTTP_PROXY_HOST:+"http://$HTTP_PROXY_HOST:$HTTP_PROXY_PORT"} +ENV https_proxy=${HTTP_PROXY_HOST:+"http://$HTTP_PROXY_HOST:$HTTP_PROXY_PORT"} + +ARG MAVEN_MIRROR_URL + +RUN if [ -n "$MAVEN_MIRROR_URL" ]; \ + then \ + MAVEN_SETTINGS_TEMPLATE="mavenmirrorcentralMavenMirror{{MAVEN_MIRROR_URL}}httpproxy{{MAVEN_PROXY_ENABLE}}http{{MAVEN_PROXY_HOST}}{{MAVEN_PROXY_PORT}}httpsproxy{{MAVEN_PROXY_ENABLE}}https{{MAVEN_PROXY_HOST}}{{MAVEN_PROXY_PORT}}"; \ + MAVEN_SETTINGS_TEMPLATE=$(echo $MAVEN_SETTINGS_TEMPLATE | sed "s@{{MAVEN_MIRROR_URL}}@$MAVEN_MIRROR_URL@g"); \ + else \ + MAVEN_SETTINGS_TEMPLATE="httpproxy{{MAVEN_PROXY_ENABLE}}http{{MAVEN_PROXY_HOST}}{{MAVEN_PROXY_PORT}}httpsproxy{{MAVEN_PROXY_ENABLE}}https{{MAVEN_PROXY_HOST}}{{MAVEN_PROXY_PORT}}"; \ + fi \ + && if [ -n "$HTTP_PROXY_HOST" ]; \ + then \ + MAVEN_SETTINGS_TEMPLATE=$(echo $MAVEN_SETTINGS_TEMPLATE | sed "s/{{MAVEN_PROXY_ENABLE}}/true/g"); \ + MAVEN_SETTINGS_TEMPLATE=$(echo $MAVEN_SETTINGS_TEMPLATE | sed "s/{{MAVEN_PROXY_HOST}}/$HTTP_PROXY_HOST/g"); \ + MAVEN_SETTINGS_TEMPLATE=$(echo $MAVEN_SETTINGS_TEMPLATE | sed "s/{{MAVEN_PROXY_PORT}}/$HTTP_PROXY_PORT/g"); \ + else \ + MAVEN_SETTINGS_TEMPLATE=$(echo $MAVEN_SETTINGS_TEMPLATE | sed "s/{{MAVEN_PROXY_ENABLE}}/false/g"); \ + MAVEN_SETTINGS_TEMPLATE=$(echo $MAVEN_SETTINGS_TEMPLATE | sed "s/{{MAVEN_PROXY_HOST}}/localhost/g"); \ + MAVEN_SETTINGS_TEMPLATE=$(echo $MAVEN_SETTINGS_TEMPLATE | sed "s/{{MAVEN_PROXY_PORT}}/8888/g"); \ + fi \ + && MAVEN_SETTINGS=$MAVEN_SETTINGS_TEMPLATE \ + && mkdir -p /root/.m2/ \ + && echo $MAVEN_SETTINGS > /root/.m2/settings.xml + +# Display environment information +RUN ulimit -a +RUN env +RUN cat /root/.m2/settings.xml + +# Install deps from repo +ARG OS_VERSION +COPY centos-$OS_VERSION-deps.sh /tmp/deps.sh +RUN /tmp/deps.sh \ + && rm /tmp/deps.sh \ + && yum clean all \ + && dnf clean all \ + && rm -rf /var/cache/yum + +# Install deps from 
url
+ENV PATH="$PATH:/usr/lib/jvm/java-1.8.0-openjdk/bin"
+RUN wget https://archive.apache.org/dist/maven/maven-3/3.8.8/binaries/apache-maven-3.8.8-bin.tar.gz && \
+    tar -xvf apache-maven-3.8.8-bin.tar.gz && \
+    rm apache-maven-3.8.8-bin.tar.gz && \
+    mv apache-maven-3.8.8 /usr/lib/maven
+
+# # Build & install Spark 3.2.2
+# RUN cd /opt && git clone --depth 1 --branch v3.2.2 https://github.com/apache/spark.git spark322
+# RUN cd /opt/spark322 && ./build/mvn -Pyarn -DskipTests clean install
+
+# # Build & install Spark 3.3.1
+# RUN cd /opt && git clone --depth 1 --branch v3.3.1 https://github.com/apache/spark.git spark331
+# RUN cd /opt/spark331 && ./build/mvn -Pyarn -DskipTests clean install
+
+ENV PATH="$PATH:/usr/lib/maven/bin"
+ENV LD_LIBRARY_PATH=/usr/local/lib64:/usr/local/lib:/usr/lib64:/usr/lib:/lib64:/lib
+
+# Velox setup scripts require sudo
+RUN yum -y install sudo \
+    && yum clean all
+RUN echo '%wheel ALL=(ALL) NOPASSWD:ALL' >> /etc/sudoers
+
+COPY scripts/env.sh /env.sh
diff --git a/tools/gluten-te/github_action/dockerfile-buildenv-ubuntu b/tools/gluten-te/github_action/dockerfile-buildenv-ubuntu
new file mode 100755
index 000000000000..fb88d170a8e5
--- /dev/null
+++ b/tools/gluten-te/github_action/dockerfile-buildenv-ubuntu
@@ -0,0 +1,119 @@
+ARG OS_IMAGE
+
+FROM $OS_IMAGE AS gluten-buildenv
+LABEL maintainer="Hongze Zhang"
+
+SHELL ["/bin/bash", "-l", "-c"]
+ENTRYPOINT ["/bin/bash", "-l", "-c"]
+CMD ["/bin/bash"]
+
+# Add script for adding environment variables for login-shell (e.g. 
a shell via ssh)
+COPY scripts/set-login-env.sh /usr/local/sbin/set-login-env
+
+# REQUIRED PROXIES: APT, WGET, GIT, MAVEN (also Maven mirror)
+ARG HTTP_PROXY_HOST
+ARG HTTP_PROXY_PORT
+
+# Sometimes ENV a=b won't work when the shell is not docker-default, so we
+# use both ways to set the variables
+ENV http_proxy=${HTTP_PROXY_HOST:+"http://$HTTP_PROXY_HOST:$HTTP_PROXY_PORT"}
+ENV https_proxy=${HTTP_PROXY_HOST:+"http://$HTTP_PROXY_HOST:$HTTP_PROXY_PORT"}
+ENV no_proxy=localhost,127.0.0.1,127.0.0.0/8,172.16.0.0/12,192.168.0.0/16
+ENV HTTP_PROXY=${HTTP_PROXY_HOST:+"http://$HTTP_PROXY_HOST:$HTTP_PROXY_PORT"}
+ENV HTTPS_PROXY=${HTTP_PROXY_HOST:+"http://$HTTP_PROXY_HOST:$HTTP_PROXY_PORT"}
+ENV NO_PROXY=localhost,127.0.0.1,127.0.0.0/8,172.16.0.0/12,192.168.0.0/16
+RUN set-login-env "http_proxy=${HTTP_PROXY_HOST:+http://$HTTP_PROXY_HOST:$HTTP_PROXY_PORT}"
+RUN set-login-env "https_proxy=${HTTP_PROXY_HOST:+http://$HTTP_PROXY_HOST:$HTTP_PROXY_PORT}"
+RUN set-login-env "no_proxy=localhost,127.0.0.1,127.0.0.0/8,172.16.0.0/12,192.168.0.0/16"
+RUN set-login-env "HTTP_PROXY=${HTTP_PROXY_HOST:+http://$HTTP_PROXY_HOST:$HTTP_PROXY_PORT}"
+RUN set-login-env "HTTPS_PROXY=${HTTP_PROXY_HOST:+http://$HTTP_PROXY_HOST:$HTTP_PROXY_PORT}"
+RUN set-login-env "NO_PROXY=localhost,127.0.0.1,127.0.0.0/8,172.16.0.0/12,192.168.0.0/16"
+
+RUN if [ -n "$HTTP_PROXY_HOST" ]; then echo "Acquire::http::Proxy \"http://$HTTP_PROXY_HOST:$HTTP_PROXY_PORT\";" >> /etc/apt/apt.conf; fi
+RUN if [ -n "$HTTP_PROXY_HOST" ]; then echo "Acquire::https::Proxy \"http://$HTTP_PROXY_HOST:$HTTP_PROXY_PORT\";" >> /etc/apt/apt.conf; fi
+
+ARG MAVEN_MIRROR_URL
+# Generate /root/.m2/settings.xml: optional central mirror plus HTTP/HTTPS proxy entries
+RUN if [ -n "$MAVEN_MIRROR_URL" ]; \
+    then \
+      MAVEN_SETTINGS_TEMPLATE="<settings><mirrors><mirror><id>mavenmirror</id><mirrorOf>central</mirrorOf><name>MavenMirror</name><url>{{MAVEN_MIRROR_URL}}</url></mirror></mirrors><proxies><proxy><id>httpproxy</id><active>{{MAVEN_PROXY_ENABLE}}</active><protocol>http</protocol><host>{{MAVEN_PROXY_HOST}}</host><port>{{MAVEN_PROXY_PORT}}</port></proxy><proxy><id>httpsproxy</id><active>{{MAVEN_PROXY_ENABLE}}</active><protocol>https</protocol><host>{{MAVEN_PROXY_HOST}}</host><port>{{MAVEN_PROXY_PORT}}</port></proxy></proxies></settings>"; \
+      MAVEN_SETTINGS_TEMPLATE=$(echo "$MAVEN_SETTINGS_TEMPLATE" | sed "s@{{MAVEN_MIRROR_URL}}@$MAVEN_MIRROR_URL@g"); \
+    else \
+      MAVEN_SETTINGS_TEMPLATE="<settings><proxies><proxy><id>httpproxy</id><active>{{MAVEN_PROXY_ENABLE}}</active><protocol>http</protocol><host>{{MAVEN_PROXY_HOST}}</host><port>{{MAVEN_PROXY_PORT}}</port></proxy><proxy><id>httpsproxy</id><active>{{MAVEN_PROXY_ENABLE}}</active><protocol>https</protocol><host>{{MAVEN_PROXY_HOST}}</host><port>{{MAVEN_PROXY_PORT}}</port></proxy></proxies></settings>"; \
+    fi \
+    && if [ -n "$HTTP_PROXY_HOST" ]; \
+    then \
+      MAVEN_SETTINGS_TEMPLATE=$(echo "$MAVEN_SETTINGS_TEMPLATE" | sed "s/{{MAVEN_PROXY_ENABLE}}/true/g"); \
+      MAVEN_SETTINGS_TEMPLATE=$(echo "$MAVEN_SETTINGS_TEMPLATE" | sed "s/{{MAVEN_PROXY_HOST}}/$HTTP_PROXY_HOST/g"); \
+      MAVEN_SETTINGS_TEMPLATE=$(echo "$MAVEN_SETTINGS_TEMPLATE" | sed "s/{{MAVEN_PROXY_PORT}}/$HTTP_PROXY_PORT/g"); \
+    else \
+      MAVEN_SETTINGS_TEMPLATE=$(echo "$MAVEN_SETTINGS_TEMPLATE" | sed "s/{{MAVEN_PROXY_ENABLE}}/false/g"); \
+      MAVEN_SETTINGS_TEMPLATE=$(echo "$MAVEN_SETTINGS_TEMPLATE" | sed "s/{{MAVEN_PROXY_HOST}}/localhost/g"); \
+      MAVEN_SETTINGS_TEMPLATE=$(echo "$MAVEN_SETTINGS_TEMPLATE" | sed "s/{{MAVEN_PROXY_PORT}}/8888/g"); \
+    fi \
+    && MAVEN_SETTINGS=$MAVEN_SETTINGS_TEMPLATE \
+    && mkdir -p /root/.m2/ \
+    && echo "$MAVEN_SETTINGS" > /root/.m2/settings.xml
+
+# Display environment information
+RUN ulimit -a
+RUN env
+RUN cat /etc/apt/apt.conf || (echo "Apt proxy not set" && true)
+RUN cat /root/.m2/settings.xml
+
+## APT dependencies
+
+# Update, then install essentials
+RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y sudo locales wget tar tzdata git ccache cmake ninja-build build-essential llvm-11-dev clang-11 libiberty-dev libdwarf-dev libre2-dev libz-dev libssl-dev libboost-all-dev libcurl4-openssl-dev
+
+# install HBM dependencies
+RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y autoconf automake g++ libnuma-dev libtool numactl unzip libdaxctl-dev
+
+# Install OpenJDK 8 and Maven
+RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y openjdk-8-jdk
+RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y maven
+
+# Setup SSH server
+RUN apt-get update && DEBIAN_FRONTEND=noninteractive 
apt-get install -y openssh-server
+RUN systemctl disable ssh
+RUN ssh-keygen -A
+RUN mkdir -p /run/sshd
+RUN echo 'PermitRootLogin yes' >> /etc/ssh/sshd_config.d/override.conf
+RUN echo 'X11Forwarding yes' >> /etc/ssh/sshd_config.d/override.conf
+RUN echo 'X11UseLocalhost no' >> /etc/ssh/sshd_config.d/override.conf
+RUN echo -e "123\n123" | passwd
+
+ARG TIMEZONE
+RUN test -n "$TIMEZONE" || (echo "TIMEZONE not set" && false)
+
+RUN TZ=$TIMEZONE \
+    && ln -snf /usr/share/zoneinfo/$TZ /etc/localtime \
+    && echo $TZ > /etc/timezone \
+    && dpkg-reconfigure -f noninteractive tzdata
+
+# Configure locale
+RUN sed -i '/en_US.UTF-8/s/^# //g' /etc/locale.gen \
+    && locale-gen
+
+ENV LANG=en_US.UTF-8
+ENV LANGUAGE=en_US:en
+ENV LC_ALL=en_US.UTF-8
+RUN set-login-env "LANG=en_US.UTF-8"
+RUN set-login-env "LANGUAGE=en_US:en"
+RUN set-login-env "LC_ALL=en_US.UTF-8"
+
+# Build & install Spark 3.2.2
+RUN cd /opt && git clone --depth 1 --branch v3.2.2 https://github.com/apache/spark.git spark322
+RUN cd /opt/spark322 && ./build/mvn -Pyarn -DskipTests clean install
+
+# Build & install Spark 3.3.1
+RUN cd /opt && git clone --depth 1 --branch v3.3.1 https://github.com/apache/spark.git spark331
+RUN cd /opt/spark331 && ./build/mvn -Pyarn -DskipTests clean install
+
+# Build & install Spark 3.4.1
+RUN cd /opt && git clone --depth 1 --branch v3.4.1 https://github.com/apache/spark.git spark341
+RUN cd /opt/spark341 && ./build/mvn -Pyarn -DskipTests clean install
+
+# Prepare entry command
+COPY scripts/cmd.sh /root/.cmd.sh
+CMD ["/root/.cmd.sh"]
diff --git a/tools/gluten-te/github_action/exec.sh b/tools/gluten-te/github_action/exec.sh
new file mode 100755
index 000000000000..3d0b9fe5f79c
--- /dev/null
+++ b/tools/gluten-te/github_action/exec.sh
@@ -0,0 +1,28 @@
+#!/bin/bash
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership. 
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+set -ex
+# All arguments are joined into one command line that bash runs inside the container.
+CBASH_BASH_ARGS="$*"
+BASH_ARGS="$CBASH_BASH_ARGS"
+
+if [ -z "$GITHUB_RUN_ID" ]
+then
+  echo "Unable to parse GITHUB_RUN_ID." >&2
+  exit 1
+fi
+# GITHUB_JOB and GITHUB_RUN_ID together form the name of the target container.
+docker exec "gha-checkout-$GITHUB_JOB-$GITHUB_RUN_ID" bash -c "cd /opt/gluten && $BASH_ARGS"
diff --git a/tools/gluten-te/github_action/scripts/cmd.sh b/tools/gluten-te/github_action/scripts/cmd.sh
new file mode 100755
index 000000000000..85f47f193b30
--- /dev/null
+++ b/tools/gluten-te/github_action/scripts/cmd.sh
@@ -0,0 +1,18 @@
+#!/bin/bash
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+echo "Running gluten-buildenv docker container with SSH X11 forwarding enabled. 
SSH password: 123"
+/usr/sbin/sshd -D
diff --git a/tools/gluten-te/github_action/scripts/env.sh b/tools/gluten-te/github_action/scripts/env.sh
new file mode 100755
index 000000000000..0782e97f8ef9
--- /dev/null
+++ b/tools/gluten-te/github_action/scripts/env.sh
@@ -0,0 +1,24 @@
+#! /bin/sh
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# shellcheck disable=SC1091,SC2155
+
+if [ -f "/opt/rh/gcc-toolset-9/enable" ]; then
+  . /opt/rh/gcc-toolset-9/enable
+elif [ -f "/opt/rh/devtoolset-9/enable" ]; then # CentOS 7
+  . /opt/rh/devtoolset-9/enable
+fi
+
+export MAKEFLAGS="-j$(nproc)"
\ No newline at end of file
diff --git a/tools/gluten-te/github_action/scripts/init.sh b/tools/gluten-te/github_action/scripts/init.sh
new file mode 100755
index 000000000000..7c70ccf0d406
--- /dev/null
+++ b/tools/gluten-te/github_action/scripts/init.sh
@@ -0,0 +1,25 @@
+#!/bin/bash
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. 
You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+set -ex
+# Usage: init.sh <gluten-repo> <gluten-commit>; both arguments are required.
+TARGET_GLUTEN_REPO=${1:?usage: init.sh <gluten-repo> <gluten-commit>}
+TARGET_GLUTEN_COMMIT=${2:?usage: init.sh <gluten-repo> <gluten-commit>}
+
+cd /opt/gluten
+# Fetch the requested commit into a local build_<commit> branch, then check it out.
+git fetch "$TARGET_GLUTEN_REPO" "$TARGET_GLUTEN_COMMIT:build_$TARGET_GLUTEN_COMMIT"
+git checkout "build_$TARGET_GLUTEN_COMMIT"
diff --git a/tools/gluten-te/github_action/scripts/set-login-env.sh b/tools/gluten-te/github_action/scripts/set-login-env.sh
new file mode 100755
index 000000000000..7211b714989f
--- /dev/null
+++ b/tools/gluten-te/github_action/scripts/set-login-env.sh
@@ -0,0 +1,18 @@
+#!/bin/bash
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License. 
+# Usage: set-login-env NAME=VALUE -- appends "export NAME=VALUE" to the profile script below.
+login_env_assignment="${*:?usage: set-login-env NAME=VALUE}"
+printf 'export %s\n' "$login_env_assignment" >> /etc/profile.d/99-gluten.sh
diff --git a/tools/gluten-te/ubuntu/defaults.conf b/tools/gluten-te/ubuntu/defaults.conf
index 1971c1af0dd0..068dcd0c8b79 100644
--- a/tools/gluten-te/ubuntu/defaults.conf
+++ b/tools/gluten-te/ubuntu/defaults.conf
@@ -11,7 +11,8 @@ DEFAULT_NON_INTERACTIVE=OFF
 DEFAULT_PRESERVE_CONTAINER=OFF
 
 # The codes will be used in build
-DEFAULT_GLUTEN_REPO=https://github.com/oap-project/gluten.git
+# for 10.0.0.25 test at PHILO-HE/gluten
+DEFAULT_GLUTEN_REPO=https://github.com/PHILO-HE/gluten.git
 DEFAULT_GLUTEN_BRANCH=main
 
 # Create debug build
@@ -21,14 +22,29 @@ DEFAULT_DEBUG_BUILD=OFF
 DEFAULT_BUILD_BACKEND_TYPE=velox
 
 # HTTP proxy
-DEFAULT_HTTP_PROXY_HOST=
-DEFAULT_HTTP_PROXY_PORT=
+# If http proxy is http://child-prc.intel.com:913, write it as
+# DEFAULT_HTTP_PROXY_HOST=child-prc.intel.com
+# DEFAULT_HTTP_PROXY_PORT=913
+# do not write http://
+# DEFAULT_HTTP_PROXY_HOST=
+# DEFAULT_HTTP_PROXY_PORT=
+# for 10.0.0.25 test
+DEFAULT_HTTP_PROXY_HOST=child-prc.intel.com
+DEFAULT_HTTP_PROXY_PORT=913
 
 # If on, use maven mirror settings for PRC's network environment
-DEFAULT_USE_ALI_MAVEN_MIRROR=OFF
+# DEFAULT_USE_ALI_MAVEN_MIRROR=OFF
+# for 10.0.0.25 test
+DEFAULT_USE_ALI_MAVEN_MIRROR=ON
 
 # Base operator system image used in build scripts. 
-DEFAULT_OS_IMAGE=ubuntu:20.04 +# DEFAULT_OS_IMAGE=ubuntu or centos +DEFAULT_OS_IMAGE= + +# Version ID of os image +# DEFAULT_OS_VERSION=20.04 or 8 +DEFAULT_OS_VERSION= + # Set timezone name DEFAULT_TIMEZONE=Asia/Shanghai @@ -40,10 +56,14 @@ DEFAULT_DOCKER_TARGET_IMAGE_BUILDENV=gluten-te/gluten-buildenv DEFAULT_DOCKER_TARGET_IMAGE_BUILD=gluten-te/gluten-build # Docker registry used to pull pre-built images to speed-up builds -DEFAULT_DOCKER_CACHE_REGISTRY= +# DEFAULT_DOCKER_CACHE_REGISTRY= +# for 10.0.0.25 test +DEFAULT_DOCKER_CACHE_REGISTRY=10.0.0.25:5000 # Docker registry to push pre-built images -DEFAULT_DOCKER_PUSH_REGISTRY= +# DEFAULT_DOCKER_PUSH_REGISTRY= +# for 10.0.0.25 test +DEFAULT_DOCKER_PUSH_REGISTRY=10.0.0.25:5000 ## For tpc.sh diff --git a/tools/gluten-te/ubuntu/gha/gha-checkout/checkout.sh b/tools/gluten-te/ubuntu/gha/gha-checkout/checkout.sh index 15c28f5849d0..415fcf1f2a16 100755 --- a/tools/gluten-te/ubuntu/gha/gha-checkout/checkout.sh +++ b/tools/gluten-te/ubuntu/gha/gha-checkout/checkout.sh @@ -26,7 +26,7 @@ then exit 1 fi -export EXTRA_DOCKER_OPTIONS="$EXTRA_DOCKER_OPTIONS --name gha-checkout-$GITHUB_RUN_ID --detach -v $BASEDIR/scripts:/opt/scripts" +export EXTRA_DOCKER_OPTIONS="$EXTRA_DOCKER_OPTIONS --name gha-checkout-$GITHUB_JOB-$GITHUB_RUN_ID --detach -v $BASEDIR/scripts:/opt/scripts" export NON_INTERACTIVE=ON $BASEDIR/../../cbash-build.sh 'sleep 14400' diff --git a/tools/gluten-te/ubuntu/gha/gha-checkout/clean.sh b/tools/gluten-te/ubuntu/gha/gha-checkout/clean.sh index f67f6bf0058b..29a97351f517 100755 --- a/tools/gluten-te/ubuntu/gha/gha-checkout/clean.sh +++ b/tools/gluten-te/ubuntu/gha/gha-checkout/clean.sh @@ -22,4 +22,4 @@ then exit 1 fi -docker stop gha-checkout-$GITHUB_RUN_ID || true +docker stop gha-checkout-$GITHUB_JOB-$GITHUB_RUN_ID || true diff --git a/tools/gluten-te/ubuntu/gha/gha-checkout/exec.sh b/tools/gluten-te/ubuntu/gha/gha-checkout/exec.sh index 1497ab6f2edf..3d0b9fe5f79c 100755 --- 
a/tools/gluten-te/ubuntu/gha/gha-checkout/exec.sh +++ b/tools/gluten-te/ubuntu/gha/gha-checkout/exec.sh @@ -25,4 +25,4 @@ then exit 1 fi -docker exec gha-checkout-$GITHUB_RUN_ID bash -c "cd /opt/gluten && $BASH_ARGS" +docker exec gha-checkout-$GITHUB_JOB-$GITHUB_RUN_ID bash -c "cd /opt/gluten && $BASH_ARGS"