Skip to content

Commit

Permalink
Initial commit
Browse files Browse the repository at this point in the history
  • Loading branch information
zedong-peng committed Jan 2, 2024
1 parent cf46a3a commit b9cb014
Show file tree
Hide file tree
Showing 28 changed files with 1,068 additions and 137 deletions.
239 changes: 128 additions & 111 deletions .github/workflows/velox_be.yml

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
*<b>This project is still under active development now, and doesn't have a stable release. Welcome to evaluate it.</b>*

# 1 Introduction

test
## 1.1 Problem Statement

Apache Spark is a stable, mature project that has been developed for many years. It is one of the best frameworks to scale out for processing petabyte-scale datasets. However, the Spark community has had to address performance challenges that require various optimizations over time. As a key optimization in Spark 2.0, Whole Stage Code Generation was introduced to replace the Volcano Model, achieving a 2x speedup. Since then, most optimizations have been at the query plan level, and the performance of individual operators has almost stopped improving.
Expand Down
2 changes: 1 addition & 1 deletion tools/gluten-te/centos/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ BUILD_BACKEND_TYPE=${BUILD_BACKEND_TYPE:-$DEFAULT_BUILD_BACKEND_TYPE}
# Build will result in this image
DOCKER_TARGET_IMAGE_BUILD=${DOCKER_TARGET_IMAGE_BUILD:-$DEFAULT_DOCKER_TARGET_IMAGE_BUILD}

DOCKER_TARGET_IMAGE_BUILD_WITH_OS_IMAGE="$DOCKER_TARGET_IMAGE_BUILD-$OS_IMAGE"
DOCKER_TARGET_IMAGE_BUILD_WITH_OS_IMAGE="$DOCKER_TARGET_IMAGE_BUILD-$OS_IMAGE:$OS_VERSION"

##

Expand Down
4 changes: 1 addition & 3 deletions tools/gluten-te/centos/buildenv.sh
Original file line number Diff line number Diff line change
Expand Up @@ -42,14 +42,12 @@ TIMEZONE=${TIMEZONE:-$DEFAULT_TIMEZONE}

# Set operating system
OS_IMAGE=${OS_IMAGE:-$DEFAULT_OS_IMAGE}

# Set os version
OS_VERSION=${OS_VERSION:-$DEFAULT_OS_VERSION}

# Build will result in this image
DOCKER_TARGET_IMAGE_BUILDENV=${DOCKER_TARGET_IMAGE_BUILDENV:-$DEFAULT_DOCKER_TARGET_IMAGE_BUILDENV}

DOCKER_TARGET_IMAGE_BUILDENV_WITH_OS_IMAGE="$DOCKER_TARGET_IMAGE_BUILDENV-$OS_IMAGE"
DOCKER_TARGET_IMAGE_BUILDENV_WITH_OS_IMAGE="$DOCKER_TARGET_IMAGE_BUILDENV-$OS_IMAGE:$OS_VERSION"

if [ "$USE_ALI_MAVEN_MIRROR" == "ON" ]
then
Expand Down
32 changes: 24 additions & 8 deletions tools/gluten-te/centos/defaults.conf
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,8 @@ DEFAULT_NON_INTERACTIVE=OFF
DEFAULT_PRESERVE_CONTAINER=OFF

# The codes will be used in build
DEFAULT_GLUTEN_REPO=https://github.com/oap-project/gluten.git
# for 10.0.0.25 test at PHILO-HE/gluten
DEFAULT_GLUTEN_REPO=https://github.com/PHILO-HE/gluten.git
DEFAULT_GLUTEN_BRANCH=main

# Create debug build
Expand All @@ -21,17 +22,28 @@ DEFAULT_DEBUG_BUILD=OFF
DEFAULT_BUILD_BACKEND_TYPE=velox

# HTTP proxy
DEFAULT_HTTP_PROXY_HOST=
DEFAULT_HTTP_PROXY_PORT=
# If http proxy is http://child-prc.intel.com:913, write it as
# DEFAULT_HTTP_PROXY_HOST=child-prc.intel.com
# DEFAULT_HTTP_PROXY_PORT=913
# do not write http://
# DEFAULT_HTTP_PROXY_HOST=
# DEFAULT_HTTP_PROXY_PORT=
# for 10.0.0.25 test
DEFAULT_HTTP_PROXY_HOST=child-prc.intel.com
DEFAULT_HTTP_PROXY_PORT=913

# If on, use maven mirror settings for PRC's network environment
DEFAULT_USE_ALI_MAVEN_MIRROR=OFF
# DEFAULT_USE_ALI_MAVEN_MIRROR=OFF
# for 10.0.0.25 test
DEFAULT_USE_ALI_MAVEN_MIRROR=ON

# Base operating system image used in build scripts.
DEFAULT_OS_IMAGE=centos:8
# DEFAULT_OS_IMAGE=ubuntu or centos
DEFAULT_OS_IMAGE=

# Version ID of os image
DEFAULT_OS_VERSION=8
# DEFAULT_OS_VERSION=20.04 or 8
DEFAULT_OS_VERSION=

# Set timezone name
DEFAULT_TIMEZONE=Asia/Shanghai
Expand All @@ -43,10 +55,14 @@ DEFAULT_DOCKER_TARGET_IMAGE_BUILDENV=gluten-te/gluten-buildenv
DEFAULT_DOCKER_TARGET_IMAGE_BUILD=gluten-te/gluten-build

# Docker registry used to pull pre-built images to speed-up builds
DEFAULT_DOCKER_CACHE_REGISTRY=
# DEFAULT_DOCKER_CACHE_REGISTRY=
# for 10.0.0.25 test
DEFAULT_DOCKER_CACHE_REGISTRY=10.0.0.25:5000

# Docker registry to push pre-built images
DEFAULT_DOCKER_PUSH_REGISTRY=
# DEFAULT_DOCKER_PUSH_REGISTRY=
# for 10.0.0.25 test
DEFAULT_DOCKER_PUSH_REGISTRY=10.0.0.25:5000

## For cbash.sh

Expand Down
2 changes: 1 addition & 1 deletion tools/gluten-te/centos/gha/gha-checkout/checkout.sh
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ then
exit 1
fi

export EXTRA_DOCKER_OPTIONS="$EXTRA_DOCKER_OPTIONS --name gha-checkout-$GITHUB_RUN_ID --detach -v $BASEDIR/scripts:/opt/scripts"
export EXTRA_DOCKER_OPTIONS="$EXTRA_DOCKER_OPTIONS --name gha-checkout-$GITHUB_JOB-$GITHUB_RUN_ID --detach -v $BASEDIR/scripts:/opt/scripts"
export NON_INTERACTIVE=ON

$BASEDIR/../../cbash-build.sh 'sleep 14400'
Expand Down
2 changes: 1 addition & 1 deletion tools/gluten-te/centos/gha/gha-checkout/clean.sh
Original file line number Diff line number Diff line change
Expand Up @@ -22,4 +22,4 @@ then
exit 1
fi

docker stop gha-checkout-$GITHUB_RUN_ID || true
docker stop gha-checkout-$GITHUB_JOB-$GITHUB_RUN_ID || true
2 changes: 1 addition & 1 deletion tools/gluten-te/centos/gha/gha-checkout/exec.sh
Original file line number Diff line number Diff line change
Expand Up @@ -25,4 +25,4 @@ then
exit 1
fi

docker exec gha-checkout-$GITHUB_RUN_ID bash -c "cd /opt/gluten && $BASH_ARGS"
docker exec gha-checkout-$GITHUB_JOB-$GITHUB_RUN_ID bash -c "cd /opt/gluten && $BASH_ARGS"
90 changes: 90 additions & 0 deletions tools/gluten-te/github_action/build.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
#!/bin/bash
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

set -ex

# Directory containing this script. "$0" is quoted so that a checkout path
# containing whitespace is not split into several arguments to dirname.
BASEDIR=$(dirname "$0")

# Sourcing buildenv.sh builds the buildenv image and leaves the shared
# settings (OS_IMAGE, OS_VERSION, DOCKER_*_REGISTRY, DEFAULT_* values,
# DOCKER_TARGET_IMAGE_BUILDENV_WITH_OS_IMAGE) in scope for this script.
source "$BASEDIR/buildenv.sh"

## Debug build flags

# Create debug build
DEBUG_BUILD=${DEBUG_BUILD:-$DEFAULT_DEBUG_BUILD}

# JDK_DEBUG_BUILD and GLUTEN_DEBUG_BUILD are derived from DEBUG_BUILD below,
# so warn when the caller pre-set them. The expansions must be quoted: an
# unquoted empty value turns the test into `[ -n ]`, which is always true and
# made the warning fire on every run.
if [ -n "$JDK_DEBUG_BUILD" ]
then
  echo "Do not set JDK_DEBUG_BUILD manually!"
fi

if [ -n "$GLUTEN_DEBUG_BUILD" ]
then
  echo "Do not set GLUTEN_DEBUG_BUILD manually!"
fi

# Only the Gluten debug flag follows DEBUG_BUILD; the JDK flag is OFF in
# both branches.
if [ "$DEBUG_BUILD" == "ON" ]
then
  JDK_DEBUG_BUILD=OFF
  GLUTEN_DEBUG_BUILD=ON
else
  JDK_DEBUG_BUILD=OFF
  GLUTEN_DEBUG_BUILD=OFF
fi

# The branches used to prepare dependencies
CACHE_GLUTEN_REPO=${CACHE_GLUTEN_REPO:-$DEFAULT_GLUTEN_REPO}
CACHE_GLUTEN_BRANCH=${CACHE_GLUTEN_BRANCH:-$DEFAULT_GLUTEN_BRANCH}

# Backend type
BUILD_BACKEND_TYPE=${BUILD_BACKEND_TYPE:-$DEFAULT_BUILD_BACKEND_TYPE}

# Build will result in this image
DOCKER_TARGET_IMAGE_BUILD=${DOCKER_TARGET_IMAGE_BUILD:-$DEFAULT_DOCKER_TARGET_IMAGE_BUILD}

# Final image reference: <image>-<os image>:<os version>
DOCKER_TARGET_IMAGE_BUILD_WITH_OS_IMAGE="$DOCKER_TARGET_IMAGE_BUILD-$OS_IMAGE:$OS_VERSION"

##

# The docker build command line is accumulated as one whitespace-separated
# string and word-split on purpose when docker is invoked below, so none of
# the interpolated values may contain whitespace.
BUILD_DOCKER_BUILD_ARGS=

BUILD_DOCKER_BUILD_ARGS="$BUILD_DOCKER_BUILD_ARGS --ulimit nofile=8192:8192"
BUILD_DOCKER_BUILD_ARGS="$BUILD_DOCKER_BUILD_ARGS --build-arg BUILDKIT_INLINE_CACHE=1"
BUILD_DOCKER_BUILD_ARGS="$BUILD_DOCKER_BUILD_ARGS --build-arg DOCKER_TARGET_IMAGE_BUILDENV_WITH_OS_IMAGE=$DOCKER_TARGET_IMAGE_BUILDENV_WITH_OS_IMAGE"
BUILD_DOCKER_BUILD_ARGS="$BUILD_DOCKER_BUILD_ARGS --build-arg JDK_DEBUG_BUILD=$JDK_DEBUG_BUILD"
BUILD_DOCKER_BUILD_ARGS="$BUILD_DOCKER_BUILD_ARGS --build-arg GLUTEN_DEBUG_BUILD=$GLUTEN_DEBUG_BUILD"
BUILD_DOCKER_BUILD_ARGS="$BUILD_DOCKER_BUILD_ARGS --build-arg CACHE_GLUTEN_REPO=$CACHE_GLUTEN_REPO"
BUILD_DOCKER_BUILD_ARGS="$BUILD_DOCKER_BUILD_ARGS --build-arg CACHE_GLUTEN_BRANCH=$CACHE_GLUTEN_BRANCH"
BUILD_DOCKER_BUILD_ARGS="$BUILD_DOCKER_BUILD_ARGS --build-arg BUILD_BACKEND_TYPE=$BUILD_BACKEND_TYPE"
BUILD_DOCKER_BUILD_ARGS="$BUILD_DOCKER_BUILD_ARGS -f $BASEDIR/dockerfile-build"
BUILD_DOCKER_BUILD_ARGS="$BUILD_DOCKER_BUILD_ARGS --target gluten-build"
BUILD_DOCKER_BUILD_ARGS="$BUILD_DOCKER_BUILD_ARGS -t $DOCKER_TARGET_IMAGE_BUILD_WITH_OS_IMAGE"

# Reuse layers from a previously pushed image when a cache registry is set.
if [ -n "$DOCKER_CACHE_REGISTRY" ]
then
  BUILD_DOCKER_BUILD_ARGS="$BUILD_DOCKER_BUILD_ARGS --cache-from $DOCKER_CACHE_REGISTRY/$DOCKER_TARGET_IMAGE_BUILD_WITH_OS_IMAGE"
fi

# Build context is the directory of this script.
BUILD_DOCKER_BUILD_ARGS="$BUILD_DOCKER_BUILD_ARGS $BASEDIR"

# Intentionally unquoted: the string must be split back into arguments.
# shellcheck disable=SC2086
docker build $BUILD_DOCKER_BUILD_ARGS

# Publish the freshly built image when a push registry is configured.
if [ -n "$DOCKER_PUSH_REGISTRY" ]
then
  docker tag "$DOCKER_TARGET_IMAGE_BUILD_WITH_OS_IMAGE" "$DOCKER_PUSH_REGISTRY/$DOCKER_TARGET_IMAGE_BUILD_WITH_OS_IMAGE"
  docker push "$DOCKER_PUSH_REGISTRY/$DOCKER_TARGET_IMAGE_BUILD_WITH_OS_IMAGE"
fi

# EOF
89 changes: 89 additions & 0 deletions tools/gluten-te/github_action/buildenv.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
#!/bin/bash
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

set -ex

# Directory containing this script. "$0" is quoted so that a checkout path
# containing whitespace is not split into several arguments to dirname.
BASEDIR=$(dirname "$0")

# Loads the DEFAULT_* fallbacks used by every `${VAR:-$DEFAULT_VAR}` below.
source "$BASEDIR/defaults.conf"

# Enable buildkit
export DOCKER_BUILDKIT=1
export BUILDKIT_PROGRESS=plain

# Docker registry used to pull pre-built images to speed-up builds
DOCKER_CACHE_REGISTRY=${DOCKER_CACHE_REGISTRY:-$DEFAULT_DOCKER_CACHE_REGISTRY}

# Docker registry to push pre-built images
DOCKER_PUSH_REGISTRY=${DOCKER_PUSH_REGISTRY:-$DEFAULT_DOCKER_PUSH_REGISTRY}

# HTTP proxy
HTTP_PROXY_HOST=${HTTP_PROXY_HOST:-$DEFAULT_HTTP_PROXY_HOST}
HTTP_PROXY_PORT=${HTTP_PROXY_PORT:-$DEFAULT_HTTP_PROXY_PORT}

# If on, use maven mirror settings for PRC's network environment
USE_ALI_MAVEN_MIRROR=${USE_ALI_MAVEN_MIRROR:-$DEFAULT_USE_ALI_MAVEN_MIRROR}

# Set timezone name
TIMEZONE=${TIMEZONE:-$DEFAULT_TIMEZONE}

# Set operating system
OS_IMAGE=${OS_IMAGE:-$DEFAULT_OS_IMAGE}
OS_VERSION=${OS_VERSION:-$DEFAULT_OS_VERSION}

# Both values are embedded in the image reference built below
# (<image>-<os image>:<os version>). If either one is empty the reference is
# malformed, so stop here with a clear message instead of letting docker
# reject the tag later.
if [ -z "$OS_IMAGE" ] || [ -z "$OS_VERSION" ]
then
  echo "OS_IMAGE and OS_VERSION must both be set (e.g. OS_IMAGE=centos OS_VERSION=8)" >&2
  exit 1
fi

# Build will result in this image
DOCKER_TARGET_IMAGE_BUILDENV=${DOCKER_TARGET_IMAGE_BUILDENV:-$DEFAULT_DOCKER_TARGET_IMAGE_BUILDENV}

DOCKER_TARGET_IMAGE_BUILDENV_WITH_OS_IMAGE="$DOCKER_TARGET_IMAGE_BUILDENV-$OS_IMAGE:$OS_VERSION"

# An empty MAVEN_MIRROR_URL is passed to the image build when the mirror
# is off.
if [ "$USE_ALI_MAVEN_MIRROR" == "ON" ]
then
  MAVEN_MIRROR_URL='https://maven.aliyun.com/repository/public'
else
  MAVEN_MIRROR_URL=
fi

##

# The docker build command line is accumulated as one whitespace-separated
# string and word-split on purpose when docker is invoked below, so none of
# the interpolated values may contain whitespace.
BUILDENV_DOCKER_BUILD_ARGS=

BUILDENV_DOCKER_BUILD_ARGS="$BUILDENV_DOCKER_BUILD_ARGS --ulimit nofile=8192:8192"
BUILDENV_DOCKER_BUILD_ARGS="$BUILDENV_DOCKER_BUILD_ARGS --build-arg BUILDKIT_INLINE_CACHE=1"
BUILDENV_DOCKER_BUILD_ARGS="$BUILDENV_DOCKER_BUILD_ARGS --build-arg TIMEZONE=$TIMEZONE"
BUILDENV_DOCKER_BUILD_ARGS="$BUILDENV_DOCKER_BUILD_ARGS --build-arg OS_IMAGE=$OS_IMAGE --build-arg OS_VERSION=$OS_VERSION"
BUILDENV_DOCKER_BUILD_ARGS="$BUILDENV_DOCKER_BUILD_ARGS --build-arg MAVEN_MIRROR_URL=$MAVEN_MIRROR_URL"
BUILDENV_DOCKER_BUILD_ARGS="$BUILDENV_DOCKER_BUILD_ARGS --build-arg HTTP_PROXY_HOST=$HTTP_PROXY_HOST"
BUILDENV_DOCKER_BUILD_ARGS="$BUILDENV_DOCKER_BUILD_ARGS --build-arg HTTP_PROXY_PORT=$HTTP_PROXY_PORT"
BUILDENV_DOCKER_BUILD_ARGS="$BUILDENV_DOCKER_BUILD_ARGS -f $BASEDIR/dockerfile-buildenv"
BUILDENV_DOCKER_BUILD_ARGS="$BUILDENV_DOCKER_BUILD_ARGS --target gluten-buildenv"
BUILDENV_DOCKER_BUILD_ARGS="$BUILDENV_DOCKER_BUILD_ARGS -t $DOCKER_TARGET_IMAGE_BUILDENV_WITH_OS_IMAGE"

# Reuse layers from a previously pushed image when a cache registry is set.
if [ -n "$DOCKER_CACHE_REGISTRY" ]
then
  BUILDENV_DOCKER_BUILD_ARGS="$BUILDENV_DOCKER_BUILD_ARGS --cache-from $DOCKER_CACHE_REGISTRY/$DOCKER_TARGET_IMAGE_BUILDENV_WITH_OS_IMAGE"
fi

# Build context is the directory of this script.
BUILDENV_DOCKER_BUILD_ARGS="$BUILDENV_DOCKER_BUILD_ARGS $BASEDIR"

# Intentionally unquoted: the string must be split back into arguments.
# shellcheck disable=SC2086
docker build $BUILDENV_DOCKER_BUILD_ARGS

# Publish the freshly built image when a push registry is configured.
if [ -n "$DOCKER_PUSH_REGISTRY" ]
then
  docker tag "$DOCKER_TARGET_IMAGE_BUILDENV_WITH_OS_IMAGE" "$DOCKER_PUSH_REGISTRY/$DOCKER_TARGET_IMAGE_BUILDENV_WITH_OS_IMAGE"
  docker push "$DOCKER_PUSH_REGISTRY/$DOCKER_TARGET_IMAGE_BUILDENV_WITH_OS_IMAGE"
fi

# EOF
59 changes: 59 additions & 0 deletions tools/gluten-te/github_action/cbash-build.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
#!/bin/bash
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

set -ex

# Directory containing this script. "$0" is quoted so that a checkout path
# containing whitespace is not split into several arguments to dirname.
BASEDIR=$(dirname "$0")

# Sourcing build.sh builds the images and leaves
# DOCKER_TARGET_IMAGE_BUILD_WITH_OS_IMAGE set for the docker run below.
# NOTE(review): the DEFAULT_* fallbacks used here are expected to come from
# the configuration loaded along the way - confirm they are all defined.
source "$BASEDIR/build.sh"

# Non-interactive during docker run
NON_INTERACTIVE=${NON_INTERACTIVE:-$DEFAULT_NON_INTERACTIVE}

# Do not remove stopped docker container
PRESERVE_CONTAINER=${PRESERVE_CONTAINER:-$DEFAULT_PRESERVE_CONTAINER}

# Docker options
EXTRA_DOCKER_OPTIONS=${EXTRA_DOCKER_OPTIONS:-$DEFAULT_EXTRA_DOCKER_OPTIONS}

# Whether to mount Maven cache
MOUNT_MAVEN_CACHE=${MOUNT_MAVEN_CACHE:-$DEFAULT_MOUNT_MAVEN_CACHE}

# The docker run options are accumulated as one whitespace-separated string
# and word-split on purpose at the call site; EXTRA_DOCKER_OPTIONS may itself
# carry several options, so it has to stay a plain string.
CBASH_DOCKER_RUN_ARGS=
# Attach a TTY unless running non-interactively.
if [ "$NON_INTERACTIVE" != "ON" ]
then
  CBASH_DOCKER_RUN_ARGS="$CBASH_DOCKER_RUN_ARGS -it"
fi
# Remove the container on exit unless it should be preserved.
if [ "$PRESERVE_CONTAINER" != "ON" ]
then
  CBASH_DOCKER_RUN_ARGS="$CBASH_DOCKER_RUN_ARGS --rm"
fi
CBASH_DOCKER_RUN_ARGS="$CBASH_DOCKER_RUN_ARGS --init"
CBASH_DOCKER_RUN_ARGS="$CBASH_DOCKER_RUN_ARGS --privileged"
CBASH_DOCKER_RUN_ARGS="$CBASH_DOCKER_RUN_ARGS --ulimit nofile=65536:65536"
CBASH_DOCKER_RUN_ARGS="$CBASH_DOCKER_RUN_ARGS --ulimit core=-1"
CBASH_DOCKER_RUN_ARGS="$CBASH_DOCKER_RUN_ARGS --security-opt seccomp=unconfined"
# Optionally share the host's Maven repository with the container.
if [ "$MOUNT_MAVEN_CACHE" == "ON" ]
then
  CBASH_DOCKER_RUN_ARGS="$CBASH_DOCKER_RUN_ARGS -v $HOME/.m2/repository:/root/.m2/repository"
fi
# The host's ccache directory is always mounted.
CBASH_DOCKER_RUN_ARGS="$CBASH_DOCKER_RUN_ARGS -v $HOME/.ccache:/root/.ccache"
CBASH_DOCKER_RUN_ARGS="$CBASH_DOCKER_RUN_ARGS $EXTRA_DOCKER_OPTIONS"

# All script arguments are joined into one command string that is run by
# bash inside the container, after changing to /opt/gluten.
CBASH_BASH_ARGS="$*"
BASH_ARGS="$CBASH_BASH_ARGS"

# CBASH_DOCKER_RUN_ARGS is intentionally unquoted (see above); the image
# reference is a single argument and is quoted.
# shellcheck disable=SC2086
docker run $CBASH_DOCKER_RUN_ARGS "$DOCKER_TARGET_IMAGE_BUILD_WITH_OS_IMAGE" bash -c "cd /opt/gluten && $BASH_ARGS"
Loading

0 comments on commit b9cb014

Please sign in to comment.