Skip to content

Commit

Permalink
ARROW-17021: [C++][R][CI] Enable use of sccache in crossbow (apache#1…
Browse files Browse the repository at this point in the history
…3556)

This is a first stab at enabling sccache. I used the R nightly job as an example because it compiles Arrow on macOS, Windows and Linux.
I want to open this for feedback specifically with an eye towards the cmake changes I implemented which enable this as long as the correct envvars (AWS creds + bucket) are set and sccache is available. My hope was to keep the changes required to activate sccache for existing jobs as minimal as possible.

While working on this I also noticed that `ThirdpartyToolchain.cmake` is not built using (s)ccache; the flags had to be explicitly set. That fix alone should speed up CI across the board (in arrow and crossbow).
For the R task this results in a build time reduction of 40-70% 🚀 

Here is a run of the same task without caching: https://github.com/ursacomputing/crossbow/actions/runs/2563558583

cc: @kszucs @raulcd @kou

Thanks for your input!


Authored-by: Jacob Wujciak-Jens <jacob@wujciak.de>
Signed-off-by: Sutou Kouhei <kou@clear-code.com>
  • Loading branch information
assignUser authored Sep 16, 2022
1 parent decddbb commit c58e6a3
Show file tree
Hide file tree
Showing 23 changed files with 219 additions and 38 deletions.
15 changes: 10 additions & 5 deletions ci/docker/centos-7-cpp.dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
FROM centos:centos7

RUN yum install -y \
curl \
diffutils \
gcc-c++ \
libcurl-devel \
Expand All @@ -31,8 +32,12 @@ ARG cmake=3.23.1
RUN mkdir /opt/cmake-${cmake}
RUN wget -nv -O - https://github.com/Kitware/CMake/releases/download/v${cmake}/cmake-${cmake}-Linux-x86_64.tar.gz | \
tar -xzf - --strip-components=1 -C /opt/cmake-${cmake}
# Install sccache via the helper script: build "unknown-linux-musl",
# install prefix /usr/local/bin.
COPY ci/scripts/install_sccache.sh /arrow/ci/scripts/
RUN bash /arrow/ci/scripts/install_sccache.sh unknown-linux-musl /usr/local/bin

# Toolchain environment, defined exactly once.
ENV PATH=/opt/cmake-${cmake}/bin:$PATH \
    CC=/usr/bin/gcc \
    CXX=/usr/bin/g++ \
    ARROW_R_DEV=TRUE

# EXTRA_CMAKE_FLAGS lives in its own ENV instruction on purpose: variable
# substitution inside a single ENV instruction uses the values that were in
# effect *before* that instruction, so $CC/$CXX would not see the values
# assigned alongside them.
ENV EXTRA_CMAKE_FLAGS="-DCMAKE_C_COMPILER=$CC -DCMAKE_CXX_COMPILER=$CXX"
3 changes: 3 additions & 0 deletions ci/docker/conda-cpp.dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,9 @@ RUN mamba install -q -y \
# Copy in and run the GCS testbench install helper. The destination ends with
# a slash so it is always treated as a directory, even if it does not exist yet.
COPY ci/scripts/install_gcs_testbench.sh /arrow/ci/scripts/
RUN /arrow/ci/scripts/install_gcs_testbench.sh default

# Install sccache via the helper script: build "unknown-linux-musl",
# install prefix /usr/local/bin.
COPY ci/scripts/install_sccache.sh /arrow/ci/scripts/
RUN /arrow/ci/scripts/install_sccache.sh unknown-linux-musl /usr/local/bin

ENV ARROW_BUILD_TESTS=ON \
ARROW_DATASET=ON \
ARROW_DEPENDENCY_SOURCE=CONDA \
Expand Down
2 changes: 1 addition & 1 deletion ci/docker/conda.dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ FROM ${arch}/ubuntu:18.04
# install build essentials
# Single apt transaction: refresh the index, install the tools (curl is needed
# to download sccache), then drop the package caches to keep the layer small.
RUN export DEBIAN_FRONTEND=noninteractive && \
    apt-get update -y -q && \
    apt-get install -y -q curl wget tzdata libc6-dbg gdb && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/*

Expand Down
4 changes: 4 additions & 0 deletions ci/docker/debian-10-cpp.dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ RUN apt-get update -y -q && \
ccache \
clang-${llvm} \
cmake \
curl \
g++ \
gcc \
gdb \
Expand Down Expand Up @@ -76,6 +77,9 @@ RUN apt-get update -y -q && \
# Copy in and run the MinIO install helper (arguments "latest" and /usr/local;
# presumably version and prefix -- confirm against the script).
COPY ci/scripts/install_minio.sh /arrow/ci/scripts/
RUN /arrow/ci/scripts/install_minio.sh latest /usr/local

# Install sccache via the helper script: build "unknown-linux-musl",
# install prefix /usr/local/bin.
COPY ci/scripts/install_sccache.sh /arrow/ci/scripts/
RUN /arrow/ci/scripts/install_sccache.sh unknown-linux-musl /usr/local/bin

ENV absl_SOURCE=BUNDLED \
ARROW_BUILD_TESTS=ON \
ARROW_DATASET=ON \
Expand Down
4 changes: 4 additions & 0 deletions ci/docker/debian-11-cpp.dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ RUN apt-get update -y -q && \
ccache \
clang-${llvm} \
cmake \
curl \
g++ \
gcc \
gdb \
Expand Down Expand Up @@ -78,6 +79,9 @@ RUN /arrow/ci/scripts/install_minio.sh latest /usr/local
# Copy in and run the GCS testbench install helper (the "default" argument is
# interpreted by that script).
COPY ci/scripts/install_gcs_testbench.sh /arrow/ci/scripts/
RUN /arrow/ci/scripts/install_gcs_testbench.sh default

# Install sccache via the helper script: build "unknown-linux-musl",
# install prefix /usr/local/bin.
COPY ci/scripts/install_sccache.sh /arrow/ci/scripts/
RUN /arrow/ci/scripts/install_sccache.sh unknown-linux-musl /usr/local/bin

ENV absl_SOURCE=BUNDLED \
ARROW_BUILD_TESTS=ON \
ARROW_DATASET=ON \
Expand Down
4 changes: 4 additions & 0 deletions ci/docker/fedora-35-cpp.dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ RUN dnf update -y && \
ccache \
clang-devel \
cmake \
curl \
curl-devel \
flatbuffers-devel \
gcc \
Expand Down Expand Up @@ -71,6 +72,9 @@ RUN /arrow/ci/scripts/install_minio.sh latest /usr/local
# Copy in and run the GCS testbench install helper (the "default" argument is
# interpreted by that script).
COPY ci/scripts/install_gcs_testbench.sh /arrow/ci/scripts/
RUN /arrow/ci/scripts/install_gcs_testbench.sh default

# Install sccache via the helper script: build "unknown-linux-musl",
# install prefix /usr/local/bin.
COPY ci/scripts/install_sccache.sh /arrow/ci/scripts/
RUN /arrow/ci/scripts/install_sccache.sh unknown-linux-musl /usr/local/bin

ENV absl_SOURCE=BUNDLED \
ARROW_BUILD_TESTS=ON \
ARROW_DEPENDENCY_SOURCE=SYSTEM \
Expand Down
5 changes: 5 additions & 0 deletions ci/docker/ubuntu-18.04-cpp.dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ RUN apt-get update -y -q && \
ca-certificates \
ccache \
cmake \
curl \
g++ \
gcc \
gdb \
Expand Down Expand Up @@ -100,6 +101,10 @@ RUN apt-get update -y -q && \
# - s3 tests would require boost-asio that is included since Boost 1.66.0
# ARROW-17051: this build uses static Protobuf, so we must also use
# static Arrow to run Flight/Flight SQL tests

# Install sccache via the helper script: build "unknown-linux-musl",
# install prefix /usr/local/bin.
COPY ci/scripts/install_sccache.sh /arrow/ci/scripts/
RUN /arrow/ci/scripts/install_sccache.sh unknown-linux-musl /usr/local/bin

ENV ARROW_BUILD_STATIC=ON \
ARROW_BUILD_TESTS=ON \
ARROW_DATASET=ON \
Expand Down
4 changes: 4 additions & 0 deletions ci/docker/ubuntu-20.04-cpp-minimal.dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ RUN apt-get update -y -q && \
build-essential \
ccache \
cmake \
curl \
git \
libssl-dev \
libcurl4-openssl-dev \
Expand Down Expand Up @@ -70,6 +71,9 @@ RUN /arrow/ci/scripts/install_minio.sh latest /usr/local
# Copy in and run the GCS testbench install helper (the "default" argument is
# interpreted by that script).
COPY ci/scripts/install_gcs_testbench.sh /arrow/ci/scripts/
RUN /arrow/ci/scripts/install_gcs_testbench.sh default

# Install sccache via the helper script: build "unknown-linux-musl",
# install prefix /usr/local/bin.
COPY ci/scripts/install_sccache.sh /arrow/ci/scripts/
RUN /arrow/ci/scripts/install_sccache.sh unknown-linux-musl /usr/local/bin

ENV ARROW_BUILD_TESTS=ON \
ARROW_DATASET=ON \
ARROW_FLIGHT=ON \
Expand Down
4 changes: 4 additions & 0 deletions ci/docker/ubuntu-20.04-cpp.dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ RUN apt-get update -y -q && \
ca-certificates \
ccache \
cmake \
curl \
g++ \
gcc \
gdb \
Expand Down Expand Up @@ -116,6 +117,9 @@ RUN /arrow/ci/scripts/install_gcs_testbench.sh default
# Copy in and run the Ceph install helper (no arguments).
COPY ci/scripts/install_ceph.sh /arrow/ci/scripts/
RUN /arrow/ci/scripts/install_ceph.sh

# Install sccache via the helper script: build "unknown-linux-musl",
# install prefix /usr/local/bin.
COPY ci/scripts/install_sccache.sh /arrow/ci/scripts/
RUN /arrow/ci/scripts/install_sccache.sh unknown-linux-musl /usr/local/bin

# Prioritize system packages and local installation
# The following dependencies will be downloaded due to missing/invalid packages
# provided by the distribution:
Expand Down
4 changes: 4 additions & 0 deletions ci/docker/ubuntu-22.04-cpp-minimal.dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ RUN apt-get update -y -q && \
build-essential \
ccache \
cmake \
curl \
git \
libssl-dev \
libcurl4-openssl-dev \
Expand Down Expand Up @@ -70,6 +71,9 @@ RUN /arrow/ci/scripts/install_minio.sh latest /usr/local
# Copy in and run the GCS testbench install helper (the "default" argument is
# interpreted by that script).
COPY ci/scripts/install_gcs_testbench.sh /arrow/ci/scripts/
RUN /arrow/ci/scripts/install_gcs_testbench.sh default

# Install sccache via the helper script: build "unknown-linux-musl",
# install prefix /usr/local/bin.
COPY ci/scripts/install_sccache.sh /arrow/ci/scripts/
RUN /arrow/ci/scripts/install_sccache.sh unknown-linux-musl /usr/local/bin

ENV ARROW_BUILD_TESTS=ON \
ARROW_DATASET=ON \
ARROW_FLIGHT=ON \
Expand Down
4 changes: 4 additions & 0 deletions ci/docker/ubuntu-22.04-cpp.dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ RUN apt-get update -y -q && \
ca-certificates \
ccache \
cmake \
curl \
gdb \
git \
libbenchmark-dev \
Expand Down Expand Up @@ -143,6 +144,9 @@ RUN /arrow/ci/scripts/install_minio.sh latest /usr/local
# Copy in and run the GCS testbench install helper (the "default" argument is
# interpreted by that script).
COPY ci/scripts/install_gcs_testbench.sh /arrow/ci/scripts/
RUN /arrow/ci/scripts/install_gcs_testbench.sh default

# Install sccache via the helper script: build "unknown-linux-musl",
# install prefix /usr/local/bin.
COPY ci/scripts/install_sccache.sh /arrow/ci/scripts/
RUN /arrow/ci/scripts/install_sccache.sh unknown-linux-musl /usr/local/bin

# Prioritize system packages and local installation
# The following dependencies will be downloaded due to missing/invalid packages
# provided by the distribution:
Expand Down
3 changes: 3 additions & 0 deletions ci/scripts/PKGBUILD
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,9 @@ build() {
# set the appropriate compiler definition.
export CPPFLAGS="-DUTF8PROC_STATIC"

# CMAKE_UNITY_BUILD is set to OFF as otherwise some compute functionality
# segfaults in tests

MSYS2_ARG_CONV_EXCL="-DCMAKE_INSTALL_PREFIX=" \
${MINGW_PREFIX}/bin/cmake.exe \
${ARROW_CPP_DIR} \
Expand Down
5 changes: 5 additions & 0 deletions ci/scripts/cpp_build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,11 @@ if [ "${ARROW_USE_CCACHE}" == "ON" ]; then
ccache -s
fi

# Print sccache statistics after the build, but only when an sccache binary
# can be resolved on PATH.
if command -v sccache > /dev/null 2>&1; then
  echo "=== sccache stats after the build ==="
  sccache --show-stats
fi

if [ "${BUILD_DOCS_CPP}" == "ON" ]; then
pushd ${source_dir}/apidoc
doxygen
Expand Down
Empty file modified ci/scripts/download_tz_database.sh
100644 → 100755
Empty file.
53 changes: 53 additions & 0 deletions ci/scripts/install_sccache.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
#!/usr/bin/env bash
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# Download a prebuilt sccache release, verify its SHA-256 checksum and unpack
# the binary into <prefix>.
#
# Arguments:
#   $1 build    release build suffix used in the archive name (required)
#   $2 prefix   directory the binary is unpacked into (required)
#   $3 arch     archive architecture, defaults to x86_64
#   $4 version  sccache version, defaults to 0.3.0

set -e

# <build> and <prefix> are mandatory, <arch> and <version> are optional, so
# between two and four arguments are valid.
if [ "$#" -lt 2 ] || [ "$#" -gt 4 ]; then
  echo "Usage: $0 <build> <prefix> [<arch> [<version>]]"
  echo "Will default to arch=x86_64 and version=0.3.0 "
  exit 1
fi

BUILD=$1
PREFIX=$2
ARCH=${3:-x86_64}
VERSION=${4:-0.3.0}

SCCACHE_URL="https://github.com/mozilla/sccache/releases/download/v$VERSION/sccache-v$VERSION-$ARCH-$BUILD.tar.gz"
SCCACHE_ARCHIVE=sccache.tar.gz

# Download archive and checksum. --fail makes curl exit non-zero on HTTP
# errors instead of saving the error page as if it were the archive.
curl --fail -L "$SCCACHE_URL" --output "$SCCACHE_ARCHIVE"
curl --fail -L "$SCCACHE_URL.sha256" --output "$SCCACHE_ARCHIVE.sha256"

# Verify the archive using the "<hash>  <file>" format sha256sum expects;
# with `set -e` a mismatch aborts the script.
echo "$(cat "$SCCACHE_ARCHIVE.sha256")  $SCCACHE_ARCHIVE" | sha256sum --check --status

# mkdir -p is a no-op when the directory already exists.
mkdir -p "$PREFIX"

# The pattern is quoted so that tar, not the shell, expands the wildcard.
tar -xzvf "$SCCACHE_ARCHIVE" --strip-components=1 --directory "$PREFIX" --wildcards 'sccache*/sccache*'
chmod u+x "$PREFIX/sccache"

if [ "${GITHUB_ACTIONS}" = "true" ]; then
  echo "$PREFIX" >> "$GITHUB_PATH"
  # Add executable for windows as mingw workaround.
  echo "SCCACHE_PATH=$PREFIX/sccache.exe" >> "$GITHUB_ENV"
fi
31 changes: 30 additions & 1 deletion cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -192,18 +192,47 @@ else()
set(PYTHON_EXECUTABLE ${Python3_EXECUTABLE})
endif()

# Use sccache as the C/C++ compiler launcher when ARROW_USE_SCCACHE is set and
# no launcher has been chosen yet.
if(ARROW_USE_SCCACHE
   AND NOT CMAKE_C_COMPILER_LAUNCHER
   AND NOT CMAKE_CXX_COMPILER_LAUNCHER)

  find_program(SCCACHE_FOUND sccache)

  if(NOT SCCACHE_FOUND AND DEFINED ENV{SCCACHE_PATH})
    # cmake has problems finding sccache from within mingw
    message(STATUS "Did not find sccache, using envvar fallback.")
    set(SCCACHE_FOUND $ENV{SCCACHE_PATH})
  endif()

  # Only use sccache if a storage backend is configured, i.e. at least one of
  # the environment variables below is defined.
  set(arrow_sccache_backend_configured FALSE)
  foreach(arrow_sccache_backend_envvar
          SCCACHE_AZURE_BLOB_CONTAINER
          SCCACHE_BUCKET
          SCCACHE_DIR
          SCCACHE_GCS_BUCKET
          SCCACHE_MEMCACHED
          SCCACHE_REDIS)
    if(DEFINED ENV{${arrow_sccache_backend_envvar}})
      set(arrow_sccache_backend_configured TRUE)
    endif()
  endforeach()

  if(SCCACHE_FOUND AND arrow_sccache_backend_configured)
    message(STATUS "Using sccache: ${SCCACHE_FOUND}")
    set(CMAKE_C_COMPILER_LAUNCHER ${SCCACHE_FOUND})
    set(CMAKE_CXX_COMPILER_LAUNCHER ${SCCACHE_FOUND})
  endif()

  # Do not leak the helper flag into the rest of the configure run.
  unset(arrow_sccache_backend_configured)
endif()

# Use ccache as the C/C++ compiler launcher when ARROW_USE_CCACHE is set and no
# launcher has been chosen yet (either by the user or by the sccache block).
if(ARROW_USE_CCACHE
   AND NOT CMAKE_C_COMPILER_LAUNCHER
   AND NOT CMAKE_CXX_COMPILER_LAUNCHER)

  find_program(CCACHE_FOUND ccache)

  if(CCACHE_FOUND)
    message(STATUS "Using ccache: ${CCACHE_FOUND}")
    set(CMAKE_C_COMPILER_LAUNCHER ${CCACHE_FOUND})
    set(CMAKE_CXX_COMPILER_LAUNCHER ${CCACHE_FOUND})
    # ARROW-3985: let ccache preserve C++ comments, because some of them may be
    # meaningful to the compiler
    # NOTE(review): set(ENV{...}) only changes the environment of the running
    # cmake process -- confirm the setting actually reaches ccache at build time.
    set(ENV{CCACHE_COMMENTS} "1")
  endif()
endif()

if(ARROW_USE_PRECOMPILED_HEADERS AND ${CMAKE_VERSION} VERSION_LESS "3.16.0")
Expand Down
3 changes: 3 additions & 0 deletions cpp/cmake_modules/DefineOptions.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,9 @@ if(ARROW_DEFINE_OPTIONS)

# Compiler cache: ccache, enabled by default.
define_option(ARROW_USE_CCACHE "Use ccache when compiling (if available)" ON)

# Compiler cache: sccache, enabled by default. The ";\" sequence continues the
# description on the next source line; the continuation line must stay
# unindented because its leading whitespace would become part of the string.
define_option(ARROW_USE_SCCACHE "Use sccache when compiling (if available),;\
takes precedence over ccache if a storage backend is configured" ON)

# Linker selection: ld.gold, disabled by default.
define_option(ARROW_USE_LD_GOLD "Use ld.gold for linking on Linux (if available)" OFF)

define_option(ARROW_USE_PRECOMPILED_HEADERS "Use precompiled headers when compiling"
Expand Down
2 changes: 2 additions & 0 deletions dev/tasks/docker-tests/github.linux.yml
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@ jobs:

- name: Execute Docker Build
shell: bash
env:
{{ macros.github_set_sccache_envvars()|indent(8) }}
run: |
archery docker run \
-e SETUPTOOLS_SCM_PRETEND_VERSION="{{ arrow.no_rc_version }}" \
Expand Down
13 changes: 13 additions & 0 deletions dev/tasks/macros.jinja
Original file line number Diff line number Diff line change
Expand Up @@ -365,3 +365,16 @@ on:
{% endfor %}
{% endif %}
{%- endmacro -%}

{#
  Emit `KEY: "value"` lines for the environment variables sccache reads:
  the AWS access key pair and the bucket name are taken from GitHub secrets,
  and SCCACHE_S3_KEY_PREFIX is set to `sccache_key_prefix` (default "sccache").
  Intended to be rendered under an `env:` key; the caller supplies the
  indentation (e.g. via the `indent` filter).
#}
{% macro github_set_sccache_envvars(sccache_key_prefix = "sccache") %}
{% set sccache_vars = {
"AWS_SECRET_ACCESS_KEY": '${{ secrets.AWS_SECRET_ACCESS_KEY }}',
"AWS_ACCESS_KEY_ID": '${{ secrets.AWS_ACCESS_KEY_ID }}',
"SCCACHE_BUCKET": '${{ secrets.SCCACHE_BUCKET }}',
"SCCACHE_S3_KEY_PREFIX": sccache_key_prefix
}
%}
{% for key, value in sccache_vars.items() %}
{{ key }}: "{{ value }}"
{% endfor %}
{% endmacro %}
Loading

0 comments on commit c58e6a3

Please sign in to comment.