Skip to content

Commit

Permalink
sync recipe with conda-forge feedstock
Browse files Browse the repository at this point in the history
  • Loading branch information
h-vetinari committed Sep 12, 2022
1 parent 6c675c3 commit bc7ce97
Show file tree
Hide file tree
Showing 6 changed files with 97 additions and 73 deletions.
36 changes: 19 additions & 17 deletions dev/tasks/conda-recipes/arrow-cpp/bld-arrow.bat
Original file line number Diff line number Diff line change
Expand Up @@ -15,38 +15,40 @@ if "%cuda_compiler_version%"=="None" (
)

cmake -G "Ninja" ^
-DBUILD_SHARED_LIBS=ON ^
-DCMAKE_INSTALL_PREFIX="%LIBRARY_PREFIX%" ^
-DARROW_DEPENDENCY_SOURCE=SYSTEM ^
-DARROW_PACKAGE_PREFIX="%LIBRARY_PREFIX%" ^
-DLLVM_TOOLS_BINARY_DIR="%LIBRARY_BIN%" ^
-DPython3_EXECUTABLE="%PYTHON%" ^
-DARROW_WITH_BZ2:BOOL=ON ^
-DARROW_WITH_ZLIB:BOOL=ON ^
-DARROW_WITH_ZSTD:BOOL=ON ^
-DARROW_WITH_LZ4:BOOL=ON ^
-DARROW_WITH_SNAPPY:BOOL=ON ^
-DARROW_WITH_BROTLI:BOOL=ON ^
-DARROW_BOOST_USE_SHARED:BOOL=ON ^
-DARROW_BUILD_TESTS:BOOL=OFF ^
-DARROW_BUILD_UTILITIES:BOOL=OFF ^
-DARROW_BUILD_STATIC:BOOL=OFF ^
-DCMAKE_BUILD_TYPE=release ^
-DARROW_SIMD_LEVEL=NONE ^
-DARROW_PYTHON:BOOL=ON ^
-DARROW_MIMALLOC:BOOL=ON ^
-DARROW_DATASET:BOOL=ON ^
-DARROW_FLIGHT:BOOL=ON ^
-DARROW_FLIGHT_REQUIRE_TLSCREDENTIALSOPTIONS:BOOL=ON ^
-DARROW_HDFS:BOOL=ON ^
-DARROW_GCS:BOOL=ON ^
-DARROW_HDFS:BOOL=ON ^
-DARROW_MIMALLOC:BOOL=ON ^
-DARROW_PARQUET:BOOL=ON ^
-DPARQUET_REQUIRE_ENCRYPTION:BOOL=ON ^
-DARROW_GANDIVA:BOOL=ON ^
-DARROW_ORC:BOOL=ON ^
-DARROW_ORC:BOOL=OFF ^
-DARROW_PYTHON:BOOL=ON ^
-DARROW_S3:BOOL=ON ^
-DBoost_NO_BOOST_CMAKE=ON ^
-DARROW_SIMD_LEVEL:STRING=NONE ^
-DARROW_SUBSTRAIT:BOOL=ON ^
-DARROW_WITH_BROTLI:BOOL=ON ^
-DARROW_WITH_BZ2:BOOL=ON ^
-DARROW_WITH_LZ4:BOOL=ON ^
-DARROW_WITH_SNAPPY:BOOL=ON ^
-DARROW_WITH_ZLIB:BOOL=ON ^
-DARROW_WITH_ZSTD:BOOL=ON ^
-DBUILD_SHARED_LIBS=ON ^
-DCMAKE_BUILD_TYPE=release ^
-DCMAKE_CXX_STANDARD=17 ^
-DCMAKE_INSTALL_PREFIX="%LIBRARY_PREFIX%" ^
-DCMAKE_UNITY_BUILD=ON ^
-DBoost_NO_BOOST_CMAKE=ON ^
-DLLVM_TOOLS_BINARY_DIR="%LIBRARY_BIN%" ^
-DPython3_EXECUTABLE="%PYTHON%" ^
%EXTRA_CMAKE_ARGS% ^
..
if errorlevel 1 exit 1
Expand Down
6 changes: 3 additions & 3 deletions dev/tasks/conda-recipes/arrow-cpp/bld-pyarrow.bat
Original file line number Diff line number Diff line change
Expand Up @@ -17,14 +17,14 @@ pushd "%SRC_DIR%"\python
SET ARROW_HOME=%LIBRARY_PREFIX%
SET SETUPTOOLS_SCM_PRETEND_VERSION=%PKG_VERSION%
SET PYARROW_BUILD_TYPE=release
SET PYARROW_WITH_GCS=1
SET PYARROW_WITH_S3=1
SET PYARROW_WITH_HDFS=1
SET PYARROW_WITH_DATASET=1
SET PYARROW_WITH_FLIGHT=1
SET PYARROW_WITH_GANDIVA=1
SET PYARROW_WITH_GCS=1
SET PYARROW_WITH_HDFS=1
SET PYARROW_WITH_PARQUET=1
SET PYARROW_WITH_PARQUET_ENCRYPTION=1
SET PYARROW_WITH_S3=1
SET PYARROW_CMAKE_GENERATOR=Ninja

:: Enable CUDA support
Expand Down
27 changes: 12 additions & 15 deletions dev/tasks/conda-recipes/arrow-cpp/build-arrow.sh
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#!/usr/bin/env bash
#!/bin/bash

set -e
set -x
Expand Down Expand Up @@ -36,34 +36,29 @@ fi

if [[ "${target_platform}" == "osx-arm64" ]]; then
# We need llvm 11+ support in Arrow for this
# Tell jemalloc to support 16K page size on apple arm64 silicon
EXTRA_CMAKE_ARGS=" ${EXTRA_CMAKE_ARGS} -DARROW_GANDIVA=OFF -DARROW_JEMALLOC_LG_PAGE=14"
EXTRA_CMAKE_ARGS=" ${EXTRA_CMAKE_ARGS} -DARROW_GANDIVA=OFF"
sed -ie "s;protoc-gen-grpc.*$;protoc-gen-grpc=${BUILD_PREFIX}/bin/grpc_cpp_plugin\";g" ../src/arrow/flight/CMakeLists.txt
elif [[ "${target_platform}" == "linux-aarch64" ]]; then
# Tell jemalloc to support both 4k and 64k page arm64 systems
# See https://github.com/apache/arrow/pull/10940
EXTRA_CMAKE_ARGS=" ${EXTRA_CMAKE_ARGS} -DARROW_GANDIVA=ON -DARROW_JEMALLOC_LG_PAGE=16"
sed -ie 's;"--with-jemalloc-prefix\=je_arrow_";"--with-jemalloc-prefix\=je_arrow_" "--with-lg-page\=14";g' ../cmake_modules/ThirdpartyToolchain.cmake
else
EXTRA_CMAKE_ARGS=" ${EXTRA_CMAKE_ARGS} -DARROW_GANDIVA=ON"
fi

if [[ "${target_platform}" == osx-* ]]; then
EXTRA_CMAKE_ARGS="${EXTRA_CMAKE_ARGS} -DCMAKE_CXX_STANDARD=14"
else
EXTRA_CMAKE_ARGS="${EXTRA_CMAKE_ARGS} -DCMAKE_CXX_STANDARD=17"
# Limit number of threads used to avoid hardware oversubscription
if [[ "${target_platform}" == "linux-aarch64" ]] || [[ "${target_platform}" == "linux-ppc64le" ]]; then
export CMAKE_BUILD_PARALLEL_LEVEL=3
fi

cmake \
cmake -GNinja \
-DARROW_BOOST_USE_SHARED=ON \
-DARROW_BUILD_BENCHMARKS=OFF \
-DARROW_BUILD_STATIC=OFF \
-DARROW_BUILD_TESTS=OFF \
-DARROW_BUILD_UTILITIES=OFF \
-DBUILD_SHARED_LIBS=ON \
-DARROW_DATASET=ON \
-DARROW_DEPENDENCY_SOURCE=SYSTEM \
-DARROW_FLIGHT=ON \
-DARROW_FLIGHT_REQUIRE_TLSCREDENTIALSOPTIONS=ON \
-DARROW_FLIGHT_SQL=ON \
-DARROW_GCS=ON \
-DARROW_HDFS=ON \
-DARROW_JEMALLOC=ON \
Expand All @@ -76,20 +71,22 @@ cmake \
-DARROW_PYTHON=ON \
-DARROW_S3=ON \
-DARROW_SIMD_LEVEL=NONE \
-DARROW_SUBSTRAIT=ON \
-DARROW_USE_LD_GOLD=ON \
-DARROW_WITH_BROTLI=ON \
-DARROW_WITH_BZ2=ON \
-DARROW_WITH_LZ4=ON \
-DARROW_WITH_SNAPPY=ON \
-DARROW_WITH_ZLIB=ON \
-DARROW_WITH_ZSTD=ON \
-DBUILD_SHARED_LIBS=ON \
-DCMAKE_BUILD_TYPE=release \
-DCMAKE_CXX_STANDARD=17 \
-DCMAKE_INSTALL_LIBDIR=lib \
-DCMAKE_INSTALL_PREFIX=$PREFIX \
-DLLVM_TOOLS_BINARY_DIR=$PREFIX/bin \
-DPython3_EXECUTABLE=${PYTHON} \
-DProtobuf_PROTOC_EXECUTABLE=$BUILD_PREFIX/bin/protoc \
-GNinja \
${EXTRA_CMAKE_ARGS} \
..

Expand All @@ -102,6 +99,6 @@ if [[ "${target_platform}" == "osx-arm64" ]]; then
sed -ie 's/tpidr_el0/tpidrro_el0/g' mimalloc_ep-prefix/src/mimalloc_ep/include/mimalloc-internal.h
fi

ninja install
cmake --build . --target install --config Release

popd
5 changes: 5 additions & 0 deletions dev/tasks/conda-recipes/arrow-cpp/build-pyarrow.sh
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,11 @@ if [[ "${target_platform}" == "linux-aarch64" ]]; then
export PYARROW_CMAKE_OPTIONS="-DARROW_ARMV8_ARCH=armv8-a ${PYARROW_CMAKE_OPTIONS}"
fi

# Cap the build parallelism at 4 on linux-aarch64 and linux-ppc64le so the
# build does not oversubscribe the hardware; other targets are left untouched.
case "${target_platform}" in
  linux-aarch64 | linux-ppc64le)
    export CMAKE_BUILD_PARALLEL_LEVEL=4
    ;;
esac

cd python

$PYTHON setup.py \
Expand Down
91 changes: 53 additions & 38 deletions dev/tasks/conda-recipes/arrow-cpp/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,15 @@ source:

build:
number: 0
# for cuda on win/linux, building with 9.2 is enough to be compatible with all later versions,
# since arrow is only using libcuda, and not libcudart.
skip: true # [(win or linux) and cuda_compiler_version not in ("None", "10.2")]
# for cuda support, building with one version is enough to be compatible with
# all later versions, since arrow is only using libcuda, and not libcudart.
skip: true # [(win or linux64) and cuda_compiler_version not in ("None", "10.2")]
skip: true # [linux and (aarch64 or ppc64le) and cuda_compiler_version not in ("None", "11.2")]
skip: true # [osx and cuda_compiler_version != "None"]
# CUDA builds on ppc64le currently run out of time on Travis CI.
# It may be possible to move these to cross-compilation, but this will take additional work.
# Hence this is skipped for now until this can be addressed.
skip: true # [linux and ppc64le and cuda_compiler_version != "None"]
run_exports:
- {{ pin_subpackage("arrow-cpp", max_pin="x.x.x") }}

Expand All @@ -26,7 +31,7 @@ outputs:
version: {{ build_ext_version }}
build:
number: {{ proc_build_number }}
string: "{{ build_ext }}"
string: {{ build_ext }}
test:
commands:
- exit 0
Expand All @@ -35,7 +40,7 @@ outputs:
license: Apache-2.0
license_file:
- LICENSE.txt
summary: 'A meta-package to select Arrow build variant'
summary: A meta-package to select Arrow build variant

- name: arrow-cpp
script: build-arrow.sh # [not win]
Expand All @@ -47,8 +52,7 @@ outputs:
- {{ pin_subpackage("arrow-cpp", max_pin="x.x.x") }}
ignore_run_exports:
- cudatoolkit
track_features:
{{ "- arrow-cuda" if cuda_enabled else "" }}
track_features: {{ "[arrow-cuda]" if cuda_enabled else "" }}
requirements:
build:
- python # [build_platform != target_platform]
Expand All @@ -58,7 +62,7 @@ outputs:
- gnuconfig # [osx and arm64]
- libprotobuf
- grpc-cpp
# aws-sdk-cpp 1.8.* doesn't work with newer CMake
# aws-sdk-cpp 1.8.* doesn't work with a newer CMake
- cmake <3.22
- autoconf # [unix]
- ninja
Expand All @@ -69,6 +73,9 @@ outputs:
host:
# https://issues.apache.org/jira/browse/ARROW-15141
- aws-sdk-cpp 1.8.186
# abseil is only here to help conda pick the right constraints for pyarrow, see
# https://github.com/conda-forge/arrow-cpp-feedstock/pull/815#issuecomment-1216713245
- libabseil
- boost-cpp >=1.70
- brotli
- bzip2
Expand All @@ -78,13 +85,14 @@ outputs:
- google-cloud-cpp
- grpc-cpp
- libprotobuf
- clangdev 10 # [not (osx and arm64)]
- llvmdev 10 # [not (osx and arm64)]
- clangdev 14 # [not (osx and arm64)]
- llvmdev 14 # [not (osx and arm64)]
- libutf8proc
- lz4-c
- numpy
- orc # [unix]
- openssl
# gandiva depends on openssl
- openssl # [not (osx and arm64)]
- orc # [unix]
- python
- rapidjson
- re2
Expand Down Expand Up @@ -112,6 +120,7 @@ outputs:
# headers
- test -f $PREFIX/include/arrow/api.h # [unix]
- test -f $PREFIX/include/arrow/flight/types.h # [unix]
- test -f $PREFIX/include/arrow/flight/sql/api.h # [unix]
- test -f $PREFIX/include/plasma/client.h # [unix]
- test -f $PREFIX/include/gandiva/engine.h # [unix and not (osx and arm64)]
- test -f $PREFIX/include/parquet/api/reader.h # [unix]
Expand All @@ -120,39 +129,47 @@ outputs:
- if not exist %LIBRARY_INC%\\parquet\\api\\reader.h exit 1 # [win]

# shared
- test -f $PREFIX/lib/libarrow.so # [linux]
- test -f $PREFIX/lib/libarrow_dataset.so # [linux]
- test -f $PREFIX/lib/libarrow_flight.so # [linux]
- test -f $PREFIX/lib/libparquet.so # [linux]
- test -f $PREFIX/lib/libgandiva.so # [linux]
- test -f $PREFIX/lib/libplasma.so # [linux]
- test -f $PREFIX/lib/libarrow.so # [linux]
- test -f $PREFIX/lib/libarrow_dataset.so # [linux]
- test -f $PREFIX/lib/libarrow_flight.so # [linux]
- test -f $PREFIX/lib/libarrow_flight_sql.so # [linux]
- test -f $PREFIX/lib/libarrow_python.so # [linux]
- test -f $PREFIX/lib/libparquet.so # [linux]
- test -f $PREFIX/lib/libgandiva.so # [linux]
- test -f $PREFIX/lib/libplasma.so # [linux]
- test -f $PREFIX/lib/libarrow_cuda${SHLIB_EXT} # [(cuda_compiler_version != "None") and unix]
- test ! -f $PREFIX/lib/libarrow_cuda${SHLIB_EXT} # [(cuda_compiler_version == "None") and unix]
- if not exist %PREFIX%\\Library\\bin\\arrow_cuda.dll exit 1 # [(cuda_compiler_version != "None") and win]
- if exist %PREFIX%\\Library\\bin\\arrow_cuda.dll exit 1 # [(cuda_compiler_version == "None") and win]
- test -f $PREFIX/lib/libarrow.dylib # [osx]
- test -f $PREFIX/lib/libarrow_dataset.dylib # [osx]
- test -f $PREFIX/lib/libarrow_python.dylib # [osx]
- test -f $PREFIX/lib/libgandiva.dylib # [osx and not arm64]
- test -f $PREFIX/lib/libparquet.dylib # [osx]
- test -f $PREFIX/lib/libplasma.dylib # [osx]
- if not exist %PREFIX%\\Library\\bin\\arrow.dll exit 1 # [win]
- if not exist %PREFIX%\\Library\\bin\\arrow_dataset.dll exit 1 # [win]
- if not exist %PREFIX%\\Library\\bin\\arrow_flight.dll exit 1 # [win]
- if not exist %PREFIX%\\Library\\bin\\arrow_python.dll exit 1 # [win]
- if not exist %PREFIX%\\Library\\bin\\parquet.dll exit 1 # [win]
- if not exist %PREFIX%\\Library\\bin\\gandiva.dll exit 1 # [win]

# absence of static libraries
- test ! -f $PREFIX/lib/libarrow.a # [unix]
- test ! -f $PREFIX/lib/libarrow_dataset.a # [unix]
- test ! -f $PREFIX/lib/libarrow_flight.a # [unix]
- test ! -f $PREFIX/lib/libplasma.a # [unix]
- test ! -f $PREFIX/lib/libparquet.a # [unix]
- test ! -f $PREFIX/lib/libgandiva.a # [unix]
- if exist %PREFIX%\\Library\\lib\\arrow_static.lib exit 1 # [win]
- if exist %PREFIX%\\Library\\lib\\arrow_dataset_static.lib exit 1 # [win]
- if exist %PREFIX%\\Library\\lib\\arrow_flight_static.lib exit 1 # [win]
- if exist %PREFIX%\\Library\\lib\\parquet_static.lib exit 1 # [win]
- if exist %PREFIX%\\Library\\lib\\gandiva_static.lib exit 1 # [win]
- test ! -f $PREFIX/lib/libarrow.a # [unix]
- test ! -f $PREFIX/lib/libarrow_dataset.a # [unix]
- test ! -f $PREFIX/lib/libarrow_flight.a # [unix]
- test ! -f $PREFIX/lib/libarrow_flight_sql.a # [unix]
- test ! -f $PREFIX/lib/libarrow_python.a # [unix]
- test ! -f $PREFIX/lib/libplasma.a # [unix]
- test ! -f $PREFIX/lib/libparquet.a # [unix]
- test ! -f $PREFIX/lib/libgandiva.a # [unix]
- if exist %PREFIX%\\Library\\lib\\arrow_static.lib exit 1 # [win]
- if exist %PREFIX%\\Library\\lib\\arrow_dataset_static.lib exit 1 # [win]
- if exist %PREFIX%\\Library\\lib\\arrow_flight_static.lib exit 1 # [win]
- if exist %PREFIX%\\Library\\lib\\arrow_flight_sql_static.lib exit 1 # [win]
- if exist %PREFIX%\\Library\\lib\\arrow_python_static.lib exit 1 # [win]
- if exist %PREFIX%\\Library\\lib\\parquet_static.lib exit 1 # [win]
- if exist %PREFIX%\\Library\\lib\\gandiva_static.lib exit 1 # [win]

- name: pyarrow
script: build-pyarrow.sh # [not win]
Expand All @@ -164,8 +181,7 @@ outputs:
- cudatoolkit
ignore_run_exports_from:
- openssl
track_features:
{{ "- arrow-cuda" if cuda_enabled else "" }}
track_features: {{ "[arrow-cuda]" if cuda_enabled else "" }}
requirements:
build:
- python # [build_platform != target_platform]
Expand All @@ -181,12 +197,10 @@ outputs:
- {{ compiler("cuda") }} # [cuda_compiler_version != "None"]
host:
- {{ pin_subpackage('arrow-cpp', exact=True) }}
- clangdev 10 # [not (osx and arm64)]
- cython
- llvmdev 10 # [not (osx and arm64)]
- numpy
- openssl
- python
- openssl
- setuptools
- setuptools_scm
- six
Expand All @@ -208,6 +222,8 @@ outputs:
summary: Python libraries for Apache Arrow

test:
files:
- test_read_parquet.py
imports:
- pyarrow
- pyarrow.dataset
Expand All @@ -230,6 +246,7 @@ outputs:
- if exist %SP_DIR%/pyarrow/tests/test_array.py exit 1 # [win]
# Need to remove dot from PY_VER; %MYVAR:x=y% replaces "x" in %MYVAR% with "y"
- if not exist %SP_DIR%/pyarrow/_cuda.cp%PY_VER:.=%-win_amd64.pyd exit 1 # [win and cuda_compiler_version != "None"]
- python test_read_parquet.py

- name: pyarrow-tests
script: build-pyarrow.sh # [not win]
Expand All @@ -241,8 +258,7 @@ outputs:
- cudatoolkit
ignore_run_exports_from:
- openssl
track_features:
{{ "- arrow-cuda" if cuda_enabled else "" }}
track_features: {{ "[arrow-cuda]" if cuda_enabled else "" }}
requirements:
build:
- python # [build_platform != target_platform]
Expand All @@ -259,9 +275,7 @@ outputs:
host:
- {{ pin_subpackage('arrow-cpp', exact=True) }}
- {{ pin_subpackage('pyarrow', exact=True) }}
- clangdev 10 # [not (osx and arm64)]
- cython
- llvmdev 10 # [not (osx and arm64)]
- numpy
- python
- openssl
Expand Down Expand Up @@ -310,3 +324,4 @@ extra:
- pearu
- nealrichardson
- jakirkham
- h-vetinari
5 changes: 5 additions & 0 deletions dev/tasks/conda-recipes/arrow-cpp/test_read_parquet.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
import pyarrow as pa
import pyarrow.parquet as pq

# Smoke test: build a tiny single-column table and write it to a Parquet
# file in the current working directory.
# NOTE(review): despite the file name, nothing is read back here -- confirm
# whether a read step was intended.
columns = {"a": [1, 2]}
table = pa.Table.from_pydict(columns)
pq.write_table(table, "test.parquet")

0 comments on commit bc7ce97

Please sign in to comment.