From 668b1a7536dd694fba6fe006a323295f90f1c431 Mon Sep 17 00:00:00 2001 From: victor Date: Wed, 12 Nov 2025 03:18:20 +0000 Subject: [PATCH 01/15] cross compile --- CMakeLists.txt | 5 + chdb/build/build_static_lib_mac_on_linux.sh | 204 ++++++++++++++ chdb/build_mac_on_linux.sh | 278 ++++++++++++++++++++ cmake/darwin/toolchain-aarch64.cmake | 3 + cmake/darwin/toolchain-x86_64.cmake | 3 + cmake/tools.cmake | 2 + 6 files changed, 495 insertions(+) create mode 100755 chdb/build/build_static_lib_mac_on_linux.sh create mode 100755 chdb/build_mac_on_linux.sh diff --git a/CMakeLists.txt b/CMakeLists.txt index f798d19698c..cf0dd320236 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -353,6 +353,11 @@ set (CMAKE_ASM_FLAGS_RELWITHDEBINFO "${CMAKE_ASM_FLAGS_RELWITHDEBINFO} -fPI set (CMAKE_ASM_FLAGS_DEBUG "${CMAKE_ASM_FLAGS_DEBUG} -O${DEBUG_O_LEVEL} ${DEBUG_INFO_FLAGS} ${CMAKE_ASM_FLAGS_ADD}") if (OS_DARWIN) + # Set macOS deployment target if specified + if (CMAKE_OSX_DEPLOYMENT_TARGET) + set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,-macosx_version_min,${CMAKE_OSX_DEPLOYMENT_TARGET}") + set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -Wl,-macosx_version_min,${CMAKE_OSX_DEPLOYMENT_TARGET}") + endif() if (USE_PYTHON) set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,-undefined,dynamic_lookup") else() diff --git a/chdb/build/build_static_lib_mac_on_linux.sh b/chdb/build/build_static_lib_mac_on_linux.sh new file mode 100755 index 00000000000..e63878ee3ca --- /dev/null +++ b/chdb/build/build_static_lib_mac_on_linux.sh @@ -0,0 +1,204 @@ +#!/bin/bash + +set -e + +# Cross-compile chdb static library for macOS (x86_64 or arm64) on Linux +# Usage: ./build_static_lib_mac_on_linux.sh [x86_64|arm64] [Release|Debug] + +# Parse arguments +TARGET_ARCH=${1:-x86_64} +build_type=${2:-Release} + +# Validate architecture +if [[ "$TARGET_ARCH" != "x86_64" && "$TARGET_ARCH" != "arm64" ]]; then + echo "Error: Invalid architecture. 
Use 'x86_64' or 'arm64'" + echo "Usage: $0 [x86_64|arm64] [Release|Debug]" + exit 1 +fi + +echo "Cross-compiling chdb static library for macOS ${TARGET_ARCH} on Linux..." + +# Verify we're running on Linux +if [ "$(uname)" != "Linux" ]; then + echo "Error: This script must be run on Linux" + exit 1 +fi + +# Verify required environment variables +if [ -z "${CCTOOLS:-}" ]; then + echo "Error: CCTOOLS environment variable not set. Please set it to the cctools bin directory." + echo "Example: export CCTOOLS=/path/to/cctools" + exit 1 +fi + +# Set architecture-specific variables +if [ "$TARGET_ARCH" == "x86_64" ]; then + DARWIN_TRIPLE="x86_64-apple-darwin" + CMAKE_ARCH="x86_64" + TOOLCHAIN_FILE="cmake/darwin/toolchain-x86_64.cmake" + BUILD_DIR_SUFFIX="static-lib-darwin-x86_64" + OUTPUT_SUFFIX="darwin-x86_64" + EXAMPLE_DIR_SUFFIX="darwin-x86_64" + MACOS_MIN_VERSION="10.15" + # x86_64 specific: disable AVX for compatibility + CPU_FEATURES="-DENABLE_AVX=0 -DENABLE_AVX2=0" +else + # arm64 + DARWIN_TRIPLE="aarch64-apple-darwin" + CMAKE_ARCH="aarch64" + TOOLCHAIN_FILE="cmake/darwin/toolchain-aarch64.cmake" + BUILD_DIR_SUFFIX="static-lib-darwin-arm64" + OUTPUT_SUFFIX="darwin-arm64" + EXAMPLE_DIR_SUFFIX="darwin-arm64" + MACOS_MIN_VERSION="11.0" + # ARM64 specific: disable x86 features + CPU_FEATURES="-DENABLE_AVX=0 -DENABLE_AVX2=0 -DNO_ARMV81_OR_HIGHER=0" +fi + +# Check if cctools exist for this architecture +if [ ! -f "${CCTOOLS}/bin/${DARWIN_TRIPLE}-ar" ]; then + echo "Error: cctools not found at ${CCTOOLS}/bin/${DARWIN_TRIPLE}-ar" + echo "Tip: You may need to rebuild cctools with support for ${TARGET_ARCH}" + exit 1 +fi + +MY_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" + +. 
${MY_DIR}/../vars.sh + +BUILD_DIR=${PROJ_DIR}/build-${BUILD_DIR_SUFFIX} + +# Set up cross-compilation tools +export CC=clang-19 +export CXX=clang++-19 + +# macOS-specific settings +GLIBC_COMPATIBILITY="-DGLIBC_COMPATIBILITY=0" +UNWIND="-DUSE_UNWIND=0" +HDFS="-DENABLE_HDFS=0 -DENABLE_GSASL_LIBRARY=0 -DENABLE_KRB5=0" +MYSQL="-DENABLE_MYSQL=0" +ICU="-DENABLE_ICU=0" +RUST_FEATURES="-DENABLE_RUST=0" +JEMALLOC="-DENABLE_JEMALLOC=0" +LLVM="-DENABLE_EMBEDDED_COMPILER=0 -DENABLE_DWARF_PARSER=0" + +if [ ! -d $BUILD_DIR ]; then + mkdir $BUILD_DIR +fi + +cd ${BUILD_DIR} + +CMAKE_ARGS="-DCMAKE_BUILD_TYPE=${build_type} -DENABLE_THINLTO=0 -DENABLE_TESTS=0 -DENABLE_CLICKHOUSE_SERVER=0 -DENABLE_CLICKHOUSE_CLIENT=0 \ + -DENABLE_CLICKHOUSE_KEEPER=0 -DENABLE_CLICKHOUSE_KEEPER_CONVERTER=0 -DENABLE_CLICKHOUSE_LOCAL=1 -DENABLE_CLICKHOUSE_SU=0 -DENABLE_CLICKHOUSE_BENCHMARK=0 \ + -DENABLE_AZURE_BLOB_STORAGE=1 -DENABLE_CLICKHOUSE_COPIER=0 -DENABLE_CLICKHOUSE_DISKS=0 -DENABLE_CLICKHOUSE_FORMAT=0 -DENABLE_CLICKHOUSE_GIT_IMPORT=0 \ + -DENABLE_AWS_S3=1 -DENABLE_HIVE=0 -DENABLE_AVRO=1 \ + -DENABLE_CLICKHOUSE_OBFUSCATOR=0 -DENABLE_CLICKHOUSE_ODBC_BRIDGE=0 -DENABLE_CLICKHOUSE_STATIC_FILES_DISK_UPLOADER=0 \ + -DENABLE_KAFKA=1 -DENABLE_LIBPQXX=1 -DENABLE_NATS=0 -DENABLE_AMQPCPP=0 -DENABLE_NURAFT=0 \ + -DENABLE_CASSANDRA=0 -DENABLE_ODBC=0 -DENABLE_NLP=0 \ + -DENABLE_LDAP=0 \ + ${MYSQL} \ + ${HDFS} \ + -DENABLE_LIBRARIES=0 ${RUST_FEATURES} \ + ${GLIBC_COMPATIBILITY} \ + -DENABLE_UTILS=0 ${LLVM} ${UNWIND} \ + ${ICU} -DENABLE_UTF8PROC=1 ${JEMALLOC} \ + -DENABLE_PARQUET=1 -DENABLE_ROCKSDB=1 -DENABLE_SQLITE=1 -DENABLE_VECTORSCAN=1 \ + -DENABLE_PROTOBUF=1 -DENABLE_THRIFT=1 -DENABLE_MSGPACK=1 \ + -DENABLE_BROTLI=1 -DENABLE_H3=1 -DENABLE_CURL=1 \ + -DENABLE_CLICKHOUSE_ALL=0 -DUSE_STATIC_LIBRARIES=1 -DSPLIT_SHARED_LIBRARIES=0 \ + -DENABLE_SIMDJSON=1 -DENABLE_RAPIDJSON=1 \ + ${CPU_FEATURES} \ + -DENABLE_AVX512=0 -DENABLE_AVX512_VBMI=0 \ + -DENABLE_LIBFIU=1 \ + -DCHDB_VERSION=${CHDB_VERSION} \ + 
-DCMAKE_AR:FILEPATH=${CCTOOLS}/bin/${DARWIN_TRIPLE}-ar \ + -DCMAKE_INSTALL_NAME_TOOL=${CCTOOLS}/bin/${DARWIN_TRIPLE}-install_name_tool \ + -DCMAKE_RANLIB:FILEPATH=${CCTOOLS}/bin/${DARWIN_TRIPLE}-ranlib \ + -DCMAKE_LINKER:FILEPATH=${CCTOOLS}/bin/${DARWIN_TRIPLE}-ld \ + -DLINKER_NAME=${CCTOOLS}/bin/${DARWIN_TRIPLE}-ld \ + -DCMAKE_TOOLCHAIN_FILE=${TOOLCHAIN_FILE} \ + " + +echo "Running cmake configuration..." +cmake ${CMAKE_ARGS} -DENABLE_PYTHON=0 -DCHDB_STATIC_LIBRARY_BUILD=1 .. + +echo "Building with ninja..." +ninja -d keeprsp + +BINARY=${BUILD_DIR}/programs/clickhouse +rm -f ${BINARY} + +cd ${BUILD_DIR} +ninja -d keeprsp -v > build.log || true + +ccache -s || true + +cd ${MY_DIR} + +# Create static library +echo "Creating static library libchdb.a for macOS..." +python3 create_static_libchdb.py +if [ $? -ne 0 ]; then + echo "Error: Failed to create static library" + exit 1 +fi + +# Prepare cpp-example directory and copy header file +echo "Preparing cpp-example-${EXAMPLE_DIR_SUFFIX} directory..." +if [ ! -d ${MY_DIR}/cpp-example-${EXAMPLE_DIR_SUFFIX} ]; then + cp -r ${MY_DIR}/cpp-example ${MY_DIR}/cpp-example-${EXAMPLE_DIR_SUFFIX} +fi + +cd ${MY_DIR}/cpp-example-${EXAMPLE_DIR_SUFFIX} +cp ${PROJ_DIR}/programs/local/chdb.h . +cp ${MY_DIR}/libchdb.a . +echo "Copied chdb.h and libchdb.a to cpp-example-${EXAMPLE_DIR_SUFFIX} directory" + +echo "Note: Skipping C++ example compilation for cross-compilation." +echo "The example can be compiled on the target macOS ${TARGET_ARCH} system with:" +echo " clang chdb_example.cpp -o chdb_example -mmacosx-version-min=${MACOS_MIN_VERSION} -L. -lchdb -liconv -framework CoreFoundation" + +# For cross-compilation, we'll create a minimal analysis without running the compiled binary +echo "Creating analysis files for cross-compilation..." + +# Copy map file analysis tools but don't run them (since we can't execute macOS binaries on Linux) +echo "Note: Skipping map file analysis for cross-compilation." 
+echo "Run the following on macOS ${TARGET_ARCH} to create minimal library:" +echo " cd ${MY_DIR}/cpp-example-${EXAMPLE_DIR_SUFFIX}" +echo " clang chdb_example.cpp -o chdb_example -mmacosx-version-min=${MACOS_MIN_VERSION} -L. -lchdb -liconv -framework CoreFoundation -Wl,-map,chdb_example.map" +echo " cd ${MY_DIR}" +echo " python3 extract_chdb_objects.py --map-file=cpp-example-${EXAMPLE_DIR_SUFFIX}/chdb_example.map" +echo " python3 create_minimal_libchdb.py" + +# For now, we'll use the full libchdb.a as the final output +echo "Using full libchdb.a for cross-compilation (minimal version requires macOS execution)" + +# Strip the libchdb.a if not debug build +if [ ${build_type} == "Debug" ]; then + echo -e "\nDebug build, skip strip" +else + echo -e "\nStrip the libchdb.a:" + # Use macOS-compatible strip command via cctools + if [ -f "${CCTOOLS}/bin/${DARWIN_TRIPLE}-strip" ]; then + ${CCTOOLS}/bin/${DARWIN_TRIPLE}-strip -S -x libchdb.a + else + echo "Warning: macOS strip not found, skipping strip step" + fi +fi + +echo "Note: Skipping Go test for cross-compilation." + +# Copy final library to project root +OUTPUT_NAME="libchdb-${OUTPUT_SUFFIX}.a" +echo "Copying libchdb.a to project root as ${OUTPUT_NAME}..." +cp ${MY_DIR}/libchdb.a ${PROJ_DIR}/${OUTPUT_NAME} +echo "Final ${OUTPUT_NAME} created at ${PROJ_DIR}/${OUTPUT_NAME}" + +# Print final library size +echo "Final ${OUTPUT_NAME} size:" +ls -lh ${PROJ_DIR}/${OUTPUT_NAME} + +echo "Cross-compilation for macOS ${TARGET_ARCH} completed successfully!" 
+echo "Generated files:" +echo " - ${PROJ_DIR}/${OUTPUT_NAME}" +echo " - ${MY_DIR}/cpp-example-${EXAMPLE_DIR_SUFFIX}/ (for testing on macOS ${TARGET_ARCH})" \ No newline at end of file diff --git a/chdb/build_mac_on_linux.sh b/chdb/build_mac_on_linux.sh new file mode 100755 index 00000000000..0935c2ba975 --- /dev/null +++ b/chdb/build_mac_on_linux.sh @@ -0,0 +1,278 @@ +#!/bin/bash + +set -e + +# Cross-compile chdb for macOS (x86_64 or arm64) on Linux +# Usage: ./build_mac_on_linux_universal.sh [x86_64|arm64] [Release|Debug] + +# Parse arguments +TARGET_ARCH=${1:-x86_64} +build_type=${2:-Release} + +# Validate architecture +if [[ "$TARGET_ARCH" != "x86_64" && "$TARGET_ARCH" != "arm64" ]]; then + echo "Error: Invalid architecture. Use 'x86_64' or 'arm64'" + echo "Usage: $0 [x86_64|arm64] [Release|Debug]" + exit 1 +fi + +echo "Cross-compiling chdb for macOS ${TARGET_ARCH} on Linux..." + +# Verify we're running on Linux +if [ "$(uname)" != "Linux" ]; then + echo "Error: This script must be run on Linux" + exit 1 +fi + +# Verify required environment variables +if [ -z "${CCTOOLS:-}" ]; then + echo "Error: CCTOOLS environment variable not set. Please set it to the cctools bin directory." + echo "Example: export CCTOOLS=/path/to/cctools" + exit 1 +fi + +# Set architecture-specific variables +if [ "$TARGET_ARCH" == "x86_64" ]; then + DARWIN_TRIPLE="x86_64-apple-darwin" + CMAKE_ARCH="x86_64" + TOOLCHAIN_FILE="cmake/darwin/toolchain-x86_64.cmake" + BUILD_DIR_SUFFIX="darwin-x86_64" + # x86_64 specific: disable AVX for compatibility + CPU_FEATURES="-DENABLE_AVX=0 -DENABLE_AVX2=0" +else + # arm64 + DARWIN_TRIPLE="aarch64-apple-darwin" + CMAKE_ARCH="aarch64" + TOOLCHAIN_FILE="cmake/darwin/toolchain-aarch64.cmake" + BUILD_DIR_SUFFIX="darwin-arm64" + # ARM64 specific: disable x86 features, may need to disable some ARM features for compatibility + CPU_FEATURES="-DENABLE_AVX=0 -DENABLE_AVX2=0 -DNO_ARMV81_OR_HIGHER=0" +fi + +# Check if cctools exist for this architecture +if [ ! 
-f "${CCTOOLS}/bin/${DARWIN_TRIPLE}-ar" ]; then + echo "Error: cctools not found at ${CCTOOLS}/bin/${DARWIN_TRIPLE}-ar" + echo "Tip: You may need to rebuild cctools with support for ${TARGET_ARCH}" + exit 1 +fi + +DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" + +. ${DIR}/vars.sh + +BUILD_DIR=${PROJ_DIR}/build-${BUILD_DIR_SUFFIX} + +# Set up cross-compilation tools +export CC=clang-19 +export CXX=clang++-19 + +# macOS-specific settings +GLIBC_COMPATIBILITY="-DGLIBC_COMPATIBILITY=0" +UNWIND="-DUSE_UNWIND=0" +JEMALLOC="-DENABLE_JEMALLOC=0" +PYINIT_ENTRY="-Wl,-exported_symbol,_PyInit_${CHDB_PY_MOD}" +HDFS="-DENABLE_HDFS=0 -DENABLE_GSASL_LIBRARY=0 -DENABLE_KRB5=0" +MYSQL="-DENABLE_MYSQL=0" +ICU="-DENABLE_ICU=0" +SED_INPLACE="sed -i" +RUST_FEATURES="-DENABLE_RUST=0" + +# Disable embedded compiler for cross-compilation +LLVM="-DENABLE_EMBEDDED_COMPILER=0 -DENABLE_DWARF_PARSER=0" + +if [ ! -d $BUILD_DIR ]; then + mkdir $BUILD_DIR +fi + +cd ${BUILD_DIR} + +CMAKE_ARGS="-DCMAKE_BUILD_TYPE=${build_type} -DENABLE_THINLTO=0 -DENABLE_TESTS=0 -DENABLE_CLICKHOUSE_SERVER=0 -DENABLE_CLICKHOUSE_CLIENT=0 \ + -DENABLE_CLICKHOUSE_KEEPER=0 -DENABLE_CLICKHOUSE_KEEPER_CONVERTER=0 -DENABLE_CLICKHOUSE_LOCAL=1 -DENABLE_CLICKHOUSE_SU=0 -DENABLE_CLICKHOUSE_BENCHMARK=0 \ + -DENABLE_AZURE_BLOB_STORAGE=1 -DENABLE_CLICKHOUSE_COPIER=0 -DENABLE_CLICKHOUSE_DISKS=0 -DENABLE_CLICKHOUSE_FORMAT=0 -DENABLE_CLICKHOUSE_GIT_IMPORT=0 \ + -DENABLE_AWS_S3=1 -DENABLE_HIVE=0 -DENABLE_AVRO=1 \ + -DENABLE_CLICKHOUSE_OBFUSCATOR=0 -DENABLE_CLICKHOUSE_ODBC_BRIDGE=0 -DENABLE_CLICKHOUSE_STATIC_FILES_DISK_UPLOADER=0 \ + -DENABLE_KAFKA=1 -DENABLE_LIBPQXX=1 -DENABLE_NATS=0 -DENABLE_AMQPCPP=0 -DENABLE_NURAFT=0 \ + -DENABLE_CASSANDRA=0 -DENABLE_ODBC=0 -DENABLE_NLP=0 \ + -DENABLE_LDAP=0 \ + ${MYSQL} \ + ${HDFS} \ + -DENABLE_LIBRARIES=0 ${RUST_FEATURES} \ + ${GLIBC_COMPATIBILITY} \ + -DENABLE_UTILS=0 ${LLVM} ${UNWIND} \ + ${ICU} -DENABLE_UTF8PROC=1 ${JEMALLOC} \ + -DENABLE_PARQUET=1 -DENABLE_ROCKSDB=1 
-DENABLE_SQLITE=1 -DENABLE_VECTORSCAN=1 \ + -DENABLE_PROTOBUF=1 -DENABLE_THRIFT=1 -DENABLE_MSGPACK=1 \ + -DENABLE_BROTLI=1 -DENABLE_H3=1 -DENABLE_CURL=1 \ + -DENABLE_CLICKHOUSE_ALL=0 -DUSE_STATIC_LIBRARIES=1 -DSPLIT_SHARED_LIBRARIES=0 \ + -DENABLE_SIMDJSON=1 -DENABLE_RAPIDJSON=1 \ + ${CPU_FEATURES} \ + -DENABLE_AVX512=0 -DENABLE_AVX512_VBMI=0 \ + -DENABLE_LIBFIU=1 \ + -DCHDB_VERSION=${CHDB_VERSION} \ + -DCMAKE_AR:FILEPATH=${CCTOOLS}/bin/${DARWIN_TRIPLE}-ar \ + -DCMAKE_INSTALL_NAME_TOOL=${CCTOOLS}/bin/${DARWIN_TRIPLE}-install_name_tool \ + -DCMAKE_RANLIB:FILEPATH=${CCTOOLS}/bin/${DARWIN_TRIPLE}-ranlib \ + -DCMAKE_LINKER:FILEPATH=${CCTOOLS}/bin/${DARWIN_TRIPLE}-ld \ + -DLINKER_NAME=${CCTOOLS}/bin/${DARWIN_TRIPLE}-ld \ + -DCMAKE_TOOLCHAIN_FILE=${TOOLCHAIN_FILE} \ + " + +LIBCHDB_SO="libchdb.so" + +# Build libchdb.so +echo "Running cmake configuration..." +cmake ${CMAKE_ARGS} -DENABLE_PYTHON=0 .. + +echo "Building with ninja..." +ninja -d keeprsp + +BINARY=${BUILD_DIR}/programs/clickhouse +echo -e "\nBINARY: ${BINARY}" +ls -lh ${BINARY} +echo -e "\nfile info of ${BINARY}" +file ${BINARY} +rm -f ${BINARY} + +cd ${BUILD_DIR} +ninja -d keeprsp -v > build.log || true +USING_RESPONSE_FILE=$(grep -m 1 'clang++.*-o programs/clickhouse .*' build.log | grep '@CMakeFiles/clickhouse.rsp' || true) + +if [ ! 
"${USING_RESPONSE_FILE}" == "" ]; then + if [ -f CMakeFiles/clickhouse.rsp ]; then + cp -a CMakeFiles/clickhouse.rsp CMakeFiles/libchdb.rsp + else + echo "CMakeFiles/clickhouse.rsp not found" + exit 1 + fi +fi + +LIBCHDB_CMD=$(grep -m 1 'clang++.*-o programs/clickhouse .*' build.log \ + | sed "s/-o programs\/clickhouse/-fPIC -shared -o ${LIBCHDB_SO}/" \ + | sed 's/^[^&]*&& //' | sed 's/&&.*//' \ + | sed 's/ -Wl,-undefined,error/ -Wl,-undefined,dynamic_lookup/g' \ + | sed 's/ -Xlinker --no-undefined//g' \ + | sed 's/@CMakeFiles\/clickhouse.rsp/@CMakeFiles\/libchdb.rsp/g' \ + ) + +# Generate the command to generate libchdb.so +LIBCHDB_CMD=$(echo ${LIBCHDB_CMD} | sed 's/ '${CHDB_PY_MODULE}'/ '${LIBCHDB_SO}'/g') + +if [ ! "${USING_RESPONSE_FILE}" == "" ]; then + ${SED_INPLACE} 's/ '${CHDB_PY_MODULE}'/ '${LIBCHDB_SO}'/g' CMakeFiles/libchdb.rsp +fi + +# For macOS, replace PyInit entry point with exported symbols for libchdb +LIBCHDB_CMD=$(echo ${LIBCHDB_CMD} | sed 's/ '${PYINIT_ENTRY}'/ -Wl,-exported_symbol,_query_stable -Wl,-exported_symbol,_free_result -Wl,-exported_symbol,_query_stable_v2 -Wl,-exported_symbol,_free_result_v2/g') + +LIBCHDB_CMD=$(echo ${LIBCHDB_CMD} | sed 's/@CMakeFiles\/clickhouse.rsp/@CMakeFiles\/libchdb.rsp/g') + +# Save the command to a file for debug +echo ${LIBCHDB_CMD} > libchdb_cmd.sh + +# Build libchdb.so +echo "Building libchdb.so..." +${LIBCHDB_CMD} + +LIBCHDB_DIR=${BUILD_DIR}/ +LIBCHDB=${LIBCHDB_DIR}/${LIBCHDB_SO} +ls -lh ${LIBCHDB} + +# Build chdb python module +py_version="3.9" +current_py_version=$(python -c "import sys; print(f'{sys.version_info.major}.{sys.version_info.minor}')") +if [ "$current_py_version" != "$py_version" ]; then + echo "Error: Current Python version is $current_py_version, but required version is $py_version" + echo "Please switch to Python $py_version using: pyenv shell $py_version" + exit 1 +fi +cmake ${CMAKE_ARGS} -DENABLE_PYTHON=1 -DPYBIND11_NONLIMITEDAPI_PYTHON_HEADERS_VERSION=${py_version} .. 
+ninja -d keeprsp || true + +# Delete the binary and run ninja -v again to capture the command +/bin/rm -f ${BINARY} +cd ${BUILD_DIR} +ninja -d keeprsp -v > build.log || true + +USING_RESPONSE_FILE=$(grep -m 1 'clang++.*-o programs/clickhouse .*' build.log | grep '@CMakeFiles/clickhouse.rsp' || true) + +if [ ! "${USING_RESPONSE_FILE}" == "" ]; then + if [ -f CMakeFiles/clickhouse.rsp ]; then + cp -a CMakeFiles/clickhouse.rsp CMakeFiles/pychdb.rsp + else + echo "CMakeFiles/clickhouse.rsp not found" + exit 1 + fi +fi + +# Extract the command to generate CHDB_PY_MODULE +PYCHDB_CMD=$(grep -m 1 'clang++.*-o programs/clickhouse .*' build.log \ + | sed "s/-o programs\/clickhouse/-fPIC -Wl,-undefined,dynamic_lookup -shared ${PYINIT_ENTRY} -o ${CHDB_PY_MODULE}/" \ + | sed 's/^[^&]*&& //' | sed 's/&&.*//' \ + | sed 's/ -Wl,-undefined,error/ -Wl,-undefined,dynamic_lookup/g' \ + | sed 's/ -Xlinker --no-undefined//g' \ + | sed 's/@CMakeFiles\/clickhouse.rsp/@CMakeFiles\/pychdb.rsp/g' \ + ) + +# For macOS, set rpath +PYCHDB_CMD=$(echo ${PYCHDB_CMD} | sed 's|-Wl,-rpath,/[^[:space:]]*/pybind11-cmake|-Wl,-rpath,@loader_path|g') + +# Save the command to a file for debug +echo ${PYCHDB_CMD} > pychdb_cmd.sh + +echo "Building Python module..." 
+${PYCHDB_CMD} + +ls -lh ${CHDB_PY_MODULE} + +## Check all the so files +LIBCHDB_DIR=${BUILD_DIR}/ + +PYCHDB=${LIBCHDB_DIR}/${CHDB_PY_MODULE} +LIBCHDB=${LIBCHDB_DIR}/${LIBCHDB_SO} + +if [ ${build_type} == "Debug" ]; then + echo -e "\nDebug build, skip strip" +else + echo -e "\nStrip the binary:" + ${STRIP} --remove-section=.comment --remove-section=.note ${PYCHDB} + ${STRIP} --remove-section=.comment --remove-section=.note ${LIBCHDB} +fi + +echo -e "\nPYCHDB: ${PYCHDB}" +ls -lh ${PYCHDB} +echo -e "\nLIBCHDB: ${LIBCHDB}" +ls -lh ${LIBCHDB} +echo -e "\nfile info of ${PYCHDB}" +file ${PYCHDB} +echo -e "\nfile info of ${LIBCHDB}" +file ${LIBCHDB} + +rm -f ${CHDB_DIR}/*.so +cp -a ${PYCHDB} ${CHDB_DIR}/${CHDB_PY_MODULE} +cp -a ${LIBCHDB} ${PROJ_DIR}/${LIBCHDB_SO} + +echo -e "\nSymbols:" +echo -e "\nPyInit in PYCHDB: ${PYCHDB}" +${NM} ${PYCHDB} | grep PyInit || true +echo -e "\nPyInit in LIBCHDB: ${LIBCHDB}" +${NM} ${LIBCHDB} | grep PyInit || echo "PyInit not found in ${LIBCHDB}, it's OK" +echo -e "\nquery_stable in PYCHDB: ${PYCHDB}" +${NM} ${PYCHDB} | grep query_stable || true +echo -e "\nquery_stable in LIBCHDB: ${LIBCHDB}" +${NM} ${LIBCHDB} | grep query_stable || true + +echo -e "\nAfter copy:" +cd ${PROJ_DIR} && pwd + +ccache -s || true + +# Skip pybind11 libraries build for cross-compilation +echo "Skipping pybind11 libraries build for cross-compilation" +echo "These should be built separately on the target macOS system using:" +echo " CMAKE_ARGS=\"\${CMAKE_ARGS}\" bash \${DIR}/build_pybind11.sh --all" + +echo -e "\nCross-compilation for macOS ${TARGET_ARCH} completed successfully!" 
+echo -e "Generated files:" +echo -e " - ${PROJ_DIR}/${LIBCHDB_SO}" +echo -e " - ${CHDB_DIR}/${CHDB_PY_MODULE}" +echo -e "\nBuild directory: ${BUILD_DIR}" diff --git a/cmake/darwin/toolchain-aarch64.cmake b/cmake/darwin/toolchain-aarch64.cmake index 178153c1098..0243006c184 100644 --- a/cmake/darwin/toolchain-aarch64.cmake +++ b/cmake/darwin/toolchain-aarch64.cmake @@ -8,4 +8,7 @@ set (CMAKE_CXX_COMPILER_TARGET "aarch64-apple-darwin") set (CMAKE_ASM_COMPILER_TARGET "aarch64-apple-darwin") set (CMAKE_OSX_SYSROOT "${CMAKE_CURRENT_LIST_DIR}/../toolchain/darwin-aarch64") +# Set minimum macOS deployment target to 11.0 (Big Sur - first version with Apple Silicon support) +set (CMAKE_OSX_DEPLOYMENT_TARGET "11.0") + set (CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY) # disable linkage check - it doesn't work in CMake diff --git a/cmake/darwin/toolchain-x86_64.cmake b/cmake/darwin/toolchain-x86_64.cmake index b9cbe72a2b6..ab060d6aa28 100644 --- a/cmake/darwin/toolchain-x86_64.cmake +++ b/cmake/darwin/toolchain-x86_64.cmake @@ -8,4 +8,7 @@ set (CMAKE_CXX_COMPILER_TARGET "x86_64-apple-darwin") set (CMAKE_ASM_COMPILER_TARGET "x86_64-apple-darwin") set (CMAKE_OSX_SYSROOT "${CMAKE_CURRENT_LIST_DIR}/../toolchain/darwin-x86_64") +# Set minimum macOS deployment target to 10.15 (Catalina) +set (CMAKE_OSX_DEPLOYMENT_TARGET "10.15") + set (CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY) # disable linkage check - it doesn't work in CMake diff --git a/cmake/tools.cmake b/cmake/tools.cmake index 13febb2d1f2..23e7d2d12e8 100644 --- a/cmake/tools.cmake +++ b/cmake/tools.cmake @@ -59,6 +59,8 @@ if (LINKER_NAME) message (FATAL_ERROR "Using linker ${LINKER_NAME} but can't find its path.") endif () set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} --ld-path=${LLD_PATH}") + set (CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} --ld-path=${LLD_PATH}") + set (CMAKE_MODULE_LINKER_FLAGS "${CMAKE_MODULE_LINKER_FLAGS} --ld-path=${LLD_PATH}") endif () if (LINKER_NAME) From 
99c83d241c6b1a3f0d48a260b458babd49cc77ae Mon Sep 17 00:00:00 2001 From: wudidapaopao Date: Fri, 28 Nov 2025 16:51:08 +0800 Subject: [PATCH 02/15] chore: install cctools --- chdb/build_mac_on_linux.sh | 68 +++++++++++++++++++++++++++----------- 1 file changed, 49 insertions(+), 19 deletions(-) diff --git a/chdb/build_mac_on_linux.sh b/chdb/build_mac_on_linux.sh index 0935c2ba975..3e30a4c1956 100755 --- a/chdb/build_mac_on_linux.sh +++ b/chdb/build_mac_on_linux.sh @@ -24,20 +24,12 @@ if [ "$(uname)" != "Linux" ]; then exit 1 fi -# Verify required environment variables -if [ -z "${CCTOOLS:-}" ]; then - echo "Error: CCTOOLS environment variable not set. Please set it to the cctools bin directory." - echo "Example: export CCTOOLS=/path/to/cctools" - exit 1 -fi - -# Set architecture-specific variables +# Set architecture-specific variables first if [ "$TARGET_ARCH" == "x86_64" ]; then DARWIN_TRIPLE="x86_64-apple-darwin" CMAKE_ARCH="x86_64" TOOLCHAIN_FILE="cmake/darwin/toolchain-x86_64.cmake" BUILD_DIR_SUFFIX="darwin-x86_64" - # x86_64 specific: disable AVX for compatibility CPU_FEATURES="-DENABLE_AVX=0 -DENABLE_AVX2=0" else # arm64 @@ -45,17 +37,60 @@ else CMAKE_ARCH="aarch64" TOOLCHAIN_FILE="cmake/darwin/toolchain-aarch64.cmake" BUILD_DIR_SUFFIX="darwin-arm64" - # ARM64 specific: disable x86 features, may need to disable some ARM features for compatibility CPU_FEATURES="-DENABLE_AVX=0 -DENABLE_AVX2=0 -DNO_ARMV81_OR_HIGHER=0" fi -# Check if cctools exist for this architecture -if [ ! -f "${CCTOOLS}/bin/${DARWIN_TRIPLE}-ar" ]; then - echo "Error: cctools not found at ${CCTOOLS}/bin/${DARWIN_TRIPLE}-ar" - echo "Tip: You may need to rebuild cctools with support for ${TARGET_ARCH}" +# Install cctools if not already installed +CCTOOLS_INSTALL_DIR="${HOME}/cctools" +CCTOOLS_BIN="${CCTOOLS_INSTALL_DIR}/bin" + +if [ -z "${CCTOOLS:-}" ]; then + echo "CCTOOLS environment variable not set, checking for installation..." 
+ + # Check if cctools is already installed + if [ -f "${CCTOOLS_BIN}/${DARWIN_TRIPLE}-ld" ]; then + echo "Found existing cctools installation at ${CCTOOLS_INSTALL_DIR}" + export CCTOOLS="${CCTOOLS_BIN}" + else + echo "cctools not found, installing..." + + mkdir ~/cctools + export CCTOOLS=$(cd ~/cctools && pwd) + cd ${CCTOOLS} + + git clone https://github.com/tpoechtrager/apple-libtapi.git + cd apple-libtapi + git checkout 15dfc2a8c9a2a89d06ff227560a69f5265b692f9 + INSTALLPREFIX=${CCTOOLS} ./build.sh + ./install.sh + cd .. + + git clone https://github.com/chdb-io/cctools-port.git + cd cctools-port/cctools + + # Set cctools target based on architecture + if [ "$TARGET_ARCH" == "x86_64" ]; then + CCTOOLS_TARGET="x86_64-apple-darwin" + else + CCTOOLS_TARGET="aarch64-apple-darwin" + fi + + ./configure --prefix=$(readlink -f ${CCTOOLS}) --with-libtapi=$(readlink -f ${CCTOOLS}) --target=${CCTOOLS_TARGET} + make install + fi +else + echo "Using CCTOOLS from environment variable: ${CCTOOLS}" +fi + +# Verify cctools installation +if [ ! -f "${CCTOOLS}/${DARWIN_TRIPLE}-ld" ]; then + echo "Error: cctools linker not found at ${CCTOOLS}/${DARWIN_TRIPLE}-ld" + echo "Please verify cctools installation or set CCTOOLS environment variable correctly" exit 1 fi +echo "cctools verified: ${CCTOOLS}/${DARWIN_TRIPLE}-ld" + DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" . 
${DIR}/vars.sh @@ -109,11 +144,6 @@ CMAKE_ARGS="-DCMAKE_BUILD_TYPE=${build_type} -DENABLE_THINLTO=0 -DENABLE_TESTS=0 -DENABLE_AVX512=0 -DENABLE_AVX512_VBMI=0 \ -DENABLE_LIBFIU=1 \ -DCHDB_VERSION=${CHDB_VERSION} \ - -DCMAKE_AR:FILEPATH=${CCTOOLS}/bin/${DARWIN_TRIPLE}-ar \ - -DCMAKE_INSTALL_NAME_TOOL=${CCTOOLS}/bin/${DARWIN_TRIPLE}-install_name_tool \ - -DCMAKE_RANLIB:FILEPATH=${CCTOOLS}/bin/${DARWIN_TRIPLE}-ranlib \ - -DCMAKE_LINKER:FILEPATH=${CCTOOLS}/bin/${DARWIN_TRIPLE}-ld \ - -DLINKER_NAME=${CCTOOLS}/bin/${DARWIN_TRIPLE}-ld \ -DCMAKE_TOOLCHAIN_FILE=${TOOLCHAIN_FILE} \ " From b727ca004f217118fd47380c1c9d5603813e8953 Mon Sep 17 00:00:00 2001 From: wudidapaopao Date: Mon, 1 Dec 2025 18:43:26 +0800 Subject: [PATCH 03/15] chore: cross compiling supports building with python mode --- chdb/build.sh | 57 ----------------------------------- chdb/build_mac_on_linux.sh | 50 +++++++++++++++++------------- chdb/vars.sh | 4 +++ programs/local/CMakeLists.txt | 11 +++---- 4 files changed, 38 insertions(+), 84 deletions(-) diff --git a/chdb/build.sh b/chdb/build.sh index da078bff541..b1321882ad8 100755 --- a/chdb/build.sh +++ b/chdb/build.sh @@ -112,63 +112,6 @@ CMAKE_ARGS="-DCMAKE_BUILD_TYPE=${build_type} -DENABLE_THINLTO=0 -DENABLE_TESTS=0 -DCHDB_VERSION=${CHDB_VERSION} \ " -# # Generate libchdb.so linkage command: -# # 1. Use ar to delete the LocalChdb.cpp.o from libclickhouse-local-lib.a -# # `ar d programs/local/libclickhouse-local-lib.a LocalChdb.cpp.o` -# # 2. Change the entry point from `PyInit_chdb` to `query_stable` -# # `-Wl,-ePyInit_chdb` to `-Wl,-equery_stable` on Linux -# # `-Wl,-exported_symbol,_PyInit_${CHDB_PY_MOD}` to -# # `-Wl,-exported_symbol,_query_stable -Wl,-exported_symbol,_free_result` on Darwin -# # 3. Change the output file name from `_chdb.cpython-xx-x86_64-linux-gnu.s` to `libchdb.so` -# # `-o _chdb.cpython-39-x86_64-linux-gnu.so` to `-o libchdb.so` -# # 4. Write the command to a file for debug -# # 5. 
Run the command to generate libchdb.so - -# # Remove object from archive and save it to a new archive like: -# # path/to/oldname.a -> path/to/oldname-nopy.a -# remove_obj_from_archive() { -# local archive=$1 -# local obj=$2 -# local new_archive=$(echo ${archive} | sed 's/\.a$/-nopy.a/') -# cp -a ${archive} ${new_archive} -# ${AR} d ${new_archive} ${obj} -# echo "Old archive: ${archive}" -# ls -l ${archive} -# echo "New archive: ${new_archive}" -# ls -l ${new_archive} -# local oldfile=$(basename ${archive}) -# local newfile=$(basename ${new_archive}) -# LIBCHDB_CMD=$(echo ${LIBCHDB_CMD} | sed "s/${oldfile}/${newfile}/g") -# ${SED_INPLACE} "s/${oldfile}/${newfile}/g" CMakeFiles/libchdb.rsp -# } - - -# # Step 1, 2, 3: -# # Backup the libclickhouse-local-lib.a and restore it after ar d -# # LIBCHDB_SO="libchdb.so" -# # CLEAN_CHDB_A="libclickhouse-local-chdb.a" -# # cp -a ${BUILD_DIR}/programs/local/libclickhouse-local-lib.a ${BUILD_DIR}/programs/local/libclickhouse-local-lib.a.bak -# # ${AR} d ${BUILD_DIR}/programs/local/libclickhouse-local-lib.a LocalChdb.cpp.o -# # mv ${BUILD_DIR}/programs/local/libclickhouse-local-lib.a ${BUILD_DIR}/programs/local/${CLEAN_CHDB_A} -# # mv ${BUILD_DIR}/programs/local/libclickhouse-local-lib.a.bak ${BUILD_DIR}/programs/local/libclickhouse-local-lib.a -# # ls -l ${BUILD_DIR}/programs/local/ -# LIBCHDB_SO="libchdb.so" -# LIBCHDB_CMD=${PYCHDB_CMD} -# if [ "${build_type}" == "Debug" ]; then -# remove_obj_from_archive ${BUILD_DIR}/programs/local/libclickhouse-local-libd.a LocalChdb.cpp.o -# remove_obj_from_archive ${BUILD_DIR}/src/libdbmsd.a StoragePython.cpp.o -# remove_obj_from_archive ${BUILD_DIR}/src/libdbmsd.a PythonSource.cpp.o -# remove_obj_from_archive ${BUILD_DIR}/src/libclickhouse_common_iod.a PythonUtils.cpp.o -# remove_obj_from_archive ${BUILD_DIR}/src/TableFunctions/libclickhouse_table_functionsd.a TableFunctionPython.cpp.o -# else -# remove_obj_from_archive ${BUILD_DIR}/programs/local/libclickhouse-local-lib.a LocalChdb.cpp.o 
-# remove_obj_from_archive ${BUILD_DIR}/src/libdbms.a StoragePython.cpp.o -# remove_obj_from_archive ${BUILD_DIR}/src/libdbms.a PythonSource.cpp.o -# remove_obj_from_archive ${BUILD_DIR}/src/libclickhouse_common_io.a PythonUtils.cpp.o -# remove_obj_from_archive ${BUILD_DIR}/src/TableFunctions/libclickhouse_table_functions.a TableFunctionPython.cpp.o -# fi - - LIBCHDB_SO="libchdb.so" # Build libchdb.so cmake ${CMAKE_ARGS} -DENABLE_PYTHON=0 .. diff --git a/chdb/build_mac_on_linux.sh b/chdb/build_mac_on_linux.sh index 3e30a4c1956..e0ddafa4fbf 100755 --- a/chdb/build_mac_on_linux.sh +++ b/chdb/build_mac_on_linux.sh @@ -8,6 +8,8 @@ set -e # Parse arguments TARGET_ARCH=${1:-x86_64} build_type=${2:-Release} +DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" +. ${DIR}/vars.sh cross-compile # Validate architecture if [[ "$TARGET_ARCH" != "x86_64" && "$TARGET_ARCH" != "arm64" ]]; then @@ -37,7 +39,7 @@ else CMAKE_ARCH="aarch64" TOOLCHAIN_FILE="cmake/darwin/toolchain-aarch64.cmake" BUILD_DIR_SUFFIX="darwin-arm64" - CPU_FEATURES="-DENABLE_AVX=0 -DENABLE_AVX2=0 -DNO_ARMV81_OR_HIGHER=0" + CPU_FEATURES="-DENABLE_AVX=0 -DENABLE_AVX2=0" fi # Install cctools if not already installed @@ -91,17 +93,24 @@ fi echo "cctools verified: ${CCTOOLS}/${DARWIN_TRIPLE}-ld" -DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" +# Override tools with cross-compilation versions from cctools +export STRIP="${CCTOOLS_BIN}/${DARWIN_TRIPLE}-strip" +export AR="${CCTOOLS_BIN}/${DARWIN_TRIPLE}-ar" +export NM="${CCTOOLS_BIN}/${DARWIN_TRIPLE}-nm" +export LDD="${CCTOOLS_BIN}/${DARWIN_TRIPLE}-otool -L" -. 
${DIR}/vars.sh +echo "Using cross-compilation tools:" +echo " STRIP: ${STRIP}" +echo " AR: ${AR}" +echo " NM: ${NM}" +echo " LDD: ${LDD}" BUILD_DIR=${PROJ_DIR}/build-${BUILD_DIR_SUFFIX} -# Set up cross-compilation tools export CC=clang-19 export CXX=clang++-19 -# macOS-specific settings +RUST_FEATURES="-DENABLE_RUST=0" GLIBC_COMPATIBILITY="-DGLIBC_COMPATIBILITY=0" UNWIND="-DUSE_UNWIND=0" JEMALLOC="-DENABLE_JEMALLOC=0" @@ -110,10 +119,11 @@ HDFS="-DENABLE_HDFS=0 -DENABLE_GSASL_LIBRARY=0 -DENABLE_KRB5=0" MYSQL="-DENABLE_MYSQL=0" ICU="-DENABLE_ICU=0" SED_INPLACE="sed -i" -RUST_FEATURES="-DENABLE_RUST=0" - -# Disable embedded compiler for cross-compilation LLVM="-DENABLE_EMBEDDED_COMPILER=0 -DENABLE_DWARF_PARSER=0" +CMAKE_AR_FILEPATH="${CCTOOLS_BIN}/${DARWIN_TRIPLE}-ar" +CMAKE_INSTALL_NAME_TOOL="${CCTOOLS_BIN}/${DARWIN_TRIPLE}-install_name_tool" +CMAKE_RANLIB_FILEPATH="${CCTOOLS_BIN}/${DARWIN_TRIPLE}-ranlib" +CMAKE_LINKER_NAME="${CCTOOLS_BIN}/${DARWIN_TRIPLE}-ld" if [ ! -d $BUILD_DIR ]; then mkdir $BUILD_DIR @@ -121,7 +131,12 @@ fi cd ${BUILD_DIR} -CMAKE_ARGS="-DCMAKE_BUILD_TYPE=${build_type} -DENABLE_THINLTO=0 -DENABLE_TESTS=0 -DENABLE_CLICKHOUSE_SERVER=0 -DENABLE_CLICKHOUSE_CLIENT=0 \ +CMAKE_ARGS="-DCMAKE_BUILD_TYPE=${build_type} \ + -DCMAKE_AR:FILEPATH=${CMAKE_AR_FILEPATH} \ + -DCMAKE_INSTALL_NAME_TOOL=${CMAKE_INSTALL_NAME_TOOL} \ + -DCMAKE_RANLIB:FILEPATH=${CMAKE_RANLIB_FILEPATH} \ + -DLINKER_NAME=${CMAKE_LINKER_NAME} \ + -DENABLE_THINLTO=0 -DENABLE_TESTS=0 -DENABLE_CLICKHOUSE_SERVER=0 -DENABLE_CLICKHOUSE_CLIENT=0 \ -DENABLE_CLICKHOUSE_KEEPER=0 -DENABLE_CLICKHOUSE_KEEPER_CONVERTER=0 -DENABLE_CLICKHOUSE_LOCAL=1 -DENABLE_CLICKHOUSE_SU=0 -DENABLE_CLICKHOUSE_BENCHMARK=0 \ -DENABLE_AZURE_BLOB_STORAGE=1 -DENABLE_CLICKHOUSE_COPIER=0 -DENABLE_CLICKHOUSE_DISKS=0 -DENABLE_CLICKHOUSE_FORMAT=0 -DENABLE_CLICKHOUSE_GIT_IMPORT=0 \ -DENABLE_AWS_S3=1 -DENABLE_HIVE=0 -DENABLE_AVRO=1 \ @@ -150,10 +165,8 @@ CMAKE_ARGS="-DCMAKE_BUILD_TYPE=${build_type} -DENABLE_THINLTO=0 
-DENABLE_TESTS=0 LIBCHDB_SO="libchdb.so" # Build libchdb.so -echo "Running cmake configuration..." +echo "Executing cmake..." cmake ${CMAKE_ARGS} -DENABLE_PYTHON=0 .. - -echo "Building with ninja..." ninja -d keeprsp BINARY=${BUILD_DIR}/programs/clickhouse @@ -208,18 +221,13 @@ LIBCHDB=${LIBCHDB_DIR}/${LIBCHDB_SO} ls -lh ${LIBCHDB} # Build chdb python module -py_version="3.9" -current_py_version=$(python -c "import sys; print(f'{sys.version_info.major}.{sys.version_info.minor}')") -if [ "$current_py_version" != "$py_version" ]; then - echo "Error: Current Python version is $current_py_version, but required version is $py_version" - echo "Please switch to Python $py_version using: pyenv shell $py_version" - exit 1 -fi -cmake ${CMAKE_ARGS} -DENABLE_PYTHON=1 -DPYBIND11_NONLIMITEDAPI_PYTHON_HEADERS_VERSION=${py_version} .. + +CHDB_PYTHON_INCLUDE_DIR_PREFIX="${HOME}/python_include" +cmake ${CMAKE_ARGS} -DENABLE_PYTHON=1 -DCHDB_CROSSCOMPILING=1 -DCHDB_PYTHON_INCLUDE_DIR_PREFIX=${CHDB_PYTHON_INCLUDE_DIR_PREFIX} .. 
ninja -d keeprsp || true # Delete the binary and run ninja -v again to capture the command -/bin/rm -f ${BINARY} +rm -f ${BINARY} cd ${BUILD_DIR} ninja -d keeprsp -v > build.log || true diff --git a/chdb/vars.sh b/chdb/vars.sh index b1b2100a5b6..6513e8e3e20 100755 --- a/chdb/vars.sh +++ b/chdb/vars.sh @@ -9,6 +9,10 @@ pushd ${PROJ_DIR} CHDB_VERSION=$(python3 -c 'import setup; print(setup.get_latest_git_tag())') popd +if [ "$1" == "cross-compile" ]; then + return +fi + # try to use largest llvm-strip version # if none of them are found, use llvm-strip or strip if [ -z "$STRIP" ]; then diff --git a/programs/local/CMakeLists.txt b/programs/local/CMakeLists.txt index 2cade0b59be..15d78c8036c 100644 --- a/programs/local/CMakeLists.txt +++ b/programs/local/CMakeLists.txt @@ -74,14 +74,13 @@ if (USE_PYTHON) ) endforeach(_file) - # get python version, something like python3.x - execute_process(COMMAND python3 -c "import sys; print('python3.'+str(sys.version_info[1]))" - OUTPUT_VARIABLE PYTHON_VERSION - OUTPUT_STRIP_TRAILING_WHITESPACE - ) - # remove all warning, because pybind11 will generate a lot of warning if (OS_LINUX) + # get python version, something like python3.x + execute_process(COMMAND python3 -c "import sys; print('python3.'+str(sys.version_info[1]))" + OUTPUT_VARIABLE PYTHON_VERSION + OUTPUT_STRIP_TRAILING_WHITESPACE + ) # pybind11 will try to find x86_64-linux-gnu/${PYTHON_VERSION}/pyconfig.h # use -idirafter to make it find the right one and not polute the include path # set_source_files_properties(LocalChdb.cpp PROPERTIES COMPILE_FLAGS From 8163ceee459828033a28513531a60923f68ac494 Mon Sep 17 00:00:00 2001 From: wudidapaopao Date: Mon, 1 Dec 2025 18:45:49 +0800 Subject: [PATCH 04/15] chore: add PYBIND11_NOPYTHON flag --- chdb/build_mac_on_linux.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/chdb/build_mac_on_linux.sh b/chdb/build_mac_on_linux.sh index e0ddafa4fbf..702c3c3d4dd 100755 --- a/chdb/build_mac_on_linux.sh +++ 
b/chdb/build_mac_on_linux.sh @@ -223,7 +223,7 @@ ls -lh ${LIBCHDB} # Build chdb python module CHDB_PYTHON_INCLUDE_DIR_PREFIX="${HOME}/python_include" -cmake ${CMAKE_ARGS} -DENABLE_PYTHON=1 -DCHDB_CROSSCOMPILING=1 -DCHDB_PYTHON_INCLUDE_DIR_PREFIX=${CHDB_PYTHON_INCLUDE_DIR_PREFIX} .. +cmake ${CMAKE_ARGS} -DENABLE_PYTHON=1 -DCHDB_CROSSCOMPILING=1 -DCHDB_PYTHON_INCLUDE_DIR_PREFIX=${CHDB_PYTHON_INCLUDE_DIR_PREFIX} -DPYBIND11_NOPYTHON=ON .. ninja -d keeprsp || true # Delete the binary and run ninja -v again to capture the command From a81f71ab14e8e4f1f2365f07c91a8894db88869f Mon Sep 17 00:00:00 2001 From: wudidapaopao Date: Tue, 2 Dec 2025 03:04:09 +0800 Subject: [PATCH 05/15] chore: update cmake --- chdb/build/download_python_headers.sh | 113 ++++++++++++++++++++++++++ chdb/build/install_cctools.sh | 64 +++++++++++++++ chdb/build_mac_on_linux.sh | 79 +++++++----------- chdb/build_pybind11.sh | 93 +++++++++++++-------- contrib/pybind11-cmake/CMakeLists.txt | 28 ++++++- programs/local/CMakeLists.txt | 16 ++-- 6 files changed, 301 insertions(+), 92 deletions(-) create mode 100644 chdb/build/download_python_headers.sh create mode 100644 chdb/build/install_cctools.sh diff --git a/chdb/build/download_python_headers.sh b/chdb/build/download_python_headers.sh new file mode 100644 index 00000000000..aaeb543a650 --- /dev/null +++ b/chdb/build/download_python_headers.sh @@ -0,0 +1,113 @@ +#!/bin/bash + +set -e + +TARGET_DIR="${HOME}/python_include" +TEMP_DIR="${TARGET_DIR}/tmp" + +VERSIONS=( + "3.8.10:3.8:3.8" + "3.9.13:3.9:3.9" + "3.10.11:3.10:3.10" + "3.11.9:3.11:3.11" + "3.12.10:3.12:3.12" + "3.13.9:3.13:3.13" + "3.14.0:3.14:3.14" +) + +cleanup() { + rm -rf "$TEMP_DIR" +} +trap cleanup EXIT + +mkdir -p "$TARGET_DIR" +mkdir -p "$TEMP_DIR" + +for entry in "${VERSIONS[@]}"; do + IFS=':' read -r FULL_VER SUBDIR MINOR_VER <<< "$entry" + + echo "==========================================" + echo "Processing Python ${FULL_VER}..." 
+ echo "==========================================" + + # 检查目标目录是否已存在 + DEST_DIR="${TARGET_DIR}/${SUBDIR}" + if [ -d "$DEST_DIR" ] && [ -f "${DEST_DIR}/Python.h" ]; then + echo "✓ Python ${FULL_VER} headers already installed at ${DEST_DIR}" + echo " Skipping..." + continue + fi + + WORK_DIR="${TEMP_DIR}/${SUBDIR}" + mkdir -p "$WORK_DIR" + cd "$WORK_DIR" + + PKG_URL="https://www.python.org/ftp/python/${FULL_VER}/python-${FULL_VER}-macos11.pkg" + + echo "Downloading: $PKG_URL" + if wget -q --spider "$PKG_URL" 2>/dev/null; then + wget -q --show-progress -O python.pkg "$PKG_URL" + else + echo "ERROR: Failed to download Python ${FULL_VER}" + exit 1 + fi + + echo "Extracting pkg with 7z..." + 7z x -y python.pkg > /dev/null + + PAYLOAD_DIR="" + for dir in Python_Framework.pkg PythonFramework-*.pkg; do + if [ -d "$dir" ] || [ -f "$dir/Payload" ]; then + PAYLOAD_DIR="$dir" + break + fi + done + + if [ -z "$PAYLOAD_DIR" ]; then + PAYLOAD_DIR=$(find . -name "Payload" -type f | head -1 | xargs dirname) + fi + + if [ -z "$PAYLOAD_DIR" ] || [ ! -f "${PAYLOAD_DIR}/Payload" ]; then + echo "ERROR: Cannot find Payload for Python ${FULL_VER}" + exit 1 + fi + + echo "Extracting Payload from ${PAYLOAD_DIR}..." + cd "$PAYLOAD_DIR" + 7z x -y Payload -so 2>/dev/null | cpio -id 2>/dev/null || true + + HEADER_SRC="" + for path in \ + "Versions/${MINOR_VER}/Headers" \ + "Headers" + do + if [ -d "$path" ] && [ -f "$path/Python.h" ]; then + HEADER_SRC="$path" + break + fi + done + + if [ -z "$HEADER_SRC" ]; then + PYTHON_H=$(find . -name "Python.h" -type f | head -1) + if [ -n "$PYTHON_H" ]; then + HEADER_SRC=$(dirname "$PYTHON_H") + fi + fi + + if [ -z "$HEADER_SRC" ] || [ ! 
-f "${HEADER_SRC}/Python.h" ]; then + echo "ERROR: Cannot find headers for Python ${FULL_VER}" + exit 1 + fi + + mkdir -p "$DEST_DIR" + cp -r "${HEADER_SRC}/"* "$DEST_DIR/" + + echo "✓ Python ${FULL_VER} headers installed to ${DEST_DIR}" + echo " Files: $(ls "$DEST_DIR" | wc -l | tr -d ' ') items" +done + +echo "" +echo "==========================================" +echo "Done! Headers installed to: ${TARGET_DIR}" +echo "==========================================" +ls -la "$TARGET_DIR" \ No newline at end of file diff --git a/chdb/build/install_cctools.sh b/chdb/build/install_cctools.sh new file mode 100644 index 00000000000..33fdb608cf4 --- /dev/null +++ b/chdb/build/install_cctools.sh @@ -0,0 +1,64 @@ +#!/bin/bash + +set -e + +# Parse arguments +TARGET_ARCH="${1:-x86_64}" + +# Set Darwin triple based on architecture +if [ "$TARGET_ARCH" == "x86_64" ]; then + DARWIN_TRIPLE="x86_64-apple-darwin" +else + DARWIN_TRIPLE="aarch64-apple-darwin" +fi + +# Install cctools if not already installed +CCTOOLS_INSTALL_DIR="${HOME}/cctools" +CCTOOLS_BIN="${CCTOOLS_INSTALL_DIR}/bin" + +if [ -z "${CCTOOLS:-}" ]; then + echo "CCTOOLS environment variable not set, checking for installation..." >&2 + + # Check if cctools is already installed + if [ -f "${CCTOOLS_BIN}/${DARWIN_TRIPLE}-ld" ]; then + echo "Found existing cctools installation at ${CCTOOLS_INSTALL_DIR}" >&2 + export CCTOOLS="${CCTOOLS_BIN}" + else + echo "cctools not found, installing..." >&2 + + mkdir -p ~/cctools + export CCTOOLS=$(cd ~/cctools && pwd) + cd ${CCTOOLS} + + git clone https://github.com/tpoechtrager/apple-libtapi.git + cd apple-libtapi + git checkout 15dfc2a8c9a2a89d06ff227560a69f5265b692f9 + INSTALLPREFIX=${CCTOOLS} ./build.sh + ./install.sh + cd .. 
+ + git clone https://github.com/chdb-io/cctools-port.git + cd cctools-port/cctools + + # Set cctools target based on architecture + if [ "$TARGET_ARCH" == "x86_64" ]; then + CCTOOLS_TARGET="x86_64-apple-darwin" + else + CCTOOLS_TARGET="aarch64-apple-darwin" + fi + + ./configure --prefix=$(readlink -f ${CCTOOLS}) --with-libtapi=$(readlink -f ${CCTOOLS}) --target=${CCTOOLS_TARGET} + make install + fi +else + echo "Using CCTOOLS from environment variable: ${CCTOOLS}" >&2 +fi + +# Verify cctools installation +if [ ! -f "${CCTOOLS}/${DARWIN_TRIPLE}-ld" ]; then + echo "Error: cctools linker not found at ${CCTOOLS}/${DARWIN_TRIPLE}-ld" >&2 + echo "Please verify cctools installation or set CCTOOLS environment variable correctly" >&2 + exit 1 +fi + +echo "cctools verified: ${CCTOOLS}/${DARWIN_TRIPLE}-ld" >&2 diff --git a/chdb/build_mac_on_linux.sh b/chdb/build_mac_on_linux.sh index 702c3c3d4dd..6efb25e3100 100755 --- a/chdb/build_mac_on_linux.sh +++ b/chdb/build_mac_on_linux.sh @@ -18,14 +18,14 @@ if [[ "$TARGET_ARCH" != "x86_64" && "$TARGET_ARCH" != "arm64" ]]; then exit 1 fi -echo "Cross-compiling chdb for macOS ${TARGET_ARCH} on Linux..." - # Verify we're running on Linux if [ "$(uname)" != "Linux" ]; then echo "Error: This script must be run on Linux" exit 1 fi +echo "Cross-compiling chdb for macOS ${TARGET_ARCH} on Linux..." 
+ # Set architecture-specific variables first if [ "$TARGET_ARCH" == "x86_64" ]; then DARWIN_TRIPLE="x86_64-apple-darwin" @@ -33,6 +33,7 @@ if [ "$TARGET_ARCH" == "x86_64" ]; then TOOLCHAIN_FILE="cmake/darwin/toolchain-x86_64.cmake" BUILD_DIR_SUFFIX="darwin-x86_64" CPU_FEATURES="-DENABLE_AVX=0 -DENABLE_AVX2=0" + SDK_DIR="darwin-x86_64" else # arm64 DARWIN_TRIPLE="aarch64-apple-darwin" @@ -40,58 +41,35 @@ else TOOLCHAIN_FILE="cmake/darwin/toolchain-aarch64.cmake" BUILD_DIR_SUFFIX="darwin-arm64" CPU_FEATURES="-DENABLE_AVX=0 -DENABLE_AVX2=0" + SDK_DIR="darwin-aarch64" fi -# Install cctools if not already installed -CCTOOLS_INSTALL_DIR="${HOME}/cctools" -CCTOOLS_BIN="${CCTOOLS_INSTALL_DIR}/bin" - -if [ -z "${CCTOOLS:-}" ]; then - echo "CCTOOLS environment variable not set, checking for installation..." +# Download macOS SDK +SDK_PATH="${PROJ_DIR}/cmake/toolchain/${SDK_DIR}" +echo "Downloading macOS SDK to ${SDK_PATH}..." +mkdir -p "${SDK_PATH}" +cd "${SDK_PATH}" +if ! curl -L 'https://github.com/phracker/MacOSX-SDKs/releases/download/11.3/MacOSX11.0.sdk.tar.xz' | tar xJ --strip-components=1; then + echo "Error: Failed to download macOS SDK" + exit 1 +fi +echo "macOS SDK downloaded successfully" - # Check if cctools is already installed - if [ -f "${CCTOOLS_BIN}/${DARWIN_TRIPLE}-ld" ]; then - echo "Found existing cctools installation at ${CCTOOLS_INSTALL_DIR}" - export CCTOOLS="${CCTOOLS_BIN}" - else - echo "cctools not found, installing..." - - mkdir ~/cctools - export CCTOOLS=$(cd ~/cctools && pwd) - cd ${CCTOOLS} - - git clone https://github.com/tpoechtrager/apple-libtapi.git - cd apple-libtapi - git checkout 15dfc2a8c9a2a89d06ff227560a69f5265b692f9 - INSTALLPREFIX=${CCTOOLS} ./build.sh - ./install.sh - cd .. 
- - git clone https://github.com/chdb-io/cctools-port.git - cd cctools-port/cctools - - # Set cctools target based on architecture - if [ "$TARGET_ARCH" == "x86_64" ]; then - CCTOOLS_TARGET="x86_64-apple-darwin" - else - CCTOOLS_TARGET="aarch64-apple-darwin" - fi - - ./configure --prefix=$(readlink -f ${CCTOOLS}) --with-libtapi=$(readlink -f ${CCTOOLS}) --target=${CCTOOLS_TARGET} - make install - fi -else - echo "Using CCTOOLS from environment variable: ${CCTOOLS}" +# Download Python headers +echo "Downloading Python headers..." +if ! "${DIR}/build/download_python_headers.sh"; then + echo "Error: Failed to download Python headers" + exit 1 fi -# Verify cctools installation -if [ ! -f "${CCTOOLS}/${DARWIN_TRIPLE}-ld" ]; then - echo "Error: cctools linker not found at ${CCTOOLS}/${DARWIN_TRIPLE}-ld" - echo "Please verify cctools installation or set CCTOOLS environment variable correctly" +# Install cctools using the separate script +if ! eval "$("${DIR}/build/install_cctools.sh" "${TARGET_ARCH}")"; then + echo "Error: Failed to install cctools" exit 1 fi -echo "cctools verified: ${CCTOOLS}/${DARWIN_TRIPLE}-ld" +CCTOOLS_INSTALL_DIR="${HOME}/cctools" +CCTOOLS_BIN="${CCTOOLS_INSTALL_DIR}/bin" # Override tools with cross-compilation versions from cctools export STRIP="${CCTOOLS_BIN}/${DARWIN_TRIPLE}-strip" @@ -221,7 +199,6 @@ LIBCHDB=${LIBCHDB_DIR}/${LIBCHDB_SO} ls -lh ${LIBCHDB} # Build chdb python module - CHDB_PYTHON_INCLUDE_DIR_PREFIX="${HOME}/python_include" cmake ${CMAKE_ARGS} -DENABLE_PYTHON=1 -DCHDB_CROSSCOMPILING=1 -DCHDB_PYTHON_INCLUDE_DIR_PREFIX=${CHDB_PYTHON_INCLUDE_DIR_PREFIX} -DPYBIND11_NOPYTHON=ON .. ninja -d keeprsp || true @@ -304,10 +281,10 @@ cd ${PROJ_DIR} && pwd ccache -s || true -# Skip pybind11 libraries build for cross-compilation -echo "Skipping pybind11 libraries build for cross-compilation" -echo "These should be built separately on the target macOS system using:" -echo " CMAKE_ARGS=\"\${CMAKE_ARGS}\" bash \${DIR}/build_pybind11.sh --all" +if ! 
CMAKE_ARGS="${CMAKE_ARGS}" CHDB_PYTHON_INCLUDE_DIR_PREFIX="${HOME}/python_include" bash ${DIR}/build_pybind11.sh --all --cross-compile; then + echo "Error: Failed to build pybind11 libraries" + exit 1 +fi echo -e "\nCross-compilation for macOS ${TARGET_ARCH} completed successfully!" echo -e "Generated files:" diff --git a/chdb/build_pybind11.sh b/chdb/build_pybind11.sh index 33066b916fe..63332df60f3 100755 --- a/chdb/build_pybind11.sh +++ b/chdb/build_pybind11.sh @@ -4,6 +4,7 @@ set -e build_all=false py_version="" +cross_compile=false for arg in "$@"; do case $arg in @@ -15,11 +16,19 @@ for arg in "$@"; do py_version="${arg#*=}" shift ;; + --cross-compile) + cross_compile=true + shift + ;; esac done DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" -. ${DIR}/vars.sh +if [ "$cross_compile" = true ]; then + . ${DIR}/vars.sh cross-compile +else + . ${DIR}/vars.sh +fi # Check if CMAKE_ARGS is passed from build.sh if [ -z "$CMAKE_ARGS" ]; then @@ -35,6 +44,14 @@ build_pybind11_nonlimitedapi() { local py_cmake_args="${CMAKE_ARGS} -DPYBIND11_NONLIMITEDAPI_PYTHON_HEADERS_VERSION=${py_version}" + # Add cross-compile flags if needed + if [ "$cross_compile" = true ]; then + py_cmake_args="${py_cmake_args} -DCHDB_CROSSCOMPILING=1 -DPYBIND11_NOPYTHON=ON" + local python_include_dir="${CHDB_PYTHON_INCLUDE_DIR_PREFIX:-${HOME}/python_include}" + py_cmake_args="${py_cmake_args} -DCHDB_PYTHON_INCLUDE_DIR_PREFIX=${python_include_dir}" + echo "Cross-compiling mode enabled, using Python headers from ${python_include_dir}" + fi + cmake ${py_cmake_args} -DENABLE_PYTHON=1 .. 
# Build only the pybind11 targets @@ -65,45 +82,55 @@ build_pybind11_nonlimitedapi() { build_all_pybind11_nonlimitedapi() { local python_versions=("3.8" "3.9" "3.10" "3.11" "3.12" "3.13" "3.14") - # Skip Python 3.8 for macOS x86_64 - if [ "$(uname)" == "Darwin" ] && [ "$(uname -m)" == "x86_64" ]; then - python_versions=("3.9" "3.10" "3.11" "3.12" "3.13" "3.14") - fi - echo "Building pybind11 nonlimitedapi libraries for all Python versions..." - # Check if pyenv is available - if [ -z "$(command -v pyenv)" ]; then - echo "Error: pyenv not found. Please install pyenv first." - exit 1 - fi - - for version in "${python_versions[@]}"; do - # Use pyenv to find specific version - local pyenv_version=$(pyenv versions --bare | grep "^${version}\." | head -1) - if [ -z "$pyenv_version" ]; then - echo "Error: Python ${version} not found in pyenv. Please install it with: pyenv install ${version}.x" + if [ "$cross_compile" = true ]; then + # For cross-compilation, use pre-downloaded headers + echo "Cross-compilation mode: using pre-downloaded Python headers" + for version in "${python_versions[@]}"; do + local python_include_dir="${CHDB_PYTHON_INCLUDE_DIR_PREFIX:-${HOME}/python_include}/${version}" + if [ -f "${python_include_dir}/Python.h" ]; then + echo " Found headers for Python ${version} at: ${python_include_dir}" + build_pybind11_nonlimitedapi "${version}" + else + echo "Error: Python.h not found for Python ${version} at ${python_include_dir}" + exit 1 + fi + done + else + # Check if pyenv is available + if [ -z "$(command -v pyenv)" ]; then + echo "Error: pyenv not found. Please install pyenv first." 
exit 1 fi - echo "Found pyenv Python ${pyenv_version}" - export PYENV_VERSION=$pyenv_version - - local python_include=$(python -c "import sysconfig; print(sysconfig.get_path('include'))" 2>/dev/null) - local active_version=$(python --version 2>&1) - echo " Active Python: $active_version" + for version in "${python_versions[@]}"; do + # Use pyenv to find specific version + local pyenv_version=$(pyenv versions --bare | grep "^${version}\." | head -1) + if [ -z "$pyenv_version" ]; then + echo "Error: Python ${version} not found in pyenv. Please install it with: pyenv install ${version}.x" + exit 1 + fi + + echo "Found pyenv Python ${pyenv_version}" + export PYENV_VERSION=$pyenv_version + + local python_include=$(python -c "import sysconfig; print(sysconfig.get_path('include'))" 2>/dev/null) + local active_version=$(python --version 2>&1) + echo " Active Python: $active_version" + + if [ -f "$python_include/Python.h" ]; then + echo " Headers found at: $python_include" + build_pybind11_nonlimitedapi "${version}" + else + echo "Error: Python.h not found for Python ${version} at $python_include" + unset PYENV_VERSION + exit 1 + fi - if [ -f "$python_include/Python.h" ]; then - echo " Headers found at: $python_include" - build_pybind11_nonlimitedapi "${version}" - else - echo "Error: Python.h not found for Python ${version} at $python_include" unset PYENV_VERSION - exit 1 - fi - - unset PYENV_VERSION - done + done + fi echo "Finished building pybind11 nonlimitedapi libraries" } diff --git a/contrib/pybind11-cmake/CMakeLists.txt b/contrib/pybind11-cmake/CMakeLists.txt index 0427cf47b71..da5f8bd964c 100644 --- a/contrib/pybind11-cmake/CMakeLists.txt +++ b/contrib/pybind11-cmake/CMakeLists.txt @@ -6,10 +6,32 @@ endif() string(REPLACE "-Wl,-z,defs" "" CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS}") -if (DEFINED PYBIND11_NONLIMITEDAPI_PYTHON_HEADERS_VERSION) - find_package(Python ${PYBIND11_NONLIMITEDAPI_PYTHON_HEADERS_VERSION} EXACT REQUIRED COMPONENTS Interpreter 
Development) +if(CHDB_CROSSCOMPILING) + # For cross-compiling, manually set Python variables + if (DEFINED PYBIND11_NONLIMITEDAPI_PYTHON_HEADERS_VERSION) + # Parse version like "3.8" into major and minor + string(REPLACE "." ";" VERSION_LIST ${PYBIND11_NONLIMITEDAPI_PYTHON_HEADERS_VERSION}) + list(GET VERSION_LIST 0 Python_VERSION_MAJOR) + list(GET VERSION_LIST 1 Python_VERSION_MINOR) + set(Python_VERSION "${PYBIND11_NONLIMITEDAPI_PYTHON_HEADERS_VERSION}") + + # Set include directory based on provided prefix and version + set(Python_INCLUDE_DIRS "${CHDB_PYTHON_INCLUDE_DIR_PREFIX}/${Python_VERSION_MAJOR}.${Python_VERSION_MINOR}") + else() + # Default to Python 3.8 if not specified + set(Python_VERSION_MAJOR 3) + set(Python_VERSION_MINOR 8) + set(Python_VERSION "3.8") + set(Python_INCLUDE_DIRS "${CHDB_PYTHON_INCLUDE_DIR_PREFIX}/3.8") + endif() + + message(STATUS "Cross-compiling: Using Python ${Python_VERSION} include directory: ${Python_INCLUDE_DIRS}") else() - find_package(Python REQUIRED COMPONENTS Interpreter Development) + if (DEFINED PYBIND11_NONLIMITEDAPI_PYTHON_HEADERS_VERSION) + find_package(Python ${PYBIND11_NONLIMITEDAPI_PYTHON_HEADERS_VERSION} EXACT REQUIRED COMPONENTS Interpreter Development) + else() + find_package(Python REQUIRED COMPONENTS Interpreter Development) + endif() endif() set(LIBRARY_DIR "${ClickHouse_SOURCE_DIR}/contrib/pybind11/") diff --git a/programs/local/CMakeLists.txt b/programs/local/CMakeLists.txt index 15d78c8036c..19704ee5acb 100644 --- a/programs/local/CMakeLists.txt +++ b/programs/local/CMakeLists.txt @@ -61,11 +61,17 @@ if (USE_PYTHON) include_directories(${PYBIND11_INCLUDE_DIR} ${CMAKE_CURRENT_SOURCE_DIR}) # include Python.h - execute_process(COMMAND python3-config --includes - OUTPUT_VARIABLE PYTHON_INCLUDES - OUTPUT_STRIP_TRAILING_WHITESPACE - ) - string(REGEX REPLACE ".*-I([^ ]+).*" "\\1" PYTHON_INCLUDE_DIR ${PYTHON_INCLUDES}) + if(CHDB_CROSSCOMPILING) + # For cross-compiling, use the provided Python include directory + 
set(PYTHON_INCLUDE_DIR "${CHDB_PYTHON_INCLUDE_DIR_PREFIX}/3.8") + message(STATUS "Cross-compiling: Using Python include directory: ${PYTHON_INCLUDE_DIR}") + else() + execute_process(COMMAND python3-config --includes + OUTPUT_VARIABLE PYTHON_INCLUDES + OUTPUT_STRIP_TRAILING_WHITESPACE + ) + string(REGEX REPLACE ".*-I([^ ]+).*" "\\1" PYTHON_INCLUDE_DIR ${PYTHON_INCLUDES}) + endif() foreach(_file ${CHDB_SOURCES}) set_source_files_properties(${_file} From b37ef82c917b04ed20a9d9e1b0f6c947e58ce993 Mon Sep 17 00:00:00 2001 From: wudidapaopao Date: Tue, 2 Dec 2025 15:29:33 +0800 Subject: [PATCH 06/15] chore: update build_mac_on_linux.sh --- chdb/build/install_cctools.sh | 4 ++-- chdb/build_mac_on_linux.sh | 12 ++++++------ chdb/build_pybind11.sh | 11 +++++++++++ contrib/pybind11-cmake/CMakeLists.txt | 2 +- 4 files changed, 20 insertions(+), 9 deletions(-) diff --git a/chdb/build/install_cctools.sh b/chdb/build/install_cctools.sh index 33fdb608cf4..bfc7fe193e2 100644 --- a/chdb/build/install_cctools.sh +++ b/chdb/build/install_cctools.sh @@ -55,10 +55,10 @@ else fi # Verify cctools installation -if [ ! -f "${CCTOOLS}/${DARWIN_TRIPLE}-ld" ]; then +if [ ! -f "${CCTOOLS_BIN}/${DARWIN_TRIPLE}-ld" ]; then echo "Error: cctools linker not found at ${CCTOOLS}/${DARWIN_TRIPLE}-ld" >&2 echo "Please verify cctools installation or set CCTOOLS environment variable correctly" >&2 exit 1 fi -echo "cctools verified: ${CCTOOLS}/${DARWIN_TRIPLE}-ld" >&2 +echo "cctools verified: ${CCTOOLS_BIN}/${DARWIN_TRIPLE}-ld" >&2 diff --git a/chdb/build_mac_on_linux.sh b/chdb/build_mac_on_linux.sh index 6efb25e3100..57a50a826e4 100755 --- a/chdb/build_mac_on_linux.sh +++ b/chdb/build_mac_on_linux.sh @@ -57,17 +57,17 @@ echo "macOS SDK downloaded successfully" # Download Python headers echo "Downloading Python headers..." -if ! "${DIR}/build/download_python_headers.sh"; then +if ! 
bash "${DIR}/build/download_python_headers.sh"; then echo "Error: Failed to download Python headers" exit 1 fi # Install cctools using the separate script -if ! eval "$("${DIR}/build/install_cctools.sh" "${TARGET_ARCH}")"; then +if ! bash "${DIR}/build/install_cctools.sh" "${TARGET_ARCH}"; then echo "Error: Failed to install cctools" exit 1 fi - +# Set CCTOOLS path after installation CCTOOLS_INSTALL_DIR="${HOME}/cctools" CCTOOLS_BIN="${CCTOOLS_INSTALL_DIR}/bin" @@ -249,8 +249,8 @@ if [ ${build_type} == "Debug" ]; then echo -e "\nDebug build, skip strip" else echo -e "\nStrip the binary:" - ${STRIP} --remove-section=.comment --remove-section=.note ${PYCHDB} - ${STRIP} --remove-section=.comment --remove-section=.note ${LIBCHDB} + ${STRIP} -x ${PYCHDB} + ${STRIP} -x ${LIBCHDB} fi echo -e "\nPYCHDB: ${PYCHDB}" @@ -281,7 +281,7 @@ cd ${PROJ_DIR} && pwd ccache -s || true -if ! CMAKE_ARGS="${CMAKE_ARGS}" CHDB_PYTHON_INCLUDE_DIR_PREFIX="${HOME}/python_include" bash ${DIR}/build_pybind11.sh --all --cross-compile; then +if ! CMAKE_ARGS="${CMAKE_ARGS}" CHDB_PYTHON_INCLUDE_DIR_PREFIX="${HOME}/python_include" bash ${DIR}/build_pybind11.sh --all --cross-compile --build_dir=${BUILD_DIR}; then echo "Error: Failed to build pybind11 libraries" exit 1 fi diff --git a/chdb/build_pybind11.sh b/chdb/build_pybind11.sh index 63332df60f3..61a9a338408 100755 --- a/chdb/build_pybind11.sh +++ b/chdb/build_pybind11.sh @@ -5,6 +5,7 @@ set -e build_all=false py_version="" cross_compile=false +custom_build_dir="" for arg in "$@"; do case $arg in @@ -20,6 +21,10 @@ for arg in "$@"; do cross_compile=true shift ;; + --build-dir=*) + custom_build_dir="${arg#*=}" + shift + ;; esac done @@ -30,6 +35,12 @@ else . 
${DIR}/vars.sh fi +# Override BUILD_DIR if custom build dir is specified +if [ -n "$custom_build_dir" ]; then + BUILD_DIR="$custom_build_dir" + echo "Using custom BUILD_DIR: ${BUILD_DIR}" +fi + # Check if CMAKE_ARGS is passed from build.sh if [ -z "$CMAKE_ARGS" ]; then echo "Error: CMAKE_ARGS not provided. This script should be called from build.sh." diff --git a/contrib/pybind11-cmake/CMakeLists.txt b/contrib/pybind11-cmake/CMakeLists.txt index da5f8bd964c..ea9a499448b 100644 --- a/contrib/pybind11-cmake/CMakeLists.txt +++ b/contrib/pybind11-cmake/CMakeLists.txt @@ -94,7 +94,7 @@ message(STATUS "Python_VERSION_MAJOR: ${Python_VERSION_MAJOR}") message(STATUS "Python_VERSION_MINOR: ${Python_VERSION_MINOR}") message(STATUS "Resulting LIBNAME: ${PYBIND11_NONLIMITEDAPI_LIBNAME}") -if(Python_FOUND) +if(Python_FOUND OR CHDB_CROSSCOMPILING) target_include_directories(${PYBIND11_NONLIMITEDAPI_LIBNAME} PRIVATE ${PYBIND11_INCLUDE_DIR}) target_include_directories(${PYBIND11_NONLIMITEDAPI_LIBNAME} PRIVATE ${Python_INCLUDE_DIRS}) target_link_libraries(${PYBIND11_NONLIMITEDAPI_LIBNAME} PUBLIC ch_contrib::pybind11_stubs) From 71ca9038a5e4d0b5accaee8ee4b13ac36b84b93c Mon Sep 17 00:00:00 2001 From: wudidapaopao Date: Tue, 2 Dec 2025 18:17:44 +0800 Subject: [PATCH 07/15] chore: cross compiling supports static build --- chdb/build/build_static_lib_mac_on_linux.sh | 172 +++++++++++--------- chdb/build/create_minimal_libchdb.py | 45 +++-- chdb/build/create_static_libchdb.py | 83 +++++++--- chdb/build_mac_on_linux.sh | 6 +- chdb/build_pybind11.sh | 4 +- 5 files changed, 188 insertions(+), 122 deletions(-) diff --git a/chdb/build/build_static_lib_mac_on_linux.sh b/chdb/build/build_static_lib_mac_on_linux.sh index e63878ee3ca..a06cb9a3346 100755 --- a/chdb/build/build_static_lib_mac_on_linux.sh +++ b/chdb/build/build_static_lib_mac_on_linux.sh @@ -2,12 +2,10 @@ set -e -# Cross-compile chdb static library for macOS (x86_64 or arm64) on Linux -# Usage: ./build_static_lib_mac_on_linux.sh 
[x86_64|arm64] [Release|Debug] - -# Parse arguments TARGET_ARCH=${1:-x86_64} build_type=${2:-Release} +MY_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" +. ${MY_DIR}/../vars.sh cross-compile # Validate architecture if [[ "$TARGET_ARCH" != "x86_64" && "$TARGET_ARCH" != "arm64" ]]; then @@ -34,37 +32,57 @@ fi # Set architecture-specific variables if [ "$TARGET_ARCH" == "x86_64" ]; then DARWIN_TRIPLE="x86_64-apple-darwin" - CMAKE_ARCH="x86_64" TOOLCHAIN_FILE="cmake/darwin/toolchain-x86_64.cmake" - BUILD_DIR_SUFFIX="static-lib-darwin-x86_64" - OUTPUT_SUFFIX="darwin-x86_64" - EXAMPLE_DIR_SUFFIX="darwin-x86_64" + BUILD_DIR_SUFFIX="darwin-x86_64" MACOS_MIN_VERSION="10.15" - # x86_64 specific: disable AVX for compatibility CPU_FEATURES="-DENABLE_AVX=0 -DENABLE_AVX2=0" else # arm64 DARWIN_TRIPLE="aarch64-apple-darwin" - CMAKE_ARCH="aarch64" TOOLCHAIN_FILE="cmake/darwin/toolchain-aarch64.cmake" - BUILD_DIR_SUFFIX="static-lib-darwin-arm64" - OUTPUT_SUFFIX="darwin-arm64" - EXAMPLE_DIR_SUFFIX="darwin-arm64" + BUILD_DIR_SUFFIX="darwin-arm64" MACOS_MIN_VERSION="11.0" - # ARM64 specific: disable x86 features - CPU_FEATURES="-DENABLE_AVX=0 -DENABLE_AVX2=0 -DNO_ARMV81_OR_HIGHER=0" + CPU_FEATURES="-DENABLE_AVX=0 -DENABLE_AVX2=0" fi -# Check if cctools exist for this architecture -if [ ! -f "${CCTOOLS}/bin/${DARWIN_TRIPLE}-ar" ]; then - echo "Error: cctools not found at ${CCTOOLS}/bin/${DARWIN_TRIPLE}-ar" - echo "Tip: You may need to rebuild cctools with support for ${TARGET_ARCH}" +# Download macOS SDK +SDK_PATH="${PROJ_DIR}/cmake/toolchain/${SDK_DIR}" +echo "Downloading macOS SDK to ${SDK_PATH}..." +mkdir -p "${SDK_PATH}" +cd "${SDK_PATH}" +if ! 
curl -L 'https://github.com/phracker/MacOSX-SDKs/releases/download/11.3/MacOSX11.0.sdk.tar.xz' | tar xJ --strip-components=1; then + echo "Error: Failed to download macOS SDK" exit 1 fi +echo "macOS SDK downloaded successfully" -MY_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" +# Download Python headers +echo "Downloading Python headers..." +if ! bash "${DIR}/build/download_python_headers.sh"; then + echo "Error: Failed to download Python headers" + exit 1 +fi -. ${MY_DIR}/../vars.sh +# Install cctools +if ! bash "${DIR}/build/install_cctools.sh" "${TARGET_ARCH}"; then + echo "Error: Failed to install cctools" + exit 1 +fi +# Set CCTOOLS path after installation +CCTOOLS_INSTALL_DIR="${HOME}/cctools" +CCTOOLS_BIN="${CCTOOLS_INSTALL_DIR}/bin" + +# Override tools with cross-compilation versions from cctools +export STRIP="${CCTOOLS_BIN}/${DARWIN_TRIPLE}-strip" +export AR="${CCTOOLS_BIN}/${DARWIN_TRIPLE}-ar" +export NM="${CCTOOLS_BIN}/${DARWIN_TRIPLE}-nm" +export LDD="${CCTOOLS_BIN}/${DARWIN_TRIPLE}-otool -L" + +echo "Using cross-compilation tools:" +echo " STRIP: ${STRIP}" +echo " AR: ${AR}" +echo " NM: ${NM}" +echo " LDD: ${LDD}" BUILD_DIR=${PROJ_DIR}/build-${BUILD_DIR_SUFFIX} @@ -81,6 +99,10 @@ ICU="-DENABLE_ICU=0" RUST_FEATURES="-DENABLE_RUST=0" JEMALLOC="-DENABLE_JEMALLOC=0" LLVM="-DENABLE_EMBEDDED_COMPILER=0 -DENABLE_DWARF_PARSER=0" +CMAKE_AR_FILEPATH="${CCTOOLS_BIN}/${DARWIN_TRIPLE}-ar" +CMAKE_INSTALL_NAME_TOOL="${CCTOOLS_BIN}/${DARWIN_TRIPLE}-install_name_tool" +CMAKE_RANLIB_FILEPATH="${CCTOOLS_BIN}/${DARWIN_TRIPLE}-ranlib" +CMAKE_LINKER_NAME="${CCTOOLS_BIN}/${DARWIN_TRIPLE}-ld" if [ ! 
-d $BUILD_DIR ]; then mkdir $BUILD_DIR @@ -88,7 +110,12 @@ fi cd ${BUILD_DIR} -CMAKE_ARGS="-DCMAKE_BUILD_TYPE=${build_type} -DENABLE_THINLTO=0 -DENABLE_TESTS=0 -DENABLE_CLICKHOUSE_SERVER=0 -DENABLE_CLICKHOUSE_CLIENT=0 \ +CMAKE_ARGS="-DCMAKE_BUILD_TYPE=${build_type} \ + -DCMAKE_AR:FILEPATH=${CMAKE_AR_FILEPATH} \ + -DCMAKE_INSTALL_NAME_TOOL=${CMAKE_INSTALL_NAME_TOOL} \ + -DCMAKE_RANLIB:FILEPATH=${CMAKE_RANLIB_FILEPATH} \ + -DLINKER_NAME=${CMAKE_LINKER_NAME} \ + -DENABLE_THINLTO=0 -DENABLE_TESTS=0 -DENABLE_CLICKHOUSE_SERVER=0 -DENABLE_CLICKHOUSE_CLIENT=0 \ -DENABLE_CLICKHOUSE_KEEPER=0 -DENABLE_CLICKHOUSE_KEEPER_CONVERTER=0 -DENABLE_CLICKHOUSE_LOCAL=1 -DENABLE_CLICKHOUSE_SU=0 -DENABLE_CLICKHOUSE_BENCHMARK=0 \ -DENABLE_AZURE_BLOB_STORAGE=1 -DENABLE_CLICKHOUSE_COPIER=0 -DENABLE_CLICKHOUSE_DISKS=0 -DENABLE_CLICKHOUSE_FORMAT=0 -DENABLE_CLICKHOUSE_GIT_IMPORT=0 \ -DENABLE_AWS_S3=1 -DENABLE_HIVE=0 -DENABLE_AVRO=1 \ @@ -111,18 +138,10 @@ CMAKE_ARGS="-DCMAKE_BUILD_TYPE=${build_type} -DENABLE_THINLTO=0 -DENABLE_TESTS=0 -DENABLE_AVX512=0 -DENABLE_AVX512_VBMI=0 \ -DENABLE_LIBFIU=1 \ -DCHDB_VERSION=${CHDB_VERSION} \ - -DCMAKE_AR:FILEPATH=${CCTOOLS}/bin/${DARWIN_TRIPLE}-ar \ - -DCMAKE_INSTALL_NAME_TOOL=${CCTOOLS}/bin/${DARWIN_TRIPLE}-install_name_tool \ - -DCMAKE_RANLIB:FILEPATH=${CCTOOLS}/bin/${DARWIN_TRIPLE}-ranlib \ - -DCMAKE_LINKER:FILEPATH=${CCTOOLS}/bin/${DARWIN_TRIPLE}-ld \ - -DLINKER_NAME=${CCTOOLS}/bin/${DARWIN_TRIPLE}-ld \ -DCMAKE_TOOLCHAIN_FILE=${TOOLCHAIN_FILE} \ " -echo "Running cmake configuration..." cmake ${CMAKE_ARGS} -DENABLE_PYTHON=0 -DCHDB_STATIC_LIBRARY_BUILD=1 .. - -echo "Building with ninja..." ninja -d keeprsp BINARY=${BUILD_DIR}/programs/clickhouse @@ -137,68 +156,73 @@ cd ${MY_DIR} # Create static library echo "Creating static library libchdb.a for macOS..." -python3 create_static_libchdb.py +python3 create_static_libchdb.py --cross-compile --build-dir=build-${BUILD_DIR_SUFFIX} --ar-cmd=${AR} if [ $? 
-ne 0 ]; then echo "Error: Failed to create static library" exit 1 fi # Prepare cpp-example directory and copy header file -echo "Preparing cpp-example-${EXAMPLE_DIR_SUFFIX} directory..." -if [ ! -d ${MY_DIR}/cpp-example-${EXAMPLE_DIR_SUFFIX} ]; then - cp -r ${MY_DIR}/cpp-example ${MY_DIR}/cpp-example-${EXAMPLE_DIR_SUFFIX} -fi - -cd ${MY_DIR}/cpp-example-${EXAMPLE_DIR_SUFFIX} +echo "Preparing cpp-example directory..." +cd ${MY_DIR}/cpp-example cp ${PROJ_DIR}/programs/local/chdb.h . cp ${MY_DIR}/libchdb.a . -echo "Copied chdb.h and libchdb.a to cpp-example-${EXAMPLE_DIR_SUFFIX} directory" +echo "Copied chdb.h and libchdb.a to cpp-example directory" -echo "Note: Skipping C++ example compilation for cross-compilation." -echo "The example can be compiled on the target macOS ${TARGET_ARCH} system with:" -echo " clang chdb_example.cpp -o chdb_example -mmacosx-version-min=${MACOS_MIN_VERSION} -L. -lchdb -liconv -framework CoreFoundation" +# Compile example program +echo "Compiling chdb_example.cpp..." +if [ "$TARGET_ARCH" == "x86_64" ]; then + SYSROOT="${PROJ_DIR}/cmake/toolchain/darwin-x86_64" +else + SYSROOT="${PROJ_DIR}/cmake/toolchain/darwin-aarch64" +fi +clang-19 chdb_example.cpp -o chdb_example \ + --target=${DARWIN_TRIPLE} \ + -isysroot ${SYSROOT} \ + -mmacosx-version-min=${MACOS_MIN_VERSION} \ + --ld-path=${CCTOOLS_BIN}/${DARWIN_TRIPLE}-ld \ + -L. -lchdb -liconv \ + -framework CoreFoundation \ + -Wl,-map,chdb_example.map +if [ $? -ne 0 ]; then + echo "Error: Failed to compile chdb_example.cpp" + exit 1 +fi -# For cross-compilation, we'll create a minimal analysis without running the compiled binary -echo "Creating analysis files for cross-compilation..." +# Copy map file to parent directory for analysis +echo "Copying chdb_example.map to parent directory..." +cp chdb_example.map ${MY_DIR}/ +cd ${MY_DIR} -# Copy map file analysis tools but don't run them (since we can't execute macOS binaries on Linux) -echo "Note: Skipping map file analysis for cross-compilation." 
-echo "Run the following on macOS ${TARGET_ARCH} to create minimal library:" -echo " cd ${MY_DIR}/cpp-example-${EXAMPLE_DIR_SUFFIX}" -echo " clang chdb_example.cpp -o chdb_example -mmacosx-version-min=${MACOS_MIN_VERSION} -L. -lchdb -liconv -framework CoreFoundation -Wl,-map,chdb_example.map" -echo " cd ${MY_DIR}" -echo " python3 extract_chdb_objects.py --map-file=cpp-example-${EXAMPLE_DIR_SUFFIX}/chdb_example.map" -echo " python3 create_minimal_libchdb.py" +# Analyze map file to extract chdb objects +echo "Analyzing map file to extract chdb objects..." +python3 extract_chdb_objects.py +if [ $? -ne 0 ]; then + echo "Error: Failed to analyze map file" + exit 1 +fi -# For now, we'll use the full libchdb.a as the final output -echo "Using full libchdb.a for cross-compilation (minimal version requires macOS execution)" +# Create minimal libchdb.a based on extracted objects +echo "Creating minimal libchdb.a..." +python3 create_minimal_libchdb.py --ar-cmd=${AR} +if [ $? -ne 0 ]; then + echo "Error: Failed to create minimal libchdb.a" + exit 1 +fi -# Strip the libchdb.a if not debug build +# Strip the libchdb_minimal.a if [ ${build_type} == "Debug" ]; then echo -e "\nDebug build, skip strip" else - echo -e "\nStrip the libchdb.a:" - # Use macOS-compatible strip command via cctools - if [ -f "${CCTOOLS}/bin/${DARWIN_TRIPLE}-strip" ]; then - ${CCTOOLS}/bin/${DARWIN_TRIPLE}-strip -S -x libchdb.a - else - echo "Warning: macOS strip not found, skipping strip step" - fi + echo -e "\nStrip the libchdb_minimal.a:" + ${STRIP} -x libchdb_minimal.a fi -echo "Note: Skipping Go test for cross-compilation." - # Copy final library to project root -OUTPUT_NAME="libchdb-${OUTPUT_SUFFIX}.a" -echo "Copying libchdb.a to project root as ${OUTPUT_NAME}..." -cp ${MY_DIR}/libchdb.a ${PROJ_DIR}/${OUTPUT_NAME} -echo "Final ${OUTPUT_NAME} created at ${PROJ_DIR}/${OUTPUT_NAME}" +echo "Copying libchdb_minimal.a to project root as libchdb.a..." 
+cp ${MY_DIR}/libchdb_minimal.a ${PROJ_DIR}/libchdb.a +echo "Final libchdb.a created at ${PROJ_DIR}/libchdb.a" # Print final library size -echo "Final ${OUTPUT_NAME} size:" -ls -lh ${PROJ_DIR}/${OUTPUT_NAME} - -echo "Cross-compilation for macOS ${TARGET_ARCH} completed successfully!" -echo "Generated files:" -echo " - ${PROJ_DIR}/${OUTPUT_NAME}" -echo " - ${MY_DIR}/cpp-example-${EXAMPLE_DIR_SUFFIX}/ (for testing on macOS ${TARGET_ARCH})" \ No newline at end of file +echo "Final libchdb.a size:" +ls -lh ${PROJ_DIR}/libchdb.a diff --git a/chdb/build/create_minimal_libchdb.py b/chdb/build/create_minimal_libchdb.py index 849bec0e766..6faf5a30004 100644 --- a/chdb/build/create_minimal_libchdb.py +++ b/chdb/build/create_minimal_libchdb.py @@ -4,22 +4,31 @@ Create minimized libchdb.a based on chdb_objects.txt """ +import argparse import os import platform import sys import subprocess -IS_MACOS_X86 = (platform.system() == "Darwin" and platform.machine() in ["x86_64", "i386"]) AR_CMD = "" -if IS_MACOS_X86: - AR_CMD = "llvm-ar" - print(f"Using llvm-ar for macOS x86 platform to avoid archive corruption issues") -else: - AR_CMD = "ar" - print(f"Using standard ar command for platform: {platform.system()} {platform.machine()}") +def setup_ar_cmd(ar_cmd=None): + """Setup AR command based on arguments or platform""" + global AR_CMD + + if ar_cmd: + AR_CMD = ar_cmd + print(f"Using custom ar command: {AR_CMD}") + else: + IS_MACOS_X86 = (platform.system() == "Darwin" and platform.machine() in ["x86_64", "i386"]) + if IS_MACOS_X86: + AR_CMD = "llvm-ar" + print(f"Using llvm-ar for macOS x86 platform to avoid archive corruption issues") + else: + AR_CMD = "ar" + print(f"Using standard ar command for platform: {platform.system()} {platform.machine()}") -print(f"Selected ar command: {AR_CMD}") + print(f"Selected ar command: {AR_CMD}") def read_required_objects(objects_file="chdb_objects.txt"): """Read list of required target files""" @@ -241,7 +250,20 @@ def 
create_minimal_library(extracted_files, temp_dir, output_lib="libchdb_minima return True +def parse_args(): + """Parse command line arguments""" + parser = argparse.ArgumentParser(description='Create minimized libchdb.a based on chdb_objects.txt') + parser.add_argument('--ar-cmd', type=str, default=None, + help='Path to ar command (for cross-compilation)') + return parser.parse_args() + def main(): + # Parse arguments + args = parse_args() + + # Setup AR command + setup_ar_cmd(ar_cmd=args.ar_cmd) + print("Starting creation of minimized libchdb.a") print("=" * 50) @@ -251,13 +273,6 @@ def main(): temp_dir = "libchdb_objects_tmp_dir" output_lib = "libchdb_minimal.a" - if len(sys.argv) > 1: - chdb_objects_file = sys.argv[1] - if len(sys.argv) > 2: - original_lib = sys.argv[2] - if len(sys.argv) > 3: - output_lib = sys.argv[3] - # Read required object files required_objects = read_required_objects(chdb_objects_file) if not required_objects: diff --git a/chdb/build/create_static_libchdb.py b/chdb/build/create_static_libchdb.py index c8746fd1c43..5f7fed8398b 100755 --- a/chdb/build/create_static_libchdb.py +++ b/chdb/build/create_static_libchdb.py @@ -4,6 +4,7 @@ Script to create libchdb.a static library """ +import argparse import os import platform import re @@ -11,33 +12,54 @@ import sys import shutil -# Detect if running on macOS x86 (where ar -d has problematic behavior) -IS_MACOS_X86 = (platform.system() == "Darwin" and platform.machine() in ["x86_64", "i386"]) -IS_MACOS = platform.system() == "Darwin" +# Global variables (will be set based on arguments) +IS_MACOS_X86 = False +IS_MACOS = False +CROSS_COMPILE = False AR_CMD = "" +BUILD_DIR = "" + +def setup_platform(cross_compile=False, ar_cmd=None): + """Setup platform-specific variables""" + global IS_MACOS_X86, IS_MACOS, CROSS_COMPILE, AR_CMD + + if cross_compile: + # Cross-compiling for macOS on Linux + IS_MACOS = True + CROSS_COMPILE = True + if ar_cmd: + AR_CMD = ar_cmd + else: + AR_CMD = "ar" + 
print(f"Cross-compile mode: targeting macOS") + else: + # Native build + IS_MACOS_X86 = (platform.system() == "Darwin" and platform.machine() in ["x86_64", "i386"]) + IS_MACOS = platform.system() == "Darwin" + if IS_MACOS_X86: + AR_CMD = "llvm-ar" + print(f"Using llvm-ar for macOS x86 platform to avoid archive corruption issues") + else: + AR_CMD = "ar" + print(f"Using standard ar command for platform: {platform.system()} {platform.machine()}") -# Choose ar command based on platform -if IS_MACOS_X86: - AR_CMD = "llvm-ar" - print(f"Using llvm-ar for macOS x86 platform to avoid archive corruption issues") -else: - AR_CMD = "ar" - print(f"Using standard ar command for platform: {platform.system()} {platform.machine()}") - -print(f"Selected ar command: {AR_CMD}") + print(f"Selected ar command: {AR_CMD}") + print(f"CROSS_COMPILE: {CROSS_COMPILE}, IS_MACOS: {IS_MACOS}") -def parse_libchdb_cmd(): +def parse_libchdb_cmd(build_dir_override=None): """Extract object files and static libraries""" + global BUILD_DIR # Get the directory containing this script, then go up two levels script_dir = os.path.dirname(os.path.abspath(__file__)) project_root = os.path.dirname(os.path.dirname(script_dir)) - if IS_MACOS_X86: - build_dir = 'buildlib' + if build_dir_override: + build_dir = build_dir_override else: build_dir = 'build-static-lib' + BUILD_DIR = build_dir print(f"Using build directory: {build_dir}") # First, check build.log to see if it contains @CMakeFiles/clickhouse.rsp @@ -153,7 +175,7 @@ def create_static_library(obj_files, lib_files): extracted_objects = [] # Add libiconv.a to the list of libraries to extract on macOS - if IS_MACOS: + if not CROSS_COMPILE and IS_MACOS: libiconv_path = "/opt/homebrew/opt/libiconv/lib/libiconv.a" if os.path.exists(libiconv_path): lib_files.append(libiconv_path) @@ -311,16 +333,6 @@ def create_static_library(obj_files, lib_files): print(f"Extracted {target_filename} → {unique_filename} (group #{file_index})") - # if IS_MACOS_X86: - # # Move 
the first occurrence to the end (changes extraction order) - # move_result = subprocess.run([AR_CMD, "-m", working_archive, target_filename], - # capture_output=True) - - # if move_result.returncode != 0: - # print(f"Warning: Failed to move {target_filename} in archive") - # print(f"STDERR: {move_result.stderr.decode() if move_result.stderr else 'No error message'}") - # return False - # else: # Delete this occurrence from working archive delete_result = subprocess.run([AR_CMD, "d", working_archive, target_filename], capture_output=True) @@ -470,12 +482,29 @@ def create_static_library(obj_files, lib_files): finally: pass +def parse_args(): + """Parse command line arguments""" + parser = argparse.ArgumentParser(description='Create libchdb.a static library') + parser.add_argument('--cross-compile', '-c', action='store_true', + help='Cross-compile mode (targeting macOS from Linux)') + parser.add_argument('--build-dir', '-b', type=str, default=None, + help='Build directory path (relative to project root or absolute)') + parser.add_argument('--ar-cmd', type=str, default=None, + help='Path to ar command (for cross-compilation)') + return parser.parse_args() + def main(): print("Creating libchdb.a static library...") + # Parse arguments + args = parse_args() + + # Setup platform based on arguments + setup_platform(cross_compile=args.cross_compile, ar_cmd=args.ar_cmd) + try: # Parse the command file - obj_files, lib_files = parse_libchdb_cmd() + obj_files, lib_files = parse_libchdb_cmd(build_dir_override=args.build_dir) # Create static library success = create_static_library(obj_files, lib_files) diff --git a/chdb/build_mac_on_linux.sh b/chdb/build_mac_on_linux.sh index 57a50a826e4..6f604cd692f 100755 --- a/chdb/build_mac_on_linux.sh +++ b/chdb/build_mac_on_linux.sh @@ -29,7 +29,6 @@ echo "Cross-compiling chdb for macOS ${TARGET_ARCH} on Linux..." 
# Set architecture-specific variables first if [ "$TARGET_ARCH" == "x86_64" ]; then DARWIN_TRIPLE="x86_64-apple-darwin" - CMAKE_ARCH="x86_64" TOOLCHAIN_FILE="cmake/darwin/toolchain-x86_64.cmake" BUILD_DIR_SUFFIX="darwin-x86_64" CPU_FEATURES="-DENABLE_AVX=0 -DENABLE_AVX2=0" @@ -37,7 +36,6 @@ if [ "$TARGET_ARCH" == "x86_64" ]; then else # arm64 DARWIN_TRIPLE="aarch64-apple-darwin" - CMAKE_ARCH="aarch64" TOOLCHAIN_FILE="cmake/darwin/toolchain-aarch64.cmake" BUILD_DIR_SUFFIX="darwin-arm64" CPU_FEATURES="-DENABLE_AVX=0 -DENABLE_AVX2=0" @@ -62,7 +60,7 @@ if ! bash "${DIR}/build/download_python_headers.sh"; then exit 1 fi -# Install cctools using the separate script +# Install cctools if ! bash "${DIR}/build/install_cctools.sh" "${TARGET_ARCH}"; then echo "Error: Failed to install cctools" exit 1 @@ -281,7 +279,7 @@ cd ${PROJ_DIR} && pwd ccache -s || true -if ! CMAKE_ARGS="${CMAKE_ARGS}" CHDB_PYTHON_INCLUDE_DIR_PREFIX="${HOME}/python_include" bash ${DIR}/build_pybind11.sh --all --cross-compile --build_dir=${BUILD_DIR}; then +if ! 
CMAKE_ARGS="${CMAKE_ARGS}" CHDB_PYTHON_INCLUDE_DIR_PREFIX="${HOME}/python_include" bash ${DIR}/build_pybind11.sh --all --cross-compile --build-dir=${BUILD_DIR}; then echo "Error: Failed to build pybind11 libraries" exit 1 fi diff --git a/chdb/build_pybind11.sh b/chdb/build_pybind11.sh index 61a9a338408..ff7118b3867 100755 --- a/chdb/build_pybind11.sh +++ b/chdb/build_pybind11.sh @@ -73,7 +73,7 @@ build_pybind11_nonlimitedapi() { # Copy the built library to output directory local lib_name="pybind11nonlimitedapi_chdb_${py_version}" - if [ "$(uname)" == "Darwin" ]; then + if [ "$(uname)" == "Darwin" ] || [ "$cross_compile" = true ]; then local lib_file="lib${lib_name}.dylib" else local lib_file="lib${lib_name}.so" @@ -147,7 +147,7 @@ build_all_pybind11_nonlimitedapi() { } copy_stubs() { - if [ "$(uname)" == "Darwin" ]; then + if [ "$(uname)" == "Darwin" ] || [ "$cross_compile" = true ]; then local lib_file="libpybind11nonlimitedapi_stubs.dylib" else local lib_file="libpybind11nonlimitedapi_stubs.so" From 7b96798410e6e0395427b73a171db4cd17ce06ac Mon Sep 17 00:00:00 2001 From: wudidapaopao Date: Tue, 2 Dec 2025 21:26:03 +0800 Subject: [PATCH 08/15] test: add cross-compile workflow --- .github/workflows/build_macos_x86_wheels.yml | 266 +++++++++---------- chdb/build/build_static_lib.sh | 18 +- chdb/build/build_static_lib_mac_on_linux.sh | 14 +- chdb/build/test_go_example.sh | 37 +++ chdb/build_mac_on_linux.sh | 3 +- 5 files changed, 175 insertions(+), 163 deletions(-) create mode 100644 chdb/build/test_go_example.sh diff --git a/.github/workflows/build_macos_x86_wheels.yml b/.github/workflows/build_macos_x86_wheels.yml index 1791a9fefa5..e0761e2b2dc 100644 --- a/.github/workflows/build_macos_x86_wheels.yml +++ b/.github/workflows/build_macos_x86_wheels.yml @@ -21,10 +21,118 @@ on: - '**/*.md' jobs: - build_universal_wheel: - name: Build Universal Wheel (macOS x86_64) + build_universal_wheel_on_linux: + name: Build on Linux (cross-compile for macOS x86_64) + runs-on: 
gh-64c + # if: ${{ !github.event.pull_request.draft }} + timeout-minutes: 600 + steps: + - name: Install Python build dependencies + run: | + sudo apt-get update + sudo apt-get install -y make build-essential libssl-dev zlib1g-dev \ + libbz2-dev libreadline-dev libsqlite3-dev wget curl llvm \ + libncursesw5-dev xz-utils tk-dev libxml2-dev libxmlsec1-dev \ + libffi-dev liblzma-dev p7zip-full + - name: Upgrade Rust toolchain + run: | + rustup toolchain install nightly-2025-07-07 + rustup default nightly-2025-07-07 + rustup component add rust-src + rustc --version + cargo --version + - name: Install clang++ for Ubuntu + run: | + pwd + uname -a + wget https://apt.llvm.org/llvm.sh + chmod +x llvm.sh + sudo ./llvm.sh 19 + which clang++-19 + clang++-19 --version + sudo apt-get install -y make cmake ccache ninja-build yasm gawk wget + # Install WebAssembly linker (wasm-ld) + sudo apt-get install -y lld-19 + # Create symlink for wasm-ld + if ! command -v wasm-ld &> /dev/null; then + sudo ln -sf /usr/bin/wasm-ld-19 /usr/bin/wasm-ld || true + fi + which wasm-ld || echo "wasm-ld not found in PATH" + ccache -s + - name: Update git + run: | + sudo add-apt-repository ppa:git-core/ppa -y + sudo apt-get update + sudo apt-get install -y git + git --version + - uses: actions/checkout@v3 + with: + fetch-depth: 0 + - name: Update submodules + run: | + git submodule update --init --recursive --jobs 4 + - name: ccache + uses: hendrikmuhs/ccache-action@v1.2 + with: + key: ubuntu-22.04-x86_64-cross-compile + max-size: 5G + append-timestamp: true + - name: remove old clang and link clang-19 to clang + run: | + sudo rm -f /usr/bin/clang || true + sudo ln -s /usr/bin/clang-19 /usr/bin/clang + sudo rm -f /usr/bin/clang++ || true + sudo ln -s /usr/bin/clang++-19 /usr/bin/clang++ + which clang++ + clang++ --version + - name: Run chdb/build_mac_on_linux.sh + timeout-minutes: 600 + run: | + source ~/.cargo/env + bash ./chdb/build_mac_on_linux.sh x86_64 + continue-on-error: false + - name: Run 
chdb/build/build_static_lib_mac_on_linux.sh + timeout-minutes: 600 + run: | + source ~/.cargo/env + bash ./chdb/build/build_static_lib_mac_on_linux.sh x86_64 + continue-on-error: false + - name: Check ccache statistics + run: | + ccache -s + ls -lh chdb + df -h + - name: Keep killall ccache and wait for ccache to finish + if: always() + run: | + sleep 60 + while ps -ef | grep ccache | grep -v grep; do \ + killall ccache; \ + sleep 10; \ + done + - name: Upload build artifacts + uses: actions/upload-artifact@v4 + with: + name: macos-x86_64-build-artifacts + path: | + ./libchdb.so + ./libchdb.a + ./chdb/_chdb.abi3.so + ./chdb/libpybind11nonlimitedapi_stubs.dylib + ./chdb/libpybind11nonlimitedapi_chdb_3.8.dylib + ./chdb/libpybind11nonlimitedapi_chdb_3.9.dylib + ./chdb/libpybind11nonlimitedapi_chdb_3.10.dylib + ./chdb/libpybind11nonlimitedapi_chdb_3.11.dylib + ./chdb/libpybind11nonlimitedapi_chdb_3.12.dylib + ./chdb/libpybind11nonlimitedapi_chdb_3.13.dylib + ./chdb/libpybind11nonlimitedapi_chdb_3.14.dylib + retention-days: 2 + + test_on_macos: + name: Test on macOS x86_64 runs-on: macos-15-intel - if: ${{ !github.event.pull_request.draft }} + needs: build_universal_wheel_on_linux + # if: ${{ !github.event.pull_request.draft }} timeout-minutes: 600 steps: - name: Check machine architecture @@ -86,58 +194,9 @@ jobs: - name: Remove /usr/local/bin/python3 run: | sudo rm -f /usr/local/bin/python3 - - name: Install clang++ for macOS - run: | - pwd - uname -a - export HOMEBREW_NO_INSTALLED_DEPENDENTS_CHECK=1 - brew update - brew install ca-certificates lz4 mpdecimal openssl@3 readline sqlite xz z3 zstd - brew install --ignore-dependencies llvm@19 - brew install git ninja libtool gettext gcc binutils grep findutils nasm lld@19 libiconv - brew install ccache || echo "ccache installation failed, continuing without it" - brew install go - cd /usr/local/opt/ && sudo rm -f llvm && sudo ln -sf llvm@19 llvm - export PATH=$(brew --prefix llvm@19)/bin:$(brew --prefix lld@19)/bin:$PATH 
- which clang++ - clang++ --version - which go - go version - ccache -s || echo "ccache not available yet" - - name: Scan SQLite vulnerabilities with grype - run: | - # Install grype - curl -sSfL https://raw.githubusercontent.com/anchore/grype/main/install.sh | sh -s -- -b /usr/local/bin - # Update grype vulnerability database - grype db update - # Check SQLite vulnerabilities in Homebrew packages - echo "Scanning SQLite packages for vulnerabilities..." - GRYPE_RAW_OUTPUT=$(grype dir:/opt/homebrew --scope all-layers 2>/dev/null || true) - echo "Raw grype output:" - echo "$GRYPE_RAW_OUTPUT" - SQLITE_SCAN_OUTPUT=$(echo "$GRYPE_RAW_OUTPUT" | grep -i sqlite || true) - if [ -n "$SQLITE_SCAN_OUTPUT" ]; then - echo "❌ SQLite vulnerabilities found in packages! Build should be reviewed." - echo "SQLite vulnerability details:" - echo "$SQLITE_SCAN_OUTPUT" - exit 1 - else - echo "✅ No SQLite vulnerabilities found" - fi - continue-on-error: false - - name: Upgrade Rust toolchain - run: | - rustup toolchain install nightly-2025-07-07 - rustup default nightly-2025-07-07 - rustup component add rust-src - rustc --version - cargo --version - uses: actions/checkout@v3 with: fetch-depth: 0 - - name: Update submodules - run: | - git submodule update --init --recursive --jobs 4 - name: Update version for release if: startsWith(github.ref, 'refs/tags/v') run: | @@ -151,82 +210,35 @@ jobs: bump-my-version replace --new-version $TAG_NAME echo "Version files updated to $TAG_NAME" pyenv shell --unset - - name: ccache - uses: hendrikmuhs/ccache-action@v1.2 + - name: Download build artifacts + uses: actions/download-artifact@v4 with: - key: ${{ runner.os }}-x86_64-ccache - max-size: 10G - append-timestamp: true - env: - CCACHE_NOHASHDIR: "true" - - name: Run chdb/build.sh + name: macos-x86_64-build-artifacts + path: ./artifacts + - name: Restore artifacts to original paths + run: | + mv ./artifacts/libchdb.so ./ + mv ./artifacts/libchdb.a ./ + mv ./artifacts/chdb/_chdb.abi3.so ./chdb/ + mv 
./artifacts/chdb/libpybind11nonlimitedapi_stubs.dylib ./chdb/ + for v in 8 9 10 11 12 13 14; do + mv ./artifacts/chdb/libpybind11nonlimitedapi_chdb_3.${v}.dylib ./chdb/ + done + ls -lh ./libchdb.so ./libchdb.a + ls -lh ./chdb/*.so ./chdb/*.dylib + - name: Run chdb/test_smoke.sh timeout-minutes: 600 run: | export PATH="$HOME/.pyenv/bin:$PATH" eval "$(pyenv init -)" - source ~/.cargo/env pyenv shell 3.9 - export PATH=$(brew --prefix llvm@19)/bin:$(brew --prefix lld@19)/bin:/usr/local/opt/grep/libexec/gnubin:/usr/local/opt/binutils/bin:$PATH:/usr/local/opt/findutils/libexec/gnubin - export CC=$(brew --prefix llvm@19)/bin/clang - export CXX=$(brew --prefix llvm@19)/bin/clang++ bash gen_manifest.sh - bash ./chdb/build.sh - pyenv shell 3.9 bash -x ./chdb/test_smoke.sh continue-on-error: false - - name: Run chdb/build/build_static_lib.sh + - name: Run chdb/build/test_go_example.sh timeout-minutes: 600 run: | - export PATH="$HOME/.pyenv/bin:$PATH" - eval "$(pyenv init -)" - source ~/.cargo/env - export PATH=$(brew --prefix llvm@19)/bin:$(brew --prefix lld@19)/bin:/usr/local/opt/grep/libexec/gnubin:/usr/local/opt/binutils/bin:$PATH:/usr/local/opt/findutils/libexec/gnubin - export CC=$(brew --prefix llvm@19)/bin/clang - export CXX=$(brew --prefix llvm@19)/bin/clang++ - pyenv shell 3.9 - bash ./chdb/build/build_static_lib.sh - pyenv shell --unset - continue-on-error: false - - name: Debug libchdb - run: | - ls -lh - llvm-nm libchdb.so | grep query_stable || true - echo "Global Symbol in libchdb.so:" - llvm-nm -g libchdb.so || true - echo "Global Symbol in libclickhouse-local-chdb.a:" - llvm-nm -g buildlib/programs/local/libclickhouse-local-chdb.a || true - echo "Global Symbol in libclickhouse-local-lib.a:" - llvm-nm -g buildlib/programs/local/libclickhouse-local-lib.a || true - echo "pychdb_cmd.sh:" - cat buildlib/pychdb_cmd.sh - echo "libchdb_cmd.sh:" - cat buildlib/libchdb_cmd.sh - - name: Scan chdb libraries with grype - run: | - echo "Scanning chdb libraries for 
vulnerabilities..." - # Files to scan - FILES_TO_SCAN="" - [ -f libchdb.so ] && FILES_TO_SCAN="$FILES_TO_SCAN libchdb.so" - [ -f libchdb.a ] && FILES_TO_SCAN="$FILES_TO_SCAN libchdb.a" - FILES_TO_SCAN="$FILES_TO_SCAN $(find chdb/ \( -name "*.dylib" -o -name "*.so" \) 2>/dev/null || true)" - SQLITE_VULNERABILITIES_FOUND=false - for file in $FILES_TO_SCAN; do - if [ -f "$file" ]; then - echo "=== Scanning $file ===" - SCAN_OUTPUT=$(grype "$file" 2>/dev/null || true) - echo "$SCAN_OUTPUT" - if echo "$SCAN_OUTPUT" | grep -qi sqlite; then - echo "❌ SQLite vulnerability found in $file" - SQLITE_VULNERABILITIES_FOUND=true - fi - fi - done - if [ "$SQLITE_VULNERABILITIES_FOUND" = true ]; then - echo "❌ SQLite vulnerabilities detected in chdb libraries!" - exit 1 - else - echo "✅ No SQLite vulnerabilities found in chdb libraries" - fi + bash ./chdb/build/test_go_example.sh ${{ github.workspace }}/libchdb.a continue-on-error: false - name: Run libchdb stub in examples dir run: | @@ -237,10 +249,6 @@ jobs: rm -rf chdb/build/ export PATH="$HOME/.pyenv/bin:$PATH" eval "$(pyenv init -)" - source ~/.cargo/env - export PATH=$(brew --prefix llvm@19)/bin:$(brew --prefix lld@19)/bin:/usr/local/opt/grep/libexec/gnubin:/usr/local/opt/binutils/bin:$PATH:/usr/local/opt/findutils/libexec/gnubin - export CC=$(brew --prefix llvm@19)/bin/clang - export CXX=$(brew --prefix llvm@19)/bin/clang++ pyenv shell 3.9 make wheel - name: Fix wheel platform tag @@ -259,12 +267,6 @@ jobs: sudo sysctl kern.corefile=$PWD/tmp/core/core.%P sudo sysctl kern.coredump=1 ulimit -c unlimited - - name: Free up disk space - run: | - # Clean more build artifacts - rm -rf buildlib/contrib 2>/dev/null || true - rm -rf buildlib/base 2>/dev/null || true - rm -rf buildlib/src 2>/dev/null || true - name: Test wheel on all Python versions run: | export PATH="$HOME/.pyenv/bin:$PATH" @@ -300,14 +302,6 @@ jobs: echo "No core files found in tmp/core" fi continue-on-error: true - - name: Keep killall ccache and wait for ccache 
to finish - if: always() - run: | - sleep 60 - while ps -ef | grep ccache | grep -v grep; do \ - killall ccache; \ - sleep 10; \ - done - name: Upload core files artifact if: always() && env.CORE_FILES_FOUND == 'true' uses: actions/upload-artifact@v4 @@ -362,4 +356,4 @@ jobs: python -m twine upload dist/*.whl env: TWINE_USERNAME: __token__ - TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }} \ No newline at end of file + TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }} diff --git a/chdb/build/build_static_lib.sh b/chdb/build/build_static_lib.sh index d82459d28eb..71157755841 100755 --- a/chdb/build/build_static_lib.sh +++ b/chdb/build/build_static_lib.sh @@ -146,23 +146,7 @@ fi # Test with Go example -echo "Preparing go-example directory..." -cd ${MY_DIR}/go-example -cp ${MY_DIR}/libchdb_minimal.a ./libchdb.a -cp ${PROJ_DIR}/programs/local/chdb.h . -echo "Copied libchdb_minimal.a as libchdb.a and chdb.h to go-example directory" - -# Run Go test -echo "Running Go test..." -# export CGO_CFLAGS_ALLOW=".*" -# export CGO_LDFLAGS_ALLOW=".*" -go run . -if [ $? -ne 0 ]; then - echo "Error: Go test failed" - exit 1 -fi - -echo "Go test completed successfully!" +bash ${MY_DIR}/test_go_example.sh ${MY_DIR}/libchdb_minimal.a # Copy final library to project root echo "Copying libchdb_minimal.a to project root as libchdb.a..." diff --git a/chdb/build/build_static_lib_mac_on_linux.sh b/chdb/build/build_static_lib_mac_on_linux.sh index a06cb9a3346..0370a153530 100755 --- a/chdb/build/build_static_lib_mac_on_linux.sh +++ b/chdb/build/build_static_lib_mac_on_linux.sh @@ -22,13 +22,6 @@ if [ "$(uname)" != "Linux" ]; then exit 1 fi -# Verify required environment variables -if [ -z "${CCTOOLS:-}" ]; then - echo "Error: CCTOOLS environment variable not set. Please set it to the cctools bin directory." 
- echo "Example: export CCTOOLS=/path/to/cctools" - exit 1 -fi - # Set architecture-specific variables if [ "$TARGET_ARCH" == "x86_64" ]; then DARWIN_TRIPLE="x86_64-apple-darwin" @@ -73,7 +66,7 @@ CCTOOLS_INSTALL_DIR="${HOME}/cctools" CCTOOLS_BIN="${CCTOOLS_INSTALL_DIR}/bin" # Override tools with cross-compilation versions from cctools -export STRIP="${CCTOOLS_BIN}/${DARWIN_TRIPLE}-strip" +export STRIP="llvm-strip-19" export AR="${CCTOOLS_BIN}/${DARWIN_TRIPLE}-ar" export NM="${CCTOOLS_BIN}/${DARWIN_TRIPLE}-nm" export LDD="${CCTOOLS_BIN}/${DARWIN_TRIPLE}-otool -L" @@ -180,6 +173,9 @@ clang-19 chdb_example.cpp -o chdb_example \ --target=${DARWIN_TRIPLE} \ -isysroot ${SYSROOT} \ -mmacosx-version-min=${MACOS_MIN_VERSION} \ + -nostdinc++ \ + -I${PROJ_DIR}/contrib/llvm-project/libcxx/include \ + -I${PROJ_DIR}/contrib/llvm-project/libcxxabi/include \ --ld-path=${CCTOOLS_BIN}/${DARWIN_TRIPLE}-ld \ -L. -lchdb -liconv \ -framework CoreFoundation \ @@ -215,7 +211,7 @@ if [ ${build_type} == "Debug" ]; then echo -e "\nDebug build, skip strip" else echo -e "\nStrip the libchdb_minimal.a:" - ${STRIP} -x libchdb_minimal.a + ${STRIP} -S libchdb_minimal.a fi # Copy final library to project root diff --git a/chdb/build/test_go_example.sh b/chdb/build/test_go_example.sh new file mode 100644 index 00000000000..b64d938e470 --- /dev/null +++ b/chdb/build/test_go_example.sh @@ -0,0 +1,37 @@ +#!/bin/bash +set -e + +# Get script directory +MY_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +PROJ_DIR="$(cd "${MY_DIR}/../.." && pwd)" + +# Allow custom library path +LIBCHDB_PATH="${1:-${MY_DIR}/libchdb_minimal.a}" + +echo "Testing with Go example..." +echo "Using library: ${LIBCHDB_PATH}" + +# Prepare go-example directory +echo "Preparing go-example directory..." 
+cd ${MY_DIR}/go-example + +# Copy library and header +if [ -f "${LIBCHDB_PATH}" ]; then + cp "${LIBCHDB_PATH}" ./libchdb.a +else + echo "Error: Library not found: ${LIBCHDB_PATH}" + exit 1 +fi + +cp ${PROJ_DIR}/programs/local/chdb.h . +echo "Copied library as libchdb.a and chdb.h to go-example directory" + +# Run Go test +echo "Running Go test..." +go run . +if [ $? -ne 0 ]; then + echo "Error: Go test failed" + exit 1 +fi + +echo "Go test completed successfully!" diff --git a/chdb/build_mac_on_linux.sh b/chdb/build_mac_on_linux.sh index 6f604cd692f..d2c1a6a065f 100755 --- a/chdb/build_mac_on_linux.sh +++ b/chdb/build_mac_on_linux.sh @@ -70,7 +70,8 @@ CCTOOLS_INSTALL_DIR="${HOME}/cctools" CCTOOLS_BIN="${CCTOOLS_INSTALL_DIR}/bin" # Override tools with cross-compilation versions from cctools -export STRIP="${CCTOOLS_BIN}/${DARWIN_TRIPLE}-strip" +# export STRIP="${CCTOOLS_BIN}/${DARWIN_TRIPLE}-strip" +export STRIP="llvm-strip-19" export AR="${CCTOOLS_BIN}/${DARWIN_TRIPLE}-ar" export NM="${CCTOOLS_BIN}/${DARWIN_TRIPLE}-nm" export LDD="${CCTOOLS_BIN}/${DARWIN_TRIPLE}-otool -L" From 681e85eebd327a1ebe1501e83e23fc4309d517c4 Mon Sep 17 00:00:00 2001 From: wudidapaopao Date: Tue, 2 Dec 2025 23:16:05 +0800 Subject: [PATCH 09/15] test: add cross-compile workflow for macOS arm64 --- .../workflows/build_macos_arm64_wheels.yml | 262 +++++++++--------- .github/workflows/build_macos_x86_wheels.yml | 9 +- chdb/test_smoke.sh | 2 +- 3 files changed, 139 insertions(+), 134 deletions(-) diff --git a/.github/workflows/build_macos_arm64_wheels.yml b/.github/workflows/build_macos_arm64_wheels.yml index 04cf1b2e4b6..557514f0af7 100644 --- a/.github/workflows/build_macos_arm64_wheels.yml +++ b/.github/workflows/build_macos_arm64_wheels.yml @@ -21,10 +21,118 @@ on: - '**/*.md' jobs: - build_universal_wheel: - name: Build Universal Wheel (macOS ARM64) + build_universal_wheel_on_linux: + name: Build on Linux (cross-compile for macOS arm64) + runs-on: GH-Linux-ARM64 + # if: ${{ 
!github.event.pull_request.draft }} + timeout-minutes: 600 + steps: + - name: Install Python build dependencies + run: | + sudo apt-get update + sudo apt-get install -y make build-essential libssl-dev zlib1g-dev \ + libbz2-dev libreadline-dev libsqlite3-dev wget curl llvm \ + libncursesw5-dev xz-utils tk-dev libxml2-dev libxmlsec1-dev \ + libffi-dev liblzma-dev p7zip-full + - name: Upgrade Rust toolchain + run: | + rustup toolchain install nightly-2025-07-07 + rustup default nightly-2025-07-07 + rustup component add rust-src + rustc --version + cargo --version + - name: Install clang++ for Ubuntu + run: | + pwd + uname -a + wget https://apt.llvm.org/llvm.sh + chmod +x llvm.sh + sudo ./llvm.sh 19 + which clang++-19 + clang++-19 --version + sudo apt-get install -y make cmake ccache ninja-build yasm gawk wget + # Install WebAssembly linker (wasm-ld) + sudo apt-get install -y lld-19 + # Create symlink for wasm-ld + if ! command -v wasm-ld &> /dev/null; then + sudo ln -sf /usr/bin/wasm-ld-19 /usr/bin/wasm-ld || true + fi + which wasm-ld || echo "wasm-ld not found in PATH" + ccache -s + - name: Update git + run: | + sudo add-apt-repository ppa:git-core/ppa -y + sudo apt-get update + sudo apt-get install -y git + git --version + - uses: actions/checkout@v3 + with: + fetch-depth: 0 + - name: Update submodules + run: | + git submodule update --init --recursive --jobs 4 + - name: ccache + uses: hendrikmuhs/ccache-action@v1.2 + with: + key: ubuntu-24.04-aarch64-cross-compile + max-size: 5G + append-timestamp: true + - name: remove old clang and link clang-19 to clang + run: | + sudo rm -f /usr/bin/clang || true + sudo ln -s /usr/bin/clang-19 /usr/bin/clang + sudo rm -f /usr/bin/clang++ || true + sudo ln -s /usr/bin/clang++-19 /usr/bin/clang++ + which clang++ + clang++ --version + - name: Run chdb/build_mac_on_linux.sh + timeout-minutes: 600 + run: | + source ~/.cargo/env + bash ./chdb/build_mac_on_linux.sh arm64 + continue-on-error: false + - name: Run 
chdb/build/build_static_lib_mac_on_linux.sh + timeout-minutes: 600 + run: | + source ~/.cargo/env + bash ./chdb/build/build_static_lib_mac_on_linux.sh arm64 + continue-on-error: false + - name: Check ccache statistics + run: | + ccache -s + ls -lh chdb + df -h + - name: Keep killall ccache and wait for ccache to finish + if: always() + run: | + sleep 60 + while ps -ef | grep ccache | grep -v grep; do \ + killall ccache; \ + sleep 10; \ + done + - name: Upload build artifacts + uses: actions/upload-artifact@v4 + with: + name: macos-arm64-build-artifacts + path: | + ./libchdb.so + ./libchdb.a + ./chdb/_chdb.abi3.so + ./chdb/libpybind11nonlimitedapi_stubs.dylib + ./chdb/libpybind11nonlimitedapi_chdb_3.8.dylib + ./chdb/libpybind11nonlimitedapi_chdb_3.9.dylib + ./chdb/libpybind11nonlimitedapi_chdb_3.10.dylib + ./chdb/libpybind11nonlimitedapi_chdb_3.11.dylib + ./chdb/libpybind11nonlimitedapi_chdb_3.12.dylib + ./chdb/libpybind11nonlimitedapi_chdb_3.13.dylib + ./chdb/libpybind11nonlimitedapi_chdb_3.14.dylib + retention-days: 1 + + test_on_macos: + name: Test on macOS arm64 runs-on: macos-14-xlarge - if: ${{ !github.event.pull_request.draft }} + # if: ${{ !github.event.pull_request.draft }} + timeout-minutes: 600 steps: - name: Check machine architecture run: | @@ -39,17 +147,6 @@ jobs: else echo "This is an x86_64 (Intel) machine" fi - - name: Free up disk space (Initial) - run: | - # Clean Homebrew cache - brew cleanup -s 2>/dev/null || true - rm -rf "$(brew --cache)" 2>/dev/null || true - sudo rm -rf ~/Library/Developer/Xcode/DerivedData 2>/dev/null || true - sudo rm -rf ~/Library/Caches/com.apple.dt.Xcode 2>/dev/null || true - sudo rm -rf /Users/runner/Library/Android 2>/dev/null || true - sudo rm -rf /tmp/* 2>/dev/null || true - echo "=== Disk usage after cleanup ===" - df -h - name: Setup pyenv run: | curl https://pyenv.run | bash @@ -97,39 +194,14 @@ jobs: - name: Remove /usr/local/bin/python3 run: | sudo rm -f /usr/local/bin/python3 - - name: Install clang++ for 
macOS + - name: Install go for macOS run: | - pwd - uname -a - export HOMEBREW_NO_INSTALLED_DEPENDENTS_CHECK=1 brew update - brew install ca-certificates lz4 mpdecimal readline sqlite xz z3 zstd - brew install openssl@3 || echo "OpenSSL install failed, continuing..." - brew install --ignore-dependencies llvm@19 - brew install git ninja libtool gettext binutils grep findutils nasm lld@19 libiconv - brew install ccache || echo "ccache installation failed, continuing without it" brew install go - cd /usr/local/opt/ && sudo rm -f llvm && sudo ln -sf llvm@19 llvm - export PATH=$(brew --prefix llvm@19)/bin:$(brew --prefix lld@19)/bin:$PATH - which clang++ - clang++ --version - which wasm-ld || echo "wasm-ld not found in PATH" - which go go version - ccache -s | echo "ccache not available yet" - - name: Upgrade Rust toolchain - run: | - rustup toolchain install nightly-2025-07-07 - rustup default nightly-2025-07-07 - rustup component add rust-src - rustc --version - cargo --version - uses: actions/checkout@v3 with: fetch-depth: 0 - - name: Update submodules - run: | - git submodule update --init --recursive --jobs 4 - name: Update version for release if: startsWith(github.ref, 'refs/tags/v') run: | @@ -143,89 +215,34 @@ jobs: bump-my-version replace --new-version $TAG_NAME echo "Version files updated to $TAG_NAME" pyenv shell --unset - - name: Free up disk space (Before compilation) - run: | - echo "=== Disk usage before compilation cleanup ===" - df -h - brew cleanup -s 2>/dev/null || true - rm -rf "$(brew --cache)" 2>/dev/null || true - rm -rf ~/.cache/pip 2>/dev/null || true - rm -rf ~/.pyenv/.cache 2>/dev/null || true - rm -rf ~/.cargo/registry/cache 2>/dev/null || true - echo "=== Disk usage after cleanup ===" - df -h - - name: ccache - uses: hendrikmuhs/ccache-action@v1.2 + - name: Download build artifacts + uses: actions/download-artifact@v4 with: - key: ${{ runner.os }}-arm64-ccache - max-size: 5G - append-timestamp: true - - name: Run chdb/build.sh + name: 
macos-arm64-build-artifacts + path: ./artifacts + - name: Restore artifacts to original paths + run: | + mv ./artifacts/libchdb.so ./ + mv ./artifacts/libchdb.a ./ + mv ./artifacts/chdb/_chdb.abi3.so ./chdb/ + mv ./artifacts/chdb/libpybind11nonlimitedapi_stubs.dylib ./chdb/ + for v in 8 9 10 11 12 13 14; do + mv ./artifacts/chdb/libpybind11nonlimitedapi_chdb_3.${v}.dylib ./chdb/ + done + ls -lh ./libchdb.so ./libchdb.a + ls -lh ./chdb/*.so ./chdb/*.dylib + - name: Run chdb/test_smoke.sh timeout-minutes: 600 run: | export PATH="$HOME/.pyenv/bin:$PATH" eval "$(pyenv init -)" - source ~/.cargo/env pyenv shell 3.8 - export PATH=$(brew --prefix llvm@19)/bin:$(brew --prefix lld@19)/bin:/usr/local/opt/grep/libexec/gnubin:/usr/local/opt/binutils/bin:$PATH:/usr/local/opt/findutils/libexec/gnubin - export CC=$(brew --prefix llvm@19)/bin/clang - export CXX=$(brew --prefix llvm@19)/bin/clang++ bash gen_manifest.sh - bash ./chdb/build.sh - pyenv shell 3.8 - bash -x ./chdb/test_smoke.sh - - name: Run chdb/build/build_static_lib.sh + bash -x ./chdb/test_smoke.sh cross-compile + - name: Run chdb/build/test_go_example.sh timeout-minutes: 600 run: | - export PATH=$HOME/.pyenv/bin:$(brew --prefix llvm@19)/bin:$(brew --prefix lld@19)/bin:/usr/local/opt/grep/libexec/gnubin:/usr/local/opt/binutils/bin:$PATH:/usr/local/opt/findutils/libexec/gnubin - export CC=$(brew --prefix llvm@19)/bin/clang - export CXX=$(brew --prefix llvm@19)/bin/clang++ - source ~/.cargo/env - eval "$(pyenv init -)" - pyenv shell 3.8 - bash ./chdb/build/build_static_lib.sh - pyenv shell --unset - continue-on-error: false - - name: Debug libchdb - run: | - ls -lh - llvm-nm libchdb.so | grep query_stable || true - echo "Global Symbol in libchdb.so:" - llvm-nm -g libchdb.so || true - echo "Global Symbol in libclickhouse-local-chdb.a:" - llvm-nm -g buildlib/programs/local/libclickhouse-local-chdb.a || true - echo "Global Symbol in libclickhouse-local-lib.a:" - llvm-nm -g 
buildlib/programs/local/libclickhouse-local-lib.a || true - echo "pychdb_cmd.sh:" - cat buildlib/pychdb_cmd.sh - echo "libchdb_cmd.sh:" - cat buildlib/libchdb_cmd.sh - - name: Scan chdb libraries with grype - run: | - echo "Scanning chdb libraries for vulnerabilities..." - # Files to scan - FILES_TO_SCAN="" - [ -f libchdb.so ] && FILES_TO_SCAN="$FILES_TO_SCAN libchdb.so" - [ -f libchdb.a ] && FILES_TO_SCAN="$FILES_TO_SCAN libchdb.a" - FILES_TO_SCAN="$FILES_TO_SCAN $(find chdb/ \( -name "*.dylib" -o -name "*.so" \) 2>/dev/null || true)" - SQLITE_VULNERABILITIES_FOUND=false - for file in $FILES_TO_SCAN; do - if [ -f "$file" ]; then - echo "=== Scanning $file ===" - SCAN_OUTPUT=$(grype "$file" 2>/dev/null || true) - echo "$SCAN_OUTPUT" - if echo "$SCAN_OUTPUT" | grep -qi sqlite; then - echo "❌ SQLite vulnerability found in $file" - SQLITE_VULNERABILITIES_FOUND=true - fi - fi - done - if [ "$SQLITE_VULNERABILITIES_FOUND" = true ]; then - echo "❌ SQLite vulnerabilities detected in chdb libraries!" 
- exit 1 - else - echo "✅ No SQLite vulnerabilities found in chdb libraries" - fi + bash ./chdb/build/test_go_example.sh ${{ github.workspace }}/libchdb.a continue-on-error: false - name: Run libchdb stub in examples dir run: | @@ -236,9 +253,6 @@ jobs: rm -rf chdb/build/ export PATH="$HOME/.pyenv/bin:$PATH" eval "$(pyenv init -)" - export PATH=$(brew --prefix llvm@19)/bin:$(brew --prefix lld@19)/bin:/usr/local/opt/grep/libexec/gnubin:/usr/local/opt/binutils/bin:$PATH:/usr/local/opt/findutils/libexec/gnubin - export CC=$(brew --prefix llvm@19)/bin/clang - export CXX=$(brew --prefix llvm@19)/bin/clang++ pyenv shell 3.8 make wheel - name: Fix wheel platform tag @@ -257,12 +271,6 @@ jobs: sudo sysctl kern.corefile=$PWD/tmp/core/core.%P sudo sysctl kern.coredump=1 ulimit -c unlimited - - name: Free up disk space - run: | - # Clean more build artifacts - rm -rf buildlib/contrib 2>/dev/null || true - rm -rf buildlib/base 2>/dev/null || true - rm -rf buildlib/src 2>/dev/null || true - name: Test wheel on all Python versions run: | ulimit -c unlimited @@ -300,14 +308,6 @@ jobs: echo "No core files found in tmp/core" fi continue-on-error: true - - name: Keep killall ccache and wait for ccache to finish - if: always() - run: | - sleep 60 - while ps -ef | grep ccache | grep -v grep; do \ - killall ccache; \ - sleep 10; \ - done - name: Upload core files artifact if: always() && env.CORE_FILES_FOUND == 'true' uses: actions/upload-artifact@v4 diff --git a/.github/workflows/build_macos_x86_wheels.yml b/.github/workflows/build_macos_x86_wheels.yml index e0761e2b2dc..a02dcebf417 100644 --- a/.github/workflows/build_macos_x86_wheels.yml +++ b/.github/workflows/build_macos_x86_wheels.yml @@ -126,7 +126,7 @@ jobs: ./chdb/libpybind11nonlimitedapi_chdb_3.12.dylib ./chdb/libpybind11nonlimitedapi_chdb_3.13.dylib ./chdb/libpybind11nonlimitedapi_chdb_3.14.dylib - retention-days: 2 + retention-days: 1 test_on_macos: name: Test on macOS x86_64 @@ -194,6 +194,11 @@ jobs: - name: Remove 
/usr/local/bin/python3 run: | sudo rm -f /usr/local/bin/python3 + - name: Install go for macOS + run: | + brew update + brew install go + go version - uses: actions/checkout@v3 with: fetch-depth: 0 @@ -233,7 +238,7 @@ jobs: eval "$(pyenv init -)" pyenv shell 3.9 bash gen_manifest.sh - bash -x ./chdb/test_smoke.sh + bash -x ./chdb/test_smoke.sh cross-compile continue-on-error: false - name: Run chdb/build/test_go_example.sh timeout-minutes: 600 diff --git a/chdb/test_smoke.sh b/chdb/test_smoke.sh index ddc1f97571d..e254efe93f0 100755 --- a/chdb/test_smoke.sh +++ b/chdb/test_smoke.sh @@ -4,7 +4,7 @@ set -e DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" -. ${DIR}/vars.sh +. ${DIR}/vars.sh "$1" # test the pybind module cd ${CHDB_DIR} From 6d4f4ce62b796afa92876e4a9faace9a77372a0c Mon Sep 17 00:00:00 2001 From: wudidapaopao Date: Tue, 2 Dec 2025 23:17:41 +0800 Subject: [PATCH 10/15] test: update build_macos_arm64_wheels.yml --- .github/workflows/build_macos_arm64_wheels.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/build_macos_arm64_wheels.yml b/.github/workflows/build_macos_arm64_wheels.yml index 557514f0af7..97636e2ff0c 100644 --- a/.github/workflows/build_macos_arm64_wheels.yml +++ b/.github/workflows/build_macos_arm64_wheels.yml @@ -131,6 +131,7 @@ jobs: test_on_macos: name: Test on macOS arm64 runs-on: macos-14-xlarge + needs: build_universal_wheel_on_linux # if: ${{ !github.event.pull_request.draft }} timeout-minutes: 600 steps: From 90368019d392f32c4254b1df5d1c3a4831cb27d4 Mon Sep 17 00:00:00 2001 From: wudidapaopao Date: Wed, 3 Dec 2025 01:42:16 +0800 Subject: [PATCH 11/15] fix: fix LC_RPATH in _chdb.abi3.so --- .../workflows/build_linux_arm64_wheels-gh.yml | 2 +- .github/workflows/build_linux_x86_wheels.yml | 2 +- .github/workflows/build_macos_x86_wheels.yml | 2 +- .../workflows/build_musllinux_arm64_wheels.yml | 4 ++-- .github/workflows/build_musllinux_x86_wheels.yml | 2 +- chdb/build_mac_on_linux.sh | 16 
++++++++++++++++ 6 files changed, 22 insertions(+), 6 deletions(-) diff --git a/.github/workflows/build_linux_arm64_wheels-gh.yml b/.github/workflows/build_linux_arm64_wheels-gh.yml index 7fa9fdf5fc2..9c9ee0bcbef 100644 --- a/.github/workflows/build_linux_arm64_wheels-gh.yml +++ b/.github/workflows/build_linux_arm64_wheels-gh.yml @@ -23,7 +23,7 @@ on: jobs: build_universal_wheel: - name: Build Universal Wheel (Linux ARM64) + name: Build Universal Wheel (Linux arm64) runs-on: GH-Linux-ARM64 if: ${{ !github.event.pull_request.draft }} steps: diff --git a/.github/workflows/build_linux_x86_wheels.yml b/.github/workflows/build_linux_x86_wheels.yml index 744ef0e0795..63789e89a41 100644 --- a/.github/workflows/build_linux_x86_wheels.yml +++ b/.github/workflows/build_linux_x86_wheels.yml @@ -1,4 +1,4 @@ -name: Build Linux X86 +name: Build Linux x86_64 on: workflow_dispatch: diff --git a/.github/workflows/build_macos_x86_wheels.yml b/.github/workflows/build_macos_x86_wheels.yml index a02dcebf417..5ded8909e99 100644 --- a/.github/workflows/build_macos_x86_wheels.yml +++ b/.github/workflows/build_macos_x86_wheels.yml @@ -1,4 +1,4 @@ -name: Build macOS X86 +name: Build macOS x86_64 on: workflow_dispatch: diff --git a/.github/workflows/build_musllinux_arm64_wheels.yml b/.github/workflows/build_musllinux_arm64_wheels.yml index 08e03e0eea5..9e7bfc32fab 100644 --- a/.github/workflows/build_musllinux_arm64_wheels.yml +++ b/.github/workflows/build_musllinux_arm64_wheels.yml @@ -1,4 +1,4 @@ -name: Build Linux(musllinux) ARM64 +name: Build Linux(musllinux) arm64 on: workflow_dispatch: @@ -22,7 +22,7 @@ on: jobs: build_musllinux_wheels: - name: Build musllinux wheels (Alpine Linux aarch64) + name: Build musllinux wheels (Alpine Linux arm64) runs-on: GH-Linux-ARM64 if: ${{ !github.event.pull_request.draft }} steps: diff --git a/.github/workflows/build_musllinux_x86_wheels.yml b/.github/workflows/build_musllinux_x86_wheels.yml index 0a753c6a493..11cc21dc6ac 100644 --- 
a/.github/workflows/build_musllinux_x86_wheels.yml +++ b/.github/workflows/build_musllinux_x86_wheels.yml @@ -1,4 +1,4 @@ -name: Build Linux(musllinux) x86 +name: Build Linux(musllinux) x86_64 on: workflow_dispatch: diff --git a/chdb/build_mac_on_linux.sh b/chdb/build_mac_on_linux.sh index d2c1a6a065f..17fba463942 100755 --- a/chdb/build_mac_on_linux.sh +++ b/chdb/build_mac_on_linux.sh @@ -285,6 +285,22 @@ if ! CMAKE_ARGS="${CMAKE_ARGS}" CHDB_PYTHON_INCLUDE_DIR_PREFIX="${HOME}/python_i exit 1 fi +# Fix LC_RPATH in _chdb.abi3.so for cross-compiled builds +echo -e "\nFixing LC_RPATH in ${CHDB_PY_MODULE}..." +INSTALL_NAME_TOOL="${CCTOOLS_BIN}/${DARWIN_TRIPLE}-install_name_tool" +OTOOL="${CCTOOLS_BIN}/${DARWIN_TRIPLE}-otool" +OLD_RPATH=$(${OTOOL} -l ${CHDB_DIR}/${CHDB_PY_MODULE} | grep -A2 LC_RPATH | grep path | awk '{print $2}' | head -1) +if [ -n "${OLD_RPATH}" ]; then + echo " Current RPATH: ${OLD_RPATH}" + ${INSTALL_NAME_TOOL} -rpath "${OLD_RPATH}" "@loader_path" ${CHDB_DIR}/${CHDB_PY_MODULE} + echo " Changed RPATH to: @loader_path" +else + echo " No LC_RPATH found, adding @loader_path" + ${INSTALL_NAME_TOOL} -add_rpath "@loader_path" ${CHDB_DIR}/${CHDB_PY_MODULE} || true +fi +echo -e "\nVerifying LC_RPATH:" +${OTOOL} -l ${CHDB_DIR}/${CHDB_PY_MODULE} | grep -A2 LC_RPATH || echo "No LC_RPATH found" + echo -e "\nCross-compilation for macOS ${TARGET_ARCH} completed successfully!" 
echo -e "Generated files:" echo -e " - ${PROJ_DIR}/${LIBCHDB_SO}" From aed23706648d07ed44ae5fdb33a6aa455517023e Mon Sep 17 00:00:00 2001 From: wudidapaopao Date: Wed, 3 Dec 2025 02:44:34 +0800 Subject: [PATCH 12/15] fix: fix loader_path in _chdb.abi3.so --- chdb/build_mac_on_linux.sh | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/chdb/build_mac_on_linux.sh b/chdb/build_mac_on_linux.sh index 17fba463942..b9b4625294b 100755 --- a/chdb/build_mac_on_linux.sh +++ b/chdb/build_mac_on_linux.sh @@ -289,17 +289,23 @@ fi echo -e "\nFixing LC_RPATH in ${CHDB_PY_MODULE}..." INSTALL_NAME_TOOL="${CCTOOLS_BIN}/${DARWIN_TRIPLE}-install_name_tool" OTOOL="${CCTOOLS_BIN}/${DARWIN_TRIPLE}-otool" -OLD_RPATH=$(${OTOOL} -l ${CHDB_DIR}/${CHDB_PY_MODULE} | grep -A2 LC_RPATH | grep path | awk '{print $2}' | head -1) -if [ -n "${OLD_RPATH}" ]; then - echo " Current RPATH: ${OLD_RPATH}" - ${INSTALL_NAME_TOOL} -rpath "${OLD_RPATH}" "@loader_path" ${CHDB_DIR}/${CHDB_PY_MODULE} - echo " Changed RPATH to: @loader_path" + +echo -e "\nPre library dependencies:" +${OTOOL} -L ${CHDB_DIR}/${CHDB_PY_MODULE} + +STUBS_LIB="libpybind11nonlimitedapi_stubs.dylib" +OLD_STUBS_PATH=$(${OTOOL} -L ${CHDB_DIR}/${CHDB_PY_MODULE} | grep "${STUBS_LIB}" | awk '{print $1}') +if [ -n "${OLD_STUBS_PATH}" ]; then + echo "Changing ${STUBS_LIB} reference:" + echo " From: ${OLD_STUBS_PATH}" + echo " To: @loader_path/${STUBS_LIB}" + ${INSTALL_NAME_TOOL} -change "${OLD_STUBS_PATH}" "@loader_path/${STUBS_LIB}" ${CHDB_DIR}/${CHDB_PY_MODULE} else - echo " No LC_RPATH found, adding @loader_path" - ${INSTALL_NAME_TOOL} -add_rpath "@loader_path" ${CHDB_DIR}/${CHDB_PY_MODULE} || true + echo "${STUBS_LIB} not found in dependencies" fi -echo -e "\nVerifying LC_RPATH:" -${OTOOL} -l ${CHDB_DIR}/${CHDB_PY_MODULE} | grep -A2 LC_RPATH || echo "No LC_RPATH found" + +echo -e "\nPost library dependencies:" +${OTOOL} -L ${CHDB_DIR}/${CHDB_PY_MODULE} echo -e "\nCross-compilation for macOS 
${TARGET_ARCH} completed successfully!" echo -e "Generated files:" From e45c876528193765698d0fa8dd754e7b3e653532 Mon Sep 17 00:00:00 2001 From: wudidapaopao Date: Wed, 3 Dec 2025 03:21:37 +0800 Subject: [PATCH 13/15] fix: fix arrow test --- examples/arrow_c_abi.h | 233 +++++++++++++++++++++++++++++++++++++++ examples/chdbArrowTest.c | 2 +- 2 files changed, 234 insertions(+), 1 deletion(-) create mode 100644 examples/arrow_c_abi.h diff --git a/examples/arrow_c_abi.h b/examples/arrow_c_abi.h new file mode 100644 index 00000000000..6abe866b5f6 --- /dev/null +++ b/examples/arrow_c_abi.h @@ -0,0 +1,233 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +/// \file abi.h Arrow C Data Interface +/// +/// The Arrow C Data interface defines a very small, stable set +/// of C definitions which can be easily copied into any project's +/// source code and vendored to be used for columnar data interchange +/// in the Arrow format. For non-C/C++ languages and runtimes, +/// it should be almost as easy to translate the C definitions into +/// the corresponding C FFI declarations. 
+/// +/// Applications and libraries can therefore work with Arrow memory +/// without necessarily using the Arrow libraries or reinventing +/// the wheel. Developers can choose between tight integration +/// with the Arrow software project or minimal integration with +/// the Arrow format only. + +#pragma once + +#include <stdint.h> + +// Spec and documentation: https://arrow.apache.org/docs/format/CDataInterface.html + +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef ARROW_C_DATA_INTERFACE +#define ARROW_C_DATA_INTERFACE + +#define ARROW_FLAG_DICTIONARY_ORDERED 1 +#define ARROW_FLAG_NULLABLE 2 +#define ARROW_FLAG_MAP_KEYS_SORTED 4 + +struct ArrowSchema { + // Array type description + const char* format; + const char* name; + const char* metadata; + int64_t flags; + int64_t n_children; + struct ArrowSchema** children; + struct ArrowSchema* dictionary; + + // Release callback + void (*release)(struct ArrowSchema*); + // Opaque producer-specific data + void* private_data; +}; + +struct ArrowArray { + // Array data description + int64_t length; + int64_t null_count; + int64_t offset; + int64_t n_buffers; + int64_t n_children; + const void** buffers; + struct ArrowArray** children; + struct ArrowArray* dictionary; + + // Release callback + void (*release)(struct ArrowArray*); + // Opaque producer-specific data + void* private_data; +}; + +#endif // ARROW_C_DATA_INTERFACE + +#ifndef ARROW_C_DEVICE_DATA_INTERFACE +#define ARROW_C_DEVICE_DATA_INTERFACE + +// Spec and Documentation: https://arrow.apache.org/docs/format/CDeviceDataInterface.html + +// DeviceType for the allocated memory +typedef int32_t ArrowDeviceType; + +// CPU device, same as using ArrowArray directly +#define ARROW_DEVICE_CPU 1 +// CUDA GPU Device +#define ARROW_DEVICE_CUDA 2 +// Pinned CUDA CPU memory by cudaMallocHost +#define ARROW_DEVICE_CUDA_HOST 3 +// OpenCL Device +#define ARROW_DEVICE_OPENCL 4 +// Vulkan buffer for next-gen graphics +#define ARROW_DEVICE_VULKAN 7 +// Metal for Apple GPU +#define
ARROW_DEVICE_METAL 8 +// Verilog simulator buffer +#define ARROW_DEVICE_VPI 9 +// ROCm GPUs for AMD GPUs +#define ARROW_DEVICE_ROCM 10 +// Pinned ROCm CPU memory allocated by hipMallocHost +#define ARROW_DEVICE_ROCM_HOST 11 +// Reserved for extension +#define ARROW_DEVICE_EXT_DEV 12 +// CUDA managed/unified memory allocated by cudaMallocManaged +#define ARROW_DEVICE_CUDA_MANAGED 13 +// unified shared memory allocated on a oneAPI non-partitioned device. +#define ARROW_DEVICE_ONEAPI 14 +// GPU support for next-gen WebGPU standard +#define ARROW_DEVICE_WEBGPU 15 +// Qualcomm Hexagon DSP +#define ARROW_DEVICE_HEXAGON 16 + +struct ArrowDeviceArray { + // the Allocated Array + // + // the buffers in the array (along with the buffers of any + // children) are what is allocated on the device. + struct ArrowArray array; + // The device id to identify a specific device + int64_t device_id; + // The type of device which can access this memory. + ArrowDeviceType device_type; + // An event-like object to synchronize on if needed. + void* sync_event; + // Reserved bytes for future expansion. + int64_t reserved[3]; +}; + +#endif // ARROW_C_DEVICE_DATA_INTERFACE + +#ifndef ARROW_C_STREAM_INTERFACE +#define ARROW_C_STREAM_INTERFACE + +struct ArrowArrayStream { + // Callback to get the stream type + // (will be the same for all arrays in the stream). + // + // Return value: 0 if successful, an `errno`-compatible error code otherwise. + // + // If successful, the ArrowSchema must be released independently from the stream. + int (*get_schema)(struct ArrowArrayStream*, struct ArrowSchema* out); + + // Callback to get the next array + // (if no error and the array is released, the stream has ended) + // + // Return value: 0 if successful, an `errno`-compatible error code otherwise. + // + // If successful, the ArrowArray must be released independently from the stream. 
+ int (*get_next)(struct ArrowArrayStream*, struct ArrowArray* out); + + // Callback to get optional detailed error information. + // This must only be called if the last stream operation failed + // with a non-0 return code. + // + // Return value: pointer to a null-terminated character array describing + // the last error, or NULL if no description is available. + // + // The returned pointer is only valid until the next operation on this stream + // (including release). + const char* (*get_last_error)(struct ArrowArrayStream*); + + // Release callback: release the stream's own resources. + // Note that arrays returned by `get_next` must be individually released. + void (*release)(struct ArrowArrayStream*); + + // Opaque producer-specific data + void* private_data; +}; + +#endif // ARROW_C_STREAM_INTERFACE + +#ifndef ARROW_C_DEVICE_STREAM_INTERFACE +#define ARROW_C_DEVICE_STREAM_INTERFACE + +// Equivalent to ArrowArrayStream, but for ArrowDeviceArrays. +// +// This stream is intended to provide a stream of data on a single +// device, if a producer wants data to be produced on multiple devices +// then multiple streams should be provided. One per device. +struct ArrowDeviceArrayStream { + // The device that this stream produces data on. + ArrowDeviceType device_type; + + // Callback to get the stream schema + // (will be the same for all arrays in the stream). + // + // Return value 0 if successful, an `errno`-compatible error code otherwise. + // + // If successful, the ArrowSchema must be released independently from the stream. + // The schema should be accessible via CPU memory. + int (*get_schema)(struct ArrowDeviceArrayStream* self, struct ArrowSchema* out); + + // Callback to get the next array + // (if no error and the array is released, the stream has ended) + // + // Return value: 0 if successful, an `errno`-compatible error code otherwise. + // + // If successful, the ArrowDeviceArray must be released independently from the stream. 
+ int (*get_next)(struct ArrowDeviceArrayStream* self, struct ArrowDeviceArray* out); + + // Callback to get optional detailed error information. + // This must only be called if the last stream operation failed + // with a non-0 return code. + // + // Return value: pointer to a null-terminated character array describing + // the last error, or NULL if no description is available. + // + // The returned pointer is only valid until the next operation on this stream + // (including release). + const char* (*get_last_error)(struct ArrowDeviceArrayStream* self); + + // Release callback: release the stream's own resources. + // Note that arrays returned by `get_next` must be individually released. + void (*release)(struct ArrowDeviceArrayStream* self); + + // Opaque producer-specific data + void* private_data; +}; + +#endif // ARROW_C_DEVICE_STREAM_INTERFACE + +#ifdef __cplusplus +} +#endif diff --git a/examples/chdbArrowTest.c b/examples/chdbArrowTest.c index f91d4d4c8c4..61b9a8dd603 100644 --- a/examples/chdbArrowTest.c +++ b/examples/chdbArrowTest.c @@ -6,7 +6,7 @@ #include #include "../programs/local/chdb.h" -#include "../contrib/arrow/cpp/src/arrow/c/abi.h" +#include "arrow_c_abi.h" // Custom ArrowArrayStream implementation data typedef struct CustomStreamData From 0bd16549a887d23ca00c9229a5729bdd978b48c4 Mon Sep 17 00:00:00 2001 From: wudidapaopao Date: Wed, 3 Dec 2025 12:03:55 +0800 Subject: [PATCH 14/15] test: disable mac tests for draft PRs --- .github/workflows/build_macos_arm64_wheels.yml | 4 ++-- .github/workflows/build_macos_x86_wheels.yml | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/build_macos_arm64_wheels.yml b/.github/workflows/build_macos_arm64_wheels.yml index 97636e2ff0c..fa0420acf5a 100644 --- a/.github/workflows/build_macos_arm64_wheels.yml +++ b/.github/workflows/build_macos_arm64_wheels.yml @@ -24,7 +24,7 @@ jobs: build_universal_wheel_on_linux: name: Build on Linux (cross-compile for macOS arm64) 
runs-on: GH-Linux-ARM64 - # if: ${{ !github.event.pull_request.draft }} + if: ${{ !github.event.pull_request.draft }} timeout-minutes: 600 steps: - name: Install Python build dependencies @@ -132,7 +132,7 @@ jobs: name: Test on macOS arm64 runs-on: macos-14-xlarge needs: build_universal_wheel_on_linux - # if: ${{ !github.event.pull_request.draft }} + if: ${{ !github.event.pull_request.draft }} timeout-minutes: 600 steps: - name: Check machine architecture diff --git a/.github/workflows/build_macos_x86_wheels.yml b/.github/workflows/build_macos_x86_wheels.yml index 5ded8909e99..1a6577cf50c 100644 --- a/.github/workflows/build_macos_x86_wheels.yml +++ b/.github/workflows/build_macos_x86_wheels.yml @@ -24,7 +24,7 @@ jobs: build_universal_wheel_on_linux: name: Build on Linux (cross-compile for macOS x86_64) runs-on: gh-64c - # if: ${{ !github.event.pull_request.draft }} + if: ${{ !github.event.pull_request.draft }} timeout-minutes: 600 steps: - name: Install Python build dependencies @@ -132,7 +132,7 @@ jobs: name: Test on macOS x86_64 runs-on: macos-15-intel needs: build_universal_wheel_on_linux - # if: ${{ !github.event.pull_request.draft }} + if: ${{ !github.event.pull_request.draft }} timeout-minutes: 600 steps: - name: Check machine architecture From 501c642e35e6444e8684abdea1bc2d961f561019 Mon Sep 17 00:00:00 2001 From: wudidapaopao Date: Wed, 3 Dec 2025 13:27:23 +0800 Subject: [PATCH 15/15] test: use python 3.9 --- .../workflows/build_macos_arm64_wheels.yml | 2 +- tests/test_data_insertion.ipynb | 64 +++++++++---------- 2 files changed, 32 insertions(+), 34 deletions(-) diff --git a/.github/workflows/build_macos_arm64_wheels.yml b/.github/workflows/build_macos_arm64_wheels.yml index fa0420acf5a..b11389c2a75 100644 --- a/.github/workflows/build_macos_arm64_wheels.yml +++ b/.github/workflows/build_macos_arm64_wheels.yml @@ -291,7 +291,7 @@ jobs: run: | export PATH="$HOME/.pyenv/bin:$PATH" eval "$(pyenv init -)" - pyenv shell 3.8 + pyenv shell 3.9 python -m pip 
install dist/*.whl --force-reinstall jupyter nbconvert --to notebook --execute tests/test_data_insertion.ipynb --output test_data_insertion_output.ipynb pyenv shell --unset diff --git a/tests/test_data_insertion.ipynb b/tests/test_data_insertion.ipynb index 20bbb87bae2..e5c7114dbbe 100644 --- a/tests/test_data_insertion.ipynb +++ b/tests/test_data_insertion.ipynb @@ -3,32 +3,28 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "jupyter": { - "is_executing": true - } - }, + "metadata": {}, "outputs": [], "source": [ "from chdb import session\n", "import time\n", - "import tempfile\n", "import os\n", + "import shutil\n", "\n", "print(\"Connecting to chdb session...\")\n", - "chs = session.Session()\n", - "\n", - "temp_csv = tempfile.NamedTemporaryFile(mode='w', suffix='.csv', delete=False)\n", - "temp_csv.write(\"movieId,embedding\\n\") # Header\n", + "session_dir = os.path.join(os.getcwd(), \"chdb_test_data_insertion_ipynb\")\n", + "os.makedirs(session_dir, exist_ok=True)\n", + "chs = session.Session(session_dir)\n", "\n", - "# Generate 10,000 rows of test data\n", - "for i in range(1, 10001):\n", - " embedding = [float(i + j * 0.1) for j in range(10)]\n", - " embedding_str = '[' + ','.join(map(str, embedding)) + ']'\n", - " temp_csv.write(f'{i},\"{embedding_str}\"\\n')\n", + "csv_path = os.path.join(os.getcwd(), \"chdb_test_data_insertion_embedding.csv\")\n", + "with open(csv_path, 'w') as temp_csv:\n", + " temp_csv.write(\"movieId,embedding\\n\") # Header\n", "\n", - "temp_csv.close()\n", - "csv_path = temp_csv.name\n", + " # Generate 10,000 rows of test data\n", + " for i in range(1, 10001):\n", + " embedding = [float(i + j * 0.1) for j in range(10)]\n", + " embedding_str = '[' + ','.join(map(str, embedding)) + ']'\n", + " temp_csv.write(f'{i},\"{embedding_str}\"\\n')\n", "\n", "# Setup database and table\n", "print(\"\\n=== Setup Phase ===\")\n", @@ -49,15 +45,15 @@ " result = chs.query(f\"INSERT INTO embeddings FROM INFILE '{csv_path}' FORMAT 
CSV\")\n", " infile_time = time.time() - start_time\n", " print(f\"✓ INFILE insertion successful! Time: {infile_time:.3f}s\")\n", - " \n", + "\n", " count = chs.query('SELECT COUNT(*) as count FROM embeddings')\n", " print(f\"Records inserted via INFILE: {count}\")\n", - " \n", + "\n", " if count != '0':\n", " print(\"Sample data from INFILE:\")\n", " sample = chs.query('SELECT movieId, embedding FROM embeddings ORDER BY movieId LIMIT 3')\n", " print(sample)\n", - " \n", + "\n", "except Exception as e:\n", " print(f\"✗ INFILE insertion failed: {e}\")\n", " infile_time = 0\n", @@ -71,10 +67,10 @@ " embedding = [float(i + j * 0.1) for j in range(10)]\n", " embedding_str = '[' + ','.join(map(str, embedding)) + ']'\n", " chs.query(f\"INSERT INTO embeddings VALUES ({i}, {embedding_str})\")\n", - " \n", + "\n", " values_time = time.time() - start_time\n", " print(f\"✓ VALUES insertion successful! Time: {values_time:.3f}s\")\n", - " \n", + "\n", "except Exception as e:\n", " print(f\"✗ VALUES insertion failed: {e}\")\n", " values_time = 0\n", @@ -84,21 +80,21 @@ "try:\n", " total_count = chs.query('SELECT COUNT(*) as total FROM embeddings')\n", " print(f\"Total records in embeddings table: {total_count}\")\n", - " \n", + "\n", " # Count by range\n", " infile_count = chs.query('SELECT COUNT(*) as infile_count FROM embeddings WHERE movieId <= 10000')\n", " values_count = chs.query('SELECT COUNT(*) as values_count FROM embeddings WHERE movieId >= 20001')\n", - " \n", + "\n", " print(f\"Records from INFILE (movieId <= 10000): {infile_count}\")\n", " print(f\"Records from VALUES (movieId >= 20001): {values_count}\")\n", - " \n", + "\n", " # Sample from both ranges\n", " print(\"\\nSample from INFILE data:\")\n", " print(chs.query('SELECT movieId, embedding FROM embeddings WHERE movieId <= 10000 ORDER BY movieId LIMIT 2'))\n", - " \n", + "\n", " print(\"Sample from VALUES data:\")\n", " print(chs.query('SELECT movieId, embedding FROM embeddings WHERE movieId >= 20001 ORDER BY 
movieId LIMIT 2'))\n", - " \n", + "\n", "except Exception as e:\n", " print(f\"Count verification error: {e}\")\n", "\n", @@ -106,30 +102,32 @@ "print(\"\\n=== Test 4: CSV Engine Direct Reading ===\")\n", "try:\n", " print(\"Reading generated CSV file directly using CSV engine:\")\n", - " \n", + "\n", " # Method 1: Using file() function\n", " csv_count1 = chs.query(f\"SELECT COUNT(*) as csv_count FROM file('{csv_path}', 'CSV', 'movieId UInt32, embedding String')\")\n", " print(f\"CSV file rows (via file() function): {csv_count1}\")\n", - " \n", + "\n", " # Method 2: Using CSV table engine directly\n", " print(\"Sample rows from CSV file:\")\n", " csv_sample = chs.query(f\"SELECT movieId, embedding FROM file('{csv_path}', 'CSV', 'movieId UInt32, embedding String') ORDER BY movieId LIMIT 3\")\n", " print(csv_sample)\n", - " \n", + "\n", " print(\"Last few rows from CSV file:\")\n", " csv_tail = chs.query(f\"SELECT movieId, embedding FROM file('{csv_path}', 'CSV', 'movieId UInt32, embedding String') ORDER BY movieId DESC LIMIT 3\")\n", " print(csv_tail)\n", - " \n", + "\n", "except Exception as e:\n", " print(f\"CSV engine reading error: {e}\")\n", "\n", "# Cleanup\n", "print(\"\\n=== Cleanup ===\")\n", "try:\n", + " chs.close()\n", " os.unlink(csv_path)\n", - " print(\"✓ Temporary CSV file cleaned up\")\n", + " shutil.rmtree(session_dir, ignore_errors=True)\n", + " print(\"Temporary files cleaned up\")\n", "except Exception as e:\n", - " print(f\"Warning: Could not clean up temporary file: {e}\")\n", + " print(f\"Warning: Could not clean up temporary files: {e}\")\n", "\n", "print(f\"\\n=== Performance Summary ===\")\n", "if infile_time > 0:\n",