From 01c65a941e3225ce81b919d107e5b89f8f2abd40 Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Tue, 11 Jun 2024 09:44:03 +0900 Subject: [PATCH] ARROW-XXX: WIP: [C++] Add support for system mimalloc --- .github/workflows/ruby.yml | 2 ++ ci/conda_env_cpp.txt | 1 + ci/docker/debian-12-cpp.dockerfile | 1 + ci/docker/ubuntu-20.04-cpp.dockerfile | 2 ++ ci/docker/ubuntu-22.04-cpp.dockerfile | 3 +++ ci/docker/ubuntu-24.04-cpp.dockerfile | 2 ++ ci/scripts/msys2_setup.sh | 1 + cpp/cmake_modules/ThirdpartyToolchain.cmake | 27 ++++++++++++------- cpp/src/arrow/CMakeLists.txt | 2 +- cpp/src/arrow/memory_pool.cc | 14 +++------- cpp/vcpkg.json | 1 + .../linux-packages/apache-arrow/Rakefile | 11 ++++++++ .../apache-arrow/apt/ubuntu-jammy/Dockerfile | 1 + .../apache-arrow/apt/ubuntu-noble/Dockerfile | 1 + .../apache-arrow/debian/control.in | 2 ++ .../apache-arrow/yum/almalinux-8/Dockerfile | 1 + .../apache-arrow/yum/almalinux-9/Dockerfile | 1 + .../apache-arrow/yum/arrow.spec.in | 6 +++++ .../yum/centos-9-stream/Dockerfile | 1 + 19 files changed, 59 insertions(+), 21 deletions(-) diff --git a/.github/workflows/ruby.yml b/.github/workflows/ruby.yml index eb00bc5f92a8d..5df60c6ac457a 100644 --- a/.github/workflows/ruby.yml +++ b/.github/workflows/ruby.yml @@ -134,6 +134,7 @@ jobs: ARROW_GLIB_WERROR: true ARROW_HOME: /tmp/local ARROW_JEMALLOC: OFF + ARROW_MIMALLOC: ON ARROW_ORC: OFF ARROW_PARQUET: ON ARROW_WITH_BROTLI: ON @@ -211,6 +212,7 @@ jobs: ARROW_HDFS: OFF ARROW_HOME: /ucrt${{ matrix.mingw-n-bits }} ARROW_JEMALLOC: OFF + ARROW_MIMALLOC: OFF ARROW_PARQUET: ON ARROW_PYTHON: OFF ARROW_S3: ON diff --git a/ci/conda_env_cpp.txt b/ci/conda_env_cpp.txt index f28a24cac8d2d..c143387601489 100644 --- a/ci/conda_env_cpp.txt +++ b/ci/conda_env_cpp.txt @@ -37,6 +37,7 @@ libprotobuf libutf8proc lz4-c make +mimalloc ninja nodejs orc diff --git a/ci/docker/debian-12-cpp.dockerfile b/ci/docker/debian-12-cpp.dockerfile index d7a6f9df2c2ee..cc7992c044502 100644 --- a/ci/docker/debian-12-cpp.dockerfile +++ b/ci/docker/debian-12-cpp.dockerfile @@ -64,6 +64,7 @@ RUN apt-get update -y -q && \ libkrb5-dev \ libldap-dev \ liblz4-dev \ + libmimalloc-dev \ libnghttp2-dev \ libprotobuf-dev \ libprotoc-dev \ diff --git a/ci/docker/ubuntu-20.04-cpp.dockerfile b/ci/docker/ubuntu-20.04-cpp.dockerfile index d78c7a99cf4d6..288823ee6d674 100644 --- a/ci/docker/ubuntu-20.04-cpp.dockerfile +++ b/ci/docker/ubuntu-20.04-cpp.dockerfile @@ -158,6 +158,7 @@ ENV absl_SOURCE=BUNDLED \ ARROW_HDFS=ON \ ARROW_HOME=/usr/local \ ARROW_INSTALL_NAME_RPATH=OFF \ + ARROW_MIMALLOC=ON \ ARROW_ORC=ON \ ARROW_PARQUET=ON \ ARROW_S3=ON \ @@ -179,6 +180,7 @@ ENV absl_SOURCE=BUNDLED \ google_cloud_cpp_storage_SOURCE=BUNDLED \ gRPC_SOURCE=BUNDLED \ GTest_SOURCE=BUNDLED \ + mimalloc_SOURCE=BUNDLED \ ORC_SOURCE=BUNDLED \ PARQUET_BUILD_EXAMPLES=ON \ PARQUET_BUILD_EXECUTABLES=ON \ diff --git a/ci/docker/ubuntu-22.04-cpp.dockerfile b/ci/docker/ubuntu-22.04-cpp.dockerfile index f12e7456add8e..2210233cc41fb 100644 --- a/ci/docker/ubuntu-22.04-cpp.dockerfile +++ b/ci/docker/ubuntu-22.04-cpp.dockerfile @@ -87,6 +87,7 @@ RUN apt-get update -y -q && \ libkrb5-dev \ libldap-dev \ liblz4-dev \ + libmimalloc-dev \ libnghttp2-dev \ libprotobuf-dev \ libprotoc-dev \ @@ -196,6 +197,7 @@ ENV absl_SOURCE=BUNDLED \ ARROW_HDFS=ON \ ARROW_HOME=/usr/local \ ARROW_INSTALL_NAME_RPATH=OFF \ + ARROW_MIMALLOC=ON \ ARROW_ORC=ON \ ARROW_PARQUET=ON \ ARROW_S3=ON \ @@ -215,6 +217,7 @@ ENV absl_SOURCE=BUNDLED \ AWSSDK_SOURCE=BUNDLED \ Azure_SOURCE=BUNDLED \ google_cloud_cpp_storage_SOURCE=BUNDLED \ + mimalloc_SOURCE=BUNDLED \ ORC_SOURCE=BUNDLED \ PARQUET_BUILD_EXAMPLES=ON \ PARQUET_BUILD_EXECUTABLES=ON \ diff --git a/ci/docker/ubuntu-24.04-cpp.dockerfile b/ci/docker/ubuntu-24.04-cpp.dockerfile index ecfb5e2f5096d..9b934e218a153 100644 --- a/ci/docker/ubuntu-24.04-cpp.dockerfile +++ b/ci/docker/ubuntu-24.04-cpp.dockerfile @@ -86,6 +86,7 @@ RUN apt-get update -y -q && \ libkrb5-dev \ libldap-dev \ liblz4-dev \ + libmimalloc-dev \ libnghttp2-dev \ libprotobuf-dev \ libprotoc-dev \ @@ -178,6 +179,7 @@ ENV ARROW_ACERO=ON \ ARROW_HDFS=ON \ ARROW_HOME=/usr/local \ ARROW_INSTALL_NAME_RPATH=OFF \ + ARROW_MIMALLOC=ON \ ARROW_ORC=ON \ ARROW_PARQUET=ON \ ARROW_S3=ON \ diff --git a/ci/scripts/msys2_setup.sh b/ci/scripts/msys2_setup.sh index e5b08424022de..c43dd15d2cfb7 100755 --- a/ci/scripts/msys2_setup.sh +++ b/ci/scripts/msys2_setup.sh @@ -40,6 +40,7 @@ case "${target}" in packages+=(${MINGW_PACKAGE_PREFIX}-libutf8proc) packages+=(${MINGW_PACKAGE_PREFIX}-libxml2) packages+=(${MINGW_PACKAGE_PREFIX}-lz4) + packages+=(${MINGW_PACKAGE_PREFIX}-mimalloc) packages+=(${MINGW_PACKAGE_PREFIX}-ninja) packages+=(${MINGW_PACKAGE_PREFIX}-nlohmann-json) packages+=(${MINGW_PACKAGE_PREFIX}-protobuf) diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake index 9dcb426f079fe..d8189767b7e07 100644 --- a/cpp/cmake_modules/ThirdpartyToolchain.cmake +++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake @@ -58,6 +58,7 @@ set(ARROW_THIRDPARTY_DEPENDENCIES jemalloc LLVM lz4 + mimalloc nlohmann_json opentelemetry-cpp orc @@ -200,6 +201,8 @@ macro(build_dependency DEPENDENCY_NAME) build_jemalloc() elseif("${DEPENDENCY_NAME}" STREQUAL "lz4") build_lz4() + elseif("${DEPENDENCY_NAME}" STREQUAL "mimalloc") + build_mimalloc() elseif("${DEPENDENCY_NAME}" STREQUAL "nlohmann_json") build_nlohmann_json() elseif("${DEPENDENCY_NAME}" STREQUAL "opentelemetry-cpp") @@ -2194,7 +2197,7 @@ endif() # ---------------------------------------------------------------------- # mimalloc - Cross-platform high-performance allocator, from Microsoft -if(ARROW_MIMALLOC) +macro(build_mimalloc) if(NOT ARROW_ENABLE_THREADING) message(FATAL_ERROR "Can't use mimalloc with ARROW_ENABLE_THREADING=OFF") endif() @@ -2234,20 +2237,24 @@ if(ARROW_MIMALLOC) file(MAKE_DIRECTORY ${MIMALLOC_INCLUDE_DIR}) - add_library(mimalloc::mimalloc STATIC IMPORTED) - set_target_properties(mimalloc::mimalloc PROPERTIES IMPORTED_LOCATION - "${MIMALLOC_STATIC_LIB}") - target_include_directories(mimalloc::mimalloc BEFORE - INTERFACE "${MIMALLOC_INCLUDE_DIR}") - target_link_libraries(mimalloc::mimalloc INTERFACE Threads::Threads) + add_library(mimalloc STATIC IMPORTED) + set_target_properties(mimalloc PROPERTIES IMPORTED_LOCATION "${MIMALLOC_STATIC_LIB}") + target_include_directories(mimalloc BEFORE INTERFACE "${MIMALLOC_INCLUDE_DIR}") + target_link_libraries(mimalloc INTERFACE Threads::Threads) if(WIN32) - target_link_libraries(mimalloc::mimalloc INTERFACE "bcrypt.lib" "psapi.lib") + target_link_libraries(mimalloc INTERFACE "bcrypt.lib" "psapi.lib") endif() - add_dependencies(mimalloc::mimalloc mimalloc_ep) + add_dependencies(mimalloc mimalloc_ep) - list(APPEND ARROW_BUNDLED_STATIC_LIBS mimalloc::mimalloc) + list(APPEND ARROW_BUNDLED_STATIC_LIBS mimalloc) set(mimalloc_VENDORED TRUE) +endmacro() + +if(ARROW_MIMALLOC) + set(CMAKE_FIND_DEBUG_MODE ON) + resolve_dependency(mimalloc) + set(CMAKE_FIND_DEBUG_MODE OFF) endif() # ---------------------------------------------------------------------- diff --git a/cpp/src/arrow/CMakeLists.txt b/cpp/src/arrow/CMakeLists.txt index 6dc8358f502f5..5c105effec20a 100644 --- a/cpp/src/arrow/CMakeLists.txt +++ b/cpp/src/arrow/CMakeLists.txt @@ -449,7 +449,7 @@ if(ARROW_JEMALLOC) endif() if(ARROW_MIMALLOC) foreach(ARROW_MEMORY_POOL_TARGET ${ARROW_MEMORY_POOL_TARGETS}) - target_link_libraries(${ARROW_MEMORY_POOL_TARGET} PRIVATE mimalloc::mimalloc) + target_link_libraries(${ARROW_MEMORY_POOL_TARGET} PRIVATE mimalloc) endforeach() endif() diff --git a/cpp/src/arrow/memory_pool.cc b/cpp/src/arrow/memory_pool.cc index 2f8ce3a6fa8c7..a34d90e4d7e3a 100644 --- a/cpp/src/arrow/memory_pool.cc +++ b/cpp/src/arrow/memory_pool.cc @@ -85,19 +85,13 @@ struct SupportedBackend { const std::vector& SupportedBackends() { static std::vector backends = { - // ARROW-12316: Apple => mimalloc first, then jemalloc - // non-Apple => jemalloc first, then mimalloc -#if defined(ARROW_JEMALLOC) && !defined(__APPLE__) - {"jemalloc", MemoryPoolBackend::Jemalloc}, -#endif #ifdef ARROW_MIMALLOC - {"mimalloc", MemoryPoolBackend::Mimalloc}, + {"mimalloc", MemoryPoolBackend::Mimalloc}, #endif -#if defined(ARROW_JEMALLOC) && defined(__APPLE__) - {"jemalloc", MemoryPoolBackend::Jemalloc}, +#ifdef ARROW_JEMALLOC + {"jemalloc", MemoryPoolBackend::Jemalloc}, #endif - {"system", MemoryPoolBackend::System} - }; + {"system", MemoryPoolBackend::System}}; return backends; } diff --git a/cpp/vcpkg.json b/cpp/vcpkg.json index f087a6d24c8f9..5fed915d34984 100644 --- a/cpp/vcpkg.json +++ b/cpp/vcpkg.json @@ -38,6 +38,7 @@ "grpc", "gtest", "lz4", + "mimalloc", "openssl", "orc", "protobuf", diff --git a/dev/tasks/linux-packages/apache-arrow/Rakefile b/dev/tasks/linux-packages/apache-arrow/Rakefile index cdc6d2cf35b66..f43df83fe8638 100644 --- a/dev/tasks/linux-packages/apache-arrow/Rakefile +++ b/dev/tasks/linux-packages/apache-arrow/Rakefile @@ -116,10 +116,21 @@ class ApacheArrowPackageTask < PackageTask control.gsub(/@USE_SYSTEM_PROTOBUF@/, use_system_protobuf) end + def apt_prepare_debian_control_mimalloc(control, target) + case target + when /\Aubuntu-(?:focal|jammy)/ + use_system_mimalloc = "#" + else + use_system_mimalloc = "" + end + control.gsub(/@USE_SYSTEM_MIMALLOC@/, use_system_mimalloc) + end + def apt_prepare_debian_control(control_in, target) control = control_in.dup control = apt_prepare_debian_control_cuda_architecture(control, target) control = apt_prepare_debian_control_grpc(control, target) + control = apt_prepare_debian_control_mimalloc(control, target) control = apt_prepare_debian_control_protobuf(control, target) control end diff --git a/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-jammy/Dockerfile b/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-jammy/Dockerfile index e6718e59b0aba..17246793151c2 100644 --- a/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-jammy/Dockerfile +++ b/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-jammy/Dockerfile @@ -51,6 +51,7 @@ RUN \ libgrpc++-dev \ libgtest-dev \ liblz4-dev \ + libmimalloc-dev \ libprotobuf-dev \ libprotoc-dev \ libre2-dev \ diff --git a/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-noble/Dockerfile b/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-noble/Dockerfile index 87ea2402456b0..e5663fa045cfd 100644 --- a/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-noble/Dockerfile +++ b/dev/tasks/linux-packages/apache-arrow/apt/ubuntu-noble/Dockerfile @@ -52,6 +52,7 @@ RUN \ libgrpc++-dev \ libgtest-dev \ liblz4-dev \ + libmimalloc-dev \ libmlir-15-dev \ libprotobuf-dev \ libprotoc-dev \ diff --git a/dev/tasks/linux-packages/apache-arrow/debian/control.in b/dev/tasks/linux-packages/apache-arrow/debian/control.in index c33e3ac791be1..9040bbcfa0bd3 100644 --- a/dev/tasks/linux-packages/apache-arrow/debian/control.in +++ b/dev/tasks/linux-packages/apache-arrow/debian/control.in @@ -20,6 +20,7 @@ Build-Depends: @USE_SYSTEM_GRPC@ libgrpc++-dev, libgtest-dev, liblz4-dev, +@USE_SYSTEM_MIMALLOC@ libmimalloc-dev, @USE_SYSTEM_PROTOBUF@ libprotobuf-dev, @USE_SYSTEM_PROTOBUF@ libprotoc-dev, libre2-dev, @@ -145,6 +146,7 @@ Depends: libcurl4-openssl-dev, liblz4-dev, libc-ares-dev, +@USE_SYSTEM_MIMALLOC@ libmimalloc-dev, @USE_SYSTEM_PROTOBUF@ libprotobuf-dev, @USE_SYSTEM_PROTOBUF@ libprotoc-dev, libre2-dev, diff --git a/dev/tasks/linux-packages/apache-arrow/yum/almalinux-8/Dockerfile b/dev/tasks/linux-packages/apache-arrow/yum/almalinux-8/Dockerfile index d846915ab21de..a689682217622 100644 --- a/dev/tasks/linux-packages/apache-arrow/yum/almalinux-8/Dockerfile +++ b/dev/tasks/linux-packages/apache-arrow/yum/almalinux-8/Dockerfile @@ -46,6 +46,7 @@ RUN \ llvm-static \ lz4-devel \ make \ + mimalloc-devel \ ncurses-devel \ ninja-build \ openssl-devel \ diff --git a/dev/tasks/linux-packages/apache-arrow/yum/almalinux-9/Dockerfile b/dev/tasks/linux-packages/apache-arrow/yum/almalinux-9/Dockerfile index 222ab1b58d34d..38823677c6fa2 100644 --- a/dev/tasks/linux-packages/apache-arrow/yum/almalinux-9/Dockerfile +++ b/dev/tasks/linux-packages/apache-arrow/yum/almalinux-9/Dockerfile @@ -48,6 +48,7 @@ RUN \ llvm-static \ lz4-devel \ make \ + mimalloc-devel \ ncurses-devel \ ninja-build \ openssl-devel \ diff --git a/dev/tasks/linux-packages/apache-arrow/yum/arrow.spec.in b/dev/tasks/linux-packages/apache-arrow/yum/arrow.spec.in index d5e6c3a332eb3..393d7d3986370 100644 --- a/dev/tasks/linux-packages/apache-arrow/yum/arrow.spec.in +++ b/dev/tasks/linux-packages/apache-arrow/yum/arrow.spec.in @@ -122,6 +122,9 @@ BuildRequires: json-devel %endif BuildRequires: libzstd-devel BuildRequires: lz4-devel %{lz4_requirement} +%if %{use_mimalloc} +BuildRequires: mimalloc-devel +%endif BuildRequires: ninja-build BuildRequires: openssl-devel BuildRequires: pkgconfig @@ -285,6 +288,9 @@ Requires: json-devel %endif Requires: libzstd-devel Requires: lz4-devel %{lz4_requirement} +%if %{use_mimalloc} +Requires: mimalloc-devel +%endif Requires: openssl-devel %if %{have_rapidjson} Requires: rapidjson-devel diff --git a/dev/tasks/linux-packages/apache-arrow/yum/centos-9-stream/Dockerfile b/dev/tasks/linux-packages/apache-arrow/yum/centos-9-stream/Dockerfile index 9522d999af5b7..4a54ed2aa421f 100644 --- a/dev/tasks/linux-packages/apache-arrow/yum/centos-9-stream/Dockerfile +++ b/dev/tasks/linux-packages/apache-arrow/yum/centos-9-stream/Dockerfile @@ -44,6 +44,7 @@ RUN \ llvm-devel \ lz4-devel \ make \ + mimalloc-devel \ ncurses-devel \ ninja-build \ openssl-devel \