Merge branch 'branch-24.02' into improve_parallelism_of_refine_host
anaruse authored Dec 14, 2023
2 parents c9bfc9a + 80a48ca commit a439b44
Showing 35 changed files with 440 additions and 340 deletions.
8 changes: 4 additions & 4 deletions build.sh
@@ -386,6 +386,8 @@ SKBUILD_EXTRA_CMAKE_ARGS="${EXTRA_CMAKE_ARGS}"
 if [[ "${EXTRA_CMAKE_ARGS}" != *"DFIND_RAFT_CPP"* ]]; then
     SKBUILD_EXTRA_CMAKE_ARGS="${SKBUILD_EXTRA_CMAKE_ARGS} -DFIND_RAFT_CPP=ON"
 fi
+# Replace spaces with semicolons in SKBUILD_EXTRA_CMAKE_ARGS
+SKBUILD_EXTRA_CMAKE_ARGS=$(echo ${SKBUILD_EXTRA_CMAKE_ARGS} | sed 's/ /;/g')
 
 # If clean given, run it prior to any other steps
 if (( ${CLEAN} == 1 )); then
@@ -493,15 +495,13 @@ fi
 
 # Build and (optionally) install the pylibraft Python package
 if (( ${NUMARGS} == 0 )) || hasArg pylibraft; then
-    SKBUILD_CONFIGURE_OPTIONS="${SKBUILD_EXTRA_CMAKE_ARGS}" \
-        SKBUILD_BUILD_OPTIONS="-j${PARALLEL_LEVEL}" \
+    SKBUILD_CMAKE_ARGS="${SKBUILD_EXTRA_CMAKE_ARGS}" \
         python -m pip install --no-build-isolation --no-deps ${REPODIR}/python/pylibraft
 fi
 
 # Build and (optionally) install the raft-dask Python package
 if (( ${NUMARGS} == 0 )) || hasArg raft-dask; then
-    SKBUILD_CONFIGURE_OPTIONS="${SKBUILD_EXTRA_CMAKE_ARGS}" \
-        SKBUILD_BUILD_OPTIONS="-j${PARALLEL_LEVEL}" \
+    SKBUILD_CMAKE_ARGS="${SKBUILD_EXTRA_CMAKE_ARGS}" \
         python -m pip install --no-build-isolation --no-deps ${REPODIR}/python/raft-dask
 fi
 
2 changes: 1 addition & 1 deletion ci/build_wheel_pylibraft.sh
@@ -4,6 +4,6 @@
 set -euo pipefail
 
 # Set up skbuild options. Enable sccache in skbuild config options
-export SKBUILD_CONFIGURE_OPTIONS="-DDETECT_CONDA_ENV=OFF -DFIND_RAFT_CPP=OFF"
+export SKBUILD_CMAKE_ARGS="-DDETECT_CONDA_ENV=OFF;-DFIND_RAFT_CPP=OFF"
 
 ci/build_wheel.sh pylibraft python/pylibraft
2 changes: 1 addition & 1 deletion ci/build_wheel_raft_dask.sh
@@ -4,6 +4,6 @@
 set -euo pipefail
 
 # Set up skbuild options. Enable sccache in skbuild config options
-export SKBUILD_CONFIGURE_OPTIONS="-DDETECT_CONDA_ENV=OFF -DFIND_RAFT_CPP=OFF"
+export SKBUILD_CMAKE_ARGS="-DDETECT_CONDA_ENV=OFF;-DFIND_RAFT_CPP=OFF"
 
 ci/build_wheel.sh raft-dask python/raft-dask
2 changes: 1 addition & 1 deletion conda/environments/all_cuda-118_arch-aarch64.yaml
@@ -49,7 +49,7 @@ dependencies:
 - rapids-dask-dependency==24.2.*
 - recommonmark
 - rmm==24.2.*
-- scikit-build>=0.13.1
+- scikit-build-core>=0.7.0
 - scikit-learn
 - scipy
 - sphinx-copybutton
2 changes: 1 addition & 1 deletion conda/environments/all_cuda-118_arch-x86_64.yaml
@@ -49,7 +49,7 @@ dependencies:
 - rapids-dask-dependency==24.2.*
 - recommonmark
 - rmm==24.2.*
-- scikit-build>=0.13.1
+- scikit-build-core>=0.7.0
 - scikit-learn
 - scipy
 - sphinx-copybutton
2 changes: 1 addition & 1 deletion conda/environments/all_cuda-120_arch-aarch64.yaml
@@ -45,7 +45,7 @@ dependencies:
 - rapids-dask-dependency==24.2.*
 - recommonmark
 - rmm==24.2.*
-- scikit-build>=0.13.1
+- scikit-build-core>=0.7.0
 - scikit-learn
 - scipy
 - sphinx-copybutton
2 changes: 1 addition & 1 deletion conda/environments/all_cuda-120_arch-x86_64.yaml
@@ -45,7 +45,7 @@ dependencies:
 - rapids-dask-dependency==24.2.*
 - recommonmark
 - rmm==24.2.*
-- scikit-build>=0.13.1
+- scikit-build-core>=0.7.0
 - scikit-learn
 - scipy
 - sphinx-copybutton
2 changes: 1 addition & 1 deletion conda/environments/bench_ann_cuda-118_arch-aarch64.yaml
@@ -39,6 +39,6 @@ dependencies:
 - pandas
 - pyyaml
 - rmm==24.2.*
-- scikit-build>=0.13.1
+- scikit-build-core>=0.7.0
 - sysroot_linux-aarch64==2.17
 name: bench_ann_cuda-118_arch-aarch64
2 changes: 1 addition & 1 deletion conda/environments/bench_ann_cuda-118_arch-x86_64.yaml
@@ -39,6 +39,6 @@ dependencies:
 - pandas
 - pyyaml
 - rmm==24.2.*
-- scikit-build>=0.13.1
+- scikit-build-core>=0.7.0
 - sysroot_linux-64==2.17
 name: bench_ann_cuda-118_arch-x86_64
2 changes: 1 addition & 1 deletion conda/environments/bench_ann_cuda-120_arch-aarch64.yaml
@@ -35,6 +35,6 @@ dependencies:
 - pandas
 - pyyaml
 - rmm==24.2.*
-- scikit-build>=0.13.1
+- scikit-build-core>=0.7.0
 - sysroot_linux-aarch64==2.17
 name: bench_ann_cuda-120_arch-aarch64
2 changes: 1 addition & 1 deletion conda/environments/bench_ann_cuda-120_arch-x86_64.yaml
@@ -35,6 +35,6 @@ dependencies:
 - pandas
 - pyyaml
 - rmm==24.2.*
-- scikit-build>=0.13.1
+- scikit-build-core>=0.7.0
 - sysroot_linux-64==2.17
 name: bench_ann_cuda-120_arch-x86_64
2 changes: 1 addition & 1 deletion conda/recipes/pylibraft/meta.yaml
@@ -50,7 +50,7 @@ requirements:
     - libraft-headers {{ version }}
     - python x.x
     - rmm ={{ minor_version }}
-    - scikit-build >=0.13.1
+    - scikit-build-core >=0.7.0
     - setuptools
   run:
     - {{ pin_compatible('cuda-version', max_pin='x', min_pin='x') }}
2 changes: 1 addition & 1 deletion conda/recipes/raft-dask/meta.yaml
@@ -50,7 +50,7 @@ requirements:
     - pylibraft {{ version }}
     - python x.x
     - rmm ={{ minor_version }}
-    - scikit-build >=0.13.1
+    - scikit-build-core >=0.7.0
     - setuptools
     - ucx {{ ucx_version }}
     - ucx-proc=*=gpu
45 changes: 33 additions & 12 deletions cpp/bench/ann/CMakeLists.txt
@@ -116,6 +116,21 @@ if(RAFT_ANN_BENCH_USE_FAISS)
   include(cmake/thirdparty/get_faiss.cmake)
 endif()
 
+# ##################################################################################################
+# * Enable NVTX if available
+
+# Note: ANN_BENCH wrappers have extra NVTX code not related to raft::nvtx. They track gbench
+# benchmark cases and iterations. This is to make limited NVTX available to all algos, not just
+# raft.
+if(TARGET CUDA::nvtx3)
+  set(_CMAKE_REQUIRED_INCLUDES_ORIG ${CMAKE_REQUIRED_INCLUDES})
+  get_target_property(CMAKE_REQUIRED_INCLUDES CUDA::nvtx3 INTERFACE_INCLUDE_DIRECTORIES)
+  unset(NVTX3_HEADERS_FOUND CACHE)
+  # Check the headers explicitly to make sure the cpu-only build succeeds
+  CHECK_INCLUDE_FILE_CXX(nvtx3/nvToolsExt.h NVTX3_HEADERS_FOUND)
+  set(CMAKE_REQUIRED_INCLUDES ${_CMAKE_REQUIRED_INCLUDES_ORIG})
+endif()
+
 # ##################################################################################################
 # * Configure tests function-------------------------------------------------------------
 
@@ -141,8 +156,13 @@ function(ConfigureAnnBench)
     add_dependencies(${BENCH_NAME} ANN_BENCH)
   else()
     add_executable(${BENCH_NAME} ${ConfigureAnnBench_PATH})
-    target_compile_definitions(${BENCH_NAME} PRIVATE ANN_BENCH_BUILD_MAIN)
-    target_link_libraries(${BENCH_NAME} PRIVATE benchmark::benchmark)
+    target_compile_definitions(
+      ${BENCH_NAME} PRIVATE ANN_BENCH_BUILD_MAIN
+      $<$<BOOL:${NVTX3_HEADERS_FOUND}>:ANN_BENCH_NVTX3_HEADERS_FOUND>
+    )
+    target_link_libraries(
+      ${BENCH_NAME} PRIVATE benchmark::benchmark $<$<BOOL:${NVTX3_HEADERS_FOUND}>:CUDA::nvtx3>
+    )
   endif()
 
   target_link_libraries(
@@ -340,8 +360,16 @@ if(RAFT_ANN_BENCH_SINGLE_EXE)
   target_include_directories(ANN_BENCH PRIVATE ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES})
 
   target_link_libraries(
-    ANN_BENCH PRIVATE nlohmann_json::nlohmann_json benchmark_static dl -static-libgcc
-    -static-libstdc++ CUDA::nvtx3
+    ANN_BENCH
+    PRIVATE raft::raft
+            nlohmann_json::nlohmann_json
+            benchmark_static
+            dl
+            -static-libgcc
+            fmt::fmt-header-only
+            spdlog::spdlog_header_only
+            -static-libstdc++
+            $<$<BOOL:${NVTX3_HEADERS_FOUND}>:CUDA::nvtx3>
   )
   set_target_properties(
     ANN_BENCH
@@ -355,17 +383,10 @@ if(RAFT_ANN_BENCH_SINGLE_EXE)
               BUILD_RPATH "\$ORIGIN"
               INSTALL_RPATH "\$ORIGIN"
   )
-
-  # Disable NVTX when the nvtx3 headers are missing
-  set(_CMAKE_REQUIRED_INCLUDES_ORIG ${CMAKE_REQUIRED_INCLUDES})
-  get_target_property(CMAKE_REQUIRED_INCLUDES ANN_BENCH INCLUDE_DIRECTORIES)
-  CHECK_INCLUDE_FILE_CXX(nvtx3/nvToolsExt.h NVTX3_HEADERS_FOUND)
-  set(CMAKE_REQUIRED_INCLUDES ${_CMAKE_REQUIRED_INCLUDES_ORIG})
   target_compile_definitions(
     ANN_BENCH
     PRIVATE
-      $<$<BOOL:${CUDAToolkit_FOUND}>:ANN_BENCH_LINK_CUDART="libcudart.so.${CUDAToolkit_VERSION_MAJOR}.${CUDAToolkit_VERSION_MINOR}.${CUDAToolkit_VERSION_PATCH}
-      ">
+      $<$<BOOL:${CUDAToolkit_FOUND}>:ANN_BENCH_LINK_CUDART="libcudart.so.${CUDAToolkit_VERSION_MAJOR}.${CUDAToolkit_VERSION_MINOR}.${CUDAToolkit_VERSION_PATCH}">
       $<$<BOOL:${NVTX3_HEADERS_FOUND}>:ANN_BENCH_NVTX3_HEADERS_FOUND>
   )
 
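The CMake changes above move the nvtx3 header probe up front and gate both the compile definition and the link dependency on NVTX3_HEADERS_FOUND through generator expressions, so a CPU-only build configures and compiles cleanly. As a minimal sketch (not part of this commit; the annotate_* helper names are hypothetical), a benchmark source might consume the resulting ANN_BENCH_NVTX3_HEADERS_FOUND definition like this:

// Hypothetical consumer of the ANN_BENCH_NVTX3_HEADERS_FOUND definition.
#ifdef ANN_BENCH_NVTX3_HEADERS_FOUND
#include <nvtx3/nvToolsExt.h>
// Headers available: annotations map to real NVTX ranges.
inline void annotate_begin(const char* name) { nvtxRangePushA(name); }
inline void annotate_end() { nvtxRangePop(); }
#else
// CPU-only build: headers absent, annotations compile to no-ops.
inline void annotate_begin(const char*) {}
inline void annotate_end() {}
#endif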
15 changes: 7 additions & 8 deletions cpp/bench/ann/src/common/ann_types.hpp
@@ -18,6 +18,7 @@
 
 #include "cuda_stub.hpp" // cudaStream_t
 
+#include <memory>
 #include <stdexcept>
 #include <string>
 #include <vector>
@@ -64,17 +65,10 @@ inline auto parse_memory_type(const std::string& memory_type) -> MemoryType
   }
 }
 
-class AlgoProperty {
- public:
-  inline AlgoProperty() {}
-  inline AlgoProperty(MemoryType dataset_memory_type_, MemoryType query_memory_type_)
-    : dataset_memory_type(dataset_memory_type_), query_memory_type(query_memory_type_)
-  {
-  }
+struct AlgoProperty {
   MemoryType dataset_memory_type;
   // neighbors/distances should have same memory type as queries
   MemoryType query_memory_type;
-  virtual ~AlgoProperty() = default;
 };
 
 class AnnBase {
@@ -125,6 +119,11 @@ class ANN : public AnnBase {
   // The client code should call set_search_dataset() before searching,
   // and should not release dataset before searching is finished.
   virtual void set_search_dataset(const T* /*dataset*/, size_t /*nrow*/){};
+
+  /**
+   * Make a shallow copy of the ANN wrapper that shares the resources and ensures thread-safe
+   * access to them. */
+  virtual auto copy() -> std::unique_ptr<ANN<T>> = 0;
 };
 
 }  // namespace raft::bench::ann
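The new pure-virtual copy() is the core of this change: each wrapper must be able to hand a benchmark thread its own cheap handle over shared, read-only resources. A self-contained miniature of that shallow-copy pattern (not from this commit; Algo and Index are illustrative stand-ins, not RAFT types):

#include <memory>

struct Index {  // stand-in for an algorithm's immutable built index
  int data;
};

class Algo {
 public:
  explicit Algo(int d) : index_(std::make_shared<Index>(Index{d})) {}

  // Shallow copy: duplicates the handle, shares the underlying index.
  auto copy() const -> std::unique_ptr<Algo> { return std::make_unique<Algo>(*this); }

  // Read-only use of the shared state is safe from many threads at once.
  auto search(int query) const -> int { return query + index_->data; }

 private:
  std::shared_ptr<Index> index_;
};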
16 changes: 7 additions & 9 deletions cpp/bench/ann/src/common/benchmark.hpp
@@ -45,7 +45,7 @@ std::condition_variable cond_var;
 std::atomic_int processed_threads{0};
 
 static inline std::unique_ptr<AnnBase> current_algo{nullptr};
-static inline std::shared_ptr<AlgoProperty> current_algo_props{nullptr};
+static inline std::unique_ptr<AlgoProperty> current_algo_props{nullptr};
 
 using kv_series = std::vector<std::tuple<std::string, std::vector<nlohmann::json>>>;
 
@@ -241,9 +241,8 @@ void bench_search(::benchmark::State& state,
     return;
   }
 
-  auto algo_property = parse_algo_property(algo->get_preference(), sp_json);
-  current_algo_props = std::make_shared<AlgoProperty>(algo_property.dataset_memory_type,
-                                                      algo_property.query_memory_type);
+  current_algo_props = std::make_unique<AlgoProperty>(
+    std::move(parse_algo_property(algo->get_preference(), sp_json)));
 
   if (search_param->needs_dataset()) {
     try {
@@ -277,23 +276,22 @@ void bench_search(::benchmark::State& state,
     // We are accessing shared variables (like current_algo, current_algo_probs) before the
     // benchmark loop, therefore the synchronization here is necessary.
   }
-  const auto algo_property = *current_algo_props;
-  query_set                = dataset->query_set(algo_property.query_memory_type);
+  query_set = dataset->query_set(current_algo_props->query_memory_type);
 
   /**
    * Each thread will manage its own outputs
    */
   std::shared_ptr<buf<float>> distances =
-    std::make_shared<buf<float>>(algo_property.query_memory_type, k * query_set_size);
+    std::make_shared<buf<float>>(current_algo_props->query_memory_type, k * query_set_size);
   std::shared_ptr<buf<std::size_t>> neighbors =
-    std::make_shared<buf<std::size_t>>(algo_property.query_memory_type, k * query_set_size);
+    std::make_shared<buf<std::size_t>>(current_algo_props->query_memory_type, k * query_set_size);
 
   cuda_timer gpu_timer;
   auto start = std::chrono::high_resolution_clock::now();
   {
     nvtx_case nvtx{state.name()};
 
-    ANN<T>* algo = dynamic_cast<ANN<T>*>(current_algo.get());
+    auto algo = dynamic_cast<ANN<T>*>(current_algo.get())->copy();
     for (auto _ : state) {
       [[maybe_unused]] auto ntx_lap = nvtx.lap();
       [[maybe_unused]] auto gpu_lap = gpu_timer.lap();
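Together with the unique_ptr swap above, bench_search no longer shares one mutable wrapper across threads: each thread downcasts the global current_algo once and then iterates on its own copy(). A sketch of why that removes the race (illustrative only, reusing the hypothetical Algo from the ann_types.hpp note above):

#include <thread>
#include <vector>

// Each worker takes its own shallow copy of the shared wrapper and runs its
// search loop on that copy; only the immutable index is shared between threads.
void run_search_threads(const Algo& shared_algo, int num_threads)
{
  std::vector<std::thread> workers;
  for (int t = 0; t < num_threads; ++t) {
    workers.emplace_back([&shared_algo, t] {
      auto algo = shared_algo.copy();  // thread-local handle, shared index
      for (int i = 0; i < 100; ++i) {
        (void)algo->search(t + i);  // no writes to shared state in the hot loop
      }
    });
  }
  for (auto& w : workers) {
    w.join();
  }
}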
