Commit

Merge branch 'CAGRA-remove-max_dim-template-param' of github.com:enp1s0/raft into CAGRA-remove-max_dim-template-param

enp1s0 committed Dec 4, 2023
2 parents 773e9ec + 5645953 commit bbe9104
Showing 52 changed files with 529 additions and 1,613 deletions.
12 changes: 6 additions & 6 deletions ci/build_python.sh
@@ -15,11 +15,11 @@ CPP_CHANNEL=$(rapids-download-conda-from-s3 cpp)

version=$(rapids-generate-version)
git_commit=$(git rev-parse HEAD)
export RAPIDS_PACKAGE_VERSION=${version}
export RAPIDS_PACKAGE_VERSION=${version}
echo "${version}" > VERSION

package_dir="python"
for package_name in pylibraft raft-dask; do
for package_name in pylibraft raft-dask; do
underscore_package_name=$(echo "${package_name}" | tr "-" "_")
sed -i "/^__git_commit__/ s/= .*/= \"${git_commit}\"/g" "${package_dir}/${package_name}/${underscore_package_name}/_version.py"
done
@@ -39,10 +39,10 @@ rapids-conda-retry mambabuild \

# Build ann-bench for each cuda and python version
rapids-conda-retry mambabuild \
--no-test \
--channel "${CPP_CHANNEL}" \
--channel "${RAPIDS_CONDA_BLD_OUTPUT_DIR}" \
conda/recipes/raft-ann-bench
--no-test \
--channel "${CPP_CHANNEL}" \
--channel "${RAPIDS_CONDA_BLD_OUTPUT_DIR}" \
conda/recipes/raft-ann-bench

# Build ann-bench-cpu only in CUDA 11 jobs since it only depends on python
# version
2 changes: 2 additions & 0 deletions ci/release/update-version.sh
@@ -69,6 +69,8 @@ for FILE in dependencies.yaml conda/environments/*.yaml; do
sed_runner "/-.* ${DEP}==/ s/==.*/==${NEXT_SHORT_TAG_PEP440}\.*/g" ${FILE};
done
sed_runner "/-.* ucx-py==/ s/==.*/==${NEXT_UCX_PY_SHORT_TAG_PEP440}\.*/g" ${FILE};
sed_runner "/-.* ucx-py-cu11==/ s/==.*/==${NEXT_UCX_PY_SHORT_TAG_PEP440}\.*/g" ${FILE};
sed_runner "/-.* ucx-py-cu12==/ s/==.*/==${NEXT_UCX_PY_SHORT_TAG_PEP440}\.*/g" ${FILE};
done
for FILE in python/*/pyproject.toml; do
for DEP in "${DEPENDENCIES[@]}"; do
4 changes: 2 additions & 2 deletions conda/recipes/libraft/conda_build_config.yaml
@@ -73,7 +73,7 @@ cuda11_cuda_profiler_api_run_version:
- ">=11.4.240,<12"

spdlog_version:
- ">=1.11.0,<1.12"
- ">=1.12.0,<1.13"

fmt_version:
- ">=9.1.0,<10"
- ">=10.1.1,<11"
4 changes: 2 additions & 2 deletions conda/recipes/raft-ann-bench-cpu/conda_build_config.yaml
@@ -20,7 +20,7 @@ nlohmann_json_version:
- ">=3.11.2"

spdlog_version:
- ">=1.11.0,<1.12"
- ">=1.12.0,<1.13"

fmt_version:
- ">=9.1.0,<10"
- ">=10.1.1,<11"
10 changes: 0 additions & 10 deletions cpp/CMakeLists.txt
@@ -372,16 +372,6 @@ if(RAFT_COMPILE_LIBRARY)
src/neighbors/detail/refine_host_float_float.cpp
src/neighbors/detail/refine_host_int8_t_float.cpp
src/neighbors/detail/refine_host_uint8_t_float.cpp
src/neighbors/detail/selection_faiss_int32_t_float.cu
src/neighbors/detail/selection_faiss_int_double.cu
src/neighbors/detail/selection_faiss_long_float.cu
src/neighbors/detail/selection_faiss_size_t_double.cu
src/neighbors/detail/selection_faiss_size_t_float.cu
src/neighbors/detail/selection_faiss_uint32_t_float.cu
src/neighbors/detail/selection_faiss_int64_t_double.cu
src/neighbors/detail/selection_faiss_int64_t_half.cu
src/neighbors/detail/selection_faiss_uint32_t_double.cu
src/neighbors/detail/selection_faiss_uint32_t_half.cu
src/neighbors/ivf_flat_build_float_int64_t.cu
src/neighbors/ivf_flat_build_int8_t_int64_t.cu
src/neighbors/ivf_flat_build_uint8_t_int64_t.cu
2 changes: 1 addition & 1 deletion cpp/bench/ann/src/raft/raft_ivf_flat_wrapper.h
@@ -78,7 +78,7 @@ class RaftIvfFlatGpu : public ANN<T> {
AlgoProperty get_preference() const override
{
AlgoProperty property;
property.dataset_memory_type = MemoryType::Device;
property.dataset_memory_type = MemoryType::HostMmap;
property.query_memory_type = MemoryType::Device;
return property;
}
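
Switching dataset_memory_type from MemoryType::Device to MemoryType::HostMmap lets the ANN benchmark keep the base dataset memory-mapped on the host instead of requiring it to fit in device memory. A minimal sketch of host memory-mapping a dataset file, assuming POSIX mmap and a plain row-major float32 layout (the function name and the header-less layout here are hypothetical, not the benchmark's actual file format):

    #include <fcntl.h>
    #include <sys/mman.h>
    #include <sys/stat.h>
    #include <unistd.h>
    #include <cstddef>

    // Map a dataset file read-only into host memory and return a pointer to its bytes.
    const float* map_dataset(const char* path, std::size_t* out_bytes)
    {
      int fd = open(path, O_RDONLY);
      if (fd < 0) { return nullptr; }
      struct stat st;
      if (fstat(fd, &st) != 0) { close(fd); return nullptr; }
      void* ptr = mmap(nullptr, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
      close(fd);  // the mapping remains valid after the descriptor is closed
      if (ptr == MAP_FAILED) { return nullptr; }
      *out_bytes = static_cast<std::size_t>(st.st_size);
      return static_cast<const float*>(ptr);
    }

The IVF-Flat extend changes below then stream such host-resident data to the device in batches rather than assuming it is already device-accessible.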
3 changes: 0 additions & 3 deletions cpp/bench/prims/matrix/select_k.cu
@@ -279,9 +279,6 @@ const static size_t MAX_MEMORY = 16 * 1024 * 1024 * 1024ULL;
SELECTION_REGISTER_ALGO_INPUT(KeyT, IdxT, kWarpDistributed, input) \
SELECTION_REGISTER_ALGO_INPUT(KeyT, IdxT, kWarpDistributedShm, input) \
} \
if (input.k <= raft::neighbors::detail::kFaissMaxK<IdxT, KeyT>()) { \
SELECTION_REGISTER_ALGO_INPUT(KeyT, IdxT, kFaissBlockSelect, input) \
} \
} \
}

1 change: 0 additions & 1 deletion cpp/include/raft/matrix/detail/select_k-inl.cuh
@@ -25,7 +25,6 @@
#include <raft/matrix/init.cuh>

#include <raft/core/resource/thrust_policy.hpp>
#include <raft/neighbors/detail/selection_faiss.cuh>
#include <rmm/cuda_stream_view.hpp>
#include <rmm/mr/device/device_memory_resource.hpp>
#include <thrust/scan.h>
40 changes: 40 additions & 0 deletions cpp/include/raft/neighbors/brute_force-inl.cuh
@@ -284,6 +284,44 @@ void fused_l2_knn(raft::resources const& handle,
/**
* @brief Build the index from the dataset for efficient search.
*
* This function builds a brute force index for the given dataset. This lets you re-use
* precalculated norms for the dataset, leading to a speedup over calling
* raft::neighbors::brute_force::knn repeatedly.
*
* Example usage:
* @code{.cpp}
* #include <raft/neighbors/brute_force.cuh>
* #include <raft/core/device_mdarray.hpp>
* #include <raft/random/make_blobs.cuh>
*
* // create a random dataset
* int n_rows = 10000;
* int n_cols = 10000;
*
* raft::device_resources res;
* auto dataset = raft::make_device_matrix<float, int64_t>(res, n_rows, n_cols);
* auto labels = raft::make_device_vector<int64_t, int64_t>(res, n_rows);
*
* raft::random::make_blobs(res, dataset.view(), labels.view());
*
* // create a brute_force knn index from the dataset
* auto index = raft::neighbors::brute_force::build(res,
* raft::make_const_mdspan(dataset.view()));
*
* // Use the constructed index to search for the nearest 128 neighbors
* int k = 128;
* auto search = raft::make_const_mdspan(dataset.view());
*
 * auto indices = raft::make_device_matrix<int, int64_t>(res, search.extent(0), k);
* auto distances = raft::make_device_matrix<float, int64_t>(res, search.extent(0), k);
*
* raft::neighbors::brute_force::search(res,
* index,
* search,
* indices.view(),
* distances.view());
* @endcode
*
* @tparam T data element type
*
* @param[in] res
@@ -330,6 +368,8 @@ index<T> build(raft::resources const& res,
/**
* @brief Brute Force search using the constructed index.
*
* See raft::neighbors::brute_force::build for a usage example
*
* @tparam T data element type
* @tparam IdxT type of the indices
*
6 changes: 3 additions & 3 deletions cpp/include/raft/neighbors/brute_force.cuh
@@ -44,10 +44,10 @@ namespace raft::neighbors::brute_force {
* int n_cols = 10000;
* raft::device_resources res;
* auto dataset = raft::make_device_matrix<float, int>(res, n_rows, n_cols);
* auto labels = raft::make_device_vector<float, int>(res, n_rows);
* auto dataset = raft::make_device_matrix<float, int64_t>(res, n_rows, n_cols);
* auto labels = raft::make_device_vector<int64_t, int64_t>(res, n_rows);
* raft::make_blobs(res, dataset.view(), labels.view());
* raft::random::make_blobs(res, dataset.view(), labels.view());
*
* // create a brute_force knn index from the dataset
* auto index = raft::neighbors::brute_force::build(res,
106 changes: 74 additions & 32 deletions cpp/include/raft/neighbors/detail/ivf_flat_build.cuh
@@ -120,7 +120,8 @@ RAFT_KERNEL build_index_kernel(const LabelT* labels,
uint32_t* list_sizes_ptr,
IdxT n_rows,
uint32_t dim,
uint32_t veclen)
uint32_t veclen,
IdxT batch_offset = 0)
{
const IdxT i = IdxT(blockDim.x) * IdxT(blockIdx.x) + threadIdx.x;
if (i >= n_rows) { return; }
@@ -131,7 +132,7 @@ RAFT_KERNEL build_index_kernel(const LabelT* labels,
auto* list_data = list_data_ptrs[list_id];

// Record the source vector id in the index
list_index[inlist_id] = source_ixs == nullptr ? i : source_ixs[i];
list_index[inlist_id] = source_ixs == nullptr ? i + batch_offset : source_ixs[i];

// The data is written in interleaved groups of `index::kGroupSize` vectors
using interleaved_group = Pow2<kIndexGroupSize>;
@@ -180,16 +181,33 @@ void extend(raft::resources const& handle,

auto new_labels = raft::make_device_vector<LabelT, IdxT>(handle, n_rows);
raft::cluster::kmeans_balanced_params kmeans_params;
kmeans_params.metric = index->metric();
auto new_vectors_view = raft::make_device_matrix_view<const T, IdxT>(new_vectors, n_rows, dim);
kmeans_params.metric = index->metric();
auto orig_centroids_view =
raft::make_device_matrix_view<const float, IdxT>(index->centers().data_handle(), n_lists, dim);
raft::cluster::kmeans_balanced::predict(handle,
kmeans_params,
new_vectors_view,
orig_centroids_view,
new_labels.view(),
utils::mapping<float>{});
// Calculate the batch size for the input data if it's not accessible directly from the device
constexpr size_t kReasonableMaxBatchSize = 65536;
size_t max_batch_size = std::min<size_t>(n_rows, kReasonableMaxBatchSize);

// Predict the cluster labels for the new data, in batches if necessary
utils::batch_load_iterator<T> vec_batches(new_vectors,
n_rows,
index->dim(),
max_batch_size,
stream,
resource::get_workspace_resource(handle));

for (const auto& batch : vec_batches) {
auto batch_data_view =
raft::make_device_matrix_view<const T, IdxT>(batch.data(), batch.size(), index->dim());
auto batch_labels_view = raft::make_device_vector_view<LabelT, IdxT>(
new_labels.data_handle() + batch.offset(), batch.size());
raft::cluster::kmeans_balanced::predict(handle,
kmeans_params,
batch_data_view,
orig_centroids_view,
batch_labels_view,
utils::mapping<float>{});
}

auto* list_sizes_ptr = index->list_sizes().data_handle();
auto old_list_sizes_dev = raft::make_device_vector<uint32_t, IdxT>(handle, n_lists);
@@ -202,14 +220,19 @@ void extend(raft::resources const& handle,
auto list_sizes_view =
raft::make_device_vector_view<std::remove_pointer_t<decltype(list_sizes_ptr)>, IdxT>(
list_sizes_ptr, n_lists);
auto const_labels_view = make_const_mdspan(new_labels.view());
raft::cluster::kmeans_balanced::helpers::calc_centers_and_sizes(handle,
new_vectors_view,
const_labels_view,
centroids_view,
list_sizes_view,
false,
utils::mapping<float>{});
for (const auto& batch : vec_batches) {
auto batch_data_view =
raft::make_device_matrix_view<const T, IdxT>(batch.data(), batch.size(), index->dim());
auto batch_labels_view = raft::make_device_vector_view<const LabelT, IdxT>(
new_labels.data_handle() + batch.offset(), batch.size());
raft::cluster::kmeans_balanced::helpers::calc_centers_and_sizes(handle,
batch_data_view,
batch_labels_view,
centroids_view,
list_sizes_view,
false,
utils::mapping<float>{});
}
} else {
raft::stats::histogram<uint32_t, IdxT>(raft::stats::HistTypeAuto,
reinterpret_cast<int32_t*>(list_sizes_ptr),
@@ -244,20 +267,39 @@ void extend(raft::resources const& handle,
// we'll rebuild the `list_sizes_ptr` in the following kernel, using it as an atomic counter.
raft::copy(list_sizes_ptr, old_list_sizes_dev.data_handle(), n_lists, stream);

// Kernel to insert the new vectors
const dim3 block_dim(256);
const dim3 grid_dim(raft::ceildiv<IdxT>(n_rows, block_dim.x));
build_index_kernel<<<grid_dim, block_dim, 0, stream>>>(new_labels.data_handle(),
new_vectors,
new_indices,
index->data_ptrs().data_handle(),
index->inds_ptrs().data_handle(),
list_sizes_ptr,
n_rows,
dim,
index->veclen());
RAFT_CUDA_TRY(cudaPeekAtLastError());

utils::batch_load_iterator<IdxT> vec_indices(
new_indices, n_rows, 1, max_batch_size, stream, resource::get_workspace_resource(handle));
utils::batch_load_iterator<IdxT> idx_batch = vec_indices.begin();
size_t next_report_offset = 0;
size_t d_report_offset = n_rows * 5 / 100;
for (const auto& batch : vec_batches) {
auto batch_data_view =
raft::make_device_matrix_view<const T, IdxT>(batch.data(), batch.size(), index->dim());
// Kernel to insert the new vectors
const dim3 block_dim(256);
const dim3 grid_dim(raft::ceildiv<IdxT>(batch.size(), block_dim.x));
build_index_kernel<T, IdxT, LabelT>
<<<grid_dim, block_dim, 0, stream>>>(new_labels.data_handle() + batch.offset(),
batch_data_view.data_handle(),
idx_batch->data(),
index->data_ptrs().data_handle(),
index->inds_ptrs().data_handle(),
list_sizes_ptr,
batch.size(),
dim,
index->veclen(),
batch.offset());
RAFT_CUDA_TRY(cudaPeekAtLastError());

if (batch.offset() > next_report_offset) {
float progress = batch.offset() * 100.0f / n_rows;
RAFT_LOG_DEBUG("ivf_flat::extend added vectors %zu, %6.1f%% complete",
static_cast<size_t>(batch.offset()),
progress);
next_report_offset += d_report_offset;
}
++idx_batch;
}
// Precompute the centers vector norms for L2Expanded distance
if (!index->center_norms().has_value()) {
index->allocate_center_norms(handle);
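
The extend path above replaces the single full-size build_index_kernel launch with a loop over utils::batch_load_iterator chunks, and the new batch_offset argument keeps source ids globally correct when source_ixs is null. A minimal standalone sketch of that batching pattern, with hypothetical names (extend_in_batches, process_batch, kBatchSize) standing in for RAFT's internals:

    #include <cuda_runtime.h>
    #include <algorithm>
    #include <cstdint>

    // Stage host-resident vectors onto the device one fixed-size chunk at a time,
    // handing each chunk (plus its global row offset) to the per-batch work.
    // Error checking is omitted for brevity.
    void extend_in_batches(const float* host_vectors,  // n_rows x dim, host memory
                           std::int64_t n_rows,
                           std::int64_t dim,
                           cudaStream_t stream)
    {
      constexpr std::int64_t kBatchSize = 65536;  // mirrors kReasonableMaxBatchSize above
      float* staging = nullptr;
      cudaMalloc(reinterpret_cast<void**>(&staging), sizeof(float) * kBatchSize * dim);

      for (std::int64_t offset = 0; offset < n_rows; offset += kBatchSize) {
        const std::int64_t batch = std::min(kBatchSize, n_rows - offset);
        // Copy one chunk of the host data into the device staging buffer.
        cudaMemcpyAsync(staging,
                        host_vectors + offset * dim,
                        sizeof(float) * batch * dim,
                        cudaMemcpyHostToDevice,
                        stream);
        // Label prediction, center updates, and index insertion for this chunk would go
        // here; the real kernel also receives `offset` so list entries record global ids.
        // process_batch(staging, batch, offset, stream);  // hypothetical per-batch work
      }
      cudaStreamSynchronize(stream);
      cudaFree(staging);
    }

In the actual diff the same iterator drives kmeans_balanced::predict, calc_centers_and_sizes, and the insertion kernel, and progress is logged roughly every 5% of the added rows.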
67 changes: 0 additions & 67 deletions cpp/include/raft/neighbors/detail/selection_faiss-ext.cuh

This file was deleted.
