diff --git a/c/include/cuvs/neighbors/cagra.h b/c/include/cuvs/neighbors/cagra.h index 487ada503d..0cbce5a8ea 100644 --- a/c/include/cuvs/neighbors/cagra.h +++ b/c/include/cuvs/neighbors/cagra.h @@ -783,7 +783,7 @@ cuvsError_t cuvsCagraSerialize(cuvsResources_t res, * cuvsError_t res_create_status = cuvsResourcesCreate(&res); * * // create an index with `cuvsCagraBuild` - * cuvsCagraSerializeHnswlib(res, "/path/to/index", index); + * cuvsCagraSerializeToHnswlib(res, "/path/to/index", index); * @endcode * * @param[in] res cuvsResources_t opaque C handle diff --git a/c/include/cuvs/neighbors/hnsw.h b/c/include/cuvs/neighbors/hnsw.h index d526a4f877..b383912339 100644 --- a/c/include/cuvs/neighbors/hnsw.h +++ b/c/include/cuvs/neighbors/hnsw.h @@ -419,7 +419,7 @@ cuvsError_t cuvsHnswSerialize(cuvsResources_t res, const char* filename, cuvsHns * cuvsError_t res_create_status = cuvsResourcesCreate(&res); * * // create an index with `cuvsCagraBuild` - * cuvsCagraSerializeHnswlib(res, "/path/to/index", index); + * cuvsCagraSerializeToHnswlib(res, "/path/to/index", index); * * // Load the serialized CAGRA index from file as an hnswlib index * // The index should have the same dtype as the one used to build CAGRA the index diff --git a/c/include/cuvs/neighbors/tiered_index.h b/c/include/cuvs/neighbors/tiered_index.h index 12b3ee0655..c210e2bc8b 100644 --- a/c/include/cuvs/neighbors/tiered_index.h +++ b/c/include/cuvs/neighbors/tiered_index.h @@ -239,6 +239,22 @@ cuvsError_t cuvsTieredIndexExtend(cuvsResources_t res, * @} */ +/** + * @defgroup tiered_c_index_compact Tiered index compact + * @{ + */ +/** + * @brief Compact the index + * + * @param[in] res cuvsResources_t opaque C handle + * @param[inout] index Tiered index to be compacted + * @return cuvsError_t + */ +cuvsError_t cuvsTieredIndexCompact(cuvsResources_t res, cuvsTieredIndex_t index); +/** + * @} + */ + /** * @defgroup tiered_c_index_merge Tiered index merge * @{ @@ -258,7 +274,6 @@ cuvsError_t 
cuvsTieredIndexMerge(cuvsResources_t res, cuvsTieredIndex_t* indices, size_t num_indices, cuvsTieredIndex_t output_index); - /** * @} */ diff --git a/c/src/neighbors/tiered_index.cpp b/c/src/neighbors/tiered_index.cpp index 2a7d54b16d..071c4283fe 100644 --- a/c/src/neighbors/tiered_index.cpp +++ b/c/src/neighbors/tiered_index.cpp @@ -204,6 +204,14 @@ void _merge(cuvsResources_t res, output_index->algo = indices[0]->algo; } +template +void _compact(cuvsResources_t res, cuvsTieredIndex index) +{ + auto res_ptr = reinterpret_cast(res); + auto index_ptr = reinterpret_cast*>(index.addr); + + tiered_index::compact(*res_ptr, index_ptr); +} } // namespace extern "C" cuvsError_t cuvsTieredIndexCreate(cuvsTieredIndex_t* index) @@ -352,6 +360,28 @@ extern "C" cuvsError_t cuvsTieredIndexExtend(cuvsResources_t res, }); } +extern "C" cuvsError_t cuvsTieredIndexCompact(cuvsResources_t res, cuvsTieredIndex_t index_c_ptr) +{ + return cuvs::core::translate_exceptions([=] { + auto index = *index_c_ptr; + switch (index.algo) { + case CUVS_TIERED_INDEX_ALGO_CAGRA: { + _compact>(res, index); + break; + } + case CUVS_TIERED_INDEX_ALGO_IVF_FLAT: { + _compact>(res, index); + break; + } + case CUVS_TIERED_INDEX_ALGO_IVF_PQ: { + _compact>(res, index); + break; + } + default: RAFT_FAIL("unsupported tiered index algorithm"); + } + }); +} + extern "C" cuvsError_t cuvsTieredIndexMerge(cuvsResources_t res, cuvsTieredIndexParams_t params, cuvsTieredIndex_t* indices, diff --git a/conda/environments/all_cuda-129_arch-aarch64.yaml b/conda/environments/all_cuda-129_arch-aarch64.yaml index 9812a26a5d..f7a13b94cf 100644 --- a/conda/environments/all_cuda-129_arch-aarch64.yaml +++ b/conda/environments/all_cuda-129_arch-aarch64.yaml @@ -26,6 +26,7 @@ dependencies: - go - graphviz - ipython +- libboost-devel - libclang==20.1.4 - libcublas-dev - libcurand-dev @@ -33,11 +34,13 @@ dependencies: - libcusparse-dev - librmm==25.12.*,>=0.0.0a0 - make +- maven - nccl>=2.19 - ninja - numpy>=1.23,<3.0a0 - numpydoc - 
openblas +- openjdk=22.* - pre-commit - pylibraft==25.12.*,>=0.0.0a0 - pytest-cov diff --git a/conda/environments/all_cuda-129_arch-x86_64.yaml b/conda/environments/all_cuda-129_arch-x86_64.yaml index 896c08e0e2..9e246f3e75 100644 --- a/conda/environments/all_cuda-129_arch-x86_64.yaml +++ b/conda/environments/all_cuda-129_arch-x86_64.yaml @@ -26,6 +26,7 @@ dependencies: - go - graphviz - ipython +- libboost-devel - libclang==20.1.4 - libcublas-dev - libcurand-dev @@ -33,11 +34,13 @@ dependencies: - libcusparse-dev - librmm==25.12.*,>=0.0.0a0 - make +- maven - nccl>=2.19 - ninja - numpy>=1.23,<3.0a0 - numpydoc - openblas +- openjdk=22.* - pre-commit - pylibraft==25.12.*,>=0.0.0a0 - pytest-cov diff --git a/conda/environments/all_cuda-130_arch-aarch64.yaml b/conda/environments/all_cuda-130_arch-aarch64.yaml index c9f180e849..efc1efea40 100644 --- a/conda/environments/all_cuda-130_arch-aarch64.yaml +++ b/conda/environments/all_cuda-130_arch-aarch64.yaml @@ -26,6 +26,7 @@ dependencies: - go - graphviz - ipython +- libboost-devel - libclang==20.1.4 - libcublas-dev - libcurand-dev @@ -33,11 +34,13 @@ dependencies: - libcusparse-dev - librmm==25.12.*,>=0.0.0a0 - make +- maven - nccl>=2.19 - ninja - numpy>=1.23,<3.0a0 - numpydoc - openblas +- openjdk=22.* - pre-commit - pylibraft==25.12.*,>=0.0.0a0 - pytest-cov diff --git a/conda/environments/all_cuda-130_arch-x86_64.yaml b/conda/environments/all_cuda-130_arch-x86_64.yaml index a464e15db4..964ec4c741 100644 --- a/conda/environments/all_cuda-130_arch-x86_64.yaml +++ b/conda/environments/all_cuda-130_arch-x86_64.yaml @@ -26,6 +26,7 @@ dependencies: - go - graphviz - ipython +- libboost-devel - libclang==20.1.4 - libcublas-dev - libcurand-dev @@ -33,11 +34,13 @@ dependencies: - libcusparse-dev - librmm==25.12.*,>=0.0.0a0 - make +- maven - nccl>=2.19 - ninja - numpy>=1.23,<3.0a0 - numpydoc - openblas +- openjdk=22.* - pre-commit - pylibraft==25.12.*,>=0.0.0a0 - pytest-cov diff --git 
a/cpp/src/neighbors/detail/knn_brute_force.cuh b/cpp/src/neighbors/detail/knn_brute_force.cuh index 118d377e7d..053999667f 100644 --- a/cpp/src/neighbors/detail/knn_brute_force.cuh +++ b/cpp/src/neighbors/detail/knn_brute_force.cuh @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2023-2024, NVIDIA CORPORATION. + * SPDX-FileCopyrightText: Copyright (c) 2023-2025, NVIDIA CORPORATION. * SPDX-License-Identifier: Apache-2.0 */ @@ -77,7 +77,8 @@ void tiled_brute_force_knn(const raft::resources& handle, const uint32_t* filter_bits = nullptr, DistanceEpilogue distance_epilogue = raft::identity_op(), cuvs::neighbors::filtering::FilterType filter_type = - cuvs::neighbors::filtering::FilterType::Bitmap) + cuvs::neighbors::filtering::FilterType::Bitmap, + size_t filter_col_offset = 0) { // Figure out the number of rows/cols to tile for size_t tile_rows = 0; @@ -237,9 +238,9 @@ void tiled_brute_force_knn(const raft::resources& handle, count, count + current_query_size * current_centroid_size, [=] __device__(IndexType idx) { - IndexType row = i + (idx / current_centroid_size); - IndexType col = j + (idx % current_centroid_size); - IndexType g_idx = row * n_cols + col; + IndexType row = i + (idx / current_centroid_size); + IndexType col = j + (idx % current_centroid_size) + filter_col_offset; + IndexType g_idx = row * n_cols + col; IndexType item_idx = (g_idx) >> 5; uint32_t bit_idx = (g_idx) & 31; uint32_t filter = filter_bits[item_idx]; @@ -585,12 +586,12 @@ void brute_force_search_filtered( metric == cuvs::distance::DistanceType::L2Expanded || metric == cuvs::distance::DistanceType::L2SqrtExpanded || metric == cuvs::distance::DistanceType::CosineExpanded, - "Only Euclidean, IP, and Cosine are supported!"); + "Only Euclidean, IP, and Cosine distance are supported!"); RAFT_EXPECTS(idx.has_norms() || !(metric == cuvs::distance::DistanceType::L2Expanded || metric == cuvs::distance::DistanceType::L2SqrtExpanded || metric == cuvs::distance::DistanceType::CosineExpanded), 
- "Index must has norms when using Euclidean, IP, and Cosine!"); + "Index must have norms when using Euclidean, IP, or Cosine distance!"); IdxT n_queries = queries.extent(0); IdxT n_dataset = idx.dataset().extent(0); diff --git a/cpp/src/neighbors/detail/tiered_index.cuh b/cpp/src/neighbors/detail/tiered_index.cuh index 1338b4c78f..bb7af6ae37 100644 --- a/cpp/src/neighbors/detail/tiered_index.cuh +++ b/cpp/src/neighbors/detail/tiered_index.cuh @@ -22,6 +22,8 @@ #include #include +#include "knn_brute_force.cuh" + namespace cuvs::neighbors::tiered_index::detail { /** Storage for brute force based incremental indices @@ -210,19 +212,62 @@ struct index_state { temp_distances.data_handle(), n_queries, k), sample_filter); - // search the bfknn index auto offset = n_queries * k; auto bfknn_neighbors = raft::make_device_matrix_view( temp_neighbors.data_handle() + offset, n_queries, k); auto bfknn_distances = raft::make_device_matrix_view( temp_distances.data_handle() + offset, n_queries, k); - brute_force::search(res, - brute_force::search_params(), - bfknn_index, - queries, - bfknn_neighbors, - bfknn_distances, - sample_filter); + + switch (sample_filter.get_filter_type()) { + case filtering::FilterType::None: { + brute_force::search(res, + brute_force::search_params(), + bfknn_index, + queries, + bfknn_neighbors, + bfknn_distances, + sample_filter); + break; + } + case filtering::FilterType::Bitset: { + // We need to adjust the filter by the number of ann rows - which + // is a little tricky since this might not be aligned to the uint32_t + // bitset filter. Use the detail api directly here which can support this + auto idx_norm = + bfknn_index.has_norms() ? 
const_cast(bfknn_index.norms().data_handle()) : nullptr; + + auto actual_filter = + dynamic_cast*>( + &sample_filter); + const uint32_t* filter_data = actual_filter->view().data(); + + neighbors::detail::tiled_brute_force_knn( + res, + queries.data_handle(), + bfknn_index.dataset().data_handle(), + n_queries, + bfknn_rows(), + storage->dim, + k, + bfknn_distances.data_handle(), + bfknn_neighbors.data_handle(), + build_params.metric, + 2.0, + 0, + 0, + idx_norm, + nullptr, + filter_data, + raft::identity_op(), + filtering::FilterType::Bitset, + ann_rows()); + + break; + } + default: { + RAFT_FAIL("Only bitset filter is supported in tiered index"); + } + } if (!distance::is_min_close(build_params.metric)) { // knn_merge_parts doesn't currently support InnerProduct distances etc diff --git a/dependencies.yaml b/dependencies.yaml index b66e9d8691..282dfc1a80 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -20,6 +20,7 @@ files: - depends_on_pylibraft - depends_on_nccl - docs + - java - rapids_build - run_py_cuvs - rust diff --git a/python/cuvs/cuvs/neighbors/tiered_index/__init__.py b/python/cuvs/cuvs/neighbors/tiered_index/__init__.py index dbd5c6ea1c..c6bfe3af80 100644 --- a/python/cuvs/cuvs/neighbors/tiered_index/__init__.py +++ b/python/cuvs/cuvs/neighbors/tiered_index/__init__.py @@ -2,12 +2,13 @@ # SPDX-License-Identifier: Apache-2.0 -from .tiered_index import Index, IndexParams, build, extend, search +from .tiered_index import Index, IndexParams, build, compact, extend, search __all__ = [ "Index", "IndexParams", "build", + "compact", "extend", "search", ] diff --git a/python/cuvs/cuvs/neighbors/tiered_index/tiered_index.pxd b/python/cuvs/cuvs/neighbors/tiered_index/tiered_index.pxd index bd82c63a64..ea45d85e4f 100644 --- a/python/cuvs/cuvs/neighbors/tiered_index/tiered_index.pxd +++ b/python/cuvs/cuvs/neighbors/tiered_index/tiered_index.pxd @@ -65,3 +65,6 @@ cdef extern from "cuvs/neighbors/tiered_index.h" nogil: cuvsError_t 
cuvsTieredIndexExtend(cuvsResources_t res, DLManagedTensor* new_vectors, cuvsTieredIndex_t index) + + cuvsError_t cuvsTieredIndexCompact(cuvsResources_t res, + cuvsTieredIndex_t index) diff --git a/python/cuvs/cuvs/neighbors/tiered_index/tiered_index.pyx b/python/cuvs/cuvs/neighbors/tiered_index/tiered_index.pyx index 29df0387fa..61c1997a73 100644 --- a/python/cuvs/cuvs/neighbors/tiered_index/tiered_index.pyx +++ b/python/cuvs/cuvs/neighbors/tiered_index/tiered_index.pyx @@ -376,3 +376,33 @@ def extend(Index index, new_vectors, resources=None): )) return index + + +@auto_sync_resources +def compact(Index index, resources=None): + """ + Compact the index + + This function takes any data that has been added incrementally, and ensures + that it has been added to the ANN index. + + Parameters + ---------- + index : tiered_index.Index + Trained tiered_index object. + {resources_docstring} + + Returns + ------- + index: py:class:`cuvs.neighbors.tiered_index.Index` + """ + + cdef cuvsResources_t res = resources.get_c_obj() + + with cuda_interruptible(): + check_cuvs(cuvsTieredIndexCompact( + res, + index.index + )) + + return index diff --git a/python/cuvs/cuvs/tests/test_tiered_index.py b/python/cuvs/cuvs/tests/test_tiered_index.py new file mode 100644 index 0000000000..f326061ffa --- /dev/null +++ b/python/cuvs/cuvs/tests/test_tiered_index.py @@ -0,0 +1,100 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. 
+# SPDX-License-Identifier: Apache-2.0 + +import numpy as np +import pytest +from pylibraft.common import device_ndarray + +from cuvs.neighbors import ( + brute_force, + cagra, + filters, + ivf_flat, + ivf_pq, + tiered_index, +) +from cuvs.tests.ann_utils import calc_recall, create_sparse_bitset + + +@pytest.mark.parametrize("n_dataset_rows", [1024, 10000]) +@pytest.mark.parametrize("n_query_rows", [10]) +@pytest.mark.parametrize("n_cols", [10]) +@pytest.mark.parametrize("k", [8, 16]) +@pytest.mark.parametrize("dtype", ["float32"]) +@pytest.mark.parametrize( + "metric", + [ + "sqeuclidean", + "inner_product", + ], +) +@pytest.mark.parametrize( + "algo", + [ + "cagra", + "ivf_flat", + "ivf_pq", + ], +) +@pytest.mark.parametrize("filter_type", ["bitset_filter", "no_filter"]) +def test_tiered_index( + n_dataset_rows, n_query_rows, n_cols, k, dtype, metric, algo, filter_type +): + dataset = np.random.random_sample((n_dataset_rows, n_cols)).astype(dtype) + queries = np.random.random_sample((n_query_rows, n_cols)).astype(dtype) + + indices = np.zeros((n_query_rows, k), dtype="int64") + distances = np.zeros((n_query_rows, k), dtype="float32") + + dataset_device = device_ndarray(dataset) + queries_device = device_ndarray(queries) + indices_device = device_ndarray(indices) + distances_device = device_ndarray(distances) + + # build with half the dataset, then extend with the other half + dataset_1_device = device_ndarray(dataset[: n_dataset_rows // 2, :]) + dataset_2_device = device_ndarray(dataset[n_dataset_rows // 2 :, :]) + + build_params = tiered_index.IndexParams( + metric=metric, algo=algo, min_ann_rows=1000 + ) + index = tiered_index.build(build_params, dataset_1_device) + index = tiered_index.extend(index, dataset_2_device) + + if filter_type == "bitset_filter": + sparsity = 0.5 + bitset = create_sparse_bitset(n_dataset_rows, sparsity) + bitset_device = device_ndarray(bitset) + prefilter = filters.from_bitset(bitset_device) + + # compact the index until we fully 
support filtered search here + # index = tiered_index.compact(index) + else: + prefilter = filters.no_filter() + + if algo == "cagra": + search_params = cagra.SearchParams() + elif algo == "ivf_flat": + search_params = ivf_flat.SearchParams(n_probes=64) + elif algo == "ivf_pq": + search_params = ivf_pq.SearchParams(n_probes=64) + + ret_distances, ret_indices = tiered_index.search( + search_params, + index, + queries_device, + k, + neighbors=indices_device, + distances=distances_device, + filter=prefilter, + ) + + bfknn_index = brute_force.build(dataset_device, metric) + groundtruth_neighbors, groundtruth_indices = brute_force.search( + bfknn_index, queries_device, k, prefilter=prefilter + ) + + ret_indices = ret_indices.copy_to_host() + groundtruth_indices = groundtruth_indices.copy_to_host() + recall = calc_recall(ret_indices, groundtruth_indices) + assert recall > 0.7