Skip to content

Commit

Permalink
Additional Distances for CAGRA C and Python API (#546)
Browse files Browse the repository at this point in the history
Add InnerProduct metric to CAGRA C and Python API + updates to CAGRA pytests.
Closes #545

Authors:
  - Tarang Jain (https://github.com/tarang-jain)

Approvers:
  - Divye Gala (https://github.com/divyegala)

URL: #546
  • Loading branch information
tarang-jain authored Dec 19, 2024
1 parent b3ce774 commit 660a2ca
Show file tree
Hide file tree
Showing 5 changed files with 26 additions and 13 deletions.
3 changes: 3 additions & 0 deletions cpp/include/cuvs/neighbors/cagra.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
#pragma once

#include <cuvs/core/c_api.h>
#include <cuvs/distance/distance.h>
#include <dlpack/dlpack.h>
#include <stdbool.h>
#include <stdint.h>
Expand Down Expand Up @@ -87,6 +88,8 @@ typedef struct cuvsCagraCompressionParams* cuvsCagraCompressionParams_t;
*
*/
struct cuvsCagraIndexParams {
/** Distance type. */
cuvsDistanceType metric;
/** Degree of input graph for pruning. */
size_t intermediate_graph_degree;
/** Degree of output graph. */
Expand Down
6 changes: 4 additions & 2 deletions cpp/src/neighbors/cagra_c.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,8 @@ void* _build(cuvsResources_t res, cuvsCagraIndexParams params, DLManagedTensor*
auto res_ptr = reinterpret_cast<raft::resources*>(res);
auto index = new cuvs::neighbors::cagra::index<T, uint32_t>(*res_ptr);

auto index_params = cuvs::neighbors::cagra::index_params();
auto index_params = cuvs::neighbors::cagra::index_params();
index_params.metric = static_cast<cuvs::distance::DistanceType>((int)params.metric);
index_params.intermediate_graph_degree = params.intermediate_graph_degree;
index_params.graph_degree = params.graph_degree;

Expand Down Expand Up @@ -252,7 +253,8 @@ extern "C" cuvsError_t cuvsCagraSearch(cuvsResources_t res,
extern "C" cuvsError_t cuvsCagraIndexParamsCreate(cuvsCagraIndexParams_t* params)
{
return cuvs::core::translate_exceptions([=] {
*params = new cuvsCagraIndexParams{.intermediate_graph_degree = 128,
*params = new cuvsCagraIndexParams{.metric = L2Expanded,
.intermediate_graph_degree = 128,
.graph_degree = 64,
.build_algo = IVF_PQ,
.nn_descent_niter = 20};
Expand Down
2 changes: 2 additions & 0 deletions python/cuvs/cuvs/neighbors/cagra/cagra.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ from libcpp cimport bool

from cuvs.common.c_api cimport cuvsError_t, cuvsResources_t
from cuvs.common.cydlpack cimport DLDataType, DLManagedTensor
from cuvs.distance_type cimport cuvsDistanceType


cdef extern from "cuvs/neighbors/cagra.h" nogil:
Expand All @@ -47,6 +48,7 @@ cdef extern from "cuvs/neighbors/cagra.h" nogil:
ctypedef cuvsCagraCompressionParams* cuvsCagraCompressionParams_t

ctypedef struct cuvsCagraIndexParams:
cuvsDistanceType metric
size_t intermediate_graph_degree
size_t graph_degree
cuvsCagraGraphBuildAlgo build_algo
Expand Down
20 changes: 12 additions & 8 deletions python/cuvs/cuvs/neighbors/cagra/cagra.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -28,11 +28,13 @@ from libcpp cimport bool, cast
from libcpp.string cimport string

from cuvs.common cimport cydlpack
from cuvs.distance_type cimport cuvsDistanceType

from pylibraft.common import auto_convert_output, cai_wrapper, device_ndarray
from pylibraft.common.cai_wrapper import wrap_array
from pylibraft.common.interruptible import cuda_interruptible

from cuvs.distance import DISTANCE_TYPES
from cuvs.neighbors.common import _check_input_array

from libc.stdint cimport (
Expand Down Expand Up @@ -131,9 +133,11 @@ cdef class IndexParams:
Parameters
----------
metric : string denoting the metric type, default="sqeuclidean"
Valid values for metric: ["sqeuclidean"], where
Valid values for metric: ["sqeuclidean", "inner_product"], where
- sqeuclidean is the euclidean distance without the square root
operation, i.e.: distance(a,b) = \\sum_i (a_i - b_i)^2
- inner_product distance is defined as
distance(a, b) = \\sum_i a_i * b_i.
intermediate_graph_degree : int, default = 128
graph_degree : int, default = 64
Expand All @@ -151,6 +155,7 @@ cdef class IndexParams:
"""

cdef cuvsCagraIndexParams* params
cdef object _metric

# hold on to a reference to the compression, to keep from being GC'ed
cdef public object compression
Expand All @@ -170,10 +175,8 @@ cdef class IndexParams:
nn_descent_niter=20,
compression=None):

# todo (dgd): enable once other metrics are present
# and exposed in cuVS C API
# self.params.metric = _get_metric(metric)
# self.params.metric_arg = 0
self._metric = metric
self.params.metric = <cuvsDistanceType>DISTANCE_TYPES[metric]
self.params.intermediate_graph_degree = intermediate_graph_degree
self.params.graph_degree = graph_degree
if build_algo == "ivf_pq":
Expand All @@ -186,9 +189,9 @@ cdef class IndexParams:
self.params.compression = \
<cuvsCagraCompressionParams_t><size_t>compression.get_handle()

# @property
# def metric(self):
# return self.params.metric
@property
def metric(self):
return self._metric

@property
def intermediate_graph_degree(self):
Expand Down Expand Up @@ -247,6 +250,7 @@ def build(IndexParams index_params, dataset, resources=None):
The following distance metrics are supported:
- L2
- InnerProduct
Parameters
----------
Expand Down
8 changes: 5 additions & 3 deletions python/cuvs/cuvs/test/test_cagra.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ def run_cagra_build_search_test(
n_queries=100,
k=10,
dtype=np.float32,
metric="euclidean",
metric="sqeuclidean",
intermediate_graph_degree=128,
graph_degree=64,
build_algo="ivf_pq",
Expand All @@ -42,6 +42,8 @@ def run_cagra_build_search_test(
):
dataset = generate_data((n_rows, n_cols), dtype)
if metric == "inner_product":
if dtype in [np.int8, np.uint8]:
pytest.skip("skip normalization for int8/uint8 data")
dataset = normalize(dataset, norm="l2", axis=1)
dataset_device = device_ndarray(dataset)

Expand Down Expand Up @@ -122,7 +124,7 @@ def run_cagra_build_search_test(
@pytest.mark.parametrize("dtype", [np.float32, np.int8, np.uint8])
@pytest.mark.parametrize("array_type", ["device", "host"])
@pytest.mark.parametrize("build_algo", ["ivf_pq", "nn_descent"])
@pytest.mark.parametrize("metric", ["euclidean"])
@pytest.mark.parametrize("metric", ["sqeuclidean", "inner_product"])
def test_cagra_dataset_dtype_host_device(
dtype, array_type, inplace, build_algo, metric
):
Expand All @@ -145,7 +147,7 @@ def test_cagra_dataset_dtype_host_device(
"graph_degree": 32,
"add_data_on_build": True,
"k": 1,
"metric": "euclidean",
"metric": "sqeuclidean",
"build_algo": "ivf_pq",
},
{
Expand Down

0 comments on commit 660a2ca

Please sign in to comment.