Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -335,6 +335,7 @@ if(BUILD_SHARED_LIBS)
src/distance/detail/kernels/gram_matrix.cu
src/distance/detail/kernels/kernel_factory.cu
src/distance/detail/kernels/kernel_matrices.cu
src/distance/detail/pairwise_matrix/dispatch_bitwise_hamming_uint8_t_uint32_t_uint32_t_int.cu
src/distance/detail/pairwise_matrix/dispatch_canberra_float_float_float_int.cu
src/distance/detail/pairwise_matrix/dispatch_canberra_half_float_float_int.cu
src/distance/detail/pairwise_matrix/dispatch_canberra_double_double_double_int.cu
Expand Down
95 changes: 94 additions & 1 deletion cpp/include/cuvs/distance/distance.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2021-2023, NVIDIA CORPORATION.
* Copyright (c) 2021-2025, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -332,6 +332,99 @@ void pairwise_distance(
cuvs::distance::DistanceType metric,
float metric_arg = 2.0f);

// BitwiseHamming integer type overloads

/**
* @brief Compute pairwise distances for two matrices (uint8_t overload for BitwiseHamming,
* row-major layout)
*
* Note: Only contiguous row- or column-major layouts supported currently.
*
* Usage example:
* @code{.cpp}
* #include <raft/core/resources.hpp>
* #include <raft/core/device_mdarray.hpp>
* #include <cuvs/distance/distance.hpp>
*
* raft::resources handle;
* int n_samples = 5000;
* int n_features = 50;
*
* // The index type is given explicitly so that the views match the std::int64_t-indexed
* // views this overload accepts.
* auto input = raft::make_device_matrix<uint8_t, std::int64_t>(handle, n_samples, n_features);
*
* // ... fill input with binary data ...
*
* auto output = raft::make_device_matrix<uint32_t, std::int64_t>(handle, n_samples, n_samples);
*
* auto metric = cuvs::distance::DistanceType::BitwiseHamming;
* cuvs::distance::pairwise_distance(handle,
* raft::make_const(input.view()),
* raft::make_const(input.view()),
* output.view(),
* metric);
* @endcode
*
* @param[in] handle raft handle
* @param[in] x first set of points (size n*k), uint8_t elements, row-major
* @param[in] y second set of points (size m*k), uint8_t elements, row-major
* @param[out] dist output distance matrix (size n*m), uint32_t elements, row-major
* @param[in] metric distance to evaluate (must be BitwiseHamming)
* @param[in] metric_arg metric argument (unused for BitwiseHamming)
*/
void pairwise_distance(
raft::resources const& handle,
raft::device_matrix_view<const uint8_t, std::int64_t, raft::layout_c_contiguous> const x,
raft::device_matrix_view<const uint8_t, std::int64_t, raft::layout_c_contiguous> const y,
raft::device_matrix_view<uint32_t, std::int64_t, raft::layout_c_contiguous> dist,
cuvs::distance::DistanceType metric,
uint32_t metric_arg = 2);

/**
* @brief Compute pairwise distances for two matrices (uint8_t overload for BitwiseHamming
* with column major layout)
*
* Note: Only contiguous row- or column-major layouts supported currently.
*
* Usage example:
* @code{.cpp}
* #include <raft/core/resources.hpp>
* #include <raft/core/device_mdarray.hpp>
* #include <cuvs/distance/distance.hpp>
*
* raft::resources handle;
* int n_samples = 5000;
* int n_features = 50;
*
* auto input = raft::make_device_matrix<uint8_t, std::int64_t, raft::layout_f_contiguous>(
* handle, n_samples, n_features);
*
* // ... fill input with binary data ...
*
* auto output = raft::make_device_matrix<uint32_t, std::int64_t, raft::layout_f_contiguous>(
* handle, n_samples, n_samples);
*
* auto metric = cuvs::distance::DistanceType::BitwiseHamming;
* cuvs::distance::pairwise_distance(handle,
* raft::make_const(input.view()),
* raft::make_const(input.view()),
* output.view(),
* metric);
* @endcode
*
* @param[in] handle raft handle
* @param[in] x first set of points (size n*k), uint8_t elements, column-major
* @param[in] y second set of points (size m*k), uint8_t elements, column-major
* @param[out] dist output distance matrix (size n*m), uint32_t elements, column-major
* @param[in] metric distance to evaluate (must be BitwiseHamming)
* @param[in] metric_arg metric argument (unused for BitwiseHamming)
*/
void pairwise_distance(
raft::resources const& handle,
raft::device_matrix_view<const uint8_t, std::int64_t, raft::layout_f_contiguous> const x,
raft::device_matrix_view<const uint8_t, std::int64_t, raft::layout_f_contiguous> const y,
raft::device_matrix_view<uint32_t, std::int64_t, raft::layout_f_contiguous> dist,
cuvs::distance::DistanceType metric,
uint32_t metric_arg = 2);

/**
* @brief Compute sparse pairwise distances between x and y, using the provided
* input configuration and distance function.
Expand Down
35 changes: 34 additions & 1 deletion cpp/src/distance/detail/distance.cuh
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2018-2024, NVIDIA CORPORATION.
* Copyright (c) 2018-2025, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -53,6 +53,7 @@ using distance_tag = std::integral_constant<DistanceType, d>;
* They are implemented below. The documentation of this function serves as
* documentation for all functions. The following overloads are defined:
*
* - DistanceType::BitwiseHamming:
* - DistanceType::Canberra:
* - DistanceType::CorrelationExpanded:
* - DistanceType::CosineExpanded:
Expand Down Expand Up @@ -88,6 +89,38 @@ using distance_tag = std::integral_constant<DistanceType, d>;
* @param is_row_major Whether the matrices are row-major or col-major
* @param metric_arg The `p` argument for Lp.
*/
template <typename DataT, typename AccT, typename OutT, typename FinOpT, typename IdxT = int>
void distance_impl(raft::resources const& handle,
                   distance_tag<DistanceType::BitwiseHamming> distance_type,
                   const DataT* x,
                   const DataT* y,
                   OutT* out,
                   IdxT m,
                   IdxT n,
                   IdxT k,
                   AccT*,   // workspace unused
                   size_t,  // worksize unused
                   FinOpT fin_op,
                   bool is_row_major,
                   DataT)  // metric_arg unused
{
  if constexpr (!std::is_integral_v<DataT>) {
    // XOR + popcount is only meaningful on integral element types. Non-integral
    // instantiations still compile but fail as soon as they are invoked.
    RAFT_FAIL(
      "BitwiseHamming distance requires integral data types (uint8_t, uint32_t, uint64_t). ");
  } else {
    using hamming_op_t = ops::bitwise_hamming_distance_op<DataT, AccT, IdxT>;

    cudaStream_t stream = raft::resource::get_cuda_stream(handle);

    // This metric does not use row norms, so null pointers are handed to the dispatcher.
    const OutT* no_x_norm = nullptr;
    const OutT* no_y_norm = nullptr;

    hamming_op_t hamming_op{};

    pairwise_matrix_dispatch<hamming_op_t, DataT, AccT, OutT, FinOpT, IdxT>(
      hamming_op, m, n, k, x, y, no_x_norm, no_y_norm, out, fin_op, stream, is_row_major);
  }
}

template <typename DataT, typename AccT, typename OutT, typename FinOpT, typename IdxT = int>
void distance_impl(raft::resources const& handle,
distance_tag<DistanceType::Canberra> distance_type,
Expand Down
3 changes: 2 additions & 1 deletion cpp/src/distance/detail/distance_ops/all_ops.cuh
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2023, NVIDIA CORPORATION.
* Copyright (c) 2023-2025, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -20,6 +20,7 @@
#include "cutlass.cuh"

// The distance operations:
#include "../distance_ops/bitwise_hamming.cuh"
#include "../distance_ops/canberra.cuh"
#include "../distance_ops/correlation.cuh"
#include "../distance_ops/cosine.cuh"
Expand Down
92 changes: 92 additions & 0 deletions cpp/src/distance/detail/distance_ops/bitwise_hamming.cuh
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
/*
* Copyright (c) 2025, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#pragma once

#include <cstdint>
#include <raft/util/cuda_dev_essentials.cuh> // DI

namespace cuvs::distance::detail::ops {

/**
 * @brief the BitwiseHamming distance matrix calculation
 *
 * It computes the following equation:
 *
 *  c_ij = sum_k popcount(x_ik XOR y_kj)
 *
 * This counts the number of differing bits between corresponding elements
 * across all dimensions. The op applies no normalization: the accumulated
 * bit count is left untouched by the epilog.
 *
 * @tparam DataType element type of the inputs; must be 8, 16, 32 or 64 bits wide
 *                  and support `operator^`
 * @tparam AccType  accumulator type the per-element popcounts are added into
 * @tparam IdxType  index type
 */
template <typename DataType, typename AccType, typename IdxType>
struct bitwise_hamming_distance_op {
  using DataT = DataType;
  using AccT  = AccType;
  using IdxT  = IdxType;

  bitwise_hamming_distance_op() noexcept {}

  // Load norms of input data
  static constexpr bool use_norms = false;
  // Whether the core function requires so many instructions that it makes sense
  // to reduce loop unrolling, etc. We do this to keep compile times in check.
  static constexpr bool expensive_inner_loop = false;

  // Size of shared memory. This is normally decided by the kernel policy, but
  // some ops such as correlation_distance_op use more.
  template <typename Policy>
  static constexpr size_t shared_mem_size()
  {
    return Policy::SmemSize;
  }

  /**
   * @brief Add the number of bits that differ between `x` and `y` to `acc`.
   *
   * @param[inout] acc running bit-difference count
   * @param[in]    x   element from the first input
   * @param[in]    y   element from the second input
   */
  DI void core(AccT& acc, DataT& x, DataT& y) const
  {
    // Reject unsupported widths up front so that no element size can fall through
    // the branches below and silently contribute nothing to the accumulator.
    static_assert(
      sizeof(DataT) == 1 || sizeof(DataT) == 2 || sizeof(DataT) == 4 || sizeof(DataT) == 8,
      "BitwiseHamming distance only supports 8-, 16-, 32- and 64-bit element types");

    if constexpr (sizeof(DataT) == 1) {
      // `x ^ y` is computed after integer promotion; the mask keeps only the
      // low 8 bits so promoted (possibly sign-extended) high bits are not counted.
      acc += __popc(static_cast<uint32_t>(x ^ y) & 0xffu);
    } else if constexpr (sizeof(DataT) == 2) {
      // Same as above, keeping the low 16 bits.
      acc += __popc(static_cast<uint32_t>(x ^ y) & 0xffffu);
    } else if constexpr (sizeof(DataT) == 4) {
      acc += __popc(static_cast<uint32_t>(x ^ y));
    } else {
      // sizeof(DataT) == 8, guaranteed by the static_assert above.
      acc += __popcll(static_cast<unsigned long long>(x ^ y));
    }
  }

  /**
   * @brief Post-processing hook; intentionally a no-op for this op.
   *
   * `acc[i][j]` already contains the bitwise Hamming distance accumulated by
   * `core`, so no normalization or other processing is applied. The norm and
   * grid-stride arguments are unused.
   */
  template <typename Policy>
  DI void epilog(AccT acc[Policy::AccRowsPerTh][Policy::AccColsPerTh],
                 AccT* regxn,
                 AccT* regyn,
                 IdxT gridStrideX,
                 IdxT gridStrideY) const
  {
    return;
  }
};

} // namespace cuvs::distance::detail::ops
Loading