From 2fa244edc9b1a421e64654a4e5120518ec6e31ef Mon Sep 17 00:00:00 2001 From: aamijar Date: Tue, 3 Jun 2025 02:25:09 +0000 Subject: [PATCH 01/11] init umap-dice-coefficient --- cpp/CMakeLists.txt | 3 + cpp/src/distance/detail/distance.cuh | 82 ++++++- .../distance/detail/distance_ops/all_ops.cuh | 1 + cpp/src/distance/detail/distance_ops/dice.cuh | 93 ++++++++ .../detail/pairwise_matrix/dispatch-ext.cuh | 2 + .../pairwise_matrix/dispatch_00_generate.py | 209 +++++++++--------- .../dispatch_dice_double_double_double_int.cu | 51 +++++ .../dispatch_dice_float_float_float_int.cu | 51 +++++ .../dispatch_dice_half_float_float_int.cu | 51 +++++ cpp/src/distance/distance-ext.cuh | 2 + cpp/src/distance/distance-inl.cuh | 3 + cpp/src/distance/distance.cu | 2 + 12 files changed, 445 insertions(+), 105 deletions(-) create mode 100644 cpp/src/distance/detail/distance_ops/dice.cuh create mode 100644 cpp/src/distance/detail/pairwise_matrix/dispatch_dice_double_double_double_int.cu create mode 100644 cpp/src/distance/detail/pairwise_matrix/dispatch_dice_float_float_float_int.cu create mode 100644 cpp/src/distance/detail/pairwise_matrix/dispatch_dice_half_float_float_int.cu diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 5ccb6765ab..29fcfcf2d8 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -344,6 +344,9 @@ if(BUILD_SHARED_LIBS) src/distance/detail/pairwise_matrix/dispatch_cosine_float_float_float_int.cu src/distance/detail/pairwise_matrix/dispatch_cosine_half_float_float_int.cu src/distance/detail/pairwise_matrix/dispatch_cosine_double_double_double_int.cu + src/distance/detail/pairwise_matrix/dispatch_dice_float_float_float_int.cu + src/distance/detail/pairwise_matrix/dispatch_dice_half_float_float_int.cu + src/distance/detail/pairwise_matrix/dispatch_dice_double_double_double_int.cu src/distance/detail/pairwise_matrix/dispatch_hamming_unexpanded_float_float_float_int.cu src/distance/detail/pairwise_matrix/dispatch_hamming_unexpanded_half_float_float_int.cu src/distance/detail/pairwise_matrix/dispatch_hamming_unexpanded_double_double_double_int.cu diff --git a/cpp/src/distance/detail/distance.cuh b/cpp/src/distance/detail/distance.cuh index d6fd046469..9914ee31af 100644 --- a/cpp/src/distance/detail/distance.cuh +++ b/cpp/src/distance/detail/distance.cuh @@ -56,6 +56,7 @@ using distance_tag = std::integral_constant; * - DistanceType::Canberra: * - DistanceType::CorrelationExpanded: * - DistanceType::CosineExpanded: + * - DistanceType::DiceExpanded: * - DistanceType::HammingUnexpanded: * - DistanceType::HellingerExpanded: * - DistanceType::JensenShannon: @@ -239,6 +240,79 @@ void distance_impl(raft::resources const& handle, distance_op, m, n, k, x, y, x_norm, y_norm, out, fin_op, stream, is_row_major); } +template +void distance_impl(raft::resources const& handle, + distance_tag distance_type, + const DataT* x, + const DataT* y, + OutT* out, + IdxT m, + IdxT n, + IdxT k, + AccT* workspace, + size_t worksize, + FinOpT fin_op, + bool is_row_major, + DataT) // unused +{ + // raft distance support inputs as float/double and output as uint8_t/float/double. + static_assert(!((sizeof(OutT) > 1) && (sizeof(AccT) != sizeof(OutT))), + "OutT can be uint8_t, float, double," + "if sizeof(OutT) > 1 then sizeof(AccT) == sizeof(OutT)."); + + ASSERT(!(worksize < (m + n) * sizeof(AccT)), "workspace size error"); + ASSERT(workspace != nullptr, "workspace is null"); + + cudaStream_t stream = raft::resource::get_cuda_stream(handle); + + OutT* x_norm = workspace; + OutT* y_norm = workspace; + // TODO: Column major case looks to have lower accuracy for X == Y, + // perhaps the use of stridedSummationKernel could be causing this, + // need to investigate and fix. + if (x == y && is_row_major) { + raft::linalg::reduce(x_norm, + x, + k, + std::max(m, n), + (AccT)0, + is_row_major, + true, + stream, + false, + raft::identity_op(), + raft::add_op()); + } else { + y_norm += m; + raft::linalg::reduce(x_norm, + x, + k, + m, + (AccT)0, + is_row_major, + true, + stream, + false, + raft::identity_op(), + raft::add_op()); + raft::linalg::reduce(y_norm, + y, + k, + n, + (AccT)0, + is_row_major, + true, + stream, + false, + raft::identity_op(), + raft::add_op()); + } + + ops::dice_distance_op distance_op{}; + pairwise_matrix_dispatch( + distance_op, m, n, k, x, y, x_norm, y_norm, out, fin_op, stream, is_row_major); +} + template void distance_impl(raft::resources const& handle, distance_tag distance_type, @@ -810,9 +884,11 @@ template size_t getWorkspaceSize(const InType* x, const InType* y, Index_ m, Index_ n, Index_ k) { - size_t worksize = 0; - constexpr bool is_allocated = (distanceType <= cuvs::distance::DistanceType::CosineExpanded) || - (distanceType == cuvs::distance::DistanceType::CorrelationExpanded); + size_t worksize = 0; + constexpr bool is_allocated = + (distanceType <= cuvs::distance::DistanceType::CosineExpanded) || + (distanceType == cuvs::distance::DistanceType::CorrelationExpanded) || + (distanceType == cuvs::distance::DistanceType::DiceExpanded); constexpr int numOfBuffers = (distanceType == cuvs::distance::DistanceType::CorrelationExpanded) ? 2 : 1; diff --git a/cpp/src/distance/detail/distance_ops/all_ops.cuh b/cpp/src/distance/detail/distance_ops/all_ops.cuh index 534aa16fa1..ce58b8a81c 100644 --- a/cpp/src/distance/detail/distance_ops/all_ops.cuh +++ b/cpp/src/distance/detail/distance_ops/all_ops.cuh @@ -23,6 +23,7 @@ #include "../distance_ops/canberra.cuh" #include "../distance_ops/correlation.cuh" #include "../distance_ops/cosine.cuh" +#include "../distance_ops/dice.cuh" #include "../distance_ops/hamming.cuh" #include "../distance_ops/hellinger.cuh" #include "../distance_ops/jensen_shannon.cuh" diff --git a/cpp/src/distance/detail/distance_ops/dice.cuh b/cpp/src/distance/detail/distance_ops/dice.cuh new file mode 100644 index 0000000000..27ce39cd4a --- /dev/null +++ b/cpp/src/distance/detail/distance_ops/dice.cuh @@ -0,0 +1,93 @@ +/* + * Copyright (c) 2023-2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include // DI + +namespace cuvs::distance::detail::ops { + +// Epilogue operator for CUTLASS based kernel +template +struct dice_cutlass_op { + __device__ dice_cutlass_op() noexcept {} + __device__ AccT operator()(AccT& aNorm, const AccT& bNorm, AccT& accVal) const noexcept + { + return static_cast(1.0) - static_cast(2 * accVal / (aNorm + bNorm)); + } + __device__ AccT operator()(DataT aData) const noexcept { return aData; } +}; + +/** + * @brief the expanded dice distance matrix calculation + * + * It computes the following equation: + * + * d(x, y) = 1 - 2*(x ⋅ y) / ( Σ(x) + Σ(y) ) + */ +template +struct dice_distance_op { + using DataT = DataType; + using AccT = AccType; + using IdxT = IdxType; + + // Load norms of input data + static constexpr bool use_norms = true; + // Whether the core function requires so many instructions that it makes sense + // to reduce loop unrolling, etc. We do this to keep compile times in check. + static constexpr bool expensive_inner_loop = false; + + // Size of shared memory. This is normally decided by the kernel policy, but + // some ops such as correlation_distance_op use more. + template + static constexpr size_t shared_mem_size() + { + return Policy::SmemSize + ((Policy::Mblk + Policy::Nblk) * sizeof(DataT)); + } + + DI void core(AccT& acc, DataT& x, DataT& y) const + { + if constexpr ((std::is_same_v && std::is_same_v)) { + acc += __half2float(x != DataT(0) ? DataT(1) : DataT(0)) * + __half2float(y != DataT(0) ? DataT(1) : DataT(0)); + } else { + acc += x != DataT(0) ? DataT(1) : DataT(0) * y != DataT(0) ? DataT(1) : DataT(0); + } + }; + + template + DI void epilog(AccT acc[Policy::AccRowsPerTh][Policy::AccColsPerTh], + AccT* regxn, + AccT* regyn, + IdxT gridStrideX, + IdxT gridStrideY) const + { +#pragma unroll + for (int i = 0; i < Policy::AccRowsPerTh; ++i) { +#pragma unroll + for (int j = 0; j < Policy::AccColsPerTh; ++j) { + acc[i][j] = 1.0 - (2 * acc[i][j] / (regxn[i] + regyn[j])); + } + } + } + + constexpr dice_cutlass_op get_cutlass_op() const + { + return dice_cutlass_op(); + } +}; + +} // namespace cuvs::distance::detail::ops diff --git a/cpp/src/distance/detail/pairwise_matrix/dispatch-ext.cuh b/cpp/src/distance/detail/pairwise_matrix/dispatch-ext.cuh index 49497ab3a2..3c3422f2fb 100644 --- a/cpp/src/distance/detail/pairwise_matrix/dispatch-ext.cuh +++ b/cpp/src/distance/detail/pairwise_matrix/dispatch-ext.cuh @@ -95,6 +95,8 @@ instantiate_cuvs_distance_detail_pairwise_matrix_dispatch_by_algo_default( cuvs::distance::detail::ops::correlation_distance_op, int); instantiate_cuvs_distance_detail_pairwise_matrix_dispatch_by_algo_default( cuvs::distance::detail::ops::cosine_distance_op, int); +instantiate_cuvs_distance_detail_pairwise_matrix_dispatch_by_algo_default( + cuvs::distance::detail::ops::dice_distance_op, int); instantiate_cuvs_distance_detail_pairwise_matrix_dispatch_by_algo_default( cuvs::distance::detail::ops::hamming_distance_op, int); instantiate_cuvs_distance_detail_pairwise_matrix_dispatch_by_algo_default( diff --git a/cpp/src/distance/detail/pairwise_matrix/dispatch_00_generate.py b/cpp/src/distance/detail/pairwise_matrix/dispatch_00_generate.py index d0913833f1..4c54f68662 100644 --- a/cpp/src/distance/detail/pairwise_matrix/dispatch_00_generate.py +++ b/cpp/src/distance/detail/pairwise_matrix/dispatch_00_generate.py @@ -86,72 +86,77 @@ ] op_instances = [ - dict( - path_prefix="canberra", - OpT="cuvs::distance::detail::ops::canberra_distance_op", - archs = [60], - ), - dict( - path_prefix="correlation", - OpT="cuvs::distance::detail::ops::correlation_distance_op", - archs = [60], - ), - dict( - path_prefix="cosine", - OpT="cuvs::distance::detail::ops::cosine_distance_op", - archs = [60, 80], - ), - dict( - path_prefix="hamming_unexpanded", - OpT="cuvs::distance::detail::ops::hamming_distance_op", - archs = [60], - ), - dict( - path_prefix="hellinger_expanded", - OpT="cuvs::distance::detail::ops::hellinger_distance_op", - archs = [60], - ), - # inner product is handled by cublas. - dict( - path_prefix="jensen_shannon", - OpT="cuvs::distance::detail::ops::jensen_shannon_distance_op", - archs = [60], - ), - dict( - path_prefix="kl_divergence", - OpT="cuvs::distance::detail::ops::kl_divergence_op", - archs = [60], - ), - dict( - path_prefix="l1", - OpT="cuvs::distance::detail::ops::l1_distance_op", - archs = [60], - ), - dict( - path_prefix="l2_expanded", - OpT="cuvs::distance::detail::ops::l2_exp_distance_op", + # dict( + # path_prefix="canberra", + # OpT="cuvs::distance::detail::ops::canberra_distance_op", + # archs = [60], + # ), + # dict( + # path_prefix="correlation", + # OpT="cuvs::distance::detail::ops::correlation_distance_op", + # archs = [60], + # ), + # dict( + # path_prefix="cosine", + # OpT="cuvs::distance::detail::ops::cosine_distance_op", + # archs = [60, 80], + # ), + dict( + path_prefix="dice", + OpT="cuvs::distance::detail::ops::dice_distance_op", archs = [60, 80], ), - dict( - path_prefix="l2_unexpanded", - OpT="cuvs::distance::detail::ops::l2_unexp_distance_op", - archs = [60], - ), - dict( - path_prefix="l_inf", - OpT="cuvs::distance::detail::ops::l_inf_distance_op", - archs = [60], - ), - dict( - path_prefix="lp_unexpanded", - OpT="cuvs::distance::detail::ops::lp_unexp_distance_op", - archs = [60], - ), - dict( - path_prefix="russel_rao", - OpT="cuvs::distance::detail::ops::russel_rao_distance_op", - archs = [60], - ), + # dict( + # path_prefix="hamming_unexpanded", + # OpT="cuvs::distance::detail::ops::hamming_distance_op", + # archs = [60], + # ), + # dict( + # path_prefix="hellinger_expanded", + # OpT="cuvs::distance::detail::ops::hellinger_distance_op", + # archs = [60], + # ), + # # inner product is handled by cublas. + # dict( + # path_prefix="jensen_shannon", + # OpT="cuvs::distance::detail::ops::jensen_shannon_distance_op", + # archs = [60], + # ), + # dict( + # path_prefix="kl_divergence", + # OpT="cuvs::distance::detail::ops::kl_divergence_op", + # archs = [60], + # ), + # dict( + # path_prefix="l1", + # OpT="cuvs::distance::detail::ops::l1_distance_op", + # archs = [60], + # ), + # dict( + # path_prefix="l2_expanded", + # OpT="cuvs::distance::detail::ops::l2_exp_distance_op", + # archs = [60, 80], + # ), + # dict( + # path_prefix="l2_unexpanded", + # OpT="cuvs::distance::detail::ops::l2_unexp_distance_op", + # archs = [60], + # ), + # dict( + # path_prefix="l_inf", + # OpT="cuvs::distance::detail::ops::l_inf_distance_op", + # archs = [60], + # ), + # dict( + # path_prefix="lp_unexpanded", + # OpT="cuvs::distance::detail::ops::lp_unexp_distance_op", + # archs = [60], + # ), + # dict( + # path_prefix="russel_rao", + # OpT="cuvs::distance::detail::ops::russel_rao_distance_op", + # archs = [60], + # ), ] def arch_headers(archs): @@ -178,49 +183,49 @@ def arch_headers(archs): f.write("\n#undef instantiate_raft_distance_detail_pairwise_matrix_dispatch\n") print(f"src/distance/detail/pairwise_matrix/{path}") -# Dispatch kernels for with the RBF fin op. -with open("dispatch_rbf.cu", "w") as f: - OpT="cuvs::distance::detail::ops::l2_unexp_distance_op" - archs = [60] +# # Dispatch kernels for with the RBF fin op. +# with open("dispatch_rbf.cu", "w") as f: +# OpT="cuvs::distance::detail::ops::l2_unexp_distance_op" +# archs = [60] - f.write(header) - f.write("#include \"../kernels/rbf_fin_op.cuh\" // rbf_fin_op\n") - f.write(arch_headers(archs)) - f.write(macro) +# f.write(header) +# f.write("#include \"../kernels/rbf_fin_op.cuh\" // rbf_fin_op\n") +# f.write(arch_headers(archs)) +# f.write(macro) - for dt in data_type_instances: - DataT, AccT, OutT, IdxT = (dt[k] for k in ["DataT", "AccT", "OutT", "IdxT"]); - IdxT = "int64_t" # overwrite IdxT +# for dt in data_type_instances: +# DataT, AccT, OutT, IdxT = (dt[k] for k in ["DataT", "AccT", "OutT", "IdxT"]); +# IdxT = "int64_t" # overwrite IdxT - FinOpT = f"cuvs::distance::kernels::detail::rbf_fin_op<{DataT}>" - f.write(f"\ninstantiate_raft_distance_detail_pairwise_matrix_dispatch({OpT}, {DataT}, {AccT}, {OutT}, {FinOpT}, {IdxT});\n") +# FinOpT = f"cuvs::distance::kernels::detail::rbf_fin_op<{DataT}>" +# f.write(f"\ninstantiate_raft_distance_detail_pairwise_matrix_dispatch({OpT}, {DataT}, {AccT}, {OutT}, {FinOpT}, {IdxT});\n") - f.write("\n#undef instantiate_raft_distance_detail_pairwise_matrix_dispatch\n") +# f.write("\n#undef instantiate_raft_distance_detail_pairwise_matrix_dispatch\n") - print("src/distance/detail/pairwise_matrix/dispatch_rbf.cu") +# print("src/distance/detail/pairwise_matrix/dispatch_rbf.cu") # L2 with int64_t indices for kmeans code -int64_t_op_instances = [ - dict( - path_prefix="l2_expanded", - OpT="cuvs::distance::detail::ops::l2_exp_distance_op", - archs = [60, 80], - )] - -for op in int64_t_op_instances: - for dt in data_type_instances: - DataT, AccT, OutT, IdxT = (dt[k] for k in ["DataT", "AccT", "OutT", "IdxT"]); - - IdxT = "int64_t" - path = f"dispatch_{op['path_prefix']}_{DataT}_{AccT}_{OutT}_{IdxT}.cu" - with open(path, "w") as f: - f.write(header) - f.write(arch_headers(op["archs"])) - f.write(macro) - - OpT = op['OpT'] - FinOpT = "raft::identity_op" - f.write(f"\ninstantiate_raft_distance_detail_pairwise_matrix_dispatch({OpT}, {DataT}, {AccT}, {OutT}, {FinOpT}, {IdxT});\n") - f.write("\n#undef instantiate_raft_distance_detail_pairwise_matrix_dispatch\n") - print(f"src/distance/detail/pairwise_matrix/{path}") +# int64_t_op_instances = [ +# dict( +# path_prefix="l2_expanded", +# OpT="cuvs::distance::detail::ops::l2_exp_distance_op", +# archs = [60, 80], +# )] + +# for op in int64_t_op_instances: +# for dt in data_type_instances: +# DataT, AccT, OutT, IdxT = (dt[k] for k in ["DataT", "AccT", "OutT", "IdxT"]); + +# IdxT = "int64_t" +# path = f"dispatch_{op['path_prefix']}_{DataT}_{AccT}_{OutT}_{IdxT}.cu" +# with open(path, "w") as f: +# f.write(header) +# f.write(arch_headers(op["archs"])) +# f.write(macro) + +# OpT = op['OpT'] +# FinOpT = "raft::identity_op" +# f.write(f"\ninstantiate_raft_distance_detail_pairwise_matrix_dispatch({OpT}, {DataT}, {AccT}, {OutT}, {FinOpT}, {IdxT});\n") +# f.write("\n#undef instantiate_raft_distance_detail_pairwise_matrix_dispatch\n") +# print(f"src/distance/detail/pairwise_matrix/{path}") diff --git a/cpp/src/distance/detail/pairwise_matrix/dispatch_dice_double_double_double_int.cu b/cpp/src/distance/detail/pairwise_matrix/dispatch_dice_double_double_double_int.cu new file mode 100644 index 0000000000..b07e49f59a --- /dev/null +++ b/cpp/src/distance/detail/pairwise_matrix/dispatch_dice_double_double_double_int.cu @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * NOTE: this file is generated by dispatch_00_generate.py + * + * Make changes there and run in this directory: + * + * > python dispatch_00_generate.py + * + */ + +#include "../distance_ops/all_ops.cuh" // ops::* +#include "dispatch-inl.cuh" // dispatch +#include "dispatch_sm60.cuh" +#include "dispatch_sm80.cuh" +#include // raft::identity_op +#define instantiate_raft_distance_detail_pairwise_matrix_dispatch( \ + OpT, DataT, AccT, OutT, FinOpT, IdxT) \ + template void cuvs::distance::detail:: \ + pairwise_matrix_dispatch, DataT, AccT, OutT, FinOpT, IdxT>( \ + OpT distance_op, \ + IdxT m, \ + IdxT n, \ + IdxT k, \ + const DataT* x, \ + const DataT* y, \ + const OutT* x_norm, \ + const OutT* y_norm, \ + OutT* out, \ + FinOpT fin_op, \ + cudaStream_t stream, \ + bool is_row_major) + +instantiate_raft_distance_detail_pairwise_matrix_dispatch( + cuvs::distance::detail::ops::dice_distance_op, double, double, double, raft::identity_op, int); + +#undef instantiate_raft_distance_detail_pairwise_matrix_dispatch diff --git a/cpp/src/distance/detail/pairwise_matrix/dispatch_dice_float_float_float_int.cu b/cpp/src/distance/detail/pairwise_matrix/dispatch_dice_float_float_float_int.cu new file mode 100644 index 0000000000..5f312cc634 --- /dev/null +++ b/cpp/src/distance/detail/pairwise_matrix/dispatch_dice_float_float_float_int.cu @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * NOTE: this file is generated by dispatch_00_generate.py + * + * Make changes there and run in this directory: + * + * > python dispatch_00_generate.py + * + */ + +#include "../distance_ops/all_ops.cuh" // ops::* +#include "dispatch-inl.cuh" // dispatch +#include "dispatch_sm60.cuh" +#include "dispatch_sm80.cuh" +#include // raft::identity_op +#define instantiate_raft_distance_detail_pairwise_matrix_dispatch( \ + OpT, DataT, AccT, OutT, FinOpT, IdxT) \ + template void cuvs::distance::detail:: \ + pairwise_matrix_dispatch, DataT, AccT, OutT, FinOpT, IdxT>( \ + OpT distance_op, \ + IdxT m, \ + IdxT n, \ + IdxT k, \ + const DataT* x, \ + const DataT* y, \ + const OutT* x_norm, \ + const OutT* y_norm, \ + OutT* out, \ + FinOpT fin_op, \ + cudaStream_t stream, \ + bool is_row_major) + +instantiate_raft_distance_detail_pairwise_matrix_dispatch( + cuvs::distance::detail::ops::dice_distance_op, float, float, float, raft::identity_op, int); + +#undef instantiate_raft_distance_detail_pairwise_matrix_dispatch diff --git a/cpp/src/distance/detail/pairwise_matrix/dispatch_dice_half_float_float_int.cu b/cpp/src/distance/detail/pairwise_matrix/dispatch_dice_half_float_float_int.cu new file mode 100644 index 0000000000..a7abcd8f07 --- /dev/null +++ b/cpp/src/distance/detail/pairwise_matrix/dispatch_dice_half_float_float_int.cu @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * NOTE: this file is generated by dispatch_00_generate.py + * + * Make changes there and run in this directory: + * + * > python dispatch_00_generate.py + * + */ + +#include "../distance_ops/all_ops.cuh" // ops::* +#include "dispatch-inl.cuh" // dispatch +#include "dispatch_sm60.cuh" +#include "dispatch_sm80.cuh" +#include // raft::identity_op +#define instantiate_raft_distance_detail_pairwise_matrix_dispatch( \ + OpT, DataT, AccT, OutT, FinOpT, IdxT) \ + template void cuvs::distance::detail:: \ + pairwise_matrix_dispatch, DataT, AccT, OutT, FinOpT, IdxT>( \ + OpT distance_op, \ + IdxT m, \ + IdxT n, \ + IdxT k, \ + const DataT* x, \ + const DataT* y, \ + const OutT* x_norm, \ + const OutT* y_norm, \ + OutT* out, \ + FinOpT fin_op, \ + cudaStream_t stream, \ + bool is_row_major) + +instantiate_raft_distance_detail_pairwise_matrix_dispatch( + cuvs::distance::detail::ops::dice_distance_op, half, float, float, raft::identity_op, int); + +#undef instantiate_raft_distance_detail_pairwise_matrix_dispatch diff --git a/cpp/src/distance/distance-ext.cuh b/cpp/src/distance/distance-ext.cuh index a692a62a35..3582fb7182 100644 --- a/cpp/src/distance/distance-ext.cuh +++ b/cpp/src/distance/distance-ext.cuh @@ -223,6 +223,7 @@ void pairwise_distance(raft::resources const& handle, instantiate_cuvs_distance_distance_by_algo(cuvs::distance::DistanceType::Canberra); instantiate_cuvs_distance_distance_by_algo(cuvs::distance::DistanceType::CorrelationExpanded); instantiate_cuvs_distance_distance_by_algo(cuvs::distance::DistanceType::CosineExpanded); +instantiate_cuvs_distance_distance_by_algo(cuvs::distance::DistanceType::DiceExpanded); instantiate_cuvs_distance_distance_by_algo(cuvs::distance::DistanceType::HammingUnexpanded); instantiate_cuvs_distance_distance_by_algo(cuvs::distance::DistanceType::HellingerExpanded); @@ -310,6 +311,7 @@ instantiate_cuvs_distance_getWorkspaceSize_by_algo(cuvs::distance::DistanceType: instantiate_cuvs_distance_getWorkspaceSize_by_algo( cuvs::distance::DistanceType::CorrelationExpanded); instantiate_cuvs_distance_getWorkspaceSize_by_algo(cuvs::distance::DistanceType::CosineExpanded); +instantiate_cuvs_distance_getWorkspaceSize_by_algo(cuvs::distance::DistanceType::DiceExpanded); instantiate_cuvs_distance_getWorkspaceSize_by_algo(cuvs::distance::DistanceType::HammingUnexpanded); instantiate_cuvs_distance_getWorkspaceSize_by_algo(cuvs::distance::DistanceType::HellingerExpanded); diff --git a/cpp/src/distance/distance-inl.cuh b/cpp/src/distance/distance-inl.cuh index e047d31448..5e6ac54085 100644 --- a/cpp/src/distance/distance-inl.cuh +++ b/cpp/src/distance/distance-inl.cuh @@ -307,6 +307,9 @@ void pairwise_distance(raft::resources const& handle, case DistanceType::RusselRaoExpanded: dispatch(std::integral_constant{}); break; + case DistanceType::DiceExpanded: + dispatch(std::integral_constant{}); + break; default: THROW("Unknown or unsupported distance metric '%d'!", (int)metric); }; } diff --git a/cpp/src/distance/distance.cu b/cpp/src/distance/distance.cu index 47e72460f0..facdca8858 100644 --- a/cpp/src/distance/distance.cu +++ b/cpp/src/distance/distance.cu @@ -88,6 +88,7 @@ instantiate_cuvs_distance_distance_by_algo(cuvs::distance::DistanceType::Canberra); instantiate_cuvs_distance_distance_by_algo(cuvs::distance::DistanceType::CorrelationExpanded); instantiate_cuvs_distance_distance_by_algo(cuvs::distance::DistanceType::CosineExpanded); +instantiate_cuvs_distance_distance_by_algo(cuvs::distance::DistanceType::DiceExpanded); instantiate_cuvs_distance_distance_by_algo(cuvs::distance::DistanceType::HammingUnexpanded); instantiate_cuvs_distance_distance_by_algo(cuvs::distance::DistanceType::HellingerExpanded); @@ -176,6 +177,7 @@ instantiate_cuvs_distance_getWorkspaceSize_by_algo(cuvs::distance::DistanceType: instantiate_cuvs_distance_getWorkspaceSize_by_algo( cuvs::distance::DistanceType::CorrelationExpanded); instantiate_cuvs_distance_getWorkspaceSize_by_algo(cuvs::distance::DistanceType::CosineExpanded); +instantiate_cuvs_distance_getWorkspaceSize_by_algo(cuvs::distance::DistanceType::DiceExpanded); instantiate_cuvs_distance_getWorkspaceSize_by_algo(cuvs::distance::DistanceType::HammingUnexpanded); instantiate_cuvs_distance_getWorkspaceSize_by_algo(cuvs::distance::DistanceType::HellingerExpanded); From 8eab501eabd7b69f7e1fad828cf37c7dc3c69b91 Mon Sep 17 00:00:00 2001 From: aamijar Date: Tue, 3 Jun 2025 02:30:52 +0000 Subject: [PATCH 02/11] update dispatch generate py --- .../pairwise_matrix/dispatch_00_generate.py | 208 +++++++++--------- 1 file changed, 104 insertions(+), 104 deletions(-) diff --git a/cpp/src/distance/detail/pairwise_matrix/dispatch_00_generate.py b/cpp/src/distance/detail/pairwise_matrix/dispatch_00_generate.py index 4c54f68662..68d814c528 100644 --- a/cpp/src/distance/detail/pairwise_matrix/dispatch_00_generate.py +++ b/cpp/src/distance/detail/pairwise_matrix/dispatch_00_generate.py @@ -86,77 +86,77 @@ ] op_instances = [ - # dict( - # path_prefix="canberra", - # OpT="cuvs::distance::detail::ops::canberra_distance_op", - # archs = [60], - # ), - # dict( - # path_prefix="correlation", - # OpT="cuvs::distance::detail::ops::correlation_distance_op", - # archs = [60], - # ), - # dict( - # path_prefix="cosine", - # OpT="cuvs::distance::detail::ops::cosine_distance_op", - # archs = [60, 80], - # ), + dict( + path_prefix="canberra", + OpT="cuvs::distance::detail::ops::canberra_distance_op", + archs = [60], + ), + dict( + path_prefix="correlation", + OpT="cuvs::distance::detail::ops::correlation_distance_op", + archs = [60], + ), + dict( + path_prefix="cosine", + OpT="cuvs::distance::detail::ops::cosine_distance_op", + archs = [60, 80], + ), dict( path_prefix="dice", OpT="cuvs::distance::detail::ops::dice_distance_op", archs = [60, 80], ), - # dict( - # path_prefix="hamming_unexpanded", - # OpT="cuvs::distance::detail::ops::hamming_distance_op", - # archs = [60], - # ), - # dict( - # path_prefix="hellinger_expanded", - # OpT="cuvs::distance::detail::ops::hellinger_distance_op", - # archs = [60], - # ), - # # inner product is handled by cublas. - # dict( - # path_prefix="jensen_shannon", - # OpT="cuvs::distance::detail::ops::jensen_shannon_distance_op", - # archs = [60], - # ), - # dict( - # path_prefix="kl_divergence", - # OpT="cuvs::distance::detail::ops::kl_divergence_op", - # archs = [60], - # ), - # dict( - # path_prefix="l1", - # OpT="cuvs::distance::detail::ops::l1_distance_op", - # archs = [60], - # ), - # dict( - # path_prefix="l2_expanded", - # OpT="cuvs::distance::detail::ops::l2_exp_distance_op", - # archs = [60, 80], - # ), - # dict( - # path_prefix="l2_unexpanded", - # OpT="cuvs::distance::detail::ops::l2_unexp_distance_op", - # archs = [60], - # ), - # dict( - # path_prefix="l_inf", - # OpT="cuvs::distance::detail::ops::l_inf_distance_op", - # archs = [60], - # ), - # dict( - # path_prefix="lp_unexpanded", - # OpT="cuvs::distance::detail::ops::lp_unexp_distance_op", - # archs = [60], - # ), - # dict( - # path_prefix="russel_rao", - # OpT="cuvs::distance::detail::ops::russel_rao_distance_op", - # archs = [60], - # ), + dict( + path_prefix="hamming_unexpanded", + OpT="cuvs::distance::detail::ops::hamming_distance_op", + archs = [60], + ), + dict( + path_prefix="hellinger_expanded", + OpT="cuvs::distance::detail::ops::hellinger_distance_op", + archs = [60], + ), + # inner product is handled by cublas. + dict( + path_prefix="jensen_shannon", + OpT="cuvs::distance::detail::ops::jensen_shannon_distance_op", + archs = [60], + ), + dict( + path_prefix="kl_divergence", + OpT="cuvs::distance::detail::ops::kl_divergence_op", + archs = [60], + ), + dict( + path_prefix="l1", + OpT="cuvs::distance::detail::ops::l1_distance_op", + archs = [60], + ), + dict( + path_prefix="l2_expanded", + OpT="cuvs::distance::detail::ops::l2_exp_distance_op", + archs = [60, 80], + ), + dict( + path_prefix="l2_unexpanded", + OpT="cuvs::distance::detail::ops::l2_unexp_distance_op", + archs = [60], + ), + dict( + path_prefix="l_inf", + OpT="cuvs::distance::detail::ops::l_inf_distance_op", + archs = [60], + ), + dict( + path_prefix="lp_unexpanded", + OpT="cuvs::distance::detail::ops::lp_unexp_distance_op", + archs = [60], + ), + dict( + path_prefix="russel_rao", + OpT="cuvs::distance::detail::ops::russel_rao_distance_op", + archs = [60], + ), ] def arch_headers(archs): @@ -183,49 +183,49 @@ def arch_headers(archs): f.write("\n#undef instantiate_raft_distance_detail_pairwise_matrix_dispatch\n") print(f"src/distance/detail/pairwise_matrix/{path}") -# # Dispatch kernels for with the RBF fin op. -# with open("dispatch_rbf.cu", "w") as f: -# OpT="cuvs::distance::detail::ops::l2_unexp_distance_op" -# archs = [60] +# Dispatch kernels for with the RBF fin op. +with open("dispatch_rbf.cu", "w") as f: + OpT="cuvs::distance::detail::ops::l2_unexp_distance_op" + archs = [60] -# f.write(header) -# f.write("#include \"../kernels/rbf_fin_op.cuh\" // rbf_fin_op\n") -# f.write(arch_headers(archs)) -# f.write(macro) + f.write(header) + f.write("#include \"../kernels/rbf_fin_op.cuh\" // rbf_fin_op\n") + f.write(arch_headers(archs)) + f.write(macro) -# for dt in data_type_instances: -# DataT, AccT, OutT, IdxT = (dt[k] for k in ["DataT", "AccT", "OutT", "IdxT"]); -# IdxT = "int64_t" # overwrite IdxT + for dt in data_type_instances: + DataT, AccT, OutT, IdxT = (dt[k] for k in ["DataT", "AccT", "OutT", "IdxT"]); + IdxT = "int64_t" # overwrite IdxT -# FinOpT = f"cuvs::distance::kernels::detail::rbf_fin_op<{DataT}>" -# f.write(f"\ninstantiate_raft_distance_detail_pairwise_matrix_dispatch({OpT}, {DataT}, {AccT}, {OutT}, {FinOpT}, {IdxT});\n") + FinOpT = f"cuvs::distance::kernels::detail::rbf_fin_op<{DataT}>" + f.write(f"\ninstantiate_raft_distance_detail_pairwise_matrix_dispatch({OpT}, {DataT}, {AccT}, {OutT}, {FinOpT}, {IdxT});\n") -# f.write("\n#undef instantiate_raft_distance_detail_pairwise_matrix_dispatch\n") + f.write("\n#undef instantiate_raft_distance_detail_pairwise_matrix_dispatch\n") -# print("src/distance/detail/pairwise_matrix/dispatch_rbf.cu") + print("src/distance/detail/pairwise_matrix/dispatch_rbf.cu") # L2 with int64_t indices for kmeans code -# int64_t_op_instances = [ -# dict( -# path_prefix="l2_expanded", -# OpT="cuvs::distance::detail::ops::l2_exp_distance_op", -# archs = [60, 80], -# )] - -# for op in int64_t_op_instances: -# for dt in data_type_instances: -# DataT, AccT, OutT, IdxT = (dt[k] for k in ["DataT", "AccT", "OutT", "IdxT"]); - -# IdxT = "int64_t" -# path = f"dispatch_{op['path_prefix']}_{DataT}_{AccT}_{OutT}_{IdxT}.cu" -# with open(path, "w") as f: -# f.write(header) -# f.write(arch_headers(op["archs"])) -# f.write(macro) - -# OpT = op['OpT'] -# FinOpT = "raft::identity_op" -# f.write(f"\ninstantiate_raft_distance_detail_pairwise_matrix_dispatch({OpT}, {DataT}, {AccT}, {OutT}, {FinOpT}, {IdxT});\n") -# f.write("\n#undef instantiate_raft_distance_detail_pairwise_matrix_dispatch\n") -# print(f"src/distance/detail/pairwise_matrix/{path}") +int64_t_op_instances = [ + dict( + path_prefix="l2_expanded", + OpT="cuvs::distance::detail::ops::l2_exp_distance_op", + archs = [60, 80], + )] + +for op in int64_t_op_instances: + for dt in data_type_instances: + DataT, AccT, OutT, IdxT = (dt[k] for k in ["DataT", "AccT", "OutT", "IdxT"]); + + IdxT = "int64_t" + path = f"dispatch_{op['path_prefix']}_{DataT}_{AccT}_{OutT}_{IdxT}.cu" + with open(path, "w") as f: + f.write(header) + f.write(arch_headers(op["archs"])) + f.write(macro) + + OpT = op['OpT'] + FinOpT = "raft::identity_op" + f.write(f"\ninstantiate_raft_distance_detail_pairwise_matrix_dispatch({OpT}, {DataT}, {AccT}, {OutT}, {FinOpT}, {IdxT});\n") + f.write("\n#undef instantiate_raft_distance_detail_pairwise_matrix_dispatch\n") + print(f"src/distance/detail/pairwise_matrix/{path}") From 696fdab82d5b1ec0304101e540d49970061869da Mon Sep 17 00:00:00 2001 From: aamijar Date: Tue, 3 Jun 2025 21:21:35 +0000 Subject: [PATCH 03/11] add gtests --- cpp/tests/CMakeLists.txt | 1 + cpp/tests/distance/dist_dice.cu | 112 +++++++++++++++++++++++++++ cpp/tests/distance/distance_base.cuh | 32 ++++++++ cpp/tests/test_utils.h | 17 ++++ 4 files changed, 162 insertions(+) create mode 100644 cpp/tests/distance/dist_dice.cu diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index b49b5acd7d..e2170e594f 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -228,6 +228,7 @@ if(BUILD_TESTS) distance/dist_canberra.cu distance/dist_correlation.cu distance/dist_cos.cu + distance/dist_dice.cu distance/dist_hamming.cu distance/dist_hellinger.cu distance/dist_inner_product.cu diff --git a/cpp/tests/distance/dist_dice.cu b/cpp/tests/distance/dist_dice.cu new file mode 100644 index 0000000000..36ac62ede8 --- /dev/null +++ b/cpp/tests/distance/dist_dice.cu @@ -0,0 +1,112 @@ +/* + * Copyright (c) 2025, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "../test_utils.cuh" +#include "distance_base.cuh" + +namespace cuvs { +namespace distance { + +template +class DistanceExpDice : public DistanceTest { +}; + +template +class DistanceExpDiceXequalY + : public DistanceTestSameBuffer {}; + +const std::vector> inputsf = { + {0.001f, 128, (65536 + 128) * 128, 8, true, 1234ULL}, + {0.001f, 1024, 1024, 32, true, 1234ULL}, + {0.001f, 1024, 32, 1024, true, 1234ULL}, + {0.001f, 32, 1024, 1024, true, 1234ULL}, + {0.003f, 1024, 1024, 1024, true, 1234ULL}, + {0.001f, (65536 + 128) * 128, 128, 8, false, 1234ULL}, + {0.001f, 1024, 1024, 32, false, 1234ULL}, + {0.001f, 1024, 32, 1024, false, 1234ULL}, + {0.001f, 32, 1024, 1024, false, 1234ULL}, + {0.003f, 1024, 1024, 1024, false, 1234ULL}, +}; + +const std::vector> inputsXeqYf = { + {0.01f, 1024, 1024, 32, true, 1234ULL}, + {0.01f, 1024, 32, 1024, true, 1234ULL}, + {0.01f, 32, 1024, 1024, true, 1234ULL}, + {0.03f, 1024, 1024, 1024, true, 1234ULL}, + {0.01f, 1024, 1024, 32, false, 1234ULL}, + {0.01f, 1024, 32, 1024, false, 1234ULL}, + {0.01f, 32, 1024, 1024, false, 1234ULL}, + {0.03f, 1024, 1024, 1024, false, 1234ULL}, +}; + +typedef DistanceExpDice DistanceExpDiceF; +TEST_P(DistanceExpDiceF, Result) +{ + int m = params.isRowMajor ? params.m : params.n; + int n = params.isRowMajor ? params.n : params.m; + ASSERT_TRUE(devArrMatch( + dist_ref.data(), dist.data(), m, n, cuvs::CompareApproxNaN(params.tolerance), stream)); +} +INSTANTIATE_TEST_CASE_P(DistanceTests, DistanceExpDiceF, ::testing::ValuesIn(inputsf)); + +typedef DistanceExpDiceXequalY DistanceExpDiceXequalYF; +TEST_P(DistanceExpDiceXequalYF, Result) +{ + int m = params.m; + int n = params.m; + ASSERT_TRUE(cuvs::devArrMatch(dist_ref[0].data(), + dist[0].data(), + m, + n, + cuvs::CompareApproxNaN(params.tolerance), + stream)); + n = params.isRowMajor ? m : m / 2; + m = params.isRowMajor ? m / 2 : m; + + ASSERT_TRUE(cuvs::devArrMatch(dist_ref[1].data(), + dist[1].data(), + m, + n, + cuvs::CompareApproxNaN(params.tolerance), + stream)); +} +INSTANTIATE_TEST_CASE_P(DistanceTests, DistanceExpDiceXequalYF, ::testing::ValuesIn(inputsXeqYf)); + +const std::vector> inputsd = { + {0.001, 1024, 1024, 32, true, 1234ULL}, + {0.001, 1024, 32, 1024, true, 1234ULL}, + {0.001, 32, 1024, 1024, true, 1234ULL}, + {0.003, 1024, 1024, 1024, true, 1234ULL}, + {0.001f, 1024, 1024, 32, false, 1234ULL}, + {0.001f, 1024, 32, 1024, false, 1234ULL}, + {0.001f, 32, 1024, 1024, false, 1234ULL}, + {0.003f, 1024, 1024, 1024, false, 1234ULL}, +}; +typedef DistanceExpDice DistanceExpDiceD; +TEST_P(DistanceExpDiceD, Result) +{ + int m = params.isRowMajor ? params.m : params.n; + int n = params.isRowMajor ? params.n : params.m; + ASSERT_TRUE(devArrMatch( + dist_ref.data(), dist.data(), m, n, cuvs::CompareApproxNaN(params.tolerance), stream)); +} +INSTANTIATE_TEST_CASE_P(DistanceTests, DistanceExpDiceD, ::testing::ValuesIn(inputsd)); + +class BigMatrixDice : public BigMatrixDistanceTest {}; +TEST_F(BigMatrixDice, Result) {} + +} // end namespace distance +} // end namespace cuvs diff --git a/cpp/tests/distance/distance_base.cuh b/cpp/tests/distance/distance_base.cuh index e9a7bd286a..88e646429f 100644 --- a/cpp/tests/distance/distance_base.cuh +++ b/cpp/tests/distance/distance_base.cuh @@ -140,6 +140,34 @@ RAFT_KERNEL naiveCosineDistanceKernel(OutputType* dist, dist[outidx] = (OutputType)1.0 - acc_ab / (raft::sqrt(acc_a) * raft::sqrt(acc_b)); } +template +RAFT_KERNEL naiveDiceDistanceKernel( + OutputType* dist, const DataType* x, const DataType* y, int m, int n, int k, bool isRowMajor) +{ + int midx = threadIdx.x + blockIdx.x * blockDim.x; + int nidx = threadIdx.y + blockIdx.y * blockDim.y; + if (midx >= m || nidx >= n) { return; } + + OutputType acc_a = OutputType(0); + OutputType acc_b = OutputType(0); + OutputType acc_ab = OutputType(0); + + for (int i = 0; i < k; ++i) { + int xidx = isRowMajor ? i + midx * k : i * m + midx; + int yidx = isRowMajor ? i + nidx * k : i * n + nidx; + auto a = half2float(x[xidx]); + auto b = half2float(y[yidx]); + acc_a += a; + acc_b += b; + acc_ab += a * b; + } + + int outidx = isRowMajor ? midx * n + nidx : midx + m * nidx; + + // Use 1.0 - (dice dissimilarity) to calc the distance + dist[outidx] = (OutputType)1.0 - (2 * acc_ab / ((acc_a) + (acc_b))); +} + template RAFT_KERNEL naiveInnerProductKernel(OutputType* dist, const DataType* x, @@ -443,6 +471,10 @@ void naiveDistance(OutputType* dist, naiveCorrelationDistanceKernel <<>>(dist, x, y, m, n, k, isRowMajor); break; + case cuvs::distance::DistanceType::DiceExpanded: + naiveDiceDistanceKernel + <<>>(dist, x, y, m, n, k, isRowMajor); + break; default: FAIL() << "should be here\n"; } RAFT_CUDA_TRY(cudaPeekAtLastError()); diff --git a/cpp/tests/test_utils.h b/cpp/tests/test_utils.h index ee4e553fa3..2dffd22961 100644 --- a/cpp/tests/test_utils.h +++ b/cpp/tests/test_utils.h @@ -64,6 +64,23 @@ struct CompareApprox { T eps; }; +template +struct CompareApproxNaN { + CompareApproxNaN(T eps_) : eps(eps_) {} + bool operator()(const T& a, const T& b) const + { + T diff = std::abs(a - b); + T m = std::max(std::abs(a), std::abs(b)); + T ratio = diff > eps ? diff / m : diff; + + if (std::isnan(a) && std::isnan(b)) { return true; } + return (ratio <= eps); + } + + private: + T eps; +}; + template ::std::ostream& operator<<(::std::ostream& os, const raft::KeyValuePair& kv) { From 5286b646aa87ecdb34cb1e6f4c3cbe5fb3da8338 Mon Sep 17 00:00:00 2001 From: aamijar Date: Wed, 4 Jun 2025 00:30:42 +0000 Subject: [PATCH 04/11] fixes for half types --- cpp/src/distance/detail/distance.cuh | 4 +- cpp/src/distance/detail/distance_ops/dice.cuh | 7 +- cpp/tests/distance/dist_dice.cu | 90 +++++++++++++++++-- cpp/tests/distance/distance_base.cuh | 6 +- 4 files changed, 94 insertions(+), 13 deletions(-) diff --git a/cpp/src/distance/detail/distance.cuh b/cpp/src/distance/detail/distance.cuh index 9914ee31af..f0cd9bdbf3 100644 --- a/cpp/src/distance/detail/distance.cuh +++ b/cpp/src/distance/detail/distance.cuh @@ -265,8 +265,8 @@ void distance_impl(raft::resources const& handle, cudaStream_t stream = raft::resource::get_cuda_stream(handle); - OutT* x_norm = workspace; - OutT* y_norm = workspace; + AccT* x_norm = workspace; + AccT* y_norm = workspace; // TODO: Column major case looks to have lower accuracy for X == Y, // perhaps the use of stridedSummationKernel could be causing this, // need to investigate and fix. diff --git a/cpp/src/distance/detail/distance_ops/dice.cuh b/cpp/src/distance/detail/distance_ops/dice.cuh index 27ce39cd4a..91b0bb48fd 100644 --- a/cpp/src/distance/detail/distance_ops/dice.cuh +++ b/cpp/src/distance/detail/distance_ops/dice.cuh @@ -55,16 +55,15 @@ struct dice_distance_op { template static constexpr size_t shared_mem_size() { - return Policy::SmemSize + ((Policy::Mblk + Policy::Nblk) * sizeof(DataT)); + return Policy::SmemSize + ((Policy::Mblk + Policy::Nblk) * sizeof(AccT)); } DI void core(AccT& acc, DataT& x, DataT& y) const { if constexpr ((std::is_same_v && std::is_same_v)) { - acc += __half2float(x != DataT(0) ? DataT(1) : DataT(0)) * - __half2float(y != DataT(0) ? DataT(1) : DataT(0)); + acc += __half2float(x) * __half2float(y); } else { - acc += x != DataT(0) ? DataT(1) : DataT(0) * y != DataT(0) ? DataT(1) : DataT(0); + acc += x * y; } }; diff --git a/cpp/tests/distance/dist_dice.cu b/cpp/tests/distance/dist_dice.cu index 36ac62ede8..a97e00541e 100644 --- a/cpp/tests/distance/dist_dice.cu +++ b/cpp/tests/distance/dist_dice.cu @@ -20,13 +20,15 @@ namespace cuvs { namespace distance { -template -class DistanceExpDice : public DistanceTest { -}; +template +class DistanceExpDice + : public DistanceTest {}; -template +template class DistanceExpDiceXequalY - : public DistanceTestSameBuffer {}; + : public DistanceTestSameBuffer {}; const std::vector> inputsf = { {0.001f, 128, (65536 + 128) * 128, 8, true, 1234ULL}, @@ -52,6 +54,17 @@ const std::vector> inputsXeqYf = { {0.03f, 1024, 1024, 1024, false, 1234ULL}, }; +const std::vector> inputsh = { + {0.001f, 1024, 1024, 32, true, 1234ULL}, + {0.001f, 1024, 32, 1024, true, 1234ULL}, + {0.001f, 32, 1024, 1024, true, 1234ULL}, + {0.003f, 1024, 1024, 1024, true, 1234ULL}, + {0.001f, 1024, 1024, 32, false, 1234ULL}, + {0.001f, 1024, 32, 1024, false, 1234ULL}, + {0.001f, 32, 1024, 1024, false, 1234ULL}, + {0.003f, 1024, 1024, 1024, false, 1234ULL}, +}; + typedef DistanceExpDice DistanceExpDiceF; TEST_P(DistanceExpDiceF, Result) { @@ -62,6 +75,16 @@ TEST_P(DistanceExpDiceF, Result) } INSTANTIATE_TEST_CASE_P(DistanceTests, DistanceExpDiceF, ::testing::ValuesIn(inputsf)); +typedef DistanceExpDice DistanceExpDiceH; +TEST_P(DistanceExpDiceH, Result) +{ + int m = params.isRowMajor ? params.m : params.n; + int n = params.isRowMajor ? params.n : params.m; + ASSERT_TRUE(devArrMatch( + dist_ref.data(), dist.data(), m, n, cuvs::CompareApproxNaN(params.tolerance), stream)); +} +INSTANTIATE_TEST_CASE_P(DistanceTests, DistanceExpDiceH, ::testing::ValuesIn(inputsh)); + typedef DistanceExpDiceXequalY DistanceExpDiceXequalYF; TEST_P(DistanceExpDiceXequalYF, Result) { @@ -85,6 +108,20 @@ TEST_P(DistanceExpDiceXequalYF, Result) } INSTANTIATE_TEST_CASE_P(DistanceTests, DistanceExpDiceXequalYF, ::testing::ValuesIn(inputsXeqYf)); +typedef DistanceExpDiceXequalY DistanceExpDiceXequalYH; +TEST_P(DistanceExpDiceXequalYH, Result) +{ + int m = params.m; + int n = params.m; + ASSERT_TRUE(cuvs::devArrMatch(dist_ref[0].data(), + dist[0].data(), + m, + n, + cuvs::CompareApproxNaN(params.tolerance), + stream)); +} +INSTANTIATE_TEST_CASE_P(DistanceTests, DistanceExpDiceXequalYH, ::testing::ValuesIn(inputsh)); + const std::vector> inputsd = { {0.001, 1024, 1024, 32, true, 1234ULL}, {0.001, 1024, 32, 1024, true, 1234ULL}, @@ -108,5 +145,48 @@ INSTANTIATE_TEST_CASE_P(DistanceTests, DistanceExpDiceD, ::testing::ValuesIn(inp class BigMatrixDice : public BigMatrixDistanceTest {}; TEST_F(BigMatrixDice, Result) {} +// Simple test to verify half precision works +TEST(DistanceExpDiceH, SimpleHalfTest) +{ + raft::resources handle; + cudaStream_t stream = raft::resource::get_cuda_stream(handle); + + const int m = 32; + const int n = 32; + const int k = 16; + + rmm::device_uvector x(m * k, stream); + rmm::device_uvector y(n * k, stream); + rmm::device_uvector dist(m * n, stream); + + // Initialize with simple values + raft::random::RngState r(1234ULL); + uniform(handle, r, x.data(), m * k, half(0.0), half(1.0)); + uniform(handle, r, y.data(), n * k, half(0.0), half(1.0)); + + // Create views + auto x_v = + raft::make_device_matrix_view(x.data(), m, k); + auto y_v = + raft::make_device_matrix_view(y.data(), n, k); + auto dist_v = raft::make_device_matrix_view( + dist.data(), m, n); + + // Compute distance + cuvs::distance::pairwise_distance( + handle, x_v, y_v, dist_v, cuvs::distance::DistanceType::DiceExpanded, 2.0f); + + // Just verify it doesn't crash and produces reasonable values + raft::resource::sync_stream(handle); + + // Basic sanity check - distances should be in [0, 1] range for Dice + ASSERT_TRUE(devArrMatch( + 0.0f, + dist.data(), + m * n, + [](float expected, float actual) { return actual >= -0.1f && actual <= 1.1f; }, + stream)); +} + } // end namespace distance } // end namespace cuvs diff --git a/cpp/tests/distance/distance_base.cuh b/cpp/tests/distance/distance_base.cuh index 88e646429f..576121aeea 100644 --- a/cpp/tests/distance/distance_base.cuh +++ b/cpp/tests/distance/distance_base.cuh @@ -573,7 +573,8 @@ class DistanceTest : public ::testing::TestWithParam Date: Wed, 4 Jun 2025 00:34:58 +0000 Subject: [PATCH 05/11] remove unused --- cpp/tests/distance/dist_dice.cu | 43 --------------------------------- 1 file changed, 43 deletions(-) diff --git a/cpp/tests/distance/dist_dice.cu b/cpp/tests/distance/dist_dice.cu index a97e00541e..6f85fdc09e 100644 --- a/cpp/tests/distance/dist_dice.cu +++ b/cpp/tests/distance/dist_dice.cu @@ -145,48 +145,5 @@ INSTANTIATE_TEST_CASE_P(DistanceTests, DistanceExpDiceD, ::testing::ValuesIn(inp class BigMatrixDice : public BigMatrixDistanceTest {}; TEST_F(BigMatrixDice, Result) {} -// Simple test to verify half precision works -TEST(DistanceExpDiceH, SimpleHalfTest) -{ - raft::resources handle; - cudaStream_t stream = raft::resource::get_cuda_stream(handle); - - const int m = 32; - const int n = 32; - const int k = 16; - - rmm::device_uvector x(m * k, stream); - rmm::device_uvector y(n * k, stream); - rmm::device_uvector dist(m * n, stream); - - // Initialize with simple values - raft::random::RngState r(1234ULL); - uniform(handle, r, x.data(), m * k, half(0.0), half(1.0)); - uniform(handle, r, y.data(), n * k, half(0.0), half(1.0)); - - // Create views - auto x_v = - raft::make_device_matrix_view(x.data(), m, k); - auto y_v = - raft::make_device_matrix_view(y.data(), n, k); - auto dist_v = raft::make_device_matrix_view( - dist.data(), m, n); - - // Compute distance - cuvs::distance::pairwise_distance( - handle, x_v, y_v, dist_v, cuvs::distance::DistanceType::DiceExpanded, 2.0f); - - // Just verify it doesn't crash and produces reasonable values - raft::resource::sync_stream(handle); - - // Basic sanity check - distances should be in [0, 1] range for Dice - ASSERT_TRUE(devArrMatch( - 0.0f, - dist.data(), - m * n, - [](float expected, float actual) { return actual >= -0.1f && actual <= 1.1f; }, - stream)); -} - } // end namespace distance } // end namespace cuvs From 59cb596b811f8034d43c1aa597b33cf266e8f476 Mon Sep 17 00:00:00 2001 From: aamijar Date: Wed, 4 Jun 2025 22:00:16 +0000 Subject: [PATCH 06/11] fix double tests --- cpp/tests/distance/dist_dice.cu | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cpp/tests/distance/dist_dice.cu b/cpp/tests/distance/dist_dice.cu index 6f85fdc09e..23cc574621 100644 --- a/cpp/tests/distance/dist_dice.cu +++ b/cpp/tests/distance/dist_dice.cu @@ -122,7 +122,7 @@ TEST_P(DistanceExpDiceXequalYH, Result) } INSTANTIATE_TEST_CASE_P(DistanceTests, DistanceExpDiceXequalYH, ::testing::ValuesIn(inputsh)); -const std::vector> inputsd = { +const std::vector> inputsd = { {0.001, 1024, 1024, 32, true, 1234ULL}, {0.001, 1024, 32, 1024, true, 1234ULL}, {0.001, 32, 1024, 1024, true, 1234ULL}, @@ -132,13 +132,13 @@ const std::vector> inputsd = { {0.001f, 32, 1024, 1024, false, 1234ULL}, {0.003f, 1024, 1024, 1024, false, 1234ULL}, }; -typedef DistanceExpDice DistanceExpDiceD; +typedef DistanceExpDice DistanceExpDiceD; TEST_P(DistanceExpDiceD, Result) { int m = params.isRowMajor ? params.m : params.n; int n = params.isRowMajor ? params.n : params.m; ASSERT_TRUE(devArrMatch( - dist_ref.data(), dist.data(), m, n, cuvs::CompareApproxNaN(params.tolerance), stream)); + dist_ref.data(), dist.data(), m, n, cuvs::CompareApproxNaN(params.tolerance), stream)); } INSTANTIATE_TEST_CASE_P(DistanceTests, DistanceExpDiceD, ::testing::ValuesIn(inputsd)); From 591c6cf2f9e1b942d83466d03ccb800f92786899 Mon Sep 17 00:00:00 2001 From: aamijar Date: Fri, 6 Jun 2025 21:23:36 +0000 Subject: [PATCH 07/11] update year --- cpp/src/distance/detail/distance.cuh | 2 +- cpp/src/distance/detail/distance_ops/all_ops.cuh | 2 +- cpp/src/distance/detail/distance_ops/dice.cuh | 2 +- cpp/src/distance/detail/pairwise_matrix/dispatch-ext.cuh | 2 +- .../distance/detail/pairwise_matrix/dispatch_00_generate.py | 4 ++-- .../pairwise_matrix/dispatch_dice_double_double_double_int.cu | 2 +- .../pairwise_matrix/dispatch_dice_float_float_float_int.cu | 2 +- .../pairwise_matrix/dispatch_dice_half_float_float_int.cu | 2 +- cpp/src/distance/distance-ext.cuh | 2 +- cpp/src/distance/distance-inl.cuh | 2 +- cpp/src/distance/distance.cu | 2 +- cpp/tests/distance/distance_base.cuh | 2 +- cpp/tests/test_utils.h | 2 +- 13 files changed, 14 insertions(+), 14 deletions(-) diff --git a/cpp/src/distance/detail/distance.cuh b/cpp/src/distance/detail/distance.cuh index f0cd9bdbf3..29b93c5056 100644 --- a/cpp/src/distance/detail/distance.cuh +++ b/cpp/src/distance/detail/distance.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2024, NVIDIA CORPORATION. + * Copyright (c) 2018-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/src/distance/detail/distance_ops/all_ops.cuh b/cpp/src/distance/detail/distance_ops/all_ops.cuh index ce58b8a81c..049487faf1 100644 --- a/cpp/src/distance/detail/distance_ops/all_ops.cuh +++ b/cpp/src/distance/detail/distance_ops/all_ops.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/src/distance/detail/distance_ops/dice.cuh b/cpp/src/distance/detail/distance_ops/dice.cuh index 91b0bb48fd..3ed5c78025 100644 --- a/cpp/src/distance/detail/distance_ops/dice.cuh +++ b/cpp/src/distance/detail/distance_ops/dice.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023-2024, NVIDIA CORPORATION. + * Copyright (c) 2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/src/distance/detail/pairwise_matrix/dispatch-ext.cuh b/cpp/src/distance/detail/pairwise_matrix/dispatch-ext.cuh index 3c3422f2fb..737fe6cb68 100644 --- a/cpp/src/distance/detail/pairwise_matrix/dispatch-ext.cuh +++ b/cpp/src/distance/detail/pairwise_matrix/dispatch-ext.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/src/distance/detail/pairwise_matrix/dispatch_00_generate.py b/cpp/src/distance/detail/pairwise_matrix/dispatch_00_generate.py index 68d814c528..62808fed87 100644 --- a/cpp/src/distance/detail/pairwise_matrix/dispatch_00_generate.py +++ b/cpp/src/distance/detail/pairwise_matrix/dispatch_00_generate.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2023-2025, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -15,7 +15,7 @@ # NOTE: this template is not perfectly formatted. Use pre-commit to get # everything in shape again. header = """/* - * Copyright (c) 2024, NVIDIA CORPORATION. + * Copyright (c) 2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/src/distance/detail/pairwise_matrix/dispatch_dice_double_double_double_int.cu b/cpp/src/distance/detail/pairwise_matrix/dispatch_dice_double_double_double_int.cu index b07e49f59a..6626e8ebd3 100644 --- a/cpp/src/distance/detail/pairwise_matrix/dispatch_dice_double_double_double_int.cu +++ b/cpp/src/distance/detail/pairwise_matrix/dispatch_dice_double_double_double_int.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2024, NVIDIA CORPORATION. + * Copyright (c) 2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/src/distance/detail/pairwise_matrix/dispatch_dice_float_float_float_int.cu b/cpp/src/distance/detail/pairwise_matrix/dispatch_dice_float_float_float_int.cu index 5f312cc634..4b308b54ca 100644 --- a/cpp/src/distance/detail/pairwise_matrix/dispatch_dice_float_float_float_int.cu +++ b/cpp/src/distance/detail/pairwise_matrix/dispatch_dice_float_float_float_int.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2024, NVIDIA CORPORATION. + * Copyright (c) 2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/src/distance/detail/pairwise_matrix/dispatch_dice_half_float_float_int.cu b/cpp/src/distance/detail/pairwise_matrix/dispatch_dice_half_float_float_int.cu index a7abcd8f07..45840fd6dc 100644 --- a/cpp/src/distance/detail/pairwise_matrix/dispatch_dice_half_float_float_int.cu +++ b/cpp/src/distance/detail/pairwise_matrix/dispatch_dice_half_float_float_int.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2024, NVIDIA CORPORATION. + * Copyright (c) 2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/src/distance/distance-ext.cuh b/cpp/src/distance/distance-ext.cuh index 3582fb7182..893a755c2e 100644 --- a/cpp/src/distance/distance-ext.cuh +++ b/cpp/src/distance/distance-ext.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2024, NVIDIA CORPORATION. + * Copyright (c) 2018-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/src/distance/distance-inl.cuh b/cpp/src/distance/distance-inl.cuh index 5e6ac54085..2307555c13 100644 --- a/cpp/src/distance/distance-inl.cuh +++ b/cpp/src/distance/distance-inl.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2024, NVIDIA CORPORATION. + * Copyright (c) 2018-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/src/distance/distance.cu b/cpp/src/distance/distance.cu index facdca8858..26497238c1 100644 --- a/cpp/src/distance/distance.cu +++ b/cpp/src/distance/distance.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2024, NVIDIA CORPORATION. + * Copyright (c) 2018-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/tests/distance/distance_base.cuh b/cpp/tests/distance/distance_base.cuh index 576121aeea..3d93da8c3f 100644 --- a/cpp/tests/distance/distance_base.cuh +++ b/cpp/tests/distance/distance_base.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2024, NVIDIA CORPORATION. + * Copyright (c) 2018-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/tests/test_utils.h b/cpp/tests/test_utils.h index 2dffd22961..0111397239 100644 --- a/cpp/tests/test_utils.h +++ b/cpp/tests/test_utils.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2024, NVIDIA CORPORATION. + * Copyright (c) 2018-2025, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. From 18626b6dc8f835c07051be942053dc8c8740be27 Mon Sep 17 00:00:00 2001 From: aamijar Date: Thu, 12 Jun 2025 18:01:34 +0000 Subject: [PATCH 08/11] fix style --- cpp/CMakeLists.txt | 2 +- cpp/tests/CMakeLists.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 29fcfcf2d8..3e28b7bc0a 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -1,5 +1,5 @@ # ============================================================================= -# Copyright (c) 2020-2024, NVIDIA CORPORATION. +# Copyright (c) 2020-2025, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except # in compliance with the License. You may obtain a copy of the License at diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index e2170e594f..8784e75094 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -1,5 +1,5 @@ # ============================================================================= -# Copyright (c) 2021-2024, NVIDIA CORPORATION. +# Copyright (c) 2021-2025, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except # in compliance with the License. You may obtain a copy of the License at From 49156b342869e80362e0994209770c675b462d76 Mon Sep 17 00:00:00 2001 From: aamijar Date: Fri, 13 Jun 2025 04:54:40 +0000 Subject: [PATCH 09/11] fix norm --- cpp/src/distance/detail/distance.cuh | 46 ++++++++-------------------- 1 file changed, 13 insertions(+), 33 deletions(-) diff --git a/cpp/src/distance/detail/distance.cuh b/cpp/src/distance/detail/distance.cuh index 622bd041c6..adfeff7e40 100644 --- a/cpp/src/distance/detail/distance.cuh +++ b/cpp/src/distance/detail/distance.cuh @@ -258,41 +258,21 @@ void distance_impl(raft::resources const& handle, // perhaps the use of stridedSummationKernel could be causing this, // need to investigate and fix. if (x == y && is_row_major) { - raft::linalg::reduce(x_norm, - x, - k, - std::max(m, n), - (AccT)0, - is_row_major, - true, - stream, - false, - raft::identity_op(), - raft::add_op()); + raft::linalg::reduce( + x_norm, x, k, std::max(m, n), (AccT)0, stream, false, raft::identity_op(), raft::add_op()); } else { y_norm += m; - raft::linalg::reduce(x_norm, - x, - k, - m, - (AccT)0, - is_row_major, - true, - stream, - false, - raft::identity_op(), - raft::add_op()); - raft::linalg::reduce(y_norm, - y, - k, - n, - (AccT)0, - is_row_major, - true, - stream, - false, - raft::identity_op(), - raft::add_op()); + if (is_row_major) { + raft::linalg::reduce( + x_norm, x, k, m, (AccT)0, stream, false, raft::identity_op(), raft::add_op()); + raft::linalg::reduce( + y_norm, y, k, n, (AccT)0, stream, false, raft::identity_op(), raft::add_op()); + } else { + raft::linalg::reduce( + x_norm, x, k, m, (AccT)0, stream, false, raft::identity_op(), raft::add_op()); + raft::linalg::reduce( + y_norm, y, k, n, (AccT)0, stream, false, raft::identity_op(), raft::add_op()); + } } ops::dice_distance_op distance_op{}; From 36ebb145ed89082a6d030970f624d98a47b4832b Mon Sep 17 00:00:00 2001 From: aamijar Date: Tue, 16 Dec 2025 08:02:58 +0000 Subject: [PATCH 10/11] fix headers --- cpp/src/distance/detail/distance_ops/all_ops.cuh | 2 +- cpp/src/distance/detail/distance_ops/dice.cuh | 15 ++------------- .../detail/pairwise_matrix/dispatch-ext.cuh | 2 +- .../pairwise_matrix/dispatch_00_generate.py | 2 +- .../dispatch_dice_double_double_double_int.cu | 15 ++------------- .../dispatch_dice_float_float_float_int.cu | 15 ++------------- .../dispatch_dice_half_float_float_int.cu | 15 ++------------- cpp/src/distance/distance-ext.cuh | 2 +- cpp/src/distance/distance-inl.cuh | 2 +- cpp/src/distance/distance.cu | 2 +- cpp/tests/distance/dist_dice.cu | 15 ++------------- cpp/tests/distance/distance_base.cuh | 2 +- cpp/tests/test_utils.h | 2 +- 13 files changed, 18 insertions(+), 73 deletions(-) diff --git a/cpp/src/distance/detail/distance_ops/all_ops.cuh b/cpp/src/distance/detail/distance_ops/all_ops.cuh index e08c55b57b..134edca835 100644 --- a/cpp/src/distance/detail/distance_ops/all_ops.cuh +++ b/cpp/src/distance/detail/distance_ops/all_ops.cuh @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2023, NVIDIA CORPORATION. + * SPDX-FileCopyrightText: Copyright (c) 2023-2025, NVIDIA CORPORATION. * SPDX-License-Identifier: Apache-2.0 */ diff --git a/cpp/src/distance/detail/distance_ops/dice.cuh b/cpp/src/distance/detail/distance_ops/dice.cuh index 3ed5c78025..d228c88d4e 100644 --- a/cpp/src/distance/detail/distance_ops/dice.cuh +++ b/cpp/src/distance/detail/distance_ops/dice.cuh @@ -1,17 +1,6 @@ /* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. + * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 */ #pragma once diff --git a/cpp/src/distance/detail/pairwise_matrix/dispatch-ext.cuh b/cpp/src/distance/detail/pairwise_matrix/dispatch-ext.cuh index 1f2fe9ccbb..1ade0f4af4 100644 --- a/cpp/src/distance/detail/pairwise_matrix/dispatch-ext.cuh +++ b/cpp/src/distance/detail/pairwise_matrix/dispatch-ext.cuh @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2023, NVIDIA CORPORATION. + * SPDX-FileCopyrightText: Copyright (c) 2023-2025, NVIDIA CORPORATION. * SPDX-License-Identifier: Apache-2.0 */ #pragma once diff --git a/cpp/src/distance/detail/pairwise_matrix/dispatch_00_generate.py b/cpp/src/distance/detail/pairwise_matrix/dispatch_00_generate.py index 49e48192a8..b52d29fea8 100644 --- a/cpp/src/distance/detail/pairwise_matrix/dispatch_00_generate.py +++ b/cpp/src/distance/detail/pairwise_matrix/dispatch_00_generate.py @@ -84,7 +84,7 @@ dict( path_prefix="dice", OpT="cuvs::distance::detail::ops::dice_distance_op", - archs = [60, 80], + archs=[60, 80], ), dict( path_prefix="hamming_unexpanded", diff --git a/cpp/src/distance/detail/pairwise_matrix/dispatch_dice_double_double_double_int.cu b/cpp/src/distance/detail/pairwise_matrix/dispatch_dice_double_double_double_int.cu index 6626e8ebd3..64e75f0bab 100644 --- a/cpp/src/distance/detail/pairwise_matrix/dispatch_dice_double_double_double_int.cu +++ b/cpp/src/distance/detail/pairwise_matrix/dispatch_dice_double_double_double_int.cu @@ -1,17 +1,6 @@ /* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. + * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 */ /* diff --git a/cpp/src/distance/detail/pairwise_matrix/dispatch_dice_float_float_float_int.cu b/cpp/src/distance/detail/pairwise_matrix/dispatch_dice_float_float_float_int.cu index 4b308b54ca..830c023b47 100644 --- a/cpp/src/distance/detail/pairwise_matrix/dispatch_dice_float_float_float_int.cu +++ b/cpp/src/distance/detail/pairwise_matrix/dispatch_dice_float_float_float_int.cu @@ -1,17 +1,6 @@ /* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. + * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 */ /* diff --git a/cpp/src/distance/detail/pairwise_matrix/dispatch_dice_half_float_float_int.cu b/cpp/src/distance/detail/pairwise_matrix/dispatch_dice_half_float_float_int.cu index 45840fd6dc..0aea26a846 100644 --- a/cpp/src/distance/detail/pairwise_matrix/dispatch_dice_half_float_float_int.cu +++ b/cpp/src/distance/detail/pairwise_matrix/dispatch_dice_half_float_float_int.cu @@ -1,17 +1,6 @@ /* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. + * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 */ /* diff --git a/cpp/src/distance/distance-ext.cuh b/cpp/src/distance/distance-ext.cuh index 6b9fb68ca9..f0155e3efa 100644 --- a/cpp/src/distance/distance-ext.cuh +++ b/cpp/src/distance/distance-ext.cuh @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2018-2024, NVIDIA CORPORATION. + * SPDX-FileCopyrightText: Copyright (c) 2018-2025, NVIDIA CORPORATION. * SPDX-License-Identifier: Apache-2.0 */ #pragma once diff --git a/cpp/src/distance/distance-inl.cuh b/cpp/src/distance/distance-inl.cuh index 5025dcbaad..28b908ca7d 100644 --- a/cpp/src/distance/distance-inl.cuh +++ b/cpp/src/distance/distance-inl.cuh @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2018-2024, NVIDIA CORPORATION. + * SPDX-FileCopyrightText: Copyright (c) 2018-2025, NVIDIA CORPORATION. * SPDX-License-Identifier: Apache-2.0 */ #pragma once diff --git a/cpp/src/distance/distance.cu b/cpp/src/distance/distance.cu index fd9b70a934..7d88580b90 100644 --- a/cpp/src/distance/distance.cu +++ b/cpp/src/distance/distance.cu @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2018-2024, NVIDIA CORPORATION. + * SPDX-FileCopyrightText: Copyright (c) 2018-2025, NVIDIA CORPORATION. * SPDX-License-Identifier: Apache-2.0 */ diff --git a/cpp/tests/distance/dist_dice.cu b/cpp/tests/distance/dist_dice.cu index 23cc574621..0e0e314bc8 100644 --- a/cpp/tests/distance/dist_dice.cu +++ b/cpp/tests/distance/dist_dice.cu @@ -1,17 +1,6 @@ /* - * Copyright (c) 2025, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. + * SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 */ #include "../test_utils.cuh" diff --git a/cpp/tests/distance/distance_base.cuh b/cpp/tests/distance/distance_base.cuh index 864048fbdd..c1bd04fb7a 100644 --- a/cpp/tests/distance/distance_base.cuh +++ b/cpp/tests/distance/distance_base.cuh @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2018-2024, NVIDIA CORPORATION. + * SPDX-FileCopyrightText: Copyright (c) 2018-2025, NVIDIA CORPORATION. * SPDX-License-Identifier: Apache-2.0 */ diff --git a/cpp/tests/test_utils.h b/cpp/tests/test_utils.h index 305953db88..748b5462cc 100644 --- a/cpp/tests/test_utils.h +++ b/cpp/tests/test_utils.h @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2018-2024, NVIDIA CORPORATION. + * SPDX-FileCopyrightText: Copyright (c) 2018-2025, NVIDIA CORPORATION. * SPDX-License-Identifier: Apache-2.0 */ From 3e730b3b0c5b73adde9db827581fc9aa791ca6d5 Mon Sep 17 00:00:00 2001 From: aamijar Date: Tue, 16 Dec 2025 08:27:32 +0000 Subject: [PATCH 11/11] remove cutlass op --- cpp/src/distance/detail/distance_ops/dice.cuh | 16 ---------------- cpp/tests/CMakeLists.txt | 1 + 2 files changed, 1 insertion(+), 16 deletions(-) diff --git a/cpp/src/distance/detail/distance_ops/dice.cuh b/cpp/src/distance/detail/distance_ops/dice.cuh index d228c88d4e..b3dd8cc9bb 100644 --- a/cpp/src/distance/detail/distance_ops/dice.cuh +++ b/cpp/src/distance/detail/distance_ops/dice.cuh @@ -9,17 +9,6 @@ namespace cuvs::distance::detail::ops { -// Epilogue operator for CUTLASS based kernel -template -struct dice_cutlass_op { - __device__ dice_cutlass_op() noexcept {} - __device__ AccT operator()(AccT& aNorm, const AccT& bNorm, AccT& accVal) const noexcept - { - return static_cast(1.0) - static_cast(2 * accVal / (aNorm + bNorm)); - } - __device__ AccT operator()(DataT aData) const noexcept { return aData; } -}; - /** * @brief the expanded dice distance matrix calculation * @@ -71,11 +60,6 @@ struct dice_distance_op { } } } - - constexpr dice_cutlass_op get_cutlass_op() const - { - return dice_cutlass_op(); - } }; } // namespace cuvs::distance::detail::ops diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index 198dd9b70c..67d61a4435 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -295,6 +295,7 @@ ConfigureTest( PATH distance/dist_canberra.cu distance/dist_correlation.cu distance/dist_cos.cu + distance/dist_dice.cu distance/dist_hamming.cu distance/dist_hellinger.cu distance/dist_inner_product.cu