Skip to content

Commit 9459d78

Browse files
committed
Merge remote-tracking branch 'origin/branch-25.02' into rhdong/bitset-to-csr-dev
2 parents 7ddd5cc + 5c826d7 commit 9459d78

File tree

7 files changed

+213
-39
lines changed

7 files changed

+213
-39
lines changed

cpp/include/raft/core/bitmap.hpp

Lines changed: 20 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -53,9 +53,18 @@ struct bitmap_view : public bitset_view<bitmap_t, index_t> {
5353
* @param bitmap_ptr Device raw pointer
5454
* @param rows Number of rows in the matrix.
5555
* @param cols Number of columns in the matrix.
56+
* @param original_nbits Original number of bits used when the bitmap was created, to handle
57+
* potential mismatches of data types. This is useful for using ANN indexes when a bitmap was
58+
* originally created with a different data type than the ones currently supported in cuVS ANN
59+
* indexes.
5660
*/
57-
_RAFT_HOST_DEVICE bitmap_view(bitmap_t* bitmap_ptr, index_t rows, index_t cols)
58-
: bitset_view<bitmap_t, index_t>(bitmap_ptr, rows * cols), rows_(rows), cols_(cols)
61+
_RAFT_HOST_DEVICE bitmap_view(bitmap_t* bitmap_ptr,
62+
index_t rows,
63+
index_t cols,
64+
index_t original_nbits = 0)
65+
: bitset_view<bitmap_t, index_t>(bitmap_ptr, rows * cols, original_nbits),
66+
rows_(rows),
67+
cols_(cols)
5968
{
6069
}
6170

@@ -65,11 +74,18 @@ struct bitmap_view : public bitset_view<bitmap_t, index_t> {
6574
* @param bitmap_span Device vector view of the bitmap
6675
* @param rows Number of rows in the matrix.
6776
* @param cols Number of columns in the matrix.
77+
* @param original_nbits Original number of bits used when the bitmap was created, to handle
78+
* potential mismatches of data types. This is useful for using ANN indexes when a bitmap was
79+
* originally created with a different data type than the ones currently supported in cuVS ANN
80+
* indexes.
6881
*/
6982
_RAFT_HOST_DEVICE bitmap_view(raft::device_vector_view<bitmap_t, index_t> bitmap_span,
7083
index_t rows,
71-
index_t cols)
72-
: bitset_view<bitmap_t, index_t>(bitmap_span, rows * cols), rows_(rows), cols_(cols)
84+
index_t cols,
85+
index_t original_nbits = 0)
86+
: bitset_view<bitmap_t, index_t>(bitmap_span, rows * cols, original_nbits),
87+
rows_(rows),
88+
cols_(cols)
7389
{
7490
}
7591

cpp/include/raft/core/bitset.cuh

Lines changed: 45 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -33,12 +33,41 @@
3333

3434
namespace raft::core {
3535

36+
template <typename index_t>
37+
_RAFT_HOST_DEVICE void inline compute_original_nbits_position(const index_t original_nbits,
38+
const index_t nbits,
39+
const index_t sample_index,
40+
index_t& new_bit_index,
41+
index_t& new_bit_offset)
42+
{
43+
const index_t original_bit_index = sample_index / original_nbits;
44+
const index_t original_bit_offset = sample_index % original_nbits;
45+
new_bit_index = original_bit_index * original_nbits / nbits;
46+
new_bit_offset = 0;
47+
if (original_nbits > nbits) {
48+
new_bit_index += original_bit_offset / nbits;
49+
new_bit_offset = original_bit_offset % nbits;
50+
} else {
51+
index_t ratio = nbits / original_nbits;
52+
new_bit_offset += (original_bit_index % ratio) * original_nbits;
53+
new_bit_offset += original_bit_offset % nbits;
54+
}
55+
}
56+
3657
template <typename bitset_t, typename index_t>
3758
_RAFT_HOST_DEVICE inline bool bitset_view<bitset_t, index_t>::test(const index_t sample_index) const
3859
{
39-
const bitset_t bit_element = bitset_ptr_[sample_index / bitset_element_size];
40-
const index_t bit_index = sample_index % bitset_element_size;
41-
const bool is_bit_set = (bit_element & (bitset_t{1} << bit_index)) != 0;
60+
const index_t nbits = sizeof(bitset_t) * 8;
61+
index_t bit_index = 0;
62+
index_t bit_offset = 0;
63+
if (original_nbits_ == 0 || nbits == original_nbits_) {
64+
bit_index = sample_index / bitset_element_size;
65+
bit_offset = sample_index % bitset_element_size;
66+
} else {
67+
compute_original_nbits_position(original_nbits_, nbits, sample_index, bit_index, bit_offset);
68+
}
69+
const bitset_t bit_element = bitset_ptr_[bit_index];
70+
const bool is_bit_set = (bit_element & (bitset_t{1} << bit_offset)) != 0;
4271
return is_bit_set;
4372
}
4473

@@ -52,14 +81,22 @@ template <typename bitset_t, typename index_t>
5281
_RAFT_DEVICE void bitset_view<bitset_t, index_t>::set(const index_t sample_index,
5382
bool set_value) const
5483
{
55-
const index_t bit_element = sample_index / bitset_element_size;
56-
const index_t bit_index = sample_index % bitset_element_size;
57-
const bitset_t bitmask = bitset_t{1} << bit_index;
84+
const index_t nbits = sizeof(bitset_t) * 8;
85+
index_t bit_index = 0;
86+
index_t bit_offset = 0;
87+
88+
if (original_nbits_ == 0 || nbits == original_nbits_) {
89+
bit_index = sample_index / bitset_element_size;
90+
bit_offset = sample_index % bitset_element_size;
91+
} else {
92+
compute_original_nbits_position(original_nbits_, nbits, sample_index, bit_index, bit_offset);
93+
}
94+
const bitset_t bitmask = bitset_t{1} << bit_offset;
5895
if (set_value) {
59-
atomicOr(bitset_ptr_ + bit_element, bitmask);
96+
atomicOr(bitset_ptr_ + bit_index, bitmask);
6097
} else {
6198
const bitset_t bitmask2 = ~bitmask;
62-
atomicAnd(bitset_ptr_ + bit_element, bitmask2);
99+
atomicAnd(bitset_ptr_ + bit_index, bitmask2);
63100
}
64101
}
65102

cpp/include/raft/core/bitset.hpp

Lines changed: 30 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -42,19 +42,38 @@ template <typename bitset_t = uint32_t, typename index_t = uint32_t>
4242
struct bitset_view {
4343
static constexpr index_t bitset_element_size = sizeof(bitset_t) * 8;
4444

45-
_RAFT_HOST_DEVICE bitset_view(bitset_t* bitset_ptr, index_t bitset_len)
46-
: bitset_ptr_{bitset_ptr}, bitset_len_{bitset_len}
45+
/**
46+
* @brief Create a bitset view from a device pointer to the bitset.
47+
*
48+
* @param bitset_ptr Device pointer to the bitset
49+
* @param bitset_len Number of bits in the bitset
50+
* @param original_nbits Original number of bits used when the bitset was created, to handle
51+
* potential mismatches of data types. This is useful for using ANN indexes when a bitset was
52+
* originally created with a different data type than the ones currently supported in cuVS ANN
53+
* indexes.
54+
*/
55+
_RAFT_HOST_DEVICE bitset_view(bitset_t* bitset_ptr,
56+
index_t bitset_len,
57+
index_t original_nbits = 0)
58+
: bitset_ptr_{bitset_ptr}, bitset_len_{bitset_len}, original_nbits_{original_nbits}
4759
{
4860
}
4961
/**
5062
* @brief Create a bitset view from a device vector view of the bitset.
5163
*
5264
* @param bitset_span Device vector view of the bitset
5365
* @param bitset_len Number of bits in the bitset
66+
* @param original_nbits Original number of bits used when the bitset was created, to handle
67+
* potential mismatches of data types. This is useful for using ANN indexes when a bitset was
68+
* originally created with a different data type than the ones currently supported in cuVS ANN
69+
* indexes.
5470
*/
5571
_RAFT_HOST_DEVICE bitset_view(raft::device_vector_view<bitset_t, index_t> bitset_span,
56-
index_t bitset_len)
57-
: bitset_ptr_{bitset_span.data_handle()}, bitset_len_{bitset_len}
72+
index_t bitset_len,
73+
index_t original_nbits = 0)
74+
: bitset_ptr_{bitset_span.data_handle()},
75+
bitset_len_{bitset_len},
76+
original_nbits_{original_nbits}
5877
{
5978
}
6079
/**
@@ -180,6 +199,12 @@ struct bitset_view {
180199
return (bitset_len + bits_per_element - 1) / bits_per_element;
181200
}
182201

202+
/**
203+
* @brief Get the original number of bits of the bitset.
204+
*/
205+
auto get_original_nbits() const -> index_t { return original_nbits_; }
206+
void set_original_nbits(index_t original_nbits) { original_nbits_ = original_nbits; }
207+
183208
/**
184209
* @brief Converts to a Compressed Sparse Row (CSR) format matrix.
185210
*
@@ -246,6 +271,7 @@ struct bitset_view {
246271
private:
247272
bitset_t* bitset_ptr_;
248273
index_t bitset_len_;
274+
index_t original_nbits_;
249275
};
250276

251277
/**

cpp/include/raft/sparse/detail/coo.cuh

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -182,7 +182,7 @@ class COO {
182182
* @param n_rows: number of rows in the dense matrix
183183
* @param n_cols: number of columns in the dense matrix
184184
*/
185-
void setSize(int n_rows, int n_cols)
185+
void setSize(Index_Type n_rows, Index_Type n_cols)
186186
{
187187
this->n_rows = n_rows;
188188
this->n_cols = n_cols;
@@ -192,7 +192,7 @@ class COO {
192192
* @brief Set the number of rows and cols for a square dense matrix
193193
* @param n: number of rows and cols
194194
*/
195-
void setSize(int n)
195+
void setSize(Index_Type n)
196196
{
197197
this->n_rows = n;
198198
this->n_cols = n;
@@ -204,7 +204,10 @@ class COO {
204204
* @param init: should values be initialized to 0?
205205
* @param stream: CUDA stream to use
206206
*/
207-
void allocate(int nnz, bool init, cudaStream_t stream) { this->allocate(nnz, 0, init, stream); }
207+
void allocate(Index_Type nnz, bool init, cudaStream_t stream)
208+
{
209+
this->allocate(nnz, 0, init, stream);
210+
}
208211

209212
/**
210213
* @brief Allocate the underlying arrays
@@ -213,7 +216,7 @@ class COO {
213216
* @param init: should values be initialized to 0?
214217
* @param stream: CUDA stream to use
215218
*/
216-
void allocate(int nnz, int size, bool init, cudaStream_t stream)
219+
void allocate(Index_Type nnz, Index_Type size, bool init, cudaStream_t stream)
217220
{
218221
this->allocate(nnz, size, size, init, stream);
219222
}
@@ -226,7 +229,8 @@ class COO {
226229
* @param init: should values be initialized to 0?
227230
* @param stream: stream to use for init
228231
*/
229-
void allocate(int nnz, int n_rows, int n_cols, bool init, cudaStream_t stream)
232+
void allocate(
233+
Index_Type nnz, Index_Type n_rows, Index_Type n_cols, bool init, cudaStream_t stream)
230234
{
231235
this->n_rows = n_rows;
232236
this->n_cols = n_cols;

cpp/include/raft/sparse/solver/detail/lanczos.cuh

Lines changed: 12 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -624,7 +624,7 @@ static int lanczosRestart(raft::resources const& handle,
624624
value_type_t* shifts_host;
625625

626626
// Orthonormal matrix for similarity transform
627-
value_type_t* V_dev = work_dev + n * iter;
627+
value_type_t* V_dev = work_dev + (size_t)n * (size_t)iter;
628628

629629
// -------------------------------------------------------
630630
// Implementation
@@ -641,7 +641,7 @@ static int lanczosRestart(raft::resources const& handle,
641641
// std::cout <<std::endl;
642642

643643
// Initialize similarity transform with identity matrix
644-
memset(V_host, 0, iter * iter * sizeof(value_type_t));
644+
memset(V_host, 0, (size_t)iter * (size_t)iter * (size_t)sizeof(value_type_t));
645645
for (i = 0; i < iter; ++i)
646646
V_host[IDX(i, i, iter)] = 1;
647647

@@ -679,8 +679,11 @@ static int lanczosRestart(raft::resources const& handle,
679679
WARNING("error in implicitly shifted QR algorithm");
680680

681681
// Obtain new residual
682-
RAFT_CUDA_TRY(cudaMemcpyAsync(
683-
V_dev, V_host, iter * iter * sizeof(value_type_t), cudaMemcpyHostToDevice, stream));
682+
RAFT_CUDA_TRY(cudaMemcpyAsync(V_dev,
683+
V_host,
684+
(size_t)iter * (size_t)iter * (size_t)sizeof(value_type_t),
685+
cudaMemcpyHostToDevice,
686+
stream));
684687

685688
beta_host[iter - 1] = beta_host[iter - 1] * V_host[IDX(iter - 1, iter_new - 1, iter)];
686689
RAFT_CUBLAS_TRY(raft::linalg::detail::cublasgemv(cublas_h,
@@ -716,7 +719,7 @@ static int lanczosRestart(raft::resources const& handle,
716719

717720
RAFT_CUDA_TRY(cudaMemcpyAsync(lanczosVecs_dev,
718721
work_dev,
719-
n * iter_new * sizeof(value_type_t),
722+
(size_t)n * (size_t)iter_new * (size_t)sizeof(value_type_t),
720723
cudaMemcpyDeviceToDevice,
721724
stream));
722725

@@ -1045,10 +1048,10 @@ int computeSmallestEigenvectors(
10451048
unsigned long long seed = 1234567)
10461049
{
10471050
// Matrix dimension
1048-
index_type_t n = A.nrows_;
1051+
size_t n = A.nrows_;
10491052

10501053
// Check that parameters are valid
1051-
RAFT_EXPECTS(nEigVecs > 0 && nEigVecs <= n, "Invalid number of eigenvectors.");
1054+
RAFT_EXPECTS(nEigVecs > 0 && (size_t)nEigVecs <= n, "Invalid number of eigenvectors.");
10521055
RAFT_EXPECTS(restartIter > 0, "Invalid restartIter.");
10531056
RAFT_EXPECTS(tol > 0, "Invalid tolerance.");
10541057
RAFT_EXPECTS(maxIter >= nEigVecs, "Invalid maxIter.");
@@ -1395,10 +1398,10 @@ int computeLargestEigenvectors(
13951398
unsigned long long seed = 123456)
13961399
{
13971400
// Matrix dimension
1398-
index_type_t n = A.nrows_;
1401+
size_t n = A.nrows_;
13991402

14001403
// Check that parameters are valid
1401-
RAFT_EXPECTS(nEigVecs > 0 && nEigVecs <= n, "Invalid number of eigenvectors.");
1404+
RAFT_EXPECTS(nEigVecs > 0 && (size_t)nEigVecs <= n, "Invalid number of eigenvectors.");
14021405
RAFT_EXPECTS(restartIter > 0, "Invalid restartIter.");
14031406
RAFT_EXPECTS(tol > 0, "Invalid tolerance.");
14041407
RAFT_EXPECTS(maxIter >= nEigVecs, "Invalid maxIter.");

cpp/include/raft/spectral/detail/matrix_wrappers.hpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -39,14 +39,14 @@
3939
// =========================================================
4040

4141
// Get index of matrix entry
42-
#define IDX(i, j, lda) ((i) + (j) * (lda))
42+
// Linear index of matrix entry (i, j): i + j * lda.
// Every operand is widened to size_t before the multiply; casting only (i)
// would still evaluate (j) * (lda) in the arguments' own type (e.g. int),
// which can overflow before the result is added to the size_t term.
#define IDX(i, j, lda) ((size_t)(i) + (size_t)(j) * (size_t)(lda))
4343

4444
namespace raft {
4545
namespace spectral {
4646
namespace matrix {
4747
namespace detail {
4848

49-
using size_type = int; // for now; TODO: move it in appropriate header
49+
using size_type = size_t; // for now; TODO: move it in appropriate header
5050

5151
// Apply diagonal matrix to vector:
5252
//
@@ -326,7 +326,7 @@ struct laplacian_matrix_t : sparse_matrix_t<index_type, value_type> {
326326
raft_handle, row_offsets, col_indices, values, nrows, nnz),
327327
diagonal_(raft_handle, nrows)
328328
{
329-
vector_t<value_type> ones{raft_handle, nrows};
329+
vector_t<value_type> ones{raft_handle, (size_t)nrows};
330330
ones.fill(1.0);
331331
sparse_matrix_t<index_type, value_type>::mv(1, ones.raw(), 0, diagonal_.raw());
332332
}
@@ -341,7 +341,7 @@ struct laplacian_matrix_t : sparse_matrix_t<index_type, value_type> {
341341
csr_m.nnz_),
342342
diagonal_(raft_handle, csr_m.nrows_)
343343
{
344-
vector_t<value_type> ones{raft_handle, csr_m.nrows_};
344+
vector_t<value_type> ones{raft_handle, (size_t)csr_m.nrows_};
345345
ones.fill(1.0);
346346
sparse_matrix_t<index_type, value_type>::mv(1, ones.raw(), 0, diagonal_.raw());
347347
}

0 commit comments

Comments
 (0)