Skip to content

Commit 66508db

Browse files
Continue to make KvikIO a shared library by moving code from hpp to cpp (#581)
In light of the initiative to make KvikIO a shared library, this PR further separates the implementation from the interface as thoroughly as possible. This PR is marked "breaking" because: - The function `getenv_or`, initially in the `detail` namespace, has been moved outside to become part of the public API. cuDF uses this function and needs a timely code update. - Other classes and functions initially in the `detail` namespace for internal use only have been relocated to the `.cpp` files, so downstream applications that happen to use those entities will now see compile errors. Notes: - Functions initially prefixed with the attribute `[[nodiscard]]` in the header now have it in the declaration (.hpp) only, not in the definition (.cpp). - Classes or functions initially in the `detail` namespace from `X.hpp` are now moved to the unnamed namespace in the `X.cpp` files, unless they are used elsewhere (e.g. `Y.cpp`). Authors: - Tianyu Liu (https://github.com/kingcrimsontianyu) - Mads R. B. Kristensen (https://github.com/madsbk) Approvers: - Kyle Edwards (https://github.com/KyleFromNVIDIA) - Mads R. B. Kristensen (https://github.com/madsbk) URL: #581
1 parent 883971a commit 66508db

32 files changed

+2110
-1367
lines changed

cpp/CMakeLists.txt

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
# =============================================================================
2-
# Copyright (c) 2021-2024, NVIDIA CORPORATION.
2+
# Copyright (c) 2021-2025, NVIDIA CORPORATION.
33
#
44
# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
55
# in compliance with the License. You may obtain a copy of the License at
@@ -131,7 +131,23 @@ include(cmake/thirdparty/get_thread_pool.cmake)
131131
# ##################################################################################################
132132
# * library targets --------------------------------------------------------------------------------
133133

134-
set(SOURCES "src/file_handle.cpp")
134+
set(SOURCES
135+
"src/batch.cpp"
136+
"src/bounce_buffer.cpp"
137+
"src/buffer.cpp"
138+
"src/cufile/config.cpp"
139+
"src/cufile/driver.cpp"
140+
"src/defaults.cpp"
141+
"src/error.cpp"
142+
"src/file_handle.cpp"
143+
"src/posix_io.cpp"
144+
"src/shim/cuda.cpp"
145+
"src/shim/cufile.cpp"
146+
"src/shim/libcurl.cpp"
147+
"src/shim/utils.cpp"
148+
"src/stream.cpp"
149+
"src/utils.cpp"
150+
)
135151

136152
if(KvikIO_REMOTE_SUPPORT)
137153
list(APPEND SOURCES "src/remote_handle.cpp")

cpp/include/kvikio/batch.hpp

Lines changed: 17 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2023-2024, NVIDIA CORPORATION.
2+
* Copyright (c) 2023-2025, NVIDIA CORPORATION.
33
*
44
* Licensed under the Apache License, Version 2.0 (the "License");
55
* you may not use this file except in compliance with the License.
@@ -73,68 +73,30 @@ class BatchHandle {
7373
*
7474
* @param max_num_events The maximum number of operations supported by this instance.
7575
*/
76-
BatchHandle(int max_num_events) : _initialized{true}, _max_num_events{max_num_events}
77-
{
78-
CUFILE_TRY(cuFileAPI::instance().BatchIOSetUp(&_handle, max_num_events));
79-
}
76+
BatchHandle(int max_num_events);
8077

8178
/**
8279
* @brief BatchHandle supports move semantics but isn't copyable
8380
*/
8481
BatchHandle(const BatchHandle&) = delete;
8582
BatchHandle& operator=(BatchHandle const&) = delete;
86-
BatchHandle(BatchHandle&& o) noexcept
87-
: _initialized{std::exchange(o._initialized, false)},
88-
_max_num_events{std::exchange(o._max_num_events, 0)}
89-
{
90-
_handle = std::exchange(o._handle, CUfileBatchHandle_t{});
91-
}
92-
~BatchHandle() noexcept { close(); }
83+
BatchHandle(BatchHandle&& o) noexcept;
84+
~BatchHandle() noexcept;
9385

94-
[[nodiscard]] bool closed() const noexcept { return !_initialized; }
86+
[[nodiscard]] bool closed() const noexcept;
9587

9688
/**
9789
* @brief Destroy the batch handle and free up resources
9890
*/
99-
void close() noexcept
100-
{
101-
if (closed()) { return; }
102-
_initialized = false;
103-
104-
cuFileAPI::instance().BatchIODestroy(_handle);
105-
}
91+
void close() noexcept;
10692

10793
/**
10894
* @brief Submit a vector of batch operations
10995
*
11096
* @param operations The vector of batch operations, which must not exceed the
11197
* `max_num_events`.
11298
*/
113-
void submit(const std::vector<BatchOp>& operations)
114-
{
115-
if (convert_size2ssize(operations.size()) > _max_num_events) {
116-
throw CUfileException("Cannot submit more than the max_num_events)");
117-
}
118-
std::vector<CUfileIOParams_t> io_batch_params;
119-
io_batch_params.reserve(operations.size());
120-
for (const auto& op : operations) {
121-
if (op.file_handle.is_compat_mode_preferred()) {
122-
throw CUfileException("Cannot submit a FileHandle opened in compatibility mode");
123-
}
124-
125-
io_batch_params.push_back(CUfileIOParams_t{.mode = CUFILE_BATCH,
126-
.u = {.batch = {.devPtr_base = op.devPtr_base,
127-
.file_offset = op.file_offset,
128-
.devPtr_offset = op.devPtr_offset,
129-
.size = op.size}},
130-
.fh = op.file_handle.handle(),
131-
.opcode = op.opcode,
132-
.cookie = nullptr});
133-
}
134-
135-
CUFILE_TRY(cuFileAPI::instance().BatchIOSubmit(
136-
_handle, io_batch_params.size(), io_batch_params.data(), 0));
137-
}
99+
void submit(const std::vector<BatchOp>& operations);
138100

139101
/**
140102
* @brief Get status of submitted operations
@@ -148,16 +110,9 @@ class BatchHandle {
148110
*/
149111
std::vector<CUfileIOEvents_t> status(unsigned min_nr,
150112
unsigned max_nr,
151-
struct timespec* timeout = nullptr)
152-
{
153-
std::vector<CUfileIOEvents_t> ret;
154-
ret.resize(_max_num_events);
155-
CUFILE_TRY(cuFileAPI::instance().BatchIOGetStatus(_handle, min_nr, &max_nr, &ret[0], timeout));
156-
ret.resize(max_nr);
157-
return ret;
158-
}
159-
160-
void cancel() { CUFILE_TRY(cuFileAPI::instance().BatchIOCancel(_handle)); }
113+
struct timespec* timeout = nullptr);
114+
115+
void cancel();
161116
};
162117

163118
#else
@@ -166,24 +121,19 @@ class BatchHandle {
166121
public:
167122
BatchHandle() noexcept = default;
168123

169-
BatchHandle(int max_num_events)
170-
{
171-
throw CUfileException("BatchHandle requires cuFile's batch API, please build with CUDA v12.1+");
172-
}
124+
BatchHandle(int max_num_events);
173125

174-
[[nodiscard]] bool closed() const noexcept { return true; }
126+
[[nodiscard]] bool closed() const noexcept;
175127

176-
void close() noexcept {}
128+
void close() noexcept;
177129

178-
void submit(const std::vector<BatchOp>& operations) {}
130+
void submit(const std::vector<BatchOp>& operations);
179131

180132
std::vector<CUfileIOEvents_t> status(unsigned min_nr,
181133
unsigned max_nr,
182-
struct timespec* timeout = nullptr)
183-
{
184-
return std::vector<CUfileIOEvents_t>{};
185-
}
186-
void cancel() {}
134+
struct timespec* timeout = nullptr);
135+
136+
void cancel();
187137
};
188138

189139
#endif

cpp/include/kvikio/bounce_buffer.hpp

Lines changed: 14 additions & 70 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2024, NVIDIA CORPORATION.
2+
* Copyright (c) 2024-2025, NVIDIA CORPORATION.
33
*
44
* Licensed under the Apache License, Version 2.0 (the "License");
55
* you may not use this file except in compliance with the License.
@@ -15,7 +15,6 @@
1515
*/
1616
#pragma once
1717

18-
#include <mutex>
1918
#include <stack>
2019

2120
#include <kvikio/defaults.hpp>
@@ -47,18 +46,15 @@ class AllocRetain {
4746
std::size_t const _size;
4847

4948
public:
50-
Alloc(AllocRetain* manager, void* alloc, std::size_t size)
51-
: _manager(manager), _alloc{alloc}, _size{size}
52-
{
53-
}
49+
Alloc(AllocRetain* manager, void* alloc, std::size_t size);
5450
Alloc(Alloc const&) = delete;
5551
Alloc& operator=(Alloc const&) = delete;
5652
Alloc(Alloc&& o) = delete;
5753
Alloc& operator=(Alloc&& o) = delete;
58-
~Alloc() noexcept { _manager->put(_alloc, _size); }
59-
void* get() noexcept { return _alloc; }
60-
void* get(std::ptrdiff_t offset) noexcept { return static_cast<char*>(_alloc) + offset; }
61-
std::size_t size() noexcept { return _size; }
54+
~Alloc() noexcept;
55+
void* get() noexcept;
56+
void* get(std::ptrdiff_t offset) noexcept;
57+
std::size_t size() noexcept;
6258
};
6359

6460
AllocRetain() = default;
@@ -77,80 +73,28 @@ class AllocRetain {
7773
*
7874
* @return The number of bytes cleared
7975
*/
80-
std::size_t _clear()
81-
{
82-
std::size_t ret = _free_allocs.size() * _size;
83-
while (!_free_allocs.empty()) {
84-
CUDA_DRIVER_TRY(cudaAPI::instance().MemFreeHost(_free_allocs.top()));
85-
_free_allocs.pop();
86-
}
87-
return ret;
88-
}
76+
std::size_t _clear();
8977

9078
/**
9179
* @brief Ensure the sizes of the retained allocations match `defaults::bounce_buffer_size()`
9280
*
9381
* NB: `_mutex` must be taken prior to calling this function.
9482
*/
95-
void _ensure_alloc_size()
96-
{
97-
auto const bounce_buffer_size = defaults::bounce_buffer_size();
98-
if (_size != bounce_buffer_size) {
99-
_clear();
100-
_size = bounce_buffer_size;
101-
}
102-
}
83+
void _ensure_alloc_size();
10384

10485
public:
105-
[[nodiscard]] Alloc get()
106-
{
107-
std::lock_guard const lock(_mutex);
108-
_ensure_alloc_size();
109-
110-
// Check if we have an allocation available
111-
if (!_free_allocs.empty()) {
112-
void* ret = _free_allocs.top();
113-
_free_allocs.pop();
114-
return Alloc(this, ret, _size);
115-
}
116-
117-
// If no available allocation, allocate and register a new one
118-
void* alloc{};
119-
// Allocate page-locked host memory
120-
CUDA_DRIVER_TRY(cudaAPI::instance().MemHostAlloc(&alloc, _size, CU_MEMHOSTREGISTER_PORTABLE));
121-
return Alloc(this, alloc, _size);
122-
}
123-
124-
void put(void* alloc, std::size_t size)
125-
{
126-
std::lock_guard const lock(_mutex);
127-
_ensure_alloc_size();
128-
129-
// If the size of `alloc` matches the sizes of the retained allocations,
130-
// it is added to the set of free allocation otherwise it is freed.
131-
if (size == _size) {
132-
_free_allocs.push(alloc);
133-
} else {
134-
CUDA_DRIVER_TRY(cudaAPI::instance().MemFreeHost(alloc));
135-
}
136-
}
86+
[[nodiscard]] Alloc get();
87+
88+
void put(void* alloc, std::size_t size);
13789

13890
/**
13991
* @brief Free all retained allocations
14092
*
14193
* @return The number of bytes cleared
14294
*/
143-
std::size_t clear()
144-
{
145-
std::lock_guard const lock(_mutex);
146-
return _clear();
147-
}
148-
149-
KVIKIO_EXPORT static AllocRetain& instance()
150-
{
151-
static AllocRetain _instance;
152-
return _instance;
153-
}
95+
std::size_t clear();
96+
97+
KVIKIO_EXPORT static AllocRetain& instance();
15498

15599
AllocRetain(AllocRetain const&) = delete;
156100
AllocRetain& operator=(AllocRetain const&) = delete;

cpp/include/kvikio/buffer.hpp

Lines changed: 10 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2021-2024, NVIDIA CORPORATION.
2+
* Copyright (c) 2021-2025, NVIDIA CORPORATION.
33
*
44
* Licensed under the Apache License, Version 2.0 (the "License");
55
* you may not use this file except in compliance with the License.
@@ -15,17 +15,8 @@
1515
*/
1616
#pragma once
1717

18-
#include <algorithm>
19-
#include <iostream>
20-
#include <map>
2118
#include <vector>
2219

23-
#include <kvikio/defaults.hpp>
24-
#include <kvikio/error.hpp>
25-
#include <kvikio/shim/cufile.hpp>
26-
#include <kvikio/shim/cufile_h_wrapper.hpp>
27-
#include <kvikio/utils.hpp>
28-
2920
namespace kvikio {
3021

3122
/**
@@ -44,32 +35,17 @@ namespace kvikio {
4435
* streaming buffer that is reused across multiple cuFile IO operations.
4536
*/
4637
/*NOLINTNEXTLINE(readability-function-cognitive-complexity)*/
47-
inline void buffer_register(const void* devPtr_base,
48-
std::size_t size,
49-
int flags = 0,
50-
const std::vector<int>& errors_to_ignore = std::vector<int>())
51-
{
52-
if (defaults::is_compat_mode_preferred()) { return; }
53-
CUfileError_t status = cuFileAPI::instance().BufRegister(devPtr_base, size, flags);
54-
if (status.err != CU_FILE_SUCCESS) {
55-
// Check if `status.err` is in `errors_to_ignore`
56-
if (std::find(errors_to_ignore.begin(), errors_to_ignore.end(), status.err) ==
57-
errors_to_ignore.end()) {
58-
CUFILE_TRY(status);
59-
}
60-
}
61-
}
38+
void buffer_register(const void* devPtr_base,
39+
std::size_t size,
40+
int flags = 0,
41+
const std::vector<int>& errors_to_ignore = std::vector<int>());
6242

6343
/**
6444
* @brief Deregister already-registered device memory from cuFile
6545
*
6646
* @param devPtr_base device pointer to deregister
6747
*/
68-
inline void buffer_deregister(const void* devPtr_base)
69-
{
70-
if (defaults::is_compat_mode_preferred()) { return; }
71-
CUFILE_TRY(cuFileAPI::instance().BufDeregister(devPtr_base));
72-
}
48+
void buffer_deregister(const void* devPtr_base);
7349

7450
/**
7551
* @brief Register device memory allocation which is part of devPtr. Use this
@@ -85,23 +61,15 @@ inline void buffer_deregister(const void* devPtr_base)
8561
* @warning This API is intended for use cases where the memory is used as a
8662
* streaming buffer that is reused across multiple cuFile IO operations.
8763
*/
88-
inline void memory_register(const void* devPtr,
89-
int flags = 0,
90-
const std::vector<int>& errors_to_ignore = {})
91-
{
92-
auto [base, nbytes, offset] = get_alloc_info(devPtr);
93-
buffer_register(base, nbytes, flags, errors_to_ignore);
94-
}
64+
void memory_register(const void* devPtr,
65+
int flags = 0,
66+
const std::vector<int>& errors_to_ignore = {});
9567

9668
/**
9769
* @brief Deregister already-registered device memory from cuFile.
9870
*
9971
* @param devPtr device pointer to deregister
10072
*/
101-
inline void memory_deregister(const void* devPtr)
102-
{
103-
auto [base, nbytes, offset] = get_alloc_info(devPtr);
104-
buffer_deregister(base);
105-
}
73+
void memory_deregister(const void* devPtr);
10674

10775
} // namespace kvikio

0 commit comments

Comments
 (0)