-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathpinned_memory.zig
More file actions
40 lines (31 loc) · 1.18 KB
/
pinned_memory.zig
File metadata and controls
40 lines (31 loc) · 1.18 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
// examples/kernel/10_Integration/D_MemoryManagement/pinned_memory.zig
// Reference: cuda-samples/0_Introduction/simpleZeroCopy
// API: driver.allocPinned, driver.freePinned, stream.memcpyHtoDAsync, stream.memcpyDtoHAsync, stream.sync
const cuda = @import("zcuda");
const driver = cuda.driver;
/// Round-trips a buffer through the GPU using pinned (page-locked) host memory.
///
/// Steps, all issued on a single stream of a context created with
/// `CudaContext.new(0)`:
///   1. Fill a pinned host buffer with 0, 1, 2, ... converted to `f32`.
///   2. Enqueue an asynchronous host-to-device copy into a device buffer.
///   3. Enqueue an asynchronous device-to-host copy into a second pinned buffer.
///   4. Synchronize the stream and compare both host buffers element-wise.
///
/// Returns `error.PinnedCopyFailed` if any element differs after the round
/// trip; errors from the `zcuda` calls are propagated unchanged via `try`.
pub fn main() !void {
    var ctx = try driver.CudaContext.new(0);
    defer ctx.deinit();
    var stream = try ctx.newStream();
    defer stream.deinit();

    const n: u32 = 4096;

    // Allocate pinned host memory. The binding itself is never reassigned
    // (only the elements it refers to are written), so it is `const`.
    const h_pinned = try driver.allocPinned(f32, n);
    defer driver.freePinned(h_pinned);

    // Initialize the source buffer with its own indices.
    for (0..n) |i| h_pinned[i] = @floatFromInt(i);

    // Allocate device memory
    var d_buf = try stream.alloc(f32, n);
    defer d_buf.deinit();

    // Pinned H→D transfer (faster than pageable memory)
    try stream.memcpyHtoDAsync(f32, d_buf, h_pinned[0..n]);

    // Pinned D→H transfer into a separate pinned buffer so the result can be
    // compared against the untouched source.
    const h_result = try driver.allocPinned(f32, n);
    defer driver.freePinned(h_result);
    try stream.memcpyDtoHAsync(f32, h_result[0..n], d_buf);

    // Both copies were enqueued with the *Async calls; wait for the stream
    // before reading `h_result` on the host.
    try stream.sync();

    // The data was only copied, never computed on, so exact float equality
    // is the expected outcome.
    for (0..n) |i| {
        if (h_result[i] != h_pinned[i]) return error.PinnedCopyFailed;
    }
}