Driver Module

Device management, memory allocation, kernel launch, streams, events, and graphs.

Import: const driver = @import("zcuda").driver;

CudaContext

Entry point for all CUDA operations. Manages the device, streams, memory, and modules.

Creation & Lifecycle

fn new(ordinal: usize) !*CudaContext;    // Create context on device N
fn deinit(self: *const Self) void;       // Release context
fn bindToThread(self) !void;             // Bind context to current thread

Device Info

fn deviceCount() !i32;                           // Number of CUDA devices
fn name(self) []const u8;                         // Device name
fn uuid(self) !CUuuid;                            // Device UUID
fn computeCapability(self) !struct{major, minor}; // SM version
fn totalMem(self) !usize;                         // Total memory (bytes)
fn attribute(self, attr) !i32;                     // Query device attribute
fn getOrdinal(self) usize;                         // Device ordinal index

Memory Info & Limits

fn freeMem(self) !usize;                          // Free memory (bytes)
fn memInfo(self) !struct{free, total};             // Free/total memory
fn getLimit(self, limit) !usize;                   // Query context limit
fn setLimit(self, limit, value) !void;             // Set context limit
fn getCacheConfig(self) !CUfunc_cache;             // L1/shared preference
fn setCacheConfig(self, config) !void;             // Set L1/shared preference
fn setBlockingSynchronize(self) !void;             // Enable blocking sync
fn synchronize(self) !void;                        // Synchronize context

Stream & Module Management

fn defaultStream(self) *const CudaStream;          // Default stream
fn newStream(self) !CudaStream;                    // Create non-blocking stream
fn loadModule(self, ptx) !CudaModule;              // Load PTX module
fn createEvent(self, flags) !CudaEvent;            // Create event
fn allocManaged(self, T, len) !CudaSlice(T);       // Unified memory

CudaStream

Asynchronous execution stream for memory operations and kernel launches.

Memory Operations

fn alloc(T, allocator, n) !CudaSlice(T);          // Allocate device memory
fn allocZeros(T, allocator, n) !CudaSlice(T);     // Allocate + zero-fill
fn cloneHtoD(T, host_slice) !CudaSlice(T);        // Clone host → new device buf
fn memcpyHtoD(T, dst, src) !void;                  // Copy host → device
fn memcpyDtoH(T, dst, src) !void;                  // Copy device → host
fn cloneDtoH(T, allocator, src) ![]T;              // Clone device → new host buf
fn memcpyDtoD(T, dst, src) !void;                  // Copy device → device
fn memcpyHtoDAsync(T, dst, src) !void;             // Async host → device
fn memcpyDtoHAsync(T, dst, src) !void;             // Async device → host
fn memcpyDtoDAsync(T, dst, src) !void;             // Async device → device

Kernel Launch

fn launch(func, config, args) !void;               // Launch kernel

Synchronization & Events

fn synchronize(self) !void;                        // Wait for all operations
fn waitEvent(self, event) !void;                    // Wait for event
fn query(self) !bool;                              // Non-blocking completion check
fn createEvent(self, flags) !CudaEvent;            // Create event
fn recordEvent(self, event) !void;                 // Record event

Unified Memory & Graph Capture

fn prefetchAsync(T, slice) !void;                  // Prefetch to device
fn beginCapture(self) !void;                       // Begin graph capture
fn endCapture(self) !?CudaGraph;                   // End capture → executable graph
fn captureStatus(self) !CUstreamCaptureStatus;     // Query capture status

CudaSlice(T)

Typed, owning device memory (analogous to Rust's Vec<T>, but backed by GPU memory).

fn deinit(self) void;                              // Free device memory
fn slice(self, start, end) CudaView(T);            // Immutable sub-view
fn sliceMut(self, start, end) CudaViewMut(T);      // Mutable sub-view
fn devicePtr(self) DevicePtr(T);                    // Get typed device pointer

CudaView(T) / CudaViewMut(T)

Non-owning views into device memory: CudaView(T) is read-only and CudaViewMut(T) is mutable (analogous to the Zig slices []const T and []T, respectively).

fn devicePtr(self) DevicePtr(T);                    // Get typed device pointer
fn subView(self, start, end) Self;                  // Create sub-view

CudaModule / CudaFunction

// CudaModule
fn deinit(self) void;                              // Unload module
fn getFunction(self, name) !CudaFunction;          // Get kernel by name

// CudaFunction
fn getAttribute(self, attrib) !i32;                // Query function attribute

CudaEvent

fn deinit(self) void;                              // Destroy event
fn record(self, stream) !void;                     // Record on stream
fn synchronize(self) !void;                        // Wait for event
fn elapsedTime(start, end) !f32;                   // Milliseconds between events
fn query(self) !bool;                              // Non-blocking completion check

CudaGraph

fn launch(self) !void;                             // Replay recorded graph
fn deinit(self) void;                              // Destroy graph

Shared Types

const Dim3 = struct { x: u32 = 1, y: u32 = 1, z: u32 = 1 };

const LaunchConfig = struct {
    grid_dim: Dim3,
    block_dim: Dim3,
    shared_mem_bytes: u32,

    fn forNumElems(n: u32) LaunchConfig;            // Auto-configure for N elements
    fn forNumElemsCustom(n: u32, tpb: u32) LaunchConfig;
};

const DevicePtr = fn(T: type) struct { ptr: usize };

Example

const cuda = @import("zcuda");

const ctx = try cuda.driver.CudaContext.new(0);
defer ctx.deinit();

const stream = ctx.defaultStream();
const data = try stream.cloneHtoD(f32, &[_]f32{ 1.0, 2.0, 3.0 });
defer data.deinit();

var result: [3]f32 = undefined;
try stream.memcpyDtoH(f32, &result, data);

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Driver Module

CudaContext

Creation & Lifecycle

Device Info

Memory Info & Limits

Stream & Module Management

CudaStream

Memory Operations

Kernel Launch

Synchronization & Events

Unified Memory & Graph Capture

CudaSlice(T)

CudaView(T) / CudaViewMut(T)

CudaModule / CudaFunction

CudaEvent

CudaGraph

Shared Types

Example

FilesExpand file tree

README.md

Latest commit

History

README.md

File metadata and controls

Driver Module

CudaContext

Creation & Lifecycle

Device Info

Memory Info & Limits

Stream & Module Management

CudaStream

Memory Operations

Kernel Launch

Synchronization & Events

Unified Memory & Graph Capture

CudaSlice(T)

CudaView(T) / CudaViewMut(T)

CudaModule / CudaFunction

CudaEvent

CudaGraph

Shared Types

Example