Skip to content

Commit

Permalink
Merge pull request #18 from bobcao3/master
Browse files Browse the repository at this point in the history
KMT handles on Windows & Better protection for on-the-fly GPU resources
  • Loading branch information
MCRcortex authored Sep 23, 2023
2 parents 230fee3 + 23815f5 commit 9ee80c0
Show file tree
Hide file tree
Showing 3 changed files with 224 additions and 188 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@
import org.lwjgl.vulkan.*;

import java.util.*;
import java.util.concurrent.LinkedBlockingDeque;

import static org.lwjgl.system.MemoryStack.stackPush;
import static org.lwjgl.util.vma.Vma.VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT;
Expand Down Expand Up @@ -130,7 +129,7 @@ public void buildTLAS(VSemaphore semIn, VSemaphore semOut, VSemaphore[] blocking

var buildInfo = VkAccelerationStructureBuildGeometryInfoKHR.calloc(1, stack)
.sType$Default()
.mode(VK_BUILD_ACCELERATION_STRUCTURE_MODE_BUILD_KHR)//TODO: explore using VK_BUILD_ACCELERATION_STRUCTURE_MODE_UPDATE_KHR to speedup build times
.mode(VK_BUILD_ACCELERATION_STRUCTURE_MODE_BUILD_KHR)
.type(VK_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL_KHR)
.pGeometries(geometries)
.geometryCount(geometries.capacity());
Expand Down Expand Up @@ -335,7 +334,11 @@ private final class TLASSectionManager extends TLASGeometryManager {
private int setCapacity = 0;

// A queued descriptor update. When the queue is drained, the three components are passed
// unchanged to dub.buffer(binding, dstArrayElement, buffers).
private record DescUpdateJob(int binding, int dstArrayElement, List<VBuffer> buffers) {}
private final LinkedBlockingDeque<DescUpdateJob> descUpdateJobs = new LinkedBlockingDeque<>();
// A deferred release of geometry resources. fenceTick() consumes these by calling
// arena.free(index, count) and then free() on every buffer in geometryBuffers.
private record ArenaDeallocJob(int index, int count, List<VBuffer> geometryBuffers) {}

private final Deque<DescUpdateJob> descUpdateJobs = new ArrayDeque<>();
private final Deque<ArenaDeallocJob> arenaDeallocJobs = new ArrayDeque<>();
private final Deque<VDescriptorPool> descPoolsToRelease = new ArrayDeque<>();

public void resizeBindlessSet(int newSize, VFence fence) {
if (geometryBufferSetLayout == null) {
Expand Down Expand Up @@ -364,13 +367,7 @@ public void resizeBindlessSet(int newSize, VFence fence) {
vkUpdateDescriptorSets(context.device, null, setCopy);
}

// This breaks the shit out of it
// context.sync.addCallback(fence, () -> {
// geometryBufferDescPool.free();
// });

vkDeviceWaitIdle(context.device);
geometryBufferDescPool.free();
descPoolsToRelease.add(geometryBufferDescPool);
}

geometryBufferDescPool = newGeometryBufferDescPool;
Expand All @@ -396,6 +393,9 @@ public void setGeometryUpdateMemory(VCmdBuff cmd, VFence fence, VkAccelerationSt
dub.buffer(job.binding, job.dstArrayElement, job.buffers);
}
dub.apply();

// Queue up the arena dealloc jobs to be done after the fence is done
Vulkanite.INSTANCE.addSyncedCallback(() -> { fenceTick(); });
}

//TODO: mixin into RenderSection and add a reference to a holder for us; it's much faster than a hashmap
Expand All @@ -415,6 +415,17 @@ private Holder(int id, RenderSection section) {

Map<ChunkSectionPos, Holder> tmp = new HashMap<>();

/**
 * Drains both deferred-release queues.
 *
 * <p>Every queued {@code ArenaDeallocJob} has its arena range returned via
 * {@code arena.free(index, count)} and each of its geometry buffers freed; afterwards every
 * descriptor pool waiting in {@code descPoolsToRelease} is freed.
 *
 * <p>NOTE(review): this is registered through {@code Vulkanite.INSTANCE.addSyncedCallback},
 * so it presumably runs only once the GPU is done with these resources; confirm against
 * the callback's contract.
 */
public void fenceTick() {
    // Both queues are ArrayDeques, which never hold null, so a null poll() means "empty".
    for (var pending = arenaDeallocJobs.poll(); pending != null; pending = arenaDeallocJobs.poll()) {
        arena.free(pending.index, pending.count);
        pending.geometryBuffers.forEach(VBuffer::free);
    }

    for (var pool = descPoolsToRelease.poll(); pool != null; pool = descPoolsToRelease.poll()) {
        pool.free();
    }
}

public void update(AccelerationBlasBuilder.BLASBuildResult result) {
var data = result.data();
var holder = tmp.computeIfAbsent(data.section().getPosition(), a -> new Holder(alloc(), data.section()));
Expand All @@ -424,8 +435,7 @@ public void update(AccelerationBlasBuilder.BLASBuildResult result) {
holder.structure = result.structure();

if (holder.geometryIndex != -1) {
arena.free(holder.geometryIndex, holder.geometryBuffers.size());
holder.geometryBuffers.forEach(buffer -> Vulkanite.INSTANCE.addSyncedCallback(buffer::free));
arenaDeallocJobs.add(new ArenaDeallocJob(holder.geometryIndex, holder.geometryBuffers.size(), holder.geometryBuffers));
}
holder.geometryBuffers = data.geometryBuffers();
holder.geometryIndex = arena.allocate(holder.geometryBuffers.size());
Expand Down Expand Up @@ -454,10 +464,15 @@ public void remove(RenderSection section) {

free(holder.id);

// Drop any queued descriptor updates that still reference this section's buffers.
// Removal must go through the iterator: descUpdateJobs is an ArrayDeque, whose iterator
// is fail-fast, so calling descUpdateJobs.remove(job) from inside a for-each can throw
// ConcurrentModificationException or silently skip the entry after the removed one.
for (var pendingJobs = descUpdateJobs.iterator(); pendingJobs.hasNext(); ) {
    // Identity comparison is intentional: only jobs built from this exact buffer list match.
    if (pendingJobs.next().buffers == holder.geometryBuffers) {
        pendingJobs.remove();
    }
}

if (holder.geometryIndex != -1) {
arena.free(holder.geometryIndex, holder.geometryBuffers.size());
arenaDeallocJobs.add(new ArenaDeallocJob(holder.geometryIndex, holder.geometryBuffers.size(), holder.geometryBuffers));
}
holder.geometryBuffers.forEach(buffer -> Vulkanite.INSTANCE.addSyncedCallback(buffer::free));
}
}

Expand Down
58 changes: 15 additions & 43 deletions src/main/java/me/cortex/vulkanite/lib/memory/MemoryManager.java
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
import static org.lwjgl.opengl.EXTMemoryObjectFD.GL_HANDLE_TYPE_OPAQUE_FD_EXT;
import static org.lwjgl.opengl.EXTMemoryObjectFD.glImportMemoryFdEXT;
import static org.lwjgl.opengl.EXTMemoryObjectWin32.glImportMemoryWin32HandleEXT;
import static org.lwjgl.opengl.EXTMemoryObjectWin32.GL_HANDLE_TYPE_OPAQUE_WIN32_KMT_EXT;
import static org.lwjgl.opengl.EXTSemaphoreWin32.GL_HANDLE_TYPE_OPAQUE_WIN32_EXT;
import static org.lwjgl.opengl.GL11C.*;
import static org.lwjgl.opengl.GL12.GL_TEXTURE_3D;
Expand All @@ -35,31 +36,20 @@
import static org.lwjgl.vulkan.VK10.*;
import static org.lwjgl.vulkan.VK11.VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT;
import static org.lwjgl.vulkan.VK11.VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT;
import static org.lwjgl.vulkan.VK11.VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT;

public class MemoryManager {
private static final int EXTERNAL_MEMORY_HANDLE_TYPE = Vulkanite.IS_WINDOWS?VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT:VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT;
private static final int EXTERNAL_MEMORY_HANDLE_TYPE = Vulkanite.IS_WINDOWS?VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT:VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT;
private final VkDevice device;
private final VmaAllocator allocator;
private final VmaAllocator.MemoryPool sharedBlocked;
private final VmaAllocator.MemoryPool sharedDedicated;
private final boolean hasDeviceAddresses;

private static final long sharedBlockSize = 64L << 20L; // 64 MB

public MemoryManager(VkDevice device, boolean hasDeviceAddresses) {
this.device = device;
this.hasDeviceAddresses = hasDeviceAddresses;
allocator = new VmaAllocator(device, hasDeviceAddresses);
//Note: this technically creates a memory leak, since we never free it, however
// memory manager should never be created more than once per application, so it should be ok
sharedBlocked = allocator.createPool(sharedBlockSize,
VkExportMemoryAllocateInfo.calloc()
.sType$Default()
.handleTypes(EXTERNAL_MEMORY_HANDLE_TYPE));
sharedDedicated = allocator.createPool(0,
VkExportMemoryAllocateInfo.calloc()
.sType$Default()
.handleTypes(EXTERNAL_MEMORY_HANDLE_TYPE));
allocator = new VmaAllocator(device, this.hasDeviceAddresses, sharedBlockSize, EXTERNAL_MEMORY_HANDLE_TYPE);
}

public class ExternalMemoryTracker {
Expand All @@ -83,7 +73,7 @@ public static int acquire(VmaAllocator.Allocation allocation, VkDevice device) {
_CHECK_(vkGetMemoryWin32HandleKHR(device, VkMemoryGetWin32HandleInfoKHR.calloc(stack)
.sType$Default()
.memory(vkMemory)
.handleType(VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT), pb));
.handleType(VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_KMT_BIT), pb));
nativeHandle = pb.get(0);
} else {
IntBuffer pb = stack.callocInt(1);
Expand All @@ -104,7 +94,7 @@ public static int acquire(VmaAllocator.Allocation allocation, VkDevice device) {
if (Vulkanite.IS_WINDOWS) {
glImportMemoryWin32HandleEXT(newMemoryObject,
memorySize,
GL_HANDLE_TYPE_OPAQUE_WIN32_EXT, nativeHandle);
GL_HANDLE_TYPE_OPAQUE_WIN32_KMT_EXT, nativeHandle);
_CHECK_GL_ERROR_();
} else {
glImportMemoryFdEXT(newMemoryObject, memorySize,
Expand Down Expand Up @@ -142,16 +132,16 @@ public static void release(long memory) {
glDeleteMemoryObjectsEXT(tracked.desc.glMemoryObj);
_CHECK_GL_ERROR_();
if (Vulkanite.IS_WINDOWS) {
if (!Kernel32.INSTANCE.CloseHandle(new WinNT.HANDLE(new Pointer(tracked.desc.handle)))) {
int error = Kernel32.INSTANCE.GetLastError();
System.err.println("STATE MIGHT BE BROKEN! Failed to close handle: " + error);
// throw new IllegalStateException();
}
// if (!Kernel32.INSTANCE.CloseHandle(new WinNT.HANDLE(new Pointer(tracked.desc.handle)))) {
// int error = Kernel32.INSTANCE.GetLastError();
// System.err.println("STATE MIGHT BE BROKEN! Failed to close handle: " + error);
// throw new IllegalStateException();
// }
} else {
int code = 0;
if ((code = LibC.INSTANCE.close((int) tracked.desc.handle)) != 0) {
System.err.println("STATE MIGHT BE BROKEN! Failed to close FD: " + code);
// throw new IllegalStateException();
throw new IllegalStateException();
}
}
MEMORY_TO_HANDLES.remove(memory);
Expand All @@ -163,12 +153,7 @@ public static void release(long memory) {
};

public VGBuffer createSharedBuffer(long size, int usage, int properties) {
return createSharedBuffer(size, usage, properties, 0);
}
public VGBuffer createSharedBuffer(long size, int usage, int properties, int alignment) {
try (var stack = stackPush()) {
VmaAllocator.BufferAllocation alloc = null;

var bufferCreateInfo = VkBufferCreateInfo
.calloc(stack)
.sType$Default()
Expand All @@ -177,16 +162,11 @@ public VGBuffer createSharedBuffer(long size, int usage, int properties, int ali
.pNext(VkExternalMemoryBufferCreateInfo.calloc(stack)
.sType$Default()
.handleTypes(EXTERNAL_MEMORY_HANDLE_TYPE));

var allocationCreateInfo = VmaAllocationCreateInfo.calloc(stack)
.usage(VMA_MEMORY_USAGE_AUTO)
.requiredFlags(properties);

if (size > sharedBlockSize) {
allocationCreateInfo.flags(VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT);
alloc = sharedDedicated.alloc(bufferCreateInfo, allocationCreateInfo, alignment);
} else {
alloc = sharedBlocked.alloc(bufferCreateInfo, allocationCreateInfo, alignment);
}
var alloc = allocator.allocShared(bufferCreateInfo, allocationCreateInfo);

int memoryObject = ExternalMemoryTracker.acquire(alloc, device);

Expand Down Expand Up @@ -229,17 +209,9 @@ public VGImage createSharedImage(int width, int height, int depth, int mipLevels
createInfo.extent().width(width).height(height).depth(depth);

var allocInfo = VmaAllocationCreateInfo.calloc(stack)
.usage(VMA_MEMORY_USAGE_AUTO)
.requiredFlags(properties);

VmaAllocator.ImageAllocation alloc = null;

try {
alloc = sharedBlocked.alloc(createInfo, allocInfo);
} catch(AssertionError e) {
allocInfo.flags(VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT);
alloc = sharedDedicated.alloc(createInfo, allocInfo);
}
var alloc = allocator.allocShared(createInfo, allocInfo);

int memoryObject = ExternalMemoryTracker.acquire(alloc, device);

Expand Down
Loading

0 comments on commit 9ee80c0

Please sign in to comment.