From 40448d1dc84e589c1f1632c9a0e48821143975c0 Mon Sep 17 00:00:00 2001 From: Cheng Cao Date: Wed, 20 Sep 2023 21:57:05 -0700 Subject: [PATCH] BINDLESS GEOMETRY BUFFERS! --- .../acceleration/AccelerationBlasBuilder.java | 2 +- .../acceleration/AccelerationManager.java | 9 +- .../acceleration/AccelerationTLASManager.java | 109 ++++++++++++++---- .../client/rendering/VulkanPipeline.java | 5 +- .../DescriptorSetLayoutBuilder.java | 19 +++ .../descriptors/DescriptorUpdateBuilder.java | 22 ++++ .../lib/descriptors/VDescriptorPool.java | 42 ++++++- 7 files changed, 178 insertions(+), 30 deletions(-) diff --git a/src/main/java/me/cortex/vulkanite/acceleration/AccelerationBlasBuilder.java b/src/main/java/me/cortex/vulkanite/acceleration/AccelerationBlasBuilder.java index a49d9fc..c3cb5d1 100644 --- a/src/main/java/me/cortex/vulkanite/acceleration/AccelerationBlasBuilder.java +++ b/src/main/java/me/cortex/vulkanite/acceleration/AccelerationBlasBuilder.java @@ -349,7 +349,7 @@ private void run() { private VBuffer uploadTerrainGeometry(BuiltSectionMeshParts meshParts, VCmdBuff cmd) { var buff = context.memory.createBuffer(meshParts.getVertexData().getLength(), - VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT, + VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT); cmd.encodeDataUpload(context.memory, MemoryUtil.memAddress(meshParts.getVertexData().getDirectBuffer()), buff, 0, meshParts.getVertexData().getLength()); diff --git a/src/main/java/me/cortex/vulkanite/acceleration/AccelerationManager.java b/src/main/java/me/cortex/vulkanite/acceleration/AccelerationManager.java index 998f42e..9a06609 100644 --- a/src/main/java/me/cortex/vulkanite/acceleration/AccelerationManager.java +++ b/src/main/java/me/cortex/vulkanite/acceleration/AccelerationManager.java @@ -1,6 +1,7 @@ package me.cortex.vulkanite.acceleration; import me.cortex.vulkanite.lib.base.VContext; +import me.cortex.vulkanite.lib.descriptors.VDescriptorSetLayout; import me.cortex.vulkanite.lib.memory.VAccelerationStructure; import me.cortex.vulkanite.lib.memory.VBuffer; import me.cortex.vulkanite.lib.other.sync.VSemaphore; @@ -72,7 +73,11 @@ public void cleanup() { tlasManager.cleanupTick(); } - public VBuffer getReferenceBuffer() { - return tlasManager.getReferenceBuffer(); + public long getGeometrySet() { + return tlasManager.getGeometrySet(); + } + + public VDescriptorSetLayout getGeometryLayout() { + return tlasManager.getGeometryLayout(); } } diff --git a/src/main/java/me/cortex/vulkanite/acceleration/AccelerationTLASManager.java b/src/main/java/me/cortex/vulkanite/acceleration/AccelerationTLASManager.java index 648c101..7cb5cd8 100644 --- a/src/main/java/me/cortex/vulkanite/acceleration/AccelerationTLASManager.java +++ b/src/main/java/me/cortex/vulkanite/acceleration/AccelerationTLASManager.java @@ -7,25 +7,29 @@ import me.cortex.vulkanite.lib.base.VContext; import me.cortex.vulkanite.lib.cmd.VCmdBuff; import me.cortex.vulkanite.lib.cmd.VCommandPool; +import me.cortex.vulkanite.lib.descriptors.DescriptorSetLayoutBuilder; +import me.cortex.vulkanite.lib.descriptors.DescriptorUpdateBuilder; +import me.cortex.vulkanite.lib.descriptors.VDescriptorPool; +import me.cortex.vulkanite.lib.descriptors.VDescriptorSetLayout; import me.cortex.vulkanite.lib.memory.VAccelerationStructure; import me.cortex.vulkanite.lib.memory.VBuffer; import me.cortex.vulkanite.lib.other.sync.VFence; import me.cortex.vulkanite.lib.other.sync.VSemaphore; import me.jellysquid.mods.sodium.client.render.chunk.RenderSection; import net.minecraft.util.math.ChunkSectionPos; -import net.minecraft.world.chunk.ChunkSection; import org.joml.Matrix4x3f; import org.lwjgl.system.MemoryUtil; import org.lwjgl.vulkan.*; import java.util.*; +import java.util.concurrent.LinkedBlockingDeque; import static org.lwjgl.system.MemoryStack.stackPush; import static org.lwjgl.util.vma.Vma.VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT; import static org.lwjgl.vulkan.KHRAccelerationStructure.*; import static org.lwjgl.vulkan.KHRBufferDeviceAddress.VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT_KHR; import static org.lwjgl.vulkan.VK10.*; -import static org.lwjgl.vulkan.VK12.VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT; +import static org.lwjgl.vulkan.VK12.*; public class AccelerationTLASManager { private final TLASSectionManager buildDataManager = new TLASSectionManager(); @@ -41,6 +45,7 @@ public AccelerationTLASManager(VContext context, int queue) { this.context = context; this.queue = queue; this.singleUsePool = context.cmd.createSingleUsePool(); + this.buildDataManager.resizeBindlessSet(0, null); } //Returns a sync semaphore to chain in the next command submit @@ -309,25 +314,88 @@ protected void update(int id, VkAccelerationStructureInstanceKHR data) { } } + private static int roundUpPow2(int v) { + v--; + v |= v >> 1; + v |= v >> 2; + v |= v >> 4; + v |= v >> 8; + v |= v >> 16; + v++; + return v; + } + private final class TLASSectionManager extends TLASGeometryManager { private final TlasPointerArena arena = new TlasPointerArena(30000); - private final long arrayRef = MemoryUtil.nmemCalloc(30000 * 3, 8); - public VBuffer geometryReferenceBuffer; + + private VDescriptorSetLayout geometryBufferSetLayout; + private VDescriptorPool geometryBufferDescPool; + private long geometryBufferDescSet = 0; + + private int setCapacity = 0; + + private record DescUpdateJob(int binding, int dstArrayElement, List buffers) {} + private final LinkedBlockingDeque descUpdateJobs = new LinkedBlockingDeque<>(); + + public void resizeBindlessSet(int newSize, VFence fence) { + if (geometryBufferSetLayout == null) { + var layoutBuilder = new DescriptorSetLayoutBuilder(VK_DESCRIPTOR_SET_LAYOUT_CREATE_UPDATE_AFTER_BIND_POOL_BIT); + layoutBuilder.binding(0, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 65536, VK_SHADER_STAGE_ALL); + layoutBuilder.setBindingFlags(0, VK_DESCRIPTOR_BINDING_VARIABLE_DESCRIPTOR_COUNT_BIT | VK_DESCRIPTOR_BINDING_UPDATE_AFTER_BIND_BIT | VK_DESCRIPTOR_BINDING_PARTIALLY_BOUND_BIT); + geometryBufferSetLayout = layoutBuilder.build(context); + } + + if (newSize > setCapacity) { + int newCapacity = roundUpPow2(Math.max(newSize, 32)); + var newGeometryBufferDescPool = new VDescriptorPool(context, VK_DESCRIPTOR_POOL_CREATE_UPDATE_AFTER_BIND_BIT, 1, newCapacity, geometryBufferSetLayout.types); + newGeometryBufferDescPool.allocateSets(geometryBufferSetLayout, new int[]{newCapacity}); + long newGeometryBufferDescSet = newGeometryBufferDescPool.get(0); + + System.out.println("New geometry desc set: " + Long.toHexString(newGeometryBufferDescSet) + " with capacity " + newCapacity); + + if (geometryBufferDescSet != 0) { + try (var stack = stackPush()) { + var setCopy = VkCopyDescriptorSet.calloc(1, stack); + setCopy.get(0) + .sType$Default() + .srcSet(geometryBufferDescSet) + .dstSet(newGeometryBufferDescSet) + .descriptorCount(setCapacity); + vkUpdateDescriptorSets(context.device, null, setCopy); + } + + // This breaks the shit out of it + // context.sync.addCallback(fence, () -> { + // geometryBufferDescPool.free(); + // }); + + vkDeviceWaitIdle(context.device); + geometryBufferDescPool.free(); + } + + geometryBufferDescPool = newGeometryBufferDescPool; + geometryBufferDescSet = newGeometryBufferDescSet; + setCapacity = newCapacity; + } + + } @Override public void setGeometryUpdateMemory(VCmdBuff cmd, VFence fence, VkAccelerationStructureGeometryKHR struct) { super.setGeometryUpdateMemory(cmd, fence, struct); - var ref = geometryReferenceBuffer; - if (ref != null) { - context.sync.addCallback(fence, ref::free); + resizeBindlessSet(arena.maxIndex, fence); + + if (descUpdateJobs.isEmpty()) { + return; } - geometryReferenceBuffer = context.memory.createBuffer(8L * arena.maxIndex, - VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, - VK_MEMORY_HEAP_DEVICE_LOCAL_BIT | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, - 0, VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT); - long ptr = geometryReferenceBuffer.map(); - MemoryUtil.memCopy(arrayRef, ptr, 8L * arena.maxIndex); - geometryReferenceBuffer.unmap(); + + var dub = new DescriptorUpdateBuilder(context, descUpdateJobs.size()); + dub.set(geometryBufferDescSet); + while (!descUpdateJobs.isEmpty()) { + var job = descUpdateJobs.poll(); + dub.buffer(job.binding, job.dstArrayElement, job.buffers); + } + dub.apply(); } //TODO: mixinto RenderSection and add a reference to a holder for us, its much faster than a hashmap @@ -362,10 +430,7 @@ public void update(AccelerationBlasBuilder.BLASBuildResult result) { holder.geometryBuffers = data.geometryBuffers(); holder.geometryIndex = arena.allocate(holder.geometryBuffers.size()); - for (int i = 0; i < holder.geometryBuffers.size(); i++) { - MemoryUtil.memPutAddress(arrayRef + 8L*(holder.geometryIndex+i), holder.geometryBuffers.get(i).deviceAddress()); - } - + descUpdateJobs.add(new DescUpdateJob(0, holder.geometryIndex, holder.geometryBuffers)); try (var stack = stackPush()) { var asi = VkAccelerationStructureInstanceKHR.calloc(stack) @@ -432,8 +497,12 @@ public void free(int pos, int count) { } } - public VBuffer getReferenceBuffer() { - return buildDataManager.geometryReferenceBuffer; + public long getGeometrySet() { + return buildDataManager.geometryBufferDescSet; + } + + public VDescriptorSetLayout getGeometryLayout() { + return buildDataManager.geometryBufferSetLayout; } //Called for cleaning up any remaining loose resources diff --git a/src/main/java/me/cortex/vulkanite/client/rendering/VulkanPipeline.java b/src/main/java/me/cortex/vulkanite/client/rendering/VulkanPipeline.java index 0951594..3f0e3e7 100644 --- a/src/main/java/me/cortex/vulkanite/client/rendering/VulkanPipeline.java +++ b/src/main/java/me/cortex/vulkanite/client/rendering/VulkanPipeline.java @@ -147,7 +147,7 @@ public VulkanPipeline(VContext ctx, AccelerationManager accelerationManager, Ray raytracePipelines = new VRaytracePipeline[passes.length]; for (int i = 0; i < passes.length; i++) { - var builder = new RaytracePipelineBuilder().addLayout(layout); + var builder = new RaytracePipelineBuilder().addLayout(layout).addLayout(accelerationManager.getGeometryLayout()); passes[i].apply(builder); raytracePipelines[i] = builder.build(ctx, 1); } @@ -233,7 +233,6 @@ public void renderPostShadows(VGImage outImg, Camera camera, ShaderStorageBuffer .set(desc) .uniform(0, uboBuffer) .acceleration(1, tlas) - .buffer(2, accelerationManager.getReferenceBuffer()) .imageStore(3, composite0mainView.getView(()->outImg)) .imageSampler(4, blockAtlasView.getView(), sampler) .imageSampler(5, blockAtlasNormalView.getView(), sampler) @@ -263,7 +262,7 @@ public void renderPostShadows(VGImage outImg, Camera camera, ShaderStorageBuffer for (var pipeline : raytracePipelines) { pipeline.bind(cmd); - pipeline.bindDSet(cmd, desc); + pipeline.bindDSet(cmd, desc, accelerationManager.getGeometrySet()); pipeline.trace(cmd, outImg.width, outImg.height, 1); } diff --git a/src/main/java/me/cortex/vulkanite/lib/descriptors/DescriptorSetLayoutBuilder.java b/src/main/java/me/cortex/vulkanite/lib/descriptors/DescriptorSetLayoutBuilder.java index 425b039..d1e6148 100644 --- a/src/main/java/me/cortex/vulkanite/lib/descriptors/DescriptorSetLayoutBuilder.java +++ b/src/main/java/me/cortex/vulkanite/lib/descriptors/DescriptorSetLayoutBuilder.java @@ -5,8 +5,10 @@ import org.lwjgl.system.MemoryUtil; import org.lwjgl.vulkan.VkDescriptorSetLayoutBinding; import org.lwjgl.vulkan.VkDescriptorSetLayoutCreateInfo; +import org.lwjgl.vulkan.VkDescriptorSetLayoutBindingFlagsCreateInfo; import java.nio.LongBuffer; +import java.util.HashMap; import static me.cortex.vulkanite.lib.other.VUtil._CHECK_; import static org.lwjgl.system.MemoryStack.stackPush; @@ -14,6 +16,7 @@ public class DescriptorSetLayoutBuilder { private IntArrayList types = new IntArrayList(); + private HashMap bindingFlagsMap = new HashMap<>(); private VkDescriptorSetLayoutBinding.Buffer bindings = VkDescriptorSetLayoutBinding.calloc(0); public DescriptorSetLayoutBuilder binding(int binding, int type, int count, int stages) { bindings = VkDescriptorSetLayoutBinding.create(MemoryUtil.nmemRealloc(bindings.address(), (bindings.capacity() + 1L) * VkDescriptorSetLayoutBinding.SIZEOF), bindings.capacity() + 1); @@ -30,6 +33,10 @@ public DescriptorSetLayoutBuilder binding(int type, int stages) { return binding(bindings.capacity(), type, stages); } + public void setBindingFlags(int binding, int flag) { + bindingFlagsMap.put(binding, flag); + } + int flags; public DescriptorSetLayoutBuilder() { this(0); @@ -45,6 +52,18 @@ public VDescriptorSetLayout build(VContext ctx) { .pBindings(bindings) .flags(flags); + if (!bindingFlagsMap.isEmpty()) { + var bindingFlags = new int[bindings.remaining()]; + for (var i = 0; i < bindings.remaining(); i++) { + bindingFlags[i] = bindingFlagsMap.getOrDefault(i, 0); + } + + var bindingInfo = VkDescriptorSetLayoutBindingFlagsCreateInfo.calloc(stack) + .sType$Default() + .pBindingFlags(stack.ints(bindingFlags)); + info.pNext(bindingInfo); + } + LongBuffer pBuffer = stack.mallocLong(1); _CHECK_(vkCreateDescriptorSetLayout(ctx.device, info, null, pBuffer)); return new VDescriptorSetLayout(ctx, pBuffer.get(0), types.toIntArray()); diff --git a/src/main/java/me/cortex/vulkanite/lib/descriptors/DescriptorUpdateBuilder.java b/src/main/java/me/cortex/vulkanite/lib/descriptors/DescriptorUpdateBuilder.java index 9891332..7c7360c 100644 --- a/src/main/java/me/cortex/vulkanite/lib/descriptors/DescriptorUpdateBuilder.java +++ b/src/main/java/me/cortex/vulkanite/lib/descriptors/DescriptorUpdateBuilder.java @@ -14,6 +14,8 @@ import static org.lwjgl.vulkan.KHRAccelerationStructure.VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR; import static org.lwjgl.vulkan.VK10.*; +import java.util.List; + public class DescriptorUpdateBuilder { private final VContext ctx; private final MemoryStack stack; @@ -60,6 +62,26 @@ public DescriptorUpdateBuilder buffer(int binding, VBuffer buffer, long offset, return this; } + public DescriptorUpdateBuilder buffer(int binding, int dstArrayElement, List buffers) { + var bufInfo = VkDescriptorBufferInfo.calloc(buffers.size(), stack); + for (int i = 0; i < buffers.size(); i++) { + bufInfo.get(i) + .buffer(buffers.get(i).buffer()) + .offset(0) + .range(VK_WHOLE_SIZE); + } + updates.get() + .sType$Default() + .dstBinding(binding) + .dstSet(set) + .dstArrayElement(dstArrayElement) + .descriptorType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER) + .descriptorCount(buffers.size()) + .pBufferInfo(bufInfo); + + return this; + } + public DescriptorUpdateBuilder uniform(int binding, VBuffer buffer) { return uniform(binding, buffer, 0, VK_WHOLE_SIZE); diff --git a/src/main/java/me/cortex/vulkanite/lib/descriptors/VDescriptorPool.java b/src/main/java/me/cortex/vulkanite/lib/descriptors/VDescriptorPool.java index a717281..9926f08 100644 --- a/src/main/java/me/cortex/vulkanite/lib/descriptors/VDescriptorPool.java +++ b/src/main/java/me/cortex/vulkanite/lib/descriptors/VDescriptorPool.java @@ -5,6 +5,7 @@ import org.lwjgl.vulkan.VkDescriptorPoolCreateInfo; import org.lwjgl.vulkan.VkDescriptorPoolSize; import org.lwjgl.vulkan.VkDescriptorSetAllocateInfo; +import org.lwjgl.vulkan.VkDescriptorSetVariableDescriptorCountAllocateInfo; import java.nio.LongBuffer; @@ -37,8 +38,26 @@ public VDescriptorPool(VContext ctx, int flags, int numSets, int... types) { } } + public VDescriptorPool(VContext ctx, int flags, int numSets, int countPerType, int... types) { + this.ctx = ctx; + this.sets = new long[numSets]; - public void allocateSets(VDescriptorSetLayout layout) { + try (var stack = stackPush()) { + var sizes = VkDescriptorPoolSize.calloc(types.length, stack); + for (int i = 0; i < types.length; i++) { + sizes.get(i).type(types[i]).descriptorCount(numSets * countPerType); + } + LongBuffer pPool = stack.mallocLong(1); + _CHECK_(vkCreateDescriptorPool(ctx.device, VkDescriptorPoolCreateInfo.calloc(stack) + .sType$Default() + .flags(flags) + .maxSets(numSets) + .pPoolSizes(sizes), null, pPool)); + pool = pPool.get(0); + } + } + + public void allocateSets(VDescriptorSetLayout layout, int... variableSizes) { try (var stack = stackPush()) { var layouts = stack.mallocLong(sets.length); for (int i = 0; i < sets.length; i++) { @@ -46,16 +65,31 @@ public void allocateSets(VDescriptorSetLayout layout) { } layouts.rewind(); LongBuffer pDescriptorSets = stack.mallocLong(sets.length); - _CHECK_(vkAllocateDescriptorSets(ctx.device, VkDescriptorSetAllocateInfo - .calloc(stack) + var allocInfo = VkDescriptorSetAllocateInfo.calloc(stack) .sType$Default() .descriptorPool(pool) - .pSetLayouts(layouts), pDescriptorSets), + .pSetLayouts(layouts); + if (variableSizes != null) { + var descriptorCounts = stack.mallocInt(variableSizes.length); + for (int i = 0; i < variableSizes.length; i++) { + descriptorCounts.put(variableSizes[i]); + } + descriptorCounts.rewind(); + var variableCountInfo = VkDescriptorSetVariableDescriptorCountAllocateInfo.calloc(stack) + .sType$Default() + .pDescriptorCounts(descriptorCounts); + allocInfo.pNext(variableCountInfo.address()); + } + _CHECK_(vkAllocateDescriptorSets(ctx.device, allocInfo, pDescriptorSets), "Failed to allocate descriptor set"); pDescriptorSets.get(sets); } } + public void allocateSets(VDescriptorSetLayout layout) { + allocateSets(layout, null); + } + public long get(int idx) { return sets[idx]; }