diff --git a/src/main/java/me/cortex/vulkanite/acceleration/AccelerationBlasBuilder.java b/src/main/java/me/cortex/vulkanite/acceleration/AccelerationBlasBuilder.java index a49d9fc..c3cb5d1 100644 --- a/src/main/java/me/cortex/vulkanite/acceleration/AccelerationBlasBuilder.java +++ b/src/main/java/me/cortex/vulkanite/acceleration/AccelerationBlasBuilder.java @@ -349,7 +349,7 @@ private void run() { private VBuffer uploadTerrainGeometry(BuiltSectionMeshParts meshParts, VCmdBuff cmd) { var buff = context.memory.createBuffer(meshParts.getVertexData().getLength(), - VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT, + VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT); cmd.encodeDataUpload(context.memory, MemoryUtil.memAddress(meshParts.getVertexData().getDirectBuffer()), buff, 0, meshParts.getVertexData().getLength()); diff --git a/src/main/java/me/cortex/vulkanite/acceleration/AccelerationManager.java b/src/main/java/me/cortex/vulkanite/acceleration/AccelerationManager.java index 998f42e..9a06609 100644 --- a/src/main/java/me/cortex/vulkanite/acceleration/AccelerationManager.java +++ b/src/main/java/me/cortex/vulkanite/acceleration/AccelerationManager.java @@ -1,6 +1,7 @@ package me.cortex.vulkanite.acceleration; import me.cortex.vulkanite.lib.base.VContext; +import me.cortex.vulkanite.lib.descriptors.VDescriptorSetLayout; import me.cortex.vulkanite.lib.memory.VAccelerationStructure; import me.cortex.vulkanite.lib.memory.VBuffer; import me.cortex.vulkanite.lib.other.sync.VSemaphore; @@ -72,7 +73,11 @@ public void cleanup() { tlasManager.cleanupTick(); } - public VBuffer getReferenceBuffer() { - return tlasManager.getReferenceBuffer(); + public long getGeometrySet() { + return tlasManager.getGeometrySet(); + } + + public VDescriptorSetLayout getGeometryLayout() { + return tlasManager.getGeometryLayout(); } } diff --git a/src/main/java/me/cortex/vulkanite/acceleration/AccelerationTLASManager.java b/src/main/java/me/cortex/vulkanite/acceleration/AccelerationTLASManager.java index 648c101..7cb5cd8 100644 --- a/src/main/java/me/cortex/vulkanite/acceleration/AccelerationTLASManager.java +++ b/src/main/java/me/cortex/vulkanite/acceleration/AccelerationTLASManager.java @@ -7,25 +7,29 @@ import me.cortex.vulkanite.lib.base.VContext; import me.cortex.vulkanite.lib.cmd.VCmdBuff; import me.cortex.vulkanite.lib.cmd.VCommandPool; +import me.cortex.vulkanite.lib.descriptors.DescriptorSetLayoutBuilder; +import me.cortex.vulkanite.lib.descriptors.DescriptorUpdateBuilder; +import me.cortex.vulkanite.lib.descriptors.VDescriptorPool; +import me.cortex.vulkanite.lib.descriptors.VDescriptorSetLayout; import me.cortex.vulkanite.lib.memory.VAccelerationStructure; import me.cortex.vulkanite.lib.memory.VBuffer; import me.cortex.vulkanite.lib.other.sync.VFence; import me.cortex.vulkanite.lib.other.sync.VSemaphore; import me.jellysquid.mods.sodium.client.render.chunk.RenderSection; import net.minecraft.util.math.ChunkSectionPos; -import net.minecraft.world.chunk.ChunkSection; import org.joml.Matrix4x3f; import org.lwjgl.system.MemoryUtil; import org.lwjgl.vulkan.*; import java.util.*; +import java.util.concurrent.LinkedBlockingDeque; import static org.lwjgl.system.MemoryStack.stackPush; import static org.lwjgl.util.vma.Vma.VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT; import static org.lwjgl.vulkan.KHRAccelerationStructure.*; import static org.lwjgl.vulkan.KHRBufferDeviceAddress.VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT_KHR; import static org.lwjgl.vulkan.VK10.*; -import static org.lwjgl.vulkan.VK12.VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT; +import static org.lwjgl.vulkan.VK12.*; public class AccelerationTLASManager { private final TLASSectionManager buildDataManager = new TLASSectionManager(); @@ -41,6 +45,7 @@ public AccelerationTLASManager(VContext context, int queue) { this.context = context; this.queue = queue; this.singleUsePool = context.cmd.createSingleUsePool(); + this.buildDataManager.resizeBindlessSet(0, null); } //Returns a sync semaphore to chain in the next command submit @@ -309,25 +314,88 @@ protected void update(int id, VkAccelerationStructureInstanceKHR data) { } } + private static int roundUpPow2(int v) { + v--; + v |= v >> 1; + v |= v >> 2; + v |= v >> 4; + v |= v >> 8; + v |= v >> 16; + v++; + return v; + } + private final class TLASSectionManager extends TLASGeometryManager { private final TlasPointerArena arena = new TlasPointerArena(30000); - private final long arrayRef = MemoryUtil.nmemCalloc(30000 * 3, 8); - public VBuffer geometryReferenceBuffer; + + private VDescriptorSetLayout geometryBufferSetLayout; + private VDescriptorPool geometryBufferDescPool; + private long geometryBufferDescSet = 0; + + private int setCapacity = 0; + + private record DescUpdateJob(int binding, int dstArrayElement, List buffers) {} + private final LinkedBlockingDeque descUpdateJobs = new LinkedBlockingDeque<>(); + + public void resizeBindlessSet(int newSize, VFence fence) { + if (geometryBufferSetLayout == null) { + var layoutBuilder = new DescriptorSetLayoutBuilder(VK_DESCRIPTOR_SET_LAYOUT_CREATE_UPDATE_AFTER_BIND_POOL_BIT); + layoutBuilder.binding(0, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 65536, VK_SHADER_STAGE_ALL); + layoutBuilder.setBindingFlags(0, VK_DESCRIPTOR_BINDING_VARIABLE_DESCRIPTOR_COUNT_BIT | VK_DESCRIPTOR_BINDING_UPDATE_AFTER_BIND_BIT | VK_DESCRIPTOR_BINDING_PARTIALLY_BOUND_BIT); + geometryBufferSetLayout = layoutBuilder.build(context); + } + + if (newSize > setCapacity) { + int newCapacity = roundUpPow2(Math.max(newSize, 32)); + var newGeometryBufferDescPool = new VDescriptorPool(context, VK_DESCRIPTOR_POOL_CREATE_UPDATE_AFTER_BIND_BIT, 1, newCapacity, geometryBufferSetLayout.types); + newGeometryBufferDescPool.allocateSets(geometryBufferSetLayout, new int[]{newCapacity}); + long newGeometryBufferDescSet = newGeometryBufferDescPool.get(0); + + System.out.println("New geometry desc set: " + Long.toHexString(newGeometryBufferDescSet) + " with capacity " + newCapacity); + + if (geometryBufferDescSet != 0) { + try (var stack = stackPush()) { + var setCopy = VkCopyDescriptorSet.calloc(1, stack); + setCopy.get(0) + .sType$Default() + .srcSet(geometryBufferDescSet) + .dstSet(newGeometryBufferDescSet) + .descriptorCount(setCapacity); + vkUpdateDescriptorSets(context.device, null, setCopy); + } + + // This breaks the shit out of it + // context.sync.addCallback(fence, () -> { + // geometryBufferDescPool.free(); + // }); + + vkDeviceWaitIdle(context.device); + geometryBufferDescPool.free(); + } + + geometryBufferDescPool = newGeometryBufferDescPool; + geometryBufferDescSet = newGeometryBufferDescSet; + setCapacity = newCapacity; + } + + } @Override public void setGeometryUpdateMemory(VCmdBuff cmd, VFence fence, VkAccelerationStructureGeometryKHR struct) { super.setGeometryUpdateMemory(cmd, fence, struct); - var ref = geometryReferenceBuffer; - if (ref != null) { - context.sync.addCallback(fence, ref::free); + resizeBindlessSet(arena.maxIndex, fence); + + if (descUpdateJobs.isEmpty()) { + return; } - geometryReferenceBuffer = context.memory.createBuffer(8L * arena.maxIndex, - VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, - VK_MEMORY_HEAP_DEVICE_LOCAL_BIT | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT, - 0, VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT); - long ptr = geometryReferenceBuffer.map(); - MemoryUtil.memCopy(arrayRef, ptr, 8L * arena.maxIndex); - geometryReferenceBuffer.unmap(); + + var dub = new DescriptorUpdateBuilder(context, descUpdateJobs.size()); + dub.set(geometryBufferDescSet); + while (!descUpdateJobs.isEmpty()) { + var job = descUpdateJobs.poll(); + dub.buffer(job.binding, job.dstArrayElement, job.buffers); + } + dub.apply(); } //TODO: mixinto RenderSection and add a reference to a holder for us, its much faster than a hashmap @@ -362,10 +430,7 @@ public void update(AccelerationBlasBuilder.BLASBuildResult result) { holder.geometryBuffers = data.geometryBuffers(); holder.geometryIndex = arena.allocate(holder.geometryBuffers.size()); - for (int i = 0; i < holder.geometryBuffers.size(); i++) { - MemoryUtil.memPutAddress(arrayRef + 8L*(holder.geometryIndex+i), holder.geometryBuffers.get(i).deviceAddress()); - } - + descUpdateJobs.add(new DescUpdateJob(0, holder.geometryIndex, holder.geometryBuffers)); try (var stack = stackPush()) { var asi = VkAccelerationStructureInstanceKHR.calloc(stack) @@ -432,8 +497,12 @@ public void free(int pos, int count) { } } - public VBuffer getReferenceBuffer() { - return buildDataManager.geometryReferenceBuffer; + public long getGeometrySet() { + return buildDataManager.geometryBufferDescSet; + } + + public VDescriptorSetLayout getGeometryLayout() { + return buildDataManager.geometryBufferSetLayout; } //Called for cleaning up any remaining loose resources diff --git a/src/main/java/me/cortex/vulkanite/client/rendering/VulkanPipeline.java b/src/main/java/me/cortex/vulkanite/client/rendering/VulkanPipeline.java index 7eb8037..0a742ae 100644 --- a/src/main/java/me/cortex/vulkanite/client/rendering/VulkanPipeline.java +++ b/src/main/java/me/cortex/vulkanite/client/rendering/VulkanPipeline.java @@ -61,7 +61,10 @@ public class VulkanPipeline { private VDescriptorSetLayout commonLayout; private VDescriptorSetLayout customtexLayout; private VDescriptorSetLayout storageBufferLayout; - private VDescriptorPool descriptors; + + private VDescriptorPool commonDescriptorPool; + private VDescriptorPool customtexDescriptorPool; + private VDescriptorPool storageBufferDescriptorPool; private final VSampler sampler; private final VSampler ctexSampler; @@ -104,7 +107,7 @@ public VulkanPipeline(VContext ctx, AccelerationManager accelerationManager, Ray PBRTextureHolder holder = PBRTextureManager.INSTANCE.getOrLoadHolder(blockAtlas.getGlId());//((TextureAtlasExtension)blockAtlas).getPBRHolder() return ((IVGImage)holder.getSpecularTexture()).getVGImage(); }); - this.placeholderImage = ctx.memory.createImage2D(1, 1, 1, VK_FORMAT_R8G8B8A8_UNORM, VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT); + this.placeholderImage = ctx.memory.createImage2D(4, 4, 1, VK_FORMAT_R8G8B8A8_UNORM, VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT); this.placeholderImageView = new VImageView(ctx, placeholderImage); try (var stack = stackPush()) { @@ -146,14 +149,13 @@ public VulkanPipeline(VContext ctx, AccelerationManager accelerationManager, Ray try { commonLayout = new DescriptorSetLayoutBuilder() - .binding(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, VK_SHADER_STAGE_ALL)// camera data - .binding(VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR, VK_SHADER_STAGE_ALL)// funni acceleration buffer - .binding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_ALL)// funni buffer buffer - .binding(VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, VK_SHADER_STAGE_ALL)//block texture - .binding(VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, VK_SHADER_STAGE_ALL)//block texture normal - .binding(VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, VK_SHADER_STAGE_ALL)//block texture specular + .binding(0, VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, VK_SHADER_STAGE_ALL)// camera data + .binding(1, VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR, VK_SHADER_STAGE_ALL)// funni acceleration buffer + .binding(3, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, VK_SHADER_STAGE_ALL)//block texture + .binding(4, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, VK_SHADER_STAGE_ALL)//block texture normal + .binding(5, VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, VK_SHADER_STAGE_ALL)//block texture specular // Reordered these so output texture is last... this means you can dynamically add more output textures without messing other ids - .binding(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, VK_SHADER_STAGE_ALL)// output texture + .binding(6, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, VK_SHADER_STAGE_ALL)// output texture .build(ctx); DescriptorSetLayoutBuilder ctexLayoutBuilder = new DescriptorSetLayoutBuilder(); @@ -170,19 +172,21 @@ public VulkanPipeline(VContext ctx, AccelerationManager accelerationManager, Ray storageBufferLayout = ssboLayoutBuilder.build(ctx); - // Using commonLayout.types is good enough because both VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER and VK_DESCRIPTOR_TYPE_STORAGE_BUFFER are used there already... //TODO: use frameahead count instead of just... 10 - descriptors = new VDescriptorPool(ctx, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 10, commonLayout.types); - descriptors.allocateSets(new VDescriptorSetLayout[]{ - commonLayout, - customtexLayout, - storageBufferLayout - }); + commonDescriptorPool = new VDescriptorPool(ctx, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 10, commonLayout.types); + commonDescriptorPool.allocateSets(commonLayout); + + customtexDescriptorPool = new VDescriptorPool(ctx, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 10, customtexLayout.types); + customtexDescriptorPool.allocateSets(customtexLayout); + + storageBufferDescriptorPool = new VDescriptorPool(ctx, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 10, storageBufferLayout.types); + storageBufferDescriptorPool.allocateSets(storageBufferLayout); raytracePipelines = new VRaytracePipeline[passes.length]; for (int i = 0; i < passes.length; i++) { var builder = new RaytracePipelineBuilder() .addLayout(commonLayout) + .addLayout(accelerationManager.getGeometryLayout()) .addLayout(customtexLayout) .addLayout(storageBufferLayout); passes[i].apply(builder); @@ -265,15 +269,14 @@ public void renderPostShadows(VGImage outImg, Camera camera, ShaderStorageBuffer uboBuffer.unmap(); uboBuffer.flush(); - long commonSet = descriptors.get(0); - long ctexSet = descriptors.get(1); - long ssboSet = descriptors.get(2); + long commonSet = commonDescriptorPool.get(fidx); + long ctexSet = customtexDescriptorPool.get(fidx); + long ssboSet = storageBufferDescriptorPool.get(fidx); var updater = new DescriptorUpdateBuilder(ctx, 7, placeholderImageView) .set(commonSet) .uniform(0, uboBuffer) .acceleration(1, tlas) - .buffer(2, accelerationManager.getReferenceBuffer()) .imageSampler(3, blockAtlasView.getView(), sampler) .imageSampler(4, blockAtlasNormalView.getView(), sampler) .imageSampler(5, blockAtlasSpecularView.getView(), sampler) @@ -320,7 +323,7 @@ public void renderPostShadows(VGImage outImg, Camera camera, ShaderStorageBuffer for (var pipeline : raytracePipelines) { pipeline.bind(cmd); - pipeline.bindDSet(cmd, new long[]{commonSet, ctexSet, ssboSet}); + pipeline.bindDSet(cmd, commonSet, accelerationManager.getGeometrySet(), ctexSet, ssboSet); pipeline.trace(cmd, outImg.width, outImg.height, 1); } @@ -359,7 +362,9 @@ public void destory() { commonLayout.free(); customtexLayout.free(); storageBufferLayout.free(); - descriptors.free(); + commonDescriptorPool.free(); + customtexDescriptorPool.free(); + storageBufferDescriptorPool.free(); ctx.sync.checkFences(); singleUsePool.doReleases(); singleUsePool.free(); diff --git a/src/main/java/me/cortex/vulkanite/lib/descriptors/DescriptorSetLayoutBuilder.java b/src/main/java/me/cortex/vulkanite/lib/descriptors/DescriptorSetLayoutBuilder.java index 425b039..b2f4d6c 100644 --- a/src/main/java/me/cortex/vulkanite/lib/descriptors/DescriptorSetLayoutBuilder.java +++ b/src/main/java/me/cortex/vulkanite/lib/descriptors/DescriptorSetLayoutBuilder.java @@ -5,8 +5,10 @@ import org.lwjgl.system.MemoryUtil; import org.lwjgl.vulkan.VkDescriptorSetLayoutBinding; import org.lwjgl.vulkan.VkDescriptorSetLayoutCreateInfo; +import org.lwjgl.vulkan.VkDescriptorSetLayoutBindingFlagsCreateInfo; import java.nio.LongBuffer; +import java.util.HashMap; import static me.cortex.vulkanite.lib.other.VUtil._CHECK_; import static org.lwjgl.system.MemoryStack.stackPush; @@ -14,6 +16,7 @@ public class DescriptorSetLayoutBuilder { private IntArrayList types = new IntArrayList(); + private HashMap bindingFlagsMap = new HashMap<>(); private VkDescriptorSetLayoutBinding.Buffer bindings = VkDescriptorSetLayoutBinding.calloc(0); public DescriptorSetLayoutBuilder binding(int binding, int type, int count, int stages) { bindings = VkDescriptorSetLayoutBinding.create(MemoryUtil.nmemRealloc(bindings.address(), (bindings.capacity() + 1L) * VkDescriptorSetLayoutBinding.SIZEOF), bindings.capacity() + 1); @@ -30,6 +33,10 @@ public DescriptorSetLayoutBuilder binding(int type, int stages) { return binding(bindings.capacity(), type, stages); } + public void setBindingFlags(int binding, int flag) { + bindingFlagsMap.put(binding, flag); + } + int flags; public DescriptorSetLayoutBuilder() { this(0); @@ -45,6 +52,18 @@ public VDescriptorSetLayout build(VContext ctx) { .pBindings(bindings) .flags(flags); + if (!bindingFlagsMap.isEmpty()) { + var bindingFlags = new int[bindings.remaining()]; + for (var i = 0; i < bindings.remaining(); i++) { + bindingFlags[i] = bindingFlagsMap.getOrDefault(bindings.get(i).binding(), 0); + } + + var bindingInfo = VkDescriptorSetLayoutBindingFlagsCreateInfo.calloc(stack) + .sType$Default() + .pBindingFlags(stack.ints(bindingFlags)); + info.pNext(bindingInfo); + } + LongBuffer pBuffer = stack.mallocLong(1); _CHECK_(vkCreateDescriptorSetLayout(ctx.device, info, null, pBuffer)); return new VDescriptorSetLayout(ctx, pBuffer.get(0), types.toIntArray()); diff --git a/src/main/java/me/cortex/vulkanite/lib/descriptors/DescriptorUpdateBuilder.java b/src/main/java/me/cortex/vulkanite/lib/descriptors/DescriptorUpdateBuilder.java index 9891332..7c7360c 100644 --- a/src/main/java/me/cortex/vulkanite/lib/descriptors/DescriptorUpdateBuilder.java +++ b/src/main/java/me/cortex/vulkanite/lib/descriptors/DescriptorUpdateBuilder.java @@ -14,6 +14,8 @@ import static org.lwjgl.vulkan.KHRAccelerationStructure.VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR; import static org.lwjgl.vulkan.VK10.*; +import java.util.List; + public class DescriptorUpdateBuilder { private final VContext ctx; private final MemoryStack stack; @@ -60,6 +62,26 @@ public DescriptorUpdateBuilder buffer(int binding, VBuffer buffer, long offset, return this; } + public DescriptorUpdateBuilder buffer(int binding, int dstArrayElement, List buffers) { + var bufInfo = VkDescriptorBufferInfo.calloc(buffers.size(), stack); + for (int i = 0; i < buffers.size(); i++) { + bufInfo.get(i) + .buffer(buffers.get(i).buffer()) + .offset(0) + .range(VK_WHOLE_SIZE); + } + updates.get() + .sType$Default() + .dstBinding(binding) + .dstSet(set) + .dstArrayElement(dstArrayElement) + .descriptorType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER) + .descriptorCount(buffers.size()) + .pBufferInfo(bufInfo); + + return this; + } + public DescriptorUpdateBuilder uniform(int binding, VBuffer buffer) { return uniform(binding, buffer, 0, VK_WHOLE_SIZE); diff --git a/src/main/java/me/cortex/vulkanite/lib/descriptors/VDescriptorPool.java b/src/main/java/me/cortex/vulkanite/lib/descriptors/VDescriptorPool.java index 9d1869d..9926f08 100644 --- a/src/main/java/me/cortex/vulkanite/lib/descriptors/VDescriptorPool.java +++ b/src/main/java/me/cortex/vulkanite/lib/descriptors/VDescriptorPool.java @@ -5,6 +5,7 @@ import org.lwjgl.vulkan.VkDescriptorPoolCreateInfo; import org.lwjgl.vulkan.VkDescriptorPoolSize; import org.lwjgl.vulkan.VkDescriptorSetAllocateInfo; +import org.lwjgl.vulkan.VkDescriptorSetVariableDescriptorCountAllocateInfo; import java.nio.LongBuffer; @@ -17,50 +18,79 @@ public class VDescriptorPool extends TrackedResourceObject { private final long pool; private final long[] sets; - private int usedSets = 0; - public VDescriptorPool(VContext ctx, int flags, int maxSets, int... types) { + public VDescriptorPool(VContext ctx, int flags, int numSets, int... types) { this.ctx = ctx; - this.sets = new long[maxSets]; + this.sets = new long[numSets]; try (var stack = stackPush()) { var sizes = VkDescriptorPoolSize.calloc(types.length, stack); for (int i = 0; i < types.length; i++) { - sizes.get(i).type(types[i]).descriptorCount(maxSets); + sizes.get(i).type(types[i]).descriptorCount(numSets); } LongBuffer pPool = stack.mallocLong(1); _CHECK_(vkCreateDescriptorPool(ctx.device, VkDescriptorPoolCreateInfo.calloc(stack) .sType$Default() .flags(flags) - .maxSets(maxSets) + .maxSets(numSets) .pPoolSizes(sizes), null, pPool)); pool = pPool.get(0); } } - public void allocateSets(VDescriptorSetLayout[] layouts) { + public VDescriptorPool(VContext ctx, int flags, int numSets, int countPerType, int... types) { + this.ctx = ctx; + this.sets = new long[numSets]; + + try (var stack = stackPush()) { + var sizes = VkDescriptorPoolSize.calloc(types.length, stack); + for (int i = 0; i < types.length; i++) { + sizes.get(i).type(types[i]).descriptorCount(numSets * countPerType); + } + LongBuffer pPool = stack.mallocLong(1); + _CHECK_(vkCreateDescriptorPool(ctx.device, VkDescriptorPoolCreateInfo.calloc(stack) + .sType$Default() + .flags(flags) + .maxSets(numSets) + .pPoolSizes(sizes), null, pPool)); + pool = pPool.get(0); + } + } + + public void allocateSets(VDescriptorSetLayout layout, int... variableSizes) { try (var stack = stackPush()) { - usedSets = layouts.length; - var pLayouts = stack.mallocLong(usedSets); - for (int i = 0; i < usedSets; i++) { - pLayouts.put(layouts[i].layout); + var layouts = stack.mallocLong(sets.length); + for (int i = 0; i < sets.length; i++) { + layouts.put(layout.layout); } - pLayouts.rewind(); + layouts.rewind(); LongBuffer pDescriptorSets = stack.mallocLong(sets.length); - _CHECK_(vkAllocateDescriptorSets(ctx.device, VkDescriptorSetAllocateInfo - .calloc(stack) + var allocInfo = VkDescriptorSetAllocateInfo.calloc(stack) .sType$Default() .descriptorPool(pool) - .pSetLayouts(pLayouts), pDescriptorSets), + .pSetLayouts(layouts); + if (variableSizes != null) { + var descriptorCounts = stack.mallocInt(variableSizes.length); + for (int i = 0; i < variableSizes.length; i++) { + descriptorCounts.put(variableSizes[i]); + } + descriptorCounts.rewind(); + var variableCountInfo = VkDescriptorSetVariableDescriptorCountAllocateInfo.calloc(stack) + .sType$Default() + .pDescriptorCounts(descriptorCounts); + allocInfo.pNext(variableCountInfo.address()); + } + _CHECK_(vkAllocateDescriptorSets(ctx.device, allocInfo, pDescriptorSets), "Failed to allocate descriptor set"); pDescriptorSets.get(sets); } } + public void allocateSets(VDescriptorSetLayout layout) { + allocateSets(layout, null); + } + public long get(int idx) { - if(idx < 0 || idx >= usedSets) { - throw new IllegalArgumentException("Descriptor set out of range: " + idx); - } return sets[idx]; } diff --git a/src/main/java/me/cortex/vulkanite/lib/memory/HandleDescriptorManger.java b/src/main/java/me/cortex/vulkanite/lib/memory/HandleDescriptorManger.java index 7d71aec..d50ab0b 100644 --- a/src/main/java/me/cortex/vulkanite/lib/memory/HandleDescriptorManger.java +++ b/src/main/java/me/cortex/vulkanite/lib/memory/HandleDescriptorManger.java @@ -28,9 +28,9 @@ public static void close(long handleDescriptor) { throw new IllegalStateException(); } } else { - //if (LibC.INSTANCE.close((int) handleDescriptor) != 0) { - // throw new IllegalStateException(); - //} + if (LibC.INSTANCE.close((int) handleDescriptor) != 0) { + throw new IllegalStateException(); + } } } } diff --git a/src/main/java/me/cortex/vulkanite/lib/memory/MemoryManager.java b/src/main/java/me/cortex/vulkanite/lib/memory/MemoryManager.java index 2bfbf8d..4905611 100644 --- a/src/main/java/me/cortex/vulkanite/lib/memory/MemoryManager.java +++ b/src/main/java/me/cortex/vulkanite/lib/memory/MemoryManager.java @@ -5,12 +5,15 @@ import com.sun.jna.platform.win32.Kernel32; import com.sun.jna.platform.win32.WinNT; import me.cortex.vulkanite.client.Vulkanite; + import org.lwjgl.PointerBuffer; import org.lwjgl.util.vma.VmaAllocationCreateInfo; import org.lwjgl.vulkan.*; import java.nio.IntBuffer; import java.nio.LongBuffer; +import java.util.HashMap; +import java.util.function.Function; import static me.cortex.vulkanite.lib.other.VUtil._CHECK_; import static me.cortex.vulkanite.lib.other.VUtil._CHECK_GL_ERROR_; @@ -37,92 +40,160 @@ public class MemoryManager { private static final int EXTERNAL_MEMORY_HANDLE_TYPE = Vulkanite.IS_WINDOWS?VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT:VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT; private final VkDevice device; private final VmaAllocator allocator; - private final VmaAllocator.MemoryPool shared; + private final VmaAllocator.MemoryPool sharedBlocked; + private final VmaAllocator.MemoryPool sharedDedicated; private final boolean hasDeviceAddresses; + private static final long sharedBlockSize = 64L << 20L; // 64 MB + public MemoryManager(VkDevice device, boolean hasDeviceAddresses) { this.device = device; this.hasDeviceAddresses = hasDeviceAddresses; allocator = new VmaAllocator(device, hasDeviceAddresses); //Note: this technically creates a memory leak, since we never free it, however // memory manager should never be created more than once per application, so it should bo ok - shared = allocator.createPool(VkExportMemoryAllocateInfo.calloc() - .sType$Default() - .handleTypes(EXTERNAL_MEMORY_HANDLE_TYPE)); + sharedBlocked = allocator.createPool(sharedBlockSize, + VkExportMemoryAllocateInfo.calloc() + .sType$Default() + .handleTypes(EXTERNAL_MEMORY_HANDLE_TYPE)); + sharedDedicated = allocator.createPool(0, + VkExportMemoryAllocateInfo.calloc() + .sType$Default() + .handleTypes(EXTERNAL_MEMORY_HANDLE_TYPE)); } - private long importMemoryWin32(int memoryObject, VmaAllocator.Allocation allocation) { - try (var stack = stackPush()) { - PointerBuffer pb = stack.callocPointer(1); - _CHECK_(vkGetMemoryWin32HandleKHR(device, VkMemoryGetWin32HandleInfoKHR.calloc(stack) - .sType$Default() - .memory(allocation.ai.deviceMemory()) - .handleType(VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT), pb)); - long handle = pb.get(0); - if (handle == 0) - throw new IllegalStateException(); - HandleDescriptorManger.add(handle); - - //TODO: fixme: the `alloc.ai.size() + alloc.ai.offset()` is an extreamly ugly hack - // it is ment to extend over the entire size of vkMemoryObject, but im not sure how to obtain it - glImportMemoryWin32HandleEXT(memoryObject, allocation.ai.size() + allocation.ai.offset(), GL_HANDLE_TYPE_OPAQUE_WIN32_EXT, handle); - return handle; + public class ExternalMemoryTracker { + public record HandleDescriptor(long handle, int glMemoryObj) { + } + public record HandleDescriptorTracked(HandleDescriptor desc, int refCount) { } - } - private int importMemoryFd(int memoryObject, VmaAllocator.Allocation allocation) { - try (var stack = stackPush()) { - IntBuffer pb = stack.callocInt(1); - _CHECK_(vkGetMemoryFdKHR(device, VkMemoryGetFdInfoKHR.calloc(stack) - .sType$Default() - .memory(allocation.ai.deviceMemory()) - .handleType(VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT), pb)); - int descriptor = pb.get(0); - if (descriptor == 0) - throw new IllegalStateException(); - HandleDescriptorManger.add(descriptor); - - //TODO: fixme: the `alloc.ai.size() + alloc.ai.offset()` is an extreamly ugly hack - // it is ment to extend over the entire size of vkMemoryObject, but im not sure how to obtain it - glImportMemoryFdEXT(memoryObject, allocation.ai.size() + allocation.ai.offset(), GL_HANDLE_TYPE_OPAQUE_FD_EXT, descriptor); - return descriptor; + // Maps VK memory to {GL memory, Native handle} tuple & with refernce counting + private static final HashMap MEMORY_TO_HANDLES = new HashMap<>(); + + // Get the GL memory associated with the given vulkan memory object + public static int acquire(VmaAllocator.Allocation allocation, VkDevice device) { + synchronized (MEMORY_TO_HANDLES) { + long vkMemory = allocation.ai.deviceMemory(); + if (!MEMORY_TO_HANDLES.containsKey(vkMemory)) { + long nativeHandle = 0; + try (var stack = stackPush()) { + if (Vulkanite.IS_WINDOWS) { + PointerBuffer pb = stack.callocPointer(1); + _CHECK_(vkGetMemoryWin32HandleKHR(device, VkMemoryGetWin32HandleInfoKHR.calloc(stack) + .sType$Default() + .memory(vkMemory) + .handleType(VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT), pb)); + nativeHandle = pb.get(0); + } else { + IntBuffer pb = stack.callocInt(1); + _CHECK_(vkGetMemoryFdKHR(device, VkMemoryGetFdInfoKHR.calloc(stack) + .sType$Default() + .memory(vkMemory) + .handleType(VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT), pb)); + nativeHandle = pb.get(0); + } + } + + if (nativeHandle == 0) + throw new IllegalStateException(); + + int newMemoryObject = glCreateMemoryObjectsEXT(); + // Everything larger than the shared block size must be dedicated allocation + long memorySize = Long.max(allocation.ai.offset() + allocation.ai.size(), sharedBlockSize); + if (Vulkanite.IS_WINDOWS) { + glImportMemoryWin32HandleEXT(newMemoryObject, + memorySize, + GL_HANDLE_TYPE_OPAQUE_WIN32_EXT, nativeHandle); + _CHECK_GL_ERROR_(); + } else { + glImportMemoryFdEXT(newMemoryObject, memorySize, + GL_HANDLE_TYPE_OPAQUE_FD_EXT, (int) nativeHandle); + _CHECK_GL_ERROR_(); + } + + if (memorySize > sharedBlockSize) { + // Section 6.2 of the OpenGL 4.5 spec + glMemoryObjectParameteriEXT(newMemoryObject, GL_DEDICATED_MEMORY_OBJECT_EXT, GL_TRUE); + _CHECK_GL_ERROR_(); + } + + if (newMemoryObject == 0) + throw new IllegalStateException(); + + MEMORY_TO_HANDLES.put(vkMemory, + new HandleDescriptorTracked(new HandleDescriptor(nativeHandle, newMemoryObject), 0)); + } + + var tracked = MEMORY_TO_HANDLES.get(vkMemory); + MEMORY_TO_HANDLES.put(vkMemory, new HandleDescriptorTracked(tracked.desc, tracked.refCount + 1)); + + return tracked.desc.glMemoryObj; + } } - } - private long importMemory(int memoryObject, VmaAllocator.Allocation allocation) { - return Vulkanite.IS_WINDOWS?importMemoryWin32(memoryObject,allocation):importMemoryFd(memoryObject,allocation); - } - //TODO: there is a better way to do shared memory, a vk memory object from vma should be put into a single memory object - // then the memory object should be reused multiple times, this is the corrent and more efficent way - // that is, since `alloc.ai.deviceMemory()` is shared by multiple allocations, they can also share a single memory object + public static void release(long memory) { + synchronized (MEMORY_TO_HANDLES) { + var tracked = MEMORY_TO_HANDLES.get(memory); + if (tracked.refCount <= 0) { + throw new IllegalStateException(); + } + if (tracked.refCount == 1) { + glDeleteMemoryObjectsEXT(tracked.desc.glMemoryObj); + _CHECK_GL_ERROR_(); + if (Vulkanite.IS_WINDOWS) { + if (!Kernel32.INSTANCE.CloseHandle(new WinNT.HANDLE(new Pointer(tracked.desc.handle)))) { + int error = Kernel32.INSTANCE.GetLastError(); + System.err.println("STATE MIGHT BE BROKEN! Failed to close handle: " + error); + // throw new IllegalStateException(); + } + } else { + int code = 0; + if ((code = LibC.INSTANCE.close((int) tracked.desc.handle)) != 0) { + System.err.println("STATE MIGHT BE BROKEN! Failed to close FD: " + code); + // throw new IllegalStateException(); + } + } + MEMORY_TO_HANDLES.remove(memory); + } else { + MEMORY_TO_HANDLES.put(memory, new HandleDescriptorTracked(tracked.desc, tracked.refCount - 1)); + } + } + } + }; + public VGBuffer createSharedBuffer(long size, int usage, int properties) { return createSharedBuffer(size, usage, properties, 0); } public VGBuffer createSharedBuffer(long size, int usage, int properties, int alignment) { try (var stack = stackPush()) { - var alloc = shared.alloc(VkBufferCreateInfo + VmaAllocator.BufferAllocation alloc = null; + + var bufferCreateInfo = VkBufferCreateInfo .calloc(stack) .sType$Default() .size(size) .usage(usage) .pNext(VkExternalMemoryBufferCreateInfo.calloc(stack) .sType$Default() - .handleTypes(EXTERNAL_MEMORY_HANDLE_TYPE)), - // VERY IMPORTANT: create dedicated allocation so underlying memory object is only used by this buffer, - // thus only imported once. - VmaAllocationCreateInfo.calloc(stack) + .handleTypes(EXTERNAL_MEMORY_HANDLE_TYPE)); + var allocationCreateInfo = VmaAllocationCreateInfo.calloc(stack) .usage(VMA_MEMORY_USAGE_AUTO) - .requiredFlags(properties) - .flags(VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT), - alignment); + .requiredFlags(properties); + + if (size > sharedBlockSize) { + allocationCreateInfo.flags(VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT); + alloc = sharedDedicated.alloc(bufferCreateInfo, allocationCreateInfo, alignment); + } else { + alloc = sharedBlocked.alloc(bufferCreateInfo, allocationCreateInfo, alignment); + } - int memoryObject = glCreateMemoryObjectsEXT(); - long handle = importMemory(memoryObject, alloc); + int memoryObject = ExternalMemoryTracker.acquire(alloc, device); int glId = glCreateBuffers(); glNamedBufferStorageMemEXT(glId, size, memoryObject, alloc.ai.offset()); _CHECK_GL_ERROR_(); - return new VGBuffer(alloc, glId, memoryObject, handle); + return new VGBuffer(alloc, glId); } } @@ -156,14 +227,21 @@ public VGImage createSharedImage(int width, int height, int depth, int mipLevels .usage(usage) .samples(VK_SAMPLE_COUNT_1_BIT); createInfo.extent().width(width).height(height).depth(depth); - var alloc = shared.alloc(createInfo, - VmaAllocationCreateInfo.calloc(stack) + + var allocInfo = VmaAllocationCreateInfo.calloc(stack) .usage(VMA_MEMORY_USAGE_AUTO) - .requiredFlags(properties) - .flags(VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT)); + .requiredFlags(properties); + + VmaAllocator.ImageAllocation alloc = null; + + try { + alloc = sharedBlocked.alloc(createInfo, allocInfo); + } catch(AssertionError e) { + allocInfo.flags(VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT); + alloc = sharedDedicated.alloc(createInfo, allocInfo); + } - int memoryObject = glCreateMemoryObjectsEXT(); - long handle = importMemory(memoryObject, alloc); + int memoryObject = ExternalMemoryTracker.acquire(alloc, device); int glId = glCreateTextures(glImageType); @@ -192,7 +270,7 @@ public VGImage createSharedImage(int width, int height, int depth, int mipLevels } _CHECK_GL_ERROR_(); - return new VGImage(alloc, width, height, depth, mipLevels, vkFormat, glFormat, glId, memoryObject, handle); + return new VGImage(alloc, width, height, depth, mipLevels, vkFormat, glFormat, glId); } } diff --git a/src/main/java/me/cortex/vulkanite/lib/memory/VGBuffer.java b/src/main/java/me/cortex/vulkanite/lib/memory/VGBuffer.java index 46dd3d6..d69a098 100644 --- a/src/main/java/me/cortex/vulkanite/lib/memory/VGBuffer.java +++ b/src/main/java/me/cortex/vulkanite/lib/memory/VGBuffer.java @@ -1,28 +1,22 @@ package me.cortex.vulkanite.lib.memory; import static me.cortex.vulkanite.lib.other.VUtil._CHECK_GL_ERROR_; -import static org.lwjgl.opengl.ARBDirectStateAccess.glCreateBuffers; -import static org.lwjgl.opengl.EXTMemoryObject.glDeleteMemoryObjectsEXT; import static org.lwjgl.opengl.GL15C.glDeleteBuffers; -import static org.lwjgl.opengl.GL15C.glGenBuffers; public class VGBuffer extends VBuffer { public final int glId; - private final int glMemObj; - private final long handle; - VGBuffer(VmaAllocator.BufferAllocation allocation, int glId, int glMemObj, long handle) { + private final long vkMemory; + VGBuffer(VmaAllocator.BufferAllocation allocation, int glId) { super(allocation); this.glId = glId; - this.glMemObj = glMemObj; - this.handle = handle; + this.vkMemory = allocation.ai.deviceMemory(); } @Override public void free() { - HandleDescriptorManger.close(handle); glDeleteBuffers(glId); - glDeleteMemoryObjectsEXT(glMemObj); _CHECK_GL_ERROR_(); + MemoryManager.ExternalMemoryTracker.release(this.vkMemory); super.free(); } } diff --git a/src/main/java/me/cortex/vulkanite/lib/memory/VGImage.java b/src/main/java/me/cortex/vulkanite/lib/memory/VGImage.java index 91e5b5b..7fd9e11 100644 --- a/src/main/java/me/cortex/vulkanite/lib/memory/VGImage.java +++ b/src/main/java/me/cortex/vulkanite/lib/memory/VGImage.java @@ -1,29 +1,24 @@ package me.cortex.vulkanite.lib.memory; import static me.cortex.vulkanite.lib.other.VUtil._CHECK_GL_ERROR_; -import static org.lwjgl.opengl.EXTMemoryObject.glDeleteMemoryObjectsEXT; import static org.lwjgl.opengl.GL11C.glDeleteTextures; public class VGImage extends VImage { public final int glId; - private final int glMemObj; public final int glFormat; - private final long handle; + private final long vkMemory; - VGImage(VmaAllocator.ImageAllocation allocation, int width, int height, int depth, int mipLayers, int format, int glFormat, int glId, int glMemObj, long handle) { + VGImage(VmaAllocator.ImageAllocation allocation, int width, int height, int depth, int mipLayers, int format, int glFormat, int glId) { super(allocation, width, height, depth, mipLayers, format); this.glId = glId; - this.glMemObj = glMemObj; this.glFormat = glFormat; - this.handle = handle; + this.vkMemory = allocation.ai.deviceMemory(); } - public void free() { - HandleDescriptorManger.close(handle); - + public void free() { glDeleteTextures(glId); - glDeleteMemoryObjectsEXT(glMemObj); _CHECK_GL_ERROR_(); + MemoryManager.ExternalMemoryTracker.release(this.vkMemory); super.free(); } } diff --git a/src/main/java/me/cortex/vulkanite/lib/memory/VmaAllocator.java b/src/main/java/me/cortex/vulkanite/lib/memory/VmaAllocator.java index 4dfed64..e5d5e72 100644 --- a/src/main/java/me/cortex/vulkanite/lib/memory/VmaAllocator.java +++ b/src/main/java/me/cortex/vulkanite/lib/memory/VmaAllocator.java @@ -60,12 +60,12 @@ public VmaAllocator(VkDevice device, boolean enableDeviceAddresses) { } } - public MemoryPool createPool() { - return new MemoryPool(0); + public MemoryPool createPool(long blockSize) { + return new MemoryPool(blockSize, 0); } - public MemoryPool createPool(Struct chain) { - return new MemoryPool(chain.address()); + public MemoryPool createPool(long blockSize, Struct chain) { + return new MemoryPool(blockSize, chain.address()); } @@ -258,10 +258,11 @@ public void free() { public class MemoryPool { private final long pool; - public MemoryPool(long pNext) { + public MemoryPool(long blockSize, long pNext) { try (var stack = stackPush()) { - VmaPoolCreateInfo pci = VmaPoolCreateInfo.calloc(stack); - pci.pMemoryAllocateNext(pNext); + VmaPoolCreateInfo pci = VmaPoolCreateInfo.calloc(stack) + .blockSize(blockSize) + .pMemoryAllocateNext(pNext); PointerBuffer pb = stack.callocPointer(1); if (vmaCreatePool(allocator, pci, pb) != VK_SUCCESS) { throw new RuntimeException("Failed to create memory pool");