diff --git a/docs/backends.rst b/docs/backends.rst index 01c2f576..c17c764f 100644 --- a/docs/backends.rst +++ b/docs/backends.rst @@ -158,12 +158,14 @@ bytes you wish to change. :param data_offset: The starting offset in the data at which to begin copying. -There are two functions that allow you to perform multiple draw calls at once. -Both require that you enable the feature "multi-draw-indirect". +There are four functions that allow you to perform multiple draw calls at once. +Two take the number of draws to perform as an argument; two have this value in a buffer. Typically, these calls do not reduce work or increase parallelism on the GPU. Rather they reduce driver overhead on the CPU. +The first two require that you enable the feature ``"multi-draw-indirect"``. + .. py:function:: wgpu.backends.wgpu_native.multi_draw_indirect(render_pass_encoder, buffer, *, offset=0, count): Equivalent to:: @@ -171,22 +173,68 @@ they reduce driver overhead on the CPU. render_pass_encoder.draw_indirect(buffer, offset + i * 16) :param render_pass_encoder: The current render pass encoder. - :param buffer: The indirect buffer containing the arguments. + :param buffer: The indirect buffer containing the arguments. Must have length + at least offset + 16 * count. :param offset: The byte offset in the indirect buffer containing the first argument. + Must be a multiple of 4. :param count: The number of draw operations to perform. .. py:function:: wgpu.backends.wgpu_native.multi_draw_indexed_indirect(render_pass_encoder, buffer, *, offset=0, count): Equivalent to:: + for i in range(count): render_pass_encoder.draw_indexed_indirect(buffer, offset + i * 2-) :param render_pass_encoder: The current render pass encoder. - :param buffer: The indirect buffer containing the arguments. + :param buffer: The indirect buffer containing the arguments. Must have length + at least offset + 20 * count. :param offset: The byte offset in the indirect buffer containing the first argument. 
+ Must be a multiple of 4. :param count: The number of draw operations to perform. +The second two require that you enable the feature ``"multi-draw-indirect-count"``. +They are identical to the previous two, except that the ``count`` argument is replaced by +three arguments. The value at ``count_buffer_offset`` in ``count_buffer`` is treated as +an unsigned 32-bit integer. The ``count`` is the minimum of this value and ``max_count``. + +.. py:function:: wgpu.backends.wgpu_native.multi_draw_indirect_count(render_pass_encoder, buffer, *, offset=0, count_buffer, count_buffer_offset=0, max_count): + + Equivalent to:: + + count = min(u32_at(count_buffer, count_buffer_offset), max_count) + for i in range(count): + render_pass_encoder.draw_indirect(buffer, offset + i * 16) + + :param render_pass_encoder: The current render pass encoder. + :param buffer: The indirect buffer containing the arguments. Must have length + at least offset + 16 * max_count. + :param offset: The byte offset in the indirect buffer containing the first argument. + Must be a multiple of 4. + :param count_buffer: The indirect buffer containing the count. + :param count_buffer_offset: The offset into count_buffer. + Must be a multiple of 4. + :param max_count: The maximum number of draw operations to perform. + +.. py:function:: wgpu.backends.wgpu_native.multi_draw_indexed_indirect_count(render_pass_encoder, buffer, *, offset=0, count_buffer, count_buffer_offset=0, max_count): + + Equivalent to:: + + count = min(u32_at(count_buffer, count_buffer_offset), max_count) + for i in range(count): + render_pass_encoder.draw_indexed_indirect(buffer, offset + i * 20) + + :param render_pass_encoder: The current render pass encoder. + :param buffer: The indirect buffer containing the arguments. Must have length + at least offset + 20 * max_count. + :param offset: The byte offset in the indirect buffer containing the first argument. + Must be a multiple of 4. + :param count_buffer: The indirect buffer containing the count. + :param count_buffer_offset: The offset into count_buffer.
+ Must be a multiple of 4. + :param max_count: The maximum number of draw operations to perform. + Some GPUs allow you collect statistics on their pipelines. Those GPUs that support this have the feature "pipeline-statistics-query", and you must enable this feature when getting the device. diff --git a/tests/test_wgpu_vertex_instance.py b/tests/test_wgpu_vertex_instance.py index 518060c9..8ba51dd9 100644 --- a/tests/test_wgpu_vertex_instance.py +++ b/tests/test_wgpu_vertex_instance.py @@ -8,9 +8,11 @@ from wgpu.backends.wgpu_native.extras import ( multi_draw_indexed_indirect, multi_draw_indirect, + multi_draw_indirect_count, + multi_draw_indexed_indirect_count, ) -MAX_INFO = 100 +MAX_INFO = 1000 if not can_use_wgpu_lib: pytest.skip("Skipping tests that need the wgpu lib", allow_module_level=True) @@ -68,7 +70,7 @@ class Runner: REQUIRED_FEATURES = ["indirect-first-instance"] - OPTIONAL_FEATURES = ["multi-draw-indirect"] # we'll be adding more + OPTIONAL_FEATURES = ["multi-draw-indirect", "multi-draw-indirect-count"] @classmethod def is_usable(cls): @@ -163,11 +165,39 @@ def __init__(self): # We're going to want to try calling these draw functions from a buffer, and it # would be nice to test that these buffers have an offset self.draw_data_buffer = self.device.create_buffer_with_data( - data=np.uint32([0, 0, *self.draw_args1, *self.draw_args2]), - usage="INDIRECT", + # The zeros at the beginning are to test "offset". + # The zeros at the end are because the _count methods require to buffer to + # be at least byte_offset + 16 * max_count bytes long + data=np.uint32([0, 0, *self.draw_args1, *self.draw_args2, *([0] * 50)]), + usage="INDIRECT", # copy dst for patching ) self.draw_data_buffer_indexed = self.device.create_buffer_with_data( - data=np.uint32([0, 0, *self.draw_indexed_args1, *self.draw_indexed_args2]), + # The zeros at the beginning are to test "offset". 
+ # The zeros at the end are because the _count methods require the buffer to + # be at least byte_offset + 20 * max_count bytes long + data=np.uint32( + [0, 0, *self.draw_indexed_args1, *self.draw_indexed_args2, *([0] * 50)] + ), + usage="INDIRECT", + ) + + self.count_buffer = self.device.create_buffer_with_data( + data=(np.int32([10, 2])), usage="INDIRECT" + ) + self.draw_data_buffer_patched = self.device.create_buffer_with_data( + # The leading 10 and 2 (the same values as count_buffer) fill the first 8 bytes to test "offset". + # The zeros at the end are because the _count methods require the buffer to + # be at least byte_offset + 16 * max_count bytes long + data=np.uint32([10, 2, *self.draw_args1, *self.draw_args2, *([0] * 50)]), + usage="INDIRECT", # copy dst for patching + ) + self.draw_data_buffer_indexed_patched = self.device.create_buffer_with_data( + # The leading 10 and 2 (the same values as count_buffer) fill the first 8 bytes to test "offset". + # The zeros at the end are because the _count methods require the buffer to + # be at least byte_offset + 20 * max_count bytes long + data=np.uint32( + [10, 2, *self.draw_indexed_args1, *self.draw_indexed_args2, *([0] * 50)] + ), usage="INDIRECT", ) @@ -211,7 +241,8 @@ def run_draw_test(self, draw_function, indexed, *, expected_result=None): expected_result = self.expected_result_draw_indexed else: expected_result = self.expected_result_draw - assert info_set == expected_result + if info_set != expected_result: + pytest.fail(f"Expected {sorted(expected_result)}\nGot {sorted(info_set)}") if not Runner.is_usable(): @@ -337,5 +368,50 @@ def draw(encoder): ) +@pytest.mark.parametrize("bug_patch", [False, True]) +@pytest.mark.parametrize("indexed", [False, True]) +@pytest.mark.parametrize("test_max_count", [False, True]) +def test_multi_draw_indirect_count(runner, test_max_count, indexed, bug_patch): + if "multi-draw-indirect-count" not in runner.device.features: + pytest.skip("Must have 'multi-draw-indirect-count' to run") + + print(f"{bug_patch=}, {indexed=}, {test_max_count=} \n") + + if indexed: +
function = multi_draw_indexed_indirect_count + if not bug_patch: + buffer = runner.draw_data_buffer_indexed + else: + buffer = runner.draw_data_buffer_indexed_patched + else: + function = multi_draw_indirect_count + if not bug_patch: + buffer = runner.draw_data_buffer + else: + buffer = runner.draw_data_buffer_patched + + # Either way, we're going to do 2 draws. But one via the max_count and one via the + # information in the buffer. + if test_max_count: + # We pull a count of 10, but we're limiting it to 2 via max_count + count_buffer_offset, max_count = 0, 2 + else: + # We pull a count of 2, and set the max_count to something bigger. Buffer + # is required to be big enough to handle max_count. + count_buffer_offset, max_count = 4, 10 + + def draw(encoder): + function( + encoder, + buffer, + offset=8, + count_buffer=runner.count_buffer, + count_buffer_offset=count_buffer_offset, + max_count=max_count, + ) + + runner.run_draw_test(draw, indexed) + + if __name__ == "__main__": run_tests(globals()) diff --git a/wgpu/backends/wgpu_native/_api.py b/wgpu/backends/wgpu_native/_api.py index 07db3b1b..5af1d359 100644 --- a/wgpu/backends/wgpu_native/_api.py +++ b/wgpu/backends/wgpu_native/_api.py @@ -3293,6 +3293,32 @@ def _multi_draw_indexed_indirect(self, buffer, offset, count): self._internal, buffer._internal, int(offset), int(count) ) + def _multi_draw_indirect_count( + self, buffer, offset, count_buffer, count_buffer_offset, max_count + ): + # H: void f(WGPURenderPassEncoder encoder, WGPUBuffer buffer, uint64_t offset, WGPUBuffer count_buffer, uint64_t count_buffer_offset, uint32_t max_count) + libf.wgpuRenderPassEncoderMultiDrawIndirectCount( + self._internal, + buffer._internal, + int(offset), + count_buffer._internal, + int(count_buffer_offset), + int(max_count), + ) + + def _multi_draw_indexed_indirect_count( + self, buffer, offset, count_buffer, count_buffer_offset, max_count + ): + # H: void f(WGPURenderPassEncoder encoder, WGPUBuffer buffer, uint64_t offset, 
WGPUBuffer count_buffer, uint64_t count_buffer_offset, uint32_t max_count) + libf.wgpuRenderPassEncoderMultiDrawIndexedIndirectCount( + self._internal, + buffer._internal, + int(offset), + count_buffer._internal, + int(count_buffer_offset), + int(max_count), + ) + def _maybe_keep_alive(self, object): pass diff --git a/wgpu/backends/wgpu_native/extras.py b/wgpu/backends/wgpu_native/extras.py index 28aed319..1ebc0f97 100644 --- a/wgpu/backends/wgpu_native/extras.py +++ b/wgpu/backends/wgpu_native/extras.py @@ -101,6 +101,52 @@ def multi_draw_indexed_indirect(render_pass_encoder, buffer, *, offset=0, count) render_pass_encoder._multi_draw_indexed_indirect(buffer, offset, count) +def multi_draw_indirect_count( + render_pass_encoder, + buffer, + *, + offset=0, + count_buffer, + count_buffer_offset=0, + max_count, +): + """ + This is equivalent to: + + count = min(u32_at(count_buffer, count_buffer_offset), max_count) + for i in range(count): + render_pass_encoder.draw_indirect(buffer, offset + i * 16) + + You must enable the feature "multi-draw-indirect-count" to use this function. + """ + render_pass_encoder._multi_draw_indirect_count( + buffer, offset, count_buffer, count_buffer_offset, max_count + ) + + +def multi_draw_indexed_indirect_count( + render_pass_encoder, + buffer, + *, + offset=0, + count_buffer, + count_buffer_offset=0, + max_count, +): + """ + This is equivalent to: + + count = min(u32_at(count_buffer, count_buffer_offset), max_count) + for i in range(count): + render_pass_encoder.draw_indexed_indirect(buffer, offset + i * 20) + + You must enable the feature "multi-draw-indirect-count" to use this function. + """ + render_pass_encoder._multi_draw_indexed_indirect_count( + buffer, offset, count_buffer, count_buffer_offset, max_count + ) + + def create_statistics_query_set(device, *, label="", count: int, statistics): """ Create a query set that can collect the specified pipeline statistics.
diff --git a/wgpu/resources/codegen_report.md b/wgpu/resources/codegen_report.md index 3c69e6ac..a6cc5579 100644 --- a/wgpu/resources/codegen_report.md +++ b/wgpu/resources/codegen_report.md @@ -20,7 +20,7 @@ * Diffs for GPUQueue: add read_buffer, add read_texture, hide copy_external_image_to_texture * Validated 37 classes, 121 methods, 46 properties ### Patching API for backends/wgpu_native/_api.py -* Validated 37 classes, 119 methods, 0 properties +* Validated 37 classes, 121 methods, 0 properties ## Validating backends/wgpu_native/_api.py * Enum field FeatureName.texture-compression-bc-sliced-3d missing in wgpu.h * Enum field FeatureName.clip-distances missing in wgpu.h @@ -35,6 +35,6 @@ * Enum CanvasAlphaMode missing in wgpu.h * Enum CanvasToneMappingMode missing in wgpu.h * Wrote 236 enum mappings and 47 struct-field mappings to wgpu_native/_mappings.py -* Validated 140 C function calls -* Not using 65 C functions +* Validated 142 C function calls +* Not using 63 C functions * Validated 82 C structs