From ee2884147ab2145b4e250ce5397cbfdf4175eabf Mon Sep 17 00:00:00 2001 From: Frank Yellin Date: Fri, 20 Sep 2024 00:34:38 -0700 Subject: [PATCH 1/7] multi_draw_indirect_counter --- tests/test_wgpu_vertex_instance.py | 63 ++++++++++++++++++++++++++--- wgpu/backends/wgpu_native/_api.py | 26 ++++++++++++ wgpu/backends/wgpu_native/extras.py | 51 +++++++++++++++++++++-- wgpu/resources/codegen_report.md | 6 +-- 4 files changed, 133 insertions(+), 13 deletions(-) diff --git a/tests/test_wgpu_vertex_instance.py b/tests/test_wgpu_vertex_instance.py index ecda57dc..12807bcc 100644 --- a/tests/test_wgpu_vertex_instance.py +++ b/tests/test_wgpu_vertex_instance.py @@ -8,16 +8,18 @@ from wgpu.backends.wgpu_native.extras import ( multi_draw_indexed_indirect, multi_draw_indirect, + multi_draw_indirect_count, + multi_draw_indexed_indirect_count, ) -MAX_INFO = 100 +MAX_INFO = 1000 if not can_use_wgpu_lib: pytest.skip("Skipping tests that need the wgpu lib", allow_module_level=True) """ -The fundamental informartion about any of the many draw commands is the +The fundamental information about any of the many draw commands is the pair that is passed to the vertex shader. By using point-list topology, each call to the vertex shader turns into a single call to the fragment shader, where the pair is recorded. @@ -68,7 +70,7 @@ class Runner: REQUIRED_FEATURES = ["indirect-first-instance"] - OPTIONAL_FEATURES = ["multi-draw-indirect"] # we'll be adding more + OPTIONAL_FEATURES = ["multi-draw-indirect", "multi-draw-indirect-count"] @classmethod def is_usable(cls): @@ -82,6 +84,7 @@ def __init__(self): *[x for x in self.OPTIONAL_FEATURES if x in adapter.features], ] self.device = adapter.request_device(required_features=features) + self.output_texture = self.device.create_texture( # Actual size is immaterial. 
Could just be 1x1 size=[128, 128], @@ -163,11 +166,19 @@ def __init__(self): # We're going to want to try calling these draw functions from a buffer, and it # would be nice to test that these buffers have an offset self.draw_data_buffer = self.device.create_buffer_with_data( - data=np.uint32([0, 0, *self.draw_args1, *self.draw_args2]), + # The zeros at the beginning are to test "offset". + # The zeros at the end are because the _count methods require to buffer to + # be at least byte_offset + 16 * max_count bytes long + data=np.uint32([0, 0, *self.draw_args1, *self.draw_args2, *([0] * 50)]), usage="INDIRECT", ) self.draw_data_buffer_indexed = self.device.create_buffer_with_data( - data=np.uint32([0, 0, *self.draw_indexed_args1, *self.draw_indexed_args2]), + # The zeros at the beginning are to test "offset". + # The zeros at the end are because the _count methods require to buffer to + # be at least byte_offset + 20 * max_count bytes long + data=np.uint32( + [0, 0, *self.draw_indexed_args1, *self.draw_indexed_args2, *([0] * 50)] + ), usage="INDIRECT", ) @@ -211,7 +222,8 @@ def run_draw_test(self, draw_function, indexed, *, expected_result=None): expected_result = self.expected_result_draw_indexed else: expected_result = self.expected_result_draw - assert info_set == expected_result + if info_set != expected_result: + pytest.fail(f"Expected {sorted(info_set)}\nGot {sorted(expected_result)}") if not Runner.is_usable(): @@ -337,5 +349,44 @@ def draw(encoder): ) +@pytest.mark.parametrize("indexed", [False, True]) +@pytest.mark.parametrize("test_max_count", [False, True]) +def test_multi_draw_indirect_count(runner, test_max_count, indexed): + if "multi-draw-indirect-count" not in runner.device.features: + pytest.skip("Must have 'multi-draw-indirect-count' to run") + + print(f"{test_max_count=}, {indexed=} \n") + + count_buffer = runner.device.create_buffer_with_data( + data=(np.int32([10, 2])), usage="INDIRECT" + ) + if indexed: + function = 
multi_draw_indexed_indirect_count + buffer = runner.draw_data_buffer_indexed + else: + function = multi_draw_indirect_count + buffer = runner.draw_data_buffer + + if test_max_count: + # We pull a count of 10, but we're limiting it to 2 via max_count + count_buffer_offset, max_count = 0, 2 + else: + # We pull a count of 2, and set the max_count to something bigger. Buffer + # is required to be big enough to handle max_count. + count_buffer_offset, max_count = 4, 10 + + def draw(encoder): + function( + encoder, + buffer, + offset=8, + count_buffer=count_buffer, + count_buffer_offset=count_buffer_offset, + max_count=max_count, + ) + + runner.run_draw_test(draw, indexed) + + if __name__ == "__main__": run_tests(globals()) diff --git a/wgpu/backends/wgpu_native/_api.py b/wgpu/backends/wgpu_native/_api.py index 621dd54c..7e337e64 100644 --- a/wgpu/backends/wgpu_native/_api.py +++ b/wgpu/backends/wgpu_native/_api.py @@ -3031,6 +3031,32 @@ def _multi_draw_indexed_indirect(self, buffer, offset, count): self._internal, buffer._internal, int(offset), int(count) ) + def _multi_draw_indirect_count( + self, buffer, offset, count_buffer, count_buffer_offset, max_count + ): + # H: void f(WGPURenderPassEncoder encoder, WGPUBuffer buffer, uint64_t offset, WGPUBuffer count_buffer, uint64_t count_buffer_offset, uint32_t max_count) + libf.wgpuRenderPassEncoderMultiDrawIndirectCount( + self._internal, + buffer._internal, + int(offset), + count_buffer._internal, + int(count_buffer_offset), + int(max_count), + ) + + def _multi_draw_indexed_indirect_count( + self, buffer, offset, count_buffer, count_buffer_offset, max_count + ): + # H: void f(WGPURenderPassEncoder encoder, WGPUBuffer buffer, uint64_t offset, WGPUBuffer count_buffer, uint64_t count_buffer_offset, uint32_t max_count) + libf.wgpuRenderPassEncoderMultiDrawIndexedIndirectCount( + self._internal, + buffer._internal, + int(offset), + count_buffer._internal, + int(count_buffer_offset), + int(max_count), + ) + class 
GPURenderBundleEncoder( classes.GPURenderBundleEncoder, diff --git a/wgpu/backends/wgpu_native/extras.py b/wgpu/backends/wgpu_native/extras.py index e04196c9..ec9e8c1a 100644 --- a/wgpu/backends/wgpu_native/extras.py +++ b/wgpu/backends/wgpu_native/extras.py @@ -66,22 +66,65 @@ def set_push_constants( def multi_draw_indirect(render_pass_encoder, buffer, *, offset=0, count): """ - This is equvalent to + This is equivalent to for i in range(count): render_pass_encoder.draw(buffer, offset + i * 16) - You must enable the featue "multi-draw-indirect" to use this function. + You must enable the feature "multi-draw-indirect" to use this function. """ render_pass_encoder._multi_draw_indirect(buffer, offset, count) def multi_draw_indexed_indirect(render_pass_encoder, buffer, *, offset=0, count): """ - This is equvalent to + This is equivalent to for i in range(count): render_pass_encoder.draw_indexed(buffer, offset + i * 20) - You must enable the featue "multi-draw-indirect" to use this function. + You must enable the feature "multi-draw-indirect" to use this function. """ render_pass_encoder._multi_draw_indexed_indirect(buffer, offset, count) + + +def multi_draw_indirect_count( + render_pass_encoder, + buffer, + *, + offset=0, + count_buffer, + count_buffer_offset=0, + max_count, +): + """ + This is equivalent to + for i in range(count): + render_pass_encoder.draw(buffer, offset + i * 16) + + You must enable the feature "multi-draw-indirect-count" to use this function. + """ + render_pass_encoder._multi_draw_indirect_count( + buffer, offset, count_buffer, count_buffer_offset, max_count + ) + + +def multi_draw_indexed_indirect_count( + render_pass_encoder, + buffer, + *, + offset=0, + count_buffer, + count_buffer_offset=0, + max_count, +): + """ + This is equivalent to + + for i in range(count): + render_pass_encoder.draw_indexed(buffer, offset + i * 20) + + You must enable the feature "multi-draw-indirect-count" to use this function. 
+ """ + render_pass_encoder._multi_draw_indexed_indirect_count( + buffer, offset, count_buffer, count_buffer_offset, max_count + ) diff --git a/wgpu/resources/codegen_report.md b/wgpu/resources/codegen_report.md index 4f9a16d2..91c19da9 100644 --- a/wgpu/resources/codegen_report.md +++ b/wgpu/resources/codegen_report.md @@ -20,7 +20,7 @@ * Diffs for GPUQueue: add read_buffer, add read_texture, hide copy_external_image_to_texture * Validated 37 classes, 112 methods, 45 properties ### Patching API for backends/wgpu_native/_api.py -* Validated 37 classes, 100 methods, 0 properties +* Validated 37 classes, 102 methods, 0 properties ## Validating backends/wgpu_native/_api.py * Enum field FeatureName.texture-compression-bc-sliced-3d missing in wgpu.h * Enum field FeatureName.clip-distances missing in wgpu.h @@ -35,6 +35,6 @@ * Enum CanvasAlphaMode missing in wgpu.h * Enum CanvasToneMappingMode missing in wgpu.h * Wrote 236 enum mappings and 47 struct-field mappings to wgpu_native/_mappings.py -* Validated 133 C function calls -* Not using 70 C functions +* Validated 135 C function calls +* Not using 68 C functions * Validated 81 C structs From 53ccceaa47cb4ea78b5c3d790c2c9ce4c05a4879 Mon Sep 17 00:00:00 2001 From: Frank Yellin Date: Fri, 20 Sep 2024 09:41:13 -0700 Subject: [PATCH 2/7] Add documentation. --- docs/backends.rst | 53 ++++++++++++++++++++++++++--- wgpu/backends/wgpu_native/extras.py | 7 ++-- 2 files changed, 54 insertions(+), 6 deletions(-) diff --git a/docs/backends.rst b/docs/backends.rst index 8b985140..dbee99f0 100644 --- a/docs/backends.rst +++ b/docs/backends.rst @@ -159,12 +159,14 @@ bytes you wish to change. :param data_offset: The starting offset in the data at which to begin copying. -There are two functions that allow you to perform multiple draw calls at once. -Both require that you enable the feature "multi-draw-indirect". +There are four functions that allow you to perform multiple draw calls at once. 
+Two take the number of draws to perform as an argument; two have this value in a buffer. Typically, these calls do not reduce work or increase parallelism on the GPU. Rather they reduce driver overhead on the CPU. +The first two require that you enable the feature ``"multi-draw-indirect"``. + .. py:function:: wgpu.backends.wgpu_native.multi_draw_indirect(render_pass_encoder, buffer, *, offset=0, count): Equivalent to:: @@ -172,8 +174,10 @@ they reduce driver overhead on the CPU. render_pass_encoder.draw_indirect(buffer, offset + i * 16) :param render_pass_encoder: The current render pass encoder. - :param buffer: The indirect buffer containing the arguments. + :param buffer: The indirect buffer containing the arguments. Must have length + at least offset + 16 * count. :param offset: The byte offset in the indirect buffer containing the first argument. + Must be a multiple of 4. :param count: The number of draw operations to perform. .. py:function:: wgpu.backends.wgpu_native.multi_draw_indexed_indirect(render_pass_encoder, buffer, *, offset=0, count): @@ -184,10 +188,51 @@ they reduce driver overhead on the CPU. :param render_pass_encoder: The current render pass encoder. - :param buffer: The indirect buffer containing the arguments. + :param buffer: The indirect buffer containing the arguments. Must have length + at least offset + 20 * count. :param offset: The byte offset in the indirect buffer containing the first argument. + Must be a multiple of 4. :param count: The number of draw operations to perform. +The second two require that you enable the feature ``"multi-draw-indirect-count"``. +They are identical to the previous two, except that the ``count`` argument is replaced by +three arguments. The value at ``count_buffer_offset`` in ``count_buffer`` is treated as +an unsigned 32-bit integer. The ``count`` is the minimum of this value and ``max_count``. + +.. 
py:function:: wgpu.backends.wgpu_native.multi_draw_indirect_count(render_pass_encoder, buffer, *, offset=0, count_buffer, count_offset=0, max_count): + + Equivalent to:: + count = min(<u32 at count_buffer_offset in count_buffer>, max_count) + for i in range(count): + render_pass_encoder.draw_indirect(buffer, offset + i * 16) + + :param render_pass_encoder: The current render pass encoder. + :param buffer: The indirect buffer containing the arguments. Must have length + at least offset + 16 * max_count. + :param offset: The byte offset in the indirect buffer containing the first argument. + Must be a multiple of 4. + :param count_buffer: The indirect buffer containing the count. + :param count_buffer_offset: The offset into count_buffer. + Must be a multiple of 4. + :param max_count: The maximum number of draw operations to perform. + +.. py:function:: wgpu.backends.wgpu_native.multi_draw_indexed_indirect_count(render_pass_encoder, buffer, *, offset=0, count_buffer, count_offset=0, max_count): + + Equivalent to:: + count = min(<u32 at count_buffer_offset in count_buffer>, max_count) + for i in range(count): + render_pass_encoder.draw_indexed_indirect(buffer, offset + i * 2-) + + :param render_pass_encoder: The current render pass encoder. + :param buffer: The indirect buffer containing the arguments. Must have length + at least offset + 20 * max_count. + :param offset: The byte offset in the indirect buffer containing the first argument. + Must be a multiple of 4. + :param count_buffer: The indirect buffer containing the count. + :param count_buffer_offset: The offset into count_buffer. + Must be a multiple of 4. + :param max_count: The maximum number of draw operations to perform. 
+ The js_webgpu backend --------------------- diff --git a/wgpu/backends/wgpu_native/extras.py b/wgpu/backends/wgpu_native/extras.py index ec9e8c1a..8083c9b0 100644 --- a/wgpu/backends/wgpu_native/extras.py +++ b/wgpu/backends/wgpu_native/extras.py @@ -97,7 +97,9 @@ def multi_draw_indirect_count( max_count, ): """ - This is equivalent to + This is equivalent to: + + count = min(<u32 at count_buffer_offset in count_buffer>, max_count) for i in range(count): render_pass_encoder.draw(buffer, offset + i * 16) @@ -118,8 +120,9 @@ def multi_draw_indexed_indirect_count( max_count, ): """ - This is equivalent to + This is equivalent to: + count = min(<u32 at count_buffer_offset in count_buffer>, max_count) for i in range(count): render_pass_encoder.draw_indexed(buffer, offset + i * 20) From 021241edf8cb4b17a33f75dd624381fd21bb698a Mon Sep 17 00:00:00 2001 From: Frank Yellin Date: Fri, 20 Sep 2024 10:35:39 -0700 Subject: [PATCH 3/7] Bring back the bug. I accidentally indented a line of code and it ended up inside a function definition. --- tests/test_wgpu_vertex_instance.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_wgpu_vertex_instance.py b/tests/test_wgpu_vertex_instance.py index 12807bcc..1371fa9a 100644 --- a/tests/test_wgpu_vertex_instance.py +++ b/tests/test_wgpu_vertex_instance.py @@ -385,7 +385,7 @@ def draw(encoder): max_count=max_count, ) - runner.run_draw_test(draw, indexed) + runner.run_draw_test(draw, indexed) if __name__ == "__main__": From 2bd540862a36f741b1a4c719356e25265344230d Mon Sep 17 00:00:00 2001 From: Frank Yellin Date: Fri, 20 Sep 2024 13:11:07 -0700 Subject: [PATCH 4/7] Update to show everything reported by bug report. Note that "indexed" works when we do the bug_patch, but not otherwise. 
--- tests/test_wgpu_vertex_instance.py | 46 +++++++++++++++++++++++------- 1 file changed, 36 insertions(+), 10 deletions(-) diff --git a/tests/test_wgpu_vertex_instance.py b/tests/test_wgpu_vertex_instance.py index 1371fa9a..6527ee10 100644 --- a/tests/test_wgpu_vertex_instance.py +++ b/tests/test_wgpu_vertex_instance.py @@ -170,7 +170,7 @@ def __init__(self): # The zeros at the end are because the _count methods require to buffer to # be at least byte_offset + 16 * max_count bytes long data=np.uint32([0, 0, *self.draw_args1, *self.draw_args2, *([0] * 50)]), - usage="INDIRECT", + usage="INDIRECT", # copy dst for patching ) self.draw_data_buffer_indexed = self.device.create_buffer_with_data( # The zeros at the beginning are to test "offset". @@ -182,6 +182,26 @@ def __init__(self): usage="INDIRECT", ) + self.count_buffer = self.device.create_buffer_with_data( + data=(np.int32([10, 2])), usage="INDIRECT" + ) + self.draw_data_buffer_patched = self.device.create_buffer_with_data( + # The zeros at the beginning are to test the "offset". + # The zeros at the end are because the _count methods require to buffer to + # be at least byte_offset + 16 * max_count bytes long + data=np.uint32([10, 2, *self.draw_args1, *self.draw_args2, *([0] * 50)]), + usage="INDIRECT", # copy dst for patching + ) + self.draw_data_buffer_indexed_patched = self.device.create_buffer_with_data( + # The zeros at the beginning are to test "offset". + # The zeros at the end are because the _count methods require to buffer to + # be at least byte_offset + 20 * max_count bytes long + data=np.uint32( + [10, 2, *self.draw_indexed_args1, *self.draw_indexed_args2, *([0] * 50)] + ), + usage="INDIRECT", + ) + # And let's not forget our index buffer. 
self.index_buffer = self.device.create_buffer_with_data( data=(np.uint32(indices)), usage="INDEX" @@ -223,7 +243,7 @@ def run_draw_test(self, draw_function, indexed, *, expected_result=None): else: expected_result = self.expected_result_draw if info_set != expected_result: - pytest.fail(f"Expected {sorted(info_set)}\nGot {sorted(expected_result)}") + pytest.fail(f"Expected {sorted(expected_result)}\nGot {sorted(info_set)}") if not Runner.is_usable(): @@ -349,24 +369,30 @@ def draw(encoder): ) +@pytest.mark.parametrize("bug_patch", [False, True]) @pytest.mark.parametrize("indexed", [False, True]) @pytest.mark.parametrize("test_max_count", [False, True]) -def test_multi_draw_indirect_count(runner, test_max_count, indexed): +def test_multi_draw_indirect_count(runner, test_max_count, indexed, bug_patch): if "multi-draw-indirect-count" not in runner.device.features: pytest.skip("Must have 'multi-draw-indirect-count' to run") - print(f"{test_max_count=}, {indexed=} \n") + print(f"{bug_patch=}, {indexed=}, {test_max_count=} \n") - count_buffer = runner.device.create_buffer_with_data( - data=(np.int32([10, 2])), usage="INDIRECT" - ) if indexed: function = multi_draw_indexed_indirect_count - buffer = runner.draw_data_buffer_indexed + if not bug_patch: + buffer = runner.draw_data_buffer_indexed + else: + buffer = runner.draw_data_buffer_indexed_patched else: function = multi_draw_indirect_count - buffer = runner.draw_data_buffer + if not bug_patch: + buffer = runner.draw_data_buffer + else: + buffer = runner.draw_data_buffer_patched + # Either way, we're going to do 2 draws. But one via the max_count and one via the + # information in the buffer. 
if test_max_count: # We pull a count of 10, but we're limiting it to 2 via max_count count_buffer_offset, max_count = 0, 2 @@ -380,7 +406,7 @@ def draw(encoder): encoder, buffer, offset=8, - count_buffer=count_buffer, + count_buffer=runner.count_buffer, count_buffer_offset=count_buffer_offset, max_count=max_count, ) From b6565e77b1043482cb915e153bf5ec9222b3594f Mon Sep 17 00:00:00 2001 From: Frank Yellin Date: Tue, 24 Sep 2024 10:33:40 -0700 Subject: [PATCH 5/7] Add blank line where needed. --- docs/backends.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/backends.rst b/docs/backends.rst index dbee99f0..4d45eac5 100644 --- a/docs/backends.rst +++ b/docs/backends.rst @@ -183,6 +183,7 @@ The first two require that you enable the feature ``"multi-draw-indirect"``. .. py:function:: wgpu.backends.wgpu_native.multi_draw_indexed_indirect(render_pass_encoder, buffer, *, offset=0, count): Equivalent to:: + for i in range(count): render_pass_encoder.draw_indexed_indirect(buffer, offset + i * 2-) @@ -202,6 +203,7 @@ an unsigned 32-bit integer. The ``count`` is the minimum of this value and ``max .. py:function:: wgpu.backends.wgpu_native.multi_draw_indirect_count(render_pass_encoder, buffer, *, offset=0, count_buffer, count_offset=0, max_count): Equivalent to:: + count = min(<u32 at count_buffer_offset in count_buffer>, max_count) for i in range(count): render_pass_encoder.draw_indirect(buffer, offset + i * 16) From f5f0b60c16d6c7e110270ca6a56d2dbd7b5d2b40 Mon Sep 17 00:00:00 2001 From: fyellin Date: Tue, 24 Sep 2024 23:11:02 -0700 Subject: [PATCH 6/7] Update backends.rst Add missing newline --- docs/backends.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/backends.rst b/docs/backends.rst index 4d45eac5..4e7d7ed1 100644 --- a/docs/backends.rst +++ b/docs/backends.rst @@ -221,6 +221,7 @@ an unsigned 32-bit integer. The ``count`` is the minimum of this value and ``max .. 
py:function:: wgpu.backends.wgpu_native.multi_draw_indexed_indirect_count(render_pass_encoder, buffer, *, offset=0, count_buffer, count_offset=0, max_count): Equivalent to:: + count = min(<u32 at count_buffer_offset in count_buffer>, max_count) for i in range(count): render_pass_encoder.draw_indexed_indirect(buffer, offset + i * 2-) From 719d32a41a928c12013b93052864315f0d41da68 Mon Sep 17 00:00:00 2001 From: Frank Yellin Date: Thu, 7 Nov 2024 17:09:23 -0800 Subject: [PATCH 7/7] Fix codegen --- wgpu/resources/codegen_report.md | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/wgpu/resources/codegen_report.md b/wgpu/resources/codegen_report.md index 30cb560d..a6cc5579 100644 --- a/wgpu/resources/codegen_report.md +++ b/wgpu/resources/codegen_report.md @@ -20,11 +20,7 @@ * Diffs for GPUQueue: add read_buffer, add read_texture, hide copy_external_image_to_texture * Validated 37 classes, 121 methods, 46 properties ### Patching API for backends/wgpu_native/_api.py -<<<<<<< HEAD -* Validated 37 classes, 122 methods, 0 properties -======= -* Validated 37 classes, 119 methods, 0 properties ->>>>>>> main +* Validated 37 classes, 121 methods, 0 properties ## Validating backends/wgpu_native/_api.py * Enum field FeatureName.texture-compression-bc-sliced-3d missing in wgpu.h * Enum field FeatureName.clip-distances missing in wgpu.h