From 42ffd8d57a05913b98eda9eae8f421ab48e2c641 Mon Sep 17 00:00:00 2001 From: Robert Swain Date: Mon, 28 Aug 2023 22:36:14 +0200 Subject: [PATCH] Implement batching for 2D and 3D meshes --- crates/bevy_core_pipeline/Cargo.toml | 1 + crates/bevy_core_pipeline/src/core_2d/mod.rs | 25 +- crates/bevy_core_pipeline/src/core_3d/mod.rs | 69 +++- crates/bevy_core_pipeline/src/prepass/mod.rs | 47 ++- crates/bevy_gizmos/src/pipeline_2d.rs | 3 +- crates/bevy_gizmos/src/pipeline_3d.rs | 3 +- crates/bevy_pbr/Cargo.toml | 1 + crates/bevy_pbr/src/material.rs | 69 +++- crates/bevy_pbr/src/prepass/mod.rs | 4 + crates/bevy_pbr/src/render/light.rs | 27 +- crates/bevy_pbr/src/render/mesh.rs | 244 ++++++++++---- crates/bevy_pbr/src/wireframe.rs | 3 +- crates/bevy_render/Cargo.toml | 1 + crates/bevy_render/src/render_phase/mod.rs | 35 +- .../render_resource/batched_uniform_buffer.rs | 3 +- .../src/render_resource/gpu_array_buffer.rs | 15 +- crates/bevy_sprite/Cargo.toml | 1 + crates/bevy_sprite/src/mesh2d/material.rs | 71 +++- crates/bevy_sprite/src/mesh2d/mesh.rs | 311 +++++++++++++++--- crates/bevy_sprite/src/mesh2d/mesh2d.wgsl | 8 +- .../src/mesh2d/mesh2d_bindings.wgsl | 9 +- .../src/mesh2d/mesh2d_functions.wgsl | 34 +- .../bevy_sprite/src/mesh2d/mesh2d_types.wgsl | 12 +- crates/bevy_sprite/src/render/mod.rs | 14 +- crates/bevy_ui/Cargo.toml | 1 + crates/bevy_ui/src/render/mod.rs | 8 +- crates/bevy_ui/src/render/render_pass.rs | 25 +- examples/shader/texture_binding_array.rs | 1 + 28 files changed, 851 insertions(+), 194 deletions(-) diff --git a/crates/bevy_core_pipeline/Cargo.toml b/crates/bevy_core_pipeline/Cargo.toml index 27825880d3473f..e15cdf85775ecb 100644 --- a/crates/bevy_core_pipeline/Cargo.toml +++ b/crates/bevy_core_pipeline/Cargo.toml @@ -33,3 +33,4 @@ bevy_utils = { path = "../bevy_utils", version = "0.12.0-dev" } serde = { version = "1", features = ["derive"] } bitflags = "2.3" radsort = "0.1" +nonmax = "0.5.3" diff --git a/crates/bevy_core_pipeline/src/core_2d/mod.rs 
b/crates/bevy_core_pipeline/src/core_2d/mod.rs index 49f4260b203fc7..064775345e075d 100644 --- a/crates/bevy_core_pipeline/src/core_2d/mod.rs +++ b/crates/bevy_core_pipeline/src/core_2d/mod.rs @@ -19,6 +19,8 @@ pub mod graph { } pub const CORE_2D: &str = graph::NAME; +use std::ops::Range; + pub use camera_2d::*; pub use main_pass_2d_node::*; @@ -36,6 +38,7 @@ use bevy_render::{ Extract, ExtractSchedule, Render, RenderApp, RenderSet, }; use bevy_utils::FloatOrd; +use nonmax::NonMaxU32; use crate::{tonemapping::TonemappingNode, upscaling::UpscalingNode}; @@ -83,7 +86,8 @@ pub struct Transparent2d { pub entity: Entity, pub pipeline: CachedRenderPipelineId, pub draw_function: DrawFunctionId, - pub batch_size: usize, + pub batch_range: Range, + pub dynamic_offset: Option, } impl PhaseItem for Transparent2d { @@ -110,8 +114,23 @@ impl PhaseItem for Transparent2d { } #[inline] - fn batch_size(&self) -> usize { - self.batch_size + fn batch_range(&self) -> &Range { + &self.batch_range + } + + #[inline] + fn batch_range_mut(&mut self) -> &mut Range { + &mut self.batch_range + } + + #[inline] + fn dynamic_offset(&self) -> Option { + self.dynamic_offset + } + + #[inline] + fn dynamic_offset_mut(&mut self) -> &mut Option { + &mut self.dynamic_offset } } diff --git a/crates/bevy_core_pipeline/src/core_3d/mod.rs b/crates/bevy_core_pipeline/src/core_3d/mod.rs index f415751cde7192..8cd4245afd3f62 100644 --- a/crates/bevy_core_pipeline/src/core_3d/mod.rs +++ b/crates/bevy_core_pipeline/src/core_3d/mod.rs @@ -24,7 +24,7 @@ pub mod graph { } pub const CORE_3D: &str = graph::NAME; -use std::cmp::Reverse; +use std::{cmp::Reverse, ops::Range}; pub use camera_3d::*; pub use main_opaque_pass_3d_node::*; @@ -51,6 +51,7 @@ use bevy_render::{ Extract, ExtractSchedule, Render, RenderApp, RenderSet, }; use bevy_utils::{FloatOrd, HashMap}; +use nonmax::NonMaxU32; use crate::{ prepass::{ @@ -135,7 +136,8 @@ pub struct Opaque3d { pub pipeline: CachedRenderPipelineId, pub entity: Entity, pub 
draw_function: DrawFunctionId, - pub batch_size: usize, + pub batch_range: Range, + pub dynamic_offset: Option, } impl PhaseItem for Opaque3d { @@ -164,8 +166,23 @@ impl PhaseItem for Opaque3d { } #[inline] - fn batch_size(&self) -> usize { - self.batch_size + fn batch_range(&self) -> &Range { + &self.batch_range + } + + #[inline] + fn batch_range_mut(&mut self) -> &mut Range { + &mut self.batch_range + } + + #[inline] + fn dynamic_offset(&self) -> Option { + self.dynamic_offset + } + + #[inline] + fn dynamic_offset_mut(&mut self) -> &mut Option { + &mut self.dynamic_offset } } @@ -181,7 +198,8 @@ pub struct AlphaMask3d { pub pipeline: CachedRenderPipelineId, pub entity: Entity, pub draw_function: DrawFunctionId, - pub batch_size: usize, + pub batch_range: Range, + pub dynamic_offset: Option, } impl PhaseItem for AlphaMask3d { @@ -210,8 +228,23 @@ impl PhaseItem for AlphaMask3d { } #[inline] - fn batch_size(&self) -> usize { - self.batch_size + fn batch_range(&self) -> &Range { + &self.batch_range + } + + #[inline] + fn batch_range_mut(&mut self) -> &mut Range { + &mut self.batch_range + } + + #[inline] + fn dynamic_offset(&self) -> Option { + self.dynamic_offset + } + + #[inline] + fn dynamic_offset_mut(&mut self) -> &mut Option { + &mut self.dynamic_offset } } @@ -227,7 +260,8 @@ pub struct Transparent3d { pub pipeline: CachedRenderPipelineId, pub entity: Entity, pub draw_function: DrawFunctionId, - pub batch_size: usize, + pub batch_range: Range, + pub dynamic_offset: Option, } impl PhaseItem for Transparent3d { @@ -255,8 +289,23 @@ impl PhaseItem for Transparent3d { } #[inline] - fn batch_size(&self) -> usize { - self.batch_size + fn batch_range(&self) -> &Range { + &self.batch_range + } + + #[inline] + fn batch_range_mut(&mut self) -> &mut Range { + &mut self.batch_range + } + + #[inline] + fn dynamic_offset(&self) -> Option { + self.dynamic_offset + } + + #[inline] + fn dynamic_offset_mut(&mut self) -> &mut Option { + &mut self.dynamic_offset } } diff --git 
a/crates/bevy_core_pipeline/src/prepass/mod.rs b/crates/bevy_core_pipeline/src/prepass/mod.rs index 38c71050a194b5..7e484547fbd439 100644 --- a/crates/bevy_core_pipeline/src/prepass/mod.rs +++ b/crates/bevy_core_pipeline/src/prepass/mod.rs @@ -27,7 +27,7 @@ pub mod node; -use std::cmp::Reverse; +use std::{cmp::Reverse, ops::Range}; use bevy_ecs::prelude::*; use bevy_reflect::Reflect; @@ -37,6 +37,7 @@ use bevy_render::{ texture::CachedTexture, }; use bevy_utils::FloatOrd; +use nonmax::NonMaxU32; pub const DEPTH_PREPASS_FORMAT: TextureFormat = TextureFormat::Depth32Float; pub const NORMAL_PREPASS_FORMAT: TextureFormat = TextureFormat::Rgb10a2Unorm; @@ -83,6 +84,8 @@ pub struct Opaque3dPrepass { pub entity: Entity, pub pipeline_id: CachedRenderPipelineId, pub draw_function: DrawFunctionId, + pub batch_range: Range, + pub dynamic_offset: Option, } impl PhaseItem for Opaque3dPrepass { @@ -109,6 +112,26 @@ impl PhaseItem for Opaque3dPrepass { // Key negated to match reversed SortKey ordering radsort::sort_by_key(items, |item| -item.distance); } + + #[inline] + fn batch_range(&self) -> &Range { + &self.batch_range + } + + #[inline] + fn batch_range_mut(&mut self) -> &mut Range { + &mut self.batch_range + } + + #[inline] + fn dynamic_offset(&self) -> Option { + self.dynamic_offset + } + + #[inline] + fn dynamic_offset_mut(&mut self) -> &mut Option { + &mut self.dynamic_offset + } } impl CachedRenderPipelinePhaseItem for Opaque3dPrepass { @@ -128,6 +151,8 @@ pub struct AlphaMask3dPrepass { pub entity: Entity, pub pipeline_id: CachedRenderPipelineId, pub draw_function: DrawFunctionId, + pub batch_range: Range, + pub dynamic_offset: Option, } impl PhaseItem for AlphaMask3dPrepass { @@ -154,6 +179,26 @@ impl PhaseItem for AlphaMask3dPrepass { // Key negated to match reversed SortKey ordering radsort::sort_by_key(items, |item| -item.distance); } + + #[inline] + fn batch_range(&self) -> &Range { + &self.batch_range + } + + #[inline] + fn batch_range_mut(&mut self) -> &mut Range 
{ + &mut self.batch_range + } + + #[inline] + fn dynamic_offset(&self) -> Option { + self.dynamic_offset + } + + #[inline] + fn dynamic_offset_mut(&mut self) -> &mut Option { + &mut self.dynamic_offset + } } impl CachedRenderPipelinePhaseItem for AlphaMask3dPrepass { diff --git a/crates/bevy_gizmos/src/pipeline_2d.rs b/crates/bevy_gizmos/src/pipeline_2d.rs index fa345f2bf05e15..e5472ff61a4dde 100644 --- a/crates/bevy_gizmos/src/pipeline_2d.rs +++ b/crates/bevy_gizmos/src/pipeline_2d.rs @@ -178,7 +178,8 @@ fn queue_line_gizmos_2d( draw_function, pipeline, sort_key: FloatOrd(f32::INFINITY), - batch_size: 1, + batch_range: 0..1, + dynamic_offset: None, }); } } diff --git a/crates/bevy_gizmos/src/pipeline_3d.rs b/crates/bevy_gizmos/src/pipeline_3d.rs index 33712fa0205577..c15a1e404256a6 100644 --- a/crates/bevy_gizmos/src/pipeline_3d.rs +++ b/crates/bevy_gizmos/src/pipeline_3d.rs @@ -192,7 +192,8 @@ fn queue_line_gizmos_3d( draw_function, pipeline, distance: 0., - batch_size: 1, + batch_range: 0..1, + dynamic_offset: None, }); } } diff --git a/crates/bevy_pbr/Cargo.toml b/crates/bevy_pbr/Cargo.toml index 5ff50b66d6644b..371cb5c4a87bf7 100644 --- a/crates/bevy_pbr/Cargo.toml +++ b/crates/bevy_pbr/Cargo.toml @@ -33,3 +33,4 @@ bytemuck = { version = "1", features = ["derive"] } naga_oil = "0.8" radsort = "0.1" smallvec = "1.6" +nonmax = "0.5.3" diff --git a/crates/bevy_pbr/src/material.rs b/crates/bevy_pbr/src/material.rs index 68361938dec167..545609cd6c3c6b 100644 --- a/crates/bevy_pbr/src/material.rs +++ b/crates/bevy_pbr/src/material.rs @@ -21,7 +21,6 @@ use bevy_ecs::{ }; use bevy_reflect::{TypePath, TypeUuid}; use bevy_render::{ - extract_component::ExtractComponentPlugin, mesh::{Mesh, MeshVertexBufferLayout}, prelude::Image, render_asset::{prepare_assets, RenderAssets}, @@ -30,13 +29,13 @@ use bevy_render::{ RenderPhase, SetItemPipeline, TrackedRenderPass, }, render_resource::{ - AsBindGroup, AsBindGroupError, BindGroup, BindGroupLayout, OwnedBindingResource, - 
PipelineCache, RenderPipelineDescriptor, Shader, ShaderRef, SpecializedMeshPipeline, - SpecializedMeshPipelineError, SpecializedMeshPipelines, + AsBindGroup, AsBindGroupError, BindGroup, BindGroupId, BindGroupLayout, + OwnedBindingResource, PipelineCache, RenderPipelineDescriptor, Shader, ShaderRef, + SpecializedMeshPipeline, SpecializedMeshPipelineError, SpecializedMeshPipelines, }, renderer::RenderDevice, texture::FallbackImage, - view::{ExtractedView, Msaa, VisibleEntities}, + view::{ExtractedView, Msaa, ViewVisibility, VisibleEntities}, Extract, ExtractSchedule, Render, RenderApp, RenderSet, }; use bevy_utils::{tracing::error, HashMap, HashSet}; @@ -187,8 +186,7 @@ where M::Data: PartialEq + Eq + Hash + Clone, { fn build(&self, app: &mut App) { - app.add_asset::() - .add_plugins(ExtractComponentPlugin::>::extract_visible()); + app.add_asset::(); if let Ok(render_app) = app.get_sub_app_mut(RenderApp) { render_app @@ -200,7 +198,10 @@ where .init_resource::>() .init_resource::>() .init_resource::>>() - .add_systems(ExtractSchedule, extract_materials::) + .add_systems( + ExtractSchedule, + (extract_materials::, extract_material_meshes::), + ) .add_systems( Render, ( @@ -232,6 +233,27 @@ where } } +fn extract_material_meshes( + mut commands: Commands, + mut previous_len: Local, + query: Extract)>>, +) { + let mut values = Vec::with_capacity(*previous_len); + for (entity, view_visibility, material) in &query { + if view_visibility.get() { + // NOTE: MaterialBindGroupId is inserted here to avoid a table move. Upcoming changes + // to use SparseSet for render world entity storage will do this automatically. + values.push(( + entity, + (material.clone_weak(), MaterialBindGroupId::default()), + )); + } + } + *previous_len = values.len(); + // FIXME: Entities still have to be spawned because phases assume entities exist + commands.insert_or_spawn_batch(values); +} + /// A key uniquely identifying a specialized [`MaterialPipeline`]. 
pub struct MaterialPipelineKey { pub mesh_key: MeshPipelineKey, @@ -383,7 +405,12 @@ pub fn queue_material_meshes( msaa: Res, render_meshes: Res>, render_materials: Res>, - material_meshes: Query<(&Handle, &Handle, &MeshTransforms)>, + mut material_meshes: Query<( + &Handle, + &mut MaterialBindGroupId, + &Handle, + &MeshTransforms, + )>, images: Res>, mut views: Query<( &ExtractedView, @@ -467,8 +494,8 @@ pub fn queue_material_meshes( let rangefinder = view.rangefinder3d(); for visible_entity in &visible_entities.entities { - if let Ok((material_handle, mesh_handle, mesh_transforms)) = - material_meshes.get(*visible_entity) + if let Ok((material_handle, mut material_binding_meta, mesh_handle, mesh_transforms)) = + material_meshes.get_mut(*visible_entity) { if let (Some(mesh), Some(material)) = ( render_meshes.get(mesh_handle), @@ -515,6 +542,8 @@ pub fn queue_material_meshes( } }; + *material_binding_meta = material.get_binding_meta(); + let distance = rangefinder .distance_translation(&mesh_transforms.transform.translation) + material.properties.depth_bias; @@ -525,7 +554,8 @@ pub fn queue_material_meshes( draw_function: draw_opaque_pbr, pipeline: pipeline_id, distance, - batch_size: 1, + batch_range: 0..1, + dynamic_offset: None, }); } AlphaMode::Mask(_) => { @@ -534,7 +564,8 @@ pub fn queue_material_meshes( draw_function: draw_alpha_mask_pbr, pipeline: pipeline_id, distance, - batch_size: 1, + batch_range: 0..1, + dynamic_offset: None, }); } AlphaMode::Blend @@ -546,7 +577,8 @@ pub fn queue_material_meshes( draw_function: draw_transparent_pbr, pipeline: pipeline_id, distance, - batch_size: 1, + batch_range: 0..1, + dynamic_offset: None, }); } } @@ -574,6 +606,15 @@ pub struct PreparedMaterial { pub properties: MaterialProperties, } +#[derive(Component, Default, PartialEq, Eq, Deref, DerefMut)] +pub struct MaterialBindGroupId(Option); + +impl PreparedMaterial { + pub fn get_binding_meta(&self) -> MaterialBindGroupId { + 
MaterialBindGroupId(Some(self.bind_group.id())) + } +} + #[derive(Resource)] pub struct ExtractedMaterials { extracted: Vec<(Handle, M)>, diff --git a/crates/bevy_pbr/src/prepass/mod.rs b/crates/bevy_pbr/src/prepass/mod.rs index 21ea58e57f8f65..a5065df88417b9 100644 --- a/crates/bevy_pbr/src/prepass/mod.rs +++ b/crates/bevy_pbr/src/prepass/mod.rs @@ -852,6 +852,8 @@ pub fn queue_prepass_material_meshes( draw_function: opaque_draw_prepass, pipeline_id, distance, + batch_range: 0..1, + dynamic_offset: None, }); } AlphaMode::Mask(_) => { @@ -860,6 +862,8 @@ pub fn queue_prepass_material_meshes( draw_function: alpha_mask_draw_prepass, pipeline_id, distance, + batch_range: 0..1, + dynamic_offset: None, }); } AlphaMode::Blend diff --git a/crates/bevy_pbr/src/render/light.rs b/crates/bevy_pbr/src/render/light.rs index e18e9d4acae4ae..ddfba8710132d0 100644 --- a/crates/bevy_pbr/src/render/light.rs +++ b/crates/bevy_pbr/src/render/light.rs @@ -30,7 +30,8 @@ use bevy_utils::{ tracing::{error, warn}, HashMap, }; -use std::{hash::Hash, num::NonZeroU64}; +use nonmax::NonMaxU32; +use std::{hash::Hash, num::NonZeroU64, ops::Range}; #[derive(Component)] pub struct ExtractedPointLight { @@ -1641,6 +1642,8 @@ pub fn queue_shadows( pipeline: pipeline_id, entity, distance: 0.0, // TODO: sort front-to-back + batch_range: 0..1, + dynamic_offset: None, }); } } @@ -1654,6 +1657,8 @@ pub struct Shadow { pub entity: Entity, pub pipeline: CachedRenderPipelineId, pub draw_function: DrawFunctionId, + pub batch_range: Range, + pub dynamic_offset: Option, } impl PhaseItem for Shadow { @@ -1681,6 +1686,26 @@ impl PhaseItem for Shadow { // better than rebinding everything at a high rate. 
radsort::sort_by_key(items, |item| item.sort_key()); } + + #[inline] + fn batch_range(&self) -> &Range { + &self.batch_range + } + + #[inline] + fn batch_range_mut(&mut self) -> &mut Range { + &mut self.batch_range + } + + #[inline] + fn dynamic_offset(&self) -> Option { + self.dynamic_offset + } + + #[inline] + fn dynamic_offset_mut(&mut self) -> &mut Option { + &mut self.dynamic_offset + } } impl CachedRenderPipelinePhaseItem for Shadow { diff --git a/crates/bevy_pbr/src/render/mesh.rs b/crates/bevy_pbr/src/render/mesh.rs index e9abd146098514..e10a4a1dab164b 100644 --- a/crates/bevy_pbr/src/render/mesh.rs +++ b/crates/bevy_pbr/src/render/mesh.rs @@ -1,15 +1,15 @@ use crate::{ environment_map, prepass, EnvironmentMapLight, FogMeta, GlobalLightMeta, GpuFog, GpuLights, - GpuPointLights, LightMeta, NotShadowCaster, NotShadowReceiver, PreviousGlobalTransform, - ScreenSpaceAmbientOcclusionTextures, Shadow, ShadowSamplers, ViewClusterBindings, - ViewFogUniformOffset, ViewLightsUniformOffset, ViewShadowBindings, + GpuPointLights, LightMeta, MaterialBindGroupId, NotShadowCaster, NotShadowReceiver, + PreviousGlobalTransform, ScreenSpaceAmbientOcclusionTextures, Shadow, ShadowSamplers, + ViewClusterBindings, ViewFogUniformOffset, ViewLightsUniformOffset, ViewShadowBindings, CLUSTERED_FORWARD_STORAGE_BUFFER_COUNT, MAX_CASCADES_PER_LIGHT, MAX_DIRECTIONAL_LIGHTS, }; use bevy_app::Plugin; use bevy_asset::{load_internal_asset, Assets, Handle, HandleId, HandleUntyped}; use bevy_core_pipeline::{ core_3d::{AlphaMask3d, Opaque3d, Transparent3d}, - prepass::ViewPrepassTextures, + prepass::{AlphaMask3dPrepass, Opaque3dPrepass, ViewPrepassTextures}, tonemapping::{ get_lut_bind_group_layout_entries, get_lut_bindings, Tonemapping, TonemappingLuts, }, @@ -30,7 +30,10 @@ use bevy_render::{ }, prelude::Msaa, render_asset::RenderAssets, - render_phase::{PhaseItem, RenderCommand, RenderCommandResult, RenderPhase, TrackedRenderPass}, + render_phase::{ + CachedRenderPipelinePhaseItem, 
DrawFunctionId, PhaseItem, RenderCommand, + RenderCommandResult, RenderPhase, TrackedRenderPass, + }, render_resource::*, renderer::{RenderDevice, RenderQueue}, texture::{ @@ -42,7 +45,7 @@ use bevy_render::{ }; use bevy_transform::components::GlobalTransform; use bevy_utils::{tracing::error, HashMap, Hashed}; -use fixedbitset::FixedBitSet; +use nonmax::NonMaxU32; use crate::render::{ morph::{extract_morphs, prepare_morphs, MorphIndex, MorphUniform}, @@ -384,60 +387,180 @@ pub fn extract_skinned_meshes( commands.insert_or_spawn_batch(values); } +/// Data necessary to be equal for two draw commands to be mergeable +/// +/// This is based on the following assumptions: +/// - Only entities with prepared assets (pipelines, materials, meshes) are +/// queued to phases +/// - View bindings are constant across a phase for a given draw function as +/// phases are per-view +/// - `prepare_mesh_uniforms` is the only system that performs this batching +/// and has sole responsibility for preparing the per-object data. As such +/// the mesh binding and dynamic offsets are assumed to only be variable as a +/// result of the `prepare_mesh_uniforms` system, e.g. due to having to split +/// data across separate uniform bindings within the same buffer due to the +/// maximum uniform buffer binding size. +#[derive(Default, PartialEq, Eq)] +struct BatchMeta<'mat, 'mesh> { + /// The pipeline id encompasses all pipeline configuration including vertex + /// buffers and layouts, shaders and their specializations, bind group + /// layouts, etc. + pipeline_id: Option, + /// The draw function id defines the RenderCommands that are called to + /// set the pipeline and bindings, and make the draw command + draw_function_id: Option, + /// The material binding meta includes the material bind group id and + /// dynamic offsets. 
+ material_binding_meta: Option<&'mat MaterialBindGroupId>, + mesh_handle: Option<&'mesh Handle>, + dynamic_offset: Option, +} + +impl<'mat, 'mesh> BatchMeta<'mat, 'mesh> { + #[inline] + fn matches(&self, other: &BatchMeta<'mat, 'mesh>, consider_material: bool) -> bool { + self.pipeline_id == other.pipeline_id + && self.draw_function_id == other.draw_function_id + && self.mesh_handle == other.mesh_handle + && self.dynamic_offset == other.dynamic_offset + && (!consider_material || self.material_binding_meta == other.material_binding_meta) + } +} + +#[derive(Default)] +struct BatchState<'mat, 'mesh> { + meta: BatchMeta<'mat, 'mesh>, + /// The base index in the object data binding's array + gpu_array_buffer_index: GpuArrayBufferIndex, + /// The number of entities in the batch + count: u32, + item_index: usize, +} + +fn update_batch_data(item: &mut I, batch: &BatchState) { + let BatchState { + count, + gpu_array_buffer_index, + .. + } = batch; + *item.batch_range_mut() = gpu_array_buffer_index.index..(gpu_array_buffer_index.index + *count); + *item.dynamic_offset_mut() = gpu_array_buffer_index.dynamic_offset; +} + +fn process_phase( + object_data_buffer: &mut GpuArrayBuffer, + object_query: &ObjectQuery, + phase: &mut RenderPhase, + consider_material: bool, +) { + let mut batch = BatchState::default(); + for i in 0..phase.items.len() { + let item = &mut phase.items[i]; + let Ok((material_binding_meta, mesh_handle, mesh_transforms)) = + object_query.get(item.entity()) + else { + // It is necessary to start a new batch if an entity not matching the query is + // encountered. This can be achieved by resetting the pipelined id. 
+ batch.meta.pipeline_id = None; + continue; + }; + let gpu_array_buffer_index = object_data_buffer.push(MeshUniform::from(mesh_transforms)); + let batch_meta = BatchMeta { + pipeline_id: Some(item.cached_pipeline()), + draw_function_id: Some(item.draw_function()), + material_binding_meta, + mesh_handle: Some(mesh_handle), + dynamic_offset: gpu_array_buffer_index.dynamic_offset, + }; + if !batch_meta.matches(&batch.meta, consider_material) { + if batch.count > 0 { + update_batch_data(&mut phase.items[batch.item_index], &batch); + } + + batch.meta = batch_meta; + batch.gpu_array_buffer_index = gpu_array_buffer_index; + batch.count = 0; + batch.item_index = i; + } + batch.count += 1; + } + if !phase.items.is_empty() && batch.count > 0 { + update_batch_data(&mut phase.items[batch.item_index], &batch); + } +} + +type ObjectQuery<'w, 's, 'mat, 'mesh, 'data> = Query< + 'w, + 's, + ( + Option<&'mat MaterialBindGroupId>, + &'mesh Handle, + &'data MeshTransforms, + ), +>; + #[allow(clippy::too_many_arguments)] pub fn prepare_mesh_uniforms( - mut seen: Local, - mut commands: Commands, - mut previous_len: Local, render_device: Res, render_queue: Res, - mut gpu_array_buffer: ResMut>, - views: Query<( - &RenderPhase, - &RenderPhase, - &RenderPhase, + gpu_array_buffer: ResMut>, + mut views: Query<( + Option<&mut RenderPhase>, + Option<&mut RenderPhase>, + &mut RenderPhase, + &mut RenderPhase, + &mut RenderPhase, )>, - shadow_views: Query<&RenderPhase>, - meshes: Query<(Entity, &MeshTransforms)>, + mut shadow_views: Query<&mut RenderPhase>, + meshes: ObjectQuery, ) { - gpu_array_buffer.clear(); - seen.clear(); - - let mut indices = Vec::with_capacity(*previous_len); - let mut push_indices = |(mesh, mesh_uniform): (Entity, &MeshTransforms)| { - let index = mesh.index() as usize; - if !seen.contains(index) { - if index >= seen.len() { - seen.grow(index + 1); - } - seen.insert(index); - indices.push((mesh, gpu_array_buffer.push(mesh_uniform.into()))); - } - }; + let gpu_array_buffer 
= gpu_array_buffer.into_inner(); - for (opaque_phase, transparent_phase, alpha_phase) in &views { - meshes - .iter_many(opaque_phase.iter_entities()) - .for_each(&mut push_indices); - - meshes - .iter_many(transparent_phase.iter_entities()) - .for_each(&mut push_indices); + gpu_array_buffer.clear(); - meshes - .iter_many(alpha_phase.iter_entities()) - .for_each(&mut push_indices); + for ( + opaque_prepass_phase, + alpha_mask_prepass_phase, + opaque_phase, + alpha_mask_phase, + transparent_phase, + ) in &mut views + { + if let Some(opaque_prepass_phase) = opaque_prepass_phase { + process_phase( + gpu_array_buffer, + &meshes, + opaque_prepass_phase.into_inner(), + false, + ); + } + if let Some(alpha_mask_prepass_phase) = alpha_mask_prepass_phase { + process_phase( + gpu_array_buffer, + &meshes, + alpha_mask_prepass_phase.into_inner(), + true, + ); + } + process_phase(gpu_array_buffer, &meshes, opaque_phase.into_inner(), true); + process_phase( + gpu_array_buffer, + &meshes, + alpha_mask_phase.into_inner(), + true, + ); + process_phase( + gpu_array_buffer, + &meshes, + transparent_phase.into_inner(), + true, + ); } - for shadow_phase in &shadow_views { - meshes - .iter_many(shadow_phase.iter_entities()) - .for_each(&mut push_indices); + for shadow_phase in &mut shadow_views { + process_phase(gpu_array_buffer, &meshes, shadow_phase.into_inner(), false); } - *previous_len = indices.len(); - commands.insert_or_spawn_batch(indices); - gpu_array_buffer.write_buffer(&render_device, &render_queue); } @@ -1371,16 +1494,15 @@ impl RenderCommand

for SetMeshBindGroup { type ViewWorldQuery = (); type ItemWorldQuery = ( Read>, - Read>, Option>, Option>, ); #[inline] fn render<'w>( - _item: &P, + item: &P, _view: (), - (mesh, batch_indices, skin_index, morph_index): ROQueryItem, + (mesh, skin_index, morph_index): ROQueryItem, bind_groups: SystemParamItem<'w, '_, Self::Param>, pass: &mut TrackedRenderPass<'w>, ) -> RenderCommandResult { @@ -1399,8 +1521,8 @@ impl RenderCommand

for SetMeshBindGroup { let mut dynamic_offsets: [u32; 3] = Default::default(); let mut index_count = 0; - if let Some(mesh_index) = batch_indices.dynamic_offset { - dynamic_offsets[index_count] = mesh_index; + if let Some(mesh_index) = item.dynamic_offset() { + dynamic_offsets[index_count] = mesh_index.get(); index_count += 1; } if let Some(skin_index) = skin_index { @@ -1421,22 +1543,23 @@ pub struct DrawMesh; impl RenderCommand

for DrawMesh { type Param = SRes>; type ViewWorldQuery = (); - type ItemWorldQuery = (Read>, Read>); + type ItemWorldQuery = Read>; #[inline] fn render<'w>( - _item: &P, + item: &P, _view: (), - (batch_indices, mesh_handle): ROQueryItem<'_, Self::ItemWorldQuery>, + mesh_handle: ROQueryItem<'_, Self::ItemWorldQuery>, meshes: SystemParamItem<'w, '_, Self::Param>, pass: &mut TrackedRenderPass<'w>, ) -> RenderCommandResult { if let Some(gpu_mesh) = meshes.into_inner().get(mesh_handle) { + let batch_range = item.batch_range(); pass.set_vertex_buffer(0, gpu_mesh.vertex_buffer.slice(..)); #[cfg(all(feature = "webgl", target_arch = "wasm32"))] pass.set_push_constants( ShaderStages::VERTEX, 0, - &(batch_indices.index as i32).to_le_bytes(), + &(batch_range.start as i32).to_le_bytes(), ); match &gpu_mesh.buffer_info { GpuBufferInfo::Indexed { @@ -1445,13 +1568,10 @@ impl RenderCommand

for DrawMesh { count, } => { pass.set_index_buffer(buffer.slice(..), 0, *index_format); - pass.draw_indexed(0..*count, 0, batch_indices.index..batch_indices.index + 1); + pass.draw_indexed(0..*count, 0, batch_range.clone()); } GpuBufferInfo::NonIndexed => { - pass.draw( - 0..gpu_mesh.vertex_count, - batch_indices.index..batch_indices.index + 1, - ); + pass.draw(0..gpu_mesh.vertex_count, batch_range.clone()); } } RenderCommandResult::Success diff --git a/crates/bevy_pbr/src/wireframe.rs b/crates/bevy_pbr/src/wireframe.rs index e227bfdb7d4d63..c9c7de18ae6c6a 100644 --- a/crates/bevy_pbr/src/wireframe.rs +++ b/crates/bevy_pbr/src/wireframe.rs @@ -152,7 +152,8 @@ fn queue_wireframes( draw_function: draw_custom, distance: rangefinder .distance_translation(&mesh_transforms.transform.translation), - batch_size: 1, + batch_range: 0..1, + dynamic_offset: None, }); } }; diff --git a/crates/bevy_render/Cargo.toml b/crates/bevy_render/Cargo.toml index dd2f90aef46831..fe945ab00b6bcf 100644 --- a/crates/bevy_render/Cargo.toml +++ b/crates/bevy_render/Cargo.toml @@ -83,6 +83,7 @@ encase = { version = "0.6.1", features = ["glam"] } # For wgpu profiling using tracing. Use `RUST_LOG=info` to also capture the wgpu spans. 
profiling = { version = "1", features = ["profile-with-tracing"], optional = true } async-channel = "1.8" +nonmax = "0.5.3" [target.'cfg(target_arch = "wasm32")'.dependencies] js-sys = "0.3" diff --git a/crates/bevy_render/src/render_phase/mod.rs b/crates/bevy_render/src/render_phase/mod.rs index 54870cfc260b78..dec5ddf77621d1 100644 --- a/crates/bevy_render/src/render_phase/mod.rs +++ b/crates/bevy_render/src/render_phase/mod.rs @@ -31,6 +31,7 @@ mod rangefinder; pub use draw::*; pub use draw_state::*; +use nonmax::NonMaxU32; pub use rangefinder::*; use crate::render_resource::{CachedRenderPipelineId, PipelineCache}; @@ -93,13 +94,13 @@ impl RenderPhase { let mut index = 0; while index < self.items.len() { let item = &self.items[index]; - let batch_size = item.batch_size(); - if batch_size > 0 { + let batch_range = item.batch_range(); + if batch_range.is_empty() { + index += 1; + } else { let draw_function = draw_functions.get_mut(item.draw_function()).unwrap(); draw_function.draw(world, render_pass, view, item); - index += batch_size; - } else { - index += 1; + index += batch_range.len(); } } } @@ -124,13 +125,13 @@ impl RenderPhase { let mut index = 0; while index < items.len() { let item = &items[index]; - let batch_size = item.batch_size(); - if batch_size > 0 { + let batch_range = item.batch_range(); + if batch_range.is_empty() { + index += 1; + } else { let draw_function = draw_functions.get_mut(item.draw_function()).unwrap(); draw_function.draw(world, render_pass, view, item); - index += batch_size; - } else { - index += 1; + index += batch_range.len(); } } } @@ -182,12 +183,14 @@ pub trait PhaseItem: Sized + Send + Sync + 'static { items.sort_unstable_by_key(|item| item.sort_key()); } - /// The number of items to skip after rendering this [`PhaseItem`]. - /// - /// Items with a `batch_size` of 0 will not be rendered. - fn batch_size(&self) -> usize { - 1 - } + /// The range of instances that the batch covers. 
After doing a batched draw, batch range + /// length phase items will be skipped. This design is to avoid having to restructure the + /// render phase unnecessarily. + fn batch_range(&self) -> &Range; + fn batch_range_mut(&mut self) -> &mut Range; + + fn dynamic_offset(&self) -> Option; + fn dynamic_offset_mut(&mut self) -> &mut Option; } /// A [`PhaseItem`] item, that automatically sets the appropriate render pipeline, diff --git a/crates/bevy_render/src/render_resource/batched_uniform_buffer.rs b/crates/bevy_render/src/render_resource/batched_uniform_buffer.rs index a9fba2ac7fb426..8a850ff9bd001f 100644 --- a/crates/bevy_render/src/render_resource/batched_uniform_buffer.rs +++ b/crates/bevy_render/src/render_resource/batched_uniform_buffer.rs @@ -7,6 +7,7 @@ use encase::{ private::{ArrayMetadata, BufferMut, Metadata, RuntimeSizedArray, WriteInto, Writer}, ShaderType, }; +use nonmax::NonMaxU32; use std::{marker::PhantomData, num::NonZeroU64}; use wgpu::{BindingResource, Limits}; @@ -77,7 +78,7 @@ impl BatchedUniformBuffer { pub fn push(&mut self, component: T) -> GpuArrayBufferIndex { let result = GpuArrayBufferIndex { index: self.temp.0.len() as u32, - dynamic_offset: Some(self.current_offset), + dynamic_offset: NonMaxU32::new(self.current_offset), element_type: PhantomData, }; self.temp.0.push(component); diff --git a/crates/bevy_render/src/render_resource/gpu_array_buffer.rs b/crates/bevy_render/src/render_resource/gpu_array_buffer.rs index 45eaba4f732465..92fbab4fb1216d 100644 --- a/crates/bevy_render/src/render_resource/gpu_array_buffer.rs +++ b/crates/bevy_render/src/render_resource/gpu_array_buffer.rs @@ -5,6 +5,7 @@ use crate::{ }; use bevy_ecs::{prelude::Component, system::Resource}; use encase::{private::WriteInto, ShaderSize, ShaderType}; +use nonmax::NonMaxU32; use std::{marker::PhantomData, mem}; use wgpu::{BindGroupLayoutEntry, BindingResource, BindingType, BufferBindingType, ShaderStages}; @@ -118,12 +119,22 @@ impl GpuArrayBuffer { } /// An index 
into a [`GpuArrayBuffer`] for a given element. -#[derive(Component)] +#[derive(Component, Clone)] pub struct GpuArrayBufferIndex { /// The index to use in a shader into the array. pub index: u32, /// The dynamic offset to use when setting the bind group in a pass. /// Only used on platforms that don't support storage buffers. - pub dynamic_offset: Option, + pub dynamic_offset: Option, pub element_type: PhantomData, } + +impl Default for GpuArrayBufferIndex { + fn default() -> Self { + Self { + index: u32::MAX, + dynamic_offset: None, + element_type: Default::default(), + } + } +} diff --git a/crates/bevy_sprite/Cargo.toml b/crates/bevy_sprite/Cargo.toml index 37db1b9eb2a203..69494d9da180b8 100644 --- a/crates/bevy_sprite/Cargo.toml +++ b/crates/bevy_sprite/Cargo.toml @@ -31,3 +31,4 @@ guillotiere = "0.6.0" thiserror = "1.0" rectangle-pack = "0.4" bitflags = "2.3" +nonmax = "0.5.3" diff --git a/crates/bevy_sprite/src/mesh2d/material.rs b/crates/bevy_sprite/src/mesh2d/material.rs index 98fd7791935df2..3693d36a4f7644 100644 --- a/crates/bevy_sprite/src/mesh2d/material.rs +++ b/crates/bevy_sprite/src/mesh2d/material.rs @@ -16,7 +16,6 @@ use bevy_ecs::{ use bevy_log::error; use bevy_reflect::{TypePath, TypeUuid}; use bevy_render::{ - extract_component::ExtractComponentPlugin, mesh::{Mesh, MeshVertexBufferLayout}, prelude::Image, render_asset::{prepare_assets, RenderAssets}, @@ -25,9 +24,9 @@ use bevy_render::{ RenderPhase, SetItemPipeline, TrackedRenderPass, }, render_resource::{ - AsBindGroup, AsBindGroupError, BindGroup, BindGroupLayout, OwnedBindingResource, - PipelineCache, RenderPipelineDescriptor, Shader, ShaderRef, SpecializedMeshPipeline, - SpecializedMeshPipelineError, SpecializedMeshPipelines, + AsBindGroup, AsBindGroupError, BindGroup, BindGroupId, BindGroupLayout, + OwnedBindingResource, PipelineCache, RenderPipelineDescriptor, Shader, ShaderRef, + SpecializedMeshPipeline, SpecializedMeshPipelineError, SpecializedMeshPipelines, }, renderer::RenderDevice, 
texture::FallbackImage, @@ -40,8 +39,8 @@ use std::hash::Hash; use std::marker::PhantomData; use crate::{ - DrawMesh2d, Mesh2dHandle, Mesh2dPipeline, Mesh2dPipelineKey, Mesh2dUniform, SetMesh2dBindGroup, - SetMesh2dViewBindGroup, + DrawMesh2d, Mesh2dHandle, Mesh2dPipeline, Mesh2dPipelineKey, Mesh2dTransforms, + SetMesh2dBindGroup, SetMesh2dViewBindGroup, }; /// Materials are used alongside [`Material2dPlugin`] and [`MaterialMesh2dBundle`] @@ -151,8 +150,7 @@ where M::Data: PartialEq + Eq + Hash + Clone, { fn build(&self, app: &mut App) { - app.add_asset::() - .add_plugins(ExtractComponentPlugin::>::extract_visible()); + app.add_asset::(); if let Ok(render_app) = app.get_sub_app_mut(RenderApp) { render_app @@ -160,7 +158,10 @@ where .init_resource::>() .init_resource::>() .init_resource::>>() - .add_systems(ExtractSchedule, extract_materials_2d::) + .add_systems( + ExtractSchedule, + (extract_materials_2d::, extract_material_meshes_2d::), + ) .add_systems( Render, ( @@ -182,6 +183,26 @@ where } } +fn extract_material_meshes_2d( + mut commands: Commands, + mut previous_len: Local, + query: Extract)>>, +) { + let mut values = Vec::with_capacity(*previous_len); + for (entity, view_visibility, material) in &query { + if view_visibility.get() { + // NOTE: Material2dBindGroupId is inserted here to avoid a table move. Upcoming changes + // to use SparseSet for render world entity storage will do this automatically. 
+ values.push(( + entity, + (material.clone_weak(), Material2dBindGroupId::default()), + )); + } + } + *previous_len = values.len(); + commands.insert_or_spawn_batch(values); +} + /// Render pipeline data for a given [`Material2d`] #[derive(Resource)] pub struct Material2dPipeline { @@ -335,7 +356,12 @@ pub fn queue_material2d_meshes( msaa: Res, render_meshes: Res>, render_materials: Res>, - material2d_meshes: Query<(&Handle, &Mesh2dHandle, &Mesh2dUniform)>, + mut material2d_meshes: Query<( + &Handle, + &mut Material2dBindGroupId, + &Mesh2dHandle, + &Mesh2dTransforms, + )>, mut views: Query<( &ExtractedView, &VisibleEntities, @@ -380,8 +406,12 @@ pub fn queue_material2d_meshes( } for visible_entity in &visible_entities.entities { - if let Ok((material2d_handle, mesh2d_handle, mesh2d_uniform)) = - material2d_meshes.get(*visible_entity) + if let Ok(( + material2d_handle, + mut material2d_bind_group, + mesh2d_handle, + mesh2d_uniform, + )) = material2d_meshes.get_mut(*visible_entity) { if let Some(material2d) = render_materials.get(material2d_handle) { if let Some(mesh) = render_meshes.get(&mesh2d_handle.0) { @@ -406,7 +436,8 @@ pub fn queue_material2d_meshes( } }; - let mesh_z = mesh2d_uniform.transform.w_axis.z; + *material2d_bind_group = material2d.get_binding_meta(); + let mesh_z = mesh2d_uniform.transform.translation.z; transparent_phase.add(Transparent2d { entity: *visible_entity, draw_function: draw_transparent_pbr, @@ -416,8 +447,9 @@ pub fn queue_material2d_meshes( // -z in front of the camera, the largest distance is -far with values increasing toward the // camera. 
As such we can just use mesh_z as the distance sort_key: FloatOrd(mesh_z), - // This material is not batched - batch_size: 1, + // Batching is done in prepare_mesh2d_uniforms + batch_range: 0..1, + dynamic_offset: None, }); } } @@ -426,6 +458,9 @@ pub fn queue_material2d_meshes( } } +#[derive(Component, Default, PartialEq, Eq, Deref, DerefMut)] +pub struct Material2dBindGroupId(Option); + /// Data prepared for a [`Material2d`] instance. pub struct PreparedMaterial2d { pub bindings: Vec, @@ -433,6 +468,12 @@ pub struct PreparedMaterial2d { pub key: T::Data, } +impl PreparedMaterial2d { + pub fn get_binding_meta(&self) -> Material2dBindGroupId { + Material2dBindGroupId(Some(self.bind_group.id())) + } +} + #[derive(Resource)] pub struct ExtractedMaterials2d { extracted: Vec<(Handle, M)>, diff --git a/crates/bevy_sprite/src/mesh2d/mesh.rs b/crates/bevy_sprite/src/mesh2d/mesh.rs index 8489d152fa427a..6bf23b7d7411c2 100644 --- a/crates/bevy_sprite/src/mesh2d/mesh.rs +++ b/crates/bevy_sprite/src/mesh2d/mesh.rs @@ -1,19 +1,22 @@ use bevy_app::Plugin; use bevy_asset::{load_internal_asset, Handle, HandleUntyped}; +use bevy_core_pipeline::core_2d::Transparent2d; use bevy_ecs::{ prelude::*, query::ROQueryItem, system::{lifetimeless::*, SystemParamItem, SystemState}, }; -use bevy_math::{Mat4, Vec2}; +use bevy_math::{Affine3, Affine3A, Vec2, Vec3Swizzles, Vec4}; use bevy_reflect::{Reflect, TypeUuid}; use bevy_render::{ - extract_component::{ComponentUniforms, DynamicUniformIndex, UniformComponentPlugin}, globals::{GlobalsBuffer, GlobalsUniform}, mesh::{GpuBufferInfo, Mesh, MeshVertexBufferLayout}, render_asset::RenderAssets, - render_phase::{PhaseItem, RenderCommand, RenderCommandResult, TrackedRenderPass}, + render_phase::{ + CachedRenderPipelinePhaseItem, DrawFunctionId, PhaseItem, RenderCommand, + RenderCommandResult, RenderPhase, TrackedRenderPass, + }, render_resource::*, renderer::{RenderDevice, RenderQueue}, texture::{ @@ -25,11 +28,14 @@ use bevy_render::{ Extract, 
ExtractSchedule, Render, RenderApp, RenderSet, }; use bevy_transform::components::GlobalTransform; +use nonmax::NonMaxU32; + +use crate::Material2dBindGroupId; /// Component for rendering with meshes in the 2d pipeline, usually with a [2d material](crate::Material2d) such as [`ColorMaterial`](crate::ColorMaterial). /// /// It wraps a [`Handle`] to differentiate from the 3d pipelines which use the handles directly as components -#[derive(Default, Clone, Component, Debug, Reflect)] +#[derive(Default, Clone, Component, Debug, Reflect, PartialEq, Eq)] #[reflect(Component)] pub struct Mesh2dHandle(pub Handle); @@ -83,12 +89,6 @@ impl Plugin for Mesh2dRenderPlugin { "mesh2d_types.wgsl", Shader::from_wgsl ); - load_internal_asset!( - app, - MESH2D_BINDINGS_HANDLE, - "mesh2d_bindings.wgsl", - Shader::from_wgsl - ); load_internal_asset!( app, MESH2D_FUNCTIONS_HANDLE, @@ -97,8 +97,6 @@ impl Plugin for Mesh2dRenderPlugin { ); load_internal_asset!(app, MESH2D_SHADER_HANDLE, "mesh2d.wgsl", Shader::from_wgsl); - app.add_plugins(UniformComponentPlugin::::default()); - if let Ok(render_app) = app.get_sub_app_mut(RenderApp) { render_app .init_resource::>() @@ -106,6 +104,7 @@ impl Plugin for Mesh2dRenderPlugin { .add_systems( Render, ( + prepare_mesh2d_uniforms.in_set(RenderSet::PrepareResources), prepare_mesh2d_bind_group.in_set(RenderSet::PrepareBindGroups), prepare_mesh2d_view_bind_groups.in_set(RenderSet::PrepareBindGroups), ), @@ -114,19 +113,93 @@ impl Plugin for Mesh2dRenderPlugin { } fn finish(&self, app: &mut bevy_app::App) { + let mut mesh_bindings_shader_defs = Vec::with_capacity(1); + if let Ok(render_app) = app.get_sub_app_mut(RenderApp) { - render_app.init_resource::(); + if let Some(per_object_buffer_batch_size) = GpuArrayBuffer::::batch_size( + render_app.world.resource::(), + ) { + mesh_bindings_shader_defs.push(ShaderDefVal::UInt( + "PER_OBJECT_BUFFER_BATCH_SIZE".into(), + per_object_buffer_batch_size, + )); + } + + render_app + 
.insert_resource(GpuArrayBuffer::::new( + render_app.world.resource::(), + )) + .init_resource::(); } + + // Load the mesh_bindings shader module here as it depends on runtime information about + // whether storage buffers are supported, or the maximum uniform buffer binding size. + load_internal_asset!( + app, + MESH2D_BINDINGS_HANDLE, + "mesh2d_bindings.wgsl", + Shader::from_wgsl_with_defs, + mesh_bindings_shader_defs + ); } } -#[derive(Component, ShaderType, Clone)] +#[derive(Component)] +pub struct Mesh2dTransforms { + pub transform: Affine3, + pub flags: u32, +} + +#[derive(ShaderType, Clone)] pub struct Mesh2dUniform { - pub transform: Mat4, - pub inverse_transpose_model: Mat4, + // Affine 4x3 matrix transposed to 3x4 + pub transform: [Vec4; 3], + // 3x3 matrix packed in mat2x4 and f32 as: + // [0].xyz, [1].x, + // [1].yz, [2].xy + // [2].z + pub inverse_transpose_model_a: [Vec4; 2], + pub inverse_transpose_model_b: f32, pub flags: u32, } +impl From<&Mesh2dTransforms> for Mesh2dUniform { + fn from(mesh_transforms: &Mesh2dTransforms) -> Self { + let transpose_model_3x3 = mesh_transforms.transform.matrix3.transpose(); + let inverse_transpose_model_3x3 = Affine3A::from(&mesh_transforms.transform) + .inverse() + .matrix3 + .transpose(); + Self { + transform: [ + transpose_model_3x3 + .x_axis + .extend(mesh_transforms.transform.translation.x), + transpose_model_3x3 + .y_axis + .extend(mesh_transforms.transform.translation.y), + transpose_model_3x3 + .z_axis + .extend(mesh_transforms.transform.translation.z), + ], + inverse_transpose_model_a: [ + ( + inverse_transpose_model_3x3.x_axis, + inverse_transpose_model_3x3.y_axis.x, + ) + .into(), + ( + inverse_transpose_model_3x3.y_axis.yz(), + inverse_transpose_model_3x3.z_axis.xy(), + ) + .into(), + ], + inverse_transpose_model_b: inverse_transpose_model_3x3.z_axis.z, + flags: mesh_transforms.flags, + } + } +} + // NOTE: These must match the bit flags in bevy_sprite/src/mesh2d/mesh2d.wgsl! bitflags::bitflags! 
{ #[repr(transparent)] @@ -146,15 +219,13 @@ pub fn extract_mesh2d( if !view_visibility.get() { continue; } - let transform = transform.compute_matrix(); values.push(( entity, ( Mesh2dHandle(handle.0.clone_weak()), - Mesh2dUniform { + Mesh2dTransforms { + transform: (&transform.affine()).into(), flags: MeshFlags::empty().bits(), - transform, - inverse_transpose_model: transform.inverse().transpose(), }, ), )); @@ -163,19 +234,163 @@ pub fn extract_mesh2d( commands.insert_or_spawn_batch(values); } +/// Data necessary to be equal for two draw commands to be mergeable +/// +/// This is based on the following assumptions: +/// - Only entities with prepared assets (pipelines, materials, meshes) are +/// queued to phases +/// - View bindings are constant across a phase for a given draw function as +/// phases are per-view +/// - `prepare_mesh2d_uniforms` is the only system that performs this batching +/// and has sole responsibility for preparing the per-object data. As such +/// the mesh binding and dynamic offsets are assumed to only be variable as a +/// result of the `prepare_mesh2d_uniforms` system, e.g. due to having to split +/// data across separate uniform bindings within the same buffer due to the +/// maximum uniform buffer binding size. +#[derive(Default, PartialEq, Eq)] +struct BatchMeta<'mat, 'mesh> { + /// The pipeline id encompasses all pipeline configuration including vertex + /// buffers and layouts, shaders and their specializations, bind group + /// layouts, etc. + pipeline_id: Option, + /// The draw function id defines the RenderCommands that are called to + /// set the pipeline and bindings, and make the draw command + draw_function_id: Option, + /// The material binding meta includes the material bind group id and + /// dynamic offsets. 
+ material2d_bind_group: Option<&'mat Material2dBindGroupId>, + mesh_handle: Option<&'mesh Mesh2dHandle>, + dynamic_offset: Option, +} + +impl<'mat, 'mesh> BatchMeta<'mat, 'mesh> { + fn matches(&self, other: &BatchMeta<'mat, 'mesh>, consider_material: bool) -> bool { + self.pipeline_id == other.pipeline_id + && self.draw_function_id == other.draw_function_id + && self.mesh_handle == other.mesh_handle + && self.dynamic_offset == other.dynamic_offset + && (!consider_material || self.material2d_bind_group == other.material2d_bind_group) + } +} + +#[derive(Default)] +struct BatchState<'mat, 'mesh> { + meta: BatchMeta<'mat, 'mesh>, + /// The base index in the object data binding's array + gpu_array_buffer_index: GpuArrayBufferIndex, + /// The number of entities in the batch + count: u32, + item_index: usize, +} + +fn update_batch_data(item: &mut I, batch: &BatchState) { + let BatchState { + count, + gpu_array_buffer_index, + .. + } = batch; + *item.batch_range_mut() = gpu_array_buffer_index.index..(gpu_array_buffer_index.index + *count); + *item.dynamic_offset_mut() = gpu_array_buffer_index.dynamic_offset; +} + +fn process_phase( + object_data_buffer: &mut GpuArrayBuffer, + object_query: &ObjectQuery, + phase: &mut RenderPhase, + consider_material: bool, +) { + let mut batch = BatchState::default(); + for i in 0..phase.items.len() { + let item = &mut phase.items[i]; + let Ok((material2d_bind_group, mesh_handle, mesh_transforms)) = + object_query.get(item.entity()) + else { + // It is necessary to start a new batch if an entity not matching the query is + // encountered. This can be achieved by resetting the pipeline id. 
+ batch.meta.pipeline_id = None; + continue; + }; + let gpu_array_buffer_index = object_data_buffer.push(Mesh2dUniform::from(mesh_transforms)); + let batch_meta = BatchMeta { + pipeline_id: Some(item.cached_pipeline()), + draw_function_id: Some(item.draw_function()), + material2d_bind_group, + mesh_handle: Some(mesh_handle), + dynamic_offset: gpu_array_buffer_index.dynamic_offset, + }; + if !batch_meta.matches(&batch.meta, consider_material) { + if batch.count > 0 { + update_batch_data(&mut phase.items[batch.item_index], &batch); + } + + batch.meta = batch_meta; + batch.gpu_array_buffer_index = gpu_array_buffer_index; + batch.count = 0; + batch.item_index = i; + } + batch.count += 1; + } + if !phase.items.is_empty() && batch.count > 0 { + update_batch_data(&mut phase.items[batch.item_index], &batch); + } +} + +type ObjectQuery<'w, 's, 'mat, 'mesh, 'data> = Query< + 'w, + 's, + ( + Option<&'mat Material2dBindGroupId>, + &'mesh Mesh2dHandle, + &'data Mesh2dTransforms, + ), +>; + +#[allow(clippy::too_many_arguments)] +pub fn prepare_mesh2d_uniforms( + render_device: Res, + render_queue: Res, + gpu_array_buffer: ResMut>, + mut views: Query<&mut RenderPhase>, + meshes: ObjectQuery, +) { + if meshes.is_empty() { + return; + } + + let gpu_array_buffer = gpu_array_buffer.into_inner(); + + gpu_array_buffer.clear(); + + for transparent_phase in &mut views { + process_phase( + gpu_array_buffer, + &meshes, + transparent_phase.into_inner(), + true, + ); + } + + gpu_array_buffer.write_buffer(&render_device, &render_queue); +} + #[derive(Resource, Clone)] pub struct Mesh2dPipeline { pub view_layout: BindGroupLayout, pub mesh_layout: BindGroupLayout, // This dummy white texture is to be used in place of optional textures pub dummy_white_gpu_image: GpuImage, + pub per_object_buffer_batch_size: Option, } impl FromWorld for Mesh2dPipeline { fn from_world(world: &mut World) -> Self { - let mut system_state: SystemState<(Res, Res)> = - SystemState::new(world); - let (render_device, 
default_sampler) = system_state.get_mut(world); + let mut system_state: SystemState<( + Res, + Res, + Res, + )> = SystemState::new(world); + let (render_device, render_queue, default_sampler) = system_state.get_mut(world); + let render_device = render_device.into_inner(); let view_layout = render_device.create_bind_group_layout(&BindGroupLayoutDescriptor { entries: &[ // View @@ -204,16 +419,11 @@ impl FromWorld for Mesh2dPipeline { }); let mesh_layout = render_device.create_bind_group_layout(&BindGroupLayoutDescriptor { - entries: &[BindGroupLayoutEntry { - binding: 0, - visibility: ShaderStages::VERTEX | ShaderStages::FRAGMENT, - ty: BindingType::Buffer { - ty: BufferBindingType::Uniform, - has_dynamic_offset: true, - min_binding_size: Some(Mesh2dUniform::min_size()), - }, - count: None, - }], + entries: &[GpuArrayBuffer::::binding_layout( + 0, + ShaderStages::VERTEX_FRAGMENT, + render_device, + )], label: Some("mesh2d_layout"), }); // A 1x1x1 'all 1.0' texture to use as a dummy texture to use in place of optional StandardMaterial textures @@ -226,7 +436,6 @@ impl FromWorld for Mesh2dPipeline { }; let format_size = image.texture_descriptor.format.pixel_size(); - let render_queue = world.resource_mut::(); render_queue.write_texture( ImageCopyTexture { texture: &texture, @@ -260,6 +469,9 @@ impl FromWorld for Mesh2dPipeline { view_layout, mesh_layout, dummy_white_gpu_image, + per_object_buffer_batch_size: GpuArrayBuffer::::batch_size( + render_device, + ), } } } @@ -484,9 +696,9 @@ pub fn prepare_mesh2d_bind_group( mut commands: Commands, mesh2d_pipeline: Res, render_device: Res, - mesh2d_uniforms: Res>, + mesh2d_uniforms: Res>, ) { - if let Some(binding) = mesh2d_uniforms.uniforms().binding() { + if let Some(binding) = mesh2d_uniforms.binding() { commands.insert_resource(Mesh2dBindGroup { value: render_device.create_bind_group(&BindGroupDescriptor { entries: &[BindGroupEntry { @@ -564,20 +776,26 @@ pub struct SetMesh2dBindGroup; impl RenderCommand

for SetMesh2dBindGroup { type Param = SRes; type ViewWorldQuery = (); - type ItemWorldQuery = Read>; + type ItemWorldQuery = (); #[inline] fn render<'w>( - _item: &P, + item: &P, _view: (), - mesh2d_index: &'_ DynamicUniformIndex, + _item_query: (), mesh2d_bind_group: SystemParamItem<'w, '_, Self::Param>, pass: &mut TrackedRenderPass<'w>, ) -> RenderCommandResult { + let mut dynamic_offsets: [u32; 1] = Default::default(); + let mut index_count = 0; + if let Some(mesh_index) = item.dynamic_offset() { + dynamic_offsets[index_count] = mesh_index.get(); + index_count += 1; + } pass.set_bind_group( I, &mesh2d_bind_group.into_inner().value, - &[mesh2d_index.index()], + &dynamic_offsets[..index_count], ); RenderCommandResult::Success } @@ -591,14 +809,21 @@ impl RenderCommand

for DrawMesh2d { #[inline] fn render<'w>( - _item: &P, + item: &P, _view: (), mesh_handle: ROQueryItem<'w, Self::ItemWorldQuery>, meshes: SystemParamItem<'w, '_, Self::Param>, pass: &mut TrackedRenderPass<'w>, ) -> RenderCommandResult { + let batch_range = item.batch_range(); if let Some(gpu_mesh) = meshes.into_inner().get(&mesh_handle.0) { pass.set_vertex_buffer(0, gpu_mesh.vertex_buffer.slice(..)); + #[cfg(all(feature = "webgl", target_arch = "wasm32"))] + pass.set_push_constants( + ShaderStages::VERTEX, + 0, + &(batch_range.start as i32).to_le_bytes(), + ); match &gpu_mesh.buffer_info { GpuBufferInfo::Indexed { buffer, @@ -606,10 +831,10 @@ impl RenderCommand

for DrawMesh2d { count, } => { pass.set_index_buffer(buffer.slice(..), 0, *index_format); - pass.draw_indexed(0..*count, 0, 0..1); + pass.draw_indexed(0..*count, 0, batch_range.clone()); } GpuBufferInfo::NonIndexed => { - pass.draw(0..gpu_mesh.vertex_count, 0..1); + pass.draw(0..gpu_mesh.vertex_count, batch_range.clone()); } } RenderCommandResult::Success diff --git a/crates/bevy_sprite/src/mesh2d/mesh2d.wgsl b/crates/bevy_sprite/src/mesh2d/mesh2d.wgsl index 2b99639836d318..003f7dda13af90 100644 --- a/crates/bevy_sprite/src/mesh2d/mesh2d.wgsl +++ b/crates/bevy_sprite/src/mesh2d/mesh2d.wgsl @@ -8,6 +8,7 @@ #endif struct Vertex { + @builtin(instance_index) instance_index: u32, #ifdef VERTEX_POSITIONS @location(0) position: vec3, #endif @@ -33,20 +34,21 @@ fn vertex(vertex: Vertex) -> MeshVertexOutput { #endif #ifdef VERTEX_POSITIONS + var model = mesh_functions::get_model_matrix(vertex.instance_index); out.world_position = mesh_functions::mesh2d_position_local_to_world( - mesh.model, + model, vec4(vertex.position, 1.0) ); out.position = mesh_functions::mesh2d_position_world_to_clip(out.world_position); #endif #ifdef VERTEX_NORMALS - out.world_normal = mesh_functions::mesh2d_normal_local_to_world(vertex.normal); + out.world_normal = mesh_functions::mesh2d_normal_local_to_world(vertex.normal, vertex.instance_index); #endif #ifdef VERTEX_TANGENTS out.world_tangent = mesh_functions::mesh2d_tangent_local_to_world( - mesh.model, + model, vertex.tangent ); #endif diff --git a/crates/bevy_sprite/src/mesh2d/mesh2d_bindings.wgsl b/crates/bevy_sprite/src/mesh2d/mesh2d_bindings.wgsl index 6d51f963e083fb..95cf7cd775f318 100644 --- a/crates/bevy_sprite/src/mesh2d/mesh2d_bindings.wgsl +++ b/crates/bevy_sprite/src/mesh2d/mesh2d_bindings.wgsl @@ -1,6 +1,11 @@ #define_import_path bevy_sprite::mesh2d_bindings -#import bevy_sprite::mesh2d_types +#import bevy_sprite::mesh2d_types Mesh2d +#ifdef PER_OBJECT_BUFFER_BATCH_SIZE @group(2) @binding(0) -var mesh: 
bevy_sprite::mesh2d_types::Mesh2d; +var mesh: array; +#else +@group(2) @binding(0) +var mesh: array; +#endif // PER_OBJECT_BUFFER_BATCH_SIZE diff --git a/crates/bevy_sprite/src/mesh2d/mesh2d_functions.wgsl b/crates/bevy_sprite/src/mesh2d/mesh2d_functions.wgsl index cf8d6e2522068c..e242b08badefd0 100644 --- a/crates/bevy_sprite/src/mesh2d/mesh2d_functions.wgsl +++ b/crates/bevy_sprite/src/mesh2d/mesh2d_functions.wgsl @@ -2,6 +2,31 @@ #import bevy_sprite::mesh2d_view_bindings view #import bevy_sprite::mesh2d_bindings mesh +#import bevy_render::instance_index get_instance_index + +fn affine_to_square(affine: mat3x4) -> mat4x4 { + return transpose(mat4x4( + affine[0], + affine[1], + affine[2], + vec4(0.0, 0.0, 0.0, 1.0), + )); +} + +fn mat2x4_f32_to_mat3x3_unpack( + a: mat2x4, + b: f32, +) -> mat3x3 { + return mat3x3( + a[0].xyz, + vec3(a[0].w, a[1].xy), + vec3(a[1].zw, b), + ); +} + +fn get_model_matrix(instance_index: u32) -> mat4x4 { + return affine_to_square(mesh[get_instance_index(instance_index)].model); +} fn mesh2d_position_local_to_world(model: mat4x4, vertex_position: vec4) -> vec4 { return model * vertex_position; @@ -19,11 +44,10 @@ fn mesh2d_position_local_to_clip(model: mat4x4, vertex_position: vec4) return mesh2d_position_world_to_clip(world_position); } -fn mesh2d_normal_local_to_world(vertex_normal: vec3) -> vec3 { - return mat3x3( - mesh.inverse_transpose_model[0].xyz, - mesh.inverse_transpose_model[1].xyz, - mesh.inverse_transpose_model[2].xyz +fn mesh2d_normal_local_to_world(vertex_normal: vec3, instance_index: u32) -> vec3 { + return mat2x4_f32_to_mat3x3_unpack( + mesh[instance_index].inverse_transpose_model_a, + mesh[instance_index].inverse_transpose_model_b, ) * vertex_normal; } diff --git a/crates/bevy_sprite/src/mesh2d/mesh2d_types.wgsl b/crates/bevy_sprite/src/mesh2d/mesh2d_types.wgsl index 1de0218112a47a..d65b8010e667e2 100644 --- a/crates/bevy_sprite/src/mesh2d/mesh2d_types.wgsl +++ b/crates/bevy_sprite/src/mesh2d/mesh2d_types.wgsl @@ -1,8 
+1,16 @@ #define_import_path bevy_sprite::mesh2d_types struct Mesh2d { - model: mat4x4, - inverse_transpose_model: mat4x4, + // Affine 4x3 matrix transposed to 3x4 + // Use bevy_sprite::mesh2d_functions::affine_to_square to unpack + model: mat3x4, + // 3x3 matrix packed in mat2x4 and f32 as: + // [0].xyz, [1].x, + // [1].yz, [2].xy + // [2].z + // Use bevy_sprite::mesh2d_functions::mat2x4_f32_to_mat3x3_unpack to unpack + inverse_transpose_model_a: mat2x4, + inverse_transpose_model_b: f32, // 'flags' is a bit field indicating various options. u32 is 32 bits so we have up to 32 options. flags: u32, }; diff --git a/crates/bevy_sprite/src/render/mod.rs b/crates/bevy_sprite/src/render/mod.rs index 3949fbe10d7331..6c18198dfbeeb0 100644 --- a/crates/bevy_sprite/src/render/mod.rs +++ b/crates/bevy_sprite/src/render/mod.rs @@ -576,8 +576,9 @@ pub fn queue_sprites( pipeline: colored_pipeline, entity: *entity, sort_key, - // batch_size will be calculated in prepare_sprites - batch_size: 0, + // batch_range and dynamic_offset will be calculated in prepare_sprites + batch_range: 0..0, + dynamic_offset: None, }); } else { transparent_phase.add(Transparent2d { @@ -585,8 +586,9 @@ pub fn queue_sprites( pipeline, entity: *entity, sort_key, - // batch_size will be calculated in prepare_sprites - batch_size: 0, + // batch_range and dynamic_offset will be calculated in prepare_sprites + batch_range: 0..0, + dynamic_offset: None, }); } } @@ -750,7 +752,9 @@ pub fn prepare_sprites( )); } - transparent_phase.items[batch_item_index].batch_size += 1; + transparent_phase.items[batch_item_index] + .batch_range_mut() + .end += 1; batches.last_mut().unwrap().1.range.end += 1; index += 1; } diff --git a/crates/bevy_ui/Cargo.toml b/crates/bevy_ui/Cargo.toml index 60e8d74477ac55..41d6451eef342d 100644 --- a/crates/bevy_ui/Cargo.toml +++ b/crates/bevy_ui/Cargo.toml @@ -36,3 +36,4 @@ serde = { version = "1", features = ["derive"] } smallvec = { version = "1.6", features = ["union", 
"const_generics"] } bytemuck = { version = "1.5", features = ["derive"] } thiserror = "1.0.0" +nonmax = "0.5.3" diff --git a/crates/bevy_ui/src/render/mod.rs b/crates/bevy_ui/src/render/mod.rs index 2f4229b5b83f5a..53c5269b2d3a93 100644 --- a/crates/bevy_ui/src/render/mod.rs +++ b/crates/bevy_ui/src/render/mod.rs @@ -4,6 +4,7 @@ mod render_pass; use bevy_core_pipeline::{core_2d::Camera2d, core_3d::Camera3d}; use bevy_ecs::storage::SparseSet; use bevy_hierarchy::Parent; +use bevy_render::render_phase::PhaseItem; use bevy_render::view::ViewVisibility; use bevy_render::{ExtractSchedule, Render}; use bevy_window::{PrimaryWindow, Window}; @@ -670,8 +671,9 @@ pub fn queue_uinodes( pipeline, entity: *entity, sort_key: FloatOrd(extracted_uinode.stack_index as f32), - // batch_size will be calculated in prepare_uinodes - batch_size: 0, + // batch_range will be calculated in prepare_uinodes + batch_range: 0..0, + dynamic_offset: None, }); } } @@ -895,7 +897,7 @@ pub fn prepare_uinodes( } index += QUAD_INDICES.len() as u32; existing_batch.unwrap().1.range.end = index; - ui_phase.items[batch_item_index].batch_size += 1; + ui_phase.items[batch_item_index].batch_range_mut().end += 1; } else { batch_image_handle = HandleId::Id(Uuid::nil(), u64::MAX); } diff --git a/crates/bevy_ui/src/render/render_pass.rs b/crates/bevy_ui/src/render/render_pass.rs index 697aa11104c7e1..66cc02155a40cb 100644 --- a/crates/bevy_ui/src/render/render_pass.rs +++ b/crates/bevy_ui/src/render/render_pass.rs @@ -1,3 +1,5 @@ +use std::ops::Range; + use super::{UiBatch, UiImageBindGroups, UiMeta}; use crate::{prelude::UiCameraConfig, DefaultCameraView}; use bevy_asset::Handle; @@ -13,6 +15,7 @@ use bevy_render::{ view::*, }; use bevy_utils::FloatOrd; +use nonmax::NonMaxU32; pub struct UiPassNode { ui_view_query: QueryState< @@ -91,7 +94,8 @@ pub struct TransparentUi { pub entity: Entity, pub pipeline: CachedRenderPipelineId, pub draw_function: DrawFunctionId, - pub batch_size: usize, + pub batch_range: 
Range, + pub dynamic_offset: Option, } impl PhaseItem for TransparentUi { @@ -118,8 +122,23 @@ impl PhaseItem for TransparentUi { } #[inline] - fn batch_size(&self) -> usize { - self.batch_size + fn batch_range(&self) -> &Range { + &self.batch_range + } + + #[inline] + fn batch_range_mut(&mut self) -> &mut Range { + &mut self.batch_range + } + + #[inline] + fn dynamic_offset(&self) -> Option { + self.dynamic_offset + } + + #[inline] + fn dynamic_offset_mut(&mut self) -> &mut Option { + &mut self.dynamic_offset } } diff --git a/examples/shader/texture_binding_array.rs b/examples/shader/texture_binding_array.rs index 90bf00afc705ba..249bcde29838a3 100644 --- a/examples/shader/texture_binding_array.rs +++ b/examples/shader/texture_binding_array.rs @@ -140,6 +140,7 @@ impl AsBindGroup for BindlessMaterial { Ok(PreparedBindGroup { bindings: vec![], + dynamic_offsets: vec![], bind_group, data: (), })