From ceec7596681cd53080ae54d5c7f30f8e9a282a80 Mon Sep 17 00:00:00 2001 From: "n.savitchev" Date: Tue, 3 Dec 2024 11:09:45 +0300 Subject: [PATCH] sync on 2024/12/03, rev 8486c81d97afccec3ee17945750171fbd2fc5c14 --- DagorEngine.rev.txt | 2 +- build_all.cmd | 4 +- build_all_linux.sh | 3 - build_all_macOS.sh | 3 - outerSpace/prog/jamfile | 12 +- prog/daNetGame/shaders/rendinst.hlsli | 7 + prog/daNetGame/shaders/rendinst_inc.dshl | 28 +- prog/daNetGameLibs/daGdp/_shaders.blk | 2 + .../daGdp/debug/globalManagerDebugES.cpp.inl | 8 +- prog/daNetGameLibs/daGdp/render/common.h | 8 +- .../daGdp/render/globalManager.cpp | 249 ++++++-- .../daGdp/render/globalManager.h | 3 +- .../render/globalManagerES.cpp.gen.es.cpp | 57 ++ .../daGdp/render/globalManagerES.cpp.inl | 19 + .../daGdp/render/objects/riex.cpp | 232 +++++--- prog/daNetGameLibs/daGdp/render/placer.cpp | 163 ++++- prog/daNetGameLibs/daGdp/render/placer.h | 44 +- .../daGdp/render/placers/heightmap.cpp | 202 +------ .../daGdp/render/placers/heightmapES.cpp.inl | 4 +- .../daGdp/render/placers/volume.cpp | 523 +++++++++++++++++ .../daGdp/render/placers/volume.h | 62 ++ .../render/placers/volumeES.cpp.gen.es.cpp | 326 ++++++++++ .../daGdp/render/placers/volumeES.cpp.inl | 228 +++++++ .../daGdp/render/riexProcessor.cpp | 43 ++ .../daGdp/render/riexProcessor.h | 39 ++ .../daGdp/shaders/dagdp_common.dshl | 1 + .../daGdp/shaders/dagdp_common.hlsli | 14 + .../daGdp/shaders/dagdp_common_placer.dshl | 46 +- .../daGdp/shaders/dagdp_common_placer.hlsli | 2 +- .../daGdp/shaders/dagdp_dynamic.dshl | 115 ++++ .../daGdp/shaders/dagdp_dynamic.hlsli | 6 + .../daGdp/shaders/dagdp_heightmap.dshl | 42 +- .../daGdp/shaders/dagdp_riex.dshl | 117 +++- .../daGdp/shaders/dagdp_riex.hlsli | 18 + .../daGdp/shaders/dagdp_volume.dshl | 555 ++++++++++++++++++ .../daGdp/shaders/dagdp_volume.hlsli | 28 + .../daGdp/templates/dagdp.template.blk | 40 ++ .../sound/sound_utils/es/sound_debug.das | 31 + .../templates/sound_utils.template.blk | 7 + 
.../gamePhys/collision/contactSolver.cpp | 9 +- .../gamePhys/collision/contactSolver.h | 2 +- prog/tools/ShaderCompiler2/shCode.cpp | 2 +- .../_basic/components/scrollbar.nut | 64 +- samples/dngSceneViewer/prog/jamfile | 2 +- 44 files changed, 2942 insertions(+), 430 deletions(-) create mode 100644 prog/daNetGame/shaders/rendinst.hlsli create mode 100644 prog/daNetGameLibs/daGdp/render/placers/volume.cpp create mode 100644 prog/daNetGameLibs/daGdp/render/placers/volume.h create mode 100644 prog/daNetGameLibs/daGdp/render/placers/volumeES.cpp.gen.es.cpp create mode 100644 prog/daNetGameLibs/daGdp/render/placers/volumeES.cpp.inl create mode 100644 prog/daNetGameLibs/daGdp/render/riexProcessor.cpp create mode 100644 prog/daNetGameLibs/daGdp/render/riexProcessor.h create mode 100644 prog/daNetGameLibs/daGdp/shaders/dagdp_dynamic.dshl create mode 100644 prog/daNetGameLibs/daGdp/shaders/dagdp_dynamic.hlsli create mode 100644 prog/daNetGameLibs/daGdp/shaders/dagdp_riex.hlsli create mode 100644 prog/daNetGameLibs/daGdp/shaders/dagdp_volume.dshl create mode 100644 prog/daNetGameLibs/daGdp/shaders/dagdp_volume.hlsli diff --git a/DagorEngine.rev.txt b/DagorEngine.rev.txt index 57d653204..9df591caa 100644 --- a/DagorEngine.rev.txt +++ b/DagorEngine.rev.txt @@ -1 +1 @@ -93442a8e07574323ca76a3089cb84980bd4e422a +8486c81d97afccec3ee17945750171fbd2fc5c14 diff --git a/build_all.cmd b/build_all.cmd index b2713f058..7fb29b429 100644 --- a/build_all.cmd +++ b/build_all.cmd @@ -56,9 +56,7 @@ call compile_all_prog_vromfs.cmd cd shaders call compile_shaders_dx11.bat call compile_shaders_tools.bat -cd ..\..\develop\gameBase -call create_vfsroms.bat -cd ..\gui +cd ..\..\develop\gui call build_ui.cmd popd pushd outerSpace\prog\utils\dev_launcher diff --git a/build_all_linux.sh b/build_all_linux.sh index cf643666d..d23fa1f07 100755 --- a/build_all_linux.sh +++ b/build_all_linux.sh @@ -45,9 +45,6 @@ popd pushd gameBase ../../../tools/dagor_cdk/linux-$ARCH/vromfsPacker-dev mk.vromfs.blk -quiet 
-addpath:. popd -pushd ../develop/gameBase -../../../tools/dagor_cdk/linux-$ARCH/vromfsPacker-dev gamedata.vromfs.blk -quiet -popd popd pushd outerSpace/develop/gui ../../../tools/dagor_cdk/linux-$ARCH/vromfsPacker-dev input.vromfs.blk -quiet diff --git a/build_all_macOS.sh b/build_all_macOS.sh index 5450009ca..c6a482a81 100755 --- a/build_all_macOS.sh +++ b/build_all_macOS.sh @@ -44,9 +44,6 @@ popd pushd gameBase ../../../tools/dagor_cdk/macOS-x86_64/vromfsPacker-dev mk.vromfs.blk -quiet -addpath:. popd -pushd ../develop/gameBase -../../../tools/dagor_cdk/macOS-x86_64/vromfsPacker-dev gamedata.vromfs.blk -quiet -popd popd pushd outerSpace/develop/gui ../../../tools/dagor_cdk/macOS-x86_64/vromfsPacker-dev input.vromfs.blk -quiet diff --git a/outerSpace/prog/jamfile b/outerSpace/prog/jamfile index 41808ae24..dc24eed1a 100644 --- a/outerSpace/prog/jamfile +++ b/outerSpace/prog/jamfile @@ -101,13 +101,6 @@ if $(Dedicated) = no { daNetGameLibs/render_debug ; } -if $(FmodStudio) != src && $(HaveSound) != no { - AddLibs += $(FmodStaticLibs) ; - for dll in $(FmodStudioSharedLibs) { - BundleCopy += @$(dll) $(dll:D=) ; - #ALWAYS $(dll) ; - } -} if $(HaveRenderer) = yes { gamePulls += framework_blurred_ui_pull ; @@ -237,6 +230,11 @@ if $(Dedicated) != yes { } } +if $(FmodStudio) != src && $(HaveSound) != no { + AddLibs += $(FmodStaticLibs) ; + if $(Platform) != macOS { for dll in $(FmodStudioSharedLibs) { BundleCopy += @$(dll) $(dll:D=) ; } } +} + if $(Platform) in windows && $(Dedicated) != yes && $(CheckOnly) != yes { local rc = platform/$(Platform)/$(Game).rc ; if [ GLOB $(Root)/$(Location)/$(rc:D) : $(rc:D=) ] { Sources += $(rc) ; } else { Echo Skip missing $(rc) ; } diff --git a/prog/daNetGame/shaders/rendinst.hlsli b/prog/daNetGame/shaders/rendinst.hlsli new file mode 100644 index 000000000..be01b0389 --- /dev/null +++ b/prog/daNetGame/shaders/rendinst.hlsli @@ -0,0 +1,7 @@ +#ifndef RENDINST_HLSLI_INCLUDED +#define RENDINST_HLSLI_INCLUDED + +#define 
INST_OFFSET_FLAG_USE_INDIRECTION (1u << 31u) +#define INST_OFFSET_MASK_VALUE (0x7FFFFFFFu) + +#endif // RENDINST_HLSLI_INCLUDED diff --git a/prog/daNetGame/shaders/rendinst_inc.dshl b/prog/daNetGame/shaders/rendinst_inc.dshl index fbbf21b19..6bc6a2576 100644 --- a/prog/daNetGame/shaders/rendinst_inc.dshl +++ b/prog/daNetGame/shaders/rendinst_inc.dshl @@ -4,6 +4,7 @@ include "rotation_palette_inc.dshl" include "paint_details_inc.dshl" hlsl { #define RENDINST_FLOAT_POS 1 } // for debug switching between floats and halfs +hlsl { #include "rendinst.hlsli" } int rendinst_use_cell_sbuffer = 0 always_referenced; interval rendinst_use_cell_sbuffer: off < 1, on; @@ -22,6 +23,7 @@ int useBboxInCbuffer = 1 always_referenced; int rendinst_perinst_buff_no = 6 always_referenced; int rendinst_instance_buff_no = 5 always_referenced; int per_instance_data_no = 14; +int ri_additional_instance_offsets_data_no = 15; int ri_vertex_data_no = 12 always_referenced; buffer impostor_data_buffer; @@ -115,7 +117,23 @@ macro RENDINST_INSTANCING() ##if rendinst_render_type == ri_ex_only #define INST_OFFSET_GETTER 0 ##else - #define INST_OFFSET_GETTER get_immediate_dword_0() + uint inst_offset_getter() + { + uint val = get_immediate_dword_0(); + + ##if hardware.dx11 || hardware.dx12 + // Only needed, if startInstanceLocation is not used. + BRANCH + if (val & INST_OFFSET_FLAG_USE_INDIRECTION) + { + // Additional indirection for GPU-generated offsets. 
+ return 4 * loadBuffer(ri_additional_instance_offsets, val & INST_OFFSET_MASK_VALUE); + } + ##endif + + return val; + } + #define INST_OFFSET_GETTER inst_offset_getter() ##endif #endif @@ -384,6 +402,14 @@ macro RENDINST_SCENE_COMMON() impostor_data_buffer@buf = impostor_data_buffer hlsl { Buffer impostor_data_buffer@buf; }; + + if (hardware.dx11 || hardware.dx12) + { + ri_additional_instance_offsets@buf : register(ri_additional_instance_offsets_data_no) + hlsl { + ByteAddressBuffer ri_additional_instance_offsets@buf; + } + } } (ps) { impostor_data_buffer@buf = impostor_data_buffer hlsl { diff --git a/prog/daNetGameLibs/daGdp/_shaders.blk b/prog/daNetGameLibs/daGdp/_shaders.blk index 217b24ecb..19a80952c 100644 --- a/prog/daNetGameLibs/daGdp/_shaders.blk +++ b/prog/daNetGameLibs/daGdp/_shaders.blk @@ -1,2 +1,4 @@ +file:t = "daGdp/shaders/dagdp_dynamic.dshl" file:t = "daGdp/shaders/dagdp_heightmap.dshl" +file:t = "daGdp/shaders/dagdp_volume.dshl" file:t = "daGdp/shaders/dagdp_riex.dshl" \ No newline at end of file diff --git a/prog/daNetGameLibs/daGdp/debug/globalManagerDebugES.cpp.inl b/prog/daNetGameLibs/daGdp/debug/globalManagerDebugES.cpp.inl index d8f18cfb7..481165dc9 100644 --- a/prog/daNetGameLibs/daGdp/debug/globalManagerDebugES.cpp.inl +++ b/prog/daNetGameLibs/daGdp/debug/globalManagerDebugES.cpp.inl @@ -22,8 +22,7 @@ void GlobalManager::imgui() for (int viewIndex = 0; viewIndex < views.size(); ++viewIndex) { const auto &builder = debug.builders[viewIndex]; - const auto &view = views[viewIndex]; - maxInstances += builder.maxInstancesPerViewport * view.info.maxViewports; + maxInstances += builder.totalMaxInstances; } uint64_t reservedBytes = maxInstances * sizeof(PerInstanceData); @@ -47,8 +46,9 @@ void GlobalManager::imgui() ImGui::BulletText("View kind: %d", static_cast(eastl::to_underlying(view.info.kind))); ImGui::BulletText("View max draw distance: %f", view.info.maxDrawDistance); ImGui::BulletText("Max viewports: %" PRIu32, view.info.maxViewports); - 
ImGui::BulletText("Max instances %" PRIu32, builder.maxInstancesPerViewport * view.info.maxViewports); - ImGui::BulletText("Max instances per viewport: %" PRIu32, builder.maxInstancesPerViewport); + ImGui::BulletText("Total max instances %" PRIu32, builder.totalMaxInstances); + ImGui::BulletText("Max static instances per viewport: %" PRIu32, builder.maxStaticInstancesPerViewport); + ImGui::BulletText("Max dynamic instances: %" PRIu32, builder.dynamicInstanceRegion.maxCount); ImGui::BulletText("Renderables: %" PRIu32, builder.numRenderables); ImGui::TreePop(); diff --git a/prog/daNetGameLibs/daGdp/render/common.h b/prog/daNetGameLibs/daGdp/render/common.h index 8bdc9f107..e99975d75 100644 --- a/prog/daNetGameLibs/daGdp/render/common.h +++ b/prog/daNetGameLibs/daGdp/render/common.h @@ -91,14 +91,20 @@ struct RulesBuilder dag::VectorMap objectGroups; dag::VectorMap placers; RenderableId nextRenderableId = 0; + uint32_t maxObjects = 0; // 0 means no limit. }; struct ViewBuilder { dag::RelocatableFixedVector renderablesMaxInstances; dag::RelocatableFixedVector renderablesInstanceRegions; - uint32_t maxInstancesPerViewport = 0; + uint32_t totalMaxInstances = 0; // static for all viewports, and dynamic. + uint32_t maxStaticInstancesPerViewport = 0; uint32_t numRenderables = 0; + bool hasDynamicPlacers = false; + + // Additional, GPU-suballocated memory. Located directly after static regions. + InstanceRegion dynamicInstanceRegion = {}; }; enum class ViewKind diff --git a/prog/daNetGameLibs/daGdp/render/globalManager.cpp b/prog/daNetGameLibs/daGdp/render/globalManager.cpp index 6f85a59f5..0795c1a77 100644 --- a/prog/daNetGameLibs/daGdp/render/globalManager.cpp +++ b/prog/daNetGameLibs/daGdp/render/globalManager.cpp @@ -1,17 +1,55 @@ // Copyright (C) Gaijin Games KFT. All rights reserved. 
+#include #include #include #include +#include #include +#include #include -#include "globalManager.h" +#include <3d/dag_ringCPUQueryLock.h> #include #include +#include "../shaders/dagdp_common.hlsli" +#include "../shaders/dagdp_dynamic.hlsli" +#include "globalManager.h" + +namespace var +{ +static ShaderVariableInfo dyn_region("dagdp__dyn_region"); +static ShaderVariableInfo dyn_counters_num("dagdp__dyn_counters_num"); +}; // namespace var + +using TmpName = eastl::fixed_string; namespace dagdp { +#if DAGOR_DBGLEVEL == 0 +static constexpr float DYNAMIC_THRESHOLD_MULTIPLIER = 1.0f; +#else +// In debug mode, we want to be more conservative to catch issues earlier. +static constexpr float DYNAMIC_THRESHOLD_MULTIPLIER = 0.75f; +#endif + +struct ViewConstants +{ + dagdp::ViewInfo viewInfo; + uint32_t totalMaxInstances; // static for all viewports, and dynamic. + uint32_t maxStaticInstancesPerViewport; + uint32_t totalCounters; // Of each kind. + uint32_t numRenderables; + InstanceRegion dynamicInstanceRegion; +}; + +struct ViewPersistentData +{ + UniqueBuf dynAllocsBuffer; + RingCPUBufferLock readback; + ViewConstants constants; +}; + void GlobalManager::reconfigure(const GlobalConfig &new_config) { config = new_config; @@ -109,6 +147,7 @@ void GlobalManager::rebuildRules() return; } + queryLevelSettings(rulesBuilder); accumulateObjectGroups(rulesBuilder); accumulatePlacers(rulesBuilder); @@ -149,22 +188,31 @@ void GlobalManager::rebuildViews() viewBuilder.numRenderables = numRenderables; viewBuilder.renderablesMaxInstances.resize(numRenderables); - // While processing, each placer will add to the total limits. + // While processing, each placer will add to the static limits. g_entity_mgr->broadcastEventImmediate(EventViewProcess(rulesBuilder, view.info, &viewBuilder)); - // At this point, all view-specific limits are known, and we can calculate the instance regions. + // At this point, all static limits are known, and we can calculate the instance regions. 
viewBuilder.renderablesInstanceRegions.reserve(numRenderables); for (const uint32_t maxInstances : viewBuilder.renderablesMaxInstances) { InstanceRegion region; - region.baseIndex = viewBuilder.maxInstancesPerViewport; + region.baseIndex = viewBuilder.maxStaticInstancesPerViewport; region.maxCount = maxInstances; viewBuilder.renderablesInstanceRegions.push_back(region); - viewBuilder.maxInstancesPerViewport += maxInstances; + viewBuilder.maxStaticInstancesPerViewport += maxInstances; } - if (viewBuilder.maxInstancesPerViewport != 0) + viewBuilder.totalMaxInstances = viewBuilder.maxStaticInstancesPerViewport * view.info.maxViewports; + if (viewBuilder.hasDynamicPlacers && rulesBuilder.maxObjects > 0) // -V560 + { + // Dynamic region starts after all static regions. + viewBuilder.dynamicInstanceRegion.baseIndex = viewBuilder.totalMaxInstances; + viewBuilder.dynamicInstanceRegion.maxCount = rulesBuilder.maxObjects; + viewBuilder.totalMaxInstances += rulesBuilder.maxObjects; + } + + if (viewBuilder.totalMaxInstances > 0) { dabfg::NodeHandle viewProviderNode; @@ -202,40 +250,165 @@ void GlobalManager::rebuildViews() if (viewProviderNode) view.nodes.push_back(eastl::move(viewProviderNode)); - // Create the setup node. - dabfg::NodeHandle setupNode = - (dabfg::root() / "dagdp" / view.info.uniqueName.c_str()) - .registerNode("setup", DABFG_PP_NODE_SRC, - [numRenderables, maxInstancesPerViewport = viewBuilder.maxInstancesPerViewport, info = view.info]( - dabfg::Registry registry) { - view_multiplex(registry, info.kind); - const uint32_t totalCounters = info.maxViewports * numRenderables; - - auto countersHandle = registry.create("counters", dabfg::History::No) - .structuredBufferUaSr(totalCounters) - .atStage(dabfg::Stage::UNKNOWN) // TODO: d3d::clear_rwbufi semantics is not defined. 
- .useAs(dabfg::Usage::UNKNOWN) - .handle(); - - const size_t perInstanceFormatElementStride = get_tex_format_desc(perInstanceFormat).bytesPerElement; - G_ASSERT(sizeof(PerInstanceData) % perInstanceFormatElementStride == 0); - const size_t perInstanceElements = sizeof(PerInstanceData) / perInstanceFormatElementStride; - - registry.create("instance_data", dabfg::History::No) - .buffer({static_cast(perInstanceFormatElementStride), - static_cast(info.maxViewports * maxInstancesPerViewport * perInstanceElements), - SBCF_BIND_UNORDERED | SBCF_BIND_SHADER_RES, perInstanceFormat}); - - registry.registerBuffer("per_draw_gathered_data", [](auto) { return ManagedBufView(get_per_draw_gathered_data()); }) - .atStage(dabfg::Stage::TRANSFER); - - return [countersHandle] { - uint32_t zeros[4] = {}; - d3d::clear_rwbufi(countersHandle.get(), zeros); - }; - }); + auto persistentData = eastl::make_shared(); + persistentData->constants.viewInfo = view.info; + persistentData->constants.totalMaxInstances = viewBuilder.totalMaxInstances; + persistentData->constants.maxStaticInstancesPerViewport = viewBuilder.maxStaticInstancesPerViewport; + persistentData->constants.totalCounters = view.info.maxViewports * numRenderables; + persistentData->constants.numRenderables = numRenderables; + persistentData->constants.dynamicInstanceRegion = viewBuilder.dynamicInstanceRegion; + + if (viewBuilder.dynamicInstanceRegion.maxCount > 0) + { + { + TmpName bufferName(TmpName::CtorSprintf(), "dagdp_%s_dyn_allocs", view.info.uniqueName.c_str()); + persistentData->dynAllocsBuffer = dag::buffers::create_ua_sr_structured(sizeof(DynAlloc), + view.info.maxViewports * numRenderables, bufferName.c_str(), d3d::buffers::Init::Zero); + + // Match last FG usage. 
+ d3d::resource_barrier({persistentData->dynAllocsBuffer.getBuf(), RB_RO_SRV | RB_STAGE_COMPUTE}); + } + + { + TmpName bufferName(TmpName::CtorSprintf(), "dagdp_%s_readback", view.info.uniqueName.c_str()); + persistentData->readback.init(sizeof(uint32_t), DYN_COUNTERS_PREFIX, 4, bufferName.c_str(), SBCF_UA_STRUCTURED_READBACK, 0, + false); + } + } + + const dabfg::NameSpace ns = dabfg::root() / "dagdp" / view.info.uniqueName.c_str(); + + dabfg::NodeHandle setupNode = ns.registerNode("setup", DABFG_PP_NODE_SRC, [persistentData](dabfg::Registry registry) { + const auto &constants = persistentData->constants; + view_multiplex(registry, constants.viewInfo.kind); + + const size_t perInstanceFormatElementStride = get_tex_format_desc(perInstanceFormat).bytesPerElement; + G_ASSERT(sizeof(PerInstanceData) % perInstanceFormatElementStride == 0); + const size_t perInstanceElements = sizeof(PerInstanceData) / perInstanceFormatElementStride; + + registry.create("instance_data", dabfg::History::No) + .buffer({static_cast(perInstanceFormatElementStride), + static_cast(constants.totalMaxInstances * perInstanceElements), SBCF_BIND_UNORDERED | SBCF_BIND_SHADER_RES, + perInstanceFormat}); + + registry.registerBuffer("per_draw_gathered_data", [](auto) { return ManagedBufView(get_per_draw_gathered_data()); }) + .atStage(dabfg::Stage::TRANSFER); + }); + + dabfg::NodeHandle staticSetupNode = + ns.registerNode("setup_static", DABFG_PP_NODE_SRC, [persistentData](dabfg::Registry registry) { + const auto &constants = persistentData->constants; + view_multiplex(registry, constants.viewInfo.kind); + + const auto countersHandle = registry.create("counters", dabfg::History::No) + .structuredBufferUaSr(constants.totalCounters) + .atStage(dabfg::Stage::UNKNOWN) // TODO: d3d::clear_rwbufi semantics is not defined. 
+ .useAs(dabfg::Usage::UNKNOWN) + .handle(); + + return [countersHandle] { + uint32_t zeros[4] = {}; + d3d::clear_rwbufi(countersHandle.get(), zeros); + }; + }); + + dabfg::NodeHandle dynamicSetupNode = + ns.registerNode("setup_dynamic", DABFG_PP_NODE_SRC, [persistentData](dabfg::Registry registry) { + const auto &constants = persistentData->constants; + view_multiplex(registry, constants.viewInfo.kind); + + const auto dynCountersHandle = registry.create("dyn_counters_stage0", dabfg::History::No) + .structuredBufferUaSr(constants.totalCounters + DYN_COUNTERS_PREFIX) + .atStage(dabfg::Stage::UNKNOWN) // TODO: d3d::clear_rwbufi semantics is not defined. + .useAs(dabfg::Usage::UNKNOWN) + .handle(); + + registry + .registerBuffer("dyn_allocs_stage0", [persistentData](auto) { return ManagedBufView(persistentData->dynAllocsBuffer); }) + .atStage(dabfg::Stage::COMPUTE) + .useAs(dabfg::Usage::SHADER_RESOURCE); + + return [dynCountersHandle] { + uint32_t zeros[4] = {}; + d3d::clear_rwbufi(dynCountersHandle.get(), zeros); + }; + }); + + dabfg::NodeHandle dynamicRecountNode = ns.registerNode("recount", DABFG_PP_NODE_SRC, [persistentData](dabfg::Registry registry) { + const auto &constants = persistentData->constants; + view_multiplex(registry, constants.viewInfo.kind); + + registry.rename("dyn_allocs_stage0", "dyn_allocs_stage1", dabfg::History::No) + .buffer() + .atStage(dabfg::Stage::COMPUTE) + .bindToShaderVar("dagdp__dyn_allocs"); + + registry.rename("dyn_counters_stage0", "dyn_counters_stage1", dabfg::History::No) + .buffer() + .atStage(dabfg::Stage::COMPUTE) + .bindToShaderVar("dagdp__dyn_counters"); + + return [persistentData, shader = ComputeShader("dagdp_dynamic_recount")] { + const auto &constants = persistentData->constants; + + // TODO: to support more, need a multi-stage prefix sum here. Not done for simplicity. 
+ G_ASSERT(constants.totalCounters <= DAGDP_DYNAMIC_RECOUNT_GROUP_SIZE); + + ShaderGlobal::set_int(var::dyn_counters_num, constants.totalCounters); + ShaderGlobal::set_int4(var::dyn_region, constants.dynamicInstanceRegion.baseIndex, constants.dynamicInstanceRegion.maxCount, + 0, 0); + const bool res = shader.dispatchThreads(constants.totalCounters, 1, 1); + if (!res) + logerr("daGdp: dynamic recount dispatch failed!"); + }; + }); + + dabfg::NodeHandle dynamicReadbackNode = + ns.registerNode("readback", DABFG_PP_NODE_SRC, [persistentData](dabfg::Registry registry) { + view_multiplex(registry, persistentData->constants.viewInfo.kind); + registry.executionHas(dabfg::SideEffects::External); + + const auto dynCountersHandle = + registry.read("dyn_counters_stage1").buffer().atStage(dabfg::Stage::TRANSFER).useAs(dabfg::Usage::COPY).handle(); + + return [persistentData, dynCountersHandle] { + auto &dynCounters = const_cast(dynCountersHandle.ref()); + + int ignoredStride; + uint32_t ignoredFrameNum; + if (const auto *data = (uint32_t *)persistentData->readback.lock(ignoredStride, ignoredFrameNum, false)) + { + const uint32_t totalPlaced = data[DYN_COUNTERS_INDEX_TOTAL_PLACED]; + const uint32_t totalCapacity = data[DYN_COUNTERS_INDEX_TOTAL_CAPACITY]; + + if (totalPlaced > totalCapacity * DYNAMIC_THRESHOLD_MULTIPLIER) + LOGERR_ONCE("daGdp: dynamic placement overflow detected! 
%" PRIu32 " > %" PRIu32 " * %f", totalPlaced, totalCapacity, + DYNAMIC_THRESHOLD_MULTIPLIER); + + persistentData->readback.unlock(); + } + + auto *target = (Sbuffer *)persistentData->readback.getNewTarget(ignoredFrameNum); + if (target) + { + dynCounters.copyTo(target, 0, 0, sizeof(uint32_t) * DYN_COUNTERS_PREFIX); + persistentData->readback.startCPUCopy(); + } + }; + }); + view.nodes.push_back(eastl::move(setupNode)); + if (viewBuilder.maxStaticInstancesPerViewport > 0) + view.nodes.push_back(eastl::move(staticSetupNode)); + + if (viewBuilder.dynamicInstanceRegion.maxCount > 0) + { + view.nodes.push_back(eastl::move(dynamicSetupNode)); + view.nodes.push_back(eastl::move(dynamicRecountNode)); + view.nodes.push_back(eastl::move(dynamicReadbackNode)); + } + const auto nodeInserter = [&nodes = view.nodes](dabfg::NodeHandle node) { nodes.push_back(eastl::move(node)); }; g_entity_mgr->broadcastEventImmediate(EventViewFinalize(view.info, viewBuilder, nodeInserter)); } diff --git a/prog/daNetGameLibs/daGdp/render/globalManager.h b/prog/daNetGameLibs/daGdp/render/globalManager.h index 1c963b7dc..721a32174 100644 --- a/prog/daNetGameLibs/daGdp/render/globalManager.h +++ b/prog/daNetGameLibs/daGdp/render/globalManager.h @@ -69,8 +69,9 @@ class GlobalManager #endif private: - static void accumulatePlacers(RulesBuilder &rules_builder); + static void queryLevelSettings(RulesBuilder &rules_builder); static void accumulateObjectGroups(RulesBuilder &rules_builder); + static void accumulatePlacers(RulesBuilder &rules_builder); void recreateViews(); void rebuildRules(); diff --git a/prog/daNetGameLibs/daGdp/render/globalManagerES.cpp.gen.es.cpp b/prog/daNetGameLibs/daGdp/render/globalManagerES.cpp.gen.es.cpp index 0ae3d2541..3cb150d68 100644 --- a/prog/daNetGameLibs/daGdp/render/globalManagerES.cpp.gen.es.cpp +++ b/prog/daNetGameLibs/daGdp/render/globalManagerES.cpp.gen.es.cpp @@ -186,6 +186,63 @@ static ecs::EntitySystemDesc dagdp_placer_changed_es_es_desc 
ecs::EventComponentsDisappear>::build(), 0 ,"render","dagdp__name,dagdp__object_groups"); +static constexpr ecs::ComponentDesc dagdp_level_settings_changed_es_comps[] = +{ +//start of 2 rq components at [0] + {ECS_HASH("dagdp_level_settings"), ecs::ComponentTypeInfo()}, + {ECS_HASH("dagdp__max_objects"), ecs::ComponentTypeInfo()} +}; +static void dagdp_level_settings_changed_es_all_events(const ecs::Event &__restrict evt, const ecs::QueryView &__restrict components) +{ + G_UNUSED(components); + dagdp::dagdp_level_settings_changed_es(evt + ); +} +static ecs::EntitySystemDesc dagdp_level_settings_changed_es_es_desc +( + "dagdp_level_settings_changed_es", + "prog/daNetGameLibs/daGdp/render/globalManagerES.cpp.inl", + ecs::EntitySystemOps(nullptr, dagdp_level_settings_changed_es_all_events), + empty_span(), + empty_span(), + make_span(dagdp_level_settings_changed_es_comps+0, 2)/*rq*/, + empty_span(), + ecs::EventSetBuilder::build(), + 0 +,"render","dagdp__max_objects"); +static constexpr ecs::ComponentDesc level_settings_ecs_query_comps[] = +{ +//start of 1 ro components at [0] + {ECS_HASH("dagdp__max_objects"), ecs::ComponentTypeInfo()}, +//start of 1 rq components at [1] + {ECS_HASH("dagdp_level_settings"), ecs::ComponentTypeInfo()} +}; +static ecs::CompileTimeQueryDesc level_settings_ecs_query_desc +( + "dagdp::level_settings_ecs_query", + empty_span(), + make_span(level_settings_ecs_query_comps+0, 1)/*ro*/, + make_span(level_settings_ecs_query_comps+1, 1)/*rq*/, + empty_span()); +template +inline void dagdp::level_settings_ecs_query(Callable function) +{ + perform_query(g_entity_mgr, level_settings_ecs_query_desc.getHandle(), + [&function](const ecs::QueryView& __restrict components) + { + auto comp = components.begin(), compE = components.end(); G_ASSERT(comp != compE); do + { + function( + ECS_RO_COMP(level_settings_ecs_query_comps, "dagdp__max_objects", int) + ); + + }while (++comp != compE); + } + ); +} static constexpr ecs::ComponentDesc 
object_groups_named_ecs_query_comps[] = { //start of 2 ro components at [0] diff --git a/prog/daNetGameLibs/daGdp/render/globalManagerES.cpp.inl b/prog/daNetGameLibs/daGdp/render/globalManagerES.cpp.inl index 03f955b3f..3abc9b6c7 100644 --- a/prog/daNetGameLibs/daGdp/render/globalManagerES.cpp.inl +++ b/prog/daNetGameLibs/daGdp/render/globalManagerES.cpp.inl @@ -83,6 +83,15 @@ static void dagdp_on_level_unload_es(const UnloadLevel &, dagdp::GlobalManager & dagdp__global_manager.reconfigure(emptyConfig); } +template +static inline void level_settings_ecs_query(Callable); + +void GlobalManager::queryLevelSettings(RulesBuilder &rules_builder) +{ + level_settings_ecs_query( + [&](ECS_REQUIRE(ecs::Tag dagdp_level_settings) int dagdp__max_objects) { rules_builder.maxObjects = dagdp__max_objects; }); +} + template static inline void object_groups_named_ecs_query(Callable); @@ -168,6 +177,16 @@ static void dagdp_placer_changed_es(const ecs::Event &) manager_ecs_query([](dagdp::GlobalManager &dagdp__global_manager) { dagdp__global_manager.invalidateRules(); }); } +ECS_TAG(render) +ECS_ON_EVENT(on_appear) +ECS_ON_EVENT(on_disappear) +ECS_TRACK(dagdp__max_objects) +ECS_REQUIRE(ecs::Tag dagdp_level_settings, int dagdp__max_objects) +static void dagdp_level_settings_changed_es(const ecs::Event &) +{ + manager_ecs_query([](dagdp::GlobalManager &dagdp__global_manager) { dagdp__global_manager.invalidateRules(); }); +} + } // namespace dagdp static bool dagdp_console_handler(const char *argv[], int argc) diff --git a/prog/daNetGameLibs/daGdp/render/objects/riex.cpp b/prog/daNetGameLibs/daGdp/render/objects/riex.cpp index 963ca85b0..53117314d 100644 --- a/prog/daNetGameLibs/daGdp/render/objects/riex.cpp +++ b/prog/daNetGameLibs/daGdp/render/objects/riex.cpp @@ -8,6 +8,10 @@ #include #include #include +#include +#include +#include +#include #include #include #include @@ -23,12 +27,15 @@ #include #include #include -#include "riex.h" +#include #include #include #include #include 
#include +#include "../../shaders/dagdp_riex.hlsli" +#include "shaders/rendinst.hlsli" +#include "riex.h" namespace var { @@ -47,6 +54,8 @@ static ShaderVariableInfo instancing_type("instancing_type"); static ShaderVariableInfo rendinst_render_pass("rendinst_render_pass"); } // namespace external_var +using TmpName = eastl::fixed_string; + namespace dagdp { @@ -78,6 +87,11 @@ static constexpr SubPassRange VIEW_KIND_SUBPASSES[VIEW_KIND_COUNT] = { static constexpr size_t SUBPASS_COUNT = eastl::to_underlying(SubPass::COUNT); +// There are now separate counters for static regions, and dynamic regions. +static constexpr uint32_t COUNTER_KINDS_NUM = 2; +static constexpr uint32_t COUNTER_KIND_STATIC = 0; +static constexpr uint32_t COUNTER_KIND_DYNAMIC = 1; + struct StageRange { int first; @@ -120,7 +134,7 @@ static bool uses_extended_multi_draw_struct() { return d3d::get_driver_code().is // Each of these corresponds to V actual draw calls, where V is the number of viewports. struct ProtoDrawCall { - InstanceRegion instanceRegion; + InstanceRegion staticInstanceRegion; const ShaderMesh::RElem *rElem; RenderableId rId; @@ -139,7 +153,7 @@ struct MultiDrawCall uint32_t count; // How many draw calls are in the span? bool isForcedSingle; // For some platforms and/or materials, we can't use multidraw. 
- InstanceRegion instanceRegion; + InstanceRegion staticInstanceRegion; // All of the draw calls share the below state: int stage; // ShaderMesh::STG_* @@ -187,6 +201,7 @@ static bool can_coalesce(const ProtoDrawCall &a, const ProtoDrawCall &b) struct RiexConstants { bool isExtendedArgs; + bool isNonMultidrawIndirectionNeeded; uint32_t argsStride; uint32_t argsDwords; @@ -194,10 +209,12 @@ struct RiexConstants uint32_t instanceBufferElementsCount; ViewInfo viewInfo; - uint32_t maxInstancesPerViewport; + uint32_t maxStaticInstancesPerViewport; uint32_t maxDrawCallsPerViewport; uint32_t numRenderables; + bool haveDynamicRegions; + eastl::array haveSubPass; }; @@ -227,6 +244,7 @@ struct RiexPersistentData dag::Vector resources; CallbackToken rElemsUpdatedToken; shaders::UniqueOverrideStateId afterPrePassOverride; + int riAdditionalInstanceOffsetRegNo = -1; dag::Vector renderablesInfo; eastl::hash_set usedRI; @@ -237,8 +255,6 @@ struct RiexPersistentData bool areBuffersValid = false; }; -using TmpName = eastl::fixed_string; - class ExternalShaderVarsScope { float global_transp_r; @@ -269,40 +285,47 @@ void update_draw_calls(LockedBuffer &locked_buffer, const RiexConstants &constants, const dag::Vector &proto_draw_calls) { - for (uint32_t viewportIndex = 0; viewportIndex < constants.viewInfo.maxViewports; ++viewportIndex) - { - uint32_t baseIndex = viewportIndex * proto_draw_calls.size(); - for (auto [i, call] : enumerate(proto_draw_calls)) + for (uint32_t counterKind = 0; counterKind < COUNTER_KINDS_NUM; ++counterKind) + for (uint32_t viewportIndex = 0; viewportIndex < constants.viewInfo.maxViewports; ++viewportIndex) { - DrawIndexedIndirectArgs *args; - if constexpr (eastl::is_same::value) - args = &locked_buffer[baseIndex + i].args; - else - args = &locked_buffer[baseIndex + i]; - - args->indexCountPerInstance = 0; // Will be patched by CS. - args->instanceCount = 0; // Will be patched by CS. 
- args->startIndexLocation = call.rElem->si; - args->baseVertexLocation = call.rElem->baseVertex; - - uint32_t drawCallId = 0; - if (is_packed_material(call.dvState.const_state)) + uint32_t baseIndex = (viewportIndex + counterKind * constants.viewInfo.maxViewports) * proto_draw_calls.size(); + for (auto [i, call] : enumerate(proto_draw_calls)) { - // Note: taken from riExtraRendererT.h - const uint32_t materialOffset = get_material_offset(call.dvState.const_state); - const uint32_t instanceBaseIndex = constants.maxInstancesPerViewport * viewportIndex + call.instanceRegion.baseIndex; - drawCallId = (instanceBaseIndex << MATRICES_OFFSET_SHIFT) | materialOffset; - } + DrawIndexedIndirectArgs *args; + if constexpr (eastl::is_same_v) + args = &locked_buffer[baseIndex + i].args; + else + args = &locked_buffer[baseIndex + i]; + + args->indexCountPerInstance = 0; // Will be patched by CS. + args->instanceCount = 0; // Will be patched by CS. + args->startIndexLocation = call.rElem->si; + args->baseVertexLocation = call.rElem->baseVertex; + + const bool isPacked = is_packed_material(call.dvState.const_state); + const uint32_t instanceBaseIndex = + constants.maxStaticInstancesPerViewport * viewportIndex + call.staticInstanceRegion.baseIndex; + + uint32_t drawCallId = 0; + if (isPacked) + { + // Note: taken from riExtraRendererT.h + const uint32_t materialOffset = get_material_offset(call.dvState.const_state); - if constexpr (eastl::is_same::value) - { - locked_buffer[baseIndex + i].drawCallId = drawCallId; - args->startInstanceLocation = 0; + // For dynamic counters, drawCallId needs to be patched on the GPU. + if (counterKind == COUNTER_KIND_STATIC) + drawCallId = (instanceBaseIndex << MATRICES_OFFSET_SHIFT) | materialOffset; + } + + if constexpr (eastl::is_same_v) + { + locked_buffer[baseIndex + i].drawCallId = drawCallId; + args->startInstanceLocation = isPacked ? 0 : instanceBaseIndex; + } + else + args->startInstanceLocation = isPacked ? 
drawCallId : instanceBaseIndex; } - else - args->startInstanceLocation = drawCallId; } - } } [[nodiscard]] static bool update_buffers(RiexPersistentData &persistent_data, @@ -310,7 +333,7 @@ void update_draw_calls(LockedBuffer &locked_buffer, { RiexConstants &constants = persistent_data.constants; - const uint32_t drawCallsActualCount = proto_draw_calls.size() * constants.viewInfo.maxViewports; + const uint32_t drawCallsActualCount = COUNTER_KINDS_NUM * proto_draw_calls.size() * constants.viewInfo.maxViewports; const uint32_t drawCallsActualByteSize = drawCallsActualCount * constants.argsStride; UniqueBuf stagingForDrawCalls; { @@ -342,31 +365,37 @@ void update_draw_calls(LockedBuffer &locked_buffer, } const uint32_t counterPatchesActualCount = proto_draw_calls.size() * constants.viewInfo.maxViewports; - const uint32_t counterPatchesActualByteSize = counterPatchesActualCount * sizeof(uint4); + const uint32_t counterPatchesActualByteSize = counterPatchesActualCount * sizeof(RiexPatch); UniqueBuf stagingForCounterPatches; { TmpName bufferName(TmpName::CtorSprintf(), "dagdp_riex_%s_counter_patches_staging", constants.viewInfo.uniqueName.c_str()); stagingForCounterPatches = dag::buffers::create_staging(counterPatchesActualByteSize, bufferName.c_str()); - auto lockedBuffer = lock_sbuffer(stagingForCounterPatches.getBuf(), 0, counterPatchesActualCount, VBLOCK_WRITEONLY); + auto lockedBuffer = lock_sbuffer(stagingForCounterPatches.getBuf(), 0, counterPatchesActualCount, VBLOCK_WRITEONLY); if (!lockedBuffer) { logerr("daGdp: could not lock buffer %s", bufferName.c_str()); return false; } - for (int viewportIndex = 0; viewportIndex < constants.viewInfo.maxViewports; ++viewportIndex) + G_STATIC_ASSERT(COUNTER_KINDS_NUM == 2); + for (uint32_t viewportIndex = 0; viewportIndex < constants.viewInfo.maxViewports; ++viewportIndex) { - uint32_t baseIndex = viewportIndex * proto_draw_calls.size(); + const uint32_t baseIndex = viewportIndex * proto_draw_calls.size(); + const uint32_t 
baseIndexDynamic = + (viewportIndex + COUNTER_KIND_DYNAMIC * constants.viewInfo.maxViewports) * proto_draw_calls.size(); for (auto [i, call] : enumerate(proto_draw_calls)) { - uint32_t byteOffset = (baseIndex + i) * constants.argsStride + (constants.isExtendedArgs ? sizeof(uint32_t) : 0); - uint32_t counterIndex = viewportIndex * constants.numRenderables + call.rId; - - uint4 &patch = lockedBuffer[baseIndex + i]; - patch.x = byteOffset; - patch.y = counterIndex; - patch.z = call.rElem->numf * 3; + const bool isPacked = is_packed_material(call.dvState.const_state); + + RiexPatch &patch = lockedBuffer[baseIndex + i]; + patch.argsByteOffsetStatic = (baseIndex + i) * constants.argsStride + (constants.isExtendedArgs ? sizeof(uint32_t) : 0); + patch.argsByteOffsetDynamic = + (baseIndexDynamic + i) * constants.argsStride + (constants.isExtendedArgs ? sizeof(uint32_t) : 0); + patch.localCounterIndex = viewportIndex * constants.numRenderables + call.rId; + patch.indexCount = call.rElem->numf * 3; + patch.materialOffset = isPacked ? get_material_offset(call.dvState.const_state) : 0; + patch.flags = (constants.isExtendedArgs ? RIEX_PATCH_FLAG_BIT_EXTENDED_ARGS : 0) | (isPacked ? RIEX_PATCH_FLAG_BIT_PACKED : 0); } } } @@ -434,7 +463,7 @@ static void rebuild_cache(RiexPersistentData &persistent_data) auto &call = protoDrawCallsFmem.push_back(); call.rElem = &rElem; - call.instanceRegion = renderable.instanceRegion; + call.staticInstanceRegion = renderable.instanceRegion; call.rId = renderable.rId; call.stage = stage; @@ -466,7 +495,7 @@ static void rebuild_cache(RiexPersistentData &persistent_data) // Non-packed materials cannot use multidraw. 
multiCall.isForcedSingle = !is_packed_material(call.dvState.const_state); - multiCall.instanceRegion = call.instanceRegion; + multiCall.staticInstanceRegion = call.staticInstanceRegion; multiCall.stage = call.stage; multiCall.sElem = call.sElem; @@ -567,11 +596,18 @@ static void render(SubPass sub_pass, // static ShaderVariableInfo perDrawDataVarId("perDrawInstanceData"); // ShaderGlobal::set_buffer(perDrawDataVarId, get_per_draw_gathered_data()); + // Hack: BFG does not allow to specify two read-only usages. + d3d::resource_barrier({draw_args_buf, RB_RO_SRV | RB_RO_INDIRECT_BUFFER | RB_STAGE_VERTEX}); + + if (persistent_data.riAdditionalInstanceOffsetRegNo != -1) + d3d::set_buffer(STAGE_VS, persistent_data.riAdditionalInstanceOffsetRegNo, draw_args_buf); + Vbuffer *lastVb = nullptr; int lastVbStride = 0; shaders::CombinedDynVariantState lastDvState{}; ScriptedShaderElement *lastSElem = nullptr; bool lastShaderOverride = false; + uint32_t lastImmediate = ~0u; const auto &multiCallSpan = persistent_data.cache.multiCallSpans[eastl::to_underlying(sub_pass)]; @@ -619,27 +655,43 @@ static void render(SubPass sub_pass, lastDvState = call.dvState; } - uint32_t additionalByteOffset = 0; - if (call.isForcedSingle) + // When draw call ID is present, but not used, we have to skip it in the extended indirect args. + uint32_t additionalByteOffset = call.isForcedSingle && constants.isExtendedArgs ? 
sizeof(uint32_t) : 0; + + for (uint32_t counterKind = 0; counterKind < COUNTER_KINDS_NUM; ++counterKind) { - uint32_t instanceBaseIndex = constants.maxInstancesPerViewport * viewport_index + call.instanceRegion.baseIndex; - uint32_t instanceBaseBufferElementIndex = instanceBaseIndex * constants.instanceBufferElementsCount; - d3d::set_immediate_const(STAGE_VS, &instanceBaseBufferElementIndex, 1); + if (counterKind == COUNTER_KIND_STATIC && constants.maxStaticInstancesPerViewport == 0) + continue; + + if (counterKind == COUNTER_KIND_DYNAMIC && !constants.haveDynamicRegions) + continue; + + const uint32_t kindByteOffset = + persistent_data.cache.numCallsPerViewport * constants.argsStride * constants.viewInfo.maxViewports * counterKind; + const uint32_t viewportByteOffset = persistent_data.cache.numCallsPerViewport * constants.argsStride * viewport_index; + const uint32_t totalByteOffset = call.byteOffset + kindByteOffset + viewportByteOffset + additionalByteOffset; - if (constants.isExtendedArgs) + uint32_t immediate = 0u; + if (call.isForcedSingle && constants.isNonMultidrawIndirectionNeeded) { - // Since draw call ID is not used, we have to skip it in the extended indirect args. 
- additionalByteOffset = sizeof(uint32_t); + const uint32_t offset = (totalByteOffset + offsetof(DrawIndexedIndirectArgs, startInstanceLocation)); + G_ASSERT(offset <= INST_OFFSET_MASK_VALUE); + immediate |= INST_OFFSET_FLAG_USE_INDIRECTION | offset; + } + + if (immediate != lastImmediate) + { + d3d::set_immediate_const(STAGE_VS, &immediate, 1); + lastImmediate = immediate; } - } - uint32_t viewportByteOffset = persistent_data.cache.numCallsPerViewport * constants.argsStride * viewport_index; - d3d::multi_draw_indexed_indirect(PRIM_TRILIST, draw_args_buf, call.count, constants.argsStride, - call.byteOffset + viewportByteOffset + additionalByteOffset); + d3d::multi_draw_indexed_indirect(PRIM_TRILIST, draw_args_buf, call.count, constants.argsStride, totalByteOffset); + } } // Needed as part RiShaderConstBuffers' contract. rendinst::render::endRenderInstancing(); + d3d::set_immediate_const(STAGE_VS, nullptr, 0); shaders::overrides::reset(); } @@ -722,6 +774,7 @@ void riex_finalize_view(const ViewInfo &view_info, const ViewBuilder &view_build prePassOverrideState.set(shaders::OverrideState::Z_FUNC | shaders::OverrideState::Z_WRITE_DISABLE); prePassOverrideState.zFunc = CMPF_EQUAL; persistentData->afterPrePassOverride.reset(shaders::overrides::create(prePassOverrideState)); + ShaderGlobal::get_int_by_name("ri_additional_instance_offsets_data_no", persistentData->riAdditionalInstanceOffsetRegNo); persistentData->rElemsUpdatedToken = unitedvdata::riUnitedVdata.on_mesh_relems_updated.subscribe( [self = persistentData.get()](const RenderableInstanceLodsResource *ri, bool) { @@ -748,9 +801,14 @@ void riex_finalize_view(const ViewInfo &view_info, const ViewBuilder &view_build for (auto value : maxDrawCallsPerViewport) constants.maxDrawCallsPerViewport += value; - constants.maxInstancesPerViewport = view_builder.maxInstancesPerViewport; + constants.maxStaticInstancesPerViewport = view_builder.maxStaticInstancesPerViewport; + constants.haveDynamicRegions = 
view_builder.dynamicInstanceRegion.maxCount > 0; constants.viewInfo = view_info; constants.isExtendedArgs = uses_extended_multi_draw_struct(); + + // See rendinst_inc.dshl, inst_offset_getter() + constants.isNonMultidrawIndirectionNeeded = d3d::get_driver_code().is(d3d::dx11 || d3d::dx12); + constants.argsStride = constants.isExtendedArgs ? sizeof(ExtendedDrawIndexedIndirectArgs) : sizeof(DrawIndexedIndirectArgs); constants.argsDwords = constants.argsStride / sizeof(uint32_t); G_ASSERT(constants.argsStride % sizeof(uint32_t) == 0); @@ -766,11 +824,10 @@ void riex_finalize_view(const ViewInfo &view_info, const ViewBuilder &view_build constants.haveSubPass[s] = maxDrawCallsPerViewport[s] > 0; } - uint32_t totalMaxInstances = constants.maxInstancesPerViewport * constants.viewInfo.maxViewports; - if (totalMaxInstances > MAX_MATRIX_OFFSET) + if (view_builder.totalMaxInstances > MAX_MATRIX_OFFSET) { // MAX_MATRIX_OFFSET only applies to packed materials, but we should enforce the same limit everywhere. - logerr("daGdp: maximum number of RendInst Extra instances is too large: %" PRIu32, totalMaxInstances); + logerr("daGdp: maximum number of RendInst Extra instances is too large: %" PRIu32, view_builder.totalMaxInstances); return; } @@ -794,16 +851,17 @@ void riex_finalize_view(const ViewInfo &view_info, const ViewBuilder &view_build { TmpName bufferName(TmpName::CtorSprintf(), "dagdp_riex_%s_counter_patches", constants.viewInfo.uniqueName.c_str()); - persistentData->counterPatchesBuffer = dag::buffers::create_persistent_sr_structured(sizeof(uint4), + persistentData->counterPatchesBuffer = dag::buffers::create_persistent_sr_structured(sizeof(RiexPatch), constants.maxDrawCallsPerViewport * constants.viewInfo.maxViewports, bufferName.c_str()); } { TmpName bufferName(TmpName::CtorSprintf(), "dagdp_riex_%s_draw_args", constants.viewInfo.uniqueName.c_str()); - // `d3d::buffers::create_ua_indirect` does not work here, because of multidraw extended args. 
- persistentData->drawArgsBuffer = - dag::create_sbuffer(sizeof(uint32_t), constants.argsDwords * constants.maxDrawCallsPerViewport * constants.viewInfo.maxViewports, - SBCF_UA_INDIRECT, 0, bufferName.c_str()); + // For simplicity, draw call args count and indexing are always the same, regardless of whether we have dynamic + // or static instances. This buffer is small anyway. + persistentData->drawArgsBuffer = dag::create_sbuffer(sizeof(uint32_t), + constants.argsDwords * constants.maxDrawCallsPerViewport * constants.viewInfo.maxViewports * COUNTER_KINDS_NUM, + SBCF_BIND_UNORDERED | SBCF_MISC_ALLOW_RAW | SBCF_MISC_DRAWINDIRECT | SBCF_BIND_SHADER_RES, 0, bufferName.c_str()); } dabfg::NodeHandle updateNode = ns.registerNode("update", DABFG_PP_NODE_SRC, [persistentData](dabfg::Registry registry) { @@ -848,7 +906,7 @@ void riex_finalize_view(const ViewInfo &view_info, const ViewBuilder &view_build }; }); - dabfg::NodeHandle patchNode = ns.registerNode("patch", DABFG_PP_NODE_SRC, [persistentData](dabfg::Registry registry) { + dabfg::NodeHandle patchStaticNode = ns.registerNode("patch_static", DABFG_PP_NODE_SRC, [persistentData](dabfg::Registry registry) { view_multiplex(registry, persistentData->constants.viewInfo.kind); (registry.root() / "dagdp" / persistentData->constants.viewInfo.uniqueName.c_str()) .read("counters") @@ -858,7 +916,30 @@ void riex_finalize_view(const ViewInfo &view_info, const ViewBuilder &view_build registry.read("patches").buffer().atStage(dabfg::Stage::COMPUTE).bindToShaderVar("dagdp_riex__patches"); registry.modify("draw_args").buffer().atStage(dabfg::Stage::COMPUTE).bindToShaderVar("dagdp_riex__draw_args"); - return [persistentData, shader = ComputeShader("dagdp_riex_patch_counters")] { + return [persistentData, shader = ComputeShader("dagdp_riex_patch_static")] { + if (!persistentData->areBuffersValid) + return; + + ShaderGlobal::set_int(var::num_patches, persistentData->cache.numCounterPatches); + bool res = 
shader.dispatchThreads(persistentData->cache.numCounterPatches, 1, 1); + G_ASSERT(res); + G_UNUSED(res); + }; + }); + + dabfg::NodeHandle patchDynamicNode = ns.registerNode("patch_dynamic", DABFG_PP_NODE_SRC, [persistentData](dabfg::Registry registry) { + view_multiplex(registry, persistentData->constants.viewInfo.kind); + + const auto ns = registry.root() / "dagdp" / persistentData->constants.viewInfo.uniqueName.c_str(); + + ns.read("dyn_allocs_stage1").buffer().atStage(dabfg::Stage::COMPUTE).bindToShaderVar("dagdp__dyn_allocs"); + + ns.read("dyn_counters_stage1").buffer().atStage(dabfg::Stage::COMPUTE).bindToShaderVar("dagdp__dyn_counters"); + + registry.read("patches").buffer().atStage(dabfg::Stage::COMPUTE).bindToShaderVar("dagdp_riex__patches"); + registry.modify("draw_args").buffer().atStage(dabfg::Stage::COMPUTE).bindToShaderVar("dagdp_riex__draw_args"); + + return [persistentData, shader = ComputeShader("dagdp_riex_patch_dynamic")] { if (!persistentData->areBuffersValid) return; @@ -870,7 +951,12 @@ void riex_finalize_view(const ViewInfo &view_info, const ViewBuilder &view_build }); node_inserter(eastl::move(updateNode)); - node_inserter(eastl::move(patchNode)); + + if (constants.maxStaticInstancesPerViewport > 0) + node_inserter(eastl::move(patchStaticNode)); + + if (constants.haveDynamicRegions) + node_inserter(eastl::move(patchDynamicNode)); switch (view_info.kind) { diff --git a/prog/daNetGameLibs/daGdp/render/placer.cpp b/prog/daNetGameLibs/daGdp/render/placer.cpp index 0eab7db20..61a68497c 100644 --- a/prog/daNetGameLibs/daGdp/render/placer.cpp +++ b/prog/daNetGameLibs/daGdp/render/placer.cpp @@ -1,10 +1,17 @@ // Copyright (C) Gaijin Games KFT. All rights reserved. 
+#include +#include <3d/dag_lockSbuffer.h> #include "placer.h" +#include "../shaders/dagdp_common.hlsli" + +using TmpName = eastl::fixed_string; namespace dagdp { +CONSOLE_FLOAT_VAL("dagdp", frustum_culling_bias, 0.0f); + struct Transition { PlaceableId pId; @@ -12,10 +19,7 @@ struct Transition float drawDistance; }; -void calculate_draw_ranges(uint32_t p_id_start, - dag::ConstSpan object_groups, - DrawRangesFmem &draw_ranges, - RenderableIndicesFmem &renderable_indices) +void calculate_draw_ranges(CommonPlacerBufferInit &init, dag::ConstSpan object_groups) { size_t maxTransitions = 0; for (const auto &objectGroup : object_groups) @@ -27,7 +31,7 @@ void calculate_draw_ranges(uint32_t p_id_start, transitionPoints.reserve(maxTransitions); transitions.reserve(maxTransitions); - uint32_t pId = p_id_start; + uint32_t pId = init.numPlaceables; for (const auto &objectGroup : object_groups) { for (const auto &placeable : objectGroup.info->placeables) @@ -56,28 +60,161 @@ void calculate_draw_ranges(uint32_t p_id_start, return a.rId < b.rId; }); - draw_ranges.insert(draw_ranges.end(), transitionPoints.begin(), transitionPoints.end()); + init.drawRangesFmem.insert(init.drawRangesFmem.end(), transitionPoints.begin(), transitionPoints.end()); - const uint32_t numPlaceables = pId - p_id_start; + const uint32_t numPlaceables = pId - init.numPlaceables; const uint32_t numDrawRanges = transitionPoints.size(); - const uint32_t renderableIndexStart = renderable_indices.size(); + const uint32_t renderableIndexStart = init.renderableIndicesFmem.size(); - renderable_indices.resize(renderableIndexStart + numDrawRanges * numPlaceables); - for (uint32_t i = renderableIndexStart; i < renderable_indices.size(); ++i) - renderable_indices[i] = ~0u; + init.renderableIndicesFmem.resize(renderableIndexStart + numDrawRanges * numPlaceables); + for (uint32_t i = renderableIndexStart; i < init.renderableIndicesFmem.size(); ++i) + init.renderableIndicesFmem[i] = ~0u; dag::Vector 
currentRangeIndices(numPlaceables); for (const auto &transition : transitions) { - const uint32_t pIdOffset = transition.pId - p_id_start; + const uint32_t pIdOffset = transition.pId - init.numPlaceables; while ( currentRangeIndices[pIdOffset] < numDrawRanges && transition.drawDistance >= transitionPoints[currentRangeIndices[pIdOffset]]) { - renderable_indices[renderableIndexStart + currentRangeIndices[pIdOffset] * numPlaceables + pIdOffset] = transition.rId; + init.renderableIndicesFmem[renderableIndexStart + currentRangeIndices[pIdOffset] * numPlaceables + pIdOffset] = transition.rId; ++currentRangeIndices[pIdOffset]; } } } +void add_variant( + CommonPlacerBufferInit &init, dag::ConstSpan object_groups, float density, float placeableWeightEmpty) +{ + auto &gpuData = init.variantsFmem.push_back(); + + uint32_t numVariantPlaceables = 0; + for (const auto &objectGroup : object_groups) + { + numVariantPlaceables += objectGroup.info->placeables.size(); + init.objectGroupsFmem.push_back(objectGroup); + } + + gpuData.placeableWeightEmpty = placeableWeightEmpty; + gpuData.placeableStartIndex = init.numPlaceables; + gpuData.placeableCount = numVariantPlaceables; + gpuData.placeableEndIndex = init.numPlaceables + numVariantPlaceables; + gpuData.drawRangeStartIndex = init.drawRangesFmem.size(); + gpuData.renderableIndicesStartIndex = init.renderableIndicesFmem.size(); + gpuData.density = density; + + calculate_draw_ranges(init, object_groups); + gpuData.drawRangeEndIndex = init.drawRangesFmem.size(); + + init.numPlaceables += numVariantPlaceables; +} + +bool init_common_placer_buffers(const CommonPlacerBufferInit &init, eastl::string_view buffer_name_prefix, CommonPlacerBuffers &output) +{ + { + TmpName bufferName(TmpName::CtorSprintf(), "%.*s_draw_ranges", buffer_name_prefix.size(), buffer_name_prefix.data()); + output.drawRangesBuffer = + dag::buffers::create_persistent_sr_structured(sizeof(float), init.drawRangesFmem.size(), bufferName.c_str()); + + bool updated = + 
output.drawRangesBuffer->updateData(0, data_size(init.drawRangesFmem), init.drawRangesFmem.data(), VBLOCK_WRITEONLY); + + if (!updated) + { + logerr("daGdp: could not update buffer %s", bufferName.c_str()); + return false; + } + } + + { + TmpName bufferName(TmpName::CtorSprintf(), "%.*s_renderable_indices", buffer_name_prefix.size(), buffer_name_prefix.data()); + output.renderableIndicesBuffer = + dag::buffers::create_persistent_sr_structured(sizeof(uint32_t), init.renderableIndicesFmem.size(), bufferName.c_str()); + + bool updated = output.renderableIndicesBuffer->updateData(0, data_size(init.renderableIndicesFmem), + init.renderableIndicesFmem.data(), VBLOCK_WRITEONLY); + + if (!updated) + { + logerr("daGdp: could not update buffer %s", bufferName.c_str()); + return false; + } + } + + { + TmpName bufferName(TmpName::CtorSprintf(), "%.*s_variants", buffer_name_prefix.size(), buffer_name_prefix.data()); + output.variantsBuffer = + dag::buffers::create_persistent_sr_structured(sizeof(VariantGpuData), init.variantsFmem.size(), bufferName.c_str()); + + bool updated = output.variantsBuffer->updateData(0, data_size(init.variantsFmem), init.variantsFmem.data(), VBLOCK_WRITEONLY); + + if (!updated) + { + logerr("daGdp: could not update buffer %s", bufferName.c_str()); + return false; + } + } + + { + TmpName bufferName(TmpName::CtorSprintf(), "%.*s_placeables", buffer_name_prefix.size(), buffer_name_prefix.data()); + output.placeablesBuffer = + dag::buffers::create_persistent_sr_structured(sizeof(PlaceableGpuData), init.numPlaceables, bufferName.c_str()); + + auto lockedBuffer = lock_sbuffer(output.placeablesBuffer.getBuf(), 0, init.numPlaceables, VBLOCK_WRITEONLY); + if (!lockedBuffer) + { + logerr("daGdp: could not lock buffer %s", bufferName.c_str()); + return false; + } + + uint32_t i = 0; + for (const auto &objectGroup : init.objectGroupsFmem) + for (const auto &placeable : objectGroup.info->placeables) + { + auto &item = lockedBuffer[i++]; + const auto ¶ms = 
placeable.params; + + item.yawRadiansMin = md_min(params.yawRadiansMidDev); + item.yawRadiansMax = md_max(params.yawRadiansMidDev); + item.pitchRadiansMin = md_min(params.pitchRadiansMidDev); + item.pitchRadiansMax = md_max(params.pitchRadiansMidDev); + item.rollRadiansMin = md_min(params.rollRadiansMidDev); + item.rollRadiansMax = md_max(params.rollRadiansMidDev); + item.scaleMin = md_min(params.scaleMidDev); + item.scaleMax = md_max(params.scaleMidDev); + item.maxBaseDrawDistance = placeable.ranges.back().baseDrawDistance; + item.slopeFactor = params.slopeFactor; + item.flags = params.flags; + item.riPoolOffset = params.riPoolOffset; + } + + G_ASSERT(i == init.numPlaceables); + } + + { + TmpName bufferName(TmpName::CtorSprintf(), "%.*s_placeable_weights", buffer_name_prefix.size(), buffer_name_prefix.data()); + output.placeableWeightsBuffer = + dag::buffers::create_persistent_sr_structured(sizeof(float), init.numPlaceables, bufferName.c_str()); + + auto lockedBuffer = lock_sbuffer(output.placeableWeightsBuffer.getBuf(), 0, init.numPlaceables, VBLOCK_WRITEONLY); + if (!lockedBuffer) + { + logerr("daGdp: could not lock buffer %s", bufferName.c_str()); + return false; + } + + uint32_t i = 0; + for (const auto &objectGroup : init.objectGroupsFmem) + for (const auto &placeable : objectGroup.info->placeables) + lockedBuffer[i++] = placeable.params.weight; + + G_ASSERT(i == init.numPlaceables); + } + + return true; +} + +float get_frustum_culling_bias() { return frustum_culling_bias.get(); } + } // namespace dagdp diff --git a/prog/daNetGameLibs/daGdp/render/placer.h b/prog/daNetGameLibs/daGdp/render/placer.h index ca344f1e2..2eeb19a5c 100644 --- a/prog/daNetGameLibs/daGdp/render/placer.h +++ b/prog/daNetGameLibs/daGdp/render/placer.h @@ -1,7 +1,9 @@ // Copyright (C) Gaijin Games KFT. All rights reserved. 
#pragma once +#include #include "common.h" +#include "../shaders/dagdp_common_placer.hlsli" namespace dagdp { @@ -16,15 +18,39 @@ struct PlacerObjectGroup float effectiveDensity = 0.0f; }; -using DrawRangesFmem = dag::RelocatableFixedVector; -using RenderableIndicesFmem = dag::RelocatableFixedVector; +float get_frustum_culling_bias(); -// Should add elements to `draw_ranges` and `renderable_indices`. -// Number of elements added must be `R` and `R * P` respectively, where R is the resulting number of ranges, and P is the number of -// placeables. -void calculate_draw_ranges(uint32_t p_id_start, - dag::ConstSpan object_groups, - DrawRangesFmem &draw_ranges, - RenderableIndicesFmem &renderable_indices); +struct CommonPlacerBufferInit +{ + // Draw ranges are concatenated across variants, with `drawRangeStartIndex` marking the start of entries for the variant. + // For a single variant, R float distances are stored, where: + // - R = number of ranges in the variant. + dag::RelocatableFixedVector drawRangesFmem; + + // Renderable indices are concatenated across variants, with `renderableIndicesStartIndex` marking the start of entries for the + // variant. For a single variant, (P * R) renderable indices (IDs) are stored, where: + // - P = number of placeables in the variant. + // - R = number of ranges in the variant. 
+ dag::RelocatableFixedVector renderableIndicesFmem; + + dag::RelocatableFixedVector variantsFmem; + dag::RelocatableFixedVector objectGroupsFmem; + uint32_t numPlaceables = 0; +}; + +struct CommonPlacerBuffers +{ + UniqueBuf drawRangesBuffer; + UniqueBuf renderableIndicesBuffer; + UniqueBuf variantsBuffer; + UniqueBuf placeablesBuffer; + UniqueBuf placeableWeightsBuffer; +}; + +void add_variant( + CommonPlacerBufferInit &init, dag::ConstSpan object_groups, float density, float placeableWeightEmpty); + +[[nodiscard]] bool init_common_placer_buffers( + const CommonPlacerBufferInit &init, eastl::string_view buffer_name_prefix, CommonPlacerBuffers &output); } // namespace dagdp diff --git a/prog/daNetGameLibs/daGdp/render/placers/heightmap.cpp b/prog/daNetGameLibs/daGdp/render/placers/heightmap.cpp index 8bbf9efab..06f933f6f 100644 --- a/prog/daNetGameLibs/daGdp/render/placers/heightmap.cpp +++ b/prog/daNetGameLibs/daGdp/render/placers/heightmap.cpp @@ -3,7 +3,6 @@ #include #include #include -#include #include <3d/dag_resPtr.h> #include <3d/dag_lockSbuffer.h> #include @@ -67,15 +66,6 @@ using TmpName = eastl::fixed_string; namespace dagdp { -CONSOLE_FLOAT_VAL("dagdp", heightmap_frustum_culling_bias, 0.0f); - -struct Variant -{ - uint32_t drawRangeStartIndex; - uint32_t renderableIndicesStartIndex; // Not a typo. 
- uint32_t placeableStartIndex; -}; - struct Biome { uint32_t variantIndex; @@ -84,8 +74,7 @@ struct Biome struct HeightmapConstants { ViewInfo viewInfo; - uint32_t maxInstancesPerViewport; - uint32_t numDrawRanges; + uint32_t maxStaticInstancesPerViewport; uint32_t numTiles; uint32_t numRenderables; uint32_t numPlaceables; @@ -99,15 +88,12 @@ struct HeightmapConstants struct HeightmapPersistentData { - UniqueBuf drawRangesBuffer; - UniqueBuf placeablesBuffer; - UniqueBuf placeableWeightsBuffer; + CommonPlacerBuffers commonBuffers; + UniqueBuf placeableTileLimitsBuffer; - UniqueBuf renderableIndicesBuffer; UniqueBuf instanceRegionsBuffer; UniqueBuf tilePositionsBuffer; UniqueBuf biomesBuffer; - UniqueBuf variantsBuffer; SharedTex densityMask; HeightmapConstants constants{}; @@ -213,14 +199,14 @@ static dabfg::NodeHandle create_place_node(const dabfg::NameSpace &ns, const auto &view = viewHandle.ref(); G_ASSERT(view.viewports.size() <= constants.viewInfo.maxViewports); - ShaderGlobal::set_buffer(var::draw_ranges, persistentData->drawRangesBuffer.getBufId()); - ShaderGlobal::set_buffer(var::placeables, persistentData->placeablesBuffer.getBufId()); - ShaderGlobal::set_buffer(var::placeable_weights, persistentData->placeableWeightsBuffer.getBufId()); + ShaderGlobal::set_buffer(var::draw_ranges, persistentData->commonBuffers.drawRangesBuffer.getBufId()); + ShaderGlobal::set_buffer(var::placeables, persistentData->commonBuffers.placeablesBuffer.getBufId()); + ShaderGlobal::set_buffer(var::placeable_weights, persistentData->commonBuffers.placeableWeightsBuffer.getBufId()); ShaderGlobal::set_buffer(var::placeable_tile_limits, persistentData->placeableTileLimitsBuffer.getBufId()); - ShaderGlobal::set_buffer(var::renderable_indices, persistentData->renderableIndicesBuffer.getBufId()); + ShaderGlobal::set_buffer(var::renderable_indices, persistentData->commonBuffers.renderableIndicesBuffer.getBufId()); ShaderGlobal::set_buffer(var::instance_regions, 
persistentData->instanceRegionsBuffer.getBufId()); ShaderGlobal::set_buffer(var::biomes, persistentData->biomesBuffer.getBufId()); - ShaderGlobal::set_buffer(var::variants, persistentData->variantsBuffer.getBufId()); + ShaderGlobal::set_buffer(var::variants, persistentData->commonBuffers.variantsBuffer.getBufId()); ShaderGlobal::set_texture(var::density_mask, persistentData->densityMask); @@ -232,7 +218,7 @@ static dabfg::NodeHandle create_place_node(const dabfg::NameSpace &ns, ShaderGlobal::set_real(var::max_placeable_bounding_radius, constants.maxPlaceableBoundingRadius); ShaderGlobal::set_real(var::tile_pos_delta, constants.tileWorldSize); ShaderGlobal::set_real(var::instance_pos_delta, constants.tileWorldSize / TILE_INSTANCE_COUNT_1D); - ShaderGlobal::set_real(var::debug_frustum_culling_bias, heightmap_frustum_culling_bias.get()); + ShaderGlobal::set_real(var::debug_frustum_culling_bias, get_frustum_culling_bias()); ShaderGlobal::set_int(var::prng_seed_jitter_x, constants.prngSeed + 0x4272ECD4u); ShaderGlobal::set_int(var::prng_seed_jitter_z, constants.prngSeed + 0x86E5A4D2u); @@ -261,7 +247,7 @@ static dabfg::NodeHandle create_place_node(const dabfg::NameSpace &ns, ShaderGlobal::set_color4(var::viewport_pos, viewport.worldPos); ShaderGlobal::set_real(var::viewport_max_distance, min(viewport.maxDrawDistance, constants.viewInfo.maxDrawDistance)); ShaderGlobal::set_int(var::viewport_index, viewportIndex); - ShaderGlobal::set_int(var::viewport_instance_offset, constants.maxInstancesPerViewport * viewportIndex); + ShaderGlobal::set_int(var::viewport_instance_offset, constants.maxStaticInstancesPerViewport * viewportIndex); bool res = shader.dispatchIndirect(indirectArgsHandle.view().getBuf(), viewportIndex * DISPATCH_INDIRECT_BUFFER_SIZE); G_ASSERT(res); @@ -290,38 +276,17 @@ static void create_grid_nodes(const ViewInfo &view_info, { FRAMEMEM_REGION; - // Draw ranges are concatenated across variants, with `drawRangeStartIndex` marking the start of entries for the 
variant. - // For a single variant, (P * R) float distances are stored, where: - // - P = number of placeables in the variant. - // - R = number of ranges in the variant. - DrawRangesFmem drawRangesFmem; - - // Renderable indices are concatenated across variants, with `renderableIndicesStartIndex` marking the start of entries for the - // variant. For a single variant, (P * R) renderable indices (IDs) are stored, where: - // - P = number of placeables in the variant. - // - R = number of ranges in the variant. - RenderableIndicesFmem renderableIndicesFmem; + if (grid.tiles.empty()) + return; - dag::RelocatableFixedVector perVariantFmem; dag::RelocatableFixedVector perBiomeFmem; + CommonPlacerBufferInit commonBufferInit; - perVariantFmem.resize(grid.variants.size()); - - uint32_t pIdStart = 0; - for (const auto [variantIndex, variant] : enumerate(grid.variants)) - { - perVariantFmem[variantIndex].drawRangeStartIndex = drawRangesFmem.size(); - perVariantFmem[variantIndex].renderableIndicesStartIndex = renderableIndicesFmem.size(); - perVariantFmem[variantIndex].placeableStartIndex = pIdStart; - - calculate_draw_ranges(pIdStart, variant.objectGroups, drawRangesFmem, renderableIndicesFmem); - - uint32_t numVariantPlaceables = 0; - for (const auto &objectGroup : variant.objectGroups) - numVariantPlaceables += objectGroup.info->placeables.size(); + for (const auto &variant : grid.variants) + add_variant(commonBufferInit, variant.objectGroups, variant.effectiveDensity, 1.0f - variant.effectiveDensity / grid.density); - pIdStart += numVariantPlaceables; - } + if (commonBufferInit.numPlaceables == 0) + return; uint32_t maxBiomeIndex = 0; for (const auto &variant : grid.variants) @@ -334,20 +299,17 @@ static void create_grid_nodes(const ViewInfo &view_info, for (const auto biomeIndex : variant.biomes) perBiomeFmem[biomeIndex].variantIndex = variantIndex; - if (grid.tiles.empty()) - return; - TmpName nameSpaceName(TmpName::CtorSprintf(), "heightmap_%zu", grid_index); + 
TmpName bufferNamePrefix(TmpName::CtorSprintf(), "dagdp_%s_heightmap_%zu", view_info.uniqueName.c_str(), grid_index); const dabfg::NameSpace ns = dabfg::root() / "dagdp" / view_info.uniqueName.c_str() / nameSpaceName.c_str(); auto persistentData = eastl::make_shared(); persistentData->densityMask = density_mask; HeightmapConstants &constants = persistentData->constants; constants.viewInfo = view_info; - constants.maxInstancesPerViewport = view_builder.maxInstancesPerViewport; - constants.numDrawRanges = drawRangesFmem.size(); + constants.maxStaticInstancesPerViewport = view_builder.maxStaticInstancesPerViewport; constants.numTiles = grid.tiles.size(); - constants.numPlaceables = pIdStart; + constants.numPlaceables = commonBufferInit.numPlaceables; constants.numBiomes = perBiomeFmem.size(); constants.numRenderables = view_builder.numRenderables; constants.tileWorldSize = grid.tileWorldSize; @@ -361,76 +323,9 @@ static void create_grid_nodes(const ViewInfo &view_info, constants.gridJitter = grid.gridJitter; constants.lowerLevel = grid.lowerLevel; - TmpName bufferNamePrefix(TmpName::CtorSprintf(), "dagdp_%s_heightmap_%zu", view_info.uniqueName.c_str(), grid_index); - { - TmpName bufferName(TmpName::CtorSprintf(), "%s_draw_ranges", bufferNamePrefix.c_str()); - persistentData->drawRangesBuffer = - dag::buffers::create_persistent_sr_structured(sizeof(float), drawRangesFmem.size(), bufferName.c_str()); - - bool updated = persistentData->drawRangesBuffer->updateData(0, data_size(drawRangesFmem), drawRangesFmem.data(), VBLOCK_WRITEONLY); - - if (!updated) - { - logerr("daGdp: could not update buffer %s", bufferName.c_str()); - return; - } - } - - { - TmpName bufferName(TmpName::CtorSprintf(), "%s_placeables", bufferNamePrefix.c_str()); - persistentData->placeablesBuffer = - dag::buffers::create_persistent_sr_structured(sizeof(PlaceableGpuData), constants.numPlaceables, bufferName.c_str()); - - auto lockedBuffer = - lock_sbuffer(persistentData->placeablesBuffer.getBuf(), 0, 
constants.numPlaceables, VBLOCK_WRITEONLY); - if (!lockedBuffer) - { - logerr("daGdp: could not lock buffer %s", bufferName.c_str()); - return; - } - - uint32_t i = 0; - for (const auto &variant : grid.variants) - for (const auto &objectGroup : variant.objectGroups) - for (const auto &placeable : objectGroup.info->placeables) - { - auto &item = lockedBuffer[i++]; - const auto &params = placeable.params; - - item.yawRadiansMin = md_min(params.yawRadiansMidDev); - item.yawRadiansMax = md_max(params.yawRadiansMidDev); - item.pitchRadiansMin = md_min(params.pitchRadiansMidDev); - item.pitchRadiansMax = md_max(params.pitchRadiansMidDev); - item.rollRadiansMin = md_min(params.rollRadiansMidDev); - item.rollRadiansMax = md_max(params.rollRadiansMidDev); - item.scaleMin = md_min(params.scaleMidDev); - item.scaleMax = md_max(params.scaleMidDev); - item.maxBaseDrawDistance = placeable.ranges.back().baseDrawDistance; - item.slopeFactor = params.slopeFactor; - item.flags = params.flags; // TODO: little-endian assumed.
- item.riPoolOffset = params.riPoolOffset; - } - } - - { - TmpName bufferName(TmpName::CtorSprintf(), "%s_placeable_weights", bufferNamePrefix.c_str()); - persistentData->placeableWeightsBuffer = - dag::buffers::create_persistent_sr_structured(sizeof(float), constants.numPlaceables, bufferName.c_str()); - - auto lockedBuffer = - lock_sbuffer(persistentData->placeableWeightsBuffer.getBuf(), 0, constants.numPlaceables, VBLOCK_WRITEONLY); - if (!lockedBuffer) - { - logerr("daGdp: could not lock buffer %s", bufferName.c_str()); - return; - } - - uint32_t i = 0; - for (const auto &variant : grid.variants) - for (const auto &objectGroup : variant.objectGroups) - for (const auto &placeable : objectGroup.info->placeables) - lockedBuffer[i++] = placeable.params.weight * (objectGroup.effectiveDensity / grid.density); - } + bool commonSuccess = init_common_placer_buffers(commonBufferInit, bufferNamePrefix, persistentData->commonBuffers); + if (!commonSuccess) + return; { TmpName bufferName(TmpName::CtorSprintf(), "%s_placeable_tile_limits", bufferNamePrefix.c_str()); @@ -447,21 +342,6 @@ static void create_grid_nodes(const ViewInfo &view_info, } } - { - TmpName bufferName(TmpName::CtorSprintf(), "%s_renderable_indices", bufferNamePrefix.c_str()); - persistentData->renderableIndicesBuffer = - dag::buffers::create_persistent_sr_structured(sizeof(uint32_t), renderableIndicesFmem.size(), bufferName.c_str()); - - bool updated = persistentData->renderableIndicesBuffer->updateData(0, data_size(renderableIndicesFmem), - renderableIndicesFmem.data(), VBLOCK_WRITEONLY); - - if (!updated) - { - logerr("daGdp: could not update buffer %s", bufferName.c_str()); - return; - } - } - { TmpName bufferName(TmpName::CtorSprintf(), "%s_instance_regions", bufferNamePrefix.c_str()); persistentData->instanceRegionsBuffer = dag::buffers::create_persistent_sr_structured(sizeof(InstanceRegion), @@ -507,40 +387,6 @@ static void create_grid_nodes(const ViewInfo &view_info, } } - { - TmpName 
bufferName(TmpName::CtorSprintf(), "%s_variants", bufferNamePrefix.c_str()); - persistentData->variantsBuffer = - dag::buffers::create_persistent_sr_structured(sizeof(VariantGpuData), grid.variants.size(), bufferName.c_str()); - - auto lockedBuffer = - lock_sbuffer(persistentData->variantsBuffer.getBuf(), 0, grid.variants.size(), VBLOCK_WRITEONLY); - if (!lockedBuffer) - { - logerr("daGdp: could not lock buffer %s", bufferName.c_str()); - return; - } - - uint32_t i = 0; - for (const auto &variant : grid.variants) - { - auto &item = lockedBuffer[i]; - const bool isLastVariant = i == grid.variants.size() - 1; - - item.placeableCount = 0; - for (const auto &objectGroup : variant.objectGroups) - item.placeableCount += objectGroup.info->placeables.size(); - - item.placeableWeightEmpty = 1.0f - variant.effectiveDensity / grid.density; - item.placeableStartIndex = perVariantFmem[i].placeableStartIndex; - item.placeableEndIndex = item.placeableStartIndex + item.placeableCount; - item.drawRangeStartIndex = perVariantFmem[i].drawRangeStartIndex; - item.drawRangeEndIndex = isLastVariant ? drawRangesFmem.size() : perVariantFmem[i + 1].drawRangeStartIndex; - item.renderableIndicesStartIndex = perVariantFmem[i].renderableIndicesStartIndex; - - ++i; - } - } - node_inserter(create_cull_tiles_node(ns, persistentData)); node_inserter(create_place_node(ns, persistentData)); } @@ -548,7 +394,7 @@ static void create_grid_nodes(const ViewInfo &view_info, void create_heightmap_nodes( const ViewInfo &view_info, const ViewBuilder &view_builder, const HeightmapManager &heightmap_manager, NodeInserter node_inserter) { - if (view_builder.maxInstancesPerViewport == 0) + if (view_builder.totalMaxInstances == 0) return; // Nothing to do, early exit. // TODO: merge these separate grid nodes into a single node/dispatch. 
diff --git a/prog/daNetGameLibs/daGdp/render/placers/heightmapES.cpp.inl b/prog/daNetGameLibs/daGdp/render/placers/heightmapES.cpp.inl index 08c2ea05f..f30b37844 100644 --- a/prog/daNetGameLibs/daGdp/render/placers/heightmapES.cpp.inl +++ b/prog/daNetGameLibs/daGdp/render/placers/heightmapES.cpp.inl @@ -107,8 +107,10 @@ static inline void heightmap_view_process_es(const dagdp::EventViewProcess &evt, auto &viewBuilder = *evt.get<2>(); auto &builder = dagdp__heightmap_manager.currentBuilder; - dag::Vector entries; + if (rulesBuilder.maxObjects != 0) + logwarn("daGdp: note: heightmap placer currently does not respect the max. objects setting."); + dag::Vector entries; heightmap_placers_ecs_query([&](ECS_REQUIRE(ecs::Tag dagdp_placer_heightmap) ecs::EntityId eid, const ecs::List &dagdp__biomes, float dagdp__density, int dagdp__seed, float dagdp__jitter, bool dagdp__heightmap_lower_level, bool dagdp__heightmap_allow_unoptimal_grids, float dagdp__heightmap_cell_size) { diff --git a/prog/daNetGameLibs/daGdp/render/placers/volume.cpp b/prog/daNetGameLibs/daGdp/render/placers/volume.cpp new file mode 100644 index 000000000..dbca51e36 --- /dev/null +++ b/prog/daNetGameLibs/daGdp/render/placers/volume.cpp @@ -0,0 +1,523 @@ +// Copyright (C) Gaijin Games KFT. All rights reserved. 
+ +#include +#include +#include +#include <3d/dag_lockSbuffer.h> +#include +#include +#include +#include +#include +#include "../../shaders/dagdp_common.hlsli" +#include "../../shaders/dagdp_common_placer.hlsli" +#include "../../shaders/dagdp_volume.hlsli" +#include "volume.h" + +namespace var +{ +static ShaderVariableInfo draw_ranges("dagdp_volume__draw_ranges"); +static ShaderVariableInfo placeables("dagdp_volume__placeables"); +static ShaderVariableInfo placeable_weights("dagdp_volume__placeable_weights"); +static ShaderVariableInfo renderable_indices("dagdp_volume__renderable_indices"); +static ShaderVariableInfo variants("dagdp_volume__variants"); + +static ShaderVariableInfo debug_frustum_culling_bias("dagdp_volume__debug_frustum_culling_bias"); +static ShaderVariableInfo max_placeable_bounding_radius("dagdp_volume__max_placeable_bounding_radius"); +static ShaderVariableInfo num_renderables("dagdp_volume__num_renderables"); +static ShaderVariableInfo num_placeables("dagdp_volume__num_placeables"); + +static ShaderVariableInfo prng_seed_placeable("dagdp_volume__prng_seed_placeable"); +static ShaderVariableInfo prng_seed_slope("dagdp_volume__prng_seed_slope"); +static ShaderVariableInfo prng_seed_scale("dagdp_volume__prng_seed_scale"); +static ShaderVariableInfo prng_seed_yaw("dagdp_volume__prng_seed_yaw"); +static ShaderVariableInfo prng_seed_pitch("dagdp_volume__prng_seed_pitch"); +static ShaderVariableInfo prng_seed_roll("dagdp_volume__prng_seed_roll"); +static ShaderVariableInfo prng_seed_triangle1("dagdp_volume__prng_seed_triangle1"); +static ShaderVariableInfo prng_seed_triangle2("dagdp_volume__prng_seed_triangle2"); + +static ShaderVariableInfo viewport_pos("dagdp_volume__viewport_pos"); +static ShaderVariableInfo viewport_max_distance("dagdp_volume__viewport_max_distance"); +static ShaderVariableInfo viewport_index("dagdp_volume__viewport_index"); + +static ShaderVariableInfo mesh_index("dagdp_volume__mesh_index"); +static ShaderVariableInfo 
num_dispatches("dagdp_volume__num_dispatches"); + +static ShaderVariableInfo mesh_params("dagdp_volume__mesh_params"); +static ShaderVariableInfo areas_start_offset("dagdp_volume__areas_start_offset"); +static ShaderVariableInfo areas_bottom_offset("dagdp_volume__areas_bottom_offset"); +static ShaderVariableInfo areas_top_offset("dagdp_volume__areas_top_offset"); +static ShaderVariableInfo areas_count("dagdp_volume__areas_count"); +static ShaderVariableInfo areas("dagdp_volume__areas"); +} // namespace var + +using TmpName = eastl::fixed_string; + +namespace dagdp +{ + +static constexpr uint32_t MAX_TRIANGLES = 1 << 20; +static constexpr uint32_t MAX_MESHES = 1 << 10; +static constexpr uint32_t ESTIMATED_PREFIX_SUM_LEVELS = 4; +static inline uint32_t divUp(uint32_t size, uint32_t stride) { return (size + stride - 1) / stride; } + +struct VolumeConstants +{ + ViewInfo viewInfo; + eastl::fixed_string viewResourceName; + float maxPlaceableBoundingRadius; + uint32_t numRenderables; + uint32_t numPlaceables; + uint32_t prngSeed; + uint32_t totalCounters; + VolumeMapping mapping; +}; + +// Note: not safe if FG starts using MT execution. 
+struct VolumeMutables +{ + uint32_t areasUsed = 0; + RiexProcessor riexProcessor; +}; + +struct VolumePersistentData +{ + CommonPlacerBuffers commonBuffers; + + UniqueBuf allocsBuffer; + UniqueBuf areasBuffer; + UniqueBuf dispatchArgsBuffer; + + VolumeConstants constants{}; + VolumeMutables mutables{}; +}; + +struct VolumePerViewportInfo +{ + uint32_t meshStartIndex; + uint32_t meshEndIndex; +}; + +struct VolumeMeshesInfo +{ + dag::RelocatableFixedVector perViewport; + RelevantMeshes relevantMeshes; + bool isValid = false; + + dag::RelocatableFixedVector meshesToProcess; +}; + +template +using PerLevel = dag::RelocatableFixedVector; + +template +using PerProcessedMesh = dag::RelocatableFixedVector; + +void create_volume_nodes( + const ViewInfo &view_info, const ViewBuilder &view_builder, const VolumeManager &volume_manager, NodeInserter node_inserter) +{ + FRAMEMEM_REGION; + + auto &builder = volume_manager.currentBuilder; + CommonPlacerBufferInit commonBufferInit; + + for (const auto &variant : builder.variants) + add_variant(commonBufferInit, variant.objectGroups, variant.density, 0.0f); + + if (commonBufferInit.numPlaceables == 0) + return; + + TmpName bufferNamePrefix(TmpName::CtorSprintf(), "dagdp_%s_volume", view_info.uniqueName.c_str()); + auto persistentData = eastl::make_shared(); + VolumeConstants &constants = persistentData->constants; + constants.mapping = eastl::move(builder.mapping); + constants.viewInfo = view_info; + constants.viewResourceName.append_sprintf("view@%s", constants.viewInfo.uniqueName.c_str()); + constants.numRenderables = view_builder.numRenderables; + constants.numPlaceables = commonBufferInit.numPlaceables; + constants.prngSeed = 0; // Not implemented for now. 
+ constants.totalCounters = constants.viewInfo.maxViewports * constants.numRenderables; + + constants.maxPlaceableBoundingRadius = 0.0f; + for (const auto &variant : builder.variants) + for (const auto &objectGroup : variant.objectGroups) + constants.maxPlaceableBoundingRadius = max(constants.maxPlaceableBoundingRadius, objectGroup.info->maxPlaceableBoundingRadius); + + bool commonSuccess = init_common_placer_buffers(commonBufferInit, bufferNamePrefix, persistentData->commonBuffers); + if (!commonSuccess) + return; + + { + TmpName bufferName(TmpName::CtorSprintf(), "%s_areas", bufferNamePrefix.c_str()); + persistentData->areasBuffer = + dag::buffers::create_ua_sr_structured(sizeof(float), MAX_TRIANGLES, bufferName.c_str(), d3d::buffers::Init::Zero); + } + + { + TmpName bufferName(TmpName::CtorSprintf(), "%s_dispatch_args", bufferNamePrefix.c_str()); + persistentData->dispatchArgsBuffer = dag::create_sbuffer(sizeof(uint32_t), 3 * MAX_MESHES, + SBCF_BIND_UNORDERED | SBCF_BIND_SHADER_RES | SBCF_MISC_ALLOW_RAW | SBCF_MISC_DRAWINDIRECT | SBCF_ZEROMEM, 0, bufferName.c_str()); + } + + const dabfg::NameSpace ns = dabfg::root() / "dagdp" / view_info.uniqueName.c_str() / "volume"; + + dabfg::NodeHandle gatherNode = ns.registerNode("gather", DABFG_PP_NODE_SRC, [persistentData](dabfg::Registry registry) { + const auto &constants = persistentData->constants; + view_multiplex(registry, constants.viewInfo.kind); + + const auto meshesBufferHandle = registry.create("meshes", dabfg::History::No) + .structuredBufferUaSr(MAX_MESHES) + .atStage(dabfg::Stage::TRANSFER) + .useAs(dabfg::Usage::COPY) + .handle(); + + registry.registerBuffer("areas", [persistentData](auto) { return ManagedBufView(persistentData->areasBuffer); }) + .atStage(dabfg::Stage::COMPUTE) + .useAs(dabfg::Usage::SHADER_RESOURCE); + + const auto viewHandle = registry.readBlob(constants.viewResourceName.c_str()).handle(); + const auto meshesInfoHandle = registry.createBlob("meshesInfo", dabfg::History::No).handle(); + 
return [persistentData, viewHandle, meshesBufferHandle, meshesInfoHandle] { + const auto &constants = persistentData->constants; + auto &meshesInfo = meshesInfoHandle.ref(); + auto *meshesBuffer = meshesBufferHandle.get(); + const auto &view = viewHandle.ref(); + G_ASSERT(view.viewports.size() <= constants.viewInfo.maxViewports); + + persistentData->mutables.riexProcessor.resetCurrent(); + + for (uint32_t viewportIndex = 0; viewportIndex < view.viewports.size(); ++viewportIndex) + { + const uint32_t meshStartIndex = meshesInfo.relevantMeshes.size(); + const auto &viewport = view.viewports[viewportIndex]; + gather_meshes(persistentData->constants.mapping, constants.viewInfo, viewport, constants.maxPlaceableBoundingRadius, + persistentData->mutables.riexProcessor, meshesInfo.relevantMeshes); + + auto &item = meshesInfo.perViewport.push_back(); + item.meshStartIndex = meshStartIndex; + item.meshEndIndex = meshesInfo.relevantMeshes.size(); + } + + if (const auto *nextToProcess = persistentData->mutables.riexProcessor.current(); nextToProcess != nullptr) + { + const dag::ConstSpan elems = + nextToProcess->getMesh()->getMesh()->getMesh()->getElems(ShaderMesh::STG_opaque); + + for (const auto &elem : elems) + { + auto &item = meshesInfo.meshesToProcess.push_back(); + item.startIndex = elem.si; + item.numFaces = elem.numf; + item.baseVertex = elem.baseVertex; + item.stride = elem.vertexData->getStride(); + item.vbIndex = elem.vertexData->getVbIdx(); + } + } + + if (meshesInfo.relevantMeshes.empty()) + return; + + if (meshesInfo.relevantMeshes.size() > MAX_MESHES) + { + logerr("daGdp: volume placement exceeded max. 
number of meshes."); + return; + } + + const bool res = meshesBuffer->updateData(0, sizeof(MeshIntersection) * meshesInfo.relevantMeshes.size(), + meshesInfo.relevantMeshes.data(), VBLOCK_WRITEONLY); + if (!res) + { + logerr("daGdp: volume meshes buffer update error."); + return; + } + + meshesInfo.isValid = true; + }; + }); + + dabfg::NodeHandle processNode = ns.registerNode("process", DABFG_PP_NODE_SRC, [persistentData](dabfg::Registry registry) { + const auto meshesInfoHandle = registry.readBlob("meshesInfo").handle(); + registry.modify("areas").buffer().atStage(dabfg::Stage::COMPUTE).bindToShaderVar("dagdp_volume__areas"); + registry.read("meshes").buffer().atStage(dabfg::Stage::COMPUTE).bindToShaderVar("dagdp_volume__meshes"); + + return [persistentData, meshesInfoHandle, shaderTri = ComputeShader("dagdp_volume_mesh_process_tri"), + shaderUp = ComputeShader("dagdp_volume_mesh_process_up"), shaderDown = ComputeShader("dagdp_volume_mesh_process_down")] { + FRAMEMEM_REGION; + + const auto &meshesInfo = meshesInfoHandle.ref(); + AreasIndices areasIndices; + bool outOfMem = false; + + const auto bumpAllocate = [&used = persistentData->mutables.areasUsed, &outOfMem](uint32_t count) { + uint32_t offset = used; + used += count; + if (used > MAX_TRIANGLES) + outOfMem = true; + return offset; + }; + + struct DispatchTri + { + MeshToProcess mesh; + uint32_t areasStartOffset; + }; + + struct DispatchUpDown + { + uint32_t areasBottomCount; + uint32_t areasBottomOffset; + uint32_t areasTopOffset; + uint32_t areasTopCount; + }; + + PerProcessedMesh dispatchTris; + PerLevel> dispatchUpDowns; // Up forwards <=> down backwards + + for (const auto &mesh : meshesInfo.meshesToProcess) + { + uint32_t count = mesh.numFaces; + uint32_t prevOffset = ~0u; + uint32_t prevCount = 0; + + uint32_t level = 0; + do + { + if (level == 0) + { + auto &item = dispatchTris.push_back(); + item.mesh = mesh; + item.areasStartOffset = bumpAllocate(count); + prevOffset = item.areasStartOffset; + 
prevCount = count; + areasIndices.push_back(item.areasStartOffset); + } + else + { + if (dispatchUpDowns.size() < level) + dispatchUpDowns.push_back(); + + auto &item = dispatchUpDowns[level - 1].push_back(); + item.areasBottomOffset = prevOffset; + item.areasBottomCount = prevCount; + item.areasTopOffset = bumpAllocate(count); + item.areasTopCount = count; + prevOffset = item.areasTopOffset; + prevCount = item.areasTopCount; + } + + ++level; + count = divUp(count, DAGDP_PREFIX_SUM_GROUP_SIZE) - 1; + } while (count > 0); + } + + if (outOfMem) + { + logerr("daGdp: volume mesh processing ran out of memory! %" PRIu32 " > %" PRIu32, persistentData->mutables.areasUsed, + MAX_TRIANGLES); + return; + } + + Ibuffer *ib = unitedvdata::riUnitedVdata.getIB(); + d3d::set_buffer(STAGE_CS, 0, ib); + + bool dispatchSuccess = true; + for (const auto &dispatch : dispatchTris) + { + const auto &mesh = dispatch.mesh; + Vbuffer *vb = unitedvdata::riUnitedVdata.getVB(mesh.vbIndex); + d3d::set_buffer(STAGE_CS, 1, vb); + ShaderGlobal::set_int4(var::mesh_params, IPoint4(mesh.startIndex, mesh.numFaces, mesh.baseVertex, mesh.stride)); + ShaderGlobal::set_int(var::areas_start_offset, dispatch.areasStartOffset); + dispatchSuccess &= shaderTri.dispatchThreads(mesh.numFaces, 1, 1); + } + + d3d::set_buffer(STAGE_CS, 0, nullptr); + d3d::set_buffer(STAGE_CS, 1, nullptr); + + d3d::resource_barrier({persistentData->areasBuffer.getBuf(), RB_RW_UAV | RB_STAGE_COMPUTE}); + + for (const auto &perLevel : dispatchUpDowns) + { + for (const auto &dispatch : perLevel) + { + ShaderGlobal::set_int(var::areas_bottom_offset, dispatch.areasBottomOffset); + ShaderGlobal::set_int(var::areas_top_offset, dispatch.areasTopOffset); + ShaderGlobal::set_int(var::areas_count, dispatch.areasTopCount); + dispatchSuccess &= shaderUp.dispatchThreads(dispatch.areasTopCount, 1, 1); + } + + d3d::resource_barrier({persistentData->areasBuffer.getBuf(), RB_RW_UAV | RB_STAGE_COMPUTE}); + } + + // Backwards. 
+ for (auto it = dispatchUpDowns.end(); it != dispatchUpDowns.begin();) + { + const auto &level = *--it; + for (const auto &dispatch : level) + { + ShaderGlobal::set_int(var::areas_bottom_offset, dispatch.areasBottomOffset); + ShaderGlobal::set_int(var::areas_top_offset, dispatch.areasTopOffset); + ShaderGlobal::set_int(var::areas_count, dispatch.areasBottomCount); + dispatchSuccess &= shaderDown.dispatchThreads(dispatch.areasBottomCount, 1, 1); + } + + if (it != dispatchUpDowns.begin()) + { + // Can skip this barrier on the last level. + d3d::resource_barrier({persistentData->areasBuffer.getBuf(), RB_RW_UAV | RB_STAGE_COMPUTE}); + } + } + + if (!dispatchSuccess) + { + logerr("daGdp: volume mesh processing dispatch failed!"); + return; + } + + persistentData->mutables.riexProcessor.markCurrentAsProcessed(eastl::move(areasIndices)); + }; + }); + + dabfg::NodeHandle copyArgsNode = ns.registerNode("set_args", DABFG_PP_NODE_SRC, [persistentData](dabfg::Registry registry) { + const auto &constants = persistentData->constants; + view_multiplex(registry, constants.viewInfo.kind); + + registry.registerBuffer("dispatch_args", [persistentData](auto) { return ManagedBufView(persistentData->dispatchArgsBuffer); }) + .atStage(dabfg::Stage::COMPUTE) + .bindToShaderVar("dagdp_volume__dispatch_args"); + + registry.read("areas").buffer().atStage(dabfg::Stage::COMPUTE).bindToShaderVar("dagdp_volume__areas"); + registry.read("meshes").buffer().atStage(dabfg::Stage::COMPUTE).bindToShaderVar("dagdp_volume__meshes"); + const auto meshesInfoHandle = registry.readBlob("meshesInfo").handle(); + + return [persistentData, meshesInfoHandle, shader = ComputeShader("dagdp_volume_set_args")] { + const auto &meshesInfo = meshesInfoHandle.ref(); + + if (!meshesInfo.isValid) + return; + + ShaderGlobal::set_buffer(var::variants, persistentData->commonBuffers.variantsBuffer.getBufId()); + ShaderGlobal::set_int(var::num_dispatches, meshesInfo.relevantMeshes.size()); + const bool dispatchSuccess = 
shader.dispatchThreads(meshesInfo.relevantMeshes.size(), 1, 1); + ShaderGlobal::set_buffer(var::variants, BAD_D3DRESID); + + if (!dispatchSuccess) + logerr("daGdp: volume set args dispatch failed!"); + }; + }); + + const auto createPlaceNode = [&](bool isOptimistic) { + return ns.registerNode(isOptimistic ? "place_stage0" : "place_stage1", DABFG_PP_NODE_SRC, + [isOptimistic, persistentData](dabfg::Registry registry) { + const auto &constants = persistentData->constants; + view_multiplex(registry, constants.viewInfo.kind); + registry.read("meshes").buffer().atStage(dabfg::Stage::COMPUTE).bindToShaderVar("dagdp_volume__meshes"); + registry.read("areas").buffer().atStage(dabfg::Stage::COMPUTE).bindToShaderVar("dagdp_volume__areas"); + registry.read("dispatch_args").buffer().atStage(dabfg::Stage::COMPUTE).useAs(dabfg::Usage::INDIRECTION_BUFFER); + const auto meshesInfoHandle = registry.readBlob("meshesInfo").handle(); + const auto viewHandle = registry.readBlob(constants.viewResourceName.c_str()).handle(); + + const auto ns = registry.root() / "dagdp" / constants.viewInfo.uniqueName.c_str(); + + ns.read(isOptimistic ? "dyn_allocs_stage0" : "dyn_allocs_stage1") + .buffer() + .atStage(dabfg::Stage::COMPUTE) + .bindToShaderVar("dagdp__dyn_allocs"); + + ns.modify(isOptimistic ? "dyn_counters_stage0" : "dyn_counters_stage1") + .buffer() + .atStage(dabfg::Stage::COMPUTE) + .bindToShaderVar("dagdp__dyn_counters"); + + ns.modify("instance_data").buffer().atStage(dabfg::Stage::COMPUTE).bindToShaderVar("dagdp__instance_data"); + + return [persistentData, viewHandle, meshesInfoHandle, isOptimistic, + shader = ComputeShader(isOptimistic ? 
"dagdp_volume_place_stage0" : "dagdp_volume_place_stage1")] { + const auto &constants = persistentData->constants; + const auto &view = viewHandle.ref(); + const auto &meshesInfo = meshesInfoHandle.ref(); + + if (!meshesInfo.isValid) + return; + + bool dispatchSuccess = true; + + STATE_GUARD(ShaderGlobal::set_buffer(var::draw_ranges, VALUE), persistentData->commonBuffers.drawRangesBuffer.getBufId(), + BAD_D3DRESID); + STATE_GUARD(ShaderGlobal::set_buffer(var::placeables, VALUE), persistentData->commonBuffers.placeablesBuffer.getBufId(), + BAD_D3DRESID); + STATE_GUARD(ShaderGlobal::set_buffer(var::placeable_weights, VALUE), + persistentData->commonBuffers.placeableWeightsBuffer.getBufId(), BAD_D3DRESID); + STATE_GUARD(ShaderGlobal::set_buffer(var::renderable_indices, VALUE), + persistentData->commonBuffers.renderableIndicesBuffer.getBufId(), BAD_D3DRESID); + STATE_GUARD(ShaderGlobal::set_buffer(var::variants, VALUE), persistentData->commonBuffers.variantsBuffer.getBufId(), + BAD_D3DRESID); + + ShaderGlobal::set_real(var::debug_frustum_culling_bias, get_frustum_culling_bias()); + ShaderGlobal::set_real(var::max_placeable_bounding_radius, constants.maxPlaceableBoundingRadius); + ShaderGlobal::set_int(var::num_renderables, constants.numRenderables); + ShaderGlobal::set_int(var::num_placeables, constants.numPlaceables); + ShaderGlobal::set_int(var::prng_seed_placeable, constants.prngSeed + 0x08C2592Cu); + ShaderGlobal::set_int(var::prng_seed_scale, constants.prngSeed + 0xDF3069FFu); + ShaderGlobal::set_int(var::prng_seed_slope, constants.prngSeed + 0x3C1385DBu); + ShaderGlobal::set_int(var::prng_seed_yaw, constants.prngSeed + 0x71F23960u); + ShaderGlobal::set_int(var::prng_seed_pitch, constants.prngSeed + 0xDEB40CF0u); + ShaderGlobal::set_int(var::prng_seed_roll, constants.prngSeed + 0xF6A38A81u); + ShaderGlobal::set_int(var::prng_seed_triangle1, constants.prngSeed + 0x54F2A367u); + ShaderGlobal::set_int(var::prng_seed_triangle2, constants.prngSeed + 0x45F9668Eu); + + 
Ibuffer *ib = unitedvdata::riUnitedVdata.getIB(); + d3d::set_buffer(STAGE_CS, 0, ib); + + uint32_t dispatchIndex = 0; + for (uint32_t viewportIndex = 0; viewportIndex < view.viewports.size(); ++viewportIndex) + { + const auto &info = meshesInfo.perViewport[viewportIndex]; + const uint32_t numMeshes = info.meshEndIndex - info.meshStartIndex; + + const auto &viewport = view.viewports[viewportIndex]; + ScopeFrustumPlanesShaderVars scopedFrustumVars(viewport.frustum); + + ShaderGlobal::set_color4(var::viewport_pos, viewport.worldPos); + ShaderGlobal::set_real(var::viewport_max_distance, min(viewport.maxDrawDistance, constants.viewInfo.maxDrawDistance)); + ShaderGlobal::set_int(var::viewport_index, viewportIndex); + + for (uint32_t meshIndex = 0; meshIndex < numMeshes; ++meshIndex) + { + const auto &mesh = meshesInfo.relevantMeshes[meshIndex + info.meshStartIndex]; + Vbuffer *vb = unitedvdata::riUnitedVdata.getVB(mesh.vbIndex); + d3d::set_buffer(STAGE_CS, 1, vb); + + ShaderGlobal::set_int(var::mesh_index, info.meshStartIndex + meshIndex); + + // TODO: Performance: ideally need to merge all dispatches (regardless of viewport / vbIndex / meshIndex). + // TODO: Performance: in the pessimistic phase, should set dispatch args to zero, based on success/failure of the + // optimistic phase. + int indirectOffset = 3 * sizeof(uint32_t) * dispatchIndex++; + dispatchSuccess &= shader.dispatchIndirect(persistentData->dispatchArgsBuffer.getBuf(), indirectOffset); + } + } + + d3d::set_buffer(STAGE_CS, 0, nullptr); + d3d::set_buffer(STAGE_CS, 1, nullptr); + + if (!dispatchSuccess) + logerr("daGdp: volume place dispatch failed! (optimistic = %d)", isOptimistic); + }; + }); + }; + + // Optimistic placement. + dabfg::NodeHandle placeStage0Node = createPlaceNode(true); + + // Pessimistic placement. 
+ dabfg::NodeHandle placeStage1Node = createPlaceNode(false); + + node_inserter(eastl::move(gatherNode)); + node_inserter(eastl::move(processNode)); + node_inserter(eastl::move(copyArgsNode)); + node_inserter(eastl::move(placeStage0Node)); + node_inserter(eastl::move(placeStage1Node)); +} + +} // namespace dagdp diff --git a/prog/daNetGameLibs/daGdp/render/placers/volume.h b/prog/daNetGameLibs/daGdp/render/placers/volume.h new file mode 100644 index 000000000..5c77d99bb --- /dev/null +++ b/prog/daNetGameLibs/daGdp/render/placers/volume.h @@ -0,0 +1,62 @@ +// Copyright (C) Gaijin Games KFT. All rights reserved. +#pragma once + +#include +#include "../common.h" +#include "../placer.h" +#include "../riexProcessor.h" +#include "../../shaders/dagdp_volume.hlsli" + + +namespace dagdp +{ + +static constexpr uint32_t ESTIMATED_RELEVANT_MESHES_PER_FRAME = 64; + +struct VolumeVariant +{ + dag::RelocatableFixedVector objectGroups; + float density = 0.0; +}; + +struct VolumeMapping +{ + dag::VectorMap variantIds; +}; + +struct VolumeBuilder +{ + dag::Vector variants; + VolumeMapping mapping; + uint32_t dynamicRegionIndex; +}; + +struct VolumeManager +{ + VolumeBuilder currentBuilder; // Only valid while building a view. 
+}; + +void create_volume_nodes( + const ViewInfo &view_info, const ViewBuilder &view_builder, const VolumeManager &volume_manager, NodeInserter node_inserter); + +struct MeshToProcess +{ + int startIndex; + int numFaces; + int baseVertex; + int stride; + int vbIndex; +}; + +using RelevantMeshes = dag::RelocatableFixedVector; + +void gather_meshes(const VolumeMapping &volume_mapping, + const ViewInfo &view_info, + const Viewport &viewport, + float max_bounding_radius, + RiexProcessor &riex_processor, + RelevantMeshes &out_result); + +} // namespace dagdp + +ECS_DECLARE_BOXED_TYPE(dagdp::VolumeManager); diff --git a/prog/daNetGameLibs/daGdp/render/placers/volumeES.cpp.gen.es.cpp b/prog/daNetGameLibs/daGdp/render/placers/volumeES.cpp.gen.es.cpp new file mode 100644 index 000000000..e22101bfc --- /dev/null +++ b/prog/daNetGameLibs/daGdp/render/placers/volumeES.cpp.gen.es.cpp @@ -0,0 +1,326 @@ +#include "volumeES.cpp.inl" +ECS_DEF_PULL_VAR(volume); +//built with ECS codegen version 1.0 +#include +static constexpr ecs::ComponentDesc volume_view_process_es_comps[] = +{ +//start of 1 rw components at [0] + {ECS_HASH("dagdp__volume_manager"), ecs::ComponentTypeInfo()} +}; +static void volume_view_process_es_all_events(const ecs::Event &__restrict evt, const ecs::QueryView &__restrict components) +{ + G_FAST_ASSERT(evt.is()); + auto comp = components.begin(), compE = components.end(); G_ASSERT(comp!=compE); do + dagdp::volume_view_process_es(static_cast(evt) + , ECS_RW_COMP(volume_view_process_es_comps, "dagdp__volume_manager", dagdp::VolumeManager) + ); + while (++comp != compE); +} +static ecs::EntitySystemDesc volume_view_process_es_es_desc +( + "volume_view_process_es", + "prog/daNetGameLibs/daGdp/render/placers/volumeES.cpp.inl", + ecs::EntitySystemOps(nullptr, volume_view_process_es_all_events), + make_span(volume_view_process_es_comps+0, 1)/*rw*/, + empty_span(), + empty_span(), + empty_span(), + ecs::EventSetBuilder::build(), + 0 +,nullptr,nullptr,"*"); +static 
constexpr ecs::ComponentDesc volume_view_finalize_es_comps[] = +{ +//start of 1 rw components at [0] + {ECS_HASH("dagdp__volume_manager"), ecs::ComponentTypeInfo()} +}; +static void volume_view_finalize_es_all_events(const ecs::Event &__restrict evt, const ecs::QueryView &__restrict components) +{ + G_FAST_ASSERT(evt.is()); + auto comp = components.begin(), compE = components.end(); G_ASSERT(comp!=compE); do + dagdp::volume_view_finalize_es(static_cast(evt) + , ECS_RW_COMP(volume_view_finalize_es_comps, "dagdp__volume_manager", dagdp::VolumeManager) + ); + while (++comp != compE); +} +static ecs::EntitySystemDesc volume_view_finalize_es_es_desc +( + "volume_view_finalize_es", + "prog/daNetGameLibs/daGdp/render/placers/volumeES.cpp.inl", + ecs::EntitySystemOps(nullptr, volume_view_finalize_es_all_events), + make_span(volume_view_finalize_es_comps+0, 1)/*rw*/, + empty_span(), + empty_span(), + empty_span(), + ecs::EventSetBuilder::build(), + 0 +,nullptr,nullptr,"*"); +static constexpr ecs::ComponentDesc dagdp_placer_volume_changed_es_comps[] = +{ +//start of 3 rq components at [0] + {ECS_HASH("dagdp__name"), ecs::ComponentTypeInfo()}, + {ECS_HASH("dagdp_placer_volume"), ecs::ComponentTypeInfo()}, + {ECS_HASH("dagdp__density"), ecs::ComponentTypeInfo()} +}; +static void dagdp_placer_volume_changed_es_all_events(const ecs::Event &__restrict evt, const ecs::QueryView &__restrict components) +{ + G_UNUSED(components); + dagdp::dagdp_placer_volume_changed_es(evt + ); +} +static ecs::EntitySystemDesc dagdp_placer_volume_changed_es_es_desc +( + "dagdp_placer_volume_changed_es", + "prog/daNetGameLibs/daGdp/render/placers/volumeES.cpp.inl", + ecs::EntitySystemOps(nullptr, dagdp_placer_volume_changed_es_all_events), + empty_span(), + empty_span(), + make_span(dagdp_placer_volume_changed_es_comps+0, 3)/*rq*/, + empty_span(), + ecs::EventSetBuilder::build(), + 0 +,"render","dagdp__density,dagdp__name"); +static constexpr ecs::ComponentDesc dagdp_placer_volume_link_es_comps[] = 
+{ +//start of 2 ro components at [0] + {ECS_HASH("eid"), ecs::ComponentTypeInfo()}, + {ECS_HASH("dagdp__name"), ecs::ComponentTypeInfo()}, +//start of 1 rq components at [2] + {ECS_HASH("dagdp_placer_volume"), ecs::ComponentTypeInfo()} +}; +static void dagdp_placer_volume_link_es_all_events(const ecs::Event &__restrict evt, const ecs::QueryView &__restrict components) +{ + auto comp = components.begin(), compE = components.end(); G_ASSERT(comp!=compE); do + dagdp::dagdp_placer_volume_link_es(evt + , ECS_RO_COMP(dagdp_placer_volume_link_es_comps, "eid", ecs::EntityId) + , ECS_RO_COMP(dagdp_placer_volume_link_es_comps, "dagdp__name", ecs::string) + ); + while (++comp != compE); +} +static ecs::EntitySystemDesc dagdp_placer_volume_link_es_es_desc +( + "dagdp_placer_volume_link_es", + "prog/daNetGameLibs/daGdp/render/placers/volumeES.cpp.inl", + ecs::EntitySystemOps(nullptr, dagdp_placer_volume_link_es_all_events), + empty_span(), + make_span(dagdp_placer_volume_link_es_comps+0, 2)/*ro*/, + make_span(dagdp_placer_volume_link_es_comps+2, 1)/*rq*/, + empty_span(), + ecs::EventSetBuilder::build(), + 0 +,"render","dagdp__name"); +static constexpr ecs::ComponentDesc dagdp_placer_volume_unlink_es_comps[] = +{ +//start of 1 ro components at [0] + {ECS_HASH("dagdp__name"), ecs::ComponentTypeInfo()}, +//start of 1 rq components at [1] + {ECS_HASH("dagdp_placer_volume"), ecs::ComponentTypeInfo()} +}; +static void dagdp_placer_volume_unlink_es_all_events(const ecs::Event &__restrict evt, const ecs::QueryView &__restrict components) +{ + auto comp = components.begin(), compE = components.end(); G_ASSERT(comp!=compE); do + dagdp::dagdp_placer_volume_unlink_es(evt + , ECS_RO_COMP(dagdp_placer_volume_unlink_es_comps, "dagdp__name", ecs::string) + ); + while (++comp != compE); +} +static ecs::EntitySystemDesc dagdp_placer_volume_unlink_es_es_desc +( + "dagdp_placer_volume_unlink_es", + "prog/daNetGameLibs/daGdp/render/placers/volumeES.cpp.inl", + ecs::EntitySystemOps(nullptr, 
dagdp_placer_volume_unlink_es_all_events), + empty_span(), + make_span(dagdp_placer_volume_unlink_es_comps+0, 1)/*ro*/, + make_span(dagdp_placer_volume_unlink_es_comps+1, 1)/*rq*/, + empty_span(), + ecs::EventSetBuilder::build(), + 0 +,"render"); +static constexpr ecs::ComponentDesc dagdp_volume_link_es_comps[] = +{ +//start of 1 rw components at [0] + {ECS_HASH("dagdp_internal__volume_placer_eid"), ecs::ComponentTypeInfo()}, +//start of 1 ro components at [1] + {ECS_HASH("dagdp__volume_placer_name"), ecs::ComponentTypeInfo()}, +//start of 1 rq components at [2] + {ECS_HASH("dagdp_volume"), ecs::ComponentTypeInfo()} +}; +static void dagdp_volume_link_es_all_events(const ecs::Event &__restrict evt, const ecs::QueryView &__restrict components) +{ + auto comp = components.begin(), compE = components.end(); G_ASSERT(comp!=compE); do + dagdp::dagdp_volume_link_es(evt + , ECS_RO_COMP(dagdp_volume_link_es_comps, "dagdp__volume_placer_name", ecs::string) + , ECS_RW_COMP(dagdp_volume_link_es_comps, "dagdp_internal__volume_placer_eid", ecs::EntityId) + ); + while (++comp != compE); +} +static ecs::EntitySystemDesc dagdp_volume_link_es_es_desc +( + "dagdp_volume_link_es", + "prog/daNetGameLibs/daGdp/render/placers/volumeES.cpp.inl", + ecs::EntitySystemOps(nullptr, dagdp_volume_link_es_all_events), + make_span(dagdp_volume_link_es_comps+0, 1)/*rw*/, + make_span(dagdp_volume_link_es_comps+1, 1)/*ro*/, + make_span(dagdp_volume_link_es_comps+2, 1)/*rq*/, + empty_span(), + ecs::EventSetBuilder::build(), + 0 +,"render","dagdp__volume_placer_name"); +static constexpr ecs::ComponentDesc volume_placers_ecs_query_comps[] = +{ +//start of 2 ro components at [0] + {ECS_HASH("eid"), ecs::ComponentTypeInfo()}, + {ECS_HASH("dagdp__density"), ecs::ComponentTypeInfo()}, +//start of 1 rq components at [2] + {ECS_HASH("dagdp_placer_volume"), ecs::ComponentTypeInfo()} +}; +static ecs::CompileTimeQueryDesc volume_placers_ecs_query_desc +( + "dagdp::volume_placers_ecs_query", + empty_span(), + 
make_span(volume_placers_ecs_query_comps+0, 2)/*ro*/, + make_span(volume_placers_ecs_query_comps+2, 1)/*rq*/, + empty_span()); +template +inline void dagdp::volume_placers_ecs_query(Callable function) +{ + perform_query(g_entity_mgr, volume_placers_ecs_query_desc.getHandle(), + [&function](const ecs::QueryView& __restrict components) + { + auto comp = components.begin(), compE = components.end(); G_ASSERT(comp != compE); do + { + function( + ECS_RO_COMP(volume_placers_ecs_query_comps, "eid", ecs::EntityId) + , ECS_RO_COMP(volume_placers_ecs_query_comps, "dagdp__density", float) + ); + + }while (++comp != compE); + } + ); +} +static constexpr ecs::ComponentDesc volumes_ecs_query_comps[] = +{ +//start of 2 ro components at [0] + {ECS_HASH("dagdp_internal__volume_placer_eid"), ecs::ComponentTypeInfo()}, + {ECS_HASH("transform"), ecs::ComponentTypeInfo()}, +//start of 1 rq components at [2] + {ECS_HASH("dagdp_volume"), ecs::ComponentTypeInfo()} +}; +static ecs::CompileTimeQueryDesc volumes_ecs_query_desc +( + "dagdp::volumes_ecs_query", + empty_span(), + make_span(volumes_ecs_query_comps+0, 2)/*ro*/, + make_span(volumes_ecs_query_comps+2, 1)/*rq*/, + empty_span()); +template +inline void dagdp::volumes_ecs_query(Callable function) +{ + perform_query(g_entity_mgr, volumes_ecs_query_desc.getHandle(), + [&function](const ecs::QueryView& __restrict components) + { + auto comp = components.begin(), compE = components.end(); G_ASSERT(comp != compE); do + { + function( + ECS_RO_COMP(volumes_ecs_query_comps, "dagdp_internal__volume_placer_eid", ecs::EntityId) + , ECS_RO_COMP(volumes_ecs_query_comps, "transform", TMatrix) + ); + + }while (++comp != compE); + } + ); +} +static constexpr ecs::ComponentDesc manager_ecs_query_comps[] = +{ +//start of 1 rw components at [0] + {ECS_HASH("dagdp__global_manager"), ecs::ComponentTypeInfo()} +}; +static ecs::CompileTimeQueryDesc manager_ecs_query_desc +( + "dagdp::manager_ecs_query", + make_span(manager_ecs_query_comps+0, 1)/*rw*/, + 
empty_span(), + empty_span(), + empty_span()); +template +inline void dagdp::manager_ecs_query(Callable function) +{ + perform_query(g_entity_mgr, manager_ecs_query_desc.getHandle(), + [&function](const ecs::QueryView& __restrict components) + { + auto comp = components.begin(), compE = components.end(); G_ASSERT(comp != compE); do + { + function( + ECS_RW_COMP(manager_ecs_query_comps, "dagdp__global_manager", dagdp::GlobalManager) + ); + + }while (++comp != compE); + } + ); +} +static constexpr ecs::ComponentDesc volumes_link_ecs_query_comps[] = +{ +//start of 1 rw components at [0] + {ECS_HASH("dagdp_internal__volume_placer_eid"), ecs::ComponentTypeInfo()}, +//start of 1 ro components at [1] + {ECS_HASH("dagdp__volume_placer_name"), ecs::ComponentTypeInfo()} +}; +static ecs::CompileTimeQueryDesc volumes_link_ecs_query_desc +( + "dagdp::volumes_link_ecs_query", + make_span(volumes_link_ecs_query_comps+0, 1)/*rw*/, + make_span(volumes_link_ecs_query_comps+1, 1)/*ro*/, + empty_span(), + empty_span()); +template +inline void dagdp::volumes_link_ecs_query(Callable function) +{ + perform_query(g_entity_mgr, volumes_link_ecs_query_desc.getHandle(), + [&function](const ecs::QueryView& __restrict components) + { + auto comp = components.begin(), compE = components.end(); G_ASSERT(comp != compE); do + { + function( + ECS_RO_COMP(volumes_link_ecs_query_comps, "dagdp__volume_placer_name", ecs::string) + , ECS_RW_COMP(volumes_link_ecs_query_comps, "dagdp_internal__volume_placer_eid", ecs::EntityId) + ); + + }while (++comp != compE); + } + ); +} +static constexpr ecs::ComponentDesc volume_placers_link_ecs_query_comps[] = +{ +//start of 2 ro components at [0] + {ECS_HASH("eid"), ecs::ComponentTypeInfo()}, + {ECS_HASH("dagdp__name"), ecs::ComponentTypeInfo()} +}; +static ecs::CompileTimeQueryDesc volume_placers_link_ecs_query_desc +( + "dagdp::volume_placers_link_ecs_query", + empty_span(), + make_span(volume_placers_link_ecs_query_comps+0, 2)/*ro*/, + empty_span(), + 
empty_span()); +template +inline void dagdp::volume_placers_link_ecs_query(Callable function) +{ + perform_query(g_entity_mgr, volume_placers_link_ecs_query_desc.getHandle(), + [&function](const ecs::QueryView& __restrict components) + { + auto comp = components.begin(), compE = components.end(); G_ASSERT(comp != compE); do + { + function( + ECS_RO_COMP(volume_placers_link_ecs_query_comps, "eid", ecs::EntityId) + , ECS_RO_COMP(volume_placers_link_ecs_query_comps, "dagdp__name", ecs::string) + ); + + }while (++comp != compE); + } + ); +} diff --git a/prog/daNetGameLibs/daGdp/render/placers/volumeES.cpp.inl b/prog/daNetGameLibs/daGdp/render/placers/volumeES.cpp.inl new file mode 100644 index 000000000..93d8e5a70 --- /dev/null +++ b/prog/daNetGameLibs/daGdp/render/placers/volumeES.cpp.inl @@ -0,0 +1,228 @@ +// Copyright (C) Gaijin Games KFT. All rights reserved. + +#include +#include +#include +#include +#include +#include +#include +#include "../riexProcessor.h" +#include "../globalManager.h" +#include "volume.h" + +constexpr int RI_FIXED_LOD = 2; +constexpr float MIN_GEOMETRY_SIZE = 1.0f; + +ECS_REGISTER_BOXED_TYPE(dagdp::VolumeManager, nullptr); + +namespace dagdp +{ + +template +static inline void volume_placers_ecs_query(Callable); + +template +static inline void volumes_ecs_query(Callable); + +ECS_NO_ORDER +static inline void volume_view_process_es(const EventViewProcess &evt, VolumeManager &dagdp__volume_manager) +{ + const auto &rulesBuilder = evt.get<0>(); + auto &builder = dagdp__volume_manager.currentBuilder; + auto &viewBuilder = *evt.get<2>(); + + volume_placers_ecs_query([&](ECS_REQUIRE(ecs::Tag dagdp_placer_volume) ecs::EntityId eid, float dagdp__density) { + auto iter = rulesBuilder.placers.find(eid); + if (iter == rulesBuilder.placers.end()) + return; + + if (rulesBuilder.maxObjects == 0) + { + logerr("daGdp: placer with EID %u disabled, as the max. 
objects setting is 0", static_cast(eid)); + return; + } + + auto &placer = iter->second; + + if (dagdp__density <= 0.0f) + { + logerr("daGdp: placer with EID %u has invalid density", static_cast(eid)); + return; + } + + builder.mapping.variantIds.insert({eid, builder.variants.size()}); + auto &variant = builder.variants.push_back(); + variant.density = dagdp__density; + + for (const auto objectGroupEid : placer.objectGroupEids) + { + auto iter = rulesBuilder.objectGroups.find(objectGroupEid); + if (iter == rulesBuilder.objectGroups.end()) + continue; + + auto &objectGroup = variant.objectGroups.push_back(); + objectGroup.effectiveDensity = dagdp__density; + objectGroup.info = &iter->second; + } + + viewBuilder.hasDynamicPlacers = true; + }); +} + +ECS_NO_ORDER static inline void volume_view_finalize_es(const EventViewFinalize &evt, VolumeManager &dagdp__volume_manager) +{ + const auto &viewInfo = evt.get<0>(); + const auto &viewBuilder = evt.get<1>(); + auto nodes = evt.get<2>(); + + create_volume_nodes(viewInfo, viewBuilder, dagdp__volume_manager, nodes); + + dagdp__volume_manager.currentBuilder = {}; +} + +void gather_meshes(const VolumeMapping &volume_mapping, + const ViewInfo &viewInfo, + const Viewport &viewport, + float max_bounding_radius, + RiexProcessor &riex_processor, + RelevantMeshes &out_result) +{ + volumes_ecs_query( + [&](ECS_REQUIRE(ecs::Tag dagdp_volume) const ecs::EntityId dagdp_internal__volume_placer_eid, const TMatrix &transform) { + const auto iter = volume_mapping.variantIds.find(dagdp_internal__volume_placer_eid); + if (iter == volume_mapping.variantIds.end()) + return; + + const uint32_t variantIndex = iter->second; + + // Reference: volumePlacerES.cpp.inl, VolumePlacer::updateVisibility + mat44f bboxTm44; + v_mat44_make_from_43cu_unsafe(bboxTm44, transform.array); + + mat44f bboxItm44; + // v_mat44_orthonormal_inverse43(bboxItm44, bboxTm44); + // v_mat44_inverse(bboxItm44, bboxTm44); + v_mat44_inverse43(bboxItm44, bboxTm44); + + mat43f 
bboxItm43; + v_mat44_transpose_to_mat43(bboxItm43, bboxItm44); + + const vec4f extent2 = + v_add(v_add(v_abs(bboxTm44.col0), v_add(v_abs(bboxTm44.col1), v_abs(bboxTm44.col2))), v_splats(max_bounding_radius)); + const vec4f center2 = v_add(bboxTm44.col3, bboxTm44.col3); + + Frustum frustum = viewport.frustum; + Point4 worldPos(viewport.worldPos.x, viewport.worldPos.y, viewport.worldPos.z, 0.0f); + shrink_frustum_zfar(frustum, v_ldu(&worldPos.x), v_splats(min(viewInfo.maxDrawDistance, viewport.maxDrawDistance))); + const bool isIntersecting = frustum.testBoxExtentB(center2, extent2); + + if (!isIntersecting) + return; + + // Reference: volumePlacerES.cpp.inl, VolumePlacer::gatherGeometryInBox + bbox3f bbox; + v_bbox3_init(bbox, bboxTm44, {v_neg(V_C_HALF), V_C_HALF}); + rendinst::riex_collidable_t out_handles; + rendinst::gatherRIGenExtraCollidableMin(out_handles, bbox, MIN_GEOMETRY_SIZE); + + for (const auto handle : out_handles) + { + const uint32_t resIndex = rendinst::handle_to_ri_type(handle); + const RenderableInstanceLodsResource *riLodsRes = rendinst::getRIGenExtraRes(resIndex); + const int bestLod = riLodsRes->getQlBestLod(); + const int actualLod = min(RI_FIXED_LOD, riLodsRes->lods.size() - 1); + if (bestLod > actualLod) + continue; + + const mat43f &instTm = rendinst::getRIGenExtra43(handle); + RenderableInstanceResource *riRes = riLodsRes->lods[actualLod].scene; + const auto *state = riex_processor.ask(riRes); + + if (state) + { + const dag::ConstSpan elems = riRes->getMesh()->getMesh()->getMesh()->getElems(ShaderMesh::STG_opaque); + + G_ASSERT(elems.size() == state->areasIndices.size()); + for (auto [i, elem] : enumerate(elems)) + { + auto &item = out_result.push_back(); + v_stu(&item.bboxItmRow0, bboxItm43.row0); + v_stu(&item.bboxItmRow1, bboxItm43.row1); + v_stu(&item.bboxItmRow2, bboxItm43.row2); + item.startIndex = elem.si; + item.numFaces = elem.numf; + item.baseVertex = elem.baseVertex; + item.stride = elem.vertexData->getStride(); + 
v_stu(&item.tmRow0.x, instTm.row0); + v_stu(&item.tmRow1.x, instTm.row1); + v_stu(&item.tmRow2.x, instTm.row2); + item.areasIndex = state->areasIndices[i]; + item.vbIndex = elem.vertexData->getVbIdx(); + item.variantIndex = variantIndex; + item.uniqueId = rendinst::handle_to_ri_inst(handle); + } + } + } + }); +} + +template +static inline void manager_ecs_query(Callable); + +ECS_TAG(render) +ECS_ON_EVENT(on_appear) +ECS_ON_EVENT(on_disappear) +ECS_TRACK(dagdp__name, dagdp__density) +ECS_REQUIRE(ecs::Tag dagdp_placer_volume, const ecs::string &dagdp__name, float dagdp__density) +static void dagdp_placer_volume_changed_es(const ecs::Event &) +{ + manager_ecs_query([](GlobalManager &dagdp__global_manager) { dagdp__global_manager.invalidateRules(); }); +} + +template +static inline void volumes_link_ecs_query(Callable); + +ECS_TAG(render) +ECS_ON_EVENT(on_appear) +ECS_TRACK(dagdp__name) +ECS_REQUIRE(ecs::Tag dagdp_placer_volume) +static void dagdp_placer_volume_link_es(const ecs::Event &, ecs::EntityId eid, const ecs::string &dagdp__name) +{ + volumes_link_ecs_query([&](const ecs::string &dagdp__volume_placer_name, ecs::EntityId &dagdp_internal__volume_placer_eid) { + if (dagdp__name == dagdp__volume_placer_name) + dagdp_internal__volume_placer_eid = eid; + }); +} + +ECS_TAG(render) +ECS_ON_EVENT(on_disappear) +ECS_REQUIRE(ecs::Tag dagdp_placer_volume) +static void dagdp_placer_volume_unlink_es(const ecs::Event &, const ecs::string &dagdp__name) +{ + volumes_link_ecs_query([&](const ecs::string &dagdp__volume_placer_name, ecs::EntityId &dagdp_internal__volume_placer_eid) { + if (dagdp__name == dagdp__volume_placer_name) + dagdp_internal__volume_placer_eid = ecs::INVALID_ENTITY_ID; + }); +} + +// TODO: when dagdp__name changes, all volumes that were linked to the old name should ideally be unlinked. +// However, this is a dev-only scenario, and probably a rare one, so it's not implemented for now. 
+ +template +static inline void volume_placers_link_ecs_query(Callable); + +ECS_TAG(render) +ECS_ON_EVENT(on_appear) +ECS_TRACK(dagdp__volume_placer_name) +ECS_REQUIRE(ecs::Tag dagdp_volume) +static void dagdp_volume_link_es( + const ecs::Event &, const ecs::string &dagdp__volume_placer_name, ecs::EntityId &dagdp_internal__volume_placer_eid) +{ + volume_placers_link_ecs_query([&](ecs::EntityId eid, const ecs::string &dagdp__name) { + if (dagdp__name == dagdp__volume_placer_name) + dagdp_internal__volume_placer_eid = eid; + }); +} + +} // namespace dagdp diff --git a/prog/daNetGameLibs/daGdp/render/riexProcessor.cpp b/prog/daNetGameLibs/daGdp/render/riexProcessor.cpp new file mode 100644 index 000000000..35598d0b3 --- /dev/null +++ b/prog/daNetGameLibs/daGdp/render/riexProcessor.cpp @@ -0,0 +1,43 @@ +// Copyright (C) Gaijin Games KFT. All rights reserved. + +#include "riexProcessor.h" + +namespace dagdp +{ + +const RiexProcessor::ResourceState *RiexProcessor::ask(RenderableInstanceResource *res) +{ + const auto result = resources.find(res); + + if (result != resources.end()) + return &result->second; + + if (!nextToProcess) + { + nextToProcess = res; + nextToProcess->addRef(); + } + + return nullptr; +} + +void RiexProcessor::markCurrentAsProcessed(AreasIndices &&areasIndices) +{ + if (nextToProcess) + { + auto &item = resources[nextToProcess]; + item.areasIndices = eastl::move(areasIndices); + nextToProcess = nullptr; + } +} + +void RiexProcessor::resetCurrent() +{ + if (nextToProcess) + { + nextToProcess->delRef(); + nextToProcess = nullptr; + } +} + +} // namespace dagdp diff --git a/prog/daNetGameLibs/daGdp/render/riexProcessor.h b/prog/daNetGameLibs/daGdp/render/riexProcessor.h new file mode 100644 index 000000000..4ed87c313 --- /dev/null +++ b/prog/daNetGameLibs/daGdp/render/riexProcessor.h @@ -0,0 +1,39 @@ +// Copyright (C) Gaijin Games KFT. All rights reserved. 
+#pragma once + +#include +#include +#include + +namespace dagdp +{ + +static constexpr uint32_t ESTIMATED_PROCESSED_MESHES_PER_FRAME = 8; +using AreasIndices = dag::RelocatableFixedVector; + +class RiexProcessor +{ +public: + struct ResourceState + { + AreasIndices areasIndices; + }; + +private: + eastl::hash_map resources; + RenderableInstanceResource *nextToProcess = nullptr; + +public: + const ResourceState *ask(RenderableInstanceResource *res); + RenderableInstanceResource *current() const { return nextToProcess; } + void resetCurrent(); + void markCurrentAsProcessed(AreasIndices &&areasIndices); + + ~RiexProcessor() + { + for (const auto &item : resources) + item.first->delRef(); + } +}; + +} // namespace dagdp diff --git a/prog/daNetGameLibs/daGdp/shaders/dagdp_common.dshl b/prog/daNetGameLibs/daGdp/shaders/dagdp_common.dshl index 47496100e..737ff5374 100644 --- a/prog/daNetGameLibs/daGdp/shaders/dagdp_common.dshl +++ b/prog/daNetGameLibs/daGdp/shaders/dagdp_common.dshl @@ -1,5 +1,6 @@ hlsl { #include "dagdp_common.hlsli" + #include "noise/uint_noise1D.hlsl" } macro INIT_DAGDP_COMMON() diff --git a/prog/daNetGameLibs/daGdp/shaders/dagdp_common.hlsli b/prog/daNetGameLibs/daGdp/shaders/dagdp_common.hlsli index f5cc2bd32..1b05208e9 100644 --- a/prog/daNetGameLibs/daGdp/shaders/dagdp_common.hlsli +++ b/prog/daNetGameLibs/daGdp/shaders/dagdp_common.hlsli @@ -39,4 +39,18 @@ struct PlaceableGpuData uint riPoolOffset; }; +#define DYN_COUNTERS_PREFIX 4 +#define DYN_COUNTERS_INDEX_OVERFLOW_FLAG 0 +#define DYN_COUNTERS_INDEX_SKIP_PESSIMISTIC_PLACEMENT 1 +#define DYN_COUNTERS_INDEX_TOTAL_PLACED 2 +#define DYN_COUNTERS_INDEX_TOTAL_CAPACITY 3 + +struct DynAlloc +{ + uint instanceBaseIndexPlaced; + uint instanceBaseIndex; + uint capacity; + uint _padding; +}; + #endif // DAGDP_COMMON_HLSLI_INCLUDED \ No newline at end of file diff --git a/prog/daNetGameLibs/daGdp/shaders/dagdp_common_placer.dshl b/prog/daNetGameLibs/daGdp/shaders/dagdp_common_placer.dshl index 
2c024b1e9..de81b4c43 100644 --- a/prog/daNetGameLibs/daGdp/shaders/dagdp_common_placer.dshl +++ b/prog/daNetGameLibs/daGdp/shaders/dagdp_common_placer.dshl @@ -22,8 +22,44 @@ macro INIT_DAGDP_COMMON_PLACER() return r <= w; } - void writeInstance(PlaceableGpuData placeable, float3 instancePos, int2 stablePos, float3 yawAxis, float2 xzVector, float randScale, uint instanceElementOffset) + void writeInstance(PlaceableGpuData placeable, float3 instancePos, float3 surfaceNormal, int2 stablePos, float randScale, uint instanceElementOffset) { + float3 yawAxis; + float2 xzVector; + + FLATTEN + if (placeable.flags & ORIENTATION_Y_IS_NORMAL_BIT) + yawAxis = surfaceNormal; + else + yawAxis = float3(0, 1, 0); // World Y + + FLATTEN + if (placeable.flags & ORIENTATION_X_IS_PROJECTED_BIT) + { + // Pick a projected axis. + xzVector = surfaceNormal.xz; + + FLATTEN + if (length(xzVector) > 0.1) + xzVector = normalize(xzVector); + else + { + // Fallback when surface is roughly horizontal, and there is no meaningful projection. + xzVector = float2(1, 0); // World X. + } + } + else + { + // Pick a world axis. + if (yawAxis.x < 0.9) + xzVector = float2(1, 0); // World X + else + { + // Fallback when surface is roughly in the YZ plane. + xzVector = float2(0, -1); // World Z. 
+ } + } + float3x3 basis; basis[1] = yawAxis; basis[2] = normalize(cross(float3(xzVector.x, 0.0, xzVector.y), yawAxis)); @@ -101,13 +137,13 @@ macro INIT_DAGDP_COMMON_PLACER() return placeableIndex; } - uint getRenderableIndex(VariantGpuData variant, uint placeableIndex, float3 instancePos, float randScale, bool isVisible) + uint getRenderableIndex(VariantGpuData variant, uint placeableIndex, float3 instancePos, float3 viewportPos, float randScale, bool isVisible) { uint rangeOffset = ~0u; FLATTEN if (isVisible && (placeableIndex != ~0u)) { - float dist = distance(instancePos, viewport_pos) / randScale; + float dist = distance(instancePos, viewportPos) / randScale; LOOP for (uint i = variant.drawRangeStartIndex; i < variant.drawRangeEndIndex; ++i) @@ -133,7 +169,7 @@ endmacro // g_storage: must be `groupshared uint[N]`, N >= 2 * threadgroup_size. // g_cond: must be `groupshared uint`. -macro INIT_DAGDP_COMMON_PLACER_THREADGROUP(threadgroup_size, g_storage, g_cond) +macro INIT_DAGDP_COMMON_PLACER_THREADGROUP(threadgroup_size, g_storage, g_cond, useTileLimits) hlsl(cs) { bool threadGroupAnyTrue(bool cond, uint tId) { @@ -220,8 +256,10 @@ macro INIT_DAGDP_COMMON_PLACER_THREADGROUP(threadgroup_size, g_storage, g_cond) result.count = pBitCounts.x + pBitCounts.y; result.offset = mBitCounts.x + mBitCounts.y; + ##if useTileLimits uint tileLimit = structuredBufferAt(placeable_tile_limits, placeableIndex); result.count = min(result.count, tileLimit); + ##endif } GroupMemoryBarrierWithGroupSync(); diff --git a/prog/daNetGameLibs/daGdp/shaders/dagdp_common_placer.hlsli b/prog/daNetGameLibs/daGdp/shaders/dagdp_common_placer.hlsli index cd7446952..25326cd31 100644 --- a/prog/daNetGameLibs/daGdp/shaders/dagdp_common_placer.hlsli +++ b/prog/daNetGameLibs/daGdp/shaders/dagdp_common_placer.hlsli @@ -10,7 +10,7 @@ struct VariantGpuData uint drawRangeStartIndex; uint drawRangeEndIndex; uint renderableIndicesStartIndex; // Not a typo. 
- uint _pad; + float density; }; #endif // DAGDP_COMMON_PLACER_HLSLI_INCLUDED \ No newline at end of file diff --git a/prog/daNetGameLibs/daGdp/shaders/dagdp_dynamic.dshl b/prog/daNetGameLibs/daGdp/shaders/dagdp_dynamic.dshl new file mode 100644 index 000000000..f9e95702a --- /dev/null +++ b/prog/daNetGameLibs/daGdp/shaders/dagdp_dynamic.dshl @@ -0,0 +1,115 @@ +include "shader_global.dshl" + +buffer dagdp__dyn_allocs; +buffer dagdp__dyn_counters; + +int dagdp__dyn_counters_num; +int4 dagdp__dyn_region; + +hlsl { + #include "dagdp_common.hlsli" + #include "dagdp_dynamic.hlsli" +} + +shader dagdp_dynamic_recount +{ + ENABLE_ASSERT(cs) + + (cs) { + dyn_allocs@uav = dagdp__dyn_allocs hlsl { + #include "dagdp_common.hlsli" + RWStructuredBuffer dyn_allocs@uav; + } + + dyn_counters@uav = dagdp__dyn_counters hlsl { + RWStructuredBuffer dyn_counters@uav; + } + + dyn_counters_num@i1 = dagdp__dyn_counters_num; + dyn_region@i2 = dagdp__dyn_region; + } + + hlsl(cs) { + groupshared uint gs_data[DAGDP_DYNAMIC_RECOUNT_GROUP_SIZE]; + + void inclusive_prefix_sum_gs(uint tId, uint count) + { + // TODO (Performance): eliminate bank conflicts? 
+ // See https://developer.nvidia.com/gpugems/gpugems3/part-vi-gpu-computing/chapter-39-parallel-prefix-sum-scan-cuda + gs_data[tId] = count; + GroupMemoryBarrierWithGroupSync(); + + UNROLL + for (uint stride = 1u; stride <= DAGDP_DYNAMIC_RECOUNT_GROUP_SIZE; stride *= 2u) + { + uint mate = 0u; + + FLATTEN + if (tId >= stride) + mate = gs_data[tId - stride]; + + GroupMemoryBarrierWithGroupSync(); + gs_data[tId] += mate; + GroupMemoryBarrierWithGroupSync(); + } + } + + [numthreads(DAGDP_DYNAMIC_RECOUNT_GROUP_SIZE, 1, 1)] + void main(uint tId : SV_GroupThreadID) + { + uint count = 0; + + BRANCH + if (tId < dyn_counters_num) + count = structuredBufferAt(dyn_counters, tId.x + DYN_COUNTERS_PREFIX); + + BRANCH + if (tId == 0) + gs_data[0] = 0; + + GroupMemoryBarrierWithGroupSync(); + + BRANCH + if (tId < dyn_counters_num) + InterlockedAdd(gs_data[0], count); + + GroupMemoryBarrierWithGroupSync(); + + const uint totalPlaced = gs_data[0]; + const bool isOverflow = totalPlaced > dyn_region.y; + + if (isOverflow) + count = 0; + else + count += (dyn_region.y - totalPlaced) / dyn_counters_num; // Distribute extra capacity across all counters equally. + + inclusive_prefix_sum_gs(tId, count); + const bool isAlreadyPlaced = structuredBufferAt(dyn_counters, DYN_COUNTERS_INDEX_OVERFLOW_FLAG) == 0u; + + BRANCH + if (tId < dyn_counters_num) + { + const uint oldIndex = structuredBufferAt(dyn_allocs, tId).instanceBaseIndex; + const uint newIndex = dyn_region.x + gs_data[tId] - count; + + structuredBufferAt(dyn_allocs, tId).instanceBaseIndexPlaced = isAlreadyPlaced ? oldIndex : newIndex; + structuredBufferAt(dyn_allocs, tId).instanceBaseIndex = newIndex; + structuredBufferAt(dyn_allocs, tId).capacity = count; + + BRANCH + if (!isAlreadyPlaced) + structuredBufferAt(dyn_counters, tId + DYN_COUNTERS_PREFIX) = 0; + } + + BRANCH + if (tId == 0) + { + structuredBufferAt(dyn_counters, DYN_COUNTERS_INDEX_OVERFLOW_FLAG) = isOverflow ? 
~0u : 0; + structuredBufferAt(dyn_counters, DYN_COUNTERS_INDEX_SKIP_PESSIMISTIC_PLACEMENT) = isAlreadyPlaced || isOverflow; + structuredBufferAt(dyn_counters, DYN_COUNTERS_INDEX_TOTAL_PLACED) = totalPlaced; + structuredBufferAt(dyn_counters, DYN_COUNTERS_INDEX_TOTAL_CAPACITY) = dyn_region.y; + } + } + } + compile("target_cs", "main"); +} diff --git a/prog/daNetGameLibs/daGdp/shaders/dagdp_dynamic.hlsli b/prog/daNetGameLibs/daGdp/shaders/dagdp_dynamic.hlsli new file mode 100644 index 000000000..547c153f6 --- /dev/null +++ b/prog/daNetGameLibs/daGdp/shaders/dagdp_dynamic.hlsli @@ -0,0 +1,6 @@ +#ifndef DAGDP_DYNAMIC_HLSLI_INCLUDED +#define DAGDP_DYNAMIC_HLSLI_INCLUDED + +#define DAGDP_DYNAMIC_RECOUNT_GROUP_SIZE 768 + +#endif // DAGDP_DYNAMIC_HLSLI_INCLUDED \ No newline at end of file diff --git a/prog/daNetGameLibs/daGdp/shaders/dagdp_heightmap.dshl b/prog/daNetGameLibs/daGdp/shaders/dagdp_heightmap.dshl index 6fa73f463..88f9017bc 100644 --- a/prog/daNetGameLibs/daGdp/shaders/dagdp_heightmap.dshl +++ b/prog/daNetGameLibs/daGdp/shaders/dagdp_heightmap.dshl @@ -223,7 +223,7 @@ shader dagdp_heightmap_place INIT_DAGDP_COMMON() INIT_DAGDP_COMMON_PLACER() - INIT_DAGDP_COMMON_PLACER_THREADGROUP(TILE_INSTANCE_COUNT, g_storage, g_cond) + INIT_DAGDP_COMMON_PLACER_THREADGROUP(TILE_INSTANCE_COUNT, g_storage, g_cond, true) INIT_DEFORM_HMAP(cs) USE_DEFORM_HMAP(cs) @@ -332,7 +332,7 @@ shader dagdp_heightmap_place return; float randScale = lerp(placeable.scaleMin, placeable.scaleMax, stableRand(stablePos, prng_seed_scale)); - uint renderableIndex = getRenderableIndex(variant, placeableIndex, instancePos, randScale, isVisible); + uint renderableIndex = getRenderableIndex(variant, placeableIndex, instancePos, viewport_pos, randScale, isVisible); SamePlaceableInfo pInfo = getSamePlaceableInfo(placeableIndex, tId.y); // Disallow more than the specified # of instances per tile. 
@@ -365,43 +365,7 @@ shader dagdp_heightmap_place uint instanceElementOffset = 4 * (viewport_instance_offset + instanceRegion.x + baseInstanceOffset + rInfo.offset); // TODO: float4, float4x3 ##assert(baseInstanceOffset + rInfo.offset < instanceRegion.y, "Instance region out of bounds"); - float3 yawAxis; - float2 xzVector; - - FLATTEN - if (placeable.flags & ORIENTATION_Y_IS_NORMAL_BIT) - yawAxis = surfaceNormal; - else - yawAxis = float3(0, 1, 0); // World Y - - FLATTEN - if (placeable.flags & ORIENTATION_X_IS_PROJECTED_BIT) - { - // Pick a projected axis. - xzVector = surfaceNormal.xz; - - FLATTEN - if (length(xzVector) > 0.1) - xzVector = normalize(xzVector); - else - { - // Fallback when surface is roughly horizontal, and there is no meaningful projection. - xzVector = float2(1, 0); // World X. - } - } - else - { - // Pick a world axis. - if (yawAxis.x < 0.9) - xzVector = float2(1, 0); // World X - else - { - // Fallback when surface is roughly in the YZ plane. - xzVector = float2(0, -1); // World Z. - } - } - - writeInstance(placeable, instancePos, stablePos, yawAxis, xzVector, randScale, instanceElementOffset); + writeInstance(placeable, instancePos, surfaceNormal, stablePos, randScale, instanceElementOffset); } } } diff --git a/prog/daNetGameLibs/daGdp/shaders/dagdp_riex.dshl b/prog/daNetGameLibs/daGdp/shaders/dagdp_riex.dshl index f1e1d4647..bfb6d3b40 100644 --- a/prog/daNetGameLibs/daGdp/shaders/dagdp_riex.dshl +++ b/prog/daNetGameLibs/daGdp/shaders/dagdp_riex.dshl @@ -1,12 +1,25 @@ include "hardware_defines.dshl" +include "dagdp_common.dshl" buffer dagdp__counters; +buffer dagdp__dyn_allocs; +buffer dagdp__dyn_counters; buffer dagdp_riex__draw_args; buffer dagdp_riex__patches; int dagdp_riex__num_patches; -shader dagdp_riex_patch_counters +hlsl(cs) { + #define SIZEOF_UINT 4 + #define ARGS_BYTE_OFFSET_INDEX_COUNT 0 // See DrawIndexedIndirectArgs. 
+ #define ARGS_BYTE_OFFSET_INSTANCE_COUNT SIZEOF_UINT + #define ARGS_BYTE_OFFSET_DRAW_CALL_ID (-SIZEOF_UINT) + #define ARGS_BYTE_OFFSET_INSTANCE_START (4 * SIZEOF_UINT) + + #include +} + +shader dagdp_riex_patch_static { ENABLE_ASSERT(cs) @@ -22,24 +35,87 @@ shader dagdp_riex_patch_counters } patches@buf = dagdp_riex__patches hlsl { - StructuredBuffer patches@buf; + #include "dagdp_riex.hlsli" + StructuredBuffer patches@buf; } } hlsl(cs) { - #define SIZEOF_UINT 4 - #define ARGS_BYTE_OFFSET_INDEX_COUNT 0 // See DrawIndexedIndirectArgs. - #define ARGS_BYTE_OFFSET_INSTANCE_COUNT SIZEOF_UINT + [numthreads(64, 1, 1)] + void main(uint3 dtid : SV_DispatchThreadID) + { + BRANCH + if (dtid.x >= num_patches) + return; + + RiexPatch patch = structuredBufferAt(patches, dtid.x); + const uint argsByteOffset = patch.argsByteOffsetStatic; + const uint instanceCount = structuredBufferAt(counters, patch.localCounterIndex); + uint indexCount = patch.indexCount; + + FLATTEN + if (instanceCount == 0) + { + // Workaround for AMD bug. 
+ // See https://youtrack.gaijin.team/issue/RE-876/daGDP-incorrect-rendering-on-Xbox-One + // See https://forums.xboxlive.com/questions/51821/d3d12-executeindirect-of-a-zero-instance-draw-inde.html + indexCount = 0; + } + + storeBuffer(draw_args, argsByteOffset + ARGS_BYTE_OFFSET_INDEX_COUNT, indexCount); + storeBuffer(draw_args, argsByteOffset + ARGS_BYTE_OFFSET_INSTANCE_COUNT, instanceCount); + } + } + compile("target_cs", "main"); +} + +shader dagdp_riex_patch_dynamic +{ + ENABLE_ASSERT(cs) + + (cs) { + num_patches@i1 = dagdp_riex__num_patches; + draw_args@uav = dagdp_riex__draw_args hlsl { + RWByteAddressBuffer draw_args@uav; + } + + dyn_allocs@buf = dagdp__dyn_allocs hlsl { + #include "dagdp_common.hlsli" + StructuredBuffer dyn_allocs@buf; + } + + dyn_counters@buf = dagdp__dyn_counters hlsl { + StructuredBuffer dyn_counters@buf; + } + + patches@buf = dagdp_riex__patches hlsl { + #include "dagdp_riex.hlsli" + StructuredBuffer patches@buf; + } + } + + hlsl(cs) { [numthreads(64, 1, 1)] void main(uint3 dtid : SV_DispatchThreadID) { - if (dtid.x >= num_patches) return; - uint4 patch = structuredBufferAt(patches, dtid.x); - uint argsByteOffset = patch.x; - uint counterIndex = patch.y; - uint indexCount = patch.z; - uint instanceCount = structuredBufferAt(counters, counterIndex); + BRANCH + if (dtid.x >= num_patches) + return; + + RiexPatch patch = structuredBufferAt(patches, dtid.x); + const uint argsByteOffset = patch.argsByteOffsetDynamic; + uint instanceBaseIndex = structuredBufferAt(dyn_allocs, patch.localCounterIndex).instanceBaseIndexPlaced; + uint instanceCount = structuredBufferAt(dyn_counters, patch.localCounterIndex + DYN_COUNTERS_PREFIX); + + if (structuredBufferAt(dyn_counters, DYN_COUNTERS_INDEX_OVERFLOW_FLAG) != 0u) + { + // Placement failed. 
+ instanceBaseIndex = 0; + instanceCount = 0; + } + + uint indexCount = patch.indexCount; FLATTEN if (instanceCount == 0) @@ -52,6 +128,25 @@ shader dagdp_riex_patch_counters storeBuffer(draw_args, argsByteOffset + ARGS_BYTE_OFFSET_INDEX_COUNT, indexCount); storeBuffer(draw_args, argsByteOffset + ARGS_BYTE_OFFSET_INSTANCE_COUNT, instanceCount); + + BRANCH + if (patch.flags & RIEX_PATCH_FLAG_BIT_PACKED) + { + const uint drawCallId = (instanceBaseIndex << MATRICES_OFFSET_SHIFT) | patch.materialOffset; + BRANCH + if (patch.flags & RIEX_PATCH_FLAG_BIT_EXTENDED_ARGS) + { + storeBuffer(draw_args, argsByteOffset + ARGS_BYTE_OFFSET_DRAW_CALL_ID, drawCallId); + } + else + { + storeBuffer(draw_args, argsByteOffset + ARGS_BYTE_OFFSET_INSTANCE_START, drawCallId); + } + } + else + { + storeBuffer(draw_args, argsByteOffset + ARGS_BYTE_OFFSET_INSTANCE_START, instanceBaseIndex); + } } } compile("target_cs", "main"); diff --git a/prog/daNetGameLibs/daGdp/shaders/dagdp_riex.hlsli b/prog/daNetGameLibs/daGdp/shaders/dagdp_riex.hlsli new file mode 100644 index 000000000..1ee1fdfd5 --- /dev/null +++ b/prog/daNetGameLibs/daGdp/shaders/dagdp_riex.hlsli @@ -0,0 +1,18 @@ +#ifndef DAGDP_RIEX_HLSLI_INCLUDED +#define DAGDP_RIEX_HLSLI_INCLUDED + +#define RIEX_PATCH_FLAG_BIT_EXTENDED_ARGS (1 << 0) +#define RIEX_PATCH_FLAG_BIT_PACKED (1 << 1) + +struct RiexPatch +{ + uint argsByteOffsetStatic; + uint argsByteOffsetDynamic; + uint localCounterIndex; // "Local" to its kind. 
+ uint indexCount; + uint materialOffset; + uint flags; + uint _padding[2]; +}; + +#endif // DAGDP_RIEX_HLSLI_INCLUDED \ No newline at end of file diff --git a/prog/daNetGameLibs/daGdp/shaders/dagdp_volume.dshl b/prog/daNetGameLibs/daGdp/shaders/dagdp_volume.dshl new file mode 100644 index 000000000..59d9c7d78 --- /dev/null +++ b/prog/daNetGameLibs/daGdp/shaders/dagdp_volume.dshl @@ -0,0 +1,555 @@ +include "shader_global.dshl" +include "frustum.dshl" +include "dagdp_common_placer.dshl" + +buffer dagdp__instance_data; +buffer dagdp__dyn_allocs; +buffer dagdp__dyn_counters; + +buffer dagdp_volume__meshes; +buffer dagdp_volume__draw_ranges; +buffer dagdp_volume__placeables; +buffer dagdp_volume__placeable_weights; +buffer dagdp_volume__renderable_indices; +buffer dagdp_volume__variants; +buffer dagdp_volume__areas; +buffer dagdp_volume__dispatch_args; + +float dagdp_volume__max_placeable_bounding_radius; +float dagdp_volume__debug_frustum_culling_bias; + +int dagdp_volume__num_renderables; +int dagdp_volume__num_placeables; + +int dagdp_volume__prng_seed_placeable; +int dagdp_volume__prng_seed_slope; +int dagdp_volume__prng_seed_scale; +int dagdp_volume__prng_seed_yaw; +int dagdp_volume__prng_seed_pitch; +int dagdp_volume__prng_seed_roll; +int dagdp_volume__prng_seed_triangle1; +int dagdp_volume__prng_seed_triangle2; + +float4 dagdp_volume__viewport_pos; +float dagdp_volume__viewport_max_distance; +int dagdp_volume__viewport_index; + +int dagdp_volume__mesh_index; +int dagdp_volume__num_dispatches; + +int4 dagdp_volume__mesh_params; +int dagdp_volume__areas_start_offset; +int dagdp_volume__areas_bottom_offset; +int dagdp_volume__areas_top_offset; +int dagdp_volume__areas_count; + +int dagdp_volume__index_buf_reg_no = 0; +int dagdp_volume__vertex_buf_reg_no = 1; + +hlsl { + #include "dagdp_volume.hlsli" +} + +// TODO: fully duplicates macro in prog/gameLibs/render/shaders/gpu_objects_placer.dshl +macro GPU_OBJECTS_LOAD_MESH_TRIANGLE() + hlsl(cs) { + void 
load_mesh_triangle_internal(uint start_index, uint face_id, uint base_vertex, uint stride, + out uint4 v1_n, out uint4 v2_n, out uint4 v3_n) + { + uint3 indices; + #define BYTE_PER_INDEX 2 + uint indices_offset = ((start_index + face_id * 3) * BYTE_PER_INDEX); + uint2 indices_mem = loadBuffer2(indexBuf, indices_offset & ~0x3); //48 bits of need indices, other 16 not needed + if (indices_offset & 0x2) //first 16 not needed + indices = uint3(indices_mem.x >> 16, indices_mem.y & 0xffff, indices_mem.y >> 16); + else //last 16 not needed + indices = uint3(indices_mem.x & 0xffff, indices_mem.x >> 16, indices_mem.y & 0xffff); + indices = (indices + base_vertex) * stride; //assumption that stride is multiple by 4 + + v1_n = loadBuffer4(vertexBuf, indices.x); + v2_n = loadBuffer4(vertexBuf, indices.y); + v3_n = loadBuffer4(vertexBuf, indices.z); + } + + void load_mesh_triangle(uint start_index, uint face_id, uint base_vertex, uint stride, + out float3 v1, out float3 v2, out float3 v3) + { + uint4 v1_n, v2_n, v3_n; + load_mesh_triangle_internal(start_index, face_id, base_vertex, stride, v1_n, v2_n, v3_n); + v1 = asfloat(v1_n.xyz); + v2 = asfloat(v2_n.xyz); + v3 = asfloat(v3_n.xyz); + } + + float3 decode_normal(uint encoded_normal) + { + return (uint3(encoded_normal >> 16, encoded_normal >> 8, encoded_normal) & 0xff) / 127.5 - 1.0; + } + + void load_mesh_triangle(uint start_index, uint face_id, uint base_vertex, uint stride, + out float3 v1, out float3 v2, out float3 v3, out float3 n1, out float3 n2, out float3 n3) + { + uint4 v1_n, v2_n, v3_n; + load_mesh_triangle_internal(start_index, face_id, base_vertex, stride, v1_n, v2_n, v3_n); + v1 = asfloat(v1_n.xyz); + v2 = asfloat(v2_n.xyz); + v3 = asfloat(v3_n.xyz); + n1 = decode_normal(v1_n.w); + n2 = decode_normal(v2_n.w); + n3 = decode_normal(v3_n.w); + } + } +endmacro + +// gs_storage: must be groupshared float[DAGDP_PREFIX_SUM_GROUP_SIZE]; +macro INIT_PSUM() + hlsl(cs) { + // TODO: Duplicates dagdp_dynamic.dshl (except using 
float instead of uint) + void inclusive_prefix_sum_gs(uint tId, float value) + { + // TODO (Performance): eliminate bank conflicts? + gs_storage[tId] = value; + GroupMemoryBarrierWithGroupSync(); + + UNROLL + for (uint stride = 1u; stride <= DAGDP_PREFIX_SUM_GROUP_SIZE; stride *= 2u) + { + float mate = 0.0; // Must be float: gs_storage holds float partial sums, a uint temporary would truncate them. + + FLATTEN + if (tId >= stride) + mate = gs_storage[tId - stride]; + + GroupMemoryBarrierWithGroupSync(); + gs_storage[tId] += mate; + GroupMemoryBarrierWithGroupSync(); + } + } + + } +endmacro + +shader dagdp_volume_mesh_process_tri +{ + ENABLE_ASSERT(cs) + GPU_OBJECTS_LOAD_MESH_TRIANGLE() + + hlsl(cs) { + groupshared float gs_storage[DAGDP_PREFIX_SUM_GROUP_SIZE]; + } + + INIT_PSUM() + + (cs) { + mesh_params@i4 = dagdp_volume__mesh_params; + areas_start_offset@i1 = dagdp_volume__areas_start_offset; + + indexBuf@buf : register(dagdp_volume__index_buf_reg_no) hlsl { + ByteAddressBuffer indexBuf@buf; + } + + vertexBuf@buf : register(dagdp_volume__vertex_buf_reg_no) hlsl { + ByteAddressBuffer vertexBuf@buf; + } + + areas@uav = dagdp_volume__areas hlsl { + RWStructuredBuffer areas@uav; + } + } + + hlsl(cs) { + [numthreads(DAGDP_PREFIX_SUM_GROUP_SIZE, 1, 1)] + void main(uint dtId : SV_DispatchThreadID, uint tId : SV_GroupThreadID) + { + const uint startIndex = mesh_params.x; + const uint numFaces = mesh_params.y; + const uint baseVertex = mesh_params.z; + const uint stride = mesh_params.w; + + float area = 0; + BRANCH + if (dtId < numFaces) + { + float3 v1, v2, v3; + load_mesh_triangle(startIndex, dtId, baseVertex, stride, v1, v2, v3); + area = 0.5 * length(cross(v1 - v3, v2 - v3)); + } + + inclusive_prefix_sum_gs(tId, area); + structuredBufferAt(areas, dtId + areas_start_offset) = gs_storage[tId]; + } + } + compile("target_cs", "main"); +} + +shader dagdp_volume_mesh_process_up +{ + ENABLE_ASSERT(cs) + + hlsl(cs) { + groupshared float gs_storage[DAGDP_PREFIX_SUM_GROUP_SIZE]; + } + + INIT_PSUM() + + (cs) { + areas_bottom_offset@i1 = dagdp_volume__areas_bottom_offset;
+ areas_top_offset@i1 = dagdp_volume__areas_top_offset; + areas_count@i1 = dagdp_volume__areas_count; + + areas@uav = dagdp_volume__areas hlsl { + RWStructuredBuffer areas@uav; + } + } + + hlsl(cs) { + [numthreads(DAGDP_PREFIX_SUM_GROUP_SIZE, 1, 1)] + void main(uint dtId : SV_DispatchThreadID, uint tId : SV_GroupThreadID) + { + float area = 0; + BRANCH + if (dtId < areas_count) + area = structuredBufferAt(areas, dtId * DAGDP_PREFIX_SUM_GROUP_SIZE + areas_bottom_offset + DAGDP_PREFIX_SUM_GROUP_SIZE - 1); + + inclusive_prefix_sum_gs(tId, area); + + BRANCH + if (dtId < areas_count) + structuredBufferAt(areas, dtId + areas_top_offset) = gs_storage[tId]; + } + } + compile("target_cs", "main"); +} + +shader dagdp_volume_mesh_process_down +{ + ENABLE_ASSERT(cs) + + (cs) { + areas_bottom_offset@i1 = dagdp_volume__areas_bottom_offset; + areas_top_offset@i1 = dagdp_volume__areas_top_offset; + areas_count@i1 = dagdp_volume__areas_count; + + areas@uav = dagdp_volume__areas hlsl { + RWStructuredBuffer areas@uav; + } + } + + hlsl(cs) { + [numthreads(DAGDP_PREFIX_SUM_GROUP_SIZE, 1, 1)] + void main(uint dtId : SV_DispatchThreadID, uint tId : SV_GroupThreadID) + { + BRANCH + if (dtId < DAGDP_PREFIX_SUM_GROUP_SIZE || dtId >= areas_count) // Only the first group has no preceding group total; index GROUP_SIZE already belongs to the second group. + return; + + float area = structuredBufferAt(areas, dtId + areas_bottom_offset); + float summedArea = 0; + + uint topIndex = (dtId / DAGDP_PREFIX_SUM_GROUP_SIZE) - 1; + summedArea = structuredBufferAt(areas, topIndex + areas_top_offset); + + structuredBufferAt(areas, dtId + areas_bottom_offset) = area + summedArea; + } + } + compile("target_cs", "main"); +} + +shader dagdp_volume_set_args +{ + ENABLE_ASSERT(cs) + + (cs) { + areas@buf = dagdp_volume__areas hlsl { + StructuredBuffer areas@buf; + } + + dispatch_args@uav = dagdp_volume__dispatch_args hlsl { + RWByteAddressBuffer dispatch_args@uav; + } + + meshes@buf = dagdp_volume__meshes hlsl { + #include "dagdp_volume.hlsli" + StructuredBuffer meshes@buf; + } + + variants@buf =
dagdp_volume__variants hlsl { + #include "dagdp_common_placer.hlsli" + StructuredBuffer variants@buf; + } + + num_dispatches@i1 = dagdp_volume__num_dispatches; + } + + hlsl(cs) { + #define GROUP_SIZE 64 + + [numthreads(GROUP_SIZE, 1, 1)] + void main(uint dtId : SV_DispatchThreadID) + { + BRANCH + if (dtId >= num_dispatches) + return; + + const MeshIntersection mesh = structuredBufferAt(meshes, dtId); + const float totalArea = structuredBufferAt(areas, mesh.areasIndex + mesh.numFaces - 1); + const VariantGpuData variant = structuredBufferAt(variants, mesh.variantIndex); + const uint groupCount = (uint(totalArea * variant.density) + GROUP_SIZE - 1) / GROUP_SIZE; + const uint argsByteOffset = dtId * 12; // 3 * sizeof(uint) + + storeBuffer(dispatch_args, argsByteOffset, groupCount); + storeBuffer(dispatch_args, argsByteOffset + 4, 1); + storeBuffer(dispatch_args, argsByteOffset + 8, 1); + } + } + compile("target_cs", "main"); +} + +shader dagdp_volume_place_stage0, dagdp_volume_place_stage1 +{ + ENABLE_ASSERT(cs) + GPU_OBJECTS_LOAD_MESH_TRIANGLE() + INIT_AND_USE_FRUSTUM_CHECK_BASE(cs) + + hlsl(cs) { + #define GROUP_SIZE 64 + groupshared uint g_storage[GROUP_SIZE * 2]; + groupshared uint g_cond; + } + + INIT_DAGDP_COMMON() + INIT_DAGDP_COMMON_PLACER() + INIT_DAGDP_COMMON_PLACER_THREADGROUP(GROUP_SIZE, g_storage, g_cond, false) + + (cs) { + areas@buf = dagdp_volume__areas hlsl { + StructuredBuffer areas@buf; + } + + instance_data@uav = dagdp__instance_data hlsl { + RWBuffer instance_data@uav; + } + + meshes@buf = dagdp_volume__meshes hlsl { + #include "dagdp_volume.hlsli" + StructuredBuffer meshes@buf; + } + + dyn_allocs@buf = dagdp__dyn_allocs hlsl { + #include "dagdp_common.hlsli" + StructuredBuffer dyn_allocs@buf; + } + + dyn_counters@uav = dagdp__dyn_counters hlsl { + RWStructuredBuffer dyn_counters@uav; + } + + draw_ranges@buf = dagdp_volume__draw_ranges hlsl { + StructuredBuffer draw_ranges@buf; + } + + placeables@buf = dagdp_volume__placeables hlsl { + #include 
"dagdp_common.hlsli" + StructuredBuffer placeables@buf; + } + + placeable_weights@buf = dagdp_volume__placeable_weights hlsl { + StructuredBuffer placeable_weights@buf; + } + + renderable_indices@buf = dagdp_volume__renderable_indices hlsl { + StructuredBuffer renderable_indices@buf; + } + + variants@buf = dagdp_volume__variants hlsl { + #include "dagdp_common_placer.hlsli" + StructuredBuffer variants@buf; + } + + debug_frustum_culling_bias@f1 = dagdp_volume__debug_frustum_culling_bias; + max_placeable_bounding_radius@f1 = dagdp_volume__max_placeable_bounding_radius; + num_renderables@i1 = dagdp_volume__num_renderables; + num_placeables@i1 = dagdp_volume__num_placeables; + + prng_seed_placeable@i1 = dagdp_volume__prng_seed_placeable; + prng_seed_slope@i1 = dagdp_volume__prng_seed_slope; + prng_seed_scale@i1 = dagdp_volume__prng_seed_scale; + prng_seed_yaw@i1 = dagdp_volume__prng_seed_yaw; + prng_seed_pitch@i1 = dagdp_volume__prng_seed_pitch; + prng_seed_roll@i1 = dagdp_volume__prng_seed_roll; + prng_seed_triangle1@i1 = dagdp_volume__prng_seed_triangle1; + prng_seed_triangle2@i1 = dagdp_volume__prng_seed_triangle2; + + viewport_pos@f3 = dagdp_volume__viewport_pos; + viewport_max_distance@f1 = dagdp_volume__viewport_max_distance; + viewport_index@i1 = dagdp_volume__viewport_index; + + mesh_index@i1 = dagdp_volume__mesh_index; + + indexBuf@buf : register(dagdp_volume__index_buf_reg_no) hlsl { + ByteAddressBuffer indexBuf@buf; + } + + vertexBuf@buf : register(dagdp_volume__vertex_buf_reg_no) hlsl { + ByteAddressBuffer vertexBuf@buf; + } + } + + hlsl(cs) { + [numthreads(GROUP_SIZE, 1, 1)] + void main(uint dtId : SV_DispatchThreadID, uint tId : SV_GroupThreadID) + { + ##if shader == dagdp_volume_place_stage1 + // Early exit if the optimistic placement already did the job. + const bool isEarlyExit = structuredBufferAt(dyn_counters, DYN_COUNTERS_INDEX_SKIP_PESSIMISTIC_PLACEMENT) != 0u; + + ##if hardware.dx11 + // Work around compiler assuming varying flow control. 
+ BRANCH + if (threadGroupAnyTrue(isEarlyExit, tId)) + return; + ##else + BRANCH + if (isEarlyExit) + return; + ##endif + ##endif + + const MeshIntersection mesh = structuredBufferAt(meshes, mesh_index); + const float totalArea = structuredBufferAt(areas, mesh.areasIndex + mesh.numFaces - 1); + const VariantGpuData variant = structuredBufferAt(variants, mesh.variantIndex); + const uint instanceCount = uint(totalArea * variant.density); + const float targetSumArea = dtId / variant.density; + const bool isInstanceValid = dtId < instanceCount; + + uint faceIndex; + { + const uint maxIterations = firstbithigh(mesh.numFaces) + 1; + uint faceIndexLower = 0u; + uint faceIndexUpper = mesh.numFaces; + bool wasFound = !isInstanceValid; + + // Binary search for the faceIndex. + LOOP + for (uint i = 0; i < maxIterations; ++i) + { + faceIndex = (faceIndexLower + faceIndexUpper) / 2; + + float low = 0.0; + BRANCH + if (faceIndex > 0) + low = structuredBufferAt(areas, mesh.areasIndex + faceIndex - 1); + const float high = structuredBufferAt(areas, mesh.areasIndex + faceIndex); + + FLATTEN + if (targetSumArea < low) + faceIndexUpper = faceIndex; + else if (targetSumArea >= high) + faceIndexLower = faceIndex; + else + wasFound = true; // Keep iterating for uniform flow. 
+ } + + ##assert(wasFound, "targetSumArea = %f, totalArea = %f, instanceCount = %d, dtId = %d", targetSumArea, totalArea, instanceCount, dtId); + } + + const int2 stablePos = int2(mesh.uniqueId, dtId); + + float3 instancePos; + float3 surfaceNormal; + BRANCH + if (isInstanceValid) + { + float a1 = stableRand(stablePos, prng_seed_triangle1); + float a2 = stableRand(stablePos, prng_seed_triangle2); + + float3 n0, n1, n2; + float3 v0, v1, v2; + load_mesh_triangle(mesh.startIndex, faceIndex, mesh.baseVertex, mesh.stride, v0, v1, v2, n0, n1, n2); + + FLATTEN + if (a1 + a2 > 1.0) + { + a1 = 1.0 - a1; + a2 = 1.0 - a2; + } + + // https://mathworld.wolfram.com/TrianglePointPicking.html + const float4 v = float4(v0 + (v1 - v0) * a1 + (v2 - v0) * a2, 1.0); + const float3 n = normalize(n0 + (n1 - n0) * a1 + (n2 - n0) * a2); + instancePos = float3(dot(v, mesh.tmRow0), dot(v, mesh.tmRow1), dot(v, mesh.tmRow2)); + surfaceNormal = float3(dot(n, mesh.tmRow0.xyz), dot(n, mesh.tmRow1.xyz), dot(n, mesh.tmRow2.xyz)); + } + + const bool isViewportDistanceOk = distance(instancePos, viewport_pos) - max_placeable_bounding_radius < viewport_max_distance; + const bool isFrustumOk = testSphereB(instancePos, max_placeable_bounding_radius + debug_frustum_culling_bias); + const float4 ip4 = float4(instancePos, 1.0); + const bool isBoxOk = all(abs(float3(dot(ip4, mesh.bboxItmRow0), dot(ip4, mesh.bboxItmRow1), dot(ip4, mesh.bboxItmRow2))) <= 0.5); + + const bool isVisible = isInstanceValid && isViewportDistanceOk && isFrustumOk && isBoxOk; + uint placeableIndex = ~0u; + + FLATTEN + if (isVisible) + placeableIndex = getPlaceableIndex(variant, stableRand(stablePos, prng_seed_placeable)); + + PlaceableGpuData placeable; + FLATTEN + if (placeableIndex != ~0u) + { + placeable = structuredBufferAt(placeables, placeableIndex); + const bool isSlopeOk = checkSlope(placeable.slopeFactor, placeable.flags, surfaceNormal, stableRand(stablePos, prng_seed_slope)); + + FLATTEN + if (!isSlopeOk) + placeableIndex = 
~0u; + } + + const float randScale = lerp(placeable.scaleMin, placeable.scaleMax, stableRand(stablePos, prng_seed_scale)); + const uint renderableIndex = getRenderableIndex(variant, placeableIndex, instancePos, viewport_pos, randScale, isVisible); + + bool isPlaced = renderableIndex != ~0u; + const SameRenderableInfo rInfo = getSameRenderableInfo(renderableIndex, tId); + const uint counterIndex = getCounterIndex(renderableIndex, viewport_index, num_renderables); + + DynAlloc allocRegion; + FLATTEN + if (isPlaced) + allocRegion = structuredBufferAt(dyn_allocs, counterIndex); + + BRANCH + if (isPlaced && (rInfo.baseThreadId == tId)) + { + ##assert(rInfo.offset == 0, "must be zero for base thread"); + uint baseInstanceOffset; + InterlockedAdd(structuredBufferAt(dyn_counters, counterIndex + DYN_COUNTERS_PREFIX), rInfo.count, baseInstanceOffset); + g_storage[tId] = baseInstanceOffset; + + if (baseInstanceOffset + rInfo.count > allocRegion.capacity) + { + ##if shader == dagdp_volume_place_stage1 + ##assert(false, "Placement overflowed: %d < %d + %d", allocRegion.capacity, baseInstanceOffset, rInfo.count); + ##endif + + // Placement overflowed, set a flag and don't write out instance data. + g_storage[tId] = ~0u; + uint _ignore; + InterlockedCompareExchange(structuredBufferAt(dyn_counters, DYN_COUNTERS_INDEX_OVERFLOW_FLAG), 0u, ~0u, _ignore); + } + } + + GroupMemoryBarrierWithGroupSync(); + + uint baseInstanceOffset = g_storage[rInfo.baseThreadId & (GROUP_SIZE - 1)]; + BRANCH + if (isPlaced && baseInstanceOffset != ~0u) + { + uint instanceElementOffset = 4 * (allocRegion.instanceBaseIndex + baseInstanceOffset + rInfo.offset); // TODO: float4, float4x3 + ##assert(allocRegion.capacity > baseInstanceOffset + rInfo.offset, "capacity %d must be greater than offset %d + %d. 
(count = %d)", allocRegion.capacity, baseInstanceOffset, rInfo.offset, rInfo.count); + + writeInstance(placeable, instancePos, surfaceNormal, stablePos, randScale, instanceElementOffset); + } + } + } + compile("target_cs", "main"); +} diff --git a/prog/daNetGameLibs/daGdp/shaders/dagdp_volume.hlsli b/prog/daNetGameLibs/daGdp/shaders/dagdp_volume.hlsli new file mode 100644 index 000000000..a99b94636 --- /dev/null +++ b/prog/daNetGameLibs/daGdp/shaders/dagdp_volume.hlsli @@ -0,0 +1,28 @@ +#ifndef DAGDP_VOLUME_HLSLI_INCLUDED +#define DAGDP_VOLUME_HLSLI_INCLUDED + +#define DAGDP_PREFIX_SUM_GROUP_SIZE 256 + +// Reference: gpu_objects_const.hlsli +struct MeshIntersection +{ + float4 bboxItmRow0; + float4 bboxItmRow1; + float4 bboxItmRow2; + + int startIndex; + int numFaces; + int baseVertex; + int stride; + + float4 tmRow0; + float4 tmRow1; + float4 tmRow2; + + uint areasIndex; + uint vbIndex; + uint variantIndex; + uint uniqueId; +}; + +#endif // DAGDP_VOLUME_HLSLI_INCLUDED \ No newline at end of file diff --git a/prog/daNetGameLibs/daGdp/templates/dagdp.template.blk b/prog/daNetGameLibs/daGdp/templates/dagdp.template.blk index 0f087b75a..23a9ec492 100644 --- a/prog/daNetGameLibs/daGdp/templates/dagdp.template.blk +++ b/prog/daNetGameLibs/daGdp/templates/dagdp.template.blk @@ -1,10 +1,25 @@ dagdp_manager { _singleton:b=true _use:t="replicating" + "dagdp_manager:tag"{} "dagdp__global_manager:dagdp::GlobalManager"{ _tags:t="render" } "dagdp__dyn_shadows_manager:dagdp::DynShadowsManager"{ _tags:t="render" } "dagdp__riex_manager:dagdp::RiexManager"{ _tags:t="render" } "dagdp__heightmap_manager:dagdp::HeightmapManager"{ _tags:t="render" } + "dagdp__volume_manager:dagdp::VolumeManager"{ _tags:t="render" } +} + +dagdp_level_settings { + _singleton:b=true + _use:t="replicating" + "dagdp_level_settings:tag"{} + _group { + _track:b=yes + "dagdp__max_objects:i"{ + value:i=0 + _info { desc:t="Maximum number of objects that can be placed." 
} + } + } } dagdp_base { @@ -114,3 +129,28 @@ dagdp_object_group_riex { "dagdp__riex_preload:dagdp::RiexPreload"{ _tags:t="render" } _info { desc:t="Will render one of the specified RendInst Extra assets, when used in a rule." } } + +dagdp_placer_volume { + _use:t="dagdp_placer" + _use:t="dagdp_has_density" + "dagdp_placer_volume:tag"{} + _info { desc:t="Will place objects on geometry inside volumes that reference this placer." } +} + +dagdp_volume { + "box_zone:tag"{} // For daEditorE's visualization of `transform`. + "dagdp_volume:tag"{} + _group { + _track:b=yes + "transform:m"{ + value:m=[[1, 0, 0] [0, 1, 0] [0, 0, 1] [0, 0, 0]] + _info { desc:t="Bounding box of the volume." } + } + "dagdp__volume_placer_name:t"{ + value:t="" + _info { desc:t="Name of the placer that should be used." } + } + "dagdp_internal__volume_placer_eid:eid"{} + } + _info { desc:t="A volume, inside which a referenced placer will place objects." } +} diff --git a/prog/daNetGameLibs/sound/sound_utils/es/sound_debug.das b/prog/daNetGameLibs/sound/sound_utils/es/sound_debug.das index 92dee7b5d..6e976d9bc 100644 --- a/prog/daNetGameLibs/sound/sound_utils/es/sound_debug.das +++ b/prog/daNetGameLibs/sound/sound_utils/es/sound_debug.das @@ -10,6 +10,7 @@ require DagorConsole require math.color require math.base require strings +require RendInst [es(no_order, tag=sound, on_event=EventSoundDrawDebug, REQUIRE=soundDebugDrawTemplate)] @@ -125,3 +126,33 @@ def sound_debug_console_sound_play_pos(path : string; x, y, z : float; duration [console_cmd(name="snd.enum_events")] def sound_debug_console_enum_events() sound_debug_enum_events() + + +[console_cmd(name="snd.debug_draw_ri")] +def sound_debug_draw_ri_console_cmd(name : string) + if !empty(name) + var entEid : EntityId + query() <| $ [es] (eid : EntityId; sound_debug_draw_ri__name : string) + if sound_debug_draw_ri__name == name + entEid = eid + if !entEid + let resIdx = get_rigen_extra_res_idx(name) + if resIdx < 0 + logerr("no such res idx for 
'{name}'") + return + createEntity("sound_debug_draw_ri") <| $(var init : ComponentsInitializer) + set(init, "sound_debug_draw_ri__name", name) + else + destroyEntity(entEid) + + +[es(tag=sound, on_event=EventSoundDrawDebug)] +def sound_debug_draw_ri(evt : Event; + sound_debug_draw_ri__name : string) + + let resIdx = get_rigen_extra_res_idx(sound_debug_draw_ri__name) + get_ri_gen_extra_instances(resIdx) <| $(handles : array#) + for handle in handles + let desc = RendInstDesc(handle) + let tm = getRIGenMatrix(desc) + add_debug_text_mark(tm[3], "ri='{sound_debug_draw_ri__name}' dist={int(distance(get_listener_pos(), tm[3]))}", -1, 0., E3DCOLOR(0xffffffff)) diff --git a/prog/daNetGameLibs/sound/sound_utils/templates/sound_utils.template.blk b/prog/daNetGameLibs/sound/sound_utils/templates/sound_utils.template.blk index 6aba44553..3da38258f 100644 --- a/prog/daNetGameLibs/sound/sound_utils/templates/sound_utils.template.blk +++ b/prog/daNetGameLibs/sound/sound_utils/templates/sound_utils.template.blk @@ -24,6 +24,13 @@ sound_debug_draw_template{ "soundDebugDrawTemplate:tag"{_tags:t="sound";} } +sound_debug_draw_ri{ + _group{ + _tags:t="sound" + sound_debug_draw_ri__name:t="" + } +} + sound_banks_state{ _group{ _tags:t="sound" diff --git a/prog/gameLibs/gamePhys/collision/contactSolver.cpp b/prog/gameLibs/gamePhys/collision/contactSolver.cpp index 268ff3a0b..484dd3d46 100644 --- a/prog/gameLibs/gamePhys/collision/contactSolver.cpp +++ b/prog/gameLibs/gamePhys/collision/contactSolver.cpp @@ -353,17 +353,22 @@ void ContactSolver::addBody(IPhysBase *phys, float bounding_radius, uint32_t fla if (!phys) return; + int vacantBodyInd = -1; for (int i = 0; i < bodies.size(); ++i) + { if (bodies[i].phys == phys) return; + else if (!bodies[i].phys && vacantBodyInd < 0) + vacantBodyInd = i; + } - Body &b = bodies.push_back(); + Body &b = (vacantBodyInd < 0) ? 
bodies.push_back() : bodies[vacantBodyInd]; b.phys = phys; b.boundingRadius = bounding_radius; b.flags = flags; b.layer = layer; - BodyState &state = bodyStates.push_back(); + BodyState &state = (vacantBodyInd < 0) ? bodyStates.push_back() : bodyStates[vacantBodyInd]; ::memset(&state, 0, sizeof(BodyState)); state.shouldCheckContactWithGround = true; diff --git a/prog/gameLibs/publicInclude/gamePhys/collision/contactSolver.h b/prog/gameLibs/publicInclude/gamePhys/collision/contactSolver.h index aa61f33c0..dcda2e91b 100644 --- a/prog/gameLibs/publicInclude/gamePhys/collision/contactSolver.h +++ b/prog/gameLibs/publicInclude/gamePhys/collision/contactSolver.h @@ -156,7 +156,7 @@ class ContactSolver BodyState groundState; - static const size_t MAX_LAYERS_NUM = 4; + static const size_t MAX_LAYERS_NUM = 5; eastl::bitset layersMask; BodyState &getState(int index); diff --git a/prog/tools/ShaderCompiler2/shCode.cpp b/prog/tools/ShaderCompiler2/shCode.cpp index 5ccc576b8..76711889e 100644 --- a/prog/tools/ShaderCompiler2/shCode.cpp +++ b/prog/tools/ShaderCompiler2/shCode.cpp @@ -73,7 +73,7 @@ unsigned int ShaderCode::getVertexStride() const void ShaderClass::sortStaticVarsByMode() { Tab> argvars{}; - argvars.reserve(stvar.size()); + argvars.resize(stvar.size()); int i = 0; eastl::transform(stvar.cbegin(), stvar.cend(), argvars.begin(), [&i](const Var &var) { return eastl::make_pair(var, i++); }); diff --git a/prog/tools/dargbox/gamebase/samples_prog/_basic/components/scrollbar.nut b/prog/tools/dargbox/gamebase/samples_prog/_basic/components/scrollbar.nut index e4f1b9a09..7b31136b3 100644 --- a/prog/tools/dargbox/gamebase/samples_prog/_basic/components/scrollbar.nut +++ b/prog/tools/dargbox/gamebase/samples_prog/_basic/components/scrollbar.nut @@ -26,7 +26,7 @@ function mkScrollbar(scroll_handler, orientation=null, needReservePlace=true) { let knob = @() { watch = stateFlags key = "knob" - size = [flex(), flex()] + size = flex() rendObj = ROBJ_SOLID color = 
calcKnobColor(stateFlags.get()) } @@ -50,38 +50,35 @@ function mkScrollbar(scroll_handler, orientation=null, needReservePlace=true) { } } - return { - isElemFit - function scrollComp() { - if (isElemFit.get()) - return { - watch = isElemFit - key = scroll_handler - behavior = Behaviors.Slider - size = needReservePlace ? calcBarSize(isVertical) : null - } + return function scrollComp() { + if (isElemFit.get()) return { - watch = [isElemFit, maxV, elemSize] + watch = isElemFit key = scroll_handler - size = calcBarSize(isVertical) - behavior = Behaviors.Slider - orientation - fValue = fValue.get() - knob - min = 0 - max = maxV.get() - unit = 1 - pageScroll = (isVertical ? 1 : -1) * maxV.get() / 100.0 // TODO probably needed sync with container wheelStep option - onChange = @(val) isVertical ? scroll_handler.scrollToY(val) - : scroll_handler.scrollToX(val) - onElemState = @(sf) stateFlags.set(sf) - - rendObj = ROBJ_SOLID - color = barColor - - children = view + size = needReservePlace ? calcBarSize(isVertical) : null } + return { + watch = [isElemFit, maxV, elemSize] + key = scroll_handler + size = calcBarSize(isVertical) + + behavior = Behaviors.Slider + orientation + fValue = fValue.get() + knob + min = 0 + max = maxV.get() + unit = 1 + pageScroll = (isVertical ? 1 : -1) * maxV.get() / 100.0 // TODO probably needed sync with container wheelStep option + onChange = @(val) isVertical ? scroll_handler.scrollToY(val) + : scroll_handler.scrollToX(val) + onElemState = @(sf) stateFlags.set(sf) + + rendObj = ROBJ_SOLID + color = barColor + + children = view } } } @@ -102,13 +99,12 @@ let makeSideScroll = kwarg(function(content, scrollAlign = ALIGN_RIGHT, orientat children = content } - let { isElemFit, scrollComp } = mkScrollbar(scrollHandler, orientation, needReservePlace) + let scrollComp = mkScrollbar(scrollHandler, orientation, needReservePlace) let children = scrollAlign == ALIGN_LEFT || scrollAlign == ALIGN_TOP ? 
[scrollComp, contentRoot] : [contentRoot, scrollComp] - return @() { - watch = isElemFit + return { size maxHeight maxWidth @@ -142,10 +138,10 @@ let makeHVScrolls = function(content, options = null ) { clipChildren = true children = [ contentRoot - mkScrollbar(scrollHandler, O_VERTICAL, needReservePlace).scrollComp + mkScrollbar(scrollHandler, O_VERTICAL, needReservePlace) ] } - mkScrollbar(scrollHandler, O_HORIZONTAL, needReservePlace).scrollComp + mkScrollbar(scrollHandler, O_HORIZONTAL, needReservePlace) ] } } diff --git a/samples/dngSceneViewer/prog/jamfile b/samples/dngSceneViewer/prog/jamfile index e4cb05760..710d52164 100644 --- a/samples/dngSceneViewer/prog/jamfile +++ b/samples/dngSceneViewer/prog/jamfile @@ -100,7 +100,7 @@ else { if $(FmodStudio) != src && $(HaveSound) != no { AddLibs += $(FmodStaticLibs) ; - for dll in $(FmodStudioSharedLibs) { BundleCopy += @$(dll) $(dll:D=) ; } + if $(Platform) != macOS { for dll in $(FmodStudioSharedLibs) { BundleCopy += @$(dll) $(dll:D=) ; } } } if $(CheckOnly) != yes { # different compile options are not compatible with PCH