From dd31587337b4b7017b72cd77cce54278fa7f4047 Mon Sep 17 00:00:00 2001 From: Bill Hollings Date: Sat, 12 Aug 2023 13:32:28 -0400 Subject: [PATCH 01/21] Fix sync delay between calls to vkQueueSubmit() on non-Apple-Silicon devices. The [MTLDevice sampleTimestamps:gpuTimestamp:] function turns out to be synchronized with other queue activities, and can block GPU execution if it is called between MTLCommandBuffer submissions. On non-Apple-Silicon devices, it was called before and after every vkQueueSubmit() submission, to track the correlation between GPU and CPU timestamps, and was delaying the start of GPU work on the next submission (on Apple Silicon, both CPU & GPU timestamps are specified in nanoseconds, and the call was bypassed). Move timestamp correlation from vkQueueSubmit() to vkGetPhysicalDeviceProperties(), where it is used to update VkPhysicalDeviceLimits::timestampPeriod on non-Apple-Silicon devices. Delegate MVKPhysicalDevice::getProperties(VkPhysicalDeviceProperties2*) to MVKPhysicalDevice::getProperties(VkPhysicalDeviceProperties*), plus minimize wasted effort if pNext is empty (unrelated). Move the declaration of several MVKPhysicalDevice member structs to potentially reduce member spacing (unrelated). --- Docs/Whats_New.md | 1 + MoltenVK/MoltenVK/GPUObjects/MVKDevice.h | 23 ++++--------- MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm | 39 +++++++++++------------ MoltenVK/MoltenVK/GPUObjects/MVKQueue.h | 3 -- MoltenVK/MoltenVK/GPUObjects/MVKQueue.mm | 7 ---- 5 files changed, 26 insertions(+), 47 deletions(-) diff --git a/Docs/Whats_New.md b/Docs/Whats_New.md index 9e0fc42ac..d473ae912 100644 --- a/Docs/Whats_New.md +++ b/Docs/Whats_New.md @@ -363,6 +363,7 @@ Released 2022/02/07 - Fix issue where *MSL 2.3* only available on *Apple Silicon*, even on *macOS*. - Fix memory leak of dummy `MTLTexture` in render subpasses that use no attachments. - Fix Metal object retain-release errors in assignment operators. 
+- Fix sync delay between calls to `vkQueueSubmit()` on non-Apple-Silicon devices. - Fix use of GPU counter sets on older versions of iOS running on the simulator. - `mvk::getShaderOutputs()` in `SPRIVReflection.h` support flattening nested structures. - Replaced ASL logging levels with `MVKConfigLogLevel`. diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h index 5186fe4a0..450fad661 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h +++ b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h @@ -356,20 +356,6 @@ class MVKPhysicalDevice : public MVKDispatchableVulkanAPIObject { return _metalFeatures.argumentBuffers && mvkConfig().useMetalArgumentBuffers != MVK_CONFIG_USE_METAL_ARGUMENT_BUFFERS_NEVER; }; - /** - * Returns the start timestamps of a timestamp correlation. - * The returned values should be later passed back to updateTimestampPeriod(). - */ - void startTimestampCorrelation(MTLTimestamp& cpuStart, MTLTimestamp& gpuStart); - - /** - * Updates the current value of VkPhysicalDeviceLimits::timestampPeriod, based on the - * correlation between the CPU time tickes and GPU time ticks, from the specified start - * values, to the current values. The cpuStart and gpuStart values should have been - * retrieved from a prior call to startTimestampCorrelation(). 
- */ - void updateTimestampPeriod(MTLTimestamp cpuStart, MTLTimestamp gpuStart); - #pragma mark Construction @@ -416,6 +402,7 @@ class MVKPhysicalDevice : public MVKDispatchableVulkanAPIObject { void initExtensions(); void initCounterSets(); bool needsCounterSetRetained(); + void updateTimestampsAndPeriod(); MVKArrayRef getQueueFamilies(); void initPipelineCacheUUID(); uint32_t getHighestGPUCapability(); @@ -435,16 +422,18 @@ class MVKPhysicalDevice : public MVKDispatchableVulkanAPIObject { VkPhysicalDeviceMemoryProperties _memoryProperties; MVKSmallVector _queueFamilies; MVKPixelFormats _pixelFormats; + VkExternalMemoryProperties _hostPointerExternalMemoryProperties; + VkExternalMemoryProperties _mtlBufferExternalMemoryProperties; + VkExternalMemoryProperties _mtlTextureExternalMemoryProperties; id _timestampMTLCounterSet; MVKSemaphoreStyle _vkSemaphoreStyle; + MTLTimestamp _prevCPUTimestamp = 0; + MTLTimestamp _prevGPUTimestamp = 0; uint32_t _allMemoryTypes; uint32_t _hostVisibleMemoryTypes; uint32_t _hostCoherentMemoryTypes; uint32_t _privateMemoryTypes; uint32_t _lazilyAllocatedMemoryTypes; - VkExternalMemoryProperties _hostPointerExternalMemoryProperties; - VkExternalMemoryProperties _mtlBufferExternalMemoryProperties; - VkExternalMemoryProperties _mtlTextureExternalMemoryProperties; }; diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm index 3c77519db..59f90df35 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm @@ -451,10 +451,17 @@ } void MVKPhysicalDevice::getProperties(VkPhysicalDeviceProperties* properties) { + updateTimestampsAndPeriod(); *properties = _properties; } void MVKPhysicalDevice::getProperties(VkPhysicalDeviceProperties2* properties) { + + properties->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2; + getProperties(&properties->properties); + + if ( !properties->pNext ) { return; } + uint32_t uintMax = std::numeric_limits::max(); 
uint32_t maxSamplerCnt = getMaxSamplerCount(); bool isTier2 = supportsMetalArgumentBuffers() && (_metalFeatures.argumentBuffersTier >= MTLArgumentBuffersTier2); @@ -536,8 +543,6 @@ supportedProps12.maxTimelineSemaphoreValueDifference = std::numeric_limits::max(); supportedProps12.framebufferIntegerColorSampleCounts = _metalFeatures.supportedSampleCounts; - properties->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2; - properties->properties = _properties; for (auto* next = (VkBaseOutStructure*)properties->pNext; next; next = next->pNext) { switch ((uint32_t)next->sType) { case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_PROPERTIES: { @@ -1562,23 +1567,17 @@ return rslt; } -// Don't need to do this for Apple GPUs, where the GPU and CPU timestamps -// are the same, or if we're not using GPU timestamp counters. -void MVKPhysicalDevice::startTimestampCorrelation(MTLTimestamp& cpuStart, MTLTimestamp& gpuStart) { - if (_properties.vendorID == kAppleVendorId || !_timestampMTLCounterSet) { return; } - [_mtlDevice sampleTimestamps: &cpuStart gpuTimestamp: &gpuStart]; -} - -// Don't need to do this for Apple GPUs, where the GPU and CPU timestamps -// are the same, or if we're not using GPU timestamp counters. -void MVKPhysicalDevice::updateTimestampPeriod(MTLTimestamp cpuStart, MTLTimestamp gpuStart) { - if (_properties.vendorID == kAppleVendorId || !_timestampMTLCounterSet) { return; } - - MTLTimestamp cpuEnd; - MTLTimestamp gpuEnd; - [_mtlDevice sampleTimestamps: &cpuEnd gpuTimestamp: &gpuEnd]; - - _properties.limits.timestampPeriod = (double)(cpuEnd - cpuStart) / (double)(gpuEnd - gpuStart); +// Mark both CPU and GPU timestamps, and if needed, update the timestamp period for this device. +// On Apple GPUs, the CPU & GPU timestamps are the same, and the timestamp period never changes. 
+void MVKPhysicalDevice::updateTimestampsAndPeriod() { + if (_properties.vendorID == kAppleVendorId) { + _prevGPUTimestamp = _prevCPUTimestamp = mvkGetElapsedNanoseconds(); + } else { + MTLTimestamp earlierCPUTs = _prevCPUTimestamp; + MTLTimestamp earlierGPUTs = _prevGPUTimestamp; + [_mtlDevice sampleTimestamps: &_prevCPUTimestamp gpuTimestamp: &_prevGPUTimestamp]; + _properties.limits.timestampPeriod = (double)(_prevCPUTimestamp - earlierCPUTs) / (double)(_prevGPUTimestamp - earlierGPUTs); + } } @@ -2606,7 +2605,7 @@ _properties.limits.optimalBufferCopyRowPitchAlignment = 1; _properties.limits.timestampComputeAndGraphics = VK_TRUE; - _properties.limits.timestampPeriod = _metalFeatures.counterSamplingPoints ? 1.0 : mvkGetTimestampPeriod(); + _properties.limits.timestampPeriod = mvkGetTimestampPeriod(); // Will be 1.0 on Apple Silicon _properties.limits.pointSizeRange[0] = 1; switch (_properties.vendorID) { diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKQueue.h b/MoltenVK/MoltenVK/GPUObjects/MVKQueue.h index 96d77bc18..bcefd2f37 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKQueue.h +++ b/MoltenVK/MoltenVK/GPUObjects/MVKQueue.h @@ -255,11 +255,8 @@ class MVKQueueFullCommandBufferSubmission : public MVKQueueCommandBufferSubmissi protected: void submitCommandBuffers() override; - void finish() override; MVKSmallVector _cmdBuffers; - MTLTimestamp _cpuStart = 0; - MTLTimestamp _gpuStart = 0; }; diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKQueue.mm b/MoltenVK/MoltenVK/GPUObjects/MVKQueue.mm index 96b34cc3b..0ad143072 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKQueue.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKQueue.mm @@ -524,16 +524,9 @@ template void MVKQueueFullCommandBufferSubmission::submitCommandBuffers() { - _queue->getPhysicalDevice()->startTimestampCorrelation(_cpuStart, _gpuStart); for (auto& cb : _cmdBuffers) { cb->submit(this, &_encodingContext); } } -template -void MVKQueueFullCommandBufferSubmission::finish() { - 
 _queue->getPhysicalDevice()->updateTimestampPeriod(_cpuStart, _gpuStart); - MVKQueueCommandBufferSubmission::finish(); -} - #pragma mark - #pragma mark MVKQueuePresentSurfaceSubmission From 41a5a97fefa27586a91d5aeb3369c27a9d26e942 Mon Sep 17 00:00:00 2001 From: Bill Hollings Date: Sun, 13 Aug 2023 20:14:20 -0400 Subject: [PATCH 02/21] Ensure Xcode simulator always uses 256B buffer alignment. Xcode simulator always requires 256B buffer alignment, even when running on Apple Silicon. Previously, it was assumed that Apple Silicon would use its native 16B buffer alignment. --- Docs/Whats_New.md | 3 ++- MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm | 6 +++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/Docs/Whats_New.md b/Docs/Whats_New.md index 9e0fc42ac..6369370c7 100644 --- a/Docs/Whats_New.md +++ b/Docs/Whats_New.md @@ -28,9 +28,10 @@ Released TBD - Add support for `VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN`. - Support building MoltenVK for visionOS. - Ensure non-dispatch compute commands don't interfere with compute encoding state used by dispatch commands. -- Support `VK_PRESENT_MODE_IMMEDIATE_KHR` if `VkPresentTimeGOOGLE::desiredPresentTime` is zero. - Support maximizing the concurrent executing compilation tasks via `MVKConfiguration::shouldMaximizeConcurrentCompilation` +- Support `VK_PRESENT_MODE_IMMEDIATE_KHR` if `VkPresentTimeGOOGLE::desiredPresentTime` is zero. - Add support for `VK_PRESENT_MODE_IMMEDIATE_KHR` to macOS Cube demo. +- Ensure Xcode simulator always uses 256B buffer alignment. - Log more info about SPIR-V to MSL conversion errors. - Drop official support for using *Xcode 11* to build MoltenVK. diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm index 3c77519db..35d4a8eff 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm @@ -2157,9 +2157,9 @@ #endif } -// iOS and tvOS adjustments necessary when running in the simulator on non-Apple GPUs. 
-#if MVK_OS_SIMULATOR && !MVK_APPLE_SILICON - _metalFeatures.mtlBufferAlignment = 256; +// iOS and tvOS adjustments necessary when running on the simulator. +#if MVK_OS_SIMULATOR + _metalFeatures.mtlBufferAlignment = 256; // Even on Apple Silicon #endif // Currently, Metal argument buffer support is in beta stage, and is only supported From 4fe88116657fb05e07e84428ea7228353cc482c0 Mon Sep 17 00:00:00 2001 From: Bill Hollings Date: Tue, 15 Aug 2023 16:03:31 -0400 Subject: [PATCH 03/21] Update dependency libraries to match Vulkan SDK 1.3.261. - In MoltenVK Xcode projects, set iOS & tvOS deployment targets to 12.0, to avoid warnings while building MoltenVK. - Add DYLD_LIBRARY_PATH to runcts script, to ensure Vulkan and MoltenVK libraries are found during CTS runs. - Update Whats_New.md and MoltenVK_Runtime_UserGuide.md documents. --- Docs/MoltenVK_Runtime_UserGuide.md | 17 ++++++++++++ Docs/Whats_New.md | 25 ++++++++++++++++-- .../project.pbxproj | 8 +++--- ExternalRevisions/SPIRV-Cross_repo_revision | 2 +- .../Vulkan-Headers_repo_revision | 2 +- ExternalRevisions/Vulkan-Tools_repo_revision | 2 +- ExternalRevisions/glslang_repo_revision | 2 +- MoltenVK/MoltenVK.xcodeproj/project.pbxproj | 12 ++++----- .../project.pbxproj | 8 +++--- Scripts/runcts | 3 +++ Templates/spirv-tools/build.zip | Bin 53948 -> 54542 bytes 11 files changed, 61 insertions(+), 20 deletions(-) diff --git a/Docs/MoltenVK_Runtime_UserGuide.md b/Docs/MoltenVK_Runtime_UserGuide.md index f2434d10a..f236a7e97 100644 --- a/Docs/MoltenVK_Runtime_UserGuide.md +++ b/Docs/MoltenVK_Runtime_UserGuide.md @@ -32,6 +32,7 @@ Table of Contents - [Performance Considerations](#performance) - [Shader Loading Time](#shader_load_time) - [Swapchains](#swapchains) + - [Timestamping](#timestamping) - [Xcode Configuration](#xcode_config) - [Metal System Trace Tool](#trace_tool) - [Known **MoltenVK** Limitations](#limitations) @@ -327,6 +328,7 @@ In addition to core *Vulkan* functionality, **MoltenVK** also supports the foll 
- `VK_KHR_get_surface_capabilities2` - `VK_KHR_imageless_framebuffer` - `VK_KHR_image_format_list` +- `VK_KHR_incremental_present` - `VK_KHR_maintenance1` - `VK_KHR_maintenance2` - `VK_KHR_maintenance3` @@ -369,6 +371,7 @@ In addition to core *Vulkan* functionality, **MoltenVK** also supports the foll - `VK_EXT_metal_objects` - `VK_EXT_metal_surface` - `VK_EXT_pipeline_creation_cache_control` +- `VK_EXT_pipeline_creation_feedback` - `VK_EXT_post_depth_coverage` *(iOS and macOS, requires family 4 (A11) or better Apple GPU)* - `VK_EXT_private_data ` - `VK_EXT_robustness2` @@ -581,6 +584,20 @@ than when using an internal compositor, which increases the risk that a swapchai vailable when you request it, resulting in frame delays and visual stuttering. + +### Timestamping + +On non-Apple Silicon devices (older Mac devices), the GPU can switch power and performance +states as required by usage. This affects the GPU timestamps retrievable through the Vulkan +API. As a result, the value of `VkPhysicalDeviceLimits::timestampPeriod` can vary over time. +Consider calling `vkGetPhysicalDeviceProperties()`, when needed, and retrieve the current +value of `VkPhysicalDeviceLimits::timestampPeriod`, to help you calibrate recent GPU +timestamps queried through the Vulkan API. + +This is not needed on Apple Silicon devices, where all GPU timestamps are always returned +as nanoseconds, regardless of variations in power and performance states as the app runs. 
+ + ### Xcode Configuration diff --git a/Docs/Whats_New.md b/Docs/Whats_New.md index 6a69083d9..68332ce77 100644 --- a/Docs/Whats_New.md +++ b/Docs/Whats_New.md @@ -16,12 +16,14 @@ Copyright (c) 2015-2023 [The Brenwill Workshop Ltd.](http://www.brenwill.com) MoltenVK 1.2.5 -------------- -Released TBD +Released 2023/08/15 - Add support for extensions: + - `VK_KHR_incremental_present` - `VK_KHR_shader_non_semantic_info` - `VK_EXT_4444_formats` - `VK_EXT_calibrated_timestamps` + - `VK_EXT_pipeline_creation_feedback` - `VK_EXT_shader_demote_to_helper_invocation` - `VK_EXT_shader_subgroup_ballot` - `VK_EXT_shader_subgroup_vote` @@ -31,9 +33,29 @@ Released TBD - Support maximizing the concurrent executing compilation tasks via `MVKConfiguration::shouldMaximizeConcurrentCompilation` - Support `VK_PRESENT_MODE_IMMEDIATE_KHR` if `VkPresentTimeGOOGLE::desiredPresentTime` is zero. - Add support for `VK_PRESENT_MODE_IMMEDIATE_KHR` to macOS Cube demo. +- Allow both `renderPass` and `VkPipelineRenderingCreateInfo` to be missing. +- Fix sync delay between calls to `vkQueueSubmit()` on non-Apple-Silicon devices. - Ensure Xcode simulator always uses 256B buffer alignment. +- Don't attempt to force the window system to use the same high-power GPU as the app, on every swapchain creation. - Log more info about SPIR-V to MSL conversion errors. +- Implement Deferred Host Operations. +- Support _MSL Version 3.1_. - Drop official support for using *Xcode 11* to build MoltenVK. +- To allow building MoltenVK without an internet connection, don't fetch a submodule if the commit is already known. +- Update dependency libraries to match _Vulkan SDK 1.3.261_. +- Update to latest SPIRV-Cross: + - MSL: Fix argument buffer padding when content includes arrays. + - MSL: ray-query intersection params + - MSL: Support `SPV_KHR_shader_ballot` and `SPV_KHR_subgroup_vote`. + - Skip line directives when emitting loop condition blocks. 
+ - MSL: Consider changed array types for array-of-constant-bool in struct. + - MSL: Consider bool-short remapping for constant expressions as well. + - Minor cleanup in constant_expression(). + - MSL: Add test for bool-in-struct edge cases. + - MSL: Handle more complex array copy scenarios with bool <-> short. + - MSL: Handle stores to struct bool[]. + - MSL: Consider bool/short remapping when dealing with composites. + - MSL: fix function constant deduplication misfire @@ -364,7 +386,6 @@ Released 2022/02/07 - Fix issue where *MSL 2.3* only available on *Apple Silicon*, even on *macOS*. - Fix memory leak of dummy `MTLTexture` in render subpasses that use no attachments. - Fix Metal object retain-release errors in assignment operators. -- Fix sync delay between calls to `vkQueueSubmit()` on non-Apple-Silicon devices. - Fix use of GPU counter sets on older versions of iOS running on the simulator. - `mvk::getShaderOutputs()` in `SPRIVReflection.h` support flattening nested structures. - Replaced ASL logging levels with `MVKConfigLogLevel`. 
diff --git a/ExternalDependencies.xcodeproj/project.pbxproj b/ExternalDependencies.xcodeproj/project.pbxproj index 8cdb8e767..fdbcf75f2 100644 --- a/ExternalDependencies.xcodeproj/project.pbxproj +++ b/ExternalDependencies.xcodeproj/project.pbxproj @@ -7204,11 +7204,11 @@ GCC_WARN_UNUSED_FUNCTION = YES; GCC_WARN_UNUSED_PARAMETER = YES; GCC_WARN_UNUSED_VARIABLE = NO; - IPHONEOS_DEPLOYMENT_TARGET = 11.0; + IPHONEOS_DEPLOYMENT_TARGET = 12.0; MACH_O_TYPE = staticlib; MACOSX_DEPLOYMENT_TARGET = 10.13; SKIP_INSTALL = YES; - TVOS_DEPLOYMENT_TARGET = 11.0; + TVOS_DEPLOYMENT_TARGET = 12.0; }; name = Debug; }; @@ -7254,11 +7254,11 @@ GCC_WARN_UNUSED_FUNCTION = YES; GCC_WARN_UNUSED_PARAMETER = YES; GCC_WARN_UNUSED_VARIABLE = NO; - IPHONEOS_DEPLOYMENT_TARGET = 11.0; + IPHONEOS_DEPLOYMENT_TARGET = 12.0; MACH_O_TYPE = staticlib; MACOSX_DEPLOYMENT_TARGET = 10.13; SKIP_INSTALL = YES; - TVOS_DEPLOYMENT_TARGET = 11.0; + TVOS_DEPLOYMENT_TARGET = 12.0; VALIDATE_PRODUCT = YES; }; name = Release; diff --git a/ExternalRevisions/SPIRV-Cross_repo_revision b/ExternalRevisions/SPIRV-Cross_repo_revision index ce73e44aa..590969e2f 100644 --- a/ExternalRevisions/SPIRV-Cross_repo_revision +++ b/ExternalRevisions/SPIRV-Cross_repo_revision @@ -1 +1 @@ -aafcc207ea82308722124db2575aa95f42cb99c9 +bccaa94db814af33d8ef05c153e7c34d8bd4d685 diff --git a/ExternalRevisions/Vulkan-Headers_repo_revision b/ExternalRevisions/Vulkan-Headers_repo_revision index e48127cb9..a8ebecec5 100644 --- a/ExternalRevisions/Vulkan-Headers_repo_revision +++ b/ExternalRevisions/Vulkan-Headers_repo_revision @@ -1 +1 @@ -9e61870ecbd32514113b467e0a0c46f60ed222c7 +85c2334e92e215cce34e8e0ed8b2dce4700f4a50 diff --git a/ExternalRevisions/Vulkan-Tools_repo_revision b/ExternalRevisions/Vulkan-Tools_repo_revision index 4359b342d..bf77fda05 100644 --- a/ExternalRevisions/Vulkan-Tools_repo_revision +++ b/ExternalRevisions/Vulkan-Tools_repo_revision @@ -1 +1 @@ -695887a994ef9cc00a7aa3f9c00b31a56ea79534 
+300d9bf6b3cf7b237ee5e2c1d0ae10b9236f82d3 diff --git a/ExternalRevisions/glslang_repo_revision b/ExternalRevisions/glslang_repo_revision index d336b0428..aba7fbb10 100644 --- a/ExternalRevisions/glslang_repo_revision +++ b/ExternalRevisions/glslang_repo_revision @@ -1 +1 @@ -d1517d64cfca91f573af1bf7341dc3a5113349c0 +76b52ebf77833908dc4c0dd6c70a9c357ac720bd diff --git a/MoltenVK/MoltenVK.xcodeproj/project.pbxproj b/MoltenVK/MoltenVK.xcodeproj/project.pbxproj index c18c9b14c..202efa912 100644 --- a/MoltenVK/MoltenVK.xcodeproj/project.pbxproj +++ b/MoltenVK/MoltenVK.xcodeproj/project.pbxproj @@ -478,7 +478,7 @@ DCFD7F4A2A45BC6E007BBBF7 /* MVKRenderPass.mm in Sources */ = {isa = PBXBuildFile; fileRef = A94FB7941C7DFB4800632CA3 /* MVKRenderPass.mm */; }; DCFD7F4B2A45BC6E007BBBF7 /* MVKCmdTransfer.mm in Sources */ = {isa = PBXBuildFile; fileRef = A94FB76D1C7DFB4800632CA3 /* MVKCmdTransfer.mm */; }; DCFD7F4C2A45BC6E007BBBF7 /* MVKCmdQueries.mm in Sources */ = {isa = PBXBuildFile; fileRef = A94FB7711C7DFB4800632CA3 /* MVKCmdQueries.mm */; }; - DCFD7F4D2A45BC6E007BBBF7 /* vk_mvk_moltenvk.mm in Sources */ = {isa = PBXBuildFile; fileRef = A94FB7AC1C7DFB4800632CA3 /* vk_mvk_moltenvk.mm */; }; + DCFD7F4D2A45BC6E007BBBF7 /* mvk_api.mm in Sources */ = {isa = PBXBuildFile; fileRef = A94FB7AC1C7DFB4800632CA3 /* mvk_api.mm */; }; DCFD7F4E2A45BC6E007BBBF7 /* MVKSwapchain.mm in Sources */ = {isa = PBXBuildFile; fileRef = A94FB79C1C7DFB4800632CA3 /* MVKSwapchain.mm */; }; DCFD7F4F2A45BC6E007BBBF7 /* MVKCommandEncoderState.mm in Sources */ = {isa = PBXBuildFile; fileRef = A95B7D681D3EE486003183D3 /* MVKCommandEncoderState.mm */; }; DCFD7F502A45BC6E007BBBF7 /* MVKGPUCapture.mm in Sources */ = {isa = PBXBuildFile; fileRef = A93E83342121F0C8001FEBD4 /* MVKGPUCapture.mm */; }; @@ -1860,7 +1860,7 @@ DCFD7F4A2A45BC6E007BBBF7 /* MVKRenderPass.mm in Sources */, DCFD7F4B2A45BC6E007BBBF7 /* MVKCmdTransfer.mm in Sources */, DCFD7F4C2A45BC6E007BBBF7 /* MVKCmdQueries.mm in Sources */, - 
DCFD7F4D2A45BC6E007BBBF7 /* vk_mvk_moltenvk.mm in Sources */, + DCFD7F4D2A45BC6E007BBBF7 /* mvk_api.mm in Sources */, DCFD7F4E2A45BC6E007BBBF7 /* MVKSwapchain.mm in Sources */, DCFD7F4F2A45BC6E007BBBF7 /* MVKCommandEncoderState.mm in Sources */, DCFD7F502A45BC6E007BBBF7 /* MVKGPUCapture.mm in Sources */, @@ -2001,7 +2001,7 @@ "\"$(SRCROOT)/../External/cereal/include\"", "\"${BUILT_PRODUCTS_DIR}\"", ); - IPHONEOS_DEPLOYMENT_TARGET = 11.0; + IPHONEOS_DEPLOYMENT_TARGET = 12.0; MACH_O_TYPE = staticlib; MACOSX_DEPLOYMENT_TARGET = 10.13; MTL_ENABLE_DEBUG_INFO = YES; @@ -2011,7 +2011,7 @@ PRELINK_LIBS = "${CONFIGURATION_BUILD_DIR}/libMoltenVKShaderConverter.a"; PRODUCT_NAME = MoltenVK; SKIP_INSTALL = YES; - TVOS_DEPLOYMENT_TARGET = 11.0; + TVOS_DEPLOYMENT_TARGET = 12.0; WARNING_CFLAGS = "-Wreorder"; }; name = Debug; @@ -2072,7 +2072,7 @@ "\"$(SRCROOT)/../External/cereal/include\"", "\"${BUILT_PRODUCTS_DIR}\"", ); - IPHONEOS_DEPLOYMENT_TARGET = 11.0; + IPHONEOS_DEPLOYMENT_TARGET = 12.0; MACH_O_TYPE = staticlib; MACOSX_DEPLOYMENT_TARGET = 10.13; MTL_ENABLE_DEBUG_INFO = NO; @@ -2082,7 +2082,7 @@ PRELINK_LIBS = "${CONFIGURATION_BUILD_DIR}/libMoltenVKShaderConverter.a"; PRODUCT_NAME = MoltenVK; SKIP_INSTALL = YES; - TVOS_DEPLOYMENT_TARGET = 11.0; + TVOS_DEPLOYMENT_TARGET = 12.0; VALIDATE_PRODUCT = YES; WARNING_CFLAGS = "-Wreorder"; }; diff --git a/MoltenVKShaderConverter/MoltenVKShaderConverter.xcodeproj/project.pbxproj b/MoltenVKShaderConverter/MoltenVKShaderConverter.xcodeproj/project.pbxproj index 4b64e66d0..6372955e5 100644 --- a/MoltenVKShaderConverter/MoltenVKShaderConverter.xcodeproj/project.pbxproj +++ b/MoltenVKShaderConverter/MoltenVKShaderConverter.xcodeproj/project.pbxproj @@ -732,12 +732,12 @@ "\"$(SRCROOT)/glslang\"", "\"$(SRCROOT)/glslang/External/spirv-tools/include\"", ); - IPHONEOS_DEPLOYMENT_TARGET = 11.0; + IPHONEOS_DEPLOYMENT_TARGET = 12.0; MACH_O_TYPE = staticlib; MACOSX_DEPLOYMENT_TARGET = 10.13; PRODUCT_NAME = MoltenVKShaderConverter; SKIP_INSTALL = 
YES; - TVOS_DEPLOYMENT_TARGET = 11.0; + TVOS_DEPLOYMENT_TARGET = 12.0; WARNING_CFLAGS = "-Wreorder"; }; name = Debug; @@ -792,12 +792,12 @@ "\"$(SRCROOT)/glslang\"", "\"$(SRCROOT)/glslang/External/spirv-tools/include\"", ); - IPHONEOS_DEPLOYMENT_TARGET = 11.0; + IPHONEOS_DEPLOYMENT_TARGET = 12.0; MACH_O_TYPE = staticlib; MACOSX_DEPLOYMENT_TARGET = 10.13; PRODUCT_NAME = MoltenVKShaderConverter; SKIP_INSTALL = YES; - TVOS_DEPLOYMENT_TARGET = 11.0; + TVOS_DEPLOYMENT_TARGET = 12.0; VALIDATE_PRODUCT = YES; WARNING_CFLAGS = "-Wreorder"; }; diff --git a/Scripts/runcts b/Scripts/runcts index 8b956422b..20ae1abe5 100755 --- a/Scripts/runcts +++ b/Scripts/runcts @@ -93,6 +93,9 @@ if [ "${is_portability}" != "" ]; then export MVK_CONFIG_ADVERTISE_EXTENSIONS=0xA fi +# ----- System settings ------ +export DYLD_LIBRARY_PATH="/usr/local/lib" + # ----- Metal validation settings ------ export METAL_DEVICE_WRAPPER_TYPE=1 export METAL_ERROR_MODE=3 diff --git a/Templates/spirv-tools/build.zip b/Templates/spirv-tools/build.zip index ee4797c80b3a8674043025bf4afff4bfb9b8082a..3879c9b8368fa5cf774fce1db6774bc26225b43d 100644 GIT binary patch delta 40692 zcmY&;18go#uy$?Rw%wlEwr$&P-`X}#ZQHg^ZJpY-+y8twx%qQ9+3aQ}JCp40Om?4n zx=TRgDnStxWkA7Tfc|UTiijGC2)K|6p_``vdLk1J81R20c|pj%kWJIWkj;kF1`w$K zWD<<{;mPu#RR60;_5;P@P+Cw@gh9B4gIR>1n4P6o4}b}_BIQHXBb8y@epFRaIUs|D zMI~nd`p+ZUKYvg_|Cvgj0%iDLGmT)gAn^YQF$zICO2B}Cypp#d$o@yWAF`PY2}uK} zuSm+Wj8biH?)u@cYfehK%*@uii8NK~(SCot*{R#VzH72fLEodIz3=;W&WhE;iQT7? 
zqqjaV{X}N4jyi9jUEuh;Wj<%gFKME?wISv0g1(?WI4)WE+-Nl+j!ZpVFVg2KPK!U1 z>02CqTAWTlGFXzxO}ISixG~t73J(qNREb2&IT5I$r}efg>5Ed|RM%(0OCJlV!@-Wf z%uOQ}*xM{p^gSiELT$<6?fCg7p|Qs2jg`lnaP=FWk)J5fq1P1h+TJtkr|q=dG(El4 z$$=Xz!y6P@CNTPiC$W)#;1=I<7}xD|p5mkpGp5X8>y{&m!NoAyrS6WsgkJ{`qP2Rj zcaA}w;k}OpFO>Bj;fK>w&x;WmC>BeCYJS4lWh>7r{4G*Po9nl?PVtmDM!B+H% zc&Peo&l9s3x~Z`=gUo0XD|eC*&%&hWcbtMl0d#km4)IncZe(9;hPjod`bP-keNk(a z#zhleX6gdONx%b6>wBzvov@RO@dgU2aN+sZo<)C5Gnw|oILNf&x^U)A;qG_SqpFe5q2YdCFIK#3#)}PZw6PdRot}!z#o;f&5~eoFY_zjk5%XC z)zV!c7%f-9GW%UtcSzrW#nBek4~m8Los(eHSoqESxl!S;-x}~uyrdEO)1!XV&<@s- zZSk5iGAZ^b%f^n*iuTU`D|)qa(&CuIh;er>alJZw*88C`n__H14+5o?%eufpme0;6 z-H0&$Z|>e4D(!i~ioX@rLm)3~Xe^0ACdCIJ^3%<1=(nN_b#--NU622U=_X0?3SGjOut&_i2GZ=9uhisoHZt6VA`&n+=Z-X17Q(sA)e7MUTG*4kRzKHeImiH$u zUjK%m*PpJ}$4ea4mhNX|k3Q8W&4{^3?-TL=CbW)d_s~FeAfW5t$)m9UKSMsjvI2ky z_8$Kr9Vaw(k1t#w_vP8^e<*WWe;G|TjUayf2$GgcBWBNdBX1m&``Th%EPn~r^d(v<&!e0~&!xWvavIsBE>4wF zTfC#>ov;bjVqfUx6;3^DFPUq5nR@Z6=ls26UT7BZEpXM`l(GohVsItJP65*~ckRA$ zaaR5@c~}Ar84vk{wXKDPe!7IRC(;K|otR+q-W@t(ksUj7$>wfZL)$ZuN*Ig)pNQ6T z%~N?T%y}WRGWXwn7*e{pXoh)8dp&AWnYoWPi_$tz#bvr%GC)Vw@%WCbrryKkq1TTv zzTjNimnNN7+p4jZd0G+$QGm5gHt0$gS`V)%b7bIBRTO_&8aA-W8-4t-5pvRuId5JYn=n>=A}g+yIY6fspwTTMmEAA)?<=#d4AIr^2roy#;Asl; z47IYu284fxhzK-+(6v@C|3Tm8%FCt5mG1kn4f_yxI$^nNakfWLJh z#2$z7F4-tv=WO1w^6*kxvAzk79+KO-Y;{T)1N(0e?Lx=Q%gwFq#cN)3x!jCsW#e)w zX9v3;=adP>nECOpTmRj&ZzT&$@4s=V+j6we174?@{Ka*Y{jgstFfqEiA^qxvey%nY z9k<|$f4#FV0N14_7GIxn*>Hcrg!22H2WlKZgW8_NQadI^>*xaRSDJ(o+lK3_oa`|Y z4=U!U85&54U{DmiB`;l1CDyNryIYDjN}VvtPc0&|U7b+6R_dc%mvA*C(d75M1V$=m zqJMj0@CCG~a&#cjq}=t)8tsFD-KWuW7&~MP`iCer0H7*KZrvBC#hW9iSVCQBfhg)X zY@f~Me@7^>3VYhbUO$^9W1%?G>LEJf2lkzFH4;f-EKY#{9rN*yn&CLOa{R9Ftux{M z(ls%r0NVp$q+w3U&!oq(c6M?~+0FxWM*ru8 zU;KBi0jS%tDKlS-5@x~=Ea23{SQG2lD(UW$hU#{377Bbhk2>tpWpN|S!3Yserxd&y zQkWwfk5rm`1H6>)FD!D3knNLn4ivlWym8~RK*zjrthcS@MJg4jf5D1rk*F;6!bJ?C zbYB9DVda54pKDMazU#g-)S&QV|7(qoG^Tk4-|JTg^V* 
z02?GH1R>$(eq5xJv2}_HAqW?Bh~hPeQ1vS$SzH8A4%`I{BIAd4v2B|^pB)l3(O!WB^{fHo@q<1+4~Cp{uXOD|0lw{hh*ClcAac^nV}RgCk0nbA8(S&VZp zjfOKBgBwAR&ICYLZ8<@i5Cf-_GHo#9pyBtk@K#6coCpytIC+0^H2pMkzo$;Q{;`Gj znpp^$Y-CR1IadL57^-H?_x*MbSS~KDJKfRI$u1nnAXMYyII5(J^UHBHR#Un#JhHXE zY4bmFeW_a*Fw3H|?P2t~V_&^!+v}e^zX8zs>wd!0#ql3lXa5q+LmH^BtzS#($dshY zzwiR))egor-p1KbM8N0^W2$ps1~CjR10^dbPV}I4gQuPB0-12kn9b*d#%)CohFF^r$eUd$`+>3{xvZh3iKzW2#YHN)%($qcf~q zW{y)d?hM=TqDJDyWD!s*g`9!y@e@g1Dg$|;HF^*FzhLk*uu|q;_bZS_lZnXbh$k5Z zp^*izB%vCs1ca$|Dv>Jzub~(~gX>7JNB)_harqlYQSe(;LM;KL&zzrnM!;^RjyK!L zCY#3umTN!m3_d<Lv|W5>zO}q2Q$Ywjshk{^i_We+Gw5zRS{>oTc`RE1+&Vo_^NvYmjvwyD zdo#F+9vO!MUP|AO5p-rK?xtV;@HIsc<>@nMceKbkyumZI7i?(L8Syy07^pK(_qEC* zM^Isk-S0725t4+N&;WW`8)!lGa^7V`#|CQ@++XUQ4f0At4wU-bJdCM4k(M@^*J zW^r1wN3tXpC?)!qE?yXk)-RyvhX}H z6AU!2Ayd{{^xi{1AFd#cW;alz3pNM}w-Y!SoH4vGc!q$W6Hs!1v4lLaXvA8mmNmjG zlZ1;Q9ut0mk6-};Y+!*cKgbxCp4-*7PL^i<2t)o83ROj1nEM?BI>~*I-d}c z2N?*;>j%65%jz89WXX6)WFxaHdUU38g9%K=POoXYvfAR>WeV!Gd8DW12e##25}|#10w%mKeMxC;}TA79tsB z84wJ27$l~D2#Nl9{0;7Dixwl{MU4_y;>xcfM#8%SGl24mUbx4i1qZd?jDZQw zQd-`8x#6_VJFeUAOm`4OXD>g+MWw!c6z#!J{hmKft&Dl-@#YKKOv-Hv_0O;ynA!Qc zjrMd+yV84`soll?eo7UtA&r%aD_hNRF1zgmME8B|o_<=fb8XY>x6yyy=xJLk2z-~{ zJnsDHd#-jG(Dpc=Y&t-McX3rMA=ZiO>{KQaf6X6A+PVC+tbH=G=eLzmYAox0Hhujp z(|*j%9;kFn!QV0E!fNFZ?DXZ;GA7+gym(6fcpFR~RrCS+KxnNrgw}2IJrP$^AzF$7 zf%{|r91m*Y7sdC%Qs_-^)hTUe#feAmIzcDMF(Arc;u2wk$X0+%)vu0e2*2rTnu^yB zIqbm)>i<@#Nu2uv3A8(hps+>&>$CF$s`_Lr^+g~(^gb+H2dt+!+~M?_=!;@AGGBw` zBZIkbr3P`^RqmcwQMdy8Q}4sn&j8R|gEJ_J6f1LQ@HRjFVUF!Vd07CsUIs)o#tqjy zmenaGO|)~U%=yyvRjw|FgP8o6PG#?b(ADqE`orNCj>LPv_t|`O zB1yK)LJClzooW779|5~R4e+#0z)(H%uCvV-B^1&(B%_9V0Y=4VKNrfW_JDKktc3IZ zU-3^q-QsbNrs$_URyxgg*;IFJ24ZzHbN_3_Hxy+*kT5Zh+r-vW0l(JjU-@r=PV8@qcU?kEC96T4*WcY8E~zc%?eJ zXuSTt#1q=?j)QLC3!DL3``B0ey}HJ5aisVKM3o@1nj}J8uJQ33`V_ejVI`N+jEKyR z3ET)rn{Tj>em`VJLFIu?+~M(ous~aesz(*4Br#R~2jEya#1RPgu7EIu%+x38iYp2P zflx%C!hG?vZW{iuhB8`=z5|X0`3sTxrR}bwzs^7yT~@(f4S7g%3PKcY?^}zK1G1*D 
zXV1@v>H1^}%&z6Zl$5c4QSsheL*csXEF3&S#x63$7BqQL-SPMRDP}0M%um>_yMzIM zm^+qk^ceNsGPDc(e*umK0wi<;@_(7%AbQ&)1bbjTfed-j*qpFr1_Ser&3kF+BOkM4 z*~i@(WMwqj5P|4lLGZ@7z8`JVMC|A> z_dFMCaVFV7{XRSR>e9`6rc7EkJiC}u`~)B@P#kHJ6|d3F09mQIK$ z@Za9bx&qQbl?uU2vXz0dfddeHCSNab_`XL|lfwM*-BC5J%=gXu(mfaL$w8(MZy7zF zuSK*y7_Dk`Bp#07Js+M&=pEI>@CT~HMLADLoMDEoK_+}Pxh6td|FkJ{c849>ugJM^ z-Bfi42ylW2iUJbLCL$cN-X>f`cKJfk2gl3skXGjq_gs5)m zIDeb{8t}z+KJL{`u&LWWpXw|>cW3`KCeJufaDVqC&(Zz&OQHzYW02+#z7FsSm{EpM z!UaZ+j9LXtjHF&g|6D2p6(ZuuO(oonDjd+`bn8eeggYSThJWPOiZGl6P9H1oiubeu zx{lk6O?0U!KUyt7w8HG7CDQoCTfN!m5B-+p~VFYUnxE2 zqG@I?=NcV2X1g@iP-Oof5slj=Io!TVS3vplifV4irV^B*@wdpGYYC~R8cG^|%y>EV zwhp#3G(`TulL}S>rOGwb?%a$uB6_{J`pxA3Gqa7~BibCF<8rSgT+Dxn7MMW#) zqmbYU3J85PkQ)dd_fV)8Tt6uY6G+jEonMa9kSt&WD$DG0i*amWZjJ;98GerT0ej!; z1)bzp7a0p*r0RU`WY>rXpIAV(E=+i8E{QR{6}nwUgT~vQuykJ|D9TX@u5D@MQXZGo zuF=?&bJs4}Q0arQ^F}gKgP-`=G4i5<4m|@3s@hO=kF0~S>>%;lColRgS0m>J~^>}$P z!msz0zW}-4C)KLQ^fbZ;weB?un9P6or{IJ7M*D@3T`$uiYaDJ`0tP?iB7hzhN2`U< z)ptBAiN&JG5~Aj-wv04ZV{U#6*f6&v&H>P^5OrHur>w~hb16{ZBJoQdO}@wgDQc^@ zBPy^=p{OJ$p-55?&js^9LvGsxSaA_F!jn)s{N@S{!1udBC`1{{Kd#A2#U@NbQ!1&u zIxJR}&8q>4_1Yxo+)BnD68Luk-E;#z;)udk0sZPJA!Wbg;EeYsestrxf*7!U&>#RN z_j{?Z>w1X!k{n2I;WPaEVj$NUW*Pzrdtic*m`J{vd{slPc_-iG!fV|+8JG>O=oI5+&4?1-o(ar&qYEG>le**~{X(Bg3BwW7e|pt@4z+<(3;PVRiFqrq zQ?uZUE&$ww8cORd``vIy^)JvU<3PpJEWp#4=msg)seNFrrptat+!G6N2PTf;9uLgE$uU zw8*z*55d2f+GGm-70L~#$D9M|TJ(_itv2W(5wwmun*}LUynd}?RLU4g)Y?L_NTtXP z^S;>I&TKfOx#-tWn}5cX1aLL{JFBKL5Ilw4c-7r*kjyFzW%O)ZD$#Noe+>d(B-lEl zqTfuQ5R1xd9*A?%IfZHO%~MVb#Njc+T*fq$<){np-P&RBG(7L+5roA%9Y8g4Jk`i! 
zW9QrM=r3e=cKhD?C5RVSL64n7`$8$UUn&f?$#adiwQPqi=Y;mu3E=suD(Y{#U91FR z9@~9UDD{kOSPwkT)*G8hzeR-rp>Yk=IXs0`88mf&G+G!;nkm8oc0mf;vwnQ&|9LE- z%x#kJSG1qLJNDqd2aOUof0p%&z3Jx>Vdjr-|0UEN za%nge)xqE#^zozKa5d?1(arUeHp4!MVfC3A0O{>%_;J$e1T+FL2)0*2A zF|7kykMOdf{L3|yg2*GSscF7!ZSaNZes5n1NR?_8%+^xp@3`R zS5WU$SY_{k&rWQ2=8@noEDvQ}ar|f-)MZc>on{WeoXpn~Q_J1**MK1(=AgMWJ4lpm z*LhcI6k8yc(AX0qDi4f)>-Ei?2RK)#^YlPM7F7NZ#ko?eB5MZcREWt*Je0U`ifZOD zktvlFv1Ft;5dUj02tQXp(mP3L9{m!5IYMyyH0+wzjQ?H6d;Fb)4mkv1*5v!srBv$e z2C4)gg-KBb4K?@+%Ha4KVKOZ?DQOz$1`-rNh)<-whqrWNVK^v=KvzAV(y#aU3HOz0 zQ`lLn0&FMWx5$}^{4rmU@x`MN2l=cx`Tq7KA{DlqYQ-!} zKE)%@{V_4j8t$$OqNm=sjGl2COLq6!Vxx}va0ccwf`Lv7G~zpcQ{hO_g3lI zJ1(mJ1CCu-rF3^(uc?E(P%y_8oU%MK_2LQtzvMGXObDD+#9t zDuf>lGLe!>qY@d(U5#G%WS}8e?}rFvGHn59_!HEzEDQ^C6{rWq-*PVz+5@mZOI@)L zGUyt~2doTAq*shQ41BqV4@IW}hJnh^u=V;#i2#R#y4`w#nGAnx*i(7R7=`!7 zLEjeern|`9w`!{9I@h#_C8@w|4bn8ilJYeq%yJ{G)WxzHwc$ftg%?3fN2^8J0}TSK zVIPO}np=+)nq+(#i&<8{A7GUJu6(z5lOl7K>%%}h;--Q1sdr+!9XNol(a-$fKeM)E zl}cHe{!g6h+cyr!X{;_{rEp&jN==FQ+H12!)|o$lE1fQf%R&18L5L}@a?RYXEQZ$p z*@XRFO^?FOD3Wgaosy^#=vNMf@2nu0@<&P5YADvj*GHgN4FS_66Q>X*G*Jrr70B@2 z*=PlOJ6UX<5QJRsmI#0#jRvRL;T|K#6L79=iMOk+?=Guoj4c%lDY*ED78M4c z1<~)`c9ceQmZ~{c1ksPG4k#%C>!=s-@#>?a2U;oVCC~ukf*QT`JYiUj(nkd4pu5`B z$oAQ+%8@yZJN9%0#!2*n%Jw1R%RNag@mlTH4?O7nBJrF@vV=;u=ucKX)Ft`WE^j`0 zgFk)4iwI}yLTx8?XqrfdW81EWxmX5(H$bTyE9M zw~jh6AaMa&fsk{(ok6IKD`3*Zm9=)KTPuxcoOqqB7j?vI^Fm~~RsY3wf}9#X$jHA5 z6S-=%uvU&ZneQ%`r>;EF79a3~8rW~fRj^AVR*NPN%izins-w{Ce*VUveWQ8oCvw5` z6)Vvo&3RCXbAd?=YlI@h>x+hSLedX(uE#A5fKRxnPK|H}g<)UkH?5LgGk&w>W|~## zc&;m{RKA!RyJ$&2xvL(}>zhSm@9rNkFaBFTQ5df$OSofFlN&K{!9RQl;A^v^tZ^Tl zxklO&C%B()CmNyWnOy0LG%34WyupbG~exQQSw7S9{M-fA^f3$c?#S=Cl=H zy9(>u$)&~NkwMyqz|so@Kg89spUx_wifV5ibK+@8<4Y;5iaXOl!PL}zYg+M(4x{~C zK4wY@@_64+j#JOh%Y+#T7dqEiOiLe4z-fa-MU;%#)AJvY#Wvo@J*6U^*If5e8Eo{* z7BduuSPS2Dl!+gDR8>Y|6@3-lJkhoC&0*xgXgY!kFkyU7-(mrqJGxe?#kOB$c3=As zc?RLt%b~tUtyB*2{`0sy3Sn9Kyg8jN)xj 
zC#w)^)D>1nM+llNyG!Ur!AC?@H$Bv}>UyZG){k15IwZ>>$}Mbn-17H>lz?)qvPD)fNd&34eK^`vqX|c7{e&Rj2ms8Y;d1IxuG^$B9 z1*t7jkFK1auP)~|mD_M?MOoxjAl@~0z!5|B0ELDMYaYc=g`x&3C82s}MpFs?=7jg{fvO2dce*>igxS#ifRz>!YXW_H*;(V8qZ zFgUh2IGhodTAM9sl3SBF^)aS3vyxZy?s*)ty$Pu0cK$^@cy?uefVcz%zQ6Wwl{0nK zW^VFDXxs&rKcad&U!?*g{+|D?p(^{MZpky9vIVb?ulsa!!TGm{rXDEkY^(4A^lPu$ zl{jNXVIXj{gX)qY!0DfLalDRkOa3)G+Ouo;3Y zKB42Jq!a}!HTW3Gc3drHlI0N!oicbxUnW=ouaHuq1Rs8J=cJU1IgGDTiq}|i+pUz5 zQGRt1n%6gOyI&cWokG6p=kUfJlH8lIAz}^cIs|{rT4uWeKg31}3_BROy*`$rCMF!* zRPaHvp{=J{Wf?HOt>QMHxt+wrAjV>ZX z58HaC>I5cG{~|W1)vc~J*gUPPip}KyXw#U`aVEFTHHsA4QMaAG@?g~Y^BNSo>sSZO=uR0g_4JnUQ6eSB|BO>=h zT&%@wja(5GE!9#L>>?NSo|t)VKT~2XcN*5R*YJnVdKLAsW%GU#Vl%@&gwM~Cw$2m} ziq^)s#NDD$Q3$oO0tF+Bu@0IC6I?4;tk&FyKNAYjdC8EP9%)ngo?eAhyI=y|ztUrh z05jwvBDP7VA%+_qp|5mA!O78|YKvwCy0O;N!Cn$f+bnj%S4h9IJrZTpcC>7{WkcBI zaHvh_0Zhn(31r3_wYD{K-_%FzUIi}R-cC`NJ9a9ANBC^(5HPTUv(!avIuzJDyet_E=HZQ-QOe&Z(5#k;-U4juVKb^Jh@ei!=LSH_R_*!+ z;V}&pH=r!-eb0HTG~eWKc0Oy3=Q%CC3^%V|*xBe6Mid=@0{Yx1DOP9vBI2(Cu#NFh zK(YzmK%P?ZgNxM@6H zr>F#7Z9-WCUVJ7sUmk*s()b)V^kh+oqndnJXyB2A{J%ttIJev7+U%Jh%MF^>5T3V3NeV@wAcYiO2N>La0=C z&hHjqyu!cc#)-v8cno}#Fn!a57bU}h3aT9?ImiW_=cVGjjfVnouX(o$-0rqH`p?#O z%U;5hac?Q}q;e~P$-R#c8nU!6&`X0RY_XEkY$i~=Pk039qn!(XED>wiggu(e;YhiQ ziqbZ$I&MDMuPQ(8qFvFI+eikxRb6z1JSW*GTY#*OfZR#=6#E_zNm z;SsR-Y$w~?i{t_7U#mSJlBn+pL3c;!q9N%ywEzJFYDEL+DFG9Z(f4n4f;r5tzVAy& z`Axgy*J>-f?&a>IcNreO{feKZbCIBb11?-@x*5QMJdfimYg>N-18JG1M zwwRSacxur07=`7v+fgdwcNQp6$ad`Yk^$Qm;62ZGj`0pTB}C(JCQBy()u(L~)vxvf z#!;8X*}M$^La{yPoy;QCW5p_L^yv4^Nx*1@8d~3E8fWP6OUQWTDwG)BFQmkHQ}SF! 
zwRig5c?^gNH&9w@yZ}Q(?4tuo5N<=j=v9y9nF|^)bo5BipV~ihD^k$i36Xj4-TcjW zlv|`5of9n{3(%B zz2)lf%!3h;*t@i@EsE8qB}`=yxrZ{DMd=OP7w@e@H&!l_-lWa$w3^PkwXtVw9Tbnt zzia?KR}SAo+FylQ7za;Dz5>&f@zFbxF%D0y?-zVIxErOqsM7(QlE{&h!#wx*EnvMW zBXf7_Cl~_O#+yXwR>ErGK%9E|jPI}TYshb$m1f?=j4QFn%rP^HKCe}Im#2?K4xJU2>;TZUygySUeMy2fCv5YMoxo~5ZM0hUf9~4 zC5Q{Ym|^&WwnA?mm;~0Pi_|Iv`txLpwt-nmn_jGni*VnB@-w%6tny?5^Pxy^U^X@m z6deDs=<^?X_OOOCV7j5ni>Qdm@dGe-e|nSUZgVvnv4^=UUNE~`Om;Y5bgi%pKLR1vv}AxXk2%>fi7j2EzJ54&>LvgmVu6Ajh3*tlfI$yj{+9Ah$foF2T zi1Iw@kjxo4@@fPoB2w{>0IKvY6Sx3RbrpJC&t^#)_l2^ZKZLTp`zId4U{Xo z?+B<+hku-j#8X0I-2=JSzECr`$U(y}Xj%N`oDSh2x{Xvu0=5UgwW#AG9@RUb#>+-)K%GqI(jt8L;`={c14; z|9ZV=Kc-@aR0ujq0kp%B@l0mX9btMf96}J2tFU_cMKZ4vwfTdyR;yF-G%%>B&~2mW z?_%X*o)*zR&b895O~Rj!;Wn;@LN(DwYLhAu?9!@-R;Iej7dQ@U+*b5gcl2(`Hzrc&h!)wdldxBX*&&dV5({6?^v)IeA60pq9L zmDpqwet2<}0dN<)zS|Xp&{fo?EYhn3W+VEgP3_i53604 ze4lXCm72+hmn~!(1MPHulDya8X-Q4qY8B^~$S!OafOk?au#>}&;O({b8OfB>3J(eb z``34qE#e8};R=t4*j8DOVehBIu3$)=QpTjFti?;2{RbG&t>>CKIl8A_wP7N31H=Wg*}M(XWDO(!y1U2ySH`2t1003Jo(1IAJ6_@^vm74S7XuVz=1BvT|T z!)j;M^X*aAEc6(wLej_@gO}DjFMs95^GDDt4-Xr3VH%@e`U(&4WAzT$W*cklm0jd) zyYdp0ovLx|b(V5M-rP|en&WJ3spsG#4$igZ&WY~Ml1Wm~PlrECpJNl%t{LE^XJLu2 z0Eo#0DvK`t!SbEU-5k0U`b%#VOSy^oaaPxX!Gq?FSaag*Lu!x+g3!bw<=Q$4f2aNC zJciv8<9KZA-x+dr2D*7j7dV*N%8j&$DQ3;0e580J=1PbGSjzcAOe71ep5&4L^71Ba z5A*r-3=n|ShZdV~(!d^Ri;l}(28O&W0zCS`zGr&a*Vd7pz~y8?#wsJue>jNXrj&Tg z&-_+udSr%_niE^}l={Msz;ZYv$F=hy8b{FfE)d65_&<`1>K)1X+mGGj-I;6^(Wuqz6mV-PW8lNePc7a$46C8Jp>jm{Zc zGVghjDW%=cGZyU>W~JV9`}{# ziBFymh$i58Xoi&B2H=2)UlK^n&wnh#590AOv{g&E%lTT6UMRX|=ENVKWse8mU;8fk zBl<8e+;DZ_`7PwO`(W`6=x>==mQ;_Y%Y?>HwMfXQx6yR-lqTr#&=*o7#+r5SC7GuX z%P$|UDEsUl`lAQM^&ljH32S#F#BQyN?)!59Yzki0ie?x$NP66ETdzGOdNjXwrMCZe zCalZ_3tm63z_Pdb?mS7|qs4MYw3u316xkcEzb;k!4hsc{o32ge7bV8w$G1^9lW-gD zze6HDzmAW1|9~+t=9b*gwSo~>KU|#XBjm0$tMFAxhpJI7j21|EY+IOV-&8;W z$lFvakfpY-Zirpo9VH;^!!GMC`0!~OVGZ;K^YR3CihlZd1botR4cz_&jSITxR4*z7 zoK$^ER@s1@SOmkm54%+>X*z+d(DSO(pETksZO8G)Fmw$qk#nDQiq5(TJa#mu9u-}9 
zKaSwVl|a4RWsyc+UU}n_9J)*6VowQD6I;)deU%!F9uYH+~Eh#2eeH{ zE{C*D6eSi7FF^_9Jmwv|**;>Qzcp#1-M=HkHjl(Ys3o1-&(tu`Y6zEIMFzar#gxnS zpA%0k0tWD>4mTdY36!jqK*koe(SAIwqxT8r9F$&7sB151g8<|!+A5UYz5B=je{Ce_ zd`%}SP}@s1yk4NOK#q<@9`p{4O7o zj_bTqJjMCQ)cRiCI>}&g^;hDjJ1ZWa2!u3HJXC2v?~SmLt}VARhu5AdK3ANvx2`Sh zGT2B{-4vg{&B6_)g3b=v48s8cd4@|+#^v?l21>Z zr~1Sn6ujqv4U)6BKhMT1b7VEPbFOve##}7covZ511YW7lKA=72`-(e~3Z3~BgG_E&#@@n94lb`t48 zMyh{I#!If1DbrN>QI&lq;$M|+klKmO^Cz9SdS{4I`VN>&<8zCaO^l++P3eFt=BC+>QGOV_oVUK=Gt!OIQ~0dk0NGQ3R9 zMtfmtanR{SEv}No0V6xCFU5AMGh$cR*R|0OqIJ^5(POQ-H4BT1Q0}iVW{eDD27a_5 za5dovh)e82YYk(7hWY!Un>!FfJVU6j*ErWZ)UP-QSpfngn5D1ujt@^TcSir+Sj3MG z^;Sp7R`4Kw-#%CmsBWfvBS!z7&?!o#m)wld?A+#UAKz8v4QK+Qch_NfR0GoV;YHJ` z;l)-r`wnrBQ6~;WD|s*%wI7L-dy#a(R4h&xi8D5Pv_`; zhr7bhY@_)BMn&un;9vbiQic)H>h3;r$&!@r%fLwt#pwRMTK?|MBazXz*>om2;;_Tl z$Tk{IuyU*364&P%TO6cjx7nz4bqQhcbs|KPgX-(#tacTOAjRX7+-Sa3r4N@OSktD? z2A2E+-mZ5EB!nlz57f#udbNk0>8*5lm%vu;domUP35?ocJG(!bdiM)k+^xr67g;WX zWsS}nY5{A<^++AU=|5Q8*TIp#I$AGF ztPmD}qlHQ=m{;uJD|fygpS8H` z+bOWu`VZUl?7WFdO-4eDqO<+K8H^IsSh9iuq(b-ySQDbVe(ESoGg*l28fHS-SS0Cz75WzYwOCOO2%61hS4g4)qgfY~-%z%F@2RehK@2ebp&I z=t2TmV!3oFbaJT7K@;E!*o)-koVyH-R^A;yy&JV7y!Q^D-RM&JxX(2%Rp%KtT_Sok zr1v*|@?72AMqh{y81mcgrsFTl-}b#!E`x4xusbo#ygTexQF0}xQ2SG8$3m`A+r|vWHpWbuKy)q7^wH9~j4_kHBEff={aUX~O<5hfw@Ya94K^=s0{sq$>YFU;5CR22cDA<;u9BI> za;mP~I%$LC#Q{RiO3rDE>P8GYE&lXHAcDnzf_X;K0_r|fCKeoItfC}L!pxTiE&GjS zMmf!h{rUD^2T8b7dt96X^YD?Fj%G!73OX$I{~uNF7@bMbb_>V0ZQI7gHYU!*&Lo*w zcWmCVolI=owrv{|r-9M_jt5?5f9!CijVdHu|zT=Wl<;Kavbp=5Zr1>Y(#6oxEV;9Kn#E zRqJF{y|2O)L~1!vNG;Nn4SsuLQiN9*6t(IvA7bT6Mxiw32crIz2Atk6x_50!=MSr2 zduuAJ1NR4k`#OUL6f#h%J}5c2E^DE)?zMW;nEW*%Uhr@HY7LaX8LFb^e z!nx@Rd2T@sIu#*>1rb;k@!td~fLWnmMphGyuaFp4J^K`D)RHxZjm?{vGl@N;^{HUF zu3!<&^fb_3M=l9J&u}>O?Ivk8QxoJ(!qoD4iFgOm*69iH2c+S-q8X}|kyB{=*oaQY zpC@H8+!=ah7wJrDIbKj+A2$h#nnG+~(_zUd1RA-W= zY7%s1yDHFKt=|t@o{0%?&JPH=OU?aQoN(#d_sf7&+R2D0lJ4K`Exr(}#;DEjRBIW|%Y|Y&2B^;) 
zIiR|zz;CPQ7>Xpv{#<4;B;$F{o7W4;I(4_x0n#rSCbqvXjwkjGwqgWkq5%E1QpEOpT%t2m(2Y_jLBXNnZ5LU{ADFfX7uY+rn&C`Uvgq=F-B ziL8h-fW>)!3)3T!_M~m5+Xuk8Qv#sO)#`y>?#DgoZ1KO3omtbSyW2$O;39QjI4{-3 z^$yG<@URh!Ws>QA)5(<7?F$LQX)^F>)1AN%r8)LZHPaR!^_teWsQGb_hBKmXFO66- z6pIm8_R6_e0C8k=%|$U3i-!J~RlCHf6Us?8?_GcGrP(0kq1xhY-T{wze`U~ElkbWt z>{{@(j11HI!&D+Zq;g=?hb_g{?vshN6p&O zpN8@EA&VMd^`3Cu_r~!duHpR?^{Hen@O1)XE0N`$1W8|>47;j|u(3HPu6Ns&lB?xV z+6^UJzu6k^xRXp>HULi9wSN5F>bk;#C-VzGnsiCs7>V(u1C^;@?8VI9q1-c3IsEha z*A$93nPfco6SJwF8_EGY|{RyTT&O`D=$bv>@DW>h+2zlv(YK zr+P77iu_*f=0IgLgQB)NpU*IfYdYb)|J1UE>ePA)P(4@FCQ-iZc(1yg@crkG7p-Y{b2a>dZ8N2i@aB$pNX*wSuZZ&UKQd(-@7`0BZm(`u z?&MwigciQJMMWLVn3nSggu+<&Ua2s8K>TM8t6{LO_|(wwp24Mo@VOYsPjJ?pBFX2= zSV?{?z;oK)Lu7ZeN+NJtaNfcikjI!p20LnZR(na0_s#j&@U|m`k|oMfYZ$14akG^*kBzt46YaU~U3FwFAQ=jLCY9vVy1? z4AU)SlP`#FvIvf4zxG^@UINUTvD+tfChYzp;I1ML361PHdr%R{CdTljm#Fcb(k$!6 zSdE?+XR*mb_8j4aV;Yr2N!R>}iFod*QKM^dlqVMiQo6x;?g9x%fRW%?b>WxI0G{LK z=arpd+2|}=JEAXre#s}>+Z6O1;rA871LB+QB7kX%@ds{qwgWv^o;$JRjEMOGbXmMp zmb#WQF8e@)33e2rU8HW4i#}Z~GW0(`A6yw|Hs{DrUQKAAGEyDIfT3syDSt0>yfax( ztdrLD%n`A8SFJ!TGTOQ;g>ptEAo7-4rOsDF{%(khI>uwXUX?A7->gxha<~1%!(0~2 z5qqKrQpX)v5-442!pDxndo-TN_>}^zm)or@9&7CTFH=|FoAWBim~4wGY?mHB)}5Fs zpADtNG=on0T7JIH57G8e*?H)Y7ttwN($9%?VU<7_U-()(e0L{mTL8$!*wpGM8BGz| zQo(64_Sz2`8Oj%QYzofv`WJW;4e6a8#JWX_Tc4wpX38*zZVtQEN;6oMgieARdB>rB z*HUPN5p?Ab;pR{qK}bX`$%A!k#a5hdeGVKpFU|K>yl;yG4mraZO7gQ;# z$cc4s<@8}4tjjbBzYDnHovb-i&IJX3|Ak8Jn0|ytjtiKWH|3>}k*`f^Xhib9XK3Y3 zyUA!@DQv4J?_PrDLMqI@4Wgq%=01|mH|h{DCJmFIcXg$7_Nata+|=`It#s06-2}gt z_x)CHEuc+Oa+Ll6k5-+uvuN~#h7%S{v1~`*Hh3d2Nz)rK*#I|!7mZsJt8U5SyfqG4 zyxr$TC{_LW4M;=iJhO(lBo+jeVdyb+>v1w1F`9X?i2wG3hHPgJUERH@@ zqh?V{J2y+_WkJ89jt)jF@-eD#c}wr_9!!PcS_>HXYxZR&0bZ_n>=5=)cIr1 zT$9zU-ef;uz*bRj`#XDyjZ>ZL5Dt^r4 zsXcl1kA=_C+x72Eic!Q~0Pw3lW`?ej>;jaZmA#@4`w69LtbhzlWLm$_Fomuu6!)FczUf-5?lFSg?vC^3y%RcmOyOXA z;r^{+>|65&$S(IlRg8%i7WIE9SiAbLjP0*Lz$ z_FW_hEZ=fn;D+XMGnMi<&l@g<9b!@a!Vc0Wz5FdP(Nykq?oNZ~S63H2>h=F~$v4?G}mj?p0oH!)j zBH^twwh0=&I$e`tQSw7cO-W 
zJ$jI5e!x}``UYH}PiC&Oo@_XoGmXxzcXk;C0`4*o1ZRC@gQ;=tT2+fOV;lZaR*qMB zvgp%TU!b(iU2Nv^SGBl=uSuiNHd1a3055t$WGi1b-ZM%15rPKB7dtP-;<_c`co2o% zs`VwM1-n7hivv2|!i~IExka}BoZ1HqTg%yZxLi~J^GE+>;sv&Q1)Ew;fmaZk6es9} z33x>zxUqcQ7hQx8=ebM5`OzYpD95R10sep@Q94XY&%Scki8>CauC$=} zeTNtN<^;{~FGZyH{V;kW48`zAYRH{T2f!YWqXk4Q&a&E`*47}-Lxg~R+4yh~Ih3A z#q@?rSh4km&NGtaSv+)|S4UMmg0mnu3U4YreWl$WZ7%(<(5lPV6(rnLs{6dL?V)z2|+H)JbP+ z**v$8Ljn*{qMAE$x;jTW7JSW^{mpM_lAv;PwdzGy(Khx+nC{(_A`{K+KY)s$%ug?D zj+86pwMY$&4g`HCDgFa}mFAQNRnlR^|9FN{!`re7LuJlfpk#k+O3vN=0*GBS=bbEm zF2jjl8rZm-+STn>kg&(WiUk*>!G+u!3PR-v(^675T@(tQhyS7Swm4;-`gcUc-Uea? zcd17vKkOhlxbATqxI)hjGs@+EP*E(zC|}j5QhOFW3_z&qlRd{Xp!jSGNnI&)dyuM{ zO~&%K7?!8@+`e1!n?5oL%yCP{!r%16i#pAXXFh)+nJA-DCN-a%Y?tt z-Ty#US3#wnoc3C1JokZ4Xu>(E`G&%3phjt*Vw;%?tOP}?YvyI+Mv>N1LWPm1|7FA+ zp6psEriRP$T^2*nm2;AJUziLytyNq8Y*QykGx$O1>+d#74eO@x3O&p1EMn^Yad*`Y z9AX462MOh5H3FQFc3B=eu^rFPz8(JZy8`>pl(H47j-75;Fy>cGM@!a0#`dI`Ew$8> z_)33$`dt_8e>f@8G`;c^+-nMDs?Yddz-o*H^i(EaM9L;Axwr?Ur?m98%dMW1lulE% z=BX%2JI595HjtZRsed{j1Rx?V?go^_P$;8g9GcGJ3jpYHq762O*|QH3dEPcGkB#@J ztnD?EE%WXw2@kw5lLz(86OY9ViXLu?^i};4O#00n{&8_SzQdm$!$F8V@3qf<26uB) za4(iuDM>hmjE^Q_Gnwv8H-LUw^4>CLXzoMIzA6)a$}8>bSGBC5K1@5r z;XbdVbpSd+wWa;Cytz!H*H!khYU`J=$5N+aP5@oyvjGk%P57XS*HObAW33Ln-6_0r zuX~LZfL=O?K;f+ z0Bz*nlc-hE`zab5-Ggr25|cT@my0`|vA;}=GOB4`b0ss>CxDyXimw}BlNna)-Z{?c zC<|CH@yH?a;_7U=AzYh_> zVAIa~dC4@#I!7%ol>7#J$4tS5GTKE~anI2@oG&c#8f=S5w)!(mB$MVv>$zmS+jHY5 zZ>eH}7sD%hp`m#WY+>wl0)*6HtEOTH_dCGr9fS}K$1$JTnJGq70pGD?JL?%e%QF^Y zDEkCl|NAR$e39jAu{Zr^?E_U>q_X5 zq)vX+`a!B>eLE5gB%`WS#L5^TN77H}GPs4ZGXBk!p!riEJasQ58e9(7;Qopc4`l#{ z{KdiH1ew?mh=A;(H1IOY5^5>R#tVR>XR*W1SkI<4N%dq1pR=ayIZ~oCgKVyg6VbxZ zF1MU`B*V{>APv%eqHmZ5nd&$Q9BG6bE|jFP--6NK4+bej;>SN^L79yn^l*woM{IeR z5$=m^HG91Rbc^MkGe~LZiObiDCR;7FR^7&=xm9f;j($$@1av+sc{Sy3ulbNZ5}rbq#pX(kc_-V3Kdc~Y zF@)K+jc+;+XDqm>DcFC;(*oU^<_vPmmBQ5>*xP@|haUHezFFp)Z4Qk;g02wVntPis^SP9UY15Xce(RASheIfSSZF?cuKC)N(`^`&)Ho55n*Jq+BF| zA`X4E*4m{!Jm?Bep|FD+u=cmn9&tl&-8ap0XyiF^obL)`}M@GdaPLDdO7WTT=3wb>y@|X6c 
z5|PHKGf{e{Yn^{*R$M_Z#HMcqN~`?s1o8Ch@%ig=&HrE9Hi`G>iZnN{;!Oav9E@=v zmouhQk$IrGyou~t2`gRsIfR=dTsJ07l@?ZiF$rZS8jt9A(z;>r1{)~=>IQ zm%Pz7VsmLTpaWQZHeL1RA0)1ywVVl|#zj#o6Czr`+^d0TqnHLc4ysEQx&A6COl3R= zbuJ4LJ0^qCgWy)T%g>t5N&=KL0I!;by;CQA?Yk!|63_+Mc|-xEUM zs{9s~xj}4{wUNTHk&jm-+iBEWZN(Q@zSQbuc&|HJUZ~hZUZc$`YJ-*}^c_)k9ySo7 z@zgdA=(L@_fA}7+;%Qj%*_&JVgNg9 zx?2C(m#Gb*9aJG7{b^10tgN6Bwc&8s&Tr~Fqa~Qp5$dHWM!%y#^AvlzfkI8wJITqv zL0gm8zNbbCaQ+vQkgOtVJWeRg{0HgFXI1A(6k=tHc*tfT6JSdc4sL2`1YbHFj|}rVkFt zf6*GcaO}&g_Y}!1u`$SzCSDh>7sXYA<^_*8lA#>Z8_R_fluUBW;@bkjzPfIKr=EF% zI)4EX@o1lwuN^2bU++5F@8r)ImnBp6{8FzByjI;p!qw~l2<>6LE|l4=`TSi7eS9)w zSQ?E)0Vj=iYFm!%_6n2WSF4&ML^RXy1 zCULooO67181zjRT{7tS<#%vkvR=*PA>Y_Ov1qid`=Lq-68vOVB>$ST31JOzg;^X_c zBMiBfA;JJ|A!TokV?>lDMSzM66}J=R*o2MqKK=Fiur2&yNX29ohF9k*K$C!3vWa0B zDm|EDcBo8F0#_*lO^>i!1;daXKk2tFU7L9H*dK@%pRCb)zBxP4CP6c`6|x!j)4xa1 z7toK3$krr+7WziC3!b9j7lEv&LrK0LBkY9h-@9{l<$Q#^i98Pi`zmXpl0C>)TT+-qRH@7FE@=6>1;OnaSN5ZB>uqYFXBanYJn8(r%NtI(^GJ1*Q zih{#)wL%R~3^v-6_fvOPtDmG!3^v7+2~ZiL$3+t5?bIg3GI@eByPSoN#W~fFS3ARY ze9~_~c^>-agYe~ltP<;eaQ}O$ zmd*NZ(%5Zi0LrU(k4#JZ&OFpX5JLbW0=1-f%uTF8F(CmY$Y~1^s;hqF)o~V96!2iG zO9k0-0R}=Gzuxds&a_4>;so$Bco!w%eifnGb|bf(Q#@Ggj(LRkwG`AX9y~}CCY=H8 zHT0$c`AG<}v$_*vIseUXDL|u)mjLo#hOQ`TVrM}oLQe8{1q4_VBNZQ@+jn~k3|2!= zL98BOE?|bZX;+R`uD&^@IP7=xQj{0Qquy-B}V^1ta==ukY;HFGJGS>teDK%`$ZXOtnC z&IT4!fni51(-!l3Tw|4XpAF7V=56NNEo5imTV~yUG^!3{&Odl}GbzHqkFAGs=A87d z@{j&77>$hU`a2;h<+q+SIJoP8ilHIfHi<7z@?*=VeV2sAF}R;t6y{;J`lX`9J?X7> zUEoSzZ4c`)h`f_FXl!AVfS<IMylue^a+62q=6*x_(C!T+ zKr7)x|NEiq^ViyCoFp#)Ea}$gPW3z}99Q1$Td*6kw;6R#iCW` z5HhrTl@$?x2oV=iW{AM_`fPee4wr}9cHkr4i2eGU|MhMB?O%}x5PwNkF#_G$v9aEA zg5%2SRMll62S&#LsR&%UCPFK|oJZsF7=jsdGBAcZ@_ya8(?k7?Snm$4D^)4%xSvpK z9D13X52$P?7#I$zj4AY(&+22Oq)xR_(l^N836=L9#u__^hY%1)`!7z>H$(Tm#{AzV z|5NE9|6lm?}cTP@7fx8YJtLg>w;_R3+X`*0&4a1@HL3i_trSVZ`F8vMi zB3|vWn^T3hq-!Bvt|`aBqq?||mI^;asH_R7Cso*rUa8lOT$l`c5)`qaUWmh!*3sokl1u^L%RrZV7AjHZ3 z_E0}G8+0vTFJY?ZmY;)MpmF4!LiAzI80?|fvO%(d2oNaiezZ&)dpUA5K0ZoqNa=p2 
zog7hG0+OE8S~s#tVZgf5B**bQJmP>mlBB=~8t_bI7z+ z@<_2LEvfZ|lBJH#3}Oev`PrqW#|DXwx%S1tU(g#ANk=^SIU0mF5D1hNkPN)^d)H4I zhY2D%EJ&Ci#l*tTIX~<-E@acBrU(A(w2n*0Lhp+mg6F`(sF9ixF>C{CRhIU9y9CwM z>qZ6`bJ8qIZj;%{e89E<(aR}RWQ?9h0d%w`>d9W2bV16Ria&mZKk6l{-Yr|>GHmC5 z*|u*VelXKhvb~f<8s6CcO$Yh*iw$1mjSczKMp+J0n=^gB*PoiwErTsYOt#y~X}-@n zX?Vpf{t`@$t(7IRZ9YHU6bnWiNq8;r`3DLpzPHu1mk81jk34j5DKt@SQx=_{d={2l z8uvy8-Pr`!$%y-44kTBiR~Xb<-IlPKarm|&IwwdFb)YkQmKs8mJW1zNMY$V{*GL1y zI*40gbadpqD)E@RG_Ea9n5-_|QF~#=+_?Jk!SMQftsS^4*_uLnz?CdRvEIredV&FD zK*WvaP*%3$+IzdsJfi4`%}knUiPI@X^%mfa18ouB*BMW zL`9;b*b!zaD#Zl7_4jTCs^2g!l^AvUP^WhYg9?|14IK+p!H;qtB-Z3&MOO*1-z`^S zy7=)1gjH1$%i6ZRi9HNdH@M3fg9B`^9WJH`+7|9BIDu;UtEpf*{p$n0K2Ww5dvwY2&rzB_~?KFNw|)UtvFXe<^6#V_eHa?jf}`cp@Vpv=(G(-rkG zep)9exhpg0s%omsN{lkl2pV9qD09HiQA==!{LwgZ9(^eLn>SW{>RyeHr3u&!45`nZ zA^!)PhnAx@**?TU>!6XtN~glKsAn@#0QR$pRs*v`MWOaqT+GlXI=cJTSl0}xoZy&F zzO~Le|KrPAt|MQH=P@cT)&&d`ou^HW#N0^~%68(^nPIj2e!I8E3TkCH4KudKs%R%*b>8tkVbRwmtUSaEhcwsS!8^;fmYP_f)U+`aUuQppU1r!pQs@Ec2@nE(EYB!447UU{KsST&>ghq z?pL?y{KJDk0{GKja$1&D0EH>pB3(leDV)FmCYPNkik!fGOx$l%|0tcqfH&HESF$u8 zk04IGpfBrD5%_9V1jXw$Ln_TG|7$<|EYr&Vo9~dcGQ5EQ9mvkMkuu6;MV!HvOgT-c z3LRuLJp{Q9Gb)>RkUhIbM#ROUZk?d^XFsvBRq@AViz_zrBf?3L4B%)ifmJu}r8w)< zHO%`mp3}G;t9g}#PG{GR885hi*5@k-e+hrO{cEB*|6&Zgx;BEI2;q==Xqd*b zCgJ(p=&`{zRO@Qfy5_KW$4yv*rGT48R4Wz(cJ)6g&NL|)io>@* zq!&94<8w66Z;2=Tb;5`RDnIq{{p~Y@W-8SToXmNGDt3x*^H3QsRUcX$qNL{z?3p0p zH^3w%Brrq6r5j`~)I2Sn@;{K%Vt!P|Mo1O2hmBh^T8~fDs{z1VH#ec}wSl*Jm|Ns*iNm_1@G{m>-H5E3esXk&c{8s5PV}fF+BAj;q^S$!Z~k(vOTogG>oe{ zjHkrz;~sRbnSg_HXlit`8FLZt-~$KMkaWAm_tJFg3Z_xn$hsI^7Iz%joq>r}_yh-w z54EH#JH4S`n)w}&U@Y-d9%@#TwNH$t8VH>JJ+VtY)PHOBBrJKc{$CWyI|}@N z&}@Lh|Dp+ZfXe>?3;aa?FL>d%{6FBZ|NkVRT?eh#!vwVc&)Vhr9E(n62Lbslk}w@k z1NcudpzkO&Qx|u55KxGJpdcXsE$4ITqn)td(s#S0US6Ih(c>^H!ixf8|?NK13<-2ggobq?*x+b`Q7bJcjdGB>5 zoc`+N@A|wOMi_!OHIz>_ z^yMA@`Va=(!hJnEeO-~FuY_D1-+&P-wi6DlNqmi9J&rT43Turm9n*N6k`0HZzPXi+ zfECwwm*fHI?mTzso^-TNmSG{@F<)?Yo+v}8?&8knXYd=q0*jT-Z{?x%lBxk@CblW> 
zA@`~SL`*iM2_0hQwec-8N^e)7Y5r-%La5X+07Gh^@9PMq&nvGyaJBC%n{jYKowvmPVp9$5*K2?3W;Y`HnRmRRlBGD=C}J314-gVuJES1WCASUO>FbY9Xao zKlfMA?tFcPv}U!kQIm~t!6E>jFfRo*PK`lzW5y*GB|*O0;rmH%`M$YO(kd6~$i%CP zLZ(Z=5%dP0_cy-LH;hgzWc^&G=-%N2s+yf?U@=zvd)~NGV@-<+8^~EL6M~J6;E$sxPl5j3lHVk zZ0in&4)ll)WDz8Ak=|PJu;YohNx=C8U4gzn>#Hg< zFx}6G#FFxi6r)6G<^cj~y!eD#r-nrMv58F~iyG-@EdAoUsfWlDP!TQ@q&mPDseE8- zV*jq^QeMgznZbyvXjbTOn#n+_3xKS zN%*>Yf-pPmwSA%{#K`K%X*c#s_y~zu8M>;_Jlq%N`1tc&xp&5DZ`NzIO4oLaT>MzI zCBeY@(xx=aOV8z}9X>>ew})HV1yN|KNY|uOk2>!_DCKf^&-3DGBYGKSXdY$9pcTr5 zmy3W44)#r+5CGi1^>E}JBicd;u)R-!Q7$rTwlv3vble%a>y(BGiu7oN?T$u{#dext|Jp}1G^SkOm-85jb?*nu!1bGS)Ad1I-Aq_ z7a&p1$|5skn(Q`9s~?7yf$bt#;V;^$`S+OWms1Wck%Eq zSTPf-+8iJ~vvNO6n#);lE78O!KN^Spv?A!o1_duFRoy{PPMN^vEs>kDH@1U-JX2S)IawOIt*Q8TZf(`ylYju^au2bM$HSSo5| z-YG`5I-?qeKA+mKQ=vv-uI03FuBBq&!My`(3I>3Bfl=-68<$8Ea;;>V%p#gZNF=o( ziRbFujB3jGIW`?ZymIa?kFyF9a1>MOlO_RK;qs&{Y_ZHGoK_xcAcC+Dp zyW@q@``tLV5ftbeSj z9oG=tUdh{9P{Q`Yv~eBG4VNQ;&hj!sOdy#UV3*TfU3jj!Lo?*_hfl zYjg`~%j!K#FSt~dyAd{i$;zitU#Mak??!|gYt(dC1Ca&JyP{~bFaGmL-_u{x$BDoMXC ztk&Lwdzh0{PP2%-{N+3zy+F^<$^xnKS#i{@+T;{H(SlGU7i&JXzI;HNTrrn`JC>}t zHU#cunNuzdlW<6HGCt34**XGHdZ{ zrV#gUoq+1w6e{+!g@AJ+h2;R#y1g{ruh8F-J2^Ji3#n0A$^xQa`@zXPpHvt3O`N8?eabwO`L*lR{ z!_luT*I9t#*C#8D^}d!v3KN60?AMu;D#6F$pi>8)Ky=eNfMIx-cwR-zT(%AmKVvn0 zBrKdb;47JR)dLP-%K8HOPDbu~@q z6*XwiNnwtdO~f5IT2f{P7=xiS{Vb0xh5eRkhOKgMOva{USugpgYL*_(v~S?{`utYy*X=_% z$OeKHdza7J1aZI&?RI?#boj9;iSZu=C59P?nO_`aD+qgjm)@-TB~&~~FYaAhALJN6 z(G5*!9*8!QObA>QmjO46&ri1w=y7!Qy5uJiaMOW{&NnRhq2#%Abm$yP&bBgweNg72 z>J4gg_sZd;DR#9Dohi=Iu2wu+NJPa!R{(Y6`PUDa9Q}bPl6`eVuT)1n$RmNc7s%g8 z3Z3D+YE66+;dp}iiu73}CKXW1H&~F!w9-|7&r8=19MH8<3IHOWGD>=wGVbIdQfS4{ zMI*mgISBO}YQ&VaEuCJMzuM6V3Qh$G5D>nj;v^``W%_X@QtEH5`LhyMFI?-iCZW$O zWKMtaPy~iv9wUNsrD>kzon+%)7f3rKU~w_51I3k8b?Q9IW{!3q5PsXn=}CF1=be@( z>=y(gh^W%p#Q^qf5SkzSP%oO{_9&&9acNIytvR8XV|Jz!NP7C6X_s#=7pNw{OIv46 z2n1rJmDygS{~1f*kM7$3!ucnu?Cq${f)zpGvT(pBYV!^!QLsRwjjQ$3`mmrmc_5i8 
zNYzU%mQ#Q70`q~yH(0F~;gkWGPN1jQnO>Sh%i58aGY#D~qcc+YFS3AObx#YQ!i1N|J~WcrM9^IaT_x!g zU5Q{YVF->}oI0b#p*^8Try6UovsD(92^0!?B-)ozvrK#2=Jen%N85U4}wxOlh zhf?FCqpGUwu2*C}>;^OcytlAcQ>qz^7TQVYk6Xc2y^G%vJkPHP$YiSQtJV9&+yjnS zF#V!KQfRfG$?n7@jE5uLHCih`tLiV>j>_!A0GW{t zxl;8nuMpqGQKKFoVCxi8P0GS~M5cV@zeK3o*N;c(FDwdeYzl-Svnv5bHl+)ZXP)%e zl4{LR-^Hlf(R7O)VYC~#fd)^Ml(VQa0U>C94M zfK-X2YRZy^3l$p+e;ONn3aqM8!cL?s=BK2?Bhtu$L5dm!z@hnzLN+Zs`I8E)@c0%y zA%4uZgCmWY7H@z>pU1eA>8QZ?$rB>)F%;4UU&%L%In0hbk!{5P1r3{KH5?8*=olho zK)k1JclWTt>Ypx_O40yh0B%`!sg^7Cx6$_!!nG4`4?hhq3zJ-b@^m)2SJ6Z*AtPTtABA4xnQXLt}L3%0qrLglvb914z(c*q6~NjQ{YB!CouS9h>>*5am^; z=ig?efBP-L@yc~9ag18#!?7fla9MNg*?+Wq;;0xni-ynIpaL3ang*_j`HY57HciAN zXlKG%B&*ZZ7l-2P`Ke~?RP-x^=bYEs{p};&D^j}vJM9W!WdRRRQ3Pjj8B#mf!N(M$ zB9UPEOd*z-@paCqkMbg~bRB-==^MLv50HO`e=wio)z6d}B^eH3O;kXtogFbE=n1{^ zI^O#GF#0DoIVwFM7>RUyQ}$!S=px2u!=WMT1C37)86M&5E_J?m#U=uBz`YU(vvvR`uu^_GmnKe;8s!__=+5bTI7!9y-51Un8tuQSMH7Izgja7G55x0r2W4dAOdS$ zsYMagXV4JvaYGc~S!~I4(^A|E7K(L$!&GwX7=MUceiTwJ0fc7zBff$tMufd(#iYa2 zW61NpPEE9PP$I^<8D|78pl+BiFGDu76vlJ_Ji@IYD-MEYI!oV`y3S>s$2@b)oVTh> zROEgk`X5(so;+~484FX|<^3Ta7sJT}Xj?&p40++L{xD&r+-W;by_jb{g!TB*1-sIm zQ!r5MmOF&*y&D56^&OkwLssWAhqPW&7;j#DMj>GW+wM`$h>kQ?SgBk*bV|RE37nG! 
z(EK0Z9{pYiM8S$O5f<8PPLn1B)zb`%NLrRY4~GDwpi-HNAW=p)&-Umd1oHbL3=>FX zdB6BXSJB-LfQGH~{vL_OMV-I+>ws2-ocfRDQ3H%)5@SmQuO1UKfzdUdm5^9vX&tj>9eku;Gw0u6nUpM zjo^>9W1w%6X7{$)o=Sc`y`fZ`+gGLaCYMw)lg(ZM1fLF`iEDlO03kHShZC;$8@!?> zlsJEbOy~prI6((z#owfVaUVJ$eFsn+m}`o~xwl;ygMQL|Om`3M+j_odUs4ndxEf<2 zI1~H~y35?a%w&HGCNjhQ@-WuTDlAMY$cE`jI3J%K?BfF0tFcqg+%Q)Lf6RK2^UN?3 ztW9{7^PGp)g=%QE00i_kJX|-utk#$27Ns^d`=>^H$1<1eb}DeR>0Gb2YX1L2li7w5 z1)?{j6~-g}r#T3ja)d}?RyFM=fJMSj-Hfiz z;*Zta2D1hm)gmFVCM&xJ%jggdp(nef=sq9}*jo~&wW4n5;YV|z)#l$t{gRfNh^ol~ z0Vf_Mt=n@AkY8YvxnK3463|#LM6`nTD=yC($jeHFaa>e7TT<-de6&2So*CbYU1CT> z<)*ji9RZd6`~>lNtgDbN=v20FNZFkVYDhvMj{73?h}$ILrEbPuxV~^)`p--_z6k+R z9yI&*xV&|0=EZ1oNGx^puT2Wg=97Wqk{6#U#N`1xAPO57eTfG@Kz7k1hCvC`+a zKdV2t9vv&WGXtqN>@4$Ju*(g$>xpRxeqT9=h1QCD=oa*4+VluC4e1rgH24Iw3I0&< zX8KijK>AW^!pKS{fj)=hg$69w5s%>TM(7n8QdKQ$U{FUAu4eRH?WV)(cRB#*vE7-k z5Naeq`>`4`gB6oKLqD>m zbds4$t~CByKbO)$onYI6hG+vgSm{nxzqXv4oOwIhW(`T6i$%CJ)C{Q3d~&^HE=#(L zz1gz(s6wDes>EyO_)n3G$x|hUwLO`g9bzpF=B``EcMK0Y3b)yrhX{E4W-(;CEeyEO z48F@fM6yEv-g5{2{WLKy**AVP<*|z-oSAT)hWYrcnCUs4=}-cg8hrwcjyc7q{?@sO zAIjHdrOJjbKix6XOuJ=Y7MvnAKR!A(MI?D8%3#DAzjU~5Fkm!Ks^|-H>xn;yA^D!G z$lyH^{OKXFSJV~R%WVJZI3J7vsv>|!i?26Q3}Cp6*+moVd*B>ysl`UJ5^`2X9%9S2 zC@$Duc!}uP`g{vY0)U@&tIi5Qi}lla&vzaJ4fOhL)F;jdmdd#{s($69WnZxiaw`rA z53>Ba$zse0rBvLPP^;7J%9b9BUpcvd7k)T&%GNu6o4d2oCc4m^u@=M!5H53Lk!Ef{ zy+qk5RjU}zn)_T9`Xei^R6_a$xd;ZxU6qFYpRQ8Ho<<0oWdY+=ty3ASY~|Fo>C<{D zU0omtv4(m3USMp@qLdjPPuk}j<#6Esc2)c8FTPwFH*|uR*)l&|6c4rtqSg4r{(-jl z>~%{fC(}CqY?&~i8KEZ&G*lO+<_-!NUNo*pNI0p;Hy3(lcF9V(PC&M=ea`ew@xF;X zK0ata2;4U1r3P$M5~JJKzW*1`fc`%?AN4jqF+EHLC7+WGxKUXmBHhK_;Q%wvOWg5( zrng&AMK1I$eS@hZFuClc^i5e4J}gljhx)2iT?H3<6D@U{r2U3;sdf3xE2fO0N~@@y z+~$M`=|QyiZ;{-%`ocnOyV@!u0-X3994nv?tN;6L=#kXKSW|n%KZ;U^Z16~FBg*@7 z=94bxpw=X{y*~$mV8Cs)aE!-tONZxEm^~$7$Iy6x|H z7i?`iDX-qpo3u(C$Ec=x(2tTT4nRd;C)nm@t=J#p={tegd8UB(V#8vNsg^< zdgTT?z3F3QWOpfy!s&(`xKFW3_%4z$LL~c#jN_p8}*GM$Fru+oFrdKBhL_swlzQ}`K z5b9wXiPd{2C)Iv}kpM%kqUo4mzXD1k~`JRCz6V8;#Rb8im?1wNB`(VccP9O~}Nco>_9)LYX-)d?m 
zahnh3;pdDPmh;3DzGI`-TmkgM!ISq5PgQ91Z3feC*vXK4O>%v5HrWMaPD$OIrXvLM zYSh3)st+F1hIMJ{Xg%Q7EAj`3Sp&X7HK#FaZycBYYu>OWcV?Bf_So_rtW!g6{t|}c z3!Z>T440EOWH9=OzQ0)!BA`0)-(yK^&vtz~cV;SF`{tPb%AdhA3BD)Q8zc&!Yf#0OrT0RSQ7IS>B*nw z1lASV3ChgcQ-F(UV!FMSeHntQvKk#5zoeSEtN>5d$73AZDmu;$qPHW@1R5R#VY+vH z{uHIYCuarI`tVmhFCcLHOlRkt=}f)>2d_|<#VAq#L!Qa-`;3GbG=pD?Y-u)VriF{* zeJOB5qoV_1t~?fsGFSf`qXHw%8KNHn3fHKO#0X|cFum45A&01(#o--lQthTJ+s3h3 z1n0Z&L*efBdViP7{m<_%oalS)50&?Sl$OA%xnqQ`LhMEt00$9MU-_T~{|iabZy_rk znWf>uJx8vsE4%5fE5#UGA&*Ok1T3HAcgO0p=YqDv9I z=(|QZ;nN-XOG^POM&tZQBC?7jA%v(H}FK6tI332{rfq;=HZ zd&b06{$mRjac9~Ik}Ao`sHQ(p2gbNOW!b^BMub5wLYQw(2&R9xWFGHcijPc}sbvGP#PyVG(%fTt^Mw zuwC~hZP{R**lCPF&09q^8^`Eajd@~#^!fG<;kwTpS>|T_3Qs0N$pmp1mDa;=YTP;B z0GFwkHq~pvGgQyY2ukn6r~G!>qSHzomzxTf`rFZyalbF3PeOp|dekZYZuG-_af;Aw@RfZ>RC?y0LE>4Wm4Q8wSMO`HOC z-05xl{MQ{jJ6I^LFIm(AMj>qJ1;7ko*1)V3qH>7N^nPjG2kJ>N% zl{{rx+y{JbZ_-M`8K^B~YUNK6ggRwoqOEXWc@{JGy%yy7x(_^x`i@3(*AjqJ8I=}A zM=PTM4vy4QS`WdcCY;6C<>z{S&HB;osGMG$E_6}H$=HZ+WR%9)CsrrM1An6IdRY)$cGNj=E zg#$iB)j0X!k!1;;2wTI|S_sI?s)-zGSAe_Db1K%)vNH5Dr%uAx)|AsHo* z1DjMc{RXEfk~QyzkK#sQg7+(OH<~vgyk9?ympIj(nFqZGNRg5iSfl7VFZ#fg56Ji# zcNvG{xuGQB930)5H|KAQI#L_5_K~W+O~-T;L(9CdrWB1=s4mG`j^gdYk`ml2t70D5 zvpx;CQGFLZFEsr0&UDbw6g?@cTVJcfrMB{Y-80X45HD2}c5dI8Fls6dDSqj?(u9)} z8x?dZg#A8vwE4{-@VtEt4OVSTiO7@Qy~3bq0?g_K8;nVp6}0t+Ps%P~3G>_==Q8(t z3bBJW083p4rr*347RJ@EE~(uv{m>h1l9~R?0Zr?4c|>V>EBrXh;2Ixm7hr6;ZlVS-FfMNW+er^l{fTeH=G}tVgj2wK#WuJ%fJ2Zu7## zX-shBi~Y!WvM@+tsZ3?QB0U;eVgDs$7mVa)%xKa-Xz>$0C*xy3Vg@1h_De`+lV6gM zQ^G{0u$T+crhK5V8Wb9}`s`WZ8pw0I^E=w&1aZ`a1XgQwmjqFP>jnHl8(CahGiy$(hB)VcUW#`9%`7(C~fB9*j z0$OPL3#(Jhy?ynH_AM0nO0?EbZ1Gn7Vhee_+E!~DCrTP&`rYf!`Y2zNsP|nZ4N04p zZhl$WVsyO>qxA*np}c_|nQ4TUJ+6d=(&V&J_po=TL~tViV&QC!0&u_P3vCqDA?J_+ zudvBitrOLy0Yh-Fh$iTO5c78*6xc$uj#-bv53$bpDccnmPvFxami$kn?46p~fk`O6 zJY9r6BS+I)F+u?jw7KQoD#wZ+Ytr`Rk1v+`)cG-o^zzU44c>tyPdojc zVg{}1Xs#JSxFb-OdoXby`L`*gEa6OCDnc~@Gtr_+^1iP4O&Dr^Z}4_03%xGdq>%&> zt6K=*Y@5D#)^iJ_7fmzJ*Y2$ie==i(nx!>|?YjM*2Me-c8=6|Hjj!|*htIi=EW!Yt 
zzv_KUFP<()YZWWvTYA18If$NrkaG500V$$x6pkyDZnp`l0zR!vnF#${QMF-l@9VG_ z!L}k+8n$z-!0F(pt6&BGlBsEP2}w}F8%B*bgS@r^4j<7!m+i1~^NsceW)iI`3wa8)vNOrw7ER zIqtTmC+)|LJHeN?Pp*&N#l;mqE?OMWf7p*Ga|h57dP=n2LGo4dOV=JcZQWm>qiRZb z2gxV9fGV?mo*U-n90@xe$8^*l5PobVE=CeF4 z*8Z93xq}zbw}q$@VLEw@l|W@zGZFZ?@JG{>LQB1eLSL}n{kP3pO$KZeUWLl*$XV~Q zIU2+vmxu46Z7))7={c|N^zOpuKN5P6d4KNjU$oeIp;+v&!Fu0 zj*Wb4w*{e(r=GNZePao}R~o_zY{#5rCL#Bqo~GtqID955w^)$LnUOP20d`i{IJ_IA zSHK?6G=zZbwrVwFp(x6lLDEJm8{HhD7GF=AXy^xRxFDJ#Z_r-+F%rAZ;SL{a`{=FI+K9vz64-#lKZ2;iEpjE&kfF+Zf)LLaMW*4(wW zCS+=2?RJ4MYVwbC zyO&oB->fv><*R8WEkFJ^i}Awg){>g(_V6r_ZXAUFzR?FWtDArdMH8Wwl~~&Ff~~S) zcc6Fyg9n2sv7Osvs3DDdh%``3oWzo3CHFM=dTi)%(;s0E4bNhyqt#s0%znrTjH2P( z0frI_Gn{qG8r#aX={DGg?DrpihcQ)lkdv z;#+06=7I_t9R!^k(s4Q?jT#t?KBLIVfQ4h_7253+mgOj@5r8lQr3gq22zgf?!*yJ& zV*INA9=`T76v(htU`~zD+z)7mGc0FQHmj}n{^DL(HuMMhCKV)L$qQxL8s3qqsqedz z>Q#%$|6+x|-G!p6qfGSG*eMR2m_DZ#{jvS)<1+OWpiNT4D-vs8#27YOxl+_^a3mkXD?aMPMT8rAvlO1C;k4@S` zeCT4y5ka+=6^p$dSJ2poW(^mep=$Jm9c*2e@MYp=PK{@4zysuEP5m@~*vd){+umQp zAvg|^z+2n&1Vw$BUV8uft>u*rN#$APWXVW9F!*O<`waR2oO!8H->D{`{YdP9F) zY+A<6V6m-~g#0yW@QK~;Mc8l>#3#j(@5Zsx{b^|-F>%XTKowzJ%iio}B~nG~!#(}l zm-XSe+)O3Y72J3DmhO58p{O5ErZg8%Y)=*jJ`BK%i)rM z+70n#_K+&lm?2pOXS!$Gh$BU64FCDE?lY>!W@;#RRF+|el7LGFn3Gw1h&GCq%)0ZI z8p?|UnV*0EVfUqFQA3BRtFYgyZN}5KTahLVjOz88nz~~wN3_ka1*^_UjfJ8qpO)L4w|c3LovsTr#*Yz6_5rI!!3&~-*)CYS@#2($8K~Y155!k6JC>hA@N-SJgh2SAGvmQQdfYA{%F%X;&2|sLB45-HwbU z8m@)m63YE!giSmLf)o7(v&?#1o#Dqgn>NiColoSJt!kBo`!DQMa)w;sn?N( z8ikZT!f9AGjldJ4sw(U%7cZ4~60@jWrZXVbURXv2BT2sV4yjE)R957|GbCKxiiymn z#az&QO>@EQbB@MmG_wSp%#Y``n>W5bAF~djm_H)FzMK3+Ej>WGuDkCh13f>UGcw*` z_CuYbWf^9vAca9#yz693mwNSOhdf%Ip5UY!XGURZb%M_=aTc9$9Eq=ljXBUyBLhp^ z=a6=VJSpjcK4eOa_E1D?tcp83Nxwm#TNn3t`<~I+^=`H4CpCu7e}D=mAV%{7|~9=py_Q;Qai?Wt#&?V#CI8aKR?5HPALZeI>RaJ!Ik{s zp;#(ti5-0UZF(5(&Ay2|qR65m98qmGbp~zg_BFSw<0U~FUo$S8 z+QHmK^g{Xfmf%EQD{B1YNL>C0GP^%w196}-Rc2H4z>AOj{xFqI{Q#j~3c{T~Wgj?s z%E9^pfe&`qbGwt*NvJ-!hhLdQ4_E9L9Hppz>1Dd+dr1Y3L3u57XxRMM98N?)NUa&w 
zY#)7BB6^h)55MavdffUBjz};t+H$=GhECO`6g6)4J>xE^-mlM7^fwJ_gQPIGReyg0@2$RsmUEH zV!r;BQ?$TPfm$Wthr%0|L0{jLpRr*I&rcfSeIlxdpyCp5<}N`zmEP*SJPd>W7pmm= zRiZ|VuhBWATh9_di!Q*UyUMNC?`neB&3a|n!XT~LB@ZuPYQLIp=5SP>5rB-1Yrx#= zk)L;K-Qu~>uWqeBhkHY;O&EF~bQQ&fpyspVYxwWrUOD zXr$D>c5!}uG0r2$Weh39VdX|N|JBrnC(?_?Bvox|#y`<|JtjKUdC1p_LfeFCyfE&q zsmgP67yLFHe+|T{#*%z>--l&V{A3CHOA|5EsJ%1l5f!;;;7~*xttpl)ScHk6i_OU+ zB%Sd^J4Z%WU_`KiwB=g_@9A5Lr>T-L{^3;EJUoD{Ks0sJgQeAn?KzQl6MReb!<%kb zgq9*vPzw_QS!On?oWoFc_lPV_k(ty7Pr`3HagixjRY zHn{&B5dSOOI%>0Q#en~v4z2}tBtw7OJ3<}a$$AB~kCDCq1JL}*J)S|x*PDF#|g zn=6w)>_>RIQ^8Ngn^L32w_n1|4)T9`!n>A0^F)?szDEVveJIJDfpgs~vZu8)qa7(nE zb~I={L48NeMF0qgl$QT8^*Mm9mS%=zvBLnPH$_{OCJ_4WFSIiAiz*b8mLv6Ef)@!x zFW-$sGuRz4L9Qn*fPxnj6c(qzO3lX}dMH7~jV8dJz=fgL+rh^X+Tf{tqkE&KrNqTZ z7tf`-p?VJL%`(q}x^Wm#c+ux;C#)XKnHzDypY2mudUUsX8$`%=<6TzP6I!)4BGcHHYBS{XS zrGY!sp@`UQ_lMZY{+80inEY|Pkxt%3Z->LXzB!2*iE4=F=zIhfzC?avBW2n_BFnrr zYMQc-|DiHQn^ijCP%nEv@#w{mA2FL8vr246Os|F}H!;kA7tue3MvqsC4hjvr57spc zak+@udPo-T{vu@Dy#rZGc#u!R86I0^V2QB$M9rMXM4!jH7c($z`+#md6IKcWP&GY) zlWS}%1TTLxZnS~>(07!+Em>C9SNz3q4oZ96RlL0fe*2rzh8^(b%RWxfW^R6TSEyR( z%vm!J_dSVdHMfIaQh|5rYzk#-LHroEv(^ce8Uoto;N_g=A z>X^n~BUU{xa`q1=vMiBv?q)5&OqJMR-=6Mk-HW9tT;If$7k`8IbgGdwg!%F06ATQI z#UR=5^7=(eqPtN%rV-hm!9FO95?OS<;2`nG6WXIPmVAx)Tom&dHl9x5fVM3e_($S3 z>&cq!AM%*jktHfSBA}9B+8q$K@8J;v?i1QTVIP*^y`HdxT*+&2yRb(CFexJbqxIo=+pG2`Gpt{bsi*CBc*Y-2^6*Nt^BE zC+<6}1)Y86);mX#i*!87@k5R>d1v3<3vjs*6crttDzw$t=j@gNR;}2%Bs7&a>Wm}b zk1D07vxT8}za$eC<&5&XP2sj*Eis}wDhm=i;GP-c<#wej(#X~J1k5bRRwF2YoYdDy})FVnfyi z(Dg`h7rKttSz4Jh4-79%Us6%_oS%I7ebDO>j$tcLqb3*@xom~p^2tAgkjRG+Sitm6 z&>2FSqfE-}9X@szllA54TDU?b?wHg2_t_e(f=?10gFbMZQjk+1MJ1eJR7&Wxnhf8_SZ%3loR8i`Giw?%yA2K?9k(&!;!ybpbPpV)=b{YdvEnTW%Cx~| ziGq(sHY|R%(h<&#r6t8J2`W^jB@sPh|G6U}Q~+OHJlul+OK~LCBJ@`j`ad$Fh7O*;#_Ii!Qj|Z|tX{eQXWi*tMMM@>$JL1+{mzC24E`BUH$wHt zje0;0=}}|!+kl4f=%K(v9U31u@ZzuWkJrwC=wAc&Z!eM@#KwGN0Kg;u^$-5P2>oxQ zF~T+2pBMnx&j0|Z0c3#R*Q00&kmuiw35XxSqo|8(mA^pF4m(ZAxzIs@X{ 
z97+G1U@UI@D)k8fph5ru;Qx!z-KZZRk8{HOI57;8D-b^bf+HV9TQvh__wwx1;p` ceu(541=3nSAik_u;4dd>jP42H;O}YwA1&Bu9smFU delta 40112 zcmZ6x1B@7=T=D(Q48Rp(Sp z1!z|>D1y8cC>RXT|AZ*4MKvA)2NHhG%J{z(&wveP6#Ab^Rsb@6)yg<))v7+Z9t7%t zx-sIb^h8-u%Ku}?S+z>^0mWidT~LsRLAZm1S%e>-ouyLsg9)@C;YHOEmV(`WQhqdM z28B&zA)^EOUqrP3;z0rZ&sE|iDBb_#zX5C(1pa@@?4A|#R4^bQ;lwQn(*I-4_%8?~ z+Wf?s$v4KQ{yFXLPdp|RPN9BXYw|$uW2;{jdNGZ&#InmiuvSWto_OuWrtA;)BOosAi2|UbBlvr>eeOoVToym*f{p=rf$Yq0URUl9K9@c zJ~6ix!ErF>jR(Ea{=z?}Ze@ zZ7g1Usr#7S+shc3jwV@gAyXs8>#m+Utrx6Zp-J|VZP?m641O{zI5ku)ZV#M0E&W8( zpN_W`(&#(rNc*vmusYosY8m-^&mm1Gitd(!BnQtw9g1wm%`qgfXqfvpvpNv%H`sur zJx?g=;WkMbCEOg{!})`^va(FAk;@OG899bH@|F`Sqv-VzKx^-9KT=@$;vTvhUk~4(2m9DMVd$c30Ygivx%sXL!%<)9teiTWTYI^gPz ztX)tGL2i>}8OnIqzW#l}pCtmbm=gW@(iQi8%9mCNM^idB=gv%Ye%F20_2Mz+vI8yrfD!(MMZ^Hv631laj_O zKPKTx^6aITM@tO$D|8eBHpd+~L(YynlqdL(wxYWd5xX!nh(j3(BTV~HmvSjyPls(a zf#bu3R9XaDt>JkWW@6J^91dU^HYZv3i^pIj>{gSyO^Z`*-XT}>%-w613mB367OYS@ zOU0mJ*){o?>OMf{%1kK{iQn_lH9esLpC*W~(=7O0&D~v_ru7{1_AvcDT$5I1^R;X1FgxKh7+Ay_vmT*y>-M-g`BTRh7a!y!{SwD;lFAf4T^AdA)>n6C+w45H#qcv3 zN0Kb9&pwu#yZ)n>@Y7MBe;BE9jYEwyVU>onz9UxT2mzZ}_M|{=FiVBeH$~UrL&Ch8 z(_6i% z(2KBJq=Te6M`d^7FIX>TewzMRbbtP*u4;IN&o+(h>-pXDH?iG9X`OwkR4u8rob|cO zqEsio{2?_W^X%=|B1fx9Ix~NLJ3KObmZCNJu~056E6CSVD{3RFi^eKFQ?69rudg1;S3Ht2-i00XQZ0JC zcy-gy_e@o9~d@EJ*YIR;pp_(`8R5m zUzSXn0m6|1@BZ~J)}BWI%i4x(%S`kN(P^PQOb>ON=Js<5U#?{6orNynN7cQ8VFy>Q zD@_v(e?9_4(xx(%blzI!9u1RK^3TItdCs)`QE>sXu-_#$G!lj&&N-(0m&7XC7d zkyykNp0rS_sC&pm zWnHsT*DPL)@CsU(y4kY3or)sq*pucz#nHY@N_0150gfx92yHe@%m)br&Yti+Ah@(I z4jmSY%7GQR8e;W;h_y`%lqC~Y(mi=nbPhR4o$K)9zkc>B5rs^NF?ie(8n{cy%zq9z zI!C{qlF9#CiS=YdtW3K`wqp4^>3*1%UJNu6F87B+Cb5x@Vl77VAz`wFsRE zh>Dz5XO_tPdVTU94hh28sEH;NrR!`*plF{e)XfQJn|4?Mu#CRJ&@_FepVcWaZIxxA~U?( zsqd|@gR-dreWZS?d%o2{nl!X_4RvbL9D#liL{|-~Tp`b;A)hynog2~G%kPOn z4@ODDM@E^cJ4trWVtmxXRZ(DaUBZ9d~^Iumg9XsKQ{rgeper1Rc$3?M_?E z7gs~@{8AYg+#RyRf&bo(JHHA~RU=~)nY@QWR%Z<+Q|@%U@s19(X-P~W3Nf^k_P@Uc zU|3H|H&QX|uo&iR1LBX=E=Tyf+RRS_!g9 
zy2c;S;y#n9=s@4*k=iPXO0rl*)R}gR?+%|*jO%NXVK>}$wiEukdWsImkKHioX{?4(hnlCj3)vgE+NuqYR? zqiVD1rf6(Bq-s4_P|ds9zs(_Qs+u4UBdKKWkfUhHyY7%3wFYaZxUA~fh|NdDiV;So zTN&3TMBR$FAqs?qWJ4cMZO82*3;0sTUPxguJUOm`k~%2w-69W6d@BH;8uZ{G!6~6+ z3qdq4I1~dcxYa~=A}0C3L5-#>R65S6w&hOT7Ws!jLrgEDLcuA7Akc$YBI{qrU#|17 z^ohTk4d@mOP(?I=oLz0bDFR!Wgzib3{w!MJx<96-fslz60=nwOD~GwYy4g!p=#O4j zRyt_wSA{jBCECfu`LzIU*{lj<)T*rPjZYi(@U8DkY5R0?X0=->UGM4X4;zn)hL2Bz zMgp^6kjUuSHcbPBIf{P_Rn$u^#=hhzuxB6YK(Q-LBkH~6tSN!RMT|6M;4VPwM5jRE z7mTyU!AbZuH9^ip;&>!MKrl!|>dM?_ep|&W?YQU)m4uk-AU_AH)fslq~tP_l! z;e|X~8VHVJy?UgobuN<18gD9P=3^4sW;rvZiDdUWJ>8*$(E>t5=;fz3pkBJsM2}@w z{~+qn`#l4ZNct#AMG!_sX_UeN#W*ZrWrrccRs@oi1`*VHN%aRH!mjFab$krmJ*Wyi z*UjBt?CVXh1AD;bHYSFAYPefJFJp^8o?l3|`K@DJChk~Ywmu@W^@S(qppS-Jm|fWi zgPqzEX7py}j)|ex!YFWO1U}FX$`jE~%g&z`?o+58qSeA|Cb5%ich1%CXlgMeYyNf= zVuBHUqqA$EJRcmftLt8lRXHuH84*L&A>rUpte7|rzj}ZUnAnva1Xw`KDk2>M6^l1# z(Fz{gOvET5e^He9#ha%%V<%JuwyOhhFKoDgCI(2$RRsyqx`LSOT?WG65^^1)K3Xor z6hxlH1$PpLlLJ}e08uAFWK>ram=T&mzHE5`dmIL#J(bv5P9k&sk^X)bNw_eoIHAznUviluGr$kfX@`;F)luiUskmEztjy@tD_n{&)eV?A zduTn3jV@ea*LhS>Tsu>BxXE-;N-BS%IAtO^`d1cq2=O{A5W?dX)`0O=noyG0pa2J~ zS)Bvl3}sL{C!AXeBNmeBB?AmJjy^+1f8_pRpASG)?k~qXZ}=m#4~pl+y98F!_b3bl zqTd0KD9~`smOvCzJy_xrPmg}YNgu907f`655HB0BaC;X>i~$yeG~*t&y!sf|o-x$50p#LuYjZ!knc4Eh6;HF4I%7-rrd9$EI-#*tq+uCBFJjk10EPK1q<`S zk%om{&&Y6u7YX$T8|&DSm0`fhMn8;ymKckN8jTW80|^d%4G-(=frr6xD$js%=3D>G z4?Mu&pbxodpHr&VmwOPl+K0{vU+3Rur&DE7k~;Jskje zmC>%EJaXOh<*=%P{dTwZnQmI|la*rE@2{skBRjd`T1L1h^WX16MrxJvhiqTofKA7Y z#&nM~oBFBUUnp2}X;WP7_nGRPKR?eZT-79j5(uNK*^Z@my^X!!d*@&0EF9Z2x~;U| zw>FyCa{NE$H&462IpC`uden`Mrx6#1v@zt7z}{pbF%1S z)+Pnh(p61jc-H1GQf@QHe^+X`h)+P@>JurF(gfk|D(*EDzfBaMvAKdSxOnVfR5M%a zjh-@e{@4u73=v3I;4JIsl6LuXSF`n|^kIJVB(XP%$M!i`$Aq!e2g$Olhd^Qv4WfUc zeHuMIyA-+4mqezvYs4G`NCV`?qSzX{Dz18>nG1FR)`3qJ32GNORI*+18AR+|_{$!j7Y2%os-c^7L^Hz>O!u?mAhL*h$* zYo$~=%B^gLD;xRCs`d83JY{x$AH zW<-^;eMmE2CwsoHI*@FCf52nV=`l)@-r%jZO1`owz`gV`{Lo1Is$q(qvV}gD_=;|% zCzqjJ_#)>+Z|nT)K>)C$i)+0<3Alx?cLZo0{JB2J*EWQUA>qv@tN@ATAQt3sj*Z>W 
zCC_;bDZZMZL4@Hz!wr47{sRAK`5`q3DD!vV3XPSB1lrPHI<7w>j;iz(2FJ=~3`4M$ z4$)grewMtP)*Q~w7Xwm|H&WECMsT<~j~u>ji*8!<)MIgZy#bW9;`$HUZs_Y?QHU-o z#lc7Oy|O5>ptcKjbo0KOV2l?AASgo}71hgK=#TJvLnTogHKLNTx&8Alc_<(Iq(qd~ zkUgL(;R7Yu1dXF2KRxNr9IlK}l0xGc{(`)7hel!_jfm zJmN|4_iFvS@1!qT&0)DTU3es>mgKgX-6RhFuOBN=feM|_)?Uk2PLIU;>*TPT+&G>k zpq~+X>6Y_>J5)l#|Lcr7??@=wl;^Ajqt6U7XCPPJLkE~IH1npiI9oMfcbK;&*M0E{ zXTMe@o|8uSbu_!>D{^^FDCu}+(cAH3x6|7K(NkGMzVZ}z4%DS7704own+gp~ls13# zCUF_G62CEBSN?qL@q~R_lt2{YGpLTp8#)v5mM1h5m9Na9Ora(ghEJ;t#xbIf4=#z4sAnK zHMm%d;GlpSgbUbeGS|3}Om+BoAq5^%BQ4Mlp!8iean3OS%d=~t&1kB-^!}7MR)=N8 zwfLH`RnKGvC|Eyj51N=WBgrv5bK6|FZ6tZxmID$@GxGav;a&2E*b=N0T+`FQIB&Gez^XQ~Q z>81vmAd)ayO1)yF#OGCc5{{?1wno>7VXX+zY;%3aRa}&;0YM1HQ5v=`axV$#O-&`( zv;geaeJMeU(DS_qX4o8BfkSqiZpByHJaXjYeseZ$fr9gLl-r>rxp}> zqYTlqv~rchA+c#N^kCn!Nz_;Pr0=lf4_EUQlHNy2Ep1TNCnl^5Kn_kk%Fl`s>$r8N z8=1B4|K`AAHThIij3bnfH>M#Enp-~#KoONn=#+M*S9^5oujW^b4K+Hk~419wVU z*=)_^?m@|<|!T~Rvj@EiJ z!((jTo9A`KTQznPi;Rj#dDPHoYq5TgkFZ`40c&DGCl+^*ll`Ub?LZl!3_N!05GdqE ztyTo)7^3yy0o*#e(Pp%MAqHsM2l(7qMg8z00@7aW7UhW_vY9=_kN*Zkkg>Y6)$jzg z23`7f^*=#=PCG?1=$B9#IsyHZ<@>F>QgGb=?d*1BA$Vwb3(dJ@&;9Uk|BQnA_IN@^ z6X?m{by`MMrZyM<`i-+8MV2JK8!ZISq#lL~9|Z;M>s`^*O0ljVmOUx9o_cjs$~|A2 zhr6EJZ7?fskiz>uL&>r*Hv4qc=95#DLWyVU`DVFMO@u3iOdAP|0KSqdEK!|q?^Osx z=1}|XS5_T4E-G_x_Fb9p!fU$7q|WG949+4J-}%_d?auYcyxmCCeD13}kp5*{_n@18 zpgMmlaDf-`pzE@1p*vYoER!xPDI_55>eZZLTroOiO} zAmo?{B^%zEZfE3i2lvl z*NUhUDRQo&5s^ZAwAni^y|rjI1`YdP1hG2oUVdgr53Tpx;X>xqlX=@3ekBoP3xxWP z^ly+&9ib=*^tN3~yB(3?fsABkKLed=N1dD$9-)$QaSCoP>&AgW797&Cds`bKJv4fM zHm_;{&D(7&p@lLkrY?+^F(NgBF4VWbbxZE%%X1|wE>C6`KmR}EHL-w*>;EB-5)X-{ z5dKFVX`#}C28sazJni$yY$Ps=YE8U4Y|~E zP)93HEgYW+cyCw|f*ArZ0m$PEKX~g&9L=%}qX@1fIz`xlZ7&6=(79G!g=IDO)6RJI zW?U$5gVUqrr};O0e;fID@JfJEY!IRy$s__T#aXSX(3*6)ra-(+s?Vx&JQtGiS=*1V zwOIe-$T!|c4bW+xwhu!QImlUWgl7`1GUCZH)Be~yNkBXnnMw&C)E`MjD8&K{KSW;| zf5aDtpdRskGg#Kp^!`1MZ@89}#!UXAAET#D(eQpbkEvY13=7pSUOr&ye88O(LXZP= z|8Z(tXuUx}z5#zvr}y@@fcGDDR^Z1sm^9Xye=zF>N?UYJP9DFDjPk0>L?5i=DmXqN 
znV;tzqVTTgzli3h!(sG`0mRiw|I45rtJM7y$rbP{j#_DJsmZ{WytL&Uibi z4=c$IriyPId~D@UvWQbTsnyiTFwgHZnZ=Lxb)RZSjhtP80i&9CAew0}$|mhMk@p-` zZB5Pc6$Ei%Pz{oh9+F{?LlTKR9jJCTSnaNXnPdwR4gP8o0Z9^>CxL`jcric+jG8 z+w=t69o<3+S^-WK;|BC#HE>nrajXT_*y{BvUpaNOFk!0+$hPms$2`Dy*!dWs%^z;X z+g;b6S)k8n!&afCkIA3{?$+H3w;SiC5|+&W5k3*EMdGrwD>nPpL0a5!y~aEreRv_u zwOJ|qKdKzMu#zhv&Mhg3)AryBrFlvR zZ>UK%kXIr^8|t8se{gn$`lPAg1uAm6v?{A-uY=T5g&@vaH=+~viF75IO)LYnBR6|$ z7D=DWpaIy${3~{r;TRYPfW*3nXxbJ$^#&|r< zwOK;T^xvH_hpQn<&^{CFK(sfRCN5_teM{d=#Jz0|cOp&bA})HJB7~uY7j}j3KmM?# zj)Kih(94FeUokK0eWt*=&Vh;;+7z|Qkq|pF5p#ETF4#K$Zn$VHp#70+ToV`-(2u+U zKY&iOz*KXnxO=wb!sPc#%Law~JXqrsL$#4=yzs!w2w3(&6(B1F1(~Gdn^9A54EztV zH@-&c-EC00F0`=$Y3rkH(y3QPv87|7zkSNN3S%+lVqX6##MKUwjC{5au3-u?4F0s5TgiC6_=59{@1rN{tlchB=c)bxpjC))TGEA#~!&ZWa z=9ZKA-3I4UM{(4uh<}f5&vmvniJtWvdmkU+9v*39e6Mt%*U?&)jYYz6?6yKLvDAaj z6gz+!cw3D_cZ`-ukW&0Gm&-&50V%4-dzX2RnWvh#*5=^&0-qNsvR&$Ulz`x}j*@&Q zoXL`1`@M~t-T-|m$awb}NlO-D0XCInDBKnEe|}W?l4ubZOT_!26n%$|aQiRFY4pT< z-tAoaua|!^yFqc1bWo}2v>{*ozcCRbXUgb-JDOx^QHCOJxD|gDTI7AOz@7ABReq7y z>HG3eTh_QqH!SgzDnVZ=F9E$s%c7ftku5gqs$o|W>DXH*fFaXhOW7$L;*P~M6 ziz;}cwwj&Ba)T}*{vf-M(OMa`^Q8C)Za>-m%lkXmV88Eu*_6ddyo+xb^}pRd|Y zn52jbb`y*R7QB&|pm^+93@Bv{=n6K&ri?%2H|r!$j!6$=YUTkkrIQ%v-Yla+tL`B! 
zp~3iGNfT{J%62m}3;>@=VFG;Cj$k_rv#z9h!R8X_gYx^JTSemcUekaJGUEs<%fmz{ z9G6TEV*B`a&~$;a!;sh~GqRje+KDF{1PQI)x3AMxB5D^Ry>_ZfUbq~M!oNIq)I z{6tDoZnl?!1mhFPb#@B5$Lv_iKNKw4z1v{hRml@M7QhE1)w#?Rf$ zCc_n0hN97Y)J{@GS@IPr_Cg>I$GZ)=jAAW&xGQ|Cn4doYn?L%>iat+cw6jGkt^DTz3v9f+Te< zb3nD^mZ{d``bfksH4W@))+L-umLO>~!m8yv^i@Uv!Dc-EK%X@OW?#r?{2PL-c+##w zGHMx=akfGdB?2l!!_BPs1tl#8)LiaEtgQEp79+sMvJacY=#+S>D?^qz?%vj_d5IOp z;bIe*hk)U9$jkAHDk_;Mn~cKRkS9l0mwSipw?buPwUQiiIxojIQ=pWET98a_nQ89} zxYEFm4377@rQuB4YT#6jo23;cwlRbXRx?9RbIk%5Y@fsL!ta_95;X}>*w%>cVJY@e zA0ndRt1gSF$E$-Wq_(cbVUg{^UTtTKy*zjHuW7oC4o3p%Db+jY;{Vh&#QgEI?1o)!>qAQ2!`glAPOXNX-!8V1cq zrEw}Yeg`hF(>z+xv19vQs~y>01<~Q*)V=cyAR;5##bV@H`I+vCo?BwBNos2Swd!Jd ze*omsYD%Oo>h+wNZ?|mDuI9IpOsPRd-Y}Jx`u_%|$0Cr+>Dbn}WH|N$(|49hG9UzR zRM5vIWJ`pUo-QNyRjoY-rD>8M@hpuxNYN-nencYw(Cy=-_2hD=x)ODZBa5ky#0GSHZ`ckSf4VrmosfP~#XIb|Amh<BP_ zW>;>Sc2sF%3_DwvwskZD)gBj(2e9QuZ&zw5aJ303Iq#5c7%p!Ywz(?lHji|_)PG9W zG&-za@;Tkhj2zZ!Dv|TQ9kcCNdjF1)ziujtBZDm}yhk3AJgNN#w(wWi7??ay=iT z+>@DSD$EBw4kp?K5;BQ$LdDHKalw9^f=~o-+@52he&q^h5FAiK25Y zSgK?e{9d=VPnA`{y(`=>;D!5#q<*O-B%BO z79FqUf#~f)ck{LZ0PvJ(BC712^t2hlh0KBk6W`zh{3^)E1z&H4D~N#aHC~T85b7V? zQ-V&^`_VW3Z-L931nXG~J97d$?lWPdiuxhB#OuIF=Bss!dfi;UdG)f=arDza#g`-o z>y>uYx)?37%xNVX{Hxo1*aJjcBu@BqL=GARdyRU=FW-55cnnYjEw zNZHa&zjmCNex7FJxvjS$55@Y2OO~D=o#BGodB@$BZ31d@&76pQT~Hvvb&^a?_9qa* zCNT352k8?F?*sS+H8+GrwJ;kMPxzhyQk!Xj*AB65_IgVQ8)>K;sAtFexi?Os3An{V z3i%e&Gi7KVz-DC`nbmjf9!s&u4XI$eAg&Q8gLM8d>>9updf(y23JUb|Egb?DdW<7MUA*eCL4gzJCn-tLq1tQ_fvnWeReb5~K z9#&P7Q-PHG)eeLOM7QOL=HsnjNu~(j5SW;17l@V*0J!_I^hrm)DT(TiR>N>!UIfVW z=<^>xC4GaAz0?!5M>YkKn+M2I$`d|sLk*+j!1V`Mn*x}AAuHNR$Yy=H!Lm3Lv(Vjy z9=$BYR{eT4uem4(Wo86eNIQz*Jx4Ib#7_lh4)*zF~A^>Zs%bo#gkyw z9uZf$dQZS3xMUM&tAzzy3-`?DF?Q@>6FW?t1_{h2T4aP5vcN;WW}g7rx$fDz_psk? 
zzbnPYBYka?-?^>W^{+|+)WuVLaKEvAu~rf`fUy0PRk?-8w{iR&EW(DqS zI)f$QDlAOZuy)8?z4T7P7yDDwjUH3sOvC`DfMRicl)Fme@#s{NLJ~dq#B>mJh95|Q zZVB}ygYTf1$0I{Kk76PzcWK0{GVwrtH^OK(&!s(cEKKp&H=x^Fm-?bMz(~$I%TzWU z@Vx2aK2!I)?5LnJwDHNYPcMQOZbZmutH%_t(qKM@I3Il!=v|dV3M^H{f9#M~ilj3< zMy|*&(c_avXJ=FJ9NdO;$2<*#AMq5hXa@b7)+scgl~N;zC9ZTqOZKMm@X#2oM5t$$ z{2N>}p#5=^Xks)mlel?q$iHSMW;eG5Q2%`Lq(@>R+XQQh2~*wku@21cgOaQk4_onI zbXXd4%US_L7ya$XR~s4|96`ssfJOX5E@t$)Hk#Q6U3z>`V4vI7LQwR8HM4#fe4B<* z0;AicsPnDw^1p_N(^ooGSot!~><3Ej20xihYpN5(nwbd^bREAA7+M=e<|W1eoZ{7w z#SToy*dvbqx|?S5DW3h;$E{X4Kw>kTz1rEKI%|A&**?`@X4(tx zvS!wsP0y@y2#%k%%Cw&{Ap6$9D|dpVYEn4RtZrwNbZ8k&KuV2*J_t-mjL2DS{~ zwFRhNh9w{BnmWvGj(nedtmKLVB5|MI8n}!gtnog->5R~kk`; zbhVg1PcT^$Is}iCs+q$&3_;oPHyrB4KFFzL~eylgP0HPe&(iN zW`UHr+enO2Zn5U5iMvJt3p#eNZ4RNGKsVgC=DE|IAH~c=ZHYPpHT#ZjV-C?hcThK)67eS(5j%l0HHijE6CEs1R*|KB=Y1!7a`e4f5~nG2 zRqmm5Im#V0zMf~)+eLVc;t@^4YBh@ot845I94Qp{*9ih#dEj0r7>2Pc_>4fJfPZ;X ztKflnhuT0GzSq8hCz#GBZQMQj7OiujC0IEfW!Nl9$2;MR8FR$dFQRAbGw}a7Lg1m# zF=(G7k&DZ@5@R1LGtO81j7Ft5G{spHiZrEHu_!9tITwQ%Kf7M1SF8_5(!wd3%R15r zC@Zci&Y=zITltUcbL2q@7W9Q#;2toh5c^N4snnA+;V&4#9E>O1JT{hYRhct%s{YP9 z(wsn(?kAmG0EY+x>|pclBdPss%sX5bUU>I)b1hN{9b0ZS|9Dt(wqX)&Gk7~Yb*lNm zZ>GrC9&f!ZRTI8klNRb|q)>s(*cz%8olhthuvsy8+k!ncuN|LABk#GyE7SBG552g(SQ0e34nz;>N_+%wmomB;bW`>TnB@0ZZDw8Gu z9(H2tr9Cdqt?FnFh6gEQ7Q444OS3M>*ZUUBDJdboBGr-FfO4y zt+l%^tukJusmkFdnOOKmV%WGxl5IN+ZnM*pxDg@#+H@U+D?x^{bP(HIG;Ee8 z<=Q`3H`?YE(uGt{S8~d-CzZy5P%0N!*|6Oi^y41l$^dWRO0i%?yanLhd6s8MwH>e; zkt~joAIR1EWB~N7HgOl}9%d$$nlRB*yCV-^#w;B!j2dx8+I^$DbTfFK|MYrK!bh~qN*eyA@Bsj!^NrH6@UI7b!W zO781QXCPq}y>nM6WlS4?H(a{+wf|QWv9~Y$uPP~)7wtGLCEa8#O6(`F)VM}>g|f>_ z7-t6acYN2c{I_g?wrV@X zrPt{6Jx1o&=&?*Ti;zR*HlgS)*QWGo7a};_irc7Zj&?gnxXuC8bUeNJB?m9Z8xYFv zdH54>JejUf%G94+t~%*Oy&BSZO2j!ho!n}H>m%FQ4&4-JS_xyw@%sps-;^@9%T+;hG_U+$v}1xH*gDJhj6>vy2_Y&Vf?j6iZvrf6c^AXuch9 zL3p1YRGhSC2Fz9P60?AIeJ2MUDAPi-0#vO(Q4_ zSmC!P6OJLY7SR?YRRl_=t;Z@qEobetSzEOAnVny6;J!QJj_E95B)c8Ji8_lVVa6hN z=5i)nM=^dgm+=OcuxeZ5sYiNl&PCD|z@5dun+sg)S8&u;8Fp~#Aw@so$NQr2(F6WH 
zg-WlMn&1?oIvuE~;db`j>JJ~$fJO6|<4<~4VB1ynbHp9h`jime!i{7MN!u-7 z2>Tb0vFNinWl9R_Bk`HP?yLHVJgTozU|5+;OenmndVDjx^K7 z3P7rhLOg9wpidHCfV>qIk7TAWxS&r?f8ar;kaRgnqiY$C2%_dY=VTqJkCRJe8qf-G zYnt9+n%B)b(HD1j^v|K3w^PN^TGK8neLr86Hplr6g486>T|s#p0q2tM4`k6M)vbwtLxt;-ZbvGue^ERk{Wn{2$UhH3Q@dR| zStCxd&Zioe^}ICZ7}jXbD1&nQFWsG<>`%s{*&G>#FeJqc`k*xA4MvTHZ{LsvfIJ0p z*Q?<=w;}W)e0kCaLsFPrmll-{_mlUYOOg(Nxm{~!ff(2E%L4PV&Kpxu_?n3m#OZ2i zrxAK}@ir5(@-t_$f-|wm9uInKW4Gpw=*72m%h8GDyOPuBiNre~yJcivOf#M;6&yRs zDki1c%G9M*5{JW0TOfuQZPK-$V46fEyL_~w=(Km_iyjcujgSB)q}hegytV%Cz?ThR zmH#G~yF$M~+}-@jc4IEpt-ie{vAxp~r#crXaPzVP%i8F@`z-N*7R?^kY;0jxXluRx zwp8IhBMrGe`fcrQq&R<9nWPDWGhQ9n{04LHnTT!LAfMyD%xxs8F^V2YK&^A#ueUrOxBb`5 z{lU<2g`kvqR&BNyjyfthUL)y@K?SQ4kk@7bxm@phbV(ary zH%lwGIQ-yA(-N!-R0)2Q6|54}DmuIl)+$;MPbRP)(Vb;mr1yOOihbkLsGMfwodnw! z^Ax#+aD2Z+O+&uPR(X*X{L&axBA;tr>}3WhkUM3(`S6>!V4(mkrnH&z=Y9!em_K8u z;BHh=TP+prSFx&@T-M!Xoa778M2I3#f3N_dzDdR54iN)t%y$w6s0Aw`CiBPBxd^mO!!(*uLcP@>VUSAPyXd&23 zQ$C+t;9%G}u4eb_zmmVsS>T@9IhtkBktR4O+;}X)3?~4OciVIzfpP)bNo!cM#qAt_ z(`Czgwq#1+Bg*?L1EC~up1BS*$e5+<*Fa^lb?@|XT+@l*y|%-H2jL>r- zn15E9z&An95UocsLcIAz{|;a0Es2uTeAkw}DVt@r1ds60>2a_7WKWFBi#HH1teMQl z4wj4Xj`{<%`BCU8uKTeg_vJ17FM?`H88P47#y=+u=gUZf*K=)U6&2`o^JDcGkSWLY z)V@tdH66E8r78gE|7vPFmJ+aBzhgDbBpYV#(llpa*ZK`-5vDci@bIgAoj|P{%+wG3 z^WkCHo9w0Auxg5gi6MBtc%7`)M31$Xa`tUE-$b7dkQ&^rFUSA#-~{PM(xbT^_`{|a&U^Zv_?;g~y+T244i zuugX0V4G=Dy@X($u@SUhBObN|wT8Ff6sQn%4ler-SV(xq@gmc>=D)BmA6yD8jRVnx}QK-d^=JbEc^SpMzX}^0|-3;@TZcmdQ8J zMp13lrp2jCp~K{X3%M;mCzWWY?g}rmEHG}Nq|t-bd|!nJVpRqnW^Rr(42BOP*ajhN z2gbU4@u%C)QB*ab(B>1kbCAI}%un1nTA-Y-(QyN0=YyMuV*gCaDL(bs^82mv8rn$R z7@uq2j!i!&bgC7>`TF~ttk2i&yFh_S=>6?&g?VOX1RG2P(<1~FL zhDe96cyyR$#U~<37MrZ?yO5F?z*{<1!+(A?w&hY+9nINDEtjD+y24|p<0N{d9COi- zG*$&LH>9-^rf8w->05&O)o0Z}#3)8ggvE!1FxLxHp2motS8J-0AJlUkmDm)`G#*af zHSY+#!4LW2rp@eyn+q^tG2vXc+;X(^ho2e0=5i)i>xNhc$qsJ}wqvD}Xs(xP6n7Sk zwo;~z`%_TeV&{>6SzCTvR*<{|LiTYZK3)x|jA{FNdBQ5aN!aS&cpZu2+$_zGf=m`KALA zxOPhUJTmKIR}O!BbENP7X&kP@4`hTAYBKOOg(Jo15J9Gmb^vrUnto5Wc2unq$vyzE 
zBfjv}?}-xad8+3jGj|nK+d&@nKHNwi2%fhF-=eVYC~)XD-F%M~PwtD46|1I?C0;bs z0*4GbODk_pL~Wi_dB%vTZR2`(DKVf&An$9OYb1W|rVtS@B^`wbf*;}z8RIst@GxjyV0yv!APLzmuNIz@J`C6y_R+w~RN z6S|5T;irM%fW0&n@34^VhAiuK$%#vY_m#1|Zc*lib&y}3Aia}$?LvQ09c`_BtAvIT z>c7@+ZwWneqrC$SiZ99Ys|SGFo=3B*VaNb6UnK5fR^Ni3pz#GeDT@d(qY%?|fwF!> z;SqKdB44Tfw*g|VHRMgN*} z>gDbT7VFzDNo}|3G0jZ)I*YoTqYm$zqPw}qpoBUT-K#M*d57hANaFxs#}P`#<8sz( zh0sB#nD+>Whh25?msr0LzsNpDW(MhY@3i3L)&pV5)jLp>NigwXPB%ya zrc+baaDTU86CcSeE?h*_29VylHnFr;R%{V3g&`NHxiB2SQ2O>@&#qcaKBk>#&o)?U zJ9KF{=v1ynl8y^h01ywDd(=J{aHlaU(~Th29Lx;?<`FGN!~Ms)0Em~GpB&szDjdcg z1Wa$Gbs$GuvSm3UWw>E#b!s@A*eA~>0N9Ll+g!LNjc^{%R4qqu3k)Dct z6Dsp<%VV`V(t!VkCc~HDP-X<`)g(5hERNUwjs%r!JhQmnv-?ut#Or64`FZ+CW^8OHokPGiv_Z>bygb!4aX0l^x|TUQOV|ituoJF|2)?o*5fGyZL>% z|HQ?1i^{jc(WQxPqMXYs!%& zVrp#k`*vyaDk0N*v|u96K#x^oL7E_v>S_NDhYwnHeA7J5N|5zxi5!*&;u}sXK}Cqi z-ZDCcG}&=5k6swTXwmET{8FnyrS)ux=tq){sqD@1)Xu>~7}rz?pu15n$HEa><}vd} z_q*-s6DCBW>HIslzZzWP@xlbz_?gt5jKx3>*w1S(+xMN)BghSeHe7J9kyg7kdp~=a z(B0X$PdQI9;ojEJIpLVP8$lZgc}q-9!uaOnn~pgCU6TIzL|X@#K3%@A%DUH4{T$9i zi2U9+k&e7^ot<3*pk)Y5+*{J^9u~QUuW(q<4>9I6Oiz3iK2hyYf^6zcBC2c$jT>&3 z#Y)v!^<*-s<=gPpZ_uQNvwQQgIPReoNZWH5zX8t?Sd8z+3uq;) zLLgg*FcVz>G)+R$>Gk_wG<5s)?P2uuyTdGAn~u)Vy7jrlaySq~(QD)1;~{G<5?$1d zjw>*djvfapW|+pKJ;M2w=oxxiQC?bn!KKwEFl&9`xu53|Tx3*8Ad*aeEoO8_om|j# zGFGqxuxy!3pCvvctNRbRGjc6=>R0$_*6v=2$%ym;9eX{3o6@_~uQI5i%~4#j&%GVr zlQZrlh}PiS>q7@4=~orpxf)x1(`T;5t*)02EzS_*Z|dEcENmQzPTO|kY26Mrob-g> z*ut>-ph;jKrxUtj;G-kSq&DIhJ7a?0=TLSJ!dfU{g{_s6eC5xmg~~(a#{zqa$duv! 
z^C?jPIIBC;PH8;*Pu8^)`+fgdc0#Fc`SW|tlS(f5|b zkAhaAxJ9%%!NyC_HfGDEm}Xe-?_vGWSiPaIC*97a8Ko4WF@koS>0cCgs2{DQ&PpSU z!$I*89Vryv#R8M(WR#76i+v1w z4H3T&c4}S15bQ{(cX8Hp+YvQ@Qd8U>&re*^!{+ZUvbh)u1^=aE4#((UPq$V=qCDDB z^M7&Weex3*M<2OI5i+x)99cpB!lxbA!M|w{URD7uWw<`{Pf=jDiW9zcPf6YH(6q=@ z2pf&YZ0A@$&Gr57rqQ5ywBF#k8X*nL>kk@D48BKcG-)Uq9{naPpjwE4MNzcngBfN@ zQ~pGjx`U)lQA-QwaREeYm<0cZ-mZobxIer5-0G8&gQ)BG%z86jmkQeAAe%6c5kcC| zIcn^`+V>hL_3J4VZ;v(mk2YkR5r4EB@^J96%a8OEHp_{*YX9_pG-EBFWUNRpf^N)hNPQiK%Vbo<@4>to>|@pw8O7!eVZ!b+Akv z^x5H|D8(c(Czi=_TobIRQ=0ct7L`#-z;&ErMxtv}Bq%g#g~WUeHpwt{xQi08Cga%w za&0D%4Mzt8l{Q8nv~o4Hd~B~8oMnmC(}Mu3(bF8-Z>NKY(AaH&;5YD(^1HYjl5q+{KjYC7<&rEv!Kyt4474;E8{n*)JkoHq~7My5Tm`P znv|JCWffZ)t4}z9Z&qfU*e(#OMF26!;2-40Dt+dcNX#3L;p9*7(WR0{Ww}fBZGR{7 z!hy|g>9UcbmpALM$o?ZsaQqma=TK3oe5wzq`9A~n520i?Gq@e;d z?oO1Bt`*F`5_DZ#Y8_Q+*Et^KyQ6Gt^?~(~^rf7V4>s*pa!GUphmuxIuq{VKm$<_T zNYfKASg-icTD9hv+|mWO8f`OCyH83`tA>hN-iPskWkpzujADc^deVG^Y{mkNJC6JX z^}$XgMWIp_h$>&MwZB<9X>DAIr0`;goT^%u0-0r9_2Ot!ZA+5G;!geq|6+Tuc1Uw^ z_p>6w=Q1Nmt(%LRc3D4r%!;O5nJ23{e2JBCT-q~7s`FWgN8CMB{1D!gem^wQL@)oX zpfuuKk-W75KRpPwQ67^Sb%&BG<((;?Iw?FqSg`64b7D32kKBv(@HrzNMc{o3Vs}VS(=(P+h@@WKmuRNK z0=TfC=W^9}Mr&B+STbTKaq()yTgYUu zou_vxNvxoDv*5(#_ zs*Qqid9(&?Xp)cf({N@Kd<}y)Pus_A^yqa4B0SP2RuK=u#hHLayd-L+9AC_`^w;QZ z?nVyyVS+qh?e4$HD4(}fhw$t{TI7MPpz{kpr<={#V!4=cDyN#9+3N4s2?EwhJmp<- zmG&jYwCPkSyYpLmid@%CAxmIIqwzpeGrDnHM$ua76mcwxJ6eJ+G!pPI1!sO5X2!Gha znBrdy#3dYZ_%@h}>x`v6-)bvjw$dOIJ1?jJ-JtPv0zFG0g6_74&3qTkA1cG8d7Wnl zp+Mh-O;$7NT!Wzq|3eU=6Y%^6!q+)~O*Vv1S9H)V8>l_%sP6=^N39v`rAQOTm1wvq>=2?RR^zJLJ z_2khblK}|#*4x4rMo8r0L@c_wxto5~l_NDlHwH!co`jvFl`!K1>dc_=f$en*6Ktub zi%)=miPG4sxE{-bZUCRG^B>IezaX-v@oGmaa&7 zWt3`}6+q6}5&$WXI7YqjqVzAlDM(h&T7#h0iFoGV>Had9RJhYg3{gMlj7YRHl$`aZ z6jz$!S>CLwz9H8)g z#0NrwAN9`!CPsr(Y|9*yXt0Hk{*>4_=}JjN*^)*_e{etQP84ojF7z4OUg5S4gOSfH z;&N!#t7z}A%xMzA*ORS-v^X?lDWWf%wY#DBDx)uxcOaneW#jVD`MxZ}(5wQp?Q;6q zc2(_v^Z@{SE2=k}s8o!8#K^m^z! 
zNl(vVYB{-fI;L*>^+9TD_Ra?zC$U-V;=9Nnj{%oe@y<}8`8Qo*+Um1?env2Vp-ZV1 zjgliSx|;^wWgdlyu@cP-#)K@Uq-iR=EylyLv8aT?R`4VU$SNcy ziURC!%Y4|pTnT19QrPz{rK__e!!yg;G&S>rC;kCKZd5hUozoR_5IvI@22i*lur!1fz&EtcaqkIB#4=gNZqS&Q;SposJw=yGxIxjj3qisgSau> zkrhksVfDU>GQtWQ-qWnkVV6-zQ%wpUG7I zsBG+e8wTZ(qKjXpFD|qWQPDO9(}%HYNn`V2rg##DEm$^1EqREA(HI%8!t*{hA7Z4; zBxA&pR)gra<#;Hog7@){xf(|n-VZtj&rT~awbKfZlUHP7SRm|7a>`bKy5HneW&ncT zQ1tl_R>E^qe%JXg=sb@v;HSK^jy&3R=OR*4V-A{r)4t z<`md0&xC$5emk|jhN5#Q5;c6GTLTDJF2JAscwB2dYxcLFOVK+Ly_B{$qr??A%D?$ zES2~n)DDz*HZ@10km5uCKD(zUcrlf$SUkdy{02M2ST_ebJA5GRS9GjNMg`o!|KJnk z34@TxJqCLnSS&XoyT5OT^Us&RNElp(^Ubw;43UlOoVk;K^OSBA+zpZjb<^m}9_OLp zQb-a~iJ%q{fD%RhLa877(t9GYU7(*OAE5FUZyEkn0Z*64(5Zy6Kv%&d46 zoyS5dVf<_f0ouI26?Cci4*O?oKx-1HhIrWdk0fdF-xQRFisr|SvOR|UFBVp*^@wCI zr%e}5BPfuSx9aN6DF>-{xohK@}4A^UuRM__0oqTmcf~;)akinZqY+ z<`bAD_|gSU*wA&~iBg*Vw6rEj_z_i#EN0H}@bd*o1JNJw8x}z3diR1y*uln1B){10 zgKF+%fRsYBo1HMjPDc)SI7h-EbUaQ8_CKbWeBBXw9?>1y*eD9-7U4BQzUp)NHg3y#%wpLL?vOVdef&hr%k{TTcYh{{^@>JUn)R^vz*q z;E>->?G7ksB2GYa4HbSh#$?^EJNP8Fow_=&YOv8zZr5%uURc-SV;MvfNkru$mr+*e z^>#fmvU_OMRZiiwHT-fj-tH?)E+SnxrL z>NeUMTn#snsiLCOx-VvjN2QzqY(A&ct!8G~v3?s!64fIlkBzEY`%691i&|XJ;wqdh zb9%~_A@Vp3bT6~Lhd-ps7ytcKUskS z(xo_fSnaeaIS>626tg|CyQYS70#2-e34^;mhdY&^%`jqDZ6%~HDnD<$sG%oPRQwgW}P`Lhd=$@iyV>fRe z>V-e>%=2)jdVBTV1bL0|8zA~A$`7C>e0qZK!&XTVJ{TV4Y?-HnQKiF1Mw|LcvQe7h zrnl>viOTmYq_n2VchytX2}0*(7cLx8X-$Pm!M%XDYrL42{g!R@`qylmFlx-HPaOA$ zXTm-%0SIApwM>A0yn4(RRhPG?o*pBxRxmRF>lfx!Z@(E7& z!Yf}(I*d!GUa&YG85WX2YN?*aR%OG?c_>vGNI9xV+$U#1^QkDK3A6vJ1x8ONU{RYT zzuC`GorCc}o%k}=W*>r2VJsy~h)$l{_K)UYa*ae|)0 z$SD1;8lW!2n~xy#urW&#WzCKAL$BwE5jFweaU7t{V)17#Hz2${WJOI@mG21*83?+X zsvFG>P>WO<90#dZ!Evp*t=VFj8I6Hb@z5v;((x%@rGQ%2z4)S)UUzU+Bl2z-3x#X3 z0;C9xl;)J=^)y;a8J=@-=ZK>#DPc*(~AjSjItw|D-jQ4Vmr-pJtZM z*;icO>d?C=z%IukD-HB2=3;>CztkiR^9RMw+a2(A)mp8#wvk;j2~U@u zf&JTwKRNRCEyaN6tFs|^j!w6+vSKuu1GI^}rz!n=tIieR9hfddp6+KWeshgjSSR~{q>1kCigNHoium#RFA>ogDNPc9YDBUu zM6#6kG~x0P4X>n3Hd;|ziEQS-bydoPKQCthzAtLj0%ietO>O+#v4};x2z!Vkq3Q;wG9;WJwZLw) 
z5C?B4njD;`KbaEsaVIC+fhh6ls{Pcn^FtkizyFnBUxlgr_u=`?V8S!Cs3VE@;O~QC z$o@${)X^X#IZWYqLiYFjd46^|0U&N;FK7V28yx5JAWja+1zNqUuI@;R{QfS_io+qGzN`ySA zLtF$|cm!TUjtb;m|7!_sp$fEQE%`>*HMgM+Xqb1aK`wsQa)AX2t1kDT=XR1wHJRek z$h^+(3nb5YNb1!CuFs5E0&p#?-&S92AaN{JEPqJq-cPiiJo)pH2EO<~e zMItu`Odlg(`iEzAH{+UXGzX|L_nKO=KJLJ~^IE+lx8#JIVEE230JjS2 zYL{u3yvV<;v|Bp+;OSEeRRMe~Y(E+?QD-KB-_}x}neK(E*~=3!spzC;VK%yy!X zLk5=vOYKoTXert#l5>y++ki4cR*H^rR4bdX&gjzHWa?sY(vQ={l2R#YUR-mvtnk@u z4!|?^`yT)6nD<>_hs5lrZp#bm?Ziba6n%CDzgHN#Rf1$6FuIEHi1L#yrg@>R(B*si z3dASRSthGb85Sm1gAZjTIB77u3-Ln~Kip+y4>;Z+ZtvF! z+zNi(g=AC{EiOw!KpK@c-qN>^(gZwAr_qKeJ=&~56Zsf20)Kn4;K8$L+y)N&G-}D1 zzFJ!>GVq}T{Ao=HfB)WN(4|SCx-GAQ5;_`WBJSUvP5zix&Q0>iNJT|Ti#Cfy8^D=< zf0#C@VwpZqPj+h+he1r^~_tK4#yBSCoFo%J50AN+06qeWP(2gXY znfQe!op=xpmb$~I^n94Czhd)cObTHiG=Q*Sy6{{9b|uN`KyJ>Q1BKUgfamM9pKiR-`5Go?y14K&>ccN8?#8DEhf%RsVy-=Eh#A`jKuW9<9CI69$v$LX@( zM2Epn3wOl=y_q2E4-#kk=5bg^c$y_ZT#3B?y}TEILnmJ`wpWmn=iwsm1<2%e`1mNX z_7@KznX|{_obr#EY0W=yUwA59%QOi5H!_pXyOU|+OgtgJMQAWs+aPp?!&=KJ3F;LA z(_Vfv#sOJxc*klkZE2yZpN=HA)I#9Lx+2MSeUS?^MUiQ|W5tBI%{tMeY8!46&A#9z zwQ-jQk#p(N)P%?pY*dvD7BB}mp3lYXT}B{aO4_C;)c`TaKM*}0T+W!rTtPu8e6+6j zEU~xJ2`d1p$j8Nrt9`)e(7CxS7g2TO+(Qy&F$I2Mz)0wI-y&ut0OT2o>r^_Gkl+`} zo?q`M`s&S>2E0`BzdCnqB>+gx(GZoSiqe|zeXi65X7Vw9DGJfAWK<-LJl<80j@M>@ z4n(B-DgXL4qUTSfRJhFd_$LMr&IEbizxd(pqI82JKtrtQ0F@a7@O@^&6K@S{HGHDm z0#5B`*xtZ9+R%GGE2csA!k)Q&^xQK9apV~~-{Xb2Sl%bqN<2VSFzm#lF~>-$`#b;4 z>@yeq!n7w0==K@ET6XL^wLgB9fZPaBb$L_3YEjoGrO`1?Y=0ky(Y;g)631O6pDM;v zYpQW{Z?_Zc1Er-Q(^HK--J5!2eoAL~^oiULHQ~-SPym7*_}u{Dretn`=mU+l2u1&> zK=4P5kYIDzy%8Tv8<}Zc&hTQ9a&s>D7!Bs;R@_$-3LLg!TelG#glksnF7*O5s>`%g znQ=Nlwn8^3iLs`q>S!i2(3evh^jsqP2^#qc@L*7b<0yN)J;bHgl_`Yp>?r4oz46dQ zNzo{kf)5wnwthW@ReSqH3uYMB^{wbp<~RI-0q$?GQW~(RpuxBpFWI6(!qf z=`cs~1f7qvq&U+#z{0s#!Dp-@L^9E?mL?9VlU$;*9uD9f-av)-eL`!&6thUxG7>J& z0;ZB_FR&_*J#-S*u@vW^j?9mBwLwPZc$Z@`bNW^J*Z$}G;|KJ=G8F$WRfsEV`v1e+ z(@0`N4f$VY9~N)9+D8x|pgpKW9cKc-e^e>5{~fL#ut30IPryJx|J9XS&smuzlq-pGDvWj7YRKgs)q1 
z~lEU*@t#sNId4@ae*-)h)?nWFETZTcReqko25(Sd9lzw}fG${=YLWD@!I$PN_7~|9^D?rx#Lj#b2 zav3JH-%-ceW2l=6g_xuf) z7`wz1W?>l5!&?zrAX9TcoODzsz$`bS$JQnS?odSOzoz%hQ5RDHyokFwbhzL`azV2c zTO>XmL#=!=X7LM@sWz9AY>7!8rM#kX7Yh@lvT?k;vrSC*Y`1Wc9|Vg>6`l>KGVW{D z>3o*cFIQa>#U}>x9pm88zproqc7$RP#YL-7M9}j=#H`xH`=A*< zb9KjWGqfDJ2%Rhy=1*noqg>BWz(SrX+%WmvOZ zB6Vk4L7g?#S+O}DO2_n-<3OJ9MRer!CjAEanQB*ME-N7nz;~(j{|0YL=EsvX`$#mA zT=L_K<<$D4+@PY>5Un+QO{CQi|Cifwk2E`+$)YhYQ^<*f(s;T>>RVwNRV~$|T2ynC z7Lz9m%)to#GF`H*`MZ4LFg`&jsdEo37+qx2Q4VFK>5I~4PjKVG@=(h5rGkIwKp~LS z|Dn!=cpLhI0Jr{2u4pNW3#|)0Ur=xM8|42hOOQD4M*e@uu^PYK8X^8G=aSg$%Lw^T zP31p&E{#{dQK0{``ufZMAK%1z|LXr40|JEpALC@e>i>)-f&Zi2D*HG7Z(#fXX6E$g zi#i!I5YR8~#G1(8|DBmI|72wTe>3ySS2OXDt>wh~2WHeaBjz@+^73MHmHkApoUFPo zNhw?@g%=eS<`;DFUV#J_X(Ll-;m-$<4k-#OFkON~1m5;Ci631HOp zdpjomdHMOArv5sd{OSyqF~I4G?1%M%a^ntbj{MG{y-d}=2h=50&pO^Oh{hl?zTC*f zlgXF^cR?jsq*7sWrpRgq zu=+fJ!sX_eiZ*x^!!+tXP*(%=PNBMW{$v@YAB5C+2c?;K1v~E^<)oM~DUM8W|PhgCq#aQH* zlx_j>J7NE|G8uvHA_QZd#7PQ~LQLl%vk^opytrk7be2N-h};2rdgw068QwM_kHZ(T zM~E}atW^v%VCUfDlpT#8=tjpk1~Xx`tS|>e0+5dqCnv$&@K3al!2}pf!mIEAZMvEM zgU7Un2uaUrvR}&s*F8#=WU&5fa~elHauH*C!j52jHo3Q+gn%sZMa4I+ z5{NZx(pIX@woIc-sD6c3gMOAur?VjzI>5xw&HD#~!%_DqOj4Yzy0}W~tbh+(VIwsg zP3nv1%v_&Pj!W;xSP$?*qeiEEX^+IMz=jhRbPQ0F)GQ)Dm8)>%6fVjHabpoktada@ zmO?w^q8F}!#|50?$JB`XG0ocE!I))SlW-XU8yh%hEFtg=hoEBxxe4q zrO&+UKxwK~jgGB`t$wL>hZ)8AdZ9o3E9}Pdju`s0(ey4%sx=vxh7LXXa|wE1N(zZk zP_JM|31T7hd=GGgJKKob0@WSNSULO~6T1&4f9GsuL%- z#&5_oo`5KfQfm(g`+Yt8Fd~zsS3VFxPw#TT;=xK1Fp?9CKvdmsO5LD4kc9i@5W5Xp zW>iP$@;K4Sq{L)7s1D{uR|7L+!AM)GYWzfnhMwaG)e68Ug~cm*g;R|nAi9AXo0C_x zC{QadbMVs!`dzBue#_Z}^xFzJwLAjr4m(3`-*TBsbfK8pm1b1S6efawPdMU)MS>3= z!&%Z%Xqzgy!yeV-1IaomU9DQdq{-tRt01)-k=C`n%ddO@85$frMtIfj4hDo;%QvnT z^XRC|qz?dv|K&>r`ih$j$Kb=HBIrhhD&jeTSmFUo5 zR^uIoNOhE&lUf>uJkHpQ<)Zib1!R1iquwLatujAHDzb1qNC;?-S0^2IJbt6w_s3@< zR1}=0sy9uDklvU*esE(Tu@wRg9#2T{4ShjJ+r(u&6PmK#u~FxjMH;7`}crgfGpUS9+}$n84|DUMqD z8eD7=Osb9M4~_0Pa8T*fiX@5s2&+NU%}PkUHyc|!J-or^-X8@`D_+@qG1il(iGEi? 
z*Z_`^mu%$M?rnO9%pqDwry?4LP!w~{*)C(~4I=j=_0GcG;EzXi>|*aMv1LKmY?f&` zgS=U_k=#DCk#1H_(@AEuw@V0@+3jr?IPIs~+*cZ}s|3M+?7(-X1c3 zwAQIum=2j#=y-ED?e!N+HSVS4&C9g7EO8LW@_lQ*31w1hBWUGki_SRlR4E3hroOv| zkPJ&A)Nu1!WAUmKeWW^#+QBPq)H-{dq?_>93FgY{SL-?)CY7;AR5!}1TsMW=Gyt+< z9{zGcQch}36zK1F&xIcD_Phhhi|0iJ(2}m1(H82ycsp%<0iw*t>5nY z82kLIOJ$5$-1#O!->o-2AFBYK_R*iPR(y2vcm9OiFL!>QpKR2%=lRk7-JhqM+aJF% zm@?N9rw*?jzF74Um%FB~E{*8#RRCnclly&dtL|s+-tKOws4Xt+bwhBlD-K?fQyFZHatra~viX5fjZvCx7@QBYNg?TpFu~*A^W9C?> zTG%w|M%8?l^g^ zE(>Nf6k8yN0k?kCX*slmU*?V`s3_DXLKJ47&&WhY#OQNXgXA8Z292 z-?J_Q0pIM<0!&5RG{8(?G-Zjz#-K#Rl)IS*u{q6W2YqtGTW-}N6%Z15yk{tN`dx^g zxpE`^dP!^smR@AFHW+1X&G6TMir&N7;#_try<&Rk|h{b6zQQ2QMHi zY0-5}B87|HE3y!pMk`=zIYS2uO9F|*D^@O|$0?d;1ci$R`6J0H*U`|7b&0P=Q0@4w z{tD1K!ZTN+9ONOitiT2A%1ZBo;6*GOh}WaSuxC0b}wc+VB! zW^Ef?A@+0VayJ|R$Cf7SQKw<`Flyr5_CMtuEXN&!-uGrcBE%U3tBcI>D(vwa`4GtEZvff71LV zrRJ_@%+Gn+{Ea=8ia>(hNc7${VA!Z(>vbWA4t_2rtH>-||?oK0pD_5CJhn0$P6Mf}KS(j5bQx*8Q}*HL9Q z9!tK&94u4-dI>NX)-}I)Sh)HAkvp7j`_tGRJO=@C=3{gV9rr5b)d(RdlXDNGzeLl& zUCcZ1{3G>KQ1@d*;RSpm-lU@>{aKJYXRxa22$Hy!Tt}S{1kM2b+u%53vAcnIfOK)skJ7rLWY(5^U8t; z`O;c6-C4taf-Sli{1joHx3C8G4Ta8GS?ss~+h%L?TeOXwUv9sz!n_N^Lw;G)*s-qV zUvvpw%CB69bRs81W$ZZS$g-MXMx_`qtXzwP`NL=h7?KRuSptX1+E72B*k^6HT)^i) ziAh)GLRIHB4ZX_)^H#meO9Xrl&T2woDK0t=xzPL4T%ZywKDB~pgl+?Uw|8&Xbv9Z8 zG-3Zzm;>R|luk`(L0#MMhRbQ*G)B#Cy3$xdR}tNQHsj>xoXB087ty-d*?58lKG0KN z0ri>4lMq-G6odJkagu*LO1lc-$Jv8CU;X8%Kf^1#1JiFa?sGI+w9O5($!!{ZrnD?d zFXxh+*BG$dv6>7DWU{1ADo6JDM;@sJ(1JD{5KM1En{dHrs0E)(QWHvdP$0JrZotY{ zH{16wYC?(V7i8Rk`MlVznkd6qi4?UR&C zxpg$c9J7o8s2Rv~xHdFX#~l&)czCk9xK;r7G4H%PK8K^aLn$uRyAlAB;+oJdzL)c# zfgXh5!1Ph`hUTcLh}uG&4m5$ZsX?>EnnC!_dTF8D_!XGw$rmEs$Thp4C5ei!cs{%r-H!tbQe4U92j?8aq{{`NNVo$R#WvOZl6dd^Vmn zyQEdmHtNxB=;GlLI^g;~-XdSTbv<~MqdE8w+~WZc;qISH|3YOiSi$MMzqGd`A%DF6 zSxGwHYOE={l+FO(yjbRohc7l1SQIU&jRtv#ZNy2XM>Ts{?eveoCtqv-*t-6Eniwx5 zAo&`4@l>cyIEVfgK}n}fLHITIh1oOPkf$NJjoC9<0voKk6F?4h#6m;w=85@iNkEFx z#twuLmEA{4wcQij-KkZ#`ClzdN$PwoB@405{9<1cC`q|3sW>q{iT|8%xJHpxdPv)o 
z^WYeN7cS??(IKn0MO7u=M&WvheeBDQMQQ!HOxluD*ay=ByP&b4U^qa(v!JH6q-dIB zMRCB=ImuqtaudWql_X^Q_^XoO5h;3E19TP%gzI}i`u1_`)M?VW? z_v^-RD=(-C`3QWo9|^hz9wlv=DfBjuYX0X-3||30vKF>ncS>L1qF|{PjL29 zWW!z$g4^!f@5V1->juW9r9GJela0rJlf)OWBaL%M81shpm+2vj{KB?MS`lJkNh2RK zPFoB=f8gRW&B%pQuy9@RLe*tg1m_FxrkJ~ zIqa9^D`r4oRB@AvrDhSXub%c}13;HX>=-cCZgMnT$6Z5?2xL5@2R8PP3b<>;fv8fA zv*696N8_uWb>yLc(Tj`aJb*0}5~CM3-c)2=i4Psjjq(Dr$6MfJ%CvXIQEvA8yVyK2 zn6hj~PE6qpHd$dv@b{dCLM3oa6@A&{^dY-5BJQ&{zNThNmUOMq@}Rsfqd>0(ECXPw zFkEpwO>-c}Eh!fY;B% zCAZ+6ALuRekmdJJ+4qO|FXvZ|C*#Q&a!iTDJMTk7{B1+KvtAUZ2kI!SN2j?S8;TyA z`wXo6>wc2YY*pQ;d%}|UNf7s!vlO65UM8=h|XkR@>!5z8~PAJYl-T zI)P_X?qB?C^sXW&?JK0MHecMbd4&}#q@(K@$B00(ka7YQx!IPjI*-7>?1-S)6wrDC zlIxErpje*2=ap=BXaF_EOP1zKc1qfYgv{I&$j2ox_)jWpI!dCwp==jMzW_hS)-{&t z*LD3qA$~m$F62(9F~T;9^A>=Ly7Pp~erD{FDg4RjB;x)FixBV$gSU$s=ZQ|WPSY{# zHQrtE8)@x?OEUS!Tp7G}W%0TlZ0WDa_4GVxbD#!69W2eawTWgrihK0rWpO|kIn%4? zld_ZWZx0<>w#nb&GgNd_kUzyW%J;ta{j})02{#Fdes`ZqKB+#pnKXdXqs}Aa0os)R z*&p`)XO}5G@;)>M_iq*vx3eXbN^TGg*6qrA#UdfN_sL4QiHSpdB=ieTp^h0_2okxW;fr(fEIsV`x)1-llFuA6UG_u*4oZwETt2vAqq+tv2>1u3|;8w~vf2 zKhgStW`MsZc5~hrMJ$8Me?=}b#HYJVK$IF6f90P7r>-L z_XhzJI83t0n3GVhS7`X;OHiE|eE&8B$53-cWE~0_tqaBiKSxM|kbZqr+@79ft-3B( z$hcoX;;x3_+jtG}4@|CZbg02h%~En*-;W6IByFHY&_5#62iV_Db!eo@S<6arUY}LM4JB*^D1Ofm0o&Si%&3yO>GHQRHq(7KCUoYXGL%y%t_rq6JR+= zeQf%%D86^IzKtWdHL}-EW836LS;k%Y_*nl~y^qt>jmgmoMykA6TJK3MB!3;{t-vc~ zjNQ^vBPUK@EMs%q5R$|1Km!V+KR9V6K`F;Z#r+|He{5(WOz7iqLaK6K*c;^<2pOo% z8kw^Yr^Zdy)hicow?pGLC1jgM4mdV4(8ToGiH^nbLmJTRB^Yd^c!W3Uqx)tY*m|I} z%-+BcN8aRd!JCDQp>NjC%QN?m7M$RzQ3w^o`;K|`|9uMWhUf%gYURblMmHtjS3#vS0WGglu74ioZs?~0Oxl;)PgEm#YD4PY~~*V=>r z`Y+T;_SpwSos2R}soRDum&)!M7yD;An*{m7LdZe~2gUb7@R~|b5BzLdJP>8B1}%tM zlpL?0Ay7~(jaC9PKr+^>N`0|(lnV~GSCAP>f7zM#nhlC%3caXTlJDD>XDi75dK#n9 zk+G6LqPGW3c0u($tN-u6MW`iaWKh(l`PGDiAn10QLY8{`y-kB>W@NMbYJF38FKs^1 zZ4B_EfT`rsbT;nFhOp88%1{!E2Q$ONjJ-fL@OyOwpLDrQqw=;8!RD?~46B{)$b2q+NjX{ z+tC)G97%|eopGXb-{d?2c9cvl=zTh%b*&#R@Yz@FuP+ 
z)aICe1-iM0AdX0GBjD<{J;_VvDXng2>gzKwd7};?RZsi0Y}%a`U8jwq27%!-2`;kp=Uf}`oabOY3mcH_@zDiwS^Fn zB9fY;dGYIP{qyVmk6+Ck9pxv(*_T7Mo8&8!s~RBxWe{zHESP}pU*F>Y-3QOlL->Pp zPx#`fTdsAJX#E!S)l@4}`7+kbQ=o;zSNX)(-X?E$4a?+TZPh+w)h%Chj4C?mx^T|x zL>pVCRnz!yP~^vvG7wF7q8*m(HZNgDxi=2~ujD)UqZ?LoDR(inFCROd!1YLb0EJ-7 z9~`}sS4k+tGvj{GHB#1(MOOT=Hkty8Rg6F>Fxp{Op-#NeaB?Z`*u~`4zZ{-t#G#KU zTfK+Wfr$NfkpEX#R~;8s*ZfyjL+*x)Fq>7nYI^aX}g+mzM65 zG{_}H0YSP;K-%B>Jg>j^eV+T*xifR-Gjs0!+|NBT^POWN|6#P=OkzL4{1xLE{(KG7 zuMXR?JY}V?S{0-LGV%PRtNCAnsBhRK98A+G^?#y$lx%J$T*G7|jFcWLr6wSA^ZWbD z@45Fx#78;fdZoPv1!UG%pMEA`dMUrhBlfluf89Qzx#K<0`?r3(!MI-rc-?pTu1>Zb zw>UdHp(Qv4lV^4Z$=9dx4sgKB%LhwR2IXz#>6kY4fim+xg0$JdC}N?~5YYxm8@JmD zDhUtvHb0Ee*+f;2O&13(0)K|QE#6>x0fJFrUhK4evWTd8)*TOiy3#D6y-smskltbC zoLNZ9d({(z$?_|FXllp3;MkMXyH=MnWiKUGf3ISVa$uldxCEPRXu?7CydqQfWwtC0 z&rDI?>VZmWOm`{FX_MUAGm*Ab-iTUPm7pi<9AjFf@BZX@rL#G921-QUHS_ZwnlCql z7&NdD@uYrV>MBSW_E4TYXW?LXKTOcP(Yr_Oyyq)u{h$zaooB%9kkKK^$FbxAp^GT0 z+8`@k4~HgxN055w6j62Cvh%dnSM*1=d#mUc7?6G;Yc(*R@?Wb^NNA!bMA9X^7jkTZ zl{cXJ9Hoq?O=J_*n-!6HQBzzIWT|Niop6mjz6G?Z?$HRaZnJ9Mn>%%) z^EBL)g32~;gz9`qtxDMmttlchn$|n9D0!FGl(P%0^)ts9Vw-AzqQ4|BOnXFfW-6Ya z|3jvvpWf6pnU(wUl8`$>zBn&pm8rrw@j*?CcI=n&S20P`b&4;zn%N?i*1PtvhT2y6 zU(_(xsa3qD>Jf)$G31Q|I&@KP2~Q+`&#xPO6(aWO7%*$i^j7`y+~L<~kr*ce+TsTn zvSTXklJoY-wov zuOG($v0|y)Yv`@V`9hmTtTmzvqN<@mdvC`pkopaC0PyblEy;ki9-5=a`4dAiXUc9A zq2nKghw=O8I3<|XE}0nu@~ z0E^~9O$_fWV`S9Vqh(#)%-1F(;^23bk}?tn9xQ=2oa~A26lS$zWoBEgkUM&v_mBrG zn)c^B=lDprm`jgcj7P+s7_^5+0bsnc*$Hnt3=)iHK;F;@L4>*0IpizmOzsll62!t@GF~gx z&n=)bbfRsO^tW33IU2twlIK-|{eVk`3r50&F;2eOUU;YYUxDrq$O4=kkv9eDw@btl zZJk8~NRtk}uXpk&YF2B|HkFct^wS^MSm|Sy@7dY6q!^7Xt4Yq>-^lD}Y&wEBcPTW# zMVdEe{B#`j`P8iBqt(oN+52;l)ki0qrsCm)WQe`7`{hGI{_yxt#fVu_Q^kroEAkNA zg$tSh7T7B#Nd6>T+qWRla$p`Z`fo*X@va*u_LOB{N%@O!*TALk-zAs*Kuv)9Yoa z@2N=wQ^CA~?Id09@G5b3i>H1q3a&9K0q<`+nZ&Q|;Ze7zlFf~$H&r5gaquEN3y>!f zo3wr5^D1I^2Z(*qDY3$nqfB}od5UeXlsEc^k{lj=Vn-)V3jxPXBGtwvNmiu_OE<;9 zhmE67`GtCB{N}f9NKXg4)^gn~UCL7`duzZARg-d+DacC0bAi;d`69`PgAJG2r7vvk 
z+BpJ;-^o&+<|e~#%5oe9`d#3ur65&J27VRJ>?6pC3c^zHdP1jFmcj2a{3Nr9y5 zkRKeJ(lQa9i(%W6%zK!JMN-jvV%V>-kwP*LHA50@?;#co`7Wg`h3k2@N8MNQ`6Tj80QiXX?6!wHI+xpkpkhy3V2qJA))Q)bsG>gKl zOxaxxv+sO1i(vk^rndFrTINt~^LeP%#weS9E5R$pmxI{OxL1^42aXtNFO2o8U)P0v z5VSVe_jze-5VC-((&_77{SjXhWWLGmLS33v`$Tkh(tZvOdI$e@{^;zp zvv|Oh1g3#99W>)&9Nda&+sir=8tA}rz3A{nK9EYecy_Q*r+*5Wcrm2iBke$0urP%h zIBau{cQmlfYW=?KO)kmh)Qn=c$$7eC;G_Mf_@>veNB88mn-pF7VYPBCAyYDV!qDrQ{>g27r63s-;uEnY ze1DJE7fzAsf>IU8QPPqWjzdSD^ObHFK{#+A=bhWe&M%p5KO8SM)kCYVW7Jg~!Q{Iu z{!AGdeFd2*8fFvxl+TV#-`-KXs^aJc3Gi~qmZdS^2G=Eq%)tpCT!CKPK-`$FD_}Jj z(W%PV4bCEhv2X-Ljn;E(D=j=*b`xO;%V=_gw$Knnhr{n7zL3uIQ!?(j9(`o|7wV|u1FiQn8QPIQ=i`Fic0Bg7}wEIlXK+X#4x1KqJO;8+PM zo+Ot3lJsy2S++55RU&5SOckL*J@j}gt)>YKjV%Bbu!C-zwa8NF2j3G7_!*>ro($Q2 zuc62$DM#h>g>N5Qh6G<*h$}|xATPQ8sx{hN3FTVr^+q73Xh!ffw>CzLJ!M%r_!YQU zg*o1=HTYZ|g#b;Lf;wGLq$*D9r#rVFOJgdQVvsZZm+ic_ky;LcdJwyFZsYlFTf{)A zD3JC1HP3$TcdO;LDm;eq8fVjW*KcvytRYCz}{bF7t%*ac%rcv@j z$O|BRI4`|SpLD6>J|TGb!TEQI^+d&p+o?T0)R8|BV9a~6H=Pv&^j{so&^+bDtvO1w z5hNbXvpIZ4N7_VppxeHq;^+7QY+=Y4?K#JY!$w>S{eET>&W&ykCA*dq4aA!RqXUis&f*)gnkSO;&ifku10h%0nub z%n`zzY>0cW3}a7nD{qhfWjdh%T>UGlJkre#$l%s&On29h0?_U!+?HDdbKt%4ocem&&b>6&^8*s3e`#2qH^~G)Z&?~crE%45Bj-iR?HK|A= zu>jFL>vH@b-kIHO@H-wU8P!V#HjB4Hjokn%GomViXWnp1iq#Je4lXOSi?@db@kOYK z%|?hjTc6Zn(^t#SLQV{iY7*ot2;(r@p5R1B5+hFDqZ3uPVj1;e;=0WAw2KNHaLsH5 z+l!9M5h0cjvbiF%h@#AH8)x>F|F%ezXi>MPVr9xHM`deiiyY zliQ;CDmZH;az_wy45lqPWNN;dpzR>S`Km4&=<$7euM_9xPorSh_OG>M9DI$j$vw508oAh)gxH_q$Ab7P`MuS~Q((2jtXoL-OeQ^aD$fI2F8z zdUi{$K;LL7kCfV-N^Ctqg%v0lPN&49h9~wxk}p36ol#B^m-N+3AmgLTZWLR)#GG}Z zeZfS7{w%)+V{IA5CXrcct}dcioPkl68uoc2S0NRxTP`8DN$g<@8>Hu5S}Y;L{l_^;cfEG^L|-_hdVb;=-cC7qNQRB5*u}cAKjV` z6k+aKl{$k+2za*-jLx(+w469xEBx%)5mW&CGa;XZE27x7_C#fu49)SnB61Qt!H_!r zmG%LFXXz-*(7DUZD`f=+xR{9Wrz=IxmixkL+JT8`wZA5ryIm>I7q`~fkp-^pkKX=F zIgjda7;}wr!r%Haax}5k@ZnLfKX2W<>`XT7!I)Ujy%9{7@jh!Kyi#&j%X)`GY2}u6 zGcB^Uhd6+I-^j13V0ccTNR`M>&$qRQ%*BYzUfn!% zVgm7p$Hlg!-ZiezVTUCn#Pz~bb85(PMvB20W$eC3Uoq@~X39^!yl5@w0CrAI@AIYg 
zEf7?iH}~W8ftIQj+LddobEpz^h6!+q-~e$$M!-)}4iz;{V!Tnj_-BhwvC)VuN9YF% zo5e*}fQb_!z3bXys1)=pt#v+K3fuiXxkJIVqDy3n2XY@Q$8+eJSz|5fh%EK{s`1AE zCsTp)a~&T|8P>fy-kOJ*6w~BdtJ^8MxI&a)$hGX+f4cEDa%wFroikGn7I~iBTsCeI zbT1f@RwVEM5l^Zff#>o?5^DA>)*^4sEqRs3daGFqQSw6b`JK?4tJptd6+uQQp&5gg z>CN{;o=6t2Xj0V>+nIjrCgka++7F|c?txQK;5r||Q}c()qxS8e)Zw&>u?t$+8hk&l z5^38F?sm}1c=D*`&94$IiV6GN%A!U}chN+8saikLSw18bQF0D46vZZy2pYp`y2(x} z&mJj13}yyWih*D-oKZZ|H>2N}Uj5qqD&F;!!~uW*qSxz2dx-?RogGO=lbz26b^BP` zJF37~Vr60LN8SfTd&9s19+tvpDCAH>T@yBWA_=lt1aB3w>?{0u8l?l3hMgbjqT7X7 z^7~5mKP)5WzS%YY`0{n~c-pu&uaoZz$4ISRph}^~xmp8CYGb=L#^_coMe>2$2d*+?r<9@|;v_hq?! z@?t|#&Gf0zEL>-^9ZmsEN38J1OV&wQjq`a7(46h{nHYwYD`oUP}_ zqoht?EfpzJmv5GejLLqF?_3sSyJXe-Nze6bE{W8qkY#;S68VwXVh7stWk zU{{ynTl-Zg>Cp3Mw4aAee~Q8xm|F8@Qp)uznqXQKR90C}Ed?nh$F?MHv5=>$n* z*XPJO9-OZFo1Dvk-+LCSS7MSu(fVk~eUinAbs=sRC^LLJy*mSqbC>&i{ccLIgWL## z;4A((4+Mq9NK8+kyQGX-Ul@*(@ix!uXg z1L48JmuoyR<+od6?tvGceG@E+(Mc!;@rh=2^2E^Wq?W#gfD7tJxy+|g zcVQB^=L%@2{hovrAO1_4BS-?!KszDQRxj?!bQ&a^n4X*nID*z(Fxh{c0)9T#YUAce z$<8~ipb3#5Dy_3$@K-HRsca>*)AGx#Z6Zkv4aoVnxWh*$x;G`By z^j_(06^V;!W$V(;jRkLVh?Ju3?i*Ilk-6u>?<$uxgYZ4^)$E$Y8^VL?cBuBhoA2*e ziQ*huwX{XmdijpFVhfbc2uR~e;&tH7WUFnh=Hk^=UF+B{Pg>3nA2Rh~+j0hTS2)rA zbE##BHiKlQ?dRf!vVpI6WvR<_*VMhW^D#HWx;{Va4drukF50aT-p9ffIDwx)*l_eP zB^qi`NczkM`sB!X04GD=+y|DwV4E*c%Vx&;Bnh@ z^X@x0QQ_Df^pLr5mO6@|$?~atp%>lv$=iOUK(QMX0pyxR?0q60S=4!|)O?6&%<;>d zJhruHrDkG`yUj|jHkZh)q~~F_Eao6u5HLRJ_)fL7aVs$rMJyfD-h2PONb_P5@5iM_ z>$S52ckWp1Xe{%O-`NRMan+Rs#FF*`57)|ylaoqSSm>1K&Z+|$r!$`w7!m6}e63q0 z1!RL%g0&q*4hQy0`X#EGWlf&k>!Z2vLSN@`U=ftqDJ{d4W)c6Z99-aG|DW73jHawY zDy)k`R|m%7J@g59*vUdnL?0N^`)MyDA9uKri(+0xzO(g=0(vOEV!ztKOg@cCf1Nll zqEwUqnhMMN>yBIri&_75iToG92h}O|2cJ_r-U;~+l!vrOvu=N`73#xycgQV1QyB;s)NGc0vV@(f=*xI-~)_g6)12Zd*k(7`(r? 
zorYQ-68=N{TS{_R;t%2OZ>g+~Df^V00DwHIaQNXL4$E&&xqa}oDkT5_$^ZZu05pK# z7Z$w-C|J?$0%phgA& zkp5%BG8}+f@S^yi34ePbJqE&g?~gJWV+CRnSpQw$+%XXFCGxj-RH%uuf0@jL0RfYP z0I0+e{{Kn^2Tr Date: Thu, 17 Aug 2023 17:47:39 -0400 Subject: [PATCH 04/21] Fix VkPhysicalDeviceLimits::timestampPeriod calculations on Intel GPU. - Guard against Intel returning zero values for CPU & GPU timestamps. - Apply lowpass filter on timestampPeriod updates, to avoid wild temporary changes, particularly at startup before GPU has been really exercised. --- MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm | 26 +++++++++++++++-------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm index b4527ae71..0b11d4dfe 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm @@ -1567,16 +1567,24 @@ return rslt; } -// Mark both CPU and GPU timestamps, and if needed, update the timestamp period for this device. -// On Apple GPUs, the CPU & GPU timestamps are the same, and the timestamp period never changes. +// If needed, update the timestamp period for this device, using a crude lowpass filter to level out +// wild temporary changes, particularly during initial queries before much GPU activity has occurred. +// On Apple GPUs, CPU & GPU timestamps are the same, and timestamp period never changes. 
void MVKPhysicalDevice::updateTimestampsAndPeriod() { - if (_properties.vendorID == kAppleVendorId) { - _prevGPUTimestamp = _prevCPUTimestamp = mvkGetElapsedNanoseconds(); - } else { - MTLTimestamp earlierCPUTs = _prevCPUTimestamp; - MTLTimestamp earlierGPUTs = _prevGPUTimestamp; - [_mtlDevice sampleTimestamps: &_prevCPUTimestamp gpuTimestamp: &_prevGPUTimestamp]; - _properties.limits.timestampPeriod = (double)(_prevCPUTimestamp - earlierCPUTs) / (double)(_prevGPUTimestamp - earlierGPUTs); + if (_properties.vendorID == kAppleVendorId) { return; } + + MTLTimestamp earlierCPUTs = _prevCPUTimestamp; + MTLTimestamp earlierGPUTs = _prevGPUTimestamp; + [_mtlDevice sampleTimestamps: &_prevCPUTimestamp gpuTimestamp: &_prevGPUTimestamp]; + double elapsedCPUNanos = _prevCPUTimestamp - earlierCPUTs; + double elapsedGPUTicks = _prevGPUTimestamp - earlierGPUTs; + if (elapsedCPUNanos && elapsedGPUTicks) { // Ensure not zero + float tsPeriod = elapsedCPUNanos / elapsedGPUTicks; + + // Basic lowpass filter Y = (1 - a)Y + a*X. + // The lower a is, the slower Y will change over time. + static const float a = 0.05; + _properties.limits.timestampPeriod = ((1.0 - a) * _properties.limits.timestampPeriod) + (a * tsPeriod); } } From 7910083ffae7ca9d638c5bdc0c36ad71335e79de Mon Sep 17 00:00:00 2001 From: Bill Hollings Date: Wed, 23 Aug 2023 14:14:15 -0400 Subject: [PATCH 05/21] Fix rare case where vertex attribute buffers are not bound to Metal. In the rare case where vertex attribute buffers are bound to MVKCommandEncoder, are not used by first pipeline, but are used by a subsequent pipeline, and no other bindings are changed, the MVKResourcesCommandEncoderState will not appear to be dirty to the second pipeline, and the buffer will not be bound to Metal. When reverting a binding to dirty if it is not used by a pipeline, also revert the enclosing MVKResourcesCommandEncoderState to dirty state. Update MoltenVK to version 1.2.6 (unrelated). 
--- Docs/Whats_New.md | 10 ++++++++++ MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.h | 7 +++++-- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/Docs/Whats_New.md b/Docs/Whats_New.md index 68332ce77..82e9b1558 100644 --- a/Docs/Whats_New.md +++ b/Docs/Whats_New.md @@ -13,6 +13,16 @@ Copyright (c) 2015-2023 [The Brenwill Workshop Ltd.](http://www.brenwill.com) +MoltenVK 1.2.6 +-------------- + +Released TBD + +- Fix rare case where vertex attribute buffers are not bound to Metal + when no other bindings change between pipelines. + + + MoltenVK 1.2.5 -------------- diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.h b/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.h index d8cf26ca5..06152dd7a 100644 --- a/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.h +++ b/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.h @@ -81,6 +81,8 @@ class MVKCommandEncoderState : public MVKBaseObject { /** * If the content of this instance is dirty, marks this instance as no longer dirty * and calls the encodeImpl() function to encode the content onto the Metal encoder. + * Marking dirty is done in advance so that subclass encodeImpl() implementations + * can override to leave this instance in a dirty state. * Subclasses must override the encodeImpl() function to do the actual work. */ void encode(uint32_t stage = 0) { @@ -430,7 +432,8 @@ class MVKResourcesCommandEncoderState : public MVKCommandEncoderState { // Template function that executes a lambda expression on each dirty element of // a vector of bindings, and marks the bindings and the vector as no longer dirty. - // Clear isDirty flag before operation to allow operation to possibly override. + // Clear binding isDirty flag before operation to allow operation to possibly override. + // If it does override, leave both the bindings and this instance as dirty. 
template void encodeBinding(V& bindings, bool& bindingsDirtyFlag, @@ -441,7 +444,7 @@ class MVKResourcesCommandEncoderState : public MVKCommandEncoderState { if (b.isDirty) { b.isDirty = false; mtlOperation(_cmdEncoder, b); - if (b.isDirty) { bindingsDirtyFlag = true; } + if (b.isDirty) { _isDirty = bindingsDirtyFlag = true; } } } } From 6acdd9fb9157a8d8aa03d0160798bb4288ed9fb0 Mon Sep 17 00:00:00 2001 From: Dan Ginsburg Date: Thu, 31 Aug 2023 09:13:26 -0400 Subject: [PATCH 06/21] Fix crash in Dota 2 on macOS < 10.15 - closes #2006 --- MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm | 28 ++++++++++++----------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm index 0b11d4dfe..203adb816 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm @@ -1572,19 +1572,21 @@ // On Apple GPUs, CPU & GPU timestamps are the same, and timestamp period never changes. void MVKPhysicalDevice::updateTimestampsAndPeriod() { if (_properties.vendorID == kAppleVendorId) { return; } - - MTLTimestamp earlierCPUTs = _prevCPUTimestamp; - MTLTimestamp earlierGPUTs = _prevGPUTimestamp; - [_mtlDevice sampleTimestamps: &_prevCPUTimestamp gpuTimestamp: &_prevGPUTimestamp]; - double elapsedCPUNanos = _prevCPUTimestamp - earlierCPUTs; - double elapsedGPUTicks = _prevGPUTimestamp - earlierGPUTs; - if (elapsedCPUNanos && elapsedGPUTicks) { // Ensure not zero - float tsPeriod = elapsedCPUNanos / elapsedGPUTicks; - - // Basic lowpass filter Y = (1 - a)Y + a*X. - // The lower a is, the slower Y will change over time. 
- static const float a = 0.05; - _properties.limits.timestampPeriod = ((1.0 - a) * _properties.limits.timestampPeriod) + (a * tsPeriod); + + if ([_mtlDevice respondsToSelector: @selector(sampleTimestamps:gpuTimestamp:)]) { + MTLTimestamp earlierCPUTs = _prevCPUTimestamp; + MTLTimestamp earlierGPUTs = _prevGPUTimestamp; + [_mtlDevice sampleTimestamps: &_prevCPUTimestamp gpuTimestamp: &_prevGPUTimestamp]; + double elapsedCPUNanos = _prevCPUTimestamp - earlierCPUTs; + double elapsedGPUTicks = _prevGPUTimestamp - earlierGPUTs; + if (elapsedCPUNanos && elapsedGPUTicks) { // Ensure not zero + float tsPeriod = elapsedCPUNanos / elapsedGPUTicks; + + // Basic lowpass filter Y = (1 - a)Y + a*X. + // The lower a is, the slower Y will change over time. + static const float a = 0.05; + _properties.limits.timestampPeriod = ((1.0 - a) * _properties.limits.timestampPeriod) + (a * tsPeriod); + } } } From 9f64faadbcf490e73e69db8bc3e10154e61f17e5 Mon Sep 17 00:00:00 2001 From: Bill Hollings Date: Sat, 2 Sep 2023 08:51:36 -0400 Subject: [PATCH 07/21] Improve behavior of swapchain image presentation stalls caused by Metal regression. In a recent Metal regression, Metal sometimes does not trigger the [CAMetalDrawable addPresentedHandler:] callback on the final few (1-3) CAMetalDrawable presentations, and retains internal memory associated with these CAMetalDrawables. This does not occur for any CAMetalDrawable presentations prior to those final few. Most apps typically don't care much what happens after the last few CAMetalDrawables are presented, and typically end shortly after that. However, for some apps, such as Vulkan CTS WSI tests, which serially create potentially hundreds, or thousands, of CAMetalLayers and MTLDevices, these retained device memory allocations can pile up and cause the CTS WSI tests to stall, block, or crash. This issue has proven very difficult to debug, or replicate in incrementally controlled environments. 
It appears consistently in some scenarios, and never in other, almost identical scenarios. For example, the MoltenVK Cube demo consistently runs without encountering this issue, but CTS WSI test dEQP-VK.wsi.macos.swapchain.render.basic consistently triggers the issue. Both apps run almost identical Vulkan command paths, and identical swapchain image presentation paths, and result in GPU captures that have identical swapchain image presentations. We may ultimately have to wait for Apple to fix the core issue, but this update includes workarounds that help in some cases. During vkQueueWaitIdle() and vkDeviceWaitIdle(), wait a short while for any in-flight swapchain image presentations to finish, and attempt to force completion by calling MVKPresentableSwapchainImage::forcePresentationCompletion(), which releases the current CAMetalDrawable, and attempts to retrieve a new one, to trigger the callback on the current CAMetalDrawable. In exploring possible workarounds for this issue, this update adds significant structural improvements in the handling of swapchains, and quite a bit of new performance and logging functionality that is useful for debugging purposes. - Add several additional performance trackers, available via logging, or the mvk_private_api.h API. - Rename MVKPerformanceTracker members, and refactor performance result collection, to support tracking and logging memory use, or other measurements, in addition to just durations. - Redefine MVKQueuePerformance to track separate performance metrics for MTLCommandBuffer retrieval, encoding, and execution, plus swapchain presentation. - Add MVKDevicePerformance as part of MVKPerformanceStatistics to track device information, including GPU device memory allocated, and update device memory results whenever performance content is requested. 
- Add MVKConfigActivityPerformanceLoggingStyle:: MVK_CONFIG_ACTIVITY_PERFORMANCE_LOGGING_STYLE_DEVICE_LIFETIME_ACCUMULATE to accumulate performance and memory results across multiple serial invocations of VkDevices, during the lifetime of the app process. This is useful for accumulating performance results across multiple CTS tests. - Log destruction of VkDevice, VkPhysicalDevice, and VkInstance, to bookend the corresponding logs performed upon their creation. - Include consumed GPU memory in log when VkPhysicalDevice is destroyed. - Add mvkGetAvailableMTLDevicesArray() to support consistency when retrieving MTLDevices available on the system. - Add mvkVkCommandName() to generically map command use to a command name. - MVKDevice: - Support MTLPhysicalDevice.recommendedMaxWorkingSetSize on iOS & tvOS. - Include available and consumed GPU memory in log of GPU device at VkInstance creation time. - MVKQueue: - Add handleMTLCommandBufferError() to handle errors for all MTLCommandBuffer executions. - Track time to retrieve a MTLCommandBuffer. - If MTLCommandBuffer could not be retrieved during queue submission, report error, signal queue submission completion, and return VK_ERROR_OUT_OF_POOL_MEMORY. - waitIdle() simplify to use [MTLCommandBuffer waitUntilCompleted], plus also wait for in-flight presentations to complete, and attempt to force them to complete if they are stuck. - MVKPresentableSwapchainImage: - Don't track presenting MTLCommandBuffer. - Add limit on number of attempts to retrieve a drawable, and report VK_ERROR_OUT_OF_POOL_MEMORY if drawable cannot be retrieved. - Return VkResult from acquireAndSignalWhenAvailable() to notify upstream if MTLCommandBuffer could not be created. - Track presentation time. - Notify MVKQueue when presentation has completed. - Add forcePresentationCompletion(), which releases the current CAMetalDrawable, and attempts to retrieve a new one, to trigger the callback on the current CAMetalDrawable. 
Called when a swapchain is destroyed, or by queue if waiting for presentation to complete stalls. - If destroyed while in flight, stop tracking swapchain and don't notify when presentation completes. - MVKSwapchain: - Track active swapchain in MVKSurface to check oldSwapchain. - Track MVKSurface to access layer and detect lost surface. - Don't track layer and layer observer, since MVKSurface handles these. - On destruction, wait until all in-flight presentable images have returned. - Remove empty and unused releaseUndisplayedSurfaces() function. - MVKSurface: - Consolidate constructors into initLayer() function. - Update logic to test for valid layer and to set up layer observer. - MVKSemaphoreImpl: - Add getReservationCount(). - MVKBaseObject: - Add reportResult() and reportWarning() functions to support logging and reporting Vulkan results that are not actual errors. - Rename MVKCommandUse::kMVKCommandUseEndCommandBuffer to kMVKCommandUseBeginCommandBuffer, since that's where it is used. - Update MVK_CONFIGURATION_API_VERSION and MVK_PRIVATE_API_VERSION to 38. - Cube Demo supports running a maximum number of frames. 
--- Demos/Cube/Cube.xcodeproj/project.pbxproj | 2 +- Demos/Cube/iOS/DemoViewController.m | 19 +- Demos/Cube/macOS/DemoViewController.m | 64 ++- Docs/Whats_New.md | 3 + MoltenVK/MoltenVK/API/mvk_config.h | 13 +- MoltenVK/MoltenVK/API/mvk_private_api.h | 34 +- MoltenVK/MoltenVK/Commands/MVKCommandBuffer.h | 3 - .../MoltenVK/Commands/MVKCommandBuffer.mm | 23 +- MoltenVK/MoltenVK/Commands/MVKCommandPool.h | 2 +- MoltenVK/MoltenVK/Commands/MVKCommandPool.mm | 4 +- MoltenVK/MoltenVK/GPUObjects/MVKDevice.h | 39 +- MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm | 305 +++++++++----- MoltenVK/MoltenVK/GPUObjects/MVKImage.h | 23 +- MoltenVK/MoltenVK/GPUObjects/MVKImage.mm | 109 +++-- MoltenVK/MoltenVK/GPUObjects/MVKInstance.h | 1 - MoltenVK/MoltenVK/GPUObjects/MVKInstance.mm | 93 +--- .../MoltenVK/GPUObjects/MVKPixelFormats.mm | 21 +- MoltenVK/MoltenVK/GPUObjects/MVKQueue.h | 67 +-- MoltenVK/MoltenVK/GPUObjects/MVKQueue.mm | 371 ++++++++++------ MoltenVK/MoltenVK/GPUObjects/MVKSurface.h | 19 +- MoltenVK/MoltenVK/GPUObjects/MVKSurface.mm | 43 +- MoltenVK/MoltenVK/GPUObjects/MVKSwapchain.h | 28 +- MoltenVK/MoltenVK/GPUObjects/MVKSwapchain.mm | 398 +++++++++--------- MoltenVK/MoltenVK/GPUObjects/MVKSync.h | 10 +- MoltenVK/MoltenVK/GPUObjects/MVKSync.mm | 5 + MoltenVK/MoltenVK/Utility/MVKBaseObject.h | 33 +- MoltenVK/MoltenVK/Utility/MVKBaseObject.mm | 74 ++-- MoltenVK/MoltenVK/Utility/MVKFoundation.cpp | 38 ++ MoltenVK/MoltenVK/Utility/MVKFoundation.h | 5 +- MoltenVK/MoltenVK/Utility/MVKLogging.h | 24 +- Scripts/runcts | 2 +- 31 files changed, 1072 insertions(+), 803 deletions(-) diff --git a/Demos/Cube/Cube.xcodeproj/project.pbxproj b/Demos/Cube/Cube.xcodeproj/project.pbxproj index 44654e514..66fb7d927 100644 --- a/Demos/Cube/Cube.xcodeproj/project.pbxproj +++ b/Demos/Cube/Cube.xcodeproj/project.pbxproj @@ -3,7 +3,7 @@ archiveVersion = 1; classes = { }; - objectVersion = 52; + objectVersion = 54; objects = { /* Begin PBXBuildFile section */ diff --git 
a/Demos/Cube/iOS/DemoViewController.m b/Demos/Cube/iOS/DemoViewController.m index 9256927a9..30112a2cf 100644 --- a/Demos/Cube/iOS/DemoViewController.m +++ b/Demos/Cube/iOS/DemoViewController.m @@ -30,15 +30,9 @@ @implementation DemoViewController { struct demo demo; } --(void) dealloc { - demo_cleanup(&demo); - [_displayLink release]; - [super dealloc]; -} - -/** Since this is a single-view app, init Vulkan when the view is loaded. */ --(void) viewDidLoad { - [super viewDidLoad]; +/** Since this is a single-view app, initialize Vulkan as view is appearing. */ +-(void) viewWillAppear: (BOOL) animated { + [super viewWillAppear: animated]; self.view.contentScaleFactor = UIScreen.mainScreen.nativeScale; @@ -68,6 +62,13 @@ -(void)viewWillTransitionToSize:(CGSize)size withTransitionCoordinator:(id)coord demo_resize(&demo); } +-(void) viewDidDisappear: (BOOL) animated { + [_displayLink invalidate]; + [_displayLink release]; + demo_cleanup(&demo); + [super viewDidDisappear: animated]; +} + @end diff --git a/Demos/Cube/macOS/DemoViewController.m b/Demos/Cube/macOS/DemoViewController.m index d8468bdc3..76dc4d92c 100644 --- a/Demos/Cube/macOS/DemoViewController.m +++ b/Demos/Cube/macOS/DemoViewController.m @@ -18,6 +18,7 @@ #import "DemoViewController.h" #import +#import #include #include "../../Vulkan-Tools/cube/cube.c" @@ -27,27 +28,34 @@ #pragma mark DemoViewController @implementation DemoViewController { - CVDisplayLinkRef _displayLink; + CVDisplayLinkRef _displayLink; struct demo demo; + uint32_t _maxFrameCount; + uint64_t _frameCount; + BOOL _stop; + BOOL _useDisplayLink; } --(void) dealloc { - demo_cleanup(&demo); - CVDisplayLinkRelease(_displayLink); - [super dealloc]; -} - -/** Since this is a single-view app, initialize Vulkan during view loading. */ --(void) viewDidLoad { - [super viewDidLoad]; +/** Since this is a single-view app, initialize Vulkan as view is appearing. 
*/ +-(void) viewWillAppear { + [super viewWillAppear]; self.view.wantsLayer = YES; // Back the view with a layer created by the makeBackingLayer method. - // Enabling this will sync the rendering loop with the natural display link (60 fps). - // Disabling this will allow the rendering loop to run flat out, limited only by the rendering speed. - bool useDisplayLink = true; + // Enabling this will sync the rendering loop with the natural display link + // (monitor refresh rate, typically 60 fps). Disabling this will allow the + // rendering loop to run flat out, limited only by the rendering speed. + _useDisplayLink = YES; - VkPresentModeKHR vkPresentMode = useDisplayLink ? VK_PRESENT_MODE_FIFO_KHR : VK_PRESENT_MODE_IMMEDIATE_KHR; + // If this value is set to zero, the demo will render frames until the window is closed. + // If this value is not zero, it establishes a maximum number of frames that will be + // rendered, and once this count has been reached, the demo will stop rendering. + // Once rendering is finished, if _useDisplayLink is false, the demo will immediately + // clean up the Vulkan objects, or if _useDisplayLink is true, the demo will delay + // cleaning up Vulkan objects until the window is closed. + _maxFrameCount = 0; + + VkPresentModeKHR vkPresentMode = _useDisplayLink ? 
VK_PRESENT_MODE_FIFO_KHR : VK_PRESENT_MODE_IMMEDIATE_KHR; char vkPresentModeStr[64]; sprintf(vkPresentModeStr, "%d", vkPresentMode); @@ -55,19 +63,33 @@ -(void) viewDidLoad { int argc = sizeof(argv)/sizeof(char*); demo_main(&demo, self.view.layer, argc, argv); - if (useDisplayLink) { + _stop = NO; + _frameCount = 0; + if (_useDisplayLink) { CVDisplayLinkCreateWithActiveCGDisplays(&_displayLink); - CVDisplayLinkSetOutputCallback(_displayLink, &DisplayLinkCallback, &demo); + CVDisplayLinkSetOutputCallback(_displayLink, &DisplayLinkCallback, self); CVDisplayLinkStart(_displayLink); } else { dispatch_async(dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0), ^{ - while(true) { + do { demo_draw(&demo); - } + _stop = _stop || (_maxFrameCount && ++_frameCount >= _maxFrameCount); + } while( !_stop ); + demo_cleanup(&demo); }); } } +-(void) viewDidDisappear { + _stop = YES; + if (_useDisplayLink) { + CVDisplayLinkRelease(_displayLink); + demo_cleanup(&demo); + } + + [super viewDidDisappear]; +} + #pragma mark Display loop callback function @@ -78,7 +100,11 @@ static CVReturn DisplayLinkCallback(CVDisplayLinkRef displayLink, CVOptionFlags flagsIn, CVOptionFlags* flagsOut, void* target) { - demo_draw((struct demo*)target); + DemoViewController* demoVC =(DemoViewController*)target; + if ( !demoVC->_stop ) { + demo_draw(&demoVC->demo); + demoVC->_stop = (demoVC->_maxFrameCount && ++demoVC->_frameCount >= demoVC->_maxFrameCount); + } return kCVReturnSuccess; } diff --git a/Docs/Whats_New.md b/Docs/Whats_New.md index 82e9b1558..3e476dd3d 100644 --- a/Docs/Whats_New.md +++ b/Docs/Whats_New.md @@ -20,6 +20,9 @@ Released TBD - Fix rare case where vertex attribute buffers are not bound to Metal when no other bindings change between pipelines. +- Improve behavior of swapchain image presentation stalls caused by Metal regression. +- Add several additional performance trackers, available via logging, or the `mvk_private_api.h` API. 
+- Update `MVK_CONFIGURATION_API_VERSION` and `MVK_PRIVATE_API_VERSION` to `38`. diff --git a/MoltenVK/MoltenVK/API/mvk_config.h b/MoltenVK/MoltenVK/API/mvk_config.h index 360007e16..532d27db4 100644 --- a/MoltenVK/MoltenVK/API/mvk_config.h +++ b/MoltenVK/MoltenVK/API/mvk_config.h @@ -51,7 +51,7 @@ extern "C" { #define MVK_VERSION MVK_MAKE_VERSION(MVK_VERSION_MAJOR, MVK_VERSION_MINOR, MVK_VERSION_PATCH) -#define MVK_CONFIGURATION_API_VERSION 37 +#define MVK_CONFIGURATION_API_VERSION 38 /** Identifies the level of logging MoltenVK should be limited to outputting. */ typedef enum MVKConfigLogLevel { @@ -138,10 +138,11 @@ typedef enum MVKConfigCompressionAlgorithm { /** Identifies the style of activity performance logging to use. */ typedef enum MVKConfigActivityPerformanceLoggingStyle { - MVK_CONFIG_ACTIVITY_PERFORMANCE_LOGGING_STYLE_FRAME_COUNT = 0, /**< Repeatedly log performance after a configured number of frames. */ - MVK_CONFIG_ACTIVITY_PERFORMANCE_LOGGING_STYLE_IMMEDIATE = 1, /**< Log immediately after each performance measurement. */ - MVK_CONFIG_ACTIVITY_PERFORMANCE_LOGGING_STYLE_DEVICE_LIFETIME = 2, /**< Log at the end of the VkDevice lifetime. This is useful for one-shot apps such as testing frameworks. */ - MVK_CONFIG_ACTIVITY_PERFORMANCE_LOGGING_STYLE_MAX_ENUM = 0x7FFFFFFF, + MVK_CONFIG_ACTIVITY_PERFORMANCE_LOGGING_STYLE_FRAME_COUNT = 0, /**< Repeatedly log performance after a configured number of frames. */ + MVK_CONFIG_ACTIVITY_PERFORMANCE_LOGGING_STYLE_IMMEDIATE = 1, /**< Log immediately after each performance measurement. */ + MVK_CONFIG_ACTIVITY_PERFORMANCE_LOGGING_STYLE_DEVICE_LIFETIME = 2, /**< Log at the end of the VkDevice lifetime. This is useful for one-shot apps such as testing frameworks. */ + MVK_CONFIG_ACTIVITY_PERFORMANCE_LOGGING_STYLE_DEVICE_LIFETIME_ACCUMULATE = 3, /**< Log at the end of the VkDevice lifetime, but continue to accumulate across multiple VkDevices throughout the app process. 
This is useful for testing frameworks that create many VkDevices serially. */ + MVK_CONFIG_ACTIVITY_PERFORMANCE_LOGGING_STYLE_MAX_ENUM = 0x7FFFFFFF, } MVKConfigActivityPerformanceLoggingStyle; /** @@ -786,6 +787,8 @@ typedef struct { /** * Controls when MoltenVK should log activity performance events. * + * The performanceTracking parameter must also be enabled. + * * The value of this parameter must be changed before creating a VkDevice, * for the change to take effect. * diff --git a/MoltenVK/MoltenVK/API/mvk_private_api.h b/MoltenVK/MoltenVK/API/mvk_private_api.h index 87bc8ad99..e496fc5ab 100644 --- a/MoltenVK/MoltenVK/API/mvk_private_api.h +++ b/MoltenVK/MoltenVK/API/mvk_private_api.h @@ -44,7 +44,7 @@ typedef unsigned long MTLArgumentBuffersTier; */ -#define MVK_PRIVATE_API_VERSION 37 +#define MVK_PRIVATE_API_VERSION 38 /** Identifies the type of rounding Metal uses for float to integer conversions in particular calculatons. */ @@ -153,13 +153,16 @@ typedef struct { VkDeviceSize hostMemoryPageSize; /**< The size of a page of host memory on this platform. */ } MVKPhysicalDeviceMetalFeatures; -/** MoltenVK performance of a particular type of activity. */ +/** + * MoltenVK performance of a particular type of activity. + * Durations are recorded in milliseconds. Memory sizes are recorded in kilobytes. + */ typedef struct { - uint32_t count; /**< The number of activities of this type. */ - double latestDuration; /**< The latest (most recent) duration of the activity, in milliseconds. */ - double averageDuration; /**< The average duration of the activity, in milliseconds. */ - double minimumDuration; /**< The minimum duration of the activity, in milliseconds. */ - double maximumDuration; /**< The maximum duration of the activity, in milliseconds. */ + uint32_t count; /**< The number of activities of this type. */ + double latest; /**< The latest (most recent) value of the activity. */ + double average; /**< The average value of the activity. 
*/ + double minimum; /**< The minimum value of the activity. */ + double maximum; /**< The maximum value of the activity. */ } MVKPerformanceTracker; /** MoltenVK performance of shader compilation activities. */ @@ -186,12 +189,20 @@ typedef struct { /** MoltenVK performance of queue activities. */ typedef struct { - MVKPerformanceTracker mtlQueueAccess; /** Create an MTLCommandQueue or access an existing cached instance. */ - MVKPerformanceTracker mtlCommandBufferCompletion; /** Completion of a MTLCommandBuffer on the GPU, from commit to completion callback. */ - MVKPerformanceTracker nextCAMetalDrawable; /** Retrieve next CAMetalDrawable from CAMetalLayer during presentation. */ - MVKPerformanceTracker frameInterval; /** Frame presentation interval (1000/FPS). */ + MVKPerformanceTracker retrieveMTLCommandBuffer; /** Retrieve a MTLCommandBuffer from a MTLQueue. */ + MVKPerformanceTracker commandBufferEncoding; /** Encode a single VkCommandBuffer to a MTLCommandBuffer (excludes MTLCommandBuffer encoding from configured immediate prefilling). */ + MVKPerformanceTracker submitCommandBuffers; /** Submit and encode all VkCommandBuffers in a vkQueueSubmit() operation to MTLCommandBuffers (including both prefilled and deferred encoding). */ + MVKPerformanceTracker mtlCommandBufferExecution; /** Execute a MTLCommandBuffer on the GPU, from commit to completion callback. */ + MVKPerformanceTracker retrieveCAMetalDrawable; /** Retrieve next CAMetalDrawable from a CAMetalLayer. */ + MVKPerformanceTracker presentSwapchains; /** Present the swapchains in a vkQueuePresentKHR() on the GPU, from commit to presentation callback. */ + MVKPerformanceTracker frameInterval; /** Frame presentation interval (1000/FPS). */ } MVKQueuePerformance; +/** MoltenVK performance of device activities. */ +typedef struct { + MVKPerformanceTracker gpuMemoryAllocated; /** GPU memory allocated (in KB). */ +} MVKDevicePerformance; + /** * MoltenVK performance. 
You can retrieve a copy of this structure using the vkGetPerformanceStatisticsMVK() function. * @@ -209,6 +220,7 @@ typedef struct { MVKShaderCompilationPerformance shaderCompilation; /** Shader compilations activities. */ MVKPipelineCachePerformance pipelineCache; /** Pipeline cache activities. */ MVKQueuePerformance queue; /** Queue activities. */ + MVKDevicePerformance device; /** Device activities. */ } MVKPerformanceStatistics; diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.h b/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.h index 76274dad8..59242aff8 100644 --- a/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.h +++ b/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.h @@ -536,9 +536,6 @@ class MVKCommandEncoder : public MVKBaseDeviceObject { #pragma mark - #pragma mark Support functions -/** Returns a name, suitable for use as a MTLCommandBuffer label, based on the MVKCommandUse. */ -NSString* mvkMTLCommandBufferLabel(MVKCommandUse cmdUse); - /** Returns a name, suitable for use as a MTLRenderCommandEncoder label, based on the MVKCommandUse. 
*/ NSString* mvkMTLRenderCommandEncoderLabel(MVKCommandUse cmdUse); diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.mm b/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.mm index 5edc13aaa..4e0af4145 100644 --- a/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.mm +++ b/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.mm @@ -120,7 +120,7 @@ if(_device->shouldPrefillMTLCommandBuffers() && !(_isSecondary || _supportsConcurrentExecution)) { @autoreleasepool { - _prefilledMTLCmdBuffer = [_commandPool->getMTLCommandBuffer(0) retain]; // retained + _prefilledMTLCmdBuffer = [_commandPool->getMTLCommandBuffer(kMVKCommandUseBeginCommandBuffer, 0) retain]; // retained auto prefillStyle = mvkConfig().prefillMetalCommandBuffers; if (prefillStyle == MVK_CONFIG_PREFILL_METAL_COMMAND_BUFFERS_STYLE_IMMEDIATE_ENCODING || prefillStyle == MVK_CONFIG_PREFILL_METAL_COMMAND_BUFFERS_STYLE_IMMEDIATE_ENCODING_NO_AUTORELEASE ) { @@ -335,11 +335,19 @@ #pragma mark - #pragma mark MVKCommandEncoder +// Activity performance tracking is put here to deliberately exclude when +// MVKConfiguration::prefillMetalCommandBuffers is set to immediate prefilling, +// because that would include app time between command submissions. 
void MVKCommandEncoder::encode(id mtlCmdBuff, MVKCommandEncodingContext* pEncodingContext) { + MVKDevice* mvkDev = getDevice(); + uint64_t startTime = mvkDev->getPerformanceTimestamp(); + beginEncoding(mtlCmdBuff, pEncodingContext); encodeCommands(_cmdBuffer->_head); endEncoding(); + + mvkDev->addActivityPerformance(mvkDev->_performanceStatistics.queue.commandBufferEncoding, startTime); } void MVKCommandEncoder::beginEncoding(id mtlCmdBuff, MVKCommandEncodingContext* pEncodingContext) { @@ -1169,19 +1177,6 @@ #pragma mark - #pragma mark Support functions -NSString* mvkMTLCommandBufferLabel(MVKCommandUse cmdUse) { - switch (cmdUse) { - case kMVKCommandUseEndCommandBuffer: return @"vkEndCommandBuffer (Prefilled) CommandBuffer"; - case kMVKCommandUseQueueSubmit: return @"vkQueueSubmit CommandBuffer"; - case kMVKCommandUseQueuePresent: return @"vkQueuePresentKHR CommandBuffer"; - case kMVKCommandUseQueueWaitIdle: return @"vkQueueWaitIdle CommandBuffer"; - case kMVKCommandUseDeviceWaitIdle: return @"vkDeviceWaitIdle CommandBuffer"; - case kMVKCommandUseAcquireNextImage: return @"vkAcquireNextImageKHR CommandBuffer"; - case kMVKCommandUseInvalidateMappedMemoryRanges: return @"vkInvalidateMappedMemoryRanges CommandBuffer"; - default: return @"Unknown Use CommandBuffer"; - } -} - NSString* mvkMTLRenderCommandEncoderLabel(MVKCommandUse cmdUse) { switch (cmdUse) { case kMVKCommandUseBeginRendering: return @"vkCmdBeginRendering RenderEncoder"; diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandPool.h b/MoltenVK/MoltenVK/Commands/MVKCommandPool.h index f2cf1e66e..a6b1a38b3 100644 --- a/MoltenVK/MoltenVK/Commands/MVKCommandPool.h +++ b/MoltenVK/MoltenVK/Commands/MVKCommandPool.h @@ -82,7 +82,7 @@ class MVKCommandPool : public MVKVulkanAPIDeviceObject { * Returns a retained MTLCommandBuffer created from the indexed queue * within the queue family for which this command pool was created. 
*/ - id getMTLCommandBuffer(uint32_t queueIndex); + id getMTLCommandBuffer(MVKCommandUse cmdUse, uint32_t queueIndex); /** Release any held but unused memory back to the system. */ void trim(); diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandPool.mm b/MoltenVK/MoltenVK/Commands/MVKCommandPool.mm index be4713f36..656740b09 100644 --- a/MoltenVK/MoltenVK/Commands/MVKCommandPool.mm +++ b/MoltenVK/MoltenVK/Commands/MVKCommandPool.mm @@ -77,8 +77,8 @@ } } -id MVKCommandPool::getMTLCommandBuffer(uint32_t queueIndex) { - return _device->getQueue(_queueFamilyIndex, queueIndex)->getMTLCommandBuffer(kMVKCommandUseEndCommandBuffer, true); +id MVKCommandPool::getMTLCommandBuffer(MVKCommandUse cmdUse, uint32_t queueIndex) { + return _device->getQueue(_queueFamilyIndex, queueIndex)->getMTLCommandBuffer(cmdUse, true); } // Clear the command type pool member variables. diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h index 450fad661..5d4c328be 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h +++ b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h @@ -53,7 +53,6 @@ class MVKSemaphore; class MVKTimelineSemaphore; class MVKDeferredOperation; class MVKEvent; -class MVKSemaphoreImpl; class MVKQueryPool; class MVKShaderModule; class MVKPipelineCache; @@ -440,6 +439,11 @@ class MVKPhysicalDevice : public MVKDispatchableVulkanAPIObject { #pragma mark - #pragma mark MVKDevice +typedef enum { + MVKActivityPerformanceValueTypeDuration, + MVKActivityPerformanceValueTypeByteCount, +} MVKActivityPerformanceValueType; + typedef struct MVKMTLBlitEncoder { id mtlBlitEncoder = nil; id mtlCmdBuffer = nil; @@ -704,13 +708,17 @@ class MVKDevice : public MVKDispatchableVulkanAPIObject { void addActivityPerformance(MVKPerformanceTracker& activityTracker, uint64_t startTime, uint64_t endTime = 0) { if (_isPerformanceTracking) { - updateActivityPerformance(activityTracker, startTime, endTime); + updateActivityPerformance(activityTracker, 
mvkGetElapsedMilliseconds(startTime, endTime)); + } + }; - // Log call not locked. Very minor chance that the tracker data will be updated during log call, - // resulting in an inconsistent report. Not worth taking lock perf hit for rare inline reporting. - if (_activityPerformanceLoggingStyle == MVK_CONFIG_ACTIVITY_PERFORMANCE_LOGGING_STYLE_IMMEDIATE) { - logActivityPerformance(activityTracker, _performanceStatistics, true); - } + /** + * If performance is being tracked, adds the performance for an activity + * with a kilobyte count, to the given performance statistics. + */ + void addActivityByteCount(MVKPerformanceTracker& activityTracker, uint64_t byteCount) { + if (_isPerformanceTracking) { + updateActivityPerformance(activityTracker, double(byteCount / KIBI)); } }; @@ -885,8 +893,11 @@ class MVKDevice : public MVKDispatchableVulkanAPIObject { template void enableFeatures(S* pRequested, VkBool32* pEnabledBools, const VkBool32* pRequestedBools, const VkBool32* pAvailableBools, uint32_t count); void enableExtensions(const VkDeviceCreateInfo* pCreateInfo); const char* getActivityPerformanceDescription(MVKPerformanceTracker& activity, MVKPerformanceStatistics& perfStats); - void logActivityPerformance(MVKPerformanceTracker& activity, MVKPerformanceStatistics& perfStats, bool isInline = false); - void updateActivityPerformance(MVKPerformanceTracker& activity, uint64_t startTime, uint64_t endTime); + MVKActivityPerformanceValueType getActivityPerformanceValueType(MVKPerformanceTracker& activity, MVKPerformanceStatistics& perfStats); + void logActivityInline(MVKPerformanceTracker& activity, MVKPerformanceStatistics& perfStats); + void logActivityDuration(MVKPerformanceTracker& activity, MVKPerformanceStatistics& perfStats, bool isInline = false); + void logActivityByteCount(MVKPerformanceTracker& activity, MVKPerformanceStatistics& perfStats, bool isInline = false); + void updateActivityPerformance(MVKPerformanceTracker& activity, double currentValue); void 
getDescriptorVariableDescriptorCountLayoutSupport(const VkDescriptorSetLayoutCreateInfo* pCreateInfo, VkDescriptorSetLayoutSupport* pSupport, VkDescriptorSetVariableDescriptorCountLayoutSupport* pVarDescSetCountSupport); @@ -908,7 +919,6 @@ class MVKDevice : public MVKDispatchableVulkanAPIObject { id _defaultMTLSamplerState = nil; id _dummyBlitMTLBuffer = nil; uint32_t _globalVisibilityQueryCount = 0; - MVKConfigActivityPerformanceLoggingStyle _activityPerformanceLoggingStyle = MVK_CONFIG_ACTIVITY_PERFORMANCE_LOGGING_STYLE_FRAME_COUNT; bool _isPerformanceTracking = false; bool _isCurrentlyAutoGPUCapturing = false; bool _isUsingMetalArgumentBuffers = false; @@ -1056,6 +1066,15 @@ class MVKDeviceObjectPool : public MVKObjectPool, public MVKDeviceTrackingMix #pragma mark - #pragma mark Support functions +/** + * Returns an autoreleased array containing the MTLDevices available on this system, + * sorted according to power, with higher power GPU's at the front of the array. + * This ensures that a lazy app that simply grabs the first GPU will get a high-power + * one by default. If MVKConfiguration::forceLowPowerGPU is enabled, the returned + * array will only include low-power devices. + */ +NSArray>* mvkGetAvailableMTLDevicesArray(); + /** Returns the registry ID of the specified device, or zero if the device does not have a registry ID. */ uint64_t mvkGetRegistryID(id mtlDevice); diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm index 0b11d4dfe..d44e64929 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm @@ -3056,32 +3056,23 @@ static uint32_t mvkGetEntryProperty(io_registry_entry_t entry, CFStringRef prope } } +// If possible, retrieve from the MTLDevice, otherwise from available memory size, or a fixed conservative estimate. 
uint64_t MVKPhysicalDevice::getRecommendedMaxWorkingSetSize() { -#if MVK_MACOS +#if MVK_XCODE_14 || MVK_MACOS if ( [_mtlDevice respondsToSelector: @selector(recommendedMaxWorkingSetSize)]) { return _mtlDevice.recommendedMaxWorkingSetSize; } #endif -#if MVK_IOS_OR_TVOS - // GPU and CPU use shared memory. Estimate the current free memory in the system. uint64_t freeMem = mvkGetAvailableMemorySize(); - if (freeMem) { return freeMem; } -#endif - - return 128 * MEBI; // Conservative minimum for macOS GPU's & iOS shared memory + return freeMem ? freeMem : 256 * MEBI; } +// If possible, retrieve from the MTLDevice, otherwise use the current memory used by this process. uint64_t MVKPhysicalDevice::getCurrentAllocatedSize() { if ( [_mtlDevice respondsToSelector: @selector(currentAllocatedSize)] ) { return _mtlDevice.currentAllocatedSize; } -#if MVK_IOS_OR_TVOS - // We can use the current memory used by this process as a reasonable approximation. return mvkGetUsedMemorySize(); -#endif -#if MVK_MACOS - return 0; -#endif } // When using argument buffers, Metal imposes a hard limit on the number of MTLSamplerState @@ -3249,31 +3240,14 @@ static uint32_t mvkGetEntryProperty(io_registry_entry_t entry, CFStringRef prope } void MVKPhysicalDevice::logGPUInfo() { - string devTypeStr; - switch (_properties.deviceType) { - case VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU: - devTypeStr = "Discrete"; - break; - case VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU: - devTypeStr = "Integrated"; - break; - case VK_PHYSICAL_DEVICE_TYPE_VIRTUAL_GPU: - devTypeStr = "Virtual"; - break; - case VK_PHYSICAL_DEVICE_TYPE_CPU: - devTypeStr = "CPU Emulation"; - break; - default: - devTypeStr = "Unknown"; - break; - } - string logMsg = "GPU device:"; logMsg += "\n\t\tmodel: %s"; logMsg += "\n\t\ttype: %s"; logMsg += "\n\t\tvendorID: %#06x"; logMsg += "\n\t\tdeviceID: %#06x"; logMsg += "\n\t\tpipelineCacheUUID: %s"; + logMsg += "\n\t\tGPU memory available: %llu MB"; + logMsg += "\n\t\tGPU memory used: %llu MB"; logMsg 
+= "\n\tsupports the following Metal Versions, GPU's and Feature Sets:"; logMsg += "\n\t\tMetal Shading Language %s"; @@ -3356,9 +3330,29 @@ static uint32_t mvkGetEntryProperty(io_registry_entry_t entry, CFStringRef prope } #endif + string devTypeStr; + switch (_properties.deviceType) { + case VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU: + devTypeStr = "Discrete"; + break; + case VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU: + devTypeStr = "Integrated"; + break; + case VK_PHYSICAL_DEVICE_TYPE_VIRTUAL_GPU: + devTypeStr = "Virtual"; + break; + case VK_PHYSICAL_DEVICE_TYPE_CPU: + devTypeStr = "CPU Emulation"; + break; + default: + devTypeStr = "Unknown"; + break; + } + NSUUID* nsUUID = [[NSUUID alloc] initWithUUIDBytes: _properties.pipelineCacheUUID]; // temp retain - MVKLogInfo(logMsg.c_str(), _properties.deviceName, devTypeStr.c_str(), + MVKLogInfo(logMsg.c_str(), getName(), devTypeStr.c_str(), _properties.vendorID, _properties.deviceID, nsUUID.UUIDString.UTF8String, + getRecommendedMaxWorkingSetSize() / MEBI, getCurrentAllocatedSize() / MEBI, SPIRVToMSLConversionOptions::printMSLVersion(_metalFeatures.mslVersion).c_str()); [nsUUID release]; // temp release } @@ -3366,7 +3360,11 @@ static uint32_t mvkGetEntryProperty(io_registry_entry_t entry, CFStringRef prope MVKPhysicalDevice::~MVKPhysicalDevice() { mvkDestroyContainerContents(_queueFamilies); [_timestampMTLCounterSet release]; + + uint64_t memUsed = getCurrentAllocatedSize(); // Retrieve before releasing MTLDevice [_mtlDevice release]; + + MVKLogInfo("Destroyed VkPhysicalDevice for GPU %s with %llu MB of GPU memory still allocated.", getName(), memUsed / MEBI); } @@ -4185,30 +4183,52 @@ static uint32_t mvkGetEntryProperty(io_registry_entry_t entry, CFStringRef prope } } -void MVKDevice::updateActivityPerformance(MVKPerformanceTracker& activity, - uint64_t startTime, uint64_t endTime) { - - double currInterval = mvkGetElapsedMilliseconds(startTime, endTime); +void MVKDevice::updateActivityPerformance(MVKPerformanceTracker& 
activity, double currentValue) { lock_guard lock(_perfLock); - activity.latestDuration = currInterval; - activity.minimumDuration = ((activity.minimumDuration == 0.0) - ? currInterval : - min(currInterval, activity.minimumDuration)); - activity.maximumDuration = max(currInterval, activity.maximumDuration); - double totalInterval = (activity.averageDuration * activity.count++) + currInterval; - activity.averageDuration = totalInterval / activity.count; + activity.latest = currentValue; + activity.minimum = ((activity.minimum == 0.0) + ? currentValue : + min(currentValue, activity.minimum)); + activity.maximum = max(currentValue, activity.maximum); + double total = (activity.average * activity.count++) + currentValue; + activity.average = total / activity.count; + + if (mvkConfig().activityPerformanceLoggingStyle == MVK_CONFIG_ACTIVITY_PERFORMANCE_LOGGING_STYLE_IMMEDIATE) { + logActivityInline(activity, _performanceStatistics); + } } -void MVKDevice::logActivityPerformance(MVKPerformanceTracker& activity, MVKPerformanceStatistics& perfStats, bool isInline) { - MVKLogInfo("%s%s%s avg: %.3f ms, latest: %.3f ms, min: %.3f ms, max: %.3f ms, count: %d", - (isInline ? "" : " "), +void MVKDevice::logActivityInline(MVKPerformanceTracker& activity, MVKPerformanceStatistics& perfStats) { + if (getActivityPerformanceValueType(activity, _performanceStatistics) == MVKActivityPerformanceValueTypeByteCount) { + logActivityByteCount(activity, _performanceStatistics, true); + } else { + logActivityDuration(activity, _performanceStatistics, true); + } +} +void MVKDevice::logActivityDuration(MVKPerformanceTracker& activity, MVKPerformanceStatistics& perfStats, bool isInline) { + const char* fmt = (isInline + ? "%s performance avg: %.3f ms, latest: %.3f ms, min: %.3f ms, max: %.3f ms, count: %d" + : " %-45s avg: %.3f ms, latest: %.3f ms, min: %.3f ms, max: %.3f ms, count: %d"); + MVKLogInfo(fmt, getActivityPerformanceDescription(activity, perfStats), - (isInline ? 
" performance" : ""), - activity.averageDuration, - activity.latestDuration, - activity.minimumDuration, - activity.maximumDuration, + activity.average, + activity.latest, + activity.minimum, + activity.maximum, + activity.count); +} + +void MVKDevice::logActivityByteCount(MVKPerformanceTracker& activity, MVKPerformanceStatistics& perfStats, bool isInline) { + const char* fmt = (isInline + ? "%s avg: %5llu MB, latest: %5llu MB, min: %5llu MB, max: %5llu MB, count: %d" + : " %-45s avg: %5llu MB, latest: %5llu MB, min: %5llu MB, max: %5llu MB, count: %d"); + MVKLogInfo(fmt, + getActivityPerformanceDescription(activity, perfStats), + uint64_t(activity.average) / KIBI, + uint64_t(activity.latest) / KIBI, + uint64_t(activity.minimum) / KIBI, + uint64_t(activity.maximum) / KIBI, activity.count); } @@ -4218,49 +4238,71 @@ static uint32_t mvkGetEntryProperty(io_registry_entry_t entry, CFStringRef prope MVKPerformanceStatistics perfStats; getPerformanceStatistics(&perfStats); - logActivityPerformance(perfStats.queue.frameInterval, perfStats); - logActivityPerformance(perfStats.queue.nextCAMetalDrawable, perfStats); - logActivityPerformance(perfStats.queue.mtlCommandBufferCompletion, perfStats); - logActivityPerformance(perfStats.queue.mtlQueueAccess, perfStats); - logActivityPerformance(perfStats.shaderCompilation.hashShaderCode, perfStats); - logActivityPerformance(perfStats.shaderCompilation.spirvToMSL, perfStats); - logActivityPerformance(perfStats.shaderCompilation.mslCompile, perfStats); - logActivityPerformance(perfStats.shaderCompilation.mslLoad, perfStats); - logActivityPerformance(perfStats.shaderCompilation.mslCompress, perfStats); - logActivityPerformance(perfStats.shaderCompilation.mslDecompress, perfStats); - logActivityPerformance(perfStats.shaderCompilation.shaderLibraryFromCache, perfStats); - logActivityPerformance(perfStats.shaderCompilation.functionRetrieval, perfStats); - logActivityPerformance(perfStats.shaderCompilation.functionSpecialization, 
perfStats); - logActivityPerformance(perfStats.shaderCompilation.pipelineCompile, perfStats); - logActivityPerformance(perfStats.pipelineCache.sizePipelineCache, perfStats); - logActivityPerformance(perfStats.pipelineCache.readPipelineCache, perfStats); - logActivityPerformance(perfStats.pipelineCache.writePipelineCache, perfStats); +#define logDuration(s) logActivityDuration(perfStats.s, perfStats) +#define logByteCount(s) logActivityByteCount(perfStats.s, perfStats) + + logDuration(queue.frameInterval); + logDuration(queue.retrieveMTLCommandBuffer); + logDuration(queue.commandBufferEncoding); + logDuration(queue.submitCommandBuffers); + logDuration(queue.mtlCommandBufferExecution); + logDuration(queue.retrieveCAMetalDrawable); + logDuration(queue.presentSwapchains); + logDuration(shaderCompilation.hashShaderCode); + logDuration(shaderCompilation.spirvToMSL); + logDuration(shaderCompilation.mslCompile); + logDuration(shaderCompilation.mslLoad); + logDuration(shaderCompilation.mslCompress); + logDuration(shaderCompilation.mslDecompress); + logDuration(shaderCompilation.shaderLibraryFromCache); + logDuration(shaderCompilation.functionRetrieval); + logDuration(shaderCompilation.functionSpecialization); + logDuration(shaderCompilation.pipelineCompile); + logDuration(pipelineCache.sizePipelineCache); + logDuration(pipelineCache.readPipelineCache); + logDuration(pipelineCache.writePipelineCache); + logByteCount(device.gpuMemoryAllocated); +#undef logDuration +#undef logByteCount } const char* MVKDevice::getActivityPerformanceDescription(MVKPerformanceTracker& activity, MVKPerformanceStatistics& perfStats) { - if (&activity == &perfStats.shaderCompilation.hashShaderCode) { return "Hash shader SPIR-V code"; } - if (&activity == &perfStats.shaderCompilation.spirvToMSL) { return "Convert SPIR-V to MSL source code"; } - if (&activity == &perfStats.shaderCompilation.mslCompile) { return "Compile MSL source code into a MTLLibrary"; } - if (&activity == 
&perfStats.shaderCompilation.mslLoad) { return "Load pre-compiled MSL code into a MTLLibrary"; } - if (&activity == &perfStats.shaderCompilation.mslCompress) { return "Compress MSL source code after compiling a MTLLibrary"; } - if (&activity == &perfStats.shaderCompilation.mslDecompress) { return "Decompress MSL source code during pipeline cache write"; } - if (&activity == &perfStats.shaderCompilation.shaderLibraryFromCache) { return "Retrieve shader library from the cache"; } - if (&activity == &perfStats.shaderCompilation.functionRetrieval) { return "Retrieve a MTLFunction from a MTLLibrary"; } - if (&activity == &perfStats.shaderCompilation.functionSpecialization) { return "Specialize a retrieved MTLFunction"; } - if (&activity == &perfStats.shaderCompilation.pipelineCompile) { return "Compile MTLFunctions into a pipeline"; } - if (&activity == &perfStats.pipelineCache.sizePipelineCache) { return "Calculate cache size required to write MSL to pipeline cache"; } - if (&activity == &perfStats.pipelineCache.readPipelineCache) { return "Read MSL from pipeline cache"; } - if (&activity == &perfStats.pipelineCache.writePipelineCache) { return "Write MSL to pipeline cache"; } - if (&activity == &perfStats.queue.mtlQueueAccess) { return "Access MTLCommandQueue"; } - if (&activity == &perfStats.queue.mtlCommandBufferCompletion) { return "Complete MTLCommandBuffer"; } - if (&activity == &perfStats.queue.nextCAMetalDrawable) { return "Retrieve a CAMetalDrawable from CAMetalLayer"; } - if (&activity == &perfStats.queue.frameInterval) { return "Frame interval"; } - return "Unknown performance activity"; +#define ifActivityReturnName(s, n) if (&activity == &perfStats.s) return n + ifActivityReturnName(shaderCompilation.hashShaderCode, "Hash shader SPIR-V code"); + ifActivityReturnName(shaderCompilation.spirvToMSL, "Convert SPIR-V to MSL source code"); + ifActivityReturnName(shaderCompilation.mslCompile, "Compile MSL into a MTLLibrary"); + 
ifActivityReturnName(shaderCompilation.mslLoad, "Load pre-compiled MSL into a MTLLibrary"); + ifActivityReturnName(shaderCompilation.mslCompress, "Compress MSL after compiling a MTLLibrary"); + ifActivityReturnName(shaderCompilation.mslDecompress, "Decompress MSL for pipeline cache write"); + ifActivityReturnName(shaderCompilation.shaderLibraryFromCache, "Retrieve shader library from the cache"); + ifActivityReturnName(shaderCompilation.functionRetrieval, "Retrieve a MTLFunction from a MTLLibrary"); + ifActivityReturnName(shaderCompilation.functionSpecialization, "Specialize a retrieved MTLFunction"); + ifActivityReturnName(shaderCompilation.pipelineCompile, "Compile MTLFunctions into a pipeline"); + ifActivityReturnName(pipelineCache.sizePipelineCache, "Calculate pipeline cache size"); + ifActivityReturnName(pipelineCache.readPipelineCache, "Read MSL from pipeline cache"); + ifActivityReturnName(pipelineCache.writePipelineCache, "Write MSL to pipeline cache"); + ifActivityReturnName(queue.retrieveMTLCommandBuffer, "Retrieve a MTLCommandBuffer"); + ifActivityReturnName(queue.commandBufferEncoding, "Encode VkCommandBuffer to MTLCommandBuffer"); + ifActivityReturnName(queue.submitCommandBuffers, "vkQueueSubmit() encoding to MTLCommandBuffers"); + ifActivityReturnName(queue.mtlCommandBufferExecution, "Execute a MTLCommandBuffer on GPU"); + ifActivityReturnName(queue.retrieveCAMetalDrawable, "Retrieve a CAMetalDrawable"); + ifActivityReturnName(queue.presentSwapchains, "Present swapchains in on GPU"); + ifActivityReturnName(queue.frameInterval, "Frame interval"); + ifActivityReturnName(device.gpuMemoryAllocated, "GPU memory allocated"); + return "Unknown performance activity"; +#undef ifActivityReturnName +} + +MVKActivityPerformanceValueType MVKDevice::getActivityPerformanceValueType(MVKPerformanceTracker& activity, MVKPerformanceStatistics& perfStats) { + if (&activity == &perfStats.device.gpuMemoryAllocated) return MVKActivityPerformanceValueTypeByteCount; + return 
MVKActivityPerformanceValueTypeDuration; } void MVKDevice::getPerformanceStatistics(MVKPerformanceStatistics* pPerf) { - lock_guard lock(_perfLock); + addActivityByteCount(_performanceStatistics.device.gpuMemoryAllocated, + _physicalDevice->getCurrentAllocatedSize()); + lock_guard lock(_perfLock); if (pPerf) { *pPerf = _performanceStatistics; } } @@ -4597,33 +4639,15 @@ static uint32_t mvkGetEntryProperty(io_registry_entry_t entry, CFStringRef prope startAutoGPUCapture(MVK_CONFIG_AUTO_GPU_CAPTURE_SCOPE_DEVICE, getMTLDevice()); MVKLogInfo("Created VkDevice to run on GPU %s with the following %d Vulkan extensions enabled:%s", - _pProperties->deviceName, - _enabledExtensions.getEnabledCount(), - _enabledExtensions.enabledNamesString("\n\t\t", true).c_str()); + getName(), _enabledExtensions.getEnabledCount(), _enabledExtensions.enabledNamesString("\n\t\t", true).c_str()); } -void MVKDevice::initPerformanceTracking() { +// Perf stats that last the duration of the app process. +static MVKPerformanceStatistics _processPerformanceStatistics = {}; +void MVKDevice::initPerformanceTracking() { _isPerformanceTracking = mvkConfig().performanceTracking; - _activityPerformanceLoggingStyle = mvkConfig().activityPerformanceLoggingStyle; - - _performanceStatistics.shaderCompilation.hashShaderCode = {}; - _performanceStatistics.shaderCompilation.spirvToMSL = {}; - _performanceStatistics.shaderCompilation.mslCompile = {}; - _performanceStatistics.shaderCompilation.mslLoad = {}; - _performanceStatistics.shaderCompilation.mslCompress = {}; - _performanceStatistics.shaderCompilation.mslDecompress = {}; - _performanceStatistics.shaderCompilation.shaderLibraryFromCache = {}; - _performanceStatistics.shaderCompilation.functionRetrieval = {}; - _performanceStatistics.shaderCompilation.functionSpecialization = {}; - _performanceStatistics.shaderCompilation.pipelineCompile = {}; - _performanceStatistics.pipelineCache.sizePipelineCache = {}; - 
_performanceStatistics.pipelineCache.writePipelineCache = {}; - _performanceStatistics.pipelineCache.readPipelineCache = {}; - _performanceStatistics.queue.mtlQueueAccess = {}; - _performanceStatistics.queue.mtlCommandBufferCompletion = {}; - _performanceStatistics.queue.nextCAMetalDrawable = {}; - _performanceStatistics.queue.frameInterval = {}; + _performanceStatistics = _processPerformanceStatistics; } void MVKDevice::initPhysicalDevice(MVKPhysicalDevice* physicalDevice, const VkDeviceCreateInfo* pCreateInfo) { @@ -4920,9 +4944,16 @@ static uint32_t mvkGetEntryProperty(io_registry_entry_t entry, CFStringRef prope } MVKDevice::~MVKDevice() { - if (_activityPerformanceLoggingStyle == MVK_CONFIG_ACTIVITY_PERFORMANCE_LOGGING_STYLE_DEVICE_LIFETIME) { - MVKLogInfo("Device activity performance summary:"); - logPerformanceSummary(); + if (_isPerformanceTracking) { + auto perfLogStyle = mvkConfig().activityPerformanceLoggingStyle; + if (perfLogStyle == MVK_CONFIG_ACTIVITY_PERFORMANCE_LOGGING_STYLE_DEVICE_LIFETIME) { + MVKLogInfo("Device activity performance summary:"); + logPerformanceSummary(); + } else if (perfLogStyle == MVK_CONFIG_ACTIVITY_PERFORMANCE_LOGGING_STYLE_DEVICE_LIFETIME_ACCUMULATE) { + MVKLogInfo("Process activity performance summary:"); + logPerformanceSummary(); + _processPerformanceStatistics = _performanceStatistics; + } } for (auto& queues : _queuesByQueueFamilyIndex) { @@ -4938,12 +4969,58 @@ static uint32_t mvkGetEntryProperty(io_registry_entry_t entry, CFStringRef prope stopAutoGPUCapture(MVK_CONFIG_AUTO_GPU_CAPTURE_SCOPE_DEVICE); mvkDestroyContainerContents(_privateDataSlots); + + MVKLogInfo("Destroyed VkDevice on GPU %s with %d Vulkan extensions enabled.", + getName(), _enabledExtensions.getEnabledCount()); } #pragma mark - #pragma mark Support functions +NSArray>* mvkGetAvailableMTLDevicesArray() { + NSMutableArray* mtlDevs = [NSMutableArray array]; // autoreleased + +#if MVK_MACOS + NSArray* rawMTLDevs = [MTLCopyAllDevices() autorelease]; + 
bool forceLowPower = mvkConfig().forceLowPowerGPU; + + // Populate the array of appropriate MTLDevices + for (id md in rawMTLDevs) { + if ( !forceLowPower || md.isLowPower ) { [mtlDevs addObject: md]; } + } + + // Sort by power + [mtlDevs sortUsingComparator: ^(id md1, id md2) { + BOOL md1IsLP = md1.isLowPower; + BOOL md2IsLP = md2.isLowPower; + + if (md1IsLP == md2IsLP) { + // If one device is headless and the other one is not, select the + // one that is not headless first. + BOOL md1IsHeadless = md1.isHeadless; + BOOL md2IsHeadless = md2.isHeadless; + if (md1IsHeadless == md2IsHeadless ) { + return NSOrderedSame; + } + return md2IsHeadless ? NSOrderedAscending : NSOrderedDescending; + } + + return md2IsLP ? NSOrderedAscending : NSOrderedDescending; + }]; + + // If the survey found at least one device, return the array. + if (mtlDevs.count) { return mtlDevs; } + +#endif // MVK_MACOS + + // For other OS's, or for macOS if the survey returned empty, use the default device. + id md = [MTLCreateSystemDefaultDevice() autorelease]; + if (md) { [mtlDevs addObject: md]; } + + return mtlDevs; // retained +} + uint64_t mvkGetRegistryID(id mtlDevice) { return [mtlDevice respondsToSelector: @selector(registryID)] ? mtlDevice.registryID : 0; } diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKImage.h b/MoltenVK/MoltenVK/GPUObjects/MVKImage.h index 572e8f06a..fb7c3dfa4 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKImage.h +++ b/MoltenVK/MoltenVK/GPUObjects/MVKImage.h @@ -31,6 +31,7 @@ class MVKImage; class MVKImageView; class MVKSwapchain; +class MVKQueue; class MVKCommandEncoder; @@ -409,8 +410,8 @@ class MVKSwapchainImage : public MVKImage { virtual id getCAMetalDrawable() = 0; void detachSwapchain(); + std::mutex _detachmentLock; MVKSwapchain* _swapchain; - std::mutex _swapchainLock; uint32_t _swapchainIndex; }; @@ -429,6 +430,7 @@ typedef struct MVKSwapchainImageAvailability { /** Presentation info. 
*/ typedef struct { MVKPresentableSwapchainImage* presentableImage; + MVKQueue* queue; // The queue on which the vkQueuePresentKHR() command was executed. MVKFence* fence; // VK_EXT_swapchain_maintenance1 fence signaled when resources can be destroyed uint64_t desiredPresentTime; // VK_GOOGLE_display_timing desired presentation time in nanoseconds uint32_t presentID; // VK_GOOGLE_display_timing presentID @@ -454,12 +456,22 @@ class MVKPresentableSwapchainImage : public MVKSwapchainImage { /** Presents the contained drawable to the OS. */ void presentCAMetalDrawable(id mtlCmdBuff, MVKImagePresentInfo presentInfo); + /** Called when the presentation begins. */ + void beginPresentation(const MVKImagePresentInfo& presentInfo); + + /** Called via callback when the presentation completes. */ + void endPresentation(const MVKImagePresentInfo& presentInfo, uint64_t actualPresentTime = 0); + + /** If this image is stuck in-flight, attempt to force it to complete. */ + void forcePresentationCompletion(); #pragma mark Construction MVKPresentableSwapchainImage(MVKDevice* device, const VkImageCreateInfo* pCreateInfo, MVKSwapchain* swapchain, uint32_t swapchainIndex); + void destroy() override; + ~MVKPresentableSwapchainImage() override; protected: @@ -471,15 +483,14 @@ class MVKPresentableSwapchainImage : public MVKSwapchainImage { MVKSwapchainImageAvailability getAvailability(); void makeAvailable(const MVKSwapchainSignaler& signaler); void makeAvailable(); - void acquireAndSignalWhenAvailable(MVKSemaphore* semaphore, MVKFence* fence); - void renderWatermark(id mtlCmdBuff); + VkResult acquireAndSignalWhenAvailable(MVKSemaphore* semaphore, MVKFence* fence); - id _mtlDrawable; - id _presentingMTLCmdBuff; + id _mtlDrawable = nil; MVKSwapchainImageAvailability _availability; MVKSmallVector _availabilitySignalers; - MVKSwapchainSignaler _preSignaler; + MVKSwapchainSignaler _preSignaler = {}; std::mutex _availabilityLock; + uint64_t _presentationStartTime = 0; }; diff --git 
a/MoltenVK/MoltenVK/GPUObjects/MVKImage.mm b/MoltenVK/MoltenVK/GPUObjects/MVKImage.mm index 148702215..fa87643a3 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKImage.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKImage.mm @@ -19,6 +19,7 @@ #include "MVKImage.h" #include "MVKQueue.h" #include "MVKSwapchain.h" +#include "MVKSurface.h" #include "MVKCommandBuffer.h" #include "MVKCmdDebug.h" #include "MVKFoundation.h" @@ -1192,8 +1193,9 @@ } void MVKSwapchainImage::detachSwapchain() { - lock_guard lock(_swapchainLock); + lock_guard lock(_detachmentLock); _swapchain = nullptr; + _device = nullptr; } void MVKSwapchainImage::destroy() { @@ -1245,7 +1247,7 @@ static void signalAndUnmarkAsTracked(const MVKSwapchainSignaler& signaler) { unmarkAsTracked(signaler); } -void MVKPresentableSwapchainImage::acquireAndSignalWhenAvailable(MVKSemaphore* semaphore, MVKFence* fence) { +VkResult MVKPresentableSwapchainImage::acquireAndSignalWhenAvailable(MVKSemaphore* semaphore, MVKFence* fence) { lock_guard lock(_availabilityLock); // Upon acquisition, update acquisition ID immediately, to move it to the back of the chain, @@ -1256,18 +1258,21 @@ static void signalAndUnmarkAsTracked(const MVKSwapchainSignaler& signaler) { // This is not done earlier so the texture is retained for any post-processing such as screen captures, etc. releaseMetalDrawable(); + VkResult rslt = VK_SUCCESS; auto signaler = MVKSwapchainSignaler{fence, semaphore, semaphore ? semaphore->deferSignal() : 0}; if (_availability.isAvailable) { _availability.isAvailable = false; - // If signalling through a MTLEvent, and there's no command buffer presenting me, use an ephemeral MTLCommandBuffer. + // If signalling through a MTLEvent, signal through an ephemeral MTLCommandBuffer. // Another option would be to use MTLSharedEvent in MVKSemaphore, but that might // impose unacceptable performance costs to handle this particular case. 
@autoreleasepool { MVKSemaphore* mvkSem = signaler.semaphore; - id mtlCmdBuff = (mvkSem && mvkSem->isUsingCommandEncoding() - ? _device->getAnyQueue()->getMTLCommandBuffer(kMVKCommandUseAcquireNextImage) - : nil); + id mtlCmdBuff = nil; + if (mvkSem && mvkSem->isUsingCommandEncoding()) { + mtlCmdBuff = _device->getAnyQueue()->getMTLCommandBuffer(kMVKCommandUseAcquireNextImage); + if ( !mtlCmdBuff ) { rslt = VK_ERROR_OUT_OF_POOL_MEMORY; } + } signal(signaler, mtlCmdBuff); [mtlCmdBuff commit]; } @@ -1277,17 +1282,20 @@ static void signalAndUnmarkAsTracked(const MVKSwapchainSignaler& signaler) { _availabilitySignalers.push_back(signaler); } markAsTracked(signaler); + + return rslt; } id MVKPresentableSwapchainImage::getCAMetalDrawable() { - while ( !_mtlDrawable ) { - @autoreleasepool { // Reclaim auto-released drawable object before end of loop - uint64_t startTime = _device->getPerformanceTimestamp(); - - _mtlDrawable = [_swapchain->_mtlLayer.nextDrawable retain]; - if ( !_mtlDrawable ) { MVKLogError("CAMetalDrawable could not be acquired."); } - - _device->addActivityPerformance(_device->_performanceStatistics.queue.nextCAMetalDrawable, startTime); + if ( !_mtlDrawable ) { + @autoreleasepool { + uint32_t attemptCnt = _swapchain->getImageCount() * 2; // Attempt a reasonable number of times + for (uint32_t attemptIdx = 0; !_mtlDrawable && attemptIdx < attemptCnt; attemptIdx++) { + uint64_t startTime = _device->getPerformanceTimestamp(); + _mtlDrawable = [_swapchain->_surface->getCAMetalLayer().nextDrawable retain]; // retained + _device->addActivityPerformance(_device->_performanceStatistics.queue.retrieveCAMetalDrawable, startTime); + } + if ( !_mtlDrawable ) { reportError(VK_ERROR_OUT_OF_POOL_MEMORY, "CAMetalDrawable could not be acquired after %d attempts.", attemptCnt); } } } return _mtlDrawable; @@ -1299,22 +1307,20 @@ static void signalAndUnmarkAsTracked(const MVKSwapchainSignaler& signaler) { MVKImagePresentInfo presentInfo) { lock_guard
lock(_availabilityLock); - _swapchain->willPresentSurface(getMTLTexture(0), mtlCmdBuff); + _swapchain->renderWatermark(getMTLTexture(0), mtlCmdBuff); // According to Apple, it is more performant to call MTLDrawable present from within a // MTLCommandBuffer scheduled-handler than it is to call MTLCommandBuffer presentDrawable:. // But get current drawable now, intead of in handler, because a new drawable might be acquired by then. // Attach present handler before presenting to avoid race condition. id mtlDrwbl = getCAMetalDrawable(); + addPresentedHandler(mtlDrwbl, presentInfo); [mtlCmdBuff addScheduledHandler: ^(id mcb) { // Try to do any present mode transitions as late as possible in an attempt // to avoid visual disruptions on any presents already on the queue. if (presentInfo.presentMode != VK_PRESENT_MODE_MAX_ENUM_KHR) { mtlDrwbl.layer.displaySyncEnabledMVK = (presentInfo.presentMode != VK_PRESENT_MODE_IMMEDIATE_KHR); } - if (presentInfo.hasPresentTime) { - addPresentedHandler(mtlDrwbl, presentInfo); - } if (presentInfo.desiredPresentTime) { [mtlDrwbl presentAtTime: (double)presentInfo.desiredPresentTime * 1.0e-9]; } else { @@ -1362,34 +1368,45 @@ static void signalAndUnmarkAsTracked(const MVKSwapchainSignaler& signaler) { // Pass MVKImagePresentInfo by value because it may not exist when the callback runs. void MVKPresentableSwapchainImage::addPresentedHandler(id mtlDrawable, MVKImagePresentInfo presentInfo) { + beginPresentation(presentInfo); + #if !MVK_OS_SIMULATOR if ([mtlDrawable respondsToSelector: @selector(addPresentedHandler:)]) { - retain(); // Ensure this image is not destroyed while awaiting presentation - [mtlDrawable addPresentedHandler: ^(id drawable) { - // Since we're in a callback, it's possible that the swapchain has been released by now. - // Lock the swapchain, and test if it is present before doing anything with it. 
- lock_guard cblock(_swapchainLock); - if (_swapchain) { _swapchain->recordPresentTime(presentInfo, drawable.presentedTime * 1.0e9); } - release(); + [mtlDrawable addPresentedHandler: ^(id mtlDrwbl) { + endPresentation(presentInfo, mtlDrwbl.presentedTime * 1.0e9); }]; - return; - } + } else #endif + { + // If MTLDrawable.presentedTime/addPresentedHandler isn't supported, + // treat it as if the present happened when requested. + endPresentation(presentInfo); + } +} - // If MTLDrawable.presentedTime/addPresentedHandler isn't supported, - // treat it as if the present happened when requested. - // Since this function may be called in a callback, it's possible that - // the swapchain has been released by the time this function runs. - // Lock the swapchain, and test if it is present before doing anything with it. - lock_guard lock(_swapchainLock); - if (_swapchain) {_swapchain->recordPresentTime(presentInfo); } +// Ensure this image and the swapchain are not destroyed while awaiting presentation +void MVKPresentableSwapchainImage::beginPresentation(const MVKImagePresentInfo& presentInfo) { + retain(); + _swapchain->beginPresentation(presentInfo); + presentInfo.queue->beginPresentation(presentInfo); + _presentationStartTime = getDevice()->getPerformanceTimestamp(); } -// Resets the MTLTexture and CAMetalDrawable underlying this image. +void MVKPresentableSwapchainImage::endPresentation(const MVKImagePresentInfo& presentInfo, + uint64_t actualPresentTime) { + { // Scope to avoid deadlock if release() is run within detachment lock + // If I have become detached from the swapchain, it means the swapchain, and possibly the + // VkDevice, have been destroyed by the time of this callback, so do not reference them. 
+ lock_guard lock(_detachmentLock); + if (_device) { _device->addActivityPerformance(_device->_performanceStatistics.queue.presentSwapchains, _presentationStartTime); } + if (_swapchain) { _swapchain->endPresentation(presentInfo, actualPresentTime); } + } + presentInfo.queue->endPresentation(presentInfo); + release(); +} + +// Releases the CAMetalDrawable underlying this image. void MVKPresentableSwapchainImage::releaseMetalDrawable() { - for (uint8_t planeIndex = 0; planeIndex < _planes.size(); ++planeIndex) { - _planes[planeIndex]->releaseMTLTexture(); - } [_mtlDrawable release]; _mtlDrawable = nil; } @@ -1417,6 +1434,13 @@ static void signalAndUnmarkAsTracked(const MVKSwapchainSignaler& signaler) { } } +// Clear the existing CAMetalDrawable and retrieve and release a new transient one, +// in an attempt to trigger the existing CAMetalDrawable to complete its callback. +void MVKPresentableSwapchainImage::forcePresentationCompletion() { + releaseMetalDrawable(); + if (_swapchain) { @autoreleasepool { [_swapchain->_surface->getCAMetalLayer() nextDrawable]; } } +} + #pragma mark Construction @@ -1426,11 +1450,14 @@ static void signalAndUnmarkAsTracked(const MVKSwapchainSignaler& signaler) { uint32_t swapchainIndex) : MVKSwapchainImage(device, pCreateInfo, swapchain, swapchainIndex) { - _mtlDrawable = nil; - _availability.acquisitionID = _swapchain->getNextAcquisitionID(); _availability.isAvailable = true; - _preSignaler = MVKSwapchainSignaler{nullptr, nullptr, 0}; +} + + +void MVKPresentableSwapchainImage::destroy() { + forcePresentationCompletion(); + MVKSwapchainImage::destroy(); } // Unsignaled signalers will exist if this image is acquired more than it is presented.
diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKInstance.h b/MoltenVK/MoltenVK/GPUObjects/MVKInstance.h index 9e41ac71c..aa831845c 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKInstance.h +++ b/MoltenVK/MoltenVK/GPUObjects/MVKInstance.h @@ -179,7 +179,6 @@ class MVKInstance : public MVKDispatchableVulkanAPIObject { void propagateDebugName() override {} void initProcAddrs(); void initDebugCallbacks(const VkInstanceCreateInfo* pCreateInfo); - NSArray>* getAvailableMTLDevicesArray(); VkDebugReportFlagsEXT getVkDebugReportFlagsFromLogLevel(MVKConfigLogLevel logLevel); VkDebugUtilsMessageSeverityFlagBitsEXT getVkDebugUtilsMessageSeverityFlagBitsFromLogLevel(MVKConfigLogLevel logLevel); VkDebugUtilsMessageTypeFlagsEXT getVkDebugUtilsMessageTypesFlagBitsFromLogLevel(MVKConfigLogLevel logLevel); diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKInstance.mm b/MoltenVK/MoltenVK/GPUObjects/MVKInstance.mm index 5c6e6cb9b..e3c94135a 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKInstance.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKInstance.mm @@ -238,94 +238,37 @@ VkDebugReportFlagsEXT MVKInstance::getVkDebugReportFlagsFromLogLevel(MVKConfigLogLevel logLevel) { switch (logLevel) { - case MVK_CONFIG_LOG_LEVEL_DEBUG: - return VK_DEBUG_REPORT_DEBUG_BIT_EXT; - case MVK_CONFIG_LOG_LEVEL_INFO: - return VK_DEBUG_REPORT_INFORMATION_BIT_EXT; - case MVK_CONFIG_LOG_LEVEL_WARNING: - return VK_DEBUG_REPORT_WARNING_BIT_EXT; - case MVK_CONFIG_LOG_LEVEL_ERROR: - default: - return VK_DEBUG_REPORT_ERROR_BIT_EXT; + case MVK_CONFIG_LOG_LEVEL_ERROR: return VK_DEBUG_REPORT_ERROR_BIT_EXT; + case MVK_CONFIG_LOG_LEVEL_WARNING: return VK_DEBUG_REPORT_WARNING_BIT_EXT; + case MVK_CONFIG_LOG_LEVEL_INFO: return VK_DEBUG_REPORT_INFORMATION_BIT_EXT; + case MVK_CONFIG_LOG_LEVEL_DEBUG: return VK_DEBUG_REPORT_DEBUG_BIT_EXT; + default: return VK_DEBUG_REPORT_ERROR_BIT_EXT; } } VkDebugUtilsMessageSeverityFlagBitsEXT MVKInstance::getVkDebugUtilsMessageSeverityFlagBitsFromLogLevel(MVKConfigLogLevel logLevel) { switch (logLevel) { 
- case MVK_CONFIG_LOG_LEVEL_DEBUG: - return VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT; - case MVK_CONFIG_LOG_LEVEL_INFO: - return VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT; - case MVK_CONFIG_LOG_LEVEL_WARNING: - return VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT; - case MVK_CONFIG_LOG_LEVEL_ERROR: - default: - return VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT; + case MVK_CONFIG_LOG_LEVEL_ERROR: return VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT; + case MVK_CONFIG_LOG_LEVEL_WARNING: return VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT; + case MVK_CONFIG_LOG_LEVEL_INFO: return VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT; + case MVK_CONFIG_LOG_LEVEL_DEBUG: return VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT; + default: return VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT; } } VkDebugUtilsMessageTypeFlagsEXT MVKInstance::getVkDebugUtilsMessageTypesFlagBitsFromLogLevel(MVKConfigLogLevel logLevel) { switch (logLevel) { - case MVK_CONFIG_LOG_LEVEL_DEBUG: - case MVK_CONFIG_LOG_LEVEL_INFO: - case MVK_CONFIG_LOG_LEVEL_WARNING: - return VK_DEBUG_UTILS_MESSAGE_TYPE_GENERAL_BIT_EXT; - case MVK_CONFIG_LOG_LEVEL_ERROR: - default: - return VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT; + case MVK_CONFIG_LOG_LEVEL_ERROR: return VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT; + case MVK_CONFIG_LOG_LEVEL_WARNING: return VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT; + case MVK_CONFIG_LOG_LEVEL_DEBUG: return VK_DEBUG_UTILS_MESSAGE_TYPE_GENERAL_BIT_EXT; + case MVK_CONFIG_LOG_LEVEL_INFO: return VK_DEBUG_UTILS_MESSAGE_TYPE_GENERAL_BIT_EXT; + default: return VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT; } } #pragma mark Object Creation -// Returns an autoreleased array containing the MTLDevices available on this system, sorted according -// to power, with higher power GPU's at the front of the array. This ensures that a lazy app that simply -// grabs the first GPU will get a high-power one by default. 
If MVKConfiguration::forceLowPowerGPU is set, -// the returned array will only include low-power devices. -NSArray>* MVKInstance::getAvailableMTLDevicesArray() { - NSMutableArray* mtlDevs = [NSMutableArray array]; - -#if MVK_MACOS - NSArray* rawMTLDevs = [MTLCopyAllDevices() autorelease]; - if (rawMTLDevs) { - bool forceLowPower = mvkConfig().forceLowPowerGPU; - - // Populate the array of appropriate MTLDevices - for (id md in rawMTLDevs) { - if ( !forceLowPower || md.isLowPower ) { [mtlDevs addObject: md]; } - } - - // Sort by power - [mtlDevs sortUsingComparator: ^(id md1, id md2) { - BOOL md1IsLP = md1.isLowPower; - BOOL md2IsLP = md2.isLowPower; - - if (md1IsLP == md2IsLP) { - // If one device is headless and the other one is not, select the - // one that is not headless first. - BOOL md1IsHeadless = md1.isHeadless; - BOOL md2IsHeadless = md2.isHeadless; - if (md1IsHeadless == md2IsHeadless ) { - return NSOrderedSame; - } - return md2IsHeadless ? NSOrderedAscending : NSOrderedDescending; - } - - return md2IsLP ? NSOrderedAscending : NSOrderedDescending; - }]; - - } -#endif // MVK_MACOS - -#if MVK_IOS_OR_TVOS - id md = [MTLCreateSystemDefaultDevice() autorelease]; - if (md) { [mtlDevs addObject: md]; } -#endif // MVK_IOS_OR_TVOS - - return mtlDevs; // retained -} - MVKInstance::MVKInstance(const VkInstanceCreateInfo* pCreateInfo) : _enabledExtensions(this) { initDebugCallbacks(pCreateInfo); // Do before any creation activities @@ -347,7 +290,7 @@ // This effort creates a number of autoreleased instances of Metal // and other Obj-C classes, so wrap it all in an autorelease pool. 
@autoreleasepool { - NSArray>* mtlDevices = getAvailableMTLDevicesArray(); + NSArray>* mtlDevices = mvkGetAvailableMTLDevicesArray(); _physicalDevices.reserve(mtlDevices.count); for (id mtlDev in mtlDevices) { _physicalDevices.push_back(new MVKPhysicalDevice(this, mtlDev)); @@ -782,5 +725,9 @@ lock_guard lock(_dcbLock); mvkDestroyContainerContents(_debugReportCallbacks); + + MVKLogInfo("Destroyed VkInstance for Vulkan version %s with %d Vulkan extensions enabled.", + mvkGetVulkanVersionString(_appInfo.apiVersion).c_str(), + _enabledExtensions.getEnabledCount()); } diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKPixelFormats.mm b/MoltenVK/MoltenVK/GPUObjects/MVKPixelFormats.mm index 3044f66d9..7e1c1a193 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKPixelFormats.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKPixelFormats.mm @@ -1482,26 +1482,21 @@ } } -// If supporting a physical device, retrieve the MTLDevice from it, -// otherwise create a temp copy of the system default MTLDevice. +// If supporting a physical device, retrieve the MTLDevice from it, otherwise +// retrieve the array of physical GPU devices, and use the first one. +// Retrieving the GPUs creates a number of autoreleased instances of Metal +// and other Obj-C classes, so wrap it all in an autorelease pool. void MVKPixelFormats::modifyMTLFormatCapabilities() { if (_physicalDevice) { modifyMTLFormatCapabilities(_physicalDevice->getMTLDevice()); } else { -#if MVK_IOS_OR_TVOS - id mtlDevice = MTLCreateSystemDefaultDevice(); // temp retained -#endif -#if MVK_MACOS - NSArray>* mtlDevices = MTLCopyAllDevices(); // temp retained - id mtlDevice = [mtlDevices count] > 0 ? 
[mtlDevices[0] retain] : MTLCreateSystemDefaultDevice(); // temp retained - [mtlDevices release]; // temp release -#endif - modifyMTLFormatCapabilities(mtlDevice); - [mtlDevice release]; // release temp instance + @autoreleasepool { + auto* mtlDevs = mvkGetAvailableMTLDevicesArray(); + if (mtlDevs.count) { modifyMTLFormatCapabilities(mtlDevs[0]); } + } } } - // Mac Catalyst does not support feature sets, so we redefine them to GPU families in MVKDevice.h. #if MVK_MACCAT #define addFeatSetMTLPixFmtCaps(FEAT_SET, MTL_FMT, CAPS) \ diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKQueue.h b/MoltenVK/MoltenVK/GPUObjects/MVKQueue.h index bcefd2f37..0de3d2b84 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKQueue.h +++ b/MoltenVK/MoltenVK/GPUObjects/MVKQueue.h @@ -86,6 +86,9 @@ class MVKQueue : public MVKDispatchableVulkanAPIObject, public MVKDeviceTracking /** Returns a pointer to the Vulkan instance. */ MVKInstance* getInstance() override { return _device->getInstance(); } + /** Return the name of this queue. */ + const std::string& getName() { return _name; } + #pragma mark Queue submissions /** Submits the specified command buffers to the queue. */ @@ -97,8 +100,11 @@ class MVKQueue : public MVKDispatchableVulkanAPIObject, public MVKDeviceTracking /** Block the current thread until this queue is idle. */ VkResult waitIdle(MVKCommandUse cmdUse); - /** Return the name of this queue. */ - const std::string& getName() { return _name; } + /** Mark the beginning of a swapchain image presentation. */ + void beginPresentation(const MVKImagePresentInfo& presentInfo); + + /** Mark the end of a swapchain image presentation. 
*/ + void endPresentation(const MVKImagePresentInfo& presentInfo); #pragma mark Metal @@ -140,25 +146,29 @@ class MVKQueue : public MVKDispatchableVulkanAPIObject, public MVKDeviceTracking void initName(); void initExecQueue(); void initMTLCommandQueue(); - void initGPUCaptureScopes(); void destroyExecQueue(); VkResult submit(MVKQueueSubmission* qSubmit); NSString* getMTLCommandBufferLabel(MVKCommandUse cmdUse); + void handleMTLCommandBufferError(id mtlCmdBuff); + void waitSwapchainPresentations(MVKCommandUse cmdUse); MVKQueueFamily* _queueFamily; - uint32_t _index; - float _priority; - dispatch_queue_t _execQueue; - id _mtlQueue; + MVKSemaphoreImpl _presentationCompletionBlocker; + std::unordered_map _presentedImages; std::string _name; - NSString* _mtlCmdBuffLabelEndCommandBuffer; - NSString* _mtlCmdBuffLabelQueueSubmit; - NSString* _mtlCmdBuffLabelQueuePresent; - NSString* _mtlCmdBuffLabelDeviceWaitIdle; - NSString* _mtlCmdBuffLabelQueueWaitIdle; - NSString* _mtlCmdBuffLabelAcquireNextImage; - NSString* _mtlCmdBuffLabelInvalidateMappedMemoryRanges; - MVKGPUCaptureScope* _submissionCaptureScope; + dispatch_queue_t _execQueue; + id _mtlQueue = nil; + NSString* _mtlCmdBuffLabelBeginCommandBuffer = nil; + NSString* _mtlCmdBuffLabelQueueSubmit = nil; + NSString* _mtlCmdBuffLabelQueuePresent = nil; + NSString* _mtlCmdBuffLabelDeviceWaitIdle = nil; + NSString* _mtlCmdBuffLabelQueueWaitIdle = nil; + NSString* _mtlCmdBuffLabelAcquireNextImage = nil; + NSString* _mtlCmdBuffLabelInvalidateMappedMemoryRanges = nil; + MVKGPUCaptureScope* _submissionCaptureScope = nil; + std::mutex _presentedImagesLock; + float _priority; + uint32_t _index; }; @@ -178,7 +188,7 @@ class MVKQueueSubmission : public MVKBaseObject, public MVKConfigurableMixin { * * Upon completion of this function, no further calls should be made to this instance. 
*/ - virtual void execute() = 0; + virtual VkResult execute() = 0; MVKQueueSubmission(MVKQueue* queue, uint32_t waitSemaphoreCount, @@ -190,6 +200,7 @@ class MVKQueueSubmission : public MVKBaseObject, public MVKConfigurableMixin { friend class MVKQueue; virtual void finish() = 0; + MVKDevice* getDevice() { return _queue->getDevice(); } MVKQueue* _queue; MVKSmallVector> _waitSemaphores; @@ -206,7 +217,7 @@ class MVKQueueSubmission : public MVKBaseObject, public MVKConfigurableMixin { class MVKQueueCommandBufferSubmission : public MVKQueueSubmission { public: - void execute() override; + VkResult execute() override; MVKQueueCommandBufferSubmission(MVKQueue* queue, const VkSubmitInfo* pSubmit, VkFence fence, MVKCommandUse cmdUse); @@ -217,7 +228,7 @@ class MVKQueueCommandBufferSubmission : public MVKQueueSubmission { id getActiveMTLCommandBuffer(); void setActiveMTLCommandBuffer(id mtlCmdBuff); - void commitActiveMTLCommandBuffer(bool signalCompletion = false); + VkResult commitActiveMTLCommandBuffer(bool signalCompletion = false); void finish() override; virtual void submitCommandBuffers() {} @@ -238,20 +249,10 @@ template class MVKQueueFullCommandBufferSubmission : public MVKQueueCommandBufferSubmission { public: - MVKQueueFullCommandBufferSubmission(MVKQueue* queue, const VkSubmitInfo* pSubmit, VkFence fence) : - MVKQueueCommandBufferSubmission(queue, pSubmit, fence, kMVKCommandUseQueueSubmit) { - - // pSubmit can be null if just tracking the fence alone - if (pSubmit) { - uint32_t cbCnt = pSubmit->commandBufferCount; - _cmdBuffers.reserve(cbCnt); - for (uint32_t i = 0; i < cbCnt; i++) { - MVKCommandBuffer* cb = MVKCommandBuffer::getMVKCommandBuffer(pSubmit->pCommandBuffers[i]); - _cmdBuffers.push_back(cb); - setConfigurationResult(cb->getConfigurationResult()); - } - } - } + MVKQueueFullCommandBufferSubmission(MVKQueue* queue, + const VkSubmitInfo* pSubmit, + VkFence fence, + MVKCommandUse cmdUse); protected: void submitCommandBuffers() override; @@ -267,7 +268,7 
@@ class MVKQueueFullCommandBufferSubmission : public MVKQueueCommandBufferSubmissi class MVKQueuePresentSurfaceSubmission : public MVKQueueSubmission { public: - void execute() override; + VkResult execute() override; MVKQueuePresentSurfaceSubmission(MVKQueue* queue, const VkPresentInfoKHR* pPresentInfo); diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKQueue.mm b/MoltenVK/MoltenVK/GPUObjects/MVKQueue.mm index 0ad143072..293f50eff 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKQueue.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKQueue.mm @@ -18,6 +18,7 @@ #include "MVKInstance.h" #include "MVKQueue.h" +#include "MVKSurface.h" #include "MVKSwapchain.h" #include "MVKSync.h" #include "MVKFoundation.h" @@ -68,7 +69,7 @@ // Execute the queue submission under an autoreleasepool to ensure transient Metal objects are autoreleased. // This is critical for apps that don't use standard OS autoreleasing runloop threading. -static inline void execute(MVKQueueSubmission* qSubmit) { @autoreleasepool { qSubmit->execute(); } } +static inline VkResult execute(MVKQueueSubmission* qSubmit) { @autoreleasepool { return qSubmit->execute(); } } // Executes the submmission, either immediately, or by dispatching to an execution queue. // Submissions to the execution queue are wrapped in a dedicated autoreleasepool. 
@@ -80,10 +81,12 @@ if ( !qSubmit ) { return VK_SUCCESS; } // Ignore nils VkResult rslt = qSubmit->getConfigurationResult(); // Extract result before submission to avoid race condition with early destruction - if (_execQueue) { - dispatch_async(_execQueue, ^{ execute(qSubmit); } ); - } else { - execute(qSubmit); + if (rslt == VK_SUCCESS) { + if (_execQueue) { + dispatch_async(_execQueue, ^{ execute(qSubmit); } ); + } else { + rslt = execute(qSubmit); + } } return rslt; } @@ -103,19 +106,19 @@ MVKQueueCommandBufferSubmission* mvkSub; uint32_t cbCnt = pVkSub->commandBufferCount; if (cbCnt <= 1) { - mvkSub = new MVKQueueFullCommandBufferSubmission<1>(this, pVkSub, fenceOrNil); + mvkSub = new MVKQueueFullCommandBufferSubmission<1>(this, pVkSub, fenceOrNil, cmdUse); } else if (cbCnt <= 16) { - mvkSub = new MVKQueueFullCommandBufferSubmission<16>(this, pVkSub, fenceOrNil); + mvkSub = new MVKQueueFullCommandBufferSubmission<16>(this, pVkSub, fenceOrNil, cmdUse); } else if (cbCnt <= 32) { - mvkSub = new MVKQueueFullCommandBufferSubmission<32>(this, pVkSub, fenceOrNil); + mvkSub = new MVKQueueFullCommandBufferSubmission<32>(this, pVkSub, fenceOrNil, cmdUse); } else if (cbCnt <= 64) { - mvkSub = new MVKQueueFullCommandBufferSubmission<64>(this, pVkSub, fenceOrNil); + mvkSub = new MVKQueueFullCommandBufferSubmission<64>(this, pVkSub, fenceOrNil, cmdUse); } else if (cbCnt <= 128) { - mvkSub = new MVKQueueFullCommandBufferSubmission<128>(this, pVkSub, fenceOrNil); + mvkSub = new MVKQueueFullCommandBufferSubmission<128>(this, pVkSub, fenceOrNil, cmdUse); } else if (cbCnt <= 256) { - mvkSub = new MVKQueueFullCommandBufferSubmission<256>(this, pVkSub, fenceOrNil); + mvkSub = new MVKQueueFullCommandBufferSubmission<256>(this, pVkSub, fenceOrNil, cmdUse); } else { - mvkSub = new MVKQueueFullCommandBufferSubmission<512>(this, pVkSub, fenceOrNil); + mvkSub = new MVKQueueFullCommandBufferSubmission<512>(this, pVkSub, fenceOrNil, cmdUse); } VkResult subRslt = submit(mvkSub); @@ -128,29 
+131,62 @@ return submit(new MVKQueuePresentSurfaceSubmission(this, pPresentInfo)); } -// Create an empty submit struct and fence, submit to queue and wait on fence. VkResult MVKQueue::waitIdle(MVKCommandUse cmdUse) { - if (_device->getConfigurationResult() != VK_SUCCESS) { return _device->getConfigurationResult(); } + VkResult rslt = _device->getConfigurationResult(); + if (rslt != VK_SUCCESS) { return rslt; } - VkFenceCreateInfo vkFenceInfo = { - .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO, - .pNext = nullptr, - .flags = 0, - }; - - // The MVKFence is retained by the command submission, and may outlive this function while - // the command submission finishes, so we can't allocate MVKFence locally on the stack. - MVKFence* mvkFence = new MVKFence(_device, &vkFenceInfo); - VkFence vkFence = (VkFence)mvkFence; - submit(0, nullptr, vkFence, cmdUse); - VkResult rslt = mvkWaitForFences(_device, 1, &vkFence, false); - mvkFence->destroy(); - return rslt; + auto* mtlCmdBuff = getMTLCommandBuffer(cmdUse); + [mtlCmdBuff commit]; + [mtlCmdBuff waitUntilCompleted]; + + waitSwapchainPresentations(cmdUse); + + return VK_SUCCESS; +} + +// If there are any swapchain presentations in flight, wait a few frames for them to complete. +// If they don't complete within a few frames, attempt to force them to complete, and wait another +// few frames for that to happen. If there are still swapchain presentations that haven't completed, +// log a warning, and force them to end presentation, so the images and drawables will be released. 
+void MVKQueue::waitSwapchainPresentations(MVKCommandUse cmdUse) { + auto waitFrames = _device->_pMetalFeatures->maxSwapchainImageCount + 2; + if (_presentationCompletionBlocker.wait((waitFrames/60.0) * 1e9)) { return; } + + auto imgCnt = _presentationCompletionBlocker.getReservationCount(); + MVKPresentableSwapchainImage* images[imgCnt]; + mvkClear(images, imgCnt); + + { + // Scope of image lock limited to creating array copy of uncompleted presentations + // Populate a working array of the unpresented images. + lock_guard lock(_presentedImagesLock); + size_t imgIdx = 0; + for (auto imgPair : _presentedImages) { images[imgIdx++] = imgPair.first; } + } + + // Attempt to force each image to complete presentation through the callback. + for (size_t imgIdx = 0; imgIdx < imgCnt && _presentationCompletionBlocker.getReservationCount(); imgIdx++) { + auto* img = images[imgIdx]; + if (img) { img->forcePresentationCompletion(); } + } + + // Wait for forced presentation completions. If we still have unfinished swapchain image + // presentations, log a warning, and force each image to end, so that it can be released. 
+ if ( !_presentationCompletionBlocker.wait((waitFrames/60.0) * 1e9) ) { + reportWarning(VK_TIMEOUT, "%s timed out after %d frames while awaiting %d swapchain image presentations to complete.", + mvkVkCommandName(cmdUse), waitFrames * 2, _presentationCompletionBlocker.getReservationCount()); + for (size_t imgIdx = 0; imgIdx < imgCnt; imgIdx++) { + auto* img = images[imgIdx]; + if (_presentedImages.count(img)) { img->endPresentation({.queue = this, .presentableImage = img}); } + } + } } id MVKQueue::getMTLCommandBuffer(MVKCommandUse cmdUse, bool retainRefs) { id mtlCmdBuff = nil; + MVKDevice* mvkDev = getDevice(); + uint64_t startTime = mvkDev->getPerformanceTimestamp(); #if MVK_XCODE_12 if ([_mtlQueue respondsToSelector: @selector(commandBufferWithDescriptor:)]) { MTLCommandBufferDescriptor* mtlCmdBuffDesc = [MTLCommandBufferDescriptor new]; // temp retain @@ -167,53 +203,145 @@ } else { mtlCmdBuff = [_mtlQueue commandBufferWithUnretainedReferences]; } - setLabelIfNotNil(mtlCmdBuff, getMTLCommandBufferLabel(cmdUse)); + mvkDev->addActivityPerformance(mvkDev->_performanceStatistics.queue.retrieveMTLCommandBuffer, startTime); + NSString* mtlCmdBuffLabel = getMTLCommandBufferLabel(cmdUse); + setLabelIfNotNil(mtlCmdBuff, mtlCmdBuffLabel); + [mtlCmdBuff addCompletedHandler: ^(id mtlCB) { handleMTLCommandBufferError(mtlCB); }]; + + if ( !mtlCmdBuff ) { reportError(VK_ERROR_OUT_OF_POOL_MEMORY, "%s could not be acquired.", mtlCmdBuffLabel.UTF8String); } return mtlCmdBuff; } NSString* MVKQueue::getMTLCommandBufferLabel(MVKCommandUse cmdUse) { -#define CASE_GET_LABEL(cmdUse) \ - case kMVKCommandUse ##cmdUse: \ - if ( !_mtlCmdBuffLabel ##cmdUse ) { _mtlCmdBuffLabel ##cmdUse = [[NSString stringWithFormat: @"%@ on Queue %d-%d", mvkMTLCommandBufferLabel(kMVKCommandUse ##cmdUse), _queueFamily->getIndex(), _index] retain]; } \ - return _mtlCmdBuffLabel ##cmdUse +#define CASE_GET_LABEL(cu) \ + case kMVKCommandUse ##cu: \ + if ( !_mtlCmdBuffLabel ##cu ) { _mtlCmdBuffLabel ##cu = 
[[NSString stringWithFormat: @"%s MTLCommandBuffer on Queue %d-%d", mvkVkCommandName(kMVKCommandUse ##cu), _queueFamily->getIndex(), _index] retain]; } \ + return _mtlCmdBuffLabel ##cu switch (cmdUse) { - CASE_GET_LABEL(EndCommandBuffer); + CASE_GET_LABEL(BeginCommandBuffer); CASE_GET_LABEL(QueueSubmit); CASE_GET_LABEL(QueuePresent); CASE_GET_LABEL(QueueWaitIdle); CASE_GET_LABEL(DeviceWaitIdle); CASE_GET_LABEL(AcquireNextImage); CASE_GET_LABEL(InvalidateMappedMemoryRanges); - default: return mvkMTLCommandBufferLabel(cmdUse); + default: + MVKAssert(false, "Uncached MTLCommandBuffer label for command use %s.", mvkVkCommandName(cmdUse)); + return [NSString stringWithFormat: @"%s MTLCommandBuffer on Queue %d-%d", mvkVkCommandName(cmdUse), _queueFamily->getIndex(), _index]; } #undef CASE_GET_LABEL } +#if MVK_XCODE_12 +static const char* mvkStringFromMTLCommandEncoderErrorState(MTLCommandEncoderErrorState errState) { + switch (errState) { + case MTLCommandEncoderErrorStateUnknown: return "unknown"; + case MTLCommandEncoderErrorStateAffected: return "affected"; + case MTLCommandEncoderErrorStateCompleted: return "completed"; + case MTLCommandEncoderErrorStateFaulted: return "faulted"; + case MTLCommandEncoderErrorStatePending: return "pending"; + } + return "unknown"; +} +#endif + +void MVKQueue::handleMTLCommandBufferError(id mtlCmdBuff) { + if (mtlCmdBuff.status != MTLCommandBufferStatusError) { return; } + + // If a command buffer error has occurred, report the error. If the error affects + // the physical device, always mark both the device and physical device as lost. + // If the error is local to this command buffer, optionally mark the device (but not the + // physical device) as lost, depending on the value of MVKConfiguration::resumeLostDevice. 
+ VkResult vkErr = VK_ERROR_UNKNOWN; + bool markDeviceLoss = !mvkConfig().resumeLostDevice; + bool markPhysicalDeviceLoss = false; + switch (mtlCmdBuff.error.code) { + case MTLCommandBufferErrorBlacklisted: + case MTLCommandBufferErrorNotPermitted: // May also be used for command buffers executed in the background without the right entitlement. +#if MVK_MACOS && !MVK_MACCAT + case MTLCommandBufferErrorDeviceRemoved: +#endif + vkErr = VK_ERROR_DEVICE_LOST; + markDeviceLoss = true; + markPhysicalDeviceLoss = true; + break; + case MTLCommandBufferErrorTimeout: + vkErr = VK_TIMEOUT; + break; +#if MVK_XCODE_13 + case MTLCommandBufferErrorStackOverflow: +#endif + case MTLCommandBufferErrorPageFault: + case MTLCommandBufferErrorOutOfMemory: + default: + vkErr = VK_ERROR_OUT_OF_DEVICE_MEMORY; + break; + } + reportError(vkErr, "MTLCommandBuffer \"%s\" execution failed (code %li): %s", + mtlCmdBuff.label ? mtlCmdBuff.label.UTF8String : "", + mtlCmdBuff.error.code, mtlCmdBuff.error.localizedDescription.UTF8String); + if (markDeviceLoss) { getDevice()->markLost(markPhysicalDeviceLoss); } + +#if MVK_XCODE_12 + if (&MTLCommandBufferEncoderInfoErrorKey != nullptr) { + if (NSArray>* mtlEncInfo = mtlCmdBuff.error.userInfo[MTLCommandBufferEncoderInfoErrorKey]) { + MVKLogInfo("Encoders for %p \"%s\":", mtlCmdBuff, mtlCmdBuff.label ? 
mtlCmdBuff.label.UTF8String : ""); + for (id enc in mtlEncInfo) { + MVKLogInfo(" - %s: %s", enc.label.UTF8String, mvkStringFromMTLCommandEncoderErrorState(enc.errorState)); + if (enc.debugSignposts.count > 0) { + MVKLogInfo(" Debug signposts:"); + for (NSString* signpost in enc.debugSignposts) { + MVKLogInfo(" - %s", signpost.UTF8String); + } + } + } + } + } + if ([mtlCmdBuff respondsToSelector: @selector(logs)]) { + bool isFirstMsg = true; + for (id log in mtlCmdBuff.logs) { + if (isFirstMsg) { + MVKLogInfo("Shader log messages:"); + isFirstMsg = false; + } + MVKLogInfo("%s", log.description.UTF8String); + } + } +#endif +} + +// _presentedImages counts presentations per swapchain image, because the presentation of an image can +// begin before the previous presentation of that image has indicated that it has completed via a callback. +void MVKQueue::beginPresentation(const MVKImagePresentInfo& presentInfo) { + lock_guard lock(_presentedImagesLock); + _presentationCompletionBlocker.reserve(); + _presentedImages[presentInfo.presentableImage]++; +} + +void MVKQueue::endPresentation(const MVKImagePresentInfo& presentInfo) { + lock_guard lock(_presentedImagesLock); + _presentationCompletionBlocker.release(); + if (_presentedImages[presentInfo.presentableImage]) { + _presentedImages[presentInfo.presentableImage]--; + } + if ( !_presentedImages[presentInfo.presentableImage] ) { + _presentedImages.erase(presentInfo.presentableImage); + } +} #pragma mark Construction #define MVK_DISPATCH_QUEUE_QOS_CLASS QOS_CLASS_USER_INITIATED -MVKQueue::MVKQueue(MVKDevice* device, MVKQueueFamily* queueFamily, uint32_t index, float priority) - : MVKDeviceTrackingMixin(device) { - +MVKQueue::MVKQueue(MVKDevice* device, MVKQueueFamily* queueFamily, uint32_t index, float priority) : MVKDeviceTrackingMixin(device) { _queueFamily = queueFamily; _index = index; _priority = priority; - _mtlCmdBuffLabelEndCommandBuffer = nil; - _mtlCmdBuffLabelQueueSubmit = nil; - _mtlCmdBuffLabelQueuePresent = 
nil; - _mtlCmdBuffLabelDeviceWaitIdle = nil; - _mtlCmdBuffLabelQueueWaitIdle = nil; - _mtlCmdBuffLabelAcquireNextImage = nil; - _mtlCmdBuffLabelInvalidateMappedMemoryRanges = nil; - initName(); initExecQueue(); initMTLCommandQueue(); - initGPUCaptureScopes(); } void MVKQueue::initName() { @@ -236,23 +364,15 @@ } } -// Retrieves and initializes the Metal command queue. +// Retrieves and initializes the Metal command queue and Xcode GPU capture scopes void MVKQueue::initMTLCommandQueue() { - uint64_t startTime = _device->getPerformanceTimestamp(); _mtlQueue = _queueFamily->getMTLCommandQueue(_index); // not retained (cached in queue family) - _device->addActivityPerformance(_device->_performanceStatistics.queue.mtlQueueAccess, startTime); -} -// Initializes Xcode GPU capture scopes -void MVKQueue::initGPUCaptureScopes() { _submissionCaptureScope = new MVKGPUCaptureScope(this); - if (_queueFamily->getIndex() == mvkConfig().defaultGPUCaptureScopeQueueFamilyIndex && _index == mvkConfig().defaultGPUCaptureScopeQueueIndex) { - getDevice()->startAutoGPUCapture(MVK_CONFIG_AUTO_GPU_CAPTURE_SCOPE_FRAME, _mtlQueue); _submissionCaptureScope->makeDefault(); - } _submissionCaptureScope->beginScope(); // Allow Xcode to capture the first frame if desired. } @@ -261,7 +381,7 @@ destroyExecQueue(); _submissionCaptureScope->destroy(); - [_mtlCmdBuffLabelEndCommandBuffer release]; + [_mtlCmdBuffLabelBeginCommandBuffer release]; [_mtlCmdBuffLabelQueueSubmit release]; [_mtlCmdBuffLabelQueuePresent release]; [_mtlCmdBuffLabelDeviceWaitIdle release]; @@ -306,7 +426,7 @@ #pragma mark - #pragma mark MVKQueueCommandBufferSubmission -void MVKQueueCommandBufferSubmission::execute() { +VkResult MVKQueueCommandBufferSubmission::execute() { _queue->_submissionCaptureScope->beginScope(); @@ -321,7 +441,7 @@ // Commit the last MTLCommandBuffer. // Nothing after this because callback might destroy this instance before this function ends. 
- commitActiveMTLCommandBuffer(true); + return commitActiveMTLCommandBuffer(true); } // Returns the active MTLCommandBuffer, lazily retrieving it from the queue if needed. @@ -341,24 +461,11 @@ [_activeMTLCommandBuffer enqueue]; } -#if MVK_XCODE_12 -static const char* mvkStringFromErrorState(MTLCommandEncoderErrorState errState) { - switch (errState) { - case MTLCommandEncoderErrorStateUnknown: return "unknown"; - case MTLCommandEncoderErrorStateAffected: return "affected"; - case MTLCommandEncoderErrorStateCompleted: return "completed"; - case MTLCommandEncoderErrorStateFaulted: return "faulted"; - case MTLCommandEncoderErrorStatePending: return "pending"; - } - return "unknown"; -} -#endif - // Commits and releases the currently active MTLCommandBuffer, optionally signalling // when the MTLCommandBuffer is done. The first time this is called, it will wait on // any semaphores. We have delayed signalling the semaphores as long as possible to // allow as much filling of the MTLCommandBuffer as possible before forcing a wait. -void MVKQueueCommandBufferSubmission::commitActiveMTLCommandBuffer(bool signalCompletion) { +VkResult MVKQueueCommandBufferSubmission::commitActiveMTLCommandBuffer(bool signalCompletion) { // If using inline semaphore waiting, do so now. // When prefilled command buffers are used, multiple commits will happen because native semaphore @@ -386,66 +493,21 @@ id mtlCmdBuff = signalCompletion ? getActiveMTLCommandBuffer() : _activeMTLCommandBuffer; _activeMTLCommandBuffer = nil; - MVKDevice* mvkDev = _queue->getDevice(); + MVKDevice* mvkDev = getDevice(); uint64_t startTime = mvkDev->getPerformanceTimestamp(); [mtlCmdBuff addCompletedHandler: ^(id mtlCB) { - if (mtlCB.status == MTLCommandBufferStatusError) { - // If a command buffer error has occurred, report the error. If the error affects - // the physical device, always mark both the device and physical device as lost. 
- // If the error is local to this command buffer, optionally mark the device (but not the - // physical device) as lost, depending on the value of MVKConfiguration::resumeLostDevice. - getVulkanAPIObject()->reportError(VK_ERROR_DEVICE_LOST, "MTLCommandBuffer \"%s\" execution failed (code %li): %s", mtlCB.label ? mtlCB.label.UTF8String : "", mtlCB.error.code, mtlCB.error.localizedDescription.UTF8String); - switch (mtlCB.error.code) { - case MTLCommandBufferErrorBlacklisted: - case MTLCommandBufferErrorNotPermitted: // May also be used for command buffers executed in the background without the right entitlement. -#if MVK_MACOS && !MVK_MACCAT - case MTLCommandBufferErrorDeviceRemoved: -#endif - mvkDev->markLost(true); - break; - default: - if ( !mvkConfig().resumeLostDevice ) { mvkDev->markLost(); } - break; - } -#if MVK_XCODE_12 - if (mvkConfig().debugMode) { - if (&MTLCommandBufferEncoderInfoErrorKey != nullptr) { - if (NSArray>* mtlEncInfo = mtlCB.error.userInfo[MTLCommandBufferEncoderInfoErrorKey]) { - MVKLogInfo("Encoders for %p \"%s\":", mtlCB, mtlCB.label ? mtlCB.label.UTF8String : ""); - for (id enc in mtlEncInfo) { - MVKLogInfo(" - %s: %s", enc.label.UTF8String, mvkStringFromErrorState(enc.errorState)); - if (enc.debugSignposts.count > 0) { - MVKLogInfo(" Debug signposts:"); - for (NSString* signpost in enc.debugSignposts) { - MVKLogInfo(" - %s", signpost.UTF8String); - } - } - } - } - } - } -#endif - } -#if MVK_XCODE_12 - if (mvkConfig().debugMode && [mtlCB respondsToSelector: @selector(logs)]) { - bool isFirstMsg = true; - for (id log in mtlCB.logs) { - if (isFirstMsg) { - MVKLogInfo("Shader log messages:"); - isFirstMsg = false; - } - MVKLogInfo("%s", log.description.UTF8String); - } - } -#endif - - // Ensure finish() is the last thing the completetion callback does. 
- mvkDev->addActivityPerformance(mvkDev->_performanceStatistics.queue.mtlCommandBufferCompletion, startTime); - if (signalCompletion) { this->finish(); } + mvkDev->addActivityPerformance(mvkDev->_performanceStatistics.queue.mtlCommandBufferExecution, startTime); + if (signalCompletion) { this->finish(); } // Must be the last thing the completetion callback does. }]; [mtlCmdBuff commit]; [mtlCmdBuff release]; // retained + + // If we need to signal completion, but an error occurred and the MTLCommandBuffer + // was not created, call the finish() function directly. + if (signalCompletion && !mtlCmdBuff) { finish(); } + + return mtlCmdBuff ? VK_SUCCESS : VK_ERROR_OUT_OF_POOL_MEMORY; } // Be sure to retain() any API objects referenced in this function, and release() them in the @@ -474,10 +536,11 @@ MVKQueueCommandBufferSubmission::MVKQueueCommandBufferSubmission(MVKQueue* queue, const VkSubmitInfo* pSubmit, VkFence fence, - MVKCommandUse cmdUse) : - MVKQueueSubmission(queue, - (pSubmit ? pSubmit->waitSemaphoreCount : 0), - (pSubmit ? pSubmit->pWaitSemaphores : nullptr)), + MVKCommandUse cmdUse) + : MVKQueueSubmission(queue, + (pSubmit ? pSubmit->waitSemaphoreCount : 0), + (pSubmit ? 
pSubmit->pWaitSemaphores : nullptr)), + _commandUse(cmdUse), _emulatedWaitDone(false) { @@ -524,7 +587,31 @@ template void MVKQueueFullCommandBufferSubmission::submitCommandBuffers() { + MVKDevice* mvkDev = getDevice(); + uint64_t startTime = mvkDev->getPerformanceTimestamp(); + for (auto& cb : _cmdBuffers) { cb->submit(this, &_encodingContext); } + + mvkDev->addActivityPerformance(mvkDev->_performanceStatistics.queue.submitCommandBuffers, startTime); +} + +template +MVKQueueFullCommandBufferSubmission::MVKQueueFullCommandBufferSubmission(MVKQueue* queue, + const VkSubmitInfo* pSubmit, + VkFence fence, + MVKCommandUse cmdUse) + : MVKQueueCommandBufferSubmission(queue, pSubmit, fence, cmdUse) { + + // pSubmit can be null if just tracking the fence alone + if (pSubmit) { + uint32_t cbCnt = pSubmit->commandBufferCount; + _cmdBuffers.reserve(cbCnt); + for (uint32_t i = 0; i < cbCnt; i++) { + MVKCommandBuffer* cb = MVKCommandBuffer::getMVKCommandBuffer(pSubmit->pCommandBuffers[i]); + _cmdBuffers.push_back(cb); + setConfigurationResult(cb->getConfigurationResult()); + } + } } @@ -534,24 +621,31 @@ // If the semaphores are encodable, wait on them by encoding them on the MTLCommandBuffer before presenting. // If the semaphores are not encodable, wait on them inline after presenting. // The semaphores know what to do. -void MVKQueuePresentSurfaceSubmission::execute() { +VkResult MVKQueuePresentSurfaceSubmission::execute() { id mtlCmdBuff = _queue->getMTLCommandBuffer(kMVKCommandUseQueuePresent); [mtlCmdBuff enqueue]; - for (auto& ws : _waitSemaphores) { ws.first->encodeWait(mtlCmdBuff, 0); } // Add completion handler that will destroy this submission only once the MTLCommandBuffer // is finished with the resources retained here, including the wait semaphores. // Completion handlers are also added in presentCAMetalDrawable() to retain the swapchain images. 
- [mtlCmdBuff addCompletedHandler: ^(id mcb) { - this->finish(); - }]; + [mtlCmdBuff addCompletedHandler: ^(id mtlCB) { this->finish(); }]; + + for (auto& ws : _waitSemaphores) { + auto& sem4 = ws.first; + sem4->encodeWait(mtlCmdBuff, 0); // Encoded semaphore waits + sem4->encodeWait(nil, 0); // Inline semaphore waits + } for (int i = 0; i < _presentInfo.size(); i++ ) { _presentInfo[i].presentableImage->presentCAMetalDrawable(mtlCmdBuff, _presentInfo[i]); } - for (auto& ws : _waitSemaphores) { ws.first->encodeWait(nil, 0); } [mtlCmdBuff commit]; + + // If an error occurred and the MTLCommandBuffer was not created, call finish() directly. + if ( !mtlCmdBuff ) { finish(); } + + return mtlCmdBuff ? VK_SUCCESS : VK_ERROR_OUT_OF_POOL_MEMORY; } void MVKQueuePresentSurfaceSubmission::finish() { @@ -563,7 +657,7 @@ cs->beginScope(); if (_queue->_queueFamily->getIndex() == mvkConfig().defaultGPUCaptureScopeQueueFamilyIndex && _queue->_index == mvkConfig().defaultGPUCaptureScopeQueueIndex) { - _queue->getDevice()->stopAutoGPUCapture(MVK_CONFIG_AUTO_GPU_CAPTURE_SCOPE_FRAME); + getDevice()->stopAutoGPUCapture(MVK_CONFIG_AUTO_GPU_CAPTURE_SCOPE_FRAME); } this->destroy(); @@ -623,6 +717,7 @@ for (uint32_t scIdx = 0; scIdx < scCnt; scIdx++) { MVKSwapchain* mvkSC = (MVKSwapchain*)pPresentInfo->pSwapchains[scIdx]; MVKImagePresentInfo presentInfo = {}; // Start with everything zeroed + presentInfo.queue = _queue; presentInfo.presentableImage = mvkSC->getPresentableImage(pPresentInfo->pImageIndices[scIdx]); presentInfo.presentMode = pPresentModes ? pPresentModes[scIdx] : VK_PRESENT_MODE_MAX_ENUM_KHR; presentInfo.fence = pFences ? 
(MVKFence*)pFences[scIdx] : nullptr; diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKSurface.h b/MoltenVK/MoltenVK/GPUObjects/MVKSurface.h index 0bcceb5d1..5746bfbf9 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKSurface.h +++ b/MoltenVK/MoltenVK/GPUObjects/MVKSurface.h @@ -35,6 +35,7 @@ #endif class MVKInstance; +class MVKSwapchain; @class MVKBlockObserver; @@ -55,11 +56,8 @@ class MVKSurface : public MVKVulkanAPIObject { /** Returns a pointer to the Vulkan instance. */ MVKInstance* getInstance() override { return _mvkInstance; } - /** Returns the CAMetalLayer underlying this surface. */ - inline CAMetalLayer* getCAMetalLayer() { - std::lock_guard lock(_layerLock); - return _mtlCAMetalLayer; - } + /** Returns the CAMetalLayer underlying this surface. */ + CAMetalLayer* getCAMetalLayer(); #pragma mark Construction @@ -75,13 +73,16 @@ class MVKSurface : public MVKVulkanAPIObject { ~MVKSurface() override; protected: + friend class MVKSwapchain; + void propagateDebugName() override {} - void initLayerObserver(); + void initLayer(CAMetalLayer* mtlLayer, const char* vkFuncName); void releaseLayer(); - MVKInstance* _mvkInstance; - CAMetalLayer* _mtlCAMetalLayer; - MVKBlockObserver* _layerObserver; std::mutex _layerLock; + MVKInstance* _mvkInstance = nullptr; + CAMetalLayer* _mtlCAMetalLayer = nil; + MVKBlockObserver* _layerObserver = nil; + MVKSwapchain* _activeSwapchain = nullptr; }; diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKSurface.mm b/MoltenVK/MoltenVK/GPUObjects/MVKSurface.mm index 1309d73dc..3899ab69f 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKSurface.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKSurface.mm @@ -29,12 +29,15 @@ #pragma mark MVKSurface +CAMetalLayer* MVKSurface::getCAMetalLayer() { + std::lock_guard lock(_layerLock); + return _mtlCAMetalLayer; +} + MVKSurface::MVKSurface(MVKInstance* mvkInstance, const VkMetalSurfaceCreateInfoEXT* pCreateInfo, const VkAllocationCallbacks* pAllocator) : _mvkInstance(mvkInstance) { - - _mtlCAMetalLayer = 
(CAMetalLayer*)[pCreateInfo->pLayer retain]; - initLayerObserver(); + initLayer((CAMetalLayer*)pCreateInfo->pLayer, "vkCreateMetalSurfaceEXT"); } // pCreateInfo->pView can be either a CAMetalLayer or a view (NSView/UIView). @@ -47,36 +50,30 @@ // If it's a view (NSView/UIView), extract the layer, otherwise assume it's already a CAMetalLayer. if ([obj isKindOfClass: [PLATFORM_VIEW_CLASS class]]) { + obj = ((PLATFORM_VIEW_CLASS*)obj).layer; if ( !NSThread.isMainThread ) { - MVKLogInfo("%s(): You are not calling this function from the main thread. %s should only be accessed from the main thread. When using this function outside the main thread, consider passing the CAMetalLayer itself in %s::pView, instead of the %s.", + MVKLogWarn("%s(): You are not calling this function from the main thread. %s should only be accessed from the main thread. When using this function outside the main thread, consider passing the CAMetalLayer itself in %s::pView, instead of the %s.", STR(vkCreate_PLATFORM_SurfaceMVK), STR(PLATFORM_VIEW_CLASS), STR(Vk_PLATFORM_SurfaceCreateInfoMVK), STR(PLATFORM_VIEW_CLASS)); } - obj = ((PLATFORM_VIEW_CLASS*)obj).layer; } // Confirm that we were provided with a CAMetalLayer - if ([obj isKindOfClass: [CAMetalLayer class]]) { - _mtlCAMetalLayer = (CAMetalLayer*)[obj retain]; // retained - } else { - setConfigurationResult(reportError(VK_ERROR_INITIALIZATION_FAILED, - "%s(): On-screen rendering requires a layer of type CAMetalLayer.", - STR(vkCreate_PLATFORM_SurfaceMVK))); - _mtlCAMetalLayer = nil; - } - - initLayerObserver(); + initLayer([obj isKindOfClass: CAMetalLayer.class] ? (CAMetalLayer*)obj : nil, + STR(vkCreate_PLATFORM_SurfaceMVK)); } -// Sometimes, the owning view can replace its CAMetalLayer. In that case, the client needs to recreate the surface. 
-void MVKSurface::initLayerObserver() { +void MVKSurface::initLayer(CAMetalLayer* mtlLayer, const char* vkFuncName) { - _layerObserver = nil; - if ( ![_mtlCAMetalLayer.delegate isKindOfClass: [PLATFORM_VIEW_CLASS class]] ) { return; } + _mtlCAMetalLayer = [mtlLayer retain]; // retained + if ( !_mtlCAMetalLayer ) { setConfigurationResult(reportError(VK_ERROR_SURFACE_LOST_KHR, "%s(): On-screen rendering requires a layer of type CAMetalLayer.", vkFuncName)); } - _layerObserver = [MVKBlockObserver observerWithBlock: ^(NSString* path, id, NSDictionary*, void*) { - if ( ![path isEqualToString: @"layer"] ) { return; } - this->releaseLayer(); - } forObject: _mtlCAMetalLayer.delegate atKeyPath: @"layer"]; + // Sometimes, the owning view can replace its CAMetalLayer. + // When that happens, the app needs to recreate the surface. + if ([_mtlCAMetalLayer.delegate isKindOfClass: [PLATFORM_VIEW_CLASS class]]) { + _layerObserver = [MVKBlockObserver observerWithBlock: ^(NSString* path, id, NSDictionary*, void*) { + if ([path isEqualToString: @"layer"]) { this->releaseLayer(); } + } forObject: _mtlCAMetalLayer.delegate atKeyPath: @"layer"]; + } } void MVKSurface::releaseLayer() { diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKSwapchain.h b/MoltenVK/MoltenVK/GPUObjects/MVKSwapchain.h index 523a58072..7e7cff8cc 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKSwapchain.h +++ b/MoltenVK/MoltenVK/GPUObjects/MVKSwapchain.h @@ -28,8 +28,6 @@ class MVKWatermark; -@class MVKBlockObserver; - #pragma mark - #pragma mark MVKSwapchain @@ -76,19 +74,8 @@ class MVKSwapchain : public MVKVulkanAPIDeviceObject { /** Releases swapchain images. */ VkResult releaseImages(const VkReleaseSwapchainImagesInfoEXT* pReleaseInfo); - /** Returns whether the parent surface is now lost and this swapchain must be recreated. */ - bool getIsSurfaceLost() { return _surfaceLost; } - - /** Returns whether this swapchain is optimally sized for the surface. 
*/ - bool hasOptimalSurface(); - /** Returns the status of the surface. Surface loss takes precedence over sub-optimal errors. */ - VkResult getSurfaceStatus() { - if (_device->getConfigurationResult() != VK_SUCCESS) { return _device->getConfigurationResult(); } - if (getIsSurfaceLost()) { return VK_ERROR_SURFACE_LOST_KHR; } - if ( !hasOptimalSurface() ) { return VK_SUBOPTIMAL_KHR; } - return VK_SUCCESS; - } + VkResult getSurfaceStatus(); /** Adds HDR metadata to this swapchain. */ void setHDRMetadataEXT(const VkHdrMetadataEXT& metadata); @@ -118,31 +105,28 @@ class MVKSwapchain : public MVKVulkanAPIDeviceObject { VkSwapchainPresentScalingCreateInfoEXT* pScalingInfo, uint32_t imgCnt); void initSurfaceImages(const VkSwapchainCreateInfoKHR* pCreateInfo, uint32_t imgCnt); - void releaseLayer(); - void releaseUndisplayedSurfaces(); + bool getIsSurfaceLost(); + bool hasOptimalSurface(); uint64_t getNextAcquisitionID(); - void willPresentSurface(id mtlTexture, id mtlCmdBuff); void renderWatermark(id mtlTexture, id mtlCmdBuff); void markFrameInterval(); - void recordPresentTime(const MVKImagePresentInfo& presentInfo, uint64_t actualPresentTime = 0); + void beginPresentation(const MVKImagePresentInfo& presentInfo); + void endPresentation(const MVKImagePresentInfo& presentInfo, uint64_t actualPresentTime = 0); - CAMetalLayer* _mtlLayer = nil; + MVKSurface* _surface = nullptr; MVKWatermark* _licenseWatermark = nullptr; MVKSmallVector _presentableImages; MVKSmallVector _compatiblePresentModes; static const int kMaxPresentationHistory = 60; VkPastPresentationTimingGOOGLE _presentTimingHistory[kMaxPresentationHistory]; std::atomic _currentAcquisitionID = 0; - MVKBlockObserver* _layerObserver = nil; std::mutex _presentHistoryLock; - std::mutex _layerLock; uint64_t _lastFrameTime = 0; VkExtent2D _mtlLayerDrawableExtent = {0, 0}; uint32_t _currentPerfLogFrameCount = 0; uint32_t _presentHistoryCount = 0; uint32_t _presentHistoryIndex = 0; uint32_t _presentHistoryHeadIndex = 0; - 
std::atomic _surfaceLost = false; bool _isDeliberatelyScaled = false; }; diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKSwapchain.mm b/MoltenVK/MoltenVK/GPUObjects/MVKSwapchain.mm index 601fbc544..f326f82e1 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKSwapchain.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKSwapchain.mm @@ -95,9 +95,8 @@ // Return the index of the image with the shortest wait, // and signal the semaphore and fence when it's available *pImageIndex = minWaitImage->_swapchainIndex; - minWaitImage->acquireAndSignalWhenAvailable((MVKSemaphore*)semaphore, (MVKFence*)fence); - - return getSurfaceStatus(); + VkResult rslt = minWaitImage->acquireAndSignalWhenAvailable((MVKSemaphore*)semaphore, (MVKFence*)fence); + return rslt ? rslt : getSurfaceStatus(); } VkResult MVKSwapchain::releaseImages(const VkReleaseSwapchainImagesInfoEXT* pReleaseInfo) { @@ -110,10 +109,18 @@ uint64_t MVKSwapchain::getNextAcquisitionID() { return ++_currentAcquisitionID; } -// Releases any surfaces that are not currently being displayed, -// so they can be used by a different swapchain. 
-void MVKSwapchain::releaseUndisplayedSurfaces() {} +bool MVKSwapchain::getIsSurfaceLost() { + VkResult surfRslt = _surface->getConfigurationResult(); + setConfigurationResult(surfRslt); + return surfRslt != VK_SUCCESS; +} +VkResult MVKSwapchain::getSurfaceStatus() { + if (_device->getConfigurationResult() != VK_SUCCESS) { return _device->getConfigurationResult(); } + if (getIsSurfaceLost()) { return VK_ERROR_SURFACE_LOST_KHR; } + if ( !hasOptimalSurface() ) { return VK_SUBOPTIMAL_KHR; } + return VK_SUCCESS; +} // This swapchain is optimally sized for the surface if the app has specified deliberate // swapchain scaling, or the CAMetalLayer drawableSize has not changed since the swapchain @@ -121,22 +128,16 @@ bool MVKSwapchain::hasOptimalSurface() { if (_isDeliberatelyScaled) { return true; } - VkExtent2D drawExtent = mvkVkExtent2DFromCGSize(_mtlLayer.drawableSize); + auto* mtlLayer = _surface->getCAMetalLayer(); + VkExtent2D drawExtent = mvkVkExtent2DFromCGSize(mtlLayer.drawableSize); return (mvkVkExtent2DsAreEqual(drawExtent, _mtlLayerDrawableExtent) && - mvkVkExtent2DsAreEqual(drawExtent, mvkGetNaturalExtent(_mtlLayer))); + mvkVkExtent2DsAreEqual(drawExtent, mvkGetNaturalExtent(mtlLayer))); } #pragma mark Rendering -// Called automatically when a swapchain image is about to be presented to the surface by the queue. -// Activities include marking the frame interval and rendering the watermark if needed. -void MVKSwapchain::willPresentSurface(id mtlTexture, id mtlCmdBuff) { - markFrameInterval(); - renderWatermark(mtlTexture, mtlCmdBuff); -} - -// If the product has not been fully licensed, renders the watermark image to the surface. +// Renders the watermark image to the surface. void MVKSwapchain::renderWatermark(id mtlTexture, id mtlCmdBuff) { if (mvkConfig().displayWatermark) { if ( !_licenseWatermark ) { @@ -159,21 +160,20 @@ // Calculates and remembers the time interval between frames. 
void MVKSwapchain::markFrameInterval() { - if ( !(mvkConfig().performanceTracking || _licenseWatermark) ) { return; } - uint64_t prevFrameTime = _lastFrameTime; - _lastFrameTime = mvkGetTimestamp(); + _lastFrameTime = _device->getPerformanceTimestamp(); if (prevFrameTime == 0) { return; } // First frame starts at first presentation _device->addActivityPerformance(_device->_performanceStatistics.queue.frameInterval, prevFrameTime, _lastFrameTime); - uint32_t perfLogCntLimit = mvkConfig().performanceLoggingFrameCount; - if ((perfLogCntLimit > 0) && (++_currentPerfLogFrameCount >= perfLogCntLimit)) { + auto& mvkCfg = mvkConfig(); + bool shouldLogOnFrames = mvkCfg.performanceTracking && mvkCfg.activityPerformanceLoggingStyle == MVK_CONFIG_ACTIVITY_PERFORMANCE_LOGGING_STYLE_FRAME_COUNT; + if (shouldLogOnFrames && (mvkCfg.performanceLoggingFrameCount > 0) && (++_currentPerfLogFrameCount >= mvkCfg.performanceLoggingFrameCount)) { _currentPerfLogFrameCount = 0; MVKLogInfo("Performance statistics reporting every: %d frames, avg FPS: %.2f, elapsed time: %.3f seconds:", - perfLogCntLimit, - (1000.0 / _device->_performanceStatistics.queue.frameInterval.averageDuration), + mvkCfg.performanceLoggingFrameCount, + (1000.0 / _device->_performanceStatistics.queue.frameInterval.average), mvkGetElapsedMilliseconds() / 1000.0); if (mvkConfig().activityPerformanceLoggingStyle == MVK_CONFIG_ACTIVITY_PERFORMANCE_LOGGING_STYLE_FRAME_COUNT) { _device->logPerformanceSummary(); @@ -181,6 +181,119 @@ } } +VkResult MVKSwapchain::getRefreshCycleDuration(VkRefreshCycleDurationGOOGLE *pRefreshCycleDuration) { + if (_device->getConfigurationResult() != VK_SUCCESS) { return _device->getConfigurationResult(); } + + auto* mtlLayer = _surface->getCAMetalLayer(); +#if MVK_VISIONOS + // TODO: See if this can be obtained from OS instead + NSInteger framesPerSecond = 90; +#elif MVK_IOS_OR_TVOS || MVK_MACCAT + NSInteger framesPerSecond = 60; + UIScreen* screen = mtlLayer.screenMVK; + if ([screen 
respondsToSelector: @selector(maximumFramesPerSecond)]) { + framesPerSecond = screen.maximumFramesPerSecond; + } +#elif MVK_MACOS && !MVK_MACCAT + NSScreen* screen = mtlLayer.screenMVK; + CGDirectDisplayID displayId = [[[screen deviceDescription] objectForKey:@"NSScreenNumber"] unsignedIntValue]; + CGDisplayModeRef mode = CGDisplayCopyDisplayMode(displayId); + double framesPerSecond = CGDisplayModeGetRefreshRate(mode); + CGDisplayModeRelease(mode); +#if MVK_XCODE_13 + if (framesPerSecond == 0 && [screen respondsToSelector: @selector(maximumFramesPerSecond)]) + framesPerSecond = [screen maximumFramesPerSecond]; +#endif + + // Builtin panels, e.g., on MacBook, report a zero refresh rate. + if (framesPerSecond == 0) + framesPerSecond = 60.0; +#endif + + pRefreshCycleDuration->refreshDuration = (uint64_t)1e9 / framesPerSecond; + return VK_SUCCESS; +} + +VkResult MVKSwapchain::getPastPresentationTiming(uint32_t *pCount, VkPastPresentationTimingGOOGLE *pPresentationTimings) { + if (_device->getConfigurationResult() != VK_SUCCESS) { return _device->getConfigurationResult(); } + + VkResult res = VK_SUCCESS; + + std::lock_guard lock(_presentHistoryLock); + if (pPresentationTimings == nullptr) { + *pCount = _presentHistoryCount; + } else { + uint32_t countRemaining = std::min(_presentHistoryCount, *pCount); + uint32_t outIndex = 0; + + res = (*pCount >= _presentHistoryCount) ? 
VK_SUCCESS : VK_INCOMPLETE; + *pCount = countRemaining; + + while (countRemaining > 0) { + pPresentationTimings[outIndex] = _presentTimingHistory[_presentHistoryHeadIndex]; + countRemaining--; + _presentHistoryCount--; + _presentHistoryHeadIndex = (_presentHistoryHeadIndex + 1) % kMaxPresentationHistory; + outIndex++; + } + } + + return res; +} + +void MVKSwapchain::beginPresentation(const MVKImagePresentInfo& presentInfo) {} + +void MVKSwapchain::endPresentation(const MVKImagePresentInfo& presentInfo, uint64_t actualPresentTime) { + + markFrameInterval(); + + std::lock_guard lock(_presentHistoryLock); + if (_presentHistoryCount < kMaxPresentationHistory) { + _presentHistoryCount++; + } else { + _presentHistoryHeadIndex = (_presentHistoryHeadIndex + 1) % kMaxPresentationHistory; + } + + // If actual present time is not available, use desired time instead, and if that + // hasn't been set, use the current time, which should be reasonably accurate (sub-ms), + // since we are here as part of the addPresentedHandler: callback. 
+ if (actualPresentTime == 0) { actualPresentTime = presentInfo.desiredPresentTime; } + if (actualPresentTime == 0) { actualPresentTime = CACurrentMediaTime() * 1.0e9; } + + _presentTimingHistory[_presentHistoryIndex].presentID = presentInfo.presentID; + _presentTimingHistory[_presentHistoryIndex].desiredPresentTime = presentInfo.desiredPresentTime; + _presentTimingHistory[_presentHistoryIndex].actualPresentTime = actualPresentTime; + // These details are not available in Metal + _presentTimingHistory[_presentHistoryIndex].earliestPresentTime = actualPresentTime; + _presentTimingHistory[_presentHistoryIndex].presentMargin = 0; + _presentHistoryIndex = (_presentHistoryIndex + 1) % kMaxPresentationHistory; +} + +void MVKSwapchain::setLayerNeedsDisplay(const VkPresentRegionKHR* pRegion) { + auto* mtlLayer = _surface->getCAMetalLayer(); + if (!pRegion || pRegion->rectangleCount == 0) { + [mtlLayer setNeedsDisplay]; + return; + } + + for (uint32_t i = 0; i < pRegion->rectangleCount; ++i) { + CGRect cgRect = mvkCGRectFromVkRectLayerKHR(pRegion->pRectangles[i]); +#if MVK_MACOS + // VK_KHR_incremental_present specifies an upper-left origin, but macOS by default + // uses a lower-left origin. + cgRect.origin.y = mtlLayer.bounds.size.height - cgRect.origin.y; +#endif + // We were given rectangles in pixels, but -[CALayer setNeedsDisplayInRect:] wants them + // in points, which is pixels / contentsScale. 
+ CGFloat scaleFactor = mtlLayer.contentsScale; + cgRect.origin.x /= scaleFactor; + cgRect.origin.y /= scaleFactor; + cgRect.size.width /= scaleFactor; + cgRect.size.height /= scaleFactor; + [mtlLayer setNeedsDisplayInRect:cgRect]; + } +} + #if MVK_MACOS struct CIE1931XY { uint16_t x; @@ -237,19 +350,31 @@ static inline CIE1931XY VkXYColorEXTToCIE1931XY(VkXYColorEXT xy) { CAEDRMetadata* caMetadata = [CAEDRMetadata HDR10MetadataWithDisplayInfo: colorVolData contentInfo: lightLevelData opticalOutputScale: 1]; - _mtlLayer.EDRMetadata = caMetadata; + auto* mtlLayer = _surface->getCAMetalLayer(); + mtlLayer.EDRMetadata = caMetadata; + mtlLayer.wantsExtendedDynamicRangeContent = YES; [caMetadata release]; [colorVolData release]; [lightLevelData release]; - _mtlLayer.wantsExtendedDynamicRangeContent = YES; #endif } #pragma mark Construction -MVKSwapchain::MVKSwapchain(MVKDevice* device, - const VkSwapchainCreateInfoKHR* pCreateInfo) : MVKVulkanAPIDeviceObject(device) { +MVKSwapchain::MVKSwapchain(MVKDevice* device, const VkSwapchainCreateInfoKHR* pCreateInfo) + : MVKVulkanAPIDeviceObject(device), + _surface((MVKSurface*)pCreateInfo->surface) { + + // Check if oldSwapchain is properly set + auto* oldSwapchain = (MVKSwapchain*)pCreateInfo->oldSwapchain; + if (oldSwapchain == _surface->_activeSwapchain) { + _surface->_activeSwapchain = this; + } else { + setConfigurationResult(reportError(VK_ERROR_NATIVE_WINDOW_IN_USE_KHR, "vkCreateSwapchainKHR(): pCreateInfo->oldSwapchain does not match the VkSwapchain that is in use by the surface")); + return; + } + memset(_presentTimingHistory, 0, sizeof(_presentTimingHistory)); // Retrieve the scaling and present mode structs if they are supplied. @@ -280,10 +405,6 @@ static inline CIE1931XY VkXYColorEXTToCIE1931XY(VkXYColorEXT xy) { } } - // If applicable, release any surfaces (not currently being displayed) from the old swapchain. 
- MVKSwapchain* oldSwapchain = (MVKSwapchain*)pCreateInfo->oldSwapchain; - if (oldSwapchain) { oldSwapchain->releaseUndisplayedSurfaces(); } - uint32_t imgCnt = mvkClamp(pCreateInfo->minImageCount, _device->_pMetalFeatures->minSwapchainImageCount, _device->_pMetalFeatures->maxSwapchainImageCount); @@ -333,85 +454,80 @@ static CALayerContentsGravity getCALayerContentsGravity(VkSwapchainPresentScalin VkSwapchainPresentScalingCreateInfoEXT* pScalingInfo, uint32_t imgCnt) { - MVKSurface* mvkSrfc = (MVKSurface*)pCreateInfo->surface; - _mtlLayer = mvkSrfc->getCAMetalLayer(); - if ( !_mtlLayer ) { - setConfigurationResult(mvkSrfc->getConfigurationResult()); - _surfaceLost = true; - return; - } + if ( getIsSurfaceLost() ) { return; } + auto* mtlLayer = _surface->getCAMetalLayer(); auto minMagFilter = mvkConfig().swapchainMinMagFilterUseNearest ? kCAFilterNearest : kCAFilterLinear; - _mtlLayer.device = getMTLDevice(); - _mtlLayer.pixelFormat = getPixelFormats()->getMTLPixelFormat(pCreateInfo->imageFormat); - _mtlLayer.maximumDrawableCountMVK = imgCnt; - _mtlLayer.displaySyncEnabledMVK = (pCreateInfo->presentMode != VK_PRESENT_MODE_IMMEDIATE_KHR); - _mtlLayer.minificationFilter = minMagFilter; - _mtlLayer.magnificationFilter = minMagFilter; - _mtlLayer.contentsGravity = getCALayerContentsGravity(pScalingInfo); - _mtlLayer.framebufferOnly = !mvkIsAnyFlagEnabled(pCreateInfo->imageUsage, (VK_IMAGE_USAGE_TRANSFER_SRC_BIT | + mtlLayer.device = getMTLDevice(); + mtlLayer.pixelFormat = getPixelFormats()->getMTLPixelFormat(pCreateInfo->imageFormat); + mtlLayer.maximumDrawableCountMVK = imgCnt; + mtlLayer.displaySyncEnabledMVK = (pCreateInfo->presentMode != VK_PRESENT_MODE_IMMEDIATE_KHR); + mtlLayer.minificationFilter = minMagFilter; + mtlLayer.magnificationFilter = minMagFilter; + mtlLayer.contentsGravity = getCALayerContentsGravity(pScalingInfo); + mtlLayer.framebufferOnly = !mvkIsAnyFlagEnabled(pCreateInfo->imageUsage, (VK_IMAGE_USAGE_TRANSFER_SRC_BIT | 
VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_STORAGE_BIT)); // Remember the extent to later detect if it has changed under the covers, // and set the drawable size of the CAMetalLayer from the extent. _mtlLayerDrawableExtent = pCreateInfo->imageExtent; - _mtlLayer.drawableSize = mvkCGSizeFromVkExtent2D(_mtlLayerDrawableExtent); + mtlLayer.drawableSize = mvkCGSizeFromVkExtent2D(_mtlLayerDrawableExtent); if (pCreateInfo->compositeAlpha != VK_COMPOSITE_ALPHA_INHERIT_BIT_KHR) { - _mtlLayer.opaque = pCreateInfo->compositeAlpha == VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR; + mtlLayer.opaque = pCreateInfo->compositeAlpha == VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR; } switch (pCreateInfo->imageColorSpace) { case VK_COLOR_SPACE_SRGB_NONLINEAR_KHR: - _mtlLayer.colorspaceNameMVK = kCGColorSpaceSRGB; - _mtlLayer.wantsExtendedDynamicRangeContentMVK = NO; + mtlLayer.colorspaceNameMVK = kCGColorSpaceSRGB; + mtlLayer.wantsExtendedDynamicRangeContentMVK = NO; break; case VK_COLOR_SPACE_DISPLAY_P3_NONLINEAR_EXT: - _mtlLayer.colorspaceNameMVK = kCGColorSpaceDisplayP3; - _mtlLayer.wantsExtendedDynamicRangeContentMVK = YES; + mtlLayer.colorspaceNameMVK = kCGColorSpaceDisplayP3; + mtlLayer.wantsExtendedDynamicRangeContentMVK = YES; break; case VK_COLOR_SPACE_EXTENDED_SRGB_LINEAR_EXT: - _mtlLayer.colorspaceNameMVK = kCGColorSpaceExtendedLinearSRGB; - _mtlLayer.wantsExtendedDynamicRangeContentMVK = YES; + mtlLayer.colorspaceNameMVK = kCGColorSpaceExtendedLinearSRGB; + mtlLayer.wantsExtendedDynamicRangeContentMVK = YES; break; case VK_COLOR_SPACE_EXTENDED_SRGB_NONLINEAR_EXT: - _mtlLayer.colorspaceNameMVK = kCGColorSpaceExtendedSRGB; - _mtlLayer.wantsExtendedDynamicRangeContentMVK = YES; + mtlLayer.colorspaceNameMVK = kCGColorSpaceExtendedSRGB; + mtlLayer.wantsExtendedDynamicRangeContentMVK = YES; break; case VK_COLOR_SPACE_DISPLAY_P3_LINEAR_EXT: - _mtlLayer.colorspaceNameMVK = kCGColorSpaceExtendedLinearDisplayP3; - _mtlLayer.wantsExtendedDynamicRangeContentMVK = YES; + 
mtlLayer.colorspaceNameMVK = kCGColorSpaceExtendedLinearDisplayP3; + mtlLayer.wantsExtendedDynamicRangeContentMVK = YES; break; case VK_COLOR_SPACE_DCI_P3_NONLINEAR_EXT: - _mtlLayer.colorspaceNameMVK = kCGColorSpaceDCIP3; - _mtlLayer.wantsExtendedDynamicRangeContentMVK = YES; + mtlLayer.colorspaceNameMVK = kCGColorSpaceDCIP3; + mtlLayer.wantsExtendedDynamicRangeContentMVK = YES; break; case VK_COLOR_SPACE_BT709_NONLINEAR_EXT: - _mtlLayer.colorspaceNameMVK = kCGColorSpaceITUR_709; - _mtlLayer.wantsExtendedDynamicRangeContentMVK = NO; + mtlLayer.colorspaceNameMVK = kCGColorSpaceITUR_709; + mtlLayer.wantsExtendedDynamicRangeContentMVK = NO; break; case VK_COLOR_SPACE_BT2020_LINEAR_EXT: - _mtlLayer.colorspaceNameMVK = kCGColorSpaceExtendedLinearITUR_2020; - _mtlLayer.wantsExtendedDynamicRangeContentMVK = YES; + mtlLayer.colorspaceNameMVK = kCGColorSpaceExtendedLinearITUR_2020; + mtlLayer.wantsExtendedDynamicRangeContentMVK = YES; break; #if MVK_XCODE_12 case VK_COLOR_SPACE_HDR10_ST2084_EXT: - _mtlLayer.colorspaceNameMVK = kCGColorSpaceITUR_2100_PQ; - _mtlLayer.wantsExtendedDynamicRangeContentMVK = YES; + mtlLayer.colorspaceNameMVK = kCGColorSpaceITUR_2100_PQ; + mtlLayer.wantsExtendedDynamicRangeContentMVK = YES; break; case VK_COLOR_SPACE_HDR10_HLG_EXT: - _mtlLayer.colorspaceNameMVK = kCGColorSpaceITUR_2100_HLG; - _mtlLayer.wantsExtendedDynamicRangeContentMVK = YES; + mtlLayer.colorspaceNameMVK = kCGColorSpaceITUR_2100_HLG; + mtlLayer.wantsExtendedDynamicRangeContentMVK = YES; break; #endif case VK_COLOR_SPACE_ADOBERGB_NONLINEAR_EXT: - _mtlLayer.colorspaceNameMVK = kCGColorSpaceAdobeRGB1998; - _mtlLayer.wantsExtendedDynamicRangeContentMVK = NO; + mtlLayer.colorspaceNameMVK = kCGColorSpaceAdobeRGB1998; + mtlLayer.wantsExtendedDynamicRangeContentMVK = NO; break; case VK_COLOR_SPACE_PASS_THROUGH_EXT: - _mtlLayer.colorspace = nil; - _mtlLayer.wantsExtendedDynamicRangeContentMVK = NO; + mtlLayer.colorspace = nil; + mtlLayer.wantsExtendedDynamicRangeContentMVK = NO; break; 
default: setConfigurationResult(reportError(VK_ERROR_FORMAT_NOT_SUPPORTED, "vkCreateSwapchainKHR(): Metal does not support VkColorSpaceKHR value %d.", pCreateInfo->imageColorSpace)); @@ -421,22 +537,6 @@ static CALayerContentsGravity getCALayerContentsGravity(VkSwapchainPresentScalin // TODO: set additional CAMetalLayer properties before extracting drawables: // - presentsWithTransaction // - drawsAsynchronously - - if ( [_mtlLayer.delegate isKindOfClass: [PLATFORM_VIEW_CLASS class]] ) { - // Sometimes, the owning view can replace its CAMetalLayer. In that case, the client - // needs to recreate the swapchain, or no content will be displayed. - _layerObserver = [MVKBlockObserver observerWithBlock: ^(NSString* path, id, NSDictionary*, void*) { - if ( ![path isEqualToString: @"layer"] ) { return; } - this->releaseLayer(); - } forObject: _mtlLayer.delegate atKeyPath: @"layer"]; - } -} - -void MVKSwapchain::releaseLayer() { - std::lock_guard lock(_layerLock); - _surfaceLost = true; - [_layerObserver release]; - _layerObserver = nil; } // Initializes the array of images used for the surface of this swapchain. 
@@ -459,13 +559,13 @@ static CALayerContentsGravity getCALayerContentsGravity(VkSwapchainPresentScalin } } + auto* mtlLayer = _surface->getCAMetalLayer(); VkExtent2D imgExtent = pCreateInfo->imageExtent; - VkImageCreateInfo imgInfo = { .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, .pNext = VK_NULL_HANDLE, .imageType = VK_IMAGE_TYPE_2D, - .format = getPixelFormats()->getVkFormat(_mtlLayer.pixelFormat), + .format = getPixelFormats()->getVkFormat(mtlLayer.pixelFormat), .extent = { imgExtent.width, imgExtent.height, 1 }, .mipLevels = 1, .arrayLayers = 1, @@ -494,131 +594,21 @@ static CALayerContentsGravity getCALayerContentsGravity(VkSwapchainPresentScalin NSString* screenName = @"Main Screen"; #if MVK_MACOS && !MVK_MACCAT - if ([_mtlLayer.screenMVK respondsToSelector:@selector(localizedName)]) { - screenName = _mtlLayer.screenMVK.localizedName; + if ([mtlLayer.screenMVK respondsToSelector:@selector(localizedName)]) { + screenName = mtlLayer.screenMVK.localizedName; } #endif MVKLogInfo("Created %d swapchain images with initial size (%d, %d) and contents scale %.1f for screen %s.", - imgCnt, imgExtent.width, imgExtent.height, _mtlLayer.contentsScale, screenName.UTF8String); -} - -VkResult MVKSwapchain::getRefreshCycleDuration(VkRefreshCycleDurationGOOGLE *pRefreshCycleDuration) { - if (_device->getConfigurationResult() != VK_SUCCESS) { return _device->getConfigurationResult(); } - -#if MVK_VISIONOS - // TODO: See if this can be obtained from OS instead - NSInteger framesPerSecond = 90; -#elif MVK_IOS_OR_TVOS || MVK_MACCAT - NSInteger framesPerSecond = 60; - UIScreen* screen = _mtlLayer.screenMVK; - if ([screen respondsToSelector: @selector(maximumFramesPerSecond)]) { - framesPerSecond = screen.maximumFramesPerSecond; - } -#elif MVK_MACOS && !MVK_MACCAT - NSScreen* screen = _mtlLayer.screenMVK; - CGDirectDisplayID displayId = [[[screen deviceDescription] objectForKey:@"NSScreenNumber"] unsignedIntValue]; - CGDisplayModeRef mode = CGDisplayCopyDisplayMode(displayId); - 
double framesPerSecond = CGDisplayModeGetRefreshRate(mode); - CGDisplayModeRelease(mode); -#if MVK_XCODE_13 - if (framesPerSecond == 0 && [screen respondsToSelector: @selector(maximumFramesPerSecond)]) - framesPerSecond = [screen maximumFramesPerSecond]; -#endif - - // Builtin panels, e.g., on MacBook, report a zero refresh rate. - if (framesPerSecond == 0) - framesPerSecond = 60.0; -#endif - - pRefreshCycleDuration->refreshDuration = (uint64_t)1e9 / framesPerSecond; - return VK_SUCCESS; -} - -VkResult MVKSwapchain::getPastPresentationTiming(uint32_t *pCount, VkPastPresentationTimingGOOGLE *pPresentationTimings) { - if (_device->getConfigurationResult() != VK_SUCCESS) { return _device->getConfigurationResult(); } - - VkResult res = VK_SUCCESS; - - std::lock_guard lock(_presentHistoryLock); - if (pPresentationTimings == nullptr) { - *pCount = _presentHistoryCount; - } else { - uint32_t countRemaining = std::min(_presentHistoryCount, *pCount); - uint32_t outIndex = 0; - - res = (*pCount >= _presentHistoryCount) ? VK_SUCCESS : VK_INCOMPLETE; - *pCount = countRemaining; - - while (countRemaining > 0) { - pPresentationTimings[outIndex] = _presentTimingHistory[_presentHistoryHeadIndex]; - countRemaining--; - _presentHistoryCount--; - _presentHistoryHeadIndex = (_presentHistoryHeadIndex + 1) % kMaxPresentationHistory; - outIndex++; - } - } - - return res; -} - -void MVKSwapchain::recordPresentTime(const MVKImagePresentInfo& presentInfo, uint64_t actualPresentTime) { - std::lock_guard lock(_presentHistoryLock); - if (_presentHistoryCount < kMaxPresentationHistory) { - _presentHistoryCount++; - } else { - _presentHistoryHeadIndex = (_presentHistoryHeadIndex + 1) % kMaxPresentationHistory; - } - - // If actual present time is not available, use desired time instead, and if that - // hasn't been set, use the current time, which should be reasonably accurate (sub-ms), - // since we are here as part of the addPresentedHandler: callback. 
- if (actualPresentTime == 0) { actualPresentTime = presentInfo.desiredPresentTime; } - if (actualPresentTime == 0) { actualPresentTime = CACurrentMediaTime() * 1.0e9; } - - _presentTimingHistory[_presentHistoryIndex].presentID = presentInfo.presentID; - _presentTimingHistory[_presentHistoryIndex].desiredPresentTime = presentInfo.desiredPresentTime; - _presentTimingHistory[_presentHistoryIndex].actualPresentTime = actualPresentTime; - // These details are not available in Metal - _presentTimingHistory[_presentHistoryIndex].earliestPresentTime = actualPresentTime; - _presentTimingHistory[_presentHistoryIndex].presentMargin = 0; - _presentHistoryIndex = (_presentHistoryIndex + 1) % kMaxPresentationHistory; -} - -void MVKSwapchain::setLayerNeedsDisplay(const VkPresentRegionKHR* pRegion) { - if (!pRegion || pRegion->rectangleCount == 0) { - [_mtlLayer setNeedsDisplay]; - return; - } - - for (uint32_t i = 0; i < pRegion->rectangleCount; ++i) { - CGRect cgRect = mvkCGRectFromVkRectLayerKHR(pRegion->pRectangles[i]); -#if MVK_MACOS - // VK_KHR_incremental_present specifies an upper-left origin, but macOS by default - // uses a lower-left origin. - cgRect.origin.y = _mtlLayer.bounds.size.height - cgRect.origin.y; -#endif - // We were given rectangles in pixels, but -[CALayer setNeedsDisplayInRect:] wants them - // in points, which is pixels / contentsScale. - CGFloat scaleFactor = _mtlLayer.contentsScale; - cgRect.origin.x /= scaleFactor; - cgRect.origin.y /= scaleFactor; - cgRect.size.width /= scaleFactor; - cgRect.size.height /= scaleFactor; - [_mtlLayer setNeedsDisplayInRect:cgRect]; - } + imgCnt, imgExtent.width, imgExtent.height, mtlLayer.contentsScale, screenName.UTF8String); } -// A retention loop exists between the swapchain and its images. The swapchain images -// retain the swapchain because they can be in flight when the app destroys the swapchain. 
-// Release the images now, when the app destroys the swapchain, so they will be destroyed when -// no longer held by the presentation flow, and will in turn release the swapchain for destruction. void MVKSwapchain::destroy() { + if (_surface->_activeSwapchain == this) { _surface->_activeSwapchain = nullptr; } for (auto& img : _presentableImages) { _device->destroyPresentableSwapchainImage(img, NULL); } MVKVulkanAPIDeviceObject::destroy(); } MVKSwapchain::~MVKSwapchain() { if (_licenseWatermark) { _licenseWatermark->destroy(); } - releaseLayer(); } diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKSync.h b/MoltenVK/MoltenVK/GPUObjects/MVKSync.h index ad87f715a..4e3f51220 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKSync.h +++ b/MoltenVK/MoltenVK/GPUObjects/MVKSync.h @@ -63,6 +63,9 @@ class MVKSemaphoreImpl : public MVKBaseObject { /** Returns whether this instance is in a reserved state. */ bool isReserved(); + /** Returns the number of outstanding reservations. */ + uint32_t getReservationCount(); + /** * Blocks processing on the current thread until any or all (depending on configuration) outstanding * reservations have been released, or until the specified timeout interval in nanoseconds expires. @@ -89,20 +92,19 @@ class MVKSemaphoreImpl : public MVKBaseObject { * require a separate call to the release() function to cause the semaphore to stop blocking. */ MVKSemaphoreImpl(bool waitAll = true, uint32_t reservationCount = 0) - : _shouldWaitAll(waitAll), _reservationCount(reservationCount) {} + : _reservationCount(reservationCount), _shouldWaitAll(waitAll) {} - /** Destructor. 
*/ ~MVKSemaphoreImpl(); private: bool operator()(); - inline bool isClear() { return _reservationCount == 0; } // Not thread-safe + bool isClear() { return _reservationCount == 0; } // Not thread-safe std::mutex _lock; std::condition_variable _blocker; - bool _shouldWaitAll; uint32_t _reservationCount; + bool _shouldWaitAll; }; diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKSync.mm b/MoltenVK/MoltenVK/GPUObjects/MVKSync.mm index efde21cb9..fb1e0190c 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKSync.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKSync.mm @@ -50,6 +50,11 @@ return !isClear(); } +uint32_t MVKSemaphoreImpl::getReservationCount() { + lock_guard lock(_lock); + return _reservationCount; +} + bool MVKSemaphoreImpl::wait(uint64_t timeout, bool reserveAgain) { unique_lock lock(_lock); diff --git a/MoltenVK/MoltenVK/Utility/MVKBaseObject.h b/MoltenVK/MoltenVK/Utility/MVKBaseObject.h index 7005e9850..d2fcb9e8c 100644 --- a/MoltenVK/MoltenVK/Utility/MVKBaseObject.h +++ b/MoltenVK/MoltenVK/Utility/MVKBaseObject.h @@ -50,7 +50,7 @@ class MVKBaseObject { void reportMessage(MVKConfigLogLevel logLevel, const char* format, ...) __printflike(3, 4); /** - * Report a Vulkan error message, on behalf of the object, which may be nil. + * Report a message, on behalf of the object, which may be nil. * Reporting includes logging to a standard system logging stream, and if the object * is not nil and has access to the VkInstance, the message will also be forwarded * to the VkInstance for output to the Vulkan debug report messaging API. @@ -58,14 +58,19 @@ class MVKBaseObject { static void reportMessage(MVKBaseObject* mvkObj, MVKConfigLogLevel logLevel, const char* format, ...) __printflike(3, 4); /** - * Report a Vulkan error message, on behalf of the object, which may be nil. + * Report a Vulkan result message. 
This includes logging to a standard system logging stream, + * and some subclasses will also forward the message to their VkInstance for output to the + * Vulkan debug report messaging API. + */ + VkResult reportResult(VkResult vkRslt, MVKConfigLogLevel logLevel, const char* format, ...) __printflike(4, 5); + + /** + * Report a Vulkan result message, on behalf of the object, which may be nil. * Reporting includes logging to a standard system logging stream, and if the object * is not nil and has access to the VkInstance, the message will also be forwarded * to the VkInstance for output to the Vulkan debug report messaging API. - * - * This is the core reporting implementation. Other similar functions delegate here. */ - static void reportMessage(MVKBaseObject* mvkObj, MVKConfigLogLevel logLevel, const char* format, va_list args) __printflike(3, 0); + static VkResult reportResult(MVKBaseObject* mvkObj, VkResult vkRslt, MVKConfigLogLevel logLevel, const char* format, ...) __printflike(4, 5); /** * Report a Vulkan error message. This includes logging to a standard system logging stream, @@ -83,19 +88,29 @@ class MVKBaseObject { static VkResult reportError(MVKBaseObject* mvkObj, VkResult vkErr, const char* format, ...) __printflike(3, 4); /** - * Report a Vulkan error message, on behalf of the object. which may be nil. + * Report a Vulkan warning message. This includes logging to a standard system logging stream, + * and some subclasses will also forward the message to their VkInstance for output to the + * Vulkan debug report messaging API. + */ + VkResult reportWarning(VkResult vkRslt, const char* format, ...) __printflike(3, 4); + + /** + * Report a Vulkan warning message, on behalf of the object, which may be nil. * Reporting includes logging to a standard system logging stream, and if the object * is not nil and has access to the VkInstance, the message will also be forwarded * to the VkInstance for output to the Vulkan debug report messaging API. 
- * - * This is the core reporting implementation. Other similar functions delegate here. */ - static VkResult reportError(MVKBaseObject* mvkObj, VkResult vkErr, const char* format, va_list args) __printflike(3, 0); + static VkResult reportWarning(MVKBaseObject* mvkObj, VkResult vkRslt, const char* format, ...) __printflike(3, 4); /** Destroys this object. Default behaviour simply deletes it. Subclasses may override to delay deletion. */ virtual void destroy() { delete this; } virtual ~MVKBaseObject() {} + +protected: + static VkResult reportResult(MVKBaseObject* mvkObj, VkResult vkRslt, MVKConfigLogLevel logLevel, const char* format, va_list args) __printflike(4, 0); + static void reportMessage(MVKBaseObject* mvkObj, MVKConfigLogLevel logLevel, const char* format, va_list args) __printflike(3, 0); + }; diff --git a/MoltenVK/MoltenVK/Utility/MVKBaseObject.mm b/MoltenVK/MoltenVK/Utility/MVKBaseObject.mm index 427c32278..5a14888c1 100644 --- a/MoltenVK/MoltenVK/Utility/MVKBaseObject.mm +++ b/MoltenVK/MoltenVK/Utility/MVKBaseObject.mm @@ -26,24 +26,19 @@ using namespace std; +#pragma mark - +#pragma mark MVKBaseObject + static const char* getReportingLevelString(MVKConfigLogLevel logLevel) { switch (logLevel) { - case MVK_CONFIG_LOG_LEVEL_DEBUG: - return "mvk-debug"; - case MVK_CONFIG_LOG_LEVEL_INFO: - return "mvk-info"; - case MVK_CONFIG_LOG_LEVEL_WARNING: - return "mvk-warn"; - case MVK_CONFIG_LOG_LEVEL_ERROR: - default: - return "mvk-error"; + case MVK_CONFIG_LOG_LEVEL_ERROR: return "mvk-error"; + case MVK_CONFIG_LOG_LEVEL_WARNING: return "mvk-warn"; + case MVK_CONFIG_LOG_LEVEL_INFO: return "mvk-info"; + case MVK_CONFIG_LOG_LEVEL_DEBUG: return "mvk-debug"; + default: return "mvk-unknown"; } } - -#pragma mark - -#pragma mark MVKBaseObject - string MVKBaseObject::getClassName() { return mvk::getTypeName(this); } void MVKBaseObject::reportMessage(MVKConfigLogLevel logLevel, const char* format, ...) 
{ @@ -102,36 +97,67 @@ free(redoBuff); } -VkResult MVKBaseObject::reportError(VkResult vkErr, const char* format, ...) { +VkResult MVKBaseObject::reportResult(VkResult vkErr, MVKConfigLogLevel logLevel, const char* format, ...) { va_list args; va_start(args, format); - VkResult rslt = reportError(this, vkErr, format, args); + VkResult rslt = reportResult(this, vkErr, logLevel, format, args); va_end(args); return rslt; } -VkResult MVKBaseObject::reportError(MVKBaseObject* mvkObj, VkResult vkErr, const char* format, ...) { +VkResult MVKBaseObject::reportResult(MVKBaseObject* mvkObj, VkResult vkErr, MVKConfigLogLevel logLevel, const char* format, ...) { va_list args; va_start(args, format); - VkResult rslt = reportError(mvkObj, vkErr, format, args); + VkResult rslt = reportResult(mvkObj, vkErr, logLevel, format, args); va_end(args); return rslt; } -// This is the core reporting implementation. Other similar functions delegate here. -VkResult MVKBaseObject::reportError(MVKBaseObject* mvkObj, VkResult vkErr, const char* format, va_list args) { +VkResult MVKBaseObject::reportResult(MVKBaseObject* mvkObj, VkResult vkRslt, MVKConfigLogLevel logLevel, const char* format, va_list args) { - // Prepend the error code to the format string - const char* vkRsltName = mvkVkResultName(vkErr); + // Prepend the result code to the format string + const char* vkRsltName = mvkVkResultName(vkRslt); size_t rsltLen = strlen(vkRsltName) + strlen(format) + 4; char fmtStr[rsltLen]; snprintf(fmtStr, rsltLen, "%s: %s", vkRsltName, format); - // Report the error + // Report the message va_list lclArgs; va_copy(lclArgs, args); - reportMessage(mvkObj, MVK_CONFIG_LOG_LEVEL_ERROR, fmtStr, lclArgs); + reportMessage(mvkObj, logLevel, fmtStr, lclArgs); va_end(lclArgs); - return vkErr; + return vkRslt; +} + +VkResult MVKBaseObject::reportError(VkResult vkErr, const char* format, ...) 
{ + va_list args; + va_start(args, format); + VkResult rslt = reportResult(this, vkErr, MVK_CONFIG_LOG_LEVEL_ERROR, format, args); + va_end(args); + return rslt; +} + +VkResult MVKBaseObject::reportError(MVKBaseObject* mvkObj, VkResult vkErr, const char* format, ...) { + va_list args; + va_start(args, format); + VkResult rslt = reportResult(mvkObj, vkErr, MVK_CONFIG_LOG_LEVEL_ERROR, format, args); + va_end(args); + return rslt; +} + +VkResult MVKBaseObject::reportWarning(VkResult vkErr, const char* format, ...) { + va_list args; + va_start(args, format); + VkResult rslt = reportResult(this, vkErr, MVK_CONFIG_LOG_LEVEL_WARNING, format, args); + va_end(args); + return rslt; +} + +VkResult MVKBaseObject::reportWarning(MVKBaseObject* mvkObj, VkResult vkErr, const char* format, ...) { + va_list args; + va_start(args, format); + VkResult rslt = reportResult(mvkObj, vkErr, MVK_CONFIG_LOG_LEVEL_WARNING, format, args); + va_end(args); + return rslt; } diff --git a/MoltenVK/MoltenVK/Utility/MVKFoundation.cpp b/MoltenVK/MoltenVK/Utility/MVKFoundation.cpp index 29ee115ff..85ad7d5b5 100644 --- a/MoltenVK/MoltenVK/Utility/MVKFoundation.cpp +++ b/MoltenVK/MoltenVK/Utility/MVKFoundation.cpp @@ -21,6 +21,44 @@ #define CASE_STRINGIFY(V) case V: return #V +const char* mvkVkCommandName(MVKCommandUse cmdUse) { + switch (cmdUse) { + case kMVKCommandUseBeginCommandBuffer: return "vkBeginCommandBuffer (prefilled VkCommandBuffer)"; + case kMVKCommandUseQueueSubmit: return "vkQueueSubmit"; + case kMVKCommandUseAcquireNextImage: return "vkAcquireNextImageKHR"; + case kMVKCommandUseQueuePresent: return "vkQueuePresentKHR"; + case kMVKCommandUseQueueWaitIdle: return "vkQueueWaitIdle"; + case kMVKCommandUseDeviceWaitIdle: return "vkDeviceWaitIdle"; + case kMVKCommandUseInvalidateMappedMemoryRanges: return "vkInvalidateMappedMemoryRanges"; + case kMVKCommandUseBeginRendering: return "vkCmdBeginRendering"; + case kMVKCommandUseBeginRenderPass: return "vkCmdBeginRenderPass"; + case 
kMVKCommandUseNextSubpass: return "vkCmdNextSubpass"; + case kMVKCommandUseRestartSubpass: return "Metal renderpass restart on barrier"; + case kMVKCommandUsePipelineBarrier: return "vkCmdPipelineBarrier"; + case kMVKCommandUseBlitImage: return "vkCmdBlitImage"; + case kMVKCommandUseCopyImage: return "vkCmdCopyImage"; + case kMVKCommandUseResolveImage: return "vkCmdResolveImage (resolve stage)"; + case kMVKCommandUseResolveExpandImage: return "vkCmdResolveImage (expand stage)"; + case kMVKCommandUseResolveCopyImage: return "vkCmdResolveImage (copy stage)"; + case kMVKCommandUseCopyBuffer: return "vkCmdCopyBuffer"; + case kMVKCommandUseCopyBufferToImage: return "vkCmdCopyBufferToImage"; + case kMVKCommandUseCopyImageToBuffer: return "vkCmdCopyImageToBuffer"; + case kMVKCommandUseFillBuffer: return "vkCmdFillBuffer"; + case kMVKCommandUseUpdateBuffer: return "vkCmdUpdateBuffer"; + case kMVKCommandUseClearAttachments: return "vkCmdClearAttachments"; + case kMVKCommandUseClearColorImage: return "vkCmdClearColorImage"; + case kMVKCommandUseClearDepthStencilImage: return "vkCmdClearDepthStencilImage"; + case kMVKCommandUseResetQueryPool: return "vkCmdResetQueryPool"; + case kMVKCommandUseDispatch: return "vkCmdDispatch"; + case kMVKCommandUseTessellationVertexTessCtl: return "vkCmdDraw (vertex and tess control stages)"; + case kMVKCommandUseDrawIndirectConvertBuffers: return "vkCmdDrawIndirect (convert indirect buffers)"; + case kMVKCommandUseCopyQueryPoolResults: return "vkCmdCopyQueryPoolResults"; + case kMVKCommandUseAccumOcclusionQuery: return "Post-render-pass occlusion query accumulation"; + case kMVKCommandUseRecordGPUCounterSample: return "Record GPU Counter Sample"; + default: return "Unknown Vulkan command"; + } +} + const char* mvkVkResultName(VkResult vkResult) { switch (vkResult) { diff --git a/MoltenVK/MoltenVK/Utility/MVKFoundation.h b/MoltenVK/MoltenVK/Utility/MVKFoundation.h index b8f10720f..1097afb5a 100644 --- 
a/MoltenVK/MoltenVK/Utility/MVKFoundation.h +++ b/MoltenVK/MoltenVK/Utility/MVKFoundation.h @@ -63,7 +63,7 @@ typedef struct { /** Tracks the Vulkan command currently being used. */ typedef enum : uint8_t { kMVKCommandUseNone = 0, /**< No use defined. */ - kMVKCommandUseEndCommandBuffer, /**< vkEndCommandBuffer (prefilled VkCommandBuffer). */ + kMVKCommandUseBeginCommandBuffer, /**< vkBeginCommandBuffer (prefilled VkCommandBuffer). */ kMVKCommandUseQueueSubmit, /**< vkQueueSubmit. */ kMVKCommandUseAcquireNextImage, /**< vkAcquireNextImageKHR. */ kMVKCommandUseQueuePresent, /**< vkQueuePresentKHR. */ @@ -104,6 +104,9 @@ enum MVKGraphicsStage { kMVKGraphicsStageRasterization /**< The rest of the pipeline. */ }; +/** Returns the name of the command defined by the command use. */ +const char* mvkVkCommandName(MVKCommandUse cmdUse); + /** Returns the name of the result value. */ const char* mvkVkResultName(VkResult vkResult); diff --git a/MoltenVK/MoltenVK/Utility/MVKLogging.h b/MoltenVK/MoltenVK/Utility/MVKLogging.h index bea3a92f5..840d37803 100644 --- a/MoltenVK/MoltenVK/Utility/MVKLogging.h +++ b/MoltenVK/MoltenVK/Utility/MVKLogging.h @@ -57,9 +57,9 @@ extern "C" { * MVKLogErrorIf(cond, fmt, ...) - same as MVKLogError if boolean "cond" condition expression evaluates to YES, * otherwise logs nothing. * - * MVKLogWarning(fmt, ...) - recommended for not immediately harmful errors + * MVKLogWarn(fmt, ...) - recommended for not immediately harmful errors * - will print if MVK_LOG_LEVEL_WARNING is set on. - * MVKLogWarningIf(cond, fmt, ...) - same as MVKLogWarning if boolean "cond" condition expression evaluates to YES, + * MVKLogWarnIf(cond, fmt, ...) - same as MVKLogWarn if boolean "cond" condition expression evaluates to YES, * otherwise logs nothing. * * MVKLogInfo(fmt, ...) - recommended for general, infrequent, information messages @@ -67,7 +67,7 @@ extern "C" { * MVKLogInfoIf(cond, fmt, ...) 
- same as MVKLogInfo if boolean "cond" condition expression evaluates to YES, * otherwise logs nothing. * - * MVKLogDebug(fmt, ...) - recommended for temporary use during debugging + * MVKLogDebug(fmt, ...) - recommended for temporary use during debugging * - will print if MVK_LOG_LEVEL_DEBUG is set on. * MVKLogDebugIf(cond, fmt, ...) - same as MVKLogDebug if boolean "cond" condition expression evaluates to YES, * otherwise logs nothing. @@ -148,11 +148,11 @@ extern "C" { // Warning logging - for not immediately harmful errors #if MVK_LOG_LEVEL_WARNING -# define MVKLogWarning(fmt, ...) MVKLogWarningImpl(fmt, ##__VA_ARGS__) -# define MVKLogWarningIf(cond, fmt, ...) if(cond) { MVKLogWarningImpl(fmt, ##__VA_ARGS__); } +# define MVKLogWarn(fmt, ...) MVKLogWarnImpl(fmt, ##__VA_ARGS__) +# define MVKLogWarnIf(cond, fmt, ...) if(cond) { MVKLogWarnImpl(fmt, ##__VA_ARGS__); } #else -# define MVKLogWarning(...) -# define MVKLogWarningIf(cond, fmt, ...) +# define MVKLogWarn(...) +# define MVKLogWarnIf(cond, fmt, ...) #endif // Info logging - for general, non-performance affecting information messages @@ -182,11 +182,11 @@ extern "C" { # define MVKLogTraceIf(cond, fmt, ...) #endif -#define MVKLogErrorImpl(fmt, ...) reportMessage(MVK_CONFIG_LOG_LEVEL_ERROR, fmt, ##__VA_ARGS__) -#define MVKLogWarningImpl(fmt, ...) reportMessage(MVK_CONFIG_LOG_LEVEL_WARNING, fmt, ##__VA_ARGS__) -#define MVKLogInfoImpl(fmt, ...) reportMessage(MVK_CONFIG_LOG_LEVEL_INFO, fmt, ##__VA_ARGS__) -#define MVKLogDebugImpl(fmt, ...) reportMessage(MVK_CONFIG_LOG_LEVEL_DEBUG, fmt, ##__VA_ARGS__) -#define MVKLogTraceImpl(fmt, ...) reportMessage(MVK_CONFIG_LOG_LEVEL_DEBUG, fmt, ##__VA_ARGS__) +#define MVKLogErrorImpl(fmt, ...) reportMessage(MVK_CONFIG_LOG_LEVEL_ERROR, fmt, ##__VA_ARGS__) +#define MVKLogWarnImpl(fmt, ...) reportMessage(MVK_CONFIG_LOG_LEVEL_WARNING, fmt, ##__VA_ARGS__) +#define MVKLogInfoImpl(fmt, ...) reportMessage(MVK_CONFIG_LOG_LEVEL_INFO, fmt, ##__VA_ARGS__) +#define MVKLogDebugImpl(fmt, ...) 
reportMessage(MVK_CONFIG_LOG_LEVEL_DEBUG, fmt, ##__VA_ARGS__) +#define MVKLogTraceImpl(fmt, ...) reportMessage(MVK_CONFIG_LOG_LEVEL_DEBUG, fmt, ##__VA_ARGS__) // Assertions #ifdef NS_BLOCK_ASSERTIONS diff --git a/Scripts/runcts b/Scripts/runcts index 20ae1abe5..bf65cc9e9 100755 --- a/Scripts/runcts +++ b/Scripts/runcts @@ -113,7 +113,7 @@ export MVK_CONFIG_USE_METAL_ARGUMENT_BUFFERS=0 #(2 = VK_EXT_descriptor_ export MVK_CONFIG_VK_SEMAPHORE_SUPPORT_STYLE=2 #(2 = MTLEvents always) export MVK_CONFIG_SHADER_COMPRESSION_ALGORITHM=0 #(2 = ZLIB, 3 = LZ4) export MVK_CONFIG_PERFORMANCE_TRACKING=0 -export MVK_CONFIG_ACTIVITY_PERFORMANCE_LOGGING_STYLE=2 #(2 = Device lifetime) +export MVK_CONFIG_ACTIVITY_PERFORMANCE_LOGGING_STYLE=3 #(2 = Device lifetime, 3 = Process lifetime) # -------------- Operation -------------------- From a28437d8f21dff45563eaa550a8331698a32babb Mon Sep 17 00:00:00 2001 From: Bill Hollings Date: Tue, 5 Sep 2023 14:56:43 -0400 Subject: [PATCH 08/21] Updates to code review on swapchain image presentation improvement. - Fix failure building on Xcode 14. - Track frame interval statistics, regardless of whether performance tracking is enabled. - Determine wait time for swapchain presentations from frame intervals. - MVKSwapchain call markFrameInterval() from within mutex lock. - MVKDevice rename addActivityPerformance() to addPerformanceInterval() and addActivityByteCount() to addPerformanceByteCount(). - Add documentation about performance being measured in milliseconds. 
--- MoltenVK/MoltenVK/API/mvk_private_api.h | 44 +++++++++---------- .../MoltenVK/Commands/MVKCommandBuffer.mm | 2 +- .../Commands/MVKCommandResourceFactory.mm | 8 ++-- MoltenVK/MoltenVK/GPUObjects/MVKDevice.h | 23 +++++----- MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm | 8 ++-- MoltenVK/MoltenVK/GPUObjects/MVKImage.mm | 4 +- MoltenVK/MoltenVK/GPUObjects/MVKPipeline.mm | 6 +-- MoltenVK/MoltenVK/GPUObjects/MVKQueue.mm | 14 +++--- .../MoltenVK/GPUObjects/MVKShaderModule.mm | 22 +++++----- MoltenVK/MoltenVK/GPUObjects/MVKSwapchain.mm | 8 ++-- MoltenVK/MoltenVK/GPUObjects/MVKSync.mm | 2 +- 11 files changed, 70 insertions(+), 71 deletions(-) diff --git a/MoltenVK/MoltenVK/API/mvk_private_api.h b/MoltenVK/MoltenVK/API/mvk_private_api.h index e496fc5ab..8ed5b7547 100644 --- a/MoltenVK/MoltenVK/API/mvk_private_api.h +++ b/MoltenVK/MoltenVK/API/mvk_private_api.h @@ -167,40 +167,40 @@ typedef struct { /** MoltenVK performance of shader compilation activities. */ typedef struct { - MVKPerformanceTracker hashShaderCode; /** Create a hash from the incoming shader code. */ - MVKPerformanceTracker spirvToMSL; /** Convert SPIR-V to MSL source code. */ - MVKPerformanceTracker mslCompile; /** Compile MSL source code into a MTLLibrary. */ - MVKPerformanceTracker mslLoad; /** Load pre-compiled MSL code into a MTLLibrary. */ - MVKPerformanceTracker mslCompress; /** Compress MSL source code after compiling a MTLLibrary, to hold it in a pipeline cache. */ - MVKPerformanceTracker mslDecompress; /** Decompress MSL source code to write the MSL when serializing a pipeline cache. */ - MVKPerformanceTracker shaderLibraryFromCache; /** Retrieve a shader library from the cache, lazily creating it if needed. */ - MVKPerformanceTracker functionRetrieval; /** Retrieve a MTLFunction from a MTLLibrary. */ - MVKPerformanceTracker functionSpecialization; /** Specialize a retrieved MTLFunction. */ - MVKPerformanceTracker pipelineCompile; /** Compile MTLFunctions into a pipeline. 
*/ - MVKPerformanceTracker glslToSPRIV; /** Convert GLSL to SPIR-V code. */ + MVKPerformanceTracker hashShaderCode; /** Create a hash from the incoming shader code, in milliseconds. */ + MVKPerformanceTracker spirvToMSL; /** Convert SPIR-V to MSL source code, in milliseconds. */ + MVKPerformanceTracker mslCompile; /** Compile MSL source code into a MTLLibrary, in milliseconds. */ + MVKPerformanceTracker mslLoad; /** Load pre-compiled MSL code into a MTLLibrary, in milliseconds. */ + MVKPerformanceTracker mslCompress; /** Compress MSL source code after compiling a MTLLibrary, to hold it in a pipeline cache, in milliseconds. */ + MVKPerformanceTracker mslDecompress; /** Decompress MSL source code to write the MSL when serializing a pipeline cache, in milliseconds. */ + MVKPerformanceTracker shaderLibraryFromCache; /** Retrieve a shader library from the cache, lazily creating it if needed, in milliseconds. */ + MVKPerformanceTracker functionRetrieval; /** Retrieve a MTLFunction from a MTLLibrary, in milliseconds. */ + MVKPerformanceTracker functionSpecialization; /** Specialize a retrieved MTLFunction, in milliseconds. */ + MVKPerformanceTracker pipelineCompile; /** Compile MTLFunctions into a pipeline, in milliseconds. */ + MVKPerformanceTracker glslToSPRIV; /** Convert GLSL to SPIR-V code, in milliseconds. */ } MVKShaderCompilationPerformance; /** MoltenVK performance of pipeline cache activities. */ typedef struct { - MVKPerformanceTracker sizePipelineCache; /** Calculate the size of cache data required to write MSL to pipeline cache data stream. */ - MVKPerformanceTracker writePipelineCache; /** Write MSL to pipeline cache data stream. */ - MVKPerformanceTracker readPipelineCache; /** Read MSL from pipeline cache data stream. */ + MVKPerformanceTracker sizePipelineCache; /** Calculate the size of cache data required to write MSL to pipeline cache data stream, in milliseconds. 
*/ + MVKPerformanceTracker writePipelineCache; /** Write MSL to pipeline cache data stream, in milliseconds. */ + MVKPerformanceTracker readPipelineCache; /** Read MSL from pipeline cache data stream, in milliseconds. */ } MVKPipelineCachePerformance; /** MoltenVK performance of queue activities. */ typedef struct { - MVKPerformanceTracker retrieveMTLCommandBuffer; /** Retrieve a MTLCommandBuffer from a MTLQueue. */ - MVKPerformanceTracker commandBufferEncoding; /** Encode a single VkCommandBuffer to a MTLCommandBuffer (excludes MTLCommandBuffer encoding from configured immediate prefilling). */ - MVKPerformanceTracker submitCommandBuffers; /** Submit and encode all VkCommandBuffers in a vkQueueSubmit() operation to MTLCommandBuffers (including both prefilled and deferred encoding). */ - MVKPerformanceTracker mtlCommandBufferExecution; /** Execute a MTLCommandBuffer on the GPU, from commit to completion callback. */ - MVKPerformanceTracker retrieveCAMetalDrawable; /** Retrieve next CAMetalDrawable from a CAMetalLayer. */ - MVKPerformanceTracker presentSwapchains; /** Present the swapchains in a vkQueuePresentKHR() on the GPU, from commit to presentation callback. */ - MVKPerformanceTracker frameInterval; /** Frame presentation interval (1000/FPS). */ + MVKPerformanceTracker retrieveMTLCommandBuffer; /** Retrieve a MTLCommandBuffer from a MTLQueue, in milliseconds. */ + MVKPerformanceTracker commandBufferEncoding; /** Encode a single VkCommandBuffer to a MTLCommandBuffer (excludes MTLCommandBuffer encoding from configured immediate prefilling), in milliseconds. */ + MVKPerformanceTracker submitCommandBuffers; /** Submit and encode all VkCommandBuffers in a vkQueueSubmit() operation to MTLCommandBuffers (including both prefilled and deferred encoding), in milliseconds. */ + MVKPerformanceTracker mtlCommandBufferExecution; /** Execute a MTLCommandBuffer on the GPU, from commit to completion callback, in milliseconds. 
*/ + MVKPerformanceTracker retrieveCAMetalDrawable; /** Retrieve next CAMetalDrawable from a CAMetalLayer, in milliseconds. */ + MVKPerformanceTracker presentSwapchains; /** Present the swapchains in a vkQueuePresentKHR() on the GPU, from commit to presentation callback, in milliseconds. */ + MVKPerformanceTracker frameInterval; /** Frame presentation interval (1000/FPS), in milliseconds. */ } MVKQueuePerformance; /** MoltenVK performance of device activities. */ typedef struct { - MVKPerformanceTracker gpuMemoryAllocated; /** GPU memory allocated (in KB). */ + MVKPerformanceTracker gpuMemoryAllocated; /** GPU memory allocated, in kilobytes. */ } MVKDevicePerformance; /** diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.mm b/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.mm index 4e0af4145..8ac91c264 100644 --- a/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.mm +++ b/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.mm @@ -347,7 +347,7 @@ encodeCommands(_cmdBuffer->_head); endEncoding(); - mvkDev->addActivityPerformance(mvkDev->_performanceStatistics.queue.commandBufferEncoding, startTime); + mvkDev->addPerformanceInterval(mvkDev->_performanceStatistics.queue.commandBufferEncoding, startTime); } void MVKCommandEncoder::beginEncoding(id mtlCmdBuff, MVKCommandEncodingContext* pEncodingContext) { diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandResourceFactory.mm b/MoltenVK/MoltenVK/Commands/MVKCommandResourceFactory.mm index b3003507c..973db991c 100644 --- a/MoltenVK/MoltenVK/Commands/MVKCommandResourceFactory.mm +++ b/MoltenVK/MoltenVK/Commands/MVKCommandResourceFactory.mm @@ -623,7 +623,7 @@ static void getSwizzleString(char swizzleStr[4], VkComponentMapping vkMapping) { NSString* nsFuncName = [[NSString alloc] initWithUTF8String: funcName]; // temp retained id mtlFunc = [_mtlLibrary newFunctionWithName: nsFuncName]; // retained [nsFuncName release]; // temp release - _device->addActivityPerformance(_device->_performanceStatistics.shaderCompilation.functionRetrieval, 
startTime); + _device->addPerformanceInterval(_device->_performanceStatistics.shaderCompilation.functionRetrieval, startTime); return mtlFunc; } @@ -636,7 +636,7 @@ static void getSwizzleString(char swizzleStr[4], VkComponentMapping vkMapping) { id mtlLib = [getMTLDevice() newLibraryWithSource: mslSrcCode options: getDevice()->getMTLCompileOptions() error: &err]; // temp retain - _device->addActivityPerformance(_device->_performanceStatistics.shaderCompilation.mslCompile, startTime); + _device->addPerformanceInterval(_device->_performanceStatistics.shaderCompilation.mslCompile, startTime); if (err) { reportError(VK_ERROR_INITIALIZATION_FAILED, @@ -645,7 +645,7 @@ static void getSwizzleString(char swizzleStr[4], VkComponentMapping vkMapping) { } else { startTime = _device->getPerformanceTimestamp(); mtlFunc = [mtlLib newFunctionWithName: funcName]; - _device->addActivityPerformance(_device->_performanceStatistics.shaderCompilation.functionRetrieval, startTime); + _device->addPerformanceInterval(_device->_performanceStatistics.shaderCompilation.functionRetrieval, startTime); } [mtlLib release]; // temp release @@ -689,7 +689,7 @@ static void getSwizzleString(char swizzleStr[4], VkComponentMapping vkMapping) { options: getDevice()->getMTLCompileOptions() error: &err]; // retained MVKAssert( !err, "Could not compile command shaders (Error code %li):\n%s", (long)err.code, err.localizedDescription.UTF8String); - _device->addActivityPerformance(_device->_performanceStatistics.shaderCompilation.mslCompile, startTime); + _device->addPerformanceInterval(_device->_performanceStatistics.shaderCompilation.mslCompile, startTime); } } diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h index 5d4c328be..5ae7f5ecd 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h +++ b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h @@ -689,26 +689,23 @@ class MVKDevice : public MVKDispatchableVulkanAPIObject { /** * If performance is being tracked, returns 
a monotonic timestamp value for use performance timestamping. - * * The returned value corresponds to the number of CPU "ticks" since the app was initialized. * - * Calling this value twice, subtracting the first value from the second, and then multiplying - * the result by the value returned by mvkGetTimestampPeriod() will provide an indication of the - * number of nanoseconds between the two calls. The convenience function mvkGetElapsedMilliseconds() - * can be used to perform this calculation. + * Call this function twice, then use the functions mvkGetElapsedNanoseconds() or mvkGetElapsedMilliseconds() + * to determine the number of nanoseconds or milliseconds between the two calls. */ uint64_t getPerformanceTimestamp() { return _isPerformanceTracking ? mvkGetTimestamp() : 0; } /** - * If performance is being tracked, adds the performance for an activity with a duration - * interval between the start and end times, to the given performance statistics. + * If performance is being tracked, adds the performance for an activity with a duration interval + * between the start and end times, measured in milliseconds, to the given performance statistics. * * If endTime is zero or not supplied, the current time is used. */ - void addActivityPerformance(MVKPerformanceTracker& activityTracker, + void addPerformanceInterval(MVKPerformanceTracker& perfTracker, uint64_t startTime, uint64_t endTime = 0) { if (_isPerformanceTracking) { - updateActivityPerformance(activityTracker, mvkGetElapsedMilliseconds(startTime, endTime)); + updateActivityPerformance(perfTracker, mvkGetElapsedMilliseconds(startTime, endTime)); } }; @@ -716,12 +713,15 @@ class MVKDevice : public MVKDispatchableVulkanAPIObject { * If performance is being tracked, adds the performance for an activity * with a kilobyte count, to the given performance statistics. 
*/ - void addActivityByteCount(MVKPerformanceTracker& activityTracker, uint64_t byteCount) { + void addPerformanceByteCount(MVKPerformanceTracker& perfTracker, uint64_t byteCount) { if (_isPerformanceTracking) { - updateActivityPerformance(activityTracker, double(byteCount / KIBI)); + updateActivityPerformance(perfTracker, double(byteCount / KIBI)); } }; + /** Updates the given performance statistic. */ + void updateActivityPerformance(MVKPerformanceTracker& activity, double currentValue); + /** Populates the specified statistics structure from the current activity performance statistics. */ void getPerformanceStatistics(MVKPerformanceStatistics* pPerf); @@ -897,7 +897,6 @@ class MVKDevice : public MVKDispatchableVulkanAPIObject { void logActivityInline(MVKPerformanceTracker& activity, MVKPerformanceStatistics& perfStats); void logActivityDuration(MVKPerformanceTracker& activity, MVKPerformanceStatistics& perfStats, bool isInline = false); void logActivityByteCount(MVKPerformanceTracker& activity, MVKPerformanceStatistics& perfStats, bool isInline = false); - void updateActivityPerformance(MVKPerformanceTracker& activity, double currentValue); void getDescriptorVariableDescriptorCountLayoutSupport(const VkDescriptorSetLayoutCreateInfo* pCreateInfo, VkDescriptorSetLayoutSupport* pSupport, VkDescriptorSetVariableDescriptorCountLayoutSupport* pVarDescSetCountSupport); diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm index d44e64929..1930ceedb 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm @@ -3058,7 +3058,7 @@ static uint32_t mvkGetEntryProperty(io_registry_entry_t entry, CFStringRef prope // If possible, retrieve from the MTLDevice, otherwise from available memory size, or a fixed conservative estimate. 
uint64_t MVKPhysicalDevice::getRecommendedMaxWorkingSetSize() { -#if MVK_XCODE_14 || MVK_MACOS +#if MVK_XCODE_15 || MVK_MACOS if ( [_mtlDevice respondsToSelector: @selector(recommendedMaxWorkingSetSize)]) { return _mtlDevice.recommendedMaxWorkingSetSize; } @@ -4194,7 +4194,7 @@ static uint32_t mvkGetEntryProperty(io_registry_entry_t entry, CFStringRef prope double total = (activity.average * activity.count++) + currentValue; activity.average = total / activity.count; - if (mvkConfig().activityPerformanceLoggingStyle == MVK_CONFIG_ACTIVITY_PERFORMANCE_LOGGING_STYLE_IMMEDIATE) { + if (_isPerformanceTracking && mvkConfig().activityPerformanceLoggingStyle == MVK_CONFIG_ACTIVITY_PERFORMANCE_LOGGING_STYLE_IMMEDIATE) { logActivityInline(activity, _performanceStatistics); } } @@ -4299,8 +4299,8 @@ static uint32_t mvkGetEntryProperty(io_registry_entry_t entry, CFStringRef prope } void MVKDevice::getPerformanceStatistics(MVKPerformanceStatistics* pPerf) { - addActivityByteCount(_performanceStatistics.device.gpuMemoryAllocated, - _physicalDevice->getCurrentAllocatedSize()); + addPerformanceByteCount(_performanceStatistics.device.gpuMemoryAllocated, + _physicalDevice->getCurrentAllocatedSize()); lock_guard lock(_perfLock); if (pPerf) { *pPerf = _performanceStatistics; } diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKImage.mm b/MoltenVK/MoltenVK/GPUObjects/MVKImage.mm index fa87643a3..b632e78b3 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKImage.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKImage.mm @@ -1293,7 +1293,7 @@ static void signalAndUnmarkAsTracked(const MVKSwapchainSignaler& signaler) { for (uint32_t attemptIdx = 0; !_mtlDrawable && attemptIdx < attemptCnt; attemptIdx++) { uint64_t startTime = _device->getPerformanceTimestamp(); _mtlDrawable = [_swapchain->_surface->getCAMetalLayer().nextDrawable retain]; // retained - _device->addActivityPerformance(_device->_performanceStatistics.queue.retrieveCAMetalDrawable, startTime); + 
_device->addPerformanceInterval(_device->_performanceStatistics.queue.retrieveCAMetalDrawable, startTime); } if ( !_mtlDrawable ) { reportError(VK_ERROR_OUT_OF_POOL_MEMORY, "CAMetalDrawable could not be acquired after %d attempts.", attemptCnt); } } @@ -1398,7 +1398,7 @@ static void signalAndUnmarkAsTracked(const MVKSwapchainSignaler& signaler) { // If I have become detached from the swapchain, it means the swapchain, and possibly the // VkDevice, have been destroyed by the time of this callback, so do not reference them. lock_guard lock(_detachmentLock); - if (_device) { _device->addActivityPerformance(_device->_performanceStatistics.queue.presentSwapchains, _presentationStartTime); } + if (_device) { _device->addPerformanceInterval(_device->_performanceStatistics.queue.presentSwapchains, _presentationStartTime); } if (_swapchain) { _swapchain->endPresentation(presentInfo, actualPresentTime); } } presentInfo.queue->endPresentation(presentInfo); diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.mm b/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.mm index 13f59b6bc..e3da96b34 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.mm @@ -2269,7 +2269,7 @@ static MTLVertexFormat mvkAdjustFormatVectorToSize(MTLVertexFormat format, uint3 // Serializes the data in this cache to a stream void MVKPipelineCache::writeData(ostream& outstream, bool isCounting) { #if MVK_USE_CEREAL - MVKPerformanceTracker& activityTracker = isCounting + MVKPerformanceTracker& perfTracker = isCounting ? 
_device->_performanceStatistics.pipelineCache.sizePipelineCache : _device->_performanceStatistics.pipelineCache.writePipelineCache; @@ -2297,7 +2297,7 @@ static MTLVertexFormat mvkAdjustFormatVectorToSize(MTLVertexFormat format, uint3 writer(cacheIter.getShaderConversionConfig()); writer(cacheIter.getShaderConversionResultInfo()); writer(cacheIter.getCompressedMSL()); - _device->addActivityPerformance(activityTracker, startTime); + _device->addPerformanceInterval(perfTracker, startTime); } } @@ -2366,7 +2366,7 @@ static MTLVertexFormat mvkAdjustFormatVectorToSize(MTLVertexFormat format, uint3 // Add the shader library to the staging cache. MVKShaderLibraryCache* slCache = getShaderLibraryCache(smKey); - _device->addActivityPerformance(_device->_performanceStatistics.pipelineCache.readPipelineCache, startTime); + _device->addPerformanceInterval(_device->_performanceStatistics.pipelineCache.readPipelineCache, startTime); slCache->addShaderLibrary(&shaderConversionConfig, resultInfo, compressedMSL); break; diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKQueue.mm b/MoltenVK/MoltenVK/GPUObjects/MVKQueue.mm index 293f50eff..c104deeda 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKQueue.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKQueue.mm @@ -150,8 +150,9 @@ // few frames for that to happen. If there are still swapchain presentations that haven't completed, // log a warning, and force them to end presentation, so the images and drawables will be released. 
void MVKQueue::waitSwapchainPresentations(MVKCommandUse cmdUse) { - auto waitFrames = _device->_pMetalFeatures->maxSwapchainImageCount + 2; - if (_presentationCompletionBlocker.wait((waitFrames/60.0) * 1e9)) { return; } + uint32_t waitFrames = _device->_pMetalFeatures->maxSwapchainImageCount + 2; + uint64_t waitNanos = waitFrames * _device->_performanceStatistics.queue.frameInterval.average * 1e6; + if (_presentationCompletionBlocker.wait(waitNanos)) { return; } auto imgCnt = _presentationCompletionBlocker.getReservationCount(); MVKPresentableSwapchainImage* images[imgCnt]; @@ -173,7 +174,7 @@ // Wait for forced presentation completions. If we still have unfinished swapchain image // presentations, log a warning, and force each image to end, so that it can be released. - if ( !_presentationCompletionBlocker.wait((waitFrames/60.0) * 1e9) ) { + if ( !_presentationCompletionBlocker.wait(waitNanos) ) { reportWarning(VK_TIMEOUT, "%s timed out after %d frames while awaiting %d swapchain image presentations to complete.", mvkVkCommandName(cmdUse), waitFrames * 2, _presentationCompletionBlocker.getReservationCount()); for (size_t imgIdx = 0; imgIdx < imgCnt; imgIdx++) { @@ -203,7 +204,7 @@ } else { mtlCmdBuff = [_mtlQueue commandBufferWithUnretainedReferences]; } - mvkDev->addActivityPerformance(mvkDev->_performanceStatistics.queue.retrieveMTLCommandBuffer, startTime); + mvkDev->addPerformanceInterval(mvkDev->_performanceStatistics.queue.retrieveMTLCommandBuffer, startTime); NSString* mtlCmdBuffLabel = getMTLCommandBufferLabel(cmdUse); setLabelIfNotNil(mtlCmdBuff, mtlCmdBuffLabel); [mtlCmdBuff addCompletedHandler: ^(id mtlCB) { handleMTLCommandBufferError(mtlCB); }]; @@ -496,7 +497,7 @@ MVKDevice* mvkDev = getDevice(); uint64_t startTime = mvkDev->getPerformanceTimestamp(); [mtlCmdBuff addCompletedHandler: ^(id mtlCB) { - mvkDev->addActivityPerformance(mvkDev->_performanceStatistics.queue.mtlCommandBufferExecution, startTime); + 
mvkDev->addPerformanceInterval(mvkDev->_performanceStatistics.queue.mtlCommandBufferExecution, startTime); if (signalCompletion) { this->finish(); } // Must be the last thing the completetion callback does. }]; @@ -592,7 +593,7 @@ for (auto& cb : _cmdBuffers) { cb->submit(this, &_encodingContext); } - mvkDev->addActivityPerformance(mvkDev->_performanceStatistics.queue.submitCommandBuffers, startTime); + mvkDev->addPerformanceInterval(mvkDev->_performanceStatistics.queue.submitCommandBuffers, startTime); } template @@ -602,7 +603,6 @@ MVKCommandUse cmdUse) : MVKQueueCommandBufferSubmission(queue, pSubmit, fence, cmdUse) { - // pSubmit can be null if just tracking the fence alone if (pSubmit) { uint32_t cbCnt = pSubmit->commandBufferCount; _cmdBuffers.reserve(cbCnt); diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKShaderModule.mm b/MoltenVK/MoltenVK/GPUObjects/MVKShaderModule.mm index a47a65b77..908314989 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKShaderModule.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKShaderModule.mm @@ -80,7 +80,7 @@ static uint32_t getWorkgroupDimensionSize(const SPIRVWorkgroupSizeDimension& wgD uint64_t startTime = pShaderFeedback ? 
mvkGetTimestamp() : mvkDev->getPerformanceTimestamp(); id mtlFunc = [[_mtlLibrary newFunctionWithName: mtlFuncName] autorelease]; - mvkDev->addActivityPerformance(mvkDev->_performanceStatistics.shaderCompilation.functionRetrieval, startTime); + mvkDev->addPerformanceInterval(mvkDev->_performanceStatistics.shaderCompilation.functionRetrieval, startTime); if (pShaderFeedback) { if (mtlFunc) { mvkEnableFlags(pShaderFeedback->flags, VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT); @@ -156,7 +156,7 @@ static uint32_t getWorkgroupDimensionSize(const SPIRVWorkgroupSizeDimension& wgD MVKDevice* mvkDev = _owner->getDevice(); uint64_t startTime = mvkDev->getPerformanceTimestamp(); _compressedMSL.compress(msl, mvkConfig().shaderSourceCompressionAlgorithm); - mvkDev->addActivityPerformance(mvkDev->_performanceStatistics.shaderCompilation.mslCompress, startTime); + mvkDev->addPerformanceInterval(mvkDev->_performanceStatistics.shaderCompilation.mslCompress, startTime); } // Decompresses the cached MSL into the string. 
@@ -164,7 +164,7 @@ static uint32_t getWorkgroupDimensionSize(const SPIRVWorkgroupSizeDimension& wgD MVKDevice* mvkDev = _owner->getDevice(); uint64_t startTime = mvkDev->getPerformanceTimestamp(); _compressedMSL.decompress(msl); - mvkDev->addActivityPerformance(mvkDev->_performanceStatistics.shaderCompilation.mslDecompress, startTime); + mvkDev->addPerformanceInterval(mvkDev->_performanceStatistics.shaderCompilation.mslDecompress, startTime); } MVKShaderLibrary::MVKShaderLibrary(MVKVulkanAPIDeviceObject* owner, @@ -207,7 +207,7 @@ static uint32_t getWorkgroupDimensionSize(const SPIRVWorkgroupSizeDimension& wgD handleCompilationError(err, "Compiled shader module creation"); [shdrData release]; } - mvkDev->addActivityPerformance(mvkDev->_performanceStatistics.shaderCompilation.mslLoad, startTime); + mvkDev->addPerformanceInterval(mvkDev->_performanceStatistics.shaderCompilation.mslLoad, startTime); } MVKShaderLibrary::MVKShaderLibrary(const MVKShaderLibrary& other) { @@ -283,7 +283,7 @@ static uint32_t getWorkgroupDimensionSize(const SPIRVWorkgroupSizeDimension& wgD if (slPair.first.matches(*pShaderConfig)) { pShaderConfig->alignWith(slPair.first); MVKDevice* mvkDev = _owner->getDevice(); - mvkDev->addActivityPerformance(mvkDev->_performanceStatistics.shaderCompilation.shaderLibraryFromCache, startTime); + mvkDev->addPerformanceInterval(mvkDev->_performanceStatistics.shaderCompilation.shaderLibraryFromCache, startTime); if (pShaderFeedback) { pShaderFeedback->duration += mvkGetElapsedNanoseconds(startTime); } @@ -363,7 +363,7 @@ static uint32_t getWorkgroupDimensionSize(const SPIRVWorkgroupSizeDimension& wgD GLSLToSPIRVConversionResult glslConversionResult; uint64_t startTime = _device->getPerformanceTimestamp(); bool wasConverted = _glslConverter.convert(getMVKGLSLConversionShaderStage(pShaderConfig), glslConversionResult, shouldLogCode, false); - _device->addActivityPerformance(_device->_performanceStatistics.shaderCompilation.glslToSPRIV, startTime); + 
_device->addPerformanceInterval(_device->_performanceStatistics.shaderCompilation.glslToSPRIV, startTime); if (wasConverted) { if (shouldLogCode) { MVKLogInfo("%s", glslConversionResult.resultLog.c_str()); } @@ -376,7 +376,7 @@ static uint32_t getWorkgroupDimensionSize(const SPIRVWorkgroupSizeDimension& wgD uint64_t startTime = _device->getPerformanceTimestamp(); bool wasConverted = _spvConverter.convert(*pShaderConfig, conversionResult, shouldLogCode, shouldLogCode, shouldLogEstimatedGLSL); - _device->addActivityPerformance(_device->_performanceStatistics.shaderCompilation.spirvToMSL, startTime); + _device->addPerformanceInterval(_device->_performanceStatistics.shaderCompilation.spirvToMSL, startTime); if (wasConverted) { if (shouldLogCode) { MVKLogInfo("%s", conversionResult.resultLog.c_str()); } @@ -436,7 +436,7 @@ static uint32_t getWorkgroupDimensionSize(const SPIRVWorkgroupSizeDimension& wgD uint64_t startTime = _device->getPerformanceTimestamp(); codeHash = mvkHash(pCreateInfo->pCode, spvCount); - _device->addActivityPerformance(_device->_performanceStatistics.shaderCompilation.hashShaderCode, startTime); + _device->addPerformanceInterval(_device->_performanceStatistics.shaderCompilation.hashShaderCode, startTime); _spvConverter.setSPIRV(pCreateInfo->pCode, spvCount); @@ -450,7 +450,7 @@ static uint32_t getWorkgroupDimensionSize(const SPIRVWorkgroupSizeDimension& wgD uint64_t startTime = _device->getPerformanceTimestamp(); codeHash = mvkHash(&magicNum); codeHash = mvkHash(pMSLCode, mslCodeLen, codeHash); - _device->addActivityPerformance(_device->_performanceStatistics.shaderCompilation.hashShaderCode, startTime); + _device->addPerformanceInterval(_device->_performanceStatistics.shaderCompilation.hashShaderCode, startTime); SPIRVToMSLConversionResult conversionResult; conversionResult.msl = pMSLCode; @@ -466,7 +466,7 @@ static uint32_t getWorkgroupDimensionSize(const SPIRVWorkgroupSizeDimension& wgD uint64_t startTime = _device->getPerformanceTimestamp(); 
codeHash = mvkHash(&magicNum); codeHash = mvkHash(pMSLCode, mslCodeLen, codeHash); - _device->addActivityPerformance(_device->_performanceStatistics.shaderCompilation.hashShaderCode, startTime); + _device->addPerformanceInterval(_device->_performanceStatistics.shaderCompilation.hashShaderCode, startTime); _directMSLLibrary = new MVKShaderLibrary(this, (void*)(pMSLCode), mslCodeLen); @@ -479,7 +479,7 @@ static uint32_t getWorkgroupDimensionSize(const SPIRVWorkgroupSizeDimension& wgD uint64_t startTime = _device->getPerformanceTimestamp(); codeHash = mvkHash(pGLSL, codeSize); - _device->addActivityPerformance(_device->_performanceStatistics.shaderCompilation.hashShaderCode, startTime); + _device->addPerformanceInterval(_device->_performanceStatistics.shaderCompilation.hashShaderCode, startTime); _glslConverter.setGLSL(pGLSL, glslLen); } else { diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKSwapchain.mm b/MoltenVK/MoltenVK/GPUObjects/MVKSwapchain.mm index f326f82e1..159c2edf1 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKSwapchain.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKSwapchain.mm @@ -159,13 +159,14 @@ } // Calculates and remembers the time interval between frames. +// Not threadsafe. Ensure this is called from a threadsafe environment. 
void MVKSwapchain::markFrameInterval() { uint64_t prevFrameTime = _lastFrameTime; - _lastFrameTime = _device->getPerformanceTimestamp(); + _lastFrameTime = mvkGetTimestamp(); if (prevFrameTime == 0) { return; } // First frame starts at first presentation - _device->addActivityPerformance(_device->_performanceStatistics.queue.frameInterval, prevFrameTime, _lastFrameTime); + _device->updateActivityPerformance(_device->_performanceStatistics.queue.frameInterval, mvkGetElapsedMilliseconds(prevFrameTime, _lastFrameTime)); auto& mvkCfg = mvkConfig(); bool shouldLogOnFrames = mvkCfg.performanceTracking && mvkCfg.activityPerformanceLoggingStyle == MVK_CONFIG_ACTIVITY_PERFORMANCE_LOGGING_STYLE_FRAME_COUNT; @@ -244,10 +245,9 @@ void MVKSwapchain::beginPresentation(const MVKImagePresentInfo& presentInfo) {} void MVKSwapchain::endPresentation(const MVKImagePresentInfo& presentInfo, uint64_t actualPresentTime) { + std::lock_guard lock(_presentHistoryLock); markFrameInterval(); - - std::lock_guard lock(_presentHistoryLock); if (_presentHistoryCount < kMaxPresentationHistory) { _presentHistoryCount++; } else { diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKSync.mm b/MoltenVK/MoltenVK/GPUObjects/MVKSync.mm index fb1e0190c..b7a4a64de 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKSync.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKSync.mm @@ -588,7 +588,7 @@ VkResult mvkWaitSemaphores(MVKDevice* device, if (_compileError) { handleError(); } - mvkDev->addActivityPerformance(*_pPerformanceTracker, _startTime); + mvkDev->addPerformanceInterval(*_pPerformanceTracker, _startTime); } void MVKMetalCompiler::handleError() { From 7fe4963985d8ae44159243d8babff25cf830bca7 Mon Sep 17 00:00:00 2001 From: Bill Hollings Date: Wed, 6 Sep 2023 16:16:11 -0400 Subject: [PATCH 09/21] Guard against CAMetalDrawable with invalid pixel format. - Calling nextDrawable may result in a nil drawable, or a drawable with no pixel format. 
Attempt several times to retrieve a drawable with a valid pixel format, and if unsuccessful, return an error from vkQueuePresentKHR() and vkAcquireNextImageKHR(), to force swapchain to be re-created. - Reorganize MVKQueuePresentSurfaceSubmission::execute() to detect drawable with invalid format, attach MTLCommandBuffer completion handler just before commit, and delay enqueuing MTLCommandBuffer until commit. - Refactor mvkOSVersionIsAtLeast() for clarity (unrelated). --- Common/MVKOSExtensions.h | 17 +++++++++------- Docs/Whats_New.md | 1 + MoltenVK/MoltenVK/GPUObjects/MVKImage.h | 2 +- MoltenVK/MoltenVK/GPUObjects/MVKImage.mm | 23 ++++++++++++++++------ MoltenVK/MoltenVK/GPUObjects/MVKQueue.mm | 25 ++++++++++++------------ 5 files changed, 42 insertions(+), 26 deletions(-) diff --git a/Common/MVKOSExtensions.h b/Common/MVKOSExtensions.h index 79d89216c..e824ba062 100644 --- a/Common/MVKOSExtensions.h +++ b/Common/MVKOSExtensions.h @@ -39,27 +39,30 @@ static const MVKOSVersion kMVKOSVersionUnsupported = std::numeric_limits= minVer; } +static inline bool mvkOSVersionIsAtLeast(MVKOSVersion minVer) { return mvkOSVersion() >= minVer; } /** * Returns whether the operating system version is at least the appropriate min version. - * The constant kMVKOSVersionUnsupported can be used for either value to cause the test - * to always fail on that OS, which is useful for indidicating functionalty guarded by + * The constant kMVKOSVersionUnsupported can be used for any of the values to cause the test + * to always fail on that OS, which is useful for indidicating that functionalty guarded by * this test is not supported on that OS. 
*/ -inline bool mvkOSVersionIsAtLeast(MVKOSVersion macOSMinVer, MVKOSVersion iOSMinVer, MVKOSVersion visionOSMinVer) { +static inline bool mvkOSVersionIsAtLeast(MVKOSVersion macOSMinVer, + MVKOSVersion iOSMinVer, + MVKOSVersion visionOSMinVer) { #if MVK_MACOS return mvkOSVersionIsAtLeast(macOSMinVer); #endif +#if MVK_IOS_OR_TVOS + return mvkOSVersionIsAtLeast(iOSMinVer); +#endif #if MVK_VISIONOS return mvkOSVersionIsAtLeast(visionOSMinVer); -#elif MVK_IOS_OR_TVOS - return mvkOSVersionIsAtLeast(iOSMinVer); #endif } diff --git a/Docs/Whats_New.md b/Docs/Whats_New.md index 3e476dd3d..836a60a9a 100644 --- a/Docs/Whats_New.md +++ b/Docs/Whats_New.md @@ -20,6 +20,7 @@ Released TBD - Fix rare case where vertex attribute buffers are not bound to Metal when no other bindings change between pipelines. +- Fix case where a `CAMetalDrawable` with invalid pixel format causes onscreen flickering. - Improve behavior of swapchain image presentation stalls caused by Metal regression. - Add several additional performance trackers, available via logging, or the `mvk_private_api.h` API. - Update `MVK_CONFIGURATION_API_VERSION` and `MVK_PRIVATE_API_VERSION` to `38`. diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKImage.h b/MoltenVK/MoltenVK/GPUObjects/MVKImage.h index fb7c3dfa4..1479f7242 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKImage.h +++ b/MoltenVK/MoltenVK/GPUObjects/MVKImage.h @@ -454,7 +454,7 @@ class MVKPresentableSwapchainImage : public MVKSwapchainImage { #pragma mark Metal /** Presents the contained drawable to the OS. */ - void presentCAMetalDrawable(id mtlCmdBuff, MVKImagePresentInfo presentInfo); + VkResult presentCAMetalDrawable(id mtlCmdBuff, MVKImagePresentInfo presentInfo); /** Called when the presentation begins. 
*/ void beginPresentation(const MVKImagePresentInfo& presentInfo); diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKImage.mm b/MoltenVK/MoltenVK/GPUObjects/MVKImage.mm index b632e78b3..1769df11d 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKImage.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKImage.mm @@ -1258,7 +1258,6 @@ static void signalAndUnmarkAsTracked(const MVKSwapchainSignaler& signaler) { // This is not done earlier so the texture is retained for any post-processing such as screen captures, etc. releaseMetalDrawable(); - VkResult rslt = VK_SUCCESS; auto signaler = MVKSwapchainSignaler{fence, semaphore, semaphore ? semaphore->deferSignal() : 0}; if (_availability.isAvailable) { _availability.isAvailable = false; @@ -1271,7 +1270,7 @@ static void signalAndUnmarkAsTracked(const MVKSwapchainSignaler& signaler) { id mtlCmdBuff = nil; if (mvkSem && mvkSem->isUsingCommandEncoding()) { mtlCmdBuff = _device->getAnyQueue()->getMTLCommandBuffer(kMVKCommandUseAcquireNextImage); - if ( !mtlCmdBuff ) { rslt = VK_ERROR_OUT_OF_POOL_MEMORY; } + if ( !mtlCmdBuff ) { setConfigurationResult(VK_ERROR_OUT_OF_POOL_MEMORY); } } signal(signaler, mtlCmdBuff); [mtlCmdBuff commit]; @@ -1283,19 +1282,29 @@ static void signalAndUnmarkAsTracked(const MVKSwapchainSignaler& signaler) { } markAsTracked(signaler); - return rslt; + return getConfigurationResult(); } +// Calling nextDrawable may result in a nil drawable, or a drawable with no pixel format. +// Attempt several times to retrieve a good drawable, and set an error to trigger the +// swapchain to be re-established if one cannot be retrieved. 
id MVKPresentableSwapchainImage::getCAMetalDrawable() { if ( !_mtlDrawable ) { @autoreleasepool { + bool hasInvalidFormat = false; uint32_t attemptCnt = _swapchain->getImageCount() * 2; // Attempt a resonable number of times for (uint32_t attemptIdx = 0; !_mtlDrawable && attemptIdx < attemptCnt; attemptIdx++) { uint64_t startTime = _device->getPerformanceTimestamp(); _mtlDrawable = [_swapchain->_surface->getCAMetalLayer().nextDrawable retain]; // retained _device->addPerformanceInterval(_device->_performanceStatistics.queue.retrieveCAMetalDrawable, startTime); + hasInvalidFormat = _mtlDrawable && !_mtlDrawable.texture.pixelFormat; + if (hasInvalidFormat) { releaseMetalDrawable(); } + } + if (hasInvalidFormat) { + setConfigurationResult(reportError(VK_ERROR_OUT_OF_DATE_KHR, "CAMetalDrawable with valid format could not be acquired after %d attempts.", attemptCnt)); + } else if ( !_mtlDrawable ) { + setConfigurationResult(reportError(VK_ERROR_OUT_OF_POOL_MEMORY, "CAMetalDrawable could not be acquired after %d attempts.", attemptCnt)); } - if ( !_mtlDrawable ) { reportError(VK_ERROR_OUT_OF_POOL_MEMORY, "CAMetalDrawable could not be acquired after %d attempts.", attemptCnt); } } } return _mtlDrawable; @@ -1303,8 +1312,8 @@ static void signalAndUnmarkAsTracked(const MVKSwapchainSignaler& signaler) { // Present the drawable and make myself available only once the command buffer has completed. // Pass MVKImagePresentInfo by value because it may not exist when the callback runs. 
-void MVKPresentableSwapchainImage::presentCAMetalDrawable(id mtlCmdBuff, - MVKImagePresentInfo presentInfo) { +VkResult MVKPresentableSwapchainImage::presentCAMetalDrawable(id mtlCmdBuff, + MVKImagePresentInfo presentInfo) { lock_guard lock(_availabilityLock); _swapchain->renderWatermark(getMTLTexture(0), mtlCmdBuff); @@ -1363,6 +1372,8 @@ static void signalAndUnmarkAsTracked(const MVKSwapchainSignaler& signaler) { }]; signalPresentationSemaphore(signaler, mtlCmdBuff); + + return getConfigurationResult(); } // Pass MVKImagePresentInfo by value because it may not exist when the callback runs. diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKQueue.mm b/MoltenVK/MoltenVK/GPUObjects/MVKQueue.mm index c104deeda..f53cb71d4 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKQueue.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKQueue.mm @@ -623,12 +623,6 @@ // The semaphores know what to do. VkResult MVKQueuePresentSurfaceSubmission::execute() { id mtlCmdBuff = _queue->getMTLCommandBuffer(kMVKCommandUseQueuePresent); - [mtlCmdBuff enqueue]; - - // Add completion handler that will destroy this submission only once the MTLCommandBuffer - // is finished with the resources retained here, including the wait semaphores. - // Completion handlers are also added in presentCAMetalDrawable() to retain the swapchain images. - [mtlCmdBuff addCompletedHandler: ^(id mtlCB) { this->finish(); }]; for (auto& ws : _waitSemaphores) { auto& sem4 = ws.first; @@ -637,15 +631,22 @@ } for (int i = 0; i < _presentInfo.size(); i++ ) { - _presentInfo[i].presentableImage->presentCAMetalDrawable(mtlCmdBuff, _presentInfo[i]); + setConfigurationResult(_presentInfo[i].presentableImage->presentCAMetalDrawable(mtlCmdBuff, _presentInfo[i])); } - [mtlCmdBuff commit]; - - // If an error occurred and the MTLCommandBuffer was not created, call finish() directly. - if ( !mtlCmdBuff ) { finish(); } + if ( !mtlCmdBuff ) { setConfigurationResult(VK_ERROR_OUT_OF_POOL_MEMORY); } // Check after images may set error. 
- return mtlCmdBuff ? VK_SUCCESS : VK_ERROR_OUT_OF_POOL_MEMORY; + // Add completion callback to the MTLCommandBuffer to call finish(), + // or if the MTLCommandBuffer could not be created, call finish() directly. + // Retrieve the result first, because finish() will destroy this instance. + VkResult rslt = getConfigurationResult(); + if (mtlCmdBuff) { + [mtlCmdBuff addCompletedHandler: ^(id mtlCB) { this->finish(); }]; + [mtlCmdBuff commit]; + } else { + finish(); + } + return rslt; } void MVKQueuePresentSurfaceSubmission::finish() { From 6c6139ca929d2edd01930b3034c2ccd5adc55705 Mon Sep 17 00:00:00 2001 From: Bill Hollings Date: Thu, 7 Sep 2023 09:33:40 -0400 Subject: [PATCH 10/21] Update Common/MVKOSExtensions.h Co-authored-by: Chip Davis --- Common/MVKOSExtensions.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Common/MVKOSExtensions.h b/Common/MVKOSExtensions.h index e824ba062..13d864dab 100644 --- a/Common/MVKOSExtensions.h +++ b/Common/MVKOSExtensions.h @@ -49,7 +49,7 @@ static inline bool mvkOSVersionIsAtLeast(MVKOSVersion minVer) { return mvkOSVers /** * Returns whether the operating system version is at least the appropriate min version. * The constant kMVKOSVersionUnsupported can be used for any of the values to cause the test - * to always fail on that OS, which is useful for indidicating that functionalty guarded by + * to always fail on that OS, which is useful for indicating that functionality guarded by * this test is not supported on that OS. */ static inline bool mvkOSVersionIsAtLeast(MVKOSVersion macOSMinVer, From 0ee15222c83157dc765a9fdeac614e1620f76d12 Mon Sep 17 00:00:00 2001 From: Bill Hollings Date: Fri, 8 Sep 2023 20:46:28 -0400 Subject: [PATCH 11/21] Ensure objects retained for life of MTLCommandBuffer. - vkCmdBlitImage() ensure swizzle texture view is retained for life of MTLCommandBuffer. - vkQueuePresentKHR() use MTLCommandBuffer that retains references. - Update MoltenVK version to 1.2.6. 
--- Docs/Whats_New.md | 1 + MoltenVK/MoltenVK/API/mvk_config.h | 2 +- MoltenVK/MoltenVK/Commands/MVKCmdTransfer.mm | 19 ++++++++----------- MoltenVK/MoltenVK/GPUObjects/MVKQueue.mm | 5 ++++- 4 files changed, 14 insertions(+), 13 deletions(-) diff --git a/Docs/Whats_New.md b/Docs/Whats_New.md index 836a60a9a..605cda82c 100644 --- a/Docs/Whats_New.md +++ b/Docs/Whats_New.md @@ -20,6 +20,7 @@ Released TBD - Fix rare case where vertex attribute buffers are not bound to Metal when no other bindings change between pipelines. +- Ensure objects retained for life of `MTLCommandBuffer` during `vkCmdBlitImage()` & `vkQueuePresentKHR()`. - Fix case where a `CAMetalDrawable` with invalid pixel format causes onscreen flickering. - Improve behavior of swapchain image presentation stalls caused by Metal regression. - Add several additional performance trackers, available via logging, or the `mvk_private_api.h` API. diff --git a/MoltenVK/MoltenVK/API/mvk_config.h b/MoltenVK/MoltenVK/API/mvk_config.h index 532d27db4..f97026619 100644 --- a/MoltenVK/MoltenVK/API/mvk_config.h +++ b/MoltenVK/MoltenVK/API/mvk_config.h @@ -45,7 +45,7 @@ extern "C" { */ #define MVK_VERSION_MAJOR 1 #define MVK_VERSION_MINOR 2 -#define MVK_VERSION_PATCH 5 +#define MVK_VERSION_PATCH 6 #define MVK_MAKE_VERSION(major, minor, patch) (((major) * 10000) + ((minor) * 100) + (patch)) #define MVK_VERSION MVK_MAKE_VERSION(MVK_VERSION_MAJOR, MVK_VERSION_MINOR, MVK_VERSION_PATCH) diff --git a/MoltenVK/MoltenVK/Commands/MVKCmdTransfer.mm b/MoltenVK/MoltenVK/Commands/MVKCmdTransfer.mm index 2c0ef5465..1c38d15a6 100644 --- a/MoltenVK/MoltenVK/Commands/MVKCmdTransfer.mm +++ b/MoltenVK/MoltenVK/Commands/MVKCmdTransfer.mm @@ -504,11 +504,12 @@ static inline MTLSize mvkClampMTLSize(MTLSize size, MTLOrigin origin, MTLSize ma if (cmdEncoder->getDevice()->_pMetalFeatures->nativeTextureSwizzle && _srcImage->needsSwizzle()) { // Use a view that has a swizzle on it. 
- srcMTLTex = [[srcMTLTex newTextureViewWithPixelFormat:srcMTLTex.pixelFormat - textureType:srcMTLTex.textureType - levels:NSMakeRange(0, srcMTLTex.mipmapLevelCount) - slices:NSMakeRange(0, srcMTLTex.arrayLength) - swizzle:_srcImage->getPixelFormats()->getMTLTextureSwizzleChannels(_srcImage->getVkFormat())] autorelease]; + srcMTLTex = [srcMTLTex newTextureViewWithPixelFormat:srcMTLTex.pixelFormat + textureType:srcMTLTex.textureType + levels:NSMakeRange(0, srcMTLTex.mipmapLevelCount) + slices:NSMakeRange(0, srcMTLTex.arrayLength) + swizzle:_srcImage->getPixelFormats()->getMTLTextureSwizzleChannels(_srcImage->getVkFormat())]; + [cmdEncoder->_mtlCmdBuffer addCompletedHandler: ^(id) { [srcMTLTex release]; }]; } cmdEncoder->endCurrentMetalEncoding(); @@ -551,9 +552,7 @@ static inline MTLSize mvkClampMTLSize(MTLSize size, MTLOrigin origin, MTLSize ma textureType: MTLTextureType2DArray levels: NSMakeRange(0, srcMTLTex.mipmapLevelCount) slices: NSMakeRange(0, srcMTLTex.arrayLength)]; - [cmdEncoder->_mtlCmdBuffer addCompletedHandler: ^(id) { - [srcMTLTex release]; - }]; + [cmdEncoder->_mtlCmdBuffer addCompletedHandler: ^(id) { [srcMTLTex release]; }]; } blitKey.dstMTLPixelFormat = _dstImage->getMTLPixelFormat(dstPlaneIndex); blitKey.srcFilter = mvkMTLSamplerMinMagFilterFromVkFilter(_filter); @@ -655,9 +654,7 @@ static inline MTLSize mvkClampMTLSize(MTLSize size, MTLOrigin origin, MTLSize ma #endif } id stencilMTLTex = [srcMTLTex newTextureViewWithPixelFormat: stencilFmt]; - [cmdEncoder->_mtlCmdBuffer addCompletedHandler: ^(id) { - [stencilMTLTex release]; - }]; + [cmdEncoder->_mtlCmdBuffer addCompletedHandler: ^(id) { [stencilMTLTex release]; }]; [mtlRendEnc setFragmentTexture: stencilMTLTex atIndex: 1]; } else { [mtlRendEnc setFragmentTexture: srcMTLTex atIndex: 1]; diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKQueue.mm b/MoltenVK/MoltenVK/GPUObjects/MVKQueue.mm index f53cb71d4..1c28f63fb 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKQueue.mm +++ 
b/MoltenVK/MoltenVK/GPUObjects/MVKQueue.mm @@ -622,7 +622,10 @@ // If the semaphores are not encodable, wait on them inline after presenting. // The semaphores know what to do. VkResult MVKQueuePresentSurfaceSubmission::execute() { - id mtlCmdBuff = _queue->getMTLCommandBuffer(kMVKCommandUseQueuePresent); + // Use a MTLCommandBuffer that retains references, to avoid a rare case where objects are destroyed too early. + // Although testing could not determine which objects were being lost, queue present MTLCommandBuffers + // are used only once per frame, and retain so few objects, that blanket retention is still performant. + id mtlCmdBuff = _queue->getMTLCommandBuffer(kMVKCommandUseQueuePresent, true); for (auto& ws : _waitSemaphores) { auto& sem4 = ws.first; From 62e0368e21c067ee7bb12cbd87bfae04826c5636 Mon Sep 17 00:00:00 2001 From: Bill Hollings Date: Mon, 11 Sep 2023 20:14:23 -0400 Subject: [PATCH 12/21] Add configurable lowpass filter for VkPhysicalDeviceLimits::timestampPeriod. - Add MVKConfiguration::timestampPeriodLowPassAlpha, along with matching MVK_CONFIG_TIMESTAMP_PERIOD_LOWPASS_ALPHA env var. - Add MVKConfigMembers.def file to describe MVKConfiguration members, to support consistent batch handling of members. - Add env var & build settings MVK_CONFIG_DEBUG, plus legacy MVK_CONFIG_ALLOW_METAL_EVENTS & MVK_CONFIG_ALLOW_METAL_FENCES. - Simplify environment variable retrieval functions and macros. - Rename MVKDevice::updateTimestampsAndPeriod() to updateTimestampPeriod(). 
--- Common/MVKOSExtensions.h | 56 ++---------- Common/MVKOSExtensions.mm | 17 ++-- Docs/MoltenVK_Runtime_UserGuide.md | 4 +- Docs/Whats_New.md | 2 + MoltenVK/MoltenVK.xcodeproj/project.pbxproj | 10 +++ MoltenVK/MoltenVK/API/mvk_config.h | 37 +++++++- MoltenVK/MoltenVK/GPUObjects/MVKDevice.h | 2 +- MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm | 16 ++-- .../MoltenVK/Utility/MVKConfigMembers.def | 86 +++++++++++++++++++ MoltenVK/MoltenVK/Utility/MVKEnvironment.cpp | 66 +++++--------- MoltenVK/MoltenVK/Utility/MVKEnvironment.h | 30 ++++++- Scripts/runcts | 2 +- 12 files changed, 211 insertions(+), 117 deletions(-) create mode 100644 MoltenVK/MoltenVK/Utility/MVKConfigMembers.def diff --git a/Common/MVKOSExtensions.h b/Common/MVKOSExtensions.h index 13d864dab..f9faba912 100644 --- a/Common/MVKOSExtensions.h +++ b/Common/MVKOSExtensions.h @@ -108,62 +108,22 @@ void mvkDispatchToMainAndWait(dispatch_block_t block); #pragma mark Process environment /** - * Returns the value of the environment variable at the given name, - * or an empty string if no environment variable with that name exists. - * - * If pWasFound is not null, its value is set to true if the environment - * variable exists, or false if not. + * Sets the value of the environment variable at the given name, into the + * std::string, and returns whether the environment variable was found. */ -std::string mvkGetEnvVar(std::string varName, bool* pWasFound = nullptr); +bool mvkGetEnvVar(const char* evName, std::string& evStr); /** - * Returns the value of the environment variable at the given name, - * or zero if no environment variable with that name exists. - * - * If pWasFound is not null, its value is set to true if the environment - * variable exists, or false if not. + * Returns a pointer to a string containing the value of the environment variable at + * the given name, or returns the default value if the environment variable was not set. 
*/ -int64_t mvkGetEnvVarInt64(std::string varName, bool* pWasFound = nullptr); +const char* mvkGetEnvVarString(const char* evName, std::string& evStr, const char* defaultValue = ""); /** * Returns the value of the environment variable at the given name, - * or false if no environment variable with that name exists. - * - * If pWasFound is not null, its value is set to true if the environment - * variable exists, or false if not. + * or returns the default value if the environment variable was not set. */ -bool mvkGetEnvVarBool(std::string varName, bool* pWasFound = nullptr); - -#define MVK_SET_FROM_ENV_OR_BUILD_BOOL(cfgVal, EV) \ - do { \ - bool wasFound = false; \ - bool ev = mvkGetEnvVarBool(#EV, &wasFound); \ - cfgVal = wasFound ? ev : EV; \ - } while(false) - -#define MVK_SET_FROM_ENV_OR_BUILD_INT64(cfgVal, EV) \ - do { \ - bool wasFound = false; \ - int64_t ev = mvkGetEnvVarInt64(#EV, &wasFound); \ - cfgVal = wasFound ? ev : EV; \ - } while(false) - -// Pointer cast permits cfgVal to be an enum var -#define MVK_SET_FROM_ENV_OR_BUILD_INT32(cfgVal, EV) \ - do { \ - bool wasFound = false; \ - int64_t ev = mvkGetEnvVarInt64(#EV, &wasFound); \ - int64_t val = wasFound ? ev : EV; \ - *(int32_t*)&cfgVal = (int32_t)std::min(std::max(val, (int64_t)INT32_MIN), (int64_t)INT32_MAX); \ - } while(false) - -#define MVK_SET_FROM_ENV_OR_BUILD_STRING(cfgVal, EV, strObj) \ - do { \ - bool wasFound = false; \ - std::string ev = mvkGetEnvVar(#EV, &wasFound); \ - strObj = wasFound ? 
std::move(ev) : EV; \ - cfgVal = strObj.c_str(); \ - } while(false) +double mvkGetEnvVarNumber(const char* evName, double defaultValue = 0.0); #pragma mark - diff --git a/Common/MVKOSExtensions.mm b/Common/MVKOSExtensions.mm index 4e2c974c6..93025f2bc 100644 --- a/Common/MVKOSExtensions.mm +++ b/Common/MVKOSExtensions.mm @@ -81,21 +81,22 @@ void mvkDispatchToMainAndWait(dispatch_block_t block) { #pragma mark - #pragma mark Process environment -string mvkGetEnvVar(string varName, bool* pWasFound) { +bool mvkGetEnvVar(const char* varName, string& evStr) { @autoreleasepool { NSDictionary* nsEnv = [[NSProcessInfo processInfo] environment]; - NSString* envStr = nsEnv[@(varName.c_str())]; - if (pWasFound) { *pWasFound = envStr != nil; } - return envStr ? envStr.UTF8String : ""; + NSString* nsStr = nsEnv[@(varName)]; + if (nsStr) { evStr = nsStr.UTF8String; } + return nsStr != nil; } } -int64_t mvkGetEnvVarInt64(string varName, bool* pWasFound) { - return strtoll(mvkGetEnvVar(varName, pWasFound).c_str(), NULL, 0); +const char* mvkGetEnvVarString(const char* varName, string& evStr, const char* defaultValue) { + return mvkGetEnvVar(varName, evStr) ? evStr.c_str() : defaultValue; } -bool mvkGetEnvVarBool(std::string varName, bool* pWasFound) { - return mvkGetEnvVarInt64(varName, pWasFound) != 0; +double mvkGetEnvVarNumber(const char* varName, double defaultValue) { + string evStr; + return mvkGetEnvVar(varName, evStr) ? strtod(evStr.c_str(), nullptr) : defaultValue; } diff --git a/Docs/MoltenVK_Runtime_UserGuide.md b/Docs/MoltenVK_Runtime_UserGuide.md index f236a7e97..6684bb34a 100644 --- a/Docs/MoltenVK_Runtime_UserGuide.md +++ b/Docs/MoltenVK_Runtime_UserGuide.md @@ -517,8 +517,8 @@ you can address the issue as follows: - Errors encountered during **Runtime Shader Conversion** are logged to the console. 
- To help understand conversion issues during **Runtime Shader Conversion**, you can enable the - logging of the *SPIR-V* and *MSL* shader source code during shader conversion, by turning on - the `MVKConfiguration::debugMode` configuration parameter, or setting the value of the `MVK_DEBUG` + logging of the *SPIR-V* and *MSL* shader source code during shader conversion, by turning on the + `MVKConfiguration::debugMode` configuration parameter, or setting the value of the `MVK_CONFIG_DEBUG` runtime environment variable to `1`. See the [*MoltenVK Configuration*](#moltenvk_config) description above. diff --git a/Docs/Whats_New.md b/Docs/Whats_New.md index 605cda82c..357df0e47 100644 --- a/Docs/Whats_New.md +++ b/Docs/Whats_New.md @@ -24,6 +24,8 @@ Released TBD - Fix case where a `CAMetalDrawable` with invalid pixel format causes onscreen flickering. - Improve behavior of swapchain image presentation stalls caused by Metal regression. - Add several additional performance trackers, available via logging, or the `mvk_private_api.h` API. +- Add configurable lowpass filter for `VkPhysicalDeviceLimits::timestampPeriod`. +- Deprecate `MVK_DEBUG` env var, and add `MVK_CONFIG_DEBUG` env var to replace it. - Update `MVK_CONFIGURATION_API_VERSION` and `MVK_PRIVATE_API_VERSION` to `38`. 
diff --git a/MoltenVK/MoltenVK.xcodeproj/project.pbxproj b/MoltenVK/MoltenVK.xcodeproj/project.pbxproj index 202efa912..1dffab36a 100644 --- a/MoltenVK/MoltenVK.xcodeproj/project.pbxproj +++ b/MoltenVK/MoltenVK.xcodeproj/project.pbxproj @@ -331,6 +331,10 @@ A9B51BD8225E986A00AC74D2 /* MVKOSExtensions.mm in Sources */ = {isa = PBXBuildFile; fileRef = A9B51BD2225E986A00AC74D2 /* MVKOSExtensions.mm */; }; A9B51BD9225E986A00AC74D2 /* MVKOSExtensions.h in Headers */ = {isa = PBXBuildFile; fileRef = A9B51BD6225E986A00AC74D2 /* MVKOSExtensions.h */; }; A9B51BDA225E986A00AC74D2 /* MVKOSExtensions.h in Headers */ = {isa = PBXBuildFile; fileRef = A9B51BD6225E986A00AC74D2 /* MVKOSExtensions.h */; }; + A9C327562AAFBD390025EE79 /* MVKConfigMembers.def in Headers */ = {isa = PBXBuildFile; fileRef = A9C327542AAF8A770025EE79 /* MVKConfigMembers.def */; }; + A9C327572AAFBD3A0025EE79 /* MVKConfigMembers.def in Headers */ = {isa = PBXBuildFile; fileRef = A9C327542AAF8A770025EE79 /* MVKConfigMembers.def */; }; + A9C327582AAFBD3A0025EE79 /* MVKConfigMembers.def in Headers */ = {isa = PBXBuildFile; fileRef = A9C327542AAF8A770025EE79 /* MVKConfigMembers.def */; }; + A9C327592AAFBD3B0025EE79 /* MVKConfigMembers.def in Headers */ = {isa = PBXBuildFile; fileRef = A9C327542AAF8A770025EE79 /* MVKConfigMembers.def */; }; A9C96DD01DDC20C20053187F /* MVKMTLBufferAllocation.h in Headers */ = {isa = PBXBuildFile; fileRef = A9C96DCE1DDC20C20053187F /* MVKMTLBufferAllocation.h */; }; A9C96DD11DDC20C20053187F /* MVKMTLBufferAllocation.h in Headers */ = {isa = PBXBuildFile; fileRef = A9C96DCE1DDC20C20053187F /* MVKMTLBufferAllocation.h */; }; A9C96DD21DDC20C20053187F /* MVKMTLBufferAllocation.mm in Sources */ = {isa = PBXBuildFile; fileRef = A9C96DCF1DDC20C20053187F /* MVKMTLBufferAllocation.mm */; }; @@ -670,6 +674,7 @@ A9B51BD2225E986A00AC74D2 /* MVKOSExtensions.mm */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.objcpp; path = MVKOSExtensions.mm; sourceTree = ""; 
}; A9B51BD6225E986A00AC74D2 /* MVKOSExtensions.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = MVKOSExtensions.h; sourceTree = ""; }; A9B8EE0A1A98D796009C5A02 /* libMoltenVK.a */ = {isa = PBXFileReference; explicitFileType = archive.ar; includeInIndex = 0; path = libMoltenVK.a; sourceTree = BUILT_PRODUCTS_DIR; }; + A9C327542AAF8A770025EE79 /* MVKConfigMembers.def */ = {isa = PBXFileReference; explicitFileType = sourcecode.cpp.h; fileEncoding = 4; path = MVKConfigMembers.def; sourceTree = ""; }; A9C83DCD24533E22003E5261 /* MVKCommandTypePools.def */ = {isa = PBXFileReference; explicitFileType = sourcecode.cpp.h; fileEncoding = 4; path = MVKCommandTypePools.def; sourceTree = ""; }; A9C86CB61C55B8350096CAF2 /* MoltenVKShaderConverter.xcodeproj */ = {isa = PBXFileReference; lastKnownFileType = "wrapper.pb-project"; name = MoltenVKShaderConverter.xcodeproj; path = ../MoltenVKShaderConverter/MoltenVKShaderConverter.xcodeproj; sourceTree = ""; }; A9C96DCE1DDC20C20053187F /* MVKMTLBufferAllocation.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = MVKMTLBufferAllocation.h; sourceTree = ""; }; @@ -843,6 +848,7 @@ 4553AEF62251617100E8EBCD /* MVKBlockObserver.m */, 45557A5121C9EFF3008868BD /* MVKCodec.h */, 45557A4D21C9EFF3008868BD /* MVKCodec.mm */, + A9C327542AAF8A770025EE79 /* MVKConfigMembers.def */, 45557A5721CD83C3008868BD /* MVKDXTnCodec.def */, A9A5E9C525C0822700E9085E /* MVKEnvironment.cpp */, A98149431FB6A3F7005F00B4 /* MVKEnvironment.h */, @@ -1008,6 +1014,7 @@ 2FEA0A7824902F9F00EEF3AD /* MVKDeviceMemory.h in Headers */, 2FEA0A7924902F9F00EEF3AD /* MVKMTLResourceBindings.h in Headers */, 2FEA0A7A24902F9F00EEF3AD /* MVKExtensions.def in Headers */, + A9C327572AAFBD3A0025EE79 /* MVKConfigMembers.def in Headers */, 2FEA0A7B24902F9F00EEF3AD /* mvk_datatypes.hpp in Headers */, 2FEA0A7C24902F9F00EEF3AD /* MVKCommandEncodingPool.h in Headers */, 2FEA0A7D24902F9F00EEF3AD /* MVKResource.h 
in Headers */, @@ -1070,6 +1077,7 @@ A94FB7C41C7DFB4800632CA3 /* MVKCmdRenderPass.h in Headers */, A94FB7BC1C7DFB4800632CA3 /* MVKCmdPipeline.h in Headers */, A9F3D9DC24732A4D00745190 /* MVKSmallVectorAllocator.h in Headers */, + A9C327562AAFBD390025EE79 /* MVKConfigMembers.def in Headers */, A94FB7F81C7DFB4800632CA3 /* MVKPipeline.h in Headers */, A94FB7F01C7DFB4800632CA3 /* MVKImage.h in Headers */, 4553AEFD2251617100E8EBCD /* MVKBlockObserver.h in Headers */, @@ -1147,6 +1155,7 @@ A94FB7BD1C7DFB4800632CA3 /* MVKCmdPipeline.h in Headers */, A9F3D9DD24732A4D00745190 /* MVKSmallVectorAllocator.h in Headers */, A94FB7F91C7DFB4800632CA3 /* MVKPipeline.h in Headers */, + A9C327582AAFBD3A0025EE79 /* MVKConfigMembers.def in Headers */, A94FB7F11C7DFB4800632CA3 /* MVKImage.h in Headers */, 4553AEFE2251617100E8EBCD /* MVKBlockObserver.h in Headers */, A94FB7B91C7DFB4800632CA3 /* MVKCmdTransfer.h in Headers */, @@ -1204,6 +1213,7 @@ DCFD7EFD2A45BC6E007BBBF7 /* MVKSync.h in Headers */, DCFD7EFE2A45BC6E007BBBF7 /* MVKDevice.h in Headers */, DCFD7EFF2A45BC6E007BBBF7 /* MVKSmallVector.h in Headers */, + A9C327592AAFBD3B0025EE79 /* MVKConfigMembers.def in Headers */, DCFD7F002A45BC6E007BBBF7 /* MVKCommandPool.h in Headers */, DCFD7F012A45BC6E007BBBF7 /* MVKShaderModule.h in Headers */, DCFD7F022A45BC6E007BBBF7 /* MVKVulkanAPIObject.h in Headers */, diff --git a/MoltenVK/MoltenVK/API/mvk_config.h b/MoltenVK/MoltenVK/API/mvk_config.h index f97026619..f72ef7770 100644 --- a/MoltenVK/MoltenVK/API/mvk_config.h +++ b/MoltenVK/MoltenVK/API/mvk_config.h @@ -191,7 +191,7 @@ typedef struct { * and the changed value will immediately effect subsequent MoltenVK behaviour. * * The initial value or this parameter is set by the - * MVK_DEBUG + * MVK_CONFIG_DEBUG * runtime environment variable or MoltenVK compile-time build setting. * If neither is set, the value of this parameter is false if MoltenVK was * built in Release mode, and true if MoltenVK was built in Debug mode. 
@@ -919,6 +919,9 @@ typedef struct { /** * Maximize the concurrent executing compilation tasks. * + * The value of this parameter must be changed before creating a VkInstance, + * for the change to take effect. + * * The initial value or this parameter is set by the * MVK_CONFIG_SHOULD_MAXIMIZE_CONCURRENT_COMPILATION * runtime environment variable or MoltenVK compile-time build setting. @@ -926,6 +929,38 @@ typedef struct { */ VkBool32 shouldMaximizeConcurrentCompilation; + /** + * This parameter is ignored on Apple Silicon devices. + * + * Non-Apple GPUs can have a dynamic timestamp period, which varies over time according to GPU + * workload. Depending on how often the app samples the VkPhysicalDeviceLimits::timestampPeriod + * value using vkGetPhysicalDeviceProperties(), the app may want up-to-date, but potentially + * volatile values, or it may find average values more useful. + * + * The value of this parameter sets the alpha (A) value of a simple lowpass filter + * on the timestampPeriod value, of the form: + * + * TPout = (1 - A)TPout + (A * TPin) + * + * The alpha value can be set to a float between 0.0 and 1.0. Values of alpha closer to + * 0.0 cause the value of timestampPeriod to vary slowly over time and be less volatile, + * and values of alpha closer to 1.0 cause the value of timestampPeriod to vary quickly + * and be more volatile. + * + * Apps that query the timestampPeriod value infrequently will prefer low volatility, whereas + * apps that query frequently may prefer higher volatility, to track more recent changes. + * + * The value of this parameter can be changed at any time, and will affect subsequent queries. + * + * The initial value of this parameter is set by the + * MVK_CONFIG_TIMESTAMP_PERIOD_LOWPASS_ALPHA + * runtime environment variable or MoltenVK compile-time build setting.
+ * If neither is set, this parameter is set to 0.05 by default, + * indicating that the timestampPeriod will vary relatively slowly, + * with the expectation that the app is querying this value infrequently. + */ + float timestampPeriodLowPassAlpha; + } MVKConfiguration; diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h index 5ae7f5ecd..7a04e90e0 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h +++ b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h @@ -401,7 +401,7 @@ class MVKPhysicalDevice : public MVKDispatchableVulkanAPIObject { void initExtensions(); void initCounterSets(); bool needsCounterSetRetained(); - void updateTimestampsAndPeriod(); + void updateTimestampPeriod(); MVKArrayRef getQueueFamilies(); void initPipelineCacheUUID(); uint32_t getHighestGPUCapability(); diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm index 048160116..422f1b43e 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm @@ -451,7 +451,7 @@ } void MVKPhysicalDevice::getProperties(VkPhysicalDeviceProperties* properties) { - updateTimestampsAndPeriod(); + updateTimestampPeriod(); *properties = _properties; } @@ -1570,10 +1570,10 @@ // If needed, update the timestamp period for this device, using a crude lowpass filter to level out // wild temporary changes, particularly during initial queries before much GPU activity has occurred. // On Apple GPUs, CPU & GPU timestamps are the same, and timestamp period never changes. 
-void MVKPhysicalDevice::updateTimestampsAndPeriod() { - if (_properties.vendorID == kAppleVendorId) { return; } - - if ([_mtlDevice respondsToSelector: @selector(sampleTimestamps:gpuTimestamp:)]) { +void MVKPhysicalDevice::updateTimestampPeriod() { + if (_properties.vendorID != kAppleVendorId && + [_mtlDevice respondsToSelector: @selector(sampleTimestamps:gpuTimestamp:)]) { + MTLTimestamp earlierCPUTs = _prevCPUTimestamp; MTLTimestamp earlierGPUTs = _prevGPUTimestamp; [_mtlDevice sampleTimestamps: &_prevCPUTimestamp gpuTimestamp: &_prevGPUTimestamp]; @@ -1582,9 +1582,9 @@ if (elapsedCPUNanos && elapsedGPUTicks) { // Ensure not zero float tsPeriod = elapsedCPUNanos / elapsedGPUTicks; - // Basic lowpass filter Y = (1 - a)Y + a*X. - // The lower a is, the slower Y will change over time. - static const float a = 0.05; + // Basic lowpass filter TPout = (1 - A)TPout + (A * TPin). + // The lower A is, the slower TPout will change over time. + float a = mvkConfig().timestampPeriodLowPassAlpha; _properties.limits.timestampPeriod = ((1.0 - a) * _properties.limits.timestampPeriod) + (a * tsPeriod); } } diff --git a/MoltenVK/MoltenVK/Utility/MVKConfigMembers.def b/MoltenVK/MoltenVK/Utility/MVKConfigMembers.def new file mode 100644 index 000000000..aff0cf33a --- /dev/null +++ b/MoltenVK/MoltenVK/Utility/MVKConfigMembers.def @@ -0,0 +1,86 @@ +/* + * MVKConfigMembers.def + * + * Copyright (c) 2015-2023 The Brenwill Workshop Ltd. (http://www.brenwill.com) + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// The items in the list below describe the members of the MVKConfiguration struct. +// When a new member is added to the MVKConfiguration struct, a corresponding description +// must be added here. +// +// To use this file, define the macros: +// +// MVK_CONFIG_MEMBER(member, mbrType, name) +// MVK_CONFIG_MEMBER_STRING(member, strObj, name) +// +// then #include this file inline with your code. +// +// The name parameter is the name of the configuration parameter, which is used as the name +// of the environment variable, and build setting, that sets the config value, and is entered +// here without the "MVK_CONFIG_" prefix. +// +// Since string members are set from char pointers, the text must be copied to a std::string +// object, which is passed as a parameter to MVK_CONFIG_MEMBER_STRING. + + +#ifndef MVK_CONFIG_MEMBER +#error MVK_CONFIG_MEMBER must be defined before including this file +#endif + +#ifndef MVK_CONFIG_MEMBER_STRING +#error MVK_CONFIG_MEMBER_STRING must be defined before including this file +#endif + +MVK_CONFIG_MEMBER(debugMode, VkBool32, DEBUG) +MVK_CONFIG_MEMBER(shaderConversionFlipVertexY, VkBool32, SHADER_CONVERSION_FLIP_VERTEX_Y) +MVK_CONFIG_MEMBER(synchronousQueueSubmits, VkBool32, SYNCHRONOUS_QUEUE_SUBMITS) +MVK_CONFIG_MEMBER(prefillMetalCommandBuffers, MVKPrefillMetalCommandBuffersStyle, PREFILL_METAL_COMMAND_BUFFERS) +MVK_CONFIG_MEMBER(maxActiveMetalCommandBuffersPerQueue, uint32_t, MAX_ACTIVE_METAL_COMMAND_BUFFERS_PER_QUEUE) +MVK_CONFIG_MEMBER(supportLargeQueryPools, VkBool32, SUPPORT_LARGE_QUERY_POOLS) +MVK_CONFIG_MEMBER(presentWithCommandBuffer, VkBool32, PRESENT_WITH_COMMAND_BUFFER) +MVK_CONFIG_MEMBER(swapchainMinMagFilterUseNearest, VkBool32, SWAPCHAIN_MAG_FILTER_USE_NEAREST) // Deprecated legacy renaming +MVK_CONFIG_MEMBER(swapchainMinMagFilterUseNearest, VkBool32, SWAPCHAIN_MIN_MAG_FILTER_USE_NEAREST)
+MVK_CONFIG_MEMBER(metalCompileTimeout, uint64_t, METAL_COMPILE_TIMEOUT) +MVK_CONFIG_MEMBER(performanceTracking, VkBool32, PERFORMANCE_TRACKING) +MVK_CONFIG_MEMBER(performanceLoggingFrameCount, uint32_t, PERFORMANCE_LOGGING_FRAME_COUNT) +MVK_CONFIG_MEMBER(activityPerformanceLoggingStyle, MVKConfigActivityPerformanceLoggingStyle, ACTIVITY_PERFORMANCE_LOGGING_STYLE) +MVK_CONFIG_MEMBER(displayWatermark, VkBool32, DISPLAY_WATERMARK) +MVK_CONFIG_MEMBER(specializedQueueFamilies, VkBool32, SPECIALIZED_QUEUE_FAMILIES) +MVK_CONFIG_MEMBER(switchSystemGPU, VkBool32, SWITCH_SYSTEM_GPU) +MVK_CONFIG_MEMBER(fullImageViewSwizzle, VkBool32, FULL_IMAGE_VIEW_SWIZZLE) +MVK_CONFIG_MEMBER(defaultGPUCaptureScopeQueueFamilyIndex, VkBool32, DEFAULT_GPU_CAPTURE_SCOPE_QUEUE_FAMILY_INDEX) +MVK_CONFIG_MEMBER(defaultGPUCaptureScopeQueueIndex, VkBool32, DEFAULT_GPU_CAPTURE_SCOPE_QUEUE_INDEX) +MVK_CONFIG_MEMBER(fastMathEnabled, MVKConfigFastMath, FAST_MATH_ENABLED) +MVK_CONFIG_MEMBER(logLevel, MVKConfigLogLevel, LOG_LEVEL) +MVK_CONFIG_MEMBER(traceVulkanCalls, MVKConfigTraceVulkanCalls, TRACE_VULKAN_CALLS) +MVK_CONFIG_MEMBER(forceLowPowerGPU, VkBool32, FORCE_LOW_POWER_GPU) +MVK_CONFIG_MEMBER(semaphoreUseMTLFence, VkBool32, ALLOW_METAL_FENCES) // Deprecated legacy +MVK_CONFIG_MEMBER(semaphoreSupportStyle, MVKVkSemaphoreSupportStyle, VK_SEMAPHORE_SUPPORT_STYLE) +MVK_CONFIG_MEMBER(autoGPUCaptureScope, MVKConfigAutoGPUCaptureScope, AUTO_GPU_CAPTURE_SCOPE) +MVK_CONFIG_MEMBER_STRING(autoGPUCaptureOutputFilepath, evGPUCapFileStrObj, AUTO_GPU_CAPTURE_OUTPUT_FILE) +MVK_CONFIG_MEMBER(texture1DAs2D, VkBool32, TEXTURE_1D_AS_2D) +MVK_CONFIG_MEMBER(preallocateDescriptors, VkBool32, PREALLOCATE_DESCRIPTORS) +MVK_CONFIG_MEMBER(useCommandPooling, VkBool32, USE_COMMAND_POOLING) +MVK_CONFIG_MEMBER(useMTLHeap, VkBool32, USE_MTLHEAP) +MVK_CONFIG_MEMBER(apiVersionToAdvertise, uint32_t, API_VERSION_TO_ADVERTISE) +MVK_CONFIG_MEMBER(advertiseExtensions, uint32_t, ADVERTISE_EXTENSIONS) +MVK_CONFIG_MEMBER(resumeLostDevice, 
VkBool32, RESUME_LOST_DEVICE) +MVK_CONFIG_MEMBER(useMetalArgumentBuffers, MVKUseMetalArgumentBuffers, USE_METAL_ARGUMENT_BUFFERS) +MVK_CONFIG_MEMBER(shaderSourceCompressionAlgorithm, MVKConfigCompressionAlgorithm, SHADER_COMPRESSION_ALGORITHM) +MVK_CONFIG_MEMBER(shouldMaximizeConcurrentCompilation, VkBool32, SHOULD_MAXIMIZE_CONCURRENT_COMPILATION) +MVK_CONFIG_MEMBER(timestampPeriodLowPassAlpha, float, TIMESTAMP_PERIOD_LOWPASS_ALPHA) + +#undef MVK_CONFIG_MEMBER +#undef MVK_CONFIG_MEMBER_STRING diff --git a/MoltenVK/MoltenVK/Utility/MVKEnvironment.cpp b/MoltenVK/MoltenVK/Utility/MVKEnvironment.cpp index 5aa6f7dbc..f2fa9e86e 100644 --- a/MoltenVK/MoltenVK/Utility/MVKEnvironment.cpp +++ b/MoltenVK/MoltenVK/Utility/MVKEnvironment.cpp @@ -18,7 +18,7 @@ #include "MVKEnvironment.h" #include "MVKOSExtensions.h" - +#include "MVKFoundation.h" static bool _mvkConfigInitialized = false; static void mvkInitConfigFromEnvVars() { @@ -27,43 +27,22 @@ static void mvkInitConfigFromEnvVars() { MVKConfiguration evCfg; std::string evGPUCapFileStrObj; - MVK_SET_FROM_ENV_OR_BUILD_BOOL (evCfg.debugMode, MVK_DEBUG); - MVK_SET_FROM_ENV_OR_BUILD_BOOL (evCfg.shaderConversionFlipVertexY, MVK_CONFIG_SHADER_CONVERSION_FLIP_VERTEX_Y); - MVK_SET_FROM_ENV_OR_BUILD_BOOL (evCfg.synchronousQueueSubmits, MVK_CONFIG_SYNCHRONOUS_QUEUE_SUBMITS); - MVK_SET_FROM_ENV_OR_BUILD_INT32 (evCfg.prefillMetalCommandBuffers, MVK_CONFIG_PREFILL_METAL_COMMAND_BUFFERS); - MVK_SET_FROM_ENV_OR_BUILD_INT32 (evCfg.maxActiveMetalCommandBuffersPerQueue, MVK_CONFIG_MAX_ACTIVE_METAL_COMMAND_BUFFERS_PER_QUEUE); - MVK_SET_FROM_ENV_OR_BUILD_BOOL (evCfg.supportLargeQueryPools, MVK_CONFIG_SUPPORT_LARGE_QUERY_POOLS); - MVK_SET_FROM_ENV_OR_BUILD_BOOL (evCfg.presentWithCommandBuffer, MVK_CONFIG_PRESENT_WITH_COMMAND_BUFFER); - MVK_SET_FROM_ENV_OR_BUILD_BOOL (evCfg.swapchainMinMagFilterUseNearest, MVK_CONFIG_SWAPCHAIN_MAG_FILTER_USE_NEAREST); // Deprecated legacy env var - MVK_SET_FROM_ENV_OR_BUILD_BOOL 
(evCfg.swapchainMinMagFilterUseNearest, MVK_CONFIG_SWAPCHAIN_MIN_MAG_FILTER_USE_NEAREST); - MVK_SET_FROM_ENV_OR_BUILD_INT64 (evCfg.metalCompileTimeout, MVK_CONFIG_METAL_COMPILE_TIMEOUT); - MVK_SET_FROM_ENV_OR_BUILD_BOOL (evCfg.performanceTracking, MVK_CONFIG_PERFORMANCE_TRACKING); - MVK_SET_FROM_ENV_OR_BUILD_INT32 (evCfg.performanceLoggingFrameCount, MVK_CONFIG_PERFORMANCE_LOGGING_FRAME_COUNT); - MVK_SET_FROM_ENV_OR_BUILD_INT32 (evCfg.activityPerformanceLoggingStyle, MVK_CONFIG_ACTIVITY_PERFORMANCE_LOGGING_STYLE); - MVK_SET_FROM_ENV_OR_BUILD_BOOL (evCfg.displayWatermark, MVK_CONFIG_DISPLAY_WATERMARK); - MVK_SET_FROM_ENV_OR_BUILD_BOOL (evCfg.specializedQueueFamilies, MVK_CONFIG_SPECIALIZED_QUEUE_FAMILIES); - MVK_SET_FROM_ENV_OR_BUILD_BOOL (evCfg.switchSystemGPU, MVK_CONFIG_SWITCH_SYSTEM_GPU); - MVK_SET_FROM_ENV_OR_BUILD_BOOL (evCfg.fullImageViewSwizzle, MVK_CONFIG_FULL_IMAGE_VIEW_SWIZZLE); - MVK_SET_FROM_ENV_OR_BUILD_BOOL (evCfg.defaultGPUCaptureScopeQueueFamilyIndex, MVK_CONFIG_DEFAULT_GPU_CAPTURE_SCOPE_QUEUE_FAMILY_INDEX); - MVK_SET_FROM_ENV_OR_BUILD_BOOL (evCfg.defaultGPUCaptureScopeQueueIndex, MVK_CONFIG_DEFAULT_GPU_CAPTURE_SCOPE_QUEUE_INDEX); - MVK_SET_FROM_ENV_OR_BUILD_INT32 (evCfg.fastMathEnabled, MVK_CONFIG_FAST_MATH_ENABLED); - MVK_SET_FROM_ENV_OR_BUILD_INT32 (evCfg.logLevel, MVK_CONFIG_LOG_LEVEL); - MVK_SET_FROM_ENV_OR_BUILD_INT32 (evCfg.traceVulkanCalls, MVK_CONFIG_TRACE_VULKAN_CALLS); - MVK_SET_FROM_ENV_OR_BUILD_BOOL (evCfg.forceLowPowerGPU, MVK_CONFIG_FORCE_LOW_POWER_GPU); - MVK_SET_FROM_ENV_OR_BUILD_BOOL (evCfg.semaphoreUseMTLFence, MVK_ALLOW_METAL_FENCES); - MVK_SET_FROM_ENV_OR_BUILD_INT32 (evCfg.semaphoreSupportStyle, MVK_CONFIG_VK_SEMAPHORE_SUPPORT_STYLE); - MVK_SET_FROM_ENV_OR_BUILD_INT32 (evCfg.autoGPUCaptureScope, MVK_CONFIG_AUTO_GPU_CAPTURE_SCOPE); - MVK_SET_FROM_ENV_OR_BUILD_STRING(evCfg.autoGPUCaptureOutputFilepath, MVK_CONFIG_AUTO_GPU_CAPTURE_OUTPUT_FILE, evGPUCapFileStrObj); - MVK_SET_FROM_ENV_OR_BUILD_BOOL (evCfg.texture1DAs2D, 
MVK_CONFIG_TEXTURE_1D_AS_2D); - MVK_SET_FROM_ENV_OR_BUILD_BOOL (evCfg.preallocateDescriptors, MVK_CONFIG_PREALLOCATE_DESCRIPTORS); - MVK_SET_FROM_ENV_OR_BUILD_BOOL (evCfg.useCommandPooling, MVK_CONFIG_USE_COMMAND_POOLING); - MVK_SET_FROM_ENV_OR_BUILD_BOOL (evCfg.useMTLHeap, MVK_CONFIG_USE_MTLHEAP); - MVK_SET_FROM_ENV_OR_BUILD_INT32 (evCfg.apiVersionToAdvertise, MVK_CONFIG_API_VERSION_TO_ADVERTISE); - MVK_SET_FROM_ENV_OR_BUILD_INT32 (evCfg.advertiseExtensions, MVK_CONFIG_ADVERTISE_EXTENSIONS); - MVK_SET_FROM_ENV_OR_BUILD_BOOL (evCfg.resumeLostDevice, MVK_CONFIG_RESUME_LOST_DEVICE); - MVK_SET_FROM_ENV_OR_BUILD_INT32 (evCfg.useMetalArgumentBuffers, MVK_CONFIG_USE_METAL_ARGUMENT_BUFFERS); - MVK_SET_FROM_ENV_OR_BUILD_INT32 (evCfg.shaderSourceCompressionAlgorithm, MVK_CONFIG_SHADER_COMPRESSION_ALGORITHM); - MVK_SET_FROM_ENV_OR_BUILD_BOOL (evCfg.shouldMaximizeConcurrentCompilation, MVK_CONFIG_SHOULD_MAXIMIZE_CONCURRENT_COMPILATION); +#define STR(name) #name + +#define MVK_CONFIG_MEMBER(member, mbrType, name) \ + evCfg.member = (mbrType)mvkGetEnvVarNumber(STR(MVK_CONFIG_##name), MVK_CONFIG_##name); + +#define MVK_CONFIG_MEMBER_STRING(member, strObj, name) \ + evCfg.member = mvkGetEnvVarString(STR(MVK_CONFIG_##name), strObj, MVK_CONFIG_##name); + +#include "MVKConfigMembers.def" + + // At this point, debugMode has been set by env var MVK_CONFIG_DEBUG. + // MVK_CONFIG_DEBUG replaced the deprecated MVK_DEBUG env var, so for + // legacy use, if the MVK_DEBUG env var is explicitly set, override debugMode. + double noEV = -3.1415; // An unlikely env var value. + double cvMVKDebug = mvkGetEnvVarNumber("MVK_DEBUG", noEV); + if (cvMVKDebug != noEV) { evCfg.debugMode = cvMVKDebug; } // Deprected legacy VkSemaphore MVK_ALLOW_METAL_FENCES and MVK_ALLOW_METAL_EVENTS config.
// Legacy MVK_ALLOW_METAL_EVENTS is covered by MVK_CONFIG_VK_SEMAPHORE_SUPPORT_STYLE, @@ -71,9 +50,7 @@ static void mvkInitConfigFromEnvVars() { // disabled, disable semaphoreUseMTLEvent (aliased as semaphoreSupportStyle value // MVK_CONFIG_VK_SEMAPHORE_SUPPORT_STYLE_SINGLE_QUEUE), and let mvkSetConfig() // further process legacy behavior of MVK_ALLOW_METAL_FENCES. - bool sem4UseMTLEvent; - MVK_SET_FROM_ENV_OR_BUILD_BOOL(sem4UseMTLEvent, MVK_ALLOW_METAL_EVENTS); - if ( !sem4UseMTLEvent ) { + if ( !mvkGetEnvVarNumber("MVK_CONFIG_ALLOW_METAL_EVENTS", 1.0) ) { evCfg.semaphoreUseMTLEvent = (MVKVkSemaphoreSupportStyle)false; // Disabled. Also semaphoreSupportStyle MVK_CONFIG_VK_SEMAPHORE_SUPPORT_STYLE_SINGLE_QUEUE. } @@ -81,13 +58,11 @@ static void mvkInitConfigFromEnvVars() { // MVK_CONFIG_PERFORMANCE_LOGGING_INLINE env var was used, and activityPerformanceLoggingStyle // was not already set by MVK_CONFIG_ACTIVITY_PERFORMANCE_LOGGING_STYLE, set // activityPerformanceLoggingStyle to MVK_CONFIG_ACTIVITY_PERFORMANCE_LOGGING_STYLE_IMMEDIATE. - bool logPerfInline; - MVK_SET_FROM_ENV_OR_BUILD_BOOL(logPerfInline, MVK_CONFIG_PERFORMANCE_LOGGING_INLINE); + bool logPerfInline = mvkGetEnvVarNumber("MVK_CONFIG_PERFORMANCE_LOGGING_INLINE", 0.0); if (logPerfInline && evCfg.activityPerformanceLoggingStyle == MVK_CONFIG_ACTIVITY_PERFORMANCE_LOGGING_STYLE_FRAME_COUNT) { evCfg.activityPerformanceLoggingStyle = MVK_CONFIG_ACTIVITY_PERFORMANCE_LOGGING_STYLE_IMMEDIATE; } - mvkSetConfig(evCfg); } @@ -129,4 +104,7 @@ void mvkSetConfig(const MVKConfiguration& mvkConfig) { _autoGPUCaptureOutputFile = _mvkConfig.autoGPUCaptureOutputFilepath; } _mvkConfig.autoGPUCaptureOutputFilepath = (char*)_autoGPUCaptureOutputFile.c_str(); + + // Clamp timestampPeriodLowPassAlpha between 0.0 and 1.0. 
+ _mvkConfig.timestampPeriodLowPassAlpha = mvkClamp(_mvkConfig.timestampPeriodLowPassAlpha, 0.0f, 1.0f); } diff --git a/MoltenVK/MoltenVK/Utility/MVKEnvironment.h b/MoltenVK/MoltenVK/Utility/MVKEnvironment.h index 86215bf9f..f6f1ae9ed 100644 --- a/MoltenVK/MoltenVK/Utility/MVKEnvironment.h +++ b/MoltenVK/MoltenVK/Utility/MVKEnvironment.h @@ -82,6 +82,14 @@ const MVKConfiguration& mvkConfig(); /** Global function to update MoltenVK configuration info. */ void mvkSetConfig(const MVKConfiguration& mvkConfig); +/** + * Enable debug mode. + * By default, disabled for Release builds and enabled for Debug builds. + */ +#ifndef MVK_CONFIG_DEBUG +# define MVK_CONFIG_DEBUG MVK_DEBUG +#endif + /** Flip the vertex coordinate in shaders. Enabled by default. */ #ifndef MVK_CONFIG_SHADER_CONVERSION_FLIP_VERTEX_Y # define MVK_CONFIG_SHADER_CONVERSION_FLIP_VERTEX_Y 1 @@ -244,11 +252,17 @@ void mvkSetConfig(const MVKConfiguration& mvkConfig); #ifndef MVK_CONFIG_VK_SEMAPHORE_SUPPORT_STYLE # define MVK_CONFIG_VK_SEMAPHORE_SUPPORT_STYLE MVK_CONFIG_VK_SEMAPHORE_SUPPORT_STYLE_METAL_EVENTS_WHERE_SAFE #endif -#ifndef MVK_ALLOW_METAL_EVENTS // Deprecated -# define MVK_ALLOW_METAL_EVENTS 1 +#ifndef MVK_CONFIG_ALLOW_METAL_EVENTS +# define MVK_CONFIG_ALLOW_METAL_EVENTS 1 +#endif +#ifndef MVK_ALLOW_METAL_EVENTS // Deprecated +# define MVK_ALLOW_METAL_EVENTS MVK_CONFIG_ALLOW_METAL_EVENTS #endif -#ifndef MVK_ALLOW_METAL_FENCES // Deprecated -# define MVK_ALLOW_METAL_FENCES 1 +#ifndef MVK_CONFIG_ALLOW_METAL_FENCES +# define MVK_CONFIG_ALLOW_METAL_FENCES 1 +#endif +#ifndef MVK_ALLOW_METAL_FENCES // Deprecated +# define MVK_ALLOW_METAL_FENCES MVK_CONFIG_ALLOW_METAL_FENCES #endif /** Substitute Metal 2D textures for Vulkan 1D images. Enabled by default. 
*/ @@ -303,3 +317,11 @@ void mvkSetConfig(const MVKConfiguration& mvkConfig); #ifndef MVK_CONFIG_SHOULD_MAXIMIZE_CONCURRENT_COMPILATION # define MVK_CONFIG_SHOULD_MAXIMIZE_CONCURRENT_COMPILATION 0 #endif + +/** + * The alpha value of a lowpass filter tracking VkPhysicalDeviceLimits::timestampPeriod. + * This can be set to a float between 0.0 and 1.0. + */ +#ifndef MVK_CONFIG_TIMESTAMP_PERIOD_LOWPASS_ALPHA +# define MVK_CONFIG_TIMESTAMP_PERIOD_LOWPASS_ALPHA 0.05 +#endif diff --git a/Scripts/runcts b/Scripts/runcts index bf65cc9e9..73aee7c6d 100755 --- a/Scripts/runcts +++ b/Scripts/runcts @@ -103,7 +103,7 @@ export METAL_DEBUG_ERROR_MODE=3 # ----- MoltenVK config settings ------ export MVK_CONFIG_LOG_LEVEL=1 #(1 = Errors only, 3 = Info) -export MVK_DEBUG=0 +export MVK_CONFIG_DEBUG=0 # Additional MoltenVK configuration can be set here by editing below. export MVK_CONFIG_RESUME_LOST_DEVICE=1 From 9c206ecc79e707d81fb320b57a97e198a5aa743a Mon Sep 17 00:00:00 2001 From: Bill Hollings Date: Tue, 12 Sep 2023 16:44:33 -0400 Subject: [PATCH 13/21] Fix MSL code used in vkCmdBlitImage() on depth-stencil formats. --- Docs/Whats_New.md | 1 + MoltenVK/MoltenVK/Commands/MVKCommandResourceFactory.mm | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/Docs/Whats_New.md b/Docs/Whats_New.md index 357df0e47..14f6edfc3 100644 --- a/Docs/Whats_New.md +++ b/Docs/Whats_New.md @@ -22,6 +22,7 @@ Released TBD when no other bindings change between pipelines. - Ensure objects retained for life of `MTLCommandBuffer` during `vkCmdBlitImage()` & `vkQueuePresentKHR()`. - Fix case where a `CAMetalDrawable` with invalid pixel format causes onscreen flickering. +- Fix MSL code used in `vkCmdBlitImage()` on depth-stencil formats. - Improve behavior of swapchain image presentation stalls caused by Metal regression. - Add several additional performance trackers, available via logging, or the `mvk_private_api.h` API. 
- Add configurable lowpass filter for `VkPhysicalDeviceLimits::timestampPeriod`. diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandResourceFactory.mm b/MoltenVK/MoltenVK/Commands/MVKCommandResourceFactory.mm index 973db991c..33ee4485a 100644 --- a/MoltenVK/MoltenVK/Commands/MVKCommandResourceFactory.mm +++ b/MoltenVK/MoltenVK/Commands/MVKCommandResourceFactory.mm @@ -286,7 +286,7 @@ static void getSwizzleString(char swizzleStr[4], VkComponentMapping vkMapping) { [msl appendLineMVK: @" constant TexSubrez& subRez [[buffer(0)]]) {"]; [msl appendLineMVK: @" FragmentOutputs out;"]; if (mvkIsAnyFlagEnabled(blitKey.srcAspect, (VK_IMAGE_ASPECT_DEPTH_BIT))) { - [msl appendFormat: @" out.depth = tex.sample(ce_sampler, varyings.v_texCoord%@%@, level(subRez.lod)).%c;", coordArg, sliceArg, swizzleArg[0]]; + [msl appendFormat: @" out.depth = tex.sample(ce_sampler, varyings.v_texCoord%@%@, level(subRez.lod));", coordArg, sliceArg]; [msl appendLineMVK]; } if (mvkIsAnyFlagEnabled(blitKey.srcAspect, (VK_IMAGE_ASPECT_STENCIL_BIT))) { From f0cb31a12b59f05177f07ab5a46bc9084ba5fbc9 Mon Sep 17 00:00:00 2001 From: Bill Hollings Date: Fri, 15 Sep 2023 09:54:48 -0400 Subject: [PATCH 14/21] Rework workaround to force incomplete CAMetalDrawable presentations to complete. - To force any incomplete CAMetalDrawable presentations to complete, don't force the creation of another transient drawable, as this can stall the creation of future drawables. Instead, when a swapchain is destroyed, or replaced by a new swapchain, set the CAMetalLayer drawableSize, which will force presentation completion. - Add presentation completion handler in command buffer scheduling callback, move marking available to presentation completion handler, and minimize mutex locking. - MVKQueue::waitIdle() remove wait for swapchain presentations, and remove callbacks to MVKQueue from drawable completions. 
- MVKQueue::submit() don't bypass submitting a misconfigured submission, so that semaphores and fences will be signalled, and ensure misconfigured submissions are well behaved. - Add MVKSwapchain::getCAMetalLayer() to streamline layer access (unrelated). --- .../MoltenVK/Commands/MVKCommandBuffer.mm | 2 +- MoltenVK/MoltenVK/GPUObjects/MVKImage.h | 10 +- MoltenVK/MoltenVK/GPUObjects/MVKImage.mm | 93 ++++++++++--------- MoltenVK/MoltenVK/GPUObjects/MVKQueue.h | 11 --- MoltenVK/MoltenVK/GPUObjects/MVKQueue.mm | 81 +++------------- MoltenVK/MoltenVK/GPUObjects/MVKSwapchain.h | 9 +- MoltenVK/MoltenVK/GPUObjects/MVKSwapchain.mm | 53 ++++++++--- 7 files changed, 115 insertions(+), 144 deletions(-) diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.mm b/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.mm index 8ac91c264..72dde4f1e 100644 --- a/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.mm +++ b/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.mm @@ -260,7 +260,7 @@ } _wasExecuted = true; - return true; + return wasConfigurationSuccessful(); } // Return the number of bits set in the view mask, with a minimum value of 1. diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKImage.h b/MoltenVK/MoltenVK/GPUObjects/MVKImage.h index 1479f7242..ef606b035 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKImage.h +++ b/MoltenVK/MoltenVK/GPUObjects/MVKImage.h @@ -460,10 +460,9 @@ class MVKPresentableSwapchainImage : public MVKSwapchainImage { void beginPresentation(const MVKImagePresentInfo& presentInfo); /** Called via callback when the presentation completes. */ - void endPresentation(const MVKImagePresentInfo& presentInfo, uint64_t actualPresentTime = 0); - - /** If this image is stuck in-flight, attempt to force it to complete. 
*/ - void forcePresentationCompletion(); + void endPresentation(const MVKImagePresentInfo& presentInfo, + const MVKSwapchainSignaler& signaler, + uint64_t actualPresentTime = 0); #pragma mark Construction @@ -478,12 +477,13 @@ class MVKPresentableSwapchainImage : public MVKSwapchainImage { friend MVKSwapchain; id getCAMetalDrawable() override; - void addPresentedHandler(id mtlDrawable, MVKImagePresentInfo presentInfo); + void addPresentedHandler(id mtlDrawable, MVKImagePresentInfo presentInfo, MVKSwapchainSignaler signaler); void releaseMetalDrawable(); MVKSwapchainImageAvailability getAvailability(); void makeAvailable(const MVKSwapchainSignaler& signaler); void makeAvailable(); VkResult acquireAndSignalWhenAvailable(MVKSemaphore* semaphore, MVKFence* fence); + MVKSwapchainSignaler getPresentationSignaler(); id _mtlDrawable = nil; MVKSwapchainImageAvailability _availability; diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKImage.mm b/MoltenVK/MoltenVK/GPUObjects/MVKImage.mm index 1769df11d..f09495c74 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKImage.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKImage.mm @@ -1248,16 +1248,18 @@ static void signalAndUnmarkAsTracked(const MVKSwapchainSignaler& signaler) { } VkResult MVKPresentableSwapchainImage::acquireAndSignalWhenAvailable(MVKSemaphore* semaphore, MVKFence* fence) { + + // Now that this image is being acquired, release the existing drawable and its texture. + // This is not done earlier so the texture is retained for any post-processing such as screen captures, etc. + // This may trigger a delayed presentation callback, which uses the _availabilityLock, also used below. + releaseMetalDrawable(); + lock_guard lock(_availabilityLock); // Upon acquisition, update acquisition ID immediately, to move it to the back of the chain, // so other images will be preferred if either all images are available or no images are available. 
_availability.acquisitionID = _swapchain->getNextAcquisitionID(); - // Now that this image is being acquired, release the existing drawable and its texture. - // This is not done earlier so the texture is retained for any post-processing such as screen captures, etc. - releaseMetalDrawable(); - auto signaler = MVKSwapchainSignaler{fence, semaphore, semaphore ? semaphore->deferSignal() : 0}; if (_availability.isAvailable) { _availability.isAvailable = false; @@ -1292,10 +1294,10 @@ static void signalAndUnmarkAsTracked(const MVKSwapchainSignaler& signaler) { if ( !_mtlDrawable ) { @autoreleasepool { bool hasInvalidFormat = false; - uint32_t attemptCnt = _swapchain->getImageCount() * 2; // Attempt a resonable number of times + uint32_t attemptCnt = _swapchain->getImageCount(); // Attempt a resonable number of times for (uint32_t attemptIdx = 0; !_mtlDrawable && attemptIdx < attemptCnt; attemptIdx++) { uint64_t startTime = _device->getPerformanceTimestamp(); - _mtlDrawable = [_swapchain->_surface->getCAMetalLayer().nextDrawable retain]; // retained + _mtlDrawable = [_swapchain->getCAMetalLayer().nextDrawable retain]; // retained _device->addPerformanceInterval(_device->_performanceStatistics.queue.retrieveCAMetalDrawable, startTime); hasInvalidFormat = _mtlDrawable && !_mtlDrawable.texture.pixelFormat; if (hasInvalidFormat) { releaseMetalDrawable(); } @@ -1314,8 +1316,6 @@ static void signalAndUnmarkAsTracked(const MVKSwapchainSignaler& signaler) { // Pass MVKImagePresentInfo by value because it may not exist when the callback runs. 
VkResult MVKPresentableSwapchainImage::presentCAMetalDrawable(id mtlCmdBuff, MVKImagePresentInfo presentInfo) { - lock_guard lock(_availabilityLock); - _swapchain->renderWatermark(getMTLTexture(0), mtlCmdBuff); // According to Apple, it is more performant to call MTLDrawable present from within a @@ -1323,8 +1323,11 @@ static void signalAndUnmarkAsTracked(const MVKSwapchainSignaler& signaler) { // But get current drawable now, intead of in handler, because a new drawable might be acquired by then. // Attach present handler before presenting to avoid race condition. id mtlDrwbl = getCAMetalDrawable(); - addPresentedHandler(mtlDrwbl, presentInfo); + MVKSwapchainSignaler signaler = getPresentationSignaler(); [mtlCmdBuff addScheduledHandler: ^(id mcb) { + + addPresentedHandler(mtlDrwbl, presentInfo, signaler); + // Try to do any present mode transitions as late as possible in an attempt // to avoid visual disruptions on any presents already on the queue. if (presentInfo.presentMode != VK_PRESENT_MODE_MAX_ENUM_KHR) { @@ -1337,38 +1340,22 @@ static void signalAndUnmarkAsTracked(const MVKSwapchainSignaler& signaler) { } }]; - MVKSwapchainSignaler signaler; - // Mark this image as available if no semaphores or fences are waiting to be signaled. - _availability.isAvailable = _availabilitySignalers.empty(); - if (_availability.isAvailable) { - // If this image is available, signal the semaphore and fence that were associated - // with the last time this image was acquired while available. This is a workaround for - // when an app uses a single semaphore or fence for more than one swapchain image. - // Because the semaphore or fence will be signaled by more than one image, it will - // get out of sync, and the final use of the image would not be signaled as a result. - signaler = _preSignaler; - } else { - // If this image is not yet available, extract and signal the first semaphore and fence. 
- auto sigIter = _availabilitySignalers.begin(); - signaler = *sigIter; - _availabilitySignalers.erase(sigIter); - } - // Ensure this image, the drawable, and the present fence are not destroyed while // awaiting MTLCommandBuffer completion. We retain the drawable separately because // a new drawable might be acquired by this image by then. + // Signal the fence from this callback, because the last one or two presentation + // completion callbacks can occasionally stall. retain(); [mtlDrwbl retain]; auto* fence = presentInfo.fence; if (fence) { fence->retain(); } [mtlCmdBuff addCompletedHandler: ^(id mcb) { - [mtlDrwbl release]; - makeAvailable(signaler); - release(); if (fence) { fence->signal(); fence->release(); } + [mtlDrwbl release]; + release(); }]; signalPresentationSemaphore(signaler, mtlCmdBuff); @@ -1376,22 +1363,45 @@ static void signalAndUnmarkAsTracked(const MVKSwapchainSignaler& signaler) { return getConfigurationResult(); } -// Pass MVKImagePresentInfo by value because it may not exist when the callback runs. +MVKSwapchainSignaler MVKPresentableSwapchainImage::getPresentationSignaler() { + lock_guard lock(_availabilityLock); + + // Mark this image as available if no semaphores or fences are waiting to be signaled. + _availability.isAvailable = _availabilitySignalers.empty(); + if (_availability.isAvailable) { + // If this image is available, signal the semaphore and fence that were associated + // with the last time this image was acquired while available. This is a workaround for + // when an app uses a single semaphore or fence for more than one swapchain image. + // Because the semaphore or fence will be signaled by more than one image, it will + // get out of sync, and the final use of the image would not be signaled as a result. + return _preSignaler; + } else { + // If this image is not yet available, extract and signal the first semaphore and fence. 
+ MVKSwapchainSignaler signaler; + auto sigIter = _availabilitySignalers.begin(); + signaler = *sigIter; + _availabilitySignalers.erase(sigIter); + return signaler; + } +} + +// Pass MVKImagePresentInfo & MVKSwapchainSignaler by value because they may not exist when the callback runs. void MVKPresentableSwapchainImage::addPresentedHandler(id mtlDrawable, - MVKImagePresentInfo presentInfo) { + MVKImagePresentInfo presentInfo, + MVKSwapchainSignaler signaler) { beginPresentation(presentInfo); #if !MVK_OS_SIMULATOR if ([mtlDrawable respondsToSelector: @selector(addPresentedHandler:)]) { [mtlDrawable addPresentedHandler: ^(id mtlDrwbl) { - endPresentation(presentInfo, mtlDrwbl.presentedTime * 1.0e9); + endPresentation(presentInfo, signaler, mtlDrwbl.presentedTime * 1.0e9); }]; } else #endif { // If MTLDrawable.presentedTime/addPresentedHandler isn't supported, // treat it as if the present happened when requested. - endPresentation(presentInfo); + endPresentation(presentInfo, signaler); } } @@ -1399,11 +1409,11 @@ static void signalAndUnmarkAsTracked(const MVKSwapchainSignaler& signaler) { void MVKPresentableSwapchainImage::beginPresentation(const MVKImagePresentInfo& presentInfo) { retain(); _swapchain->beginPresentation(presentInfo); - presentInfo.queue->beginPresentation(presentInfo); _presentationStartTime = getDevice()->getPerformanceTimestamp(); } void MVKPresentableSwapchainImage::endPresentation(const MVKImagePresentInfo& presentInfo, + const MVKSwapchainSignaler& signaler, uint64_t actualPresentTime) { { // Scope to avoid deadlock if release() is run within detachment lock // If I have become detached from the swapchain, it means the swapchain, and possibly the @@ -1412,7 +1422,7 @@ static void signalAndUnmarkAsTracked(const MVKSwapchainSignaler& signaler) { if (_device) { _device->addPerformanceInterval(_device->_performanceStatistics.queue.presentSwapchains, _presentationStartTime); } if (_swapchain) { _swapchain->endPresentation(presentInfo, 
actualPresentTime); } } - presentInfo.queue->endPresentation(presentInfo); + makeAvailable(signaler); release(); } @@ -1432,7 +1442,9 @@ static void signalAndUnmarkAsTracked(const MVKSwapchainSignaler& signaler) { } // Signal, untrack, and release any signalers that are tracking. +// Release the drawable before the lock, as it may trigger completion callback. void MVKPresentableSwapchainImage::makeAvailable() { + releaseMetalDrawable(); lock_guard lock(_availabilityLock); if ( !_availability.isAvailable ) { @@ -1445,14 +1457,6 @@ static void signalAndUnmarkAsTracked(const MVKSwapchainSignaler& signaler) { } } -// Clear the existing CAMetalDrawable and retrieve and release a new transient one, -// in an attempt to trigger the existing CAMetalDrawable to complete it's callback. -void MVKPresentableSwapchainImage::forcePresentationCompletion() { - releaseMetalDrawable(); - if (_swapchain) { @autoreleasepool { [_swapchain->_surface->getCAMetalLayer() nextDrawable]; } } -} - - #pragma mark Construction MVKPresentableSwapchainImage::MVKPresentableSwapchainImage(MVKDevice* device, @@ -1467,14 +1471,13 @@ static void signalAndUnmarkAsTracked(const MVKSwapchainSignaler& signaler) { void MVKPresentableSwapchainImage::destroy() { - forcePresentationCompletion(); + releaseMetalDrawable(); MVKSwapchainImage::destroy(); } // Unsignaled signalers will exist if this image is acquired more than it is presented. // Ensure they are signaled and untracked so the fences and semaphores will be released. MVKPresentableSwapchainImage::~MVKPresentableSwapchainImage() { - releaseMetalDrawable(); makeAvailable(); } diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKQueue.h b/MoltenVK/MoltenVK/GPUObjects/MVKQueue.h index 0de3d2b84..c3b1d242f 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKQueue.h +++ b/MoltenVK/MoltenVK/GPUObjects/MVKQueue.h @@ -100,13 +100,6 @@ class MVKQueue : public MVKDispatchableVulkanAPIObject, public MVKDeviceTracking /** Block the current thread until this queue is idle. 
*/ VkResult waitIdle(MVKCommandUse cmdUse); - /** Mark the beginning of a swapchain image presentation. */ - void beginPresentation(const MVKImagePresentInfo& presentInfo); - - /** Mark the end of a swapchain image presentation. */ - void endPresentation(const MVKImagePresentInfo& presentInfo); - - #pragma mark Metal /** Returns the Metal queue underlying this queue. */ @@ -150,11 +143,8 @@ class MVKQueue : public MVKDispatchableVulkanAPIObject, public MVKDeviceTracking VkResult submit(MVKQueueSubmission* qSubmit); NSString* getMTLCommandBufferLabel(MVKCommandUse cmdUse); void handleMTLCommandBufferError(id mtlCmdBuff); - void waitSwapchainPresentations(MVKCommandUse cmdUse); MVKQueueFamily* _queueFamily; - MVKSemaphoreImpl _presentationCompletionBlocker; - std::unordered_map _presentedImages; std::string _name; dispatch_queue_t _execQueue; id _mtlQueue = nil; @@ -166,7 +156,6 @@ class MVKQueue : public MVKDispatchableVulkanAPIObject, public MVKDeviceTracking NSString* _mtlCmdBuffLabelAcquireNextImage = nil; NSString* _mtlCmdBuffLabelInvalidateMappedMemoryRanges = nil; MVKGPUCaptureScope* _submissionCaptureScope = nil; - std::mutex _presentedImagesLock; float _priority; uint32_t _index; }; diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKQueue.mm b/MoltenVK/MoltenVK/GPUObjects/MVKQueue.mm index 1c28f63fb..401fa8b22 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKQueue.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKQueue.mm @@ -80,13 +80,14 @@ if ( !qSubmit ) { return VK_SUCCESS; } // Ignore nils - VkResult rslt = qSubmit->getConfigurationResult(); // Extract result before submission to avoid race condition with early destruction - if (rslt == VK_SUCCESS) { - if (_execQueue) { - dispatch_async(_execQueue, ^{ execute(qSubmit); } ); - } else { - rslt = execute(qSubmit); - } + // Extract result before submission to avoid race condition with early destruction + // Submit regardless of config result, to ensure submission semaphores and fences are signalled. 
+ // The submissions will ensure a misconfiguration will be safe to execute. + VkResult rslt = qSubmit->getConfigurationResult(); + if (_execQueue) { + dispatch_async(_execQueue, ^{ execute(qSubmit); } ); + } else { + rslt = execute(qSubmit); } return rslt; } @@ -140,50 +141,9 @@ [mtlCmdBuff commit]; [mtlCmdBuff waitUntilCompleted]; - waitSwapchainPresentations(cmdUse); - return VK_SUCCESS; } -// If there are any swapchain presentations in flight, wait a few frames for them to complete. -// If they don't complete within a few frames, attempt to force them to complete, and wait another -// few frames for that to happen. If there are still swapchain presentations that haven't completed, -// log a warning, and force them to end presentation, so the images and drawables will be released. -void MVKQueue::waitSwapchainPresentations(MVKCommandUse cmdUse) { - uint32_t waitFrames = _device->_pMetalFeatures->maxSwapchainImageCount + 2; - uint64_t waitNanos = waitFrames * _device->_performanceStatistics.queue.frameInterval.average * 1e6; - if (_presentationCompletionBlocker.wait(waitNanos)) { return; } - - auto imgCnt = _presentationCompletionBlocker.getReservationCount(); - MVKPresentableSwapchainImage* images[imgCnt]; - mvkClear(images, imgCnt); - - { - // Scope of image lock limited to creating array copy of uncompleted presentations - // Populate a working array of the unpresented images. - lock_guard lock(_presentedImagesLock); - size_t imgIdx = 0; - for (auto imgPair : _presentedImages) { images[imgIdx++] = imgPair.first; } - } - - // Attempt to force each image to complete presentation through the callback. - for (size_t imgIdx = 0; imgIdx < imgCnt && _presentationCompletionBlocker.getReservationCount(); imgIdx++) { - auto* img = images[imgIdx]; - if (img) { img->forcePresentationCompletion(); } - } - - // Wait for forced presentation completions. 
If we still have unfinished swapchain image - // presentations, log a warning, and force each image to end, so that it can be released. - if ( !_presentationCompletionBlocker.wait(waitNanos) ) { - reportWarning(VK_TIMEOUT, "%s timed out after %d frames while awaiting %d swapchain image presentations to complete.", - mvkVkCommandName(cmdUse), waitFrames * 2, _presentationCompletionBlocker.getReservationCount()); - for (size_t imgIdx = 0; imgIdx < imgCnt; imgIdx++) { - auto* img = images[imgIdx]; - if (_presentedImages.count(img)) { img->endPresentation({.queue = this, .presentableImage = img}); } - } - } -} - id MVKQueue::getMTLCommandBuffer(MVKCommandUse cmdUse, bool retainRefs) { id mtlCmdBuff = nil; MVKDevice* mvkDev = getDevice(); @@ -312,25 +272,6 @@ #endif } -// _presentedImages counts presentations per swapchain image, because the presentation of an image can -// begin before the previous presentation of that image has indicated that it has completed via a callback. -void MVKQueue::beginPresentation(const MVKImagePresentInfo& presentInfo) { - lock_guard lock(_presentedImagesLock); - _presentationCompletionBlocker.reserve(); - _presentedImages[presentInfo.presentableImage]++; -} - -void MVKQueue::endPresentation(const MVKImagePresentInfo& presentInfo) { - lock_guard lock(_presentedImagesLock); - _presentationCompletionBlocker.release(); - if (_presentedImages[presentInfo.presentableImage]) { - _presentedImages[presentInfo.presentableImage]--; - } - if ( !_presentedImages[presentInfo.presentableImage] ) { - _presentedImages.erase(presentInfo.presentableImage); - } -} - #pragma mark Construction #define MVK_DISPATCH_QUEUE_QOS_CLASS QOS_CLASS_USER_INITIATED @@ -488,7 +429,7 @@ // If we need to signal completion, use getActiveMTLCommandBuffer() to ensure at least // one MTLCommandBuffer is used, otherwise if this instance has no content, it will not - // finish(), signal the fence and semaphores ,and be destroyed. 
+ // finish(), signal the fence and semaphores, and be destroyed. // Use temp var for MTLCommandBuffer commit and release because completion callback // may destroy this instance before this function ends. id mtlCmdBuff = signalCompletion ? getActiveMTLCommandBuffer() : _activeMTLCommandBuffer; @@ -501,6 +442,8 @@ if (signalCompletion) { this->finish(); } // Must be the last thing the completetion callback does. }]; + // Retrieve the result before committing MTLCommandBuffer, because finish() will destroy this instance. + VkResult rslt = mtlCmdBuff ? getConfigurationResult() : VK_ERROR_OUT_OF_POOL_MEMORY; [mtlCmdBuff commit]; [mtlCmdBuff release]; // retained @@ -508,7 +451,7 @@ // was not created, call the finish() function directly. if (signalCompletion && !mtlCmdBuff) { finish(); } - return mtlCmdBuff ? VK_SUCCESS : VK_ERROR_OUT_OF_POOL_MEMORY; + return rslt; } // Be sure to retain() any API objects referenced in this function, and release() them in the diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKSwapchain.h b/MoltenVK/MoltenVK/GPUObjects/MVKSwapchain.h index 7e7cff8cc..cd418bd1a 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKSwapchain.h +++ b/MoltenVK/MoltenVK/GPUObjects/MVKSwapchain.h @@ -43,11 +43,14 @@ class MVKSwapchain : public MVKVulkanAPIDeviceObject { /** Returns the debug report object type of this object. */ VkDebugReportObjectTypeEXT getVkDebugReportObjectType() override { return VK_DEBUG_REPORT_OBJECT_TYPE_SWAPCHAIN_KHR_EXT; } + /** Returns the CAMetalLayer underlying the surface used by this swapchain. */ + CAMetalLayer* getCAMetalLayer(); + /** Returns the number of images in this swapchain. */ - inline uint32_t getImageCount() { return (uint32_t)_presentableImages.size(); } + uint32_t getImageCount() { return (uint32_t)_presentableImages.size(); } /** Returns the image at the specified index. 
*/ - inline MVKPresentableSwapchainImage* getPresentableImage(uint32_t index) { return _presentableImages[index]; } + MVKPresentableSwapchainImage* getPresentableImage(uint32_t index) { return _presentableImages[index]; } /** * Returns the array of presentable images associated with this swapchain. @@ -112,6 +115,7 @@ class MVKSwapchain : public MVKVulkanAPIDeviceObject { void markFrameInterval(); void beginPresentation(const MVKImagePresentInfo& presentInfo); void endPresentation(const MVKImagePresentInfo& presentInfo, uint64_t actualPresentTime = 0); + void forceUnpresentedImageCompletion(); MVKSurface* _surface = nullptr; MVKWatermark* _licenseWatermark = nullptr; @@ -123,6 +127,7 @@ class MVKSwapchain : public MVKVulkanAPIDeviceObject { std::mutex _presentHistoryLock; uint64_t _lastFrameTime = 0; VkExtent2D _mtlLayerDrawableExtent = {0, 0}; + std::atomic _unpresentedImageCount = 0; uint32_t _currentPerfLogFrameCount = 0; uint32_t _presentHistoryCount = 0; uint32_t _presentHistoryIndex = 0; diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKSwapchain.mm b/MoltenVK/MoltenVK/GPUObjects/MVKSwapchain.mm index 159c2edf1..63c3ac783 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKSwapchain.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKSwapchain.mm @@ -47,6 +47,8 @@ } } +CAMetalLayer* MVKSwapchain::getCAMetalLayer() { return _surface->getCAMetalLayer(); } + VkResult MVKSwapchain::getImages(uint32_t* pCount, VkImage* pSwapchainImages) { // Get the number of surface images @@ -104,7 +106,7 @@ getPresentableImage(pReleaseInfo->pImageIndices[imgIdxIdx])->makeAvailable(); } - return VK_SUCCESS; + return _surface->getConfigurationResult(); } uint64_t MVKSwapchain::getNextAcquisitionID() { return ++_currentAcquisitionID; } @@ -128,7 +130,7 @@ bool MVKSwapchain::hasOptimalSurface() { if (_isDeliberatelyScaled) { return true; } - auto* mtlLayer = _surface->getCAMetalLayer(); + auto* mtlLayer = getCAMetalLayer(); VkExtent2D drawExtent = mvkVkExtent2DFromCGSize(mtlLayer.drawableSize); return 
(mvkVkExtent2DsAreEqual(drawExtent, _mtlLayerDrawableExtent) && mvkVkExtent2DsAreEqual(drawExtent, mvkGetNaturalExtent(mtlLayer))); @@ -185,7 +187,7 @@ VkResult MVKSwapchain::getRefreshCycleDuration(VkRefreshCycleDurationGOOGLE *pRefreshCycleDuration) { if (_device->getConfigurationResult() != VK_SUCCESS) { return _device->getConfigurationResult(); } - auto* mtlLayer = _surface->getCAMetalLayer(); + auto* mtlLayer = getCAMetalLayer(); #if MVK_VISIONOS // TODO: See if this can be obtained from OS instead NSInteger framesPerSecond = 90; @@ -242,9 +244,13 @@ return res; } -void MVKSwapchain::beginPresentation(const MVKImagePresentInfo& presentInfo) {} +void MVKSwapchain::beginPresentation(const MVKImagePresentInfo& presentInfo) { + _unpresentedImageCount++; +} void MVKSwapchain::endPresentation(const MVKImagePresentInfo& presentInfo, uint64_t actualPresentTime) { + _unpresentedImageCount--; + std::lock_guard lock(_presentHistoryLock); markFrameInterval(); @@ -269,8 +275,18 @@ _presentHistoryIndex = (_presentHistoryIndex + 1) % kMaxPresentationHistory; } +// Because of a regression in Metal, the most recent one or two presentations may not complete +// and call back. To work around this, if there are any uncompleted presentations, change the +// drawableSize of the CAMetalLayer, which will trigger presentation completion and callbacks. +// The drawableSize will be set to a correct size by the next swapchain created on the same surface. 
+void MVKSwapchain::forceUnpresentedImageCompletion() { + if (_unpresentedImageCount) { + getCAMetalLayer().drawableSize = { 1,1 }; + } +} + void MVKSwapchain::setLayerNeedsDisplay(const VkPresentRegionKHR* pRegion) { - auto* mtlLayer = _surface->getCAMetalLayer(); + auto* mtlLayer = getCAMetalLayer(); if (!pRegion || pRegion->rectangleCount == 0) { [mtlLayer setNeedsDisplay]; return; @@ -350,7 +366,7 @@ static inline CIE1931XY VkXYColorEXTToCIE1931XY(VkXYColorEXT xy) { CAEDRMetadata* caMetadata = [CAEDRMetadata HDR10MetadataWithDisplayInfo: colorVolData contentInfo: lightLevelData opticalOutputScale: 1]; - auto* mtlLayer = _surface->getCAMetalLayer(); + auto* mtlLayer = getCAMetalLayer(); mtlLayer.EDRMetadata = caMetadata; mtlLayer.wantsExtendedDynamicRangeContent = YES; [caMetadata release]; @@ -456,7 +472,7 @@ static CALayerContentsGravity getCALayerContentsGravity(VkSwapchainPresentScalin if ( getIsSurfaceLost() ) { return; } - auto* mtlLayer = _surface->getCAMetalLayer(); + auto* mtlLayer = getCAMetalLayer(); auto minMagFilter = mvkConfig().swapchainMinMagFilterUseNearest ? kCAFilterNearest : kCAFilterLinear; mtlLayer.device = getMTLDevice(); mtlLayer.pixelFormat = getPixelFormats()->getMTLPixelFormat(pCreateInfo->imageFormat); @@ -469,6 +485,16 @@ static CALayerContentsGravity getCALayerContentsGravity(VkSwapchainPresentScalin VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_STORAGE_BIT)); + + // Because of a regression in Metal, the most recent one or two presentations may not + // complete and call back. Changing the CAMetalLayer drawableSize will force any incomplete + // presentations on the oldSwapchain to complete and call back, but if the drawableSize + // is not changing from the previous, we force those completions first. 
+ auto* oldSwapchain = (MVKSwapchain*)pCreateInfo->oldSwapchain; + if (oldSwapchain && mvkVkExtent2DsAreEqual(pCreateInfo->imageExtent, mvkVkExtent2DFromCGSize(mtlLayer.drawableSize))) { + oldSwapchain->forceUnpresentedImageCompletion(); + } + // Remember the extent to later detect if it has changed under the covers, // and set the drawable size of the CAMetalLayer from the extent. _mtlLayerDrawableExtent = pCreateInfo->imageExtent; @@ -559,7 +585,7 @@ static CALayerContentsGravity getCALayerContentsGravity(VkSwapchainPresentScalin } } - auto* mtlLayer = _surface->getCAMetalLayer(); + auto* mtlLayer = getCAMetalLayer(); VkExtent2D imgExtent = pCreateInfo->imageExtent; VkImageCreateInfo imgInfo = { .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, @@ -598,12 +624,17 @@ static CALayerContentsGravity getCALayerContentsGravity(VkSwapchainPresentScalin screenName = mtlLayer.screenMVK.localizedName; } #endif - MVKLogInfo("Created %d swapchain images with initial size (%d, %d) and contents scale %.1f for screen %s.", - imgCnt, imgExtent.width, imgExtent.height, mtlLayer.contentsScale, screenName.UTF8String); + MVKLogInfo("Created %d swapchain images with size (%d, %d) and contents scale %.1f in layer %s (%p) on screen %s.", + imgCnt, imgExtent.width, imgExtent.height, mtlLayer.contentsScale, mtlLayer.name.UTF8String, mtlLayer, screenName.UTF8String); } void MVKSwapchain::destroy() { - if (_surface->_activeSwapchain == this) { _surface->_activeSwapchain = nullptr; } + // If this swapchain was not replaced by a new swapchain, remove this swapchain + // from the surface, and force any outstanding presentations to complete. 
+ if (_surface->_activeSwapchain == this) { + _surface->_activeSwapchain = nullptr; + forceUnpresentedImageCompletion(); + } for (auto& img : _presentableImages) { _device->destroyPresentableSwapchainImage(img, NULL); } MVKVulkanAPIDeviceObject::destroy(); } From 6127918a32fa7df07b741c6547b4777029a38876 Mon Sep 17 00:00:00 2001 From: Bill Hollings Date: Mon, 18 Sep 2023 22:55:01 -0400 Subject: [PATCH 15/21] Add support for extension VK_KHR_synchronization2. - MVKPhysicalDevice add support for VkPhysicalDeviceSynchronization2Features. - Pass sync2 structs to MVKPipelineBarrier, MVKCmdPipelineBarrier, MVKCmdSetEvent, MVKCmdResetEvent, MVKCmdWaitEvents, MVKRenderPass, MVKQueue & MVKQueueSubmission. - Replace use of VkPipelineStageFlags & VkAccessFlags with VkPipelineStageFlags2 & VkAccessFlags2. - Add stage masks to MVKPipelineBarrier, and redefine apply*MemoryBarrier() functions to remove separately passing stage masks. - Add MVKSemaphoreSubmitInfo to track semaphores in MVKQueueSubmission. - Add MVKCommandBufferSubmitInfo to track command buffers in MVKQueueCommandBufferSubmission. - Add MVKSubpassDependency to combine VkSubpassDependency & VkMemoryBarrier2 in MVKRenderPass. - Remove abstract MVKCmdSetResetEvent superclass. - Streamline code in MVKMTLFunction::operator= (unrelated). 
--- Docs/MoltenVK_Runtime_UserGuide.md | 1 + Docs/Whats_New.md | 2 + MoltenVK/MoltenVK/API/mvk_datatypes.h | 8 +- MoltenVK/MoltenVK/Commands/MVKCmdPipeline.h | 41 ++-- MoltenVK/MoltenVK/Commands/MVKCmdPipeline.mm | 116 +++++++--- MoltenVK/MoltenVK/Commands/MVKCmdQueries.h | 4 +- MoltenVK/MoltenVK/Commands/MVKCmdQueries.mm | 4 +- .../Commands/MVKMTLResourceBindings.h | 62 +++++- MoltenVK/MoltenVK/GPUObjects/MVKBuffer.h | 12 +- MoltenVK/MoltenVK/GPUObjects/MVKBuffer.mm | 18 +- MoltenVK/MoltenVK/GPUObjects/MVKDevice.h | 4 +- MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm | 13 +- .../GPUObjects/MVKDeviceFeatureStructs.def | 1 + MoltenVK/MoltenVK/GPUObjects/MVKImage.h | 16 +- MoltenVK/MoltenVK/GPUObjects/MVKImage.mm | 22 +- MoltenVK/MoltenVK/GPUObjects/MVKQueue.h | 68 +++++- MoltenVK/MoltenVK/GPUObjects/MVKQueue.mm | 203 ++++++++++++++---- MoltenVK/MoltenVK/GPUObjects/MVKRenderPass.h | 18 +- MoltenVK/MoltenVK/GPUObjects/MVKRenderPass.mm | 48 +++-- MoltenVK/MoltenVK/GPUObjects/MVKResource.h | 4 +- .../MoltenVK/GPUObjects/MVKShaderModule.h | 2 +- .../MoltenVK/GPUObjects/MVKShaderModule.mm | 9 +- MoltenVK/MoltenVK/Layers/MVKExtensions.def | 1 + MoltenVK/MoltenVK/Vulkan/mvk_datatypes.mm | 42 ++-- MoltenVK/MoltenVK/Vulkan/vulkan.mm | 132 +++++++++--- 25 files changed, 606 insertions(+), 245 deletions(-) diff --git a/Docs/MoltenVK_Runtime_UserGuide.md b/Docs/MoltenVK_Runtime_UserGuide.md index 6684bb34a..57773c278 100644 --- a/Docs/MoltenVK_Runtime_UserGuide.md +++ b/Docs/MoltenVK_Runtime_UserGuide.md @@ -350,6 +350,7 @@ In addition to core *Vulkan* functionality, **MoltenVK** also supports the foll - `VK_KHR_surface` - `VK_KHR_swapchain` - `VK_KHR_swapchain_mutable_format` +- `VK_KHR_synchronization2` - `VK_KHR_timeline_semaphore` - `VK_KHR_uniform_buffer_standard_layout` - `VK_KHR_variable_pointers` diff --git a/Docs/Whats_New.md b/Docs/Whats_New.md index 14f6edfc3..d08bca1f0 100644 --- a/Docs/Whats_New.md +++ b/Docs/Whats_New.md @@ -18,6 +18,8 @@ MoltenVK 1.2.6 Released TBD +- 
Add support for extensions: + - `VK_KHR_synchronization2` - Fix rare case where vertex attribute buffers are not bound to Metal when no other bindings change between pipelines. - Ensure objects retained for life of `MTLCommandBuffer` during `vkCmdBlitImage()` & `vkQueuePresentKHR()`. diff --git a/MoltenVK/MoltenVK/API/mvk_datatypes.h b/MoltenVK/MoltenVK/API/mvk_datatypes.h index b0e2dac7c..8e5670c9f 100644 --- a/MoltenVK/MoltenVK/API/mvk_datatypes.h +++ b/MoltenVK/MoltenVK/API/mvk_datatypes.h @@ -414,13 +414,13 @@ MTLWinding mvkMTLWindingFromSpvExecutionMode(uint32_t spvMode); MTLTessellationPartitionMode mvkMTLTessellationPartitionModeFromSpvExecutionMode(uint32_t spvMode); /** - * Returns the combination of Metal MTLRenderStage bits corresponding to the specified Vulkan VkPiplineStageFlags, + * Returns the combination of Metal MTLRenderStage bits corresponding to the specified Vulkan VkPipelineStageFlags2, * taking into consideration whether the barrier is to be placed before or after the specified pipeline stages. */ -MTLRenderStages mvkMTLRenderStagesFromVkPipelineStageFlags(VkPipelineStageFlags vkStages, bool placeBarrierBefore); +MTLRenderStages mvkMTLRenderStagesFromVkPipelineStageFlags(VkPipelineStageFlags2 vkStages, bool placeBarrierBefore); -/** Returns the combination of Metal MTLBarrierScope bits corresponding to the specified Vulkan VkAccessFlags. */ -MTLBarrierScope mvkMTLBarrierScopeFromVkAccessFlags(VkAccessFlags vkAccess); +/** Returns the combination of Metal MTLBarrierScope bits corresponding to the specified Vulkan VkAccessFlags2. 
*/ +MTLBarrierScope mvkMTLBarrierScopeFromVkAccessFlags(VkAccessFlags2 vkAccess); #pragma mark - #pragma mark Geometry conversions diff --git a/MoltenVK/MoltenVK/Commands/MVKCmdPipeline.h b/MoltenVK/MoltenVK/Commands/MVKCmdPipeline.h index 8e1772566..aec8800c3 100644 --- a/MoltenVK/MoltenVK/Commands/MVKCmdPipeline.h +++ b/MoltenVK/MoltenVK/Commands/MVKCmdPipeline.h @@ -41,6 +41,9 @@ template class MVKCmdPipelineBarrier : public MVKCommand { public: + VkResult setContent(MVKCommandBuffer* cmdBuff, + const VkDependencyInfo* pDependencyInfo); + VkResult setContent(MVKCommandBuffer* cmdBuff, VkPipelineStageFlags srcStageMask, VkPipelineStageFlags dstStageMask, @@ -59,8 +62,6 @@ class MVKCmdPipelineBarrier : public MVKCommand { bool coversTextures(); MVKSmallVector _barriers; - VkPipelineStageFlags _srcStageMask; - VkPipelineStageFlags _dstStageMask; VkDependencyFlags _dependencyFlags; }; @@ -281,34 +282,26 @@ class MVKCmdPushDescriptorSetWithTemplate : public MVKCommand { #pragma mark - -#pragma mark MVKCmdSetResetEvent +#pragma mark MVKCmdSetEvent -/** Abstract Vulkan command to set or reset an event. */ -class MVKCmdSetResetEvent : public MVKCommand { +/** Vulkan command to set an event. */ +class MVKCmdSetEvent : public MVKCommand { public: VkResult setContent(MVKCommandBuffer* cmdBuff, VkEvent event, - VkPipelineStageFlags stageMask); - -protected: - MVKEvent* _mvkEvent; - -}; - - -#pragma mark - -#pragma mark MVKCmdSetEvent + const VkDependencyInfo* pDependencyInfo); -/** Vulkan command to set an event. */ -class MVKCmdSetEvent : public MVKCmdSetResetEvent { + VkResult setContent(MVKCommandBuffer* cmdBuff, + VkEvent event, + VkPipelineStageFlags stageMask); -public: void encode(MVKCommandEncoder* cmdEncoder) override; protected: MVKCommandTypePool* getTypePool(MVKCommandPool* cmdPool) override; + MVKEvent* _mvkEvent; }; @@ -316,14 +309,19 @@ class MVKCmdSetEvent : public MVKCmdSetResetEvent { #pragma mark MVKCmdResetEvent /** Vulkan command to reset an event. 
*/ -class MVKCmdResetEvent : public MVKCmdSetResetEvent { +class MVKCmdResetEvent : public MVKCommand { public: + VkResult setContent(MVKCommandBuffer* cmdBuff, + VkEvent event, + VkPipelineStageFlags2 stageMask); + void encode(MVKCommandEncoder* cmdEncoder) override; protected: MVKCommandTypePool* getTypePool(MVKCommandPool* cmdPool) override; + MVKEvent* _mvkEvent; }; @@ -339,6 +337,11 @@ template class MVKCmdWaitEvents : public MVKCommand { public: + VkResult setContent(MVKCommandBuffer* cmdBuff, + uint32_t eventCount, + const VkEvent* pEvents, + const VkDependencyInfo* pDependencyInfos); + VkResult setContent(MVKCommandBuffer* cmdBuff, uint32_t eventCount, const VkEvent* pEvents, diff --git a/MoltenVK/MoltenVK/Commands/MVKCmdPipeline.mm b/MoltenVK/MoltenVK/Commands/MVKCmdPipeline.mm index 3efcab53c..05e578f6e 100644 --- a/MoltenVK/MoltenVK/Commands/MVKCmdPipeline.mm +++ b/MoltenVK/MoltenVK/Commands/MVKCmdPipeline.mm @@ -29,6 +29,29 @@ #pragma mark - #pragma mark MVKCmdPipelineBarrier +template +VkResult MVKCmdPipelineBarrier::setContent(MVKCommandBuffer* cmdBuff, + const VkDependencyInfo* pDependencyInfo) { + _dependencyFlags = pDependencyInfo->dependencyFlags; + + _barriers.clear(); // Clear for reuse + _barriers.reserve(pDependencyInfo->memoryBarrierCount + + pDependencyInfo->bufferMemoryBarrierCount + + pDependencyInfo->imageMemoryBarrierCount); + + for (uint32_t i = 0; i < pDependencyInfo->memoryBarrierCount; i++) { + _barriers.emplace_back(pDependencyInfo->pMemoryBarriers[i]); + } + for (uint32_t i = 0; i < pDependencyInfo->bufferMemoryBarrierCount; i++) { + _barriers.emplace_back(pDependencyInfo->pBufferMemoryBarriers[i]); + } + for (uint32_t i = 0; i < pDependencyInfo->imageMemoryBarrierCount; i++) { + _barriers.emplace_back(pDependencyInfo->pImageMemoryBarriers[i]); + } + + return VK_SUCCESS; +} + template VkResult MVKCmdPipelineBarrier::setContent(MVKCommandBuffer* cmdBuff, VkPipelineStageFlags srcStageMask, @@ -40,21 +63,19 @@ const 
VkBufferMemoryBarrier* pBufferMemoryBarriers, uint32_t imageMemoryBarrierCount, const VkImageMemoryBarrier* pImageMemoryBarriers) { - _srcStageMask = srcStageMask; - _dstStageMask = dstStageMask; _dependencyFlags = dependencyFlags; _barriers.clear(); // Clear for reuse _barriers.reserve(memoryBarrierCount + bufferMemoryBarrierCount + imageMemoryBarrierCount); for (uint32_t i = 0; i < memoryBarrierCount; i++) { - _barriers.emplace_back(pMemoryBarriers[i]); + _barriers.emplace_back(pMemoryBarriers[i], srcStageMask, dstStageMask); } for (uint32_t i = 0; i < bufferMemoryBarrierCount; i++) { - _barriers.emplace_back(pBufferMemoryBarriers[i]); + _barriers.emplace_back(pBufferMemoryBarriers[i], srcStageMask, dstStageMask); } for (uint32_t i = 0; i < imageMemoryBarrierCount; i++) { - _barriers.emplace_back(pImageMemoryBarriers[i]); + _barriers.emplace_back(pImageMemoryBarriers[i], srcStageMask, dstStageMask); } return VK_SUCCESS; @@ -67,13 +88,9 @@ // Calls below invoke MTLBlitCommandEncoder so must apply this first. // Check if pipeline barriers are available and we are in a renderpass. 
if (cmdEncoder->getDevice()->_pMetalFeatures->memoryBarriers && cmdEncoder->_mtlRenderEncoder) { - MTLRenderStages srcStages = mvkMTLRenderStagesFromVkPipelineStageFlags(_srcStageMask, false); - MTLRenderStages dstStages = mvkMTLRenderStagesFromVkPipelineStageFlags(_dstStageMask, true); - - id resources[_barriers.size()]; - uint32_t rezCnt = 0; - for (auto& b : _barriers) { + MTLRenderStages srcStages = mvkMTLRenderStagesFromVkPipelineStageFlags(b.srcStageMask, false); + MTLRenderStages dstStages = mvkMTLRenderStagesFromVkPipelineStageFlags(b.dstStageMask, true); switch (b.type) { case MVKPipelineBarrier::Memory: { MTLBarrierScope scope = (mvkMTLBarrierScopeFromVkAccessFlags(b.srcAccessMask) | @@ -84,27 +101,30 @@ break; } - case MVKPipelineBarrier::Buffer: - resources[rezCnt++] = b.mvkBuffer->getMTLBuffer(); + case MVKPipelineBarrier::Buffer: { + id mtlRez = b.mvkBuffer->getMTLBuffer(); + [cmdEncoder->_mtlRenderEncoder memoryBarrierWithResources: &mtlRez + count: 1 + afterStages: srcStages + beforeStages: dstStages]; break; - - case MVKPipelineBarrier::Image: - for (uint8_t planeIndex = 0; planeIndex < b.mvkImage->getPlaneCount(); planeIndex++) { - resources[rezCnt++] = b.mvkImage->getMTLTexture(planeIndex); - } + } + case MVKPipelineBarrier::Image: { + uint32_t plnCnt = b.mvkImage->getPlaneCount(); + id mtlRezs[plnCnt]; + for (uint8_t plnIdx = 0; plnIdx < plnCnt; plnIdx++) { + mtlRezs[plnIdx] = b.mvkImage->getMTLTexture(plnIdx); + } + [cmdEncoder->_mtlRenderEncoder memoryBarrierWithResources: mtlRezs + count: plnCnt + afterStages: srcStages + beforeStages: dstStages]; break; - + } default: break; } } - - if (rezCnt) { - [cmdEncoder->_mtlRenderEncoder memoryBarrierWithResources: resources - count: rezCnt - afterStages: srcStages - beforeStages: dstStages]; - } } else if (cmdEncoder->getDevice()->_pMetalFeatures->textureBarriers) { #if !MVK_MACCAT if (coversTextures()) { [cmdEncoder->_mtlRenderEncoder textureBarrier]; } @@ -138,15 +158,15 @@ for (auto& b : 
_barriers) { switch (b.type) { case MVKPipelineBarrier::Memory: - mvkDvc->applyMemoryBarrier(_srcStageMask, _dstStageMask, b, cmdEncoder, cmdUse); + mvkDvc->applyMemoryBarrier(b, cmdEncoder, cmdUse); break; case MVKPipelineBarrier::Buffer: - b.mvkBuffer->applyBufferMemoryBarrier(_srcStageMask, _dstStageMask, b, cmdEncoder, cmdUse); + b.mvkBuffer->applyBufferMemoryBarrier(b, cmdEncoder, cmdUse); break; case MVKPipelineBarrier::Image: - b.mvkImage->applyImageMemoryBarrier(_srcStageMask, _dstStageMask, b, cmdEncoder, cmdUse); + b.mvkImage->applyImageMemoryBarrier(b, cmdEncoder, cmdUse); break; default: @@ -493,19 +513,23 @@ #pragma mark - -#pragma mark MVKCmdSetResetEvent +#pragma mark MVKCmdSetEvent -VkResult MVKCmdSetResetEvent::setContent(MVKCommandBuffer* cmdBuff, - VkEvent event, - VkPipelineStageFlags stageMask) { +VkResult MVKCmdSetEvent::setContent(MVKCommandBuffer* cmdBuff, + VkEvent event, + VkPipelineStageFlags stageMask) { _mvkEvent = (MVKEvent*)event; return VK_SUCCESS; } +VkResult MVKCmdSetEvent::setContent(MVKCommandBuffer* cmdBuff, + VkEvent event, + const VkDependencyInfo* pDependencyInfo) { + _mvkEvent = (MVKEvent*)event; -#pragma mark - -#pragma mark MVKCmdSetEvent + return VK_SUCCESS; +} void MVKCmdSetEvent::encode(MVKCommandEncoder* cmdEncoder) { cmdEncoder->signalEvent(_mvkEvent, true); @@ -515,6 +539,14 @@ #pragma mark - #pragma mark MVKCmdResetEvent +VkResult MVKCmdResetEvent::setContent(MVKCommandBuffer* cmdBuff, + VkEvent event, + VkPipelineStageFlags2 stageMask) { + _mvkEvent = (MVKEvent*)event; + + return VK_SUCCESS; +} + void MVKCmdResetEvent::encode(MVKCommandEncoder* cmdEncoder) { cmdEncoder->signalEvent(_mvkEvent, false); } @@ -523,6 +555,20 @@ #pragma mark - #pragma mark MVKCmdWaitEvents +template +VkResult MVKCmdWaitEvents::setContent(MVKCommandBuffer* cmdBuff, + uint32_t eventCount, + const VkEvent* pEvents, + const VkDependencyInfo* pDependencyInfos) { + _mvkEvents.clear(); // Clear for reuse + _mvkEvents.reserve(eventCount); + for 
(uint32_t i = 0; i < eventCount; i++) { + _mvkEvents.push_back((MVKEvent*)pEvents[i]); + } + + return VK_SUCCESS; +} + template VkResult MVKCmdWaitEvents::setContent(MVKCommandBuffer* cmdBuff, uint32_t eventCount, diff --git a/MoltenVK/MoltenVK/Commands/MVKCmdQueries.h b/MoltenVK/MoltenVK/Commands/MVKCmdQueries.h index baa588634..6b3686e80 100644 --- a/MoltenVK/MoltenVK/Commands/MVKCmdQueries.h +++ b/MoltenVK/MoltenVK/Commands/MVKCmdQueries.h @@ -85,7 +85,7 @@ class MVKCmdWriteTimestamp : public MVKCmdQuery { public: VkResult setContent(MVKCommandBuffer* cmdBuff, - VkPipelineStageFlagBits pipelineStage, + VkPipelineStageFlags2 stage, VkQueryPool queryPool, uint32_t query); @@ -94,7 +94,7 @@ class MVKCmdWriteTimestamp : public MVKCmdQuery { protected: MVKCommandTypePool* getTypePool(MVKCommandPool* cmdPool) override; - VkPipelineStageFlagBits _pipelineStage; + VkPipelineStageFlags2 _stage; }; diff --git a/MoltenVK/MoltenVK/Commands/MVKCmdQueries.mm b/MoltenVK/MoltenVK/Commands/MVKCmdQueries.mm index bc5ba9c6d..aac431fb1 100644 --- a/MoltenVK/MoltenVK/Commands/MVKCmdQueries.mm +++ b/MoltenVK/MoltenVK/Commands/MVKCmdQueries.mm @@ -77,13 +77,13 @@ #pragma mark MVKCmdWriteTimestamp VkResult MVKCmdWriteTimestamp::setContent(MVKCommandBuffer* cmdBuff, - VkPipelineStageFlagBits pipelineStage, + VkPipelineStageFlags2 stage, VkQueryPool queryPool, uint32_t query) { VkResult rslt = MVKCmdQuery::setContent(cmdBuff, queryPool, query); - _pipelineStage = pipelineStage; + _stage = stage; cmdBuff->recordTimestampCommand(); diff --git a/MoltenVK/MoltenVK/Commands/MVKMTLResourceBindings.h b/MoltenVK/MoltenVK/Commands/MVKMTLResourceBindings.h index de72f06dd..3eeb7d426 100644 --- a/MoltenVK/MoltenVK/Commands/MVKMTLResourceBindings.h +++ b/MoltenVK/MoltenVK/Commands/MVKMTLResourceBindings.h @@ -112,8 +112,10 @@ typedef struct MVKPipelineBarrier { } MVKPipelineBarrierType; MVKPipelineBarrierType type = None; - VkAccessFlags srcAccessMask = 0; - VkAccessFlags dstAccessMask = 0; + 
VkPipelineStageFlags2 srcStageMask = 0; + VkAccessFlags2 srcAccessMask = 0; + VkPipelineStageFlags2 dstStageMask = 0; + VkAccessFlags2 dstAccessMask = 0; uint8_t srcQueueFamilyIndex = 0; uint8_t dstQueueFamilyIndex = 0; union { MVKBuffer* mvkBuffer = nullptr; MVKImage* mvkImage; MVKResource* mvkResource; }; @@ -136,15 +138,44 @@ typedef struct MVKPipelineBarrier { bool isBufferBarrier() { return type == Buffer; } bool isImageBarrier() { return type == Image; } - MVKPipelineBarrier(const VkMemoryBarrier& vkBarrier) : + MVKPipelineBarrier(const VkMemoryBarrier2& vkBarrier) : type(Memory), + srcStageMask(vkBarrier.srcStageMask), srcAccessMask(vkBarrier.srcAccessMask), + dstStageMask(vkBarrier.dstStageMask), dstAccessMask(vkBarrier.dstAccessMask) {} - MVKPipelineBarrier(const VkBufferMemoryBarrier& vkBarrier) : + MVKPipelineBarrier(const VkMemoryBarrier& vkBarrier, + VkPipelineStageFlags srcStageMask, + VkPipelineStageFlags dstStageMask) : + type(Memory), + srcStageMask(srcStageMask), + srcAccessMask(vkBarrier.srcAccessMask), + dstStageMask(dstStageMask), + dstAccessMask(vkBarrier.dstAccessMask) + {} + + MVKPipelineBarrier(const VkBufferMemoryBarrier2& vkBarrier) : + type(Buffer), + srcStageMask(vkBarrier.srcStageMask), + srcAccessMask(vkBarrier.srcAccessMask), + dstStageMask(vkBarrier.dstStageMask), + dstAccessMask(vkBarrier.dstAccessMask), + srcQueueFamilyIndex(vkBarrier.srcQueueFamilyIndex), + dstQueueFamilyIndex(vkBarrier.dstQueueFamilyIndex), + mvkBuffer((MVKBuffer*)vkBarrier.buffer), + offset(vkBarrier.offset), + size(vkBarrier.size) + {} + + MVKPipelineBarrier(const VkBufferMemoryBarrier& vkBarrier, + VkPipelineStageFlags srcStageMask, + VkPipelineStageFlags dstStageMask) : type(Buffer), + srcStageMask(srcStageMask), srcAccessMask(vkBarrier.srcAccessMask), + dstStageMask(dstStageMask), dstAccessMask(vkBarrier.dstAccessMask), srcQueueFamilyIndex(vkBarrier.srcQueueFamilyIndex), dstQueueFamilyIndex(vkBarrier.dstQueueFamilyIndex), @@ -153,9 +184,30 @@ typedef struct 
MVKPipelineBarrier { size(vkBarrier.size) {} - MVKPipelineBarrier(const VkImageMemoryBarrier& vkBarrier) : + MVKPipelineBarrier(const VkImageMemoryBarrier2& vkBarrier) : + type(Image), + srcStageMask(vkBarrier.srcStageMask), + srcAccessMask(vkBarrier.srcAccessMask), + dstStageMask(vkBarrier.dstStageMask), + dstAccessMask(vkBarrier.dstAccessMask), + srcQueueFamilyIndex(vkBarrier.srcQueueFamilyIndex), + dstQueueFamilyIndex(vkBarrier.dstQueueFamilyIndex), + mvkImage((MVKImage*)vkBarrier.image), + newLayout(vkBarrier.newLayout), + aspectMask(vkBarrier.subresourceRange.aspectMask), + baseArrayLayer(vkBarrier.subresourceRange.baseArrayLayer), + layerCount(vkBarrier.subresourceRange.layerCount), + baseMipLevel(vkBarrier.subresourceRange.baseMipLevel), + levelCount(vkBarrier.subresourceRange.levelCount) + {} + + MVKPipelineBarrier(const VkImageMemoryBarrier& vkBarrier, + VkPipelineStageFlags srcStageMask, + VkPipelineStageFlags dstStageMask) : type(Image), + srcStageMask(srcStageMask), srcAccessMask(vkBarrier.srcAccessMask), + dstStageMask(dstStageMask), dstAccessMask(vkBarrier.dstAccessMask), srcQueueFamilyIndex(vkBarrier.srcQueueFamilyIndex), dstQueueFamilyIndex(vkBarrier.dstQueueFamilyIndex), diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKBuffer.h b/MoltenVK/MoltenVK/GPUObjects/MVKBuffer.h index 2e338ce7a..95fdf681b 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKBuffer.h +++ b/MoltenVK/MoltenVK/GPUObjects/MVKBuffer.h @@ -52,16 +52,12 @@ class MVKBuffer : public MVKResource { VkResult bindDeviceMemory2(const VkBindBufferMemoryInfo* pBindInfo); /** Applies the specified global memory barrier. */ - void applyMemoryBarrier(VkPipelineStageFlags srcStageMask, - VkPipelineStageFlags dstStageMask, - MVKPipelineBarrier& barrier, + void applyMemoryBarrier(MVKPipelineBarrier& barrier, MVKCommandEncoder* cmdEncoder, MVKCommandUse cmdUse) override; /** Applies the specified buffer memory barrier. 
*/ - void applyBufferMemoryBarrier(VkPipelineStageFlags srcStageMask, - VkPipelineStageFlags dstStageMask, - MVKPipelineBarrier& barrier, + void applyBufferMemoryBarrier(MVKPipelineBarrier& barrier, MVKCommandEncoder* cmdEncoder, MVKCommandUse cmdUse); @@ -95,9 +91,7 @@ class MVKBuffer : public MVKResource { friend class MVKDeviceMemory; void propagateDebugName() override; - bool needsHostReadSync(VkPipelineStageFlags srcStageMask, - VkPipelineStageFlags dstStageMask, - MVKPipelineBarrier& barrier); + bool needsHostReadSync(MVKPipelineBarrier& barrier); bool overlaps(VkDeviceSize offset, VkDeviceSize size, VkDeviceSize &overlapOffset, VkDeviceSize &overlapSize); bool shouldFlushHostMemory(); VkResult flushToDevice(VkDeviceSize offset, VkDeviceSize size); diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKBuffer.mm b/MoltenVK/MoltenVK/GPUObjects/MVKBuffer.mm index a99f4f0fc..41ee4cef1 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKBuffer.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKBuffer.mm @@ -94,25 +94,21 @@ return bindDeviceMemory((MVKDeviceMemory*)pBindInfo->memory, pBindInfo->memoryOffset); } -void MVKBuffer::applyMemoryBarrier(VkPipelineStageFlags srcStageMask, - VkPipelineStageFlags dstStageMask, - MVKPipelineBarrier& barrier, +void MVKBuffer::applyMemoryBarrier(MVKPipelineBarrier& barrier, MVKCommandEncoder* cmdEncoder, MVKCommandUse cmdUse) { #if MVK_MACOS - if ( needsHostReadSync(srcStageMask, dstStageMask, barrier) ) { + if ( needsHostReadSync(barrier) ) { [cmdEncoder->getMTLBlitEncoder(cmdUse) synchronizeResource: getMTLBuffer()]; } #endif } -void MVKBuffer::applyBufferMemoryBarrier(VkPipelineStageFlags srcStageMask, - VkPipelineStageFlags dstStageMask, - MVKPipelineBarrier& barrier, +void MVKBuffer::applyBufferMemoryBarrier(MVKPipelineBarrier& barrier, MVKCommandEncoder* cmdEncoder, MVKCommandUse cmdUse) { #if MVK_MACOS - if ( needsHostReadSync(srcStageMask, dstStageMask, barrier) ) { + if ( needsHostReadSync(barrier) ) { [cmdEncoder->getMTLBlitEncoder(cmdUse) 
synchronizeResource: getMTLBuffer()]; } #endif @@ -120,11 +116,9 @@ // Returns whether the specified buffer memory barrier requires a sync between this // buffer and host memory for the purpose of the host reading texture memory. -bool MVKBuffer::needsHostReadSync(VkPipelineStageFlags srcStageMask, - VkPipelineStageFlags dstStageMask, - MVKPipelineBarrier& barrier) { +bool MVKBuffer::needsHostReadSync(MVKPipelineBarrier& barrier) { #if MVK_MACOS - return (mvkIsAnyFlagEnabled(dstStageMask, (VK_PIPELINE_STAGE_HOST_BIT)) && + return (mvkIsAnyFlagEnabled(barrier.dstStageMask, (VK_PIPELINE_STAGE_HOST_BIT)) && mvkIsAnyFlagEnabled(barrier.dstAccessMask, (VK_ACCESS_HOST_READ_BIT)) && isMemoryHostAccessible() && (!isMemoryHostCoherent() || _isHostCoherentTexelBuffer)); #endif diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h index 7a04e90e0..b6b462ad7 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h +++ b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h @@ -681,9 +681,7 @@ class MVKDevice : public MVKDispatchableVulkanAPIObject { void removeTimelineSemaphore(MVKTimelineSemaphore* sem4, uint64_t value); /** Applies the specified global memory barrier to all resource issued by this device. 
*/ - void applyMemoryBarrier(VkPipelineStageFlags srcStageMask, - VkPipelineStageFlags dstStageMask, - MVKPipelineBarrier& barrier, + void applyMemoryBarrier(MVKPipelineBarrier& barrier, MVKCommandEncoder* cmdEncoder, MVKCommandUse cmdUse); diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm index 422f1b43e..acd50514e 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm @@ -320,6 +320,11 @@ subgroupSizeFeatures->computeFullSubgroups = _metalFeatures.simdPermute || _metalFeatures.quadPermute; break; } + case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SYNCHRONIZATION_2_FEATURES: { + auto* synch2Features = (VkPhysicalDeviceSynchronization2Features*)next; + synch2Features->synchronization2 = true; + break; + } case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TEXTURE_COMPRESSION_ASTC_HDR_FEATURES: { auto* astcHDRFeatures = (VkPhysicalDeviceTextureCompressionASTCHDRFeatures*)next; astcHDRFeatures->textureCompressionASTC_HDR = _metalFeatures.astcHDRTextures; @@ -4172,16 +4177,14 @@ static uint32_t mvkGetEntryProperty(io_registry_entry_t entry, CFStringRef prope mvkRemoveFirstOccurance(_awaitingTimelineSem4s, make_pair(sem4, value)); } -void MVKDevice::applyMemoryBarrier(VkPipelineStageFlags srcStageMask, - VkPipelineStageFlags dstStageMask, - MVKPipelineBarrier& barrier, +void MVKDevice::applyMemoryBarrier(MVKPipelineBarrier& barrier, MVKCommandEncoder* cmdEncoder, MVKCommandUse cmdUse) { - if (!mvkIsAnyFlagEnabled(dstStageMask, VK_PIPELINE_STAGE_HOST_BIT) || + if (!mvkIsAnyFlagEnabled(barrier.dstStageMask, VK_PIPELINE_STAGE_HOST_BIT) || !mvkIsAnyFlagEnabled(barrier.dstAccessMask, VK_ACCESS_HOST_READ_BIT) ) { return; } lock_guard lock(_rezLock); for (auto& rez : _resources) { - rez->applyMemoryBarrier(srcStageMask, dstStageMask, barrier, cmdEncoder, cmdUse); + rez->applyMemoryBarrier(barrier, cmdEncoder, cmdUse); } } diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDeviceFeatureStructs.def 
b/MoltenVK/MoltenVK/GPUObjects/MVKDeviceFeatureStructs.def index c0bbb4816..88a3a33f1 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKDeviceFeatureStructs.def +++ b/MoltenVK/MoltenVK/GPUObjects/MVKDeviceFeatureStructs.def @@ -55,6 +55,7 @@ MVK_DEVICE_FEATURE(ShaderAtomicInt64, SHADER_ATOMIC_INT64, MVK_DEVICE_FEATURE(ShaderFloat16Int8, SHADER_FLOAT16_INT8, 2) MVK_DEVICE_FEATURE(ShaderSubgroupExtendedTypes, SHADER_SUBGROUP_EXTENDED_TYPES, 1) MVK_DEVICE_FEATURE(SubgroupSizeControl, SUBGROUP_SIZE_CONTROL, 2) +MVK_DEVICE_FEATURE(Synchronization2, SYNCHRONIZATION_2, 1) MVK_DEVICE_FEATURE(TextureCompressionASTCHDR, TEXTURE_COMPRESSION_ASTC_HDR, 1) MVK_DEVICE_FEATURE(TimelineSemaphore, TIMELINE_SEMAPHORE, 1) MVK_DEVICE_FEATURE(UniformBufferStandardLayout, UNIFORM_BUFFER_STANDARD_LAYOUT, 1) diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKImage.h b/MoltenVK/MoltenVK/GPUObjects/MVKImage.h index ef606b035..900b10ffa 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKImage.h +++ b/MoltenVK/MoltenVK/GPUObjects/MVKImage.h @@ -74,9 +74,7 @@ class MVKImagePlane : public MVKBaseObject { bool overlaps(VkSubresourceLayout& imgLayout, VkDeviceSize offset, VkDeviceSize size); void propagateDebugName(); MVKImageMemoryBinding* getMemoryBinding() const; - void applyImageMemoryBarrier(VkPipelineStageFlags srcStageMask, - VkPipelineStageFlags dstStageMask, - MVKPipelineBarrier& barrier, + void applyImageMemoryBarrier(MVKPipelineBarrier& barrier, MVKCommandEncoder* cmdEncoder, MVKCommandUse cmdUse); void pullFromDeviceOnCompletion(MVKCommandEncoder* cmdEncoder, @@ -119,9 +117,7 @@ class MVKImageMemoryBinding : public MVKResource { VkResult bindDeviceMemory(MVKDeviceMemory* mvkMem, VkDeviceSize memOffset) override; /** Applies the specified global memory barrier. 
*/ - void applyMemoryBarrier(VkPipelineStageFlags srcStageMask, - VkPipelineStageFlags dstStageMask, - MVKPipelineBarrier& barrier, + void applyMemoryBarrier(MVKPipelineBarrier& barrier, MVKCommandEncoder* cmdEncoder, MVKCommandUse cmdUse) override; @@ -133,9 +129,7 @@ class MVKImageMemoryBinding : public MVKResource { friend MVKImage; void propagateDebugName() override; - bool needsHostReadSync(VkPipelineStageFlags srcStageMask, - VkPipelineStageFlags dstStageMask, - MVKPipelineBarrier& barrier); + bool needsHostReadSync(MVKPipelineBarrier& barrier); bool shouldFlushHostMemory(); VkResult flushToDevice(VkDeviceSize offset, VkDeviceSize size); VkResult pullFromDevice(VkDeviceSize offset, VkDeviceSize size); @@ -251,9 +245,7 @@ class MVKImage : public MVKVulkanAPIDeviceObject { virtual VkResult bindDeviceMemory2(const VkBindImageMemoryInfo* pBindInfo); /** Applies the specified image memory barrier. */ - void applyImageMemoryBarrier(VkPipelineStageFlags srcStageMask, - VkPipelineStageFlags dstStageMask, - MVKPipelineBarrier& barrier, + void applyImageMemoryBarrier(MVKPipelineBarrier& barrier, MVKCommandEncoder* cmdEncoder, MVKCommandUse cmdUse); diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKImage.mm b/MoltenVK/MoltenVK/GPUObjects/MVKImage.mm index f09495c74..c605f45a9 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKImage.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKImage.mm @@ -304,9 +304,7 @@ return (_image->_memoryBindings.size() > 1) ? 
_image->_memoryBindings[_planeIndex] : _image->_memoryBindings[0]; } -void MVKImagePlane::applyImageMemoryBarrier(VkPipelineStageFlags srcStageMask, - VkPipelineStageFlags dstStageMask, - MVKPipelineBarrier& barrier, +void MVKImagePlane::applyImageMemoryBarrier(MVKPipelineBarrier& barrier, MVKCommandEncoder* cmdEncoder, MVKCommandUse cmdUse) { @@ -323,7 +321,7 @@ : (layerStart + barrier.layerCount)); MVKImageMemoryBinding* memBind = getMemoryBinding(); - bool needsSync = memBind->needsHostReadSync(srcStageMask, dstStageMask, barrier); + bool needsSync = memBind->needsHostReadSync(barrier); bool needsPull = ((!memBind->_mtlTexelBuffer || memBind->_ownsTexelBuffer) && memBind->isMemoryHostCoherent() && barrier.newLayout == VK_IMAGE_LAYOUT_GENERAL && @@ -444,13 +442,11 @@ return _deviceMemory ? _deviceMemory->addImageMemoryBinding(this) : VK_SUCCESS; } -void MVKImageMemoryBinding::applyMemoryBarrier(VkPipelineStageFlags srcStageMask, - VkPipelineStageFlags dstStageMask, - MVKPipelineBarrier& barrier, +void MVKImageMemoryBinding::applyMemoryBarrier(MVKPipelineBarrier& barrier, MVKCommandEncoder* cmdEncoder, MVKCommandUse cmdUse) { #if MVK_MACOS - if ( needsHostReadSync(srcStageMask, dstStageMask, barrier) ) { + if (needsHostReadSync(barrier)) { for(uint8_t planeIndex = beginPlaneIndex(); planeIndex < endPlaneIndex(); planeIndex++) { [cmdEncoder->getMTLBlitEncoder(cmdUse) synchronizeResource: _image->_planes[planeIndex]->_mtlTexture]; } @@ -469,9 +465,7 @@ // Returns whether the specified image memory barrier requires a sync between this // texture and host memory for the purpose of the host reading texture memory. 
-bool MVKImageMemoryBinding::needsHostReadSync(VkPipelineStageFlags srcStageMask, - VkPipelineStageFlags dstStageMask, - MVKPipelineBarrier& barrier) { +bool MVKImageMemoryBinding::needsHostReadSync(MVKPipelineBarrier& barrier) { #if MVK_MACOS return ((barrier.newLayout == VK_IMAGE_LAYOUT_GENERAL) && mvkIsAnyFlagEnabled(barrier.dstAccessMask, (VK_ACCESS_HOST_READ_BIT | VK_ACCESS_MEMORY_READ_BIT)) && @@ -625,15 +619,13 @@ #pragma mark Resource memory -void MVKImage::applyImageMemoryBarrier(VkPipelineStageFlags srcStageMask, - VkPipelineStageFlags dstStageMask, - MVKPipelineBarrier& barrier, +void MVKImage::applyImageMemoryBarrier(MVKPipelineBarrier& barrier, MVKCommandEncoder* cmdEncoder, MVKCommandUse cmdUse) { for (uint8_t planeIndex = 0; planeIndex < _planes.size(); planeIndex++) { if ( !_hasChromaSubsampling || mvkIsAnyFlagEnabled(barrier.aspectMask, (VK_IMAGE_ASPECT_PLANE_0_BIT << planeIndex)) ) { - _planes[planeIndex]->applyImageMemoryBarrier(srcStageMask, dstStageMask, barrier, cmdEncoder, cmdUse); + _planes[planeIndex]->applyImageMemoryBarrier(barrier, cmdEncoder, cmdUse); } } } diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKQueue.h b/MoltenVK/MoltenVK/GPUObjects/MVKQueue.h index c3b1d242f..b4509f0b7 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKQueue.h +++ b/MoltenVK/MoltenVK/GPUObjects/MVKQueue.h @@ -92,7 +92,8 @@ class MVKQueue : public MVKDispatchableVulkanAPIObject, public MVKDeviceTracking #pragma mark Queue submissions /** Submits the specified command buffers to the queue. */ - VkResult submit(uint32_t submitCount, const VkSubmitInfo* pSubmits, VkFence fence, MVKCommandUse cmdUse); + template + VkResult submit(uint32_t submitCount, const S* pSubmits, VkFence fence, MVKCommandUse cmdUse); /** Submits the specified presentation command to the queue. 
*/ VkResult submit(const VkPresentInfoKHR* pPresentInfo); @@ -164,6 +165,24 @@ class MVKQueue : public MVKDispatchableVulkanAPIObject, public MVKDeviceTracking #pragma mark - #pragma mark MVKQueueSubmission +typedef struct MVKSemaphoreSubmitInfo { +private: + MVKSemaphore* _semaphore; +public: + uint64_t value; + VkPipelineStageFlags2 stageMask; + uint32_t deviceIndex; + + void encodeWait(id mtlCmdBuff); + void encodeSignal(id mtlCmdBuff); + MVKSemaphoreSubmitInfo(const VkSemaphoreSubmitInfo& semaphoreSubmitInfo); + MVKSemaphoreSubmitInfo(const VkSemaphore semaphore, VkPipelineStageFlags stageMask); + MVKSemaphoreSubmitInfo(const MVKSemaphoreSubmitInfo& other); + MVKSemaphoreSubmitInfo& operator=(const MVKSemaphoreSubmitInfo& other); + ~MVKSemaphoreSubmitInfo(); + +} MVKSemaphoreSubmitInfo; + /** This is an abstract class for an operation that can be submitted to an MVKQueue. */ class MVKQueueSubmission : public MVKBaseObject, public MVKConfigurableMixin { @@ -179,9 +198,14 @@ class MVKQueueSubmission : public MVKBaseObject, public MVKConfigurableMixin { */ virtual VkResult execute() = 0; + MVKQueueSubmission(MVKQueue* queue, + uint32_t waitSemaphoreInfoCount, + const VkSemaphoreSubmitInfo* pWaitSemaphoreSubmitInfos); + MVKQueueSubmission(MVKQueue* queue, uint32_t waitSemaphoreCount, - const VkSemaphore* pWaitSemaphores); + const VkSemaphore* pWaitSemaphores, + const VkPipelineStageFlags* pWaitDstStageMask); ~MVKQueueSubmission() override; @@ -192,13 +216,22 @@ class MVKQueueSubmission : public MVKBaseObject, public MVKConfigurableMixin { MVKDevice* getDevice() { return _queue->getDevice(); } MVKQueue* _queue; - MVKSmallVector> _waitSemaphores; + MVKSmallVector _waitSemaphores; }; #pragma mark - #pragma mark MVKQueueCommandBufferSubmission +typedef struct MVKCommandBufferSubmitInfo { + MVKCommandBuffer* commandBuffer; + uint32_t deviceMask; + + MVKCommandBufferSubmitInfo(const VkCommandBufferSubmitInfo& commandBufferInfo); + 
MVKCommandBufferSubmitInfo(VkCommandBuffer commandBuffer); + +} MVKCommandBufferSubmitInfo; + /** * Submits an empty set of command buffers to the queue. * Used for fence-only command submissions. @@ -208,7 +241,15 @@ class MVKQueueCommandBufferSubmission : public MVKQueueSubmission { public: VkResult execute() override; - MVKQueueCommandBufferSubmission(MVKQueue* queue, const VkSubmitInfo* pSubmit, VkFence fence, MVKCommandUse cmdUse); + MVKQueueCommandBufferSubmission(MVKQueue* queue, + const VkSubmitInfo2* pSubmit, + VkFence fence, + MVKCommandUse cmdUse); + + MVKQueueCommandBufferSubmission(MVKQueue* queue, + const VkSubmitInfo* pSubmit, + VkFence fence, + MVKCommandUse cmdUse); ~MVKQueueCommandBufferSubmission() override; @@ -222,11 +263,11 @@ class MVKQueueCommandBufferSubmission : public MVKQueueSubmission { virtual void submitCommandBuffers() {} MVKCommandEncodingContext _encodingContext; - MVKSmallVector> _signalSemaphores; - MVKFence* _fence; - id _activeMTLCommandBuffer; - MVKCommandUse _commandUse; - bool _emulatedWaitDone; //Used to track if we've already waited for emulated semaphores. + MVKSmallVector _signalSemaphores; + MVKFence* _fence = nullptr; + id _activeMTLCommandBuffer = nil; + MVKCommandUse _commandUse = kMVKCommandUseNone; + bool _emulatedWaitDone = false; //Used to track if we've already waited for emulated semaphores. 
}; @@ -238,7 +279,12 @@ template class MVKQueueFullCommandBufferSubmission : public MVKQueueCommandBufferSubmission { public: - MVKQueueFullCommandBufferSubmission(MVKQueue* queue, + MVKQueueFullCommandBufferSubmission(MVKQueue* queue, + const VkSubmitInfo2* pSubmit, + VkFence fence, + MVKCommandUse cmdUse); + + MVKQueueFullCommandBufferSubmission(MVKQueue* queue, const VkSubmitInfo* pSubmit, VkFence fence, MVKCommandUse cmdUse); @@ -246,7 +292,7 @@ class MVKQueueFullCommandBufferSubmission : public MVKQueueCommandBufferSubmissi protected: void submitCommandBuffers() override; - MVKSmallVector _cmdBuffers; + MVKSmallVector _cmdBuffers; }; diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKQueue.mm b/MoltenVK/MoltenVK/GPUObjects/MVKQueue.mm index 401fa8b22..9b4afdf83 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKQueue.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKQueue.mm @@ -92,20 +92,24 @@ return rslt; } -VkResult MVKQueue::submit(uint32_t submitCount, const VkSubmitInfo* pSubmits, VkFence fence, MVKCommandUse cmdUse) { +static inline uint32_t getCommandBufferCount(const VkSubmitInfo2* pSubmitInfo) { return pSubmitInfo->commandBufferInfoCount; } +static inline uint32_t getCommandBufferCount(const VkSubmitInfo* pSubmitInfo) { return pSubmitInfo->commandBufferCount; } + +template +VkResult MVKQueue::submit(uint32_t submitCount, const S* pSubmits, VkFence fence, MVKCommandUse cmdUse) { // Fence-only submission if (submitCount == 0 && fence) { - return submit(new MVKQueueCommandBufferSubmission(this, nullptr, fence, cmdUse)); + return submit(new MVKQueueCommandBufferSubmission(this, (S*)nullptr, fence, cmdUse)); } VkResult rslt = VK_SUCCESS; for (uint32_t sIdx = 0; sIdx < submitCount; sIdx++) { VkFence fenceOrNil = (sIdx == (submitCount - 1)) ? 
fence : VK_NULL_HANDLE; // last one gets the fence - const VkSubmitInfo* pVkSub = &pSubmits[sIdx]; + const S* pVkSub = &pSubmits[sIdx]; MVKQueueCommandBufferSubmission* mvkSub; - uint32_t cbCnt = pVkSub->commandBufferCount; + uint32_t cbCnt = getCommandBufferCount(pVkSub); if (cbCnt <= 1) { mvkSub = new MVKQueueFullCommandBufferSubmission<1>(this, pVkSub, fenceOrNil, cmdUse); } else if (cbCnt <= 16) { @@ -128,6 +132,10 @@ return rslt; } +// Concrete implementations of templated MVKQueue::submit(). +template VkResult MVKQueue::submit(uint32_t submitCount, const VkSubmitInfo2* pSubmits, VkFence fence, MVKCommandUse cmdUse); +template VkResult MVKQueue::submit(uint32_t submitCount, const VkSubmitInfo* pSubmits, VkFence fence, MVKCommandUse cmdUse); + VkResult MVKQueue::submit(const VkPresentInfoKHR* pPresentInfo) { return submit(new MVKQueuePresentSurfaceSubmission(this, pPresentInfo)); } @@ -344,23 +352,89 @@ #pragma mark - #pragma mark MVKQueueSubmission +void MVKSemaphoreSubmitInfo::encodeWait(id mtlCmdBuff) { + if (_semaphore) { _semaphore->encodeWait(mtlCmdBuff, value); } +} + +void MVKSemaphoreSubmitInfo::encodeSignal(id mtlCmdBuff) { + if (_semaphore) { _semaphore->encodeSignal(mtlCmdBuff, value); } +} + +MVKSemaphoreSubmitInfo::MVKSemaphoreSubmitInfo(const VkSemaphoreSubmitInfo& semaphoreSubmitInfo) : + _semaphore((MVKSemaphore*)semaphoreSubmitInfo.semaphore), + value(semaphoreSubmitInfo.value), + stageMask(semaphoreSubmitInfo.stageMask), + deviceIndex(semaphoreSubmitInfo.deviceIndex) { + if (_semaphore) { _semaphore->retain(); } +} + +MVKSemaphoreSubmitInfo::MVKSemaphoreSubmitInfo(const VkSemaphore semaphore, + VkPipelineStageFlags stageMask) : + _semaphore((MVKSemaphore*)semaphore), + value(0), + stageMask(stageMask), + deviceIndex(0) { + if (_semaphore) { _semaphore->retain(); } +} + +MVKSemaphoreSubmitInfo::MVKSemaphoreSubmitInfo(const MVKSemaphoreSubmitInfo& other) : + _semaphore(other._semaphore), + value(other.value), + stageMask(other.stageMask), + 
deviceIndex(other.deviceIndex) { + if (_semaphore) { _semaphore->retain(); } +} + +MVKSemaphoreSubmitInfo& MVKSemaphoreSubmitInfo::operator=(const MVKSemaphoreSubmitInfo& other) { + // Retain new object first in case it's the same object + if (other._semaphore) {other._semaphore->retain(); } + if (_semaphore) { _semaphore->release(); } + _semaphore = other._semaphore; + + value = other.value; + stageMask = other.stageMask; + deviceIndex = other.deviceIndex; + return *this; +} + +MVKSemaphoreSubmitInfo::~MVKSemaphoreSubmitInfo() { + if (_semaphore) { _semaphore->release(); } +} + +MVKCommandBufferSubmitInfo::MVKCommandBufferSubmitInfo(const VkCommandBufferSubmitInfo& commandBufferInfo) : + commandBuffer(MVKCommandBuffer::getMVKCommandBuffer(commandBufferInfo.commandBuffer)), + deviceMask(commandBufferInfo.deviceMask) {} + +MVKCommandBufferSubmitInfo::MVKCommandBufferSubmitInfo(VkCommandBuffer commandBuffer) : + commandBuffer(MVKCommandBuffer::getMVKCommandBuffer(commandBuffer)), + deviceMask(0) {} + +MVKQueueSubmission::MVKQueueSubmission(MVKQueue* queue, + uint32_t waitSemaphoreInfoCount, + const VkSemaphoreSubmitInfo* pWaitSemaphoreSubmitInfos) { + _queue = queue; + _queue->retain(); // Retain here and release in destructor. See note for MVKQueueCommandBufferSubmission::finish(). + + _waitSemaphores.reserve(waitSemaphoreInfoCount); + for (uint32_t i = 0; i < waitSemaphoreInfoCount; i++) { + _waitSemaphores.emplace_back(pWaitSemaphoreSubmitInfos[i]); + } +} + MVKQueueSubmission::MVKQueueSubmission(MVKQueue* queue, uint32_t waitSemaphoreCount, - const VkSemaphore* pWaitSemaphores) { + const VkSemaphore* pWaitSemaphores, + const VkPipelineStageFlags* pWaitDstStageMask) { _queue = queue; _queue->retain(); // Retain here and release in destructor. See note for MVKQueueCommandBufferSubmission::finish(). 
_waitSemaphores.reserve(waitSemaphoreCount); for (uint32_t i = 0; i < waitSemaphoreCount; i++) { - auto* sem4 = (MVKSemaphore*)pWaitSemaphores[i]; - sem4->retain(); - uint64_t sem4Val = 0; - _waitSemaphores.emplace_back(sem4, sem4Val); + _waitSemaphores.emplace_back(pWaitSemaphores[i], pWaitDstStageMask ? pWaitDstStageMask[i] : 0); } } MVKQueueSubmission::~MVKQueueSubmission() { - for (auto s : _waitSemaphores) { s.first->release(); } _queue->release(); } @@ -373,13 +447,13 @@ _queue->_submissionCaptureScope->beginScope(); // If using encoded semaphore waiting, do so now. - for (auto& ws : _waitSemaphores) { ws.first->encodeWait(getActiveMTLCommandBuffer(), ws.second); } + for (auto& ws : _waitSemaphores) { ws.encodeWait(getActiveMTLCommandBuffer()); } // Submit each command buffer. submitCommandBuffers(); // If using encoded semaphore signaling, do so now. - for (auto& ss : _signalSemaphores) { ss.first->encodeSignal(getActiveMTLCommandBuffer(), ss.second); } + for (auto& ss : _signalSemaphores) { ss.encodeSignal(getActiveMTLCommandBuffer()); } // Commit the last MTLCommandBuffer. // Nothing after this because callback might destroy this instance before this function ends. @@ -417,7 +491,7 @@ // should be more performant when prefilled command buffers aren't used, because we spend time encoding commands // first, thus giving the command buffer signalling these semaphores more time to complete. if ( !_emulatedWaitDone ) { - for (auto& ws : _waitSemaphores) { ws.first->encodeWait(nil, ws.second); } + for (auto& ws : _waitSemaphores) { ws.encodeWait(nil); } _emulatedWaitDone = true; } @@ -466,7 +540,7 @@ _queue->_submissionCaptureScope->endScope(); // If using inline semaphore signaling, do so now. - for (auto& ss : _signalSemaphores) { ss.first->encodeSignal(nil, ss.second); } + for (auto& ss : _signalSemaphores) { ss.encodeSignal(nil); } // If a fence exists, signal it. 
if (_fence) { _fence->signal(); } @@ -474,6 +548,31 @@ this->destroy(); } +// On device loss, the fence and signal semaphores may be signalled early, and they might then +// be destroyed on the waiting thread before this submission is done with them. We therefore +// retain() each here to ensure they live long enough for this submission to finish using them. +MVKQueueCommandBufferSubmission::MVKQueueCommandBufferSubmission(MVKQueue* queue, + const VkSubmitInfo2* pSubmit, + VkFence fence, + MVKCommandUse cmdUse) : + MVKQueueSubmission(queue, + pSubmit ? pSubmit->waitSemaphoreInfoCount : 0, + pSubmit ? pSubmit->pWaitSemaphoreInfos : nullptr), + _fence((MVKFence*)fence), + _commandUse(cmdUse) { + + if (_fence) { _fence->retain(); } + + // pSubmit can be null if just tracking the fence alone + if (pSubmit) { + uint32_t ssCnt = pSubmit->signalSemaphoreInfoCount; + _signalSemaphores.reserve(ssCnt); + for (uint32_t i = 0; i < ssCnt; i++) { + _signalSemaphores.emplace_back(pSubmit->pSignalSemaphoreInfos[i]); + } + } +} + // On device loss, the fence and signal semaphores may be signalled early, and they might then // be destroyed on the waiting thread before this submission is done with them. We therefore // retain() each here to ensure they live long enough for this submission to finish using them. @@ -482,15 +581,24 @@ VkFence fence, MVKCommandUse cmdUse) : MVKQueueSubmission(queue, - (pSubmit ? pSubmit->waitSemaphoreCount : 0), - (pSubmit ? pSubmit->pWaitSemaphores : nullptr)), + pSubmit ? pSubmit->waitSemaphoreCount : 0, + pSubmit ? pSubmit->pWaitSemaphores : nullptr, + pSubmit ? 
pSubmit->pWaitDstStageMask : nullptr), - _commandUse(cmdUse), - _emulatedWaitDone(false) { + _fence((MVKFence*)fence), + _commandUse(cmdUse) { + + if (_fence) { _fence->retain(); } // pSubmit can be null if just tracking the fence alone if (pSubmit) { - VkTimelineSemaphoreSubmitInfo* pTimelineSubmit = nullptr; + uint32_t ssCnt = pSubmit->signalSemaphoreCount; + _signalSemaphores.reserve(ssCnt); + for (uint32_t i = 0; i < ssCnt; i++) { + _signalSemaphores.emplace_back(pSubmit->pSignalSemaphores[i], 0); + } + + VkTimelineSemaphoreSubmitInfo* pTimelineSubmit = nullptr; for (const auto* next = (const VkBaseInStructure*)pSubmit->pNext; next; next = next->pNext) { switch (next->sType) { case VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO: @@ -501,31 +609,21 @@ } } if (pTimelineSubmit) { - // Presentation doesn't support timeline semaphores, so handle wait values here. - uint32_t wsCnt = pTimelineSubmit->waitSemaphoreValueCount; - for (uint32_t i = 0; i < wsCnt; i++) { - _waitSemaphores[i].second = pTimelineSubmit->pWaitSemaphoreValues[i]; + uint32_t wsvCnt = pTimelineSubmit->waitSemaphoreValueCount; + for (uint32_t i = 0; i < wsvCnt; i++) { + _waitSemaphores[i].value = pTimelineSubmit->pWaitSemaphoreValues[i]; } + + uint32_t ssvCnt = pTimelineSubmit->signalSemaphoreValueCount; + for (uint32_t i = 0; i < ssvCnt; i++) { + _signalSemaphores[i].value = pTimelineSubmit->pSignalSemaphoreValues[i]; + } } - uint32_t ssCnt = pSubmit->signalSemaphoreCount; - _signalSemaphores.reserve(ssCnt); - for (uint32_t i = 0; i < ssCnt; i++) { - auto* sem4 = (MVKSemaphore*)pSubmit->pSignalSemaphores[i]; - sem4->retain(); - uint64_t sem4Val = pTimelineSubmit ? 
pTimelineSubmit->pSignalSemaphoreValues[i] : 0; - _signalSemaphores.emplace_back(sem4, sem4Val); - } } - - _fence = (MVKFence*)fence; - if (_fence) { _fence->retain(); } - - _activeMTLCommandBuffer = nil; } MVKQueueCommandBufferSubmission::~MVKQueueCommandBufferSubmission() { if (_fence) { _fence->release(); } - for (auto s : _signalSemaphores) { s.first->release(); } } @@ -534,11 +632,28 @@ MVKDevice* mvkDev = getDevice(); uint64_t startTime = mvkDev->getPerformanceTimestamp(); - for (auto& cb : _cmdBuffers) { cb->submit(this, &_encodingContext); } + for (auto& cbInfo : _cmdBuffers) { cbInfo.commandBuffer->submit(this, &_encodingContext); } mvkDev->addPerformanceInterval(mvkDev->_performanceStatistics.queue.submitCommandBuffers, startTime); } +template +MVKQueueFullCommandBufferSubmission::MVKQueueFullCommandBufferSubmission(MVKQueue* queue, + const VkSubmitInfo2* pSubmit, + VkFence fence, + MVKCommandUse cmdUse) + : MVKQueueCommandBufferSubmission(queue, pSubmit, fence, cmdUse) { + + if (pSubmit) { + uint32_t cbCnt = pSubmit->commandBufferInfoCount; + _cmdBuffers.reserve(cbCnt); + for (uint32_t i = 0; i < cbCnt; i++) { + _cmdBuffers.emplace_back(pSubmit->pCommandBufferInfos[i]); + setConfigurationResult(_cmdBuffers.back().commandBuffer->getConfigurationResult()); + } + } +} + template MVKQueueFullCommandBufferSubmission::MVKQueueFullCommandBufferSubmission(MVKQueue* queue, const VkSubmitInfo* pSubmit, @@ -550,9 +665,8 @@ uint32_t cbCnt = pSubmit->commandBufferCount; _cmdBuffers.reserve(cbCnt); for (uint32_t i = 0; i < cbCnt; i++) { - MVKCommandBuffer* cb = MVKCommandBuffer::getMVKCommandBuffer(pSubmit->pCommandBuffers[i]); - _cmdBuffers.push_back(cb); - setConfigurationResult(cb->getConfigurationResult()); + _cmdBuffers.emplace_back(pSubmit->pCommandBuffers[i]); + setConfigurationResult(_cmdBuffers.back().commandBuffer->getConfigurationResult()); } } } @@ -571,9 +685,8 @@ id mtlCmdBuff = _queue->getMTLCommandBuffer(kMVKCommandUseQueuePresent, true); for (auto& ws 
: _waitSemaphores) { - auto& sem4 = ws.first; - sem4->encodeWait(mtlCmdBuff, 0); // Encoded semaphore waits - sem4->encodeWait(nil, 0); // Inline semaphore waits + ws.encodeWait(mtlCmdBuff); // Encoded semaphore waits + ws.encodeWait(nil); // Inline semaphore waits } for (int i = 0; i < _presentInfo.size(); i++ ) { @@ -612,7 +725,7 @@ MVKQueuePresentSurfaceSubmission::MVKQueuePresentSurfaceSubmission(MVKQueue* queue, const VkPresentInfoKHR* pPresentInfo) - : MVKQueueSubmission(queue, pPresentInfo->waitSemaphoreCount, pPresentInfo->pWaitSemaphores) { + : MVKQueueSubmission(queue, pPresentInfo->waitSemaphoreCount, pPresentInfo->pWaitSemaphores, nullptr) { const VkPresentTimesInfoGOOGLE* pPresentTimesInfo = nullptr; const VkSwapchainPresentFenceInfoEXT* pPresentFenceInfo = nullptr; diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKRenderPass.h b/MoltenVK/MoltenVK/GPUObjects/MVKRenderPass.h index 534ec018f..6cbe2e4e2 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKRenderPass.h +++ b/MoltenVK/MoltenVK/GPUObjects/MVKRenderPass.h @@ -265,6 +265,22 @@ class MVKAttachmentDescription : public MVKBaseObject { #pragma mark - #pragma mark MVKRenderPass +/** Collects together VkSubpassDependency and VkMemoryBarrier2. */ +typedef struct MVKSubpassDependency { + uint32_t srcSubpass; + uint32_t dstSubpass; + VkPipelineStageFlags2 srcStageMask; + VkPipelineStageFlags2 dstStageMask; + VkAccessFlags2 srcAccessMask; + VkAccessFlags2 dstAccessMask; + VkDependencyFlags dependencyFlags; + int32_t viewOffset; + + MVKSubpassDependency(const VkSubpassDependency& spDep, int32_t viewOffset); + MVKSubpassDependency(const VkSubpassDependency2& spDep, const VkMemoryBarrier2* pMemBar); + +} MVKSubpassDependency; + /** Represents a Vulkan render pass. 
*/ class MVKRenderPass : public MVKVulkanAPIDeviceObject { @@ -308,7 +324,7 @@ class MVKRenderPass : public MVKVulkanAPIDeviceObject { MVKSmallVector _attachments; MVKSmallVector _subpasses; - MVKSmallVector _subpassDependencies; + MVKSmallVector _subpassDependencies; VkRenderingFlags _renderingFlags = 0; }; diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKRenderPass.mm b/MoltenVK/MoltenVK/GPUObjects/MVKRenderPass.mm index 762d72d9b..3bf8a1887 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKRenderPass.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKRenderPass.mm @@ -904,6 +904,26 @@ #pragma mark - #pragma mark MVKRenderPass +MVKSubpassDependency::MVKSubpassDependency(const VkSubpassDependency& spDep, int32_t viewOffset) : + srcSubpass(spDep.srcSubpass), + dstSubpass(spDep.dstSubpass), + srcStageMask(spDep.srcStageMask), + dstStageMask(spDep.dstStageMask), + srcAccessMask(spDep.srcAccessMask), + dstAccessMask(spDep.dstAccessMask), + dependencyFlags(spDep.dependencyFlags), + viewOffset(viewOffset) {} + +MVKSubpassDependency::MVKSubpassDependency(const VkSubpassDependency2& spDep, const VkMemoryBarrier2* pMemBar) : + srcSubpass(spDep.srcSubpass), + dstSubpass(spDep.dstSubpass), + srcStageMask(pMemBar ? pMemBar->srcStageMask : spDep.srcStageMask), + dstStageMask(pMemBar ? pMemBar->dstStageMask : spDep.dstStageMask), + srcAccessMask(pMemBar ? pMemBar->srcAccessMask : spDep.srcAccessMask), + dstAccessMask(pMemBar ? pMemBar->dstAccessMask : spDep.dstAccessMask), + dependencyFlags(spDep.dependencyFlags), + viewOffset(spDep.viewOffset) {} + VkExtent2D MVKRenderPass::getRenderAreaGranularity() { if (_device->_pMetalFeatures->tileBasedDeferredRendering) { // This is the tile area. 
@@ -954,19 +974,7 @@ } _subpassDependencies.reserve(pCreateInfo->dependencyCount); for (uint32_t i = 0; i < pCreateInfo->dependencyCount; i++) { - VkSubpassDependency2 dependency = { - .sType = VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2, - .pNext = nullptr, - .srcSubpass = pCreateInfo->pDependencies[i].srcSubpass, - .dstSubpass = pCreateInfo->pDependencies[i].dstSubpass, - .srcStageMask = pCreateInfo->pDependencies[i].srcStageMask, - .dstStageMask = pCreateInfo->pDependencies[i].dstStageMask, - .srcAccessMask = pCreateInfo->pDependencies[i].srcAccessMask, - .dstAccessMask = pCreateInfo->pDependencies[i].dstAccessMask, - .dependencyFlags = pCreateInfo->pDependencies[i].dependencyFlags, - .viewOffset = viewOffsets ? viewOffsets[i] : 0, - }; - _subpassDependencies.push_back(dependency); + _subpassDependencies.emplace_back(pCreateInfo->pDependencies[i], viewOffsets ? viewOffsets[i] : 0); } // Link attachments to subpasses @@ -991,7 +999,19 @@ } _subpassDependencies.reserve(pCreateInfo->dependencyCount); for (uint32_t i = 0; i < pCreateInfo->dependencyCount; i++) { - _subpassDependencies.push_back(pCreateInfo->pDependencies[i]); + auto& spDep = pCreateInfo->pDependencies[i]; + + const VkMemoryBarrier2* pMemoryBarrier2 = nullptr; + for (auto* next = (const VkBaseInStructure*)spDep.pNext; next; next = next->pNext) { + switch (next->sType) { + case VK_STRUCTURE_TYPE_MEMORY_BARRIER_2: + pMemoryBarrier2 = (const VkMemoryBarrier2*)next; + break; + default: + break; + } + } + _subpassDependencies.emplace_back(spDep, pMemoryBarrier2); } // Link attachments to subpasses diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKResource.h b/MoltenVK/MoltenVK/GPUObjects/MVKResource.h index a1c3da6b2..5b9c47fdd 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKResource.h +++ b/MoltenVK/MoltenVK/GPUObjects/MVKResource.h @@ -60,9 +60,7 @@ class MVKResource : public MVKVulkanAPIDeviceObject { } /** Applies the specified global memory barrier. 
*/ - virtual void applyMemoryBarrier(VkPipelineStageFlags srcStageMask, - VkPipelineStageFlags dstStageMask, - MVKPipelineBarrier& barrier, + virtual void applyMemoryBarrier(MVKPipelineBarrier& barrier, MVKCommandEncoder* cmdEncoder, MVKCommandUse cmdUse) = 0; diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKShaderModule.h b/MoltenVK/MoltenVK/GPUObjects/MVKShaderModule.h index 87418edd1..be4f25454 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKShaderModule.h +++ b/MoltenVK/MoltenVK/GPUObjects/MVKShaderModule.h @@ -43,7 +43,7 @@ using namespace mvk; typedef struct MVKMTLFunction { SPIRVToMSLConversionResultInfo shaderConversionResults; MTLSize threadGroupSize; - inline id getMTLFunction() { return _mtlFunction; } + id getMTLFunction() { return _mtlFunction; } MVKMTLFunction(id mtlFunc, const SPIRVToMSLConversionResultInfo scRslts, MTLSize tgSize); MVKMTLFunction(const MVKMTLFunction& other); diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKShaderModule.mm b/MoltenVK/MoltenVK/GPUObjects/MVKShaderModule.mm index 908314989..8619a0da9 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKShaderModule.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKShaderModule.mm @@ -36,10 +36,11 @@ } MVKMTLFunction& MVKMTLFunction::operator=(const MVKMTLFunction& other) { - if (_mtlFunction != other._mtlFunction) { - [_mtlFunction release]; - _mtlFunction = [other._mtlFunction retain]; // retained - } + // Retain new object first in case it's the same object + [other._mtlFunction retain]; + [_mtlFunction release]; + _mtlFunction = other._mtlFunction; + shaderConversionResults = other.shaderConversionResults; threadGroupSize = other.threadGroupSize; return *this; diff --git a/MoltenVK/MoltenVK/Layers/MVKExtensions.def b/MoltenVK/MoltenVK/Layers/MVKExtensions.def index 74a006290..f6ad3447c 100644 --- a/MoltenVK/MoltenVK/Layers/MVKExtensions.def +++ b/MoltenVK/MoltenVK/Layers/MVKExtensions.def @@ -91,6 +91,7 @@ MVK_EXTENSION(KHR_storage_buffer_storage_class, KHR_STORAGE_BUFFER_STORAGE MVK_EXTENSION(KHR_surface, 
KHR_SURFACE, INSTANCE, 10.11, 8.0, 1.0) MVK_EXTENSION(KHR_swapchain, KHR_SWAPCHAIN, DEVICE, 10.11, 8.0, 1.0) MVK_EXTENSION(KHR_swapchain_mutable_format, KHR_SWAPCHAIN_MUTABLE_FORMAT, DEVICE, 10.11, 8.0, 1.0) +MVK_EXTENSION(KHR_synchronization2, KHR_SYNCHRONIZATION_2, DEVICE, 10.11, 8.0, 1.0) MVK_EXTENSION(KHR_timeline_semaphore, KHR_TIMELINE_SEMAPHORE, DEVICE, 10.11, 8.0, 1.0) MVK_EXTENSION(KHR_uniform_buffer_standard_layout, KHR_UNIFORM_BUFFER_STANDARD_LAYOUT, DEVICE, 10.11, 8.0, 1.0) MVK_EXTENSION(KHR_variable_pointers, KHR_VARIABLE_POINTERS, DEVICE, 10.11, 8.0, 1.0) diff --git a/MoltenVK/MoltenVK/Vulkan/mvk_datatypes.mm b/MoltenVK/MoltenVK/Vulkan/mvk_datatypes.mm index caa776237..90cb72e0c 100644 --- a/MoltenVK/MoltenVK/Vulkan/mvk_datatypes.mm +++ b/MoltenVK/MoltenVK/Vulkan/mvk_datatypes.mm @@ -728,40 +728,50 @@ MTLTessellationPartitionMode mvkMTLTessellationPartitionModeFromSpvExecutionMode } } -MVK_PUBLIC_SYMBOL MTLRenderStages mvkMTLRenderStagesFromVkPipelineStageFlags(VkPipelineStageFlags vkStages, +MVK_PUBLIC_SYMBOL MTLRenderStages mvkMTLRenderStagesFromVkPipelineStageFlags(VkPipelineStageFlags2 vkStages, bool placeBarrierBefore) { // Although there are many combined render/compute/host stages in Vulkan, there are only two render // stages in Metal. If the Vulkan stage did not map ONLY to a specific Metal render stage, then if the // barrier is to be placed before the render stages, it should come before the vertex stage, otherwise // if the barrier is to be placed after the render stages, it should come after the fragment stage. 
if (placeBarrierBefore) { - bool placeBeforeFragment = mvkIsOnlyAnyFlagEnabled(vkStages, (VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | - VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | - VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT | - VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT | - VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT)); + bool placeBeforeFragment = mvkIsOnlyAnyFlagEnabled(vkStages, (VK_PIPELINE_STAGE_2_FRAGMENT_SHADER_BIT | + VK_PIPELINE_STAGE_2_EARLY_FRAGMENT_TESTS_BIT | + VK_PIPELINE_STAGE_2_LATE_FRAGMENT_TESTS_BIT | + VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT | + VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT)); return placeBeforeFragment ? MTLRenderStageFragment : MTLRenderStageVertex; } else { - bool placeAfterVertex = mvkIsOnlyAnyFlagEnabled(vkStages, (VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT | - VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT | - VK_PIPELINE_STAGE_VERTEX_INPUT_BIT | - VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | - VK_PIPELINE_STAGE_TESSELLATION_CONTROL_SHADER_BIT | - VK_PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT)); + bool placeAfterVertex = mvkIsOnlyAnyFlagEnabled(vkStages, (VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT | + VK_PIPELINE_STAGE_2_DRAW_INDIRECT_BIT | + VK_PIPELINE_STAGE_2_VERTEX_INPUT_BIT | + VK_PIPELINE_STAGE_2_VERTEX_SHADER_BIT | + VK_PIPELINE_STAGE_2_TESSELLATION_CONTROL_SHADER_BIT | + VK_PIPELINE_STAGE_2_TESSELLATION_EVALUATION_SHADER_BIT)); return placeAfterVertex ? 
MTLRenderStageVertex : MTLRenderStageFragment; } } -MVK_PUBLIC_SYMBOL MTLBarrierScope mvkMTLBarrierScopeFromVkAccessFlags(VkAccessFlags vkAccess) { +MVK_PUBLIC_SYMBOL MTLBarrierScope mvkMTLBarrierScopeFromVkAccessFlags(VkAccessFlags2 vkAccess) { MTLBarrierScope mtlScope = MTLBarrierScope(0); - if ( mvkIsAnyFlagEnabled(vkAccess, VK_ACCESS_INDIRECT_COMMAND_READ_BIT | VK_ACCESS_INDEX_READ_BIT | VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT | VK_ACCESS_UNIFORM_READ_BIT) ) { + if ( mvkIsAnyFlagEnabled(vkAccess, (VK_ACCESS_2_INDIRECT_COMMAND_READ_BIT | + VK_ACCESS_2_INDEX_READ_BIT | + VK_ACCESS_2_VERTEX_ATTRIBUTE_READ_BIT | + VK_ACCESS_2_UNIFORM_READ_BIT)) ) { mtlScope |= MTLBarrierScopeBuffers; } - if ( mvkIsAnyFlagEnabled(vkAccess, VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT) ) { + if ( mvkIsAnyFlagEnabled(vkAccess, (VK_ACCESS_2_SHADER_READ_BIT | + VK_ACCESS_2_SHADER_WRITE_BIT | + VK_ACCESS_2_MEMORY_READ_BIT | + VK_ACCESS_2_MEMORY_WRITE_BIT)) ) { mtlScope |= MTLBarrierScopeBuffers | MTLBarrierScopeTextures; } #if MVK_MACOS - if ( mvkIsAnyFlagEnabled(vkAccess, VK_ACCESS_INPUT_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT) ) { + if ( mvkIsAnyFlagEnabled(vkAccess, (VK_ACCESS_2_INPUT_ATTACHMENT_READ_BIT | + VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT | + VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | + VK_ACCESS_2_MEMORY_READ_BIT | + VK_ACCESS_2_MEMORY_WRITE_BIT)) ) { mtlScope |= MTLBarrierScopeRenderTargets; } #endif diff --git a/MoltenVK/MoltenVK/Vulkan/vulkan.mm b/MoltenVK/MoltenVK/Vulkan/vulkan.mm index 44b0e5f69..d3dcbca90 100644 --- a/MoltenVK/MoltenVK/Vulkan/vulkan.mm +++ b/MoltenVK/MoltenVK/Vulkan/vulkan.mm @@ -2517,8 +2517,8 @@ MVK_PUBLIC_VULKAN_SYMBOL VkResult vkWaitSemaphores( #pragma mark Vulkan 1.3 calls MVK_PUBLIC_VULKAN_SYMBOL void vkCmdBeginRendering( - VkCommandBuffer commandBuffer, - 
const VkRenderingInfo* pRenderingInfo) { + VkCommandBuffer commandBuffer, + const VkRenderingInfo* pRenderingInfo) { MVKTraceVulkanCallStart(); MVKAddCmdFrom3Thresholds(BeginRendering, pRenderingInfo->colorAttachmentCount, @@ -2527,7 +2527,7 @@ MVK_PUBLIC_VULKAN_SYMBOL void vkCmdBeginRendering( } MVK_PUBLIC_VULKAN_SYMBOL void vkCmdEndRendering( - VkCommandBuffer commandBuffer) { + VkCommandBuffer commandBuffer) { MVKTraceVulkanCallStart(); MVKAddCmd(EndRendering, commandBuffer); @@ -2537,56 +2537,79 @@ MVK_PUBLIC_VULKAN_SYMBOL void vkCmdEndRendering( MVK_PUBLIC_VULKAN_STUB(vkCmdBindVertexBuffers2, void, VkCommandBuffer, uint32_t, uint32_t, const VkBuffer*, const VkDeviceSize*, const VkDeviceSize*, const VkDeviceSize*) MVK_PUBLIC_VULKAN_SYMBOL void vkCmdBlitImage2( - VkCommandBuffer commandBuffer, - const VkBlitImageInfo2* pBlitImageInfo) { - MVKTraceVulkanCallStart(); + VkCommandBuffer commandBuffer, + const VkBlitImageInfo2* pBlitImageInfo) { + + MVKTraceVulkanCallStart(); MVKAddCmdFromThreshold(BlitImage, pBlitImageInfo->regionCount, 1, commandBuffer, pBlitImageInfo); MVKTraceVulkanCallEnd(); } MVK_PUBLIC_VULKAN_SYMBOL void vkCmdCopyBuffer2( - VkCommandBuffer commandBuffer, - const VkCopyBufferInfo2* pCopyBufferInfo) { - MVKTraceVulkanCallStart(); + VkCommandBuffer commandBuffer, + const VkCopyBufferInfo2* pCopyBufferInfo) { + + MVKTraceVulkanCallStart(); MVKAddCmdFromThreshold(CopyBuffer, pCopyBufferInfo->regionCount, 1, commandBuffer, pCopyBufferInfo); MVKTraceVulkanCallEnd(); } MVK_PUBLIC_VULKAN_SYMBOL void vkCmdCopyBufferToImage2( - VkCommandBuffer commandBuffer, - const VkCopyBufferToImageInfo2* pCopyBufferToImageInfo) { - MVKTraceVulkanCallStart(); + VkCommandBuffer commandBuffer, + const VkCopyBufferToImageInfo2* pCopyBufferToImageInfo) { + + MVKTraceVulkanCallStart(); MVKAddCmdFrom3Thresholds(BufferImageCopy, pCopyBufferToImageInfo->regionCount, 1, 4, 8, commandBuffer, pCopyBufferToImageInfo); MVKTraceVulkanCallEnd(); } MVK_PUBLIC_VULKAN_SYMBOL void 
vkCmdCopyImage2( - VkCommandBuffer commandBuffer, - const VkCopyImageInfo2* pCopyImageInfo) { - MVKTraceVulkanCallStart(); + VkCommandBuffer commandBuffer, + const VkCopyImageInfo2* pCopyImageInfo) { + + MVKTraceVulkanCallStart(); MVKAddCmdFromThreshold(CopyImage, pCopyImageInfo->regionCount, 1, commandBuffer, pCopyImageInfo); MVKTraceVulkanCallEnd(); } MVK_PUBLIC_VULKAN_SYMBOL void vkCmdCopyImageToBuffer2( - VkCommandBuffer commandBuffer, - const VkCopyImageToBufferInfo2* pCopyImageInfo) { - MVKTraceVulkanCallStart(); + VkCommandBuffer commandBuffer, + const VkCopyImageToBufferInfo2* pCopyImageInfo) { + + MVKTraceVulkanCallStart(); MVKAddCmdFrom3Thresholds(BufferImageCopy, pCopyImageInfo->regionCount, 1, 4, 8, commandBuffer, pCopyImageInfo); MVKTraceVulkanCallEnd(); } -MVK_PUBLIC_VULKAN_STUB(vkCmdPipelineBarrier2, void, VkCommandBuffer, const VkDependencyInfo*) -MVK_PUBLIC_VULKAN_STUB(vkCmdResetEvent2, void, VkCommandBuffer, VkEvent, VkPipelineStageFlags2 stageMask) +MVK_PUBLIC_VULKAN_SYMBOL void vkCmdPipelineBarrier2( + VkCommandBuffer commandBuffer, + const VkDependencyInfo* pDependencyInfo) { + + MVKTraceVulkanCallStart(); + uint32_t barrierCount = pDependencyInfo->memoryBarrierCount + pDependencyInfo->bufferMemoryBarrierCount + pDependencyInfo->imageMemoryBarrierCount; + MVKAddCmdFrom2Thresholds(PipelineBarrier, barrierCount, 1, 4, commandBuffer, pDependencyInfo); + MVKTraceVulkanCallEnd(); +} + +MVK_PUBLIC_VULKAN_SYMBOL void vkCmdResetEvent2( + VkCommandBuffer commandBuffer, + VkEvent event, + VkPipelineStageFlags2 stageMask) { + + MVKTraceVulkanCallStart(); + MVKAddCmd(ResetEvent, commandBuffer, event, stageMask); + MVKTraceVulkanCallEnd(); +} MVK_PUBLIC_VULKAN_SYMBOL void vkCmdResolveImage2( - VkCommandBuffer commandBuffer, - const VkResolveImageInfo2* pResolveImageInfo) { - MVKTraceVulkanCallStart(); + VkCommandBuffer commandBuffer, + const VkResolveImageInfo2* pResolveImageInfo) { + + MVKTraceVulkanCallStart(); MVKAddCmdFromThreshold(ResolveImage, 
pResolveImageInfo->regionCount, 1, commandBuffer, pResolveImageInfo); MVKTraceVulkanCallEnd(); @@ -2598,7 +2621,17 @@ MVK_PUBLIC_VULKAN_SYMBOL void vkCmdResolveImage2( MVK_PUBLIC_VULKAN_STUB(vkCmdSetDepthCompareOp, void, VkCommandBuffer, VkCompareOp) MVK_PUBLIC_VULKAN_STUB(vkCmdSetDepthTestEnable, void, VkCommandBuffer, VkBool32) MVK_PUBLIC_VULKAN_STUB(vkCmdSetDepthWriteEnable, void, VkCommandBuffer, VkBool32) -MVK_PUBLIC_VULKAN_STUB(vkCmdSetEvent2, void, VkCommandBuffer, VkEvent, const VkDependencyInfo*) + +MVK_PUBLIC_VULKAN_SYMBOL void vkCmdSetEvent2( + VkCommandBuffer commandBuffer, + VkEvent event, + const VkDependencyInfo* pDependencyInfo) { + + MVKTraceVulkanCallStart(); + MVKAddCmd(SetEvent, commandBuffer, event, pDependencyInfo); + MVKTraceVulkanCallEnd(); +} + MVK_PUBLIC_VULKAN_STUB(vkCmdSetFrontFace, void, VkCommandBuffer, VkFrontFace) MVK_PUBLIC_VULKAN_STUB(vkCmdSetPrimitiveRestartEnable, void, VkCommandBuffer, VkBool32) MVK_PUBLIC_VULKAN_STUB(vkCmdSetPrimitiveTopology, void, VkCommandBuffer, VkPrimitiveTopology) @@ -2607,8 +2640,29 @@ MVK_PUBLIC_VULKAN_SYMBOL void vkCmdResolveImage2( MVK_PUBLIC_VULKAN_STUB(vkCmdSetStencilOp, void, VkCommandBuffer, VkStencilFaceFlags, VkStencilOp, VkStencilOp, VkStencilOp, VkCompareOp) MVK_PUBLIC_VULKAN_STUB(vkCmdSetStencilTestEnable, void, VkCommandBuffer, VkBool32) MVK_PUBLIC_VULKAN_STUB(vkCmdSetViewportWithCount, void, VkCommandBuffer, uint32_t, const VkViewport*) -MVK_PUBLIC_VULKAN_STUB(vkCmdWaitEvents2, void, VkCommandBuffer, uint32_t, const VkEvent*, const VkDependencyInfo*) -MVK_PUBLIC_VULKAN_STUB(vkCmdWriteTimestamp2, void, VkCommandBuffer, VkPipelineStageFlags2, VkQueryPool, uint32_t) + +MVK_PUBLIC_VULKAN_SYMBOL void vkCmdWaitEvents2( + VkCommandBuffer commandBuffer, + uint32_t eventCount, + const VkEvent* pEvents, + const VkDependencyInfo* pDependencyInfos) { + + MVKTraceVulkanCallStart(); + MVKAddCmdFromThreshold(WaitEvents, eventCount, 1, commandBuffer, eventCount, pEvents, pDependencyInfos); + 
MVKTraceVulkanCallEnd(); +} + +MVK_PUBLIC_VULKAN_SYMBOL void vkCmdWriteTimestamp2( + VkCommandBuffer commandBuffer, + VkPipelineStageFlags2 stage, + VkQueryPool queryPool, + uint32_t query) { + + MVKTraceVulkanCallStart(); + MVKAddCmd(WriteTimestamp, commandBuffer, stage, queryPool, query); + MVKTraceVulkanCallEnd(); +} + MVK_PUBLIC_VULKAN_STUB_VKRESULT(vkCreatePrivateDataSlot, VkDevice, const VkPrivateDataSlotCreateInfo*, const VkAllocationCallbacks*, VkPrivateDataSlot*) MVK_PUBLIC_VULKAN_STUB(vkDestroyPrivateDataSlot, void, VkDevice, VkPrivateDataSlot, const VkAllocationCallbacks*) MVK_PUBLIC_VULKAN_STUB(vkGetDeviceBufferMemoryRequirements, void, VkDevice, const VkDeviceBufferMemoryRequirements*, VkMemoryRequirements2*) @@ -2616,7 +2670,20 @@ MVK_PUBLIC_VULKAN_SYMBOL void vkCmdResolveImage2( MVK_PUBLIC_VULKAN_STUB(vkGetDeviceImageSparseMemoryRequirements, void, VkDevice, const VkDeviceImageMemoryRequirements*, uint32_t*, VkSparseImageMemoryRequirements2*) MVK_PUBLIC_VULKAN_STUB_VKRESULT(vkGetPhysicalDeviceToolProperties, VkPhysicalDevice, uint32_t*, VkPhysicalDeviceToolProperties*) MVK_PUBLIC_VULKAN_STUB(vkGetPrivateData, void, VkDevice, VkObjectType, uint64_t, VkPrivateDataSlot, uint64_t*) -MVK_PUBLIC_VULKAN_STUB_VKRESULT(vkQueueSubmit2, VkQueue, uint32_t, const VkSubmitInfo2*, VkFence) + +MVK_PUBLIC_VULKAN_SYMBOL VkResult vkQueueSubmit2( + VkQueue queue, + uint32_t submitCount, + const VkSubmitInfo2* pSubmits, + VkFence fence) { + + MVKTraceVulkanCallStart(); + MVKQueue* mvkQ = MVKQueue::getMVKQueue(queue); + VkResult rslt = mvkQ->submit(submitCount, pSubmits, fence, kMVKCommandUseQueueSubmit); + MVKTraceVulkanCallEnd(); + return rslt; +} + MVK_PUBLIC_VULKAN_STUB_VKRESULT(vkSetPrivateData, VkDevice, VkObjectType, uint64_t, VkPrivateDataSlot, uint64_t) #pragma mark - @@ -3102,6 +3169,17 @@ MVK_PUBLIC_VULKAN_SYMBOL VkResult vkGetPhysicalDeviceSurfaceFormats2KHR( } +#pragma mark - +#pragma mark VK_KHR_synchronization2 + 
+MVK_PUBLIC_VULKAN_CORE_ALIAS(vkCmdPipelineBarrier2, KHR); +MVK_PUBLIC_VULKAN_CORE_ALIAS(vkCmdResetEvent2, KHR); +MVK_PUBLIC_VULKAN_CORE_ALIAS(vkCmdSetEvent2, KHR); +MVK_PUBLIC_VULKAN_CORE_ALIAS(vkCmdWaitEvents2, KHR); +MVK_PUBLIC_VULKAN_CORE_ALIAS(vkCmdWriteTimestamp2, KHR); +MVK_PUBLIC_VULKAN_CORE_ALIAS(vkQueueSubmit2, KHR); + + #pragma mark - #pragma mark VK_KHR_timeline_semaphore From 2c3dc6415a57900b33953798259e0552f45ef964 Mon Sep 17 00:00:00 2001 From: Bill Hollings Date: Tue, 19 Sep 2023 16:29:30 -0400 Subject: [PATCH 16/21] Promote VK_EXT_private_data functions to Vulkan 1.3, and remove stubs. --- MoltenVK/MoltenVK/Vulkan/vulkan.mm | 109 +++++++++++++++-------------- 1 file changed, 56 insertions(+), 53 deletions(-) diff --git a/MoltenVK/MoltenVK/Vulkan/vulkan.mm b/MoltenVK/MoltenVK/Vulkan/vulkan.mm index d3dcbca90..cfa133fb5 100644 --- a/MoltenVK/MoltenVK/Vulkan/vulkan.mm +++ b/MoltenVK/MoltenVK/Vulkan/vulkan.mm @@ -2663,13 +2663,47 @@ MVK_PUBLIC_VULKAN_SYMBOL void vkCmdWriteTimestamp2( MVKTraceVulkanCallEnd(); } -MVK_PUBLIC_VULKAN_STUB_VKRESULT(vkCreatePrivateDataSlot, VkDevice, const VkPrivateDataSlotCreateInfo*, const VkAllocationCallbacks*, VkPrivateDataSlot*) -MVK_PUBLIC_VULKAN_STUB(vkDestroyPrivateDataSlot, void, VkDevice, VkPrivateDataSlot, const VkAllocationCallbacks*) +MVK_PUBLIC_VULKAN_SYMBOL VkResult vkCreatePrivateDataSlot( + VkDevice device, + const VkPrivateDataSlotCreateInfoEXT* pCreateInfo, + const VkAllocationCallbacks* pAllocator, + VkPrivateDataSlotEXT* pPrivateDataSlot) { + + MVKTraceVulkanCallStart(); + MVKDevice* mvkDev = MVKDevice::getMVKDevice(device); + VkResult rslt = mvkDev->createPrivateDataSlot(pCreateInfo, pAllocator, pPrivateDataSlot); + MVKTraceVulkanCallEnd(); + return rslt; +} + +MVK_PUBLIC_VULKAN_SYMBOL void vkDestroyPrivateDataSlot( + VkDevice device, + VkPrivateDataSlotEXT privateDataSlot, + const VkAllocationCallbacks* pAllocator) { + + MVKTraceVulkanCallStart(); + MVKDevice* mvkDev = MVKDevice::getMVKDevice(device); 
+ mvkDev->destroyPrivateDataSlot(privateDataSlot, pAllocator); + MVKTraceVulkanCallEnd(); +} + MVK_PUBLIC_VULKAN_STUB(vkGetDeviceBufferMemoryRequirements, void, VkDevice, const VkDeviceBufferMemoryRequirements*, VkMemoryRequirements2*) MVK_PUBLIC_VULKAN_STUB(vkGetDeviceImageMemoryRequirements, void, VkDevice, const VkDeviceImageMemoryRequirements*, VkMemoryRequirements2*) MVK_PUBLIC_VULKAN_STUB(vkGetDeviceImageSparseMemoryRequirements, void, VkDevice, const VkDeviceImageMemoryRequirements*, uint32_t*, VkSparseImageMemoryRequirements2*) MVK_PUBLIC_VULKAN_STUB_VKRESULT(vkGetPhysicalDeviceToolProperties, VkPhysicalDevice, uint32_t*, VkPhysicalDeviceToolProperties*) -MVK_PUBLIC_VULKAN_STUB(vkGetPrivateData, void, VkDevice, VkObjectType, uint64_t, VkPrivateDataSlot, uint64_t*) + +MVK_PUBLIC_VULKAN_SYMBOL void vkGetPrivateData( + VkDevice device, + VkObjectType objectType, + uint64_t objectHandle, + VkPrivateDataSlotEXT privateDataSlot, + uint64_t* pData) { + + MVKTraceVulkanCallStart(); + MVKPrivateDataSlot* mvkPDS = (MVKPrivateDataSlot*)privateDataSlot; + *pData = mvkPDS->getData(objectType, objectHandle); + MVKTraceVulkanCallEnd(); +} MVK_PUBLIC_VULKAN_SYMBOL VkResult vkQueueSubmit2( VkQueue queue, @@ -2684,7 +2718,21 @@ MVK_PUBLIC_VULKAN_SYMBOL VkResult vkQueueSubmit2( return rslt; } -MVK_PUBLIC_VULKAN_STUB_VKRESULT(vkSetPrivateData, VkDevice, VkObjectType, uint64_t, VkPrivateDataSlot, uint64_t) +MVK_PUBLIC_VULKAN_SYMBOL VkResult vkSetPrivateData( + VkDevice device, + VkObjectType objectType, + uint64_t objectHandle, + VkPrivateDataSlotEXT privateDataSlot, + uint64_t data) { + + MVKTraceVulkanCallStart(); + MVKPrivateDataSlot* mvkPDS = (MVKPrivateDataSlot*)privateDataSlot; + mvkPDS->setData(objectType, objectHandle, data); + MVKTraceVulkanCallEnd(); + return VK_SUCCESS; +} + + #pragma mark - #pragma mark VK_KHR_bind_memory2 extension @@ -3511,56 +3559,11 @@ MVK_PUBLIC_VULKAN_SYMBOL void vkExportMetalObjectsEXT( #pragma mark - #pragma mark VK_EXT_private_data 
extension -MVK_PUBLIC_VULKAN_SYMBOL VkResult vkCreatePrivateDataSlotEXT( - VkDevice device, - const VkPrivateDataSlotCreateInfoEXT* pCreateInfo, - const VkAllocationCallbacks* pAllocator, - VkPrivateDataSlotEXT* pPrivateDataSlot) { - - MVKTraceVulkanCallStart(); - MVKDevice* mvkDev = MVKDevice::getMVKDevice(device); - VkResult rslt = mvkDev->createPrivateDataSlot(pCreateInfo, pAllocator, pPrivateDataSlot); - MVKTraceVulkanCallEnd(); - return rslt; -} - -MVK_PUBLIC_VULKAN_SYMBOL void vkDestroyPrivateDataSlotEXT( - VkDevice device, - VkPrivateDataSlotEXT privateDataSlot, - const VkAllocationCallbacks* pAllocator) { +MVK_PUBLIC_VULKAN_CORE_ALIAS(vkCreatePrivateDataSlot, EXT); +MVK_PUBLIC_VULKAN_CORE_ALIAS(vkDestroyPrivateDataSlot, EXT); +MVK_PUBLIC_VULKAN_CORE_ALIAS(vkGetPrivateData, EXT); +MVK_PUBLIC_VULKAN_CORE_ALIAS(vkSetPrivateData, EXT); - MVKTraceVulkanCallStart(); - MVKDevice* mvkDev = MVKDevice::getMVKDevice(device); - mvkDev->destroyPrivateDataSlot(privateDataSlot, pAllocator); - MVKTraceVulkanCallEnd(); -} - -MVK_PUBLIC_VULKAN_SYMBOL VkResult vkSetPrivateDataEXT( - VkDevice device, - VkObjectType objectType, - uint64_t objectHandle, - VkPrivateDataSlotEXT privateDataSlot, - uint64_t data) { - - MVKTraceVulkanCallStart(); - MVKPrivateDataSlot* mvkPDS = (MVKPrivateDataSlot*)privateDataSlot; - mvkPDS->setData(objectType, objectHandle, data); - MVKTraceVulkanCallEnd(); - return VK_SUCCESS; -} - -MVK_PUBLIC_VULKAN_SYMBOL void vkGetPrivateDataEXT( - VkDevice device, - VkObjectType objectType, - uint64_t objectHandle, - VkPrivateDataSlotEXT privateDataSlot, - uint64_t* pData) { - - MVKTraceVulkanCallStart(); - MVKPrivateDataSlot* mvkPDS = (MVKPrivateDataSlot*)privateDataSlot; - *pData = mvkPDS->getData(objectType, objectHandle); - MVKTraceVulkanCallEnd(); -} #pragma mark - #pragma mark VK_EXT_sample_locations extension From 5a216ab1f86122339a8d3b0e4bf3ed3f8802245b Mon Sep 17 00:00:00 2001 From: Evan Tang Date: Tue, 13 Jun 2023 11:41:36 -0500 Subject: [PATCH 17/21] 
Refcounting cleanup - Use relaxed atomics where possible - Calling operator= on a refcounted object should not reinitialize the refcount --- MoltenVK/MoltenVK/Utility/MVKBaseObject.h | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/MoltenVK/MoltenVK/Utility/MVKBaseObject.h b/MoltenVK/MoltenVK/Utility/MVKBaseObject.h index d2fcb9e8c..dd156b3ed 100644 --- a/MoltenVK/MoltenVK/Utility/MVKBaseObject.h +++ b/MoltenVK/MoltenVK/Utility/MVKBaseObject.h @@ -143,7 +143,7 @@ class MVKReferenceCountingMixin : public BaseClass { * Called when this instance has been retained as a reference by another object, * indicating that this instance will not be deleted until that reference is released. */ - void retain() { _refCount++; } + void retain() { _refCount.fetch_add(1, std::memory_order_relaxed); } /** * Called when this instance has been released as a reference from another object. @@ -154,7 +154,7 @@ class MVKReferenceCountingMixin : public BaseClass { * Note that the destroy() function is called on the BaseClass. * Releasing will not call any overridden destroy() function in a descendant class. */ - void release() { if (--_refCount == 0) { BaseClass::destroy(); } } + void release() { if (_refCount.fetch_sub(1, std::memory_order_acq_rel) == 1) { BaseClass::destroy(); } } /** * Marks this instance as destroyed. If all previous references to this instance @@ -166,15 +166,10 @@ class MVKReferenceCountingMixin : public BaseClass { MVKReferenceCountingMixin() : _refCount(1) {} /** Copy starts with fresh reference counts. */ - MVKReferenceCountingMixin(const MVKReferenceCountingMixin& other) { - _refCount = 1; - } + MVKReferenceCountingMixin(const MVKReferenceCountingMixin& other) : _refCount(1) {} - /** Copy starts with fresh reference counts. */ - MVKReferenceCountingMixin& operator=(const MVKReferenceCountingMixin& other) { - _refCount = 1; - return *this; - } + /** Don't overwrite refcounted objects. 
*/ + MVKReferenceCountingMixin& operator=(const MVKReferenceCountingMixin& other) = delete; protected: std::atomic _refCount; From aeae18d48b362e133718c2a1a3f4099d4b17d230 Mon Sep 17 00:00:00 2001 From: Evan Tang Date: Tue, 13 Jun 2023 12:00:31 -0500 Subject: [PATCH 18/21] Remove getBaseObject Nothing used it, and you should always be able to `static_cast` for any object without a crazy inheritance tree --- MoltenVK/MoltenVK/Commands/MVKCommandBuffer.h | 1 - .../MoltenVK/Commands/MVKMTLBufferAllocation.h | 1 - MoltenVK/MoltenVK/GPUObjects/MVKDevice.h | 14 +------------- MoltenVK/MoltenVK/GPUObjects/MVKQueue.h | 1 - MoltenVK/MoltenVK/Utility/MVKBaseObject.h | 2 +- 5 files changed, 2 insertions(+), 17 deletions(-) diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.h b/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.h index 59242aff8..8f8b2c0b8 100644 --- a/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.h +++ b/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.h @@ -182,7 +182,6 @@ class MVKCommandBuffer : public MVKDispatchableVulkanAPIObject, friend class MVKCommandEncoder; friend class MVKCommandPool; - MVKBaseObject* getBaseObject() override { return this; }; void propagateDebugName() override {} void init(const VkCommandBufferAllocateInfo* pAllocateInfo); bool canExecute(); diff --git a/MoltenVK/MoltenVK/Commands/MVKMTLBufferAllocation.h b/MoltenVK/MoltenVK/Commands/MVKMTLBufferAllocation.h index 474a0a169..2be981449 100644 --- a/MoltenVK/MoltenVK/Commands/MVKMTLBufferAllocation.h +++ b/MoltenVK/MoltenVK/Commands/MVKMTLBufferAllocation.h @@ -99,7 +99,6 @@ class MVKMTLBufferAllocationPool : public MVKObjectPool, protected: friend class MVKMTLBufferAllocation; - MVKBaseObject* getBaseObject() override { return this; }; MVKMTLBufferAllocation* newObject() override; void returnAllocationUnlocked(MVKMTLBufferAllocation* ba); void returnAllocation(MVKMTLBufferAllocation* ba); diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h 
index b6b462ad7..125bf9aa9 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h +++ b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.h @@ -959,13 +959,9 @@ class MVKDeviceTrackingMixin { bool isUsingPipelineStageMetalArgumentBuffers() { return isUsingMetalArgumentBuffers() && !_device->_pMetalFeatures->descriptorSetArgumentBuffers; }; /** Constructs an instance for the specified device. */ - MVKDeviceTrackingMixin(MVKDevice* device) : _device(device) { assert(_device); } - - virtual ~MVKDeviceTrackingMixin() {} + MVKDeviceTrackingMixin(MVKDevice* device) : _device(device) { assert(_device); } protected: - virtual MVKBaseObject* getBaseObject() = 0; - MVKDevice* _device; }; @@ -980,9 +976,6 @@ class MVKBaseDeviceObject : public MVKBaseObject, public MVKDeviceTrackingMixin /** Constructs an instance for the specified device. */ MVKBaseDeviceObject(MVKDevice* device) : MVKDeviceTrackingMixin(device) {} - -protected: - MVKBaseObject* getBaseObject() override { return this; }; }; @@ -999,10 +992,6 @@ class MVKVulkanAPIDeviceObject : public MVKVulkanAPIObject, public MVKDeviceTrac /** Constructs an instance for the specified device. 
*/ MVKVulkanAPIDeviceObject(MVKDevice* device) : MVKDeviceTrackingMixin(device) {} - -protected: - MVKBaseObject* getBaseObject() override { return this; }; - }; @@ -1055,7 +1044,6 @@ class MVKDeviceObjectPool : public MVKObjectPool, public MVKDeviceTrackingMix protected: T* newObject() override { return new T(_device); } - MVKBaseObject* getBaseObject() override { return this; }; }; diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKQueue.h b/MoltenVK/MoltenVK/GPUObjects/MVKQueue.h index b4509f0b7..086410e82 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKQueue.h +++ b/MoltenVK/MoltenVK/GPUObjects/MVKQueue.h @@ -135,7 +135,6 @@ class MVKQueue : public MVKDispatchableVulkanAPIObject, public MVKDeviceTracking friend class MVKQueueCommandBufferSubmission; friend class MVKQueuePresentSurfaceSubmission; - MVKBaseObject* getBaseObject() override { return this; }; void propagateDebugName() override; void initName(); void initExecQueue(); diff --git a/MoltenVK/MoltenVK/Utility/MVKBaseObject.h b/MoltenVK/MoltenVK/Utility/MVKBaseObject.h index dd156b3ed..419428024 100644 --- a/MoltenVK/MoltenVK/Utility/MVKBaseObject.h +++ b/MoltenVK/MoltenVK/Utility/MVKBaseObject.h @@ -105,7 +105,7 @@ class MVKBaseObject { /** Destroys this object. Default behaviour simply deletes it. Subclasses may override to delay deletion. 
*/ virtual void destroy() { delete this; } - virtual ~MVKBaseObject() {} + virtual ~MVKBaseObject() {} protected: static VkResult reportResult(MVKBaseObject* mvkObj, VkResult vkRslt, MVKConfigLogLevel logLevel, const char* format, va_list args) __printflike(4, 0); From 89195dc7254c56c50c7cc64a17b6a0a12ff075d6 Mon Sep 17 00:00:00 2001 From: Evan Tang Date: Tue, 13 Jun 2023 12:13:21 -0500 Subject: [PATCH 19/21] Remove count from mvkStringsAreEqual It doesn't do anything, and we don't want anyone to think it does something --- MoltenVK/MoltenVK/Utility/MVKFoundation.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MoltenVK/MoltenVK/Utility/MVKFoundation.h b/MoltenVK/MoltenVK/Utility/MVKFoundation.h index 1097afb5a..16c7fd2e9 100644 --- a/MoltenVK/MoltenVK/Utility/MVKFoundation.h +++ b/MoltenVK/MoltenVK/Utility/MVKFoundation.h @@ -598,7 +598,7 @@ bool mvkAreEqual(const T* pV1, const T* pV2, size_t count = 1) { * which works on individual chars or char arrays, not strings. * Returns false if either string is null. 
*/ -static constexpr bool mvkStringsAreEqual(const char* pV1, const char* pV2, size_t count = 1) { +static constexpr bool mvkStringsAreEqual(const char* pV1, const char* pV2) { return pV1 && pV2 && (pV1 == pV2 || strcmp(pV1, pV2) == 0); } From 4ba3f335b4f73627e94645361bc35efce222dad2 Mon Sep 17 00:00:00 2001 From: Evan Tang Date: Tue, 13 Jun 2023 15:22:44 -0500 Subject: [PATCH 20/21] MVKArrayRef cleanup Make everything constexpr, remove direct access to members --- .../MoltenVK/Commands/MVKCommandBuffer.mm | 6 ++-- .../Commands/MVKCommandEncoderState.mm | 28 ++++++++--------- MoltenVK/MoltenVK/GPUObjects/MVKDescriptor.mm | 2 +- MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm | 2 +- MoltenVK/MoltenVK/GPUObjects/MVKPipeline.mm | 2 +- MoltenVK/MoltenVK/Utility/MVKFoundation.h | 31 ++++++++++--------- MoltenVK/MoltenVK/Vulkan/vulkan.mm | 2 +- 7 files changed, 38 insertions(+), 35 deletions(-) diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.mm b/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.mm index 72dde4f1e..e92a57c13 100644 --- a/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.mm +++ b/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.mm @@ -472,8 +472,8 @@ _attachments.assign(attachments.begin(), attachments.end()); // Copy the sample positions array of arrays, one array of sample positions for each subpass index. - _subpassSamplePositions.resize(subpassSamplePositions.size); - for (uint32_t spSPIdx = 0; spSPIdx < subpassSamplePositions.size; spSPIdx++) { + _subpassSamplePositions.resize(subpassSamplePositions.size()); + for (uint32_t spSPIdx = 0; spSPIdx < subpassSamplePositions.size(); spSPIdx++) { _subpassSamplePositions[spSPIdx].assign(subpassSamplePositions[spSPIdx].begin(), subpassSamplePositions[spSPIdx].end()); } @@ -593,7 +593,7 @@ // and Metal will default to using default sample postions. 
if (_pDeviceMetalFeatures->programmableSamplePositions) { auto cstmSampPosns = getCustomSamplePositions(); - [mtlRPDesc setSamplePositions: cstmSampPosns.data count: cstmSampPosns.size]; + [mtlRPDesc setSamplePositions: cstmSampPosns.data() count: cstmSampPosns.size()]; } _mtlRenderEncoder = [_mtlCmdBuffer renderCommandEncoderWithDescriptor: mtlRPDesc]; diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.mm b/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.mm index 044dd96e3..2817343d0 100644 --- a/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.mm +++ b/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.mm @@ -58,7 +58,7 @@ uint32_t firstViewport, bool isSettingDynamically) { - size_t vpCnt = viewports.size; + size_t vpCnt = viewports.size(); uint32_t maxViewports = getDevice()->_pProperties->limits.maxViewports; if ((firstViewport + vpCnt > maxViewports) || (firstViewport >= maxViewports) || @@ -111,7 +111,7 @@ uint32_t firstScissor, bool isSettingDynamically) { - size_t sCnt = scissors.size; + size_t sCnt = scissors.size(); uint32_t maxScissors = getDevice()->_pProperties->limits.maxViewports; if ((firstScissor + sCnt > maxScissors) || (firstScissor >= maxScissors) || @@ -165,7 +165,7 @@ // Typically any MSL struct that contains a float4 will also have a size that is rounded up to a multiple of a float4 size. // Ensure that we pass along enough content to cover this extra space even if it is never actually accessed by the shader. 
size_t pcSizeAlign = getDevice()->_pMetalFeatures->pushConstantSizeAlignment; - size_t pcSize = pushConstants.size; + size_t pcSize = pushConstants.size(); size_t pcBuffSize = mvkAlignByteCount(offset + pcSize, pcSizeAlign); mvkEnsureSize(_pushConstants, pcBuffSize); copy(pushConstants.begin(), pushConstants.end(), _pushConstants.begin() + offset); @@ -488,7 +488,7 @@ // Update dynamic buffer offsets uint32_t baseDynOfstIdx = dslMTLRezIdxOffsets.getMetalResourceIndexes().dynamicOffsetBufferIndex; uint32_t doCnt = descSet->getDynamicOffsetDescriptorCount(); - for (uint32_t doIdx = 0; doIdx < doCnt && dynamicOffsetIndex < dynamicOffsets.size; doIdx++) { + for (uint32_t doIdx = 0; doIdx < doCnt && dynamicOffsetIndex < dynamicOffsets.size(); doIdx++) { updateImplicitBuffer(_dynamicOffsets, baseDynOfstIdx + doIdx, dynamicOffsets[dynamicOffsetIndex++]); } @@ -797,8 +797,8 @@ }, [](MVKCommandEncoder* cmdEncoder, MVKMTLBufferBinding& b, const MVKArrayRef s)->void { cmdEncoder->setComputeBytes(cmdEncoder->getMTLComputeEncoder(kMVKCommandUseTessellationVertexTessCtl), - s.data, - s.size * sizeof(uint32_t), + s.data(), + s.byteSize(), b.index); }, [](MVKCommandEncoder* cmdEncoder, MVKMTLTextureBinding& b)->void { @@ -848,8 +848,8 @@ }, [](MVKCommandEncoder* cmdEncoder, MVKMTLBufferBinding& b, const MVKArrayRef s)->void { cmdEncoder->setVertexBytes(cmdEncoder->_mtlRenderEncoder, - s.data, - s.size * sizeof(uint32_t), + s.data(), + s.byteSize(), b.index); }, [](MVKCommandEncoder* cmdEncoder, MVKMTLTextureBinding& b)->void { @@ -881,8 +881,8 @@ }, [](MVKCommandEncoder* cmdEncoder, MVKMTLBufferBinding& b, const MVKArrayRef s)->void { cmdEncoder->setComputeBytes(cmdEncoder->getMTLComputeEncoder(kMVKCommandUseTessellationVertexTessCtl), - s.data, - s.size * sizeof(uint32_t), + s.data(), + s.byteSize(), b.index); }, [](MVKCommandEncoder* cmdEncoder, MVKMTLTextureBinding& b)->void { @@ -914,8 +914,8 @@ }, [](MVKCommandEncoder* cmdEncoder, MVKMTLBufferBinding& b, const MVKArrayRef 
s)->void { cmdEncoder->setVertexBytes(cmdEncoder->_mtlRenderEncoder, - s.data, - s.size * sizeof(uint32_t), + s.data(), + s.byteSize(), b.index); }, [](MVKCommandEncoder* cmdEncoder, MVKMTLTextureBinding& b)->void { @@ -947,8 +947,8 @@ }, [](MVKCommandEncoder* cmdEncoder, MVKMTLBufferBinding& b, const MVKArrayRef s)->void { cmdEncoder->setFragmentBytes(cmdEncoder->_mtlRenderEncoder, - s.data, - s.size * sizeof(uint32_t), + s.data(), + s.byteSize(), b.index); }, [](MVKCommandEncoder* cmdEncoder, MVKMTLTextureBinding& b)->void { diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDescriptor.mm b/MoltenVK/MoltenVK/GPUObjects/MVKDescriptor.mm index a3f02ea89..ac83d6971 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKDescriptor.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKDescriptor.mm @@ -729,7 +729,7 @@ void mvkPopulateShaderConversionConfig(mvk::SPIRVToMSLConversionConfiguration& s MVKArrayRef dynamicOffsets, uint32_t& dynamicOffsetIndex) { MVKMTLBufferBinding bb; - NSUInteger bufferDynamicOffset = (usesDynamicBufferOffsets() && dynamicOffsets.size > dynamicOffsetIndex + NSUInteger bufferDynamicOffset = (usesDynamicBufferOffsets() && dynamicOffsets.size() > dynamicOffsetIndex ? 
dynamicOffsets[dynamicOffsetIndex++] : 0); if (_mvkBuffer) { bb.mtlBuffer = _mvkBuffer->getMTLBuffer(); diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm index acd50514e..20bad33e8 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKDevice.mm @@ -1530,7 +1530,7 @@ VkResult MVKPhysicalDevice::getQueueFamilyProperties(uint32_t* pCount, VkQueueFamilyProperties* pQueueFamilyProperties) { auto qFams = getQueueFamilies(); - uint32_t qfCnt = uint32_t(qFams.size); + uint32_t qfCnt = uint32_t(qFams.size()); // If properties aren't actually being requested yet, simply update the returned count if ( !pQueueFamilyProperties ) { diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.mm b/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.mm index e3da96b34..779eb75ac 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKPipeline.mm @@ -49,7 +49,7 @@ MVKArrayRef dynamicOffsets) { if (!cmdEncoder) { clearConfigurationResult(); } uint32_t dynamicOffsetIndex = 0; - size_t dsCnt = descriptorSets.size; + size_t dsCnt = descriptorSets.size(); for (uint32_t dsIdx = 0; dsIdx < dsCnt; dsIdx++) { MVKDescriptorSet* descSet = descriptorSets[dsIdx]; uint32_t dslIdx = firstSet + dsIdx; diff --git a/MoltenVK/MoltenVK/Utility/MVKFoundation.h b/MoltenVK/MoltenVK/Utility/MVKFoundation.h index 16c7fd2e9..8ea9f6582 100644 --- a/MoltenVK/MoltenVK/Utility/MVKFoundation.h +++ b/MoltenVK/MoltenVK/Utility/MVKFoundation.h @@ -478,20 +478,23 @@ std::size_t mvkHash(const N* pVals, std::size_t count = 1, std::size_t seed = 53 */ template struct MVKArrayRef { - Type* data; - const size_t size; - - const Type* begin() const { return data; } - const Type* end() const { return &data[size]; } - const Type& operator[]( const size_t i ) const { return data[i]; } - Type& operator[]( const size_t i ) { return data[i]; } - MVKArrayRef& operator=(const MVKArrayRef& other) { - data = other.data; - 
*(size_t*)&size = other.size; - return *this; - } - MVKArrayRef() : MVKArrayRef(nullptr, 0) {} - MVKArrayRef(Type* d, size_t s) : data(d), size(s) {} +public: + constexpr const Type* begin() const { return _data; } + constexpr const Type* end() const { return &_data[_size]; } + constexpr const Type* data() const { return _data; } + constexpr Type* begin() { return _data; } + constexpr Type* end() { return &_data[_size]; } + constexpr Type* data() { return _data; } + constexpr const size_t size() const { return _size; } + constexpr const size_t byteSize() const { return _size * sizeof(Type); } + constexpr const Type& operator[]( const size_t i ) const { return _data[i]; } + constexpr Type& operator[]( const size_t i ) { return _data[i]; } + constexpr MVKArrayRef() : MVKArrayRef(nullptr, 0) {} + constexpr MVKArrayRef(Type* d, size_t s) : _data(d), _size(s) {} + +protected: + Type* _data; + size_t _size; }; /** Ensures the size of the specified container is at least the specified size. */ diff --git a/MoltenVK/MoltenVK/Vulkan/vulkan.mm b/MoltenVK/MoltenVK/Vulkan/vulkan.mm index cfa133fb5..c44dd7d1e 100644 --- a/MoltenVK/MoltenVK/Vulkan/vulkan.mm +++ b/MoltenVK/MoltenVK/Vulkan/vulkan.mm @@ -1964,7 +1964,7 @@ static void mvkCmdBeginRenderPass( MVKAddCmdFrom5Thresholds(BeginRenderPass, pRenderPassBegin->clearValueCount, 1, 2, - attachments.size, 0, 1, 2, + attachments.size(), 0, 1, 2, commandBuffer, pRenderPassBegin, pSubpassBeginInfo, From 27f4f6a6a017cb7e9ddad72f7fb7b87f85707788 Mon Sep 17 00:00:00 2001 From: Evan Tang Date: Thu, 15 Jun 2023 13:24:58 -0500 Subject: [PATCH 21/21] Use MVKArrayRef<const T>, not const MVKArrayRef<T> It's very easy to accidentally un-const a `const MVKArrayRef<T>`, since ArrayRefs are meant to be passed by value --- MoltenVK/MoltenVK/Commands/MVKCommandBuffer.h | 2 +- MoltenVK/MoltenVK/Commands/MVKCommandBuffer.mm | 2 +- .../MoltenVK/Commands/MVKCommandEncoderState.h | 8 ++++---- .../Commands/MVKCommandEncoderState.mm | 18 +++++++++--------- 
MoltenVK/MoltenVK/GPUObjects/MVKQueryPool.h | 4 ++-- MoltenVK/MoltenVK/GPUObjects/MVKQueryPool.mm | 4 ++-- MoltenVK/MoltenVK/GPUObjects/MVKRenderPass.h | 10 +++++----- MoltenVK/MoltenVK/GPUObjects/MVKRenderPass.mm | 10 +++++----- MoltenVK/MoltenVK/Utility/MVKFoundation.h | 18 ++++++++---------- MoltenVK/MoltenVK/Utility/MVKSmallVector.h | 8 ++++---- 10 files changed, 41 insertions(+), 43 deletions(-) diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.h b/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.h index 8f8b2c0b8..07b4c2024 100644 --- a/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.h +++ b/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.h @@ -143,7 +143,7 @@ class MVKCommandBuffer : public MVKDispatchableVulkanAPIObject, bool _needsVisibilityResultMTLBuffer; /** Called when a MVKCmdExecuteCommands is added to this command buffer. */ - void recordExecuteCommands(const MVKArrayRef secondaryCommandBuffers); + void recordExecuteCommands(MVKArrayRef secondaryCommandBuffers); /** Called when a timestamp command is added. */ void recordTimestampCommand(); diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.mm b/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.mm index e92a57c13..5f32996e8 100644 --- a/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.mm +++ b/MoltenVK/MoltenVK/Commands/MVKCommandBuffer.mm @@ -310,7 +310,7 @@ } // Promote the initial visibility buffer and indication of timestamp use from the secondary buffers. 
-void MVKCommandBuffer::recordExecuteCommands(const MVKArrayRef secondaryCommandBuffers) { +void MVKCommandBuffer::recordExecuteCommands(MVKArrayRef secondaryCommandBuffers) { for (MVKCommandBuffer* cmdBuff : secondaryCommandBuffers) { if (cmdBuff->_needsVisibilityResultMTLBuffer) { _needsVisibilityResultMTLBuffer = true; } if (cmdBuff->_hasStageCounterTimestampCommand) { _hasStageCounterTimestampCommand = true; } diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.h b/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.h index 06152dd7a..6dbeb6472 100644 --- a/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.h +++ b/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.h @@ -143,7 +143,7 @@ class MVKViewportCommandEncoderState : public MVKCommandEncoderState { * The isSettingDynamically indicates that the scissor is being changed dynamically, * which is only allowed if the pipeline was created as VK_DYNAMIC_STATE_SCISSOR. */ - void setViewports(const MVKArrayRef viewports, + void setViewports(MVKArrayRef viewports, uint32_t firstViewport, bool isSettingDynamically); @@ -171,7 +171,7 @@ class MVKScissorCommandEncoderState : public MVKCommandEncoderState { * The isSettingDynamically indicates that the scissor is being changed dynamically, * which is only allowed if the pipeline was created as VK_DYNAMIC_STATE_SCISSOR. 
*/ - void setScissors(const MVKArrayRef scissors, + void setScissors(MVKArrayRef scissors, uint32_t firstScissor, bool isSettingDynamically); @@ -457,7 +457,7 @@ class MVKResourcesCommandEncoderState : public MVKCommandEncoderState { contents[index] = value; } - void assertMissingSwizzles(bool needsSwizzle, const char* stageName, const MVKArrayRef texBindings); + void assertMissingSwizzles(bool needsSwizzle, const char* stageName, MVKArrayRef texBindings); void encodeMetalArgumentBuffer(MVKShaderStage stage); virtual void bindMetalArgumentBuffer(MVKShaderStage stage, MVKMTLBufferBinding& buffBind) = 0; @@ -547,7 +547,7 @@ class MVKGraphicsResourcesCommandEncoderState : public MVKResourcesCommandEncode const char* pStageName, bool fullImageViewSwizzle, std::function bindBuffer, - std::function)> bindImplicitBuffer, + std::function)> bindImplicitBuffer, std::function bindTexture, std::function bindSampler); diff --git a/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.mm b/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.mm index 2817343d0..37f0194f9 100644 --- a/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.mm +++ b/MoltenVK/MoltenVK/Commands/MVKCommandEncoderState.mm @@ -54,7 +54,7 @@ #pragma mark - #pragma mark MVKViewportCommandEncoderState -void MVKViewportCommandEncoderState::setViewports(const MVKArrayRef viewports, +void MVKViewportCommandEncoderState::setViewports(MVKArrayRef viewports, uint32_t firstViewport, bool isSettingDynamically) { @@ -107,7 +107,7 @@ #pragma mark - #pragma mark MVKScissorCommandEncoderState -void MVKScissorCommandEncoderState::setScissors(const MVKArrayRef scissors, +void MVKScissorCommandEncoderState::setScissors(MVKArrayRef scissors, uint32_t firstScissor, bool isSettingDynamically) { @@ -594,7 +594,7 @@ } // If a swizzle is needed for this stage, iterates all the bindings and logs errors for those that need texture swizzling. 
-void MVKResourcesCommandEncoderState::assertMissingSwizzles(bool needsSwizzle, const char* stageName, const MVKArrayRef texBindings) { +void MVKResourcesCommandEncoderState::assertMissingSwizzles(bool needsSwizzle, const char* stageName, MVKArrayRef texBindings) { if (needsSwizzle) { for (auto& tb : texBindings) { VkComponentMapping vkcm = mvkUnpackSwizzle(tb.swizzle); @@ -684,7 +684,7 @@ const char* pStageName, bool fullImageViewSwizzle, std::function bindBuffer, - std::function)> bindImplicitBuffer, + std::function)> bindImplicitBuffer, std::function bindTexture, std::function bindSampler) { @@ -795,7 +795,7 @@ offset: b.offset atIndex: b.index]; }, - [](MVKCommandEncoder* cmdEncoder, MVKMTLBufferBinding& b, const MVKArrayRef s)->void { + [](MVKCommandEncoder* cmdEncoder, MVKMTLBufferBinding& b, MVKArrayRef s)->void { cmdEncoder->setComputeBytes(cmdEncoder->getMTLComputeEncoder(kMVKCommandUseTessellationVertexTessCtl), s.data(), s.byteSize(), @@ -846,7 +846,7 @@ b.isDirty = true; // We haven't written it out, so leave dirty until next time. 
} }, - [](MVKCommandEncoder* cmdEncoder, MVKMTLBufferBinding& b, const MVKArrayRef s)->void { + [](MVKCommandEncoder* cmdEncoder, MVKMTLBufferBinding& b, MVKArrayRef s)->void { cmdEncoder->setVertexBytes(cmdEncoder->_mtlRenderEncoder, s.data(), s.byteSize(), @@ -879,7 +879,7 @@ offset: b.offset atIndex: b.index]; }, - [](MVKCommandEncoder* cmdEncoder, MVKMTLBufferBinding& b, const MVKArrayRef s)->void { + [](MVKCommandEncoder* cmdEncoder, MVKMTLBufferBinding& b, MVKArrayRef s)->void { cmdEncoder->setComputeBytes(cmdEncoder->getMTLComputeEncoder(kMVKCommandUseTessellationVertexTessCtl), s.data(), s.byteSize(), @@ -912,7 +912,7 @@ offset: b.offset atIndex: b.index]; }, - [](MVKCommandEncoder* cmdEncoder, MVKMTLBufferBinding& b, const MVKArrayRef s)->void { + [](MVKCommandEncoder* cmdEncoder, MVKMTLBufferBinding& b, MVKArrayRef s)->void { cmdEncoder->setVertexBytes(cmdEncoder->_mtlRenderEncoder, s.data(), s.byteSize(), @@ -945,7 +945,7 @@ offset: b.offset atIndex: b.index]; }, - [](MVKCommandEncoder* cmdEncoder, MVKMTLBufferBinding& b, const MVKArrayRef s)->void { + [](MVKCommandEncoder* cmdEncoder, MVKMTLBufferBinding& b, MVKArrayRef s)->void { cmdEncoder->setFragmentBytes(cmdEncoder->_mtlRenderEncoder, s.data(), s.byteSize(), diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKQueryPool.h b/MoltenVK/MoltenVK/GPUObjects/MVKQueryPool.h index 80c3a3572..5f44a95f8 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKQueryPool.h +++ b/MoltenVK/MoltenVK/GPUObjects/MVKQueryPool.h @@ -56,7 +56,7 @@ class MVKQueryPool : public MVKVulkanAPIDeviceObject { virtual void endQuery(uint32_t query, MVKCommandEncoder* cmdEncoder); /** Finishes the specified queries and marks them as available. */ - virtual void finishQueries(const MVKArrayRef queries); + virtual void finishQueries(MVKArrayRef queries); /** Resets the results and availability status of the specified queries. 
*/ virtual void resetResults(uint32_t firstQuery, uint32_t queryCount, MVKCommandEncoder* cmdEncoder); @@ -212,7 +212,7 @@ class MVKTimestampQueryPool : public MVKGPUCounterQueryPool { public: void endQuery(uint32_t query, MVKCommandEncoder* cmdEncoder) override; - void finishQueries(const MVKArrayRef queries) override; + void finishQueries(MVKArrayRef queries) override; #pragma mark Construction diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKQueryPool.mm b/MoltenVK/MoltenVK/GPUObjects/MVKQueryPool.mm index 2e0e13682..1bd0a6d01 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKQueryPool.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKQueryPool.mm @@ -52,7 +52,7 @@ } // Mark queries as available -void MVKQueryPool::finishQueries(const MVKArrayRef queries) { +void MVKQueryPool::finishQueries(MVKArrayRef queries) { lock_guard lock(_availabilityLock); for (uint32_t qry : queries) { if (_availability[qry] == DeviceAvailable) { @@ -379,7 +379,7 @@ } // If not using MTLCounterSampleBuffer, update timestamp values, then mark queries as available -void MVKTimestampQueryPool::finishQueries(const MVKArrayRef queries) { +void MVKTimestampQueryPool::finishQueries(MVKArrayRef queries) { if ( !_mtlCounterBuffer ) { uint64_t ts = mvkGetTimestamp(); for (uint32_t qry : queries) { _timestamps[qry] = ts; } diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKRenderPass.h b/MoltenVK/MoltenVK/GPUObjects/MVKRenderPass.h index 6cbe2e4e2..cb9c8e44b 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKRenderPass.h +++ b/MoltenVK/MoltenVK/GPUObjects/MVKRenderPass.h @@ -116,8 +116,8 @@ class MVKRenderSubpass : public MVKBaseObject { void populateMTLRenderPassDescriptor(MTLRenderPassDescriptor* mtlRPDesc, uint32_t passIdx, MVKFramebuffer* framebuffer, - const MVKArrayRef attachments, - const MVKArrayRef clearValues, + MVKArrayRef attachments, + MVKArrayRef clearValues, bool isRenderingEntireAttachment, bool loadOverride = false); @@ -126,7 +126,7 @@ class MVKRenderSubpass : public MVKBaseObject { * when the render area is 
smaller than the full framebuffer size. */ void populateClearAttachments(MVKClearAttachments& clearAtts, - const MVKArrayRef clearValues); + MVKArrayRef clearValues); /** * Populates the specified vector with VkClearRects for clearing views of a specified multiview @@ -140,11 +140,11 @@ class MVKRenderSubpass : public MVKBaseObject { /** If a render encoder is active, sets the store actions for all attachments to it. */ void encodeStoreActions(MVKCommandEncoder* cmdEncoder, bool isRenderingEntireAttachment, - const MVKArrayRef attachments, + MVKArrayRef attachments, bool storeOverride = false); /** Resolves any resolve attachments that cannot be handled by native Metal subpass resolve behavior. */ - void resolveUnresolvableAttachments(MVKCommandEncoder* cmdEncoder, const MVKArrayRef attachments); + void resolveUnresolvableAttachments(MVKCommandEncoder* cmdEncoder, MVKArrayRef attachments); MVKRenderSubpass(MVKRenderPass* renderPass, const VkSubpassDescription* pCreateInfo, const VkRenderPassInputAttachmentAspectCreateInfo* pInputAspects, diff --git a/MoltenVK/MoltenVK/GPUObjects/MVKRenderPass.mm b/MoltenVK/MoltenVK/GPUObjects/MVKRenderPass.mm index 3bf8a1887..a742690d8 100644 --- a/MoltenVK/MoltenVK/GPUObjects/MVKRenderPass.mm +++ b/MoltenVK/MoltenVK/GPUObjects/MVKRenderPass.mm @@ -138,8 +138,8 @@ void MVKRenderSubpass::populateMTLRenderPassDescriptor(MTLRenderPassDescriptor* mtlRPDesc, uint32_t passIdx, MVKFramebuffer* framebuffer, - const MVKArrayRef attachments, - const MVKArrayRef clearValues, + MVKArrayRef attachments, + MVKArrayRef clearValues, bool isRenderingEntireAttachment, bool loadOverride) { MVKPixelFormats* pixFmts = _renderPass->getPixelFormats(); @@ -279,7 +279,7 @@ void MVKRenderSubpass::encodeStoreActions(MVKCommandEncoder* cmdEncoder, bool isRenderingEntireAttachment, - const MVKArrayRef attachments, + MVKArrayRef attachments, bool storeOverride) { if (!cmdEncoder->_mtlRenderEncoder) { return; } if 
(!_renderPass->getDevice()->_pMetalFeatures->deferredStoreActions) { return; } @@ -308,7 +308,7 @@ } void MVKRenderSubpass::populateClearAttachments(MVKClearAttachments& clearAtts, - const MVKArrayRef clearValues) { + MVKArrayRef clearValues) { uint32_t caCnt = getColorAttachmentCount(); for (uint32_t caIdx = 0; caIdx < caCnt; caIdx++) { uint32_t attIdx = _colorAttachments[caIdx].attachment; @@ -394,7 +394,7 @@ return caps; } -void MVKRenderSubpass::resolveUnresolvableAttachments(MVKCommandEncoder* cmdEncoder, const MVKArrayRef attachments) { +void MVKRenderSubpass::resolveUnresolvableAttachments(MVKCommandEncoder* cmdEncoder, MVKArrayRef attachments) { MVKPixelFormats* pixFmts = cmdEncoder->getPixelFormats(); size_t raCnt = _resolveAttachments.size(); for (uint32_t raIdx = 0; raIdx < raCnt; raIdx++) { diff --git a/MoltenVK/MoltenVK/Utility/MVKFoundation.h b/MoltenVK/MoltenVK/Utility/MVKFoundation.h index 8ea9f6582..d26b53a45 100644 --- a/MoltenVK/MoltenVK/Utility/MVKFoundation.h +++ b/MoltenVK/MoltenVK/Utility/MVKFoundation.h @@ -479,18 +479,16 @@ std::size_t mvkHash(const N* pVals, std::size_t count = 1, std::size_t seed = 53 template struct MVKArrayRef { public: - constexpr const Type* begin() const { return _data; } - constexpr const Type* end() const { return &_data[_size]; } - constexpr const Type* data() const { return _data; } - constexpr Type* begin() { return _data; } - constexpr Type* end() { return &_data[_size]; } - constexpr Type* data() { return _data; } - constexpr const size_t size() const { return _size; } - constexpr const size_t byteSize() const { return _size * sizeof(Type); } - constexpr const Type& operator[]( const size_t i ) const { return _data[i]; } - constexpr Type& operator[]( const size_t i ) { return _data[i]; } + constexpr Type* begin() const { return _data; } + constexpr Type* end() const { return &_data[_size]; } + constexpr Type* data() const { return _data; } + constexpr size_t size() const { return _size; } + constexpr size_t 
byteSize() const { return _size * sizeof(Type); } + constexpr Type& operator[]( const size_t i ) const { return _data[i]; } constexpr MVKArrayRef() : MVKArrayRef(nullptr, 0) {} constexpr MVKArrayRef(Type* d, size_t s) : _data(d), _size(s) {} + template , bool> = true> + constexpr MVKArrayRef(MVKArrayRef other) : _data(other.data()), _size(other.size()) {} protected: Type* _data; diff --git a/MoltenVK/MoltenVK/Utility/MVKSmallVector.h b/MoltenVK/MoltenVK/Utility/MVKSmallVector.h index 6294f9139..b6e1277c7 100755 --- a/MoltenVK/MoltenVK/Utility/MVKSmallVector.h +++ b/MoltenVK/MoltenVK/Utility/MVKSmallVector.h @@ -298,12 +298,12 @@ class MVKSmallVectorImpl reverse_iterator rbegin() const { return reverse_iterator( end() ); } reverse_iterator rend() const { return reverse_iterator( begin() ); } - const MVKArrayRef contents() const { return MVKArrayRef(data(), size()); } - MVKArrayRef contents() { return MVKArrayRef(data(), size()); } + MVKArrayRef contents() const { return MVKArrayRef(data(), size()); } + MVKArrayRef< Type> contents() { return MVKArrayRef< Type>(data(), size()); } - const Type &operator[]( const size_t i ) const { return alc[i]; } + const Type &operator[]( const size_t i ) const { return alc[i]; } Type &operator[]( const size_t i ) { return alc[i]; } - const Type &at( const size_t i ) const { return alc[i]; } + const Type &at( const size_t i ) const { return alc[i]; } Type &at( const size_t i ) { return alc[i]; } const Type &front() const { return alc[0]; } Type &front() { return alc[0]; }