diff --git a/rpcs3/Emu/RSX/Common/TextureUtils.h b/rpcs3/Emu/RSX/Common/TextureUtils.h index 07f3df6d44c1..a33400d62fba 100644 --- a/rpcs3/Emu/RSX/Common/TextureUtils.h +++ b/rpcs3/Emu/RSX/Common/TextureUtils.h @@ -40,7 +40,9 @@ namespace rsx { read = 0, write = 1, - transfer = 2 + transfer = 2, + + gpu_reference = (1 << 6) }; enum format_type : u8 @@ -57,6 +59,8 @@ namespace rsx rsx::texture_dimension_extended image_type = texture_dimension_extended::texture_dimension_2d; rsx::format_type format_class = rsx::format_type::color; bool is_cyclic_reference = false; + u32 ref_address = 0; + u64 surface_cache_tag = 0; f32 scale_x = 1.f; f32 scale_y = 1.f; diff --git a/rpcs3/Emu/RSX/Common/surface_store.h b/rpcs3/Emu/RSX/Common/surface_store.h index 1124b8c7c608..8b5a5eea4fed 100644 --- a/rpcs3/Emu/RSX/Common/surface_store.h +++ b/rpcs3/Emu/RSX/Common/surface_store.h @@ -837,8 +837,6 @@ namespace rsx { invalidate(It->second); m_render_targets_storage.erase(It); - - cache_tag = rsx::get_shared_tag(); return; } } @@ -849,8 +847,6 @@ namespace rsx { invalidate(It->second); m_depth_stencil_storage.erase(It); - - cache_tag = rsx::get_shared_tag(); return; } } @@ -1072,11 +1068,6 @@ namespace rsx } } - void notify_memory_structure_changed() - { - cache_tag = rsx::get_shared_tag(); - } - void invalidate_all() { // Unbind and invalidate all resources diff --git a/rpcs3/Emu/RSX/Common/texture_cache.h b/rpcs3/Emu/RSX/Common/texture_cache.h index f20ffef9bcf2..bb8c5dbac51c 100644 --- a/rpcs3/Emu/RSX/Common/texture_cache.h +++ b/rpcs3/Emu/RSX/Common/texture_cache.h @@ -261,6 +261,65 @@ namespace rsx { return (image_handle || external_subresource_desc.op != deferred_request_command::nop); } + + /** + * Returns a boolean true/false if the descriptor is expired + * Optionally returns a second variable that contains the surface reference. + * The surface reference can be used to insert a texture barrier or inject a deferred resource + */ + template + std::pair is_expired(surface_store_type& surface_cache) + { + if (upload_context != rsx::texture_upload_context::framebuffer_storage) + { + return {}; + } + + // Expired, but may still be valid. Check if the texture is still accessible + auto ref_image = image_handle ? image_handle->image() : external_subresource_desc.external_handle; + surface_type surface = dynamic_cast(ref_image); + + // Try and grab a cache reference in case of MSAA resolve target or compositing op + if (!surface) + { + if (!(surface = surface_cache.get_surface_at(ref_address))) + { + // Compositing op. Just ignore expiry for now + verify(HERE), !ref_image; + return {}; + } + } + + verify(HERE), surface; + if (!ref_image || surface->get_surface(rsx::surface_access::gpu_reference) == ref_image) + { + // Same image, so configuration did not change. + if (surface_cache.cache_tag <= surface_cache_tag && + surface->last_use_tag <= surface_cache_tag) + { + external_subresource_desc.do_not_cache = false; + return {}; + } + + // Image was written to since last bind. Insert texture barrier. + surface_cache_tag = surface->last_use_tag; + is_cyclic_reference = surface_cache.address_is_bound(ref_address); + external_subresource_desc.do_not_cache = is_cyclic_reference; + + switch (external_subresource_desc.op) + { + case deferred_request_command::copy_image_dynamic: + case deferred_request_command::copy_image_static: + external_subresource_desc.op = (is_cyclic_reference) ? 
deferred_request_command::copy_image_dynamic : deferred_request_command::copy_image_static; + [[fallthrough]]; + default: + return { false, surface }; + } + } + + // Reupload + return { true, nullptr }; + } }; @@ -320,7 +379,7 @@ namespace rsx const std::vector& subresource_layout, rsx::texture_dimension_extended type, bool swizzled) = 0; virtual section_storage_type* create_nul_section(commandbuffer_type&, const address_range &rsx_range, bool memory_load) = 0; virtual void enforce_surface_creation_type(section_storage_type& section, u32 gcm_format, texture_create_flags expected) = 0; - virtual void insert_texture_barrier(commandbuffer_type&, image_storage_type* tex) = 0; + virtual void insert_texture_barrier(commandbuffer_type&, image_storage_type* tex, bool strong_ordering = true) = 0; virtual image_view_type generate_cubemap_from_images(commandbuffer_type&, u32 gcm_format, u16 size, const std::vector& sources, const texture_channel_remap_t& remap_vector) = 0; virtual image_view_type generate_3d_from_2d_images(commandbuffer_type&, u32 gcm_format, u16 width, u16 height, u16 depth, const std::vector& sources, const texture_channel_remap_t& remap_vector) = 0; virtual image_view_type generate_atlas_from_images(commandbuffer_type&, u32 gcm_format, u16 width, u16 height, const std::vector& sections_to_copy, const texture_channel_remap_t& remap_vector) = 0; @@ -1707,6 +1766,71 @@ namespace rsx return {}; } + template <typename surface_store_type, typename RsxTextureType> + bool test_if_descriptor_expired(commandbuffer_type& cmd, surface_store_type& surface_cache, sampled_image_descriptor* descriptor, const RsxTextureType& tex) + { + auto result = descriptor->is_expired(surface_cache); + if (result.second && descriptor->is_cyclic_reference) + { + /* NOTE: All cyclic descriptors updated via fast update must have a barrier check + * It is possible for the following sequence of events to break common-sense tests + * 1. Cyclic ref occurs normally in upload_texture + * 2. Surface is swapped out, but texture is not updated + * 3. Surface is swapped back in. Surface cache resets layout to optimal rasterization layout + * 4.
During bind, the surface is converted to shader layout because it is not in GENERAL layout + */ + if (!g_cfg.video.strict_rendering_mode) + { + insert_texture_barrier(cmd, result.second, false); + } + else if (descriptor->image_handle) + { + // Rebuild duplicate surface + auto src = descriptor->image_handle->image(); + rsx::image_section_attributes_t attr; + attr.address = descriptor->ref_address; + attr.gcm_format = tex.format() & ~(CELL_GCM_TEXTURE_LN | CELL_GCM_TEXTURE_UN); + attr.width = src->width(); + attr.height = src->height(); + attr.depth = 1; + //attr.mipmaps = 1; + attr.pitch = 0; // Unused + attr.slice_h = src->height(); + attr.bpp = get_format_block_size_in_bytes(attr.gcm_format); + //attr.swizzled = false; + + // Sanity checks + const bool gcm_format_is_depth = helpers::is_gcm_depth_format(attr.gcm_format); + const bool bound_surface_is_depth = surface_cache.m_bound_depth_stencil.first == attr.address; + if (!gcm_format_is_depth && bound_surface_is_depth) + { + // While the copy routines can perform a typeless cast, prefer to not cross the aspect barrier if possible + // This avoids messing with other solutions such as texture redirection as well + attr.gcm_format = helpers::get_compatible_depth_format(attr.gcm_format); + } + + descriptor->external_subresource_desc = + { + src, + rsx::deferred_request_command::copy_image_dynamic, + attr, + {}, + rsx::default_remap_vector + }; + + descriptor->external_subresource_desc.do_not_cache = true; + descriptor->image_handle = nullptr; + } + else + { + // Force reupload + return true; + } + } + + return result.first; + } + template sampled_image_descriptor upload_texture(commandbuffer_type& cmd, RsxTextureType& tex, surface_store_type& m_rtts, Args&&... extras) { @@ -1805,6 +1929,9 @@ namespace rsx result.external_subresource_desc.cache_range = lookup_range; } + result.ref_address = attributes.address; + result.surface_cache_tag = m_rtts.cache_tag; + if (subsurface_count == 1) { return result; @@ -2650,7 +2777,6 @@ namespace rsx // NOTE: This doesn't work very well in case of Cell access // Need to lock the affected memory range and actually attach this subres to a locked_region dst_subres.surface->on_write_copy(rsx::get_shared_tag()); - m_rtts.notify_memory_structure_changed(); // Reset this object's synchronization status if it is locked lock.upgrade(); diff --git a/rpcs3/Emu/RSX/GL/GLTextureCache.h b/rpcs3/Emu/RSX/GL/GLTextureCache.h index b2d1d831740e..a1f88a1bb88b 100644 --- a/rpcs3/Emu/RSX/GL/GLTextureCache.h +++ b/rpcs3/Emu/RSX/GL/GLTextureCache.h @@ -926,7 +926,7 @@ namespace gl section.set_view_flags(flags); } - void insert_texture_barrier(gl::command_context&, gl::texture*) override + void insert_texture_barrier(gl::command_context&, gl::texture*, bool) override { auto &caps = gl::get_driver_caps(); diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.cpp b/rpcs3/Emu/RSX/VK/VKGSRender.cpp index 360139cb66ec..d563c7dea921 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.cpp +++ b/rpcs3/Emu/RSX/VK/VKGSRender.cpp @@ -43,6 +43,18 @@ namespace namespace vk { + VkImageViewType get_view_type(rsx::texture_dimension_extended type) + { + switch (type) + { + case rsx::texture_dimension_extended::texture_dimension_1d: return VK_IMAGE_VIEW_TYPE_1D; + case rsx::texture_dimension_extended::texture_dimension_2d: return VK_IMAGE_VIEW_TYPE_2D; + case rsx::texture_dimension_extended::texture_dimension_cubemap: return VK_IMAGE_VIEW_TYPE_CUBE; + case rsx::texture_dimension_extended::texture_dimension_3d: return VK_IMAGE_VIEW_TYPE_3D; + default: ASSUME(0); + }; + } + 
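// For reference, this is how the new vk::get_view_type() helper is consumed by the
// bind_texture_env() changes further down in this diff (excerpted from a later hunk, not new
// code): when a referenced texture slot is disabled or has no valid image, a null view matching
// the shader's expected dimensionality is bound instead of the previously hard-coded 2D null view.
const VkImageViewType view_type = vk::get_view_type(current_fragment_program.get_texture_dimension(i));
m_program->bind_uniform({ vk::null_sampler(), vk::null_image_view(*m_current_command_buffer, view_type)->value, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL },
	i, ::glsl::program_domain::glsl_fragment_program, m_current_frame->descriptor_set);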
VkCompareOp get_compare_func(rsx::comparison_function op, bool reverse_direction = false) { switch (op) @@ -1081,544 +1093,247 @@ void VKGSRender::update_draw_state() m_frame_stats.setup_time += m_profiler.duration(); } -void VKGSRender::begin_render_pass() -{ - vk::begin_renderpass(*m_current_command_buffer, get_render_pass(), m_draw_fbo->value, {positionu{0u, 0u}, sizeu{m_draw_fbo->width(), m_draw_fbo->height()}}); -} - -void VKGSRender::close_render_pass() -{ - vk::end_renderpass(*m_current_command_buffer); -} - -VkRenderPass VKGSRender::get_render_pass() +void VKGSRender::load_texture_env() { - if (!m_cached_renderpass) + // Load textures + bool check_for_cyclic_refs = false; + auto check_surface_cache_sampler = [&](auto descriptor, const auto& tex) { - m_cached_renderpass = vk::get_renderpass(*m_device, m_current_renderpass_key); - } + if (!m_texture_cache.test_if_descriptor_expired(*m_current_command_buffer, m_rtts, descriptor, tex)) + { + check_for_cyclic_refs |= descriptor->is_cyclic_reference; + return true; + } - return m_cached_renderpass; -} + return false; + }; -void VKGSRender::emit_geometry(u32 sub_index) -{ - auto &draw_call = rsx::method_registers.current_draw_clause; - m_profiler.start(); - if (sub_index == 0) + std::lock_guard lock(m_sampler_mutex); + + for (int i = 0; i < rsx::limits::fragment_textures_count; ++i) { - analyse_inputs_interleaved(m_vertex_layout); + if (!fs_sampler_state[i]) + fs_sampler_state[i] = std::make_unique(); - if (!m_vertex_layout.validate()) + auto sampler_state = static_cast(fs_sampler_state[i].get()); + const auto& tex = rsx::method_registers.fragment_textures[i]; + + if (m_samplers_dirty || m_textures_dirty[i] || !check_surface_cache_sampler(sampler_state, tex)) { - // No vertex inputs enabled - // Execute remainining pipeline barriers with NOP draw - do + if (rsx::method_registers.fragment_textures[i].enabled()) { - draw_call.execute_pipeline_dependencies(); - } - while (draw_call.next()); + check_heap_status(VK_HEAP_CHECK_TEXTURE_UPLOAD_STORAGE); + *sampler_state = m_texture_cache.upload_texture(*m_current_command_buffer, rsx::method_registers.fragment_textures[i], m_rtts); - draw_call.end(); - return; - } - } - else if (draw_call.execute_pipeline_dependencies() & rsx::vertex_base_changed) - { - // Rebase vertex bases instead of - for (auto &info : m_vertex_layout.interleaved_blocks) - { - const auto vertex_base_offset = rsx::method_registers.vertex_data_base_offset(); - info.real_offset_address = rsx::get_address(rsx::get_vertex_offset_from_base(vertex_base_offset, info.base_offset), info.memory_location); - } - } + if (sampler_state->is_cyclic_reference) + { + check_for_cyclic_refs |= true; + } - const auto old_persistent_buffer = m_persistent_attribute_storage ? m_persistent_attribute_storage->value : null_buffer_view->value; - const auto old_volatile_buffer = m_volatile_attribute_storage ? 
m_volatile_attribute_storage->value : null_buffer_view->value; + bool replace = !fs_sampler_handles[i]; + VkFilter mag_filter; + vk::minification_filter min_filter; + f32 min_lod = 0.f, max_lod = 0.f; + f32 lod_bias = 0.f; - // Programs data is dependent on vertex state - auto upload_info = upload_vertex_data(); - if (!upload_info.vertex_draw_count) - { - // Malformed vertex setup; abort - return; - } + const u32 texture_format = rsx::method_registers.fragment_textures[i].format() & ~(CELL_GCM_TEXTURE_UN | CELL_GCM_TEXTURE_LN); + VkBool32 compare_enabled = VK_FALSE; + VkCompareOp depth_compare_mode = VK_COMPARE_OP_NEVER; - m_frame_stats.vertex_upload_time += m_profiler.duration(); + if (texture_format >= CELL_GCM_TEXTURE_DEPTH24_D8 && texture_format <= CELL_GCM_TEXTURE_DEPTH16_FLOAT) + { + if (m_device->get_formats_support().d24_unorm_s8) + { + // NOTE: + // The nvidia-specific format D24S8 has a special way of doing depth comparison that matches the PS3 + // In case of projected shadow lookup the result of the divide operation has its Z clamped to [0-1] before comparison + // Most other wide formats (Z bits > 16) do not behave this way and depth greater than 1 is possible due to the use of floating point as storage + // Compare operations for these formats (such as D32_SFLOAT) are therefore emulated for correct results - auto persistent_buffer = m_persistent_attribute_storage ? m_persistent_attribute_storage->value : null_buffer_view->value; - auto volatile_buffer = m_volatile_attribute_storage ? m_volatile_attribute_storage->value : null_buffer_view->value; - bool update_descriptors = false; + // NOTE2: + // To improve reusability, DEPTH16 shadow ops are also emulated if D24S8 support is not available - const auto& binding_table = m_device->get_pipeline_binding_table(); + compare_enabled = VK_TRUE; + depth_compare_mode = vk::get_compare_func(rsx::method_registers.fragment_textures[i].zfunc(), true); + } + } - if (sub_index == 0) - { - update_descriptors = true; + const bool aniso_override = !g_cfg.video.strict_rendering_mode && g_cfg.video.anisotropic_level_override > 0; + const f32 af_level = aniso_override ? g_cfg.video.anisotropic_level_override : vk::max_aniso(rsx::method_registers.fragment_textures[i].max_aniso()); + const auto wrap_s = vk::vk_wrap_mode(rsx::method_registers.fragment_textures[i].wrap_s()); + const auto wrap_t = vk::vk_wrap_mode(rsx::method_registers.fragment_textures[i].wrap_t()); + const auto wrap_r = vk::vk_wrap_mode(rsx::method_registers.fragment_textures[i].wrap_r()); + const auto border_color = vk::get_border_color(rsx::method_registers.fragment_textures[i].border_color()); - // Allocate stream layout memory for this batch - m_vertex_layout_stream_info.range = rsx::method_registers.current_draw_clause.pass_count() * 128; - m_vertex_layout_stream_info.offset = m_vertex_layout_ring_info.alloc<256>(m_vertex_layout_stream_info.range); + // Check if non-point filtering can even be used on this format + bool can_sample_linear; + if (sampler_state->format_class == rsx::format_type::color) [[likely]] + { + // Most PS3-like formats can be linearly filtered without problem + can_sample_linear = true; + } + else + { + // Not all GPUs support linear filtering of depth formats + const auto vk_format = sampler_state->image_handle ? 
sampler_state->image_handle->image()->format() : + vk::get_compatible_sampler_format(m_device->get_formats_support(), sampler_state->external_subresource_desc.gcm_format); - if (vk::test_status_interrupt(vk::heap_changed)) - { - if (m_vertex_layout_storage && - m_vertex_layout_storage->info.buffer != m_vertex_layout_ring_info.heap->value) - { - m_current_frame->buffer_views_to_clean.push_back(std::move(m_vertex_layout_storage)); - } + can_sample_linear = m_device->get_format_properties(vk_format).optimalTilingFeatures & VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT; + } - vk::clear_status_interrupt(vk::heap_changed); - } - } - else if (persistent_buffer != old_persistent_buffer || volatile_buffer != old_volatile_buffer) - { - // Need to update descriptors; make a copy for the next draw - VkDescriptorSet new_descriptor_set = allocate_descriptor_set(); - std::vector copy_set(binding_table.total_descriptor_bindings); + const auto mipmap_count = rsx::method_registers.fragment_textures[i].get_exact_mipmap_count(); + min_filter = vk::get_min_filter(rsx::method_registers.fragment_textures[i].min_filter()); - for (u32 n = 0; n < binding_table.total_descriptor_bindings; ++n) - { - copy_set[n] = - { - VK_STRUCTURE_TYPE_COPY_DESCRIPTOR_SET, // sType - nullptr, // pNext - m_current_frame->descriptor_set, // srcSet - n, // srcBinding - 0u, // srcArrayElement - new_descriptor_set, // dstSet - n, // dstBinding - 0u, // dstArrayElement - 1u // descriptorCount - }; - } + if (can_sample_linear) + { + mag_filter = vk::get_mag_filter(rsx::method_registers.fragment_textures[i].mag_filter()); + } + else + { + mag_filter = VK_FILTER_NEAREST; + min_filter.filter = VK_FILTER_NEAREST; + } - vkUpdateDescriptorSets(*m_device, 0, 0, binding_table.total_descriptor_bindings, copy_set.data()); - m_current_frame->descriptor_set = new_descriptor_set; + if (min_filter.sample_mipmaps && mipmap_count > 1) + { + f32 actual_mipmaps; + if (sampler_state->upload_context == rsx::texture_upload_context::shader_read) + { + actual_mipmaps = static_cast(mipmap_count); + } + else if (sampler_state->external_subresource_desc.op == rsx::deferred_request_command::mipmap_gather) + { + // Clamp min and max lod + actual_mipmaps = static_cast(sampler_state->external_subresource_desc.sections_to_copy.size()); + } + else + { + actual_mipmaps = 1.f; + } - update_descriptors = true; - } + if (actual_mipmaps > 1.f) + { + min_lod = rsx::method_registers.fragment_textures[i].min_lod(); + max_lod = rsx::method_registers.fragment_textures[i].max_lod(); + lod_bias = rsx::method_registers.fragment_textures[i].bias(); - // Update vertex fetch parameters - update_vertex_env(sub_index, upload_info); + min_lod = std::min(min_lod, actual_mipmaps - 1.f); + max_lod = std::min(max_lod, actual_mipmaps - 1.f); - verify(HERE), m_vertex_layout_storage; - if (update_descriptors) - { - m_program->bind_uniform(persistent_buffer, binding_table.vertex_buffers_first_bind_slot, m_current_frame->descriptor_set); - m_program->bind_uniform(volatile_buffer, binding_table.vertex_buffers_first_bind_slot + 1, m_current_frame->descriptor_set); - m_program->bind_uniform(m_vertex_layout_storage->value, binding_table.vertex_buffers_first_bind_slot + 2, m_current_frame->descriptor_set); - } + if (min_filter.mipmap_mode == VK_SAMPLER_MIPMAP_MODE_NEAREST) + { + // Round to nearest 0.5 to work around some broken games + // Unlike openGL, sampler parameters cannot be dynamically changed on vulkan, leading to many permutations + lod_bias = std::floor(lod_bias * 2.f + 0.5f) * 0.5f; + } 
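// Illustrative sketch (not part of the patch): the 0.5-step rounding above, pulled out as a
// standalone helper. Vulkan samplers are immutable once created, so snapping the LOD bias to
// half-unit steps keeps the number of cached VkSampler permutations small. The helper name is
// hypothetical; assumes <cmath> and plain float instead of rpcs3's f32 alias.
inline float quantize_lod_bias(float lod_bias)
{
	// e.g. 0.70f -> 0.5f, 0.80f -> 1.0f
	return std::floor(lod_bias * 2.f + 0.5f) * 0.5f;
}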
+ } + else + { + min_lod = max_lod = lod_bias = 0.f; + min_filter.mipmap_mode = VK_SAMPLER_MIPMAP_MODE_NEAREST; + } + } - if (!m_current_subdraw_id++) - { - vkCmdBindPipeline(*m_current_command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, m_program->pipeline); - update_draw_state(); - begin_render_pass(); + if (fs_sampler_handles[i] && m_textures_dirty[i]) + { + if (!fs_sampler_handles[i]->matches( + wrap_s, wrap_t, wrap_r, false, lod_bias, af_level, min_lod, max_lod, min_filter.filter, mag_filter, min_filter.mipmap_mode, border_color, compare_enabled, depth_compare_mode)) + { + replace = true; + } + } - if (cond_render_ctrl.hw_cond_active && m_device->get_conditional_render_support()) - { - // It is inconvenient that conditional rendering breaks other things like compute dispatch - // TODO: If this is heavy, add refactor the resources into global and add checks around compute dispatch - VkConditionalRenderingBeginInfoEXT info{}; - info.sType = VK_STRUCTURE_TYPE_CONDITIONAL_RENDERING_BEGIN_INFO_EXT; - info.buffer = m_cond_render_buffer->value; + if (replace) + { + fs_sampler_handles[i] = vk::get_resource_manager()->find_sampler( + *m_device, wrap_s, wrap_t, wrap_r, false, lod_bias, af_level, min_lod, max_lod, min_filter.filter, mag_filter, min_filter.mipmap_mode, border_color, compare_enabled, depth_compare_mode); + } + } + else + { + *sampler_state = {}; + } - m_device->cmdBeginConditionalRenderingEXT(*m_current_command_buffer, &info); - m_current_command_buffer->flags |= vk::command_buffer::cb_has_conditional_render; + m_textures_dirty[i] = false; } } - // Bind the new set of descriptors for use with this draw call - vkCmdBindDescriptorSets(*m_current_command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline_layout, 0, 1, &m_current_frame->descriptor_set, 0, nullptr); + for (int i = 0; i < rsx::limits::vertex_textures_count; ++i) + { + if (!vs_sampler_state[i]) + vs_sampler_state[i] = std::make_unique(); - m_frame_stats.setup_time += m_profiler.duration(); + auto sampler_state = static_cast(vs_sampler_state[i].get()); + const auto& tex = rsx::method_registers.vertex_textures[i]; - if (!upload_info.index_info) - { - if (draw_call.is_single_draw()) - { - vkCmdDraw(*m_current_command_buffer, upload_info.vertex_draw_count, 1, 0, 0); - } - else + if (m_samplers_dirty || m_vertex_textures_dirty[i] || !check_surface_cache_sampler(sampler_state, tex)) { - u32 vertex_offset = 0; - const auto subranges = draw_call.get_subranges(); - for (const auto &range : subranges) + if (rsx::method_registers.vertex_textures[i].enabled()) { - vkCmdDraw(*m_current_command_buffer, range.count, 1, vertex_offset, 0); - vertex_offset += range.count; + check_heap_status(VK_HEAP_CHECK_TEXTURE_UPLOAD_STORAGE); + *sampler_state = m_texture_cache.upload_texture(*m_current_command_buffer, rsx::method_registers.vertex_textures[i], m_rtts); + + if (sampler_state->is_cyclic_reference || sampler_state->external_subresource_desc.do_not_cache) + { + check_for_cyclic_refs |= true; + } + + bool replace = !vs_sampler_handles[i]; + const VkBool32 unnormalized_coords = !!(rsx::method_registers.vertex_textures[i].format() & CELL_GCM_TEXTURE_UN); + const auto min_lod = rsx::method_registers.vertex_textures[i].min_lod(); + const auto max_lod = rsx::method_registers.vertex_textures[i].max_lod(); + const auto border_color = vk::get_border_color(rsx::method_registers.vertex_textures[i].border_color()); + + if (vs_sampler_handles[i]) + { + if (!vs_sampler_handles[i]->matches(VK_SAMPLER_ADDRESS_MODE_REPEAT, VK_SAMPLER_ADDRESS_MODE_REPEAT, 
VK_SAMPLER_ADDRESS_MODE_REPEAT, unnormalized_coords, 0.f, 1.f, min_lod, max_lod, + VK_FILTER_NEAREST, VK_FILTER_NEAREST, VK_SAMPLER_MIPMAP_MODE_NEAREST, border_color)) + { + replace = true; + } + } + + if (replace) + { + vs_sampler_handles[i] = vk::get_resource_manager()->find_sampler(*m_device, VK_SAMPLER_ADDRESS_MODE_REPEAT, VK_SAMPLER_ADDRESS_MODE_REPEAT, VK_SAMPLER_ADDRESS_MODE_REPEAT, unnormalized_coords, + 0.f, 1.f, min_lod, max_lod, VK_FILTER_NEAREST, VK_FILTER_NEAREST, VK_SAMPLER_MIPMAP_MODE_NEAREST, border_color); + } } + else + *sampler_state = {}; + + m_vertex_textures_dirty[i] = false; } } - else - { - const VkIndexType index_type = std::get<1>(*upload_info.index_info); - const VkDeviceSize offset = std::get<0>(*upload_info.index_info); - vkCmdBindIndexBuffer(*m_current_command_buffer, m_index_buffer_ring_info.heap->value, offset, index_type); + m_samplers_dirty.store(false); - if (rsx::method_registers.current_draw_clause.is_single_draw()) - { - const u32 index_count = upload_info.vertex_draw_count; - vkCmdDrawIndexed(*m_current_command_buffer, index_count, 1, 0, 0, 0); - } - else + if (check_for_cyclic_refs) + { + // Regenerate renderpass key + if (const auto key = vk::get_renderpass_key(m_fbo_images, m_current_renderpass_key); key != m_current_renderpass_key) { - u32 vertex_offset = 0; - const auto subranges = draw_call.get_subranges(); - for (const auto &range : subranges) - { - const auto count = get_index_count(draw_call.primitive, range.count); - vkCmdDrawIndexed(*m_current_command_buffer, count, 1, vertex_offset, 0, 0); - vertex_offset += count; - } + m_current_renderpass_key = key; + m_cached_renderpass = VK_NULL_HANDLE; } } - - m_frame_stats.draw_exec_time += m_profiler.duration(); } -void VKGSRender::end() +void VKGSRender::bind_texture_env() { - if (skip_current_frame || !framebuffer_status_valid || swapchain_unavailable || cond_render_ctrl.disable_rendering()) - { - execute_nop_draw(); - rsx::thread::end(); - return; - } - - // Check for frame resource status here because it is possible for an async flip to happen between begin/end - if (m_current_frame->flags & frame_context_state::dirty) [[unlikely]] + for (int i = 0; i < rsx::limits::fragment_textures_count; ++i) { - check_present_status(); - - if (m_current_frame->swap_command_buffer) [[unlikely]] - { - // Borrow time by using the auxilliary context - m_aux_frame_context.grab_resources(*m_current_frame); - m_current_frame = &m_aux_frame_context; - } - else if (m_current_frame->used_descriptors) + if (current_fp_metadata.referenced_textures_mask & (1 << i)) { - m_current_frame->descriptor_pool.reset(0); - m_current_frame->used_descriptors = 0; - } - - verify(HERE), !m_current_frame->swap_command_buffer; - - m_current_frame->flags &= ~frame_context_state::dirty; - } + vk::image_view* view = nullptr; + auto sampler_state = static_cast(fs_sampler_state[i].get()); - m_profiler.start(); - - // Check for data casts - // NOTE: This is deprecated and will be removed soon. 
The memory barrier invoked before rendering does this better - auto ds = std::get<1>(m_rtts.m_bound_depth_stencil); - if (ds && ds->old_contents.size() == 1 && - ds->old_contents[0].source->info.format == VK_FORMAT_B8G8R8A8_UNORM) - { - auto key = vk::get_renderpass_key(ds->info.format); - auto render_pass = vk::get_renderpass(*m_device, key); - verify("Usupported renderpass configuration" HERE), render_pass != VK_NULL_HANDLE; - - VkClearDepthStencilValue clear = { 1.f, 0xFF }; - VkImageSubresourceRange range = { VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT, 0, 1, 0, 1 }; - - // Initialize source - auto src = vk::as_rtt(ds->old_contents[0].source); - src->read_barrier(*m_current_command_buffer); - - switch (src->current_layout) - { - case VK_IMAGE_LAYOUT_GENERAL: - case VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL: - break; - //case VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL: - default: - src->change_layout(*m_current_command_buffer, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); - break; - } - - // Clear explicitly before starting the inheritance transfer - const bool preinitialized = (ds->current_layout == VK_IMAGE_LAYOUT_GENERAL); - if (!preinitialized) ds->push_layout(*m_current_command_buffer, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); - vkCmdClearDepthStencilImage(*m_current_command_buffer, ds->value, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, &clear, 1, &range); - if (!preinitialized) ds->pop_layout(*m_current_command_buffer); - - // TODO: Stencil transfer - ds->old_contents[0].init_transfer(ds); - m_depth_converter->run(*m_current_command_buffer, - ds->old_contents[0].src_rect(), - ds->old_contents[0].dst_rect(), - src->get_view(0xAAE4, rsx::default_remap_vector), - ds, render_pass); - - // TODO: Flush management to avoid pass running out of ubo space (very unlikely) - ds->on_write(); - } - - //Load textures - { - std::lock_guard lock(m_sampler_mutex); - bool update_framebuffer_sourced = false; - bool check_for_cyclic_refs = false; - - if (surface_store_tag != m_rtts.cache_tag) [[unlikely]] - { - update_framebuffer_sourced = true; - surface_store_tag = m_rtts.cache_tag; - } - - for (int i = 0; i < rsx::limits::fragment_textures_count; ++i) - { - if (!fs_sampler_state[i]) - fs_sampler_state[i] = std::make_unique(); - - if (m_samplers_dirty || m_textures_dirty[i] || - (update_framebuffer_sourced && fs_sampler_state[i]->upload_context == rsx::texture_upload_context::framebuffer_storage)) - { - auto sampler_state = static_cast(fs_sampler_state[i].get()); - - if (rsx::method_registers.fragment_textures[i].enabled()) - { - check_heap_status(VK_HEAP_CHECK_TEXTURE_UPLOAD_STORAGE); - *sampler_state = m_texture_cache.upload_texture(*m_current_command_buffer, rsx::method_registers.fragment_textures[i], m_rtts); - - if (sampler_state->is_cyclic_reference) - { - check_for_cyclic_refs |= true; - } - - bool replace = !fs_sampler_handles[i]; - VkFilter mag_filter; - vk::minification_filter min_filter; - f32 min_lod = 0.f, max_lod = 0.f; - f32 lod_bias = 0.f; - - const u32 texture_format = rsx::method_registers.fragment_textures[i].format() & ~(CELL_GCM_TEXTURE_UN | CELL_GCM_TEXTURE_LN); - VkBool32 compare_enabled = VK_FALSE; - VkCompareOp depth_compare_mode = VK_COMPARE_OP_NEVER; - - if (texture_format >= CELL_GCM_TEXTURE_DEPTH24_D8 && texture_format <= CELL_GCM_TEXTURE_DEPTH16_FLOAT) - { - if (m_device->get_formats_support().d24_unorm_s8) - { - // NOTE: - // The nvidia-specific format D24S8 has a special way of doing depth comparison that matches the PS3 - // In case of projected shadow lookup the result 
of the divide operation has its Z clamped to [0-1] before comparison - // Most other wide formats (Z bits > 16) do not behave this way and depth greater than 1 is possible due to the use of floating point as storage - // Compare operations for these formats (such as D32_SFLOAT) are therefore emulated for correct results - - // NOTE2: - // To improve reusability, DEPTH16 shadow ops are also emulated if D24S8 support is not available - - compare_enabled = VK_TRUE; - depth_compare_mode = vk::get_compare_func(rsx::method_registers.fragment_textures[i].zfunc(), true); - } - } - - const bool aniso_override = !g_cfg.video.strict_rendering_mode && g_cfg.video.anisotropic_level_override > 0; - const f32 af_level = aniso_override ? g_cfg.video.anisotropic_level_override : vk::max_aniso(rsx::method_registers.fragment_textures[i].max_aniso()); - const auto wrap_s = vk::vk_wrap_mode(rsx::method_registers.fragment_textures[i].wrap_s()); - const auto wrap_t = vk::vk_wrap_mode(rsx::method_registers.fragment_textures[i].wrap_t()); - const auto wrap_r = vk::vk_wrap_mode(rsx::method_registers.fragment_textures[i].wrap_r()); - const auto border_color = vk::get_border_color(rsx::method_registers.fragment_textures[i].border_color()); - - // Check if non-point filtering can even be used on this format - bool can_sample_linear; - if (sampler_state->format_class == rsx::format_type::color) [[likely]] - { - // Most PS3-like formats can be linearly filtered without problem - can_sample_linear = true; - } - else - { - // Not all GPUs support linear filtering of depth formats - const auto vk_format = sampler_state->image_handle ? sampler_state->image_handle->image()->format() : - vk::get_compatible_sampler_format(m_device->get_formats_support(), sampler_state->external_subresource_desc.gcm_format); - - can_sample_linear = m_device->get_format_properties(vk_format).optimalTilingFeatures & VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT; - } - - const auto mipmap_count = rsx::method_registers.fragment_textures[i].get_exact_mipmap_count(); - min_filter = vk::get_min_filter(rsx::method_registers.fragment_textures[i].min_filter()); - - if (can_sample_linear) - { - mag_filter = vk::get_mag_filter(rsx::method_registers.fragment_textures[i].mag_filter()); - } - else - { - mag_filter = VK_FILTER_NEAREST; - min_filter.filter = VK_FILTER_NEAREST; - } - - if (min_filter.sample_mipmaps && mipmap_count > 1) - { - f32 actual_mipmaps; - if (sampler_state->upload_context == rsx::texture_upload_context::shader_read) - { - actual_mipmaps = static_cast(mipmap_count); - } - else if (sampler_state->external_subresource_desc.op == rsx::deferred_request_command::mipmap_gather) - { - // Clamp min and max lod - actual_mipmaps = static_cast(sampler_state->external_subresource_desc.sections_to_copy.size()); - } - else - { - actual_mipmaps = 1.f; - } - - if (actual_mipmaps > 1.f) - { - min_lod = rsx::method_registers.fragment_textures[i].min_lod(); - max_lod = rsx::method_registers.fragment_textures[i].max_lod(); - lod_bias = rsx::method_registers.fragment_textures[i].bias(); - - min_lod = std::min(min_lod, actual_mipmaps - 1.f); - max_lod = std::min(max_lod, actual_mipmaps - 1.f); - - if (min_filter.mipmap_mode == VK_SAMPLER_MIPMAP_MODE_NEAREST) - { - // Round to nearest 0.5 to work around some broken games - // Unlike openGL, sampler parameters cannot be dynamically changed on vulkan, leading to many permutations - lod_bias = std::floor(lod_bias * 2.f + 0.5f) * 0.5f; - } - } - else - { - min_lod = max_lod = lod_bias = 0.f; - 
min_filter.mipmap_mode = VK_SAMPLER_MIPMAP_MODE_NEAREST; - } - } - - if (fs_sampler_handles[i] && m_textures_dirty[i]) - { - if (!fs_sampler_handles[i]->matches(wrap_s, wrap_t, wrap_r, false, lod_bias, af_level, min_lod, max_lod, - min_filter.filter, mag_filter, min_filter.mipmap_mode, border_color, compare_enabled, depth_compare_mode)) - { - replace = true; - } - } - - if (replace) - { - fs_sampler_handles[i] = vk::get_resource_manager()->find_sampler(*m_device, wrap_s, wrap_t, wrap_r, false, lod_bias, af_level, min_lod, max_lod, - min_filter.filter, mag_filter, min_filter.mipmap_mode, border_color, compare_enabled, depth_compare_mode); - } - } - else - { - *sampler_state = {}; - } - - m_textures_dirty[i] = false; - } - } - - for (int i = 0; i < rsx::limits::vertex_textures_count; ++i) - { - if (!vs_sampler_state[i]) - vs_sampler_state[i] = std::make_unique(); - - if (m_samplers_dirty || m_vertex_textures_dirty[i] || - (update_framebuffer_sourced && vs_sampler_state[i]->upload_context == rsx::texture_upload_context::framebuffer_storage)) - { - auto sampler_state = static_cast(vs_sampler_state[i].get()); - - if (rsx::method_registers.vertex_textures[i].enabled()) - { - check_heap_status(VK_HEAP_CHECK_TEXTURE_UPLOAD_STORAGE); - *sampler_state = m_texture_cache.upload_texture(*m_current_command_buffer, rsx::method_registers.vertex_textures[i], m_rtts); - - if (sampler_state->is_cyclic_reference || sampler_state->external_subresource_desc.do_not_cache) - { - check_for_cyclic_refs |= true; - } - - bool replace = !vs_sampler_handles[i]; - const VkBool32 unnormalized_coords = !!(rsx::method_registers.vertex_textures[i].format() & CELL_GCM_TEXTURE_UN); - const auto min_lod = rsx::method_registers.vertex_textures[i].min_lod(); - const auto max_lod = rsx::method_registers.vertex_textures[i].max_lod(); - const auto border_color = vk::get_border_color(rsx::method_registers.vertex_textures[i].border_color()); - - if (vs_sampler_handles[i]) - { - if (!vs_sampler_handles[i]->matches(VK_SAMPLER_ADDRESS_MODE_REPEAT, VK_SAMPLER_ADDRESS_MODE_REPEAT, VK_SAMPLER_ADDRESS_MODE_REPEAT, - unnormalized_coords, 0.f, 1.f, min_lod, max_lod, VK_FILTER_NEAREST, VK_FILTER_NEAREST, VK_SAMPLER_MIPMAP_MODE_NEAREST, border_color)) - { - replace = true; - } - } - - if (replace) - { - vs_sampler_handles[i] = vk::get_resource_manager()->find_sampler( - *m_device, - VK_SAMPLER_ADDRESS_MODE_REPEAT, VK_SAMPLER_ADDRESS_MODE_REPEAT, VK_SAMPLER_ADDRESS_MODE_REPEAT, - unnormalized_coords, - 0.f, 1.f, min_lod, max_lod, - VK_FILTER_NEAREST, VK_FILTER_NEAREST, VK_SAMPLER_MIPMAP_MODE_NEAREST, border_color); - } - } - else - *sampler_state = {}; - - m_vertex_textures_dirty[i] = false; - } - } - - m_samplers_dirty.store(false); - - if (check_for_cyclic_refs) - { - // Regenerate renderpass key - if (const auto key = vk::get_renderpass_key(m_fbo_images, m_current_renderpass_key); - key != m_current_renderpass_key) - { - m_current_renderpass_key = key; - m_cached_renderpass = VK_NULL_HANDLE; - } - } - } - - m_frame_stats.textures_upload_time += m_profiler.duration(); - - if (!load_program()) - { - // Program is not ready, skip drawing this - std::this_thread::yield(); - execute_nop_draw(); - // m_rtts.on_write(); - breaks games for obvious reasons - rsx::thread::end(); - return; - } - - // Allocate descriptor set - check_descriptors(); - m_current_frame->descriptor_set = allocate_descriptor_set(); - - // Load program execution environment - load_program_env(); - - m_frame_stats.setup_time += m_profiler.duration(); - - for (int i = 0; i < 
rsx::limits::fragment_textures_count; ++i) - { - if (current_fp_metadata.referenced_textures_mask & (1 << i)) - { - vk::image_view* view = nullptr; - auto sampler_state = static_cast(fs_sampler_state[i].get()); - - if (rsx::method_registers.fragment_textures[i].enabled() && - sampler_state->validate()) + if (rsx::method_registers.fragment_textures[i].enabled() && sampler_state->validate()) { if (view = sampler_state->image_handle; !view) { - //Requires update, copy subresource + // Requires update, copy subresource view = m_texture_cache.create_temporary_subresource(*m_current_command_buffer, sampler_state->external_subresource_desc); } else @@ -1626,7 +1341,7 @@ void VKGSRender::end() switch (auto raw = view->image(); raw->current_layout) { default: - //case VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL: + // case VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL: break; case VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL: verify(HERE), sampler_state->upload_context == rsx::texture_upload_context::blit_engine_dst; @@ -1646,29 +1361,24 @@ void VKGSRender::end() VkAccessFlags src_access; if (raw->aspect() == VK_IMAGE_ASPECT_COLOR_BIT) { - src_stage = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; + src_stage = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; src_access = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; } else { - src_stage = VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT; + src_stage = VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT; src_access = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; } - vk::insert_image_memory_barrier( - *m_current_command_buffer, - raw->value, - VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, - src_stage, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, - src_access, VK_ACCESS_SHADER_READ_BIT, - { raw->aspect(), 0, 1, 0, 1 }); + vk::insert_image_memory_barrier(*m_current_command_buffer, raw->value, VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, src_stage, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, + src_access, VK_ACCESS_SHADER_READ_BIT, {raw->aspect(), 0, 1, 0, 1}); raw->current_layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; } break; case VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL: case VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL: - verify(HERE), sampler_state->upload_context == rsx::texture_upload_context::framebuffer_storage, !sampler_state->is_cyclic_reference; + verify(HERE), sampler_state->upload_context == rsx::texture_upload_context::framebuffer_storage; raw->change_layout(*m_current_command_buffer, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); break; } @@ -1677,49 +1387,34 @@ void VKGSRender::end() if (view) [[likely]] { - m_program->bind_uniform({ fs_sampler_handles[i]->value, view->value, view->image()->current_layout }, - i, - ::glsl::program_domain::glsl_fragment_program, - m_current_frame->descriptor_set); + m_program->bind_uniform({fs_sampler_handles[i]->value, view->value, view->image()->current_layout}, i, ::glsl::program_domain::glsl_fragment_program, m_current_frame->descriptor_set); if (current_fragment_program.redirected_textures & (1 << i)) { // Stencil mirror required - auto root_image = static_cast(view->image()); + auto root_image = static_cast(view->image()); auto stencil_view = root_image->get_view(0xAAE4, rsx::default_remap_vector, VK_IMAGE_ASPECT_STENCIL_BIT); if (!m_stencil_mirror_sampler) { - m_stencil_mirror_sampler = std::make_unique(*m_device, - VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER, - VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER, - VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER, - VK_FALSE, 0.f, 1.f, 0.f, 0.f, - VK_FILTER_NEAREST, 
VK_FILTER_NEAREST, VK_SAMPLER_MIPMAP_MODE_NEAREST, - VK_BORDER_COLOR_INT_OPAQUE_BLACK); + m_stencil_mirror_sampler = std::make_unique(*m_device, VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER, VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER, VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER, + VK_FALSE, 0.f, 1.f, 0.f, 0.f, VK_FILTER_NEAREST, VK_FILTER_NEAREST, VK_SAMPLER_MIPMAP_MODE_NEAREST, VK_BORDER_COLOR_INT_OPAQUE_BLACK); } - m_program->bind_uniform({ m_stencil_mirror_sampler->value, stencil_view->value, stencil_view->image()->current_layout }, - i, - ::glsl::program_domain::glsl_fragment_program, - m_current_frame->descriptor_set, - true); + m_program->bind_uniform( + {m_stencil_mirror_sampler->value, stencil_view->value, stencil_view->image()->current_layout}, i, ::glsl::program_domain::glsl_fragment_program, m_current_frame->descriptor_set, true); } } else { - m_program->bind_uniform({ vk::null_sampler(), vk::null_image_view(*m_current_command_buffer)->value, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL }, - i, - ::glsl::program_domain::glsl_fragment_program, - m_current_frame->descriptor_set); + const VkImageViewType view_type = vk::get_view_type(current_fragment_program.get_texture_dimension(i)); + m_program->bind_uniform({vk::null_sampler(), vk::null_image_view(*m_current_command_buffer, view_type)->value, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL}, i, + ::glsl::program_domain::glsl_fragment_program, m_current_frame->descriptor_set); if (current_fragment_program.redirected_textures & (1 << i)) { - m_program->bind_uniform({ vk::null_sampler(), vk::null_image_view(*m_current_command_buffer)->value, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL }, - i, - ::glsl::program_domain::glsl_fragment_program, - m_current_frame->descriptor_set, - true); + m_program->bind_uniform({vk::null_sampler(), vk::null_image_view(*m_current_command_buffer, view_type)->value, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL}, i, + ::glsl::program_domain::glsl_fragment_program, m_current_frame->descriptor_set, true); } } } @@ -1731,30 +1426,29 @@ void VKGSRender::end() { if (!rsx::method_registers.vertex_textures[i].enabled()) { - m_program->bind_uniform({ vk::null_sampler(), vk::null_image_view(*m_current_command_buffer)->value, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL }, - i, - ::glsl::program_domain::glsl_vertex_program, - m_current_frame->descriptor_set); + const auto view_type = vk::get_view_type(current_vertex_program.get_texture_dimension(i)); + m_program->bind_uniform({vk::null_sampler(), vk::null_image_view(*m_current_command_buffer, view_type)->value, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL}, i, + ::glsl::program_domain::glsl_vertex_program, m_current_frame->descriptor_set); continue; } auto sampler_state = static_cast(vs_sampler_state[i].get()); - auto image_ptr = sampler_state->image_handle; + auto image_ptr = sampler_state->image_handle; if (!image_ptr && sampler_state->validate()) { - image_ptr = m_texture_cache.create_temporary_subresource(*m_current_command_buffer, sampler_state->external_subresource_desc); + image_ptr = m_texture_cache.create_temporary_subresource(*m_current_command_buffer, sampler_state->external_subresource_desc); m_vertex_textures_dirty[i] = true; } if (!image_ptr) { rsx_log.error("Texture upload failed to vtexture index %d. 
Binding null sampler.", i); - m_program->bind_uniform({ vk::null_sampler(), vk::null_image_view(*m_current_command_buffer)->value, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL }, - i, - ::glsl::program_domain::glsl_vertex_program, - m_current_frame->descriptor_set); + const auto view_type = vk::get_view_type(current_vertex_program.get_texture_dimension(i)); + + m_program->bind_uniform({vk::null_sampler(), vk::null_image_view(*m_current_command_buffer, view_type)->value, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL}, i, + ::glsl::program_domain::glsl_vertex_program, m_current_frame->descriptor_set); continue; } @@ -1762,7 +1456,7 @@ void VKGSRender::end() switch (auto raw = image_ptr->image(); raw->current_layout) { default: - //case VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL: + // case VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL: break; case VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL: verify(HERE), sampler_state->upload_context == rsx::texture_upload_context::blit_engine_dst; @@ -1781,22 +1475,17 @@ void VKGSRender::end() VkAccessFlags src_access; if (raw->aspect() == VK_IMAGE_ASPECT_COLOR_BIT) { - src_stage = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; + src_stage = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; src_access = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; } else { - src_stage = VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT; + src_stage = VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT; src_access = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; } - vk::insert_image_memory_barrier( - *m_current_command_buffer, - raw->value, - VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, - src_stage, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT, - src_access, VK_ACCESS_SHADER_READ_BIT, - { raw->aspect(), 0, 1, 0, 1 }); + vk::insert_image_memory_barrier(*m_current_command_buffer, raw->value, VK_IMAGE_LAYOUT_GENERAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, src_stage, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT, + src_access, VK_ACCESS_SHADER_READ_BIT, {raw->aspect(), 0, 1, 0, 1}); raw->current_layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; } @@ -1808,15 +1497,306 @@ void VKGSRender::end() break; } - m_program->bind_uniform({ vs_sampler_handles[i]->value, image_ptr->value, image_ptr->image()->current_layout }, - i, - ::glsl::program_domain::glsl_vertex_program, - m_current_frame->descriptor_set); + m_program->bind_uniform({vs_sampler_handles[i]->value, image_ptr->value, image_ptr->image()->current_layout}, i, ::glsl::program_domain::glsl_vertex_program, m_current_frame->descriptor_set); } } +} - m_texture_cache.release_uncached_temporary_subresources(); +void VKGSRender::begin_render_pass() +{ + vk::begin_renderpass(*m_current_command_buffer, get_render_pass(), m_draw_fbo->value, {positionu{0u, 0u}, sizeu{m_draw_fbo->width(), m_draw_fbo->height()}}); +} + +void VKGSRender::close_render_pass() +{ + vk::end_renderpass(*m_current_command_buffer); +} + +VkRenderPass VKGSRender::get_render_pass() +{ + if (!m_cached_renderpass) + { + m_cached_renderpass = vk::get_renderpass(*m_device, m_current_renderpass_key); + } + + return m_cached_renderpass; +} + +void VKGSRender::emit_geometry(u32 sub_index) +{ + auto &draw_call = rsx::method_registers.current_draw_clause; + m_profiler.start(); + + if (sub_index == 0) + { + analyse_inputs_interleaved(m_vertex_layout); + + if (!m_vertex_layout.validate()) + { + // No vertex inputs enabled + // Execute remainining pipeline barriers with NOP draw + do + { + draw_call.execute_pipeline_dependencies(); + } + while (draw_call.next()); + + draw_call.end(); + return; + } + } + else if 
(draw_call.execute_pipeline_dependencies() & rsx::vertex_base_changed) + { + // Rebase vertex bases instead of + for (auto &info : m_vertex_layout.interleaved_blocks) + { + const auto vertex_base_offset = rsx::method_registers.vertex_data_base_offset(); + info.real_offset_address = rsx::get_address(rsx::get_vertex_offset_from_base(vertex_base_offset, info.base_offset), info.memory_location); + } + } + const auto old_persistent_buffer = m_persistent_attribute_storage ? m_persistent_attribute_storage->value : null_buffer_view->value; + const auto old_volatile_buffer = m_volatile_attribute_storage ? m_volatile_attribute_storage->value : null_buffer_view->value; + + // Programs data is dependent on vertex state + auto upload_info = upload_vertex_data(); + if (!upload_info.vertex_draw_count) + { + // Malformed vertex setup; abort + return; + } + + m_frame_stats.vertex_upload_time += m_profiler.duration(); + + auto persistent_buffer = m_persistent_attribute_storage ? m_persistent_attribute_storage->value : null_buffer_view->value; + auto volatile_buffer = m_volatile_attribute_storage ? m_volatile_attribute_storage->value : null_buffer_view->value; + bool update_descriptors = false; + + const auto& binding_table = m_device->get_pipeline_binding_table(); + + if (sub_index == 0) + { + update_descriptors = true; + + // Allocate stream layout memory for this batch + m_vertex_layout_stream_info.range = rsx::method_registers.current_draw_clause.pass_count() * 128; + m_vertex_layout_stream_info.offset = m_vertex_layout_ring_info.alloc<256>(m_vertex_layout_stream_info.range); + + if (vk::test_status_interrupt(vk::heap_changed)) + { + if (m_vertex_layout_storage && + m_vertex_layout_storage->info.buffer != m_vertex_layout_ring_info.heap->value) + { + m_current_frame->buffer_views_to_clean.push_back(std::move(m_vertex_layout_storage)); + } + + vk::clear_status_interrupt(vk::heap_changed); + } + } + else if (persistent_buffer != old_persistent_buffer || volatile_buffer != old_volatile_buffer) + { + // Need to update descriptors; make a copy for the next draw + VkDescriptorSet new_descriptor_set = allocate_descriptor_set(); + std::vector copy_set(binding_table.total_descriptor_bindings); + + for (u32 n = 0; n < binding_table.total_descriptor_bindings; ++n) + { + copy_set[n] = + { + VK_STRUCTURE_TYPE_COPY_DESCRIPTOR_SET, // sType + nullptr, // pNext + m_current_frame->descriptor_set, // srcSet + n, // srcBinding + 0u, // srcArrayElement + new_descriptor_set, // dstSet + n, // dstBinding + 0u, // dstArrayElement + 1u // descriptorCount + }; + } + + vkUpdateDescriptorSets(*m_device, 0, 0, binding_table.total_descriptor_bindings, copy_set.data()); + m_current_frame->descriptor_set = new_descriptor_set; + + update_descriptors = true; + } + + // Update vertex fetch parameters + update_vertex_env(sub_index, upload_info); + + verify(HERE), m_vertex_layout_storage; + if (update_descriptors) + { + m_program->bind_uniform(persistent_buffer, binding_table.vertex_buffers_first_bind_slot, m_current_frame->descriptor_set); + m_program->bind_uniform(volatile_buffer, binding_table.vertex_buffers_first_bind_slot + 1, m_current_frame->descriptor_set); + m_program->bind_uniform(m_vertex_layout_storage->value, binding_table.vertex_buffers_first_bind_slot + 2, m_current_frame->descriptor_set); + } + + if (!m_current_subdraw_id++) + { + vkCmdBindPipeline(*m_current_command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, m_program->pipeline); + update_draw_state(); + begin_render_pass(); + + if (cond_render_ctrl.hw_cond_active && 
m_device->get_conditional_render_support()) + { + // It is inconvenient that conditional rendering breaks other things like compute dispatch + // TODO: If this is heavy, add refactor the resources into global and add checks around compute dispatch + VkConditionalRenderingBeginInfoEXT info{}; + info.sType = VK_STRUCTURE_TYPE_CONDITIONAL_RENDERING_BEGIN_INFO_EXT; + info.buffer = m_cond_render_buffer->value; + + m_device->cmdBeginConditionalRenderingEXT(*m_current_command_buffer, &info); + m_current_command_buffer->flags |= vk::command_buffer::cb_has_conditional_render; + } + } + + // Bind the new set of descriptors for use with this draw call + vkCmdBindDescriptorSets(*m_current_command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline_layout, 0, 1, &m_current_frame->descriptor_set, 0, nullptr); + + m_frame_stats.setup_time += m_profiler.duration(); + + if (!upload_info.index_info) + { + if (draw_call.is_single_draw()) + { + vkCmdDraw(*m_current_command_buffer, upload_info.vertex_draw_count, 1, 0, 0); + } + else + { + u32 vertex_offset = 0; + const auto subranges = draw_call.get_subranges(); + for (const auto &range : subranges) + { + vkCmdDraw(*m_current_command_buffer, range.count, 1, vertex_offset, 0); + vertex_offset += range.count; + } + } + } + else + { + const VkIndexType index_type = std::get<1>(*upload_info.index_info); + const VkDeviceSize offset = std::get<0>(*upload_info.index_info); + + vkCmdBindIndexBuffer(*m_current_command_buffer, m_index_buffer_ring_info.heap->value, offset, index_type); + + if (rsx::method_registers.current_draw_clause.is_single_draw()) + { + const u32 index_count = upload_info.vertex_draw_count; + vkCmdDrawIndexed(*m_current_command_buffer, index_count, 1, 0, 0, 0); + } + else + { + u32 vertex_offset = 0; + const auto subranges = draw_call.get_subranges(); + for (const auto &range : subranges) + { + const auto count = get_index_count(draw_call.primitive, range.count); + vkCmdDrawIndexed(*m_current_command_buffer, count, 1, vertex_offset, 0, 0); + vertex_offset += count; + } + } + } + + m_frame_stats.draw_exec_time += m_profiler.duration(); +} + +void VKGSRender::end() +{ + if (skip_current_frame || !framebuffer_status_valid || swapchain_unavailable || cond_render_ctrl.disable_rendering()) + { + execute_nop_draw(); + rsx::thread::end(); + return; + } + + // Check for frame resource status here because it is possible for an async flip to happen between begin/end + if (m_current_frame->flags & frame_context_state::dirty) [[unlikely]] + { + check_present_status(); + + if (m_current_frame->swap_command_buffer) [[unlikely]] + { + // Borrow time by using the auxilliary context + m_aux_frame_context.grab_resources(*m_current_frame); + m_current_frame = &m_aux_frame_context; + } + else if (m_current_frame->used_descriptors) + { + m_current_frame->descriptor_pool.reset(0); + m_current_frame->used_descriptors = 0; + } + + verify(HERE), !m_current_frame->swap_command_buffer; + + m_current_frame->flags &= ~frame_context_state::dirty; + } + + m_profiler.start(); + + // Check for data casts + // NOTE: This is deprecated and will be removed soon. 
The memory barrier invoked before rendering does this better + auto ds = std::get<1>(m_rtts.m_bound_depth_stencil); + if (ds && ds->old_contents.size() == 1 && ds->old_contents[0].source->info.format == VK_FORMAT_B8G8R8A8_UNORM) + { + auto key = vk::get_renderpass_key(ds->info.format); + auto render_pass = vk::get_renderpass(*m_device, key); + verify("Usupported renderpass configuration" HERE), render_pass != VK_NULL_HANDLE; + + VkClearDepthStencilValue clear = {1.f, 0xFF}; + VkImageSubresourceRange range = {VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT, 0, 1, 0, 1}; + + // Initialize source + auto src = vk::as_rtt(ds->old_contents[0].source); + src->read_barrier(*m_current_command_buffer); + + switch (src->current_layout) + { + case VK_IMAGE_LAYOUT_GENERAL: + case VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL: break; + // case VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL: + default: src->change_layout(*m_current_command_buffer, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); break; + } + + // Clear explicitly before starting the inheritance transfer + const bool preinitialized = (ds->current_layout == VK_IMAGE_LAYOUT_GENERAL); + if (!preinitialized) + ds->push_layout(*m_current_command_buffer, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); + vkCmdClearDepthStencilImage(*m_current_command_buffer, ds->value, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, &clear, 1, &range); + if (!preinitialized) + ds->pop_layout(*m_current_command_buffer); + + // TODO: Stencil transfer + ds->old_contents[0].init_transfer(ds); + m_depth_converter->run(*m_current_command_buffer, ds->old_contents[0].src_rect(), ds->old_contents[0].dst_rect(), src->get_view(0xAAE4, rsx::default_remap_vector), ds, render_pass); + + // TODO: Flush management to avoid pass running out of ubo space (very unlikely) + ds->on_write(); + } + + load_texture_env(); + m_frame_stats.textures_upload_time += m_profiler.duration(); + + if (!load_program()) + { + // Program is not ready, skip drawing this + std::this_thread::yield(); + execute_nop_draw(); + // m_rtts.on_write(); - breaks games for obvious reasons + rsx::thread::end(); + return; + } + + // Allocate descriptor set + check_descriptors(); + m_current_frame->descriptor_set = allocate_descriptor_set(); + + // Load program execution environment + load_program_env(); + m_frame_stats.setup_time += m_profiler.duration(); + + bind_texture_env(); + m_texture_cache.release_uncached_temporary_subresources(); m_frame_stats.textures_upload_time += m_profiler.duration(); if (m_current_command_buffer->flags & vk::command_buffer::cb_load_occluson_task) @@ -1831,15 +1811,16 @@ void VKGSRender::end() occlusion_id = m_occlusion_query_pool.find_free_slot(); if (occlusion_id == UINT32_MAX) { - //rsx_log.error("Occlusion pool overflow"); - if (m_current_task) m_current_task->result = 1; + // rsx_log.error("Occlusion pool overflow"); + if (m_current_task) + m_current_task->result = 1; } } // Begin query m_occlusion_query_pool.begin_query(*m_current_command_buffer, occlusion_id); - auto &data = m_occlusion_map[m_active_query_info->driver_handle]; + auto& data = m_occlusion_map[m_active_query_info->driver_handle]; data.indices.push_back(occlusion_id); data.set_sync_command_buffer(m_current_command_buffer); @@ -1851,9 +1832,10 @@ void VKGSRender::end() vk::get_appropriate_topology(rsx::method_registers.current_draw_clause.primitive, primitive_emulated); // Apply write memory barriers - if (ds) ds->write_barrier(*m_current_command_buffer); + if (ds) + ds->write_barrier(*m_current_command_buffer); - for (auto &rtt : 
m_rtts.m_bound_render_targets) + for (auto& rtt : m_rtts.m_bound_render_targets) { if (auto surface = std::get<1>(rtt)) { @@ -1864,15 +1846,14 @@ void VKGSRender::end() // Final heap check... check_heap_status(VK_HEAP_CHECK_VERTEX_STORAGE | VK_HEAP_CHECK_VERTEX_LAYOUT_STORAGE); - u32 sub_index = 0; + u32 sub_index = 0; m_current_subdraw_id = 0; rsx::method_registers.current_draw_clause.begin(); do { emit_geometry(sub_index++); - } - while (rsx::method_registers.current_draw_clause.next()); + } while (rsx::method_registers.current_draw_clause.next()); if (m_current_command_buffer->flags & vk::command_buffer::cb_has_conditional_render) { diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.h b/rpcs3/Emu/RSX/VK/VKGSRender.h index a62a373c426d..a013242cb095 100644 --- a/rpcs3/Emu/RSX/VK/VKGSRender.h +++ b/rpcs3/Emu/RSX/VK/VKGSRender.h @@ -368,7 +368,6 @@ class VKGSRender : public GSRender, public ::rsx::reports::ZCULL_control u64 m_cond_render_sync_tag = 0; shared_mutex m_sampler_mutex; - u64 surface_store_tag = 0; std::atomic_bool m_samplers_dirty = { true }; std::unique_ptr m_stencil_mirror_sampler; std::array, rsx::limits::fragment_textures_count> fs_sampler_state = {}; @@ -513,6 +512,9 @@ class VKGSRender : public GSRender, public ::rsx::reports::ZCULL_control void load_program_env(); void update_vertex_env(u32 id, const vk::vertex_upload_info& vertex_info); + void load_texture_env(); + void bind_texture_env(); + public: void init_buffers(rsx::framebuffer_creation_context context, bool skip_reading = false); void set_viewport(); diff --git a/rpcs3/Emu/RSX/VK/VKHelpers.cpp b/rpcs3/Emu/RSX/VK/VKHelpers.cpp index c6029900536d..d92fb0434422 100644 --- a/rpcs3/Emu/RSX/VK/VKHelpers.cpp +++ b/rpcs3/Emu/RSX/VK/VKHelpers.cpp @@ -68,8 +68,8 @@ namespace vk const render_device* g_current_renderer; std::unique_ptr g_null_texture; - std::unique_ptr g_null_image_view; std::unique_ptr g_scratch_buffer; + std::unordered_map> g_null_image_views; std::unordered_map> g_typeless_textures; std::unordered_map> g_compute_tasks; @@ -132,7 +132,7 @@ namespace vk bool data_heap::grow(size_t size) { // Create new heap. 
-       const size_t size_limit = 1024 * 0x100000;
+       const size_t size_limit = 1024 * 0x100000;
        const size_t aligned_new_size = align(m_size + size, 64 * 0x100000);
 
        if (aligned_new_size >= size_limit)
@@ -148,7 +148,7 @@ namespace vk
        }
 
        // Wait for DMA activity to end
-       rsx::g_dma_manager.sync();
+       g_fxo->get()->sync();
 
        if (mapped)
        {
@@ -158,11 +158,11 @@ namespace vk
 
        VkBufferUsageFlags usage = heap->info.usage;
 
-       const auto device = get_current_renderer();
+       const auto device = get_current_renderer();
        const auto& memory_map = device->get_memory_mapping();
 
        VkFlags memory_flags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
-       auto memory_index = memory_map.host_visible_coherent;
+       auto memory_index = memory_map.host_visible_coherent;
 
        // Update heap information and reset the allocator
        ::data_heap::init(aligned_new_size, m_name, m_min_guard_size);
@@ -290,26 +290,33 @@ namespace vk
        return g_null_sampler;
    }
 
-   vk::image_view* null_image_view(vk::command_buffer &cmd)
+   vk::image_view* null_image_view(vk::command_buffer& cmd, VkImageViewType type)
    {
-       if (g_null_image_view)
-           return g_null_image_view.get();
+       if (auto found = g_null_image_views.find(type); found != g_null_image_views.end())
+       {
+           return found->second.get();
+       }
 
-       g_null_texture = std::make_unique(*g_current_renderer, g_current_renderer->get_memory_mapping().device_local, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
-           VK_IMAGE_TYPE_2D, VK_FORMAT_B8G8R8A8_UNORM, 4, 4, 1, 1, 1, VK_SAMPLE_COUNT_1_BIT, VK_IMAGE_LAYOUT_UNDEFINED,
-           VK_IMAGE_TILING_OPTIMAL, VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT, 0);
+       if (!g_null_texture)
+       {
+           g_null_texture = std::make_unique(*g_current_renderer, g_current_renderer->get_memory_mapping().device_local, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, VK_IMAGE_TYPE_2D, VK_FORMAT_B8G8R8A8_UNORM,
+               4, 4, 1, 1, 1, VK_SAMPLE_COUNT_1_BIT, VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_TILING_OPTIMAL, VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT, 0);
 
-       g_null_image_view = std::make_unique(*g_current_renderer, g_null_texture.get());
+           // Initialize memory to transparent black
+           VkClearColorValue clear_color = {};
+           VkImageSubresourceRange range = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1};
+           change_image_layout(cmd, g_null_texture.get(), VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, range);
+           vkCmdClearColorImage(cmd, g_null_texture->value, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, &clear_color, 1, &range);
+
+           // Prep for shader access
+           change_image_layout(cmd, g_null_texture.get(), VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, range);
+       }
 
-       // Initialize memory to transparent black
-       VkClearColorValue clear_color = {};
-       VkImageSubresourceRange range = { VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1 };
-       change_image_layout(cmd, g_null_texture.get(), VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, range);
-       vkCmdClearColorImage(cmd, g_null_texture->value, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, &clear_color, 1, &range);
+       const VkComponentMapping mapping = {VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_B, VK_COMPONENT_SWIZZLE_A};
+       const VkImageSubresourceRange range = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1};
 
-       // Prep for shader access
-       change_image_layout(cmd, g_null_texture.get(), VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, range);
-       return g_null_image_view.get();
+       auto& ret = g_null_image_views[type] = std::make_unique(*g_current_renderer, g_null_texture.get(), mapping, range, type);
+       return ret.get();
    }
 
    vk::image* get_typeless_helper(VkFormat format, u32 requested_width, u32 requested_height)
@@ -352,9 +359,8 @@ namespace vk
            // Choose optimal size
            const u64 alloc_size = std::max(64 * 0x100000, align(min_required_size, 0x100000));
 
-           g_scratch_buffer = std::make_unique(*g_current_renderer, alloc_size,
-               g_current_renderer->get_memory_mapping().device_local, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
-               VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, 0);
+           g_scratch_buffer = std::make_unique(*g_current_renderer, alloc_size, g_current_renderer->get_memory_mapping().device_local, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
+               VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, 0);
        }
 
        return g_scratch_buffer.get();
@@ -397,7 +403,7 @@ namespace vk
        vk::get_resource_manager()->destroy();
 
        g_null_texture.reset();
-       g_null_image_view.reset();
+       g_null_image_views.clear();
        g_scratch_buffer.reset();
 
        g_upload_heap.destroy();
diff --git a/rpcs3/Emu/RSX/VK/VKHelpers.h b/rpcs3/Emu/RSX/VK/VKHelpers.h
index 6a9a97453bec..a9f098c59535 100644
--- a/rpcs3/Emu/RSX/VK/VKHelpers.h
+++ b/rpcs3/Emu/RSX/VK/VKHelpers.h
@@ -142,7 +142,7 @@ namespace vk
    VkImageAspectFlags get_aspect_flags(VkFormat format);
 
    VkSampler null_sampler();
-   image_view* null_image_view(vk::command_buffer&);
+   image_view* null_image_view(vk::command_buffer& cmd, VkImageViewType type);
    image* get_typeless_helper(VkFormat format, u32 requested_width, u32 requested_height);
    buffer* get_scratch_buffer(u32 min_required_size = 0);
    data_heap* get_upload_heap();
@@ -1403,6 +1403,11 @@ namespace vk
            return m_storage_aspect;
        }
 
+       u32 layers() const
+       {
+           return info.arrayLayers;
+       }
+
        void push_layout(VkCommandBuffer cmd, VkImageLayout layout)
        {
            m_layout_stack.push(current_layout);
@@ -1457,32 +1462,37 @@ namespace vk
        image_view(VkDevice dev, vk::image* resource, const VkComponentMapping mapping = { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_B, VK_COMPONENT_SWIZZLE_A },
-           const VkImageSubresourceRange range = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1})
+           const VkImageSubresourceRange range = {VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1}, VkImageViewType view_type = VK_IMAGE_VIEW_TYPE_MAX_ENUM)
            : m_device(dev), m_resource(resource)
        {
-           info.format = resource->info.format;
-           info.image = resource->value;
-           info.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO;
-           info.components = mapping;
+           info.format = resource->info.format;
+           info.image = resource->value;
+           info.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO;
+           info.components = mapping;
            info.subresourceRange = range;
 
-           switch (resource->info.imageType)
+           if (view_type == VK_IMAGE_VIEW_TYPE_MAX_ENUM)
            {
-           case VK_IMAGE_TYPE_1D:
-               info.viewType = VK_IMAGE_VIEW_TYPE_1D;
-               break;
-           case VK_IMAGE_TYPE_2D:
-               if (resource->info.flags == VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT)
-                   info.viewType = VK_IMAGE_VIEW_TYPE_CUBE;
-               else
-                   info.viewType = VK_IMAGE_VIEW_TYPE_2D;
-               break;
-           case VK_IMAGE_TYPE_3D:
-               info.viewType = VK_IMAGE_VIEW_TYPE_3D;
-               break;
-           default:
-               ASSUME(0);
-               break;
+               switch (resource->info.imageType)
+               {
+               case VK_IMAGE_TYPE_1D: info.viewType = VK_IMAGE_VIEW_TYPE_1D; break;
+               case VK_IMAGE_TYPE_2D:
+                   if (resource->info.flags == VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT)
+                       info.viewType = VK_IMAGE_VIEW_TYPE_CUBE;
+                   else if (resource->info.arrayLayers == 1)
+                       info.viewType = VK_IMAGE_VIEW_TYPE_2D;
+                   else
+                       info.viewType = VK_IMAGE_VIEW_TYPE_2D_ARRAY;
+                   break;
+               case VK_IMAGE_TYPE_3D: info.viewType = VK_IMAGE_VIEW_TYPE_3D; break;
+               default: ASSUME(0); break;
+               }
+
+               info.subresourceRange.layerCount = resource->info.arrayLayers;
+           }
+           else
+           {
+               info.viewType = view_type;
            }
 
            create_impl();
diff --git a/rpcs3/Emu/RSX/VK/VKOverlays.h b/rpcs3/Emu/RSX/VK/VKOverlays.h
index 2da0c3c7aab5..982d9d4461d6 100644
--- a/rpcs3/Emu/RSX/VK/VKOverlays.h
+++ b/rpcs3/Emu/RSX/VK/VKOverlays.h
@@ -815,6 +815,12 @@ namespace vk
            m_time = static_cast(get_system_time() / 1000) * 0.005f;
            m_viewport_size = { static_cast(viewport.width()), static_cast(viewport.height()) };
 
+           std::vector<vk::image_view*> image_views
+           {
+               vk::null_image_view(cmd, VK_IMAGE_VIEW_TYPE_2D),
+               vk::null_image_view(cmd, VK_IMAGE_VIEW_TYPE_2D_ARRAY)
+           };
+
            for (auto &command : ui.get_compiled().draw_commands)
            {
                num_drawable_elements = static_cast(command.verts.size());
@@ -831,7 +837,7 @@ namespace vk
                    m_clip_region = command.config.clip_rect;
                    m_texture_type = 1;
 
-                   auto src = vk::null_image_view(cmd);
+                   vk::image_view* src = nullptr;
                    switch (command.config.texture_ref)
                    {
                    case rsx::overlays::image_resource_id::game_icon:
@@ -852,7 +858,13 @@ namespace vk
                        break;
                    }
 
-                   overlay_pass::run(cmd, viewport, target, { src }, render_pass);
+                   if (src)
+                   {
+                       const int res_id = src->image()->layers() > 1 ? 1 : 0;
+                       image_views[res_id] = src;
+                   }
+
+                   overlay_pass::run(cmd, viewport, target, image_views, render_pass);
                }
 
                ui.update();
diff --git a/rpcs3/Emu/RSX/VK/VKTextureCache.h b/rpcs3/Emu/RSX/VK/VKTextureCache.h
index 0cee3bfafb27..9b467ef31d78 100644
--- a/rpcs3/Emu/RSX/VK/VKTextureCache.h
+++ b/rpcs3/Emu/RSX/VK/VKTextureCache.h
@@ -1330,8 +1330,14 @@ namespace vk
            section.set_view_flags(expected_flags);
        }
 
-       void insert_texture_barrier(vk::command_buffer& cmd, vk::image* tex) override
+       void insert_texture_barrier(vk::command_buffer& cmd, vk::image* tex, bool strong_ordering) override
        {
+           if (!strong_ordering && tex->current_layout == VK_IMAGE_LAYOUT_GENERAL)
+           {
+               // A previous barrier already exists, do nothing
+               return;
+           }
+
           vk::insert_texture_barrier(cmd, tex, VK_IMAGE_LAYOUT_GENERAL);
        }
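Note on the strong_ordering flag introduced above: the Vulkan override only skips work when the caller explicitly requests weak ordering and the image already sits in VK_IMAGE_LAYOUT_GENERAL, meaning an earlier barrier in the same command buffer has already transitioned it. A minimal caller-side sketch of the two paths follows; it is illustrative only, not part of the patch, and assumes the m_texture_cache member used elsewhere in this diff plus a hypothetical helper name.

    // Sketch only: demonstrates the semantics of the new strong_ordering parameter.
    void barrier_example(vk::command_buffer& cmd, vk::image* tex)
    {
        // Strong ordering: always emits the barrier, targeting VK_IMAGE_LAYOUT_GENERAL.
        m_texture_cache.insert_texture_barrier(cmd, tex, true);

        // Weak ordering: becomes a no-op here because the image is now already in
        // VK_IMAGE_LAYOUT_GENERAL after the strong barrier above. This is the cheap
        // path intended for refreshing descriptors whose surface layout is unchanged.
        m_texture_cache.insert_texture_barrier(cmd, tex, false);
    }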