diff options
Diffstat (limited to 'src/video_core')
18 files changed, 174 insertions, 111 deletions
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 081a574e8..f5b10411b 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -1192,11 +1192,6 @@ void BufferCache<P>::UpdateDrawIndirect() { .size = static_cast<u32>(size), .buffer_id = FindBuffer(*cpu_addr, static_cast<u32>(size)), }; - VAddr cpu_addr_start = Common::AlignDown(*cpu_addr, 64); - VAddr cpu_addr_end = Common::AlignUp(*cpu_addr + size, 64); - IntervalType interval{cpu_addr_start, cpu_addr_end}; - ClearDownload(interval); - common_ranges.subtract(interval); }; if (current_draw_indirect->include_count) { update(current_draw_indirect->count_start_address, sizeof(u32), diff --git a/src/video_core/engines/fermi_2d.cpp b/src/video_core/engines/fermi_2d.cpp index 02e161270..91f10aec2 100644 --- a/src/video_core/engines/fermi_2d.cpp +++ b/src/video_core/engines/fermi_2d.cpp @@ -72,7 +72,7 @@ void Fermi2D::Blit() { UNIMPLEMENTED_IF_MSG(regs.clip_enable != 0, "Clipped blit enabled"); const auto& args = regs.pixels_from_memory; - constexpr s64 null_derivate = 1ULL << 32; + constexpr s64 null_derivative = 1ULL << 32; Surface src = regs.src; const auto bytes_per_pixel = BytesPerBlock(PixelFormatFromRenderTargetFormat(src.format)); const bool delegate_to_gpu = src.width > 512 && src.height > 512 && bytes_per_pixel <= 8 && @@ -89,7 +89,7 @@ void Fermi2D::Blit() { .operation = regs.operation, .filter = args.sample_mode.filter, .must_accelerate = - args.du_dx != null_derivate || args.dv_dy != null_derivate || delegate_to_gpu, + args.du_dx != null_derivative || args.dv_dy != null_derivative || delegate_to_gpu, .dst_x0 = args.dst_x0, .dst_y0 = args.dst_y0, .dst_x1 = args.dst_x0 + args.dst_width, diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 32d767d85..592c28ba3 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -268,7 +268,7 @@ size_t Maxwell3D::EstimateIndexBufferSize() { std::numeric_limits<u32>::max()}; const size_t byte_size = regs.index_buffer.FormatSizeInBytes(); const size_t log2_byte_size = Common::Log2Ceil64(byte_size); - const size_t cap{GetMaxCurrentVertices() * 3 * byte_size}; + const size_t cap{GetMaxCurrentVertices() * 4 * byte_size}; const size_t lower_cap = std::min<size_t>(static_cast<size_t>(end_address - start_address), cap); return std::min<size_t>( diff --git a/src/video_core/fence_manager.h b/src/video_core/fence_manager.h index c0e6471fe..805a89900 100644 --- a/src/video_core/fence_manager.h +++ b/src/video_core/fence_manager.h @@ -86,10 +86,7 @@ public: uncommitted_operations.emplace_back(std::move(func)); } pending_operations.emplace_back(std::move(uncommitted_operations)); - { - std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; - QueueFence(new_fence); - } + QueueFence(new_fence); if (!delay_fence) { func(); } diff --git a/src/video_core/renderer_null/null_rasterizer.cpp b/src/video_core/renderer_null/null_rasterizer.cpp index 65cd5aa06..4f1d5b548 100644 --- a/src/video_core/renderer_null/null_rasterizer.cpp +++ b/src/video_core/renderer_null/null_rasterizer.cpp @@ -3,6 +3,7 @@ #include "common/alignment.h" #include "core/memory.h" +#include "video_core/control/channel_state.h" #include "video_core/host1x/host1x.h" #include "video_core/memory_manager.h" #include "video_core/renderer_null/null_rasterizer.h" @@ -99,8 +100,14 @@ bool RasterizerNull::AccelerateDisplay(const Tegra::FramebufferConfig& config, } void RasterizerNull::LoadDiskResources(u64 title_id, std::stop_token stop_loading, const VideoCore::DiskResourceLoadCallback& callback) {} -void RasterizerNull::InitializeChannel(Tegra::Control::ChannelState& channel) {} -void RasterizerNull::BindChannel(Tegra::Control::ChannelState& channel) {} -void RasterizerNull::ReleaseChannel(s32 channel_id) {} +void RasterizerNull::InitializeChannel(Tegra::Control::ChannelState& channel) { + CreateChannel(channel); +} +void RasterizerNull::BindChannel(Tegra::Control::ChannelState& channel) { + BindToChannel(channel.bind_id); +} +void RasterizerNull::ReleaseChannel(s32 channel_id) { + EraseChannel(channel_id); +} } // namespace Null diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp index 44a771d65..af0a453ee 100644 --- a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp @@ -559,7 +559,9 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) { } void GraphicsPipeline::ConfigureTransformFeedbackImpl() const { - glTransformFeedbackAttribsNV(num_xfb_attribs, xfb_attribs.data(), GL_SEPARATE_ATTRIBS); + const GLenum buffer_mode = + num_xfb_buffers_active == 1 ? GL_INTERLEAVED_ATTRIBS : GL_SEPARATE_ATTRIBS; + glTransformFeedbackAttribsNV(num_xfb_attribs, xfb_attribs.data(), buffer_mode); } void GraphicsPipeline::GenerateTransformFeedbackState() { @@ -567,12 +569,14 @@ void GraphicsPipeline::GenerateTransformFeedbackState() { // when this is required. GLint* cursor{xfb_attribs.data()}; + num_xfb_buffers_active = 0; for (size_t feedback = 0; feedback < Maxwell::NumTransformFeedbackBuffers; ++feedback) { const auto& layout = key.xfb_state.layouts[feedback]; UNIMPLEMENTED_IF_MSG(layout.stride != layout.varying_count * 4, "Stride padding"); if (layout.varying_count == 0) { continue; } + num_xfb_buffers_active++; const auto& locations = key.xfb_state.varyings[feedback]; std::optional<u32> current_index; diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.h b/src/video_core/renderer_opengl/gl_graphics_pipeline.h index 74fc9cc3d..2f70c1ae9 100644 --- a/src/video_core/renderer_opengl/gl_graphics_pipeline.h +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.h @@ -154,6 +154,7 @@ private: static constexpr std::size_t XFB_ENTRY_STRIDE = 3; GLsizei num_xfb_attribs{}; + u32 num_xfb_buffers_active{}; std::array<GLint, 128 * XFB_ENTRY_STRIDE * Maxwell::NumTransformFeedbackBuffers> xfb_attribs{}; std::mutex built_mutex; diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 27e2de1bf..9995b6dd4 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -555,7 +555,7 @@ void RasterizerOpenGL::OnCacheInvalidation(VAddr addr, u64 size) { } { std::scoped_lock lock{buffer_cache.mutex}; - buffer_cache.CachedWriteMemory(addr, size); + buffer_cache.WriteMemory(addr, size); } shader_cache.InvalidateRegion(addr, size); } diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp index 7e7a80740..c4c30d807 100644 --- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp +++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp @@ -132,16 +132,12 @@ void RendererVulkan::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { const bool use_accelerated = rasterizer.AccelerateDisplay(*framebuffer, framebuffer_addr, framebuffer->stride); const bool is_srgb = use_accelerated && screen_info.is_srgb; + RenderScreenshot(*framebuffer, use_accelerated); - { - std::scoped_lock lock{rasterizer.LockCaches()}; - RenderScreenshot(*framebuffer, use_accelerated); - - Frame* frame = present_manager.GetRenderFrame(); - blit_screen.DrawToSwapchain(frame, *framebuffer, use_accelerated, is_srgb); - scheduler.Flush(*frame->render_ready); - present_manager.Present(frame); - } + Frame* frame = present_manager.GetRenderFrame(); + blit_screen.DrawToSwapchain(frame, *framebuffer, use_accelerated, is_srgb); + scheduler.Flush(*frame->render_ready); + present_manager.Present(frame); gpu.RendererFrameEndNotify(); rasterizer.TickFrame(); diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.cpp b/src/video_core/renderer_vulkan/vk_blit_screen.cpp index 52fc142d1..66483a900 100644 --- a/src/video_core/renderer_vulkan/vk_blit_screen.cpp +++ b/src/video_core/renderer_vulkan/vk_blit_screen.cpp @@ -137,6 +137,56 @@ BlitScreen::BlitScreen(Core::Memory::Memory& cpu_memory_, Core::Frontend::EmuWin BlitScreen::~BlitScreen() = default; +static Common::Rectangle<f32> NormalizeCrop(const Tegra::FramebufferConfig& framebuffer, + const ScreenInfo& screen_info) { + f32 left, top, right, bottom; + + if (!framebuffer.crop_rect.IsEmpty()) { + // If crop rectangle is not empty, apply properties from rectangle. + left = static_cast<f32>(framebuffer.crop_rect.left); + top = static_cast<f32>(framebuffer.crop_rect.top); + right = static_cast<f32>(framebuffer.crop_rect.right); + bottom = static_cast<f32>(framebuffer.crop_rect.bottom); + } else { + // Otherwise, fall back to framebuffer dimensions. + left = 0; + top = 0; + right = static_cast<f32>(framebuffer.width); + bottom = static_cast<f32>(framebuffer.height); + } + + // Apply transformation flags. + auto framebuffer_transform_flags = framebuffer.transform_flags; + + if (True(framebuffer_transform_flags & Service::android::BufferTransformFlags::FlipH)) { + // Switch left and right. + std::swap(left, right); + } + if (True(framebuffer_transform_flags & Service::android::BufferTransformFlags::FlipV)) { + // Switch top and bottom. + std::swap(top, bottom); + } + + framebuffer_transform_flags &= ~Service::android::BufferTransformFlags::FlipH; + framebuffer_transform_flags &= ~Service::android::BufferTransformFlags::FlipV; + if (True(framebuffer_transform_flags)) { + UNIMPLEMENTED_MSG("Unsupported framebuffer_transform_flags={}", + static_cast<u32>(framebuffer_transform_flags)); + } + + // Get the screen properties. + const f32 screen_width = static_cast<f32>(screen_info.width); + const f32 screen_height = static_cast<f32>(screen_info.height); + + // Normalize coordinate space. + left /= screen_width; + top /= screen_height; + right /= screen_width; + bottom /= screen_height; + + return Common::Rectangle<f32>(left, top, right, bottom); +} + void BlitScreen::Recreate() { present_manager.WaitPresent(); scheduler.Finish(); @@ -354,17 +404,10 @@ void BlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, source_image_view = smaa->Draw(scheduler, image_index, source_image, source_image_view); } if (fsr) { - auto crop_rect = framebuffer.crop_rect; - if (crop_rect.GetWidth() == 0) { - crop_rect.right = framebuffer.width; - } - if (crop_rect.GetHeight() == 0) { - crop_rect.bottom = framebuffer.height; - } - crop_rect = crop_rect.Scale(Settings::values.resolution_info.up_factor); - VkExtent2D fsr_input_size{ - .width = Settings::values.resolution_info.ScaleUp(framebuffer.width), - .height = Settings::values.resolution_info.ScaleUp(framebuffer.height), + const auto crop_rect = NormalizeCrop(framebuffer, screen_info); + const VkExtent2D fsr_input_size{ + .width = Settings::values.resolution_info.ScaleUp(screen_info.width), + .height = Settings::values.resolution_info.ScaleUp(screen_info.height), }; VkImageView fsr_image_view = fsr->Draw(scheduler, image_index, source_image_view, fsr_input_size, crop_rect); @@ -1397,61 +1440,37 @@ void BlitScreen::SetUniformData(BufferData& data, const Layout::FramebufferLayou void BlitScreen::SetVertexData(BufferData& data, const Tegra::FramebufferConfig& framebuffer, const Layout::FramebufferLayout layout) const { - const auto& framebuffer_transform_flags = framebuffer.transform_flags; - const auto& framebuffer_crop_rect = framebuffer.crop_rect; - - static constexpr Common::Rectangle<f32> texcoords{0.f, 0.f, 1.f, 1.f}; - auto left = texcoords.left; - auto right = texcoords.right; - - switch (framebuffer_transform_flags) { - case Service::android::BufferTransformFlags::Unset: - break; - case Service::android::BufferTransformFlags::FlipV: - // Flip the framebuffer vertically - left = texcoords.right; - right = texcoords.left; - break; - default: - UNIMPLEMENTED_MSG("Unsupported framebuffer_transform_flags={}", - static_cast<u32>(framebuffer_transform_flags)); - break; - } + f32 left, top, right, bottom; - UNIMPLEMENTED_IF(framebuffer_crop_rect.left != 0); - - f32 left_start{}; - if (framebuffer_crop_rect.Top() > 0) { - left_start = static_cast<f32>(framebuffer_crop_rect.Top()) / - static_cast<f32>(framebuffer_crop_rect.Bottom()); - } - f32 scale_u = static_cast<f32>(framebuffer.width) / static_cast<f32>(screen_info.width); - f32 scale_v = static_cast<f32>(framebuffer.height) / static_cast<f32>(screen_info.height); - // Scale the output by the crop width/height. This is commonly used with 1280x720 rendering - // (e.g. handheld mode) on a 1920x1080 framebuffer. - if (!fsr) { - if (framebuffer_crop_rect.GetWidth() > 0) { - scale_u = static_cast<f32>(framebuffer_crop_rect.GetWidth()) / - static_cast<f32>(screen_info.width); - } - if (framebuffer_crop_rect.GetHeight() > 0) { - scale_v = static_cast<f32>(framebuffer_crop_rect.GetHeight()) / - static_cast<f32>(screen_info.height); - } + if (fsr) { + // FSR has already applied the crop, so we just want to render the image + // it has produced. + left = 0; + top = 0; + right = 1; + bottom = 1; + } else { + // Get the normalized crop rectangle. + const auto crop = NormalizeCrop(framebuffer, screen_info); + + // Apply the crop. + left = crop.left; + top = crop.top; + right = crop.right; + bottom = crop.bottom; } + // Map the coordinates to the screen. const auto& screen = layout.screen; const auto x = static_cast<f32>(screen.left); const auto y = static_cast<f32>(screen.top); const auto w = static_cast<f32>(screen.GetWidth()); const auto h = static_cast<f32>(screen.GetHeight()); - data.vertices[0] = ScreenRectVertex(x, y, texcoords.top * scale_u, left_start + left * scale_v); - data.vertices[1] = - ScreenRectVertex(x + w, y, texcoords.bottom * scale_u, left_start + left * scale_v); - data.vertices[2] = - ScreenRectVertex(x, y + h, texcoords.top * scale_u, left_start + right * scale_v); - data.vertices[3] = - ScreenRectVertex(x + w, y + h, texcoords.bottom * scale_u, left_start + right * scale_v); + + data.vertices[0] = ScreenRectVertex(x, y, left, top); + data.vertices[1] = ScreenRectVertex(x + w, y, right, top); + data.vertices[2] = ScreenRectVertex(x, y + h, left, bottom); + data.vertices[3] = ScreenRectVertex(x + w, y + h, right, bottom); } void BlitScreen::CreateSMAA(VkExtent2D smaa_size) { diff --git a/src/video_core/renderer_vulkan/vk_fsr.cpp b/src/video_core/renderer_vulkan/vk_fsr.cpp index ce8f3f3c2..f7a05fbc0 100644 --- a/src/video_core/renderer_vulkan/vk_fsr.cpp +++ b/src/video_core/renderer_vulkan/vk_fsr.cpp @@ -34,7 +34,7 @@ FSR::FSR(const Device& device_, MemoryAllocator& memory_allocator_, size_t image } VkImageView FSR::Draw(Scheduler& scheduler, size_t image_index, VkImageView image_view, - VkExtent2D input_image_extent, const Common::Rectangle<int>& crop_rect) { + VkExtent2D input_image_extent, const Common::Rectangle<f32>& crop_rect) { UpdateDescriptorSet(image_index, image_view); @@ -61,15 +61,21 @@ VkImageView FSR::Draw(Scheduler& scheduler, size_t image_index, VkImageView imag cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, *easu_pipeline); + const f32 input_image_width = static_cast<f32>(input_image_extent.width); + const f32 input_image_height = static_cast<f32>(input_image_extent.height); + const f32 output_image_width = static_cast<f32>(output_size.width); + const f32 output_image_height = static_cast<f32>(output_size.height); + const f32 viewport_width = (crop_rect.right - crop_rect.left) * input_image_width; + const f32 viewport_x = crop_rect.left * input_image_width; + const f32 viewport_height = (crop_rect.bottom - crop_rect.top) * input_image_height; + const f32 viewport_y = crop_rect.top * input_image_height; + std::array<u32, 4 * 4> push_constants; - FsrEasuConOffset( - push_constants.data() + 0, push_constants.data() + 4, push_constants.data() + 8, - push_constants.data() + 12, - - static_cast<f32>(crop_rect.GetWidth()), static_cast<f32>(crop_rect.GetHeight()), - static_cast<f32>(input_image_extent.width), static_cast<f32>(input_image_extent.height), - static_cast<f32>(output_size.width), static_cast<f32>(output_size.height), - static_cast<f32>(crop_rect.left), static_cast<f32>(crop_rect.top)); + FsrEasuConOffset(push_constants.data() + 0, push_constants.data() + 4, + push_constants.data() + 8, push_constants.data() + 12, + + viewport_width, viewport_height, input_image_width, input_image_height, + output_image_width, output_image_height, viewport_x, viewport_y); cmdbuf.PushConstants(*pipeline_layout, VK_SHADER_STAGE_COMPUTE_BIT, push_constants); { diff --git a/src/video_core/renderer_vulkan/vk_fsr.h b/src/video_core/renderer_vulkan/vk_fsr.h index 8bb9fc23a..3505c1416 100644 --- a/src/video_core/renderer_vulkan/vk_fsr.h +++ b/src/video_core/renderer_vulkan/vk_fsr.h @@ -17,7 +17,7 @@ public: explicit FSR(const Device& device, MemoryAllocator& memory_allocator, size_t image_count, VkExtent2D output_size); VkImageView Draw(Scheduler& scheduler, size_t image_index, VkImageView image_view, - VkExtent2D input_image_extent, const Common::Rectangle<int>& crop_rect); + VkExtent2D input_image_extent, const Common::Rectangle<f32>& crop_rect); private: void CreateDescriptorPool(); diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 22bf8cc77..89b455bff 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -263,6 +263,22 @@ Shader::RuntimeInfo MakeRuntimeInfo(std::span<const Shader::IR::Program> program info.y_negate = key.state.y_negate != 0; return info; } + +size_t GetTotalPipelineWorkers() { + const size_t max_core_threads = + std::max<size_t>(static_cast<size_t>(std::thread::hardware_concurrency()), 2ULL) - 1ULL; +#ifdef ANDROID + // Leave at least a few cores free in android + constexpr size_t free_cores = 3ULL; + if (max_core_threads <= free_cores) { + return 1ULL; + } + return max_core_threads - free_cores; +#else + return max_core_threads; +#endif +} + } // Anonymous namespace size_t ComputePipelineCacheKey::Hash() const noexcept { @@ -294,11 +310,8 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, const Device& device texture_cache{texture_cache_}, shader_notify{shader_notify_}, use_asynchronous_shaders{Settings::values.use_asynchronous_shaders.GetValue()}, use_vulkan_pipeline_cache{Settings::values.use_vulkan_driver_pipeline_cache.GetValue()}, -#ifdef ANDROID - workers(1, "VkPipelineBuilder"), -#else - workers(std::max(std::thread::hardware_concurrency(), 2U) - 1, "VkPipelineBuilder"), -#endif + workers(device.HasBrokenParallelShaderCompiling() ? 1ULL : GetTotalPipelineWorkers(), + "VkPipelineBuilder"), serialization_thread(1, "VkPipelineSerialization") { const auto& float_control{device.FloatControlProperties()}; const VkDriverId driver_id{device.GetDriverID()}; @@ -338,6 +351,7 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, const Device& device .support_geometry_shader_passthrough = device.IsNvGeometryShaderPassthroughSupported(), .support_native_ndc = device.IsExtDepthClipControlSupported(), .support_scaled_attributes = !device.MustEmulateScaledFormats(), + .support_multi_viewport = device.SupportsMultiViewport(), .warp_size_potentially_larger_than_guest = device.IsWarpSizePotentiallyBiggerThanGuest(), diff --git a/src/video_core/renderer_vulkan/vk_query_cache.cpp b/src/video_core/renderer_vulkan/vk_query_cache.cpp index 66c03bf17..078777cdd 100644 --- a/src/video_core/renderer_vulkan/vk_query_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_query_cache.cpp @@ -211,6 +211,13 @@ public: return; } PauseCounter(); + const auto driver_id = device.GetDriverID(); + if (driver_id == VK_DRIVER_ID_QUALCOMM_PROPRIETARY || + driver_id == VK_DRIVER_ID_ARM_PROPRIETARY || driver_id == VK_DRIVER_ID_MESA_TURNIP) { + pending_sync.clear(); + sync_values_stash.clear(); + return; + } sync_values_stash.clear(); sync_values_stash.emplace_back(); std::vector<HostSyncValues>* sync_values = &sync_values_stash.back(); @@ -1378,6 +1385,12 @@ bool QueryCacheRuntime::HostConditionalRenderingCompareValues(VideoCommon::Looku return true; } + auto driver_id = impl->device.GetDriverID(); + if (driver_id == VK_DRIVER_ID_QUALCOMM_PROPRIETARY || + driver_id == VK_DRIVER_ID_ARM_PROPRIETARY || driver_id == VK_DRIVER_ID_MESA_TURNIP) { + return true; + } + for (size_t i = 0; i < 2; i++) { is_null[i] = !is_in_ac[i] && check_value(objects[i]->address); } diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 3983b2eb7..e0ab1eaac 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -82,7 +82,7 @@ VkViewport GetViewportState(const Device& device, const Maxwell& regs, size_t in } if (y_negate) { - y += height; + y += conv(static_cast<f32>(regs.surface_clip.height)); height = -height; } @@ -199,7 +199,7 @@ void RasterizerVulkan::PrepareDraw(bool is_indexed, Func&& draw_func) { if (!pipeline) { return; } - std::scoped_lock lock{LockCaches()}; + std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; // update engine as channel may be different. pipeline->SetEngine(maxwell3d, gpu_memory); pipeline->Configure(is_indexed); @@ -621,7 +621,7 @@ void RasterizerVulkan::OnCacheInvalidation(VAddr addr, u64 size) { } { std::scoped_lock lock{buffer_cache.mutex}; - buffer_cache.CachedWriteMemory(addr, size); + buffer_cache.WriteMemory(addr, size); } pipeline_cache.InvalidateRegion(addr, size); } @@ -710,7 +710,6 @@ void RasterizerVulkan::TiledCacheBarrier() { } void RasterizerVulkan::FlushCommands() { - std::scoped_lock lock{LockCaches()}; if (draw_counter == 0) { return; } @@ -808,7 +807,6 @@ void RasterizerVulkan::FlushWork() { if ((++draw_counter & 7) != 7) { return; } - std::scoped_lock lock{LockCaches()}; if (draw_counter < DRAWS_TO_DISPATCH) { // Send recorded tasks to the worker thread scheduler.DispatchWork(); @@ -1507,7 +1505,7 @@ void RasterizerVulkan::UpdateVertexInput(Tegra::Engines::Maxwell3D::Regs& regs) void RasterizerVulkan::InitializeChannel(Tegra::Control::ChannelState& channel) { CreateChannel(channel); { - std::scoped_lock lock{LockCaches()}; + std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; texture_cache.CreateChannel(channel); buffer_cache.CreateChannel(channel); } @@ -1520,7 +1518,7 @@ void RasterizerVulkan::BindChannel(Tegra::Control::ChannelState& channel) { const s32 channel_id = channel.bind_id; BindToChannel(channel_id); { - std::scoped_lock lock{LockCaches()}; + std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; texture_cache.BindToChannel(channel_id); buffer_cache.BindToChannel(channel_id); } @@ -1533,7 +1531,7 @@ void RasterizerVulkan::BindChannel(Tegra::Control::ChannelState& channel) { void RasterizerVulkan::ReleaseChannel(s32 channel_id) { EraseChannel(channel_id); { - std::scoped_lock lock{LockCaches()}; + std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; texture_cache.EraseChannel(channel_id); buffer_cache.EraseChannel(channel_id); } diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index ce3dfbaab..ad069556c 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -133,10 +133,6 @@ public: void ReleaseChannel(s32 channel_id) override; - std::scoped_lock<std::recursive_mutex, std::recursive_mutex> LockCaches() { - return std::scoped_lock{buffer_cache.mutex, texture_cache.mutex}; - } - private: static constexpr size_t MAX_TEXTURES = 192; static constexpr size_t MAX_IMAGES = 48; diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index e518756d2..6900b8ffa 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -635,6 +635,12 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR has_broken_cube_compatibility = true; } } + if (is_qualcomm) { + const u32 version = (properties.properties.driverVersion << 3) >> 3; + if (version < VK_MAKE_API_VERSION(0, 255, 615, 512)) { + has_broken_parallel_compiling = true; + } + } if (extensions.sampler_filter_minmax && is_amd) { // Disable ext_sampler_filter_minmax on AMD GCN4 and lower as it is broken. if (!features.shader_float16_int8.shaderFloat16) { diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h index b213ed7dd..4f3846345 100644 --- a/src/video_core/vulkan_common/vulkan_device.h +++ b/src/video_core/vulkan_common/vulkan_device.h @@ -102,6 +102,7 @@ VK_DEFINE_HANDLE(VmaAllocator) EXTENSION_NAME(VK_EXT_EXTENDED_DYNAMIC_STATE_EXTENSION_NAME) \ EXTENSION_NAME(VK_EXT_EXTENDED_DYNAMIC_STATE_2_EXTENSION_NAME) \ EXTENSION_NAME(VK_EXT_EXTENDED_DYNAMIC_STATE_3_EXTENSION_NAME) \ + EXTENSION_NAME(VK_EXT_EXTERNAL_MEMORY_HOST_EXTENSION_NAME) \ EXTENSION_NAME(VK_EXT_4444_FORMATS_EXTENSION_NAME) \ EXTENSION_NAME(VK_EXT_LINE_RASTERIZATION_EXTENSION_NAME) \ EXTENSION_NAME(VK_EXT_ROBUSTNESS_2_EXTENSION_NAME) \ @@ -599,6 +600,11 @@ public: return has_broken_cube_compatibility; } + /// Returns true if parallel shader compiling has issues with the current driver. + bool HasBrokenParallelShaderCompiling() const { + return has_broken_parallel_compiling; + } + /// Returns the vendor name reported from Vulkan. std::string_view GetVendorName() const { return properties.driver.driverName; @@ -663,6 +669,10 @@ public: return supports_conditional_barriers; } + bool SupportsMultiViewport() const { + return features2.features.multiViewport; + } + [[nodiscard]] static constexpr bool CheckBrokenCompute(VkDriverId driver_id, u32 driver_version) { if (driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS) { @@ -794,6 +804,7 @@ private: bool is_non_gpu{}; ///< Is SoftwareRasterizer, FPGA, non-GPU device. bool has_broken_compute{}; ///< Compute shaders can cause crashes bool has_broken_cube_compatibility{}; ///< Has broken cube compatibility bit + bool has_broken_parallel_compiling{}; ///< Has broken parallel shader compiling. bool has_renderdoc{}; ///< Has RenderDoc attached bool has_nsight_graphics{}; ///< Has Nsight Graphics attached bool supports_d24_depth{}; ///< Supports D24 depth buffers. |
