From 6ac97405df021d5d2bd9a529253bd5c5a418c1a9 Mon Sep 17 00:00:00 2001
From: ameerj <aj662@drexel.edu>
Date: Tue, 28 Jul 2020 00:08:02 -0400
Subject: Vk Async pipeline compilation

---
 src/video_core/renderer_vulkan/vk_rasterizer.cpp | 27 ++++++++++++++++++++++--
 1 file changed, 25 insertions(+), 2 deletions(-)

(limited to 'src/video_core/renderer_vulkan/vk_rasterizer.cpp')

diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index 7500e8244..6310e898c 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -400,8 +400,25 @@ RasterizerVulkan::RasterizerVulkan(Core::System& system, Core::Frontend::EmuWind
       buffer_cache(*this, system, device, memory_manager, scheduler, staging_pool),
       sampler_cache(device),
       fence_manager(system, *this, device, scheduler, texture_cache, buffer_cache, query_cache),
-      query_cache(system, *this, device, scheduler), wfi_event{device.GetLogical().CreateEvent()} {
+      query_cache(system, *this, device, scheduler),
+      wfi_event{device.GetLogical().CreateNewEvent()}, async_shaders{renderer} {
     scheduler.SetQueryCache(query_cache);
+    if (device.UseAsynchronousShaders()) {
+        // Max worker threads we should allow
+        constexpr auto MAX_THREADS = 2u;
+        // Amount of threads we should reserve for other parts of yuzu
+        constexpr auto RESERVED_THREADS = 6u;
+        // Get the amount of threads we can use(this can return zero)
+        const auto cpu_thread_count =
+            std::max(RESERVED_THREADS, std::thread::hardware_concurrency());
+        // Deduce how many "extra" threads we have to use.
+        const auto max_threads_unused = cpu_thread_count - RESERVED_THREADS;
+        // Always allow at least 1 thread regardless of our settings
+        const auto max_worker_count = std::max(1u, max_threads_unused);
+        // Don't use more than MAX_THREADS
+        const auto worker_count = std::min(max_worker_count, MAX_THREADS);
+        async_shaders.AllocateWorkers(worker_count);
+    }
 }
 
 RasterizerVulkan::~RasterizerVulkan() = default;
@@ -439,7 +456,13 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
     key.renderpass_params = GetRenderPassParams(texceptions);
     key.padding = 0;
 
-    auto& pipeline = pipeline_cache.GetGraphicsPipeline(key);
+    auto& pipeline = pipeline_cache.GetGraphicsPipeline(key, async_shaders);
+    if (&pipeline == nullptr || pipeline.GetHandle() == VK_NULL_HANDLE) {
+        // Async graphics pipeline was not ready.
+        system.GPU().TickWork();
+        return;
+    }
+
     scheduler.BindGraphicsPipeline(pipeline.GetHandle());
 
     const auto renderpass = pipeline.GetRenderPass();
-- 
cgit v1.2.3


From 4539073ce1d8fd6df03263e826d3805b4909e055 Mon Sep 17 00:00:00 2001
From: ameerj <aj662@drexel.edu>
Date: Thu, 30 Jul 2020 15:41:11 -0400
Subject: Address feedback. Bruteforce delete duplicates

---
 src/video_core/renderer_vulkan/vk_rasterizer.cpp | 18 ++++++++++--------
 1 file changed, 10 insertions(+), 8 deletions(-)

(limited to 'src/video_core/renderer_vulkan/vk_rasterizer.cpp')

diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index 6310e898c..fc1b51a96 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -404,10 +404,12 @@ RasterizerVulkan::RasterizerVulkan(Core::System& system, Core::Frontend::EmuWind
       wfi_event{device.GetLogical().CreateNewEvent()}, async_shaders{renderer} {
     scheduler.SetQueryCache(query_cache);
     if (device.UseAsynchronousShaders()) {
+        // The following is subject to move into the allocate workers method, to be api agnostic
+
         // Max worker threads we should allow
-        constexpr auto MAX_THREADS = 2u;
+        constexpr u32 MAX_THREADS = 4;
         // Amount of threads we should reserve for other parts of yuzu
-        constexpr auto RESERVED_THREADS = 6u;
+        constexpr u32 RESERVED_THREADS = 6;
         // Get the amount of threads we can use(this can return zero)
         const auto cpu_thread_count =
             std::max(RESERVED_THREADS, std::thread::hardware_concurrency());
@@ -456,16 +458,16 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
     key.renderpass_params = GetRenderPassParams(texceptions);
     key.padding = 0;
 
-    auto& pipeline = pipeline_cache.GetGraphicsPipeline(key, async_shaders);
-    if (&pipeline == nullptr || pipeline.GetHandle() == VK_NULL_HANDLE) {
+    auto pipeline = pipeline_cache.GetGraphicsPipeline(key, async_shaders);
+    if (pipeline == nullptr || pipeline->GetHandle() == VK_NULL_HANDLE) {
         // Async graphics pipeline was not ready.
         system.GPU().TickWork();
         return;
     }
 
-    scheduler.BindGraphicsPipeline(pipeline.GetHandle());
+    scheduler.BindGraphicsPipeline(pipeline->GetHandle());
 
-    const auto renderpass = pipeline.GetRenderPass();
+    const auto renderpass = pipeline->GetRenderPass();
     const auto [framebuffer, render_area] = ConfigureFramebuffers(renderpass);
     scheduler.RequestRenderpass(renderpass, framebuffer, render_area);
 
@@ -475,8 +477,8 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
 
     BeginTransformFeedback();
 
-    const auto pipeline_layout = pipeline.GetLayout();
-    const auto descriptor_set = pipeline.CommitDescriptorSet();
+    const auto pipeline_layout = pipeline->GetLayout();
+    const auto descriptor_set = pipeline->CommitDescriptorSet();
     scheduler.Record([pipeline_layout, descriptor_set, draw_params](vk::CommandBuffer cmdbuf) {
         if (descriptor_set) {
             cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline_layout,
-- 
cgit v1.2.3


From 31a76410e8fa09462d960c10148c075125dc385a Mon Sep 17 00:00:00 2001
From: ameerj <aj662@drexel.edu>
Date: Sun, 2 Aug 2020 13:05:41 -0400
Subject: Address feedback, add shader compile notifier, update setting text

---
 src/video_core/renderer_vulkan/vk_rasterizer.cpp | 19 +++++++------------
 1 file changed, 7 insertions(+), 12 deletions(-)

(limited to 'src/video_core/renderer_vulkan/vk_rasterizer.cpp')

diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index fc1b51a96..720802ad5 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -14,6 +14,7 @@
 #include "common/assert.h"
 #include "common/logging/log.h"
 #include "common/microprofile.h"
+#include "common/scope_exit.h"
 #include "core/core.h"
 #include "core/settings.h"
 #include "video_core/engines/kepler_compute.h"
@@ -408,15 +409,10 @@ RasterizerVulkan::RasterizerVulkan(Core::System& system, Core::Frontend::EmuWind
 
         // Max worker threads we should allow
         constexpr u32 MAX_THREADS = 4;
-        // Amount of threads we should reserve for other parts of yuzu
-        constexpr u32 RESERVED_THREADS = 6;
-        // Get the amount of threads we can use(this can return zero)
-        const auto cpu_thread_count =
-            std::max(RESERVED_THREADS, std::thread::hardware_concurrency());
-        // Deduce how many "extra" threads we have to use.
-        const auto max_threads_unused = cpu_thread_count - RESERVED_THREADS;
+        // Deduce how many threads we can use
+        const auto threads_used = std::thread::hardware_concurrency() / 4;
         // Always allow at least 1 thread regardless of our settings
-        const auto max_worker_count = std::max(1u, max_threads_unused);
+        const auto max_worker_count = std::max(1U, threads_used);
         // Don't use more than MAX_THREADS
         const auto worker_count = std::min(max_worker_count, MAX_THREADS);
         async_shaders.AllocateWorkers(worker_count);
@@ -432,6 +428,8 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
 
     query_cache.UpdateCounters();
 
+    SCOPE_EXIT({ system.GPU().TickWork(); });
+
     const auto& gpu = system.GPU().Maxwell3D();
     GraphicsPipelineCacheKey key;
     key.fixed_state.Fill(gpu.regs, device.IsExtExtendedDynamicStateSupported());
@@ -458,10 +456,9 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
     key.renderpass_params = GetRenderPassParams(texceptions);
     key.padding = 0;
 
-    auto pipeline = pipeline_cache.GetGraphicsPipeline(key, async_shaders);
+    auto* pipeline = pipeline_cache.GetGraphicsPipeline(key, async_shaders);
     if (pipeline == nullptr || pipeline->GetHandle() == VK_NULL_HANDLE) {
         // Async graphics pipeline was not ready.
-        system.GPU().TickWork();
         return;
     }
 
@@ -488,8 +485,6 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
     });
 
     EndTransformFeedback();
-
-    system.GPU().TickWork();
 }
 
 void RasterizerVulkan::Clear() {
-- 
cgit v1.2.3


From 1b829fbd7a36f9c2b553b04aa39bdf8135d30458 Mon Sep 17 00:00:00 2001
From: ameerj <aj662@drexel.edu>
Date: Wed, 5 Aug 2020 12:53:26 -0400
Subject: move thread 1/4 count computation into allocate workers method

---
 src/video_core/renderer_vulkan/vk_rasterizer.cpp | 12 +-----------
 1 file changed, 1 insertion(+), 11 deletions(-)

(limited to 'src/video_core/renderer_vulkan/vk_rasterizer.cpp')

diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index 720802ad5..936f76195 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -405,17 +405,7 @@ RasterizerVulkan::RasterizerVulkan(Core::System& system, Core::Frontend::EmuWind
       wfi_event{device.GetLogical().CreateNewEvent()}, async_shaders{renderer} {
     scheduler.SetQueryCache(query_cache);
     if (device.UseAsynchronousShaders()) {
-        // The following is subject to move into the allocate workers method, to be api agnostic
-
-        // Max worker threads we should allow
-        constexpr u32 MAX_THREADS = 4;
-        // Deduce how many threads we can use
-        const auto threads_used = std::thread::hardware_concurrency() / 4;
-        // Always allow at least 1 thread regardless of our settings
-        const auto max_worker_count = std::max(1U, threads_used);
-        // Don't use more than MAX_THREADS
-        const auto worker_count = std::min(max_worker_count, MAX_THREADS);
-        async_shaders.AllocateWorkers(worker_count);
+        async_shaders.AllocateWorkers();
     }
 }
 
-- 
cgit v1.2.3