From 2fc698b040e7e25223ba6ebe31abb04b1fc65f06 Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Thu, 1 Apr 2021 01:36:22 -0300
Subject: vulkan: Build pipelines in parallel at runtime

On the worker thread, wait for a pipeline to finish building before
binding it to the command buffer. This allows pipeline builds to be
queued to multiple threads.
---
 src/video_core/renderer_vulkan/vk_scheduler.cpp | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

(limited to 'src/video_core/renderer_vulkan/vk_scheduler.cpp')
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp
index f35c120b0..25a4933e5 100644
--- a/src/video_core/renderer_vulkan/vk_scheduler.cpp
+++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp
@@ -124,18 +124,16 @@ void VKScheduler::RequestOutsideRenderPassOperationContext() {
     EndRenderPass();
 }
 
-void VKScheduler::BindGraphicsPipeline(VkPipeline pipeline) {
+bool VKScheduler::UpdateGraphicsPipeline(GraphicsPipeline* pipeline) {
     if (state.graphics_pipeline == pipeline) {
-        return;
+        return false;
     }
     state.graphics_pipeline = pipeline;
-    Record([pipeline](vk::CommandBuffer cmdbuf) {
-        cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
-    });
+    return true;
 }
 
 void VKScheduler::WorkerThread() {
-    Common::SetCurrentThreadPriority(Common::ThreadPriority::High);
+    Common::SetCurrentThreadName("yuzu:VulkanWorker");
     std::unique_lock lock{mutex};
     do {
         cv.wait(lock, [this] { return !chunk_queue.Empty() || quit; });
-- 
cgit v1.2.3


From 53acdda772a8b7650c46ba9d998119b8c8e30844 Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Mon, 26 Apr 2021 22:11:31 -0300
Subject: vk_scheduler: Allow command submission on worker thread

This changes how Scheduler::Flush works. It queues the current command
buffer to be sent to the GPU but does not do it immediately. The Vulkan
worker thread takes care of that. Users will have to use
Scheduler::Flush + Scheduler::WaitWorker to get the previous behavior.

Scheduler::Finish is unchanged.

To avoid waiting on work never queued, Scheduler::Wait sends the current
command buffer if that's what the caller wants to wait for.
---
 src/video_core/renderer_vulkan/vk_scheduler.cpp | 120 +++++++++++++-----------
 1 file changed, 64 insertions(+), 56 deletions(-)

(limited to 'src/video_core/renderer_vulkan/vk_scheduler.cpp')

diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp
index 25a4933e5..81cb330d9 100644
--- a/src/video_core/renderer_vulkan/vk_scheduler.cpp
+++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp
@@ -31,7 +31,7 @@ void VKScheduler::CommandChunk::ExecuteAll(vk::CommandBuffer cmdbuf) {
         command->~Command();
         command = next;
     }
-
+    submit = false;
     command_offset = 0;
     first = nullptr;
     last = nullptr;
@@ -42,7 +42,7 @@ VKScheduler::VKScheduler(const Device& device_, StateTracker& state_tracker_)
       master_semaphore{std::make_unique<MasterSemaphore>(device)},
       command_pool{std::make_unique<CommandPool>(*master_semaphore, device)} {
     AcquireNewChunk();
-    AllocateNewContext();
+    AllocateWorkerCommandBuffer();
     worker_thread = std::thread(&VKScheduler::WorkerThread, this);
 }
 
@@ -60,6 +60,7 @@ void VKScheduler::Flush(VkSemaphore semaphore) {
 void VKScheduler::Finish(VkSemaphore semaphore) {
     const u64 presubmit_tick = CurrentTick();
     SubmitExecution(semaphore);
+    WaitWorker();
     Wait(presubmit_tick);
     AllocateNewContext();
 }
@@ -140,75 +141,82 @@ void VKScheduler::WorkerThread() {
         if (quit) {
             continue;
         }
-        auto extracted_chunk = std::move(chunk_queue.Front());
-        chunk_queue.Pop();
-        extracted_chunk->ExecuteAll(current_cmdbuf);
-        chunk_reserve.Push(std::move(extracted_chunk));
+        while (!chunk_queue.Empty()) {
+            auto extracted_chunk = std::move(chunk_queue.Front());
+            chunk_queue.Pop();
+            const bool has_submit = extracted_chunk->HasSubmit();
+            extracted_chunk->ExecuteAll(current_cmdbuf);
+            if (has_submit) {
+                AllocateWorkerCommandBuffer();
+            }
+            chunk_reserve.Push(std::move(extracted_chunk));
+        }
     } while (!quit);
 }
 
+void VKScheduler::AllocateWorkerCommandBuffer() {
+    current_cmdbuf = vk::CommandBuffer(command_pool->Commit(), device.GetDispatchLoader());
+    current_cmdbuf.Begin({
+        .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
+        .pNext = nullptr,
+        .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
+        .pInheritanceInfo = nullptr,
+    });
+}
+
 void VKScheduler::SubmitExecution(VkSemaphore semaphore) {
     EndPendingOperations();
     InvalidateState();
-    WaitWorker();
-
-    std::unique_lock lock{mutex};
-
-    current_cmdbuf.End();
-
-    const VkSemaphore timeline_semaphore = master_semaphore->Handle();
-    const u32 num_signal_semaphores = semaphore ? 2U : 1U;
 
     const u64 signal_value = master_semaphore->CurrentTick();
-    const u64 wait_value = signal_value - 1;
-    const VkPipelineStageFlags wait_stage_mask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT;
-
     master_semaphore->NextTick();
 
-    const std::array signal_values{signal_value, u64(0)};
-    const std::array signal_semaphores{timeline_semaphore, semaphore};
+    Record([semaphore, signal_value, this](vk::CommandBuffer cmdbuf) {
+        cmdbuf.End();
 
-    const VkTimelineSemaphoreSubmitInfoKHR timeline_si{
-        .sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO_KHR,
-        .pNext = nullptr,
-        .waitSemaphoreValueCount = 1,
-        .pWaitSemaphoreValues = &wait_value,
-        .signalSemaphoreValueCount = num_signal_semaphores,
-        .pSignalSemaphoreValues = signal_values.data(),
-    };
-    const VkSubmitInfo submit_info{
-        .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
-        .pNext = &timeline_si,
-        .waitSemaphoreCount = 1,
-        .pWaitSemaphores = &timeline_semaphore,
-        .pWaitDstStageMask = &wait_stage_mask,
-        .commandBufferCount = 1,
-        .pCommandBuffers = current_cmdbuf.address(),
-        .signalSemaphoreCount = num_signal_semaphores,
-        .pSignalSemaphores = signal_semaphores.data(),
-    };
-    switch (const VkResult result = device.GetGraphicsQueue().Submit(submit_info)) {
-    case VK_SUCCESS:
-        break;
-    case VK_ERROR_DEVICE_LOST:
-        device.ReportLoss();
-        [[fallthrough]];
-    default:
-        vk::Check(result);
-    }
-}
+        const u32 num_signal_semaphores = semaphore ? 2U : 1U;
 
-void VKScheduler::AllocateNewContext() {
-    std::unique_lock lock{mutex};
+        const u64 wait_value = signal_value - 1;
+        const VkPipelineStageFlags wait_stage_mask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT;
 
-    current_cmdbuf = vk::CommandBuffer(command_pool->Commit(), device.GetDispatchLoader());
-    current_cmdbuf.Begin({
-        .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
-        .pNext = nullptr,
-        .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
-        .pInheritanceInfo = nullptr,
+        const VkSemaphore timeline_semaphore = master_semaphore->Handle();
+        const std::array signal_values{signal_value, u64(0)};
+        const std::array signal_semaphores{timeline_semaphore, semaphore};
+
+        const VkTimelineSemaphoreSubmitInfoKHR timeline_si{
+            .sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO_KHR,
+            .pNext = nullptr,
+            .waitSemaphoreValueCount = 1,
+            .pWaitSemaphoreValues = &wait_value,
+            .signalSemaphoreValueCount = num_signal_semaphores,
+            .pSignalSemaphoreValues = signal_values.data(),
+        };
+        const VkSubmitInfo submit_info{
+            .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
+            .pNext = &timeline_si,
+            .waitSemaphoreCount = 1,
+            .pWaitSemaphores = &timeline_semaphore,
+            .pWaitDstStageMask = &wait_stage_mask,
+            .commandBufferCount = 1,
+            .pCommandBuffers = cmdbuf.address(),
+            .signalSemaphoreCount = num_signal_semaphores,
+            .pSignalSemaphores = signal_semaphores.data(),
+        };
+        switch (const VkResult result = device.GetGraphicsQueue().Submit(submit_info)) {
+        case VK_SUCCESS:
+            break;
+        case VK_ERROR_DEVICE_LOST:
+            device.ReportLoss();
+            [[fallthrough]];
+        default:
+            vk::Check(result);
+        }
     });
+    chunk->MarkSubmit();
+    DispatchWork();
+}
 
+void VKScheduler::AllocateNewContext() {
     // Enable counters once again. These are disabled when a command buffer is finished.
     if (query_cache) {
         query_cache->UpdateCounters();
-- 
cgit v1.2.3


From a51503660435f1279ce0fa449f9cf76e74b45d74 Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Fri, 7 May 2021 00:29:08 -0300
Subject: vk_master_semaphore: Use fetch_add to increase master semaphore tick

---
 src/video_core/renderer_vulkan/vk_scheduler.cpp | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

(limited to 'src/video_core/renderer_vulkan/vk_scheduler.cpp')

diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp
index 81cb330d9..fcb6a5911 100644
--- a/src/video_core/renderer_vulkan/vk_scheduler.cpp
+++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp
@@ -168,9 +168,7 @@ void VKScheduler::SubmitExecution(VkSemaphore semaphore) {
     EndPendingOperations();
     InvalidateState();
 
-    const u64 signal_value = master_semaphore->CurrentTick();
-    master_semaphore->NextTick();
-
+    const u64 signal_value = master_semaphore->NextTick();
     Record([semaphore, signal_value, this](vk::CommandBuffer cmdbuf) {
         cmdbuf.End();
 
-- 
cgit v1.2.3


From 36f158626726f940d9dba22a2b03ebbb5aa41c5e Mon Sep 17 00:00:00 2001
From: ReinUsesLisp <reinuseslisp@airmail.cc>
Date: Fri, 7 May 2021 06:26:12 -0300
Subject: vk_scheduler: Use locks instead of an SPSC queue

This tries to fix a data race where we'd wait forever for the GPU.
---
 src/video_core/renderer_vulkan/vk_scheduler.cpp | 58 ++++++++++++++-----------
 1 file changed, 33 insertions(+), 25 deletions(-)

(limited to 'src/video_core/renderer_vulkan/vk_scheduler.cpp')

diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp
index fcb6a5911..4840962de 100644
--- a/src/video_core/renderer_vulkan/vk_scheduler.cpp
+++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp
@@ -47,8 +47,11 @@ VKScheduler::VKScheduler(const Device& device_, StateTracker& state_tracker_)
 }
 
 VKScheduler::~VKScheduler() {
-    quit = true;
-    cv.notify_all();
+    {
+        std::lock_guard lock{work_mutex};
+        quit = true;
+    }
+    work_cv.notify_all();
     worker_thread.join();
 }
 
@@ -69,20 +72,19 @@ void VKScheduler::WaitWorker() {
     MICROPROFILE_SCOPE(Vulkan_WaitForWorker);
     DispatchWork();
 
-    bool finished = false;
-    do {
-        cv.notify_all();
-        std::unique_lock lock{mutex};
-        finished = chunk_queue.Empty();
-    } while (!finished);
+    std::unique_lock lock{work_mutex};
+    wait_cv.wait(lock, [this] { return work_queue.empty(); });
 }
 
 void VKScheduler::DispatchWork() {
     if (chunk->Empty()) {
         return;
     }
-    chunk_queue.Push(std::move(chunk));
-    cv.notify_all();
+    {
+        std::lock_guard lock{work_mutex};
+        work_queue.push(std::move(chunk));
+    }
+    work_cv.notify_one();
     AcquireNewChunk();
 }
 
@@ -135,22 +137,27 @@ bool VKScheduler::UpdateGraphicsPipeline(GraphicsPipeline* pipeline) {
 
 void VKScheduler::WorkerThread() {
     Common::SetCurrentThreadName("yuzu:VulkanWorker");
-    std::unique_lock lock{mutex};
     do {
-        cv.wait(lock, [this] { return !chunk_queue.Empty() || quit; });
-        if (quit) {
-            continue;
+        if (work_queue.empty()) {
+            wait_cv.notify_all();
         }
-        while (!chunk_queue.Empty()) {
-            auto extracted_chunk = std::move(chunk_queue.Front());
-            chunk_queue.Pop();
-            const bool has_submit = extracted_chunk->HasSubmit();
-            extracted_chunk->ExecuteAll(current_cmdbuf);
-            if (has_submit) {
-                AllocateWorkerCommandBuffer();
+        std::unique_ptr<CommandChunk> work;
+        {
+            std::unique_lock lock{work_mutex};
+            work_cv.wait(lock, [this] { return !work_queue.empty() || quit; });
+            if (quit) {
+                continue;
             }
-            chunk_reserve.Push(std::move(extracted_chunk));
+            work = std::move(work_queue.front());
+            work_queue.pop();
+        }
+        const bool has_submit = work->HasSubmit();
+        work->ExecuteAll(current_cmdbuf);
+        if (has_submit) {
+            AllocateWorkerCommandBuffer();
         }
+        std::lock_guard reserve_lock{reserve_mutex};
+        chunk_reserve.push_back(std::move(work));
     } while (!quit);
 }
 
@@ -269,12 +276,13 @@ void VKScheduler::EndRenderPass() {
 }
 
 void VKScheduler::AcquireNewChunk() {
-    if (chunk_reserve.Empty()) {
+    std::lock_guard lock{reserve_mutex};
+    if (chunk_reserve.empty()) {
         chunk = std::make_unique<CommandChunk>();
         return;
     }
-    chunk = std::move(chunk_reserve.Front());
-    chunk_reserve.Pop();
+    chunk = std::move(chunk_reserve.back());
+    chunk_reserve.pop_back();
 }
 
 } // namespace Vulkan
-- 
cgit v1.2.3