From 4483089d704cd4913a748d2198359cc0cf7b32c5 Mon Sep 17 00:00:00 2001 From: bunnei Date: Mon, 7 Jan 2019 23:32:02 -0500 Subject: gpu: Refactor to take RendererBase instead of RasterizerInterface. --- src/video_core/gpu.h | 34 +++++++++++++++++++--------------- 1 file changed, 19 insertions(+), 15 deletions(-) (limited to 'src/video_core/gpu.h') diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index 6313702f2..ac7aec6a4 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h @@ -16,8 +16,8 @@ class System; } namespace VideoCore { -class RasterizerInterface; -} +class RendererBase; +} // namespace VideoCore namespace Tegra { @@ -121,7 +121,8 @@ enum class EngineID { class GPU final { public: - explicit GPU(Core::System& system, VideoCore::RasterizerInterface& rasterizer); + explicit GPU(Core::System& system, VideoCore::RendererBase& renderer); + ~GPU(); struct MethodCall { @@ -200,9 +201,24 @@ public: }; } regs{}; +private: + void ProcessBindMethod(const MethodCall& method_call); + void ProcessSemaphoreTriggerMethod(); + void ProcessSemaphoreRelease(); + void ProcessSemaphoreAcquire(); + + // Calls a GPU puller method. + void CallPullerMethod(const MethodCall& method_call); + // Calls a GPU engine method. + void CallEngineMethod(const MethodCall& method_call); + // Determines where the method should be executed. + bool ExecuteMethodOnEngine(const MethodCall& method_call); + private: std::unique_ptr dma_pusher; std::unique_ptr memory_manager; + + VideoCore::RendererBase& renderer; /// Mapping of command subchannels to their bound engine ids. std::array bound_engines = {}; @@ -217,18 +233,6 @@ private: std::unique_ptr maxwell_dma; /// Inline memory engine std::unique_ptr kepler_memory; - - void ProcessBindMethod(const MethodCall& method_call); - void ProcessSemaphoreTriggerMethod(); - void ProcessSemaphoreRelease(); - void ProcessSemaphoreAcquire(); - - // Calls a GPU puller method. - void CallPullerMethod(const MethodCall& method_call); - // Calls a GPU engine method. - void CallEngineMethod(const MethodCall& method_call); - // Determines where the method should be executed. - bool ExecuteMethodOnEngine(const MethodCall& method_call); }; #define ASSERT_REG_POSITION(field_name, position) \ -- cgit v1.2.3 From ac51d048a91593a3da124aeea32dc5b0898f1dd6 Mon Sep 17 00:00:00 2001 From: bunnei Date: Mon, 21 Jan 2019 15:18:09 -0500 Subject: gpu: Refactor command and swap buffers interface for asynch. --- src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp | 2 +- src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp | 14 ++------------ src/core/hle/service/nvflinger/nvflinger.cpp | 2 +- src/video_core/gpu.cpp | 10 ++++++++++ src/video_core/gpu.h | 15 ++++++++++++--- 5 files changed, 26 insertions(+), 17 deletions(-) (limited to 'src/video_core/gpu.h') diff --git a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp index dbe7ee6e8..20c7c39aa 100644 --- a/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp +++ b/src/core/hle/service/nvdrv/devices/nvdisp_disp0.cpp @@ -36,7 +36,7 @@ void nvdisp_disp0::flip(u32 buffer_handle, u32 offset, u32 format, u32 width, u3 auto& instance = Core::System::GetInstance(); instance.GetPerfStats().EndGameFrame(); - instance.Renderer().SwapBuffers(framebuffer); + instance.GPU().SwapBuffers(framebuffer); } } // namespace Service::Nvidia::Devices diff --git a/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp index 0a650f36c..8ce7bc7a5 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp @@ -136,16 +136,6 @@ u32 nvhost_gpu::AllocateObjectContext(const std::vector& input, std::vector< return 0; } -static void PushGPUEntries(Tegra::CommandList&& entries) { - if (entries.empty()) { - return; - } - - auto& dma_pusher{Core::System::GetInstance().GPU().DmaPusher()}; - dma_pusher.Push(std::move(entries)); - dma_pusher.DispatchCalls(); -} - u32 nvhost_gpu::SubmitGPFIFO(const std::vector& input, std::vector& output) { if (input.size() < sizeof(IoctlSubmitGpfifo)) { UNIMPLEMENTED(); @@ -163,7 +153,7 @@ u32 nvhost_gpu::SubmitGPFIFO(const std::vector& input, std::vector& outp std::memcpy(entries.data(), &input[sizeof(IoctlSubmitGpfifo)], params.num_entries * sizeof(Tegra::CommandListHeader)); - PushGPUEntries(std::move(entries)); + Core::System::GetInstance().GPU().PushGPUEntries(std::move(entries)); params.fence_out.id = 0; params.fence_out.value = 0; @@ -184,7 +174,7 @@ u32 nvhost_gpu::KickoffPB(const std::vector& input, std::vector& output) Memory::ReadBlock(params.address, entries.data(), params.num_entries * sizeof(Tegra::CommandListHeader)); - PushGPUEntries(std::move(entries)); + Core::System::GetInstance().GPU().PushGPUEntries(std::move(entries)); params.fence_out.id = 0; params.fence_out.value = 0; diff --git a/src/core/hle/service/nvflinger/nvflinger.cpp b/src/core/hle/service/nvflinger/nvflinger.cpp index 56f31e2ac..fc496b654 100644 --- a/src/core/hle/service/nvflinger/nvflinger.cpp +++ b/src/core/hle/service/nvflinger/nvflinger.cpp @@ -186,7 +186,7 @@ void NVFlinger::Compose() { // There was no queued buffer to draw, render previous frame system_instance.GetPerfStats().EndGameFrame(); - system_instance.Renderer().SwapBuffers({}); + system_instance.GPU().SwapBuffers({}); continue; } diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index 08abf8ac9..b0f3310e5 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp @@ -65,6 +65,16 @@ const DmaPusher& GPU::DmaPusher() const { return *dma_pusher; } +void GPU::PushGPUEntries(Tegra::CommandList&& entries) { + dma_pusher->Push(std::move(entries)); + dma_pusher->DispatchCalls(); +} + +void GPU::SwapBuffers( + std::optional> framebuffer) { + renderer.SwapBuffers(std::move(framebuffer)); +} + u32 RenderTargetBytesPerPixel(RenderTargetFormat format) { ASSERT(format != RenderTargetFormat::NONE); diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index ac7aec6a4..62649bd6e 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h @@ -200,6 +200,13 @@ public: std::array reg_array; }; } regs{}; + + /// Push GPU command entries to be processed + void PushGPUEntries(Tegra::CommandList&& entries); + + /// Swap buffers (render frame) + void SwapBuffers( + std::optional> framebuffer); private: void ProcessBindMethod(const MethodCall& method_call); @@ -207,11 +214,13 @@ private: void ProcessSemaphoreRelease(); void ProcessSemaphoreAcquire(); - // Calls a GPU puller method. + /// Calls a GPU puller method. void CallPullerMethod(const MethodCall& method_call); - // Calls a GPU engine method. + + /// Calls a GPU engine method. void CallEngineMethod(const MethodCall& method_call); - // Determines where the method should be executed. + + /// Determines where the method should be executed. bool ExecuteMethodOnEngine(const MethodCall& method_call); private: -- cgit v1.2.3 From 7b574f406b25c02a0e0efd8b7ec13d68ecb55497 Mon Sep 17 00:00:00 2001 From: bunnei Date: Wed, 23 Jan 2019 22:17:55 -0500 Subject: gpu: Move command processing to another thread. --- .../hle/service/nvdrv/devices/nvhost_as_gpu.cpp | 2 +- src/core/memory.cpp | 8 +- src/video_core/CMakeLists.txt | 2 + src/video_core/engines/kepler_memory.cpp | 2 +- src/video_core/engines/maxwell_dma.cpp | 4 +- src/video_core/gpu.cpp | 44 +++++- src/video_core/gpu.h | 22 ++- src/video_core/gpu_thread.cpp | 154 +++++++++++++++++++++ src/video_core/gpu_thread.h | 135 ++++++++++++++++++ 9 files changed, 358 insertions(+), 15 deletions(-) create mode 100644 src/video_core/gpu_thread.cpp create mode 100644 src/video_core/gpu_thread.h (limited to 'src/video_core/gpu.h') diff --git a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp index 466db7ccd..a34b9e753 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_as_gpu.cpp @@ -178,7 +178,7 @@ u32 nvhost_as_gpu::UnmapBuffer(const std::vector& input, std::vector& ou auto& gpu = system_instance.GPU(); auto cpu_addr = gpu.MemoryManager().GpuToCpuAddress(params.offset); ASSERT(cpu_addr); - system_instance.Renderer().Rasterizer().FlushAndInvalidateRegion(*cpu_addr, itr->second.size); + gpu.FlushAndInvalidateRegion(*cpu_addr, itr->second.size); params.offset = gpu.MemoryManager().UnmapBuffer(params.offset, itr->second.size); diff --git a/src/core/memory.cpp b/src/core/memory.cpp index ec279cef8..6591c45d2 100644 --- a/src/core/memory.cpp +++ b/src/core/memory.cpp @@ -356,16 +356,16 @@ void RasterizerFlushVirtualRegion(VAddr start, u64 size, FlushMode mode) { const VAddr overlap_end = std::min(end, region_end); const VAddr overlap_size = overlap_end - overlap_start; - auto& rasterizer = system_instance.Renderer().Rasterizer(); + auto& gpu = system_instance.GPU(); switch (mode) { case FlushMode::Flush: - rasterizer.FlushRegion(overlap_start, overlap_size); + gpu.FlushRegion(overlap_start, overlap_size); break; case FlushMode::Invalidate: - rasterizer.InvalidateRegion(overlap_start, overlap_size); + gpu.InvalidateRegion(overlap_start, overlap_size); break; case FlushMode::FlushAndInvalidate: - rasterizer.FlushAndInvalidateRegion(overlap_start, overlap_size); + gpu.FlushAndInvalidateRegion(overlap_start, overlap_size); break; } }; diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 3e9d2b3be..3bb5d0ed7 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -17,6 +17,8 @@ add_library(video_core STATIC engines/shader_header.h gpu.cpp gpu.h + gpu_thread.cpp + gpu_thread.h macro_interpreter.cpp macro_interpreter.h memory_manager.cpp diff --git a/src/video_core/engines/kepler_memory.cpp b/src/video_core/engines/kepler_memory.cpp index 4f6126116..aae2a4019 100644 --- a/src/video_core/engines/kepler_memory.cpp +++ b/src/video_core/engines/kepler_memory.cpp @@ -48,7 +48,7 @@ void KeplerMemory::ProcessData(u32 data) { // We have to invalidate the destination region to evict any outdated surfaces from the cache. // We do this before actually writing the new data because the destination address might contain // a dirty surface that will have to be written back to memory. - rasterizer.InvalidateRegion(*dest_address, sizeof(u32)); + Core::System::GetInstance().GPU().InvalidateRegion(*dest_address, sizeof(u32)); Memory::Write32(*dest_address, data); system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite(); diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp index 0474c7ba3..9dfea5999 100644 --- a/src/video_core/engines/maxwell_dma.cpp +++ b/src/video_core/engines/maxwell_dma.cpp @@ -92,12 +92,12 @@ void MaxwellDMA::HandleCopy() { const auto FlushAndInvalidate = [&](u32 src_size, u64 dst_size) { // TODO(Subv): For now, manually flush the regions until we implement GPU-accelerated // copying. - rasterizer.FlushRegion(*source_cpu, src_size); + Core::System::GetInstance().GPU().FlushRegion(*source_cpu, src_size); // We have to invalidate the destination region to evict any outdated surfaces from the // cache. We do this before actually writing the new data because the destination address // might contain a dirty surface that will have to be written back to memory. - rasterizer.InvalidateRegion(*dest_cpu, dst_size); + Core::System::GetInstance().GPU().InvalidateRegion(*dest_cpu, dst_size); }; if (regs.exec.is_dst_linear && !regs.exec.is_src_linear) { diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index b0f3310e5..0d7a052dd 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp @@ -6,12 +6,14 @@ #include "core/core.h" #include "core/core_timing.h" #include "core/memory.h" +#include "core/settings.h" #include "video_core/engines/fermi_2d.h" #include "video_core/engines/kepler_compute.h" #include "video_core/engines/kepler_memory.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/engines/maxwell_dma.h" #include "video_core/gpu.h" +#include "video_core/gpu_thread.h" #include "video_core/renderer_base.h" namespace Tegra { @@ -37,6 +39,10 @@ GPU::GPU(Core::System& system, VideoCore::RendererBase& renderer) : renderer{ren kepler_compute = std::make_unique(*memory_manager); maxwell_dma = std::make_unique(system, rasterizer, *memory_manager); kepler_memory = std::make_unique(system, rasterizer, *memory_manager); + + if (Settings::values.use_asynchronous_gpu_emulation) { + gpu_thread = std::make_unique(renderer, *dma_pusher); + } } GPU::~GPU() = default; @@ -66,13 +72,45 @@ const DmaPusher& GPU::DmaPusher() const { } void GPU::PushGPUEntries(Tegra::CommandList&& entries) { - dma_pusher->Push(std::move(entries)); - dma_pusher->DispatchCalls(); + if (Settings::values.use_asynchronous_gpu_emulation) { + gpu_thread->SubmitList(std::move(entries)); + } else { + dma_pusher->Push(std::move(entries)); + dma_pusher->DispatchCalls(); + } } void GPU::SwapBuffers( std::optional> framebuffer) { - renderer.SwapBuffers(std::move(framebuffer)); + if (Settings::values.use_asynchronous_gpu_emulation) { + gpu_thread->SwapBuffers(std::move(framebuffer)); + } else { + renderer.SwapBuffers(std::move(framebuffer)); + } +} + +void GPU::FlushRegion(VAddr addr, u64 size) { + if (Settings::values.use_asynchronous_gpu_emulation) { + gpu_thread->FlushRegion(addr, size); + } else { + renderer.Rasterizer().FlushRegion(addr, size); + } +} + +void GPU::InvalidateRegion(VAddr addr, u64 size) { + if (Settings::values.use_asynchronous_gpu_emulation) { + gpu_thread->InvalidateRegion(addr, size); + } else { + renderer.Rasterizer().InvalidateRegion(addr, size); + } +} + +void GPU::FlushAndInvalidateRegion(VAddr addr, u64 size) { + if (Settings::values.use_asynchronous_gpu_emulation) { + gpu_thread->FlushAndInvalidateRegion(addr, size); + } else { + renderer.Rasterizer().FlushAndInvalidateRegion(addr, size); + } } u32 RenderTargetBytesPerPixel(RenderTargetFormat format) { diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index 62649bd6e..3f3098bf1 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h @@ -19,6 +19,10 @@ namespace VideoCore { class RendererBase; } // namespace VideoCore +namespace VideoCommon::GPUThread { +class ThreadManager; +} // namespace VideoCommon::GPUThread + namespace Tegra { enum class RenderTargetFormat : u32 { @@ -200,7 +204,7 @@ public: std::array reg_array; }; } regs{}; - + /// Push GPU command entries to be processed void PushGPUEntries(Tegra::CommandList&& entries); @@ -208,6 +212,15 @@ public: void SwapBuffers( std::optional> framebuffer); + /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory + void FlushRegion(VAddr addr, u64 size); + + /// Notify rasterizer that any caches of the specified region should be invalidated + void InvalidateRegion(VAddr addr, u64 size); + + /// Notify rasterizer that any caches of the specified region should be flushed and invalidated + void FlushAndInvalidateRegion(VAddr addr, u64 size); + private: void ProcessBindMethod(const MethodCall& method_call); void ProcessSemaphoreTriggerMethod(); @@ -216,17 +229,18 @@ private: /// Calls a GPU puller method. void CallPullerMethod(const MethodCall& method_call); - + /// Calls a GPU engine method. void CallEngineMethod(const MethodCall& method_call); - + /// Determines where the method should be executed. bool ExecuteMethodOnEngine(const MethodCall& method_call); private: std::unique_ptr dma_pusher; std::unique_ptr memory_manager; - + std::unique_ptr gpu_thread; + VideoCore::RendererBase& renderer; /// Mapping of command subchannels to their bound engine ids. diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp new file mode 100644 index 000000000..22c4cca4d --- /dev/null +++ b/src/video_core/gpu_thread.cpp @@ -0,0 +1,154 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "common/assert.h" +#include "common/microprofile.h" +#include "core/frontend/scope_acquire_window_context.h" +#include "core/settings.h" +#include "video_core/dma_pusher.h" +#include "video_core/gpu.h" +#include "video_core/gpu_thread.h" +#include "video_core/renderer_base.h" + +namespace VideoCommon::GPUThread { + +/// Executes a single GPU thread command +static void ExecuteCommand(CommandData* command, VideoCore::RendererBase& renderer, + Tegra::DmaPusher& dma_pusher) { + if (const auto submit_list = std::get_if(command)) { + dma_pusher.Push(std::move(submit_list->entries)); + dma_pusher.DispatchCalls(); + } else if (const auto data = std::get_if(command)) { + renderer.SwapBuffers(data->framebuffer); + } else if (const auto data = std::get_if(command)) { + renderer.Rasterizer().FlushRegion(data->addr, data->size); + } else if (const auto data = std::get_if(command)) { + renderer.Rasterizer().InvalidateRegion(data->addr, data->size); + } else if (const auto data = std::get_if(command)) { + renderer.Rasterizer().FlushAndInvalidateRegion(data->addr, data->size); + } else { + UNREACHABLE(); + } +} + +/// Runs the GPU thread +static void RunThread(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_pusher, + SynchState& state) { + + MicroProfileOnThreadCreate("GpuThread"); + + auto WaitForWakeup = [&]() { + std::unique_lock lock{state.signal_mutex}; + state.signal_condition.wait(lock, [&] { return !state.IsIdle() || !state.is_running; }); + }; + + // Wait for first GPU command before acquiring the window context + WaitForWakeup(); + + // If emulation was stopped during disk shader loading, abort before trying to acquire context + if (!state.is_running) { + return; + } + + Core::Frontend::ScopeAcquireWindowContext acquire_context{renderer.GetRenderWindow()}; + + while (state.is_running) { + if (!state.is_running) { + return; + } + + { + // Thread has been woken up, so make the previous write queue the next read queue + std::lock_guard lock{state.signal_mutex}; + std::swap(state.push_queue, state.pop_queue); + } + + // Execute all of the GPU commands + while (!state.pop_queue->empty()) { + ExecuteCommand(&state.pop_queue->front(), renderer, dma_pusher); + state.pop_queue->pop(); + } + + // Signal that the GPU thread has finished processing commands + if (state.IsIdle()) { + state.idle_condition.notify_one(); + } + + // Wait for CPU thread to send more GPU commands + WaitForWakeup(); + } +} + +ThreadManager::ThreadManager(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_pusher) + : renderer{renderer}, dma_pusher{dma_pusher}, thread{RunThread, std::ref(renderer), + std::ref(dma_pusher), std::ref(state)}, + thread_id{thread.get_id()} {} + +ThreadManager::~ThreadManager() { + { + // Notify GPU thread that a shutdown is pending + std::lock_guard lock{state.signal_mutex}; + state.is_running = false; + } + + state.signal_condition.notify_one(); + thread.join(); +} + +void ThreadManager::SubmitList(Tegra::CommandList&& entries) { + if (entries.empty()) { + return; + } + + PushCommand(SubmitListCommand(std::move(entries)), false, false); +} + +void ThreadManager::SwapBuffers( + std::optional> framebuffer) { + PushCommand(SwapBuffersCommand(std::move(framebuffer)), true, false); +} + +void ThreadManager::FlushRegion(VAddr addr, u64 size) { + if (Settings::values.use_accurate_gpu_emulation) { + PushCommand(FlushRegionCommand(addr, size), true, false); + } +} + +void ThreadManager::InvalidateRegion(VAddr addr, u64 size) { + PushCommand(InvalidateRegionCommand(addr, size), true, true); +} + +void ThreadManager::FlushAndInvalidateRegion(VAddr addr, u64 size) { + if (Settings::values.use_accurate_gpu_emulation) { + PushCommand(FlushAndInvalidateRegionCommand(addr, size), true, false); + } else { + InvalidateRegion(addr, size); + } +} + +void ThreadManager::PushCommand(CommandData&& command_data, bool wait_for_idle, bool allow_on_cpu) { + { + std::lock_guard lock{state.signal_mutex}; + + if ((allow_on_cpu && state.IsIdle()) || IsGpuThread()) { + // Execute the command synchronously on the current thread + ExecuteCommand(&command_data, renderer, dma_pusher); + return; + } + + // Push the command to the GPU thread + state.push_queue->emplace(command_data); + } + + // Signal the GPU thread that commands are pending + state.signal_condition.notify_one(); + + if (wait_for_idle) { + // Wait for the GPU to be idle (all commands to be executed) + std::unique_lock lock{state.idle_mutex}; + state.idle_condition.wait(lock, [this] { return state.IsIdle(); }); + } +} + +} // namespace VideoCommon::GPUThread diff --git a/src/video_core/gpu_thread.h b/src/video_core/gpu_thread.h new file mode 100644 index 000000000..ad9f9462b --- /dev/null +++ b/src/video_core/gpu_thread.h @@ -0,0 +1,135 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include + +namespace Tegra { +struct FramebufferConfig; +class DmaPusher; +} // namespace Tegra + +namespace VideoCore { +class RendererBase; +} // namespace VideoCore + +namespace VideoCommon::GPUThread { + +/// Command to signal to the GPU thread that a command list is ready for processing +struct SubmitListCommand final { + explicit SubmitListCommand(Tegra::CommandList&& entries) : entries{std::move(entries)} {} + + Tegra::CommandList entries; +}; + +/// Command to signal to the GPU thread that a swap buffers is pending +struct SwapBuffersCommand final { + explicit SwapBuffersCommand(std::optional framebuffer) + : framebuffer{std::move(framebuffer)} {} + + std::optional framebuffer; +}; + +/// Command to signal to the GPU thread to flush a region +struct FlushRegionCommand final { + explicit constexpr FlushRegionCommand(VAddr addr, u64 size) : addr{addr}, size{size} {} + + const VAddr addr; + const u64 size; +}; + +/// Command to signal to the GPU thread to invalidate a region +struct InvalidateRegionCommand final { + explicit constexpr InvalidateRegionCommand(VAddr addr, u64 size) : addr{addr}, size{size} {} + + const VAddr addr; + const u64 size; +}; + +/// Command to signal to the GPU thread to flush and invalidate a region +struct FlushAndInvalidateRegionCommand final { + explicit constexpr FlushAndInvalidateRegionCommand(VAddr addr, u64 size) + : addr{addr}, size{size} {} + + const VAddr addr; + const u64 size; +}; + +using CommandData = std::variant; + +/// Struct used to synchronize the GPU thread +struct SynchState final { + std::atomic is_running{true}; + std::condition_variable signal_condition; + std::mutex signal_mutex; + std::condition_variable idle_condition; + std::mutex idle_mutex; + + // We use two queues for sending commands to the GPU thread, one for writing (push_queue) to and + // one for reading from (pop_queue). These are swapped whenever the current pop_queue becomes + // empty. This allows for efficient thread-safe access, as it does not require any copies. + + using CommandQueue = std::queue; + std::array command_queues; + CommandQueue* push_queue{&command_queues[0]}; + CommandQueue* pop_queue{&command_queues[1]}; + + /// Returns true if the GPU thread should be idle, meaning there are no commands to process + bool IsIdle() const { + return command_queues[0].empty() && command_queues[1].empty(); + } +}; + +/// Class used to manage the GPU thread +class ThreadManager final { +public: + explicit ThreadManager(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_pusher); + ~ThreadManager(); + + /// Push GPU command entries to be processed + void SubmitList(Tegra::CommandList&& entries); + + /// Swap buffers (render frame) + void SwapBuffers( + std::optional> framebuffer); + + /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory + void FlushRegion(VAddr addr, u64 size); + + /// Notify rasterizer that any caches of the specified region should be invalidated + void InvalidateRegion(VAddr addr, u64 size); + + /// Notify rasterizer that any caches of the specified region should be flushed and invalidated + void FlushAndInvalidateRegion(VAddr addr, u64 size); + + /// Waits the caller until the GPU thread is idle, used for synchronization + void WaitForIdle(); + +private: + /// Pushes a command to be executed by the GPU thread + void PushCommand(CommandData&& command_data, bool wait_for_idle, bool allow_on_cpu); + + /// Returns true if this is called by the GPU thread + bool IsGpuThread() const { + return std::this_thread::get_id() == thread_id; + } + +private: + SynchState state; + std::thread thread; + std::thread::id thread_id; + VideoCore::RendererBase& renderer; + Tegra::DmaPusher& dma_pusher; +}; + +} // namespace VideoCommon::GPUThread -- cgit v1.2.3 From aaa373585cd55bd03fcc589d2ad9f749e2cb99d4 Mon Sep 17 00:00:00 2001 From: bunnei Date: Fri, 8 Feb 2019 23:21:53 -0500 Subject: gpu: Refactor a/synchronous implementations into their own classes. --- src/core/core.cpp | 9 ++++++-- src/video_core/CMakeLists.txt | 4 ++++ src/video_core/gpu.cpp | 48 ------------------------------------------- src/video_core/gpu.h | 26 ++++++++++------------- src/video_core/gpu_asynch.cpp | 37 +++++++++++++++++++++++++++++++++ src/video_core/gpu_asynch.h | 37 +++++++++++++++++++++++++++++++++ src/video_core/gpu_synch.cpp | 37 +++++++++++++++++++++++++++++++++ src/video_core/gpu_synch.h | 29 ++++++++++++++++++++++++++ 8 files changed, 162 insertions(+), 65 deletions(-) create mode 100644 src/video_core/gpu_asynch.cpp create mode 100644 src/video_core/gpu_asynch.h create mode 100644 src/video_core/gpu_synch.cpp create mode 100644 src/video_core/gpu_synch.h (limited to 'src/video_core/gpu.h') diff --git a/src/core/core.cpp b/src/core/core.cpp index 9e5d167c3..1d83e9e11 100644 --- a/src/core/core.cpp +++ b/src/core/core.cpp @@ -36,7 +36,8 @@ #include "frontend/applets/software_keyboard.h" #include "frontend/applets/web_browser.h" #include "video_core/debug_utils/debug_utils.h" -#include "video_core/gpu.h" +#include "video_core/gpu_asynch.h" +#include "video_core/gpu_synch.h" #include "video_core/renderer_base.h" #include "video_core/video_core.h" @@ -131,7 +132,11 @@ struct System::Impl { is_powered_on = true; - gpu_core = std::make_unique(system, *renderer); + if (Settings::values.use_asynchronous_gpu_emulation) { + gpu_core = std::make_unique(system, *renderer); + } else { + gpu_core = std::make_unique(system, *renderer); + } cpu_core_manager.Initialize(system); diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 3bb5d0ed7..a4cb33c17 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -17,6 +17,10 @@ add_library(video_core STATIC engines/shader_header.h gpu.cpp gpu.h + gpu_asynch.cpp + gpu_asynch.h + gpu_synch.cpp + gpu_synch.h gpu_thread.cpp gpu_thread.h macro_interpreter.cpp diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index 0d7a052dd..08abf8ac9 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp @@ -6,14 +6,12 @@ #include "core/core.h" #include "core/core_timing.h" #include "core/memory.h" -#include "core/settings.h" #include "video_core/engines/fermi_2d.h" #include "video_core/engines/kepler_compute.h" #include "video_core/engines/kepler_memory.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/engines/maxwell_dma.h" #include "video_core/gpu.h" -#include "video_core/gpu_thread.h" #include "video_core/renderer_base.h" namespace Tegra { @@ -39,10 +37,6 @@ GPU::GPU(Core::System& system, VideoCore::RendererBase& renderer) : renderer{ren kepler_compute = std::make_unique(*memory_manager); maxwell_dma = std::make_unique(system, rasterizer, *memory_manager); kepler_memory = std::make_unique(system, rasterizer, *memory_manager); - - if (Settings::values.use_asynchronous_gpu_emulation) { - gpu_thread = std::make_unique(renderer, *dma_pusher); - } } GPU::~GPU() = default; @@ -71,48 +65,6 @@ const DmaPusher& GPU::DmaPusher() const { return *dma_pusher; } -void GPU::PushGPUEntries(Tegra::CommandList&& entries) { - if (Settings::values.use_asynchronous_gpu_emulation) { - gpu_thread->SubmitList(std::move(entries)); - } else { - dma_pusher->Push(std::move(entries)); - dma_pusher->DispatchCalls(); - } -} - -void GPU::SwapBuffers( - std::optional> framebuffer) { - if (Settings::values.use_asynchronous_gpu_emulation) { - gpu_thread->SwapBuffers(std::move(framebuffer)); - } else { - renderer.SwapBuffers(std::move(framebuffer)); - } -} - -void GPU::FlushRegion(VAddr addr, u64 size) { - if (Settings::values.use_asynchronous_gpu_emulation) { - gpu_thread->FlushRegion(addr, size); - } else { - renderer.Rasterizer().FlushRegion(addr, size); - } -} - -void GPU::InvalidateRegion(VAddr addr, u64 size) { - if (Settings::values.use_asynchronous_gpu_emulation) { - gpu_thread->InvalidateRegion(addr, size); - } else { - renderer.Rasterizer().InvalidateRegion(addr, size); - } -} - -void GPU::FlushAndInvalidateRegion(VAddr addr, u64 size) { - if (Settings::values.use_asynchronous_gpu_emulation) { - gpu_thread->FlushAndInvalidateRegion(addr, size); - } else { - renderer.Rasterizer().FlushAndInvalidateRegion(addr, size); - } -} - u32 RenderTargetBytesPerPixel(RenderTargetFormat format) { ASSERT(format != RenderTargetFormat::NONE); diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index 3f3098bf1..14a421cc1 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h @@ -19,10 +19,6 @@ namespace VideoCore { class RendererBase; } // namespace VideoCore -namespace VideoCommon::GPUThread { -class ThreadManager; -} // namespace VideoCommon::GPUThread - namespace Tegra { enum class RenderTargetFormat : u32 { @@ -123,7 +119,7 @@ enum class EngineID { MAXWELL_DMA_COPY_A = 0xB0B5, }; -class GPU final { +class GPU { public: explicit GPU(Core::System& system, VideoCore::RendererBase& renderer); @@ -206,20 +202,20 @@ public: } regs{}; /// Push GPU command entries to be processed - void PushGPUEntries(Tegra::CommandList&& entries); + virtual void PushGPUEntries(Tegra::CommandList&& entries) = 0; /// Swap buffers (render frame) - void SwapBuffers( - std::optional> framebuffer); + virtual void SwapBuffers( + std::optional> framebuffer) = 0; /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory - void FlushRegion(VAddr addr, u64 size); + virtual void FlushRegion(VAddr addr, u64 size) = 0; /// Notify rasterizer that any caches of the specified region should be invalidated - void InvalidateRegion(VAddr addr, u64 size); + virtual void InvalidateRegion(VAddr addr, u64 size) = 0; /// Notify rasterizer that any caches of the specified region should be flushed and invalidated - void FlushAndInvalidateRegion(VAddr addr, u64 size); + virtual void FlushAndInvalidateRegion(VAddr addr, u64 size) = 0; private: void ProcessBindMethod(const MethodCall& method_call); @@ -236,13 +232,13 @@ private: /// Determines where the method should be executed. bool ExecuteMethodOnEngine(const MethodCall& method_call); -private: +protected: std::unique_ptr dma_pusher; - std::unique_ptr memory_manager; - std::unique_ptr gpu_thread; - VideoCore::RendererBase& renderer; +private: + std::unique_ptr memory_manager; + /// Mapping of command subchannels to their bound engine ids. std::array bound_engines = {}; diff --git a/src/video_core/gpu_asynch.cpp b/src/video_core/gpu_asynch.cpp new file mode 100644 index 000000000..ad0a747e3 --- /dev/null +++ b/src/video_core/gpu_asynch.cpp @@ -0,0 +1,37 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "video_core/gpu_asynch.h" +#include "video_core/gpu_thread.h" +#include "video_core/renderer_base.h" + +namespace VideoCommon { + +GPUAsynch::GPUAsynch(Core::System& system, VideoCore::RendererBase& renderer) + : Tegra::GPU(system, renderer), gpu_thread{renderer, *dma_pusher} {} + +GPUAsynch::~GPUAsynch() = default; + +void GPUAsynch::PushGPUEntries(Tegra::CommandList&& entries) { + gpu_thread.SubmitList(std::move(entries)); +} + +void GPUAsynch::SwapBuffers( + std::optional> framebuffer) { + gpu_thread.SwapBuffers(std::move(framebuffer)); +} + +void GPUAsynch::FlushRegion(VAddr addr, u64 size) { + gpu_thread.FlushRegion(addr, size); +} + +void GPUAsynch::InvalidateRegion(VAddr addr, u64 size) { + gpu_thread.InvalidateRegion(addr, size); +} + +void GPUAsynch::FlushAndInvalidateRegion(VAddr addr, u64 size) { + gpu_thread.FlushAndInvalidateRegion(addr, size); +} + +} // namespace VideoCommon diff --git a/src/video_core/gpu_asynch.h b/src/video_core/gpu_asynch.h new file mode 100644 index 000000000..58046f3e9 --- /dev/null +++ b/src/video_core/gpu_asynch.h @@ -0,0 +1,37 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include "video_core/gpu.h" +#include "video_core/gpu_thread.h" + +namespace VideoCore { +class RendererBase; +} // namespace VideoCore + +namespace VideoCommon { + +namespace GPUThread { +class ThreadManager; +} // namespace GPUThread + +/// Implementation of GPU interface that runs the GPU asynchronously +class GPUAsynch : public Tegra::GPU { +public: + explicit GPUAsynch(Core::System& system, VideoCore::RendererBase& renderer); + ~GPUAsynch(); + + void PushGPUEntries(Tegra::CommandList&& entries) override; + void SwapBuffers( + std::optional> framebuffer) override; + void FlushRegion(VAddr addr, u64 size) override; + void InvalidateRegion(VAddr addr, u64 size) override; + void FlushAndInvalidateRegion(VAddr addr, u64 size) override; + +private: + GPUThread::ThreadManager gpu_thread; +}; + +} // namespace VideoCommon diff --git a/src/video_core/gpu_synch.cpp b/src/video_core/gpu_synch.cpp new file mode 100644 index 000000000..4c00b96c7 --- /dev/null +++ b/src/video_core/gpu_synch.cpp @@ -0,0 +1,37 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include "video_core/gpu_synch.h" +#include "video_core/renderer_base.h" + +namespace VideoCommon { + +GPUSynch::GPUSynch(Core::System& system, VideoCore::RendererBase& renderer) + : Tegra::GPU(system, renderer) {} + +GPUSynch::~GPUSynch() = default; + +void GPUSynch::PushGPUEntries(Tegra::CommandList&& entries) { + dma_pusher->Push(std::move(entries)); + dma_pusher->DispatchCalls(); +} + +void GPUSynch::SwapBuffers( + std::optional> framebuffer) { + renderer.SwapBuffers(std::move(framebuffer)); +} + +void GPUSynch::FlushRegion(VAddr addr, u64 size) { + renderer.Rasterizer().FlushRegion(addr, size); +} + +void GPUSynch::InvalidateRegion(VAddr addr, u64 size) { + renderer.Rasterizer().InvalidateRegion(addr, size); +} + +void GPUSynch::FlushAndInvalidateRegion(VAddr addr, u64 size) { + renderer.Rasterizer().FlushAndInvalidateRegion(addr, size); +} + +} // namespace VideoCommon diff --git a/src/video_core/gpu_synch.h b/src/video_core/gpu_synch.h new file mode 100644 index 000000000..658f683e2 --- /dev/null +++ b/src/video_core/gpu_synch.h @@ -0,0 +1,29 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include "video_core/gpu.h" + +namespace VideoCore { +class RendererBase; +} // namespace VideoCore + +namespace VideoCommon { + +/// Implementation of GPU interface that runs the GPU synchronously +class GPUSynch : public Tegra::GPU { +public: + explicit GPUSynch(Core::System& system, VideoCore::RendererBase& renderer); + ~GPUSynch(); + + void PushGPUEntries(Tegra::CommandList&& entries) override; + void SwapBuffers( + std::optional> framebuffer) override; + void FlushRegion(VAddr addr, u64 size) override; + void InvalidateRegion(VAddr addr, u64 size) override; + void FlushAndInvalidateRegion(VAddr addr, u64 size) override; +}; + +} // namespace VideoCommon -- cgit v1.2.3