diff options
Diffstat (limited to 'src/video_core/gpu_thread.h')
| -rw-r--r-- | src/video_core/gpu_thread.h | 136 |
1 files changed, 88 insertions, 48 deletions
diff --git a/src/video_core/gpu_thread.h b/src/video_core/gpu_thread.h
index edb148b14..62bcea5bb 100644
--- a/src/video_core/gpu_thread.h
+++ b/src/video_core/gpu_thread.h
@@ -4,26 +4,33 @@
 
 #pragma once
 
-#include <array>
 #include <atomic>
 #include <condition_variable>
-#include <memory>
 #include <mutex>
 #include <optional>
 #include <thread>
 #include <variant>
 
+#include "common/threadsafe_queue.h"
+#include "video_core/gpu.h"
+
 namespace Tegra {
 struct FramebufferConfig;
 class DmaPusher;
 } // namespace Tegra
 
-namespace VideoCore {
-class RendererBase;
-} // namespace VideoCore
+namespace Core {
+class System;
+namespace Timing {
+struct EventType;
+} // namespace Timing
+} // namespace Core
 
 namespace VideoCommon::GPUThread {
 
+/// Command to signal to the GPU thread that processing has ended
+struct EndProcessingCommand final {};
+
 /// Command to signal to the GPU thread that a command list is ready for processing
 struct SubmitListCommand final {
     explicit SubmitListCommand(Tegra::CommandList&& entries) : entries{std::move(entries)} {}
@@ -36,65 +43,103 @@ struct SwapBuffersCommand final {
     explicit SwapBuffersCommand(std::optional<const Tegra::FramebufferConfig> framebuffer)
         : framebuffer{std::move(framebuffer)} {}
 
-    std::optional<const Tegra::FramebufferConfig> framebuffer;
+    std::optional<Tegra::FramebufferConfig> framebuffer;
 };
 
 /// Command to signal to the GPU thread to flush a region
 struct FlushRegionCommand final {
-    explicit constexpr FlushRegionCommand(VAddr addr, u64 size) : addr{addr}, size{size} {}
+    explicit constexpr FlushRegionCommand(CacheAddr addr, u64 size) : addr{addr}, size{size} {}
 
-    const VAddr addr;
-    const u64 size;
+    CacheAddr addr;
+    u64 size;
 };
 
 /// Command to signal to the GPU thread to invalidate a region
 struct InvalidateRegionCommand final {
-    explicit constexpr InvalidateRegionCommand(VAddr addr, u64 size) : addr{addr}, size{size} {}
+    explicit constexpr InvalidateRegionCommand(CacheAddr addr, u64 size) : addr{addr}, size{size} {}
 
-    const VAddr addr;
-    const u64 size;
+    CacheAddr addr;
+    u64 size;
 };
 
 /// Command to signal to the GPU thread to flush and invalidate a region
 struct FlushAndInvalidateRegionCommand final {
-    explicit constexpr FlushAndInvalidateRegionCommand(VAddr addr, u64 size)
+    explicit constexpr FlushAndInvalidateRegionCommand(CacheAddr addr, u64 size)
         : addr{addr}, size{size} {}
 
-    const VAddr addr;
-    const u64 size;
+    CacheAddr addr;
+    u64 size;
 };
 
-using CommandData = std::variant<SubmitListCommand, SwapBuffersCommand, FlushRegionCommand,
-                                 InvalidateRegionCommand, FlushAndInvalidateRegionCommand>;
+using CommandData =
+    std::variant<EndProcessingCommand, SubmitListCommand, SwapBuffersCommand, FlushRegionCommand,
+                 InvalidateRegionCommand, FlushAndInvalidateRegionCommand>;
+
+struct CommandDataContainer {
+    CommandDataContainer() = default;
+
+    CommandDataContainer(CommandData&& data, u64 next_fence)
+        : data{std::move(data)}, fence{next_fence} {}
+
+    CommandDataContainer& operator=(const CommandDataContainer& t) {
+        data = std::move(t.data);
+        fence = t.fence;
+        return *this;
+    }
+
+    CommandData data;
+    u64 fence{};
+};
 
 /// Struct used to synchronize the GPU thread
 struct SynchState final {
-    std::atomic<bool> is_running{true};
-    std::atomic<bool> is_idle{true};
-    std::condition_variable signal_condition;
-    std::mutex signal_mutex;
-    std::condition_variable idle_condition;
-    std::mutex idle_mutex;
-
-    // We use two queues for sending commands to the GPU thread, one for writing (push_queue) to and
-    // one for reading from (pop_queue). These are swapped whenever the current pop_queue becomes
-    // empty. This allows for efficient thread-safe access, as it does not require any copies.
-
-    using CommandQueue = std::queue<CommandData>;
-    std::array<CommandQueue, 2> command_queues;
-    CommandQueue* push_queue{&command_queues[0]};
-    CommandQueue* pop_queue{&command_queues[1]};
-
-    void UpdateIdleState() {
-        std::lock_guard<std::mutex> lock{idle_mutex};
-        is_idle = command_queues[0].empty() && command_queues[1].empty();
+    std::atomic_bool is_running{true};
+    std::atomic_int queued_frame_count{};
+    std::mutex synchronization_mutex;
+    std::mutex commands_mutex;
+    std::condition_variable commands_condition;
+    std::condition_variable synchronization_condition;
+
+    /// Returns true if the gap in GPU commands is small enough that we can consider the CPU and GPU
+    /// synchronized. This is entirely empirical.
+    bool IsSynchronized() const {
+        constexpr std::size_t max_queue_gap{5};
+        return queue.Size() <= max_queue_gap;
+    }
+
+    void TrySynchronize() {
+        if (IsSynchronized()) {
+            std::lock_guard<std::mutex> lock{synchronization_mutex};
+            synchronization_condition.notify_one();
+        }
     }
+
+    void WaitForSynchronization(u64 fence);
+
+    void SignalCommands() {
+        if (queue.Empty()) {
+            return;
+        }
+
+        commands_condition.notify_one();
+    }
+
+    void WaitForCommands() {
+        std::unique_lock lock{commands_mutex};
+        commands_condition.wait(lock, [this] { return !queue.Empty(); });
+    }
+
+    using CommandQueue = Common::SPSCQueue<CommandDataContainer>;
+    CommandQueue queue;
+    u64 last_fence{};
+    std::atomic<u64> signaled_fence{};
 };
 
 /// Class used to manage the GPU thread
 class ThreadManager final {
 public:
-    explicit ThreadManager(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_pusher);
+    explicit ThreadManager(Core::System& system, VideoCore::RendererBase& renderer,
+                           Tegra::DmaPusher& dma_pusher);
     ~ThreadManager();
 
     /// Push GPU command entries to be processed
@@ -105,27 +150,22 @@ public:
         std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer);
 
     /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
-    void FlushRegion(VAddr addr, u64 size);
+    void FlushRegion(CacheAddr addr, u64 size);
 
     /// Notify rasterizer that any caches of the specified region should be invalidated
-    void InvalidateRegion(VAddr addr, u64 size);
+    void InvalidateRegion(CacheAddr addr, u64 size);
 
     /// Notify rasterizer that any caches of the specified region should be flushed and invalidated
-    void FlushAndInvalidateRegion(VAddr addr, u64 size);
+    void FlushAndInvalidateRegion(CacheAddr addr, u64 size);
 
 private:
     /// Pushes a command to be executed by the GPU thread
-    void PushCommand(CommandData&& command_data, bool wait_for_idle, bool allow_on_cpu);
-
-    /// Returns true if this is called by the GPU thread
-    bool IsGpuThread() const {
-        return std::this_thread::get_id() == thread_id;
-    }
+    u64 PushCommand(CommandData&& command_data);
 
 private:
     SynchState state;
-    VideoCore::RendererBase& renderer;
-    Tegra::DmaPusher& dma_pusher;
+    Core::System& system;
+    Core::Timing::EventType* synchronization_event{};
     std::thread thread;
     std::thread::id thread_id;
 };
|
