about | summary | refs | log | tree | commit | diff
path: root/src/video_core/gpu_thread.h
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core/gpu_thread.h')
-rw-r--r-- src/video_core/gpu_thread.h 136
1 files changed, 88 insertions, 48 deletions
diff --git a/src/video_core/gpu_thread.h b/src/video_core/gpu_thread.h
index edb148b14..62bcea5bb 100644
--- a/src/video_core/gpu_thread.h
+++ b/src/video_core/gpu_thread.h
@@ -4,26 +4,33 @@
#pragma once
-#include <array>
#include <atomic>
#include <condition_variable>
-#include <memory>
#include <mutex>
#include <optional>
#include <thread>
#include <variant>
+#include "common/threadsafe_queue.h"
+#include "video_core/gpu.h"
+
namespace Tegra {
struct FramebufferConfig;
class DmaPusher;
} // namespace Tegra
-namespace VideoCore {
-class RendererBase;
-} // namespace VideoCore
+namespace Core {
+class System;
+namespace Timing {
+struct EventType;
+} // namespace Timing
+} // namespace Core
namespace VideoCommon::GPUThread {
+/// Command to signal to the GPU thread that processing has ended
+/// Carries no payload: it is an empty tag type. It is listed as the first alternative of
+/// CommandData, so a default-constructed CommandData holds this command.
+struct EndProcessingCommand final {};
+
/// Command to signal to the GPU thread that a command list is ready for processing
struct SubmitListCommand final {
explicit SubmitListCommand(Tegra::CommandList&& entries) : entries{std::move(entries)} {}
@@ -36,65 +43,103 @@ struct SwapBuffersCommand final {
explicit SwapBuffersCommand(std::optional<const Tegra::FramebufferConfig> framebuffer)
: framebuffer{std::move(framebuffer)} {}
- std::optional<const Tegra::FramebufferConfig> framebuffer;
+ std::optional<Tegra::FramebufferConfig> framebuffer;
};
/// Command to signal to the GPU thread to flush a region
struct FlushRegionCommand final {
- explicit constexpr FlushRegionCommand(VAddr addr, u64 size) : addr{addr}, size{size} {}
+ /// Stores the region description verbatim; no validation is performed here.
+ explicit constexpr FlushRegionCommand(CacheAddr addr, u64 size) : addr{addr}, size{size} {}
- const VAddr addr;
- const u64 size;
+ /// Start of the region to flush.
+ /// NOTE(review): presumably a host cache address, going by the CacheAddr type - confirm.
+ /// Non-const so that the command (and the CommandData variant holding it) stays assignable.
+ CacheAddr addr;
+ /// Extent of the region. NOTE(review): presumably in bytes - confirm.
+ u64 size;
};
/// Command to signal to the GPU thread to invalidate a region
struct InvalidateRegionCommand final {
- explicit constexpr InvalidateRegionCommand(VAddr addr, u64 size) : addr{addr}, size{size} {}
+ /// Stores the region description verbatim; no validation is performed here.
+ explicit constexpr InvalidateRegionCommand(CacheAddr addr, u64 size) : addr{addr}, size{size} {}
- const VAddr addr;
- const u64 size;
+ /// Start of the region to invalidate.
+ /// NOTE(review): presumably a host cache address, going by the CacheAddr type - confirm.
+ /// Non-const so that the command (and the CommandData variant holding it) stays assignable.
+ CacheAddr addr;
+ /// Extent of the region. NOTE(review): presumably in bytes - confirm.
+ u64 size;
};
/// Command to signal to the GPU thread to flush and invalidate a region
struct FlushAndInvalidateRegionCommand final {
- explicit constexpr FlushAndInvalidateRegionCommand(VAddr addr, u64 size)
+ /// Stores the region description verbatim; no validation is performed here.
+ explicit constexpr FlushAndInvalidateRegionCommand(CacheAddr addr, u64 size)
: addr{addr}, size{size} {}
- const VAddr addr;
- const u64 size;
+ /// Start of the region to flush and invalidate.
+ /// NOTE(review): presumably a host cache address, going by the CacheAddr type - confirm.
+ /// Non-const so that the command (and the CommandData variant holding it) stays assignable.
+ CacheAddr addr;
+ /// Extent of the region. NOTE(review): presumably in bytes - confirm.
+ u64 size;
};
-using CommandData = std::variant<SubmitListCommand, SwapBuffersCommand, FlushRegionCommand,
- InvalidateRegionCommand, FlushAndInvalidateRegionCommand>;
+/// Type-safe union of every command that can be queued for the GPU thread.
+/// EndProcessingCommand is the first alternative, so a default-constructed CommandData holds it.
+using CommandData =
+ std::variant<EndProcessingCommand, SubmitListCommand, SwapBuffersCommand, FlushRegionCommand,
+ InvalidateRegionCommand, FlushAndInvalidateRegionCommand>;
+
+/// Queue element: a command together with the fence value it was constructed with.
+struct CommandDataContainer {
+    /// Creates an empty element: `data` holds the first CommandData alternative and `fence` is 0.
+    CommandDataContainer() = default;
+
+    /// Takes ownership of `data` and tags it with `next_fence`.
+    CommandDataContainer(CommandData&& data, u64 next_fence)
+        : data{std::move(data)}, fence{next_fence} {}
+
+    // Copy and move operations are deliberately left implicit (Rule of Zero). A user-declared
+    // copy assignment taking `const CommandDataContainer&` cannot move out of its argument
+    // (std::move on a const reference still selects the copy), and merely declaring it
+    // suppresses the implicit move constructor and move assignment. That would force every
+    // transfer of an element to deep-copy its command, including the command list owned by
+    // SubmitListCommand.
+
+    /// The queued command.
+    CommandData data;
+    /// Fence value associated with `data`; 0 for a default-constructed element.
+    u64 fence{};
+};
/// Struct used to synchronize the GPU thread
struct SynchState final {
-    std::atomic<bool> is_running{true};
-    std::atomic<bool> is_idle{true};
-    std::condition_variable signal_condition;
-    std::mutex signal_mutex;
-    std::condition_variable idle_condition;
-    std::mutex idle_mutex;
-
-    // We use two queues for sending commands to the GPU thread, one for writing (push_queue) to and
-    // one for reading from (pop_queue). These are swapped whenever the current pop_queue becomes
-    // empty. This allows for efficient thread-safe access, as it does not require any copies.
-
-    using CommandQueue = std::queue<CommandData>;
-    std::array<CommandQueue, 2> command_queues;
-    CommandQueue* push_queue{&command_queues[0]};
-    CommandQueue* pop_queue{&command_queues[1]};
-
-    void UpdateIdleState() {
-        std::lock_guard<std::mutex> lock{idle_mutex};
-        is_idle = command_queues[0].empty() && command_queues[1].empty();
+    /// NOTE(review): not read in this header; presumably cleared to stop the GPU thread - confirm.
+    std::atomic_bool is_running{true};
+    /// NOTE(review): not referenced in this header; meaning must be confirmed against the users.
+    std::atomic_int queued_frame_count{};
+    /// Held while notifying synchronization_condition (see TrySynchronize).
+    std::mutex synchronization_mutex;
+    /// Guards the wait/notify handshake on commands_condition.
+    std::mutex commands_mutex;
+    /// Signaled when the queue is non-empty (see SignalCommands / WaitForCommands).
+    std::condition_variable commands_condition;
+    /// Signaled when IsSynchronized() holds (see TrySynchronize).
+    std::condition_variable synchronization_condition;
+
+    /// Returns true if the gap in GPU commands is small enough that we can consider the CPU and GPU
+    /// synchronized. This is entirely empirical.
+    bool IsSynchronized() const {
+        constexpr std::size_t max_queue_gap{5};
+        return queue.Size() <= max_queue_gap;
+    }
+
+    /// Wakes one waiter on synchronization_condition, but only if IsSynchronized() currently holds.
+    void TrySynchronize() {
+        if (IsSynchronized()) {
+            std::lock_guard<std::mutex> lock{synchronization_mutex};
+            synchronization_condition.notify_one();
+        }
    }
+
+    /// Defined out of line.
+    /// NOTE(review): presumably blocks until `fence` has been signaled - confirm in the .cpp.
+    void WaitForSynchronization(u64 fence);
+
+    /// Wakes the thread blocked in WaitForCommands(), if the queue holds at least one command.
+    /// Must not be called while the caller already holds commands_mutex.
+    void SignalCommands() {
+        if (queue.Empty()) {
+            return;
+        }
+
+        // Take commands_mutex before notifying. WaitForCommands() evaluates its predicate while
+        // holding this mutex; without the lock here, a notification issued after the waiter saw an
+        // empty queue but before it actually blocked would be lost, leaving the waiter asleep with
+        // work pending.
+        std::lock_guard<std::mutex> lock{commands_mutex};
+        commands_condition.notify_one();
+    }
+
+    /// Blocks the calling thread until the queue is non-empty.
+    void WaitForCommands() {
+        std::unique_lock lock{commands_mutex};
+        commands_condition.wait(lock, [this] { return !queue.Empty(); });
+    }
+
+    using CommandQueue = Common::SPSCQueue<CommandDataContainer>;
+    /// Commands pending for the GPU thread.
+    CommandQueue queue;
+    /// NOTE(review): not used in this header; presumably the last fence handed out - confirm.
+    u64 last_fence{};
+    /// NOTE(review): not used in this header; presumably the last fence completed - confirm.
+    std::atomic<u64> signaled_fence{};
};
/// Class used to manage the GPU thread
class ThreadManager final {
public:
- explicit ThreadManager(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_pusher);
+ /// Defined out of line. Of the three arguments, only `system` is retained as a member in this
+ /// header; how `renderer` and `dma_pusher` are used is not visible here.
+ /// NOTE(review): VideoCore::RendererBase is no longer forward-declared in this header, so it
+ /// presumably has to be declared by "video_core/gpu.h" - confirm.
+ explicit ThreadManager(Core::System& system, VideoCore::RendererBase& renderer,
+ Tegra::DmaPusher& dma_pusher);
~ThreadManager();
/// Push GPU command entries to be processed
@@ -105,27 +150,22 @@ public:
std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer);
/// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
- void FlushRegion(VAddr addr, u64 size);
+ /// NOTE(review): `addr` is a CacheAddr, presumably a host cache address - confirm.
+ /// NOTE(review): `size` is presumably a byte count - confirm.
+ void FlushRegion(CacheAddr addr, u64 size);
/// Notify rasterizer that any caches of the specified region should be invalidated
- void InvalidateRegion(VAddr addr, u64 size);
+ /// NOTE(review): `addr` is a CacheAddr, presumably a host cache address - confirm.
+ /// NOTE(review): `size` is presumably a byte count - confirm.
+ void InvalidateRegion(CacheAddr addr, u64 size);
/// Notify rasterizer that any caches of the specified region should be flushed and invalidated
- void FlushAndInvalidateRegion(VAddr addr, u64 size);
+ /// NOTE(review): `addr` is a CacheAddr, presumably a host cache address - confirm.
+ /// NOTE(review): `size` is presumably a byte count - confirm.
+ void FlushAndInvalidateRegion(CacheAddr addr, u64 size);
private:
/// Pushes a command to be executed by the GPU thread
- void PushCommand(CommandData&& command_data, bool wait_for_idle, bool allow_on_cpu);
-
- /// Returns true if this is called by the GPU thread
- bool IsGpuThread() const {
- return std::this_thread::get_id() == thread_id;
- }
+ /// Takes `command_data` by rvalue reference, so callers hand over ownership of the command.
+ /// NOTE(review): the u64 result is presumably the fence value assigned to the command - confirm
+ /// against the out-of-line definition.
+ u64 PushCommand(CommandData&& command_data);
private:
SynchState state;
- VideoCore::RendererBase& renderer;
- Tegra::DmaPusher& dma_pusher;
+ /// Non-owning reference supplied to the constructor; must outlive this object.
+ Core::System& system;
+ /// Null until assigned. NOTE(review): presumably registered with core timing in the
+ /// constructor - confirm; ownership is not visible in this header.
+ Core::Timing::EventType* synchronization_event{};
std::thread thread;
+ /// NOTE(review): within this header thread_id was only read by IsGpuThread(), which this change
+ /// removes; confirm it is still needed by the out-of-line code.
std::thread::id thread_id;
};