1 files changed, 175 insertions, 74 deletions
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h
index 1a2d747be..21410e125 100644
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -13,14 +13,15 @@
 #include "common/common_types.h"
 #include "core/hle/service/nvdrv/nvdata.h"
 #include "core/hle/service/nvflinger/buffer_queue.h"
+#include "video_core/cdma_pusher.h"
 #include "video_core/dma_pusher.h"
 
 using CacheAddr = std::uintptr_t;
-inline CacheAddr ToCacheAddr(const void* host_ptr) {
+[[nodiscard]] inline CacheAddr ToCacheAddr(const void* host_ptr) {
     return reinterpret_cast<CacheAddr>(host_ptr);
 }
 
-inline u8* FromCacheAddr(CacheAddr cache_addr) {
+[[nodiscard]] inline u8* FromCacheAddr(CacheAddr cache_addr) {
     return reinterpret_cast<u8*>(cache_addr);
 }
 
@@ -33,58 +34,68 @@ class System;
 
 namespace VideoCore {
 class RendererBase;
+class ShaderNotify;
 } // namespace VideoCore
 
 namespace Tegra {
 
 enum class RenderTargetFormat : u32 {
     NONE = 0x0,
-    RGBA32_FLOAT = 0xC0,
-    RGBA32_UINT = 0xC2,
-    RGBA16_UNORM = 0xC6,
-    RGBA16_SNORM = 0xC7,
-    RGBA16_UINT = 0xC9,
-    RGBA16_FLOAT = 0xCA,
-    RG32_FLOAT = 0xCB,
-    RG32_UINT = 0xCD,
-    RGBX16_FLOAT = 0xCE,
-    BGRA8_UNORM = 0xCF,
-    BGRA8_SRGB = 0xD0,
-    RGB10_A2_UNORM = 0xD1,
-    RGBA8_UNORM = 0xD5,
-    RGBA8_SRGB = 0xD6,
-    RGBA8_SNORM = 0xD7,
-    RGBA8_UINT = 0xD9,
-    RG16_UNORM = 0xDA,
-    RG16_SNORM = 0xDB,
-    RG16_SINT = 0xDC,
-    RG16_UINT = 0xDD,
-    RG16_FLOAT = 0xDE,
-    R11G11B10_FLOAT = 0xE0,
+    R32B32G32A32_FLOAT = 0xC0, // NOTE(review): B/G swapped vs. R32G32B32A32_* siblings - typo?
+    R32G32B32A32_SINT = 0xC1,
+    R32G32B32A32_UINT = 0xC2,
+    R16G16B16A16_UNORM = 0xC6,
+    R16G16B16A16_SNORM = 0xC7,
+    R16G16B16A16_SINT = 0xC8,
+    R16G16B16A16_UINT = 0xC9,
+    R16G16B16A16_FLOAT = 0xCA,
+    R32G32_FLOAT = 0xCB,
+    R32G32_SINT = 0xCC,
+    R32G32_UINT = 0xCD,
+    R16G16B16X16_FLOAT = 0xCE,
+    B8G8R8A8_UNORM = 0xCF,
+    B8G8R8A8_SRGB = 0xD0,
+    A2B10G10R10_UNORM = 0xD1,
+    A2B10G10R10_UINT = 0xD2,
+    A8B8G8R8_UNORM = 0xD5,
+    A8B8G8R8_SRGB = 0xD6,
+    A8B8G8R8_SNORM = 0xD7,
+    A8B8G8R8_SINT = 0xD8,
+    A8B8G8R8_UINT = 0xD9,
+    R16G16_UNORM = 0xDA,
+    R16G16_SNORM = 0xDB,
+    R16G16_SINT = 0xDC,
+    R16G16_UINT = 0xDD,
+    R16G16_FLOAT = 0xDE,
+    B10G11R11_FLOAT = 0xE0,
     R32_SINT = 0xE3,
     R32_UINT = 0xE4,
     R32_FLOAT = 0xE5,
-    B5G6R5_UNORM = 0xE8,
-    BGR5A1_UNORM = 0xE9,
-    RG8_UNORM = 0xEA,
-    RG8_SNORM = 0xEB,
+    R5G6B5_UNORM = 0xE8,
+    A1R5G5B5_UNORM = 0xE9,
+    R8G8_UNORM = 0xEA,
+    R8G8_SNORM = 0xEB,
+    R8G8_SINT = 0xEC,
+    R8G8_UINT = 0xED,
     R16_UNORM = 0xEE,
     R16_SNORM = 0xEF,
     R16_SINT = 0xF0,
     R16_UINT = 0xF1,
     R16_FLOAT = 0xF2,
     R8_UNORM = 0xF3,
+    R8_SNORM = 0xF4,
+    R8_SINT = 0xF5,
     R8_UINT = 0xF6,
 };
 
 enum class DepthFormat : u32 {
-    Z32_FLOAT = 0xA,
-    Z16_UNORM = 0x13,
-    S8_Z24_UNORM = 0x14,
-    Z24_X8_UNORM = 0x15,
-    Z24_S8_UNORM = 0x16,
-    Z24_C8_UNORM = 0x18,
-    Z32_S8_X24_FLOAT = 0x19,
+    D32_FLOAT = 0xA,
+    D16_UNORM = 0x13,
+    S8_UINT_Z24_UNORM = 0x14, // NOTE(review): still uses Z; sibling entries were renamed to D*
+    D24X8_UNORM = 0x15,
+    D24S8_UNORM = 0x16,
+    D24C8_UNORM = 0x18,
+    D32_FLOAT_S8X24_UINT = 0x19,
 };
 
 struct CommandListHeader;
@@ -95,9 +106,9 @@ class DebugContext;
  */
 struct FramebufferConfig {
     enum class PixelFormat : u32 {
-        ABGR8 = 1,
-        RGB565 = 4,
-        BGRA8 = 5,
+        A8B8G8R8_UNORM = 1,
+        RGB565_UNORM = 4,
+        B8G8R8A8_UNORM = 5,
     };
 
     VAddr address;
@@ -132,60 +143,102 @@ class MemoryManager;
 
 class GPU {
 public:
-    explicit GPU(Core::System& system, std::unique_ptr<VideoCore::RendererBase>&& renderer,
-                 bool is_async);
-
-    virtual ~GPU();
-
     struct MethodCall {
         u32 method{};
         u32 argument{};
         u32 subchannel{};
         u32 method_count{};
 
-        bool IsLastCall() const {
-            return method_count <= 1;
-        }
-
         MethodCall(u32 method, u32 argument, u32 subchannel = 0, u32 method_count = 0)
             : method(method), argument(argument), subchannel(subchannel),
               method_count(method_count) {}
+
+        [[nodiscard]] bool IsLastCall() const {
+            return method_count <= 1;
+        }
     };
 
+    explicit GPU(Core::System& system, bool is_async, bool use_nvdec);
+    virtual ~GPU();
+
+    /// Binds a renderer to the GPU.
+    void BindRenderer(std::unique_ptr<VideoCore::RendererBase> renderer);
+
     /// Calls a GPU method.
     void CallMethod(const MethodCall& method_call);
 
+    /// Calls a GPU multivalue method.
+    void CallMultiMethod(u32 method, u32 subchannel, const u32* base_start, u32 amount,
+                         u32 methods_pending);
+
+    /// Flush all currently written commands into the host GPU for execution.
     void FlushCommands();
+    /// Synchronizes CPU writes with Host GPU memory.
+    void SyncGuestHost();
+    /// Signal the end of a command list.
+    virtual void OnCommandListEnd();
+
+    /// Request a host GPU memory flush from the CPU.
+    [[nodiscard]] u64 RequestFlush(VAddr addr, std::size_t size);
+
+    /// Obtains current flush request fence id.
+    [[nodiscard]] u64 CurrentFlushRequestFence() const {
+        return current_flush_fence.load(std::memory_order_relaxed);
+    }
+
+    /// Tick pending requests within the GPU.
+    void TickWork();
 
     /// Returns a reference to the Maxwell3D GPU engine.
-    Engines::Maxwell3D& Maxwell3D();
+    [[nodiscard]] Engines::Maxwell3D& Maxwell3D();
 
     /// Returns a const reference to the Maxwell3D GPU engine.
-    const Engines::Maxwell3D& Maxwell3D() const;
+    [[nodiscard]] const Engines::Maxwell3D& Maxwell3D() const;
 
     /// Returns a reference to the KeplerCompute GPU engine.
-    Engines::KeplerCompute& KeplerCompute();
+    [[nodiscard]] Engines::KeplerCompute& KeplerCompute();
 
     /// Returns a reference to the KeplerCompute GPU engine.
-    const Engines::KeplerCompute& KeplerCompute() const;
+    [[nodiscard]] const Engines::KeplerCompute& KeplerCompute() const;
 
     /// Returns a reference to the GPU memory manager.
-    Tegra::MemoryManager& MemoryManager();
+    [[nodiscard]] Tegra::MemoryManager& MemoryManager();
 
     /// Returns a const reference to the GPU memory manager.
-    const Tegra::MemoryManager& MemoryManager() const;
+    [[nodiscard]] const Tegra::MemoryManager& MemoryManager() const;
 
     /// Returns a reference to the GPU DMA pusher.
-    Tegra::DmaPusher& DmaPusher();
+    [[nodiscard]] Tegra::DmaPusher& DmaPusher();
 
-    VideoCore::RendererBase& Renderer() {
+    /// Returns a const reference to the GPU DMA pusher.
+    [[nodiscard]] const Tegra::DmaPusher& DmaPusher() const;
+
+    /// Returns a reference to the GPU CDMA pusher.
+    [[nodiscard]] Tegra::CDmaPusher& CDmaPusher();
+
+    /// Returns a const reference to the GPU CDMA pusher.
+    [[nodiscard]] const Tegra::CDmaPusher& CDmaPusher() const;
+
+    /// Returns a reference to the underlying renderer.
+    [[nodiscard]] VideoCore::RendererBase& Renderer() {
         return *renderer;
     }
 
-    const VideoCore::RendererBase& Renderer() const {
+    /// Returns a const reference to the underlying renderer.
+    [[nodiscard]] const VideoCore::RendererBase& Renderer() const {
         return *renderer;
     }
 
+    /// Returns a reference to the shader notifier.
+    [[nodiscard]] VideoCore::ShaderNotify& ShaderNotify() {
+        return *shader_notify;
+    }
+
+    /// Returns a const reference to the shader notifier.
+    [[nodiscard]] const VideoCore::ShaderNotify& ShaderNotify() const {
+        return *shader_notify;
+    }
+
     // Waits for the GPU to finish working
     virtual void WaitIdle() const = 0;
 
@@ -194,27 +247,46 @@ public:
 
     void IncrementSyncPoint(u32 syncpoint_id);
 
-    u32 GetSyncpointValue(u32 syncpoint_id) const;
+    [[nodiscard]] u32 GetSyncpointValue(u32 syncpoint_id) const;
 
     void RegisterSyncptInterrupt(u32 syncpoint_id, u32 value);
 
-    bool CancelSyncptInterrupt(u32 syncpoint_id, u32 value);
+    [[nodiscard]] bool CancelSyncptInterrupt(u32 syncpoint_id, u32 value);
 
-    u64 GetTicks() const;
+    [[nodiscard]] u64 GetTicks() const;
 
-    std::unique_lock<std::mutex> LockSync() {
+    [[nodiscard]] std::unique_lock<std::mutex> LockSync() {
         return std::unique_lock{sync_mutex};
     }
 
-    bool IsAsync() const {
+    [[nodiscard]] bool IsAsync() const {
         return is_async;
     }
 
-    /// Returns a const reference to the GPU DMA pusher.
-    const Tegra::DmaPusher& DmaPusher() const;
+    [[nodiscard]] bool UseNvdec() const {
+        return use_nvdec;
+    }
+
+    enum class FenceOperation : u32 {
+        Acquire = 0,
+        Increment = 1,
+    };
+
+    union FenceAction {
+        u32 raw;
+        BitField<0, 1, FenceOperation> op;
+        BitField<8, 24, u32> syncpoint_id;
+
+        [[nodiscard]] static CommandHeader Build(FenceOperation op, u32 syncpoint_id) {
+            FenceAction result{};
+            result.op.Assign(op);
+            result.syncpoint_id.Assign(syncpoint_id);
+            return {result.raw};
+        }
+    };
 
     struct Regs {
-        static constexpr size_t NUM_REGS = 0x100;
+        static constexpr size_t NUM_REGS = 0x40;
 
         union {
             struct {
@@ -223,7 +295,7 @@ public:
                     u32 address_high;
                     u32 address_low;
 
-                    GPUVAddr SemaphoreAddress() const {
+                    [[nodiscard]] GPUVAddr SemaphoreAddress() const {
                         return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
                                                      address_low);
                     }
@@ -233,7 +305,7 @@ public:
                 u32 semaphore_trigger;
                 INSERT_UNION_PADDING_WORDS(0xC);
 
-                // The puser and the puller share the reference counter, the pusher only has read
+                // The pusher and the puller share the reference counter, the pusher only has read
                 // access
                 u32 reference_count;
                 INSERT_UNION_PADDING_WORDS(0x5);
@@ -241,10 +313,7 @@ public:
                 u32 semaphore_acquire;
                 u32 semaphore_release;
                 u32 fence_value;
-                union {
-                    BitField<4, 4, u32> operation;
-                    BitField<8, 8, u32> id;
-                } fence_action;
+                FenceAction fence_action;
                 INSERT_UNION_PADDING_WORDS(0xE2);
 
                 // Puller state
@@ -263,9 +332,18 @@ public:
     /// core timing events.
     virtual void Start() = 0;
 
+    /// Obtain the CPU Context
+    virtual void ObtainContext() = 0;
+
+    /// Release the CPU Context
+    virtual void ReleaseContext() = 0;
+
     /// Push GPU command entries to be processed
     virtual void PushGPUEntries(Tegra::CommandList&& entries) = 0;
 
+    /// Push GPU command buffer entries to be processed
+    virtual void PushCommandBuffer(Tegra::ChCommandHeaderList& entries) = 0;
+
     /// Swap buffers (render frame)
     virtual void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) = 0;
 
@@ -283,6 +361,8 @@ protected:
 
 private:
     void ProcessBindMethod(const MethodCall& method_call);
+    void ProcessFenceActionMethod();
+    void ProcessWaitForInterruptMethod();
     void ProcessSemaphoreTriggerMethod();
     void ProcessSemaphoreRelease();
     void ProcessSemaphoreAcquire();
@@ -293,17 +373,22 @@ private:
     /// Calls a GPU engine method.
     void CallEngineMethod(const MethodCall& method_call);
 
+    /// Calls a GPU engine multivalue method.
+    void CallEngineMultiMethod(u32 method, u32 subchannel, const u32* base_start, u32 amount,
+                               u32 methods_pending);
+
     /// Determines where the method should be executed.
-    bool ExecuteMethodOnEngine(const MethodCall& method_call);
+    [[nodiscard]] bool ExecuteMethodOnEngine(u32 method);
 
 protected:
-    std::unique_ptr<Tegra::DmaPusher> dma_pusher;
     Core::System& system;
+    std::unique_ptr<Tegra::MemoryManager> memory_manager;
+    std::unique_ptr<Tegra::DmaPusher> dma_pusher;
+    std::unique_ptr<Tegra::CDmaPusher> cdma_pusher;
     std::unique_ptr<VideoCore::RendererBase> renderer;
+    const bool use_nvdec;
 
 private:
-    std::unique_ptr<Tegra::MemoryManager> memory_manager;
-
     /// Mapping of command subchannels to their bound engine ids
     std::array<EngineID, 8> bound_engines = {};
     /// 3D engine
@@ -316,15 +401,31 @@ private:
     std::unique_ptr<Engines::MaxwellDMA> maxwell_dma;
     /// Inline memory engine
     std::unique_ptr<Engines::KeplerMemory> kepler_memory;
+    /// Shader build notifier
+    std::unique_ptr<VideoCore::ShaderNotify> shader_notify;
 
     std::array<std::atomic<u32>, Service::Nvidia::MaxSyncPoints> syncpoints{};
 
     std::array<std::list<u32>, Service::Nvidia::MaxSyncPoints> syncpt_interrupts;
 
     std::mutex sync_mutex;
+    std::mutex device_mutex;
 
     std::condition_variable sync_cv;
 
+    struct FlushRequest {
+        FlushRequest(u64 fence, VAddr addr, std::size_t size)
+            : fence{fence}, addr{addr}, size{size} {}
+        u64 fence;
+        VAddr addr;
+        std::size_t size;
+    };
+
+    std::list<FlushRequest> flush_requests;
+    std::atomic<u64> current_flush_fence{};
+    u64 last_flush_fence{};
+    std::mutex flush_request_mutex;
+
     const bool is_async;
 };