From a994446b6ec776c9383e8b13c45eeb461405adff Mon Sep 17 00:00:00 2001
From: Subv <subv2112@gmail.com>
Date: Mon, 23 Apr 2018 20:01:29 -0500
Subject: GPU: Move the Maxwell3D macro uploading code to the inside of the
 Maxwell3D processor.

It doesn't belong in the PFIFO handler.
---
 src/video_core/engines/maxwell_3d.cpp | 14 ++++++++++----
 src/video_core/engines/maxwell_3d.h   | 17 +++++++++++++----
 2 files changed, 23 insertions(+), 8 deletions(-)

(limited to 'src/video_core/engines')
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index 2acbb9cd6..bc40f8d98 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -22,10 +22,6 @@ constexpr u32 MacroRegistersStart = 0xE00;
 Maxwell3D::Maxwell3D(MemoryManager& memory_manager)
     : memory_manager(memory_manager), macro_interpreter(*this) {}
 
-void Maxwell3D::SubmitMacroCode(u32 entry, std::vector<u32> code) {
-    uploaded_macros[entry * 2 + MacroRegistersStart] = std::move(code);
-}
-
 void Maxwell3D::CallMacroMethod(u32 method, std::vector<u32> parameters) {
     auto macro_code = uploaded_macros.find(method);
     // The requested macro must have been uploaded already.
@@ -75,6 +71,10 @@ void Maxwell3D::WriteReg(u32 method, u32 value, u32 remaining_params) {
     regs.reg_array[method] = value;
 
     switch (method) {
+    case MAXWELL3D_REG_INDEX(macros.data): {
+        ProcessMacroUpload(value);
+        break;
+    }
     case MAXWELL3D_REG_INDEX(code_address.code_address_high):
     case MAXWELL3D_REG_INDEX(code_address.code_address_low): {
         // Note: For some reason games (like Puyo Puyo Tetris) seem to write 0 to the CODE_ADDRESS
@@ -141,6 +141,12 @@ void Maxwell3D::WriteReg(u32 method, u32 value, u32 remaining_params) {
     }
 }
 
+void Maxwell3D::ProcessMacroUpload(u32 data) {
+    // Store the uploaded macro code to interpret them when they're called.
+    auto& macro = uploaded_macros[regs.macros.entry * 2 + MacroRegistersStart];
+    macro.push_back(data);
+}
+
 void Maxwell3D::ProcessQueryGet() {
     GPUVAddr sequence_address = regs.query.QueryAddress();
     // Since the sequence address is given as a GPU VAddr, we have to convert it to an application
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index a022665eb..8edc3cd38 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -322,7 +322,15 @@ public:
 
         union {
             struct {
-                INSERT_PADDING_WORDS(0x200);
+                INSERT_PADDING_WORDS(0x45);
+
+                struct {
+                    INSERT_PADDING_WORDS(1);
+                    u32 data;
+                    u32 entry;
+                } macros;
+
+                INSERT_PADDING_WORDS(0x1B8);
 
                 struct {
                     u32 address_high;
@@ -637,9 +645,6 @@ public:
     /// Write the value to the register identified by method.
     void WriteReg(u32 method, u32 value, u32 remaining_params);
 
-    /// Uploads the code for a GPU macro program associated with the specified entry.
-    void SubmitMacroCode(u32 entry, std::vector<u32> code);
-
     /// Returns a list of enabled textures for the specified shader stage.
     std::vector<Texture::FullTextureInfo> GetStageTextures(Regs::ShaderStage stage) const;
 
@@ -670,6 +675,9 @@ private:
      */
     void CallMacroMethod(u32 method, std::vector<u32> parameters);
 
+    /// Handles writes to the macro uploading registers.
+    void ProcessMacroUpload(u32 data);
+
     /// Handles a write to the QUERY_GET register.
     void ProcessQueryGet();
 
@@ -687,6 +695,7 @@ private:
     static_assert(offsetof(Maxwell3D::Regs, field_name) == position * 4,                           \
                   "Field " #field_name " has invalid position")
 
+ASSERT_REG_POSITION(macros, 0x45);
 ASSERT_REG_POSITION(rt, 0x200);
 ASSERT_REG_POSITION(viewport_transform[0], 0x280);
 ASSERT_REG_POSITION(viewport, 0x300);
-- 
cgit v1.2.3


From c16cfbbc6c062491d84a6bc9976027b7a7587fdb Mon Sep 17 00:00:00 2001
From: Subv <subv2112@gmail.com>
Date: Mon, 23 Apr 2018 20:03:50 -0500
Subject: GPU: Reduce the number of registers of Maxwell3D to 0xE00.

The rest are just macro shim registers.
---
 src/video_core/engines/maxwell_3d.cpp | 6 +++---
 src/video_core/engines/maxwell_3d.h   | 4 ++--
 2 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'src/video_core/engines')

diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index bc40f8d98..4306b894f 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -33,9 +33,6 @@ void Maxwell3D::CallMacroMethod(u32 method, std::vector<u32> parameters) {
 }
 
 void Maxwell3D::WriteReg(u32 method, u32 value, u32 remaining_params) {
-    ASSERT_MSG(method < Regs::NUM_REGS,
-               "Invalid Maxwell3D register, increase the size of the Regs structure");
-
     auto debug_context = Core::System::GetInstance().GetGPUDebugContext();
 
     // It is an error to write to a register other than the current macro's ARG register before it
@@ -64,6 +61,9 @@ void Maxwell3D::WriteReg(u32 method, u32 value, u32 remaining_params) {
         return;
     }
 
+    ASSERT_MSG(method < Regs::NUM_REGS,
+               "Invalid Maxwell3D register, increase the size of the Regs structure");
+
     if (debug_context) {
         debug_context->OnEvent(Tegra::DebugContext::Event::MaxwellCommandLoaded, nullptr);
     }
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index 8edc3cd38..5cf62fb01 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -31,7 +31,7 @@ public:
     /// Register structure of the Maxwell3D engine.
     /// TODO(Subv): This structure will need to be made bigger as more registers are discovered.
     struct Regs {
-        static constexpr size_t NUM_REGS = 0xE36;
+        static constexpr size_t NUM_REGS = 0xE00;
 
         static constexpr size_t NumRenderTargets = 8;
         static constexpr size_t NumViewports = 16;
@@ -613,7 +613,7 @@ public:
                     u32 size[MaxShaderStage];
                 } tex_info_buffers;
 
-                INSERT_PADDING_WORDS(0x102);
+                INSERT_PADDING_WORDS(0xCC);
             };
             std::array<u32, NUM_REGS> reg_array;
         };
-- 
cgit v1.2.3


From b1109931b9a92ce89635cb7c0c0c1c0c7e6866ed Mon Sep 17 00:00:00 2001
From: Subv <subv2112@gmail.com>
Date: Mon, 23 Apr 2018 20:12:40 -0500
Subject: GPU: Added boilerplate code for the Fermi2D engine

---
 src/video_core/engines/fermi_2d.cpp |  7 ++++++-
 src/video_core/engines/fermi_2d.h   | 28 +++++++++++++++++++++++++++-
 2 files changed, 33 insertions(+), 2 deletions(-)

(limited to 'src/video_core/engines')

diff --git a/src/video_core/engines/fermi_2d.cpp b/src/video_core/engines/fermi_2d.cpp
index 7aab163dc..87634da21 100644
--- a/src/video_core/engines/fermi_2d.cpp
+++ b/src/video_core/engines/fermi_2d.cpp
@@ -7,7 +7,12 @@
 namespace Tegra {
 namespace Engines {
 
-void Fermi2D::WriteReg(u32 method, u32 value) {}
+Fermi2D::Fermi2D(MemoryManager& memory_manager) : memory_manager(memory_manager) {}
+
+void Fermi2D::WriteReg(u32 method, u32 value) {
+    ASSERT_MSG(method < Regs::NUM_REGS,
+               "Invalid Fermi2D register, increase the size of the Regs structure");
+}
 
 } // namespace Engines
 } // namespace Tegra
diff --git a/src/video_core/engines/fermi_2d.h b/src/video_core/engines/fermi_2d.h
index 8967ddede..a97f5bb28 100644
--- a/src/video_core/engines/fermi_2d.h
+++ b/src/video_core/engines/fermi_2d.h
@@ -4,19 +4,45 @@
 
 #pragma once
 
+#include <array>
+#include "common/assert.h"
+#include "common/common_funcs.h"
 #include "common/common_types.h"
+#include "video_core/memory_manager.h"
 
 namespace Tegra {
 namespace Engines {
 
+#define FERMI2D_REG_INDEX(field_name)                                                              \
+    (offsetof(Tegra::Engines::Fermi2D::Regs, field_name) / sizeof(u32))
+
 class Fermi2D final {
 public:
-    Fermi2D() = default;
+    explicit Fermi2D(MemoryManager& memory_manager);
     ~Fermi2D() = default;
 
     /// Write the value to the register identified by method.
     void WriteReg(u32 method, u32 value);
+
+    struct Regs {
+        static constexpr size_t NUM_REGS = 0x258;
+
+        union {
+            struct {
+                INSERT_PADDING_WORDS(0x258);
+            };
+            std::array<u32, NUM_REGS> reg_array;
+        };
+    } regs{};
+
+    MemoryManager& memory_manager;
 };
 
+#define ASSERT_REG_POSITION(field_name, position)                                                  \
+    static_assert(offsetof(Fermi2D::Regs, field_name) == position * 4,                             \
+                  "Field " #field_name " has invalid position")
+
+#undef ASSERT_REG_POSITION
+
 } // namespace Engines
 } // namespace Tegra
-- 
cgit v1.2.3


From 378c881427d34962461099ef3d55de871710b897 Mon Sep 17 00:00:00 2001
From: Subv <subv2112@gmail.com>
Date: Mon, 23 Apr 2018 22:14:08 -0500
Subject: GPU: Added surface copy registers to Fermi2D

---
 src/video_core/engines/fermi_2d.h | 58 ++++++++++++++++++++++++++++++++++++++-
 1 file changed, 57 insertions(+), 1 deletion(-)

(limited to 'src/video_core/engines')

diff --git a/src/video_core/engines/fermi_2d.h b/src/video_core/engines/fermi_2d.h
index a97f5bb28..78d06218f 100644
--- a/src/video_core/engines/fermi_2d.h
+++ b/src/video_core/engines/fermi_2d.h
@@ -6,8 +6,10 @@
 
 #include <array>
 #include "common/assert.h"
+#include "common/bit_field.h"
 #include "common/common_funcs.h"
 #include "common/common_types.h"
+#include "video_core/gpu.h"
 #include "video_core/memory_manager.h"
 
 namespace Tegra {
@@ -27,9 +29,59 @@ public:
     struct Regs {
         static constexpr size_t NUM_REGS = 0x258;
 
+        struct Surface {
+            RenderTargetFormat format;
+            BitField<0, 1, u32> linear;
+            union {
+                BitField<0, 4, u32> block_depth;
+                BitField<4, 4, u32> block_height;
+                BitField<8, 4, u32> block_width;
+            };
+            u32 depth;
+            u32 layer;
+            u32 pitch;
+            u32 width;
+            u32 height;
+            u32 address_high;
+            u32 address_low;
+
+            GPUVAddr Address() const {
+                return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
+                                             address_low);
+            }
+        };
+        static_assert(sizeof(Surface) == 0x28, "Surface has incorrect size");
+
+        enum class Operation : u32 {
+            SrcCopyAnd = 0,
+            ROPAnd = 1,
+            Blend = 2,
+            SrcCopy = 3,
+            ROP = 4,
+            SrcCopyPremult = 5,
+            BlendPremult = 6,
+        };
+
         union {
             struct {
-                INSERT_PADDING_WORDS(0x258);
+                INSERT_PADDING_WORDS(0x80);
+
+                Surface dst; // Destination surface, asserted at word 0x80.
+
+                INSERT_PADDING_WORDS(2);
+
+                Surface src; // Source surface, asserted at word 0x8C.
+
+                INSERT_PADDING_WORDS(0x15);
+
+                Operation operation; // Copy operation selector, asserted at word 0xAB.
+
+                INSERT_PADDING_WORDS(0x9);
+
+                // TODO(Subv): This is only a guess.
+                u32 trigger;
+
+                INSERT_PADDING_WORDS(0x1A2); // Ends the layout at exactly NUM_REGS (0x258) words.
             };
             std::array<u32, NUM_REGS> reg_array;
         };
@@ -42,6 +94,10 @@ public:
     static_assert(offsetof(Fermi2D::Regs, field_name) == position * 4,                             \
                   "Field " #field_name " has invalid position")
 
+ASSERT_REG_POSITION(dst, 0x80);
+ASSERT_REG_POSITION(src, 0x8C);
+ASSERT_REG_POSITION(operation, 0xAB);
+ASSERT_REG_POSITION(trigger, 0xB5);
 #undef ASSERT_REG_POSITION
 
 } // namespace Engines
-- 
cgit v1.2.3


From 20d86d8a36dca4bf1b465193745a365c3ab9abcd Mon Sep 17 00:00:00 2001
From: Subv <subv2112@gmail.com>
Date: Tue, 24 Apr 2018 22:00:40 -0500
Subject: GPU: Partially implemented the Fermi2D surface copy operation.

The hardware allows for some rather complicated operations to be performed on the data during the copy; these are not implemented.
Only same-format same-size raw copies are implemented for now.
---
 src/video_core/engines/fermi_2d.cpp | 54 +++++++++++++++++++++++++++++++++++++
 src/video_core/engines/fermi_2d.h   |  5 ++++
 2 files changed, 59 insertions(+)

(limited to 'src/video_core/engines')

diff --git a/src/video_core/engines/fermi_2d.cpp b/src/video_core/engines/fermi_2d.cpp
index 87634da21..9019f2504 100644
--- a/src/video_core/engines/fermi_2d.cpp
+++ b/src/video_core/engines/fermi_2d.cpp
@@ -2,7 +2,9 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
+#include "core/memory.h"
 #include "video_core/engines/fermi_2d.h"
+#include "video_core/textures/decoders.h"
 
 namespace Tegra {
 namespace Engines {
@@ -12,6 +14,58 @@ Fermi2D::Fermi2D(MemoryManager& memory_manager) : memory_manager(memory_manager)
 void Fermi2D::WriteReg(u32 method, u32 value) {
     ASSERT_MSG(method < Regs::NUM_REGS,
                "Invalid Fermi2D register, increase the size of the Regs structure");
+
+    regs.reg_array[method] = value;
+
+    switch (method) {
+    case FERMI2D_REG_INDEX(trigger): {
+        HandleSurfaceCopy();
+        break;
+    }
+    }
+}
+
+void Fermi2D::HandleSurfaceCopy() {
+    NGLOG_WARNING(HW_GPU, "Requested a surface copy with operation {}",
+                  static_cast<u32>(regs.operation));
+
+    const GPUVAddr source = regs.src.Address();
+    const GPUVAddr dest = regs.dst.Address();
+
+    // TODO(Subv): Only same-format and same-size copies are allowed for now.
+    ASSERT(regs.src.format == regs.dst.format);
+    ASSERT(regs.src.width * regs.src.height == regs.dst.width * regs.dst.height);
+
+    // TODO(Subv): Only raw copies are implemented.
+    ASSERT(regs.operation == Regs::Operation::SrcCopy);
+
+    const VAddr source_cpu = *memory_manager.GpuToCpuAddress(source);
+    const VAddr dest_cpu = *memory_manager.GpuToCpuAddress(dest);
+
+    u32 src_bytes_per_pixel = RenderTargetBytesPerPixel(regs.src.format);
+    u32 dst_bytes_per_pixel = RenderTargetBytesPerPixel(regs.dst.format);
+
+    if (regs.src.linear == regs.dst.linear) {
+        // If the input layout and the output layout are the same, just perform a raw copy.
+        Memory::CopyBlock(dest_cpu, source_cpu,
+                          src_bytes_per_pixel * regs.dst.width * regs.dst.height);
+        return;
+    }
+
+    u8* src_buffer = Memory::GetPointer(source_cpu);
+    u8* dst_buffer = Memory::GetPointer(dest_cpu);
+
+    if (!regs.src.linear && regs.dst.linear) {
+        // If the input is tiled and the output is linear, deswizzle the input and copy it over.
+        Texture::CopySwizzledData(regs.src.width, regs.src.height, src_bytes_per_pixel,
+                                  dst_bytes_per_pixel, src_buffer, dst_buffer, true,
+                                  regs.src.block_height);
+    } else {
+        // If the input is linear and the output is tiled, swizzle the input and copy it over.
+        Texture::CopySwizzledData(regs.src.width, regs.src.height, src_bytes_per_pixel,
+                                  dst_bytes_per_pixel, dst_buffer, src_buffer, false,
+                                  regs.dst.block_height);
+    }
 }
 
 } // namespace Engines
diff --git a/src/video_core/engines/fermi_2d.h b/src/video_core/engines/fermi_2d.h
index 78d06218f..0c5b413cc 100644
--- a/src/video_core/engines/fermi_2d.h
+++ b/src/video_core/engines/fermi_2d.h
@@ -88,6 +88,11 @@ public:
     } regs{};
 
     MemoryManager& memory_manager;
+
+private:
+    /// Performs the copy from the source surface to the destination surface as configured in the
+    /// registers.
+    void HandleSurfaceCopy();
 };
 
 #define ASSERT_REG_POSITION(field_name, position)                                                  \
-- 
cgit v1.2.3