18 files changed, 678 insertions, 393 deletions
diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp
index d5831e752..2625ddfdc 100644
--- a/src/video_core/command_processor.cpp
+++ b/src/video_core/command_processor.cpp
@@ -28,98 +28,106 @@ enum class BufferMethods {
     CountBufferMethods = 0x40,
 };
 
-void GPU::WriteReg(u32 method, u32 subchannel, u32 value, u32 remaining_params) {
-    LOG_TRACE(HW_GPU,
-              "Processing method {:08X} on subchannel {} value "
-              "{:08X} remaining params {}",
-              method, subchannel, value, remaining_params);
-
-    ASSERT(subchannel < bound_engines.size());
-
-    if (method == static_cast<u32>(BufferMethods::BindObject)) {
-        // Bind the current subchannel to the desired engine id.
-        LOG_DEBUG(HW_GPU, "Binding subchannel {} to engine {}", subchannel, value);
-        bound_engines[subchannel] = static_cast<EngineID>(value);
-        return;
-    }
+MICROPROFILE_DEFINE(ProcessCommandLists, "GPU", "Execute command buffer", MP_RGB(128, 128, 192));
 
-    if (method < static_cast<u32>(BufferMethods::CountBufferMethods)) {
-        // TODO(Subv): Research and implement these methods.
-        LOG_ERROR(HW_GPU, "Special buffer methods other than Bind are not implemented");
-        return;
-    }
+void GPU::ProcessCommandLists(const std::vector<CommandListHeader>& commands) {
+    MICROPROFILE_SCOPE(ProcessCommandLists);
 
-    const EngineID engine = bound_engines[subchannel];
-
-    switch (engine) {
-    case EngineID::FERMI_TWOD_A:
-        fermi_2d->WriteReg(method, value);
-        break;
-    case EngineID::MAXWELL_B:
-        maxwell_3d->WriteReg(method, value, remaining_params);
-        break;
-    case EngineID::MAXWELL_COMPUTE_B:
-        maxwell_compute->WriteReg(method, value);
-        break;
-    case EngineID::MAXWELL_DMA_COPY_A:
-        maxwell_dma->WriteReg(method, value);
-        break;
-    default:
-        UNIMPLEMENTED_MSG("Unimplemented engine");
-    }
-}
+    auto WriteReg = [this](u32 method, u32 subchannel, u32 value, u32 remaining_params) {
+        LOG_TRACE(HW_GPU,
+                  "Processing method {:08X} on subchannel {} value "
+                  "{:08X} remaining params {}",
+                  method, subchannel, value, remaining_params);
 
-void GPU::ProcessCommandList(GPUVAddr address, u32 size) {
-    const boost::optional<VAddr> head_address = memory_manager->GpuToCpuAddress(address);
-    VAddr current_addr = *head_address;
-    while (current_addr < *head_address + size * sizeof(CommandHeader)) {
-        const CommandHeader header = {Memory::Read32(current_addr)};
-        current_addr += sizeof(u32);
-
-        switch (header.mode.Value()) {
-        case SubmissionMode::IncreasingOld:
-        case SubmissionMode::Increasing: {
-            // Increase the method value with each argument.
-            for (unsigned i = 0; i < header.arg_count; ++i) {
-                WriteReg(header.method + i, header.subchannel, Memory::Read32(current_addr),
-                         header.arg_count - i - 1);
-                current_addr += sizeof(u32);
-            }
-            break;
+        ASSERT(subchannel < bound_engines.size());
+
+        if (method == static_cast<u32>(BufferMethods::BindObject)) {
+            // Bind the current subchannel to the desired engine id.
+            LOG_DEBUG(HW_GPU, "Binding subchannel {} to engine {}", subchannel, value);
+            bound_engines[subchannel] = static_cast<EngineID>(value);
+            return;
         }
-        case SubmissionMode::NonIncreasingOld:
-        case SubmissionMode::NonIncreasing: {
-            // Use the same method value for all arguments.
-            for (unsigned i = 0; i < header.arg_count; ++i) {
-                WriteReg(header.method, header.subchannel, Memory::Read32(current_addr),
-                         header.arg_count - i - 1);
-                current_addr += sizeof(u32);
-            }
+
+        if (method < static_cast<u32>(BufferMethods::CountBufferMethods)) {
+            // TODO(Subv): Research and implement these methods.
+            LOG_ERROR(HW_GPU, "Special buffer methods other than Bind are not implemented");
+            return;
+        }
+
+        const EngineID engine = bound_engines[subchannel];
+
+        switch (engine) {
+        case EngineID::FERMI_TWOD_A:
+            fermi_2d->WriteReg(method, value);
+            break;
+        case EngineID::MAXWELL_B:
+            maxwell_3d->WriteReg(method, value, remaining_params);
             break;
+        case EngineID::MAXWELL_COMPUTE_B:
+            maxwell_compute->WriteReg(method, value);
+            break;
+        case EngineID::MAXWELL_DMA_COPY_A:
+            maxwell_dma->WriteReg(method, value);
+            break;
+        default:
+            UNIMPLEMENTED_MSG("Unimplemented engine");
         }
-        case SubmissionMode::IncreaseOnce: {
-            ASSERT(header.arg_count.Value() >= 1);
+    };
 
-            // Use the original method for the first argument and then the next method for all other
-            // arguments.
-            WriteReg(header.method, header.subchannel, Memory::Read32(current_addr),
-                     header.arg_count - 1);
+    for (const auto& entry : commands) {
+        const u32 size = entry.sz; // list length, in CommandHeader-sized words
+        const auto head_address = memory_manager->GpuToCpuAddress(entry.Address());
+        ASSERT_MSG(head_address, "Command list GPU address is not mapped");
+        VAddr current_addr = *head_address;
+        while (current_addr < *head_address + size * sizeof(CommandHeader)) {
+            const CommandHeader header = {Memory::Read32(current_addr)};
             current_addr += sizeof(u32);
 
-            for (unsigned i = 1; i < header.arg_count; ++i) {
-                WriteReg(header.method + 1, header.subchannel, Memory::Read32(current_addr),
-                         header.arg_count - i - 1);
+            switch (header.mode.Value()) {
+            case SubmissionMode::IncreasingOld:
+            case SubmissionMode::Increasing: {
+                // Increase the method value with each argument.
+                for (unsigned i = 0; i < header.arg_count; ++i) {
+                    WriteReg(header.method + i, header.subchannel, Memory::Read32(current_addr),
+                             header.arg_count - i - 1);
+                    current_addr += sizeof(u32);
+                }
+                break;
+            }
+            case SubmissionMode::NonIncreasingOld:
+            case SubmissionMode::NonIncreasing: {
+                // Use the same method value for all arguments.
+                for (unsigned i = 0; i < header.arg_count; ++i) {
+                    WriteReg(header.method, header.subchannel, Memory::Read32(current_addr),
+                             header.arg_count - i - 1);
+                    current_addr += sizeof(u32);
+                }
+                break;
+            }
+            case SubmissionMode::IncreaseOnce: {
+                ASSERT(header.arg_count.Value() >= 1);
+
+                // Use the original method for the first argument and then the next method for all
+                // other arguments.
+                WriteReg(header.method, header.subchannel, Memory::Read32(current_addr),
+                         header.arg_count - 1);
                 current_addr += sizeof(u32);
+
+                for (unsigned i = 1; i < header.arg_count; ++i) {
+                    WriteReg(header.method + 1, header.subchannel, Memory::Read32(current_addr),
+                             header.arg_count - i - 1);
+                    current_addr += sizeof(u32);
+                }
+                break;
+            }
+            case SubmissionMode::Inline: {
+                // The register value is stored in the bits 16-28 as an immediate
+                WriteReg(header.method, header.subchannel, header.inline_data, 0);
+                break;
+            }
+            default:
+                UNIMPLEMENTED();
             }
-            break;
-        }
-        case SubmissionMode::Inline: {
-            // The register value is stored in the bits 16-28 as an immediate
-            WriteReg(header.method, header.subchannel, header.inline_data, 0);
-            break;
-        }
-        default:
-            UNIMPLEMENTED();
         }
     }
 }
diff --git a/src/video_core/command_processor.h b/src/video_core/command_processor.h
index a01153e0b..bd766e77a 100644
--- a/src/video_core/command_processor.h
+++ b/src/video_core/command_processor.h
@@ -7,6 +7,7 @@
 #include <type_traits>
 #include "common/bit_field.h"
 #include "common/common_types.h"
+#include "video_core/memory_manager.h"
 
 namespace Tegra {
 
@@ -19,6 +20,22 @@ enum class SubmissionMode : u32 {
     IncreaseOnce = 5
 };
 
+struct CommandListHeader { // One 8-byte entry locating a command list in GPU memory
+    u32 entry0; // gpu_va_lo: bits 0-31 of the list's GPU virtual address
+    union {
+        u32 entry1; // gpu_va_hi | (unk_0x02 << 0x08) | (size << 0x0A) | (unk_0x01 << 0x1F)
+        BitField<0, 8, u32> gpu_va_hi; // bits 32-39 of the list's GPU virtual address
+        BitField<8, 2, u32> unk1;      // meaning unknown
+        BitField<10, 21, u32> sz;      // list length, counted in CommandHeader-sized words
+        BitField<31, 1, u32> unk2;     // meaning unknown
+    };
+
+    GPUVAddr Address() const { // Joins gpu_va_hi and entry0 into the full GPU address
+        return (static_cast<GPUVAddr>(gpu_va_hi) << 32) | entry0;
+    }
+};
+static_assert(sizeof(CommandListHeader) == 8, "CommandListHeader is incorrect size");
+
 union CommandHeader {
     u32 hex;
 
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index 1308080b5..329079ddd 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -135,8 +135,6 @@ void Maxwell3D::WriteReg(u32 method, u32 value, u32 remaining_params) {
         break;
     }
 
-    rasterizer.NotifyMaxwellRegisterChanged(method);
-
     if (debug_context) {
         debug_context->OnEvent(Tegra::DebugContext::Event::MaxwellCommandProcessed, nullptr);
     }
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index f59d01738..d3be900a4 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -533,7 +533,11 @@ public:
                 u32 stencil_back_mask;
                 u32 stencil_back_func_mask;
 
-                INSERT_PADDING_WORDS(0x20);
+                INSERT_PADDING_WORDS(0x13);
+
+                u32 rt_separate_frag_data;
+
+                INSERT_PADDING_WORDS(0xC);
 
                 struct {
                     u32 address_high;
@@ -557,7 +561,22 @@ public:
                 struct {
                     union {
                         BitField<0, 4, u32> count;
+                        BitField<4, 3, u32> map_0;
+                        BitField<7, 3, u32> map_1;
+                        BitField<10, 3, u32> map_2;
+                        BitField<13, 3, u32> map_3;
+                        BitField<16, 3, u32> map_4;
+                        BitField<19, 3, u32> map_5;
+                        BitField<22, 3, u32> map_6;
+                        BitField<25, 3, u32> map_7;
                     };
+
+                    u32 GetMap(size_t index) const { // returns the 3-bit map_<index> field
+                        const std::array<u32, NumRenderTargets> maps{map_0, map_1, map_2, map_3,
+                                                                     map_4, map_5, map_6, map_7};
+                        ASSERT(index < maps.size()); // index must name one of the map_N fields
+                        return maps[index];
+                    }
                 } rt_control;
 
                 INSERT_PADDING_WORDS(0x2);
@@ -968,6 +987,7 @@ ASSERT_REG_POSITION(clear_stencil, 0x368);
 ASSERT_REG_POSITION(stencil_back_func_ref, 0x3D5);
 ASSERT_REG_POSITION(stencil_back_mask, 0x3D6);
 ASSERT_REG_POSITION(stencil_back_func_mask, 0x3D7);
+ASSERT_REG_POSITION(rt_separate_frag_data, 0x3EB);
 ASSERT_REG_POSITION(zeta, 0x3F8);
 ASSERT_REG_POSITION(vertex_attrib_format, 0x458);
 ASSERT_REG_POSITION(rt_control, 0x487);
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp
index 6e740713f..c24d33d5c 100644
--- a/src/video_core/engines/maxwell_dma.cpp
+++ b/src/video_core/engines/maxwell_dma.cpp
@@ -41,7 +41,6 @@ void MaxwellDMA::HandleCopy() {
 
     // TODO(Subv): Perform more research and implement all features of this engine.
     ASSERT(regs.exec.enable_swizzle == 0);
-    ASSERT(regs.exec.enable_2d == 1);
     ASSERT(regs.exec.query_mode == Regs::QueryMode::None);
     ASSERT(regs.exec.query_intr == Regs::QueryIntr::None);
     ASSERT(regs.exec.copy_mode == Regs::CopyMode::Unk2);
@@ -51,10 +50,19 @@ void MaxwellDMA::HandleCopy() {
     ASSERT(regs.dst_params.pos_y == 0);
 
     if (regs.exec.is_dst_linear == regs.exec.is_src_linear) {
-        Memory::CopyBlock(dest_cpu, source_cpu, regs.x_count * regs.y_count);
+        size_t copy_size = regs.x_count;
+
+        // When the enable_2d bit is disabled, the copy is performed as if we were copying a 1D
+        // buffer of length `x_count`, otherwise we copy a 2D buffer of size (x_count, y_count).
+        if (regs.exec.enable_2d) {
+            copy_size = copy_size * regs.y_count;
+        }
+
+        Memory::CopyBlock(dest_cpu, source_cpu, copy_size);
         return;
     }
 
+    ASSERT(regs.exec.enable_2d == 1);
     u8* src_buffer = Memory::GetPointer(source_cpu);
     u8* dst_buffer = Memory::GetPointer(dest_cpu);
 
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index d2388673e..2db906ea5 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -244,6 +244,16 @@ enum class TextureType : u64 {
     TextureCube = 3,
 };
 
+enum class TextureQueryType : u64 { // value type of the `txq.query_type` instruction field
+    Dimension = 1,
+    TextureType = 2,
+    SamplePosition = 5,
+    Filter = 16,
+    LevelOfDetail = 18,
+    Wrap = 20,
+    BorderColor = 22,
+};
+
 enum class IpaInterpMode : u64 { Linear = 0, Perspective = 1, Flat = 2, Sc = 3 };
 enum class IpaSampleMode : u64 { Default = 0, Centroid = 1, Offset = 2 };
 
@@ -414,6 +424,45 @@ union Instruction {
     } bfe;
 
     union {
+        BitField<48, 3, u64> pred48;
+
+        union {
+            BitField<20, 20, u64> entry_a; // NOTE(review): overlaps entry_b at bit 39 - confirm
+            BitField<39, 5, u64> entry_b;
+            BitField<45, 1, u64> neg;
+            BitField<46, 1, u64> uses_cc;
+        } imm;
+
+        union {
+            BitField<20, 14, u64> cb_index;
+            BitField<34, 5, u64> cb_offset;
+            BitField<56, 1, u64> neg;
+            BitField<57, 1, u64> uses_cc;
+        } hi;
+
+        union {
+            BitField<20, 14, u64> cb_index;
+            BitField<34, 5, u64> cb_offset;
+            BitField<39, 5, u64> entry_a;
+            BitField<45, 1, u64> neg;
+            BitField<46, 1, u64> uses_cc;
+        } rz;
+
+        union {
+            BitField<39, 5, u64> entry_a;
+            BitField<45, 1, u64> neg;
+            BitField<46, 1, u64> uses_cc;
+        } r1;
+
+        union {
+            BitField<28, 8, u64> entry_a;
+            BitField<37, 1, u64> neg;
+            BitField<38, 1, u64> uses_cc;
+        } r2;
+
+    } lea; // presumably one sub-union per LEA_IMM/LEA_HI/LEA_RZ/LEA_R1/LEA_R2 opcode - confirm
+
+    union {
         BitField<0, 5, FlowCondition> cond;
     } flow;
 
@@ -468,6 +517,18 @@ union Instruction {
     } psetp;
 
     union {
+        BitField<12, 3, u64> pred12;
+        BitField<15, 1, u64> neg_pred12;
+        BitField<24, 2, PredOperation> cond;
+        BitField<29, 3, u64> pred29;
+        BitField<32, 1, u64> neg_pred29;
+        BitField<39, 3, u64> pred39;
+        BitField<42, 1, u64> neg_pred39;
+        BitField<44, 1, u64> bf;
+        BitField<45, 2, PredOperation> op;
+    } pset; // presumably the operand layout of the PSET opcode - confirm against the decoder
+
+    union {
         BitField<39, 3, u64> pred39;
         BitField<42, 1, u64> neg_pred;
         BitField<43, 1, u64> neg_a;
@@ -519,6 +580,21 @@ union Instruction {
     } tex;
 
     union {
+        BitField<22, 6, TextureQueryType> query_type; // bits 22-27, typed as TextureQueryType
+        BitField<31, 4, u64> component_mask;          // bits 31-34
+    } txq;
+
+    union {
+        BitField<28, 1, u64> array;
+        BitField<29, 2, TextureType> texture_type;
+        BitField<31, 4, u64> component_mask; // one bit per component, tested below
+
+        bool IsComponentEnabled(size_t component) const { // true if that mask bit is set
+            return ((1ull << component) & component_mask) != 0;
+        }
+    } tmml;
+
+    union {
         BitField<28, 1, u64> array;
         BitField<29, 2, TextureType> texture_type;
         BitField<56, 2, u64> component;
@@ -670,11 +746,13 @@ public:
         LDG, // Load from global memory
         STG, // Store in global memory
         TEX,
-        TEXQ,  // Texture Query
-        TEXS,  // Texture Fetch with scalar/non-vec4 source/destinations
-        TLDS,  // Texture Load with scalar/non-vec4 source/destinations
-        TLD4,  // Texture Load 4
-        TLD4S, // Texture Load 4 with scalar / non - vec4 source / destinations
+        TXQ,    // Texture Query
+        TEXS,   // Texture Fetch with scalar/non-vec4 source/destinations
+        TLDS,   // Texture Load with scalar/non-vec4 source/destinations
+        TLD4,   // Texture Load 4
+        TLD4S,  // Texture Load 4 with scalar/non-vec4 source/destinations
+        TMML_B, // Texture Mip Map Level
+        TMML,   // Texture Mip Map Level
         EXIT,
         IPA,
         FFMA_IMM, // Fused Multiply and Add
@@ -699,6 +777,11 @@ public:
         ISCADD_C, // Scale and Add
         ISCADD_R,
         ISCADD_IMM,
+        LEA_R1,
+        LEA_R2,
+        LEA_RZ,
+        LEA_IMM,
+        LEA_HI,
         POPC_C,
         POPC_R,
         POPC_IMM,
@@ -757,6 +840,7 @@ public:
         ISET_C,
         ISET_IMM,
         PSETP,
+        PSET,
         XMAD_IMM,
         XMAD_CR,
         XMAD_RC,
@@ -780,6 +864,7 @@ public:
         IntegerSet,
         IntegerSetPredicate,
         PredicateSetPredicate,
+        PredicateSetRegister,
         Conversion,
         Xmad,
         Unknown,
@@ -894,11 +979,13 @@ private:
             INST("1110111011010---", Id::LDG, Type::Memory, "LDG"),
             INST("1110111011011---", Id::STG, Type::Memory, "STG"),
             INST("110000----111---", Id::TEX, Type::Memory, "TEX"),
-            INST("1101111101001---", Id::TEXQ, Type::Memory, "TEXQ"),
+            INST("1101111101001---", Id::TXQ, Type::Memory, "TXQ"),
             INST("1101100---------", Id::TEXS, Type::Memory, "TEXS"),
             INST("1101101---------", Id::TLDS, Type::Memory, "TLDS"),
             INST("110010----111---", Id::TLD4, Type::Memory, "TLD4"),
             INST("1101111100------", Id::TLD4S, Type::Memory, "TLD4S"),
+            INST("110111110110----", Id::TMML_B, Type::Memory, "TMML_B"),
+            INST("1101111101011---", Id::TMML, Type::Memory, "TMML"),
             INST("111000110000----", Id::EXIT, Type::Trivial, "EXIT"),
             INST("11100000--------", Id::IPA, Type::Trivial, "IPA"),
             INST("0011001-1-------", Id::FFMA_IMM, Type::Ffma, "FFMA_IMM"),
@@ -929,6 +1016,11 @@ private:
             INST("0100110010100---", Id::SEL_C, Type::ArithmeticInteger, "SEL_C"),
             INST("0101110010100---", Id::SEL_R, Type::ArithmeticInteger, "SEL_R"),
             INST("0011100-10100---", Id::SEL_IMM, Type::ArithmeticInteger, "SEL_IMM"),
+            INST("0101101111011---", Id::LEA_R2, Type::ArithmeticInteger, "LEA_R2"),
+            INST("0101101111010---", Id::LEA_R1, Type::ArithmeticInteger, "LEA_R1"),
+            INST("001101101101----", Id::LEA_IMM, Type::ArithmeticInteger, "LEA_IMM"),
+            INST("010010111101----", Id::LEA_RZ, Type::ArithmeticInteger, "LEA_RZ"),
+            INST("00011000--------", Id::LEA_HI, Type::ArithmeticInteger, "LEA_HI"),
             INST("0101000010000---", Id::MUFU, Type::Arithmetic, "MUFU"),
             INST("0100110010010---", Id::RRO_C, Type::Arithmetic, "RRO_C"),
             INST("0101110010010---", Id::RRO_R, Type::Arithmetic, "RRO_R"),
@@ -983,6 +1075,7 @@ private:
             INST("010110110101----", Id::ISET_R, Type::IntegerSet, "ISET_R"),
             INST("010010110101----", Id::ISET_C, Type::IntegerSet, "ISET_C"),
             INST("0011011-0101----", Id::ISET_IMM, Type::IntegerSet, "ISET_IMM"),
+            INST("0101000010001---", Id::PSET, Type::PredicateSetRegister, "PSET"),
             INST("0101000010010---", Id::PSETP, Type::PredicateSetPredicate, "PSETP"),
             INST("0011011-00------", Id::XMAD_IMM, Type::Xmad, "XMAD_IMM"),
             INST("0100111---------", Id::XMAD_CR, Type::Xmad, "XMAD_CR"),
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index e6d8e65c6..86a809f86 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -66,6 +66,7 @@ u32 RenderTargetBytesPerPixel(RenderTargetFormat format) {
     case RenderTargetFormat::RGBA8_UINT:
     case RenderTargetFormat::RGB10_A2_UNORM:
     case RenderTargetFormat::BGRA8_UNORM:
+    case RenderTargetFormat::BGRA8_SRGB:
     case RenderTargetFormat::RG16_UNORM:
     case RenderTargetFormat::RG16_SNORM:
     case RenderTargetFormat::RG16_UINT:
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h
index d29f31f52..589a59b4f 100644
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -6,6 +6,7 @@
 
 #include <array>
 #include <memory>
+#include <vector>
 #include "common/common_types.h"
 #include "core/hle/service/nvflinger/buffer_queue.h"
 #include "video_core/memory_manager.h"
@@ -26,6 +27,7 @@ enum class RenderTargetFormat : u32 {
     RG32_FLOAT = 0xCB,
     RG32_UINT = 0xCD,
     BGRA8_UNORM = 0xCF,
+    BGRA8_SRGB = 0xD0,
     RGB10_A2_UNORM = 0xD1,
     RGBA8_UNORM = 0xD5,
     RGBA8_SRGB = 0xD6,
@@ -67,6 +69,7 @@ u32 RenderTargetBytesPerPixel(RenderTargetFormat format);
 /// Returns the number of bytes per pixel of each depth format.
 u32 DepthFormatBytesPerPixel(DepthFormat format);
 
+struct CommandListHeader;
 class DebugContext;
 
 /**
@@ -115,7 +118,7 @@ public:
     ~GPU();
 
     /// Processes a command list stored at the specified address in GPU memory.
-    void ProcessCommandList(GPUVAddr address, u32 size);
+    void ProcessCommandLists(const std::vector<CommandListHeader>& commands);
 
     /// Returns a reference to the Maxwell3D GPU engine.
     Engines::Maxwell3D& Maxwell3D();
@@ -130,9 +133,6 @@ public:
     const Tegra::MemoryManager& MemoryManager() const;
 
 private:
-    /// Writes a single register in the engine bound to the specified subchannel
-    void WriteReg(u32 method, u32 subchannel, u32 value, u32 remaining_params);
-
     std::unique_ptr<Tegra::MemoryManager> memory_manager;
 
     /// Mapping of command subchannels to their bound engine ids.
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h
index 9d78e8b6b..cd819d69f 100644
--- a/src/video_core/rasterizer_interface.h
+++ b/src/video_core/rasterizer_interface.h
@@ -20,9 +20,6 @@ public:
     /// Clear the current framebuffer
     virtual void Clear() = 0;
 
-    /// Notify rasterizer that the specified Maxwell register has been changed
-    virtual void NotifyMaxwellRegisterChanged(u32 method) = 0;
-
     /// Notify rasterizer that all caches should be flushed to Switch memory
     virtual void FlushAll() = 0;
 
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index e6d6917fa..c59f3af1b 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -294,61 +294,80 @@ void RasterizerOpenGL::UpdatePagesCachedCount(VAddr addr, u64 size, int delta) {
         cached_pages.add({pages_interval, delta});
 }
 
-std::pair<Surface, Surface> RasterizerOpenGL::ConfigureFramebuffers(bool using_color_fb,
-                                                                    bool using_depth_fb,
-                                                                    bool preserve_contents) {
+void RasterizerOpenGL::ConfigureFramebuffers(bool using_color_fb, bool using_depth_fb,
+                                             bool preserve_contents,
+                                             boost::optional<size_t> single_color_target) {
     MICROPROFILE_SCOPE(OpenGL_Framebuffer);
     const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;
 
-    if (regs.rt[0].format == Tegra::RenderTargetFormat::NONE) {
-        LOG_ERROR(HW_GPU, "RenderTargetFormat is not configured");
-        using_color_fb = false;
+    Surface depth_surface;
+    if (using_depth_fb) {
+        depth_surface = res_cache.GetDepthBufferSurface(preserve_contents);
     }
 
-    const bool has_stencil = regs.stencil_enable;
-    const bool write_color_fb =
-        state.color_mask.red_enabled == GL_TRUE || state.color_mask.green_enabled == GL_TRUE ||
-        state.color_mask.blue_enabled == GL_TRUE || state.color_mask.alpha_enabled == GL_TRUE;
-
-    const bool write_depth_fb =
-        (state.depth.test_enabled && state.depth.write_mask == GL_TRUE) ||
-        (has_stencil && (state.stencil.front.write_mask || state.stencil.back.write_mask));
+    // TODO(bunnei): Figure out how the below register works. According to envytools, this should be
+    // used to enable multiple render targets. However, it is left unset on all games that I have
+    // tested.
+    ASSERT_MSG(regs.rt_separate_frag_data == 0, "Unimplemented");
 
-    Surface color_surface;
-    Surface depth_surface;
-    MathUtil::Rectangle<u32> surfaces_rect;
-    std::tie(color_surface, depth_surface, surfaces_rect) =
-        res_cache.GetFramebufferSurfaces(using_color_fb, using_depth_fb, preserve_contents);
+    // Bind our draw framebuffer so the attachments below are made on it
+    state.draw.draw_framebuffer = framebuffer.handle;
+    state.Apply();
 
-    const MathUtil::Rectangle<s32> viewport_rect{regs.viewport_transform[0].GetRect()};
-    const MathUtil::Rectangle<u32> draw_rect{
-        static_cast<u32>(std::clamp<s32>(static_cast<s32>(surfaces_rect.left) + viewport_rect.left,
-                                         surfaces_rect.left, surfaces_rect.right)), // Left
-        static_cast<u32>(std::clamp<s32>(static_cast<s32>(surfaces_rect.bottom) + viewport_rect.top,
-                                         surfaces_rect.bottom, surfaces_rect.top)), // Top
-        static_cast<u32>(std::clamp<s32>(static_cast<s32>(surfaces_rect.left) + viewport_rect.right,
-                                         surfaces_rect.left, surfaces_rect.right)), // Right
-        static_cast<u32>(
-            std::clamp<s32>(static_cast<s32>(surfaces_rect.bottom) + viewport_rect.bottom,
-                            surfaces_rect.bottom, surfaces_rect.top))}; // Bottom
+    if (using_color_fb) {
+        if (single_color_target) {
+            // Used when just a single color attachment is enabled, e.g. for clearing a color buffer
+            Surface color_surface =
+                res_cache.GetColorBufferSurface(*single_color_target, preserve_contents);
+            glFramebufferTexture2D(
+                GL_DRAW_FRAMEBUFFER,
+                GL_COLOR_ATTACHMENT0 + static_cast<GLenum>(*single_color_target), GL_TEXTURE_2D,
+                color_surface != nullptr ? color_surface->Texture().handle : 0, 0);
+            glDrawBuffer(GL_COLOR_ATTACHMENT0 + static_cast<GLenum>(*single_color_target));
+        } else {
+            // Attach every render target; draw buffer i is routed via rt_control's map_i
+            std::array<GLenum, Maxwell::NumRenderTargets> buffers;
+            for (size_t index = 0; index < Maxwell::NumRenderTargets; ++index) {
+                Surface color_surface = res_cache.GetColorBufferSurface(index, preserve_contents);
+                buffers[index] = GL_COLOR_ATTACHMENT0 + regs.rt_control.GetMap(index);
+                glFramebufferTexture2D(
+                    GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + static_cast<GLenum>(index),
+                    GL_TEXTURE_2D, color_surface != nullptr ? color_surface->Texture().handle : 0,
+                    0);
+            }
+            glDrawBuffers(regs.rt_control.count, buffers.data());
+        }
+    } else {
+        // No color attachments are enabled - zero out all of them
+        for (size_t index = 0; index < Maxwell::NumRenderTargets; ++index) {
+            glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER,
+                                   GL_COLOR_ATTACHMENT0 + static_cast<GLenum>(index), GL_TEXTURE_2D,
+                                   0, 0);
+        }
+        glDrawBuffer(GL_NONE);
+    }
 
-    // Bind the framebuffer surfaces
-    BindFramebufferSurfaces(color_surface, depth_surface, has_stencil);
+    if (depth_surface) {
+        if (regs.stencil_enable) {
+            // Attach both depth and stencil
+            glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
+                                   depth_surface->Texture().handle, 0);
+        } else {
+            // Attach depth
+            glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D,
+                                   depth_surface->Texture().handle, 0);
+            // Clear stencil attachment
+            glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
+        }
+    } else {
+        // Clear both depth and stencil attachment
+        glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0,
+                               0);
+    }
 
-    SyncViewport(surfaces_rect);
+    SyncViewport();
 
-    // Viewport can have negative offsets or larger dimensions than our framebuffer sub-rect. Enable
-    // scissor test to prevent drawing outside of the framebuffer region
-    state.scissor.enabled = true;
-    state.scissor.x = draw_rect.left;
-    state.scissor.y = draw_rect.bottom;
-    state.scissor.width = draw_rect.GetWidth();
-    state.scissor.height = draw_rect.GetHeight();
     state.Apply();
-
-    // Only return the surface to be marked as dirty if writing to it is enabled.
-    return std::make_pair(write_color_fb ? color_surface : nullptr,
-                          write_depth_fb ? depth_surface : nullptr);
 }
 
 void RasterizerOpenGL::Clear() {
@@ -356,8 +375,9 @@ void RasterizerOpenGL::Clear() {
     SCOPE_EXIT({ prev_state.Apply(); });
 
     const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;
-    bool use_color_fb = false;
-    bool use_depth_fb = false;
+    bool use_color{};   // set when any of the R/G/B/A clear bits is requested
+    bool use_depth{};   // set when the Z clear bit is requested
+    bool use_stencil{}; // set when the S clear bit is requested
 
     OpenGLState clear_state;
     clear_state.draw.draw_framebuffer = state.draw.draw_framebuffer;
@@ -366,22 +386,13 @@ void RasterizerOpenGL::Clear() {
     clear_state.color_mask.blue_enabled = regs.clear_buffers.B ? GL_TRUE : GL_FALSE;
     clear_state.color_mask.alpha_enabled = regs.clear_buffers.A ? GL_TRUE : GL_FALSE;
 
-    GLbitfield clear_mask{};
     if (regs.clear_buffers.R || regs.clear_buffers.G || regs.clear_buffers.B ||
         regs.clear_buffers.A) {
-        if (regs.clear_buffers.RT == 0) {
-            // We only support clearing the first color attachment for now
-            clear_mask |= GL_COLOR_BUFFER_BIT;
-            use_color_fb = true;
-        } else {
-            // TODO(subv): Add support for the other color attachments
-            LOG_CRITICAL(HW_GPU, "Clear unimplemented for RT {}", regs.clear_buffers.RT);
-        }
+        use_color = true;
     }
     if (regs.clear_buffers.Z) {
         ASSERT_MSG(regs.zeta_enable != 0, "Tried to clear Z but buffer is not enabled!");
-        use_depth_fb = true;
-        clear_mask |= GL_DEPTH_BUFFER_BIT;
+        use_depth = true;
 
         // Always enable the depth write when clearing the depth buffer. The depth write mask is
         // ignored when clearing the buffer in the Switch, but OpenGL obeys it so we set it to true.
@@ -390,34 +401,33 @@ void RasterizerOpenGL::Clear() {
     }
     if (regs.clear_buffers.S) {
         ASSERT_MSG(regs.zeta_enable != 0, "Tried to clear stencil but buffer is not enabled!");
-        use_depth_fb = true;
-        clear_mask |= GL_STENCIL_BUFFER_BIT;
+        use_stencil = true;
         clear_state.stencil.test_enabled = true;
     }
 
-    if (!use_color_fb && !use_depth_fb) {
+    if (!use_color && !use_depth && !use_stencil) {
         // No color surface nor depth/stencil surface are enabled
         return;
     }
 
-    if (clear_mask == 0) {
-        // No clear mask is enabled
-        return;
-    }
-
     ScopeAcquireGLContext acquire_context{emu_window};
 
-    auto [dirty_color_surface, dirty_depth_surface] =
-        ConfigureFramebuffers(use_color_fb, use_depth_fb, false);
+    ConfigureFramebuffers(use_color, use_depth || use_stencil, false,
+                          regs.clear_buffers.RT.Value());
 
     clear_state.Apply();
 
-    glClearColor(regs.clear_color[0], regs.clear_color[1], regs.clear_color[2],
-                 regs.clear_color[3]);
-    glClearDepth(regs.clear_depth);
-    glClearStencil(regs.clear_stencil);
+    if (use_color) {
+        glClearBufferfv(GL_COLOR, 0, regs.clear_color); // the single target is draw buffer 0
+    }
 
-    glClear(clear_mask);
+    if (use_depth && use_stencil) {
+        glClearBufferfi(GL_DEPTH_STENCIL, 0, regs.clear_depth, regs.clear_stencil);
+    } else if (use_depth) {
+        glClearBufferfv(GL_DEPTH, 0, &regs.clear_depth);
+    } else if (use_stencil) {
+        glClearBufferiv(GL_STENCIL, 0, &regs.clear_stencil);
+    }
 }
 
 void RasterizerOpenGL::DrawArrays() {
@@ -430,8 +440,7 @@ void RasterizerOpenGL::DrawArrays() {
 
     ScopeAcquireGLContext acquire_context{emu_window};
 
-    const auto [dirty_color_surface, dirty_depth_surface] =
-        ConfigureFramebuffers(true, regs.zeta.Address() != 0 && regs.zeta_enable != 0, true);
+    ConfigureFramebuffers();
 
     SyncDepthTestState();
     SyncStencilTestState();
@@ -525,8 +534,6 @@ void RasterizerOpenGL::DrawArrays() {
     state.Apply();
 }
 
-void RasterizerOpenGL::NotifyMaxwellRegisterChanged(u32 method) {}
-
 void RasterizerOpenGL::FlushAll() {}
 
 void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size) {}
@@ -729,38 +736,12 @@ u32 RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, Shader& shader,
     return current_unit + static_cast<u32>(entries.size());
 }
 
-void RasterizerOpenGL::BindFramebufferSurfaces(const Surface& color_surface,
-                                               const Surface& depth_surface, bool has_stencil) {
-    state.draw.draw_framebuffer = framebuffer.handle;
-    state.Apply();
-
-    glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D,
-                           color_surface != nullptr ? color_surface->Texture().handle : 0, 0);
-    if (depth_surface != nullptr) {
-        if (has_stencil) {
-            // attach both depth and stencil
-            glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
-                                   depth_surface->Texture().handle, 0);
-        } else {
-            // attach depth
-            glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D,
-                                   depth_surface->Texture().handle, 0);
-            // clear stencil attachment
-            glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
-        }
-    } else {
-        // clear both depth and stencil attachment
-        glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0,
-                               0);
-    }
-}
-
-void RasterizerOpenGL::SyncViewport(const MathUtil::Rectangle<u32>& surfaces_rect) {
+void RasterizerOpenGL::SyncViewport() {
     const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;
     const MathUtil::Rectangle<s32> viewport_rect{regs.viewport_transform[0].GetRect()};
 
-    state.viewport.x = static_cast<GLint>(surfaces_rect.left) + viewport_rect.left;
-    state.viewport.y = static_cast<GLint>(surfaces_rect.bottom) + viewport_rect.bottom;
+    state.viewport.x = viewport_rect.left;
+    state.viewport.y = viewport_rect.bottom;
     state.viewport.width = static_cast<GLsizei>(viewport_rect.GetWidth());
     state.viewport.height = static_cast<GLsizei>(viewport_rect.GetHeight());
 }
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index c6bb1516b..745c3dc0c 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -13,6 +13,7 @@
 #include <vector>
 
 #include <boost/icl/interval_map.hpp>
+#include <boost/optional.hpp>
 #include <boost/range/iterator_range.hpp>
 #include <glad/glad.h>
 
@@ -45,7 +46,6 @@ public:
 
     void DrawArrays() override;
     void Clear() override;
-    void NotifyMaxwellRegisterChanged(u32 method) override;
     void FlushAll() override;
     void FlushRegion(VAddr addr, u64 size) override;
     void InvalidateRegion(VAddr addr, u64 size) override;
@@ -97,14 +97,16 @@ private:
         GLvec4 border_color;
     };
 
-    /// Configures the color and depth framebuffer states and returns the dirty <Color, Depth>
-    /// surfaces if writing was enabled.
-    std::pair<Surface, Surface> ConfigureFramebuffers(bool using_color_fb, bool using_depth_fb,
-                                                      bool preserve_contents);
-
-    /// Binds the framebuffer color and depth surface
-    void BindFramebufferSurfaces(const Surface& color_surface, const Surface& depth_surface,
-                                 bool has_stencil);
+    /**
+     * Configures the color and depth framebuffer states.
+     * @param use_color_fb If true, configure color framebuffers.
+     * @param using_depth_fb If true, configure the depth/stencil framebuffer.
+     * @param preserve_contents If true, tries to preserve data from a previously used framebuffer.
+     * @param single_color_target If set, the index of the single color buffer target to use.
+     */
+    void ConfigureFramebuffers(bool use_color_fb = true, bool using_depth_fb = true,
+                               bool preserve_contents = true,
+                               boost::optional<size_t> single_color_target = {});
 
     /*
      * Configures the current constbuffers to use for the draw command.
@@ -127,7 +129,7 @@ private:
                       u32 current_unit);
 
     /// Syncs the viewport to match the guest state
-    void SyncViewport(const MathUtil::Rectangle<u32>& surfaces_rect);
+    void SyncViewport();
 
     /// Syncs the clip enabled status to match the guest state
     void SyncClipEnabled();
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
index 360fb0cd5..fb56decc0 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
@@ -52,17 +52,31 @@ static VAddr TryGetCpuAddr(Tegra::GPUVAddr gpu_addr) {
     params.type = GetFormatType(params.pixel_format);
     params.width = Common::AlignUp(config.tic.Width(), GetCompressionFactor(params.pixel_format));
     params.height = Common::AlignUp(config.tic.Height(), GetCompressionFactor(params.pixel_format));
-    params.depth = config.tic.Depth();
     params.unaligned_height = config.tic.Height();
-    params.size_in_bytes = params.SizeInBytes();
-    params.cache_width = Common::AlignUp(params.width, 8);
-    params.cache_height = Common::AlignUp(params.height, 8);
     params.target = SurfaceTargetFromTextureType(config.tic.texture_type);
+
+    switch (params.target) {
+    case SurfaceTarget::Texture1D:
+    case SurfaceTarget::Texture2D:
+        params.depth = 1;
+        break;
+    case SurfaceTarget::Texture3D:
+    case SurfaceTarget::Texture2DArray:
+        params.depth = config.tic.Depth();
+        break;
+    default:
+        LOG_CRITICAL(HW_GPU, "Unknown depth for target={}", static_cast<u32>(params.target));
+        UNREACHABLE();
+        params.depth = 1;
+        break;
+    }
+
+    params.size_in_bytes = params.SizeInBytes();
     return params;
 }
 
-/*static*/ SurfaceParams SurfaceParams::CreateForFramebuffer(
-    const Tegra::Engines::Maxwell3D::Regs::RenderTargetConfig& config) {
+/*static*/ SurfaceParams SurfaceParams::CreateForFramebuffer(size_t index) {
+    const auto& config{Core::System::GetInstance().GPU().Maxwell3D().regs.rt[index]};
     SurfaceParams params{};
     params.addr = TryGetCpuAddr(config.Address());
     params.is_tiled = true;
@@ -72,12 +86,10 @@ static VAddr TryGetCpuAddr(Tegra::GPUVAddr gpu_addr) {
     params.type = GetFormatType(params.pixel_format);
     params.width = config.width;
     params.height = config.height;
-    params.depth = 1;
     params.unaligned_height = config.height;
-    params.size_in_bytes = params.SizeInBytes();
-    params.cache_width = Common::AlignUp(params.width, 8);
-    params.cache_height = Common::AlignUp(params.height, 8);
     params.target = SurfaceTarget::Texture2D;
+    params.depth = 1;
+    params.size_in_bytes = params.SizeInBytes();
     return params;
 }
 
@@ -93,12 +105,10 @@ static VAddr TryGetCpuAddr(Tegra::GPUVAddr gpu_addr) {
     params.type = GetFormatType(params.pixel_format);
     params.width = zeta_width;
     params.height = zeta_height;
-    params.depth = 1;
     params.unaligned_height = zeta_height;
-    params.size_in_bytes = params.SizeInBytes();
-    params.cache_width = Common::AlignUp(params.width, 8);
-    params.cache_height = Common::AlignUp(params.height, 8);
     params.target = SurfaceTarget::Texture2D;
+    params.depth = 1;
+    params.size_in_bytes = params.SizeInBytes();
     return params;
 }
 
@@ -461,30 +471,27 @@ CachedSurface::CachedSurface(const SurfaceParams& params)
         // Only pre-create the texture for non-compressed textures.
         switch (params.target) {
         case SurfaceParams::SurfaceTarget::Texture1D:
-            glTexImage1D(SurfaceTargetToGL(params.target), 0, format_tuple.internal_format,
-                         rect.GetWidth(), 0, format_tuple.format, format_tuple.type, nullptr);
+            glTexStorage1D(SurfaceTargetToGL(params.target), 1, format_tuple.internal_format,
+                           rect.GetWidth());
             break;
         case SurfaceParams::SurfaceTarget::Texture2D:
-            glTexImage2D(SurfaceTargetToGL(params.target), 0, format_tuple.internal_format,
-                         rect.GetWidth(), rect.GetHeight(), 0, format_tuple.format,
-                         format_tuple.type, nullptr);
+            glTexStorage2D(SurfaceTargetToGL(params.target), 1, format_tuple.internal_format,
+                           rect.GetWidth(), rect.GetHeight());
             break;
         case SurfaceParams::SurfaceTarget::Texture3D:
         case SurfaceParams::SurfaceTarget::Texture2DArray:
-            glTexImage3D(SurfaceTargetToGL(params.target), 0, format_tuple.internal_format,
-                         rect.GetWidth(), rect.GetHeight(), params.depth, 0, format_tuple.format,
-                         format_tuple.type, nullptr);
+            glTexStorage3D(SurfaceTargetToGL(params.target), 1, format_tuple.internal_format,
+                           rect.GetWidth(), rect.GetHeight(), params.depth);
             break;
         default:
             LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}",
                          static_cast<u32>(params.target));
             UNREACHABLE();
-            glTexImage2D(GL_TEXTURE_2D, 0, format_tuple.internal_format, rect.GetWidth(),
-                         rect.GetHeight(), 0, format_tuple.format, format_tuple.type, nullptr);
+            glTexStorage2D(GL_TEXTURE_2D, 1, format_tuple.internal_format, rect.GetWidth(),
+                           rect.GetHeight());
         }
     }
 
-    glTexParameteri(SurfaceTargetToGL(params.target), GL_TEXTURE_MAX_LEVEL, 0);
     glTexParameteri(SurfaceTargetToGL(params.target), GL_TEXTURE_MIN_FILTER, GL_LINEAR);
     glTexParameteri(SurfaceTargetToGL(params.target), GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
     glTexParameteri(SurfaceTargetToGL(params.target), GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
@@ -505,7 +512,7 @@ static void ConvertS8Z24ToZ24S8(std::vector<u8>& data, u32 width, u32 height) {
 
     S8Z24 input_pixel{};
     Z24S8 output_pixel{};
-    const auto bpp{CachedSurface::GetGLBytesPerPixel(PixelFormat::S8Z24)};
+    constexpr auto bpp{CachedSurface::GetGLBytesPerPixel(PixelFormat::S8Z24)};
     for (size_t y = 0; y < height; ++y) {
         for (size_t x = 0; x < width; ++x) {
             const size_t offset{bpp * (y * width + x)};
@@ -518,7 +525,7 @@ static void ConvertS8Z24ToZ24S8(std::vector<u8>& data, u32 width, u32 height) {
 }
 
 static void ConvertG8R8ToR8G8(std::vector<u8>& data, u32 width, u32 height) {
-    const auto bpp{CachedSurface::GetGLBytesPerPixel(PixelFormat::G8R8U)};
+    constexpr auto bpp{CachedSurface::GetGLBytesPerPixel(PixelFormat::G8R8U)};
     for (size_t y = 0; y < height; ++y) {
         for (size_t x = 0; x < width; ++x) {
             const size_t offset{bpp * (y * width + x)};
@@ -584,12 +591,13 @@ void CachedSurface::LoadGLBuffer() {
             UNREACHABLE();
         }
 
-        gl_buffer.resize(params.depth * copy_size);
+        gl_buffer.resize(static_cast<size_t>(params.depth) * copy_size);
         morton_to_gl_fns[static_cast<size_t>(params.pixel_format)](
             params.width, params.block_height, params.height, gl_buffer.data(), copy_size,
             params.addr);
     } else {
-        const u8* const texture_src_data_end{texture_src_data + (params.depth * copy_size)};
+        const u8* const texture_src_data_end{texture_src_data +
+                                             (static_cast<size_t>(params.depth) * copy_size)};
         gl_buffer.assign(texture_src_data, texture_src_data_end);
     }
 
@@ -608,18 +616,20 @@ void CachedSurface::UploadGLTexture(GLuint read_fb_handle, GLuint draw_fb_handle
 
     MICROPROFILE_SCOPE(OpenGL_TextureUL);
 
-    ASSERT(gl_buffer.size() ==
-           params.width * params.height * GetGLBytesPerPixel(params.pixel_format) * params.depth);
+    ASSERT(gl_buffer.size() == static_cast<size_t>(params.width) * params.height *
+                                   GetGLBytesPerPixel(params.pixel_format) * params.depth);
 
     const auto& rect{params.GetRect()};
 
     // Load data from memory to the surface
-    GLint x0 = static_cast<GLint>(rect.left);
-    GLint y0 = static_cast<GLint>(rect.bottom);
-    size_t buffer_offset = (y0 * params.width + x0) * GetGLBytesPerPixel(params.pixel_format);
+    const GLint x0 = static_cast<GLint>(rect.left);
+    const GLint y0 = static_cast<GLint>(rect.bottom);
+    const size_t buffer_offset =
+        static_cast<size_t>(static_cast<size_t>(y0) * params.width + static_cast<size_t>(x0)) *
+        GetGLBytesPerPixel(params.pixel_format);
 
     const FormatTuple& tuple = GetFormatTuple(params.pixel_format, params.component_type);
-    GLuint target_tex = texture.handle;
+    const GLuint target_tex = texture.handle;
     OpenGLState cur_state = OpenGLState::GetCurState();
 
     const auto& old_tex = cur_state.texture_units[0];
@@ -705,62 +715,34 @@ Surface RasterizerCacheOpenGL::GetTextureSurface(const Tegra::Texture::FullTextu
     return GetSurface(SurfaceParams::CreateForTexture(config));
 }
 
-SurfaceSurfaceRect_Tuple RasterizerCacheOpenGL::GetFramebufferSurfaces(bool using_color_fb,
-                                                                       bool using_depth_fb,
-                                                                       bool preserve_contents) {
-    const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;
+Surface RasterizerCacheOpenGL::GetDepthBufferSurface(bool preserve_contents) {
+    const auto& regs{Core::System::GetInstance().GPU().Maxwell3D().regs};
+    if (!regs.zeta.Address() || !regs.zeta_enable) {
+        return {};
+    }
 
-    // TODO(bunnei): This is hard corded to use just the first render buffer
-    LOG_TRACE(Render_OpenGL, "hard-coded for render target 0!");
+    SurfaceParams depth_params{SurfaceParams::CreateForDepthBuffer(
+        regs.zeta_width, regs.zeta_height, regs.zeta.Address(), regs.zeta.format)};
 
-    // get color and depth surfaces
-    SurfaceParams color_params{};
-    SurfaceParams depth_params{};
+    return GetSurface(depth_params, preserve_contents);
+}
 
-    if (using_color_fb) {
-        color_params = SurfaceParams::CreateForFramebuffer(regs.rt[0]);
-    }
+Surface RasterizerCacheOpenGL::GetColorBufferSurface(size_t index, bool preserve_contents) {
+    const auto& regs{Core::System::GetInstance().GPU().Maxwell3D().regs};
 
-    if (using_depth_fb) {
-        depth_params = SurfaceParams::CreateForDepthBuffer(regs.zeta_width, regs.zeta_height,
-                                                           regs.zeta.Address(), regs.zeta.format);
-    }
+    ASSERT(index < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets);
 
-    MathUtil::Rectangle<u32> color_rect{};
-    Surface color_surface;
-    if (using_color_fb) {
-        color_surface = GetSurface(color_params, preserve_contents);
-        if (color_surface) {
-            color_rect = color_surface->GetSurfaceParams().GetRect();
-        }
+    if (index >= regs.rt_control.count) {
+        return {};
     }
 
-    MathUtil::Rectangle<u32> depth_rect{};
-    Surface depth_surface;
-    if (using_depth_fb) {
-        depth_surface = GetSurface(depth_params, preserve_contents);
-        if (depth_surface) {
-            depth_rect = depth_surface->GetSurfaceParams().GetRect();
-        }
+    if (regs.rt[index].Address() == 0 || regs.rt[index].format == Tegra::RenderTargetFormat::NONE) {
+        return {};
     }
 
-    MathUtil::Rectangle<u32> fb_rect{};
-    if (color_surface && depth_surface) {
-        fb_rect = color_rect;
-        // Color and Depth surfaces must have the same dimensions and offsets
-        if (color_rect.bottom != depth_rect.bottom || color_rect.top != depth_rect.top ||
-            color_rect.left != depth_rect.left || color_rect.right != depth_rect.right) {
-            color_surface = GetSurface(color_params);
-            depth_surface = GetSurface(depth_params);
-            fb_rect = color_surface->GetSurfaceParams().GetRect();
-        }
-    } else if (color_surface) {
-        fb_rect = color_rect;
-    } else if (depth_surface) {
-        fb_rect = depth_rect;
-    }
+    const SurfaceParams color_params{SurfaceParams::CreateForFramebuffer(index)};
 
-    return std::make_tuple(color_surface, depth_surface, fb_rect);
+    return GetSurface(color_params, preserve_contents);
 }
 
 void RasterizerCacheOpenGL::LoadSurface(const Surface& surface) {
@@ -826,16 +808,20 @@ Surface RasterizerCacheOpenGL::RecreateSurface(const Surface& surface,
     // Get a new surface with the new parameters, and blit the previous surface to it
     Surface new_surface{GetUncachedSurface(new_params)};
 
-    // If format is unchanged, we can do a faster blit without reinterpreting pixel data
-    if (params.pixel_format == new_params.pixel_format) {
+    if (params.pixel_format == new_params.pixel_format ||
+        !Settings::values.use_accurate_framebuffers) {
+        // If the format is the same, just do a framebuffer blit. This is significantly faster than
+        // using PBOs. This is also likely less accurate, as textures will be converted rather than
+        // reinterpreted.
+
         BlitTextures(surface->Texture().handle, params.GetRect(), new_surface->Texture().handle,
                      params.GetRect(), params.type, read_framebuffer.handle,
                      draw_framebuffer.handle);
-        return new_surface;
-    }
+    } else {
+        // When use_accurate_framebuffers setting is enabled, perform a more accurate surface copy,
+        // where pixels are reinterpreted as a new format (without conversion). This code path uses
+        // OpenGL PBOs and is quite slow.
 
-    // When using accurate framebuffers, always copy old data to new surface, regardless of format
-    if (Settings::values.use_accurate_framebuffers) {
         auto source_format = GetFormatTuple(params.pixel_format, params.component_type);
         auto dest_format = GetFormatTuple(new_params.pixel_format, new_params.component_type);
 
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
index 8312b2c7a..57ea8593b 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
@@ -297,6 +297,7 @@ struct SurfaceParams {
             return PixelFormat::ABGR8S;
         case Tegra::RenderTargetFormat::RGBA8_UINT:
             return PixelFormat::ABGR8UI;
+        case Tegra::RenderTargetFormat::BGRA8_SRGB:
         case Tegra::RenderTargetFormat::BGRA8_UNORM:
             return PixelFormat::BGRA8;
         case Tegra::RenderTargetFormat::RGB10_A2_UNORM:
@@ -569,6 +570,7 @@ struct SurfaceParams {
         case Tegra::RenderTargetFormat::RGBA8_UNORM:
         case Tegra::RenderTargetFormat::RGBA8_SRGB:
         case Tegra::RenderTargetFormat::BGRA8_UNORM:
+        case Tegra::RenderTargetFormat::BGRA8_SRGB:
         case Tegra::RenderTargetFormat::RGB10_A2_UNORM:
         case Tegra::RenderTargetFormat::R8_UNORM:
         case Tegra::RenderTargetFormat::RG16_UNORM:
@@ -669,8 +671,7 @@ struct SurfaceParams {
     static SurfaceParams CreateForTexture(const Tegra::Texture::FullTextureInfo& config);
 
     /// Creates SurfaceParams from a framebuffer configuration
-    static SurfaceParams CreateForFramebuffer(
-        const Tegra::Engines::Maxwell3D::Regs::RenderTargetConfig& config);
+    static SurfaceParams CreateForFramebuffer(size_t index);
 
     /// Creates SurfaceParams for a depth buffer configuration
     static SurfaceParams CreateForDepthBuffer(u32 zeta_width, u32 zeta_height,
@@ -679,8 +680,8 @@ struct SurfaceParams {
 
     /// Checks if surfaces are compatible for caching
     bool IsCompatibleSurface(const SurfaceParams& other) const {
-        return std::tie(pixel_format, type, cache_width, cache_height) ==
-               std::tie(other.pixel_format, other.type, other.cache_width, other.cache_height);
+        return std::tie(pixel_format, type, width, height) ==
+               std::tie(other.pixel_format, other.type, other.width, other.height);
     }
 
     VAddr addr;
@@ -695,10 +696,6 @@ struct SurfaceParams {
     u32 unaligned_height;
     size_t size_in_bytes;
     SurfaceTarget target;
-
-    // Parameters used for caching only
-    u32 cache_width;
-    u32 cache_height;
 };
 
 }; // namespace OpenGL
@@ -774,9 +771,11 @@ public:
     /// Get a surface based on the texture configuration
     Surface GetTextureSurface(const Tegra::Texture::FullTextureInfo& config);
 
-    /// Get the color and depth surfaces based on the framebuffer configuration
-    SurfaceSurfaceRect_Tuple GetFramebufferSurfaces(bool using_color_fb, bool using_depth_fb,
-                                                    bool preserve_contents);
+    /// Get the depth surface based on the framebuffer configuration
+    Surface GetDepthBufferSurface(bool preserve_contents);
+
+    /// Get the color surface based on the framebuffer configuration and the specified render target
+    Surface GetColorBufferSurface(size_t index, bool preserve_contents);
 
     /// Flushes the surface to Switch memory
     void FlushSurface(const Surface& surface);
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index 7e4b85ac3..61080f5cc 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -13,8 +13,8 @@ namespace OpenGL {
 
 /// Gets the address for the specified shader stage program
 static VAddr GetShaderAddress(Maxwell::ShaderProgram program) {
-    auto& gpu = Core::System::GetInstance().GPU().Maxwell3D();
-    auto& shader_config = gpu.regs.shader_config[static_cast<size_t>(program)];
+    const auto& gpu = Core::System::GetInstance().GPU().Maxwell3D();
+    const auto& shader_config = gpu.regs.shader_config[static_cast<size_t>(program)];
     return *gpu.memory_manager.GpuToCpuAddress(gpu.regs.code_address.CodeAddress() +
                                                shader_config.offset);
 }
@@ -86,7 +86,7 @@ CachedShader::CachedShader(VAddr addr, Maxwell::ShaderProgram program_type)
 }
 
 GLuint CachedShader::GetProgramResourceIndex(const GLShader::ConstBufferEntry& buffer) {
-    auto search{resource_cache.find(buffer.GetHash())};
+    const auto search{resource_cache.find(buffer.GetHash())};
     if (search == resource_cache.end()) {
         const GLuint index{
             glGetProgramResourceIndex(program.handle, GL_UNIFORM_BLOCK, buffer.GetName().c_str())};
@@ -98,7 +98,7 @@ GLuint CachedShader::GetProgramResourceIndex(const GLShader::ConstBufferEntry& b
 }
 
 GLint CachedShader::GetUniformLocation(const GLShader::SamplerEntry& sampler) {
-    auto search{uniform_cache.find(sampler.GetHash())};
+    const auto search{uniform_cache.find(sampler.GetHash())};
     if (search == uniform_cache.end()) {
         const GLint index{glGetUniformLocation(program.handle, sampler.GetName().c_str())};
         uniform_cache[sampler.GetHash()] = index;
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index 172ba8335..582c811e0 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -113,7 +113,7 @@ private:
 
     /// Scans a range of code for labels and determines the exit method.
     ExitMethod Scan(u32 begin, u32 end, std::set<u32>& labels) {
-        auto [iter, inserted] =
+        const auto [iter, inserted] =
             exit_method_map.emplace(std::make_pair(begin, end), ExitMethod::Undetermined);
         ExitMethod& exit_method = iter->second;
         if (!inserted)
@@ -131,22 +131,22 @@ private:
                     if (instr.pred.pred_index == static_cast<u64>(Pred::UnusedIndex)) {
                         return exit_method = ExitMethod::AlwaysEnd;
                     } else {
-                        ExitMethod not_met = Scan(offset + 1, end, labels);
+                        const ExitMethod not_met = Scan(offset + 1, end, labels);
                         return exit_method = ParallelExit(ExitMethod::AlwaysEnd, not_met);
                     }
                 }
                 case OpCode::Id::BRA: {
-                    u32 target = offset + instr.bra.GetBranchTarget();
+                    const u32 target = offset + instr.bra.GetBranchTarget();
                     labels.insert(target);
-                    ExitMethod no_jmp = Scan(offset + 1, end, labels);
-                    ExitMethod jmp = Scan(target, end, labels);
+                    const ExitMethod no_jmp = Scan(offset + 1, end, labels);
+                    const ExitMethod jmp = Scan(target, end, labels);
                     return exit_method = ParallelExit(no_jmp, jmp);
                 }
                 case OpCode::Id::SSY: {
                     // The SSY instruction uses a similar encoding as the BRA instruction.
                     ASSERT_MSG(instr.bra.constant_buffer == 0,
                                "Constant buffer SSY is not supported");
-                    u32 target = offset + instr.bra.GetBranchTarget();
+                    const u32 target = offset + instr.bra.GetBranchTarget();
                     labels.insert(target);
                     // Continue scanning for an exit method.
                     break;
@@ -346,8 +346,8 @@ public:
      */
     void SetRegisterToInputAttibute(const Register& reg, u64 elem, Attribute::Index attribute,
                                     const Tegra::Shader::IpaMode& input_mode) {
-        std::string dest = GetRegisterAsFloat(reg);
-        std::string src = GetInputAttribute(attribute, input_mode) + GetSwizzle(elem);
+        const std::string dest = GetRegisterAsFloat(reg);
+        const std::string src = GetInputAttribute(attribute, input_mode) + GetSwizzle(elem);
         shader.AddLine(dest + " = " + src + ';');
     }
 
@@ -359,8 +359,8 @@ public:
      * @param reg The register to use as the source value.
      */
     void SetOutputAttributeToRegister(Attribute::Index attribute, u64 elem, const Register& reg) {
-        std::string dest = GetOutputAttribute(attribute);
-        std::string src = GetRegisterAsFloat(reg);
+        const std::string dest = GetOutputAttribute(attribute);
+        const std::string src = GetRegisterAsFloat(reg);
 
         if (!dest.empty()) {
             // Can happen with unknown/unimplemented output attributes, in which case we ignore the
@@ -393,9 +393,9 @@ public:
                                    GLSLRegister::Type type) {
         declr_const_buffers[cbuf_index].MarkAsUsedIndirect(cbuf_index, stage);
 
-        std::string final_offset = fmt::format("({} + {})", index_str, offset / 4);
-        std::string value = 'c' + std::to_string(cbuf_index) + '[' + final_offset + " / 4][" +
-                            final_offset + " % 4]";
+        const std::string final_offset = fmt::format("({} + {})", index_str, offset / 4);
+        const std::string value = 'c' + std::to_string(cbuf_index) + '[' + final_offset + " / 4][" +
+                                  final_offset + " % 4]";
 
         if (type == GLSLRegister::Type::Float) {
             return value;
@@ -468,10 +468,10 @@ public:
     /// necessary.
     std::string AccessSampler(const Sampler& sampler, Tegra::Shader::TextureType type,
                               bool is_array) {
-        size_t offset = static_cast<size_t>(sampler.index.Value());
+        const size_t offset = static_cast<size_t>(sampler.index.Value());
 
         // If this sampler has already been used, return the existing mapping.
-        auto itr =
+        const auto itr =
             std::find_if(used_samplers.begin(), used_samplers.end(),
                          [&](const SamplerEntry& entry) { return entry.GetOffset() == offset; });
 
@@ -481,8 +481,8 @@ public:
         }
 
         // Otherwise create a new mapping for this sampler
-        size_t next_index = used_samplers.size();
-        SamplerEntry entry{stage, offset, next_index, type, is_array};
+        const size_t next_index = used_samplers.size();
+        const SamplerEntry entry{stage, offset, next_index, type, is_array};
         used_samplers.emplace_back(entry);
         return entry.GetName();
     }
@@ -699,7 +699,7 @@ private:
         };
 
         bool IsColorComponentOutputEnabled(u32 render_target, u32 component) const {
-            u32 bit = render_target * 4 + component;
+            const u32 bit = render_target * 4 + component;
             return enabled_color_outputs & (1 << bit);
         }
     };
@@ -707,7 +707,7 @@ private:
 
     /// Gets the Subroutine object corresponding to the specified address.
     const Subroutine& GetSubroutine(u32 begin, u32 end) const {
-        auto iter = subroutines.find(Subroutine{begin, end, suffix});
+        const auto iter = subroutines.find(Subroutine{begin, end, suffix});
         ASSERT(iter != subroutines.end());
         return *iter;
     }
@@ -752,7 +752,7 @@ private:
         // Can't assign to the constant predicate.
         ASSERT(pred != static_cast<u64>(Pred::UnusedIndex));
 
-        std::string variable = 'p' + std::to_string(pred) + '_' + suffix;
+        const std::string variable = 'p' + std::to_string(pred) + '_' + suffix;
         shader.AddLine(variable + " = " + value + ';');
         declr_predicates.insert(std::move(variable));
     }
@@ -1023,7 +1023,7 @@ private:
             // TODO(Subv): Figure out how dual-source blending is configured in the Switch.
             for (u32 component = 0; component < 4; ++component) {
                 if (header.IsColorComponentOutputEnabled(render_target, component)) {
-                    shader.AddLine(fmt::format("color[{}][{}] = {};", render_target, component,
+                    shader.AddLine(fmt::format("FragColor{}[{}] = {};", render_target, component,
                                                regs.GetRegisterAsFloat(current_reg)));
                     ++current_reg;
                 }
@@ -1033,7 +1033,11 @@ private:
         if (header.writes_depth) {
             // The depth output is always 2 registers after the last color output, and current_reg
             // already contains one past the last color register.
-            shader.AddLine("gl_FragDepth = " + regs.GetRegisterAsFloat(current_reg + 1) + ';');
+
+            shader.AddLine(
+                "gl_FragDepth = " +
+                regs.GetRegisterAsFloat(static_cast<Tegra::Shader::Register>(current_reg) + 1) +
+                ';');
         }
     }
 
@@ -1435,7 +1439,7 @@ private:
                 if (instr.alu_integer.negate_b)
                     op_b = "-(" + op_b + ')';
 
-                std::string shift = std::to_string(instr.alu_integer.shift_amount.Value());
+                const std::string shift = std::to_string(instr.alu_integer.shift_amount.Value());
 
                 regs.SetRegisterToInteger(instr.gpr0, true, 0,
                                           "((" + op_a + " << " + shift + ") + " + op_b + ')', 1, 1);
@@ -1453,7 +1457,7 @@ private:
             case OpCode::Id::SEL_C:
             case OpCode::Id::SEL_R:
             case OpCode::Id::SEL_IMM: {
-                std::string condition =
+                const std::string condition =
                     GetPredicateCondition(instr.sel.pred, instr.sel.neg_pred != 0);
                 regs.SetRegisterToInteger(instr.gpr0, true, 0,
                                           '(' + condition + ") ? " + op_a + " : " + op_b, 1, 1);
@@ -1475,8 +1479,9 @@ private:
             case OpCode::Id::LOP3_C:
             case OpCode::Id::LOP3_R:
             case OpCode::Id::LOP3_IMM: {
-                std::string op_c = regs.GetRegisterAsInteger(instr.gpr39);
+                const std::string op_c = regs.GetRegisterAsInteger(instr.gpr39);
                 std::string lut;
+
                 if (opcode->GetId() == OpCode::Id::LOP3_R) {
                     lut = '(' + std::to_string(instr.alu.lop3.GetImmLut28()) + ')';
                 } else {
@@ -1491,15 +1496,82 @@ private:
             case OpCode::Id::IMNMX_IMM: {
                 ASSERT_MSG(instr.imnmx.exchange == Tegra::Shader::IMinMaxExchange::None,
                            "Unimplemented");
-                std::string condition =
+                const std::string condition =
                     GetPredicateCondition(instr.imnmx.pred, instr.imnmx.negate_pred != 0);
-                std::string parameters = op_a + ',' + op_b;
+                const std::string parameters = op_a + ',' + op_b;
                 regs.SetRegisterToInteger(instr.gpr0, instr.imnmx.is_signed, 0,
                                           '(' + condition + ") ? min(" + parameters + ") : max(" +
                                               parameters + ')',
                                           1, 1);
                 break;
             }
+            case OpCode::Id::LEA_R2:
+            case OpCode::Id::LEA_R1:
+            case OpCode::Id::LEA_IMM:
+            case OpCode::Id::LEA_RZ:
+            case OpCode::Id::LEA_HI: {
+                std::string op_a;
+                std::string op_b;
+                std::string op_c;
+                // Pick the operands of the result: op_a + (op_b * (1 << op_c)).
+                switch (opcode->GetId()) {
+                case OpCode::Id::LEA_R2: {
+                    op_a = regs.GetRegisterAsInteger(instr.gpr20);
+                    op_b = regs.GetRegisterAsInteger(instr.gpr39);
+                    op_c = std::to_string(instr.lea.r2.entry_a);
+                    break;
+                }
+
+                case OpCode::Id::LEA_R1: {
+                    const bool neg = instr.lea.r1.neg != 0;
+                    op_a = regs.GetRegisterAsInteger(instr.gpr8);
+                    if (neg)
+                        op_a = "-(" + op_a + ')';
+                    op_b = regs.GetRegisterAsInteger(instr.gpr20);
+                    op_c = std::to_string(instr.lea.r1.entry_a);
+                    break;
+                }
+
+                case OpCode::Id::LEA_IMM: {
+                    const bool neg = instr.lea.imm.neg != 0;
+                    op_b = regs.GetRegisterAsInteger(instr.gpr8);
+                    if (neg)
+                        op_b = "-(" + op_b + ')';
+                    op_a = std::to_string(instr.lea.imm.entry_a);
+                    op_c = std::to_string(instr.lea.imm.entry_b);
+                    break;
+                }
+
+                case OpCode::Id::LEA_RZ: {
+                    const bool neg = instr.lea.rz.neg != 0;
+                    op_b = regs.GetRegisterAsInteger(instr.gpr8);
+                    if (neg)
+                        op_b = "-(" + op_b + ')';
+                    op_a = regs.GetUniform(instr.lea.rz.cb_index, instr.lea.rz.cb_offset,
+                                           GLSLRegister::Type::Integer);
+                    op_c = std::to_string(instr.lea.rz.entry_a);
+
+                    break;
+                }
+                // LEA_HI is not handled yet; it shares the error path with unknown ids.
+                case OpCode::Id::LEA_HI:
+                default: {
+                    op_b = regs.GetRegisterAsInteger(instr.gpr8);
+                    op_a = std::to_string(instr.lea.imm.entry_a);
+                    op_c = std::to_string(instr.lea.imm.entry_b);
+                    LOG_CRITICAL(HW_GPU, "Unhandled LEA subinstruction: {}", opcode->GetName());
+                    UNREACHABLE();
+                }
+                }
+                if (instr.lea.pred48 != static_cast<u64>(Pred::UnusedIndex)) {
+                    LOG_ERROR(HW_GPU, "Unhandled LEA Predicate");
+                    UNREACHABLE();
+                }
+                const std::string value = '(' + op_a + " + (" + op_b + "*(1 << " + op_c + ")))";
+                regs.SetRegisterToInteger(instr.gpr0, true, 0, value, 1, 1);
+
+                break;
+            }
             default: {
                 LOG_CRITICAL(HW_GPU, "Unhandled ArithmeticInteger instruction: {}",
                              opcode->GetName());
@@ -1510,7 +1582,7 @@ private:
             break;
         }
         case OpCode::Type::Ffma: {
-            std::string op_a = regs.GetRegisterAsFloat(instr.gpr8);
+            const std::string op_a = regs.GetRegisterAsFloat(instr.gpr8);
             std::string op_b = instr.ffma.negate_b ? "-" : "";
             std::string op_c = instr.ffma.negate_c ? "-" : "";
 
@@ -1720,7 +1792,7 @@ private:
                 shader.AddLine("uint index = (" + regs.GetRegisterAsInteger(instr.gpr8, 0, false) +
                                " / 4) & (MAX_CONSTBUFFER_ELEMENTS - 1);");
 
-                std::string op_a =
+                const std::string op_a =
                     regs.GetUniformIndirect(instr.cbuf36.index, instr.cbuf36.offset + 0, "index",
                                             GLSLRegister::Type::Float);
 
@@ -1730,7 +1802,7 @@ private:
                     break;
 
                 case Tegra::Shader::UniformType::Double: {
-                    std::string op_b =
+                    const std::string op_b =
                         regs.GetUniformIndirect(instr.cbuf36.index, instr.cbuf36.offset + 4,
                                                 "index", GLSLRegister::Type::Float);
                     regs.SetRegisterToFloat(instr.gpr0, 0, op_a, 1, 1);
@@ -1760,13 +1832,13 @@ private:
 
                 switch (texture_type) {
                 case Tegra::Shader::TextureType::Texture1D: {
-                    std::string x = regs.GetRegisterAsFloat(instr.gpr8);
+                    const std::string x = regs.GetRegisterAsFloat(instr.gpr8);
                     coord = "float coords = " + x + ';';
                     break;
                 }
                 case Tegra::Shader::TextureType::Texture2D: {
-                    std::string x = regs.GetRegisterAsFloat(instr.gpr8);
-                    std::string y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1);
+                    const std::string x = regs.GetRegisterAsFloat(instr.gpr8);
+                    const std::string y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1);
                     coord = "vec2 coords = vec2(" + x + ", " + y + ");";
                     break;
                 }
@@ -1776,8 +1848,8 @@ private:
                     UNREACHABLE();
 
                     // Fallback to interpreting as a 2D texture for now
-                    std::string x = regs.GetRegisterAsFloat(instr.gpr8);
-                    std::string y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1);
+                    const std::string x = regs.GetRegisterAsFloat(instr.gpr8);
+                    const std::string y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1);
                     coord = "vec2 coords = vec2(" + x + ", " + y + ");";
                     texture_type = Tegra::Shader::TextureType::Texture2D;
                 }
@@ -1811,13 +1883,13 @@ private:
                 switch (texture_type) {
                 case Tegra::Shader::TextureType::Texture2D: {
                     if (is_array) {
-                        std::string index = regs.GetRegisterAsInteger(instr.gpr8);
-                        std::string x = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1);
-                        std::string y = regs.GetRegisterAsFloat(instr.gpr20);
+                        const std::string index = regs.GetRegisterAsInteger(instr.gpr8);
+                        const std::string x = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1);
+                        const std::string y = regs.GetRegisterAsFloat(instr.gpr20);
                         coord = "vec3 coords = vec3(" + x + ", " + y + ", " + index + ");";
                     } else {
-                        std::string x = regs.GetRegisterAsFloat(instr.gpr8);
-                        std::string y = regs.GetRegisterAsFloat(instr.gpr20);
+                        const std::string x = regs.GetRegisterAsFloat(instr.gpr8);
+                        const std::string y = regs.GetRegisterAsFloat(instr.gpr20);
                         coord = "vec2 coords = vec2(" + x + ", " + y + ");";
                     }
                     break;
@@ -1828,8 +1900,8 @@ private:
                     UNREACHABLE();
 
                     // Fallback to interpreting as a 2D texture for now
-                    std::string x = regs.GetRegisterAsFloat(instr.gpr8);
-                    std::string y = regs.GetRegisterAsFloat(instr.gpr20);
+                    const std::string x = regs.GetRegisterAsFloat(instr.gpr8);
+                    const std::string y = regs.GetRegisterAsFloat(instr.gpr20);
                     coord = "vec2 coords = vec2(" + x + ", " + y + ");";
                     texture_type = Tegra::Shader::TextureType::Texture2D;
                     is_array = false;
@@ -1850,8 +1922,8 @@ private:
                         LOG_CRITICAL(HW_GPU, "Unhandled 2d array texture");
                         UNREACHABLE();
                     } else {
-                        std::string x = regs.GetRegisterAsInteger(instr.gpr8);
-                        std::string y = regs.GetRegisterAsInteger(instr.gpr20);
+                        const std::string x = regs.GetRegisterAsInteger(instr.gpr8);
+                        const std::string y = regs.GetRegisterAsInteger(instr.gpr20);
                         coord = "ivec2 coords = ivec2(" + x + ", " + y + ");";
                     }
                     break;
@@ -1874,8 +1946,8 @@ private:
 
                 switch (instr.tld4.texture_type) {
                 case Tegra::Shader::TextureType::Texture2D: {
-                    std::string x = regs.GetRegisterAsFloat(instr.gpr8);
-                    std::string y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1);
+                    const std::string x = regs.GetRegisterAsFloat(instr.gpr8);
+                    const std::string y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1);
                     coord = "vec2 coords = vec2(" + x + ", " + y + ");";
                     break;
                 }
@@ -1920,6 +1992,74 @@ private:
                 WriteTexsInstruction(instr, coord, texture);
                 break;
             }
+            case OpCode::Id::TXQ: {
+                // TODO: The texture refactor changes how samplers work, and not every texture
+                // instruction specifies the type of texture its sampler uses, so a non-array
+                // Texture2D sampler is assumed here. This must be revisited later.
+                const std::string sampler =
+                    GetSampler(instr.sampler, Tegra::Shader::TextureType::Texture2D, false);
+                switch (instr.txq.query_type) {
+                case Tegra::Shader::TextureQueryType::Dimension: {
+                    const std::string texture = "textureQueryLevels(" + sampler + ')';
+                    regs.SetRegisterToInteger(instr.gpr0, true, 0, texture, 1, 1);
+                    break;
+                }
+                default: {
+                    LOG_CRITICAL(HW_GPU, "Unhandled texture query type: {}",
+                                 static_cast<u32>(instr.txq.query_type.Value()));
+                    UNREACHABLE();
+                }
+                }
+                break;
+            }
+            case OpCode::Id::TMML: {
+                // TMML: queries the LOD with textureQueryLod and stores the result, scaled by 256,
+                // in gpr0 (tmp.y, signed) and gpr0 + 1 (tmp.x, unsigned).
+                const std::string op_a = regs.GetRegisterAsFloat(instr.gpr8);
+                const std::string op_b = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1);
+                const bool is_array = instr.tmml.array != 0;
+                auto texture_type = instr.tmml.texture_type.Value();
+
+                // TODO: add coordinates for different samplers once other texture types are
+                // implemented.
+                std::string coord;
+                switch (texture_type) {
+                case Tegra::Shader::TextureType::Texture1D: {
+                    coord = "float coords = " + op_a + ';';
+                    break;
+                }
+                case Tegra::Shader::TextureType::Texture2D: {
+                    coord = "vec2 coords = vec2(" + op_a + ", " + op_b + ");";
+                    break;
+                }
+                default:
+                    LOG_CRITICAL(HW_GPU, "Unhandled texture type {}",
+                                 static_cast<u32>(texture_type));
+                    UNREACHABLE();
+
+                    // Fallback to interpreting as a 2D texture for now
+                    coord = "vec2 coords = vec2(" + op_a + ", " + op_b + ");";
+                    texture_type = Tegra::Shader::TextureType::Texture2D;
+                }
+                // Request the sampler only after the switch so that the 2D fallback above is
+                // reflected in the texture type the sampler is created with.
+                const std::string sampler = GetSampler(instr.sampler, texture_type, is_array);
+
+                // Add an extra scope and declare the texture coords inside to prevent
+                // overwriting them in case they are used as outputs of the tmml instruction.
+                shader.AddLine('{');
+                ++shader.scope;
+                shader.AddLine(coord);
+                const std::string texture = "textureQueryLod(" + sampler + ", coords)";
+                const std::string tmp = "vec2 tmp = " + texture + "*vec2(256.0, 256.0);";
+                shader.AddLine(tmp);
+
+                regs.SetRegisterToInteger(instr.gpr0, true, 0, "int(tmp.y)", 1, 1);
+                regs.SetRegisterToInteger(instr.gpr0.Value() + 1, false, 0, "uint(tmp.x)", 1, 1);
+                --shader.scope;
+                shader.AddLine('}');
+                break;
+            }
             default: {
                 LOG_CRITICAL(HW_GPU, "Unhandled memory instruction: {}", opcode->GetName());
                 UNREACHABLE();
@@ -1959,12 +2099,12 @@ private:
             // We can't use the constant predicate as destination.
             ASSERT(instr.fsetp.pred3 != static_cast<u64>(Pred::UnusedIndex));
 
-            std::string second_pred =
+            const std::string second_pred =
                 GetPredicateCondition(instr.fsetp.pred39, instr.fsetp.neg_pred != 0);
 
-            std::string combiner = GetPredicateCombiner(instr.fsetp.op);
+            const std::string combiner = GetPredicateCombiner(instr.fsetp.op);
 
-            std::string predicate = GetPredicateComparison(instr.fsetp.cond, op_a, op_b);
+            const std::string predicate = GetPredicateComparison(instr.fsetp.cond, op_a, op_b);
             // Set the primary predicate to the result of Predicate OP SecondPredicate
             SetPredicate(instr.fsetp.pred3,
                          '(' + predicate + ") " + combiner + " (" + second_pred + ')');
@@ -1978,7 +2118,8 @@ private:
             break;
         }
         case OpCode::Type::IntegerSetPredicate: {
-            std::string op_a = regs.GetRegisterAsInteger(instr.gpr8, 0, instr.isetp.is_signed);
+            const std::string op_a =
+                regs.GetRegisterAsInteger(instr.gpr8, 0, instr.isetp.is_signed);
             std::string op_b;
 
             if (instr.is_b_imm) {
@@ -1995,12 +2136,12 @@ private:
             // We can't use the constant predicate as destination.
             ASSERT(instr.isetp.pred3 != static_cast<u64>(Pred::UnusedIndex));
 
-            std::string second_pred =
+            const std::string second_pred =
                 GetPredicateCondition(instr.isetp.pred39, instr.isetp.neg_pred != 0);
 
-            std::string combiner = GetPredicateCombiner(instr.isetp.op);
+            const std::string combiner = GetPredicateCombiner(instr.isetp.op);
 
-            std::string predicate = GetPredicateComparison(instr.isetp.cond, op_a, op_b);
+            const std::string predicate = GetPredicateComparison(instr.isetp.cond, op_a, op_b);
             // Set the primary predicate to the result of Predicate OP SecondPredicate
             SetPredicate(instr.isetp.pred3,
                          '(' + predicate + ") " + combiner + " (" + second_pred + ')');
@@ -2013,21 +2154,45 @@ private:
             }
             break;
         }
+        case OpCode::Type::PredicateSetRegister: {
+            const std::string op_a =
+                GetPredicateCondition(instr.pset.pred12, instr.pset.neg_pred12 != 0);
+            const std::string op_b =
+                GetPredicateCondition(instr.pset.pred29, instr.pset.neg_pred29 != 0);
+
+            const std::string second_pred =
+                GetPredicateCondition(instr.pset.pred39, instr.pset.neg_pred39 != 0);
+
+            const std::string combiner = GetPredicateCombiner(instr.pset.op);
+            // result = (pred12 <cond> pred29) <op> pred39; bf selects a float or an integer result.
+            const std::string predicate =
+                '(' + op_a + ") " + GetPredicateCombiner(instr.pset.cond) + " (" + op_b + ')';
+            const std::string result = '(' + predicate + ") " + combiner + " (" + second_pred + ')';
+            if (instr.pset.bf == 0) {
+                const std::string value = '(' + result + ") ? 0xFFFFFFFF : 0";
+                regs.SetRegisterToInteger(instr.gpr0, false, 0, value, 1, 1);
+            } else {
+                const std::string value = '(' + result + ") ? 1.0 : 0.0";
+                regs.SetRegisterToFloat(instr.gpr0, 0, value, 1, 1);
+            }
+
+            break;
+        }
         case OpCode::Type::PredicateSetPredicate: {
-            std::string op_a =
+            const std::string op_a =
                 GetPredicateCondition(instr.psetp.pred12, instr.psetp.neg_pred12 != 0);
-            std::string op_b =
+            const std::string op_b =
                 GetPredicateCondition(instr.psetp.pred29, instr.psetp.neg_pred29 != 0);
 
             // We can't use the constant predicate as destination.
             ASSERT(instr.psetp.pred3 != static_cast<u64>(Pred::UnusedIndex));
 
-            std::string second_pred =
+            const std::string second_pred =
                 GetPredicateCondition(instr.psetp.pred39, instr.psetp.neg_pred39 != 0);
 
-            std::string combiner = GetPredicateCombiner(instr.psetp.op);
+            const std::string combiner = GetPredicateCombiner(instr.psetp.op);
 
-            std::string predicate =
+            const std::string predicate =
                 '(' + op_a + ") " + GetPredicateCombiner(instr.psetp.cond) + " (" + op_b + ')';
 
             // Set the primary predicate to the result of Predicate OP SecondPredicate
@@ -2053,7 +2218,7 @@ private:
             std::string op_b = instr.fset.neg_b ? "-" : "";
 
             if (instr.is_b_imm) {
-                std::string imm = GetImmediate19(instr);
+                const std::string imm = GetImmediate19(instr);
                 if (instr.fset.neg_imm)
                     op_b += "(-" + imm + ')';
                 else
@@ -2073,13 +2238,14 @@ private:
 
             // The fset instruction sets a register to 1.0 or -1 (depending on the bf bit) if the
             // condition is true, and to 0 otherwise.
-            std::string second_pred =
+            const std::string second_pred =
                 GetPredicateCondition(instr.fset.pred39, instr.fset.neg_pred != 0);
 
-            std::string combiner = GetPredicateCombiner(instr.fset.op);
+            const std::string combiner = GetPredicateCombiner(instr.fset.op);
 
-            std::string predicate = "((" + GetPredicateComparison(instr.fset.cond, op_a, op_b) +
-                                    ") " + combiner + " (" + second_pred + "))";
+            const std::string predicate = "((" +
+                                          GetPredicateComparison(instr.fset.cond, op_a, op_b) +
+                                          ") " + combiner + " (" + second_pred + "))";
 
             if (instr.fset.bf) {
                 regs.SetRegisterToFloat(instr.gpr0, 0, predicate + " ? 1.0 : 0.0", 1, 1);
@@ -2090,7 +2256,7 @@ private:
             break;
         }
         case OpCode::Type::IntegerSet: {
-            std::string op_a = regs.GetRegisterAsInteger(instr.gpr8, 0, instr.iset.is_signed);
+            const std::string op_a = regs.GetRegisterAsInteger(instr.gpr8, 0, instr.iset.is_signed);
 
             std::string op_b;
 
@@ -2107,13 +2273,14 @@ private:
 
             // The iset instruction sets a register to 1.0 or -1 (depending on the bf bit) if the
             // condition is true, and to 0 otherwise.
-            std::string second_pred =
+            const std::string second_pred =
                 GetPredicateCondition(instr.iset.pred39, instr.iset.neg_pred != 0);
 
-            std::string combiner = GetPredicateCombiner(instr.iset.op);
+            const std::string combiner = GetPredicateCombiner(instr.iset.op);
 
-            std::string predicate = "((" + GetPredicateComparison(instr.iset.cond, op_a, op_b) +
-                                    ") " + combiner + " (" + second_pred + "))";
+            const std::string predicate = "((" +
+                                          GetPredicateComparison(instr.iset.cond, op_a, op_b) +
+                                          ") " + combiner + " (" + second_pred + "))";
 
             if (instr.iset.bf) {
                 regs.SetRegisterToFloat(instr.gpr0, 0, predicate + " ? 1.0 : 0.0", 1, 1);
@@ -2263,7 +2430,7 @@ private:
             case OpCode::Id::BRA: {
                 ASSERT_MSG(instr.bra.constant_buffer == 0,
                            "BRA with constant buffers are not implemented");
-                u32 target = offset + instr.bra.GetBranchTarget();
+                const u32 target = offset + instr.bra.GetBranchTarget();
                 shader.AddLine("{ jmp_to = " + std::to_string(target) + "u; break; }");
                 break;
             }
@@ -2287,7 +2454,7 @@ private:
                 // has a similar structure to the BRA opcode.
                 ASSERT_MSG(instr.bra.constant_buffer == 0, "Constant buffer SSY is not supported");
 
-                u32 target = offset + instr.bra.GetBranchTarget();
+                const u32 target = offset + instr.bra.GetBranchTarget();
                 EmitPushToSSYStack(target);
                 break;
             }
@@ -2381,10 +2548,10 @@ private:
                     shader.AddLine("case " + std::to_string(label) + "u: {");
                     ++shader.scope;
 
-                    auto next_it = labels.lower_bound(label + 1);
-                    u32 next_label = next_it == labels.end() ? subroutine.end : *next_it;
+                    const auto next_it = labels.lower_bound(label + 1);
+                    const u32 next_label = next_it == labels.end() ? subroutine.end : *next_it;
 
-                    u32 compile_end = CompileRange(label, next_label);
+                    const u32 compile_end = CompileRange(label, next_label);
                     if (compile_end > next_label && compile_end != PROGRAM_END) {
                         // This happens only when there is a label inside a IF/LOOP block
                         shader.AddLine(" jmp_to = " + std::to_string(compile_end) + "u; break; }");
@@ -2447,7 +2614,8 @@ boost::optional<ProgramResult> DecompileProgram(const ProgramCode& program_code,
                                                 Maxwell3D::Regs::ShaderStage stage,
                                                 const std::string& suffix) {
     try {
-        auto subroutines = ControlFlowAnalyzer(program_code, main_offset, suffix).GetSubroutines();
+        const auto subroutines =
+            ControlFlowAnalyzer(program_code, main_offset, suffix).GetSubroutines();
         GLSLGenerator generator(subroutines, program_code, main_offset, stage, suffix);
         return ProgramResult{generator.GetShaderCode(), generator.GetEntries()};
     } catch (const DecompileFail& exception) {
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp
index e1b1a9d73..b0466c18f 100644
--- a/src/video_core/renderer_opengl/gl_shader_gen.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp
@@ -88,7 +88,14 @@ ProgramResult GenerateFragmentShader(const ShaderSetup& setup) {
             .get_value_or({});
     out += R"(
 in vec4 position;
-layout(location = 0) out vec4 color[8];
+layout(location = 0) out vec4 FragColor0;
+layout(location = 1) out vec4 FragColor1;
+layout(location = 2) out vec4 FragColor2;
+layout(location = 3) out vec4 FragColor3;
+layout(location = 4) out vec4 FragColor4;
+layout(location = 5) out vec4 FragColor5;
+layout(location = 6) out vec4 FragColor6;
+layout(location = 7) out vec4 FragColor7;
 
 layout (std140) uniform fs_config {
     vec4 viewport_flip;
diff --git a/src/video_core/renderer_opengl/gl_shader_util.cpp b/src/video_core/renderer_opengl/gl_shader_util.cpp
index 5781d9d16..5f3fe067e 100644
--- a/src/video_core/renderer_opengl/gl_shader_util.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_util.cpp
@@ -25,7 +25,7 @@ GLuint LoadShader(const char* source, GLenum type) {
     default:
         UNREACHABLE();
     }
-    GLuint shader_id = glCreateShader(type);
+    const GLuint shader_id = glCreateShader(type);
     glShaderSource(shader_id, 1, &source, nullptr);
     LOG_DEBUG(Render_OpenGL, "Compiling {} shader...", debug_type);
     glCompileShader(shader_id);
diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.cpp b/src/video_core/renderer_opengl/gl_stream_buffer.cpp
index e565afcee..aadf68f16 100644
--- a/src/video_core/renderer_opengl/gl_stream_buffer.cpp
+++ b/src/video_core/renderer_opengl/gl_stream_buffer.cpp
@@ -29,7 +29,7 @@ OGLStreamBuffer::OGLStreamBuffer(GLenum target, GLsizeiptr size, bool prefer_coh
     if (GLAD_GL_ARB_buffer_storage) {
         persistent = true;
         coherent = prefer_coherent;
-        GLbitfield flags =
+        const GLbitfield flags =
             GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | (coherent ? GL_MAP_COHERENT_BIT : 0);
         glBufferStorage(gl_target, allocate_size, nullptr, flags);
         mapped_ptr = static_cast<u8*>(glMapBufferRange(