25 files changed, 1110 insertions, 582 deletions
diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp
index d5831e752..2625ddfdc 100644
--- a/src/video_core/command_processor.cpp
+++ b/src/video_core/command_processor.cpp
@@ -28,98 +28,106 @@ enum class BufferMethods {
     CountBufferMethods = 0x40,
 };
 
-void GPU::WriteReg(u32 method, u32 subchannel, u32 value, u32 remaining_params) {
-    LOG_TRACE(HW_GPU,
-              "Processing method {:08X} on subchannel {} value "
-              "{:08X} remaining params {}",
-              method, subchannel, value, remaining_params);
-
-    ASSERT(subchannel < bound_engines.size());
-
-    if (method == static_cast<u32>(BufferMethods::BindObject)) {
-        // Bind the current subchannel to the desired engine id.
-        LOG_DEBUG(HW_GPU, "Binding subchannel {} to engine {}", subchannel, value);
-        bound_engines[subchannel] = static_cast<EngineID>(value);
-        return;
-    }
+MICROPROFILE_DEFINE(ProcessCommandLists, "GPU", "Execute command buffer", MP_RGB(128, 128, 192));
 
-    if (method < static_cast<u32>(BufferMethods::CountBufferMethods)) {
-        // TODO(Subv): Research and implement these methods.
-        LOG_ERROR(HW_GPU, "Special buffer methods other than Bind are not implemented");
-        return;
-    }
+void GPU::ProcessCommandLists(const std::vector<CommandListHeader>& commands) {
+    MICROPROFILE_SCOPE(ProcessCommandLists);
 
-    const EngineID engine = bound_engines[subchannel];
-
-    switch (engine) {
-    case EngineID::FERMI_TWOD_A:
-        fermi_2d->WriteReg(method, value);
-        break;
-    case EngineID::MAXWELL_B:
-        maxwell_3d->WriteReg(method, value, remaining_params);
-        break;
-    case EngineID::MAXWELL_COMPUTE_B:
-        maxwell_compute->WriteReg(method, value);
-        break;
-    case EngineID::MAXWELL_DMA_COPY_A:
-        maxwell_dma->WriteReg(method, value);
-        break;
-    default:
-        UNIMPLEMENTED_MSG("Unimplemented engine");
-    }
-}
+    auto WriteReg = [this](u32 method, u32 subchannel, u32 value, u32 remaining_params) {
+        LOG_TRACE(HW_GPU,
+                  "Processing method {:08X} on subchannel {} value "
+                  "{:08X} remaining params {}",
+                  method, subchannel, value, remaining_params);
 
-void GPU::ProcessCommandList(GPUVAddr address, u32 size) {
-    const boost::optional<VAddr> head_address = memory_manager->GpuToCpuAddress(address);
-    VAddr current_addr = *head_address;
-    while (current_addr < *head_address + size * sizeof(CommandHeader)) {
-        const CommandHeader header = {Memory::Read32(current_addr)};
-        current_addr += sizeof(u32);
-
-        switch (header.mode.Value()) {
-        case SubmissionMode::IncreasingOld:
-        case SubmissionMode::Increasing: {
-            // Increase the method value with each argument.
-            for (unsigned i = 0; i < header.arg_count; ++i) {
-                WriteReg(header.method + i, header.subchannel, Memory::Read32(current_addr),
-                         header.arg_count - i - 1);
-                current_addr += sizeof(u32);
-            }
-            break;
+        ASSERT(subchannel < bound_engines.size());
+
+        if (method == static_cast<u32>(BufferMethods::BindObject)) {
+            // Bind the current subchannel to the desired engine id.
+            LOG_DEBUG(HW_GPU, "Binding subchannel {} to engine {}", subchannel, value);
+            bound_engines[subchannel] = static_cast<EngineID>(value);
+            return;
         }
-        case SubmissionMode::NonIncreasingOld:
-        case SubmissionMode::NonIncreasing: {
-            // Use the same method value for all arguments.
-            for (unsigned i = 0; i < header.arg_count; ++i) {
-                WriteReg(header.method, header.subchannel, Memory::Read32(current_addr),
-                         header.arg_count - i - 1);
-                current_addr += sizeof(u32);
-            }
+
+        if (method < static_cast<u32>(BufferMethods::CountBufferMethods)) {
+            // TODO(Subv): Research and implement these methods.
+            LOG_ERROR(HW_GPU, "Special buffer methods other than Bind are not implemented");
+            return;
+        }
+
+        const EngineID engine = bound_engines[subchannel];
+
+        switch (engine) {
+        case EngineID::FERMI_TWOD_A:
+            fermi_2d->WriteReg(method, value);
+            break;
+        case EngineID::MAXWELL_B:
+            maxwell_3d->WriteReg(method, value, remaining_params);
             break;
+        case EngineID::MAXWELL_COMPUTE_B:
+            maxwell_compute->WriteReg(method, value);
+            break;
+        case EngineID::MAXWELL_DMA_COPY_A:
+            maxwell_dma->WriteReg(method, value);
+            break;
+        default:
+            UNIMPLEMENTED_MSG("Unimplemented engine");
         }
-        case SubmissionMode::IncreaseOnce: {
-            ASSERT(header.arg_count.Value() >= 1);
+    };
 
-            // Use the original method for the first argument and then the next method for all other
-            // arguments.
-            WriteReg(header.method, header.subchannel, Memory::Read32(current_addr),
-                     header.arg_count - 1);
+    for (auto entry : commands) {
+        Tegra::GPUVAddr address = entry.Address();
+        u32 size = entry.sz;
+        const boost::optional<VAddr> head_address = memory_manager->GpuToCpuAddress(address);
+        VAddr current_addr = *head_address;
+        while (current_addr < *head_address + size * sizeof(CommandHeader)) {
+            const CommandHeader header = {Memory::Read32(current_addr)};
             current_addr += sizeof(u32);
 
-            for (unsigned i = 1; i < header.arg_count; ++i) {
-                WriteReg(header.method + 1, header.subchannel, Memory::Read32(current_addr),
-                         header.arg_count - i - 1);
+            switch (header.mode.Value()) {
+            case SubmissionMode::IncreasingOld:
+            case SubmissionMode::Increasing: {
+                // Increase the method value with each argument.
+                for (unsigned i = 0; i < header.arg_count; ++i) {
+                    WriteReg(header.method + i, header.subchannel, Memory::Read32(current_addr),
+                             header.arg_count - i - 1);
+                    current_addr += sizeof(u32);
+                }
+                break;
+            }
+            case SubmissionMode::NonIncreasingOld:
+            case SubmissionMode::NonIncreasing: {
+                // Use the same method value for all arguments.
+                for (unsigned i = 0; i < header.arg_count; ++i) {
+                    WriteReg(header.method, header.subchannel, Memory::Read32(current_addr),
+                             header.arg_count - i - 1);
+                    current_addr += sizeof(u32);
+                }
+                break;
+            }
+            case SubmissionMode::IncreaseOnce: {
+                ASSERT(header.arg_count.Value() >= 1);
+
+                // Use the original method for the first argument and then the next method for all
+                // other arguments.
+                WriteReg(header.method, header.subchannel, Memory::Read32(current_addr),
+                         header.arg_count - 1);
                 current_addr += sizeof(u32);
+
+                for (unsigned i = 1; i < header.arg_count; ++i) {
+                    WriteReg(header.method + 1, header.subchannel, Memory::Read32(current_addr),
+                             header.arg_count - i - 1);
+                    current_addr += sizeof(u32);
+                }
+                break;
+            }
+            case SubmissionMode::Inline: {
+                // The register value is stored in the bits 16-28 as an immediate
+                WriteReg(header.method, header.subchannel, header.inline_data, 0);
+                break;
+            }
+            default:
+                UNIMPLEMENTED();
             }
-            break;
-        }
-        case SubmissionMode::Inline: {
-            // The register value is stored in the bits 16-28 as an immediate
-            WriteReg(header.method, header.subchannel, header.inline_data, 0);
-            break;
-        }
-        default:
-            UNIMPLEMENTED();
         }
     }
 }
diff --git a/src/video_core/command_processor.h b/src/video_core/command_processor.h
index a01153e0b..bd766e77a 100644
--- a/src/video_core/command_processor.h
+++ b/src/video_core/command_processor.h
@@ -7,6 +7,7 @@
 #include <type_traits>
 #include "common/bit_field.h"
 #include "common/common_types.h"
+#include "video_core/memory_manager.h"
 
 namespace Tegra {
 
@@ -19,6 +20,22 @@ enum class SubmissionMode : u32 {
     IncreaseOnce = 5
 };
 
+struct CommandListHeader {
+    u32 entry0; // gpu_va_lo
+    union {
+        u32 entry1; // gpu_va_hi | (unk_0x02 << 0x08) | (size << 0x0A) | (unk_0x01 << 0x1F)
+        BitField<0, 8, u32> gpu_va_hi;
+        BitField<8, 2, u32> unk1;
+        BitField<10, 21, u32> sz;
+        BitField<31, 1, u32> unk2;
+    };
+
+    GPUVAddr Address() const {
+        return (static_cast<GPUVAddr>(gpu_va_hi) << 32) | entry0;
+    }
+};
+static_assert(sizeof(CommandListHeader) == 8, "CommandListHeader is incorrect size");
+
 union CommandHeader {
     u32 hex;
 
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index e63ad4d46..329079ddd 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -135,8 +135,6 @@ void Maxwell3D::WriteReg(u32 method, u32 value, u32 remaining_params) {
         break;
     }
 
-    rasterizer.NotifyMaxwellRegisterChanged(method);
-
     if (debug_context) {
         debug_context->OnEvent(Tegra::DebugContext::Event::MaxwellCommandProcessed, nullptr);
     }
@@ -293,10 +291,6 @@ Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const {
                    tic_entry.header_version == Texture::TICHeaderVersion::Pitch,
                "TIC versions other than BlockLinear or Pitch are unimplemented");
 
-    ASSERT_MSG((tic_entry.texture_type == Texture::TextureType::Texture2D) ||
-                   (tic_entry.texture_type == Texture::TextureType::Texture2DNoMipmap),
-               "Texture types other than Texture2D are unimplemented");
-
     auto r_type = tic_entry.r_type.Value();
     auto g_type = tic_entry.g_type.Value();
     auto b_type = tic_entry.b_type.Value();
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index f59d01738..d3be900a4 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -533,7 +533,11 @@ public:
                 u32 stencil_back_mask;
                 u32 stencil_back_func_mask;
 
-                INSERT_PADDING_WORDS(0x20);
+                INSERT_PADDING_WORDS(0x13);
+
+                u32 rt_separate_frag_data;
+
+                INSERT_PADDING_WORDS(0xC);
 
                 struct {
                     u32 address_high;
@@ -557,7 +561,22 @@ public:
                 struct {
                     union {
                         BitField<0, 4, u32> count;
+                        BitField<4, 3, u32> map_0;
+                        BitField<7, 3, u32> map_1;
+                        BitField<10, 3, u32> map_2;
+                        BitField<13, 3, u32> map_3;
+                        BitField<16, 3, u32> map_4;
+                        BitField<19, 3, u32> map_5;
+                        BitField<22, 3, u32> map_6;
+                        BitField<25, 3, u32> map_7;
                     };
+
+                    u32 GetMap(size_t index) const {
+                        const std::array<u32, NumRenderTargets> maps{map_0, map_1, map_2, map_3,
+                                                                     map_4, map_5, map_6, map_7};
+                        ASSERT(index < maps.size());
+                        return maps[index];
+                    }
                 } rt_control;
 
                 INSERT_PADDING_WORDS(0x2);
@@ -968,6 +987,7 @@ ASSERT_REG_POSITION(clear_stencil, 0x368);
 ASSERT_REG_POSITION(stencil_back_func_ref, 0x3D5);
 ASSERT_REG_POSITION(stencil_back_mask, 0x3D6);
 ASSERT_REG_POSITION(stencil_back_func_mask, 0x3D7);
+ASSERT_REG_POSITION(rt_separate_frag_data, 0x3EB);
 ASSERT_REG_POSITION(zeta, 0x3F8);
 ASSERT_REG_POSITION(vertex_attrib_format, 0x458);
 ASSERT_REG_POSITION(rt_control, 0x487);
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp
index 6e740713f..c24d33d5c 100644
--- a/src/video_core/engines/maxwell_dma.cpp
+++ b/src/video_core/engines/maxwell_dma.cpp
@@ -41,7 +41,6 @@ void MaxwellDMA::HandleCopy() {
 
     // TODO(Subv): Perform more research and implement all features of this engine.
     ASSERT(regs.exec.enable_swizzle == 0);
-    ASSERT(regs.exec.enable_2d == 1);
     ASSERT(regs.exec.query_mode == Regs::QueryMode::None);
     ASSERT(regs.exec.query_intr == Regs::QueryIntr::None);
     ASSERT(regs.exec.copy_mode == Regs::CopyMode::Unk2);
@@ -51,10 +50,19 @@ void MaxwellDMA::HandleCopy() {
     ASSERT(regs.dst_params.pos_y == 0);
 
     if (regs.exec.is_dst_linear == regs.exec.is_src_linear) {
-        Memory::CopyBlock(dest_cpu, source_cpu, regs.x_count * regs.y_count);
+        size_t copy_size = regs.x_count;
+
+        // When the enable_2d bit is disabled, the copy is performed as if we were copying a 1D
+        // buffer of length `x_count`, otherwise we copy a 2D buffer of size (x_count, y_count).
+        if (regs.exec.enable_2d) {
+            copy_size = copy_size * regs.y_count;
+        }
+
+        Memory::CopyBlock(dest_cpu, source_cpu, copy_size);
         return;
     }
 
+    ASSERT(regs.exec.enable_2d == 1);
     u8* src_buffer = Memory::GetPointer(source_cpu);
     u8* dst_buffer = Memory::GetPointer(dest_cpu);
 
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index d2388673e..58f2904ce 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -244,6 +244,25 @@ enum class TextureType : u64 {
     TextureCube = 3,
 };
 
+enum class TextureQueryType : u64 {
+    Dimension = 1,
+    TextureType = 2,
+    SamplePosition = 5,
+    Filter = 16,
+    LevelOfDetail = 18,
+    Wrap = 20,
+    BorderColor = 22,
+};
+
+enum class TextureProcessMode : u64 {
+    None = 0,
+    LZ = 1,  // Unknown, appears to be the same as none.
+    LB = 2,  // Load Bias.
+    LL = 3,  // Load LOD (LevelOfDetail)
+    LBA = 6, // Load Bias. The A is unknown, does not appear to differ with LB
+    LLA = 7  // Load LOD. The A is unknown, does not appear to differ with LL
+};
+
 enum class IpaInterpMode : u64 { Linear = 0, Perspective = 1, Flat = 2, Sc = 3 };
 enum class IpaSampleMode : u64 { Default = 0, Centroid = 1, Offset = 2 };
 
@@ -414,6 +433,45 @@ union Instruction {
     } bfe;
 
     union {
+        BitField<48, 3, u64> pred48;
+
+        union {
+            BitField<20, 20, u64> entry_a;
+            BitField<39, 5, u64> entry_b;
+            BitField<45, 1, u64> neg;
+            BitField<46, 1, u64> uses_cc;
+        } imm;
+
+        union {
+            BitField<20, 14, u64> cb_index;
+            BitField<34, 5, u64> cb_offset;
+            BitField<56, 1, u64> neg;
+            BitField<57, 1, u64> uses_cc;
+        } hi;
+
+        union {
+            BitField<20, 14, u64> cb_index;
+            BitField<34, 5, u64> cb_offset;
+            BitField<39, 5, u64> entry_a;
+            BitField<45, 1, u64> neg;
+            BitField<46, 1, u64> uses_cc;
+        } rz;
+
+        union {
+            BitField<39, 5, u64> entry_a;
+            BitField<45, 1, u64> neg;
+            BitField<46, 1, u64> uses_cc;
+        } r1;
+
+        union {
+            BitField<28, 8, u64> entry_a;
+            BitField<37, 1, u64> neg;
+            BitField<38, 1, u64> uses_cc;
+        } r2;
+
+    } lea;
+
+    union {
         BitField<0, 5, FlowCondition> cond;
     } flow;
 
@@ -468,6 +526,18 @@ union Instruction {
     } psetp;
 
     union {
+        BitField<12, 3, u64> pred12;
+        BitField<15, 1, u64> neg_pred12;
+        BitField<24, 2, PredOperation> cond;
+        BitField<29, 3, u64> pred29;
+        BitField<32, 1, u64> neg_pred29;
+        BitField<39, 3, u64> pred39;
+        BitField<42, 1, u64> neg_pred39;
+        BitField<44, 1, u64> bf;
+        BitField<45, 2, PredOperation> op;
+    } pset;
+
+    union {
         BitField<39, 3, u64> pred39;
         BitField<42, 1, u64> neg_pred;
         BitField<43, 1, u64> neg_a;
@@ -512,6 +582,7 @@ union Instruction {
         BitField<28, 1, u64> array;
         BitField<29, 2, TextureType> texture_type;
         BitField<31, 4, u64> component_mask;
+        BitField<55, 3, TextureProcessMode> process_mode;
 
         bool IsComponentEnabled(size_t component) const {
             return ((1ull << component) & component_mask) != 0;
@@ -519,6 +590,21 @@ union Instruction {
     } tex;
 
     union {
+        BitField<22, 6, TextureQueryType> query_type;
+        BitField<31, 4, u64> component_mask;
+    } txq;
+
+    union {
+        BitField<28, 1, u64> array;
+        BitField<29, 2, TextureType> texture_type;
+        BitField<31, 4, u64> component_mask;
+
+        bool IsComponentEnabled(size_t component) const {
+            return ((1ull << component) & component_mask) != 0;
+        }
+    } tmml;
+
+    union {
         BitField<28, 1, u64> array;
         BitField<29, 2, TextureType> texture_type;
         BitField<56, 2, u64> component;
@@ -670,11 +756,13 @@ public:
         LDG, // Load from global memory
         STG, // Store in global memory
         TEX,
-        TEXQ,  // Texture Query
-        TEXS,  // Texture Fetch with scalar/non-vec4 source/destinations
-        TLDS,  // Texture Load with scalar/non-vec4 source/destinations
-        TLD4,  // Texture Load 4
-        TLD4S, // Texture Load 4 with scalar / non - vec4 source / destinations
+        TXQ,    // Texture Query
+        TEXS,   // Texture Fetch with scalar/non-vec4 source/destinations
+        TLDS,   // Texture Load with scalar/non-vec4 source/destinations
+        TLD4,   // Texture Load 4
+        TLD4S,  // Texture Load 4 with scalar / non - vec4 source / destinations
+        TMML_B, // Texture Mip Map Level
+        TMML,   // Texture Mip Map Level
         EXIT,
         IPA,
         FFMA_IMM, // Fused Multiply and Add
@@ -699,6 +787,11 @@ public:
         ISCADD_C, // Scale and Add
         ISCADD_R,
         ISCADD_IMM,
+        LEA_R1,
+        LEA_R2,
+        LEA_RZ,
+        LEA_IMM,
+        LEA_HI,
         POPC_C,
         POPC_R,
         POPC_IMM,
@@ -757,6 +850,7 @@ public:
         ISET_C,
         ISET_IMM,
         PSETP,
+        PSET,
         XMAD_IMM,
         XMAD_CR,
         XMAD_RC,
@@ -780,6 +874,7 @@ public:
         IntegerSet,
         IntegerSetPredicate,
         PredicateSetPredicate,
+        PredicateSetRegister,
         Conversion,
         Xmad,
         Unknown,
@@ -894,11 +989,13 @@ private:
             INST("1110111011010---", Id::LDG, Type::Memory, "LDG"),
             INST("1110111011011---", Id::STG, Type::Memory, "STG"),
             INST("110000----111---", Id::TEX, Type::Memory, "TEX"),
-            INST("1101111101001---", Id::TEXQ, Type::Memory, "TEXQ"),
+            INST("1101111101001---", Id::TXQ, Type::Memory, "TXQ"),
             INST("1101100---------", Id::TEXS, Type::Memory, "TEXS"),
             INST("1101101---------", Id::TLDS, Type::Memory, "TLDS"),
             INST("110010----111---", Id::TLD4, Type::Memory, "TLD4"),
             INST("1101111100------", Id::TLD4S, Type::Memory, "TLD4S"),
+            INST("110111110110----", Id::TMML_B, Type::Memory, "TMML_B"),
+            INST("1101111101011---", Id::TMML, Type::Memory, "TMML"),
             INST("111000110000----", Id::EXIT, Type::Trivial, "EXIT"),
             INST("11100000--------", Id::IPA, Type::Trivial, "IPA"),
             INST("0011001-1-------", Id::FFMA_IMM, Type::Ffma, "FFMA_IMM"),
@@ -929,6 +1026,11 @@ private:
             INST("0100110010100---", Id::SEL_C, Type::ArithmeticInteger, "SEL_C"),
             INST("0101110010100---", Id::SEL_R, Type::ArithmeticInteger, "SEL_R"),
             INST("0011100-10100---", Id::SEL_IMM, Type::ArithmeticInteger, "SEL_IMM"),
+            INST("0101101111011---", Id::LEA_R2, Type::ArithmeticInteger, "LEA_R2"),
+            INST("0101101111010---", Id::LEA_R1, Type::ArithmeticInteger, "LEA_R1"),
+            INST("001101101101----", Id::LEA_IMM, Type::ArithmeticInteger, "LEA_IMM"),
+            INST("010010111101----", Id::LEA_RZ, Type::ArithmeticInteger, "LEA_RZ"),
+            INST("00011000--------", Id::LEA_HI, Type::ArithmeticInteger, "LEA_HI"),
             INST("0101000010000---", Id::MUFU, Type::Arithmetic, "MUFU"),
             INST("0100110010010---", Id::RRO_C, Type::Arithmetic, "RRO_C"),
             INST("0101110010010---", Id::RRO_R, Type::Arithmetic, "RRO_R"),
@@ -983,6 +1085,7 @@ private:
             INST("010110110101----", Id::ISET_R, Type::IntegerSet, "ISET_R"),
             INST("010010110101----", Id::ISET_C, Type::IntegerSet, "ISET_C"),
             INST("0011011-0101----", Id::ISET_IMM, Type::IntegerSet, "ISET_IMM"),
+            INST("0101000010001---", Id::PSET, Type::PredicateSetRegister, "PSET"),
             INST("0101000010010---", Id::PSETP, Type::PredicateSetPredicate, "PSETP"),
             INST("0011011-00------", Id::XMAD_IMM, Type::Xmad, "XMAD_IMM"),
             INST("0100111---------", Id::XMAD_CR, Type::Xmad, "XMAD_CR"),
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index e6d8e65c6..86a809f86 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -66,6 +66,7 @@ u32 RenderTargetBytesPerPixel(RenderTargetFormat format) {
     case RenderTargetFormat::RGBA8_UINT:
     case RenderTargetFormat::RGB10_A2_UNORM:
     case RenderTargetFormat::BGRA8_UNORM:
+    case RenderTargetFormat::BGRA8_SRGB:
     case RenderTargetFormat::RG16_UNORM:
     case RenderTargetFormat::RG16_SNORM:
     case RenderTargetFormat::RG16_UINT:
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h
index d29f31f52..589a59b4f 100644
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -6,6 +6,7 @@
 
 #include <array>
 #include <memory>
+#include <vector>
 #include "common/common_types.h"
 #include "core/hle/service/nvflinger/buffer_queue.h"
 #include "video_core/memory_manager.h"
@@ -26,6 +27,7 @@ enum class RenderTargetFormat : u32 {
     RG32_FLOAT = 0xCB,
     RG32_UINT = 0xCD,
     BGRA8_UNORM = 0xCF,
+    BGRA8_SRGB = 0xD0,
     RGB10_A2_UNORM = 0xD1,
     RGBA8_UNORM = 0xD5,
     RGBA8_SRGB = 0xD6,
@@ -67,6 +69,7 @@ u32 RenderTargetBytesPerPixel(RenderTargetFormat format);
 /// Returns the number of bytes per pixel of each depth format.
 u32 DepthFormatBytesPerPixel(DepthFormat format);
 
+struct CommandListHeader;
 class DebugContext;
 
 /**
@@ -115,7 +118,7 @@ public:
     ~GPU();
 
     /// Processes a command list stored at the specified address in GPU memory.
-    void ProcessCommandList(GPUVAddr address, u32 size);
+    void ProcessCommandLists(const std::vector<CommandListHeader>& commands);
 
     /// Returns a reference to the Maxwell3D GPU engine.
     Engines::Maxwell3D& Maxwell3D();
@@ -130,9 +133,6 @@ public:
     const Tegra::MemoryManager& MemoryManager() const;
 
 private:
-    /// Writes a single register in the engine bound to the specified subchannel
-    void WriteReg(u32 method, u32 subchannel, u32 value, u32 remaining_params);
-
     std::unique_ptr<Tegra::MemoryManager> memory_manager;
 
     /// Mapping of command subchannels to their bound engine ids.
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h
index 9d78e8b6b..cd819d69f 100644
--- a/src/video_core/rasterizer_interface.h
+++ b/src/video_core/rasterizer_interface.h
@@ -20,9 +20,6 @@ public:
     /// Clear the current framebuffer
     virtual void Clear() = 0;
 
-    /// Notify rasterizer that the specified Maxwell register has been changed
-    virtual void NotifyMaxwellRegisterChanged(u32 method) = 0;
-
     /// Notify rasterizer that all caches should be flushed to Switch memory
     virtual void FlushAll() = 0;
 
diff --git a/src/video_core/renderer_base.cpp b/src/video_core/renderer_base.cpp
index be17a2b9c..0df3725c2 100644
--- a/src/video_core/renderer_base.cpp
+++ b/src/video_core/renderer_base.cpp
@@ -19,6 +19,7 @@ void RendererBase::RefreshBaseSettings() {
     UpdateCurrentFramebufferLayout();
 
     renderer_settings.use_framelimiter = Settings::values.use_frame_limit;
+    renderer_settings.set_background_color = true;
 }
 
 void RendererBase::UpdateCurrentFramebufferLayout() {
diff --git a/src/video_core/renderer_base.h b/src/video_core/renderer_base.h
index 2a357f9d0..2cd0738ff 100644
--- a/src/video_core/renderer_base.h
+++ b/src/video_core/renderer_base.h
@@ -19,6 +19,7 @@ namespace VideoCore {
 
 struct RendererSettings {
     std::atomic_bool use_framelimiter{false};
+    std::atomic_bool set_background_color{false};
 };
 
 class RendererBase : NonCopyable {
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 0c3bbc475..c59f3af1b 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -294,61 +294,80 @@ void RasterizerOpenGL::UpdatePagesCachedCount(VAddr addr, u64 size, int delta) {
         cached_pages.add({pages_interval, delta});
 }
 
-std::pair<Surface, Surface> RasterizerOpenGL::ConfigureFramebuffers(bool using_color_fb,
-                                                                    bool using_depth_fb,
-                                                                    bool preserve_contents) {
+void RasterizerOpenGL::ConfigureFramebuffers(bool using_color_fb, bool using_depth_fb,
+                                             bool preserve_contents,
+                                             boost::optional<size_t> single_color_target) {
     MICROPROFILE_SCOPE(OpenGL_Framebuffer);
     const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;
 
-    if (regs.rt[0].format == Tegra::RenderTargetFormat::NONE) {
-        LOG_ERROR(HW_GPU, "RenderTargetFormat is not configured");
-        using_color_fb = false;
+    Surface depth_surface;
+    if (using_depth_fb) {
+        depth_surface = res_cache.GetDepthBufferSurface(preserve_contents);
     }
 
-    const bool has_stencil = regs.stencil_enable;
-    const bool write_color_fb =
-        state.color_mask.red_enabled == GL_TRUE || state.color_mask.green_enabled == GL_TRUE ||
-        state.color_mask.blue_enabled == GL_TRUE || state.color_mask.alpha_enabled == GL_TRUE;
-
-    const bool write_depth_fb =
-        (state.depth.test_enabled && state.depth.write_mask == GL_TRUE) ||
-        (has_stencil && (state.stencil.front.write_mask || state.stencil.back.write_mask));
+    // TODO(bunnei): Figure out how the below register works. According to envytools, this should be
+    // used to enable multiple render targets. However, it is left unset on all games that I have
+    // tested.
+    ASSERT_MSG(regs.rt_separate_frag_data == 0, "Unimplemented");
 
-    Surface color_surface;
-    Surface depth_surface;
-    MathUtil::Rectangle<u32> surfaces_rect;
-    std::tie(color_surface, depth_surface, surfaces_rect) =
-        res_cache.GetFramebufferSurfaces(using_color_fb, using_depth_fb, preserve_contents);
+    // Bind the framebuffer surfaces
+    state.draw.draw_framebuffer = framebuffer.handle;
+    state.Apply();
 
-    const MathUtil::Rectangle<s32> viewport_rect{regs.viewport_transform[0].GetRect()};
-    const MathUtil::Rectangle<u32> draw_rect{
-        static_cast<u32>(std::clamp<s32>(static_cast<s32>(surfaces_rect.left) + viewport_rect.left,
-                                         surfaces_rect.left, surfaces_rect.right)), // Left
-        static_cast<u32>(std::clamp<s32>(static_cast<s32>(surfaces_rect.bottom) + viewport_rect.top,
-                                         surfaces_rect.bottom, surfaces_rect.top)), // Top
-        static_cast<u32>(std::clamp<s32>(static_cast<s32>(surfaces_rect.left) + viewport_rect.right,
-                                         surfaces_rect.left, surfaces_rect.right)), // Right
-        static_cast<u32>(
-            std::clamp<s32>(static_cast<s32>(surfaces_rect.bottom) + viewport_rect.bottom,
-                            surfaces_rect.bottom, surfaces_rect.top))}; // Bottom
+    if (using_color_fb) {
+        if (single_color_target) {
+            // Used when just a single color attachment is enabled, e.g. for clearing a color buffer
+            Surface color_surface =
+                res_cache.GetColorBufferSurface(*single_color_target, preserve_contents);
+            glFramebufferTexture2D(
+                GL_DRAW_FRAMEBUFFER,
+                GL_COLOR_ATTACHMENT0 + static_cast<GLenum>(*single_color_target), GL_TEXTURE_2D,
+                color_surface != nullptr ? color_surface->Texture().handle : 0, 0);
+            glDrawBuffer(GL_COLOR_ATTACHMENT0 + static_cast<GLenum>(*single_color_target));
+        } else {
+            // Multiple color attachments are enabled
+            std::array<GLenum, Maxwell::NumRenderTargets> buffers;
+            for (size_t index = 0; index < Maxwell::NumRenderTargets; ++index) {
+                Surface color_surface = res_cache.GetColorBufferSurface(index, preserve_contents);
+                buffers[index] = GL_COLOR_ATTACHMENT0 + regs.rt_control.GetMap(index);
+                glFramebufferTexture2D(
+                    GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + static_cast<GLenum>(index),
+                    GL_TEXTURE_2D, color_surface != nullptr ? color_surface->Texture().handle : 0,
+                    0);
+            }
+            glDrawBuffers(regs.rt_control.count, buffers.data());
+        }
+    } else {
+        // No color attachments are enabled - zero out all of them
+        for (size_t index = 0; index < Maxwell::NumRenderTargets; ++index) {
+            glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER,
+                                   GL_COLOR_ATTACHMENT0 + static_cast<GLenum>(index), GL_TEXTURE_2D,
+                                   0, 0);
+        }
+        glDrawBuffer(GL_NONE);
+    }
 
-    // Bind the framebuffer surfaces
-    BindFramebufferSurfaces(color_surface, depth_surface, has_stencil);
+    if (depth_surface) {
+        if (regs.stencil_enable) {
+            // Attach both depth and stencil
+            glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
+                                   depth_surface->Texture().handle, 0);
+        } else {
+            // Attach depth
+            glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D,
+                                   depth_surface->Texture().handle, 0);
+            // Clear stencil attachment
+            glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
+        }
+    } else {
+        // Clear both depth and stencil attachment
+        glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0,
+                               0);
+    }
 
-    SyncViewport(surfaces_rect);
+    SyncViewport();
 
-    // Viewport can have negative offsets or larger dimensions than our framebuffer sub-rect. Enable
-    // scissor test to prevent drawing outside of the framebuffer region
-    state.scissor.enabled = true;
-    state.scissor.x = draw_rect.left;
-    state.scissor.y = draw_rect.bottom;
-    state.scissor.width = draw_rect.GetWidth();
-    state.scissor.height = draw_rect.GetHeight();
     state.Apply();
-
-    // Only return the surface to be marked as dirty if writing to it is enabled.
-    return std::make_pair(write_color_fb ? color_surface : nullptr,
-                          write_depth_fb ? depth_surface : nullptr);
 }
 
 void RasterizerOpenGL::Clear() {
@@ -356,8 +375,9 @@ void RasterizerOpenGL::Clear() {
     SCOPE_EXIT({ prev_state.Apply(); });
 
     const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;
-    bool use_color_fb = false;
-    bool use_depth_fb = false;
+    bool use_color{};
+    bool use_depth{};
+    bool use_stencil{};
 
     OpenGLState clear_state;
     clear_state.draw.draw_framebuffer = state.draw.draw_framebuffer;
@@ -366,22 +386,13 @@ void RasterizerOpenGL::Clear() {
     clear_state.color_mask.blue_enabled = regs.clear_buffers.B ? GL_TRUE : GL_FALSE;
     clear_state.color_mask.alpha_enabled = regs.clear_buffers.A ? GL_TRUE : GL_FALSE;
 
-    GLbitfield clear_mask{};
     if (regs.clear_buffers.R || regs.clear_buffers.G || regs.clear_buffers.B ||
         regs.clear_buffers.A) {
-        if (regs.clear_buffers.RT == 0) {
-            // We only support clearing the first color attachment for now
-            clear_mask |= GL_COLOR_BUFFER_BIT;
-            use_color_fb = true;
-        } else {
-            // TODO(subv): Add support for the other color attachments
-            LOG_CRITICAL(HW_GPU, "Clear unimplemented for RT {}", regs.clear_buffers.RT);
-        }
+        use_color = true;
     }
     if (regs.clear_buffers.Z) {
         ASSERT_MSG(regs.zeta_enable != 0, "Tried to clear Z but buffer is not enabled!");
-        use_depth_fb = true;
-        clear_mask |= GL_DEPTH_BUFFER_BIT;
+        use_depth = true;
 
         // Always enable the depth write when clearing the depth buffer. The depth write mask is
         // ignored when clearing the buffer in the Switch, but OpenGL obeys it so we set it to true.
@@ -390,34 +401,33 @@ void RasterizerOpenGL::Clear() {
     }
     if (regs.clear_buffers.S) {
         ASSERT_MSG(regs.zeta_enable != 0, "Tried to clear stencil but buffer is not enabled!");
-        use_depth_fb = true;
-        clear_mask |= GL_STENCIL_BUFFER_BIT;
+        use_stencil = true;
         clear_state.stencil.test_enabled = true;
     }
 
-    if (!use_color_fb && !use_depth_fb) {
+    if (!use_color && !use_depth && !use_stencil) {
         // No color surface nor depth/stencil surface are enabled
         return;
     }
 
-    if (clear_mask == 0) {
-        // No clear mask is enabled
-        return;
-    }
-
     ScopeAcquireGLContext acquire_context{emu_window};
 
-    auto [dirty_color_surface, dirty_depth_surface] =
-        ConfigureFramebuffers(use_color_fb, use_depth_fb, false);
+    ConfigureFramebuffers(use_color, use_depth || use_stencil, false,
+                          regs.clear_buffers.RT.Value());
 
     clear_state.Apply();
 
-    glClearColor(regs.clear_color[0], regs.clear_color[1], regs.clear_color[2],
-                 regs.clear_color[3]);
-    glClearDepth(regs.clear_depth);
-    glClearStencil(regs.clear_stencil);
+    if (use_color) {
+        glClearBufferfv(GL_COLOR, regs.clear_buffers.RT, regs.clear_color);
+    }
 
-    glClear(clear_mask);
+    if (use_depth && use_stencil) {
+        glClearBufferfi(GL_DEPTH_STENCIL, 0, regs.clear_depth, regs.clear_stencil);
+    } else if (use_depth) {
+        glClearBufferfv(GL_DEPTH, 0, &regs.clear_depth);
+    } else if (use_stencil) {
+        glClearBufferiv(GL_STENCIL, 0, &regs.clear_stencil);
+    }
 }
 
 void RasterizerOpenGL::DrawArrays() {
@@ -430,8 +440,7 @@ void RasterizerOpenGL::DrawArrays() {
 
     ScopeAcquireGLContext acquire_context{emu_window};
 
-    const auto [dirty_color_surface, dirty_depth_surface] =
-        ConfigureFramebuffers(true, regs.zeta.Address() != 0 && regs.zeta_enable != 0, true);
+    ConfigureFramebuffers();
 
     SyncDepthTestState();
     SyncStencilTestState();
@@ -525,8 +534,6 @@ void RasterizerOpenGL::DrawArrays() {
     state.Apply();
 }
 
-void RasterizerOpenGL::NotifyMaxwellRegisterChanged(u32 method) {}
-
 void RasterizerOpenGL::FlushAll() {}
 
 void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size) {}
@@ -586,7 +593,7 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config,
 void RasterizerOpenGL::SamplerInfo::Create() {
     sampler.Create();
     mag_filter = min_filter = Tegra::Texture::TextureFilter::Linear;
-    wrap_u = wrap_v = Tegra::Texture::WrapMode::Wrap;
+    wrap_u = wrap_v = wrap_p = Tegra::Texture::WrapMode::Wrap;
 
     // default is GL_LINEAR_MIPMAP_LINEAR
     glSamplerParameteri(sampler.handle, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
@@ -613,8 +620,13 @@ void RasterizerOpenGL::SamplerInfo::SyncWithConfig(const Tegra::Texture::TSCEntr
         wrap_v = config.wrap_v;
         glSamplerParameteri(s, GL_TEXTURE_WRAP_T, MaxwellToGL::WrapMode(wrap_v));
     }
+    if (wrap_p != config.wrap_p) {
+        wrap_p = config.wrap_p;
+        glSamplerParameteri(s, GL_TEXTURE_WRAP_R, MaxwellToGL::WrapMode(wrap_p));
+    }
 
-    if (wrap_u == Tegra::Texture::WrapMode::Border || wrap_v == Tegra::Texture::WrapMode::Border) {
+    if (wrap_u == Tegra::Texture::WrapMode::Border || wrap_v == Tegra::Texture::WrapMode::Border ||
+        wrap_p == Tegra::Texture::WrapMode::Border) {
         const GLvec4 new_border_color = {{config.border_color_r, config.border_color_g,
                                           config.border_color_b, config.border_color_a}};
         if (border_color != new_border_color) {
@@ -698,14 +710,15 @@ u32 RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, Shader& shader,
         const auto texture = maxwell3d.GetStageTexture(entry.GetStage(), entry.GetOffset());
 
         if (!texture.enabled) {
-            state.texture_units[current_bindpoint].texture_2d = 0;
+            state.texture_units[current_bindpoint].texture = 0;
             continue;
         }
 
         texture_samplers[current_bindpoint].SyncWithConfig(texture.tsc);
         Surface surface = res_cache.GetTextureSurface(texture);
         if (surface != nullptr) {
-            state.texture_units[current_bindpoint].texture_2d = surface->Texture().handle;
+            state.texture_units[current_bindpoint].texture = surface->Texture().handle;
+            state.texture_units[current_bindpoint].target = surface->Target();
             state.texture_units[current_bindpoint].swizzle.r =
                 MaxwellToGL::SwizzleSource(texture.tic.x_source);
             state.texture_units[current_bindpoint].swizzle.g =
@@ -716,45 +729,19 @@ u32 RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, Shader& shader,
                 MaxwellToGL::SwizzleSource(texture.tic.w_source);
         } else {
             // Can occur when texture addr is null or its memory is unmapped/invalid
-            state.texture_units[current_bindpoint].texture_2d = 0;
+            state.texture_units[current_bindpoint].texture = 0;
         }
     }
 
     return current_unit + static_cast<u32>(entries.size());
 }
 
-void RasterizerOpenGL::BindFramebufferSurfaces(const Surface& color_surface,
-                                               const Surface& depth_surface, bool has_stencil) {
-    state.draw.draw_framebuffer = framebuffer.handle;
-    state.Apply();
-
-    glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D,
-                           color_surface != nullptr ? color_surface->Texture().handle : 0, 0);
-    if (depth_surface != nullptr) {
-        if (has_stencil) {
-            // attach both depth and stencil
-            glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
-                                   depth_surface->Texture().handle, 0);
-        } else {
-            // attach depth
-            glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D,
-                                   depth_surface->Texture().handle, 0);
-            // clear stencil attachment
-            glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
-        }
-    } else {
-        // clear both depth and stencil attachment
-        glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0,
-                               0);
-    }
-}
-
-void RasterizerOpenGL::SyncViewport(const MathUtil::Rectangle<u32>& surfaces_rect) {
+void RasterizerOpenGL::SyncViewport() {
     const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;
     const MathUtil::Rectangle<s32> viewport_rect{regs.viewport_transform[0].GetRect()};
 
-    state.viewport.x = static_cast<GLint>(surfaces_rect.left) + viewport_rect.left;
-    state.viewport.y = static_cast<GLint>(surfaces_rect.bottom) + viewport_rect.bottom;
+    state.viewport.x = viewport_rect.left;
+    state.viewport.y = viewport_rect.bottom;
     state.viewport.width = static_cast<GLsizei>(viewport_rect.GetWidth());
     state.viewport.height = static_cast<GLsizei>(viewport_rect.GetHeight());
 }
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index 9c30dc0e8..745c3dc0c 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -13,6 +13,7 @@
 #include <vector>
 
 #include <boost/icl/interval_map.hpp>
+#include <boost/optional.hpp>
 #include <boost/range/iterator_range.hpp>
 #include <glad/glad.h>
 
@@ -45,7 +46,6 @@ public:
 
     void DrawArrays() override;
     void Clear() override;
-    void NotifyMaxwellRegisterChanged(u32 method) override;
     void FlushAll() override;
     void FlushRegion(VAddr addr, u64 size) override;
     void InvalidateRegion(VAddr addr, u64 size) override;
@@ -93,17 +93,20 @@ private:
         Tegra::Texture::TextureFilter min_filter;
         Tegra::Texture::WrapMode wrap_u;
         Tegra::Texture::WrapMode wrap_v;
+        Tegra::Texture::WrapMode wrap_p;
         GLvec4 border_color;
     };
 
-    /// Configures the color and depth framebuffer states and returns the dirty <Color, Depth>
-    /// surfaces if writing was enabled.
-    std::pair<Surface, Surface> ConfigureFramebuffers(bool using_color_fb, bool using_depth_fb,
-                                                      bool preserve_contents);
-
-    /// Binds the framebuffer color and depth surface
-    void BindFramebufferSurfaces(const Surface& color_surface, const Surface& depth_surface,
-                                 bool has_stencil);
+    /**
+     * Configures the color and depth framebuffer states.
+     * @param use_color_fb If true, configure color framebuffers.
+     * @param using_depth_fb If true, configure the depth/stencil framebuffer.
+     * @param preserve_contents If true, tries to preserve data from a previously used framebuffer.
+     * @param single_color_target Specifies if a single color buffer target should be used.
+     */
+    void ConfigureFramebuffers(bool use_color_fb = true, bool using_depth_fb = true,
+                               bool preserve_contents = true,
+                               boost::optional<size_t> single_color_target = {});
 
     /*
      * Configures the current constbuffers to use for the draw command.
@@ -126,7 +129,7 @@ private:
                       u32 current_unit);
 
     /// Syncs the viewport to match the guest state
-    void SyncViewport(const MathUtil::Rectangle<u32>& surfaces_rect);
+    void SyncViewport();
 
     /// Syncs the clip enabled status to match the guest state
     void SyncClipEnabled();
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
index f6b2c5a86..32001e44b 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
@@ -7,6 +7,7 @@
 
 #include "common/alignment.h"
 #include "common/assert.h"
+#include "common/logging/log.h"
 #include "common/microprofile.h"
 #include "common/scope_exit.h"
 #include "core/core.h"
@@ -52,14 +53,30 @@ static VAddr TryGetCpuAddr(Tegra::GPUVAddr gpu_addr) {
     params.width = Common::AlignUp(config.tic.Width(), GetCompressionFactor(params.pixel_format));
     params.height = Common::AlignUp(config.tic.Height(), GetCompressionFactor(params.pixel_format));
     params.unaligned_height = config.tic.Height();
+    params.target = SurfaceTargetFromTextureType(config.tic.texture_type);
+
+    switch (params.target) {
+    case SurfaceTarget::Texture1D:
+    case SurfaceTarget::Texture2D:
+        params.depth = 1;
+        break;
+    case SurfaceTarget::Texture3D:
+    case SurfaceTarget::Texture2DArray:
+        params.depth = config.tic.Depth();
+        break;
+    default:
+        LOG_CRITICAL(HW_GPU, "Unknown depth for target={}", static_cast<u32>(params.target));
+        UNREACHABLE();
+        params.depth = 1;
+        break;
+    }
+
     params.size_in_bytes = params.SizeInBytes();
-    params.cache_width = Common::AlignUp(params.width, 16);
-    params.cache_height = Common::AlignUp(params.height, 16);
     return params;
 }
 
-/*static*/ SurfaceParams SurfaceParams::CreateForFramebuffer(
-    const Tegra::Engines::Maxwell3D::Regs::RenderTargetConfig& config) {
+/*static*/ SurfaceParams SurfaceParams::CreateForFramebuffer(size_t index) {
+    const auto& config{Core::System::GetInstance().GPU().Maxwell3D().regs.rt[index]};
     SurfaceParams params{};
     params.addr = TryGetCpuAddr(config.Address());
     params.is_tiled = true;
@@ -70,9 +87,9 @@ static VAddr TryGetCpuAddr(Tegra::GPUVAddr gpu_addr) {
     params.width = config.width;
     params.height = config.height;
     params.unaligned_height = config.height;
+    params.target = SurfaceTarget::Texture2D;
+    params.depth = 1;
     params.size_in_bytes = params.SizeInBytes();
-    params.cache_width = Common::AlignUp(params.width, 16);
-    params.cache_height = Common::AlignUp(params.height, 16);
     return params;
 }
 
@@ -86,13 +103,12 @@ static VAddr TryGetCpuAddr(Tegra::GPUVAddr gpu_addr) {
     params.pixel_format = PixelFormatFromDepthFormat(format);
     params.component_type = ComponentTypeFromDepthFormat(format);
     params.type = GetFormatType(params.pixel_format);
-    params.size_in_bytes = params.SizeInBytes();
     params.width = zeta_width;
     params.height = zeta_height;
     params.unaligned_height = zeta_height;
+    params.target = SurfaceTarget::Texture2D;
+    params.depth = 1;
     params.size_in_bytes = params.SizeInBytes();
-    params.cache_width = Common::AlignUp(params.width, 16);
-    params.cache_height = Common::AlignUp(params.height, 16);
     return params;
 }
 
@@ -100,7 +116,7 @@ static constexpr std::array<FormatTuple, SurfaceParams::MaxPixelFormat> tex_form
     {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, ComponentType::UNorm, false}, // ABGR8U
     {GL_RGBA8, GL_RGBA, GL_BYTE, ComponentType::SNorm, false},                     // ABGR8S
     {GL_RGBA8UI, GL_RGBA_INTEGER, GL_UNSIGNED_BYTE, ComponentType::UInt, false},   // ABGR8UI
-    {GL_RGB, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV, ComponentType::UNorm, false},    // B5G6R5U
+    {GL_RGB8, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV, ComponentType::UNorm, false},   // B5G6R5U
     {GL_RGB10_A2, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV, ComponentType::UNorm,
      false}, // A2B10G10R10U
     {GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV, ComponentType::UNorm, false}, // A1B5G5R5U
@@ -166,6 +182,26 @@ static constexpr std::array<FormatTuple, SurfaceParams::MaxPixelFormat> tex_form
      ComponentType::Float, false}, // Z32FS8
 }};
 
+static GLenum SurfaceTargetToGL(SurfaceParams::SurfaceTarget target) {
+    switch (target) {
+    case SurfaceParams::SurfaceTarget::Texture1D:
+        return GL_TEXTURE_1D;
+    case SurfaceParams::SurfaceTarget::Texture2D:
+        return GL_TEXTURE_2D;
+    case SurfaceParams::SurfaceTarget::Texture3D:
+        return GL_TEXTURE_3D;
+    case SurfaceParams::SurfaceTarget::Texture1DArray:
+        return GL_TEXTURE_1D_ARRAY;
+    case SurfaceParams::SurfaceTarget::Texture2DArray:
+        return GL_TEXTURE_2D_ARRAY;
+    case SurfaceParams::SurfaceTarget::TextureCubemap:
+        return GL_TEXTURE_CUBE_MAP;
+    }
+    LOG_CRITICAL(Render_OpenGL, "Unimplemented texture target={}", static_cast<u32>(target));
+    UNREACHABLE();
+    return {};
+}
+
 static const FormatTuple& GetFormatTuple(PixelFormat pixel_format, ComponentType component_type) {
     ASSERT(static_cast<size_t>(pixel_format) < tex_format_tuples.size());
     auto& format = tex_format_tuples[static_cast<unsigned int>(pixel_format)];
@@ -220,7 +256,8 @@ static bool IsFormatBCn(PixelFormat format) {
 }
 
 template <bool morton_to_gl, PixelFormat format>
-void MortonCopy(u32 stride, u32 block_height, u32 height, std::vector<u8>& gl_buffer, VAddr addr) {
+void MortonCopy(u32 stride, u32 block_height, u32 height, u8* gl_buffer, size_t gl_buffer_size,
+                VAddr addr) {
     constexpr u32 bytes_per_pixel = SurfaceParams::GetFormatBpp(format) / CHAR_BIT;
     constexpr u32 gl_bytes_per_pixel = CachedSurface::GetGLBytesPerPixel(format);
 
@@ -230,18 +267,18 @@ void MortonCopy(u32 stride, u32 block_height, u32 height, std::vector<u8>& gl_bu
         const u32 tile_size{IsFormatBCn(format) ? 4U : 1U};
         const std::vector<u8> data = Tegra::Texture::UnswizzleTexture(
             addr, tile_size, bytes_per_pixel, stride, height, block_height);
-        const size_t size_to_copy{std::min(gl_buffer.size(), data.size())};
-        gl_buffer.assign(data.begin(), data.begin() + size_to_copy);
+        const size_t size_to_copy{std::min(gl_buffer_size, data.size())};
+        memcpy(gl_buffer, data.data(), size_to_copy);
     } else {
         // TODO(bunnei): Assumes the default rendering GOB size of 16 (128 lines). We should
         // check the configuration for this and perform more generic un/swizzle
         LOG_WARNING(Render_OpenGL, "need to use correct swizzle/GOB parameters!");
         VideoCore::MortonCopyPixels128(stride, height, bytes_per_pixel, gl_bytes_per_pixel,
-                                       Memory::GetPointer(addr), gl_buffer.data(), morton_to_gl);
+                                       Memory::GetPointer(addr), gl_buffer, morton_to_gl);
     }
 }
 
-static constexpr std::array<void (*)(u32, u32, u32, std::vector<u8>&, VAddr),
+static constexpr std::array<void (*)(u32, u32, u32, u8*, size_t, VAddr),
                             SurfaceParams::MaxPixelFormat>
     morton_to_gl_fns = {
         // clang-format off
@@ -298,7 +335,7 @@ static constexpr std::array<void (*)(u32, u32, u32, std::vector<u8>&, VAddr),
         // clang-format on
 };
 
-static constexpr std::array<void (*)(u32, u32, u32, std::vector<u8>&, VAddr),
+static constexpr std::array<void (*)(u32, u32, u32, u8*, size_t, VAddr),
                             SurfaceParams::MaxPixelFormat>
     gl_to_morton_fns = {
         // clang-format off
@@ -357,33 +394,6 @@ static constexpr std::array<void (*)(u32, u32, u32, std::vector<u8>&, VAddr),
         // clang-format on
 };
 
-// Allocate an uninitialized texture of appropriate size and format for the surface
-static void AllocateSurfaceTexture(GLuint texture, const FormatTuple& format_tuple, u32 width,
-                                   u32 height) {
-    OpenGLState cur_state = OpenGLState::GetCurState();
-
-    // Keep track of previous texture bindings
-    GLuint old_tex = cur_state.texture_units[0].texture_2d;
-    cur_state.texture_units[0].texture_2d = texture;
-    cur_state.Apply();
-    glActiveTexture(GL_TEXTURE0);
-
-    if (!format_tuple.compressed) {
-        // Only pre-create the texture for non-compressed textures.
-        glTexImage2D(GL_TEXTURE_2D, 0, format_tuple.internal_format, width, height, 0,
-                     format_tuple.format, format_tuple.type, nullptr);
-    }
-
-    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0);
-    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
-    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
-    glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
-
-    // Restore previous texture bindings
-    cur_state.texture_units[0].texture_2d = old_tex;
-    cur_state.Apply();
-}
-
 static bool BlitTextures(GLuint src_tex, const MathUtil::Rectangle<u32>& src_rect, GLuint dst_tex,
                          const MathUtil::Rectangle<u32>& dst_rect, SurfaceType type,
                          GLuint read_fb_handle, GLuint draw_fb_handle) {
@@ -438,12 +448,53 @@ static bool BlitTextures(GLuint src_tex, const MathUtil::Rectangle<u32>& src_rec
     return true;
 }
 
-CachedSurface::CachedSurface(const SurfaceParams& params) : params(params) {
+CachedSurface::CachedSurface(const SurfaceParams& params)
+    : params(params), gl_target(SurfaceTargetToGL(params.target)) {
     texture.Create();
     const auto& rect{params.GetRect()};
-    AllocateSurfaceTexture(texture.handle,
-                           GetFormatTuple(params.pixel_format, params.component_type),
+
+    // Keep track of previous texture bindings
+    OpenGLState cur_state = OpenGLState::GetCurState();
+    const auto& old_tex = cur_state.texture_units[0];
+    SCOPE_EXIT({
+        cur_state.texture_units[0] = old_tex;
+        cur_state.Apply();
+    });
+
+    cur_state.texture_units[0].texture = texture.handle;
+    cur_state.texture_units[0].target = SurfaceTargetToGL(params.target);
+    cur_state.Apply();
+    glActiveTexture(GL_TEXTURE0);
+
+    const auto& format_tuple = GetFormatTuple(params.pixel_format, params.component_type);
+    if (!format_tuple.compressed) {
+        // Only pre-create the texture for non-compressed textures.
+        switch (params.target) {
+        case SurfaceParams::SurfaceTarget::Texture1D:
+            glTexStorage1D(SurfaceTargetToGL(params.target), 1, format_tuple.internal_format,
+                           rect.GetWidth());
+            break;
+        case SurfaceParams::SurfaceTarget::Texture2D:
+            glTexStorage2D(SurfaceTargetToGL(params.target), 1, format_tuple.internal_format,
                            rect.GetWidth(), rect.GetHeight());
+            break;
+        case SurfaceParams::SurfaceTarget::Texture3D:
+        case SurfaceParams::SurfaceTarget::Texture2DArray:
+            glTexStorage3D(SurfaceTargetToGL(params.target), 1, format_tuple.internal_format,
+                           rect.GetWidth(), rect.GetHeight(), params.depth);
+            break;
+        default:
+            LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}",
+                         static_cast<u32>(params.target));
+            UNREACHABLE();
+            glTexStorage2D(GL_TEXTURE_2D, 1, format_tuple.internal_format, rect.GetWidth(),
+                           rect.GetHeight());
+        }
+    }
+
+    glTexParameteri(SurfaceTargetToGL(params.target), GL_TEXTURE_MIN_FILTER, GL_LINEAR);
+    glTexParameteri(SurfaceTargetToGL(params.target), GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
+    glTexParameteri(SurfaceTargetToGL(params.target), GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
 }
 
 static void ConvertS8Z24ToZ24S8(std::vector<u8>& data, u32 width, u32 height) {
@@ -461,7 +512,7 @@ static void ConvertS8Z24ToZ24S8(std::vector<u8>& data, u32 width, u32 height) {
 
     S8Z24 input_pixel{};
     Z24S8 output_pixel{};
-    const auto bpp{CachedSurface::GetGLBytesPerPixel(PixelFormat::S8Z24)};
+    constexpr auto bpp{CachedSurface::GetGLBytesPerPixel(PixelFormat::S8Z24)};
     for (size_t y = 0; y < height; ++y) {
         for (size_t x = 0; x < width; ++x) {
             const size_t offset{bpp * (y * width + x)};
@@ -474,7 +525,7 @@ static void ConvertS8Z24ToZ24S8(std::vector<u8>& data, u32 width, u32 height) {
 }
 
 static void ConvertG8R8ToR8G8(std::vector<u8>& data, u32 width, u32 height) {
-    const auto bpp{CachedSurface::GetGLBytesPerPixel(PixelFormat::G8R8U)};
+    constexpr auto bpp{CachedSurface::GetGLBytesPerPixel(PixelFormat::G8R8U)};
     for (size_t y = 0; y < height; ++y) {
         for (size_t x = 0; x < width; ++x) {
             const size_t offset{bpp * (y * width + x)};
@@ -514,23 +565,6 @@ static void ConvertFormatAsNeeded_LoadGLBuffer(std::vector<u8>& data, PixelForma
     }
 }
 
-/**
- * Helper function to perform software conversion (as needed) when flushing a buffer to Switch
- * memory. This is for Maxwell pixel formats that cannot be represented as-is in OpenGL or with
- * typical desktop GPUs.
- */
-static void ConvertFormatAsNeeded_FlushGLBuffer(std::vector<u8>& /*data*/, PixelFormat pixel_format,
-                                                u32 /*width*/, u32 /*height*/) {
-    switch (pixel_format) {
-    case PixelFormat::ASTC_2D_4X4:
-    case PixelFormat::S8Z24:
-        LOG_CRITICAL(Render_OpenGL, "Unimplemented pixel_format={}",
-                     static_cast<u32>(pixel_format));
-        UNREACHABLE();
-        break;
-    }
-}
-
 MICROPROFILE_DEFINE(OpenGL_SurfaceLoad, "OpenGL", "Surface Load", MP_RGB(128, 64, 192));
 void CachedSurface::LoadGLBuffer() {
     ASSERT(params.type != SurfaceType::Fill);
@@ -545,13 +579,25 @@ void CachedSurface::LoadGLBuffer() {
     MICROPROFILE_SCOPE(OpenGL_SurfaceLoad);
 
     if (params.is_tiled) {
-        gl_buffer.resize(copy_size);
+        // TODO(bunnei): This only unswizzles and copies a 2D texture - we do not yet know how to do
+        // this for 3D textures, etc.
+        switch (params.target) {
+        case SurfaceParams::SurfaceTarget::Texture2D:
+            // Pass impl. to the fallback code below
+            break;
+        default:
+            LOG_CRITICAL(HW_GPU, "Unimplemented tiled load for target={}",
+                         static_cast<u32>(params.target));
+            UNREACHABLE();
+        }
 
+        gl_buffer.resize(static_cast<size_t>(params.depth) * copy_size);
         morton_to_gl_fns[static_cast<size_t>(params.pixel_format)](
-            params.width, params.block_height, params.height, gl_buffer, params.addr);
+            params.width, params.block_height, params.height, gl_buffer.data(), copy_size,
+            params.addr);
     } else {
-        const u8* const texture_src_data_end = texture_src_data + copy_size;
-
+        const u8* const texture_src_data_end{texture_src_data +
+                                             (static_cast<size_t>(params.depth) * copy_size)};
         gl_buffer.assign(texture_src_data, texture_src_data_end);
     }
 
@@ -560,23 +606,7 @@ void CachedSurface::LoadGLBuffer() {
 
 MICROPROFILE_DEFINE(OpenGL_SurfaceFlush, "OpenGL", "Surface Flush", MP_RGB(128, 192, 64));
 void CachedSurface::FlushGLBuffer() {
-    u8* const dst_buffer = Memory::GetPointer(params.addr);
-
-    ASSERT(dst_buffer);
-    ASSERT(gl_buffer.size() ==
-           params.width * params.height * GetGLBytesPerPixel(params.pixel_format));
-
-    MICROPROFILE_SCOPE(OpenGL_SurfaceFlush);
-
-    ConvertFormatAsNeeded_FlushGLBuffer(gl_buffer, params.pixel_format, params.width,
-                                        params.height);
-
-    if (!params.is_tiled) {
-        std::memcpy(dst_buffer, gl_buffer.data(), params.size_in_bytes);
-    } else {
-        gl_to_morton_fns[static_cast<size_t>(params.pixel_format)](
-            params.width, params.block_height, params.height, gl_buffer, params.addr);
-    }
+    ASSERT_MSG(false, "Unimplemented");
 }
 
 MICROPROFILE_DEFINE(OpenGL_TextureUL, "OpenGL", "Texture Upload", MP_RGB(128, 64, 192));
@@ -586,22 +616,29 @@ void CachedSurface::UploadGLTexture(GLuint read_fb_handle, GLuint draw_fb_handle
 
     MICROPROFILE_SCOPE(OpenGL_TextureUL);
 
-    ASSERT(gl_buffer.size() ==
-           params.width * params.height * GetGLBytesPerPixel(params.pixel_format));
+    ASSERT(gl_buffer.size() == static_cast<size_t>(params.width) * params.height *
+                                   GetGLBytesPerPixel(params.pixel_format) * params.depth);
 
     const auto& rect{params.GetRect()};
 
     // Load data from memory to the surface
-    GLint x0 = static_cast<GLint>(rect.left);
-    GLint y0 = static_cast<GLint>(rect.bottom);
-    size_t buffer_offset = (y0 * params.width + x0) * GetGLBytesPerPixel(params.pixel_format);
+    const GLint x0 = static_cast<GLint>(rect.left);
+    const GLint y0 = static_cast<GLint>(rect.bottom);
+    const size_t buffer_offset =
+        static_cast<size_t>(static_cast<size_t>(y0) * params.width + static_cast<size_t>(x0)) *
+        GetGLBytesPerPixel(params.pixel_format);
 
     const FormatTuple& tuple = GetFormatTuple(params.pixel_format, params.component_type);
-    GLuint target_tex = texture.handle;
+    const GLuint target_tex = texture.handle;
     OpenGLState cur_state = OpenGLState::GetCurState();
 
-    GLuint old_tex = cur_state.texture_units[0].texture_2d;
-    cur_state.texture_units[0].texture_2d = target_tex;
+    const auto& old_tex = cur_state.texture_units[0];
+    SCOPE_EXIT({
+        cur_state.texture_units[0] = old_tex;
+        cur_state.Apply();
+    });
+    cur_state.texture_units[0].texture = target_tex;
+    cur_state.texture_units[0].target = SurfaceTargetToGL(params.target);
     cur_state.Apply();
 
     // Ensure no bad interactions with GL_UNPACK_ALIGNMENT
@@ -610,136 +647,102 @@ void CachedSurface::UploadGLTexture(GLuint read_fb_handle, GLuint draw_fb_handle
 
     glActiveTexture(GL_TEXTURE0);
     if (tuple.compressed) {
-        glCompressedTexImage2D(
-            GL_TEXTURE_2D, 0, tuple.internal_format, static_cast<GLsizei>(params.width),
-            static_cast<GLsizei>(params.height), 0, static_cast<GLsizei>(params.size_in_bytes),
-            &gl_buffer[buffer_offset]);
+        switch (params.target) {
+        case SurfaceParams::SurfaceTarget::Texture2D:
+            glCompressedTexImage2D(
+                SurfaceTargetToGL(params.target), 0, tuple.internal_format,
+                static_cast<GLsizei>(params.width), static_cast<GLsizei>(params.height), 0,
+                static_cast<GLsizei>(params.size_in_bytes), &gl_buffer[buffer_offset]);
+            break;
+        case SurfaceParams::SurfaceTarget::Texture3D:
+        case SurfaceParams::SurfaceTarget::Texture2DArray:
+            glCompressedTexImage3D(
+                SurfaceTargetToGL(params.target), 0, tuple.internal_format,
+                static_cast<GLsizei>(params.width), static_cast<GLsizei>(params.height),
+                static_cast<GLsizei>(params.depth), 0, static_cast<GLsizei>(params.size_in_bytes),
+                &gl_buffer[buffer_offset]);
+            break;
+        default:
+            LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}",
+                         static_cast<u32>(params.target));
+            UNREACHABLE();
+            glCompressedTexImage2D(
+                GL_TEXTURE_2D, 0, tuple.internal_format, static_cast<GLsizei>(params.width),
+                static_cast<GLsizei>(params.height), 0, static_cast<GLsizei>(params.size_in_bytes),
+                &gl_buffer[buffer_offset]);
+        }
     } else {
-        glTexSubImage2D(GL_TEXTURE_2D, 0, x0, y0, static_cast<GLsizei>(rect.GetWidth()),
-                        static_cast<GLsizei>(rect.GetHeight()), tuple.format, tuple.type,
-                        &gl_buffer[buffer_offset]);
-    }
-
-    glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
-
-    cur_state.texture_units[0].texture_2d = old_tex;
-    cur_state.Apply();
-}
-
-MICROPROFILE_DEFINE(OpenGL_TextureDL, "OpenGL", "Texture Download", MP_RGB(128, 192, 64));
-void CachedSurface::DownloadGLTexture(GLuint read_fb_handle, GLuint draw_fb_handle) {
-    if (params.type == SurfaceType::Fill)
-        return;
-
-    MICROPROFILE_SCOPE(OpenGL_TextureDL);
-
-    gl_buffer.resize(params.width * params.height * GetGLBytesPerPixel(params.pixel_format));
-
-    OpenGLState state = OpenGLState::GetCurState();
-    OpenGLState prev_state = state;
-    SCOPE_EXIT({ prev_state.Apply(); });
-
-    const FormatTuple& tuple = GetFormatTuple(params.pixel_format, params.component_type);
-
-    // Ensure no bad interactions with GL_PACK_ALIGNMENT
-    ASSERT(params.width * GetGLBytesPerPixel(params.pixel_format) % 4 == 0);
-    glPixelStorei(GL_PACK_ROW_LENGTH, static_cast<GLint>(params.width));
-
-    const auto& rect{params.GetRect()};
-    size_t buffer_offset =
-        (rect.bottom * params.width + rect.left) * GetGLBytesPerPixel(params.pixel_format);
-
-    state.UnbindTexture(texture.handle);
-    state.draw.read_framebuffer = read_fb_handle;
-    state.Apply();
 
-    if (params.type == SurfaceType::ColorTexture) {
-        glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D,
-                               texture.handle, 0);
-        glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0,
-                               0);
-    } else if (params.type == SurfaceType::Depth) {
-        glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
-        glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D,
-                               texture.handle, 0);
-        glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
-    } else {
-        glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
-        glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
-                               texture.handle, 0);
+        switch (params.target) {
+        case SurfaceParams::SurfaceTarget::Texture1D:
+            glTexSubImage1D(SurfaceTargetToGL(params.target), 0, x0,
+                            static_cast<GLsizei>(rect.GetWidth()), tuple.format, tuple.type,
+                            &gl_buffer[buffer_offset]);
+            break;
+        case SurfaceParams::SurfaceTarget::Texture2D:
+            glTexSubImage2D(SurfaceTargetToGL(params.target), 0, x0, y0,
+                            static_cast<GLsizei>(rect.GetWidth()),
+                            static_cast<GLsizei>(rect.GetHeight()), tuple.format, tuple.type,
+                            &gl_buffer[buffer_offset]);
+            break;
+        case SurfaceParams::SurfaceTarget::Texture3D:
+        case SurfaceParams::SurfaceTarget::Texture2DArray:
+            glTexSubImage3D(SurfaceTargetToGL(params.target), 0, x0, y0, 0,
+                            static_cast<GLsizei>(rect.GetWidth()),
+                            static_cast<GLsizei>(rect.GetHeight()), params.depth, tuple.format,
+                            tuple.type, &gl_buffer[buffer_offset]);
+            break;
+        default:
+            LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}",
+                         static_cast<u32>(params.target));
+            UNREACHABLE();
+            glTexSubImage2D(GL_TEXTURE_2D, 0, x0, y0, static_cast<GLsizei>(rect.GetWidth()),
+                            static_cast<GLsizei>(rect.GetHeight()), tuple.format, tuple.type,
+                            &gl_buffer[buffer_offset]);
+        }
     }
-    glReadPixels(static_cast<GLint>(rect.left), static_cast<GLint>(rect.bottom),
-                 static_cast<GLsizei>(rect.GetWidth()), static_cast<GLsizei>(rect.GetHeight()),
-                 tuple.format, tuple.type, &gl_buffer[buffer_offset]);
 
-    glPixelStorei(GL_PACK_ROW_LENGTH, 0);
+    glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
 }
 
 RasterizerCacheOpenGL::RasterizerCacheOpenGL() {
     read_framebuffer.Create();
     draw_framebuffer.Create();
+    copy_pbo.Create();
 }
 
 Surface RasterizerCacheOpenGL::GetTextureSurface(const Tegra::Texture::FullTextureInfo& config) {
     return GetSurface(SurfaceParams::CreateForTexture(config));
 }
 
-SurfaceSurfaceRect_Tuple RasterizerCacheOpenGL::GetFramebufferSurfaces(bool using_color_fb,
-                                                                       bool using_depth_fb,
-                                                                       bool preserve_contents) {
-    const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;
+Surface RasterizerCacheOpenGL::GetDepthBufferSurface(bool preserve_contents) {
+    const auto& regs{Core::System::GetInstance().GPU().Maxwell3D().regs};
+    if (!regs.zeta.Address() || !regs.zeta_enable) {
+        return {};
+    }
 
-    // TODO(bunnei): This is hard corded to use just the first render buffer
-    LOG_TRACE(Render_OpenGL, "hard-coded for render target 0!");
+    SurfaceParams depth_params{SurfaceParams::CreateForDepthBuffer(
+        regs.zeta_width, regs.zeta_height, regs.zeta.Address(), regs.zeta.format)};
 
-    // get color and depth surfaces
-    SurfaceParams color_params{};
-    SurfaceParams depth_params{};
+    return GetSurface(depth_params, preserve_contents);
+}
 
-    if (using_color_fb) {
-        color_params = SurfaceParams::CreateForFramebuffer(regs.rt[0]);
-    }
+Surface RasterizerCacheOpenGL::GetColorBufferSurface(size_t index, bool preserve_contents) {
+    const auto& regs{Core::System::GetInstance().GPU().Maxwell3D().regs};
 
-    if (using_depth_fb) {
-        depth_params = SurfaceParams::CreateForDepthBuffer(regs.zeta_width, regs.zeta_height,
-                                                           regs.zeta.Address(), regs.zeta.format);
-    }
+    ASSERT(index < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets);
 
-    MathUtil::Rectangle<u32> color_rect{};
-    Surface color_surface;
-    if (using_color_fb) {
-        color_surface = GetSurface(color_params, preserve_contents);
-        if (color_surface) {
-            color_rect = color_surface->GetSurfaceParams().GetRect();
-        }
+    if (index >= regs.rt_control.count) {
+        return {};
     }
 
-    MathUtil::Rectangle<u32> depth_rect{};
-    Surface depth_surface;
-    if (using_depth_fb) {
-        depth_surface = GetSurface(depth_params, preserve_contents);
-        if (depth_surface) {
-            depth_rect = depth_surface->GetSurfaceParams().GetRect();
-        }
+    if (regs.rt[index].Address() == 0 || regs.rt[index].format == Tegra::RenderTargetFormat::NONE) {
+        return {};
     }
 
-    MathUtil::Rectangle<u32> fb_rect{};
-    if (color_surface && depth_surface) {
-        fb_rect = color_rect;
-        // Color and Depth surfaces must have the same dimensions and offsets
-        if (color_rect.bottom != depth_rect.bottom || color_rect.top != depth_rect.top ||
-            color_rect.left != depth_rect.left || color_rect.right != depth_rect.right) {
-            color_surface = GetSurface(color_params);
-            depth_surface = GetSurface(depth_params);
-            fb_rect = color_surface->GetSurfaceParams().GetRect();
-        }
-    } else if (color_surface) {
-        fb_rect = color_rect;
-    } else if (depth_surface) {
-        fb_rect = depth_rect;
-    }
+    const SurfaceParams color_params{SurfaceParams::CreateForFramebuffer(index)};
 
-    return std::make_tuple(color_surface, depth_surface, fb_rect);
+    return GetSurface(color_params, preserve_contents);
 }
 
 void RasterizerCacheOpenGL::LoadSurface(const Surface& surface) {
@@ -748,7 +751,6 @@ void RasterizerCacheOpenGL::LoadSurface(const Surface& surface) {
 }
 
 void RasterizerCacheOpenGL::FlushSurface(const Surface& surface) {
-    surface->DownloadGLTexture(read_framebuffer.handle, draw_framebuffer.handle);
     surface->FlushGLBuffer();
 }
 
@@ -806,27 +808,26 @@ Surface RasterizerCacheOpenGL::RecreateSurface(const Surface& surface,
     // Get a new surface with the new parameters, and blit the previous surface to it
     Surface new_surface{GetUncachedSurface(new_params)};
 
-    // If format is unchanged, we can do a faster blit without reinterpreting pixel data
-    if (params.pixel_format == new_params.pixel_format) {
+    if (params.pixel_format == new_params.pixel_format ||
+        !Settings::values.use_accurate_framebuffers) {
+        // If the format is the same, just do a framebuffer blit. This is significantly faster than
+        // using PBOs. The is also likely less accurate, as textures will be converted rather than
+        // reinterpreted.
+
         BlitTextures(surface->Texture().handle, params.GetRect(), new_surface->Texture().handle,
-                     new_surface->GetSurfaceParams().GetRect(), params.type,
-                     read_framebuffer.handle, draw_framebuffer.handle);
-        return new_surface;
-    }
+                     params.GetRect(), params.type, read_framebuffer.handle,
+                     draw_framebuffer.handle);
+    } else {
+        // When use_accurate_framebuffers setting is enabled, perform a more accurate surface copy,
+        // where pixels are reinterpreted as a new format (without conversion). This code path uses
+        // OpenGL PBOs and is quite slow.
 
-    // When using accurate framebuffers, always copy old data to new surface, regardless of format
-    if (Settings::values.use_accurate_framebuffers) {
         auto source_format = GetFormatTuple(params.pixel_format, params.component_type);
         auto dest_format = GetFormatTuple(new_params.pixel_format, new_params.component_type);
 
         size_t buffer_size = std::max(params.SizeInBytes(), new_params.SizeInBytes());
 
-        // Use a Pixel Buffer Object to download the previous texture and then upload it to the new
-        // one using the new format.
-        OGLBuffer pbo;
-        pbo.Create();
-
-        glBindBuffer(GL_PIXEL_PACK_BUFFER, pbo.handle);
+        glBindBuffer(GL_PIXEL_PACK_BUFFER, copy_pbo.handle);
         glBufferData(GL_PIXEL_PACK_BUFFER, buffer_size, nullptr, GL_STREAM_DRAW_ARB);
         if (source_format.compressed) {
             glGetCompressedTextureImage(surface->Texture().handle, 0,
@@ -845,8 +846,8 @@ Surface RasterizerCacheOpenGL::RecreateSurface(const Surface& surface,
                 // of the data in this case. Games like Super Mario Odyssey seem to hit this case
                 // when drawing, it re-uses the memory of a previous texture as a bigger framebuffer
                 // but it doesn't clear it beforehand, the texture is already full of zeros.
-                LOG_CRITICAL(HW_GPU, "Trying to upload extra texture data from the CPU during "
-                                     "reinterpretation but the texture is tiled.");
+                LOG_DEBUG(HW_GPU, "Trying to upload extra texture data from the CPU during "
+                                  "reinterpretation but the texture is tiled.");
             }
             size_t remaining_size = new_params.SizeInBytes() - params.SizeInBytes();
             std::vector<u8> data(remaining_size);
@@ -859,21 +860,38 @@ Surface RasterizerCacheOpenGL::RecreateSurface(const Surface& surface,
 
         const auto& dest_rect{new_params.GetRect()};
 
-        glBindBuffer(GL_PIXEL_UNPACK_BUFFER, pbo.handle);
+        glBindBuffer(GL_PIXEL_UNPACK_BUFFER, copy_pbo.handle);
         if (dest_format.compressed) {
-            glCompressedTexSubImage2D(
-                GL_TEXTURE_2D, 0, 0, 0, static_cast<GLsizei>(dest_rect.GetWidth()),
-                static_cast<GLsizei>(dest_rect.GetHeight()), dest_format.format,
-                static_cast<GLsizei>(new_params.SizeInBytes()), nullptr);
+            LOG_CRITICAL(HW_GPU, "Compressed copy is unimplemented!");
+            UNREACHABLE();
         } else {
-            glTextureSubImage2D(new_surface->Texture().handle, 0, 0, 0,
-                                static_cast<GLsizei>(dest_rect.GetWidth()),
-                                static_cast<GLsizei>(dest_rect.GetHeight()), dest_format.format,
-                                dest_format.type, nullptr);
+            switch (new_params.target) {
+            case SurfaceParams::SurfaceTarget::Texture1D:
+                glTextureSubImage1D(new_surface->Texture().handle, 0, 0,
+                                    static_cast<GLsizei>(dest_rect.GetWidth()), dest_format.format,
+                                    dest_format.type, nullptr);
+                break;
+            case SurfaceParams::SurfaceTarget::Texture2D:
+                glTextureSubImage2D(new_surface->Texture().handle, 0, 0, 0,
+                                    static_cast<GLsizei>(dest_rect.GetWidth()),
+                                    static_cast<GLsizei>(dest_rect.GetHeight()), dest_format.format,
+                                    dest_format.type, nullptr);
+                break;
+            case SurfaceParams::SurfaceTarget::Texture3D:
+            case SurfaceParams::SurfaceTarget::Texture2DArray:
+                glTextureSubImage3D(new_surface->Texture().handle, 0, 0, 0, 0,
+                                    static_cast<GLsizei>(dest_rect.GetWidth()),
+                                    static_cast<GLsizei>(dest_rect.GetHeight()),
+                                    static_cast<GLsizei>(new_params.depth), dest_format.format,
+                                    dest_format.type, nullptr);
+                break;
+            default:
+                LOG_CRITICAL(Render_OpenGL, "Unimplemented surface target={}",
+                             static_cast<u32>(params.target));
+                UNREACHABLE();
+            }
         }
         glBindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
-
-        pbo.Release();
     }
 
     return new_surface;
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
index aad75f200..57ea8593b 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
@@ -109,6 +109,33 @@ struct SurfaceParams {
         Invalid = 4,
     };
 
+    enum class SurfaceTarget {
+        Texture1D,
+        Texture2D,
+        Texture3D,
+        Texture1DArray,
+        Texture2DArray,
+        TextureCubemap,
+    };
+
+    static SurfaceTarget SurfaceTargetFromTextureType(Tegra::Texture::TextureType texture_type) {
+        switch (texture_type) {
+        case Tegra::Texture::TextureType::Texture1D:
+            return SurfaceTarget::Texture1D;
+        case Tegra::Texture::TextureType::Texture2D:
+        case Tegra::Texture::TextureType::Texture2DNoMipmap:
+            return SurfaceTarget::Texture2D;
+        case Tegra::Texture::TextureType::Texture1DArray:
+            return SurfaceTarget::Texture1DArray;
+        case Tegra::Texture::TextureType::Texture2DArray:
+            return SurfaceTarget::Texture2DArray;
+        default:
+            LOG_CRITICAL(HW_GPU, "Unimplemented texture_type={}", static_cast<u32>(texture_type));
+            UNREACHABLE();
+            return SurfaceTarget::Texture2D;
+        }
+    }
+
     /**
      * Gets the compression factor for the specified PixelFormat. This applies to just the
      * "compressed width" and "compressed height", not the overall compression factor of a
@@ -270,6 +297,7 @@ struct SurfaceParams {
             return PixelFormat::ABGR8S;
         case Tegra::RenderTargetFormat::RGBA8_UINT:
             return PixelFormat::ABGR8UI;
+        case Tegra::RenderTargetFormat::BGRA8_SRGB:
         case Tegra::RenderTargetFormat::BGRA8_UNORM:
             return PixelFormat::BGRA8;
         case Tegra::RenderTargetFormat::RGB10_A2_UNORM:
@@ -542,6 +570,7 @@ struct SurfaceParams {
         case Tegra::RenderTargetFormat::RGBA8_UNORM:
         case Tegra::RenderTargetFormat::RGBA8_SRGB:
         case Tegra::RenderTargetFormat::BGRA8_UNORM:
+        case Tegra::RenderTargetFormat::BGRA8_SRGB:
         case Tegra::RenderTargetFormat::RGB10_A2_UNORM:
         case Tegra::RenderTargetFormat::R8_UNORM:
         case Tegra::RenderTargetFormat::RG16_UNORM:
@@ -635,15 +664,14 @@ struct SurfaceParams {
         ASSERT(width % compression_factor == 0);
         ASSERT(height % compression_factor == 0);
         return (width / compression_factor) * (height / compression_factor) *
-               GetFormatBpp(pixel_format) / CHAR_BIT;
+               GetFormatBpp(pixel_format) * depth / CHAR_BIT;
     }
 
     /// Creates SurfaceParams from a texture configuration
     static SurfaceParams CreateForTexture(const Tegra::Texture::FullTextureInfo& config);
 
     /// Creates SurfaceParams from a framebuffer configuration
-    static SurfaceParams CreateForFramebuffer(
-        const Tegra::Engines::Maxwell3D::Regs::RenderTargetConfig& config);
+    static SurfaceParams CreateForFramebuffer(size_t index);
 
     /// Creates SurfaceParams for a depth buffer configuration
     static SurfaceParams CreateForDepthBuffer(u32 zeta_width, u32 zeta_height,
@@ -652,8 +680,8 @@ struct SurfaceParams {
 
     /// Checks if surfaces are compatible for caching
     bool IsCompatibleSurface(const SurfaceParams& other) const {
-        return std::tie(pixel_format, type, cache_width, cache_height) ==
-               std::tie(other.pixel_format, other.type, other.cache_width, other.cache_height);
+        return std::tie(pixel_format, type, width, height) ==
+               std::tie(other.pixel_format, other.type, other.width, other.height);
     }
 
     VAddr addr;
@@ -664,12 +692,10 @@ struct SurfaceParams {
     SurfaceType type;
     u32 width;
     u32 height;
+    u32 depth;
     u32 unaligned_height;
     size_t size_in_bytes;
-
-    // Parameters used for caching only
-    u32 cache_width;
-    u32 cache_height;
+    SurfaceTarget target;
 };
 
 }; // namespace OpenGL
@@ -709,6 +735,10 @@ public:
         return texture;
     }
 
+    GLenum Target() const {
+        return gl_target;
+    }
+
     static constexpr unsigned int GetGLBytesPerPixel(SurfaceParams::PixelFormat format) {
         if (format == SurfaceParams::PixelFormat::Invalid)
             return 0;
@@ -724,14 +754,14 @@ public:
     void LoadGLBuffer();
     void FlushGLBuffer();
 
-    // Upload/Download data in gl_buffer in/to this surface's texture
+    // Upload data in gl_buffer to this surface's texture
     void UploadGLTexture(GLuint read_fb_handle, GLuint draw_fb_handle);
-    void DownloadGLTexture(GLuint read_fb_handle, GLuint draw_fb_handle);
 
 private:
     OGLTexture texture;
     std::vector<u8> gl_buffer;
     SurfaceParams params;
+    GLenum gl_target;
 };
 
 class RasterizerCacheOpenGL final : public RasterizerCache<Surface> {
@@ -741,9 +771,11 @@ public:
     /// Get a surface based on the texture configuration
     Surface GetTextureSurface(const Tegra::Texture::FullTextureInfo& config);
 
-    /// Get the color and depth surfaces based on the framebuffer configuration
-    SurfaceSurfaceRect_Tuple GetFramebufferSurfaces(bool using_color_fb, bool using_depth_fb,
-                                                    bool preserve_contents);
+    /// Get the depth surface based on the framebuffer configuration
+    Surface GetDepthBufferSurface(bool preserve_contents);
+
+    /// Get the color surface based on the framebuffer configuration and the specified render target
+    Surface GetColorBufferSurface(size_t index, bool preserve_contents);
 
     /// Flushes the surface to Switch memory
     void FlushSurface(const Surface& surface);
@@ -774,6 +806,10 @@ private:
 
     OGLFramebuffer read_framebuffer;
     OGLFramebuffer draw_framebuffer;
+
+    /// Use a Pixel Buffer Object to download the previous texture and then upload it to the new one
+    /// using the new format.
+    OGLBuffer copy_pbo;
 };
 
 } // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index 7e4b85ac3..61080f5cc 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -13,8 +13,8 @@ namespace OpenGL {
 
 /// Gets the address for the specified shader stage program
 static VAddr GetShaderAddress(Maxwell::ShaderProgram program) {
-    auto& gpu = Core::System::GetInstance().GPU().Maxwell3D();
-    auto& shader_config = gpu.regs.shader_config[static_cast<size_t>(program)];
+    const auto& gpu = Core::System::GetInstance().GPU().Maxwell3D();
+    const auto& shader_config = gpu.regs.shader_config[static_cast<size_t>(program)];
     return *gpu.memory_manager.GpuToCpuAddress(gpu.regs.code_address.CodeAddress() +
                                                shader_config.offset);
 }
@@ -86,7 +86,7 @@ CachedShader::CachedShader(VAddr addr, Maxwell::ShaderProgram program_type)
 }
 
 GLuint CachedShader::GetProgramResourceIndex(const GLShader::ConstBufferEntry& buffer) {
-    auto search{resource_cache.find(buffer.GetHash())};
+    const auto search{resource_cache.find(buffer.GetHash())};
     if (search == resource_cache.end()) {
         const GLuint index{
             glGetProgramResourceIndex(program.handle, GL_UNIFORM_BLOCK, buffer.GetName().c_str())};
@@ -98,7 +98,7 @@ GLuint CachedShader::GetProgramResourceIndex(const GLShader::ConstBufferEntry& b
 }
 
 GLint CachedShader::GetUniformLocation(const GLShader::SamplerEntry& sampler) {
-    auto search{uniform_cache.find(sampler.GetHash())};
+    const auto search{uniform_cache.find(sampler.GetHash())};
     if (search == uniform_cache.end()) {
         const GLint index{glGetUniformLocation(program.handle, sampler.GetName().c_str())};
         uniform_cache[sampler.GetHash()] = index;
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index 841647ebe..2d56370c7 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -113,7 +113,7 @@ private:
 
     /// Scans a range of code for labels and determines the exit method.
     ExitMethod Scan(u32 begin, u32 end, std::set<u32>& labels) {
-        auto [iter, inserted] =
+        const auto [iter, inserted] =
             exit_method_map.emplace(std::make_pair(begin, end), ExitMethod::Undetermined);
         ExitMethod& exit_method = iter->second;
         if (!inserted)
@@ -131,22 +131,22 @@ private:
                     if (instr.pred.pred_index == static_cast<u64>(Pred::UnusedIndex)) {
                         return exit_method = ExitMethod::AlwaysEnd;
                     } else {
-                        ExitMethod not_met = Scan(offset + 1, end, labels);
+                        const ExitMethod not_met = Scan(offset + 1, end, labels);
                         return exit_method = ParallelExit(ExitMethod::AlwaysEnd, not_met);
                     }
                 }
                 case OpCode::Id::BRA: {
-                    u32 target = offset + instr.bra.GetBranchTarget();
+                    const u32 target = offset + instr.bra.GetBranchTarget();
                     labels.insert(target);
-                    ExitMethod no_jmp = Scan(offset + 1, end, labels);
-                    ExitMethod jmp = Scan(target, end, labels);
+                    const ExitMethod no_jmp = Scan(offset + 1, end, labels);
+                    const ExitMethod jmp = Scan(target, end, labels);
                     return exit_method = ParallelExit(no_jmp, jmp);
                 }
                 case OpCode::Id::SSY: {
                     // The SSY instruction uses a similar encoding as the BRA instruction.
                     ASSERT_MSG(instr.bra.constant_buffer == 0,
                                "Constant buffer SSY is not supported");
-                    u32 target = offset + instr.bra.GetBranchTarget();
+                    const u32 target = offset + instr.bra.GetBranchTarget();
                     labels.insert(target);
                     // Continue scanning for an exit method.
                     break;
@@ -346,8 +346,8 @@ public:
      */
     void SetRegisterToInputAttibute(const Register& reg, u64 elem, Attribute::Index attribute,
                                     const Tegra::Shader::IpaMode& input_mode) {
-        std::string dest = GetRegisterAsFloat(reg);
-        std::string src = GetInputAttribute(attribute, input_mode) + GetSwizzle(elem);
+        const std::string dest = GetRegisterAsFloat(reg);
+        const std::string src = GetInputAttribute(attribute, input_mode) + GetSwizzle(elem);
         shader.AddLine(dest + " = " + src + ';');
     }
 
@@ -359,8 +359,8 @@ public:
      * @param reg The register to use as the source value.
      */
     void SetOutputAttributeToRegister(Attribute::Index attribute, u64 elem, const Register& reg) {
-        std::string dest = GetOutputAttribute(attribute);
-        std::string src = GetRegisterAsFloat(reg);
+        const std::string dest = GetOutputAttribute(attribute);
+        const std::string src = GetRegisterAsFloat(reg);
 
         if (!dest.empty()) {
             // Can happen with unknown/unimplemented output attributes, in which case we ignore the
@@ -393,9 +393,9 @@ public:
                                    GLSLRegister::Type type) {
         declr_const_buffers[cbuf_index].MarkAsUsedIndirect(cbuf_index, stage);
 
-        std::string final_offset = fmt::format("({} + {})", index_str, offset / 4);
-        std::string value = 'c' + std::to_string(cbuf_index) + '[' + final_offset + " / 4][" +
-                            final_offset + " % 4]";
+        const std::string final_offset = fmt::format("({} + {})", index_str, offset / 4);
+        const std::string value = 'c' + std::to_string(cbuf_index) + '[' + final_offset + " / 4][" +
+                                  final_offset + " % 4]";
 
         if (type == GLSLRegister::Type::Float) {
             return value;
@@ -443,13 +443,12 @@ public:
         }
         declarations.AddNewLine();
 
-        // Append the sampler2D array for the used textures.
-        const size_t num_samplers = used_samplers.size();
-        if (num_samplers > 0) {
-            declarations.AddLine("uniform sampler2D " + SamplerEntry::GetArrayName(stage) + '[' +
-                                 std::to_string(num_samplers) + "];");
-            declarations.AddNewLine();
+        const auto& samplers = GetSamplers();
+        for (const auto& sampler : samplers) {
+            declarations.AddLine("uniform " + sampler.GetTypeString() + ' ' + sampler.GetName() +
+                                 ';');
         }
+        declarations.AddNewLine();
     }
 
     /// Returns a list of constant buffer declarations
@@ -461,27 +460,29 @@ public:
     }
 
     /// Returns a list of samplers used in the shader
-    std::vector<SamplerEntry> GetSamplers() const {
+    const std::vector<SamplerEntry>& GetSamplers() const {
         return used_samplers;
     }
 
     /// Returns the GLSL sampler used for the input shader sampler, and creates a new one if
     /// necessary.
-    std::string AccessSampler(const Sampler& sampler) {
-        size_t offset = static_cast<size_t>(sampler.index.Value());
+    std::string AccessSampler(const Sampler& sampler, Tegra::Shader::TextureType type,
+                              bool is_array) {
+        const size_t offset = static_cast<size_t>(sampler.index.Value());
 
         // If this sampler has already been used, return the existing mapping.
-        auto itr =
+        const auto itr =
             std::find_if(used_samplers.begin(), used_samplers.end(),
                          [&](const SamplerEntry& entry) { return entry.GetOffset() == offset; });
 
         if (itr != used_samplers.end()) {
+            ASSERT(itr->GetType() == type && itr->IsArray() == is_array);
             return itr->GetName();
         }
 
         // Otherwise create a new mapping for this sampler
-        size_t next_index = used_samplers.size();
-        SamplerEntry entry{stage, offset, next_index};
+        const size_t next_index = used_samplers.size();
+        const SamplerEntry entry{stage, offset, next_index, type, is_array};
         used_samplers.emplace_back(entry);
         return entry.GetName();
     }
@@ -698,7 +699,7 @@ private:
         };
 
         bool IsColorComponentOutputEnabled(u32 render_target, u32 component) const {
-            u32 bit = render_target * 4 + component;
+            const u32 bit = render_target * 4 + component;
             return enabled_color_outputs & (1 << bit);
         }
     };
@@ -706,7 +707,7 @@ private:
 
     /// Gets the Subroutine object corresponding to the specified address.
     const Subroutine& GetSubroutine(u32 begin, u32 end) const {
-        auto iter = subroutines.find(Subroutine{begin, end, suffix});
+        const auto iter = subroutines.find(Subroutine{begin, end, suffix});
         ASSERT(iter != subroutines.end());
         return *iter;
     }
@@ -722,8 +723,8 @@ private:
     }
 
     /// Generates code representing a texture sampler.
-    std::string GetSampler(const Sampler& sampler) {
-        return regs.AccessSampler(sampler);
+    std::string GetSampler(const Sampler& sampler, Tegra::Shader::TextureType type, bool is_array) {
+        return regs.AccessSampler(sampler, type, is_array);
     }
 
     /**
@@ -751,7 +752,7 @@ private:
         // Can't assign to the constant predicate.
         ASSERT(pred != static_cast<u64>(Pred::UnusedIndex));
 
-        std::string variable = 'p' + std::to_string(pred) + '_' + suffix;
+        const std::string variable = 'p' + std::to_string(pred) + '_' + suffix;
         shader.AddLine(variable + " = " + value + ';');
         declr_predicates.insert(std::move(variable));
     }
@@ -1022,7 +1023,7 @@ private:
             // TODO(Subv): Figure out how dual-source blending is configured in the Switch.
             for (u32 component = 0; component < 4; ++component) {
                 if (header.IsColorComponentOutputEnabled(render_target, component)) {
-                    shader.AddLine(fmt::format("color[{}][{}] = {};", render_target, component,
+                    shader.AddLine(fmt::format("FragColor{}[{}] = {};", render_target, component,
                                                regs.GetRegisterAsFloat(current_reg)));
                     ++current_reg;
                 }
@@ -1032,7 +1033,11 @@ private:
         if (header.writes_depth) {
             // The depth output is always 2 registers after the last color output, and current_reg
             // already contains one past the last color register.
-            shader.AddLine("gl_FragDepth = " + regs.GetRegisterAsFloat(current_reg + 1) + ';');
+
+            shader.AddLine(
+                "gl_FragDepth = " +
+                regs.GetRegisterAsFloat(static_cast<Tegra::Shader::Register>(current_reg) + 1) +
+                ';');
         }
     }
 
@@ -1434,7 +1439,7 @@ private:
                 if (instr.alu_integer.negate_b)
                     op_b = "-(" + op_b + ')';
 
-                std::string shift = std::to_string(instr.alu_integer.shift_amount.Value());
+                const std::string shift = std::to_string(instr.alu_integer.shift_amount.Value());
 
                 regs.SetRegisterToInteger(instr.gpr0, true, 0,
                                           "((" + op_a + " << " + shift + ") + " + op_b + ')', 1, 1);
@@ -1452,7 +1457,7 @@ private:
             case OpCode::Id::SEL_C:
             case OpCode::Id::SEL_R:
             case OpCode::Id::SEL_IMM: {
-                std::string condition =
+                const std::string condition =
                     GetPredicateCondition(instr.sel.pred, instr.sel.neg_pred != 0);
                 regs.SetRegisterToInteger(instr.gpr0, true, 0,
                                           '(' + condition + ") ? " + op_a + " : " + op_b, 1, 1);
@@ -1474,8 +1479,9 @@ private:
             case OpCode::Id::LOP3_C:
             case OpCode::Id::LOP3_R:
             case OpCode::Id::LOP3_IMM: {
-                std::string op_c = regs.GetRegisterAsInteger(instr.gpr39);
+                const std::string op_c = regs.GetRegisterAsInteger(instr.gpr39);
                 std::string lut;
+
                 if (opcode->GetId() == OpCode::Id::LOP3_R) {
                     lut = '(' + std::to_string(instr.alu.lop3.GetImmLut28()) + ')';
                 } else {
@@ -1490,15 +1496,82 @@ private:
             case OpCode::Id::IMNMX_IMM: {
                 ASSERT_MSG(instr.imnmx.exchange == Tegra::Shader::IMinMaxExchange::None,
                            "Unimplemented");
-                std::string condition =
+                const std::string condition =
                     GetPredicateCondition(instr.imnmx.pred, instr.imnmx.negate_pred != 0);
-                std::string parameters = op_a + ',' + op_b;
+                const std::string parameters = op_a + ',' + op_b;
                 regs.SetRegisterToInteger(instr.gpr0, instr.imnmx.is_signed, 0,
                                           '(' + condition + ") ? min(" + parameters + ") : max(" +
                                               parameters + ')',
                                           1, 1);
                 break;
             }
+            case OpCode::Id::LEA_R2:
+            case OpCode::Id::LEA_R1:
+            case OpCode::Id::LEA_IMM:
+            case OpCode::Id::LEA_RZ:
+            case OpCode::Id::LEA_HI: {
+                std::string op_a;
+                std::string op_b;
+                std::string op_c;
+
+                switch (opcode->GetId()) {
+                case OpCode::Id::LEA_R2: {
+                    op_a = regs.GetRegisterAsInteger(instr.gpr20);
+                    op_b = regs.GetRegisterAsInteger(instr.gpr39);
+                    op_c = std::to_string(instr.lea.r2.entry_a);
+                    break;
+                }
+
+                case OpCode::Id::LEA_R1: {
+                    const bool neg = instr.lea.r1.neg != 0;
+                    op_a = regs.GetRegisterAsInteger(instr.gpr8);
+                    if (neg)
+                        op_a = "-(" + op_a + ')';
+                    op_b = regs.GetRegisterAsInteger(instr.gpr20);
+                    op_c = std::to_string(instr.lea.r1.entry_a);
+                    break;
+                }
+
+                case OpCode::Id::LEA_IMM: {
+                    const bool neg = instr.lea.imm.neg != 0;
+                    op_b = regs.GetRegisterAsInteger(instr.gpr8);
+                    if (neg)
+                        op_b = "-(" + op_b + ')';
+                    op_a = std::to_string(instr.lea.imm.entry_a);
+                    op_c = std::to_string(instr.lea.imm.entry_b);
+                    break;
+                }
+
+                case OpCode::Id::LEA_RZ: {
+                    const bool neg = instr.lea.rz.neg != 0;
+                    op_b = regs.GetRegisterAsInteger(instr.gpr8);
+                    if (neg)
+                        op_b = "-(" + op_b + ')';
+                    op_a = regs.GetUniform(instr.lea.rz.cb_index, instr.lea.rz.cb_offset,
+                                           GLSLRegister::Type::Integer);
+                    op_c = std::to_string(instr.lea.rz.entry_a);
+
+                    break;
+                }
+
+                case OpCode::Id::LEA_HI:
+                default: {
+                    op_b = regs.GetRegisterAsInteger(instr.gpr8);
+                    op_a = std::to_string(instr.lea.imm.entry_a);
+                    op_c = std::to_string(instr.lea.imm.entry_b);
+                    LOG_CRITICAL(HW_GPU, "Unhandled LEA subinstruction: {}", opcode->GetName());
+                    UNREACHABLE();
+                }
+                }
+                if (instr.lea.pred48 != static_cast<u64>(Pred::UnusedIndex)) {
+                    LOG_ERROR(HW_GPU, "Unhandled LEA Predicate");
+                    UNREACHABLE();
+                }
+                const std::string value = '(' + op_a + " + (" + op_b + "*(1 << " + op_c + ")))";
+                regs.SetRegisterToInteger(instr.gpr0, true, 0, value, 1, 1);
+
+                break;
+            }
             default: {
                 LOG_CRITICAL(HW_GPU, "Unhandled ArithmeticInteger instruction: {}",
                              opcode->GetName());
@@ -1509,7 +1582,7 @@ private:
             break;
         }
         case OpCode::Type::Ffma: {
-            std::string op_a = regs.GetRegisterAsFloat(instr.gpr8);
+            const std::string op_a = regs.GetRegisterAsFloat(instr.gpr8);
             std::string op_b = instr.ffma.negate_b ? "-" : "";
             std::string op_c = instr.ffma.negate_c ? "-" : "";
 
@@ -1719,7 +1792,7 @@ private:
                 shader.AddLine("uint index = (" + regs.GetRegisterAsInteger(instr.gpr8, 0, false) +
                                " / 4) & (MAX_CONSTBUFFER_ELEMENTS - 1);");
 
-                std::string op_a =
+                const std::string op_a =
                     regs.GetUniformIndirect(instr.cbuf36.index, instr.cbuf36.offset + 0, "index",
                                             GLSLRegister::Type::Float);
 
@@ -1729,7 +1802,7 @@ private:
                     break;
 
                 case Tegra::Shader::UniformType::Double: {
-                    std::string op_b =
+                    const std::string op_b =
                         regs.GetUniformIndirect(instr.cbuf36.index, instr.cbuf36.offset + 4,
                                                 "index", GLSLRegister::Type::Float);
                     regs.SetRegisterToFloat(instr.gpr0, 0, op_a, 1, 1);
@@ -1753,17 +1826,74 @@ private:
                 break;
             }
             case OpCode::Id::TEX: {
-                const std::string op_a = regs.GetRegisterAsFloat(instr.gpr8);
-                const std::string op_b = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1);
-                const std::string sampler = GetSampler(instr.sampler);
-                const std::string coord = "vec2 coords = vec2(" + op_a + ", " + op_b + ");";
+                ASSERT_MSG(instr.tex.array == 0, "TEX arrays unimplemented");
+                Tegra::Shader::TextureType texture_type{instr.tex.texture_type};
+                std::string coord;
+
+                switch (texture_type) {
+                case Tegra::Shader::TextureType::Texture1D: {
+                    const std::string x = regs.GetRegisterAsFloat(instr.gpr8);
+                    coord = "float coords = " + x + ';';
+                    break;
+                }
+                case Tegra::Shader::TextureType::Texture2D: {
+                    const std::string x = regs.GetRegisterAsFloat(instr.gpr8);
+                    const std::string y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1);
+                    coord = "vec2 coords = vec2(" + x + ", " + y + ");";
+                    break;
+                }
+                default:
+                    LOG_CRITICAL(HW_GPU, "Unhandled texture type {}",
+                                 static_cast<u32>(texture_type));
+                    UNREACHABLE();
+
+                    // Fallback to interpreting as a 2D texture for now
+                    const std::string x = regs.GetRegisterAsFloat(instr.gpr8);
+                    const std::string y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1);
+                    coord = "vec2 coords = vec2(" + x + ", " + y + ");";
+                    texture_type = Tegra::Shader::TextureType::Texture2D;
+                }
+                // TODO: make sure coordinates are always indexed to gpr8 and gpr20 is always bias
+                // or lod.
+                const std::string op_c = regs.GetRegisterAsFloat(instr.gpr20);
+
+                const std::string sampler = GetSampler(instr.sampler, texture_type, false);
                 // Add an extra scope and declare the texture coords inside to prevent
                 // overwriting them in case they are used as outputs of the texs instruction.
+
                 shader.AddLine("{");
                 ++shader.scope;
                 shader.AddLine(coord);
-                const std::string texture = "texture(" + sampler + ", coords)";
+                std::string texture;
 
+                switch (instr.tex.process_mode) {
+                case Tegra::Shader::TextureProcessMode::None: {
+                    texture = "texture(" + sampler + ", coords)";
+                    break;
+                }
+                case Tegra::Shader::TextureProcessMode::LZ: {
+                    texture = "textureLod(" + sampler + ", coords, 0.0)";
+                    break;
+                }
+                case Tegra::Shader::TextureProcessMode::LB:
+                case Tegra::Shader::TextureProcessMode::LBA: {
+                    // TODO: Figure if A suffix changes the equation at all.
+                    texture = "texture(" + sampler + ", coords, " + op_c + ')';
+                    break;
+                }
+                case Tegra::Shader::TextureProcessMode::LL:
+                case Tegra::Shader::TextureProcessMode::LLA: {
+                    // TODO: Figure if A suffix changes the equation at all.
+                    texture = "textureLod(" + sampler + ", coords, " + op_c + ')';
+                    break;
+                }
+                default: {
+                    texture = "texture(" + sampler + ", coords)";
+                    LOG_CRITICAL(HW_GPU, "Unhandled texture process mode {}",
+                                 static_cast<u32>(instr.tex.process_mode.Value()));
+                    UNREACHABLE();
+                }
+                }
                 size_t dest_elem{};
                 for (size_t elem = 0; elem < 4; ++elem) {
                     if (!instr.tex.IsComponentEnabled(elem)) {
@@ -1778,20 +1908,65 @@ private:
                 break;
             }
             case OpCode::Id::TEXS: {
-                const std::string op_a = regs.GetRegisterAsFloat(instr.gpr8);
-                const std::string op_b = regs.GetRegisterAsFloat(instr.gpr20);
-                const std::string sampler = GetSampler(instr.sampler);
-                const std::string coord = "vec2 coords = vec2(" + op_a + ", " + op_b + ");";
+                std::string coord;
+                Tegra::Shader::TextureType texture_type{instr.texs.GetTextureType()};
+                bool is_array{instr.texs.IsArrayTexture()};
+
+                switch (texture_type) {
+                case Tegra::Shader::TextureType::Texture2D: {
+                    if (is_array) {
+                        const std::string index = regs.GetRegisterAsInteger(instr.gpr8);
+                        const std::string x = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1);
+                        const std::string y = regs.GetRegisterAsFloat(instr.gpr20);
+                        coord = "vec3 coords = vec3(" + x + ", " + y + ", " + index + ");";
+                    } else {
+                        const std::string x = regs.GetRegisterAsFloat(instr.gpr8);
+                        const std::string y = regs.GetRegisterAsFloat(instr.gpr20);
+                        coord = "vec2 coords = vec2(" + x + ", " + y + ");";
+                    }
+                    break;
+                }
+                default:
+                    LOG_CRITICAL(HW_GPU, "Unhandled texture type {}",
+                                 static_cast<u32>(texture_type));
+                    UNREACHABLE();
 
+                    // Fallback to interpreting as a 2D texture for now
+                    const std::string x = regs.GetRegisterAsFloat(instr.gpr8);
+                    const std::string y = regs.GetRegisterAsFloat(instr.gpr20);
+                    coord = "vec2 coords = vec2(" + x + ", " + y + ");";
+                    texture_type = Tegra::Shader::TextureType::Texture2D;
+                    is_array = false;
+                }
+                const std::string sampler = GetSampler(instr.sampler, texture_type, is_array);
                 const std::string texture = "texture(" + sampler + ", coords)";
                 WriteTexsInstruction(instr, coord, texture);
                 break;
             }
             case OpCode::Id::TLDS: {
-                const std::string op_a = regs.GetRegisterAsInteger(instr.gpr8);
-                const std::string op_b = regs.GetRegisterAsInteger(instr.gpr20);
-                const std::string sampler = GetSampler(instr.sampler);
-                const std::string coord = "ivec2 coords = ivec2(" + op_a + ", " + op_b + ");";
+                ASSERT(instr.tlds.GetTextureType() == Tegra::Shader::TextureType::Texture2D);
+                ASSERT(instr.tlds.IsArrayTexture() == false);
+                std::string coord;
+
+                switch (instr.tlds.GetTextureType()) {
+                case Tegra::Shader::TextureType::Texture2D: {
+                    if (instr.tlds.IsArrayTexture()) {
+                        LOG_CRITICAL(HW_GPU, "Unhandled 2d array texture");
+                        UNREACHABLE();
+                    } else {
+                        const std::string x = regs.GetRegisterAsInteger(instr.gpr8);
+                        const std::string y = regs.GetRegisterAsInteger(instr.gpr20);
+                        coord = "ivec2 coords = ivec2(" + x + ", " + y + ");";
+                    }
+                    break;
+                }
+                default:
+                    LOG_CRITICAL(HW_GPU, "Unhandled texture type {}",
+                                 static_cast<u32>(instr.tlds.GetTextureType()));
+                    UNREACHABLE();
+                }
+                const std::string sampler = GetSampler(instr.sampler, instr.tlds.GetTextureType(),
+                                                       instr.tlds.IsArrayTexture());
                 const std::string texture = "texelFetch(" + sampler + ", coords, 0)";
                 WriteTexsInstruction(instr, coord, texture);
                 break;
@@ -1799,12 +1974,12 @@ private:
             case OpCode::Id::TLD4: {
                 ASSERT(instr.tld4.texture_type == Tegra::Shader::TextureType::Texture2D);
                 ASSERT(instr.tld4.array == 0);
-                std::string coord{};
+                std::string coord;
 
                 switch (instr.tld4.texture_type) {
                 case Tegra::Shader::TextureType::Texture2D: {
-                    std::string x = regs.GetRegisterAsFloat(instr.gpr8);
-                    std::string y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1);
+                    const std::string x = regs.GetRegisterAsFloat(instr.gpr8);
+                    const std::string y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1);
                     coord = "vec2 coords = vec2(" + x + ", " + y + ");";
                     break;
                 }
@@ -1814,7 +1989,8 @@ private:
                     UNREACHABLE();
                 }
 
-                const std::string sampler = GetSampler(instr.sampler);
+                const std::string sampler =
+                    GetSampler(instr.sampler, instr.tld4.texture_type, false);
                 // Add an extra scope and declare the texture coords inside to prevent
                 // overwriting them in case they are used as outputs of the texs instruction.
                 shader.AddLine("{");
@@ -1840,13 +2016,82 @@ private:
                 const std::string op_a = regs.GetRegisterAsFloat(instr.gpr8);
                 const std::string op_b = regs.GetRegisterAsFloat(instr.gpr20);
                 // TODO(Subv): Figure out how the sampler type is encoded in the TLD4S instruction.
-                const std::string sampler = GetSampler(instr.sampler);
+                const std::string sampler =
+                    GetSampler(instr.sampler, Tegra::Shader::TextureType::Texture2D, false);
                 const std::string coord = "vec2 coords = vec2(" + op_a + ", " + op_b + ");";
                 const std::string texture = "textureGather(" + sampler + ", coords, " +
                                             std::to_string(instr.tld4s.component) + ')';
                 WriteTexsInstruction(instr, coord, texture);
                 break;
             }
+            case OpCode::Id::TXQ: {
+                // TODO: the new commits on the texture refactor, change the way samplers work.
+                // Sadly, not all texture instructions specify the type of texture their sampler
+                // uses. This must be fixed at a later instance.
+                const std::string sampler =
+                    GetSampler(instr.sampler, Tegra::Shader::TextureType::Texture2D, false);
+                switch (instr.txq.query_type) {
+                case Tegra::Shader::TextureQueryType::Dimension: {
+                    const std::string texture = "textureQueryLevels(" + sampler + ')';
+                    regs.SetRegisterToInteger(instr.gpr0, true, 0, texture, 1, 1);
+                    break;
+                }
+                default: {
+                    LOG_CRITICAL(HW_GPU, "Unhandled texture query type: {}",
+                                 static_cast<u32>(instr.txq.query_type.Value()));
+                    UNREACHABLE();
+                }
+                }
+                break;
+            }
+            case OpCode::Id::TMML: {
+                const std::string op_a = regs.GetRegisterAsFloat(instr.gpr8);
+                const std::string op_b = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1);
+                const bool is_array = instr.tmml.array != 0;
+                auto texture_type = instr.tmml.texture_type.Value();
+                const std::string sampler = GetSampler(instr.sampler, texture_type, is_array);
+
+                // TODO: add coordinates for different samplers once other texture types are
+                // implemented.
+                std::string coord;
+                switch (texture_type) {
+                case Tegra::Shader::TextureType::Texture1D: {
+                    std::string x = regs.GetRegisterAsFloat(instr.gpr8);
+                    coord = "float coords = " + x + ';';
+                    break;
+                }
+                case Tegra::Shader::TextureType::Texture2D: {
+                    std::string x = regs.GetRegisterAsFloat(instr.gpr8);
+                    std::string y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1);
+                    coord = "vec2 coords = vec2(" + x + ", " + y + ");";
+                    break;
+                }
+                default:
+                    LOG_CRITICAL(HW_GPU, "Unhandled texture type {}",
+                                 static_cast<u32>(texture_type));
+                    UNREACHABLE();
+
+                    // Fallback to interpreting as a 2D texture for now
+                    std::string x = regs.GetRegisterAsFloat(instr.gpr8);
+                    std::string y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1);
+                    coord = "vec2 coords = vec2(" + x + ", " + y + ");";
+                    texture_type = Tegra::Shader::TextureType::Texture2D;
+                }
+                // Add an extra scope and declare the texture coords inside to prevent
+                // overwriting them in case they are used as outputs of the texs instruction.
+                shader.AddLine('{');
+                ++shader.scope;
+                shader.AddLine(coord);
+                const std::string texture = "textureQueryLod(" + sampler + ", coords)";
+                const std::string tmp = "vec2 tmp = " + texture + "*vec2(256.0, 256.0);";
+                shader.AddLine(tmp);
+
+                regs.SetRegisterToInteger(instr.gpr0, true, 0, "int(tmp.y)", 1, 1);
+                regs.SetRegisterToInteger(instr.gpr0.Value() + 1, false, 0, "uint(tmp.x)", 1, 1);
+                --shader.scope;
+                shader.AddLine('}');
+                break;
+            }
             default: {
                 LOG_CRITICAL(HW_GPU, "Unhandled memory instruction: {}", opcode->GetName());
                 UNREACHABLE();
@@ -1886,12 +2131,12 @@ private:
             // We can't use the constant predicate as destination.
             ASSERT(instr.fsetp.pred3 != static_cast<u64>(Pred::UnusedIndex));
 
-            std::string second_pred =
+            const std::string second_pred =
                 GetPredicateCondition(instr.fsetp.pred39, instr.fsetp.neg_pred != 0);
 
-            std::string combiner = GetPredicateCombiner(instr.fsetp.op);
+            const std::string combiner = GetPredicateCombiner(instr.fsetp.op);
 
-            std::string predicate = GetPredicateComparison(instr.fsetp.cond, op_a, op_b);
+            const std::string predicate = GetPredicateComparison(instr.fsetp.cond, op_a, op_b);
             // Set the primary predicate to the result of Predicate OP SecondPredicate
             SetPredicate(instr.fsetp.pred3,
                          '(' + predicate + ") " + combiner + " (" + second_pred + ')');
@@ -1905,7 +2150,8 @@ private:
             break;
         }
         case OpCode::Type::IntegerSetPredicate: {
-            std::string op_a = regs.GetRegisterAsInteger(instr.gpr8, 0, instr.isetp.is_signed);
+            const std::string op_a =
+                regs.GetRegisterAsInteger(instr.gpr8, 0, instr.isetp.is_signed);
             std::string op_b;
 
             if (instr.is_b_imm) {
@@ -1922,12 +2168,12 @@ private:
             // We can't use the constant predicate as destination.
             ASSERT(instr.isetp.pred3 != static_cast<u64>(Pred::UnusedIndex));
 
-            std::string second_pred =
+            const std::string second_pred =
                 GetPredicateCondition(instr.isetp.pred39, instr.isetp.neg_pred != 0);
 
-            std::string combiner = GetPredicateCombiner(instr.isetp.op);
+            const std::string combiner = GetPredicateCombiner(instr.isetp.op);
 
-            std::string predicate = GetPredicateComparison(instr.isetp.cond, op_a, op_b);
+            const std::string predicate = GetPredicateComparison(instr.isetp.cond, op_a, op_b);
             // Set the primary predicate to the result of Predicate OP SecondPredicate
             SetPredicate(instr.isetp.pred3,
                          '(' + predicate + ") " + combiner + " (" + second_pred + ')');
@@ -1940,21 +2186,45 @@ private:
             }
             break;
         }
+        case OpCode::Type::PredicateSetRegister: {
+            const std::string op_a =
+                GetPredicateCondition(instr.pset.pred12, instr.pset.neg_pred12 != 0);
+            const std::string op_b =
+                GetPredicateCondition(instr.pset.pred29, instr.pset.neg_pred29 != 0);
+
+            const std::string second_pred =
+                GetPredicateCondition(instr.pset.pred39, instr.pset.neg_pred39 != 0);
+
+            const std::string combiner = GetPredicateCombiner(instr.pset.op);
+
+            const std::string predicate =
+                '(' + op_a + ") " + GetPredicateCombiner(instr.pset.cond) + " (" + op_b + ')';
+            const std::string result = '(' + predicate + ") " + combiner + " (" + second_pred + ')';
+            if (instr.pset.bf == 0) {
+                const std::string value = '(' + result + ") ? 0xFFFFFFFF : 0";
+                regs.SetRegisterToInteger(instr.gpr0, false, 0, value, 1, 1);
+            } else {
+                const std::string value = '(' + result + ") ? 1.0 : 0.0";
+                regs.SetRegisterToFloat(instr.gpr0, 0, value, 1, 1);
+            }
+
+            break;
+        }
         case OpCode::Type::PredicateSetPredicate: {
-            std::string op_a =
+            const std::string op_a =
                 GetPredicateCondition(instr.psetp.pred12, instr.psetp.neg_pred12 != 0);
-            std::string op_b =
+            const std::string op_b =
                 GetPredicateCondition(instr.psetp.pred29, instr.psetp.neg_pred29 != 0);
 
             // We can't use the constant predicate as destination.
             ASSERT(instr.psetp.pred3 != static_cast<u64>(Pred::UnusedIndex));
 
-            std::string second_pred =
+            const std::string second_pred =
                 GetPredicateCondition(instr.psetp.pred39, instr.psetp.neg_pred39 != 0);
 
-            std::string combiner = GetPredicateCombiner(instr.psetp.op);
+            const std::string combiner = GetPredicateCombiner(instr.psetp.op);
 
-            std::string predicate =
+            const std::string predicate =
                 '(' + op_a + ") " + GetPredicateCombiner(instr.psetp.cond) + " (" + op_b + ')';
 
             // Set the primary predicate to the result of Predicate OP SecondPredicate
@@ -1980,7 +2250,7 @@ private:
             std::string op_b = instr.fset.neg_b ? "-" : "";
 
             if (instr.is_b_imm) {
-                std::string imm = GetImmediate19(instr);
+                const std::string imm = GetImmediate19(instr);
                 if (instr.fset.neg_imm)
                     op_b += "(-" + imm + ')';
                 else
@@ -2000,13 +2270,14 @@ private:
 
             // The fset instruction sets a register to 1.0 or -1 (depending on the bf bit) if the
             // condition is true, and to 0 otherwise.
-            std::string second_pred =
+            const std::string second_pred =
                 GetPredicateCondition(instr.fset.pred39, instr.fset.neg_pred != 0);
 
-            std::string combiner = GetPredicateCombiner(instr.fset.op);
+            const std::string combiner = GetPredicateCombiner(instr.fset.op);
 
-            std::string predicate = "((" + GetPredicateComparison(instr.fset.cond, op_a, op_b) +
-                                    ") " + combiner + " (" + second_pred + "))";
+            const std::string predicate = "((" +
+                                          GetPredicateComparison(instr.fset.cond, op_a, op_b) +
+                                          ") " + combiner + " (" + second_pred + "))";
 
             if (instr.fset.bf) {
                 regs.SetRegisterToFloat(instr.gpr0, 0, predicate + " ? 1.0 : 0.0", 1, 1);
@@ -2017,7 +2288,7 @@ private:
             break;
         }
         case OpCode::Type::IntegerSet: {
-            std::string op_a = regs.GetRegisterAsInteger(instr.gpr8, 0, instr.iset.is_signed);
+            const std::string op_a = regs.GetRegisterAsInteger(instr.gpr8, 0, instr.iset.is_signed);
 
             std::string op_b;
 
@@ -2034,13 +2305,14 @@ private:
 
             // The iset instruction sets a register to 1.0 or -1 (depending on the bf bit) if the
             // condition is true, and to 0 otherwise.
-            std::string second_pred =
+            const std::string second_pred =
                 GetPredicateCondition(instr.iset.pred39, instr.iset.neg_pred != 0);
 
-            std::string combiner = GetPredicateCombiner(instr.iset.op);
+            const std::string combiner = GetPredicateCombiner(instr.iset.op);
 
-            std::string predicate = "((" + GetPredicateComparison(instr.iset.cond, op_a, op_b) +
-                                    ") " + combiner + " (" + second_pred + "))";
+            const std::string predicate = "((" +
+                                          GetPredicateComparison(instr.iset.cond, op_a, op_b) +
+                                          ") " + combiner + " (" + second_pred + "))";
 
             if (instr.iset.bf) {
                 regs.SetRegisterToFloat(instr.gpr0, 0, predicate + " ? 1.0 : 0.0", 1, 1);
@@ -2190,7 +2462,7 @@ private:
             case OpCode::Id::BRA: {
                 ASSERT_MSG(instr.bra.constant_buffer == 0,
                            "BRA with constant buffers are not implemented");
-                u32 target = offset + instr.bra.GetBranchTarget();
+                const u32 target = offset + instr.bra.GetBranchTarget();
                 shader.AddLine("{ jmp_to = " + std::to_string(target) + "u; break; }");
                 break;
             }
@@ -2214,7 +2486,7 @@ private:
                 // has a similar structure to the BRA opcode.
                 ASSERT_MSG(instr.bra.constant_buffer == 0, "Constant buffer SSY is not supported");
 
-                u32 target = offset + instr.bra.GetBranchTarget();
+                const u32 target = offset + instr.bra.GetBranchTarget();
                 EmitPushToSSYStack(target);
                 break;
             }
@@ -2308,10 +2580,10 @@ private:
                     shader.AddLine("case " + std::to_string(label) + "u: {");
                     ++shader.scope;
 
-                    auto next_it = labels.lower_bound(label + 1);
-                    u32 next_label = next_it == labels.end() ? subroutine.end : *next_it;
+                    const auto next_it = labels.lower_bound(label + 1);
+                    const u32 next_label = next_it == labels.end() ? subroutine.end : *next_it;
 
-                    u32 compile_end = CompileRange(label, next_label);
+                    const u32 compile_end = CompileRange(label, next_label);
                     if (compile_end > next_label && compile_end != PROGRAM_END) {
                         // This happens only when there is a label inside a IF/LOOP block
                         shader.AddLine(" jmp_to = " + std::to_string(compile_end) + "u; break; }");
@@ -2374,7 +2646,8 @@ boost::optional<ProgramResult> DecompileProgram(const ProgramCode& program_code,
                                                 Maxwell3D::Regs::ShaderStage stage,
                                                 const std::string& suffix) {
     try {
-        auto subroutines = ControlFlowAnalyzer(program_code, main_offset, suffix).GetSubroutines();
+        const auto subroutines =
+            ControlFlowAnalyzer(program_code, main_offset, suffix).GetSubroutines();
         GLSLGenerator generator(subroutines, program_code, main_offset, stage, suffix);
         return ProgramResult{generator.GetShaderCode(), generator.GetEntries()};
     } catch (const DecompileFail& exception) {
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp
index e1b1a9d73..b0466c18f 100644
--- a/src/video_core/renderer_opengl/gl_shader_gen.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp
@@ -88,7 +88,14 @@ ProgramResult GenerateFragmentShader(const ShaderSetup& setup) {
             .get_value_or({});
     out += R"(
 in vec4 position;
-layout(location = 0) out vec4 color[8];
+layout(location = 0) out vec4 FragColor0;
+layout(location = 1) out vec4 FragColor1;
+layout(location = 2) out vec4 FragColor2;
+layout(location = 3) out vec4 FragColor3;
+layout(location = 4) out vec4 FragColor4;
+layout(location = 5) out vec4 FragColor5;
+layout(location = 6) out vec4 FragColor6;
+layout(location = 7) out vec4 FragColor7;
 
 layout (std140) uniform fs_config {
     vec4 viewport_flip;
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.h b/src/video_core/renderer_opengl/gl_shader_gen.h
index cbb2090ea..a43e2997b 100644
--- a/src/video_core/renderer_opengl/gl_shader_gen.h
+++ b/src/video_core/renderer_opengl/gl_shader_gen.h
@@ -9,6 +9,7 @@
 #include <vector>
 
 #include "common/common_types.h"
+#include "video_core/engines/shader_bytecode.h"
 
 namespace OpenGL::GLShader {
 
@@ -73,8 +74,9 @@ class SamplerEntry {
     using Maxwell = Tegra::Engines::Maxwell3D::Regs;
 
 public:
-    SamplerEntry(Maxwell::ShaderStage stage, size_t offset, size_t index)
-        : offset(offset), stage(stage), sampler_index(index) {}
+    SamplerEntry(Maxwell::ShaderStage stage, size_t offset, size_t index,
+                 Tegra::Shader::TextureType type, bool is_array)
+        : offset(offset), stage(stage), sampler_index(index), type(type), is_array(is_array) {}
 
     size_t GetOffset() const {
         return offset;
@@ -89,8 +91,41 @@ public:
     }
 
     std::string GetName() const {
-        return std::string(TextureSamplerNames[static_cast<size_t>(stage)]) + '[' +
-               std::to_string(sampler_index) + ']';
+        return std::string(TextureSamplerNames[static_cast<size_t>(stage)]) + '_' +
+               std::to_string(sampler_index);
+    }
+
+    std::string GetTypeString() const {
+        using Tegra::Shader::TextureType;
+        std::string glsl_type;
+
+        switch (type) {
+        case TextureType::Texture1D:
+            glsl_type = "sampler1D";
+            break;
+        case TextureType::Texture2D:
+            glsl_type = "sampler2D";
+            break;
+        case TextureType::Texture3D:
+            glsl_type = "sampler3D";
+            break;
+        case TextureType::TextureCube:
+            glsl_type = "samplerCube";
+            break;
+        default:
+            UNIMPLEMENTED();
+        }
+        if (is_array)
+            glsl_type += "Array";
+        return glsl_type;
+    }
+
+    Tegra::Shader::TextureType GetType() const {
+        return type;
+    }
+
+    bool IsArray() const {
+        return is_array;
     }
 
     u32 GetHash() const {
@@ -105,11 +140,14 @@ private:
     static constexpr std::array<const char*, Maxwell::MaxShaderStage> TextureSamplerNames = {
         "tex_vs", "tex_tessc", "tex_tesse", "tex_gs", "tex_fs",
     };
+
     /// Offset in TSC memory from which to read the sampler object, as specified by the sampling
     /// instruction.
     size_t offset;
-    Maxwell::ShaderStage stage; ///< Shader stage where this sampler was used.
-    size_t sampler_index;       ///< Value used to index into the generated GLSL sampler array.
+    Maxwell::ShaderStage stage;      ///< Shader stage where this sampler was used.
+    size_t sampler_index;            ///< Value used to index into the generated GLSL sampler array.
+    Tegra::Shader::TextureType type; ///< The type used to sample this texture (Texture2D, etc)
+    bool is_array; ///< Whether the texture is being sampled as an array texture or not.
 };
 
 struct ShaderEntries {
diff --git a/src/video_core/renderer_opengl/gl_shader_util.cpp b/src/video_core/renderer_opengl/gl_shader_util.cpp
index 5781d9d16..5f3fe067e 100644
--- a/src/video_core/renderer_opengl/gl_shader_util.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_util.cpp
@@ -25,7 +25,7 @@ GLuint LoadShader(const char* source, GLenum type) {
     default:
         UNREACHABLE();
     }
-    GLuint shader_id = glCreateShader(type);
+    const GLuint shader_id = glCreateShader(type);
     glShaderSource(shader_id, 1, &source, nullptr);
     LOG_DEBUG(Render_OpenGL, "Compiling {} shader...", debug_type);
     glCompileShader(shader_id);
diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp
index 60a4defd1..6f70deb96 100644
--- a/src/video_core/renderer_opengl/gl_state.cpp
+++ b/src/video_core/renderer_opengl/gl_state.cpp
@@ -200,9 +200,9 @@ void OpenGLState::Apply() const {
         const auto& texture_unit = texture_units[i];
         const auto& cur_state_texture_unit = cur_state.texture_units[i];
 
-        if (texture_unit.texture_2d != cur_state_texture_unit.texture_2d) {
+        if (texture_unit.texture != cur_state_texture_unit.texture) {
             glActiveTexture(TextureUnits::MaxwellTexture(static_cast<int>(i)).Enum());
-            glBindTexture(GL_TEXTURE_2D, texture_unit.texture_2d);
+            glBindTexture(texture_unit.target, texture_unit.texture);
         }
         if (texture_unit.sampler != cur_state_texture_unit.sampler) {
             glBindSampler(static_cast<GLuint>(i), texture_unit.sampler);
@@ -214,7 +214,7 @@ void OpenGLState::Apply() const {
             texture_unit.swizzle.a != cur_state_texture_unit.swizzle.a) {
             std::array<GLint, 4> mask = {texture_unit.swizzle.r, texture_unit.swizzle.g,
                                          texture_unit.swizzle.b, texture_unit.swizzle.a};
-            glTexParameteriv(GL_TEXTURE_2D, GL_TEXTURE_SWIZZLE_RGBA, mask.data());
+            glTexParameteriv(texture_unit.target, GL_TEXTURE_SWIZZLE_RGBA, mask.data());
         }
     }
 
@@ -287,7 +287,7 @@ void OpenGLState::Apply() const {
 
 OpenGLState& OpenGLState::UnbindTexture(GLuint handle) {
     for (auto& unit : texture_units) {
-        if (unit.texture_2d == handle) {
+        if (unit.texture == handle) {
             unit.Unbind();
         }
     }
diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h
index 46e96a97d..e3e24b9e7 100644
--- a/src/video_core/renderer_opengl/gl_state.h
+++ b/src/video_core/renderer_opengl/gl_state.h
@@ -94,8 +94,9 @@ public:
 
     // 3 texture units - one for each that is used in PICA fragment shader emulation
     struct TextureUnit {
-        GLuint texture_2d; // GL_TEXTURE_BINDING_2D
-        GLuint sampler;    // GL_SAMPLER_BINDING
+        GLuint texture; // GL_TEXTURE_BINDING_2D
+        GLuint sampler; // GL_SAMPLER_BINDING
+        GLenum target;
         struct {
             GLint r; // GL_TEXTURE_SWIZZLE_R
             GLint g; // GL_TEXTURE_SWIZZLE_G
@@ -104,7 +105,7 @@ public:
         } swizzle;
 
         void Unbind() {
-            texture_2d = 0;
+            texture = 0;
             swizzle.r = GL_RED;
             swizzle.g = GL_GREEN;
             swizzle.b = GL_BLUE;
@@ -114,6 +115,7 @@ public:
         void Reset() {
             Unbind();
             sampler = 0;
+            target = GL_TEXTURE_2D;
         }
     };
     std::array<TextureUnit, 32> texture_units;
diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.cpp b/src/video_core/renderer_opengl/gl_stream_buffer.cpp
index e565afcee..aadf68f16 100644
--- a/src/video_core/renderer_opengl/gl_stream_buffer.cpp
+++ b/src/video_core/renderer_opengl/gl_stream_buffer.cpp
@@ -29,7 +29,7 @@ OGLStreamBuffer::OGLStreamBuffer(GLenum target, GLsizeiptr size, bool prefer_coh
     if (GLAD_GL_ARB_buffer_storage) {
         persistent = true;
         coherent = prefer_coherent;
-        GLbitfield flags =
+        const GLbitfield flags =
             GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | (coherent ? GL_MAP_COHERENT_BIT : 0);
         glBufferStorage(gl_target, allocate_size, nullptr, flags);
         mapped_ptr = static_cast<u8*>(glMapBufferRange(
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index 411a73d50..96d916b07 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -177,7 +177,7 @@ void RendererOpenGL::LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuf
                                        Memory::GetPointer(framebuffer_addr),
                                        gl_framebuffer_data.data(), true);
 
-        state.texture_units[0].texture_2d = screen_info.texture.resource.handle;
+        state.texture_units[0].texture = screen_info.texture.resource.handle;
         state.Apply();
 
         glActiveTexture(GL_TEXTURE0);
@@ -194,7 +194,7 @@ void RendererOpenGL::LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuf
 
         glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
 
-        state.texture_units[0].texture_2d = 0;
+        state.texture_units[0].texture = 0;
         state.Apply();
     }
 }
@@ -205,7 +205,7 @@ void RendererOpenGL::LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuf
  */
 void RendererOpenGL::LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color_b, u8 color_a,
                                                 const TextureInfo& texture) {
-    state.texture_units[0].texture_2d = texture.resource.handle;
+    state.texture_units[0].texture = texture.resource.handle;
     state.Apply();
 
     glActiveTexture(GL_TEXTURE0);
@@ -214,7 +214,7 @@ void RendererOpenGL::LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color
     // Update existing texture
     glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, 1, 1, 0, GL_RGBA, GL_UNSIGNED_BYTE, framebuffer_data);
 
-    state.texture_units[0].texture_2d = 0;
+    state.texture_units[0].texture = 0;
     state.Apply();
 }
 
@@ -260,7 +260,7 @@ void RendererOpenGL::InitOpenGLObjects() {
     // Allocation of storage is deferred until the first frame, when we
     // know the framebuffer size.
 
-    state.texture_units[0].texture_2d = screen_info.texture.resource.handle;
+    state.texture_units[0].texture = screen_info.texture.resource.handle;
     state.Apply();
 
     glActiveTexture(GL_TEXTURE0);
@@ -272,7 +272,7 @@ void RendererOpenGL::InitOpenGLObjects() {
 
     screen_info.display_texture = screen_info.texture.resource.handle;
 
-    state.texture_units[0].texture_2d = 0;
+    state.texture_units[0].texture = 0;
     state.Apply();
 
     // Clear screen to black
@@ -305,14 +305,14 @@ void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture,
         UNREACHABLE();
     }
 
-    state.texture_units[0].texture_2d = texture.resource.handle;
+    state.texture_units[0].texture = texture.resource.handle;
     state.Apply();
 
     glActiveTexture(GL_TEXTURE0);
     glTexImage2D(GL_TEXTURE_2D, 0, internal_format, texture.width, texture.height, 0,
                  texture.gl_format, texture.gl_type, nullptr);
 
-    state.texture_units[0].texture_2d = 0;
+    state.texture_units[0].texture = 0;
     state.Apply();
 }
 
@@ -354,14 +354,14 @@ void RendererOpenGL::DrawScreenTriangles(const ScreenInfo& screen_info, float x,
         ScreenRectVertex(x + w, y + h, texcoords.bottom * scale_u, right * scale_v),
     }};
 
-    state.texture_units[0].texture_2d = screen_info.display_texture;
+    state.texture_units[0].texture = screen_info.display_texture;
     state.texture_units[0].swizzle = {GL_RED, GL_GREEN, GL_BLUE, GL_ALPHA};
     state.Apply();
 
     glBufferSubData(GL_ARRAY_BUFFER, 0, sizeof(vertices), vertices.data());
     glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
 
-    state.texture_units[0].texture_2d = 0;
+    state.texture_units[0].texture = 0;
     state.Apply();
 }
 
@@ -369,6 +369,12 @@ void RendererOpenGL::DrawScreenTriangles(const ScreenInfo& screen_info, float x,
  * Draws the emulated screens to the emulator window.
  */
 void RendererOpenGL::DrawScreen() {
+    if (renderer_settings.set_background_color) {
+        // Update background color before drawing
+        glClearColor(Settings::values.bg_red, Settings::values.bg_green, Settings::values.bg_blue,
+                     0.0f);
+    }
+
     const auto& layout = render_window.GetFramebufferLayout();
     const auto& screen = layout.screen;
 
diff --git a/src/video_core/textures/texture.h b/src/video_core/textures/texture.h
index c6bd2f4b9..c2fb824b2 100644
--- a/src/video_core/textures/texture.h
+++ b/src/video_core/textures/texture.h
@@ -170,8 +170,12 @@ struct TICEntry {
         BitField<0, 16, u32> width_minus_1;
         BitField<23, 4, TextureType> texture_type;
     };
-    u16 height_minus_1;
-    INSERT_PADDING_BYTES(10);
+    union {
+        BitField<0, 16, u32> height_minus_1;
+        BitField<16, 15, u32> depth_minus_1;
+    };
+
+    INSERT_PADDING_BYTES(8);
 
     GPUVAddr Address() const {
         return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) | address_low);
@@ -192,6 +196,10 @@ struct TICEntry {
         return height_minus_1 + 1;
     }
 
+    u32 Depth() const {
+        return depth_minus_1 + 1;
+    }
+
     u32 BlockHeight() const {
         ASSERT(header_version == TICHeaderVersion::BlockLinear ||
                header_version == TICHeaderVersion::BlockLinearColorKey);