88 files changed, 5823 insertions, 2876 deletions
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index e2f85c5f1..6f3f2aa9f 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -6,6 +6,7 @@ add_library(video_core STATIC
     dma_pusher.h
     debug_utils/debug_utils.cpp
     debug_utils/debug_utils.h
+    engines/const_buffer_engine_interface.h
     engines/const_buffer_info.h
     engines/engine_upload.cpp
     engines/engine_upload.h
@@ -35,6 +36,8 @@ add_library(video_core STATIC
     memory_manager.h
     morton.cpp
     morton.h
+    rasterizer_accelerated.cpp
+    rasterizer_accelerated.h
     rasterizer_cache.cpp
     rasterizer_cache.h
     rasterizer_interface.h
@@ -105,9 +108,17 @@ add_library(video_core STATIC
     shader/decode/warp.cpp
     shader/decode/xmad.cpp
     shader/decode/other.cpp
+    shader/ast.cpp
+    shader/ast.h
+    shader/compiler_settings.cpp
+    shader/compiler_settings.h
+    shader/const_buffer_locker.cpp
+    shader/const_buffer_locker.h
     shader/control_flow.cpp
     shader/control_flow.h
     shader/decode.cpp
+    shader/expr.cpp
+    shader/expr.h
     shader/node_helper.cpp
     shader/node_helper.h
     shader/node.h
@@ -116,6 +127,8 @@ add_library(video_core STATIC
     shader/track.cpp
     surface.cpp
     surface.h
+    texture_cache/format_lookup_table.cpp
+    texture_cache/format_lookup_table.h
     texture_cache/surface_base.cpp
     texture_cache/surface_base.h
     texture_cache/surface_params.cpp
@@ -169,3 +182,9 @@ target_link_libraries(video_core PRIVATE glad)
 if (ENABLE_VULKAN)
     target_link_libraries(video_core PRIVATE sirit)
 endif()
+
+if (MSVC) # /we4267 promotes warning C4267 (size_t converted to a smaller type) to an error
+    target_compile_options(video_core PRIVATE /we4267)
+else() # Other compilers: -Wconversion findings are errors, sign-only conversions are not
+    target_compile_options(video_core PRIVATE -Werror=conversion -Wno-error=sign-conversion)
+endif()
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index 2442ddfd6..4408b5001 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -12,6 +12,10 @@
 #include <utility>
 #include <vector>
 
+#include <boost/icl/interval_map.hpp>
+#include <boost/icl/interval_set.hpp>
+#include <boost/range/iterator_range.hpp>
+
 #include "common/alignment.h"
 #include "common/common_types.h"
 #include "core/core.h"
@@ -30,7 +34,7 @@ public:
     using BufferInfo = std::pair<const TBufferType*, u64>;
 
     BufferInfo UploadMemory(GPUVAddr gpu_addr, std::size_t size, std::size_t alignment = 4,
-                            bool is_written = false) {
+                            bool is_written = false, bool use_fast_cbuf = false) {
         std::lock_guard lock{mutex};
 
         auto& memory_manager = system.GPU().MemoryManager();
@@ -43,9 +47,13 @@ public:
         // Cache management is a big overhead, so only cache entries with a given size.
         // TODO: Figure out which size is the best for given games.
         constexpr std::size_t max_stream_size = 0x800;
-        if (size < max_stream_size) {
+        if (use_fast_cbuf || size < max_stream_size) {
             if (!is_written && !IsRegionWritten(cache_addr, cache_addr + size - 1)) {
-                return StreamBufferUpload(host_ptr, size, alignment);
+                if (use_fast_cbuf) {
+                    return ConstBufferUpload(host_ptr, size);
+                } else {
+                    return StreamBufferUpload(host_ptr, size, alignment);
+                }
             }
         }
 
@@ -152,6 +160,10 @@ protected:
     virtual void CopyBlock(const TBuffer& src, const TBuffer& dst, std::size_t src_offset,
                            std::size_t dst_offset, std::size_t size) = 0;
 
+    virtual BufferInfo ConstBufferUpload(const void* raw_pointer, std::size_t size) {
+        return BufferInfo{}; // Base implementation uploads nothing; derived caches may override.
+    }
+
     /// Register an object into the cache
     void Register(const MapInterval& new_map, bool inherit_written = false) {
         const CacheAddr cache_ptr = new_map->GetStart();
diff --git a/src/video_core/engines/const_buffer_engine_interface.h b/src/video_core/engines/const_buffer_engine_interface.h
new file mode 100644
index 000000000..ac27b6cbe
--- /dev/null
+++ b/src/video_core/engines/const_buffer_engine_interface.h
@@ -0,0 +1,102 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <type_traits>
+#include "common/bit_field.h"
+#include "common/common_types.h"
+#include "video_core/engines/shader_bytecode.h"
+#include "video_core/textures/texture.h"
+
+namespace Tegra::Engines {
+
+/// Shader stages addressable through ConstBufferEngineInterface.
+enum class ShaderType : u32 {
+    Vertex = 0,
+    TesselationControl = 1,
+    TesselationEval = 2,
+    Geometry = 3,
+    Fragment = 4,
+    Compute = 5,
+};
+
+/// Sampler properties packed into one 32-bit word; descriptors are compared through that word.
+struct SamplerDescriptor {
+    union {
+        BitField<0, 20, Tegra::Shader::TextureType> texture_type;
+        BitField<20, 1, u32> is_array;
+        BitField<21, 1, u32> is_buffer;
+        BitField<22, 1, u32> is_shadow;
+        u32 raw{};
+    };
+
+    bool operator==(const SamplerDescriptor& rhs) const noexcept {
+        return raw == rhs.raw;
+    }
+
+    bool operator!=(const SamplerDescriptor& rhs) const noexcept {
+        return raw != rhs.raw;
+    }
+
+    /// Translates a TIC texture type into a descriptor. The array and buffer flags follow the TIC
+    /// type, is_shadow is always left at zero, and unlisted types fall back to a plain 2D texture.
+    static SamplerDescriptor FromTicTexture(Tegra::Texture::TextureType tic_texture_type) {
+        using TicType = Tegra::Texture::TextureType;
+        using ShaderTexture = Tegra::Shader::TextureType;
+
+        const auto make = [](ShaderTexture type, u32 array, u32 buffer) {
+            SamplerDescriptor descriptor;
+            descriptor.texture_type.Assign(type);
+            descriptor.is_array.Assign(array);
+            descriptor.is_buffer.Assign(buffer);
+            descriptor.is_shadow.Assign(0);
+            return descriptor;
+        };
+
+        switch (tic_texture_type) {
+        case TicType::Texture1D:
+            return make(ShaderTexture::Texture1D, 0, 0);
+        case TicType::Texture2D:
+        case TicType::Texture2DNoMipmap:
+            return make(ShaderTexture::Texture2D, 0, 0);
+        case TicType::Texture3D:
+            return make(ShaderTexture::Texture3D, 0, 0);
+        case TicType::TextureCubemap:
+            return make(ShaderTexture::TextureCube, 0, 0);
+        case TicType::Texture1DArray:
+            return make(ShaderTexture::Texture1D, 1, 0);
+        case TicType::Texture2DArray:
+            return make(ShaderTexture::Texture2D, 1, 0);
+        case TicType::Texture1DBuffer:
+            return make(ShaderTexture::Texture1D, 0, 1);
+        case TicType::TextureCubeArray:
+            return make(ShaderTexture::TextureCube, 1, 0);
+        default:
+            return make(ShaderTexture::Texture2D, 0, 0);
+        }
+    }
+};
+static_assert(std::is_trivially_copyable_v<SamplerDescriptor>);
+
+/// Interface through which an engine exposes const buffer words and sampler descriptors.
+class ConstBufferEngineInterface {
+public:
+    virtual ~ConstBufferEngineInterface() = default;
+
+    /// Reads a 32-bit value from const buffer `const_buffer` of `stage` at `offset`.
+    virtual u32 AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const = 0;
+
+    /// Describes the sampler at `offset` in the engine's bound texture const buffer.
+    virtual SamplerDescriptor AccessBoundSampler(ShaderType stage, u64 offset) const = 0;
+
+    /// Describes the sampler whose handle sits at `offset` in const buffer `const_buffer`.
+    virtual SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer,
+                                                    u64 offset) const = 0;
+
+    /// Returns the index of the const buffer that bound samplers are read from.
+    virtual u32 GetBoundBuffer() const = 0;
+};
+
+} // namespace Tegra::Engines
diff --git a/src/video_core/engines/fermi_2d.cpp b/src/video_core/engines/fermi_2d.cpp
index 7ff44f06d..85d308e26 100644
--- a/src/video_core/engines/fermi_2d.cpp
+++ b/src/video_core/engines/fermi_2d.cpp
@@ -28,6 +28,27 @@ void Fermi2D::CallMethod(const GPU::MethodCall& method_call) {
     }
 }
 
+/// Clips a 1D blit segment against the extent of the surface it reads from.
+///
+/// [src_1, src_2) is the segment read from a surface that is src_line units long, and
+/// [dst_1, dst_2) is the segment it is stretched onto.
+///
+/// @returns {destination length after clipping, source units lying past src_line}.
+std::pair<u32, u32> DelimitLine(u32 src_1, u32 src_2, u32 dst_1, u32 dst_2, u32 src_line) {
+    const u32 line_a = src_2 - src_1;
+    const u32 line_b = dst_2 - dst_1;
+    if (line_a == 0) {
+        // Empty source segment: nothing to clip, and dividing by line_a below would fault.
+        return {line_b, 0u};
+    }
+    // Clamp to the segment length so the scaled value never exceeds line_b (no wrap-around).
+    const u32 overshoot = static_cast<u32>(std::max<s32>(0, line_a - src_line + src_1));
+    const u32 excess = std::min(overshoot, line_a);
+    // excess * line_b may not fit in 32 bits, so form the product in 64 bits.
+    const u64 scaled = (static_cast<u64>(excess) * line_b) / line_a;
+    return {line_b - static_cast<u32>(scaled), excess};
+}
+
 void Fermi2D::HandleSurfaceCopy() {
     LOG_DEBUG(HW_GPU, "Requested a surface copy with operation {}",
               static_cast<u32>(regs.operation));
@@ -47,10 +54,27 @@ void Fermi2D::HandleSurfaceCopy() {
         src_blit_x2 = static_cast<u32>((regs.blit_src_x >> 32) + regs.blit_dst_width);
         src_blit_y2 = static_cast<u32>((regs.blit_src_y >> 32) + regs.blit_dst_height);
     }
+    u32 dst_blit_x2 = regs.blit_dst_x + regs.blit_dst_width;
+    u32 dst_blit_y2 = regs.blit_dst_y + regs.blit_dst_height;
+    const auto [new_dst_w, src_excess_x] =
+        DelimitLine(src_blit_x1, src_blit_x2, regs.blit_dst_x, dst_blit_x2, regs.src.width);
+    const auto [new_dst_h, src_excess_y] =
+        DelimitLine(src_blit_y1, src_blit_y2, regs.blit_dst_y, dst_blit_y2, regs.src.height);
+    dst_blit_x2 = new_dst_w + regs.blit_dst_x;
+    src_blit_x2 = src_blit_x2 - src_excess_x;
+    dst_blit_y2 = new_dst_h + regs.blit_dst_y;
+    src_blit_y2 = src_blit_y2 - src_excess_y;
+    const auto [new_src_w, dst_excess_x] =
+        DelimitLine(regs.blit_dst_x, dst_blit_x2, src_blit_x1, src_blit_x2, regs.dst.width);
+    const auto [new_src_h, dst_excess_y] =
+        DelimitLine(regs.blit_dst_y, dst_blit_y2, src_blit_y1, src_blit_y2, regs.dst.height);
+    src_blit_x2 = new_src_w + src_blit_x1;
+    dst_blit_x2 = dst_blit_x2 - dst_excess_x;
+    src_blit_y2 = new_src_h + src_blit_y1;
+    dst_blit_y2 = dst_blit_y2 - dst_excess_y;
     const Common::Rectangle<u32> src_rect{src_blit_x1, src_blit_y1, src_blit_x2, src_blit_y2};
-    const Common::Rectangle<u32> dst_rect{regs.blit_dst_x, regs.blit_dst_y,
-                                          regs.blit_dst_x + regs.blit_dst_width,
-                                          regs.blit_dst_y + regs.blit_dst_height};
+    const Common::Rectangle<u32> dst_rect{regs.blit_dst_x, regs.blit_dst_y, dst_blit_x2,
+                                          dst_blit_y2};
     Config copy_config;
     copy_config.operation = regs.operation;
     copy_config.filter = regs.blit_control.filter;
diff --git a/src/video_core/engines/fermi_2d.h b/src/video_core/engines/fermi_2d.h
index 0901cf2fa..dba342c70 100644
--- a/src/video_core/engines/fermi_2d.h
+++ b/src/video_core/engines/fermi_2d.h
@@ -99,19 +99,19 @@ public:
 
         union {
             struct {
-                INSERT_PADDING_WORDS(0x80);
+                INSERT_UNION_PADDING_WORDS(0x80);
 
                 Surface dst;
 
-                INSERT_PADDING_WORDS(2);
+                INSERT_UNION_PADDING_WORDS(2);
 
                 Surface src;
 
-                INSERT_PADDING_WORDS(0x15);
+                INSERT_UNION_PADDING_WORDS(0x15);
 
                 Operation operation;
 
-                INSERT_PADDING_WORDS(0x177);
+                INSERT_UNION_PADDING_WORDS(0x177);
 
                 union {
                     u32 raw;
@@ -119,7 +119,7 @@ public:
                     BitField<4, 1, Filter> filter;
                 } blit_control;
 
-                INSERT_PADDING_WORDS(0x8);
+                INSERT_UNION_PADDING_WORDS(0x8);
 
                 u32 blit_dst_x;
                 u32 blit_dst_y;
@@ -130,7 +130,7 @@ public:
                 u64 blit_src_x;
                 u64 blit_src_y;
 
-                INSERT_PADDING_WORDS(0x21);
+                INSERT_UNION_PADDING_WORDS(0x21);
             };
             std::array<u32, NUM_REGS> reg_array;
         };
diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp
index 63d449135..3a39aeabe 100644
--- a/src/video_core/engines/kepler_compute.cpp
+++ b/src/video_core/engines/kepler_compute.cpp
@@ -50,7 +50,7 @@ void KeplerCompute::CallMethod(const GPU::MethodCall& method_call) {
     }
 }
 
-Tegra::Texture::FullTextureInfo KeplerCompute::GetTexture(std::size_t offset) const {
+Texture::FullTextureInfo KeplerCompute::GetTexture(std::size_t offset) const {
     const std::bitset<8> cbuf_mask = launch_description.const_buffer_enable_mask.Value();
     ASSERT(cbuf_mask[regs.tex_cb_index]);
 
@@ -61,22 +61,40 @@ Tegra::Texture::FullTextureInfo KeplerCompute::GetTexture(std::size_t offset) co
     ASSERT(address < texinfo.Address() + texinfo.size);
 
     const Texture::TextureHandle tex_handle{memory_manager.Read<u32>(address)};
-    return GetTextureInfo(tex_handle, offset);
+    return GetTextureInfo(tex_handle);
 }
 
-Texture::FullTextureInfo KeplerCompute::GetTextureInfo(const Texture::TextureHandle tex_handle,
-                                                       std::size_t offset) const {
-    return Texture::FullTextureInfo{static_cast<u32>(offset), GetTICEntry(tex_handle.tic_id),
-                                    GetTSCEntry(tex_handle.tsc_id)};
+Texture::FullTextureInfo KeplerCompute::GetTextureInfo(Texture::TextureHandle tex_handle) const {
+    return Texture::FullTextureInfo{GetTICEntry(tex_handle.tic_id), GetTSCEntry(tex_handle.tsc_id)};
 }
 
-u32 KeplerCompute::AccessConstBuffer32(u64 const_buffer, u64 offset) const {
+u32 KeplerCompute::AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const {
+    ASSERT(stage == ShaderType::Compute);
     const auto& buffer = launch_description.const_buffer_config[const_buffer];
-    u32 result;
-    std::memcpy(&result, memory_manager.GetPointer(buffer.Address() + offset), sizeof(u32));
-    return result;
+    // Go through MemoryManager::Read, like the texture handle reads in this file, instead of
+    // copying from a raw GetPointer() result that was never checked for null.
+    return memory_manager.Read<u32>(buffer.Address() + offset);
 }
 
+SamplerDescriptor KeplerCompute::AccessBoundSampler(ShaderType stage, u64 offset) const {
+    // Bound samplers are indexed in handle units; scale to a byte offset in the texture buffer.
+    return AccessBindlessSampler(stage, regs.tex_cb_index, offset * sizeof(Texture::TextureHandle));
+}
+
+SamplerDescriptor KeplerCompute::AccessBindlessSampler(ShaderType stage, u64 const_buffer,
+                                                       u64 offset) const {
+    ASSERT(stage == ShaderType::Compute);
+    const auto& tex_info_buffer = launch_description.const_buffer_config[const_buffer];
+    const GPUVAddr tex_info_address = tex_info_buffer.Address() + offset;
+
+    const Texture::TextureHandle tex_handle{memory_manager.Read<u32>(tex_info_address)};
+    const Texture::FullTextureInfo tex_info = GetTextureInfo(tex_handle);
+    SamplerDescriptor result = SamplerDescriptor::FromTicTexture(tex_info.tic.texture_type.Value());
+    // The shadow flag is not derived from the TIC type; take it from the TSC depth-compare bit.
+    result.is_shadow.Assign(tex_info.tsc.depth_compare_enabled.Value());
+    return result;
+}
+
 void KeplerCompute::ProcessLaunch() {
     const GPUVAddr launch_desc_loc = regs.launch_desc_loc.Address();
     memory_manager.ReadBlockUnsafe(launch_desc_loc, &launch_description,
diff --git a/src/video_core/engines/kepler_compute.h b/src/video_core/engines/kepler_compute.h
index 90cf650d2..5259d92bd 100644
--- a/src/video_core/engines/kepler_compute.h
+++ b/src/video_core/engines/kepler_compute.h
@@ -10,6 +10,7 @@
 #include "common/bit_field.h"
 #include "common/common_funcs.h"
 #include "common/common_types.h"
+#include "video_core/engines/const_buffer_engine_interface.h"
 #include "video_core/engines/engine_upload.h"
 #include "video_core/gpu.h"
 #include "video_core/textures/texture.h"
@@ -37,7 +38,7 @@ namespace Tegra::Engines {
 #define KEPLER_COMPUTE_REG_INDEX(field_name)                                                       \
     (offsetof(Tegra::Engines::KeplerCompute::Regs, field_name) / sizeof(u32))
 
-class KeplerCompute final {
+class KeplerCompute final : public ConstBufferEngineInterface {
 public:
     explicit KeplerCompute(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
                            MemoryManager& memory_manager);
@@ -50,7 +51,7 @@ public:
 
         union {
             struct {
-                INSERT_PADDING_WORDS(0x60);
+                INSERT_UNION_PADDING_WORDS(0x60);
 
                 Upload::Registers upload;
 
@@ -62,7 +63,7 @@ public:
 
                 u32 data_upload;
 
-                INSERT_PADDING_WORDS(0x3F);
+                INSERT_UNION_PADDING_WORDS(0x3F);
 
                 struct {
                     u32 address;
@@ -71,11 +72,11 @@ public:
                     }
                 } launch_desc_loc;
 
-                INSERT_PADDING_WORDS(0x1);
+                INSERT_UNION_PADDING_WORDS(0x1);
 
                 u32 launch;
 
-                INSERT_PADDING_WORDS(0x4A7);
+                INSERT_UNION_PADDING_WORDS(0x4A7);
 
                 struct {
                     u32 address_high;
@@ -87,7 +88,7 @@ public:
                     }
                 } tsc;
 
-                INSERT_PADDING_WORDS(0x3);
+                INSERT_UNION_PADDING_WORDS(0x3);
 
                 struct {
                     u32 address_high;
@@ -99,7 +100,7 @@ public:
                     }
                 } tic;
 
-                INSERT_PADDING_WORDS(0x22);
+                INSERT_UNION_PADDING_WORDS(0x22);
 
                 struct {
                     u32 address_high;
@@ -110,11 +111,11 @@ public:
                     }
                 } code_loc;
 
-                INSERT_PADDING_WORDS(0x3FE);
+                INSERT_UNION_PADDING_WORDS(0x3FE);
 
                 u32 tex_cb_index;
 
-                INSERT_PADDING_WORDS(0x374);
+                INSERT_UNION_PADDING_WORDS(0x374);
             };
             std::array<u32, NUM_REGS> reg_array;
         };
@@ -178,7 +179,7 @@ public:
         };
 
         INSERT_PADDING_WORDS(0x11);
-    } launch_description;
+    } launch_description{};
 
     struct {
         u32 write_offset = 0;
@@ -195,13 +196,21 @@ public:
     /// Write the value to the register identified by method.
     void CallMethod(const GPU::MethodCall& method_call);
 
-    Tegra::Texture::FullTextureInfo GetTexture(std::size_t offset) const;
+    Texture::FullTextureInfo GetTexture(std::size_t offset) const;
 
-    /// Given a Texture Handle, returns the TSC and TIC entries.
-    Texture::FullTextureInfo GetTextureInfo(const Texture::TextureHandle tex_handle,
-                                            std::size_t offset) const;
+    /// Given a texture handle, returns the TSC and TIC entries.
+    Texture::FullTextureInfo GetTextureInfo(Texture::TextureHandle tex_handle) const;
 
-    u32 AccessConstBuffer32(u64 const_buffer, u64 offset) const;
+    u32 AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const override;
+
+    SamplerDescriptor AccessBoundSampler(ShaderType stage, u64 offset) const override;
+
+    SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer,
+                                            u64 offset) const override;
+
+    u32 GetBoundBuffer() const override {
+        return regs.tex_cb_index; // Const buffer index that AccessBoundSampler reads handles from.
+    }
 
 private:
     Core::System& system;
diff --git a/src/video_core/engines/kepler_memory.h b/src/video_core/engines/kepler_memory.h
index e0e25c321..396fb6e86 100644
--- a/src/video_core/engines/kepler_memory.h
+++ b/src/video_core/engines/kepler_memory.h
@@ -45,7 +45,7 @@ public:
 
         union {
             struct {
-                INSERT_PADDING_WORDS(0x60);
+                INSERT_UNION_PADDING_WORDS(0x60);
 
                 Upload::Registers upload;
 
@@ -57,7 +57,7 @@ public:
 
                 u32 data;
 
-                INSERT_PADDING_WORDS(0x11);
+                INSERT_UNION_PADDING_WORDS(0x11);
             };
             std::array<u32, NUM_REGS> reg_array;
         };
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index b318aedb8..a44c09003 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -98,10 +98,10 @@ void Maxwell3D::InitializeRegisterDefaults() {
     mme_inline[MAXWELL3D_REG_INDEX(index_array.count)] = true;
 }
 
-#define DIRTY_REGS_POS(field_name) (offsetof(Maxwell3D::DirtyRegs, field_name))
+#define DIRTY_REGS_POS(field_name) static_cast<u8>(offsetof(Maxwell3D::DirtyRegs, field_name))
 
 void Maxwell3D::InitDirtySettings() {
-    const auto set_block = [this](const u32 start, const u32 range, const u8 position) {
+    const auto set_block = [this](std::size_t start, std::size_t range, u8 position) {
         const auto start_itr = dirty_pointers.begin() + start;
         const auto end_itr = start_itr + range;
         std::fill(start_itr, end_itr, position);
@@ -112,10 +112,10 @@ void Maxwell3D::InitDirtySettings() {
     constexpr u32 registers_per_rt = sizeof(regs.rt[0]) / sizeof(u32);
     constexpr u32 rt_start_reg = MAXWELL3D_REG_INDEX(rt);
     constexpr u32 rt_end_reg = rt_start_reg + registers_per_rt * 8;
-    u32 rt_dirty_reg = DIRTY_REGS_POS(render_target);
+    u8 rt_dirty_reg = DIRTY_REGS_POS(render_target);
     for (u32 rt_reg = rt_start_reg; rt_reg < rt_end_reg; rt_reg += registers_per_rt) {
         set_block(rt_reg, registers_per_rt, rt_dirty_reg);
-        rt_dirty_reg++;
+        ++rt_dirty_reg;
     }
     constexpr u32 depth_buffer_flag = DIRTY_REGS_POS(depth_buffer);
     dirty_pointers[MAXWELL3D_REG_INDEX(zeta_enable)] = depth_buffer_flag;
@@ -129,35 +129,35 @@ void Maxwell3D::InitDirtySettings() {
     constexpr u32 vertex_array_start = MAXWELL3D_REG_INDEX(vertex_array);
     constexpr u32 vertex_array_size = sizeof(regs.vertex_array[0]) / sizeof(u32);
     constexpr u32 vertex_array_end = vertex_array_start + vertex_array_size * Regs::NumVertexArrays;
-    u32 va_reg = DIRTY_REGS_POS(vertex_array);
-    u32 vi_reg = DIRTY_REGS_POS(vertex_instance);
+    u8 va_dirty_reg = DIRTY_REGS_POS(vertex_array);
+    u8 vi_dirty_reg = DIRTY_REGS_POS(vertex_instance);
     for (u32 vertex_reg = vertex_array_start; vertex_reg < vertex_array_end;
          vertex_reg += vertex_array_size) {
-        set_block(vertex_reg, 3, va_reg);
+        set_block(vertex_reg, 3, va_dirty_reg);
         // The divisor concerns vertex array instances
-        dirty_pointers[vertex_reg + 3] = vi_reg;
-        va_reg++;
-        vi_reg++;
+        dirty_pointers[static_cast<std::size_t>(vertex_reg) + 3] = vi_dirty_reg;
+        ++va_dirty_reg;
+        ++vi_dirty_reg;
     }
     constexpr u32 vertex_limit_start = MAXWELL3D_REG_INDEX(vertex_array_limit);
     constexpr u32 vertex_limit_size = sizeof(regs.vertex_array_limit[0]) / sizeof(u32);
     constexpr u32 vertex_limit_end = vertex_limit_start + vertex_limit_size * Regs::NumVertexArrays;
-    va_reg = DIRTY_REGS_POS(vertex_array);
+    va_dirty_reg = DIRTY_REGS_POS(vertex_array);
     for (u32 vertex_reg = vertex_limit_start; vertex_reg < vertex_limit_end;
          vertex_reg += vertex_limit_size) {
-        set_block(vertex_reg, vertex_limit_size, va_reg);
-        va_reg++;
+        set_block(vertex_reg, vertex_limit_size, va_dirty_reg);
+        va_dirty_reg++;
     }
     constexpr u32 vertex_instance_start = MAXWELL3D_REG_INDEX(instanced_arrays);
     constexpr u32 vertex_instance_size =
         sizeof(regs.instanced_arrays.is_instanced[0]) / sizeof(u32);
     constexpr u32 vertex_instance_end =
         vertex_instance_start + vertex_instance_size * Regs::NumVertexArrays;
-    vi_reg = DIRTY_REGS_POS(vertex_instance);
+    vi_dirty_reg = DIRTY_REGS_POS(vertex_instance);
     for (u32 vertex_reg = vertex_instance_start; vertex_reg < vertex_instance_end;
          vertex_reg += vertex_instance_size) {
-        set_block(vertex_reg, vertex_instance_size, vi_reg);
-        vi_reg++;
+        set_block(vertex_reg, vertex_instance_size, vi_dirty_reg);
+        vi_dirty_reg++;
     }
     set_block(MAXWELL3D_REG_INDEX(vertex_attrib_format), regs.vertex_attrib_format.size(),
               DIRTY_REGS_POS(vertex_attrib_format));
@@ -171,7 +171,7 @@ void Maxwell3D::InitDirtySettings() {
     // State
 
     // Viewport
-    constexpr u32 viewport_dirty_reg = DIRTY_REGS_POS(viewport);
+    constexpr u8 viewport_dirty_reg = DIRTY_REGS_POS(viewport);
     constexpr u32 viewport_start = MAXWELL3D_REG_INDEX(viewports);
     constexpr u32 viewport_size = sizeof(regs.viewports) / sizeof(u32);
     set_block(viewport_start, viewport_size, viewport_dirty_reg);
@@ -198,7 +198,7 @@ void Maxwell3D::InitDirtySettings() {
     set_block(primitive_restart_start, primitive_restart_size, DIRTY_REGS_POS(primitive_restart));
 
     // Depth Test
-    constexpr u32 depth_test_dirty_reg = DIRTY_REGS_POS(depth_test);
+    constexpr u8 depth_test_dirty_reg = DIRTY_REGS_POS(depth_test);
     dirty_pointers[MAXWELL3D_REG_INDEX(depth_test_enable)] = depth_test_dirty_reg;
     dirty_pointers[MAXWELL3D_REG_INDEX(depth_write_enabled)] = depth_test_dirty_reg;
     dirty_pointers[MAXWELL3D_REG_INDEX(depth_test_func)] = depth_test_dirty_reg;
@@ -223,12 +223,12 @@ void Maxwell3D::InitDirtySettings() {
     dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_mask)] = stencil_test_dirty_reg;
 
     // Color Mask
-    constexpr u32 color_mask_dirty_reg = DIRTY_REGS_POS(color_mask);
+    constexpr u8 color_mask_dirty_reg = DIRTY_REGS_POS(color_mask);
     dirty_pointers[MAXWELL3D_REG_INDEX(color_mask_common)] = color_mask_dirty_reg;
     set_block(MAXWELL3D_REG_INDEX(color_mask), sizeof(regs.color_mask) / sizeof(u32),
               color_mask_dirty_reg);
     // Blend State
-    constexpr u32 blend_state_dirty_reg = DIRTY_REGS_POS(blend_state);
+    constexpr u8 blend_state_dirty_reg = DIRTY_REGS_POS(blend_state);
     set_block(MAXWELL3D_REG_INDEX(blend_color), sizeof(regs.blend_color) / sizeof(u32),
               blend_state_dirty_reg);
     dirty_pointers[MAXWELL3D_REG_INDEX(independent_blend_enable)] = blend_state_dirty_reg;
@@ -237,18 +237,23 @@ void Maxwell3D::InitDirtySettings() {
               blend_state_dirty_reg);
 
     // Scissor State
-    constexpr u32 scissor_test_dirty_reg = DIRTY_REGS_POS(scissor_test);
+    constexpr u8 scissor_test_dirty_reg = DIRTY_REGS_POS(scissor_test);
     set_block(MAXWELL3D_REG_INDEX(scissor_test), sizeof(regs.scissor_test) / sizeof(u32),
               scissor_test_dirty_reg);
 
     // Polygon Offset
-    constexpr u32 polygon_offset_dirty_reg = DIRTY_REGS_POS(polygon_offset);
+    constexpr u8 polygon_offset_dirty_reg = DIRTY_REGS_POS(polygon_offset);
     dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_fill_enable)] = polygon_offset_dirty_reg;
     dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_line_enable)] = polygon_offset_dirty_reg;
     dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_point_enable)] = polygon_offset_dirty_reg;
     dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_units)] = polygon_offset_dirty_reg;
     dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_factor)] = polygon_offset_dirty_reg;
     dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_clamp)] = polygon_offset_dirty_reg;
+
+    // Depth bounds
+    constexpr u8 depth_bounds_values_dirty_reg = DIRTY_REGS_POS(depth_bounds_values);
+    dirty_pointers[MAXWELL3D_REG_INDEX(depth_bounds[0])] = depth_bounds_values_dirty_reg;
+    dirty_pointers[MAXWELL3D_REG_INDEX(depth_bounds[1])] = depth_bounds_values_dirty_reg;
 }
 
 void Maxwell3D::CallMacroMethod(u32 method, std::size_t num_parameters, const u32* parameters) {
@@ -256,7 +261,8 @@ void Maxwell3D::CallMacroMethod(u32 method, std::size_t num_parameters, const u3
     executing_macro = 0;
 
     // Lookup the macro offset
-    const u32 entry = ((method - MacroRegistersStart) >> 1) % macro_positions.size();
+    const u32 entry =
+        ((method - MacroRegistersStart) >> 1) % static_cast<u32>(macro_positions.size());
 
     // Execute the current macro.
     macro_interpreter.Execute(macro_positions[entry], num_parameters, parameters);
@@ -473,7 +479,7 @@ void Maxwell3D::CallMethodFromMME(const GPU::MethodCall& method_call) {
 }
 
 void Maxwell3D::FlushMMEInlineDraw() {
-    LOG_DEBUG(HW_GPU, "called, topology={}, count={}", static_cast<u32>(regs.draw.topology.Value()),
+    LOG_TRACE(HW_GPU, "called, topology={}, count={}", static_cast<u32>(regs.draw.topology.Value()),
               regs.vertex_buffer.count);
     ASSERT_MSG(!(regs.index_array.count && regs.vertex_buffer.count), "Both indexed and direct?");
     ASSERT(mme_draw.instance_count == mme_draw.gl_end_count);
@@ -736,14 +742,6 @@ Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const {
     Texture::TICEntry tic_entry;
     memory_manager.ReadBlockUnsafe(tic_address_gpu, &tic_entry, sizeof(Texture::TICEntry));
 
-    [[maybe_unused]] const auto r_type{tic_entry.r_type.Value()};
-    [[maybe_unused]] const auto g_type{tic_entry.g_type.Value()};
-    [[maybe_unused]] const auto b_type{tic_entry.b_type.Value()};
-    [[maybe_unused]] const auto a_type{tic_entry.a_type.Value()};
-
-    // TODO(Subv): Different data types for separate components are not supported
-    DEBUG_ASSERT(r_type == g_type && r_type == b_type && r_type == a_type);
-
     return tic_entry;
 }
 
@@ -755,61 +753,8 @@ Texture::TSCEntry Maxwell3D::GetTSCEntry(u32 tsc_index) const {
     return tsc_entry;
 }
 
-std::vector<Texture::FullTextureInfo> Maxwell3D::GetStageTextures(Regs::ShaderStage stage) const {
-    std::vector<Texture::FullTextureInfo> textures;
-
-    auto& fragment_shader = state.shader_stages[static_cast<std::size_t>(stage)];
-    auto& tex_info_buffer = fragment_shader.const_buffers[regs.tex_cb_index];
-    ASSERT(tex_info_buffer.enabled && tex_info_buffer.address != 0);
-
-    GPUVAddr tex_info_buffer_end = tex_info_buffer.address + tex_info_buffer.size;
-
-    // Offset into the texture constbuffer where the texture info begins.
-    static constexpr std::size_t TextureInfoOffset = 0x20;
-
-    for (GPUVAddr current_texture = tex_info_buffer.address + TextureInfoOffset;
-         current_texture < tex_info_buffer_end; current_texture += sizeof(Texture::TextureHandle)) {
-
-        const Texture::TextureHandle tex_handle{memory_manager.Read<u32>(current_texture)};
-
-        Texture::FullTextureInfo tex_info{};
-        // TODO(Subv): Use the shader to determine which textures are actually accessed.
-        tex_info.index =
-            static_cast<u32>(current_texture - tex_info_buffer.address - TextureInfoOffset) /
-            sizeof(Texture::TextureHandle);
-
-        // Load the TIC data.
-        auto tic_entry = GetTICEntry(tex_handle.tic_id);
-        // TODO(Subv): Workaround for BitField's move constructor being deleted.
-        std::memcpy(&tex_info.tic, &tic_entry, sizeof(tic_entry));
-
-        // Load the TSC data
-        auto tsc_entry = GetTSCEntry(tex_handle.tsc_id);
-        // TODO(Subv): Workaround for BitField's move constructor being deleted.
-        std::memcpy(&tex_info.tsc, &tsc_entry, sizeof(tsc_entry));
-
-        textures.push_back(tex_info);
-    }
-
-    return textures;
-}
-
-Texture::FullTextureInfo Maxwell3D::GetTextureInfo(const Texture::TextureHandle tex_handle,
-                                                   std::size_t offset) const {
-    Texture::FullTextureInfo tex_info{};
-    tex_info.index = static_cast<u32>(offset);
-
-    // Load the TIC data.
-    auto tic_entry = GetTICEntry(tex_handle.tic_id);
-    // TODO(Subv): Workaround for BitField's move constructor being deleted.
-    std::memcpy(&tex_info.tic, &tic_entry, sizeof(tic_entry));
-
-    // Load the TSC data
-    auto tsc_entry = GetTSCEntry(tex_handle.tsc_id);
-    // TODO(Subv): Workaround for BitField's move constructor being deleted.
-    std::memcpy(&tex_info.tsc, &tsc_entry, sizeof(tsc_entry));
-
-    return tex_info;
+Texture::FullTextureInfo Maxwell3D::GetTextureInfo(Texture::TextureHandle tex_handle) const {
+    return Texture::FullTextureInfo{GetTICEntry(tex_handle.tic_id), GetTSCEntry(tex_handle.tsc_id)};
 }
 
 Texture::FullTextureInfo Maxwell3D::GetStageTexture(Regs::ShaderStage stage,
@@ -825,7 +770,7 @@ Texture::FullTextureInfo Maxwell3D::GetStageTexture(Regs::ShaderStage stage,
 
     const Texture::TextureHandle tex_handle{memory_manager.Read<u32>(tex_info_address)};
 
-    return GetTextureInfo(tex_handle, offset);
+    return GetTextureInfo(tex_handle);
 }
 
 u32 Maxwell3D::GetRegisterValue(u32 method) const {
@@ -841,7 +786,8 @@ void Maxwell3D::ProcessClearBuffers() {
     rasterizer.Clear();
 }
 
-u32 Maxwell3D::AccessConstBuffer32(Regs::ShaderStage stage, u64 const_buffer, u64 offset) const {
+u32 Maxwell3D::AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const {
+    ASSERT(stage != ShaderType::Compute);
     const auto& shader_stage = state.shader_stages[static_cast<std::size_t>(stage)];
     const auto& buffer = shader_stage.const_buffers[const_buffer];
     u32 result;
@@ -849,4 +795,22 @@ u32 Maxwell3D::AccessConstBuffer32(Regs::ShaderStage stage, u64 const_buffer, u6
     return result;
 }
 
+// Describes the sampler behind the offset-th texture handle of const buffer regs.tex_cb_index.
+SamplerDescriptor Maxwell3D::AccessBoundSampler(ShaderType stage, u64 offset) const {
+    return AccessBindlessSampler(stage, regs.tex_cb_index, offset * sizeof(Texture::TextureHandle));
+}
+
+// Describes the sampler whose handle is read `offset` bytes into `const_buffer` (no Compute).
+SamplerDescriptor Maxwell3D::AccessBindlessSampler(ShaderType stage, u64 const_buffer,
+                                                   u64 offset) const {
+    ASSERT(stage != ShaderType::Compute);
+    const auto& cbufs = state.shader_stages[static_cast<std::size_t>(stage)].const_buffers;
+    const GPUVAddr handle_address = cbufs[const_buffer].address + offset;
+    const Texture::TextureHandle handle{memory_manager.Read<u32>(handle_address)};
+    const Texture::FullTextureInfo info = GetTextureInfo(handle);
+    SamplerDescriptor sampler = SamplerDescriptor::FromTicTexture(info.tic.texture_type.Value());
+    sampler.is_shadow.Assign(info.tsc.depth_compare_enabled.Value());
+    return sampler;
+}
+
 } // namespace Tegra::Engines
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index 4c97759ed..1aa7c274f 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -15,6 +15,7 @@
 #include "common/common_funcs.h"
 #include "common/common_types.h"
 #include "common/math_util.h"
+#include "video_core/engines/const_buffer_engine_interface.h"
 #include "video_core/engines/const_buffer_info.h"
 #include "video_core/engines/engine_upload.h"
 #include "video_core/gpu.h"
@@ -44,7 +45,7 @@ namespace Tegra::Engines {
 #define MAXWELL3D_REG_INDEX(field_name)                                                            \
     (offsetof(Tegra::Engines::Maxwell3D::Regs, field_name) / sizeof(u32))
 
-class Maxwell3D final {
+class Maxwell3D final : public ConstBufferEngineInterface {
 public:
     explicit Maxwell3D(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
                        MemoryManager& memory_manager);
@@ -495,7 +496,7 @@ public:
             Equation equation_a;
             Factor factor_source_a;
             Factor factor_dest_a;
-            INSERT_PADDING_WORDS(1);
+            INSERT_UNION_PADDING_WORDS(1);
         };
 
         struct RenderTargetConfig {
@@ -516,7 +517,7 @@ public:
             };
             u32 layer_stride;
             u32 base_layer;
-            INSERT_PADDING_WORDS(7);
+            INSERT_UNION_PADDING_WORDS(7);
 
             GPUVAddr Address() const {
                 return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
@@ -541,7 +542,7 @@ public:
             f32 translate_x;
             f32 translate_y;
             f32 translate_z;
-            INSERT_PADDING_WORDS(2);
+            INSERT_UNION_PADDING_WORDS(2);
 
             Common::Rectangle<s32> GetRect() const {
                 return {
@@ -605,7 +606,7 @@ public:
 
         union {
             struct {
-                INSERT_PADDING_WORDS(0x45);
+                INSERT_UNION_PADDING_WORDS(0x45);
 
                 struct {
                     u32 upload_address;
@@ -614,7 +615,7 @@ public:
                     u32 bind;
                 } macros;
 
-                INSERT_PADDING_WORDS(0x17);
+                INSERT_UNION_PADDING_WORDS(0x17);
 
                 Upload::Registers upload;
                 struct {
@@ -625,7 +626,7 @@ public:
 
                 u32 data_upload;
 
-                INSERT_PADDING_WORDS(0x44);
+                INSERT_UNION_PADDING_WORDS(0x44);
 
                 struct {
                     union {
@@ -635,11 +636,11 @@ public:
                     };
                 } sync_info;
 
-                INSERT_PADDING_WORDS(0x11E);
+                INSERT_UNION_PADDING_WORDS(0x11E);
 
                 u32 tfb_enabled;
 
-                INSERT_PADDING_WORDS(0x2E);
+                INSERT_UNION_PADDING_WORDS(0x2E);
 
                 std::array<RenderTargetConfig, NumRenderTargets> rt;
 
@@ -647,47 +648,49 @@ public:
 
                 std::array<ViewPort, NumViewports> viewports;
 
-                INSERT_PADDING_WORDS(0x1D);
+                INSERT_UNION_PADDING_WORDS(0x1D);
 
                 struct {
                     u32 first;
                     u32 count;
                 } vertex_buffer;
 
-                INSERT_PADDING_WORDS(1);
+                INSERT_UNION_PADDING_WORDS(1);
 
                 float clear_color[4];
                 float clear_depth;
 
-                INSERT_PADDING_WORDS(0x3);
+                INSERT_UNION_PADDING_WORDS(0x3);
 
                 s32 clear_stencil;
 
-                INSERT_PADDING_WORDS(0x7);
+                INSERT_UNION_PADDING_WORDS(0x7);
 
                 u32 polygon_offset_point_enable;
                 u32 polygon_offset_line_enable;
                 u32 polygon_offset_fill_enable;
 
-                INSERT_PADDING_WORDS(0xD);
+                INSERT_UNION_PADDING_WORDS(0xD);
 
                 std::array<ScissorTest, NumViewports> scissor_test;
 
-                INSERT_PADDING_WORDS(0x15);
+                INSERT_UNION_PADDING_WORDS(0x15);
 
                 s32 stencil_back_func_ref;
                 u32 stencil_back_mask;
                 u32 stencil_back_func_mask;
 
-                INSERT_PADDING_WORDS(0xC);
+                INSERT_UNION_PADDING_WORDS(0xC);
 
                 u32 color_mask_common;
 
-                INSERT_PADDING_WORDS(0x6);
+                INSERT_UNION_PADDING_WORDS(0x6);
 
                 u32 rt_separate_frag_data;
 
-                INSERT_PADDING_WORDS(0xC);
+                f32 depth_bounds[2];
+
+                INSERT_UNION_PADDING_WORDS(0xA);
 
                 struct {
                     u32 address_high;
@@ -707,7 +710,7 @@ public:
                     }
                 } zeta;
 
-                INSERT_PADDING_WORDS(0x41);
+                INSERT_UNION_PADDING_WORDS(0x41);
 
                 union {
                     BitField<0, 4, u32> stencil;
@@ -716,11 +719,11 @@ public:
                     BitField<12, 4, u32> viewport;
                 } clear_flags;
 
-                INSERT_PADDING_WORDS(0x19);
+                INSERT_UNION_PADDING_WORDS(0x19);
 
                 std::array<VertexAttribute, NumVertexAttributes> vertex_attrib_format;
 
-                INSERT_PADDING_WORDS(0xF);
+                INSERT_UNION_PADDING_WORDS(0xF);
 
                 struct {
                     union {
@@ -743,16 +746,16 @@ public:
                     }
                 } rt_control;
 
-                INSERT_PADDING_WORDS(0x2);
+                INSERT_UNION_PADDING_WORDS(0x2);
 
                 u32 zeta_width;
                 u32 zeta_height;
 
-                INSERT_PADDING_WORDS(0x27);
+                INSERT_UNION_PADDING_WORDS(0x27);
 
                 u32 depth_test_enable;
 
-                INSERT_PADDING_WORDS(0x5);
+                INSERT_UNION_PADDING_WORDS(0x5);
 
                 u32 independent_blend_enable;
 
@@ -760,7 +763,7 @@ public:
 
                 u32 alpha_test_enabled;
 
-                INSERT_PADDING_WORDS(0x6);
+                INSERT_UNION_PADDING_WORDS(0x6);
 
                 u32 d3d_cull_mode;
 
@@ -774,7 +777,7 @@ public:
                     float b;
                     float a;
                 } blend_color;
-                INSERT_PADDING_WORDS(0x4);
+                INSERT_UNION_PADDING_WORDS(0x4);
 
                 struct {
                     u32 separate_alpha;
@@ -783,7 +786,7 @@ public:
                     Blend::Factor factor_dest_rgb;
                     Blend::Equation equation_a;
                     Blend::Factor factor_source_a;
-                    INSERT_PADDING_WORDS(1);
+                    INSERT_UNION_PADDING_WORDS(1);
                     Blend::Factor factor_dest_a;
 
                     u32 enable_common;
@@ -799,7 +802,7 @@ public:
                 u32 stencil_front_func_mask;
                 u32 stencil_front_mask;
 
-                INSERT_PADDING_WORDS(0x2);
+                INSERT_UNION_PADDING_WORDS(0x2);
 
                 u32 frag_color_clamp;
 
@@ -808,12 +811,12 @@ public:
                     BitField<4, 1, u32> triangle_rast_flip;
                 } screen_y_control;
 
-                INSERT_PADDING_WORDS(0x21);
+                INSERT_UNION_PADDING_WORDS(0x21);
 
                 u32 vb_element_base;
                 u32 vb_base_instance;
 
-                INSERT_PADDING_WORDS(0x35);
+                INSERT_UNION_PADDING_WORDS(0x35);
 
                 union {
                     BitField<0, 1, u32> c0;
@@ -826,11 +829,11 @@ public:
                     BitField<7, 1, u32> c7;
                 } clip_distance_enabled;
 
-                INSERT_PADDING_WORDS(0x1);
+                INSERT_UNION_PADDING_WORDS(0x1);
 
                 float point_size;
 
-                INSERT_PADDING_WORDS(0x7);
+                INSERT_UNION_PADDING_WORDS(0x7);
 
                 u32 zeta_enable;
 
@@ -839,7 +842,7 @@ public:
                     BitField<4, 1, u32> alpha_to_one;
                 } multisample_control;
 
-                INSERT_PADDING_WORDS(0x4);
+                INSERT_UNION_PADDING_WORDS(0x4);
 
                 struct {
                     u32 address_high;
@@ -863,11 +866,11 @@ public:
                     }
                 } tsc;
 
-                INSERT_PADDING_WORDS(0x1);
+                INSERT_UNION_PADDING_WORDS(0x1);
 
                 float polygon_offset_factor;
 
-                INSERT_PADDING_WORDS(0x1);
+                INSERT_UNION_PADDING_WORDS(0x1);
 
                 struct {
                     u32 tic_address_high;
@@ -880,7 +883,7 @@ public:
                     }
                 } tic;
 
-                INSERT_PADDING_WORDS(0x5);
+                INSERT_UNION_PADDING_WORDS(0x5);
 
                 u32 stencil_two_side_enable;
                 StencilOp stencil_back_op_fail;
@@ -888,13 +891,13 @@ public:
                 StencilOp stencil_back_op_zpass;
                 ComparisonOp stencil_back_func_func;
 
-                INSERT_PADDING_WORDS(0x4);
+                INSERT_UNION_PADDING_WORDS(0x4);
 
                 u32 framebuffer_srgb;
 
                 float polygon_offset_units;
 
-                INSERT_PADDING_WORDS(0x11);
+                INSERT_UNION_PADDING_WORDS(0x11);
 
                 union {
                     BitField<2, 1, u32> coord_origin;
@@ -910,7 +913,7 @@ public:
                             (static_cast<GPUVAddr>(code_address_high) << 32) | code_address_low);
                     }
                 } code_address;
-                INSERT_PADDING_WORDS(1);
+                INSERT_UNION_PADDING_WORDS(1);
 
                 struct {
                     u32 vertex_end_gl;
@@ -922,14 +925,14 @@ public:
                     };
                 } draw;
 
-                INSERT_PADDING_WORDS(0xA);
+                INSERT_UNION_PADDING_WORDS(0xA);
 
                 struct {
                     u32 enabled;
                     u32 index;
                 } primitive_restart;
 
-                INSERT_PADDING_WORDS(0x5F);
+                INSERT_UNION_PADDING_WORDS(0x5F);
 
                 struct {
                     u32 start_addr_high;
@@ -970,9 +973,9 @@ public:
                     }
                 } index_array;
 
-                INSERT_PADDING_WORDS(0x7);
+                INSERT_UNION_PADDING_WORDS(0x7);
 
-                INSERT_PADDING_WORDS(0x1F);
+                INSERT_UNION_PADDING_WORDS(0x1F);
 
                 float polygon_offset_clamp;
 
@@ -986,17 +989,17 @@ public:
                     }
                 } instanced_arrays;
 
-                INSERT_PADDING_WORDS(0x6);
+                INSERT_UNION_PADDING_WORDS(0x6);
 
                 Cull cull;
 
                 u32 pixel_center_integer;
 
-                INSERT_PADDING_WORDS(0x1);
+                INSERT_UNION_PADDING_WORDS(0x1);
 
                 u32 viewport_transform_enabled;
 
-                INSERT_PADDING_WORDS(0x3);
+                INSERT_UNION_PADDING_WORDS(0x3);
 
                 union {
                     BitField<0, 1, u32> depth_range_0_1;
@@ -1004,13 +1007,13 @@ public:
                     BitField<4, 1, u32> depth_clamp_far;
                 } view_volume_clip_control;
 
-                INSERT_PADDING_WORDS(0x21);
+                INSERT_UNION_PADDING_WORDS(0x21);
                 struct {
                     u32 enable;
                     LogicOperation operation;
                 } logic_op;
 
-                INSERT_PADDING_WORDS(0x1);
+                INSERT_UNION_PADDING_WORDS(0x1);
 
                 union {
                     u32 raw;
@@ -1023,9 +1026,9 @@ public:
                     BitField<6, 4, u32> RT;
                     BitField<10, 11, u32> layer;
                 } clear_buffers;
-                INSERT_PADDING_WORDS(0xB);
+                INSERT_UNION_PADDING_WORDS(0xB);
                 std::array<ColorMask, NumRenderTargets> color_mask;
-                INSERT_PADDING_WORDS(0x38);
+                INSERT_UNION_PADDING_WORDS(0x38);
 
                 struct {
                     u32 query_address_high;
@@ -1047,7 +1050,7 @@ public:
                     }
                 } query;
 
-                INSERT_PADDING_WORDS(0x3C);
+                INSERT_UNION_PADDING_WORDS(0x3C);
 
                 struct {
                     union {
@@ -1087,10 +1090,10 @@ public:
                         BitField<4, 4, ShaderProgram> program;
                     };
                     u32 offset;
-                    INSERT_PADDING_WORDS(14);
+                    INSERT_UNION_PADDING_WORDS(14);
                 } shader_config[MaxShaderProgram];
 
-                INSERT_PADDING_WORDS(0x60);
+                INSERT_UNION_PADDING_WORDS(0x60);
 
                 u32 firmware[0x20];
 
@@ -1107,7 +1110,7 @@ public:
                     }
                 } const_buffer;
 
-                INSERT_PADDING_WORDS(0x10);
+                INSERT_UNION_PADDING_WORDS(0x10);
 
                 struct {
                     union {
@@ -1115,14 +1118,14 @@ public:
                         BitField<0, 1, u32> valid;
                         BitField<4, 5, u32> index;
                     };
-                    INSERT_PADDING_WORDS(7);
+                    INSERT_UNION_PADDING_WORDS(7);
                 } cb_bind[MaxShaderStage];
 
-                INSERT_PADDING_WORDS(0x56);
+                INSERT_UNION_PADDING_WORDS(0x56);
 
                 u32 tex_cb_index;
 
-                INSERT_PADDING_WORDS(0x395);
+                INSERT_UNION_PADDING_WORDS(0x395);
 
                 struct {
                     /// Compressed address of a buffer that holds information about bound SSBOs.
@@ -1134,14 +1137,14 @@ public:
                     }
                 } ssbo_info;
 
-                INSERT_PADDING_WORDS(0x11);
+                INSERT_UNION_PADDING_WORDS(0x11);
 
                 struct {
                     u32 address[MaxShaderStage];
                     u32 size[MaxShaderStage];
                 } tex_info_buffers;
 
-                INSERT_PADDING_WORDS(0xCC);
+                INSERT_UNION_PADDING_WORDS(0xCC);
             };
             std::array<u32, NUM_REGS> reg_array;
         };
@@ -1163,6 +1166,8 @@ public:
 
     struct DirtyRegs {
         static constexpr std::size_t NUM_REGS = 256;
+        static_assert(NUM_REGS - 1 <= std::numeric_limits<u8>::max());
+
         union {
             struct {
                 bool null_dirty;
@@ -1201,6 +1206,7 @@ public:
                 bool transform_feedback;
                 bool color_mask;
                 bool polygon_offset;
+                bool depth_bounds_values;
 
                 // Complementary
                 bool viewport_transform;
@@ -1244,17 +1250,22 @@ public:
 
     void FlushMMEInlineDraw();
 
-    /// Given a Texture Handle, returns the TSC and TIC entries.
-    Texture::FullTextureInfo GetTextureInfo(const Texture::TextureHandle tex_handle,
-                                            std::size_t offset) const;
-
-    /// Returns a list of enabled textures for the specified shader stage.
-    std::vector<Texture::FullTextureInfo> GetStageTextures(Regs::ShaderStage stage) const;
+    /// Given a texture handle, returns the TSC and TIC entries.
+    Texture::FullTextureInfo GetTextureInfo(Texture::TextureHandle tex_handle) const;
 
     /// Returns the texture information for a specific texture in a specific shader stage.
     Texture::FullTextureInfo GetStageTexture(Regs::ShaderStage stage, std::size_t offset) const;
 
-    u32 AccessConstBuffer32(Regs::ShaderStage stage, u64 const_buffer, u64 offset) const;
+    u32 AccessConstBuffer32(ShaderType stage, u64 const_buffer, u64 offset) const override;
+
+    SamplerDescriptor AccessBoundSampler(ShaderType stage, u64 offset) const override;
+
+    SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer,
+                                            u64 offset) const override;
+
+    u32 GetBoundBuffer() const override {
+        return regs.tex_cb_index; // same const buffer index AccessBoundSampler reads handles from
+    }
 
     /// Memory for macro code - it's undetermined how big this is, however 1MB is much larger than
     /// we've seen used.
@@ -1400,6 +1411,7 @@ ASSERT_REG_POSITION(stencil_back_mask, 0x3D6);
 ASSERT_REG_POSITION(stencil_back_func_mask, 0x3D7);
 ASSERT_REG_POSITION(color_mask_common, 0x3E4);
 ASSERT_REG_POSITION(rt_separate_frag_data, 0x3EB);
+ASSERT_REG_POSITION(depth_bounds, 0x3EC);
 ASSERT_REG_POSITION(zeta, 0x3F8);
 ASSERT_REG_POSITION(clear_flags, 0x43E);
 ASSERT_REG_POSITION(vertex_attrib_format, 0x458);
diff --git a/src/video_core/engines/maxwell_dma.h b/src/video_core/engines/maxwell_dma.h
index 93808a9bb..4f40d1d1f 100644
--- a/src/video_core/engines/maxwell_dma.h
+++ b/src/video_core/engines/maxwell_dma.h
@@ -94,7 +94,7 @@ public:
 
         union {
             struct {
-                INSERT_PADDING_WORDS(0xC0);
+                INSERT_UNION_PADDING_WORDS(0xC0);
 
                 struct {
                     union {
@@ -112,7 +112,7 @@ public:
                     };
                 } exec;
 
-                INSERT_PADDING_WORDS(0x3F);
+                INSERT_UNION_PADDING_WORDS(0x3F);
 
                 struct {
                     u32 address_high;
@@ -139,7 +139,7 @@ public:
                 u32 x_count;
                 u32 y_count;
 
-                INSERT_PADDING_WORDS(0xB8);
+                INSERT_UNION_PADDING_WORDS(0xB8);
 
                 u32 const0;
                 u32 const1;
@@ -162,11 +162,11 @@ public:
 
                 Parameters dst_params;
 
-                INSERT_PADDING_WORDS(1);
+                INSERT_UNION_PADDING_WORDS(1);
 
                 Parameters src_params;
 
-                INSERT_PADDING_WORDS(0x13);
+                INSERT_UNION_PADDING_WORDS(0x13);
             };
             std::array<u32, NUM_REGS> reg_array;
         };
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index 7a6355ce2..9fafed4a2 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -574,7 +574,7 @@ enum class ShuffleOperation : u64 {
 };
 
 union Instruction {
-    Instruction& operator=(const Instruction& instr) {
+    constexpr Instruction& operator=(const Instruction& instr) {
         value = instr.value;
         return *this;
     }
@@ -616,6 +616,14 @@ union Instruction {
     } shfl;
 
     union {
+        BitField<44, 1, u64> ftz; // presumably "flush to zero" - TODO confirm
+        BitField<39, 2, u64> tab5cb8_2;
+        BitField<38, 1, u64> ndv;
+        BitField<47, 1, u64> cc; // presumably "write condition codes" - TODO confirm
+        BitField<28, 8, u64> swizzle;
+    } fswzadd; // NOTE(review): looks like the operand bits of the new FSWZADD opcode - confirm
+
+    union {
         BitField<8, 8, Register> gpr;
         BitField<20, 24, s64> offset;
     } gmem;
@@ -1238,6 +1246,32 @@ union Instruction {
     } tld4;
 
     union {
+        BitField<35, 1, u64> ndv_flag;
+        BitField<49, 1, u64> nodep_flag;
+        BitField<50, 1, u64> dc_flag;
+        BitField<33, 2, u64> info;
+        BitField<37, 2, u64> component;
+
+        /// Tells whether `mode` is set in this encoding; modes without a field here yield false.
+        bool UsesMiscMode(TextureMiscMode mode) const {
+            switch (mode) {
+            case TextureMiscMode::NDV:
+                return ndv_flag != 0;
+            case TextureMiscMode::NODEP:
+                return nodep_flag != 0;
+            case TextureMiscMode::DC:
+                return dc_flag != 0;
+            case TextureMiscMode::AOFFI:
+                return info == 1; // AOFFI and PTP share the two-bit info field
+            case TextureMiscMode::PTP:
+                return info == 2;
+            default:
+                return false;
+            }
+        }
+    } tld4_b;
+
+    union {
         BitField<49, 1, u64> nodep_flag;
         BitField<50, 1, u64> dc_flag;
         BitField<51, 1, u64> aoffi_flag;
@@ -1452,7 +1486,8 @@ union Instruction {
             u32 value = static_cast<u32>(target);
             // The branch offset is relative to the next instruction and is stored in bytes, so
             // divide it by the size of an instruction and add 1 to it.
-            return static_cast<s32>((value ^ mask) - mask) / sizeof(Instruction) + 1;
+            return static_cast<s32>((value ^ mask) - mask) / static_cast<s32>(sizeof(Instruction)) +
+                   1;
         }
     } bra;
 
@@ -1466,7 +1501,8 @@ union Instruction {
             u32 value = static_cast<u32>(target);
             // The branch offset is relative to the next instruction and is stored in bytes, so
             // divide it by the size of an instruction and add 1 to it.
-            return static_cast<s32>((value ^ mask) - mask) / sizeof(Instruction) + 1;
+            return static_cast<s32>((value ^ mask) - mask) / static_cast<s32>(sizeof(Instruction)) +
+                   1;
         }
     } brx;
 
@@ -1564,6 +1600,7 @@ public:
         DEPBAR,
         VOTE,
         SHFL,
+        FSWZADD,
         BFE_C,
         BFE_R,
         BFE_IMM,
@@ -1590,7 +1627,8 @@ public:
         TEXS,   // Texture Fetch with scalar/non-vec4 source/destinations
         TLD,    // Texture Load
         TLDS,   // Texture Load with scalar/non-vec4 source/destinations
-        TLD4,   // Texture Load 4
+        TLD4,   // Texture Gather 4
+        TLD4_B, // Texture Gather 4 Bindless
         TLD4S,  // Texture Load 4 with scalar / non - vec4 source / destinations
         TMML_B, // Texture Mip Map Level
         TMML,   // Texture Mip Map Level
@@ -1760,22 +1798,22 @@ public:
 
     class Matcher {
     public:
-        Matcher(const char* const name, u16 mask, u16 expected, OpCode::Id id, OpCode::Type type)
+        constexpr Matcher(const char* const name, u16 mask, u16 expected, Id id, Type type)
             : name{name}, mask{mask}, expected{expected}, id{id}, type{type} {}
 
-        const char* GetName() const {
+        constexpr const char* GetName() const {
             return name;
         }
 
-        u16 GetMask() const {
+        constexpr u16 GetMask() const {
             return mask;
         }
 
-        Id GetId() const {
+        constexpr Id GetId() const {
             return id;
         }
 
-        Type GetType() const {
+        constexpr Type GetType() const {
             return type;
         }
 
@@ -1784,7 +1822,7 @@ public:
          * @param instruction The instruction to test
          * @returns true if the given instruction matches.
          */
-        bool Matches(u16 instruction) const {
+        constexpr bool Matches(u16 instruction) const {
             return (instruction & mask) == expected;
         }
 
@@ -1818,32 +1856,32 @@ private:
          * A '0' in a bitstring indicates that a zero must be present at that bit position.
          * A '1' in a bitstring indicates that a one must be present at that bit position.
          */
-        static auto GetMaskAndExpect(const char* const bitstring) {
+        static constexpr auto GetMaskAndExpect(const char* const bitstring) {
             u16 mask = 0, expect = 0;
             for (std::size_t i = 0; i < opcode_bitsize; i++) {
                 const std::size_t bit_position = opcode_bitsize - i - 1;
                 switch (bitstring[i]) {
                 case '0':
-                    mask |= 1 << bit_position;
+                    mask |= static_cast<u16>(1U << bit_position); // compared, must be zero
                     break;
                 case '1':
-                    expect |= 1 << bit_position;
-                    mask |= 1 << bit_position;
+                    expect |= static_cast<u16>(1U << bit_position); // must be one...
+                    mask |= static_cast<u16>(1U << bit_position); // ...and is compared too
                     break;
                 default:
                     // Ignore
                     break;
                 }
             }
-            return std::make_tuple(mask, expect);
+            return std::make_pair(mask, expect); // first = mask, second = expected bits
         }
 
     public:
         /// Creates a matcher that can match and parse instructions based on bitstring.
-        static auto GetMatcher(const char* const bitstring, OpCode::Id op, OpCode::Type type,
-                               const char* const name) {
-            const auto mask_expect = GetMaskAndExpect(bitstring);
-            return Matcher(name, std::get<0>(mask_expect), std::get<1>(mask_expect), op, type);
+        static constexpr auto GetMatcher(const char* const bitstring, Id op, Type type,
+                                         const char* const name) {
+            const auto [mask, expected] = GetMaskAndExpect(bitstring); // unpack the pair
+            return Matcher(name, mask, expected, op, type);
         }
     };
 
@@ -1861,6 +1899,7 @@ private:
             INST("1111000011110---", Id::DEPBAR, Type::Synch, "DEPBAR"),
             INST("0101000011011---", Id::VOTE, Type::Warp, "VOTE"),
             INST("1110111100010---", Id::SHFL, Type::Warp, "SHFL"),
+            INST("0101000011111---", Id::FSWZADD, Type::Warp, "FSWZADD"),
             INST("1110111111011---", Id::LD_A, Type::Memory, "LD_A"),
             INST("1110111101001---", Id::LD_S, Type::Memory, "LD_S"),
             INST("1110111101000---", Id::LD_L, Type::Memory, "LD_L"),
@@ -1881,6 +1920,7 @@ private:
             INST("11011100--11----", Id::TLD, Type::Texture, "TLD"),
             INST("1101-01---------", Id::TLDS, Type::Texture, "TLDS"),
             INST("110010----111---", Id::TLD4, Type::Texture, "TLD4"),
+            INST("1101111011111---", Id::TLD4_B, Type::Texture, "TLD4_B"),
             INST("1101111100------", Id::TLD4S, Type::Texture, "TLD4S"),
             INST("110111110110----", Id::TMML_B, Type::Texture, "TMML_B"),
             INST("1101111101011---", Id::TMML, Type::Texture, "TMML"),
diff --git a/src/video_core/engines/shader_header.h b/src/video_core/engines/shader_header.h
index e86a7f04a..bc80661d8 100644
--- a/src/video_core/engines/shader_header.h
+++ b/src/video_core/engines/shader_header.h
@@ -38,37 +38,37 @@ struct Header {
         BitField<26, 1, u32> does_load_or_store;
         BitField<27, 1, u32> does_fp64;
         BitField<28, 4, u32> stream_out_mask;
-    } common0;
+    } common0{};
 
     union {
         BitField<0, 24, u32> shader_local_memory_low_size;
         BitField<24, 8, u32> per_patch_attribute_count;
-    } common1;
+    } common1{};
 
     union {
         BitField<0, 24, u32> shader_local_memory_high_size;
         BitField<24, 8, u32> threads_per_input_primitive;
-    } common2;
+    } common2{};
 
     union {
         BitField<0, 24, u32> shader_local_memory_crs_size;
         BitField<24, 4, OutputTopology> output_topology;
         BitField<28, 4, u32> reserved;
-    } common3;
+    } common3{};
 
     union {
         BitField<0, 12, u32> max_output_vertices;
         BitField<12, 8, u32> store_req_start; // NOTE: not used by geometry shaders.
-        BitField<24, 4, u32> reserved;
-        BitField<12, 8, u32> store_req_end; // NOTE: not used by geometry shaders.
-    } common4;
+        BitField<20, 4, u32> reserved; // Sits between store_req_start and store_req_end.
+        BitField<24, 8, u32> store_req_end; // NOTE: not used by geometry shaders.
+    } common4{};
 
     union {
         struct {
-            INSERT_PADDING_BYTES(3);  // ImapSystemValuesA
-            INSERT_PADDING_BYTES(1);  // ImapSystemValuesB
-            INSERT_PADDING_BYTES(16); // ImapGenericVector[32]
-            INSERT_PADDING_BYTES(2);  // ImapColor
+            INSERT_UNION_PADDING_BYTES(3);  // ImapSystemValuesA
+            INSERT_UNION_PADDING_BYTES(1);  // ImapSystemValuesB
+            INSERT_UNION_PADDING_BYTES(16); // ImapGenericVector[32]
+            INSERT_UNION_PADDING_BYTES(2);  // ImapColor
             union {
                 BitField<0, 8, u16> clip_distances;
                 BitField<8, 1, u16> point_sprite_s;
@@ -79,20 +79,20 @@ struct Header {
                 BitField<14, 1, u16> instance_id;
                 BitField<15, 1, u16> vertex_id;
             };
-            INSERT_PADDING_BYTES(5);  // ImapFixedFncTexture[10]
-            INSERT_PADDING_BYTES(1);  // ImapReserved
-            INSERT_PADDING_BYTES(3);  // OmapSystemValuesA
-            INSERT_PADDING_BYTES(1);  // OmapSystemValuesB
-            INSERT_PADDING_BYTES(16); // OmapGenericVector[32]
-            INSERT_PADDING_BYTES(2);  // OmapColor
-            INSERT_PADDING_BYTES(2);  // OmapSystemValuesC
-            INSERT_PADDING_BYTES(5);  // OmapFixedFncTexture[10]
-            INSERT_PADDING_BYTES(1);  // OmapReserved
+            INSERT_UNION_PADDING_BYTES(5);  // ImapFixedFncTexture[10]
+            INSERT_UNION_PADDING_BYTES(1);  // ImapReserved
+            INSERT_UNION_PADDING_BYTES(3);  // OmapSystemValuesA
+            INSERT_UNION_PADDING_BYTES(1);  // OmapSystemValuesB
+            INSERT_UNION_PADDING_BYTES(16); // OmapGenericVector[32]
+            INSERT_UNION_PADDING_BYTES(2);  // OmapColor
+            INSERT_UNION_PADDING_BYTES(2);  // OmapSystemValuesC
+            INSERT_UNION_PADDING_BYTES(5);  // OmapFixedFncTexture[10]
+            INSERT_UNION_PADDING_BYTES(1);  // OmapReserved
         } vtg;
 
         struct {
-            INSERT_PADDING_BYTES(3); // ImapSystemValuesA
-            INSERT_PADDING_BYTES(1); // ImapSystemValuesB
+            INSERT_UNION_PADDING_BYTES(3); // ImapSystemValuesA
+            INSERT_UNION_PADDING_BYTES(1); // ImapSystemValuesB
             union {
                 BitField<0, 2, AttributeUse> x;
                 BitField<2, 2, AttributeUse> y;
@@ -100,10 +100,10 @@ struct Header {
                 BitField<6, 2, AttributeUse> z;
                 u8 raw;
             } imap_generic_vector[32];
-            INSERT_PADDING_BYTES(2);  // ImapColor
-            INSERT_PADDING_BYTES(2);  // ImapSystemValuesC
-            INSERT_PADDING_BYTES(10); // ImapFixedFncTexture[10]
-            INSERT_PADDING_BYTES(2);  // ImapReserved
+            INSERT_UNION_PADDING_BYTES(2);  // ImapColor
+            INSERT_UNION_PADDING_BYTES(2);  // ImapSystemValuesC
+            INSERT_UNION_PADDING_BYTES(10); // ImapFixedFncTexture[10]
+            INSERT_UNION_PADDING_BYTES(2);  // ImapReserved
             struct {
                 u32 target;
                 union {
@@ -139,6 +139,8 @@ struct Header {
                 return result;
             }
         } ps;
+
+        std::array<u32, 0xF> raw{};
     };
 
     u64 GetLocalMemorySize() const {
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index 76cfe8107..095660115 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -3,6 +3,7 @@
 // Refer to the license.txt file included.
 
 #include "common/assert.h"
+#include "common/microprofile.h"
 #include "core/core.h"
 #include "core/core_timing.h"
 #include "core/memory.h"
@@ -17,6 +18,8 @@
 
 namespace Tegra {
 
+MICROPROFILE_DEFINE(GPU_wait, "GPU", "Wait for the GPU", MP_RGB(128, 128, 192));
+
 GPU::GPU(Core::System& system, VideoCore::RendererBase& renderer, bool is_async)
     : system{system}, renderer{renderer}, is_async{is_async} {
     auto& rasterizer{renderer.Rasterizer()};
@@ -63,6 +66,16 @@ const DmaPusher& GPU::DmaPusher() const {
     return *dma_pusher;
 }
 
+void GPU::WaitFence(u32 syncpoint_id, u32 value) const {
+    // A synchronous GPU is always in sync, so there is nothing to wait for.
+    if (!is_async) {
+        return;
+    }
+    MICROPROFILE_SCOPE(GPU_wait);
+    while (syncpoints[syncpoint_id].load(std::memory_order_relaxed) < value) {
+    }
+}
+
 void GPU::IncrementSyncPoint(const u32 syncpoint_id) {
     syncpoints[syncpoint_id]++;
     std::lock_guard lock{sync_mutex};
@@ -326,7 +339,7 @@ void GPU::ProcessSemaphoreTriggerMethod() {
         block.sequence = regs.semaphore_sequence;
         // TODO(Kmather73): Generate a real GPU timestamp and write it here instead of
         // CoreTiming
-        block.timestamp = Core::System::GetInstance().CoreTiming().GetTicks();
+        block.timestamp = system.CoreTiming().GetTicks();
         memory_manager->WriteBlock(regs.semaphore_address.SemaphoreAddress(), &block,
                                    sizeof(block));
     } else {
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h
index 29fa8e95b..ecc338ae9 100644
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -177,6 +177,12 @@ public:
     /// Returns a reference to the GPU DMA pusher.
     Tegra::DmaPusher& DmaPusher();
 
+    /// Waits for the GPU to finish working.
+    virtual void WaitIdle() const = 0;
+
+    /// Allows the CPU/NvFlinger to wait on the GPU before presenting a frame.
+    void WaitFence(u32 syncpoint_id, u32 value) const;
+
     void IncrementSyncPoint(u32 syncpoint_id);
 
     u32 GetSyncpointValue(u32 syncpoint_id) const;
@@ -201,7 +207,7 @@ public:
 
         union {
             struct {
-                INSERT_PADDING_WORDS(0x4);
+                INSERT_UNION_PADDING_WORDS(0x4);
                 struct {
                     u32 address_high;
                     u32 address_low;
@@ -214,12 +220,12 @@ public:
 
                 u32 semaphore_sequence;
                 u32 semaphore_trigger;
-                INSERT_PADDING_WORDS(0xC);
+                INSERT_UNION_PADDING_WORDS(0xC);
 
                 // The puser and the puller share the reference counter, the pusher only has read
                 // access
                 u32 reference_count;
-                INSERT_PADDING_WORDS(0x5);
+                INSERT_UNION_PADDING_WORDS(0x5);
 
                 u32 semaphore_acquire;
                 u32 semaphore_release;
@@ -228,7 +234,7 @@ public:
                     BitField<4, 4, u32> operation;
                     BitField<8, 8, u32> id;
                 } fence_action;
-                INSERT_PADDING_WORDS(0xE2);
+                INSERT_UNION_PADDING_WORDS(0xE2);
 
                 // Puller state
                 u32 acquire_mode;
diff --git a/src/video_core/gpu_asynch.cpp b/src/video_core/gpu_asynch.cpp
index f2a3a390e..04222d060 100644
--- a/src/video_core/gpu_asynch.cpp
+++ b/src/video_core/gpu_asynch.cpp
@@ -44,4 +44,8 @@ void GPUAsynch::TriggerCpuInterrupt(const u32 syncpoint_id, const u32 value) con
     interrupt_manager.GPUInterruptSyncpt(syncpoint_id, value);
 }
 
+void GPUAsynch::WaitIdle() const {
+    gpu_thread.WaitIdle();
+}
+
 } // namespace VideoCommon
diff --git a/src/video_core/gpu_asynch.h b/src/video_core/gpu_asynch.h
index a12f9bac4..1241ade1d 100644
--- a/src/video_core/gpu_asynch.h
+++ b/src/video_core/gpu_asynch.h
@@ -25,6 +25,7 @@ public:
     void FlushRegion(CacheAddr addr, u64 size) override;
     void InvalidateRegion(CacheAddr addr, u64 size) override;
     void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override;
+    void WaitIdle() const override;
 
 protected:
     void TriggerCpuInterrupt(u32 syncpoint_id, u32 value) const override;
diff --git a/src/video_core/gpu_synch.h b/src/video_core/gpu_synch.h
index 5eb1c461c..c71baee89 100644
--- a/src/video_core/gpu_synch.h
+++ b/src/video_core/gpu_synch.h
@@ -24,6 +24,7 @@ public:
     void FlushRegion(CacheAddr addr, u64 size) override;
     void InvalidateRegion(CacheAddr addr, u64 size) override;
     void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override;
+    void WaitIdle() const override {}
 
 protected:
     void TriggerCpuInterrupt([[maybe_unused]] u32 syncpoint_id,
diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp
index 5f039e4fd..758a37f14 100644
--- a/src/video_core/gpu_thread.cpp
+++ b/src/video_core/gpu_thread.cpp
@@ -5,8 +5,6 @@
 #include "common/assert.h"
 #include "common/microprofile.h"
 #include "core/core.h"
-#include "core/core_timing.h"
-#include "core/core_timing_util.h"
 #include "core/frontend/scope_acquire_window_context.h"
 #include "video_core/dma_pusher.h"
 #include "video_core/gpu.h"
@@ -68,14 +66,10 @@ ThreadManager::~ThreadManager() {
 
 void ThreadManager::StartThread(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_pusher) {
     thread = std::thread{RunThread, std::ref(renderer), std::ref(dma_pusher), std::ref(state)};
-    synchronization_event = system.CoreTiming().RegisterEvent(
-        "GPUThreadSynch", [this](u64 fence, s64) { state.WaitForSynchronization(fence); });
 }
 
 void ThreadManager::SubmitList(Tegra::CommandList&& entries) {
-    const u64 fence{PushCommand(SubmitListCommand(std::move(entries)))};
-    const s64 synchronization_ticks{Core::Timing::usToCycles(std::chrono::microseconds{9000})};
-    system.CoreTiming().ScheduleEvent(synchronization_ticks, synchronization_event, fence);
+    PushCommand(SubmitListCommand(std::move(entries)));
 }
 
 void ThreadManager::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
@@ -96,16 +90,15 @@ void ThreadManager::FlushAndInvalidateRegion(CacheAddr addr, u64 size) {
     InvalidateRegion(addr, size);
 }
 
+void ThreadManager::WaitIdle() const {
+    while (state.last_fence > state.signaled_fence.load(std::memory_order_relaxed)) {
+    }
+}
+
 u64 ThreadManager::PushCommand(CommandData&& command_data) {
     const u64 fence{++state.last_fence};
     state.queue.Push(CommandDataContainer(std::move(command_data), fence));
     return fence;
 }
 
-MICROPROFILE_DEFINE(GPU_wait, "GPU", "Wait for the GPU", MP_RGB(128, 128, 192));
-void SynchState::WaitForSynchronization(u64 fence) {
-    while (signaled_fence.load() < fence)
-        ;
-}
-
 } // namespace VideoCommon::GPUThread
diff --git a/src/video_core/gpu_thread.h b/src/video_core/gpu_thread.h
index 3ae0ec9f3..08dc96bb3 100644
--- a/src/video_core/gpu_thread.h
+++ b/src/video_core/gpu_thread.h
@@ -21,9 +21,6 @@ class DmaPusher;
 
 namespace Core {
 class System;
-namespace Timing {
-struct EventType;
-} // namespace Timing
 } // namespace Core
 
 namespace VideoCommon::GPUThread {
@@ -89,8 +86,6 @@ struct CommandDataContainer {
 struct SynchState final {
     std::atomic_bool is_running{true};
 
-    void WaitForSynchronization(u64 fence);
-
     using CommandQueue = Common::SPSCQueue<CommandDataContainer>;
     CommandQueue queue;
     u64 last_fence{};
@@ -121,6 +116,9 @@ public:
     /// Notify rasterizer that any caches of the specified region should be flushed and invalidated
     void FlushAndInvalidateRegion(CacheAddr addr, u64 size);
 
+    /// Waits until the GPU thread is idle.
+    void WaitIdle() const;
+
 private:
     /// Pushes a command to be executed by the GPU thread
     u64 PushCommand(CommandData&& command_data);
@@ -128,7 +126,6 @@ private:
 private:
     SynchState state;
     Core::System& system;
-    Core::Timing::EventType* synchronization_event{};
     std::thread thread;
     std::thread::id thread_id;
 };
diff --git a/src/video_core/macro_interpreter.cpp b/src/video_core/macro_interpreter.cpp
index dbaeac6db..42031d80a 100644
--- a/src/video_core/macro_interpreter.cpp
+++ b/src/video_core/macro_interpreter.cpp
@@ -11,6 +11,77 @@
 MICROPROFILE_DEFINE(MacroInterp, "GPU", "Execute macro interpreter", MP_RGB(128, 128, 192));
 
 namespace Tegra {
+namespace {
+enum class Operation : u32 {
+    ALU = 0,
+    AddImmediate = 1,
+    ExtractInsert = 2,
+    ExtractShiftLeftImmediate = 3,
+    ExtractShiftLeftRegister = 4,
+    Read = 5,
+    Unused = 6, // This operation doesn't seem to be a valid encoding.
+    Branch = 7,
+};
+} // Anonymous namespace
+
+enum class MacroInterpreter::ALUOperation : u32 {
+    Add = 0,
+    AddWithCarry = 1,
+    Subtract = 2,
+    SubtractWithBorrow = 3,
+    // Operations 4-7 don't seem to be valid encodings.
+    Xor = 8,
+    Or = 9,
+    And = 10,
+    AndNot = 11,
+    Nand = 12
+};
+
+enum class MacroInterpreter::ResultOperation : u32 {
+    IgnoreAndFetch = 0,
+    Move = 1,
+    MoveAndSetMethod = 2,
+    FetchAndSend = 3,
+    MoveAndSend = 4,
+    FetchAndSetMethod = 5,
+    MoveAndSetMethodFetchAndSend = 6,
+    MoveAndSetMethodSend = 7
+};
+
+enum class MacroInterpreter::BranchCondition : u32 {
+    Zero = 0,
+    NotZero = 1,
+};
+
+union MacroInterpreter::Opcode {
+    u32 raw;
+    BitField<0, 3, Operation> operation;
+    BitField<4, 3, ResultOperation> result_operation;
+    BitField<4, 1, BranchCondition> branch_condition;
+    // If set on a branch, then the branch doesn't have a delay slot.
+    BitField<5, 1, u32> branch_annul;
+    BitField<7, 1, u32> is_exit;
+    BitField<8, 3, u32> dst;
+    BitField<11, 3, u32> src_a;
+    BitField<14, 3, u32> src_b;
+    // The signed immediate overlaps the second source operand and the alu operation.
+    BitField<14, 18, s32> immediate;
+
+    BitField<17, 5, ALUOperation> alu_operation;
+
+    // Bitfield instruction data: source bit, width in bits and destination bit.
+    BitField<17, 5, u32> bf_src_bit;
+    BitField<22, 5, u32> bf_size;
+    BitField<27, 5, u32> bf_dst_bit;
+
+    u32 GetBitfieldMask() const {
+        return (1U << bf_size) - 1; // 1U: bf_size may be 31, which overflows a signed int
+    }
+
+    s32 GetBranchTarget() const {
+        return static_cast<s32>(immediate * sizeof(u32)); // Immediate scaled by sizeof(u32)
+    }
+};
 
 MacroInterpreter::MacroInterpreter(Engines::Maxwell3D& maxwell3d) : maxwell3d(maxwell3d) {}
 
diff --git a/src/video_core/macro_interpreter.h b/src/video_core/macro_interpreter.h
index 76b6a895b..631146d89 100644
--- a/src/video_core/macro_interpreter.h
+++ b/src/video_core/macro_interpreter.h
@@ -6,7 +6,6 @@
 
 #include <array>
 #include <optional>
-#include <vector>
 
 #include "common/bit_field.h"
 #include "common/common_types.h"
@@ -28,75 +27,11 @@ public:
     void Execute(u32 offset, std::size_t num_parameters, const u32* parameters);
 
 private:
-    enum class Operation : u32 {
-        ALU = 0,
-        AddImmediate = 1,
-        ExtractInsert = 2,
-        ExtractShiftLeftImmediate = 3,
-        ExtractShiftLeftRegister = 4,
-        Read = 5,
-        Unused = 6, // This operation doesn't seem to be a valid encoding.
-        Branch = 7,
-    };
-
-    enum class ALUOperation : u32 {
-        Add = 0,
-        AddWithCarry = 1,
-        Subtract = 2,
-        SubtractWithBorrow = 3,
-        // Operations 4-7 don't seem to be valid encodings.
-        Xor = 8,
-        Or = 9,
-        And = 10,
-        AndNot = 11,
-        Nand = 12
-    };
-
-    enum class ResultOperation : u32 {
-        IgnoreAndFetch = 0,
-        Move = 1,
-        MoveAndSetMethod = 2,
-        FetchAndSend = 3,
-        MoveAndSend = 4,
-        FetchAndSetMethod = 5,
-        MoveAndSetMethodFetchAndSend = 6,
-        MoveAndSetMethodSend = 7
-    };
+    enum class ALUOperation : u32;
+    enum class BranchCondition : u32;
+    enum class ResultOperation : u32;
 
-    enum class BranchCondition : u32 {
-        Zero = 0,
-        NotZero = 1,
-    };
-
-    union Opcode {
-        u32 raw;
-        BitField<0, 3, Operation> operation;
-        BitField<4, 3, ResultOperation> result_operation;
-        BitField<4, 1, BranchCondition> branch_condition;
-        BitField<5, 1, u32>
-            branch_annul; // If set on a branch, then the branch doesn't have a delay slot.
-        BitField<7, 1, u32> is_exit;
-        BitField<8, 3, u32> dst;
-        BitField<11, 3, u32> src_a;
-        BitField<14, 3, u32> src_b;
-        // The signed immediate overlaps the second source operand and the alu operation.
-        BitField<14, 18, s32> immediate;
-
-        BitField<17, 5, ALUOperation> alu_operation;
-
-        // Bitfield instructions data
-        BitField<17, 5, u32> bf_src_bit;
-        BitField<22, 5, u32> bf_size;
-        BitField<27, 5, u32> bf_dst_bit;
-
-        u32 GetBitfieldMask() const {
-            return (1 << bf_size) - 1;
-        }
-
-        s32 GetBranchTarget() const {
-            return static_cast<s32>(immediate * sizeof(u32));
-        }
-    };
+    union Opcode;
 
     union MethodAddress {
         u32 raw;
@@ -149,9 +84,10 @@ private:
 
     Engines::Maxwell3D& maxwell3d;
 
-    u32 pc; ///< Current program counter
-    std::optional<u32>
-        delayed_pc; ///< Program counter to execute at after the delay slot is executed.
+    /// Current program counter
+    u32 pc;
+    /// Program counter to execute at after the delay slot is executed.
+    std::optional<u32> delayed_pc;
 
     static constexpr std::size_t NumMacroRegisters = 8;
 
diff --git a/src/video_core/morton.cpp b/src/video_core/morton.cpp
index ab71870ab..2f2fe6859 100644
--- a/src/video_core/morton.cpp
+++ b/src/video_core/morton.cpp
@@ -93,6 +93,7 @@ static constexpr ConversionArray morton_to_linear_fns = {
     MortonCopy<true, PixelFormat::DXT23_SRGB>,
     MortonCopy<true, PixelFormat::DXT45_SRGB>,
     MortonCopy<true, PixelFormat::BC7U_SRGB>,
+    MortonCopy<true, PixelFormat::R4G4B4A4U>,
     MortonCopy<true, PixelFormat::ASTC_2D_4X4_SRGB>,
     MortonCopy<true, PixelFormat::ASTC_2D_8X8_SRGB>,
     MortonCopy<true, PixelFormat::ASTC_2D_8X5_SRGB>,
@@ -101,6 +102,17 @@ static constexpr ConversionArray morton_to_linear_fns = {
     MortonCopy<true, PixelFormat::ASTC_2D_5X5_SRGB>,
     MortonCopy<true, PixelFormat::ASTC_2D_10X8>,
     MortonCopy<true, PixelFormat::ASTC_2D_10X8_SRGB>,
+    MortonCopy<true, PixelFormat::ASTC_2D_6X6>,
+    MortonCopy<true, PixelFormat::ASTC_2D_6X6_SRGB>,
+    MortonCopy<true, PixelFormat::ASTC_2D_10X10>,
+    MortonCopy<true, PixelFormat::ASTC_2D_10X10_SRGB>,
+    MortonCopy<true, PixelFormat::ASTC_2D_12X12>,
+    MortonCopy<true, PixelFormat::ASTC_2D_12X12_SRGB>,
+    MortonCopy<true, PixelFormat::ASTC_2D_8X6>,
+    MortonCopy<true, PixelFormat::ASTC_2D_8X6_SRGB>,
+    MortonCopy<true, PixelFormat::ASTC_2D_6X5>,
+    MortonCopy<true, PixelFormat::ASTC_2D_6X5_SRGB>,
+    MortonCopy<true, PixelFormat::E5B9G9R9F>,
     MortonCopy<true, PixelFormat::Z32F>,
     MortonCopy<true, PixelFormat::Z16>,
     MortonCopy<true, PixelFormat::Z24S8>,
@@ -162,6 +174,7 @@ static constexpr ConversionArray linear_to_morton_fns = {
     MortonCopy<false, PixelFormat::DXT23_SRGB>,
     MortonCopy<false, PixelFormat::DXT45_SRGB>,
     MortonCopy<false, PixelFormat::BC7U_SRGB>,
+    MortonCopy<false, PixelFormat::R4G4B4A4U>,
     nullptr,
     nullptr,
     nullptr,
@@ -170,6 +183,17 @@ static constexpr ConversionArray linear_to_morton_fns = {
     nullptr,
     nullptr,
     nullptr,
+    nullptr,
+    nullptr,
+    nullptr,
+    nullptr,
+    nullptr,
+    nullptr,
+    nullptr,
+    nullptr,
+    nullptr,
+    nullptr,
+    MortonCopy<false, PixelFormat::E5B9G9R9F>,
     MortonCopy<false, PixelFormat::Z32F>,
     MortonCopy<false, PixelFormat::Z16>,
     MortonCopy<false, PixelFormat::Z24S8>,
diff --git a/src/video_core/rasterizer_accelerated.cpp b/src/video_core/rasterizer_accelerated.cpp
new file mode 100644
index 000000000..b230dcc18
--- /dev/null
+++ b/src/video_core/rasterizer_accelerated.cpp
@@ -0,0 +1,63 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <mutex>
+
+#include <boost/icl/interval_map.hpp>
+
+#include "common/assert.h"
+#include "common/common_types.h"
+#include "core/memory.h"
+#include "video_core/rasterizer_accelerated.h"
+
+namespace VideoCore {
+
+namespace {
+
+template <typename Map, typename Interval>
+constexpr auto RangeFromInterval(Map& map, const Interval& interval) {
+    return boost::make_iterator_range(map.equal_range(interval));
+}
+
+} // Anonymous namespace
+
+RasterizerAccelerated::RasterizerAccelerated() = default;
+
+RasterizerAccelerated::~RasterizerAccelerated() = default;
+
+void RasterizerAccelerated::UpdatePagesCachedCount(VAddr addr, u64 size, int delta) {
+    std::lock_guard lock{pages_mutex};
+    const u64 page_start{addr >> Memory::PAGE_BITS};
+    const u64 page_end{(addr + size + Memory::PAGE_SIZE - 1) >> Memory::PAGE_BITS};
+
+    // Interval maps will erase segments if count reaches 0, so if delta is negative we have to
+    // subtract after iterating
+    const auto pages_interval = CachedPageMap::interval_type::right_open(page_start, page_end);
+    if (delta > 0) {
+        cached_pages.add({pages_interval, delta});
+    }
+
+    for (const auto& pair : RangeFromInterval(cached_pages, pages_interval)) {
+        const auto interval = pair.first & pages_interval;
+        const int count = pair.second;
+
+        const VAddr interval_start_addr = boost::icl::first(interval) << Memory::PAGE_BITS;
+        const VAddr interval_end_addr = boost::icl::last_next(interval) << Memory::PAGE_BITS;
+        const u64 interval_size = interval_end_addr - interval_start_addr;
+
+        if (delta > 0 && count == delta) {
+            Memory::RasterizerMarkRegionCached(interval_start_addr, interval_size, true);
+        } else if (delta < 0 && count == -delta) {
+            Memory::RasterizerMarkRegionCached(interval_start_addr, interval_size, false);
+        } else {
+            ASSERT(count >= 0);
+        }
+    }
+
+    if (delta < 0) {
+        cached_pages.add({pages_interval, delta});
+    }
+}
+
+} // namespace VideoCore
diff --git a/src/video_core/rasterizer_accelerated.h b/src/video_core/rasterizer_accelerated.h
new file mode 100644
index 000000000..8f7e3547e
--- /dev/null
+++ b/src/video_core/rasterizer_accelerated.h
@@ -0,0 +1,31 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <mutex>
+
+#include <boost/icl/interval_map.hpp>
+
+#include "common/common_types.h"
+#include "video_core/rasterizer_interface.h"
+
+namespace VideoCore {
+
+/// Implements the shared part in GPU accelerated rasterizers in RasterizerInterface.
+class RasterizerAccelerated : public RasterizerInterface {
+public:
+    explicit RasterizerAccelerated();
+    ~RasterizerAccelerated() override;
+
+    void UpdatePagesCachedCount(VAddr addr, u64 size, int delta) override;
+
+private:
+    using CachedPageMap = boost::icl::interval_map<u64, int>;
+    CachedPageMap cached_pages;
+
+    std::mutex pages_mutex;
+};
+
+} // namespace VideoCore
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
index f8a807c84..0375fca17 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
@@ -8,13 +8,17 @@
 
 #include "common/assert.h"
 #include "common/microprofile.h"
+#include "video_core/engines/maxwell_3d.h"
 #include "video_core/rasterizer_interface.h"
 #include "video_core/renderer_opengl/gl_buffer_cache.h"
+#include "video_core/renderer_opengl/gl_device.h"
 #include "video_core/renderer_opengl/gl_rasterizer.h"
 #include "video_core/renderer_opengl/gl_resource_manager.h"
 
 namespace OpenGL {
 
+using Maxwell = Tegra::Engines::Maxwell3D::Regs;
+
 MICROPROFILE_DEFINE(OpenGL_Buffer_Download, "OpenGL", "Buffer Download", MP_RGB(192, 192, 128));
 
 CachedBufferBlock::CachedBufferBlock(CacheAddr cache_addr, const std::size_t size)
@@ -26,11 +30,22 @@ CachedBufferBlock::CachedBufferBlock(CacheAddr cache_addr, const std::size_t siz
 CachedBufferBlock::~CachedBufferBlock() = default;
 
 OGLBufferCache::OGLBufferCache(RasterizerOpenGL& rasterizer, Core::System& system,
-                               std::size_t stream_size)
-    : VideoCommon::BufferCache<Buffer, GLuint, OGLStreamBuffer>{
-          rasterizer, system, std::make_unique<OGLStreamBuffer>(stream_size, true)} {}
+                               const Device& device, std::size_t stream_size)
+    : GenericBufferCache{rasterizer, system, std::make_unique<OGLStreamBuffer>(stream_size, true)} {
+    if (!device.HasFastBufferSubData()) {
+        return;
+    }
+
+    static constexpr auto size = static_cast<GLsizeiptr>(Maxwell::MaxConstBufferSize);
+    glCreateBuffers(static_cast<GLsizei>(std::size(cbufs)), std::data(cbufs));
+    for (const GLuint cbuf : cbufs) {
+        glNamedBufferData(cbuf, size, nullptr, GL_STREAM_DRAW);
+    }
+}
 
-OGLBufferCache::~OGLBufferCache() = default;
+OGLBufferCache::~OGLBufferCache() {
+    glDeleteBuffers(static_cast<GLsizei>(std::size(cbufs)), std::data(cbufs));
+}
 
 Buffer OGLBufferCache::CreateBlock(CacheAddr cache_addr, std::size_t size) {
     return std::make_shared<CachedBufferBlock>(cache_addr, size);
@@ -69,4 +84,12 @@ void OGLBufferCache::CopyBlock(const Buffer& src, const Buffer& dst, std::size_t
                              static_cast<GLsizeiptr>(size));
 }
 
+OGLBufferCache::BufferInfo OGLBufferCache::ConstBufferUpload(const void* raw_pointer,
+                                                             std::size_t size) {
+    DEBUG_ASSERT(cbuf_cursor < std::size(cbufs));
+    const GLuint& cbuf = cbufs[cbuf_cursor++];
+    glNamedBufferSubData(cbuf, 0, static_cast<GLsizeiptr>(size), raw_pointer);
+    return {&cbuf, 0};
+}
+
 } // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h
index 022e7bfa9..8c7145443 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.h
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.h
@@ -4,10 +4,12 @@
 
 #pragma once
 
+#include <array>
 #include <memory>
 
 #include "common/common_types.h"
 #include "video_core/buffer_cache/buffer_cache.h"
+#include "video_core/engines/maxwell_3d.h"
 #include "video_core/rasterizer_cache.h"
 #include "video_core/renderer_opengl/gl_resource_manager.h"
 #include "video_core/renderer_opengl/gl_stream_buffer.h"
@@ -18,12 +20,14 @@ class System;
 
 namespace OpenGL {
 
+class Device;
 class OGLStreamBuffer;
 class RasterizerOpenGL;
 
 class CachedBufferBlock;
 
 using Buffer = std::shared_ptr<CachedBufferBlock>;
+using GenericBufferCache = VideoCommon::BufferCache<Buffer, GLuint, OGLStreamBuffer>;
 
 class CachedBufferBlock : public VideoCommon::BufferBlock {
 public:
@@ -38,14 +42,18 @@ private:
     OGLBuffer gl_buffer{};
 };
 
-class OGLBufferCache final : public VideoCommon::BufferCache<Buffer, GLuint, OGLStreamBuffer> {
+class OGLBufferCache final : public GenericBufferCache {
 public:
     explicit OGLBufferCache(RasterizerOpenGL& rasterizer, Core::System& system,
-                            std::size_t stream_size);
+                            const Device& device, std::size_t stream_size);
     ~OGLBufferCache();
 
     const GLuint* GetEmptyBuffer(std::size_t) override;
 
+    void Acquire() noexcept {
+        cbuf_cursor = 0;
+    }
+
 protected:
     Buffer CreateBlock(CacheAddr cache_addr, std::size_t size) override;
 
@@ -61,6 +69,14 @@ protected:
 
     void CopyBlock(const Buffer& src, const Buffer& dst, std::size_t src_offset,
                    std::size_t dst_offset, std::size_t size) override;
+
+    BufferInfo ConstBufferUpload(const void* raw_pointer, std::size_t size) override;
+
+private:
+    std::size_t cbuf_cursor = 0;
+    std::array<GLuint, Tegra::Engines::Maxwell3D::Regs::MaxConstBuffers *
+                           Tegra::Engines::Maxwell3D::Regs::MaxShaderProgram>
+        cbufs{}; // Zeroed: the destructor deletes these names even when they were never created.
 };
 
 } // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp
index 64de7e425..b30d5be74 100644
--- a/src/video_core/renderer_opengl/gl_device.cpp
+++ b/src/video_core/renderer_opengl/gl_device.cpp
@@ -51,19 +51,24 @@ bool HasExtension(const std::vector<std::string_view>& images, std::string_view
 } // Anonymous namespace
 
 Device::Device() {
+    const std::string_view vendor = reinterpret_cast<const char*>(glGetString(GL_VENDOR));
     const std::vector extensions = GetExtensions();
 
+    const bool is_nvidia = vendor == "NVIDIA Corporation";
+
     uniform_buffer_alignment = GetInteger<std::size_t>(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT);
     shader_storage_alignment = GetInteger<std::size_t>(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT);
     max_vertex_attributes = GetInteger<u32>(GL_MAX_VERTEX_ATTRIBS);
     max_varyings = GetInteger<u32>(GL_MAX_VARYING_VECTORS);
     has_warp_intrinsics = GLAD_GL_NV_gpu_shader5 && GLAD_GL_NV_shader_thread_group &&
                           GLAD_GL_NV_shader_thread_shuffle;
+    has_shader_ballot = GLAD_GL_ARB_shader_ballot;
     has_vertex_viewport_layer = GLAD_GL_ARB_shader_viewport_layer_array;
     has_image_load_formatted = HasExtension(extensions, "GL_EXT_shader_image_load_formatted");
     has_variable_aoffi = TestVariableAoffi();
     has_component_indexing_bug = TestComponentIndexingBug();
     has_precise_bug = TestPreciseBug();
+    has_fast_buffer_sub_data = is_nvidia;
 
     LOG_INFO(Render_OpenGL, "Renderer_VariableAOFFI: {}", has_variable_aoffi);
     LOG_INFO(Render_OpenGL, "Renderer_ComponentIndexingBug: {}", has_component_indexing_bug);
@@ -75,6 +80,7 @@ Device::Device(std::nullptr_t) {
     max_vertex_attributes = 16;
     max_varyings = 15;
     has_warp_intrinsics = true;
+    has_shader_ballot = true;
     has_vertex_viewport_layer = true;
     has_image_load_formatted = true;
     has_variable_aoffi = true;
diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h
index bb273c3d6..6c86fe207 100644
--- a/src/video_core/renderer_opengl/gl_device.h
+++ b/src/video_core/renderer_opengl/gl_device.h
@@ -34,6 +34,10 @@ public:
         return has_warp_intrinsics;
     }
 
+    bool HasShaderBallot() const {
+        return has_shader_ballot;
+    }
+
     bool HasVertexViewportLayer() const {
         return has_vertex_viewport_layer;
     }
@@ -54,6 +58,10 @@ public:
         return has_precise_bug;
     }
 
+    bool HasFastBufferSubData() const {
+        return has_fast_buffer_sub_data;
+    }
+
 private:
     static bool TestVariableAoffi();
     static bool TestComponentIndexingBug();
@@ -64,11 +72,13 @@ private:
     u32 max_vertex_attributes{};
     u32 max_varyings{};
     bool has_warp_intrinsics{};
+    bool has_shader_ballot{};
     bool has_vertex_viewport_layer{};
     bool has_image_load_formatted{};
     bool has_variable_aoffi{};
     bool has_component_indexing_bug{};
     bool has_precise_bug{};
+    bool has_fast_buffer_sub_data{};
 };
 
 } // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 6a17bed72..05f8e511b 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -67,9 +67,7 @@ static std::size_t GetConstBufferSize(const Tegra::Engines::ConstBufferInfo& buf
 RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window,
                                    ScreenInfo& info)
     : texture_cache{system, *this, device}, shader_cache{*this, system, emu_window, device},
-      system{system}, screen_info{info}, buffer_cache{*this, system, STREAM_BUFFER_SIZE} {
-    OpenGLState::ApplyDefaultState();
-
+      system{system}, screen_info{info}, buffer_cache{*this, system, device, STREAM_BUFFER_SIZE} {
     shader_program_manager = std::make_unique<GLShader::ProgramManager>();
     state.draw.shader_program = 0;
     state.Apply();
@@ -259,10 +257,8 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
             continue;
         }
 
-        const std::size_t stage{index == 0 ? 0 : index - 1}; // Stage indices are 0 - 5
-
         GLShader::MaxwellUniformData ubo{};
-        ubo.SetFromRegs(gpu, stage);
+        ubo.SetFromRegs(gpu);
         const auto [buffer, offset] =
             buffer_cache.UploadHostMemory(&ubo, sizeof(ubo), device.GetUniformBufferAlignment());
 
@@ -271,10 +267,11 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
 
         Shader shader{shader_cache.GetStageProgram(program)};
 
-        const auto stage_enum = static_cast<Maxwell::ShaderStage>(stage);
-        SetupDrawConstBuffers(stage_enum, shader);
-        SetupDrawGlobalMemory(stage_enum, shader);
-        const auto texture_buffer_usage{SetupDrawTextures(stage_enum, shader, base_bindings)};
+        // Stage indices are 0 - 5
+        const auto stage = static_cast<Maxwell::ShaderStage>(index == 0 ? 0 : index - 1);
+        SetupDrawConstBuffers(stage, shader);
+        SetupDrawGlobalMemory(stage, shader);
+        const auto texture_buffer_usage{SetupDrawTextures(stage, shader, base_bindings)};
 
         const ProgramVariant variant{base_bindings, primitive_mode, texture_buffer_usage};
         const auto [program_handle, next_bindings] = shader->GetProgramHandle(variant);
@@ -342,41 +339,6 @@ std::size_t RasterizerOpenGL::CalculateIndexBufferSize() const {
            static_cast<std::size_t>(regs.index_array.FormatSizeInBytes());
 }
 
-template <typename Map, typename Interval>
-static constexpr auto RangeFromInterval(Map& map, const Interval& interval) {
-    return boost::make_iterator_range(map.equal_range(interval));
-}
-
-void RasterizerOpenGL::UpdatePagesCachedCount(VAddr addr, u64 size, int delta) {
-    const u64 page_start{addr >> Memory::PAGE_BITS};
-    const u64 page_end{(addr + size + Memory::PAGE_SIZE - 1) >> Memory::PAGE_BITS};
-
-    // Interval maps will erase segments if count reaches 0, so if delta is negative we have to
-    // subtract after iterating
-    const auto pages_interval = CachedPageMap::interval_type::right_open(page_start, page_end);
-    if (delta > 0)
-        cached_pages.add({pages_interval, delta});
-
-    for (const auto& pair : RangeFromInterval(cached_pages, pages_interval)) {
-        const auto interval = pair.first & pages_interval;
-        const int count = pair.second;
-
-        const VAddr interval_start_addr = boost::icl::first(interval) << Memory::PAGE_BITS;
-        const VAddr interval_end_addr = boost::icl::last_next(interval) << Memory::PAGE_BITS;
-        const u64 interval_size = interval_end_addr - interval_start_addr;
-
-        if (delta > 0 && count == delta)
-            Memory::RasterizerMarkRegionCached(interval_start_addr, interval_size, true);
-        else if (delta < 0 && count == -delta)
-            Memory::RasterizerMarkRegionCached(interval_start_addr, interval_size, false);
-        else
-            ASSERT(count >= 0);
-    }
-
-    if (delta < 0)
-        cached_pages.add({pages_interval, delta});
-}
-
 void RasterizerOpenGL::LoadDiskResources(const std::atomic_bool& stop_loading,
                                          const VideoCore::DiskResourceLoadCallback& callback) {
     shader_cache.LoadDiskCache(stop_loading, callback);
@@ -412,7 +374,7 @@ void RasterizerOpenGL::ConfigureFramebuffers() {
         fbkey.color_attachments[index] = GL_COLOR_ATTACHMENT0 + regs.rt_control.GetMap(index);
         fbkey.colors[index] = std::move(color_surface);
     }
-    fbkey.colors_count = regs.rt_control.count;
+    fbkey.colors_count = static_cast<u16>(regs.rt_control.count);
 
     if (depth_surface) {
         // Assume that a surface will be written to if it is used as a framebuffer, even if
@@ -595,6 +557,8 @@ void RasterizerOpenGL::DrawPrelude() {
     SyncPolygonOffset();
     SyncAlphaTest();
 
+    buffer_cache.Acquire();
+
     // Draw the vertex batch
     const bool is_indexed = accelerate_draw == AccelDraw::Indexed;
 
@@ -916,7 +880,8 @@ void RasterizerOpenGL::SetupConstBuffer(const Tegra::Engines::ConstBufferInfo& b
     const std::size_t size = Common::AlignUp(GetConstBufferSize(buffer, entry), sizeof(GLvec4));
 
     const auto alignment = device.GetUniformBufferAlignment();
-    const auto [cbuf, offset] = buffer_cache.UploadMemory(buffer.address, size, alignment);
+    const auto [cbuf, offset] = buffer_cache.UploadMemory(buffer.address, size, alignment, false,
+                                                          device.HasFastBufferSubData());
     bind_ubo_pushbuffer.Push(cbuf, offset, size);
 }
 
@@ -968,14 +933,14 @@ TextureBufferUsage RasterizerOpenGL::SetupDrawTextures(Maxwell::ShaderStage stag
 
     for (u32 bindpoint = 0; bindpoint < entries.size(); ++bindpoint) {
         const auto& entry = entries[bindpoint];
-        const auto texture = [&]() {
+        const auto texture = [&] {
             if (!entry.IsBindless()) {
                 return maxwell3d.GetStageTexture(stage, entry.GetOffset());
             }
-            const auto cbuf = entry.GetBindlessCBuf();
-            Tegra::Texture::TextureHandle tex_handle;
-            tex_handle.raw = maxwell3d.AccessConstBuffer32(stage, cbuf.first, cbuf.second);
-            return maxwell3d.GetTextureInfo(tex_handle, entry.GetOffset());
+            const auto shader_type = static_cast<Tegra::Engines::ShaderType>(stage);
+            const Tegra::Texture::TextureHandle tex_handle =
+                maxwell3d.AccessConstBuffer32(shader_type, entry.GetBuffer(), entry.GetOffset());
+            return maxwell3d.GetTextureInfo(tex_handle);
         }();
 
         if (SetupTexture(base_bindings.sampler + bindpoint, texture, entry)) {
@@ -998,14 +963,13 @@ TextureBufferUsage RasterizerOpenGL::SetupComputeTextures(const Shader& kernel)
 
     for (u32 bindpoint = 0; bindpoint < entries.size(); ++bindpoint) {
         const auto& entry = entries[bindpoint];
-        const auto texture = [&]() {
+        const auto texture = [&] {
             if (!entry.IsBindless()) {
                 return compute.GetTexture(entry.GetOffset());
             }
-            const auto cbuf = entry.GetBindlessCBuf();
-            Tegra::Texture::TextureHandle tex_handle;
-            tex_handle.raw = compute.AccessConstBuffer32(cbuf.first, cbuf.second);
-            return compute.GetTextureInfo(tex_handle, entry.GetOffset());
+            const Tegra::Texture::TextureHandle tex_handle = compute.AccessConstBuffer32(
+                Tegra::Engines::ShaderType::Compute, entry.GetBuffer(), entry.GetOffset());
+            return compute.GetTextureInfo(tex_handle);
         }();
 
         if (SetupTexture(bindpoint, texture, entry)) {
@@ -1043,14 +1007,13 @@ void RasterizerOpenGL::SetupComputeImages(const Shader& shader) {
     const auto& entries = shader->GetShaderEntries().images;
     for (u32 bindpoint = 0; bindpoint < entries.size(); ++bindpoint) {
         const auto& entry = entries[bindpoint];
-        const auto tic = [&]() {
+        const auto tic = [&] {
             if (!entry.IsBindless()) {
                 return compute.GetTexture(entry.GetOffset()).tic;
             }
-            const auto cbuf = entry.GetBindlessCBuf();
-            Tegra::Texture::TextureHandle tex_handle;
-            tex_handle.raw = compute.AccessConstBuffer32(cbuf.first, cbuf.second);
-            return compute.GetTextureInfo(tex_handle, entry.GetOffset()).tic;
+            const Tegra::Texture::TextureHandle tex_handle = compute.AccessConstBuffer32(
+                Tegra::Engines::ShaderType::Compute, entry.GetBuffer(), entry.GetOffset());
+            return compute.GetTextureInfo(tex_handle).tic;
         }();
         SetupImage(bindpoint, tic, entry);
     }
@@ -1091,6 +1054,15 @@ void RasterizerOpenGL::SyncViewport(OpenGLState& current_state) {
     }
     state.depth_clamp.far_plane = regs.view_volume_clip_control.depth_clamp_far != 0;
     state.depth_clamp.near_plane = regs.view_volume_clip_control.depth_clamp_near != 0;
+
+    bool flip_y = false;
+    if (regs.viewport_transform[0].scale_y < 0.0) {
+        flip_y = !flip_y;
+    }
+    if (regs.screen_y_control.y_negate != 0) {
+        flip_y = !flip_y;
+    }
+    state.clip_control.origin = flip_y ? GL_UPPER_LEFT : GL_LOWER_LEFT;
 }
 
 void RasterizerOpenGL::SyncClipEnabled(
@@ -1113,28 +1085,14 @@ void RasterizerOpenGL::SyncClipCoef() {
 }
 
 void RasterizerOpenGL::SyncCullMode() {
-    auto& maxwell3d = system.GPU().Maxwell3D();
-
-    const auto& regs = maxwell3d.regs;
+    const auto& regs = system.GPU().Maxwell3D().regs;
 
     state.cull.enabled = regs.cull.enabled != 0;
     if (state.cull.enabled) {
-        state.cull.front_face = MaxwellToGL::FrontFace(regs.cull.front_face);
         state.cull.mode = MaxwellToGL::CullFace(regs.cull.cull_face);
-
-        const bool flip_triangles{regs.screen_y_control.triangle_rast_flip == 0 ||
-                                  regs.viewport_transform[0].scale_y < 0.0f};
-
-        // If the GPU is configured to flip the rasterized triangles, then we need to flip the
-        // notion of front and back. Note: We flip the triangles when the value of the register is 0
-        // because OpenGL already does it for us.
-        if (flip_triangles) {
-            if (state.cull.front_face == GL_CCW)
-                state.cull.front_face = GL_CW;
-            else if (state.cull.front_face == GL_CW)
-                state.cull.front_face = GL_CCW;
-        }
     }
+
+    state.cull.front_face = MaxwellToGL::FrontFace(regs.cull.front_face);
 }
 
 void RasterizerOpenGL::SyncPrimitiveRestart() {
@@ -1340,7 +1298,9 @@ void RasterizerOpenGL::SyncPolygonOffset() {
     state.polygon_offset.fill_enable = regs.polygon_offset_fill_enable != 0;
     state.polygon_offset.line_enable = regs.polygon_offset_line_enable != 0;
     state.polygon_offset.point_enable = regs.polygon_offset_point_enable != 0;
-    state.polygon_offset.units = regs.polygon_offset_units;
+
+    // Hardware divides polygon offset units by two
+    state.polygon_offset.units = regs.polygon_offset_units / 2.0f;
     state.polygon_offset.factor = regs.polygon_offset_factor;
     state.polygon_offset.clamp = regs.polygon_offset_clamp;
 
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index 9c10ebda3..bd6fe5c3a 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -13,12 +13,12 @@
 #include <tuple>
 #include <utility>
 
-#include <boost/icl/interval_map.hpp>
 #include <glad/glad.h>
 
 #include "common/common_types.h"
 #include "video_core/engines/const_buffer_info.h"
 #include "video_core/engines/maxwell_3d.h"
+#include "video_core/rasterizer_accelerated.h"
 #include "video_core/rasterizer_cache.h"
 #include "video_core/rasterizer_interface.h"
 #include "video_core/renderer_opengl/gl_buffer_cache.h"
@@ -51,7 +51,7 @@ namespace OpenGL {
 struct ScreenInfo;
 struct DrawParameters;
 
-class RasterizerOpenGL : public VideoCore::RasterizerInterface {
+class RasterizerOpenGL : public VideoCore::RasterizerAccelerated {
 public:
     explicit RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window,
                               ScreenInfo& info);
@@ -72,7 +72,6 @@ public:
                                const Tegra::Engines::Fermi2D::Config& copy_config) override;
     bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr,
                            u32 pixel_stride) override;
-    void UpdatePagesCachedCount(VAddr addr, u64 size, int delta) override;
     void LoadDiskResources(const std::atomic_bool& stop_loading,
                            const VideoCore::DiskResourceLoadCallback& callback) override;
 
@@ -227,9 +226,6 @@ private:
     AccelDraw accelerate_draw = AccelDraw::Disabled;
 
     OGLFramebuffer clear_framebuffer;
-
-    using CachedPageMap = boost::icl::interval_map<u64, int>;
-    CachedPageMap cached_pages;
 };
 
 } // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index 42ca3b1bd..04a239a39 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -3,13 +3,16 @@
 // Refer to the license.txt file included.
 
 #include <mutex>
+#include <optional>
+#include <string>
 #include <thread>
+#include <unordered_set>
 #include <boost/functional/hash.hpp>
 #include "common/assert.h"
-#include "common/hash.h"
 #include "common/scope_exit.h"
 #include "core/core.h"
 #include "core/frontend/emu_window.h"
+#include "video_core/engines/kepler_compute.h"
 #include "video_core/engines/maxwell_3d.h"
 #include "video_core/memory_manager.h"
 #include "video_core/renderer_opengl/gl_rasterizer.h"
@@ -21,18 +24,20 @@
 
 namespace OpenGL {
 
+using Tegra::Engines::ShaderType;
+using VideoCommon::Shader::ConstBufferLocker;
 using VideoCommon::Shader::ProgramCode;
+using VideoCommon::Shader::ShaderIR;
+
+namespace {
 
 // One UBO is always reserved for emulation values on staged shaders
 constexpr u32 STAGE_RESERVED_UBOS = 1;
 
-struct UnspecializedShader {
-    std::string code;
-    GLShader::ShaderEntries entries;
-    ProgramType program_type;
-};
+constexpr u32 STAGE_MAIN_OFFSET = 10;
+constexpr u32 KERNEL_MAIN_OFFSET = 0;
 
-namespace {
+constexpr VideoCommon::Shader::CompilerSettings COMPILER_SETTINGS{};
 
 /// Gets the address for the specified shader stage program
 GPUVAddr GetShaderAddress(Core::System& system, Maxwell::ShaderProgram program) {
@@ -41,6 +46,39 @@ GPUVAddr GetShaderAddress(Core::System& system, Maxwell::ShaderProgram program)
     return gpu.regs.code_address.CodeAddress() + shader_config.offset;
 }
 
+/// Returns whether the instruction at `offset` is a scheduler instruction
+constexpr bool IsSchedInstruction(std::size_t offset, std::size_t main_offset) {
+    // Sched instructions appear once every 4 instructions, counted from main_offset.
+    constexpr std::size_t SchedPeriod = 4;
+    const std::size_t absolute_offset = offset - main_offset; // wraps if offset < main_offset
+    return (absolute_offset % SchedPeriod) == 0;
+}
+
+/// Calculates the size of a program stream, in u64 instruction words (not bytes)
+std::size_t CalculateProgramSize(const GLShader::ProgramCode& program) {
+    constexpr std::size_t start_offset = STAGE_MAIN_OFFSET;
+    // This is the encoded version of BRA that jumps to itself. All Nvidia
+    // shaders end with one.
+    constexpr u64 self_jumping_branch = 0xE2400FFFFF07000FULL;
+    constexpr u64 mask = 0xFFFFFFFFFF7FFFFFULL;
+    std::size_t offset = start_offset;
+    while (offset < program.size()) {
+        const u64 instruction = program[offset];
+        if (!IsSchedInstruction(offset, start_offset)) {
+            if ((instruction & mask) == self_jumping_branch) {
+                // End on the self-jumping BRA described above
+                break;
+            }
+            if (instruction == 0) {
+                break;
+            }
+        }
+        offset++;
+    }
+    // The last instruction is included in the program size, capped at the stream length
+    return std::min(offset + 1, program.size());
+}
+
 /// Gets the shader program code from memory for the specified address
 ProgramCode GetShaderCode(Tegra::MemoryManager& memory_manager, const GPUVAddr gpu_addr,
                           const u8* host_ptr) {
@@ -51,6 +89,7 @@ ProgramCode GetShaderCode(Tegra::MemoryManager& memory_manager, const GPUVAddr g
     });
     memory_manager.ReadBlockUnsafe(gpu_addr, program_code.data(),
                                    program_code.size() * sizeof(u64));
+    program_code.resize(CalculateProgramSize(program_code));
     return program_code;
 }
 
@@ -71,14 +110,6 @@ constexpr GLenum GetShaderType(ProgramType program_type) {
     }
 }
 
-/// Gets if the current instruction offset is a scheduler instruction
-constexpr bool IsSchedInstruction(std::size_t offset, std::size_t main_offset) {
-    // Sched instructions appear once every 4 instructions.
-    constexpr std::size_t SchedPeriod = 4;
-    const std::size_t absolute_offset = offset - main_offset;
-    return (absolute_offset % SchedPeriod) == 0;
-}
-
 /// Describes primitive behavior on geometry shaders
 constexpr std::tuple<const char*, const char*, u32> GetPrimitiveDescription(GLenum primitive_mode) {
     switch (primitive_mode) {
@@ -121,110 +152,151 @@ ProgramType GetProgramType(Maxwell::ShaderProgram program) {
     return {};
 }
 
-/// Calculates the size of a program stream
-std::size_t CalculateProgramSize(const GLShader::ProgramCode& program) {
-    constexpr std::size_t start_offset = 10;
-    // This is the encoded version of BRA that jumps to itself. All Nvidia
-    // shaders end with one.
-    constexpr u64 self_jumping_branch = 0xE2400FFFFF07000FULL;
-    constexpr u64 mask = 0xFFFFFFFFFF7FFFFFULL;
-    std::size_t offset = start_offset;
-    std::size_t size = start_offset * sizeof(u64);
-    while (offset < program.size()) {
-        const u64 instruction = program[offset];
-        if (!IsSchedInstruction(offset, start_offset)) {
-            if ((instruction & mask) == self_jumping_branch) {
-                // End on Maxwell's "nop" instruction
-                break;
-            }
-            if (instruction == 0) {
-                break;
-            }
-        }
-        size += sizeof(u64);
-        offset++;
-    }
-    // The last instruction is included in the program size
-    return std::min(size + sizeof(u64), program.size() * sizeof(u64));
-}
-
 /// Hashes one (or two) program streams
 u64 GetUniqueIdentifier(ProgramType program_type, const ProgramCode& code,
-                        const ProgramCode& code_b, std::size_t size_a = 0, std::size_t size_b = 0) {
-    if (size_a == 0) {
-        size_a = CalculateProgramSize(code);
-    }
-    u64 unique_identifier = Common::CityHash64(reinterpret_cast<const char*>(code.data()), size_a);
-    if (program_type != ProgramType::VertexA) {
-        return unique_identifier;
-    }
-    // VertexA programs include two programs
-
-    std::size_t seed = 0;
-    boost::hash_combine(seed, unique_identifier);
-
-    if (size_b == 0) {
-        size_b = CalculateProgramSize(code_b);
+                        const ProgramCode& code_b) {
+    u64 unique_identifier = boost::hash_value(code);
+    if (program_type == ProgramType::VertexA) {
+        // VertexA programs include two programs
+        boost::hash_combine(unique_identifier, boost::hash_value(code_b));
     }
-    const u64 identifier_b =
-        Common::CityHash64(reinterpret_cast<const char*>(code_b.data()), size_b);
-    boost::hash_combine(seed, identifier_b);
-    return static_cast<u64>(seed);
+    return unique_identifier;
 }
 
 /// Creates an unspecialized program from code streams
-GLShader::ProgramResult CreateProgram(const Device& device, ProgramType program_type,
-                                      ProgramCode program_code, ProgramCode program_code_b) {
-    GLShader::ShaderSetup setup(program_code);
-    setup.program.size_a = CalculateProgramSize(program_code);
-    setup.program.size_b = 0;
-    if (program_type == ProgramType::VertexA) {
-        // VertexB is always enabled, so when VertexA is enabled, we have two vertex shaders.
-        // Conventional HW does not support this, so we combine VertexA and VertexB into one
-        // stage here.
-        setup.SetProgramB(program_code_b);
-        setup.program.size_b = CalculateProgramSize(program_code_b);
-    }
-    setup.program.unique_identifier = GetUniqueIdentifier(
-        program_type, program_code, program_code_b, setup.program.size_a, setup.program.size_b);
-
+std::string GenerateGLSL(const Device& device, ProgramType program_type, const ShaderIR& ir,
+                         const std::optional<ShaderIR>& ir_b) {
     switch (program_type) {
     case ProgramType::VertexA:
     case ProgramType::VertexB:
-        return GLShader::GenerateVertexShader(device, setup);
+        return GLShader::GenerateVertexShader(device, ir, ir_b ? &*ir_b : nullptr);
     case ProgramType::Geometry:
-        return GLShader::GenerateGeometryShader(device, setup);
+        return GLShader::GenerateGeometryShader(device, ir);
     case ProgramType::Fragment:
-        return GLShader::GenerateFragmentShader(device, setup);
+        return GLShader::GenerateFragmentShader(device, ir);
     case ProgramType::Compute:
-        return GLShader::GenerateComputeShader(device, setup);
+        return GLShader::GenerateComputeShader(device, ir);
     default:
         UNIMPLEMENTED_MSG("Unimplemented program_type={}", static_cast<u32>(program_type));
         return {};
     }
 }
 
-CachedProgram SpecializeShader(const std::string& code, const GLShader::ShaderEntries& entries,
-                               ProgramType program_type, const ProgramVariant& variant,
-                               bool hint_retrievable = false) {
+constexpr const char* GetProgramTypeName(ProgramType program_type) {
+    switch (program_type) {
+    case ProgramType::VertexA:
+    case ProgramType::VertexB:
+        return "VS";
+    case ProgramType::TessellationControl:
+        return "TCS";
+    case ProgramType::TessellationEval:
+        return "TES";
+    case ProgramType::Geometry:
+        return "GS";
+    case ProgramType::Fragment:
+        return "FS";
+    case ProgramType::Compute:
+        return "CS";
+    }
+    return "UNK";
+}
+
+Tegra::Engines::ShaderType GetEnginesShaderType(ProgramType program_type) {
+    switch (program_type) {
+    case ProgramType::VertexA:
+    case ProgramType::VertexB:
+        return Tegra::Engines::ShaderType::Vertex;
+    case ProgramType::TessellationControl:
+        return Tegra::Engines::ShaderType::TesselationControl;
+    case ProgramType::TessellationEval:
+        return Tegra::Engines::ShaderType::TesselationEval;
+    case ProgramType::Geometry:
+        return Tegra::Engines::ShaderType::Geometry;
+    case ProgramType::Fragment:
+        return Tegra::Engines::ShaderType::Fragment;
+    case ProgramType::Compute:
+        return Tegra::Engines::ShaderType::Compute;
+    }
+    UNREACHABLE();
+    return {};
+}
+
+std::string GetShaderId(u64 unique_identifier, ProgramType program_type) {
+    return fmt::format("{}{:016X}", GetProgramTypeName(program_type), unique_identifier);
+}
+
+Tegra::Engines::ConstBufferEngineInterface& GetConstBufferEngineInterface(
+    Core::System& system, ProgramType program_type) {
+    if (program_type == ProgramType::Compute) {
+        return system.GPU().KeplerCompute();
+    } else {
+        return system.GPU().Maxwell3D();
+    }
+}
+
+std::unique_ptr<ConstBufferLocker> MakeLocker(Core::System& system, ProgramType program_type) {
+    return std::make_unique<ConstBufferLocker>(GetEnginesShaderType(program_type),
+                                               GetConstBufferEngineInterface(system, program_type));
+}
+
+void FillLocker(ConstBufferLocker& locker, const ShaderDiskCacheUsage& usage) {
+    for (const auto& key : usage.keys) {
+        const auto [buffer, offset] = key.first;
+        locker.InsertKey(buffer, offset, key.second);
+    }
+    for (const auto& [offset, sampler] : usage.bound_samplers) {
+        locker.InsertBoundSampler(offset, sampler);
+    }
+    for (const auto& [key, sampler] : usage.bindless_samplers) {
+        const auto [buffer, offset] = key;
+        locker.InsertBindlessSampler(buffer, offset, sampler);
+    }
+}
+
+CachedProgram BuildShader(const Device& device, u64 unique_identifier, ProgramType program_type,
+                          const ProgramCode& program_code, const ProgramCode& program_code_b,
+                          const ProgramVariant& variant, ConstBufferLocker& locker,
+                          bool hint_retrievable = false) {
+    LOG_INFO(Render_OpenGL, "called. {}", GetShaderId(unique_identifier, program_type));
+
+    const bool is_compute = program_type == ProgramType::Compute;
+    const u32 main_offset = is_compute ? KERNEL_MAIN_OFFSET : STAGE_MAIN_OFFSET;
+    const ShaderIR ir(program_code, main_offset, COMPILER_SETTINGS, locker);
+    std::optional<ShaderIR> ir_b;
+    if (!program_code_b.empty()) {
+        ir_b.emplace(program_code_b, main_offset, COMPILER_SETTINGS, locker);
+    }
+    const auto entries = GLShader::GetEntries(ir);
+
     auto base_bindings{variant.base_bindings};
     const auto primitive_mode{variant.primitive_mode};
     const auto texture_buffer_usage{variant.texture_buffer_usage};
 
-    std::string source = R"(#version 430 core
+    std::string source = fmt::format(R"(// {}
+#version 430 core
 #extension GL_ARB_separate_shader_objects : enable
-#extension GL_ARB_shader_viewport_layer_array : enable
-#extension GL_EXT_shader_image_load_formatted : enable
-#extension GL_NV_gpu_shader5 : enable
-#extension GL_NV_shader_thread_group : enable
-#extension GL_NV_shader_thread_shuffle : enable
-)";
-    if (program_type == ProgramType::Compute) {
+)",
+                                     GetShaderId(unique_identifier, program_type));
+    if (is_compute) {
         source += "#extension GL_ARB_compute_variable_group_size : require\n";
     }
+    if (device.HasShaderBallot()) {
+        source += "#extension GL_ARB_shader_ballot : require\n";
+    }
+    if (device.HasVertexViewportLayer()) {
+        source += "#extension GL_ARB_shader_viewport_layer_array : require\n";
+    }
+    if (device.HasImageLoadFormatted()) {
+        source += "#extension GL_EXT_shader_image_load_formatted : require\n";
+    }
+    if (device.HasWarpIntrinsics()) {
+        source += "#extension GL_NV_gpu_shader5 : require\n"
+                  "#extension GL_NV_shader_thread_group : require\n"
+                  "#extension GL_NV_shader_thread_shuffle : require\n";
+    }
     source += '\n';
 
-    if (program_type != ProgramType::Compute) {
+    if (!is_compute) {
         source += fmt::format("#define EMULATION_UBO_BINDING {}\n", base_bindings.cbuf++);
     }
 
@@ -268,7 +340,7 @@ CachedProgram SpecializeShader(const std::string& code, const GLShader::ShaderEn
     }
 
     source += '\n';
-    source += code;
+    source += GenerateGLSL(device, program_type, ir, ir_b);
 
     OGLShader shader;
     shader.Create(source.c_str(), GetShaderType(program_type));
@@ -278,85 +350,99 @@ CachedProgram SpecializeShader(const std::string& code, const GLShader::ShaderEn
     return program;
 }
 
-std::set<GLenum> GetSupportedFormats() {
-    std::set<GLenum> supported_formats;
-
+std::unordered_set<GLenum> GetSupportedFormats() {
     GLint num_formats{};
     glGetIntegerv(GL_NUM_PROGRAM_BINARY_FORMATS, &num_formats);
 
     std::vector<GLint> formats(num_formats);
     glGetIntegerv(GL_PROGRAM_BINARY_FORMATS, formats.data());
 
-    for (const GLint format : formats)
+    std::unordered_set<GLenum> supported_formats;
+    for (const GLint format : formats) {
         supported_formats.insert(static_cast<GLenum>(format));
+    }
     return supported_formats;
 }
 
 } // Anonymous namespace
 
 CachedShader::CachedShader(const ShaderParameters& params, ProgramType program_type,
-                           GLShader::ProgramResult result)
-    : RasterizerCacheObject{params.host_ptr}, cpu_addr{params.cpu_addr},
-      unique_identifier{params.unique_identifier}, program_type{program_type},
-      disk_cache{params.disk_cache}, precompiled_programs{params.precompiled_programs},
-      entries{result.second}, code{std::move(result.first)}, shader_length{entries.shader_length} {}
+                           GLShader::ShaderEntries entries, ProgramCode program_code,
+                           ProgramCode program_code_b)
+    : RasterizerCacheObject{params.host_ptr}, system{params.system},
+      disk_cache{params.disk_cache}, device{params.device}, cpu_addr{params.cpu_addr},
+      unique_identifier{params.unique_identifier}, program_type{program_type}, entries{entries},
+      program_code{std::move(program_code)}, program_code_b{std::move(program_code_b)} {
+    if (!params.precompiled_variants) {
+        return;
+    }
+    for (const auto& pair : *params.precompiled_variants) {
+        auto locker = MakeLocker(system, program_type);
+        const auto& usage = pair->first;
+        FillLocker(*locker, usage);
+
+        std::unique_ptr<LockerVariant>* locker_variant = nullptr;
+        const auto it =
+            std::find_if(locker_variants.begin(), locker_variants.end(), [&](const auto& variant) {
+                return variant->locker->HasEqualKeys(*locker);
+            });
+        if (it == locker_variants.end()) {
+            locker_variant = &locker_variants.emplace_back();
+            *locker_variant = std::make_unique<LockerVariant>();
+            locker_variant->get()->locker = std::move(locker);
+        } else {
+            locker_variant = &*it;
+        }
+        locker_variant->get()->programs.emplace(usage.variant, pair->second);
+    }
+}
 
 Shader CachedShader::CreateStageFromMemory(const ShaderParameters& params,
                                            Maxwell::ShaderProgram program_type,
-                                           ProgramCode&& program_code,
-                                           ProgramCode&& program_code_b) {
-    const auto code_size{CalculateProgramSize(program_code)};
-    const auto code_size_b{CalculateProgramSize(program_code_b)};
-    auto result{
-        CreateProgram(params.device, GetProgramType(program_type), program_code, program_code_b)};
-    if (result.first.empty()) {
-        // TODO(Rodrigo): Unimplemented shader stages hit here, avoid using these for now
-        return {};
-    }
-
+                                           ProgramCode program_code, ProgramCode program_code_b) {
     params.disk_cache.SaveRaw(ShaderDiskCacheRaw(
-        params.unique_identifier, GetProgramType(program_type),
-        static_cast<u32>(code_size / sizeof(u64)), static_cast<u32>(code_size_b / sizeof(u64)),
-        std::move(program_code), std::move(program_code_b)));
-
-    return std::shared_ptr<CachedShader>(
-        new CachedShader(params, GetProgramType(program_type), std::move(result)));
-}
-
-Shader CachedShader::CreateStageFromCache(const ShaderParameters& params,
-                                          Maxwell::ShaderProgram program_type,
-                                          GLShader::ProgramResult result) {
+        params.unique_identifier, GetProgramType(program_type), program_code, program_code_b));
+
+    ConstBufferLocker locker(GetEnginesShaderType(GetProgramType(program_type)),
+                             params.system.GPU().Maxwell3D());
+    const ShaderIR ir(program_code, STAGE_MAIN_OFFSET, COMPILER_SETTINGS, locker);
+    // TODO(Rodrigo): Handle VertexA shaders
+    // std::optional<ShaderIR> ir_b;
+    // if (!program_code_b.empty()) {
+    //     ir_b.emplace(program_code_b, STAGE_MAIN_OFFSET);
+    // }
     return std::shared_ptr<CachedShader>(
-        new CachedShader(params, GetProgramType(program_type), std::move(result)));
+        new CachedShader(params, GetProgramType(program_type), GLShader::GetEntries(ir),
+                         std::move(program_code), std::move(program_code_b)));
 }
 
-Shader CachedShader::CreateKernelFromMemory(const ShaderParameters& params, ProgramCode&& code) {
-    auto result{CreateProgram(params.device, ProgramType::Compute, code, {})};
+Shader CachedShader::CreateKernelFromMemory(const ShaderParameters& params, ProgramCode code) {
+    params.disk_cache.SaveRaw(
+        ShaderDiskCacheRaw(params.unique_identifier, ProgramType::Compute, code));
 
-    const auto code_size{CalculateProgramSize(code)};
-    params.disk_cache.SaveRaw(ShaderDiskCacheRaw(params.unique_identifier, ProgramType::Compute,
-                                                 static_cast<u32>(code_size / sizeof(u64)), 0,
-                                                 std::move(code), {}));
-
-    return std::shared_ptr<CachedShader>(
-        new CachedShader(params, ProgramType::Compute, std::move(result)));
+    ConstBufferLocker locker(Tegra::Engines::ShaderType::Compute,
+                             params.system.GPU().KeplerCompute());
+    const ShaderIR ir(code, KERNEL_MAIN_OFFSET, COMPILER_SETTINGS, locker);
+    return std::shared_ptr<CachedShader>(new CachedShader(
+        params, ProgramType::Compute, GLShader::GetEntries(ir), std::move(code), {}));
 }
 
-Shader CachedShader::CreateKernelFromCache(const ShaderParameters& params,
-                                           GLShader::ProgramResult result) {
-    return std::shared_ptr<CachedShader>(
-        new CachedShader(params, ProgramType::Compute, std::move(result)));
+Shader CachedShader::CreateFromCache(const ShaderParameters& params,
+                                     const UnspecializedShader& unspecialized) {
+    return std::shared_ptr<CachedShader>(new CachedShader(params, unspecialized.program_type,
+                                                          unspecialized.entries, unspecialized.code,
+                                                          unspecialized.code_b));
 }
 
 std::tuple<GLuint, BaseBindings> CachedShader::GetProgramHandle(const ProgramVariant& variant) {
-    const auto [entry, is_cache_miss] = programs.try_emplace(variant);
+    UpdateVariant();
+
+    const auto [entry, is_cache_miss] = curr_variant->programs.try_emplace(variant);
     auto& program = entry->second;
     if (is_cache_miss) {
-        program = TryLoadProgram(variant);
-        if (!program) {
-            program = SpecializeShader(code, entries, program_type, variant);
-            disk_cache.SaveUsage(GetUsage(variant));
-        }
+        program = BuildShader(device, unique_identifier, program_type, program_code, program_code_b,
+                              variant, *curr_variant->locker);
+        disk_cache.SaveUsage(GetUsage(variant, *curr_variant->locker));
 
         LabelGLObject(GL_PROGRAM, program->handle, cpu_addr);
     }
@@ -372,18 +458,33 @@ std::tuple<GLuint, BaseBindings> CachedShader::GetProgramHandle(const ProgramVar
     return {program->handle, base_bindings};
 }
 
-CachedProgram CachedShader::TryLoadProgram(const ProgramVariant& variant) const {
-    const auto found = precompiled_programs.find(GetUsage(variant));
-    if (found == precompiled_programs.end()) {
-        return {};
+void CachedShader::UpdateVariant() {
+    if (curr_variant && !curr_variant->locker->IsConsistent()) {
+        curr_variant = nullptr;
+    }
+    if (!curr_variant) {
+        for (auto& variant : locker_variants) {
+            if (variant->locker->IsConsistent()) {
+                curr_variant = variant.get(); // NOTE(review): no break, last consistent wins
+            }
+        }
+    }
+    if (!curr_variant) {
+        auto& new_variant = locker_variants.emplace_back();
+        new_variant = std::make_unique<LockerVariant>();
+        new_variant->locker = MakeLocker(system, program_type);
+        curr_variant = new_variant.get();
     }
-    return found->second;
 }
 
-ShaderDiskCacheUsage CachedShader::GetUsage(const ProgramVariant& variant) const {
+ShaderDiskCacheUsage CachedShader::GetUsage(const ProgramVariant& variant,
+                                            const ConstBufferLocker& locker) const {
     ShaderDiskCacheUsage usage;
     usage.unique_identifier = unique_identifier;
     usage.variant = variant;
+    usage.keys = locker.GetKeys();
+    usage.bound_samplers = locker.GetBoundSamplers();
+    usage.bindless_samplers = locker.GetBindlessSamplers();
     return usage;
 }
 
@@ -399,18 +500,15 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
         return;
     }
     const auto [raws, shader_usages] = *transferable;
-
-    auto [decompiled, dumps] = disk_cache.LoadPrecompiled();
-
-    const auto supported_formats{GetSupportedFormats()};
-    const auto unspecialized_shaders{
-        GenerateUnspecializedShaders(stop_loading, callback, raws, decompiled)};
-    if (stop_loading) {
+    if (!GenerateUnspecializedShaders(stop_loading, callback, raws) || stop_loading) {
         return;
     }
 
-    // Track if precompiled cache was altered during loading to know if we have to serialize the
-    // virtual precompiled cache file back to the hard drive
+    const auto dumps = disk_cache.LoadPrecompiled();
+    const auto supported_formats = GetSupportedFormats();
+
+    // Track if precompiled cache was altered during loading to know if we have to
+    // serialize the virtual precompiled cache file back to the hard drive
     bool precompiled_cache_altered = false;
 
     // Inform the frontend about shader build initialization
@@ -433,9 +531,6 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
                 return;
             }
             const auto& usage{shader_usages[i]};
-            LOG_INFO(Render_OpenGL, "Building shader {:016x} (index {} of {})",
-                     usage.unique_identifier, i, shader_usages.size());
-
             const auto& unspecialized{unspecialized_shaders.at(usage.unique_identifier)};
             const auto dump{dumps.find(usage)};
 
@@ -449,21 +544,28 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
                 }
             }
             if (!shader) {
-                shader = SpecializeShader(unspecialized.code, unspecialized.entries,
-                                          unspecialized.program_type, usage.variant, true);
+                auto locker{MakeLocker(system, unspecialized.program_type)};
+                FillLocker(*locker, usage);
+                shader = BuildShader(device, usage.unique_identifier, unspecialized.program_type,
+                                     unspecialized.code, unspecialized.code_b, usage.variant,
+                                     *locker, true);
             }
 
-            std::scoped_lock lock(mutex);
+            std::scoped_lock lock{mutex};
             if (callback) {
                 callback(VideoCore::LoadCallbackStage::Build, ++built_shaders,
                          shader_usages.size());
             }
 
             precompiled_programs.emplace(usage, std::move(shader));
+
+            // TODO(Rodrigo): Is there a better way? A rehash would invalidate the stored iterators
+            precompiled_variants[usage.unique_identifier].push_back(
+                precompiled_programs.find(usage));
         }
     };
 
-    const auto num_workers{static_cast<std::size_t>(std::thread::hardware_concurrency() + 1)};
+    const auto num_workers{static_cast<std::size_t>(std::thread::hardware_concurrency() + 1ULL)};
     const std::size_t bucket_size{shader_usages.size() / num_workers};
     std::vector<std::unique_ptr<Core::Frontend::GraphicsContext>> contexts(num_workers);
     std::vector<std::thread> threads(num_workers);
@@ -483,7 +585,6 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
     if (compilation_failed) {
         // Invalidate the precompiled cache if a shader dumped shader was rejected
         disk_cache.InvalidatePrecompiled();
-        dumps.clear();
         precompiled_cache_altered = true;
         return;
     }
@@ -491,8 +592,8 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
         return;
     }
 
-    // TODO(Rodrigo): Do state tracking for transferable shaders and do a dummy draw before
-    // precompiling them
+    // TODO(Rodrigo): Do state tracking for transferable shaders and do a dummy draw
+    // before precompiling them
 
     for (std::size_t i = 0; i < shader_usages.size(); ++i) {
         const auto& usage{shader_usages[i]};
@@ -508,9 +609,13 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
     }
 }
 
-CachedProgram ShaderCacheOpenGL::GeneratePrecompiledProgram(
-    const ShaderDiskCacheDump& dump, const std::set<GLenum>& supported_formats) {
+const PrecompiledVariants* ShaderCacheOpenGL::GetPrecompiledVariants(u64 unique_identifier) const {
+    const auto entry = precompiled_variants.find(unique_identifier);
+    return entry != precompiled_variants.end() ? &entry->second : nullptr; // Null if none stored
+}
 
+CachedProgram ShaderCacheOpenGL::GeneratePrecompiledProgram(
+    const ShaderDiskCacheDump& dump, const std::unordered_set<GLenum>& supported_formats) {
     if (supported_formats.find(dump.binary_format) == supported_formats.end()) {
         LOG_INFO(Render_OpenGL, "Precompiled cache entry with unsupported format - removing");
         return {};
@@ -532,56 +637,52 @@ CachedProgram ShaderCacheOpenGL::GeneratePrecompiledProgram(
     return shader;
 }
 
-std::unordered_map<u64, UnspecializedShader> ShaderCacheOpenGL::GenerateUnspecializedShaders(
+bool ShaderCacheOpenGL::GenerateUnspecializedShaders(
     const std::atomic_bool& stop_loading, const VideoCore::DiskResourceLoadCallback& callback,
-    const std::vector<ShaderDiskCacheRaw>& raws,
-    const std::unordered_map<u64, ShaderDiskCacheDecompiled>& decompiled) {
-    std::unordered_map<u64, UnspecializedShader> unspecialized;
-
+    const std::vector<ShaderDiskCacheRaw>& raws) {
     if (callback) {
         callback(VideoCore::LoadCallbackStage::Decompile, 0, raws.size());
     }
 
     for (std::size_t i = 0; i < raws.size(); ++i) {
         if (stop_loading) {
-            return {};
+            return false;
         }
         const auto& raw{raws[i]};
         const u64 unique_identifier{raw.GetUniqueIdentifier()};
         const u64 calculated_hash{
             GetUniqueIdentifier(raw.GetProgramType(), raw.GetProgramCode(), raw.GetProgramCodeB())};
         if (unique_identifier != calculated_hash) {
-            LOG_ERROR(
-                Render_OpenGL,
-                "Invalid hash in entry={:016x} (obtained hash={:016x}) - removing shader cache",
-                raw.GetUniqueIdentifier(), calculated_hash);
+            LOG_ERROR(Render_OpenGL,
+                      "Invalid hash in entry={:016x} (obtained hash={:016x}) - "
+                      "removing shader cache",
+                      raw.GetUniqueIdentifier(), calculated_hash);
             disk_cache.InvalidateTransferable();
-            return {};
+            return false;
         }
 
-        GLShader::ProgramResult result;
-        if (const auto it = decompiled.find(unique_identifier); it != decompiled.end()) {
-            // If it's stored in the precompiled file, avoid decompiling it here
-            const auto& stored_decompiled{it->second};
-            result = {stored_decompiled.code, stored_decompiled.entries};
-        } else {
-            // Otherwise decompile the shader at boot and save the result to the decompiled file
-            result = CreateProgram(device, raw.GetProgramType(), raw.GetProgramCode(),
-                                   raw.GetProgramCodeB());
-            disk_cache.SaveDecompiled(unique_identifier, result.first, result.second);
-        }
-
-        precompiled_shaders.insert({unique_identifier, result});
-
-        unspecialized.insert(
-            {raw.GetUniqueIdentifier(),
-             {std::move(result.first), std::move(result.second), raw.GetProgramType()}});
+        const u32 main_offset =
+            raw.GetProgramType() == ProgramType::Compute ? KERNEL_MAIN_OFFSET : STAGE_MAIN_OFFSET;
+        ConstBufferLocker locker(GetEnginesShaderType(raw.GetProgramType()));
+        const ShaderIR ir(raw.GetProgramCode(), main_offset, COMPILER_SETTINGS, locker);
+        // TODO(Rodrigo): Handle VertexA shaders
+        // std::optional<ShaderIR> ir_b;
+        // if (raw.HasProgramA()) {
+        //     ir_b.emplace(raw.GetProgramCodeB(), main_offset);
+        // }
+
+        UnspecializedShader unspecialized; // Raw code is kept for later BuildShader calls
+        unspecialized.entries = GLShader::GetEntries(ir);
+        unspecialized.program_type = raw.GetProgramType();
+        unspecialized.code = raw.GetProgramCode();
+        unspecialized.code_b = raw.GetProgramCodeB();
+        unspecialized_shaders.emplace(unique_identifier, std::move(unspecialized));
 
         if (callback) {
             callback(VideoCore::LoadCallbackStage::Decompile, i, raws.size());
         }
     }
-    return unspecialized;
+    return true;
 }
 
 Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
@@ -590,37 +691,35 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
     }
 
     auto& memory_manager{system.GPU().MemoryManager()};
-    const GPUVAddr program_addr{GetShaderAddress(system, program)};
+    const GPUVAddr address{GetShaderAddress(system, program)};
 
     // Look up shader in the cache based on address
-    const auto host_ptr{memory_manager.GetPointer(program_addr)};
+    const auto host_ptr{memory_manager.GetPointer(address)};
     Shader shader{TryGet(host_ptr)};
     if (shader) {
         return last_shaders[static_cast<std::size_t>(program)] = shader;
     }
 
     // No shader found - create a new one
-    ProgramCode program_code{GetShaderCode(memory_manager, program_addr, host_ptr)};
-    ProgramCode program_code_b;
-    const bool is_program_a{program == Maxwell::ShaderProgram::VertexA};
-    if (is_program_a) {
-        const GPUVAddr program_addr_b{GetShaderAddress(system, Maxwell::ShaderProgram::VertexB)};
-        program_code_b = GetShaderCode(memory_manager, program_addr_b,
-                                       memory_manager.GetPointer(program_addr_b));
-    }
-
-    const auto unique_identifier =
-        GetUniqueIdentifier(GetProgramType(program), program_code, program_code_b);
-    const auto cpu_addr{*memory_manager.GpuToCpuAddress(program_addr)};
-    const ShaderParameters params{disk_cache, precompiled_programs, device, cpu_addr,
-                                  host_ptr,   unique_identifier};
-
-    const auto found = precompiled_shaders.find(unique_identifier);
-    if (found == precompiled_shaders.end()) {
-        shader = CachedShader::CreateStageFromMemory(params, program, std::move(program_code),
-                                                     std::move(program_code_b));
+    ProgramCode code{GetShaderCode(memory_manager, address, host_ptr)};
+    ProgramCode code_b;
+    if (program == Maxwell::ShaderProgram::VertexA) {
+        const GPUVAddr address_b{GetShaderAddress(system, Maxwell::ShaderProgram::VertexB)};
+        code_b = GetShaderCode(memory_manager, address_b, memory_manager.GetPointer(address_b));
+    }
+
+    const auto unique_identifier = GetUniqueIdentifier(GetProgramType(program), code, code_b);
+    const auto precompiled_variants = GetPrecompiledVariants(unique_identifier);
+    const auto cpu_addr{*memory_manager.GpuToCpuAddress(address)};
+    const ShaderParameters params{system,   disk_cache, precompiled_variants, device,
+                                  cpu_addr, host_ptr,   unique_identifier};
+
+    const auto found = unspecialized_shaders.find(unique_identifier);
+    if (found == unspecialized_shaders.end()) {
+        shader = CachedShader::CreateStageFromMemory(params, program, std::move(code),
+                                                     std::move(code_b));
     } else {
-        shader = CachedShader::CreateStageFromCache(params, program, found->second);
+        shader = CachedShader::CreateFromCache(params, found->second);
     }
     Register(shader);
 
@@ -638,15 +737,16 @@ Shader ShaderCacheOpenGL::GetComputeKernel(GPUVAddr code_addr) {
     // No kernel found - create a new one
     auto code{GetShaderCode(memory_manager, code_addr, host_ptr)};
     const auto unique_identifier{GetUniqueIdentifier(ProgramType::Compute, code, {})};
+    const auto precompiled_variants = GetPrecompiledVariants(unique_identifier);
     const auto cpu_addr{*memory_manager.GpuToCpuAddress(code_addr)};
-    const ShaderParameters params{disk_cache, precompiled_programs, device, cpu_addr,
-                                  host_ptr,   unique_identifier};
+    const ShaderParameters params{system,   disk_cache, precompiled_variants, device,
+                                  cpu_addr, host_ptr,   unique_identifier};
 
-    const auto found = precompiled_shaders.find(unique_identifier);
-    if (found == precompiled_shaders.end()) {
+    const auto found = unspecialized_shaders.find(unique_identifier);
+    if (found == unspecialized_shaders.end()) {
         kernel = CachedShader::CreateKernelFromMemory(params, std::move(code));
     } else {
-        kernel = CachedShader::CreateKernelFromCache(params, found->second);
+        kernel = CachedShader::CreateFromCache(params, found->second);
     }
 
     Register(kernel);
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h
index de195cc5d..6bd7c9cf1 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.h
+++ b/src/video_core/renderer_opengl/gl_shader_cache.h
@@ -8,9 +8,10 @@
 #include <atomic>
 #include <bitset>
 #include <memory>
-#include <set>
+#include <string>
 #include <tuple>
 #include <unordered_map>
+#include <unordered_set>
 #include <vector>
 
 #include <glad/glad.h>
@@ -20,6 +21,8 @@
 #include "video_core/renderer_opengl/gl_resource_manager.h"
 #include "video_core/renderer_opengl/gl_shader_decompiler.h"
 #include "video_core/renderer_opengl/gl_shader_disk_cache.h"
+#include "video_core/shader/const_buffer_locker.h"
+#include "video_core/shader/shader_ir.h"
 
 namespace Core {
 class System;
@@ -40,11 +43,19 @@ using Shader = std::shared_ptr<CachedShader>;
 using CachedProgram = std::shared_ptr<OGLProgram>;
 using Maxwell = Tegra::Engines::Maxwell3D::Regs;
 using PrecompiledPrograms = std::unordered_map<ShaderDiskCacheUsage, CachedProgram>;
-using PrecompiledShaders = std::unordered_map<u64, GLShader::ProgramResult>;
+using PrecompiledVariants = std::vector<PrecompiledPrograms::iterator>;
+
+struct UnspecializedShader {
+    GLShader::ShaderEntries entries; ///< Entries generated from the shader's IR
+    ProgramType program_type;        ///< Program type reported by the raw cache entry
+    ProgramCode code;                ///< Program code of the raw cache entry
+    ProgramCode code_b;              ///< Program code B of the raw cache entry
+};
 
 struct ShaderParameters {
+    Core::System& system;
     ShaderDiskCacheOpenGL& disk_cache;
-    const PrecompiledPrograms& precompiled_programs;
+    const PrecompiledVariants* precompiled_variants;
     const Device& device;
     VAddr cpu_addr;
     u8* host_ptr;
@@ -55,23 +66,18 @@ class CachedShader final : public RasterizerCacheObject {
 public:
     static Shader CreateStageFromMemory(const ShaderParameters& params,
                                         Maxwell::ShaderProgram program_type,
-                                        ProgramCode&& program_code, ProgramCode&& program_code_b);
-
-    static Shader CreateStageFromCache(const ShaderParameters& params,
-                                       Maxwell::ShaderProgram program_type,
-                                       GLShader::ProgramResult result);
+                                        ProgramCode program_code, ProgramCode program_code_b);
+    static Shader CreateKernelFromMemory(const ShaderParameters& params, ProgramCode code);
 
-    static Shader CreateKernelFromMemory(const ShaderParameters& params, ProgramCode&& code);
-
-    static Shader CreateKernelFromCache(const ShaderParameters& params,
-                                        GLShader::ProgramResult result);
+    static Shader CreateFromCache(const ShaderParameters& params,
+                                  const UnspecializedShader& unspecialized);
 
     VAddr GetCpuAddr() const override {
         return cpu_addr;
     }
 
     std::size_t GetSizeInBytes() const override {
-        return shader_length;
+        return program_code.size() * sizeof(u64);
     }
 
     /// Gets the shader entries for the shader
@@ -83,24 +89,36 @@ public:
     std::tuple<GLuint, BaseBindings> GetProgramHandle(const ProgramVariant& variant);
 
 private:
+    struct LockerVariant {
+        std::unique_ptr<VideoCommon::Shader::ConstBufferLocker> locker;
+        std::unordered_map<ProgramVariant, CachedProgram> programs;
+    };
+
     explicit CachedShader(const ShaderParameters& params, ProgramType program_type,
-                          GLShader::ProgramResult result);
+                          GLShader::ShaderEntries entries, ProgramCode program_code,
+                          ProgramCode program_code_b);
 
-    CachedProgram TryLoadProgram(const ProgramVariant& variant) const;
+    void UpdateVariant();
 
-    ShaderDiskCacheUsage GetUsage(const ProgramVariant& variant) const;
+    ShaderDiskCacheUsage GetUsage(const ProgramVariant& variant,
+                                  const VideoCommon::Shader::ConstBufferLocker& locker) const;
+
+    Core::System& system;
+    ShaderDiskCacheOpenGL& disk_cache;
+    const Device& device;
 
     VAddr cpu_addr{};
+
     u64 unique_identifier{};
     ProgramType program_type{};
-    ShaderDiskCacheOpenGL& disk_cache;
-    const PrecompiledPrograms& precompiled_programs;
 
     GLShader::ShaderEntries entries;
-    std::string code;
-    std::size_t shader_length{};
 
-    std::unordered_map<ProgramVariant, CachedProgram> programs;
+    ProgramCode program_code;
+    ProgramCode program_code_b;
+
+    LockerVariant* curr_variant = nullptr;
+    std::vector<std::unique_ptr<LockerVariant>> locker_variants;
 };
 
 class ShaderCacheOpenGL final : public RasterizerCache<Shader> {
@@ -123,21 +141,26 @@ protected:
     void FlushObjectInner(const Shader& object) override {}
 
 private:
-    std::unordered_map<u64, UnspecializedShader> GenerateUnspecializedShaders(
-        const std::atomic_bool& stop_loading, const VideoCore::DiskResourceLoadCallback& callback,
-        const std::vector<ShaderDiskCacheRaw>& raws,
-        const std::unordered_map<u64, ShaderDiskCacheDecompiled>& decompiled);
+    bool GenerateUnspecializedShaders(const std::atomic_bool& stop_loading,
+                                      const VideoCore::DiskResourceLoadCallback& callback,
+                                      const std::vector<ShaderDiskCacheRaw>& raws);
 
     CachedProgram GeneratePrecompiledProgram(const ShaderDiskCacheDump& dump,
-                                             const std::set<GLenum>& supported_formats);
+                                             const std::unordered_set<GLenum>& supported_formats);
+
+    const PrecompiledVariants* GetPrecompiledVariants(u64 unique_identifier) const;
 
     Core::System& system;
     Core::Frontend::EmuWindow& emu_window;
     const Device& device;
+
     ShaderDiskCacheOpenGL disk_cache;
 
-    PrecompiledShaders precompiled_shaders;
     PrecompiledPrograms precompiled_programs;
+    std::unordered_map<u64, PrecompiledVariants> precompiled_variants;
+
+    std::unordered_map<u64, UnspecializedShader> unspecialized_shaders;
+
     std::array<Shader, Maxwell::MaxShaderProgram> last_shaders;
 };
 
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index e6b36a0f2..4f2b49170 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -19,6 +19,7 @@
 #include "video_core/renderer_opengl/gl_device.h"
 #include "video_core/renderer_opengl/gl_rasterizer.h"
 #include "video_core/renderer_opengl/gl_shader_decompiler.h"
+#include "video_core/shader/ast.h"
 #include "video_core/shader/node.h"
 #include "video_core/shader/shader_ir.h"
 
@@ -242,6 +243,26 @@ constexpr const char* GetTypeString(Type type) {
     }
 }
 
+constexpr const char* GetImageTypeDeclaration(Tegra::Shader::ImageType image_type) {
+    switch (image_type) {
+    case Tegra::Shader::ImageType::Texture1D:
+        return "1D";
+    case Tegra::Shader::ImageType::TextureBuffer:
+        return "Buffer";
+    case Tegra::Shader::ImageType::Texture1DArray:
+        return "1DArray";
+    case Tegra::Shader::ImageType::Texture2D:
+        return "2D";
+    case Tegra::Shader::ImageType::Texture2DArray:
+        return "2DArray";
+    case Tegra::Shader::ImageType::Texture3D:
+        return "3D";
+    default: // Any other image type is unexpected; "1D" is only a placeholder result
+        UNREACHABLE();
+        return "1D";
+    }
+}
+
 /// Generates code to use for a swizzle operation.
 constexpr const char* GetSwizzle(u32 element) {
     constexpr std::array swizzle = {".x", ".y", ".z", ".w"};
@@ -314,39 +335,24 @@ constexpr bool IsVertexShader(ProgramType stage) {
     return stage == ProgramType::VertexA || stage == ProgramType::VertexB;
 }
 
+class ASTDecompiler;
+class ExprDecompiler;
+
 class GLSLDecompiler final {
 public:
     explicit GLSLDecompiler(const Device& device, const ShaderIR& ir, ProgramType stage,
                             std::string suffix)
         : device{device}, ir{ir}, stage{stage}, suffix{suffix}, header{ir.GetHeader()} {}
 
-    void Decompile() {
-        DeclareVertex();
-        DeclareGeometry();
-        DeclareRegisters();
-        DeclarePredicates();
-        DeclareLocalMemory();
-        DeclareSharedMemory();
-        DeclareInternalFlags();
-        DeclareInputAttributes();
-        DeclareOutputAttributes();
-        DeclareConstantBuffers();
-        DeclareGlobalMemory();
-        DeclareSamplers();
-        DeclarePhysicalAttributeReader();
-        DeclareImages();
-
-        code.AddLine("void execute_{}() {{", suffix);
-        ++code.scope;
-
+    void DecompileBranchMode() {
         // VM's program counter
         const auto first_address = ir.GetBasicBlocks().begin()->first;
         code.AddLine("uint jmp_to = {}U;", first_address);
 
         // TODO(Subv): Figure out the actual depth of the flow stack, for now it seems
         // unlikely that shaders will use 20 nested SSYs and PBKs.
+        constexpr u32 FLOW_STACK_SIZE = 20;
         if (!ir.IsFlowStackDisabled()) {
-            constexpr u32 FLOW_STACK_SIZE = 20;
             for (const auto stack : std::array{MetaStackClass::Ssy, MetaStackClass::Pbk}) {
                 code.AddLine("uint {}[{}];", FlowStackName(stack), FLOW_STACK_SIZE);
                 code.AddLine("uint {} = 0U;", FlowStackTopName(stack));
@@ -372,38 +378,47 @@ public:
         code.AddLine("default: return;");
         code.AddLine("}}");
 
-        for (std::size_t i = 0; i < 2; ++i) {
-            --code.scope;
-            code.AddLine("}}");
+        --code.scope;
+        code.AddLine("}}");
+    }
+
+    void DecompileAST();
+
+    void Decompile() {
+        DeclareVertex();
+        DeclareGeometry();
+        DeclareRegisters();
+        DeclarePredicates();
+        DeclareLocalMemory();
+        DeclareInternalFlags();
+        DeclareInputAttributes();
+        DeclareOutputAttributes();
+        DeclareConstantBuffers();
+        DeclareGlobalMemory();
+        DeclareSamplers();
+        DeclarePhysicalAttributeReader();
+        DeclareImages(); // Image uniforms must exist before the image operations reference them
+
+        code.AddLine("void execute_{}() {{", suffix);
+        ++code.scope;
+        if (ir.IsDecompiled()) {
+            DecompileAST();
+        } else {
+            DecompileBranchMode();
         }
+
+        --code.scope;
+        code.AddLine("}}");
     }
 
     std::string GetResult() {
         return code.GetResult();
     }
 
-    ShaderEntries GetShaderEntries() const {
-        ShaderEntries entries;
-        for (const auto& cbuf : ir.GetConstantBuffers()) {
-            entries.const_buffers.emplace_back(cbuf.second.GetMaxOffset(), cbuf.second.IsIndirect(),
-                                               cbuf.first);
-        }
-        for (const auto& sampler : ir.GetSamplers()) {
-            entries.samplers.emplace_back(sampler);
-        }
-        for (const auto& [offset, image] : ir.GetImages()) {
-            entries.images.emplace_back(image);
-        }
-        for (const auto& [base, usage] : ir.GetGlobalMemory()) {
-            entries.global_memory_entries.emplace_back(base.cbuf_index, base.cbuf_offset,
-                                                       usage.is_read, usage.is_written);
-        }
-        entries.clip_distances = ir.GetClipDistances();
-        entries.shader_length = ir.GetLength();
-        return entries;
-    }
-
 private:
+    friend class ASTDecompiler;
+    friend class ExprDecompiler;
+
     void DeclareVertex() {
         if (!IsVertexShader(stage))
             return;
@@ -720,27 +735,7 @@ private:
 
     void DeclareImages() {
         const auto& images{ir.GetImages()};
-        for (const auto& [offset, image] : images) {
-            const char* image_type = [&] {
-                switch (image.GetType()) {
-                case Tegra::Shader::ImageType::Texture1D:
-                    return "1D";
-                case Tegra::Shader::ImageType::TextureBuffer:
-                    return "Buffer";
-                case Tegra::Shader::ImageType::Texture1DArray:
-                    return "1DArray";
-                case Tegra::Shader::ImageType::Texture2D:
-                    return "2D";
-                case Tegra::Shader::ImageType::Texture2DArray:
-                    return "2DArray";
-                case Tegra::Shader::ImageType::Texture3D:
-                    return "3D";
-                default:
-                    UNREACHABLE();
-                    return "1D";
-                }
-            }();
-
+        for (const auto& image : images) {
             std::string qualifier = "coherent volatile";
             if (image.IsRead() && !image.IsWritten()) {
                 qualifier += " readonly";
@@ -748,13 +743,10 @@ private:
                 qualifier += " writeonly";
             }
 
-            std::string format;
-            if (image.IsAtomic()) {
-                format = "r32ui, ";
-            }
-
+            const char* format = image.IsAtomic() ? "r32ui, " : "";
+            const char* type_declaration = GetImageTypeDeclaration(image.GetType());
             code.AddLine("layout ({}binding = IMAGE_BINDING_{}) {} uniform uimage{} {};", format,
-                         image.GetIndex(), qualifier, image_type, GetImage(image));
+                         image.GetIndex(), qualifier, type_declaration, GetImage(image));
         }
         if (!images.empty()) {
             code.AddNewLine();
@@ -1135,7 +1127,7 @@ private:
         for (const auto& variant : extras) {
             if (const auto argument = std::get_if<TextureArgument>(&variant)) {
                 expr += GenerateTextureArgument(*argument);
-            } else if (std::get_if<TextureAoffi>(&variant)) {
+            } else if (std::holds_alternative<TextureAoffi>(variant)) {
                 expr += GenerateTextureAoffi(meta->aoffi);
             } else {
                 UNREACHABLE();
@@ -1145,8 +1137,8 @@ private:
         return expr + ')';
     }
 
-    std::string GenerateTextureArgument(TextureArgument argument) {
-        const auto [type, operand] = argument;
+    std::string GenerateTextureArgument(const TextureArgument& argument) {
+        const auto& [type, operand] = argument;
         if (operand == nullptr) {
             return {};
         }
@@ -1222,7 +1214,7 @@ private:
 
     std::string BuildImageValues(Operation operation) {
         constexpr std::array constructors{"uint", "uvec2", "uvec3", "uvec4"};
-        const auto meta{std::get<MetaImage>(operation.GetMeta())};
+        const auto& meta{std::get<MetaImage>(operation.GetMeta())};
 
         const std::size_t values_count{meta.values.size()};
         std::string expr = fmt::format("{}(", constructors.at(values_count - 1));
@@ -1387,6 +1379,26 @@ private:
         return GenerateUnary(operation, "float", Type::Float, type);
     }
 
+    Expression FSwizzleAdd(Operation operation) {
+        const std::string op_a = VisitOperand(operation, 0).AsFloat();
+        const std::string op_b = VisitOperand(operation, 1).AsFloat();
+        // Fallback without shader ballot: a plain sum, parenthesized to remain a single operand
+        if (!device.HasShaderBallot()) {
+            LOG_ERROR(Render_OpenGL, "Shader ballot is unavailable but required by the shader");
+            return {fmt::format("({} + {})", op_a, op_b), Type::Float};
+        }
+        // Select this invocation's 2-bit field of the instruction mask (invocation index mod 4)
+        const std::string instr_mask = VisitOperand(operation, 2).AsUint();
+        const std::string mask = code.GenerateTemporary();
+        code.AddLine("uint {} = ({} >> ((gl_SubGroupInvocationARB & 3) << 1)) & 3;", mask,
+                     instr_mask);
+
+        const std::string modifier_a = fmt::format("fswzadd_modifiers_a[{}]", mask);
+        const std::string modifier_b = fmt::format("fswzadd_modifiers_b[{}]", mask);
+        return {fmt::format("(({} * {}) + ({} * {}))", op_a, modifier_a, op_b, modifier_b),
+                Type::Float};
+    }
+
     Expression ICastFloat(Operation operation) {
         return GenerateUnary(operation, "int", Type::Int, Type::Float);
     }
@@ -1494,6 +1506,8 @@ private:
         case Tegra::Shader::HalfType::H1_H1:
             return {fmt::format("vec2({}[1])", operand.AsHalfFloat()), Type::HalfFloat};
         }
+        UNREACHABLE();
+        return {"0", Type::Int};
     }
 
     Expression HMergeF32(Operation operation) {
@@ -1676,7 +1690,7 @@ private:
 
         const auto type = meta->sampler.IsShadow() ? Type::Float : Type::Int;
         return {GenerateTexture(operation, "Gather",
-                                {TextureArgument{type, meta->component}, TextureAoffi{}}) +
+                                {TextureAoffi{}, TextureArgument{type, meta->component}}) +
                     GetSwizzle(meta->element),
                 Type::Float};
     }
@@ -1765,14 +1779,14 @@ private:
             return {"0", Type::Int};
         }
 
-        const auto meta{std::get<MetaImage>(operation.GetMeta())};
+        const auto& meta{std::get<MetaImage>(operation.GetMeta())};
         return {fmt::format("imageLoad({}, {}){}", GetImage(meta.image),
                             BuildIntegerCoordinates(operation), GetSwizzle(meta.element)),
                 Type::Uint};
     }
 
     Expression ImageStore(Operation operation) {
-        const auto meta{std::get<MetaImage>(operation.GetMeta())};
+        const auto& meta{std::get<MetaImage>(operation.GetMeta())};
         code.AddLine("imageStore({}, {}, {});", GetImage(meta.image),
                      BuildIntegerCoordinates(operation), BuildImageValues(operation));
         return {};
@@ -1780,7 +1794,7 @@ private:
 
     template <const std::string_view& opname>
     Expression AtomicImage(Operation operation) {
-        const auto meta{std::get<MetaImage>(operation.GetMeta())};
+        const auto& meta{std::get<MetaImage>(operation.GetMeta())};
         ASSERT(meta.values.size() == 1);
 
         return {fmt::format("imageAtomic{}({}, {}, {})", opname, GetImage(meta.image),
@@ -1822,10 +1836,9 @@ private:
         return {};
     }
 
-    Expression Exit(Operation operation) {
+    void PreExit() {
         if (stage != ProgramType::Fragment) {
-            code.AddLine("return;");
-            return {};
+            return;
         }
         const auto& used_registers = ir.GetRegisters();
         const auto SafeGetRegister = [&](u32 reg) -> Expression {
@@ -1857,7 +1870,10 @@ private:
             // already contains one past the last color register.
             code.AddLine("gl_FragDepth = {};", SafeGetRegister(current_reg + 1).AsFloat());
         }
+    }
 
+    Expression Exit(Operation operation) {
+        PreExit();
         code.AddLine("return;");
         return {};
     }
@@ -1876,10 +1892,6 @@ private:
     Expression EmitVertex(Operation operation) {
         ASSERT_MSG(stage == ProgramType::Geometry,
                    "EmitVertex is expected to be used in a geometry shader.");
-
-        // If a geometry shader is attached, it will always flip (it's the last stage before
-        // fragment). For more info about flipping, refer to gl_shader_gen.cpp.
-        code.AddLine("gl_Position.xy *= viewport_flip.xy;");
         code.AddLine("EmitVertex();");
         return {};
     }
@@ -1887,14 +1899,12 @@ private:
     Expression EndPrimitive(Operation operation) {
         ASSERT_MSG(stage == ProgramType::Geometry,
                    "EndPrimitive is expected to be used in a geometry shader.");
-
         code.AddLine("EndPrimitive();");
         return {};
     }
 
     Expression YNegate(Operation operation) {
-        // Config pack's third value is Y_NEGATE's state.
-        return {"config_pack[2]", Type::Uint};
+        return {"y_direction", Type::Float};
     }
 
     template <u32 element>
@@ -1946,34 +1956,24 @@ private:
         return Vote(operation, "allThreadsEqualNV");
     }
 
-    template <const std::string_view& func>
-    Expression Shuffle(Operation operation) {
-        const std::string value = VisitOperand(operation, 0).AsFloat();
-        if (!device.HasWarpIntrinsics()) {
-            LOG_ERROR(Render_OpenGL, "Nvidia shuffle intrinsics are required by this shader");
-            // On a "single-thread" device we are either on the same thread or out of bounds. Both
-            // cases return the passed value.
-            return {value, Type::Float};
+    Expression ThreadId(Operation operation) {
+        if (!device.HasShaderBallot()) {
+            LOG_ERROR(Render_OpenGL, "Shader ballot is unavailable but required by the shader");
+            return {"0U", Type::Uint};
         }
-
-        const std::string index = VisitOperand(operation, 1).AsUint();
-        const std::string width = VisitOperand(operation, 2).AsUint();
-        return {fmt::format("{}({}, {}, {})", func, value, index, width), Type::Float};
+        return {"gl_SubGroupInvocationARB", Type::Uint};
     }
 
-    template <const std::string_view& func>
-    Expression InRangeShuffle(Operation operation) {
-        const std::string index = VisitOperand(operation, 0).AsUint();
-        const std::string width = VisitOperand(operation, 1).AsUint();
-        if (!device.HasWarpIntrinsics()) {
-            // On a "single-thread" device we are only in bounds when the requested index is 0.
-            return {fmt::format("({} == 0U)", index), Type::Bool};
+    Expression ShuffleIndexed(Operation operation) {
+        std::string value = VisitOperand(operation, 0).AsFloat();
+
+        if (!device.HasShaderBallot()) {
+            LOG_ERROR(Render_OpenGL, "Shader ballot is unavailable but required by the shader");
+            return {std::move(value), Type::Float};
         }
 
-        const std::string in_range = code.GenerateTemporary();
-        code.AddLine("bool {};", in_range);
-        code.AddLine("{}(0U, {}, {}, {});", func, index, width, in_range);
-        return {in_range, Type::Bool};
+        const std::string index = VisitOperand(operation, 1).AsUint();
+        return {fmt::format("readInvocationARB({}, {})", value, index), Type::Float};
     }
 
     struct Func final {
@@ -1985,11 +1985,6 @@ private:
         static constexpr std::string_view Or = "Or";
         static constexpr std::string_view Xor = "Xor";
         static constexpr std::string_view Exchange = "Exchange";
-
-        static constexpr std::string_view ShuffleIndexed = "shuffleNV";
-        static constexpr std::string_view ShuffleUp = "shuffleUpNV";
-        static constexpr std::string_view ShuffleDown = "shuffleDownNV";
-        static constexpr std::string_view ShuffleButterfly = "shuffleXorNV";
     };
 
     static constexpr std::array operation_decompilers = {
@@ -2020,6 +2015,7 @@ private:
         &GLSLDecompiler::FTrunc,
         &GLSLDecompiler::FCastInteger<Type::Int>,
         &GLSLDecompiler::FCastInteger<Type::Uint>,
+        &GLSLDecompiler::FSwizzleAdd,
 
         &GLSLDecompiler::Add<Type::Int>,
         &GLSLDecompiler::Mul<Type::Int>,
@@ -2155,15 +2151,8 @@ private:
         &GLSLDecompiler::VoteAny,
         &GLSLDecompiler::VoteEqual,
 
-        &GLSLDecompiler::Shuffle<Func::ShuffleIndexed>,
-        &GLSLDecompiler::Shuffle<Func::ShuffleUp>,
-        &GLSLDecompiler::Shuffle<Func::ShuffleDown>,
-        &GLSLDecompiler::Shuffle<Func::ShuffleButterfly>,
-
-        &GLSLDecompiler::InRangeShuffle<Func::ShuffleIndexed>,
-        &GLSLDecompiler::InRangeShuffle<Func::ShuffleUp>,
-        &GLSLDecompiler::InRangeShuffle<Func::ShuffleDown>,
-        &GLSLDecompiler::InRangeShuffle<Func::ShuffleButterfly>,
+        &GLSLDecompiler::ThreadId,
+        &GLSLDecompiler::ShuffleIndexed,
     };
     static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount));
 
@@ -2229,7 +2218,7 @@ private:
         code.AddLine("#ifdef SAMPLER_{}_IS_BUFFER", sampler.GetIndex());
     }
 
-    std::string GetDeclarationWithSuffix(u32 index, const std::string& name) const {
+    std::string GetDeclarationWithSuffix(u32 index, std::string_view name) const {
         return fmt::format("{}_{}_{}", name, index, suffix);
     }
 
@@ -2254,27 +2243,259 @@ private:
     ShaderWriter code;
 };
 
+std::string GetFlowVariable(u32 i) {
+    return fmt::format("flow_var_{}", i);
+}
+
+class ExprDecompiler {
+public:
+    explicit ExprDecompiler(GLSLDecompiler& decomp) : decomp{decomp} {}
+
+    void operator()(const ExprAnd& expr) {
+        inner += "( ";
+        std::visit(*this, *expr.operand1);
+        inner += " && ";
+        std::visit(*this, *expr.operand2);
+        inner += ')';
+    }
+
+    void operator()(const ExprOr& expr) {
+        inner += "( ";
+        std::visit(*this, *expr.operand1);
+        inner += " || ";
+        std::visit(*this, *expr.operand2);
+        inner += ')';
+    }
+
+    void operator()(const ExprNot& expr) {
+        inner += '!';
+        std::visit(*this, *expr.operand1);
+    }
+
+    void operator()(const ExprPredicate& expr) {
+        const auto pred = static_cast<Tegra::Shader::Pred>(expr.predicate);
+        inner += decomp.GetPredicate(pred);
+    }
+
+    void operator()(const ExprCondCode& expr) {
+        const Node cc = decomp.ir.GetConditionCode(expr.cc);
+        std::string target;
+
+        if (const auto pred = std::get_if<PredicateNode>(&*cc)) {
+            const auto index = pred->GetIndex();
+            switch (index) {
+            case Tegra::Shader::Pred::NeverExecute:
+                target = "false";
+                break;
+            case Tegra::Shader::Pred::UnusedIndex:
+                target = "true";
+                break;
+            default:
+                target = decomp.GetPredicate(index);
+                break;
+            }
+        } else if (const auto flag = std::get_if<InternalFlagNode>(&*cc)) {
+            target = decomp.GetInternalFlag(flag->GetFlag());
+        } else {
+            UNREACHABLE();
+        }
+        inner += target;
+    }
+
+    void operator()(const ExprVar& expr) {
+        inner += GetFlowVariable(expr.var_index);
+    }
+
+    void operator()(const ExprBoolean& expr) {
+        inner += expr.value ? "true" : "false";
+    }
+
+    void operator()(const VideoCommon::Shader::ExprGprEqual& expr) {
+        inner += // ftou is #defined to floatBitsToUint, so the register bits are compared
+            "( ftou(" + decomp.GetRegister(expr.gpr) + ") == " + std::to_string(expr.value) + ')';
+    }
+
+    const std::string& GetResult() const {
+        return inner;
+    }
+
+private:
+    std::string inner;
+    GLSLDecompiler& decomp;
+};
+
+class ASTDecompiler {
+public:
+    explicit ASTDecompiler(GLSLDecompiler& decomp) : decomp{decomp} {}
+
+    void operator()(const ASTProgram& ast) {
+        ASTNode current = ast.nodes.GetFirst();
+        while (current) {
+            Visit(current);
+            current = current->GetNext();
+        }
+    }
+
+    void operator()(const ASTIfThen& ast) {
+        ExprDecompiler expr_parser{decomp};
+        std::visit(expr_parser, *ast.condition);
+        decomp.code.AddLine("if ({}) {{", expr_parser.GetResult());
+        decomp.code.scope++;
+        ASTNode current = ast.nodes.GetFirst();
+        while (current) {
+            Visit(current);
+            current = current->GetNext();
+        }
+        decomp.code.scope--;
+        decomp.code.AddLine("}}");
+    }
+
+    void operator()(const ASTIfElse& ast) {
+        decomp.code.AddLine("else {{");
+        decomp.code.scope++;
+        ASTNode current = ast.nodes.GetFirst();
+        while (current) {
+            Visit(current);
+            current = current->GetNext();
+        }
+        decomp.code.scope--;
+        decomp.code.AddLine("}}");
+    }
+
+    void operator()([[maybe_unused]] const ASTBlockEncoded& ast) {
+        UNREACHABLE();
+    }
+
+    void operator()(const ASTBlockDecoded& ast) {
+        decomp.VisitBlock(ast.nodes);
+    }
+
+    void operator()(const ASTVarSet& ast) {
+        ExprDecompiler expr_parser{decomp};
+        std::visit(expr_parser, *ast.condition);
+        decomp.code.AddLine("{} = {};", GetFlowVariable(ast.index), expr_parser.GetResult());
+    }
+
+    void operator()(const ASTLabel& ast) {
+        decomp.code.AddLine("// Label_{}:", ast.index);
+    }
+
+    void operator()([[maybe_unused]] const ASTGoto& ast) {
+        UNREACHABLE();
+    }
+
+    void operator()(const ASTDoWhile& ast) {
+        ExprDecompiler expr_parser{decomp};
+        std::visit(expr_parser, *ast.condition);
+        decomp.code.AddLine("do {{");
+        decomp.code.scope++;
+        ASTNode current = ast.nodes.GetFirst();
+        while (current) {
+            Visit(current);
+            current = current->GetNext();
+        }
+        decomp.code.scope--;
+        decomp.code.AddLine("}} while({});", expr_parser.GetResult());
+    }
+
+    void operator()(const ASTReturn& ast) {
+        const bool is_true = VideoCommon::Shader::ExprIsTrue(ast.condition);
+        if (!is_true) {
+            ExprDecompiler expr_parser{decomp};
+            std::visit(expr_parser, *ast.condition);
+            decomp.code.AddLine("if ({}) {{", expr_parser.GetResult());
+            decomp.code.scope++;
+        }
+        if (ast.kills) {
+            decomp.code.AddLine("discard;");
+        } else {
+            decomp.PreExit();
+            decomp.code.AddLine("return;");
+        }
+        if (!is_true) {
+            decomp.code.scope--;
+            decomp.code.AddLine("}}");
+        }
+    }
+
+    void operator()(const ASTBreak& ast) {
+        const bool is_true = VideoCommon::Shader::ExprIsTrue(ast.condition);
+        if (!is_true) {
+            ExprDecompiler expr_parser{decomp};
+            std::visit(expr_parser, *ast.condition);
+            decomp.code.AddLine("if ({}) {{", expr_parser.GetResult());
+            decomp.code.scope++;
+        }
+        decomp.code.AddLine("break;");
+        if (!is_true) {
+            decomp.code.scope--;
+            decomp.code.AddLine("}}");
+        }
+    }
+
+    void Visit(const ASTNode& node) {
+        std::visit(*this, *node->GetInnerData());
+    }
+
+private:
+    GLSLDecompiler& decomp;
+};
+
+void GLSLDecompiler::DecompileAST() {
+    // Declare one GLSL bool, initialised to false, for each flow variable the IR reports.
+    for (u32 index = 0, count = ir.GetASTNumVariables(); index < count; ++index) {
+        code.AddLine("bool {} = false;", GetFlowVariable(index));
+    }
+    // Hand the IR's AST program to the visitor defined above.
+    ASTDecompiler ast_visitor{*this};
+    ast_visitor.Visit(ir.GetASTProgram());
+}
+
 } // Anonymous namespace
 
+ShaderEntries GetEntries(const VideoCommon::Shader::ShaderIR& ir) {
+    // Copy the resource tables exposed by the IR into a ShaderEntries value.
+    ShaderEntries result;
+    for (const auto& [index, cbuf] : ir.GetConstantBuffers()) {
+        result.const_buffers.emplace_back(cbuf.GetMaxOffset(), cbuf.IsIndirect(), index);
+    }
+    for (const auto& [base, usage] : ir.GetGlobalMemory()) {
+        result.global_memory_entries.emplace_back(base.cbuf_index, base.cbuf_offset, usage.is_read,
+                                                  usage.is_written);
+    }
+    for (const auto& sampler : ir.GetSamplers()) {
+        result.samplers.emplace_back(sampler);
+    }
+    for (const auto& image : ir.GetImages()) {
+        result.images.emplace_back(image);
+    }
+    result.clip_distances = ir.GetClipDistances();
+    result.shader_length = ir.GetLength();
+    return result;
+}
+
 std::string GetCommonDeclarations() {
-    return fmt::format(
-        "#define ftoi floatBitsToInt\n"
-        "#define ftou floatBitsToUint\n"
-        "#define itof intBitsToFloat\n"
-        "#define utof uintBitsToFloat\n\n"
-        "bvec2 HalfFloatNanComparison(bvec2 comparison, vec2 pair1, vec2 pair2) {{\n"
-        "    bvec2 is_nan1 = isnan(pair1);\n"
-        "    bvec2 is_nan2 = isnan(pair2);\n"
-        "    return bvec2(comparison.x || is_nan1.x || is_nan2.x, comparison.y || is_nan1.y || "
-        "is_nan2.y);\n"
-        "}}\n\n");
+    return R"(#define ftoi floatBitsToInt
+#define ftou floatBitsToUint
+#define itof intBitsToFloat
+#define utof uintBitsToFloat
+
+bvec2 HalfFloatNanComparison(bvec2 comparison, vec2 pair1, vec2 pair2) {
+    bvec2 is_nan1 = isnan(pair1);
+    bvec2 is_nan2 = isnan(pair2);
+    return bvec2(comparison.x || is_nan1.x || is_nan2.x, comparison.y || is_nan1.y || is_nan2.y);
+}
+
+const float fswzadd_modifiers_a[] = float[4](-1.0f,  1.0f, -1.0f,  0.0f );
+const float fswzadd_modifiers_b[] = float[4](-1.0f, -1.0f,  1.0f, -1.0f );
+)";
 }
 
-ProgramResult Decompile(const Device& device, const ShaderIR& ir, ProgramType stage,
-                        const std::string& suffix) {
+std::string Decompile(const Device& device, const ShaderIR& ir, ProgramType stage,
+                      const std::string& suffix) {
     GLSLDecompiler decompiler(device, ir, stage, suffix);
     decompiler.Decompile();
-    return {decompiler.GetResult(), decompiler.GetShaderEntries()};
+    return decompiler.GetResult();
 }
 
 } // namespace OpenGL::GLShader
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.h b/src/video_core/renderer_opengl/gl_shader_decompiler.h
index e538dc001..b1e75e6cc 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.h
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.h
@@ -34,10 +34,7 @@ enum class ProgramType : u32 {
 
 namespace OpenGL::GLShader {
 
-struct ShaderEntries;
-
 using Maxwell = Tegra::Engines::Maxwell3D::Regs;
-using ProgramResult = std::pair<std::string, ShaderEntries>;
 using SamplerEntry = VideoCommon::Shader::Sampler;
 using ImageEntry = VideoCommon::Shader::Image;
 
@@ -85,17 +82,18 @@ private:
 
 struct ShaderEntries {
     std::vector<ConstBufferEntry> const_buffers;
+    std::vector<GlobalMemoryEntry> global_memory_entries;
     std::vector<SamplerEntry> samplers;
-    std::vector<SamplerEntry> bindless_samplers;
     std::vector<ImageEntry> images;
-    std::vector<GlobalMemoryEntry> global_memory_entries;
     std::array<bool, Maxwell::NumClipDistances> clip_distances{};
     std::size_t shader_length{};
 };
 
+ShaderEntries GetEntries(const VideoCommon::Shader::ShaderIR& ir);
+
 std::string GetCommonDeclarations();
 
-ProgramResult Decompile(const Device& device, const VideoCommon::Shader::ShaderIR& ir,
-                        ProgramType stage, const std::string& suffix);
+std::string Decompile(const Device& device, const VideoCommon::Shader::ShaderIR& ir,
+                      ProgramType stage, const std::string& suffix);
 
 } // namespace OpenGL::GLShader
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
index 6a7012b54..184a565e6 100644
--- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
@@ -22,6 +22,29 @@
 
 namespace OpenGL {
 
+using VideoCommon::Shader::BindlessSamplerMap;
+using VideoCommon::Shader::BoundSamplerMap;
+using VideoCommon::Shader::KeyMap;
+
+namespace {
+
+struct ConstBufferKey {
+    u32 cbuf;
+    u32 offset;
+    u32 value;
+};
+
+struct BoundSamplerKey {
+    u32 offset;
+    Tegra::Engines::SamplerDescriptor sampler;
+};
+
+struct BindlessSamplerKey {
+    u32 cbuf;
+    u32 offset;
+    Tegra::Engines::SamplerDescriptor sampler;
+};
+
 using ShaderCacheVersionHash = std::array<u8, 64>;
 
 enum class TransferableEntryKind : u32 {
@@ -29,18 +52,10 @@ enum class TransferableEntryKind : u32 {
     Usage,
 };
 
-enum class PrecompiledEntryKind : u32 {
-    Decompiled,
-    Dump,
-};
-
-constexpr u32 NativeVersion = 4;
+constexpr u32 NativeVersion = 5;
 
 // Making sure sizes doesn't change by accident
 static_assert(sizeof(BaseBindings) == 16);
-static_assert(sizeof(ShaderDiskCacheUsage) == 40);
-
-namespace {
 
 ShaderCacheVersionHash GetShaderCacheVersionHash() {
     ShaderCacheVersionHash hash{};
@@ -49,13 +64,11 @@ ShaderCacheVersionHash GetShaderCacheVersionHash() {
     return hash;
 }
 
-} // namespace
+} // Anonymous namespace
 
 ShaderDiskCacheRaw::ShaderDiskCacheRaw(u64 unique_identifier, ProgramType program_type,
-                                       u32 program_code_size, u32 program_code_size_b,
                                        ProgramCode program_code, ProgramCode program_code_b)
     : unique_identifier{unique_identifier}, program_type{program_type},
-      program_code_size{program_code_size}, program_code_size_b{program_code_size_b},
       program_code{std::move(program_code)}, program_code_b{std::move(program_code_b)} {}
 
 ShaderDiskCacheRaw::ShaderDiskCacheRaw() = default;
@@ -90,15 +103,16 @@ bool ShaderDiskCacheRaw::Load(FileUtil::IOFile& file) {
 bool ShaderDiskCacheRaw::Save(FileUtil::IOFile& file) const {
     if (file.WriteObject(unique_identifier) != 1 ||
         file.WriteObject(static_cast<u32>(program_type)) != 1 ||
-        file.WriteObject(program_code_size) != 1 || file.WriteObject(program_code_size_b) != 1) {
+        file.WriteObject(static_cast<u32>(program_code.size())) != 1 ||
+        file.WriteObject(static_cast<u32>(program_code_b.size())) != 1) {
         return false;
     }
 
-    if (file.WriteArray(program_code.data(), program_code_size) != program_code_size)
+    if (file.WriteArray(program_code.data(), program_code.size()) != program_code.size())
         return false;
 
     if (HasProgramA() &&
-        file.WriteArray(program_code_b.data(), program_code_size_b) != program_code_size_b) {
+        file.WriteArray(program_code_b.data(), program_code_b.size()) != program_code_b.size()) {
         return false;
     }
     return true;
@@ -112,44 +126,47 @@ std::optional<std::pair<std::vector<ShaderDiskCacheRaw>, std::vector<ShaderDiskC
 ShaderDiskCacheOpenGL::LoadTransferable() {
     // Skip games without title id
     const bool has_title_id = system.CurrentProcess()->GetTitleID() != 0;
-    if (!Settings::values.use_disk_shader_cache || !has_title_id)
+    if (!Settings::values.use_disk_shader_cache || !has_title_id) {
         return {};
-    tried_to_load = true;
+    }
 
     FileUtil::IOFile file(GetTransferablePath(), "rb");
     if (!file.IsOpen()) {
         LOG_INFO(Render_OpenGL, "No transferable shader cache found for game with title id={}",
                  GetTitleID());
+        is_usable = true;
         return {};
     }
 
     u32 version{};
     if (file.ReadBytes(&version, sizeof(version)) != sizeof(version)) {
         LOG_ERROR(Render_OpenGL,
-                  "Failed to get transferable cache version for title id={} - skipping",
+                  "Failed to get transferable cache version for title id={}, skipping",
                   GetTitleID());
         return {};
     }
 
     if (version < NativeVersion) {
-        LOG_INFO(Render_OpenGL, "Transferable shader cache is old - removing");
+        LOG_INFO(Render_OpenGL, "Transferable shader cache is old, removing");
         file.Close();
         InvalidateTransferable();
+        is_usable = true;
         return {};
     }
     if (version > NativeVersion) {
         LOG_WARNING(Render_OpenGL, "Transferable shader cache was generated with a newer version "
-                                   "of the emulator - skipping");
+                                   "of the emulator, skipping");
         return {};
     }
 
     // Version is valid, load the shaders
+    constexpr const char error_loading[] = "Failed to load transferable cache entry, skipping";
     std::vector<ShaderDiskCacheRaw> raws;
     std::vector<ShaderDiskCacheUsage> usages;
     while (file.Tell() < file.GetSize()) {
         TransferableEntryKind kind{};
         if (file.ReadBytes(&kind, sizeof(u32)) != sizeof(u32)) {
-            LOG_ERROR(Render_OpenGL, "Failed to read transferable file - skipping");
+            LOG_ERROR(Render_OpenGL, "Failed to read transferable file, skipping");
             return {};
         }
 
@@ -157,7 +174,7 @@ ShaderDiskCacheOpenGL::LoadTransferable() {
         case TransferableEntryKind::Raw: {
             ShaderDiskCacheRaw entry;
             if (!entry.Load(file)) {
-                LOG_ERROR(Render_OpenGL, "Failed to load transferable raw entry - skipping");
+                LOG_ERROR(Render_OpenGL, error_loading);
                 return {};
             }
             transferable.insert({entry.GetUniqueIdentifier(), {}});
@@ -165,30 +182,62 @@ ShaderDiskCacheOpenGL::LoadTransferable() {
             break;
         }
         case TransferableEntryKind::Usage: {
-            ShaderDiskCacheUsage usage{};
-            if (file.ReadBytes(&usage, sizeof(usage)) != sizeof(usage)) {
-                LOG_ERROR(Render_OpenGL, "Failed to load transferable usage entry - skipping");
+            ShaderDiskCacheUsage usage;
+
+            u32 num_keys{};
+            u32 num_bound_samplers{};
+            u32 num_bindless_samplers{};
+            if (file.ReadArray(&usage.unique_identifier, 1) != 1 ||
+                file.ReadArray(&usage.variant, 1) != 1 || file.ReadArray(&num_keys, 1) != 1 ||
+                file.ReadArray(&num_bound_samplers, 1) != 1 ||
+                file.ReadArray(&num_bindless_samplers, 1) != 1) {
+                LOG_ERROR(Render_OpenGL, error_loading);
                 return {};
             }
+
+            std::vector<ConstBufferKey> keys(num_keys);
+            std::vector<BoundSamplerKey> bound_samplers(num_bound_samplers);
+            std::vector<BindlessSamplerKey> bindless_samplers(num_bindless_samplers);
+            if (file.ReadArray(keys.data(), keys.size()) != keys.size() ||
+                file.ReadArray(bound_samplers.data(), bound_samplers.size()) !=
+                    bound_samplers.size() ||
+                file.ReadArray(bindless_samplers.data(), bindless_samplers.size()) !=
+                    bindless_samplers.size()) {
+                LOG_ERROR(Render_OpenGL, error_loading);
+                return {};
+            }
+            for (const auto& key : keys) {
+                usage.keys.insert({{key.cbuf, key.offset}, key.value});
+            }
+            for (const auto& key : bound_samplers) {
+                usage.bound_samplers.emplace(key.offset, key.sampler);
+            }
+            for (const auto& key : bindless_samplers) {
+                usage.bindless_samplers.insert({{key.cbuf, key.offset}, key.sampler});
+            }
+
             usages.push_back(std::move(usage));
             break;
         }
         default:
-            LOG_ERROR(Render_OpenGL, "Unknown transferable shader cache entry kind={} - skipping",
+            LOG_ERROR(Render_OpenGL, "Unknown transferable shader cache entry kind={}, skipping",
                       static_cast<u32>(kind));
             return {};
         }
     }
 
-    return {{raws, usages}};
+    is_usable = true;
+    return {{std::move(raws), std::move(usages)}};
 }
 
-std::pair<std::unordered_map<u64, ShaderDiskCacheDecompiled>, ShaderDumpsMap>
+std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>
 ShaderDiskCacheOpenGL::LoadPrecompiled() {
-    if (!IsUsable())
+    if (!is_usable) {
         return {};
+    }
 
-    FileUtil::IOFile file(GetPrecompiledPath(), "rb");
+    std::string path = GetPrecompiledPath();
+    FileUtil::IOFile file(path, "rb");
     if (!file.IsOpen()) {
         LOG_INFO(Render_OpenGL, "No precompiled shader cache found for game with title id={}",
                  GetTitleID());
@@ -198,7 +247,7 @@ ShaderDiskCacheOpenGL::LoadPrecompiled() {
     const auto result = LoadPrecompiledFile(file);
     if (!result) {
         LOG_INFO(Render_OpenGL,
-                 "Failed to load precompiled cache for game with title id={} - removing",
+                 "Failed to load precompiled cache for game with title id={}, removing",
                  GetTitleID());
         file.Close();
         InvalidatePrecompiled();
@@ -207,7 +256,7 @@ ShaderDiskCacheOpenGL::LoadPrecompiled() {
     return *result;
 }
 
-std::optional<std::pair<std::unordered_map<u64, ShaderDiskCacheDecompiled>, ShaderDumpsMap>>
+std::optional<std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>>
 ShaderDiskCacheOpenGL::LoadPrecompiledFile(FileUtil::IOFile& file) {
     // Read compressed file from disk and decompress to virtual precompiled cache file
     std::vector<u8> compressed(file.GetSize());
@@ -227,238 +276,56 @@ ShaderDiskCacheOpenGL::LoadPrecompiledFile(FileUtil::IOFile& file) {
         return {};
     }
 
-    std::unordered_map<u64, ShaderDiskCacheDecompiled> decompiled;
     ShaderDumpsMap dumps;
     while (precompiled_cache_virtual_file_offset < precompiled_cache_virtual_file.GetSize()) {
-        PrecompiledEntryKind kind{};
-        if (!LoadObjectFromPrecompiled(kind)) {
+        u32 num_keys{};
+        u32 num_bound_samplers{};
+        u32 num_bindless_samplers{};
+        ShaderDiskCacheUsage usage;
+        if (!LoadObjectFromPrecompiled(usage.unique_identifier) ||
+            !LoadObjectFromPrecompiled(usage.variant) || !LoadObjectFromPrecompiled(num_keys) ||
+            !LoadObjectFromPrecompiled(num_bound_samplers) ||
+            !LoadObjectFromPrecompiled(num_bindless_samplers)) {
             return {};
         }
-
-        switch (kind) {
-        case PrecompiledEntryKind::Decompiled: {
-            u64 unique_identifier{};
-            if (!LoadObjectFromPrecompiled(unique_identifier)) {
-                return {};
-            }
-
-            auto entry = LoadDecompiledEntry();
-            if (!entry) {
-                return {};
-            }
-            decompiled.insert({unique_identifier, std::move(*entry)});
-            break;
-        }
-        case PrecompiledEntryKind::Dump: {
-            ShaderDiskCacheUsage usage;
-            if (!LoadObjectFromPrecompiled(usage)) {
-                return {};
-            }
-
-            ShaderDiskCacheDump dump;
-            if (!LoadObjectFromPrecompiled(dump.binary_format)) {
-                return {};
-            }
-
-            u32 binary_length{};
-            if (!LoadObjectFromPrecompiled(binary_length)) {
-                return {};
-            }
-
-            dump.binary.resize(binary_length);
-            if (!LoadArrayFromPrecompiled(dump.binary.data(), dump.binary.size())) {
-                return {};
-            }
-
-            dumps.insert({usage, dump});
-            break;
-        }
-        default:
+        std::vector<ConstBufferKey> keys(num_keys);
+        std::vector<BoundSamplerKey> bound_samplers(num_bound_samplers);
+        std::vector<BindlessSamplerKey> bindless_samplers(num_bindless_samplers);
+        // Each load is used as a plain success flag, like the other loads in this function;
+        // bail out as soon as any of the three key arrays cannot be read.
+        if (!LoadArrayFromPrecompiled(keys.data(), keys.size()) ||
+            !LoadArrayFromPrecompiled(bound_samplers.data(), bound_samplers.size()) ||
+            !LoadArrayFromPrecompiled(bindless_samplers.data(), bindless_samplers.size())) {
             return {};
         }
-    }
-    return {{decompiled, dumps}};
-}
-
-std::optional<ShaderDiskCacheDecompiled> ShaderDiskCacheOpenGL::LoadDecompiledEntry() {
-    u32 code_size{};
-    if (!LoadObjectFromPrecompiled(code_size)) {
-        return {};
-    }
-
-    std::string code(code_size, '\0');
-    if (!LoadArrayFromPrecompiled(code.data(), code.size())) {
-        return {};
-    }
-
-    ShaderDiskCacheDecompiled entry;
-    entry.code = std::move(code);
-
-    u32 const_buffers_count{};
-    if (!LoadObjectFromPrecompiled(const_buffers_count)) {
-        return {};
-    }
-
-    for (u32 i = 0; i < const_buffers_count; ++i) {
-        u32 max_offset{};
-        u32 index{};
-        bool is_indirect{};
-        if (!LoadObjectFromPrecompiled(max_offset) || !LoadObjectFromPrecompiled(index) ||
-            !LoadObjectFromPrecompiled(is_indirect)) {
-            return {};
+        for (const auto& key : keys) {
+            usage.keys.insert({{key.cbuf, key.offset}, key.value});
         }
-        entry.entries.const_buffers.emplace_back(max_offset, is_indirect, index);
-    }
-
-    u32 samplers_count{};
-    if (!LoadObjectFromPrecompiled(samplers_count)) {
-        return {};
-    }
-
-    for (u32 i = 0; i < samplers_count; ++i) {
-        u64 offset{};
-        u64 index{};
-        u32 type{};
-        bool is_array{};
-        bool is_shadow{};
-        bool is_bindless{};
-        if (!LoadObjectFromPrecompiled(offset) || !LoadObjectFromPrecompiled(index) ||
-            !LoadObjectFromPrecompiled(type) || !LoadObjectFromPrecompiled(is_array) ||
-            !LoadObjectFromPrecompiled(is_shadow) || !LoadObjectFromPrecompiled(is_bindless)) {
-            return {};
+        for (const auto& key : bound_samplers) {
+            usage.bound_samplers.emplace(key.offset, key.sampler);
         }
-        entry.entries.samplers.emplace_back(
-            static_cast<std::size_t>(offset), static_cast<std::size_t>(index),
-            static_cast<Tegra::Shader::TextureType>(type), is_array, is_shadow, is_bindless);
-    }
-
-    u32 images_count{};
-    if (!LoadObjectFromPrecompiled(images_count)) {
-        return {};
-    }
-    for (u32 i = 0; i < images_count; ++i) {
-        u64 offset{};
-        u64 index{};
-        u32 type{};
-        u8 is_bindless{};
-        u8 is_written{};
-        u8 is_read{};
-        u8 is_atomic{};
-        if (!LoadObjectFromPrecompiled(offset) || !LoadObjectFromPrecompiled(index) ||
-            !LoadObjectFromPrecompiled(type) || !LoadObjectFromPrecompiled(is_bindless) ||
-            !LoadObjectFromPrecompiled(is_written) || !LoadObjectFromPrecompiled(is_read) ||
-            !LoadObjectFromPrecompiled(is_atomic)) {
-            return {};
+        for (const auto& key : bindless_samplers) {
+            usage.bindless_samplers.insert({{key.cbuf, key.offset}, key.sampler});
         }
-        entry.entries.images.emplace_back(
-            static_cast<std::size_t>(offset), static_cast<std::size_t>(index),
-            static_cast<Tegra::Shader::ImageType>(type), is_bindless != 0, is_written != 0,
-            is_read != 0, is_atomic != 0);
-    }
 
-    u32 global_memory_count{};
-    if (!LoadObjectFromPrecompiled(global_memory_count)) {
-        return {};
-    }
-    for (u32 i = 0; i < global_memory_count; ++i) {
-        u32 cbuf_index{};
-        u32 cbuf_offset{};
-        bool is_read{};
-        bool is_written{};
-        if (!LoadObjectFromPrecompiled(cbuf_index) || !LoadObjectFromPrecompiled(cbuf_offset) ||
-            !LoadObjectFromPrecompiled(is_read) || !LoadObjectFromPrecompiled(is_written)) {
+        ShaderDiskCacheDump dump;
+        if (!LoadObjectFromPrecompiled(dump.binary_format)) {
             return {};
         }
-        entry.entries.global_memory_entries.emplace_back(cbuf_index, cbuf_offset, is_read,
-                                                         is_written);
-    }
 
-    for (auto& clip_distance : entry.entries.clip_distances) {
-        if (!LoadObjectFromPrecompiled(clip_distance)) {
+        u32 binary_length{};
+        if (!LoadObjectFromPrecompiled(binary_length)) {
             return {};
         }
-    }
 
-    u64 shader_length{};
-    if (!LoadObjectFromPrecompiled(shader_length)) {
-        return {};
-    }
-    entry.entries.shader_length = static_cast<std::size_t>(shader_length);
-
-    return entry;
-}
-
-bool ShaderDiskCacheOpenGL::SaveDecompiledFile(u64 unique_identifier, const std::string& code,
-                                               const GLShader::ShaderEntries& entries) {
-    if (!SaveObjectToPrecompiled(static_cast<u32>(PrecompiledEntryKind::Decompiled)) ||
-        !SaveObjectToPrecompiled(unique_identifier) ||
-        !SaveObjectToPrecompiled(static_cast<u32>(code.size())) ||
-        !SaveArrayToPrecompiled(code.data(), code.size())) {
-        return false;
-    }
-
-    if (!SaveObjectToPrecompiled(static_cast<u32>(entries.const_buffers.size()))) {
-        return false;
-    }
-    for (const auto& cbuf : entries.const_buffers) {
-        if (!SaveObjectToPrecompiled(static_cast<u32>(cbuf.GetMaxOffset())) ||
-            !SaveObjectToPrecompiled(static_cast<u32>(cbuf.GetIndex())) ||
-            !SaveObjectToPrecompiled(cbuf.IsIndirect())) {
-            return false;
-        }
-    }
-
-    if (!SaveObjectToPrecompiled(static_cast<u32>(entries.samplers.size()))) {
-        return false;
-    }
-    for (const auto& sampler : entries.samplers) {
-        if (!SaveObjectToPrecompiled(static_cast<u64>(sampler.GetOffset())) ||
-            !SaveObjectToPrecompiled(static_cast<u64>(sampler.GetIndex())) ||
-            !SaveObjectToPrecompiled(static_cast<u32>(sampler.GetType())) ||
-            !SaveObjectToPrecompiled(sampler.IsArray()) ||
-            !SaveObjectToPrecompiled(sampler.IsShadow()) ||
-            !SaveObjectToPrecompiled(sampler.IsBindless())) {
-            return false;
-        }
-    }
-
-    if (!SaveObjectToPrecompiled(static_cast<u32>(entries.images.size()))) {
-        return false;
-    }
-    for (const auto& image : entries.images) {
-        if (!SaveObjectToPrecompiled(static_cast<u64>(image.GetOffset())) ||
-            !SaveObjectToPrecompiled(static_cast<u64>(image.GetIndex())) ||
-            !SaveObjectToPrecompiled(static_cast<u32>(image.GetType())) ||
-            !SaveObjectToPrecompiled(static_cast<u8>(image.IsBindless() ? 1 : 0)) ||
-            !SaveObjectToPrecompiled(static_cast<u8>(image.IsWritten() ? 1 : 0)) ||
-            !SaveObjectToPrecompiled(static_cast<u8>(image.IsRead() ? 1 : 0)) ||
-            !SaveObjectToPrecompiled(static_cast<u8>(image.IsAtomic() ? 1 : 0))) {
-            return false;
-        }
-    }
-
-    if (!SaveObjectToPrecompiled(static_cast<u32>(entries.global_memory_entries.size()))) {
-        return false;
-    }
-    for (const auto& gmem : entries.global_memory_entries) {
-        if (!SaveObjectToPrecompiled(static_cast<u32>(gmem.GetCbufIndex())) ||
-            !SaveObjectToPrecompiled(static_cast<u32>(gmem.GetCbufOffset())) ||
-            !SaveObjectToPrecompiled(gmem.IsRead()) || !SaveObjectToPrecompiled(gmem.IsWritten())) {
-            return false;
-        }
-    }
-
-    for (const bool clip_distance : entries.clip_distances) {
-        if (!SaveObjectToPrecompiled(clip_distance)) {
-            return false;
+        dump.binary.resize(binary_length);
+        if (!LoadArrayFromPrecompiled(dump.binary.data(), dump.binary.size())) {
+            return {};
         }
-    }
 
-    if (!SaveObjectToPrecompiled(static_cast<u64>(entries.shader_length))) {
-        return false;
+        dumps.emplace(std::move(usage), std::move(dump));
     }
-
-    return true;
+    return dumps;
 }
 
 void ShaderDiskCacheOpenGL::InvalidateTransferable() {
@@ -479,8 +346,9 @@ void ShaderDiskCacheOpenGL::InvalidatePrecompiled() {
 }
 
 void ShaderDiskCacheOpenGL::SaveRaw(const ShaderDiskCacheRaw& entry) {
-    if (!IsUsable())
+    if (!is_usable) {
         return;
+    }
 
     const u64 id = entry.GetUniqueIdentifier();
     if (transferable.find(id) != transferable.end()) {
@@ -489,10 +357,11 @@ void ShaderDiskCacheOpenGL::SaveRaw(const ShaderDiskCacheRaw& entry) {
     }
 
     FileUtil::IOFile file = AppendTransferableFile();
-    if (!file.IsOpen())
+    if (!file.IsOpen()) {
         return;
+    }
     if (file.WriteObject(TransferableEntryKind::Raw) != 1 || !entry.Save(file)) {
-        LOG_ERROR(Render_OpenGL, "Failed to save raw transferable cache entry - removing");
+        LOG_ERROR(Render_OpenGL, "Failed to save raw transferable cache entry, removing");
         file.Close();
         InvalidateTransferable();
         return;
@@ -501,8 +370,9 @@ void ShaderDiskCacheOpenGL::SaveRaw(const ShaderDiskCacheRaw& entry) {
 }
 
 void ShaderDiskCacheOpenGL::SaveUsage(const ShaderDiskCacheUsage& usage) {
-    if (!IsUsable())
+    if (!is_usable) {
         return;
+    }
 
     const auto it = transferable.find(usage.unique_identifier);
     ASSERT_MSG(it != transferable.end(), "Saving shader usage without storing raw previously");
@@ -517,35 +387,54 @@ void ShaderDiskCacheOpenGL::SaveUsage(const ShaderDiskCacheUsage& usage) {
     FileUtil::IOFile file = AppendTransferableFile();
     if (!file.IsOpen())
         return;
-
-    if (file.WriteObject(TransferableEntryKind::Usage) != 1 || file.WriteObject(usage) != 1) {
-        LOG_ERROR(Render_OpenGL, "Failed to save usage transferable cache entry - removing");
+    const auto Close = [&] {
+        LOG_ERROR(Render_OpenGL, "Failed to save usage transferable cache entry, removing");
         file.Close();
         InvalidateTransferable();
+    };
+
+    if (file.WriteObject(TransferableEntryKind::Usage) != 1 ||
+        file.WriteObject(usage.unique_identifier) != 1 || file.WriteObject(usage.variant) != 1 ||
+        file.WriteObject(static_cast<u32>(usage.keys.size())) != 1 ||
+        file.WriteObject(static_cast<u32>(usage.bound_samplers.size())) != 1 ||
+        file.WriteObject(static_cast<u32>(usage.bindless_samplers.size())) != 1) {
+        Close();
         return;
     }
+    for (const auto& [pair, value] : usage.keys) {
+        const auto [cbuf, offset] = pair;
+        if (file.WriteObject(ConstBufferKey{cbuf, offset, value}) != 1) {
+            Close();
+            return;
+        }
+    }
+    for (const auto& [offset, sampler] : usage.bound_samplers) {
+        if (file.WriteObject(BoundSamplerKey{offset, sampler}) != 1) {
+            Close();
+            return;
+        }
+    }
+    for (const auto& [pair, sampler] : usage.bindless_samplers) {
+        const auto [cbuf, offset] = pair;
+        if (file.WriteObject(BindlessSamplerKey{cbuf, offset, sampler}) != 1) {
+            Close();
+            return;
+        }
+    }
 }
 
-void ShaderDiskCacheOpenGL::SaveDecompiled(u64 unique_identifier, const std::string& code,
-                                           const GLShader::ShaderEntries& entries) {
-    if (!IsUsable())
+void ShaderDiskCacheOpenGL::SaveDump(const ShaderDiskCacheUsage& usage, GLuint program) {
+    if (!is_usable) {
         return;
+    }
 
+    // TODO(Rodrigo): This is a design smell. I shouldn't be having to manually write the header
+    // when writing the dump. This should be done the moment I get access to write to the virtual
+    // file.
     if (precompiled_cache_virtual_file.GetSize() == 0) {
         SavePrecompiledHeaderToVirtualPrecompiledCache();
     }
 
-    if (!SaveDecompiledFile(unique_identifier, code, entries)) {
-        LOG_ERROR(Render_OpenGL,
-                  "Failed to save decompiled entry to the precompiled file - removing");
-        InvalidatePrecompiled();
-    }
-}
-
-void ShaderDiskCacheOpenGL::SaveDump(const ShaderDiskCacheUsage& usage, GLuint program) {
-    if (!IsUsable())
-        return;
-
     GLint binary_length{};
     glGetProgramiv(program, GL_PROGRAM_BINARY_LENGTH, &binary_length);
 
@@ -553,25 +442,51 @@ void ShaderDiskCacheOpenGL::SaveDump(const ShaderDiskCacheUsage& usage, GLuint p
     std::vector<u8> binary(binary_length);
     glGetProgramBinary(program, binary_length, nullptr, &binary_format, binary.data());
 
-    if (!SaveObjectToPrecompiled(static_cast<u32>(PrecompiledEntryKind::Dump)) ||
-        !SaveObjectToPrecompiled(usage) ||
-        !SaveObjectToPrecompiled(static_cast<u32>(binary_format)) ||
-        !SaveObjectToPrecompiled(static_cast<u32>(binary_length)) ||
-        !SaveArrayToPrecompiled(binary.data(), binary.size())) {
-        LOG_ERROR(Render_OpenGL, "Failed to save binary program file in shader={:016x} - removing",
+    const auto Close = [&] {
+        LOG_ERROR(Render_OpenGL, "Failed to save binary program file in shader={:016X}, removing",
                   usage.unique_identifier);
         InvalidatePrecompiled();
+    };
+
+    if (!SaveObjectToPrecompiled(usage.unique_identifier) ||
+        !SaveObjectToPrecompiled(usage.variant) ||
+        !SaveObjectToPrecompiled(static_cast<u32>(usage.keys.size())) ||
+        !SaveObjectToPrecompiled(static_cast<u32>(usage.bound_samplers.size())) ||
+        !SaveObjectToPrecompiled(static_cast<u32>(usage.bindless_samplers.size()))) {
+        Close();
         return;
     }
-}
-
-bool ShaderDiskCacheOpenGL::IsUsable() const {
-    return tried_to_load && Settings::values.use_disk_shader_cache;
+    for (const auto& [pair, value] : usage.keys) {
+        const auto [cbuf, offset] = pair;
+        if (!SaveObjectToPrecompiled(ConstBufferKey{cbuf, offset, value})) {
+            Close();
+            return;
+        }
+    }
+    for (const auto& [offset, sampler] : usage.bound_samplers) {
+        if (!SaveObjectToPrecompiled(BoundSamplerKey{offset, sampler})) {
+            Close();
+            return;
+        }
+    }
+    for (const auto& [pair, sampler] : usage.bindless_samplers) {
+        const auto [cbuf, offset] = pair;
+        if (!SaveObjectToPrecompiled(BindlessSamplerKey{cbuf, offset, sampler})) {
+            Close();
+            return;
+        }
+    }
+    if (!SaveObjectToPrecompiled(static_cast<u32>(binary_format)) ||
+        !SaveObjectToPrecompiled(static_cast<u32>(binary_length)) ||
+        !SaveArrayToPrecompiled(binary.data(), binary.size())) {
+        Close();
+    }
 }
 
 FileUtil::IOFile ShaderDiskCacheOpenGL::AppendTransferableFile() const {
-    if (!EnsureDirectories())
+    if (!EnsureDirectories()) {
         return {};
+    }
 
     const auto transferable_path{GetTransferablePath()};
     const bool existed = FileUtil::Exists(transferable_path);
@@ -603,8 +518,8 @@ void ShaderDiskCacheOpenGL::SavePrecompiledHeaderToVirtualPrecompiledCache() {
 
 void ShaderDiskCacheOpenGL::SaveVirtualPrecompiledFile() {
     precompiled_cache_virtual_file_offset = 0;
-    const std::vector<u8>& uncompressed = precompiled_cache_virtual_file.ReadAllBytes();
-    const std::vector<u8>& compressed =
+    const std::vector<u8> uncompressed = precompiled_cache_virtual_file.ReadAllBytes();
+    const std::vector<u8> compressed =
         Common::Compression::CompressDataZSTDDefault(uncompressed.data(), uncompressed.size());
 
     const auto precompiled_path{GetPrecompiledPath()};
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.h b/src/video_core/renderer_opengl/gl_shader_disk_cache.h
index cc8bbd61e..db23ada93 100644
--- a/src/video_core/renderer_opengl/gl_shader_disk_cache.h
+++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.h
@@ -8,6 +8,7 @@
 #include <optional>
 #include <string>
 #include <tuple>
+#include <type_traits>
 #include <unordered_map>
 #include <unordered_set>
 #include <utility>
@@ -19,6 +20,7 @@
 #include "common/common_types.h"
 #include "core/file_sys/vfs_vector.h"
 #include "video_core/renderer_opengl/gl_shader_gen.h"
+#include "video_core/shader/const_buffer_locker.h"
 
 namespace Core {
 class System;
@@ -53,6 +55,7 @@ struct BaseBindings {
         return !operator==(rhs);
     }
 };
+static_assert(std::is_trivially_copyable_v<BaseBindings>);
 
 /// Describes the different variants a single program can be compiled.
 struct ProgramVariant {
@@ -70,13 +73,20 @@ struct ProgramVariant {
     }
 };
 
+static_assert(std::is_trivially_copyable_v<ProgramVariant>);
+
 /// Describes how a shader is used.
 struct ShaderDiskCacheUsage {
     u64 unique_identifier{};
     ProgramVariant variant;
+    VideoCommon::Shader::KeyMap keys;
+    VideoCommon::Shader::BoundSamplerMap bound_samplers;
+    VideoCommon::Shader::BindlessSamplerMap bindless_samplers;
 
     bool operator==(const ShaderDiskCacheUsage& rhs) const {
-        return std::tie(unique_identifier, variant) == std::tie(rhs.unique_identifier, rhs.variant);
+        return std::tie(unique_identifier, variant, keys, bound_samplers, bindless_samplers) ==
+               std::tie(rhs.unique_identifier, rhs.variant, rhs.keys, rhs.bound_samplers,
+                        rhs.bindless_samplers);
     }
 
     bool operator!=(const ShaderDiskCacheUsage& rhs) const {
@@ -123,8 +133,7 @@ namespace OpenGL {
 class ShaderDiskCacheRaw {
 public:
     explicit ShaderDiskCacheRaw(u64 unique_identifier, ProgramType program_type,
-                                u32 program_code_size, u32 program_code_size_b,
-                                ProgramCode program_code, ProgramCode program_code_b);
+                                ProgramCode program_code, ProgramCode program_code_b = {});
     ShaderDiskCacheRaw();
     ~ShaderDiskCacheRaw();
 
@@ -155,22 +164,14 @@ public:
 private:
     u64 unique_identifier{};
     ProgramType program_type{};
-    u32 program_code_size{};
-    u32 program_code_size_b{};
 
     ProgramCode program_code;
     ProgramCode program_code_b;
 };
 
-/// Contains decompiled data from a shader
-struct ShaderDiskCacheDecompiled {
-    std::string code;
-    GLShader::ShaderEntries entries;
-};
-
 /// Contains an OpenGL dumped binary program
 struct ShaderDiskCacheDump {
-    GLenum binary_format;
+    GLenum binary_format{};
     std::vector<u8> binary;
 };
 
@@ -184,9 +185,7 @@ public:
     LoadTransferable();
 
     /// Loads current game's precompiled cache. Invalidates on failure.
-    std::pair<std::unordered_map<u64, ShaderDiskCacheDecompiled>,
-              std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>>
-    LoadPrecompiled();
+    std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump> LoadPrecompiled();
 
     /// Removes the transferable (and precompiled) cache file.
     void InvalidateTransferable();
@@ -200,10 +199,6 @@ public:
     /// Saves shader usage to the transferable file. Does not check for collisions.
     void SaveUsage(const ShaderDiskCacheUsage& usage);
 
-    /// Saves a decompiled entry to the precompiled file. Does not check for collisions.
-    void SaveDecompiled(u64 unique_identifier, const std::string& code,
-                        const GLShader::ShaderEntries& entries);
-
     /// Saves a dump entry to the precompiled file. Does not check for collisions.
     void SaveDump(const ShaderDiskCacheUsage& usage, GLuint program);
 
@@ -212,21 +207,9 @@ public:
 
 private:
     /// Loads the transferable cache. Returns empty on failure.
-    std::optional<std::pair<std::unordered_map<u64, ShaderDiskCacheDecompiled>,
-                            std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>>>
+    std::optional<std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>>
     LoadPrecompiledFile(FileUtil::IOFile& file);
 
-    /// Loads a decompiled cache entry from m_precompiled_cache_virtual_file. Returns empty on
-    /// failure.
-    std::optional<ShaderDiskCacheDecompiled> LoadDecompiledEntry();
-
-    /// Saves a decompiled entry to the passed file. Returns true on success.
-    bool SaveDecompiledFile(u64 unique_identifier, const std::string& code,
-                            const GLShader::ShaderEntries& entries);
-
-    /// Returns if the cache can be used
-    bool IsUsable() const;
-
     /// Opens current game's transferable file and write it's header if it doesn't exist
     FileUtil::IOFile AppendTransferableFile() const;
 
@@ -297,7 +280,7 @@ private:
     std::unordered_map<u64, std::unordered_set<ShaderDiskCacheUsage>> transferable;
 
     // The cache has been loaded at boot
-    bool tried_to_load{};
+    bool is_usable{};
 };
 
 } // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp
index 3a8d9e1da..af17216bd 100644
--- a/src/video_core/renderer_opengl/gl_shader_gen.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp
@@ -11,93 +11,56 @@
 namespace OpenGL::GLShader {
 
 using Tegra::Engines::Maxwell3D;
+using VideoCommon::Shader::CompileDepth;
+using VideoCommon::Shader::CompilerSettings;
 using VideoCommon::Shader::ProgramCode;
 using VideoCommon::Shader::ShaderIR;
 
-static constexpr u32 PROGRAM_OFFSET = 10;
-static constexpr u32 COMPUTE_OFFSET = 0;
-
-ProgramResult GenerateVertexShader(const Device& device, const ShaderSetup& setup) {
-    const std::string id = fmt::format("{:016x}", setup.program.unique_identifier);
-
-    std::string out = "// Shader Unique Id: VS" + id + "\n\n";
-    out += GetCommonDeclarations();
-
+std::string GenerateVertexShader(const Device& device, const ShaderIR& ir, const ShaderIR* ir_b) {
+    std::string out = GetCommonDeclarations();
     out += R"(
 layout (std140, binding = EMULATION_UBO_BINDING) uniform vs_config {
-    vec4 viewport_flip;
-    uvec4 config_pack; // instance_id, flip_stage, y_direction, padding
+    float y_direction;
 };
 
 )";
-
-    const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET, setup.program.size_a);
-    const auto stage = setup.IsDualProgram() ? ProgramType::VertexA : ProgramType::VertexB;
-    ProgramResult program = Decompile(device, program_ir, stage, "vertex");
-    out += program.first;
-
-    if (setup.IsDualProgram()) {
-        const ShaderIR program_ir_b(setup.program.code_b, PROGRAM_OFFSET, setup.program.size_b);
-        ProgramResult program_b = Decompile(device, program_ir_b, ProgramType::VertexB, "vertex_b");
-        out += program_b.first;
+    const auto stage = ir_b ? ProgramType::VertexA : ProgramType::VertexB;
+    out += Decompile(device, ir, stage, "vertex");
+    if (ir_b) {
+        out += Decompile(device, *ir_b, ProgramType::VertexB, "vertex_b");
     }
 
     out += R"(
 void main() {
     execute_vertex();
 )";
-
-    if (setup.IsDualProgram()) {
+    if (ir_b) {
         out += "    execute_vertex_b();";
     }
-
-    out += R"(
-
-    // Set Position Y direction
-    gl_Position.y *= utof(config_pack[2]);
-    // Check if the flip stage is VertexB
-    // Config pack's second value is flip_stage
-    if (config_pack[1] == 1) {
-        // Viewport can be flipped, which is unsupported by glViewport
-        gl_Position.xy *= viewport_flip.xy;
-    }
-})";
-
-    return {std::move(out), std::move(program.second)};
+    out += "}\n";
+    return out;
 }
 
-ProgramResult GenerateGeometryShader(const Device& device, const ShaderSetup& setup) {
-    const std::string id = fmt::format("{:016x}", setup.program.unique_identifier);
-
-    std::string out = "// Shader Unique Id: GS" + id + "\n\n";
-    out += GetCommonDeclarations();
-
+std::string GenerateGeometryShader(const Device& device, const ShaderIR& ir) {
+    std::string out = GetCommonDeclarations();
     out += R"(
 layout (std140, binding = EMULATION_UBO_BINDING) uniform gs_config {
-    vec4 viewport_flip;
-    uvec4 config_pack; // instance_id, flip_stage, y_direction, padding
+    float y_direction;
 };
 
 )";
-
-    const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET, setup.program.size_a);
-    ProgramResult program = Decompile(device, program_ir, ProgramType::Geometry, "geometry");
-    out += program.first;
+    out += Decompile(device, ir, ProgramType::Geometry, "geometry");
 
     out += R"(
 void main() {
     execute_geometry();
-};)";
-
-    return {std::move(out), std::move(program.second)};
+}
+)";
+    return out;
 }
 
-ProgramResult GenerateFragmentShader(const Device& device, const ShaderSetup& setup) {
-    const std::string id = fmt::format("{:016x}", setup.program.unique_identifier);
-
-    std::string out = "// Shader Unique Id: FS" + id + "\n\n";
-    out += GetCommonDeclarations();
-
+std::string GenerateFragmentShader(const Device& device, const ShaderIR& ir) {
+    std::string out = GetCommonDeclarations();
     out += R"(
 layout (location = 0) out vec4 FragColor0;
 layout (location = 1) out vec4 FragColor1;
@@ -109,40 +72,29 @@ layout (location = 6) out vec4 FragColor6;
 layout (location = 7) out vec4 FragColor7;
 
 layout (std140, binding = EMULATION_UBO_BINDING) uniform fs_config {
-    vec4 viewport_flip;
-    uvec4 config_pack; // instance_id, flip_stage, y_direction, padding
+    float y_direction;
 };
 
 )";
-    const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET, setup.program.size_a);
-    ProgramResult program = Decompile(device, program_ir, ProgramType::Fragment, "fragment");
-    out += program.first;
+    out += Decompile(device, ir, ProgramType::Fragment, "fragment");
 
     out += R"(
 void main() {
     execute_fragment();
 }
-
 )";
-    return {std::move(out), std::move(program.second)};
+    return out;
 }
 
-ProgramResult GenerateComputeShader(const Device& device, const ShaderSetup& setup) {
-    const std::string id = fmt::format("{:016x}", setup.program.unique_identifier);
-
-    std::string out = "// Shader Unique Id: CS" + id + "\n\n";
-    out += GetCommonDeclarations();
-
-    const ShaderIR program_ir(setup.program.code, COMPUTE_OFFSET, setup.program.size_a);
-    ProgramResult program = Decompile(device, program_ir, ProgramType::Compute, "compute");
-    out += program.first;
-
+std::string GenerateComputeShader(const Device& device, const ShaderIR& ir) {
+    std::string out = GetCommonDeclarations();
+    out += Decompile(device, ir, ProgramType::Compute, "compute");
     out += R"(
 void main() {
     execute_compute();
 }
 )";
-    return {std::move(out), std::move(program.second)};
+    return out;
 }
 
 } // namespace OpenGL::GLShader
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.h b/src/video_core/renderer_opengl/gl_shader_gen.h
index 3833e88ab..cba2be9f9 100644
--- a/src/video_core/renderer_opengl/gl_shader_gen.h
+++ b/src/video_core/renderer_opengl/gl_shader_gen.h
@@ -17,44 +17,18 @@ class Device;
 namespace OpenGL::GLShader {
 
 using VideoCommon::Shader::ProgramCode;
-
-struct ShaderSetup {
-    explicit ShaderSetup(ProgramCode program_code) {
-        program.code = std::move(program_code);
-    }
-
-    struct {
-        ProgramCode code;
-        ProgramCode code_b; // Used for dual vertex shaders
-        u64 unique_identifier;
-        std::size_t size_a;
-        std::size_t size_b;
-    } program;
-
-    /// Used in scenarios where we have a dual vertex shaders
-    void SetProgramB(ProgramCode program_b) {
-        program.code_b = std::move(program_b);
-        has_program_b = true;
-    }
-
-    bool IsDualProgram() const {
-        return has_program_b;
-    }
-
-private:
-    bool has_program_b{};
-};
+using VideoCommon::Shader::ShaderIR;
 
 /// Generates the GLSL vertex shader program source code for the given VS program
-ProgramResult GenerateVertexShader(const Device& device, const ShaderSetup& setup);
+std::string GenerateVertexShader(const Device& device, const ShaderIR& ir, const ShaderIR* ir_b);
 
 /// Generates the GLSL geometry shader program source code for the given GS program
-ProgramResult GenerateGeometryShader(const Device& device, const ShaderSetup& setup);
+std::string GenerateGeometryShader(const Device& device, const ShaderIR& ir);
 
 /// Generates the GLSL fragment shader program source code for the given FS program
-ProgramResult GenerateFragmentShader(const Device& device, const ShaderSetup& setup);
+std::string GenerateFragmentShader(const Device& device, const ShaderIR& ir);
 
 /// Generates the GLSL compute shader program source code for the given CS program
-ProgramResult GenerateComputeShader(const Device& device, const ShaderSetup& setup);
+std::string GenerateComputeShader(const Device& device, const ShaderIR& ir);
 
 } // namespace OpenGL::GLShader
diff --git a/src/video_core/renderer_opengl/gl_shader_manager.cpp b/src/video_core/renderer_opengl/gl_shader_manager.cpp
index b05f90f20..75d3fac04 100644
--- a/src/video_core/renderer_opengl/gl_shader_manager.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_manager.cpp
@@ -40,27 +40,11 @@ void ProgramManager::UpdatePipeline() {
     old_state = current_state;
 }
 
-void MaxwellUniformData::SetFromRegs(const Maxwell3D& maxwell, std::size_t shader_stage) {
+void MaxwellUniformData::SetFromRegs(const Maxwell3D& maxwell) {
     const auto& regs = maxwell.regs;
-    const auto& state = maxwell.state;
-
-    // TODO(bunnei): Support more than one viewport
-    viewport_flip[0] = regs.viewport_transform[0].scale_x < 0.0 ? -1.0f : 1.0f;
-    viewport_flip[1] = regs.viewport_transform[0].scale_y < 0.0 ? -1.0f : 1.0f;
-
-    instance_id = state.current_instance;
-
-    // Assign in which stage the position has to be flipped
-    // (the last stage before the fragment shader).
-    constexpr u32 geometry_index = static_cast<u32>(Maxwell3D::Regs::ShaderProgram::Geometry);
-    if (maxwell.regs.shader_config[geometry_index].enable) {
-        flip_stage = geometry_index;
-    } else {
-        flip_stage = static_cast<u32>(Maxwell3D::Regs::ShaderProgram::VertexB);
-    }
 
     // Y_NEGATE controls what value S2R returns for the Y_DIRECTION system value.
-    y_direction = regs.screen_y_control.y_negate == 0 ? 1.f : -1.f;
+    y_direction = regs.screen_y_control.y_negate == 0 ? 1.0f : -1.0f;
 }
 
 } // namespace OpenGL::GLShader
diff --git a/src/video_core/renderer_opengl/gl_shader_manager.h b/src/video_core/renderer_opengl/gl_shader_manager.h
index 6961e702a..3703e7018 100644
--- a/src/video_core/renderer_opengl/gl_shader_manager.h
+++ b/src/video_core/renderer_opengl/gl_shader_manager.h
@@ -18,17 +18,12 @@ namespace OpenGL::GLShader {
 /// @note Always keep a vec4 at the end. The GL spec is not clear whether the alignment at
 ///       the end of a uniform block is included in UNIFORM_BLOCK_DATA_SIZE or not.
 ///       Not following that rule will cause problems on some AMD drivers.
-struct MaxwellUniformData {
-    void SetFromRegs(const Tegra::Engines::Maxwell3D& maxwell, std::size_t shader_stage);
-
-    alignas(16) GLvec4 viewport_flip;
-    struct alignas(16) {
-        GLuint instance_id;
-        GLuint flip_stage;
-        GLfloat y_direction;
-    };
+struct alignas(16) MaxwellUniformData {
+    void SetFromRegs(const Tegra::Engines::Maxwell3D& maxwell);
+
+    GLfloat y_direction;
 };
-static_assert(sizeof(MaxwellUniformData) == 32, "MaxwellUniformData structure size is incorrect");
+static_assert(sizeof(MaxwellUniformData) == 16, "MaxwellUniformData structure size is incorrect");
 static_assert(sizeof(MaxwellUniformData) < 16384,
               "MaxwellUniformData structure must be less than 16kb as per the OpenGL spec");
 
diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp
index bf86b5a0b..ccbe5912e 100644
--- a/src/video_core/renderer_opengl/gl_state.cpp
+++ b/src/video_core/renderer_opengl/gl_state.cpp
@@ -2,6 +2,7 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
+#include <algorithm>
 #include <iterator>
 #include <glad/glad.h>
 #include "common/assert.h"
@@ -69,147 +70,29 @@ void Enable(GLenum cap, GLuint index, bool enable) {
 }
 
 void Enable(GLenum cap, bool& current_value, bool new_value) {
-    if (UpdateValue(current_value, new_value))
+    if (UpdateValue(current_value, new_value)) {
         Enable(cap, new_value);
+    }
 }
 
 void Enable(GLenum cap, GLuint index, bool& current_value, bool new_value) {
-    if (UpdateValue(current_value, new_value))
+    if (UpdateValue(current_value, new_value)) {
         Enable(cap, index, new_value);
-}
-
-} // namespace
-
-OpenGLState::OpenGLState() {
-    // These all match default OpenGL values
-    framebuffer_srgb.enabled = false;
-
-    multisample_control.alpha_to_coverage = false;
-    multisample_control.alpha_to_one = false;
-
-    cull.enabled = false;
-    cull.mode = GL_BACK;
-    cull.front_face = GL_CCW;
-
-    depth.test_enabled = false;
-    depth.test_func = GL_LESS;
-    depth.write_mask = GL_TRUE;
-
-    primitive_restart.enabled = false;
-    primitive_restart.index = 0;
-
-    for (auto& item : color_mask) {
-        item.red_enabled = GL_TRUE;
-        item.green_enabled = GL_TRUE;
-        item.blue_enabled = GL_TRUE;
-        item.alpha_enabled = GL_TRUE;
-    }
-
-    const auto ResetStencil = [](auto& config) {
-        config.test_func = GL_ALWAYS;
-        config.test_ref = 0;
-        config.test_mask = 0xFFFFFFFF;
-        config.write_mask = 0xFFFFFFFF;
-        config.action_depth_fail = GL_KEEP;
-        config.action_depth_pass = GL_KEEP;
-        config.action_stencil_fail = GL_KEEP;
-    };
-    stencil.test_enabled = false;
-    ResetStencil(stencil.front);
-    ResetStencil(stencil.back);
-
-    for (auto& item : viewports) {
-        item.x = 0;
-        item.y = 0;
-        item.width = 0;
-        item.height = 0;
-        item.depth_range_near = 0.0f;
-        item.depth_range_far = 1.0f;
-        item.scissor.enabled = false;
-        item.scissor.x = 0;
-        item.scissor.y = 0;
-        item.scissor.width = 0;
-        item.scissor.height = 0;
     }
+}
 
-    for (auto& item : blend) {
-        item.enabled = true;
-        item.rgb_equation = GL_FUNC_ADD;
-        item.a_equation = GL_FUNC_ADD;
-        item.src_rgb_func = GL_ONE;
-        item.dst_rgb_func = GL_ZERO;
-        item.src_a_func = GL_ONE;
-        item.dst_a_func = GL_ZERO;
-    }
-
-    independant_blend.enabled = false;
-
-    blend_color.red = 0.0f;
-    blend_color.green = 0.0f;
-    blend_color.blue = 0.0f;
-    blend_color.alpha = 0.0f;
-
-    logic_op.enabled = false;
-    logic_op.operation = GL_COPY;
-
-    draw.read_framebuffer = 0;
-    draw.draw_framebuffer = 0;
-    draw.vertex_array = 0;
-    draw.shader_program = 0;
-    draw.program_pipeline = 0;
-
-    clip_distance = {};
-
-    point.size = 1;
-
-    fragment_color_clamp.enabled = false;
-
-    depth_clamp.far_plane = false;
-    depth_clamp.near_plane = false;
-
-    polygon_offset.fill_enable = false;
-    polygon_offset.line_enable = false;
-    polygon_offset.point_enable = false;
-    polygon_offset.factor = 0.0f;
-    polygon_offset.units = 0.0f;
-    polygon_offset.clamp = 0.0f;
+} // Anonymous namespace
 
-    alpha_test.enabled = false;
-    alpha_test.func = GL_ALWAYS;
-    alpha_test.ref = 0.0f;
-}
+OpenGLState::OpenGLState() = default;
 
 void OpenGLState::SetDefaultViewports() {
-    for (auto& item : viewports) {
-        item.x = 0;
-        item.y = 0;
-        item.width = 0;
-        item.height = 0;
-        item.depth_range_near = 0.0f;
-        item.depth_range_far = 1.0f;
-        item.scissor.enabled = false;
-        item.scissor.x = 0;
-        item.scissor.y = 0;
-        item.scissor.width = 0;
-        item.scissor.height = 0;
-    }
+    viewports.fill(Viewport{});
 
     depth_clamp.far_plane = false;
     depth_clamp.near_plane = false;
 }
 
-void OpenGLState::ApplyDefaultState() {
-    glEnable(GL_BLEND);
-    glDisable(GL_FRAMEBUFFER_SRGB);
-    glDisable(GL_CULL_FACE);
-    glDisable(GL_DEPTH_TEST);
-    glDisable(GL_PRIMITIVE_RESTART);
-    glDisable(GL_STENCIL_TEST);
-    glDisable(GL_COLOR_LOGIC_OP);
-    glDisable(GL_SCISSOR_TEST);
-}
-
-void OpenGLState::ApplyFramebufferState() const {
+void OpenGLState::ApplyFramebufferState() {
     if (UpdateValue(cur_state.draw.read_framebuffer, draw.read_framebuffer)) {
         glBindFramebuffer(GL_READ_FRAMEBUFFER, draw.read_framebuffer);
     }
@@ -218,52 +101,52 @@ void OpenGLState::ApplyFramebufferState() const {
     }
 }
 
-void OpenGLState::ApplyVertexArrayState() const {
+void OpenGLState::ApplyVertexArrayState() {
     if (UpdateValue(cur_state.draw.vertex_array, draw.vertex_array)) {
         glBindVertexArray(draw.vertex_array);
     }
 }
 
-void OpenGLState::ApplyShaderProgram() const {
+void OpenGLState::ApplyShaderProgram() {
     if (UpdateValue(cur_state.draw.shader_program, draw.shader_program)) {
         glUseProgram(draw.shader_program);
     }
 }
 
-void OpenGLState::ApplyProgramPipeline() const {
+void OpenGLState::ApplyProgramPipeline() {
     if (UpdateValue(cur_state.draw.program_pipeline, draw.program_pipeline)) {
         glBindProgramPipeline(draw.program_pipeline);
     }
 }
 
-void OpenGLState::ApplyClipDistances() const {
+void OpenGLState::ApplyClipDistances() {
     for (std::size_t i = 0; i < clip_distance.size(); ++i) {
         Enable(GL_CLIP_DISTANCE0 + static_cast<GLenum>(i), cur_state.clip_distance[i],
                clip_distance[i]);
     }
 }
 
-void OpenGLState::ApplyPointSize() const {
+void OpenGLState::ApplyPointSize() {
     if (UpdateValue(cur_state.point.size, point.size)) {
         glPointSize(point.size);
     }
 }
 
-void OpenGLState::ApplyFragmentColorClamp() const {
+void OpenGLState::ApplyFragmentColorClamp() {
     if (UpdateValue(cur_state.fragment_color_clamp.enabled, fragment_color_clamp.enabled)) {
         glClampColor(GL_CLAMP_FRAGMENT_COLOR_ARB,
                      fragment_color_clamp.enabled ? GL_TRUE : GL_FALSE);
     }
 }
 
-void OpenGLState::ApplyMultisample() const {
+void OpenGLState::ApplyMultisample() {
     Enable(GL_SAMPLE_ALPHA_TO_COVERAGE, cur_state.multisample_control.alpha_to_coverage,
            multisample_control.alpha_to_coverage);
     Enable(GL_SAMPLE_ALPHA_TO_ONE, cur_state.multisample_control.alpha_to_one,
            multisample_control.alpha_to_one);
 }
 
-void OpenGLState::ApplyDepthClamp() const {
+void OpenGLState::ApplyDepthClamp() {
     if (depth_clamp.far_plane == cur_state.depth_clamp.far_plane &&
         depth_clamp.near_plane == cur_state.depth_clamp.near_plane) {
         return;
@@ -276,7 +159,7 @@ void OpenGLState::ApplyDepthClamp() const {
     Enable(GL_DEPTH_CLAMP, depth_clamp.far_plane || depth_clamp.near_plane);
 }
 
-void OpenGLState::ApplySRgb() const {
+void OpenGLState::ApplySRgb() {
     if (cur_state.framebuffer_srgb.enabled == framebuffer_srgb.enabled)
         return;
     cur_state.framebuffer_srgb.enabled = framebuffer_srgb.enabled;
@@ -287,7 +170,7 @@ void OpenGLState::ApplySRgb() const {
     }
 }
 
-void OpenGLState::ApplyCulling() const {
+void OpenGLState::ApplyCulling() {
     Enable(GL_CULL_FACE, cur_state.cull.enabled, cull.enabled);
 
     if (UpdateValue(cur_state.cull.mode, cull.mode)) {
@@ -299,7 +182,12 @@ void OpenGLState::ApplyCulling() const {
     }
 }
 
-void OpenGLState::ApplyColorMask() const {
+void OpenGLState::ApplyColorMask() {
+    if (!dirty.color_mask) {
+        return;
+    }
+    dirty.color_mask = false;
+
     for (std::size_t i = 0; i < Maxwell::NumRenderTargets; ++i) {
         const auto& updated = color_mask[i];
         auto& current = cur_state.color_mask[i];
@@ -314,7 +202,7 @@ void OpenGLState::ApplyColorMask() const {
     }
 }
 
-void OpenGLState::ApplyDepth() const {
+void OpenGLState::ApplyDepth() {
     Enable(GL_DEPTH_TEST, cur_state.depth.test_enabled, depth.test_enabled);
 
     if (cur_state.depth.test_func != depth.test_func) {
@@ -328,7 +216,7 @@ void OpenGLState::ApplyDepth() const {
     }
 }
 
-void OpenGLState::ApplyPrimitiveRestart() const {
+void OpenGLState::ApplyPrimitiveRestart() {
     Enable(GL_PRIMITIVE_RESTART, cur_state.primitive_restart.enabled, primitive_restart.enabled);
 
     if (cur_state.primitive_restart.index != primitive_restart.index) {
@@ -337,7 +225,12 @@ void OpenGLState::ApplyPrimitiveRestart() const {
     }
 }
 
-void OpenGLState::ApplyStencilTest() const {
+void OpenGLState::ApplyStencilTest() {
+    if (!dirty.stencil_state) {
+        return;
+    }
+    dirty.stencil_state = false;
+
     Enable(GL_STENCIL_TEST, cur_state.stencil.test_enabled, stencil.test_enabled);
 
     const auto ConfigStencil = [](GLenum face, const auto& config, auto& current) {
@@ -366,7 +259,7 @@ void OpenGLState::ApplyStencilTest() const {
     ConfigStencil(GL_BACK, stencil.back, cur_state.stencil.back);
 }
 
-void OpenGLState::ApplyViewport() const {
+void OpenGLState::ApplyViewport() {
     for (GLuint i = 0; i < static_cast<GLuint>(Maxwell::NumViewports); ++i) {
         const auto& updated = viewports[i];
         auto& current = cur_state.viewports[i];
@@ -403,7 +296,7 @@ void OpenGLState::ApplyViewport() const {
     }
 }
 
-void OpenGLState::ApplyGlobalBlending() const {
+void OpenGLState::ApplyGlobalBlending() {
     const Blend& updated = blend[0];
     Blend& current = cur_state.blend[0];
 
@@ -427,7 +320,7 @@ void OpenGLState::ApplyGlobalBlending() const {
     }
 }
 
-void OpenGLState::ApplyTargetBlending(std::size_t target, bool force) const {
+void OpenGLState::ApplyTargetBlending(std::size_t target, bool force) {
     const Blend& updated = blend[target];
     Blend& current = cur_state.blend[target];
 
@@ -451,7 +344,12 @@ void OpenGLState::ApplyTargetBlending(std::size_t target, bool force) const {
     }
 }
 
-void OpenGLState::ApplyBlending() const {
+void OpenGLState::ApplyBlending() {
+    if (!dirty.blend_state) {
+        return;
+    }
+    dirty.blend_state = false;
+
     if (independant_blend.enabled) {
         const bool force = independant_blend.enabled != cur_state.independant_blend.enabled;
         for (std::size_t target = 0; target < Maxwell::NumRenderTargets; ++target) {
@@ -470,7 +368,7 @@ void OpenGLState::ApplyBlending() const {
     }
 }
 
-void OpenGLState::ApplyLogicOp() const {
+void OpenGLState::ApplyLogicOp() {
     Enable(GL_COLOR_LOGIC_OP, cur_state.logic_op.enabled, logic_op.enabled);
 
     if (UpdateValue(cur_state.logic_op.operation, logic_op.operation)) {
@@ -478,7 +376,12 @@ void OpenGLState::ApplyLogicOp() const {
     }
 }
 
-void OpenGLState::ApplyPolygonOffset() const {
+void OpenGLState::ApplyPolygonOffset() {
+    if (!dirty.polygon_offset) {
+        return;
+    }
+    dirty.polygon_offset = false;
+
     Enable(GL_POLYGON_OFFSET_FILL, cur_state.polygon_offset.fill_enable,
            polygon_offset.fill_enable);
     Enable(GL_POLYGON_OFFSET_LINE, cur_state.polygon_offset.line_enable,
@@ -499,7 +402,7 @@ void OpenGLState::ApplyPolygonOffset() const {
     }
 }
 
-void OpenGLState::ApplyAlphaTest() const {
+void OpenGLState::ApplyAlphaTest() {
     Enable(GL_ALPHA_TEST, cur_state.alpha_test.enabled, alpha_test.enabled);
     if (UpdateTie(std::tie(cur_state.alpha_test.func, cur_state.alpha_test.ref),
                   std::tie(alpha_test.func, alpha_test.ref))) {
@@ -507,19 +410,25 @@ void OpenGLState::ApplyAlphaTest() const {
     }
 }
 
-void OpenGLState::ApplyTextures() const {
+void OpenGLState::ApplyClipControl() {
+    if (UpdateValue(cur_state.clip_control.origin, clip_control.origin)) {
+        glClipControl(clip_control.origin, GL_NEGATIVE_ONE_TO_ONE);
+    }
+}
+
+void OpenGLState::ApplyTextures() {
     if (const auto update = UpdateArray(cur_state.textures, textures)) {
         glBindTextures(update->first, update->second, textures.data() + update->first);
     }
 }
 
-void OpenGLState::ApplySamplers() const {
+void OpenGLState::ApplySamplers() {
     if (const auto update = UpdateArray(cur_state.samplers, samplers)) {
         glBindSamplers(update->first, update->second, samplers.data() + update->first);
     }
 }
 
-void OpenGLState::ApplyImages() const {
+void OpenGLState::ApplyImages() {
     if (const auto update = UpdateArray(cur_state.images, images)) {
         glBindImageTextures(update->first, update->second, images.data() + update->first);
     }
@@ -535,33 +444,22 @@ void OpenGLState::Apply() {
     ApplyPointSize();
     ApplyFragmentColorClamp();
     ApplyMultisample();
-    if (dirty.color_mask) {
-        ApplyColorMask();
-        dirty.color_mask = false;
-    }
+    ApplyColorMask();
     ApplyDepthClamp();
     ApplyViewport();
-    if (dirty.stencil_state) {
-        ApplyStencilTest();
-        dirty.stencil_state = false;
-    }
+    ApplyStencilTest();
     ApplySRgb();
     ApplyCulling();
     ApplyDepth();
     ApplyPrimitiveRestart();
-    if (dirty.blend_state) {
-        ApplyBlending();
-        dirty.blend_state = false;
-    }
+    ApplyBlending();
     ApplyLogicOp();
     ApplyTextures();
     ApplySamplers();
     ApplyImages();
-    if (dirty.polygon_offset) {
-        ApplyPolygonOffset();
-        dirty.polygon_offset = false;
-    }
+    ApplyPolygonOffset();
     ApplyAlphaTest();
+    ApplyClipControl();
 }
 
 void OpenGLState::EmulateViewportWithScissor() {
diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h
index c358d3b38..eaff22bda 100644
--- a/src/video_core/renderer_opengl/gl_state.h
+++ b/src/video_core/renderer_opengl/gl_state.h
@@ -5,168 +5,150 @@
 #pragma once
 
 #include <array>
+#include <type_traits>
 #include <glad/glad.h>
 #include "video_core/engines/maxwell_3d.h"
 
 namespace OpenGL {
 
-namespace TextureUnits {
-
-struct TextureUnit {
-    GLint id;
-    constexpr GLenum Enum() const {
-        return static_cast<GLenum>(GL_TEXTURE0 + id);
-    }
-};
-
-constexpr TextureUnit MaxwellTexture(int unit) {
-    return TextureUnit{unit};
-}
-
-constexpr TextureUnit LightingLUT{3};
-constexpr TextureUnit FogLUT{4};
-constexpr TextureUnit ProcTexNoiseLUT{5};
-constexpr TextureUnit ProcTexColorMap{6};
-constexpr TextureUnit ProcTexAlphaMap{7};
-constexpr TextureUnit ProcTexLUT{8};
-constexpr TextureUnit ProcTexDiffLUT{9};
-
-} // namespace TextureUnits
-
 class OpenGLState {
 public:
     struct {
-        bool enabled; // GL_FRAMEBUFFER_SRGB
+        bool enabled = false; // GL_FRAMEBUFFER_SRGB
     } framebuffer_srgb;
 
     struct {
-        bool alpha_to_coverage; // GL_ALPHA_TO_COVERAGE
-        bool alpha_to_one;      // GL_ALPHA_TO_ONE
+        bool alpha_to_coverage = false; // GL_SAMPLE_ALPHA_TO_COVERAGE
+        bool alpha_to_one = false;      // GL_SAMPLE_ALPHA_TO_ONE
     } multisample_control;
 
     struct {
-        bool enabled; // GL_CLAMP_FRAGMENT_COLOR_ARB
+        bool enabled = false; // GL_CLAMP_FRAGMENT_COLOR_ARB
     } fragment_color_clamp;
 
     struct {
-        bool far_plane;
-        bool near_plane;
+        bool far_plane = false;
+        bool near_plane = false;
     } depth_clamp; // GL_DEPTH_CLAMP
 
     struct {
-        bool enabled;      // GL_CULL_FACE
-        GLenum mode;       // GL_CULL_FACE_MODE
-        GLenum front_face; // GL_FRONT_FACE
+        bool enabled = false;       // GL_CULL_FACE
+        GLenum mode = GL_BACK;      // GL_CULL_FACE_MODE
+        GLenum front_face = GL_CCW; // GL_FRONT_FACE
     } cull;
 
     struct {
-        bool test_enabled;    // GL_DEPTH_TEST
-        GLenum test_func;     // GL_DEPTH_FUNC
-        GLboolean write_mask; // GL_DEPTH_WRITEMASK
+        bool test_enabled = false;      // GL_DEPTH_TEST
+        GLboolean write_mask = GL_TRUE; // GL_DEPTH_WRITEMASK
+        GLenum test_func = GL_LESS;     // GL_DEPTH_FUNC
     } depth;
 
     struct {
-        bool enabled;
-        GLuint index;
+        bool enabled = false;
+        GLuint index = 0;
     } primitive_restart; // GL_PRIMITIVE_RESTART
 
     struct ColorMask {
-        GLboolean red_enabled;
-        GLboolean green_enabled;
-        GLboolean blue_enabled;
-        GLboolean alpha_enabled;
+        GLboolean red_enabled = GL_TRUE;
+        GLboolean green_enabled = GL_TRUE;
+        GLboolean blue_enabled = GL_TRUE;
+        GLboolean alpha_enabled = GL_TRUE;
     };
     std::array<ColorMask, Tegra::Engines::Maxwell3D::Regs::NumRenderTargets>
         color_mask; // GL_COLOR_WRITEMASK
     struct {
-        bool test_enabled; // GL_STENCIL_TEST
+        bool test_enabled = false; // GL_STENCIL_TEST
         struct {
-            GLenum test_func;           // GL_STENCIL_FUNC
-            GLint test_ref;             // GL_STENCIL_REF
-            GLuint test_mask;           // GL_STENCIL_VALUE_MASK
-            GLuint write_mask;          // GL_STENCIL_WRITEMASK
-            GLenum action_stencil_fail; // GL_STENCIL_FAIL
-            GLenum action_depth_fail;   // GL_STENCIL_PASS_DEPTH_FAIL
-            GLenum action_depth_pass;   // GL_STENCIL_PASS_DEPTH_PASS
+            GLenum test_func = GL_ALWAYS;         // GL_STENCIL_FUNC
+            GLint test_ref = 0;                   // GL_STENCIL_REF
+            GLuint test_mask = 0xFFFFFFFF;        // GL_STENCIL_VALUE_MASK
+            GLuint write_mask = 0xFFFFFFFF;       // GL_STENCIL_WRITEMASK
+            GLenum action_stencil_fail = GL_KEEP; // GL_STENCIL_FAIL
+            GLenum action_depth_fail = GL_KEEP;   // GL_STENCIL_PASS_DEPTH_FAIL
+            GLenum action_depth_pass = GL_KEEP;   // GL_STENCIL_PASS_DEPTH_PASS
         } front, back;
     } stencil;
 
     struct Blend {
-        bool enabled;        // GL_BLEND
-        GLenum rgb_equation; // GL_BLEND_EQUATION_RGB
-        GLenum a_equation;   // GL_BLEND_EQUATION_ALPHA
-        GLenum src_rgb_func; // GL_BLEND_SRC_RGB
-        GLenum dst_rgb_func; // GL_BLEND_DST_RGB
-        GLenum src_a_func;   // GL_BLEND_SRC_ALPHA
-        GLenum dst_a_func;   // GL_BLEND_DST_ALPHA
+        bool enabled = false;              // GL_BLEND
+        GLenum rgb_equation = GL_FUNC_ADD; // GL_BLEND_EQUATION_RGB
+        GLenum a_equation = GL_FUNC_ADD;   // GL_BLEND_EQUATION_ALPHA
+        GLenum src_rgb_func = GL_ONE;      // GL_BLEND_SRC_RGB
+        GLenum dst_rgb_func = GL_ZERO;     // GL_BLEND_DST_RGB
+        GLenum src_a_func = GL_ONE;        // GL_BLEND_SRC_ALPHA
+        GLenum dst_a_func = GL_ZERO;       // GL_BLEND_DST_ALPHA
     };
     std::array<Blend, Tegra::Engines::Maxwell3D::Regs::NumRenderTargets> blend;
 
     struct {
-        bool enabled;
+        bool enabled = false;
     } independant_blend;
 
     struct {
-        GLclampf red;
-        GLclampf green;
-        GLclampf blue;
-        GLclampf alpha;
+        GLclampf red = 0.0f;
+        GLclampf green = 0.0f;
+        GLclampf blue = 0.0f;
+        GLclampf alpha = 0.0f;
     } blend_color; // GL_BLEND_COLOR
 
     struct {
-        bool enabled; // GL_LOGIC_OP_MODE
-        GLenum operation;
+        bool enabled = false;       // GL_COLOR_LOGIC_OP
+        GLenum operation = GL_COPY; // GL_LOGIC_OP_MODE
     } logic_op;
 
-    std::array<GLuint, Tegra::Engines::Maxwell3D::Regs::NumTextureSamplers> textures{};
-    std::array<GLuint, Tegra::Engines::Maxwell3D::Regs::NumTextureSamplers> samplers{};
-    std::array<GLuint, Tegra::Engines::Maxwell3D::Regs::NumImages> images{};
+    std::array<GLuint, Tegra::Engines::Maxwell3D::Regs::NumTextureSamplers> textures = {};
+    std::array<GLuint, Tegra::Engines::Maxwell3D::Regs::NumTextureSamplers> samplers = {};
+    std::array<GLuint, Tegra::Engines::Maxwell3D::Regs::NumImages> images = {};
 
     struct {
-        GLuint read_framebuffer; // GL_READ_FRAMEBUFFER_BINDING
-        GLuint draw_framebuffer; // GL_DRAW_FRAMEBUFFER_BINDING
-        GLuint vertex_array;     // GL_VERTEX_ARRAY_BINDING
-        GLuint shader_program;   // GL_CURRENT_PROGRAM
-        GLuint program_pipeline; // GL_PROGRAM_PIPELINE_BINDING
+        GLuint read_framebuffer = 0; // GL_READ_FRAMEBUFFER_BINDING
+        GLuint draw_framebuffer = 0; // GL_DRAW_FRAMEBUFFER_BINDING
+        GLuint vertex_array = 0;     // GL_VERTEX_ARRAY_BINDING
+        GLuint shader_program = 0;   // GL_CURRENT_PROGRAM
+        GLuint program_pipeline = 0; // GL_PROGRAM_PIPELINE_BINDING
     } draw;
 
-    struct viewport {
-        GLint x;
-        GLint y;
-        GLint width;
-        GLint height;
-        GLfloat depth_range_near; // GL_DEPTH_RANGE
-        GLfloat depth_range_far;  // GL_DEPTH_RANGE
+    struct Viewport {
+        GLint x = 0;
+        GLint y = 0;
+        GLint width = 0;
+        GLint height = 0;
+        GLfloat depth_range_near = 0.0f; // GL_DEPTH_RANGE
+        GLfloat depth_range_far = 1.0f;  // GL_DEPTH_RANGE
         struct {
-            bool enabled; // GL_SCISSOR_TEST
-            GLint x;
-            GLint y;
-            GLsizei width;
-            GLsizei height;
+            bool enabled = false; // GL_SCISSOR_TEST
+            GLint x = 0;
+            GLint y = 0;
+            GLsizei width = 0;
+            GLsizei height = 0;
         } scissor;
     };
-    std::array<viewport, Tegra::Engines::Maxwell3D::Regs::NumViewports> viewports;
+    std::array<Viewport, Tegra::Engines::Maxwell3D::Regs::NumViewports> viewports;
 
     struct {
-        float size; // GL_POINT_SIZE
+        float size = 1.0f; // GL_POINT_SIZE
     } point;
 
     struct {
-        bool point_enable;
-        bool line_enable;
-        bool fill_enable;
-        GLfloat units;
-        GLfloat factor;
-        GLfloat clamp;
+        bool point_enable = false;
+        bool line_enable = false;
+        bool fill_enable = false;
+        GLfloat units = 0.0f;
+        GLfloat factor = 0.0f;
+        GLfloat clamp = 0.0f;
     } polygon_offset;
 
     struct {
-        bool enabled; // GL_ALPHA_TEST
-        GLenum func;  // GL_ALPHA_TEST_FUNC
-        GLfloat ref;  // GL_ALPHA_TEST_REF
+        bool enabled = false;    // GL_ALPHA_TEST
+        GLenum func = GL_ALWAYS; // GL_ALPHA_TEST_FUNC
+        GLfloat ref = 0.0f;      // GL_ALPHA_TEST_REF
     } alpha_test;
 
-    std::array<bool, 8> clip_distance; // GL_CLIP_DISTANCE
+    std::array<bool, 8> clip_distance = {}; // GL_CLIP_DISTANCE
+
+    struct {
+        GLenum origin = GL_LOWER_LEFT;
+    } clip_control;
 
     OpenGLState();
 
@@ -179,34 +161,32 @@ public:
     /// Apply this state as the current OpenGL state
     void Apply();
 
-    void ApplyFramebufferState() const;
-    void ApplyVertexArrayState() const;
-    void ApplyShaderProgram() const;
-    void ApplyProgramPipeline() const;
-    void ApplyClipDistances() const;
-    void ApplyPointSize() const;
-    void ApplyFragmentColorClamp() const;
-    void ApplyMultisample() const;
-    void ApplySRgb() const;
-    void ApplyCulling() const;
-    void ApplyColorMask() const;
-    void ApplyDepth() const;
-    void ApplyPrimitiveRestart() const;
-    void ApplyStencilTest() const;
-    void ApplyViewport() const;
-    void ApplyTargetBlending(std::size_t target, bool force) const;
-    void ApplyGlobalBlending() const;
-    void ApplyBlending() const;
-    void ApplyLogicOp() const;
-    void ApplyTextures() const;
-    void ApplySamplers() const;
-    void ApplyImages() const;
-    void ApplyDepthClamp() const;
-    void ApplyPolygonOffset() const;
-    void ApplyAlphaTest() const;
-
-    /// Set the initial OpenGL state
-    static void ApplyDefaultState();
+    void ApplyFramebufferState();
+    void ApplyVertexArrayState();
+    void ApplyShaderProgram();
+    void ApplyProgramPipeline();
+    void ApplyClipDistances();
+    void ApplyPointSize();
+    void ApplyFragmentColorClamp();
+    void ApplyMultisample();
+    void ApplySRgb();
+    void ApplyCulling();
+    void ApplyColorMask();
+    void ApplyDepth();
+    void ApplyPrimitiveRestart();
+    void ApplyStencilTest();
+    void ApplyViewport();
+    void ApplyTargetBlending(std::size_t target, bool force);
+    void ApplyGlobalBlending();
+    void ApplyBlending();
+    void ApplyLogicOp();
+    void ApplyTextures();
+    void ApplySamplers();
+    void ApplyImages();
+    void ApplyDepthClamp();
+    void ApplyPolygonOffset();
+    void ApplyAlphaTest();
+    void ApplyClipControl();
 
     /// Resets any references to the given resource
     OpenGLState& UnbindTexture(GLuint handle);
@@ -253,5 +233,6 @@ private:
         bool color_mask;
     } dirty{};
 };
+static_assert(std::is_trivially_copyable_v<OpenGLState>);
 
 } // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp
index 173b76c4e..4659e098f 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp
@@ -23,7 +23,6 @@ namespace OpenGL {
 using Tegra::Texture::SwizzleSource;
 using VideoCore::MortonSwizzleMode;
 
-using VideoCore::Surface::ComponentType;
 using VideoCore::Surface::PixelFormat;
 using VideoCore::Surface::SurfaceCompression;
 using VideoCore::Surface::SurfaceTarget;
@@ -40,102 +39,95 @@ struct FormatTuple {
     GLint internal_format;
     GLenum format;
     GLenum type;
-    ComponentType component_type;
     bool compressed;
 };
 
 constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> tex_format_tuples = {{
-    {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, ComponentType::UNorm, false}, // ABGR8U
-    {GL_RGBA8, GL_RGBA, GL_BYTE, ComponentType::SNorm, false},                     // ABGR8S
-    {GL_RGBA8UI, GL_RGBA_INTEGER, GL_UNSIGNED_BYTE, ComponentType::UInt, false},   // ABGR8UI
-    {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV, ComponentType::UNorm, false}, // B5G6R5U
-    {GL_RGB10_A2, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV, ComponentType::UNorm,
-     false}, // A2B10G10R10U
-    {GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV, ComponentType::UNorm, false}, // A1B5G5R5U
-    {GL_R8, GL_RED, GL_UNSIGNED_BYTE, ComponentType::UNorm, false},                    // R8U
-    {GL_R8UI, GL_RED_INTEGER, GL_UNSIGNED_BYTE, ComponentType::UInt, false},           // R8UI
-    {GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT, ComponentType::Float, false},                 // RGBA16F
-    {GL_RGBA16, GL_RGBA, GL_UNSIGNED_SHORT, ComponentType::UNorm, false},              // RGBA16U
-    {GL_RGBA16UI, GL_RGBA_INTEGER, GL_UNSIGNED_SHORT, ComponentType::UInt, false},     // RGBA16UI
-    {GL_R11F_G11F_B10F, GL_RGB, GL_UNSIGNED_INT_10F_11F_11F_REV, ComponentType::Float,
-     false},                                                                     // R11FG11FB10F
-    {GL_RGBA32UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT, ComponentType::UInt, false}, // RGBA32UI
-    {GL_COMPRESSED_RGBA_S3TC_DXT1_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm,
-     true}, // DXT1
-    {GL_COMPRESSED_RGBA_S3TC_DXT3_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm,
-     true}, // DXT23
-    {GL_COMPRESSED_RGBA_S3TC_DXT5_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm,
-     true},                                                                                 // DXT45
-    {GL_COMPRESSED_RED_RGTC1, GL_RED, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm, true}, // DXN1
-    {GL_COMPRESSED_RG_RGTC2, GL_RG, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm,
-     true},                                                                     // DXN2UNORM
-    {GL_COMPRESSED_SIGNED_RG_RGTC2, GL_RG, GL_INT, ComponentType::SNorm, true}, // DXN2SNORM
-    {GL_COMPRESSED_RGBA_BPTC_UNORM, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm,
-     true}, // BC7U
-    {GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT, GL_RGB, GL_UNSIGNED_INT_8_8_8_8, ComponentType::Float,
-     true}, // BC6H_UF16
-    {GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT, GL_RGB, GL_UNSIGNED_INT_8_8_8_8, ComponentType::Float,
-     true},                                                                    // BC6H_SF16
-    {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false},        // ASTC_2D_4X4
-    {GL_RGBA8, GL_BGRA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false},        // BGRA8
-    {GL_RGBA32F, GL_RGBA, GL_FLOAT, ComponentType::Float, false},              // RGBA32F
-    {GL_RG32F, GL_RG, GL_FLOAT, ComponentType::Float, false},                  // RG32F
-    {GL_R32F, GL_RED, GL_FLOAT, ComponentType::Float, false},                  // R32F
-    {GL_R16F, GL_RED, GL_HALF_FLOAT, ComponentType::Float, false},             // R16F
-    {GL_R16, GL_RED, GL_UNSIGNED_SHORT, ComponentType::UNorm, false},          // R16U
-    {GL_R16_SNORM, GL_RED, GL_SHORT, ComponentType::SNorm, false},             // R16S
-    {GL_R16UI, GL_RED_INTEGER, GL_UNSIGNED_SHORT, ComponentType::UInt, false}, // R16UI
-    {GL_R16I, GL_RED_INTEGER, GL_SHORT, ComponentType::SInt, false},           // R16I
-    {GL_RG16, GL_RG, GL_UNSIGNED_SHORT, ComponentType::UNorm, false},          // RG16
-    {GL_RG16F, GL_RG, GL_HALF_FLOAT, ComponentType::Float, false},             // RG16F
-    {GL_RG16UI, GL_RG_INTEGER, GL_UNSIGNED_SHORT, ComponentType::UInt, false}, // RG16UI
-    {GL_RG16I, GL_RG_INTEGER, GL_SHORT, ComponentType::SInt, false},           // RG16I
-    {GL_RG16_SNORM, GL_RG, GL_SHORT, ComponentType::SNorm, false},             // RG16S
-    {GL_RGB32F, GL_RGB, GL_FLOAT, ComponentType::Float, false},                // RGB32F
-    {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, ComponentType::UNorm,
-     false},                                                                   // RGBA8_SRGB
-    {GL_RG8, GL_RG, GL_UNSIGNED_BYTE, ComponentType::UNorm, false},            // RG8U
-    {GL_RG8, GL_RG, GL_BYTE, ComponentType::SNorm, false},                     // RG8S
-    {GL_RG32UI, GL_RG_INTEGER, GL_UNSIGNED_INT, ComponentType::UInt, false},   // RG32UI
-    {GL_RGB16F, GL_RGBA16, GL_HALF_FLOAT, ComponentType::Float, false},        // RGBX16F
-    {GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT, ComponentType::UInt, false},   // R32UI
-    {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false},        // ASTC_2D_8X8
-    {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false},        // ASTC_2D_8X5
-    {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false},        // ASTC_2D_5X4
-    {GL_SRGB8_ALPHA8, GL_BGRA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // BGRA8
+    {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, false},                        // ABGR8U
+    {GL_RGBA8, GL_RGBA, GL_BYTE, false},                                            // ABGR8S
+    {GL_RGBA8UI, GL_RGBA_INTEGER, GL_UNSIGNED_BYTE, false},                         // ABGR8UI
+    {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV, false},                        // B5G6R5U
+    {GL_RGB10_A2, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV, false},                  // A2B10G10R10U
+    {GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV, false},                    // A1B5G5R5U
+    {GL_R8, GL_RED, GL_UNSIGNED_BYTE, false},                                       // R8U
+    {GL_R8UI, GL_RED_INTEGER, GL_UNSIGNED_BYTE, false},                             // R8UI
+    {GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT, false},                                    // RGBA16F
+    {GL_RGBA16, GL_RGBA, GL_UNSIGNED_SHORT, false},                                 // RGBA16U
+    {GL_RGBA16UI, GL_RGBA_INTEGER, GL_UNSIGNED_SHORT, false},                       // RGBA16UI
+    {GL_R11F_G11F_B10F, GL_RGB, GL_UNSIGNED_INT_10F_11F_11F_REV, false},            // R11FG11FB10F
+    {GL_RGBA32UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT, false},                         // RGBA32UI
+    {GL_COMPRESSED_RGBA_S3TC_DXT1_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, true},     // DXT1
+    {GL_COMPRESSED_RGBA_S3TC_DXT3_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, true},     // DXT23
+    {GL_COMPRESSED_RGBA_S3TC_DXT5_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, true},     // DXT45
+    {GL_COMPRESSED_RED_RGTC1, GL_RED, GL_UNSIGNED_INT_8_8_8_8, true},               // DXN1
+    {GL_COMPRESSED_RG_RGTC2, GL_RG, GL_UNSIGNED_INT_8_8_8_8, true},                 // DXN2UNORM
+    {GL_COMPRESSED_SIGNED_RG_RGTC2, GL_RG, GL_INT, true},                           // DXN2SNORM
+    {GL_COMPRESSED_RGBA_BPTC_UNORM, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, true},        // BC7U
+    {GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT, GL_RGB, GL_UNSIGNED_INT_8_8_8_8, true}, // BC6H_UF16
+    {GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT, GL_RGB, GL_UNSIGNED_INT_8_8_8_8, true},   // BC6H_SF16
+    {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false},                                   // ASTC_2D_4X4
+    {GL_RGBA8, GL_BGRA, GL_UNSIGNED_BYTE, false},                                   // BGRA8
+    {GL_RGBA32F, GL_RGBA, GL_FLOAT, false},                                         // RGBA32F
+    {GL_RG32F, GL_RG, GL_FLOAT, false},                                             // RG32F
+    {GL_R32F, GL_RED, GL_FLOAT, false},                                             // R32F
+    {GL_R16F, GL_RED, GL_HALF_FLOAT, false},                                        // R16F
+    {GL_R16, GL_RED, GL_UNSIGNED_SHORT, false},                                     // R16U
+    {GL_R16_SNORM, GL_RED, GL_SHORT, false},                                        // R16S
+    {GL_R16UI, GL_RED_INTEGER, GL_UNSIGNED_SHORT, false},                           // R16UI
+    {GL_R16I, GL_RED_INTEGER, GL_SHORT, false},                                     // R16I
+    {GL_RG16, GL_RG, GL_UNSIGNED_SHORT, false},                                     // RG16
+    {GL_RG16F, GL_RG, GL_HALF_FLOAT, false},                                        // RG16F
+    {GL_RG16UI, GL_RG_INTEGER, GL_UNSIGNED_SHORT, false},                           // RG16UI
+    {GL_RG16I, GL_RG_INTEGER, GL_SHORT, false},                                     // RG16I
+    {GL_RG16_SNORM, GL_RG, GL_SHORT, false},                                        // RG16S
+    {GL_RGB32F, GL_RGB, GL_FLOAT, false},                                           // RGB32F
+    {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, false},                 // RGBA8_SRGB
+    {GL_RG8, GL_RG, GL_UNSIGNED_BYTE, false},                                       // RG8U
+    {GL_RG8, GL_RG, GL_BYTE, false},                                                // RG8S
+    {GL_RG32UI, GL_RG_INTEGER, GL_UNSIGNED_INT, false},                             // RG32UI
+    {GL_RGB16F, GL_RGBA, GL_HALF_FLOAT, false},                                     // RGBX16F
+    {GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT, false},                             // R32UI
+    {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false},                                   // ASTC_2D_8X8
+    {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false},                                   // ASTC_2D_8X5
+    {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false},                                   // ASTC_2D_5X4
+    {GL_SRGB8_ALPHA8, GL_BGRA, GL_UNSIGNED_BYTE, false},                            // BGRA8_SRGB
     // Compressed sRGB formats
-    {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm,
-     true}, // DXT1_SRGB
-    {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm,
-     true}, // DXT23_SRGB
-    {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm,
-     true}, // DXT45_SRGB
-    {GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm,
-     true},                                                                    // BC7U_SRGB
-    {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_4X4_SRGB
-    {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_8X8_SRGB
-    {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_8X5_SRGB
-    {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_5X4_SRGB
-    {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false},        // ASTC_2D_5X5
-    {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_5X5_SRGB
-    {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false},        // ASTC_2D_10X8
-    {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_10X8_SRGB
+    {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, true}, // DXT1_SRGB
+    {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, true}, // DXT23_SRGB
+    {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, true}, // DXT45_SRGB
+    {GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, true},    // BC7U_SRGB
+    {GL_RGBA4, GL_RGBA, GL_UNSIGNED_SHORT_4_4_4_4_REV, false},                        // R4G4B4A4U
+    {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, false},      // ASTC_2D_4X4_SRGB
+    {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, false},      // ASTC_2D_8X8_SRGB
+    {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, false},      // ASTC_2D_8X5_SRGB
+    {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, false},      // ASTC_2D_5X4_SRGB
+    {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false},             // ASTC_2D_5X5
+    {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, false},      // ASTC_2D_5X5_SRGB
+    {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false},             // ASTC_2D_10X8
+    {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, false},      // ASTC_2D_10X8_SRGB
+    {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false},             // ASTC_2D_6X6
+    {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, false},      // ASTC_2D_6X6_SRGB
+    {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false},             // ASTC_2D_10X10
+    {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, false},      // ASTC_2D_10X10_SRGB
+    {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false},             // ASTC_2D_12X12
+    {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, false},      // ASTC_2D_12X12_SRGB
+    {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false},             // ASTC_2D_8X6
+    {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, false},      // ASTC_2D_8X6_SRGB
+    {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false},             // ASTC_2D_6X5
+    {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE, false},      // ASTC_2D_6X5_SRGB
+    {GL_RGB9_E5, GL_RGB, GL_UNSIGNED_INT_5_9_9_9_REV, false}, // E5B9G9R9F
 
     // Depth formats
-    {GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT, ComponentType::Float, false}, // Z32F
-    {GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT, ComponentType::UNorm,
-     false}, // Z16
+    {GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT, false},         // Z32F
+    {GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT, false}, // Z16
 
     // DepthStencil formats
-    {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, ComponentType::UNorm,
-     false}, // Z24S8
-    {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, ComponentType::UNorm,
-     false}, // S8Z24
-    {GL_DEPTH32F_STENCIL8, GL_DEPTH_STENCIL, GL_FLOAT_32_UNSIGNED_INT_24_8_REV,
-     ComponentType::Float, false}, // Z32FS8
+    {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, false},               // Z24S8
+    {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, false},               // S8Z24
+    {GL_DEPTH32F_STENCIL8, GL_DEPTH_STENCIL, GL_FLOAT_32_UNSIGNED_INT_24_8_REV, false}, // Z32FS8
 }};
 
-const FormatTuple& GetFormatTuple(PixelFormat pixel_format, ComponentType component_type) {
+const FormatTuple& GetFormatTuple(PixelFormat pixel_format) {
     ASSERT(static_cast<std::size_t>(pixel_format) < tex_format_tuples.size());
     const auto& format{tex_format_tuples[static_cast<std::size_t>(pixel_format)]};
     return format;
@@ -237,7 +229,7 @@ OGLTexture CreateTexture(const SurfaceParams& params, GLenum target, GLenum inte
 
 CachedSurface::CachedSurface(const GPUVAddr gpu_addr, const SurfaceParams& params)
     : VideoCommon::SurfaceBase<View>(gpu_addr, params) {
-    const auto& tuple{GetFormatTuple(params.pixel_format, params.component_type)};
+    const auto& tuple{GetFormatTuple(params.pixel_format)};
     internal_format = tuple.internal_format;
     format = tuple.format;
     type = tuple.type;
@@ -439,8 +431,7 @@ OGLTextureView CachedSurfaceView::CreateTextureView() const {
     texture_view.Create();
 
     const GLuint handle{texture_view.handle};
-    const FormatTuple& tuple{
-        GetFormatTuple(owner_params.pixel_format, owner_params.component_type)};
+    const FormatTuple& tuple{GetFormatTuple(owner_params.pixel_format)};
 
     glTextureView(handle, target, surface.texture.handle, tuple.internal_format, params.base_level,
                   params.num_levels, params.base_layer, params.num_layers);
@@ -550,8 +541,8 @@ void TextureCacheOpenGL::BufferCopy(Surface& src_surface, Surface& dst_surface)
     const auto& dst_params = dst_surface->GetSurfaceParams();
     UNIMPLEMENTED_IF(src_params.num_levels > 1 || dst_params.num_levels > 1);
 
-    const auto source_format = GetFormatTuple(src_params.pixel_format, src_params.component_type);
-    const auto dest_format = GetFormatTuple(dst_params.pixel_format, dst_params.component_type);
+    const auto source_format = GetFormatTuple(src_params.pixel_format);
+    const auto dest_format = GetFormatTuple(dst_params.pixel_format);
 
     const std::size_t source_size = src_surface->GetHostSizeInBytes();
     const std::size_t dest_size = dst_surface->GetHostSizeInBytes();
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index 1e6ef66ab..7646cbb0e 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -102,8 +102,6 @@ RendererOpenGL::RendererOpenGL(Core::Frontend::EmuWindow& emu_window, Core::Syst
 RendererOpenGL::~RendererOpenGL() = default;
 
 void RendererOpenGL::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
-    system.GetPerfStats().EndSystemFrame();
-
     // Maintain the rasterizer's state as a priority
     OpenGLState prev_state = OpenGLState::GetCurState();
     state.AllDirty();
@@ -135,9 +133,6 @@ void RendererOpenGL::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
 
     render_window.PollEvents();
 
-    system.FrameLimiter().DoFrameLimiting(system.CoreTiming().GetGlobalTimeUs());
-    system.GetPerfStats().BeginSystemFrame();
-
     // Restore the rasterizer state
     prev_state.AllDirty();
     prev_state.Apply();
@@ -328,10 +323,12 @@ void RendererOpenGL::DrawScreenTriangles(const ScreenInfo& screen_info, float x,
     // (e.g. handheld mode) on a 1920x1080 framebuffer.
     f32 scale_u = 1.f, scale_v = 1.f;
     if (framebuffer_crop_rect.GetWidth() > 0) {
-        scale_u = static_cast<f32>(framebuffer_crop_rect.GetWidth()) / screen_info.texture.width;
+        scale_u = static_cast<f32>(framebuffer_crop_rect.GetWidth()) /
+                  static_cast<f32>(screen_info.texture.width);
     }
     if (framebuffer_crop_rect.GetHeight() > 0) {
-        scale_v = static_cast<f32>(framebuffer_crop_rect.GetHeight()) / screen_info.texture.height;
+        scale_v = static_cast<f32>(framebuffer_crop_rect.GetHeight()) /
+                  static_cast<f32>(screen_info.texture.height);
     }
 
     std::array<ScreenRectVertex, 4> vertices = {{
diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
index 3c5acda3e..463ed43ae 100644
--- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
+++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp
@@ -95,83 +95,82 @@ vk::CompareOp DepthCompareFunction(Tegra::Texture::DepthCompareFunc depth_compar
 } // namespace Sampler
 
 struct FormatTuple {
-    vk::Format format;            ///< Vulkan format
-    ComponentType component_type; ///< Abstracted component type
-    bool attachable;              ///< True when this format can be used as an attachment
+    vk::Format format; ///< Vulkan format
+    bool attachable;   ///< True when this format can be used as an attachment
 };
 
 static constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> tex_format_tuples = {{
-    {vk::Format::eA8B8G8R8UnormPack32, ComponentType::UNorm, true},    // ABGR8U
-    {vk::Format::eUndefined, ComponentType::Invalid, false},           // ABGR8S
-    {vk::Format::eUndefined, ComponentType::Invalid, false},           // ABGR8UI
-    {vk::Format::eB5G6R5UnormPack16, ComponentType::UNorm, false},     // B5G6R5U
-    {vk::Format::eA2B10G10R10UnormPack32, ComponentType::UNorm, true}, // A2B10G10R10U
-    {vk::Format::eUndefined, ComponentType::Invalid, false},           // A1B5G5R5U
-    {vk::Format::eR8Unorm, ComponentType::UNorm, true},                // R8U
-    {vk::Format::eUndefined, ComponentType::Invalid, false},           // R8UI
-    {vk::Format::eUndefined, ComponentType::Invalid, false},           // RGBA16F
-    {vk::Format::eUndefined, ComponentType::Invalid, false},           // RGBA16U
-    {vk::Format::eUndefined, ComponentType::Invalid, false},           // RGBA16UI
-    {vk::Format::eUndefined, ComponentType::Invalid, false},           // R11FG11FB10F
-    {vk::Format::eUndefined, ComponentType::Invalid, false},           // RGBA32UI
-    {vk::Format::eBc1RgbaUnormBlock, ComponentType::UNorm, false},     // DXT1
-    {vk::Format::eBc2UnormBlock, ComponentType::UNorm, false},         // DXT23
-    {vk::Format::eBc3UnormBlock, ComponentType::UNorm, false},         // DXT45
-    {vk::Format::eBc4UnormBlock, ComponentType::UNorm, false},         // DXN1
-    {vk::Format::eUndefined, ComponentType::Invalid, false},           // DXN2UNORM
-    {vk::Format::eUndefined, ComponentType::Invalid, false},           // DXN2SNORM
-    {vk::Format::eUndefined, ComponentType::Invalid, false},           // BC7U
-    {vk::Format::eUndefined, ComponentType::Invalid, false},           // BC6H_UF16
-    {vk::Format::eUndefined, ComponentType::Invalid, false},           // BC6H_SF16
-    {vk::Format::eUndefined, ComponentType::Invalid, false},           // ASTC_2D_4X4
-    {vk::Format::eUndefined, ComponentType::Invalid, false},           // BGRA8
-    {vk::Format::eUndefined, ComponentType::Invalid, false},           // RGBA32F
-    {vk::Format::eUndefined, ComponentType::Invalid, false},           // RG32F
-    {vk::Format::eUndefined, ComponentType::Invalid, false},           // R32F
-    {vk::Format::eUndefined, ComponentType::Invalid, false},           // R16F
-    {vk::Format::eUndefined, ComponentType::Invalid, false},           // R16U
-    {vk::Format::eUndefined, ComponentType::Invalid, false},           // R16S
-    {vk::Format::eUndefined, ComponentType::Invalid, false},           // R16UI
-    {vk::Format::eUndefined, ComponentType::Invalid, false},           // R16I
-    {vk::Format::eUndefined, ComponentType::Invalid, false},           // RG16
-    {vk::Format::eUndefined, ComponentType::Invalid, false},           // RG16F
-    {vk::Format::eUndefined, ComponentType::Invalid, false},           // RG16UI
-    {vk::Format::eUndefined, ComponentType::Invalid, false},           // RG16I
-    {vk::Format::eUndefined, ComponentType::Invalid, false},           // RG16S
-    {vk::Format::eUndefined, ComponentType::Invalid, false},           // RGB32F
-    {vk::Format::eA8B8G8R8SrgbPack32, ComponentType::UNorm, true},     // RGBA8_SRGB
-    {vk::Format::eUndefined, ComponentType::Invalid, false},           // RG8U
-    {vk::Format::eUndefined, ComponentType::Invalid, false},           // RG8S
-    {vk::Format::eUndefined, ComponentType::Invalid, false},           // RG32UI
-    {vk::Format::eUndefined, ComponentType::Invalid, false},           // RGBX16F
-    {vk::Format::eUndefined, ComponentType::Invalid, false},           // R32UI
-    {vk::Format::eUndefined, ComponentType::Invalid, false},           // ASTC_2D_8X8
-    {vk::Format::eUndefined, ComponentType::Invalid, false},           // ASTC_2D_8X5
-    {vk::Format::eUndefined, ComponentType::Invalid, false},           // ASTC_2D_5X4
+    {vk::Format::eA8B8G8R8UnormPack32, true},    // ABGR8U
+    {vk::Format::eUndefined, false},             // ABGR8S
+    {vk::Format::eUndefined, false},             // ABGR8UI
+    {vk::Format::eB5G6R5UnormPack16, false},     // B5G6R5U
+    {vk::Format::eA2B10G10R10UnormPack32, true}, // A2B10G10R10U
+    {vk::Format::eUndefined, false},             // A1B5G5R5U
+    {vk::Format::eR8Unorm, true},                // R8U
+    {vk::Format::eUndefined, false},             // R8UI
+    {vk::Format::eUndefined, false},             // RGBA16F
+    {vk::Format::eUndefined, false},             // RGBA16U
+    {vk::Format::eUndefined, false},             // RGBA16UI
+    {vk::Format::eUndefined, false},             // R11FG11FB10F
+    {vk::Format::eUndefined, false},             // RGBA32UI
+    {vk::Format::eBc1RgbaUnormBlock, false},     // DXT1
+    {vk::Format::eBc2UnormBlock, false},         // DXT23
+    {vk::Format::eBc3UnormBlock, false},         // DXT45
+    {vk::Format::eBc4UnormBlock, false},         // DXN1
+    {vk::Format::eUndefined, false},             // DXN2UNORM
+    {vk::Format::eUndefined, false},             // DXN2SNORM
+    {vk::Format::eUndefined, false},             // BC7U
+    {vk::Format::eUndefined, false},             // BC6H_UF16
+    {vk::Format::eUndefined, false},             // BC6H_SF16
+    {vk::Format::eUndefined, false},             // ASTC_2D_4X4
+    {vk::Format::eUndefined, false},             // BGRA8
+    {vk::Format::eUndefined, false},             // RGBA32F
+    {vk::Format::eUndefined, false},             // RG32F
+    {vk::Format::eUndefined, false},             // R32F
+    {vk::Format::eUndefined, false},             // R16F
+    {vk::Format::eUndefined, false},             // R16U
+    {vk::Format::eUndefined, false},             // R16S
+    {vk::Format::eUndefined, false},             // R16UI
+    {vk::Format::eUndefined, false},             // R16I
+    {vk::Format::eUndefined, false},             // RG16
+    {vk::Format::eUndefined, false},             // RG16F
+    {vk::Format::eUndefined, false},             // RG16UI
+    {vk::Format::eUndefined, false},             // RG16I
+    {vk::Format::eUndefined, false},             // RG16S
+    {vk::Format::eUndefined, false},             // RGB32F
+    {vk::Format::eA8B8G8R8SrgbPack32, true},     // RGBA8_SRGB
+    {vk::Format::eUndefined, false},             // RG8U
+    {vk::Format::eUndefined, false},             // RG8S
+    {vk::Format::eUndefined, false},             // RG32UI
+    {vk::Format::eUndefined, false},             // RGBX16F
+    {vk::Format::eUndefined, false},             // R32UI
+    {vk::Format::eUndefined, false},             // ASTC_2D_8X8
+    {vk::Format::eUndefined, false},             // ASTC_2D_8X5
+    {vk::Format::eUndefined, false},             // ASTC_2D_5X4
 
     // Compressed sRGB formats
-    {vk::Format::eUndefined, ComponentType::Invalid, false}, // BGRA8_SRGB
-    {vk::Format::eUndefined, ComponentType::Invalid, false}, // DXT1_SRGB
-    {vk::Format::eUndefined, ComponentType::Invalid, false}, // DXT23_SRGB
-    {vk::Format::eUndefined, ComponentType::Invalid, false}, // DXT45_SRGB
-    {vk::Format::eUndefined, ComponentType::Invalid, false}, // BC7U_SRGB
-    {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_4X4_SRGB
-    {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_8X8_SRGB
-    {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_8X5_SRGB
-    {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_5X4_SRGB
-    {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_5X5
-    {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_5X5_SRGB
-    {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_10X8
-    {vk::Format::eUndefined, ComponentType::Invalid, false}, // ASTC_2D_10X8_SRGB
+    {vk::Format::eUndefined, false}, // BGRA8_SRGB
+    {vk::Format::eUndefined, false}, // DXT1_SRGB
+    {vk::Format::eUndefined, false}, // DXT23_SRGB
+    {vk::Format::eUndefined, false}, // DXT45_SRGB
+    {vk::Format::eUndefined, false}, // BC7U_SRGB
+    {vk::Format::eUndefined, false}, // ASTC_2D_4X4_SRGB
+    {vk::Format::eUndefined, false}, // ASTC_2D_8X8_SRGB
+    {vk::Format::eUndefined, false}, // ASTC_2D_8X5_SRGB
+    {vk::Format::eUndefined, false}, // ASTC_2D_5X4_SRGB
+    {vk::Format::eUndefined, false}, // ASTC_2D_5X5
+    {vk::Format::eUndefined, false}, // ASTC_2D_5X5_SRGB
+    {vk::Format::eUndefined, false}, // ASTC_2D_10X8
+    {vk::Format::eUndefined, false}, // ASTC_2D_10X8_SRGB
 
     // Depth formats
-    {vk::Format::eD32Sfloat, ComponentType::Float, true}, // Z32F
-    {vk::Format::eD16Unorm, ComponentType::UNorm, true},  // Z16
+    {vk::Format::eD32Sfloat, true}, // Z32F
+    {vk::Format::eD16Unorm, true},  // Z16
 
     // DepthStencil formats
-    {vk::Format::eD24UnormS8Uint, ComponentType::UNorm, true}, // Z24S8
-    {vk::Format::eD24UnormS8Uint, ComponentType::UNorm, true}, // S8Z24 (emulated)
-    {vk::Format::eUndefined, ComponentType::Invalid, false},   // Z32FS8
+    {vk::Format::eD24UnormS8Uint, true}, // Z24S8
+    {vk::Format::eD24UnormS8Uint, true}, // S8Z24 (emulated)
+    {vk::Format::eUndefined, false},     // Z32FS8
 }};
 
 static constexpr bool IsZetaFormat(PixelFormat pixel_format) {
@@ -180,14 +179,13 @@ static constexpr bool IsZetaFormat(PixelFormat pixel_format) {
 }
 
 std::pair<vk::Format, bool> SurfaceFormat(const VKDevice& device, FormatType format_type,
-                                          PixelFormat pixel_format, ComponentType component_type) {
+                                          PixelFormat pixel_format) {
     ASSERT(static_cast<std::size_t>(pixel_format) < tex_format_tuples.size());
 
     const auto tuple = tex_format_tuples[static_cast<u32>(pixel_format)];
     UNIMPLEMENTED_IF_MSG(tuple.format == vk::Format::eUndefined,
-                         "Unimplemented texture format with pixel format={} and component type={}",
-                         static_cast<u32>(pixel_format), static_cast<u32>(component_type));
-    ASSERT_MSG(component_type == tuple.component_type, "Component type mismatch");
+                         "Unimplemented texture format with pixel format={}",
+                         static_cast<u32>(pixel_format));
 
     auto usage = vk::FormatFeatureFlagBits::eSampledImage |
                  vk::FormatFeatureFlagBits::eTransferDst | vk::FormatFeatureFlagBits::eTransferSrc;
diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.h b/src/video_core/renderer_vulkan/maxwell_to_vk.h
index 4cadc0721..5b0ffd87a 100644
--- a/src/video_core/renderer_vulkan/maxwell_to_vk.h
+++ b/src/video_core/renderer_vulkan/maxwell_to_vk.h
@@ -16,7 +16,6 @@ namespace Vulkan::MaxwellToVK {
 
 using Maxwell = Tegra::Engines::Maxwell3D::Regs;
 using PixelFormat = VideoCore::Surface::PixelFormat;
-using ComponentType = VideoCore::Surface::ComponentType;
 
 namespace Sampler {
 
@@ -31,7 +30,7 @@ vk::CompareOp DepthCompareFunction(Tegra::Texture::DepthCompareFunc depth_compar
 } // namespace Sampler
 
 std::pair<vk::Format, bool> SurfaceFormat(const VKDevice& device, FormatType format_type,
-                                          PixelFormat pixel_format, ComponentType component_type);
+                                          PixelFormat pixel_format);
 
 vk::ShaderStageFlagBits ShaderStage(Maxwell::ShaderStage stage);
 
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
index 77fc58f25..2850d5b59 100644
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
@@ -88,6 +88,9 @@ bool IsPrecise(Operation operand) {
 
 } // namespace
 
+class ASTDecompiler;
+class ExprDecompiler;
+
 class SPIRVDecompiler : public Sirit::Module {
 public:
     explicit SPIRVDecompiler(const VKDevice& device, const ShaderIR& ir, ShaderStage stage)
@@ -97,27 +100,7 @@ public:
         AddExtension("SPV_KHR_variable_pointers");
     }
 
-    void Decompile() {
-        AllocateBindings();
-        AllocateLabels();
-
-        DeclareVertex();
-        DeclareGeometry();
-        DeclareFragment();
-        DeclareRegisters();
-        DeclarePredicates();
-        DeclareLocalMemory();
-        DeclareInternalFlags();
-        DeclareInputAttributes();
-        DeclareOutputAttributes();
-        DeclareConstantBuffers();
-        DeclareGlobalBuffers();
-        DeclareSamplers();
-
-        execute_function =
-            Emit(OpFunction(t_void, spv::FunctionControlMask::Inline, TypeFunction(t_void)));
-        Emit(OpLabel());
-
+    void DecompileBranchMode() {
         const u32 first_address = ir.GetBasicBlocks().begin()->first;
         const Id loop_label = OpLabel("loop");
         const Id merge_label = OpLabel("merge");
@@ -174,6 +157,43 @@ public:
         Emit(continue_label);
         Emit(OpBranch(loop_label));
         Emit(merge_label);
+    }
+
+    void DecompileAST();
+
+    void Decompile() {
+        const bool is_fully_decompiled = ir.IsDecompiled();
+        AllocateBindings();
+        if (!is_fully_decompiled) {
+            AllocateLabels();
+        }
+
+        DeclareVertex();
+        DeclareGeometry();
+        DeclareFragment();
+        DeclareRegisters();
+        DeclarePredicates();
+        if (is_fully_decompiled) {
+            DeclareFlowVariables();
+        }
+        DeclareLocalMemory();
+        DeclareInternalFlags();
+        DeclareInputAttributes();
+        DeclareOutputAttributes();
+        DeclareConstantBuffers();
+        DeclareGlobalBuffers();
+        DeclareSamplers();
+
+        execute_function =
+            Emit(OpFunction(t_void, spv::FunctionControlMask::Inline, TypeFunction(t_void)));
+        Emit(OpLabel());
+
+        if (is_fully_decompiled) {
+            DecompileAST();
+        } else {
+            DecompileBranchMode();
+        }
+
         Emit(OpReturn());
         Emit(OpFunctionEnd());
     }
@@ -206,6 +226,9 @@ public:
     }
 
 private:
+    friend class ASTDecompiler;
+    friend class ExprDecompiler;
+
     static constexpr auto INTERNAL_FLAGS_COUNT = static_cast<std::size_t>(InternalFlag::Amount);
 
     void AllocateBindings() {
@@ -294,6 +317,14 @@ private:
         }
     }
 
+    void DeclareFlowVariables() { // One private bool, initialised to false, per AST variable
+        for (u32 index = 0, count = ir.GetASTNumVariables(); index < count; ++index) {
+            const Id variable = OpVariable(t_prv_bool, spv::StorageClass::Private, v_false);
+            Name(variable, fmt::format("flow_var_{}", index));
+            // Keyed by variable index so ExprVar / ASTVarSet can look the pointer up later
+            flow_variables.emplace(index, AddGlobalVariable(variable));
+        }
+
     void DeclareLocalMemory() {
         if (const u64 local_memory_size = header.GetLocalMemorySize(); local_memory_size > 0) {
             const auto element_count = static_cast<u32>(Common::AlignUp(local_memory_size, 4) / 4);
@@ -615,9 +646,15 @@ private:
             Emit(OpBranchConditional(condition, true_label, skip_label));
             Emit(true_label);
 
+            ++conditional_nest_count;
             VisitBasicBlock(conditional->GetCode());
+            --conditional_nest_count;
 
-            Emit(OpBranch(skip_label));
+            if (inside_branch == 0) {
+                Emit(OpBranch(skip_label));
+            } else {
+                inside_branch--;
+            }
             Emit(skip_label);
             return {};
 
@@ -746,6 +783,11 @@ private:
         return {};
     }
 
+    Id FSwizzleAdd(Operation operation) { // Stub: reports UNIMPLEMENTED and yields an empty id
+        UNIMPLEMENTED();
+        return {};
+    }
+
     Id HNegate(Operation operation) {
         UNIMPLEMENTED();
         return {};
@@ -980,7 +1022,11 @@ private:
         UNIMPLEMENTED_IF(!target);
 
         Emit(OpStore(jmp_to, Constant(t_uint, target->GetValue())));
-        BranchingOp([&]() { Emit(OpBranch(continue_label)); });
+        Emit(OpBranch(continue_label));
+        inside_branch = conditional_nest_count;
+        if (conditional_nest_count == 0) {
+            Emit(OpLabel());
+        }
         return {};
     }
 
@@ -988,7 +1034,11 @@ private:
         const Id op_a = VisitOperand<Type::Uint>(operation, 0);
 
         Emit(OpStore(jmp_to, op_a));
-        BranchingOp([&]() { Emit(OpBranch(continue_label)); });
+        Emit(OpBranch(continue_label));
+        inside_branch = conditional_nest_count;
+        if (conditional_nest_count == 0) {
+            Emit(OpLabel());
+        }
         return {};
     }
 
@@ -1015,11 +1065,15 @@ private:
 
         Emit(OpStore(flow_stack_top, previous));
         Emit(OpStore(jmp_to, target));
-        BranchingOp([&]() { Emit(OpBranch(continue_label)); });
+        Emit(OpBranch(continue_label));
+        inside_branch = conditional_nest_count;
+        if (conditional_nest_count == 0) {
+            Emit(OpLabel());
+        }
         return {};
     }
 
-    Id Exit(Operation operation) {
+    Id PreExit() {
         switch (stage) {
         case ShaderStage::Vertex: {
             // TODO(Rodrigo): We should use VK_EXT_depth_range_unrestricted instead, but it doesn't
@@ -1067,12 +1121,35 @@ private:
         }
         }
 
-        BranchingOp([&]() { Emit(OpReturn()); });
+        return {};
+    }
+
+    Id Exit(Operation operation) {
+        PreExit();
+        inside_branch = conditional_nest_count;
+        if (conditional_nest_count > 0) {
+            Emit(OpReturn());
+        } else {
+            const Id dummy = OpLabel();
+            Emit(OpBranch(dummy));
+            Emit(dummy);
+            Emit(OpReturn());
+            Emit(OpLabel());
+        }
         return {};
     }
 
     Id Discard(Operation operation) {
-        BranchingOp([&]() { Emit(OpKill()); });
+        inside_branch = conditional_nest_count;
+        if (conditional_nest_count > 0) {
+            Emit(OpKill());
+        } else {
+            const Id dummy = OpLabel();
+            Emit(OpBranch(dummy));
+            Emit(dummy);
+            Emit(OpKill());
+            Emit(OpLabel());
+        }
         return {};
     }
 
@@ -1123,42 +1200,12 @@ private:
         return {};
     }
 
-    Id ShuffleIndexed(Operation) {
-        UNIMPLEMENTED();
-        return {};
-    }
-
-    Id ShuffleUp(Operation) {
-        UNIMPLEMENTED();
-        return {};
-    }
-
-    Id ShuffleDown(Operation) {
-        UNIMPLEMENTED();
-        return {};
-    }
-
-    Id ShuffleButterfly(Operation) {
-        UNIMPLEMENTED();
-        return {};
-    }
-
-    Id InRangeShuffleIndexed(Operation) {
+    Id ThreadId(Operation) {
         UNIMPLEMENTED();
         return {};
     }
 
-    Id InRangeShuffleUp(Operation) {
-        UNIMPLEMENTED();
-        return {};
-    }
-
-    Id InRangeShuffleDown(Operation) {
-        UNIMPLEMENTED();
-        return {};
-    }
-
-    Id InRangeShuffleButterfly(Operation) {
+    Id ShuffleIndexed(Operation) {
         UNIMPLEMENTED();
         return {};
     }
@@ -1267,17 +1314,6 @@ private:
         return {};
     }
 
-    void BranchingOp(std::function<void()> call) {
-        const Id true_label = OpLabel();
-        const Id skip_label = OpLabel();
-        Emit(OpSelectionMerge(skip_label, spv::SelectionControlMask::Flatten));
-        Emit(OpBranchConditional(v_true, true_label, skip_label, 1, 0));
-        Emit(true_label);
-        call();
-
-        Emit(skip_label);
-    }
-
     std::tuple<Id, Id> CreateFlowStack() {
         // TODO(Rodrigo): Figure out the actual depth of the flow stack, for now it seems unlikely
         // that shaders will use 20 nested SSYs and PBKs.
@@ -1332,6 +1368,7 @@ private:
         &SPIRVDecompiler::Unary<&Module::OpTrunc, Type::Float>,
         &SPIRVDecompiler::Unary<&Module::OpConvertSToF, Type::Float, Type::Int>,
         &SPIRVDecompiler::Unary<&Module::OpConvertUToF, Type::Float, Type::Uint>,
+        &SPIRVDecompiler::FSwizzleAdd,
 
         &SPIRVDecompiler::Binary<&Module::OpIAdd, Type::Int>,
         &SPIRVDecompiler::Binary<&Module::OpIMul, Type::Int>,
@@ -1467,15 +1504,8 @@ private:
         &SPIRVDecompiler::VoteAny,
         &SPIRVDecompiler::VoteEqual,
 
+        &SPIRVDecompiler::ThreadId,
         &SPIRVDecompiler::ShuffleIndexed,
-        &SPIRVDecompiler::ShuffleUp,
-        &SPIRVDecompiler::ShuffleDown,
-        &SPIRVDecompiler::ShuffleButterfly,
-
-        &SPIRVDecompiler::InRangeShuffleIndexed,
-        &SPIRVDecompiler::InRangeShuffleUp,
-        &SPIRVDecompiler::InRangeShuffleDown,
-        &SPIRVDecompiler::InRangeShuffleButterfly,
     };
     static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount));
 
@@ -1483,6 +1513,8 @@ private:
     const ShaderIR& ir;
     const ShaderStage stage;
     const Tegra::Shader::Header header;
+    u64 conditional_nest_count{};
+    u64 inside_branch{};
 
     const Id t_void = Name(TypeVoid(), "void");
 
@@ -1545,6 +1577,7 @@ private:
     Id per_vertex{};
     std::map<u32, Id> registers;
     std::map<Tegra::Shader::Pred, Id> predicates;
+    std::map<u32, Id> flow_variables;
     Id local_memory{};
     std::array<Id, INTERNAL_FLAGS_COUNT> internal_flags{};
     std::map<Attribute::Index, Id> input_attributes;
@@ -1580,6 +1613,235 @@ private:
     std::map<u32, Id> labels;
 };
 
+/// Visitor that lowers control-flow condition expressions to SPIR-V boolean value ids.
+class ExprDecompiler {
+public:
+    explicit ExprDecompiler(SPIRVDecompiler& decomp) : decomp{decomp} {}
+
+    /// Binary connectives emit the left operand, then the right one, then combine them.
+    Id operator()(const ExprAnd& expr) {
+        const Id bool_type = decomp.GetTypeDefinition(Type::Bool);
+        const Id lhs = Visit(expr.operand1);
+        const Id rhs = Visit(expr.operand2);
+        return decomp.Emit(decomp.OpLogicalAnd(bool_type, lhs, rhs));
+    }
+
+    Id operator()(const ExprOr& expr) {
+        const Id bool_type = decomp.GetTypeDefinition(Type::Bool);
+        const Id lhs = Visit(expr.operand1);
+        const Id rhs = Visit(expr.operand2);
+        return decomp.Emit(decomp.OpLogicalOr(bool_type, lhs, rhs));
+    }
+
+    Id operator()(const ExprNot& expr) {
+        const Id bool_type = decomp.GetTypeDefinition(Type::Bool);
+        const Id operand = Visit(expr.operand1);
+        return decomp.Emit(decomp.OpLogicalNot(bool_type, operand));
+    }
+
+    Id operator()(const ExprPredicate& expr) {
+        const auto pred = static_cast<Tegra::Shader::Pred>(expr.predicate);
+        return decomp.Emit(decomp.OpLoad(decomp.t_bool, decomp.predicates.at(pred)));
+    }
+
+    Id operator()(const ExprCondCode& expr) {
+        const Node cc = decomp.ir.GetConditionCode(expr.cc);
+        if (const auto pred = std::get_if<PredicateNode>(&*cc)) {
+            switch (const auto index = pred->GetIndex(); index) {
+            case Tegra::Shader::Pred::NeverExecute:
+                return decomp.v_false; // Already a value; OpLoad needs a pointer operand
+            case Tegra::Shader::Pred::UnusedIndex:
+                return decomp.v_true;
+            default:
+                return decomp.Emit(decomp.OpLoad(decomp.t_bool, decomp.predicates.at(index)));
+            }
+        }
+        if (const auto flag = std::get_if<InternalFlagNode>(&*cc)) {
+            const Id flag_var = decomp.internal_flags.at(static_cast<u32>(flag->GetFlag()));
+            return decomp.Emit(decomp.OpLoad(decomp.t_bool, flag_var));
+        }
+        UNREACHABLE(); // The condition code is neither a predicate nor an internal flag
+        return decomp.v_false;
+    }
+
+    Id operator()(const ExprVar& expr) {
+        return decomp.Emit(decomp.OpLoad(decomp.t_bool, decomp.flow_variables.at(expr.var_index)));
+    }
+
+    Id operator()(const ExprBoolean& expr) {
+        return expr.value ? decomp.v_true : decomp.v_false;
+    }
+
+    Id operator()(const ExprGprEqual& expr) {
+        const Id target = decomp.Constant(decomp.t_uint, expr.value);
+        const Id gpr = decomp.BitcastTo<Type::Uint>(
+            decomp.Emit(decomp.OpLoad(decomp.t_float, decomp.registers.at(expr.gpr))));
+        // Integer equality with a boolean result; OpLogicalEqual is restricted to booleans
+        return decomp.Emit(decomp.OpIEqual(decomp.t_bool, gpr, target));
+    }
+
+    Id Visit(const Expr& node) {
+        return std::visit(*this, *node);
+    }
+
+private:
+    SPIRVDecompiler& decomp;
+};
+
+/// Visitor that emits structured SPIR-V control flow for each node of the shader AST.
+class ASTDecompiler {
+public:
+    explicit ASTDecompiler(SPIRVDecompiler& decomp) : decomp{decomp} {}
+
+    /// Emits every top-level node of the program in order.
+    void operator()(const ASTProgram& ast) {
+        VisitList(ast.nodes.GetFirst());
+    }
+
+    /// Emits a selection construct whose taken branch contains the nested nodes.
+    void operator()(const ASTIfThen& ast) {
+        ExprDecompiler expr_parser{decomp};
+        const Id condition = expr_parser.Visit(ast.condition);
+        const Id then_label = decomp.OpLabel();
+        const Id endif_label = decomp.OpLabel();
+        decomp.Emit(decomp.OpSelectionMerge(endif_label, spv::SelectionControlMask::MaskNone));
+        decomp.Emit(decomp.OpBranchConditional(condition, then_label, endif_label));
+        decomp.Emit(then_label);
+        VisitList(ast.nodes.GetFirst());
+        decomp.Emit(decomp.OpBranch(endif_label));
+        decomp.Emit(endif_label);
+    }
+
+    void operator()([[maybe_unused]] const ASTIfElse& ast) {
+        UNREACHABLE();
+    }
+
+    void operator()([[maybe_unused]] const ASTBlockEncoded& ast) {
+        UNREACHABLE();
+    }
+
+    /// Emits the nodes of a decoded block through the main decompiler.
+    void operator()(const ASTBlockDecoded& ast) {
+        decomp.VisitBasicBlock(ast.nodes);
+    }
+
+    /// Stores the evaluated condition into the flow variable selected by the node.
+    void operator()(const ASTVarSet& ast) {
+        ExprDecompiler expr_parser{decomp};
+        const Id condition = expr_parser.Visit(ast.condition);
+        decomp.Emit(decomp.OpStore(decomp.flow_variables.at(ast.index), condition));
+    }
+
+    void operator()([[maybe_unused]] const ASTLabel& ast) {
+        // Do nothing
+    }
+
+    void operator()([[maybe_unused]] const ASTGoto& ast) {
+        UNREACHABLE();
+    }
+
+    /// Emits a loop construct: header, body, then a continue block that evaluates the condition
+    /// and holds the back edge. The enclosing loop's exit is restored once the loop is emitted.
+    void operator()(const ASTDoWhile& ast) {
+        const Id loop_label = decomp.OpLabel();
+        const Id endloop_label = decomp.OpLabel();
+        const Id loop_start_block = decomp.OpLabel();
+        const Id loop_continue_block = decomp.OpLabel();
+        const Id outer_loop_exit = current_loop_exit;
+        current_loop_exit = endloop_label;
+        decomp.Emit(decomp.OpBranch(loop_label));
+        decomp.Emit(loop_label);
+        decomp.Emit(decomp.OpLoopMerge(endloop_label, loop_continue_block,
+                                       spv::LoopControlMask::MaskNone));
+        decomp.Emit(decomp.OpBranch(loop_start_block));
+        decomp.Emit(loop_start_block);
+        VisitList(ast.nodes.GetFirst());
+        decomp.Emit(decomp.OpBranch(loop_continue_block));
+        decomp.Emit(loop_continue_block);
+        ExprDecompiler expr_parser{decomp};
+        const Id condition = expr_parser.Visit(ast.condition);
+        decomp.Emit(decomp.OpBranchConditional(condition, loop_label, endloop_label));
+        decomp.Emit(endloop_label);
+        current_loop_exit = outer_loop_exit;
+    }
+
+    /// Leaves the shader: OpKill when the node kills, otherwise PreExit() followed by OpReturn.
+    void operator()(const ASTReturn& ast) {
+        EmitJump(ast.condition, [this, &ast] {
+            if (ast.kills) {
+                decomp.Emit(decomp.OpKill());
+            } else {
+                decomp.PreExit();
+                decomp.Emit(decomp.OpReturn());
+            }
+        });
+    }
+
+    /// Branches to the merge block of the loop that is currently being emitted.
+    void operator()(const ASTBreak& ast) {
+        EmitJump(ast.condition, [this] { decomp.Emit(decomp.OpBranch(current_loop_exit)); });
+    }
+
+    /// Dispatches to the overload matching the concrete data held by the node.
+    void Visit(const ASTNode& node) {
+        std::visit(*this, *node->GetInnerData());
+    }
+
+private:
+    /// Visits a node and all of its following siblings in order.
+    void VisitList(ASTNode current) {
+        while (current) {
+            Visit(current);
+            current = current->GetNext();
+        }
+    }
+
+    /// Emits the terminator produced by emit_terminator, guarded by a selection on condition
+    /// unless ExprIsTrue() reports the condition as always true.
+    template <typename Func>
+    void EmitJump(const Expr& condition, Func&& emit_terminator) {
+        if (VideoCommon::Shader::ExprIsTrue(condition)) {
+            // Terminate inside a fresh block, then open a new block for any code that follows
+            const Id next_block = decomp.OpLabel();
+            decomp.Emit(decomp.OpBranch(next_block));
+            decomp.Emit(next_block);
+            emit_terminator();
+            decomp.Emit(decomp.OpLabel());
+            return;
+        }
+        ExprDecompiler expr_parser{decomp};
+        const Id condition_id = expr_parser.Visit(condition);
+        const Id then_label = decomp.OpLabel();
+        const Id endif_label = decomp.OpLabel();
+        decomp.Emit(decomp.OpSelectionMerge(endif_label, spv::SelectionControlMask::MaskNone));
+        decomp.Emit(decomp.OpBranchConditional(condition_id, then_label, endif_label));
+        decomp.Emit(then_label);
+        emit_terminator();
+        decomp.Emit(endif_label);
+    }
+
+    SPIRVDecompiler& decomp;
+    /// Merge block of the innermost loop being emitted; the target used by ASTBreak.
+    Id current_loop_exit{};
+};
+
+/// Emits the shader body by walking the AST returned by ir.GetASTProgram().
+///
+/// Flow variables are not declared here: Decompile() already calls DeclareFlowVariables() for
+/// fully decompiled shaders, and flow_variables.emplace() keeps the first entry of each index,
+/// so declaring them a second time only added duplicate globals that nothing referenced.
+/// Must be called after the function header and its first label have been emitted.
+void SPIRVDecompiler::DecompileAST() {
+    const ASTNode program = ir.GetASTProgram();
+    ASTDecompiler decompiler{*this};
+    decompiler.Visit(program);
+
+    // Continue in a fresh block; Decompile() emits the final OpReturn right after this call
+    const Id next_block = OpLabel();
+    Emit(OpBranch(next_block));
+    Emit(next_block);
+}
+
 DecompilerResult Decompile(const VKDevice& device, const VideoCommon::Shader::ShaderIR& ir,
                            Maxwell::ShaderStage stage) {
     auto decompiler = std::make_unique<SPIRVDecompiler>(device, ir, stage);
diff --git a/src/video_core/shader/ast.cpp b/src/video_core/shader/ast.cpp
new file mode 100644
index 000000000..3f96d9076
--- /dev/null
+++ b/src/video_core/shader/ast.cpp
@@ -0,0 +1,753 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <string>
+#include <string_view>
+
+#include <fmt/format.h>
+
+#include "common/assert.h"
+#include "common/common_types.h"
+#include "video_core/shader/ast.h"
+#include "video_core/shader/expr.h"
+
+namespace VideoCommon::Shader {
+
+ASTZipper::ASTZipper() = default; // an empty list: first and last start out null
+
+// Adopts the chain starting at new_first: every node reachable through next gets this
+// zipper as manager and the given parent; first/last end up at the two ends of the chain.
+void ASTZipper::Init(const ASTNode new_first, const ASTNode parent) {
+    ASSERT(new_first->manager == nullptr);
+    first = new_first;
+    last = new_first;
+
+    for (ASTNode node = first; node; node = node->next) {
+        node->manager = this;
+        node->parent = parent;
+        last = node;
+    }
+}
+
+void ASTZipper::PushBack(const ASTNode new_node) { // appends behind the current tail
+    ASSERT(new_node->manager == nullptr);
+    new_node->manager = this;
+    new_node->next.reset();
+    new_node->previous = last;
+    if (last) {
+        last->next = new_node;
+    }
+    last = new_node;
+    if (!first) {
+        first = new_node;
+    }
+}
+
+void ASTZipper::PushFront(const ASTNode new_node) { // prepends before the current head
+    ASSERT(new_node->manager == nullptr);
+    new_node->previous.reset();
+    new_node->next = first;
+    if (first) {
+        first->previous = new_node;
+    } else {
+        // Empty list: new_node is the tail as well. A non-empty list must keep its tail.
+        last = new_node;
+    }
+    first = new_node;
+    new_node->manager = this;
+}
+
+void ASTZipper::InsertAfter(const ASTNode new_node, const ASTNode at_node) {
+    ASSERT(new_node->manager == nullptr);
+    if (!at_node) { // no anchor: the node goes to the front of the list
+        PushFront(new_node);
+        return;
+    }
+    const ASTNode successor = at_node->next;
+    new_node->manager = this;
+    new_node->previous = at_node;
+    new_node->next = successor;
+    at_node->next = new_node;
+    if (successor) {
+        successor->previous = new_node;
+    }
+    if (last == at_node) {
+        last = new_node; // appended behind the old tail
+    }
+}
+
+void ASTZipper::InsertBefore(const ASTNode new_node, const ASTNode at_node) {
+    ASSERT(new_node->manager == nullptr);
+    if (!at_node) { // no anchor: the node goes to the back of the list
+        PushBack(new_node);
+        return;
+    }
+    const ASTNode predecessor = at_node->previous;
+    new_node->manager = this;
+    new_node->next = at_node;
+    new_node->previous = predecessor;
+    at_node->previous = new_node;
+    if (predecessor) {
+        predecessor->next = new_node;
+    }
+    if (first == at_node) {
+        first = new_node; // inserted in front of the old head
+    }
+}
+
+// Cuts the list right before node and releases node together with everything behind it.
+void ASTZipper::DetachTail(ASTNode node) {
+    ASSERT(node->manager == this);
+    if (node == first) {
+        // The whole list is detached. The nodes still have to be released below, otherwise
+        // they keep pointing at this zipper and Init() would refuse to adopt them again.
+        first.reset();
+        last.reset();
+    } else {
+        last = node->previous;
+        last->next.reset();
+        node->previous.reset();
+    }
+
+    for (ASTNode current = std::move(node); current; current = current->next) {
+        current->manager = nullptr;
+        current->parent.reset();
+    }
+}
+
+// Unlinks the inclusive run [start, end]; the run stays chained but loses manager and parent.
+void ASTZipper::DetachSegment(const ASTNode start, const ASTNode end) {
+    ASSERT(start->manager == this && end->manager == this);
+    if (start == end) {
+        DetachSingle(start);
+        return;
+    }
+    const ASTNode before = start->previous;
+    const ASTNode after = end->next;
+    if (before) {
+        before->next = after;
+    } else {
+        first = after;
+    }
+    if (after) {
+        after->previous = before;
+    } else {
+        last = before;
+    }
+    start->previous.reset();
+    end->next.reset();
+    bool reached_end = false;
+    for (ASTNode node = start; node; node = node->next) {
+        node->manager = nullptr;
+        node->parent.reset();
+        reached_end = reached_end || node == end;
+    }
+    // Walking forward from start must have passed through end.
+    ASSERT(reached_end);
+}
+
+// Unlinks one node, joins its former neighbours and drops its manager and parent.
+void ASTZipper::DetachSingle(const ASTNode node) {
+    ASSERT(node->manager == this);
+    const ASTNode before = node->previous;
+    const ASTNode after = node->next;
+    if (before) {
+        before->next = after;
+    } else {
+        first = after;
+    }
+    if (after) {
+        after->previous = before;
+    } else {
+        last = before;
+    }
+    node->previous.reset();
+    node->next.reset();
+    node->parent.reset();
+    node->manager = nullptr;
+}
+
+void ASTZipper::Remove(const ASTNode node) { // node keeps its own next/previous links
+    ASSERT(node->manager == this);
+    const ASTNode after = node->next;
+    const ASTNode before = node->previous;
+    if (first == node) {
+        first = after;
+    }
+    if (last == node) {
+        last = before;
+    }
+    if (before) {
+        before->next = after;
+    }
+    if (after) {
+        after->previous = before;
+    }
+    node->manager = nullptr;
+    node->parent.reset();
+}
+
+class ExprPrinter final { // visitor rendering an Expr tree as text, read via GetResult()
+public:
+    void operator()(const ExprAnd& expr) {
+        inner.append("( ");
+        std::visit(*this, *expr.operand1);
+        inner.append(" && ");
+        std::visit(*this, *expr.operand2);
+        inner.push_back(')');
+    }
+
+    void operator()(const ExprOr& expr) {
+        inner.append("( ");
+        std::visit(*this, *expr.operand1);
+        inner.append(" || ");
+        std::visit(*this, *expr.operand2);
+        inner.push_back(')');
+    }
+
+    void operator()(const ExprNot& expr) {
+        inner.push_back('!');
+        std::visit(*this, *expr.operand1);
+    }
+
+    void operator()(const ExprPredicate& expr) {
+        inner.append("P").append(std::to_string(expr.predicate));
+    }
+
+    void operator()(const ExprCondCode& expr) {
+        inner.append("CC").append(std::to_string(static_cast<u32>(expr.cc)));
+    }
+
+    void operator()(const ExprVar& expr) {
+        inner.append("V").append(std::to_string(expr.var_index));
+    }
+
+    void operator()(const ExprBoolean& expr) {
+        inner.append(expr.value ? "true" : "false");
+    }
+
+    void operator()(const ExprGprEqual& expr) {
+        inner.append("( gpr_").append(std::to_string(expr.gpr)).append(" == ");
+        inner.append(std::to_string(expr.value)).push_back(')');
+    }
+
+    const std::string& GetResult() const {
+        return inner;
+    }
+
+private:
+    std::string inner;
+};
+
+// Visitor that pretty-prints an AST into a string. Nested statements are indented by two
+// spaces per level; the text is collected in inner and read back through GetResult().
+class ASTPrinter {
+public:
+    // The root: its children are wrapped in "program { ... }".
+    void operator()(const ASTProgram& ast) {
+        scope++;
+        inner += "program {\n";
+        VisitChildren(ast.nodes);
+        inner += "}\n";
+        scope--;
+    }
+
+    // "if (condition) { children }"
+    void operator()(const ASTIfThen& ast) {
+        inner += fmt::format("{}if ({}) {{\n", Indent(), Render(ast.condition));
+        scope++;
+        VisitChildren(ast.nodes);
+        scope--;
+        inner += fmt::format("{}}}\n", Indent());
+    }
+
+    // "else { children }"
+    void operator()(const ASTIfElse& ast) {
+        inner += Indent();
+        inner += "else {\n";
+        scope++;
+        VisitChildren(ast.nodes);
+        scope--;
+        inner += Indent();
+        inner += "}\n";
+    }
+
+    // A block that is still only known by its start and end values.
+    void operator()(const ASTBlockEncoded& ast) {
+        inner += fmt::format("{}Block({}, {});\n", Indent(), ast.start, ast.end);
+    }
+
+    // A decoded block; its contents are not printed.
+    void operator()([[maybe_unused]] const ASTBlockDecoded& ast) {
+        inner += Indent();
+        inner += "Block;\n";
+    }
+
+    // Assignment of a condition to a flow variable.
+    void operator()(const ASTVarSet& ast) {
+        inner += fmt::format("{}V{} := {};\n", Indent(), ast.index, Render(ast.condition));
+    }
+
+    // Labels are printed without indentation.
+    void operator()(const ASTLabel& ast) {
+        inner += fmt::format("Label_{}:\n", ast.index);
+    }
+
+    // Conditional jump to a label.
+    void operator()(const ASTGoto& ast) {
+        inner += fmt::format("{}({}) -> goto Label_{};\n", Indent(), Render(ast.condition),
+                             ast.label);
+    }
+
+    // "do { children } while (condition);"
+    void operator()(const ASTDoWhile& ast) {
+        inner += fmt::format("{}do {{\n", Indent());
+        scope++;
+        VisitChildren(ast.nodes);
+        scope--;
+        inner += fmt::format("{}}} while ({});\n", Indent(), Render(ast.condition));
+    }
+
+    // Conditional "discard" (kills set) or "exit".
+    void operator()(const ASTReturn& ast) {
+        const char* const action = ast.kills ? "discard" : "exit";
+        inner += fmt::format("{}({}) -> {};\n", Indent(), Render(ast.condition), action);
+    }
+
+    // Conditional "break".
+    void operator()(const ASTBreak& ast) {
+        inner += fmt::format("{}({}) -> break;\n", Indent(), Render(ast.condition));
+    }
+
+    // Prints node by dispatching on the payload it holds.
+    void Visit(const ASTNode& node) {
+        std::visit(*this, *node->GetInnerData());
+    }
+
+    // Everything printed so far.
+    const std::string& GetResult() const {
+        return inner;
+    }
+
+private:
+    // Prints every node of a child list in order.
+    void VisitChildren(const ASTZipper& children) {
+        for (ASTNode node = children.GetFirst(); node; node = node->GetNext()) {
+            Visit(node);
+        }
+    }
+
+    // Renders a condition expression to text.
+    static std::string Render(const Expr& condition) {
+        ExprPrinter printer{};
+        std::visit(printer, *condition);
+        return printer.GetResult();
+    }
+
+    // Returns scope * 2 spaces, reusing the previous view while the scope is unchanged.
+    std::string_view Indent() {
+        if (space_segment_scope != scope) {
+            // Ensure that we don't exceed our view.
+            ASSERT(scope * 2 < spaces.size());
+
+            space_segment_scope = scope;
+            space_segment = spaces.substr(0, scope * 2);
+        }
+        return space_segment;
+    }
+
+    std::string inner{};
+    std::string_view space_segment;
+
+    // Current nesting depth, and the depth space_segment was last computed for.
+    u32 scope{};
+    u32 space_segment_scope{};
+
+    // Backing storage for Indent().
+    static constexpr std::string_view spaces{"                                    "};
+};
+
+std::string ASTManager::Print() const { // textual dump of the tree rooted at main_node
+    ASTPrinter ast_printer{};
+    ast_printer.Visit(main_node);
+    return ast_printer.GetResult();
+}
+
+ASTManager::ASTManager(bool full_decompile, bool disable_else_derivation) // stores both flags
+    : full_decompile{full_decompile}, disable_else_derivation{disable_else_derivation} {}
+
+ASTManager::~ASTManager() {
+    Clear(); // unlinks the nodes from each other before the members are destroyed
+}
+
+void ASTManager::Init() { // creates the empty root node and the shared false expression
+    false_condition = MakeExpr<ExprBoolean>(false);
+    main_node = ASTBase::Make<ASTProgram>(ASTNode{});
+    program = std::get_if<ASTProgram>(main_node->GetInnerData());
+}
+
+void ASTManager::DeclareLabel(u32 address) { // repeated declarations of an address are no-ops
+    const bool inserted = labels_map.emplace(address, labels_count).second;
+    if (!inserted) {
+        return;
+    }
+    labels.resize(++labels_count);
+}
+
+void ASTManager::InsertLabel(u32 address) {
+    // NOTE(review): assumes DeclareLabel(address) ran first; operator[] would map to index 0.
+    const u32 label_index = labels_map[address];
+    labels[label_index] = ASTBase::Make<ASTLabel>(main_node, label_index);
+    program->nodes.PushBack(labels[label_index]);
+}
+
+void ASTManager::InsertGoto(Expr condition, u32 address) {
+    // Appends a conditional jump to the program and records it in gotos for Decompile().
+    ASTNode node = ASTBase::Make<ASTGoto>(main_node, std::move(condition), labels_map[address]);
+    gotos.push_back(node);
+    program->nodes.PushBack(std::move(node));
+}
+
+void ASTManager::InsertBlock(u32 start_address, u32 end_address) {
+    // Appends a still-encoded block described by the given address pair to the program.
+    program->nodes.PushBack(ASTBase::Make<ASTBlockEncoded>(main_node, start_address, end_address));
+}
+
+void ASTManager::InsertReturn(Expr condition, bool kills) {
+    // Appends a conditional return node; kills is stored on the node unchanged.
+    program->nodes.PushBack(ASTBase::Make<ASTReturn>(main_node, std::move(condition), kills));
+}
+
+// The decompile algorithm is based on
+// "Taming control flow: A structured approach to eliminating goto statements"
+// by AM Erosa, LJ Hendren 1994. In general, the idea is to get gotos to be
+// on the same structured level as the label which they jump to. This is done
+// through outward/inward movements and lifting. Once they are at the same
+// level, you can enclose them in an "if" structure or a "do-while" structure.
+void ASTManager::Decompile() {
+    auto it = gotos.begin();
+    while (it != gotos.end()) {
+        const ASTNode goto_node = *it;
+        const auto label_index = goto_node->GetGotoLabel();
+        if (!label_index) {
+            return; // the list holds a node that is not a goto: give up
+        }
+        const ASTNode label = labels[*label_index];
+        if (!full_decompile) {
+            // We only decompile backward jumps
+            if (!IsBackwardsJump(goto_node, label)) {
+                it++;
+                continue;
+            }
+        }
+        if (IndirectlyRelated(goto_node, label)) {
+            while (!DirectlyRelated(goto_node, label)) {
+                MoveOutward(goto_node);
+            }
+        }
+        if (DirectlyRelated(goto_node, label)) {
+            u32 goto_level = goto_node->GetLevel();
+            const u32 label_level = label->GetLevel();
+            while (label_level < goto_level) {
+                MoveOutward(goto_node);
+                goto_level--;
+            }
+            // TODO(Blinkhawk): Implement Lifting and Inward Movements
+        }
+        if (label->GetParent() == goto_node->GetParent()) {
+            // Siblings: a label found before the goto makes this a loop, otherwise an if.
+            bool is_loop = false;
+            ASTNode current = goto_node->GetPrevious();
+            while (current) {
+                if (current == label) {
+                    is_loop = true;
+                    break;
+                }
+                current = current->GetPrevious();
+            }
+            if (is_loop) {
+                EncloseDoWhile(goto_node, label);
+            } else {
+                EncloseIfThen(goto_node, label);
+            }
+            it = gotos.erase(it);
+            continue;
+        }
+        it++;
+    }
+    if (full_decompile) {
+        for (const ASTNode& label : labels) {
+            auto& manager = label->GetManager();
+            manager.Remove(label);
+        }
+        labels.clear();
+    } else {
+        // Only backward jumps were decompiled, so some gotos may remain: keep the labels
+        // in place and just flag the ones that no remaining goto targets. Each label is
+        // visited exactly once.
+        for (const ASTNode& label : labels) {
+            bool can_remove = true;
+            for (const ASTNode& goto_node : gotos) {
+                const auto label_index = goto_node->GetGotoLabel();
+                if (!label_index) {
+                    return;
+                }
+                if (labels[*label_index] == label) {
+                    can_remove = false;
+                    break;
+                }
+            }
+            if (can_remove) {
+                label->MarkLabelUnused();
+            }
+        }
+    }
+}
+
+// Reports whether label_node, once both nodes are lifted to siblings, precedes goto_node.
+bool ASTManager::IsBackwardsJump(ASTNode goto_node, ASTNode label_node) const {
+    // Lift the deeper node until both sit at the same depth.
+    const u32 goto_level = goto_node->GetLevel();
+    const u32 label_level = label_node->GetLevel();
+    for (u32 level = goto_level; level > label_level; --level) {
+        goto_node = goto_node->GetParent();
+    }
+    for (u32 level = label_level; level > goto_level; --level) {
+        label_node = label_node->GetParent();
+    }
+    // Then lift both together until they share a parent.
+    while (goto_node->GetParent() != label_node->GetParent()) {
+        goto_node = goto_node->GetParent();
+        label_node = label_node->GetParent();
+    }
+    // Finally scan backwards from the goto looking for the label.
+    for (ASTNode node = goto_node->GetPrevious(); node; node = node->GetPrevious()) {
+        if (node == label_node) {
+            return true;
+        }
+    }
+    return false;
+}
+
+bool ASTManager::IndirectlyRelated(const ASTNode& first, const ASTNode& second) const {
+    return first->GetParent() != second->GetParent() && !DirectlyRelated(first, second);
+} // i.e. the nodes are neither siblings nor directly related
+
+// True when the nodes are not siblings, but lifting the deeper one to the level of the
+// shallower one makes them siblings.
+bool ASTManager::DirectlyRelated(const ASTNode& first, const ASTNode& second) const {
+    if (first->GetParent() == second->GetParent()) {
+        return false;
+    }
+    const u32 first_level = first->GetLevel();
+    const u32 second_level = second->GetLevel();
+    // Pick the deeper node and work out how many levels it has to climb.
+    ASTNode deep;
+    ASTNode shallow;
+    u32 levels_to_climb;
+    if (first_level > second_level) {
+        deep = first;
+        shallow = second;
+        levels_to_climb = first_level - second_level;
+    } else {
+        deep = second;
+        shallow = first;
+        levels_to_climb = second_level - first_level;
+    }
+
+    while (levels_to_climb > 0) {
+        deep = deep->GetParent();
+        levels_to_climb--;
+    }
+
+    return shallow->GetParent() == deep->GetParent();
+}
+
+void ASTManager::ShowCurrentState(std::string_view state) const {
+    LOG_CRITICAL(HW_GPU, "\nState {}:\n\n{}\n", state, Print()); // logs the printed tree
+    SanityCheck();
+}
+
+void ASTManager::SanityCheck() const { // logs once for every label that has no parent
+    for (const ASTNode& label : labels) {
+        if (label->GetParent() == nullptr) {
+            LOG_CRITICAL(HW_GPU, "Sanity Check Failed");
+        }
+    }
+}
+
+// Turns "label: body...; goto label" (all siblings) into a do-while over body.
+void ASTManager::EncloseDoWhile(ASTNode goto_node, ASTNode label) {
+    ASTZipper& siblings = goto_node->GetManager();
+    const ASTNode body_first = label->GetNext();
+    if (body_first == goto_node) {
+        siblings.Remove(goto_node); // nothing between label and goto: just drop the goto
+        return;
+    }
+    const Expr loop_condition = goto_node->GetGotoCondition();
+    siblings.DetachSegment(body_first, goto_node);
+    const ASTNode loop = ASTBase::Make<ASTDoWhile>(label->GetParent(), loop_condition);
+    ASTZipper* const body = loop->GetSubNodes();
+    body->Init(body_first, loop);
+    siblings.InsertAfter(loop, label);
+    body->Remove(goto_node);
+}
+
+// Turns "goto label; body...; label:" (all siblings) into "if (!condition) { body }", or
+// into an else block when the node before the goto is an if-then with the same condition.
+void ASTManager::EncloseIfThen(ASTNode goto_node, ASTNode label) {
+    ASTZipper& zipper = goto_node->GetManager();
+    const ASTNode if_end = label->GetPrevious();
+    if (if_end == goto_node) {
+        zipper.Remove(goto_node);
+        return;
+    }
+    const ASTNode prev = goto_node->GetPrevious();
+    const Expr condition = goto_node->GetGotoCondition();
+    // prev is null when the goto is the first node of its list. There is no if-then to
+    // pair an else with in that case, and InsertAfter() below then inserts at the front.
+    const bool do_else = !disable_else_derivation && prev && prev->IsIfThen() &&
+                         ExprAreEqual(prev->GetIfCondition(), condition);
+    const ASTNode parent = label->GetParent();
+    zipper.DetachSegment(goto_node, if_end);
+    ASTNode if_node;
+    if (do_else) {
+        if_node = ASTBase::Make<ASTIfElse>(parent);
+    } else {
+        if_node = ASTBase::Make<ASTIfThen>(parent, MakeExprNot(condition));
+    }
+    ASTZipper* sub_zipper = if_node->GetSubNodes();
+    sub_zipper->Init(goto_node, if_node);
+    zipper.InsertAfter(if_node, prev);
+    sub_zipper->Remove(goto_node);
+}
+
+// Moves goto_node one level up, behind its enclosing construct. Its condition is latched in a
+// new flow variable at the old position; the rest of the construct is skipped when it is set.
+void ASTManager::MoveOutward(ASTNode goto_node) {
+    ASTZipper& zipper = goto_node->GetManager();
+    const ASTNode parent = goto_node->GetParent();
+    ASTZipper& zipper2 = parent->GetManager();
+    const ASTNode grandpa = parent->GetParent();
+    const bool is_loop = parent->IsLoop();
+    const bool is_else = parent->IsIfElse();
+    const bool is_if = parent->IsIfThen();
+
+    const ASTNode prev = goto_node->GetPrevious();
+    const ASTNode post = goto_node->GetNext();
+
+    const Expr condition = goto_node->GetGotoCondition();
+    zipper.DetachSingle(goto_node);
+    if (is_loop) {
+        const u32 var_index = NewVariable();
+        const Expr var_condition = MakeExpr<ExprVar>(var_index);
+        const ASTNode var_node = ASTBase::Make<ASTVarSet>(parent, var_index, condition);
+        // The initialiser is inserted next to the parent, so its parent is the grandparent.
+        const ASTNode var_node_init = ASTBase::Make<ASTVarSet>(grandpa, var_index, false_condition);
+        zipper2.InsertBefore(var_node_init, parent);
+        zipper.InsertAfter(var_node, prev);
+        goto_node->SetGotoCondition(var_condition);
+        const ASTNode break_node = ASTBase::Make<ASTBreak>(parent, var_condition);
+        zipper.InsertAfter(break_node, var_node);
+    } else if (is_if || is_else) {
+        const u32 var_index = NewVariable();
+        const Expr var_condition = MakeExpr<ExprVar>(var_index);
+        const ASTNode var_node = ASTBase::Make<ASTVarSet>(parent, var_index, condition);
+        const ASTNode var_node_init = ASTBase::Make<ASTVarSet>(grandpa, var_index, false_condition);
+        if (is_if) {
+            zipper2.InsertBefore(var_node_init, parent);
+        } else {
+            zipper2.InsertBefore(var_node_init, parent->GetPrevious());
+        }
+        zipper.InsertAfter(var_node, prev);
+        goto_node->SetGotoCondition(var_condition);
+        if (post) {
+            zipper.DetachTail(post);
+            const ASTNode if_node = ASTBase::Make<ASTIfThen>(parent, MakeExprNot(var_condition));
+            ASTZipper* sub_zipper = if_node->GetSubNodes();
+            sub_zipper->Init(post, if_node);
+            zipper.InsertAfter(if_node, var_node);
+        }
+    } else {
+        UNREACHABLE();
+    }
+    // Re-insert the goto behind the parent, or behind the else that directly follows an if.
+    const ASTNode next = parent->GetNext();
+    const bool behind_else = is_if && next && next->IsIfElse();
+    zipper2.InsertAfter(goto_node, behind_else ? next : parent);
+    goto_node->SetParent(grandpa);
+}
+
+// Visitor that severs the links between all nodes of a tree. The nodes reference each
+// other through shared_ptr (parent, next, previous), so they only get freed once those
+// links are reset.
+class ASTClearer {
+public:
+    ASTClearer() = default;
+
+    void operator()(const ASTProgram& ast) {
+        ClearChildren(ast.nodes);
+    }
+
+    void operator()(const ASTIfThen& ast) {
+        ClearChildren(ast.nodes);
+    }
+
+    void operator()(const ASTIfElse& ast) {
+        ClearChildren(ast.nodes);
+    }
+
+    void operator()([[maybe_unused]] const ASTBlockEncoded& ast) {}
+
+    void operator()(ASTBlockDecoded& ast) {
+        ast.nodes.clear();
+    }
+
+    void operator()([[maybe_unused]] const ASTVarSet& ast) {}
+
+    void operator()([[maybe_unused]] const ASTLabel& ast) {}
+
+    void operator()([[maybe_unused]] const ASTGoto& ast) {}
+
+    void operator()(const ASTDoWhile& ast) {
+        ClearChildren(ast.nodes);
+    }
+
+    void operator()([[maybe_unused]] const ASTReturn& ast) {}
+
+    void operator()([[maybe_unused]] const ASTBreak& ast) {}
+
+    // Clears everything below node first, then the links of node itself.
+    void Visit(const ASTNode& node) {
+        std::visit(*this, *node->GetInnerData());
+        node->Clear();
+    }
+
+private:
+    // Clears every node of a child list, front to back.
+    void ClearChildren(const ASTZipper& children) {
+        ASTNode current = children.GetFirst();
+        while (current) {
+            // Visit() resets current's links, so the successor has to be fetched first.
+            const ASTNode next = current->GetNext();
+            Visit(current);
+            current = next;
+        }
+    }
+};
+
+// Tears the tree down and forgets all labels and gotos. Does nothing at all when there
+// is no root node.
+void ASTManager::Clear() {
+    if (main_node) {
+        ASTClearer{}.Visit(main_node);
+        main_node.reset();
+        program = nullptr;
+        labels_map.clear();
+        labels.clear();
+        gotos.clear();
+    }
+}
+
+} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/ast.h b/src/video_core/shader/ast.h
new file mode 100644
index 000000000..a2f0044ba
--- /dev/null
+++ b/src/video_core/shader/ast.h
@@ -0,0 +1,400 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <functional>
+#include <list>
+#include <memory>
+#include <optional>
+#include <string>
+#include <unordered_map>
+#include <vector>
+
+#include "video_core/shader/expr.h"
+#include "video_core/shader/node.h"
+
+namespace VideoCommon::Shader {
+
+class ASTBase;
+class ASTBlockDecoded;
+class ASTBlockEncoded;
+class ASTBreak;
+class ASTDoWhile;
+class ASTGoto;
+class ASTIfElse;
+class ASTIfThen;
+class ASTLabel;
+class ASTProgram;
+class ASTReturn;
+class ASTVarSet;
+
+using ASTData = std::variant<ASTProgram, ASTIfThen, ASTIfElse, ASTBlockEncoded, ASTBlockDecoded,
+                             ASTVarSet, ASTGoto, ASTLabel, ASTDoWhile, ASTReturn, ASTBreak>;
+
+using ASTNode = std::shared_ptr<ASTBase>;
+
+enum class ASTZipperType : u32 { // NOTE(review): not referenced in ast.h/ast.cpp — confirm use
+    Program,
+    IfThen,
+    IfElse,
+    Loop,
+};
+
+class ASTZipper final { // doubly linked list of sibling nodes, linked through the nodes
+public:
+    explicit ASTZipper();
+
+    void Init(ASTNode first, ASTNode parent); // adopt an unmanaged chain starting at first
+
+    ASTNode GetFirst() const {
+        return first;
+    }
+
+    ASTNode GetLast() const {
+        return last;
+    }
+
+    void PushBack(ASTNode new_node);
+    void PushFront(ASTNode new_node);
+    void InsertAfter(ASTNode new_node, ASTNode at_node); // null at_node: push to the front
+    void InsertBefore(ASTNode new_node, ASTNode at_node); // null at_node: push to the back
+    void DetachTail(ASTNode node); // detach node and everything behind it
+    void DetachSingle(ASTNode node);
+    void DetachSegment(ASTNode start, ASTNode end); // detach the inclusive run [start, end]
+    void Remove(ASTNode node);
+
+    ASTNode first{};
+    ASTNode last{};
+};
+
+class ASTProgram { // payload of the root node: the top-level list of nodes
+public:
+    ASTZipper nodes{};
+};
+
+class ASTIfThen { // "if (condition) { nodes }"
+public:
+    explicit ASTIfThen(Expr condition) : condition{std::move(condition)} {}
+    Expr condition;
+    ASTZipper nodes{};
+};
+
+class ASTIfElse { // "else { nodes }"; carries no condition of its own
+public:
+    ASTZipper nodes{};
+};
+
+class ASTBlockEncoded { // a block known only by its start and end values
+public:
+    explicit ASTBlockEncoded(u32 start, u32 end) : start{start}, end{end} {}
+    u32 start;
+    u32 end;
+};
+
+class ASTBlockDecoded { // a block holding its NodeBlock; see ASTBase::TransformBlockEncoded
+public:
+    explicit ASTBlockDecoded(NodeBlock&& new_nodes) : nodes(std::move(new_nodes)) {}
+    NodeBlock nodes;
+};
+
+class ASTVarSet { // assigns condition to the flow variable numbered index
+public:
+    explicit ASTVarSet(u32 index, Expr condition) : index{index}, condition{std::move(condition)} {}
+    u32 index;
+    Expr condition;
+};
+
+class ASTLabel { // jump target; unused is set through ASTBase::MarkLabelUnused
+public:
+    explicit ASTLabel(u32 index) : index{index} {}
+    u32 index;
+    bool unused{};
+};
+
+class ASTGoto { // conditional jump; label is the index of the targeted label
+public:
+    explicit ASTGoto(Expr condition, u32 label) : condition{std::move(condition)}, label{label} {}
+    Expr condition;
+    u32 label;
+};
+
+class ASTDoWhile { // "do { nodes } while (condition)"
+public:
+    explicit ASTDoWhile(Expr condition) : condition{std::move(condition)} {}
+    Expr condition;
+    ASTZipper nodes{};
+};
+
+class ASTReturn { // conditional return; ASTPrinter shows kills as "discard", else "exit"
+public:
+    explicit ASTReturn(Expr condition, bool kills)
+        : condition{std::move(condition)}, kills{kills} {}
+    Expr condition;
+    bool kills;
+};
+
+class ASTBreak { // conditional "break"
+public:
+    explicit ASTBreak(Expr condition) : condition{std::move(condition)} {}
+    Expr condition;
+};
+
+// A node of the AST: data is its payload, parent/next/previous link it into the tree.
+class ASTBase {
+public:
+    explicit ASTBase(ASTNode parent, ASTData data)
+        : data{std::move(data)}, parent{std::move(parent)} {}
+
+    // Creates a shared node whose payload is a U constructed from args.
+    template <class U, class... Args>
+    static ASTNode Make(ASTNode parent, Args&&... args) {
+        auto payload = ASTData(U(std::forward<Args>(args)...));
+        return std::make_shared<ASTBase>(std::move(parent), std::move(payload));
+    }
+
+    void SetParent(ASTNode new_parent) {
+        parent = std::move(new_parent);
+    }
+
+    ASTNode& GetParent() {
+        return parent;
+    }
+
+    const ASTNode& GetParent() const {
+        return parent;
+    }
+
+    // Depth of this node: the number of ancestors above it, 0 for a node without parent.
+    u32 GetLevel() const {
+        u32 depth = 0;
+        for (const ASTBase* ancestor = parent.get(); ancestor; ancestor = ancestor->parent.get()) {
+            depth++;
+        }
+        return depth;
+    }
+
+    ASTData* GetInnerData() {
+        return &data;
+    }
+
+    const ASTData* GetInnerData() const {
+        return &data;
+    }
+
+    ASTNode GetNext() const {
+        return next;
+    }
+
+    ASTNode GetPrevious() const {
+        return previous;
+    }
+
+    // The list this node is linked into; manager is dereferenced without a null check.
+    ASTZipper& GetManager() {
+        return *manager;
+    }
+
+    const ASTZipper& GetManager() const {
+        return *manager;
+    }
+
+    // Index of the targeted label, or nothing when this node is not a goto.
+    std::optional<u32> GetGotoLabel() const {
+        if (const auto* jump = std::get_if<ASTGoto>(&data)) {
+            return jump->label;
+        }
+        return std::nullopt;
+    }
+
+    // Condition of a goto; null when this node is not a goto.
+    Expr GetGotoCondition() const {
+        if (const auto* jump = std::get_if<ASTGoto>(&data)) {
+            return jump->condition;
+        }
+        return nullptr;
+    }
+
+    void MarkLabelUnused() { // no effect unless this node is a label
+        if (auto* label = std::get_if<ASTLabel>(&data)) {
+            label->unused = true;
+        }
+    }
+
+    bool IsLabelUnused() const { // nodes that are not labels count as unused
+        if (const auto* label = std::get_if<ASTLabel>(&data)) {
+            return label->unused;
+        }
+        return true;
+    }
+
+    // Index of a label, or nothing when this node is not a label.
+    std::optional<u32> GetLabelIndex() const {
+        if (const auto* label = std::get_if<ASTLabel>(&data)) {
+            return label->index;
+        }
+        return std::nullopt;
+    }
+
+    // Condition of an if-then; null when this node is not an if-then.
+    Expr GetIfCondition() const {
+        if (const auto* branch = std::get_if<ASTIfThen>(&data)) {
+            return branch->condition;
+        }
+        return nullptr;
+    }
+
+    // Replaces the condition of a goto; does nothing for other nodes.
+    void SetGotoCondition(Expr new_condition) {
+        if (auto* jump = std::get_if<ASTGoto>(&data)) {
+            jump->condition = std::move(new_condition);
+        }
+    }
+
+    bool IsIfThen() const {
+        return std::holds_alternative<ASTIfThen>(data);
+    }
+
+    bool IsIfElse() const {
+        return std::holds_alternative<ASTIfElse>(data);
+    }
+
+    bool IsBlockEncoded() const {
+        return std::holds_alternative<ASTBlockEncoded>(data);
+    }
+
+    void TransformBlockEncoded(NodeBlock&& nodes) {
+        data = ASTBlockDecoded(std::move(nodes));
+    }
+
+    bool IsLoop() const {
+        return std::holds_alternative<ASTDoWhile>(data);
+    }
+
+    ASTZipper* GetSubNodes() { // child list of container nodes, nullptr for all others
+        if (auto* root = std::get_if<ASTProgram>(&data)) {
+            return &root->nodes;
+        }
+        if (auto* branch = std::get_if<ASTIfThen>(&data)) {
+            return &branch->nodes;
+        }
+        if (auto* alternative = std::get_if<ASTIfElse>(&data)) {
+            return &alternative->nodes;
+        }
+        if (auto* loop = std::get_if<ASTDoWhile>(&data)) {
+            return &loop->nodes;
+        }
+        return nullptr;
+    }
+
+    void Clear() { // drops every link this node holds to other nodes
+        next.reset();
+        previous.reset();
+        parent.reset();
+        manager = nullptr;
+    }
+
+private:
+    friend class ASTZipper;
+
+    ASTData data;
+    ASTNode parent{};
+    ASTNode next{};
+    ASTNode previous{};
+    ASTZipper* manager{};
+};
+
+class ASTManager final { // builds the AST of one program and removes gotos from it
+public:
+    ASTManager(bool full_decompile, bool disable_else_derivation);
+    ~ASTManager();
+
+    ASTManager(const ASTManager& o) = delete;
+    ASTManager& operator=(const ASTManager& other) = delete;
+
+    // NOTE(review): the defaulted move assignment overwrites main_node without Clear();
+    // confirm it is never used on a manager that already holds a tree.
+    ASTManager(ASTManager&& other) noexcept = default;
+    ASTManager& operator=(ASTManager&& other) noexcept = default;
+
+    void Init();
+
+    void DeclareLabel(u32 address);
+    void InsertLabel(u32 address);
+
+    void InsertGoto(Expr condition, u32 address);
+
+    void InsertBlock(u32 start_address, u32 end_address);
+
+    void InsertReturn(Expr condition, bool kills);
+
+    std::string Print() const;
+
+    void Decompile();
+
+    void ShowCurrentState(std::string_view state) const;
+    void SanityCheck() const;
+
+    void Clear();
+
+    // True when nothing is left that Decompile() was asked to remove: no goto at all in
+    // full mode, and no backward jump otherwise. A listed node without a goto label makes
+    // the answer false as well.
+    bool IsFullyDecompiled() const {
+        if (full_decompile) {
+            return gotos.empty();
+        }
+
+        for (const ASTNode& goto_node : gotos) {
+            const auto label_index = goto_node->GetGotoLabel();
+            if (!label_index || IsBackwardsJump(goto_node, labels[*label_index])) {
+                return false;
+            }
+        }
+        return true;
+    }
+
+    ASTNode GetProgram() const {
+        return main_node;
+    }
+
+    u32 GetVariables() const {
+        return variables;
+    }
+
+    const std::vector<ASTNode>& GetLabels() const {
+        return labels;
+    }
+
+private:
+    bool IsBackwardsJump(ASTNode goto_node, ASTNode label_node) const;
+
+    bool IndirectlyRelated(const ASTNode& first, const ASTNode& second) const;
+
+    bool DirectlyRelated(const ASTNode& first, const ASTNode& second) const;
+
+    void EncloseDoWhile(ASTNode goto_node, ASTNode label);
+
+    void EncloseIfThen(ASTNode goto_node, ASTNode label);
+
+    void MoveOutward(ASTNode goto_node);
+
+    u32 NewVariable() {
+        return variables++;
+    }
+
+    bool full_decompile{};
+    bool disable_else_derivation{};
+    std::unordered_map<u32, u32> labels_map{};
+    u32 labels_count{};
+    std::vector<ASTNode> labels{};
+    std::list<ASTNode> gotos{};
+    u32 variables{};
+    ASTProgram* program{};
+    ASTNode main_node{};
+    Expr false_condition{};
+};
+
+} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/compiler_settings.cpp b/src/video_core/shader/compiler_settings.cpp
new file mode 100644
index 000000000..cddcbd4f0
--- /dev/null
+++ b/src/video_core/shader/compiler_settings.cpp
@@ -0,0 +1,26 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "video_core/shader/compiler_settings.h"
+
+namespace VideoCommon::Shader {
+
+std::string CompileDepthAsString(const CompileDepth cd) {
+    // Display names indexed by the numeric value of each enumerator (BruteForce = 0 onwards).
+    static constexpr const char* names[] = {
+        "Brute Force Compile",      // BruteForce
+        "Simple Flow Stack Mode",   // FlowStack
+        "Remove Flow Stack",        // NoFlowStack
+        "Decompile Backward Jumps", // DecompileBackwards
+        "Full Decompilation",       // FullDecompile
+    };
+    constexpr auto known_count = static_cast<u32>(sizeof(names) / sizeof(names[0]));
+    const auto index = static_cast<u32>(cd);
+    if (index >= known_count) {
+        return "Unknown Compiler Process"; // not one of the enumerators listed above
+    }
+    return names[index];
+}
+
+} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/compiler_settings.h b/src/video_core/shader/compiler_settings.h
new file mode 100644
index 000000000..916018c01
--- /dev/null
+++ b/src/video_core/shader/compiler_settings.h
@@ -0,0 +1,26 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "video_core/engines/shader_bytecode.h"
+
+namespace VideoCommon::Shader {
+
+enum class CompileDepth : u32 { // Names in quotes are what CompileDepthAsString returns.
+    BruteForce = 0,         // "Brute Force Compile"
+    FlowStack = 1,          // "Simple Flow Stack Mode"
+    NoFlowStack = 2,        // "Remove Flow Stack"
+    DecompileBackwards = 3, // "Decompile Backward Jumps"
+    FullDecompile = 4,      // "Full Decompilation"
+};
+
+std::string CompileDepthAsString(CompileDepth cd);
+
+struct CompilerSettings { // Bundles a compile depth with an else-derivation switch.
+    CompileDepth depth{CompileDepth::NoFlowStack}; // default depth: NoFlowStack
+    bool disable_else_derivation{true};            // else derivation is disabled by default
+};
+
+} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/const_buffer_locker.cpp b/src/video_core/shader/const_buffer_locker.cpp
new file mode 100644
index 000000000..fe467608e
--- /dev/null
+++ b/src/video_core/shader/const_buffer_locker.cpp
@@ -0,0 +1,110 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <algorithm>
+#include <memory>
+#include <optional>
+#include <utility>
+#include "common/assert.h"
+#include "common/common_types.h"
+#include "video_core/engines/maxwell_3d.h"
+#include "video_core/shader/const_buffer_locker.h"
+
+namespace VideoCommon::Shader {
+
+using Tegra::Engines::SamplerDescriptor;
+
+ConstBufferLocker::ConstBufferLocker(Tegra::Engines::ShaderType shader_stage)
+    : stage{shader_stage} {} // no engine given: the engine pointer is left unset
+
+ConstBufferLocker::ConstBufferLocker(Tegra::Engines::ShaderType shader_stage,
+                                     Tegra::Engines::ConstBufferEngineInterface& engine)
+    : stage{shader_stage}, engine{&engine} {} // stores the address of the caller's engine
+
+ConstBufferLocker::~ConstBufferLocker() = default;
+
+std::optional<u32> ConstBufferLocker::ObtainKey(u32 buffer, u32 offset) {
+    const std::pair<u32, u32> location{buffer, offset};
+    if (const auto cached = keys.find(location); cached != keys.end()) {
+        return cached->second; // recorded earlier: reuse it
+    }
+    if (engine == nullptr) {
+        return std::nullopt; // nothing recorded and no engine to ask
+    }
+    // First access: read the word from the engine and remember it for later calls.
+    const u32 fetched = engine->AccessConstBuffer32(stage, buffer, offset);
+    keys.emplace(location, fetched);
+    return fetched;
+}
+
+std::optional<SamplerDescriptor> ConstBufferLocker::ObtainBoundSampler(u32 offset) {
+    // Bound samplers are stored under their offset alone.
+    if (const auto cached = bound_samplers.find(offset); cached != bound_samplers.end()) {
+        return cached->second; // recorded earlier: reuse it
+    }
+    if (engine == nullptr) {
+        return std::nullopt; // nothing recorded and no engine to ask
+    }
+    // First access: query the engine and remember the descriptor for later calls.
+    const SamplerDescriptor fetched = engine->AccessBoundSampler(stage, offset);
+    bound_samplers.emplace(offset, fetched);
+    return fetched;
+}
+
+std::optional<Tegra::Engines::SamplerDescriptor> ConstBufferLocker::ObtainBindlessSampler(
+    u32 buffer, u32 offset) {
+    const std::pair<u32, u32> location{buffer, offset};
+    if (const auto cached = bindless_samplers.find(location); cached != bindless_samplers.end()) {
+        return cached->second; // recorded earlier: reuse it
+    }
+    if (engine == nullptr) {
+        return std::nullopt; // nothing recorded and no engine to ask
+    }
+    // First access: query the engine and remember the descriptor for later calls.
+    const SamplerDescriptor fetched = engine->AccessBindlessSampler(stage, buffer, offset);
+    bindless_samplers.emplace(location, fetched);
+    return fetched;
+}
+
+void ConstBufferLocker::InsertKey(u32 buffer, u32 offset, u32 value) {
+    keys.insert_or_assign({buffer, offset}, value); // replaces any value already stored
+}
+
+void ConstBufferLocker::InsertBoundSampler(u32 offset, SamplerDescriptor sampler) {
+    bound_samplers.insert_or_assign(offset, sampler); // replaces any sampler already stored
+}
+
+void ConstBufferLocker::InsertBindlessSampler(u32 buffer, u32 offset, SamplerDescriptor sampler) {
+    bindless_samplers.insert_or_assign({buffer, offset}, sampler); // replaces a stored sampler
+}
+
+bool ConstBufferLocker::IsConsistent() const {
+    if (!engine) {
+        return false; // nothing to compare against
+    }
+    // Each predicate re-reads one recorded entry from the engine and compares the result.
+    const auto key_matches = [this](const auto& entry) {
+        const auto [cbuf, offset] = entry.first;
+        return entry.second == engine->AccessConstBuffer32(stage, cbuf, offset);
+    };
+    const auto bound_matches = [this](const auto& entry) {
+        return entry.second == engine->AccessBoundSampler(stage, entry.first);
+    };
+    const auto bindless_matches = [this](const auto& entry) {
+        const auto [cbuf, offset] = entry.first;
+        return entry.second == engine->AccessBindlessSampler(stage, cbuf, offset);
+    };
+
+    // Groups are checked in this order and evaluation stops at the first mismatch.
+    return std::all_of(keys.begin(), keys.end(), key_matches) &&
+           std::all_of(bound_samplers.begin(), bound_samplers.end(), bound_matches) &&
+           std::all_of(bindless_samplers.begin(), bindless_samplers.end(), bindless_matches);
+}
+
+bool ConstBufferLocker::HasEqualKeys(const ConstBufferLocker& rhs) const {
+    const bool same_bound = keys == rhs.keys && bound_samplers == rhs.bound_samplers;
+    return same_bound && bindless_samplers == rhs.bindless_samplers; // stage/engine not compared
+}
+
+} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/const_buffer_locker.h b/src/video_core/shader/const_buffer_locker.h
new file mode 100644
index 000000000..600e2f3c3
--- /dev/null
+++ b/src/video_core/shader/const_buffer_locker.h
@@ -0,0 +1,80 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <unordered_map>
+#include "common/common_types.h"
+#include "common/hash.h"
+#include "video_core/engines/const_buffer_engine_interface.h"
+
+namespace VideoCommon::Shader {
+
+using KeyMap = std::unordered_map<std::pair<u32, u32>, u32, Common::PairHash>;
+using BoundSamplerMap = std::unordered_map<u32, Tegra::Engines::SamplerDescriptor>;
+using BindlessSamplerMap =
+    std::unordered_map<std::pair<u32, u32>, Tegra::Engines::SamplerDescriptor, Common::PairHash>;
+
+/**
+ * The ConstBufferLocker is a class used to interface the 3D and compute engines with the shader
+ * compiler. With it, the shader can obtain required data from GPU state and store it for disk
+ * shader compilation.
+ **/
+class ConstBufferLocker {
+public:
+    explicit ConstBufferLocker(Tegra::Engines::ShaderType shader_stage);
+
+    explicit ConstBufferLocker(Tegra::Engines::ShaderType shader_stage,
+                               Tegra::Engines::ConstBufferEngineInterface& engine);
+
+    ~ConstBufferLocker();
+
+    /// Gives the registered value of a key, or reads it from the engine and registers it. Returns
+    /// std::nullopt when the key is not registered and the locker has no engine.
+    std::optional<u32> ObtainKey(u32 buffer, u32 offset);
+
+    std::optional<Tegra::Engines::SamplerDescriptor> ObtainBoundSampler(u32 offset);
+
+    std::optional<Tegra::Engines::SamplerDescriptor> ObtainBindlessSampler(u32 buffer, u32 offset);
+
+    /// Inserts a key.
+    void InsertKey(u32 buffer, u32 offset, u32 value);
+
+    /// Inserts a bound sampler key.
+    void InsertBoundSampler(u32 offset, Tegra::Engines::SamplerDescriptor sampler);
+
+    /// Inserts a bindless sampler key.
+    void InsertBindlessSampler(u32 buffer, u32 offset, Tegra::Engines::SamplerDescriptor sampler);
+
+    /// Checks keys and samplers against engine's current const buffers. Returns true if they are
+    /// the same value, false otherwise (also false when the locker has no engine).
+    bool IsConsistent() const;
+
+    /// Returns true if the keys are equal to the other ones in the locker.
+    bool HasEqualKeys(const ConstBufferLocker& rhs) const;
+
+    /// Gives a getter to the const buffer keys in the database.
+    const KeyMap& GetKeys() const {
+        return keys;
+    }
+
+    /// Gets samplers database.
+    const BoundSamplerMap& GetBoundSamplers() const {
+        return bound_samplers;
+    }
+
+    /// Gets bindless samplers database.
+    const BindlessSamplerMap& GetBindlessSamplers() const {
+        return bindless_samplers;
+    }
+
+private:
+    const Tegra::Engines::ShaderType stage;
+    Tegra::Engines::ConstBufferEngineInterface* engine = nullptr; // not owned; may be null
+    KeyMap keys;
+    BoundSamplerMap bound_samplers;
+    BindlessSamplerMap bindless_samplers;
+};
+
+} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/control_flow.cpp b/src/video_core/shader/control_flow.cpp
index ec3a76690..b427ac873 100644
--- a/src/video_core/shader/control_flow.cpp
+++ b/src/video_core/shader/control_flow.cpp
@@ -4,18 +4,21 @@
 
 #include <list>
 #include <map>
+#include <set>
 #include <stack>
 #include <unordered_map>
-#include <unordered_set>
 #include <vector>
 
 #include "common/assert.h"
 #include "common/common_types.h"
+#include "video_core/shader/ast.h"
 #include "video_core/shader/control_flow.h"
 #include "video_core/shader/shader_ir.h"
 
 namespace VideoCommon::Shader {
+
 namespace {
+
 using Tegra::Shader::Instruction;
 using Tegra::Shader::OpCode;
 
@@ -34,14 +37,20 @@ struct BlockStack {
     std::stack<u32> pbk_stack{};
 };
 
-struct BlockBranchInfo {
-    Condition condition{};
-    s32 address{exit_branch};
-    bool kill{};
-    bool is_sync{};
-    bool is_brk{};
-    bool ignore{};
-};
+template <typename T, typename... Args>
+BlockBranchInfo MakeBranchInfo(Args&&... args) {
+    static_assert(std::is_convertible_v<T, BranchData>); // T must convert to BranchData
+    return std::make_shared<BranchData>(T(std::forward<Args>(args)...)); // build T, then wrap it
+}
+
+bool BlockBranchIsIgnored(BlockBranchInfo first) {
+    // A branch that does not hold a SingleBranch is reported as not ignored.
+    const auto single = std::get_if<SingleBranch>(first.get());
+    if (single == nullptr) {
+        return false;
+    }
+    return single->ignore;
+}
 
 struct BlockInfo {
     u32 start{};
@@ -55,21 +64,21 @@ struct BlockInfo {
 };
 
 struct CFGRebuildState {
-    explicit CFGRebuildState(const ProgramCode& program_code, const std::size_t program_size,
-                             const u32 start)
-        : start{start}, program_code{program_code}, program_size{program_size} {}
+    explicit CFGRebuildState(const ProgramCode& program_code, u32 start, ConstBufferLocker& locker)
+        : program_code{program_code}, locker{locker}, start{start} {} // declaration order
 
-    u32 start{};
-    std::vector<BlockInfo> block_info{};
-    std::list<u32> inspect_queries{};
-    std::list<Query> queries{};
-    std::unordered_map<u32, u32> registered{};
-    std::unordered_set<u32> labels{};
-    std::map<u32, u32> ssy_labels{};
-    std::map<u32, u32> pbk_labels{};
-    std::unordered_map<u32, BlockStack> stacks{};
     const ProgramCode& program_code;
-    const std::size_t program_size;
+    ConstBufferLocker& locker;
+    u32 start{};
+    std::vector<BlockInfo> block_info;
+    std::list<u32> inspect_queries;
+    std::list<Query> queries;
+    std::unordered_map<u32, u32> registered;
+    std::set<u32> labels;
+    std::map<u32, u32> ssy_labels;
+    std::map<u32, u32> pbk_labels;
+    std::unordered_map<u32, BlockStack> stacks;
+    ASTManager* manager{};
 };
 
 enum class BlockCollision : u32 { None, Found, Inside };
@@ -102,7 +111,7 @@ BlockInfo& CreateBlockInfo(CFGRebuildState& state, u32 start, u32 end) {
 }
 
 Pred GetPredicate(u32 index, bool negated) {
-    return static_cast<Pred>(index + (negated ? 8 : 0));
+    return static_cast<Pred>(static_cast<u64>(index) + (negated ? 8ULL : 0ULL));
 }
 
 /**
@@ -122,10 +131,122 @@ enum class ParseResult : u32 {
     AbnormalFlow,
 };
 
+struct BranchIndirectInfo { // Result of TrackBranchIndirectInfo for one BRX instruction.
+    u32 buffer{};            // cbuf36 index of the tracked LD_C
+    u32 offset{};            // cbuf36 offset of the tracked LD_C
+    u32 entries{};           // tracked IMNMX_IMM immediate + 1
+    s32 relative_position{}; // GetBranchExtend() of the BRX
+};
+
+struct BufferInfo { // Const buffer location: index and offset, as read from cbuf36 in TrackLDC.
+    u32 index{};  // value-initialized, matching BranchIndirectInfo
+    u32 offset{}; // value-initialized, matching BranchIndirectInfo
+};
+
+std::optional<std::pair<s32, u64>> GetBRXInfo(const CFGRebuildState& state, u32& pos) {
+    const Instruction instr = state.program_code[pos];
+    const auto opcode = OpCode::Decode(instr);
+    if (!opcode || opcode->get().GetId() != OpCode::Id::BRX) { // never deref a failed decode
+        return std::nullopt;
+    }
+    if (instr.brx.constant_buffer != 0) {
+        return std::nullopt; // only BRX with constant_buffer == 0 is handled here
+    }
+    --pos; // callers keep scanning backwards from the instruction before the BRX
+    return std::make_pair(instr.brx.GetBranchExtend(), instr.gpr8.Value());
+}
+
+template <typename Result, typename TestCallable, typename PackCallable>
+// requires std::predicate<TestCallable, Instruction, const OpCode::Matcher&>
+// requires std::invocable<PackCallable, Instruction, const OpCode::Matcher&>
+std::optional<Result> TrackInstruction(const CFGRebuildState& state, u32& pos, TestCallable test,
+                                       PackCallable pack) {
+    // Scans backwards from pos for the first decodable instruction accepted by test. pos is
+    // unsigned, so decrementing past zero wraps around; bounding it by the code size stops the
+    // scan there instead of reading outside program_code.
+    for (; pos >= state.start && pos < state.program_code.size(); --pos) {
+        if (IsSchedInstruction(pos, state.start)) {
+            continue;
+        }
+        const Instruction instr = state.program_code[pos];
+        const auto opcode = OpCode::Decode(instr);
+        if (opcode && test(instr, opcode->get())) {
+            --pos; // leave pos just before the match for the next tracking step
+            return std::make_optional(pack(instr, opcode->get()));
+        }
+    }
+    return std::nullopt;
+}
+
+std::optional<std::pair<BufferInfo, u64>> TrackLDC(const CFGRebuildState& state, u32& pos,
+                                                   u64 brx_tracked_register) {
+    // Accept an LD_C of type Single whose gpr0 is the tracked register; report cbuf36 and gpr8.
+    const auto matches = [brx_tracked_register](auto instr, const auto& opcode) {
+        return opcode.GetId() == OpCode::Id::LD_C &&
+               instr.gpr0.Value() == brx_tracked_register &&
+               instr.ld_c.type.Value() == Tegra::Shader::UniformType::Single;
+    };
+    const auto extract = [](auto instr, const auto&) {
+        const BufferInfo info = {static_cast<u32>(instr.cbuf36.index.Value()),
+                                 static_cast<u32>(instr.cbuf36.GetOffset())};
+        return std::make_pair(info, instr.gpr8.Value());
+    };
+    return TrackInstruction<std::pair<BufferInfo, u64>>(state, pos, matches, extract);
+}
+
+std::optional<u64> TrackSHLRegister(const CFGRebuildState& state, u32& pos,
+                                    u64 ldc_tracked_register) {
+    // Accept a SHL_IMM whose gpr0 is the tracked register and report its gpr8.
+    const auto matches = [ldc_tracked_register](auto instr, const auto& opcode) {
+        return opcode.GetId() == OpCode::Id::SHL_IMM && instr.gpr0.Value() == ldc_tracked_register;
+    };
+    const auto extract = [](auto instr, const auto&) { return instr.gpr8.Value(); };
+    return TrackInstruction<u64>(state, pos, matches, extract);
+}
+
+std::optional<u32> TrackIMNMXValue(const CFGRebuildState& state, u32& pos,
+                                   u64 shl_tracked_register) {
+    const auto matches = [shl_tracked_register](auto instr, const auto& opcode) {
+        return opcode.GetId() == OpCode::Id::IMNMX_IMM &&
+               instr.gpr0.Value() == shl_tracked_register;
+    };
+    const auto extract = [](auto instr, const auto&) { // reports GetSignedImm20_20() + 1
+        return static_cast<u32>(instr.alu.GetSignedImm20_20() + 1);
+    };
+    return TrackInstruction<u32>(state, pos, matches, extract);
+}
+
+std::optional<BranchIndirectInfo> TrackBranchIndirectInfo(const CFGRebuildState& state, u32 pos) {
+    const auto brx_info = GetBRXInfo(state, pos);
+    if (!brx_info) {
+        return std::nullopt;
+    }
+    const auto [relative_position, brx_tracked_register] = *brx_info;
+    // Every step below resumes the backwards scan at pos, which the previous step moved.
+    const auto ldc_info = TrackLDC(state, pos, brx_tracked_register);
+    if (!ldc_info) {
+        return std::nullopt;
+    }
+    const auto [buffer_info, ldc_tracked_register] = *ldc_info;
+    // Look for the SHL_IMM whose gpr0 is the register taken from the load's gpr8.
+    const auto shl_tracked_register = TrackSHLRegister(state, pos, ldc_tracked_register);
+    if (!shl_tracked_register) {
+        return std::nullopt;
+    }
+    // Look for the IMNMX_IMM whose gpr0 is the shift's gpr8; it supplies the entry count.
+    const auto entries = TrackIMNMXValue(state, pos, *shl_tracked_register);
+    if (!entries) {
+        return std::nullopt;
+    }
+
+    return BranchIndirectInfo{buffer_info.index, buffer_info.offset, *entries, relative_position};
+}
+
 std::pair<ParseResult, ParseInfo> ParseCode(CFGRebuildState& state, u32 address) {
     u32 offset = static_cast<u32>(address);
-    const u32 end_address = static_cast<u32>(state.program_size / sizeof(Instruction));
+    const u32 end_address = static_cast<u32>(state.program_code.size());
     ParseInfo parse_info{};
+    SingleBranch single_branch{};
 
     const auto insert_label = [](CFGRebuildState& state, u32 address) {
         const auto pair = state.labels.emplace(address);
@@ -138,13 +259,14 @@ std::pair<ParseResult, ParseInfo> ParseCode(CFGRebuildState& state, u32 address)
         if (offset >= end_address) {
             // ASSERT_OR_EXECUTE can't be used, as it ignores the break
             ASSERT_MSG(false, "Shader passed the current limit!");
-            parse_info.branch_info.address = exit_branch;
-            parse_info.branch_info.ignore = false;
+
+            single_branch.address = exit_branch;
+            single_branch.ignore = false;
             break;
         }
         if (state.registered.count(offset) != 0) {
-            parse_info.branch_info.address = offset;
-            parse_info.branch_info.ignore = true;
+            single_branch.address = offset;
+            single_branch.ignore = true;
             break;
         }
         if (IsSchedInstruction(offset, state.start)) {
@@ -161,24 +283,26 @@ std::pair<ParseResult, ParseInfo> ParseCode(CFGRebuildState& state, u32 address)
         switch (opcode->get().GetId()) {
         case OpCode::Id::EXIT: {
             const auto pred_index = static_cast<u32>(instr.pred.pred_index);
-            parse_info.branch_info.condition.predicate =
-                GetPredicate(pred_index, instr.negate_pred != 0);
-            if (parse_info.branch_info.condition.predicate == Pred::NeverExecute) {
+            single_branch.condition.predicate = GetPredicate(pred_index, instr.negate_pred != 0);
+            if (single_branch.condition.predicate == Pred::NeverExecute) {
                 offset++;
                 continue;
             }
             const ConditionCode cc = instr.flow_condition_code;
-            parse_info.branch_info.condition.cc = cc;
+            single_branch.condition.cc = cc;
             if (cc == ConditionCode::F) {
                 offset++;
                 continue;
             }
-            parse_info.branch_info.address = exit_branch;
-            parse_info.branch_info.kill = false;
-            parse_info.branch_info.is_sync = false;
-            parse_info.branch_info.is_brk = false;
-            parse_info.branch_info.ignore = false;
+            single_branch.address = exit_branch;
+            single_branch.kill = false;
+            single_branch.is_sync = false;
+            single_branch.is_brk = false;
+            single_branch.ignore = false;
             parse_info.end_address = offset;
+            parse_info.branch_info = MakeBranchInfo<SingleBranch>(
+                single_branch.condition, single_branch.address, single_branch.kill,
+                single_branch.is_sync, single_branch.is_brk, single_branch.ignore);
 
             return {ParseResult::ControlCaught, parse_info};
         }
@@ -187,99 +311,107 @@ std::pair<ParseResult, ParseInfo> ParseCode(CFGRebuildState& state, u32 address)
                 return {ParseResult::AbnormalFlow, parse_info};
             }
             const auto pred_index = static_cast<u32>(instr.pred.pred_index);
-            parse_info.branch_info.condition.predicate =
-                GetPredicate(pred_index, instr.negate_pred != 0);
-            if (parse_info.branch_info.condition.predicate == Pred::NeverExecute) {
+            single_branch.condition.predicate = GetPredicate(pred_index, instr.negate_pred != 0);
+            if (single_branch.condition.predicate == Pred::NeverExecute) {
                 offset++;
                 continue;
             }
             const ConditionCode cc = instr.flow_condition_code;
-            parse_info.branch_info.condition.cc = cc;
+            single_branch.condition.cc = cc;
             if (cc == ConditionCode::F) {
                 offset++;
                 continue;
             }
             const u32 branch_offset = offset + instr.bra.GetBranchTarget();
             if (branch_offset == 0) {
-                parse_info.branch_info.address = exit_branch;
+                single_branch.address = exit_branch;
             } else {
-                parse_info.branch_info.address = branch_offset;
+                single_branch.address = branch_offset;
             }
             insert_label(state, branch_offset);
-            parse_info.branch_info.kill = false;
-            parse_info.branch_info.is_sync = false;
-            parse_info.branch_info.is_brk = false;
-            parse_info.branch_info.ignore = false;
+            single_branch.kill = false;
+            single_branch.is_sync = false;
+            single_branch.is_brk = false;
+            single_branch.ignore = false;
             parse_info.end_address = offset;
+            parse_info.branch_info = MakeBranchInfo<SingleBranch>(
+                single_branch.condition, single_branch.address, single_branch.kill,
+                single_branch.is_sync, single_branch.is_brk, single_branch.ignore);
 
             return {ParseResult::ControlCaught, parse_info};
         }
         case OpCode::Id::SYNC: {
             const auto pred_index = static_cast<u32>(instr.pred.pred_index);
-            parse_info.branch_info.condition.predicate =
-                GetPredicate(pred_index, instr.negate_pred != 0);
-            if (parse_info.branch_info.condition.predicate == Pred::NeverExecute) {
+            single_branch.condition.predicate = GetPredicate(pred_index, instr.negate_pred != 0);
+            if (single_branch.condition.predicate == Pred::NeverExecute) {
                 offset++;
                 continue;
             }
             const ConditionCode cc = instr.flow_condition_code;
-            parse_info.branch_info.condition.cc = cc;
+            single_branch.condition.cc = cc;
             if (cc == ConditionCode::F) {
                 offset++;
                 continue;
             }
-            parse_info.branch_info.address = unassigned_branch;
-            parse_info.branch_info.kill = false;
-            parse_info.branch_info.is_sync = true;
-            parse_info.branch_info.is_brk = false;
-            parse_info.branch_info.ignore = false;
+            single_branch.address = unassigned_branch;
+            single_branch.kill = false;
+            single_branch.is_sync = true;
+            single_branch.is_brk = false;
+            single_branch.ignore = false;
             parse_info.end_address = offset;
+            parse_info.branch_info = MakeBranchInfo<SingleBranch>(
+                single_branch.condition, single_branch.address, single_branch.kill,
+                single_branch.is_sync, single_branch.is_brk, single_branch.ignore);
 
             return {ParseResult::ControlCaught, parse_info};
         }
         case OpCode::Id::BRK: {
             const auto pred_index = static_cast<u32>(instr.pred.pred_index);
-            parse_info.branch_info.condition.predicate =
-                GetPredicate(pred_index, instr.negate_pred != 0);
-            if (parse_info.branch_info.condition.predicate == Pred::NeverExecute) {
+            single_branch.condition.predicate = GetPredicate(pred_index, instr.negate_pred != 0);
+            if (single_branch.condition.predicate == Pred::NeverExecute) {
                 offset++;
                 continue;
             }
             const ConditionCode cc = instr.flow_condition_code;
-            parse_info.branch_info.condition.cc = cc;
+            single_branch.condition.cc = cc;
             if (cc == ConditionCode::F) {
                 offset++;
                 continue;
             }
-            parse_info.branch_info.address = unassigned_branch;
-            parse_info.branch_info.kill = false;
-            parse_info.branch_info.is_sync = false;
-            parse_info.branch_info.is_brk = true;
-            parse_info.branch_info.ignore = false;
+            single_branch.address = unassigned_branch;
+            single_branch.kill = false;
+            single_branch.is_sync = false;
+            single_branch.is_brk = true;
+            single_branch.ignore = false;
             parse_info.end_address = offset;
+            parse_info.branch_info = MakeBranchInfo<SingleBranch>(
+                single_branch.condition, single_branch.address, single_branch.kill,
+                single_branch.is_sync, single_branch.is_brk, single_branch.ignore);
 
             return {ParseResult::ControlCaught, parse_info};
         }
         case OpCode::Id::KIL: {
             const auto pred_index = static_cast<u32>(instr.pred.pred_index);
-            parse_info.branch_info.condition.predicate =
-                GetPredicate(pred_index, instr.negate_pred != 0);
-            if (parse_info.branch_info.condition.predicate == Pred::NeverExecute) {
+            single_branch.condition.predicate = GetPredicate(pred_index, instr.negate_pred != 0);
+            if (single_branch.condition.predicate == Pred::NeverExecute) {
                 offset++;
                 continue;
             }
             const ConditionCode cc = instr.flow_condition_code;
-            parse_info.branch_info.condition.cc = cc;
+            single_branch.condition.cc = cc;
             if (cc == ConditionCode::F) {
                 offset++;
                 continue;
             }
-            parse_info.branch_info.address = exit_branch;
-            parse_info.branch_info.kill = true;
-            parse_info.branch_info.is_sync = false;
-            parse_info.branch_info.is_brk = false;
-            parse_info.branch_info.ignore = false;
+            single_branch.address = exit_branch;
+            single_branch.kill = true;
+            single_branch.is_sync = false;
+            single_branch.is_brk = false;
+            single_branch.ignore = false;
             parse_info.end_address = offset;
+            parse_info.branch_info = MakeBranchInfo<SingleBranch>(
+                single_branch.condition, single_branch.address, single_branch.kill,
+                single_branch.is_sync, single_branch.is_brk, single_branch.ignore);
 
             return {ParseResult::ControlCaught, parse_info};
         }
@@ -296,7 +428,30 @@ std::pair<ParseResult, ParseInfo> ParseCode(CFGRebuildState& state, u32 address)
             break;
         }
         case OpCode::Id::BRX: {
-            return {ParseResult::AbnormalFlow, parse_info};
+            const auto tmp = TrackBranchIndirectInfo(state, offset);
+            if (!tmp) {
+                LOG_WARNING(HW_GPU, "BRX Track Unsuccesful");
+                return {ParseResult::AbnormalFlow, parse_info};
+            }
+
+            const auto result = *tmp;
+            const s32 pc_target = offset + result.relative_position;
+            std::vector<CaseBranch> branches;
+            for (u32 i = 0; i < result.entries; i++) {
+                auto key = state.locker.ObtainKey(result.buffer, result.offset + i * 4);
+                if (!key) {
+                    return {ParseResult::AbnormalFlow, parse_info};
+                }
+                u32 value = *key;
+                u32 target = static_cast<u32>((value >> 3) + pc_target);
+                insert_label(state, target);
+                branches.emplace_back(value, target);
+            }
+            parse_info.end_address = offset;
+            parse_info.branch_info = MakeBranchInfo<MultiBranch>(
+                static_cast<u32>(instr.gpr8.Value()), std::move(branches));
+
+            return {ParseResult::ControlCaught, parse_info};
         }
         default:
             break;
@@ -304,10 +459,13 @@ std::pair<ParseResult, ParseInfo> ParseCode(CFGRebuildState& state, u32 address)
 
         offset++;
     }
-    parse_info.branch_info.kill = false;
-    parse_info.branch_info.is_sync = false;
-    parse_info.branch_info.is_brk = false;
+    single_branch.kill = false;
+    single_branch.is_sync = false;
+    single_branch.is_brk = false;
     parse_info.end_address = offset - 1;
+    parse_info.branch_info = MakeBranchInfo<SingleBranch>(
+        single_branch.condition, single_branch.address, single_branch.kill, single_branch.is_sync,
+        single_branch.is_brk, single_branch.ignore);
     return {ParseResult::BlockEnd, parse_info};
 }
 
@@ -331,9 +489,10 @@ bool TryInspectAddress(CFGRebuildState& state) {
         BlockInfo& current_block = state.block_info[block_index];
         current_block.end = address - 1;
         new_block.branch = current_block.branch;
-        BlockBranchInfo forward_branch{};
-        forward_branch.address = address;
-        forward_branch.ignore = true;
+        BlockBranchInfo forward_branch = MakeBranchInfo<SingleBranch>();
+        const auto branch = std::get_if<SingleBranch>(forward_branch.get());
+        branch->address = address;
+        branch->ignore = true;
         current_block.branch = forward_branch;
         return true;
     }
@@ -348,12 +507,15 @@ bool TryInspectAddress(CFGRebuildState& state) {
 
     BlockInfo& block_info = CreateBlockInfo(state, address, parse_info.end_address);
     block_info.branch = parse_info.branch_info;
-    if (parse_info.branch_info.condition.IsUnconditional()) {
+    if (std::holds_alternative<SingleBranch>(*block_info.branch)) {
+        const auto branch = std::get_if<SingleBranch>(block_info.branch.get());
+        if (branch->condition.IsUnconditional()) {
+            return true;
+        }
+        const u32 fallthrough_address = parse_info.end_address + 1;
+        state.inspect_queries.push_front(fallthrough_address);
         return true;
     }
-
-    const u32 fallthrough_address = parse_info.end_address + 1;
-    state.inspect_queries.push_front(fallthrough_address);
     return true;
 }
 
@@ -391,91 +553,205 @@ bool TryQuery(CFGRebuildState& state) {
     state.queries.pop_front();
     gather_labels(q2.ssy_stack, state.ssy_labels, block);
     gather_labels(q2.pbk_stack, state.pbk_labels, block);
-    if (!block.branch.condition.IsUnconditional()) {
-        q2.address = block.end + 1;
-        state.queries.push_back(q2);
-    }
+    if (std::holds_alternative<SingleBranch>(*block.branch)) {
+        const auto branch = std::get_if<SingleBranch>(block.branch.get());
+        if (!branch->condition.IsUnconditional()) {
+            q2.address = block.end + 1;
+            state.queries.push_back(q2);
+        }
 
-    Query conditional_query{q2};
-    if (block.branch.is_sync) {
-        if (block.branch.address == unassigned_branch) {
-            block.branch.address = conditional_query.ssy_stack.top();
+        Query conditional_query{q2};
+        if (branch->is_sync) {
+            if (branch->address == unassigned_branch) {
+                branch->address = conditional_query.ssy_stack.top();
+            }
+            conditional_query.ssy_stack.pop();
         }
-        conditional_query.ssy_stack.pop();
-    }
-    if (block.branch.is_brk) {
-        if (block.branch.address == unassigned_branch) {
-            block.branch.address = conditional_query.pbk_stack.top();
+        if (branch->is_brk) {
+            if (branch->address == unassigned_branch) {
+                branch->address = conditional_query.pbk_stack.top();
+            }
+            conditional_query.pbk_stack.pop();
         }
-        conditional_query.pbk_stack.pop();
+        conditional_query.address = branch->address;
+        state.queries.push_back(std::move(conditional_query));
+        return true;
+    }
+    const auto multi_branch = std::get_if<MultiBranch>(block.branch.get());
+    for (const auto& branch_case : multi_branch->branches) {
+        Query conditional_query{q2};
+        conditional_query.address = branch_case.address;
+        state.queries.push_back(std::move(conditional_query));
     }
-    conditional_query.address = block.branch.address;
-    state.queries.push_back(std::move(conditional_query));
     return true;
 }
+
 } // Anonymous namespace
 
-std::optional<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code,
-                                              std::size_t program_size, u32 start_address) {
-    CFGRebuildState state{program_code, program_size, start_address};
+void InsertBranch(ASTManager& mm, const BlockBranchInfo& branch_info) {
+    const auto get_expr = ([&](const Condition& cond) -> Expr {
+        Expr result{};
+        if (cond.cc != ConditionCode::T) {
+            result = MakeExpr<ExprCondCode>(cond.cc);
+        }
+        if (cond.predicate != Pred::UnusedIndex) {
+            u32 pred = static_cast<u32>(cond.predicate);
+            bool negate = false;
+            if (pred > 7) {
+                negate = true;
+                pred -= 8;
+            }
+            Expr extra = MakeExpr<ExprPredicate>(pred);
+            if (negate) {
+                extra = MakeExpr<ExprNot>(extra);
+            }
+            if (result) {
+                return MakeExpr<ExprAnd>(extra, result);
+            }
+            return extra;
+        }
+        if (result) {
+            return result;
+        }
+        return MakeExpr<ExprBoolean>(true);
+    });
+    if (std::holds_alternative<SingleBranch>(*branch_info)) {
+        const auto branch = std::get_if<SingleBranch>(branch_info.get());
+        if (branch->address < 0) {
+            if (branch->kill) {
+                mm.InsertReturn(get_expr(branch->condition), true);
+                return;
+            }
+            mm.InsertReturn(get_expr(branch->condition), false);
+            return;
+        }
+        mm.InsertGoto(get_expr(branch->condition), branch->address);
+        return;
+    }
+    const auto multi_branch = std::get_if<MultiBranch>(branch_info.get());
+    for (const auto& branch_case : multi_branch->branches) {
+        mm.InsertGoto(MakeExpr<ExprGprEqual>(multi_branch->gpr, branch_case.cmp_value),
+                      branch_case.address);
+    }
+}
+
+void DecompileShader(CFGRebuildState& state) {
+    state.manager->Init();
+    for (auto label : state.labels) {
+        state.manager->DeclareLabel(label);
+    }
+    for (auto& block : state.block_info) {
+        if (state.labels.count(block.start) != 0) {
+            state.manager->InsertLabel(block.start);
+        }
+        const bool ignore = BlockBranchIsIgnored(block.branch);
+        u32 end = ignore ? block.end + 1 : block.end;
+        state.manager->InsertBlock(block.start, end);
+        if (!ignore) {
+            InsertBranch(*state.manager, block.branch);
+        }
+    }
+    state.manager->Decompile();
+}
+
+std::unique_ptr<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, u32 start_address,
+                                                const CompilerSettings& settings,
+                                                ConstBufferLocker& locker) {
+    auto result_out = std::make_unique<ShaderCharacteristics>();
+    if (settings.depth == CompileDepth::BruteForce) {
+        result_out->settings.depth = CompileDepth::BruteForce;
+        return result_out;
+    }
 
+    CFGRebuildState state{program_code, start_address, locker};
     // Inspect Code and generate blocks
     state.labels.clear();
     state.labels.emplace(start_address);
     state.inspect_queries.push_back(state.start);
     while (!state.inspect_queries.empty()) {
         if (!TryInspectAddress(state)) {
-            return {};
+            result_out->settings.depth = CompileDepth::BruteForce;
+            return result_out;
         }
     }
 
-    // Decompile Stacks
-    state.queries.push_back(Query{state.start, {}, {}});
-    bool decompiled = true;
-    while (!state.queries.empty()) {
-        if (!TryQuery(state)) {
-            decompiled = false;
-            break;
+    bool use_flow_stack = true;
+
+    bool decompiled = false;
+
+    if (settings.depth != CompileDepth::FlowStack) {
+        // Decompile Stacks
+        state.queries.push_back(Query{state.start, {}, {}});
+        decompiled = true;
+        while (!state.queries.empty()) {
+            if (!TryQuery(state)) {
+                decompiled = false;
+                break;
+            }
         }
     }
 
+    use_flow_stack = !decompiled;
+
     // Sort and organize results
     std::sort(state.block_info.begin(), state.block_info.end(),
-              [](const BlockInfo& a, const BlockInfo& b) { return a.start < b.start; });
-    ShaderCharacteristics result_out{};
-    result_out.decompilable = decompiled;
-    result_out.start = start_address;
-    result_out.end = start_address;
-    for (const auto& block : state.block_info) {
+              [](const BlockInfo& a, const BlockInfo& b) -> bool { return a.start < b.start; });
+    if (decompiled && settings.depth != CompileDepth::NoFlowStack) {
+        ASTManager manager{settings.depth != CompileDepth::DecompileBackwards,
+                           settings.disable_else_derivation};
+        state.manager = &manager;
+        DecompileShader(state);
+        decompiled = state.manager->IsFullyDecompiled();
+        if (!decompiled) {
+            if (settings.depth == CompileDepth::FullDecompile) {
+                LOG_CRITICAL(HW_GPU, "Failed to remove all the gotos!:");
+            } else {
+                LOG_CRITICAL(HW_GPU, "Failed to remove all backward gotos!:");
+            }
+            state.manager->ShowCurrentState("Of Shader");
+            state.manager->Clear();
+        } else {
+            auto characteristics = std::make_unique<ShaderCharacteristics>();
+            characteristics->start = start_address;
+            characteristics->settings.depth = settings.depth;
+            characteristics->manager = std::move(manager);
+            characteristics->end = state.block_info.back().end + 1;
+            return characteristics;
+        }
+    }
+
+    result_out->start = start_address;
+    result_out->settings.depth =
+        use_flow_stack ? CompileDepth::FlowStack : CompileDepth::NoFlowStack;
+    result_out->blocks.clear();
+    for (auto& block : state.block_info) {
         ShaderBlock new_block{};
         new_block.start = block.start;
         new_block.end = block.end;
-        new_block.ignore_branch = block.branch.ignore;
+        new_block.ignore_branch = BlockBranchIsIgnored(block.branch);
         if (!new_block.ignore_branch) {
-            new_block.branch.cond = block.branch.condition;
-            new_block.branch.kills = block.branch.kill;
-            new_block.branch.address = block.branch.address;
+            new_block.branch = block.branch;
         }
-        result_out.end = std::max(result_out.end, block.end);
-        result_out.blocks.push_back(new_block);
+        result_out->end = std::max(result_out->end, block.end);
+        result_out->blocks.push_back(new_block);
     }
-    if (result_out.decompilable) {
-        result_out.labels = std::move(state.labels);
-        return {std::move(result_out)};
+    if (!use_flow_stack) {
+        result_out->labels = std::move(state.labels);
+        return result_out;
     }
 
-    // If it's not decompilable, merge the unlabelled blocks together
-    auto back = result_out.blocks.begin();
+    auto back = result_out->blocks.begin();
     auto next = std::next(back);
-    while (next != result_out.blocks.end()) {
+    while (next != result_out->blocks.end()) {
         if (state.labels.count(next->start) == 0 && next->start == back->end + 1) {
             back->end = next->end;
-            next = result_out.blocks.erase(next);
+            next = result_out->blocks.erase(next);
             continue;
         }
         back = next;
         ++next;
     }
-    return {std::move(result_out)};
+
+    return result_out;
 }
 } // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/control_flow.h b/src/video_core/shader/control_flow.h
index b0a5e4f8c..5304998b9 100644
--- a/src/video_core/shader/control_flow.h
+++ b/src/video_core/shader/control_flow.h
@@ -6,9 +6,12 @@
 
 #include <list>
 #include <optional>
-#include <unordered_set>
+#include <set>
+#include <variant>
 
 #include "video_core/engines/shader_bytecode.h"
+#include "video_core/shader/ast.h"
+#include "video_core/shader/compiler_settings.h"
 #include "video_core/shader/shader_ir.h"
 
 namespace VideoCommon::Shader {
@@ -35,29 +38,61 @@ struct Condition {
     }
 };
 
-struct ShaderBlock {
-    struct Branch {
-        Condition cond{};
-        bool kills{};
-        s32 address{};
+class SingleBranch {
+public:
+    SingleBranch() = default;
+    SingleBranch(Condition condition, s32 address, bool kill, bool is_sync, bool is_brk,
+                 bool ignore)
+        : condition{condition}, address{address}, kill{kill}, is_sync{is_sync}, is_brk{is_brk},
+          ignore{ignore} {}
+
+    bool operator==(const SingleBranch& b) const {
+        return std::tie(condition, address, kill, is_sync, is_brk, ignore) ==
+               std::tie(b.condition, b.address, b.kill, b.is_sync, b.is_brk, b.ignore);
+    }
+
+    bool operator!=(const SingleBranch& b) const {
+        return !operator==(b);
+    }
+
+    Condition condition{};
+    s32 address{exit_branch};
+    bool kill{};
+    bool is_sync{};
+    bool is_brk{};
+    bool ignore{};
+};
 
-        bool operator==(const Branch& b) const {
-            return std::tie(cond, kills, address) == std::tie(b.cond, b.kills, b.address);
-        }
+struct CaseBranch {
+    CaseBranch(u32 cmp_value, u32 address) : cmp_value{cmp_value}, address{address} {}
+    u32 cmp_value;
+    u32 address;
+};
+
+class MultiBranch {
+public:
+    MultiBranch(u32 gpr, std::vector<CaseBranch>&& branches)
+        : gpr{gpr}, branches{std::move(branches)} {}
+
+    u32 gpr{};
+    std::vector<CaseBranch> branches{};
+};
+
+using BranchData = std::variant<SingleBranch, MultiBranch>;
+using BlockBranchInfo = std::shared_ptr<BranchData>;
 
-        bool operator!=(const Branch& b) const {
-            return !operator==(b);
-        }
-    };
+bool BlockBranchInfoAreEqual(BlockBranchInfo first, BlockBranchInfo second);
 
+struct ShaderBlock {
     u32 start{};
     u32 end{};
     bool ignore_branch{};
-    Branch branch{};
+    BlockBranchInfo branch{};
 
     bool operator==(const ShaderBlock& sb) const {
-        return std::tie(start, end, ignore_branch, branch) ==
-               std::tie(sb.start, sb.end, sb.ignore_branch, sb.branch);
+        return std::tie(start, end, ignore_branch) ==
+                   std::tie(sb.start, sb.end, sb.ignore_branch) &&
+               BlockBranchInfoAreEqual(branch, sb.branch);
     }
 
     bool operator!=(const ShaderBlock& sb) const {
@@ -67,13 +102,15 @@ struct ShaderBlock {
 
 struct ShaderCharacteristics {
     std::list<ShaderBlock> blocks{};
-    bool decompilable{};
+    std::set<u32> labels{};
     u32 start{};
     u32 end{};
-    std::unordered_set<u32> labels{};
+    ASTManager manager{true, true};
+    CompilerSettings settings{};
 };
 
-std::optional<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code,
-                                              std::size_t program_size, u32 start_address);
+std::unique_ptr<ShaderCharacteristics> ScanFlow(const ProgramCode& program_code, u32 start_address,
+                                                const CompilerSettings& settings,
+                                                ConstBufferLocker& locker);
 
 } // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp
index 47a9fd961..22c3e5120 100644
--- a/src/video_core/shader/decode.cpp
+++ b/src/video_core/shader/decode.cpp
@@ -33,60 +33,140 @@ constexpr bool IsSchedInstruction(u32 offset, u32 main_offset) {
     return (absolute_offset % SchedPeriod) == 0;
 }
 
-} // namespace
+} // Anonymous namespace
+
+class ASTDecoder {
+public:
+    ASTDecoder(ShaderIR& ir) : ir(ir) {}
+
+    void operator()(ASTProgram& ast) {
+        ASTNode current = ast.nodes.GetFirst();
+        while (current) {
+            Visit(current);
+            current = current->GetNext();
+        }
+    }
+
+    void operator()(ASTIfThen& ast) {
+        ASTNode current = ast.nodes.GetFirst();
+        while (current) {
+            Visit(current);
+            current = current->GetNext();
+        }
+    }
+
+    void operator()(ASTIfElse& ast) {
+        ASTNode current = ast.nodes.GetFirst();
+        while (current) {
+            Visit(current);
+            current = current->GetNext();
+        }
+    }
+
+    void operator()(ASTBlockEncoded& ast) {}
+
+    void operator()(ASTBlockDecoded& ast) {}
+
+    void operator()(ASTVarSet& ast) {}
+
+    void operator()(ASTLabel& ast) {}
+
+    void operator()(ASTGoto& ast) {}
+
+    void operator()(ASTDoWhile& ast) {
+        ASTNode current = ast.nodes.GetFirst();
+        while (current) {
+            Visit(current);
+            current = current->GetNext();
+        }
+    }
+
+    void operator()(ASTReturn& ast) {}
+
+    void operator()(ASTBreak& ast) {}
+
+    void Visit(ASTNode& node) {
+        std::visit(*this, *node->GetInnerData());
+        if (node->IsBlockEncoded()) {
+            auto block = std::get_if<ASTBlockEncoded>(node->GetInnerData());
+            NodeBlock bb = ir.DecodeRange(block->start, block->end);
+            node->TransformBlockEncoded(std::move(bb));
+        }
+    }
+
+private:
+    ShaderIR& ir;
+};
 
 void ShaderIR::Decode() {
     std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header));
 
-    disable_flow_stack = false;
-    const auto info = ScanFlow(program_code, program_size, main_offset);
-    if (info) {
-        const auto& shader_info = *info;
-        coverage_begin = shader_info.start;
-        coverage_end = shader_info.end;
-        if (shader_info.decompilable) {
-            disable_flow_stack = true;
-            const auto insert_block = [this](NodeBlock& nodes, u32 label) {
-                if (label == static_cast<u32>(exit_branch)) {
-                    return;
-                }
-                basic_blocks.insert({label, nodes});
-            };
-            const auto& blocks = shader_info.blocks;
-            NodeBlock current_block;
-            u32 current_label = static_cast<u32>(exit_branch);
-            for (auto& block : blocks) {
-                if (shader_info.labels.count(block.start) != 0) {
-                    insert_block(current_block, current_label);
-                    current_block.clear();
-                    current_label = block.start;
-                }
-                if (!block.ignore_branch) {
-                    DecodeRangeInner(current_block, block.start, block.end);
-                    InsertControlFlow(current_block, block);
-                } else {
-                    DecodeRangeInner(current_block, block.start, block.end + 1);
-                }
-            }
-            insert_block(current_block, current_label);
-            return;
-        }
-        LOG_WARNING(HW_GPU, "Flow Stack Removing Failed! Falling back to old method");
-        // we can't decompile it, fallback to standard method
+    decompiled = false;
+    auto info = ScanFlow(program_code, main_offset, settings, locker);
+    auto& shader_info = *info;
+    coverage_begin = shader_info.start;
+    coverage_end = shader_info.end;
+    switch (shader_info.settings.depth) {
+    case CompileDepth::FlowStack: {
         for (const auto& block : shader_info.blocks) {
             basic_blocks.insert({block.start, DecodeRange(block.start, block.end + 1)});
         }
-        return;
+        break;
     }
-    LOG_WARNING(HW_GPU, "Flow Analysis Failed! Falling back to brute force compiling");
-
-    // Now we need to deal with an undecompilable shader. We need to brute force
-    // a shader that captures every position.
-    coverage_begin = main_offset;
-    const u32 shader_end = static_cast<u32>(program_size / sizeof(u64));
-    coverage_end = shader_end;
-    for (u32 label = main_offset; label < shader_end; label++) {
-        basic_blocks.insert({label, DecodeRange(label, label + 1)});
+    case CompileDepth::NoFlowStack: {
+        disable_flow_stack = true;
+        const auto insert_block = [this](NodeBlock& nodes, u32 label) {
+            if (label == static_cast<u32>(exit_branch)) {
+                return;
+            }
+            basic_blocks.insert({label, nodes});
+        };
+        const auto& blocks = shader_info.blocks;
+        NodeBlock current_block;
+        u32 current_label = static_cast<u32>(exit_branch);
+        for (auto& block : blocks) {
+            if (shader_info.labels.count(block.start) != 0) {
+                insert_block(current_block, current_label);
+                current_block.clear();
+                current_label = block.start;
+            }
+            if (!block.ignore_branch) {
+                DecodeRangeInner(current_block, block.start, block.end);
+                InsertControlFlow(current_block, block);
+            } else {
+                DecodeRangeInner(current_block, block.start, block.end + 1);
+            }
+        }
+        insert_block(current_block, current_label);
+        break;
+    }
+    case CompileDepth::DecompileBackwards:
+    case CompileDepth::FullDecompile: {
+        program_manager = std::move(shader_info.manager);
+        disable_flow_stack = true;
+        decompiled = true;
+        ASTDecoder decoder{*this};
+        ASTNode program = GetASTProgram();
+        decoder.Visit(program);
+        break;
+    }
+    default:
+        LOG_CRITICAL(HW_GPU, "Unknown decompilation mode!");
+        [[fallthrough]];
+    case CompileDepth::BruteForce: {
+        const auto shader_end = static_cast<u32>(program_code.size());
+        coverage_begin = main_offset;
+        coverage_end = shader_end;
+        for (u32 label = main_offset; label < shader_end; ++label) {
+            basic_blocks.insert({label, DecodeRange(label, label + 1)});
+        }
+        break;
+    }
+    }
+    if (settings.depth != shader_info.settings.depth) {
+        LOG_WARNING(
+            HW_GPU, "Decompiling to this setting \"{}\" failed, downgrading to this setting \"{}\"",
+            CompileDepthAsString(settings.depth), CompileDepthAsString(shader_info.settings.depth));
     }
 }
 
@@ -118,24 +198,39 @@ void ShaderIR::InsertControlFlow(NodeBlock& bb, const ShaderBlock& block) {
         }
         return result;
     };
-    if (block.branch.address < 0) {
-        if (block.branch.kills) {
-            Node n = Operation(OperationCode::Discard);
-            n = apply_conditions(block.branch.cond, n);
+    if (std::holds_alternative<SingleBranch>(*block.branch)) {
+        auto branch = std::get_if<SingleBranch>(block.branch.get());
+        if (branch->address < 0) {
+            if (branch->kill) {
+                Node n = Operation(OperationCode::Discard);
+                n = apply_conditions(branch->condition, n);
+                bb.push_back(n);
+                global_code.push_back(n);
+                return;
+            }
+            Node n = Operation(OperationCode::Exit);
+            n = apply_conditions(branch->condition, n);
             bb.push_back(n);
             global_code.push_back(n);
             return;
         }
-        Node n = Operation(OperationCode::Exit);
-        n = apply_conditions(block.branch.cond, n);
+        Node n = Operation(OperationCode::Branch, Immediate(branch->address));
+        n = apply_conditions(branch->condition, n);
         bb.push_back(n);
         global_code.push_back(n);
         return;
     }
-    Node n = Operation(OperationCode::Branch, Immediate(block.branch.address));
-    n = apply_conditions(block.branch.cond, n);
-    bb.push_back(n);
-    global_code.push_back(n);
+    auto multi_branch = std::get_if<MultiBranch>(block.branch.get());
+    Node op_a = GetRegister(multi_branch->gpr);
+    for (auto& branch_case : multi_branch->branches) {
+        Node n = Operation(OperationCode::Branch, Immediate(branch_case.address));
+        Node op_b = Immediate(branch_case.cmp_value);
+        Node condition =
+            GetPredicateComparisonInteger(Tegra::Shader::PredCondition::Equal, false, op_a, op_b);
+        auto result = Conditional(condition, {n});
+        bb.push_back(result);
+        global_code.push_back(result);
+    }
 }
 
 u32 ShaderIR::DecodeInstr(NodeBlock& bb, u32 pc) {
diff --git a/src/video_core/shader/decode/arithmetic.cpp b/src/video_core/shader/decode/arithmetic.cpp
index 1473c282a..fcedd2af6 100644
--- a/src/video_core/shader/decode/arithmetic.cpp
+++ b/src/video_core/shader/decode/arithmetic.cpp
@@ -43,12 +43,12 @@ u32 ShaderIR::DecodeArithmetic(NodeBlock& bb, u32 pc) {
     case OpCode::Id::FMUL_IMM: {
         // FMUL does not have 'abs' bits and only the second operand has a 'neg' bit.
         if (instr.fmul.tab5cb8_2 != 0) {
-            LOG_WARNING(HW_GPU, "FMUL tab5cb8_2({}) is not implemented",
-                        instr.fmul.tab5cb8_2.Value());
+            LOG_DEBUG(HW_GPU, "FMUL tab5cb8_2({}) is not implemented",
+                      instr.fmul.tab5cb8_2.Value());
         }
         if (instr.fmul.tab5c68_0 != 1) {
-            LOG_WARNING(HW_GPU, "FMUL tab5cb8_0({}) is not implemented",
-                        instr.fmul.tab5c68_0.Value());
+            LOG_DEBUG(HW_GPU, "FMUL tab5c68_0({}) is not implemented",
+                      instr.fmul.tab5c68_0.Value());
         }
 
         op_b = GetOperandAbsNegFloat(op_b, false, instr.fmul.negate_b);
@@ -144,10 +144,11 @@ u32 ShaderIR::DecodeArithmetic(NodeBlock& bb, u32 pc) {
     case OpCode::Id::RRO_C:
     case OpCode::Id::RRO_R:
     case OpCode::Id::RRO_IMM: {
+        LOG_DEBUG(HW_GPU, "(STUBBED) RRO used");
+
         // Currently RRO is only implemented as a register move.
         op_b = GetOperandAbsNegFloat(op_b, instr.alu.abs_b, instr.alu.negate_b);
         SetRegister(bb, instr.gpr0, op_b);
-        LOG_WARNING(HW_GPU, "RRO instruction is incomplete");
         break;
     }
     default:
diff --git a/src/video_core/shader/decode/arithmetic_half.cpp b/src/video_core/shader/decode/arithmetic_half.cpp
index b06cbe441..ee7d9a29d 100644
--- a/src/video_core/shader/decode/arithmetic_half.cpp
+++ b/src/video_core/shader/decode/arithmetic_half.cpp
@@ -21,8 +21,8 @@ u32 ShaderIR::DecodeArithmeticHalf(NodeBlock& bb, u32 pc) {
 
     if (opcode->get().GetId() == OpCode::Id::HADD2_C ||
         opcode->get().GetId() == OpCode::Id::HADD2_R) {
-        if (instr.alu_half.ftz != 0) {
-            LOG_WARNING(HW_GPU, "{} FTZ not implemented", opcode->get().GetName());
+        if (instr.alu_half.ftz == 0) {
+            LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName());
         }
     }
 
diff --git a/src/video_core/shader/decode/arithmetic_half_immediate.cpp b/src/video_core/shader/decode/arithmetic_half_immediate.cpp
index 6466fc011..d179b9873 100644
--- a/src/video_core/shader/decode/arithmetic_half_immediate.cpp
+++ b/src/video_core/shader/decode/arithmetic_half_immediate.cpp
@@ -19,12 +19,12 @@ u32 ShaderIR::DecodeArithmeticHalfImmediate(NodeBlock& bb, u32 pc) {
     const auto opcode = OpCode::Decode(instr);
 
     if (opcode->get().GetId() == OpCode::Id::HADD2_IMM) {
-        if (instr.alu_half_imm.ftz != 0) {
-            LOG_WARNING(HW_GPU, "{} FTZ not implemented", opcode->get().GetName());
+        if (instr.alu_half_imm.ftz == 0) {
+            LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName());
         }
     } else {
-        if (instr.alu_half_imm.precision != Tegra::Shader::HalfPrecision::None) {
-            LOG_WARNING(HW_GPU, "{} FTZ not implemented", opcode->get().GetName());
+        if (instr.alu_half_imm.precision != Tegra::Shader::HalfPrecision::FTZ) {
+            LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName());
         }
     }
 
diff --git a/src/video_core/shader/decode/arithmetic_integer.cpp b/src/video_core/shader/decode/arithmetic_integer.cpp
index b73f6536e..a33d242e9 100644
--- a/src/video_core/shader/decode/arithmetic_integer.cpp
+++ b/src/video_core/shader/decode/arithmetic_integer.cpp
@@ -144,7 +144,7 @@ u32 ShaderIR::DecodeArithmeticInteger(NodeBlock& bb, u32 pc) {
     case OpCode::Id::ICMP_IMM: {
         const Node zero = Immediate(0);
 
-        const auto [op_b, test] = [&]() -> std::pair<Node, Node> {
+        const auto [op_rhs, test] = [&]() -> std::pair<Node, Node> {
             switch (opcode->get().GetId()) {
             case OpCode::Id::ICMP_CR:
                 return {GetConstBuffer(instr.cbuf34.index, instr.cbuf34.offset),
@@ -161,10 +161,10 @@ u32 ShaderIR::DecodeArithmeticInteger(NodeBlock& bb, u32 pc) {
                 return {zero, zero};
             }
         }();
-        const Node op_a = GetRegister(instr.gpr8);
+        const Node op_lhs = GetRegister(instr.gpr8);
         const Node comparison =
             GetPredicateComparisonInteger(instr.icmp.cond, instr.icmp.is_signed != 0, test, zero);
-        SetRegister(bb, instr.gpr0, Operation(OperationCode::Select, comparison, op_a, op_b));
+        SetRegister(bb, instr.gpr0, Operation(OperationCode::Select, comparison, op_lhs, op_rhs));
         break;
     }
     case OpCode::Id::LOP_C:
diff --git a/src/video_core/shader/decode/ffma.cpp b/src/video_core/shader/decode/ffma.cpp
index ca2f39e8d..5973588d6 100644
--- a/src/video_core/shader/decode/ffma.cpp
+++ b/src/video_core/shader/decode/ffma.cpp
@@ -19,10 +19,10 @@ u32 ShaderIR::DecodeFfma(NodeBlock& bb, u32 pc) {
 
     UNIMPLEMENTED_IF_MSG(instr.ffma.cc != 0, "FFMA cc not implemented");
     if (instr.ffma.tab5980_0 != 1) {
-        LOG_WARNING(HW_GPU, "FFMA tab5980_0({}) not implemented", instr.ffma.tab5980_0.Value());
+        LOG_DEBUG(HW_GPU, "FFMA tab5980_0({}) not implemented", instr.ffma.tab5980_0.Value());
     }
     if (instr.ffma.tab5980_1 != 0) {
-        LOG_WARNING(HW_GPU, "FFMA tab5980_1({}) not implemented", instr.ffma.tab5980_1.Value());
+        LOG_DEBUG(HW_GPU, "FFMA tab5980_1({}) not implemented", instr.ffma.tab5980_1.Value());
     }
 
     const Node op_a = GetRegister(instr.gpr8);
diff --git a/src/video_core/shader/decode/half_set.cpp b/src/video_core/shader/decode/half_set.cpp
index 48ca7a4af..848e46874 100644
--- a/src/video_core/shader/decode/half_set.cpp
+++ b/src/video_core/shader/decode/half_set.cpp
@@ -20,8 +20,8 @@ u32 ShaderIR::DecodeHalfSet(NodeBlock& bb, u32 pc) {
     const Instruction instr = {program_code[pc]};
     const auto opcode = OpCode::Decode(instr);
 
-    if (instr.hset2.ftz != 0) {
-        LOG_WARNING(HW_GPU, "{} FTZ not implemented", opcode->get().GetName());
+    if (instr.hset2.ftz == 0) {
+        LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName());
     }
 
     Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hset2.type_a);
diff --git a/src/video_core/shader/decode/half_set_predicate.cpp b/src/video_core/shader/decode/half_set_predicate.cpp
index 840694527..310655619 100644
--- a/src/video_core/shader/decode/half_set_predicate.cpp
+++ b/src/video_core/shader/decode/half_set_predicate.cpp
@@ -4,6 +4,7 @@
 
 #include "common/assert.h"
 #include "common/common_types.h"
+#include "common/logging/log.h"
 #include "video_core/engines/shader_bytecode.h"
 #include "video_core/shader/node_helper.h"
 #include "video_core/shader/shader_ir.h"
@@ -18,7 +19,9 @@ u32 ShaderIR::DecodeHalfSetPredicate(NodeBlock& bb, u32 pc) {
     const Instruction instr = {program_code[pc]};
     const auto opcode = OpCode::Decode(instr);
 
-    DEBUG_ASSERT(instr.hsetp2.ftz == 0);
+    if (instr.hsetp2.ftz == 0) {
+        LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName());
+    }
 
     Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hsetp2.type_a);
     op_a = GetOperandAbsNegHalf(op_a, instr.hsetp2.abs_a, instr.hsetp2.negate_a);
@@ -32,6 +35,8 @@ u32 ShaderIR::DecodeHalfSetPredicate(NodeBlock& bb, u32 pc) {
         h_and = instr.hsetp2.cbuf_and_imm.h_and;
         op_b = GetOperandAbsNegHalf(GetConstBuffer(instr.cbuf34.index, instr.cbuf34.GetOffset()),
                                     instr.hsetp2.cbuf.abs_b, instr.hsetp2.cbuf.negate_b);
+        // F32 is hardcoded in hardware
+        op_b = UnpackHalfFloat(std::move(op_b), Tegra::Shader::HalfType::F32);
         break;
     case OpCode::Id::HSETP2_IMM:
         cond = instr.hsetp2.cbuf_and_imm.cond;
diff --git a/src/video_core/shader/decode/image.cpp b/src/video_core/shader/decode/image.cpp
index 95ec1cdd9..d2fe4ec5d 100644
--- a/src/video_core/shader/decode/image.cpp
+++ b/src/video_core/shader/decode/image.cpp
@@ -143,39 +143,41 @@ u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) {
 }
 
+// Returns the entry for the bound image at image.index, registering a new one on first use.
 Image& ShaderIR::GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type) {
-    const auto offset{static_cast<std::size_t>(image.index.Value())};
-    if (const auto image = TryUseExistingImage(offset, type)) {
-        return *image;
+    const auto offset = static_cast<u32>(image.index.Value());
+
+    const auto it =
+        std::find_if(std::begin(used_images), std::end(used_images),
+                     [offset](const Image& entry) { return entry.GetOffset() == offset; });
+    if (it != std::end(used_images)) {
+        // A reused entry must be a bound image of the type this instruction requests.
+        ASSERT(!it->IsBindless() && it->GetType() == type);
+        return *it;
     }
 
-    const std::size_t next_index{used_images.size()};
-    return used_images.emplace(offset, Image{offset, next_index, type}).first->second;
+    const auto next_index = static_cast<u32>(used_images.size());
+    return used_images.emplace_back(next_index, offset, type);
 }
 
+// Returns the entry for a bindless image, keyed by the const buffer and offset from TrackCbuf.
 Image& ShaderIR::GetBindlessImage(Tegra::Shader::Register reg, Tegra::Shader::ImageType type) {
-    const Node image_register{GetRegister(reg)};
-    const auto [base_image, cbuf_index, cbuf_offset]{
-        TrackCbuf(image_register, global_code, static_cast<s64>(global_code.size()))};
-    const auto cbuf_key{(static_cast<u64>(cbuf_index) << 32) | static_cast<u64>(cbuf_offset)};
-
-    if (const auto image = TryUseExistingImage(cbuf_key, type)) {
-        return *image;
-    }
-
-    const std::size_t next_index{used_images.size()};
-    return used_images.emplace(cbuf_key, Image{cbuf_index, cbuf_offset, next_index, type})
-        .first->second;
-}
-
-Image* ShaderIR::TryUseExistingImage(u64 offset, Tegra::Shader::ImageType type) {
-    auto it = used_images.find(offset);
-    if (it == used_images.end()) {
-        return nullptr;
+    const Node image_register = GetRegister(reg);
+    const auto [base_image, buffer, offset] =
+        TrackCbuf(image_register, global_code, static_cast<s64>(global_code.size()));
+
+    const auto it =
+        std::find_if(std::begin(used_images), std::end(used_images),
+                     [buffer = buffer, offset = offset](const Image& entry) {
+                         return entry.GetBuffer() == buffer && entry.GetOffset() == offset;
+                     });
+    if (it != std::end(used_images)) {
+        // A reused entry must be a bindless image of the type this instruction requests.
+        ASSERT(it->IsBindless() && it->GetType() == type);
+        return *it;
     }
-    auto& image = it->second;
-    ASSERT(image.GetType() == type);
 
-    return &image;
+    const auto next_index = static_cast<u32>(used_images.size());
+    return used_images.emplace_back(next_index, offset, buffer, type);
 }
 
 } // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp
index 7923d4d69..335d78146 100644
--- a/src/video_core/shader/decode/memory.cpp
+++ b/src/video_core/shader/decode/memory.cpp
@@ -166,9 +166,17 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
         }();
 
         const auto [real_address_base, base_address, descriptor] =
-            TrackAndGetGlobalMemory(bb, instr, false);
+            TrackGlobalMemory(bb, instr, false);
 
         const u32 count = GetUniformTypeElementsCount(type);
+        if (!real_address_base || !base_address) {
+            // Tracking failed, load zeroes.
+            for (u32 i = 0; i < count; ++i) {
+                SetRegister(bb, instr.gpr0.Value() + i, Immediate(0.0f));
+            }
+            break;
+        }
+
         for (u32 i = 0; i < count; ++i) {
             const Node it_offset = Immediate(i * 4);
             const Node real_address =
@@ -260,22 +268,19 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
         }();
 
         const auto [real_address_base, base_address, descriptor] =
-            TrackAndGetGlobalMemory(bb, instr, true);
-
-        // Encode in temporary registers like this: real_base_address, {registers_to_be_written...}
-        SetTemporary(bb, 0, real_address_base);
+            TrackGlobalMemory(bb, instr, true);
+        if (!real_address_base || !base_address) {
+            // Tracking failed, skip the store.
+            break;
+        }
 
         const u32 count = GetUniformTypeElementsCount(type);
         for (u32 i = 0; i < count; ++i) {
-            SetTemporary(bb, i + 1, GetRegister(instr.gpr0.Value() + i));
-        }
-        for (u32 i = 0; i < count; ++i) {
             const Node it_offset = Immediate(i * 4);
-            const Node real_address =
-                Operation(OperationCode::UAdd, NO_PRECISE, real_address_base, it_offset);
+            const Node real_address = Operation(OperationCode::UAdd, real_address_base, it_offset);
             const Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor);
-
-            bb.push_back(Operation(OperationCode::Assign, gmem, GetTemporary(i + 1)));
+            const Node value = GetRegister(instr.gpr0.Value() + i);
+            bb.push_back(Operation(OperationCode::Assign, gmem, value));
         }
         break;
     }
@@ -301,15 +306,17 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
     return pc;
 }
 
-std::tuple<Node, Node, GlobalMemoryBase> ShaderIR::TrackAndGetGlobalMemory(NodeBlock& bb,
-                                                                           Instruction instr,
-                                                                           bool is_write) {
+std::tuple<Node, Node, GlobalMemoryBase> ShaderIR::TrackGlobalMemory(NodeBlock& bb,
+                                                                     Instruction instr,
+                                                                     bool is_write) {
     const auto addr_register{GetRegister(instr.gmem.gpr)};
     const auto immediate_offset{static_cast<u32>(instr.gmem.offset)};
 
     const auto [base_address, index, offset] =
         TrackCbuf(addr_register, global_code, static_cast<s64>(global_code.size()));
-    ASSERT(base_address != nullptr);
+    ASSERT_OR_EXECUTE_MSG(base_address != nullptr,
+                          { return std::make_tuple(nullptr, nullptr, GlobalMemoryBase{}); },
+                          "Global memory tracking failed");
 
     bb.push_back(Comment(fmt::format("Base address is c[0x{:x}][0x{:x}]", index, offset)));
 
diff --git a/src/video_core/shader/decode/other.cpp b/src/video_core/shader/decode/other.cpp
index d46e0f823..17cd45d3c 100644
--- a/src/video_core/shader/decode/other.cpp
+++ b/src/video_core/shader/decode/other.cpp
@@ -67,7 +67,7 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
         break;
     }
     case OpCode::Id::MOV_SYS: {
-        const Node value = [&]() {
+        const Node value = [this, instr] {
             switch (instr.sys20) {
             case SystemVariable::Ydirection:
                 return Operation(OperationCode::YNegate);
@@ -256,7 +256,7 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
         break;
     }
     case OpCode::Id::DEPBAR: {
-        LOG_WARNING(HW_GPU, "DEPBAR instruction is stubbed");
+        LOG_DEBUG(HW_GPU, "DEPBAR instruction is stubbed");
         break;
     }
     default:
diff --git a/src/video_core/shader/decode/shift.cpp b/src/video_core/shader/decode/shift.cpp
index f6ee68a54..d419e9c45 100644
--- a/src/video_core/shader/decode/shift.cpp
+++ b/src/video_core/shader/decode/shift.cpp
@@ -18,7 +18,7 @@ u32 ShaderIR::DecodeShift(NodeBlock& bb, u32 pc) {
     const auto opcode = OpCode::Decode(instr);
 
     Node op_a = GetRegister(instr.gpr8);
-    Node op_b = [&]() {
+    Node op_b = [this, instr] {
         if (instr.is_b_imm) {
             return Immediate(instr.alu.GetSignedImm20_20());
         } else if (instr.is_b_gpr) {
diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp
index 0b934a069..bb926a132 100644
--- a/src/video_core/shader/decode/texture.cpp
+++ b/src/video_core/shader/decode/texture.cpp
@@ -44,10 +44,6 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
     bool is_bindless = false;
     switch (opcode->get().GetId()) {
     case OpCode::Id::TEX: {
-        if (instr.tex.UsesMiscMode(TextureMiscMode::NODEP)) {
-            LOG_WARNING(HW_GPU, "TEX.NODEP implementation is incomplete");
-        }
-
         const TextureType texture_type{instr.tex.texture_type};
         const bool is_array = instr.tex.array != 0;
         const bool is_aoffi = instr.tex.UsesMiscMode(TextureMiscMode::AOFFI);
@@ -62,10 +58,6 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
         UNIMPLEMENTED_IF_MSG(instr.tex.UsesMiscMode(TextureMiscMode::AOFFI),
                              "AOFFI is not implemented");
 
-        if (instr.tex.UsesMiscMode(TextureMiscMode::NODEP)) {
-            LOG_WARNING(HW_GPU, "TEX.NODEP implementation is incomplete");
-        }
-
         const TextureType texture_type{instr.tex_b.texture_type};
         const bool is_array = instr.tex_b.array != 0;
         const bool is_aoffi = instr.tex.UsesMiscMode(TextureMiscMode::AOFFI);
@@ -82,10 +74,6 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
         const bool depth_compare = instr.texs.UsesMiscMode(TextureMiscMode::DC);
         const auto process_mode = instr.texs.GetTextureProcessMode();
 
-        if (instr.texs.UsesMiscMode(TextureMiscMode::NODEP)) {
-            LOG_WARNING(HW_GPU, "TEXS.NODEP implementation is incomplete");
-        }
-
         const Node4 components =
             GetTexsCode(instr, texture_type, process_mode, depth_compare, is_array);
 
@@ -96,6 +84,10 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
         }
         break;
     }
+    case OpCode::Id::TLD4_B: {
+        is_bindless = true;
+        [[fallthrough]];
+    }
     case OpCode::Id::TLD4: {
         ASSERT(instr.tld4.array == 0);
         UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::NDV),
@@ -103,24 +95,20 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
         UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::PTP),
                              "PTP is not implemented");
 
-        if (instr.tld4.UsesMiscMode(TextureMiscMode::NODEP)) {
-            LOG_WARNING(HW_GPU, "TLD4.NODEP implementation is incomplete");
-        }
-
         const auto texture_type = instr.tld4.texture_type.Value();
-        const bool depth_compare = instr.tld4.UsesMiscMode(TextureMiscMode::DC);
+        const bool depth_compare = is_bindless ? instr.tld4_b.UsesMiscMode(TextureMiscMode::DC)
+                                               : instr.tld4.UsesMiscMode(TextureMiscMode::DC);
         const bool is_array = instr.tld4.array != 0;
-        const bool is_aoffi = instr.tld4.UsesMiscMode(TextureMiscMode::AOFFI);
+        const bool is_aoffi = is_bindless ? instr.tld4_b.UsesMiscMode(TextureMiscMode::AOFFI)
+                                          : instr.tld4.UsesMiscMode(TextureMiscMode::AOFFI);
         WriteTexInstructionFloat(
-            bb, instr, GetTld4Code(instr, texture_type, depth_compare, is_array, is_aoffi));
+            bb, instr,
+            GetTld4Code(instr, texture_type, depth_compare, is_array, is_aoffi, is_bindless));
         break;
     }
     case OpCode::Id::TLD4S: {
         UNIMPLEMENTED_IF_MSG(instr.tld4s.UsesMiscMode(TextureMiscMode::AOFFI),
                              "AOFFI is not implemented");
-        if (instr.tld4s.UsesMiscMode(TextureMiscMode::NODEP)) {
-            LOG_WARNING(HW_GPU, "TLD4S.NODEP implementation is incomplete");
-        }
 
         const bool depth_compare = instr.tld4s.UsesMiscMode(TextureMiscMode::DC);
         const Node op_a = GetRegister(instr.gpr8);
@@ -141,7 +129,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
         const Node component = Immediate(static_cast<u32>(instr.tld4s.component));
 
         const auto& sampler =
-            GetSampler(instr.sampler, TextureType::Texture2D, false, depth_compare);
+            GetSampler(instr.sampler, {{TextureType::Texture2D, false, depth_compare}});
 
         Node4 values;
         for (u32 element = 0; element < values.size(); ++element) {
@@ -150,25 +138,18 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
             values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy));
         }
 
-        WriteTexsInstructionFloat(bb, instr, values);
+        WriteTexsInstructionFloat(bb, instr, values, true);
         break;
     }
     case OpCode::Id::TXQ_B:
         is_bindless = true;
         [[fallthrough]];
     case OpCode::Id::TXQ: {
-        if (instr.txq.UsesMiscMode(TextureMiscMode::NODEP)) {
-            LOG_WARNING(HW_GPU, "TXQ.NODEP implementation is incomplete");
-        }
-
         // TODO: The new commits on the texture refactor, change the way samplers work.
         // Sadly, not all texture instructions specify the type of texture their sampler
         // uses. This must be fixed at a later instance.
         const auto& sampler =
-            is_bindless
-                ? GetBindlessSampler(instr.gpr8, Tegra::Shader::TextureType::Texture2D, false,
-                                     false)
-                : GetSampler(instr.sampler, Tegra::Shader::TextureType::Texture2D, false, false);
+            is_bindless ? GetBindlessSampler(instr.gpr8, {}) : GetSampler(instr.sampler, {});
 
         u32 indexer = 0;
         switch (instr.txq.query_type) {
@@ -201,15 +182,11 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
         UNIMPLEMENTED_IF_MSG(instr.tmml.UsesMiscMode(Tegra::Shader::TextureMiscMode::NDV),
                              "NDV is not implemented");
 
-        if (instr.tmml.UsesMiscMode(TextureMiscMode::NODEP)) {
-            LOG_WARNING(HW_GPU, "TMML.NODEP implementation is incomplete");
-        }
-
         auto texture_type = instr.tmml.texture_type.Value();
         const bool is_array = instr.tmml.array != 0;
-        const auto& sampler = is_bindless
-                                  ? GetBindlessSampler(instr.gpr20, texture_type, is_array, false)
-                                  : GetSampler(instr.sampler, texture_type, is_array, false);
+        const auto& sampler =
+            is_bindless ? GetBindlessSampler(instr.gpr20, {{texture_type, is_array, false}})
+                        : GetSampler(instr.sampler, {{texture_type, is_array, false}});
 
         std::vector<Node> coords;
 
@@ -250,25 +227,17 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
         UNIMPLEMENTED_IF_MSG(instr.tld.ms, "MS is not implemented");
         UNIMPLEMENTED_IF_MSG(instr.tld.cl, "CL is not implemented");
 
-        if (instr.tld.nodep_flag) {
-            LOG_WARNING(HW_GPU, "TLD.NODEP implementation is incomplete");
-        }
-
         WriteTexInstructionFloat(bb, instr, GetTldCode(instr));
         break;
     }
     case OpCode::Id::TLDS: {
-        const Tegra::Shader::TextureType texture_type{instr.tlds.GetTextureType()};
+        const TextureType texture_type{instr.tlds.GetTextureType()};
         const bool is_array{instr.tlds.IsArrayTexture()};
 
         UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(TextureMiscMode::AOFFI),
                              "AOFFI is not implemented");
         UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(TextureMiscMode::MZ), "MZ is not implemented");
 
-        if (instr.tlds.UsesMiscMode(TextureMiscMode::NODEP)) {
-            LOG_WARNING(HW_GPU, "TLDS.NODEP implementation is incomplete");
-        }
-
         const Node4 components = GetTldsCode(instr, texture_type, is_array);
 
         if (instr.tlds.fp32_flag) {
@@ -285,48 +254,84 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
     return pc;
 }
 
-const Sampler& ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler, TextureType type,
-                                    bool is_array, bool is_shadow) {
-    const auto offset = static_cast<std::size_t>(sampler.index.Value());
+const Sampler& ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler,
+                                    std::optional<SamplerInfo> sampler_info) {
+    const auto offset = static_cast<u32>(sampler.index.Value());
+
+    TextureType type;
+    bool is_array;
+    bool is_shadow;
+    if (sampler_info) {
+        type = sampler_info->type;
+        is_array = sampler_info->is_array;
+        is_shadow = sampler_info->is_shadow;
+    } else if (const auto sampler = locker.ObtainBoundSampler(offset)) {
+        type = sampler->texture_type.Value();
+        is_array = sampler->is_array.Value() != 0;
+        is_shadow = sampler->is_shadow.Value() != 0;
+    } else {
+        LOG_WARNING(HW_GPU, "Unknown sampler info");
+        type = TextureType::Texture2D;
+        is_array = false;
+        is_shadow = false;
+    }
 
     // If this sampler has already been used, return the existing mapping.
-    const auto itr =
+    const auto it =
         std::find_if(used_samplers.begin(), used_samplers.end(),
-                     [&](const Sampler& entry) { return entry.GetOffset() == offset; });
-    if (itr != used_samplers.end()) {
-        ASSERT(itr->GetType() == type && itr->IsArray() == is_array &&
-               itr->IsShadow() == is_shadow);
-        return *itr;
+                     [offset](const Sampler& entry) { return entry.GetOffset() == offset; });
+    if (it != used_samplers.end()) {
+        ASSERT(!it->IsBindless() && it->GetType() == type && it->IsArray() == is_array &&
+               it->IsShadow() == is_shadow);
+        return *it;
     }
 
     // Otherwise create a new mapping for this sampler
-    const std::size_t next_index = used_samplers.size();
-    const Sampler entry{offset, next_index, type, is_array, is_shadow};
-    return *used_samplers.emplace(entry).first;
+    const auto next_index = static_cast<u32>(used_samplers.size());
+    return used_samplers.emplace_back(Sampler(next_index, offset, type, is_array, is_shadow));
 }
 
-const Sampler& ShaderIR::GetBindlessSampler(const Tegra::Shader::Register& reg, TextureType type,
-                                            bool is_array, bool is_shadow) {
+const Sampler& ShaderIR::GetBindlessSampler(const Tegra::Shader::Register& reg,
+                                            std::optional<SamplerInfo> sampler_info) {
     const Node sampler_register = GetRegister(reg);
-    const auto [base_sampler, cbuf_index, cbuf_offset] =
+    const auto [base_sampler, buffer, offset] =
         TrackCbuf(sampler_register, global_code, static_cast<s64>(global_code.size()));
     ASSERT(base_sampler != nullptr);
-    const auto cbuf_key = (static_cast<u64>(cbuf_index) << 32) | static_cast<u64>(cbuf_offset);
+
+    TextureType type;
+    bool is_array;
+    bool is_shadow;
+    if (sampler_info) {
+        type = sampler_info->type;
+        is_array = sampler_info->is_array;
+        is_shadow = sampler_info->is_shadow;
+    } else if (const auto sampler = locker.ObtainBindlessSampler(buffer, offset)) {
+        type = sampler->texture_type.Value();
+        is_array = sampler->is_array.Value() != 0;
+        is_shadow = sampler->is_shadow.Value() != 0;
+    } else {
+        LOG_WARNING(HW_GPU, "Unknown sampler info");
+        type = TextureType::Texture2D;
+        is_array = false;
+        is_shadow = false;
+    }
 
     // If this sampler has already been used, return the existing mapping.
-    const auto itr =
+    const auto it =
         std::find_if(used_samplers.begin(), used_samplers.end(),
-                     [&](const Sampler& entry) { return entry.GetOffset() == cbuf_key; });
-    if (itr != used_samplers.end()) {
-        ASSERT(itr->GetType() == type && itr->IsArray() == is_array &&
-               itr->IsShadow() == is_shadow);
-        return *itr;
+                     [buffer = buffer, offset = offset](const Sampler& entry) {
+                         return entry.GetBuffer() == buffer && entry.GetOffset() == offset;
+                     });
+    if (it != used_samplers.end()) {
+        ASSERT(it->IsBindless() && it->GetType() == type && it->IsArray() == is_array &&
+               it->IsShadow() == is_shadow);
+        return *it;
     }
 
     // Otherwise create a new mapping for this sampler
-    const std::size_t next_index = used_samplers.size();
-    const Sampler entry{cbuf_index, cbuf_offset, next_index, type, is_array, is_shadow};
-    return *used_samplers.emplace(entry).first;
+    const auto next_index = static_cast<u32>(used_samplers.size());
+    return used_samplers.emplace_back(
+        Sampler(next_index, offset, buffer, type, is_array, is_shadow));
 }
 
 void ShaderIR::WriteTexInstructionFloat(NodeBlock& bb, Instruction instr, const Node4& components) {
@@ -344,14 +349,14 @@ void ShaderIR::WriteTexInstructionFloat(NodeBlock& bb, Instruction instr, const
     }
 }
 
-void ShaderIR::WriteTexsInstructionFloat(NodeBlock& bb, Instruction instr,
-                                         const Node4& components) {
+void ShaderIR::WriteTexsInstructionFloat(NodeBlock& bb, Instruction instr, const Node4& components,
+                                         bool ignore_mask) {
     // TEXS has two destination registers and a swizzle. The first two elements in the swizzle
     // go into gpr0+0 and gpr0+1, and the rest goes into gpr28+0 and gpr28+1
 
     u32 dest_elem = 0;
     for (u32 component = 0; component < 4; ++component) {
-        if (!instr.texs.IsComponentEnabled(component))
+        if (!instr.texs.IsComponentEnabled(component) && !ignore_mask)
             continue;
         SetTemporary(bb, dest_elem++, components[component]);
     }
@@ -411,9 +416,9 @@ Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type,
                              (texture_type == TextureType::TextureCube && is_array && is_shadow),
                          "This method is not supported.");
 
-    const auto& sampler = is_bindless
-                              ? GetBindlessSampler(*bindless_reg, texture_type, is_array, is_shadow)
-                              : GetSampler(instr.sampler, texture_type, is_array, is_shadow);
+    const auto& sampler =
+        is_bindless ? GetBindlessSampler(*bindless_reg, {{texture_type, is_array, is_shadow}})
+                    : GetSampler(instr.sampler, {{texture_type, is_array, is_shadow}});
 
     const bool lod_needed = process_mode == TextureProcessMode::LZ ||
                             process_mode == TextureProcessMode::LL ||
@@ -553,7 +558,7 @@ Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type,
 }
 
 Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool depth_compare,
-                            bool is_array, bool is_aoffi) {
+                            bool is_array, bool is_aoffi, bool is_bindless) {
     const std::size_t coord_count = GetCoordCount(texture_type);
 
     // If enabled arrays index is always stored in the gpr8 field
@@ -567,6 +572,12 @@ Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool de
     }
 
     u64 parameter_register = instr.gpr20.Value();
+
+    const auto& sampler =
+        is_bindless
+            ? GetBindlessSampler(parameter_register++, {{texture_type, is_array, depth_compare}})
+            : GetSampler(instr.sampler, {{texture_type, is_array, depth_compare}});
+
     std::vector<Node> aoffi;
     if (is_aoffi) {
         aoffi = GetAoffiCoordinates(GetRegister(parameter_register++), coord_count, true);
@@ -577,12 +588,14 @@ Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool de
         dc = GetRegister(parameter_register++);
     }
 
-    const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, depth_compare);
+    const Node component = is_bindless ? Immediate(static_cast<u32>(instr.tld4_b.component))
+                                       : Immediate(static_cast<u32>(instr.tld4.component));
 
     Node4 values;
     for (u32 element = 0; element < values.size(); ++element) {
         auto coords_copy = coords;
-        MetaTexture meta{sampler, GetRegister(array_register), dc, aoffi, {}, {}, {}, element};
+        MetaTexture meta{sampler, GetRegister(array_register), dc, aoffi, {}, {}, component,
+                         element};
         values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy));
     }
 
@@ -610,7 +623,7 @@ Node4 ShaderIR::GetTldCode(Tegra::Shader::Instruction instr) {
     // const Node aoffi_register{is_aoffi ? GetRegister(gpr20_cursor++) : nullptr};
     // const Node multisample{is_multisample ? GetRegister(gpr20_cursor++) : nullptr};
 
-    const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, false);
+    const auto& sampler = GetSampler(instr.sampler, {{texture_type, is_array, false}});
 
     Node4 values;
     for (u32 element = 0; element < values.size(); ++element) {
@@ -646,7 +659,7 @@ Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is
     // When lod is used always is in gpr20
     const Node lod = lod_enabled ? GetRegister(instr.gpr20) : Immediate(0);
 
-    const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, false);
+    const auto& sampler = GetSampler(instr.sampler, {{texture_type, is_array, false}});
 
     Node4 values;
     for (u32 element = 0; element < values.size(); ++element) {
diff --git a/src/video_core/shader/decode/video.cpp b/src/video_core/shader/decode/video.cpp
index 97fc6f9b1..b047cf870 100644
--- a/src/video_core/shader/decode/video.cpp
+++ b/src/video_core/shader/decode/video.cpp
@@ -23,7 +23,7 @@ u32 ShaderIR::DecodeVideo(NodeBlock& bb, u32 pc) {
     const Node op_a =
         GetVideoOperand(GetRegister(instr.gpr8), instr.video.is_byte_chunk_a, instr.video.signed_a,
                         instr.video.type_a, instr.video.byte_height_a);
-    const Node op_b = [&]() {
+    const Node op_b = [this, instr] {
         if (instr.video.use_register_b) {
             return GetVideoOperand(GetRegister(instr.gpr20), instr.video.is_byte_chunk_b,
                                    instr.video.signed_b, instr.video.type_b,
diff --git a/src/video_core/shader/decode/warp.cpp b/src/video_core/shader/decode/warp.cpp
index a8e481b3c..d98d0e1dd 100644
--- a/src/video_core/shader/decode/warp.cpp
+++ b/src/video_core/shader/decode/warp.cpp
@@ -17,6 +17,7 @@ using Tegra::Shader::ShuffleOperation;
 using Tegra::Shader::VoteOperation;
 
 namespace {
+
 OperationCode GetOperationCode(VoteOperation vote_op) {
     switch (vote_op) {
     case VoteOperation::All:
@@ -30,6 +31,7 @@ OperationCode GetOperationCode(VoteOperation vote_op) {
         return OperationCode::VoteAll;
     }
 }
+
 } // Anonymous namespace
 
 u32 ShaderIR::DecodeWarp(NodeBlock& bb, u32 pc) {
@@ -48,47 +50,57 @@ u32 ShaderIR::DecodeWarp(NodeBlock& bb, u32 pc) {
     case OpCode::Id::SHFL: {
         Node mask = instr.shfl.is_mask_imm ? Immediate(static_cast<u32>(instr.shfl.mask_imm))
                                            : GetRegister(instr.gpr39);
-        Node width = [&] {
-            // Convert the obscure SHFL mask back into GL_NV_shader_thread_shuffle's width. This has
-            // been done reversing Nvidia's math. It won't work on all cases due to SHFL having
-            // different parameters that don't properly map to GLSL's interface, but it should work
-            // for cases emitted by Nvidia's compiler.
-            if (instr.shfl.operation == ShuffleOperation::Up) {
-                return Operation(
-                    OperationCode::ILogicalShiftRight,
-                    Operation(OperationCode::IAdd, std::move(mask), Immediate(-0x2000)),
-                    Immediate(8));
-            } else {
-                return Operation(OperationCode::ILogicalShiftRight,
-                                 Operation(OperationCode::IAdd, Immediate(0x201F),
-                                           Operation(OperationCode::INegate, std::move(mask))),
-                                 Immediate(8));
-            }
-        }();
+        Node index = instr.shfl.is_index_imm ? Immediate(static_cast<u32>(instr.shfl.index_imm))
+                                             : GetRegister(instr.gpr20);
+
+        Node thread_id = Operation(OperationCode::ThreadId);
+        Node clamp = Operation(OperationCode::IBitwiseAnd, mask, Immediate(0x1FU));
+        Node seg_mask = BitfieldExtract(mask, 8, 16);
 
-        const auto [operation, in_range] = [instr]() -> std::pair<OperationCode, OperationCode> {
+        Node neg_seg_mask = Operation(OperationCode::IBitwiseNot, seg_mask);
+        Node min_thread_id = Operation(OperationCode::IBitwiseAnd, thread_id, seg_mask);
+        Node max_thread_id = Operation(OperationCode::IBitwiseOr, min_thread_id,
+                                       Operation(OperationCode::IBitwiseAnd, clamp, neg_seg_mask));
+
+        Node src_thread_id = [instr, index, neg_seg_mask, min_thread_id, thread_id] {
             switch (instr.shfl.operation) {
             case ShuffleOperation::Idx:
-                return {OperationCode::ShuffleIndexed, OperationCode::InRangeShuffleIndexed};
-            case ShuffleOperation::Up:
-                return {OperationCode::ShuffleUp, OperationCode::InRangeShuffleUp};
+                return Operation(OperationCode::IBitwiseOr,
+                                 Operation(OperationCode::IBitwiseAnd, index, neg_seg_mask),
+                                 min_thread_id);
             case ShuffleOperation::Down:
-                return {OperationCode::ShuffleDown, OperationCode::InRangeShuffleDown};
+                return Operation(OperationCode::IAdd, thread_id, index);
+            case ShuffleOperation::Up:
+                return Operation(OperationCode::IAdd, thread_id,
+                                 Operation(OperationCode::INegate, index));
             case ShuffleOperation::Bfly:
-                return {OperationCode::ShuffleButterfly, OperationCode::InRangeShuffleButterfly};
+                return Operation(OperationCode::IBitwiseXor, thread_id, index);
             }
-            UNREACHABLE_MSG("Invalid SHFL operation: {}",
-                            static_cast<u64>(instr.shfl.operation.Value()));
-            return {};
+            UNREACHABLE();
+            return Immediate(0U);
         }();
 
-        // Setting the predicate before the register is intentional to avoid overwriting.
-        Node index = instr.shfl.is_index_imm ? Immediate(static_cast<u32>(instr.shfl.index_imm))
-                                             : GetRegister(instr.gpr20);
-        SetPredicate(bb, instr.shfl.pred48, Operation(in_range, index, width));
+        Node in_bounds = [instr, src_thread_id, min_thread_id, max_thread_id] {
+            if (instr.shfl.operation == ShuffleOperation::Up) {
+                return Operation(OperationCode::LogicalIGreaterEqual, src_thread_id, min_thread_id);
+            } else {
+                return Operation(OperationCode::LogicalILessEqual, src_thread_id, max_thread_id);
+            }
+        }();
+
+        SetPredicate(bb, instr.shfl.pred48, in_bounds);
         SetRegister(
             bb, instr.gpr0,
-            Operation(operation, GetRegister(instr.gpr8), std::move(index), std::move(width)));
+            Operation(OperationCode::ShuffleIndexed, GetRegister(instr.gpr8), src_thread_id));
+        break;
+    }
+    case OpCode::Id::FSWZADD: {
+        UNIMPLEMENTED_IF(instr.fswzadd.ndv);
+
+        Node op_a = GetRegister(instr.gpr8);
+        Node op_b = GetRegister(instr.gpr20);
+        Node mask = Immediate(static_cast<u32>(instr.fswzadd.swizzle));
+        SetRegister(bb, instr.gpr0, Operation(OperationCode::FSwizzleAdd, op_a, op_b, mask));
         break;
     }
     default:
diff --git a/src/video_core/shader/expr.cpp b/src/video_core/shader/expr.cpp
new file mode 100644
index 000000000..2647865d4
--- /dev/null
+++ b/src/video_core/shader/expr.cpp
@@ -0,0 +1,93 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <memory>
+#include <variant>
+
+#include "video_core/shader/expr.h"
+
+namespace VideoCommon::Shader {
+namespace {
+bool ExprIsBoolean(const Expr& expr) {
+    return std::get_if<ExprBoolean>(expr.get()) != nullptr;
+}
+// Precondition: ExprIsBoolean(expr). get_if yields nullptr for any other alternative.
+bool ExprBooleanGet(const Expr& expr) {
+    return std::get_if<ExprBoolean>(expr.get())->value;
+}
+} // Anonymous namespace
+
+bool ExprAnd::operator==(const ExprAnd& b) const {
+    return ExprAreEqual(operand1, b.operand1) && ExprAreEqual(operand2, b.operand2);
+}
+
+bool ExprAnd::operator!=(const ExprAnd& b) const {
+    return !(*this == b);
+}
+
+bool ExprOr::operator==(const ExprOr& b) const {
+    return ExprAreEqual(operand1, b.operand1) && ExprAreEqual(operand2, b.operand2);
+}
+
+bool ExprOr::operator!=(const ExprOr& b) const {
+    return !(*this == b);
+}
+
+bool ExprNot::operator==(const ExprNot& b) const {
+    return ExprAreEqual(operand1, b.operand1);
+}
+
+bool ExprNot::operator!=(const ExprNot& b) const {
+    return !(*this == b);
+}
+
+Expr MakeExprNot(Expr first) {
+    if (const auto* const negation = std::get_if<ExprNot>(first.get())) {
+        return negation->operand1; // Double negation: hand back the inner expression.
+    }
+    return MakeExpr<ExprNot>(std::move(first));
+}
+
+Expr MakeExprAnd(Expr first, Expr second) {
+    // Fold constants: a `true` operand yields the other operand, a `false` one yields itself.
+    if (const bool lhs_const = ExprIsBoolean(first); lhs_const || ExprIsBoolean(second)) {
+        Expr& constant = lhs_const ? first : second;
+        Expr& other = lhs_const ? second : first;
+        return ExprBooleanGet(constant) ? std::move(other) : std::move(constant);
+    }
+    return MakeExpr<ExprAnd>(std::move(first), std::move(second));
+}
+
+Expr MakeExprOr(Expr first, Expr second) {
+    // Fold constants: a `true` operand yields itself, a `false` one yields the other operand.
+    if (const bool lhs_const = ExprIsBoolean(first); lhs_const || ExprIsBoolean(second)) {
+        Expr& constant = lhs_const ? first : second;
+        Expr& other = lhs_const ? second : first;
+        return ExprBooleanGet(constant) ? std::move(constant) : std::move(other);
+    }
+    return MakeExpr<ExprOr>(std::move(first), std::move(second));
+}
+
+bool ExprAreEqual(const Expr& first, const Expr& second) {
+    return *first == *second;
+}
+
+/// Tells whether one expression is the direct negation of the other.
+bool ExprAreOpposite(const Expr& first, const Expr& second) {
+    // When `first` is a negation the answer depends on it alone; `second` is not unwrapped.
+    if (const auto* const lhs_not = std::get_if<ExprNot>(first.get())) {
+        return ExprAreEqual(lhs_not->operand1, second);
+    }
+    const auto* const rhs_not = std::get_if<ExprNot>(second.get());
+    return rhs_not != nullptr && ExprAreEqual(rhs_not->operand1, first);
+}
+
+/// Returns true only when the expression is a constant boolean holding `true`.
+/// Any other kind of expression yields false.
+bool ExprIsTrue(const Expr& first) {
+    const auto* const constant = std::get_if<ExprBoolean>(first.get());
+    return constant != nullptr && constant->value;
+}
+
+} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/expr.h b/src/video_core/shader/expr.h
new file mode 100644
index 000000000..4e8264367
--- /dev/null
+++ b/src/video_core/shader/expr.h
@@ -0,0 +1,156 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <memory>
+#include <variant>
+
+#include "video_core/engines/shader_bytecode.h"
+
+namespace VideoCommon::Shader {
+
+using Tegra::Shader::ConditionCode;
+using Tegra::Shader::Pred;
+
+class ExprAnd;
+class ExprBoolean;
+class ExprCondCode;
+class ExprGprEqual;
+class ExprNot;
+class ExprOr;
+class ExprPredicate;
+class ExprVar;
+
+using ExprData = std::variant<ExprVar, ExprCondCode, ExprPredicate, ExprNot, ExprOr, ExprAnd,
+                              ExprBoolean, ExprGprEqual>;
+using Expr = std::shared_ptr<ExprData>;
+
+/// Conjunction node holding its two operand expressions.
+class ExprAnd final {
+public:
+    explicit ExprAnd(Expr lhs, Expr rhs) : operand1{std::move(lhs)}, operand2{std::move(rhs)} {}
+    bool operator==(const ExprAnd& rhs) const;
+    bool operator!=(const ExprAnd& rhs) const;
+
+    Expr operand1;
+    Expr operand2;
+};
+
+/// Disjunction node holding its two operand expressions.
+class ExprOr final {
+public:
+    explicit ExprOr(Expr lhs, Expr rhs) : operand1{std::move(lhs)}, operand2{std::move(rhs)} {}
+    bool operator==(const ExprOr& rhs) const;
+    bool operator!=(const ExprOr& rhs) const;
+
+    Expr operand1;
+    Expr operand2;
+};
+
+/// Negation node holding the expression being inverted.
+class ExprNot final {
+public:
+    explicit ExprNot(Expr operand) : operand1{std::move(operand)} {}
+    bool operator==(const ExprNot& rhs) const;
+    bool operator!=(const ExprNot& rhs) const;
+
+    Expr operand1;
+};
+
+/// Leaf expression identified by a variable index.
+class ExprVar final {
+public:
+    explicit ExprVar(u32 index) : var_index{index} {}
+
+    bool operator==(const ExprVar& rhs) const {
+        return var_index == rhs.var_index;
+    }
+    bool operator!=(const ExprVar& rhs) const {
+        return !(*this == rhs);
+    }
+
+    u32 var_index;
+};
+
+/// Leaf expression identified by a predicate index.
+class ExprPredicate final {
+public:
+    explicit ExprPredicate(u32 predicate_) : predicate{predicate_} {}
+
+    bool operator==(const ExprPredicate& rhs) const {
+        return predicate == rhs.predicate;
+    }
+    bool operator!=(const ExprPredicate& rhs) const {
+        return !(*this == rhs);
+    }
+
+    u32 predicate;
+};
+
+/// Leaf expression wrapping a shader condition code.
+class ExprCondCode final {
+public:
+    explicit ExprCondCode(ConditionCode condition_code) : cc{condition_code} {}
+
+    bool operator==(const ExprCondCode& rhs) const {
+        return cc == rhs.cc;
+    }
+    bool operator!=(const ExprCondCode& rhs) const {
+        return !(*this == rhs);
+    }
+
+    ConditionCode cc;
+};
+
+/// Leaf expression holding a constant boolean.
+class ExprBoolean final {
+public:
+    explicit ExprBoolean(bool val) : value{val} {}
+
+    bool operator==(const ExprBoolean& rhs) const {
+        return value == rhs.value;
+    }
+    bool operator!=(const ExprBoolean& rhs) const {
+        return !(*this == rhs);
+    }
+
+    bool value;
+};
+
+/// Leaf expression pairing a register index with a value (presumably tests gpr == value).
+class ExprGprEqual final {
+public:
+    ExprGprEqual(u32 gpr_, u32 value_) : gpr{gpr_}, value{value_} {}
+
+    bool operator==(const ExprGprEqual& rhs) const {
+        return gpr == rhs.gpr && value == rhs.value;
+    }
+    bool operator!=(const ExprGprEqual& rhs) const {
+        return !(*this == rhs);
+    }
+
+    u32 gpr;
+    u32 value;
+};
+
+template <typename T, typename... Args>
+Expr MakeExpr(Args&&... args) {
+    static_assert(std::is_convertible_v<T, ExprData>, "T must be convertible to ExprData");
+    return std::make_shared<ExprData>(T(std::forward<Args>(args)...));
+}
+/// Compares the two expression trees for equality.
+bool ExprAreEqual(const Expr& first, const Expr& second);
+/// Checks whether `first` is an ExprNot of `second`, or else `second` an ExprNot of `first`.
+bool ExprAreOpposite(const Expr& first, const Expr& second);
+/// Wraps the expression in an ExprNot; an ExprNot argument is unwrapped instead.
+Expr MakeExprNot(Expr first);
+/// Builds an ExprAnd unless an operand is an ExprBoolean, which is folded away.
+Expr MakeExprAnd(Expr first, Expr second);
+/// Builds an ExprOr unless an operand is an ExprBoolean, which is folded away.
+Expr MakeExprOr(Expr first, Expr second);
+/// Returns true only for an ExprBoolean holding true; false for everything else.
+bool ExprIsTrue(const Expr& first);
+
+} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h
index 338bab17c..54217e6a4 100644
--- a/src/video_core/shader/node.h
+++ b/src/video_core/shader/node.h
@@ -47,6 +47,7 @@ enum class OperationCode {
     FTrunc,        /// (MetaArithmetic, float a) -> float
     FCastInteger,  /// (MetaArithmetic, int a) -> float
     FCastUInteger, /// (MetaArithmetic, uint a) -> float
+    FSwizzleAdd,   /// (float a, float b, uint mask) -> float
 
     IAdd,                  /// (MetaArithmetic, int a, int b) -> int
     IMul,                  /// (MetaArithmetic, int a, int b) -> int
@@ -181,15 +182,8 @@ enum class OperationCode {
     VoteAny,      /// (bool) -> bool
     VoteEqual,    /// (bool) -> bool
 
-    ShuffleIndexed,   /// (uint value, uint index, uint width) -> uint
-    ShuffleUp,        /// (uint value, uint index, uint width) -> uint
-    ShuffleDown,      /// (uint value, uint index, uint width) -> uint
-    ShuffleButterfly, /// (uint value, uint index, uint width) -> uint
-
-    InRangeShuffleIndexed,   /// (uint index, uint width) -> bool
-    InRangeShuffleUp,        /// (uint index, uint width) -> bool
-    InRangeShuffleDown,      /// (uint index, uint width) -> bool
-    InRangeShuffleButterfly, /// (uint index, uint width) -> bool
+    ThreadId,       /// () -> uint
+    ShuffleIndexed, /// (uint value, uint index) -> uint
 
     Amount,
 };
@@ -230,62 +224,49 @@ using NodeBlock = std::vector<Node>;
 class Sampler {
 public:
     /// This constructor is for bound samplers
-    explicit Sampler(std::size_t offset, std::size_t index, Tegra::Shader::TextureType type,
-                     bool is_array, bool is_shadow)
-        : offset{offset}, index{index}, type{type}, is_array{is_array}, is_shadow{is_shadow},
-          is_bindless{false} {}
+    constexpr explicit Sampler(u32 index, u32 offset, Tegra::Shader::TextureType type,
+                               bool is_array, bool is_shadow)
+        : index{index}, offset{offset}, type{type}, is_array{is_array}, is_shadow{is_shadow} {}
 
     /// This constructor is for bindless samplers
-    explicit Sampler(u32 cbuf_index, u32 cbuf_offset, std::size_t index,
-                     Tegra::Shader::TextureType type, bool is_array, bool is_shadow)
-        : offset{(static_cast<u64>(cbuf_index) << 32) | cbuf_offset}, index{index}, type{type},
-          is_array{is_array}, is_shadow{is_shadow}, is_bindless{true} {}
-
-    /// This constructor is for serialization/deserialization
-    explicit Sampler(std::size_t offset, std::size_t index, Tegra::Shader::TextureType type,
-                     bool is_array, bool is_shadow, bool is_bindless)
-        : offset{offset}, index{index}, type{type}, is_array{is_array}, is_shadow{is_shadow},
-          is_bindless{is_bindless} {}
-
-    std::size_t GetOffset() const {
+    constexpr explicit Sampler(u32 index, u32 offset, u32 buffer, Tegra::Shader::TextureType type,
+                               bool is_array, bool is_shadow)
+        : index{index}, offset{offset}, buffer{buffer}, type{type}, is_array{is_array},
+          is_shadow{is_shadow}, is_bindless{true} {}
+
+    constexpr u32 GetIndex() const {
+        return index;
+    }
+
+    constexpr u32 GetOffset() const {
         return offset;
     }
 
-    std::size_t GetIndex() const {
-        return index;
+    constexpr u32 GetBuffer() const {
+        return buffer;
     }
 
-    Tegra::Shader::TextureType GetType() const {
+    constexpr Tegra::Shader::TextureType GetType() const {
         return type;
     }
 
-    bool IsArray() const {
+    constexpr bool IsArray() const {
         return is_array;
     }
 
-    bool IsShadow() const {
+    constexpr bool IsShadow() const {
         return is_shadow;
     }
 
-    bool IsBindless() const {
+    constexpr bool IsBindless() const {
         return is_bindless;
     }
 
-    std::pair<u32, u32> GetBindlessCBuf() const {
-        return {static_cast<u32>(offset >> 32), static_cast<u32>(offset)};
-    }
-
-    bool operator<(const Sampler& rhs) const {
-        return std::tie(index, offset, type, is_array, is_shadow, is_bindless) <
-               std::tie(rhs.index, rhs.offset, rhs.type, rhs.is_array, rhs.is_shadow,
-                        rhs.is_bindless);
-    }
-
 private:
-    /// Offset in TSC memory from which to read the sampler object, as specified by the sampling
-    /// instruction.
-    std::size_t offset{};
-    std::size_t index{}; ///< Value used to index into the generated GLSL sampler array.
+    u32 index{};  ///< Emulated index given for this sampler.
+    u32 offset{}; ///< Offset in the const buffer from where the sampler is being read.
+    u32 buffer{}; ///< Buffer where the bindless sampler is being read (unused on bound samplers).
+
     Tegra::Shader::TextureType type{}; ///< The type used to sample this texture (Texture2D, etc)
     bool is_array{};    ///< Whether the texture is being sampled as an array texture or not.
     bool is_shadow{};   ///< Whether the texture is being sampled as a depth texture or not.
@@ -294,18 +275,13 @@ private:
 
 class Image final {
 public:
-    constexpr explicit Image(std::size_t offset, std::size_t index, Tegra::Shader::ImageType type)
-        : offset{offset}, index{index}, type{type}, is_bindless{false} {}
+    /// This constructor is for bound images
+    constexpr explicit Image(u32 index, u32 offset, Tegra::Shader::ImageType type)
+        : index{index}, offset{offset}, type{type} {}
 
-    constexpr explicit Image(u32 cbuf_index, u32 cbuf_offset, std::size_t index,
-                             Tegra::Shader::ImageType type)
-        : offset{(static_cast<u64>(cbuf_index) << 32) | cbuf_offset}, index{index}, type{type},
-          is_bindless{true} {}
-
-    constexpr explicit Image(std::size_t offset, std::size_t index, Tegra::Shader::ImageType type,
-                             bool is_bindless, bool is_written, bool is_read, bool is_atomic)
-        : offset{offset}, index{index}, type{type}, is_bindless{is_bindless},
-          is_written{is_written}, is_read{is_read}, is_atomic{is_atomic} {}
+    /// This constructor is for bindless images
+    constexpr explicit Image(u32 index, u32 offset, u32 buffer, Tegra::Shader::ImageType type)
+        : index{index}, offset{offset}, buffer{buffer}, type{type}, is_bindless{true} {}
 
     void MarkWrite() {
         is_written = true;
@@ -321,12 +297,16 @@ public:
         is_atomic = true;
     }
 
-    constexpr std::size_t GetOffset() const {
+    constexpr u32 GetIndex() const {
+        return index;
+    }
+
+    constexpr u32 GetOffset() const {
         return offset;
     }
 
-    constexpr std::size_t GetIndex() const {
-        return index;
+    constexpr u32 GetBuffer() const {
+        return buffer;
     }
 
     constexpr Tegra::Shader::ImageType GetType() const {
@@ -349,18 +329,11 @@ public:
         return is_atomic;
     }
 
-    constexpr std::pair<u32, u32> GetBindlessCBuf() const {
-        return {static_cast<u32>(offset >> 32), static_cast<u32>(offset)};
-    }
-
-    constexpr bool operator<(const Image& rhs) const {
-        return std::tie(offset, index, type, is_bindless) <
-               std::tie(rhs.offset, rhs.index, rhs.type, rhs.is_bindless);
-    }
-
 private:
-    u64 offset{};
-    std::size_t index{};
+    u32 index{};
+    u32 offset{};
+    u32 buffer{};
+
     Tegra::Shader::ImageType type{};
     bool is_bindless{};
     bool is_written{};
@@ -410,7 +383,7 @@ public:
     explicit OperationNode(OperationCode code) : OperationNode(code, Meta{}) {}
 
     explicit OperationNode(OperationCode code, Meta meta)
-        : OperationNode(code, meta, std::vector<Node>{}) {}
+        : OperationNode(code, std::move(meta), std::vector<Node>{}) {}
 
     explicit OperationNode(OperationCode code, std::vector<Node> operands)
         : OperationNode(code, Meta{}, std::move(operands)) {}
diff --git a/src/video_core/shader/shader_ir.cpp b/src/video_core/shader/shader_ir.cpp
index 2c357f310..1d9825c76 100644
--- a/src/video_core/shader/shader_ir.cpp
+++ b/src/video_core/shader/shader_ir.cpp
@@ -2,8 +2,9 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
+#include <algorithm>
+#include <array>
 #include <cmath>
-#include <unordered_map>
 
 #include "common/assert.h"
 #include "common/common_types.h"
@@ -22,8 +23,9 @@ using Tegra::Shader::PredCondition;
 using Tegra::Shader::PredOperation;
 using Tegra::Shader::Register;
 
-ShaderIR::ShaderIR(const ProgramCode& program_code, u32 main_offset, const std::size_t size)
-    : program_code{program_code}, main_offset{main_offset}, program_size{size} {
+ShaderIR::ShaderIR(const ProgramCode& program_code, u32 main_offset, CompilerSettings settings,
+                   ConstBufferLocker& locker)
+    : program_code{program_code}, main_offset{main_offset}, settings{settings}, locker{locker} {
     Decode();
 }
 
@@ -137,7 +139,7 @@ Node ShaderIR::GetOutputAttribute(Attribute::Index index, u64 element, Node buff
     return MakeNode<AbufNode>(index, static_cast<u32>(element), std::move(buffer));
 }
 
-Node ShaderIR::GetInternalFlag(InternalFlag flag, bool negated) {
+Node ShaderIR::GetInternalFlag(InternalFlag flag, bool negated) const {
     const Node node = MakeNode<InternalFlagNode>(flag);
     if (negated) {
         return Operation(OperationCode::LogicalNegate, node);
@@ -269,21 +271,24 @@ Node ShaderIR::GetSaturatedHalfFloat(Node value, bool saturate) {
 }
 
 Node ShaderIR::GetPredicateComparisonFloat(PredCondition condition, Node op_a, Node op_b) {
-    const std::unordered_map<PredCondition, OperationCode> PredicateComparisonTable = {
-        {PredCondition::LessThan, OperationCode::LogicalFLessThan},
-        {PredCondition::Equal, OperationCode::LogicalFEqual},
-        {PredCondition::LessEqual, OperationCode::LogicalFLessEqual},
-        {PredCondition::GreaterThan, OperationCode::LogicalFGreaterThan},
-        {PredCondition::NotEqual, OperationCode::LogicalFNotEqual},
-        {PredCondition::GreaterEqual, OperationCode::LogicalFGreaterEqual},
-        {PredCondition::LessThanWithNan, OperationCode::LogicalFLessThan},
-        {PredCondition::NotEqualWithNan, OperationCode::LogicalFNotEqual},
-        {PredCondition::LessEqualWithNan, OperationCode::LogicalFLessEqual},
-        {PredCondition::GreaterThanWithNan, OperationCode::LogicalFGreaterThan},
-        {PredCondition::GreaterEqualWithNan, OperationCode::LogicalFGreaterEqual}};
-
-    const auto comparison{PredicateComparisonTable.find(condition)};
-    UNIMPLEMENTED_IF_MSG(comparison == PredicateComparisonTable.end(),
+    static constexpr std::array comparison_table{
+        std::pair{PredCondition::LessThan, OperationCode::LogicalFLessThan},
+        std::pair{PredCondition::Equal, OperationCode::LogicalFEqual},
+        std::pair{PredCondition::LessEqual, OperationCode::LogicalFLessEqual},
+        std::pair{PredCondition::GreaterThan, OperationCode::LogicalFGreaterThan},
+        std::pair{PredCondition::NotEqual, OperationCode::LogicalFNotEqual},
+        std::pair{PredCondition::GreaterEqual, OperationCode::LogicalFGreaterEqual},
+        std::pair{PredCondition::LessThanWithNan, OperationCode::LogicalFLessThan},
+        std::pair{PredCondition::NotEqualWithNan, OperationCode::LogicalFNotEqual},
+        std::pair{PredCondition::LessEqualWithNan, OperationCode::LogicalFLessEqual},
+        std::pair{PredCondition::GreaterThanWithNan, OperationCode::LogicalFGreaterThan},
+        std::pair{PredCondition::GreaterEqualWithNan, OperationCode::LogicalFGreaterEqual},
+    };
+
+    const auto comparison =
+        std::find_if(comparison_table.cbegin(), comparison_table.cend(),
+                     [condition](const auto entry) { return condition == entry.first; });
+    UNIMPLEMENTED_IF_MSG(comparison == comparison_table.cend(),
                          "Unknown predicate comparison operation");
 
     Node predicate = Operation(comparison->second, NO_PRECISE, op_a, op_b);
@@ -304,21 +309,24 @@ Node ShaderIR::GetPredicateComparisonFloat(PredCondition condition, Node op_a, N
 
 Node ShaderIR::GetPredicateComparisonInteger(PredCondition condition, bool is_signed, Node op_a,
                                              Node op_b) {
-    const std::unordered_map<PredCondition, OperationCode> PredicateComparisonTable = {
-        {PredCondition::LessThan, OperationCode::LogicalILessThan},
-        {PredCondition::Equal, OperationCode::LogicalIEqual},
-        {PredCondition::LessEqual, OperationCode::LogicalILessEqual},
-        {PredCondition::GreaterThan, OperationCode::LogicalIGreaterThan},
-        {PredCondition::NotEqual, OperationCode::LogicalINotEqual},
-        {PredCondition::GreaterEqual, OperationCode::LogicalIGreaterEqual},
-        {PredCondition::LessThanWithNan, OperationCode::LogicalILessThan},
-        {PredCondition::NotEqualWithNan, OperationCode::LogicalINotEqual},
-        {PredCondition::LessEqualWithNan, OperationCode::LogicalILessEqual},
-        {PredCondition::GreaterThanWithNan, OperationCode::LogicalIGreaterThan},
-        {PredCondition::GreaterEqualWithNan, OperationCode::LogicalIGreaterEqual}};
-
-    const auto comparison{PredicateComparisonTable.find(condition)};
-    UNIMPLEMENTED_IF_MSG(comparison == PredicateComparisonTable.end(),
+    static constexpr std::array comparison_table{
+        std::pair{PredCondition::LessThan, OperationCode::LogicalILessThan},
+        std::pair{PredCondition::Equal, OperationCode::LogicalIEqual},
+        std::pair{PredCondition::LessEqual, OperationCode::LogicalILessEqual},
+        std::pair{PredCondition::GreaterThan, OperationCode::LogicalIGreaterThan},
+        std::pair{PredCondition::NotEqual, OperationCode::LogicalINotEqual},
+        std::pair{PredCondition::GreaterEqual, OperationCode::LogicalIGreaterEqual},
+        std::pair{PredCondition::LessThanWithNan, OperationCode::LogicalILessThan},
+        std::pair{PredCondition::NotEqualWithNan, OperationCode::LogicalINotEqual},
+        std::pair{PredCondition::LessEqualWithNan, OperationCode::LogicalILessEqual},
+        std::pair{PredCondition::GreaterThanWithNan, OperationCode::LogicalIGreaterThan},
+        std::pair{PredCondition::GreaterEqualWithNan, OperationCode::LogicalIGreaterEqual},
+    };
+
+    const auto comparison =
+        std::find_if(comparison_table.cbegin(), comparison_table.cend(),
+                     [condition](const auto entry) { return condition == entry.first; });
+    UNIMPLEMENTED_IF_MSG(comparison == comparison_table.cend(),
                          "Unknown predicate comparison operation");
 
     Node predicate = SignedOperation(comparison->second, is_signed, NO_PRECISE, std::move(op_a),
@@ -335,45 +343,52 @@ Node ShaderIR::GetPredicateComparisonInteger(PredCondition condition, bool is_si
 
 Node ShaderIR::GetPredicateComparisonHalf(Tegra::Shader::PredCondition condition, Node op_a,
                                           Node op_b) {
-    const std::unordered_map<PredCondition, OperationCode> PredicateComparisonTable = {
-        {PredCondition::LessThan, OperationCode::Logical2HLessThan},
-        {PredCondition::Equal, OperationCode::Logical2HEqual},
-        {PredCondition::LessEqual, OperationCode::Logical2HLessEqual},
-        {PredCondition::GreaterThan, OperationCode::Logical2HGreaterThan},
-        {PredCondition::NotEqual, OperationCode::Logical2HNotEqual},
-        {PredCondition::GreaterEqual, OperationCode::Logical2HGreaterEqual},
-        {PredCondition::LessThanWithNan, OperationCode::Logical2HLessThanWithNan},
-        {PredCondition::NotEqualWithNan, OperationCode::Logical2HNotEqualWithNan},
-        {PredCondition::LessEqualWithNan, OperationCode::Logical2HLessEqualWithNan},
-        {PredCondition::GreaterThanWithNan, OperationCode::Logical2HGreaterThanWithNan},
-        {PredCondition::GreaterEqualWithNan, OperationCode::Logical2HGreaterEqualWithNan}};
-
-    const auto comparison{PredicateComparisonTable.find(condition)};
-    UNIMPLEMENTED_IF_MSG(comparison == PredicateComparisonTable.end(),
+    static constexpr std::array comparison_table{
+        std::pair{PredCondition::LessThan, OperationCode::Logical2HLessThan},
+        std::pair{PredCondition::Equal, OperationCode::Logical2HEqual},
+        std::pair{PredCondition::LessEqual, OperationCode::Logical2HLessEqual},
+        std::pair{PredCondition::GreaterThan, OperationCode::Logical2HGreaterThan},
+        std::pair{PredCondition::NotEqual, OperationCode::Logical2HNotEqual},
+        std::pair{PredCondition::GreaterEqual, OperationCode::Logical2HGreaterEqual},
+        std::pair{PredCondition::LessThanWithNan, OperationCode::Logical2HLessThanWithNan},
+        std::pair{PredCondition::NotEqualWithNan, OperationCode::Logical2HNotEqualWithNan},
+        std::pair{PredCondition::LessEqualWithNan, OperationCode::Logical2HLessEqualWithNan},
+        std::pair{PredCondition::GreaterThanWithNan, OperationCode::Logical2HGreaterThanWithNan},
+        std::pair{PredCondition::GreaterEqualWithNan, OperationCode::Logical2HGreaterEqualWithNan},
+    };
+
+    const auto comparison =
+        std::find_if(comparison_table.cbegin(), comparison_table.cend(),
+                     [condition](const auto entry) { return condition == entry.first; });
+    UNIMPLEMENTED_IF_MSG(comparison == comparison_table.cend(),
                          "Unknown predicate comparison operation");
 
     return Operation(comparison->second, NO_PRECISE, std::move(op_a), std::move(op_b));
 }
 
 OperationCode ShaderIR::GetPredicateCombiner(PredOperation operation) {
-    const std::unordered_map<PredOperation, OperationCode> PredicateOperationTable = {
-        {PredOperation::And, OperationCode::LogicalAnd},
-        {PredOperation::Or, OperationCode::LogicalOr},
-        {PredOperation::Xor, OperationCode::LogicalXor},
+    static constexpr std::array operation_table{
+        OperationCode::LogicalAnd,
+        OperationCode::LogicalOr,
+        OperationCode::LogicalXor,
     };
 
-    const auto op = PredicateOperationTable.find(operation);
-    UNIMPLEMENTED_IF_MSG(op == PredicateOperationTable.end(), "Unknown predicate operation");
-    return op->second;
+    const auto index = static_cast<std::size_t>(operation);
+    if (index >= operation_table.size()) {
+        UNIMPLEMENTED_MSG("Unknown predicate operation.");
+        return {};
+    }
+
+    return operation_table[index];
 }
 
-Node ShaderIR::GetConditionCode(Tegra::Shader::ConditionCode cc) {
+Node ShaderIR::GetConditionCode(Tegra::Shader::ConditionCode cc) const {
     switch (cc) {
     case Tegra::Shader::ConditionCode::NEU:
         return GetInternalFlag(InternalFlag::Zero, true);
     default:
         UNIMPLEMENTED_MSG("Unimplemented condition code: {}", static_cast<u32>(cc));
-        return GetPredicate(static_cast<u64>(Pred::NeverExecute));
+        return MakeNode<PredicateNode>(Pred::NeverExecute, false);
     }
 }
 
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h
index 6f666ee30..76a849818 100644
--- a/src/video_core/shader/shader_ir.h
+++ b/src/video_core/shader/shader_ir.h
@@ -5,6 +5,7 @@
 #pragma once
 
 #include <array>
+#include <list>
 #include <map>
 #include <optional>
 #include <set>
@@ -15,6 +16,9 @@
 #include "video_core/engines/maxwell_3d.h"
 #include "video_core/engines/shader_bytecode.h"
 #include "video_core/engines/shader_header.h"
+#include "video_core/shader/ast.h"
+#include "video_core/shader/compiler_settings.h"
+#include "video_core/shader/const_buffer_locker.h"
 #include "video_core/shader/node.h"
 
 namespace VideoCommon::Shader {
@@ -45,7 +49,7 @@ public:
     }
 
     u32 GetSize() const {
-        return max_offset + sizeof(float);
+        return max_offset + static_cast<u32>(sizeof(float));
     }
 
     u32 GetMaxOffset() const {
@@ -64,7 +68,8 @@ struct GlobalMemoryUsage {
 
 class ShaderIR final {
 public:
-    explicit ShaderIR(const ProgramCode& program_code, u32 main_offset, std::size_t size);
+    explicit ShaderIR(const ProgramCode& program_code, u32 main_offset, CompilerSettings settings,
+                      ConstBufferLocker& locker);
     ~ShaderIR();
 
     const std::map<u32, NodeBlock>& GetBasicBlocks() const {
@@ -91,11 +96,11 @@ public:
         return used_cbufs;
     }
 
-    const std::set<Sampler>& GetSamplers() const {
+    const std::list<Sampler>& GetSamplers() const {
         return used_samplers;
     }
 
-    const std::map<u64, Image>& GetImages() const {
+    const std::list<Image>& GetImages() const {
         return used_images;
     }
 
@@ -144,11 +149,38 @@ public:
         return disable_flow_stack;
     }
 
-    u32 ConvertAddressToNvidiaSpace(const u32 address) const {
-        return (address - main_offset) * sizeof(Tegra::Shader::Instruction);
+    bool IsDecompiled() const {
+        return decompiled;
     }
 
+    const ASTManager& GetASTManager() const {
+        return program_manager;
+    }
+
+    ASTNode GetASTProgram() const {
+        return program_manager.GetProgram();
+    }
+
+    u32 GetASTNumVariables() const {
+        return program_manager.GetVariables();
+    }
+
+    u32 ConvertAddressToNvidiaSpace(u32 address) const {
+        return (address - main_offset) * static_cast<u32>(sizeof(Tegra::Shader::Instruction));
+    }
+
+    /// Returns a condition code evaluated from internal flags
+    Node GetConditionCode(Tegra::Shader::ConditionCode cc) const;
+
 private:
+    friend class ASTDecoder;
+
+    struct SamplerInfo {
+        Tegra::Shader::TextureType type; ///< Texture type the sampler is used with.
+        bool is_array;                   ///< Whether it is sampled as an array texture.
+        bool is_shadow;                  ///< Whether it is sampled as a depth texture.
+    };
+
     void Decode();
 
     NodeBlock DecodeRange(u32 begin, u32 end);
@@ -213,7 +245,7 @@ private:
     /// Generates a node representing an output attribute. Keeps track of used attributes.
     Node GetOutputAttribute(Tegra::Shader::Attribute::Index index, u64 element, Node buffer);
     /// Generates a node representing an internal flag
-    Node GetInternalFlag(InternalFlag flag, bool negated = false);
+    Node GetInternalFlag(InternalFlag flag, bool negated = false) const;
     /// Generates a node representing a local memory address
     Node GetLocalMemory(Node address);
     /// Generates a node representing a shared memory address
@@ -271,17 +303,13 @@ private:
     /// Returns a predicate combiner operation
     OperationCode GetPredicateCombiner(Tegra::Shader::PredOperation operation);
 
-    /// Returns a condition code evaluated from internal flags
-    Node GetConditionCode(Tegra::Shader::ConditionCode cc);
-
     /// Accesses a texture sampler
     const Sampler& GetSampler(const Tegra::Shader::Sampler& sampler,
-                              Tegra::Shader::TextureType type, bool is_array, bool is_shadow);
+                              std::optional<SamplerInfo> sampler_info);
 
     // Accesses a texture sampler for a bindless texture.
     const Sampler& GetBindlessSampler(const Tegra::Shader::Register& reg,
-                                      Tegra::Shader::TextureType type, bool is_array,
-                                      bool is_shadow);
+                                      std::optional<SamplerInfo> sampler_info);
 
     /// Accesses an image.
     Image& GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type);
@@ -289,9 +317,6 @@ private:
     /// Access a bindless image sampler.
     Image& GetBindlessImage(Tegra::Shader::Register reg, Tegra::Shader::ImageType type);
 
-    /// Tries to access an existing image, updating it's state as needed
-    Image* TryUseExistingImage(u64 offset, Tegra::Shader::ImageType type);
-
     /// Extracts a sequence of bits from a node
     Node BitfieldExtract(Node value, u32 offset, u32 bits);
 
@@ -302,7 +327,7 @@ private:
                                   const Node4& components);
 
     void WriteTexsInstructionFloat(NodeBlock& bb, Tegra::Shader::Instruction instr,
-                                   const Node4& components);
+                                   const Node4& components, bool ignore_mask = false);
     void WriteTexsInstructionHalfFloat(NodeBlock& bb, Tegra::Shader::Instruction instr,
                                        const Node4& components);
 
@@ -316,7 +341,7 @@ private:
                       bool is_array);
 
     Node4 GetTld4Code(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type,
-                      bool depth_compare, bool is_array, bool is_aoffi);
+                      bool depth_compare, bool is_array, bool is_aoffi, bool is_bindless);
 
     Node4 GetTldCode(Tegra::Shader::Instruction instr);
 
@@ -351,12 +376,16 @@ private:
     std::pair<Node, s64> TrackRegister(const GprNode* tracked, const NodeBlock& code,
                                        s64 cursor) const;
 
-    std::tuple<Node, Node, GlobalMemoryBase> TrackAndGetGlobalMemory(
-        NodeBlock& bb, Tegra::Shader::Instruction instr, bool is_write);
+    std::tuple<Node, Node, GlobalMemoryBase> TrackGlobalMemory(NodeBlock& bb,
+                                                               Tegra::Shader::Instruction instr,
+                                                               bool is_write);
 
     const ProgramCode& program_code;
     const u32 main_offset;
-    const std::size_t program_size;
+    const CompilerSettings settings;
+    ConstBufferLocker& locker;
+
+    bool decompiled{};
     bool disable_flow_stack{};
 
     u32 coverage_begin{};
@@ -364,14 +393,15 @@ private:
 
     std::map<u32, NodeBlock> basic_blocks;
     NodeBlock global_code;
+    ASTManager program_manager{true, true};
 
     std::set<u32> used_registers;
     std::set<Tegra::Shader::Pred> used_predicates;
     std::set<Tegra::Shader::Attribute::Index> used_input_attributes;
     std::set<Tegra::Shader::Attribute::Index> used_output_attributes;
     std::map<u32, ConstBuffer> used_cbufs;
-    std::set<Sampler> used_samplers;
-    std::map<u64, Image> used_images;
+    std::list<Sampler> used_samplers;
+    std::list<Image> used_images;
     std::array<bool, Tegra::Engines::Maxwell3D::Regs::NumClipDistances> used_clip_distances{};
     std::map<GlobalMemoryBase, GlobalMemoryUsage> used_global_memory;
     bool uses_layer{};
diff --git a/src/video_core/surface.cpp b/src/video_core/surface.cpp
index 250afc6d6..1655ccf16 100644
--- a/src/video_core/surface.cpp
+++ b/src/video_core/surface.cpp
@@ -168,282 +168,6 @@ PixelFormat PixelFormatFromRenderTargetFormat(Tegra::RenderTargetFormat format)
     }
 }
 
-PixelFormat PixelFormatFromTextureFormat(Tegra::Texture::TextureFormat format,
-                                         Tegra::Texture::ComponentType component_type,
-                                         bool is_srgb) {
-    // TODO(Subv): Properly implement this
-    switch (format) {
-    case Tegra::Texture::TextureFormat::A8R8G8B8:
-        if (is_srgb) {
-            return PixelFormat::RGBA8_SRGB;
-        }
-        switch (component_type) {
-        case Tegra::Texture::ComponentType::UNORM:
-            return PixelFormat::ABGR8U;
-        case Tegra::Texture::ComponentType::SNORM:
-            return PixelFormat::ABGR8S;
-        case Tegra::Texture::ComponentType::UINT:
-            return PixelFormat::ABGR8UI;
-        default:
-            break;
-        }
-        break;
-    case Tegra::Texture::TextureFormat::B5G6R5:
-        switch (component_type) {
-        case Tegra::Texture::ComponentType::UNORM:
-            return PixelFormat::B5G6R5U;
-        default:
-            break;
-        }
-        break;
-    case Tegra::Texture::TextureFormat::A2B10G10R10:
-        switch (component_type) {
-        case Tegra::Texture::ComponentType::UNORM:
-            return PixelFormat::A2B10G10R10U;
-        default:
-            break;
-        }
-        break;
-    case Tegra::Texture::TextureFormat::A1B5G5R5:
-        switch (component_type) {
-        case Tegra::Texture::ComponentType::UNORM:
-            return PixelFormat::A1B5G5R5U;
-        default:
-            break;
-        }
-        break;
-    case Tegra::Texture::TextureFormat::R8:
-        switch (component_type) {
-        case Tegra::Texture::ComponentType::UNORM:
-            return PixelFormat::R8U;
-        case Tegra::Texture::ComponentType::UINT:
-            return PixelFormat::R8UI;
-        default:
-            break;
-        }
-        break;
-    case Tegra::Texture::TextureFormat::G8R8:
-        // TextureFormat::G8R8 is actually ordered red then green, as such we can use
-        // PixelFormat::RG8U and PixelFormat::RG8S. This was tested with The Legend of Zelda: Breath
-        // of the Wild, which uses this format to render the hearts on the UI.
-        switch (component_type) {
-        case Tegra::Texture::ComponentType::UNORM:
-            return PixelFormat::RG8U;
-        case Tegra::Texture::ComponentType::SNORM:
-            return PixelFormat::RG8S;
-        default:
-            break;
-        }
-        break;
-    case Tegra::Texture::TextureFormat::R16_G16_B16_A16:
-        switch (component_type) {
-        case Tegra::Texture::ComponentType::UNORM:
-            return PixelFormat::RGBA16U;
-        case Tegra::Texture::ComponentType::FLOAT:
-            return PixelFormat::RGBA16F;
-        default:
-            break;
-        }
-        break;
-    case Tegra::Texture::TextureFormat::BF10GF11RF11:
-        switch (component_type) {
-        case Tegra::Texture::ComponentType::FLOAT:
-            return PixelFormat::R11FG11FB10F;
-        default:
-            break;
-        }
-    case Tegra::Texture::TextureFormat::R32_G32_B32_A32:
-        switch (component_type) {
-        case Tegra::Texture::ComponentType::FLOAT:
-            return PixelFormat::RGBA32F;
-        case Tegra::Texture::ComponentType::UINT:
-            return PixelFormat::RGBA32UI;
-        default:
-            break;
-        }
-        break;
-    case Tegra::Texture::TextureFormat::R32_G32:
-        switch (component_type) {
-        case Tegra::Texture::ComponentType::FLOAT:
-            return PixelFormat::RG32F;
-        case Tegra::Texture::ComponentType::UINT:
-            return PixelFormat::RG32UI;
-        default:
-            break;
-        }
-        break;
-    case Tegra::Texture::TextureFormat::R32_G32_B32:
-        switch (component_type) {
-        case Tegra::Texture::ComponentType::FLOAT:
-            return PixelFormat::RGB32F;
-        default:
-            break;
-        }
-        break;
-    case Tegra::Texture::TextureFormat::R16:
-        switch (component_type) {
-        case Tegra::Texture::ComponentType::FLOAT:
-            return PixelFormat::R16F;
-        case Tegra::Texture::ComponentType::UNORM:
-            return PixelFormat::R16U;
-        case Tegra::Texture::ComponentType::SNORM:
-            return PixelFormat::R16S;
-        case Tegra::Texture::ComponentType::UINT:
-            return PixelFormat::R16UI;
-        case Tegra::Texture::ComponentType::SINT:
-            return PixelFormat::R16I;
-        default:
-            break;
-        }
-        break;
-    case Tegra::Texture::TextureFormat::R32:
-        switch (component_type) {
-        case Tegra::Texture::ComponentType::FLOAT:
-            return PixelFormat::R32F;
-        case Tegra::Texture::ComponentType::UINT:
-            return PixelFormat::R32UI;
-        default:
-            break;
-        }
-        break;
-    case Tegra::Texture::TextureFormat::ZF32:
-        return PixelFormat::Z32F;
-    case Tegra::Texture::TextureFormat::Z16:
-        return PixelFormat::Z16;
-    case Tegra::Texture::TextureFormat::S8Z24:
-        return PixelFormat::S8Z24;
-    case Tegra::Texture::TextureFormat::ZF32_X24S8:
-        return PixelFormat::Z32FS8;
-    case Tegra::Texture::TextureFormat::DXT1:
-        return is_srgb ? PixelFormat::DXT1_SRGB : PixelFormat::DXT1;
-    case Tegra::Texture::TextureFormat::DXT23:
-        return is_srgb ? PixelFormat::DXT23_SRGB : PixelFormat::DXT23;
-    case Tegra::Texture::TextureFormat::DXT45:
-        return is_srgb ? PixelFormat::DXT45_SRGB : PixelFormat::DXT45;
-    case Tegra::Texture::TextureFormat::DXN1:
-        return PixelFormat::DXN1;
-    case Tegra::Texture::TextureFormat::DXN2:
-        switch (component_type) {
-        case Tegra::Texture::ComponentType::UNORM:
-            return PixelFormat::DXN2UNORM;
-        case Tegra::Texture::ComponentType::SNORM:
-            return PixelFormat::DXN2SNORM;
-        default:
-            break;
-        }
-        break;
-    case Tegra::Texture::TextureFormat::BC7U:
-        return is_srgb ? PixelFormat::BC7U_SRGB : PixelFormat::BC7U;
-    case Tegra::Texture::TextureFormat::BC6H_UF16:
-        return PixelFormat::BC6H_UF16;
-    case Tegra::Texture::TextureFormat::BC6H_SF16:
-        return PixelFormat::BC6H_SF16;
-    case Tegra::Texture::TextureFormat::ASTC_2D_4X4:
-        return is_srgb ? PixelFormat::ASTC_2D_4X4_SRGB : PixelFormat::ASTC_2D_4X4;
-    case Tegra::Texture::TextureFormat::ASTC_2D_5X4:
-        return is_srgb ? PixelFormat::ASTC_2D_5X4_SRGB : PixelFormat::ASTC_2D_5X4;
-    case Tegra::Texture::TextureFormat::ASTC_2D_5X5:
-        return is_srgb ? PixelFormat::ASTC_2D_5X5_SRGB : PixelFormat::ASTC_2D_5X5;
-    case Tegra::Texture::TextureFormat::ASTC_2D_8X8:
-        return is_srgb ? PixelFormat::ASTC_2D_8X8_SRGB : PixelFormat::ASTC_2D_8X8;
-    case Tegra::Texture::TextureFormat::ASTC_2D_8X5:
-        return is_srgb ? PixelFormat::ASTC_2D_8X5_SRGB : PixelFormat::ASTC_2D_8X5;
-    case Tegra::Texture::TextureFormat::ASTC_2D_10X8:
-        return is_srgb ? PixelFormat::ASTC_2D_10X8_SRGB : PixelFormat::ASTC_2D_10X8;
-    case Tegra::Texture::TextureFormat::R16_G16:
-        switch (component_type) {
-        case Tegra::Texture::ComponentType::FLOAT:
-            return PixelFormat::RG16F;
-        case Tegra::Texture::ComponentType::UNORM:
-            return PixelFormat::RG16;
-        case Tegra::Texture::ComponentType::SNORM:
-            return PixelFormat::RG16S;
-        case Tegra::Texture::ComponentType::UINT:
-            return PixelFormat::RG16UI;
-        case Tegra::Texture::ComponentType::SINT:
-            return PixelFormat::RG16I;
-        default:
-            break;
-        }
-        break;
-    default:
-        break;
-    }
-    LOG_CRITICAL(HW_GPU, "Unimplemented format={}, component_type={}", static_cast<u32>(format),
-                 static_cast<u32>(component_type));
-    UNREACHABLE();
-    return PixelFormat::ABGR8U;
-}
-
-ComponentType ComponentTypeFromTexture(Tegra::Texture::ComponentType type) {
-    // TODO(Subv): Implement more component types
-    switch (type) {
-    case Tegra::Texture::ComponentType::UNORM:
-        return ComponentType::UNorm;
-    case Tegra::Texture::ComponentType::FLOAT:
-        return ComponentType::Float;
-    case Tegra::Texture::ComponentType::SNORM:
-        return ComponentType::SNorm;
-    case Tegra::Texture::ComponentType::UINT:
-        return ComponentType::UInt;
-    case Tegra::Texture::ComponentType::SINT:
-        return ComponentType::SInt;
-    default:
-        LOG_CRITICAL(HW_GPU, "Unimplemented component type={}", static_cast<u32>(type));
-        UNREACHABLE();
-        return ComponentType::UNorm;
-    }
-}
-
-ComponentType ComponentTypeFromRenderTarget(Tegra::RenderTargetFormat format) {
-    // TODO(Subv): Implement more render targets
-    switch (format) {
-    case Tegra::RenderTargetFormat::RGBA8_UNORM:
-    case Tegra::RenderTargetFormat::RGBA8_SRGB:
-    case Tegra::RenderTargetFormat::BGRA8_UNORM:
-    case Tegra::RenderTargetFormat::BGRA8_SRGB:
-    case Tegra::RenderTargetFormat::RGB10_A2_UNORM:
-    case Tegra::RenderTargetFormat::R8_UNORM:
-    case Tegra::RenderTargetFormat::RG16_UNORM:
-    case Tegra::RenderTargetFormat::R16_UNORM:
-    case Tegra::RenderTargetFormat::B5G6R5_UNORM:
-    case Tegra::RenderTargetFormat::BGR5A1_UNORM:
-    case Tegra::RenderTargetFormat::RG8_UNORM:
-    case Tegra::RenderTargetFormat::RGBA16_UNORM:
-        return ComponentType::UNorm;
-    case Tegra::RenderTargetFormat::RGBA8_SNORM:
-    case Tegra::RenderTargetFormat::RG16_SNORM:
-    case Tegra::RenderTargetFormat::R16_SNORM:
-    case Tegra::RenderTargetFormat::RG8_SNORM:
-        return ComponentType::SNorm;
-    case Tegra::RenderTargetFormat::RGBA16_FLOAT:
-    case Tegra::RenderTargetFormat::RGBX16_FLOAT:
-    case Tegra::RenderTargetFormat::R11G11B10_FLOAT:
-    case Tegra::RenderTargetFormat::RGBA32_FLOAT:
-    case Tegra::RenderTargetFormat::RG32_FLOAT:
-    case Tegra::RenderTargetFormat::RG16_FLOAT:
-    case Tegra::RenderTargetFormat::R16_FLOAT:
-    case Tegra::RenderTargetFormat::R32_FLOAT:
-        return ComponentType::Float;
-    case Tegra::RenderTargetFormat::RGBA32_UINT:
-    case Tegra::RenderTargetFormat::RGBA16_UINT:
-    case Tegra::RenderTargetFormat::RG16_UINT:
-    case Tegra::RenderTargetFormat::R8_UINT:
-    case Tegra::RenderTargetFormat::R16_UINT:
-    case Tegra::RenderTargetFormat::RG32_UINT:
-    case Tegra::RenderTargetFormat::R32_UINT:
-    case Tegra::RenderTargetFormat::RGBA8_UINT:
-        return ComponentType::UInt;
-    case Tegra::RenderTargetFormat::RG16_SINT:
-    case Tegra::RenderTargetFormat::R16_SINT:
-        return ComponentType::SInt;
-    default:
-        LOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format));
-        UNREACHABLE();
-        return ComponentType::UNorm;
-    }
-}
-
 PixelFormat PixelFormatFromGPUPixelFormat(Tegra::FramebufferConfig::PixelFormat format) {
     switch (format) {
     case Tegra::FramebufferConfig::PixelFormat::ABGR8:
@@ -458,22 +182,6 @@ PixelFormat PixelFormatFromGPUPixelFormat(Tegra::FramebufferConfig::PixelFormat
     }
 }
 
-ComponentType ComponentTypeFromDepthFormat(Tegra::DepthFormat format) {
-    switch (format) {
-    case Tegra::DepthFormat::Z16_UNORM:
-    case Tegra::DepthFormat::S8_Z24_UNORM:
-    case Tegra::DepthFormat::Z24_S8_UNORM:
-        return ComponentType::UNorm;
-    case Tegra::DepthFormat::Z32_FLOAT:
-    case Tegra::DepthFormat::Z32_S8_X24_FLOAT:
-        return ComponentType::Float;
-    default:
-        LOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format));
-        UNREACHABLE();
-        return ComponentType::UNorm;
-    }
-}
-
 SurfaceType GetFormatType(PixelFormat pixel_format) {
     if (static_cast<std::size_t>(pixel_format) <
         static_cast<std::size_t>(PixelFormat::MaxColorFormat)) {
@@ -510,6 +218,16 @@ bool IsPixelFormatASTC(PixelFormat format) {
     case PixelFormat::ASTC_2D_8X5_SRGB:
     case PixelFormat::ASTC_2D_10X8:
     case PixelFormat::ASTC_2D_10X8_SRGB:
+    case PixelFormat::ASTC_2D_6X6:
+    case PixelFormat::ASTC_2D_6X6_SRGB:
+    case PixelFormat::ASTC_2D_10X10:
+    case PixelFormat::ASTC_2D_10X10_SRGB:
+    case PixelFormat::ASTC_2D_12X12:
+    case PixelFormat::ASTC_2D_12X12_SRGB:
+    case PixelFormat::ASTC_2D_8X6:
+    case PixelFormat::ASTC_2D_8X6_SRGB:
+    case PixelFormat::ASTC_2D_6X5:
+    case PixelFormat::ASTC_2D_6X5_SRGB:
         return true;
     default:
         return false;
@@ -530,6 +248,11 @@ bool IsPixelFormatSRGB(PixelFormat format) {
     case PixelFormat::ASTC_2D_5X4_SRGB:
     case PixelFormat::ASTC_2D_5X5_SRGB:
     case PixelFormat::ASTC_2D_10X8_SRGB:
+    case PixelFormat::ASTC_2D_6X6_SRGB:
+    case PixelFormat::ASTC_2D_10X10_SRGB:
+    case PixelFormat::ASTC_2D_12X12_SRGB:
+    case PixelFormat::ASTC_2D_8X6_SRGB:
+    case PixelFormat::ASTC_2D_6X5_SRGB:
         return true;
     default:
         return false;
diff --git a/src/video_core/surface.h b/src/video_core/surface.h
index 1e1c432a5..0d17a93ed 100644
--- a/src/video_core/surface.h
+++ b/src/video_core/surface.h
@@ -67,45 +67,47 @@ enum class PixelFormat {
     DXT23_SRGB = 49,
     DXT45_SRGB = 50,
     BC7U_SRGB = 51,
-    ASTC_2D_4X4_SRGB = 52,
-    ASTC_2D_8X8_SRGB = 53,
-    ASTC_2D_8X5_SRGB = 54,
-    ASTC_2D_5X4_SRGB = 55,
-    ASTC_2D_5X5 = 56,
-    ASTC_2D_5X5_SRGB = 57,
-    ASTC_2D_10X8 = 58,
-    ASTC_2D_10X8_SRGB = 59,
+    R4G4B4A4U = 52,
+    ASTC_2D_4X4_SRGB = 53,
+    ASTC_2D_8X8_SRGB = 54,
+    ASTC_2D_8X5_SRGB = 55,
+    ASTC_2D_5X4_SRGB = 56,
+    ASTC_2D_5X5 = 57,
+    ASTC_2D_5X5_SRGB = 58,
+    ASTC_2D_10X8 = 59,
+    ASTC_2D_10X8_SRGB = 60,
+    ASTC_2D_6X6 = 61,
+    ASTC_2D_6X6_SRGB = 62,
+    ASTC_2D_10X10 = 63,
+    ASTC_2D_10X10_SRGB = 64,
+    ASTC_2D_12X12 = 65,
+    ASTC_2D_12X12_SRGB = 66,
+    ASTC_2D_8X6 = 67,
+    ASTC_2D_8X6_SRGB = 68,
+    ASTC_2D_6X5 = 69,
+    ASTC_2D_6X5_SRGB = 70,
+    E5B9G9R9F = 71,
 
     MaxColorFormat,
 
     // Depth formats
-    Z32F = 60,
-    Z16 = 61,
+    Z32F = 72,
+    Z16 = 73,
 
     MaxDepthFormat,
 
     // DepthStencil formats
-    Z24S8 = 62,
-    S8Z24 = 63,
-    Z32FS8 = 64,
+    Z24S8 = 74,
+    S8Z24 = 75,
+    Z32FS8 = 76,
 
     MaxDepthStencilFormat,
 
     Max = MaxDepthStencilFormat,
     Invalid = 255,
 };
-
 static constexpr std::size_t MaxPixelFormat = static_cast<std::size_t>(PixelFormat::Max);
 
-enum class ComponentType {
-    Invalid = 0,
-    SNorm = 1,
-    UNorm = 2,
-    SInt = 3,
-    UInt = 4,
-    Float = 5,
-};
-
 enum class SurfaceType {
     ColorTexture = 0,
     Depth = 1,
@@ -177,6 +179,7 @@ constexpr std::array<u32, MaxPixelFormat> compression_factor_shift_table = {{
     2, // DXT23_SRGB
     2, // DXT45_SRGB
     2, // BC7U_SRGB
+    0, // R4G4B4A4U
     2, // ASTC_2D_4X4_SRGB
     2, // ASTC_2D_8X8_SRGB
     2, // ASTC_2D_8X5_SRGB
@@ -185,6 +188,17 @@ constexpr std::array<u32, MaxPixelFormat> compression_factor_shift_table = {{
     2, // ASTC_2D_5X5_SRGB
     2, // ASTC_2D_10X8
     2, // ASTC_2D_10X8_SRGB
+    2, // ASTC_2D_6X6
+    2, // ASTC_2D_6X6_SRGB
+    2, // ASTC_2D_10X10
+    2, // ASTC_2D_10X10_SRGB
+    2, // ASTC_2D_12X12
+    2, // ASTC_2D_12X12_SRGB
+    2, // ASTC_2D_8X6
+    2, // ASTC_2D_8X6_SRGB
+    2, // ASTC_2D_6X5
+    2, // ASTC_2D_6X5_SRGB
+    0, // E5B9G9R9F
     0, // Z32F
     0, // Z16
     0, // Z24S8
@@ -261,6 +275,7 @@ constexpr std::array<u32, MaxPixelFormat> block_width_table = {{
     4,  // DXT23_SRGB
     4,  // DXT45_SRGB
     4,  // BC7U_SRGB
+    1,  // R4G4B4A4U
     4,  // ASTC_2D_4X4_SRGB
     8,  // ASTC_2D_8X8_SRGB
     8,  // ASTC_2D_8X5_SRGB
@@ -269,6 +284,17 @@ constexpr std::array<u32, MaxPixelFormat> block_width_table = {{
     5,  // ASTC_2D_5X5_SRGB
     10, // ASTC_2D_10X8
     10, // ASTC_2D_10X8_SRGB
+    6,  // ASTC_2D_6X6
+    6,  // ASTC_2D_6X6_SRGB
+    10, // ASTC_2D_10X10
+    10, // ASTC_2D_10X10_SRGB
+    12, // ASTC_2D_12X12
+    12, // ASTC_2D_12X12_SRGB
+    8,  // ASTC_2D_8X6
+    8,  // ASTC_2D_8X6_SRGB
+    6,  // ASTC_2D_6X5
+    6,  // ASTC_2D_6X5_SRGB
+    1,  // E5B9G9R9F
     1,  // Z32F
     1,  // Z16
     1,  // Z24S8
@@ -285,71 +311,83 @@ static constexpr u32 GetDefaultBlockWidth(PixelFormat format) {
 }
 
 constexpr std::array<u32, MaxPixelFormat> block_height_table = {{
-    1, // ABGR8U
-    1, // ABGR8S
-    1, // ABGR8UI
-    1, // B5G6R5U
-    1, // A2B10G10R10U
-    1, // A1B5G5R5U
-    1, // R8U
-    1, // R8UI
-    1, // RGBA16F
-    1, // RGBA16U
-    1, // RGBA16UI
-    1, // R11FG11FB10F
-    1, // RGBA32UI
-    4, // DXT1
-    4, // DXT23
-    4, // DXT45
-    4, // DXN1
-    4, // DXN2UNORM
-    4, // DXN2SNORM
-    4, // BC7U
-    4, // BC6H_UF16
-    4, // BC6H_SF16
-    4, // ASTC_2D_4X4
-    1, // BGRA8
-    1, // RGBA32F
-    1, // RG32F
-    1, // R32F
-    1, // R16F
-    1, // R16U
-    1, // R16S
-    1, // R16UI
-    1, // R16I
-    1, // RG16
-    1, // RG16F
-    1, // RG16UI
-    1, // RG16I
-    1, // RG16S
-    1, // RGB32F
-    1, // RGBA8_SRGB
-    1, // RG8U
-    1, // RG8S
-    1, // RG32UI
-    1, // RGBX16F
-    1, // R32UI
-    8, // ASTC_2D_8X8
-    5, // ASTC_2D_8X5
-    4, // ASTC_2D_5X4
-    1, // BGRA8_SRGB
-    4, // DXT1_SRGB
-    4, // DXT23_SRGB
-    4, // DXT45_SRGB
-    4, // BC7U_SRGB
-    4, // ASTC_2D_4X4_SRGB
-    8, // ASTC_2D_8X8_SRGB
-    5, // ASTC_2D_8X5_SRGB
-    4, // ASTC_2D_5X4_SRGB
-    5, // ASTC_2D_5X5
-    5, // ASTC_2D_5X5_SRGB
-    8, // ASTC_2D_10X8
-    8, // ASTC_2D_10X8_SRGB
-    1, // Z32F
-    1, // Z16
-    1, // Z24S8
-    1, // S8Z24
-    1, // Z32FS8
+    1,  // ABGR8U
+    1,  // ABGR8S
+    1,  // ABGR8UI
+    1,  // B5G6R5U
+    1,  // A2B10G10R10U
+    1,  // A1B5G5R5U
+    1,  // R8U
+    1,  // R8UI
+    1,  // RGBA16F
+    1,  // RGBA16U
+    1,  // RGBA16UI
+    1,  // R11FG11FB10F
+    1,  // RGBA32UI
+    4,  // DXT1
+    4,  // DXT23
+    4,  // DXT45
+    4,  // DXN1
+    4,  // DXN2UNORM
+    4,  // DXN2SNORM
+    4,  // BC7U
+    4,  // BC6H_UF16
+    4,  // BC6H_SF16
+    4,  // ASTC_2D_4X4
+    1,  // BGRA8
+    1,  // RGBA32F
+    1,  // RG32F
+    1,  // R32F
+    1,  // R16F
+    1,  // R16U
+    1,  // R16S
+    1,  // R16UI
+    1,  // R16I
+    1,  // RG16
+    1,  // RG16F
+    1,  // RG16UI
+    1,  // RG16I
+    1,  // RG16S
+    1,  // RGB32F
+    1,  // RGBA8_SRGB
+    1,  // RG8U
+    1,  // RG8S
+    1,  // RG32UI
+    1,  // RGBX16F
+    1,  // R32UI
+    8,  // ASTC_2D_8X8
+    5,  // ASTC_2D_8X5
+    4,  // ASTC_2D_5X4
+    1,  // BGRA8_SRGB
+    4,  // DXT1_SRGB
+    4,  // DXT23_SRGB
+    4,  // DXT45_SRGB
+    4,  // BC7U_SRGB
+    1,  // R4G4B4A4U
+    4,  // ASTC_2D_4X4_SRGB
+    8,  // ASTC_2D_8X8_SRGB
+    5,  // ASTC_2D_8X5_SRGB
+    4,  // ASTC_2D_5X4_SRGB
+    5,  // ASTC_2D_5X5
+    5,  // ASTC_2D_5X5_SRGB
+    8,  // ASTC_2D_10X8
+    8,  // ASTC_2D_10X8_SRGB
+    6,  // ASTC_2D_6X6
+    6,  // ASTC_2D_6X6_SRGB
+    10, // ASTC_2D_10X10
+    10, // ASTC_2D_10X10_SRGB
+    12, // ASTC_2D_12X12
+    12, // ASTC_2D_12X12_SRGB
+    6,  // ASTC_2D_8X6
+    6,  // ASTC_2D_8X6_SRGB
+    5,  // ASTC_2D_6X5
+    5,  // ASTC_2D_6X5_SRGB
+    1,  // E5B9G9R9F
+    1,  // Z32F
+    1,  // Z16
+    1,  // Z24S8
+    1,  // S8Z24
+    1,  // Z32FS8
 }};
 
 static constexpr u32 GetDefaultBlockHeight(PixelFormat format) {
@@ -413,6 +451,7 @@ constexpr std::array<u32, MaxPixelFormat> bpp_table = {{
     128, // DXT23_SRGB
     128, // DXT45_SRGB
     128, // BC7U
+    16,  // R4G4B4A4U
     128, // ASTC_2D_4X4_SRGB
     128, // ASTC_2D_8X8_SRGB
     128, // ASTC_2D_8X5_SRGB
@@ -421,6 +460,17 @@ constexpr std::array<u32, MaxPixelFormat> bpp_table = {{
     128, // ASTC_2D_5X5_SRGB
     128, // ASTC_2D_10X8
     128, // ASTC_2D_10X8_SRGB
+    128, // ASTC_2D_6X6
+    128, // ASTC_2D_6X6_SRGB
+    128, // ASTC_2D_10X10
+    128, // ASTC_2D_10X10_SRGB
+    128, // ASTC_2D_12X12
+    128, // ASTC_2D_12X12_SRGB
+    128, // ASTC_2D_8X6
+    128, // ASTC_2D_8X6_SRGB
+    128, // ASTC_2D_6X5
+    128, // ASTC_2D_6X5_SRGB
+    32,  // E5B9G9R9F
     32,  // Z32F
     16,  // Z16
     32,  // Z24S8
@@ -504,6 +554,7 @@ constexpr std::array<SurfaceCompression, MaxPixelFormat> compression_type_table
     SurfaceCompression::Compressed, // DXT23_SRGB
     SurfaceCompression::Compressed, // DXT45_SRGB
     SurfaceCompression::Compressed, // BC7U_SRGB
+    SurfaceCompression::None,       // R4G4B4A4U
     SurfaceCompression::Converted,  // ASTC_2D_4X4_SRGB
     SurfaceCompression::Converted,  // ASTC_2D_8X8_SRGB
     SurfaceCompression::Converted,  // ASTC_2D_8X5_SRGB
@@ -512,6 +563,17 @@ constexpr std::array<SurfaceCompression, MaxPixelFormat> compression_type_table
     SurfaceCompression::Converted,  // ASTC_2D_5X5_SRGB
     SurfaceCompression::Converted,  // ASTC_2D_10X8
     SurfaceCompression::Converted,  // ASTC_2D_10X8_SRGB
+    SurfaceCompression::Converted,  // ASTC_2D_6X6
+    SurfaceCompression::Converted,  // ASTC_2D_6X6_SRGB
+    SurfaceCompression::Converted,  // ASTC_2D_10X10
+    SurfaceCompression::Converted,  // ASTC_2D_10X10_SRGB
+    SurfaceCompression::Converted,  // ASTC_2D_12X12
+    SurfaceCompression::Converted,  // ASTC_2D_12X12_SRGB
+    SurfaceCompression::Converted,  // ASTC_2D_8X6
+    SurfaceCompression::Converted,  // ASTC_2D_8X6_SRGB
+    SurfaceCompression::Converted,  // ASTC_2D_6X5
+    SurfaceCompression::Converted,  // ASTC_2D_6X5_SRGB
+    SurfaceCompression::None,       // E5B9G9R9F
     SurfaceCompression::None,       // Z32F
     SurfaceCompression::None,       // Z16
     SurfaceCompression::None,       // Z24S8
@@ -537,18 +599,8 @@ PixelFormat PixelFormatFromDepthFormat(Tegra::DepthFormat format);
 
 PixelFormat PixelFormatFromRenderTargetFormat(Tegra::RenderTargetFormat format);
 
-PixelFormat PixelFormatFromTextureFormat(Tegra::Texture::TextureFormat format,
-                                         Tegra::Texture::ComponentType component_type,
-                                         bool is_srgb);
-
-ComponentType ComponentTypeFromTexture(Tegra::Texture::ComponentType type);
-
-ComponentType ComponentTypeFromRenderTarget(Tegra::RenderTargetFormat format);
-
 PixelFormat PixelFormatFromGPUPixelFormat(Tegra::FramebufferConfig::PixelFormat format);
 
-ComponentType ComponentTypeFromDepthFormat(Tegra::DepthFormat format);
-
 SurfaceType GetFormatType(PixelFormat pixel_format);
 
 bool IsPixelFormatASTC(PixelFormat format);
diff --git a/src/video_core/texture_cache/format_lookup_table.cpp b/src/video_core/texture_cache/format_lookup_table.cpp
new file mode 100644
index 000000000..271e67533
--- /dev/null
+++ b/src/video_core/texture_cache/format_lookup_table.cpp
@@ -0,0 +1,208 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <array>
+#include "common/common_types.h"
+#include "common/logging/log.h"
+#include "video_core/texture_cache/format_lookup_table.h"
+
+namespace VideoCommon {
+
+using Tegra::Texture::ComponentType;
+using Tegra::Texture::TextureFormat;
+using VideoCore::Surface::PixelFormat;
+
+namespace {
+
+constexpr auto SNORM = ComponentType::SNORM;
+constexpr auto UNORM = ComponentType::UNORM;
+constexpr auto SINT = ComponentType::SINT;
+constexpr auto UINT = ComponentType::UINT;
+constexpr auto SNORM_FORCE_FP16 = ComponentType::SNORM_FORCE_FP16;
+constexpr auto UNORM_FORCE_FP16 = ComponentType::UNORM_FORCE_FP16;
+constexpr auto FLOAT = ComponentType::FLOAT;
+constexpr bool C = false; // Normal color
+constexpr bool S = true;  // Srgb
+
+struct Table {
+    constexpr Table(TextureFormat texture_format, bool is_srgb, ComponentType red_component,
+                    ComponentType green_component, ComponentType blue_component,
+                    ComponentType alpha_component, PixelFormat pixel_format)
+        : texture_format{texture_format}, pixel_format{pixel_format}, red_component{red_component},
+          green_component{green_component}, blue_component{blue_component},
+          alpha_component{alpha_component}, is_srgb{is_srgb} {}
+    // Key fields (texture_format, is_srgb, component types) and the pixel_format they map to.
+    TextureFormat texture_format;
+    PixelFormat pixel_format;
+    ComponentType red_component;
+    ComponentType green_component;
+    ComponentType blue_component;
+    ComponentType alpha_component;
+    bool is_srgb;
+};
+constexpr std::array<Table, 74> DefinitionTable = {{
+    {TextureFormat::A8R8G8B8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ABGR8U},
+    {TextureFormat::A8R8G8B8, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::ABGR8S},
+    {TextureFormat::A8R8G8B8, C, UINT, UINT, UINT, UINT, PixelFormat::ABGR8UI},
+    {TextureFormat::A8R8G8B8, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::RGBA8_SRGB},
+
+    {TextureFormat::B5G6R5, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::B5G6R5U},
+
+    {TextureFormat::A2B10G10R10, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::A2B10G10R10U},
+
+    {TextureFormat::A1B5G5R5, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::A1B5G5R5U},
+
+    {TextureFormat::A4B4G4R4, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::R4G4B4A4U},
+
+    {TextureFormat::R8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::R8U},
+    {TextureFormat::R8, C, UINT, UINT, UINT, UINT, PixelFormat::R8UI},
+
+    {TextureFormat::G8R8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::RG8U},
+    {TextureFormat::G8R8, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::RG8S},
+
+    {TextureFormat::R16_G16_B16_A16, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::RGBA16U},
+    {TextureFormat::R16_G16_B16_A16, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::RGBA16F},
+    {TextureFormat::R16_G16_B16_A16, C, UINT, UINT, UINT, UINT, PixelFormat::RGBA16UI},
+
+    {TextureFormat::R16_G16, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::RG16F},
+    {TextureFormat::R16_G16, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::RG16},
+    {TextureFormat::R16_G16, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::RG16S},
+    {TextureFormat::R16_G16, C, UINT, UINT, UINT, UINT, PixelFormat::RG16UI},
+    {TextureFormat::R16_G16, C, SINT, SINT, SINT, SINT, PixelFormat::RG16I},
+
+    {TextureFormat::R16, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::R16F},
+    {TextureFormat::R16, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::R16U},
+    {TextureFormat::R16, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::R16S},
+    {TextureFormat::R16, C, UINT, UINT, UINT, UINT, PixelFormat::R16UI},
+    {TextureFormat::R16, C, SINT, SINT, SINT, SINT, PixelFormat::R16I},
+
+    {TextureFormat::BF10GF11RF11, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::R11FG11FB10F},
+
+    {TextureFormat::R32_G32_B32_A32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::RGBA32F},
+    {TextureFormat::R32_G32_B32_A32, C, UINT, UINT, UINT, UINT, PixelFormat::RGBA32UI},
+
+    {TextureFormat::R32_G32_B32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::RGB32F},
+
+    {TextureFormat::R32_G32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::RG32F},
+    {TextureFormat::R32_G32, C, UINT, UINT, UINT, UINT, PixelFormat::RG32UI},
+
+    {TextureFormat::R32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::R32F},
+    {TextureFormat::R32, C, UINT, UINT, UINT, UINT, PixelFormat::R32UI},
+
+    {TextureFormat::E5B9G9R9_SHAREDEXP, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::E5B9G9R9F},
+
+    {TextureFormat::ZF32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::Z32F},
+    {TextureFormat::Z16, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::Z16},
+    {TextureFormat::S8Z24, C, UINT, UNORM, UNORM, UNORM, PixelFormat::S8Z24},
+    {TextureFormat::ZF32_X24S8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::Z32FS8},
+
+    {TextureFormat::DXT1, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::DXT1},
+    {TextureFormat::DXT1, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::DXT1_SRGB},
+
+    {TextureFormat::DXT23, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::DXT23},
+    {TextureFormat::DXT23, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::DXT23_SRGB},
+
+    {TextureFormat::DXT45, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::DXT45},
+    {TextureFormat::DXT45, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::DXT45_SRGB},
+
+    // TODO: Use a different pixel format for SNORM
+    {TextureFormat::DXN1, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::DXN1},
+    {TextureFormat::DXN1, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::DXN1},
+
+    {TextureFormat::DXN2, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::DXN2UNORM},
+    {TextureFormat::DXN2, C, SNORM, SNORM, SNORM, SNORM, PixelFormat::DXN2SNORM},
+
+    {TextureFormat::BC7U, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC7U},
+    {TextureFormat::BC7U, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::BC7U_SRGB},
+
+    {TextureFormat::BC6H_SF16, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::BC6H_SF16},
+    {TextureFormat::BC6H_UF16, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::BC6H_UF16},
+
+    {TextureFormat::ASTC_2D_4X4, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_4X4},
+    {TextureFormat::ASTC_2D_4X4, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_4X4_SRGB},
+
+    {TextureFormat::ASTC_2D_5X4, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_5X4},
+    {TextureFormat::ASTC_2D_5X4, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_5X4_SRGB},
+
+    {TextureFormat::ASTC_2D_5X5, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_5X5},
+    {TextureFormat::ASTC_2D_5X5, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_5X5_SRGB},
+
+    {TextureFormat::ASTC_2D_8X8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_8X8},
+    {TextureFormat::ASTC_2D_8X8, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_8X8_SRGB},
+
+    {TextureFormat::ASTC_2D_8X5, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_8X5},
+    {TextureFormat::ASTC_2D_8X5, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_8X5_SRGB},
+
+    {TextureFormat::ASTC_2D_10X8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_10X8},
+    {TextureFormat::ASTC_2D_10X8, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_10X8_SRGB},
+
+    {TextureFormat::ASTC_2D_6X6, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_6X6},
+    {TextureFormat::ASTC_2D_6X6, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_6X6_SRGB},
+
+    {TextureFormat::ASTC_2D_10X10, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_10X10},
+    {TextureFormat::ASTC_2D_10X10, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_10X10_SRGB},
+
+    {TextureFormat::ASTC_2D_12X12, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_12X12},
+    {TextureFormat::ASTC_2D_12X12, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_12X12_SRGB},
+
+    {TextureFormat::ASTC_2D_8X6, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_8X6},
+    {TextureFormat::ASTC_2D_8X6, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_8X6_SRGB},
+
+    {TextureFormat::ASTC_2D_6X5, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_6X5},
+    {TextureFormat::ASTC_2D_6X5, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::ASTC_2D_6X5_SRGB},
+}};
+
+} // Anonymous namespace
+
+FormatLookupTable::FormatLookupTable() {
+    // Every slot starts Invalid; only the DefinitionTable combinations get a real format.
+    table.fill(static_cast<u8>(PixelFormat::Invalid));
+    for (const Table& def : DefinitionTable) {
+        table[CalculateIndex(def.texture_format, def.is_srgb, def.red_component,
+                             def.green_component, def.blue_component, def.alpha_component)] =
+            static_cast<u8>(def.pixel_format);
+    }
+}
+
+PixelFormat FormatLookupTable::GetPixelFormat(TextureFormat format, bool is_srgb,
+                                              ComponentType red_component,
+                                              ComponentType green_component,
+                                              ComponentType blue_component,
+                                              ComponentType alpha_component) const noexcept {
+    const auto found = static_cast<PixelFormat>(table[CalculateIndex(
+        format, is_srgb, red_component, green_component, blue_component, alpha_component)]);
+    if (found == PixelFormat::Invalid) {
+        // No table entry for this combination: report it and fall back to ABGR8U.
+        UNIMPLEMENTED_MSG("texture format={} srgb={} components={{{} {} {} {}}}",
+                          static_cast<int>(format), is_srgb, static_cast<int>(red_component),
+                          static_cast<int>(green_component), static_cast<int>(blue_component),
+                          static_cast<int>(alpha_component));
+        return PixelFormat::ABGR8U;
+    }
+    return found;
+}
+
+void FormatLookupTable::Set(TextureFormat format, bool is_srgb, ComponentType red_component,
+                            ComponentType green_component, ComponentType blue_component,
+                            ComponentType alpha_component, PixelFormat pixel_format) {}
+
+/// Flattens a (format, srgb, red, green, blue, alpha) descriptor into its lookup table index.
+std::size_t FormatLookupTable::CalculateIndex(TextureFormat format, bool is_srgb,
+                                              ComponentType red_component,
+                                              ComponentType green_component,
+                                              ComponentType blue_component,
+                                              ComponentType alpha_component) noexcept {
+    const auto format_index = static_cast<std::size_t>(format);
+    const auto red_index = static_cast<std::size_t>(red_component);
+    // Each channel must use its own component type so mixed-type descriptors get distinct slots.
+    const auto green_index = static_cast<std::size_t>(green_component);
+    const auto blue_index = static_cast<std::size_t>(blue_component);
+    const auto alpha_index = static_cast<std::size_t>(alpha_component);
+    const std::size_t srgb_index = is_srgb ? 1 : 0;
+    return format_index * PerFormat + srgb_index * PerComponents4 +
+           alpha_index * PerComponents3 + blue_index * PerComponents2 +
+           green_index * PerComponent + red_index;
+}
+
+} // namespace VideoCommon
diff --git a/src/video_core/texture_cache/format_lookup_table.h b/src/video_core/texture_cache/format_lookup_table.h
new file mode 100644
index 000000000..aa77e0a5a
--- /dev/null
+++ b/src/video_core/texture_cache/format_lookup_table.h
@@ -0,0 +1,51 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <array>
+#include <limits>
+#include "video_core/surface.h"
+#include "video_core/textures/texture.h"
+
+namespace VideoCommon {
+
+class FormatLookupTable {
+public:
+    explicit FormatLookupTable();
+    /// Resolves a guest texture format (plus sRGB flag and per-channel types) to a PixelFormat.
+    VideoCore::Surface::PixelFormat GetPixelFormat(
+        Tegra::Texture::TextureFormat format, bool is_srgb,
+        Tegra::Texture::ComponentType red_component, Tegra::Texture::ComponentType green_component,
+        Tegra::Texture::ComponentType blue_component,
+        Tegra::Texture::ComponentType alpha_component) const noexcept;
+
+private:
+    static_assert(VideoCore::Surface::MaxPixelFormat <= std::numeric_limits<u8>::max());
+    // Number of texture-format slots reserved in the table.
+    static constexpr std::size_t NumTextureFormats = 128;
+    // Index strides: each component type occupies one radix-PerComponent digit of the index.
+    static constexpr std::size_t PerComponent = 8;
+    static constexpr std::size_t PerComponents2 = PerComponent * PerComponent;
+    static constexpr std::size_t PerComponents3 = PerComponents2 * PerComponent;
+    static constexpr std::size_t PerComponents4 = PerComponents3 * PerComponent;
+    static constexpr std::size_t PerFormat = PerComponents4 * 2; // two sRGB states
+    /// Computes the flat index that corresponds to a lookup key.
+    static std::size_t CalculateIndex(Tegra::Texture::TextureFormat format, bool is_srgb,
+                                      Tegra::Texture::ComponentType red_component,
+                                      Tegra::Texture::ComponentType green_component,
+                                      Tegra::Texture::ComponentType blue_component,
+                                      Tegra::Texture::ComponentType alpha_component) noexcept;
+    /// NOTE(review): the out-of-line definition of Set currently has an empty body.
+    void Set(Tegra::Texture::TextureFormat format, bool is_srgb,
+             Tegra::Texture::ComponentType red_component,
+             Tegra::Texture::ComponentType green_component,
+             Tegra::Texture::ComponentType blue_component,
+             Tegra::Texture::ComponentType alpha_component,
+             VideoCore::Surface::PixelFormat pixel_format);
+    // One u8 slot per (format, sRGB, component types) key.
+    std::array<u8, NumTextureFormats * PerFormat> table;
+};
+
+} // namespace VideoCommon
diff --git a/src/video_core/texture_cache/surface_base.cpp b/src/video_core/texture_cache/surface_base.cpp
index 683c49207..829268b4c 100644
--- a/src/video_core/texture_cache/surface_base.cpp
+++ b/src/video_core/texture_cache/surface_base.cpp
@@ -2,6 +2,7 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
+#include "common/algorithm.h"
 #include "common/assert.h"
 #include "common/common_types.h"
 #include "common/microprofile.h"
diff --git a/src/video_core/texture_cache/surface_base.h b/src/video_core/texture_cache/surface_base.h
index 5e497e49f..1bed82898 100644
--- a/src/video_core/texture_cache/surface_base.h
+++ b/src/video_core/texture_cache/surface_base.h
@@ -4,12 +4,11 @@
 
 #pragma once
 
-#include <algorithm>
+#include <optional>
+#include <tuple>
 #include <unordered_map>
 #include <vector>
 
-#include "common/assert.h"
-#include "common/binary_find.h"
 #include "common/common_types.h"
 #include "video_core/gpu.h"
 #include "video_core/morton.h"
diff --git a/src/video_core/texture_cache/surface_params.cpp b/src/video_core/texture_cache/surface_params.cpp
index 1e4d3fb79..858e17e08 100644
--- a/src/video_core/texture_cache/surface_params.cpp
+++ b/src/video_core/texture_cache/surface_params.cpp
@@ -2,24 +2,23 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
-#include <map>
+#include <algorithm>
+#include <string>
+#include <tuple>
 
 #include "common/alignment.h"
 #include "common/bit_util.h"
 #include "core/core.h"
 #include "video_core/engines/shader_bytecode.h"
 #include "video_core/surface.h"
+#include "video_core/texture_cache/format_lookup_table.h"
 #include "video_core/texture_cache/surface_params.h"
 
 namespace VideoCommon {
 
-using VideoCore::Surface::ComponentTypeFromDepthFormat;
-using VideoCore::Surface::ComponentTypeFromRenderTarget;
-using VideoCore::Surface::ComponentTypeFromTexture;
 using VideoCore::Surface::PixelFormat;
 using VideoCore::Surface::PixelFormatFromDepthFormat;
 using VideoCore::Surface::PixelFormatFromRenderTargetFormat;
-using VideoCore::Surface::PixelFormatFromTextureFormat;
 using VideoCore::Surface::SurfaceTarget;
 using VideoCore::Surface::SurfaceTargetFromTextureType;
 using VideoCore::Surface::SurfaceType;
@@ -69,7 +68,8 @@ constexpr u32 GetMipmapSize(bool uncompressed, u32 mip_size, u32 tile) {
 
 } // Anonymous namespace
 
-SurfaceParams SurfaceParams::CreateForTexture(const Tegra::Texture::TICEntry& tic,
+SurfaceParams SurfaceParams::CreateForTexture(const FormatLookupTable& lookup_table,
+                                              const Tegra::Texture::TICEntry& tic,
                                               const VideoCommon::Shader::Sampler& entry) {
     SurfaceParams params;
     params.is_tiled = tic.IsTiled();
@@ -78,8 +78,8 @@ SurfaceParams SurfaceParams::CreateForTexture(const Tegra::Texture::TICEntry& ti
     params.block_height = params.is_tiled ? tic.BlockHeight() : 0,
     params.block_depth = params.is_tiled ? tic.BlockDepth() : 0,
     params.tile_width_spacing = params.is_tiled ? (1 << tic.tile_width_spacing.Value()) : 1;
-    params.pixel_format =
-        PixelFormatFromTextureFormat(tic.format, tic.r_type.Value(), params.srgb_conversion);
+    params.pixel_format = lookup_table.GetPixelFormat(
+        tic.format, params.srgb_conversion, tic.r_type, tic.g_type, tic.b_type, tic.a_type);
     params.type = GetFormatType(params.pixel_format);
     if (entry.IsShadow() && params.type == SurfaceType::ColorTexture) {
         switch (params.pixel_format) {
@@ -99,7 +99,6 @@ SurfaceParams SurfaceParams::CreateForTexture(const Tegra::Texture::TICEntry& ti
         }
         params.type = GetFormatType(params.pixel_format);
     }
-    params.component_type = ComponentTypeFromTexture(tic.r_type.Value());
     params.type = GetFormatType(params.pixel_format);
     // TODO: on 1DBuffer we should use the tic info.
     if (tic.IsBuffer()) {
@@ -128,7 +127,8 @@ SurfaceParams SurfaceParams::CreateForTexture(const Tegra::Texture::TICEntry& ti
     return params;
 }
 
-SurfaceParams SurfaceParams::CreateForImage(const Tegra::Texture::TICEntry& tic,
+SurfaceParams SurfaceParams::CreateForImage(const FormatLookupTable& lookup_table,
+                                            const Tegra::Texture::TICEntry& tic,
                                             const VideoCommon::Shader::Image& entry) {
     SurfaceParams params;
     params.is_tiled = tic.IsTiled();
@@ -137,10 +137,9 @@ SurfaceParams SurfaceParams::CreateForImage(const Tegra::Texture::TICEntry& tic,
     params.block_height = params.is_tiled ? tic.BlockHeight() : 0,
     params.block_depth = params.is_tiled ? tic.BlockDepth() : 0,
     params.tile_width_spacing = params.is_tiled ? (1 << tic.tile_width_spacing.Value()) : 1;
-    params.pixel_format =
-        PixelFormatFromTextureFormat(tic.format, tic.r_type.Value(), params.srgb_conversion);
+    params.pixel_format = lookup_table.GetPixelFormat(
+        tic.format, params.srgb_conversion, tic.r_type, tic.g_type, tic.b_type, tic.a_type);
     params.type = GetFormatType(params.pixel_format);
-    params.component_type = ComponentTypeFromTexture(tic.r_type.Value());
     params.type = GetFormatType(params.pixel_format);
     params.target = ImageTypeToSurfaceTarget(entry.GetType());
     // TODO: on 1DBuffer we should use the tic info.
@@ -181,7 +180,6 @@ SurfaceParams SurfaceParams::CreateForDepthBuffer(
     params.block_depth = std::min(block_depth, 5U);
     params.tile_width_spacing = 1;
     params.pixel_format = PixelFormatFromDepthFormat(format);
-    params.component_type = ComponentTypeFromDepthFormat(format);
     params.type = GetFormatType(params.pixel_format);
     params.width = zeta_width;
     params.height = zeta_height;
@@ -206,7 +204,6 @@ SurfaceParams SurfaceParams::CreateForFramebuffer(Core::System& system, std::siz
     params.block_depth = config.memory_layout.block_depth;
     params.tile_width_spacing = 1;
     params.pixel_format = PixelFormatFromRenderTargetFormat(config.format);
-    params.component_type = ComponentTypeFromRenderTarget(config.format);
     params.type = GetFormatType(params.pixel_format);
     if (params.is_tiled) {
         params.pitch = 0;
@@ -236,7 +233,6 @@ SurfaceParams SurfaceParams::CreateForFermiCopySurface(
     params.block_depth = params.is_tiled ? std::min(config.BlockDepth(), 5U) : 0,
     params.tile_width_spacing = 1;
     params.pixel_format = PixelFormatFromRenderTargetFormat(config.format);
-    params.component_type = ComponentTypeFromRenderTarget(config.format);
     params.type = GetFormatType(params.pixel_format);
     params.width = config.width;
     params.height = config.height;
@@ -355,10 +351,10 @@ std::size_t SurfaceParams::GetInnerMipmapMemorySize(u32 level, bool as_host_size
 
 bool SurfaceParams::operator==(const SurfaceParams& rhs) const {
     return std::tie(is_tiled, block_width, block_height, block_depth, tile_width_spacing, width,
-                    height, depth, pitch, num_levels, pixel_format, component_type, type, target) ==
+                    height, depth, pitch, num_levels, pixel_format, type, target) ==
            std::tie(rhs.is_tiled, rhs.block_width, rhs.block_height, rhs.block_depth,
                     rhs.tile_width_spacing, rhs.width, rhs.height, rhs.depth, rhs.pitch,
-                    rhs.num_levels, rhs.pixel_format, rhs.component_type, rhs.type, rhs.target);
+                    rhs.num_levels, rhs.pixel_format, rhs.type, rhs.target);
 }
 
 std::string SurfaceParams::TargetName() const {
diff --git a/src/video_core/texture_cache/surface_params.h b/src/video_core/texture_cache/surface_params.h
index c58e7f8a4..709aa0dc2 100644
--- a/src/video_core/texture_cache/surface_params.h
+++ b/src/video_core/texture_cache/surface_params.h
@@ -16,16 +16,20 @@
 
 namespace VideoCommon {
 
+class FormatLookupTable;
+
 using VideoCore::Surface::SurfaceCompression;
 
 class SurfaceParams {
 public:
     /// Creates SurfaceCachedParams from a texture configuration.
-    static SurfaceParams CreateForTexture(const Tegra::Texture::TICEntry& tic,
+    static SurfaceParams CreateForTexture(const FormatLookupTable& lookup_table,
+                                          const Tegra::Texture::TICEntry& tic,
                                           const VideoCommon::Shader::Sampler& entry);
 
     /// Creates SurfaceCachedParams from an image configuration.
-    static SurfaceParams CreateForImage(const Tegra::Texture::TICEntry& tic,
+    static SurfaceParams CreateForImage(const FormatLookupTable& lookup_table,
+                                        const Tegra::Texture::TICEntry& tic,
                                         const VideoCommon::Shader::Image& entry);
 
     /// Creates SurfaceCachedParams for a depth buffer configuration.
@@ -248,7 +252,6 @@ public:
     u32 num_levels;
     u32 emulated_levels;
     VideoCore::Surface::PixelFormat pixel_format;
-    VideoCore::Surface::ComponentType component_type;
     VideoCore::Surface::SurfaceType type;
     VideoCore::Surface::SurfaceTarget target;
 
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index 877c6635d..41309ebea 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -29,6 +29,7 @@
 #include "video_core/rasterizer_interface.h"
 #include "video_core/surface.h"
 #include "video_core/texture_cache/copy_params.h"
+#include "video_core/texture_cache/format_lookup_table.h"
 #include "video_core/texture_cache/surface_base.h"
 #include "video_core/texture_cache/surface_params.h"
 #include "video_core/texture_cache/surface_view.h"
@@ -62,10 +63,10 @@ public:
         }
     }
 
-    /***
-     * `Guard` guarantees that rendertargets don't unregister themselves if the
+    /**
+     * Guarantees that rendertargets don't unregister themselves if the
      * collide. Protection is currently only done on 3D slices.
-     ***/
+     */
     void GuardRenderTargets(bool new_guard) {
         guard_render_targets = new_guard;
     }
@@ -96,7 +97,7 @@ public:
         if (!gpu_addr) {
             return {};
         }
-        const auto params{SurfaceParams::CreateForTexture(tic, entry)};
+        const auto params{SurfaceParams::CreateForTexture(format_lookup_table, tic, entry)};
         const auto [surface, view] = GetSurface(gpu_addr, params, true, false);
         if (guard_samplers) {
             sampled_textures.push_back(surface);
@@ -111,7 +112,7 @@ public:
         if (!gpu_addr) {
             return {};
         }
-        const auto params{SurfaceParams::CreateForImage(tic, entry)};
+        const auto params{SurfaceParams::CreateForImage(format_lookup_table, tic, entry)};
         const auto [surface, view] = GetSurface(gpu_addr, params, true, false);
         if (guard_samplers) {
             sampled_textures.push_back(surface);
@@ -224,8 +225,13 @@ public:
                      const Tegra::Engines::Fermi2D::Regs::Surface& dst_config,
                      const Tegra::Engines::Fermi2D::Config& copy_config) {
         std::lock_guard lock{mutex};
-        std::pair<TSurface, TView> dst_surface = GetFermiSurface(dst_config);
-        std::pair<TSurface, TView> src_surface = GetFermiSurface(src_config);
+        SurfaceParams src_params = SurfaceParams::CreateForFermiCopySurface(src_config);
+        SurfaceParams dst_params = SurfaceParams::CreateForFermiCopySurface(dst_config);
+        const GPUVAddr src_gpu_addr = src_config.Address();
+        const GPUVAddr dst_gpu_addr = dst_config.Address();
+        DeduceBestBlit(src_params, dst_params, src_gpu_addr, dst_gpu_addr);
+        std::pair<TSurface, TView> dst_surface = GetSurface(dst_gpu_addr, dst_params, true, false);
+        std::pair<TSurface, TView> src_surface = GetSurface(src_gpu_addr, src_params, true, false);
         ImageBlit(src_surface.second, dst_surface.second, copy_config);
         dst_surface.first->MarkAsModified(true, Tick());
     }
@@ -282,7 +288,7 @@ protected:
                            const Tegra::Engines::Fermi2D::Config& copy_config) = 0;
 
     // Depending on the backend, a buffer copy can be slow as it means deoptimizing the texture
-    // and reading it from a sepparate buffer.
+    // and reading it from a separate buffer.
     virtual void BufferCopy(TSurface& src_surface, TSurface& dst_surface) = 0;
 
     void ManageRenderTargetUnregister(TSurface& surface) {
@@ -357,13 +363,37 @@ private:
         BufferCopy = 3,
     };
 
+    enum class DeductionType : u32 {
+        DeductionComplete,   // A surface was deduced; Deduction::surface is set.
+        DeductionIncomplete, // Nothing overlaps the region; no surface is attached.
+        DeductionFailed,     // No usable or unambiguous surface could be determined.
+    };
+
+    struct Deduction {
+        DeductionType type{DeductionType::DeductionFailed};
+        TSurface surface{};
+        /// Whether the deduction failed (also the state of a default-constructed Deduction).
+        bool Failed() const {
+            return type == DeductionType::DeductionFailed;
+        }
+        /// Whether the result is DeductionIncomplete.
+        bool Incomplete() const {
+            return type == DeductionType::DeductionIncomplete;
+        }
+        /// Whether the attached surface has a zeta pixel format; dereferences `surface`.
+        bool IsDepth() const {
+            return surface->GetSurfaceParams().IsPixelFormatZeta();
+        }
+    };
+
     /**
-     * `PickStrategy` takes care of selecting a proper strategy to deal with a texture recycle.
-     * @param overlaps, the overlapping surfaces registered in the cache.
-     * @param params, the paremeters on the new surface.
-     * @param gpu_addr, the starting address of the new surface.
-     * @param untopological, tells the recycler that the texture has no way to match the overlaps
-     * due to topological reasons.
+     * Takes care of selecting a proper strategy to deal with a texture recycle.
+     *
+     * @param overlaps      The overlapping surfaces registered in the cache.
+     * @param params        The parameters on the new surface.
+     * @param gpu_addr      The starting address of the new surface.
+     * @param untopological Indicates to the recycler that the texture has no way
+     *                      to match the overlaps due to topological reasons.
      **/
     RecycleStrategy PickStrategy(std::vector<TSurface>& overlaps, const SurfaceParams& params,
                                  const GPUVAddr gpu_addr, const MatchTopologyResult untopological) {
@@ -374,7 +404,7 @@ private:
         if (params.block_depth > 1 || params.target == SurfaceTarget::Texture3D) {
             return RecycleStrategy::Flush;
         }
-        for (auto s : overlaps) {
+        for (const auto& s : overlaps) {
             const auto& s_params = s->GetSurfaceParams();
             if (s_params.block_depth > 1 || s_params.target == SurfaceTarget::Texture3D) {
                 return RecycleStrategy::Flush;
@@ -391,16 +421,19 @@ private:
     }
 
     /**
-     *  `RecycleSurface` es a method we use to decide what to do with textures we can't resolve in
-     *the cache It has 2 implemented strategies: Ignore and Flush. Ignore just unregisters all the
-     *overlaps and loads the new texture. Flush, flushes all the overlaps into memory and loads the
-     *new surface from that data.
-     * @param overlaps, the overlapping surfaces registered in the cache.
-     * @param params, the paremeters on the new surface.
-     * @param gpu_addr, the starting address of the new surface.
-     * @param preserve_contents, tells if the new surface should be loaded from meory or left blank
-     * @param untopological, tells the recycler that the texture has no way to match the overlaps
-     * due to topological reasons.
+     * Used to decide what to do with textures we can't resolve in the cache. It has 2 implemented
+     * strategies: Ignore and Flush.
+     *
+     * - Ignore: Just unregisters all the overlaps and loads the new texture.
+     * - Flush: Flushes all the overlaps into memory and loads the new surface from that data.
+     *
+     * @param overlaps          The overlapping surfaces registered in the cache.
+     * @param params            The parameters for the new surface.
+     * @param gpu_addr          The starting address of the new surface.
+     * @param preserve_contents Indicates that the new surface should be loaded from memory or left
+     *                          blank.
+     * @param untopological     Indicates to the recycler that the texture has no way to match the
+     *                          overlaps due to topological reasons.
      **/
     std::pair<TSurface, TView> RecycleSurface(std::vector<TSurface>& overlaps,
                                               const SurfaceParams& params, const GPUVAddr gpu_addr,
@@ -437,10 +470,12 @@ private:
     }
 
     /**
-     * `RebuildSurface` this method takes a single surface and recreates into another that
-     * may differ in format, target or width alingment.
-     * @param current_surface, the registered surface in the cache which we want to convert.
-     * @param params, the new surface params which we'll use to recreate the surface.
+     * Takes a single surface and recreates into another that may differ in
+     * format, target or width alignment.
+     *
+     * @param current_surface The registered surface in the cache which we want to convert.
+     * @param params          The new surface params which we'll use to recreate the surface.
+     * @param is_render       Whether or not the surface is a render target.
      **/
     std::pair<TSurface, TView> RebuildSurface(TSurface current_surface, const SurfaceParams& params,
                                               bool is_render) {
@@ -451,15 +486,13 @@ private:
             GetSiblingFormat(cr_params.pixel_format) == params.pixel_format) {
             SurfaceParams new_params = params;
             new_params.pixel_format = cr_params.pixel_format;
-            new_params.component_type = cr_params.component_type;
             new_params.type = cr_params.type;
             new_surface = GetUncachedSurface(gpu_addr, new_params);
         } else {
             new_surface = GetUncachedSurface(gpu_addr, params);
         }
         const auto& final_params = new_surface->GetSurfaceParams();
-        if (cr_params.type != final_params.type ||
-            (cr_params.component_type != final_params.component_type)) {
+        if (cr_params.type != final_params.type) {
             BufferCopy(current_surface, new_surface);
         } else {
             std::vector<CopyParams> bricks = current_surface->BreakDown(final_params);
@@ -474,12 +507,14 @@ private:
     }
 
     /**
-     * `ManageStructuralMatch` this method takes a single surface and checks with the new surface's
-     * params if it's an exact match, we return the main view of the registered surface. If it's
-     * formats don't match, we rebuild the surface. We call this last method a `Mirage`. If formats
+     * Takes a single surface and checks with the new surface's params if it's an exact
+     * match, we return the main view of the registered surface. If its formats don't
+     * match, we rebuild the surface. We call this last method a `Mirage`. If formats
      * match but the targets don't, we create an overview View of the registered surface.
-     * @param current_surface, the registered surface in the cache which we want to convert.
-     * @param params, the new surface params which we want to check.
+     *
+     * @param current_surface The registered surface in the cache which we want to convert.
+     * @param params          The new surface params which we want to check.
+     * @param is_render       Whether or not the surface is a render target.
      **/
     std::pair<TSurface, TView> ManageStructuralMatch(TSurface current_surface,
                                                      const SurfaceParams& params, bool is_render) {
@@ -501,13 +536,14 @@ private:
     }
 
     /**
-     * `TryReconstructSurface` unlike `RebuildSurface` where we know the registered surface
-     * matches the candidate in some way, we got no guarantess here. We try to see if the overlaps
-     * are sublayers/mipmaps of the new surface, if they all match we end up recreating a surface
-     * for them, else we return nothing.
-     * @param overlaps, the overlapping surfaces registered in the cache.
-     * @param params, the paremeters on the new surface.
-     * @param gpu_addr, the starting address of the new surface.
+     * Unlike RebuildSurface where we know whether or not registered surfaces match the candidate
+     * in some way, we have no guarantees here. We try to see if the overlaps are sublayers/mipmaps
+     * of the new surface, if they all match we end up recreating a surface for them,
+     * else we return nothing.
+     *
+     * @param overlaps The overlapping surfaces registered in the cache.
+     * @param params   The parameters on the new surface.
+     * @param gpu_addr The starting address of the new surface.
      **/
     std::optional<std::pair<TSurface, TView>> TryReconstructSurface(std::vector<TSurface>& overlaps,
                                                                     const SurfaceParams& params,
@@ -547,7 +583,7 @@ private:
         } else if (Settings::values.use_accurate_gpu_emulation && passed_tests != overlaps.size()) {
             return {};
         }
-        for (auto surface : overlaps) {
+        for (const auto& surface : overlaps) {
             Unregister(surface);
         }
         new_surface->MarkAsModified(modified, Tick());
@@ -556,19 +592,27 @@ private:
     }
 
     /**
-     * `GetSurface` gets the starting address and parameters of a candidate surface and tries
-     * to find a matching surface within the cache. This is done in 3 big steps. The first is to
-     * check the 1st Level Cache in order to find an exact match, if we fail, we move to step 2.
-     * Step 2 is checking if there are any overlaps at all, if none, we just load the texture from
-     * memory else we move to step 3. Step 3 consists on figuring the relationship between the
-     * candidate texture and the overlaps. We divide the scenarios depending if there's 1 or many
-     * overlaps. If there's many, we just try to reconstruct a new surface out of them based on the
-     * candidate's parameters, if we fail, we recycle. When there's only 1 overlap then we have to
-     * check if the candidate is a view (layer/mipmap) of the overlap or if the registered surface
-     * is a mipmap/layer of the candidate. In this last case we reconstruct a new surface.
-     * @param gpu_addr, the starting address of the candidate surface.
-     * @param params, the paremeters on the candidate surface.
-     * @param preserve_contents, tells if the new surface should be loaded from meory or left blank.
+     * Gets the starting address and parameters of a candidate surface and tries
+     * to find a matching surface within the cache. This is done in 3 big steps:
+     *
+     * 1. Check the 1st Level Cache in order to find an exact match, if we fail, we move to step 2.
+     *
+     * 2. Check if there are any overlaps at all, if there are none, we just load the texture from
+     *    memory else we move to step 3.
+     *
+     * 3. Consists of figuring out the relationship between the candidate texture and the
+     *    overlaps. We divide the scenarios depending if there's 1 or many overlaps. If
+     *    there's many, we just try to reconstruct a new surface out of them based on the
+     *    candidate's parameters, if we fail, we recycle. When there's only 1 overlap then we
+     *    have to check if the candidate is a view (layer/mipmap) of the overlap or if the
+     *    registered surface is a mipmap/layer of the candidate. In this last case we reconstruct
+     *    a new surface.
+     *
+     * @param gpu_addr          The starting address of the candidate surface.
+     * @param params            The parameters on the candidate surface.
+     * @param preserve_contents Indicates that the new surface should be loaded from memory or
+     *                          left blank.
+     * @param is_render         Whether or not the surface is a render target.
      **/
     std::pair<TSurface, TView> GetSurface(const GPUVAddr gpu_addr, const SurfaceParams& params,
                                           bool preserve_contents, bool is_render) {
@@ -623,7 +667,7 @@ private:
         // Step 3
         // Now we need to figure the relationship between the texture and its overlaps
         // we do a topological test to ensure we can find some relationship. If it fails
-        // inmediatly recycle the texture
+        // immediately recycle the texture
         for (const auto& surface : overlaps) {
             const auto topological_result = surface->MatchesTopology(params);
             if (topological_result != MatchTopologyResult::FullMatch) {
@@ -691,6 +735,123 @@ private:
                               MatchTopologyResult::FullMatch);
     }
 
+    /**
+     * Gets the starting address and parameters of a candidate surface and tries to find a
+     * matching surface within the cache that's similar to it. If there are many textures
+     * or the texture found is entirely incompatible, it will fail. If no texture is found, the
+     * blit will be unsuccessful.
+     *
+     * @param gpu_addr The starting address of the candidate surface.
+     * @param params   The parameters on the candidate surface.
+     **/
+    Deduction DeduceSurface(const GPUVAddr gpu_addr, const SurfaceParams& params) {
+        const auto host_ptr{system.GPU().MemoryManager().GetPointer(gpu_addr)};
+        const auto cache_addr{ToCacheAddr(host_ptr)};
+        // A zero cache address cannot be looked up, so the deduction fails outright.
+        if (!cache_addr) {
+            Deduction result{};
+            result.type = DeductionType::DeductionFailed;
+            return result;
+        }
+        // First, check for a surface registered at exactly this address in the L1 cache.
+        if (const auto iter = l1_cache.find(cache_addr); iter != l1_cache.end()) {
+            TSurface& current_surface = iter->second;
+            const auto topological_result = current_surface->MatchesTopology(params);
+            if (topological_result != MatchTopologyResult::FullMatch) {
+                Deduction result{};
+                result.type = DeductionType::DeductionFailed;
+                return result;
+            }
+            const auto struct_result = current_surface->MatchesStructure(params);
+            if (struct_result != MatchStructureResult::None &&
+                current_surface->MatchTarget(params.target)) {
+                Deduction result{};
+                result.type = DeductionType::DeductionComplete;
+                result.surface = current_surface;
+                return result;
+            }
+        }
+        // Otherwise query the surfaces in the candidate's region (guest size in bytes).
+        const std::size_t candidate_size = params.GetGuestSizeInBytes();
+        auto overlaps{GetSurfacesInRegion(cache_addr, candidate_size)};
+        // No overlap: the result is reported as incomplete and carries no surface.
+        if (overlaps.empty()) {
+            Deduction result{};
+            result.type = DeductionType::DeductionIncomplete;
+            return result;
+        }
+        // Several overlaps are treated as a failure; a single overlap becomes the result.
+        if (overlaps.size() > 1) {
+            Deduction result{};
+            result.type = DeductionType::DeductionFailed;
+            return result;
+        } else {
+            Deduction result{};
+            result.type = DeductionType::DeductionComplete;
+            result.surface = overlaps[0];
+            return result;
+        }
+    }
+
+    /**
+     * Gets a source and destination starting address and parameters, and tries to deduce
+     * whether they are supposed to be depth textures. If so, their parameters are
+     * modified to use the deduced depth format.
+     *
+     * @param src_params   The parameters of the candidate surface.
+     * @param dst_params   The parameters of the destination surface.
+     * @param src_gpu_addr The starting address of the candidate surface.
+     * @param dst_gpu_addr The starting address of the destination surface.
+     **/
+    void DeduceBestBlit(SurfaceParams& src_params, SurfaceParams& dst_params,
+                        const GPUVAddr src_gpu_addr, const GPUVAddr dst_gpu_addr) {
+        auto deduced_src = DeduceSurface(src_gpu_addr, src_params);
+        auto deduced_dst = DeduceSurface(dst_gpu_addr, dst_params);
+        if (deduced_src.Failed() || deduced_dst.Failed()) {
+            return;
+        }
+
+        const bool incomplete_src = deduced_src.Incomplete();
+        const bool incomplete_dst = deduced_dst.Incomplete();
+        // With no deduced surface on either side there is no format to inherit.
+        if (incomplete_src && incomplete_dst) {
+            return;
+        }
+
+        const bool any_incomplete = incomplete_src || incomplete_dst;
+        // Only proceed when every surface that was deduced has a depth format.
+        if (!any_incomplete) {
+            if (!(deduced_src.IsDepth() && deduced_dst.IsDepth())) {
+                return;
+            }
+        } else {
+            if (incomplete_src && !(deduced_dst.IsDepth())) {
+                return;
+            }
+
+            if (incomplete_dst && !(deduced_src.IsDepth())) {
+                return;
+            }
+        }
+
+        const auto inherit_format = [](SurfaceParams& to, const TSurface& from) {
+            const SurfaceParams& params = from->GetSurfaceParams();
+            to.pixel_format = params.pixel_format;
+            to.type = params.type;
+        };
+        // A side without a deduced surface borrows the format from the other side's surface.
+        if (!incomplete_src) {
+            inherit_format(src_params, deduced_src.surface);
+        } else {
+            inherit_format(src_params, deduced_dst.surface);
+        }
+        if (!incomplete_dst) {
+            inherit_format(dst_params, deduced_dst.surface);
+        } else {
+            inherit_format(dst_params, deduced_src.surface);
+        }
+    }
+
     std::pair<TSurface, TView> InitializeSurface(GPUVAddr gpu_addr, const SurfaceParams& params,
                                                  bool preserve_contents) {
         auto new_surface{GetUncachedSurface(gpu_addr, params)};
@@ -793,6 +954,8 @@ private:
 
     VideoCore::RasterizerInterface& rasterizer;
 
+    FormatLookupTable format_lookup_table;
+
     u64 ticks{};
 
     // Guards the cache for protection conflicts.
diff --git a/src/video_core/textures/astc.cpp b/src/video_core/textures/astc.cpp
index a9b8f69af..33bd31865 100644
--- a/src/video_core/textures/astc.cpp
+++ b/src/video_core/textures/astc.cpp
@@ -92,11 +92,11 @@ private:
         const unsigned int mask = 1 << m_NextBit++;
 
         // clear the bit
-        *m_CurByte &= ~mask;
+        *m_CurByte &= static_cast<unsigned char>(~mask);
 
         // Write the bit, if necessary
         if (b)
-            *m_CurByte |= mask;
+            *m_CurByte |= static_cast<unsigned char>(mask);
 
         // Next byte?
         if (m_NextBit >= 8) {
@@ -137,7 +137,7 @@ public:
         }
 
         uint64_t mask = (1 << (end - start + 1)) - 1;
-        return (m_Bits >> start) & mask;
+        return (m_Bits >> start) & static_cast<IntType>(mask);
     }
 
 private:
@@ -422,7 +422,7 @@ static TexelWeightParams DecodeBlockInfo(InputBitStream& strm) {
     TexelWeightParams params;
 
     // Read the entire block mode all at once
-    uint16_t modeBits = strm.ReadBits(11);
+    uint16_t modeBits = static_cast<uint16_t>(strm.ReadBits(11));
 
     // Does this match the void extent block mode?
     if ((modeBits & 0x01FF) == 0x1FC) {
@@ -625,10 +625,10 @@ static void FillVoidExtentLDR(InputBitStream& strm, uint32_t* const outBuf, uint
     }
 
     // Decode the RGBA components and renormalize them to the range [0, 255]
-    uint16_t r = strm.ReadBits(16);
-    uint16_t g = strm.ReadBits(16);
-    uint16_t b = strm.ReadBits(16);
-    uint16_t a = strm.ReadBits(16);
+    uint16_t r = static_cast<uint16_t>(strm.ReadBits(16));
+    uint16_t g = static_cast<uint16_t>(strm.ReadBits(16));
+    uint16_t b = static_cast<uint16_t>(strm.ReadBits(16));
+    uint16_t a = static_cast<uint16_t>(strm.ReadBits(16));
 
     uint32_t rgba = (r >> 8) | (g & 0xFF00) | (static_cast<uint32_t>(b) & 0xFF00) << 8 |
                     (static_cast<uint32_t>(a) & 0xFF00) << 16;
@@ -656,7 +656,7 @@ static IntType Replicate(const IntType& val, uint32_t numBits, uint32_t toBit) {
         return 0;
     if (toBit == 0)
         return 0;
-    IntType v = val & ((1 << numBits) - 1);
+    IntType v = val & static_cast<IntType>((1 << numBits) - 1);
     IntType res = v;
     uint32_t reslen = numBits;
     while (reslen < toBit) {
@@ -666,8 +666,8 @@ static IntType Replicate(const IntType& val, uint32_t numBits, uint32_t toBit) {
             comp = numBits - newshift;
             numBits = newshift;
         }
-        res <<= numBits;
-        res |= v >> comp;
+        res = static_cast<IntType>(res << numBits);
+        res = static_cast<IntType>(res | (v >> comp));
         reslen += numBits;
     }
     return res;
@@ -681,9 +681,10 @@ protected:
 
 public:
     Pixel() = default;
-    Pixel(ChannelType a, ChannelType r, ChannelType g, ChannelType b, unsigned bitDepth = 8)
+    Pixel(uint32_t a, uint32_t r, uint32_t g, uint32_t b, unsigned bitDepth = 8)
         : m_BitDepth{uint8_t(bitDepth), uint8_t(bitDepth), uint8_t(bitDepth), uint8_t(bitDepth)},
-          color{a, r, g, b} {}
+          color{static_cast<ChannelType>(a), static_cast<ChannelType>(r),
+                static_cast<ChannelType>(g), static_cast<ChannelType>(b)} {}
 
     // Changes the depth of each pixel. This scales the values to
     // the appropriate bit depth by either truncating the least
@@ -713,7 +714,7 @@ public:
             // Do nothing
             return val;
         } else if (oldDepth == 0 && newDepth != 0) {
-            return (1 << newDepth) - 1;
+            return static_cast<ChannelType>((1 << newDepth) - 1);
         } else if (newDepth > oldDepth) {
             return Replicate(val, oldDepth, newDepth);
         } else {
@@ -721,10 +722,11 @@ public:
             if (newDepth == 0) {
                 return 0xFF;
             } else {
-                uint8_t bitsWasted = oldDepth - newDepth;
+                uint8_t bitsWasted = static_cast<uint8_t>(oldDepth - newDepth);
                 uint16_t v = static_cast<uint16_t>(val);
-                v = (v + (1 << (bitsWasted - 1))) >> bitsWasted;
-                v = ::std::min<uint16_t>(::std::max<uint16_t>(0, v), (1 << newDepth) - 1);
+                v = static_cast<uint16_t>((v + (1 << (bitsWasted - 1))) >> bitsWasted);
+                v = ::std::min<uint16_t>(::std::max<uint16_t>(0, v),
+                                         static_cast<uint16_t>((1 << newDepth) - 1));
                 return static_cast<uint8_t>(v);
             }
         }
@@ -1190,18 +1192,18 @@ static uint32_t SelectPartition(int32_t seed, int32_t x, int32_t y, int32_t z,
     uint8_t seed11 = static_cast<uint8_t>((rnum >> 26) & 0xF);
     uint8_t seed12 = static_cast<uint8_t>(((rnum >> 30) | (rnum << 2)) & 0xF);
 
-    seed1 *= seed1;
-    seed2 *= seed2;
-    seed3 *= seed3;
-    seed4 *= seed4;
-    seed5 *= seed5;
-    seed6 *= seed6;
-    seed7 *= seed7;
-    seed8 *= seed8;
-    seed9 *= seed9;
-    seed10 *= seed10;
-    seed11 *= seed11;
-    seed12 *= seed12;
+    seed1 = static_cast<uint8_t>(seed1 * seed1);
+    seed2 = static_cast<uint8_t>(seed2 * seed2);
+    seed3 = static_cast<uint8_t>(seed3 * seed3);
+    seed4 = static_cast<uint8_t>(seed4 * seed4);
+    seed5 = static_cast<uint8_t>(seed5 * seed5);
+    seed6 = static_cast<uint8_t>(seed6 * seed6);
+    seed7 = static_cast<uint8_t>(seed7 * seed7);
+    seed8 = static_cast<uint8_t>(seed8 * seed8);
+    seed9 = static_cast<uint8_t>(seed9 * seed9);
+    seed10 = static_cast<uint8_t>(seed10 * seed10);
+    seed11 = static_cast<uint8_t>(seed11 * seed11);
+    seed12 = static_cast<uint8_t>(seed12 * seed12);
 
     int32_t sh1, sh2, sh3;
     if (seed & 1) {
@@ -1213,18 +1215,18 @@ static uint32_t SelectPartition(int32_t seed, int32_t x, int32_t y, int32_t z,
     }
     sh3 = (seed & 0x10) ? sh1 : sh2;
 
-    seed1 >>= sh1;
-    seed2 >>= sh2;
-    seed3 >>= sh1;
-    seed4 >>= sh2;
-    seed5 >>= sh1;
-    seed6 >>= sh2;
-    seed7 >>= sh1;
-    seed8 >>= sh2;
-    seed9 >>= sh3;
-    seed10 >>= sh3;
-    seed11 >>= sh3;
-    seed12 >>= sh3;
+    seed1 = static_cast<uint8_t>(seed1 >> sh1);
+    seed2 = static_cast<uint8_t>(seed2 >> sh2);
+    seed3 = static_cast<uint8_t>(seed3 >> sh1);
+    seed4 = static_cast<uint8_t>(seed4 >> sh2);
+    seed5 = static_cast<uint8_t>(seed5 >> sh1);
+    seed6 = static_cast<uint8_t>(seed6 >> sh2);
+    seed7 = static_cast<uint8_t>(seed7 >> sh1);
+    seed8 = static_cast<uint8_t>(seed8 >> sh2);
+    seed9 = static_cast<uint8_t>(seed9 >> sh3);
+    seed10 = static_cast<uint8_t>(seed10 >> sh3);
+    seed11 = static_cast<uint8_t>(seed11 >> sh3);
+    seed12 = static_cast<uint8_t>(seed12 >> sh3);
 
     int32_t a = seed1 * x + seed2 * y + seed11 * z + (rnum >> 14);
     int32_t b = seed3 * x + seed4 * y + seed12 * z + (rnum >> 10);
@@ -1557,7 +1559,9 @@ static void DecompressBlock(const uint8_t inBuf[16], const uint32_t blockWidth,
 
     // Make sure that higher non-texel bits are set to zero
     const uint32_t clearByteStart = (weightParams.GetPackedBitSize() >> 3) + 1;
-    texelWeightData[clearByteStart - 1] &= (1 << (weightParams.GetPackedBitSize() % 8)) - 1;
+    texelWeightData[clearByteStart - 1] =
+        texelWeightData[clearByteStart - 1] &
+        static_cast<uint8_t>((1 << (weightParams.GetPackedBitSize() % 8)) - 1);
     memset(texelWeightData + clearByteStart, 0, 16 - clearByteStart);
 
     std::vector<IntegerEncodedValue> texelWeightValues;
diff --git a/src/video_core/textures/texture.h b/src/video_core/textures/texture.h
index e36bc2c04..8e82c6748 100644
--- a/src/video_core/textures/texture.h
+++ b/src/video_core/textures/texture.h
@@ -132,6 +132,8 @@ enum class SwizzleSource : u32 {
 };
 
 union TextureHandle {
+    TextureHandle(u32 raw) : raw{raw} {}
+
     u32 raw;
     BitField<0, 20, u32> tic_id;
     BitField<20, 12, u32> tsc_id;
@@ -340,13 +342,14 @@ struct TSCEntry {
     float GetLodBias() const {
         // Sign extend the 13-bit value.
         constexpr u32 mask = 1U << (13 - 1);
-        return static_cast<s32>((mip_lod_bias ^ mask) - mask) / 256.0f;
+        return static_cast<float>(static_cast<s32>((mip_lod_bias ^ mask) - mask)) / 256.0f;
     }
 
     std::array<float, 4> GetBorderColor() const {
         if (srgb_conversion) {
-            return {srgb_border_color_r / 255.0f, srgb_border_color_g / 255.0f,
-                    srgb_border_color_b / 255.0f, border_color[3]};
+            return {static_cast<float>(srgb_border_color_r) / 255.0f,
+                    static_cast<float>(srgb_border_color_g) / 255.0f,
+                    static_cast<float>(srgb_border_color_b) / 255.0f, border_color[3]};
         }
         return border_color;
     }
@@ -354,7 +357,6 @@ struct TSCEntry {
 static_assert(sizeof(TSCEntry) == 0x20, "TSCEntry has wrong size");
 
 struct FullTextureInfo {
-    u32 index;
     TICEntry tic;
     TSCEntry tsc;
 };
diff --git a/src/video_core/video_core.cpp b/src/video_core/video_core.cpp
index 60cda0ca3..8e947394c 100644
--- a/src/video_core/video_core.cpp
+++ b/src/video_core/video_core.cpp
@@ -28,7 +28,7 @@ std::unique_ptr<Tegra::GPU> CreateGPU(Core::System& system) {
 
 u16 GetResolutionScaleFactor(const RendererBase& renderer) {
     return static_cast<u16>(
-        Settings::values.resolution_factor
+        Settings::values.resolution_factor != 0
             ? Settings::values.resolution_factor
             : renderer.GetRenderWindow().GetFramebufferLayout().GetScalingRatio());
 }