11 files changed, 234 insertions, 281 deletions
diff --git a/src/video_core/shader/control_flow.cpp b/src/video_core/shader/control_flow.cpp
index d47c63d9f..b427ac873 100644
--- a/src/video_core/shader/control_flow.cpp
+++ b/src/video_core/shader/control_flow.cpp
@@ -16,7 +16,9 @@
 #include "video_core/shader/shader_ir.h"
 
 namespace VideoCommon::Shader {
+
 namespace {
+
 using Tegra::Shader::Instruction;
 using Tegra::Shader::OpCode;
 
@@ -68,15 +70,15 @@ struct CFGRebuildState {
     const ProgramCode& program_code;
     ConstBufferLocker& locker;
     u32 start{};
-    std::vector<BlockInfo> block_info{};
-    std::list<u32> inspect_queries{};
-    std::list<Query> queries{};
-    std::unordered_map<u32, u32> registered{};
-    std::set<u32> labels{};
-    std::map<u32, u32> ssy_labels{};
-    std::map<u32, u32> pbk_labels{};
-    std::unordered_map<u32, BlockStack> stacks{};
-    ASTManager* manager;
+    std::vector<BlockInfo> block_info;
+    std::list<u32> inspect_queries;
+    std::list<Query> queries;
+    std::unordered_map<u32, u32> registered;
+    std::set<u32> labels;
+    std::map<u32, u32> ssy_labels;
+    std::map<u32, u32> pbk_labels;
+    std::unordered_map<u32, BlockStack> stacks;
+    ASTManager* manager{};
 };
 
 enum class BlockCollision : u32 { None, Found, Inside };
@@ -109,7 +111,7 @@ BlockInfo& CreateBlockInfo(CFGRebuildState& state, u32 start, u32 end) {
 }
 
 Pred GetPredicate(u32 index, bool negated) {
-    return static_cast<Pred>(index + (negated ? 8 : 0));
+    return static_cast<Pred>(static_cast<u64>(index) + (negated ? 8ULL : 0ULL));
 }
 
 /**
@@ -136,15 +138,13 @@ struct BranchIndirectInfo {
     s32 relative_position{};
 };
 
-std::optional<BranchIndirectInfo> TrackBranchIndirectInfo(const CFGRebuildState& state,
-                                                          u32 start_address, u32 current_position) {
-    const u32 shader_start = state.start;
-    u32 pos = current_position;
-    BranchIndirectInfo result{};
-    u64 track_register = 0;
+struct BufferInfo {
+    u32 index;
+    u32 offset;
+};
 
-    // Step 0 Get BRX Info
-    const Instruction instr = {state.program_code[pos]};
+std::optional<std::pair<s32, u64>> GetBRXInfo(const CFGRebuildState& state, u32& pos) {
+    const Instruction instr = state.program_code[pos];
     const auto opcode = OpCode::Decode(instr);
     if (opcode->get().GetId() != OpCode::Id::BRX) {
         return std::nullopt;
@@ -152,86 +152,94 @@ std::optional<BranchIndirectInfo> TrackBranchIndirectInfo(const CFGRebuildState&
     if (instr.brx.constant_buffer != 0) {
         return std::nullopt;
     }
-    track_register = instr.gpr8.Value();
-    result.relative_position = instr.brx.GetBranchExtend();
-    pos--;
-    bool found_track = false;
+    --pos;
+    return std::make_pair(instr.brx.GetBranchExtend(), instr.gpr8.Value());
+}
 
-    // Step 1 Track LDC
-    while (pos >= shader_start) {
-        if (IsSchedInstruction(pos, shader_start)) {
-            pos--;
+template <typename Result, typename TestCallable, typename PackCallable>
+// requires std::predicate<TestCallable, Instruction, const OpCode::Matcher&>
+// requires std::invocable<PackCallable, Instruction, const OpCode::Matcher&>
+std::optional<Result> TrackInstruction(const CFGRebuildState& state, u32& pos, TestCallable test,
+                                       PackCallable pack) {
+    for (; pos >= state.start && pos < state.program_code.size(); --pos) {
+        if (IsSchedInstruction(pos, state.start)) {
             continue;
         }
-        const Instruction instr = {state.program_code[pos]};
+        const Instruction instr = state.program_code[pos];
         const auto opcode = OpCode::Decode(instr);
-        if (opcode->get().GetId() == OpCode::Id::LD_C) {
-            if (instr.gpr0.Value() == track_register &&
-                instr.ld_c.type.Value() == Tegra::Shader::UniformType::Single) {
-                result.buffer = instr.cbuf36.index.Value();
-                result.offset = static_cast<u32>(instr.cbuf36.GetOffset());
-                track_register = instr.gpr8.Value();
-                pos--;
-                found_track = true;
-                break;
-            }
+        if (!opcode) {
+            continue;
+        }
+        if (test(instr, opcode->get())) {
+            --pos;
+            return std::make_optional(pack(instr, opcode->get()));
         }
-        pos--;
     }
+    return std::nullopt;
+}
 
-    if (!found_track) {
-        return std::nullopt;
-    }
-    found_track = false;
+std::optional<std::pair<BufferInfo, u64>> TrackLDC(const CFGRebuildState& state, u32& pos,
+                                                   u64 brx_tracked_register) {
+    return TrackInstruction<std::pair<BufferInfo, u64>>(
+        state, pos,
+        [brx_tracked_register](auto instr, const auto& opcode) {
+            return opcode.GetId() == OpCode::Id::LD_C &&
+                   instr.gpr0.Value() == brx_tracked_register &&
+                   instr.ld_c.type.Value() == Tegra::Shader::UniformType::Single;
+        },
+        [](auto instr, const auto& opcode) {
+            const BufferInfo info = {static_cast<u32>(instr.cbuf36.index.Value()),
+                                     static_cast<u32>(instr.cbuf36.GetOffset())};
+            return std::make_pair(info, instr.gpr8.Value());
+        });
+}
 
-    // Step 2 Track SHL
-    while (pos >= shader_start) {
-        if (IsSchedInstruction(pos, shader_start)) {
-            pos--;
-            continue;
-        }
-        const Instruction instr = state.program_code[pos];
-        const auto opcode = OpCode::Decode(instr);
-        if (opcode->get().GetId() == OpCode::Id::SHL_IMM) {
-            if (instr.gpr0.Value() == track_register) {
-                track_register = instr.gpr8.Value();
-                pos--;
-                found_track = true;
-                break;
-            }
-        }
-        pos--;
+std::optional<u64> TrackSHLRegister(const CFGRebuildState& state, u32& pos,
+                                    u64 ldc_tracked_register) {
+    return TrackInstruction<u64>(state, pos,
+                                 [ldc_tracked_register](auto instr, const auto& opcode) {
+                                     return opcode.GetId() == OpCode::Id::SHL_IMM &&
+                                            instr.gpr0.Value() == ldc_tracked_register;
+                                 },
+                                 [](auto instr, const auto&) { return instr.gpr8.Value(); });
+}
+
+std::optional<u32> TrackIMNMXValue(const CFGRebuildState& state, u32& pos,
+                                   u64 shl_tracked_register) {
+    return TrackInstruction<u32>(state, pos,
+                                 [shl_tracked_register](auto instr, const auto& opcode) {
+                                     return opcode.GetId() == OpCode::Id::IMNMX_IMM &&
+                                            instr.gpr0.Value() == shl_tracked_register;
+                                 },
+                                 [](auto instr, const auto&) {
+                                     return static_cast<u32>(instr.alu.GetSignedImm20_20() + 1);
+                                 });
+}
+
+std::optional<BranchIndirectInfo> TrackBranchIndirectInfo(const CFGRebuildState& state, u32 pos) {
+    const auto brx_info = GetBRXInfo(state, pos);
+    if (!brx_info) {
+        return std::nullopt;
     }
+    const auto [relative_position, brx_tracked_register] = *brx_info;
 
-    if (!found_track) {
+    const auto ldc_info = TrackLDC(state, pos, brx_tracked_register);
+    if (!ldc_info) {
         return std::nullopt;
     }
-    found_track = false;
+    const auto [buffer_info, ldc_tracked_register] = *ldc_info;
 
-    // Step 3 Track IMNMX
-    while (pos >= shader_start) {
-        if (IsSchedInstruction(pos, shader_start)) {
-            pos--;
-            continue;
-        }
-        const Instruction instr = state.program_code[pos];
-        const auto opcode = OpCode::Decode(instr);
-        if (opcode->get().GetId() == OpCode::Id::IMNMX_IMM) {
-            if (instr.gpr0.Value() == track_register) {
-                track_register = instr.gpr8.Value();
-                result.entries = instr.alu.GetSignedImm20_20() + 1;
-                pos--;
-                found_track = true;
-                break;
-            }
-        }
-        pos--;
+    const auto shl_tracked_register = TrackSHLRegister(state, pos, ldc_tracked_register);
+    if (!shl_tracked_register) {
+        return std::nullopt;
     }
 
-    if (!found_track) {
+    const auto entries = TrackIMNMXValue(state, pos, *shl_tracked_register);
+    if (!entries) {
         return std::nullopt;
     }
-    return result;
+
+    return BranchIndirectInfo{buffer_info.index, buffer_info.offset, *entries, relative_position};
 }
 
 std::pair<ParseResult, ParseInfo> ParseCode(CFGRebuildState& state, u32 address) {
@@ -420,30 +428,30 @@ std::pair<ParseResult, ParseInfo> ParseCode(CFGRebuildState& state, u32 address)
             break;
         }
         case OpCode::Id::BRX: {
-            auto tmp = TrackBranchIndirectInfo(state, address, offset);
-            if (tmp) {
-                auto result = *tmp;
-                std::vector<CaseBranch> branches{};
-                s32 pc_target = offset + result.relative_position;
-                for (u32 i = 0; i < result.entries; i++) {
-                    auto k = state.locker.ObtainKey(result.buffer, result.offset + i * 4);
-                    if (!k) {
-                        return {ParseResult::AbnormalFlow, parse_info};
-                    }
-                    u32 value = *k;
-                    u32 target = static_cast<u32>((value >> 3) + pc_target);
-                    insert_label(state, target);
-                    branches.emplace_back(value, target);
-                }
-                parse_info.end_address = offset;
-                parse_info.branch_info = MakeBranchInfo<MultiBranch>(
-                    static_cast<u32>(instr.gpr8.Value()), std::move(branches));
-
-                return {ParseResult::ControlCaught, parse_info};
-            } else {
+            const auto tmp = TrackBranchIndirectInfo(state, offset);
+            if (!tmp) {
                 LOG_WARNING(HW_GPU, "BRX Track Unsuccesful");
+                return {ParseResult::AbnormalFlow, parse_info};
             }
-            return {ParseResult::AbnormalFlow, parse_info};
+
+            const auto result = *tmp;
+            const s32 pc_target = offset + result.relative_position;
+            std::vector<CaseBranch> branches;
+            for (u32 i = 0; i < result.entries; i++) {
+                auto key = state.locker.ObtainKey(result.buffer, result.offset + i * 4);
+                if (!key) {
+                    return {ParseResult::AbnormalFlow, parse_info};
+                }
+                u32 value = *key;
+                u32 target = static_cast<u32>((value >> 3) + pc_target);
+                insert_label(state, target);
+                branches.emplace_back(value, target);
+            }
+            parse_info.end_address = offset;
+            parse_info.branch_info = MakeBranchInfo<MultiBranch>(
+                static_cast<u32>(instr.gpr8.Value()), std::move(branches));
+
+            return {ParseResult::ControlCaught, parse_info};
         }
         default:
             break;
diff --git a/src/video_core/shader/decode/arithmetic.cpp b/src/video_core/shader/decode/arithmetic.cpp
index 1473c282a..fcedd2af6 100644
--- a/src/video_core/shader/decode/arithmetic.cpp
+++ b/src/video_core/shader/decode/arithmetic.cpp
@@ -43,12 +43,12 @@ u32 ShaderIR::DecodeArithmetic(NodeBlock& bb, u32 pc) {
     case OpCode::Id::FMUL_IMM: {
         // FMUL does not have 'abs' bits and only the second operand has a 'neg' bit.
         if (instr.fmul.tab5cb8_2 != 0) {
-            LOG_WARNING(HW_GPU, "FMUL tab5cb8_2({}) is not implemented",
-                        instr.fmul.tab5cb8_2.Value());
+            LOG_DEBUG(HW_GPU, "FMUL tab5cb8_2({}) is not implemented",
+                      instr.fmul.tab5cb8_2.Value());
         }
         if (instr.fmul.tab5c68_0 != 1) {
-            LOG_WARNING(HW_GPU, "FMUL tab5cb8_0({}) is not implemented",
-                        instr.fmul.tab5c68_0.Value());
+            LOG_DEBUG(HW_GPU, "FMUL tab5c68_0({}) is not implemented",
+                      instr.fmul.tab5c68_0.Value());
         }
 
         op_b = GetOperandAbsNegFloat(op_b, false, instr.fmul.negate_b);
@@ -144,10 +144,11 @@ u32 ShaderIR::DecodeArithmetic(NodeBlock& bb, u32 pc) {
     case OpCode::Id::RRO_C:
     case OpCode::Id::RRO_R:
     case OpCode::Id::RRO_IMM: {
+        LOG_DEBUG(HW_GPU, "(STUBBED) RRO used");
+
         // Currently RRO is only implemented as a register move.
         op_b = GetOperandAbsNegFloat(op_b, instr.alu.abs_b, instr.alu.negate_b);
         SetRegister(bb, instr.gpr0, op_b);
-        LOG_WARNING(HW_GPU, "RRO instruction is incomplete");
         break;
     }
     default:
diff --git a/src/video_core/shader/decode/arithmetic_half.cpp b/src/video_core/shader/decode/arithmetic_half.cpp
index b06cbe441..ee7d9a29d 100644
--- a/src/video_core/shader/decode/arithmetic_half.cpp
+++ b/src/video_core/shader/decode/arithmetic_half.cpp
@@ -21,8 +21,8 @@ u32 ShaderIR::DecodeArithmeticHalf(NodeBlock& bb, u32 pc) {
 
     if (opcode->get().GetId() == OpCode::Id::HADD2_C ||
         opcode->get().GetId() == OpCode::Id::HADD2_R) {
-        if (instr.alu_half.ftz != 0) {
-            LOG_WARNING(HW_GPU, "{} FTZ not implemented", opcode->get().GetName());
+        if (instr.alu_half.ftz == 0) {
+            LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName());
         }
     }
 
diff --git a/src/video_core/shader/decode/arithmetic_half_immediate.cpp b/src/video_core/shader/decode/arithmetic_half_immediate.cpp
index 6466fc011..d179b9873 100644
--- a/src/video_core/shader/decode/arithmetic_half_immediate.cpp
+++ b/src/video_core/shader/decode/arithmetic_half_immediate.cpp
@@ -19,12 +19,12 @@ u32 ShaderIR::DecodeArithmeticHalfImmediate(NodeBlock& bb, u32 pc) {
     const auto opcode = OpCode::Decode(instr);
 
     if (opcode->get().GetId() == OpCode::Id::HADD2_IMM) {
-        if (instr.alu_half_imm.ftz != 0) {
-            LOG_WARNING(HW_GPU, "{} FTZ not implemented", opcode->get().GetName());
+        if (instr.alu_half_imm.ftz == 0) {
+            LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName());
         }
     } else {
-        if (instr.alu_half_imm.precision != Tegra::Shader::HalfPrecision::None) {
-            LOG_WARNING(HW_GPU, "{} FTZ not implemented", opcode->get().GetName());
+        if (instr.alu_half_imm.precision != Tegra::Shader::HalfPrecision::FTZ) {
+            LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName());
         }
     }
 
diff --git a/src/video_core/shader/decode/ffma.cpp b/src/video_core/shader/decode/ffma.cpp
index ca2f39e8d..5973588d6 100644
--- a/src/video_core/shader/decode/ffma.cpp
+++ b/src/video_core/shader/decode/ffma.cpp
@@ -19,10 +19,10 @@ u32 ShaderIR::DecodeFfma(NodeBlock& bb, u32 pc) {
 
     UNIMPLEMENTED_IF_MSG(instr.ffma.cc != 0, "FFMA cc not implemented");
     if (instr.ffma.tab5980_0 != 1) {
-        LOG_WARNING(HW_GPU, "FFMA tab5980_0({}) not implemented", instr.ffma.tab5980_0.Value());
+        LOG_DEBUG(HW_GPU, "FFMA tab5980_0({}) not implemented", instr.ffma.tab5980_0.Value());
     }
     if (instr.ffma.tab5980_1 != 0) {
-        LOG_WARNING(HW_GPU, "FFMA tab5980_1({}) not implemented", instr.ffma.tab5980_1.Value());
+        LOG_DEBUG(HW_GPU, "FFMA tab5980_1({}) not implemented", instr.ffma.tab5980_1.Value());
     }
 
     const Node op_a = GetRegister(instr.gpr8);
diff --git a/src/video_core/shader/decode/half_set.cpp b/src/video_core/shader/decode/half_set.cpp
index 48ca7a4af..848e46874 100644
--- a/src/video_core/shader/decode/half_set.cpp
+++ b/src/video_core/shader/decode/half_set.cpp
@@ -20,8 +20,8 @@ u32 ShaderIR::DecodeHalfSet(NodeBlock& bb, u32 pc) {
     const Instruction instr = {program_code[pc]};
     const auto opcode = OpCode::Decode(instr);
 
-    if (instr.hset2.ftz != 0) {
-        LOG_WARNING(HW_GPU, "{} FTZ not implemented", opcode->get().GetName());
+    if (instr.hset2.ftz == 0) {
+        LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName());
     }
 
     Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hset2.type_a);
diff --git a/src/video_core/shader/decode/half_set_predicate.cpp b/src/video_core/shader/decode/half_set_predicate.cpp
index fec8f2dbe..310655619 100644
--- a/src/video_core/shader/decode/half_set_predicate.cpp
+++ b/src/video_core/shader/decode/half_set_predicate.cpp
@@ -19,7 +19,9 @@ u32 ShaderIR::DecodeHalfSetPredicate(NodeBlock& bb, u32 pc) {
     const Instruction instr = {program_code[pc]};
     const auto opcode = OpCode::Decode(instr);
 
-    LOG_DEBUG(HW_GPU, "ftz={}", static_cast<u32>(instr.hsetp2.ftz));
+    if (instr.hsetp2.ftz == 0) {
+        LOG_DEBUG(HW_GPU, "{} without FTZ is not implemented", opcode->get().GetName());
+    }
 
     Node op_a = UnpackHalfFloat(GetRegister(instr.gpr8), instr.hsetp2.type_a);
     op_a = GetOperandAbsNegHalf(op_a, instr.hsetp2.abs_a, instr.hsetp2.negate_a);
diff --git a/src/video_core/shader/decode/image.cpp b/src/video_core/shader/decode/image.cpp
index b02d2cb95..d2fe4ec5d 100644
--- a/src/video_core/shader/decode/image.cpp
+++ b/src/video_core/shader/decode/image.cpp
@@ -143,39 +143,37 @@ u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) {
 }
 
 Image& ShaderIR::GetImage(Tegra::Shader::Image image, Tegra::Shader::ImageType type) {
-    const auto offset{static_cast<std::size_t>(image.index.Value())};
-    if (const auto existing_image = TryUseExistingImage(offset, type)) {
-        return *existing_image;
+    const auto offset = static_cast<u32>(image.index.Value());
+
+    const auto it =
+        std::find_if(std::begin(used_images), std::end(used_images),
+                     [offset](const Image& entry) { return entry.GetOffset() == offset; });
+    if (it != std::end(used_images)) {
+        ASSERT(!it->IsBindless() && it->GetType() == type);
+        return *it;
     }
 
-    const std::size_t next_index{used_images.size()};
-    return used_images.emplace(offset, Image{offset, next_index, type}).first->second;
+    const auto next_index = static_cast<u32>(used_images.size());
+    return used_images.emplace_back(next_index, offset, type);
 }
 
 Image& ShaderIR::GetBindlessImage(Tegra::Shader::Register reg, Tegra::Shader::ImageType type) {
-    const Node image_register{GetRegister(reg)};
-    const auto [base_image, cbuf_index, cbuf_offset]{
-        TrackCbuf(image_register, global_code, static_cast<s64>(global_code.size()))};
-    const auto cbuf_key{(static_cast<u64>(cbuf_index) << 32) | static_cast<u64>(cbuf_offset)};
-
-    if (const auto image = TryUseExistingImage(cbuf_key, type)) {
-        return *image;
-    }
-
-    const std::size_t next_index{used_images.size()};
-    return used_images.emplace(cbuf_key, Image{cbuf_index, cbuf_offset, next_index, type})
-        .first->second;
-}
-
-Image* ShaderIR::TryUseExistingImage(u64 offset, Tegra::Shader::ImageType type) {
-    auto it = used_images.find(offset);
-    if (it == used_images.end()) {
-        return nullptr;
+    const Node image_register = GetRegister(reg);
+    const auto [base_image, buffer, offset] =
+        TrackCbuf(image_register, global_code, static_cast<s64>(global_code.size()));
+
+    const auto it =
+        std::find_if(std::begin(used_images), std::end(used_images),
+                     [buffer = buffer, offset = offset](const Image& entry) {
+                         return entry.GetBuffer() == buffer && entry.GetOffset() == offset;
+                     });
+    if (it != std::end(used_images)) {
+        ASSERT(it->IsBindless() && it->GetType() == type);
+        return *it;
     }
-    auto& image = it->second;
-    ASSERT(image.GetType() == type);
 
-    return &image;
+    const auto next_index = static_cast<u32>(used_images.size());
+    return used_images.emplace_back(next_index, offset, buffer, type);
 }
 
 } // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp
index 0599ef34f..bb926a132 100644
--- a/src/video_core/shader/decode/texture.cpp
+++ b/src/video_core/shader/decode/texture.cpp
@@ -44,10 +44,6 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
     bool is_bindless = false;
     switch (opcode->get().GetId()) {
     case OpCode::Id::TEX: {
-        if (instr.tex.UsesMiscMode(TextureMiscMode::NODEP)) {
-            LOG_WARNING(HW_GPU, "TEX.NODEP implementation is incomplete");
-        }
-
         const TextureType texture_type{instr.tex.texture_type};
         const bool is_array = instr.tex.array != 0;
         const bool is_aoffi = instr.tex.UsesMiscMode(TextureMiscMode::AOFFI);
@@ -62,10 +58,6 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
         UNIMPLEMENTED_IF_MSG(instr.tex.UsesMiscMode(TextureMiscMode::AOFFI),
                              "AOFFI is not implemented");
 
-        if (instr.tex.UsesMiscMode(TextureMiscMode::NODEP)) {
-            LOG_WARNING(HW_GPU, "TEX.NODEP implementation is incomplete");
-        }
-
         const TextureType texture_type{instr.tex_b.texture_type};
         const bool is_array = instr.tex_b.array != 0;
         const bool is_aoffi = instr.tex.UsesMiscMode(TextureMiscMode::AOFFI);
@@ -82,10 +74,6 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
         const bool depth_compare = instr.texs.UsesMiscMode(TextureMiscMode::DC);
         const auto process_mode = instr.texs.GetTextureProcessMode();
 
-        if (instr.texs.UsesMiscMode(TextureMiscMode::NODEP)) {
-            LOG_WARNING(HW_GPU, "TEXS.NODEP implementation is incomplete");
-        }
-
         const Node4 components =
             GetTexsCode(instr, texture_type, process_mode, depth_compare, is_array);
 
@@ -107,10 +95,6 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
         UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::PTP),
                              "PTP is not implemented");
 
-        if (instr.tld4.UsesMiscMode(TextureMiscMode::NODEP)) {
-            LOG_WARNING(HW_GPU, "TLD4.NODEP implementation is incomplete");
-        }
-
         const auto texture_type = instr.tld4.texture_type.Value();
         const bool depth_compare = is_bindless ? instr.tld4_b.UsesMiscMode(TextureMiscMode::DC)
                                                : instr.tld4.UsesMiscMode(TextureMiscMode::DC);
@@ -119,15 +103,12 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
                                           : instr.tld4.UsesMiscMode(TextureMiscMode::AOFFI);
         WriteTexInstructionFloat(
             bb, instr,
-            GetTld4Code(instr, texture_type, depth_compare, is_array, is_aoffi, is_bindless), true);
+            GetTld4Code(instr, texture_type, depth_compare, is_array, is_aoffi, is_bindless));
         break;
     }
     case OpCode::Id::TLD4S: {
         UNIMPLEMENTED_IF_MSG(instr.tld4s.UsesMiscMode(TextureMiscMode::AOFFI),
                              "AOFFI is not implemented");
-        if (instr.tld4s.UsesMiscMode(TextureMiscMode::NODEP)) {
-            LOG_WARNING(HW_GPU, "TLD4S.NODEP implementation is incomplete");
-        }
 
         const bool depth_compare = instr.tld4s.UsesMiscMode(TextureMiscMode::DC);
         const Node op_a = GetRegister(instr.gpr8);
@@ -164,10 +145,6 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
         is_bindless = true;
         [[fallthrough]];
     case OpCode::Id::TXQ: {
-        if (instr.txq.UsesMiscMode(TextureMiscMode::NODEP)) {
-            LOG_WARNING(HW_GPU, "TXQ.NODEP implementation is incomplete");
-        }
-
         // TODO: The new commits on the texture refactor, change the way samplers work.
         // Sadly, not all texture instructions specify the type of texture their sampler
         // uses. This must be fixed at a later instance.
@@ -205,10 +182,6 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
         UNIMPLEMENTED_IF_MSG(instr.tmml.UsesMiscMode(Tegra::Shader::TextureMiscMode::NDV),
                              "NDV is not implemented");
 
-        if (instr.tmml.UsesMiscMode(TextureMiscMode::NODEP)) {
-            LOG_WARNING(HW_GPU, "TMML.NODEP implementation is incomplete");
-        }
-
         auto texture_type = instr.tmml.texture_type.Value();
         const bool is_array = instr.tmml.array != 0;
         const auto& sampler =
@@ -254,25 +227,17 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
         UNIMPLEMENTED_IF_MSG(instr.tld.ms, "MS is not implemented");
         UNIMPLEMENTED_IF_MSG(instr.tld.cl, "CL is not implemented");
 
-        if (instr.tld.nodep_flag) {
-            LOG_WARNING(HW_GPU, "TLD.NODEP implementation is incomplete");
-        }
-
         WriteTexInstructionFloat(bb, instr, GetTldCode(instr));
         break;
     }
     case OpCode::Id::TLDS: {
-        const Tegra::Shader::TextureType texture_type{instr.tlds.GetTextureType()};
+        const TextureType texture_type{instr.tlds.GetTextureType()};
         const bool is_array{instr.tlds.IsArrayTexture()};
 
         UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(TextureMiscMode::AOFFI),
                              "AOFFI is not implemented");
         UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(TextureMiscMode::MZ), "MZ is not implemented");
 
-        if (instr.tlds.UsesMiscMode(TextureMiscMode::NODEP)) {
-            LOG_WARNING(HW_GPU, "TLDS.NODEP implementation is incomplete");
-        }
-
         const Node4 components = GetTldsCode(instr, texture_type, is_array);
 
         if (instr.tlds.fp32_flag) {
@@ -293,84 +258,86 @@ const Sampler& ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler,
                                     std::optional<SamplerInfo> sampler_info) {
     const auto offset = static_cast<u32>(sampler.index.Value());
 
-    Tegra::Shader::TextureType type;
+    TextureType type;
     bool is_array;
     bool is_shadow;
     if (sampler_info) {
         type = sampler_info->type;
         is_array = sampler_info->is_array;
         is_shadow = sampler_info->is_shadow;
-    } else if (auto sampler = locker.ObtainBoundSampler(offset); sampler) {
+    } else if (const auto sampler = locker.ObtainBoundSampler(offset)) {
         type = sampler->texture_type.Value();
         is_array = sampler->is_array.Value() != 0;
         is_shadow = sampler->is_shadow.Value() != 0;
     } else {
-        type = Tegra::Shader::TextureType::Texture2D;
+        LOG_WARNING(HW_GPU, "Unknown sampler info");
+        type = TextureType::Texture2D;
         is_array = false;
         is_shadow = false;
     }
 
     // If this sampler has already been used, return the existing mapping.
-    const auto itr =
+    const auto it =
         std::find_if(used_samplers.begin(), used_samplers.end(),
-                     [&](const Sampler& entry) { return entry.GetOffset() == offset; });
-    if (itr != used_samplers.end()) {
-        ASSERT(itr->GetType() == type && itr->IsArray() == is_array &&
-               itr->IsShadow() == is_shadow);
-        return *itr;
+                     [offset](const Sampler& entry) { return entry.GetOffset() == offset; });
+    if (it != used_samplers.end()) {
+        ASSERT(!it->IsBindless() && it->GetType() == type && it->IsArray() == is_array &&
+               it->IsShadow() == is_shadow);
+        return *it;
     }
 
     // Otherwise create a new mapping for this sampler
-    const std::size_t next_index = used_samplers.size();
-    const Sampler entry{offset, next_index, type, is_array, is_shadow};
-    return *used_samplers.emplace(entry).first;
-} // namespace VideoCommon::Shader
+    const auto next_index = static_cast<u32>(used_samplers.size());
+    return used_samplers.emplace_back(Sampler(next_index, offset, type, is_array, is_shadow));
+}
 
 const Sampler& ShaderIR::GetBindlessSampler(const Tegra::Shader::Register& reg,
                                             std::optional<SamplerInfo> sampler_info) {
     const Node sampler_register = GetRegister(reg);
-    const auto [base_sampler, cbuf_index, cbuf_offset] =
+    const auto [base_sampler, buffer, offset] =
         TrackCbuf(sampler_register, global_code, static_cast<s64>(global_code.size()));
     ASSERT(base_sampler != nullptr);
-    const auto cbuf_key = (static_cast<u64>(cbuf_index) << 32) | static_cast<u64>(cbuf_offset);
-    Tegra::Shader::TextureType type;
+
+    TextureType type;
     bool is_array;
     bool is_shadow;
     if (sampler_info) {
         type = sampler_info->type;
         is_array = sampler_info->is_array;
         is_shadow = sampler_info->is_shadow;
-    } else if (auto sampler = locker.ObtainBindlessSampler(cbuf_index, cbuf_offset); sampler) {
+    } else if (const auto sampler = locker.ObtainBindlessSampler(buffer, offset)) {
         type = sampler->texture_type.Value();
         is_array = sampler->is_array.Value() != 0;
         is_shadow = sampler->is_shadow.Value() != 0;
     } else {
-        type = Tegra::Shader::TextureType::Texture2D;
+        LOG_WARNING(HW_GPU, "Unknown sampler info");
+        type = TextureType::Texture2D;
         is_array = false;
         is_shadow = false;
     }
 
     // If this sampler has already been used, return the existing mapping.
-    const auto itr =
+    const auto it =
         std::find_if(used_samplers.begin(), used_samplers.end(),
-                     [&](const Sampler& entry) { return entry.GetOffset() == cbuf_key; });
-    if (itr != used_samplers.end()) {
-        ASSERT(itr->GetType() == type && itr->IsArray() == is_array &&
-               itr->IsShadow() == is_shadow);
-        return *itr;
+                     [buffer = buffer, offset = offset](const Sampler& entry) {
+                         return entry.GetBuffer() == buffer && entry.GetOffset() == offset;
+                     });
+    if (it != used_samplers.end()) {
+        ASSERT(it->IsBindless() && it->GetType() == type && it->IsArray() == is_array &&
+               it->IsShadow() == is_shadow);
+        return *it;
     }
 
     // Otherwise create a new mapping for this sampler
-    const std::size_t next_index = used_samplers.size();
-    const Sampler entry{cbuf_index, cbuf_offset, next_index, type, is_array, is_shadow};
-    return *used_samplers.emplace(entry).first;
+    const auto next_index = static_cast<u32>(used_samplers.size());
+    return used_samplers.emplace_back(
+        Sampler(next_index, offset, buffer, type, is_array, is_shadow));
 }
 
-void ShaderIR::WriteTexInstructionFloat(NodeBlock& bb, Instruction instr, const Node4& components,
-                                        bool is_tld4) {
+void ShaderIR::WriteTexInstructionFloat(NodeBlock& bb, Instruction instr, const Node4& components) {
     u32 dest_elem = 0;
     for (u32 elem = 0; elem < 4; ++elem) {
-        if (!is_tld4 && !instr.tex.IsComponentEnabled(elem)) {
+        if (!instr.tex.IsComponentEnabled(elem)) {
             // Skip disabled components
             continue;
         }
diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h
index 447fb5c1d..4300d9ff4 100644
--- a/src/video_core/shader/node.h
+++ b/src/video_core/shader/node.h
@@ -230,62 +230,49 @@ using NodeBlock = std::vector<Node>;
 class Sampler {
 public:
     /// This constructor is for bound samplers
-    explicit Sampler(std::size_t offset, std::size_t index, Tegra::Shader::TextureType type,
-                     bool is_array, bool is_shadow)
-        : offset{offset}, index{index}, type{type}, is_array{is_array}, is_shadow{is_shadow},
-          is_bindless{false} {}
+    constexpr explicit Sampler(u32 index, u32 offset, Tegra::Shader::TextureType type,
+                               bool is_array, bool is_shadow)
+        : index{index}, offset{offset}, type{type}, is_array{is_array}, is_shadow{is_shadow} {}
 
     /// This constructor is for bindless samplers
-    explicit Sampler(u32 cbuf_index, u32 cbuf_offset, std::size_t index,
-                     Tegra::Shader::TextureType type, bool is_array, bool is_shadow)
-        : offset{(static_cast<u64>(cbuf_index) << 32) | cbuf_offset}, index{index}, type{type},
-          is_array{is_array}, is_shadow{is_shadow}, is_bindless{true} {}
-
-    /// This constructor is for serialization/deserialization
-    explicit Sampler(std::size_t offset, std::size_t index, Tegra::Shader::TextureType type,
-                     bool is_array, bool is_shadow, bool is_bindless)
-        : offset{offset}, index{index}, type{type}, is_array{is_array}, is_shadow{is_shadow},
-          is_bindless{is_bindless} {}
-
-    std::size_t GetOffset() const {
+    constexpr explicit Sampler(u32 index, u32 offset, u32 buffer, Tegra::Shader::TextureType type,
+                               bool is_array, bool is_shadow)
+        : index{index}, offset{offset}, buffer{buffer}, type{type}, is_array{is_array},
+          is_shadow{is_shadow}, is_bindless{true} {}
+
+    constexpr u32 GetIndex() const {
+        return index;
+    }
+
+    constexpr u32 GetOffset() const {
         return offset;
     }
 
-    std::size_t GetIndex() const {
-        return index;
+    constexpr u32 GetBuffer() const {
+        return buffer;
     }
 
-    Tegra::Shader::TextureType GetType() const {
+    constexpr Tegra::Shader::TextureType GetType() const {
         return type;
     }
 
-    bool IsArray() const {
+    constexpr bool IsArray() const {
         return is_array;
     }
 
-    bool IsShadow() const {
+    constexpr bool IsShadow() const {
         return is_shadow;
     }
 
-    bool IsBindless() const {
+    constexpr bool IsBindless() const {
         return is_bindless;
     }
 
-    std::pair<u32, u32> GetBindlessCBuf() const {
-        return {static_cast<u32>(offset >> 32), static_cast<u32>(offset)};
-    }
-
-    bool operator<(const Sampler& rhs) const {
-        return std::tie(index, offset, type, is_array, is_shadow, is_bindless) <
-               std::tie(rhs.index, rhs.offset, rhs.type, rhs.is_array, rhs.is_shadow,
-                        rhs.is_bindless);
-    }
-
 private:
-    /// Offset in TSC memory from which to read the sampler object, as specified by the sampling
-    /// instruction.
-    std::size_t offset{};
-    std::size_t index{}; ///< Value used to index into the generated GLSL sampler array.
+    u32 index{};  ///< Emulated index given for this sampler.
+    u32 offset{}; ///< Offset in the const buffer from where the sampler is being read.
+    u32 buffer{}; ///< Buffer where the bindless sampler is being read (unused on bound samplers).
+
     Tegra::Shader::TextureType type{}; ///< The type used to sample this texture (Texture2D, etc)
     bool is_array{};    ///< Whether the texture is being sampled as an array texture or not.
     bool is_shadow{};   ///< Whether the texture is being sampled as a depth texture or not.
@@ -294,18 +281,13 @@ private:
 
 class Image final {
 public:
-    constexpr explicit Image(std::size_t offset, std::size_t index, Tegra::Shader::ImageType type)
-        : offset{offset}, index{index}, type{type}, is_bindless{false} {}
-
-    constexpr explicit Image(u32 cbuf_index, u32 cbuf_offset, std::size_t index,
-                             Tegra::Shader::ImageType type)
-        : offset{(static_cast<u64>(cbuf_index) << 32) | cbuf_offset}, index{index}, type{type},
-          is_bindless{true} {}
+    /// This constructor is for bound images
+    constexpr explicit Image(u32 index, u32 offset, Tegra::Shader::ImageType type)
+        : index{index}, offset{offset}, type{type} {}
 
-    constexpr explicit Image(std::size_t offset, std::size_t index, Tegra::Shader::ImageType type,
-                             bool is_bindless, bool is_written, bool is_read, bool is_atomic)
-        : offset{offset}, index{index}, type{type}, is_bindless{is_bindless},
-          is_written{is_written}, is_read{is_read}, is_atomic{is_atomic} {}
+    /// This constructor is for bindless images
+    constexpr explicit Image(u32 index, u32 offset, u32 buffer, Tegra::Shader::ImageType type)
+        : index{index}, offset{offset}, buffer{buffer}, type{type}, is_bindless{true} {}
 
     void MarkWrite() {
         is_written = true;
@@ -321,12 +303,16 @@ public:
         is_atomic = true;
     }
 
-    constexpr std::size_t GetOffset() const {
+    constexpr u32 GetIndex() const {
+        return index;
+    }
+
+    constexpr u32 GetOffset() const {
         return offset;
     }
 
-    constexpr std::size_t GetIndex() const {
-        return index;
+    constexpr u32 GetBuffer() const {
+        return buffer;
     }
 
     constexpr Tegra::Shader::ImageType GetType() const {
@@ -349,18 +335,11 @@ public:
         return is_atomic;
     }
 
-    constexpr std::pair<u32, u32> GetBindlessCBuf() const {
-        return {static_cast<u32>(offset >> 32), static_cast<u32>(offset)};
-    }
-
-    constexpr bool operator<(const Image& rhs) const {
-        return std::tie(offset, index, type, is_bindless) <
-               std::tie(rhs.offset, rhs.index, rhs.type, rhs.is_bindless);
-    }
-
 private:
-    u64 offset{};
-    std::size_t index{};
+    u32 index{};
+    u32 offset{};
+    u32 buffer{};
+
     Tegra::Shader::ImageType type{};
     bool is_bindless{};
     bool is_written{};
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h
index 7582999a5..26c8fde22 100644
--- a/src/video_core/shader/shader_ir.h
+++ b/src/video_core/shader/shader_ir.h
@@ -5,6 +5,7 @@
 #pragma once
 
 #include <array>
+#include <list>
 #include <map>
 #include <optional>
 #include <set>
@@ -95,11 +96,11 @@ public:
         return used_cbufs;
     }
 
-    const std::set<Sampler>& GetSamplers() const {
+    const std::list<Sampler>& GetSamplers() const {
         return used_samplers;
     }
 
-    const std::map<u64, Image>& GetImages() const {
+    const std::list<Image>& GetImages() const {
         return used_images;
     }
 
@@ -316,9 +317,6 @@ private:
     /// Access a bindless image sampler.
     Image& GetBindlessImage(Tegra::Shader::Register reg, Tegra::Shader::ImageType type);
 
-    /// Tries to access an existing image, updating it's state as needed
-    Image* TryUseExistingImage(u64 offset, Tegra::Shader::ImageType type);
-
     /// Extracts a sequence of bits from a node
     Node BitfieldExtract(Node value, u32 offset, u32 bits);
 
@@ -326,7 +324,7 @@ private:
     Node BitfieldInsert(Node base, Node insert, u32 offset, u32 bits);
 
     void WriteTexInstructionFloat(NodeBlock& bb, Tegra::Shader::Instruction instr,
-                                  const Node4& components, bool is_tld4 = false);
+                                  const Node4& components);
 
     void WriteTexsInstructionFloat(NodeBlock& bb, Tegra::Shader::Instruction instr,
                                    const Node4& components, bool ignore_mask = false);
@@ -402,8 +400,8 @@ private:
     std::set<Tegra::Shader::Attribute::Index> used_input_attributes;
     std::set<Tegra::Shader::Attribute::Index> used_output_attributes;
     std::map<u32, ConstBuffer> used_cbufs;
-    std::set<Sampler> used_samplers;
-    std::map<u64, Image> used_images;
+    std::list<Sampler> used_samplers;
+    std::list<Image> used_images;
     std::array<bool, Tegra::Engines::Maxwell3D::Regs::NumClipDistances> used_clip_distances{};
     std::map<GlobalMemoryBase, GlobalMemoryUsage> used_global_memory;
     bool uses_layer{};