12 files changed, 432 insertions, 78 deletions
diff --git a/src/video_core/shader/async_shaders.cpp b/src/video_core/shader/async_shaders.cpp
new file mode 100644
index 000000000..aabd62c5c
--- /dev/null
+++ b/src/video_core/shader/async_shaders.cpp
@@ -0,0 +1,221 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <condition_variable>
+#include <mutex>
+#include <thread>
+#include <vector>
+#include "video_core/engines/maxwell_3d.h"
+#include "video_core/renderer_base.h"
+#include "video_core/renderer_opengl/gl_shader_cache.h"
+#include "video_core/shader/async_shaders.h"
+
+namespace VideoCommon::Shader {
+
+AsyncShaders::AsyncShaders(Core::Frontend::EmuWindow& emu_window) : emu_window{emu_window} {}
+
+AsyncShaders::~AsyncShaders() {
+    KillWorkers(); // Signal the workers to exit and release their handles and shared contexts
+}
+
+void AsyncShaders::AllocateWorkers() {
+    // Use a quarter of the hardware threads (hardware_concurrency() may report 0), but
+    // always at least 1 worker and never more than MAX_THREADS
+    constexpr u32 MAX_THREADS = 4;
+    const u32 threads_used = std::thread::hardware_concurrency() / 4;
+    const auto max_worker_count = std::max(1U, threads_used);
+    const auto num_workers = std::min(max_worker_count, MAX_THREADS);
+
+    // If the pool already has this many workers, there is nothing to do
+    if (num_workers == worker_threads.size()) {
+        return;
+    }
+
+    // If workers already exist, join them and make sure the exit flag is clear again,
+    // otherwise every new worker would see it set and return immediately
+    if (!worker_threads.empty()) {
+        FreeWorkers();
+        is_thread_exiting.store(false);
+    }
+
+    // Create workers; each thread gets its own shared context, owned by context_list
+    for (u32 i = 0; i < num_workers; i++) {
+        context_list.push_back(emu_window.CreateSharedContext());
+        worker_threads.emplace_back(&AsyncShaders::ShaderCompilerThread, this,
+                                    context_list.back().get());
+    }
+}
+
+void AsyncShaders::FreeWorkers() {
+    // Flag the exit under queue_mutex so a worker about to block cannot miss the wake-up
+    std::unique_lock lock{queue_mutex};
+    is_thread_exiting.store(true);
+    lock.unlock();
+    cv.notify_all();
+    for (auto& thread : worker_threads) {
+        thread.join();
+    }
+    context_list.clear();
+    worker_threads.clear();
+    is_thread_exiting.store(false); // Every worker is joined; re-arm for AllocateWorkers()
+}
+
+void AsyncShaders::KillWorkers() {
+    // The workers run ShaderCompilerThread() on this object and keep using its queue, mutex,
+    // condition variable and shared contexts. Detaching would leave those threads running
+    // against destroyed state once the destructor returns, and idle workers would never be
+    // woken to notice the exit flag. Stop them the way FreeWorkers() does instead: flag,
+    // notify, join, then release the contexts and handles. This blocks until any in-flight
+    // compilation finishes.
+    FreeWorkers();
+    // Leave the exit flag set, as it always was after a kill
+    is_thread_exiting.store(true);
+}
+
+bool AsyncShaders::HasWorkQueued() const {
+    return !pending_queue.empty(); // Unlocked read; the worker loop holds queue_mutex
+}
+
+bool AsyncShaders::HasCompletedWork() const {
+    std::shared_lock lock{completed_mutex}; // Shared lock; writers lock this mutex exclusively
+    return !finished_work.empty();
+}
+
+bool AsyncShaders::IsShaderAsync(const Tegra::GPU& gpu) const {
+    const auto& regs = gpu.Maxwell3D().regs;
+
+    // Heuristic for "is this shader likely to be reused?":
+    //  - With depth enabled we assume the game is rendering something that will be used more
+    //    than once, so the shader may be built asynchronously.
+    //  - Otherwise, a draw of at most 6 indices or vertices is assumed to be a full screen
+    //    quad. Those shaders are usually used once to build a texture, so they must be built
+    //    synchronously.
+    const auto is_small_draw = [&regs] {
+        return regs.index_array.count <= 6 || regs.vertex_buffer.count <= 6;
+    };
+
+    if (regs.zeta_enable) {
+        return true;
+    }
+    return !is_small_draw();
+}
+
+std::vector<AsyncShaders::Result> AsyncShaders::GetCompletedWork() {
+    // Swap the finished results out in O(1) so the lock is held as briefly as possible and
+    // workers are not stalled behind element-wise moves. finished_work is left empty.
+    std::vector<Result> results;
+    {
+        std::unique_lock lock{completed_mutex};
+        results.swap(finished_work);
+    }
+    return results;
+}
+
+void AsyncShaders::QueueOpenGLShader(const OpenGL::Device& device,
+                                     Tegra::Engines::ShaderType shader_type, u64 uid,
+                                     std::vector<u64> code, std::vector<u64> code_b,
+                                     u32 main_offset,
+                                     VideoCommon::Shader::CompilerSettings compiler_settings,
+                                     const VideoCommon::Shader::Registry& registry,
+                                     VAddr cpu_addr) {
+    WorkerParams job{
+        .backend = device.UseAssemblyShaders() ? Backend::GLASM : Backend::OpenGL,
+        .device = &device, // Borrowed, not owned
+        .shader_type = shader_type,
+        .uid = uid,
+        .code = std::move(code),
+        .code_b = std::move(code_b),
+        .main_offset = main_offset,
+        .compiler_settings = compiler_settings,
+        .registry = registry, // Copied into the job's std::optional<Registry>
+        .cpu_address = cpu_addr,
+    };
+    std::lock_guard guard{queue_mutex};
+    pending_queue.push(std::move(job));
+    cv.notify_one();
+}
+
+void AsyncShaders::QueueVulkanShader(Vulkan::VKPipelineCache* pp_cache,
+                                     const Vulkan::VKDevice& device, Vulkan::VKScheduler& scheduler,
+                                     Vulkan::VKDescriptorPool& descriptor_pool,
+                                     Vulkan::VKUpdateDescriptorQueue& update_descriptor_queue,
+                                     Vulkan::VKRenderPassCache& renderpass_cache,
+                                     std::vector<VkDescriptorSetLayoutBinding> bindings,
+                                     Vulkan::SPIRVProgram program,
+                                     Vulkan::GraphicsPipelineCacheKey key) {
+    // The job borrows the Vulkan objects by pointer; bindings and program are moved into it
+    WorkerParams params{
+        .backend = Backend::Vulkan,
+        .pp_cache = pp_cache,
+        .vk_device = &device,
+        .scheduler = &scheduler,
+        .descriptor_pool = &descriptor_pool,
+        .update_descriptor_queue = &update_descriptor_queue,
+        .renderpass_cache = &renderpass_cache,
+        .bindings = std::move(bindings),
+        .program = std::move(program),
+        .key = key,
+    };
+    std::unique_lock lock(queue_mutex);
+    pending_queue.push(std::move(params));
+    cv.notify_one();
+}
+
+void AsyncShaders::ShaderCompilerThread(Core::Frontend::GraphicsContext* context) {
+    while (!is_thread_exiting.load(std::memory_order_relaxed)) {
+        std::unique_lock lock{queue_mutex};
+        // Sleep until there is a job to take or the workers are asked to exit
+        cv.wait(lock, [this] { return HasWorkQueued() || is_thread_exiting; });
+        if (is_thread_exiting) {
+            return;
+        }
+
+        // The wait returns with queue_mutex held and its predicate satisfied, so a job is
+        // normally present; this single check only guards front() against an empty queue
+        if (pending_queue.empty()) {
+            continue;
+        }
+
+        // Pull work from queue and unlock so other workers can dequeue during the compile
+        WorkerParams work = std::move(pending_queue.front());
+        pending_queue.pop();
+        lock.unlock();
+
+        if (work.backend == Backend::OpenGL || work.backend == Backend::GLASM) {
+            const ShaderIR ir(work.code, work.main_offset, work.compiler_settings, *work.registry);
+            const auto scope = context->Acquire();
+            auto program =
+                OpenGL::BuildShader(*work.device, work.shader_type, work.uid, ir, *work.registry);
+            Result result{};
+            result.backend = work.backend;
+            result.cpu_address = work.cpu_address;
+            result.uid = work.uid;
+            result.code = std::move(work.code);
+            result.code_b = std::move(work.code_b);
+            result.shader_type = work.shader_type;
+
+            if (work.backend == Backend::OpenGL) {
+                result.program.opengl = std::move(program->source_program);
+            } else if (work.backend == Backend::GLASM) {
+                result.program.glasm = std::move(program->assembly_program);
+            }
+
+            {
+                // Publish the result; GetCompletedWork() collects it under the same mutex
+                std::unique_lock complete_lock(completed_mutex);
+                finished_work.push_back(std::move(result));
+            }
+        } else if (work.backend == Backend::Vulkan) {
+            // Vulkan pipelines go straight into the pipeline cache instead of finished_work
+            auto pipeline = std::make_unique<Vulkan::VKGraphicsPipeline>(
+                *work.vk_device, *work.scheduler, *work.descriptor_pool,
+                *work.update_descriptor_queue, *work.renderpass_cache, work.key, work.bindings,
+                work.program);
+
+            work.pp_cache->EmplacePipeline(std::move(pipeline));
+        }
+    }
+}
+
+} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/async_shaders.h b/src/video_core/shader/async_shaders.h
new file mode 100644
index 000000000..7cf8d994c
--- /dev/null
+++ b/src/video_core/shader/async_shaders.h
@@ -0,0 +1,136 @@
+// Copyright 2020 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <condition_variable>
+#include <memory>
+#include <shared_mutex>
+#include <thread>
+
+#include "common/common_types.h"
+#include "video_core/renderer_opengl/gl_device.h"
+#include "video_core/renderer_opengl/gl_resource_manager.h"
+#include "video_core/renderer_opengl/gl_shader_decompiler.h"
+#include "video_core/renderer_vulkan/vk_device.h"
+#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
+#include "video_core/renderer_vulkan/vk_scheduler.h"
+
+namespace Core::Frontend {
+class EmuWindow;
+class GraphicsContext;
+} // namespace Core::Frontend
+
+namespace Tegra {
+class GPU;
+}
+
+namespace Vulkan {
+class VKPipelineCache;
+}
+
+namespace VideoCommon::Shader {
+
+class AsyncShaders {
+public:
+    enum class Backend {
+        OpenGL,
+        GLASM,
+        Vulkan,
+    };
+
+    struct ResultPrograms {
+        OpenGL::OGLProgram opengl;         // Filled for Backend::OpenGL results
+        OpenGL::OGLAssemblyProgram glasm;  // Filled for Backend::GLASM results
+    };
+
+    struct Result {
+        u64 uid;
+        VAddr cpu_address;
+        Backend backend;
+        ResultPrograms program;
+        std::vector<u64> code;
+        std::vector<u64> code_b;
+        Tegra::Engines::ShaderType shader_type;
+    };
+
+    explicit AsyncShaders(Core::Frontend::EmuWindow& emu_window);
+    ~AsyncShaders();
+
+    /// Start up shader worker threads
+    void AllocateWorkers();
+
+    /// Signal all worker threads to exit, join them and release their shared contexts
+    void FreeWorkers();
+
+    /// Tell all worker threads to exit and release their thread handles and shared contexts
+    void KillWorkers();
+
+    /// Check to see if any shaders have actually been compiled
+    [[nodiscard]] bool HasCompletedWork() const;
+
+    /// Deduce if a shader can be built on another thread or MUST be built in sync. We cannot
+    /// build every shader async as some shaders are only built and executed once. We try to
+    /// "guess" which shaders would be used only once
+    [[nodiscard]] bool IsShaderAsync(const Tegra::GPU& gpu) const;
+
+    /// Pulls completed compiled shaders, leaving the internal result list empty
+    [[nodiscard]] std::vector<Result> GetCompletedWork();
+
+    void QueueOpenGLShader(const OpenGL::Device& device, Tegra::Engines::ShaderType shader_type,
+                           u64 uid, std::vector<u64> code, std::vector<u64> code_b, u32 main_offset,
+                           CompilerSettings compiler_settings, const Registry& registry,
+                           VAddr cpu_addr);
+
+    void QueueVulkanShader(Vulkan::VKPipelineCache* pp_cache, const Vulkan::VKDevice& device,
+                           Vulkan::VKScheduler& scheduler,
+                           Vulkan::VKDescriptorPool& descriptor_pool,
+                           Vulkan::VKUpdateDescriptorQueue& update_descriptor_queue,
+                           Vulkan::VKRenderPassCache& renderpass_cache,
+                           std::vector<VkDescriptorSetLayoutBinding> bindings,
+                           Vulkan::SPIRVProgram program, Vulkan::GraphicsPipelineCacheKey key);
+
+private:
+    void ShaderCompilerThread(Core::Frontend::GraphicsContext* context); // Worker thread body
+
+    /// Check our worker queue to see if we have any work queued already
+    [[nodiscard]] bool HasWorkQueued() const;
+
+    struct WorkerParams {
+        Backend backend;
+        // For OGL
+        const OpenGL::Device* device;
+        Tegra::Engines::ShaderType shader_type;
+        u64 uid;
+        std::vector<u64> code;
+        std::vector<u64> code_b;
+        u32 main_offset;
+        CompilerSettings compiler_settings;
+        std::optional<Registry> registry;
+        VAddr cpu_address;
+
+        // For Vulkan
+        Vulkan::VKPipelineCache* pp_cache;
+        const Vulkan::VKDevice* vk_device;
+        Vulkan::VKScheduler* scheduler;
+        Vulkan::VKDescriptorPool* descriptor_pool;
+        Vulkan::VKUpdateDescriptorQueue* update_descriptor_queue;
+        Vulkan::VKRenderPassCache* renderpass_cache;
+        std::vector<VkDescriptorSetLayoutBinding> bindings;
+        Vulkan::SPIRVProgram program;
+        Vulkan::GraphicsPipelineCacheKey key;
+    };
+
+    std::condition_variable cv;                 // Wakes workers: new work or FreeWorkers()
+    mutable std::mutex queue_mutex;             // Guards pending_queue
+    mutable std::shared_mutex completed_mutex;  // Guards finished_work
+    std::atomic<bool> is_thread_exiting{};      // Set to make workers leave their loop
+    std::vector<std::unique_ptr<Core::Frontend::GraphicsContext>> context_list;
+    std::vector<std::thread> worker_threads;
+    std::queue<WorkerParams> pending_queue;
+    std::vector<Result> finished_work;
+    Core::Frontend::EmuWindow& emu_window;
+};
+
+} // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/control_flow.cpp b/src/video_core/shader/control_flow.cpp
index 8d86020f6..336397cdb 100644
--- a/src/video_core/shader/control_flow.cpp
+++ b/src/video_core/shader/control_flow.cpp
@@ -187,24 +187,26 @@ std::optional<std::pair<BufferInfo, u64>> TrackLDC(const CFGRebuildState& state,
 
 std::optional<u64> TrackSHLRegister(const CFGRebuildState& state, u32& pos,
                                     u64 ldc_tracked_register) {
-    return TrackInstruction<u64>(state, pos,
-                                 [ldc_tracked_register](auto instr, const auto& opcode) {
-                                     return opcode.GetId() == OpCode::Id::SHL_IMM &&
-                                            instr.gpr0.Value() == ldc_tracked_register;
-                                 },
-                                 [](auto instr, const auto&) { return instr.gpr8.Value(); });
+    return TrackInstruction<u64>(
+        state, pos,
+        [ldc_tracked_register](auto instr, const auto& opcode) {
+            return opcode.GetId() == OpCode::Id::SHL_IMM &&
+                   instr.gpr0.Value() == ldc_tracked_register;
+        },
+        [](auto instr, const auto&) { return instr.gpr8.Value(); });
 }
 
 std::optional<u32> TrackIMNMXValue(const CFGRebuildState& state, u32& pos,
                                    u64 shl_tracked_register) {
-    return TrackInstruction<u32>(state, pos,
-                                 [shl_tracked_register](auto instr, const auto& opcode) {
-                                     return opcode.GetId() == OpCode::Id::IMNMX_IMM &&
-                                            instr.gpr0.Value() == shl_tracked_register;
-                                 },
-                                 [](auto instr, const auto&) {
-                                     return static_cast<u32>(instr.alu.GetSignedImm20_20() + 1);
-                                 });
+    return TrackInstruction<u32>(
+        state, pos,
+        [shl_tracked_register](auto instr, const auto& opcode) {
+            return opcode.GetId() == OpCode::Id::IMNMX_IMM &&
+                   instr.gpr0.Value() == shl_tracked_register;
+        },
+        [](auto instr, const auto&) {
+            return static_cast<u32>(instr.alu.GetSignedImm20_20() + 1);
+        });
 }
 
 std::optional<BranchIndirectInfo> TrackBranchIndirectInfo(const CFGRebuildState& state, u32 pos) {
diff --git a/src/video_core/shader/decode/arithmetic_integer.cpp b/src/video_core/shader/decode/arithmetic_integer.cpp
index a041519b7..73155966f 100644
--- a/src/video_core/shader/decode/arithmetic_integer.cpp
+++ b/src/video_core/shader/decode/arithmetic_integer.cpp
@@ -98,12 +98,12 @@ u32 ShaderIR::DecodeArithmeticInteger(NodeBlock& bb, u32 pc) {
         op_b = GetOperandAbsNegInteger(op_b, false, instr.iadd3.neg_b, true);
         op_c = GetOperandAbsNegInteger(op_c, false, instr.iadd3.neg_c, true);
 
-        const Node value = [&]() {
-            const Node add_ab = Operation(OperationCode::IAdd, NO_PRECISE, op_a, op_b);
+        const Node value = [&] {
+            Node add_ab = Operation(OperationCode::IAdd, NO_PRECISE, op_a, op_b);
             if (opcode->get().GetId() != OpCode::Id::IADD3_R) {
                 return Operation(OperationCode::IAdd, NO_PRECISE, add_ab, op_c);
             }
-            const Node shifted = [&]() {
+            const Node shifted = [&] {
                 switch (instr.iadd3.mode) {
                 case Tegra::Shader::IAdd3Mode::RightShift:
                     // TODO(tech4me): According to
diff --git a/src/video_core/shader/decode/image.cpp b/src/video_core/shader/decode/image.cpp
index 07778dc3e..e75ca4fdb 100644
--- a/src/video_core/shader/decode/image.cpp
+++ b/src/video_core/shader/decode/image.cpp
@@ -31,11 +31,11 @@ ComponentType GetComponentType(Tegra::Engines::SamplerDescriptor descriptor,
                                std::size_t component) {
     const TextureFormat format{descriptor.format};
     switch (format) {
-    case TextureFormat::R16_G16_B16_A16:
-    case TextureFormat::R32_G32_B32_A32:
-    case TextureFormat::R32_G32_B32:
-    case TextureFormat::R32_G32:
-    case TextureFormat::R16_G16:
+    case TextureFormat::R16G16B16A16:
+    case TextureFormat::R32G32B32A32:
+    case TextureFormat::R32G32B32:
+    case TextureFormat::R32G32:
+    case TextureFormat::R16G16:
     case TextureFormat::R32:
     case TextureFormat::R16:
     case TextureFormat::R8:
@@ -97,7 +97,7 @@ ComponentType GetComponentType(Tegra::Engines::SamplerDescriptor descriptor,
         break;
     case TextureFormat::B5G6R5:
     case TextureFormat::B6G5R5:
-    case TextureFormat::BF10GF11RF11:
+    case TextureFormat::B10G11R11:
         if (component == 0) {
             return descriptor.b_type;
         }
@@ -108,9 +108,9 @@ ComponentType GetComponentType(Tegra::Engines::SamplerDescriptor descriptor,
             return descriptor.r_type;
         }
         break;
-    case TextureFormat::G8R24:
-    case TextureFormat::G24R8:
-    case TextureFormat::G8R8:
+    case TextureFormat::R24G8:
+    case TextureFormat::R8G24:
+    case TextureFormat::R8G8:
     case TextureFormat::G4R4:
         if (component == 0) {
             return descriptor.g_type;
@@ -137,15 +137,15 @@ bool IsComponentEnabled(std::size_t component_mask, std::size_t component) {
 
 u32 GetComponentSize(TextureFormat format, std::size_t component) {
     switch (format) {
-    case TextureFormat::R32_G32_B32_A32:
+    case TextureFormat::R32G32B32A32:
         return 32;
-    case TextureFormat::R16_G16_B16_A16:
+    case TextureFormat::R16G16B16A16:
         return 16;
-    case TextureFormat::R32_G32_B32:
+    case TextureFormat::R32G32B32:
         return component <= 2 ? 32 : 0;
-    case TextureFormat::R32_G32:
+    case TextureFormat::R32G32:
         return component <= 1 ? 32 : 0;
-    case TextureFormat::R16_G16:
+    case TextureFormat::R16G16:
         return component <= 1 ? 16 : 0;
     case TextureFormat::R32:
         return component == 0 ? 32 : 0;
@@ -192,7 +192,7 @@ u32 GetComponentSize(TextureFormat format, std::size_t component) {
             return 6;
         }
         return 0;
-    case TextureFormat::BF10GF11RF11:
+    case TextureFormat::B10G11R11:
         if (component == 1 || component == 2) {
             return 11;
         }
@@ -200,7 +200,7 @@ u32 GetComponentSize(TextureFormat format, std::size_t component) {
             return 10;
         }
         return 0;
-    case TextureFormat::G8R24:
+    case TextureFormat::R24G8:
         if (component == 0) {
             return 8;
         }
@@ -208,7 +208,7 @@ u32 GetComponentSize(TextureFormat format, std::size_t component) {
             return 24;
         }
         return 0;
-    case TextureFormat::G24R8:
+    case TextureFormat::R8G24:
         if (component == 0) {
             return 8;
         }
@@ -216,7 +216,7 @@ u32 GetComponentSize(TextureFormat format, std::size_t component) {
             return 24;
         }
         return 0;
-    case TextureFormat::G8R8:
+    case TextureFormat::R8G8:
         return (component == 0 || component == 1) ? 8 : 0;
     case TextureFormat::G4R4:
         return (component == 0 || component == 1) ? 4 : 0;
@@ -231,25 +231,25 @@ std::size_t GetImageComponentMask(TextureFormat format) {
     constexpr u8 B = 0b0100;
     constexpr u8 A = 0b1000;
     switch (format) {
-    case TextureFormat::R32_G32_B32_A32:
-    case TextureFormat::R16_G16_B16_A16:
+    case TextureFormat::R32G32B32A32:
+    case TextureFormat::R16G16B16A16:
     case TextureFormat::A8R8G8B8:
     case TextureFormat::A2B10G10R10:
     case TextureFormat::A4B4G4R4:
     case TextureFormat::A5B5G5R1:
     case TextureFormat::A1B5G5R5:
         return std::size_t{R | G | B | A};
-    case TextureFormat::R32_G32_B32:
+    case TextureFormat::R32G32B32:
     case TextureFormat::R32_B24G8:
     case TextureFormat::B5G6R5:
     case TextureFormat::B6G5R5:
-    case TextureFormat::BF10GF11RF11:
+    case TextureFormat::B10G11R11:
         return std::size_t{R | G | B};
-    case TextureFormat::R32_G32:
-    case TextureFormat::R16_G16:
-    case TextureFormat::G8R24:
-    case TextureFormat::G24R8:
-    case TextureFormat::G8R8:
+    case TextureFormat::R32G32:
+    case TextureFormat::R16G16:
+    case TextureFormat::R24G8:
+    case TextureFormat::R8G24:
+    case TextureFormat::R8G8:
     case TextureFormat::G4R4:
         return std::size_t{R | G};
     case TextureFormat::R32:
diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp
index 63adbc4a3..e2bba88dd 100644
--- a/src/video_core/shader/decode/memory.cpp
+++ b/src/video_core/shader/decode/memory.cpp
@@ -386,7 +386,8 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
         break;
     }
     case OpCode::Id::RED: {
-        UNIMPLEMENTED_IF_MSG(instr.red.type != GlobalAtomicType::U32);
+        UNIMPLEMENTED_IF_MSG(instr.red.type != GlobalAtomicType::U32, "type={}",
+                             static_cast<int>(instr.red.type.Value()));
         const auto [real_address, base_address, descriptor] =
             TrackGlobalMemory(bb, instr, true, true);
         if (!real_address || !base_address) {
@@ -471,9 +472,9 @@ std::tuple<Node, Node, GlobalMemoryBase> ShaderIR::TrackGlobalMemory(NodeBlock&
 
     const auto [base_address, index, offset] =
         TrackCbuf(addr_register, global_code, static_cast<s64>(global_code.size()));
-    ASSERT_OR_EXECUTE_MSG(base_address != nullptr,
-                          { return std::make_tuple(nullptr, nullptr, GlobalMemoryBase{}); },
-                          "Global memory tracking failed");
+    ASSERT_OR_EXECUTE_MSG(
+        base_address != nullptr, { return std::make_tuple(nullptr, nullptr, GlobalMemoryBase{}); },
+        "Global memory tracking failed");
 
     bb.push_back(Comment(fmt::format("Base address is c[0x{:x}][0x{:x}]", index, offset)));
 
diff --git a/src/video_core/shader/decode/other.cpp b/src/video_core/shader/decode/other.cpp
index c0a8f233f..29a7cfbfe 100644
--- a/src/video_core/shader/decode/other.cpp
+++ b/src/video_core/shader/decode/other.cpp
@@ -75,8 +75,7 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) {
         const Node value = [this, instr] {
             switch (instr.sys20) {
             case SystemVariable::LaneId:
-                LOG_WARNING(HW_GPU, "S2R instruction with LaneId is incomplete");
-                return Immediate(0U);
+                return Operation(OperationCode::ThreadId);
             case SystemVariable::InvocationId:
                 return Operation(OperationCode::InvocationId);
             case SystemVariable::Ydirection:
diff --git a/src/video_core/shader/decode/video.cpp b/src/video_core/shader/decode/video.cpp
index 64ba60ea2..1c0957277 100644
--- a/src/video_core/shader/decode/video.cpp
+++ b/src/video_core/shader/decode/video.cpp
@@ -91,29 +91,28 @@ u32 ShaderIR::DecodeVideo(NodeBlock& bb, u32 pc) {
     return pc;
 }
 
-Node ShaderIR::GetVideoOperand(Node op, bool is_chunk, bool is_signed,
-                               Tegra::Shader::VideoType type, u64 byte_height) {
+Node ShaderIR::GetVideoOperand(Node op, bool is_chunk, bool is_signed, VideoType type,
+                               u64 byte_height) {
     if (!is_chunk) {
         return BitfieldExtract(op, static_cast<u32>(byte_height * 8), 8);
     }
-    const Node zero = Immediate(0);
 
     switch (type) {
-    case Tegra::Shader::VideoType::Size16_Low:
+    case VideoType::Size16_Low:
         return BitfieldExtract(op, 0, 16);
-    case Tegra::Shader::VideoType::Size16_High:
+    case VideoType::Size16_High:
         return BitfieldExtract(op, 16, 16);
-    case Tegra::Shader::VideoType::Size32:
+    case VideoType::Size32:
         // TODO(Rodrigo): From my hardware tests it becomes a bit "mad" when this type is used
         // (1 * 1 + 0 == 0x5b800000). Until a better explanation is found: abort.
         UNIMPLEMENTED();
-        return zero;
-    case Tegra::Shader::VideoType::Invalid:
+        return Immediate(0);
+    case VideoType::Invalid:
         UNREACHABLE_MSG("Invalid instruction encoding");
-        return zero;
+        return Immediate(0);
     default:
         UNREACHABLE();
-        return zero;
+        return Immediate(0);
     }
 }
 
diff --git a/src/video_core/shader/decode/xmad.cpp b/src/video_core/shader/decode/xmad.cpp
index c83dc6615..233b8fa42 100644
--- a/src/video_core/shader/decode/xmad.cpp
+++ b/src/video_core/shader/decode/xmad.cpp
@@ -81,20 +81,21 @@ u32 ShaderIR::DecodeXmad(NodeBlock& bb, u32 pc) {
     SetTemporary(bb, 0, product);
     product = GetTemporary(0);
 
-    const Node original_c = op_c;
+    Node original_c = op_c;
     const Tegra::Shader::XmadMode set_mode = mode; // Workaround to clang compile error
-    op_c = [&]() {
+    op_c = [&] {
         switch (set_mode) {
         case Tegra::Shader::XmadMode::None:
             return original_c;
         case Tegra::Shader::XmadMode::CLo:
-            return BitfieldExtract(original_c, 0, 16);
+            return BitfieldExtract(std::move(original_c), 0, 16);
         case Tegra::Shader::XmadMode::CHi:
-            return BitfieldExtract(original_c, 16, 16);
+            return BitfieldExtract(std::move(original_c), 16, 16);
         case Tegra::Shader::XmadMode::CBcc: {
-            const Node shifted_b = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed_b,
-                                                   original_b, Immediate(16));
-            return SignedOperation(OperationCode::IAdd, is_signed_c, original_c, shifted_b);
+            Node shifted_b = SignedOperation(OperationCode::ILogicalShiftLeft, is_signed_b,
+                                             original_b, Immediate(16));
+            return SignedOperation(OperationCode::IAdd, is_signed_c, std::move(original_c),
+                                   std::move(shifted_b));
         }
         case Tegra::Shader::XmadMode::CSfu: {
             const Node comp_a =
diff --git a/src/video_core/shader/memory_util.cpp b/src/video_core/shader/memory_util.cpp
index 5071c83ca..e18ccba8e 100644
--- a/src/video_core/shader/memory_util.cpp
+++ b/src/video_core/shader/memory_util.cpp
@@ -16,11 +16,10 @@
 
 namespace VideoCommon::Shader {
 
-GPUVAddr GetShaderAddress(Core::System& system,
+GPUVAddr GetShaderAddress(Tegra::Engines::Maxwell3D& maxwell3d,
                           Tegra::Engines::Maxwell3D::Regs::ShaderProgram program) {
-    const auto& gpu{system.GPU().Maxwell3D()};
-    const auto& shader_config{gpu.regs.shader_config[static_cast<std::size_t>(program)]};
-    return gpu.regs.code_address.CodeAddress() + shader_config.offset;
+    const auto& shader_config{maxwell3d.regs.shader_config[static_cast<std::size_t>(program)]};
+    return maxwell3d.regs.code_address.CodeAddress() + shader_config.offset;
 }
 
 bool IsSchedInstruction(std::size_t offset, std::size_t main_offset) {
diff --git a/src/video_core/shader/memory_util.h b/src/video_core/shader/memory_util.h
index be90d24fd..4624d38e6 100644
--- a/src/video_core/shader/memory_util.h
+++ b/src/video_core/shader/memory_util.h
@@ -11,10 +11,6 @@
 #include "video_core/engines/maxwell_3d.h"
 #include "video_core/engines/shader_type.h"
 
-namespace Core {
-class System;
-}
-
 namespace Tegra {
 class MemoryManager;
 }
@@ -27,7 +23,7 @@ constexpr u32 STAGE_MAIN_OFFSET = 10;
 constexpr u32 KERNEL_MAIN_OFFSET = 0;
 
 /// Gets the address for the specified shader stage program
-GPUVAddr GetShaderAddress(Core::System& system,
+GPUVAddr GetShaderAddress(Tegra::Engines::Maxwell3D& maxwell3d,
                           Tegra::Engines::Maxwell3D::Regs::ShaderProgram program);
 
 /// Gets if the current instruction offset is a scheduler instruction
diff --git a/src/video_core/shader/shader_ir.cpp b/src/video_core/shader/shader_ir.cpp
index e322c3402..29d794b34 100644
--- a/src/video_core/shader/shader_ir.cpp
+++ b/src/video_core/shader/shader_ir.cpp
@@ -112,9 +112,9 @@ Node ShaderIR::GetOutputAttribute(Attribute::Index index, u64 element, Node buff
 }
 
 Node ShaderIR::GetInternalFlag(InternalFlag flag, bool negated) const {
-    const Node node = MakeNode<InternalFlagNode>(flag);
+    Node node = MakeNode<InternalFlagNode>(flag);
     if (negated) {
-        return Operation(OperationCode::LogicalNegate, node);
+        return Operation(OperationCode::LogicalNegate, std::move(node));
     }
     return node;
 }