11 files changed, 356 insertions, 159 deletions
diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp
index 1e2ff46b0..3f0939ec9 100644
--- a/src/video_core/gpu_thread.cpp
+++ b/src/video_core/gpu_thread.cpp
@@ -75,7 +75,7 @@ void ThreadManager::StartThread(VideoCore::RendererBase& renderer, Tegra::DmaPus
 
 void ThreadManager::SubmitList(Tegra::CommandList&& entries) {
     const u64 fence{PushCommand(SubmitListCommand(std::move(entries)))};
-    const s64 synchronization_ticks{Core::Timing::usToCycles(9000)};
+    const s64 synchronization_ticks{Core::Timing::usToCycles(std::chrono::microseconds{9000})};
     system.CoreTiming().ScheduleEvent(synchronization_ticks, synchronization_event, fence);
 }
 
diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp
index 1d1581f49..65a88b06c 100644
--- a/src/video_core/renderer_opengl/gl_device.cpp
+++ b/src/video_core/renderer_opengl/gl_device.cpp
@@ -2,11 +2,14 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
+#include <array>
 #include <cstddef>
 #include <glad/glad.h>
 
 #include "common/logging/log.h"
+#include "common/scope_exit.h"
 #include "video_core/renderer_opengl/gl_device.h"
+#include "video_core/renderer_opengl/gl_resource_manager.h"
 
 namespace OpenGL {
 
@@ -24,6 +27,7 @@ Device::Device() {
     max_vertex_attributes = GetInteger<u32>(GL_MAX_VERTEX_ATTRIBS);
     max_varyings = GetInteger<u32>(GL_MAX_VARYING_VECTORS);
     has_variable_aoffi = TestVariableAoffi();
+    has_component_indexing_bug = TestComponentIndexingBug();
 }
 
 Device::Device(std::nullptr_t) {
@@ -31,6 +35,7 @@ Device::Device(std::nullptr_t) {
     max_vertex_attributes = 16;
     max_varyings = 15;
     has_variable_aoffi = true;
+    has_component_indexing_bug = false;
 }
 
 bool Device::TestVariableAoffi() {
@@ -52,4 +57,53 @@ void main() {
     return supported;
 }
 
+bool Device::TestComponentIndexingBug() {
+    constexpr char log_message[] = "Renderer_ComponentIndexingBug: {}";
+    const GLchar* COMPONENT_TEST = R"(#version 430 core
+layout (std430, binding = 0) buffer OutputBuffer {
+    uint output_value;
+};
+layout (std140, binding = 0) uniform InputBuffer {
+    uvec4 input_value[4096];
+};
+layout (location = 0) uniform uint idx;
+void main() {
+    output_value = input_value[idx >> 2][idx & 3];
+})";
+    const GLuint shader{glCreateShaderProgramv(GL_VERTEX_SHADER, 1, &COMPONENT_TEST)};
+    SCOPE_EXIT({ glDeleteProgram(shader); });
+    glUseProgram(shader);
+    // Bind an empty vertex array; the draw below uses no vertex attributes.
+    OGLVertexArray vao;
+    vao.Create();
+    glBindVertexArray(vao.handle);
+    // Input data: the loop below asks the shader for elements 4..7, one per draw.
+    constexpr std::array<GLuint, 8> values{0, 0, 0, 0, 0x1236327, 0x985482, 0x872753, 0x2378432};
+    OGLBuffer ubo;
+    ubo.Create();
+    glNamedBufferData(ubo.handle, sizeof(values), values.data(), GL_STATIC_DRAW);
+    glBindBufferBase(GL_UNIFORM_BUFFER, 0, ubo.handle);
+    // Single-uint storage buffer the shader writes its result into.
+    OGLBuffer ssbo;
+    ssbo.Create();
+    glNamedBufferStorage(ssbo.handle, sizeof(GLuint), nullptr, GL_CLIENT_STORAGE_BIT);
+    // Any mismatch between what was uploaded and what was read back flags the bug.
+    for (GLuint index = 4; index < 8; ++index) {
+        glInvalidateBufferData(ssbo.handle);
+        glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, ssbo.handle);
+
+        glProgramUniform1ui(shader, 0, index);
+        glDrawArrays(GL_POINTS, 0, 1);
+        glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT); // Make the shader write visible.
+        GLuint result{};
+        glGetNamedBufferSubData(ssbo.handle, 0, sizeof(result), &result);
+        if (result != values.at(index)) {
+            LOG_INFO(Render_OpenGL, log_message, true);
+            return true;
+        }
+    }
+    LOG_INFO(Render_OpenGL, log_message, false);
+    return false;
+}
+
 } // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h
index de8490682..8c8c93760 100644
--- a/src/video_core/renderer_opengl/gl_device.h
+++ b/src/video_core/renderer_opengl/gl_device.h
@@ -30,13 +30,19 @@ public:
         return has_variable_aoffi;
     }
 
+    bool HasComponentIndexingBug() const {
+        return has_component_indexing_bug;
+    }
+
 private:
     static bool TestVariableAoffi();
+    static bool TestComponentIndexingBug();
 
     std::size_t uniform_buffer_alignment{};
     u32 max_vertex_attributes{};
     u32 max_varyings{};
     bool has_variable_aoffi{};
+    bool has_component_indexing_bug{};
 };
 
 } // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index d66252224..ac8a9e6b7 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -35,8 +35,8 @@ struct UnspecializedShader {
 namespace {
 
 /// Gets the address for the specified shader stage program
-GPUVAddr GetShaderAddress(Maxwell::ShaderProgram program) {
-    const auto& gpu{Core::System::GetInstance().GPU().Maxwell3D()};
+GPUVAddr GetShaderAddress(Core::System& system, Maxwell::ShaderProgram program) {
+    const auto& gpu{system.GPU().Maxwell3D()};
     const auto& shader_config{gpu.regs.shader_config[static_cast<std::size_t>(program)]};
     return gpu.regs.code_address.CodeAddress() + shader_config.offset;
 }
@@ -350,7 +350,8 @@ ShaderDiskCacheUsage CachedShader::GetUsage(GLenum primitive_mode,
 
 ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system,
                                      Core::Frontend::EmuWindow& emu_window, const Device& device)
-    : RasterizerCache{rasterizer}, emu_window{emu_window}, device{device}, disk_cache{system} {}
+    : RasterizerCache{rasterizer}, system{system}, emu_window{emu_window}, device{device},
+      disk_cache{system} {}
 
 void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
                                       const VideoCore::DiskResourceLoadCallback& callback) {
@@ -546,42 +547,45 @@ std::unordered_map<u64, UnspecializedShader> ShaderCacheOpenGL::GenerateUnspecia
 }
 
 Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
-    if (!Core::System::GetInstance().GPU().Maxwell3D().dirty_flags.shaders) {
-        return last_shaders[static_cast<u32>(program)];
+    if (!system.GPU().Maxwell3D().dirty_flags.shaders) {
+        return last_shaders[static_cast<std::size_t>(program)];
     }
 
-    auto& memory_manager{Core::System::GetInstance().GPU().MemoryManager()};
-    const GPUVAddr program_addr{GetShaderAddress(program)};
+    auto& memory_manager{system.GPU().MemoryManager()};
+    const GPUVAddr program_addr{GetShaderAddress(system, program)};
 
     // Look up shader in the cache based on address
-    const auto& host_ptr{memory_manager.GetPointer(program_addr)};
+    const auto host_ptr{memory_manager.GetPointer(program_addr)};
     Shader shader{TryGet(host_ptr)};
+    if (shader) {
+        return last_shaders[static_cast<std::size_t>(program)] = shader;
+    }
 
-    if (!shader) {
-        // No shader found - create a new one
-        ProgramCode program_code{GetShaderCode(memory_manager, program_addr, host_ptr)};
-        ProgramCode program_code_b;
-        if (program == Maxwell::ShaderProgram::VertexA) {
-            const GPUVAddr program_addr_b{GetShaderAddress(Maxwell::ShaderProgram::VertexB)};
-            program_code_b = GetShaderCode(memory_manager, program_addr_b,
-                                           memory_manager.GetPointer(program_addr_b));
-        }
-        const u64 unique_identifier = GetUniqueIdentifier(program, program_code, program_code_b);
-        const VAddr cpu_addr{*memory_manager.GpuToCpuAddress(program_addr)};
-        const auto found = precompiled_shaders.find(unique_identifier);
-        if (found != precompiled_shaders.end()) {
-            shader =
-                std::make_shared<CachedShader>(cpu_addr, unique_identifier, program, disk_cache,
-                                               precompiled_programs, found->second, host_ptr);
-        } else {
-            shader = std::make_shared<CachedShader>(
-                device, cpu_addr, unique_identifier, program, disk_cache, precompiled_programs,
-                std::move(program_code), std::move(program_code_b), host_ptr);
-        }
-        Register(shader);
+    // No shader found - create a new one
+    ProgramCode program_code{GetShaderCode(memory_manager, program_addr, host_ptr)};
+    ProgramCode program_code_b;
+    if (program == Maxwell::ShaderProgram::VertexA) {
+        const GPUVAddr program_addr_b{GetShaderAddress(system, Maxwell::ShaderProgram::VertexB)};
+        program_code_b = GetShaderCode(memory_manager, program_addr_b,
+                                       memory_manager.GetPointer(program_addr_b));
+    }
+
+    const u64 unique_identifier = GetUniqueIdentifier(program, program_code, program_code_b);
+    const VAddr cpu_addr{*memory_manager.GpuToCpuAddress(program_addr)};
+    const auto found = precompiled_shaders.find(unique_identifier);
+    if (found != precompiled_shaders.end()) {
+        // Create a shader from the cache
+        shader = std::make_shared<CachedShader>(cpu_addr, unique_identifier, program, disk_cache,
+                                                precompiled_programs, found->second, host_ptr);
+    } else {
+        // Create a shader from guest memory
+        shader = std::make_shared<CachedShader>(
+            device, cpu_addr, unique_identifier, program, disk_cache, precompiled_programs,
+            std::move(program_code), std::move(program_code_b), host_ptr);
     }
+    Register(shader);
 
-    return last_shaders[static_cast<u32>(program)] = shader;
+    return last_shaders[static_cast<std::size_t>(program)] = shader;
 }
 
 } // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h
index 64e5a5594..09bd0761d 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.h
+++ b/src/video_core/renderer_opengl/gl_shader_cache.h
@@ -137,6 +137,7 @@ private:
     CachedProgram GeneratePrecompiledProgram(const ShaderDiskCacheDump& dump,
                                              const std::set<GLenum>& supported_formats);
 
+    Core::System& system;
     Core::Frontend::EmuWindow& emu_window;
     const Device& device;
     ShaderDiskCacheOpenGL disk_cache;
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index e9f8d40db..29de5c9db 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -45,7 +45,6 @@ struct TextureAoffi {};
 using TextureArgument = std::pair<Type, Node>;
 using TextureIR = std::variant<TextureAoffi, TextureArgument>;
 
-enum : u32 { POSITION_VARYING_LOCATION = 0, GENERIC_VARYING_START_LOCATION = 1 };
 constexpr u32 MAX_CONSTBUFFER_ELEMENTS =
     static_cast<u32>(RasterizerOpenGL::MaxConstbufferSize) / (4 * sizeof(float));
 
@@ -247,6 +246,12 @@ private:
         code.AddLine("layout ({}, max_vertices = {}) out;", topology, max_vertices);
         code.AddNewLine();
 
+        code.AddLine("in gl_PerVertex {{");
+        ++code.scope;
+        code.AddLine("vec4 gl_Position;");
+        --code.scope;
+        code.AddLine("}} gl_in[];");
+
         DeclareVertexRedeclarations();
     }
 
@@ -349,7 +354,7 @@ private:
     }
 
     void DeclareInputAttribute(Attribute::Index index, bool skip_unused) {
-        const u32 generic_index{GetGenericAttributeIndex(index)};
+        const u32 location{GetGenericAttributeIndex(index)};
 
         std::string name{GetInputAttribute(index)};
         if (stage == ShaderStage::Geometry) {
@@ -358,19 +363,13 @@ private:
 
         std::string suffix;
         if (stage == ShaderStage::Fragment) {
-            const auto input_mode{header.ps.GetAttributeUse(generic_index)};
+            const auto input_mode{header.ps.GetAttributeUse(location)};
             if (skip_unused && input_mode == AttributeUse::Unused) {
                 return;
             }
             suffix = GetInputFlags(input_mode);
         }
 
-        u32 location = generic_index;
-        if (stage != ShaderStage::Vertex) {
-            // If inputs are varyings, add an offset
-            location += GENERIC_VARYING_START_LOCATION;
-        }
-
         code.AddLine("layout (location = {}) {} in vec4 {};", location, suffix, name);
     }
 
@@ -395,7 +394,7 @@ private:
     }
 
     void DeclareOutputAttribute(Attribute::Index index) {
-        const u32 location{GetGenericAttributeIndex(index) + GENERIC_VARYING_START_LOCATION};
+        const u32 location{GetGenericAttributeIndex(index)};
         code.AddLine("layout (location = {}) out vec4 {};", location, GetOutputAttribute(index));
     }
 
@@ -577,9 +576,26 @@ private:
             if (std::holds_alternative<OperationNode>(*offset)) {
                 // Indirect access
                 const std::string final_offset = code.GenerateTemporary();
-                code.AddLine("uint {} = (ftou({}) / 4);", final_offset, Visit(offset));
-                return fmt::format("{}[{} / 4][{} % 4]", GetConstBuffer(cbuf->GetIndex()),
-                                   final_offset, final_offset);
+                code.AddLine("uint {} = ftou({}) >> 2;", final_offset, Visit(offset));
+
+                if (!device.HasComponentIndexingBug()) {
+                    return fmt::format("{}[{} >> 2][{} & 3]", GetConstBuffer(cbuf->GetIndex()),
+                                       final_offset, final_offset);
+                }
+
+                // AMD's proprietary GLSL compiler emits bad code for variable component access.
+                // To bypass this driver bug generate 4 ifs, one per component.
+                const std::string pack = code.GenerateTemporary();
+                code.AddLine("vec4 {} = {}[{} >> 2];", pack, GetConstBuffer(cbuf->GetIndex()),
+                             final_offset);
+
+                const std::string result = code.GenerateTemporary();
+                code.AddLine("float {};", result);
+                for (u32 swizzle = 0; swizzle < 4; ++swizzle) {
+                    code.AddLine("if (({} & 3) == {}) {} = {}{};", final_offset, swizzle, result,
+                                 pack, GetSwizzle(swizzle));
+                }
+                return result;
             }
 
             UNREACHABLE_MSG("Unmanaged offset node type");
@@ -633,10 +649,14 @@ private:
 
         switch (attribute) {
         case Attribute::Index::Position:
-            if (stage != ShaderStage::Fragment) {
-                return GeometryPass("position") + GetSwizzle(element);
-            } else {
+            switch (stage) {
+            case ShaderStage::Geometry:
+                return fmt::format("gl_in[ftou({})].gl_Position{}", Visit(buffer),
+                                   GetSwizzle(element));
+            case ShaderStage::Fragment:
                 return element == 3 ? "1.0f" : ("gl_FragCoord"s + GetSwizzle(element));
+            default:
+                UNREACHABLE();
             }
         case Attribute::Index::PointCoord:
             switch (element) {
@@ -921,7 +941,7 @@ private:
             target = [&]() -> std::string {
                 switch (const auto attribute = abuf->GetIndex(); abuf->GetIndex()) {
                 case Attribute::Index::Position:
-                    return "position"s + GetSwizzle(abuf->GetElement());
+                    return "gl_Position"s + GetSwizzle(abuf->GetElement());
                 case Attribute::Index::PointSize:
                     return "gl_PointSize";
                 case Attribute::Index::ClipDistances0123:
@@ -1506,9 +1526,7 @@ private:
 
         // If a geometry shader is attached, it will always flip (it's the last stage before
         // fragment). For more info about flipping, refer to gl_shader_gen.cpp.
-        code.AddLine("position.xy *= viewport_flip.xy;");
-        code.AddLine("gl_Position = position;");
-        code.AddLine("position.w = 1.0;");
+        code.AddLine("gl_Position.xy *= viewport_flip.xy;");
         code.AddLine("EmitVertex();");
         return {};
     }
@@ -1746,8 +1764,7 @@ private:
     }
 
     u32 GetNumPhysicalVaryings() const {
-        return std::min<u32>(device.GetMaxVaryings() - GENERIC_VARYING_START_LOCATION,
-                             Maxwell::NumVaryings);
+        return std::min<u32>(device.GetMaxVaryings(), Maxwell::NumVaryings);
     }
 
     const Device& device;
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp
index d2bb705a9..c845b29aa 100644
--- a/src/video_core/renderer_opengl/gl_shader_gen.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp
@@ -23,8 +23,6 @@ ProgramResult GenerateVertexShader(const Device& device, const ShaderSetup& setu
     out += GetCommonDeclarations();
 
     out += R"(
-layout (location = 0) out vec4 position;
-
 layout (std140, binding = EMULATION_UBO_BINDING) uniform vs_config {
     vec4 viewport_flip;
     uvec4 config_pack; // instance_id, flip_stage, y_direction, padding
@@ -48,7 +46,6 @@ layout (std140, binding = EMULATION_UBO_BINDING) uniform vs_config {
 
     out += R"(
 void main() {
-    position = vec4(0.0, 0.0, 0.0, 0.0);
     execute_vertex();
 )";
 
@@ -59,19 +56,12 @@ void main() {
     out += R"(
 
     // Set Position Y direction
-    position.y *= utof(config_pack[2]);
+    gl_Position.y *= utof(config_pack[2]);
     // Check if the flip stage is VertexB
     // Config pack's second value is flip_stage
     if (config_pack[1] == 1) {
         // Viewport can be flipped, which is unsupported by glViewport
-        position.xy *= viewport_flip.xy;
-    }
-    gl_Position = position;
-
-    // TODO(bunnei): This is likely a hack, position.w should be interpolated as 1.0
-    // For now, this is here to bring order in lieu of proper emulation
-    if (config_pack[1] == 1) {
-        position.w = 1.0;
+        gl_Position.xy *= viewport_flip.xy;
     }
 })";
 
@@ -85,9 +75,6 @@ ProgramResult GenerateGeometryShader(const Device& device, const ShaderSetup& se
     out += GetCommonDeclarations();
 
     out += R"(
-layout (location = 0) in vec4 gs_position[];
-layout (location = 0) out vec4 position;
-
 layout (std140, binding = EMULATION_UBO_BINDING) uniform gs_config {
     vec4 viewport_flip;
     uvec4 config_pack; // instance_id, flip_stage, y_direction, padding
@@ -124,8 +111,6 @@ layout (location = 5) out vec4 FragColor5;
 layout (location = 6) out vec4 FragColor6;
 layout (location = 7) out vec4 FragColor7;
 
-layout (location = 0) in noperspective vec4 position;
-
 layout (std140, binding = EMULATION_UBO_BINDING) uniform fs_config {
     vec4 viewport_flip;
     uvec4 config_pack; // instance_id, flip_stage, y_direction, padding
diff --git a/src/video_core/renderer_vulkan/vk_device.cpp b/src/video_core/renderer_vulkan/vk_device.cpp
index 00242ecbe..3b966ddc3 100644
--- a/src/video_core/renderer_vulkan/vk_device.cpp
+++ b/src/video_core/renderer_vulkan/vk_device.cpp
@@ -18,6 +18,7 @@ constexpr std::array<vk::Format, 3> Depth24UnormS8Uint = {
     vk::Format::eD32SfloatS8Uint, vk::Format::eD16UnormS8Uint, {}};
 constexpr std::array<vk::Format, 3> Depth16UnormS8Uint = {
     vk::Format::eD24UnormS8Uint, vk::Format::eD32SfloatS8Uint, {}};
+constexpr std::array<vk::Format, 2> Astc = {vk::Format::eA8B8G8R8UnormPack32, {}};
 
 } // namespace Alternatives
 
@@ -51,15 +52,19 @@ VKDevice::VKDevice(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice phy
     : physical{physical}, format_properties{GetFormatProperties(dldi, physical)} {
     SetupFamilies(dldi, surface);
     SetupProperties(dldi);
+    SetupFeatures(dldi);
 }
 
 VKDevice::~VKDevice() = default;
 
 bool VKDevice::Create(const vk::DispatchLoaderDynamic& dldi, vk::Instance instance) {
-    const auto queue_cis = GetDeviceQueueCreateInfos();
-    vk::PhysicalDeviceFeatures device_features{};
+    vk::PhysicalDeviceFeatures device_features;
+    device_features.vertexPipelineStoresAndAtomics = true;
+    device_features.independentBlend = true;
+    device_features.textureCompressionASTC_LDR = is_optimal_astc_supported;
 
-    const std::vector<const char*> extensions = {VK_KHR_SWAPCHAIN_EXTENSION_NAME};
+    const auto queue_cis = GetDeviceQueueCreateInfos();
+    const std::vector<const char*> extensions = LoadExtensions(dldi);
     const vk::DeviceCreateInfo device_ci({}, static_cast<u32>(queue_cis.size()), queue_cis.data(),
                                          0, nullptr, static_cast<u32>(extensions.size()),
                                          extensions.data(), &device_features);
@@ -90,7 +95,7 @@ vk::Format VKDevice::GetSupportedFormat(vk::Format wanted_format,
         LOG_CRITICAL(Render_Vulkan,
                      "Format={} with usage={} and type={} has no defined alternatives and host "
                      "hardware does not support it",
-                     static_cast<u32>(wanted_format), static_cast<u32>(wanted_usage),
+                     vk::to_string(wanted_format), vk::to_string(wanted_usage),
                      static_cast<u32>(format_type));
         UNREACHABLE();
         return wanted_format;
@@ -118,6 +123,30 @@ vk::Format VKDevice::GetSupportedFormat(vk::Format wanted_format,
     return wanted_format;
 }
 
+bool VKDevice::IsOptimalAstcSupported(const vk::PhysicalDeviceFeatures& features,
+                                      const vk::DispatchLoaderDynamic& dldi) const {
+    if (!features.textureCompressionASTC_LDR) { // Feature bit is a prerequisite.
+        return false;
+    }
+    const auto format_feature_usage{
+        vk::FormatFeatureFlagBits::eSampledImage | vk::FormatFeatureFlagBits::eBlitSrc |
+        vk::FormatFeatureFlagBits::eBlitDst | vk::FormatFeatureFlagBits::eTransferSrc |
+        vk::FormatFeatureFlagBits::eTransferDst};
+    constexpr std::array<vk::Format, 9> astc_formats = {
+        vk::Format::eAstc4x4UnormBlock, vk::Format::eAstc4x4SrgbBlock,
+        vk::Format::eAstc8x8SrgbBlock,  vk::Format::eAstc8x6SrgbBlock,
+        vk::Format::eAstc5x4SrgbBlock,  vk::Format::eAstc5x5UnormBlock,
+        vk::Format::eAstc5x5SrgbBlock,  vk::Format::eAstc10x8UnormBlock,
+        vk::Format::eAstc10x8SrgbBlock};
+    for (const auto format : astc_formats) {
+        const auto tiling{physical.getFormatProperties(format, dldi).optimalTilingFeatures};
+        if ((tiling & format_feature_usage) != format_feature_usage) { // Needs every bit.
+            return false;
+        }
+    }
+    return true;
+}
+
 bool VKDevice::IsFormatSupported(vk::Format wanted_format, vk::FormatFeatureFlags wanted_usage,
                                  FormatType format_type) const {
     const auto it = format_properties.find(wanted_format);
@@ -132,11 +161,9 @@ bool VKDevice::IsFormatSupported(vk::Format wanted_format, vk::FormatFeatureFlag
 
 bool VKDevice::IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical,
                           vk::SurfaceKHR surface) {
-    const std::string swapchain_extension = VK_KHR_SWAPCHAIN_EXTENSION_NAME;
-
     bool has_swapchain{};
     for (const auto& prop : physical.enumerateDeviceExtensionProperties(nullptr, dldi)) {
-        has_swapchain |= prop.extensionName == swapchain_extension;
+        has_swapchain |= prop.extensionName == std::string(VK_KHR_SWAPCHAIN_EXTENSION_NAME);
     }
     if (!has_swapchain) {
         // The device doesn't support creating swapchains.
@@ -160,8 +187,14 @@ bool VKDevice::IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDev
     }
 
     // TODO(Rodrigo): Check if the device matches all requeriments.
-    const vk::PhysicalDeviceProperties props = physical.getProperties(dldi);
-    if (props.limits.maxUniformBufferRange < 65536) {
+    const auto properties{physical.getProperties(dldi)};
+    const auto limits{properties.limits};
+    if (limits.maxUniformBufferRange < 65536) {
+        return false;
+    }
+
+    const vk::PhysicalDeviceFeatures features{physical.getFeatures(dldi)};
+    if (!features.vertexPipelineStoresAndAtomics || !features.independentBlend) {
         return false;
     }
 
@@ -169,6 +202,30 @@ bool VKDevice::IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDev
     return true;
 }
 
+std::vector<const char*> VKDevice::LoadExtensions(const vk::DispatchLoaderDynamic& dldi) {
+    std::vector<const char*> extensions;
+    extensions.reserve(2);
+    extensions.push_back(VK_KHR_SWAPCHAIN_EXTENSION_NAME);
+
+    const auto Test = [&](const vk::ExtensionProperties& extension,
+                          std::optional<std::reference_wrapper<bool>> status, const char* name,
+                          u32 revision) {
+        if (extension.extensionName != std::string(name)) {
+            return;
+        }
+        extensions.push_back(name);
+        if (status) {
+            status->get() = true;
+        }
+    };
+
+    for (const auto& extension : physical.enumerateDeviceExtensionProperties(nullptr, dldi)) {
+        Test(extension, ext_scalar_block_layout, VK_EXT_SCALAR_BLOCK_LAYOUT_EXTENSION_NAME, 1);
+    }
+
+    return extensions;
+}
+
 void VKDevice::SetupFamilies(const vk::DispatchLoaderDynamic& dldi, vk::SurfaceKHR surface) {
     std::optional<u32> graphics_family_, present_family_;
 
@@ -196,10 +253,16 @@ void VKDevice::SetupProperties(const vk::DispatchLoaderDynamic& dldi) {
     const vk::PhysicalDeviceProperties props = physical.getProperties(dldi);
     device_type = props.deviceType;
     uniform_buffer_alignment = static_cast<u64>(props.limits.minUniformBufferOffsetAlignment);
+    max_storage_buffer_range = static_cast<u64>(props.limits.maxStorageBufferRange);
+}
+
+void VKDevice::SetupFeatures(const vk::DispatchLoaderDynamic& dldi) {
+    const auto supported_features{physical.getFeatures(dldi)};
+    is_optimal_astc_supported = IsOptimalAstcSupported(supported_features, dldi);
 }
 
 std::vector<vk::DeviceQueueCreateInfo> VKDevice::GetDeviceQueueCreateInfos() const {
-    static const float QUEUE_PRIORITY = 1.f;
+    static const float QUEUE_PRIORITY = 1.0f;
 
     std::set<u32> unique_queue_families = {graphics_family, present_family};
     std::vector<vk::DeviceQueueCreateInfo> queue_cis;
@@ -212,26 +275,43 @@ std::vector<vk::DeviceQueueCreateInfo> VKDevice::GetDeviceQueueCreateInfos() con
 
 std::map<vk::Format, vk::FormatProperties> VKDevice::GetFormatProperties(
     const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical) {
+    static constexpr std::array formats{vk::Format::eA8B8G8R8UnormPack32,
+                                        vk::Format::eB5G6R5UnormPack16,
+                                        vk::Format::eA2B10G10R10UnormPack32,
+                                        vk::Format::eR32G32B32A32Sfloat,
+                                        vk::Format::eR16G16Unorm,
+                                        vk::Format::eR16G16Snorm,
+                                        vk::Format::eR8G8B8A8Srgb,
+                                        vk::Format::eR8Unorm,
+                                        vk::Format::eB10G11R11UfloatPack32,
+                                        vk::Format::eR32Sfloat,
+                                        vk::Format::eR16Sfloat,
+                                        vk::Format::eR16G16B16A16Sfloat,
+                                        vk::Format::eD32Sfloat,
+                                        vk::Format::eD16Unorm,
+                                        vk::Format::eD16UnormS8Uint,
+                                        vk::Format::eD24UnormS8Uint,
+                                        vk::Format::eD32SfloatS8Uint,
+                                        vk::Format::eBc1RgbaUnormBlock,
+                                        vk::Format::eBc2UnormBlock,
+                                        vk::Format::eBc3UnormBlock,
+                                        vk::Format::eBc4UnormBlock,
+                                        vk::Format::eBc5UnormBlock,
+                                        vk::Format::eBc5SnormBlock,
+                                        vk::Format::eBc7UnormBlock,
+                                        vk::Format::eAstc4x4UnormBlock,
+                                        vk::Format::eAstc4x4SrgbBlock,
+                                        vk::Format::eAstc8x8SrgbBlock,
+                                        vk::Format::eAstc8x6SrgbBlock,
+                                        vk::Format::eAstc5x4SrgbBlock,
+                                        vk::Format::eAstc5x5UnormBlock,
+                                        vk::Format::eAstc5x5SrgbBlock,
+                                        vk::Format::eAstc10x8UnormBlock,
+                                        vk::Format::eAstc10x8SrgbBlock};
     std::map<vk::Format, vk::FormatProperties> format_properties;
-
-    const auto AddFormatQuery = [&format_properties, &dldi, physical](vk::Format format) {
+    for (const auto format : formats) {
         format_properties.emplace(format, physical.getFormatProperties(format, dldi));
-    };
-    AddFormatQuery(vk::Format::eA8B8G8R8UnormPack32);
-    AddFormatQuery(vk::Format::eB5G6R5UnormPack16);
-    AddFormatQuery(vk::Format::eA2B10G10R10UnormPack32);
-    AddFormatQuery(vk::Format::eR8G8B8A8Srgb);
-    AddFormatQuery(vk::Format::eR8Unorm);
-    AddFormatQuery(vk::Format::eD32Sfloat);
-    AddFormatQuery(vk::Format::eD16Unorm);
-    AddFormatQuery(vk::Format::eD16UnormS8Uint);
-    AddFormatQuery(vk::Format::eD24UnormS8Uint);
-    AddFormatQuery(vk::Format::eD32SfloatS8Uint);
-    AddFormatQuery(vk::Format::eBc1RgbaUnormBlock);
-    AddFormatQuery(vk::Format::eBc2UnormBlock);
-    AddFormatQuery(vk::Format::eBc3UnormBlock);
-    AddFormatQuery(vk::Format::eBc4UnormBlock);
-
+    }
     return format_properties;
 }
 
diff --git a/src/video_core/renderer_vulkan/vk_device.h b/src/video_core/renderer_vulkan/vk_device.h
index e87c7a508..537825d8b 100644
--- a/src/video_core/renderer_vulkan/vk_device.h
+++ b/src/video_core/renderer_vulkan/vk_device.h
@@ -11,7 +11,7 @@
 
 namespace Vulkan {
 
-/// Format usage descriptor
+/// Format usage descriptor.
 enum class FormatType { Linear, Optimal, Buffer };
 
 /// Handles data specific to a physical device.
@@ -34,12 +34,12 @@ public:
     vk::Format GetSupportedFormat(vk::Format wanted_format, vk::FormatFeatureFlags wanted_usage,
                                   FormatType format_type) const;
 
-    /// Returns the dispatch loader with direct function pointers of the device
+    /// Returns the dispatch loader with direct function pointers of the device.
     const vk::DispatchLoaderDynamic& GetDispatchLoader() const {
         return dld;
     }
 
-    /// Returns the logical device
+    /// Returns the logical device.
     vk::Device GetLogical() const {
         return logical.get();
     }
@@ -69,30 +69,55 @@ public:
         return present_family;
     }
 
-    /// Returns if the device is integrated with the host CPU
+    /// Returns if the device is integrated with the host CPU.
     bool IsIntegrated() const {
         return device_type == vk::PhysicalDeviceType::eIntegratedGpu;
     }
 
-    /// Returns uniform buffer alignment requeriment
+    /// Returns uniform buffer alignment requirement.
     u64 GetUniformBufferAlignment() const {
         return uniform_buffer_alignment;
     }
 
+    /// Returns the maximum range for storage buffers.
+    u64 GetMaxStorageBufferRange() const {
+        return max_storage_buffer_range;
+    }
+
+    /// Returns true if ASTC is natively supported.
+    bool IsOptimalAstcSupported() const {
+        return is_optimal_astc_supported;
+    }
+
+    /// Returns true if the device supports VK_EXT_scalar_block_layout.
+    bool IsExtScalarBlockLayoutSupported() const {
+        return ext_scalar_block_layout;
+    }
+
     /// Checks if the physical device is suitable.
     static bool IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical,
                            vk::SurfaceKHR surface);
 
 private:
+    /// Loads extensions into a vector and stores available ones in this object.
+    std::vector<const char*> LoadExtensions(const vk::DispatchLoaderDynamic& dldi);
+
     /// Sets up queue families.
     void SetupFamilies(const vk::DispatchLoaderDynamic& dldi, vk::SurfaceKHR surface);
 
     /// Sets up device properties.
     void SetupProperties(const vk::DispatchLoaderDynamic& dldi);
 
+    /// Sets up device features.
+    void SetupFeatures(const vk::DispatchLoaderDynamic& dldi);
+
     /// Returns a list of queue initialization descriptors.
     std::vector<vk::DeviceQueueCreateInfo> GetDeviceQueueCreateInfos() const;
 
+    /// Returns true if ASTC textures are natively supported.
+    bool IsOptimalAstcSupported(const vk::PhysicalDeviceFeatures& features,
+                                const vk::DispatchLoaderDynamic& dldi) const;
+
     /// Returns true if a format is supported.
     bool IsFormatSupported(vk::Format wanted_format, vk::FormatFeatureFlags wanted_usage,
                            FormatType format_type) const;
@@ -101,16 +126,19 @@ private:
     static std::map<vk::Format, vk::FormatProperties> GetFormatProperties(
         const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical);
 
-    const vk::PhysicalDevice physical;  ///< Physical device
-    vk::DispatchLoaderDynamic dld;      ///< Device function pointers
-    UniqueDevice logical;               ///< Logical device
-    vk::Queue graphics_queue;           ///< Main graphics queue
-    vk::Queue present_queue;            ///< Main present queue
-    u32 graphics_family{};              ///< Main graphics queue family index
-    u32 present_family{};               ///< Main present queue family index
-    vk::PhysicalDeviceType device_type; ///< Physical device type
-    u64 uniform_buffer_alignment{};     ///< Uniform buffer alignment requeriment
-    std::map<vk::Format, vk::FormatProperties> format_properties; ///< Format properties dictionary
+    const vk::PhysicalDevice physical;  ///< Physical device.
+    vk::DispatchLoaderDynamic dld;      ///< Device function pointers.
+    UniqueDevice logical;               ///< Logical device.
+    vk::Queue graphics_queue;           ///< Main graphics queue.
+    vk::Queue present_queue;            ///< Main present queue.
+    u32 graphics_family{};              ///< Main graphics queue family index.
+    u32 present_family{};               ///< Main present queue family index.
+    vk::PhysicalDeviceType device_type; ///< Physical device type.
+    u64 uniform_buffer_alignment{};     ///< Uniform buffer alignment requirement.
+    u64 max_storage_buffer_range{};     ///< Max storage buffer size.
+    bool is_optimal_astc_supported{};   ///< Support for native ASTC.
+    bool ext_scalar_block_layout{};     ///< Support for VK_EXT_scalar_block_layout.
+    std::map<vk::Format, vk::FormatProperties> format_properties; ///< Format properties dictionary.
 };
 
 } // namespace Vulkan
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
index a5b25aeff..a85fcae5a 100644
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
@@ -17,6 +17,7 @@
 #include "video_core/engines/maxwell_3d.h"
 #include "video_core/engines/shader_bytecode.h"
 #include "video_core/engines/shader_header.h"
+#include "video_core/renderer_vulkan/vk_device.h"
 #include "video_core/renderer_vulkan/vk_shader_decompiler.h"
 #include "video_core/shader/shader_ir.h"
 
@@ -33,7 +34,8 @@ using ShaderStage = Tegra::Engines::Maxwell3D::Regs::ShaderStage;
 using Operation = const OperationNode&;
 
 // TODO(Rodrigo): Use rasterizer's value
-constexpr u32 MAX_CONSTBUFFER_ELEMENTS = 0x1000;
+constexpr u32 MAX_CONSTBUFFER_FLOATS = 0x4000;
+constexpr u32 MAX_CONSTBUFFER_ELEMENTS = MAX_CONSTBUFFER_FLOATS / 4;
 constexpr u32 STAGE_BINDING_STRIDE = 0x100;
 
 enum class Type { Bool, Bool2, Float, Int, Uint, HalfFloat };
@@ -87,8 +89,8 @@ bool IsPrecise(Operation operand) {
 
 class SPIRVDecompiler : public Sirit::Module {
 public:
-    explicit SPIRVDecompiler(const ShaderIR& ir, ShaderStage stage)
-        : Module(0x00010300), ir{ir}, stage{stage}, header{ir.GetHeader()} {
+    explicit SPIRVDecompiler(const VKDevice& device, const ShaderIR& ir, ShaderStage stage)
+        : Module(0x00010300), device{device}, ir{ir}, stage{stage}, header{ir.GetHeader()} {
         AddCapability(spv::Capability::Shader);
         AddExtension("SPV_KHR_storage_buffer_storage_class");
         AddExtension("SPV_KHR_variable_pointers");
@@ -195,7 +197,9 @@ public:
             entries.samplers.emplace_back(sampler);
         }
         for (const auto& attribute : ir.GetInputAttributes()) {
-            entries.attributes.insert(GetGenericAttributeLocation(attribute));
+            if (IsGenericAttribute(attribute)) {
+                entries.attributes.insert(GetGenericAttributeLocation(attribute));
+            }
         }
         entries.clip_distances = ir.GetClipDistances();
         entries.shader_length = ir.GetLength();
@@ -210,7 +214,6 @@ private:
         std::array<OperationDecompilerFn, static_cast<std::size_t>(OperationCode::Amount)>;
 
     static constexpr auto INTERNAL_FLAGS_COUNT = static_cast<std::size_t>(InternalFlag::Amount);
-    static constexpr u32 CBUF_STRIDE = 16;
 
     void AllocateBindings() {
         const u32 binding_base = static_cast<u32>(stage) * STAGE_BINDING_STRIDE;
@@ -315,6 +318,7 @@ private:
         constexpr std::array<const char*, INTERNAL_FLAGS_COUNT> names = {"zero", "sign", "carry",
                                                                          "overflow"};
         for (std::size_t flag = 0; flag < INTERNAL_FLAGS_COUNT; ++flag) {
+            const auto flag_code = static_cast<InternalFlag>(flag);
             const Id id = OpVariable(t_prv_bool, spv::StorageClass::Private, v_false);
             internal_flags[flag] = AddGlobalVariable(Name(id, names[flag]));
         }
@@ -374,7 +378,9 @@ private:
         u32 binding = const_buffers_base_binding;
         for (const auto& entry : ir.GetConstantBuffers()) {
             const auto [index, size] = entry;
-            const Id id = OpVariable(t_cbuf_ubo, spv::StorageClass::Uniform);
+            const Id type =
+                device.IsExtScalarBlockLayoutSupported() ? t_cbuf_scalar_ubo : t_cbuf_std140_ubo;
+            const Id id = OpVariable(type, spv::StorageClass::Uniform);
             AddGlobalVariable(Name(id, fmt::format("cbuf_{}", index)));
 
             Decorate(id, spv::Decoration::Binding, binding++);
@@ -569,33 +575,35 @@ private:
             const Node offset = cbuf->GetOffset();
             const Id buffer_id = constant_buffers.at(cbuf->GetIndex());
 
-            Id buffer_index{};
-            Id buffer_element{};
-
-            if (const auto immediate = std::get_if<ImmediateNode>(offset)) {
-                // Direct access
-                const u32 offset_imm = immediate->GetValue();
-                ASSERT(offset_imm % 4 == 0);
-                buffer_index = Constant(t_uint, offset_imm / 16);
-                buffer_element = Constant(t_uint, (offset_imm / 4) % 4);
-
-            } else if (std::holds_alternative<OperationNode>(*offset)) {
-                // Indirect access
-                // TODO(Rodrigo): Use a uniform buffer stride of 4 and drop this slow math (which
-                // emits sub-optimal code on GLSL from my testing).
-                const Id offset_id = BitcastTo<Type::Uint>(Visit(offset));
-                const Id unsafe_offset = Emit(OpUDiv(t_uint, offset_id, Constant(t_uint, 4)));
-                const Id final_offset = Emit(
-                    OpUMod(t_uint, unsafe_offset, Constant(t_uint, MAX_CONSTBUFFER_ELEMENTS - 1)));
-                buffer_index = Emit(OpUDiv(t_uint, final_offset, Constant(t_uint, 4)));
-                buffer_element = Emit(OpUMod(t_uint, final_offset, Constant(t_uint, 4)));
-
+            Id pointer{};
+            if (device.IsExtScalarBlockLayoutSupported()) {
+                const Id buffer_offset = Emit(OpShiftRightLogical(
+                    t_uint, BitcastTo<Type::Uint>(Visit(offset)), Constant(t_uint, 2u)));
+                pointer = Emit(
+                    OpAccessChain(t_cbuf_float, buffer_id, Constant(t_uint, 0u), buffer_offset));
             } else {
-                UNREACHABLE_MSG("Unmanaged offset node type");
+                Id buffer_index{};
+                Id buffer_element{};
+                if (const auto immediate = std::get_if<ImmediateNode>(offset)) {
+                    // Direct access
+                    const u32 offset_imm = immediate->GetValue();
+                    ASSERT(offset_imm % 4 == 0);
+                    buffer_index = Constant(t_uint, offset_imm / 16);
+                    buffer_element = Constant(t_uint, (offset_imm / 4) % 4);
+                } else if (std::holds_alternative<OperationNode>(*offset)) {
+                    // Indirect access
+                    const Id offset_id = BitcastTo<Type::Uint>(Visit(offset));
+                    const Id unsafe_offset = Emit(OpUDiv(t_uint, offset_id, Constant(t_uint, 4)));
+                    const Id final_offset = Emit(OpUMod(
+                        t_uint, unsafe_offset, Constant(t_uint, MAX_CONSTBUFFER_ELEMENTS - 1)));
+                    buffer_index = Emit(OpUDiv(t_uint, final_offset, Constant(t_uint, 4)));
+                    buffer_element = Emit(OpUMod(t_uint, final_offset, Constant(t_uint, 4)));
+                } else {
+                    UNREACHABLE_MSG("Unmanaged offset node type");
+                }
+                pointer = Emit(OpAccessChain(t_cbuf_float, buffer_id, Constant(t_uint, 0),
+                                             buffer_index, buffer_element));
             }
-
-            const Id pointer = Emit(OpAccessChain(t_cbuf_float, buffer_id, Constant(t_uint, 0),
-                                                  buffer_index, buffer_element));
             return Emit(OpLoad(t_float, pointer));
 
         } else if (const auto gmem = std::get_if<GmemNode>(node)) {
@@ -612,7 +620,9 @@ private:
             // It's invalid to call conditional on nested nodes, use an operation instead
             const Id true_label = OpLabel();
             const Id skip_label = OpLabel();
-            Emit(OpBranchConditional(Visit(conditional->GetCondition()), true_label, skip_label));
+            const Id condition = Visit(conditional->GetCondition());
+            Emit(OpSelectionMerge(skip_label, spv::SelectionControlMask::MaskNone));
+            Emit(OpBranchConditional(condition, true_label, skip_label));
             Emit(true_label);
 
             VisitBasicBlock(conditional->GetCode());
@@ -968,11 +978,11 @@ private:
         case ShaderStage::Vertex: {
             // TODO(Rodrigo): We should use VK_EXT_depth_range_unrestricted instead, but it doesn't
             // seem to be working on Nvidia's drivers and Intel (mesa and blob) doesn't support it.
-            const Id position = AccessElement(t_float4, per_vertex, position_index);
-            Id depth = Emit(OpLoad(t_float, AccessElement(t_out_float, position, 2)));
+            const Id z_pointer = AccessElement(t_out_float, per_vertex, position_index, 2u);
+            Id depth = Emit(OpLoad(t_float, z_pointer));
             depth = Emit(OpFAdd(t_float, depth, Constant(t_float, 1.0f)));
             depth = Emit(OpFMul(t_float, depth, Constant(t_float, 0.5f)));
-            Emit(OpStore(AccessElement(t_out_float, position, 2), depth));
+            Emit(OpStore(z_pointer, depth));
             break;
         }
         case ShaderStage::Fragment: {
@@ -1311,6 +1321,7 @@ private:
         &SPIRVDecompiler::WorkGroupId<2>,
     };
 
+    const VKDevice& device;
     const ShaderIR& ir;
     const ShaderStage stage;
     const Tegra::Shader::Header header;
@@ -1349,12 +1360,18 @@ private:
     const Id t_out_float4 = Name(TypePointer(spv::StorageClass::Output, t_float4), "out_float4");
 
     const Id t_cbuf_float = TypePointer(spv::StorageClass::Uniform, t_float);
-    const Id t_cbuf_array =
-        Decorate(Name(TypeArray(t_float4, Constant(t_uint, MAX_CONSTBUFFER_ELEMENTS)), "CbufArray"),
-                 spv::Decoration::ArrayStride, CBUF_STRIDE);
-    const Id t_cbuf_struct = MemberDecorate(
-        Decorate(TypeStruct(t_cbuf_array), spv::Decoration::Block), 0, spv::Decoration::Offset, 0);
-    const Id t_cbuf_ubo = TypePointer(spv::StorageClass::Uniform, t_cbuf_struct);
+    const Id t_cbuf_std140 = Decorate(
+        Name(TypeArray(t_float4, Constant(t_uint, MAX_CONSTBUFFER_ELEMENTS)), "CbufStd140Array"),
+        spv::Decoration::ArrayStride, 16u);
+    const Id t_cbuf_scalar = Decorate(
+        Name(TypeArray(t_float, Constant(t_uint, MAX_CONSTBUFFER_FLOATS)), "CbufScalarArray"),
+        spv::Decoration::ArrayStride, 4u);
+    const Id t_cbuf_std140_struct = MemberDecorate(
+        Decorate(TypeStruct(t_cbuf_std140), spv::Decoration::Block), 0, spv::Decoration::Offset, 0);
+    const Id t_cbuf_scalar_struct = MemberDecorate(
+        Decorate(TypeStruct(t_cbuf_scalar), spv::Decoration::Block), 0, spv::Decoration::Offset, 0);
+    const Id t_cbuf_std140_ubo = TypePointer(spv::StorageClass::Uniform, t_cbuf_std140_struct);
+    const Id t_cbuf_scalar_ubo = TypePointer(spv::StorageClass::Uniform, t_cbuf_scalar_struct);
 
     const Id t_gmem_float = TypePointer(spv::StorageClass::StorageBuffer, t_float);
     const Id t_gmem_array =
@@ -1403,8 +1420,9 @@ private:
     std::map<u32, Id> labels;
 };
 
-DecompilerResult Decompile(const VideoCommon::Shader::ShaderIR& ir, Maxwell::ShaderStage stage) {
-    auto decompiler = std::make_unique<SPIRVDecompiler>(ir, stage);
+DecompilerResult Decompile(const VKDevice& device, const VideoCommon::Shader::ShaderIR& ir,
+                           Maxwell::ShaderStage stage) {
+    auto decompiler = std::make_unique<SPIRVDecompiler>(device, ir, stage);
     decompiler->Decompile();
     return {std::move(decompiler), decompiler->GetShaderEntries()};
 }
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.h b/src/video_core/renderer_vulkan/vk_shader_decompiler.h
index 329d8fa38..f90541cc1 100644
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.h
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.h
@@ -20,10 +20,13 @@ namespace VideoCommon::Shader {
 class ShaderIR;
 }
 
+namespace Vulkan {
+class VKDevice;
+}
+
 namespace Vulkan::VKShader {
 
 using Maxwell = Tegra::Engines::Maxwell3D::Regs;
-
 using SamplerEntry = VideoCommon::Shader::Sampler;
 
 constexpr u32 DESCRIPTOR_SET = 0;
@@ -75,6 +78,7 @@ struct ShaderEntries {
 
 using DecompilerResult = std::pair<std::unique_ptr<Sirit::Module>, ShaderEntries>;
 
-DecompilerResult Decompile(const VideoCommon::Shader::ShaderIR& ir, Maxwell::ShaderStage stage);
+DecompilerResult Decompile(const VKDevice& device, const VideoCommon::Shader::ShaderIR& ir,
+                           Maxwell::ShaderStage stage);
 
 } // namespace Vulkan::VKShader