Diffstat (limited to 'src/video_core')
37 files changed, 3330 insertions, 210 deletions
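One detail worth calling out before the hunks: the shader_bytecode.h change below adds an `atoms` union whose `GetImmediateOffset()` returns `offset << 2`, suggesting the 22-bit field counts 32-bit words and is converted to a byte offset. A minimal standalone sketch of that decode (a plain integer stands in for the BitField; the word-offset interpretation is an inference from the shift, not stated in the patch):

#include <cstdint>

// ATOMS appears to encode a signed word offset; GetImmediateOffset()
// mirrors the patch's `offset << 2`, i.e. words * 4 = bytes.
int32_t DecodeAtomsOffset(int64_t raw_field) {
    return static_cast<int32_t>(raw_field << 2);
}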
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 142852082..ccfed4f2e 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -153,6 +153,9 @@ if (ENABLE_VULKAN) renderer_vulkan/fixed_pipeline_state.h renderer_vulkan/maxwell_to_vk.cpp renderer_vulkan/maxwell_to_vk.h + renderer_vulkan/renderer_vulkan.h + renderer_vulkan/vk_blit_screen.cpp + renderer_vulkan/vk_blit_screen.h renderer_vulkan/vk_buffer_cache.cpp renderer_vulkan/vk_buffer_cache.h renderer_vulkan/vk_compute_pass.cpp @@ -171,6 +174,7 @@ if (ENABLE_VULKAN) renderer_vulkan/vk_memory_manager.h renderer_vulkan/vk_pipeline_cache.cpp renderer_vulkan/vk_pipeline_cache.h + renderer_vulkan/vk_rasterizer.cpp renderer_vulkan/vk_rasterizer.h renderer_vulkan/vk_renderpass_cache.cpp renderer_vulkan/vk_renderpass_cache.h @@ -190,8 +194,11 @@ if (ENABLE_VULKAN) renderer_vulkan/vk_stream_buffer.h renderer_vulkan/vk_swapchain.cpp renderer_vulkan/vk_swapchain.h + renderer_vulkan/vk_texture_cache.cpp + renderer_vulkan/vk_texture_cache.h renderer_vulkan/vk_update_descriptor.cpp - renderer_vulkan/vk_update_descriptor.h) + renderer_vulkan/vk_update_descriptor.h + ) target_include_directories(video_core PRIVATE sirit ../../externals/Vulkan-Headers/include) target_compile_definitions(video_core PRIVATE HAS_VULKAN) diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 1d1f780e7..58dfa8033 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -91,6 +91,7 @@ void Maxwell3D::InitializeRegisterDefaults() { regs.rasterize_enable = 1; regs.rt_separate_frag_data = 1; regs.framebuffer_srgb = 1; + regs.cull.front_face = Maxwell3D::Regs::Cull::FrontFace::ClockWise; mme_inline[MAXWELL3D_REG_INDEX(draw.vertex_end_gl)] = true; mme_inline[MAXWELL3D_REG_INDEX(draw.vertex_begin_gl)] = true; diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index a35e7a195..ee79260fc 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -1018,7 +1018,14 @@ public: } } instanced_arrays; - INSERT_UNION_PADDING_WORDS(0x6); + INSERT_UNION_PADDING_WORDS(0x4); + + union { + BitField<0, 1, u32> enable; + BitField<4, 8, u32> unk4; + } vp_point_size; + + INSERT_UNION_PADDING_WORDS(1); Cull cull; @@ -1271,8 +1278,6 @@ public: } dirty{}; - std::array<u8, Regs::NUM_REGS> dirty_pointers{}; - /// Reads a register value located at the input method address u32 GetRegisterValue(u32 method) const; @@ -1367,6 +1372,8 @@ private: bool execute_on{true}; + std::array<u8, Regs::NUM_REGS> dirty_pointers{}; + /// Retrieves information about a specific TIC entry from the TIC buffer. 
Texture::TICEntry GetTICEntry(u32 tic_index) const; @@ -1503,6 +1510,7 @@ ASSERT_REG_POSITION(primitive_restart, 0x591); ASSERT_REG_POSITION(index_array, 0x5F2); ASSERT_REG_POSITION(polygon_offset_clamp, 0x61F); ASSERT_REG_POSITION(instanced_arrays, 0x620); +ASSERT_REG_POSITION(vp_point_size, 0x644); ASSERT_REG_POSITION(cull, 0x646); ASSERT_REG_POSITION(pixel_center_integer, 0x649); ASSERT_REG_POSITION(viewport_transform_enabled, 0x64B); diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index 57b57c647..f443ec0fe 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h @@ -215,6 +215,40 @@ enum class F2fRoundingOp : u64 { Trunc = 11, }; +enum class AtomicOp : u64 { + Add = 0, + Min = 1, + Max = 2, + Inc = 3, + Dec = 4, + And = 5, + Or = 6, + Xor = 7, + Exch = 8, +}; + +enum class GlobalAtomicOp : u64 { + Add = 0, + Min = 1, + Max = 2, + Inc = 3, + Dec = 4, + And = 5, + Or = 6, + Xor = 7, + Exch = 8, + SafeAdd = 10, +}; + +enum class GlobalAtomicType : u64 { + U32 = 0, + S32 = 1, + U64 = 2, + F32_FTZ_RN = 3, + F16x2_FTZ_RN = 4, + S64 = 5, +}; + enum class UniformType : u64 { UnsignedByte = 0, SignedByte = 1, @@ -236,6 +270,13 @@ enum class StoreType : u64 { Bits128 = 6, }; +enum class AtomicType : u64 { + U32 = 0, + S32 = 1, + U64 = 2, + S64 = 3, +}; + enum class IMinMaxExchange : u64 { None = 0, XLo = 1, @@ -939,6 +980,22 @@ union Instruction { } stg; union { + BitField<52, 4, GlobalAtomicOp> operation; + BitField<49, 3, GlobalAtomicType> type; + BitField<28, 20, s64> offset; + } atom; + + union { + BitField<52, 4, AtomicOp> operation; + BitField<28, 2, AtomicType> type; + BitField<30, 22, s64> offset; + + s32 GetImmediateOffset() const { + return static_cast<s32>(offset << 2); + } + } atoms; + + union { BitField<32, 1, PhysicalAttributeDirection> direction; BitField<47, 3, AttributeSize> size; BitField<20, 11, u64> address; @@ -1659,9 +1716,11 @@ public: ST_A, ST_L, ST_S, - ST, // Store in generic memory - STG, // Store in global memory - AL2P, // Transforms attribute memory into physical memory + ST, // Store in generic memory + STG, // Store in global memory + ATOM, // Atomic operation on global memory + ATOMS, // Atomic operation on shared memory + AL2P, // Transforms attribute memory into physical memory TEX, TEX_B, // Texture Load Bindless TXQ, // Texture Query @@ -1964,6 +2023,8 @@ private: INST("1110111101010---", Id::ST_L, Type::Memory, "ST_L"), INST("101-------------", Id::ST, Type::Memory, "ST"), INST("1110111011011---", Id::STG, Type::Memory, "STG"), + INST("11101101--------", Id::ATOM, Type::Memory, "ATOM"), + INST("11101100--------", Id::ATOMS, Type::Memory, "ATOMS"), INST("1110111110100---", Id::AL2P, Type::Memory, "AL2P"), INST("110000----111---", Id::TEX, Type::Texture, "TEX"), INST("1101111010111---", Id::TEX_B, Type::Texture, "TEX_B"), diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 672051102..c428f06e4 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -1272,6 +1272,7 @@ void RasterizerOpenGL::SyncPointState() { const auto& regs = system.GPU().Maxwell3D().regs; // Limit the point size to 1 since nouveau sometimes sets a point size of 0 (and that's invalid // in OpenGL). 
+ state.point.program_control = regs.vp_point_size.enable != 0; state.point.size = std::max(1.0f, regs.point_size); } diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index de742d11c..3c5bdd377 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -34,9 +34,6 @@ using VideoCommon::Shader::ShaderIR; namespace { -// One UBO is always reserved for emulation values on staged shaders -constexpr u32 STAGE_RESERVED_UBOS = 1; - constexpr u32 STAGE_MAIN_OFFSET = 10; constexpr u32 KERNEL_MAIN_OFFSET = 0; @@ -243,7 +240,6 @@ CachedProgram BuildShader(const Device& device, u64 unique_identifier, ShaderTyp if (!code_b.empty()) { ir_b.emplace(code_b, main_offset, COMPILER_SETTINGS, locker); } - const auto entries = GLShader::GetEntries(ir); std::string source = fmt::format(R"(// {} #version 430 core @@ -264,6 +260,10 @@ CachedProgram BuildShader(const Device& device, u64 unique_identifier, ShaderTyp "#extension GL_NV_shader_thread_group : require\n" "#extension GL_NV_shader_thread_shuffle : require\n"; } + // This pragma stops Nvidia's driver from over optimizing math (probably using fp16 operations) + // on places where we don't want to. + // Thanks to Ryujinx for finding this workaround. + source += "#pragma optionNV(fastmath off)\n"; if (shader_type == ShaderType::Geometry) { const auto [glsl_topology, max_vertices] = GetPrimitiveDescription(variant.primitive_mode); @@ -314,9 +314,10 @@ std::unordered_set<GLenum> GetSupportedFormats() { CachedShader::CachedShader(const ShaderParameters& params, ShaderType shader_type, GLShader::ShaderEntries entries, ProgramCode code, ProgramCode code_b) - : RasterizerCacheObject{params.host_ptr}, system{params.system}, disk_cache{params.disk_cache}, - device{params.device}, cpu_addr{params.cpu_addr}, unique_identifier{params.unique_identifier}, - shader_type{shader_type}, entries{entries}, code{std::move(code)}, code_b{std::move(code_b)} { + : RasterizerCacheObject{params.host_ptr}, system{params.system}, + disk_cache{params.disk_cache}, device{params.device}, cpu_addr{params.cpu_addr}, + unique_identifier{params.unique_identifier}, shader_type{shader_type}, + entries{std::move(entries)}, code{std::move(code)}, code_b{std::move(code_b)} { if (!params.precompiled_variants) { return; } diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index f9f7a97b5..a1ac3d7a9 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -1019,7 +1019,6 @@ private: } return {{"gl_ViewportIndex", Type::Int}}; case 3: - UNIMPLEMENTED_MSG("Requires some state changes for gl_PointSize to work in shader"); return {{"gl_PointSize", Type::Float}}; } return {}; @@ -1856,6 +1855,13 @@ private: Type::Uint}; } + template <const std::string_view& opname, Type type> + Expression Atomic(Operation operation) { + return {fmt::format("atomic{}({}, {})", opname, Visit(operation[0]).GetCode(), + Visit(operation[1]).As(type)), + type}; + } + Expression Branch(Operation operation) { const auto target = std::get_if<ImmediateNode>(&*operation[0]); UNIMPLEMENTED_IF(!target); @@ -2194,6 +2200,8 @@ private: &GLSLDecompiler::AtomicImage<Func::Xor>, &GLSLDecompiler::AtomicImage<Func::Exchange>, + &GLSLDecompiler::Atomic<Func::Add, Type::Uint>, + &GLSLDecompiler::Branch, &GLSLDecompiler::BranchIndirect, 
&GLSLDecompiler::PushFlowStack, @@ -2313,7 +2321,7 @@ public: explicit ExprDecompiler(GLSLDecompiler& decomp) : decomp{decomp} {} void operator()(const ExprAnd& expr) { - inner += "( "; + inner += '('; std::visit(*this, *expr.operand1); inner += " && "; std::visit(*this, *expr.operand2); @@ -2321,7 +2329,7 @@ public: } void operator()(const ExprOr& expr) { - inner += "( "; + inner += '('; std::visit(*this, *expr.operand1); inner += " || "; std::visit(*this, *expr.operand2); @@ -2339,28 +2347,7 @@ public: } void operator()(const ExprCondCode& expr) { - const Node cc = decomp.ir.GetConditionCode(expr.cc); - std::string target; - - if (const auto pred = std::get_if<PredicateNode>(&*cc)) { - const auto index = pred->GetIndex(); - switch (index) { - case Tegra::Shader::Pred::NeverExecute: - target = "false"; - break; - case Tegra::Shader::Pred::UnusedIndex: - target = "true"; - break; - default: - target = decomp.GetPredicate(index); - break; - } - } else if (const auto flag = std::get_if<InternalFlagNode>(&*cc)) { - target = decomp.GetInternalFlag(flag->GetFlag()); - } else { - UNREACHABLE(); - } - inner += target; + inner += decomp.Visit(decomp.ir.GetConditionCode(expr.cc)).AsBool(); } void operator()(const ExprVar& expr) { @@ -2372,8 +2359,7 @@ public: } void operator()(VideoCommon::Shader::ExprGprEqual& expr) { - inner += - "( ftou(" + decomp.GetRegister(expr.gpr) + ") == " + std::to_string(expr.value) + ')'; + inner += fmt::format("(ftou({}) == {})", decomp.GetRegister(expr.gpr), expr.value); } const std::string& GetResult() const { @@ -2381,8 +2367,8 @@ public: } private: - std::string inner; GLSLDecompiler& decomp; + std::string inner; }; class ASTDecompiler { diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp index df2e2395a..cc185e9e1 100644 --- a/src/video_core/renderer_opengl/gl_state.cpp +++ b/src/video_core/renderer_opengl/gl_state.cpp @@ -127,6 +127,7 @@ void OpenGLState::ApplyClipDistances() { } void OpenGLState::ApplyPointSize() { + Enable(GL_PROGRAM_POINT_SIZE, cur_state.point.program_control, point.program_control); if (UpdateValue(cur_state.point.size, point.size)) { glPointSize(point.size); } diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h index fb180f302..678e5cd89 100644 --- a/src/video_core/renderer_opengl/gl_state.h +++ b/src/video_core/renderer_opengl/gl_state.h @@ -131,7 +131,8 @@ public: std::array<Viewport, Tegra::Engines::Maxwell3D::Regs::NumViewports> viewports; struct { - float size = 1.0f; // GL_POINT_SIZE + bool program_control = false; // GL_PROGRAM_POINT_SIZE + GLfloat size = 1.0f; // GL_POINT_SIZE } point; struct { diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index b790b0ef4..d4b81cd87 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -44,7 +44,7 @@ struct FormatTuple { constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> tex_format_tuples = {{ {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, false}, // ABGR8U - {GL_RGBA8, GL_RGBA, GL_BYTE, false}, // ABGR8S + {GL_RGBA8_SNORM, GL_RGBA, GL_BYTE, false}, // ABGR8S {GL_RGBA8UI, GL_RGBA_INTEGER, GL_UNSIGNED_BYTE, false}, // ABGR8UI {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV, false}, // B5G6R5U {GL_RGB10_A2, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV, false}, // A2B10G10R10U @@ -83,9 +83,9 @@ constexpr std::array<FormatTuple, 
VideoCore::Surface::MaxPixelFormat> tex_format {GL_RGB32F, GL_RGB, GL_FLOAT, false}, // RGB32F {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, false}, // RGBA8_SRGB {GL_RG8, GL_RG, GL_UNSIGNED_BYTE, false}, // RG8U - {GL_RG8, GL_RG, GL_BYTE, false}, // RG8S + {GL_RG8_SNORM, GL_RG, GL_BYTE, false}, // RG8S {GL_RG32UI, GL_RG_INTEGER, GL_UNSIGNED_INT, false}, // RG32UI - {GL_RGB16F, GL_RGBA16, GL_HALF_FLOAT, false}, // RGBX16F + {GL_RGB16F, GL_RGBA, GL_HALF_FLOAT, false}, // RGBX16F {GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT, false}, // R32UI {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_8X8 {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_8X5 @@ -176,6 +176,19 @@ GLint GetSwizzleSource(SwizzleSource source) { return GL_NONE; } +GLenum GetComponent(PixelFormat format, bool is_first) { + switch (format) { + case PixelFormat::Z24S8: + case PixelFormat::Z32FS8: + return is_first ? GL_DEPTH_COMPONENT : GL_STENCIL_INDEX; + case PixelFormat::S8Z24: + return is_first ? GL_STENCIL_INDEX : GL_DEPTH_COMPONENT; + default: + UNREACHABLE(); + return GL_DEPTH_COMPONENT; + } +} + void ApplyTextureDefaults(const SurfaceParams& params, GLuint texture) { if (params.IsBuffer()) { return; @@ -184,7 +197,7 @@ void ApplyTextureDefaults(const SurfaceParams& params, GLuint texture) { glTextureParameteri(texture, GL_TEXTURE_MAG_FILTER, GL_LINEAR); glTextureParameteri(texture, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); glTextureParameteri(texture, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); - glTextureParameteri(texture, GL_TEXTURE_MAX_LEVEL, params.num_levels - 1); + glTextureParameteri(texture, GL_TEXTURE_MAX_LEVEL, static_cast<GLint>(params.num_levels - 1)); if (params.num_levels == 1) { glTextureParameterf(texture, GL_TEXTURE_LOD_BIAS, 1000.0f); } @@ -253,14 +266,12 @@ void CachedSurface::DownloadTexture(std::vector<u8>& staging_buffer) { glPixelStorei(GL_PACK_ALIGNMENT, std::min(8U, params.GetRowAlignment(level))); glPixelStorei(GL_PACK_ROW_LENGTH, static_cast<GLint>(params.GetMipWidth(level))); const std::size_t mip_offset = params.GetHostMipmapLevelOffset(level); + u8* const mip_data = staging_buffer.data() + mip_offset; + const GLsizei size = static_cast<GLsizei>(params.GetHostMipmapSize(level)); if (is_compressed) { - glGetCompressedTextureImage(texture.handle, level, - static_cast<GLsizei>(params.GetHostMipmapSize(level)), - staging_buffer.data() + mip_offset); + glGetCompressedTextureImage(texture.handle, level, size, mip_data); } else { - glGetTextureImage(texture.handle, level, format, type, - static_cast<GLsizei>(params.GetHostMipmapSize(level)), - staging_buffer.data() + mip_offset); + glGetTextureImage(texture.handle, level, format, type, size, mip_data); } } } @@ -418,11 +429,21 @@ void CachedSurfaceView::ApplySwizzle(SwizzleSource x_source, SwizzleSource y_sou if (new_swizzle == swizzle) return; swizzle = new_swizzle; - const std::array<GLint, 4> gl_swizzle = {GetSwizzleSource(x_source), GetSwizzleSource(y_source), - GetSwizzleSource(z_source), - GetSwizzleSource(w_source)}; + const std::array gl_swizzle = {GetSwizzleSource(x_source), GetSwizzleSource(y_source), + GetSwizzleSource(z_source), GetSwizzleSource(w_source)}; const GLuint handle = GetTexture(); - glTextureParameteriv(handle, GL_TEXTURE_SWIZZLE_RGBA, gl_swizzle.data()); + const PixelFormat format = surface.GetSurfaceParams().pixel_format; + switch (format) { + case PixelFormat::Z24S8: + case PixelFormat::Z32FS8: + case PixelFormat::S8Z24: + glTextureParameteri(handle, GL_DEPTH_STENCIL_TEXTURE_MODE, + GetComponent(format, 
x_source == SwizzleSource::R)); + break; + default: + glTextureParameteriv(handle, GL_TEXTURE_SWIZZLE_RGBA, gl_swizzle.data()); + break; + } } OGLTextureView CachedSurfaceView::CreateTextureView() const { @@ -531,8 +552,11 @@ void TextureCacheOpenGL::ImageBlit(View& src_view, View& dst_view, const Common::Rectangle<u32>& dst_rect = copy_config.dst_rect; const bool is_linear = copy_config.filter == Tegra::Engines::Fermi2D::Filter::Linear; - glBlitFramebuffer(src_rect.left, src_rect.top, src_rect.right, src_rect.bottom, dst_rect.left, - dst_rect.top, dst_rect.right, dst_rect.bottom, buffers, + glBlitFramebuffer(static_cast<GLint>(src_rect.left), static_cast<GLint>(src_rect.top), + static_cast<GLint>(src_rect.right), static_cast<GLint>(src_rect.bottom), + static_cast<GLint>(dst_rect.left), static_cast<GLint>(dst_rect.top), + static_cast<GLint>(dst_rect.right), static_cast<GLint>(dst_rect.bottom), + buffers, is_linear && (buffers == GL_COLOR_BUFFER_BIT) ? GL_LINEAR : GL_NEAREST); } diff --git a/src/video_core/renderer_opengl/utils.cpp b/src/video_core/renderer_opengl/utils.cpp index 9770dda1c..ac99e6385 100644 --- a/src/video_core/renderer_opengl/utils.cpp +++ b/src/video_core/renderer_opengl/utils.cpp @@ -6,16 +6,20 @@ #include <vector> #include <fmt/format.h> - #include <glad/glad.h> -#include "common/assert.h" #include "common/common_types.h" -#include "common/scope_exit.h" #include "video_core/renderer_opengl/utils.h" namespace OpenGL { +struct VertexArrayPushBuffer::Entry { + GLuint binding_index{}; + const GLuint* buffer{}; + GLintptr offset{}; + GLsizei stride{}; +}; + VertexArrayPushBuffer::VertexArrayPushBuffer() = default; VertexArrayPushBuffer::~VertexArrayPushBuffer() = default; @@ -47,6 +51,13 @@ void VertexArrayPushBuffer::Bind() { } } +struct BindBuffersRangePushBuffer::Entry { + GLuint binding; + const GLuint* buffer; + GLintptr offset; + GLsizeiptr size; +}; + BindBuffersRangePushBuffer::BindBuffersRangePushBuffer(GLenum target) : target{target} {} BindBuffersRangePushBuffer::~BindBuffersRangePushBuffer() = default; diff --git a/src/video_core/renderer_opengl/utils.h b/src/video_core/renderer_opengl/utils.h index d56153fe7..3ad7c02d4 100644 --- a/src/video_core/renderer_opengl/utils.h +++ b/src/video_core/renderer_opengl/utils.h @@ -26,12 +26,7 @@ public: void Bind(); private: - struct Entry { - GLuint binding_index{}; - const GLuint* buffer{}; - GLintptr offset{}; - GLsizei stride{}; - }; + struct Entry; GLuint vao{}; const GLuint* index_buffer{}; @@ -50,12 +45,7 @@ public: void Bind(); private: - struct Entry { - GLuint binding; - const GLuint* buffer; - GLintptr offset; - GLsizeiptr size; - }; + struct Entry; GLenum target; std::vector<Entry> entries; diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp index 000e3616d..331808113 100644 --- a/src/video_core/renderer_vulkan/maxwell_to_vk.cpp +++ b/src/video_core/renderer_vulkan/maxwell_to_vk.cpp @@ -44,7 +44,7 @@ vk::SamplerMipmapMode MipmapMode(Tegra::Texture::TextureMipmapFilter mipmap_filt return {}; } -vk::SamplerAddressMode WrapMode(Tegra::Texture::WrapMode wrap_mode, +vk::SamplerAddressMode WrapMode(const VKDevice& device, Tegra::Texture::WrapMode wrap_mode, Tegra::Texture::TextureFilter filter) { switch (wrap_mode) { case Tegra::Texture::WrapMode::Wrap: @@ -56,7 +56,12 @@ vk::SamplerAddressMode WrapMode(Tegra::Texture::WrapMode wrap_mode, case Tegra::Texture::WrapMode::Border: return vk::SamplerAddressMode::eClampToBorder; case 
Tegra::Texture::WrapMode::Clamp: - // TODO(Rodrigo): Emulate GL_CLAMP properly + if (device.GetDriverID() == vk::DriverIdKHR::eNvidiaProprietary) { + // Nvidia's Vulkan driver defaults to GL_CLAMP on invalid enumerations, we can hack this + // by sending an invalid enumeration. + return static_cast<vk::SamplerAddressMode>(0xcafe); + } + // TODO(Rodrigo): Emulate GL_CLAMP properly on other vendors switch (filter) { case Tegra::Texture::TextureFilter::Nearest: return vk::SamplerAddressMode::eClampToEdge; diff --git a/src/video_core/renderer_vulkan/maxwell_to_vk.h b/src/video_core/renderer_vulkan/maxwell_to_vk.h index 1534b738b..7e9678b7b 100644 --- a/src/video_core/renderer_vulkan/maxwell_to_vk.h +++ b/src/video_core/renderer_vulkan/maxwell_to_vk.h @@ -22,7 +22,7 @@ vk::Filter Filter(Tegra::Texture::TextureFilter filter); vk::SamplerMipmapMode MipmapMode(Tegra::Texture::TextureMipmapFilter mipmap_filter); -vk::SamplerAddressMode WrapMode(Tegra::Texture::WrapMode wrap_mode, +vk::SamplerAddressMode WrapMode(const VKDevice& device, Tegra::Texture::WrapMode wrap_mode, Tegra::Texture::TextureFilter filter); vk::CompareOp DepthCompareFunction(Tegra::Texture::DepthCompareFunc depth_compare_func); diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.h b/src/video_core/renderer_vulkan/renderer_vulkan.h new file mode 100644 index 000000000..a472c5dc9 --- /dev/null +++ b/src/video_core/renderer_vulkan/renderer_vulkan.h @@ -0,0 +1,72 @@ +// Copyright 2018 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <optional> +#include <vector> +#include "video_core/renderer_base.h" +#include "video_core/renderer_vulkan/declarations.h" + +namespace Core { +class System; +} + +namespace Vulkan { + +class VKBlitScreen; +class VKDevice; +class VKFence; +class VKMemoryManager; +class VKResourceManager; +class VKSwapchain; +class VKScheduler; +class VKImage; + +struct VKScreenInfo { + VKImage* image{}; + u32 width{}; + u32 height{}; + bool is_srgb{}; +}; + +class RendererVulkan final : public VideoCore::RendererBase { +public: + explicit RendererVulkan(Core::Frontend::EmuWindow& window, Core::System& system); + ~RendererVulkan() override; + + /// Swap buffers (render frame) + void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override; + + /// Initialize the renderer + bool Init() override; + + /// Shutdown the renderer + void ShutDown() override; + +private: + std::optional<vk::DebugUtilsMessengerEXT> CreateDebugCallback( + const vk::DispatchLoaderDynamic& dldi); + + bool PickDevices(const vk::DispatchLoaderDynamic& dldi); + + void Report() const; + + Core::System& system; + + vk::Instance instance; + vk::SurfaceKHR surface; + + VKScreenInfo screen_info; + + UniqueDebugUtilsMessengerEXT debug_callback; + std::unique_ptr<VKDevice> device; + std::unique_ptr<VKSwapchain> swapchain; + std::unique_ptr<VKMemoryManager> memory_manager; + std::unique_ptr<VKResourceManager> resource_manager; + std::unique_ptr<VKScheduler> scheduler; + std::unique_ptr<VKBlitScreen> blit_screen; +}; + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.cpp b/src/video_core/renderer_vulkan/vk_blit_screen.cpp new file mode 100644 index 000000000..855cfc883 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_blit_screen.cpp @@ -0,0 +1,627 @@ +// Copyright 2018 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
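As an aside, the GL_CLAMP workaround added to maxwell_to_vk.cpp above reduces to a small dispatch on driver ID and filter mode. Here is a standalone sketch of that logic (plain enums stand in for the vulkan.hpp and Tegra::Texture types; the Linear branch is assumed, since the hunk is truncated after the Nearest case):

#include <cstdint>

enum class DriverID { NvidiaProprietary, Other };
enum class TextureFilter { Nearest, Linear };
enum class SamplerAddressMode : std::uint32_t {
    ClampToEdge = 2,
    ClampToBorder = 3,
};

// Mirrors the WrapMode::Clamp branch: Nvidia's Vulkan driver falls back to
// GL_CLAMP semantics when handed an out-of-range address mode, so the patch
// deliberately sends the invalid value 0xcafe. Other vendors get the closest
// valid approximation instead.
SamplerAddressMode ClampAddressMode(DriverID driver, TextureFilter filter) {
    if (driver == DriverID::NvidiaProprietary) {
        return static_cast<SamplerAddressMode>(0xcafe);
    }
    return filter == TextureFilter::Nearest ? SamplerAddressMode::ClampToEdge
                                            : SamplerAddressMode::ClampToBorder;
}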
+ +#include <algorithm> +#include <array> +#include <cstring> +#include <memory> +#include <tuple> +#include <vector> + +#include "common/assert.h" +#include "common/common_types.h" +#include "common/math_util.h" + +#include "core/core.h" +#include "core/frontend/emu_window.h" +#include "core/memory.h" + +#include "video_core/gpu.h" +#include "video_core/morton.h" +#include "video_core/rasterizer_interface.h" +#include "video_core/renderer_vulkan/declarations.h" +#include "video_core/renderer_vulkan/renderer_vulkan.h" +#include "video_core/renderer_vulkan/vk_blit_screen.h" +#include "video_core/renderer_vulkan/vk_device.h" +#include "video_core/renderer_vulkan/vk_image.h" +#include "video_core/renderer_vulkan/vk_memory_manager.h" +#include "video_core/renderer_vulkan/vk_resource_manager.h" +#include "video_core/renderer_vulkan/vk_scheduler.h" +#include "video_core/renderer_vulkan/vk_shader_util.h" +#include "video_core/renderer_vulkan/vk_swapchain.h" +#include "video_core/surface.h" + +namespace Vulkan { + +namespace { + +// Generated from the "shaders/" directory, read the instructions there. +constexpr u8 blit_vertex_code[] = { + 0x03, 0x02, 0x23, 0x07, 0x00, 0x00, 0x01, 0x00, 0x07, 0x00, 0x08, 0x00, 0x27, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x06, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x47, 0x4c, 0x53, 0x4c, 0x2e, 0x73, 0x74, 0x64, 0x2e, 0x34, 0x35, 0x30, + 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x0f, 0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e, + 0x00, 0x00, 0x00, 0x00, 0x0d, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, + 0x25, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x0b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, 0x0b, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, + 0x0b, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, + 0x48, 0x00, 0x05, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x48, 0x00, 0x04, 0x00, 0x11, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, + 0x48, 0x00, 0x05, 0x00, 0x11, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, 0x11, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x07, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, 0x11, 0x00, 0x00, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x13, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x13, 0x00, 0x00, 0x00, 0x21, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x19, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x24, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x25, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x13, 0x00, 0x02, 0x00, 0x02, 0x00, 0x00, 0x00, 0x21, 0x00, 0x03, 0x00, + 0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x16, 0x00, 0x03, 0x00, 0x06, 0x00, 0x00, 0x00, + 0x20, 0x00, 0x00, 0x00, 0x17, 0x00, 0x04, 0x00, 0x07, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, 
0x08, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x08, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x04, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, + 0x09, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x06, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, + 0x06, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, + 0x0c, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, + 0x0c, 0x00, 0x00, 0x00, 0x0d, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, + 0x0e, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, + 0x0e, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x18, 0x00, 0x04, 0x00, + 0x10, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x03, 0x00, + 0x11, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x12, 0x00, 0x00, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x12, 0x00, 0x00, 0x00, + 0x13, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x14, 0x00, 0x00, 0x00, + 0x02, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x17, 0x00, 0x04, 0x00, 0x17, 0x00, 0x00, 0x00, + 0x06, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x18, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x18, 0x00, 0x00, 0x00, + 0x19, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, + 0x1b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, + 0x1c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x3f, 0x20, 0x00, 0x04, 0x00, 0x21, 0x00, 0x00, 0x00, + 0x03, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x23, 0x00, 0x00, 0x00, + 0x03, 0x00, 0x00, 0x00, 0x17, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x23, 0x00, 0x00, 0x00, + 0x24, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x18, 0x00, 0x00, 0x00, + 0x25, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x36, 0x00, 0x05, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, + 0x05, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x14, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x00, + 0x13, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, 0x10, 0x00, 0x00, 0x00, + 0x16, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, 0x17, 0x00, 0x00, 0x00, + 0x1a, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x51, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, + 0x1d, 0x00, 0x00, 0x00, 0x1a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x51, 0x00, 0x05, 0x00, + 0x06, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x00, 0x00, 0x1a, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x50, 0x00, 0x07, 0x00, 0x07, 0x00, 0x00, 0x00, 0x1f, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x00, 0x00, + 0x1e, 0x00, 0x00, 0x00, 0x1b, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x91, 0x00, 0x05, 0x00, + 0x07, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x16, 0x00, 0x00, 0x00, 0x1f, 0x00, 0x00, 0x00, + 0x41, 0x00, 0x05, 0x00, 0x21, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x0d, 0x00, 0x00, 0x00, + 0x0f, 0x00, 0x00, 0x00, 0x3e, 0x00, 0x03, 0x00, 0x22, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, + 0x3d, 0x00, 0x04, 0x00, 0x17, 0x00, 0x00, 0x00, 0x26, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00, + 0x3e, 0x00, 0x03, 0x00, 0x24, 0x00, 0x00, 0x00, 0x26, 0x00, 0x00, 0x00, 
0xfd, 0x00, 0x01, 0x00, + 0x38, 0x00, 0x01, 0x00}; + +constexpr u8 blit_fragment_code[] = { + 0x03, 0x02, 0x23, 0x07, 0x00, 0x00, 0x01, 0x00, 0x07, 0x00, 0x08, 0x00, 0x14, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x06, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x47, 0x4c, 0x53, 0x4c, 0x2e, 0x73, 0x74, 0x64, 0x2e, 0x34, 0x35, 0x30, + 0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, + 0x0f, 0x00, 0x07, 0x00, 0x04, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e, + 0x00, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x10, 0x00, 0x03, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, + 0x1e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x0d, 0x00, 0x00, 0x00, + 0x22, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x0d, 0x00, 0x00, 0x00, + 0x21, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x11, 0x00, 0x00, 0x00, + 0x1e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x13, 0x00, 0x02, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x21, 0x00, 0x03, 0x00, 0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x16, 0x00, 0x03, 0x00, + 0x06, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x17, 0x00, 0x04, 0x00, 0x07, 0x00, 0x00, 0x00, + 0x06, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x08, 0x00, 0x00, 0x00, + 0x03, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x08, 0x00, 0x00, 0x00, + 0x09, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x19, 0x00, 0x09, 0x00, 0x0a, 0x00, 0x00, 0x00, + 0x06, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1b, 0x00, 0x03, 0x00, + 0x0b, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, + 0x0d, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x17, 0x00, 0x04, 0x00, 0x0f, 0x00, 0x00, 0x00, + 0x06, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x10, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x10, 0x00, 0x00, 0x00, + 0x11, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x36, 0x00, 0x05, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, + 0x05, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x00, 0x00, + 0x0d, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, + 0x11, 0x00, 0x00, 0x00, 0x57, 0x00, 0x05, 0x00, 0x07, 0x00, 0x00, 0x00, 0x13, 0x00, 0x00, 0x00, + 0x0e, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x3e, 0x00, 0x03, 0x00, 0x09, 0x00, 0x00, 0x00, + 0x13, 0x00, 0x00, 0x00, 0xfd, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00}; + +struct ScreenRectVertex { + ScreenRectVertex() = default; + explicit ScreenRectVertex(f32 x, f32 y, f32 u, f32 v) : position{{x, y}}, tex_coord{{u, v}} {} + + std::array<f32, 2> position; + std::array<f32, 2> tex_coord; + + static vk::VertexInputBindingDescription GetDescription() { + return vk::VertexInputBindingDescription(0, sizeof(ScreenRectVertex), + vk::VertexInputRate::eVertex); + } + + static std::array<vk::VertexInputAttributeDescription, 2> GetAttributes() { + return {vk::VertexInputAttributeDescription(0, 0, 
vk::Format::eR32G32Sfloat, + offsetof(ScreenRectVertex, position)), + vk::VertexInputAttributeDescription(1, 0, vk::Format::eR32G32Sfloat, + offsetof(ScreenRectVertex, tex_coord))}; + } +}; + +constexpr std::array<f32, 4 * 4> MakeOrthographicMatrix(f32 width, f32 height) { + // clang-format off + return { 2.f / width, 0.f, 0.f, 0.f, + 0.f, 2.f / height, 0.f, 0.f, + 0.f, 0.f, 1.f, 0.f, + -1.f, -1.f, 0.f, 1.f}; + // clang-format on +} + +std::size_t GetBytesPerPixel(const Tegra::FramebufferConfig& framebuffer) { + using namespace VideoCore::Surface; + return GetBytesPerPixel(PixelFormatFromGPUPixelFormat(framebuffer.pixel_format)); +} + +std::size_t GetSizeInBytes(const Tegra::FramebufferConfig& framebuffer) { + return static_cast<std::size_t>(framebuffer.stride) * + static_cast<std::size_t>(framebuffer.height) * GetBytesPerPixel(framebuffer); +} + +vk::Format GetFormat(const Tegra::FramebufferConfig& framebuffer) { + switch (framebuffer.pixel_format) { + case Tegra::FramebufferConfig::PixelFormat::ABGR8: + return vk::Format::eA8B8G8R8UnormPack32; + case Tegra::FramebufferConfig::PixelFormat::RGB565: + return vk::Format::eR5G6B5UnormPack16; + default: + UNIMPLEMENTED_MSG("Unknown framebuffer pixel format: {}", + static_cast<u32>(framebuffer.pixel_format)); + return vk::Format::eA8B8G8R8UnormPack32; + } +} + +} // Anonymous namespace + +struct VKBlitScreen::BufferData { + struct { + std::array<f32, 4 * 4> modelview_matrix; + } uniform; + + std::array<ScreenRectVertex, 4> vertices; + + // Unaligned image data goes here +}; + +VKBlitScreen::VKBlitScreen(Core::System& system, Core::Frontend::EmuWindow& render_window, + VideoCore::RasterizerInterface& rasterizer, const VKDevice& device, + VKResourceManager& resource_manager, VKMemoryManager& memory_manager, + VKSwapchain& swapchain, VKScheduler& scheduler, + const VKScreenInfo& screen_info) + : system{system}, render_window{render_window}, rasterizer{rasterizer}, device{device}, + resource_manager{resource_manager}, memory_manager{memory_manager}, swapchain{swapchain}, + scheduler{scheduler}, image_count{swapchain.GetImageCount()}, screen_info{screen_info} { + watches.resize(image_count); + std::generate(watches.begin(), watches.end(), + []() { return std::make_unique<VKFenceWatch>(); }); + + CreateStaticResources(); + CreateDynamicResources(); +} + +VKBlitScreen::~VKBlitScreen() = default; + +void VKBlitScreen::Recreate() { + CreateDynamicResources(); +} + +std::tuple<VKFence&, vk::Semaphore> VKBlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer, + bool use_accelerated) { + RefreshResources(framebuffer); + + // Finish any pending renderpass + scheduler.RequestOutsideRenderPassOperationContext(); + + const std::size_t image_index = swapchain.GetImageIndex(); + watches[image_index]->Watch(scheduler.GetFence()); + + VKImage* blit_image = use_accelerated ? 
screen_info.image : raw_images[image_index].get(); + + UpdateDescriptorSet(image_index, blit_image->GetPresentView()); + + BufferData data; + SetUniformData(data, framebuffer); + SetVertexData(data, framebuffer); + + auto map = buffer_commit->Map(); + std::memcpy(map.GetAddress(), &data, sizeof(data)); + + if (!use_accelerated) { + const u64 image_offset = GetRawImageOffset(framebuffer, image_index); + + const auto pixel_format = + VideoCore::Surface::PixelFormatFromGPUPixelFormat(framebuffer.pixel_format); + const VAddr framebuffer_addr = framebuffer.address + framebuffer.offset; + const auto host_ptr = system.Memory().GetPointer(framebuffer_addr); + rasterizer.FlushRegion(ToCacheAddr(host_ptr), GetSizeInBytes(framebuffer)); + + // TODO(Rodrigo): Read this from HLE + constexpr u32 block_height_log2 = 4; + VideoCore::MortonSwizzle(VideoCore::MortonSwizzleMode::MortonToLinear, pixel_format, + framebuffer.stride, block_height_log2, framebuffer.height, 0, 1, 1, + map.GetAddress() + image_offset, host_ptr); + + blit_image->Transition(0, 1, 0, 1, vk::PipelineStageFlagBits::eTransfer, + vk::AccessFlagBits::eTransferWrite, + vk::ImageLayout::eTransferDstOptimal); + + const vk::BufferImageCopy copy(image_offset, 0, 0, + {vk::ImageAspectFlagBits::eColor, 0, 0, 1}, {0, 0, 0}, + {framebuffer.width, framebuffer.height, 1}); + scheduler.Record([buffer_handle = *buffer, image = blit_image->GetHandle(), + copy](auto cmdbuf, auto& dld) { + cmdbuf.copyBufferToImage(buffer_handle, image, vk::ImageLayout::eTransferDstOptimal, + {copy}, dld); + }); + } + map.Release(); + + blit_image->Transition(0, 1, 0, 1, vk::PipelineStageFlagBits::eFragmentShader, + vk::AccessFlagBits::eShaderRead, + vk::ImageLayout::eShaderReadOnlyOptimal); + + scheduler.Record([renderpass = *renderpass, framebuffer = *framebuffers[image_index], + descriptor_set = descriptor_sets[image_index], buffer = *buffer, + size = swapchain.GetSize(), pipeline = *pipeline, + layout = *pipeline_layout](auto cmdbuf, auto& dld) { + const vk::ClearValue clear_color{std::array{0.0f, 0.0f, 0.0f, 1.0f}}; + const vk::RenderPassBeginInfo renderpass_bi(renderpass, framebuffer, {{0, 0}, size}, 1, + &clear_color); + + cmdbuf.beginRenderPass(renderpass_bi, vk::SubpassContents::eInline, dld); + cmdbuf.bindPipeline(vk::PipelineBindPoint::eGraphics, pipeline, dld); + cmdbuf.setViewport( + 0, + {{0.0f, 0.0f, static_cast<f32>(size.width), static_cast<f32>(size.height), 0.0f, 1.0f}}, + dld); + cmdbuf.setScissor(0, {{{0, 0}, size}}, dld); + + cmdbuf.bindVertexBuffers(0, {buffer}, {offsetof(BufferData, vertices)}, dld); + cmdbuf.bindDescriptorSets(vk::PipelineBindPoint::eGraphics, layout, 0, {descriptor_set}, {}, + dld); + cmdbuf.draw(4, 1, 0, 0, dld); + cmdbuf.endRenderPass(dld); + }); + + return {scheduler.GetFence(), *semaphores[image_index]}; +} + +void VKBlitScreen::CreateStaticResources() { + CreateShaders(); + CreateSemaphores(); + CreateDescriptorPool(); + CreateDescriptorSetLayout(); + CreateDescriptorSets(); + CreatePipelineLayout(); + CreateSampler(); +} + +void VKBlitScreen::CreateDynamicResources() { + CreateRenderPass(); + CreateFramebuffers(); + CreateGraphicsPipeline(); +} + +void VKBlitScreen::RefreshResources(const Tegra::FramebufferConfig& framebuffer) { + if (framebuffer.width == raw_width && framebuffer.height == raw_height && !raw_images.empty()) { + return; + } + raw_width = framebuffer.width; + raw_height = framebuffer.height; + ReleaseRawImages(); + + CreateStagingBuffer(framebuffer); + CreateRawImages(framebuffer); +} + +void 
VKBlitScreen::CreateShaders() { + vertex_shader = BuildShader(device, sizeof(blit_vertex_code), blit_vertex_code); + fragment_shader = BuildShader(device, sizeof(blit_fragment_code), blit_fragment_code); +} + +void VKBlitScreen::CreateSemaphores() { + const auto dev = device.GetLogical(); + const auto& dld = device.GetDispatchLoader(); + + semaphores.resize(image_count); + for (std::size_t i = 0; i < image_count; ++i) { + semaphores[i] = dev.createSemaphoreUnique({}, nullptr, dld); + } +} + +void VKBlitScreen::CreateDescriptorPool() { + const std::array<vk::DescriptorPoolSize, 2> pool_sizes{ + vk::DescriptorPoolSize{vk::DescriptorType::eUniformBuffer, static_cast<u32>(image_count)}, + vk::DescriptorPoolSize{vk::DescriptorType::eCombinedImageSampler, + static_cast<u32>(image_count)}}; + const vk::DescriptorPoolCreateInfo pool_ci( + {}, static_cast<u32>(image_count), static_cast<u32>(pool_sizes.size()), pool_sizes.data()); + const auto dev = device.GetLogical(); + descriptor_pool = dev.createDescriptorPoolUnique(pool_ci, nullptr, device.GetDispatchLoader()); +} + +void VKBlitScreen::CreateRenderPass() { + const vk::AttachmentDescription color_attachment( + {}, swapchain.GetImageFormat(), vk::SampleCountFlagBits::e1, vk::AttachmentLoadOp::eClear, + vk::AttachmentStoreOp::eStore, vk::AttachmentLoadOp::eDontCare, + vk::AttachmentStoreOp::eDontCare, vk::ImageLayout::eUndefined, + vk::ImageLayout::ePresentSrcKHR); + + const vk::AttachmentReference color_attachment_ref(0, vk::ImageLayout::eColorAttachmentOptimal); + + const vk::SubpassDescription subpass_description({}, vk::PipelineBindPoint::eGraphics, 0, + nullptr, 1, &color_attachment_ref, nullptr, + nullptr, 0, nullptr); + + const vk::SubpassDependency dependency( + VK_SUBPASS_EXTERNAL, 0, vk::PipelineStageFlagBits::eColorAttachmentOutput, + vk::PipelineStageFlagBits::eColorAttachmentOutput, {}, + vk::AccessFlagBits::eColorAttachmentRead | vk::AccessFlagBits::eColorAttachmentWrite, {}); + + const vk::RenderPassCreateInfo renderpass_ci({}, 1, &color_attachment, 1, &subpass_description, + 1, &dependency); + + const auto dev = device.GetLogical(); + renderpass = dev.createRenderPassUnique(renderpass_ci, nullptr, device.GetDispatchLoader()); +} + +void VKBlitScreen::CreateDescriptorSetLayout() { + const std::array<vk::DescriptorSetLayoutBinding, 2> layout_bindings{ + vk::DescriptorSetLayoutBinding(0, vk::DescriptorType::eUniformBuffer, 1, + vk::ShaderStageFlagBits::eVertex, nullptr), + vk::DescriptorSetLayoutBinding(1, vk::DescriptorType::eCombinedImageSampler, 1, + vk::ShaderStageFlagBits::eFragment, nullptr)}; + const vk::DescriptorSetLayoutCreateInfo descriptor_layout_ci( + {}, static_cast<u32>(layout_bindings.size()), layout_bindings.data()); + + const auto dev = device.GetLogical(); + const auto& dld = device.GetDispatchLoader(); + descriptor_set_layout = dev.createDescriptorSetLayoutUnique(descriptor_layout_ci, nullptr, dld); +} + +void VKBlitScreen::CreateDescriptorSets() { + const auto dev = device.GetLogical(); + const auto& dld = device.GetDispatchLoader(); + + descriptor_sets.resize(image_count); + for (std::size_t i = 0; i < image_count; ++i) { + const vk::DescriptorSetLayout layout = *descriptor_set_layout; + const vk::DescriptorSetAllocateInfo descriptor_set_ai(*descriptor_pool, 1, &layout); + const vk::Result result = + dev.allocateDescriptorSets(&descriptor_set_ai, &descriptor_sets[i], dld); + ASSERT(result == vk::Result::eSuccess); + } +} + +void VKBlitScreen::CreatePipelineLayout() { + const vk::PipelineLayoutCreateInfo 
pipeline_layout_ci({}, 1, &descriptor_set_layout.get(), 0, + nullptr); + const auto dev = device.GetLogical(); + const auto& dld = device.GetDispatchLoader(); + pipeline_layout = dev.createPipelineLayoutUnique(pipeline_layout_ci, nullptr, dld); +} + +void VKBlitScreen::CreateGraphicsPipeline() { + const std::array shader_stages = { + vk::PipelineShaderStageCreateInfo({}, vk::ShaderStageFlagBits::eVertex, *vertex_shader, + "main", nullptr), + vk::PipelineShaderStageCreateInfo({}, vk::ShaderStageFlagBits::eFragment, *fragment_shader, + "main", nullptr)}; + + const auto vertex_binding_description = ScreenRectVertex::GetDescription(); + const auto vertex_attrs_description = ScreenRectVertex::GetAttributes(); + const vk::PipelineVertexInputStateCreateInfo vertex_input( + {}, 1, &vertex_binding_description, static_cast<u32>(vertex_attrs_description.size()), + vertex_attrs_description.data()); + + const vk::PipelineInputAssemblyStateCreateInfo input_assembly( + {}, vk::PrimitiveTopology::eTriangleStrip, false); + + // Set a dummy viewport, it's going to be replaced by dynamic states. + const vk::Viewport viewport(0.0f, 0.0f, 1.0f, 1.0f, 0.0f, 1.0f); + const vk::Rect2D scissor({0, 0}, {1, 1}); + + const vk::PipelineViewportStateCreateInfo viewport_state({}, 1, &viewport, 1, &scissor); + + const vk::PipelineRasterizationStateCreateInfo rasterizer( + {}, false, false, vk::PolygonMode::eFill, vk::CullModeFlagBits::eNone, + vk::FrontFace::eClockwise, false, 0.0f, 0.0f, 0.0f, 1.0f); + + const vk::PipelineMultisampleStateCreateInfo multisampling({}, vk::SampleCountFlagBits::e1, + false, 0.0f, nullptr, false, false); + + const vk::PipelineColorBlendAttachmentState color_blend_attachment( + false, vk::BlendFactor::eZero, vk::BlendFactor::eZero, vk::BlendOp::eAdd, + vk::BlendFactor::eZero, vk::BlendFactor::eZero, vk::BlendOp::eAdd, + vk::ColorComponentFlagBits::eR | vk::ColorComponentFlagBits::eG | + vk::ColorComponentFlagBits::eB | vk::ColorComponentFlagBits::eA); + + const vk::PipelineColorBlendStateCreateInfo color_blending( + {}, false, vk::LogicOp::eCopy, 1, &color_blend_attachment, {0.0f, 0.0f, 0.0f, 0.0f}); + + const std::array<vk::DynamicState, 2> dynamic_states = {vk::DynamicState::eViewport, + vk::DynamicState::eScissor}; + + const vk::PipelineDynamicStateCreateInfo dynamic_state( + {}, static_cast<u32>(dynamic_states.size()), dynamic_states.data()); + + const vk::GraphicsPipelineCreateInfo pipeline_ci( + {}, static_cast<u32>(shader_stages.size()), shader_stages.data(), &vertex_input, + &input_assembly, nullptr, &viewport_state, &rasterizer, &multisampling, nullptr, + &color_blending, &dynamic_state, *pipeline_layout, *renderpass, 0, nullptr, 0); + + const auto dev = device.GetLogical(); + const auto& dld = device.GetDispatchLoader(); + pipeline = dev.createGraphicsPipelineUnique({}, pipeline_ci, nullptr, dld); +} + +void VKBlitScreen::CreateSampler() { + const auto dev = device.GetLogical(); + const auto& dld = device.GetDispatchLoader(); + const vk::SamplerCreateInfo sampler_ci( + {}, vk::Filter::eLinear, vk::Filter::eLinear, vk::SamplerMipmapMode::eLinear, + vk::SamplerAddressMode::eClampToBorder, vk::SamplerAddressMode::eClampToBorder, + vk::SamplerAddressMode::eClampToBorder, 0.0f, false, 0.0f, false, vk::CompareOp::eNever, + 0.0f, 0.0f, vk::BorderColor::eFloatOpaqueBlack, false); + sampler = dev.createSamplerUnique(sampler_ci, nullptr, dld); +} + +void VKBlitScreen::CreateFramebuffers() { + const vk::Extent2D size{swapchain.GetSize()}; + framebuffers.clear(); + 
framebuffers.resize(image_count); + + const auto dev = device.GetLogical(); + const auto& dld = device.GetDispatchLoader(); + + for (std::size_t i = 0; i < image_count; ++i) { + const vk::ImageView image_view{swapchain.GetImageViewIndex(i)}; + const vk::FramebufferCreateInfo framebuffer_ci({}, *renderpass, 1, &image_view, size.width, + size.height, 1); + framebuffers[i] = dev.createFramebufferUnique(framebuffer_ci, nullptr, dld); + } +} + +void VKBlitScreen::ReleaseRawImages() { + for (std::size_t i = 0; i < raw_images.size(); ++i) { + watches[i]->Wait(); + } + raw_images.clear(); + raw_buffer_commits.clear(); + buffer.reset(); + buffer_commit.reset(); +} + +void VKBlitScreen::CreateStagingBuffer(const Tegra::FramebufferConfig& framebuffer) { + const auto dev = device.GetLogical(); + const auto& dld = device.GetDispatchLoader(); + + const vk::BufferCreateInfo buffer_ci({}, CalculateBufferSize(framebuffer), + vk::BufferUsageFlagBits::eTransferSrc | + vk::BufferUsageFlagBits::eVertexBuffer | + vk::BufferUsageFlagBits::eUniformBuffer, + vk::SharingMode::eExclusive, 0, nullptr); + buffer = dev.createBufferUnique(buffer_ci, nullptr, dld); + buffer_commit = memory_manager.Commit(*buffer, true); +} + +void VKBlitScreen::CreateRawImages(const Tegra::FramebufferConfig& framebuffer) { + raw_images.resize(image_count); + raw_buffer_commits.resize(image_count); + + const auto format = GetFormat(framebuffer); + for (std::size_t i = 0; i < image_count; ++i) { + const vk::ImageCreateInfo image_ci( + {}, vk::ImageType::e2D, format, {framebuffer.width, framebuffer.height, 1}, 1, 1, + vk::SampleCountFlagBits::e1, vk::ImageTiling::eOptimal, + vk::ImageUsageFlagBits::eTransferDst | vk::ImageUsageFlagBits::eSampled, + vk::SharingMode::eExclusive, 0, nullptr, vk::ImageLayout::eUndefined); + + raw_images[i] = + std::make_unique<VKImage>(device, scheduler, image_ci, vk::ImageAspectFlagBits::eColor); + raw_buffer_commits[i] = memory_manager.Commit(raw_images[i]->GetHandle(), false); + } +} + +void VKBlitScreen::UpdateDescriptorSet(std::size_t image_index, vk::ImageView image_view) const { + const vk::DescriptorSet descriptor_set = descriptor_sets[image_index]; + + const vk::DescriptorBufferInfo buffer_info(*buffer, offsetof(BufferData, uniform), + sizeof(BufferData::uniform)); + const vk::WriteDescriptorSet ubo_write(descriptor_set, 0, 0, 1, + vk::DescriptorType::eUniformBuffer, nullptr, + &buffer_info, nullptr); + + const vk::DescriptorImageInfo image_info(*sampler, image_view, + vk::ImageLayout::eShaderReadOnlyOptimal); + const vk::WriteDescriptorSet sampler_write(descriptor_set, 1, 0, 1, + vk::DescriptorType::eCombinedImageSampler, + &image_info, nullptr, nullptr); + + const auto dev = device.GetLogical(); + const auto& dld = device.GetDispatchLoader(); + dev.updateDescriptorSets({ubo_write, sampler_write}, {}, dld); +} + +void VKBlitScreen::SetUniformData(BufferData& data, + const Tegra::FramebufferConfig& framebuffer) const { + const auto& layout = render_window.GetFramebufferLayout(); + data.uniform.modelview_matrix = + MakeOrthographicMatrix(static_cast<f32>(layout.width), static_cast<f32>(layout.height)); +} + +void VKBlitScreen::SetVertexData(BufferData& data, + const Tegra::FramebufferConfig& framebuffer) const { + const auto& framebuffer_transform_flags = framebuffer.transform_flags; + const auto& framebuffer_crop_rect = framebuffer.crop_rect; + + static constexpr Common::Rectangle<f32> texcoords{0.f, 0.f, 1.f, 1.f}; + auto left = texcoords.left; + auto right = texcoords.right; + + switch 
(framebuffer_transform_flags) { + case Tegra::FramebufferConfig::TransformFlags::Unset: + break; + case Tegra::FramebufferConfig::TransformFlags::FlipV: + // Flip the framebuffer vertically + left = texcoords.right; + right = texcoords.left; + break; + default: + UNIMPLEMENTED_MSG("Unsupported framebuffer_transform_flags={}", + static_cast<u32>(framebuffer_transform_flags)); + break; + } + + UNIMPLEMENTED_IF(framebuffer_crop_rect.top != 0); + UNIMPLEMENTED_IF(framebuffer_crop_rect.left != 0); + + // Scale the output by the crop width/height. This is commonly used with 1280x720 rendering + // (e.g. handheld mode) on a 1920x1080 framebuffer. + f32 scale_u = 1.0f; + f32 scale_v = 1.0f; + if (framebuffer_crop_rect.GetWidth() > 0) { + scale_u = static_cast<f32>(framebuffer_crop_rect.GetWidth()) / + static_cast<f32>(screen_info.width); + } + if (framebuffer_crop_rect.GetHeight() > 0) { + scale_v = static_cast<f32>(framebuffer_crop_rect.GetHeight()) / + static_cast<f32>(screen_info.height); + } + + const auto& screen = render_window.GetFramebufferLayout().screen; + const auto x = static_cast<f32>(screen.left); + const auto y = static_cast<f32>(screen.top); + const auto w = static_cast<f32>(screen.GetWidth()); + const auto h = static_cast<f32>(screen.GetHeight()); + data.vertices[0] = ScreenRectVertex(x, y, texcoords.top * scale_u, left * scale_v); + data.vertices[1] = ScreenRectVertex(x + w, y, texcoords.bottom * scale_u, left * scale_v); + data.vertices[2] = ScreenRectVertex(x, y + h, texcoords.top * scale_u, right * scale_v); + data.vertices[3] = ScreenRectVertex(x + w, y + h, texcoords.bottom * scale_u, right * scale_v); +} + +u64 VKBlitScreen::CalculateBufferSize(const Tegra::FramebufferConfig& framebuffer) const { + return sizeof(BufferData) + GetSizeInBytes(framebuffer) * image_count; +} + +u64 VKBlitScreen::GetRawImageOffset(const Tegra::FramebufferConfig& framebuffer, + std::size_t image_index) const { + constexpr auto first_image_offset = static_cast<u64>(sizeof(BufferData)); + return first_image_offset + GetSizeInBytes(framebuffer) * image_index; +} + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.h b/src/video_core/renderer_vulkan/vk_blit_screen.h new file mode 100644 index 000000000..ea680b3f5 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_blit_screen.h @@ -0,0 +1,119 @@ +// Copyright 2018 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
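To make the crop-rect scaling in SetVertexData above concrete: for the handheld-mode case named in its comment (a 1280x720 crop presented from a 1920x1080 backing image), the UV scale factors work out as below. This is a standalone sketch with assumed example values, not code from the patch:

#include <cstdio>

int main() {
    // Assumed example: 1280x720 crop rect (handheld mode) on a
    // 1920x1080 screen_info image.
    const float crop_w = 1280.0f, crop_h = 720.0f;
    const float image_w = 1920.0f, image_h = 1080.0f;

    // Same arithmetic as SetVertexData: shrink the sampled texture
    // coordinates so only the rendered crop region covers the quad.
    const float scale_u = crop_w / image_w; // = 0.6667
    const float scale_v = crop_h / image_h; // = 0.6667
    std::printf("scale_u=%.4f scale_v=%.4f\n", scale_u, scale_v);
    return 0;
}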
+ +#pragma once + +#include <array> +#include <memory> +#include <tuple> + +#include "video_core/renderer_vulkan/declarations.h" +#include "video_core/renderer_vulkan/vk_memory_manager.h" +#include "video_core/renderer_vulkan/vk_resource_manager.h" + +namespace Core { +class System; +} + +namespace Core::Frontend { +class EmuWindow; +} + +namespace Tegra { +struct FramebufferConfig; +} + +namespace VideoCore { +class RasterizerInterface; +} + +namespace Vulkan { + +struct ScreenInfo; +class RasterizerVulkan; +class VKDevice; +class VKFence; +class VKImage; +class VKScheduler; +class VKSwapchain; + +class VKBlitScreen final { +public: + explicit VKBlitScreen(Core::System& system, Core::Frontend::EmuWindow& render_window, + VideoCore::RasterizerInterface& rasterizer, const VKDevice& device, + VKResourceManager& resource_manager, VKMemoryManager& memory_manager, + VKSwapchain& swapchain, VKScheduler& scheduler, + const VKScreenInfo& screen_info); + ~VKBlitScreen(); + + void Recreate(); + + std::tuple<VKFence&, vk::Semaphore> Draw(const Tegra::FramebufferConfig& framebuffer, + bool use_accelerated); + +private: + struct BufferData; + + void CreateStaticResources(); + void CreateShaders(); + void CreateSemaphores(); + void CreateDescriptorPool(); + void CreateRenderPass(); + void CreateDescriptorSetLayout(); + void CreateDescriptorSets(); + void CreatePipelineLayout(); + void CreateGraphicsPipeline(); + void CreateSampler(); + + void CreateDynamicResources(); + void CreateFramebuffers(); + + void RefreshResources(const Tegra::FramebufferConfig& framebuffer); + void ReleaseRawImages(); + void CreateStagingBuffer(const Tegra::FramebufferConfig& framebuffer); + void CreateRawImages(const Tegra::FramebufferConfig& framebuffer); + + void UpdateDescriptorSet(std::size_t image_index, vk::ImageView image_view) const; + void SetUniformData(BufferData& data, const Tegra::FramebufferConfig& framebuffer) const; + void SetVertexData(BufferData& data, const Tegra::FramebufferConfig& framebuffer) const; + + u64 CalculateBufferSize(const Tegra::FramebufferConfig& framebuffer) const; + u64 GetRawImageOffset(const Tegra::FramebufferConfig& framebuffer, + std::size_t image_index) const; + + Core::System& system; + Core::Frontend::EmuWindow& render_window; + VideoCore::RasterizerInterface& rasterizer; + const VKDevice& device; + VKResourceManager& resource_manager; + VKMemoryManager& memory_manager; + VKSwapchain& swapchain; + VKScheduler& scheduler; + const std::size_t image_count; + const VKScreenInfo& screen_info; + + UniqueShaderModule vertex_shader; + UniqueShaderModule fragment_shader; + UniqueDescriptorPool descriptor_pool; + UniqueDescriptorSetLayout descriptor_set_layout; + UniquePipelineLayout pipeline_layout; + UniquePipeline pipeline; + UniqueRenderPass renderpass; + std::vector<UniqueFramebuffer> framebuffers; + std::vector<vk::DescriptorSet> descriptor_sets; + UniqueSampler sampler; + + UniqueBuffer buffer; + VKMemoryCommit buffer_commit; + + std::vector<std::unique_ptr<VKFenceWatch>> watches; + + std::vector<UniqueSemaphore> semaphores; + std::vector<std::unique_ptr<VKImage>> raw_images; + std::vector<VKMemoryCommit> raw_buffer_commits; + u32 raw_width = 0; + u32 raw_height = 0; +}; + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 2e0536bf6..b155dfb49 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -191,8 
+191,7 @@ UniquePipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& render const vk::PipelineRasterizationStateCreateInfo rasterizer_ci( {}, rs.depth_clamp_enable, false, vk::PolygonMode::eFill, rs.cull_enable ? MaxwellToVK::CullFace(rs.cull_face) : vk::CullModeFlagBits::eNone, - rs.cull_enable ? MaxwellToVK::FrontFace(rs.front_face) : vk::FrontFace::eCounterClockwise, - rs.depth_bias_enable, 0.0f, 0.0f, 0.0f, 1.0f); + MaxwellToVK::FrontFace(rs.front_face), rs.depth_bias_enable, 0.0f, 0.0f, 0.0f, 1.0f); const vk::PipelineMultisampleStateCreateInfo multisampling_ci( {}, vk::SampleCountFlagBits::e1, false, 0.0f, nullptr, false, false); diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 48e23d4cd..7ddf7d3ee 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -325,9 +325,6 @@ VKPipelineCache::DecompileShaders(const GraphicsPipelineCacheKey& key) { specialization.tessellation.primitive = fixed_state.tessellation.primitive; specialization.tessellation.spacing = fixed_state.tessellation.spacing; specialization.tessellation.clockwise = fixed_state.tessellation.clockwise; - for (const auto& rt : key.renderpass_params.color_attachments) { - specialization.enabled_rendertargets.set(rt.index); - } SPIRVProgram program; std::vector<vk::DescriptorSetLayoutBinding> bindings; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp new file mode 100644 index 000000000..d2c6b1189 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -0,0 +1,1141 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
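+//
+// Overview of the draw path implemented below (a summary of RasterizerVulkan::Draw,
+// not additional behaviour):
+//   1. FlushWork()              - every few draws, hand recorded work to the worker
+//      thread and eventually submit to the driver.
+//   2. buffer_cache.Map()       - reserve stream-buffer space for geometry and uniforms.
+//   3. SetupGeometry()          - upload vertex arrays and, if indexed, the index buffer.
+//   4. SetupShaderDescriptors() - queue const/global/texel buffer, texture and image
+//      descriptor updates for each enabled shader stage.
+//   5. UpdateAttachments()      - fetch render targets and flag "texceptions" (targets
+//      that are simultaneously sampled, forcing vk::ImageLayout::eGeneral).
+//   6. ConfigureFramebuffers()  - build or reuse a framebuffer for the renderpass.
+//   7. Record dynamic state, bindings and the final draw through the scheduler.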
+ +#include <algorithm> +#include <array> +#include <memory> +#include <mutex> +#include <vector> + +#include <boost/container/static_vector.hpp> +#include <boost/functional/hash.hpp> + +#include "common/alignment.h" +#include "common/assert.h" +#include "common/logging/log.h" +#include "common/microprofile.h" +#include "core/core.h" +#include "core/memory.h" +#include "video_core/engines/kepler_compute.h" +#include "video_core/engines/maxwell_3d.h" +#include "video_core/renderer_vulkan/declarations.h" +#include "video_core/renderer_vulkan/fixed_pipeline_state.h" +#include "video_core/renderer_vulkan/maxwell_to_vk.h" +#include "video_core/renderer_vulkan/renderer_vulkan.h" +#include "video_core/renderer_vulkan/vk_buffer_cache.h" +#include "video_core/renderer_vulkan/vk_compute_pass.h" +#include "video_core/renderer_vulkan/vk_compute_pipeline.h" +#include "video_core/renderer_vulkan/vk_descriptor_pool.h" +#include "video_core/renderer_vulkan/vk_device.h" +#include "video_core/renderer_vulkan/vk_graphics_pipeline.h" +#include "video_core/renderer_vulkan/vk_pipeline_cache.h" +#include "video_core/renderer_vulkan/vk_rasterizer.h" +#include "video_core/renderer_vulkan/vk_renderpass_cache.h" +#include "video_core/renderer_vulkan/vk_resource_manager.h" +#include "video_core/renderer_vulkan/vk_sampler_cache.h" +#include "video_core/renderer_vulkan/vk_scheduler.h" +#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" +#include "video_core/renderer_vulkan/vk_texture_cache.h" +#include "video_core/renderer_vulkan/vk_update_descriptor.h" + +namespace Vulkan { + +using Maxwell = Tegra::Engines::Maxwell3D::Regs; + +MICROPROFILE_DEFINE(Vulkan_WaitForWorker, "Vulkan", "Wait for worker", MP_RGB(255, 192, 192)); +MICROPROFILE_DEFINE(Vulkan_Drawing, "Vulkan", "Record drawing", MP_RGB(192, 128, 128)); +MICROPROFILE_DEFINE(Vulkan_Compute, "Vulkan", "Record compute", MP_RGB(192, 128, 128)); +MICROPROFILE_DEFINE(Vulkan_Clearing, "Vulkan", "Record clearing", MP_RGB(192, 128, 128)); +MICROPROFILE_DEFINE(Vulkan_Geometry, "Vulkan", "Setup geometry", MP_RGB(192, 128, 128)); +MICROPROFILE_DEFINE(Vulkan_ConstBuffers, "Vulkan", "Setup constant buffers", MP_RGB(192, 128, 128)); +MICROPROFILE_DEFINE(Vulkan_GlobalBuffers, "Vulkan", "Setup global buffers", MP_RGB(192, 128, 128)); +MICROPROFILE_DEFINE(Vulkan_RenderTargets, "Vulkan", "Setup render targets", MP_RGB(192, 128, 128)); +MICROPROFILE_DEFINE(Vulkan_Textures, "Vulkan", "Setup textures", MP_RGB(192, 128, 128)); +MICROPROFILE_DEFINE(Vulkan_Images, "Vulkan", "Setup images", MP_RGB(192, 128, 128)); +MICROPROFILE_DEFINE(Vulkan_PipelineCache, "Vulkan", "Pipeline cache", MP_RGB(192, 128, 128)); + +namespace { + +constexpr auto ComputeShaderIndex = static_cast<std::size_t>(Tegra::Engines::ShaderType::Compute); + +vk::Viewport GetViewportState(const VKDevice& device, const Maxwell& regs, std::size_t index) { + const auto& viewport = regs.viewport_transform[index]; + const float x = viewport.translate_x - viewport.scale_x; + const float y = viewport.translate_y - viewport.scale_y; + const float width = viewport.scale_x * 2.0f; + const float height = viewport.scale_y * 2.0f; + + const float reduce_z = regs.depth_mode == Maxwell::DepthMode::MinusOneToOne; + float near = viewport.translate_z - viewport.scale_z * reduce_z; + float far = viewport.translate_z + viewport.scale_z; + if (!device.IsExtDepthRangeUnrestrictedSupported()) { + near = std::clamp(near, 0.0f, 1.0f); + far = std::clamp(far, 0.0f, 1.0f); + } + + return vk::Viewport(x, y, width != 0 ? 
width : 1.0f, height != 0 ? height : 1.0f, near, far); +} + +constexpr vk::Rect2D GetScissorState(const Maxwell& regs, std::size_t index) { + const auto& scissor = regs.scissor_test[index]; + if (!scissor.enable) { + return {{0, 0}, {INT32_MAX, INT32_MAX}}; + } + const u32 width = scissor.max_x - scissor.min_x; + const u32 height = scissor.max_y - scissor.min_y; + return {{static_cast<s32>(scissor.min_x), static_cast<s32>(scissor.min_y)}, {width, height}}; +} + +std::array<GPUVAddr, Maxwell::MaxShaderProgram> GetShaderAddresses( + const std::array<Shader, Maxwell::MaxShaderProgram>& shaders) { + std::array<GPUVAddr, Maxwell::MaxShaderProgram> addresses; + for (std::size_t i = 0; i < std::size(addresses); ++i) { + addresses[i] = shaders[i] ? shaders[i]->GetGpuAddr() : 0; + } + return addresses; +} + +void TransitionImages(const std::vector<ImageView>& views, vk::PipelineStageFlags pipeline_stage, + vk::AccessFlags access) { + for (auto& [view, layout] : views) { + view->Transition(*layout, pipeline_stage, access); + } +} + +template <typename Engine, typename Entry> +Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry& entry, + std::size_t stage) { + const auto stage_type = static_cast<Tegra::Engines::ShaderType>(stage); + if (entry.IsBindless()) { + const Tegra::Texture::TextureHandle tex_handle = + engine.AccessConstBuffer32(stage_type, entry.GetBuffer(), entry.GetOffset()); + return engine.GetTextureInfo(tex_handle); + } + if constexpr (std::is_same_v<Engine, Tegra::Engines::Maxwell3D>) { + return engine.GetStageTexture(stage_type, entry.GetOffset()); + } else { + return engine.GetTexture(entry.GetOffset()); + } +} + +} // Anonymous namespace + +class BufferBindings final { +public: + void AddVertexBinding(const vk::Buffer* buffer, vk::DeviceSize offset) { + vertex.buffer_ptrs[vertex.num_buffers] = buffer; + vertex.offsets[vertex.num_buffers] = offset; + ++vertex.num_buffers; + } + + void SetIndexBinding(const vk::Buffer* buffer, vk::DeviceSize offset, vk::IndexType type) { + index.buffer = buffer; + index.offset = offset; + index.type = type; + } + + void Bind(VKScheduler& scheduler) const { + // Use this large switch case to avoid dispatching more memory in the record lambda than + // what we need. It looks horrible, but it's the best we can do on standard C++. 
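+        // For illustration: BindStatic<2>() below records a lambda that captures
+        // exactly std::array<vk::Buffer, 2> and std::array<vk::DeviceSize, 2> by
+        // value, so no heap allocation or unused bytes travel through
+        // VKScheduler::Record. A generic implementation would capture a runtime-sized
+        // std::vector instead, at the cost of an allocation per bind.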
+ switch (vertex.num_buffers) { + case 0: + return BindStatic<0>(scheduler); + case 1: + return BindStatic<1>(scheduler); + case 2: + return BindStatic<2>(scheduler); + case 3: + return BindStatic<3>(scheduler); + case 4: + return BindStatic<4>(scheduler); + case 5: + return BindStatic<5>(scheduler); + case 6: + return BindStatic<6>(scheduler); + case 7: + return BindStatic<7>(scheduler); + case 8: + return BindStatic<8>(scheduler); + case 9: + return BindStatic<9>(scheduler); + case 10: + return BindStatic<10>(scheduler); + case 11: + return BindStatic<11>(scheduler); + case 12: + return BindStatic<12>(scheduler); + case 13: + return BindStatic<13>(scheduler); + case 14: + return BindStatic<14>(scheduler); + case 15: + return BindStatic<15>(scheduler); + case 16: + return BindStatic<16>(scheduler); + case 17: + return BindStatic<17>(scheduler); + case 18: + return BindStatic<18>(scheduler); + case 19: + return BindStatic<19>(scheduler); + case 20: + return BindStatic<20>(scheduler); + case 21: + return BindStatic<21>(scheduler); + case 22: + return BindStatic<22>(scheduler); + case 23: + return BindStatic<23>(scheduler); + case 24: + return BindStatic<24>(scheduler); + case 25: + return BindStatic<25>(scheduler); + case 26: + return BindStatic<26>(scheduler); + case 27: + return BindStatic<27>(scheduler); + case 28: + return BindStatic<28>(scheduler); + case 29: + return BindStatic<29>(scheduler); + case 30: + return BindStatic<30>(scheduler); + case 31: + return BindStatic<31>(scheduler); + case 32: + return BindStatic<32>(scheduler); + } + UNREACHABLE(); + } + +private: + // Some of these fields are intentionally left uninitialized to avoid initializing them twice. + struct { + std::size_t num_buffers = 0; + std::array<const vk::Buffer*, Maxwell::NumVertexArrays> buffer_ptrs; + std::array<vk::DeviceSize, Maxwell::NumVertexArrays> offsets; + } vertex; + + struct { + const vk::Buffer* buffer = nullptr; + vk::DeviceSize offset; + vk::IndexType type; + } index; + + template <std::size_t N> + void BindStatic(VKScheduler& scheduler) const { + if (index.buffer != nullptr) { + BindStatic<N, true>(scheduler); + } else { + BindStatic<N, false>(scheduler); + } + } + + template <std::size_t N, bool is_indexed> + void BindStatic(VKScheduler& scheduler) const { + static_assert(N <= Maxwell::NumVertexArrays); + if constexpr (N == 0) { + return; + } + + std::array<vk::Buffer, N> buffers; + std::transform(vertex.buffer_ptrs.begin(), vertex.buffer_ptrs.begin() + N, buffers.begin(), + [](const auto ptr) { return *ptr; }); + + std::array<vk::DeviceSize, N> offsets; + std::copy(vertex.offsets.begin(), vertex.offsets.begin() + N, offsets.begin()); + + if constexpr (is_indexed) { + // Indexed draw + scheduler.Record([buffers, offsets, index_buffer = *index.buffer, + index_offset = index.offset, + index_type = index.type](auto cmdbuf, auto& dld) { + cmdbuf.bindIndexBuffer(index_buffer, index_offset, index_type, dld); + cmdbuf.bindVertexBuffers(0, static_cast<u32>(N), buffers.data(), offsets.data(), + dld); + }); + } else { + // Array draw + scheduler.Record([buffers, offsets](auto cmdbuf, auto& dld) { + cmdbuf.bindVertexBuffers(0, static_cast<u32>(N), buffers.data(), offsets.data(), + dld); + }); + } + } +}; + +void RasterizerVulkan::DrawParameters::Draw(vk::CommandBuffer cmdbuf, + const vk::DispatchLoaderDynamic& dld) const { + if (is_indexed) { + cmdbuf.drawIndexed(num_vertices, num_instances, 0, base_vertex, base_instance, dld); + } else { + cmdbuf.draw(num_vertices, num_instances, base_vertex, 
base_instance, dld); + } +} + +RasterizerVulkan::RasterizerVulkan(Core::System& system, Core::Frontend::EmuWindow& renderer, + VKScreenInfo& screen_info, const VKDevice& device, + VKResourceManager& resource_manager, + VKMemoryManager& memory_manager, VKScheduler& scheduler) + : RasterizerAccelerated{system.Memory()}, system{system}, render_window{renderer}, + screen_info{screen_info}, device{device}, resource_manager{resource_manager}, + memory_manager{memory_manager}, scheduler{scheduler}, + staging_pool(device, memory_manager, scheduler), descriptor_pool(device), + update_descriptor_queue(device, scheduler), + quad_array_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue), + uint8_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue), + texture_cache(system, *this, device, resource_manager, memory_manager, scheduler, + staging_pool), + pipeline_cache(system, *this, device, scheduler, descriptor_pool, update_descriptor_queue), + buffer_cache(*this, system, device, memory_manager, scheduler, staging_pool), + sampler_cache(device) {} + +RasterizerVulkan::~RasterizerVulkan() = default; + +bool RasterizerVulkan::DrawBatch(bool is_indexed) { + Draw(is_indexed, false); + return true; +} + +bool RasterizerVulkan::DrawMultiBatch(bool is_indexed) { + Draw(is_indexed, true); + return true; +} + +void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) { + MICROPROFILE_SCOPE(Vulkan_Drawing); + + FlushWork(); + + const auto& gpu = system.GPU().Maxwell3D(); + GraphicsPipelineCacheKey key{GetFixedPipelineState(gpu.regs)}; + + buffer_cache.Map(CalculateGraphicsStreamBufferSize(is_indexed)); + + BufferBindings buffer_bindings; + const DrawParameters draw_params = + SetupGeometry(key.fixed_state, buffer_bindings, is_indexed, is_instanced); + + update_descriptor_queue.Acquire(); + sampled_views.clear(); + image_views.clear(); + + const auto shaders = pipeline_cache.GetShaders(); + key.shaders = GetShaderAddresses(shaders); + SetupShaderDescriptors(shaders); + + buffer_cache.Unmap(); + + const auto texceptions = UpdateAttachments(); + SetupImageTransitions(texceptions, color_attachments, zeta_attachment); + + key.renderpass_params = GetRenderPassParams(texceptions); + + auto& pipeline = pipeline_cache.GetGraphicsPipeline(key); + scheduler.BindGraphicsPipeline(pipeline.GetHandle()); + + const auto renderpass = pipeline.GetRenderPass(); + const auto [framebuffer, render_area] = ConfigureFramebuffers(renderpass); + scheduler.RequestRenderpass({renderpass, framebuffer, {{0, 0}, render_area}, 0, nullptr}); + + UpdateDynamicStates(); + + buffer_bindings.Bind(scheduler); + + if (device.IsNvDeviceDiagnosticCheckpoints()) { + scheduler.Record( + [&pipeline](auto cmdbuf, auto& dld) { cmdbuf.setCheckpointNV(&pipeline, dld); }); + } + + const auto pipeline_layout = pipeline.GetLayout(); + const auto descriptor_set = pipeline.CommitDescriptorSet(); + scheduler.Record([pipeline_layout, descriptor_set, draw_params](auto cmdbuf, auto& dld) { + if (descriptor_set) { + cmdbuf.bindDescriptorSets(vk::PipelineBindPoint::eGraphics, pipeline_layout, + DESCRIPTOR_SET, 1, &descriptor_set, 0, nullptr, dld); + } + draw_params.Draw(cmdbuf, dld); + }); +} + +void RasterizerVulkan::Clear() { + MICROPROFILE_SCOPE(Vulkan_Clearing); + + const auto& gpu = system.GPU().Maxwell3D(); + if (!system.GPU().Maxwell3D().ShouldExecute()) { + return; + } + + const auto& regs = gpu.regs; + const bool use_color = regs.clear_buffers.R || regs.clear_buffers.G || regs.clear_buffers.B || + 
regs.clear_buffers.A;
+    const bool use_depth = regs.clear_buffers.Z;
+    const bool use_stencil = regs.clear_buffers.S;
+    if (!use_color && !use_depth && !use_stencil) {
+        return;
+    }
+    // Clearing images requires being outside of a renderpass
+    scheduler.RequestOutsideRenderPassOperationContext();
+
+    // TODO(Rodrigo): Implement clears by rendering a quad or by beginning a renderpass.
+
+    if (use_color) {
+        View color_view;
+        {
+            MICROPROFILE_SCOPE(Vulkan_RenderTargets);
+            color_view = texture_cache.GetColorBufferSurface(regs.clear_buffers.RT.Value(), false);
+        }
+
+        color_view->Transition(vk::ImageLayout::eTransferDstOptimal,
+                               vk::PipelineStageFlagBits::eTransfer,
+                               vk::AccessFlagBits::eTransferWrite);
+
+        const std::array clear_color = {regs.clear_color[0], regs.clear_color[1],
+                                        regs.clear_color[2], regs.clear_color[3]};
+        const vk::ClearColorValue clear(clear_color);
+        scheduler.Record([image = color_view->GetImage(),
+                          subresource = color_view->GetImageSubresourceRange(),
+                          clear](auto cmdbuf, auto& dld) {
+            cmdbuf.clearColorImage(image, vk::ImageLayout::eTransferDstOptimal, clear, subresource,
+                                   dld);
+        });
+    }
+    if (use_depth || use_stencil) {
+        View zeta_surface;
+        {
+            MICROPROFILE_SCOPE(Vulkan_RenderTargets);
+            zeta_surface = texture_cache.GetDepthBufferSurface(false);
+        }
+
+        zeta_surface->Transition(vk::ImageLayout::eTransferDstOptimal,
+                                 vk::PipelineStageFlagBits::eTransfer,
+                                 vk::AccessFlagBits::eTransferWrite);
+
+        const vk::ClearDepthStencilValue clear(regs.clear_depth,
+                                               static_cast<u32>(regs.clear_stencil));
+        scheduler.Record([image = zeta_surface->GetImage(),
+                          subresource = zeta_surface->GetImageSubresourceRange(),
+                          clear](auto cmdbuf, auto& dld) {
+            cmdbuf.clearDepthStencilImage(image, vk::ImageLayout::eTransferDstOptimal, clear,
+                                          subresource, dld);
+        });
+    }
+}
+
+void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) {
+    MICROPROFILE_SCOPE(Vulkan_Compute);
+    update_descriptor_queue.Acquire();
+    sampled_views.clear();
+    image_views.clear();
+
+    const auto& launch_desc = system.GPU().KeplerCompute().launch_description;
+    const ComputePipelineCacheKey key{
+        code_addr,
+        launch_desc.shared_alloc,
+        {launch_desc.block_dim_x, launch_desc.block_dim_y, launch_desc.block_dim_z}};
+    auto& pipeline = pipeline_cache.GetComputePipeline(key);
+
+    // Compute dispatches can't be executed inside a renderpass
+    scheduler.RequestOutsideRenderPassOperationContext();
+
+    buffer_cache.Map(CalculateComputeStreamBufferSize());
+
+    const auto& entries = pipeline.GetEntries();
+    SetupComputeConstBuffers(entries);
+    SetupComputeGlobalBuffers(entries);
+    SetupComputeTexelBuffers(entries);
+    SetupComputeTextures(entries);
+    SetupComputeImages(entries);
+
+    buffer_cache.Unmap();
+
+    TransitionImages(sampled_views, vk::PipelineStageFlagBits::eComputeShader,
+                     vk::AccessFlagBits::eShaderRead);
+    TransitionImages(image_views, vk::PipelineStageFlagBits::eComputeShader,
+                     vk::AccessFlagBits::eShaderRead | vk::AccessFlagBits::eShaderWrite);
+
+    if (device.IsNvDeviceDiagnosticCheckpoints()) {
+        scheduler.Record(
+            [&pipeline](auto cmdbuf, auto& dld) { cmdbuf.setCheckpointNV(&pipeline, dld); });
+    }
+
+    scheduler.Record([grid_x = launch_desc.grid_dim_x, grid_y = launch_desc.grid_dim_y,
+                      grid_z = launch_desc.grid_dim_z, pipeline_handle = pipeline.GetHandle(),
+                      layout = pipeline.GetLayout(),
+                      descriptor_set = pipeline.CommitDescriptorSet()](auto cmdbuf, auto& dld) {
+        cmdbuf.bindPipeline(vk::PipelineBindPoint::eCompute, pipeline_handle, dld);
+        cmdbuf.bindDescriptorSets(vk::PipelineBindPoint::eCompute,
layout, DESCRIPTOR_SET, 1, + &descriptor_set, 0, nullptr, dld); + cmdbuf.dispatch(grid_x, grid_y, grid_z, dld); + }); +} + +void RasterizerVulkan::FlushAll() {} + +void RasterizerVulkan::FlushRegion(CacheAddr addr, u64 size) { + texture_cache.FlushRegion(addr, size); + buffer_cache.FlushRegion(addr, size); +} + +void RasterizerVulkan::InvalidateRegion(CacheAddr addr, u64 size) { + texture_cache.InvalidateRegion(addr, size); + pipeline_cache.InvalidateRegion(addr, size); + buffer_cache.InvalidateRegion(addr, size); +} + +void RasterizerVulkan::FlushAndInvalidateRegion(CacheAddr addr, u64 size) { + FlushRegion(addr, size); + InvalidateRegion(addr, size); +} + +void RasterizerVulkan::FlushCommands() { + if (draw_counter > 0) { + draw_counter = 0; + scheduler.Flush(); + } +} + +void RasterizerVulkan::TickFrame() { + draw_counter = 0; + update_descriptor_queue.TickFrame(); + buffer_cache.TickFrame(); + staging_pool.TickFrame(); +} + +bool RasterizerVulkan::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, + const Tegra::Engines::Fermi2D::Regs::Surface& dst, + const Tegra::Engines::Fermi2D::Config& copy_config) { + texture_cache.DoFermiCopy(src, dst, copy_config); + return true; +} + +bool RasterizerVulkan::AccelerateDisplay(const Tegra::FramebufferConfig& config, + VAddr framebuffer_addr, u32 pixel_stride) { + if (!framebuffer_addr) { + return false; + } + + const u8* host_ptr{system.Memory().GetPointer(framebuffer_addr)}; + const auto surface{texture_cache.TryFindFramebufferSurface(host_ptr)}; + if (!surface) { + return false; + } + + // Verify that the cached surface is the same size and format as the requested framebuffer + const auto& params{surface->GetSurfaceParams()}; + const auto& pixel_format{ + VideoCore::Surface::PixelFormatFromGPUPixelFormat(config.pixel_format)}; + ASSERT_MSG(params.width == config.width, "Framebuffer width is different"); + ASSERT_MSG(params.height == config.height, "Framebuffer height is different"); + + screen_info.image = &surface->GetImage(); + screen_info.width = params.width; + screen_info.height = params.height; + screen_info.is_srgb = surface->GetSurfaceParams().srgb_conversion; + return true; +} + +void RasterizerVulkan::FlushWork() { + static constexpr u32 DRAWS_TO_DISPATCH = 4096; + + // Only check multiples of 8 draws + static_assert(DRAWS_TO_DISPATCH % 8 == 0); + if ((++draw_counter & 7) != 7) { + return; + } + + if (draw_counter < DRAWS_TO_DISPATCH) { + // Send recorded tasks to the worker thread + scheduler.DispatchWork(); + return; + } + + // Otherwise (every certain number of draws) flush execution. + // This submits commands to the Vulkan driver. 
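+    // Worked through with the constants above: the counter is tested on every 8th draw
+    // (7, 15, 23, ...); each test below DRAWS_TO_DISPATCH hands recorded work to the
+    // worker thread, and the first test at or past it (draw_counter == 4103) falls
+    // through to the Flush() below and resets the counter.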
+ scheduler.Flush(); + draw_counter = 0; +} + +RasterizerVulkan::Texceptions RasterizerVulkan::UpdateAttachments() { + MICROPROFILE_SCOPE(Vulkan_RenderTargets); + auto& dirty = system.GPU().Maxwell3D().dirty; + const bool update_rendertargets = dirty.render_settings; + dirty.render_settings = false; + + texture_cache.GuardRenderTargets(true); + + Texceptions texceptions; + for (std::size_t rt = 0; rt < Maxwell::NumRenderTargets; ++rt) { + if (update_rendertargets) { + color_attachments[rt] = texture_cache.GetColorBufferSurface(rt, true); + } + if (color_attachments[rt] && WalkAttachmentOverlaps(*color_attachments[rt])) { + texceptions.set(rt); + } + } + + if (update_rendertargets) { + zeta_attachment = texture_cache.GetDepthBufferSurface(true); + } + if (zeta_attachment && WalkAttachmentOverlaps(*zeta_attachment)) { + texceptions.set(ZETA_TEXCEPTION_INDEX); + } + + texture_cache.GuardRenderTargets(false); + + return texceptions; +} + +bool RasterizerVulkan::WalkAttachmentOverlaps(const CachedSurfaceView& attachment) { + bool overlap = false; + for (auto& [view, layout] : sampled_views) { + if (!attachment.IsSameSurface(*view)) { + continue; + } + overlap = true; + *layout = vk::ImageLayout::eGeneral; + } + return overlap; +} + +std::tuple<vk::Framebuffer, vk::Extent2D> RasterizerVulkan::ConfigureFramebuffers( + vk::RenderPass renderpass) { + FramebufferCacheKey key{renderpass, std::numeric_limits<u32>::max(), + std::numeric_limits<u32>::max()}; + + const auto MarkAsModifiedAndPush = [&](const View& view) { + if (view == nullptr) { + return false; + } + key.views.push_back(view->GetHandle()); + key.width = std::min(key.width, view->GetWidth()); + key.height = std::min(key.height, view->GetHeight()); + return true; + }; + + for (std::size_t index = 0; index < std::size(color_attachments); ++index) { + if (MarkAsModifiedAndPush(color_attachments[index])) { + texture_cache.MarkColorBufferInUse(index); + } + } + if (MarkAsModifiedAndPush(zeta_attachment)) { + texture_cache.MarkDepthBufferInUse(); + } + + const auto [fbentry, is_cache_miss] = framebuffer_cache.try_emplace(key); + auto& framebuffer = fbentry->second; + if (is_cache_miss) { + const vk::FramebufferCreateInfo framebuffer_ci({}, key.renderpass, + static_cast<u32>(key.views.size()), + key.views.data(), key.width, key.height, 1); + const auto dev = device.GetLogical(); + const auto& dld = device.GetDispatchLoader(); + framebuffer = dev.createFramebufferUnique(framebuffer_ci, nullptr, dld); + } + + return {*framebuffer, vk::Extent2D{key.width, key.height}}; +} + +RasterizerVulkan::DrawParameters RasterizerVulkan::SetupGeometry(FixedPipelineState& fixed_state, + BufferBindings& buffer_bindings, + bool is_indexed, + bool is_instanced) { + MICROPROFILE_SCOPE(Vulkan_Geometry); + + const auto& gpu = system.GPU().Maxwell3D(); + const auto& regs = gpu.regs; + + SetupVertexArrays(fixed_state.vertex_input, buffer_bindings); + + const u32 base_instance = regs.vb_base_instance; + const u32 num_instances = is_instanced ? gpu.mme_draw.instance_count : 1; + const u32 base_vertex = is_indexed ? regs.vb_element_base : regs.vertex_buffer.first; + const u32 num_vertices = is_indexed ? 
regs.index_array.count : regs.vertex_buffer.count; + + DrawParameters params{base_instance, num_instances, base_vertex, num_vertices, is_indexed}; + SetupIndexBuffer(buffer_bindings, params, is_indexed); + + return params; +} + +void RasterizerVulkan::SetupShaderDescriptors( + const std::array<Shader, Maxwell::MaxShaderProgram>& shaders) { + texture_cache.GuardSamplers(true); + + for (std::size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { + // Skip VertexA stage + const auto& shader = shaders[stage + 1]; + if (!shader) { + continue; + } + const auto& entries = shader->GetEntries(); + SetupGraphicsConstBuffers(entries, stage); + SetupGraphicsGlobalBuffers(entries, stage); + SetupGraphicsTexelBuffers(entries, stage); + SetupGraphicsTextures(entries, stage); + SetupGraphicsImages(entries, stage); + } + texture_cache.GuardSamplers(false); +} + +void RasterizerVulkan::SetupImageTransitions( + Texceptions texceptions, const std::array<View, Maxwell::NumRenderTargets>& color_attachments, + const View& zeta_attachment) { + TransitionImages(sampled_views, vk::PipelineStageFlagBits::eAllGraphics, + vk::AccessFlagBits::eShaderRead); + TransitionImages(image_views, vk::PipelineStageFlagBits::eAllGraphics, + vk::AccessFlagBits::eShaderRead | vk::AccessFlagBits::eShaderWrite); + + for (std::size_t rt = 0; rt < std::size(color_attachments); ++rt) { + const auto color_attachment = color_attachments[rt]; + if (color_attachment == nullptr) { + continue; + } + const auto image_layout = + texceptions[rt] ? vk::ImageLayout::eGeneral : vk::ImageLayout::eColorAttachmentOptimal; + color_attachment->Transition( + image_layout, vk::PipelineStageFlagBits::eColorAttachmentOutput, + vk::AccessFlagBits::eColorAttachmentRead | vk::AccessFlagBits::eColorAttachmentWrite); + } + + if (zeta_attachment != nullptr) { + const auto image_layout = texceptions[ZETA_TEXCEPTION_INDEX] + ? 
vk::ImageLayout::eGeneral + : vk::ImageLayout::eDepthStencilAttachmentOptimal; + zeta_attachment->Transition(image_layout, vk::PipelineStageFlagBits::eLateFragmentTests, + vk::AccessFlagBits::eDepthStencilAttachmentRead | + vk::AccessFlagBits::eDepthStencilAttachmentWrite); + } +} + +void RasterizerVulkan::UpdateDynamicStates() { + auto& gpu = system.GPU().Maxwell3D(); + UpdateViewportsState(gpu); + UpdateScissorsState(gpu); + UpdateDepthBias(gpu); + UpdateBlendConstants(gpu); + UpdateDepthBounds(gpu); + UpdateStencilFaces(gpu); +} + +void RasterizerVulkan::SetupVertexArrays(FixedPipelineState::VertexInput& vertex_input, + BufferBindings& buffer_bindings) { + const auto& regs = system.GPU().Maxwell3D().regs; + + for (u32 index = 0; index < static_cast<u32>(Maxwell::NumVertexAttributes); ++index) { + const auto& attrib = regs.vertex_attrib_format[index]; + if (!attrib.IsValid()) { + continue; + } + + const auto& buffer = regs.vertex_array[attrib.buffer]; + ASSERT(buffer.IsEnabled()); + + vertex_input.attributes[vertex_input.num_attributes++] = + FixedPipelineState::VertexAttribute(index, attrib.buffer, attrib.type, attrib.size, + attrib.offset); + } + + for (u32 index = 0; index < static_cast<u32>(Maxwell::NumVertexArrays); ++index) { + const auto& vertex_array = regs.vertex_array[index]; + if (!vertex_array.IsEnabled()) { + continue; + } + + const GPUVAddr start{vertex_array.StartAddress()}; + const GPUVAddr end{regs.vertex_array_limit[index].LimitAddress()}; + + ASSERT(end > start); + const std::size_t size{end - start + 1}; + const auto [buffer, offset] = buffer_cache.UploadMemory(start, size); + + vertex_input.bindings[vertex_input.num_bindings++] = FixedPipelineState::VertexBinding( + index, vertex_array.stride, + regs.instanced_arrays.IsInstancingEnabled(index) ? 
vertex_array.divisor : 0); + buffer_bindings.AddVertexBinding(buffer, offset); + } +} + +void RasterizerVulkan::SetupIndexBuffer(BufferBindings& buffer_bindings, DrawParameters& params, + bool is_indexed) { + const auto& regs = system.GPU().Maxwell3D().regs; + switch (regs.draw.topology) { + case Maxwell::PrimitiveTopology::Quads: + if (params.is_indexed) { + UNIMPLEMENTED(); + } else { + const auto [buffer, offset] = + quad_array_pass.Assemble(params.num_vertices, params.base_vertex); + buffer_bindings.SetIndexBinding(&buffer, offset, vk::IndexType::eUint32); + params.base_vertex = 0; + params.num_vertices = params.num_vertices * 6 / 4; + params.is_indexed = true; + } + break; + default: { + if (!is_indexed) { + break; + } + const GPUVAddr gpu_addr = regs.index_array.IndexStart(); + auto [buffer, offset] = buffer_cache.UploadMemory(gpu_addr, CalculateIndexBufferSize()); + + auto format = regs.index_array.format; + const bool is_uint8 = format == Maxwell::IndexFormat::UnsignedByte; + if (is_uint8 && !device.IsExtIndexTypeUint8Supported()) { + std::tie(buffer, offset) = uint8_pass.Assemble(params.num_vertices, *buffer, offset); + format = Maxwell::IndexFormat::UnsignedShort; + } + + buffer_bindings.SetIndexBinding(buffer, offset, MaxwellToVK::IndexFormat(device, format)); + break; + } + } +} + +void RasterizerVulkan::SetupGraphicsConstBuffers(const ShaderEntries& entries, std::size_t stage) { + MICROPROFILE_SCOPE(Vulkan_ConstBuffers); + const auto& gpu = system.GPU().Maxwell3D(); + const auto& shader_stage = gpu.state.shader_stages[stage]; + for (const auto& entry : entries.const_buffers) { + SetupConstBuffer(entry, shader_stage.const_buffers[entry.GetIndex()]); + } +} + +void RasterizerVulkan::SetupGraphicsGlobalBuffers(const ShaderEntries& entries, std::size_t stage) { + MICROPROFILE_SCOPE(Vulkan_GlobalBuffers); + auto& gpu{system.GPU()}; + const auto cbufs{gpu.Maxwell3D().state.shader_stages[stage]}; + + for (const auto& entry : entries.global_buffers) { + const auto addr = cbufs.const_buffers[entry.GetCbufIndex()].address + entry.GetCbufOffset(); + SetupGlobalBuffer(entry, addr); + } +} + +void RasterizerVulkan::SetupGraphicsTexelBuffers(const ShaderEntries& entries, std::size_t stage) { + MICROPROFILE_SCOPE(Vulkan_Textures); + const auto& gpu = system.GPU().Maxwell3D(); + for (const auto& entry : entries.texel_buffers) { + const auto image = GetTextureInfo(gpu, entry, stage).tic; + SetupTexelBuffer(image, entry); + } +} + +void RasterizerVulkan::SetupGraphicsTextures(const ShaderEntries& entries, std::size_t stage) { + MICROPROFILE_SCOPE(Vulkan_Textures); + const auto& gpu = system.GPU().Maxwell3D(); + for (const auto& entry : entries.samplers) { + const auto texture = GetTextureInfo(gpu, entry, stage); + SetupTexture(texture, entry); + } +} + +void RasterizerVulkan::SetupGraphicsImages(const ShaderEntries& entries, std::size_t stage) { + MICROPROFILE_SCOPE(Vulkan_Images); + const auto& gpu = system.GPU().KeplerCompute(); + for (const auto& entry : entries.images) { + const auto tic = GetTextureInfo(gpu, entry, stage).tic; + SetupImage(tic, entry); + } +} + +void RasterizerVulkan::SetupComputeConstBuffers(const ShaderEntries& entries) { + MICROPROFILE_SCOPE(Vulkan_ConstBuffers); + const auto& launch_desc = system.GPU().KeplerCompute().launch_description; + for (const auto& entry : entries.const_buffers) { + const auto& config = launch_desc.const_buffer_config[entry.GetIndex()]; + const std::bitset<8> mask = launch_desc.const_buffer_enable_mask.Value(); + Tegra::Engines::ConstBufferInfo 
buffer; + buffer.address = config.Address(); + buffer.size = config.size; + buffer.enabled = mask[entry.GetIndex()]; + SetupConstBuffer(entry, buffer); + } +} + +void RasterizerVulkan::SetupComputeGlobalBuffers(const ShaderEntries& entries) { + MICROPROFILE_SCOPE(Vulkan_GlobalBuffers); + const auto cbufs{system.GPU().KeplerCompute().launch_description.const_buffer_config}; + for (const auto& entry : entries.global_buffers) { + const auto addr{cbufs[entry.GetCbufIndex()].Address() + entry.GetCbufOffset()}; + SetupGlobalBuffer(entry, addr); + } +} + +void RasterizerVulkan::SetupComputeTexelBuffers(const ShaderEntries& entries) { + MICROPROFILE_SCOPE(Vulkan_Textures); + const auto& gpu = system.GPU().KeplerCompute(); + for (const auto& entry : entries.texel_buffers) { + const auto image = GetTextureInfo(gpu, entry, ComputeShaderIndex).tic; + SetupTexelBuffer(image, entry); + } +} + +void RasterizerVulkan::SetupComputeTextures(const ShaderEntries& entries) { + MICROPROFILE_SCOPE(Vulkan_Textures); + const auto& gpu = system.GPU().KeplerCompute(); + for (const auto& entry : entries.samplers) { + const auto texture = GetTextureInfo(gpu, entry, ComputeShaderIndex); + SetupTexture(texture, entry); + } +} + +void RasterizerVulkan::SetupComputeImages(const ShaderEntries& entries) { + MICROPROFILE_SCOPE(Vulkan_Images); + const auto& gpu = system.GPU().KeplerCompute(); + for (const auto& entry : entries.images) { + const auto tic = GetTextureInfo(gpu, entry, ComputeShaderIndex).tic; + SetupImage(tic, entry); + } +} + +void RasterizerVulkan::SetupConstBuffer(const ConstBufferEntry& entry, + const Tegra::Engines::ConstBufferInfo& buffer) { + // Align the size to avoid bad std140 interactions + const std::size_t size = + Common::AlignUp(CalculateConstBufferSize(entry, buffer), 4 * sizeof(float)); + ASSERT(size <= MaxConstbufferSize); + + const auto [buffer_handle, offset] = + buffer_cache.UploadMemory(buffer.address, size, device.GetUniformBufferAlignment()); + + update_descriptor_queue.AddBuffer(buffer_handle, offset, size); +} + +void RasterizerVulkan::SetupGlobalBuffer(const GlobalBufferEntry& entry, GPUVAddr address) { + auto& memory_manager{system.GPU().MemoryManager()}; + const auto actual_addr = memory_manager.Read<u64>(address); + const auto size = memory_manager.Read<u32>(address + 8); + + if (size == 0) { + // Sometimes global memory pointers don't have a proper size. Upload a dummy entry because + // Vulkan doesn't like empty buffers. 
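+        // (A buffer descriptor's range must be at least one byte unless VK_WHOLE_SIZE
+        // is used, so a zero-sized binding cannot be expressed directly.)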
+ constexpr std::size_t dummy_size = 4; + const auto buffer = buffer_cache.GetEmptyBuffer(dummy_size); + update_descriptor_queue.AddBuffer(buffer, 0, dummy_size); + return; + } + + const auto [buffer, offset] = buffer_cache.UploadMemory( + actual_addr, size, device.GetStorageBufferAlignment(), entry.IsWritten()); + update_descriptor_queue.AddBuffer(buffer, offset, size); +} + +void RasterizerVulkan::SetupTexelBuffer(const Tegra::Texture::TICEntry& tic, + const TexelBufferEntry& entry) { + const auto view = texture_cache.GetTextureSurface(tic, entry); + ASSERT(view->IsBufferView()); + + update_descriptor_queue.AddTexelBuffer(view->GetBufferView()); +} + +void RasterizerVulkan::SetupTexture(const Tegra::Texture::FullTextureInfo& texture, + const SamplerEntry& entry) { + auto view = texture_cache.GetTextureSurface(texture.tic, entry); + ASSERT(!view->IsBufferView()); + + const auto image_view = view->GetHandle(texture.tic.x_source, texture.tic.y_source, + texture.tic.z_source, texture.tic.w_source); + const auto sampler = sampler_cache.GetSampler(texture.tsc); + update_descriptor_queue.AddSampledImage(sampler, image_view); + + const auto image_layout = update_descriptor_queue.GetLastImageLayout(); + *image_layout = vk::ImageLayout::eShaderReadOnlyOptimal; + sampled_views.push_back(ImageView{std::move(view), image_layout}); +} + +void RasterizerVulkan::SetupImage(const Tegra::Texture::TICEntry& tic, const ImageEntry& entry) { + auto view = texture_cache.GetImageSurface(tic, entry); + + if (entry.IsWritten()) { + view->MarkAsModified(texture_cache.Tick()); + } + + UNIMPLEMENTED_IF(tic.IsBuffer()); + + const auto image_view = view->GetHandle(tic.x_source, tic.y_source, tic.z_source, tic.w_source); + update_descriptor_queue.AddImage(image_view); + + const auto image_layout = update_descriptor_queue.GetLastImageLayout(); + *image_layout = vk::ImageLayout::eGeneral; + image_views.push_back(ImageView{std::move(view), image_layout}); +} + +void RasterizerVulkan::UpdateViewportsState(Tegra::Engines::Maxwell3D& gpu) { + if (!gpu.dirty.viewport_transform && scheduler.TouchViewports()) { + return; + } + gpu.dirty.viewport_transform = false; + const auto& regs = gpu.regs; + const std::array viewports{ + GetViewportState(device, regs, 0), GetViewportState(device, regs, 1), + GetViewportState(device, regs, 2), GetViewportState(device, regs, 3), + GetViewportState(device, regs, 4), GetViewportState(device, regs, 5), + GetViewportState(device, regs, 6), GetViewportState(device, regs, 7), + GetViewportState(device, regs, 8), GetViewportState(device, regs, 9), + GetViewportState(device, regs, 10), GetViewportState(device, regs, 11), + GetViewportState(device, regs, 12), GetViewportState(device, regs, 13), + GetViewportState(device, regs, 14), GetViewportState(device, regs, 15)}; + scheduler.Record([viewports](auto cmdbuf, auto& dld) { + cmdbuf.setViewport(0, static_cast<u32>(viewports.size()), viewports.data(), dld); + }); +} + +void RasterizerVulkan::UpdateScissorsState(Tegra::Engines::Maxwell3D& gpu) { + if (!gpu.dirty.scissor_test && scheduler.TouchScissors()) { + return; + } + gpu.dirty.scissor_test = false; + const auto& regs = gpu.regs; + const std::array scissors = { + GetScissorState(regs, 0), GetScissorState(regs, 1), GetScissorState(regs, 2), + GetScissorState(regs, 3), GetScissorState(regs, 4), GetScissorState(regs, 5), + GetScissorState(regs, 6), GetScissorState(regs, 7), GetScissorState(regs, 8), + GetScissorState(regs, 9), GetScissorState(regs, 10), GetScissorState(regs, 11), + 
GetScissorState(regs, 12), GetScissorState(regs, 13), GetScissorState(regs, 14), + GetScissorState(regs, 15)}; + scheduler.Record([scissors](auto cmdbuf, auto& dld) { + cmdbuf.setScissor(0, static_cast<u32>(scissors.size()), scissors.data(), dld); + }); +} + +void RasterizerVulkan::UpdateDepthBias(Tegra::Engines::Maxwell3D& gpu) { + if (!gpu.dirty.polygon_offset && scheduler.TouchDepthBias()) { + return; + } + gpu.dirty.polygon_offset = false; + const auto& regs = gpu.regs; + scheduler.Record([constant = regs.polygon_offset_units, clamp = regs.polygon_offset_clamp, + factor = regs.polygon_offset_factor](auto cmdbuf, auto& dld) { + cmdbuf.setDepthBias(constant, clamp, factor / 2.0f, dld); + }); +} + +void RasterizerVulkan::UpdateBlendConstants(Tegra::Engines::Maxwell3D& gpu) { + if (!gpu.dirty.blend_state && scheduler.TouchBlendConstants()) { + return; + } + gpu.dirty.blend_state = false; + const std::array blend_color = {gpu.regs.blend_color.r, gpu.regs.blend_color.g, + gpu.regs.blend_color.b, gpu.regs.blend_color.a}; + scheduler.Record([blend_color](auto cmdbuf, auto& dld) { + cmdbuf.setBlendConstants(blend_color.data(), dld); + }); +} + +void RasterizerVulkan::UpdateDepthBounds(Tegra::Engines::Maxwell3D& gpu) { + if (!gpu.dirty.depth_bounds_values && scheduler.TouchDepthBounds()) { + return; + } + gpu.dirty.depth_bounds_values = false; + const auto& regs = gpu.regs; + scheduler.Record([min = regs.depth_bounds[0], max = regs.depth_bounds[1]]( + auto cmdbuf, auto& dld) { cmdbuf.setDepthBounds(min, max, dld); }); +} + +void RasterizerVulkan::UpdateStencilFaces(Tegra::Engines::Maxwell3D& gpu) { + if (!gpu.dirty.stencil_test && scheduler.TouchStencilValues()) { + return; + } + gpu.dirty.stencil_test = false; + const auto& regs = gpu.regs; + if (regs.stencil_two_side_enable) { + // Separate values per face + scheduler.Record( + [front_ref = regs.stencil_front_func_ref, front_write_mask = regs.stencil_front_mask, + front_test_mask = regs.stencil_front_func_mask, back_ref = regs.stencil_back_func_ref, + back_write_mask = regs.stencil_back_mask, + back_test_mask = regs.stencil_back_func_mask](auto cmdbuf, auto& dld) { + // Front face + cmdbuf.setStencilReference(vk::StencilFaceFlagBits::eFront, front_ref, dld); + cmdbuf.setStencilWriteMask(vk::StencilFaceFlagBits::eFront, front_write_mask, dld); + cmdbuf.setStencilCompareMask(vk::StencilFaceFlagBits::eFront, front_test_mask, dld); + + // Back face + cmdbuf.setStencilReference(vk::StencilFaceFlagBits::eBack, back_ref, dld); + cmdbuf.setStencilWriteMask(vk::StencilFaceFlagBits::eBack, back_write_mask, dld); + cmdbuf.setStencilCompareMask(vk::StencilFaceFlagBits::eBack, back_test_mask, dld); + }); + } else { + // Front face defines both faces + scheduler.Record([ref = regs.stencil_back_func_ref, write_mask = regs.stencil_back_mask, + test_mask = regs.stencil_back_func_mask](auto cmdbuf, auto& dld) { + cmdbuf.setStencilReference(vk::StencilFaceFlagBits::eFrontAndBack, ref, dld); + cmdbuf.setStencilWriteMask(vk::StencilFaceFlagBits::eFrontAndBack, write_mask, dld); + cmdbuf.setStencilCompareMask(vk::StencilFaceFlagBits::eFrontAndBack, test_mask, dld); + }); + } +} + +std::size_t RasterizerVulkan::CalculateGraphicsStreamBufferSize(bool is_indexed) const { + std::size_t size = CalculateVertexArraysSize(); + if (is_indexed) { + size = Common::AlignUp(size, 4) + CalculateIndexBufferSize(); + } + size += Maxwell::MaxConstBuffers * (MaxConstbufferSize + device.GetUniformBufferAlignment()); + return size; +} + +std::size_t 
RasterizerVulkan::CalculateComputeStreamBufferSize() const { + return Tegra::Engines::KeplerCompute::NumConstBuffers * + (Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment()); +} + +std::size_t RasterizerVulkan::CalculateVertexArraysSize() const { + const auto& regs = system.GPU().Maxwell3D().regs; + + std::size_t size = 0; + for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) { + // This implementation assumes that all attributes are used in the shader. + const GPUVAddr start{regs.vertex_array[index].StartAddress()}; + const GPUVAddr end{regs.vertex_array_limit[index].LimitAddress()}; + DEBUG_ASSERT(end > start); + + size += (end - start + 1) * regs.vertex_array[index].enable; + } + return size; +} + +std::size_t RasterizerVulkan::CalculateIndexBufferSize() const { + const auto& regs = system.GPU().Maxwell3D().regs; + return static_cast<std::size_t>(regs.index_array.count) * + static_cast<std::size_t>(regs.index_array.FormatSizeInBytes()); +} + +std::size_t RasterizerVulkan::CalculateConstBufferSize( + const ConstBufferEntry& entry, const Tegra::Engines::ConstBufferInfo& buffer) const { + if (entry.IsIndirect()) { + // Buffer is accessed indirectly, so upload the entire thing + return buffer.size; + } else { + // Buffer is accessed directly, upload just what we use + return entry.GetSize(); + } +} + +RenderPassParams RasterizerVulkan::GetRenderPassParams(Texceptions texceptions) const { + using namespace VideoCore::Surface; + + const auto& regs = system.GPU().Maxwell3D().regs; + RenderPassParams renderpass_params; + + for (std::size_t rt = 0; rt < static_cast<std::size_t>(regs.rt_control.count); ++rt) { + const auto& rendertarget = regs.rt[rt]; + if (rendertarget.Address() == 0 || rendertarget.format == Tegra::RenderTargetFormat::NONE) + continue; + renderpass_params.color_attachments.push_back(RenderPassParams::ColorAttachment{ + static_cast<u32>(rt), PixelFormatFromRenderTargetFormat(rendertarget.format), + texceptions.test(rt)}); + } + + renderpass_params.has_zeta = regs.zeta_enable; + if (renderpass_params.has_zeta) { + renderpass_params.zeta_pixel_format = PixelFormatFromDepthFormat(regs.zeta.format); + renderpass_params.zeta_texception = texceptions[ZETA_TEXCEPTION_INDEX]; + } + + return renderpass_params; +} + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index fc324952b..7be71e734 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -4,10 +4,260 @@ #pragma once +#include <array> +#include <bitset> +#include <memory> +#include <utility> +#include <vector> + +#include <boost/container/static_vector.hpp> +#include <boost/functional/hash.hpp> + +#include "common/common_types.h" +#include "video_core/memory_manager.h" +#include "video_core/rasterizer_accelerated.h" #include "video_core/rasterizer_interface.h" +#include "video_core/renderer_vulkan/declarations.h" +#include "video_core/renderer_vulkan/fixed_pipeline_state.h" +#include "video_core/renderer_vulkan/vk_buffer_cache.h" +#include "video_core/renderer_vulkan/vk_compute_pass.h" +#include "video_core/renderer_vulkan/vk_descriptor_pool.h" +#include "video_core/renderer_vulkan/vk_memory_manager.h" +#include "video_core/renderer_vulkan/vk_pipeline_cache.h" +#include "video_core/renderer_vulkan/vk_renderpass_cache.h" +#include "video_core/renderer_vulkan/vk_resource_manager.h" +#include "video_core/renderer_vulkan/vk_sampler_cache.h" +#include 
"video_core/renderer_vulkan/vk_scheduler.h" +#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" +#include "video_core/renderer_vulkan/vk_texture_cache.h" +#include "video_core/renderer_vulkan/vk_update_descriptor.h" + +namespace Core { +class System; +} + +namespace Core::Frontend { +class EmuWindow; +} + +namespace Tegra::Engines { +class Maxwell3D; +} + +namespace Vulkan { + +struct VKScreenInfo; + +using ImageViewsPack = + boost::container::static_vector<vk::ImageView, Maxwell::NumRenderTargets + 1>; + +struct FramebufferCacheKey { + vk::RenderPass renderpass{}; + u32 width = 0; + u32 height = 0; + ImageViewsPack views; + + std::size_t Hash() const noexcept { + std::size_t hash = 0; + boost::hash_combine(hash, static_cast<VkRenderPass>(renderpass)); + for (const auto& view : views) { + boost::hash_combine(hash, static_cast<VkImageView>(view)); + } + boost::hash_combine(hash, width); + boost::hash_combine(hash, height); + return hash; + } + + bool operator==(const FramebufferCacheKey& rhs) const noexcept { + return std::tie(renderpass, views, width, height) == + std::tie(rhs.renderpass, rhs.views, rhs.width, rhs.height); + } +}; + +} // namespace Vulkan + +namespace std { + +template <> +struct hash<Vulkan::FramebufferCacheKey> { + std::size_t operator()(const Vulkan::FramebufferCacheKey& k) const noexcept { + return k.Hash(); + } +}; + +} // namespace std namespace Vulkan { -class RasterizerVulkan : public VideoCore::RasterizerInterface {}; +class BufferBindings; + +struct ImageView { + View view; + vk::ImageLayout* layout = nullptr; +}; + +class RasterizerVulkan : public VideoCore::RasterizerAccelerated { +public: + explicit RasterizerVulkan(Core::System& system, Core::Frontend::EmuWindow& render_window, + VKScreenInfo& screen_info, const VKDevice& device, + VKResourceManager& resource_manager, VKMemoryManager& memory_manager, + VKScheduler& scheduler); + ~RasterizerVulkan() override; + + bool DrawBatch(bool is_indexed) override; + bool DrawMultiBatch(bool is_indexed) override; + void Clear() override; + void DispatchCompute(GPUVAddr code_addr) override; + void FlushAll() override; + void FlushRegion(CacheAddr addr, u64 size) override; + void InvalidateRegion(CacheAddr addr, u64 size) override; + void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override; + void FlushCommands() override; + void TickFrame() override; + bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, + const Tegra::Engines::Fermi2D::Regs::Surface& dst, + const Tegra::Engines::Fermi2D::Config& copy_config) override; + bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr, + u32 pixel_stride) override; + + /// Maximum supported size that a constbuffer can have in bytes. 
+    static constexpr std::size_t MaxConstbufferSize = 0x10000;
+    static_assert(MaxConstbufferSize % (4 * sizeof(float)) == 0,
+                  "The maximum size of a constbuffer must be a multiple of the size of a vec4");
+
+private:
+    struct DrawParameters {
+        void Draw(vk::CommandBuffer cmdbuf, const vk::DispatchLoaderDynamic& dld) const;
+
+        u32 base_instance = 0;
+        u32 num_instances = 0;
+        u32 base_vertex = 0;
+        u32 num_vertices = 0;
+        bool is_indexed = false;
+    };
+
+    using Texceptions = std::bitset<Maxwell::NumRenderTargets + 1>;
+
+    static constexpr std::size_t ZETA_TEXCEPTION_INDEX = 8;
+
+    void Draw(bool is_indexed, bool is_instanced);
+
+    void FlushWork();
+
+    Texceptions UpdateAttachments();
+
+    std::tuple<vk::Framebuffer, vk::Extent2D> ConfigureFramebuffers(vk::RenderPass renderpass);
+
+    /// Sets up geometry buffers and state.
+    DrawParameters SetupGeometry(FixedPipelineState& fixed_state, BufferBindings& buffer_bindings,
+                                 bool is_indexed, bool is_instanced);
+
+    /// Sets up descriptors in the graphics pipeline.
+    void SetupShaderDescriptors(const std::array<Shader, Maxwell::MaxShaderProgram>& shaders);
+
+    void SetupImageTransitions(Texceptions texceptions,
+                               const std::array<View, Maxwell::NumRenderTargets>& color_attachments,
+                               const View& zeta_attachment);
+
+    void UpdateDynamicStates();
+
+    bool WalkAttachmentOverlaps(const CachedSurfaceView& attachment);
+
+    void SetupVertexArrays(FixedPipelineState::VertexInput& vertex_input,
+                           BufferBindings& buffer_bindings);
+
+    void SetupIndexBuffer(BufferBindings& buffer_bindings, DrawParameters& params, bool is_indexed);
+
+    /// Sets up constant buffers in the graphics pipeline.
+    void SetupGraphicsConstBuffers(const ShaderEntries& entries, std::size_t stage);
+
+    /// Sets up global buffers in the graphics pipeline.
+    void SetupGraphicsGlobalBuffers(const ShaderEntries& entries, std::size_t stage);
+
+    /// Sets up texel buffers in the graphics pipeline.
+    void SetupGraphicsTexelBuffers(const ShaderEntries& entries, std::size_t stage);
+
+    /// Sets up textures in the graphics pipeline.
+    void SetupGraphicsTextures(const ShaderEntries& entries, std::size_t stage);
+
+    /// Sets up images in the graphics pipeline.
+    void SetupGraphicsImages(const ShaderEntries& entries, std::size_t stage);
+
+    /// Sets up constant buffers in the compute pipeline.
+    void SetupComputeConstBuffers(const ShaderEntries& entries);
+
+    /// Sets up global buffers in the compute pipeline.
+    void SetupComputeGlobalBuffers(const ShaderEntries& entries);
+
+    /// Sets up texel buffers in the compute pipeline.
+    void SetupComputeTexelBuffers(const ShaderEntries& entries);
+
+    /// Sets up textures in the compute pipeline.
+    void SetupComputeTextures(const ShaderEntries& entries);
+
+    /// Sets up images in the compute pipeline.
+ void SetupComputeImages(const ShaderEntries& entries); + + void SetupConstBuffer(const ConstBufferEntry& entry, + const Tegra::Engines::ConstBufferInfo& buffer); + + void SetupGlobalBuffer(const GlobalBufferEntry& entry, GPUVAddr address); + + void SetupTexelBuffer(const Tegra::Texture::TICEntry& image, const TexelBufferEntry& entry); + + void SetupTexture(const Tegra::Texture::FullTextureInfo& texture, const SamplerEntry& entry); + + void SetupImage(const Tegra::Texture::TICEntry& tic, const ImageEntry& entry); + + void UpdateViewportsState(Tegra::Engines::Maxwell3D& gpu); + void UpdateScissorsState(Tegra::Engines::Maxwell3D& gpu); + void UpdateDepthBias(Tegra::Engines::Maxwell3D& gpu); + void UpdateBlendConstants(Tegra::Engines::Maxwell3D& gpu); + void UpdateDepthBounds(Tegra::Engines::Maxwell3D& gpu); + void UpdateStencilFaces(Tegra::Engines::Maxwell3D& gpu); + + std::size_t CalculateGraphicsStreamBufferSize(bool is_indexed) const; + + std::size_t CalculateComputeStreamBufferSize() const; + + std::size_t CalculateVertexArraysSize() const; + + std::size_t CalculateIndexBufferSize() const; + + std::size_t CalculateConstBufferSize(const ConstBufferEntry& entry, + const Tegra::Engines::ConstBufferInfo& buffer) const; + + RenderPassParams GetRenderPassParams(Texceptions texceptions) const; + + Core::System& system; + Core::Frontend::EmuWindow& render_window; + VKScreenInfo& screen_info; + const VKDevice& device; + VKResourceManager& resource_manager; + VKMemoryManager& memory_manager; + VKScheduler& scheduler; + + VKStagingBufferPool staging_pool; + VKDescriptorPool descriptor_pool; + VKUpdateDescriptorQueue update_descriptor_queue; + QuadArrayPass quad_array_pass; + Uint8Pass uint8_pass; + + VKTextureCache texture_cache; + VKPipelineCache pipeline_cache; + VKBufferCache buffer_cache; + VKSamplerCache sampler_cache; + + std::array<View, Maxwell::NumRenderTargets> color_attachments; + View zeta_attachment; + + std::vector<ImageView> sampled_views; + std::vector<ImageView> image_views; + + u32 draw_counter = 0; + + // TODO(Rodrigo): Invalidate on image destruction + std::unordered_map<FramebufferCacheKey, UniqueFramebuffer> framebuffer_cache; +}; } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_sampler_cache.cpp b/src/video_core/renderer_vulkan/vk_sampler_cache.cpp index 1ce583f75..0a8ec8398 100644 --- a/src/video_core/renderer_vulkan/vk_sampler_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_sampler_cache.cpp @@ -46,9 +46,9 @@ UniqueSampler VKSamplerCache::CreateSampler(const Tegra::Texture::TSCEntry& tsc) {}, MaxwellToVK::Sampler::Filter(tsc.mag_filter), MaxwellToVK::Sampler::Filter(tsc.min_filter), MaxwellToVK::Sampler::MipmapMode(tsc.mipmap_filter), - MaxwellToVK::Sampler::WrapMode(tsc.wrap_u, tsc.mag_filter), - MaxwellToVK::Sampler::WrapMode(tsc.wrap_v, tsc.mag_filter), - MaxwellToVK::Sampler::WrapMode(tsc.wrap_p, tsc.mag_filter), tsc.GetLodBias(), + MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_u, tsc.mag_filter), + MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_v, tsc.mag_filter), + MaxwellToVK::Sampler::WrapMode(device, tsc.wrap_p, tsc.mag_filter), tsc.GetLodBias(), has_anisotropy, max_anisotropy, tsc.depth_compare_enabled, MaxwellToVK::Sampler::DepthCompareFunction(tsc.depth_compare_func), tsc.GetMinLod(), tsc.GetMaxLod(), vk_border_color.value_or(vk::BorderColor::eFloatTransparentBlack), diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp index 8fe852ce8..1ab22251e 100644 --- 
a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp @@ -542,11 +542,10 @@ private: return; } - for (u32 rt = 0; rt < static_cast<u32>(frag_colors.size()); ++rt) { - if (!specialization.enabled_rendertargets[rt]) { + for (u32 rt = 0; rt < static_cast<u32>(std::size(frag_colors)); ++rt) { + if (!IsRenderTargetEnabled(rt)) { continue; } - const Id id = AddGlobalVariable(OpVariable(t_out_float4, spv::StorageClass::Output)); Name(id, fmt::format("frag_color{}", rt)); Decorate(id, spv::Decoration::Location, rt); @@ -852,6 +851,15 @@ private: return binding; } + bool IsRenderTargetEnabled(u32 rt) const { + for (u32 component = 0; component < 4; ++component) { + if (header.ps.IsColorComponentOutputEnabled(rt, component)) { + return true; + } + } + return false; + } + bool IsInputAttributeArray() const { return stage == ShaderType::TesselationControl || stage == ShaderType::TesselationEval || stage == ShaderType::Geometry; @@ -1115,15 +1123,7 @@ private: } if (const auto gmem = std::get_if<GmemNode>(&*node)) { - const Id gmem_buffer = global_buffers.at(gmem->GetDescriptor()); - const Id real = AsUint(Visit(gmem->GetRealAddress())); - const Id base = AsUint(Visit(gmem->GetBaseAddress())); - - Id offset = OpISub(t_uint, real, base); - offset = OpUDiv(t_uint, offset, Constant(t_uint, 4U)); - return {OpLoad(t_float, - OpAccessChain(t_gmem_float, gmem_buffer, Constant(t_uint, 0U), offset)), - Type::Float}; + return {OpLoad(t_uint, GetGlobalMemoryPointer(*gmem)), Type::Uint}; } if (const auto lmem = std::get_if<LmemNode>(&*node)) { @@ -1134,10 +1134,7 @@ private: } if (const auto smem = std::get_if<SmemNode>(&*node)) { - Id address = AsUint(Visit(smem->GetAddress())); - address = OpShiftRightLogical(t_uint, address, Constant(t_uint, 2U)); - const Id pointer = OpAccessChain(t_smem_uint, shared_memory, address); - return {OpLoad(t_uint, pointer), Type::Uint}; + return {OpLoad(t_uint, GetSharedMemoryPointer(*smem)), Type::Uint}; } if (const auto internal_flag = std::get_if<InternalFlagNode>(&*node)) { @@ -1331,20 +1328,10 @@ private: target = {OpAccessChain(t_prv_float, local_memory, address), Type::Float}; } else if (const auto smem = std::get_if<SmemNode>(&*dest)) { - ASSERT(stage == ShaderType::Compute); - Id address = AsUint(Visit(smem->GetAddress())); - address = OpShiftRightLogical(t_uint, address, Constant(t_uint, 2U)); - target = {OpAccessChain(t_smem_uint, shared_memory, address), Type::Uint}; + target = {GetSharedMemoryPointer(*smem), Type::Uint}; } else if (const auto gmem = std::get_if<GmemNode>(&*dest)) { - const Id real = AsUint(Visit(gmem->GetRealAddress())); - const Id base = AsUint(Visit(gmem->GetBaseAddress())); - const Id diff = OpISub(t_uint, real, base); - const Id offset = OpShiftRightLogical(t_uint, diff, Constant(t_uint, 2)); - - const Id gmem_buffer = global_buffers.at(gmem->GetDescriptor()); - target = {OpAccessChain(t_gmem_float, gmem_buffer, Constant(t_uint, 0), offset), - Type::Float}; + target = {GetGlobalMemoryPointer(*gmem), Type::Uint}; } else { UNIMPLEMENTED(); @@ -1796,6 +1783,24 @@ private: return {}; } + Expression AtomicAdd(Operation operation) { + Id pointer; + if (const auto smem = std::get_if<SmemNode>(&*operation[0])) { + pointer = GetSharedMemoryPointer(*smem); + } else if (const auto gmem = std::get_if<GmemNode>(&*operation[0])) { + pointer = GetGlobalMemoryPointer(*gmem); + } else { + UNREACHABLE(); + return {Constant(t_uint, 0), Type::Uint}; + } + + const Id scope = Constant(t_uint, 
static_cast<u32>(spv::Scope::Device)); + const Id semantics = Constant(t_uint, 0U); + + const Id value = AsUint(Visit(operation[1])); + return {OpAtomicIAdd(t_uint, pointer, scope, semantics, value), Type::Uint}; + } + Expression Branch(Operation operation) { const auto& target = std::get<ImmediateNode>(*operation[0]); OpStore(jmp_to, Constant(t_uint, target.GetValue())); @@ -1876,19 +1881,14 @@ private: // rendertargets/components are skipped in the register assignment. u32 current_reg = 0; for (u32 rt = 0; rt < Maxwell::NumRenderTargets; ++rt) { - if (!specialization.enabled_rendertargets[rt]) { - // Skip rendertargets that are not enabled - continue; - } // TODO(Subv): Figure out how dual-source blending is configured in the Switch. for (u32 component = 0; component < 4; ++component) { - const Id pointer = AccessElement(t_out_float, frag_colors.at(rt), component); - if (header.ps.IsColorComponentOutputEnabled(rt, component)) { - OpStore(pointer, SafeGetRegister(current_reg)); - ++current_reg; - } else { - OpStore(pointer, component == 3 ? v_float_one : v_float_zero); + if (!header.ps.IsColorComponentOutputEnabled(rt, component)) { + continue; } + const Id pointer = AccessElement(t_out_float, frag_colors[rt], component); + OpStore(pointer, SafeGetRegister(current_reg)); + ++current_reg; } } if (header.ps.omap.depth) { @@ -2227,6 +2227,22 @@ private: return {}; } + Id GetGlobalMemoryPointer(const GmemNode& gmem) { + const Id real = AsUint(Visit(gmem.GetRealAddress())); + const Id base = AsUint(Visit(gmem.GetBaseAddress())); + const Id diff = OpISub(t_uint, real, base); + const Id offset = OpShiftRightLogical(t_uint, diff, Constant(t_uint, 2)); + const Id buffer = global_buffers.at(gmem.GetDescriptor()); + return OpAccessChain(t_gmem_uint, buffer, Constant(t_uint, 0), offset); + } + + Id GetSharedMemoryPointer(const SmemNode& smem) { + ASSERT(stage == ShaderType::Compute); + Id address = AsUint(Visit(smem.GetAddress())); + address = OpShiftRightLogical(t_uint, address, Constant(t_uint, 2U)); + return OpAccessChain(t_smem_uint, shared_memory, address); + } + static constexpr std::array operation_decompilers = { &SPIRVDecompiler::Assign, @@ -2373,6 +2389,8 @@ private: &SPIRVDecompiler::AtomicImageXor, &SPIRVDecompiler::AtomicImageExchange, + &SPIRVDecompiler::AtomicAdd, + &SPIRVDecompiler::Branch, &SPIRVDecompiler::BranchIndirect, &SPIRVDecompiler::PushFlowStack, @@ -2467,9 +2485,9 @@ private: Id t_smem_uint{}; - const Id t_gmem_float = TypePointer(spv::StorageClass::StorageBuffer, t_float); + const Id t_gmem_uint = TypePointer(spv::StorageClass::StorageBuffer, t_uint); const Id t_gmem_array = - Name(Decorate(TypeRuntimeArray(t_float), spv::Decoration::ArrayStride, 4U), "GmemArray"); + Name(Decorate(TypeRuntimeArray(t_uint), spv::Decoration::ArrayStride, 4U), "GmemArray"); const Id t_gmem_struct = MemberDecorate( Decorate(TypeStruct(t_gmem_array), spv::Decoration::Block), 0, spv::Decoration::Offset, 0); const Id t_gmem_ssbo = TypePointer(spv::StorageClass::StorageBuffer, t_gmem_struct); diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.h b/src/video_core/renderer_vulkan/vk_shader_decompiler.h index 10794be1c..f5dc14d9e 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.h +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.h @@ -102,9 +102,6 @@ struct Specialization final { Maxwell::TessellationSpacing spacing{}; bool clockwise{}; } tessellation; - - // Fragment specific - std::bitset<8> enabled_rendertargets; }; // Old gcc versions don't consider this 
trivially copyable. // static_assert(std::is_trivially_copyable_v<Specialization>); diff --git a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h index 02310375f..4d9488f49 100644 --- a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h +++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h @@ -13,6 +13,7 @@ #include "video_core/renderer_vulkan/declarations.h" #include "video_core/renderer_vulkan/vk_memory_manager.h" +#include "video_core/renderer_vulkan/vk_resource_manager.h" namespace Vulkan { diff --git a/src/video_core/renderer_vulkan/vk_swapchain.cpp b/src/video_core/renderer_vulkan/vk_swapchain.cpp index ebc68f030..f47b691a8 100644 --- a/src/video_core/renderer_vulkan/vk_swapchain.cpp +++ b/src/video_core/renderer_vulkan/vk_swapchain.cpp @@ -123,7 +123,7 @@ bool VKSwapchain::Present(vk::Semaphore render_semaphore, VKFence& fence) { ASSERT(fences[image_index] == nullptr); fences[image_index] = &fence; - frame_index = (frame_index + 1) % image_count; + frame_index = (frame_index + 1) % static_cast<u32>(image_count); return recreated; } diff --git a/src/video_core/renderer_vulkan/vk_swapchain.h b/src/video_core/renderer_vulkan/vk_swapchain.h index a1e7938d2..2f3b2ccd5 100644 --- a/src/video_core/renderer_vulkan/vk_swapchain.h +++ b/src/video_core/renderer_vulkan/vk_swapchain.h @@ -40,19 +40,19 @@ public: return extent; } - u32 GetImageCount() const { + std::size_t GetImageCount() const { return image_count; } - u32 GetImageIndex() const { + std::size_t GetImageIndex() const { return image_index; } - vk::Image GetImageIndex(u32 index) const { + vk::Image GetImageIndex(std::size_t index) const { return images[index]; } - vk::ImageView GetImageViewIndex(u32 index) const { + vk::ImageView GetImageViewIndex(std::size_t index) const { return *image_views[index]; } @@ -77,7 +77,7 @@ private: UniqueSwapchainKHR swapchain; - u32 image_count{}; + std::size_t image_count{}; std::vector<vk::Image> images; std::vector<UniqueImageView> image_views; std::vector<UniqueFramebuffer> framebuffers; diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp new file mode 100644 index 000000000..51b0d38a6 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -0,0 +1,475 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include <algorithm> +#include <array> +#include <cstddef> +#include <cstring> +#include <memory> +#include <variant> +#include <vector> + +#include "common/alignment.h" +#include "common/assert.h" +#include "common/common_types.h" +#include "core/core.h" +#include "core/memory.h" +#include "video_core/engines/maxwell_3d.h" +#include "video_core/morton.h" +#include "video_core/renderer_vulkan/declarations.h" +#include "video_core/renderer_vulkan/maxwell_to_vk.h" +#include "video_core/renderer_vulkan/vk_device.h" +#include "video_core/renderer_vulkan/vk_memory_manager.h" +#include "video_core/renderer_vulkan/vk_rasterizer.h" +#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" +#include "video_core/renderer_vulkan/vk_texture_cache.h" +#include "video_core/surface.h" +#include "video_core/textures/convert.h" + +namespace Vulkan { + +using VideoCore::MortonSwizzle; +using VideoCore::MortonSwizzleMode; + +using Tegra::Texture::SwizzleSource; +using VideoCore::Surface::PixelFormat; +using VideoCore::Surface::SurfaceCompression; +using VideoCore::Surface::SurfaceTarget; + +namespace { + +vk::ImageType SurfaceTargetToImage(SurfaceTarget target) { + switch (target) { + case SurfaceTarget::Texture1D: + case SurfaceTarget::Texture1DArray: + return vk::ImageType::e1D; + case SurfaceTarget::Texture2D: + case SurfaceTarget::Texture2DArray: + case SurfaceTarget::TextureCubemap: + case SurfaceTarget::TextureCubeArray: + return vk::ImageType::e2D; + case SurfaceTarget::Texture3D: + return vk::ImageType::e3D; + } + UNREACHABLE_MSG("Unknown texture target={}", static_cast<u32>(target)); + return {}; +} + +vk::ImageAspectFlags PixelFormatToImageAspect(PixelFormat pixel_format) { + if (pixel_format < PixelFormat::MaxColorFormat) { + return vk::ImageAspectFlagBits::eColor; + } else if (pixel_format < PixelFormat::MaxDepthFormat) { + return vk::ImageAspectFlagBits::eDepth; + } else if (pixel_format < PixelFormat::MaxDepthStencilFormat) { + return vk::ImageAspectFlagBits::eDepth | vk::ImageAspectFlagBits::eStencil; + } else { + UNREACHABLE_MSG("Invalid pixel format={}", static_cast<u32>(pixel_format)); + return vk::ImageAspectFlagBits::eColor; + } +} + +vk::ImageViewType GetImageViewType(SurfaceTarget target) { + switch (target) { + case SurfaceTarget::Texture1D: + return vk::ImageViewType::e1D; + case SurfaceTarget::Texture2D: + return vk::ImageViewType::e2D; + case SurfaceTarget::Texture3D: + return vk::ImageViewType::e3D; + case SurfaceTarget::Texture1DArray: + return vk::ImageViewType::e1DArray; + case SurfaceTarget::Texture2DArray: + return vk::ImageViewType::e2DArray; + case SurfaceTarget::TextureCubemap: + return vk::ImageViewType::eCube; + case SurfaceTarget::TextureCubeArray: + return vk::ImageViewType::eCubeArray; + case SurfaceTarget::TextureBuffer: + break; + } + UNREACHABLE(); + return {}; +} + +UniqueBuffer CreateBuffer(const VKDevice& device, const SurfaceParams& params) { + // TODO(Rodrigo): Move texture buffer creation to the buffer cache + const vk::BufferCreateInfo buffer_ci({}, params.GetHostSizeInBytes(), + vk::BufferUsageFlagBits::eUniformTexelBuffer | + vk::BufferUsageFlagBits::eTransferSrc | + vk::BufferUsageFlagBits::eTransferDst, + vk::SharingMode::eExclusive, 0, nullptr); + const auto dev = device.GetLogical(); + const auto& dld = device.GetDispatchLoader(); + return dev.createBufferUnique(buffer_ci, nullptr, dld); +} + +vk::BufferViewCreateInfo GenerateBufferViewCreateInfo(const VKDevice& device, + const SurfaceParams& params, + vk::Buffer buffer) { + 
ASSERT(params.IsBuffer()); + + const auto format = + MaxwellToVK::SurfaceFormat(device, FormatType::Buffer, params.pixel_format).format; + return vk::BufferViewCreateInfo({}, buffer, format, 0, params.GetHostSizeInBytes()); +} + +vk::ImageCreateInfo GenerateImageCreateInfo(const VKDevice& device, const SurfaceParams& params) { + constexpr auto sample_count = vk::SampleCountFlagBits::e1; + constexpr auto tiling = vk::ImageTiling::eOptimal; + + ASSERT(!params.IsBuffer()); + + const auto [format, attachable, storage] = + MaxwellToVK::SurfaceFormat(device, FormatType::Optimal, params.pixel_format); + + auto image_usage = vk::ImageUsageFlagBits::eSampled | vk::ImageUsageFlagBits::eTransferDst | + vk::ImageUsageFlagBits::eTransferSrc; + if (attachable) { + image_usage |= params.IsPixelFormatZeta() ? vk::ImageUsageFlagBits::eDepthStencilAttachment + : vk::ImageUsageFlagBits::eColorAttachment; + } + if (storage) { + image_usage |= vk::ImageUsageFlagBits::eStorage; + } + + vk::ImageCreateFlags flags; + vk::Extent3D extent; + switch (params.target) { + case SurfaceTarget::TextureCubemap: + case SurfaceTarget::TextureCubeArray: + flags |= vk::ImageCreateFlagBits::eCubeCompatible; + [[fallthrough]]; + case SurfaceTarget::Texture1D: + case SurfaceTarget::Texture1DArray: + case SurfaceTarget::Texture2D: + case SurfaceTarget::Texture2DArray: + extent = vk::Extent3D(params.width, params.height, 1); + break; + case SurfaceTarget::Texture3D: + extent = vk::Extent3D(params.width, params.height, params.depth); + break; + case SurfaceTarget::TextureBuffer: + UNREACHABLE(); + } + + return vk::ImageCreateInfo(flags, SurfaceTargetToImage(params.target), format, extent, + params.num_levels, static_cast<u32>(params.GetNumLayers()), + sample_count, tiling, image_usage, vk::SharingMode::eExclusive, 0, + nullptr, vk::ImageLayout::eUndefined); +} + +} // Anonymous namespace + +CachedSurface::CachedSurface(Core::System& system, const VKDevice& device, + VKResourceManager& resource_manager, VKMemoryManager& memory_manager, + VKScheduler& scheduler, VKStagingBufferPool& staging_pool, + GPUVAddr gpu_addr, const SurfaceParams& params) + : SurfaceBase<View>{gpu_addr, params}, system{system}, device{device}, + resource_manager{resource_manager}, memory_manager{memory_manager}, scheduler{scheduler}, + staging_pool{staging_pool} { + if (params.IsBuffer()) { + buffer = CreateBuffer(device, params); + commit = memory_manager.Commit(*buffer, false); + + const auto buffer_view_ci = GenerateBufferViewCreateInfo(device, params, *buffer); + format = buffer_view_ci.format; + + const auto dev = device.GetLogical(); + const auto& dld = device.GetDispatchLoader(); + buffer_view = dev.createBufferViewUnique(buffer_view_ci, nullptr, dld); + } else { + const auto image_ci = GenerateImageCreateInfo(device, params); + format = image_ci.format; + + image.emplace(device, scheduler, image_ci, PixelFormatToImageAspect(params.pixel_format)); + commit = memory_manager.Commit(image->GetHandle(), false); + } + + // TODO(Rodrigo): Move this to a virtual function. 
+ main_view = CreateViewInner( + ViewParams(params.target, 0, static_cast<u32>(params.GetNumLayers()), 0, params.num_levels), + true); +} + +CachedSurface::~CachedSurface() = default; + +void CachedSurface::UploadTexture(const std::vector<u8>& staging_buffer) { + // To upload data we have to be outside of a renderpass + scheduler.RequestOutsideRenderPassOperationContext(); + + if (params.IsBuffer()) { + UploadBuffer(staging_buffer); + } else { + UploadImage(staging_buffer); + } +} + +void CachedSurface::DownloadTexture(std::vector<u8>& staging_buffer) { + UNIMPLEMENTED_IF(params.IsBuffer()); + + if (params.pixel_format == VideoCore::Surface::PixelFormat::A1B5G5R5U) { + LOG_WARNING(Render_Vulkan, "A1B5G5R5 flushing is stubbed"); + } + + // We can't copy images to buffers inside a renderpass + scheduler.RequestOutsideRenderPassOperationContext(); + + FullTransition(vk::PipelineStageFlagBits::eTransfer, vk::AccessFlagBits::eTransferRead, + vk::ImageLayout::eTransferSrcOptimal); + + const auto& buffer = staging_pool.GetUnusedBuffer(host_memory_size, true); + // TODO(Rodrigo): Do this in a single copy + for (u32 level = 0; level < params.num_levels; ++level) { + scheduler.Record([image = image->GetHandle(), buffer = *buffer.handle, + copy = GetBufferImageCopy(level)](auto cmdbuf, auto& dld) { + cmdbuf.copyImageToBuffer(image, vk::ImageLayout::eTransferSrcOptimal, buffer, {copy}, + dld); + }); + } + scheduler.Finish(); + + // TODO(Rodrigo): Use an intern buffer for staging buffers and avoid this unnecessary memcpy. + std::memcpy(staging_buffer.data(), buffer.commit->Map(host_memory_size), host_memory_size); +} + +void CachedSurface::DecorateSurfaceName() { + // TODO(Rodrigo): Add name decorations +} + +View CachedSurface::CreateView(const ViewParams& params) { + return CreateViewInner(params, false); +} + +View CachedSurface::CreateViewInner(const ViewParams& params, bool is_proxy) { + // TODO(Rodrigo): Add name decorations + return views[params] = std::make_shared<CachedSurfaceView>(device, *this, params, is_proxy); +} + +void CachedSurface::UploadBuffer(const std::vector<u8>& staging_buffer) { + const auto& src_buffer = staging_pool.GetUnusedBuffer(host_memory_size, true); + std::memcpy(src_buffer.commit->Map(host_memory_size), staging_buffer.data(), host_memory_size); + + scheduler.Record([src_buffer = *src_buffer.handle, dst_buffer = *buffer, + size = params.GetHostSizeInBytes()](auto cmdbuf, auto& dld) { + const vk::BufferCopy copy(0, 0, size); + cmdbuf.copyBuffer(src_buffer, dst_buffer, {copy}, dld); + + cmdbuf.pipelineBarrier( + vk::PipelineStageFlagBits::eTransfer, vk::PipelineStageFlagBits::eVertexShader, {}, {}, + {vk::BufferMemoryBarrier(vk::AccessFlagBits::eTransferWrite, + vk::AccessFlagBits::eShaderRead, 0, 0, dst_buffer, 0, size)}, + {}, dld); + }); +} + +void CachedSurface::UploadImage(const std::vector<u8>& staging_buffer) { + const auto& src_buffer = staging_pool.GetUnusedBuffer(host_memory_size, true); + std::memcpy(src_buffer.commit->Map(host_memory_size), staging_buffer.data(), host_memory_size); + + FullTransition(vk::PipelineStageFlagBits::eTransfer, vk::AccessFlagBits::eTransferWrite, + vk::ImageLayout::eTransferDstOptimal); + + for (u32 level = 0; level < params.num_levels; ++level) { + vk::BufferImageCopy copy = GetBufferImageCopy(level); + const auto& dld = device.GetDispatchLoader(); + if (image->GetAspectMask() == + (vk::ImageAspectFlagBits::eDepth | vk::ImageAspectFlagBits::eStencil)) { + vk::BufferImageCopy depth = copy; + vk::BufferImageCopy stencil = copy; + 
depth.imageSubresource.aspectMask = vk::ImageAspectFlagBits::eDepth; + stencil.imageSubresource.aspectMask = vk::ImageAspectFlagBits::eStencil; + scheduler.Record([buffer = *src_buffer.handle, image = image->GetHandle(), depth, + stencil](auto cmdbuf, auto& dld) { + cmdbuf.copyBufferToImage(buffer, image, vk::ImageLayout::eTransferDstOptimal, + {depth, stencil}, dld); + }); + } else { + scheduler.Record([buffer = *src_buffer.handle, image = image->GetHandle(), + copy](auto cmdbuf, auto& dld) { + cmdbuf.copyBufferToImage(buffer, image, vk::ImageLayout::eTransferDstOptimal, + {copy}, dld); + }); + } + } +} + +vk::BufferImageCopy CachedSurface::GetBufferImageCopy(u32 level) const { + const u32 vk_depth = params.target == SurfaceTarget::Texture3D ? params.GetMipDepth(level) : 1; + const auto compression_type = params.GetCompressionType(); + const std::size_t mip_offset = compression_type == SurfaceCompression::Converted + ? params.GetConvertedMipmapOffset(level) + : params.GetHostMipmapLevelOffset(level); + + return vk::BufferImageCopy( + mip_offset, 0, 0, + {image->GetAspectMask(), level, 0, static_cast<u32>(params.GetNumLayers())}, {0, 0, 0}, + {params.GetMipWidth(level), params.GetMipHeight(level), vk_depth}); +} + +vk::ImageSubresourceRange CachedSurface::GetImageSubresourceRange() const { + return {image->GetAspectMask(), 0, params.num_levels, 0, + static_cast<u32>(params.GetNumLayers())}; +} + +CachedSurfaceView::CachedSurfaceView(const VKDevice& device, CachedSurface& surface, + const ViewParams& params, bool is_proxy) + : VideoCommon::ViewBase{params}, params{surface.GetSurfaceParams()}, + image{surface.GetImageHandle()}, buffer_view{surface.GetBufferViewHandle()}, + aspect_mask{surface.GetAspectMask()}, device{device}, surface{surface}, + base_layer{params.base_layer}, num_layers{params.num_layers}, base_level{params.base_level}, + num_levels{params.num_levels}, image_view_type{image ? GetImageViewType(params.target) : vk::ImageViewType{}} {} + +CachedSurfaceView::~CachedSurfaceView() = default; + +vk::ImageView CachedSurfaceView::GetHandle(SwizzleSource x_source, SwizzleSource y_source, + SwizzleSource z_source, SwizzleSource w_source) { + const u32 swizzle = EncodeSwizzle(x_source, y_source, z_source, w_source); + if (last_image_view && last_swizzle == swizzle) { + return last_image_view; + } + last_swizzle = swizzle; + + const auto [entry, is_cache_miss] = view_cache.try_emplace(swizzle); + auto& image_view = entry->second; + if (!is_cache_miss) { + return last_image_view = *image_view; + } + + auto swizzle_x = MaxwellToVK::SwizzleSource(x_source); + auto swizzle_y = MaxwellToVK::SwizzleSource(y_source); + auto swizzle_z = MaxwellToVK::SwizzleSource(z_source); + auto swizzle_w = MaxwellToVK::SwizzleSource(w_source); + + if (params.pixel_format == VideoCore::Surface::PixelFormat::A1B5G5R5U) { + // A1B5G5R5 is implemented as A1R5G5B5, so we have to change the swizzle here. + std::swap(swizzle_x, swizzle_z); + } + + // Games can sample depth or stencil values from a texture; the swizzle value decides which on + // hardware. To emulate this on Vulkan we encode the choice in the image aspect. 
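+ // For example (reviewer annotation, not part of the change): sampling Z24S8 or Z32FS8 with an
+ // R swizzle reads depth, so the view gets the eDepth aspect, while a G swizzle reads stencil
+ // (eStencil); S8Z24 flips that mapping, as the switch below handles.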
+ vk::ImageAspectFlags aspect = aspect_mask; + if (aspect == (vk::ImageAspectFlagBits::eDepth | vk::ImageAspectFlagBits::eStencil)) { + UNIMPLEMENTED_IF(x_source != SwizzleSource::R && x_source != SwizzleSource::G); + const bool is_first = x_source == SwizzleSource::R; + switch (params.pixel_format) { + case VideoCore::Surface::PixelFormat::Z24S8: + case VideoCore::Surface::PixelFormat::Z32FS8: + aspect = is_first ? vk::ImageAspectFlagBits::eDepth : vk::ImageAspectFlagBits::eStencil; + break; + case VideoCore::Surface::PixelFormat::S8Z24: + aspect = is_first ? vk::ImageAspectFlagBits::eStencil : vk::ImageAspectFlagBits::eDepth; + break; + default: + aspect = vk::ImageAspectFlagBits::eDepth; + UNIMPLEMENTED(); + } + + // Vulkan doesn't seem to understand swizzling of a depth-stencil image; use the identity + // swizzle. + swizzle_x = vk::ComponentSwizzle::eR; + swizzle_y = vk::ComponentSwizzle::eG; + swizzle_z = vk::ComponentSwizzle::eB; + swizzle_w = vk::ComponentSwizzle::eA; + } + + const vk::ImageViewCreateInfo image_view_ci( + {}, surface.GetImageHandle(), image_view_type, surface.GetImage().GetFormat(), + {swizzle_x, swizzle_y, swizzle_z, swizzle_w}, + {aspect, base_level, num_levels, base_layer, num_layers}); + + const auto dev = device.GetLogical(); + image_view = dev.createImageViewUnique(image_view_ci, nullptr, device.GetDispatchLoader()); + return last_image_view = *image_view; +} + +VKTextureCache::VKTextureCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer, + const VKDevice& device, VKResourceManager& resource_manager, + VKMemoryManager& memory_manager, VKScheduler& scheduler, + VKStagingBufferPool& staging_pool) + : TextureCache(system, rasterizer), device{device}, resource_manager{resource_manager}, + memory_manager{memory_manager}, scheduler{scheduler}, staging_pool{staging_pool} {} + +VKTextureCache::~VKTextureCache() = default; + +Surface VKTextureCache::CreateSurface(GPUVAddr gpu_addr, const SurfaceParams& params) { + return std::make_shared<CachedSurface>(system, device, resource_manager, memory_manager, + scheduler, staging_pool, gpu_addr, params); +} + +void VKTextureCache::ImageCopy(Surface& src_surface, Surface& dst_surface, + const VideoCommon::CopyParams& copy_params) { + const bool src_3d = src_surface->GetSurfaceParams().target == SurfaceTarget::Texture3D; + const bool dst_3d = dst_surface->GetSurfaceParams().target == SurfaceTarget::Texture3D; + UNIMPLEMENTED_IF(src_3d); + + // The texture cache handles depth in OpenGL terms; we have to treat it as a layer subresource + // for array copies and as an image dimension for 3D copies. + const u32 dst_base_layer = dst_3d ? 0 : copy_params.dest_z; + const u32 dst_offset_z = dst_3d ? copy_params.dest_z : 0; + + const u32 extent_z = dst_3d ? copy_params.depth : 1; + const u32 num_layers = dst_3d ? 
1 : copy_params.depth; + + // We can't copy inside a renderpass + scheduler.RequestOutsideRenderPassOperationContext(); + + src_surface->Transition(copy_params.source_z, copy_params.depth, copy_params.source_level, 1, + vk::PipelineStageFlagBits::eTransfer, vk::AccessFlagBits::eTransferRead, + vk::ImageLayout::eTransferSrcOptimal); + dst_surface->Transition( + dst_base_layer, num_layers, copy_params.dest_level, 1, vk::PipelineStageFlagBits::eTransfer, + vk::AccessFlagBits::eTransferWrite, vk::ImageLayout::eTransferDstOptimal); + + const auto& dld{device.GetDispatchLoader()}; + const vk::ImageSubresourceLayers src_subresource( + src_surface->GetAspectMask(), copy_params.source_level, copy_params.source_z, num_layers); + const vk::ImageSubresourceLayers dst_subresource( + dst_surface->GetAspectMask(), copy_params.dest_level, dst_base_layer, num_layers); + const vk::Offset3D src_offset(copy_params.source_x, copy_params.source_y, 0); + const vk::Offset3D dst_offset(copy_params.dest_x, copy_params.dest_y, dst_offset_z); + const vk::Extent3D extent(copy_params.width, copy_params.height, extent_z); + const vk::ImageCopy copy(src_subresource, src_offset, dst_subresource, dst_offset, extent); + const vk::Image src_image = src_surface->GetImageHandle(); + const vk::Image dst_image = dst_surface->GetImageHandle(); + scheduler.Record([src_image, dst_image, copy](auto cmdbuf, auto& dld) { + cmdbuf.copyImage(src_image, vk::ImageLayout::eTransferSrcOptimal, dst_image, + vk::ImageLayout::eTransferDstOptimal, {copy}, dld); + }); +} + +void VKTextureCache::ImageBlit(View& src_view, View& dst_view, + const Tegra::Engines::Fermi2D::Config& copy_config) { + // We can't blit inside a renderpass + scheduler.RequestOutsideRenderPassOperationContext(); + + src_view->Transition(vk::ImageLayout::eTransferSrcOptimal, vk::PipelineStageFlagBits::eTransfer, + vk::AccessFlagBits::eTransferRead); + dst_view->Transition(vk::ImageLayout::eTransferDstOptimal, vk::PipelineStageFlagBits::eTransfer, + vk::AccessFlagBits::eTransferWrite); + + const auto& cfg = copy_config; + const auto src_top_left = vk::Offset3D(cfg.src_rect.left, cfg.src_rect.top, 0); + const auto src_bot_right = vk::Offset3D(cfg.src_rect.right, cfg.src_rect.bottom, 1); + const auto dst_top_left = vk::Offset3D(cfg.dst_rect.left, cfg.dst_rect.top, 0); + const auto dst_bot_right = vk::Offset3D(cfg.dst_rect.right, cfg.dst_rect.bottom, 1); + const vk::ImageBlit blit(src_view->GetImageSubresourceLayers(), {src_top_left, src_bot_right}, + dst_view->GetImageSubresourceLayers(), {dst_top_left, dst_bot_right}); + const bool is_linear = copy_config.filter == Tegra::Engines::Fermi2D::Filter::Linear; + + const auto& dld{device.GetDispatchLoader()}; + scheduler.Record([src_image = src_view->GetImage(), dst_image = dst_view->GetImage(), blit, + is_linear](auto cmdbuf, auto& dld) { + cmdbuf.blitImage(src_image, vk::ImageLayout::eTransferSrcOptimal, dst_image, + vk::ImageLayout::eTransferDstOptimal, {blit}, + is_linear ? vk::Filter::eLinear : vk::Filter::eNearest, dld); + }); +} + +void VKTextureCache::BufferCopy(Surface& src_surface, Surface& dst_surface) { + // Currently unimplemented. PBO copies should be dropped and we should use a render pass to + // convert from color to depth and vice versa. 
+ LOG_WARNING(Render_Vulkan, "Unimplemented"); +} + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h new file mode 100644 index 000000000..d3edbe80c --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_texture_cache.h @@ -0,0 +1,239 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <memory> +#include <unordered_map> + +#include "common/assert.h" +#include "common/common_types.h" +#include "common/logging/log.h" +#include "common/math_util.h" +#include "video_core/gpu.h" +#include "video_core/rasterizer_cache.h" +#include "video_core/renderer_vulkan/declarations.h" +#include "video_core/renderer_vulkan/vk_image.h" +#include "video_core/renderer_vulkan/vk_memory_manager.h" +#include "video_core/renderer_vulkan/vk_scheduler.h" +#include "video_core/texture_cache/surface_base.h" +#include "video_core/texture_cache/texture_cache.h" +#include "video_core/textures/decoders.h" + +namespace Core { +class System; +} + +namespace VideoCore { +class RasterizerInterface; +} + +namespace Vulkan { + +class RasterizerVulkan; +class VKDevice; +class VKResourceManager; +class VKScheduler; +class VKStagingBufferPool; + +class CachedSurfaceView; +class CachedSurface; + +using Surface = std::shared_ptr<CachedSurface>; +using View = std::shared_ptr<CachedSurfaceView>; +using TextureCacheBase = VideoCommon::TextureCache<Surface, View>; + +using VideoCommon::SurfaceParams; +using VideoCommon::ViewParams; + +class CachedSurface final : public VideoCommon::SurfaceBase<View> { + friend CachedSurfaceView; + +public: + explicit CachedSurface(Core::System& system, const VKDevice& device, + VKResourceManager& resource_manager, VKMemoryManager& memory_manager, + VKScheduler& scheduler, VKStagingBufferPool& staging_pool, + GPUVAddr gpu_addr, const SurfaceParams& params); + ~CachedSurface(); + + void UploadTexture(const std::vector<u8>& staging_buffer) override; + void DownloadTexture(std::vector<u8>& staging_buffer) override; + + void FullTransition(vk::PipelineStageFlags new_stage_mask, vk::AccessFlags new_access, + vk::ImageLayout new_layout) { + image->Transition(0, static_cast<u32>(params.GetNumLayers()), 0, params.num_levels, + new_stage_mask, new_access, new_layout); + } + + void Transition(u32 base_layer, u32 num_layers, u32 base_level, u32 num_levels, + vk::PipelineStageFlags new_stage_mask, vk::AccessFlags new_access, + vk::ImageLayout new_layout) { + image->Transition(base_layer, num_layers, base_level, num_levels, new_stage_mask, + new_access, new_layout); + } + + VKImage& GetImage() { + return *image; + } + + const VKImage& GetImage() const { + return *image; + } + + vk::Image GetImageHandle() const { + return image->GetHandle(); + } + + vk::ImageAspectFlags GetAspectMask() const { + return image->GetAspectMask(); + } + + vk::BufferView GetBufferViewHandle() const { + return *buffer_view; + } + +protected: + void DecorateSurfaceName(); + + View CreateView(const ViewParams& params) override; + View CreateViewInner(const ViewParams& params, bool is_proxy); + +private: + void UploadBuffer(const std::vector<u8>& staging_buffer); + + void UploadImage(const std::vector<u8>& staging_buffer); + + vk::BufferImageCopy GetBufferImageCopy(u32 level) const; + + vk::ImageSubresourceRange GetImageSubresourceRange() const; + + Core::System& system; + const VKDevice& device; + VKResourceManager& resource_manager; + 
resource_manager; +
VKMemoryManager& memory_manager; + VKScheduler& scheduler; + VKStagingBufferPool& staging_pool; + + std::optional<VKImage> image; + UniqueBuffer buffer; + UniqueBufferView buffer_view; + VKMemoryCommit commit; + + vk::Format format; +}; + +class CachedSurfaceView final : public VideoCommon::ViewBase { +public: + explicit CachedSurfaceView(const VKDevice& device, CachedSurface& surface, + const ViewParams& params, bool is_proxy); + ~CachedSurfaceView(); + + vk::ImageView GetHandle(Tegra::Texture::SwizzleSource x_source, + Tegra::Texture::SwizzleSource y_source, + Tegra::Texture::SwizzleSource z_source, + Tegra::Texture::SwizzleSource w_source); + + bool IsSameSurface(const CachedSurfaceView& rhs) const { + return &surface == &rhs.surface; + } + + vk::ImageView GetHandle() { + return GetHandle(Tegra::Texture::SwizzleSource::R, Tegra::Texture::SwizzleSource::G, + Tegra::Texture::SwizzleSource::B, Tegra::Texture::SwizzleSource::A); + } + + u32 GetWidth() const { + return params.GetMipWidth(base_level); + } + + u32 GetHeight() const { + return params.GetMipHeight(base_level); + } + + bool IsBufferView() const { + return buffer_view; + } + + vk::Image GetImage() const { + return image; + } + + vk::BufferView GetBufferView() const { + return buffer_view; + } + + vk::ImageSubresourceRange GetImageSubresourceRange() const { + return {aspect_mask, base_level, num_levels, base_layer, num_layers}; + } + + vk::ImageSubresourceLayers GetImageSubresourceLayers() const { + return {surface.GetAspectMask(), base_level, base_layer, num_layers}; + } + + void Transition(vk::ImageLayout new_layout, vk::PipelineStageFlags new_stage_mask, + vk::AccessFlags new_access) const { + surface.Transition(base_layer, num_layers, base_level, num_levels, new_stage_mask, + new_access, new_layout); + } + + void MarkAsModified(u64 tick) { + surface.MarkAsModified(true, tick); + } + +private: + static u32 EncodeSwizzle(Tegra::Texture::SwizzleSource x_source, + Tegra::Texture::SwizzleSource y_source, + Tegra::Texture::SwizzleSource z_source, + Tegra::Texture::SwizzleSource w_source) { + return (static_cast<u32>(x_source) << 24) | (static_cast<u32>(y_source) << 16) | + (static_cast<u32>(z_source) << 8) | static_cast<u32>(w_source); + } + + // Store a copy of these values to avoid double dereference when reading them + const SurfaceParams params; + const vk::Image image; + const vk::BufferView buffer_view; + const vk::ImageAspectFlags aspect_mask; + + const VKDevice& device; + CachedSurface& surface; + const u32 base_layer; + const u32 num_layers; + const u32 base_level; + const u32 num_levels; + const vk::ImageViewType image_view_type; + + vk::ImageView last_image_view; + u32 last_swizzle{}; + + std::unordered_map<u32, UniqueImageView> view_cache; +}; + +class VKTextureCache final : public TextureCacheBase { +public: + explicit VKTextureCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer, + const VKDevice& device, VKResourceManager& resource_manager, + VKMemoryManager& memory_manager, VKScheduler& scheduler, + VKStagingBufferPool& staging_pool); + ~VKTextureCache(); + +private: + Surface CreateSurface(GPUVAddr gpu_addr, const SurfaceParams& params) override; + + void ImageCopy(Surface& src_surface, Surface& dst_surface, + const VideoCommon::CopyParams& copy_params) override; + + void ImageBlit(View& src_view, View& dst_view, + const Tegra::Engines::Fermi2D::Config& copy_config) override; + + void BufferCopy(Surface& src_surface, Surface& dst_surface) override; + + const VKDevice& device; + VKResourceManager& 
resource_manager; + VKMemoryManager& memory_manager; + VKScheduler& scheduler; + VKStagingBufferPool& staging_pool; +}; + +} // namespace Vulkan diff --git a/src/video_core/shader/control_flow.cpp b/src/video_core/shader/control_flow.cpp index b427ac873..0229733b6 100644 --- a/src/video_core/shader/control_flow.cpp +++ b/src/video_core/shader/control_flow.cpp @@ -65,7 +65,7 @@ struct BlockInfo { struct CFGRebuildState { explicit CFGRebuildState(const ProgramCode& program_code, u32 start, ConstBufferLocker& locker) - : program_code{program_code}, start{start}, locker{locker} {} + : program_code{program_code}, locker{locker}, start{start} {} const ProgramCode& program_code; ConstBufferLocker& locker; diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp index c934d0719..b5fbc4d58 100644 --- a/src/video_core/shader/decode/memory.cpp +++ b/src/video_core/shader/decode/memory.cpp @@ -6,6 +6,7 @@ #include <vector> #include <fmt/format.h> +#include "common/alignment.h" #include "common/assert.h" #include "common/common_types.h" #include "common/logging/log.h" @@ -15,44 +16,75 @@ namespace VideoCommon::Shader { +using Tegra::Shader::AtomicOp; +using Tegra::Shader::AtomicType; using Tegra::Shader::Attribute; +using Tegra::Shader::GlobalAtomicOp; +using Tegra::Shader::GlobalAtomicType; using Tegra::Shader::Instruction; using Tegra::Shader::OpCode; using Tegra::Shader::Register; +using Tegra::Shader::StoreType; namespace { -u32 GetLdgMemorySize(Tegra::Shader::UniformType uniform_type) { +bool IsUnaligned(Tegra::Shader::UniformType uniform_type) { + return uniform_type == Tegra::Shader::UniformType::UnsignedByte || + uniform_type == Tegra::Shader::UniformType::UnsignedShort; +} + +u32 GetUnalignedMask(Tegra::Shader::UniformType uniform_type) { switch (uniform_type) { case Tegra::Shader::UniformType::UnsignedByte: - case Tegra::Shader::UniformType::Single: - return 1; - case Tegra::Shader::UniformType::Double: - return 2; - case Tegra::Shader::UniformType::Quad: - case Tegra::Shader::UniformType::UnsignedQuad: - return 4; + return 0b11; + case Tegra::Shader::UniformType::UnsignedShort: + return 0b10; default: - UNIMPLEMENTED_MSG("Unimplemented size={}!", static_cast<u32>(uniform_type)); - return 1; + UNREACHABLE(); + return 0; } } -u32 GetStgMemorySize(Tegra::Shader::UniformType uniform_type) { +u32 GetMemorySize(Tegra::Shader::UniformType uniform_type) { switch (uniform_type) { + case Tegra::Shader::UniformType::UnsignedByte: + return 8; + case Tegra::Shader::UniformType::UnsignedShort: + return 16; case Tegra::Shader::UniformType::Single: - return 1; + return 32; case Tegra::Shader::UniformType::Double: - return 2; + return 64; case Tegra::Shader::UniformType::Quad: case Tegra::Shader::UniformType::UnsignedQuad: - return 4; + return 128; default: UNIMPLEMENTED_MSG("Unimplemented size={}!", static_cast<u32>(uniform_type)); - return 1; + return 32; } } +Node ExtractUnaligned(Node value, Node address, u32 mask, u32 size) { + Node offset = Operation(OperationCode::UBitwiseAnd, address, Immediate(mask)); + offset = Operation(OperationCode::ULogicalShiftLeft, std::move(offset), Immediate(3)); + return Operation(OperationCode::UBitfieldExtract, std::move(value), std::move(offset), + Immediate(size)); +} + +Node InsertUnaligned(Node dest, Node value, Node address, u32 mask, u32 size) { + Node offset = Operation(OperationCode::UBitwiseAnd, std::move(address), Immediate(mask)); + offset = Operation(OperationCode::ULogicalShiftLeft, std::move(offset), Immediate(3)); + 
return Operation(OperationCode::UBitfieldInsert, std::move(dest), std::move(value), + std::move(offset), Immediate(size)); +} + +Node Sign16Extend(Node value) { + Node sign = Operation(OperationCode::UBitwiseAnd, value, Immediate(1U << 15)); + Node is_sign = Operation(OperationCode::LogicalUEqual, std::move(sign), Immediate(1U << 15)); + Node extend = Operation(OperationCode::Select, is_sign, Immediate(0xFFFF0000), Immediate(0)); + return Operation(OperationCode::UBitwiseOr, std::move(value), std::move(extend)); +} + } // Anonymous namespace u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { @@ -128,26 +160,31 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { LOG_DEBUG(HW_GPU, "LD_L cache management mode: {}", static_cast<u64>(instr.ld_l.unknown)); [[fallthrough]]; case OpCode::Id::LD_S: { - const auto GetMemory = [&](s32 offset) { + const auto GetAddress = [&](s32 offset) { ASSERT(offset % 4 == 0); const Node immediate_offset = Immediate(static_cast<s32>(instr.smem_imm) + offset); - const Node address = Operation(OperationCode::IAdd, NO_PRECISE, GetRegister(instr.gpr8), - immediate_offset); - return opcode->get().GetId() == OpCode::Id::LD_S ? GetSharedMemory(address) - : GetLocalMemory(address); + return Operation(OperationCode::IAdd, GetRegister(instr.gpr8), immediate_offset); + }; + const auto GetMemory = [&](s32 offset) { + return opcode->get().GetId() == OpCode::Id::LD_S ? GetSharedMemory(GetAddress(offset)) + : GetLocalMemory(GetAddress(offset)); }; switch (instr.ldst_sl.type.Value()) { - case Tegra::Shader::StoreType::Bits32: - case Tegra::Shader::StoreType::Bits64: - case Tegra::Shader::StoreType::Bits128: { - const u32 count = [&]() { + case StoreType::Signed16: + SetRegister(bb, instr.gpr0, + Sign16Extend(ExtractUnaligned(GetMemory(0), GetAddress(0), 0b10, 16))); + break; + case StoreType::Bits32: + case StoreType::Bits64: + case StoreType::Bits128: { + const u32 count = [&] { switch (instr.ldst_sl.type.Value()) { - case Tegra::Shader::StoreType::Bits32: + case StoreType::Bits32: return 1; - case Tegra::Shader::StoreType::Bits64: + case StoreType::Bits64: return 2; - case Tegra::Shader::StoreType::Bits128: + case StoreType::Bits128: return 4; default: UNREACHABLE(); @@ -184,9 +221,10 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { }(); const auto [real_address_base, base_address, descriptor] = - TrackGlobalMemory(bb, instr, false); + TrackGlobalMemory(bb, instr, true, false); - const u32 count = GetLdgMemorySize(type); + const u32 size = GetMemorySize(type); + const u32 count = Common::AlignUp(size, 32) / 32; if (!real_address_base || !base_address) { // Tracking failed, load zeroes. for (u32 i = 0; i < count; ++i) { @@ -200,14 +238,10 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { const Node real_address = Operation(OperationCode::UAdd, real_address_base, it_offset); Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor); - if (type == Tegra::Shader::UniformType::UnsignedByte) { - // To handle unaligned loads get the byte used to dereferenced global memory - // and extract that byte from the loaded uint32. - Node byte = Operation(OperationCode::UBitwiseAnd, real_address, Immediate(3)); - byte = Operation(OperationCode::ULogicalShiftLeft, std::move(byte), Immediate(3)); - - gmem = Operation(OperationCode::UBitfieldExtract, std::move(gmem), std::move(byte), - Immediate(8)); + // To handle unaligned loads get the bytes used to dereference global memory and extract + // those bytes from the loaded u32. 
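+ // For example (reviewer annotation, not part of the change): an 8-bit LDG from address A yields
+ // offset = (A & 0b11) << 3 and a UBitfieldExtract of 8 bits at that offset, so A = 0x1003
+ // selects bits [24, 32) of the aligned 32-bit word.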
+ if (IsUnaligned(type)) { + gmem = ExtractUnaligned(gmem, real_address, GetUnalignedMask(type), size); } SetTemporary(bb, i, gmem); @@ -259,21 +293,28 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { return Operation(OperationCode::IAdd, NO_PRECISE, GetRegister(instr.gpr8), immediate); }; - const auto set_memory = opcode->get().GetId() == OpCode::Id::ST_L - ? &ShaderIR::SetLocalMemory - : &ShaderIR::SetSharedMemory; + const bool is_local = opcode->get().GetId() == OpCode::Id::ST_L; + const auto set_memory = is_local ? &ShaderIR::SetLocalMemory : &ShaderIR::SetSharedMemory; + const auto get_memory = is_local ? &ShaderIR::GetLocalMemory : &ShaderIR::GetSharedMemory; switch (instr.ldst_sl.type.Value()) { - case Tegra::Shader::StoreType::Bits128: + case StoreType::Bits128: (this->*set_memory)(bb, GetAddress(12), GetRegister(instr.gpr0.Value() + 3)); (this->*set_memory)(bb, GetAddress(8), GetRegister(instr.gpr0.Value() + 2)); [[fallthrough]]; - case Tegra::Shader::StoreType::Bits64: + case StoreType::Bits64: (this->*set_memory)(bb, GetAddress(4), GetRegister(instr.gpr0.Value() + 1)); [[fallthrough]]; - case Tegra::Shader::StoreType::Bits32: + case StoreType::Bits32: (this->*set_memory)(bb, GetAddress(0), GetRegister(instr.gpr0)); break; + case StoreType::Signed16: { + Node address = GetAddress(0); + Node memory = (this->*get_memory)(address); + (this->*set_memory)( + bb, address, InsertUnaligned(memory, GetRegister(instr.gpr0), address, 0b10, 16)); + break; + } default: UNIMPLEMENTED_MSG("{} unhandled type: {}", opcode->get().GetName(), static_cast<u32>(instr.ldst_sl.type.Value())); @@ -295,23 +336,67 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { } }(); + // For unaligned stores we have to read memory too (read-modify-write). + const bool is_read = IsUnaligned(type); const auto [real_address_base, base_address, descriptor] = - TrackGlobalMemory(bb, instr, true); + TrackGlobalMemory(bb, instr, is_read, true); if (!real_address_base || !base_address) { // Tracking failed, skip the store. break; } - const u32 count = GetStgMemorySize(type); + const u32 size = GetMemorySize(type); + const u32 count = Common::AlignUp(size, 32) / 32; for (u32 i = 0; i < count; ++i) { const Node it_offset = Immediate(i * 4); const Node real_address = Operation(OperationCode::UAdd, real_address_base, it_offset); const Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor); - const Node value = GetRegister(instr.gpr0.Value() + i); + Node value = GetRegister(instr.gpr0.Value() + i); + + if (IsUnaligned(type)) { + const u32 mask = GetUnalignedMask(type); + value = InsertUnaligned(gmem, std::move(value), real_address, mask, size); + } + bb.push_back(Operation(OperationCode::Assign, gmem, value)); } break; } + case OpCode::Id::ATOM: { + UNIMPLEMENTED_IF_MSG(instr.atom.operation != GlobalAtomicOp::Add, "operation={}", + static_cast<int>(instr.atom.operation.Value())); + UNIMPLEMENTED_IF_MSG(instr.atom.type != GlobalAtomicType::S32, "type={}", + static_cast<int>(instr.atom.type.Value())); + + const auto [real_address, base_address, descriptor] = + TrackGlobalMemory(bb, instr, true, true); + if (!real_address || !base_address) { + // Tracking failed, skip atomic. 
+ break; + } + + Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor); + Node value = Operation(OperationCode::AtomicAdd, std::move(gmem), GetRegister(instr.gpr20)); + SetRegister(bb, instr.gpr0, std::move(value)); + break; + } + case OpCode::Id::ATOMS: { + UNIMPLEMENTED_IF_MSG(instr.atoms.operation != AtomicOp::Add, "operation={}", + static_cast<int>(instr.atoms.operation.Value())); + UNIMPLEMENTED_IF_MSG(instr.atoms.type != AtomicType::U32, "type={}", + static_cast<int>(instr.atoms.type.Value())); + + const s32 offset = instr.atoms.GetImmediateOffset(); + Node address = GetRegister(instr.gpr8); + address = Operation(OperationCode::IAdd, std::move(address), Immediate(offset)); + + Node memory = GetSharedMemory(std::move(address)); + Node data = GetRegister(instr.gpr20); + + Node value = Operation(OperationCode::AtomicAdd, std::move(memory), std::move(data)); + SetRegister(bb, instr.gpr0, std::move(value)); + break; + } case OpCode::Id::AL2P: { // Ignore al2p.direction since we don't care about it. @@ -336,7 +421,7 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { std::tuple<Node, Node, GlobalMemoryBase> ShaderIR::TrackGlobalMemory(NodeBlock& bb, Instruction instr, - bool is_write) { + bool is_read, bool is_write) { const auto addr_register{GetRegister(instr.gmem.gpr)}; const auto immediate_offset{static_cast<u32>(instr.gmem.offset)}; @@ -351,11 +436,8 @@ std::tuple<Node, Node, GlobalMemoryBase> ShaderIR::TrackGlobalMemory(NodeBlock& const GlobalMemoryBase descriptor{index, offset}; const auto& [entry, is_new] = used_global_memory.try_emplace(descriptor); auto& usage = entry->second; - if (is_write) { - usage.is_written = true; - } else { - usage.is_read = true; - } + usage.is_written |= is_write; + usage.is_read |= is_read; const auto real_address = Operation(OperationCode::UAdd, NO_PRECISE, Immediate(immediate_offset), addr_register); diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp index 4b14cdf58..0b567e39d 100644 --- a/src/video_core/shader/decode/texture.cpp +++ b/src/video_core/shader/decode/texture.cpp @@ -161,16 +161,16 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { case OpCode::Id::TXD: { UNIMPLEMENTED_IF_MSG(instr.txd.UsesMiscMode(TextureMiscMode::AOFFI), "AOFFI is not implemented"); - UNIMPLEMENTED_IF_MSG(instr.txd.is_array != 0, "TXD Array is not implemented"); + const bool is_array = instr.txd.is_array != 0; u64 base_reg = instr.gpr8.Value(); const auto derivate_reg = instr.gpr20.Value(); const auto texture_type = instr.txd.texture_type.Value(); const auto coord_count = GetCoordCount(texture_type); - const Sampler* sampler = is_bindless - ? GetBindlessSampler(base_reg, {{texture_type, false, false}}) - : GetSampler(instr.sampler, {{texture_type, false, false}}); + const Sampler* sampler = + is_bindless ? 
GetBindlessSampler(base_reg, {{texture_type, is_array, false}}) + : GetSampler(instr.sampler, {{texture_type, is_array, false}}); Node4 values; if (sampler == nullptr) { for (u32 element = 0; element < values.size(); ++element) { @@ -179,6 +179,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { WriteTexInstructionFloat(bb, instr, values); break; } + if (is_bindless) { base_reg++; } @@ -192,8 +193,14 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) { derivates.push_back(GetRegister(derivate_reg + derivate + 1)); } + Node array_node = {}; + if (is_array) { + const Node info_reg = GetRegister(base_reg + coord_count); + array_node = BitfieldExtract(info_reg, 0, 16); + } + for (u32 element = 0; element < values.size(); ++element) { - MetaTexture meta{*sampler, {}, {}, {}, {}, derivates, {}, {}, {}, element}; + MetaTexture meta{*sampler, array_node, {}, {}, {}, derivates, {}, {}, {}, element}; values[element] = Operation(OperationCode::TextureGradient, std::move(meta), coords); } @@ -794,14 +801,10 @@ std::tuple<std::size_t, std::size_t> ShaderIR::ValidateAndGetCoordinateElement( std::vector<Node> ShaderIR::GetAoffiCoordinates(Node aoffi_reg, std::size_t coord_count, bool is_tld4) { - const auto [coord_offsets, size, wrap_value, - diff_value] = [is_tld4]() -> std::tuple<std::array<u32, 3>, u32, s32, s32> { - if (is_tld4) { - return {{0, 8, 16}, 6, 32, 64}; - } else { - return {{0, 4, 8}, 4, 8, 16}; - } - }(); + const std::array coord_offsets = is_tld4 ? std::array{0U, 8U, 16U} : std::array{0U, 4U, 8U}; + const u32 size = is_tld4 ? 6 : 4; + const s32 wrap_value = is_tld4 ? 32 : 8; + const s32 diff_value = is_tld4 ? 64 : 16; const u32 mask = (1U << size) - 1; std::vector<Node> aoffi; @@ -814,7 +817,7 @@ std::vector<Node> ShaderIR::GetAoffiCoordinates(Node aoffi_reg, std::size_t coor LOG_WARNING(HW_GPU, "AOFFI constant folding failed, some hardware might have graphical issues"); for (std::size_t coord = 0; coord < coord_count; ++coord) { - const Node value = BitfieldExtract(aoffi_reg, coord_offsets.at(coord), size); + const Node value = BitfieldExtract(aoffi_reg, coord_offsets[coord], size); const Node condition = Operation(OperationCode::LogicalIGreaterEqual, value, Immediate(wrap_value)); const Node negative = Operation(OperationCode::IAdd, value, Immediate(-diff_value)); @@ -824,7 +827,7 @@ std::vector<Node> ShaderIR::GetAoffiCoordinates(Node aoffi_reg, std::size_t coor } for (std::size_t coord = 0; coord < coord_count; ++coord) { - s32 value = (*aoffi_immediate >> coord_offsets.at(coord)) & mask; + s32 value = (*aoffi_immediate >> coord_offsets[coord]) & mask; if (value >= wrap_value) { value -= diff_value; } diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h index 4e155542a..9af1f0228 100644 --- a/src/video_core/shader/node.h +++ b/src/video_core/shader/node.h @@ -162,6 +162,8 @@ enum class OperationCode { AtomicImageXor, /// (MetaImage, int[N] coords) -> void AtomicImageExchange, /// (MetaImage, int[N] coords) -> void + AtomicAdd, /// (memory, {u}int) -> {u}int + Branch, /// (uint branch_target) -> void BranchIndirect, /// (uint branch_target) -> void PushFlowStack, /// (uint branch_target) -> void diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h index aacd0a0da..ba1db4c11 100644 --- a/src/video_core/shader/shader_ir.h +++ b/src/video_core/shader/shader_ir.h @@ -394,7 +394,7 @@ private: std::tuple<Node, Node, GlobalMemoryBase> TrackGlobalMemory(NodeBlock& bb, Tegra::Shader::Instruction instr, - bool is_write); + bool is_read, 
bool is_write); /// Register new amending code and obtain the reference id. std::size_t DeclareAmend(Node new_amend); diff --git a/src/video_core/texture_cache/format_lookup_table.cpp b/src/video_core/texture_cache/format_lookup_table.cpp index 271e67533..81fb9f633 100644 --- a/src/video_core/texture_cache/format_lookup_table.cpp +++ b/src/video_core/texture_cache/format_lookup_table.cpp @@ -95,7 +95,7 @@ constexpr std::array<Table, 74> DefinitionTable = {{ {TextureFormat::ZF32, C, FLOAT, FLOAT, FLOAT, FLOAT, PixelFormat::Z32F}, {TextureFormat::Z16, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::Z16}, {TextureFormat::S8Z24, C, UINT, UNORM, UNORM, UNORM, PixelFormat::S8Z24}, - {TextureFormat::ZF32_X24S8, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::Z32FS8}, + {TextureFormat::ZF32_X24S8, C, FLOAT, UINT, UNORM, UNORM, PixelFormat::Z32FS8}, {TextureFormat::DXT1, C, UNORM, UNORM, UNORM, UNORM, PixelFormat::DXT1}, {TextureFormat::DXT1, S, UNORM, UNORM, UNORM, UNORM, PixelFormat::DXT1_SRGB}, diff --git a/src/video_core/texture_cache/surface_base.cpp b/src/video_core/texture_cache/surface_base.cpp index 829268b4c..84469b7ba 100644 --- a/src/video_core/texture_cache/surface_base.cpp +++ b/src/video_core/texture_cache/surface_base.cpp @@ -135,7 +135,7 @@ std::vector<CopyParams> SurfaceBaseImpl::BreakDownLayered(const SurfaceParams& i for (u32 level = 0; level < mipmaps; level++) { const u32 width = SurfaceParams::IntersectWidth(params, in_params, level, level); const u32 height = SurfaceParams::IntersectHeight(params, in_params, level, level); - result.emplace_back(width, height, layer, level); + result.emplace_back(0, 0, layer, 0, 0, layer, level, level, width, height, 1); } } return result; diff --git a/src/video_core/texture_cache/surface_params.h b/src/video_core/texture_cache/surface_params.h index 992b5c022..9256fd6d9 100644 --- a/src/video_core/texture_cache/surface_params.h +++ b/src/video_core/texture_cache/surface_params.h @@ -209,6 +209,11 @@ public: return target == VideoCore::Surface::SurfaceTarget::TextureBuffer; } + /// Returns the number of layers in the surface. + std::size_t GetNumLayers() const { + return is_layered ? depth : 1; + } + /// Returns the debug name of the texture for use in graphic debuggers. std::string TargetName() const; @@ -287,10 +292,6 @@ private: /// Returns the size of a layer std::size_t GetLayerSize(bool as_host_size, bool uncompressed) const; - std::size_t GetNumLayers() const { - return is_layered ? depth : 1; - } - /// Returns true if these parameters are from a layered surface. bool IsLayered() const; };
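Reviewer note: as a quick reference for the unaligned memory path above, here is a minimal host-side C++ sketch of the bit arithmetic the new ExtractUnaligned, InsertUnaligned and Sign16Extend helpers emit. The names mirror the shader IR helpers, but this is an illustration of the arithmetic only, not emulator code:

    #include <cassert>
    #include <cstdint>

    // Pull `size` bits out of an aligned 32-bit word, starting at the byte
    // selected by (address & mask); mirrors ExtractUnaligned.
    constexpr std::uint32_t ExtractUnaligned(std::uint32_t word, std::uint32_t address,
                                             std::uint32_t mask, std::uint32_t size) {
        const std::uint32_t offset = (address & mask) << 3; // byte offset -> bit offset
        return (word >> offset) & ((1u << size) - 1);       // size is 8 or 16 here
    }

    // Replace those bits inside the existing word; mirrors InsertUnaligned.
    constexpr std::uint32_t InsertUnaligned(std::uint32_t dest, std::uint32_t value,
                                            std::uint32_t address, std::uint32_t mask,
                                            std::uint32_t size) {
        const std::uint32_t offset = (address & mask) << 3;
        const std::uint32_t field = ((1u << size) - 1) << offset;
        return (dest & ~field) | ((value << offset) & field);
    }

    // Sign-extend a 16-bit value held in the low half; mirrors Sign16Extend.
    constexpr std::uint32_t Sign16Extend(std::uint32_t value) {
        return (value & 0x8000) != 0 ? (value | 0xFFFF0000) : value;
    }

    int main() {
        // 8-bit load at address 0x1003: selects bits [24, 32).
        assert(ExtractUnaligned(0xAABBCCDD, 0x1003, 0b11, 8) == 0xAA);
        // 16-bit store at address 0x1002: rewrites only the upper half.
        assert(InsertUnaligned(0xAABBCCDD, 0x1234, 0x1002, 0b10, 16) == 0x1234CCDD);
        // LD_S/ST_S Signed16 results are sign-extended.
        assert(Sign16Extend(0x8001) == 0xFFFF8001);
        return 0;
    }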
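Similarly, the new ATOM/ATOMS decoding writes the value memory held before the operation back to gpr0, and the SPIR-V OpAtomicIAdd the decompiler emits likewise returns the original value. A tiny sketch of that fetch-and-add contract, again an illustration rather than emulator code:

    #include <atomic>
    #include <cassert>
    #include <cstdint>

    int main() {
        std::atomic<std::uint32_t> word{41};
        // fetch_add returns the pre-operation value, like ATOM.ADD returning into gpr0.
        const std::uint32_t previous = word.fetch_add(1);
        assert(previous == 41);
        assert(word.load() == 42);
        return 0;
    }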
