13 files changed, 951 insertions, 682 deletions
diff --git a/src/video_core/renderer_opengl/gl_global_cache.cpp b/src/video_core/renderer_opengl/gl_global_cache.cpp
new file mode 100644
index 000000000..7992b82c4
--- /dev/null
+++ b/src/video_core/renderer_opengl/gl_global_cache.cpp
@@ -0,0 +1,24 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <glad/glad.h>
+
+#include "video_core/renderer_opengl/gl_global_cache.h"
+#include "video_core/renderer_opengl/gl_rasterizer.h"
+#include "video_core/renderer_opengl/utils.h"
+
+namespace OpenGL {
+
+CachedGlobalRegion::CachedGlobalRegion(VAddr addr, u32 size) : addr{addr}, size{size} {
+    buffer.Create();
+    // Bind and immediately unbind so the driver allocates the object; no data is uploaded here
+    glBindBuffer(GL_SHADER_STORAGE_BUFFER, buffer.handle);
+    glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0);
+    LabelGLObject(GL_BUFFER, buffer.handle, addr, "GlobalMemory");
+}
+
+GlobalRegionCacheOpenGL::GlobalRegionCacheOpenGL(RasterizerOpenGL& rasterizer)
+    : RasterizerCache{rasterizer} {} // Only forwards the rasterizer to the base cache
+
+} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_global_cache.h b/src/video_core/renderer_opengl/gl_global_cache.h
new file mode 100644
index 000000000..406a735bc
--- /dev/null
+++ b/src/video_core/renderer_opengl/gl_global_cache.h
@@ -0,0 +1,60 @@
+// Copyright 2018 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <memory>
+#include <glad/glad.h>
+
+#include "common/common_types.h"
+#include "video_core/rasterizer_cache.h"
+#include "video_core/renderer_opengl/gl_resource_manager.h"
+
+namespace OpenGL {
+
+namespace GLShader {
+class GlobalMemoryEntry;
+} // namespace GLShader
+
+class RasterizerOpenGL;
+class CachedGlobalRegion;
+using GlobalRegion = std::shared_ptr<CachedGlobalRegion>;
+
+class CachedGlobalRegion final : public RasterizerCacheObject {
+public:
+    explicit CachedGlobalRegion(VAddr addr, u32 size);
+
+    /// Gets the address of the global region in guest memory, required for cache management
+    VAddr GetAddr() const {
+        return addr;
+    }
+
+    /// Gets the size of the global region in bytes, required for cache management
+    std::size_t GetSizeInBytes() const {
+        return size;
+    }
+
+    /// Gets the GL handle of the buffer object owned by this region
+    GLuint GetBufferHandle() const {
+        return buffer.handle;
+    }
+
+    // TODO(Rodrigo): When global memory is written (STG), implement flushing
+    void Flush() override {
+        UNIMPLEMENTED();
+    }
+
+private:
+    VAddr addr{};
+    u32 size{};
+
+    OGLBuffer buffer;
+};
+
+class GlobalRegionCacheOpenGL final : public RasterizerCache<GlobalRegion> {
+public:
+    explicit GlobalRegionCacheOpenGL(RasterizerOpenGL& rasterizer); // Forwards to the base
+};
+
+} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 9e93bd609..73567eb8c 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -79,9 +79,29 @@ struct DrawParameters {
     }
 };
 
+struct FramebufferCacheKey {
+    bool is_single_buffer = false; // True: only colors[0]/color_attachments[0] are consumed
+    bool stencil_enable = false; // Picks depth-stencil over depth-only for the zeta attachment
+
+    std::array<GLenum, Maxwell::NumRenderTargets> color_attachments{}; // Draw buffer enums
+    std::array<GLuint, Tegra::Engines::Maxwell3D::Regs::NumRenderTargets> colors{}; // Textures
+    u32 colors_count = 0; // Count handed to glDrawBuffers
+
+    GLuint zeta = 0; // Depth texture handle, 0 when there is none
+
+    auto Tie() const {
+        return std::tie(is_single_buffer, stencil_enable, color_attachments, colors, colors_count,
+                        zeta);
+    }
+
+    bool operator<(const FramebufferCacheKey& rhs) const { // Ordering used by the std::map cache
+        return Tie() < rhs.Tie();
+    }
+};
+
 RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& window, ScreenInfo& info)
     : res_cache{*this}, shader_cache{*this}, emu_window{window}, screen_info{info},
-      buffer_cache(*this, STREAM_BUFFER_SIZE) {
+      buffer_cache(*this, STREAM_BUFFER_SIZE), global_cache{*this} {
     // Create sampler objects
     for (std::size_t i = 0; i < texture_samplers.size(); ++i) {
         texture_samplers[i].Create();
@@ -90,9 +110,6 @@ RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& window, ScreenInfo
 
     OpenGLState::ApplyDefaultState();
 
-    // Create render framebuffer
-    framebuffer.Create();
-
     shader_program_manager = std::make_unique<GLShader::ProgramManager>();
     state.draw.shader_program = 0;
     state.Apply();
@@ -276,7 +293,7 @@ DrawParameters RasterizerOpenGL::SetupDraw() {
 
 void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
     MICROPROFILE_SCOPE(OpenGL_Shader);
-    const auto& gpu = Core::System::GetInstance().GPU().Maxwell3D();
+    auto& gpu = Core::System::GetInstance().GPU().Maxwell3D();
 
     // Next available bindpoints to use when uploading the const buffers and textures to the GLSL
     // shaders. The constbuffer bindpoint starts after the shader stage configuration bind points.
@@ -359,6 +376,46 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
     }
 
     SyncClipEnabled(clip_distances);
+
+    gpu.dirty_flags.shaders = false;
+}
+
+void RasterizerOpenGL::SetupCachedFramebuffer(const FramebufferCacheKey& fbkey,
+                                              OpenGLState& current_state) {
+    const auto [entry, is_cache_miss] = framebuffer_cache.try_emplace(fbkey);
+    auto& framebuffer = entry->second; // NOTE(review): key holds GL names; confirm reuse is safe
+
+    if (is_cache_miss) // First use of this key: create the GL framebuffer object
+        framebuffer.Create();
+
+    current_state.draw.draw_framebuffer = framebuffer.handle;
+    current_state.ApplyFramebufferState();
+
+    if (!is_cache_miss) // Cached entry: attachments were configured when it was created
+        return;
+
+    if (fbkey.is_single_buffer) {
+        if (fbkey.color_attachments[0] != GL_NONE) {
+            glFramebufferTexture(GL_DRAW_FRAMEBUFFER, fbkey.color_attachments[0], fbkey.colors[0],
+                                 0);
+        }
+        glDrawBuffer(fbkey.color_attachments[0]); // Stays GL_NONE if the key left it unset
+    } else {
+        for (std::size_t index = 0; index < Maxwell::NumRenderTargets; ++index) {
+            if (fbkey.colors[index]) { // Skip slots without a texture
+                glFramebufferTexture(GL_DRAW_FRAMEBUFFER,
+                                     GL_COLOR_ATTACHMENT0 + static_cast<GLenum>(index),
+                                     fbkey.colors[index], 0);
+            }
+        }
+        glDrawBuffers(fbkey.colors_count, fbkey.color_attachments.data());
+    }
+
+    if (fbkey.zeta) { // Attach the depth texture, with stencil only when it is enabled
+        GLenum zeta_attachment =
+            fbkey.stencil_enable ? GL_DEPTH_STENCIL_ATTACHMENT : GL_DEPTH_ATTACHMENT;
+        glFramebufferTexture(GL_DRAW_FRAMEBUFFER, zeta_attachment, fbkey.zeta, 0);
+    }
 }
 
 std::size_t RasterizerOpenGL::CalculateVertexArraysSize() const {
@@ -444,10 +501,10 @@ void RasterizerOpenGL::ConfigureFramebuffers(OpenGLState& current_state, bool us
     UNIMPLEMENTED_IF(regs.rt_separate_frag_data != 0);
 
     // Bind the framebuffer surfaces
-    current_state.draw.draw_framebuffer = framebuffer.handle;
-    current_state.ApplyFramebufferState();
     current_state.framebuffer_srgb.enabled = regs.framebuffer_srgb != 0;
 
+    FramebufferCacheKey fbkey;
+
     if (using_color_fb) {
         if (single_color_target) {
             // Used when just a single color attachment is enabled, e.g. for clearing a color buffer
@@ -463,14 +520,12 @@ void RasterizerOpenGL::ConfigureFramebuffers(OpenGLState& current_state, bool us
                 state.framebuffer_srgb.enabled |= color_surface->GetSurfaceParams().srgb_conversion;
             }
 
-            glFramebufferTexture2D(
-                GL_DRAW_FRAMEBUFFER,
-                GL_COLOR_ATTACHMENT0 + static_cast<GLenum>(*single_color_target), GL_TEXTURE_2D,
-                color_surface != nullptr ? color_surface->Texture().handle : 0, 0);
-            glDrawBuffer(GL_COLOR_ATTACHMENT0 + static_cast<GLenum>(*single_color_target));
+            fbkey.is_single_buffer = true;
+            fbkey.color_attachments[0] =
+                GL_COLOR_ATTACHMENT0 + static_cast<GLenum>(*single_color_target);
+            fbkey.colors[0] = color_surface != nullptr ? color_surface->Texture().handle : 0;
         } else {
             // Multiple color attachments are enabled
-            std::array<GLenum, Maxwell::NumRenderTargets> buffers;
             for (std::size_t index = 0; index < Maxwell::NumRenderTargets; ++index) {
                 Surface color_surface = res_cache.GetColorBufferSurface(index, preserve_contents);
 
@@ -485,22 +540,17 @@ void RasterizerOpenGL::ConfigureFramebuffers(OpenGLState& current_state, bool us
                         color_surface->GetSurfaceParams().srgb_conversion;
                 }
 
-                buffers[index] = GL_COLOR_ATTACHMENT0 + regs.rt_control.GetMap(index);
-                glFramebufferTexture2D(
-                    GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0 + static_cast<GLenum>(index),
-                    GL_TEXTURE_2D, color_surface != nullptr ? color_surface->Texture().handle : 0,
-                    0);
+                fbkey.color_attachments[index] =
+                    GL_COLOR_ATTACHMENT0 + regs.rt_control.GetMap(index);
+                fbkey.colors[index] =
+                    color_surface != nullptr ? color_surface->Texture().handle : 0;
             }
-            glDrawBuffers(regs.rt_control.count, buffers.data());
+            fbkey.is_single_buffer = false;
+            fbkey.colors_count = regs.rt_control.count;
         }
     } else {
-        // No color attachments are enabled - zero out all of them
-        for (std::size_t index = 0; index < Maxwell::NumRenderTargets; ++index) {
-            glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER,
-                                   GL_COLOR_ATTACHMENT0 + static_cast<GLenum>(index), GL_TEXTURE_2D,
-                                   0, 0);
-        }
-        glDrawBuffer(GL_NONE);
+        // No color attachments are enabled - leave them as zero
+        fbkey.is_single_buffer = true;
     }
 
     if (depth_surface) {
@@ -508,22 +558,12 @@ void RasterizerOpenGL::ConfigureFramebuffers(OpenGLState& current_state, bool us
         // the shader doesn't actually write to it.
         depth_surface->MarkAsModified(true, res_cache);
 
-        if (regs.stencil_enable) {
-            // Attach both depth and stencil
-            glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
-                                   depth_surface->Texture().handle, 0);
-        } else {
-            // Attach depth
-            glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D,
-                                   depth_surface->Texture().handle, 0);
-            // Clear stencil attachment
-            glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
-        }
-    } else {
-        // Clear both depth and stencil attachment
-        glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0,
-                               0);
+        fbkey.zeta = depth_surface->Texture().handle;
+        fbkey.stencil_enable = regs.stencil_enable;
     }
+
+    SetupCachedFramebuffer(fbkey, current_state);
+
     SyncViewport(current_state);
 }
 
@@ -723,6 +763,7 @@ void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) {
     MICROPROFILE_SCOPE(OpenGL_CacheManagement);
     res_cache.InvalidateRegion(addr, size);
     shader_cache.InvalidateRegion(addr, size);
+    global_cache.InvalidateRegion(addr, size);
     buffer_cache.InvalidateRegion(addr, size);
 }
 
@@ -976,8 +1017,11 @@ u32 RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, Shader& shader,
         texture_samplers[current_bindpoint].SyncWithConfig(texture.tsc);
         Surface surface = res_cache.GetTextureSurface(texture, entry);
         if (surface != nullptr) {
-            state.texture_units[current_bindpoint].texture = surface->Texture().handle;
-            state.texture_units[current_bindpoint].target = surface->Target();
+            const GLuint handle =
+                entry.IsArray() ? surface->TextureLayer().handle : surface->Texture().handle;
+            const GLenum target = entry.IsArray() ? surface->TargetLayer() : surface->Target();
+            state.texture_units[current_bindpoint].texture = handle;
+            state.texture_units[current_bindpoint].target = target;
             state.texture_units[current_bindpoint].swizzle.r =
                 MaxwellToGL::SwizzleSource(texture.tic.x_source);
             state.texture_units[current_bindpoint].swizzle.g =
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index 988fa3e27..a53edee6d 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -23,6 +23,7 @@
 #include "video_core/rasterizer_cache.h"
 #include "video_core/rasterizer_interface.h"
 #include "video_core/renderer_opengl/gl_buffer_cache.h"
+#include "video_core/renderer_opengl/gl_global_cache.h"
 #include "video_core/renderer_opengl/gl_primitive_assembler.h"
 #include "video_core/renderer_opengl/gl_rasterizer_cache.h"
 #include "video_core/renderer_opengl/gl_resource_manager.h"
@@ -40,6 +41,7 @@ namespace OpenGL {
 
 struct ScreenInfo;
 struct DrawParameters;
+struct FramebufferCacheKey;
 
 class RasterizerOpenGL : public VideoCore::RasterizerInterface {
 public:
@@ -65,6 +67,10 @@ public:
     static_assert(MaxConstbufferSize % sizeof(GLvec4) == 0,
                   "The maximum size of a constbuffer must be a multiple of the size of GLvec4");
 
+    static constexpr std::size_t MaxGlobalMemorySize = 0x10000;
+    static_assert(MaxGlobalMemorySize % sizeof(float) == 0,
+                  "The maximum size of a global memory must be a multiple of the size of float");
+
 private:
     class SamplerInfo {
     public:
@@ -104,7 +110,7 @@ private:
                                bool using_depth_fb = true, bool preserve_contents = true,
                                std::optional<std::size_t> single_color_target = {});
 
-    /*
+    /**
      * Configures the current constbuffers to use for the draw command.
      * @param stage The shader stage to configure buffers for.
      * @param shader The shader object that contains the specified stage.
@@ -114,7 +120,7 @@ private:
     u32 SetupConstBuffers(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, Shader& shader,
                           GLenum primitive_mode, u32 current_bindpoint);
 
-    /*
+    /**
      * Configures the current textures to use for the draw command.
      * @param stage The shader stage to configure textures for.
      * @param shader The shader object that contains the specified stage.
@@ -184,6 +190,7 @@ private:
 
     RasterizerCacheOpenGL res_cache;
     ShaderCacheOpenGL shader_cache;
+    GlobalRegionCacheOpenGL global_cache;
 
     Core::Frontend::EmuWindow& emu_window;
 
@@ -195,11 +202,12 @@ private:
              OGLVertexArray>
         vertex_array_cache;
 
+    std::map<FramebufferCacheKey, OGLFramebuffer> framebuffer_cache;
+
     std::array<SamplerInfo, Tegra::Engines::Maxwell3D::Regs::NumTextureSamplers> texture_samplers;
 
     static constexpr std::size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024;
     OGLBufferCache buffer_cache;
-    OGLFramebuffer framebuffer;
     PrimitiveAssembler primitive_assembler{buffer_cache};
     GLint uniform_buffer_alignment;
 
@@ -214,6 +222,8 @@ private:
 
     void SetupShaders(GLenum primitive_mode);
 
+    void SetupCachedFramebuffer(const FramebufferCacheKey& fbkey, OpenGLState& current_state);
+
     enum class AccelDraw { Disabled, Arrays, Indexed };
     AccelDraw accelerate_draw = AccelDraw::Disabled;
 
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
index 5f4cdd119..bff0c65cd 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
@@ -44,6 +44,17 @@ struct FormatTuple {
     bool compressed;
 };
 
+static void ApplyTextureDefaults(GLenum target, u32 max_mip_level) {
+    glTexParameteri(target, GL_TEXTURE_MIN_FILTER, GL_LINEAR); // Affects the bound texture
+    glTexParameteri(target, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
+    glTexParameteri(target, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
+    glTexParameteri(target, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
+    glTexParameteri(target, GL_TEXTURE_MAX_LEVEL, max_mip_level - 1); // NOTE(review): wraps at 0
+    if (max_mip_level == 1) { // Single mip level: apply a large LOD bias
+        glTexParameterf(target, GL_TEXTURE_LOD_BIAS, 1000.0);
+    }
+}
+
 void SurfaceParams::InitCacheParameters(Tegra::GPUVAddr gpu_addr_) {
     auto& memory_manager{Core::System::GetInstance().GPU().MemoryManager()};
     const auto cpu_addr{memory_manager.GpuToCpuAddress(gpu_addr_)};
@@ -101,8 +112,18 @@ std::size_t SurfaceParams::InnerMemorySize(bool force_gl, bool layer_only,
     params.srgb_conversion = config.tic.IsSrgbConversionEnabled();
     params.pixel_format = PixelFormatFromTextureFormat(config.tic.format, config.tic.r_type.Value(),
                                                        params.srgb_conversion);
+
+    if (params.pixel_format == PixelFormat::R16U && config.tsc.depth_compare_enabled) {
+        // Some titles create a 'R16U' (normalized 16-bit) texture with depth_compare enabled,
+        // then attempt to sample from it via a shadow sampler. Convert format to Z16 (which also
+        // causes GetFormatType to properly return 'Depth' below).
+        params.pixel_format = PixelFormat::Z16;
+    }
+
     params.component_type = ComponentTypeFromTexture(config.tic.r_type.Value());
     params.type = GetFormatType(params.pixel_format);
+    UNIMPLEMENTED_IF(params.type == SurfaceType::ColorTexture && config.tsc.depth_compare_enabled);
+
     params.width = Common::AlignUp(config.tic.Width(), GetCompressionFactor(params.pixel_format));
     params.height = Common::AlignUp(config.tic.Height(), GetCompressionFactor(params.pixel_format));
     params.unaligned_height = config.tic.Height();
@@ -257,7 +278,7 @@ static constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> tex
     {GL_R8UI, GL_RED_INTEGER, GL_UNSIGNED_BYTE, ComponentType::UInt, false},           // R8UI
     {GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT, ComponentType::Float, false},                 // RGBA16F
     {GL_RGBA16, GL_RGBA, GL_UNSIGNED_SHORT, ComponentType::UNorm, false},              // RGBA16U
-    {GL_RGBA16UI, GL_RGBA, GL_UNSIGNED_SHORT, ComponentType::UInt, false},             // RGBA16UI
+    {GL_RGBA16UI, GL_RGBA_INTEGER, GL_UNSIGNED_SHORT, ComponentType::UInt, false},     // RGBA16UI
     {GL_R11F_G11F_B10F, GL_RGB, GL_UNSIGNED_INT_10F_11F_11F_REV, ComponentType::Float,
      false},                                                                     // R11FG11FB10F
     {GL_RGBA32UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT, ComponentType::UInt, false}, // RGBA32UI
@@ -278,8 +299,6 @@ static constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> tex
     {GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT, GL_RGB, GL_UNSIGNED_INT_8_8_8_8, ComponentType::Float,
      true},                                                                    // BC6H_SF16
     {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false},        // ASTC_2D_4X4
-    {GL_RG8, GL_RG, GL_UNSIGNED_BYTE, ComponentType::UNorm, false},            // G8R8U
-    {GL_RG8, GL_RG, GL_BYTE, ComponentType::SNorm, false},                     // G8R8S
     {GL_RGBA8, GL_BGRA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false},        // BGRA8
     {GL_RGBA32F, GL_RGBA, GL_FLOAT, ComponentType::Float, false},              // RGBA32F
     {GL_RG32F, GL_RG, GL_FLOAT, ComponentType::Float, false},                  // RG32F
@@ -433,7 +452,7 @@ static void CopySurface(const Surface& src_surface, const Surface& dst_surface,
     const std::size_t buffer_size = std::max(src_params.size_in_bytes, dst_params.size_in_bytes);
 
     glBindBuffer(GL_PIXEL_PACK_BUFFER, copy_pbo_handle);
-    glBufferData(GL_PIXEL_PACK_BUFFER, buffer_size, nullptr, GL_STREAM_DRAW);
+    glBufferData(GL_PIXEL_PACK_BUFFER, buffer_size, nullptr, GL_STREAM_COPY);
     if (source_format.compressed) {
         glGetCompressedTextureImage(src_surface->Texture().handle, src_attachment,
                                     static_cast<GLsizei>(src_params.size_in_bytes), nullptr);
@@ -522,6 +541,9 @@ CachedSurface::CachedSurface(const SurfaceParams& params)
     glActiveTexture(GL_TEXTURE0);
 
     const auto& format_tuple = GetFormatTuple(params.pixel_format, params.component_type);
+    gl_internal_format = format_tuple.internal_format;
+    gl_is_compressed = format_tuple.compressed;
+
     if (!format_tuple.compressed) {
         // Only pre-create the texture for non-compressed textures.
         switch (params.target) {
@@ -550,15 +572,7 @@ CachedSurface::CachedSurface(const SurfaceParams& params)
         }
     }
 
-    glTexParameteri(SurfaceTargetToGL(params.target), GL_TEXTURE_MIN_FILTER, GL_LINEAR);
-    glTexParameteri(SurfaceTargetToGL(params.target), GL_TEXTURE_MAG_FILTER, GL_LINEAR);
-    glTexParameteri(SurfaceTargetToGL(params.target), GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
-    glTexParameteri(SurfaceTargetToGL(params.target), GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
-    glTexParameteri(SurfaceTargetToGL(params.target), GL_TEXTURE_MAX_LEVEL,
-                    params.max_mip_level - 1);
-    if (params.max_mip_level == 1) {
-        glTexParameterf(SurfaceTargetToGL(params.target), GL_TEXTURE_LOD_BIAS, 1000.0);
-    }
+    ApplyTextureDefaults(SurfaceTargetToGL(params.target), params.max_mip_level);
 
     LabelGLObject(GL_TEXTURE, texture.handle, params.addr,
                   SurfaceParams::SurfaceTargetName(params.target));
@@ -610,18 +624,6 @@ static void ConvertS8Z24ToZ24S8(std::vector<u8>& data, u32 width, u32 height, bo
     }
 }
 
-static void ConvertG8R8ToR8G8(std::vector<u8>& data, u32 width, u32 height) {
-    constexpr auto bpp{GetBytesPerPixel(PixelFormat::G8R8U)};
-    for (std::size_t y = 0; y < height; ++y) {
-        for (std::size_t x = 0; x < width; ++x) {
-            const std::size_t offset{bpp * (y * width + x)};
-            const u8 temp{data[offset]};
-            data[offset] = data[offset + 1];
-            data[offset + 1] = temp;
-        }
-    }
-}
-
 /**
  * Helper function to perform software conversion (as needed) when loading a buffer from Switch
  * memory. This is for Maxwell pixel formats that cannot be represented as-is in OpenGL or with
@@ -654,12 +656,6 @@ static void ConvertFormatAsNeeded_LoadGLBuffer(std::vector<u8>& data, PixelForma
         // Convert the S8Z24 depth format to Z24S8, as OpenGL does not support S8Z24.
         ConvertS8Z24ToZ24S8(data, width, height, false);
         break;
-
-    case PixelFormat::G8R8U:
-    case PixelFormat::G8R8S:
-        // Convert the G8R8 color format to R8G8, as OpenGL does not support G8R8.
-        ConvertG8R8ToR8G8(data, width, height);
-        break;
     }
 }
 
@@ -671,8 +667,6 @@ static void ConvertFormatAsNeeded_LoadGLBuffer(std::vector<u8>& data, PixelForma
 static void ConvertFormatAsNeeded_FlushGLBuffer(std::vector<u8>& data, PixelFormat pixel_format,
                                                 u32 width, u32 height) {
     switch (pixel_format) {
-    case PixelFormat::G8R8U:
-    case PixelFormat::G8R8S:
     case PixelFormat::ASTC_2D_4X4:
     case PixelFormat::ASTC_2D_8X8:
     case PixelFormat::ASTC_2D_4X4_SRGB:
@@ -876,6 +870,31 @@ void CachedSurface::UploadGLMipmapTexture(u32 mip_map, GLuint read_fb_handle,
     glPixelStorei(GL_UNPACK_ROW_LENGTH, 0);
 }
 
+void CachedSurface::EnsureTextureView() {
+    if (texture_view.handle != 0) // The layered view was already created; nothing to do
+        return;
+    // Compressed textures are not created with immutable storage, which glTextureView requires
+    UNIMPLEMENTED_IF(gl_is_compressed);
+
+    const GLenum target{TargetLayer()};
+
+    texture_view.Create();
+    glTextureView(texture_view.handle, target, texture.handle, gl_internal_format, 0,
+                  params.max_mip_level, 0, 1);
+    // Copy unit 0 by value: a reference would alias cur_state and see the override below
+    OpenGLState cur_state = OpenGLState::GetCurState();
+    const auto old_tex = cur_state.texture_units[0];
+    SCOPE_EXIT({
+        cur_state.texture_units[0] = old_tex;
+        cur_state.Apply();
+    });
+    cur_state.texture_units[0].texture = texture_view.handle;
+    cur_state.texture_units[0].target = target;
+    cur_state.Apply();
+
+    ApplyTextureDefaults(target, params.max_mip_level);
+}
+
 MICROPROFILE_DEFINE(OpenGL_TextureUL, "OpenGL", "Texture Upload", MP_RGB(128, 192, 64));
 void CachedSurface::UploadGLTexture(GLuint read_fb_handle, GLuint draw_fb_handle) {
     if (params.type == SurfaceType::Fill)
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
index c710aa245..7223700c4 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
@@ -293,10 +293,31 @@ public:
         return texture;
     }
 
+    const OGLTexture& TextureLayer() {
+        if (params.is_layered) { // Already layered: the main texture is returned as-is
+            return Texture();
+        }
+        EnsureTextureView(); // Creates the array-target view on first use
+        return texture_view;
+    }
+
     GLenum Target() const {
         return gl_target;
     }
 
+    GLenum TargetLayer() const {
+        using VideoCore::Surface::SurfaceTarget;
+        switch (params.target) { // Map non-array targets to their array counterpart
+        case SurfaceTarget::Texture1D:
+            return GL_TEXTURE_1D_ARRAY;
+        case SurfaceTarget::Texture2D:
+            return GL_TEXTURE_2D_ARRAY;
+        case SurfaceTarget::TextureCubemap:
+            return GL_TEXTURE_CUBE_MAP_ARRAY;
+        }
+        return Target(); // Any other target is returned unchanged
+    }
+
     const SurfaceParams& GetSurfaceParams() const {
         return params;
     }
@@ -311,11 +332,16 @@ public:
 private:
     void UploadGLMipmapTexture(u32 mip_map, GLuint read_fb_handle, GLuint draw_fb_handle);
 
+    void EnsureTextureView();
+
     OGLTexture texture;
+    OGLTexture texture_view;
     std::vector<std::vector<u8>> gl_buffer;
-    SurfaceParams params;
-    GLenum gl_target;
-    std::size_t cached_size_in_bytes;
+    SurfaceParams params{};
+    GLenum gl_target{};
+    GLenum gl_internal_format{};
+    bool gl_is_compressed{};
+    std::size_t cached_size_in_bytes{};
 };
 
 class RasterizerCacheOpenGL final : public RasterizerCache<Surface> {
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index 038b25c75..c785fffa3 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -2,7 +2,9 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
+#include <boost/functional/hash.hpp>
 #include "common/assert.h"
+#include "common/hash.h"
 #include "core/core.h"
 #include "core/memory.h"
 #include "video_core/engines/maxwell_3d.h"
@@ -66,14 +68,17 @@ CachedShader::CachedShader(VAddr addr, Maxwell::ShaderProgram program_type)
         // stage here.
         setup.SetProgramB(GetShaderCode(GetShaderAddress(Maxwell::ShaderProgram::VertexB)));
     case Maxwell::ShaderProgram::VertexB:
+        CalculateProperties();
         program_result = GLShader::GenerateVertexShader(setup);
         gl_type = GL_VERTEX_SHADER;
         break;
     case Maxwell::ShaderProgram::Geometry:
+        CalculateProperties();
         program_result = GLShader::GenerateGeometryShader(setup);
         gl_type = GL_GEOMETRY_SHADER;
         break;
     case Maxwell::ShaderProgram::Fragment:
+        CalculateProperties();
         program_result = GLShader::GenerateFragmentShader(setup);
         gl_type = GL_FRAGMENT_SHADER;
         break;
@@ -140,9 +145,53 @@ GLuint CachedShader::LazyGeometryProgram(OGLProgram& target_program,
     return target_program.handle;
 };
 
+static bool IsSchedInstruction(std::size_t offset, std::size_t main_offset) {
+    // Sched instructions occur once every 4 instructions, the first one being at main_offset.
+    static constexpr std::size_t SchedPeriod = 4;
+    const std::size_t absolute_offset = offset - main_offset; // Distance from main_offset
+    return (absolute_offset % SchedPeriod) == 0;
+}
+
+static std::size_t CalculateProgramSize(const GLShader::ProgramCode& program) {
+    constexpr std::size_t start_offset = 10; // NOTE(review): presumably skips a header - confirm
+    std::size_t offset = start_offset;
+    std::size_t size = start_offset * sizeof(u64); // Byte count; includes the skipped words
+    while (offset < program.size()) {
+        const u64 inst = program[offset];
+        if (!IsSchedInstruction(offset, start_offset)) {
+            if (inst == 0 || (inst >> 52) == 0x50b) { // End on zero or bits 63:52 == 0x50b
+                break;
+            }
+        }
+        size += sizeof(inst);
+        offset++;
+    }
+    return size;
+}
+
+void CachedShader::CalculateProperties() {
+    setup.program.real_size = CalculateProgramSize(setup.program.code); // Size in bytes
+    setup.program.real_size_b = 0; // Stays 0 unless code_b is measured below
+    setup.program.unique_identifier = Common::CityHash64(
+        reinterpret_cast<const char*>(setup.program.code.data()), setup.program.real_size);
+    if (program_type == Maxwell::ShaderProgram::VertexA) { // Fold code_b into the identifier
+        std::size_t seed = 0;
+        boost::hash_combine(seed, setup.program.unique_identifier);
+        setup.program.real_size_b = CalculateProgramSize(setup.program.code_b);
+        const u64 identifier_b = Common::CityHash64(
+            reinterpret_cast<const char*>(setup.program.code_b.data()), setup.program.real_size_b);
+        boost::hash_combine(seed, identifier_b);
+        setup.program.unique_identifier = static_cast<u64>(seed);
+    }
+}
+
 ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer) : RasterizerCache{rasterizer} {}
 
 Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
+    if (!Core::System::GetInstance().GPU().Maxwell3D().dirty_flags.shaders) {
+        return last_shaders[static_cast<u32>(program)];
+    }
+
     const VAddr program_addr{GetShaderAddress(program)};
 
     // Look up shader in the cache based on address
@@ -154,7 +203,7 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
         Register(shader);
     }
 
-    return shader;
+    return last_shaders[static_cast<u32>(program)] = shader;
 }
 
 } // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h
index 08f470de3..768747968 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.h
+++ b/src/video_core/renderer_opengl/gl_shader_cache.h
@@ -4,6 +4,7 @@
 
 #pragma once
 
+#include <array>
 #include <map>
 #include <memory>
 
@@ -67,6 +68,7 @@ public:
                                        6, "ShaderTrianglesAdjacency");
         default:
             UNREACHABLE_MSG("Unknown primitive mode.");
+            return LazyGeometryProgram(geometry_programs.points, "points", 1, "ShaderPoints");
         }
     }
 
@@ -81,6 +83,8 @@ private:
     GLuint LazyGeometryProgram(OGLProgram& target_program, const std::string& glsl_topology,
                                u32 max_vertices, const std::string& debug_name);
 
+    void CalculateProperties();
+
     VAddr addr;
     std::size_t shader_length;
     Maxwell::ShaderProgram program_type;
@@ -112,6 +116,9 @@ public:
 
     /// Gets the current specified shader stage program
     Shader GetStageProgram(Maxwell::ShaderProgram program);
+
+private:
+    std::array<Shader, Maxwell::MaxShaderProgram> last_shaders;
 };
 
 } // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index 0c1632bd1..1bb09e61b 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -50,6 +50,14 @@ public:
     using std::runtime_error::runtime_error;
 };
 
+/// Generates code to use for a swizzle operation.
+static std::string GetSwizzle(u64 elem) {
+    ASSERT(elem <= 3);
+    std::string swizzle = ".";
+    swizzle += "xyzw"[elem];
+    return swizzle;
+}
+
 /// Translate topology
 static std::string GetTopologyName(Tegra::Shader::OutputTopology topology) {
     switch (topology) {
@@ -201,14 +209,53 @@ private:
     }
 };
 
+template <typename T>
+class ShaderScopedScope {
+public:
+    explicit ShaderScopedScope(T& writer, std::string_view begin_expr, std::string end_expr)
+        : writer(writer), end_expr(std::move(end_expr)) {
+
+        if (begin_expr.empty()) {
+            writer.AddLine('{');
+        } else {
+            writer.AddExpression(begin_expr);
+            writer.AddLine(" {");
+        }
+        ++writer.scope;
+    }
+
+    ShaderScopedScope(const ShaderScopedScope&) = delete;
+
+    ~ShaderScopedScope() {
+        --writer.scope;
+        if (end_expr.empty()) {
+            writer.AddLine('}');
+        } else {
+            writer.AddExpression("} ");
+            writer.AddExpression(end_expr);
+            writer.AddLine(';');
+        }
+    }
+
+    ShaderScopedScope& operator=(const ShaderScopedScope&) = delete;
+
+private:
+    T& writer;
+    std::string end_expr;
+};
+
 class ShaderWriter {
 public:
-    void AddLine(std::string_view text) {
+    void AddExpression(std::string_view text) {
         DEBUG_ASSERT(scope >= 0);
         if (!text.empty()) {
             AppendIndentation();
         }
         shader_source += text;
+    }
+
+    void AddLine(std::string_view text) {
+        AddExpression(text);
         AddNewLine();
     }
 
@@ -228,6 +275,11 @@ public:
         return std::move(shader_source);
     }
 
+    ShaderScopedScope<ShaderWriter> Scope(std::string_view begin_expr = {},
+                                          std::string end_expr = {}) {
+        return ShaderScopedScope(*this, begin_expr, end_expr);
+    }
+
     int scope = 0;
 
 private:
@@ -295,6 +347,15 @@ public:
         BuildInputList();
     }
 
+    void SetConditionalCodesFromExpression(const std::string& expresion) {
+        SetInternalFlag(InternalFlag::ZeroFlag, "(" + expresion + ") == 0");
+        LOG_WARNING(HW_GPU, "Condition codes implementation is incomplete.");
+    }
+
+    void SetConditionalCodesFromRegister(const Register& reg, u64 dest_elem = 0) {
+        SetConditionalCodesFromExpression(GetRegister(reg, static_cast<u32>(dest_elem)));
+    }
+
     /**
      * Returns code that does an integer size conversion for the specified size.
      * @param value Value to perform integer size conversion on.
@@ -311,7 +372,8 @@ public:
             // Default - do nothing
             return value;
         default:
-            UNIMPLEMENTED_MSG("Unimplemented conversion size: {}", static_cast<u32>(size));
+            UNREACHABLE_MSG("Unimplemented conversion size: {}", static_cast<u32>(size));
+            return value;
         }
     }
 
@@ -348,14 +410,24 @@ public:
      * @param dest_num_components Number of components in the destination.
      * @param value_num_components Number of components in the value.
      * @param is_saturated Optional, when True, saturates the provided value.
+     * @param sets_cc Optional, when True, updates the implemented condition code flags based on
+     * the value written to the register.
      * @param dest_elem Optional, the destination element to use for the operation.
      */
     void SetRegisterToFloat(const Register& reg, u64 elem, const std::string& value,
                             u64 dest_num_components, u64 value_num_components,
-                            bool is_saturated = false, u64 dest_elem = 0, bool precise = false) {
-
-        SetRegister(reg, elem, is_saturated ? "clamp(" + value + ", 0.0, 1.0)" : value,
-                    dest_num_components, value_num_components, dest_elem, precise);
+                            bool is_saturated = false, bool sets_cc = false, u64 dest_elem = 0,
+                            bool precise = false) {
+        const std::string clamped_value = is_saturated ? "clamp(" + value + ", 0.0, 1.0)" : value;
+        SetRegister(reg, elem, clamped_value, dest_num_components, value_num_components, dest_elem,
+                    precise);
+        if (sets_cc) {
+            if (reg == Register::ZeroIndex) {
+                SetConditionalCodesFromExpression(clamped_value);
+            } else {
+                SetConditionalCodesFromRegister(reg, dest_elem);
+            }
+        }
     }
 
     /**
@@ -366,25 +438,29 @@ public:
      * @param dest_num_components Number of components in the destination.
      * @param value_num_components Number of components in the value.
      * @param is_saturated Optional, when True, saturates the provided value.
+     * @param sets_cc Optional, when True, updates the implemented condition code flags based on
+     * the value written to the register.
      * @param dest_elem Optional, the destination element to use for the operation.
      * @param size Register size to use for conversion instructions.
      */
     void SetRegisterToInteger(const Register& reg, bool is_signed, u64 elem,
                               const std::string& value, u64 dest_num_components,
                               u64 value_num_components, bool is_saturated = false,
-                              u64 dest_elem = 0, Register::Size size = Register::Size::Word,
-                              bool sets_cc = false) {
+                              bool sets_cc = false, u64 dest_elem = 0,
+                              Register::Size size = Register::Size::Word) {
         UNIMPLEMENTED_IF(is_saturated);
-
+        const std::string final_value = ConvertIntegerSize(value, size);
         const std::string func{is_signed ? "intBitsToFloat" : "uintBitsToFloat"};
 
-        SetRegister(reg, elem, func + '(' + ConvertIntegerSize(value, size) + ')',
-                    dest_num_components, value_num_components, dest_elem, false);
+        SetRegister(reg, elem, func + '(' + final_value + ')', dest_num_components,
+                    value_num_components, dest_elem, false);
 
         if (sets_cc) {
-            const std::string zero_condition = "( " + ConvertIntegerSize(value, size) + " == 0 )";
-            SetInternalFlag(InternalFlag::ZeroFlag, zero_condition);
-            LOG_WARNING(HW_GPU, "Condition codes implementation is incomplete.");
+            if (reg == Register::ZeroIndex) {
+                SetConditionalCodesFromExpression(final_value);
+            } else {
+                SetConditionalCodesFromRegister(reg, dest_elem);
+            }
         }
     }
 
@@ -417,10 +493,10 @@ public:
                 // pack. I couldn't test this on hardware but it shouldn't really matter since most
                 // of the time when a Mrg_* flag is used both components will be mirrored. That
                 // being said, it deserves a test.
-                return "((" + GetRegisterAsInteger(reg, 0, false) +
+                return "uintBitsToFloat((" + GetRegisterAsInteger(reg, 0, false) +
                        " & 0xffff0000) | (packHalf2x16(" + value + ") & 0x0000ffff))";
             case Tegra::Shader::HalfMerge::Mrg_H1:
-                return "((" + GetRegisterAsInteger(reg, 0, false) +
+                return "uintBitsToFloat((" + GetRegisterAsInteger(reg, 0, false) +
                        " & 0x0000ffff) | (packHalf2x16(" + value + ") & 0xffff0000))";
             default:
                 UNREACHABLE();
@@ -574,6 +650,7 @@ public:
             return "floatBitsToInt(" + value + ')';
         } else {
             UNREACHABLE();
+            return value;
         }
     }
 
@@ -816,14 +893,12 @@ private:
         }
 
         if (precise && stage != Maxwell3D::Regs::ShaderStage::Fragment) {
-            shader.AddLine('{');
-            ++shader.scope;
+            const auto scope = shader.Scope();
+
             // This avoids optimizations of constant propagation and keeps the code as the original
             // Sadly using the precise keyword causes "linking" errors on fragment shaders.
             shader.AddLine("precise float tmp = " + src + ';');
             shader.AddLine(dest + " = tmp;");
-            --shader.scope;
-            shader.AddLine('}');
         } else {
             shader.AddLine(dest + " = " + src + ';');
         }
@@ -878,7 +953,7 @@ private:
         case Attribute::Index::FrontFacing:
             // TODO(Subv): Find out what the values are for the other elements.
             ASSERT(stage == Maxwell3D::Regs::ShaderStage::Fragment);
-            return "vec4(0, 0, 0, uintBitsToFloat(gl_FrontFacing ? 1 : 0))";
+            return "vec4(0, 0, 0, intBitsToFloat(gl_FrontFacing ? -1 : 0))";
         default:
             const u32 index{static_cast<u32>(attribute) -
                             static_cast<u32>(Attribute::Index::Attribute_0)};
@@ -962,14 +1037,6 @@ private:
         }
     }
 
-    /// Generates code to use for a swizzle operation.
-    static std::string GetSwizzle(u64 elem) {
-        ASSERT(elem <= 3);
-        std::string swizzle = ".";
-        swizzle += "xyzw"[elem];
-        return swizzle;
-    }
-
     ShaderWriter& shader;
     ShaderWriter& declarations;
     std::vector<GLSLRegister> regs;
@@ -1231,7 +1298,7 @@ private:
     void WriteLogicOperation(Register dest, LogicOperation logic_op, const std::string& op_a,
                              const std::string& op_b,
                              Tegra::Shader::PredicateResultMode predicate_mode,
-                             Tegra::Shader::Pred predicate) {
+                             Tegra::Shader::Pred predicate, const bool set_cc) {
         std::string result{};
         switch (logic_op) {
         case LogicOperation::And: {
@@ -1255,7 +1322,7 @@ private:
         }
 
         if (dest != Tegra::Shader::Register::ZeroIndex) {
-            regs.SetRegisterToInteger(dest, true, 0, result, 1, 1);
+            regs.SetRegisterToInteger(dest, true, 0, result, 1, 1, false, set_cc);
         }
 
         using Tegra::Shader::PredicateResultMode;
@@ -1275,7 +1342,8 @@ private:
     }
 
     void WriteLop3Instruction(Register dest, const std::string& op_a, const std::string& op_b,
-                              const std::string& op_c, const std::string& imm_lut) {
+                              const std::string& op_c, const std::string& imm_lut,
+                              const bool set_cc) {
         if (dest == Tegra::Shader::Register::ZeroIndex) {
             return;
         }
@@ -1298,18 +1366,10 @@ private:
 
         result += ')';
 
-        regs.SetRegisterToInteger(dest, true, 0, result, 1, 1);
+        regs.SetRegisterToInteger(dest, true, 0, result, 1, 1, false, set_cc);
     }
 
-    void WriteTexsInstruction(const Instruction& instr, const std::string& coord,
-                              const std::string& texture) {
-        // Add an extra scope and declare the texture coords inside to prevent
-        // overwriting them in case they are used as outputs of the texs instruction.
-        shader.AddLine('{');
-        ++shader.scope;
-        shader.AddLine(coord);
-        shader.AddLine("vec4 texture_tmp = " + texture + ';');
-
+    void WriteTexsInstructionFloat(const Instruction& instr, const std::string& texture) {
         // TEXS has two destination registers and a swizzle. The first two elements in the swizzle
         // go into gpr0+0 and gpr0+1, and the rest goes into gpr28+0 and gpr28+1
 
@@ -1321,19 +1381,49 @@ private:
 
             if (written_components < 2) {
                 // Write the first two swizzle components to gpr0 and gpr0+1
-                regs.SetRegisterToFloat(instr.gpr0, component, "texture_tmp", 1, 4, false,
+                regs.SetRegisterToFloat(instr.gpr0, component, texture, 1, 4, false, false,
                                         written_components % 2);
             } else {
                 ASSERT(instr.texs.HasTwoDestinations());
                 // Write the rest of the swizzle components to gpr28 and gpr28+1
-                regs.SetRegisterToFloat(instr.gpr28, component, "texture_tmp", 1, 4, false,
+                regs.SetRegisterToFloat(instr.gpr28, component, texture, 1, 4, false, false,
                                         written_components % 2);
             }
 
             ++written_components;
         }
-        --shader.scope;
-        shader.AddLine('}');
+    }
+
+    void WriteTexsInstructionHalfFloat(const Instruction& instr, const std::string& texture) {
+        // TEXS.F16 destination registers are packed in two registers in pairs (just like any half
+        // float instruction).
+
+        std::array<std::string, 4> components;
+        u32 written_components = 0;
+
+        for (u32 component = 0; component < 4; ++component) {
+            if (!instr.texs.IsComponentEnabled(component))
+                continue;
+            components[written_components++] = texture + GetSwizzle(component);
+        }
+        if (written_components == 0)
+            return;
+
+        const auto BuildComponent = [&](std::string low, std::string high, bool high_enabled) {
+            return "vec2(" + low + ", " + (high_enabled ? high : "0") + ')';
+        };
+
+        regs.SetRegisterToHalfFloat(
+            instr.gpr0, 0, BuildComponent(components[0], components[1], written_components > 1),
+            Tegra::Shader::HalfMerge::H0_H1, 1, 1);
+
+        if (written_components > 2) {
+            ASSERT(instr.texs.HasTwoDestinations());
+            regs.SetRegisterToHalfFloat(
+                instr.gpr28, 0,
+                BuildComponent(components[2], components[3], written_components > 3),
+                Tegra::Shader::HalfMerge::H0_H1, 1, 1);
+        }
     }
 
     static u32 TextureCoordinates(Tegra::Shader::TextureType texture_type) {
@@ -1356,12 +1446,10 @@ private:
      * top.
      */
     void EmitPushToFlowStack(u32 target) {
-        shader.AddLine('{');
-        ++shader.scope;
+        const auto scope = shader.Scope();
+
         shader.AddLine("flow_stack[flow_stack_top] = " + std::to_string(target) + "u;");
         shader.AddLine("flow_stack_top++;");
-        --shader.scope;
-        shader.AddLine('}');
     }
 
     /*
@@ -1369,13 +1457,11 @@ private:
      * popped address and decrementing the stack top.
      */
     void EmitPopFromFlowStack() {
-        shader.AddLine('{');
-        ++shader.scope;
+        const auto scope = shader.Scope();
+
         shader.AddLine("flow_stack_top--;");
         shader.AddLine("jmp_to = flow_stack[flow_stack_top];");
         shader.AddLine("break;");
-        --shader.scope;
-        shader.AddLine('}');
     }
 
     /// Writes the output values from a fragment shader to the corresponding GLSL output variables.
@@ -1487,6 +1573,252 @@ private:
         }
     }
 
+    std::pair<size_t, std::string> ValidateAndGetCoordinateElement(
+        const Tegra::Shader::TextureType texture_type, const bool depth_compare,
+        const bool is_array, const bool lod_bias_enabled, size_t max_coords, size_t max_inputs) {
+        const size_t coord_count = TextureCoordinates(texture_type);
+
+        size_t total_coord_count = coord_count + (is_array ? 1 : 0) + (depth_compare ? 1 : 0);
+        const size_t total_reg_count = total_coord_count + (lod_bias_enabled ? 1 : 0);
+        if (total_coord_count > max_coords || total_reg_count > max_inputs) {
+            UNIMPLEMENTED_MSG("Unsupported Texture operation");
+            total_coord_count = std::min(total_coord_count, max_coords);
+        }
+        // 1D.DC: OpenGL uses a vec3 here, but its 2nd component is ignored later.
+        total_coord_count +=
+            (depth_compare && !is_array && texture_type == Tegra::Shader::TextureType::Texture1D)
+                ? 1
+                : 0;
+
+        constexpr std::array<const char*, 5> coord_container{
+            {"", "float coord = (", "vec2 coord = vec2(", "vec3 coord = vec3(",
+             "vec4 coord = vec4("}};
+
+        return std::pair<size_t, std::string>(coord_count, coord_container[total_coord_count]);
+    }
+
+    std::string GetTextureCode(const Tegra::Shader::Instruction& instr,
+                               const Tegra::Shader::TextureType texture_type,
+                               const Tegra::Shader::TextureProcessMode process_mode,
+                               const bool depth_compare, const bool is_array,
+                               const size_t bias_offset) {
+
+        if ((texture_type == Tegra::Shader::TextureType::Texture3D &&
+             (is_array || depth_compare)) ||
+            (texture_type == Tegra::Shader::TextureType::TextureCube && is_array &&
+             depth_compare)) {
+            UNIMPLEMENTED_MSG("This method is not supported.");
+        }
+
+        const std::string sampler =
+            GetSampler(instr.sampler, texture_type, is_array, depth_compare);
+
+        const bool lod_needed = process_mode == Tegra::Shader::TextureProcessMode::LZ ||
+                                process_mode == Tegra::Shader::TextureProcessMode::LL ||
+                                process_mode == Tegra::Shader::TextureProcessMode::LLA;
+
+        // LOD selection (either via bias or explicit textureLod) not supported in GL for
+        // sampler2DArrayShadow and samplerCubeArrayShadow.
+        const bool gl_lod_supported = !(
+            (texture_type == Tegra::Shader::TextureType::Texture2D && is_array && depth_compare) ||
+            (texture_type == Tegra::Shader::TextureType::TextureCube && is_array && depth_compare));
+
+        const std::string read_method = lod_needed && gl_lod_supported ? "textureLod(" : "texture(";
+        std::string texture = read_method + sampler + ", coord";
+
+        UNIMPLEMENTED_IF(process_mode != Tegra::Shader::TextureProcessMode::None &&
+                         !gl_lod_supported);
+
+        if (process_mode != Tegra::Shader::TextureProcessMode::None && gl_lod_supported) {
+            if (process_mode == Tegra::Shader::TextureProcessMode::LZ) {
+                texture += ", 0.0";
+            } else {
+                // If present, lod or bias is always stored in the register indexed by the
+                // gpr20 field, with an offset that depends on the usage of the other
+                // registers.
+                texture += ',' + regs.GetRegisterAsFloat(instr.gpr20.Value() + bias_offset);
+            }
+        }
+        texture += ")";
+        return texture;
+    }
+
+    std::pair<std::string, std::string> GetTEXCode(
+        const Instruction& instr, const Tegra::Shader::TextureType texture_type,
+        const Tegra::Shader::TextureProcessMode process_mode, const bool depth_compare,
+        const bool is_array) {
+        const bool lod_bias_enabled = (process_mode != Tegra::Shader::TextureProcessMode::None &&
+                                       process_mode != Tegra::Shader::TextureProcessMode::LZ);
+
+        const auto [coord_count, coord_dcl] = ValidateAndGetCoordinateElement(
+            texture_type, depth_compare, is_array, lod_bias_enabled, 4, 5);
+        // If arrays are enabled, the array index is always stored in the gpr8 field
+        const u64 array_register = instr.gpr8.Value();
+        // First coordinate index is the gpr8 or gpr8 + 1 when arrays are used
+        const u64 coord_register = array_register + (is_array ? 1 : 0);
+
+        std::string coord = coord_dcl;
+        for (size_t i = 0; i < coord_count;) {
+            coord += regs.GetRegisterAsFloat(coord_register + i);
+            ++i;
+            if (i != coord_count) {
+                coord += ',';
+            }
+        }
+        // 1D.DC: in OpenGL the 2nd component is ignored.
+        if (depth_compare && !is_array && texture_type == Tegra::Shader::TextureType::Texture1D) {
+            coord += ",0.0";
+        }
+        if (is_array) {
+            coord += ',' + regs.GetRegisterAsInteger(array_register);
+        }
+        if (depth_compare) {
+            // Depth is always stored in the register signaled by gpr20
+            // or in the next register if lod or bias are used
+            const u64 depth_register = instr.gpr20.Value() + (lod_bias_enabled ? 1 : 0);
+            coord += ',' + regs.GetRegisterAsFloat(depth_register);
+        }
+        coord += ");";
+        return std::make_pair(
+            coord, GetTextureCode(instr, texture_type, process_mode, depth_compare, is_array, 0));
+    }
+
+    std::pair<std::string, std::string> GetTEXSCode(
+        const Instruction& instr, const Tegra::Shader::TextureType texture_type,
+        const Tegra::Shader::TextureProcessMode process_mode, const bool depth_compare,
+        const bool is_array) {
+        const bool lod_bias_enabled = (process_mode != Tegra::Shader::TextureProcessMode::None &&
+                                       process_mode != Tegra::Shader::TextureProcessMode::LZ);
+
+        const auto [coord_count, coord_dcl] = ValidateAndGetCoordinateElement(
+            texture_type, depth_compare, is_array, lod_bias_enabled, 4, 4);
+        // If arrays are enabled, the array index is always stored in the gpr8 field
+        const u64 array_register = instr.gpr8.Value();
+        // First coordinate index is stored in gpr8 field or (gpr8 + 1) when arrays are used
+        const u64 coord_register = array_register + (is_array ? 1 : 0);
+        const u64 last_coord_register =
+            (is_array || !(lod_bias_enabled || depth_compare) || (coord_count > 2))
+                ? static_cast<u64>(instr.gpr20.Value())
+                : coord_register + 1;
+
+        std::string coord = coord_dcl;
+        for (size_t i = 0; i < coord_count; ++i) {
+            const bool last = (i == (coord_count - 1)) && (coord_count > 1);
+            coord += regs.GetRegisterAsFloat(last ? last_coord_register : coord_register + i);
+            if (i < coord_count - 1) {
+                coord += ',';
+            }
+        }
+
+        if (is_array) {
+            coord += ',' + regs.GetRegisterAsInteger(array_register);
+        }
+        if (depth_compare) {
+            // Depth is always stored in the register signaled by gpr20
+            // or in the next register if lod or bias are used
+            const u64 depth_register = instr.gpr20.Value() + (lod_bias_enabled ? 1 : 0);
+            coord += ',' + regs.GetRegisterAsFloat(depth_register);
+        }
+        coord += ");";
+
+        return std::make_pair(coord,
+                              GetTextureCode(instr, texture_type, process_mode, depth_compare,
+                                             is_array, (coord_count > 2 ? 1 : 0)));
+    }
+
+    std::pair<std::string, std::string> GetTLD4Code(const Instruction& instr,
+                                                    const Tegra::Shader::TextureType texture_type,
+                                                    const bool depth_compare, const bool is_array) {
+
+        const size_t coord_count = TextureCoordinates(texture_type);
+        const size_t total_coord_count = coord_count + (is_array ? 1 : 0);
+        const size_t total_reg_count = total_coord_count + (depth_compare ? 1 : 0);
+
+        constexpr std::array<const char*, 5> coord_container{
+            {"", "", "vec2 coord = vec2(", "vec3 coord = vec3(", "vec4 coord = vec4("}};
+
+        // If arrays are enabled, the array index is always stored in the gpr8 field
+        const u64 array_register = instr.gpr8.Value();
+        // First coordinate index is the gpr8 or gpr8 + 1 when arrays are used
+        const u64 coord_register = array_register + (is_array ? 1 : 0);
+
+        std::string coord = coord_container[total_coord_count];
+        for (size_t i = 0; i < coord_count;) {
+            coord += regs.GetRegisterAsFloat(coord_register + i);
+            ++i;
+            if (i != coord_count) {
+                coord += ',';
+            }
+        }
+
+        if (is_array) {
+            coord += ',' + regs.GetRegisterAsInteger(array_register);
+        }
+        coord += ");";
+
+        const std::string sampler =
+            GetSampler(instr.sampler, texture_type, is_array, depth_compare);
+
+        std::string texture = "textureGather(" + sampler + ", coord, ";
+        if (depth_compare) {
+            // Depth is always stored in the register signaled by gpr20
+            texture += regs.GetRegisterAsFloat(instr.gpr20.Value()) + ')';
+        } else {
+            texture += std::to_string(instr.tld4.component) + ')';
+        }
+        return std::make_pair(coord, texture);
+    }
+
+    std::pair<std::string, std::string> GetTLDSCode(const Instruction& instr,
+                                                    const Tegra::Shader::TextureType texture_type,
+                                                    const bool is_array) {
+
+        const size_t coord_count = TextureCoordinates(texture_type);
+        const size_t total_coord_count = coord_count + (is_array ? 1 : 0);
+        const bool lod_enabled =
+            instr.tlds.GetTextureProcessMode() == Tegra::Shader::TextureProcessMode::LL;
+
+        constexpr std::array<const char*, 4> coord_container{
+            {"", "int coords = (", "ivec2 coords = ivec2(", "ivec3 coords = ivec3("}};
+
+        std::string coord = coord_container[total_coord_count];
+
+        // If arrays are enabled, the array index is always stored in the gpr8 field
+        const u64 array_register = instr.gpr8.Value();
+
+        // If the texture is an array, the coordinates start at gpr20 instead of gpr8
+        const u64 coord_register = is_array ? instr.gpr20.Value() : instr.gpr8.Value();
+
+        const u64 last_coord_register =
+            ((coord_count > 2) || (coord_count == 2 && !lod_enabled)) && !is_array
+                ? static_cast<u64>(instr.gpr20.Value())
+                : coord_register + 1;
+
+        for (size_t i = 0; i < coord_count; ++i) {
+            const bool last = (i == (coord_count - 1)) && (coord_count > 1);
+            coord += regs.GetRegisterAsInteger(last ? last_coord_register : coord_register + i);
+            if (i < coord_count - 1) {
+                coord += ',';
+            }
+        }
+        if (is_array) {
+            coord += ',' + regs.GetRegisterAsInteger(array_register);
+        }
+        coord += ");";
+
+        const std::string sampler = GetSampler(instr.sampler, texture_type, is_array, false);
+
+        std::string texture = "texelFetch(" + sampler + ", coords";
+
+        if (lod_enabled) {
+            // When lod is used, it is always stored in gpr20
+            texture += ", " + regs.GetRegisterAsInteger(instr.gpr20) + ')';
+        } else {
+            texture += ", 0)";
+        }
+        return std::make_pair(coord, texture);
+    }
+
     /**
      * Compiles a single instruction from Tegra to GLSL.
      * @param offset the offset of the Tegra shader instruction.
@@ -1559,33 +1891,44 @@ private:
                 UNIMPLEMENTED_IF_MSG(instr.fmul.tab5cb8_2 != 0,
                                      "FMUL tab5cb8_2({}) is not implemented",
                                      instr.fmul.tab5cb8_2.Value());
-                UNIMPLEMENTED_IF_MSG(instr.fmul.tab5c68_1 != 0,
-                                     "FMUL tab5cb8_1({}) is not implemented",
-                                     instr.fmul.tab5c68_1.Value());
                 UNIMPLEMENTED_IF_MSG(
                     instr.fmul.tab5c68_0 != 1, "FMUL tab5cb8_0({}) is not implemented",
                     instr.fmul.tab5c68_0
                         .Value()); // SMO typical sends 1 here which seems to be the default
-                UNIMPLEMENTED_IF_MSG(instr.generates_cc,
-                                     "Condition codes generation in FMUL is not implemented");
 
                 op_b = GetOperandAbsNeg(op_b, false, instr.fmul.negate_b);
 
-                regs.SetRegisterToFloat(instr.gpr0, 0, op_a + " * " + op_b, 1, 1,
-                                        instr.alu.saturate_d, 0, true);
+                std::string postfactor_op;
+                if (instr.fmul.postfactor != 0) {
+                    s8 postfactor = static_cast<s8>(instr.fmul.postfactor);
+
+                    // The postfactor is encoded in the instruction as a 3-bit 1's complement
+                    // value; it is decoded by the logic below.
+                    if (postfactor >= 4) {
+                        postfactor = 7 - postfactor;
+                    } else {
+                        postfactor = 0 - postfactor;
+                    }
+
+                    if (postfactor > 0) {
+                        postfactor_op = " * " + std::to_string(1 << postfactor);
+                    } else {
+                        postfactor_op = " / " + std::to_string(1 << -postfactor);
+                    }
+                }
+
+                regs.SetRegisterToFloat(instr.gpr0, 0, op_a + " * " + op_b + postfactor_op, 1, 1,
+                                        instr.alu.saturate_d, instr.generates_cc, 0, true);
                 break;
             }
             case OpCode::Id::FADD_C:
             case OpCode::Id::FADD_R:
             case OpCode::Id::FADD_IMM: {
-                UNIMPLEMENTED_IF_MSG(instr.generates_cc,
-                                     "Condition codes generation in FADD is not implemented");
-
                 op_a = GetOperandAbsNeg(op_a, instr.alu.abs_a, instr.alu.negate_a);
                 op_b = GetOperandAbsNeg(op_b, instr.alu.abs_b, instr.alu.negate_b);
 
                 regs.SetRegisterToFloat(instr.gpr0, 0, op_a + " + " + op_b, 1, 1,
-                                        instr.alu.saturate_d, 0, true);
+                                        instr.alu.saturate_d, instr.generates_cc, 0, true);
                 break;
             }
             case OpCode::Id::MUFU: {
@@ -1593,31 +1936,31 @@ private:
                 switch (instr.sub_op) {
                 case SubOp::Cos:
                     regs.SetRegisterToFloat(instr.gpr0, 0, "cos(" + op_a + ')', 1, 1,
-                                            instr.alu.saturate_d, 0, true);
+                                            instr.alu.saturate_d, false, 0, true);
                     break;
                 case SubOp::Sin:
                     regs.SetRegisterToFloat(instr.gpr0, 0, "sin(" + op_a + ')', 1, 1,
-                                            instr.alu.saturate_d, 0, true);
+                                            instr.alu.saturate_d, false, 0, true);
                     break;
                 case SubOp::Ex2:
                     regs.SetRegisterToFloat(instr.gpr0, 0, "exp2(" + op_a + ')', 1, 1,
-                                            instr.alu.saturate_d, 0, true);
+                                            instr.alu.saturate_d, false, 0, true);
                     break;
                 case SubOp::Lg2:
                     regs.SetRegisterToFloat(instr.gpr0, 0, "log2(" + op_a + ')', 1, 1,
-                                            instr.alu.saturate_d, 0, true);
+                                            instr.alu.saturate_d, false, 0, true);
                     break;
                 case SubOp::Rcp:
                     regs.SetRegisterToFloat(instr.gpr0, 0, "1.0 / " + op_a, 1, 1,
-                                            instr.alu.saturate_d, 0, true);
+                                            instr.alu.saturate_d, false, 0, true);
                     break;
                 case SubOp::Rsq:
                     regs.SetRegisterToFloat(instr.gpr0, 0, "inversesqrt(" + op_a + ')', 1, 1,
-                                            instr.alu.saturate_d, 0, true);
+                                            instr.alu.saturate_d, false, 0, true);
                     break;
                 case SubOp::Sqrt:
                     regs.SetRegisterToFloat(instr.gpr0, 0, "sqrt(" + op_a + ')', 1, 1,
-                                            instr.alu.saturate_d, 0, true);
+                                            instr.alu.saturate_d, false, 0, true);
                     break;
                 default:
                     UNIMPLEMENTED_MSG("Unhandled MUFU sub op={0:x}",
@@ -1628,8 +1971,9 @@ private:
             case OpCode::Id::FMNMX_C:
             case OpCode::Id::FMNMX_R:
             case OpCode::Id::FMNMX_IMM: {
-                UNIMPLEMENTED_IF_MSG(instr.generates_cc,
-                                     "Condition codes generation in FMNMX is not implemented");
+                UNIMPLEMENTED_IF_MSG(
+                    instr.generates_cc,
+                    "Condition codes generation in FMNMX is partially implemented");
 
                 op_a = GetOperandAbsNeg(op_a, instr.alu.abs_a, instr.alu.negate_a);
                 op_b = GetOperandAbsNeg(op_b, instr.alu.abs_b, instr.alu.negate_b);
@@ -1640,7 +1984,7 @@ private:
                 regs.SetRegisterToFloat(instr.gpr0, 0,
                                         '(' + condition + ") ? min(" + parameters + ") : max(" +
                                             parameters + ')',
-                                        1, 1, false, 0, true);
+                                        1, 1, false, instr.generates_cc, 0, true);
                 break;
             }
             case OpCode::Id::RRO_C:
@@ -1665,18 +2009,16 @@ private:
                 break;
             }
             case OpCode::Id::FMUL32_IMM: {
-                UNIMPLEMENTED_IF_MSG(instr.op_32.generates_cc,
-                                     "Condition codes generation in FMUL32 is not implemented");
-
-                regs.SetRegisterToFloat(instr.gpr0, 0,
-                                        regs.GetRegisterAsFloat(instr.gpr8) + " * " +
-                                            GetImmediate32(instr),
-                                        1, 1, instr.fmul32.saturate, 0, true);
+                regs.SetRegisterToFloat(
+                    instr.gpr0, 0,
+                    regs.GetRegisterAsFloat(instr.gpr8) + " * " + GetImmediate32(instr), 1, 1,
+                    instr.fmul32.saturate, instr.op_32.generates_cc, 0, true);
                 break;
             }
             case OpCode::Id::FADD32I: {
-                UNIMPLEMENTED_IF_MSG(instr.op_32.generates_cc,
-                                     "Condition codes generation in FADD32I is not implemented");
+                UNIMPLEMENTED_IF_MSG(
+                    instr.op_32.generates_cc,
+                    "Condition codes generation in FADD32I is partially implemented");
 
                 std::string op_a = regs.GetRegisterAsFloat(instr.gpr8);
                 std::string op_b = GetImmediate32(instr);
@@ -1697,7 +2039,8 @@ private:
                     op_b = "-(" + op_b + ')';
                 }
 
-                regs.SetRegisterToFloat(instr.gpr0, 0, op_a + " + " + op_b, 1, 1, false, 0, true);
+                regs.SetRegisterToFloat(instr.gpr0, 0, op_a + " + " + op_b, 1, 1, false,
+                                        instr.op_32.generates_cc, 0, true);
                 break;
             }
             }
@@ -1711,16 +2054,14 @@ private:
 
             switch (opcode->get().GetId()) {
             case OpCode::Id::BFE_IMM: {
-                UNIMPLEMENTED_IF_MSG(instr.generates_cc,
-                                     "Condition codes generation in BFE is not implemented");
-
                 std::string inner_shift =
                     '(' + op_a + " << " + std::to_string(instr.bfe.GetLeftShiftValue()) + ')';
                 std::string outer_shift =
                     '(' + inner_shift + " >> " +
                     std::to_string(instr.bfe.GetLeftShiftValue() + instr.bfe.shift_position) + ')';
 
-                regs.SetRegisterToInteger(instr.gpr0, true, 0, outer_shift, 1, 1);
+                regs.SetRegisterToInteger(instr.gpr0, true, 0, outer_shift, 1, 1, false,
+                                          instr.generates_cc);
                 break;
             }
             default: {
@@ -1731,8 +2072,6 @@ private:
             break;
         }
         case OpCode::Type::Bfi: {
-            UNIMPLEMENTED_IF(instr.generates_cc);
-
             const auto [base, packed_shift] = [&]() -> std::tuple<std::string, std::string> {
                 switch (opcode->get().GetId()) {
                 case OpCode::Id::BFI_IMM_R:
@@ -1740,14 +2079,17 @@ private:
                             std::to_string(instr.alu.GetSignedImm20_20())};
                 default:
                     UNREACHABLE();
+                    return {regs.GetRegisterAsInteger(instr.gpr39, 0, false),
+                            std::to_string(instr.alu.GetSignedImm20_20())};
                 }
             }();
             const std::string offset = '(' + packed_shift + " & 0xff)";
             const std::string bits = "((" + packed_shift + " >> 8) & 0xff)";
             const std::string insert = regs.GetRegisterAsInteger(instr.gpr8, 0, false);
-            regs.SetRegisterToInteger(
-                instr.gpr0, false, 0,
-                "bitfieldInsert(" + base + ", " + insert + ", " + offset + ", " + bits + ')', 1, 1);
+            regs.SetRegisterToInteger(instr.gpr0, false, 0,
+                                      "bitfieldInsert(" + base + ", " + insert + ", " + offset +
+                                          ", " + bits + ')',
+                                      1, 1, false, instr.generates_cc);
             break;
         }
         case OpCode::Type::Shift: {
@@ -1769,9 +2111,6 @@ private:
             case OpCode::Id::SHR_C:
             case OpCode::Id::SHR_R:
             case OpCode::Id::SHR_IMM: {
-                UNIMPLEMENTED_IF_MSG(instr.generates_cc,
-                                     "Condition codes generation in SHR is not implemented");
-
                 if (!instr.shift.is_signed) {
                     // Logical shift right
                     op_a = "uint(" + op_a + ')';
@@ -1779,7 +2118,7 @@ private:
 
                 // Cast to int is superfluous for arithmetic shift, it's only for a logical shift
                 regs.SetRegisterToInteger(instr.gpr0, true, 0, "int(" + op_a + " >> " + op_b + ')',
-                                          1, 1);
+                                          1, 1, false, instr.generates_cc);
                 break;
             }
             case OpCode::Id::SHL_C:
@@ -1787,7 +2126,8 @@ private:
             case OpCode::Id::SHL_IMM:
                 UNIMPLEMENTED_IF_MSG(instr.generates_cc,
                                      "Condition codes generation in SHL is not implemented");
-                regs.SetRegisterToInteger(instr.gpr0, true, 0, op_a + " << " + op_b, 1, 1);
+                regs.SetRegisterToInteger(instr.gpr0, true, 0, op_a + " << " + op_b, 1, 1, false,
+                                          instr.generates_cc);
                 break;
             default: {
                 UNIMPLEMENTED_MSG("Unhandled shift instruction: {}", opcode->get().GetName());
@@ -1801,18 +2141,17 @@ private:
 
             switch (opcode->get().GetId()) {
             case OpCode::Id::IADD32I:
-                UNIMPLEMENTED_IF_MSG(instr.op_32.generates_cc,
-                                     "Condition codes generation in IADD32I is not implemented");
+                UNIMPLEMENTED_IF_MSG(
+                    instr.op_32.generates_cc,
+                    "Condition codes generation in IADD32I is partially implemented");
 
                 if (instr.iadd32i.negate_a)
                     op_a = "-(" + op_a + ')';
 
                 regs.SetRegisterToInteger(instr.gpr0, true, 0, op_a + " + " + op_b, 1, 1,
-                                          instr.iadd32i.saturate != 0);
+                                          instr.iadd32i.saturate, instr.op_32.generates_cc);
                 break;
             case OpCode::Id::LOP32I: {
-                UNIMPLEMENTED_IF_MSG(instr.op_32.generates_cc,
-                                     "Condition codes generation in LOP32I is not implemented");
 
                 if (instr.alu.lop32i.invert_a)
                     op_a = "~(" + op_a + ')';
@@ -1822,7 +2161,7 @@ private:
 
                 WriteLogicOperation(instr.gpr0, instr.alu.lop32i.operation, op_a, op_b,
                                     Tegra::Shader::PredicateResultMode::None,
-                                    Tegra::Shader::Pred::UnusedIndex);
+                                    Tegra::Shader::Pred::UnusedIndex, instr.op_32.generates_cc);
                 break;
             }
             default: {
@@ -1851,7 +2190,7 @@ private:
             case OpCode::Id::IADD_R:
             case OpCode::Id::IADD_IMM: {
                 UNIMPLEMENTED_IF_MSG(instr.generates_cc,
-                                     "Condition codes generation in IADD is not implemented");
+                                     "Condition codes generation in IADD is partially implemented");
 
                 if (instr.alu_integer.negate_a)
                     op_a = "-(" + op_a + ')';
@@ -1860,14 +2199,15 @@ private:
                     op_b = "-(" + op_b + ')';
 
                 regs.SetRegisterToInteger(instr.gpr0, true, 0, op_a + " + " + op_b, 1, 1,
-                                          instr.alu.saturate_d);
+                                          instr.alu.saturate_d, instr.generates_cc);
                 break;
             }
             case OpCode::Id::IADD3_C:
             case OpCode::Id::IADD3_R:
             case OpCode::Id::IADD3_IMM: {
-                UNIMPLEMENTED_IF_MSG(instr.generates_cc,
-                                     "Condition codes generation in IADD3 is not implemented");
+                UNIMPLEMENTED_IF_MSG(
+                    instr.generates_cc,
+                    "Condition codes generation in IADD3 is partially implemented");
 
                 std::string op_c = regs.GetRegisterAsInteger(instr.gpr39);
 
@@ -1923,14 +2263,16 @@ private:
                     result = '(' + op_a + " + " + op_b + " + " + op_c + ')';
                 }
 
-                regs.SetRegisterToInteger(instr.gpr0, true, 0, result, 1, 1);
+                regs.SetRegisterToInteger(instr.gpr0, true, 0, result, 1, 1, false,
+                                          instr.generates_cc);
                 break;
             }
             case OpCode::Id::ISCADD_C:
             case OpCode::Id::ISCADD_R:
             case OpCode::Id::ISCADD_IMM: {
-                UNIMPLEMENTED_IF_MSG(instr.generates_cc,
-                                     "Condition codes generation in ISCADD is not implemented");
+                UNIMPLEMENTED_IF_MSG(
+                    instr.generates_cc,
+                    "Condition codes generation in ISCADD is partially implemented");
 
                 if (instr.alu_integer.negate_a)
                     op_a = "-(" + op_a + ')';
@@ -1941,7 +2283,8 @@ private:
                 const std::string shift = std::to_string(instr.alu_integer.shift_amount.Value());
 
                 regs.SetRegisterToInteger(instr.gpr0, true, 0,
-                                          "((" + op_a + " << " + shift + ") + " + op_b + ')', 1, 1);
+                                          "((" + op_a + " << " + shift + ") + " + op_b + ')', 1, 1,
+                                          false, instr.generates_cc);
                 break;
             }
             case OpCode::Id::POPC_C:
@@ -1965,8 +2308,6 @@ private:
             case OpCode::Id::LOP_C:
             case OpCode::Id::LOP_R:
             case OpCode::Id::LOP_IMM: {
-                UNIMPLEMENTED_IF_MSG(instr.generates_cc,
-                                     "Condition codes generation in LOP is not implemented");
 
                 if (instr.alu.lop.invert_a)
                     op_a = "~(" + op_a + ')';
@@ -1975,15 +2316,13 @@ private:
                     op_b = "~(" + op_b + ')';
 
                 WriteLogicOperation(instr.gpr0, instr.alu.lop.operation, op_a, op_b,
-                                    instr.alu.lop.pred_result_mode, instr.alu.lop.pred48);
+                                    instr.alu.lop.pred_result_mode, instr.alu.lop.pred48,
+                                    instr.generates_cc);
                 break;
             }
             case OpCode::Id::LOP3_C:
             case OpCode::Id::LOP3_R:
             case OpCode::Id::LOP3_IMM: {
-                UNIMPLEMENTED_IF_MSG(instr.generates_cc,
-                                     "Condition codes generation in LOP3 is not implemented");
-
                 const std::string op_c = regs.GetRegisterAsInteger(instr.gpr39);
                 std::string lut;
 
@@ -1993,15 +2332,16 @@ private:
                     lut = '(' + std::to_string(instr.alu.lop3.GetImmLut48()) + ')';
                 }
 
-                WriteLop3Instruction(instr.gpr0, op_a, op_b, op_c, lut);
+                WriteLop3Instruction(instr.gpr0, op_a, op_b, op_c, lut, instr.generates_cc);
                 break;
             }
             case OpCode::Id::IMNMX_C:
             case OpCode::Id::IMNMX_R:
             case OpCode::Id::IMNMX_IMM: {
                 UNIMPLEMENTED_IF(instr.imnmx.exchange != Tegra::Shader::IMinMaxExchange::None);
-                UNIMPLEMENTED_IF_MSG(instr.generates_cc,
-                                     "Condition codes generation in IMNMX is not implemented");
+                UNIMPLEMENTED_IF_MSG(
+                    instr.generates_cc,
+                    "Condition codes generation in IMNMX is partially implemented");
 
                 const std::string condition =
                     GetPredicateCondition(instr.imnmx.pred, instr.imnmx.negate_pred != 0);
@@ -2009,7 +2349,7 @@ private:
                 regs.SetRegisterToInteger(instr.gpr0, instr.imnmx.is_signed, 0,
                                           '(' + condition + ") ? min(" + parameters + ") : max(" +
                                               parameters + ')',
-                                          1, 1);
+                                          1, 1, false, instr.generates_cc);
                 break;
             }
             case OpCode::Id::LEA_R2:
@@ -2070,7 +2410,8 @@ private:
                 UNIMPLEMENTED_IF_MSG(instr.lea.pred48 != static_cast<u64>(Pred::UnusedIndex),
                                      "Unhandled LEA Predicate");
                 const std::string value = '(' + op_a + " + (" + op_b + "*(1 << " + op_c + ")))";
-                regs.SetRegisterToInteger(instr.gpr0, true, 0, value, 1, 1);
+                regs.SetRegisterToInteger(instr.gpr0, true, 0, value, 1, 1, false,
+                                          instr.generates_cc);
 
                 break;
             }
@@ -2175,7 +2516,7 @@ private:
             UNIMPLEMENTED_IF_MSG(instr.ffma.tab5980_1 != 0, "FFMA tab5980_1({}) not implemented",
                                  instr.ffma.tab5980_1.Value());
             UNIMPLEMENTED_IF_MSG(instr.generates_cc,
-                                 "Condition codes generation in FFMA is not implemented");
+                                 "Condition codes generation in FFMA is partially implemented");
 
             switch (opcode->get().GetId()) {
             case OpCode::Id::FFMA_CR: {
@@ -2206,7 +2547,7 @@ private:
             }
 
             regs.SetRegisterToFloat(instr.gpr0, 0, "fma(" + op_a + ", " + op_b + ", " + op_c + ')',
-                                    1, 1, instr.alu.saturate_d, 0, true);
+                                    1, 1, instr.alu.saturate_d, instr.generates_cc, 0, true);
             break;
         }
         case OpCode::Type::Hfma2: {
@@ -2277,18 +2618,15 @@ private:
                 }
 
                 regs.SetRegisterToInteger(instr.gpr0, instr.conversion.is_output_signed, 0, op_a, 1,
-                                          1, instr.alu.saturate_d, 0, instr.conversion.dest_size,
-                                          instr.generates_cc.Value() != 0);
+                                          1, instr.alu.saturate_d, instr.generates_cc, 0,
+                                          instr.conversion.dest_size);
                 break;
             }
             case OpCode::Id::I2F_R:
             case OpCode::Id::I2F_C: {
                 UNIMPLEMENTED_IF(instr.conversion.dest_size != Register::Size::Word);
                 UNIMPLEMENTED_IF(instr.conversion.selector);
-                UNIMPLEMENTED_IF_MSG(instr.generates_cc,
-                                     "Condition codes generation in I2F is not implemented");
-
-                std::string op_a{};
+                std::string op_a;
 
                 if (instr.is_b_gpr) {
                     op_a =
@@ -2310,14 +2648,12 @@ private:
                     op_a = "-(" + op_a + ')';
                 }
 
-                regs.SetRegisterToFloat(instr.gpr0, 0, op_a, 1, 1);
+                regs.SetRegisterToFloat(instr.gpr0, 0, op_a, 1, 1, false, instr.generates_cc);
                 break;
             }
             case OpCode::Id::F2F_R: {
                 UNIMPLEMENTED_IF(instr.conversion.dest_size != Register::Size::Word);
                 UNIMPLEMENTED_IF(instr.conversion.src_size != Register::Size::Word);
-                UNIMPLEMENTED_IF_MSG(instr.generates_cc,
-                                     "Condition codes generation in F2F is not implemented");
                 std::string op_a = regs.GetRegisterAsFloat(instr.gpr20);
 
                 if (instr.conversion.abs_a) {
@@ -2349,14 +2685,13 @@ private:
                     break;
                 }
 
-                regs.SetRegisterToFloat(instr.gpr0, 0, op_a, 1, 1, instr.alu.saturate_d);
+                regs.SetRegisterToFloat(instr.gpr0, 0, op_a, 1, 1, instr.alu.saturate_d,
+                                        instr.generates_cc);
                 break;
             }
             case OpCode::Id::F2I_R:
             case OpCode::Id::F2I_C: {
                 UNIMPLEMENTED_IF(instr.conversion.src_size != Register::Size::Word);
-                UNIMPLEMENTED_IF_MSG(instr.generates_cc,
-                                     "Condition codes generation in F2I is not implemented");
                 std::string op_a{};
 
                 if (instr.is_b_gpr) {
@@ -2399,7 +2734,8 @@ private:
                 }
 
                 regs.SetRegisterToInteger(instr.gpr0, instr.conversion.is_output_signed, 0, op_a, 1,
-                                          1, false, 0, instr.conversion.dest_size);
+                                          1, false, instr.generates_cc, 0,
+                                          instr.conversion.dest_size);
                 break;
             }
             default: {
@@ -2444,10 +2780,7 @@ private:
             case OpCode::Id::LD_C: {
                 UNIMPLEMENTED_IF(instr.ld_c.unknown != 0);
 
-                // Add an extra scope and declare the index register inside to prevent
-                // overwriting it in case it is used as an output of the LD instruction.
-                shader.AddLine("{");
-                ++shader.scope;
+                const auto scope = shader.Scope();
 
                 shader.AddLine("uint index = (" + regs.GetRegisterAsInteger(instr.gpr8, 0, false) +
                                " / 4) & (MAX_CONSTBUFFER_ELEMENTS - 1);");
@@ -2473,19 +2806,13 @@ private:
                     UNIMPLEMENTED_MSG("Unhandled type: {}",
                                       static_cast<unsigned>(instr.ld_c.type.Value()));
                 }
-
-                --shader.scope;
-                shader.AddLine("}");
                 break;
             }
             case OpCode::Id::LD_L: {
                 UNIMPLEMENTED_IF_MSG(instr.ld_l.unknown == 1, "LD_L Unhandled mode: {}",
                                      static_cast<unsigned>(instr.ld_l.unknown.Value()));
 
-                // Add an extra scope and declare the index register inside to prevent
-                // overwriting it in case it is used as an output of the LD instruction.
-                shader.AddLine('{');
-                ++shader.scope;
+                const auto scope = shader.Scope();
 
                 std::string op = '(' + regs.GetRegisterAsInteger(instr.gpr8, 0, false) + " + " +
                                  std::to_string(instr.smem_imm.Value()) + ')';
@@ -2502,9 +2829,6 @@ private:
                     UNIMPLEMENTED_MSG("LD_L Unhandled type: {}",
                                       static_cast<unsigned>(instr.ldst_sl.type.Value()));
                 }
-
-                --shader.scope;
-                shader.AddLine('}');
                 break;
             }
             case OpCode::Id::ST_A: {
@@ -2539,10 +2863,7 @@ private:
                 UNIMPLEMENTED_IF_MSG(instr.st_l.unknown == 0, "ST_L Unhandled mode: {}",
                                      static_cast<unsigned>(instr.st_l.unknown.Value()));
 
-                // Add an extra scope and declare the index register inside to prevent
-                // overwriting it in case it is used as an output of the LD instruction.
-                shader.AddLine('{');
-                ++shader.scope;
+                const auto scope = shader.Scope();
 
                 std::string op = '(' + regs.GetRegisterAsInteger(instr.gpr8, 0, false) + " + " +
                                  std::to_string(instr.smem_imm.Value()) + ')';
@@ -2557,179 +2878,28 @@ private:
                     UNIMPLEMENTED_MSG("ST_L Unhandled type: {}",
                                       static_cast<unsigned>(instr.ldst_sl.type.Value()));
                 }
-
-                --shader.scope;
-                shader.AddLine('}');
                 break;
             }
             case OpCode::Id::TEX: {
                 Tegra::Shader::TextureType texture_type{instr.tex.texture_type};
-                std::string coord;
                 const bool is_array = instr.tex.array != 0;
-
+                const bool depth_compare =
+                    instr.tex.UsesMiscMode(Tegra::Shader::TextureMiscMode::DC);
+                const auto process_mode = instr.tex.GetTextureProcessMode();
                 UNIMPLEMENTED_IF_MSG(instr.tex.UsesMiscMode(Tegra::Shader::TextureMiscMode::NODEP),
                                      "NODEP is not implemented");
                 UNIMPLEMENTED_IF_MSG(instr.tex.UsesMiscMode(Tegra::Shader::TextureMiscMode::AOFFI),
                                      "AOFFI is not implemented");
 
-                const bool depth_compare =
-                    instr.tex.UsesMiscMode(Tegra::Shader::TextureMiscMode::DC);
-                u32 num_coordinates = TextureCoordinates(texture_type);
-                u32 start_index = 0;
-                std::string array_elem;
-                if (is_array) {
-                    array_elem = regs.GetRegisterAsInteger(instr.gpr8);
-                    start_index = 1;
-                }
-                const auto process_mode = instr.tex.GetTextureProcessMode();
-                u32 start_index_b = 0;
-                std::string lod_value;
-                if (process_mode != Tegra::Shader::TextureProcessMode::LZ &&
-                    process_mode != Tegra::Shader::TextureProcessMode::None) {
-                    start_index_b = 1;
-                    lod_value = regs.GetRegisterAsFloat(instr.gpr20);
-                }
-
-                std::string depth_value;
-                if (depth_compare) {
-                    depth_value = regs.GetRegisterAsFloat(instr.gpr20.Value() + start_index_b);
-                }
-
-                bool depth_compare_extra = false;
+                const auto [coord, texture] =
+                    GetTEXCode(instr, texture_type, process_mode, depth_compare, is_array);
 
-                switch (num_coordinates) {
-                case 1: {
-                    const std::string x = regs.GetRegisterAsFloat(instr.gpr8.Value() + start_index);
-                    if (is_array) {
-                        if (depth_compare) {
-                            coord = "vec3 coords = vec3(" + x + ", " + depth_value + ", " +
-                                    array_elem + ");";
-                        } else {
-                            coord = "vec2 coords = vec2(" + x + ", " + array_elem + ");";
-                        }
-                    } else {
-                        if (depth_compare) {
-                            coord = "vec2 coords = vec2(" + x + ", " + depth_value + ");";
-                        } else {
-                            coord = "float coords = " + x + ';';
-                        }
-                    }
-                    break;
-                }
-                case 2: {
-                    const std::string x = regs.GetRegisterAsFloat(instr.gpr8.Value() + start_index);
-                    const std::string y =
-                        regs.GetRegisterAsFloat(instr.gpr8.Value() + start_index + 1);
-                    if (is_array) {
-                        if (depth_compare) {
-                            coord = "vec4 coords = vec4(" + x + ", " + y + ", " + depth_value +
-                                    ", " + array_elem + ");";
-                        } else {
-                            coord = "vec3 coords = vec3(" + x + ", " + y + ", " + array_elem + ");";
-                        }
-                    } else {
-                        if (depth_compare) {
-                            coord =
-                                "vec3 coords = vec3(" + x + ", " + y + ", " + depth_value + ");";
-                        } else {
-                            coord = "vec2 coords = vec2(" + x + ", " + y + ");";
-                        }
-                    }
-                    break;
-                }
-                case 3: {
-                    const std::string x = regs.GetRegisterAsFloat(instr.gpr8.Value() + start_index);
-                    const std::string y =
-                        regs.GetRegisterAsFloat(instr.gpr8.Value() + start_index + 1);
-                    const std::string z =
-                        regs.GetRegisterAsFloat(instr.gpr8.Value() + start_index + 2);
-                    if (is_array) {
-                        depth_compare_extra = depth_compare;
-                        coord = "vec4 coords = vec4(" + x + ", " + y + ", " + z + ", " +
-                                array_elem + ");";
-                    } else {
-                        if (depth_compare) {
-                            coord = "vec4 coords = vec4(" + x + ", " + y + ", " + z + ", " +
-                                    depth_value + ");";
-                        } else {
-                            coord = "vec3 coords = vec3(" + x + ", " + y + ", " + z + ");";
-                        }
-                    }
-                    break;
-                }
-                default:
-                    UNIMPLEMENTED_MSG("Unhandled coordinates number {}",
-                                      static_cast<u32>(num_coordinates));
-
-                    // Fallback to interpreting as a 2D texture for now
-                    const std::string x = regs.GetRegisterAsFloat(instr.gpr8);
-                    const std::string y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1);
-                    coord = "vec2 coords = vec2(" + x + ", " + y + ");";
-                    texture_type = Tegra::Shader::TextureType::Texture2D;
-                }
-
-                const std::string sampler =
-                    GetSampler(instr.sampler, texture_type, is_array, depth_compare);
-                // Add an extra scope and declare the texture coords inside to prevent
-                // overwriting them in case they are used as outputs of the texs instruction.
-
-                shader.AddLine('{');
-                ++shader.scope;
+                const auto scope = shader.Scope();
                 shader.AddLine(coord);
-                std::string texture;
 
-                switch (instr.tex.GetTextureProcessMode()) {
-                case Tegra::Shader::TextureProcessMode::None: {
-                    if (!depth_compare_extra) {
-                        texture = "texture(" + sampler + ", coords)";
-                    } else {
-                        texture = "texture(" + sampler + ", coords, " + depth_value + ')';
-                    }
-                    break;
-                }
-                case Tegra::Shader::TextureProcessMode::LZ: {
-                    if (!depth_compare_extra) {
-                        texture = "textureLod(" + sampler + ", coords, 0.0)";
-                    } else {
-                        texture = "texture(" + sampler + ", coords, " + depth_value + ')';
-                    }
-                    break;
-                }
-                case Tegra::Shader::TextureProcessMode::LB:
-                case Tegra::Shader::TextureProcessMode::LBA: {
-                    // TODO: Figure if A suffix changes the equation at all.
-                    if (!depth_compare_extra) {
-                        texture = "texture(" + sampler + ", coords, " + lod_value + ')';
-                    } else {
-                        texture = "texture(" + sampler + ", coords, " + depth_value + ')';
-                        LOG_WARNING(HW_GPU,
-                                    "OpenGL Limitation: can't set bias value along depth compare");
-                    }
-                    break;
-                }
-                case Tegra::Shader::TextureProcessMode::LL:
-                case Tegra::Shader::TextureProcessMode::LLA: {
-                    // TODO: Figure if A suffix changes the equation at all.
-                    if (!depth_compare_extra) {
-                        texture = "textureLod(" + sampler + ", coords, " + lod_value + ')';
-                    } else {
-                        texture = "texture(" + sampler + ", coords, " + depth_value + ')';
-                        LOG_WARNING(HW_GPU,
-                                    "OpenGL Limitation: can't set lod value along depth compare");
-                    }
-                    break;
-                }
-                default: {
-                    if (!depth_compare_extra) {
-                        texture = "texture(" + sampler + ", coords)";
-                    } else {
-                        texture = "texture(" + sampler + ", coords, " + depth_value + ')';
-                    }
-                    UNIMPLEMENTED_MSG("Unhandled texture process mode {}",
-                                      static_cast<u32>(instr.tex.GetTextureProcessMode()));
-                }
-                }
-                if (!depth_compare) {
+                if (depth_compare) {
+                    regs.SetRegisterToFloat(instr.gpr0, 0, texture, 1, 1);
+                } else {
                     shader.AddLine("vec4 texture_tmp = " + texture + ';');
                     std::size_t dest_elem{};
                     for (std::size_t elem = 0; elem < 4; ++elem) {
@@ -2737,151 +2907,46 @@ private:
                             // Skip disabled components
                             continue;
                         }
-                        regs.SetRegisterToFloat(instr.gpr0, elem, "texture_tmp", 1, 4, false,
+                        regs.SetRegisterToFloat(instr.gpr0, elem, "texture_tmp", 1, 4, false, false,
                                                 dest_elem);
                         ++dest_elem;
                     }
-                } else {
-                    regs.SetRegisterToFloat(instr.gpr0, 0, texture, 1, 1, false);
                 }
-                --shader.scope;
-                shader.AddLine('}');
                 break;
             }
             case OpCode::Id::TEXS: {
                 Tegra::Shader::TextureType texture_type{instr.texs.GetTextureType()};
-                bool is_array{instr.texs.IsArrayTexture()};
+                const bool is_array{instr.texs.IsArrayTexture()};
+                const bool depth_compare =
+                    instr.texs.UsesMiscMode(Tegra::Shader::TextureMiscMode::DC);
+                const auto process_mode = instr.texs.GetTextureProcessMode();
 
                 UNIMPLEMENTED_IF_MSG(instr.texs.UsesMiscMode(Tegra::Shader::TextureMiscMode::NODEP),
                                      "NODEP is not implemented");
 
-                const bool depth_compare =
-                    instr.texs.UsesMiscMode(Tegra::Shader::TextureMiscMode::DC);
-                u32 num_coordinates = TextureCoordinates(texture_type);
-                const auto process_mode = instr.texs.GetTextureProcessMode();
-                std::string lod_value;
-                std::string coord;
-                u32 lod_offset = 0;
-                if (process_mode == Tegra::Shader::TextureProcessMode::LL) {
-                    if (num_coordinates > 2) {
-                        lod_value = regs.GetRegisterAsFloat(instr.gpr20.Value() + 1);
-                        lod_offset = 2;
-                    } else {
-                        lod_value = regs.GetRegisterAsFloat(instr.gpr20);
-                        lod_offset = 1;
-                    }
-                }
+                const auto scope = shader.Scope();
 
-                switch (num_coordinates) {
-                case 1: {
-                    coord = "float coords = " + regs.GetRegisterAsFloat(instr.gpr8) + ';';
-                    break;
-                }
-                case 2: {
-                    if (is_array) {
-                        if (depth_compare) {
-                            const std::string index = regs.GetRegisterAsInteger(instr.gpr8);
-                            const std::string x = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1);
-                            const std::string y = regs.GetRegisterAsFloat(instr.gpr20);
-                            const std::string z = regs.GetRegisterAsFloat(instr.gpr20.Value() + 1);
-                            coord = "vec4 coords = vec4(" + x + ", " + y + ", " + z + ", " + index +
-                                    ");";
-                        } else {
-                            const std::string index = regs.GetRegisterAsInteger(instr.gpr8);
-                            const std::string x = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1);
-                            const std::string y = regs.GetRegisterAsFloat(instr.gpr20);
-                            coord = "vec3 coords = vec3(" + x + ", " + y + ", " + index + ");";
-                        }
-                    } else {
-                        if (lod_offset != 0) {
-                            if (depth_compare) {
-                                const std::string x = regs.GetRegisterAsFloat(instr.gpr8);
-                                const std::string y =
-                                    regs.GetRegisterAsFloat(instr.gpr8.Value() + 1);
-                                const std::string z =
-                                    regs.GetRegisterAsFloat(instr.gpr20.Value() + lod_offset);
-                                coord = "vec3 coords = vec3(" + x + ", " + y + ", " + z + ");";
-                            } else {
-                                const std::string x = regs.GetRegisterAsFloat(instr.gpr8);
-                                const std::string y =
-                                    regs.GetRegisterAsFloat(instr.gpr8.Value() + 1);
-                                coord = "vec2 coords = vec2(" + x + ", " + y + ");";
-                            }
-                        } else {
-                            if (depth_compare) {
-                                const std::string x = regs.GetRegisterAsFloat(instr.gpr8);
-                                const std::string y =
-                                    regs.GetRegisterAsFloat(instr.gpr8.Value() + 1);
-                                const std::string z = regs.GetRegisterAsFloat(instr.gpr20);
-                                coord = "vec3 coords = vec3(" + x + ", " + y + ", " + z + ");";
-                            } else {
-                                const std::string x = regs.GetRegisterAsFloat(instr.gpr8);
-                                const std::string y = regs.GetRegisterAsFloat(instr.gpr20);
-                                coord = "vec2 coords = vec2(" + x + ", " + y + ");";
-                            }
-                        }
-                    }
-                    break;
-                }
-                case 3: {
-                    const std::string x = regs.GetRegisterAsFloat(instr.gpr8);
-                    const std::string y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1);
-                    const std::string z = regs.GetRegisterAsFloat(instr.gpr20);
-                    coord = "vec3 coords = vec3(" + x + ", " + y + ", " + z + ");";
-                    break;
-                }
-                default:
-                    UNIMPLEMENTED_MSG("Unhandled coordinates number {}",
-                                      static_cast<u32>(num_coordinates));
+                auto [coord, texture] =
+                    GetTEXSCode(instr, texture_type, process_mode, depth_compare, is_array);
 
-                    // Fallback to interpreting as a 2D texture for now
-                    const std::string x = regs.GetRegisterAsFloat(instr.gpr8);
-                    const std::string y = regs.GetRegisterAsFloat(instr.gpr20);
-                    coord = "vec2 coords = vec2(" + x + ", " + y + ");";
-                    texture_type = Tegra::Shader::TextureType::Texture2D;
-                    is_array = false;
-                }
-                const std::string sampler =
-                    GetSampler(instr.sampler, texture_type, is_array, depth_compare);
-                std::string texture;
-                switch (process_mode) {
-                case Tegra::Shader::TextureProcessMode::None: {
-                    texture = "texture(" + sampler + ", coords)";
-                    break;
-                }
-                case Tegra::Shader::TextureProcessMode::LZ: {
-                    if (depth_compare && is_array) {
-                        texture = "texture(" + sampler + ", coords)";
-                    } else {
-                        texture = "textureLod(" + sampler + ", coords, 0.0)";
-                    }
-                    break;
-                }
-                case Tegra::Shader::TextureProcessMode::LL: {
-                    texture = "textureLod(" + sampler + ", coords, " + lod_value + ')';
-                    break;
-                }
-                default: {
-                    texture = "texture(" + sampler + ", coords)";
-                    UNIMPLEMENTED_MSG("Unhandled texture process mode {}",
-                                      static_cast<u32>(instr.texs.GetTextureProcessMode()));
-                }
+                shader.AddLine(coord);
+
+                if (depth_compare) {
+                    texture = "vec4(" + texture + ')';
                 }
-                if (!depth_compare) {
-                    WriteTexsInstruction(instr, coord, texture);
+                shader.AddLine("vec4 texture_tmp = " + texture + ';');
+
+                if (instr.texs.fp32_flag) {
+                    WriteTexsInstructionFloat(instr, "texture_tmp");
                 } else {
-                    WriteTexsInstruction(instr, coord, "vec4(" + texture + ')');
+                    WriteTexsInstructionHalfFloat(instr, "texture_tmp");
                 }
-
                 break;
             }
             case OpCode::Id::TLDS: {
                 const Tegra::Shader::TextureType texture_type{instr.tlds.GetTextureType()};
                 const bool is_array{instr.tlds.IsArrayTexture()};
 
-                ASSERT(texture_type == Tegra::Shader::TextureType::Texture2D);
-                ASSERT(is_array == false);
-
                 UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(Tegra::Shader::TextureMiscMode::NODEP),
                                      "NODEP is not implemented");
                 UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(Tegra::Shader::TextureMiscMode::AOFFI),
@@ -2889,63 +2954,16 @@ private:
                 UNIMPLEMENTED_IF_MSG(instr.tlds.UsesMiscMode(Tegra::Shader::TextureMiscMode::MZ),
                                      "MZ is not implemented");
 
-                u32 extra_op_offset = 0;
-
-                // Scope to avoid variable name overlaps.
-                shader.AddLine('{');
-                ++shader.scope;
-                std::string coords;
-
-                switch (texture_type) {
-                case Tegra::Shader::TextureType::Texture1D: {
-                    const std::string x = regs.GetRegisterAsInteger(instr.gpr8);
-                    coords = "float coords = " + x + ';';
-                    break;
-                }
-                case Tegra::Shader::TextureType::Texture2D: {
-                    UNIMPLEMENTED_IF_MSG(is_array, "Unhandled 2d array texture");
+                const auto [coord, texture] = GetTLDSCode(instr, texture_type, is_array);
 
-                    const std::string x = regs.GetRegisterAsInteger(instr.gpr8);
-                    const std::string y = regs.GetRegisterAsInteger(instr.gpr20);
-                    // shader.AddLine("ivec2 coords = ivec2(" + x + ", " + y + ");");
-                    coords = "ivec2 coords = ivec2(" + x + ", " + y + ");";
-                    extra_op_offset = 1;
-                    break;
-                }
-                default:
-                    UNIMPLEMENTED_MSG("Unhandled texture type {}", static_cast<u32>(texture_type));
-                }
-                const std::string sampler =
-                    GetSampler(instr.sampler, texture_type, is_array, false);
-                std::string texture = "texelFetch(" + sampler + ", coords, 0)";
-                switch (instr.tlds.GetTextureProcessMode()) {
-                case Tegra::Shader::TextureProcessMode::LZ: {
-                    texture = "texelFetch(" + sampler + ", coords, 0)";
-                    break;
-                }
-                case Tegra::Shader::TextureProcessMode::LL: {
-                    shader.AddLine(
-                        "float lod = " +
-                        regs.GetRegisterAsInteger(instr.gpr20.Value() + extra_op_offset) + ';');
-                    texture = "texelFetch(" + sampler + ", coords, lod)";
-                    break;
-                }
-                default: {
-                    texture = "texelFetch(" + sampler + ", coords, 0)";
-                    UNIMPLEMENTED_MSG("Unhandled texture process mode {}",
-                                      static_cast<u32>(instr.tlds.GetTextureProcessMode()));
-                }
-                }
-                WriteTexsInstruction(instr, coords, texture);
+                const auto scope = shader.Scope();
 
-                --shader.scope;
-                shader.AddLine('}');
+                shader.AddLine(coord);
+                shader.AddLine("vec4 texture_tmp = " + texture + ';');
+                WriteTexsInstructionFloat(instr, "texture_tmp");
                 break;
             }
             case OpCode::Id::TLD4: {
-                ASSERT(instr.tld4.texture_type == Tegra::Shader::TextureType::Texture2D);
-                ASSERT(instr.tld4.array == 0);
-                std::string coord;
 
                 UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(Tegra::Shader::TextureMiscMode::NODEP),
                                      "NODEP is not implemented");
@@ -2955,64 +2973,30 @@ private:
                                      "NDV is not implemented");
                 UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(Tegra::Shader::TextureMiscMode::PTP),
                                      "PTP is not implemented");
+
+                auto texture_type = instr.tld4.texture_type.Value();
                 const bool depth_compare =
                     instr.tld4.UsesMiscMode(Tegra::Shader::TextureMiscMode::DC);
-                auto texture_type = instr.tld4.texture_type.Value();
-                u32 num_coordinates = TextureCoordinates(texture_type);
-                if (depth_compare)
-                    num_coordinates += 1;
-
-                // Add an extra scope and declare the texture coords inside to prevent
-                // overwriting them in case they are used as outputs of the texs instruction.
-                shader.AddLine('{');
-                ++shader.scope;
+                const bool is_array = instr.tld4.array != 0;
 
-                switch (num_coordinates) {
-                case 2: {
-                    const std::string x = regs.GetRegisterAsFloat(instr.gpr8);
-                    const std::string y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1);
-                    shader.AddLine("vec2 coords = vec2(" + x + ", " + y + ");");
-                    break;
-                }
-                case 3: {
-                    const std::string x = regs.GetRegisterAsFloat(instr.gpr8);
-                    const std::string y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1);
-                    const std::string z = regs.GetRegisterAsFloat(instr.gpr8.Value() + 2);
-                    shader.AddLine("vec3 coords = vec3(" + x + ", " + y + ", " + z + ");");
-                    break;
-                }
-                default:
-                    UNIMPLEMENTED_MSG("Unhandled coordinates number {}",
-                                      static_cast<u32>(num_coordinates));
-                    const std::string x = regs.GetRegisterAsFloat(instr.gpr8);
-                    const std::string y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1);
-                    shader.AddLine("vec2 coords = vec2(" + x + ", " + y + ");");
-                    texture_type = Tegra::Shader::TextureType::Texture2D;
-                }
+                const auto [coord, texture] =
+                    GetTLD4Code(instr, texture_type, depth_compare, is_array);
 
-                const std::string sampler =
-                    GetSampler(instr.sampler, texture_type, false, depth_compare);
+                const auto scope = shader.Scope();
 
-                const std::string texture = "textureGather(" + sampler + ", coords, " +
-                                            std::to_string(instr.tld4.component) + ')';
+                shader.AddLine(coord);
+                std::size_t dest_elem{};
 
-                if (!depth_compare) {
-                    shader.AddLine("vec4 texture_tmp = " + texture + ';');
-                    std::size_t dest_elem{};
-                    for (std::size_t elem = 0; elem < 4; ++elem) {
-                        if (!instr.tex.IsComponentEnabled(elem)) {
-                            // Skip disabled components
-                            continue;
-                        }
-                        regs.SetRegisterToFloat(instr.gpr0, elem, "texture_tmp", 1, 4, false,
-                                                dest_elem);
-                        ++dest_elem;
+                shader.AddLine("vec4 texture_tmp = " + texture + ';');
+                for (std::size_t elem = 0; elem < 4; ++elem) {
+                    if (!instr.tex.IsComponentEnabled(elem)) {
+                        // Skip disabled components
+                        continue;
                     }
-                } else {
-                    regs.SetRegisterToFloat(instr.gpr0, 0, texture, 1, 1, false);
+                    regs.SetRegisterToFloat(instr.gpr0, elem, "texture_tmp", 1, 4, false, false,
+                                            dest_elem);
+                    ++dest_elem;
                 }
-                --shader.scope;
-                shader.AddLine('}');
                 break;
             }
             case OpCode::Id::TLD4S: {
@@ -3023,45 +3007,42 @@ private:
                     instr.tld4s.UsesMiscMode(Tegra::Shader::TextureMiscMode::AOFFI),
                     "AOFFI is not implemented");
 
-                // Scope to avoid variable name overlaps.
-                shader.AddLine('{');
-                ++shader.scope;
+                const auto scope = shader.Scope();
+
                 std::string coords;
 
                 const bool depth_compare =
                     instr.tld4s.UsesMiscMode(Tegra::Shader::TextureMiscMode::DC);
-                const std::string op_a = regs.GetRegisterAsFloat(instr.gpr8);
-                const std::string op_b = regs.GetRegisterAsFloat(instr.gpr20);
-                // TODO(Subv): Figure out how the sampler type is encoded in the TLD4S instruction.
+
                 const std::string sampler = GetSampler(
                     instr.sampler, Tegra::Shader::TextureType::Texture2D, false, depth_compare);
-                if (!depth_compare) {
-                    coords = "vec2 coords = vec2(" + op_a + ", " + op_b + ");";
-                } else {
-                    // Note: TLD4S coordinate encoding works just like TEXS's
-                    const std::string op_y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1);
-                    coords = "vec3 coords = vec3(" + op_a + ", " + op_y + ", " + op_b + ");";
-                }
-                const std::string texture = "textureGather(" + sampler + ", coords, " +
-                                            std::to_string(instr.tld4s.component) + ')';
+
+                const std::string op_a = regs.GetRegisterAsFloat(instr.gpr8);
+                coords = "vec2 coords = vec2(" + op_a + ", ";
+                std::string texture = "textureGather(" + sampler + ", coords, ";
 
                 if (!depth_compare) {
-                    WriteTexsInstruction(instr, coords, texture);
+                    const std::string op_b = regs.GetRegisterAsFloat(instr.gpr20);
+                    coords += op_b + ");";
+                    texture += std::to_string(instr.tld4s.component) + ')';
                 } else {
-                    WriteTexsInstruction(instr, coords, "vec4(" + texture + ')');
+                    const std::string op_b = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1);
+                    const std::string op_c = regs.GetRegisterAsFloat(instr.gpr20);
+                    coords += op_b + ");";
+                    texture += op_c + ')';
                 }
-
-                --shader.scope;
-                shader.AddLine('}');
+                shader.AddLine(coords);
+                shader.AddLine("vec4 texture_tmp = " + texture + ';');
+                WriteTexsInstructionFloat(instr, "texture_tmp");
                 break;
             }
             case OpCode::Id::TXQ: {
                 UNIMPLEMENTED_IF_MSG(instr.txq.UsesMiscMode(Tegra::Shader::TextureMiscMode::NODEP),
                                      "NODEP is not implemented");
 
-                ++shader.scope;
-                shader.AddLine('{');
-                // TODO: the new commits on the texture refactor, change the way samplers work.
+                const auto scope = shader.Scope();
+
+                // TODO: The new commits on the texture refactor change the way samplers work.
                 // Sadly, not all texture instructions specify the type of texture their sampler
                 // uses. This must be fixed at a later instance.
                 const std::string sampler =
@@ -3072,7 +3053,8 @@ private:
                                                 regs.GetRegisterAsInteger(instr.gpr8) + ')';
                     const std::string mip_level = "textureQueryLevels(" + sampler + ')';
                     shader.AddLine("ivec2 sizes = " + texture + ';');
-                    regs.SetRegisterToInteger(instr.gpr0, true, 0, "sizes.x", 1, 1);
+
+                    regs.SetRegisterToInteger(instr.gpr0.Value() + 0, true, 0, "sizes.x", 1, 1);
                     regs.SetRegisterToInteger(instr.gpr0.Value() + 1, true, 0, "sizes.y", 1, 1);
                     regs.SetRegisterToInteger(instr.gpr0.Value() + 2, true, 0, "0", 1, 1);
                     regs.SetRegisterToInteger(instr.gpr0.Value() + 3, true, 0, mip_level, 1, 1);
@@ -3083,8 +3065,6 @@ private:
                                       static_cast<u32>(instr.txq.query_type.Value()));
                 }
                 }
-                --shader.scope;
-                shader.AddLine('}');
                 break;
             }
             case OpCode::Id::TMML: {
@@ -3099,17 +3079,18 @@ private:
                 const std::string sampler =
                     GetSampler(instr.sampler, texture_type, is_array, false);
 
-                // TODO: add coordinates for different samplers once other texture types are
+                const auto scope = shader.Scope();
+
+                // TODO: Add coordinates for different samplers once other texture types are
                 // implemented.
-                std::string coord;
                 switch (texture_type) {
                 case Tegra::Shader::TextureType::Texture1D: {
-                    coord = "float coords = " + x + ';';
+                    shader.AddLine("float coords = " + x + ';');
                     break;
                 }
                 case Tegra::Shader::TextureType::Texture2D: {
                     const std::string y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1);
-                    coord = "vec2 coords = vec2(" + x + ", " + y + ");";
+                    shader.AddLine("vec2 coords = vec2(" + x + ", " + y + ");");
                     break;
                 }
                 default:
@@ -3117,22 +3098,15 @@ private:
 
                     // Fallback to interpreting as a 2D texture for now
                     const std::string y = regs.GetRegisterAsFloat(instr.gpr8.Value() + 1);
-                    coord = "vec2 coords = vec2(" + x + ", " + y + ");";
+                    shader.AddLine("vec2 coords = vec2(" + x + ", " + y + ");");
                     texture_type = Tegra::Shader::TextureType::Texture2D;
                 }
-                // Add an extra scope and declare the texture coords inside to prevent
-                // overwriting them in case they are used as outputs of the texs instruction.
-                shader.AddLine('{');
-                ++shader.scope;
-                shader.AddLine(coord);
+
                 const std::string texture = "textureQueryLod(" + sampler + ", coords)";
-                const std::string tmp = "vec2 tmp = " + texture + "*vec2(256.0, 256.0);";
-                shader.AddLine(tmp);
+                shader.AddLine("vec2 tmp = " + texture + " * vec2(256.0, 256.0);");
 
                 regs.SetRegisterToInteger(instr.gpr0, true, 0, "int(tmp.y)", 1, 1);
                 regs.SetRegisterToInteger(instr.gpr0.Value() + 1, false, 0, "uint(tmp.x)", 1, 1);
-                --shader.scope;
-                shader.AddLine('}');
                 break;
             }
             default: {
@@ -3268,7 +3242,7 @@ private:
         }
         case OpCode::Type::PredicateSetRegister: {
             UNIMPLEMENTED_IF_MSG(instr.generates_cc,
-                                 "Condition codes generation in PSET is not implemented");
+                                 "Condition codes generation in PSET is partially implemented");
 
             const std::string op_a =
                 GetPredicateCondition(instr.pset.pred12, instr.pset.neg_pred12 != 0);
@@ -3285,10 +3259,11 @@ private:
             const std::string result = '(' + predicate + ") " + combiner + " (" + second_pred + ')';
             if (instr.pset.bf == 0) {
                 const std::string value = '(' + result + ") ? 0xFFFFFFFF : 0";
-                regs.SetRegisterToInteger(instr.gpr0, false, 0, value, 1, 1);
+                regs.SetRegisterToInteger(instr.gpr0, false, 0, value, 1, 1, false,
+                                          instr.generates_cc);
             } else {
                 const std::string value = '(' + result + ") ? 1.0 : 0.0";
-                regs.SetRegisterToFloat(instr.gpr0, 0, value, 1, 1);
+                regs.SetRegisterToFloat(instr.gpr0, 0, value, 1, 1, false, instr.generates_cc);
             }
             break;
         }
@@ -3353,6 +3328,7 @@ private:
                     return std::to_string(instr.r2p.immediate_mask);
                 default:
                     UNREACHABLE();
+                    return std::to_string(instr.r2p.immediate_mask);
                 }
             }();
             const std::string mask = '(' + regs.GetRegisterAsInteger(instr.gpr8, 0, false) +
@@ -3404,14 +3380,11 @@ private:
                                           ") " + combiner + " (" + second_pred + "))";
 
             if (instr.fset.bf) {
-                regs.SetRegisterToFloat(instr.gpr0, 0, predicate + " ? 1.0 : 0.0", 1, 1);
+                regs.SetRegisterToFloat(instr.gpr0, 0, predicate + " ? 1.0 : 0.0", 1, 1, false,
+                                        instr.generates_cc);
             } else {
                 regs.SetRegisterToInteger(instr.gpr0, false, 0, predicate + " ? 0xFFFFFFFF : 0", 1,
-                                          1);
-            }
-            if (instr.generates_cc.Value() != 0) {
-                regs.SetInternalFlag(InternalFlag::ZeroFlag, predicate);
-                LOG_WARNING(HW_GPU, "FSET Condition Code is incomplete");
+                                          1, false, instr.generates_cc);
             }
             break;
         }
@@ -3498,7 +3471,7 @@ private:
             UNIMPLEMENTED_IF(instr.xmad.sign_a);
             UNIMPLEMENTED_IF(instr.xmad.sign_b);
             UNIMPLEMENTED_IF_MSG(instr.generates_cc,
-                                 "Condition codes generation in XMAD is not implemented");
+                                 "Condition codes generation in XMAD is partially implemented");
 
             std::string op_a{regs.GetRegisterAsInteger(instr.gpr8, 0, instr.xmad.sign_a)};
             std::string op_b;
@@ -3584,7 +3557,8 @@ private:
                 sum = "((" + sum + " & 0xFFFF) | (" + src2 + "<< 16))";
             }
 
-            regs.SetRegisterToInteger(instr.gpr0, is_signed, 0, sum, 1, 1);
+            regs.SetRegisterToInteger(instr.gpr0, is_signed, 0, sum, 1, 1, false,
+                                      instr.generates_cc);
             break;
         }
         default: {
@@ -3788,8 +3762,7 @@ private:
                 }
 
                 regs.SetRegisterToInteger(instr.gpr0, result_signed, 1, result, 1, 1,
-                                          instr.vmad.saturate == 1, 0, Register::Size::Word,
-                                          instr.vmad.cc);
+                                          instr.vmad.saturate, instr.vmad.cc);
                 break;
             }
             case OpCode::Id::VSETP: {
@@ -3816,7 +3789,10 @@ private:
                 }
                 break;
             }
-            default: { UNIMPLEMENTED_MSG("Unhandled instruction: {}", opcode->get().GetName()); }
+            default: {
+                UNIMPLEMENTED_MSG("Unhandled instruction: {}", opcode->get().GetName());
+                break;
+            }
             }
 
             break;
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp
index 23ed91e27..5d0819dc5 100644
--- a/src/video_core/renderer_opengl/gl_shader_gen.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp
@@ -2,6 +2,7 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
+#include <fmt/format.h>
 #include "common/assert.h"
 #include "video_core/engines/maxwell_3d.h"
 #include "video_core/renderer_opengl/gl_shader_decompiler.h"
@@ -16,6 +17,8 @@ static constexpr u32 PROGRAM_OFFSET{10};
 ProgramResult GenerateVertexShader(const ShaderSetup& setup) {
     std::string out = "#version 430 core\n";
     out += "#extension GL_ARB_separate_shader_objects : enable\n\n";
+    const std::string id = fmt::format("{:016x}", setup.program.unique_identifier);
+    out += "// Shader Unique Id: VS" + id + "\n\n";
     out += Decompiler::GetCommonDeclarations();
 
     out += R"(
@@ -84,6 +87,8 @@ void main() {
 ProgramResult GenerateGeometryShader(const ShaderSetup& setup) {
     // Version is intentionally skipped in shader generation, it's added by the lazy compilation.
     std::string out = "#extension GL_ARB_separate_shader_objects : enable\n\n";
+    const std::string id = fmt::format("{:016x}", setup.program.unique_identifier);
+    out += "// Shader Unique Id: GS" + id + "\n\n";
     out += Decompiler::GetCommonDeclarations();
     out += "bool exec_geometry();\n";
 
@@ -117,6 +122,8 @@ void main() {
 ProgramResult GenerateFragmentShader(const ShaderSetup& setup) {
     std::string out = "#version 430 core\n";
     out += "#extension GL_ARB_separate_shader_objects : enable\n\n";
+    const std::string id = fmt::format("{:016x}", setup.program.unique_identifier);
+    out += "// Shader Unique Id: FS" + id + "\n\n";
     out += Decompiler::GetCommonDeclarations();
     out += "bool exec_fragment();\n";
 
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.h b/src/video_core/renderer_opengl/gl_shader_gen.h
index 4fa6d7612..fcc20d3b4 100644
--- a/src/video_core/renderer_opengl/gl_shader_gen.h
+++ b/src/video_core/renderer_opengl/gl_shader_gen.h
@@ -177,6 +177,9 @@ struct ShaderSetup {
     struct {
         ProgramCode code;
         ProgramCode code_b; // Used for dual vertex shaders
+        u64 unique_identifier;
+        std::size_t real_size;
+        std::size_t real_size_b;
     } program;
 
     /// Used in scenarios where we have a dual vertex shaders
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index 4fd0d66c5..235732d86 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -138,7 +138,12 @@ void RendererOpenGL::SwapBuffers(
 
         // Load the framebuffer from memory, draw it to the screen, and swap buffers
         LoadFBToScreenInfo(*framebuffer);
-        DrawScreen();
+
+        if (renderer_settings.screenshot_requested)
+            CaptureScreenshot();
+
+        DrawScreen(render_window.GetFramebufferLayout());
+
         render_window.SwapBuffers();
     }
 
@@ -383,14 +388,13 @@ void RendererOpenGL::DrawScreenTriangles(const ScreenInfo& screen_info, float x,
 /**
  * Draws the emulated screens to the emulator window.
  */
-void RendererOpenGL::DrawScreen() {
+void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {
     if (renderer_settings.set_background_color) {
         // Update background color before drawing
         glClearColor(Settings::values.bg_red, Settings::values.bg_green, Settings::values.bg_blue,
                      0.0f);
     }
 
-    const auto& layout = render_window.GetFramebufferLayout();
     const auto& screen = layout.screen;
 
     glViewport(0, 0, layout.width, layout.height);
@@ -414,6 +418,37 @@ void RendererOpenGL::DrawScreen() {
 /// Updates the framerate
 void RendererOpenGL::UpdateFramerate() {}
 
+void RendererOpenGL::CaptureScreenshot() {
+    // Render the frame into a temporary framebuffer, read it back, then restore GL state
+    screenshot_framebuffer.Create();
+    const GLuint saved_read_fb = state.draw.read_framebuffer;
+    const GLuint saved_draw_fb = state.draw.draw_framebuffer;
+    state.draw.read_framebuffer = screenshot_framebuffer.handle;
+    state.draw.draw_framebuffer = screenshot_framebuffer.handle;
+    state.Apply();
+    // Attach a color renderbuffer with the dimensions of the requested screenshot layout
+    const Layout::FramebufferLayout layout{renderer_settings.screenshot_framebuffer_layout};
+    GLuint color_rb{};
+    glGenRenderbuffers(1, &color_rb);
+    glBindRenderbuffer(GL_RENDERBUFFER, color_rb);
+    glRenderbufferStorage(GL_RENDERBUFFER, GL_RGB8, layout.width, layout.height);
+    glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_RENDERBUFFER, color_rb);
+
+    DrawScreen(layout);
+    glReadPixels(0, 0, layout.width, layout.height, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV,
+                 renderer_settings.screenshot_bits);
+
+    // Drop the temporary objects and restore the framebuffer bindings recorded on entry
+    screenshot_framebuffer.Release();
+    state.draw.read_framebuffer = saved_read_fb;
+    state.draw.draw_framebuffer = saved_draw_fb;
+    state.Apply();
+    glDeleteRenderbuffers(1, &color_rb);
+
+    renderer_settings.screenshot_complete_callback();
+    renderer_settings.screenshot_requested = false;
+}
+
 static const char* GetSource(GLenum source) {
 #define RET(s)                                                                                     \
     case GL_DEBUG_SOURCE_##s:                                                                      \
@@ -427,6 +462,7 @@ static const char* GetSource(GLenum source) {
         RET(OTHER);
     default:
         UNREACHABLE();
+        return "Unknown source";
     }
 #undef RET
 }
@@ -445,6 +481,7 @@ static const char* GetType(GLenum type) {
         RET(MARKER);
     default:
         UNREACHABLE();
+        return "Unknown type";
     }
 #undef RET
 }
diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h
index c0868c0e4..b85cc262f 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.h
+++ b/src/video_core/renderer_opengl/renderer_opengl.h
@@ -16,6 +16,10 @@ namespace Core::Frontend {
 class EmuWindow;
 }
 
+namespace Layout {
+struct FramebufferLayout;
+} // namespace Layout
+
 namespace OpenGL {
 
 /// Structure used for storing information about the textures for the Switch screen
@@ -66,10 +70,12 @@ private:
 
     void ConfigureFramebufferTexture(TextureInfo& texture,
                                      const Tegra::FramebufferConfig& framebuffer);
-    void DrawScreen();
+    void DrawScreen(const Layout::FramebufferLayout& layout);
     void DrawScreenTriangles(const ScreenInfo& screen_info, float x, float y, float w, float h);
     void UpdateFramerate();
 
+    void CaptureScreenshot();
+
     // Loads framebuffer from emulated memory into the display information structure
     void LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuffer);
     // Fills active OpenGL texture with the given RGBA color.
@@ -82,6 +88,7 @@ private:
     OGLVertexArray vertex_array;
     OGLBuffer vertex_buffer;
     OGLProgram shader;
+    OGLFramebuffer screenshot_framebuffer;
 
     /// Display information for Switch screen
     ScreenInfo screen_info;