13 files changed, 311 insertions, 114 deletions
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index 3bca16364..dfbf80abd 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -398,27 +398,6 @@ u32 Maxwell3D::GetRegisterValue(u32 method) const {
     return regs.reg_array[method];
 }
 
-bool Maxwell3D::IsShaderStageEnabled(Regs::ShaderStage stage) const {
-    // The Vertex stage is always enabled.
-    if (stage == Regs::ShaderStage::Vertex)
-        return true;
-
-    switch (stage) {
-    case Regs::ShaderStage::TesselationControl:
-        return regs.shader_config[static_cast<size_t>(Regs::ShaderProgram::TesselationControl)]
-                   .enable != 0;
-    case Regs::ShaderStage::TesselationEval:
-        return regs.shader_config[static_cast<size_t>(Regs::ShaderProgram::TesselationEval)]
-                   .enable != 0;
-    case Regs::ShaderStage::Geometry:
-        return regs.shader_config[static_cast<size_t>(Regs::ShaderProgram::Geometry)].enable != 0;
-    case Regs::ShaderStage::Fragment:
-        return regs.shader_config[static_cast<size_t>(Regs::ShaderProgram::Fragment)].enable != 0;
-    }
-
-    UNREACHABLE();
-}
-
 void Maxwell3D::ProcessClearBuffers() {
     ASSERT(regs.clear_buffers.R == regs.clear_buffers.G &&
            regs.clear_buffers.R == regs.clear_buffers.B &&
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index 5a7cf0107..6f0170ff7 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -379,6 +379,14 @@ public:
             }
         };
 
+        bool IsShaderConfigEnabled(size_t index) const {
+            // VertexB is always enabled, regardless of its enable bit.
+            if (index == static_cast<size_t>(Regs::ShaderProgram::VertexB)) {
+                return true;
+            }
+            return shader_config[index].enable != 0;
+        }
+
         union {
             struct {
                 INSERT_PADDING_WORDS(0x45);
@@ -780,9 +788,6 @@ public:
     /// Returns the texture information for a specific texture in a specific shader stage.
     Texture::FullTextureInfo GetStageTexture(Regs::ShaderStage stage, size_t offset) const;
 
-    /// Returns whether the specified shader stage is enabled or not.
-    bool IsShaderStageEnabled(Regs::ShaderStage stage) const;
-
 private:
     std::unordered_map<u32, std::vector<u32>> uploaded_macros;
 
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index 2bc1782ad..65fa1495f 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -142,6 +142,7 @@ enum class PredCondition : u64 {
     GreaterThan = 4,
     NotEqual = 5,
     GreaterEqual = 6,
+    LessThanWithNan = 9,
     NotEqualWithNan = 13,
     // TODO(Subv): Other condition types
 };
@@ -201,6 +202,11 @@ enum class IMinMaxExchange : u64 {
     XHi = 3,
 };
 
+enum class FlowCondition : u64 {
+    Always = 0xF,
+    Fcsm_Tr = 0x1C, // TODO(bunnei): What is this used for?
+};
+
 union Instruction {
     Instruction& operator=(const Instruction& instr) {
         value = instr.value;
@@ -298,6 +304,13 @@ union Instruction {
     } iadd32i;
 
     union {
+        BitField<53, 1, u64> negate_b;
+        BitField<54, 1, u64> abs_a;
+        BitField<56, 1, u64> negate_a;
+        BitField<57, 1, u64> abs_b;
+    } fadd32i;
+
+    union {
         BitField<20, 8, u64> shift_position;
         BitField<28, 8, u64> shift_length;
         BitField<48, 1, u64> negate_b;
@@ -309,6 +322,10 @@ union Instruction {
     } bfe;
 
     union {
+        BitField<0, 5, FlowCondition> cond;
+    } flow;
+
+    union {
         BitField<48, 1, u64> negate_b;
         BitField<49, 1, u64> negate_c;
     } ffma;
@@ -487,6 +504,7 @@ public:
         FADD_C,
         FADD_R,
         FADD_IMM,
+        FADD32I,
         FMUL_C,
         FMUL_R,
         FMUL_IMM,
@@ -679,13 +697,14 @@ private:
             INST("1101101---------", Id::TLDS, Type::Memory, "TLDS"),
             INST("111000110000----", Id::EXIT, Type::Trivial, "EXIT"),
             INST("11100000--------", Id::IPA, Type::Trivial, "IPA"),
-            INST("001100101-------", Id::FFMA_IMM, Type::Ffma, "FFMA_IMM"),
+            INST("0011001-1-------", Id::FFMA_IMM, Type::Ffma, "FFMA_IMM"),
             INST("010010011-------", Id::FFMA_CR, Type::Ffma, "FFMA_CR"),
             INST("010100011-------", Id::FFMA_RC, Type::Ffma, "FFMA_RC"),
             INST("010110011-------", Id::FFMA_RR, Type::Ffma, "FFMA_RR"),
             INST("0100110001011---", Id::FADD_C, Type::Arithmetic, "FADD_C"),
             INST("0101110001011---", Id::FADD_R, Type::Arithmetic, "FADD_R"),
             INST("0011100-01011---", Id::FADD_IMM, Type::Arithmetic, "FADD_IMM"),
+            INST("000010----------", Id::FADD32I, Type::ArithmeticImmediate, "FADD32I"),
             INST("0100110001101---", Id::FMUL_C, Type::Arithmetic, "FMUL_C"),
             INST("0101110001101---", Id::FMUL_R, Type::Arithmetic, "FMUL_R"),
             INST("0011100-01101---", Id::FMUL_IMM, Type::Arithmetic, "FMUL_IMM"),
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index ea138d402..eecbc5ff0 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -15,6 +15,7 @@
 #include "common/microprofile.h"
 #include "common/scope_exit.h"
 #include "core/core.h"
+#include "core/frontend/emu_window.h"
 #include "core/hle/kernel/process.h"
 #include "core/settings.h"
 #include "video_core/engines/maxwell_3d.h"
@@ -22,6 +23,7 @@
 #include "video_core/renderer_opengl/gl_shader_gen.h"
 #include "video_core/renderer_opengl/maxwell_to_gl.h"
 #include "video_core/renderer_opengl/renderer_opengl.h"
+#include "video_core/video_core.h"
 
 using Maxwell = Tegra::Engines::Maxwell3D::Regs;
 using PixelFormat = SurfaceParams::PixelFormat;
@@ -181,6 +183,19 @@ std::pair<u8*, GLintptr> RasterizerOpenGL::SetupVertexArrays(u8* array_ptr,
     return {array_ptr, buffer_offset};
 }
 
+static GLShader::ProgramCode GetShaderProgramCode(Maxwell::ShaderProgram program) {
+    auto& gpu = Core::System().GetInstance().GPU().Maxwell3D();
+
+    // Fetch program code from memory
+    GLShader::ProgramCode program_code;
+    auto& shader_config = gpu.regs.shader_config[static_cast<size_t>(program)];
+    const u64 gpu_address{gpu.regs.code_address.CodeAddress() + shader_config.offset};
+    const boost::optional<VAddr> cpu_address{gpu.memory_manager.GpuToCpuAddress(gpu_address)};
+    Memory::ReadBlock(*cpu_address, program_code.data(), program_code.size() * sizeof(u64));
+
+    return program_code;
+}
+
 void RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr buffer_offset) {
     // Helper function for uploading uniform data
     const auto copy_buffer = [&](GLuint handle, GLintptr offset, GLsizeiptr size) {
@@ -193,26 +208,23 @@ void RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr buffer_offset) {
     };
 
     auto& gpu = Core::System().GetInstance().GPU().Maxwell3D();
-    ASSERT_MSG(!gpu.regs.shader_config[0].enable, "VertexA is unsupported!");
 
     // Next available bindpoints to use when uploading the const buffers and textures to the GLSL
     // shaders. The constbuffer bindpoint starts after the shader stage configuration bind points.
     u32 current_constbuffer_bindpoint = uniform_buffers.size();
     u32 current_texture_bindpoint = 0;
 
-    for (unsigned index = 1; index < Maxwell::MaxShaderProgram; ++index) {
+    for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
         auto& shader_config = gpu.regs.shader_config[index];
         const Maxwell::ShaderProgram program{static_cast<Maxwell::ShaderProgram>(index)};
 
-        const auto& stage = index - 1; // Stage indices are 0 - 5
-
-        const bool is_enabled = gpu.IsShaderStageEnabled(static_cast<Maxwell::ShaderStage>(stage));
-
         // Skip stages that are not enabled
-        if (!is_enabled) {
+        if (!gpu.regs.IsShaderConfigEnabled(index)) {
             continue;
         }
 
+        const size_t stage{index == 0 ? 0 : index - 1}; // VertexA and VertexB share stage 0
+
         GLShader::MaxwellUniformData ubo{};
         ubo.SetFromRegs(gpu.state.shader_stages[stage]);
         std::memcpy(buffer_ptr, &ubo, sizeof(ubo));
@@ -228,16 +240,21 @@ void RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr buffer_offset) {
         buffer_ptr += sizeof(GLShader::MaxwellUniformData);
         buffer_offset += sizeof(GLShader::MaxwellUniformData);
 
-        // Fetch program code from memory
-        GLShader::ProgramCode program_code;
-        const u64 gpu_address{gpu.regs.code_address.CodeAddress() + shader_config.offset};
-        const boost::optional<VAddr> cpu_address{gpu.memory_manager.GpuToCpuAddress(gpu_address)};
-        Memory::ReadBlock(*cpu_address, program_code.data(), program_code.size() * sizeof(u64));
-        GLShader::ShaderSetup setup{std::move(program_code)};
-
+        GLShader::ShaderSetup setup{GetShaderProgramCode(program)};
         GLShader::ShaderEntries shader_resources;
 
         switch (program) {
+        case Maxwell::ShaderProgram::VertexA: {
+            // VertexB is always enabled, so when VertexA is enabled, we have two vertex shaders.
+            // Conventional HW does not support this, so we combine VertexA and VertexB into one
+            // stage here.
+            setup.SetProgramB(GetShaderProgramCode(Maxwell::ShaderProgram::VertexB));
+            GLShader::MaxwellVSConfig vs_config{setup};
+            shader_resources =
+                shader_program_manager->UseProgrammableVertexShader(vs_config, setup);
+            break;
+        }
+
         case Maxwell::ShaderProgram::VertexB: {
             GLShader::MaxwellVSConfig vs_config{setup};
             shader_resources =
@@ -268,6 +285,12 @@ void RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr buffer_offset) {
         current_texture_bindpoint =
             SetupTextures(static_cast<Maxwell::ShaderStage>(stage), gl_stage_program,
                           current_texture_bindpoint, shader_resources.texture_samplers);
+
+        // When VertexA is enabled, we have dual vertex shaders
+        if (program == Maxwell::ShaderProgram::VertexA) {
+            // VertexB was combined with VertexA, so we skip the VertexB iteration
+            index++;
+        }
     }
 
     shader_program_manager->UseTrivialGeometryShader();
@@ -301,9 +324,6 @@ std::pair<Surface, Surface> RasterizerOpenGL::ConfigureFramebuffers(bool using_c
                                                                     bool using_depth_fb) {
     const auto& regs = Core::System().GetInstance().GPU().Maxwell3D().regs;
 
-    // Sync the depth test state before configuring the framebuffer surfaces.
-    SyncDepthTestState();
-
     // TODO(bunnei): Implement this
     const bool has_stencil = false;
 
@@ -368,11 +388,20 @@ void RasterizerOpenGL::Clear() {
     if (regs.clear_buffers.Z) {
         clear_mask |= GL_DEPTH_BUFFER_BIT;
         use_depth_fb = true;
+
+        // Always enable the depth write when clearing the depth buffer. The depth write mask is
+        // ignored when clearing the buffer in the Switch, but OpenGL obeys it so we set it to true.
+        state.depth.test_enabled = true;
+        state.depth.write_mask = GL_TRUE;
+        state.depth.test_func = GL_ALWAYS;
+        state.Apply();
     }
 
     if (clear_mask == 0)
         return;
 
+    ScopeAcquireGLContext acquire_context;
+
     auto [dirty_color_surface, dirty_depth_surface] =
         ConfigureFramebuffers(use_color_fb, use_depth_fb);
 
@@ -399,9 +428,12 @@ void RasterizerOpenGL::DrawArrays() {
     MICROPROFILE_SCOPE(OpenGL_Drawing);
     const auto& regs = Core::System().GetInstance().GPU().Maxwell3D().regs;
 
+    ScopeAcquireGLContext acquire_context;
+
     auto [dirty_color_surface, dirty_depth_surface] =
         ConfigureFramebuffers(true, regs.zeta.Address() != 0);
 
+    SyncDepthTestState();
     SyncBlendState();
     SyncCullMode();
 
@@ -605,9 +637,6 @@ u32 RasterizerOpenGL::SetupConstBuffers(Maxwell::ShaderStage stage, GLuint progr
     auto& gpu = Core::System::GetInstance().GPU();
     auto& maxwell3d = gpu.Get3DEngine();
 
-    ASSERT_MSG(maxwell3d.IsShaderStageEnabled(stage),
-               "Attempted to upload constbuffer of disabled shader stage");
-
     // Reset all buffer draw state for this stage.
     for (auto& buffer : state.draw.const_buffers[static_cast<size_t>(stage)]) {
         buffer.bindpoint = 0;
@@ -674,9 +703,6 @@ u32 RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, GLuint program,
     auto& gpu = Core::System::GetInstance().GPU();
     auto& maxwell3d = gpu.Get3DEngine();
 
-    ASSERT_MSG(maxwell3d.IsShaderStageEnabled(stage),
-               "Attempted to upload textures of disabled shader stage");
-
     ASSERT_MSG(current_unit + entries.size() <= std::size(state.texture_units),
                "Exceeded the number of active textures.");
 
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
index 323ff7408..c171c4c5b 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
@@ -105,6 +105,7 @@ static constexpr std::array<FormatTuple, SurfaceParams::MaxPixelFormat> tex_form
     {GL_COMPRESSED_RGBA_BPTC_UNORM_ARB, GL_RGB, GL_UNSIGNED_INT_8_8_8_8, ComponentType::UNorm,
      true},                                                             // BC7U
     {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, ComponentType::UNorm, false}, // ASTC_2D_4X4
+    {GL_RG8, GL_RG, GL_UNSIGNED_BYTE, ComponentType::UNorm, false},     // G8R8
 
     // DepthStencil formats
     {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, ComponentType::UNorm,
@@ -112,6 +113,8 @@ static constexpr std::array<FormatTuple, SurfaceParams::MaxPixelFormat> tex_form
     {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, ComponentType::UNorm,
      false},                                                                            // S8Z24
     {GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT, ComponentType::Float, false}, // Z32F
+    {GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT, ComponentType::UNorm,
+     false}, // Z16
 }};
 
 static const FormatTuple& GetFormatTuple(PixelFormat pixel_format, ComponentType component_type) {
@@ -194,8 +197,9 @@ static constexpr std::array<void (*)(u32, u32, u32, u8*, Tegra::GPUVAddr),
         MortonCopy<true, PixelFormat::DXT1>,         MortonCopy<true, PixelFormat::DXT23>,
         MortonCopy<true, PixelFormat::DXT45>,        MortonCopy<true, PixelFormat::DXN1>,
         MortonCopy<true, PixelFormat::BC7U>,         MortonCopy<true, PixelFormat::ASTC_2D_4X4>,
-        MortonCopy<true, PixelFormat::Z24S8>,        MortonCopy<true, PixelFormat::S8Z24>,
-        MortonCopy<true, PixelFormat::Z32F>,
+        MortonCopy<true, PixelFormat::G8R8>,         MortonCopy<true, PixelFormat::Z24S8>,
+        MortonCopy<true, PixelFormat::S8Z24>,        MortonCopy<true, PixelFormat::Z32F>,
+        MortonCopy<true, PixelFormat::Z16>,
 };
 
 static constexpr std::array<void (*)(u32, u32, u32, u8*, Tegra::GPUVAddr),
@@ -215,10 +219,12 @@ static constexpr std::array<void (*)(u32, u32, u32, u8*, Tegra::GPUVAddr),
         nullptr,
         nullptr,
         nullptr,
-        MortonCopy<false, PixelFormat::ABGR8>,
+        nullptr,
+        MortonCopy<false, PixelFormat::G8R8>,
         MortonCopy<false, PixelFormat::Z24S8>,
         MortonCopy<false, PixelFormat::S8Z24>,
         MortonCopy<false, PixelFormat::Z32F>,
+        MortonCopy<false, PixelFormat::Z16>,
 };
 
 // Allocate an uninitialized texture of appropriate size and format for the surface
@@ -271,9 +277,10 @@ static void ConvertS8Z24ToZ24S8(std::vector<u8>& data, u32 width, u32 height) {
 
     S8Z24 input_pixel{};
     Z24S8 output_pixel{};
+    const auto bpp{CachedSurface::GetGLBytesPerPixel(PixelFormat::S8Z24)};
     for (size_t y = 0; y < height; ++y) {
         for (size_t x = 0; x < width; ++x) {
-            const size_t offset{y * width + x};
+            const size_t offset{bpp * (y * width + x)};
             std::memcpy(&input_pixel, &data[offset], sizeof(S8Z24));
             output_pixel.s8.Assign(input_pixel.s8);
             output_pixel.z24.Assign(input_pixel.z24);
@@ -281,6 +288,19 @@ static void ConvertS8Z24ToZ24S8(std::vector<u8>& data, u32 width, u32 height) {
         }
     }
 }
+
+static void ConvertG8R8ToR8G8(std::vector<u8>& data, u32 width, u32 height) {
+    const auto bpp{CachedSurface::GetGLBytesPerPixel(PixelFormat::G8R8)};
+    for (size_t y = 0; y < height; ++y) {
+        for (size_t x = 0; x < width; ++x) {
+            const size_t offset{bpp * (y * width + x)};
+            const u8 temp{data[offset]};
+            data[offset] = data[offset + 1];
+            data[offset + 1] = temp;
+        }
+    }
+}
+
 /**
  * Helper function to perform software conversion (as needed) when loading a buffer from Switch
  * memory. This is for Maxwell pixel formats that cannot be represented as-is in OpenGL or with
@@ -301,6 +321,11 @@ static void ConvertFormatAsNeeded_LoadGLBuffer(std::vector<u8>& data, PixelForma
         // Convert the S8Z24 depth format to Z24S8, as OpenGL does not support S8Z24.
         ConvertS8Z24ToZ24S8(data, width, height);
         break;
+
+    case PixelFormat::G8R8:
+        // Convert the G8R8 color format to R8G8, as OpenGL does not support G8R8.
+        ConvertG8R8ToR8G8(data, width, height);
+        break;
     }
 }
 
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
index 1bedae992..718c45ce1 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
@@ -37,13 +37,15 @@ struct SurfaceParams {
         DXN1 = 11, // This is also known as BC4
         BC7U = 12,
         ASTC_2D_4X4 = 13,
+        G8R8 = 14,
 
         MaxColorFormat,
 
         // DepthStencil formats
-        Z24S8 = 14,
-        S8Z24 = 15,
-        Z32F = 16,
+        Z24S8 = 15,
+        S8Z24 = 16,
+        Z32F = 17,
+        Z16 = 18,
 
         MaxDepthStencilFormat,
 
@@ -95,9 +97,11 @@ struct SurfaceParams {
             4, // DXN1
             4, // BC7U
             4, // ASTC_2D_4X4
+            1, // G8R8
             1, // Z24S8
             1, // S8Z24
             1, // Z32F
+            1, // Z16
         }};
 
         ASSERT(static_cast<size_t>(format) < compression_factor_table.size());
@@ -123,9 +127,11 @@ struct SurfaceParams {
             64,  // DXN1
             128, // BC7U
             32,  // ASTC_2D_4X4
+            16,  // G8R8
             32,  // Z24S8
             32,  // S8Z24
             32,  // Z32F
+            16,  // Z16
         }};
 
         ASSERT(static_cast<size_t>(format) < bpp_table.size());
@@ -143,6 +149,8 @@ struct SurfaceParams {
             return PixelFormat::Z24S8;
         case Tegra::DepthFormat::Z32_FLOAT:
             return PixelFormat::Z32F;
+        case Tegra::DepthFormat::Z16_UNORM:
+            return PixelFormat::Z16;
         default:
             LOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format));
             UNREACHABLE();
@@ -181,6 +189,8 @@ struct SurfaceParams {
             return PixelFormat::A1B5G5R5;
         case Tegra::Texture::TextureFormat::R8:
             return PixelFormat::R8;
+        case Tegra::Texture::TextureFormat::G8R8:
+            return PixelFormat::G8R8;
         case Tegra::Texture::TextureFormat::R16_G16_B16_A16:
             return PixelFormat::RGBA16F;
         case Tegra::Texture::TextureFormat::BF10GF11RF11:
@@ -218,6 +228,8 @@ struct SurfaceParams {
             return Tegra::Texture::TextureFormat::A1B5G5R5;
         case PixelFormat::R8:
             return Tegra::Texture::TextureFormat::R8;
+        case PixelFormat::G8R8:
+            return Tegra::Texture::TextureFormat::G8R8;
         case PixelFormat::RGBA16F:
             return Tegra::Texture::TextureFormat::R16_G16_B16_A16;
         case PixelFormat::R11FG11FB10F:
@@ -249,6 +261,8 @@ struct SurfaceParams {
             return Tegra::DepthFormat::Z24_S8_UNORM;
         case PixelFormat::Z32F:
             return Tegra::DepthFormat::Z32_FLOAT;
+        case PixelFormat::Z16:
+            return Tegra::DepthFormat::Z16_UNORM;
         default:
             UNREACHABLE();
         }
@@ -295,6 +309,7 @@ struct SurfaceParams {
 
     static ComponentType ComponentTypeFromDepthFormat(Tegra::DepthFormat format) {
         switch (format) {
+        case Tegra::DepthFormat::Z16_UNORM:
         case Tegra::DepthFormat::S8_Z24_UNORM:
         case Tegra::DepthFormat::Z24_S8_UNORM:
             return ComponentType::UNorm;
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index 5914077e8..5fae95788 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -42,13 +42,14 @@ enum class ExitMethod {
 struct Subroutine {
     /// Generates a name suitable for GLSL source code.
     std::string GetName() const {
-        return "sub_" + std::to_string(begin) + '_' + std::to_string(end);
+        return "sub_" + std::to_string(begin) + '_' + std::to_string(end) + '_' + suffix;
     }
 
-    u32 begin;              ///< Entry point of the subroutine.
-    u32 end;                ///< Return point of the subroutine.
-    ExitMethod exit_method; ///< Exit method of the subroutine.
-    std::set<u32> labels;   ///< Addresses refereced by JMP instructions.
+    u32 begin;                 ///< Entry point of the subroutine.
+    u32 end;                   ///< Return point of the subroutine.
+    const std::string& suffix; ///< Suffix of the shader, used to make a unique subroutine name
+    ExitMethod exit_method;    ///< Exit method of the subroutine.
+    std::set<u32> labels;      ///< Addresses referenced by JMP instructions.
 
     bool operator<(const Subroutine& rhs) const {
         return std::tie(begin, end) < std::tie(rhs.begin, rhs.end);
@@ -58,11 +59,11 @@ struct Subroutine {
 /// Analyzes shader code and produces a set of subroutines.
 class ControlFlowAnalyzer {
 public:
-    ControlFlowAnalyzer(const ProgramCode& program_code, u32 main_offset)
+    ControlFlowAnalyzer(const ProgramCode& program_code, u32 main_offset, const std::string& suffix)
         : program_code(program_code) {
 
         // Recursively finds all subroutines.
-        const Subroutine& program_main = AddSubroutine(main_offset, PROGRAM_END);
+        const Subroutine& program_main = AddSubroutine(main_offset, PROGRAM_END, suffix);
         if (program_main.exit_method != ExitMethod::AlwaysEnd)
             throw DecompileFail("Program does not always end");
     }
@@ -77,12 +78,12 @@ private:
     std::map<std::pair<u32, u32>, ExitMethod> exit_method_map;
 
     /// Adds and analyzes a new subroutine if it is not added yet.
-    const Subroutine& AddSubroutine(u32 begin, u32 end) {
-        auto iter = subroutines.find(Subroutine{begin, end});
+    const Subroutine& AddSubroutine(u32 begin, u32 end, const std::string& suffix) {
+        auto iter = subroutines.find(Subroutine{begin, end, suffix});
         if (iter != subroutines.end())
             return *iter;
 
-        Subroutine subroutine{begin, end};
+        Subroutine subroutine{begin, end, suffix};
         subroutine.exit_method = Scan(begin, end, subroutine.labels);
         if (subroutine.exit_method == ExitMethod::Undetermined)
             throw DecompileFail("Recursive function detected");
@@ -191,7 +192,8 @@ public:
         UnsignedInteger,
     };
 
-    GLSLRegister(size_t index, ShaderWriter& shader) : index{index}, shader{shader} {}
+    GLSLRegister(size_t index, ShaderWriter& shader, const std::string& suffix)
+        : index{index}, shader{shader}, suffix{suffix} {}
 
     /// Gets the GLSL type string for a register
     static std::string GetTypeString(Type type) {
@@ -216,7 +218,7 @@ public:
     /// Returns a GLSL string representing the current state of the register
     const std::string GetActiveString() {
         declr_type.insert(active_type);
-        return GetPrefixString(active_type) + std::to_string(index);
+        return GetPrefixString(active_type) + std::to_string(index) + '_' + suffix;
     }
 
     /// Returns true if the active type is a float
@@ -251,6 +253,7 @@ private:
     ShaderWriter& shader;
     Type active_type{Type::Float};
     std::set<Type> declr_type;
+    const std::string& suffix;
 };
 
 /**
@@ -262,8 +265,8 @@ private:
 class GLSLRegisterManager {
 public:
     GLSLRegisterManager(ShaderWriter& shader, ShaderWriter& declarations,
-                        const Maxwell3D::Regs::ShaderStage& stage)
-        : shader{shader}, declarations{declarations}, stage{stage} {
+                        const Maxwell3D::Regs::ShaderStage& stage, const std::string& suffix)
+        : shader{shader}, declarations{declarations}, stage{stage}, suffix{suffix} {
         BuildRegisterList();
     }
 
@@ -430,12 +433,12 @@ public:
     }
 
     /// Add declarations for registers
-    void GenerateDeclarations() {
+    void GenerateDeclarations(const std::string& suffix) {
         for (const auto& reg : regs) {
             for (const auto& type : reg.DeclaredTypes()) {
                 declarations.AddLine(GLSLRegister::GetTypeString(type) + ' ' +
-                                     GLSLRegister::GetPrefixString(type) +
-                                     std::to_string(reg.GetIndex()) + " = 0;");
+                                     reg.GetPrefixString(type) + std::to_string(reg.GetIndex()) +
+                                     '_' + suffix + " = 0;");
             }
         }
         declarations.AddNewLine();
@@ -558,7 +561,7 @@ private:
     /// Build the GLSL register list.
     void BuildRegisterList() {
         for (size_t index = 0; index < Register::NumRegisters; ++index) {
-            regs.emplace_back(index, shader);
+            regs.emplace_back(index, shader, suffix);
         }
     }
 
@@ -620,16 +623,17 @@ private:
     std::array<ConstBufferEntry, Maxwell3D::Regs::MaxConstBuffers> declr_const_buffers;
     std::vector<SamplerEntry> used_samplers;
     const Maxwell3D::Regs::ShaderStage& stage;
+    const std::string& suffix;
 };
 
 class GLSLGenerator {
 public:
     GLSLGenerator(const std::set<Subroutine>& subroutines, const ProgramCode& program_code,
-                  u32 main_offset, Maxwell3D::Regs::ShaderStage stage)
+                  u32 main_offset, Maxwell3D::Regs::ShaderStage stage, const std::string& suffix)
         : subroutines(subroutines), program_code(program_code), main_offset(main_offset),
-          stage(stage) {
+          stage(stage), suffix(suffix) {
 
-        Generate();
+        Generate(suffix);
     }
 
     std::string GetShaderCode() {
@@ -644,7 +648,7 @@ public:
 private:
     /// Gets the Subroutine object corresponding to the specified address.
     const Subroutine& GetSubroutine(u32 begin, u32 end) const {
-        auto iter = subroutines.find(Subroutine{begin, end});
+        auto iter = subroutines.find(Subroutine{begin, end, suffix});
         ASSERT(iter != subroutines.end());
         return *iter;
     }
@@ -689,7 +693,7 @@ private:
         // Can't assign to the constant predicate.
         ASSERT(pred != static_cast<u64>(Pred::UnusedIndex));
 
-        std::string variable = 'p' + std::to_string(pred);
+        std::string variable = 'p' + std::to_string(pred) + '_' + suffix;
         shader.AddLine(variable + " = " + value + ';');
         declr_predicates.insert(std::move(variable));
     }
@@ -707,7 +711,7 @@ private:
         if (index == static_cast<u64>(Pred::UnusedIndex))
             variable = "true";
         else
-            variable = 'p' + std::to_string(index);
+            variable = 'p' + std::to_string(index) + '_' + suffix;
 
         if (negate) {
             return "!(" + variable + ')';
@@ -728,10 +732,10 @@ private:
                                        const std::string& op_a, const std::string& op_b) const {
         using Tegra::Shader::PredCondition;
         static const std::unordered_map<PredCondition, const char*> PredicateComparisonStrings = {
-            {PredCondition::LessThan, "<"},         {PredCondition::Equal, "=="},
-            {PredCondition::LessEqual, "<="},       {PredCondition::GreaterThan, ">"},
-            {PredCondition::NotEqual, "!="},        {PredCondition::GreaterEqual, ">="},
-            {PredCondition::NotEqualWithNan, "!="},
+            {PredCondition::LessThan, "<"},        {PredCondition::Equal, "=="},
+            {PredCondition::LessEqual, "<="},      {PredCondition::GreaterThan, ">"},
+            {PredCondition::NotEqual, "!="},       {PredCondition::GreaterEqual, ">="},
+            {PredCondition::LessThanWithNan, "<"}, {PredCondition::NotEqualWithNan, "!="},
         };
 
         const auto& comparison{PredicateComparisonStrings.find(condition)};
@@ -739,7 +743,8 @@ private:
                    "Unknown predicate comparison operation");
 
         std::string predicate{'(' + op_a + ") " + comparison->second + " (" + op_b + ')'};
-        if (condition == PredCondition::NotEqualWithNan) {
+        if (condition == PredCondition::LessThanWithNan ||
+            condition == PredCondition::NotEqualWithNan) {
             predicate += " || isnan(" + op_a + ") || isnan(" + op_b + ')';
         }
 
@@ -968,6 +973,29 @@ private:
                     regs.GetRegisterAsFloat(instr.gpr8) + " * " + GetImmediate32(instr), 1, 1);
                 break;
             }
+            case OpCode::Id::FADD32I: {
+                std::string op_a = regs.GetRegisterAsFloat(instr.gpr8);
+                std::string op_b = GetImmediate32(instr);
+
+                if (instr.fadd32i.abs_a) {
+                    op_a = "abs(" + op_a + ')';
+                }
+
+                if (instr.fadd32i.negate_a) {
+                    op_a = "-(" + op_a + ')';
+                }
+
+                if (instr.fadd32i.abs_b) {
+                    op_b = "abs(" + op_b + ')';
+                }
+
+                if (instr.fadd32i.negate_b) {
+                    op_b = "-(" + op_b + ')';
+                }
+
+                regs.SetRegisterToFloat(instr.gpr0, 0, op_a + " + " + op_b, 1, 1);
+                break;
+            }
             }
             break;
         }
@@ -1616,16 +1644,32 @@ private:
                     shader.AddLine("color.a = " + regs.GetRegisterAsFloat(3) + ';');
                 }
 
-                shader.AddLine("return true;");
-                if (instr.pred.pred_index == static_cast<u64>(Pred::UnusedIndex)) {
-                    // If this is an unconditional exit then just end processing here, otherwise
-                    // we have to account for the possibility of the condition not being met, so
-                    // continue processing the next instruction.
-                    offset = PROGRAM_END - 1;
+                switch (instr.flow.cond) {
+                case Tegra::Shader::FlowCondition::Always:
+                    shader.AddLine("return true;");
+                    if (instr.pred.pred_index == static_cast<u64>(Pred::UnusedIndex)) {
+                        // If this is an unconditional exit then just end processing here,
+                        // otherwise we have to account for the possibility of the condition
+                        // not being met, so continue processing the next instruction.
+                        offset = PROGRAM_END - 1;
+                    }
+                    break;
+
+                case Tegra::Shader::FlowCondition::Fcsm_Tr:
+                    // TODO(bunnei): What is this used for? If we assume this condition is not
+                    // satisfied, dual vertex shaders in Farming Simulator make more sense
+                    LOG_CRITICAL(HW_GPU, "Skipping unknown FlowCondition::Fcsm_Tr");
+                    break;
+
+                default:
+                    LOG_CRITICAL(HW_GPU, "Unhandled flow condition: {}",
+                                 static_cast<u32>(instr.flow.cond.Value()));
+                    UNREACHABLE();
                 }
                 break;
             }
             case OpCode::Id::KIL: {
+                ASSERT(instr.flow.cond == Tegra::Shader::FlowCondition::Always);
                 shader.AddLine("discard;");
                 break;
             }
@@ -1646,8 +1690,9 @@ private:
                 // can ignore this when generating GLSL code.
                 break;
             }
-            case OpCode::Id::DEPBAR:
-            case OpCode::Id::SYNC: {
+            case OpCode::Id::SYNC:
+                ASSERT(instr.flow.cond == Tegra::Shader::FlowCondition::Always);
+            case OpCode::Id::DEPBAR: {
                 // TODO(Subv): Find out if we actually have to care about these instructions or if
                 // the GLSL compiler takes care of that for us.
                 LOG_WARNING(HW_GPU, "DEPBAR/SYNC instruction is stubbed");
@@ -1687,7 +1732,7 @@ private:
         return program_counter;
     }
 
-    void Generate() {
+    void Generate(const std::string& suffix) {
         // Add declarations for all subroutines
         for (const auto& subroutine : subroutines) {
             shader.AddLine("bool " + subroutine.GetName() + "();");
@@ -1695,7 +1740,7 @@ private:
         shader.AddNewLine();
 
         // Add the main entry point
-        shader.AddLine("bool exec_shader() {");
+        shader.AddLine("bool exec_" + suffix + "() {");
         ++shader.scope;
         CallSubroutine(GetSubroutine(main_offset, PROGRAM_END));
         --shader.scope;
@@ -1758,7 +1803,7 @@ private:
 
     /// Add declarations for registers
     void GenerateDeclarations() {
-        regs.GenerateDeclarations();
+        regs.GenerateDeclarations(suffix);
 
         for (const auto& pred : declr_predicates) {
             declarations.AddLine("bool " + pred + " = false;");
@@ -1771,27 +1816,30 @@ private:
     const ProgramCode& program_code;
     const u32 main_offset;
     Maxwell3D::Regs::ShaderStage stage;
+    const std::string& suffix;
 
     ShaderWriter shader;
     ShaderWriter declarations;
-    GLSLRegisterManager regs{shader, declarations, stage};
+    GLSLRegisterManager regs{shader, declarations, stage, suffix};
 
     // Declarations
     std::set<std::string> declr_predicates;
 }; // namespace Decompiler
 
 std::string GetCommonDeclarations() {
-    std::string declarations = "bool exec_shader();\n";
+    std::string declarations;
     declarations += "#define MAX_CONSTBUFFER_ELEMENTS " +
                     std::to_string(RasterizerOpenGL::MaxConstbufferSize / (sizeof(GLvec4)));
+    declarations += '\n';
     return declarations;
 }
 
 boost::optional<ProgramResult> DecompileProgram(const ProgramCode& program_code, u32 main_offset,
-                                                Maxwell3D::Regs::ShaderStage stage) {
+                                                Maxwell3D::Regs::ShaderStage stage,
+                                                const std::string& suffix) {
     try {
-        auto subroutines = ControlFlowAnalyzer(program_code, main_offset).GetSubroutines();
-        GLSLGenerator generator(subroutines, program_code, main_offset, stage);
+        auto subroutines = ControlFlowAnalyzer(program_code, main_offset, suffix).GetSubroutines();
+        GLSLGenerator generator(subroutines, program_code, main_offset, stage, suffix);
         return ProgramResult{generator.GetShaderCode(), generator.GetEntries()};
     } catch (const DecompileFail& exception) {
         LOG_ERROR(HW_GPU, "Shader decompilation failed: {}", exception.what());
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.h b/src/video_core/renderer_opengl/gl_shader_decompiler.h
index 382c76b7a..7610dad3a 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.h
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.h
@@ -20,7 +20,8 @@ using Tegra::Engines::Maxwell3D;
 std::string GetCommonDeclarations();
 
 boost::optional<ProgramResult> DecompileProgram(const ProgramCode& program_code, u32 main_offset,
-                                                Maxwell3D::Regs::ShaderStage stage);
+                                                Maxwell3D::Regs::ShaderStage stage,
+                                                const std::string& suffix);
 
 } // namespace Decompiler
 } // namespace GLShader
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp
index c1e6fac9f..129c777d1 100644
--- a/src/video_core/renderer_opengl/gl_shader_gen.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp
@@ -17,10 +17,17 @@ ProgramResult GenerateVertexShader(const ShaderSetup& setup, const MaxwellVSConf
     std::string out = "#version 430 core\n";
     out += "#extension GL_ARB_separate_shader_objects : enable\n\n";
     out += Decompiler::GetCommonDeclarations();
+    out += "bool exec_vertex();\n";
+
+    if (setup.IsDualProgram()) {
+        out += "bool exec_vertex_b();\n";
+    }
+
+    ProgramResult program =
+        Decompiler::DecompileProgram(setup.program.code, PROGRAM_OFFSET,
+                                     Maxwell3D::Regs::ShaderStage::Vertex, "vertex")
+            .get_value_or({});
 
-    ProgramResult program = Decompiler::DecompileProgram(setup.program_code, PROGRAM_OFFSET,
-                                                         Maxwell3D::Regs::ShaderStage::Vertex)
-                                .get_value_or({});
     out += R"(
 
 out gl_PerVertex {
@@ -34,7 +41,14 @@ layout (std140) uniform vs_config {
 };
 
 void main() {
-    exec_shader();
+    exec_vertex();
+)";
+
+    if (setup.IsDualProgram()) {
+        out += "    exec_vertex_b();";
+    }
+
+    out += R"(
 
     // Viewport can be flipped, which is unsupported by glViewport
     position.xy *= viewport_flip.xy;
@@ -44,8 +58,19 @@ void main() {
     // For now, this is here to bring order in lieu of proper emulation
     position.w = 1.0;
 }
+
 )";
+
     out += program.first;
+
+    if (setup.IsDualProgram()) {
+        ProgramResult program_b =
+            Decompiler::DecompileProgram(setup.program.code_b, PROGRAM_OFFSET,
+                                         Maxwell3D::Regs::ShaderStage::Vertex, "vertex_b")
+                .get_value_or({});
+        out += program_b.first;
+    }
+
     return {out, program.second};
 }
 
@@ -53,12 +78,13 @@ ProgramResult GenerateFragmentShader(const ShaderSetup& setup, const MaxwellFSCo
     std::string out = "#version 430 core\n";
     out += "#extension GL_ARB_separate_shader_objects : enable\n\n";
     out += Decompiler::GetCommonDeclarations();
+    out += "bool exec_fragment();\n";
 
-    ProgramResult program = Decompiler::DecompileProgram(setup.program_code, PROGRAM_OFFSET,
-                                                         Maxwell3D::Regs::ShaderStage::Fragment)
-                                .get_value_or({});
+    ProgramResult program =
+        Decompiler::DecompileProgram(setup.program.code, PROGRAM_OFFSET,
+                                     Maxwell3D::Regs::ShaderStage::Fragment, "fragment")
+            .get_value_or({});
     out += R"(
-
 in vec4 position;
 out vec4 color;
 
@@ -67,7 +93,7 @@ layout (std140) uniform fs_config {
 };
 
 void main() {
-    exec_shader();
+    exec_fragment();
 }
 
 )";
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.h b/src/video_core/renderer_opengl/gl_shader_gen.h
index ed890e0f9..4729ce0fc 100644
--- a/src/video_core/renderer_opengl/gl_shader_gen.h
+++ b/src/video_core/renderer_opengl/gl_shader_gen.h
@@ -115,21 +115,48 @@ struct ShaderEntries {
 using ProgramResult = std::pair<std::string, ShaderEntries>;
 
 struct ShaderSetup {
-    ShaderSetup(ProgramCode&& program_code) : program_code(std::move(program_code)) {}
+    ShaderSetup(const ProgramCode& program_code) {
+        program.code = program_code;
+    }
+
+    struct {
+        ProgramCode code;
+        ProgramCode code_b; // Used for dual vertex shaders
+    } program;
 
-    ProgramCode program_code;
     bool program_code_hash_dirty = true;
 
     u64 GetProgramCodeHash() {
         if (program_code_hash_dirty) {
-            program_code_hash = Common::ComputeHash64(&program_code, sizeof(program_code));
+            program_code_hash = GetNewHash();
             program_code_hash_dirty = false;
         }
         return program_code_hash;
     }
 
+    /// Used in scenarios where we have dual vertex shaders
+    void SetProgramB(const ProgramCode& program_b) {
+        program.code_b = program_b;
+        has_program_b = true;
+    }
+
+    bool IsDualProgram() const {
+        return has_program_b;
+    }
+
 private:
+    u64 GetNewHash() const {
+        if (has_program_b) {
+            // Compute hash over dual shader programs
+            return Common::ComputeHash64(&program, sizeof(program));
+        }
+        // Single program: hash its full byte size; size() would only give the element count
+        // (presumably ProgramCode is a fixed-size array, as sizeof(program) above assumes).
+        return Common::ComputeHash64(&program.code, sizeof(program.code));
+    }
+
     u64 program_code_hash{};
+    bool has_program_b{};
 };
 
 struct MaxwellShaderConfigCommon {
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index 00841e937..1930fa6ef 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -92,11 +92,24 @@ static std::array<GLfloat, 3 * 2> MakeOrthographicMatrix(const float width, cons
     return matrix;
 }
 
+ScopeAcquireGLContext::ScopeAcquireGLContext() {
+    if (Settings::values.use_multi_core) {
+        VideoCore::g_emu_window->MakeCurrent();
+    }
+}
+ScopeAcquireGLContext::~ScopeAcquireGLContext() {
+    if (Settings::values.use_multi_core) {
+        VideoCore::g_emu_window->DoneCurrent();
+    }
+}
+
 RendererOpenGL::RendererOpenGL() = default;
 RendererOpenGL::~RendererOpenGL() = default;
 
 /// Swap buffers (render frame)
 void RendererOpenGL::SwapBuffers(boost::optional<const Tegra::FramebufferConfig&> framebuffer) {
+    ScopeAcquireGLContext acquire_context;
+
     Core::System::GetInstance().perf_stats.EndSystemFrame();
 
     // Maintain the rasterizer's state as a priority
@@ -418,7 +431,7 @@ static void APIENTRY DebugHandler(GLenum source, GLenum type, GLuint id, GLenum
 
 /// Initialize the renderer
 bool RendererOpenGL::Init() {
-    render_window->MakeCurrent();
+    ScopeAcquireGLContext acquire_context;
 
     if (GLAD_GL_KHR_debug) {
         glEnable(GL_DEBUG_OUTPUT);
diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h
index 21f0d298c..fd0267cf5 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.h
+++ b/src/video_core/renderer_opengl/renderer_opengl.h
@@ -31,6 +31,13 @@ struct ScreenInfo {
     TextureInfo texture;
 };
 
+/// Helper class to acquire/release OpenGL context within a given scope
+class ScopeAcquireGLContext : NonCopyable {
+public:
+    ScopeAcquireGLContext();
+    ~ScopeAcquireGLContext();
+};
+
 class RendererOpenGL : public RendererBase {
 public:
     RendererOpenGL();
diff --git a/src/video_core/textures/decoders.cpp b/src/video_core/textures/decoders.cpp
index b3937b2fe..be18aa299 100644
--- a/src/video_core/textures/decoders.cpp
+++ b/src/video_core/textures/decoders.cpp
@@ -62,6 +62,7 @@ u32 BytesPerPixel(TextureFormat format) {
         return 4;
     case TextureFormat::A1B5G5R5:
     case TextureFormat::B5G6R5:
+    case TextureFormat::G8R8:
         return 2;
     case TextureFormat::R8:
         return 1;
@@ -77,6 +78,8 @@ u32 BytesPerPixel(TextureFormat format) {
 
 static u32 DepthBytesPerPixel(DepthFormat format) {
     switch (format) {
+    case DepthFormat::Z16_UNORM:
+        return 2;
     case DepthFormat::S8_Z24_UNORM:
     case DepthFormat::Z24_S8_UNORM:
     case DepthFormat::Z32_FLOAT:
@@ -110,6 +113,7 @@ std::vector<u8> UnswizzleTexture(VAddr address, TextureFormat format, u32 width,
     case TextureFormat::A1B5G5R5:
     case TextureFormat::B5G6R5:
     case TextureFormat::R8:
+    case TextureFormat::G8R8:
     case TextureFormat::R16_G16_B16_A16:
     case TextureFormat::R32_G32_B32_A32:
     case TextureFormat::BF10GF11RF11:
@@ -133,6 +137,7 @@ std::vector<u8> UnswizzleDepthTexture(VAddr address, DepthFormat format, u32 wid
     std::vector<u8> unswizzled_data(width * height * bytes_per_pixel);
 
     switch (format) {
+    case DepthFormat::Z16_UNORM:
     case DepthFormat::S8_Z24_UNORM:
     case DepthFormat::Z24_S8_UNORM:
     case DepthFormat::Z32_FLOAT:
@@ -164,6 +169,7 @@ std::vector<u8> DecodeTexture(const std::vector<u8>& texture_data, TextureFormat
     case TextureFormat::A1B5G5R5:
     case TextureFormat::B5G6R5:
     case TextureFormat::R8:
+    case TextureFormat::G8R8:
     case TextureFormat::BF10GF11RF11:
     case TextureFormat::R32_G32_B32_A32:
         // TODO(Subv): For the time being just forward the same data without any decoding.