20 files changed, 760 insertions, 237 deletions
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index 98ed11ec5..2a3ff234a 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -74,8 +74,6 @@ void Maxwell3D::WriteReg(u32 method, u32 value, u32 remaining_params) {
 
     regs.reg_array[method] = value;
 
-#define MAXWELL3D_REG_INDEX(field_name) (offsetof(Regs, field_name) / sizeof(u32))
-
     switch (method) {
     case MAXWELL3D_REG_INDEX(code_address.code_address_high):
     case MAXWELL3D_REG_INDEX(code_address.code_address_low): {
@@ -136,7 +134,7 @@ void Maxwell3D::WriteReg(u32 method, u32 value, u32 remaining_params) {
         break;
     }
 
-#undef MAXWELL3D_REG_INDEX
+    VideoCore::g_renderer->Rasterizer()->NotifyMaxwellRegisterChanged(method);
 
     if (debug_context) {
         debug_context->OnEvent(Tegra::DebugContext::Event::MaxwellCommandProcessed, nullptr);
@@ -165,6 +163,7 @@ void Maxwell3D::ProcessQueryGet() {
 void Maxwell3D::DrawArrays() {
     LOG_DEBUG(HW_GPU, "called, topology=%d, count=%d", regs.draw.topology.Value(),
               regs.vertex_buffer.count);
+    ASSERT_MSG(!(regs.index_array.count && regs.vertex_buffer.count), "Both indexed and direct?");
 
     auto debug_context = Core::System::GetInstance().GetGPUDebugContext();
 
@@ -176,7 +175,8 @@ void Maxwell3D::DrawArrays() {
         debug_context->OnEvent(Tegra::DebugContext::Event::FinishedPrimitiveBatch, nullptr);
     }
 
-    VideoCore::g_renderer->Rasterizer()->AccelerateDrawBatch(false /*is_indexed*/);
+    const bool is_indexed{regs.index_array.count && !regs.vertex_buffer.count};
+    VideoCore::g_renderer->Rasterizer()->AccelerateDrawBatch(is_indexed);
 }
 
 void Maxwell3D::ProcessCBBind(Regs::ShaderStage stage) {
@@ -218,10 +218,12 @@ Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const {
     Texture::TICEntry tic_entry;
     Memory::ReadBlock(tic_address_cpu, &tic_entry, sizeof(Texture::TICEntry));
 
-    ASSERT_MSG(tic_entry.header_version == Texture::TICHeaderVersion::BlockLinear,
-               "TIC versions other than BlockLinear are unimplemented");
+    ASSERT_MSG(tic_entry.header_version == Texture::TICHeaderVersion::BlockLinear ||
+                   tic_entry.header_version == Texture::TICHeaderVersion::Pitch,
+               "TIC versions other than BlockLinear or Pitch are unimplemented");
 
-    ASSERT_MSG(tic_entry.texture_type == Texture::TextureType::Texture2D,
+    ASSERT_MSG((tic_entry.texture_type == Texture::TextureType::Texture2D) ||
+                   (tic_entry.texture_type == Texture::TextureType::Texture2DNoMipmap),
                "Texture types other than Texture2D are unimplemented");
 
     auto r_type = tic_entry.r_type.Value();
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index 1fae41cb2..d4fcedace 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -20,6 +20,9 @@
 namespace Tegra {
 namespace Engines {
 
+#define MAXWELL3D_REG_INDEX(field_name)                                                            \
+    (offsetof(Tegra::Engines::Maxwell3D::Regs, field_name) / sizeof(u32))
+
 class Maxwell3D final {
 public:
     explicit Maxwell3D(MemoryManager& memory_manager);
@@ -248,6 +251,52 @@ public:
             Patches = 0xe,
         };
 
+        enum class IndexFormat : u32 {
+            UnsignedByte = 0x0,  // 1 byte per index
+            UnsignedShort = 0x1, // 2 bytes per index
+            UnsignedInt = 0x2,   // 4 bytes per index
+        };
+
+        struct Blend {
+            enum class Equation : u32 {
+                Add = 1,
+                Subtract = 2,
+                ReverseSubtract = 3,
+                Min = 4,
+                Max = 5,
+            };
+
+            enum class Factor : u32 {
+                Zero = 0x1,
+                One = 0x2,
+                SourceColor = 0x3,
+                OneMinusSourceColor = 0x4,
+                SourceAlpha = 0x5,
+                OneMinusSourceAlpha = 0x6,
+                DestAlpha = 0x7,
+                OneMinusDestAlpha = 0x8,
+                DestColor = 0x9,
+                OneMinusDestColor = 0xa,
+                SourceAlphaSaturate = 0xb,
+                Source1Color = 0x10,
+                OneMinusSource1Color = 0x11,
+                Source1Alpha = 0x12,
+                OneMinusSource1Alpha = 0x13,
+                ConstantColor = 0x61,
+                OneMinusConstantColor = 0x62,
+                ConstantAlpha = 0x63,
+                OneMinusConstantAlpha = 0x64,
+            };
+
+            u32 separate_alpha;       // word 0 (method 0x780 as Regs::blend)
+            Equation equation_rgb;    // word 1 (method 0x781)
+            Factor factor_source_rgb; // word 2 (method 0x782)
+            Factor factor_dest_rgb;   // word 3 (method 0x783)
+            Equation equation_a;      // word 4 (method 0x784)
+            Factor factor_source_a;   // word 5 (method 0x785)
+            Factor factor_dest_a;     // word 6 (method 0x786)
+        };
+
         union {
             struct {
                 INSERT_PADDING_WORDS(0x200);
@@ -270,7 +319,15 @@ public:
                     }
                 } rt[NumRenderTargets];
 
-                INSERT_PADDING_WORDS(0x80);
+                struct {
+                    f32 scale_x;
+                    f32 scale_y;
+                    f32 scale_z;
+                    u32 translate_x;
+                    u32 translate_y;
+                    u32 translate_z;
+                    INSERT_PADDING_WORDS(2);
+                } viewport_transform[NumViewports];
 
                 struct {
                     union {
@@ -375,7 +432,42 @@ public:
                     };
                 } draw;
 
-                INSERT_PADDING_WORDS(0x139);
+                INSERT_PADDING_WORDS(0x6B);
+
+                struct {
+                    u32 start_addr_high; // upper 32 bits of StartAddress()
+                    u32 start_addr_low;  // lower 32 bits of StartAddress()
+                    u32 end_addr_high;   // upper 32 bits of EndAddress()
+                    u32 end_addr_low;    // lower 32 bits of EndAddress()
+                    IndexFormat format;
+                    u32 first;
+                    u32 count;
+
+                    unsigned FormatSizeInBytes() const {
+                        // Size in bytes of one index element for the selected format.
+                        if (format == IndexFormat::UnsignedByte)
+                            return 1;
+                        if (format == IndexFormat::UnsignedShort)
+                            return 2;
+                        if (format == IndexFormat::UnsignedInt)
+                            return 4;
+                        // Reached only when format holds a value outside IndexFormat.
+                        UNREACHABLE();
+                    }
+
+                    GPUVAddr StartAddress() const {
+                        const GPUVAddr upper{static_cast<GPUVAddr>(start_addr_high) << 32};
+                        return upper | start_addr_low;
+                    }
+
+                    GPUVAddr EndAddress() const {
+                        const GPUVAddr upper{static_cast<GPUVAddr>(end_addr_high) << 32};
+                        return upper | end_addr_low;
+                    }
+                } index_array;
+
+                INSERT_PADDING_WORDS(0xC7);
+
                 struct {
                     u32 query_address_high;
                     u32 query_address_low;
@@ -410,7 +502,9 @@ public:
                     }
                 } vertex_array[NumVertexArrays];
 
-                INSERT_PADDING_WORDS(0x40);
+                Blend blend;
+
+                INSERT_PADDING_WORDS(0x39);
 
                 struct {
                     u32 limit_high;
@@ -563,6 +657,7 @@ private:
                   "Field " #field_name " has invalid position")
 
 ASSERT_REG_POSITION(rt, 0x200);
+ASSERT_REG_POSITION(viewport_transform[0], 0x280);
 ASSERT_REG_POSITION(viewport, 0x300);
 ASSERT_REG_POSITION(vertex_buffer, 0x35D);
 ASSERT_REG_POSITION(zeta, 0x3F8);
@@ -572,8 +667,10 @@ ASSERT_REG_POSITION(tsc, 0x557);
 ASSERT_REG_POSITION(tic, 0x55D);
 ASSERT_REG_POSITION(code_address, 0x582);
 ASSERT_REG_POSITION(draw, 0x585);
+ASSERT_REG_POSITION(index_array, 0x5F2);
 ASSERT_REG_POSITION(query, 0x6C0);
 ASSERT_REG_POSITION(vertex_array[0], 0x700);
+ASSERT_REG_POSITION(blend, 0x780);
 ASSERT_REG_POSITION(vertex_array_limit[0], 0x7C0);
 ASSERT_REG_POSITION(shader_config[0], 0x800);
 ASSERT_REG_POSITION(const_buffer, 0x8E0);
diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h
index eff0c35a1..7cd125f05 100644
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@@ -4,6 +4,7 @@
 
 #pragma once
 
+#include <cstring>
 #include <map>
 #include <string>
 #include "common/bit_field.h"
@@ -12,14 +13,10 @@ namespace Tegra {
 namespace Shader {
 
 struct Register {
-    Register() = default;
+    constexpr Register() = default;
 
     constexpr Register(u64 value) : value(value) {}
 
-    constexpr u64 GetIndex() const {
-        return value;
-    }
-
     constexpr operator u64() const {
         return value;
     }
@@ -43,13 +40,13 @@ struct Register {
     }
 
 private:
-    u64 value;
+    u64 value{};
 };
 
 union Attribute {
     Attribute() = default;
 
-    constexpr Attribute(u64 value) : value(value) {}
+    constexpr explicit Attribute(u64 value) : value(value) {}
 
     enum class Index : u64 {
         Position = 7,
@@ -68,7 +65,20 @@ union Attribute {
     } fmt28;
 
     BitField<39, 8, u64> reg;
-    u64 value;
+    u64 value{};
+};
+
+union Sampler {
+    Sampler() = default;
+
+    constexpr explicit Sampler(u64 value) : value(value) {}
+
+    enum class Index : u64 {
+        Sampler_0 = 8,
+    };
+
+    BitField<36, 13, Index> index; // 13 bits wide, starting at bit 36 of value
+    u64 value{};                   // raw 64-bit word shared with the bit field above
 };
 
 union Uniform {
@@ -80,6 +90,7 @@ union OpCode {
     enum class Id : u64 {
         TEXS = 0x6C,
         IPA = 0xE0,
+        FMUL32_IMM = 0x1E,
         FFMA_IMM = 0x65,
         FFMA_CR = 0x93,
         FFMA_RC = 0xA3,
@@ -132,6 +143,7 @@ union OpCode {
 
         switch (op2) {
         case Id::IPA:
+        case Id::FMUL32_IMM:
             return op2;
         }
 
@@ -225,6 +237,7 @@ union OpCode {
         info_table[Id::FMUL_R] = {Type::Arithmetic, "fmul_r"};
         info_table[Id::FMUL_C] = {Type::Arithmetic, "fmul_c"};
         info_table[Id::FMUL_IMM] = {Type::Arithmetic, "fmul_imm"};
+        info_table[Id::FMUL32_IMM] = {Type::Arithmetic, "fmul32_imm"};
         info_table[Id::FSETP_C] = {Type::Arithmetic, "fsetp_c"};
         info_table[Id::FSETP_R] = {Type::Arithmetic, "fsetp_r"};
         info_table[Id::EXIT] = {Type::Trivial, "exit"};
@@ -238,7 +251,7 @@ union OpCode {
     BitField<55, 9, Id> op3;
     BitField<52, 12, Id> op4;
     BitField<51, 13, Id> op5;
-    u64 value;
+    u64 value{};
 };
 static_assert(sizeof(OpCode) == 0x8, "Incorrect structure size");
 
@@ -280,6 +293,7 @@ enum class SubOp : u64 {
     Lg2 = 0x3,
     Rcp = 0x4,
     Rsq = 0x5,
+    Min = 0x8,
 };
 
 union Instruction {
@@ -295,15 +309,33 @@ union Instruction {
     BitField<20, 8, Register> gpr20;
     BitField<20, 7, SubOp> sub_op;
     BitField<28, 8, Register> gpr28;
-    BitField<36, 13, u64> imm36;
     BitField<39, 8, Register> gpr39;
 
     union {
+        BitField<20, 19, u64> imm20_19;
+        BitField<20, 32, u64> imm20_32;
         BitField<45, 1, u64> negate_b;
         BitField<46, 1, u64> abs_a;
         BitField<48, 1, u64> negate_a;
         BitField<49, 1, u64> abs_b;
         BitField<50, 1, u64> abs_d;
+        BitField<56, 1, u64> negate_imm;
+
+        float GetImm20_19() const {
+            // The 19-bit immediate lands in float bits 12..30; negate_imm supplies bit 31.
+            const u32 sign_bit{negate_imm ? 0x80000000u : 0u};
+            const u32 raw_bits{(static_cast<u32>(imm20_19) << 12) | sign_bit};
+            float value{};
+            std::memcpy(&value, &raw_bits, sizeof(raw_bits));
+            return value;
+        }
+
+        float GetImm20_32() const {
+            const u32 raw_bits{static_cast<u32>(imm20_32)};
+            float value{};
+            std::memcpy(&value, &raw_bits, sizeof(raw_bits)); // bit-for-bit reinterpretation
+            return value;
+        }
     } alu;
 
     union {
@@ -311,11 +343,13 @@ union Instruction {
         BitField<49, 1, u64> negate_c;
     } ffma;
 
+    BitField<61, 1, u64> is_b_imm;
     BitField<60, 1, u64> is_b_gpr;
     BitField<59, 1, u64> is_c_gpr;
 
     Attribute attribute;
     Uniform uniform;
+    Sampler sampler;
 
     u64 hex;
 };
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h
index 71a8661b4..2888daedc 100644
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -15,7 +15,10 @@ namespace Tegra {
 
 enum class RenderTargetFormat : u32 {
     NONE = 0x0,
+    RGBA16_FLOAT = 0xCA,
+    RGB10_A2_UNORM = 0xD1,
     RGBA8_UNORM = 0xD5,
+    RGBA8_SRGB = 0xD6,
 };
 
 class DebugContext;
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h
index 35d262189..36629dd11 100644
--- a/src/video_core/rasterizer_interface.h
+++ b/src/video_core/rasterizer_interface.h
@@ -19,7 +19,7 @@ public:
     virtual void DrawArrays() = 0;
 
     /// Notify rasterizer that the specified Maxwell register has been changed
-    virtual void NotifyMaxwellRegisterChanged(u32 id) = 0;
+    virtual void NotifyMaxwellRegisterChanged(u32 method) = 0;
 
     /// Notify rasterizer that all caches should be flushed to Switch memory
     virtual void FlushAll() = 0;
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 28abc563a..13e2a77ce 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -97,7 +97,6 @@ RasterizerOpenGL::RasterizerOpenGL() {
     state.draw.vertex_buffer = stream_buffer->GetHandle();
 
     shader_program_manager = std::make_unique<GLShader::ProgramManager>();
-
     state.draw.shader_program = 0;
     state.draw.vertex_array = hw_vao.handle;
     state.Apply();
@@ -128,17 +127,6 @@ RasterizerOpenGL::~RasterizerOpenGL() {
     }
 }
 
-void RasterizerOpenGL::AnalyzeVertexArray(bool is_indexed) {
-    const auto& regs = Core::System().GetInstance().GPU().Maxwell3D().regs;
-
-    if (is_indexed) {
-        UNREACHABLE();
-    }
-
-    // TODO(bunnei): Add support for 1+ vertex arrays
-    vs_input_size = regs.vertex_buffer.count * regs.vertex_array[0].stride;
-}
-
 void RasterizerOpenGL::SetupVertexArray(u8* array_ptr, GLintptr buffer_offset) {
     MICROPROFILE_SCOPE(OpenGL_VAO);
     const auto& regs = Core::System().GetInstance().GPU().Maxwell3D().regs;
@@ -150,6 +138,7 @@ void RasterizerOpenGL::SetupVertexArray(u8* array_ptr, GLintptr buffer_offset) {
 
     // TODO(bunnei): Add support for 1+ vertex arrays
     const auto& vertex_array{regs.vertex_array[0]};
+    const auto& vertex_array_limit{regs.vertex_array_limit[0]};
     ASSERT_MSG(vertex_array.enable, "vertex array 0 is disabled?");
     ASSERT_MSG(!vertex_array.divisor, "vertex array 0 divisor is unimplemented!");
     for (unsigned index = 1; index < Maxwell::NumVertexArrays; ++index) {
@@ -162,6 +151,10 @@ void RasterizerOpenGL::SetupVertexArray(u8* array_ptr, GLintptr buffer_offset) {
     // to avoid OpenGL errors.
     for (unsigned index = 0; index < 16; ++index) {
         auto& attrib = regs.vertex_attrib_format[index];
+        NGLOG_DEBUG(HW_GPU, "vertex attrib {}, count={}, size={}, type={}, offset={}, normalize={}",
+                    index, attrib.ComponentCount(), attrib.SizeString(), attrib.TypeString(),
+                    attrib.offset.Value(), attrib.IsNormalized());
+
         glVertexAttribPointer(index, attrib.ComponentCount(), MaxwellToGL::VertexType(attrib),
                               attrib.IsNormalized() ? GL_TRUE : GL_FALSE, vertex_array.stride,
                               reinterpret_cast<GLvoid*>(buffer_offset + attrib.offset));
@@ -170,7 +163,7 @@ void RasterizerOpenGL::SetupVertexArray(u8* array_ptr, GLintptr buffer_offset) {
     }
 
     // Copy vertex array data
-    const u32 data_size{vertex_array.stride * regs.vertex_buffer.count};
+    const u64 data_size{vertex_array_limit.LimitAddress() - vertex_array.StartAddress() + 1};
     const VAddr data_addr{memory_manager->PhysicalToVirtualAddress(vertex_array.StartAddress())};
     res_cache.FlushRegion(data_addr, data_size, nullptr);
     Memory::ReadBlock(data_addr, array_ptr, data_size);
@@ -333,13 +326,18 @@ void RasterizerOpenGL::DrawArrays() {
 
     // Draw the vertex batch
     const bool is_indexed = accelerate_draw == AccelDraw::Indexed;
-    AnalyzeVertexArray(is_indexed);
+    const u64 index_buffer_size{regs.index_array.count * regs.index_array.FormatSizeInBytes()};
+    const unsigned vertex_num{is_indexed ? regs.index_array.count : regs.vertex_buffer.count};
+
+    // TODO(bunnei): Add support for 1+ vertex arrays
+    vs_input_size = vertex_num * regs.vertex_array[0].stride;
+
     state.draw.vertex_buffer = stream_buffer->GetHandle();
     state.Apply();
 
     size_t buffer_size = static_cast<size_t>(vs_input_size);
     if (is_indexed) {
-        UNREACHABLE();
+        buffer_size = Common::AlignUp(buffer_size, 4) + index_buffer_size;
     }
 
     // Uniform space for the 5 shader stages
@@ -354,9 +352,18 @@ void RasterizerOpenGL::DrawArrays() {
     SetupVertexArray(buffer_ptr, buffer_offset);
     ptr_pos += vs_input_size;
 
+    // If indexed mode, copy the index buffer
     GLintptr index_buffer_offset = 0;
     if (is_indexed) {
-        UNREACHABLE();
+        ptr_pos = Common::AlignUp(ptr_pos, 4);
+
+        const auto& memory_manager = Core::System().GetInstance().GPU().memory_manager;
+        const VAddr index_data_addr{
+            memory_manager->PhysicalToVirtualAddress(regs.index_array.StartAddress())};
+        Memory::ReadBlock(index_data_addr, &buffer_ptr[ptr_pos], index_buffer_size);
+
+        index_buffer_offset = buffer_offset + static_cast<GLintptr>(ptr_pos);
+        ptr_pos += index_buffer_size;
     }
 
     SetupShaders(buffer_ptr, buffer_offset, ptr_pos);
@@ -366,11 +373,16 @@ void RasterizerOpenGL::DrawArrays() {
     shader_program_manager->ApplyTo(state);
     state.Apply();
 
+    const GLenum primitive_mode{MaxwellToGL::PrimitiveTopology(regs.draw.topology)};
     if (is_indexed) {
-        UNREACHABLE();
+        const GLint index_min{static_cast<GLint>(regs.index_array.first)};
+        const GLint index_max{static_cast<GLint>(regs.index_array.first + regs.index_array.count)};
+        glDrawRangeElementsBaseVertex(primitive_mode, index_min, index_max, regs.index_array.count,
+                                      MaxwellToGL::IndexFormat(regs.index_array.format),
+                                      reinterpret_cast<const void*>(index_buffer_offset),
+                                      -index_min);
     } else {
-        glDrawArrays(MaxwellToGL::PrimitiveTopology(regs.draw.topology), 0,
-                     regs.vertex_buffer.count);
+        glDrawArrays(primitive_mode, 0, regs.vertex_buffer.count);
     }
 
     // Disable scissor test
@@ -434,7 +446,32 @@ void RasterizerOpenGL::BindTextures() {
     }
 }
 
-void RasterizerOpenGL::NotifyMaxwellRegisterChanged(u32 id) {}
+void RasterizerOpenGL::NotifyMaxwellRegisterChanged(u32 method) {
+    const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;
+    switch (method) { // methods without a case below are ignored here
+    case MAXWELL3D_REG_INDEX(blend.separate_alpha):
+        ASSERT_MSG(false, "unimplemented"); // fires on any write to this register
+        break;
+    case MAXWELL3D_REG_INDEX(blend.equation_rgb):
+        state.blend.rgb_equation = MaxwellToGL::BlendEquation(regs.blend.equation_rgb);
+        break;
+    case MAXWELL3D_REG_INDEX(blend.factor_source_rgb):
+        state.blend.src_rgb_func = MaxwellToGL::BlendFunc(regs.blend.factor_source_rgb);
+        break;
+    case MAXWELL3D_REG_INDEX(blend.factor_dest_rgb):
+        state.blend.dst_rgb_func = MaxwellToGL::BlendFunc(regs.blend.factor_dest_rgb);
+        break;
+    case MAXWELL3D_REG_INDEX(blend.equation_a):
+        state.blend.a_equation = MaxwellToGL::BlendEquation(regs.blend.equation_a);
+        break;
+    case MAXWELL3D_REG_INDEX(blend.factor_source_a):
+        state.blend.src_a_func = MaxwellToGL::BlendFunc(regs.blend.factor_source_a);
+        break;
+    case MAXWELL3D_REG_INDEX(blend.factor_dest_a):
+        state.blend.dst_a_func = MaxwellToGL::BlendFunc(regs.blend.factor_dest_a);
+        break;
+    }
+}
 
 void RasterizerOpenGL::FlushAll() {
     MICROPROFILE_SCOPE(OpenGL_CacheManagement);
@@ -486,9 +523,12 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& framebu
     src_params.width = std::min(framebuffer.width, pixel_stride);
     src_params.height = framebuffer.height;
     src_params.stride = pixel_stride;
-    src_params.is_tiled = false;
+    src_params.is_tiled = true;
+    src_params.block_height = Tegra::Texture::TICEntry::DefaultBlockHeight;
     src_params.pixel_format =
         SurfaceParams::PixelFormatFromGPUPixelFormat(framebuffer.pixel_format);
+    src_params.component_type =
+        SurfaceParams::ComponentTypeFromGPUPixelFormat(framebuffer.pixel_format);
     src_params.UpdateParams();
 
     MathUtil::Rectangle<u32> src_rect;
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index 548ce0453..9ece415f7 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -32,7 +32,7 @@ public:
     ~RasterizerOpenGL() override;
 
     void DrawArrays() override;
-    void NotifyMaxwellRegisterChanged(u32 id) override;
+    void NotifyMaxwellRegisterChanged(u32 method) override;
     void FlushAll() override;
     void FlushRegion(VAddr addr, u64 size) override;
     void InvalidateRegion(VAddr addr, u64 size) override;
@@ -155,7 +155,6 @@ private:
 
     GLsizeiptr vs_input_size;
 
-    void AnalyzeVertexArray(bool is_indexed);
     void SetupVertexArray(u8* array_ptr, GLintptr buffer_offset);
 
     std::array<OGLBuffer, Tegra::Engines::Maxwell3D::Regs::MaxShaderStage> uniform_buffers;
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
index 213b20a21..561c6913d 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp
@@ -36,6 +36,7 @@
 
 using SurfaceType = SurfaceParams::SurfaceType;
 using PixelFormat = SurfaceParams::PixelFormat;
+using ComponentType = SurfaceParams::ComponentType;
 
 struct FormatTuple {
     GLint internal_format;
@@ -47,26 +48,24 @@ struct FormatTuple {
     u32 compression_factor;
 };
 
-static constexpr std::array<FormatTuple, 1> fb_format_tuples = {{
-    {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, false, 1}, // RGBA8
+static constexpr std::array<FormatTuple, SurfaceParams::MaxPixelFormat> tex_format_tuples = {{
+    {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, false, 1},                     // ABGR8
+    {GL_RGB, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV, false, 1},                        // B5G6R5
+    {GL_COMPRESSED_RGB_S3TC_DXT1_EXT, GL_RGB, GL_UNSIGNED_INT_8_8_8_8, true, 16},   // DXT1
+    {GL_COMPRESSED_RGBA_S3TC_DXT3_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, true, 16}, // DXT23
+    {GL_COMPRESSED_RGBA_S3TC_DXT5_EXT, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, true, 16}, // DXT45
 }};
 
-static constexpr std::array<FormatTuple, 2> tex_format_tuples = {{
-    {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, false, 1},                       // RGBA8
-    {GL_COMPRESSED_RGB_S3TC_DXT1_EXT, GL_RGB, GL_UNSIGNED_INT_8_8_8_8, true, 16}, // DXT1
-}};
-
-static const FormatTuple& GetFormatTuple(PixelFormat pixel_format) {
+static const FormatTuple& GetFormatTuple(PixelFormat pixel_format, ComponentType component_type) {
     const SurfaceType type = SurfaceParams::GetFormatType(pixel_format);
-    if (type == SurfaceType::Color) {
-        ASSERT(static_cast<size_t>(pixel_format) < fb_format_tuples.size());
-        return fb_format_tuples[static_cast<unsigned int>(pixel_format)];
+    if (type == SurfaceType::ColorTexture) {
+        ASSERT(static_cast<size_t>(pixel_format) < tex_format_tuples.size());
+        // For now only UNORM components are supported
+        ASSERT(component_type == ComponentType::UNorm);
+        return tex_format_tuples[static_cast<unsigned int>(pixel_format)];
     } else if (type == SurfaceType::Depth || type == SurfaceType::DepthStencil) {
         // TODO(Subv): Implement depth formats
         ASSERT_MSG(false, "Unimplemented");
-    } else if (type == SurfaceType::Texture) {
-        ASSERT(static_cast<size_t>(pixel_format) < tex_format_tuples.size());
-        return tex_format_tuples[static_cast<unsigned int>(pixel_format)];
     }
 
     UNREACHABLE();
@@ -85,56 +84,42 @@ static u16 GetResolutionScaleFactor() {
 }
 
 template <bool morton_to_gl, PixelFormat format>
-static void MortonCopyTile(u32 stride, u8* tile_buffer, u8* gl_buffer) {
+void MortonCopy(u32 stride, u32 block_height, u32 height, u8* gl_buffer, VAddr base, VAddr start,
+                VAddr end) {
     constexpr u32 bytes_per_pixel = SurfaceParams::GetFormatBpp(format) / 8;
     constexpr u32 gl_bytes_per_pixel = CachedSurface::GetGLBytesPerPixel(format);
-    for (u32 y = 0; y < 8; ++y) {
-        for (u32 x = 0; x < 8; ++x) {
-            u8* tile_ptr = tile_buffer + VideoCore::MortonInterleave(x, y) * bytes_per_pixel;
-            u8* gl_ptr = gl_buffer + ((7 - y) * stride + x) * gl_bytes_per_pixel;
-            if (morton_to_gl) {
-                std::memcpy(gl_ptr, tile_ptr, bytes_per_pixel);
-            } else {
-                std::memcpy(tile_ptr, gl_ptr, bytes_per_pixel);
-            }
-        }
-    }
-}
-
-template <bool morton_to_gl, PixelFormat format>
-void MortonCopy(u32 stride, u32 height, u8* gl_buffer, VAddr base, VAddr start, VAddr end) {
-    constexpr u32 bytes_per_pixel = SurfaceParams::GetFormatBpp(format) / 8;
-    constexpr u32 gl_bytes_per_pixel = CachedSurface::GetGLBytesPerPixel(format);
-
-    // TODO(bunnei): Assumes the default rendering GOB size of 16 (128 lines). We should check the
-    // configuration for this and perform more generic un/swizzle
-    LOG_WARNING(Render_OpenGL, "need to use correct swizzle/GOB parameters!");
-    VideoCore::MortonCopyPixels128(stride, height, bytes_per_pixel, gl_bytes_per_pixel,
-                                   Memory::GetPointer(base), gl_buffer, morton_to_gl);
-}
 
-template <>
-void MortonCopy<true, PixelFormat::DXT1>(u32 stride, u32 height, u8* gl_buffer, VAddr base,
-                                         VAddr start, VAddr end) {
-    constexpr u32 bytes_per_pixel = SurfaceParams::GetFormatBpp(PixelFormat::DXT1) / 8;
-    constexpr u32 gl_bytes_per_pixel = CachedSurface::GetGLBytesPerPixel(PixelFormat::DXT1);
-
-    // TODO(bunnei): Assumes the default rendering GOB size of 16 (128 lines). We should check the
-    // configuration for this and perform more generic un/swizzle
-    LOG_WARNING(Render_OpenGL, "need to use correct swizzle/GOB parameters!");
-    auto data =
-        Tegra::Texture::UnswizzleTexture(base, Tegra::Texture::TextureFormat::DXT1, stride, height);
-    std::memcpy(gl_buffer, data.data(), data.size());
+    if (morton_to_gl) {
+        auto data = Tegra::Texture::UnswizzleTexture(
+            base, SurfaceParams::TextureFormatFromPixelFormat(format), stride, height,
+            block_height);
+        std::memcpy(gl_buffer, data.data(), data.size());
+    } else {
+        // TODO(bunnei): Assumes the default rendering GOB size of 16 (128 lines). We should check
+        // the configuration for this and perform more generic un/swizzle
+        LOG_WARNING(Render_OpenGL, "need to use correct swizzle/GOB parameters!");
+        VideoCore::MortonCopyPixels128(stride, height, bytes_per_pixel, gl_bytes_per_pixel,
+                                       Memory::GetPointer(base), gl_buffer, morton_to_gl);
+    }
 }
 
-static constexpr std::array<void (*)(u32, u32, u8*, VAddr, VAddr, VAddr), 2> morton_to_gl_fns = {
-    MortonCopy<true, PixelFormat::RGBA8>,
-    MortonCopy<true, PixelFormat::DXT1>,
+static constexpr std::array<void (*)(u32, u32, u32, u8*, VAddr, VAddr, VAddr),
+                            SurfaceParams::MaxPixelFormat>
+    morton_to_gl_fns = {
+        MortonCopy<true, PixelFormat::ABGR8>, MortonCopy<true, PixelFormat::B5G6R5>,
+        MortonCopy<true, PixelFormat::DXT1>,  MortonCopy<true, PixelFormat::DXT23>,
+        MortonCopy<true, PixelFormat::DXT45>,
 };
 
-static constexpr std::array<void (*)(u32, u32, u8*, VAddr, VAddr, VAddr), 2> gl_to_morton_fns = {
-    MortonCopy<false, PixelFormat::RGBA8>,
-    MortonCopy<false, PixelFormat::DXT1>,
+static constexpr std::array<void (*)(u32, u32, u32, u8*, VAddr, VAddr, VAddr),
+                            SurfaceParams::MaxPixelFormat>
+    gl_to_morton_fns = {
+        MortonCopy<false, PixelFormat::ABGR8>,
+        MortonCopy<false, PixelFormat::B5G6R5>,
+        // TODO(Subv): Swizzling the DXT1/DXT23/DXT45 formats is not yet supported
+        nullptr,
+        nullptr,
+        nullptr,
 };
 
 // Allocate an uninitialized texture of appropriate size and format for the surface
@@ -183,7 +168,7 @@ static bool BlitTextures(GLuint src_tex, const MathUtil::Rectangle<u32>& src_rec
 
     u32 buffers = 0;
 
-    if (type == SurfaceType::Color || type == SurfaceType::Texture) {
+    if (type == SurfaceType::ColorTexture) {
         glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, src_tex,
                                0);
         glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0,
@@ -311,15 +296,18 @@ MathUtil::Rectangle<u32> SurfaceParams::GetScaledSubRect(const SurfaceParams& su
 
 bool SurfaceParams::ExactMatch(const SurfaceParams& other_surface) const {
     return std::tie(other_surface.addr, other_surface.width, other_surface.height,
-                    other_surface.stride, other_surface.pixel_format, other_surface.is_tiled) ==
-               std::tie(addr, width, height, stride, pixel_format, is_tiled) &&
+                    other_surface.stride, other_surface.block_height, other_surface.pixel_format,
+                    other_surface.component_type,
+                    other_surface.is_tiled) == std::tie(addr, width, height, stride, block_height,
+                                                        pixel_format, component_type, is_tiled) &&
            pixel_format != PixelFormat::Invalid;
 }
 
 bool SurfaceParams::CanSubRect(const SurfaceParams& sub_surface) const {
     return sub_surface.addr >= addr && sub_surface.end <= end &&
            sub_surface.pixel_format == pixel_format && pixel_format != PixelFormat::Invalid &&
-           sub_surface.is_tiled == is_tiled &&
+           sub_surface.is_tiled == is_tiled && sub_surface.block_height == block_height &&
+           sub_surface.component_type == component_type &&
            (sub_surface.addr - addr) % BytesInPixels(is_tiled ? 64 : 1) == 0 &&
            (sub_surface.stride == stride || sub_surface.height <= (is_tiled ? 8u : 1u)) &&
            GetSubRect(sub_surface).left + sub_surface.width <= stride;
@@ -328,7 +316,8 @@ bool SurfaceParams::CanSubRect(const SurfaceParams& sub_surface) const {
 bool SurfaceParams::CanExpand(const SurfaceParams& expanded_surface) const {
     return pixel_format != PixelFormat::Invalid && pixel_format == expanded_surface.pixel_format &&
            addr <= expanded_surface.end && expanded_surface.addr <= end &&
-           is_tiled == expanded_surface.is_tiled && stride == expanded_surface.stride &&
+           is_tiled == expanded_surface.is_tiled && block_height == expanded_surface.block_height &&
+           component_type == expanded_surface.component_type && stride == expanded_surface.stride &&
            (std::max(expanded_surface.addr, addr) - std::min(expanded_surface.addr, addr)) %
                    BytesInPixels(stride * (is_tiled ? 8 : 1)) ==
                0;
@@ -339,6 +328,10 @@ bool SurfaceParams::CanTexCopy(const SurfaceParams& texcopy_params) const {
         end < texcopy_params.end) {
         return false;
     }
+    if (texcopy_params.block_height != block_height ||
+        texcopy_params.component_type != component_type)
+        return false;
+
     if (texcopy_params.width != texcopy_params.stride) {
         const u32 tile_stride = static_cast<u32>(BytesInPixels(stride * (is_tiled ? 8 : 1)));
         return (texcopy_params.addr - addr) % BytesInPixels(is_tiled ? 64 : 1) == 0 &&
@@ -481,18 +474,13 @@ void CachedSurface::LoadGLBuffer(VAddr load_start, VAddr load_end) {
     const u64 start_offset = load_start - addr;
 
     if (!is_tiled) {
-        ASSERT(type == SurfaceType::Color);
         const u32 bytes_per_pixel{GetFormatBpp() >> 3};
 
-        // TODO(bunnei): Assumes the default rendering GOB size of 16 (128 lines). We should check
-        // the configuration for this and perform more generic un/swizzle
-        LOG_WARNING(Render_OpenGL, "need to use correct swizzle/GOB parameters!");
-        VideoCore::MortonCopyPixels128(width, height, bytes_per_pixel, 4,
-                                       texture_src_data + start_offset, &gl_buffer[start_offset],
-                                       true);
+        std::memcpy(&gl_buffer[start_offset], texture_src_data + start_offset,
+                    load_end - load_start);
     } else {
-        morton_to_gl_fns[static_cast<size_t>(pixel_format)](stride, height, &gl_buffer[0], addr,
-                                                            load_start, load_end);
+        morton_to_gl_fns[static_cast<size_t>(pixel_format)](
+            stride, block_height, height, &gl_buffer[0], addr, load_start, load_end);
     }
 }
 
@@ -533,11 +521,10 @@ void CachedSurface::FlushGLBuffer(VAddr flush_start, VAddr flush_end) {
         if (backup_bytes)
             std::memcpy(&dst_buffer[coarse_start_offset], &backup_data[0], backup_bytes);
     } else if (!is_tiled) {
-        ASSERT(type == SurfaceType::Color);
         std::memcpy(dst_buffer + start_offset, &gl_buffer[start_offset], flush_end - flush_start);
     } else {
-        gl_to_morton_fns[static_cast<size_t>(pixel_format)](stride, height, &gl_buffer[0], addr,
-                                                            flush_start, flush_end);
+        gl_to_morton_fns[static_cast<size_t>(pixel_format)](
+            stride, block_height, height, &gl_buffer[0], addr, flush_start, flush_end);
     }
 }
 
@@ -556,7 +543,7 @@ void CachedSurface::UploadGLTexture(const MathUtil::Rectangle<u32>& rect, GLuint
     GLint y0 = static_cast<GLint>(rect.bottom);
     size_t buffer_offset = (y0 * stride + x0) * GetGLBytesPerPixel(pixel_format);
 
-    const FormatTuple& tuple = GetFormatTuple(pixel_format);
+    const FormatTuple& tuple = GetFormatTuple(pixel_format, component_type);
     GLuint target_tex = texture.handle;
 
     // If not 1x scale, create 1x texture that we will blit from to replace texture subrect in
@@ -629,7 +616,7 @@ void CachedSurface::DownloadGLTexture(const MathUtil::Rectangle<u32>& rect, GLui
     OpenGLState prev_state = state;
     SCOPE_EXIT({ prev_state.Apply(); });
 
-    const FormatTuple& tuple = GetFormatTuple(pixel_format);
+    const FormatTuple& tuple = GetFormatTuple(pixel_format, component_type);
 
     // Ensure no bad interactions with GL_PACK_ALIGNMENT
     ASSERT(stride * GetGLBytesPerPixel(pixel_format) % 4 == 0);
@@ -662,7 +649,7 @@ void CachedSurface::DownloadGLTexture(const MathUtil::Rectangle<u32>& rect, GLui
         state.draw.read_framebuffer = read_fb_handle;
         state.Apply();
 
-        if (type == SurfaceType::Color || type == SurfaceType::Texture) {
+        if (type == SurfaceType::ColorTexture) {
             glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D,
                                    texture.handle, 0);
             glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
@@ -1041,9 +1028,25 @@ Surface RasterizerCacheOpenGL::GetTextureSurface(const Tegra::Texture::FullTextu
     params.height = config.tic.Height();
     params.is_tiled = config.tic.IsTiled();
     params.pixel_format = SurfaceParams::PixelFormatFromTextureFormat(config.tic.format);
+
+    // TODO(Subv): Different types per component are not supported.
+    ASSERT(config.tic.r_type.Value() == config.tic.g_type.Value() &&
+           config.tic.r_type.Value() == config.tic.b_type.Value() &&
+           config.tic.r_type.Value() == config.tic.a_type.Value());
+
+    params.component_type = SurfaceParams::ComponentTypeFromTexture(config.tic.r_type.Value());
+
+    if (config.tic.IsTiled()) {
+        params.block_height = config.tic.BlockHeight();
+    } else {
+        // Use the texture-provided stride value if the texture isn't tiled.
+        params.stride = params.PixelsInBytes(config.tic.Pitch());
+    }
+
     params.UpdateParams();
 
-    if (config.tic.Width() % 8 != 0 || config.tic.Height() % 8 != 0) {
+    if (config.tic.Width() % 8 != 0 || config.tic.Height() % 8 != 0 ||
+        params.stride != params.width) {
         Surface src_surface;
         MathUtil::Rectangle<u32> rect;
         std::tie(src_surface, rect) = GetSurfaceSubRect(params, ScaleMatch::Ignore, true);
@@ -1094,10 +1097,13 @@ SurfaceSurfaceRect_Tuple RasterizerCacheOpenGL::GetFramebufferSurfaces(
     color_params.res_scale = resolution_scale_factor;
     color_params.width = config.width;
     color_params.height = config.height;
+    // TODO(Subv): Can framebuffers use a different block height?
+    color_params.block_height = Tegra::Texture::TICEntry::DefaultBlockHeight;
     SurfaceParams depth_params = color_params;
 
     color_params.addr = memory_manager->PhysicalToVirtualAddress(config.Address());
     color_params.pixel_format = SurfaceParams::PixelFormatFromRenderTargetFormat(config.format);
+    color_params.component_type = SurfaceParams::ComponentTypeFromRenderTarget(config.format);
     color_params.UpdateParams();
 
     ASSERT_MSG(!using_depth_fb, "depth buffer is unimplemented");
@@ -1293,7 +1299,6 @@ void RasterizerCacheOpenGL::InvalidateRegion(VAddr addr, u64 size, const Surface
     const SurfaceInterval invalid_interval(addr, addr + size);
 
     if (region_owner != nullptr) {
-        ASSERT(region_owner->type != SurfaceType::Texture);
         ASSERT(addr >= region_owner->addr && addr + size <= region_owner->end);
         // Surfaces can't have a gap
         ASSERT(region_owner->width == region_owner->stride);
@@ -1355,7 +1360,8 @@ Surface RasterizerCacheOpenGL::CreateSurface(const SurfaceParams& params) {
 
     surface->gl_buffer_size = 0;
     surface->invalid_regions.insert(surface->GetInterval());
-    AllocateSurfaceTexture(surface->texture.handle, GetFormatTuple(surface->pixel_format),
+    AllocateSurfaceTexture(surface->texture.handle,
+                           GetFormatTuple(surface->pixel_format, surface->component_type),
                            surface->GetScaledWidth(), surface->GetScaledHeight());
 
     return surface;
diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.h b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
index e7ce506cf..6861efe16 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer_cache.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.h
@@ -52,27 +52,45 @@ enum class ScaleMatch {
 
 struct SurfaceParams {
     enum class PixelFormat {
-        RGBA8 = 0,
-        DXT1 = 1,
+        ABGR8 = 0,
+        B5G6R5 = 1,
+        DXT1 = 2,
+        DXT23 = 3,
+        DXT45 = 4,
+
+        Max,
         Invalid = 255,
     };
 
+    static constexpr size_t MaxPixelFormat = static_cast<size_t>(PixelFormat::Max);
+
+    enum class ComponentType {
+        Invalid = 0,
+        SNorm = 1,
+        UNorm = 2,
+        SInt = 3,
+        UInt = 4,
+        Float = 5,
+    };
+
     enum class SurfaceType {
-        Color = 0,
-        Texture = 1,
-        Depth = 2,
-        DepthStencil = 3,
-        Fill = 4,
-        Invalid = 5
+        ColorTexture = 0,
+        Depth = 1,
+        DepthStencil = 2,
+        Fill = 3,
+        Invalid = 4,
     };
 
     static constexpr unsigned int GetFormatBpp(PixelFormat format) {
         if (format == PixelFormat::Invalid)
             return 0;
 
-        constexpr std::array<unsigned int, 2> bpp_table = {
-            32, // RGBA8
-            64, // DXT1
+        constexpr std::array<unsigned int, MaxPixelFormat> bpp_table = {
+            32,  // ABGR8
+            16,  // B5G6R5
+            64,  // DXT1
+            128, // DXT23
+            128, // DXT45
         };
 
         ASSERT(static_cast<size_t>(format) < bpp_table.size());
@@ -85,8 +103,9 @@ struct SurfaceParams {
     static PixelFormat PixelFormatFromRenderTargetFormat(Tegra::RenderTargetFormat format) {
         switch (format) {
         case Tegra::RenderTargetFormat::RGBA8_UNORM:
-            return PixelFormat::RGBA8;
+            return PixelFormat::ABGR8;
         default:
+            NGLOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format));
             UNREACHABLE();
         }
     }
@@ -94,8 +113,9 @@ struct SurfaceParams {
     static PixelFormat PixelFormatFromGPUPixelFormat(Tegra::FramebufferConfig::PixelFormat format) {
         switch (format) {
         case Tegra::FramebufferConfig::PixelFormat::ABGR8:
-            return PixelFormat::RGBA8;
+            return PixelFormat::ABGR8;
         default:
+            NGLOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format));
             UNREACHABLE();
         }
     }
@@ -104,10 +124,69 @@ struct SurfaceParams {
         // TODO(Subv): Properly implement this
         switch (format) {
         case Tegra::Texture::TextureFormat::A8R8G8B8:
-            return PixelFormat::RGBA8;
+            return PixelFormat::ABGR8;
+        case Tegra::Texture::TextureFormat::B5G6R5:
+            return PixelFormat::B5G6R5;
         case Tegra::Texture::TextureFormat::DXT1:
             return PixelFormat::DXT1;
+        case Tegra::Texture::TextureFormat::DXT23:
+            return PixelFormat::DXT23;
+        case Tegra::Texture::TextureFormat::DXT45:
+            return PixelFormat::DXT45;
+        default:
+            NGLOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format));
+            UNREACHABLE();
+        }
+    }
+
+    static Tegra::Texture::TextureFormat TextureFormatFromPixelFormat(PixelFormat format) {
+        // TODO(Subv): Properly implement this
+        switch (format) {
+        case PixelFormat::ABGR8:
+            return Tegra::Texture::TextureFormat::A8R8G8B8;
+        case PixelFormat::B5G6R5:
+            return Tegra::Texture::TextureFormat::B5G6R5;
+        case PixelFormat::DXT1:
+            return Tegra::Texture::TextureFormat::DXT1;
+        case PixelFormat::DXT23:
+            return Tegra::Texture::TextureFormat::DXT23;
+        case PixelFormat::DXT45:
+            return Tegra::Texture::TextureFormat::DXT45;
+        default:
+            UNREACHABLE();
+        }
+    }
+
+    static ComponentType ComponentTypeFromTexture(Tegra::Texture::ComponentType type) {
+        // TODO(Subv): Implement more component types
+        switch (type) {
+        case Tegra::Texture::ComponentType::UNORM:
+            return ComponentType::UNorm;
         default:
+            NGLOG_CRITICAL(HW_GPU, "Unimplemented component type={}", static_cast<u32>(type));
+            UNREACHABLE();
+        }
+    }
+
+    static ComponentType ComponentTypeFromRenderTarget(Tegra::RenderTargetFormat format) {
+        // TODO(Subv): Implement more render targets
+        switch (format) {
+        case Tegra::RenderTargetFormat::RGBA8_UNORM:
+        case Tegra::RenderTargetFormat::RGB10_A2_UNORM:
+            return ComponentType::UNorm;
+        default:
+            NGLOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format));
+            UNREACHABLE();
+        }
+    }
+
+    static ComponentType ComponentTypeFromGPUPixelFormat(
+        Tegra::FramebufferConfig::PixelFormat format) {
+        switch (format) {
+        case Tegra::FramebufferConfig::PixelFormat::ABGR8:
+            return ComponentType::UNorm;
+        default:
+            NGLOG_CRITICAL(HW_GPU, "Unimplemented format={}", static_cast<u32>(format));
             UNREACHABLE();
         }
     }
@@ -116,8 +195,7 @@ struct SurfaceParams {
         SurfaceType a_type = GetFormatType(pixel_format_a);
         SurfaceType b_type = GetFormatType(pixel_format_b);
 
-        if ((a_type == SurfaceType::Color || a_type == SurfaceType::Texture) &&
-            (b_type == SurfaceType::Color || b_type == SurfaceType::Texture)) {
+        if (a_type == SurfaceType::ColorTexture && b_type == SurfaceType::ColorTexture) {
             return true;
         }
 
@@ -133,12 +211,8 @@ struct SurfaceParams {
     }
 
     static SurfaceType GetFormatType(PixelFormat pixel_format) {
-        if ((unsigned int)pixel_format <= static_cast<unsigned int>(PixelFormat::RGBA8)) {
-            return SurfaceType::Color;
-        }
-
-        if ((unsigned int)pixel_format <= static_cast<unsigned int>(PixelFormat::DXT1)) {
-            return SurfaceType::Texture;
+        if (static_cast<size_t>(pixel_format) < MaxPixelFormat) {
+            return SurfaceType::ColorTexture;
         }
 
         // TODO(Subv): Implement the other formats
@@ -210,11 +284,13 @@ struct SurfaceParams {
     u32 width = 0;
     u32 height = 0;
     u32 stride = 0;
+    u32 block_height = 0;
     u16 res_scale = 1;
 
     bool is_tiled = false;
     PixelFormat pixel_format = PixelFormat::Invalid;
     SurfaceType type = SurfaceType::Invalid;
+    ComponentType component_type = ComponentType::Invalid;
 };
 
 struct CachedSurface : SurfaceParams {
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index e11711533..de137558d 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -5,6 +5,7 @@
 #include <map>
 #include <set>
 #include <string>
+#include <string_view>
 #include "common/assert.h"
 #include "common/common_types.h"
 #include "video_core/engines/shader_bytecode.h"
@@ -17,6 +18,7 @@ using Tegra::Shader::Attribute;
 using Tegra::Shader::Instruction;
 using Tegra::Shader::OpCode;
 using Tegra::Shader::Register;
+using Tegra::Shader::Sampler;
 using Tegra::Shader::SubOp;
 using Tegra::Shader::Uniform;
 
@@ -108,12 +110,25 @@ private:
 
 class ShaderWriter {
 public:
-    void AddLine(const std::string& text) {
+    void AddLine(std::string_view text) {
         DEBUG_ASSERT(scope >= 0);
         if (!text.empty()) {
-            shader_source += std::string(static_cast<size_t>(scope) * 4, ' ');
+            AppendIndentation();
         }
-        shader_source += text + '\n';
+        shader_source += text;
+        AddNewLine();
+    }
+
+    void AddLine(char character) {
+        DEBUG_ASSERT(scope >= 0);
+        AppendIndentation();
+        shader_source += character;
+        AddNewLine();
+    }
+
+    void AddNewLine() {
+        DEBUG_ASSERT(scope >= 0);
+        shader_source += '\n';
     }
 
     std::string GetResult() {
@@ -123,6 +138,10 @@ public:
     int scope = 0;
 
 private:
+    void AppendIndentation() {
+        shader_source.append(static_cast<size_t>(scope) * 4, ' ');
+    }
+
     std::string shader_source;
 };
 
@@ -155,23 +174,27 @@ private:
 
     /// Generates code representing an input attribute register.
     std::string GetInputAttribute(Attribute::Index attribute) {
-        declr_input_attribute.insert(attribute);
+        switch (attribute) {
+        case Attribute::Index::Position:
+            return "position";
+        default:
+            const u32 index{static_cast<u32>(attribute) -
+                            static_cast<u32>(Attribute::Index::Attribute_0)};
+            if (attribute >= Attribute::Index::Attribute_0) {
+                declr_input_attribute.insert(attribute);
+                return "input_attribute_" + std::to_string(index);
+            }
 
-        const u32 index{static_cast<u32>(attribute) -
-                        static_cast<u32>(Attribute::Index::Attribute_0)};
-        if (attribute >= Attribute::Index::Attribute_0) {
-            return "input_attribute_" + std::to_string(index);
+            NGLOG_CRITICAL(HW_GPU, "Unhandled input attribute: {}", index);
+            UNREACHABLE();
         }
-
-        LOG_CRITICAL(HW_GPU, "Unhandled input attribute: 0x%02x", index);
-        UNREACHABLE();
     }
 
     /// Generates code representing an output attribute register.
     std::string GetOutputAttribute(Attribute::Index attribute) {
         switch (attribute) {
         case Attribute::Index::Position:
-            return "gl_Position";
+            return "position";
         default:
             const u32 index{static_cast<u32>(attribute) -
                             static_cast<u32>(Attribute::Index::Attribute_0)};
@@ -180,22 +203,47 @@ private:
                 return "output_attribute_" + std::to_string(index);
             }
 
-            LOG_CRITICAL(HW_GPU, "Unhandled output attribute: 0x%02x", index);
+            NGLOG_CRITICAL(HW_GPU, "Unhandled output attribute: {}", index);
             UNREACHABLE();
         }
     }
 
+    /// Generates code representing a 19-bit immediate value
+    static std::string GetImmediate19(const Instruction& instr) {
+        return std::to_string(instr.alu.GetImm20_19());
+    }
+
+    /// Generates code representing a 32-bit immediate value
+    static std::string GetImmediate32(const Instruction& instr) {
+        return std::to_string(instr.alu.GetImm20_32());
+    }
+
     /// Generates code representing a temporary (GPR) register.
-    std::string GetRegister(const Register& reg) {
-        return *declr_register.insert("register_" + std::to_string(reg)).first;
+    std::string GetRegister(const Register& reg, unsigned elem = 0) {
+        if (stage == Maxwell3D::Regs::ShaderStage::Fragment && reg + elem < 4) {
+            // GPRs 0-3 map to the fragment color output; index by the effective register
+            return std::string{"color."} + "rgba"[reg + elem];
+        }
+
+        return *declr_register.insert("register_" + std::to_string(reg + elem)).first;
     }
 
     /// Generates code representing a uniform (C buffer) register.
     std::string GetUniform(const Uniform& reg) {
-        declr_const_buffers[reg.index].MarkAsUsed(reg.index, reg.offset, stage);
+        declr_const_buffers[reg.index].MarkAsUsed(static_cast<unsigned>(reg.index),
+                                                  static_cast<unsigned>(reg.offset), stage);
         return 'c' + std::to_string(reg.index) + '[' + std::to_string(reg.offset) + ']';
     }
 
+    /// Generates code representing a texture sampler.
+    std::string GetSampler(const Sampler& sampler) const {
+        // TODO(Subv): Support more than just texture sampler 0
+        ASSERT_MSG(sampler.index == Sampler::Index::Sampler_0, "unsupported");
+        const unsigned index{static_cast<unsigned>(sampler.index.Value()) -
+                             static_cast<unsigned>(Sampler::Index::Sampler_0)};
+        return "tex[" + std::to_string(index) + "]";
+    }
+
     /**
      * Adds code that calls a subroutine.
      * @param subroutine the subroutine to call.
@@ -217,12 +265,13 @@ private:
      * @param value the code representing the value to assign.
      */
     void SetDest(u64 elem, const std::string& reg, const std::string& value,
-                 u64 dest_num_components, u64 value_num_components) {
+                 u64 dest_num_components, u64 value_num_components, bool is_abs = false) {
         std::string swizzle = ".";
         swizzle += "xyzw"[elem];
 
         std::string dest = reg + (dest_num_components != 1 ? swizzle : "");
         std::string src = "(" + value + ")" + (value_num_components != 1 ? swizzle : "");
+        src = is_abs ? "abs(" + src + ")" : src;
 
         shader.AddLine(dest + " = " + src + ";");
     }
@@ -240,8 +289,6 @@ private:
 
         switch (OpCode::GetInfo(instr.opcode).type) {
         case OpCode::Type::Arithmetic: {
-            ASSERT(!instr.alu.abs_d);
-
             std::string dest = GetRegister(instr.gpr0);
             std::string op_a = instr.alu.negate_a ? "-" : "";
             op_a += GetRegister(instr.gpr8);
@@ -250,63 +297,114 @@ private:
             }
 
             std::string op_b = instr.alu.negate_b ? "-" : "";
-            if (instr.is_b_gpr) {
-                op_b += GetRegister(instr.gpr20);
+
+            if (instr.is_b_imm) {
+                op_b += GetImmediate19(instr);
             } else {
-                op_b += GetUniform(instr.uniform);
+                if (instr.is_b_gpr) {
+                    op_b += GetRegister(instr.gpr20);
+                } else {
+                    op_b += GetUniform(instr.uniform);
+                }
             }
+
             if (instr.alu.abs_b) {
                 op_b = "abs(" + op_b + ")";
             }
 
             switch (instr.opcode.EffectiveOpCode()) {
             case OpCode::Id::FMUL_C:
-            case OpCode::Id::FMUL_R: {
-                SetDest(0, dest, op_a + " * " + op_b, 1, 1);
+            case OpCode::Id::FMUL_R:
+            case OpCode::Id::FMUL_IMM: {
+                SetDest(0, dest, op_a + " * " + op_b, 1, 1, instr.alu.abs_d);
+                break;
+            }
+            case OpCode::Id::FMUL32_IMM: {
+                // fmul32i doesn't have abs or neg bits.
+                SetDest(0, dest, GetRegister(instr.gpr8) + " * " + GetImmediate32(instr), 1, 1);
                 break;
             }
             case OpCode::Id::FADD_C:
-            case OpCode::Id::FADD_R: {
-                SetDest(0, dest, op_a + " + " + op_b, 1, 1);
+            case OpCode::Id::FADD_R:
+            case OpCode::Id::FADD_IMM: {
+                SetDest(0, dest, op_a + " + " + op_b, 1, 1, instr.alu.abs_d);
                 break;
             }
-            default: {
-                LOG_CRITICAL(HW_GPU, "Unhandled arithmetic instruction: 0x%02x (%s): 0x%08x",
-                             static_cast<unsigned>(instr.opcode.EffectiveOpCode()),
-                             OpCode::GetInfo(instr.opcode).name.c_str(), instr.hex);
-                throw DecompileFail("Unhandled instruction");
+            case OpCode::Id::MUFU: {
+                switch (instr.sub_op) {
+                case SubOp::Cos:
+                    SetDest(0, dest, "cos(" + op_a + ")", 1, 1, instr.alu.abs_d);
+                    break;
+                case SubOp::Sin:
+                    SetDest(0, dest, "sin(" + op_a + ")", 1, 1, instr.alu.abs_d);
+                    break;
+                case SubOp::Ex2:
+                    SetDest(0, dest, "exp2(" + op_a + ")", 1, 1, instr.alu.abs_d);
+                    break;
+                case SubOp::Lg2:
+                    SetDest(0, dest, "log2(" + op_a + ")", 1, 1, instr.alu.abs_d);
+                    break;
+                case SubOp::Rcp:
+                    SetDest(0, dest, "1.0 / " + op_a, 1, 1, instr.alu.abs_d);
+                    break;
+                case SubOp::Rsq:
+                    SetDest(0, dest, "inversesqrt(" + op_a + ")", 1, 1, instr.alu.abs_d);
+                    break;
+                case SubOp::Min:
+                    SetDest(0, dest, "min(" + op_a + "," + op_b + ")", 1, 1, instr.alu.abs_d);
+                    break;
+                default:
+                    NGLOG_CRITICAL(HW_GPU, "Unhandled MUFU sub op: {}",
+                                   static_cast<unsigned>(instr.sub_op.Value()));
+                    UNREACHABLE();
+                }
                 break;
             }
+            default: {
+                NGLOG_CRITICAL(HW_GPU, "Unhandled arithmetic instruction: {} ({}): {}",
+                               static_cast<unsigned>(instr.opcode.EffectiveOpCode()),
+                               OpCode::GetInfo(instr.opcode).name, instr.hex);
+                UNREACHABLE();
+            }
             }
             break;
         }
         case OpCode::Type::Ffma: {
-            ASSERT_MSG(!instr.ffma.negate_b, "untested");
-            ASSERT_MSG(!instr.ffma.negate_c, "untested");
-
             std::string dest = GetRegister(instr.gpr0);
             std::string op_a = GetRegister(instr.gpr8);
-
             std::string op_b = instr.ffma.negate_b ? "-" : "";
-            op_b += GetUniform(instr.uniform);
-
             std::string op_c = instr.ffma.negate_c ? "-" : "";
-            op_c += GetRegister(instr.gpr39);
 
             switch (instr.opcode.EffectiveOpCode()) {
             case OpCode::Id::FFMA_CR: {
-                SetDest(0, dest, op_a + " * " + op_b + " + " + op_c, 1, 1);
+                op_b += GetUniform(instr.uniform);
+                op_c += GetRegister(instr.gpr39);
                 break;
             }
-
-            default: {
-                LOG_CRITICAL(HW_GPU, "Unhandled arithmetic FFMA instruction: 0x%02x (%s): 0x%08x",
-                             static_cast<unsigned>(instr.opcode.EffectiveOpCode()),
-                             OpCode::GetInfo(instr.opcode).name.c_str(), instr.hex);
-                throw DecompileFail("Unhandled instruction");
+            case OpCode::Id::FFMA_RR: {
+                op_b += GetRegister(instr.gpr20);
+                op_c += GetRegister(instr.gpr39);
+                break;
+            }
+            case OpCode::Id::FFMA_RC: {
+                op_b += GetRegister(instr.gpr39);
+                op_c += GetUniform(instr.uniform);
+                break;
+            }
+            case OpCode::Id::FFMA_IMM: {
+                op_b += GetImmediate19(instr);
+                op_c += GetRegister(instr.gpr39);
                 break;
             }
+            default: {
+                NGLOG_CRITICAL(HW_GPU, "Unhandled FFMA instruction: {} ({}): {}",
+                               static_cast<unsigned>(instr.opcode.EffectiveOpCode()),
+                               OpCode::GetInfo(instr.opcode).name, instr.hex);
+                UNREACHABLE();
+            }
             }
+
+            SetDest(0, dest, op_a + " * " + op_b + " + " + op_c, 1, 1);
             break;
         }
         case OpCode::Type::Memory: {
@@ -315,22 +413,40 @@ private:
 
             switch (instr.opcode.EffectiveOpCode()) {
             case OpCode::Id::LD_A: {
-                ASSERT(instr.attribute.fmt20.size == 0);
+                ASSERT_MSG(instr.attribute.fmt20.size == 0, "untested");
                 SetDest(instr.attribute.fmt20.element, gpr0, GetInputAttribute(attribute), 1, 4);
                 break;
             }
             case OpCode::Id::ST_A: {
-                ASSERT(instr.attribute.fmt20.size == 0);
+                ASSERT_MSG(instr.attribute.fmt20.size == 0, "untested");
                 SetDest(instr.attribute.fmt20.element, GetOutputAttribute(attribute), gpr0, 4, 1);
                 break;
             }
-            default: {
-                LOG_CRITICAL(HW_GPU, "Unhandled memory instruction: 0x%02x (%s): 0x%08x",
-                             static_cast<unsigned>(instr.opcode.EffectiveOpCode()),
-                             OpCode::GetInfo(instr.opcode).name.c_str(), instr.hex);
-                throw DecompileFail("Unhandled instruction");
+            case OpCode::Id::TEXS: {
+                ASSERT_MSG(instr.attribute.fmt20.size == 4, "untested");
+                const std::string op_a = GetRegister(instr.gpr8);
+                const std::string op_b = GetRegister(instr.gpr20);
+                const std::string sampler = GetSampler(instr.sampler);
+                const std::string coord = "vec2 coords = vec2(" + op_a + ", " + op_b + ");";
+                // Add an extra scope and declare the texture coords inside to prevent overwriting
+                // them in case they are used as outputs of the texs instruction.
+                shader.AddLine("{");
+                ++shader.scope;
+                shader.AddLine(coord);
+                const std::string texture = "texture(" + sampler + ", coords)";
+                for (unsigned elem = 0; elem < instr.attribute.fmt20.size; ++elem) {
+                    SetDest(elem, GetRegister(instr.gpr0, elem), texture, 1, 4);
+                }
+                --shader.scope;
+                shader.AddLine("}");
                 break;
             }
+            default: {
+                NGLOG_CRITICAL(HW_GPU, "Unhandled memory instruction: {} ({}): {}",
+                               static_cast<unsigned>(instr.opcode.EffectiveOpCode()),
+                               OpCode::GetInfo(instr.opcode).name, instr.hex);
+                UNREACHABLE();
+            }
             }
             break;
         }
@@ -342,14 +458,18 @@ private:
                 offset = PROGRAM_END - 1;
                 break;
             }
-
-            default: {
-                LOG_CRITICAL(HW_GPU, "Unhandled instruction: 0x%02x (%s): 0x%08x",
-                             static_cast<unsigned>(instr.opcode.EffectiveOpCode()),
-                             OpCode::GetInfo(instr.opcode).name.c_str(), instr.hex);
-                throw DecompileFail("Unhandled instruction");
+            case OpCode::Id::IPA: {
+                const auto& attribute = instr.attribute.fmt28;
+                std::string dest = GetRegister(instr.gpr0);
+                SetDest(attribute.element, dest, GetInputAttribute(attribute.index), 1, 4);
                 break;
             }
+            default: {
+                NGLOG_CRITICAL(HW_GPU, "Unhandled instruction: {} ({}): {}",
+                               static_cast<unsigned>(instr.opcode.EffectiveOpCode()),
+                               OpCode::GetInfo(instr.opcode).name, instr.hex);
+                UNREACHABLE();
+            }
             }
 
             break;
@@ -379,7 +499,7 @@ private:
         for (const auto& subroutine : subroutines) {
             shader.AddLine("bool " + subroutine.GetName() + "();");
         }
-        shader.AddLine("");
+        shader.AddNewLine();
 
         // Add the main entry point
         shader.AddLine("bool exec_shader() {");
@@ -422,14 +542,14 @@ private:
                     }
 
                     --shader.scope;
-                    shader.AddLine("}");
+                    shader.AddLine('}');
                 }
 
                 shader.AddLine("default: return false;");
-                shader.AddLine("}");
+                shader.AddLine('}');
 
                 --shader.scope;
-                shader.AddLine("}");
+                shader.AddLine('}');
 
                 shader.AddLine("return false;");
             }
@@ -456,7 +576,7 @@ private:
         for (const auto& reg : declr_register) {
             declarations.AddLine("float " + reg + " = 0.0;");
         }
-        declarations.AddLine("");
+        declarations.AddNewLine();
 
         for (const auto& index : declr_input_attribute) {
             // TODO(bunnei): Use proper number of elements for these
@@ -465,7 +585,7 @@ private:
                                                 static_cast<u32>(Attribute::Index::Attribute_0)) +
                                  ") in vec4 " + GetInputAttribute(index) + ";");
         }
-        declarations.AddLine("");
+        declarations.AddNewLine();
 
         for (const auto& index : declr_output_attribute) {
             // TODO(bunnei): Use proper number of elements for these
@@ -474,15 +594,15 @@ private:
                                                 static_cast<u32>(Attribute::Index::Attribute_0)) +
                                  ") out vec4 " + GetOutputAttribute(index) + ";");
         }
-        declarations.AddLine("");
+        declarations.AddNewLine();
 
         unsigned const_buffer_layout = 0;
         for (const auto& entry : GetConstBuffersDeclarations()) {
             declarations.AddLine("layout(std430) buffer " + entry.GetName());
-            declarations.AddLine("{");
+            declarations.AddLine('{');
             declarations.AddLine("    float c" + std::to_string(entry.GetIndex()) + "[];");
             declarations.AddLine("};");
-            declarations.AddLine("");
+            declarations.AddNewLine();
             ++const_buffer_layout;
         }
     }
@@ -501,7 +621,7 @@ private:
     std::set<Attribute::Index> declr_input_attribute;
     std::set<Attribute::Index> declr_output_attribute;
     std::array<ConstBufferEntry, Maxwell3D::Regs::MaxConstBuffers> declr_const_buffers;
-};
+};
 
 std::string GetCommonDeclarations() {
     return "bool exec_shader();";
@@ -514,7 +634,7 @@ boost::optional<ProgramResult> DecompileProgram(const ProgramCode& program_code,
         GLSLGenerator generator(subroutines, program_code, main_offset, stage);
         return ProgramResult{generator.GetShaderCode(), generator.GetEntries()};
     } catch (const DecompileFail& exception) {
-        LOG_ERROR(HW_GPU, "Shader decompilation failed: %s", exception.what());
+        NGLOG_ERROR(HW_GPU, "Shader decompilation failed: {}", exception.what());
     }
     return boost::none;
 }
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.h b/src/video_core/renderer_opengl/gl_shader_decompiler.h
index 9f6e0ef58..382c76b7a 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.h
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.h
@@ -2,6 +2,8 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
+#pragma once
+
 #include <array>
 #include <functional>
 #include <string>
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp
index aeea1c805..254f6e2c3 100644
--- a/src/video_core/renderer_opengl/gl_shader_gen.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp
@@ -27,10 +27,19 @@ out gl_PerVertex {
     vec4 gl_Position;
 };
 
+out vec4 position;
+
+layout (std140) uniform vs_config {
+    vec4 viewport_flip;
+};
+
 void main() {
     exec_shader();
-}
 
+    // Viewport can be flipped, which is unsupported by glViewport
+    position.xy *= viewport_flip.xy;
+    gl_Position = position;
+}
 )";
     out += program.first;
     return {out, program.second};
@@ -46,8 +55,13 @@ ProgramResult GenerateFragmentShader(const ShaderSetup& setup, const MaxwellFSCo
                                 .get_value_or({});
     out += R"(
 
+in vec4 position;
 out vec4 color;
 
+layout (std140) uniform fs_config {
+    vec4 viewport_flip;
+};
+
 uniform sampler2D tex[32];
 
 void main() {
diff --git a/src/video_core/renderer_opengl/gl_shader_manager.cpp b/src/video_core/renderer_opengl/gl_shader_manager.cpp
index 85b838faa..17b3925a0 100644
--- a/src/video_core/renderer_opengl/gl_shader_manager.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_manager.cpp
@@ -53,6 +53,15 @@ void SetShaderSamplerBindings(GLuint shader) {
 
 } // namespace Impl
 
-void MaxwellUniformData::SetFromRegs(const Maxwell3D::State::ShaderStageInfo& shader_stage) {}
+// Refreshes the uniform data from the current Maxwell3D registers. Only the x/y viewport flip
+// factors are written here; shader_stage is not consulted yet.
+void MaxwellUniformData::SetFromRegs(const Maxwell3D::State::ShaderStageInfo& shader_stage) {
+    // GetInstance() is a static accessor, so call it on the type instead of a temporary System
+    const auto& regs = Core::System::GetInstance().GPU().Maxwell3D().regs;
+
+    // TODO(bunnei): Support more than one viewport
+    viewport_flip[0] = regs.viewport_transform[0].scale_x < 0.0f ? -1.0f : 1.0f;
+    viewport_flip[1] = regs.viewport_transform[0].scale_y < 0.0f ? -1.0f : 1.0f;
+}
 
 } // namespace GLShader
diff --git a/src/video_core/renderer_opengl/gl_shader_manager.h b/src/video_core/renderer_opengl/gl_shader_manager.h
index be63320e0..e963b4b7e 100644
--- a/src/video_core/renderer_opengl/gl_shader_manager.h
+++ b/src/video_core/renderer_opengl/gl_shader_manager.h
@@ -30,10 +30,9 @@ void SetShaderSamplerBindings(GLuint shader);
 //       Not following that rule will cause problems on some AMD drivers.
 struct MaxwellUniformData {
     void SetFromRegs(const Maxwell3D::State::ShaderStageInfo& shader_stage);
-    // TODO(Subv): Use this for something.
+    alignas(16) GLvec4 viewport_flip; // x/y: viewport flip factors (-1.0 or 1.0)
 };
-// static_assert(sizeof(MaxwellUniformData) == 1024, "MaxwellUniformData structure size is
-// incorrect");
+static_assert(sizeof(MaxwellUniformData) == 16, "MaxwellUniformData structure size is incorrect");
 static_assert(sizeof(MaxwellUniformData) < 16384,
               "MaxwellUniformData structure must be less than 16kb as per the OpenGL spec");
 
diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp
index 7b8a15ed2..f91dfe36a 100644
--- a/src/video_core/renderer_opengl/gl_state.cpp
+++ b/src/video_core/renderer_opengl/gl_state.cpp
@@ -2,8 +2,8 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
+#include <iterator>
 #include <glad/glad.h>
-#include "common/common_funcs.h"
 #include "common/logging/log.h"
 #include "video_core/renderer_opengl/gl_state.h"
 
@@ -192,7 +192,7 @@ void OpenGLState::Apply() const {
     }
 
     // Textures
-    for (unsigned i = 0; i < ARRAY_SIZE(texture_units); ++i) {
+    for (size_t i = 0; i < std::size(texture_units); ++i) {
         if (texture_units[i].texture_2d != cur_state.texture_units[i].texture_2d) {
             glActiveTexture(TextureUnits::MaxwellTexture(i).Enum());
             glBindTexture(GL_TEXTURE_2D, texture_units[i].texture_2d);
diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.h b/src/video_core/renderer_opengl/gl_stream_buffer.h
index 4bc2f52e0..e78dc5784 100644
--- a/src/video_core/renderer_opengl/gl_stream_buffer.h
+++ b/src/video_core/renderer_opengl/gl_stream_buffer.h
@@ -2,6 +2,8 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
+#pragma once
+
 #include <memory>
 #include <glad/glad.h>
 #include "common/common_types.h"
diff --git a/src/video_core/renderer_opengl/maxwell_to_gl.h b/src/video_core/renderer_opengl/maxwell_to_gl.h
index 7909dcfc3..a49265b38 100644
--- a/src/video_core/renderer_opengl/maxwell_to_gl.h
+++ b/src/video_core/renderer_opengl/maxwell_to_gl.h
@@ -31,7 +31,7 @@ inline GLenum VertexType(Maxwell::VertexAttribute attrib) {
             return GL_UNSIGNED_BYTE;
         }
 
-        LOG_CRITICAL(Render_OpenGL, "Unimplemented vertex size=%s", attrib.SizeString().c_str());
+        NGLOG_CRITICAL(Render_OpenGL, "Unimplemented vertex size={}", attrib.SizeString());
         UNREACHABLE();
         return {};
     }
@@ -40,7 +40,21 @@ inline GLenum VertexType(Maxwell::VertexAttribute attrib) {
         return GL_FLOAT;
     }
 
-    LOG_CRITICAL(Render_OpenGL, "Unimplemented vertex type=%s", attrib.TypeString().c_str());
+    NGLOG_CRITICAL(Render_OpenGL, "Unimplemented vertex type={}", attrib.TypeString());
+    UNREACHABLE();
+    return {};
+}
+
+inline GLenum IndexFormat(Maxwell::IndexFormat index_format) {
+    switch (index_format) {
+    case Maxwell::IndexFormat::UnsignedByte:
+        return GL_UNSIGNED_BYTE;
+    case Maxwell::IndexFormat::UnsignedShort:
+        return GL_UNSIGNED_SHORT;
+    case Maxwell::IndexFormat::UnsignedInt:
+        return GL_UNSIGNED_INT;
+    }
+    NGLOG_CRITICAL(Render_OpenGL, "Unimplemented index_format={}", static_cast<u32>(index_format));
     UNREACHABLE();
     return {};
 }
@@ -52,7 +66,7 @@ inline GLenum PrimitiveTopology(Maxwell::PrimitiveTopology topology) {
     case Maxwell::PrimitiveTopology::TriangleStrip:
         return GL_TRIANGLE_STRIP;
     }
-    LOG_CRITICAL(Render_OpenGL, "Unimplemented primitive topology=%d", topology);
+    NGLOG_CRITICAL(Render_OpenGL, "Unimplemented topology={}", static_cast<u32>(topology));
     UNREACHABLE();
     return {};
 }
@@ -64,18 +78,90 @@ inline GLenum TextureFilterMode(Tegra::Texture::TextureFilter filter_mode) {
     case Tegra::Texture::TextureFilter::Nearest:
         return GL_NEAREST;
     }
-    LOG_CRITICAL(Render_OpenGL, "Unimplemented texture filter mode=%u",
-                 static_cast<u32>(filter_mode));
+    NGLOG_CRITICAL(Render_OpenGL, "Unimplemented texture filter mode={}",
+                   static_cast<u32>(filter_mode));
     UNREACHABLE();
     return {};
 }
 
 inline GLenum WrapMode(Tegra::Texture::WrapMode wrap_mode) {
     switch (wrap_mode) {
+    case Tegra::Texture::WrapMode::Wrap:
+        return GL_REPEAT;
     case Tegra::Texture::WrapMode::ClampToEdge:
         return GL_CLAMP_TO_EDGE;
+    case Tegra::Texture::WrapMode::ClampOGL:
+        // TODO(Subv): GL_CLAMP was removed as of OpenGL 3.1, to implement GL_CLAMP, we can use
+        // GL_CLAMP_TO_BORDER to get the border color of the texture, and then sample the edge to
+        // manually mix them. However the shader part of this is not yet implemented.
+        return GL_CLAMP_TO_BORDER;
+    }
+    NGLOG_CRITICAL(Render_OpenGL, "Unimplemented texture wrap mode={}",
+                   static_cast<u32>(wrap_mode));
+    UNREACHABLE();
+    return {};
+}
+
+inline GLenum BlendEquation(Maxwell::Blend::Equation equation) {
+    switch (equation) {
+    case Maxwell::Blend::Equation::Add:
+        return GL_FUNC_ADD;
+    case Maxwell::Blend::Equation::Subtract:
+        return GL_FUNC_SUBTRACT;
+    case Maxwell::Blend::Equation::ReverseSubtract:
+        return GL_FUNC_REVERSE_SUBTRACT;
+    case Maxwell::Blend::Equation::Min:
+        return GL_MIN;
+    case Maxwell::Blend::Equation::Max:
+        return GL_MAX;
+    }
+    NGLOG_CRITICAL(Render_OpenGL, "Unimplemented blend equation={}", static_cast<u32>(equation));
+    UNREACHABLE();
+    return {};
+}
+
+inline GLenum BlendFunc(Maxwell::Blend::Factor factor) {
+    switch (factor) {
+    case Maxwell::Blend::Factor::Zero:
+        return GL_ZERO;
+    case Maxwell::Blend::Factor::One:
+        return GL_ONE;
+    case Maxwell::Blend::Factor::SourceColor:
+        return GL_SRC_COLOR;
+    case Maxwell::Blend::Factor::OneMinusSourceColor:
+        return GL_ONE_MINUS_SRC_COLOR;
+    case Maxwell::Blend::Factor::SourceAlpha:
+        return GL_SRC_ALPHA;
+    case Maxwell::Blend::Factor::OneMinusSourceAlpha:
+        return GL_ONE_MINUS_SRC_ALPHA;
+    case Maxwell::Blend::Factor::DestAlpha:
+        return GL_DST_ALPHA;
+    case Maxwell::Blend::Factor::OneMinusDestAlpha:
+        return GL_ONE_MINUS_DST_ALPHA;
+    case Maxwell::Blend::Factor::DestColor:
+        return GL_DST_COLOR;
+    case Maxwell::Blend::Factor::OneMinusDestColor:
+        return GL_ONE_MINUS_DST_COLOR;
+    case Maxwell::Blend::Factor::SourceAlphaSaturate:
+        return GL_SRC_ALPHA_SATURATE;
+    case Maxwell::Blend::Factor::Source1Color:
+        return GL_SRC1_COLOR;
+    case Maxwell::Blend::Factor::OneMinusSource1Color:
+        return GL_ONE_MINUS_SRC1_COLOR;
+    case Maxwell::Blend::Factor::Source1Alpha:
+        return GL_SRC1_ALPHA;
+    case Maxwell::Blend::Factor::OneMinusSource1Alpha:
+        return GL_ONE_MINUS_SRC1_ALPHA;
+    case Maxwell::Blend::Factor::ConstantColor:
+        return GL_CONSTANT_COLOR;
+    case Maxwell::Blend::Factor::OneMinusConstantColor:
+        return GL_ONE_MINUS_CONSTANT_COLOR;
+    case Maxwell::Blend::Factor::ConstantAlpha:
+        return GL_CONSTANT_ALPHA;
+    case Maxwell::Blend::Factor::OneMinusConstantAlpha:
+        return GL_ONE_MINUS_CONSTANT_ALPHA;
     }
-    LOG_CRITICAL(Render_OpenGL, "Unimplemented texture wrap mode=%u", static_cast<u32>(wrap_mode));
+    NGLOG_CRITICAL(Render_OpenGL, "Unimplemented blend factor={}", static_cast<u32>(factor));
     UNREACHABLE();
     return {};
 }
diff --git a/src/video_core/textures/decoders.cpp b/src/video_core/textures/decoders.cpp
index 2e87281eb..4df687786 100644
--- a/src/video_core/textures/decoders.cpp
+++ b/src/video_core/textures/decoders.cpp
@@ -48,31 +48,39 @@ u32 BytesPerPixel(TextureFormat format) {
     case TextureFormat::DXT1:
         // In this case a 'pixel' actually refers to a 4x4 tile.
         return 8;
+    case TextureFormat::DXT23:
+    case TextureFormat::DXT45:
+        // In this case a 'pixel' actually refers to a 4x4 tile.
+        return 16;
     case TextureFormat::A8R8G8B8:
         return 4;
+    case TextureFormat::B5G6R5:
+        return 2;
     default:
         UNIMPLEMENTED_MSG("Format not implemented");
         break;
     }
 }
 
-std::vector<u8> UnswizzleTexture(VAddr address, TextureFormat format, u32 width, u32 height) {
+std::vector<u8> UnswizzleTexture(VAddr address, TextureFormat format, u32 width, u32 height,
+                                 u32 block_height) {
     u8* data = Memory::GetPointer(address);
     u32 bytes_per_pixel = BytesPerPixel(format);
 
-    static constexpr u32 DefaultBlockHeight = 16;
-
     std::vector<u8> unswizzled_data(width * height * bytes_per_pixel);
 
     switch (format) {
     case TextureFormat::DXT1:
-        // In the DXT1 format, each 4x4 tile is swizzled instead of just individual pixel values.
+    case TextureFormat::DXT23:
+    case TextureFormat::DXT45:
+        // In the DXT formats, each 4x4 tile is swizzled instead of just individual pixel values.
         CopySwizzledData(width / 4, height / 4, bytes_per_pixel, bytes_per_pixel, data,
-                         unswizzled_data.data(), true, DefaultBlockHeight);
+                         unswizzled_data.data(), true, block_height);
         break;
     case TextureFormat::A8R8G8B8:
+    case TextureFormat::B5G6R5:
         CopySwizzledData(width, height, bytes_per_pixel, bytes_per_pixel, data,
-                         unswizzled_data.data(), true, DefaultBlockHeight);
+                         unswizzled_data.data(), true, block_height);
         break;
     default:
         UNIMPLEMENTED_MSG("Format not implemented");
@@ -89,7 +97,10 @@ std::vector<u8> DecodeTexture(const std::vector<u8>& texture_data, TextureFormat
     // TODO(Subv): Implement.
     switch (format) {
     case TextureFormat::DXT1:
+    case TextureFormat::DXT23:
+    case TextureFormat::DXT45:
     case TextureFormat::A8R8G8B8:
+    case TextureFormat::B5G6R5:
         // TODO(Subv): For the time being just forward the same data without any decoding.
         rgba_data = texture_data;
         break;
diff --git a/src/video_core/textures/decoders.h b/src/video_core/textures/decoders.h
index 0c21694ff..a700911cf 100644
--- a/src/video_core/textures/decoders.h
+++ b/src/video_core/textures/decoders.h
@@ -14,7 +14,8 @@ namespace Texture {
 /**
  * Unswizzles a swizzled texture without changing its format.
  */
-std::vector<u8> UnswizzleTexture(VAddr address, TextureFormat format, u32 width, u32 height);
+std::vector<u8> UnswizzleTexture(VAddr address, TextureFormat format, u32 width, u32 height,
+                                 u32 block_height = TICEntry::DefaultBlockHeight);
 
 /**
  * Decodes an unswizzled texture into a A8R8G8B8 texture.
diff --git a/src/video_core/textures/texture.h b/src/video_core/textures/texture.h
index c12ed6e1d..86e45aa88 100644
--- a/src/video_core/textures/texture.h
+++ b/src/video_core/textures/texture.h
@@ -4,6 +4,7 @@
 
 #pragma once
 
+#include "common/assert.h"
 #include "common/bit_field.h"
 #include "common/common_funcs.h"
 #include "common/common_types.h"
@@ -13,8 +14,11 @@ namespace Tegra {
 namespace Texture {
 
 enum class TextureFormat : u32 {
-    A8R8G8B8 = 8,
+    A8R8G8B8 = 0x8,
+    B5G6R5 = 0x15,
     DXT1 = 0x24,
+    DXT23 = 0x25,
+    DXT45 = 0x26,
 };
 
 enum class TextureType : u32 {
@@ -55,6 +59,8 @@ union TextureHandle {
 static_assert(sizeof(TextureHandle) == 4, "TextureHandle has wrong size");
 
 struct TICEntry {
+    static constexpr u32 DefaultBlockHeight = 16;
+
     union {
         u32 raw;
         BitField<0, 7, TextureFormat> format;
@@ -68,7 +74,12 @@ struct TICEntry {
         BitField<0, 16, u32> address_high;
         BitField<21, 3, TICHeaderVersion> header_version;
     };
-    INSERT_PADDING_BYTES(4);
+    union {
+        BitField<3, 3, u32> block_height;
+
+        // High 16 bits of the pitch value
+        BitField<0, 16, u32> pitch_high;
+    };
     union {
         BitField<0, 16, u32> width_minus_1;
         BitField<23, 4, TextureType> texture_type;
@@ -80,6 +91,13 @@ struct TICEntry {
         return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) | address_low);
     }
 
+    u32 Pitch() const {
+        ASSERT(header_version == TICHeaderVersion::Pitch ||
+               header_version == TICHeaderVersion::PitchColorKey);
+        // The pitch value is 21 bits, and is 32B aligned.
+        return pitch_high << 5;
+    }
+
     u32 Width() const {
         return width_minus_1 + 1;
     }
@@ -88,6 +106,13 @@ struct TICEntry {
         return height_minus_1 + 1;
     }
 
+    u32 BlockHeight() const {
+        ASSERT(header_version == TICHeaderVersion::BlockLinear ||
+               header_version == TICHeaderVersion::BlockLinearColorKey);
+        // The block height is stored in log2 format.
+        return 1 << block_height;
+    }
+
     bool IsTiled() const {
         return header_version == TICHeaderVersion::BlockLinear ||
                header_version == TICHeaderVersion::BlockLinearColorKey;