9 files changed, 113 insertions, 84 deletions
diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp
index 2f92be661..4b59984ad 100644
--- a/src/video_core/command_processor.cpp
+++ b/src/video_core/command_processor.cpp
@@ -5,6 +5,7 @@
 #include <cmath>
 #include <boost/range/algorithm/fill.hpp>
 
+#include "common/alignment.h"
 #include "common/microprofile.h"
 #include "common/profiler.h"
 
@@ -142,7 +143,7 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
                         // Send to renderer
                         using Pica::Shader::OutputVertex;
                         auto AddTriangle = [](const OutputVertex& v0, const OutputVertex& v1, const OutputVertex& v2) {
-                            VideoCore::g_renderer->rasterizer->AddTriangle(v0, v1, v2);
+                            VideoCore::g_renderer->Rasterizer()->AddTriangle(v0, v1, v2);
                         };
 
                         g_state.immediate.primitive_assembler.SubmitVertex(output, AddTriangle);
@@ -155,7 +156,7 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
         case PICA_REG_INDEX(gpu_mode):
             if (regs.gpu_mode == Regs::GPUMode::Configuring && regs.vs_default_attributes_setup.index == 15) {
                 // Draw immediate mode triangles when GPU Mode is set to GPUMode::Configuring
-                VideoCore::g_renderer->rasterizer->DrawTriangles();
+                VideoCore::g_renderer->Rasterizer()->DrawTriangles();
             }
             break;
 
@@ -199,7 +200,7 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
             for (int loader = 0; loader < 12; ++loader) {
                 const auto& loader_config = attribute_config.attribute_loaders[loader];
 
-                u32 load_address = base_address + loader_config.data_offset;
+                u32 offset = 0;
 
                 // TODO: What happens if a loader overwrites a previous one's data?
                 for (unsigned component = 0; component < loader_config.component_count; ++component) {
@@ -210,15 +211,18 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
 
                     u32 attribute_index = loader_config.GetComponent(component);
                     if (attribute_index < 12) {
-                        vertex_attribute_sources[attribute_index] = load_address;
+                        int element_size = attribute_config.GetElementSizeInBytes(attribute_index);
+                        offset = Common::AlignUp(offset, element_size);
+                        vertex_attribute_sources[attribute_index] = base_address + loader_config.data_offset + offset;
                         vertex_attribute_strides[attribute_index] = static_cast<u32>(loader_config.byte_count);
                         vertex_attribute_formats[attribute_index] = attribute_config.GetFormat(attribute_index);
                         vertex_attribute_elements[attribute_index] = attribute_config.GetNumElements(attribute_index);
-                        vertex_attribute_element_size[attribute_index] = attribute_config.GetElementSizeInBytes(attribute_index);
-                        load_address += attribute_config.GetStride(attribute_index);
+                        vertex_attribute_element_size[attribute_index] = element_size;
+                        offset += attribute_config.GetStride(attribute_index);
                     } else if (attribute_index < 16) {
                         // Attribute ids 12, 13, 14 and 15 signify 4, 8, 12 and 16-byte paddings, respectively
-                        load_address += (attribute_index - 11) * 4;
+                        offset = Common::AlignUp(offset, 4);
+                        offset += (attribute_index - 11) * 4;
                     } else {
                         UNREACHABLE(); // This is truly unreachable due to the number of bits for each component
                     }
@@ -230,7 +234,7 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
 
             const auto& index_info = regs.index_array;
             const u8* index_address_8 = Memory::GetPhysicalPointer(base_address + index_info.offset);
-            const u16* index_address_16 = (u16*)index_address_8;
+            const u16* index_address_16 = reinterpret_cast<const u16*>(index_address_8);
             bool index_u16 = index_info.format != 0;
 
 #if PICA_DUMP_GEOMETRY
@@ -341,10 +345,11 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
                                         : (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::SHORT) ? 2 : 1);
                                 }
 
-                                const float srcval = (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::BYTE) ? *(s8*)srcdata :
-                                    (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::UBYTE) ? *(u8*)srcdata :
-                                    (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::SHORT) ? *(s16*)srcdata :
-                                    *(float*)srcdata;
+                                const float srcval =
+                                    (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::BYTE)  ? *reinterpret_cast<const s8*>(srcdata) :
+                                    (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::UBYTE) ? *reinterpret_cast<const u8*>(srcdata) :
+                                    (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::SHORT) ? *reinterpret_cast<const s16*>(srcdata) :
+                                    *reinterpret_cast<const float*>(srcdata);
 
                                 input.attr[i][comp] = float24::FromFloat32(srcval);
                                 LOG_TRACE(HW_GPU, "Loaded component %x of attribute %x for vertex %x (index %x) from 0x%08x + 0x%08x + 0x%04x: %f",
@@ -396,7 +401,7 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
                 using Pica::Shader::OutputVertex;
                 auto AddTriangle = [](
                         const OutputVertex& v0, const OutputVertex& v1, const OutputVertex& v2) {
-                    VideoCore::g_renderer->rasterizer->AddTriangle(v0, v1, v2);
+                    VideoCore::g_renderer->Rasterizer()->AddTriangle(v0, v1, v2);
                 };
 
                 primitive_assembler.SubmitVertex(output, AddTriangle);
@@ -407,7 +412,7 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
                                                           range.second, range.first);
             }
 
-            VideoCore::g_renderer->rasterizer->DrawTriangles();
+            VideoCore::g_renderer->Rasterizer()->DrawTriangles();
 
 #if PICA_DUMP_GEOMETRY
             geometry_dumper.Dump();
@@ -542,7 +547,7 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
             break;
     }
 
-    VideoCore::g_renderer->rasterizer->NotifyPicaRegisterChanged(id);
+    VideoCore::g_renderer->Rasterizer()->NotifyPicaRegisterChanged(id);
 
     if (g_debug_context)
         g_debug_context->OnEvent(DebugContext::Event::PicaCommandProcessed, reinterpret_cast<void*>(&id));
diff --git a/src/video_core/debug_utils/debug_utils.cpp b/src/video_core/debug_utils/debug_utils.cpp
index 6e21caa78..bac6d69c7 100644
--- a/src/video_core/debug_utils/debug_utils.cpp
+++ b/src/video_core/debug_utils/debug_utils.cpp
@@ -48,7 +48,7 @@ void DebugContext::OnEvent(Event event, void* data) {
         std::unique_lock<std::mutex> lock(breakpoint_mutex);
 
         // Commit the hardware renderer's framebuffer so it will show on debug widgets
-        VideoCore::g_renderer->rasterizer->FlushFramebuffer();
+        VideoCore::g_renderer->Rasterizer()->FlushFramebuffer();
 
         // TODO: Should stop the CPU thread here once we multithread emulation.
 
@@ -117,13 +117,13 @@ void GeometryDumper::Dump() {
 void DumpShader(const std::string& filename, const Regs::ShaderConfig& config, const Shader::ShaderSetup& setup, const Regs::VSOutputAttributes* output_attributes)
 {
     struct StuffToWrite {
-        u8* pointer;
+        const u8* pointer; // start of a queued chunk; not owned, only read back when the file is written
         u32 size;
     };
     std::vector<StuffToWrite> writing_queue;
     u32 write_offset = 0;
 
-    auto QueueForWriting = [&writing_queue,&write_offset](u8* pointer, u32 size) {
+    auto QueueForWriting = [&writing_queue,&write_offset](const u8* pointer, u32 size) {
         writing_queue.push_back({pointer, size});
         u32 old_write_offset = write_offset;
         write_offset += size;
@@ -228,27 +228,27 @@ void DumpShader(const std::string& filename, const Regs::ShaderConfig& config, c
     DVLPHeader dvlp{ DVLPHeader::MAGIC_WORD };
     DVLEHeader dvle{ DVLEHeader::MAGIC_WORD };
 
-    QueueForWriting((u8*)&dvlb, sizeof(dvlb));
-    u32 dvlp_offset = QueueForWriting((u8*)&dvlp, sizeof(dvlp));
-    dvlb.dvle_offset = QueueForWriting((u8*)&dvle, sizeof(dvle));
+    QueueForWriting(reinterpret_cast<const u8*>(&dvlb), sizeof(dvlb));
+    u32 dvlp_offset = QueueForWriting(reinterpret_cast<const u8*>(&dvlp), sizeof(dvlp));
+    dvlb.dvle_offset = QueueForWriting(reinterpret_cast<const u8*>(&dvle), sizeof(dvle));
 
     // TODO: Reduce the amount of binary code written to relevant portions
     dvlp.binary_offset = write_offset - dvlp_offset;
     dvlp.binary_size_words = setup.program_code.size();
-    QueueForWriting((u8*)setup.program_code.data(), setup.program_code.size() * sizeof(u32));
+    QueueForWriting(reinterpret_cast<const u8*>(setup.program_code.data()), setup.program_code.size() * sizeof(u32));
 
     dvlp.swizzle_info_offset = write_offset - dvlp_offset;
     dvlp.swizzle_info_num_entries = setup.swizzle_data.size();
     u32 dummy = 0;
     for (unsigned int i = 0; i < setup.swizzle_data.size(); ++i) {
-        QueueForWriting((u8*)&setup.swizzle_data[i], sizeof(setup.swizzle_data[i]));
-        QueueForWriting((u8*)&dummy, sizeof(dummy));
+        QueueForWriting(reinterpret_cast<const u8*>(&setup.swizzle_data[i]), sizeof(setup.swizzle_data[i]));
+        QueueForWriting(reinterpret_cast<const u8*>(&dummy), sizeof(dummy));
     }
 
     dvle.main_offset_words = config.main_offset;
     dvle.output_register_table_offset = write_offset - dvlb.dvle_offset;
     dvle.output_register_table_size = static_cast<u32>(output_info_table.size());
-    QueueForWriting((u8*)output_info_table.data(), static_cast<u32>(output_info_table.size() * sizeof(OutputRegisterInfo)));
+    QueueForWriting(reinterpret_cast<const u8*>(output_info_table.data()), static_cast<u32>(output_info_table.size() * sizeof(OutputRegisterInfo)));
 
     // TODO: Create a label table for "main"
 
@@ -292,14 +292,14 @@ void DumpShader(const std::string& filename, const Regs::ShaderConfig& config, c
     dvle.constant_table_offset = write_offset - dvlb.dvle_offset;
     dvle.constant_table_size = constant_table.size();
     for (const auto& constant : constant_table) {
-        QueueForWriting((uint8_t*)&constant, sizeof(constant));
+        QueueForWriting(reinterpret_cast<const u8*>(&constant), sizeof(constant));
     }
 
     // Write data to file
     std::ofstream file(filename, std::ios_base::out | std::ios_base::binary);
 
-    for (auto& chunk : writing_queue) {
-        file.write((char*)chunk.pointer, chunk.size);
+    for (const auto& chunk : writing_queue) {
+        file.write(reinterpret_cast<const char*>(chunk.pointer), chunk.size);
     }
 }
 
diff --git a/src/video_core/pica.h b/src/video_core/pica.h
index 2e0c33201..337cff8ce 100644
--- a/src/video_core/pica.h
+++ b/src/video_core/pica.h
@@ -117,8 +117,8 @@ struct Regs {
     INSERT_PADDING_WORDS(0x11);
 
     union {
-        BitField< 0, 16, u32> x;
-        BitField<16, 16, u32> y;
+        BitField< 0, 10, s32> x;
+        BitField<16, 10, s32> y;
     } viewport_corner;
 
     INSERT_PADDING_WORDS(0x17);
@@ -1221,17 +1221,17 @@ struct Regs {
     // Used for debugging purposes, so performance is not an issue here
     static std::string GetCommandName(int index);
 
-    static inline size_t NumIds() {
+    static constexpr size_t NumIds() { // number of 32-bit words that make up Regs
         return sizeof(Regs) / sizeof(u32);
     }
 
-    u32& operator [] (int index) const {
-        u32* content = (u32*)this;
+    const u32& operator [] (int index) const { // read-only view of the index-th 32-bit word of Regs
+        const u32* content = reinterpret_cast<const u32*>(this); // treat the struct as a flat u32 array; index is not range-checked
         return content[index];
     }
 
     u32& operator [] (int index) {
-        u32* content = (u32*)this;
+        u32* content = reinterpret_cast<u32*>(this); // treat the struct as a flat, writable u32 array; index is not range-checked
         return content[index];
     }
 
diff --git a/src/video_core/renderer_base.h b/src/video_core/renderer_base.h
index 91a7b7f17..f68091cc8 100644
--- a/src/video_core/renderer_base.h
+++ b/src/video_core/renderer_base.h
@@ -22,9 +22,6 @@ public:
         kFramebuffer_Texture
     };
 
-    RendererBase() : m_current_fps(0), m_current_frame(0) {
-    }
-
     virtual ~RendererBase() {
     }
 
@@ -46,21 +43,24 @@ public:
     // Getter/setter functions:
     // ------------------------
 
-    f32 GetCurrentframe() const {
+    f32 GetCurrentFPS() const { // returns the stored framerate (m_current_fps)
         return m_current_fps;
     }
 
-    int current_frame() const {
+    int GetCurrentFrame() const { // returns the stored frame counter (m_current_frame)
         return m_current_frame;
     }
 
-    void RefreshRasterizerSetting();
+    VideoCore::RasterizerInterface* Rasterizer() const { // accessor for the rasterizer member
+        return rasterizer.get(); // non-owning pointer; the unique_ptr member keeps ownership (may be null if unset -- TODO confirm)
+    }
 
-    std::unique_ptr<VideoCore::RasterizerInterface> rasterizer;
+    void RefreshRasterizerSetting();
 
 protected:
-    f32 m_current_fps;              ///< Current framerate, should be set by the renderer
-    int m_current_frame;            ///< Current frame, should be set by the renderer
+    std::unique_ptr<VideoCore::RasterizerInterface> rasterizer;
+    f32 m_current_fps   = 0.0f;     ///< Current framerate, should be set by the renderer
+    int m_current_frame = 0;        ///< Current frame, should be set by the renderer
 
 private:
     bool opengl_rasterizer_active = false;
diff --git a/src/video_core/shader/shader.cpp b/src/video_core/shader/shader.cpp
index 5e8930476..509558fc0 100644
--- a/src/video_core/shader/shader.cpp
+++ b/src/video_core/shader/shader.cpp
@@ -32,6 +32,12 @@ namespace Shader {
 static std::unordered_map<u64, CompiledShader*> shader_map;
 static JitCompiler jit;
 static CompiledShader* jit_shader;
+
+static void ClearCache() { // drops all cached compiled shaders; used when JIT space runs low and on shutdown
+    shader_map.clear(); // forget every cache-key -> CompiledShader* entry
+    jit.Clear(); // presumably resets the JIT code space for reuse -- confirm in JitCompiler::Clear
+    LOG_INFO(HW_GPU, "Shader JIT cache cleared");
+}
 #endif // ARCHITECTURE_x86_64
 
 void Setup(UnitState<false>& state) {
@@ -45,6 +51,12 @@ void Setup(UnitState<false>& state) {
         if (iter != shader_map.end()) {
             jit_shader = iter->second;
         } else {
+            // Check if remaining JIT code space is enough for at least one more (massive) shader
+            if (jit.GetSpaceLeft() < jit_shader_size) {
+                // If not, clear the cache of all previously compiled shaders
+                ClearCache();
+            }
+
             jit_shader = jit.Compile();
             shader_map.emplace(cache_key, jit_shader);
         }
@@ -54,7 +66,7 @@ void Setup(UnitState<false>& state) {
 
 void Shutdown() {
 #ifdef ARCHITECTURE_x86_64
-    shader_map.clear();
+    ClearCache(); // clears shader_map and also calls jit.Clear(), not just the map
 #endif // ARCHITECTURE_x86_64
 }
 
@@ -135,7 +147,7 @@ OutputVertex Run(UnitState<false>& state, const InputVertex& input, int num_attr
             std::fmin(std::fabs(ret.color[i].ToFloat32()), 1.0f));
     }
 
-    LOG_TRACE(Render_Software, "Output vertex: pos(%.2f, %.2f, %.2f, %.2f), quat(%.2f, %.2f, %.2f, %.2f), "
+    LOG_TRACE(HW_GPU, "Output vertex: pos(%.2f, %.2f, %.2f, %.2f), quat(%.2f, %.2f, %.2f, %.2f), "
         "col(%.2f, %.2f, %.2f, %.2f), tc0(%.2f, %.2f), view(%.2f, %.2f, %.2f)",
         ret.pos.x.ToFloat32(), ret.pos.y.ToFloat32(), ret.pos.z.ToFloat32(), ret.pos.w.ToFloat32(),
         ret.quat.x.ToFloat32(), ret.quat.y.ToFloat32(), ret.quat.z.ToFloat32(), ret.quat.w.ToFloat32(),
diff --git a/src/video_core/shader/shader.h b/src/video_core/shader/shader.h
index 1be4e3734..7af8f1fa1 100644
--- a/src/video_core/shader/shader.h
+++ b/src/video_core/shader/shader.h
@@ -82,7 +82,7 @@ struct ShaderSetup {
     struct {
         // The float uniforms are accessed by the shader JIT using SSE instructions, and are
         // therefore required to be 16-byte aligned.
-        Math::Vec4<float24> MEMORY_ALIGNED16(f[96]);
+        alignas(16) Math::Vec4<float24> f[96];
 
         std::array<bool, 16> b;
         std::array<Math::Vec4<u8>, 4> i;
@@ -276,9 +276,9 @@ struct UnitState {
     struct Registers {
         // The registers are accessed by the shader JIT using SSE instructions, and are therefore
         // required to be 16-byte aligned.
-        Math::Vec4<float24> MEMORY_ALIGNED16(input[16]);
-        Math::Vec4<float24> MEMORY_ALIGNED16(output[16]);
-        Math::Vec4<float24> MEMORY_ALIGNED16(temporary[16]);
+        alignas(16) Math::Vec4<float24> input[16];
+        alignas(16) Math::Vec4<float24> output[16];
+        alignas(16) Math::Vec4<float24> temporary[16];
     } registers;
     static_assert(std::is_pod<Registers>::value, "Structure is not POD");
 
diff --git a/src/video_core/shader/shader_interpreter.cpp b/src/video_core/shader/shader_interpreter.cpp
index 79fcc56b9..02e1a1cb1 100644
--- a/src/video_core/shader/shader_interpreter.cpp
+++ b/src/video_core/shader/shader_interpreter.cpp
@@ -409,13 +409,16 @@ void RunInterpreter(UnitState<Debug>& state) {
         {
             if ((instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MAD) ||
                 (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MADI)) {
-                const SwizzlePattern& swizzle = *(SwizzlePattern*)&swizzle_data[instr.mad.operand_desc_id];
+                const SwizzlePattern& swizzle = *reinterpret_cast<const SwizzlePattern*>(&swizzle_data[instr.mad.operand_desc_id]);
 
                 bool is_inverted = (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MADI);
 
+                const int address_offset = (instr.mad.address_register_index == 0)
+                                           ? 0 : state.address_registers[instr.mad.address_register_index - 1];
+
                 const float24* src1_ = LookupSourceRegister(instr.mad.GetSrc1(is_inverted));
-                const float24* src2_ = LookupSourceRegister(instr.mad.GetSrc2(is_inverted));
-                const float24* src3_ = LookupSourceRegister(instr.mad.GetSrc3(is_inverted));
+                const float24* src2_ = LookupSourceRegister(instr.mad.GetSrc2(is_inverted) + (!is_inverted * address_offset));
+                const float24* src3_ = LookupSourceRegister(instr.mad.GetSrc3(is_inverted) + ( is_inverted * address_offset));
 
                 const bool negate_src1 = ((bool)swizzle.negate_src1 != false);
                 const bool negate_src2 = ((bool)swizzle.negate_src2 != false);
diff --git a/src/video_core/shader/shader_jit_x64.cpp b/src/video_core/shader/shader_jit_x64.cpp
index 5083d7e54..dffe051ef 100644
--- a/src/video_core/shader/shader_jit_x64.cpp
+++ b/src/video_core/shader/shader_jit_x64.cpp
@@ -160,40 +160,41 @@ void JitCompiler::Compile_SwizzleSrc(Instruction instr, unsigned src_num, Source
     ASSERT_MSG(src_offset == src_offset_disp, "Source register offset too large for int type");
 
     unsigned operand_desc_id;
+
+    const bool is_inverted = (0 != (instr.opcode.Value().GetInfo().subtype & OpCode::Info::SrcInversed));
+
+    unsigned address_register_index;
+    unsigned offset_src;
+
     if (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MAD ||
         instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MADI) {
-        // The MAD and MADI instructions do not use the address offset registers, so loading the
-        // source is a bit simpler here
-
         operand_desc_id = instr.mad.operand_desc_id;
-
-        // Load the source
-        MOVAPS(dest, MDisp(src_ptr, src_offset_disp));
+        offset_src = is_inverted ? 3 : 2;
+        address_register_index = instr.mad.address_register_index;
     } else {
         operand_desc_id = instr.common.operand_desc_id;
+        offset_src = is_inverted ? 2 : 1;
+        address_register_index = instr.common.address_register_index;
+    }
 
-        const bool is_inverted = (0 != (instr.opcode.Value().GetInfo().subtype & OpCode::Info::SrcInversed));
-        unsigned offset_src = is_inverted ? 2 : 1;
-
-        if (src_num == offset_src && instr.common.address_register_index != 0) {
-            switch (instr.common.address_register_index) {
-            case 1: // address offset 1
-                MOVAPS(dest, MComplex(src_ptr, ADDROFFS_REG_0, SCALE_1, src_offset_disp));
-                break;
-            case 2: // address offset 2
-                MOVAPS(dest, MComplex(src_ptr, ADDROFFS_REG_1, SCALE_1, src_offset_disp));
-                break;
-            case 3: // address offset 3
-                MOVAPS(dest, MComplex(src_ptr, LOOPCOUNT_REG, SCALE_1, src_offset_disp));
-                break;
-            default:
-                UNREACHABLE();
-                break;
-            }
-        } else {
-            // Load the source
-            MOVAPS(dest, MDisp(src_ptr, src_offset_disp));
+    if (src_num == offset_src && address_register_index != 0) {
+        switch (address_register_index) {
+        case 1: // address offset 1
+            MOVAPS(dest, MComplex(src_ptr, ADDROFFS_REG_0, SCALE_1, src_offset_disp));
+            break;
+        case 2: // address offset 2
+            MOVAPS(dest, MComplex(src_ptr, ADDROFFS_REG_1, SCALE_1, src_offset_disp));
+            break;
+        case 3: // address offset 3
+            MOVAPS(dest, MComplex(src_ptr, LOOPCOUNT_REG, SCALE_1, src_offset_disp));
+            break;
+        default:
+            UNREACHABLE();
+            break;
         }
+    } else {
+        // Load the source
+        MOVAPS(dest, MDisp(src_ptr, src_offset_disp));
     }
 
     SwizzlePattern swiz = { g_state.vs.swizzle_data[operand_desc_id] };
@@ -644,7 +645,8 @@ void JitCompiler::Compile_MAD(Instruction instr) {
 }
 
 void JitCompiler::Compile_IF(Instruction instr) {
-    ASSERT_MSG(instr.flow_control.dest_offset > *offset_ptr, "Backwards if-statements not supported");
+    ASSERT_MSG(instr.flow_control.dest_offset > *offset_ptr, "Backwards if-statements (%d -> %d) not supported",
+            *offset_ptr, instr.flow_control.dest_offset.Value());
 
     // Evaluate the "IF" condition
     if (instr.opcode.Value() == OpCode::Id::IFU) {
@@ -675,7 +677,8 @@ void JitCompiler::Compile_IF(Instruction instr) {
 }
 
 void JitCompiler::Compile_LOOP(Instruction instr) {
-    ASSERT_MSG(instr.flow_control.dest_offset > *offset_ptr, "Backwards loops not supported");
+    ASSERT_MSG(instr.flow_control.dest_offset > *offset_ptr, "Backwards loops (%d -> %d) not supported",
+            *offset_ptr, instr.flow_control.dest_offset.Value());
     ASSERT_MSG(!looping, "Nested loops not supported");
 
     looping = true;
@@ -703,7 +706,8 @@ void JitCompiler::Compile_LOOP(Instruction instr) {
 }
 
 void JitCompiler::Compile_JMP(Instruction instr) {
-    ASSERT_MSG(instr.flow_control.dest_offset > *offset_ptr, "Backwards jumps not supported");
+    ASSERT_MSG(instr.flow_control.dest_offset > *offset_ptr, "Backwards jumps (%d -> %d) not supported",
+            *offset_ptr, instr.flow_control.dest_offset.Value());
 
     if (instr.opcode.Value() == OpCode::Id::JMPC)
         Compile_EvaluateCondition(instr);
@@ -747,7 +751,7 @@ void JitCompiler::Compile_NextInstr(unsigned* offset) {
     } else {
         // Unhandled instruction
         LOG_CRITICAL(HW_GPU, "Unhandled instruction: 0x%02x (0x%08x)",
-                     instr.opcode.Value().EffectiveOpCode(), instr.hex);
+                instr.opcode.Value().EffectiveOpCode(), instr.hex);
     }
 }
 
@@ -786,7 +790,7 @@ CompiledShader* JitCompiler::Compile() {
 }
 
 JitCompiler::JitCompiler() {
-    AllocCodeSpace(1024 * 1024 * 4);
+    AllocCodeSpace(jit_cache_size); // size comes from the named constant in shader_jit_x64.h instead of a literal
 }
 
 void JitCompiler::Clear() {
diff --git a/src/video_core/shader/shader_jit_x64.h b/src/video_core/shader/shader_jit_x64.h
index 5ad2d9606..5357c964b 100644
--- a/src/video_core/shader/shader_jit_x64.h
+++ b/src/video_core/shader/shader_jit_x64.h
@@ -19,6 +19,11 @@ namespace Pica {
 
 namespace Shader {
 
+/// Memory needed to be available to compile the next shader (otherwise, clear the cache).
+/// Compared against JitCompiler::GetSpaceLeft() before each compilation.
+constexpr size_t jit_shader_size = 1024 * 512; // 512 KiB, assuming the unit is bytes -- TODO confirm
+/// Memory allocated for the JIT code space cache (passed to AllocCodeSpace in the JitCompiler constructor)
+constexpr size_t jit_cache_size = 1024 * 1024 * 8; // 8 MiB, assuming the unit is bytes -- TODO confirm
+
 using CompiledShader = void(void* registers);
 
 /**