diff options
Diffstat (limited to 'src/video_core/shader')
| -rw-r--r-- | src/video_core/shader/shader.cpp | 16 | ||||
| -rw-r--r-- | src/video_core/shader/shader_interpreter.cpp | 7 | ||||
| -rw-r--r-- | src/video_core/shader/shader_jit_x64.cpp | 68 | ||||
| -rw-r--r-- | src/video_core/shader/shader_jit_x64.h | 5 |
4 files changed, 60 insertions, 36 deletions
diff --git a/src/video_core/shader/shader.cpp b/src/video_core/shader/shader.cpp index 5e8930476..509558fc0 100644 --- a/src/video_core/shader/shader.cpp +++ b/src/video_core/shader/shader.cpp @@ -32,6 +32,12 @@ namespace Shader { static std::unordered_map<u64, CompiledShader*> shader_map; static JitCompiler jit; static CompiledShader* jit_shader; + +static void ClearCache() { + shader_map.clear(); + jit.Clear(); + LOG_INFO(HW_GPU, "Shader JIT cache cleared"); +} #endif // ARCHITECTURE_x86_64 void Setup(UnitState<false>& state) { @@ -45,6 +51,12 @@ void Setup(UnitState<false>& state) { if (iter != shader_map.end()) { jit_shader = iter->second; } else { + // Check if remaining JIT code space is enough for at least one more (massive) shader + if (jit.GetSpaceLeft() < jit_shader_size) { + // If not, clear the cache of all previously compiled shaders + ClearCache(); + } + jit_shader = jit.Compile(); shader_map.emplace(cache_key, jit_shader); } @@ -54,7 +66,7 @@ void Setup(UnitState<false>& state) { void Shutdown() { #ifdef ARCHITECTURE_x86_64 - shader_map.clear(); + ClearCache(); #endif // ARCHITECTURE_x86_64 } @@ -135,7 +147,7 @@ OutputVertex Run(UnitState<false>& state, const InputVertex& input, int num_attr std::fmin(std::fabs(ret.color[i].ToFloat32()), 1.0f)); } - LOG_TRACE(Render_Software, "Output vertex: pos(%.2f, %.2f, %.2f, %.2f), quat(%.2f, %.2f, %.2f, %.2f), " + LOG_TRACE(HW_GPU, "Output vertex: pos(%.2f, %.2f, %.2f, %.2f), quat(%.2f, %.2f, %.2f, %.2f), " "col(%.2f, %.2f, %.2f, %.2f), tc0(%.2f, %.2f), view(%.2f, %.2f, %.2f)", ret.pos.x.ToFloat32(), ret.pos.y.ToFloat32(), ret.pos.z.ToFloat32(), ret.pos.w.ToFloat32(), ret.quat.x.ToFloat32(), ret.quat.y.ToFloat32(), ret.quat.z.ToFloat32(), ret.quat.w.ToFloat32(), diff --git a/src/video_core/shader/shader_interpreter.cpp b/src/video_core/shader/shader_interpreter.cpp index 79fcc56b9..295a2466b 100644 --- a/src/video_core/shader/shader_interpreter.cpp +++ b/src/video_core/shader/shader_interpreter.cpp @@ -413,9 +413,12 @@ void RunInterpreter(UnitState<Debug>& state) { bool is_inverted = (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MADI); + const int address_offset = (instr.mad.address_register_index == 0) + ? 0 : state.address_registers[instr.mad.address_register_index - 1]; + const float24* src1_ = LookupSourceRegister(instr.mad.GetSrc1(is_inverted)); - const float24* src2_ = LookupSourceRegister(instr.mad.GetSrc2(is_inverted)); - const float24* src3_ = LookupSourceRegister(instr.mad.GetSrc3(is_inverted)); + const float24* src2_ = LookupSourceRegister(instr.mad.GetSrc2(is_inverted) + (!is_inverted * address_offset)); + const float24* src3_ = LookupSourceRegister(instr.mad.GetSrc3(is_inverted) + ( is_inverted * address_offset)); const bool negate_src1 = ((bool)swizzle.negate_src1 != false); const bool negate_src2 = ((bool)swizzle.negate_src2 != false); diff --git a/src/video_core/shader/shader_jit_x64.cpp b/src/video_core/shader/shader_jit_x64.cpp index 5083d7e54..dffe051ef 100644 --- a/src/video_core/shader/shader_jit_x64.cpp +++ b/src/video_core/shader/shader_jit_x64.cpp @@ -160,40 +160,41 @@ void JitCompiler::Compile_SwizzleSrc(Instruction instr, unsigned src_num, Source ASSERT_MSG(src_offset == src_offset_disp, "Source register offset too large for int type"); unsigned operand_desc_id; + + const bool is_inverted = (0 != (instr.opcode.Value().GetInfo().subtype & OpCode::Info::SrcInversed)); + + unsigned address_register_index; + unsigned offset_src; + if (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MAD || instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MADI) { - // The MAD and MADI instructions do not use the address offset registers, so loading the - // source is a bit simpler here - operand_desc_id = instr.mad.operand_desc_id; - - // Load the source - MOVAPS(dest, MDisp(src_ptr, src_offset_disp)); + offset_src = is_inverted ? 3 : 2; + address_register_index = instr.mad.address_register_index; } else { operand_desc_id = instr.common.operand_desc_id; + offset_src = is_inverted ? 2 : 1; + address_register_index = instr.common.address_register_index; + } - const bool is_inverted = (0 != (instr.opcode.Value().GetInfo().subtype & OpCode::Info::SrcInversed)); - unsigned offset_src = is_inverted ? 2 : 1; - - if (src_num == offset_src && instr.common.address_register_index != 0) { - switch (instr.common.address_register_index) { - case 1: // address offset 1 - MOVAPS(dest, MComplex(src_ptr, ADDROFFS_REG_0, SCALE_1, src_offset_disp)); - break; - case 2: // address offset 2 - MOVAPS(dest, MComplex(src_ptr, ADDROFFS_REG_1, SCALE_1, src_offset_disp)); - break; - case 3: // address offset 3 - MOVAPS(dest, MComplex(src_ptr, LOOPCOUNT_REG, SCALE_1, src_offset_disp)); - break; - default: - UNREACHABLE(); - break; - } - } else { - // Load the source - MOVAPS(dest, MDisp(src_ptr, src_offset_disp)); + if (src_num == offset_src && address_register_index != 0) { + switch (address_register_index) { + case 1: // address offset 1 + MOVAPS(dest, MComplex(src_ptr, ADDROFFS_REG_0, SCALE_1, src_offset_disp)); + break; + case 2: // address offset 2 + MOVAPS(dest, MComplex(src_ptr, ADDROFFS_REG_1, SCALE_1, src_offset_disp)); + break; + case 3: // address offset 3 + MOVAPS(dest, MComplex(src_ptr, LOOPCOUNT_REG, SCALE_1, src_offset_disp)); + break; + default: + UNREACHABLE(); + break; } + } else { + // Load the source + MOVAPS(dest, MDisp(src_ptr, src_offset_disp)); } SwizzlePattern swiz = { g_state.vs.swizzle_data[operand_desc_id] }; @@ -644,7 +645,8 @@ void JitCompiler::Compile_MAD(Instruction instr) { } void JitCompiler::Compile_IF(Instruction instr) { - ASSERT_MSG(instr.flow_control.dest_offset > *offset_ptr, "Backwards if-statements not supported"); + ASSERT_MSG(instr.flow_control.dest_offset > *offset_ptr, "Backwards if-statements (%d -> %d) not supported", + *offset_ptr, instr.flow_control.dest_offset.Value()); // Evaluate the "IF" condition if (instr.opcode.Value() == OpCode::Id::IFU) { @@ -675,7 +677,8 @@ void JitCompiler::Compile_IF(Instruction instr) { } void JitCompiler::Compile_LOOP(Instruction instr) { - ASSERT_MSG(instr.flow_control.dest_offset > *offset_ptr, "Backwards loops not supported"); + ASSERT_MSG(instr.flow_control.dest_offset > *offset_ptr, "Backwards loops (%d -> %d) not supported", + *offset_ptr, instr.flow_control.dest_offset.Value()); ASSERT_MSG(!looping, "Nested loops not supported"); looping = true; @@ -703,7 +706,8 @@ void JitCompiler::Compile_LOOP(Instruction instr) { } void JitCompiler::Compile_JMP(Instruction instr) { - ASSERT_MSG(instr.flow_control.dest_offset > *offset_ptr, "Backwards jumps not supported"); + ASSERT_MSG(instr.flow_control.dest_offset > *offset_ptr, "Backwards jumps (%d -> %d) not supported", + *offset_ptr, instr.flow_control.dest_offset.Value()); if (instr.opcode.Value() == OpCode::Id::JMPC) Compile_EvaluateCondition(instr); @@ -747,7 +751,7 @@ void JitCompiler::Compile_NextInstr(unsigned* offset) { } else { // Unhandled instruction LOG_CRITICAL(HW_GPU, "Unhandled instruction: 0x%02x (0x%08x)", - instr.opcode.Value().EffectiveOpCode(), instr.hex); + instr.opcode.Value().EffectiveOpCode(), instr.hex); } } @@ -786,7 +790,7 @@ CompiledShader* JitCompiler::Compile() { } JitCompiler::JitCompiler() { - AllocCodeSpace(1024 * 1024 * 4); + AllocCodeSpace(jit_cache_size); } void JitCompiler::Clear() { diff --git a/src/video_core/shader/shader_jit_x64.h b/src/video_core/shader/shader_jit_x64.h index 5ad2d9606..5357c964b 100644 --- a/src/video_core/shader/shader_jit_x64.h +++ b/src/video_core/shader/shader_jit_x64.h @@ -19,6 +19,11 @@ namespace Pica { namespace Shader { +/// Memory needed to be available to compile the next shader (otherwise, clear the cache) +constexpr size_t jit_shader_size = 1024 * 512; +/// Memory allocated for the JIT code space cache +constexpr size_t jit_cache_size = 1024 * 1024 * 8; + using CompiledShader = void(void* registers); /** |
