aboutsummaryrefslogtreecommitdiff
path: root/src/video_core/shader
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core/shader')
-rw-r--r--src/video_core/shader/shader.cpp16
-rw-r--r--src/video_core/shader/shader_interpreter.cpp7
-rw-r--r--src/video_core/shader/shader_jit_x64.cpp68
-rw-r--r--src/video_core/shader/shader_jit_x64.h5
4 files changed, 60 insertions, 36 deletions
diff --git a/src/video_core/shader/shader.cpp b/src/video_core/shader/shader.cpp
index 5e8930476..509558fc0 100644
--- a/src/video_core/shader/shader.cpp
+++ b/src/video_core/shader/shader.cpp
@@ -32,6 +32,12 @@ namespace Shader {
static std::unordered_map<u64, CompiledShader*> shader_map;
static JitCompiler jit;
static CompiledShader* jit_shader;
+
+static void ClearCache() {
+ shader_map.clear();
+ jit.Clear();
+ LOG_INFO(HW_GPU, "Shader JIT cache cleared");
+}
#endif // ARCHITECTURE_x86_64
void Setup(UnitState<false>& state) {
@@ -45,6 +51,12 @@ void Setup(UnitState<false>& state) {
if (iter != shader_map.end()) {
jit_shader = iter->second;
} else {
+ // Check if remaining JIT code space is enough for at least one more (massive) shader
+ if (jit.GetSpaceLeft() < jit_shader_size) {
+ // If not, clear the cache of all previously compiled shaders
+ ClearCache();
+ }
+
jit_shader = jit.Compile();
shader_map.emplace(cache_key, jit_shader);
}
@@ -54,7 +66,7 @@ void Setup(UnitState<false>& state) {
void Shutdown() {
#ifdef ARCHITECTURE_x86_64
- shader_map.clear();
+ ClearCache();
#endif // ARCHITECTURE_x86_64
}
@@ -135,7 +147,7 @@ OutputVertex Run(UnitState<false>& state, const InputVertex& input, int num_attr
std::fmin(std::fabs(ret.color[i].ToFloat32()), 1.0f));
}
- LOG_TRACE(Render_Software, "Output vertex: pos(%.2f, %.2f, %.2f, %.2f), quat(%.2f, %.2f, %.2f, %.2f), "
+ LOG_TRACE(HW_GPU, "Output vertex: pos(%.2f, %.2f, %.2f, %.2f), quat(%.2f, %.2f, %.2f, %.2f), "
"col(%.2f, %.2f, %.2f, %.2f), tc0(%.2f, %.2f), view(%.2f, %.2f, %.2f)",
ret.pos.x.ToFloat32(), ret.pos.y.ToFloat32(), ret.pos.z.ToFloat32(), ret.pos.w.ToFloat32(),
ret.quat.x.ToFloat32(), ret.quat.y.ToFloat32(), ret.quat.z.ToFloat32(), ret.quat.w.ToFloat32(),
diff --git a/src/video_core/shader/shader_interpreter.cpp b/src/video_core/shader/shader_interpreter.cpp
index 79fcc56b9..295a2466b 100644
--- a/src/video_core/shader/shader_interpreter.cpp
+++ b/src/video_core/shader/shader_interpreter.cpp
@@ -413,9 +413,12 @@ void RunInterpreter(UnitState<Debug>& state) {
bool is_inverted = (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MADI);
+ const int address_offset = (instr.mad.address_register_index == 0)
+ ? 0 : state.address_registers[instr.mad.address_register_index - 1];
+
const float24* src1_ = LookupSourceRegister(instr.mad.GetSrc1(is_inverted));
- const float24* src2_ = LookupSourceRegister(instr.mad.GetSrc2(is_inverted));
- const float24* src3_ = LookupSourceRegister(instr.mad.GetSrc3(is_inverted));
+ const float24* src2_ = LookupSourceRegister(instr.mad.GetSrc2(is_inverted) + (!is_inverted * address_offset));
+ const float24* src3_ = LookupSourceRegister(instr.mad.GetSrc3(is_inverted) + ( is_inverted * address_offset));
const bool negate_src1 = ((bool)swizzle.negate_src1 != false);
const bool negate_src2 = ((bool)swizzle.negate_src2 != false);
diff --git a/src/video_core/shader/shader_jit_x64.cpp b/src/video_core/shader/shader_jit_x64.cpp
index 5083d7e54..dffe051ef 100644
--- a/src/video_core/shader/shader_jit_x64.cpp
+++ b/src/video_core/shader/shader_jit_x64.cpp
@@ -160,40 +160,41 @@ void JitCompiler::Compile_SwizzleSrc(Instruction instr, unsigned src_num, Source
ASSERT_MSG(src_offset == src_offset_disp, "Source register offset too large for int type");
unsigned operand_desc_id;
+
+ const bool is_inverted = (0 != (instr.opcode.Value().GetInfo().subtype & OpCode::Info::SrcInversed));
+
+ unsigned address_register_index;
+ unsigned offset_src;
+
if (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MAD ||
instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MADI) {
- // The MAD and MADI instructions do not use the address offset registers, so loading the
- // source is a bit simpler here
-
operand_desc_id = instr.mad.operand_desc_id;
-
- // Load the source
- MOVAPS(dest, MDisp(src_ptr, src_offset_disp));
+ offset_src = is_inverted ? 3 : 2;
+ address_register_index = instr.mad.address_register_index;
} else {
operand_desc_id = instr.common.operand_desc_id;
+ offset_src = is_inverted ? 2 : 1;
+ address_register_index = instr.common.address_register_index;
+ }
- const bool is_inverted = (0 != (instr.opcode.Value().GetInfo().subtype & OpCode::Info::SrcInversed));
- unsigned offset_src = is_inverted ? 2 : 1;
-
- if (src_num == offset_src && instr.common.address_register_index != 0) {
- switch (instr.common.address_register_index) {
- case 1: // address offset 1
- MOVAPS(dest, MComplex(src_ptr, ADDROFFS_REG_0, SCALE_1, src_offset_disp));
- break;
- case 2: // address offset 2
- MOVAPS(dest, MComplex(src_ptr, ADDROFFS_REG_1, SCALE_1, src_offset_disp));
- break;
- case 3: // address offset 3
- MOVAPS(dest, MComplex(src_ptr, LOOPCOUNT_REG, SCALE_1, src_offset_disp));
- break;
- default:
- UNREACHABLE();
- break;
- }
- } else {
- // Load the source
- MOVAPS(dest, MDisp(src_ptr, src_offset_disp));
+ if (src_num == offset_src && address_register_index != 0) {
+ switch (address_register_index) {
+ case 1: // address offset 1
+ MOVAPS(dest, MComplex(src_ptr, ADDROFFS_REG_0, SCALE_1, src_offset_disp));
+ break;
+ case 2: // address offset 2
+ MOVAPS(dest, MComplex(src_ptr, ADDROFFS_REG_1, SCALE_1, src_offset_disp));
+ break;
+ case 3: // address offset 3
+ MOVAPS(dest, MComplex(src_ptr, LOOPCOUNT_REG, SCALE_1, src_offset_disp));
+ break;
+ default:
+ UNREACHABLE();
+ break;
}
+ } else {
+ // Load the source
+ MOVAPS(dest, MDisp(src_ptr, src_offset_disp));
}
SwizzlePattern swiz = { g_state.vs.swizzle_data[operand_desc_id] };
@@ -644,7 +645,8 @@ void JitCompiler::Compile_MAD(Instruction instr) {
}
void JitCompiler::Compile_IF(Instruction instr) {
- ASSERT_MSG(instr.flow_control.dest_offset > *offset_ptr, "Backwards if-statements not supported");
+ ASSERT_MSG(instr.flow_control.dest_offset > *offset_ptr, "Backwards if-statements (%d -> %d) not supported",
+ *offset_ptr, instr.flow_control.dest_offset.Value());
// Evaluate the "IF" condition
if (instr.opcode.Value() == OpCode::Id::IFU) {
@@ -675,7 +677,8 @@ void JitCompiler::Compile_IF(Instruction instr) {
}
void JitCompiler::Compile_LOOP(Instruction instr) {
- ASSERT_MSG(instr.flow_control.dest_offset > *offset_ptr, "Backwards loops not supported");
+ ASSERT_MSG(instr.flow_control.dest_offset > *offset_ptr, "Backwards loops (%d -> %d) not supported",
+ *offset_ptr, instr.flow_control.dest_offset.Value());
ASSERT_MSG(!looping, "Nested loops not supported");
looping = true;
@@ -703,7 +706,8 @@ void JitCompiler::Compile_LOOP(Instruction instr) {
}
void JitCompiler::Compile_JMP(Instruction instr) {
- ASSERT_MSG(instr.flow_control.dest_offset > *offset_ptr, "Backwards jumps not supported");
+ ASSERT_MSG(instr.flow_control.dest_offset > *offset_ptr, "Backwards jumps (%d -> %d) not supported",
+ *offset_ptr, instr.flow_control.dest_offset.Value());
if (instr.opcode.Value() == OpCode::Id::JMPC)
Compile_EvaluateCondition(instr);
@@ -747,7 +751,7 @@ void JitCompiler::Compile_NextInstr(unsigned* offset) {
} else {
// Unhandled instruction
LOG_CRITICAL(HW_GPU, "Unhandled instruction: 0x%02x (0x%08x)",
- instr.opcode.Value().EffectiveOpCode(), instr.hex);
+ instr.opcode.Value().EffectiveOpCode(), instr.hex);
}
}
@@ -786,7 +790,7 @@ CompiledShader* JitCompiler::Compile() {
}
JitCompiler::JitCompiler() {
- AllocCodeSpace(1024 * 1024 * 4);
+ AllocCodeSpace(jit_cache_size);
}
void JitCompiler::Clear() {
diff --git a/src/video_core/shader/shader_jit_x64.h b/src/video_core/shader/shader_jit_x64.h
index 5ad2d9606..5357c964b 100644
--- a/src/video_core/shader/shader_jit_x64.h
+++ b/src/video_core/shader/shader_jit_x64.h
@@ -19,6 +19,11 @@ namespace Pica {
namespace Shader {
+/// Memory needed to be available to compile the next shader (otherwise, clear the cache)
+constexpr size_t jit_shader_size = 1024 * 512;
+/// Memory allocated for the JIT code space cache
+constexpr size_t jit_cache_size = 1024 * 1024 * 8;
+
using CompiledShader = void(void* registers);
/**