aboutsummaryrefslogtreecommitdiff
path: root/src/video_core/shader
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core/shader')
-rw-r--r--src/video_core/shader/shader.cpp34
-rw-r--r--src/video_core/shader/shader_interpreter.cpp17
-rw-r--r--src/video_core/shader/shader_jit_x64.cpp68
-rw-r--r--src/video_core/shader/shader_jit_x64.h5
4 files changed, 78 insertions, 46 deletions
diff --git a/src/video_core/shader/shader.cpp b/src/video_core/shader/shader.cpp
index 5e8930476..eb1db0778 100644
--- a/src/video_core/shader/shader.cpp
+++ b/src/video_core/shader/shader.cpp
@@ -32,6 +32,12 @@ namespace Shader {
static std::unordered_map<u64, CompiledShader*> shader_map;
static JitCompiler jit;
static CompiledShader* jit_shader;
+
+static void ClearCache() {
+ shader_map.clear();
+ jit.Clear();
+ LOG_INFO(HW_GPU, "Shader JIT cache cleared");
+}
#endif // ARCHITECTURE_x86_64
void Setup(UnitState<false>& state) {
@@ -45,6 +51,12 @@ void Setup(UnitState<false>& state) {
if (iter != shader_map.end()) {
jit_shader = iter->second;
} else {
+ // Check if remaining JIT code space is enough for at least one more (massive) shader
+ if (jit.GetSpaceLeft() < jit_shader_size) {
+ // If not, clear the cache of all previously compiled shaders
+ ClearCache();
+ }
+
jit_shader = jit.Compile();
shader_map.emplace(cache_key, jit_shader);
}
@@ -54,7 +66,7 @@ void Setup(UnitState<false>& state) {
void Shutdown() {
#ifdef ARCHITECTURE_x86_64
- shader_map.clear();
+ ClearCache();
#endif // ARCHITECTURE_x86_64
}
@@ -109,15 +121,23 @@ OutputVertex Run(UnitState<false>& state, const InputVertex& input, int num_attr
OutputVertex ret;
// TODO(neobrain): Under some circumstances, up to 16 attributes may be output. We need to
// figure out what those circumstances are and enable the remaining outputs then.
- for (int i = 0; i < 7; ++i) {
- const auto& output_register_map = g_state.regs.vs_output_attributes[i]; // TODO: Don't hardcode VS here
+ unsigned index = 0;
+ for (unsigned i = 0; i < 7; ++i) {
+
+ if (index >= g_state.regs.vs_output_total)
+ break;
+
+ if ((g_state.regs.vs.output_mask & (1 << i)) == 0)
+ continue;
+
+ const auto& output_register_map = g_state.regs.vs_output_attributes[index]; // TODO: Don't hardcode VS here
u32 semantics[4] = {
output_register_map.map_x, output_register_map.map_y,
output_register_map.map_z, output_register_map.map_w
};
- for (int comp = 0; comp < 4; ++comp) {
+ for (unsigned comp = 0; comp < 4; ++comp) {
float24* out = ((float24*)&ret) + semantics[comp];
if (semantics[comp] != Regs::VSOutputAttributes::INVALID) {
*out = state.registers.output[i][comp];
@@ -127,15 +147,17 @@ OutputVertex Run(UnitState<false>& state, const InputVertex& input, int num_attr
memset(out, 0, sizeof(*out));
}
}
+
+ index++;
}
// The hardware takes the absolute and saturates vertex colors like this, *before* doing interpolation
- for (int i = 0; i < 4; ++i) {
+ for (unsigned i = 0; i < 4; ++i) {
ret.color[i] = float24::FromFloat32(
std::fmin(std::fabs(ret.color[i].ToFloat32()), 1.0f));
}
- LOG_TRACE(Render_Software, "Output vertex: pos(%.2f, %.2f, %.2f, %.2f), quat(%.2f, %.2f, %.2f, %.2f), "
+ LOG_TRACE(HW_GPU, "Output vertex: pos(%.2f, %.2f, %.2f, %.2f), quat(%.2f, %.2f, %.2f, %.2f), "
"col(%.2f, %.2f, %.2f, %.2f), tc0(%.2f, %.2f), view(%.2f, %.2f, %.2f)",
ret.pos.x.ToFloat32(), ret.pos.y.ToFloat32(), ret.pos.z.ToFloat32(), ret.pos.w.ToFloat32(),
ret.quat.x.ToFloat32(), ret.quat.y.ToFloat32(), ret.quat.z.ToFloat32(), ret.quat.w.ToFloat32(),
diff --git a/src/video_core/shader/shader_interpreter.cpp b/src/video_core/shader/shader_interpreter.cpp
index 79fcc56b9..9b978583e 100644
--- a/src/video_core/shader/shader_interpreter.cpp
+++ b/src/video_core/shader/shader_interpreter.cpp
@@ -2,10 +2,10 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
-#include <common/file_util.h>
-
+#include <numeric>
#include <nihstro/shader_bytecode.h>
+#include "common/file_util.h"
#include "video_core/pica.h"
#include "video_core/pica_state.h"
#include "video_core/shader/shader.h"
@@ -214,10 +214,8 @@ void RunInterpreter(UnitState<Debug>& state) {
if (opcode == OpCode::Id::DPH || opcode == OpCode::Id::DPHI)
src1[3] = float24::FromFloat32(1.0f);
- float24 dot = float24::FromFloat32(0.f);
int num_components = (opcode == OpCode::Id::DP3) ? 3 : 4;
- for (int i = 0; i < num_components; ++i)
- dot = dot + src1[i] * src2[i];
+ float24 dot = std::inner_product(src1, src1 + num_components, src2, float24::FromFloat32(0.f));
for (int i = 0; i < 4; ++i) {
if (!swizzle.DestComponentEnabled(i))
@@ -409,13 +407,16 @@ void RunInterpreter(UnitState<Debug>& state) {
{
if ((instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MAD) ||
(instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MADI)) {
- const SwizzlePattern& swizzle = *(SwizzlePattern*)&swizzle_data[instr.mad.operand_desc_id];
+ const SwizzlePattern& swizzle = *reinterpret_cast<const SwizzlePattern*>(&swizzle_data[instr.mad.operand_desc_id]);
bool is_inverted = (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MADI);
+ const int address_offset = (instr.mad.address_register_index == 0)
+ ? 0 : state.address_registers[instr.mad.address_register_index - 1];
+
const float24* src1_ = LookupSourceRegister(instr.mad.GetSrc1(is_inverted));
- const float24* src2_ = LookupSourceRegister(instr.mad.GetSrc2(is_inverted));
- const float24* src3_ = LookupSourceRegister(instr.mad.GetSrc3(is_inverted));
+ const float24* src2_ = LookupSourceRegister(instr.mad.GetSrc2(is_inverted) + (!is_inverted * address_offset));
+ const float24* src3_ = LookupSourceRegister(instr.mad.GetSrc3(is_inverted) + ( is_inverted * address_offset));
const bool negate_src1 = ((bool)swizzle.negate_src1 != false);
const bool negate_src2 = ((bool)swizzle.negate_src2 != false);
diff --git a/src/video_core/shader/shader_jit_x64.cpp b/src/video_core/shader/shader_jit_x64.cpp
index 5083d7e54..dffe051ef 100644
--- a/src/video_core/shader/shader_jit_x64.cpp
+++ b/src/video_core/shader/shader_jit_x64.cpp
@@ -160,40 +160,41 @@ void JitCompiler::Compile_SwizzleSrc(Instruction instr, unsigned src_num, Source
ASSERT_MSG(src_offset == src_offset_disp, "Source register offset too large for int type");
unsigned operand_desc_id;
+
+ const bool is_inverted = (0 != (instr.opcode.Value().GetInfo().subtype & OpCode::Info::SrcInversed));
+
+ unsigned address_register_index;
+ unsigned offset_src;
+
if (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MAD ||
instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MADI) {
- // The MAD and MADI instructions do not use the address offset registers, so loading the
- // source is a bit simpler here
-
operand_desc_id = instr.mad.operand_desc_id;
-
- // Load the source
- MOVAPS(dest, MDisp(src_ptr, src_offset_disp));
+ offset_src = is_inverted ? 3 : 2;
+ address_register_index = instr.mad.address_register_index;
} else {
operand_desc_id = instr.common.operand_desc_id;
+ offset_src = is_inverted ? 2 : 1;
+ address_register_index = instr.common.address_register_index;
+ }
- const bool is_inverted = (0 != (instr.opcode.Value().GetInfo().subtype & OpCode::Info::SrcInversed));
- unsigned offset_src = is_inverted ? 2 : 1;
-
- if (src_num == offset_src && instr.common.address_register_index != 0) {
- switch (instr.common.address_register_index) {
- case 1: // address offset 1
- MOVAPS(dest, MComplex(src_ptr, ADDROFFS_REG_0, SCALE_1, src_offset_disp));
- break;
- case 2: // address offset 2
- MOVAPS(dest, MComplex(src_ptr, ADDROFFS_REG_1, SCALE_1, src_offset_disp));
- break;
- case 3: // address offset 3
- MOVAPS(dest, MComplex(src_ptr, LOOPCOUNT_REG, SCALE_1, src_offset_disp));
- break;
- default:
- UNREACHABLE();
- break;
- }
- } else {
- // Load the source
- MOVAPS(dest, MDisp(src_ptr, src_offset_disp));
+ if (src_num == offset_src && address_register_index != 0) {
+ switch (address_register_index) {
+ case 1: // address offset 1
+ MOVAPS(dest, MComplex(src_ptr, ADDROFFS_REG_0, SCALE_1, src_offset_disp));
+ break;
+ case 2: // address offset 2
+ MOVAPS(dest, MComplex(src_ptr, ADDROFFS_REG_1, SCALE_1, src_offset_disp));
+ break;
+ case 3: // address offset 3
+ MOVAPS(dest, MComplex(src_ptr, LOOPCOUNT_REG, SCALE_1, src_offset_disp));
+ break;
+ default:
+ UNREACHABLE();
+ break;
}
+ } else {
+ // Load the source
+ MOVAPS(dest, MDisp(src_ptr, src_offset_disp));
}
SwizzlePattern swiz = { g_state.vs.swizzle_data[operand_desc_id] };
@@ -644,7 +645,8 @@ void JitCompiler::Compile_MAD(Instruction instr) {
}
void JitCompiler::Compile_IF(Instruction instr) {
- ASSERT_MSG(instr.flow_control.dest_offset > *offset_ptr, "Backwards if-statements not supported");
+ ASSERT_MSG(instr.flow_control.dest_offset > *offset_ptr, "Backwards if-statements (%d -> %d) not supported",
+ *offset_ptr, instr.flow_control.dest_offset.Value());
// Evaluate the "IF" condition
if (instr.opcode.Value() == OpCode::Id::IFU) {
@@ -675,7 +677,8 @@ void JitCompiler::Compile_IF(Instruction instr) {
}
void JitCompiler::Compile_LOOP(Instruction instr) {
- ASSERT_MSG(instr.flow_control.dest_offset > *offset_ptr, "Backwards loops not supported");
+ ASSERT_MSG(instr.flow_control.dest_offset > *offset_ptr, "Backwards loops (%d -> %d) not supported",
+ *offset_ptr, instr.flow_control.dest_offset.Value());
ASSERT_MSG(!looping, "Nested loops not supported");
looping = true;
@@ -703,7 +706,8 @@ void JitCompiler::Compile_LOOP(Instruction instr) {
}
void JitCompiler::Compile_JMP(Instruction instr) {
- ASSERT_MSG(instr.flow_control.dest_offset > *offset_ptr, "Backwards jumps not supported");
+ ASSERT_MSG(instr.flow_control.dest_offset > *offset_ptr, "Backwards jumps (%d -> %d) not supported",
+ *offset_ptr, instr.flow_control.dest_offset.Value());
if (instr.opcode.Value() == OpCode::Id::JMPC)
Compile_EvaluateCondition(instr);
@@ -747,7 +751,7 @@ void JitCompiler::Compile_NextInstr(unsigned* offset) {
} else {
// Unhandled instruction
LOG_CRITICAL(HW_GPU, "Unhandled instruction: 0x%02x (0x%08x)",
- instr.opcode.Value().EffectiveOpCode(), instr.hex);
+ instr.opcode.Value().EffectiveOpCode(), instr.hex);
}
}
@@ -786,7 +790,7 @@ CompiledShader* JitCompiler::Compile() {
}
JitCompiler::JitCompiler() {
- AllocCodeSpace(1024 * 1024 * 4);
+ AllocCodeSpace(jit_cache_size);
}
void JitCompiler::Clear() {
diff --git a/src/video_core/shader/shader_jit_x64.h b/src/video_core/shader/shader_jit_x64.h
index 5ad2d9606..5357c964b 100644
--- a/src/video_core/shader/shader_jit_x64.h
+++ b/src/video_core/shader/shader_jit_x64.h
@@ -19,6 +19,11 @@ namespace Pica {
namespace Shader {
+/// Memory needed to be available to compile the next shader (otherwise, clear the cache)
+constexpr size_t jit_shader_size = 1024 * 512;
+/// Memory allocated for the JIT code space cache
+constexpr size_t jit_cache_size = 1024 * 1024 * 8;
+
using CompiledShader = void(void* registers);
/**