aboutsummaryrefslogtreecommitdiff
path: root/src/video_core
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core')
-rw-r--r-- src/video_core/command_processor.cpp | 5
-rw-r--r-- src/video_core/pica.cpp | 2
-rw-r--r-- src/video_core/pica.h | 13
-rw-r--r-- src/video_core/shader/shader_jit_x64.cpp | 52
-rw-r--r-- src/video_core/shader/shader_jit_x64.h | 3
5 files changed, 39 insertions, 36 deletions
diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp
index a78985510..6e9cb2586 100644
--- a/src/video_core/command_processor.cpp
+++ b/src/video_core/command_processor.cpp
@@ -235,7 +235,8 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
for (unsigned int index = 0; index < regs.num_vertices; ++index)
{
- unsigned int vertex = is_indexed ? (index_u16 ? index_address_16[index] : index_address_8[index]) : index;
+ // Indexed rendering doesn't use the start offset
+ unsigned int vertex = is_indexed ? (index_u16 ? index_address_16[index] : index_address_8[index]) : (index + regs.vertex_offset);
// -1 is a common special value used for primitive restart. Since it's unknown if
// the PICA supports it, and it would mess up the caching, guard against it here.
@@ -289,7 +290,7 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
*(float*)srcdata;
input.attr[i][comp] = float24::FromFloat32(srcval);
- LOG_TRACE(HW_GPU, "Loaded component %x of attribute %x for vertex %x (index %x) from 0x%08x + 0x%08lx + 0x%04lx: %f",
+ LOG_TRACE(HW_GPU, "Loaded component %x of attribute %x for vertex %x (index %x) from 0x%08x + 0x%08x + 0x%04x: %f",
comp, i, vertex, index,
attribute_config.GetPhysicalBaseAddress(),
vertex_attribute_sources[i] - base_address,
diff --git a/src/video_core/pica.cpp b/src/video_core/pica.cpp
index c73a8178e..61983bc6c 100644
--- a/src/video_core/pica.cpp
+++ b/src/video_core/pica.cpp
@@ -49,11 +49,13 @@ std::string Regs::GetCommandName(int index) {
ADD_FIELD(vertex_attributes);
ADD_FIELD(index_array);
ADD_FIELD(num_vertices);
+ ADD_FIELD(vertex_offset);
ADD_FIELD(trigger_draw);
ADD_FIELD(trigger_draw_indexed);
ADD_FIELD(vs_default_attributes_setup);
ADD_FIELD(command_buffer);
ADD_FIELD(triangle_topology);
+ ADD_FIELD(restart_primitive);
ADD_FIELD(gs.bool_uniforms);
ADD_FIELD(gs.int_uniforms);
ADD_FIELD(gs.main_offset);
diff --git a/src/video_core/pica.h b/src/video_core/pica.h
index f40684d83..855cb442e 100644
--- a/src/video_core/pica.h
+++ b/src/video_core/pica.h
@@ -769,7 +769,12 @@ struct Regs {
// Number of vertices to render
u32 num_vertices;
- INSERT_PADDING_WORDS(0x5);
+ INSERT_PADDING_WORDS(0x1);
+
+ // The index of the first vertex to render
+ u32 vertex_offset;
+
+ INSERT_PADDING_WORDS(0x3);
// These two trigger rendering of triangles
u32 trigger_draw;
@@ -823,7 +828,9 @@ struct Regs {
BitField<8, 2, TriangleTopology> triangle_topology;
- INSERT_PADDING_WORDS(0x21);
+ u32 restart_primitive;
+
+ INSERT_PADDING_WORDS(0x20);
struct ShaderConfig {
BitField<0, 16, u32> bool_uniforms;
@@ -992,11 +999,13 @@ ASSERT_REG_POSITION(framebuffer, 0x110);
ASSERT_REG_POSITION(vertex_attributes, 0x200);
ASSERT_REG_POSITION(index_array, 0x227);
ASSERT_REG_POSITION(num_vertices, 0x228);
+ASSERT_REG_POSITION(vertex_offset, 0x22a);
ASSERT_REG_POSITION(trigger_draw, 0x22e);
ASSERT_REG_POSITION(trigger_draw_indexed, 0x22f);
ASSERT_REG_POSITION(vs_default_attributes_setup, 0x232);
ASSERT_REG_POSITION(command_buffer, 0x238);
ASSERT_REG_POSITION(triangle_topology, 0x25e);
+ASSERT_REG_POSITION(restart_primitive, 0x25f);
ASSERT_REG_POSITION(gs, 0x280);
ASSERT_REG_POSITION(vs, 0x2b0);
diff --git a/src/video_core/shader/shader_jit_x64.cpp b/src/video_core/shader/shader_jit_x64.cpp
index d3cfe109e..b9a0b19e3 100644
--- a/src/video_core/shader/shader_jit_x64.cpp
+++ b/src/video_core/shader/shader_jit_x64.cpp
@@ -122,6 +122,14 @@ static const X64Reg ONE = XMM14;
/// Constant vector of [-0.f, -0.f, -0.f, -0.f], used to efficiently negate a vector with XOR
static const X64Reg NEGBIT = XMM15;
+// State registers that must not be modified by external function calls
+// Scratch registers, e.g., SRC1 and SCRATCH, have to be saved on the side if needed
+static const BitSet32 persistent_regs = {
+ UNIFORMS, REGISTERS, // Pointers to register blocks
+ ADDROFFS_REG_0, ADDROFFS_REG_1, LOOPCOUNT_REG, COND0, COND1, // Cached registers
+ ONE+16, NEGBIT+16, // Constants
+};
+
/// Raw constant for the source register selector that indicates no swizzling is performed
static const u8 NO_SRC_REG_SWIZZLE = 0x1b;
/// Raw constant for the destination register enable mask that indicates all components are enabled
@@ -295,20 +303,8 @@ void JitCompiler::Compile_UniformCondition(Instruction instr) {
CMP(sizeof(bool) * 8, MDisp(UNIFORMS, offset), Imm8(0));
}
-void JitCompiler::Compile_PushCallerSavedXMM() {
-#ifndef _WIN32
- SUB(64, R(RSP), Imm8(2 * 16));
- MOVUPS(MDisp(RSP, 16), ONE);
- MOVUPS(MDisp(RSP, 0), NEGBIT);
-#endif
-}
-
-void JitCompiler::Compile_PopCallerSavedXMM() {
-#ifndef _WIN32
- MOVUPS(NEGBIT, MDisp(RSP, 0));
- MOVUPS(ONE, MDisp(RSP, 16));
- ADD(64, R(RSP), Imm8(2 * 16));
-#endif
+BitSet32 JitCompiler::PersistentCallerSavedRegs() {
+ return persistent_regs & ABI_ALL_CALLER_SAVED;
}
void JitCompiler::Compile_ADD(Instruction instr) {
@@ -390,12 +386,9 @@ void JitCompiler::Compile_EX2(Instruction instr) {
Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
MOVSS(XMM0, R(SRC1));
- // The following will actually break the stack alignment
- ABI_PushAllCallerSavedRegsAndAdjustStack();
- Compile_PushCallerSavedXMM();
+ ABI_PushRegistersAndAdjustStack(PersistentCallerSavedRegs(), 0);
ABI_CallFunction(reinterpret_cast<const void*>(exp2f));
- Compile_PopCallerSavedXMM();
- ABI_PopAllCallerSavedRegsAndAdjustStack();
+ ABI_PopRegistersAndAdjustStack(PersistentCallerSavedRegs(), 0);
SHUFPS(XMM0, R(XMM0), _MM_SHUFFLE(0, 0, 0, 0));
MOVAPS(SRC1, R(XMM0));
@@ -406,12 +399,9 @@ void JitCompiler::Compile_LG2(Instruction instr) {
Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
MOVSS(XMM0, R(SRC1));
- // The following will actually break the stack alignment
- ABI_PushAllCallerSavedRegsAndAdjustStack();
- Compile_PushCallerSavedXMM();
+ ABI_PushRegistersAndAdjustStack(PersistentCallerSavedRegs(), 0);
ABI_CallFunction(reinterpret_cast<const void*>(log2f));
- Compile_PopCallerSavedXMM();
- ABI_PopAllCallerSavedRegsAndAdjustStack();
+ ABI_PopRegistersAndAdjustStack(PersistentCallerSavedRegs(), 0);
SHUFPS(XMM0, R(XMM0), _MM_SHUFFLE(0, 0, 0, 0));
MOVAPS(SRC1, R(XMM0));
@@ -434,10 +424,10 @@ void JitCompiler::Compile_SGE(Instruction instr) {
Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2);
}
- CMPPS(SRC1, R(SRC2), CMP_NLT);
- ANDPS(SRC1, R(ONE));
+ CMPPS(SRC2, R(SRC1), CMP_LE);
+ ANDPS(SRC2, R(ONE));
- Compile_DestEnable(instr, SRC1);
+ Compile_DestEnable(instr, SRC2);
}
void JitCompiler::Compile_SLT(Instruction instr) {
@@ -560,7 +550,7 @@ void JitCompiler::Compile_NOP(Instruction instr) {
}
void JitCompiler::Compile_END(Instruction instr) {
- ABI_PopAllCalleeSavedRegsAndAdjustStack();
+ ABI_PopRegistersAndAdjustStack(ABI_ALL_CALLEE_SAVED, 8);
RET();
}
@@ -746,7 +736,8 @@ void JitCompiler::Compile_NextInstr(unsigned* offset) {
((*this).*instr_func)(instr);
} else {
// Unhandled instruction
- LOG_CRITICAL(HW_GPU, "Unhandled instruction: 0x%02x (0x%08x)", instr.opcode.Value(), instr.hex);
+ LOG_CRITICAL(HW_GPU, "Unhandled instruction: 0x%02x (0x%08x)",
+ instr.opcode.Value().EffectiveOpCode(), instr.hex);
}
}
@@ -755,7 +746,8 @@ CompiledShader* JitCompiler::Compile() {
const auto& code = g_state.vs.program_code;
unsigned offset = g_state.regs.vs.main_offset;
- ABI_PushAllCalleeSavedRegsAndAdjustStack();
+ // The stack pointer is 8 modulo 16 at the entry of a procedure
+ ABI_PushRegistersAndAdjustStack(ABI_ALL_CALLEE_SAVED, 8);
MOV(PTRBITS, R(REGISTERS), R(ABI_PARAM1));
MOV(PTRBITS, R(UNIFORMS), ImmPtr(&g_state.vs.uniforms));
diff --git a/src/video_core/shader/shader_jit_x64.h b/src/video_core/shader/shader_jit_x64.h
index 58828ecc8..8668cfff4 100644
--- a/src/video_core/shader/shader_jit_x64.h
+++ b/src/video_core/shader/shader_jit_x64.h
@@ -77,8 +77,7 @@ private:
void Compile_EvaluateCondition(Instruction instr);
void Compile_UniformCondition(Instruction instr);
- void Compile_PushCallerSavedXMM();
- void Compile_PopCallerSavedXMM();
+ BitSet32 PersistentCallerSavedRegs();
/// Pointer to the variable that stores the current Pica code offset. Used to handle nested code blocks.
unsigned* offset_ptr = nullptr;