diff options
Diffstat (limited to 'src/video_core')
| -rw-r--r-- | src/video_core/command_processor.cpp | 5 | ||||
| -rw-r--r-- | src/video_core/pica.cpp | 2 | ||||
| -rw-r--r-- | src/video_core/pica.h | 13 | ||||
| -rw-r--r-- | src/video_core/shader/shader_jit_x64.cpp | 52 | ||||
| -rw-r--r-- | src/video_core/shader/shader_jit_x64.h | 3 |
5 files changed, 39 insertions, 36 deletions
diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp index a78985510..6e9cb2586 100644 --- a/src/video_core/command_processor.cpp +++ b/src/video_core/command_processor.cpp @@ -235,7 +235,8 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { for (unsigned int index = 0; index < regs.num_vertices; ++index) { - unsigned int vertex = is_indexed ? (index_u16 ? index_address_16[index] : index_address_8[index]) : index; + // Indexed rendering doesn't use the start offset + unsigned int vertex = is_indexed ? (index_u16 ? index_address_16[index] : index_address_8[index]) : (index + regs.vertex_offset); // -1 is a common special value used for primitive restart. Since it's unknown if // the PICA supports it, and it would mess up the caching, guard against it here. @@ -289,7 +290,7 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { *(float*)srcdata; input.attr[i][comp] = float24::FromFloat32(srcval); - LOG_TRACE(HW_GPU, "Loaded component %x of attribute %x for vertex %x (index %x) from 0x%08x + 0x%08lx + 0x%04lx: %f", + LOG_TRACE(HW_GPU, "Loaded component %x of attribute %x for vertex %x (index %x) from 0x%08x + 0x%08x + 0x%04x: %f", comp, i, vertex, index, attribute_config.GetPhysicalBaseAddress(), vertex_attribute_sources[i] - base_address, diff --git a/src/video_core/pica.cpp b/src/video_core/pica.cpp index c73a8178e..61983bc6c 100644 --- a/src/video_core/pica.cpp +++ b/src/video_core/pica.cpp @@ -49,11 +49,13 @@ std::string Regs::GetCommandName(int index) { ADD_FIELD(vertex_attributes); ADD_FIELD(index_array); ADD_FIELD(num_vertices); + ADD_FIELD(vertex_offset); ADD_FIELD(trigger_draw); ADD_FIELD(trigger_draw_indexed); ADD_FIELD(vs_default_attributes_setup); ADD_FIELD(command_buffer); ADD_FIELD(triangle_topology); + ADD_FIELD(restart_primitive); ADD_FIELD(gs.bool_uniforms); ADD_FIELD(gs.int_uniforms); ADD_FIELD(gs.main_offset); diff --git a/src/video_core/pica.h b/src/video_core/pica.h index f40684d83..855cb442e 100644 --- a/src/video_core/pica.h +++ b/src/video_core/pica.h @@ -769,7 +769,12 @@ struct Regs { // Number of vertices to render u32 num_vertices; - INSERT_PADDING_WORDS(0x5); + INSERT_PADDING_WORDS(0x1); + + // The index of the first vertex to render + u32 vertex_offset; + + INSERT_PADDING_WORDS(0x3); // These two trigger rendering of triangles u32 trigger_draw; @@ -823,7 +828,9 @@ struct Regs { BitField<8, 2, TriangleTopology> triangle_topology; - INSERT_PADDING_WORDS(0x21); + u32 restart_primitive; + + INSERT_PADDING_WORDS(0x20); struct ShaderConfig { BitField<0, 16, u32> bool_uniforms; @@ -992,11 +999,13 @@ ASSERT_REG_POSITION(framebuffer, 0x110); ASSERT_REG_POSITION(vertex_attributes, 0x200); ASSERT_REG_POSITION(index_array, 0x227); ASSERT_REG_POSITION(num_vertices, 0x228); +ASSERT_REG_POSITION(vertex_offset, 0x22a); ASSERT_REG_POSITION(trigger_draw, 0x22e); ASSERT_REG_POSITION(trigger_draw_indexed, 0x22f); ASSERT_REG_POSITION(vs_default_attributes_setup, 0x232); ASSERT_REG_POSITION(command_buffer, 0x238); ASSERT_REG_POSITION(triangle_topology, 0x25e); +ASSERT_REG_POSITION(restart_primitive, 0x25f); ASSERT_REG_POSITION(gs, 0x280); ASSERT_REG_POSITION(vs, 0x2b0); diff --git a/src/video_core/shader/shader_jit_x64.cpp b/src/video_core/shader/shader_jit_x64.cpp index d3cfe109e..b9a0b19e3 100644 --- a/src/video_core/shader/shader_jit_x64.cpp +++ b/src/video_core/shader/shader_jit_x64.cpp @@ -122,6 +122,14 @@ static const X64Reg ONE = XMM14; /// Constant vector of [-0.f, -0.f, -0.f, -0.f], used to efficiently negate a vector with XOR static const X64Reg NEGBIT = XMM15; +// State registers that must not be modified by external functions calls +// Scratch registers, e.g., SRC1 and SCRATCH, have to be saved on the side if needed +static const BitSet32 persistent_regs = { + UNIFORMS, REGISTERS, // Pointers to register blocks + ADDROFFS_REG_0, ADDROFFS_REG_1, LOOPCOUNT_REG, COND0, COND1, // Cached registers + ONE+16, NEGBIT+16, // Constants +}; + /// Raw constant for the source register selector that indicates no swizzling is performed static const u8 NO_SRC_REG_SWIZZLE = 0x1b; /// Raw constant for the destination register enable mask that indicates all components are enabled @@ -295,20 +303,8 @@ void JitCompiler::Compile_UniformCondition(Instruction instr) { CMP(sizeof(bool) * 8, MDisp(UNIFORMS, offset), Imm8(0)); } -void JitCompiler::Compile_PushCallerSavedXMM() { -#ifndef _WIN32 - SUB(64, R(RSP), Imm8(2 * 16)); - MOVUPS(MDisp(RSP, 16), ONE); - MOVUPS(MDisp(RSP, 0), NEGBIT); -#endif -} - -void JitCompiler::Compile_PopCallerSavedXMM() { -#ifndef _WIN32 - MOVUPS(NEGBIT, MDisp(RSP, 0)); - MOVUPS(ONE, MDisp(RSP, 16)); - ADD(64, R(RSP), Imm8(2 * 16)); -#endif +BitSet32 JitCompiler::PersistentCallerSavedRegs() { + return persistent_regs & ABI_ALL_CALLER_SAVED; } void JitCompiler::Compile_ADD(Instruction instr) { @@ -390,12 +386,9 @@ void JitCompiler::Compile_EX2(Instruction instr) { Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); MOVSS(XMM0, R(SRC1)); - // The following will actually break the stack alignment - ABI_PushAllCallerSavedRegsAndAdjustStack(); - Compile_PushCallerSavedXMM(); + ABI_PushRegistersAndAdjustStack(PersistentCallerSavedRegs(), 0); ABI_CallFunction(reinterpret_cast<const void*>(exp2f)); - Compile_PopCallerSavedXMM(); - ABI_PopAllCallerSavedRegsAndAdjustStack(); + ABI_PopRegistersAndAdjustStack(PersistentCallerSavedRegs(), 0); SHUFPS(XMM0, R(XMM0), _MM_SHUFFLE(0, 0, 0, 0)); MOVAPS(SRC1, R(XMM0)); @@ -406,12 +399,9 @@ void JitCompiler::Compile_LG2(Instruction instr) { Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); MOVSS(XMM0, R(SRC1)); - // The following will actually break the stack alignment - ABI_PushAllCallerSavedRegsAndAdjustStack(); - Compile_PushCallerSavedXMM(); + ABI_PushRegistersAndAdjustStack(PersistentCallerSavedRegs(), 0); ABI_CallFunction(reinterpret_cast<const void*>(log2f)); - Compile_PopCallerSavedXMM(); - ABI_PopAllCallerSavedRegsAndAdjustStack(); + ABI_PopRegistersAndAdjustStack(PersistentCallerSavedRegs(), 0); SHUFPS(XMM0, R(XMM0), _MM_SHUFFLE(0, 0, 0, 0)); MOVAPS(SRC1, R(XMM0)); @@ -434,10 +424,10 @@ void JitCompiler::Compile_SGE(Instruction instr) { Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2); } - CMPPS(SRC1, R(SRC2), CMP_NLT); - ANDPS(SRC1, R(ONE)); + CMPPS(SRC2, R(SRC1), CMP_LE); + ANDPS(SRC2, R(ONE)); - Compile_DestEnable(instr, SRC1); + Compile_DestEnable(instr, SRC2); } void JitCompiler::Compile_SLT(Instruction instr) { @@ -560,7 +550,7 @@ void JitCompiler::Compile_NOP(Instruction instr) { } void JitCompiler::Compile_END(Instruction instr) { - ABI_PopAllCalleeSavedRegsAndAdjustStack(); + ABI_PopRegistersAndAdjustStack(ABI_ALL_CALLEE_SAVED, 8); RET(); } @@ -746,7 +736,8 @@ void JitCompiler::Compile_NextInstr(unsigned* offset) { ((*this).*instr_func)(instr); } else { // Unhandled instruction - LOG_CRITICAL(HW_GPU, "Unhandled instruction: 0x%02x (0x%08x)", instr.opcode.Value(), instr.hex); + LOG_CRITICAL(HW_GPU, "Unhandled instruction: 0x%02x (0x%08x)", + instr.opcode.Value().EffectiveOpCode(), instr.hex); } } @@ -755,7 +746,8 @@ CompiledShader* JitCompiler::Compile() { const auto& code = g_state.vs.program_code; unsigned offset = g_state.regs.vs.main_offset; - ABI_PushAllCalleeSavedRegsAndAdjustStack(); + // The stack pointer is 8 modulo 16 at the entry of a procedure + ABI_PushRegistersAndAdjustStack(ABI_ALL_CALLEE_SAVED, 8); MOV(PTRBITS, R(REGISTERS), R(ABI_PARAM1)); MOV(PTRBITS, R(UNIFORMS), ImmPtr(&g_state.vs.uniforms)); diff --git a/src/video_core/shader/shader_jit_x64.h b/src/video_core/shader/shader_jit_x64.h index 58828ecc8..8668cfff4 100644 --- a/src/video_core/shader/shader_jit_x64.h +++ b/src/video_core/shader/shader_jit_x64.h @@ -77,8 +77,7 @@ private: void Compile_EvaluateCondition(Instruction instr); void Compile_UniformCondition(Instruction instr); - void Compile_PushCallerSavedXMM(); - void Compile_PopCallerSavedXMM(); + BitSet32 PersistentCallerSavedRegs(); /// Pointer to the variable that stores the current Pica code offset. Used to handle nested code blocks. unsigned* offset_ptr = nullptr; |
