diff options
Diffstat (limited to 'src/video_core/shader')
| -rw-r--r-- | src/video_core/shader/shader.cpp | 9 | ||||
| -rw-r--r-- | src/video_core/shader/shader.h | 32 | ||||
| -rw-r--r-- | src/video_core/shader/shader_interpreter.cpp | 4 | ||||
| -rw-r--r-- | src/video_core/shader/shader_jit_x64.cpp | 16 | ||||
| -rw-r--r-- | src/video_core/shader/shader_jit_x64.h | 2 |
5 files changed, 45 insertions, 18 deletions
diff --git a/src/video_core/shader/shader.cpp b/src/video_core/shader/shader.cpp index 59f54236b..5e8930476 100644 --- a/src/video_core/shader/shader.cpp +++ b/src/video_core/shader/shader.cpp @@ -14,6 +14,7 @@ #include "video_core/debug_utils/debug_utils.h" #include "video_core/pica.h" +#include "video_core/pica_state.h" #include "video_core/video_core.h" #include "shader.h" @@ -134,16 +135,18 @@ OutputVertex Run(UnitState<false>& state, const InputVertex& input, int num_attr std::fmin(std::fabs(ret.color[i].ToFloat32()), 1.0f)); } - LOG_TRACE(Render_Software, "Output vertex: pos (%.2f, %.2f, %.2f, %.2f), quat (%.2f, %.2f, %.2f, %.2f), col(%.2f, %.2f, %.2f, %.2f), tc0(%.2f, %.2f)", + LOG_TRACE(Render_Software, "Output vertex: pos(%.2f, %.2f, %.2f, %.2f), quat(%.2f, %.2f, %.2f, %.2f), " + "col(%.2f, %.2f, %.2f, %.2f), tc0(%.2f, %.2f), view(%.2f, %.2f, %.2f)", ret.pos.x.ToFloat32(), ret.pos.y.ToFloat32(), ret.pos.z.ToFloat32(), ret.pos.w.ToFloat32(), ret.quat.x.ToFloat32(), ret.quat.y.ToFloat32(), ret.quat.z.ToFloat32(), ret.quat.w.ToFloat32(), ret.color.x.ToFloat32(), ret.color.y.ToFloat32(), ret.color.z.ToFloat32(), ret.color.w.ToFloat32(), - ret.tc0.u().ToFloat32(), ret.tc0.v().ToFloat32()); + ret.tc0.u().ToFloat32(), ret.tc0.v().ToFloat32(), + ret.view.x.ToFloat32(), ret.view.y.ToFloat32(), ret.view.z.ToFloat32()); return ret; } -DebugData<true> ProduceDebugInfo(const InputVertex& input, int num_attributes, const Regs::ShaderConfig& config, const State::ShaderSetup& setup) { +DebugData<true> ProduceDebugInfo(const InputVertex& input, int num_attributes, const Regs::ShaderConfig& config, const ShaderSetup& setup) { UnitState<true> state; state.program_counter = config.main_offset; diff --git a/src/video_core/shader/shader.h b/src/video_core/shader/shader.h index 1c6fa592c..7af8f1fa1 100644 --- a/src/video_core/shader/shader.h +++ b/src/video_core/shader/shader.h @@ -37,17 +37,19 @@ struct OutputVertex { Math::Vec4<float24> color; Math::Vec2<float24> tc0; 
Math::Vec2<float24> tc1; - float24 pad[6]; + INSERT_PADDING_WORDS(2); + Math::Vec3<float24> view; + INSERT_PADDING_WORDS(1); Math::Vec2<float24> tc2; // Padding for optimal alignment - float24 pad2[4]; + INSERT_PADDING_WORDS(4); // Attributes used to store intermediate results // position after perspective divide Math::Vec3<float24> screenpos; - float24 pad3; + INSERT_PADDING_WORDS(1); // Linear interpolation // factor: 0=this, 1=vtx @@ -75,6 +77,22 @@ struct OutputVertex { static_assert(std::is_pod<OutputVertex>::value, "Structure is not POD"); static_assert(sizeof(OutputVertex) == 32 * sizeof(float), "OutputVertex has invalid size"); +/// Vertex shader memory +struct ShaderSetup { + struct { + // The float uniforms are accessed by the shader JIT using SSE instructions, and are + // therefore required to be 16-byte aligned. + alignas(16) Math::Vec4<float24> f[96]; + + std::array<bool, 16> b; + std::array<Math::Vec4<u8>, 4> i; + } uniforms; + + Math::Vec4<float24> default_attributes[16]; + + std::array<u32, 1024> program_code; + std::array<u32, 1024> swizzle_data; +}; // Helper structure used to keep track of data useful for inspection of shader emulation template<bool full_debugging> @@ -258,9 +276,9 @@ struct UnitState { struct Registers { // The registers are accessed by the shader JIT using SSE instructions, and are therefore // required to be 16-byte aligned. 
- Math::Vec4<float24> MEMORY_ALIGNED16(input[16]); - Math::Vec4<float24> MEMORY_ALIGNED16(output[16]); - Math::Vec4<float24> MEMORY_ALIGNED16(temporary[16]); + alignas(16) Math::Vec4<float24> input[16]; + alignas(16) Math::Vec4<float24> output[16]; + alignas(16) Math::Vec4<float24> temporary[16]; } registers; static_assert(std::is_pod<Registers>::value, "Structure is not POD"); @@ -345,7 +363,7 @@ OutputVertex Run(UnitState<false>& state, const InputVertex& input, int num_attr * @param setup Setup object for the shader pipeline * @return Debug information for this shader with regards to the given vertex */ -DebugData<true> ProduceDebugInfo(const InputVertex& input, int num_attributes, const Regs::ShaderConfig& config, const State::ShaderSetup& setup); +DebugData<true> ProduceDebugInfo(const InputVertex& input, int num_attributes, const Regs::ShaderConfig& config, const ShaderSetup& setup); } // namespace Shader diff --git a/src/video_core/shader/shader_interpreter.cpp b/src/video_core/shader/shader_interpreter.cpp index 7b0c20b74..79fcc56b9 100644 --- a/src/video_core/shader/shader_interpreter.cpp +++ b/src/video_core/shader/shader_interpreter.cpp @@ -7,6 +7,7 @@ #include <nihstro/shader_bytecode.h> #include "video_core/pica.h" +#include "video_core/pica_state.h" #include "video_core/shader/shader.h" #include "video_core/shader/shader_interpreter.h" @@ -515,7 +516,8 @@ void RunInterpreter(UnitState<Debug>& state) { case OpCode::Id::JMPU: Record<DebugDataRecord::COND_BOOL_IN>(state.debug, iteration, uniforms.b[instr.flow_control.bool_uniform_id]); - if (uniforms.b[instr.flow_control.bool_uniform_id]) { + + if (uniforms.b[instr.flow_control.bool_uniform_id] == !(instr.flow_control.num_instructions & 1)) { state.program_counter = instr.flow_control.dest_offset - 1; } break; diff --git a/src/video_core/shader/shader_jit_x64.cpp b/src/video_core/shader/shader_jit_x64.cpp index 00415e402..5083d7e54 100644 --- a/src/video_core/shader/shader_jit_x64.cpp +++ 
b/src/video_core/shader/shader_jit_x64.cpp @@ -11,6 +11,8 @@ #include "shader.h" #include "shader_jit_x64.h" +#include "video_core/pica_state.h" + namespace Pica { namespace Shader { @@ -653,7 +655,7 @@ void JitCompiler::Compile_IF(Instruction instr) { FixupBranch b = J_CC(CC_Z, true); // Compile the code that corresponds to the condition evaluating as true - Compile_Block(instr.flow_control.dest_offset - 1); + Compile_Block(instr.flow_control.dest_offset); // If there isn't an "ELSE" condition, we are done here if (instr.flow_control.num_instructions == 0) { @@ -667,7 +669,7 @@ void JitCompiler::Compile_IF(Instruction instr) { // This code corresponds to the "ELSE" condition // Compile the code that corresponds to the condition evaluating as false - Compile_Block(instr.flow_control.dest_offset + instr.flow_control.num_instructions - 1); + Compile_Block(instr.flow_control.dest_offset + instr.flow_control.num_instructions); SetJumpTarget(b2); } @@ -691,7 +693,7 @@ void JitCompiler::Compile_LOOP(Instruction instr) { auto loop_start = GetCodePtr(); - Compile_Block(instr.flow_control.dest_offset); + Compile_Block(instr.flow_control.dest_offset + 1); ADD(32, R(LOOPCOUNT_REG), R(LOOPINC)); // Increment LOOPCOUNT_REG by Z-component SUB(32, R(LOOPCOUNT), Imm8(1)); // Decrement loop count by 1 @@ -710,19 +712,21 @@ void JitCompiler::Compile_JMP(Instruction instr) { else UNREACHABLE(); - FixupBranch b = J_CC(CC_NZ, true); + bool inverted_condition = (instr.opcode.Value() == OpCode::Id::JMPU) && + (instr.flow_control.num_instructions & 1); + FixupBranch b = J_CC(inverted_condition ? 
CC_Z : CC_NZ, true); Compile_Block(instr.flow_control.dest_offset); SetJumpTarget(b); } -void JitCompiler::Compile_Block(unsigned stop) { +void JitCompiler::Compile_Block(unsigned end) { // Save current offset pointer unsigned* prev_offset_ptr = offset_ptr; unsigned offset = *prev_offset_ptr; - while (offset <= stop) + while (offset < end) Compile_NextInstr(&offset); // Restore current offset pointer diff --git a/src/video_core/shader/shader_jit_x64.h b/src/video_core/shader/shader_jit_x64.h index 3afbceccf..5ad2d9606 100644 --- a/src/video_core/shader/shader_jit_x64.h +++ b/src/video_core/shader/shader_jit_x64.h @@ -61,7 +61,7 @@ public: void Compile_MAD(Instruction instr); private: - void Compile_Block(unsigned stop); + void Compile_Block(unsigned end); void Compile_NextInstr(unsigned* offset); void Compile_SwizzleSrc(Instruction instr, unsigned src_num, SourceRegister src_reg, Gen::X64Reg dest); |
