aboutsummaryrefslogtreecommitdiff
path: root/src/video_core/shader
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core/shader')
-rw-r--r--src/video_core/shader/shader.cpp9
-rw-r--r--src/video_core/shader/shader.h32
-rw-r--r--src/video_core/shader/shader_interpreter.cpp4
-rw-r--r--src/video_core/shader/shader_jit_x64.cpp16
-rw-r--r--src/video_core/shader/shader_jit_x64.h2
5 files changed, 45 insertions, 18 deletions
diff --git a/src/video_core/shader/shader.cpp b/src/video_core/shader/shader.cpp
index 59f54236b..5e8930476 100644
--- a/src/video_core/shader/shader.cpp
+++ b/src/video_core/shader/shader.cpp
@@ -14,6 +14,7 @@
#include "video_core/debug_utils/debug_utils.h"
#include "video_core/pica.h"
+#include "video_core/pica_state.h"
#include "video_core/video_core.h"
#include "shader.h"
@@ -134,16 +135,18 @@ OutputVertex Run(UnitState<false>& state, const InputVertex& input, int num_attr
std::fmin(std::fabs(ret.color[i].ToFloat32()), 1.0f));
}
- LOG_TRACE(Render_Software, "Output vertex: pos (%.2f, %.2f, %.2f, %.2f), quat (%.2f, %.2f, %.2f, %.2f), col(%.2f, %.2f, %.2f, %.2f), tc0(%.2f, %.2f)",
+ LOG_TRACE(Render_Software, "Output vertex: pos(%.2f, %.2f, %.2f, %.2f), quat(%.2f, %.2f, %.2f, %.2f), "
+ "col(%.2f, %.2f, %.2f, %.2f), tc0(%.2f, %.2f), view(%.2f, %.2f, %.2f)",
ret.pos.x.ToFloat32(), ret.pos.y.ToFloat32(), ret.pos.z.ToFloat32(), ret.pos.w.ToFloat32(),
ret.quat.x.ToFloat32(), ret.quat.y.ToFloat32(), ret.quat.z.ToFloat32(), ret.quat.w.ToFloat32(),
ret.color.x.ToFloat32(), ret.color.y.ToFloat32(), ret.color.z.ToFloat32(), ret.color.w.ToFloat32(),
- ret.tc0.u().ToFloat32(), ret.tc0.v().ToFloat32());
+ ret.tc0.u().ToFloat32(), ret.tc0.v().ToFloat32(),
+ ret.view.x.ToFloat32(), ret.view.y.ToFloat32(), ret.view.z.ToFloat32());
return ret;
}
-DebugData<true> ProduceDebugInfo(const InputVertex& input, int num_attributes, const Regs::ShaderConfig& config, const State::ShaderSetup& setup) {
+DebugData<true> ProduceDebugInfo(const InputVertex& input, int num_attributes, const Regs::ShaderConfig& config, const ShaderSetup& setup) {
UnitState<true> state;
state.program_counter = config.main_offset;
diff --git a/src/video_core/shader/shader.h b/src/video_core/shader/shader.h
index 1c6fa592c..7af8f1fa1 100644
--- a/src/video_core/shader/shader.h
+++ b/src/video_core/shader/shader.h
@@ -37,17 +37,19 @@ struct OutputVertex {
Math::Vec4<float24> color;
Math::Vec2<float24> tc0;
Math::Vec2<float24> tc1;
- float24 pad[6];
+ INSERT_PADDING_WORDS(2);
+ Math::Vec3<float24> view;
+ INSERT_PADDING_WORDS(1);
Math::Vec2<float24> tc2;
// Padding for optimal alignment
- float24 pad2[4];
+ INSERT_PADDING_WORDS(4);
// Attributes used to store intermediate results
// position after perspective divide
Math::Vec3<float24> screenpos;
- float24 pad3;
+ INSERT_PADDING_WORDS(1);
// Linear interpolation
// factor: 0=this, 1=vtx
@@ -75,6 +77,22 @@ struct OutputVertex {
static_assert(std::is_pod<OutputVertex>::value, "Structure is not POD");
static_assert(sizeof(OutputVertex) == 32 * sizeof(float), "OutputVertex has invalid size");
+/// Vertex shader memory
+struct ShaderSetup {
+ struct {
+ // The float uniforms are accessed by the shader JIT using SSE instructions, and are
+ // therefore required to be 16-byte aligned.
+ alignas(16) Math::Vec4<float24> f[96];
+
+ std::array<bool, 16> b;
+ std::array<Math::Vec4<u8>, 4> i;
+ } uniforms;
+
+ Math::Vec4<float24> default_attributes[16];
+
+ std::array<u32, 1024> program_code;
+ std::array<u32, 1024> swizzle_data;
+};
// Helper structure used to keep track of data useful for inspection of shader emulation
template<bool full_debugging>
@@ -258,9 +276,9 @@ struct UnitState {
struct Registers {
// The registers are accessed by the shader JIT using SSE instructions, and are therefore
// required to be 16-byte aligned.
- Math::Vec4<float24> MEMORY_ALIGNED16(input[16]);
- Math::Vec4<float24> MEMORY_ALIGNED16(output[16]);
- Math::Vec4<float24> MEMORY_ALIGNED16(temporary[16]);
+ alignas(16) Math::Vec4<float24> input[16];
+ alignas(16) Math::Vec4<float24> output[16];
+ alignas(16) Math::Vec4<float24> temporary[16];
} registers;
static_assert(std::is_pod<Registers>::value, "Structure is not POD");
@@ -345,7 +363,7 @@ OutputVertex Run(UnitState<false>& state, const InputVertex& input, int num_attr
* @param setup Setup object for the shader pipeline
* @return Debug information for this shader with regards to the given vertex
*/
-DebugData<true> ProduceDebugInfo(const InputVertex& input, int num_attributes, const Regs::ShaderConfig& config, const State::ShaderSetup& setup);
+DebugData<true> ProduceDebugInfo(const InputVertex& input, int num_attributes, const Regs::ShaderConfig& config, const ShaderSetup& setup);
} // namespace Shader
diff --git a/src/video_core/shader/shader_interpreter.cpp b/src/video_core/shader/shader_interpreter.cpp
index 7b0c20b74..79fcc56b9 100644
--- a/src/video_core/shader/shader_interpreter.cpp
+++ b/src/video_core/shader/shader_interpreter.cpp
@@ -7,6 +7,7 @@
#include <nihstro/shader_bytecode.h>
#include "video_core/pica.h"
+#include "video_core/pica_state.h"
#include "video_core/shader/shader.h"
#include "video_core/shader/shader_interpreter.h"
@@ -515,7 +516,8 @@ void RunInterpreter(UnitState<Debug>& state) {
case OpCode::Id::JMPU:
Record<DebugDataRecord::COND_BOOL_IN>(state.debug, iteration, uniforms.b[instr.flow_control.bool_uniform_id]);
- if (uniforms.b[instr.flow_control.bool_uniform_id]) {
+
+ if (uniforms.b[instr.flow_control.bool_uniform_id] == !(instr.flow_control.num_instructions & 1)) {
state.program_counter = instr.flow_control.dest_offset - 1;
}
break;
diff --git a/src/video_core/shader/shader_jit_x64.cpp b/src/video_core/shader/shader_jit_x64.cpp
index 00415e402..5083d7e54 100644
--- a/src/video_core/shader/shader_jit_x64.cpp
+++ b/src/video_core/shader/shader_jit_x64.cpp
@@ -11,6 +11,8 @@
#include "shader.h"
#include "shader_jit_x64.h"
+#include "video_core/pica_state.h"
+
namespace Pica {
namespace Shader {
@@ -653,7 +655,7 @@ void JitCompiler::Compile_IF(Instruction instr) {
FixupBranch b = J_CC(CC_Z, true);
// Compile the code that corresponds to the condition evaluating as true
- Compile_Block(instr.flow_control.dest_offset - 1);
+ Compile_Block(instr.flow_control.dest_offset);
// If there isn't an "ELSE" condition, we are done here
if (instr.flow_control.num_instructions == 0) {
@@ -667,7 +669,7 @@ void JitCompiler::Compile_IF(Instruction instr) {
// This code corresponds to the "ELSE" condition
// Comple the code that corresponds to the condition evaluating as false
- Compile_Block(instr.flow_control.dest_offset + instr.flow_control.num_instructions - 1);
+ Compile_Block(instr.flow_control.dest_offset + instr.flow_control.num_instructions);
SetJumpTarget(b2);
}
@@ -691,7 +693,7 @@ void JitCompiler::Compile_LOOP(Instruction instr) {
auto loop_start = GetCodePtr();
- Compile_Block(instr.flow_control.dest_offset);
+ Compile_Block(instr.flow_control.dest_offset + 1);
ADD(32, R(LOOPCOUNT_REG), R(LOOPINC)); // Increment LOOPCOUNT_REG by Z-component
SUB(32, R(LOOPCOUNT), Imm8(1)); // Increment loop count by 1
@@ -710,19 +712,21 @@ void JitCompiler::Compile_JMP(Instruction instr) {
else
UNREACHABLE();
- FixupBranch b = J_CC(CC_NZ, true);
+ bool inverted_condition = (instr.opcode.Value() == OpCode::Id::JMPU) &&
+ (instr.flow_control.num_instructions & 1);
+ FixupBranch b = J_CC(inverted_condition ? CC_Z : CC_NZ, true);
Compile_Block(instr.flow_control.dest_offset);
SetJumpTarget(b);
}
-void JitCompiler::Compile_Block(unsigned stop) {
+void JitCompiler::Compile_Block(unsigned end) {
// Save current offset pointer
unsigned* prev_offset_ptr = offset_ptr;
unsigned offset = *prev_offset_ptr;
- while (offset <= stop)
+ while (offset < end)
Compile_NextInstr(&offset);
// Restore current offset pointer
diff --git a/src/video_core/shader/shader_jit_x64.h b/src/video_core/shader/shader_jit_x64.h
index 3afbceccf..5ad2d9606 100644
--- a/src/video_core/shader/shader_jit_x64.h
+++ b/src/video_core/shader/shader_jit_x64.h
@@ -61,7 +61,7 @@ public:
void Compile_MAD(Instruction instr);
private:
- void Compile_Block(unsigned stop);
+ void Compile_Block(unsigned end);
void Compile_NextInstr(unsigned* offset);
void Compile_SwizzleSrc(Instruction instr, unsigned src_num, SourceRegister src_reg, Gen::X64Reg dest);