From 4d4572c697616c43ce47f43fc5de1a1b9ae27d5f Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Sun, 7 Dec 2014 22:22:04 +0100 Subject: Integrate Boost into build system and perform a trivial cleanup in vertex_shader.cpp. --- src/video_core/vertex_shader.cpp | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) (limited to 'src/video_core/vertex_shader.cpp') diff --git a/src/video_core/vertex_shader.cpp b/src/video_core/vertex_shader.cpp index 96625791c..0dff11a0f 100644 --- a/src/video_core/vertex_shader.cpp +++ b/src/video_core/vertex_shader.cpp @@ -2,11 +2,16 @@ // Licensed under GPLv2 // Refer to the license.txt file included. +#include + +#include + +#include + +#include "debug_utils/debug_utils.h" + #include "pica.h" #include "vertex_shader.h" -#include "debug_utils/debug_utils.h" -#include -#include namespace Pica { @@ -238,7 +243,7 @@ OutputVertex RunShader(const InputVertex& input, int num_attributes) // Setup input register table const auto& attribute_register_map = registers.vs_input_register_map; float24 dummy_register; - std::fill(&state.input_register_table[0], &state.input_register_table[16], &dummy_register); + boost::fill(state.input_register_table, &dummy_register); if(num_attributes > 0) state.input_register_table[attribute_register_map.attribute0_register] = &input.attr[0].x; if(num_attributes > 1) state.input_register_table[attribute_register_map.attribute1_register] = &input.attr[1].x; if(num_attributes > 2) state.input_register_table[attribute_register_map.attribute2_register] = &input.attr[2].x; @@ -272,8 +277,7 @@ OutputVertex RunShader(const InputVertex& input, int num_attributes) state.status_registers[0] = false; state.status_registers[1] = false; - std::fill(state.call_stack, state.call_stack + sizeof(state.call_stack) / sizeof(state.call_stack[0]), - VertexShaderState::INVALID_ADDRESS); + boost::fill(state.call_stack, VertexShaderState::INVALID_ADDRESS); state.call_stack_pointer = &state.call_stack[0]; 
ProcessShaderCode(state); -- cgit v1.2.3 From 0600e2d8b5b30bd68c8b19cb1f2051e096e7caa9 Mon Sep 17 00:00:00 2001 From: Yuri Kunde Schlesner Date: Fri, 5 Dec 2014 23:53:49 -0200 Subject: Convert old logging calls to new logging macros --- src/video_core/vertex_shader.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'src/video_core/vertex_shader.cpp') diff --git a/src/video_core/vertex_shader.cpp b/src/video_core/vertex_shader.cpp index 0dff11a0f..477e78cfe 100644 --- a/src/video_core/vertex_shader.cpp +++ b/src/video_core/vertex_shader.cpp @@ -206,7 +206,7 @@ static void ProcessShaderCode(VertexShaderState& state) { case Instruction::OpCode::CALL: increment_pc = false; - _dbg_assert_(GPU, state.call_stack_pointer - state.call_stack < sizeof(state.call_stack)); + _dbg_assert_(HW_GPU, state.call_stack_pointer - state.call_stack < sizeof(state.call_stack)); *++state.call_stack_pointer = state.program_counter - shader_memory; // TODO: Does this offset refer to the beginning of shader memory? 
@@ -218,7 +218,7 @@ static void ProcessShaderCode(VertexShaderState& state) { break; default: - ERROR_LOG(GPU, "Unhandled instruction: 0x%02x (%s): 0x%08x", + LOG_ERROR(HW_GPU, "Unhandled instruction: 0x%02x (%s): 0x%08x", (int)instr.opcode.Value(), instr.GetOpCodeName().c_str(), instr.hex); break; } @@ -285,7 +285,7 @@ OutputVertex RunShader(const InputVertex& input, int num_attributes) state.debug.max_opdesc_id, registers.vs_main_offset, registers.vs_output_attributes); - DEBUG_LOG(GPU, "Output vertex: pos (%.2f, %.2f, %.2f, %.2f), col(%.2f, %.2f, %.2f, %.2f), tc0(%.2f, %.2f)", + LOG_TRACE(Render_Software, "Output vertex: pos (%.2f, %.2f, %.2f, %.2f), col(%.2f, %.2f, %.2f, %.2f), tc0(%.2f, %.2f)", ret.pos.x.ToFloat32(), ret.pos.y.ToFloat32(), ret.pos.z.ToFloat32(), ret.pos.w.ToFloat32(), ret.color.x.ToFloat32(), ret.color.y.ToFloat32(), ret.color.z.ToFloat32(), ret.color.w.ToFloat32(), ret.tc0.u().ToFloat32(), ret.tc0.v().ToFloat32()); -- cgit v1.2.3 From 8ce1d324602001e1102648319a9281ee08a1af95 Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Tue, 16 Dec 2014 00:32:49 +0100 Subject: Pica/VertexShader: Remove (now) duplicated shader bytecode definitions in favor of nihstro's ones. 
--- src/video_core/vertex_shader.cpp | 43 ++++++++++++++++++++++++++++------------ 1 file changed, 30 insertions(+), 13 deletions(-) (limited to 'src/video_core/vertex_shader.cpp') diff --git a/src/video_core/vertex_shader.cpp b/src/video_core/vertex_shader.cpp index 477e78cfe..064a703eb 100644 --- a/src/video_core/vertex_shader.cpp +++ b/src/video_core/vertex_shader.cpp @@ -8,11 +8,18 @@ #include +#include + #include "debug_utils/debug_utils.h" #include "pica.h" #include "vertex_shader.h" +using nihstro::Instruction; +using nihstro::RegisterType; +using nihstro::SourceRegister; +using nihstro::SwizzlePattern; + namespace Pica { namespace VertexShader { @@ -70,19 +77,28 @@ static void ProcessShaderCode(VertexShaderState& state) { const Instruction& instr = *(const Instruction*)state.program_counter; state.debug.max_offset = std::max(state.debug.max_offset, 1 + (state.program_counter - shader_memory)); - const float24* src1_ = (instr.common.src1 < 0x10) ? state.input_register_table[instr.common.src1.GetIndex()] - : (instr.common.src1 < 0x20) ? &state.temporary_registers[instr.common.src1.GetIndex()].x - : (instr.common.src1 < 0x80) ? &shader_uniforms.f[instr.common.src1.GetIndex()].x - : nullptr; - const float24* src2_ = (instr.common.src2 < 0x10) ? 
state.input_register_table[instr.common.src2.GetIndex()] - : &state.temporary_registers[instr.common.src2.GetIndex()].x; + auto LookupSourceRegister = [&](const SourceRegister& source_reg) -> const float24* { + switch (source_reg.GetRegisterType()) { + case RegisterType::Input: + return state.input_register_table[source_reg.GetIndex()]; + + case RegisterType::Temporary: + return &state.temporary_registers[source_reg.GetIndex()].x; + + case RegisterType::FloatUniform: + return &shader_uniforms.f[source_reg.GetIndex()].x; + } + }; + bool is_inverted = 0 != (instr.opcode.GetInfo().subtype & Instruction::OpCodeInfo::SrcInversed); + const float24* src1_ = LookupSourceRegister(instr.common.GetSrc1(is_inverted)); + const float24* src2_ = LookupSourceRegister(instr.common.GetSrc2(is_inverted)); float24* dest = (instr.common.dest < 0x08) ? state.output_register_table[4*instr.common.dest.GetIndex()] : (instr.common.dest < 0x10) ? nullptr : (instr.common.dest < 0x20) ? &state.temporary_registers[instr.common.dest.GetIndex()][0] : nullptr; const SwizzlePattern& swizzle = *(SwizzlePattern*)&swizzle_data[instr.common.operand_desc_id]; - const bool negate_src1 = (swizzle.negate != 0); + const bool negate_src1 = (swizzle.negate_src1 != 0); float24 src1[4] = { src1_[(int)swizzle.GetSelectorSrc1(0)], @@ -192,7 +208,9 @@ static void ProcessShaderCode(VertexShaderState& state) { break; } - case Instruction::OpCode::RET: + // NOP is currently used as a heuristic for leaving from a function. + // TODO: This is completely incorrect. + case Instruction::OpCode::NOP: if (*state.call_stack_pointer == VertexShaderState::INVALID_ADDRESS) { exit_loop = true; } else { @@ -209,17 +227,16 @@ static void ProcessShaderCode(VertexShaderState& state) { _dbg_assert_(HW_GPU, state.call_stack_pointer - state.call_stack < sizeof(state.call_stack)); *++state.call_stack_pointer = state.program_counter - shader_memory; - // TODO: Does this offset refer to the beginning of shader memory? 
- state.program_counter = &shader_memory[instr.flow_control.offset_words]; + state.program_counter = &shader_memory[instr.flow_control.dest_offset]; break; - case Instruction::OpCode::FLS: - // TODO: Do whatever needs to be done here? + case Instruction::OpCode::END: + // TODO break; default: LOG_ERROR(HW_GPU, "Unhandled instruction: 0x%02x (%s): 0x%08x", - (int)instr.opcode.Value(), instr.GetOpCodeName().c_str(), instr.hex); + (int)instr.opcode.Value(), instr.opcode.GetInfo().name, instr.hex); break; } -- cgit v1.2.3 From ce36ad454ecd4707a77916fdb79954c8924b50ee Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Fri, 12 Dec 2014 17:55:43 +0100 Subject: Pica/VertexShader: Support negating src2. --- src/video_core/vertex_shader.cpp | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'src/video_core/vertex_shader.cpp') diff --git a/src/video_core/vertex_shader.cpp b/src/video_core/vertex_shader.cpp index 064a703eb..c5c5261fe 100644 --- a/src/video_core/vertex_shader.cpp +++ b/src/video_core/vertex_shader.cpp @@ -10,10 +10,10 @@ #include -#include "debug_utils/debug_utils.h" #include "pica.h" #include "vertex_shader.h" +#include "debug_utils/debug_utils.h" using nihstro::Instruction; using nihstro::RegisterType; @@ -99,6 +99,7 @@ static void ProcessShaderCode(VertexShaderState& state) { const SwizzlePattern& swizzle = *(SwizzlePattern*)&swizzle_data[instr.common.operand_desc_id]; const bool negate_src1 = (swizzle.negate_src1 != 0); + const bool negate_src2 = (swizzle.negate_src2 != 0); float24 src1[4] = { src1_[(int)swizzle.GetSelectorSrc1(0)], @@ -112,12 +113,18 @@ static void ProcessShaderCode(VertexShaderState& state) { src1[2] = src1[2] * float24::FromFloat32(-1); src1[3] = src1[3] * float24::FromFloat32(-1); } - const float24 src2[4] = { + float24 src2[4] = { src2_[(int)swizzle.GetSelectorSrc2(0)], src2_[(int)swizzle.GetSelectorSrc2(1)], src2_[(int)swizzle.GetSelectorSrc2(2)], src2_[(int)swizzle.GetSelectorSrc2(3)], }; + if (negate_src2) 
{ + src2[0] = src2[0] * float24::FromFloat32(-1); + src2[1] = src2[1] * float24::FromFloat32(-1); + src2[2] = src2[2] * float24::FromFloat32(-1); + src2[3] = src2[3] * float24::FromFloat32(-1); + } switch (instr.opcode) { case Instruction::OpCode::ADD: -- cgit v1.2.3 From b85524c760989f3d053d05df6b244b28252b2f4e Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Tue, 16 Dec 2014 01:20:29 +0100 Subject: Pica/VertexShader: Some cleanups using std::array. --- src/video_core/vertex_shader.cpp | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) (limited to 'src/video_core/vertex_shader.cpp') diff --git a/src/video_core/vertex_shader.cpp b/src/video_core/vertex_shader.cpp index c5c5261fe..c98c625c2 100644 --- a/src/video_core/vertex_shader.cpp +++ b/src/video_core/vertex_shader.cpp @@ -31,8 +31,8 @@ static struct { // TODO: Not sure where the shader binary and swizzle patterns are supposed to be loaded to! // For now, we just keep these local arrays around. -static u32 shader_memory[1024]; -static u32 swizzle_data[1024]; +static std::array<u32, 1024> shader_memory; +static std::array<u32, 1024> swizzle_data; void SubmitShaderMemoryChange(u32 addr, u32 value) { @@ -49,6 +49,17 @@ Math::Vec4<float24>& GetFloatUniform(u32 index) return shader_uniforms.f[index]; } +const std::array<u32, 1024>& GetShaderBinary() +{ + return shader_memory; +} + +const std::array<u32, 1024>& GetSwizzlePatterns() +{ + return swizzle_data; +} + + struct VertexShaderState { u32* program_counter; @@ -75,7 +86,7 @@ static void ProcessShaderCode(VertexShaderState& state) { bool increment_pc = true; bool exit_loop = false; const Instruction& instr = *(const Instruction*)state.program_counter; - state.debug.max_offset = std::max<u32>(state.debug.max_offset, 1 + (state.program_counter - shader_memory)); + state.debug.max_offset = std::max<u32>(state.debug.max_offset, 1 + (state.program_counter - shader_memory.data())); auto LookupSourceRegister = [&](const SourceRegister& source_reg) -> const float24* { switch (source_reg.GetRegisterType()) 
{ @@ -233,7 +244,7 @@ static void ProcessShaderCode(VertexShaderState& state) { _dbg_assert_(HW_GPU, state.call_stack_pointer - state.call_stack < sizeof(state.call_stack)); - *++state.call_stack_pointer = state.program_counter - shader_memory; + *++state.call_stack_pointer = state.program_counter - shader_memory.data(); state.program_counter = &shader_memory[instr.flow_control.dest_offset]; break; @@ -305,7 +316,7 @@ OutputVertex RunShader(const InputVertex& input, int num_attributes) state.call_stack_pointer = &state.call_stack[0]; ProcessShaderCode(state); - DebugUtils::DumpShader(shader_memory, state.debug.max_offset, swizzle_data, + DebugUtils::DumpShader(shader_memory.data(), state.debug.max_offset, swizzle_data.data(), state.debug.max_opdesc_id, registers.vs_main_offset, registers.vs_output_attributes); -- cgit v1.2.3 From cb1804e0aba48826d671afb0500ae5eaeebd5c5a Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Fri, 12 Dec 2014 18:31:37 +0100 Subject: Pica/VertexShader: Move code around a bit. 
--- src/video_core/vertex_shader.cpp | 100 +++++++++++++++++++++++---------------- 1 file changed, 58 insertions(+), 42 deletions(-) (limited to 'src/video_core/vertex_shader.cpp') diff --git a/src/video_core/vertex_shader.cpp b/src/video_core/vertex_shader.cpp index c98c625c2..33a862b74 100644 --- a/src/video_core/vertex_shader.cpp +++ b/src/video_core/vertex_shader.cpp @@ -86,6 +86,8 @@ static void ProcessShaderCode(VertexShaderState& state) { bool increment_pc = true; bool exit_loop = false; const Instruction& instr = *(const Instruction*)state.program_counter; + const SwizzlePattern& swizzle = *(SwizzlePattern*)&swizzle_data[instr.common.operand_desc_id]; + state.debug.max_offset = std::max(state.debug.max_offset, 1 + (state.program_counter - shader_memory.data())); auto LookupSourceRegister = [&](const SourceRegister& source_reg) -> const float24* { @@ -100,47 +102,52 @@ static void ProcessShaderCode(VertexShaderState& state) { return &shader_uniforms.f[source_reg.GetIndex()].x; } }; - bool is_inverted = 0 != (instr.opcode.GetInfo().subtype & Instruction::OpCodeInfo::SrcInversed); - const float24* src1_ = LookupSourceRegister(instr.common.GetSrc1(is_inverted)); - const float24* src2_ = LookupSourceRegister(instr.common.GetSrc2(is_inverted)); - float24* dest = (instr.common.dest < 0x08) ? state.output_register_table[4*instr.common.dest.GetIndex()] - : (instr.common.dest < 0x10) ? nullptr - : (instr.common.dest < 0x20) ? 
&state.temporary_registers[instr.common.dest.GetIndex()][0] - : nullptr; - const SwizzlePattern& swizzle = *(SwizzlePattern*)&swizzle_data[instr.common.operand_desc_id]; - const bool negate_src1 = (swizzle.negate_src1 != 0); - const bool negate_src2 = (swizzle.negate_src2 != 0); - - float24 src1[4] = { - src1_[(int)swizzle.GetSelectorSrc1(0)], - src1_[(int)swizzle.GetSelectorSrc1(1)], - src1_[(int)swizzle.GetSelectorSrc1(2)], - src1_[(int)swizzle.GetSelectorSrc1(3)], - }; - if (negate_src1) { - src1[0] = src1[0] * float24::FromFloat32(-1); - src1[1] = src1[1] * float24::FromFloat32(-1); - src1[2] = src1[2] * float24::FromFloat32(-1); - src1[3] = src1[3] * float24::FromFloat32(-1); - } - float24 src2[4] = { - src2_[(int)swizzle.GetSelectorSrc2(0)], - src2_[(int)swizzle.GetSelectorSrc2(1)], - src2_[(int)swizzle.GetSelectorSrc2(2)], - src2_[(int)swizzle.GetSelectorSrc2(3)], - }; - if (negate_src2) { - src2[0] = src2[0] * float24::FromFloat32(-1); - src2[1] = src2[1] * float24::FromFloat32(-1); - src2[2] = src2[2] * float24::FromFloat32(-1); - src2[3] = src2[3] * float24::FromFloat32(-1); - } + switch (instr.opcode.GetInfo().type) { + case Instruction::OpCodeType::Arithmetic: + { + bool is_inverted = 0 != (instr.opcode.GetInfo().subtype & Instruction::OpCodeInfo::SrcInversed); + const float24* src1_ = LookupSourceRegister(instr.common.GetSrc1(is_inverted)); + const float24* src2_ = LookupSourceRegister(instr.common.GetSrc2(is_inverted)); + + const bool negate_src1 = (swizzle.negate_src1 != 0); + const bool negate_src2 = (swizzle.negate_src2 != 0); + + float24 src1[4] = { + src1_[(int)swizzle.GetSelectorSrc1(0)], + src1_[(int)swizzle.GetSelectorSrc1(1)], + src1_[(int)swizzle.GetSelectorSrc1(2)], + src1_[(int)swizzle.GetSelectorSrc1(3)], + }; + if (negate_src1) { + src1[0] = src1[0] * float24::FromFloat32(-1); + src1[1] = src1[1] * float24::FromFloat32(-1); + src1[2] = src1[2] * float24::FromFloat32(-1); + src1[3] = src1[3] * float24::FromFloat32(-1); + } + float24 
src2[4] = { + src2_[(int)swizzle.GetSelectorSrc2(0)], + src2_[(int)swizzle.GetSelectorSrc2(1)], + src2_[(int)swizzle.GetSelectorSrc2(2)], + src2_[(int)swizzle.GetSelectorSrc2(3)], + }; + if (negate_src2) { + src2[0] = src2[0] * float24::FromFloat32(-1); + src2[1] = src2[1] * float24::FromFloat32(-1); + src2[2] = src2[2] * float24::FromFloat32(-1); + src2[3] = src2[3] * float24::FromFloat32(-1); + } + + float24* dest = (instr.common.dest < 0x08) ? state.output_register_table[4*instr.common.dest.GetIndex()] + : (instr.common.dest < 0x10) ? nullptr + : (instr.common.dest < 0x20) ? &state.temporary_registers[instr.common.dest.GetIndex()][0] + : nullptr; + + state.debug.max_opdesc_id = std::max(state.debug.max_opdesc_id, 1+instr.common.operand_desc_id); - switch (instr.opcode) { + switch (instr.opcode) { case Instruction::OpCode::ADD: { - state.debug.max_opdesc_id = std::max(state.debug.max_opdesc_id, 1+instr.common.operand_desc_id); for (int i = 0; i < 4; ++i) { if (!swizzle.DestComponentEnabled(i)) continue; @@ -153,7 +160,6 @@ static void ProcessShaderCode(VertexShaderState& state) { case Instruction::OpCode::MUL: { - state.debug.max_opdesc_id = std::max(state.debug.max_opdesc_id, 1+instr.common.operand_desc_id); for (int i = 0; i < 4; ++i) { if (!swizzle.DestComponentEnabled(i)) continue; @@ -167,7 +173,6 @@ static void ProcessShaderCode(VertexShaderState& state) { case Instruction::OpCode::DP3: case Instruction::OpCode::DP4: { - state.debug.max_opdesc_id = std::max(state.debug.max_opdesc_id, 1+instr.common.operand_desc_id); float24 dot = float24::FromFloat32(0.f); int num_components = (instr.opcode == Instruction::OpCode::DP3) ? 
3 : 4; for (int i = 0; i < num_components; ++i) @@ -185,7 +190,6 @@ static void ProcessShaderCode(VertexShaderState& state) { // Reciprocal case Instruction::OpCode::RCP: { - state.debug.max_opdesc_id = std::max(state.debug.max_opdesc_id, 1+instr.common.operand_desc_id); for (int i = 0; i < 4; ++i) { if (!swizzle.DestComponentEnabled(i)) continue; @@ -201,7 +205,6 @@ static void ProcessShaderCode(VertexShaderState& state) { // Reciprocal Square Root case Instruction::OpCode::RSQ: { - state.debug.max_opdesc_id = std::max(state.debug.max_opdesc_id, 1+instr.common.operand_desc_id); for (int i = 0; i < 4; ++i) { if (!swizzle.DestComponentEnabled(i)) continue; @@ -216,7 +219,6 @@ static void ProcessShaderCode(VertexShaderState& state) { case Instruction::OpCode::MOV: { - state.debug.max_opdesc_id = std::max(state.debug.max_opdesc_id, 1+instr.common.operand_desc_id); for (int i = 0; i < 4; ++i) { if (!swizzle.DestComponentEnabled(i)) continue; @@ -226,6 +228,17 @@ static void ProcessShaderCode(VertexShaderState& state) { break; } + default: + LOG_ERROR(HW_GPU, "Unhandled arithmetic instruction: 0x%02x (%s): 0x%08x", + (int)instr.opcode.Value(), instr.opcode.GetInfo().name, instr.hex); + break; + } + + break; + } + default: + // Process instruction explicitly + switch (instr.opcode) { // NOP is currently used as a heuristic for leaving from a function. // TODO: This is completely incorrect. case Instruction::OpCode::NOP: @@ -256,6 +269,9 @@ static void ProcessShaderCode(VertexShaderState& state) { LOG_ERROR(HW_GPU, "Unhandled instruction: 0x%02x (%s): 0x%08x", (int)instr.opcode.Value(), instr.opcode.GetInfo().name, instr.hex); break; + } + + break; } if (increment_pc) -- cgit v1.2.3 From 67618a2c55e0b6860bbb083962cdd28a543bf82a Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Fri, 12 Dec 2014 22:50:09 +0100 Subject: Pica/VertexShader: Add support for MOVA, CMP and IFC. 
--- src/video_core/vertex_shader.cpp | 137 +++++++++++++++++++++++++++++++++++++-- 1 file changed, 130 insertions(+), 7 deletions(-) (limited to 'src/video_core/vertex_shader.cpp') diff --git a/src/video_core/vertex_shader.cpp b/src/video_core/vertex_shader.cpp index 33a862b74..5d9203c86 100644 --- a/src/video_core/vertex_shader.cpp +++ b/src/video_core/vertex_shader.cpp @@ -59,6 +59,8 @@ const std::array& GetSwizzlePatterns() return swizzle_data; } +// TODO: Is there actually a limit on hardware? +const int if_stack_size = 8; struct VertexShaderState { u32* program_counter; @@ -67,7 +69,11 @@ struct VertexShaderState { float24* output_register_table[7*4]; Math::Vec4 temporary_registers[16]; - bool status_registers[2]; + bool conditional_code[2]; + + // Two Address registers and one loop counter + // TODO: How many bits do these actually have? + s32 address_registers[3]; enum { INVALID_ADDRESS = 0xFFFFFFFF @@ -75,6 +81,12 @@ struct VertexShaderState { u32 call_stack[8]; // TODO: What is the maximal call stack depth? u32* call_stack_pointer; + struct IfStackElement { + u32 else_addr; + u32 else_instructions; + } if_stack[if_stack_size]; + IfStackElement* if_stack_pointer; + struct { u32 max_offset; // maximum program counter ever reached u32 max_opdesc_id; // maximum swizzle pattern index ever used @@ -107,11 +119,20 @@ static void ProcessShaderCode(VertexShaderState& state) { case Instruction::OpCodeType::Arithmetic: { bool is_inverted = 0 != (instr.opcode.GetInfo().subtype & Instruction::OpCodeInfo::SrcInversed); - const float24* src1_ = LookupSourceRegister(instr.common.GetSrc1(is_inverted)); + if (is_inverted) { + // We don't really support this properly and/or reliably + LOG_ERROR(HW_GPU, "Bad condition..."); + exit(0); + } + + const int address_offset = (instr.common.address_register_index == 0) + ? 
0 : state.address_registers[instr.common.address_register_index - 1]; + + const float24* src1_ = LookupSourceRegister(instr.common.GetSrc1(is_inverted) + address_offset); const float24* src2_ = LookupSourceRegister(instr.common.GetSrc2(is_inverted)); - const bool negate_src1 = (swizzle.negate_src1 != 0); - const bool negate_src2 = (swizzle.negate_src2 != 0); + const bool negate_src1 = (swizzle.negate_src1 != false); + const bool negate_src2 = (swizzle.negate_src2 != false); float24 src1[4] = { src1_[(int)swizzle.GetSelectorSrc1(0)], @@ -217,6 +238,19 @@ static void ProcessShaderCode(VertexShaderState& state) { break; } + case Instruction::OpCode::MOVA: + { + for (int i = 0; i < 2; ++i) { + if (!swizzle.DestComponentEnabled(i)) + continue; + + // TODO: Figure out how the rounding is done on hardware + state.address_registers[i] = static_cast(src1[i].ToFloat32()); + } + + break; + } + case Instruction::OpCode::MOV: { for (int i = 0; i < 4; ++i) { @@ -228,16 +262,56 @@ static void ProcessShaderCode(VertexShaderState& state) { break; } + case Instruction::OpCode::CMP: + for (int i = 0; i < 2; ++i) { + // TODO: Can you restrict to one compare via dest masking? + + auto compare_op = instr.common.compare_op; + auto op = (i == 0) ? 
compare_op.x.Value() : compare_op.y.Value(); + + switch (op) { + case compare_op.Equal: + state.conditional_code[i] = (src1[i] == src2[i]); + break; + + case compare_op.NotEqual: + state.conditional_code[i] = (src1[i] != src2[i]); + break; + + case compare_op.LessThan: + state.conditional_code[i] = (src1[i] < src2[i]); + break; + + case compare_op.LessEqual: + state.conditional_code[i] = (src1[i] <= src2[i]); + break; + + case compare_op.GreaterThan: + state.conditional_code[i] = (src1[i] > src2[i]); + break; + + case compare_op.GreaterEqual: + state.conditional_code[i] = (src1[i] >= src2[i]); + break; + + default: + LOG_ERROR(HW_GPU, "Unknown compare mode %x", static_cast(op)); + break; + } + } + break; + default: LOG_ERROR(HW_GPU, "Unhandled arithmetic instruction: 0x%02x (%s): 0x%08x", (int)instr.opcode.Value(), instr.opcode.GetInfo().name, instr.hex); + _dbg_assert_(HW_GPU, 0); break; } break; } default: - // Process instruction explicitly + // Handle each instruction on its own switch (instr.opcode) { // NOP is currently used as a heuristic for leaving from a function. // TODO: This is completely incorrect. @@ -265,6 +339,44 @@ static void ProcessShaderCode(VertexShaderState& state) { // TODO break; + case Instruction::OpCode::IFC: + { + // TODO: Do we need to consider swizzlers here? 
+ + auto flow_control = instr.flow_control; + bool results[3] = { flow_control.refx == state.conditional_code[0], + flow_control.refy == state.conditional_code[1] }; + + switch (flow_control.op) { + case flow_control.Or: + results[2] = results[0] || results[1]; + break; + + case flow_control.And: + results[2] = results[0] && results[1]; + break; + + case flow_control.JustX: + results[2] = results[0]; + break; + + case flow_control.JustY: + results[2] = results[1]; + break; + } + + if (results[2]) { + ++state.if_stack_pointer; + + state.if_stack_pointer->else_addr = instr.flow_control.dest_offset; + state.if_stack_pointer->else_instructions = instr.flow_control.num_instructions; + } else { + state.program_counter = &shader_memory[instr.flow_control.dest_offset] - 1; + } + + break; + } + default: LOG_ERROR(HW_GPU, "Unhandled instruction: 0x%02x (%s): 0x%08x", (int)instr.opcode.Value(), instr.opcode.GetInfo().name, instr.hex); @@ -277,6 +389,13 @@ static void ProcessShaderCode(VertexShaderState& state) { if (increment_pc) ++state.program_counter; + if (state.if_stack_pointer >= &state.if_stack[0]) { + if (state.program_counter - shader_memory.data() == state.if_stack_pointer->else_addr) { + state.program_counter += state.if_stack_pointer->else_instructions; + state.if_stack_pointer--; + } + } + if (exit_loop) break; } @@ -326,11 +445,15 @@ OutputVertex RunShader(const InputVertex& input, int num_attributes) state.output_register_table[4*i+comp] = ((float24*)&ret) + semantics[comp]; } - state.status_registers[0] = false; - state.status_registers[1] = false; + state.conditional_code[0] = false; + state.conditional_code[1] = false; boost::fill(state.call_stack, VertexShaderState::INVALID_ADDRESS); state.call_stack_pointer = &state.call_stack[0]; + std::fill(state.if_stack, state.if_stack + sizeof(state.if_stack) / sizeof(state.if_stack[0]), + VertexShaderState::IfStackElement{VertexShaderState::INVALID_ADDRESS, VertexShaderState::INVALID_ADDRESS}); + 
state.if_stack_pointer = state.if_stack - 1; // Meh. TODO: Make this less ugly + ProcessShaderCode(state); DebugUtils::DumpShader(shader_memory.data(), state.debug.max_offset, swizzle_data.data(), state.debug.max_opdesc_id, registers.vs_main_offset, -- cgit v1.2.3 From aff808b2fdfd9605179a13eb55b72d68a7cdd8c2 Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Sat, 13 Dec 2014 21:20:47 +0100 Subject: Pica: Add support for boolean uniforms. --- src/video_core/vertex_shader.cpp | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'src/video_core/vertex_shader.cpp') diff --git a/src/video_core/vertex_shader.cpp b/src/video_core/vertex_shader.cpp index 5d9203c86..fbec1bcc8 100644 --- a/src/video_core/vertex_shader.cpp +++ b/src/video_core/vertex_shader.cpp @@ -26,8 +26,9 @@ namespace VertexShader { static struct { Math::Vec4 f[96]; -} shader_uniforms; + std::array b; +} shader_uniforms; // TODO: Not sure where the shader binary and swizzle patterns are supposed to be loaded to! // For now, we just keep these local arrays around. @@ -49,6 +50,11 @@ Math::Vec4& GetFloatUniform(u32 index) return shader_uniforms.f[index]; } +bool& GetBoolUniform(u32 index) +{ + return shader_uniforms.b[index]; +} + const std::array& GetShaderBinary() { return shader_memory; -- cgit v1.2.3 From cd163fb59ae2922d33aa931f51ef5d116c0adc3f Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Sat, 13 Dec 2014 21:22:55 +0100 Subject: Pica/VertexShader: Implement MAX instructions. 
--- src/video_core/vertex_shader.cpp | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'src/video_core/vertex_shader.cpp') diff --git a/src/video_core/vertex_shader.cpp b/src/video_core/vertex_shader.cpp index fbec1bcc8..742e5a9f2 100644 --- a/src/video_core/vertex_shader.cpp +++ b/src/video_core/vertex_shader.cpp @@ -197,6 +197,15 @@ static void ProcessShaderCode(VertexShaderState& state) { break; } + case Instruction::OpCode::MAX: + for (int i = 0; i < 4; ++i) { + if (!swizzle.DestComponentEnabled(i)) + continue; + + dest[i] = std::max(src1[i], src2[i]); + } + break; + case Instruction::OpCode::DP3: case Instruction::OpCode::DP4: { -- cgit v1.2.3 From 22afb9d8309f56494d95f6132561a413b8e7895c Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Sat, 13 Dec 2014 21:23:41 +0100 Subject: Pica/VertexShader: Run instruction handlers according to the effective opcode. This allows for proper emulation of the different CMP/LRP/MAD instructions. --- src/video_core/vertex_shader.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/video_core/vertex_shader.cpp') diff --git a/src/video_core/vertex_shader.cpp b/src/video_core/vertex_shader.cpp index 742e5a9f2..dd406f9ca 100644 --- a/src/video_core/vertex_shader.cpp +++ b/src/video_core/vertex_shader.cpp @@ -172,7 +172,7 @@ static void ProcessShaderCode(VertexShaderState& state) { state.debug.max_opdesc_id = std::max(state.debug.max_opdesc_id, 1+instr.common.operand_desc_id); - switch (instr.opcode) { + switch (instr.opcode.EffectiveOpCode()) { case Instruction::OpCode::ADD: { for (int i = 0; i < 4; ++i) { -- cgit v1.2.3 From 6bd41de276a97fee1d4f07789a33ff49d494a20d Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Sat, 13 Dec 2014 21:30:13 +0100 Subject: Pica/VertexShader: Cleanup flow control logic and implement CMP/IFU instructions. 
--- src/video_core/vertex_shader.cpp | 106 +++++++++++++++++++++------------------ 1 file changed, 56 insertions(+), 50 deletions(-) (limited to 'src/video_core/vertex_shader.cpp') diff --git a/src/video_core/vertex_shader.cpp b/src/video_core/vertex_shader.cpp index dd406f9ca..af9332975 100644 --- a/src/video_core/vertex_shader.cpp +++ b/src/video_core/vertex_shader.cpp @@ -2,6 +2,8 @@ // Licensed under GPLv2 // Refer to the license.txt file included. +#include + #include #include @@ -65,9 +67,6 @@ const std::array& GetSwizzlePatterns() return swizzle_data; } -// TODO: Is there actually a limit on hardware? -const int if_stack_size = 8; - struct VertexShaderState { u32* program_counter; @@ -84,14 +83,14 @@ struct VertexShaderState { enum { INVALID_ADDRESS = 0xFFFFFFFF }; - u32 call_stack[8]; // TODO: What is the maximal call stack depth? - u32* call_stack_pointer; - struct IfStackElement { - u32 else_addr; - u32 else_instructions; - } if_stack[if_stack_size]; - IfStackElement* if_stack_pointer; + struct CallStackElement { + u32 final_address; + u32 return_address; + }; + + // TODO: Is there a maximal size for this? + std::stack call_stack; struct { u32 max_offset; // maximum program counter ever reached @@ -101,12 +100,27 @@ struct VertexShaderState { static void ProcessShaderCode(VertexShaderState& state) { while (true) { - bool increment_pc = true; + if (!state.call_stack.empty()) { + if (state.program_counter - shader_memory.data() == state.call_stack.top().final_address) { + state.program_counter = &shader_memory[state.call_stack.top().return_address]; + state.call_stack.pop(); + + // TODO: Is "trying again" accurate to hardware? 
+ continue; + } + } + bool exit_loop = false; const Instruction& instr = *(const Instruction*)state.program_counter; const SwizzlePattern& swizzle = *(SwizzlePattern*)&swizzle_data[instr.common.operand_desc_id]; - state.debug.max_offset = std::max(state.debug.max_offset, 1 + (state.program_counter - shader_memory.data())); + auto call = [&](std::stack& stack, u32 offset, u32 num_instructions, u32 return_offset) { + state.program_counter = &shader_memory[offset] - 1; // -1 to make sure when incrementing the PC we end up at the correct offset + stack.push({ offset + num_instructions, return_offset }); + }; + u32 binary_offset = state.program_counter - shader_memory.data(); + + state.debug.max_offset = std::max(state.debug.max_offset, 1 + binary_offset); auto LookupSourceRegister = [&](const SourceRegister& source_reg) -> const float24* { switch (source_reg.GetRegisterType()) { @@ -328,30 +342,33 @@ static void ProcessShaderCode(VertexShaderState& state) { default: // Handle each instruction on its own switch (instr.opcode) { - // NOP is currently used as a heuristic for leaving from a function. - // TODO: This is completely incorrect. 
- case Instruction::OpCode::NOP: - if (*state.call_stack_pointer == VertexShaderState::INVALID_ADDRESS) { - exit_loop = true; - } else { - // Jump back to call stack position, invalidate call stack entry, move up call stack pointer - state.program_counter = &shader_memory[*state.call_stack_pointer]; - *state.call_stack_pointer-- = VertexShaderState::INVALID_ADDRESS; - } - + case Instruction::OpCode::END: + exit_loop = true; break; case Instruction::OpCode::CALL: - increment_pc = false; - - _dbg_assert_(HW_GPU, state.call_stack_pointer - state.call_stack < sizeof(state.call_stack)); + call(state.call_stack, + instr.flow_control.dest_offset, + instr.flow_control.num_instructions, + binary_offset + 1); + break; - *++state.call_stack_pointer = state.program_counter - shader_memory.data(); - state.program_counter = &shader_memory[instr.flow_control.dest_offset]; + case Instruction::OpCode::NOP: break; - case Instruction::OpCode::END: - // TODO + case Instruction::OpCode::IFU: + if (shader_uniforms.b[instr.flow_control.bool_uniform_id]) { + call(state.call_stack, + binary_offset + 1, + instr.flow_control.dest_offset - binary_offset - 1, + instr.flow_control.dest_offset + instr.flow_control.num_instructions); + } else { + call(state.call_stack, + instr.flow_control.dest_offset, + instr.flow_control.num_instructions, + instr.flow_control.dest_offset + instr.flow_control.num_instructions); + } + break; case Instruction::OpCode::IFC: @@ -381,12 +398,15 @@ static void ProcessShaderCode(VertexShaderState& state) { } if (results[2]) { - ++state.if_stack_pointer; - - state.if_stack_pointer->else_addr = instr.flow_control.dest_offset; - state.if_stack_pointer->else_instructions = instr.flow_control.num_instructions; + call(state.call_stack, + binary_offset + 1, + instr.flow_control.dest_offset - binary_offset - 1, + instr.flow_control.dest_offset + instr.flow_control.num_instructions); } else { - state.program_counter = &shader_memory[instr.flow_control.dest_offset] - 1; + 
call(state.call_stack, + instr.flow_control.dest_offset, + instr.flow_control.num_instructions, + instr.flow_control.dest_offset + instr.flow_control.num_instructions); } break; @@ -401,15 +421,7 @@ static void ProcessShaderCode(VertexShaderState& state) { break; } - if (increment_pc) - ++state.program_counter; - - if (state.if_stack_pointer >= &state.if_stack[0]) { - if (state.program_counter - shader_memory.data() == state.if_stack_pointer->else_addr) { - state.program_counter += state.if_stack_pointer->else_instructions; - state.if_stack_pointer--; - } - } + ++state.program_counter; if (exit_loop) break; @@ -462,12 +474,6 @@ OutputVertex RunShader(const InputVertex& input, int num_attributes) state.conditional_code[0] = false; state.conditional_code[1] = false; - boost::fill(state.call_stack, VertexShaderState::INVALID_ADDRESS); - state.call_stack_pointer = &state.call_stack[0]; - - std::fill(state.if_stack, state.if_stack + sizeof(state.if_stack) / sizeof(state.if_stack[0]), - VertexShaderState::IfStackElement{VertexShaderState::INVALID_ADDRESS, VertexShaderState::INVALID_ADDRESS}); - state.if_stack_pointer = state.if_stack - 1; // Meh. TODO: Make this less ugly ProcessShaderCode(state); DebugUtils::DumpShader(shader_memory.data(), state.debug.max_offset, swizzle_data.data(), -- cgit v1.2.3 From ad5db467d7e9a598e7f8e998066bc5ffe99f1436 Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Fri, 19 Dec 2014 19:49:17 +0100 Subject: Pica/VertexShader: Clarify a comment. 
--- src/video_core/vertex_shader.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'src/video_core/vertex_shader.cpp') diff --git a/src/video_core/vertex_shader.cpp b/src/video_core/vertex_shader.cpp index af9332975..5ca30ba53 100644 --- a/src/video_core/vertex_shader.cpp +++ b/src/video_core/vertex_shader.cpp @@ -140,7 +140,9 @@ static void ProcessShaderCode(VertexShaderState& state) { { bool is_inverted = 0 != (instr.opcode.GetInfo().subtype & Instruction::OpCodeInfo::SrcInversed); if (is_inverted) { - // We don't really support this properly and/or reliably + // TODO: We don't really support this properly: For instance, the address register + // offset needs to be applied to SRC2 instead, etc. + // For now, we just abort in this situation. LOG_ERROR(HW_GPU, "Bad condition..."); exit(0); } -- cgit v1.2.3 From a664574ecbddb643dd12fb9815f4c4526f59f9ff Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Fri, 19 Dec 2014 19:58:21 +0100 Subject: Pica/VertexShader: Be robust against invalid inputs. More specifically, this also fixes crashes caused by Citra trying to load a src2 register even when the current instruction does not use one. 
--- src/video_core/vertex_shader.cpp | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'src/video_core/vertex_shader.cpp') diff --git a/src/video_core/vertex_shader.cpp b/src/video_core/vertex_shader.cpp index 5ca30ba53..345f3c3fe 100644 --- a/src/video_core/vertex_shader.cpp +++ b/src/video_core/vertex_shader.cpp @@ -99,6 +99,10 @@ struct VertexShaderState { }; static void ProcessShaderCode(VertexShaderState& state) { + + // Placeholder for invalid inputs + static float24 dummy_vec4_float24[4]; + while (true) { if (!state.call_stack.empty()) { if (state.program_counter - shader_memory.data() == state.call_stack.top().final_address) { @@ -132,6 +136,9 @@ static void ProcessShaderCode(VertexShaderState& state) { case RegisterType::FloatUniform: return &shader_uniforms.f[source_reg.GetIndex()].x; + + default: + return dummy_vec4_float24; } }; @@ -182,9 +189,9 @@ static void ProcessShaderCode(VertexShaderState& state) { } float24* dest = (instr.common.dest < 0x08) ? state.output_register_table[4*instr.common.dest.GetIndex()] - : (instr.common.dest < 0x10) ? nullptr + : (instr.common.dest < 0x10) ? dummy_vec4_float24 : (instr.common.dest < 0x20) ? &state.temporary_registers[instr.common.dest.GetIndex()][0] - : nullptr; + : dummy_vec4_float24; state.debug.max_opdesc_id = std::max(state.debug.max_opdesc_id, 1+instr.common.operand_desc_id); -- cgit v1.2.3 From 17f31de364df294337963cabad106a5f0a9d302b Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Sat, 20 Dec 2014 15:19:36 +0100 Subject: Pica/VertexShader: Small optimization. 
--- src/video_core/vertex_shader.cpp | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'src/video_core/vertex_shader.cpp') diff --git a/src/video_core/vertex_shader.cpp b/src/video_core/vertex_shader.cpp index 345f3c3fe..de963f5e9 100644 --- a/src/video_core/vertex_shader.cpp +++ b/src/video_core/vertex_shader.cpp @@ -118,9 +118,9 @@ static void ProcessShaderCode(VertexShaderState& state) { const Instruction& instr = *(const Instruction*)state.program_counter; const SwizzlePattern& swizzle = *(SwizzlePattern*)&swizzle_data[instr.common.operand_desc_id]; - auto call = [&](std::stack& stack, u32 offset, u32 num_instructions, u32 return_offset) { + auto call = [&](VertexShaderState& state, u32 offset, u32 num_instructions, u32 return_offset) { state.program_counter = &shader_memory[offset] - 1; // -1 to make sure when incrementing the PC we end up at the correct offset - stack.push({ offset + num_instructions, return_offset }); + state.call_stack.push({ offset + num_instructions, return_offset }); }; u32 binary_offset = state.program_counter - shader_memory.data(); @@ -356,7 +356,7 @@ static void ProcessShaderCode(VertexShaderState& state) { break; case Instruction::OpCode::CALL: - call(state.call_stack, + call(state, instr.flow_control.dest_offset, instr.flow_control.num_instructions, binary_offset + 1); @@ -367,12 +367,12 @@ static void ProcessShaderCode(VertexShaderState& state) { case Instruction::OpCode::IFU: if (shader_uniforms.b[instr.flow_control.bool_uniform_id]) { - call(state.call_stack, + call(state, binary_offset + 1, instr.flow_control.dest_offset - binary_offset - 1, instr.flow_control.dest_offset + instr.flow_control.num_instructions); } else { - call(state.call_stack, + call(state, instr.flow_control.dest_offset, instr.flow_control.num_instructions, instr.flow_control.dest_offset + instr.flow_control.num_instructions); @@ -407,12 +407,12 @@ static void ProcessShaderCode(VertexShaderState& state) { } if (results[2]) { - 
call(state.call_stack, + call(state, binary_offset + 1, instr.flow_control.dest_offset - binary_offset - 1, instr.flow_control.dest_offset + instr.flow_control.num_instructions); } else { - call(state.call_stack, + call(state, instr.flow_control.dest_offset, instr.flow_control.num_instructions, instr.flow_control.dest_offset + instr.flow_control.num_instructions); -- cgit v1.2.3 From 08f42c2b8c30d55f5c931f2260a0900ff902735c Mon Sep 17 00:00:00 2001 From: Tony Wasserka Date: Sat, 20 Dec 2014 15:31:17 +0100 Subject: Pica/VertexShader: Promote a log message to critical status. --- src/video_core/vertex_shader.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/video_core/vertex_shader.cpp') diff --git a/src/video_core/vertex_shader.cpp b/src/video_core/vertex_shader.cpp index de963f5e9..4ba69fa51 100644 --- a/src/video_core/vertex_shader.cpp +++ b/src/video_core/vertex_shader.cpp @@ -150,7 +150,7 @@ static void ProcessShaderCode(VertexShaderState& state) { // TODO: We don't really support this properly: For instance, the address register // offset needs to be applied to SRC2 instead, etc. // For now, we just abort in this situation. - LOG_ERROR(HW_GPU, "Bad condition..."); + LOG_CRITICAL(HW_GPU, "Bad condition..."); exit(0); } -- cgit v1.2.3 From ebfd831ccba32bce097491db3d6bdff0be05935e Mon Sep 17 00:00:00 2001 From: purpasmart96 Date: Tue, 16 Dec 2014 21:38:14 -0800 Subject: License change --- src/video_core/vertex_shader.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/video_core/vertex_shader.cpp') diff --git a/src/video_core/vertex_shader.cpp b/src/video_core/vertex_shader.cpp index 477e78cfe..04d439cc6 100644 --- a/src/video_core/vertex_shader.cpp +++ b/src/video_core/vertex_shader.cpp @@ -1,5 +1,5 @@ // Copyright 2014 Citra Emulator Project -// Licensed under GPLv2 +// Licensed under GPLv2 or any later version // Refer to the license.txt file included. 
#include -- cgit v1.2.3 From 8d81e23d6ea998fcd4f6045160b0f9fa89d64c9d Mon Sep 17 00:00:00 2001 From: Apology11 Date: Sun, 21 Dec 2014 18:34:20 +0100 Subject: Fix visual studio ambiguous symbol error --- src/video_core/vertex_shader.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'src/video_core/vertex_shader.cpp') diff --git a/src/video_core/vertex_shader.cpp b/src/video_core/vertex_shader.cpp index 4ba69fa51..859b4836d 100644 --- a/src/video_core/vertex_shader.cpp +++ b/src/video_core/vertex_shader.cpp @@ -160,8 +160,8 @@ static void ProcessShaderCode(VertexShaderState& state) { const float24* src1_ = LookupSourceRegister(instr.common.GetSrc1(is_inverted) + address_offset); const float24* src2_ = LookupSourceRegister(instr.common.GetSrc2(is_inverted)); - const bool negate_src1 = (swizzle.negate_src1 != false); - const bool negate_src2 = (swizzle.negate_src2 != false); + const bool negate_src1 = ((bool)swizzle.negate_src1 != false); + const bool negate_src2 = ((bool)swizzle.negate_src2 != false); float24 src1[4] = { src1_[(int)swizzle.GetSelectorSrc1(0)], @@ -385,8 +385,8 @@ static void ProcessShaderCode(VertexShaderState& state) { // TODO: Do we need to consider swizzlers here? auto flow_control = instr.flow_control; - bool results[3] = { flow_control.refx == state.conditional_code[0], - flow_control.refy == state.conditional_code[1] }; + bool results[3] = { (bool)flow_control.refx == state.conditional_code[0], + (bool)flow_control.refy == state.conditional_code[1] }; switch (flow_control.op) { case flow_control.Or: -- cgit v1.2.3 From d151d797b1c281d5813ca705722f43b4be20ca6d Mon Sep 17 00:00:00 2001 From: Yuri Kunde Schlesner Date: Sun, 28 Dec 2014 18:20:33 -0200 Subject: Vertex Shader: Zero OutputVertex to avoid denormals Unused OutputVertex attributes were being left un-initialized. 
The leftover garbage sometimes decoded as floating-point denormalized values, causing fallbacks to microcode and massive slowdowns in the rest of the rasterization pipeline even though the results were unused. By zeroing the structure we ensure these attributes only contain harmless zeros. --- src/video_core/vertex_shader.cpp | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'src/video_core/vertex_shader.cpp') diff --git a/src/video_core/vertex_shader.cpp b/src/video_core/vertex_shader.cpp index e31bc3bc7..bed5081a0 100644 --- a/src/video_core/vertex_shader.cpp +++ b/src/video_core/vertex_shader.cpp @@ -469,6 +469,10 @@ OutputVertex RunShader(const InputVertex& input, int num_attributes) // Setup output register table OutputVertex ret; + // Zero output so that attributes which aren't output won't have denormals in them, which will + // slow us down later. + memset(&ret, 0, sizeof(ret)); + for (int i = 0; i < 7; ++i) { const auto& output_register_map = registers.vs_output_attributes[i]; -- cgit v1.2.3