diff options
Diffstat (limited to 'src/video_core/shader')
| -rw-r--r-- | src/video_core/shader/shader.cpp | 83 | ||||
| -rw-r--r-- | src/video_core/shader/shader.h | 111 | ||||
| -rw-r--r-- | src/video_core/shader/shader_interpreter.cpp | 80 | ||||
| -rw-r--r-- | src/video_core/shader/shader_interpreter.h | 4 | ||||
| -rw-r--r-- | src/video_core/shader/shader_jit_x64.cpp | 16 | ||||
| -rw-r--r-- | src/video_core/shader/shader_jit_x64.h | 5 |
6 files changed, 147 insertions, 152 deletions
diff --git a/src/video_core/shader/shader.cpp b/src/video_core/shader/shader.cpp index 75301accd..e93a9d92a 100644 --- a/src/video_core/shader/shader.cpp +++ b/src/video_core/shader/shader.cpp @@ -2,27 +2,30 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. -#include <memory> +#include <atomic> +#include <cmath> +#include <cstring> #include <unordered_map> +#include <utility> #include <boost/range/algorithm/fill.hpp> +#include "common/bit_field.h" #include "common/hash.h" +#include "common/logging/log.h" #include "common/microprofile.h" -#include "common/profiler.h" -#include "video_core/debug_utils/debug_utils.h" #include "video_core/pica.h" #include "video_core/pica_state.h" -#include "video_core/video_core.h" - -#include "shader.h" -#include "shader_interpreter.h" +#include "video_core/shader/shader.h" +#include "video_core/shader/shader_interpreter.h" #ifdef ARCHITECTURE_x86_64 -#include "shader_jit_x64.h" +#include "video_core/shader/shader_jit_x64.h" #endif // ARCHITECTURE_x86_64 +#include "video_core/video_core.h" + namespace Pica { namespace Shader { @@ -32,7 +35,13 @@ static std::unordered_map<u64, std::unique_ptr<JitShader>> shader_map; static const JitShader* jit_shader; #endif // ARCHITECTURE_x86_64 -void Setup() { +void ClearCache() { +#ifdef ARCHITECTURE_x86_64 + shader_map.clear(); +#endif // ARCHITECTURE_x86_64 +} + +void ShaderSetup::Setup() { #ifdef ARCHITECTURE_x86_64 if (VideoCore::g_shader_jit_enabled) { u64 cache_key = (Common::ComputeHash64(&g_state.vs.program_code, sizeof(g_state.vs.program_code)) ^ @@ -51,46 +60,21 @@ void Setup() { #endif // ARCHITECTURE_x86_64 } -void Shutdown() { -#ifdef ARCHITECTURE_x86_64 - shader_map.clear(); -#endif // ARCHITECTURE_x86_64 -} - -static Common::Profiling::TimingCategory shader_category("Vertex Shader"); -MICROPROFILE_DEFINE(GPU_VertexShader, "GPU", "Vertex Shader", MP_RGB(50, 50, 240)); +MICROPROFILE_DEFINE(GPU_Shader, "GPU", "Shader", MP_RGB(50, 50, 240)); -OutputVertex Run(UnitState<false>& state, const InputVertex& input, int num_attributes) { +OutputVertex ShaderSetup::Run(UnitState<false>& state, const InputVertex& input, int num_attributes) { auto& config = g_state.regs.vs; - Common::Profiling::ScopeTimer timer(shader_category); - MICROPROFILE_SCOPE(GPU_VertexShader); + MICROPROFILE_SCOPE(GPU_Shader); - state.program_counter = config.main_offset; state.debug.max_offset = 0; state.debug.max_opdesc_id = 0; // Setup input register table const auto& attribute_register_map = config.input_register_map; - // TODO: Instead of this cumbersome logic, just load the input data directly like - // for (int attr = 0; attr < num_attributes; ++attr) { input_attr[0] = state.registers.input[attribute_register_map.attribute0_register]; } - if (num_attributes > 0) state.registers.input[attribute_register_map.attribute0_register] = input.attr[0]; - if (num_attributes > 1) state.registers.input[attribute_register_map.attribute1_register] = input.attr[1]; - if (num_attributes > 2) state.registers.input[attribute_register_map.attribute2_register] = input.attr[2]; - if (num_attributes > 3) state.registers.input[attribute_register_map.attribute3_register] = input.attr[3]; - if (num_attributes > 4) state.registers.input[attribute_register_map.attribute4_register] = input.attr[4]; - if (num_attributes > 5) state.registers.input[attribute_register_map.attribute5_register] = input.attr[5]; - if (num_attributes > 6) state.registers.input[attribute_register_map.attribute6_register] = input.attr[6]; - if (num_attributes > 7) state.registers.input[attribute_register_map.attribute7_register] = input.attr[7]; - if (num_attributes > 8) state.registers.input[attribute_register_map.attribute8_register] = input.attr[8]; - if (num_attributes > 9) state.registers.input[attribute_register_map.attribute9_register] = input.attr[9]; - if (num_attributes > 10) state.registers.input[attribute_register_map.attribute10_register] = input.attr[10]; - if (num_attributes > 11) state.registers.input[attribute_register_map.attribute11_register] = input.attr[11]; - if (num_attributes > 12) state.registers.input[attribute_register_map.attribute12_register] = input.attr[12]; - if (num_attributes > 13) state.registers.input[attribute_register_map.attribute13_register] = input.attr[13]; - if (num_attributes > 14) state.registers.input[attribute_register_map.attribute14_register] = input.attr[14]; - if (num_attributes > 15) state.registers.input[attribute_register_map.attribute15_register] = input.attr[15]; + for (unsigned i = 0; i < num_attributes; i++) + state.registers.input[attribute_register_map.GetRegisterForAttribute(i)] = input.attr[i]; state.conditional_code[0] = false; state.conditional_code[1] = false; @@ -155,10 +139,9 @@ OutputVertex Run(UnitState<false>& state, const InputVertex& input, int num_attr return ret; } -DebugData<true> ProduceDebugInfo(const InputVertex& input, int num_attributes, const Regs::ShaderConfig& config, const ShaderSetup& setup) { +DebugData<true> ShaderSetup::ProduceDebugInfo(const InputVertex& input, int num_attributes, const Regs::ShaderConfig& config, const ShaderSetup& setup) { UnitState<true> state; - state.program_counter = config.main_offset; state.debug.max_offset = 0; state.debug.max_opdesc_id = 0; @@ -167,22 +150,8 @@ DebugData<true> ProduceDebugInfo(const InputVertex& input, int num_attributes, c float24 dummy_register; boost::fill(state.registers.input, &dummy_register); - if (num_attributes > 0) state.registers.input[attribute_register_map.attribute0_register] = &input.attr[0].x; - if (num_attributes > 1) state.registers.input[attribute_register_map.attribute1_register] = &input.attr[1].x; - if (num_attributes > 2) state.registers.input[attribute_register_map.attribute2_register] = &input.attr[2].x; - if (num_attributes > 3) state.registers.input[attribute_register_map.attribute3_register] = &input.attr[3].x; - if (num_attributes > 4) state.registers.input[attribute_register_map.attribute4_register] = &input.attr[4].x; - if (num_attributes > 5) state.registers.input[attribute_register_map.attribute5_register] = &input.attr[5].x; - if (num_attributes > 6) state.registers.input[attribute_register_map.attribute6_register] = &input.attr[6].x; - if (num_attributes > 7) state.registers.input[attribute_register_map.attribute7_register] = &input.attr[7].x; - if (num_attributes > 8) state.registers.input[attribute_register_map.attribute8_register] = &input.attr[8].x; - if (num_attributes > 9) state.registers.input[attribute_register_map.attribute9_register] = &input.attr[9].x; - if (num_attributes > 10) state.registers.input[attribute_register_map.attribute10_register] = &input.attr[10].x; - if (num_attributes > 11) state.registers.input[attribute_register_map.attribute11_register] = &input.attr[11].x; - if (num_attributes > 12) state.registers.input[attribute_register_map.attribute12_register] = &input.attr[12].x; - if (num_attributes > 13) state.registers.input[attribute_register_map.attribute13_register] = &input.attr[13].x; - if (num_attributes > 14) state.registers.input[attribute_register_map.attribute14_register] = &input.attr[14].x; - if (num_attributes > 15) state.registers.input[attribute_register_map.attribute15_register] = &input.attr[15].x; + for (unsigned i = 0; i < num_attributes; i++) + state.registers.input[attribute_register_map.GetRegisterForAttribute(i)] = input.attr[i]; state.conditional_code[0] = false; state.conditional_code[1] = false; diff --git a/src/video_core/shader/shader.h b/src/video_core/shader/shader.h index 9c5bd97bd..983e4a967 100644 --- a/src/video_core/shader/shader.h +++ b/src/video_core/shader/shader.h @@ -4,17 +4,23 @@ #pragma once +#include <array> +#include <cstddef> +#include <memory> +#include <type_traits> #include <vector> #include <boost/container/static_vector.hpp> -#include <nihstro/shader_binary.h> +#include <nihstro/shader_bytecode.h> +#include "common/assert.h" #include "common/common_funcs.h" #include "common/common_types.h" #include "common/vector_math.h" #include "video_core/pica.h" +#include "video_core/pica_types.h" using nihstro::RegisterType; using nihstro::SourceRegister; @@ -25,7 +31,7 @@ namespace Pica { namespace Shader { struct InputVertex { - Math::Vec4<float24> attr[16]; + alignas(16) Math::Vec4<float24> attr[16]; }; struct OutputVertex { @@ -37,7 +43,8 @@ struct OutputVertex { Math::Vec4<float24> color; Math::Vec2<float24> tc0; Math::Vec2<float24> tc1; - INSERT_PADDING_WORDS(2); + float24 tc0_w; + INSERT_PADDING_WORDS(1); Math::Vec3<float24> view; INSERT_PADDING_WORDS(1); Math::Vec2<float24> tc2; @@ -77,23 +84,6 @@ struct OutputVertex { static_assert(std::is_pod<OutputVertex>::value, "Structure is not POD"); static_assert(sizeof(OutputVertex) == 32 * sizeof(float), "OutputVertex has invalid size"); -/// Vertex shader memory -struct ShaderSetup { - struct { - // The float uniforms are accessed by the shader JIT using SSE instructions, and are - // therefore required to be 16-byte aligned. - alignas(16) Math::Vec4<float24> f[96]; - - std::array<bool, 16> b; - std::array<Math::Vec4<u8>, 4> i; - } uniforms; - - Math::Vec4<float24> default_attributes[16]; - - std::array<u32, 1024> program_code; - std::array<u32, 1024> swizzle_data; -}; - // Helper structure used to keep track of data useful for inspection of shader emulation template<bool full_debugging> struct DebugData; @@ -282,29 +272,12 @@ struct UnitState { } registers; static_assert(std::is_pod<Registers>::value, "Structure is not POD"); - u32 program_counter; bool conditional_code[2]; // Two Address registers and one loop counter // TODO: How many bits do these actually have? s32 address_registers[3]; - enum { - INVALID_ADDRESS = 0xFFFFFFFF - }; - - struct CallStackElement { - u32 final_address; // Address upon which we jump to return_address - u32 return_address; // Where to jump when leaving scope - u8 repeat_counter; // How often to repeat until this call stack element is removed - u8 loop_increment; // Which value to add to the loop counter after an iteration - // TODO: Should this be a signed value? Does it even matter? - u32 loop_address; // The address where we'll return to after each loop iteration - }; - - // TODO: Is there a maximal size for this? - boost::container::static_vector<CallStackElement, 16> call_stack; - DebugData<Debug> debug; static size_t InputOffset(const SourceRegister& reg) { @@ -336,33 +309,49 @@ struct UnitState { } }; -/** - * Performs any shader unit setup that only needs to happen once per shader (as opposed to once per - * vertex, which would happen within the `Run` function). - */ -void Setup(); +/// Clears the shader cache +void ClearCache(); -/// Performs any cleanup when the emulator is shutdown -void Shutdown(); +struct ShaderSetup { -/** - * Runs the currently setup shader - * @param state Shader unit state, must be setup per shader and per shader unit - * @param input Input vertex into the shader - * @param num_attributes The number of vertex shader attributes - * @return The output vertex, after having been processed by the vertex shader - */ -OutputVertex Run(UnitState<false>& state, const InputVertex& input, int num_attributes); + struct { + // The float uniforms are accessed by the shader JIT using SSE instructions, and are + // therefore required to be 16-byte aligned. + alignas(16) Math::Vec4<float24> f[96]; -/** - * Produce debug information based on the given shader and input vertex - * @param input Input vertex into the shader - * @param num_attributes The number of vertex shader attributes - * @param config Configuration object for the shader pipeline - * @param setup Setup object for the shader pipeline - * @return Debug information for this shader with regards to the given vertex - */ -DebugData<true> ProduceDebugInfo(const InputVertex& input, int num_attributes, const Regs::ShaderConfig& config, const ShaderSetup& setup); + std::array<bool, 16> b; + std::array<Math::Vec4<u8>, 4> i; + } uniforms; + + std::array<u32, 1024> program_code; + std::array<u32, 1024> swizzle_data; + + /** + * Performs any shader unit setup that only needs to happen once per shader (as opposed to once per + * vertex, which would happen within the `Run` function). + */ + void Setup(); + + /** + * Runs the currently setup shader + * @param state Shader unit state, must be setup per shader and per shader unit + * @param input Input vertex into the shader + * @param num_attributes The number of vertex shader attributes + * @return The output vertex, after having been processed by the vertex shader + */ + OutputVertex Run(UnitState<false>& state, const InputVertex& input, int num_attributes); + + /** + * Produce debug information based on the given shader and input vertex + * @param input Input vertex into the shader + * @param num_attributes The number of vertex shader attributes + * @param config Configuration object for the shader pipeline + * @param setup Setup object for the shader pipeline + * @return Debug information for this shader with regards to the given vertex + */ + DebugData<true> ProduceDebugInfo(const InputVertex& input, int num_attributes, const Regs::ShaderConfig& config, const ShaderSetup& setup); + +}; } // namespace Shader diff --git a/src/video_core/shader/shader_interpreter.cpp b/src/video_core/shader/shader_interpreter.cpp index 9b978583e..3a827d11f 100644 --- a/src/video_core/shader/shader_interpreter.cpp +++ b/src/video_core/shader/shader_interpreter.cpp @@ -2,12 +2,20 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include <algorithm> +#include <array> +#include <cmath> #include <numeric> + #include <nihstro/shader_bytecode.h> -#include "common/file_util.h" -#include "video_core/pica.h" +#include "common/assert.h" +#include "common/common_types.h" +#include "common/logging/log.h" +#include "common/vector_math.h" + #include "video_core/pica_state.h" +#include "video_core/pica_types.h" #include "video_core/shader/shader.h" #include "video_core/shader/shader_interpreter.h" @@ -21,8 +29,24 @@ namespace Pica { namespace Shader { +constexpr u32 INVALID_ADDRESS = 0xFFFFFFFF; + +struct CallStackElement { + u32 final_address; // Address upon which we jump to return_address + u32 return_address; // Where to jump when leaving scope + u8 repeat_counter; // How often to repeat until this call stack element is removed + u8 loop_increment; // Which value to add to the loop counter after an iteration + // TODO: Should this be a signed value? Does it even matter? + u32 loop_address; // The address where we'll return to after each loop iteration +}; + template<bool Debug> void RunInterpreter(UnitState<Debug>& state) { + // TODO: Is there a maximal size for this? + boost::container::static_vector<CallStackElement, 16> call_stack; + + u32 program_counter = g_state.regs.vs.main_offset; + const auto& uniforms = g_state.vs.uniforms; const auto& swizzle_data = g_state.vs.swizzle_data; const auto& program_code = g_state.vs.program_code; @@ -33,16 +57,16 @@ void RunInterpreter(UnitState<Debug>& state) { unsigned iteration = 0; bool exit_loop = false; while (!exit_loop) { - if (!state.call_stack.empty()) { - auto& top = state.call_stack.back(); - if (state.program_counter == top.final_address) { + if (!call_stack.empty()) { + auto& top = call_stack.back(); + if (program_counter == top.final_address) { state.address_registers[2] += top.loop_increment; if (top.repeat_counter-- == 0) { - state.program_counter = top.return_address; - state.call_stack.pop_back(); + program_counter = top.return_address; + call_stack.pop_back(); } else { - state.program_counter = top.loop_address; + program_counter = top.loop_address; } // TODO: Is "trying again" accurate to hardware? @@ -50,20 +74,20 @@ void RunInterpreter(UnitState<Debug>& state) { } } - const Instruction instr = { program_code[state.program_counter] }; + const Instruction instr = { program_code[program_counter] }; const SwizzlePattern swizzle = { swizzle_data[instr.common.operand_desc_id] }; - static auto call = [](UnitState<Debug>& state, u32 offset, u32 num_instructions, + static auto call = [&program_counter, &call_stack](UnitState<Debug>& state, u32 offset, u32 num_instructions, u32 return_offset, u8 repeat_count, u8 loop_increment) { - state.program_counter = offset - 1; // -1 to make sure when incrementing the PC we end up at the correct offset - ASSERT(state.call_stack.size() < state.call_stack.capacity()); - state.call_stack.push_back({ offset + num_instructions, return_offset, repeat_count, loop_increment, offset }); + program_counter = offset - 1; // -1 to make sure when incrementing the PC we end up at the correct offset + ASSERT(call_stack.size() < call_stack.capacity()); + call_stack.push_back({ offset + num_instructions, return_offset, repeat_count, loop_increment, offset }); }; - Record<DebugDataRecord::CUR_INSTR>(state.debug, iteration, state.program_counter); + Record<DebugDataRecord::CUR_INSTR>(state.debug, iteration, program_counter); if (iteration > 0) - Record<DebugDataRecord::NEXT_INSTR>(state.debug, iteration - 1, state.program_counter); + Record<DebugDataRecord::NEXT_INSTR>(state.debug, iteration - 1, program_counter); - state.debug.max_offset = std::max<u32>(state.debug.max_offset, 1 + state.program_counter); + state.debug.max_offset = std::max<u32>(state.debug.max_offset, 1 + program_counter); auto LookupSourceRegister = [&](const SourceRegister& source_reg) -> const float24* { switch (source_reg.GetRegisterType()) { @@ -511,7 +535,7 @@ void RunInterpreter(UnitState<Debug>& state) { case OpCode::Id::JMPC: Record<DebugDataRecord::COND_CMP_IN>(state.debug, iteration, state.conditional_code); if (evaluate_condition(state, instr.flow_control.refx, instr.flow_control.refy, instr.flow_control)) { - state.program_counter = instr.flow_control.dest_offset - 1; + program_counter = instr.flow_control.dest_offset - 1; } break; @@ -519,7 +543,7 @@ void RunInterpreter(UnitState<Debug>& state) { Record<DebugDataRecord::COND_BOOL_IN>(state.debug, iteration, uniforms.b[instr.flow_control.bool_uniform_id]); if (uniforms.b[instr.flow_control.bool_uniform_id] == !(instr.flow_control.num_instructions & 1)) { - state.program_counter = instr.flow_control.dest_offset - 1; + program_counter = instr.flow_control.dest_offset - 1; } break; @@ -527,7 +551,7 @@ void RunInterpreter(UnitState<Debug>& state) { call(state, instr.flow_control.dest_offset, instr.flow_control.num_instructions, - state.program_counter + 1, 0, 0); + program_counter + 1, 0, 0); break; case OpCode::Id::CALLU: @@ -536,7 +560,7 @@ void RunInterpreter(UnitState<Debug>& state) { call(state, instr.flow_control.dest_offset, instr.flow_control.num_instructions, - state.program_counter + 1, 0, 0); + program_counter + 1, 0, 0); } break; @@ -546,7 +570,7 @@ void RunInterpreter(UnitState<Debug>& state) { call(state, instr.flow_control.dest_offset, instr.flow_control.num_instructions, - state.program_counter + 1, 0, 0); + program_counter + 1, 0, 0); } break; @@ -557,8 +581,8 @@ void RunInterpreter(UnitState<Debug>& state) { Record<DebugDataRecord::COND_BOOL_IN>(state.debug, iteration, uniforms.b[instr.flow_control.bool_uniform_id]); if (uniforms.b[instr.flow_control.bool_uniform_id]) { call(state, - state.program_counter + 1, - instr.flow_control.dest_offset - state.program_counter - 1, + program_counter + 1, + instr.flow_control.dest_offset - program_counter - 1, instr.flow_control.dest_offset + instr.flow_control.num_instructions, 0, 0); } else { call(state, @@ -576,8 +600,8 @@ void RunInterpreter(UnitState<Debug>& state) { Record<DebugDataRecord::COND_CMP_IN>(state.debug, iteration, state.conditional_code); if (evaluate_condition(state, instr.flow_control.refx, instr.flow_control.refy, instr.flow_control)) { call(state, - state.program_counter + 1, - instr.flow_control.dest_offset - state.program_counter - 1, + program_counter + 1, + instr.flow_control.dest_offset - program_counter - 1, instr.flow_control.dest_offset + instr.flow_control.num_instructions, 0, 0); } else { call(state, @@ -599,8 +623,8 @@ void RunInterpreter(UnitState<Debug>& state) { Record<DebugDataRecord::LOOP_INT_IN>(state.debug, iteration, loop_param); call(state, - state.program_counter + 1, - instr.flow_control.dest_offset - state.program_counter + 1, + program_counter + 1, + instr.flow_control.dest_offset - program_counter + 1, instr.flow_control.dest_offset + 1, loop_param.x, loop_param.z); @@ -617,7 +641,7 @@ void RunInterpreter(UnitState<Debug>& state) { } } - ++state.program_counter; + ++program_counter; ++iteration; } } diff --git a/src/video_core/shader/shader_interpreter.h b/src/video_core/shader/shader_interpreter.h index 294bca50e..6048cdf3a 100644 --- a/src/video_core/shader/shader_interpreter.h +++ b/src/video_core/shader/shader_interpreter.h @@ -4,12 +4,12 @@ #pragma once -#include "video_core/shader/shader.h" - namespace Pica { namespace Shader { +template <bool Debug> struct UnitState; + template<bool Debug> void RunInterpreter(UnitState<Debug>& state); diff --git a/src/video_core/shader/shader_jit_x64.cpp b/src/video_core/shader/shader_jit_x64.cpp index b47d3beda..99f6c51eb 100644 --- a/src/video_core/shader/shader_jit_x64.cpp +++ b/src/video_core/shader/shader_jit_x64.cpp @@ -3,8 +3,15 @@ // Refer to the license.txt file included. #include <algorithm> -#include <smmintrin.h> +#include <cmath> +#include <cstdint> +#include <xmmintrin.h> +#include <nihstro/shader_bytecode.h> + +#include "common/assert.h" +#include "common/logging/log.h" +#include "common/vector_math.h" #include "common/x64/abi.h" #include "common/x64/cpu_detect.h" #include "common/x64/emitter.h" @@ -13,6 +20,7 @@ #include "shader_jit_x64.h" #include "video_core/pica_state.h" +#include "video_core/pica_types.h" namespace Pica { @@ -148,7 +156,7 @@ static Instruction GetVertexShaderInstruction(size_t offset) { } static void LogCritical(const char* msg) { - LOG_CRITICAL(HW_GPU, msg); + LOG_CRITICAL(HW_GPU, "%s", msg); } void JitShader::Compile_Assert(bool condition, const char* msg) { @@ -795,6 +803,8 @@ void JitShader::FindReturnOffsets() { case OpCode::Id::CALLU: return_offsets.push_back(instr.flow_control.dest_offset + instr.flow_control.num_instructions); break; + default: + break; } } @@ -854,7 +864,7 @@ void JitShader::Compile() { uintptr_t size = reinterpret_cast<uintptr_t>(GetCodePtr()) - reinterpret_cast<uintptr_t>(program); ASSERT_MSG(size <= MAX_SHADER_SIZE, "Compiled a shader that exceeds the allocated size!"); - LOG_DEBUG(HW_GPU, "Compiled shader size=%d", size); + LOG_DEBUG(HW_GPU, "Compiled shader size=%lu", size); } JitShader::JitShader() { diff --git a/src/video_core/shader/shader_jit_x64.h b/src/video_core/shader/shader_jit_x64.h index cd6280ade..30aa7ff30 100644 --- a/src/video_core/shader/shader_jit_x64.h +++ b/src/video_core/shader/shader_jit_x64.h @@ -4,14 +4,17 @@ #pragma once +#include <array> +#include <cstddef> #include <utility> #include <vector> #include <nihstro/shader_bytecode.h> +#include "common/bit_set.h" +#include "common/common_types.h" #include "common/x64/emitter.h" -#include "video_core/pica.h" #include "video_core/shader/shader.h" using nihstro::Instruction; |
