diff options
Diffstat (limited to 'src/video_core')
| -rw-r--r-- | src/video_core/command_processor.cpp | 24 | ||||
| -rw-r--r-- | src/video_core/debug_utils/debug_utils.cpp | 199 | ||||
| -rw-r--r-- | src/video_core/debug_utils/debug_utils.h | 6 | ||||
| -rw-r--r-- | src/video_core/pica.h | 40 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.cpp | 121 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.h | 53 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_gen.cpp | 6 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_state.cpp | 7 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_state.h | 2 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/pica_to_gl.h | 20 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/renderer_opengl.cpp | 2 | ||||
| -rw-r--r-- | src/video_core/shader/shader.cpp | 112 | ||||
| -rw-r--r-- | src/video_core/shader/shader.h | 40 | ||||
| -rw-r--r-- | src/video_core/shader/shader_interpreter.cpp | 12 | ||||
| -rw-r--r-- | src/video_core/shader/shader_interpreter.h | 2 | ||||
| -rw-r--r-- | src/video_core/shader/shader_jit_x64.cpp | 32 | ||||
| -rw-r--r-- | src/video_core/shader/shader_jit_x64.h | 6 | ||||
| -rw-r--r-- | src/video_core/vertex_loader.cpp | 8 | ||||
| -rw-r--r-- | src/video_core/vertex_loader.h | 23 |
19 files changed, 463 insertions, 252 deletions
diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp index ad0da796e..19e03adf4 100644 --- a/src/video_core/command_processor.cpp +++ b/src/video_core/command_processor.cpp @@ -149,7 +149,8 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { // Send to vertex shader if (g_debug_context) g_debug_context->OnEvent(DebugContext::Event::VertexShaderInvocation, static_cast<void*>(&immediate_input)); - Shader::OutputVertex output = g_state.vs.Run(shader_unit, immediate_input, regs.vs.num_input_attributes+1); + g_state.vs.Run(shader_unit, immediate_input, regs.vs.num_input_attributes+1); + Shader::OutputVertex output_vertex = shader_unit.output_registers.ToVertex(regs.vs); // Send to renderer using Pica::Shader::OutputVertex; @@ -157,7 +158,7 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { VideoCore::g_renderer->Rasterizer()->AddTriangle(v0, v1, v2); }; - g_state.primitive_assembler.SubmitVertex(output, AddTriangle); + g_state.primitive_assembler.SubmitVertex(output_vertex, AddTriangle); } } } @@ -199,9 +200,8 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { // Processes information about internal vertex attributes to figure out how a vertex is loaded. // Later, these can be compiled and cached. - VertexLoader loader; const u32 base_address = regs.vertex_attributes.GetPhysicalBaseAddress(); - loader.Setup(regs); + VertexLoader loader(regs); // Load vertices bool is_indexed = (id == PICA_REG_INDEX(trigger_draw_indexed)); @@ -231,7 +231,7 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { // The size has been tuned for optimal balance between hit-rate and the cost of lookup const size_t VERTEX_CACHE_SIZE = 32; std::array<u16, VERTEX_CACHE_SIZE> vertex_cache_ids; - std::array<Shader::OutputVertex, VERTEX_CACHE_SIZE> vertex_cache; + std::array<Shader::OutputRegisters, VERTEX_CACHE_SIZE> vertex_cache; unsigned int vertex_cache_pos = 0; vertex_cache_ids.fill(-1); @@ -249,7 +249,7 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { ASSERT(vertex != -1); bool vertex_cache_hit = false; - Shader::OutputVertex output; + Shader::OutputRegisters output_registers; if (is_indexed) { if (g_debug_context && Pica::g_debug_context->recorder) { @@ -259,7 +259,7 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { for (unsigned int i = 0; i < VERTEX_CACHE_SIZE; ++i) { if (vertex == vertex_cache_ids[i]) { - output = vertex_cache[i]; + output_registers = vertex_cache[i]; vertex_cache_hit = true; break; } @@ -274,15 +274,19 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { // Send to vertex shader if (g_debug_context) g_debug_context->OnEvent(DebugContext::Event::VertexShaderInvocation, (void*)&input); - output = g_state.vs.Run(shader_unit, input, loader.GetNumTotalAttributes()); + g_state.vs.Run(shader_unit, input, loader.GetNumTotalAttributes()); + output_registers = shader_unit.output_registers; if (is_indexed) { - vertex_cache[vertex_cache_pos] = output; + vertex_cache[vertex_cache_pos] = output_registers; vertex_cache_ids[vertex_cache_pos] = vertex; vertex_cache_pos = (vertex_cache_pos + 1) % VERTEX_CACHE_SIZE; } } + // Retreive vertex from register data + Shader::OutputVertex output_vertex = output_registers.ToVertex(regs.vs); + // Send to renderer using Pica::Shader::OutputVertex; auto AddTriangle = []( @@ -290,7 +294,7 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { VideoCore::g_renderer->Rasterizer()->AddTriangle(v0, v1, v2); }; - primitive_assembler.SubmitVertex(output, AddTriangle); + primitive_assembler.SubmitVertex(output_vertex, AddTriangle); } for (auto& range : memory_accesses.ranges) { diff --git a/src/video_core/debug_utils/debug_utils.cpp b/src/video_core/debug_utils/debug_utils.cpp index 2f645b441..871368323 100644 --- a/src/video_core/debug_utils/debug_utils.cpp +++ b/src/video_core/debug_utils/debug_utils.cpp @@ -696,106 +696,125 @@ finalise: #endif } -void DumpTevStageConfig(const std::array<Pica::Regs::TevStageConfig,6>& stages) -{ +static std::string ReplacePattern(const std::string& input, const std::string& pattern, const std::string& replacement) { + size_t start = input.find(pattern); + if (start == std::string::npos) + return input; + + std::string ret = input; + ret.replace(start, pattern.length(), replacement); + return ret; +} + +static std::string GetTevStageConfigSourceString(const Pica::Regs::TevStageConfig::Source& source) { using Source = Pica::Regs::TevStageConfig::Source; + static const std::map<Source, std::string> source_map = { + { Source::PrimaryColor, "PrimaryColor" }, + { Source::PrimaryFragmentColor, "PrimaryFragmentColor" }, + { Source::SecondaryFragmentColor, "SecondaryFragmentColor" }, + { Source::Texture0, "Texture0" }, + { Source::Texture1, "Texture1" }, + { Source::Texture2, "Texture2" }, + { Source::Texture3, "Texture3" }, + { Source::PreviousBuffer, "PreviousBuffer" }, + { Source::Constant, "Constant" }, + { Source::Previous, "Previous" }, + }; + + const auto src_it = source_map.find(source); + if (src_it == source_map.end()) + return "Unknown"; + + return src_it->second; +} + +static std::string GetTevStageConfigColorSourceString(const Pica::Regs::TevStageConfig::Source& source, const Pica::Regs::TevStageConfig::ColorModifier modifier) { using ColorModifier = Pica::Regs::TevStageConfig::ColorModifier; + static const std::map<ColorModifier, std::string> color_modifier_map = { + { ColorModifier::SourceColor, "%source.rgb" }, + { ColorModifier::OneMinusSourceColor, "(1.0 - %source.rgb)" }, + { ColorModifier::SourceAlpha, "%source.aaa" }, + { ColorModifier::OneMinusSourceAlpha, "(1.0 - %source.aaa)" }, + { ColorModifier::SourceRed, "%source.rrr" }, + { ColorModifier::OneMinusSourceRed, "(1.0 - %source.rrr)" }, + { ColorModifier::SourceGreen, "%source.ggg" }, + { ColorModifier::OneMinusSourceGreen, "(1.0 - %source.ggg)" }, + { ColorModifier::SourceBlue, "%source.bbb" }, + { ColorModifier::OneMinusSourceBlue, "(1.0 - %source.bbb)" }, + }; + + auto src_str = GetTevStageConfigSourceString(source); + auto modifier_it = color_modifier_map.find(modifier); + std::string modifier_str = "%source.????"; + if (modifier_it != color_modifier_map.end()) + modifier_str = modifier_it->second; + + return ReplacePattern(modifier_str, "%source", src_str); +} + +static std::string GetTevStageConfigAlphaSourceString(const Pica::Regs::TevStageConfig::Source& source, const Pica::Regs::TevStageConfig::AlphaModifier modifier) { using AlphaModifier = Pica::Regs::TevStageConfig::AlphaModifier; + static const std::map<AlphaModifier, std::string> alpha_modifier_map = { + { AlphaModifier::SourceAlpha, "%source.a" }, + { AlphaModifier::OneMinusSourceAlpha, "(1.0 - %source.a)" }, + { AlphaModifier::SourceRed, "%source.r" }, + { AlphaModifier::OneMinusSourceRed, "(1.0 - %source.r)" }, + { AlphaModifier::SourceGreen, "%source.g" }, + { AlphaModifier::OneMinusSourceGreen, "(1.0 - %source.g)" }, + { AlphaModifier::SourceBlue, "%source.b" }, + { AlphaModifier::OneMinusSourceBlue, "(1.0 - %source.b)" }, + }; + + auto src_str = GetTevStageConfigSourceString(source); + auto modifier_it = alpha_modifier_map.find(modifier); + std::string modifier_str = "%source.????"; + if (modifier_it != alpha_modifier_map.end()) + modifier_str = modifier_it->second; + + return ReplacePattern(modifier_str, "%source", src_str); +} + +static std::string GetTevStageConfigOperationString(const Pica::Regs::TevStageConfig::Operation& operation) { using Operation = Pica::Regs::TevStageConfig::Operation; + static const std::map<Operation, std::string> combiner_map = { + { Operation::Replace, "%source1" }, + { Operation::Modulate, "(%source1 * %source2)" }, + { Operation::Add, "(%source1 + %source2)" }, + { Operation::AddSigned, "(%source1 + %source2) - 0.5" }, + { Operation::Lerp, "lerp(%source1, %source2, %source3)" }, + { Operation::Subtract, "(%source1 - %source2)" }, + { Operation::Dot3_RGB, "dot(%source1, %source2)" }, + { Operation::MultiplyThenAdd, "((%source1 * %source2) + %source3)" }, + { Operation::AddThenMultiply, "((%source1 + %source2) * %source3)" }, + }; - std::string stage_info = "Tev setup:\n"; - for (size_t index = 0; index < stages.size(); ++index) { - const auto& tev_stage = stages[index]; + const auto op_it = combiner_map.find(operation); + if (op_it == combiner_map.end()) + return "Unknown op (%source1, %source2, %source3)"; - static const std::map<Source, std::string> source_map = { - { Source::PrimaryColor, "PrimaryColor" }, - { Source::Texture0, "Texture0" }, - { Source::Texture1, "Texture1" }, - { Source::Texture2, "Texture2" }, - { Source::Constant, "Constant" }, - { Source::Previous, "Previous" }, - }; + return op_it->second; +} - static const std::map<ColorModifier, std::string> color_modifier_map = { - { ColorModifier::SourceColor, { "%source.rgb" } }, - { ColorModifier::SourceAlpha, { "%source.aaa" } }, - }; - static const std::map<AlphaModifier, std::string> alpha_modifier_map = { - { AlphaModifier::SourceAlpha, "%source.a" }, - { AlphaModifier::OneMinusSourceAlpha, "(255 - %source.a)" }, - }; +std::string GetTevStageConfigColorCombinerString(const Pica::Regs::TevStageConfig& tev_stage) { + auto op_str = GetTevStageConfigOperationString(tev_stage.color_op); + op_str = ReplacePattern(op_str, "%source1", GetTevStageConfigColorSourceString(tev_stage.color_source1, tev_stage.color_modifier1)); + op_str = ReplacePattern(op_str, "%source2", GetTevStageConfigColorSourceString(tev_stage.color_source2, tev_stage.color_modifier2)); + return ReplacePattern(op_str, "%source3", GetTevStageConfigColorSourceString(tev_stage.color_source3, tev_stage.color_modifier3)); +} - static const std::map<Operation, std::string> combiner_map = { - { Operation::Replace, "%source1" }, - { Operation::Modulate, "(%source1 * %source2) / 255" }, - { Operation::Add, "(%source1 + %source2)" }, - { Operation::Lerp, "lerp(%source1, %source2, %source3)" }, - }; +std::string GetTevStageConfigAlphaCombinerString(const Pica::Regs::TevStageConfig& tev_stage) { + auto op_str = GetTevStageConfigOperationString(tev_stage.alpha_op); + op_str = ReplacePattern(op_str, "%source1", GetTevStageConfigAlphaSourceString(tev_stage.alpha_source1, tev_stage.alpha_modifier1)); + op_str = ReplacePattern(op_str, "%source2", GetTevStageConfigAlphaSourceString(tev_stage.alpha_source2, tev_stage.alpha_modifier2)); + return ReplacePattern(op_str, "%source3", GetTevStageConfigAlphaSourceString(tev_stage.alpha_source3, tev_stage.alpha_modifier3)); +} - static auto ReplacePattern = - [](const std::string& input, const std::string& pattern, const std::string& replacement) -> std::string { - size_t start = input.find(pattern); - if (start == std::string::npos) - return input; - - std::string ret = input; - ret.replace(start, pattern.length(), replacement); - return ret; - }; - static auto GetColorSourceStr = - [](const Source& src, const ColorModifier& modifier) { - auto src_it = source_map.find(src); - std::string src_str = "Unknown"; - if (src_it != source_map.end()) - src_str = src_it->second; - - auto modifier_it = color_modifier_map.find(modifier); - std::string modifier_str = "%source.????"; - if (modifier_it != color_modifier_map.end()) - modifier_str = modifier_it->second; - - return ReplacePattern(modifier_str, "%source", src_str); - }; - static auto GetColorCombinerStr = - [](const Regs::TevStageConfig& tev_stage) { - auto op_it = combiner_map.find(tev_stage.color_op); - std::string op_str = "Unknown op (%source1, %source2, %source3)"; - if (op_it != combiner_map.end()) - op_str = op_it->second; - - op_str = ReplacePattern(op_str, "%source1", GetColorSourceStr(tev_stage.color_source1, tev_stage.color_modifier1)); - op_str = ReplacePattern(op_str, "%source2", GetColorSourceStr(tev_stage.color_source2, tev_stage.color_modifier2)); - return ReplacePattern(op_str, "%source3", GetColorSourceStr(tev_stage.color_source3, tev_stage.color_modifier3)); - }; - static auto GetAlphaSourceStr = - [](const Source& src, const AlphaModifier& modifier) { - auto src_it = source_map.find(src); - std::string src_str = "Unknown"; - if (src_it != source_map.end()) - src_str = src_it->second; - - auto modifier_it = alpha_modifier_map.find(modifier); - std::string modifier_str = "%source.????"; - if (modifier_it != alpha_modifier_map.end()) - modifier_str = modifier_it->second; - - return ReplacePattern(modifier_str, "%source", src_str); - }; - static auto GetAlphaCombinerStr = - [](const Regs::TevStageConfig& tev_stage) { - auto op_it = combiner_map.find(tev_stage.alpha_op); - std::string op_str = "Unknown op (%source1, %source2, %source3)"; - if (op_it != combiner_map.end()) - op_str = op_it->second; - - op_str = ReplacePattern(op_str, "%source1", GetAlphaSourceStr(tev_stage.alpha_source1, tev_stage.alpha_modifier1)); - op_str = ReplacePattern(op_str, "%source2", GetAlphaSourceStr(tev_stage.alpha_source2, tev_stage.alpha_modifier2)); - return ReplacePattern(op_str, "%source3", GetAlphaSourceStr(tev_stage.alpha_source3, tev_stage.alpha_modifier3)); - }; - - stage_info += "Stage " + std::to_string(index) + ": " + GetColorCombinerStr(tev_stage) + " " + GetAlphaCombinerStr(tev_stage) + "\n"; +void DumpTevStageConfig(const std::array<Pica::Regs::TevStageConfig, 6>& stages) { + std::string stage_info = "Tev setup:\n"; + for (size_t index = 0; index < stages.size(); ++index) { + const auto& tev_stage = stages[index]; + stage_info += "Stage " + std::to_string(index) + ": " + GetTevStageConfigColorCombinerString(tev_stage) + " " + GetTevStageConfigAlphaCombinerString(tev_stage) + "\n"; } - LOG_TRACE(HW_GPU, "%s", stage_info.c_str()); } diff --git a/src/video_core/debug_utils/debug_utils.h b/src/video_core/debug_utils/debug_utils.h index f628292a4..92e9734ae 100644 --- a/src/video_core/debug_utils/debug_utils.h +++ b/src/video_core/debug_utils/debug_utils.h @@ -224,7 +224,11 @@ const Math::Vec4<u8> LookupTexture(const u8* source, int s, int t, const Texture void DumpTexture(const Pica::Regs::TextureConfig& texture_config, u8* data); -void DumpTevStageConfig(const std::array<Pica::Regs::TevStageConfig,6>& stages); +std::string GetTevStageConfigColorCombinerString(const Pica::Regs::TevStageConfig& tev_stage); +std::string GetTevStageConfigAlphaCombinerString(const Pica::Regs::TevStageConfig& tev_stage); + +/// Dumps the Tev stage config to log at trace level +void DumpTevStageConfig(const std::array<Pica::Regs::TevStageConfig, 6>& stages); /** * Used in the vertex loader to merge access records. TODO: Investigate if actually useful. diff --git a/src/video_core/pica.h b/src/video_core/pica.h index 86c0a0096..544ea037f 100644 --- a/src/video_core/pica.h +++ b/src/video_core/pica.h @@ -787,23 +787,21 @@ struct Regs { LightColor diffuse; // material.diffuse * light.diffuse LightColor ambient; // material.ambient * light.ambient - struct { - // Encoded as 16-bit floating point - union { - BitField< 0, 16, u32> x; - BitField<16, 16, u32> y; - }; - union { - BitField< 0, 16, u32> z; - }; + // Encoded as 16-bit floating point + union { + BitField< 0, 16, u32> x; + BitField<16, 16, u32> y; + }; + union { + BitField< 0, 16, u32> z; + }; - INSERT_PADDING_WORDS(0x3); + INSERT_PADDING_WORDS(0x3); - union { - BitField<0, 1, u32> directional; - BitField<1, 1, u32> two_sided_diffuse; // When disabled, clamp dot-product to 0 - }; - }; + union { + BitField<0, 1, u32> directional; + BitField<1, 1, u32> two_sided_diffuse; // When disabled, clamp dot-product to 0 + } config; BitField<0, 20, u32> dist_atten_bias; BitField<0, 20, u32> dist_atten_scale; @@ -824,7 +822,7 @@ struct Regs { BitField<27, 1, u32> clamp_highlights; BitField<28, 2, LightingBumpMode> bump_mode; BitField<30, 1, u32> disable_bump_renorm; - }; + } config0; union { BitField<16, 1, u32> disable_lut_d0; @@ -845,13 +843,13 @@ struct Regs { BitField<29, 1, u32> disable_dist_atten_light_5; BitField<30, 1, u32> disable_dist_atten_light_6; BitField<31, 1, u32> disable_dist_atten_light_7; - }; + } config1; bool IsDistAttenDisabled(unsigned index) const { - const unsigned disable[] = { disable_dist_atten_light_0, disable_dist_atten_light_1, - disable_dist_atten_light_2, disable_dist_atten_light_3, - disable_dist_atten_light_4, disable_dist_atten_light_5, - disable_dist_atten_light_6, disable_dist_atten_light_7 }; + const unsigned disable[] = { config1.disable_dist_atten_light_0, config1.disable_dist_atten_light_1, + config1.disable_dist_atten_light_2, config1.disable_dist_atten_light_3, + config1.disable_dist_atten_light_4, config1.disable_dist_atten_light_5, + config1.disable_dist_atten_light_6, config1.disable_dist_atten_light_7 }; return disable[index] != 0; } diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index ed2e2f3ae..931c34a37 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -104,7 +104,6 @@ RasterizerOpenGL::RasterizerOpenGL() : shader_dirty(true) { // Sync fixed function OpenGL state SyncCullMode(); - SyncDepthModifiers(); SyncBlendEnabled(); SyncBlendFuncs(); SyncBlendColor(); @@ -259,8 +258,10 @@ void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) { // Depth modifiers case PICA_REG_INDEX(viewport_depth_range): + SyncDepthScale(); + break; case PICA_REG_INDEX(viewport_depth_near_plane): - SyncDepthModifiers(); + SyncDepthOffset(); break; // Depth buffering @@ -379,6 +380,17 @@ void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) { SyncCombinerColor(); break; + // Fragment lighting switches + case PICA_REG_INDEX(lighting.disable): + case PICA_REG_INDEX(lighting.num_lights): + case PICA_REG_INDEX(lighting.config0): + case PICA_REG_INDEX(lighting.config1): + case PICA_REG_INDEX(lighting.abs_lut_input): + case PICA_REG_INDEX(lighting.lut_input): + case PICA_REG_INDEX(lighting.lut_scale): + case PICA_REG_INDEX(lighting.light_enable): + break; + // Fragment lighting specular 0 color case PICA_REG_INDEX_WORKAROUND(lighting.light[0].specular_0, 0x140 + 0 * 0x10): SyncLightSpecular0(0); @@ -517,6 +529,70 @@ void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) { SyncLightPosition(7); break; + // Fragment lighting light source config + case PICA_REG_INDEX_WORKAROUND(lighting.light[0].config, 0x149 + 0 * 0x10): + case PICA_REG_INDEX_WORKAROUND(lighting.light[1].config, 0x149 + 1 * 0x10): + case PICA_REG_INDEX_WORKAROUND(lighting.light[2].config, 0x149 + 2 * 0x10): + case PICA_REG_INDEX_WORKAROUND(lighting.light[3].config, 0x149 + 3 * 0x10): + case PICA_REG_INDEX_WORKAROUND(lighting.light[4].config, 0x149 + 4 * 0x10): + case PICA_REG_INDEX_WORKAROUND(lighting.light[5].config, 0x149 + 5 * 0x10): + case PICA_REG_INDEX_WORKAROUND(lighting.light[6].config, 0x149 + 6 * 0x10): + case PICA_REG_INDEX_WORKAROUND(lighting.light[7].config, 0x149 + 7 * 0x10): + shader_dirty = true; + break; + + // Fragment lighting distance attenuation bias + case PICA_REG_INDEX_WORKAROUND(lighting.light[0].dist_atten_bias, 0x014A + 0 * 0x10): + SyncLightDistanceAttenuationBias(0); + break; + case PICA_REG_INDEX_WORKAROUND(lighting.light[1].dist_atten_bias, 0x014A + 1 * 0x10): + SyncLightDistanceAttenuationBias(1); + break; + case PICA_REG_INDEX_WORKAROUND(lighting.light[2].dist_atten_bias, 0x014A + 2 * 0x10): + SyncLightDistanceAttenuationBias(2); + break; + case PICA_REG_INDEX_WORKAROUND(lighting.light[3].dist_atten_bias, 0x014A + 3 * 0x10): + SyncLightDistanceAttenuationBias(3); + break; + case PICA_REG_INDEX_WORKAROUND(lighting.light[4].dist_atten_bias, 0x014A + 4 * 0x10): + SyncLightDistanceAttenuationBias(4); + break; + case PICA_REG_INDEX_WORKAROUND(lighting.light[5].dist_atten_bias, 0x014A + 5 * 0x10): + SyncLightDistanceAttenuationBias(5); + break; + case PICA_REG_INDEX_WORKAROUND(lighting.light[6].dist_atten_bias, 0x014A + 6 * 0x10): + SyncLightDistanceAttenuationBias(6); + break; + case PICA_REG_INDEX_WORKAROUND(lighting.light[7].dist_atten_bias, 0x014A + 7 * 0x10): + SyncLightDistanceAttenuationBias(7); + break; + + // Fragment lighting distance attenuation scale + case PICA_REG_INDEX_WORKAROUND(lighting.light[0].dist_atten_scale, 0x014B + 0 * 0x10): + SyncLightDistanceAttenuationScale(0); + break; + case PICA_REG_INDEX_WORKAROUND(lighting.light[1].dist_atten_scale, 0x014B + 1 * 0x10): + SyncLightDistanceAttenuationScale(1); + break; + case PICA_REG_INDEX_WORKAROUND(lighting.light[2].dist_atten_scale, 0x014B + 2 * 0x10): + SyncLightDistanceAttenuationScale(2); + break; + case PICA_REG_INDEX_WORKAROUND(lighting.light[3].dist_atten_scale, 0x014B + 3 * 0x10): + SyncLightDistanceAttenuationScale(3); + break; + case PICA_REG_INDEX_WORKAROUND(lighting.light[4].dist_atten_scale, 0x014B + 4 * 0x10): + SyncLightDistanceAttenuationScale(4); + break; + case PICA_REG_INDEX_WORKAROUND(lighting.light[5].dist_atten_scale, 0x014B + 5 * 0x10): + SyncLightDistanceAttenuationScale(5); + break; + case PICA_REG_INDEX_WORKAROUND(lighting.light[6].dist_atten_scale, 0x014B + 6 * 0x10): + SyncLightDistanceAttenuationScale(6); + break; + case PICA_REG_INDEX_WORKAROUND(lighting.light[7].dist_atten_scale, 0x014B + 7 * 0x10): + SyncLightDistanceAttenuationScale(7); + break; + // Fragment lighting global ambient color (emission + ambient * ambient) case PICA_REG_INDEX_WORKAROUND(lighting.global_ambient, 0x1c0): SyncGlobalAmbient(); @@ -880,6 +956,8 @@ void RasterizerOpenGL::SetShader() { glUniformBlockBinding(current_shader->shader.handle, block_index, 0); // Update uniforms + SyncDepthScale(); + SyncDepthOffset(); SyncAlphaTest(); SyncCombinerColor(); auto& tev_stages = Pica::g_state.regs.GetTevStages(); @@ -893,6 +971,8 @@ void RasterizerOpenGL::SetShader() { SyncLightDiffuse(light_index); SyncLightAmbient(light_index); SyncLightPosition(light_index); + SyncLightDistanceAttenuationBias(light_index); + SyncLightDistanceAttenuationScale(light_index); } } } @@ -922,13 +1002,20 @@ void RasterizerOpenGL::SyncCullMode() { } } -void RasterizerOpenGL::SyncDepthModifiers() { +void RasterizerOpenGL::SyncDepthScale() { float depth_scale = Pica::float24::FromRaw(Pica::g_state.regs.viewport_depth_range).ToFloat32(); - float depth_offset = Pica::float24::FromRaw(Pica::g_state.regs.viewport_depth_near_plane).ToFloat32(); + if (depth_scale != uniform_block_data.data.depth_scale) { + uniform_block_data.data.depth_scale = depth_scale; + uniform_block_data.dirty = true; + } +} - uniform_block_data.data.depth_scale = depth_scale; - uniform_block_data.data.depth_offset = depth_offset; - uniform_block_data.dirty = true; +void RasterizerOpenGL::SyncDepthOffset() { + float depth_offset = Pica::float24::FromRaw(Pica::g_state.regs.viewport_depth_near_plane).ToFloat32(); + if (depth_offset != uniform_block_data.data.depth_offset) { + uniform_block_data.data.depth_offset = depth_offset; + uniform_block_data.dirty = true; + } } void RasterizerOpenGL::SyncBlendEnabled() { @@ -937,6 +1024,8 @@ void RasterizerOpenGL::SyncBlendEnabled() { void RasterizerOpenGL::SyncBlendFuncs() { const auto& regs = Pica::g_state.regs; + state.blend.rgb_equation = PicaToGL::BlendEquation(regs.output_merger.alpha_blending.blend_equation_rgb); + state.blend.a_equation = PicaToGL::BlendEquation(regs.output_merger.alpha_blending.blend_equation_a); state.blend.src_rgb_func = PicaToGL::BlendFunc(regs.output_merger.alpha_blending.factor_source_rgb); state.blend.dst_rgb_func = PicaToGL::BlendFunc(regs.output_merger.alpha_blending.factor_dest_rgb); state.blend.src_a_func = PicaToGL::BlendFunc(regs.output_merger.alpha_blending.factor_source_a); @@ -1093,3 +1182,21 @@ void RasterizerOpenGL::SyncLightPosition(int light_index) { uniform_block_data.dirty = true; } } + +void RasterizerOpenGL::SyncLightDistanceAttenuationBias(int light_index) { + GLfloat dist_atten_bias = Pica::float20::FromRaw(Pica::g_state.regs.lighting.light[light_index].dist_atten_bias).ToFloat32(); + + if (dist_atten_bias != uniform_block_data.data.light_src[light_index].dist_atten_bias) { + uniform_block_data.data.light_src[light_index].dist_atten_bias = dist_atten_bias; + uniform_block_data.dirty = true; + } +} + +void RasterizerOpenGL::SyncLightDistanceAttenuationScale(int light_index) { + GLfloat dist_atten_scale = Pica::float20::FromRaw(Pica::g_state.regs.lighting.light[light_index].dist_atten_scale).ToFloat32(); + + if (dist_atten_scale != uniform_block_data.data.light_src[light_index].dist_atten_scale) { + uniform_block_data.data.light_src[light_index].dist_atten_scale = dist_atten_scale; + uniform_block_data.dirty = true; + } +} diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index eed00011a..bb7f20161 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -89,49 +89,47 @@ union PicaShaderConfig { unsigned num = regs.lighting.light_enable.GetNum(light_index); const auto& light = regs.lighting.light[num]; state.lighting.light[light_index].num = num; - state.lighting.light[light_index].directional = light.directional != 0; - state.lighting.light[light_index].two_sided_diffuse = light.two_sided_diffuse != 0; + state.lighting.light[light_index].directional = light.config.directional != 0; + state.lighting.light[light_index].two_sided_diffuse = light.config.two_sided_diffuse != 0; state.lighting.light[light_index].dist_atten_enable = !regs.lighting.IsDistAttenDisabled(num); - state.lighting.light[light_index].dist_atten_bias = Pica::float20::FromRaw(light.dist_atten_bias).ToFloat32(); - state.lighting.light[light_index].dist_atten_scale = Pica::float20::FromRaw(light.dist_atten_scale).ToFloat32(); } - state.lighting.lut_d0.enable = regs.lighting.disable_lut_d0 == 0; + state.lighting.lut_d0.enable = regs.lighting.config1.disable_lut_d0 == 0; state.lighting.lut_d0.abs_input = regs.lighting.abs_lut_input.disable_d0 == 0; state.lighting.lut_d0.type = regs.lighting.lut_input.d0.Value(); state.lighting.lut_d0.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.d0); - state.lighting.lut_d1.enable = regs.lighting.disable_lut_d1 == 0; + state.lighting.lut_d1.enable = regs.lighting.config1.disable_lut_d1 == 0; state.lighting.lut_d1.abs_input = regs.lighting.abs_lut_input.disable_d1 == 0; state.lighting.lut_d1.type = regs.lighting.lut_input.d1.Value(); state.lighting.lut_d1.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.d1); - state.lighting.lut_fr.enable = regs.lighting.disable_lut_fr == 0; + state.lighting.lut_fr.enable = regs.lighting.config1.disable_lut_fr == 0; state.lighting.lut_fr.abs_input = regs.lighting.abs_lut_input.disable_fr == 0; state.lighting.lut_fr.type = regs.lighting.lut_input.fr.Value(); state.lighting.lut_fr.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.fr); - state.lighting.lut_rr.enable = regs.lighting.disable_lut_rr == 0; + state.lighting.lut_rr.enable = regs.lighting.config1.disable_lut_rr == 0; state.lighting.lut_rr.abs_input = regs.lighting.abs_lut_input.disable_rr == 0; state.lighting.lut_rr.type = regs.lighting.lut_input.rr.Value(); state.lighting.lut_rr.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.rr); - state.lighting.lut_rg.enable = regs.lighting.disable_lut_rg == 0; + state.lighting.lut_rg.enable = regs.lighting.config1.disable_lut_rg == 0; state.lighting.lut_rg.abs_input = regs.lighting.abs_lut_input.disable_rg == 0; state.lighting.lut_rg.type = regs.lighting.lut_input.rg.Value(); state.lighting.lut_rg.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.rg); - state.lighting.lut_rb.enable = regs.lighting.disable_lut_rb == 0; + state.lighting.lut_rb.enable = regs.lighting.config1.disable_lut_rb == 0; state.lighting.lut_rb.abs_input = regs.lighting.abs_lut_input.disable_rb == 0; state.lighting.lut_rb.type = regs.lighting.lut_input.rb.Value(); state.lighting.lut_rb.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.rb); - state.lighting.config = regs.lighting.config; - state.lighting.fresnel_selector = regs.lighting.fresnel_selector; - state.lighting.bump_mode = regs.lighting.bump_mode; - state.lighting.bump_selector = regs.lighting.bump_selector; - state.lighting.bump_renorm = regs.lighting.disable_bump_renorm == 0; - state.lighting.clamp_highlights = regs.lighting.clamp_highlights != 0; + state.lighting.config = regs.lighting.config0.config; + state.lighting.fresnel_selector = regs.lighting.config0.fresnel_selector; + state.lighting.bump_mode = regs.lighting.config0.bump_mode; + state.lighting.bump_selector = regs.lighting.config0.bump_selector; + state.lighting.bump_renorm = regs.lighting.config0.disable_bump_renorm == 0; + state.lighting.clamp_highlights = regs.lighting.config0.clamp_highlights != 0; return res; } @@ -184,8 +182,6 @@ union PicaShaderConfig { bool directional; bool two_sided_diffuse; bool dist_atten_enable; - GLfloat dist_atten_scale; - GLfloat dist_atten_bias; } light[8]; bool enable; @@ -316,6 +312,8 @@ private: alignas(16) GLvec3 diffuse; alignas(16) GLvec3 ambient; alignas(16) GLvec3 position; + GLfloat dist_atten_bias; + GLfloat dist_atten_scale; }; /// Uniform structure for the Uniform Buffer Object, all members must be 16-byte aligned @@ -330,7 +328,7 @@ private: LightSrc light_src[8]; }; - static_assert(sizeof(UniformData) == 0x310, "The size of the UniformData structure has changed, update the structure in the shader"); + static_assert(sizeof(UniformData) == 0x390, "The size of the UniformData structure has changed, update the structure in the shader"); static_assert(sizeof(UniformData) < 16384, "UniformData structure must be less than 16kb as per the OpenGL spec"); /// Sets the OpenGL shader in accordance with the current PICA register state @@ -339,8 +337,11 @@ private: /// Syncs the cull mode to match the PICA register void SyncCullMode(); - /// Syncs the depth scale and offset to match the PICA registers - void SyncDepthModifiers(); + /// Syncs the depth scale to match the PICA register + void SyncDepthScale(); + + /// Syncs the depth offset to match the PICA register + void SyncDepthOffset(); /// Syncs the blend enabled status to match the PICA register void SyncBlendEnabled(); @@ -399,6 +400,12 @@ private: /// Syncs the specified light's position to match the PICA register void SyncLightPosition(int light_index); + /// Syncs the specified light's distance attenuation bias to match the PICA register + void SyncLightDistanceAttenuationBias(int light_index); + + /// Syncs the specified light's distance attenuation scale to match the PICA register + void SyncLightDistanceAttenuationScale(int light_index); + OpenGLState state; RasterizerCacheOpenGL res_cache; @@ -413,7 +420,7 @@ private: UniformData data; bool lut_dirty[6]; bool dirty; - } uniform_block_data; + } uniform_block_data = {}; std::array<SamplerInfo, 3> texture_samplers; OGLVertexArray vertex_array; @@ -422,5 +429,5 @@ private: OGLFramebuffer framebuffer; std::array<OGLTexture, 6> lighting_luts; - std::array<std::array<GLvec4, 256>, 6> lighting_lut_data; + std::array<std::array<GLvec4, 256>, 6> lighting_lut_data{}; }; diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp index 71d60e69c..8332e722d 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.cpp +++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp @@ -439,9 +439,7 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) { // If enabled, compute distance attenuation value std::string dist_atten = "1.0"; if (light_config.dist_atten_enable) { - std::string scale = std::to_string(light_config.dist_atten_scale); - std::string bias = std::to_string(light_config.dist_atten_bias); - std::string index = "(" + scale + " * length(-view - " + light_src + ".position) + " + bias + ")"; + std::string index = "(" + light_src + ".dist_atten_scale * length(-view - " + light_src + ".position) + " + light_src + ".dist_atten_bias)"; index = "((clamp(" + index + ", 0.0, FLOAT_255)))"; const unsigned lut_num = ((unsigned)Regs::LightingSampler::DistanceAttenuation + light_config.num); dist_atten = GetLutValue((Regs::LightingSampler)lut_num, index); @@ -549,6 +547,8 @@ struct LightSrc { vec3 diffuse; vec3 ambient; vec3 position; + float dist_atten_bias; + float dist_atten_scale; }; layout (std140) uniform shader_data { diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp index 02cd9f417..fa141fc9a 100644 --- a/src/video_core/renderer_opengl/gl_state.cpp +++ b/src/video_core/renderer_opengl/gl_state.cpp @@ -36,6 +36,8 @@ OpenGLState::OpenGLState() { stencil.action_stencil_fail = GL_KEEP; blend.enabled = false; + blend.rgb_equation = GL_FUNC_ADD; + blend.a_equation = GL_FUNC_ADD; blend.src_rgb_func = GL_ONE; blend.dst_rgb_func = GL_ZERO; blend.src_a_func = GL_ONE; @@ -165,6 +167,11 @@ void OpenGLState::Apply() const { blend.src_a_func, blend.dst_a_func); } + if (blend.rgb_equation != cur_state.blend.rgb_equation || + blend.a_equation != cur_state.blend.a_equation) { + glBlendEquationSeparate(blend.rgb_equation, blend.a_equation); + } + if (logic_op != cur_state.logic_op) { glLogicOp(logic_op); } diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h index 24f20e47c..228727054 100644 --- a/src/video_core/renderer_opengl/gl_state.h +++ b/src/video_core/renderer_opengl/gl_state.h @@ -40,6 +40,8 @@ public: struct { bool enabled; // GL_BLEND + GLenum rgb_equation; // GL_BLEND_EQUATION_RGB + GLenum a_equation; // GL_BLEND_EQUATION_ALPHA GLenum src_rgb_func; // GL_BLEND_SRC_RGB GLenum dst_rgb_func; // GL_BLEND_DST_RGB GLenum src_a_func; // GL_BLEND_SRC_ALPHA diff --git a/src/video_core/renderer_opengl/pica_to_gl.h b/src/video_core/renderer_opengl/pica_to_gl.h index 976d1f364..6dc2758c5 100644 --- a/src/video_core/renderer_opengl/pica_to_gl.h +++ b/src/video_core/renderer_opengl/pica_to_gl.h @@ -78,6 +78,26 @@ inline GLenum WrapMode(Pica::Regs::TextureConfig::WrapMode mode) { return gl_mode; } +inline GLenum BlendEquation(Pica::Regs::BlendEquation equation) { + static const GLenum blend_equation_table[] = { + GL_FUNC_ADD, // BlendEquation::Add + GL_FUNC_SUBTRACT, // BlendEquation::Subtract + GL_FUNC_REVERSE_SUBTRACT, // BlendEquation::ReverseSubtract + GL_MIN, // BlendEquation::Min + GL_MAX, // BlendEquation::Max + }; + + // Range check table for input + if (static_cast<size_t>(equation) >= ARRAY_SIZE(blend_equation_table)) { + LOG_CRITICAL(Render_OpenGL, "Unknown blend equation %d", equation); + UNREACHABLE(); + + return GL_FUNC_ADD; + } + + return blend_equation_table[(unsigned)equation]; +} + inline GLenum BlendFunc(Pica::Regs::BlendFactor factor) { static const GLenum blend_func_table[] = { GL_ZERO, // BlendFactor::Zero diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index 8f424a435..8410e0a64 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -450,7 +450,7 @@ static const char* GetType(GLenum type) { #undef RET } -static void DebugHandler(GLenum source, GLenum type, GLuint id, GLenum severity, GLsizei length, +static void APIENTRY DebugHandler(GLenum source, GLenum type, GLuint id, GLenum severity, GLsizei length, const GLchar* message, const void* user_param) { Log::Level level; switch (severity) { diff --git a/src/video_core/shader/shader.cpp b/src/video_core/shader/shader.cpp index e93a9d92a..f565e2c91 100644 --- a/src/video_core/shader/shader.cpp +++ b/src/video_core/shader/shader.cpp @@ -30,6 +30,58 @@ namespace Pica { namespace Shader { +OutputVertex OutputRegisters::ToVertex(const Regs::ShaderConfig& config) { + // Setup output data + OutputVertex ret; + // TODO(neobrain): Under some circumstances, up to 16 attributes may be output. We need to + // figure out what those circumstances are and enable the remaining outputs then. + unsigned index = 0; + for (unsigned i = 0; i < 7; ++i) { + + if (index >= g_state.regs.vs_output_total) + break; + + if ((config.output_mask & (1 << i)) == 0) + continue; + + const auto& output_register_map = g_state.regs.vs_output_attributes[index]; + + u32 semantics[4] = { + output_register_map.map_x, output_register_map.map_y, + output_register_map.map_z, output_register_map.map_w + }; + + for (unsigned comp = 0; comp < 4; ++comp) { + float24* out = ((float24*)&ret) + semantics[comp]; + if (semantics[comp] != Regs::VSOutputAttributes::INVALID) { + *out = value[i][comp]; + } else { + // Zero output so that attributes which aren't output won't have denormals in them, + // which would slow us down later. + memset(out, 0, sizeof(*out)); + } + } + + index++; + } + + // The hardware takes the absolute and saturates vertex colors like this, *before* doing interpolation + for (unsigned i = 0; i < 4; ++i) { + ret.color[i] = float24::FromFloat32( + std::fmin(std::fabs(ret.color[i].ToFloat32()), 1.0f)); + } + + LOG_TRACE(HW_GPU, "Output vertex: pos(%.2f, %.2f, %.2f, %.2f), quat(%.2f, %.2f, %.2f, %.2f), " + "col(%.2f, %.2f, %.2f, %.2f), tc0(%.2f, %.2f), view(%.2f, %.2f, %.2f)", + ret.pos.x.ToFloat32(), ret.pos.y.ToFloat32(), ret.pos.z.ToFloat32(), ret.pos.w.ToFloat32(), + ret.quat.x.ToFloat32(), ret.quat.y.ToFloat32(), ret.quat.z.ToFloat32(), ret.quat.w.ToFloat32(), + ret.color.x.ToFloat32(), ret.color.y.ToFloat32(), ret.color.z.ToFloat32(), ret.color.w.ToFloat32(), + ret.tc0.u().ToFloat32(), ret.tc0.v().ToFloat32(), + ret.view.x.ToFloat32(), ret.view.y.ToFloat32(), ret.view.z.ToFloat32()); + + return ret; +} + #ifdef ARCHITECTURE_x86_64 static std::unordered_map<u64, std::unique_ptr<JitShader>> shader_map; static const JitShader* jit_shader; @@ -62,8 +114,9 @@ void ShaderSetup::Setup() { MICROPROFILE_DEFINE(GPU_Shader, "GPU", "Shader", MP_RGB(50, 50, 240)); -OutputVertex ShaderSetup::Run(UnitState<false>& state, const InputVertex& input, int num_attributes) { +void ShaderSetup::Run(UnitState<false>& state, const InputVertex& input, int num_attributes) { auto& config = g_state.regs.vs; + auto& setup = g_state.vs; MICROPROFILE_SCOPE(GPU_Shader); @@ -81,62 +134,13 @@ OutputVertex ShaderSetup::Run(UnitState<false>& state, const InputVertex& input, #ifdef ARCHITECTURE_x86_64 if (VideoCore::g_shader_jit_enabled) - jit_shader->Run(&state.registers, g_state.regs.vs.main_offset); + jit_shader->Run(setup, state, config.main_offset); else - RunInterpreter(state); + RunInterpreter(setup, state, config.main_offset); #else - RunInterpreter(state); + RunInterpreter(setup, state, config.main_offset); #endif // ARCHITECTURE_x86_64 - // Setup output data - OutputVertex ret; - // TODO(neobrain): Under some circumstances, up to 16 attributes may be output. We need to - // figure out what those circumstances are and enable the remaining outputs then. - unsigned index = 0; - for (unsigned i = 0; i < 7; ++i) { - - if (index >= g_state.regs.vs_output_total) - break; - - if ((g_state.regs.vs.output_mask & (1 << i)) == 0) - continue; - - const auto& output_register_map = g_state.regs.vs_output_attributes[index]; // TODO: Don't hardcode VS here - - u32 semantics[4] = { - output_register_map.map_x, output_register_map.map_y, - output_register_map.map_z, output_register_map.map_w - }; - - for (unsigned comp = 0; comp < 4; ++comp) { - float24* out = ((float24*)&ret) + semantics[comp]; - if (semantics[comp] != Regs::VSOutputAttributes::INVALID) { - *out = state.registers.output[i][comp]; - } else { - // Zero output so that attributes which aren't output won't have denormals in them, - // which would slow us down later. - memset(out, 0, sizeof(*out)); - } - } - - index++; - } - - // The hardware takes the absolute and saturates vertex colors like this, *before* doing interpolation - for (unsigned i = 0; i < 4; ++i) { - ret.color[i] = float24::FromFloat32( - std::fmin(std::fabs(ret.color[i].ToFloat32()), 1.0f)); - } - - LOG_TRACE(HW_GPU, "Output vertex: pos(%.2f, %.2f, %.2f, %.2f), quat(%.2f, %.2f, %.2f, %.2f), " - "col(%.2f, %.2f, %.2f, %.2f), tc0(%.2f, %.2f), view(%.2f, %.2f, %.2f)", - ret.pos.x.ToFloat32(), ret.pos.y.ToFloat32(), ret.pos.z.ToFloat32(), ret.pos.w.ToFloat32(), - ret.quat.x.ToFloat32(), ret.quat.y.ToFloat32(), ret.quat.z.ToFloat32(), ret.quat.w.ToFloat32(), - ret.color.x.ToFloat32(), ret.color.y.ToFloat32(), ret.color.z.ToFloat32(), ret.color.w.ToFloat32(), - ret.tc0.u().ToFloat32(), ret.tc0.v().ToFloat32(), - ret.view.x.ToFloat32(), ret.view.y.ToFloat32(), ret.view.z.ToFloat32()); - - return ret; } DebugData<true> ShaderSetup::ProduceDebugInfo(const InputVertex& input, int num_attributes, const Regs::ShaderConfig& config, const ShaderSetup& setup) { @@ -156,7 +160,7 @@ DebugData<true> ShaderSetup::ProduceDebugInfo(const InputVertex& input, int num_ state.conditional_code[0] = false; state.conditional_code[1] = false; - RunInterpreter(state); + RunInterpreter(setup, state, config.main_offset); return state.debug; } diff --git a/src/video_core/shader/shader.h b/src/video_core/shader/shader.h index 983e4a967..fee16df62 100644 --- a/src/video_core/shader/shader.h +++ b/src/video_core/shader/shader.h @@ -84,6 +84,15 @@ struct OutputVertex { static_assert(std::is_pod<OutputVertex>::value, "Structure is not POD"); static_assert(sizeof(OutputVertex) == 32 * sizeof(float), "OutputVertex has invalid size"); +struct OutputRegisters { + OutputRegisters() = default; + + alignas(16) Math::Vec4<float24> value[16]; + + OutputVertex ToVertex(const Regs::ShaderConfig& config); +}; +static_assert(std::is_pod<OutputRegisters>::value, "Structure is not POD"); + // Helper structure used to keep track of data useful for inspection of shader emulation template<bool full_debugging> struct DebugData; @@ -267,11 +276,12 @@ struct UnitState { // The registers are accessed by the shader JIT using SSE instructions, and are therefore // required to be 16-byte aligned. alignas(16) Math::Vec4<float24> input[16]; - alignas(16) Math::Vec4<float24> output[16]; alignas(16) Math::Vec4<float24> temporary[16]; } registers; static_assert(std::is_pod<Registers>::value, "Structure is not POD"); + OutputRegisters output_registers; + bool conditional_code[2]; // Two Address registers and one loop counter @@ -283,10 +293,10 @@ struct UnitState { static size_t InputOffset(const SourceRegister& reg) { switch (reg.GetRegisterType()) { case RegisterType::Input: - return offsetof(UnitState::Registers, input) + reg.GetIndex()*sizeof(Math::Vec4<float24>); + return offsetof(UnitState, registers.input) + reg.GetIndex()*sizeof(Math::Vec4<float24>); case RegisterType::Temporary: - return offsetof(UnitState::Registers, temporary) + reg.GetIndex()*sizeof(Math::Vec4<float24>); + return offsetof(UnitState, registers.temporary) + reg.GetIndex()*sizeof(Math::Vec4<float24>); default: UNREACHABLE(); @@ -297,10 +307,10 @@ struct UnitState { static size_t OutputOffset(const DestRegister& reg) { switch (reg.GetRegisterType()) { case RegisterType::Output: - return offsetof(UnitState::Registers, output) + reg.GetIndex()*sizeof(Math::Vec4<float24>); + return offsetof(UnitState, output_registers.value) + reg.GetIndex()*sizeof(Math::Vec4<float24>); case RegisterType::Temporary: - return offsetof(UnitState::Registers, temporary) + reg.GetIndex()*sizeof(Math::Vec4<float24>); + return offsetof(UnitState, registers.temporary) + reg.GetIndex()*sizeof(Math::Vec4<float24>); default: UNREACHABLE(); @@ -323,6 +333,23 @@ struct ShaderSetup { std::array<Math::Vec4<u8>, 4> i; } uniforms; + static size_t UniformOffset(RegisterType type, unsigned index) { + switch (type) { + case RegisterType::FloatUniform: + return offsetof(ShaderSetup, uniforms.f) + index*sizeof(Math::Vec4<float24>); + + case RegisterType::BoolUniform: + return offsetof(ShaderSetup, uniforms.b) + index*sizeof(bool); + + case RegisterType::IntUniform: + return offsetof(ShaderSetup, uniforms.i) + index*sizeof(Math::Vec4<u8>); + + default: + UNREACHABLE(); + return 0; + } + } + std::array<u32, 1024> program_code; std::array<u32, 1024> swizzle_data; @@ -337,9 +364,8 @@ struct ShaderSetup { * @param state Shader unit state, must be setup per shader and per shader unit * @param input Input vertex into the shader * @param num_attributes The number of vertex shader attributes - * @return The output vertex, after having been processed by the vertex shader */ - OutputVertex Run(UnitState<false>& state, const InputVertex& input, int num_attributes); + void Run(UnitState<false>& state, const InputVertex& input, int num_attributes); /** * Produce debug information based on the given shader and input vertex diff --git a/src/video_core/shader/shader_interpreter.cpp b/src/video_core/shader/shader_interpreter.cpp index 3a827d11f..b1eadc071 100644 --- a/src/video_core/shader/shader_interpreter.cpp +++ b/src/video_core/shader/shader_interpreter.cpp @@ -41,11 +41,11 @@ struct CallStackElement { }; template<bool Debug> -void RunInterpreter(UnitState<Debug>& state) { +void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned offset) { // TODO: Is there a maximal size for this? boost::container::static_vector<CallStackElement, 16> call_stack; - u32 program_counter = g_state.regs.vs.main_offset; + u32 program_counter = offset; const auto& uniforms = g_state.vs.uniforms; const auto& swizzle_data = g_state.vs.swizzle_data; @@ -144,7 +144,7 @@ void RunInterpreter(UnitState<Debug>& state) { src2[3] = src2[3] * float24::FromFloat32(-1); } - float24* dest = (instr.common.dest.Value() < 0x10) ? &state.registers.output[instr.common.dest.Value().GetIndex()][0] + float24* dest = (instr.common.dest.Value() < 0x10) ? &state.output_registers.value[instr.common.dest.Value().GetIndex()][0] : (instr.common.dest.Value() < 0x20) ? &state.registers.temporary[instr.common.dest.Value().GetIndex()][0] : dummy_vec4_float24; @@ -483,7 +483,7 @@ void RunInterpreter(UnitState<Debug>& state) { src3[3] = src3[3] * float24::FromFloat32(-1); } - float24* dest = (instr.mad.dest.Value() < 0x10) ? &state.registers.output[instr.mad.dest.Value().GetIndex()][0] + float24* dest = (instr.mad.dest.Value() < 0x10) ? &state.output_registers.value[instr.mad.dest.Value().GetIndex()][0] : (instr.mad.dest.Value() < 0x20) ? &state.registers.temporary[instr.mad.dest.Value().GetIndex()][0] : dummy_vec4_float24; @@ -647,8 +647,8 @@ void RunInterpreter(UnitState<Debug>& state) { } // Explicit instantiation -template void RunInterpreter(UnitState<false>& state); -template void RunInterpreter(UnitState<true>& state); +template void RunInterpreter(const ShaderSetup& setup, UnitState<false>& state, unsigned offset); +template void RunInterpreter(const ShaderSetup& setup, UnitState<true>& state, unsigned offset); } // namespace diff --git a/src/video_core/shader/shader_interpreter.h b/src/video_core/shader/shader_interpreter.h index 6048cdf3a..bb3ce1c6e 100644 --- a/src/video_core/shader/shader_interpreter.h +++ b/src/video_core/shader/shader_interpreter.h @@ -11,7 +11,7 @@ namespace Shader { template <bool Debug> struct UnitState; template<bool Debug> -void RunInterpreter(UnitState<Debug>& state); +void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned offset); } // namespace diff --git a/src/video_core/shader/shader_jit_x64.cpp b/src/video_core/shader/shader_jit_x64.cpp index 99f6c51eb..43e7e6b4c 100644 --- a/src/video_core/shader/shader_jit_x64.cpp +++ b/src/video_core/shader/shader_jit_x64.cpp @@ -102,7 +102,7 @@ const JitFunction instr_table[64] = { // purposes, as documented below: /// Pointer to the uniform memory -static const X64Reg UNIFORMS = R9; +static const X64Reg SETUP = R9; /// The two 32-bit VS address offset registers set by the MOVA instruction static const X64Reg ADDROFFS_REG_0 = R10; static const X64Reg ADDROFFS_REG_1 = R11; @@ -117,7 +117,7 @@ static const X64Reg COND0 = R13; /// Result of the previous CMP instruction for the Y-component comparison static const X64Reg COND1 = R14; /// Pointer to the UnitState instance for the current VS unit -static const X64Reg REGISTERS = R15; +static const X64Reg STATE = R15; /// SIMD scratch register static const X64Reg SCRATCH = XMM0; /// Loaded with the first swizzled source register, otherwise can be used as a scratch register @@ -136,7 +136,7 @@ static const X64Reg NEGBIT = XMM15; // State registers that must not be modified by external functions calls // Scratch registers, e.g., SRC1 and SCRATCH, have to be saved on the side if needed static const BitSet32 persistent_regs = { - UNIFORMS, REGISTERS, // Pointers to register blocks + SETUP, STATE, // Pointers to register blocks ADDROFFS_REG_0, ADDROFFS_REG_1, LOOPCOUNT_REG, COND0, COND1, // Cached registers ONE+16, NEGBIT+16, // Constants }; @@ -177,10 +177,10 @@ void JitShader::Compile_SwizzleSrc(Instruction instr, unsigned src_num, SourceRe size_t src_offset; if (src_reg.GetRegisterType() == RegisterType::FloatUniform) { - src_ptr = UNIFORMS; - src_offset = src_reg.GetIndex() * sizeof(float24) * 4; + src_ptr = SETUP; + src_offset = ShaderSetup::UniformOffset(RegisterType::FloatUniform, src_reg.GetIndex()); } else { - src_ptr = REGISTERS; + src_ptr = STATE; src_offset = UnitState<false>::InputOffset(src_reg); } @@ -264,11 +264,11 @@ void JitShader::Compile_DestEnable(Instruction instr,X64Reg src) { // If all components are enabled, write the result to the destination register if (swiz.dest_mask == NO_DEST_REG_MASK) { // Store dest back to memory - MOVAPS(MDisp(REGISTERS, dest_offset_disp), src); + MOVAPS(MDisp(STATE, dest_offset_disp), src); } else { // Not all components are enabled, so mask the result when storing to the destination register... - MOVAPS(SCRATCH, MDisp(REGISTERS, dest_offset_disp)); + MOVAPS(SCRATCH, MDisp(STATE, dest_offset_disp)); if (Common::GetCPUCaps().sse4_1) { u8 mask = ((swiz.dest_mask & 1) << 3) | ((swiz.dest_mask & 8) >> 3) | ((swiz.dest_mask & 2) << 1) | ((swiz.dest_mask & 4) >> 1); @@ -287,7 +287,7 @@ void JitShader::Compile_DestEnable(Instruction instr,X64Reg src) { } // Store dest back to memory - MOVAPS(MDisp(REGISTERS, dest_offset_disp), SCRATCH); + MOVAPS(MDisp(STATE, dest_offset_disp), SCRATCH); } } @@ -336,8 +336,8 @@ void JitShader::Compile_EvaluateCondition(Instruction instr) { } void JitShader::Compile_UniformCondition(Instruction instr) { - int offset = offsetof(decltype(g_state.vs.uniforms), b) + (instr.flow_control.bool_uniform_id * sizeof(bool)); - CMP(sizeof(bool) * 8, MDisp(UNIFORMS, offset), Imm8(0)); + int offset = ShaderSetup::UniformOffset(RegisterType::BoolUniform, instr.flow_control.bool_uniform_id); + CMP(sizeof(bool) * 8, MDisp(SETUP, offset), Imm8(0)); } BitSet32 JitShader::PersistentCallerSavedRegs() { @@ -714,8 +714,8 @@ void JitShader::Compile_LOOP(Instruction instr) { looping = true; - int offset = offsetof(decltype(g_state.vs.uniforms), i) + (instr.flow_control.int_uniform_id * sizeof(Math::Vec4<u8>)); - MOV(32, R(LOOPCOUNT), MDisp(UNIFORMS, offset)); + int offset = ShaderSetup::UniformOffset(RegisterType::IntUniform, instr.flow_control.int_uniform_id); + MOV(32, R(LOOPCOUNT), MDisp(SETUP, offset)); MOV(32, R(LOOPCOUNT_REG), R(LOOPCOUNT)); SHR(32, R(LOOPCOUNT_REG), Imm8(8)); AND(32, R(LOOPCOUNT_REG), Imm32(0xff)); // Y-component is the start @@ -826,8 +826,8 @@ void JitShader::Compile() { // The stack pointer is 8 modulo 16 at the entry of a procedure ABI_PushRegistersAndAdjustStack(ABI_ALL_CALLEE_SAVED, 8); - MOV(PTRBITS, R(REGISTERS), R(ABI_PARAM1)); - MOV(PTRBITS, R(UNIFORMS), ImmPtr(&g_state.vs.uniforms)); + MOV(PTRBITS, R(SETUP), R(ABI_PARAM1)); + MOV(PTRBITS, R(STATE), R(ABI_PARAM2)); // Zero address/loop registers XOR(64, R(ADDROFFS_REG_0), R(ADDROFFS_REG_0)); @@ -845,7 +845,7 @@ void JitShader::Compile() { MOVAPS(NEGBIT, MatR(RAX)); // Jump to start of the shader program - JMPptr(R(ABI_PARAM2)); + JMPptr(R(ABI_PARAM3)); // Compile entire program Compile_Block(static_cast<unsigned>(g_state.vs.program_code.size())); diff --git a/src/video_core/shader/shader_jit_x64.h b/src/video_core/shader/shader_jit_x64.h index 30aa7ff30..5468459d4 100644 --- a/src/video_core/shader/shader_jit_x64.h +++ b/src/video_core/shader/shader_jit_x64.h @@ -36,8 +36,8 @@ class JitShader : public Gen::XCodeBlock { public: JitShader(); - void Run(void* registers, unsigned offset) const { - program(registers, code_ptr[offset]); + void Run(const ShaderSetup& setup, UnitState<false>& state, unsigned offset) const { + program(&setup, &state, code_ptr[offset]); } void Compile(); @@ -117,7 +117,7 @@ private: /// Branches that need to be fixed up once the entire shader program is compiled std::vector<std::pair<Gen::FixupBranch, unsigned>> fixup_branches; - using CompiledShader = void(void* registers, const u8* start_addr); + using CompiledShader = void(const void* setup, void* state, const u8* start_addr); CompiledShader* program = nullptr; }; diff --git a/src/video_core/vertex_loader.cpp b/src/video_core/vertex_loader.cpp index 83896814f..e40f0f1ee 100644 --- a/src/video_core/vertex_loader.cpp +++ b/src/video_core/vertex_loader.cpp @@ -2,8 +2,8 @@ #include <boost/range/algorithm/fill.hpp> -#include "common/assert.h" #include "common/alignment.h" +#include "common/assert.h" #include "common/bit_field.h" #include "common/common_types.h" #include "common/logging/log.h" @@ -21,6 +21,8 @@ namespace Pica { void VertexLoader::Setup(const Pica::Regs& regs) { + ASSERT_MSG(!is_setup, "VertexLoader is not intended to be setup more than once."); + const auto& attribute_config = regs.vertex_attributes; num_total_attributes = attribute_config.GetNumTotalAttributes(); @@ -60,9 +62,13 @@ void VertexLoader::Setup(const Pica::Regs& regs) { } } } + + is_setup = true; } void VertexLoader::LoadVertex(u32 base_address, int index, int vertex, Shader::InputVertex& input, DebugUtils::MemoryAccessTracker& memory_accesses) { + ASSERT_MSG(is_setup, "A VertexLoader needs to be setup before loading vertices."); + for (int i = 0; i < num_total_attributes; ++i) { if (vertex_attribute_elements[i] != 0) { // Load per-vertex data from the loader arrays diff --git a/src/video_core/vertex_loader.h b/src/video_core/vertex_loader.h index becf5a403..ac162c254 100644 --- a/src/video_core/vertex_loader.h +++ b/src/video_core/vertex_loader.h @@ -1,7 +1,8 @@ #pragma once -#include "common/common_types.h" +#include <array> +#include "common/common_types.h" #include "video_core/pica.h" namespace Pica { @@ -11,23 +12,29 @@ class MemoryAccessTracker; } namespace Shader { -class InputVertex; +struct InputVertex; } class VertexLoader { public: + VertexLoader() = default; + explicit VertexLoader(const Pica::Regs& regs) { + Setup(regs); + } + void Setup(const Pica::Regs& regs); void LoadVertex(u32 base_address, int index, int vertex, Shader::InputVertex& input, DebugUtils::MemoryAccessTracker& memory_accesses); int GetNumTotalAttributes() const { return num_total_attributes; } private: - u32 vertex_attribute_sources[16]; - u32 vertex_attribute_strides[16] = {}; - Regs::VertexAttributeFormat vertex_attribute_formats[16] = {}; - u32 vertex_attribute_elements[16] = {}; - bool vertex_attribute_is_default[16]; - int num_total_attributes; + std::array<u32, 16> vertex_attribute_sources; + std::array<u32, 16> vertex_attribute_strides{}; + std::array<Regs::VertexAttributeFormat, 16> vertex_attribute_formats; + std::array<u32, 16> vertex_attribute_elements{}; + std::array<bool, 16> vertex_attribute_is_default; + int num_total_attributes = 0; + bool is_setup = false; }; } // namespace Pica |
