about | summary | refs | log | tree | commit | diff
path: root/src/video_core
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core')
-rw-r--r-- src/video_core/command_processor.cpp 24
-rw-r--r-- src/video_core/debug_utils/debug_utils.cpp 199
-rw-r--r-- src/video_core/debug_utils/debug_utils.h 6
-rw-r--r-- src/video_core/pica.h 40
-rw-r--r-- src/video_core/renderer_opengl/gl_rasterizer.cpp 121
-rw-r--r-- src/video_core/renderer_opengl/gl_rasterizer.h 53
-rw-r--r-- src/video_core/renderer_opengl/gl_shader_gen.cpp 6
-rw-r--r-- src/video_core/renderer_opengl/gl_state.cpp 7
-rw-r--r-- src/video_core/renderer_opengl/gl_state.h 2
-rw-r--r-- src/video_core/renderer_opengl/pica_to_gl.h 20
-rw-r--r-- src/video_core/renderer_opengl/renderer_opengl.cpp 2
-rw-r--r-- src/video_core/shader/shader.cpp 112
-rw-r--r-- src/video_core/shader/shader.h 40
-rw-r--r-- src/video_core/shader/shader_interpreter.cpp 12
-rw-r--r-- src/video_core/shader/shader_interpreter.h 2
-rw-r--r-- src/video_core/shader/shader_jit_x64.cpp 32
-rw-r--r-- src/video_core/shader/shader_jit_x64.h 6
-rw-r--r-- src/video_core/vertex_loader.cpp 8
-rw-r--r-- src/video_core/vertex_loader.h 23
19 files changed, 463 insertions, 252 deletions
diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp
index ad0da796e..19e03adf4 100644
--- a/src/video_core/command_processor.cpp
+++ b/src/video_core/command_processor.cpp
@@ -149,7 +149,8 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
// Send to vertex shader
if (g_debug_context)
g_debug_context->OnEvent(DebugContext::Event::VertexShaderInvocation, static_cast<void*>(&immediate_input));
- Shader::OutputVertex output = g_state.vs.Run(shader_unit, immediate_input, regs.vs.num_input_attributes+1);
+ g_state.vs.Run(shader_unit, immediate_input, regs.vs.num_input_attributes+1);
+ Shader::OutputVertex output_vertex = shader_unit.output_registers.ToVertex(regs.vs);
// Send to renderer
using Pica::Shader::OutputVertex;
@@ -157,7 +158,7 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
VideoCore::g_renderer->Rasterizer()->AddTriangle(v0, v1, v2);
};
- g_state.primitive_assembler.SubmitVertex(output, AddTriangle);
+ g_state.primitive_assembler.SubmitVertex(output_vertex, AddTriangle);
}
}
}
@@ -199,9 +200,8 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
// Processes information about internal vertex attributes to figure out how a vertex is loaded.
// Later, these can be compiled and cached.
- VertexLoader loader;
const u32 base_address = regs.vertex_attributes.GetPhysicalBaseAddress();
- loader.Setup(regs);
+ VertexLoader loader(regs);
// Load vertices
bool is_indexed = (id == PICA_REG_INDEX(trigger_draw_indexed));
@@ -231,7 +231,7 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
// The size has been tuned for optimal balance between hit-rate and the cost of lookup
const size_t VERTEX_CACHE_SIZE = 32;
std::array<u16, VERTEX_CACHE_SIZE> vertex_cache_ids;
- std::array<Shader::OutputVertex, VERTEX_CACHE_SIZE> vertex_cache;
+ std::array<Shader::OutputRegisters, VERTEX_CACHE_SIZE> vertex_cache;
unsigned int vertex_cache_pos = 0;
vertex_cache_ids.fill(-1);
@@ -249,7 +249,7 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
ASSERT(vertex != -1);
bool vertex_cache_hit = false;
- Shader::OutputVertex output;
+ Shader::OutputRegisters output_registers;
if (is_indexed) {
if (g_debug_context && Pica::g_debug_context->recorder) {
@@ -259,7 +259,7 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
for (unsigned int i = 0; i < VERTEX_CACHE_SIZE; ++i) {
if (vertex == vertex_cache_ids[i]) {
- output = vertex_cache[i];
+ output_registers = vertex_cache[i];
vertex_cache_hit = true;
break;
}
@@ -274,15 +274,19 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
// Send to vertex shader
if (g_debug_context)
g_debug_context->OnEvent(DebugContext::Event::VertexShaderInvocation, (void*)&input);
- output = g_state.vs.Run(shader_unit, input, loader.GetNumTotalAttributes());
+ g_state.vs.Run(shader_unit, input, loader.GetNumTotalAttributes());
+ output_registers = shader_unit.output_registers;
if (is_indexed) {
- vertex_cache[vertex_cache_pos] = output;
+ vertex_cache[vertex_cache_pos] = output_registers;
vertex_cache_ids[vertex_cache_pos] = vertex;
vertex_cache_pos = (vertex_cache_pos + 1) % VERTEX_CACHE_SIZE;
}
}
+ // Retrieve vertex from register data
+ Shader::OutputVertex output_vertex = output_registers.ToVertex(regs.vs);
+
// Send to renderer
using Pica::Shader::OutputVertex;
auto AddTriangle = [](
@@ -290,7 +294,7 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
VideoCore::g_renderer->Rasterizer()->AddTriangle(v0, v1, v2);
};
- primitive_assembler.SubmitVertex(output, AddTriangle);
+ primitive_assembler.SubmitVertex(output_vertex, AddTriangle);
}
for (auto& range : memory_accesses.ranges) {
diff --git a/src/video_core/debug_utils/debug_utils.cpp b/src/video_core/debug_utils/debug_utils.cpp
index 2f645b441..871368323 100644
--- a/src/video_core/debug_utils/debug_utils.cpp
+++ b/src/video_core/debug_utils/debug_utils.cpp
@@ -696,106 +696,125 @@ finalise:
#endif
}
-void DumpTevStageConfig(const std::array<Pica::Regs::TevStageConfig,6>& stages)
-{
+static std::string ReplacePattern(const std::string& input, const std::string& pattern, const std::string& replacement) {
+ size_t start = input.find(pattern);
+ if (start == std::string::npos)
+ return input;
+
+ std::string ret = input;
+ ret.replace(start, pattern.length(), replacement);
+ return ret;
+}
+
+static std::string GetTevStageConfigSourceString(const Pica::Regs::TevStageConfig::Source& source) {
using Source = Pica::Regs::TevStageConfig::Source;
+ static const std::map<Source, std::string> source_map = {
+ { Source::PrimaryColor, "PrimaryColor" },
+ { Source::PrimaryFragmentColor, "PrimaryFragmentColor" },
+ { Source::SecondaryFragmentColor, "SecondaryFragmentColor" },
+ { Source::Texture0, "Texture0" },
+ { Source::Texture1, "Texture1" },
+ { Source::Texture2, "Texture2" },
+ { Source::Texture3, "Texture3" },
+ { Source::PreviousBuffer, "PreviousBuffer" },
+ { Source::Constant, "Constant" },
+ { Source::Previous, "Previous" },
+ };
+
+ const auto src_it = source_map.find(source);
+ if (src_it == source_map.end())
+ return "Unknown";
+
+ return src_it->second;
+}
+
+static std::string GetTevStageConfigColorSourceString(const Pica::Regs::TevStageConfig::Source& source, const Pica::Regs::TevStageConfig::ColorModifier modifier) {
using ColorModifier = Pica::Regs::TevStageConfig::ColorModifier;
+ static const std::map<ColorModifier, std::string> color_modifier_map = {
+ { ColorModifier::SourceColor, "%source.rgb" },
+ { ColorModifier::OneMinusSourceColor, "(1.0 - %source.rgb)" },
+ { ColorModifier::SourceAlpha, "%source.aaa" },
+ { ColorModifier::OneMinusSourceAlpha, "(1.0 - %source.aaa)" },
+ { ColorModifier::SourceRed, "%source.rrr" },
+ { ColorModifier::OneMinusSourceRed, "(1.0 - %source.rrr)" },
+ { ColorModifier::SourceGreen, "%source.ggg" },
+ { ColorModifier::OneMinusSourceGreen, "(1.0 - %source.ggg)" },
+ { ColorModifier::SourceBlue, "%source.bbb" },
+ { ColorModifier::OneMinusSourceBlue, "(1.0 - %source.bbb)" },
+ };
+
+ auto src_str = GetTevStageConfigSourceString(source);
+ auto modifier_it = color_modifier_map.find(modifier);
+ std::string modifier_str = "%source.????";
+ if (modifier_it != color_modifier_map.end())
+ modifier_str = modifier_it->second;
+
+ return ReplacePattern(modifier_str, "%source", src_str);
+}
+
+static std::string GetTevStageConfigAlphaSourceString(const Pica::Regs::TevStageConfig::Source& source, const Pica::Regs::TevStageConfig::AlphaModifier modifier) {
using AlphaModifier = Pica::Regs::TevStageConfig::AlphaModifier;
+ static const std::map<AlphaModifier, std::string> alpha_modifier_map = {
+ { AlphaModifier::SourceAlpha, "%source.a" },
+ { AlphaModifier::OneMinusSourceAlpha, "(1.0 - %source.a)" },
+ { AlphaModifier::SourceRed, "%source.r" },
+ { AlphaModifier::OneMinusSourceRed, "(1.0 - %source.r)" },
+ { AlphaModifier::SourceGreen, "%source.g" },
+ { AlphaModifier::OneMinusSourceGreen, "(1.0 - %source.g)" },
+ { AlphaModifier::SourceBlue, "%source.b" },
+ { AlphaModifier::OneMinusSourceBlue, "(1.0 - %source.b)" },
+ };
+
+ auto src_str = GetTevStageConfigSourceString(source);
+ auto modifier_it = alpha_modifier_map.find(modifier);
+ std::string modifier_str = "%source.????";
+ if (modifier_it != alpha_modifier_map.end())
+ modifier_str = modifier_it->second;
+
+ return ReplacePattern(modifier_str, "%source", src_str);
+}
+
+static std::string GetTevStageConfigOperationString(const Pica::Regs::TevStageConfig::Operation& operation) {
using Operation = Pica::Regs::TevStageConfig::Operation;
+ static const std::map<Operation, std::string> combiner_map = {
+ { Operation::Replace, "%source1" },
+ { Operation::Modulate, "(%source1 * %source2)" },
+ { Operation::Add, "(%source1 + %source2)" },
+ { Operation::AddSigned, "(%source1 + %source2) - 0.5" },
+ { Operation::Lerp, "lerp(%source1, %source2, %source3)" },
+ { Operation::Subtract, "(%source1 - %source2)" },
+ { Operation::Dot3_RGB, "dot(%source1, %source2)" },
+ { Operation::MultiplyThenAdd, "((%source1 * %source2) + %source3)" },
+ { Operation::AddThenMultiply, "((%source1 + %source2) * %source3)" },
+ };
- std::string stage_info = "Tev setup:\n";
- for (size_t index = 0; index < stages.size(); ++index) {
- const auto& tev_stage = stages[index];
+ const auto op_it = combiner_map.find(operation);
+ if (op_it == combiner_map.end())
+ return "Unknown op (%source1, %source2, %source3)";
- static const std::map<Source, std::string> source_map = {
- { Source::PrimaryColor, "PrimaryColor" },
- { Source::Texture0, "Texture0" },
- { Source::Texture1, "Texture1" },
- { Source::Texture2, "Texture2" },
- { Source::Constant, "Constant" },
- { Source::Previous, "Previous" },
- };
+ return op_it->second;
+}
- static const std::map<ColorModifier, std::string> color_modifier_map = {
- { ColorModifier::SourceColor, { "%source.rgb" } },
- { ColorModifier::SourceAlpha, { "%source.aaa" } },
- };
- static const std::map<AlphaModifier, std::string> alpha_modifier_map = {
- { AlphaModifier::SourceAlpha, "%source.a" },
- { AlphaModifier::OneMinusSourceAlpha, "(255 - %source.a)" },
- };
+std::string GetTevStageConfigColorCombinerString(const Pica::Regs::TevStageConfig& tev_stage) {
+ auto op_str = GetTevStageConfigOperationString(tev_stage.color_op);
+ op_str = ReplacePattern(op_str, "%source1", GetTevStageConfigColorSourceString(tev_stage.color_source1, tev_stage.color_modifier1));
+ op_str = ReplacePattern(op_str, "%source2", GetTevStageConfigColorSourceString(tev_stage.color_source2, tev_stage.color_modifier2));
+ return ReplacePattern(op_str, "%source3", GetTevStageConfigColorSourceString(tev_stage.color_source3, tev_stage.color_modifier3));
+}
- static const std::map<Operation, std::string> combiner_map = {
- { Operation::Replace, "%source1" },
- { Operation::Modulate, "(%source1 * %source2) / 255" },
- { Operation::Add, "(%source1 + %source2)" },
- { Operation::Lerp, "lerp(%source1, %source2, %source3)" },
- };
+std::string GetTevStageConfigAlphaCombinerString(const Pica::Regs::TevStageConfig& tev_stage) {
+ auto op_str = GetTevStageConfigOperationString(tev_stage.alpha_op);
+ op_str = ReplacePattern(op_str, "%source1", GetTevStageConfigAlphaSourceString(tev_stage.alpha_source1, tev_stage.alpha_modifier1));
+ op_str = ReplacePattern(op_str, "%source2", GetTevStageConfigAlphaSourceString(tev_stage.alpha_source2, tev_stage.alpha_modifier2));
+ return ReplacePattern(op_str, "%source3", GetTevStageConfigAlphaSourceString(tev_stage.alpha_source3, tev_stage.alpha_modifier3));
+}
- static auto ReplacePattern =
- [](const std::string& input, const std::string& pattern, const std::string& replacement) -> std::string {
- size_t start = input.find(pattern);
- if (start == std::string::npos)
- return input;
-
- std::string ret = input;
- ret.replace(start, pattern.length(), replacement);
- return ret;
- };
- static auto GetColorSourceStr =
- [](const Source& src, const ColorModifier& modifier) {
- auto src_it = source_map.find(src);
- std::string src_str = "Unknown";
- if (src_it != source_map.end())
- src_str = src_it->second;
-
- auto modifier_it = color_modifier_map.find(modifier);
- std::string modifier_str = "%source.????";
- if (modifier_it != color_modifier_map.end())
- modifier_str = modifier_it->second;
-
- return ReplacePattern(modifier_str, "%source", src_str);
- };
- static auto GetColorCombinerStr =
- [](const Regs::TevStageConfig& tev_stage) {
- auto op_it = combiner_map.find(tev_stage.color_op);
- std::string op_str = "Unknown op (%source1, %source2, %source3)";
- if (op_it != combiner_map.end())
- op_str = op_it->second;
-
- op_str = ReplacePattern(op_str, "%source1", GetColorSourceStr(tev_stage.color_source1, tev_stage.color_modifier1));
- op_str = ReplacePattern(op_str, "%source2", GetColorSourceStr(tev_stage.color_source2, tev_stage.color_modifier2));
- return ReplacePattern(op_str, "%source3", GetColorSourceStr(tev_stage.color_source3, tev_stage.color_modifier3));
- };
- static auto GetAlphaSourceStr =
- [](const Source& src, const AlphaModifier& modifier) {
- auto src_it = source_map.find(src);
- std::string src_str = "Unknown";
- if (src_it != source_map.end())
- src_str = src_it->second;
-
- auto modifier_it = alpha_modifier_map.find(modifier);
- std::string modifier_str = "%source.????";
- if (modifier_it != alpha_modifier_map.end())
- modifier_str = modifier_it->second;
-
- return ReplacePattern(modifier_str, "%source", src_str);
- };
- static auto GetAlphaCombinerStr =
- [](const Regs::TevStageConfig& tev_stage) {
- auto op_it = combiner_map.find(tev_stage.alpha_op);
- std::string op_str = "Unknown op (%source1, %source2, %source3)";
- if (op_it != combiner_map.end())
- op_str = op_it->second;
-
- op_str = ReplacePattern(op_str, "%source1", GetAlphaSourceStr(tev_stage.alpha_source1, tev_stage.alpha_modifier1));
- op_str = ReplacePattern(op_str, "%source2", GetAlphaSourceStr(tev_stage.alpha_source2, tev_stage.alpha_modifier2));
- return ReplacePattern(op_str, "%source3", GetAlphaSourceStr(tev_stage.alpha_source3, tev_stage.alpha_modifier3));
- };
-
- stage_info += "Stage " + std::to_string(index) + ": " + GetColorCombinerStr(tev_stage) + " " + GetAlphaCombinerStr(tev_stage) + "\n";
+void DumpTevStageConfig(const std::array<Pica::Regs::TevStageConfig, 6>& stages) {
+ std::string stage_info = "Tev setup:\n";
+ for (size_t index = 0; index < stages.size(); ++index) {
+ const auto& tev_stage = stages[index];
+ stage_info += "Stage " + std::to_string(index) + ": " + GetTevStageConfigColorCombinerString(tev_stage) + " " + GetTevStageConfigAlphaCombinerString(tev_stage) + "\n";
}
-
LOG_TRACE(HW_GPU, "%s", stage_info.c_str());
}
diff --git a/src/video_core/debug_utils/debug_utils.h b/src/video_core/debug_utils/debug_utils.h
index f628292a4..92e9734ae 100644
--- a/src/video_core/debug_utils/debug_utils.h
+++ b/src/video_core/debug_utils/debug_utils.h
@@ -224,7 +224,11 @@ const Math::Vec4<u8> LookupTexture(const u8* source, int s, int t, const Texture
void DumpTexture(const Pica::Regs::TextureConfig& texture_config, u8* data);
-void DumpTevStageConfig(const std::array<Pica::Regs::TevStageConfig,6>& stages);
+std::string GetTevStageConfigColorCombinerString(const Pica::Regs::TevStageConfig& tev_stage);
+std::string GetTevStageConfigAlphaCombinerString(const Pica::Regs::TevStageConfig& tev_stage);
+
+/// Dumps the Tev stage config to log at trace level
+void DumpTevStageConfig(const std::array<Pica::Regs::TevStageConfig, 6>& stages);
/**
* Used in the vertex loader to merge access records. TODO: Investigate if actually useful.
diff --git a/src/video_core/pica.h b/src/video_core/pica.h
index 86c0a0096..544ea037f 100644
--- a/src/video_core/pica.h
+++ b/src/video_core/pica.h
@@ -787,23 +787,21 @@ struct Regs {
LightColor diffuse; // material.diffuse * light.diffuse
LightColor ambient; // material.ambient * light.ambient
- struct {
- // Encoded as 16-bit floating point
- union {
- BitField< 0, 16, u32> x;
- BitField<16, 16, u32> y;
- };
- union {
- BitField< 0, 16, u32> z;
- };
+ // Encoded as 16-bit floating point
+ union {
+ BitField< 0, 16, u32> x;
+ BitField<16, 16, u32> y;
+ };
+ union {
+ BitField< 0, 16, u32> z;
+ };
- INSERT_PADDING_WORDS(0x3);
+ INSERT_PADDING_WORDS(0x3);
- union {
- BitField<0, 1, u32> directional;
- BitField<1, 1, u32> two_sided_diffuse; // When disabled, clamp dot-product to 0
- };
- };
+ union {
+ BitField<0, 1, u32> directional;
+ BitField<1, 1, u32> two_sided_diffuse; // When disabled, clamp dot-product to 0
+ } config;
BitField<0, 20, u32> dist_atten_bias;
BitField<0, 20, u32> dist_atten_scale;
@@ -824,7 +822,7 @@ struct Regs {
BitField<27, 1, u32> clamp_highlights;
BitField<28, 2, LightingBumpMode> bump_mode;
BitField<30, 1, u32> disable_bump_renorm;
- };
+ } config0;
union {
BitField<16, 1, u32> disable_lut_d0;
@@ -845,13 +843,13 @@ struct Regs {
BitField<29, 1, u32> disable_dist_atten_light_5;
BitField<30, 1, u32> disable_dist_atten_light_6;
BitField<31, 1, u32> disable_dist_atten_light_7;
- };
+ } config1;
bool IsDistAttenDisabled(unsigned index) const {
- const unsigned disable[] = { disable_dist_atten_light_0, disable_dist_atten_light_1,
- disable_dist_atten_light_2, disable_dist_atten_light_3,
- disable_dist_atten_light_4, disable_dist_atten_light_5,
- disable_dist_atten_light_6, disable_dist_atten_light_7 };
+ const unsigned disable[] = { config1.disable_dist_atten_light_0, config1.disable_dist_atten_light_1,
+ config1.disable_dist_atten_light_2, config1.disable_dist_atten_light_3,
+ config1.disable_dist_atten_light_4, config1.disable_dist_atten_light_5,
+ config1.disable_dist_atten_light_6, config1.disable_dist_atten_light_7 };
return disable[index] != 0;
}
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index ed2e2f3ae..931c34a37 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -104,7 +104,6 @@ RasterizerOpenGL::RasterizerOpenGL() : shader_dirty(true) {
// Sync fixed function OpenGL state
SyncCullMode();
- SyncDepthModifiers();
SyncBlendEnabled();
SyncBlendFuncs();
SyncBlendColor();
@@ -259,8 +258,10 @@ void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) {
// Depth modifiers
case PICA_REG_INDEX(viewport_depth_range):
+ SyncDepthScale();
+ break;
case PICA_REG_INDEX(viewport_depth_near_plane):
- SyncDepthModifiers();
+ SyncDepthOffset();
break;
// Depth buffering
@@ -379,6 +380,17 @@ void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) {
SyncCombinerColor();
break;
+ // Fragment lighting switches
+ case PICA_REG_INDEX(lighting.disable):
+ case PICA_REG_INDEX(lighting.num_lights):
+ case PICA_REG_INDEX(lighting.config0):
+ case PICA_REG_INDEX(lighting.config1):
+ case PICA_REG_INDEX(lighting.abs_lut_input):
+ case PICA_REG_INDEX(lighting.lut_input):
+ case PICA_REG_INDEX(lighting.lut_scale):
+ case PICA_REG_INDEX(lighting.light_enable):
+ break;
+
// Fragment lighting specular 0 color
case PICA_REG_INDEX_WORKAROUND(lighting.light[0].specular_0, 0x140 + 0 * 0x10):
SyncLightSpecular0(0);
@@ -517,6 +529,70 @@ void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) {
SyncLightPosition(7);
break;
+ // Fragment lighting light source config
+ case PICA_REG_INDEX_WORKAROUND(lighting.light[0].config, 0x149 + 0 * 0x10):
+ case PICA_REG_INDEX_WORKAROUND(lighting.light[1].config, 0x149 + 1 * 0x10):
+ case PICA_REG_INDEX_WORKAROUND(lighting.light[2].config, 0x149 + 2 * 0x10):
+ case PICA_REG_INDEX_WORKAROUND(lighting.light[3].config, 0x149 + 3 * 0x10):
+ case PICA_REG_INDEX_WORKAROUND(lighting.light[4].config, 0x149 + 4 * 0x10):
+ case PICA_REG_INDEX_WORKAROUND(lighting.light[5].config, 0x149 + 5 * 0x10):
+ case PICA_REG_INDEX_WORKAROUND(lighting.light[6].config, 0x149 + 6 * 0x10):
+ case PICA_REG_INDEX_WORKAROUND(lighting.light[7].config, 0x149 + 7 * 0x10):
+ shader_dirty = true;
+ break;
+
+ // Fragment lighting distance attenuation bias
+ case PICA_REG_INDEX_WORKAROUND(lighting.light[0].dist_atten_bias, 0x014A + 0 * 0x10):
+ SyncLightDistanceAttenuationBias(0);
+ break;
+ case PICA_REG_INDEX_WORKAROUND(lighting.light[1].dist_atten_bias, 0x014A + 1 * 0x10):
+ SyncLightDistanceAttenuationBias(1);
+ break;
+ case PICA_REG_INDEX_WORKAROUND(lighting.light[2].dist_atten_bias, 0x014A + 2 * 0x10):
+ SyncLightDistanceAttenuationBias(2);
+ break;
+ case PICA_REG_INDEX_WORKAROUND(lighting.light[3].dist_atten_bias, 0x014A + 3 * 0x10):
+ SyncLightDistanceAttenuationBias(3);
+ break;
+ case PICA_REG_INDEX_WORKAROUND(lighting.light[4].dist_atten_bias, 0x014A + 4 * 0x10):
+ SyncLightDistanceAttenuationBias(4);
+ break;
+ case PICA_REG_INDEX_WORKAROUND(lighting.light[5].dist_atten_bias, 0x014A + 5 * 0x10):
+ SyncLightDistanceAttenuationBias(5);
+ break;
+ case PICA_REG_INDEX_WORKAROUND(lighting.light[6].dist_atten_bias, 0x014A + 6 * 0x10):
+ SyncLightDistanceAttenuationBias(6);
+ break;
+ case PICA_REG_INDEX_WORKAROUND(lighting.light[7].dist_atten_bias, 0x014A + 7 * 0x10):
+ SyncLightDistanceAttenuationBias(7);
+ break;
+
+ // Fragment lighting distance attenuation scale
+ case PICA_REG_INDEX_WORKAROUND(lighting.light[0].dist_atten_scale, 0x014B + 0 * 0x10):
+ SyncLightDistanceAttenuationScale(0);
+ break;
+ case PICA_REG_INDEX_WORKAROUND(lighting.light[1].dist_atten_scale, 0x014B + 1 * 0x10):
+ SyncLightDistanceAttenuationScale(1);
+ break;
+ case PICA_REG_INDEX_WORKAROUND(lighting.light[2].dist_atten_scale, 0x014B + 2 * 0x10):
+ SyncLightDistanceAttenuationScale(2);
+ break;
+ case PICA_REG_INDEX_WORKAROUND(lighting.light[3].dist_atten_scale, 0x014B + 3 * 0x10):
+ SyncLightDistanceAttenuationScale(3);
+ break;
+ case PICA_REG_INDEX_WORKAROUND(lighting.light[4].dist_atten_scale, 0x014B + 4 * 0x10):
+ SyncLightDistanceAttenuationScale(4);
+ break;
+ case PICA_REG_INDEX_WORKAROUND(lighting.light[5].dist_atten_scale, 0x014B + 5 * 0x10):
+ SyncLightDistanceAttenuationScale(5);
+ break;
+ case PICA_REG_INDEX_WORKAROUND(lighting.light[6].dist_atten_scale, 0x014B + 6 * 0x10):
+ SyncLightDistanceAttenuationScale(6);
+ break;
+ case PICA_REG_INDEX_WORKAROUND(lighting.light[7].dist_atten_scale, 0x014B + 7 * 0x10):
+ SyncLightDistanceAttenuationScale(7);
+ break;
+
// Fragment lighting global ambient color (emission + ambient * ambient)
case PICA_REG_INDEX_WORKAROUND(lighting.global_ambient, 0x1c0):
SyncGlobalAmbient();
@@ -880,6 +956,8 @@ void RasterizerOpenGL::SetShader() {
glUniformBlockBinding(current_shader->shader.handle, block_index, 0);
// Update uniforms
+ SyncDepthScale();
+ SyncDepthOffset();
SyncAlphaTest();
SyncCombinerColor();
auto& tev_stages = Pica::g_state.regs.GetTevStages();
@@ -893,6 +971,8 @@ void RasterizerOpenGL::SetShader() {
SyncLightDiffuse(light_index);
SyncLightAmbient(light_index);
SyncLightPosition(light_index);
+ SyncLightDistanceAttenuationBias(light_index);
+ SyncLightDistanceAttenuationScale(light_index);
}
}
}
@@ -922,13 +1002,20 @@ void RasterizerOpenGL::SyncCullMode() {
}
}
-void RasterizerOpenGL::SyncDepthModifiers() {
+void RasterizerOpenGL::SyncDepthScale() {
float depth_scale = Pica::float24::FromRaw(Pica::g_state.regs.viewport_depth_range).ToFloat32();
- float depth_offset = Pica::float24::FromRaw(Pica::g_state.regs.viewport_depth_near_plane).ToFloat32();
+ if (depth_scale != uniform_block_data.data.depth_scale) {
+ uniform_block_data.data.depth_scale = depth_scale;
+ uniform_block_data.dirty = true;
+ }
+}
- uniform_block_data.data.depth_scale = depth_scale;
- uniform_block_data.data.depth_offset = depth_offset;
- uniform_block_data.dirty = true;
+void RasterizerOpenGL::SyncDepthOffset() {
+ float depth_offset = Pica::float24::FromRaw(Pica::g_state.regs.viewport_depth_near_plane).ToFloat32();
+ if (depth_offset != uniform_block_data.data.depth_offset) {
+ uniform_block_data.data.depth_offset = depth_offset;
+ uniform_block_data.dirty = true;
+ }
}
void RasterizerOpenGL::SyncBlendEnabled() {
@@ -937,6 +1024,8 @@ void RasterizerOpenGL::SyncBlendEnabled() {
void RasterizerOpenGL::SyncBlendFuncs() {
const auto& regs = Pica::g_state.regs;
+ state.blend.rgb_equation = PicaToGL::BlendEquation(regs.output_merger.alpha_blending.blend_equation_rgb);
+ state.blend.a_equation = PicaToGL::BlendEquation(regs.output_merger.alpha_blending.blend_equation_a);
state.blend.src_rgb_func = PicaToGL::BlendFunc(regs.output_merger.alpha_blending.factor_source_rgb);
state.blend.dst_rgb_func = PicaToGL::BlendFunc(regs.output_merger.alpha_blending.factor_dest_rgb);
state.blend.src_a_func = PicaToGL::BlendFunc(regs.output_merger.alpha_blending.factor_source_a);
@@ -1093,3 +1182,21 @@ void RasterizerOpenGL::SyncLightPosition(int light_index) {
uniform_block_data.dirty = true;
}
}
+
+void RasterizerOpenGL::SyncLightDistanceAttenuationBias(int light_index) {
+ GLfloat dist_atten_bias = Pica::float20::FromRaw(Pica::g_state.regs.lighting.light[light_index].dist_atten_bias).ToFloat32();
+
+ if (dist_atten_bias != uniform_block_data.data.light_src[light_index].dist_atten_bias) {
+ uniform_block_data.data.light_src[light_index].dist_atten_bias = dist_atten_bias;
+ uniform_block_data.dirty = true;
+ }
+}
+
+void RasterizerOpenGL::SyncLightDistanceAttenuationScale(int light_index) {
+ GLfloat dist_atten_scale = Pica::float20::FromRaw(Pica::g_state.regs.lighting.light[light_index].dist_atten_scale).ToFloat32();
+
+ if (dist_atten_scale != uniform_block_data.data.light_src[light_index].dist_atten_scale) {
+ uniform_block_data.data.light_src[light_index].dist_atten_scale = dist_atten_scale;
+ uniform_block_data.dirty = true;
+ }
+}
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index eed00011a..bb7f20161 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -89,49 +89,47 @@ union PicaShaderConfig {
unsigned num = regs.lighting.light_enable.GetNum(light_index);
const auto& light = regs.lighting.light[num];
state.lighting.light[light_index].num = num;
- state.lighting.light[light_index].directional = light.directional != 0;
- state.lighting.light[light_index].two_sided_diffuse = light.two_sided_diffuse != 0;
+ state.lighting.light[light_index].directional = light.config.directional != 0;
+ state.lighting.light[light_index].two_sided_diffuse = light.config.two_sided_diffuse != 0;
state.lighting.light[light_index].dist_atten_enable = !regs.lighting.IsDistAttenDisabled(num);
- state.lighting.light[light_index].dist_atten_bias = Pica::float20::FromRaw(light.dist_atten_bias).ToFloat32();
- state.lighting.light[light_index].dist_atten_scale = Pica::float20::FromRaw(light.dist_atten_scale).ToFloat32();
}
- state.lighting.lut_d0.enable = regs.lighting.disable_lut_d0 == 0;
+ state.lighting.lut_d0.enable = regs.lighting.config1.disable_lut_d0 == 0;
state.lighting.lut_d0.abs_input = regs.lighting.abs_lut_input.disable_d0 == 0;
state.lighting.lut_d0.type = regs.lighting.lut_input.d0.Value();
state.lighting.lut_d0.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.d0);
- state.lighting.lut_d1.enable = regs.lighting.disable_lut_d1 == 0;
+ state.lighting.lut_d1.enable = regs.lighting.config1.disable_lut_d1 == 0;
state.lighting.lut_d1.abs_input = regs.lighting.abs_lut_input.disable_d1 == 0;
state.lighting.lut_d1.type = regs.lighting.lut_input.d1.Value();
state.lighting.lut_d1.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.d1);
- state.lighting.lut_fr.enable = regs.lighting.disable_lut_fr == 0;
+ state.lighting.lut_fr.enable = regs.lighting.config1.disable_lut_fr == 0;
state.lighting.lut_fr.abs_input = regs.lighting.abs_lut_input.disable_fr == 0;
state.lighting.lut_fr.type = regs.lighting.lut_input.fr.Value();
state.lighting.lut_fr.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.fr);
- state.lighting.lut_rr.enable = regs.lighting.disable_lut_rr == 0;
+ state.lighting.lut_rr.enable = regs.lighting.config1.disable_lut_rr == 0;
state.lighting.lut_rr.abs_input = regs.lighting.abs_lut_input.disable_rr == 0;
state.lighting.lut_rr.type = regs.lighting.lut_input.rr.Value();
state.lighting.lut_rr.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.rr);
- state.lighting.lut_rg.enable = regs.lighting.disable_lut_rg == 0;
+ state.lighting.lut_rg.enable = regs.lighting.config1.disable_lut_rg == 0;
state.lighting.lut_rg.abs_input = regs.lighting.abs_lut_input.disable_rg == 0;
state.lighting.lut_rg.type = regs.lighting.lut_input.rg.Value();
state.lighting.lut_rg.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.rg);
- state.lighting.lut_rb.enable = regs.lighting.disable_lut_rb == 0;
+ state.lighting.lut_rb.enable = regs.lighting.config1.disable_lut_rb == 0;
state.lighting.lut_rb.abs_input = regs.lighting.abs_lut_input.disable_rb == 0;
state.lighting.lut_rb.type = regs.lighting.lut_input.rb.Value();
state.lighting.lut_rb.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.rb);
- state.lighting.config = regs.lighting.config;
- state.lighting.fresnel_selector = regs.lighting.fresnel_selector;
- state.lighting.bump_mode = regs.lighting.bump_mode;
- state.lighting.bump_selector = regs.lighting.bump_selector;
- state.lighting.bump_renorm = regs.lighting.disable_bump_renorm == 0;
- state.lighting.clamp_highlights = regs.lighting.clamp_highlights != 0;
+ state.lighting.config = regs.lighting.config0.config;
+ state.lighting.fresnel_selector = regs.lighting.config0.fresnel_selector;
+ state.lighting.bump_mode = regs.lighting.config0.bump_mode;
+ state.lighting.bump_selector = regs.lighting.config0.bump_selector;
+ state.lighting.bump_renorm = regs.lighting.config0.disable_bump_renorm == 0;
+ state.lighting.clamp_highlights = regs.lighting.config0.clamp_highlights != 0;
return res;
}
@@ -184,8 +182,6 @@ union PicaShaderConfig {
bool directional;
bool two_sided_diffuse;
bool dist_atten_enable;
- GLfloat dist_atten_scale;
- GLfloat dist_atten_bias;
} light[8];
bool enable;
@@ -316,6 +312,8 @@ private:
alignas(16) GLvec3 diffuse;
alignas(16) GLvec3 ambient;
alignas(16) GLvec3 position;
+ GLfloat dist_atten_bias;
+ GLfloat dist_atten_scale;
};
/// Uniform structure for the Uniform Buffer Object, all members must be 16-byte aligned
@@ -330,7 +328,7 @@ private:
LightSrc light_src[8];
};
- static_assert(sizeof(UniformData) == 0x310, "The size of the UniformData structure has changed, update the structure in the shader");
+ static_assert(sizeof(UniformData) == 0x390, "The size of the UniformData structure has changed, update the structure in the shader");
static_assert(sizeof(UniformData) < 16384, "UniformData structure must be less than 16kb as per the OpenGL spec");
/// Sets the OpenGL shader in accordance with the current PICA register state
@@ -339,8 +337,11 @@ private:
/// Syncs the cull mode to match the PICA register
void SyncCullMode();
- /// Syncs the depth scale and offset to match the PICA registers
- void SyncDepthModifiers();
+ /// Syncs the depth scale to match the PICA register
+ void SyncDepthScale();
+
+ /// Syncs the depth offset to match the PICA register
+ void SyncDepthOffset();
/// Syncs the blend enabled status to match the PICA register
void SyncBlendEnabled();
@@ -399,6 +400,12 @@ private:
/// Syncs the specified light's position to match the PICA register
void SyncLightPosition(int light_index);
+ /// Syncs the specified light's distance attenuation bias to match the PICA register
+ void SyncLightDistanceAttenuationBias(int light_index);
+
+ /// Syncs the specified light's distance attenuation scale to match the PICA register
+ void SyncLightDistanceAttenuationScale(int light_index);
+
OpenGLState state;
RasterizerCacheOpenGL res_cache;
@@ -413,7 +420,7 @@ private:
UniformData data;
bool lut_dirty[6];
bool dirty;
- } uniform_block_data;
+ } uniform_block_data = {};
std::array<SamplerInfo, 3> texture_samplers;
OGLVertexArray vertex_array;
@@ -422,5 +429,5 @@ private:
OGLFramebuffer framebuffer;
std::array<OGLTexture, 6> lighting_luts;
- std::array<std::array<GLvec4, 256>, 6> lighting_lut_data;
+ std::array<std::array<GLvec4, 256>, 6> lighting_lut_data{};
};
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp
index 71d60e69c..8332e722d 100644
--- a/src/video_core/renderer_opengl/gl_shader_gen.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp
@@ -439,9 +439,7 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) {
// If enabled, compute distance attenuation value
std::string dist_atten = "1.0";
if (light_config.dist_atten_enable) {
- std::string scale = std::to_string(light_config.dist_atten_scale);
- std::string bias = std::to_string(light_config.dist_atten_bias);
- std::string index = "(" + scale + " * length(-view - " + light_src + ".position) + " + bias + ")";
+ std::string index = "(" + light_src + ".dist_atten_scale * length(-view - " + light_src + ".position) + " + light_src + ".dist_atten_bias)";
index = "((clamp(" + index + ", 0.0, FLOAT_255)))";
const unsigned lut_num = ((unsigned)Regs::LightingSampler::DistanceAttenuation + light_config.num);
dist_atten = GetLutValue((Regs::LightingSampler)lut_num, index);
@@ -549,6 +547,8 @@ struct LightSrc {
vec3 diffuse;
vec3 ambient;
vec3 position;
+ float dist_atten_bias;
+ float dist_atten_scale;
};
layout (std140) uniform shader_data {
diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp
index 02cd9f417..fa141fc9a 100644
--- a/src/video_core/renderer_opengl/gl_state.cpp
+++ b/src/video_core/renderer_opengl/gl_state.cpp
@@ -36,6 +36,8 @@ OpenGLState::OpenGLState() {
stencil.action_stencil_fail = GL_KEEP;
blend.enabled = false;
+ blend.rgb_equation = GL_FUNC_ADD;
+ blend.a_equation = GL_FUNC_ADD;
blend.src_rgb_func = GL_ONE;
blend.dst_rgb_func = GL_ZERO;
blend.src_a_func = GL_ONE;
@@ -165,6 +167,11 @@ void OpenGLState::Apply() const {
blend.src_a_func, blend.dst_a_func);
}
+ if (blend.rgb_equation != cur_state.blend.rgb_equation ||
+ blend.a_equation != cur_state.blend.a_equation) {
+ glBlendEquationSeparate(blend.rgb_equation, blend.a_equation);
+ }
+
if (logic_op != cur_state.logic_op) {
glLogicOp(logic_op);
}
diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h
index 24f20e47c..228727054 100644
--- a/src/video_core/renderer_opengl/gl_state.h
+++ b/src/video_core/renderer_opengl/gl_state.h
@@ -40,6 +40,8 @@ public:
struct {
bool enabled; // GL_BLEND
+ GLenum rgb_equation; // GL_BLEND_EQUATION_RGB
+ GLenum a_equation; // GL_BLEND_EQUATION_ALPHA
GLenum src_rgb_func; // GL_BLEND_SRC_RGB
GLenum dst_rgb_func; // GL_BLEND_DST_RGB
GLenum src_a_func; // GL_BLEND_SRC_ALPHA
diff --git a/src/video_core/renderer_opengl/pica_to_gl.h b/src/video_core/renderer_opengl/pica_to_gl.h
index 976d1f364..6dc2758c5 100644
--- a/src/video_core/renderer_opengl/pica_to_gl.h
+++ b/src/video_core/renderer_opengl/pica_to_gl.h
@@ -78,6 +78,26 @@ inline GLenum WrapMode(Pica::Regs::TextureConfig::WrapMode mode) {
return gl_mode;
}
+/// Maps a PICA blend equation to its OpenGL enum; unknown values are logged and, after UNREACHABLE(), fall back to GL_FUNC_ADD.
+inline GLenum BlendEquation(Pica::Regs::BlendEquation equation) {
+    static const GLenum blend_equation_table[] = {
+        GL_FUNC_ADD,              // BlendEquation::Add
+        GL_FUNC_SUBTRACT,         // BlendEquation::Subtract
+        GL_FUNC_REVERSE_SUBTRACT, // BlendEquation::ReverseSubtract
+        GL_MIN,                   // BlendEquation::Min
+        GL_MAX,                   // BlendEquation::Max
+    };
+    const size_t index = static_cast<size_t>(equation);
+    // Range check table for input. "%d" consumes an int, so log an explicitly converted value
+    // instead of pushing the enumeration itself through the variadic call.
+    if (index >= ARRAY_SIZE(blend_equation_table)) {
+        LOG_CRITICAL(Render_OpenGL, "Unknown blend equation %d", static_cast<int>(index));
+        UNREACHABLE();
+        return GL_FUNC_ADD;
+    }
+    return blend_equation_table[index];
+}
+
inline GLenum BlendFunc(Pica::Regs::BlendFactor factor) {
static const GLenum blend_func_table[] = {
GL_ZERO, // BlendFactor::Zero
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index 8f424a435..8410e0a64 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -450,7 +450,7 @@ static const char* GetType(GLenum type) {
#undef RET
}
-static void DebugHandler(GLenum source, GLenum type, GLuint id, GLenum severity, GLsizei length,
+static void APIENTRY DebugHandler(GLenum source, GLenum type, GLuint id, GLenum severity, GLsizei length,
const GLchar* message, const void* user_param) {
Log::Level level;
switch (severity) {
diff --git a/src/video_core/shader/shader.cpp b/src/video_core/shader/shader.cpp
index e93a9d92a..f565e2c91 100644
--- a/src/video_core/shader/shader.cpp
+++ b/src/video_core/shader/shader.cpp
@@ -30,6 +30,58 @@ namespace Pica {
namespace Shader {
+/// Converts this unit's output registers into an OutputVertex, scattering each register that
+/// config.output_mask enables into the slots named by g_state.regs.vs_output_attributes.
+OutputVertex OutputRegisters::ToVertex(const Regs::ShaderConfig& config) {
+    // Value-initialize the result: slots that no register maps to are still read below (color
+    // fix-up, trace log), so they must be zero rather than indeterminate.
+    OutputVertex ret{};
+    // TODO(neobrain): Under some circumstances, up to 16 attributes may be output. We need to
+    // figure out what those circumstances are and enable the remaining outputs then.
+    unsigned index = 0;
+    for (unsigned i = 0; i < 7; ++i) {
+        if (index >= g_state.regs.vs_output_total)
+            break;
+        if ((config.output_mask & (1 << i)) == 0)
+            continue;
+
+        const auto& output_register_map = g_state.regs.vs_output_attributes[index];
+        u32 semantics[4] = {
+            output_register_map.map_x, output_register_map.map_y,
+            output_register_map.map_z, output_register_map.map_w
+        };
+
+        for (unsigned comp = 0; comp < 4; ++comp) {
+            float24* out = ((float24*)&ret) + semantics[comp];
+            if (semantics[comp] != Regs::VSOutputAttributes::INVALID) {
+                *out = value[i][comp];
+            } else {
+                // Zero output so that attributes which aren't output won't have denormals in them,
+                // which would slow us down later.
+                memset(out, 0, sizeof(*out));
+            }
+        }
+
+        index++;
+    }
+
+    // The hardware takes the absolute and saturates vertex colors like this, *before* doing interpolation
+    for (unsigned i = 0; i < 4; ++i) {
+        ret.color[i] = float24::FromFloat32(
+            std::fmin(std::fabs(ret.color[i].ToFloat32()), 1.0f));
+    }
+
+    LOG_TRACE(HW_GPU, "Output vertex: pos(%.2f, %.2f, %.2f, %.2f), quat(%.2f, %.2f, %.2f, %.2f), "
+        "col(%.2f, %.2f, %.2f, %.2f), tc0(%.2f, %.2f), view(%.2f, %.2f, %.2f)",
+        ret.pos.x.ToFloat32(), ret.pos.y.ToFloat32(), ret.pos.z.ToFloat32(), ret.pos.w.ToFloat32(),
+        ret.quat.x.ToFloat32(), ret.quat.y.ToFloat32(), ret.quat.z.ToFloat32(), ret.quat.w.ToFloat32(),
+        ret.color.x.ToFloat32(), ret.color.y.ToFloat32(), ret.color.z.ToFloat32(), ret.color.w.ToFloat32(),
+        ret.tc0.u().ToFloat32(), ret.tc0.v().ToFloat32(),
+        ret.view.x.ToFloat32(), ret.view.y.ToFloat32(), ret.view.z.ToFloat32());
+
+    return ret;
+}
+
#ifdef ARCHITECTURE_x86_64
static std::unordered_map<u64, std::unique_ptr<JitShader>> shader_map;
static const JitShader* jit_shader;
@@ -62,8 +114,9 @@ void ShaderSetup::Setup() {
MICROPROFILE_DEFINE(GPU_Shader, "GPU", "Shader", MP_RGB(50, 50, 240));
-OutputVertex ShaderSetup::Run(UnitState<false>& state, const InputVertex& input, int num_attributes) {
+void ShaderSetup::Run(UnitState<false>& state, const InputVertex& input, int num_attributes) {
auto& config = g_state.regs.vs;
+ auto& setup = g_state.vs;
MICROPROFILE_SCOPE(GPU_Shader);
@@ -81,62 +134,13 @@ OutputVertex ShaderSetup::Run(UnitState<false>& state, const InputVertex& input,
#ifdef ARCHITECTURE_x86_64
if (VideoCore::g_shader_jit_enabled)
- jit_shader->Run(&state.registers, g_state.regs.vs.main_offset);
+ jit_shader->Run(setup, state, config.main_offset);
else
- RunInterpreter(state);
+ RunInterpreter(setup, state, config.main_offset);
#else
- RunInterpreter(state);
+ RunInterpreter(setup, state, config.main_offset);
#endif // ARCHITECTURE_x86_64
- // Setup output data
- OutputVertex ret;
- // TODO(neobrain): Under some circumstances, up to 16 attributes may be output. We need to
- // figure out what those circumstances are and enable the remaining outputs then.
- unsigned index = 0;
- for (unsigned i = 0; i < 7; ++i) {
-
- if (index >= g_state.regs.vs_output_total)
- break;
-
- if ((g_state.regs.vs.output_mask & (1 << i)) == 0)
- continue;
-
- const auto& output_register_map = g_state.regs.vs_output_attributes[index]; // TODO: Don't hardcode VS here
-
- u32 semantics[4] = {
- output_register_map.map_x, output_register_map.map_y,
- output_register_map.map_z, output_register_map.map_w
- };
-
- for (unsigned comp = 0; comp < 4; ++comp) {
- float24* out = ((float24*)&ret) + semantics[comp];
- if (semantics[comp] != Regs::VSOutputAttributes::INVALID) {
- *out = state.registers.output[i][comp];
- } else {
- // Zero output so that attributes which aren't output won't have denormals in them,
- // which would slow us down later.
- memset(out, 0, sizeof(*out));
- }
- }
-
- index++;
- }
-
- // The hardware takes the absolute and saturates vertex colors like this, *before* doing interpolation
- for (unsigned i = 0; i < 4; ++i) {
- ret.color[i] = float24::FromFloat32(
- std::fmin(std::fabs(ret.color[i].ToFloat32()), 1.0f));
- }
-
- LOG_TRACE(HW_GPU, "Output vertex: pos(%.2f, %.2f, %.2f, %.2f), quat(%.2f, %.2f, %.2f, %.2f), "
- "col(%.2f, %.2f, %.2f, %.2f), tc0(%.2f, %.2f), view(%.2f, %.2f, %.2f)",
- ret.pos.x.ToFloat32(), ret.pos.y.ToFloat32(), ret.pos.z.ToFloat32(), ret.pos.w.ToFloat32(),
- ret.quat.x.ToFloat32(), ret.quat.y.ToFloat32(), ret.quat.z.ToFloat32(), ret.quat.w.ToFloat32(),
- ret.color.x.ToFloat32(), ret.color.y.ToFloat32(), ret.color.z.ToFloat32(), ret.color.w.ToFloat32(),
- ret.tc0.u().ToFloat32(), ret.tc0.v().ToFloat32(),
- ret.view.x.ToFloat32(), ret.view.y.ToFloat32(), ret.view.z.ToFloat32());
-
- return ret;
}
DebugData<true> ShaderSetup::ProduceDebugInfo(const InputVertex& input, int num_attributes, const Regs::ShaderConfig& config, const ShaderSetup& setup) {
@@ -156,7 +160,7 @@ DebugData<true> ShaderSetup::ProduceDebugInfo(const InputVertex& input, int num_
state.conditional_code[0] = false;
state.conditional_code[1] = false;
- RunInterpreter(state);
+ RunInterpreter(setup, state, config.main_offset);
return state.debug;
}
diff --git a/src/video_core/shader/shader.h b/src/video_core/shader/shader.h
index 983e4a967..fee16df62 100644
--- a/src/video_core/shader/shader.h
+++ b/src/video_core/shader/shader.h
@@ -84,6 +84,15 @@ struct OutputVertex {
static_assert(std::is_pod<OutputVertex>::value, "Structure is not POD");
static_assert(sizeof(OutputVertex) == 32 * sizeof(float), "OutputVertex has invalid size");
+struct OutputRegisters { // Output registers of a shader unit, held outside UnitState::Registers
+ OutputRegisters() = default;
+ // Sixteen four-component float24 registers; alignas(16) keeps the array start 16-byte aligned
+ alignas(16) Math::Vec4<float24> value[16];
+ // Builds an OutputVertex from these registers according to the given shader configuration
+ OutputVertex ToVertex(const Regs::ShaderConfig& config);
+};
+static_assert(std::is_pod<OutputRegisters>::value, "Structure is not POD");
+
// Helper structure used to keep track of data useful for inspection of shader emulation
template<bool full_debugging>
struct DebugData;
@@ -267,11 +276,12 @@ struct UnitState {
// The registers are accessed by the shader JIT using SSE instructions, and are therefore
// required to be 16-byte aligned.
alignas(16) Math::Vec4<float24> input[16];
- alignas(16) Math::Vec4<float24> output[16];
alignas(16) Math::Vec4<float24> temporary[16];
} registers;
static_assert(std::is_pod<Registers>::value, "Structure is not POD");
+ OutputRegisters output_registers;
+
bool conditional_code[2];
// Two Address registers and one loop counter
@@ -283,10 +293,10 @@ struct UnitState {
static size_t InputOffset(const SourceRegister& reg) {
switch (reg.GetRegisterType()) {
case RegisterType::Input:
- return offsetof(UnitState::Registers, input) + reg.GetIndex()*sizeof(Math::Vec4<float24>);
+ return offsetof(UnitState, registers.input) + reg.GetIndex()*sizeof(Math::Vec4<float24>);
case RegisterType::Temporary:
- return offsetof(UnitState::Registers, temporary) + reg.GetIndex()*sizeof(Math::Vec4<float24>);
+ return offsetof(UnitState, registers.temporary) + reg.GetIndex()*sizeof(Math::Vec4<float24>);
default:
UNREACHABLE();
@@ -297,10 +307,10 @@ struct UnitState {
static size_t OutputOffset(const DestRegister& reg) {
switch (reg.GetRegisterType()) {
case RegisterType::Output:
- return offsetof(UnitState::Registers, output) + reg.GetIndex()*sizeof(Math::Vec4<float24>);
+ return offsetof(UnitState, output_registers.value) + reg.GetIndex()*sizeof(Math::Vec4<float24>);
case RegisterType::Temporary:
- return offsetof(UnitState::Registers, temporary) + reg.GetIndex()*sizeof(Math::Vec4<float24>);
+ return offsetof(UnitState, registers.temporary) + reg.GetIndex()*sizeof(Math::Vec4<float24>);
default:
UNREACHABLE();
@@ -323,6 +333,23 @@ struct ShaderSetup {
std::array<Math::Vec4<u8>, 4> i;
} uniforms;
+ static size_t UniformOffset(RegisterType type, unsigned index) { // Byte offset, from the start of ShaderSetup, of uniform `index` of the given type
+ switch (type) {
+ case RegisterType::FloatUniform:
+ return offsetof(ShaderSetup, uniforms.f) + index*sizeof(Math::Vec4<float24>);
+ // Bool uniforms advance by sizeof(bool) per index
+ case RegisterType::BoolUniform:
+ return offsetof(ShaderSetup, uniforms.b) + index*sizeof(bool);
+ // Int uniforms advance by one Math::Vec4<u8> per index
+ case RegisterType::IntUniform:
+ return offsetof(ShaderSetup, uniforms.i) + index*sizeof(Math::Vec4<u8>);
+ // Other register types are not handled here: UNREACHABLE(), with 0 as the nominal result
+ default:
+ UNREACHABLE();
+ return 0;
+ }
+ }
+
std::array<u32, 1024> program_code;
std::array<u32, 1024> swizzle_data;
@@ -337,9 +364,8 @@ struct ShaderSetup {
* @param state Shader unit state, must be setup per shader and per shader unit
* @param input Input vertex into the shader
* @param num_attributes The number of vertex shader attributes
- * @return The output vertex, after having been processed by the vertex shader
*/
- OutputVertex Run(UnitState<false>& state, const InputVertex& input, int num_attributes);
+ void Run(UnitState<false>& state, const InputVertex& input, int num_attributes);
/**
* Produce debug information based on the given shader and input vertex
diff --git a/src/video_core/shader/shader_interpreter.cpp b/src/video_core/shader/shader_interpreter.cpp
index 3a827d11f..b1eadc071 100644
--- a/src/video_core/shader/shader_interpreter.cpp
+++ b/src/video_core/shader/shader_interpreter.cpp
@@ -41,11 +41,11 @@ struct CallStackElement {
};
template<bool Debug>
-void RunInterpreter(UnitState<Debug>& state) {
+void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned offset) {
// TODO: Is there a maximal size for this?
boost::container::static_vector<CallStackElement, 16> call_stack;
- u32 program_counter = g_state.regs.vs.main_offset;
+ u32 program_counter = offset;
const auto& uniforms = g_state.vs.uniforms;
const auto& swizzle_data = g_state.vs.swizzle_data;
@@ -144,7 +144,7 @@ void RunInterpreter(UnitState<Debug>& state) {
src2[3] = src2[3] * float24::FromFloat32(-1);
}
- float24* dest = (instr.common.dest.Value() < 0x10) ? &state.registers.output[instr.common.dest.Value().GetIndex()][0]
+ float24* dest = (instr.common.dest.Value() < 0x10) ? &state.output_registers.value[instr.common.dest.Value().GetIndex()][0]
: (instr.common.dest.Value() < 0x20) ? &state.registers.temporary[instr.common.dest.Value().GetIndex()][0]
: dummy_vec4_float24;
@@ -483,7 +483,7 @@ void RunInterpreter(UnitState<Debug>& state) {
src3[3] = src3[3] * float24::FromFloat32(-1);
}
- float24* dest = (instr.mad.dest.Value() < 0x10) ? &state.registers.output[instr.mad.dest.Value().GetIndex()][0]
+ float24* dest = (instr.mad.dest.Value() < 0x10) ? &state.output_registers.value[instr.mad.dest.Value().GetIndex()][0]
: (instr.mad.dest.Value() < 0x20) ? &state.registers.temporary[instr.mad.dest.Value().GetIndex()][0]
: dummy_vec4_float24;
@@ -647,8 +647,8 @@ void RunInterpreter(UnitState<Debug>& state) {
}
// Explicit instantiation
-template void RunInterpreter(UnitState<false>& state);
-template void RunInterpreter(UnitState<true>& state);
+template void RunInterpreter(const ShaderSetup& setup, UnitState<false>& state, unsigned offset);
+template void RunInterpreter(const ShaderSetup& setup, UnitState<true>& state, unsigned offset);
} // namespace
diff --git a/src/video_core/shader/shader_interpreter.h b/src/video_core/shader/shader_interpreter.h
index 6048cdf3a..bb3ce1c6e 100644
--- a/src/video_core/shader/shader_interpreter.h
+++ b/src/video_core/shader/shader_interpreter.h
@@ -11,7 +11,7 @@ namespace Shader {
template <bool Debug> struct UnitState;
template<bool Debug>
-void RunInterpreter(UnitState<Debug>& state);
+void RunInterpreter(const ShaderSetup& setup, UnitState<Debug>& state, unsigned offset);
} // namespace
diff --git a/src/video_core/shader/shader_jit_x64.cpp b/src/video_core/shader/shader_jit_x64.cpp
index 99f6c51eb..43e7e6b4c 100644
--- a/src/video_core/shader/shader_jit_x64.cpp
+++ b/src/video_core/shader/shader_jit_x64.cpp
@@ -102,7 +102,7 @@ const JitFunction instr_table[64] = {
// purposes, as documented below:
/// Pointer to the uniform memory
-static const X64Reg UNIFORMS = R9;
+static const X64Reg SETUP = R9;
/// The two 32-bit VS address offset registers set by the MOVA instruction
static const X64Reg ADDROFFS_REG_0 = R10;
static const X64Reg ADDROFFS_REG_1 = R11;
@@ -117,7 +117,7 @@ static const X64Reg COND0 = R13;
/// Result of the previous CMP instruction for the Y-component comparison
static const X64Reg COND1 = R14;
/// Pointer to the UnitState instance for the current VS unit
-static const X64Reg REGISTERS = R15;
+static const X64Reg STATE = R15;
/// SIMD scratch register
static const X64Reg SCRATCH = XMM0;
/// Loaded with the first swizzled source register, otherwise can be used as a scratch register
@@ -136,7 +136,7 @@ static const X64Reg NEGBIT = XMM15;
// State registers that must not be modified by external functions calls
// Scratch registers, e.g., SRC1 and SCRATCH, have to be saved on the side if needed
static const BitSet32 persistent_regs = {
- UNIFORMS, REGISTERS, // Pointers to register blocks
+ SETUP, STATE, // Pointers to register blocks
ADDROFFS_REG_0, ADDROFFS_REG_1, LOOPCOUNT_REG, COND0, COND1, // Cached registers
ONE+16, NEGBIT+16, // Constants
};
@@ -177,10 +177,10 @@ void JitShader::Compile_SwizzleSrc(Instruction instr, unsigned src_num, SourceRe
size_t src_offset;
if (src_reg.GetRegisterType() == RegisterType::FloatUniform) {
- src_ptr = UNIFORMS;
- src_offset = src_reg.GetIndex() * sizeof(float24) * 4;
+ src_ptr = SETUP;
+ src_offset = ShaderSetup::UniformOffset(RegisterType::FloatUniform, src_reg.GetIndex());
} else {
- src_ptr = REGISTERS;
+ src_ptr = STATE;
src_offset = UnitState<false>::InputOffset(src_reg);
}
@@ -264,11 +264,11 @@ void JitShader::Compile_DestEnable(Instruction instr,X64Reg src) {
// If all components are enabled, write the result to the destination register
if (swiz.dest_mask == NO_DEST_REG_MASK) {
// Store dest back to memory
- MOVAPS(MDisp(REGISTERS, dest_offset_disp), src);
+ MOVAPS(MDisp(STATE, dest_offset_disp), src);
} else {
// Not all components are enabled, so mask the result when storing to the destination register...
- MOVAPS(SCRATCH, MDisp(REGISTERS, dest_offset_disp));
+ MOVAPS(SCRATCH, MDisp(STATE, dest_offset_disp));
if (Common::GetCPUCaps().sse4_1) {
u8 mask = ((swiz.dest_mask & 1) << 3) | ((swiz.dest_mask & 8) >> 3) | ((swiz.dest_mask & 2) << 1) | ((swiz.dest_mask & 4) >> 1);
@@ -287,7 +287,7 @@ void JitShader::Compile_DestEnable(Instruction instr,X64Reg src) {
}
// Store dest back to memory
- MOVAPS(MDisp(REGISTERS, dest_offset_disp), SCRATCH);
+ MOVAPS(MDisp(STATE, dest_offset_disp), SCRATCH);
}
}
@@ -336,8 +336,8 @@ void JitShader::Compile_EvaluateCondition(Instruction instr) {
}
void JitShader::Compile_UniformCondition(Instruction instr) {
- int offset = offsetof(decltype(g_state.vs.uniforms), b) + (instr.flow_control.bool_uniform_id * sizeof(bool));
- CMP(sizeof(bool) * 8, MDisp(UNIFORMS, offset), Imm8(0));
+ int offset = ShaderSetup::UniformOffset(RegisterType::BoolUniform, instr.flow_control.bool_uniform_id); // offset of the selected bool uniform within ShaderSetup
+ CMP(sizeof(bool) * 8, MDisp(SETUP, offset), Imm8(0)); // emits a compare of that uniform, addressed relative to SETUP, against 0
}
BitSet32 JitShader::PersistentCallerSavedRegs() {
@@ -714,8 +714,8 @@ void JitShader::Compile_LOOP(Instruction instr) {
looping = true;
- int offset = offsetof(decltype(g_state.vs.uniforms), i) + (instr.flow_control.int_uniform_id * sizeof(Math::Vec4<u8>));
- MOV(32, R(LOOPCOUNT), MDisp(UNIFORMS, offset));
+ int offset = ShaderSetup::UniformOffset(RegisterType::IntUniform, instr.flow_control.int_uniform_id);
+ MOV(32, R(LOOPCOUNT), MDisp(SETUP, offset));
MOV(32, R(LOOPCOUNT_REG), R(LOOPCOUNT));
SHR(32, R(LOOPCOUNT_REG), Imm8(8));
AND(32, R(LOOPCOUNT_REG), Imm32(0xff)); // Y-component is the start
@@ -826,8 +826,8 @@ void JitShader::Compile() {
// The stack pointer is 8 modulo 16 at the entry of a procedure
ABI_PushRegistersAndAdjustStack(ABI_ALL_CALLEE_SAVED, 8);
- MOV(PTRBITS, R(REGISTERS), R(ABI_PARAM1));
- MOV(PTRBITS, R(UNIFORMS), ImmPtr(&g_state.vs.uniforms));
+ MOV(PTRBITS, R(SETUP), R(ABI_PARAM1));
+ MOV(PTRBITS, R(STATE), R(ABI_PARAM2));
// Zero address/loop registers
XOR(64, R(ADDROFFS_REG_0), R(ADDROFFS_REG_0));
@@ -845,7 +845,7 @@ void JitShader::Compile() {
MOVAPS(NEGBIT, MatR(RAX));
// Jump to start of the shader program
- JMPptr(R(ABI_PARAM2));
+ JMPptr(R(ABI_PARAM3));
// Compile entire program
Compile_Block(static_cast<unsigned>(g_state.vs.program_code.size()));
diff --git a/src/video_core/shader/shader_jit_x64.h b/src/video_core/shader/shader_jit_x64.h
index 30aa7ff30..5468459d4 100644
--- a/src/video_core/shader/shader_jit_x64.h
+++ b/src/video_core/shader/shader_jit_x64.h
@@ -36,8 +36,8 @@ class JitShader : public Gen::XCodeBlock {
public:
JitShader();
- void Run(void* registers, unsigned offset) const {
- program(registers, code_ptr[offset]);
+ void Run(const ShaderSetup& setup, UnitState<false>& state, unsigned offset) const { // invokes the compiled program with the setup and unit state
+ program(&setup, &state, code_ptr[offset]); // code_ptr[offset] is passed as the start address
}
void Compile();
@@ -117,7 +117,7 @@ private:
/// Branches that need to be fixed up once the entire shader program is compiled
std::vector<std::pair<Gen::FixupBranch, unsigned>> fixup_branches;
- using CompiledShader = void(void* registers, const u8* start_addr);
+ using CompiledShader = void(const void* setup, void* state, const u8* start_addr);
CompiledShader* program = nullptr;
};
diff --git a/src/video_core/vertex_loader.cpp b/src/video_core/vertex_loader.cpp
index 83896814f..e40f0f1ee 100644
--- a/src/video_core/vertex_loader.cpp
+++ b/src/video_core/vertex_loader.cpp
@@ -2,8 +2,8 @@
#include <boost/range/algorithm/fill.hpp>
-#include "common/assert.h"
#include "common/alignment.h"
+#include "common/assert.h"
#include "common/bit_field.h"
#include "common/common_types.h"
#include "common/logging/log.h"
@@ -21,6 +21,8 @@
namespace Pica {
void VertexLoader::Setup(const Pica::Regs& regs) {
+ ASSERT_MSG(!is_setup, "VertexLoader is not intended to be setup more than once.");
+
const auto& attribute_config = regs.vertex_attributes;
num_total_attributes = attribute_config.GetNumTotalAttributes();
@@ -60,9 +62,13 @@ void VertexLoader::Setup(const Pica::Regs& regs) {
}
}
}
+
+ is_setup = true;
}
void VertexLoader::LoadVertex(u32 base_address, int index, int vertex, Shader::InputVertex& input, DebugUtils::MemoryAccessTracker& memory_accesses) {
+ ASSERT_MSG(is_setup, "A VertexLoader needs to be setup before loading vertices.");
+
for (int i = 0; i < num_total_attributes; ++i) {
if (vertex_attribute_elements[i] != 0) {
// Load per-vertex data from the loader arrays
diff --git a/src/video_core/vertex_loader.h b/src/video_core/vertex_loader.h
index becf5a403..ac162c254 100644
--- a/src/video_core/vertex_loader.h
+++ b/src/video_core/vertex_loader.h
@@ -1,7 +1,8 @@
#pragma once
-#include "common/common_types.h"
+#include <array>
+#include "common/common_types.h"
#include "video_core/pica.h"
namespace Pica {
@@ -11,23 +12,29 @@ class MemoryAccessTracker;
}
namespace Shader {
-class InputVertex;
+struct InputVertex;
}
class VertexLoader {
public:
+ VertexLoader() = default; // leaves the loader un-setup (is_setup == false)
+ explicit VertexLoader(const Pica::Regs& regs) {
+ Setup(regs);
+ }
+ // Setup() may run only once per loader and must precede LoadVertex(); both are asserted
void Setup(const Pica::Regs& regs);
void LoadVertex(u32 base_address, int index, int vertex, Shader::InputVertex& input, DebugUtils::MemoryAccessTracker& memory_accesses);
int GetNumTotalAttributes() const { return num_total_attributes; }
private:
- u32 vertex_attribute_sources[16];
- u32 vertex_attribute_strides[16] = {};
- Regs::VertexAttributeFormat vertex_attribute_formats[16] = {};
- u32 vertex_attribute_elements[16] = {};
- bool vertex_attribute_is_default[16];
- int num_total_attributes;
+ std::array<u32, 16> vertex_attribute_sources{}; // every array is value-initialized so no member is ever indeterminate
+ std::array<u32, 16> vertex_attribute_strides{};
+ std::array<Regs::VertexAttributeFormat, 16> vertex_attribute_formats{}; // keeps the zero-initialization the C array had
+ std::array<u32, 16> vertex_attribute_elements{};
+ std::array<bool, 16> vertex_attribute_is_default{};
+ int num_total_attributes = 0;
+ bool is_setup = false; // set at the end of Setup()
};
} // namespace Pica