aboutsummaryrefslogtreecommitdiff
path: root/src/video_core
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core')
-rw-r--r--src/video_core/command_processor.cpp14
-rw-r--r--src/video_core/debug_utils/debug_utils.cpp2
-rw-r--r--src/video_core/pica.h77
-rw-r--r--src/video_core/pica_state.h16
-rw-r--r--src/video_core/rasterizer.cpp96
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.cpp102
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.h38
-rw-r--r--src/video_core/renderer_opengl/gl_shader_gen.cpp58
-rw-r--r--src/video_core/renderer_opengl/gl_state.cpp8
-rw-r--r--src/video_core/renderer_opengl/gl_state.h4
-rw-r--r--src/video_core/renderer_opengl/pica_to_gl.h1
11 files changed, 365 insertions, 51 deletions
diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp
index 19e03adf4..689859049 100644
--- a/src/video_core/command_processor.cpp
+++ b/src/video_core/command_processor.cpp
@@ -423,6 +423,20 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
break;
}
+ case PICA_REG_INDEX_WORKAROUND(fog_lut_data[0], 0xe8):
+ case PICA_REG_INDEX_WORKAROUND(fog_lut_data[1], 0xe9):
+ case PICA_REG_INDEX_WORKAROUND(fog_lut_data[2], 0xea):
+ case PICA_REG_INDEX_WORKAROUND(fog_lut_data[3], 0xeb):
+ case PICA_REG_INDEX_WORKAROUND(fog_lut_data[4], 0xec):
+ case PICA_REG_INDEX_WORKAROUND(fog_lut_data[5], 0xed):
+ case PICA_REG_INDEX_WORKAROUND(fog_lut_data[6], 0xee):
+ case PICA_REG_INDEX_WORKAROUND(fog_lut_data[7], 0xef):
+ {
+ g_state.fog.lut[regs.fog_lut_offset % 128].raw = value;
+ regs.fog_lut_offset.Assign(regs.fog_lut_offset + 1);
+ break;
+ }
+
default:
break;
}
diff --git a/src/video_core/debug_utils/debug_utils.cpp b/src/video_core/debug_utils/debug_utils.cpp
index 871368323..bfa686380 100644
--- a/src/video_core/debug_utils/debug_utils.cpp
+++ b/src/video_core/debug_utils/debug_utils.cpp
@@ -328,7 +328,7 @@ std::unique_ptr<PicaTrace> FinishPicaTracing()
std::lock_guard<std::mutex> lock(pica_trace_mutex);
std::unique_ptr<PicaTrace> ret(std::move(pica_trace));
- return std::move(ret);
+ return ret;
}
const Math::Vec4<u8> LookupTexture(const u8* source, int x, int y, const TextureInfo& info, bool disable_alpha) {
diff --git a/src/video_core/pica.h b/src/video_core/pica.h
index 544ea037f..7099c31a0 100644
--- a/src/video_core/pica.h
+++ b/src/video_core/pica.h
@@ -115,7 +115,28 @@ struct Regs {
BitField<24, 5, Semantic> map_w;
} vs_output_attributes[7];
- INSERT_PADDING_WORDS(0x11);
+ INSERT_PADDING_WORDS(0xe);
+
+ enum class ScissorMode : u32 {
+ Disabled = 0,
+ Exclude = 1, // Exclude pixels inside the scissor box
+
+ Include = 3 // Exclude pixels outside the scissor box
+ };
+
+ struct {
+ BitField<0, 2, ScissorMode> mode;
+
+ union {
+ BitField< 0, 16, u32> x1;
+ BitField<16, 16, u32> y1;
+ };
+
+ union {
+ BitField< 0, 16, u32> x2;
+ BitField<16, 16, u32> y2;
+ };
+ } scissor_test;
union {
BitField< 0, 10, s32> x;
@@ -401,22 +422,47 @@ struct Regs {
TevStageConfig tev_stage3;
INSERT_PADDING_WORDS(0x3);
+ enum class FogMode : u32 {
+ None = 0,
+ Fog = 5,
+ Gas = 7,
+ };
+
union {
- // Tev stages 0-3 write their output to the combiner buffer if the corresponding bit in
- // these masks are set
- BitField< 8, 4, u32> update_mask_rgb;
- BitField<12, 4, u32> update_mask_a;
+ BitField<0, 3, FogMode> fog_mode;
+ BitField<16, 1, u32> fog_flip;
- bool TevStageUpdatesCombinerBufferColor(unsigned stage_index) const {
- return (stage_index < 4) && (update_mask_rgb & (1 << stage_index));
- }
+ union {
+ // Tev stages 0-3 write their output to the combiner buffer if the corresponding bit in
+ // these masks is set
+ BitField< 8, 4, u32> update_mask_rgb;
+ BitField<12, 4, u32> update_mask_a;
- bool TevStageUpdatesCombinerBufferAlpha(unsigned stage_index) const {
- return (stage_index < 4) && (update_mask_a & (1 << stage_index));
- }
- } tev_combiner_buffer_input;
+ bool TevStageUpdatesCombinerBufferColor(unsigned stage_index) const {
+ return (stage_index < 4) && (update_mask_rgb & (1 << stage_index));
+ }
+
+ bool TevStageUpdatesCombinerBufferAlpha(unsigned stage_index) const {
+ return (stage_index < 4) && (update_mask_a & (1 << stage_index));
+ }
+ } tev_combiner_buffer_input;
+ };
+
+ union {
+ u32 raw;
+ BitField< 0, 8, u32> r;
+ BitField< 8, 8, u32> g;
+ BitField<16, 8, u32> b;
+ } fog_color;
+
+ INSERT_PADDING_WORDS(0x4);
+
+ BitField<0, 16, u32> fog_lut_offset;
+
+ INSERT_PADDING_WORDS(0x1);
+
+ u32 fog_lut_data[8];
- INSERT_PADDING_WORDS(0xf);
TevStageConfig tev_stage4;
INSERT_PADDING_WORDS(0x3);
TevStageConfig tev_stage5;
@@ -1303,6 +1349,7 @@ ASSERT_REG_POSITION(viewport_depth_range, 0x4d);
ASSERT_REG_POSITION(viewport_depth_near_plane, 0x4e);
ASSERT_REG_POSITION(vs_output_attributes[0], 0x50);
ASSERT_REG_POSITION(vs_output_attributes[1], 0x51);
+ASSERT_REG_POSITION(scissor_test, 0x65);
ASSERT_REG_POSITION(viewport_corner, 0x68);
ASSERT_REG_POSITION(depthmap_enable, 0x6D);
ASSERT_REG_POSITION(texture0_enable, 0x80);
@@ -1318,6 +1365,10 @@ ASSERT_REG_POSITION(tev_stage1, 0xc8);
ASSERT_REG_POSITION(tev_stage2, 0xd0);
ASSERT_REG_POSITION(tev_stage3, 0xd8);
ASSERT_REG_POSITION(tev_combiner_buffer_input, 0xe0);
+ASSERT_REG_POSITION(fog_mode, 0xe0);
+ASSERT_REG_POSITION(fog_color, 0xe1);
+ASSERT_REG_POSITION(fog_lut_offset, 0xe6);
+ASSERT_REG_POSITION(fog_lut_data, 0xe8);
ASSERT_REG_POSITION(tev_stage4, 0xf0);
ASSERT_REG_POSITION(tev_stage5, 0xf8);
ASSERT_REG_POSITION(tev_combiner_buffer_color, 0xfd);
diff --git a/src/video_core/pica_state.h b/src/video_core/pica_state.h
index 495174c25..01f4285a8 100644
--- a/src/video_core/pica_state.h
+++ b/src/video_core/pica_state.h
@@ -33,10 +33,10 @@ struct State {
u32 raw;
// LUT value, encoded as 12-bit fixed point, with 12 fraction bits
- BitField< 0, 12, u32> value;
+ BitField< 0, 12, u32> value; // 0.0.12 fixed point
// Used by HW for efficient interpolation, Citra does not use these
- BitField<12, 12, u32> difference;
+ BitField<12, 12, s32> difference; // 1.0.11 fixed point
float ToFloat() {
return static_cast<float>(value) / 4095.f;
@@ -46,6 +46,18 @@ struct State {
std::array<std::array<LutEntry, 256>, 24> luts;
} lighting;
+ struct {
+ union LutEntry {
+ // Used for raw access
+ u32 raw;
+
+ BitField< 0, 13, s32> difference; // 1.1.11 fixed point
+ BitField<13, 11, u32> value; // 0.0.11 fixed point
+ };
+
+ std::array<LutEntry, 128> lut;
+ } fog;
+
/// Current Pica command list
struct {
const u32* head_ptr;
diff --git a/src/video_core/rasterizer.cpp b/src/video_core/rasterizer.cpp
index 65168f05a..6f369a00e 100644
--- a/src/video_core/rasterizer.cpp
+++ b/src/video_core/rasterizer.cpp
@@ -338,12 +338,26 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0,
return;
}
- // TODO: Proper scissor rect test!
u16 min_x = std::min({vtxpos[0].x, vtxpos[1].x, vtxpos[2].x});
u16 min_y = std::min({vtxpos[0].y, vtxpos[1].y, vtxpos[2].y});
u16 max_x = std::max({vtxpos[0].x, vtxpos[1].x, vtxpos[2].x});
u16 max_y = std::max({vtxpos[0].y, vtxpos[1].y, vtxpos[2].y});
+ // Convert the scissor box coordinates to 12.4 fixed point
+ u16 scissor_x1 = (u16)( regs.scissor_test.x1 << 4);
+ u16 scissor_y1 = (u16)( regs.scissor_test.y1 << 4);
+ // x2,y2 have +1 added to cover the entire sub-pixel area
+ u16 scissor_x2 = (u16)((regs.scissor_test.x2 + 1) << 4);
+ u16 scissor_y2 = (u16)((regs.scissor_test.y2 + 1) << 4);
+
+ if (regs.scissor_test.mode == Regs::ScissorMode::Include) {
+ // Calculate the new bounds
+ min_x = std::max(min_x, scissor_x1);
+ min_y = std::max(min_y, scissor_y1);
+ max_x = std::min(max_x, scissor_x2);
+ max_y = std::min(max_y, scissor_y2);
+ }
+
min_x &= Fix12P4::IntMask();
min_y &= Fix12P4::IntMask();
max_x = ((max_x + Fix12P4::FracMask()) & Fix12P4::IntMask());
@@ -383,6 +397,13 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0,
for (u16 y = min_y + 8; y < max_y; y += 0x10) {
for (u16 x = min_x + 8; x < max_x; x += 0x10) {
+ // Do not process the pixel if it's inside the scissor box and the scissor mode is set to Exclude
+ if (regs.scissor_test.mode == Regs::ScissorMode::Exclude) {
+ if (x >= scissor_x1 && x < scissor_x2 &&
+ y >= scissor_y1 && y < scissor_y2)
+ continue;
+ }
+
// Calculate the barycentric coordinates w0, w1 and w2
int w0 = bias0 + SignedArea(vtxpos[1].xy(), vtxpos[2].xy(), {x, y});
int w1 = bias1 + SignedArea(vtxpos[2].xy(), vtxpos[0].xy(), {x, y});
@@ -398,6 +419,26 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0,
float24::FromFloat32(static_cast<float>(w2)));
float24 interpolated_w_inverse = float24::FromFloat32(1.0f) / Math::Dot(w_inverse, baricentric_coordinates);
+ // interpolated_z = z / w
+ float interpolated_z_over_w = (v0.screenpos[2].ToFloat32() * w0 +
+ v1.screenpos[2].ToFloat32() * w1 +
+ v2.screenpos[2].ToFloat32() * w2) / wsum;
+
+ // Not fully accurate. About 3 bits in precision are missing.
+ // Z-Buffer (z / w * scale + offset)
+ float depth_scale = float24::FromRaw(regs.viewport_depth_range).ToFloat32();
+ float depth_offset = float24::FromRaw(regs.viewport_depth_near_plane).ToFloat32();
+ float depth = interpolated_z_over_w * depth_scale + depth_offset;
+
+ // Potentially switch to W-Buffer
+ if (regs.depthmap_enable == Pica::Regs::DepthBuffering::WBuffering) {
+ // W-Buffer (z * scale + w * offset = (z / w * scale + offset) * w)
+ depth *= interpolated_w_inverse.ToFloat32() * wsum;
+ }
+
+ // Clamp the result
+ depth = MathUtil::Clamp(depth, 0.0f, 1.0f);
+
// Perspective correct attribute interpolation:
// Attribute values cannot be calculated by simple linear interpolation since
// they are not linear in screen space. For example, when interpolating a
@@ -833,6 +874,38 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0,
continue;
}
+ // Apply fog combiner
+ // Not fully accurate. We'd have to know what data type is used to
+ // store the depth etc. Using float for now until we know more
+ // about Pica datatypes
+ if (regs.fog_mode == Regs::FogMode::Fog) {
+ const Math::Vec3<u8> fog_color = {
+ static_cast<u8>(regs.fog_color.r.Value()),
+ static_cast<u8>(regs.fog_color.g.Value()),
+ static_cast<u8>(regs.fog_color.b.Value()),
+ };
+
+ // Get index into fog LUT
+ float fog_index;
+ if (g_state.regs.fog_flip) {
+ fog_index = (1.0f - depth) * 128.0f;
+ } else {
+ fog_index = depth * 128.0f;
+ }
+
+ // Generate clamped fog factor from LUT for given fog index
+ float fog_i = MathUtil::Clamp(floorf(fog_index), 0.0f, 127.0f);
+ float fog_f = fog_index - fog_i;
+ const auto& fog_lut_entry = g_state.fog.lut[static_cast<unsigned int>(fog_i)];
+ float fog_factor = (fog_lut_entry.value + fog_lut_entry.difference * fog_f) / 2047.0f; // This is signed fixed point 1.11
+ fog_factor = MathUtil::Clamp(fog_factor, 0.0f, 1.0f);
+
+ // Blend the fog
+ for (unsigned i = 0; i < 3; i++) {
+ combiner_output[i] = fog_factor * combiner_output[i] + (1.0f - fog_factor) * fog_color[i];
+ }
+ }
+
u8 old_stencil = 0;
auto UpdateStencil = [stencil_test, x, y, &old_stencil](Pica::Regs::StencilAction action) {
@@ -887,27 +960,6 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0,
}
}
- // interpolated_z = z / w
- float interpolated_z_over_w = (v0.screenpos[2].ToFloat32() * w0 +
- v1.screenpos[2].ToFloat32() * w1 +
- v2.screenpos[2].ToFloat32() * w2) / wsum;
-
- // Not fully accurate. About 3 bits in precision are missing.
- // Z-Buffer (z / w * scale + offset)
- float depth_scale = float24::FromRaw(regs.viewport_depth_range).ToFloat32();
- float depth_offset = float24::FromRaw(regs.viewport_depth_near_plane).ToFloat32();
- float depth = interpolated_z_over_w * depth_scale + depth_offset;
-
- // Potentially switch to W-Buffer
- if (regs.depthmap_enable == Pica::Regs::DepthBuffering::WBuffering) {
-
- // W-Buffer (z * scale + w * offset = (z / w * scale + offset) * w)
- depth *= interpolated_w_inverse.ToFloat32() * wsum;
- }
-
- // Clamp the result
- depth = MathUtil::Clamp(depth, 0.0f, 1.0f);
-
// Convert float to integer
unsigned num_bits = Regs::DepthBitsPerPixel(regs.framebuffer.depth_format);
u32 z = (u32)(depth * ((1 << num_bits) - 1));
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 931c34a37..f8393c618 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -62,6 +62,8 @@ RasterizerOpenGL::RasterizerOpenGL() : shader_dirty(true) {
uniform_block_data.lut_dirty[index] = true;
}
+ uniform_block_data.fog_lut_dirty = true;
+
// Set vertex attributes
glVertexAttribPointer(GLShader::ATTRIBUTE_POSITION, 4, GL_FLOAT, GL_FALSE, sizeof(HardwareVertex), (GLvoid*)offsetof(HardwareVertex, position));
glEnableVertexAttribArray(GLShader::ATTRIBUTE_POSITION);
@@ -102,6 +104,18 @@ RasterizerOpenGL::RasterizerOpenGL() : shader_dirty(true) {
glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
}
+ // Setup the LUT for the fog
+ {
+ fog_lut.Create();
+ state.fog_lut.texture_1d = fog_lut.handle;
+ }
+ state.Apply();
+
+ glActiveTexture(GL_TEXTURE9);
+ glTexImage1D(GL_TEXTURE_1D, 0, GL_R32UI, 128, 0, GL_RED_INTEGER, GL_UNSIGNED_INT, nullptr);
+ glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
+ glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
+
// Sync fixed function OpenGL state
SyncCullMode();
SyncBlendEnabled();
@@ -182,6 +196,14 @@ void RasterizerOpenGL::DrawTriangles() {
(GLint)(rect.bottom + regs.viewport_corner.y * color_surface->res_scale_height),
(GLsizei)(viewport_width * color_surface->res_scale_width), (GLsizei)(viewport_height * color_surface->res_scale_height));
+ if (uniform_block_data.data.framebuffer_scale[0] != color_surface->res_scale_width ||
+ uniform_block_data.data.framebuffer_scale[1] != color_surface->res_scale_height) {
+
+ uniform_block_data.data.framebuffer_scale[0] = color_surface->res_scale_width;
+ uniform_block_data.data.framebuffer_scale[1] = color_surface->res_scale_height;
+ uniform_block_data.dirty = true;
+ }
+
// Sync and bind the texture surfaces
const auto pica_textures = regs.GetTextures();
for (unsigned texture_index = 0; texture_index < pica_textures.size(); ++texture_index) {
@@ -215,6 +237,12 @@ void RasterizerOpenGL::DrawTriangles() {
}
}
+ // Sync the fog lut
+ if (uniform_block_data.fog_lut_dirty) {
+ SyncFogLUT();
+ uniform_block_data.fog_lut_dirty = false;
+ }
+
// Sync the uniform data
if (uniform_block_data.dirty) {
glBufferData(GL_UNIFORM_BUFFER, sizeof(UniformData), &uniform_block_data.data, GL_STATIC_DRAW);
@@ -280,6 +308,21 @@ void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) {
SyncBlendColor();
break;
+ // Fog state
+ case PICA_REG_INDEX(fog_color):
+ SyncFogColor();
+ break;
+ case PICA_REG_INDEX_WORKAROUND(fog_lut_data[0], 0xe8):
+ case PICA_REG_INDEX_WORKAROUND(fog_lut_data[1], 0xe9):
+ case PICA_REG_INDEX_WORKAROUND(fog_lut_data[2], 0xea):
+ case PICA_REG_INDEX_WORKAROUND(fog_lut_data[3], 0xeb):
+ case PICA_REG_INDEX_WORKAROUND(fog_lut_data[4], 0xec):
+ case PICA_REG_INDEX_WORKAROUND(fog_lut_data[5], 0xed):
+ case PICA_REG_INDEX_WORKAROUND(fog_lut_data[6], 0xee):
+ case PICA_REG_INDEX_WORKAROUND(fog_lut_data[7], 0xef):
+ uniform_block_data.fog_lut_dirty = true;
+ break;
+
// Alpha test
case PICA_REG_INDEX(output_merger.alpha_test):
SyncAlphaTest();
@@ -318,6 +361,15 @@ void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) {
SyncColorWriteMask();
break;
+ // Scissor test
+ case PICA_REG_INDEX(scissor_test.mode):
+ shader_dirty = true;
+ break;
+ case PICA_REG_INDEX(scissor_test.x1): // and y1
+ case PICA_REG_INDEX(scissor_test.x2): // and y2
+ SyncScissorTest();
+ break;
+
// Logic op
case PICA_REG_INDEX(output_merger.logic_op):
SyncLogicOp();
@@ -329,6 +381,7 @@ void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) {
break;
// TEV stages
+ // (This also syncs fog_mode and fog_flip which are part of tev_combiner_buffer_input)
case PICA_REG_INDEX(tev_stage0.color_source1):
case PICA_REG_INDEX(tev_stage0.color_modifier1):
case PICA_REG_INDEX(tev_stage0.color_op):
@@ -950,9 +1003,15 @@ void RasterizerOpenGL::SetShader() {
uniform_lut = glGetUniformLocation(shader->shader.handle, "lut[5]");
if (uniform_lut != -1) { glUniform1i(uniform_lut, 8); }
+ GLuint uniform_fog_lut = glGetUniformLocation(shader->shader.handle, "fog_lut");
+ if (uniform_fog_lut != -1) { glUniform1i(uniform_fog_lut, 9); }
+
current_shader = shader_cache.emplace(config, std::move(shader)).first->second.get();
unsigned int block_index = glGetUniformBlockIndex(current_shader->shader.handle, "shader_data");
+ GLint block_size;
+ glGetActiveUniformBlockiv(current_shader->shader.handle, block_index, GL_UNIFORM_BLOCK_DATA_SIZE, &block_size);
+ ASSERT_MSG(block_size == sizeof(UniformData), "Uniform block size did not match!");
glUniformBlockBinding(current_shader->shader.handle, block_index, 0);
// Update uniforms
@@ -960,6 +1019,7 @@ void RasterizerOpenGL::SetShader() {
SyncDepthOffset();
SyncAlphaTest();
SyncCombinerColor();
+ SyncScissorTest();
auto& tev_stages = Pica::g_state.regs.GetTevStages();
for (int index = 0; index < tev_stages.size(); ++index)
SyncTevConstColor(index, tev_stages[index]);
@@ -974,6 +1034,8 @@ void RasterizerOpenGL::SetShader() {
SyncLightDistanceAttenuationBias(light_index);
SyncLightDistanceAttenuationScale(light_index);
}
+
+ SyncFogColor();
}
}
@@ -1040,6 +1102,30 @@ void RasterizerOpenGL::SyncBlendColor() {
state.blend.color.alpha = blend_color[3];
}
+void RasterizerOpenGL::SyncFogColor() {
+ const auto& regs = Pica::g_state.regs;
+ uniform_block_data.data.fog_color = {
+ regs.fog_color.r.Value() / 255.0f,
+ regs.fog_color.g.Value() / 255.0f,
+ regs.fog_color.b.Value() / 255.0f
+ };
+ uniform_block_data.dirty = true;
+}
+
+void RasterizerOpenGL::SyncFogLUT() {
+ std::array<GLuint, 128> new_data;
+
+ std::transform(Pica::g_state.fog.lut.begin(), Pica::g_state.fog.lut.end(), new_data.begin(), [](const auto& entry) {
+ return entry.raw;
+ });
+
+ if (new_data != fog_lut_data) {
+ fog_lut_data = new_data;
+ glActiveTexture(GL_TEXTURE9);
+ glTexSubImage1D(GL_TEXTURE_1D, 0, 0, 128, GL_RED_INTEGER, GL_UNSIGNED_INT, fog_lut_data.data());
+ }
+}
+
void RasterizerOpenGL::SyncAlphaTest() {
const auto& regs = Pica::g_state.regs;
if (regs.output_merger.alpha_test.ref != uniform_block_data.data.alphatest_ref) {
@@ -1098,6 +1184,22 @@ void RasterizerOpenGL::SyncDepthTest() {
PicaToGL::CompareFunc(regs.output_merger.depth_test_func) : GL_ALWAYS;
}
+void RasterizerOpenGL::SyncScissorTest() {
+ const auto& regs = Pica::g_state.regs;
+
+ if (uniform_block_data.data.scissor_x1 != regs.scissor_test.x1 ||
+ uniform_block_data.data.scissor_y1 != regs.scissor_test.y1 ||
+ uniform_block_data.data.scissor_x2 != regs.scissor_test.x2 ||
+ uniform_block_data.data.scissor_y2 != regs.scissor_test.y2) {
+
+ uniform_block_data.data.scissor_x1 = regs.scissor_test.x1;
+ uniform_block_data.data.scissor_y1 = regs.scissor_test.y1;
+ uniform_block_data.data.scissor_x2 = regs.scissor_test.x2;
+ uniform_block_data.data.scissor_y2 = regs.scissor_test.y2;
+ uniform_block_data.dirty = true;
+ }
+}
+
void RasterizerOpenGL::SyncCombinerColor() {
auto combiner_color = PicaToGL::ColorRGBA8(Pica::g_state.regs.tev_combiner_buffer_color.raw);
if (combiner_color != uniform_block_data.data.tev_combiner_buffer_color) {
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index bb7f20161..c5029432b 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -56,6 +56,8 @@ union PicaShaderConfig {
const auto& regs = Pica::g_state.regs;
+ state.scissor_test_mode = regs.scissor_test.mode;
+
state.depthmap_enable = regs.depthmap_enable;
state.alpha_test_func = regs.output_merger.alpha_test.enable ?
@@ -76,6 +78,9 @@ union PicaShaderConfig {
state.tev_stages[i].scales_raw = tev_stage.scales_raw;
}
+ state.fog_mode = regs.fog_mode;
+ state.fog_flip = regs.fog_flip;
+
state.combiner_buffer_input =
regs.tev_combiner_buffer_input.update_mask_rgb.Value() |
regs.tev_combiner_buffer_input.update_mask_a.Value() << 4;
@@ -168,13 +173,15 @@ union PicaShaderConfig {
};
struct State {
-
Pica::Regs::CompareFunc alpha_test_func;
+ Pica::Regs::ScissorMode scissor_test_mode;
Pica::Regs::TextureConfig::TextureType texture0_type;
std::array<TevStageConfigRaw, 6> tev_stages;
u8 combiner_buffer_input;
Pica::Regs::DepthBuffering depthmap_enable;
+ Pica::Regs::FogMode fog_mode;
+ bool fog_flip;
struct {
struct {
@@ -316,19 +323,27 @@ private:
GLfloat dist_atten_scale;
};
- /// Uniform structure for the Uniform Buffer Object, all members must be 16-byte aligned
+ /// Uniform structure for the Uniform Buffer Object (std140 layout): vec3/vec4 members must be 16-byte aligned, vec2 members 8-byte aligned
+ // NOTE: Always keep a vec4 at the end. The GL spec is not clear whether the alignment at
+ // the end of a uniform block is included in UNIFORM_BLOCK_DATA_SIZE or not.
+ // Not following that rule will cause problems on some AMD drivers.
struct UniformData {
- // A vec4 color for each of the six tev stages
- GLvec4 const_color[6];
- GLvec4 tev_combiner_buffer_color;
+ alignas(8) GLvec2 framebuffer_scale;
GLint alphatest_ref;
GLfloat depth_scale;
GLfloat depth_offset;
+ GLint scissor_x1;
+ GLint scissor_y1;
+ GLint scissor_x2;
+ GLint scissor_y2;
+ alignas(16) GLvec3 fog_color;
alignas(16) GLvec3 lighting_global_ambient;
LightSrc light_src[8];
+ alignas(16) GLvec4 const_color[6]; // A vec4 color for each of the six tev stages
+ alignas(16) GLvec4 tev_combiner_buffer_color;
};
- static_assert(sizeof(UniformData) == 0x390, "The size of the UniformData structure has changed, update the structure in the shader");
+ static_assert(sizeof(UniformData) == 0x3C0, "The size of the UniformData structure has changed, update the structure in the shader");
static_assert(sizeof(UniformData) < 16384, "UniformData structure must be less than 16kb as per the OpenGL spec");
/// Sets the OpenGL shader in accordance with the current PICA register state
@@ -352,6 +367,10 @@ private:
/// Syncs the blend color to match the PICA register
void SyncBlendColor();
+ /// Syncs the fog states to match the PICA register
+ void SyncFogColor();
+ void SyncFogLUT();
+
/// Syncs the alpha test states to match the PICA register
void SyncAlphaTest();
@@ -373,6 +392,9 @@ private:
/// Syncs the depth test states to match the PICA register
void SyncDepthTest();
+ /// Syncs the scissor test state to match the PICA register
+ void SyncScissorTest();
+
/// Syncs the TEV combiner color buffer to match the PICA register
void SyncCombinerColor();
@@ -419,6 +441,7 @@ private:
struct {
UniformData data;
bool lut_dirty[6];
+ bool fog_lut_dirty;
bool dirty;
} uniform_block_data = {};
@@ -430,4 +453,7 @@ private:
std::array<OGLTexture, 6> lighting_luts;
std::array<std::array<GLvec4, 256>, 6> lighting_lut_data{};
+
+ OGLTexture fog_lut;
+ std::array<GLuint, 128> fog_lut_data{};
};
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp
index 8332e722d..36513dedc 100644
--- a/src/video_core/renderer_opengl/gl_shader_gen.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp
@@ -539,6 +539,8 @@ in float texcoord0_w;
in vec4 normquat;
in vec3 view;
+in vec4 gl_FragCoord;
+
out vec4 color;
struct LightSrc {
@@ -552,17 +554,24 @@ struct LightSrc {
};
layout (std140) uniform shader_data {
- vec4 const_color[NUM_TEV_STAGES];
- vec4 tev_combiner_buffer_color;
+ vec2 framebuffer_scale;
int alphatest_ref;
float depth_scale;
float depth_offset;
+ int scissor_x1;
+ int scissor_y1;
+ int scissor_x2;
+ int scissor_y2;
+ vec3 fog_color;
vec3 lighting_global_ambient;
LightSrc light_src[NUM_LIGHTS];
+ vec4 const_color[NUM_TEV_STAGES];
+ vec4 tev_combiner_buffer_color;
};
uniform sampler2D tex[3];
uniform sampler1D lut[6];
+uniform usampler1D fog_lut;
// Rotate the vector v by the quaternion q
vec3 quaternion_rotate(vec4 q, vec3 v) {
@@ -580,6 +589,25 @@ vec4 secondary_fragment_color = vec4(0.0);
return out;
}
+ // Append the scissor test
+ if (state.scissor_test_mode != Regs::ScissorMode::Disabled) {
+ out += "if (";
+ // Negate the condition if we have to keep only the pixels inside the scissor box
+ if (state.scissor_test_mode == Regs::ScissorMode::Include)
+ out += "!";
+ // x2,y2 have +1 added to cover the entire pixel area
+ out += "(gl_FragCoord.x >= scissor_x1 * framebuffer_scale.x && "
+ "gl_FragCoord.y >= scissor_y1 * framebuffer_scale.y && "
+ "gl_FragCoord.x < (scissor_x2 + 1) * framebuffer_scale.x && "
+ "gl_FragCoord.y < (scissor_y2 + 1) * framebuffer_scale.y)) discard;\n";
+ }
+
+ out += "float z_over_w = 1.0 - gl_FragCoord.z * 2.0;\n";
+ out += "float depth = z_over_w * depth_scale + depth_offset;\n";
+ if (state.depthmap_enable == Pica::Regs::DepthBuffering::WBuffering) {
+ out += "depth /= gl_FragCoord.w;\n";
+ }
+
if (state.lighting.enable)
WriteLighting(out, config);
@@ -596,14 +624,30 @@ vec4 secondary_fragment_color = vec4(0.0);
out += ") discard;\n";
}
- out += "color = last_tex_env_out;\n";
+ // Append fog combiner
+ if (state.fog_mode == Regs::FogMode::Fog) {
+ // Get index into fog LUT
+ if (state.fog_flip) {
+ out += "float fog_index = (1.0 - depth) * 128.0;\n";
+ } else {
+ out += "float fog_index = depth * 128.0;\n";
+ }
- out += "float z_over_w = 1.0 - gl_FragCoord.z * 2.0;\n";
- out += "float depth = z_over_w * depth_scale + depth_offset;\n";
- if (state.depthmap_enable == Pica::Regs::DepthBuffering::WBuffering) {
- out += "depth /= gl_FragCoord.w;\n";
+ // Generate clamped fog factor from LUT for given fog index
+ out += "float fog_i = clamp(floor(fog_index), 0.0, 127.0);\n";
+ out += "float fog_f = fog_index - fog_i;\n";
+ out += "uint fog_lut_entry = texelFetch(fog_lut, int(fog_i), 0).r;\n";
+ out += "float fog_lut_entry_difference = float(int((fog_lut_entry & 0x1FFFU) << 19U) >> 19);\n"; // Extract signed difference
+ out += "float fog_lut_entry_value = float((fog_lut_entry >> 13U) & 0x7FFU);\n";
+ out += "float fog_factor = (fog_lut_entry_value + fog_lut_entry_difference * fog_f) / 2047.0;\n";
+ out += "fog_factor = clamp(fog_factor, 0.0, 1.0);\n";
+
+ // Blend the fog
+ out += "last_tex_env_out.rgb = mix(fog_color.rgb, last_tex_env_out.rgb, fog_factor);\n";
}
+
out += "gl_FragDepth = depth;\n";
+ out += "color = last_tex_env_out;\n";
out += "}";
diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp
index fa141fc9a..13ee986b9 100644
--- a/src/video_core/renderer_opengl/gl_state.cpp
+++ b/src/video_core/renderer_opengl/gl_state.cpp
@@ -58,6 +58,8 @@ OpenGLState::OpenGLState() {
lut.texture_1d = 0;
}
+ fog_lut.texture_1d = 0;
+
draw.read_framebuffer = 0;
draw.draw_framebuffer = 0;
draw.vertex_array = 0;
@@ -195,6 +197,12 @@ void OpenGLState::Apply() const {
}
}
+ // Fog LUT
+ if (fog_lut.texture_1d != cur_state.fog_lut.texture_1d) {
+ glActiveTexture(GL_TEXTURE9);
+ glBindTexture(GL_TEXTURE_1D, fog_lut.texture_1d);
+ }
+
// Framebuffer
if (draw.read_framebuffer != cur_state.draw.read_framebuffer) {
glBindFramebuffer(GL_READ_FRAMEBUFFER, draw.read_framebuffer);
diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h
index 228727054..13c71b0a6 100644
--- a/src/video_core/renderer_opengl/gl_state.h
+++ b/src/video_core/renderer_opengl/gl_state.h
@@ -68,6 +68,10 @@ public:
} lighting_luts[6];
struct {
+ GLuint texture_1d; // GL_TEXTURE_BINDING_1D
+ } fog_lut;
+
+ struct {
GLuint read_framebuffer; // GL_READ_FRAMEBUFFER_BINDING
GLuint draw_framebuffer; // GL_DRAW_FRAMEBUFFER_BINDING
GLuint vertex_array; // GL_VERTEX_ARRAY_BINDING
diff --git a/src/video_core/renderer_opengl/pica_to_gl.h b/src/video_core/renderer_opengl/pica_to_gl.h
index 6dc2758c5..d9b9c9cc2 100644
--- a/src/video_core/renderer_opengl/pica_to_gl.h
+++ b/src/video_core/renderer_opengl/pica_to_gl.h
@@ -17,6 +17,7 @@
#include "video_core/pica.h"
+using GLvec2 = std::array<GLfloat, 2>;
using GLvec3 = std::array<GLfloat, 3>;
using GLvec4 = std::array<GLfloat, 4>;