diff options
Diffstat (limited to 'src/video_core')
| -rw-r--r-- | src/video_core/command_processor.cpp | 14 | ||||
| -rw-r--r-- | src/video_core/debug_utils/debug_utils.cpp | 2 | ||||
| -rw-r--r-- | src/video_core/pica.h | 77 | ||||
| -rw-r--r-- | src/video_core/pica_state.h | 16 | ||||
| -rw-r--r-- | src/video_core/rasterizer.cpp | 96 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.cpp | 102 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.h | 38 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_gen.cpp | 58 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_state.cpp | 8 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_state.h | 4 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/pica_to_gl.h | 1 |
11 files changed, 365 insertions, 51 deletions
diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp index 19e03adf4..689859049 100644 --- a/src/video_core/command_processor.cpp +++ b/src/video_core/command_processor.cpp @@ -423,6 +423,20 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { break; } + case PICA_REG_INDEX_WORKAROUND(fog_lut_data[0], 0xe8): + case PICA_REG_INDEX_WORKAROUND(fog_lut_data[1], 0xe9): + case PICA_REG_INDEX_WORKAROUND(fog_lut_data[2], 0xea): + case PICA_REG_INDEX_WORKAROUND(fog_lut_data[3], 0xeb): + case PICA_REG_INDEX_WORKAROUND(fog_lut_data[4], 0xec): + case PICA_REG_INDEX_WORKAROUND(fog_lut_data[5], 0xed): + case PICA_REG_INDEX_WORKAROUND(fog_lut_data[6], 0xee): + case PICA_REG_INDEX_WORKAROUND(fog_lut_data[7], 0xef): + { + g_state.fog.lut[regs.fog_lut_offset % 128].raw = value; + regs.fog_lut_offset.Assign(regs.fog_lut_offset + 1); + break; + } + default: break; } diff --git a/src/video_core/debug_utils/debug_utils.cpp b/src/video_core/debug_utils/debug_utils.cpp index 871368323..bfa686380 100644 --- a/src/video_core/debug_utils/debug_utils.cpp +++ b/src/video_core/debug_utils/debug_utils.cpp @@ -328,7 +328,7 @@ std::unique_ptr<PicaTrace> FinishPicaTracing() std::lock_guard<std::mutex> lock(pica_trace_mutex); std::unique_ptr<PicaTrace> ret(std::move(pica_trace)); - return std::move(ret); + return ret; } const Math::Vec4<u8> LookupTexture(const u8* source, int x, int y, const TextureInfo& info, bool disable_alpha) { diff --git a/src/video_core/pica.h b/src/video_core/pica.h index 544ea037f..7099c31a0 100644 --- a/src/video_core/pica.h +++ b/src/video_core/pica.h @@ -115,7 +115,28 @@ struct Regs { BitField<24, 5, Semantic> map_w; } vs_output_attributes[7]; - INSERT_PADDING_WORDS(0x11); + INSERT_PADDING_WORDS(0xe); + + enum class ScissorMode : u32 { + Disabled = 0, + Exclude = 1, // Exclude pixels inside the scissor box + + Include = 3 // Exclude pixels outside the scissor box + }; + + struct { + BitField<0, 2, ScissorMode> mode; + + union { + 
BitField< 0, 16, u32> x1; + BitField<16, 16, u32> y1; + }; + + union { + BitField< 0, 16, u32> x2; + BitField<16, 16, u32> y2; + }; + } scissor_test; union { BitField< 0, 10, s32> x; @@ -401,22 +422,47 @@ struct Regs { TevStageConfig tev_stage3; INSERT_PADDING_WORDS(0x3); + enum class FogMode : u32 { + None = 0, + Fog = 5, + Gas = 7, + }; + union { - // Tev stages 0-3 write their output to the combiner buffer if the corresponding bit in - // these masks are set - BitField< 8, 4, u32> update_mask_rgb; - BitField<12, 4, u32> update_mask_a; + BitField<0, 3, FogMode> fog_mode; + BitField<16, 1, u32> fog_flip; - bool TevStageUpdatesCombinerBufferColor(unsigned stage_index) const { - return (stage_index < 4) && (update_mask_rgb & (1 << stage_index)); - } + union { + // Tev stages 0-3 write their output to the combiner buffer if the corresponding bit in + // these masks are set + BitField< 8, 4, u32> update_mask_rgb; + BitField<12, 4, u32> update_mask_a; - bool TevStageUpdatesCombinerBufferAlpha(unsigned stage_index) const { - return (stage_index < 4) && (update_mask_a & (1 << stage_index)); - } - } tev_combiner_buffer_input; + bool TevStageUpdatesCombinerBufferColor(unsigned stage_index) const { + return (stage_index < 4) && (update_mask_rgb & (1 << stage_index)); + } + + bool TevStageUpdatesCombinerBufferAlpha(unsigned stage_index) const { + return (stage_index < 4) && (update_mask_a & (1 << stage_index)); + } + } tev_combiner_buffer_input; + }; + + union { + u32 raw; + BitField< 0, 8, u32> r; + BitField< 8, 8, u32> g; + BitField<16, 8, u32> b; + } fog_color; + + INSERT_PADDING_WORDS(0x4); + + BitField<0, 16, u32> fog_lut_offset; + + INSERT_PADDING_WORDS(0x1); + + u32 fog_lut_data[8]; - INSERT_PADDING_WORDS(0xf); TevStageConfig tev_stage4; INSERT_PADDING_WORDS(0x3); TevStageConfig tev_stage5; @@ -1303,6 +1349,7 @@ ASSERT_REG_POSITION(viewport_depth_range, 0x4d); ASSERT_REG_POSITION(viewport_depth_near_plane, 0x4e); ASSERT_REG_POSITION(vs_output_attributes[0], 0x50); 
ASSERT_REG_POSITION(vs_output_attributes[1], 0x51); +ASSERT_REG_POSITION(scissor_test, 0x65); ASSERT_REG_POSITION(viewport_corner, 0x68); ASSERT_REG_POSITION(depthmap_enable, 0x6D); ASSERT_REG_POSITION(texture0_enable, 0x80); @@ -1318,6 +1365,10 @@ ASSERT_REG_POSITION(tev_stage1, 0xc8); ASSERT_REG_POSITION(tev_stage2, 0xd0); ASSERT_REG_POSITION(tev_stage3, 0xd8); ASSERT_REG_POSITION(tev_combiner_buffer_input, 0xe0); +ASSERT_REG_POSITION(fog_mode, 0xe0); +ASSERT_REG_POSITION(fog_color, 0xe1); +ASSERT_REG_POSITION(fog_lut_offset, 0xe6); +ASSERT_REG_POSITION(fog_lut_data, 0xe8); ASSERT_REG_POSITION(tev_stage4, 0xf0); ASSERT_REG_POSITION(tev_stage5, 0xf8); ASSERT_REG_POSITION(tev_combiner_buffer_color, 0xfd); diff --git a/src/video_core/pica_state.h b/src/video_core/pica_state.h index 495174c25..01f4285a8 100644 --- a/src/video_core/pica_state.h +++ b/src/video_core/pica_state.h @@ -33,10 +33,10 @@ struct State { u32 raw; // LUT value, encoded as 12-bit fixed point, with 12 fraction bits - BitField< 0, 12, u32> value; + BitField< 0, 12, u32> value; // 0.0.12 fixed point // Used by HW for efficient interpolation, Citra does not use these - BitField<12, 12, u32> difference; + BitField<12, 12, s32> difference; // 1.0.11 fixed point float ToFloat() { return static_cast<float>(value) / 4095.f; @@ -46,6 +46,18 @@ struct State { std::array<std::array<LutEntry, 256>, 24> luts; } lighting; + struct { + union LutEntry { + // Used for raw access + u32 raw; + + BitField< 0, 13, s32> difference; // 1.1.11 fixed point + BitField<13, 11, u32> value; // 0.0.11 fixed point + }; + + std::array<LutEntry, 128> lut; + } fog; + /// Current Pica command list struct { const u32* head_ptr; diff --git a/src/video_core/rasterizer.cpp b/src/video_core/rasterizer.cpp index 65168f05a..6f369a00e 100644 --- a/src/video_core/rasterizer.cpp +++ b/src/video_core/rasterizer.cpp @@ -338,12 +338,26 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0, return; } - // TODO: Proper scissor 
rect test! u16 min_x = std::min({vtxpos[0].x, vtxpos[1].x, vtxpos[2].x}); u16 min_y = std::min({vtxpos[0].y, vtxpos[1].y, vtxpos[2].y}); u16 max_x = std::max({vtxpos[0].x, vtxpos[1].x, vtxpos[2].x}); u16 max_y = std::max({vtxpos[0].y, vtxpos[1].y, vtxpos[2].y}); + // Convert the scissor box coordinates to 12.4 fixed point + u16 scissor_x1 = (u16)( regs.scissor_test.x1 << 4); + u16 scissor_y1 = (u16)( regs.scissor_test.y1 << 4); + // x2,y2 have +1 added to cover the entire sub-pixel area + u16 scissor_x2 = (u16)((regs.scissor_test.x2 + 1) << 4); + u16 scissor_y2 = (u16)((regs.scissor_test.y2 + 1) << 4); + + if (regs.scissor_test.mode == Regs::ScissorMode::Include) { + // Calculate the new bounds + min_x = std::max(min_x, scissor_x1); + min_y = std::max(min_y, scissor_y1); + max_x = std::min(max_x, scissor_x2); + max_y = std::min(max_y, scissor_y2); + } + min_x &= Fix12P4::IntMask(); min_y &= Fix12P4::IntMask(); max_x = ((max_x + Fix12P4::FracMask()) & Fix12P4::IntMask()); @@ -383,6 +397,13 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0, for (u16 y = min_y + 8; y < max_y; y += 0x10) { for (u16 x = min_x + 8; x < max_x; x += 0x10) { + // Do not process the pixel if it's inside the scissor box and the scissor mode is set to Exclude + if (regs.scissor_test.mode == Regs::ScissorMode::Exclude) { + if (x >= scissor_x1 && x < scissor_x2 && + y >= scissor_y1 && y < scissor_y2) + continue; + } + // Calculate the barycentric coordinates w0, w1 and w2 int w0 = bias0 + SignedArea(vtxpos[1].xy(), vtxpos[2].xy(), {x, y}); int w1 = bias1 + SignedArea(vtxpos[2].xy(), vtxpos[0].xy(), {x, y}); @@ -398,6 +419,26 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0, float24::FromFloat32(static_cast<float>(w2))); float24 interpolated_w_inverse = float24::FromFloat32(1.0f) / Math::Dot(w_inverse, baricentric_coordinates); + // interpolated_z = z / w + float interpolated_z_over_w = (v0.screenpos[2].ToFloat32() * w0 + + v1.screenpos[2].ToFloat32() * w1 
+ + v2.screenpos[2].ToFloat32() * w2) / wsum; + + // Not fully accurate. About 3 bits in precision are missing. + // Z-Buffer (z / w * scale + offset) + float depth_scale = float24::FromRaw(regs.viewport_depth_range).ToFloat32(); + float depth_offset = float24::FromRaw(regs.viewport_depth_near_plane).ToFloat32(); + float depth = interpolated_z_over_w * depth_scale + depth_offset; + + // Potentially switch to W-Buffer + if (regs.depthmap_enable == Pica::Regs::DepthBuffering::WBuffering) { + // W-Buffer (z * scale + w * offset = (z / w * scale + offset) * w) + depth *= interpolated_w_inverse.ToFloat32() * wsum; + } + + // Clamp the result + depth = MathUtil::Clamp(depth, 0.0f, 1.0f); + // Perspective correct attribute interpolation: // Attribute values cannot be calculated by simple linear interpolation since // they are not linear in screen space. For example, when interpolating a @@ -833,6 +874,38 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0, continue; } + // Apply fog combiner + // Not fully accurate. We'd have to know what data type is used to + // store the depth etc. 
Using float for now until we know more + // about Pica datatypes + if (regs.fog_mode == Regs::FogMode::Fog) { + const Math::Vec3<u8> fog_color = { + static_cast<u8>(regs.fog_color.r.Value()), + static_cast<u8>(regs.fog_color.g.Value()), + static_cast<u8>(regs.fog_color.b.Value()), + }; + + // Get index into fog LUT + float fog_index; + if (g_state.regs.fog_flip) { + fog_index = (1.0f - depth) * 128.0f; + } else { + fog_index = depth * 128.0f; + } + + // Generate clamped fog factor from LUT for given fog index + float fog_i = MathUtil::Clamp(floorf(fog_index), 0.0f, 127.0f); + float fog_f = fog_index - fog_i; + const auto& fog_lut_entry = g_state.fog.lut[static_cast<unsigned int>(fog_i)]; + float fog_factor = (fog_lut_entry.value + fog_lut_entry.difference * fog_f) / 2047.0f; // This is signed fixed point 1.11 + fog_factor = MathUtil::Clamp(fog_factor, 0.0f, 1.0f); + + // Blend the fog + for (unsigned i = 0; i < 3; i++) { + combiner_output[i] = fog_factor * combiner_output[i] + (1.0f - fog_factor) * fog_color[i]; + } + } + u8 old_stencil = 0; auto UpdateStencil = [stencil_test, x, y, &old_stencil](Pica::Regs::StencilAction action) { @@ -887,27 +960,6 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0, } } - // interpolated_z = z / w - float interpolated_z_over_w = (v0.screenpos[2].ToFloat32() * w0 + - v1.screenpos[2].ToFloat32() * w1 + - v2.screenpos[2].ToFloat32() * w2) / wsum; - - // Not fully accurate. About 3 bits in precision are missing. 
- // Z-Buffer (z / w * scale + offset) - float depth_scale = float24::FromRaw(regs.viewport_depth_range).ToFloat32(); - float depth_offset = float24::FromRaw(regs.viewport_depth_near_plane).ToFloat32(); - float depth = interpolated_z_over_w * depth_scale + depth_offset; - - // Potentially switch to W-Buffer - if (regs.depthmap_enable == Pica::Regs::DepthBuffering::WBuffering) { - - // W-Buffer (z * scale + w * offset = (z / w * scale + offset) * w) - depth *= interpolated_w_inverse.ToFloat32() * wsum; - } - - // Clamp the result - depth = MathUtil::Clamp(depth, 0.0f, 1.0f); - // Convert float to integer unsigned num_bits = Regs::DepthBitsPerPixel(regs.framebuffer.depth_format); u32 z = (u32)(depth * ((1 << num_bits) - 1)); diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 931c34a37..f8393c618 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -62,6 +62,8 @@ RasterizerOpenGL::RasterizerOpenGL() : shader_dirty(true) { uniform_block_data.lut_dirty[index] = true; } + uniform_block_data.fog_lut_dirty = true; + // Set vertex attributes glVertexAttribPointer(GLShader::ATTRIBUTE_POSITION, 4, GL_FLOAT, GL_FALSE, sizeof(HardwareVertex), (GLvoid*)offsetof(HardwareVertex, position)); glEnableVertexAttribArray(GLShader::ATTRIBUTE_POSITION); @@ -102,6 +104,18 @@ RasterizerOpenGL::RasterizerOpenGL() : shader_dirty(true) { glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); } + // Setup the LUT for the fog + { + fog_lut.Create(); + state.fog_lut.texture_1d = fog_lut.handle; + } + state.Apply(); + + glActiveTexture(GL_TEXTURE9); + glTexImage1D(GL_TEXTURE_1D, 0, GL_R32UI, 128, 0, GL_RED_INTEGER, GL_UNSIGNED_INT, nullptr); + glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); + glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + // Sync fixed function OpenGL state SyncCullMode(); SyncBlendEnabled(); @@ 
-182,6 +196,14 @@ void RasterizerOpenGL::DrawTriangles() { (GLint)(rect.bottom + regs.viewport_corner.y * color_surface->res_scale_height), (GLsizei)(viewport_width * color_surface->res_scale_width), (GLsizei)(viewport_height * color_surface->res_scale_height)); + if (uniform_block_data.data.framebuffer_scale[0] != color_surface->res_scale_width || + uniform_block_data.data.framebuffer_scale[1] != color_surface->res_scale_height) { + + uniform_block_data.data.framebuffer_scale[0] = color_surface->res_scale_width; + uniform_block_data.data.framebuffer_scale[1] = color_surface->res_scale_height; + uniform_block_data.dirty = true; + } + // Sync and bind the texture surfaces const auto pica_textures = regs.GetTextures(); for (unsigned texture_index = 0; texture_index < pica_textures.size(); ++texture_index) { @@ -215,6 +237,12 @@ void RasterizerOpenGL::DrawTriangles() { } } + // Sync the fog lut + if (uniform_block_data.fog_lut_dirty) { + SyncFogLUT(); + uniform_block_data.fog_lut_dirty = false; + } + // Sync the uniform data if (uniform_block_data.dirty) { glBufferData(GL_UNIFORM_BUFFER, sizeof(UniformData), &uniform_block_data.data, GL_STATIC_DRAW); @@ -280,6 +308,21 @@ void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) { SyncBlendColor(); break; + // Fog state + case PICA_REG_INDEX(fog_color): + SyncFogColor(); + break; + case PICA_REG_INDEX_WORKAROUND(fog_lut_data[0], 0xe8): + case PICA_REG_INDEX_WORKAROUND(fog_lut_data[1], 0xe9): + case PICA_REG_INDEX_WORKAROUND(fog_lut_data[2], 0xea): + case PICA_REG_INDEX_WORKAROUND(fog_lut_data[3], 0xeb): + case PICA_REG_INDEX_WORKAROUND(fog_lut_data[4], 0xec): + case PICA_REG_INDEX_WORKAROUND(fog_lut_data[5], 0xed): + case PICA_REG_INDEX_WORKAROUND(fog_lut_data[6], 0xee): + case PICA_REG_INDEX_WORKAROUND(fog_lut_data[7], 0xef): + uniform_block_data.fog_lut_dirty = true; + break; + // Alpha test case PICA_REG_INDEX(output_merger.alpha_test): SyncAlphaTest(); @@ -318,6 +361,15 @@ void 
RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) { SyncColorWriteMask(); break; + // Scissor test + case PICA_REG_INDEX(scissor_test.mode): + shader_dirty = true; + break; + case PICA_REG_INDEX(scissor_test.x1): // and y1 + case PICA_REG_INDEX(scissor_test.x2): // and y2 + SyncScissorTest(); + break; + // Logic op case PICA_REG_INDEX(output_merger.logic_op): SyncLogicOp(); @@ -329,6 +381,7 @@ void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) { break; // TEV stages + // (This also syncs fog_mode and fog_flip which are part of tev_combiner_buffer_input) case PICA_REG_INDEX(tev_stage0.color_source1): case PICA_REG_INDEX(tev_stage0.color_modifier1): case PICA_REG_INDEX(tev_stage0.color_op): @@ -950,9 +1003,15 @@ void RasterizerOpenGL::SetShader() { uniform_lut = glGetUniformLocation(shader->shader.handle, "lut[5]"); if (uniform_lut != -1) { glUniform1i(uniform_lut, 8); } + GLuint uniform_fog_lut = glGetUniformLocation(shader->shader.handle, "fog_lut"); + if (uniform_fog_lut != -1) { glUniform1i(uniform_fog_lut, 9); } + current_shader = shader_cache.emplace(config, std::move(shader)).first->second.get(); unsigned int block_index = glGetUniformBlockIndex(current_shader->shader.handle, "shader_data"); + GLint block_size; + glGetActiveUniformBlockiv(current_shader->shader.handle, block_index, GL_UNIFORM_BLOCK_DATA_SIZE, &block_size); + ASSERT_MSG(block_size == sizeof(UniformData), "Uniform block size did not match!"); glUniformBlockBinding(current_shader->shader.handle, block_index, 0); // Update uniforms @@ -960,6 +1019,7 @@ void RasterizerOpenGL::SetShader() { SyncDepthOffset(); SyncAlphaTest(); SyncCombinerColor(); + SyncScissorTest(); auto& tev_stages = Pica::g_state.regs.GetTevStages(); for (int index = 0; index < tev_stages.size(); ++index) SyncTevConstColor(index, tev_stages[index]); @@ -974,6 +1034,8 @@ void RasterizerOpenGL::SetShader() { SyncLightDistanceAttenuationBias(light_index); SyncLightDistanceAttenuationScale(light_index); } + + SyncFogColor(); 
} } @@ -1040,6 +1102,30 @@ void RasterizerOpenGL::SyncBlendColor() { state.blend.color.alpha = blend_color[3]; } +void RasterizerOpenGL::SyncFogColor() { + const auto& regs = Pica::g_state.regs; + uniform_block_data.data.fog_color = { + regs.fog_color.r.Value() / 255.0f, + regs.fog_color.g.Value() / 255.0f, + regs.fog_color.b.Value() / 255.0f + }; + uniform_block_data.dirty = true; +} + +void RasterizerOpenGL::SyncFogLUT() { + std::array<GLuint, 128> new_data; + + std::transform(Pica::g_state.fog.lut.begin(), Pica::g_state.fog.lut.end(), new_data.begin(), [](const auto& entry) { + return entry.raw; + }); + + if (new_data != fog_lut_data) { + fog_lut_data = new_data; + glActiveTexture(GL_TEXTURE9); + glTexSubImage1D(GL_TEXTURE_1D, 0, 0, 128, GL_RED_INTEGER, GL_UNSIGNED_INT, fog_lut_data.data()); + } +} + void RasterizerOpenGL::SyncAlphaTest() { const auto& regs = Pica::g_state.regs; if (regs.output_merger.alpha_test.ref != uniform_block_data.data.alphatest_ref) { @@ -1098,6 +1184,22 @@ void RasterizerOpenGL::SyncDepthTest() { PicaToGL::CompareFunc(regs.output_merger.depth_test_func) : GL_ALWAYS; } +void RasterizerOpenGL::SyncScissorTest() { + const auto& regs = Pica::g_state.regs; + + if (uniform_block_data.data.scissor_x1 != regs.scissor_test.x1 || + uniform_block_data.data.scissor_y1 != regs.scissor_test.y1 || + uniform_block_data.data.scissor_x2 != regs.scissor_test.x2 || + uniform_block_data.data.scissor_y2 != regs.scissor_test.y2) { + + uniform_block_data.data.scissor_x1 = regs.scissor_test.x1; + uniform_block_data.data.scissor_y1 = regs.scissor_test.y1; + uniform_block_data.data.scissor_x2 = regs.scissor_test.x2; + uniform_block_data.data.scissor_y2 = regs.scissor_test.y2; + uniform_block_data.dirty = true; + } +} + void RasterizerOpenGL::SyncCombinerColor() { auto combiner_color = PicaToGL::ColorRGBA8(Pica::g_state.regs.tev_combiner_buffer_color.raw); if (combiner_color != uniform_block_data.data.tev_combiner_buffer_color) { diff --git 
a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index bb7f20161..c5029432b 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -56,6 +56,8 @@ union PicaShaderConfig { const auto& regs = Pica::g_state.regs; + state.scissor_test_mode = regs.scissor_test.mode; + state.depthmap_enable = regs.depthmap_enable; state.alpha_test_func = regs.output_merger.alpha_test.enable ? @@ -76,6 +78,9 @@ union PicaShaderConfig { state.tev_stages[i].scales_raw = tev_stage.scales_raw; } + state.fog_mode = regs.fog_mode; + state.fog_flip = regs.fog_flip; + state.combiner_buffer_input = regs.tev_combiner_buffer_input.update_mask_rgb.Value() | regs.tev_combiner_buffer_input.update_mask_a.Value() << 4; @@ -168,13 +173,15 @@ union PicaShaderConfig { }; struct State { - Pica::Regs::CompareFunc alpha_test_func; + Pica::Regs::ScissorMode scissor_test_mode; Pica::Regs::TextureConfig::TextureType texture0_type; std::array<TevStageConfigRaw, 6> tev_stages; u8 combiner_buffer_input; Pica::Regs::DepthBuffering depthmap_enable; + Pica::Regs::FogMode fog_mode; + bool fog_flip; struct { struct { @@ -316,19 +323,27 @@ private: GLfloat dist_atten_scale; }; - /// Uniform structure for the Uniform Buffer Object, all members must be 16-byte aligned + /// Uniform structure for the Uniform Buffer Object, all vectors must be 16-byte aligned + // NOTE: Always keep a vec4 at the end. The GL spec is not clear whether the alignment at + // the end of a uniform block is included in UNIFORM_BLOCK_DATA_SIZE or not. + // Not following that rule will cause problems on some AMD drivers. 
struct UniformData { - // A vec4 color for each of the six tev stages - GLvec4 const_color[6]; - GLvec4 tev_combiner_buffer_color; + alignas(8) GLvec2 framebuffer_scale; GLint alphatest_ref; GLfloat depth_scale; GLfloat depth_offset; + GLint scissor_x1; + GLint scissor_y1; + GLint scissor_x2; + GLint scissor_y2; + alignas(16) GLvec3 fog_color; alignas(16) GLvec3 lighting_global_ambient; LightSrc light_src[8]; + alignas(16) GLvec4 const_color[6]; // A vec4 color for each of the six tev stages + alignas(16) GLvec4 tev_combiner_buffer_color; }; - static_assert(sizeof(UniformData) == 0x390, "The size of the UniformData structure has changed, update the structure in the shader"); + static_assert(sizeof(UniformData) == 0x3C0, "The size of the UniformData structure has changed, update the structure in the shader"); static_assert(sizeof(UniformData) < 16384, "UniformData structure must be less than 16kb as per the OpenGL spec"); /// Sets the OpenGL shader in accordance with the current PICA register state @@ -352,6 +367,10 @@ private: /// Syncs the blend color to match the PICA register void SyncBlendColor(); + /// Syncs the fog states to match the PICA register + void SyncFogColor(); + void SyncFogLUT(); + /// Syncs the alpha test states to match the PICA register void SyncAlphaTest(); @@ -373,6 +392,9 @@ private: /// Syncs the depth test states to match the PICA register void SyncDepthTest(); + /// Syncs the scissor test state to match the PICA register + void SyncScissorTest(); + /// Syncs the TEV combiner color buffer to match the PICA register void SyncCombinerColor(); @@ -419,6 +441,7 @@ private: struct { UniformData data; bool lut_dirty[6]; + bool fog_lut_dirty; bool dirty; } uniform_block_data = {}; @@ -430,4 +453,7 @@ private: std::array<OGLTexture, 6> lighting_luts; std::array<std::array<GLvec4, 256>, 6> lighting_lut_data{}; + + OGLTexture fog_lut; + std::array<GLuint, 128> fog_lut_data{}; }; diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp 
b/src/video_core/renderer_opengl/gl_shader_gen.cpp index 8332e722d..36513dedc 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.cpp +++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp @@ -539,6 +539,8 @@ in float texcoord0_w; in vec4 normquat; in vec3 view; +in vec4 gl_FragCoord; + out vec4 color; struct LightSrc { @@ -552,17 +554,24 @@ struct LightSrc { }; layout (std140) uniform shader_data { - vec4 const_color[NUM_TEV_STAGES]; - vec4 tev_combiner_buffer_color; + vec2 framebuffer_scale; int alphatest_ref; float depth_scale; float depth_offset; + int scissor_x1; + int scissor_y1; + int scissor_x2; + int scissor_y2; + vec3 fog_color; vec3 lighting_global_ambient; LightSrc light_src[NUM_LIGHTS]; + vec4 const_color[NUM_TEV_STAGES]; + vec4 tev_combiner_buffer_color; }; uniform sampler2D tex[3]; uniform sampler1D lut[6]; +uniform usampler1D fog_lut; // Rotate the vector v by the quaternion q vec3 quaternion_rotate(vec4 q, vec3 v) { @@ -580,6 +589,25 @@ vec4 secondary_fragment_color = vec4(0.0); return out; } + // Append the scissor test + if (state.scissor_test_mode != Regs::ScissorMode::Disabled) { + out += "if ("; + // Negate the condition if we have to keep only the pixels inside the scissor box + if (state.scissor_test_mode == Regs::ScissorMode::Include) + out += "!"; + // x2,y2 have +1 added to cover the entire pixel area + out += "(gl_FragCoord.x >= scissor_x1 * framebuffer_scale.x && " + "gl_FragCoord.y >= scissor_y1 * framebuffer_scale.y && " + "gl_FragCoord.x < (scissor_x2 + 1) * framebuffer_scale.x && " + "gl_FragCoord.y < (scissor_y2 + 1) * framebuffer_scale.y)) discard;\n"; + } + + out += "float z_over_w = 1.0 - gl_FragCoord.z * 2.0;\n"; + out += "float depth = z_over_w * depth_scale + depth_offset;\n"; + if (state.depthmap_enable == Pica::Regs::DepthBuffering::WBuffering) { + out += "depth /= gl_FragCoord.w;\n"; + } + if (state.lighting.enable) WriteLighting(out, config); @@ -596,14 +624,30 @@ vec4 secondary_fragment_color = vec4(0.0); out += 
") discard;\n"; } - out += "color = last_tex_env_out;\n"; + // Append fog combiner + if (state.fog_mode == Regs::FogMode::Fog) { + // Get index into fog LUT + if (state.fog_flip) { + out += "float fog_index = (1.0 - depth) * 128.0;\n"; + } else { + out += "float fog_index = depth * 128.0;\n"; + } - out += "float z_over_w = 1.0 - gl_FragCoord.z * 2.0;\n"; - out += "float depth = z_over_w * depth_scale + depth_offset;\n"; - if (state.depthmap_enable == Pica::Regs::DepthBuffering::WBuffering) { - out += "depth /= gl_FragCoord.w;\n"; + // Generate clamped fog factor from LUT for given fog index + out += "float fog_i = clamp(floor(fog_index), 0.0, 127.0);\n"; + out += "float fog_f = fog_index - fog_i;\n"; + out += "uint fog_lut_entry = texelFetch(fog_lut, int(fog_i), 0).r;\n"; + out += "float fog_lut_entry_difference = float(int((fog_lut_entry & 0x1FFFU) << 19U) >> 19);\n"; // Extract signed difference + out += "float fog_lut_entry_value = float((fog_lut_entry >> 13U) & 0x7FFU);\n"; + out += "float fog_factor = (fog_lut_entry_value + fog_lut_entry_difference * fog_f) / 2047.0;\n"; + out += "fog_factor = clamp(fog_factor, 0.0, 1.0);\n"; + + // Blend the fog + out += "last_tex_env_out.rgb = mix(fog_color.rgb, last_tex_env_out.rgb, fog_factor);\n"; } + out += "gl_FragDepth = depth;\n"; + out += "color = last_tex_env_out;\n"; out += "}"; diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp index fa141fc9a..13ee986b9 100644 --- a/src/video_core/renderer_opengl/gl_state.cpp +++ b/src/video_core/renderer_opengl/gl_state.cpp @@ -58,6 +58,8 @@ OpenGLState::OpenGLState() { lut.texture_1d = 0; } + fog_lut.texture_1d = 0; + draw.read_framebuffer = 0; draw.draw_framebuffer = 0; draw.vertex_array = 0; @@ -195,6 +197,12 @@ void OpenGLState::Apply() const { } } + // Fog LUT + if (fog_lut.texture_1d != cur_state.fog_lut.texture_1d) { + glActiveTexture(GL_TEXTURE9); + glBindTexture(GL_TEXTURE_1D, fog_lut.texture_1d); + } + // Framebuffer 
if (draw.read_framebuffer != cur_state.draw.read_framebuffer) { glBindFramebuffer(GL_READ_FRAMEBUFFER, draw.read_framebuffer); diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h index 228727054..13c71b0a6 100644 --- a/src/video_core/renderer_opengl/gl_state.h +++ b/src/video_core/renderer_opengl/gl_state.h @@ -68,6 +68,10 @@ public: } lighting_luts[6]; struct { + GLuint texture_1d; // GL_TEXTURE_BINDING_1D + } fog_lut; + + struct { GLuint read_framebuffer; // GL_READ_FRAMEBUFFER_BINDING GLuint draw_framebuffer; // GL_DRAW_FRAMEBUFFER_BINDING GLuint vertex_array; // GL_VERTEX_ARRAY_BINDING diff --git a/src/video_core/renderer_opengl/pica_to_gl.h b/src/video_core/renderer_opengl/pica_to_gl.h index 6dc2758c5..d9b9c9cc2 100644 --- a/src/video_core/renderer_opengl/pica_to_gl.h +++ b/src/video_core/renderer_opengl/pica_to_gl.h @@ -17,6 +17,7 @@ #include "video_core/pica.h" +using GLvec2 = std::array<GLfloat, 2>; using GLvec3 = std::array<GLfloat, 3>; using GLvec4 = std::array<GLfloat, 4>; |
