diff options
Diffstat (limited to 'src/video_core')
| -rw-r--r-- | src/video_core/pica_state.h | 20 | ||||
| -rw-r--r-- | src/video_core/regs_lighting.h | 67 | ||||
| -rw-r--r-- | src/video_core/regs_texturing.h | 8 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.cpp | 215 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.h | 16 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_gen.cpp | 216 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_gen.h | 5 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_state.cpp | 46 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_state.h | 29 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/pica_to_gl.h | 14 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/renderer_opengl.cpp | 15 | ||||
| -rw-r--r-- | src/video_core/shader/shader_jit_x64_compiler.cpp | 26 | ||||
| -rw-r--r-- | src/video_core/swrasterizer/rasterizer.cpp | 23 | ||||
| -rw-r--r-- | src/video_core/swrasterizer/texturing.cpp | 19 |
14 files changed, 471 insertions, 248 deletions
diff --git a/src/video_core/pica_state.h b/src/video_core/pica_state.h index f46db09fb..2d23d34e6 100644 --- a/src/video_core/pica_state.h +++ b/src/video_core/pica_state.h @@ -87,12 +87,18 @@ struct State { // LUT value, encoded as 12-bit fixed point, with 12 fraction bits BitField<0, 12, u32> value; // 0.0.12 fixed point - // Used by HW for efficient interpolation, Citra does not use these - BitField<12, 12, s32> difference; // 1.0.11 fixed point + // Used for efficient interpolation. + BitField<12, 11, u32> difference; // 0.0.11 fixed point + BitField<23, 1, u32> neg_difference; - float ToFloat() { + float ToFloat() const { return static_cast<float>(value) / 4095.f; } + + float DiffToFloat() const { + float diff = static_cast<float>(difference) / 2047.f; + return neg_difference ? -diff : diff; + } }; std::array<std::array<LutEntry, 256>, 24> luts; @@ -105,6 +111,14 @@ struct State { BitField<0, 13, s32> difference; // 1.1.11 fixed point BitField<13, 11, u32> value; // 0.0.11 fixed point + + float ToFloat() const { + return static_cast<float>(value) / 2047.0f; + } + + float DiffToFloat() const { + return static_cast<float>(difference) / 2047.0f; + } }; std::array<LutEntry, 128> lut; diff --git a/src/video_core/regs_lighting.h b/src/video_core/regs_lighting.h index 6793405d9..b89709cfe 100644 --- a/src/video_core/regs_lighting.h +++ b/src/video_core/regs_lighting.h @@ -26,6 +26,18 @@ struct LightingRegs { DistanceAttenuation = 16, }; + static constexpr unsigned NumLightingSampler = 24; + + static LightingSampler SpotlightAttenuationSampler(unsigned index) { + return static_cast<LightingSampler>( + static_cast<unsigned>(LightingSampler::SpotlightAttenuation) + index); + } + + static LightingSampler DistanceAttenuationSampler(unsigned index) { + return static_cast<LightingSampler>( + static_cast<unsigned>(LightingSampler::DistanceAttenuation) + index); + } + /** * Pica fragment lighting supports using different LUTs for each lighting component: Reflectance * R, G, 
and B channels, distribution function for specular components 0 and 1, fresnel factor, @@ -73,6 +85,8 @@ struct LightingRegs { VH = 1, // Cosine of the angle between the view and half-angle vectors NV = 2, // Cosine of the angle between the normal and the view vector LN = 3, // Cosine of the angle between the light and the normal vectors + SP = 4, // Cosine of the angle between the light and the inverse spotlight vectors + CP = 5, // Cosine of the angle between the tangent and projection of half-angle vectors }; enum class LightingBumpMode : u32 { @@ -104,6 +118,9 @@ struct LightingRegs { return (config != LightingConfig::Config0) && (config != LightingConfig::Config1) && (config != LightingConfig::Config5); + case LightingSampler::SpotlightAttenuation: + return (config != LightingConfig::Config2) && (config != LightingConfig::Config3); + case LightingSampler::Fresnel: return (config != LightingConfig::Config0) && (config != LightingConfig::Config2) && (config != LightingConfig::Config4); @@ -116,11 +133,10 @@ struct LightingRegs { return (config == LightingConfig::Config4) || (config == LightingConfig::Config5) || (config == LightingConfig::Config7); default: - UNREACHABLE_MSG("Regs::IsLightingSamplerSupported: Reached " - "unreachable section, sampler should be one " - "of Distribution0, Distribution1, Fresnel, " - "ReflectRed, ReflectGreen or ReflectBlue, instead " - "got %i", + UNREACHABLE_MSG("Regs::IsLightingSamplerSupported: Reached unreachable section, " + "sampler should be one of Distribution0, Distribution1, " + "SpotlightAttenuation, Fresnel, ReflectRed, ReflectGreen or " + "ReflectBlue, instead got %i", static_cast<int>(config)); } } @@ -140,11 +156,22 @@ struct LightingRegs { BitField<0, 16, u32> z; }; - INSERT_PADDING_WORDS(0x3); + // inverse spotlight direction vector, encoded as fixed1.1.11 + union { + BitField<0, 13, s32> spot_x; + BitField<16, 13, s32> spot_y; + }; + union { + BitField<0, 13, s32> spot_z; + }; + + INSERT_PADDING_WORDS(0x1); union 
{ BitField<0, 1, u32> directional; BitField<1, 1, u32> two_sided_diffuse; // When disabled, clamp dot-product to 0 + BitField<2, 1, u32> geometric_factor_0; + BitField<3, 1, u32> geometric_factor_1; } config; BitField<0, 20, u32> dist_atten_bias; @@ -169,8 +196,16 @@ struct LightingRegs { } config0; union { + u32 raw; + + // Each bit specifies whether spot light attenuation should be applied for the corresponding + // light. + BitField<8, 8, u32> disable_spot_atten; + BitField<16, 1, u32> disable_lut_d0; BitField<17, 1, u32> disable_lut_d1; + // Note: by intuition, BitField<18, 1, u32> should be disable_lut_sp, but it is actually a + // dummy bit which is always set as 1. BitField<19, 1, u32> disable_lut_fr; BitField<20, 1, u32> disable_lut_rr; BitField<21, 1, u32> disable_lut_rg; @@ -178,23 +213,15 @@ struct LightingRegs { // Each bit specifies whether distance attenuation should be applied for the corresponding // light. - BitField<24, 1, u32> disable_dist_atten_light_0; - BitField<25, 1, u32> disable_dist_atten_light_1; - BitField<26, 1, u32> disable_dist_atten_light_2; - BitField<27, 1, u32> disable_dist_atten_light_3; - BitField<28, 1, u32> disable_dist_atten_light_4; - BitField<29, 1, u32> disable_dist_atten_light_5; - BitField<30, 1, u32> disable_dist_atten_light_6; - BitField<31, 1, u32> disable_dist_atten_light_7; + BitField<24, 8, u32> disable_dist_atten; } config1; bool IsDistAttenDisabled(unsigned index) const { - const unsigned disable[] = { - config1.disable_dist_atten_light_0, config1.disable_dist_atten_light_1, - config1.disable_dist_atten_light_2, config1.disable_dist_atten_light_3, - config1.disable_dist_atten_light_4, config1.disable_dist_atten_light_5, - config1.disable_dist_atten_light_6, config1.disable_dist_atten_light_7}; - return disable[index] != 0; + return (config1.disable_dist_atten & (1 << index)) != 0; + } + + bool IsSpotAttenDisabled(unsigned index) const { + return (config1.disable_spot_atten & (1 << index)) != 0; } union { diff 
--git a/src/video_core/regs_texturing.h b/src/video_core/regs_texturing.h index 3f5355fa9..0b09f2299 100644 --- a/src/video_core/regs_texturing.h +++ b/src/video_core/regs_texturing.h @@ -30,10 +30,10 @@ struct TexturingRegs { Repeat = 2, MirroredRepeat = 3, // Mode 4-7 produces some weird result and may be just invalid: - // 4: Positive coord: clamp to edge; negative coord: repeat - // 5: Positive coord: clamp to border; negative coord: repeat - // 6: Repeat - // 7: Repeat + ClampToEdge2 = 4, // Positive coord: clamp to edge; negative coord: repeat + ClampToBorder2 = 5, // Positive coord: clamp to border; negative coord: repeat + Repeat2 = 6, // Same as Repeat + Repeat3 = 7, // Same as Repeat }; enum TextureFilter : u32 { diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index aa9b831dd..ff3f69ba3 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -49,9 +49,7 @@ RasterizerOpenGL::RasterizerOpenGL() : shader_dirty(true) { uniform_block_data.dirty = true; - for (unsigned index = 0; index < lighting_luts.size(); index++) { - uniform_block_data.lut_dirty[index] = true; - } + uniform_block_data.lut_dirty.fill(true); uniform_block_data.fog_lut_dirty = true; @@ -96,36 +94,32 @@ RasterizerOpenGL::RasterizerOpenGL() : shader_dirty(true) { framebuffer.Create(); // Allocate and bind lighting lut textures - for (size_t i = 0; i < lighting_luts.size(); ++i) { - lighting_luts[i].Create(); - state.lighting_luts[i].texture_1d = lighting_luts[i].handle; - } + lighting_lut.Create(); + state.lighting_lut.texture_buffer = lighting_lut.handle; state.Apply(); - - for (size_t i = 0; i < lighting_luts.size(); ++i) { - glActiveTexture(static_cast<GLenum>(GL_TEXTURE3 + i)); - glTexImage1D(GL_TEXTURE_1D, 0, GL_RGBA32F, 256, 0, GL_RGBA, GL_FLOAT, nullptr); - glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); - glTexParameteri(GL_TEXTURE_1D, 
GL_TEXTURE_MAG_FILTER, GL_LINEAR); - } + lighting_lut_buffer.Create(); + glBindBuffer(GL_TEXTURE_BUFFER, lighting_lut_buffer.handle); + glBufferData(GL_TEXTURE_BUFFER, + sizeof(GLfloat) * 2 * 256 * Pica::LightingRegs::NumLightingSampler, nullptr, + GL_DYNAMIC_DRAW); + glActiveTexture(TextureUnits::LightingLUT.Enum()); + glTexBuffer(GL_TEXTURE_BUFFER, GL_RG32F, lighting_lut_buffer.handle); // Setup the LUT for the fog - { - fog_lut.Create(); - state.fog_lut.texture_1d = fog_lut.handle; - } + fog_lut.Create(); + state.fog_lut.texture_buffer = fog_lut.handle; state.Apply(); - - glActiveTexture(GL_TEXTURE9); - glTexImage1D(GL_TEXTURE_1D, 0, GL_R32UI, 128, 0, GL_RED_INTEGER, GL_UNSIGNED_INT, nullptr); - glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); - glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + fog_lut_buffer.Create(); + glBindBuffer(GL_TEXTURE_BUFFER, fog_lut_buffer.handle); + glBufferData(GL_TEXTURE_BUFFER, sizeof(GLfloat) * 2 * 128, nullptr, GL_DYNAMIC_DRAW); + glActiveTexture(TextureUnits::FogLUT.Enum()); + glTexBuffer(GL_TEXTURE_BUFFER, GL_RG32F, fog_lut_buffer.handle); // Setup the noise LUT for proctex proctex_noise_lut.Create(); state.proctex_noise_lut.texture_1d = proctex_noise_lut.handle; state.Apply(); - glActiveTexture(GL_TEXTURE10); + glActiveTexture(TextureUnits::ProcTexNoiseLUT.Enum()); glTexImage1D(GL_TEXTURE_1D, 0, GL_RG32F, 128, 0, GL_RG, GL_FLOAT, nullptr); glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); @@ -134,7 +128,7 @@ RasterizerOpenGL::RasterizerOpenGL() : shader_dirty(true) { proctex_color_map.Create(); state.proctex_color_map.texture_1d = proctex_color_map.handle; state.Apply(); - glActiveTexture(GL_TEXTURE11); + glActiveTexture(TextureUnits::ProcTexColorMap.Enum()); glTexImage1D(GL_TEXTURE_1D, 0, GL_RG32F, 128, 0, GL_RG, GL_FLOAT, nullptr); glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); 
glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); @@ -143,7 +137,7 @@ RasterizerOpenGL::RasterizerOpenGL() : shader_dirty(true) { proctex_alpha_map.Create(); state.proctex_alpha_map.texture_1d = proctex_alpha_map.handle; state.Apply(); - glActiveTexture(GL_TEXTURE12); + glActiveTexture(TextureUnits::ProcTexAlphaMap.Enum()); glTexImage1D(GL_TEXTURE_1D, 0, GL_RG32F, 128, 0, GL_RG, GL_FLOAT, nullptr); glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); @@ -152,7 +146,7 @@ RasterizerOpenGL::RasterizerOpenGL() : shader_dirty(true) { proctex_lut.Create(); state.proctex_lut.texture_1d = proctex_lut.handle; state.Apply(); - glActiveTexture(GL_TEXTURE13); + glActiveTexture(TextureUnits::ProcTexLUT.Enum()); glTexImage1D(GL_TEXTURE_1D, 0, GL_RGBA32F, 256, 0, GL_RGBA, GL_FLOAT, nullptr); glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); @@ -161,7 +155,7 @@ RasterizerOpenGL::RasterizerOpenGL() : shader_dirty(true) { proctex_diff_lut.Create(); state.proctex_diff_lut.texture_1d = proctex_diff_lut.handle; state.Apply(); - glActiveTexture(GL_TEXTURE14); + glActiveTexture(TextureUnits::ProcTexDiffLUT.Enum()); glTexImage1D(GL_TEXTURE_1D, 0, GL_RGBA32F, 256, 0, GL_RGBA, GL_FLOAT, nullptr); glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); @@ -182,19 +176,22 @@ RasterizerOpenGL::RasterizerOpenGL() : shader_dirty(true) { RasterizerOpenGL::~RasterizerOpenGL() {} /** - * This is a helper function to resolve an issue with opposite quaternions being interpolated by - * OpenGL. See below for a detailed description of this issue (yuriks): + * This is a helper function to resolve an issue when interpolating opposite quaternions. 
See below + * for a detailed description of this issue (yuriks): * * For any rotation, there are two quaternions Q, and -Q, that represent the same rotation. If you * interpolate two quaternions that are opposite, instead of going from one rotation to another * using the shortest path, you'll go around the longest path. You can test if two quaternions are - * opposite by checking if Dot(Q1, W2) < 0. In that case, you can flip either of them, therefore - * making Dot(-Q1, W2) positive. + * opposite by checking if Dot(Q1, Q2) < 0. In that case, you can flip either of them, therefore + * making Dot(Q1, -Q2) positive. + * + * This solution corrects this issue per-vertex before passing the quaternions to OpenGL. This is + * correct for most cases but can still rotate around the long way sometimes. An implementation + * which did `lerp(lerp(Q1, Q2), Q3)` (with proper weighting), applying the dot product check + * between each step would work for those cases at the cost of being more complex to implement. * - * NOTE: This solution corrects this issue per-vertex before passing the quaternions to OpenGL. This - * should be correct for nearly all cases, however a more correct implementation (but less trivial - * and perhaps unnecessary) would be to handle this per-fragment, by interpolating the quaternions - * manually using two Lerps, and doing this correction before each Lerp. + * Fortunately however, the 3DS hardware happens to also use this exact same logic to work around + * these issues, making this basic implementation actually more accurate to the hardware. 
*/ static bool AreQuaternionsOpposite(Math::Vec4<Pica::float24> qa, Math::Vec4<Pica::float24> qb) { Math::Vec4f a{qa.x.ToFloat32(), qa.y.ToFloat32(), qa.z.ToFloat32(), qa.w.ToFloat32()}; @@ -310,7 +307,7 @@ void RasterizerOpenGL::DrawTriangles() { } // Sync the lighting luts - for (unsigned index = 0; index < lighting_luts.size(); index++) { + for (unsigned index = 0; index < uniform_block_data.lut_dirty.size(); index++) { if (uniform_block_data.lut_dirty[index]) { SyncLightingLUT(index); uniform_block_data.lut_dirty[index] = false; @@ -735,6 +732,40 @@ void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) { SyncLightPosition(7); break; + // Fragment spot lighting direction + case PICA_REG_INDEX_WORKAROUND(lighting.light[0].spot_x, 0x146 + 0 * 0x10): + case PICA_REG_INDEX_WORKAROUND(lighting.light[0].spot_z, 0x147 + 0 * 0x10): + SyncLightSpotDirection(0); + break; + case PICA_REG_INDEX_WORKAROUND(lighting.light[1].spot_x, 0x146 + 1 * 0x10): + case PICA_REG_INDEX_WORKAROUND(lighting.light[1].spot_z, 0x147 + 1 * 0x10): + SyncLightSpotDirection(1); + break; + case PICA_REG_INDEX_WORKAROUND(lighting.light[2].spot_x, 0x146 + 2 * 0x10): + case PICA_REG_INDEX_WORKAROUND(lighting.light[2].spot_z, 0x147 + 2 * 0x10): + SyncLightSpotDirection(2); + break; + case PICA_REG_INDEX_WORKAROUND(lighting.light[3].spot_x, 0x146 + 3 * 0x10): + case PICA_REG_INDEX_WORKAROUND(lighting.light[3].spot_z, 0x147 + 3 * 0x10): + SyncLightSpotDirection(3); + break; + case PICA_REG_INDEX_WORKAROUND(lighting.light[4].spot_x, 0x146 + 4 * 0x10): + case PICA_REG_INDEX_WORKAROUND(lighting.light[4].spot_z, 0x147 + 4 * 0x10): + SyncLightSpotDirection(4); + break; + case PICA_REG_INDEX_WORKAROUND(lighting.light[5].spot_x, 0x146 + 5 * 0x10): + case PICA_REG_INDEX_WORKAROUND(lighting.light[5].spot_z, 0x147 + 5 * 0x10): + SyncLightSpotDirection(5); + break; + case PICA_REG_INDEX_WORKAROUND(lighting.light[6].spot_x, 0x146 + 6 * 0x10): + case PICA_REG_INDEX_WORKAROUND(lighting.light[6].spot_z, 0x147 + 6 * 
0x10): + SyncLightSpotDirection(6); + break; + case PICA_REG_INDEX_WORKAROUND(lighting.light[7].spot_x, 0x146 + 7 * 0x10): + case PICA_REG_INDEX_WORKAROUND(lighting.light[7].spot_z, 0x147 + 7 * 0x10): + SyncLightSpotDirection(7); + break; + // Fragment lighting light source config case PICA_REG_INDEX_WORKAROUND(lighting.light[0].config, 0x149 + 0 * 0x10): case PICA_REG_INDEX_WORKAROUND(lighting.light[1].config, 0x149 + 1 * 0x10): @@ -814,7 +845,7 @@ void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) { case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[6], 0x1ce): case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[7], 0x1cf): { auto& lut_config = regs.lighting.lut_config; - uniform_block_data.lut_dirty[lut_config.type / 4] = true; + uniform_block_data.lut_dirty[lut_config.type] = true; break; } } @@ -1150,77 +1181,57 @@ void RasterizerOpenGL::SetShader() { state.Apply(); // Set the texture samplers to correspond to different texture units - GLuint uniform_tex = glGetUniformLocation(shader->shader.handle, "tex[0]"); + GLint uniform_tex = glGetUniformLocation(shader->shader.handle, "tex[0]"); if (uniform_tex != -1) { - glUniform1i(uniform_tex, 0); + glUniform1i(uniform_tex, TextureUnits::PicaTexture(0).id); } uniform_tex = glGetUniformLocation(shader->shader.handle, "tex[1]"); if (uniform_tex != -1) { - glUniform1i(uniform_tex, 1); + glUniform1i(uniform_tex, TextureUnits::PicaTexture(1).id); } uniform_tex = glGetUniformLocation(shader->shader.handle, "tex[2]"); if (uniform_tex != -1) { - glUniform1i(uniform_tex, 2); + glUniform1i(uniform_tex, TextureUnits::PicaTexture(2).id); } // Set the texture samplers to correspond to different lookup table texture units - GLuint uniform_lut = glGetUniformLocation(shader->shader.handle, "lut[0]"); + GLint uniform_lut = glGetUniformLocation(shader->shader.handle, "lighting_lut"); if (uniform_lut != -1) { - glUniform1i(uniform_lut, 3); - } - uniform_lut = glGetUniformLocation(shader->shader.handle, "lut[1]"); - if (uniform_lut != 
-1) { - glUniform1i(uniform_lut, 4); - } - uniform_lut = glGetUniformLocation(shader->shader.handle, "lut[2]"); - if (uniform_lut != -1) { - glUniform1i(uniform_lut, 5); - } - uniform_lut = glGetUniformLocation(shader->shader.handle, "lut[3]"); - if (uniform_lut != -1) { - glUniform1i(uniform_lut, 6); - } - uniform_lut = glGetUniformLocation(shader->shader.handle, "lut[4]"); - if (uniform_lut != -1) { - glUniform1i(uniform_lut, 7); - } - uniform_lut = glGetUniformLocation(shader->shader.handle, "lut[5]"); - if (uniform_lut != -1) { - glUniform1i(uniform_lut, 8); + glUniform1i(uniform_lut, TextureUnits::LightingLUT.id); } - GLuint uniform_fog_lut = glGetUniformLocation(shader->shader.handle, "fog_lut"); + GLint uniform_fog_lut = glGetUniformLocation(shader->shader.handle, "fog_lut"); if (uniform_fog_lut != -1) { - glUniform1i(uniform_fog_lut, 9); + glUniform1i(uniform_fog_lut, TextureUnits::FogLUT.id); } - GLuint uniform_proctex_noise_lut = + GLint uniform_proctex_noise_lut = glGetUniformLocation(shader->shader.handle, "proctex_noise_lut"); if (uniform_proctex_noise_lut != -1) { - glUniform1i(uniform_proctex_noise_lut, 10); + glUniform1i(uniform_proctex_noise_lut, TextureUnits::ProcTexNoiseLUT.id); } - GLuint uniform_proctex_color_map = + GLint uniform_proctex_color_map = glGetUniformLocation(shader->shader.handle, "proctex_color_map"); if (uniform_proctex_color_map != -1) { - glUniform1i(uniform_proctex_color_map, 11); + glUniform1i(uniform_proctex_color_map, TextureUnits::ProcTexColorMap.id); } - GLuint uniform_proctex_alpha_map = + GLint uniform_proctex_alpha_map = glGetUniformLocation(shader->shader.handle, "proctex_alpha_map"); if (uniform_proctex_alpha_map != -1) { - glUniform1i(uniform_proctex_alpha_map, 12); + glUniform1i(uniform_proctex_alpha_map, TextureUnits::ProcTexAlphaMap.id); } - GLuint uniform_proctex_lut = glGetUniformLocation(shader->shader.handle, "proctex_lut"); + GLint uniform_proctex_lut = glGetUniformLocation(shader->shader.handle, 
"proctex_lut"); if (uniform_proctex_lut != -1) { - glUniform1i(uniform_proctex_lut, 13); + glUniform1i(uniform_proctex_lut, TextureUnits::ProcTexLUT.id); } - GLuint uniform_proctex_diff_lut = + GLint uniform_proctex_diff_lut = glGetUniformLocation(shader->shader.handle, "proctex_diff_lut"); if (uniform_proctex_diff_lut != -1) { - glUniform1i(uniform_proctex_diff_lut, 14); + glUniform1i(uniform_proctex_diff_lut, TextureUnits::ProcTexDiffLUT.id); } current_shader = shader_cache.emplace(config, std::move(shader)).first->second.get(); @@ -1343,16 +1354,17 @@ void RasterizerOpenGL::SyncFogColor() { } void RasterizerOpenGL::SyncFogLUT() { - std::array<GLuint, 128> new_data; + std::array<GLvec2, 128> new_data; std::transform(Pica::g_state.fog.lut.begin(), Pica::g_state.fog.lut.end(), new_data.begin(), - [](const auto& entry) { return entry.raw; }); + [](const auto& entry) { + return GLvec2{entry.ToFloat(), entry.DiffToFloat()}; + }); if (new_data != fog_lut_data) { fog_lut_data = new_data; - glActiveTexture(GL_TEXTURE9); - glTexSubImage1D(GL_TEXTURE_1D, 0, 0, 128, GL_RED_INTEGER, GL_UNSIGNED_INT, - fog_lut_data.data()); + glBindBuffer(GL_TEXTURE_BUFFER, fog_lut_buffer.handle); + glBufferSubData(GL_TEXTURE_BUFFER, 0, new_data.size() * sizeof(GLvec2), new_data.data()); } } @@ -1389,17 +1401,18 @@ static void SyncProcTexValueLUT(const std::array<Pica::State::ProcTex::ValueEntr } void RasterizerOpenGL::SyncProcTexNoiseLUT() { - SyncProcTexValueLUT(Pica::g_state.proctex.noise_table, proctex_noise_lut_data, GL_TEXTURE10); + SyncProcTexValueLUT(Pica::g_state.proctex.noise_table, proctex_noise_lut_data, + TextureUnits::ProcTexNoiseLUT.Enum()); } void RasterizerOpenGL::SyncProcTexColorMap() { SyncProcTexValueLUT(Pica::g_state.proctex.color_map_table, proctex_color_map_data, - GL_TEXTURE11); + TextureUnits::ProcTexColorMap.Enum()); } void RasterizerOpenGL::SyncProcTexAlphaMap() { SyncProcTexValueLUT(Pica::g_state.proctex.alpha_map_table, proctex_alpha_map_data, - GL_TEXTURE12); + 
TextureUnits::ProcTexAlphaMap.Enum()); } void RasterizerOpenGL::SyncProcTexLUT() { @@ -1414,7 +1427,7 @@ void RasterizerOpenGL::SyncProcTexLUT() { if (new_data != proctex_lut_data) { proctex_lut_data = new_data; - glActiveTexture(GL_TEXTURE13); + glActiveTexture(TextureUnits::ProcTexLUT.Enum()); glTexSubImage1D(GL_TEXTURE_1D, 0, 0, 256, GL_RGBA, GL_FLOAT, proctex_lut_data.data()); } } @@ -1431,7 +1444,7 @@ void RasterizerOpenGL::SyncProcTexDiffLUT() { if (new_data != proctex_diff_lut_data) { proctex_diff_lut_data = new_data; - glActiveTexture(GL_TEXTURE14); + glActiveTexture(TextureUnits::ProcTexDiffLUT.Enum()); glTexSubImage1D(GL_TEXTURE_1D, 0, 0, 256, GL_RGBA, GL_FLOAT, proctex_diff_lut_data.data()); } } @@ -1534,20 +1547,17 @@ void RasterizerOpenGL::SyncGlobalAmbient() { } void RasterizerOpenGL::SyncLightingLUT(unsigned lut_index) { - std::array<GLvec4, 256> new_data; - - for (unsigned offset = 0; offset < new_data.size(); ++offset) { - new_data[offset][0] = Pica::g_state.lighting.luts[(lut_index * 4) + 0][offset].ToFloat(); - new_data[offset][1] = Pica::g_state.lighting.luts[(lut_index * 4) + 1][offset].ToFloat(); - new_data[offset][2] = Pica::g_state.lighting.luts[(lut_index * 4) + 2][offset].ToFloat(); - new_data[offset][3] = Pica::g_state.lighting.luts[(lut_index * 4) + 3][offset].ToFloat(); - } + std::array<GLvec2, 256> new_data; + const auto& source_lut = Pica::g_state.lighting.luts[lut_index]; + std::transform(source_lut.begin(), source_lut.end(), new_data.begin(), [](const auto& entry) { + return GLvec2{entry.ToFloat(), entry.DiffToFloat()}; + }); if (new_data != lighting_lut_data[lut_index]) { lighting_lut_data[lut_index] = new_data; - glActiveTexture(GL_TEXTURE3 + lut_index); - glTexSubImage1D(GL_TEXTURE_1D, 0, 0, 256, GL_RGBA, GL_FLOAT, - lighting_lut_data[lut_index].data()); + glBindBuffer(GL_TEXTURE_BUFFER, lighting_lut_buffer.handle); + glBufferSubData(GL_TEXTURE_BUFFER, lut_index * new_data.size() * sizeof(GLvec2), + new_data.size() * 
sizeof(GLvec2), new_data.data()); } } @@ -1595,6 +1605,17 @@ void RasterizerOpenGL::SyncLightPosition(int light_index) { } } +void RasterizerOpenGL::SyncLightSpotDirection(int light_index) { + const auto& light = Pica::g_state.regs.lighting.light[light_index]; + GLvec3 spot_direction = {light.spot_x / 2047.0f, light.spot_y / 2047.0f, + light.spot_z / 2047.0f}; + + if (spot_direction != uniform_block_data.data.light_src[light_index].spot_direction) { + uniform_block_data.data.light_src[light_index].spot_direction = spot_direction; + uniform_block_data.dirty = true; + } +} + void RasterizerOpenGL::SyncLightDistanceAttenuationBias(int light_index) { GLfloat dist_atten_bias = Pica::float20::FromRaw(Pica::g_state.regs.lighting.light[light_index].dist_atten_bias) diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index a9ad7d660..a433c1d4a 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -125,6 +125,7 @@ private: alignas(16) GLvec3 diffuse; alignas(16) GLvec3 ambient; alignas(16) GLvec3 position; + alignas(16) GLvec3 spot_direction; // negated GLfloat dist_atten_bias; GLfloat dist_atten_scale; }; @@ -153,7 +154,7 @@ private: }; static_assert( - sizeof(UniformData) == 0x3E0, + sizeof(UniformData) == 0x460, "The size of the UniformData structure has changed, update the structure in the shader"); static_assert(sizeof(UniformData) < 16384, "UniformData structure must be less than 16kb as per the OpenGL spec"); @@ -241,6 +242,9 @@ private: /// Syncs the specified light's position to match the PICA register void SyncLightPosition(int light_index); + /// Syncs the specified spot light direction to match the PICA register + void SyncLightSpotDirection(int light_index); + /// Syncs the specified light's distance attenuation bias to match the PICA register void SyncLightDistanceAttenuationBias(int light_index); @@ -259,7 +263,7 @@ private: struct { 
UniformData data; - bool lut_dirty[6]; + std::array<bool, Pica::LightingRegs::NumLightingSampler> lut_dirty; bool fog_lut_dirty; bool proctex_noise_lut_dirty; bool proctex_color_map_dirty; @@ -275,11 +279,13 @@ private: OGLBuffer uniform_buffer; OGLFramebuffer framebuffer; - std::array<OGLTexture, 6> lighting_luts; - std::array<std::array<GLvec4, 256>, 6> lighting_lut_data{}; + OGLBuffer lighting_lut_buffer; + OGLTexture lighting_lut; + std::array<std::array<GLvec2, 256>, Pica::LightingRegs::NumLightingSampler> lighting_lut_data{}; + OGLBuffer fog_lut_buffer; OGLTexture fog_lut; - std::array<GLuint, 128> fog_lut_data{}; + std::array<GLvec2, 128> fog_lut_data{}; OGLTexture proctex_noise_lut; std::array<GLvec2, 128> proctex_noise_lut_data{}; diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp index ffe419863..c93b108fb 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.cpp +++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp @@ -73,8 +73,12 @@ PicaShaderConfig PicaShaderConfig::BuildFromRegs(const Pica::Regs& regs) { state.lighting.light[light_index].num = num; state.lighting.light[light_index].directional = light.config.directional != 0; state.lighting.light[light_index].two_sided_diffuse = light.config.two_sided_diffuse != 0; + state.lighting.light[light_index].geometric_factor_0 = light.config.geometric_factor_0 != 0; + state.lighting.light[light_index].geometric_factor_1 = light.config.geometric_factor_1 != 0; state.lighting.light[light_index].dist_atten_enable = !regs.lighting.IsDistAttenDisabled(num); + state.lighting.light[light_index].spot_atten_enable = + !regs.lighting.IsSpotAttenDisabled(num); } state.lighting.lut_d0.enable = regs.lighting.config1.disable_lut_d0 == 0; @@ -87,6 +91,12 @@ PicaShaderConfig PicaShaderConfig::BuildFromRegs(const Pica::Regs& regs) { state.lighting.lut_d1.type = regs.lighting.lut_input.d1.Value(); state.lighting.lut_d1.scale = 
regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.d1); + // this is a dummy field due to lack of the corresponding register + state.lighting.lut_sp.enable = true; + state.lighting.lut_sp.abs_input = regs.lighting.abs_lut_input.disable_sp == 0; + state.lighting.lut_sp.type = regs.lighting.lut_input.sp.Value(); + state.lighting.lut_sp.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.sp); + state.lighting.lut_fr.enable = regs.lighting.config1.disable_lut_fr == 0; state.lighting.lut_fr.abs_input = regs.lighting.abs_lut_input.disable_fr == 0; state.lighting.lut_fr.type = regs.lighting.lut_input.fr.Value(); @@ -509,14 +519,17 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) { out += "vec4 diffuse_sum = vec4(0.0, 0.0, 0.0, 1.0);\n" "vec4 specular_sum = vec4(0.0, 0.0, 0.0, 1.0);\n" "vec3 light_vector = vec3(0.0);\n" - "vec3 refl_value = vec3(0.0);\n"; - - // Compute fragment normals + "vec3 refl_value = vec3(0.0);\n" + "vec3 spot_dir = vec3(0.0);\n" + "vec3 half_vector = vec3(0.0);\n" + "float geo_factor = 1.0;\n"; + + // Compute fragment normals and tangents + const std::string pertubation = + "2.0 * (" + SampleTexture(config, lighting.bump_selector) + ").rgb - 1.0"; if (lighting.bump_mode == LightingRegs::LightingBumpMode::NormalMap) { - // Bump mapping is enabled using a normal map, read perturbation vector from the selected - // texture - out += "vec3 surface_normal = 2.0 * (" + SampleTexture(config, lighting.bump_selector) + - ").rgb - 1.0;\n"; + // Bump mapping is enabled using a normal map + out += "vec3 surface_normal = " + pertubation + ";\n"; // Recompute Z-component of perturbation if 'renorm' is enabled, this provides a higher // precision result @@ -525,31 +538,41 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) { "(1.0 - (surface_normal.x*surface_normal.x + surface_normal.y*surface_normal.y))"; out += "surface_normal.z = sqrt(max(" + val + ", 0.0));\n"; } + + // The tangent 
vector is not perturbed by the normal map and is just a unit vector. + out += "vec3 surface_tangent = vec3(1.0, 0.0, 0.0);\n"; } else if (lighting.bump_mode == LightingRegs::LightingBumpMode::TangentMap) { // Bump mapping is enabled using a tangent map - LOG_CRITICAL(HW_GPU, "unimplemented bump mapping mode (tangent mapping)"); - UNIMPLEMENTED(); + out += "vec3 surface_tangent = " + pertubation + ";\n"; + // Mathematically, recomputing Z-component of the tangent vector won't affect the relevant + // computation below, which is also confirmed on 3DS. So we don't bother recomputing here + // even if 'renorm' is enabled. + + // The normal vector is not perturbed by the tangent map and is just a unit vector. + out += "vec3 surface_normal = vec3(0.0, 0.0, 1.0);\n"; } else { - // No bump mapping - surface local normal is just a unit normal + // No bump mapping - surface local normal and tangent are just unit vectors out += "vec3 surface_normal = vec3(0.0, 0.0, 1.0);\n"; + out += "vec3 surface_tangent = vec3(1.0, 0.0, 0.0);\n"; } // Rotate the surface-local normal by the interpolated normal quaternion to convert it to // eyespace. 
- out += "vec3 normal = quaternion_rotate(normalize(normquat), surface_normal);\n"; + out += "vec4 normalized_normquat = normalize(normquat);\n"; + out += "vec3 normal = quaternion_rotate(normalized_normquat, surface_normal);\n"; + out += "vec3 tangent = quaternion_rotate(normalized_normquat, surface_tangent);\n"; - // Gets the index into the specified lookup table for specular lighting - auto GetLutIndex = [&lighting](unsigned light_num, LightingRegs::LightingLutInput input, - bool abs) { - const std::string half_angle = "normalize(normalize(view) + light_vector)"; + // Samples the specified lookup table for specular lighting + auto GetLutValue = [&lighting](LightingRegs::LightingSampler sampler, unsigned light_num, + LightingRegs::LightingLutInput input, bool abs) { std::string index; switch (input) { case LightingRegs::LightingLutInput::NH: - index = "dot(normal, " + half_angle + ")"; + index = "dot(normal, normalize(half_vector))"; break; case LightingRegs::LightingLutInput::VH: - index = std::string("dot(normalize(view), " + half_angle + ")"); + index = std::string("dot(normalize(view), normalize(half_vector))"); break; case LightingRegs::LightingLutInput::NV: @@ -560,6 +583,26 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) { index = std::string("dot(light_vector, normal)"); break; + case LightingRegs::LightingLutInput::SP: + index = std::string("dot(light_vector, spot_dir)"); + break; + + case LightingRegs::LightingLutInput::CP: + // CP input is only available with configuration 7 + if (lighting.config == LightingRegs::LightingConfig::Config7) { + // Note: even if the normal vector is modified by normal map, which is not the + // normal of the tangent plane anymore, the half angle vector is still projected + // using the modified normal vector. 
+ std::string half_angle_proj = "normalize(half_vector) - normal / dot(normal, " + "normal) * dot(normal, normalize(half_vector))"; + // Note: the half angle vector projection is confirmed not normalized before the dot + // product. The result is in fact not cos(phi) as the name suggested. + index = "dot(" + half_angle_proj + ", tangent)"; + } else { + index = "0.0"; + } + break; + default: LOG_CRITICAL(HW_GPU, "Unknown lighting LUT input %d\n", (int)input); UNIMPLEMENTED(); @@ -567,22 +610,18 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) { break; } + std::string sampler_string = std::to_string(static_cast<unsigned>(sampler)); + if (abs) { // LUT index is in the range of (0.0, 1.0) index = lighting.light[light_num].two_sided_diffuse ? "abs(" + index + ")" : "max(" + index + ", 0.0)"; + return "LookupLightingLUTUnsigned(" + sampler_string + ", " + index + ")"; } else { // LUT index is in the range of (-1.0, 1.0) - index = "((" + index + " < 0) ? " + index + " + 2.0 : " + index + ") / 2.0"; + return "LookupLightingLUTSigned(" + sampler_string + ", " + index + ")"; } - return "(OFFSET_256 + SCALE_256 * clamp(" + index + ", 0.0, 1.0))"; - }; - - // Gets the lighting lookup table value given the specified sampler and index - auto GetLutValue = [](LightingRegs::LightingSampler sampler, std::string lut_index) { - return std::string("texture(lut[" + std::to_string((unsigned)sampler / 4) + "], " + - lut_index + ")[" + std::to_string((unsigned)sampler & 3) + "]"); }; // Write the code to emulate each enabled light @@ -596,48 +635,71 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) { else out += "light_vector = normalize(" + light_src + ".position + view);\n"; + out += "spot_dir = " + light_src + ".spot_direction;\n"; + out += "half_vector = normalize(view) + light_vector;\n"; + // Compute dot product of light_vector and normal, adjust if lighting is one-sided or // two-sided std::string dot_product = 
light_config.two_sided_diffuse ? "abs(dot(light_vector, normal))" : "max(dot(light_vector, normal), 0.0)"; + // If enabled, compute spot light attenuation value + std::string spot_atten = "1.0"; + if (light_config.spot_atten_enable && + LightingRegs::IsLightingSamplerSupported( + lighting.config, LightingRegs::LightingSampler::SpotlightAttenuation)) { + std::string value = + GetLutValue(LightingRegs::SpotlightAttenuationSampler(light_config.num), + light_config.num, lighting.lut_sp.type, lighting.lut_sp.abs_input); + spot_atten = "(" + std::to_string(lighting.lut_sp.scale) + " * " + value + ")"; + } + // If enabled, compute distance attenuation value std::string dist_atten = "1.0"; if (light_config.dist_atten_enable) { - std::string index = "(" + light_src + ".dist_atten_scale * length(-view - " + - light_src + ".position) + " + light_src + ".dist_atten_bias)"; - index = "(OFFSET_256 + SCALE_256 * clamp(" + index + ", 0.0, 1.0))"; - const unsigned lut_num = - ((unsigned)LightingRegs::LightingSampler::DistanceAttenuation + light_config.num); - dist_atten = GetLutValue((LightingRegs::LightingSampler)lut_num, index); + std::string index = "clamp(" + light_src + ".dist_atten_scale * length(-view - " + + light_src + ".position) + " + light_src + + ".dist_atten_bias, 0.0, 1.0)"; + auto sampler = LightingRegs::DistanceAttenuationSampler(light_config.num); + dist_atten = "LookupLightingLUTUnsigned(" + + std::to_string(static_cast<unsigned>(sampler)) + "," + index + ")"; } // If enabled, clamp specular component if lighting result is negative std::string clamp_highlights = lighting.clamp_highlights ? "(dot(light_vector, normal) <= 0.0 ? 0.0 : 1.0)" : "1.0"; + if (light_config.geometric_factor_0 || light_config.geometric_factor_1) { + out += "geo_factor = dot(half_vector, half_vector);\n" + "geo_factor = geo_factor == 0.0 ? 
0.0 : min(" + + dot_product + " / geo_factor, 1.0);\n"; + } + // Specular 0 component std::string d0_lut_value = "1.0"; if (lighting.lut_d0.enable && LightingRegs::IsLightingSamplerSupported( lighting.config, LightingRegs::LightingSampler::Distribution0)) { // Lookup specular "distribution 0" LUT value - std::string index = - GetLutIndex(light_config.num, lighting.lut_d0.type, lighting.lut_d0.abs_input); - d0_lut_value = "(" + std::to_string(lighting.lut_d0.scale) + " * " + - GetLutValue(LightingRegs::LightingSampler::Distribution0, index) + ")"; + std::string value = + GetLutValue(LightingRegs::LightingSampler::Distribution0, light_config.num, + lighting.lut_d0.type, lighting.lut_d0.abs_input); + d0_lut_value = "(" + std::to_string(lighting.lut_d0.scale) + " * " + value + ")"; } std::string specular_0 = "(" + d0_lut_value + " * " + light_src + ".specular_0)"; + if (light_config.geometric_factor_0) { + specular_0 = "(" + specular_0 + " * geo_factor)"; + } // If enabled, lookup ReflectRed value, otherwise, 1.0 is used if (lighting.lut_rr.enable && LightingRegs::IsLightingSamplerSupported(lighting.config, LightingRegs::LightingSampler::ReflectRed)) { - std::string index = - GetLutIndex(light_config.num, lighting.lut_rr.type, lighting.lut_rr.abs_input); - std::string value = "(" + std::to_string(lighting.lut_rr.scale) + " * " + - GetLutValue(LightingRegs::LightingSampler::ReflectRed, index) + ")"; + std::string value = + GetLutValue(LightingRegs::LightingSampler::ReflectRed, light_config.num, + lighting.lut_rr.type, lighting.lut_rr.abs_input); + value = "(" + std::to_string(lighting.lut_rr.scale) + " * " + value + ")"; out += "refl_value.r = " + value + ";\n"; } else { out += "refl_value.r = 1.0;\n"; @@ -647,11 +709,10 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) { if (lighting.lut_rg.enable && LightingRegs::IsLightingSamplerSupported(lighting.config, LightingRegs::LightingSampler::ReflectGreen)) { - std::string index = - 
GetLutIndex(light_config.num, lighting.lut_rg.type, lighting.lut_rg.abs_input); - std::string value = "(" + std::to_string(lighting.lut_rg.scale) + " * " + - GetLutValue(LightingRegs::LightingSampler::ReflectGreen, index) + - ")"; + std::string value = + GetLutValue(LightingRegs::LightingSampler::ReflectGreen, light_config.num, + lighting.lut_rg.type, lighting.lut_rg.abs_input); + value = "(" + std::to_string(lighting.lut_rg.scale) + " * " + value + ")"; out += "refl_value.g = " + value + ";\n"; } else { out += "refl_value.g = refl_value.r;\n"; @@ -661,11 +722,10 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) { if (lighting.lut_rb.enable && LightingRegs::IsLightingSamplerSupported(lighting.config, LightingRegs::LightingSampler::ReflectBlue)) { - std::string index = - GetLutIndex(light_config.num, lighting.lut_rb.type, lighting.lut_rb.abs_input); - std::string value = "(" + std::to_string(lighting.lut_rb.scale) + " * " + - GetLutValue(LightingRegs::LightingSampler::ReflectBlue, index) + - ")"; + std::string value = + GetLutValue(LightingRegs::LightingSampler::ReflectBlue, light_config.num, + lighting.lut_rb.type, lighting.lut_rb.abs_input); + value = "(" + std::to_string(lighting.lut_rb.scale) + " * " + value + ")"; out += "refl_value.b = " + value + ";\n"; } else { out += "refl_value.b = refl_value.r;\n"; @@ -677,23 +737,26 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) { LightingRegs::IsLightingSamplerSupported( lighting.config, LightingRegs::LightingSampler::Distribution1)) { // Lookup specular "distribution 1" LUT value - std::string index = - GetLutIndex(light_config.num, lighting.lut_d1.type, lighting.lut_d1.abs_input); - d1_lut_value = "(" + std::to_string(lighting.lut_d1.scale) + " * " + - GetLutValue(LightingRegs::LightingSampler::Distribution1, index) + ")"; + std::string value = + GetLutValue(LightingRegs::LightingSampler::Distribution1, light_config.num, + lighting.lut_d1.type, 
lighting.lut_d1.abs_input); + d1_lut_value = "(" + std::to_string(lighting.lut_d1.scale) + " * " + value + ")"; } std::string specular_1 = "(" + d1_lut_value + " * refl_value * " + light_src + ".specular_1)"; + if (light_config.geometric_factor_1) { + specular_1 = "(" + specular_1 + " * geo_factor)"; + } // Fresnel if (lighting.lut_fr.enable && LightingRegs::IsLightingSamplerSupported(lighting.config, LightingRegs::LightingSampler::Fresnel)) { // Lookup fresnel LUT value - std::string index = - GetLutIndex(light_config.num, lighting.lut_fr.type, lighting.lut_fr.abs_input); - std::string value = "(" + std::to_string(lighting.lut_fr.scale) + " * " + - GetLutValue(LightingRegs::LightingSampler::Fresnel, index) + ")"; + std::string value = + GetLutValue(LightingRegs::LightingSampler::Fresnel, light_config.num, + lighting.lut_fr.type, lighting.lut_fr.abs_input); + value = "(" + std::to_string(lighting.lut_fr.scale) + " * " + value + ")"; // Enabled for difffuse lighting alpha component if (lighting.fresnel_selector == LightingRegs::LightingFresnelSelector::PrimaryAlpha || @@ -711,11 +774,11 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) { // Compute primary fragment color (diffuse lighting) function out += "diffuse_sum.rgb += ((" + light_src + ".diffuse * " + dot_product + ") + " + - light_src + ".ambient) * " + dist_atten + ";\n"; + light_src + ".ambient) * " + dist_atten + " * " + spot_atten + ";\n"; // Compute secondary fragment color (specular lighting) function out += "specular_sum.rgb += (" + specular_0 + " + " + specular_1 + ") * " + - clamp_highlights + " * " + dist_atten + ";\n"; + clamp_highlights + " * " + dist_atten + " * " + spot_atten + ";\n"; } // Sum final lighting result @@ -947,10 +1010,6 @@ std::string GenerateFragmentShader(const PicaShaderConfig& config) { #define NUM_TEV_STAGES 6 #define NUM_LIGHTS 8 -// Texture coordinate offsets and scales -#define OFFSET_256 (0.5 / 256.0) -#define SCALE_256 (255.0 / 256.0) - in 
vec4 primary_color; in vec2 texcoord[3]; in float texcoord0_w; @@ -967,6 +1026,7 @@ struct LightSrc { vec3 diffuse; vec3 ambient; vec3 position; + vec3 spot_direction; float dist_atten_bias; float dist_atten_scale; }; @@ -991,8 +1051,8 @@ layout (std140) uniform shader_data { }; uniform sampler2D tex[3]; -uniform sampler1D lut[6]; -uniform usampler1D fog_lut; +uniform samplerBuffer lighting_lut; +uniform samplerBuffer fog_lut; uniform sampler1D proctex_noise_lut; uniform sampler1D proctex_color_map; uniform sampler1D proctex_alpha_map; @@ -1004,6 +1064,24 @@ vec3 quaternion_rotate(vec4 q, vec3 v) { return v + 2.0 * cross(q.xyz, cross(q.xyz, v) + q.w * v); } +float LookupLightingLUT(int lut_index, int index, float delta) { + vec2 entry = texelFetch(lighting_lut, lut_index * 256 + index).rg; + return entry.r + entry.g * delta; +} + +float LookupLightingLUTUnsigned(int lut_index, float pos) { + int index = clamp(int(pos * 256.0), 0, 255); + float delta = pos * 256.0 - index; + return LookupLightingLUT(lut_index, index, delta); +} + +float LookupLightingLUTSigned(int lut_index, float pos) { + int index = clamp(int(pos * 128.0), -128, 127); + float delta = pos * 128.0 - index; + if (index < 0) index += 256; + return LookupLightingLUT(lut_index, index, delta); +} + )"; if (config.state.proctex.enable) @@ -1067,12 +1145,8 @@ vec4 secondary_fragment_color = vec4(0.0); // Generate clamped fog factor from LUT for given fog index out += "float fog_i = clamp(floor(fog_index), 0.0, 127.0);\n"; out += "float fog_f = fog_index - fog_i;\n"; - out += "uint fog_lut_entry = texelFetch(fog_lut, int(fog_i), 0).r;\n"; - out += "float fog_lut_entry_difference = float(int((fog_lut_entry & 0x1FFFU) << 19U) >> " - "19);\n"; // Extract signed difference - out += "float fog_lut_entry_value = float((fog_lut_entry >> 13U) & 0x7FFU);\n"; - out += "float fog_factor = (fog_lut_entry_value + fog_lut_entry_difference * fog_f) / " - "2047.0;\n"; + out += "vec2 fog_lut_entry = texelFetch(fog_lut, 
int(fog_i)).rg;\n"; + out += "float fog_factor = fog_lut_entry.r + fog_lut_entry.g * fog_f;\n"; out += "fog_factor = clamp(fog_factor, 0.0, 1.0);\n"; // Blend the fog diff --git a/src/video_core/renderer_opengl/gl_shader_gen.h b/src/video_core/renderer_opengl/gl_shader_gen.h index ea6d216d1..2302ae453 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.h +++ b/src/video_core/renderer_opengl/gl_shader_gen.h @@ -93,6 +93,9 @@ union PicaShaderConfig { bool directional; bool two_sided_diffuse; bool dist_atten_enable; + bool spot_atten_enable; + bool geometric_factor_0; + bool geometric_factor_1; } light[8]; bool enable; @@ -110,7 +113,7 @@ union PicaShaderConfig { bool abs_input; Pica::LightingRegs::LightingLutInput type; float scale; - } lut_d0, lut_d1, lut_fr, lut_rr, lut_rg, lut_rb; + } lut_d0, lut_d1, lut_sp, lut_fr, lut_rr, lut_rg, lut_rb; } lighting; struct { diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp index bf837a7fb..eface2dea 100644 --- a/src/video_core/renderer_opengl/gl_state.cpp +++ b/src/video_core/renderer_opengl/gl_state.cpp @@ -52,11 +52,9 @@ OpenGLState::OpenGLState() { texture_unit.sampler = 0; } - for (auto& lut : lighting_luts) { - lut.texture_1d = 0; - } + lighting_lut.texture_buffer = 0; - fog_lut.texture_1d = 0; + fog_lut.texture_buffer = 0; proctex_lut.texture_1d = 0; proctex_diff_lut.texture_1d = 0; @@ -185,7 +183,7 @@ void OpenGLState::Apply() const { // Textures for (unsigned i = 0; i < ARRAY_SIZE(texture_units); ++i) { if (texture_units[i].texture_2d != cur_state.texture_units[i].texture_2d) { - glActiveTexture(GL_TEXTURE0 + i); + glActiveTexture(TextureUnits::PicaTexture(i).Enum()); glBindTexture(GL_TEXTURE_2D, texture_units[i].texture_2d); } if (texture_units[i].sampler != cur_state.texture_units[i].sampler) { @@ -194,46 +192,44 @@ void OpenGLState::Apply() const { } // Lighting LUTs - for (unsigned i = 0; i < ARRAY_SIZE(lighting_luts); ++i) { - if 
(lighting_luts[i].texture_1d != cur_state.lighting_luts[i].texture_1d) { - glActiveTexture(GL_TEXTURE3 + i); - glBindTexture(GL_TEXTURE_1D, lighting_luts[i].texture_1d); - } + if (lighting_lut.texture_buffer != cur_state.lighting_lut.texture_buffer) { + glActiveTexture(TextureUnits::LightingLUT.Enum()); + glBindTexture(GL_TEXTURE_BUFFER, cur_state.lighting_lut.texture_buffer); } // Fog LUT - if (fog_lut.texture_1d != cur_state.fog_lut.texture_1d) { - glActiveTexture(GL_TEXTURE9); - glBindTexture(GL_TEXTURE_1D, fog_lut.texture_1d); + if (fog_lut.texture_buffer != cur_state.fog_lut.texture_buffer) { + glActiveTexture(TextureUnits::FogLUT.Enum()); + glBindTexture(GL_TEXTURE_BUFFER, fog_lut.texture_buffer); } // ProcTex Noise LUT if (proctex_noise_lut.texture_1d != cur_state.proctex_noise_lut.texture_1d) { - glActiveTexture(GL_TEXTURE10); + glActiveTexture(TextureUnits::ProcTexNoiseLUT.Enum()); glBindTexture(GL_TEXTURE_1D, proctex_noise_lut.texture_1d); } // ProcTex Color Map if (proctex_color_map.texture_1d != cur_state.proctex_color_map.texture_1d) { - glActiveTexture(GL_TEXTURE11); + glActiveTexture(TextureUnits::ProcTexColorMap.Enum()); glBindTexture(GL_TEXTURE_1D, proctex_color_map.texture_1d); } // ProcTex Alpha Map if (proctex_alpha_map.texture_1d != cur_state.proctex_alpha_map.texture_1d) { - glActiveTexture(GL_TEXTURE12); + glActiveTexture(TextureUnits::ProcTexAlphaMap.Enum()); glBindTexture(GL_TEXTURE_1D, proctex_alpha_map.texture_1d); } // ProcTex LUT if (proctex_lut.texture_1d != cur_state.proctex_lut.texture_1d) { - glActiveTexture(GL_TEXTURE13); + glActiveTexture(TextureUnits::ProcTexLUT.Enum()); glBindTexture(GL_TEXTURE_1D, proctex_lut.texture_1d); } // ProcTex Diff LUT if (proctex_diff_lut.texture_1d != cur_state.proctex_diff_lut.texture_1d) { - glActiveTexture(GL_TEXTURE14); + glActiveTexture(TextureUnits::ProcTexDiffLUT.Enum()); glBindTexture(GL_TEXTURE_1D, proctex_diff_lut.texture_1d); } @@ -274,6 +270,20 @@ void OpenGLState::ResetTexture(GLuint 
handle) { unit.texture_2d = 0; } } + if (cur_state.lighting_lut.texture_buffer == handle) + cur_state.lighting_lut.texture_buffer = 0; + if (cur_state.fog_lut.texture_buffer == handle) + cur_state.fog_lut.texture_buffer = 0; + if (cur_state.proctex_noise_lut.texture_1d == handle) + cur_state.proctex_noise_lut.texture_1d = 0; + if (cur_state.proctex_color_map.texture_1d == handle) + cur_state.proctex_color_map.texture_1d = 0; + if (cur_state.proctex_alpha_map.texture_1d == handle) + cur_state.proctex_alpha_map.texture_1d = 0; + if (cur_state.proctex_lut.texture_1d == handle) + cur_state.proctex_lut.texture_1d = 0; + if (cur_state.proctex_diff_lut.texture_1d == handle) + cur_state.proctex_diff_lut.texture_1d = 0; } void OpenGLState::ResetSampler(GLuint handle) { diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h index 7dcc03bd5..1efcf0811 100644 --- a/src/video_core/renderer_opengl/gl_state.h +++ b/src/video_core/renderer_opengl/gl_state.h @@ -6,6 +6,29 @@ #include <glad/glad.h> +namespace TextureUnits { + +struct TextureUnit { + GLint id; + constexpr GLenum Enum() const { + return static_cast<GLenum>(GL_TEXTURE0 + id); + } +}; + +constexpr TextureUnit PicaTexture(int unit) { + return TextureUnit{unit}; +} + +constexpr TextureUnit LightingLUT{3}; +constexpr TextureUnit FogLUT{4}; +constexpr TextureUnit ProcTexNoiseLUT{5}; +constexpr TextureUnit ProcTexColorMap{6}; +constexpr TextureUnit ProcTexAlphaMap{7}; +constexpr TextureUnit ProcTexLUT{8}; +constexpr TextureUnit ProcTexDiffLUT{9}; + +} // namespace TextureUnits + class OpenGLState { public: struct { @@ -64,11 +87,11 @@ public: } texture_units[3]; struct { - GLuint texture_1d; // GL_TEXTURE_BINDING_1D - } lighting_luts[6]; + GLuint texture_buffer; // GL_TEXTURE_BINDING_BUFFER + } lighting_lut; struct { - GLuint texture_1d; // GL_TEXTURE_BINDING_1D + GLuint texture_buffer; // GL_TEXTURE_BINDING_BUFFER } fog_lut; struct { diff --git 
a/src/video_core/renderer_opengl/pica_to_gl.h b/src/video_core/renderer_opengl/pica_to_gl.h index 93d7b0b71..c7fa1f873 100644 --- a/src/video_core/renderer_opengl/pica_to_gl.h +++ b/src/video_core/renderer_opengl/pica_to_gl.h @@ -12,6 +12,7 @@ #include "common/common_funcs.h" #include "common/common_types.h" #include "common/logging/log.h" +#include "core/core.h" #include "video_core/regs_framebuffer.h" #include "video_core/regs_lighting.h" #include "video_core/regs_texturing.h" @@ -55,6 +56,12 @@ inline GLenum WrapMode(Pica::TexturingRegs::TextureConfig::WrapMode mode) { GL_CLAMP_TO_BORDER, // WrapMode::ClampToBorder GL_REPEAT, // WrapMode::Repeat GL_MIRRORED_REPEAT, // WrapMode::MirroredRepeat + // TODO(wwylele): ClampToEdge2 and ClampToBorder2 are not properly implemented here. See the + // comments in enum WrapMode. + GL_CLAMP_TO_EDGE, // WrapMode::ClampToEdge2 + GL_CLAMP_TO_BORDER, // WrapMode::ClampToBorder2 + GL_REPEAT, // WrapMode::Repeat2 + GL_REPEAT, // WrapMode::Repeat3 }; // Range check table for input @@ -65,6 +72,13 @@ inline GLenum WrapMode(Pica::TexturingRegs::TextureConfig::WrapMode mode) { return GL_CLAMP_TO_EDGE; } + if (static_cast<u32>(mode) > 3) { + Core::Telemetry().AddField(Telemetry::FieldType::Session, + "VideoCore_Pica_UnsupportedTextureWrapMode", + static_cast<u32>(mode)); + LOG_WARNING(Render_OpenGL, "Using texture wrap mode %u", static_cast<u32>(mode)); + } + GLenum gl_mode = wrap_mode_table[mode]; // Check for dummy values indicating an unknown mode diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index d90c776f9..65c18aecc 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -481,9 +481,18 @@ bool RendererOpenGL::Init() { glDebugMessageCallback(DebugHandler, nullptr); } - LOG_INFO(Render_OpenGL, "GL_VERSION: %s", glGetString(GL_VERSION)); - LOG_INFO(Render_OpenGL, "GL_VENDOR: %s", 
glGetString(GL_VENDOR)); - LOG_INFO(Render_OpenGL, "GL_RENDERER: %s", glGetString(GL_RENDERER)); + const char* gl_version{reinterpret_cast<char const*>(glGetString(GL_VERSION))}; + const char* gpu_vendor{reinterpret_cast<char const*>(glGetString(GL_VENDOR))}; + const char* gpu_model{reinterpret_cast<char const*>(glGetString(GL_RENDERER))}; + + LOG_INFO(Render_OpenGL, "GL_VERSION: %s", gl_version); + LOG_INFO(Render_OpenGL, "GL_VENDOR: %s", gpu_vendor); + LOG_INFO(Render_OpenGL, "GL_RENDERER: %s", gpu_model); + + Core::Telemetry().AddField(Telemetry::FieldType::UserSystem, "GPU_Vendor", gpu_vendor); + Core::Telemetry().AddField(Telemetry::FieldType::UserSystem, "GPU_Model", gpu_model); + Core::Telemetry().AddField(Telemetry::FieldType::UserSystem, "GPU_OpenGL_Version", gl_version); + if (!GLAD_GL_VERSION_3_3) { return false; } diff --git a/src/video_core/shader/shader_jit_x64_compiler.cpp b/src/video_core/shader/shader_jit_x64_compiler.cpp index 5d9b6448c..42a57aab1 100644 --- a/src/video_core/shader/shader_jit_x64_compiler.cpp +++ b/src/video_core/shader/shader_jit_x64_compiler.cpp @@ -321,27 +321,27 @@ void JitShader::Compile_EvaluateCondition(Instruction instr) { case Instruction::FlowControlType::Or: mov(eax, COND0); mov(ebx, COND1); - xor(eax, (instr.flow_control.refx.Value() ^ 1)); - xor(ebx, (instr.flow_control.refy.Value() ^ 1)); - or (eax, ebx); + xor_(eax, (instr.flow_control.refx.Value() ^ 1)); + xor_(ebx, (instr.flow_control.refy.Value() ^ 1)); + or_(eax, ebx); break; case Instruction::FlowControlType::And: mov(eax, COND0); mov(ebx, COND1); - xor(eax, (instr.flow_control.refx.Value() ^ 1)); - xor(ebx, (instr.flow_control.refy.Value() ^ 1)); - and(eax, ebx); + xor_(eax, (instr.flow_control.refx.Value() ^ 1)); + xor_(ebx, (instr.flow_control.refy.Value() ^ 1)); + and_(eax, ebx); break; case Instruction::FlowControlType::JustX: mov(eax, COND0); - xor(eax, (instr.flow_control.refx.Value() ^ 1)); + xor_(eax, (instr.flow_control.refx.Value() ^ 1)); break; case 
Instruction::FlowControlType::JustY: mov(eax, COND1); - xor(eax, (instr.flow_control.refy.Value() ^ 1)); + xor_(eax, (instr.flow_control.refy.Value() ^ 1)); break; } } @@ -734,10 +734,10 @@ void JitShader::Compile_LOOP(Instruction instr) { mov(LOOPCOUNT, dword[SETUP + offset]); mov(LOOPCOUNT_REG, LOOPCOUNT); shr(LOOPCOUNT_REG, 4); - and(LOOPCOUNT_REG, 0xFF0); // Y-component is the start + and_(LOOPCOUNT_REG, 0xFF0); // Y-component is the start mov(LOOPINC, LOOPCOUNT); shr(LOOPINC, 12); - and(LOOPINC, 0xFF0); // Z-component is the incrementer + and_(LOOPINC, 0xFF0); // Z-component is the incrementer movzx(LOOPCOUNT, LOOPCOUNT.cvt8()); // X-component is iteration count add(LOOPCOUNT, 1); // Iteration count is X-component + 1 @@ -858,9 +858,9 @@ void JitShader::Compile(const std::array<u32, MAX_PROGRAM_CODE_LENGTH>* program_ mov(STATE, ABI_PARAM2); // Zero address/loop registers - xor(ADDROFFS_REG_0.cvt32(), ADDROFFS_REG_0.cvt32()); - xor(ADDROFFS_REG_1.cvt32(), ADDROFFS_REG_1.cvt32()); - xor(LOOPCOUNT_REG, LOOPCOUNT_REG); + xor_(ADDROFFS_REG_0.cvt32(), ADDROFFS_REG_0.cvt32()); + xor_(ADDROFFS_REG_1.cvt32(), ADDROFFS_REG_1.cvt32()); + xor_(LOOPCOUNT_REG, LOOPCOUNT_REG); // Used to set a register to one static const __m128 one = {1.f, 1.f, 1.f, 1.f}; diff --git a/src/video_core/swrasterizer/rasterizer.cpp b/src/video_core/swrasterizer/rasterizer.cpp index 8b7b1defb..512e81c08 100644 --- a/src/video_core/swrasterizer/rasterizer.cpp +++ b/src/video_core/swrasterizer/rasterizer.cpp @@ -357,10 +357,22 @@ static void ProcessTriangleInternal(const Vertex& v0, const Vertex& v1, const Ve int t = (int)(v * float24::FromFloat32(static_cast<float>(texture.config.height))) .ToFloat32(); - if ((texture.config.wrap_s == TexturingRegs::TextureConfig::ClampToBorder && - (s < 0 || static_cast<u32>(s) >= texture.config.width)) || - (texture.config.wrap_t == TexturingRegs::TextureConfig::ClampToBorder && - (t < 0 || static_cast<u32>(t) >= texture.config.height))) { + bool use_border_s = 
false; + bool use_border_t = false; + + if (texture.config.wrap_s == TexturingRegs::TextureConfig::ClampToBorder) { + use_border_s = s < 0 || s >= static_cast<int>(texture.config.width); + } else if (texture.config.wrap_s == TexturingRegs::TextureConfig::ClampToBorder2) { + use_border_s = s >= static_cast<int>(texture.config.width); + } + + if (texture.config.wrap_t == TexturingRegs::TextureConfig::ClampToBorder) { + use_border_t = t < 0 || t >= static_cast<int>(texture.config.height); + } else if (texture.config.wrap_t == TexturingRegs::TextureConfig::ClampToBorder2) { + use_border_t = t >= static_cast<int>(texture.config.height); + } + + if (use_border_s || use_border_t) { auto border_color = texture.config.border_color; texture_color[i] = {border_color.r, border_color.g, border_color.b, border_color.a}; @@ -572,8 +584,7 @@ static void ProcessTriangleInternal(const Vertex& v0, const Vertex& v1, const Ve float fog_i = MathUtil::Clamp(floorf(fog_index), 0.0f, 127.0f); float fog_f = fog_index - fog_i; const auto& fog_lut_entry = g_state.fog.lut[static_cast<unsigned int>(fog_i)]; - float fog_factor = (fog_lut_entry.value + fog_lut_entry.difference * fog_f) / - 2047.0f; // This is signed fixed point 1.11 + float fog_factor = fog_lut_entry.ToFloat() + fog_lut_entry.DiffToFloat() * fog_f; fog_factor = MathUtil::Clamp(fog_factor, 0.0f, 1.0f); // Blend the fog diff --git a/src/video_core/swrasterizer/texturing.cpp b/src/video_core/swrasterizer/texturing.cpp index aeb6aeb8c..4f02b93f2 100644 --- a/src/video_core/swrasterizer/texturing.cpp +++ b/src/video_core/swrasterizer/texturing.cpp @@ -18,22 +18,33 @@ using TevStageConfig = TexturingRegs::TevStageConfig; int GetWrappedTexCoord(TexturingRegs::TextureConfig::WrapMode mode, int val, unsigned size) { switch (mode) { + case TexturingRegs::TextureConfig::ClampToEdge2: + // For negative coordinate, ClampToEdge2 behaves the same as Repeat + if (val < 0) { + return static_cast<int>(static_cast<unsigned>(val) % size); + } + // 
[[fallthrough]] case TexturingRegs::TextureConfig::ClampToEdge: val = std::max(val, 0); - val = std::min(val, (int)size - 1); + val = std::min(val, static_cast<int>(size) - 1); return val; case TexturingRegs::TextureConfig::ClampToBorder: return val; + case TexturingRegs::TextureConfig::ClampToBorder2: + // For ClampToBorder2, the case of positive coordinate beyond the texture size is already + // handled outside. Here we only handle the negative coordinate in the same way as Repeat. + case TexturingRegs::TextureConfig::Repeat2: + case TexturingRegs::TextureConfig::Repeat3: case TexturingRegs::TextureConfig::Repeat: - return (int)((unsigned)val % size); + return static_cast<int>(static_cast<unsigned>(val) % size); case TexturingRegs::TextureConfig::MirroredRepeat: { - unsigned int coord = ((unsigned)val % (2 * size)); + unsigned int coord = (static_cast<unsigned>(val) % (2 * size)); if (coord >= size) coord = 2 * size - 1 - coord; - return (int)coord; + return static_cast<int>(coord); } default: |
