about summary refs log tree commit diff
path: root/src/video_core
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core')
-rw-r--r-- src/video_core/CMakeLists.txt 2
-rw-r--r-- src/video_core/command_processor.cpp 62
-rw-r--r-- src/video_core/pica_state.h 2
-rw-r--r-- src/video_core/regs_framebuffer.h 10
-rw-r--r-- src/video_core/regs_pipeline.h 9
-rw-r--r-- src/video_core/renderer_opengl/gl_rasterizer.cpp 3
-rw-r--r-- src/video_core/renderer_opengl/gl_shader_gen.cpp 22
-rw-r--r-- src/video_core/renderer_opengl/gl_state.cpp 13
-rw-r--r-- src/video_core/renderer_opengl/gl_state.h 3
-rw-r--r-- src/video_core/swrasterizer/clipper.cpp 15
-rw-r--r-- src/video_core/swrasterizer/framebuffer.cpp 2
-rw-r--r-- src/video_core/swrasterizer/lighting.cpp 307
-rw-r--r-- src/video_core/swrasterizer/lighting.h 19
-rw-r--r-- src/video_core/swrasterizer/rasterizer.cpp 29
-rw-r--r-- src/video_core/swrasterizer/rasterizer.h 6
-rw-r--r-- src/video_core/swrasterizer/texturing.cpp 4
16 files changed, 455 insertions, 53 deletions
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index 0961a3251..cffa4c952 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -15,6 +15,7 @@ set(SRCS
shader/shader_interpreter.cpp
swrasterizer/clipper.cpp
swrasterizer/framebuffer.cpp
+ swrasterizer/lighting.cpp
swrasterizer/proctex.cpp
swrasterizer/rasterizer.cpp
swrasterizer/swrasterizer.cpp
@@ -55,6 +56,7 @@ set(HEADERS
shader/shader_interpreter.h
swrasterizer/clipper.h
swrasterizer/framebuffer.h
+ swrasterizer/lighting.h
swrasterizer/proctex.h
swrasterizer/rasterizer.h
swrasterizer/swrasterizer.h
diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp
index 4633a1df1..f98ca3302 100644
--- a/src/video_core/command_processor.cpp
+++ b/src/video_core/command_processor.cpp
@@ -119,27 +119,6 @@ static void WriteUniformFloatReg(ShaderRegs& config, Shader::ShaderSetup& setup,
}
}
-static void WriteProgramCode(ShaderRegs& config, Shader::ShaderSetup& setup,
- unsigned max_program_code_length, u32 value) {
- if (config.program.offset >= max_program_code_length) {
- LOG_ERROR(HW_GPU, "Invalid %s program offset %d", GetShaderSetupTypeName(setup),
- (int)config.program.offset);
- } else {
- setup.program_code[config.program.offset] = value;
- config.program.offset++;
- }
-}
-
-static void WriteSwizzlePatterns(ShaderRegs& config, Shader::ShaderSetup& setup, u32 value) {
- if (config.swizzle_patterns.offset >= setup.swizzle_data.size()) {
- LOG_ERROR(HW_GPU, "Invalid %s swizzle pattern offset %d", GetShaderSetupTypeName(setup),
- (int)config.swizzle_patterns.offset);
- } else {
- setup.swizzle_data[config.swizzle_patterns.offset] = value;
- config.swizzle_patterns.offset++;
- }
-}
-
static void WritePicaReg(u32 id, u32 value, u32 mask) {
auto& regs = g_state.regs;
@@ -458,7 +437,13 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
case PICA_REG_INDEX_WORKAROUND(gs.program.set_word[5], 0x2a1):
case PICA_REG_INDEX_WORKAROUND(gs.program.set_word[6], 0x2a2):
case PICA_REG_INDEX_WORKAROUND(gs.program.set_word[7], 0x2a3): {
- WriteProgramCode(g_state.regs.gs, g_state.gs, 4096, value);
+ u32& offset = g_state.regs.gs.program.offset;
+ if (offset >= 4096) {
+ LOG_ERROR(HW_GPU, "Invalid GS program offset %u", offset);
+ } else {
+ g_state.gs.program_code[offset] = value;
+ offset++;
+ }
break;
}
@@ -470,11 +455,18 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
case PICA_REG_INDEX_WORKAROUND(gs.swizzle_patterns.set_word[5], 0x2ab):
case PICA_REG_INDEX_WORKAROUND(gs.swizzle_patterns.set_word[6], 0x2ac):
case PICA_REG_INDEX_WORKAROUND(gs.swizzle_patterns.set_word[7], 0x2ad): {
- WriteSwizzlePatterns(g_state.regs.gs, g_state.gs, value);
+ u32& offset = g_state.regs.gs.swizzle_patterns.offset;
+ if (offset >= g_state.gs.swizzle_data.size()) {
+ LOG_ERROR(HW_GPU, "Invalid GS swizzle pattern offset %u", offset);
+ } else {
+ g_state.gs.swizzle_data[offset] = value;
+ offset++;
+ }
break;
}
case PICA_REG_INDEX(vs.bool_uniforms):
+ // TODO (wwylele): does regs.pipeline.gs_unit_exclusive_configuration affect this?
WriteUniformBoolReg(g_state.vs, g_state.regs.vs.bool_uniforms.Value());
break;
@@ -482,6 +474,7 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
case PICA_REG_INDEX_WORKAROUND(vs.int_uniforms[1], 0x2b2):
case PICA_REG_INDEX_WORKAROUND(vs.int_uniforms[2], 0x2b3):
case PICA_REG_INDEX_WORKAROUND(vs.int_uniforms[3], 0x2b4): {
+ // TODO (wwylele): does regs.pipeline.gs_unit_exclusive_configuration affect this?
unsigned index = (id - PICA_REG_INDEX_WORKAROUND(vs.int_uniforms[0], 0x2b1));
auto values = regs.vs.int_uniforms[index];
WriteUniformIntReg(g_state.vs, index,
@@ -497,6 +490,7 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
case PICA_REG_INDEX_WORKAROUND(vs.uniform_setup.set_value[5], 0x2c6):
case PICA_REG_INDEX_WORKAROUND(vs.uniform_setup.set_value[6], 0x2c7):
case PICA_REG_INDEX_WORKAROUND(vs.uniform_setup.set_value[7], 0x2c8): {
+ // TODO (wwylele): does regs.pipeline.gs_unit_exclusive_configuration affect this?
WriteUniformFloatReg(g_state.regs.vs, g_state.vs, vs_float_regs_counter,
vs_uniform_write_buffer, value);
break;
@@ -510,7 +504,16 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
case PICA_REG_INDEX_WORKAROUND(vs.program.set_word[5], 0x2d1):
case PICA_REG_INDEX_WORKAROUND(vs.program.set_word[6], 0x2d2):
case PICA_REG_INDEX_WORKAROUND(vs.program.set_word[7], 0x2d3): {
- WriteProgramCode(g_state.regs.vs, g_state.vs, 512, value);
+ u32& offset = g_state.regs.vs.program.offset;
+ if (offset >= 512) {
+ LOG_ERROR(HW_GPU, "Invalid VS program offset %u", offset);
+ } else {
+ g_state.vs.program_code[offset] = value;
+ if (!g_state.regs.pipeline.gs_unit_exclusive_configuration) {
+ g_state.gs.program_code[offset] = value;
+ }
+ offset++;
+ }
break;
}
@@ -522,7 +525,16 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
case PICA_REG_INDEX_WORKAROUND(vs.swizzle_patterns.set_word[5], 0x2db):
case PICA_REG_INDEX_WORKAROUND(vs.swizzle_patterns.set_word[6], 0x2dc):
case PICA_REG_INDEX_WORKAROUND(vs.swizzle_patterns.set_word[7], 0x2dd): {
- WriteSwizzlePatterns(g_state.regs.vs, g_state.vs, value);
+ u32& offset = g_state.regs.vs.swizzle_patterns.offset;
+ if (offset >= g_state.vs.swizzle_data.size()) {
+ LOG_ERROR(HW_GPU, "Invalid VS swizzle pattern offset %u", offset);
+ } else {
+ g_state.vs.swizzle_data[offset] = value;
+ if (!g_state.regs.pipeline.gs_unit_exclusive_configuration) {
+ g_state.gs.swizzle_data[offset] = value;
+ }
+ offset++;
+ }
break;
}
diff --git a/src/video_core/pica_state.h b/src/video_core/pica_state.h
index 2d23d34e6..864a2c9e6 100644
--- a/src/video_core/pica_state.h
+++ b/src/video_core/pica_state.h
@@ -79,7 +79,7 @@ struct State {
std::array<ColorDifferenceEntry, 256> color_diff_table;
} proctex;
- struct {
+ struct Lighting {
union LutEntry {
// Used for raw access
u32 raw;
diff --git a/src/video_core/regs_framebuffer.h b/src/video_core/regs_framebuffer.h
index a50bd4111..7b565f911 100644
--- a/src/video_core/regs_framebuffer.h
+++ b/src/video_core/regs_framebuffer.h
@@ -256,10 +256,9 @@ struct FramebufferRegs {
return 3;
case DepthFormat::D24S8:
return 4;
- default:
- LOG_CRITICAL(HW_GPU, "Unknown depth format %u", format);
- UNIMPLEMENTED();
}
+
+ ASSERT_MSG(false, "Unknown depth format %u", format);
}
// Returns the number of bits per depth component of the specified depth format
@@ -270,10 +269,9 @@ struct FramebufferRegs {
case DepthFormat::D24:
case DepthFormat::D24S8:
return 24;
- default:
- LOG_CRITICAL(HW_GPU, "Unknown depth format %u", format);
- UNIMPLEMENTED();
}
+
+ ASSERT_MSG(false, "Unknown depth format %u", format);
}
INSERT_PADDING_WORDS(0x20);
diff --git a/src/video_core/regs_pipeline.h b/src/video_core/regs_pipeline.h
index 31c747d77..8b6369297 100644
--- a/src/video_core/regs_pipeline.h
+++ b/src/video_core/regs_pipeline.h
@@ -202,7 +202,14 @@ struct PipelineRegs {
/// Number of input attributes to the vertex shader minus 1
BitField<0, 4, u32> max_input_attrib_index;
- INSERT_PADDING_WORDS(2);
+ INSERT_PADDING_WORDS(1);
+
+ // Shader unit 3, which can be used for both vertex and geometry shaders, gets its
+ // configuration depending on this register. If this is not set, unit 3 will share some
+ // configuration with other units. It is known that program code and swizzle patterns uploaded
+ // via regs.vs will also be uploaded to unit 3 if this is not set. Although very likely, it is
+ // still unclear whether uniforms and other configuration are shared as well.
+ BitField<0, 1, u32> gs_unit_exclusive_configuration;
enum class GPUMode : u32 {
Drawing = 0,
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 1c6c15a58..aa95ef21d 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -28,6 +28,9 @@ MICROPROFILE_DEFINE(OpenGL_Blits, "OpenGL", "Blits", MP_RGB(100, 100, 255));
MICROPROFILE_DEFINE(OpenGL_CacheManagement, "OpenGL", "Cache Mgmt", MP_RGB(100, 255, 100));
RasterizerOpenGL::RasterizerOpenGL() : shader_dirty(true) {
+ // Clipping plane 0 is always enabled for PICA fixed clip plane z <= 0
+ state.clip_distance[0] = true;
+
// Create sampler objects
for (size_t i = 0; i < texture_samplers.size(); ++i) {
texture_samplers[i].Create();
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp
index bb192affd..3f390491a 100644
--- a/src/video_core/renderer_opengl/gl_shader_gen.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp
@@ -525,11 +525,12 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) {
"float geo_factor = 1.0;\n";
// Compute fragment normals and tangents
- const std::string pertubation =
- "2.0 * (" + SampleTexture(config, lighting.bump_selector) + ").rgb - 1.0";
+ auto Perturbation = [&]() {
+ return "2.0 * (" + SampleTexture(config, lighting.bump_selector) + ").rgb - 1.0";
+ };
if (lighting.bump_mode == LightingRegs::LightingBumpMode::NormalMap) {
// Bump mapping is enabled using a normal map
- out += "vec3 surface_normal = " + pertubation + ";\n";
+ out += "vec3 surface_normal = " + Perturbation() + ";\n";
// Recompute Z-component of perturbation if 'renorm' is enabled, this provides a higher
// precision result
@@ -543,7 +544,7 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) {
out += "vec3 surface_tangent = vec3(1.0, 0.0, 0.0);\n";
} else if (lighting.bump_mode == LightingRegs::LightingBumpMode::TangentMap) {
// Bump mapping is enabled using a tangent map
- out += "vec3 surface_tangent = " + pertubation + ";\n";
+ out += "vec3 surface_tangent = " + Perturbation() + ";\n";
// Mathematically, recomputing Z-component of the tangent vector won't affect the relevant
// computation below, which is also confirmed on 3DS. So we don't bother recomputing here
// even if 'renorm' is enabled.
@@ -593,8 +594,8 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) {
// Note: even if the normal vector is modified by normal map, which is not the
// normal of the tangent plane anymore, the half angle vector is still projected
// using the modified normal vector.
- std::string half_angle_proj = "normalize(half_vector) - normal / dot(normal, "
- "normal) * dot(normal, normalize(half_vector))";
+ std::string half_angle_proj =
+ "normalize(half_vector) - normal * dot(normal, normalize(half_vector))";
// Note: the half angle vector projection is confirmed not normalized before the dot
// product. The result is in fact not cos(phi) as the name suggested.
index = "dot(" + half_angle_proj + ", tangent)";
@@ -1111,7 +1112,10 @@ vec4 secondary_fragment_color = vec4(0.0);
"gl_FragCoord.y < scissor_y2)) discard;\n";
}
- out += "float z_over_w = 1.0 - gl_FragCoord.z * 2.0;\n";
+ // After perspective divide, OpenGL transforms z_over_w from [-1, 1] to [near, far]. Here we use
+ // default near = 0 and far = 1, and undo the transformation to get the original z_over_w, then
+ // do our own transformation according to the PICA specification.
+ out += "float z_over_w = 2.0 * gl_FragCoord.z - 1.0;\n";
out += "float depth = z_over_w * depth_scale + depth_offset;\n";
if (state.depthmap_enable == RasterizerRegs::DepthBuffering::WBuffering) {
out += "depth /= gl_FragCoord.w;\n";
@@ -1194,7 +1198,9 @@ void main() {
texcoord0_w = vert_texcoord0_w;
normquat = vert_normquat;
view = vert_view;
- gl_Position = vec4(vert_position.x, vert_position.y, -vert_position.z, vert_position.w);
+ gl_Position = vert_position;
+ gl_ClipDistance[0] = -vert_position.z; // fixed PICA clipping plane z <= 0
+ // TODO (wwylele): calculate gl_ClipDistance[1] from user-defined clipping plane
}
)";
diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp
index bc9d34b84..06a905766 100644
--- a/src/video_core/renderer_opengl/gl_state.cpp
+++ b/src/video_core/renderer_opengl/gl_state.cpp
@@ -68,6 +68,8 @@ OpenGLState::OpenGLState() {
draw.vertex_buffer = 0;
draw.uniform_buffer = 0;
draw.shader_program = 0;
+
+ clip_distance = {};
}
void OpenGLState::Apply() const {
@@ -261,6 +263,17 @@ void OpenGLState::Apply() const {
glUseProgram(draw.shader_program);
}
+ // Clip distance
+ for (size_t i = 0; i < clip_distance.size(); ++i) {
+ if (clip_distance[i] != cur_state.clip_distance[i]) {
+ if (clip_distance[i]) {
+ glEnable(GL_CLIP_DISTANCE0 + i);
+ } else {
+ glDisable(GL_CLIP_DISTANCE0 + i);
+ }
+ }
+ }
+
cur_state = *this;
}
diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h
index 745a74479..437fe34c4 100644
--- a/src/video_core/renderer_opengl/gl_state.h
+++ b/src/video_core/renderer_opengl/gl_state.h
@@ -4,6 +4,7 @@
#pragma once
+#include <array>
#include <glad/glad.h>
namespace TextureUnits {
@@ -123,6 +124,8 @@ public:
GLuint shader_program; // GL_CURRENT_PROGRAM
} draw;
+ std::array<bool, 2> clip_distance; // GL_CLIP_DISTANCE
+
OpenGLState();
/// Get the currently active OpenGL state
diff --git a/src/video_core/swrasterizer/clipper.cpp b/src/video_core/swrasterizer/clipper.cpp
index 6fb923756..cdbc71502 100644
--- a/src/video_core/swrasterizer/clipper.cpp
+++ b/src/video_core/swrasterizer/clipper.cpp
@@ -95,6 +95,17 @@ void ProcessTriangle(const OutputVertex& v0, const OutputVertex& v1, const Outpu
static const size_t MAX_VERTICES = 9;
static_vector<Vertex, MAX_VERTICES> buffer_a = {v0, v1, v2};
static_vector<Vertex, MAX_VERTICES> buffer_b;
+
+ auto FlipQuaternionIfOpposite = [](auto& a, const auto& b) {
+ if (Math::Dot(a, b) < float24::Zero())
+ a = -a;
+ };
+
+ // Flip the quaternions if they are opposite to prevent interpolating them over the wrong
+ // direction.
+ FlipQuaternionIfOpposite(buffer_a[1].quat, buffer_a[0].quat);
+ FlipQuaternionIfOpposite(buffer_a[2].quat, buffer_a[0].quat);
+
auto* output_list = &buffer_a;
auto* input_list = &buffer_b;
@@ -114,10 +125,6 @@ void ProcessTriangle(const OutputVertex& v0, const OutputVertex& v1, const Outpu
{Math::MakeVec(f0, f0, f0, -f1), Math::Vec4<float24>(f0, f0, f0, EPSILON)}, // w = EPSILON
}};
- // TODO: If one vertex lies outside one of the depth clipping planes, some platforms (e.g. Wii)
- // drop the whole primitive instead of clipping the primitive properly. We should test if
- // this happens on the 3DS, too.
-
// Simple implementation of the Sutherland-Hodgman clipping algorithm.
// TODO: Make this less inefficient (currently lots of useless buffering overhead happens here)
for (auto edge : clipping_edges) {
diff --git a/src/video_core/swrasterizer/framebuffer.cpp b/src/video_core/swrasterizer/framebuffer.cpp
index 7de3aac75..f34eab6cf 100644
--- a/src/video_core/swrasterizer/framebuffer.cpp
+++ b/src/video_core/swrasterizer/framebuffer.cpp
@@ -352,6 +352,8 @@ u8 LogicOp(u8 src, u8 dest, FramebufferRegs::LogicOp op) {
case FramebufferRegs::LogicOp::OrInverted:
return ~src | dest;
}
+
+ UNREACHABLE();
};
} // namespace Rasterizer
diff --git a/src/video_core/swrasterizer/lighting.cpp b/src/video_core/swrasterizer/lighting.cpp
new file mode 100644
index 000000000..b38964530
--- /dev/null
+++ b/src/video_core/swrasterizer/lighting.cpp
@@ -0,0 +1,307 @@
+// Copyright 2017 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/math_util.h"
+#include "video_core/swrasterizer/lighting.h"
+
+namespace Pica {
+
+static float LookupLightingLut(const Pica::State::Lighting& lighting, size_t lut_index, u8 index,
+ float delta) {
+ ASSERT_MSG(lut_index < lighting.luts.size(), "Out of range lut");
+ ASSERT_MSG(index < lighting.luts[lut_index].size(), "Out of range index");
+
+ const auto& lut = lighting.luts[lut_index][index];
+
+ float lut_value = lut.ToFloat();
+ float lut_diff = lut.DiffToFloat();
+
+ return lut_value + lut_diff * delta;
+}
+
+std::tuple<Math::Vec4<u8>, Math::Vec4<u8>> ComputeFragmentsColors(
+ const Pica::LightingRegs& lighting, const Pica::State::Lighting& lighting_state,
+ const Math::Quaternion<float>& normquat, const Math::Vec3<float>& view,
+ const Math::Vec4<u8> (&texture_color)[4]) {
+
+ Math::Vec3<float> surface_normal;
+ Math::Vec3<float> surface_tangent;
+
+ if (lighting.config0.bump_mode != LightingRegs::LightingBumpMode::None) {
+ Math::Vec3<float> perturbation =
+ texture_color[lighting.config0.bump_selector].xyz().Cast<float>() / 127.5f -
+ Math::MakeVec(1.0f, 1.0f, 1.0f);
+ if (lighting.config0.bump_mode == LightingRegs::LightingBumpMode::NormalMap) {
+ if (!lighting.config0.disable_bump_renorm) {
+ const float z_square = 1 - perturbation.xy().Length2();
+ perturbation.z = std::sqrt(std::max(z_square, 0.0f));
+ }
+ surface_normal = perturbation;
+ surface_tangent = Math::MakeVec(1.0f, 0.0f, 0.0f);
+ } else if (lighting.config0.bump_mode == LightingRegs::LightingBumpMode::TangentMap) {
+ surface_normal = Math::MakeVec(0.0f, 0.0f, 1.0f);
+ surface_tangent = perturbation;
+ } else {
+ LOG_ERROR(HW_GPU, "Unknown bump mode %u", lighting.config0.bump_mode.Value());
+ }
+ } else {
+ surface_normal = Math::MakeVec(0.0f, 0.0f, 1.0f);
+ surface_tangent = Math::MakeVec(1.0f, 0.0f, 0.0f);
+ }
+
+    // Use the normalized quaternion when performing the rotation
+ auto normal = Math::QuaternionRotate(normquat, surface_normal);
+ auto tangent = Math::QuaternionRotate(normquat, surface_tangent);
+
+ Math::Vec4<float> diffuse_sum = {0.0f, 0.0f, 0.0f, 1.0f};
+ Math::Vec4<float> specular_sum = {0.0f, 0.0f, 0.0f, 1.0f};
+
+ for (unsigned light_index = 0; light_index <= lighting.max_light_index; ++light_index) {
+ unsigned num = lighting.light_enable.GetNum(light_index);
+ const auto& light_config = lighting.light[num];
+
+ Math::Vec3<float> refl_value = {};
+ Math::Vec3<float> position = {float16::FromRaw(light_config.x).ToFloat32(),
+ float16::FromRaw(light_config.y).ToFloat32(),
+ float16::FromRaw(light_config.z).ToFloat32()};
+ Math::Vec3<float> light_vector;
+
+ if (light_config.config.directional)
+ light_vector = position;
+ else
+ light_vector = position + view;
+
+ light_vector.Normalize();
+
+ Math::Vec3<float> norm_view = view.Normalized();
+ Math::Vec3<float> half_vector = norm_view + light_vector;
+
+ float dist_atten = 1.0f;
+ if (!lighting.IsDistAttenDisabled(num)) {
+ auto distance = (-view - position).Length();
+ float scale = Pica::float20::FromRaw(light_config.dist_atten_scale).ToFloat32();
+ float bias = Pica::float20::FromRaw(light_config.dist_atten_bias).ToFloat32();
+ size_t lut =
+ static_cast<size_t>(LightingRegs::LightingSampler::DistanceAttenuation) + num;
+
+ float sample_loc = MathUtil::Clamp(scale * distance + bias, 0.0f, 1.0f);
+
+ u8 lutindex =
+ static_cast<u8>(MathUtil::Clamp(std::floor(sample_loc * 256.0f), 0.0f, 255.0f));
+ float delta = sample_loc * 256 - lutindex;
+ dist_atten = LookupLightingLut(lighting_state, lut, lutindex, delta);
+ }
+
+ auto GetLutValue = [&](LightingRegs::LightingLutInput input, bool abs,
+ LightingRegs::LightingScale scale_enum,
+ LightingRegs::LightingSampler sampler) {
+ float result = 0.0f;
+
+ switch (input) {
+ case LightingRegs::LightingLutInput::NH:
+ result = Math::Dot(normal, half_vector.Normalized());
+ break;
+
+ case LightingRegs::LightingLutInput::VH:
+ result = Math::Dot(norm_view, half_vector.Normalized());
+ break;
+
+ case LightingRegs::LightingLutInput::NV:
+ result = Math::Dot(normal, norm_view);
+ break;
+
+ case LightingRegs::LightingLutInput::LN:
+ result = Math::Dot(light_vector, normal);
+ break;
+
+ case LightingRegs::LightingLutInput::SP: {
+ Math::Vec3<s32> spot_dir{light_config.spot_x.Value(), light_config.spot_y.Value(),
+ light_config.spot_z.Value()};
+ result = Math::Dot(light_vector, spot_dir.Cast<float>() / 2047.0f);
+ break;
+ }
+ case LightingRegs::LightingLutInput::CP:
+ if (lighting.config0.config == LightingRegs::LightingConfig::Config7) {
+ const Math::Vec3<float> norm_half_vector = half_vector.Normalized();
+ const Math::Vec3<float> half_vector_proj =
+ norm_half_vector - normal * Math::Dot(normal, norm_half_vector);
+ result = Math::Dot(half_vector_proj, tangent);
+ } else {
+ result = 0.0f;
+ }
+ break;
+ default:
+ LOG_CRITICAL(HW_GPU, "Unknown lighting LUT input %u\n", static_cast<u32>(input));
+ UNIMPLEMENTED();
+ result = 0.0f;
+ }
+
+ u8 index;
+ float delta;
+
+ if (abs) {
+ if (light_config.config.two_sided_diffuse)
+ result = std::abs(result);
+ else
+ result = std::max(result, 0.0f);
+
+ float flr = std::floor(result * 256.0f);
+ index = static_cast<u8>(MathUtil::Clamp(flr, 0.0f, 255.0f));
+ delta = result * 256 - index;
+ } else {
+ float flr = std::floor(result * 128.0f);
+ s8 signed_index = static_cast<s8>(MathUtil::Clamp(flr, -128.0f, 127.0f));
+ delta = result * 128.0f - signed_index;
+ index = static_cast<u8>(signed_index);
+ }
+
+ float scale = lighting.lut_scale.GetScale(scale_enum);
+ return scale *
+ LookupLightingLut(lighting_state, static_cast<size_t>(sampler), index, delta);
+ };
+
+ // If enabled, compute spot light attenuation value
+ float spot_atten = 1.0f;
+ if (!lighting.IsSpotAttenDisabled(num) &&
+ LightingRegs::IsLightingSamplerSupported(
+ lighting.config0.config, LightingRegs::LightingSampler::SpotlightAttenuation)) {
+ auto lut = LightingRegs::SpotlightAttenuationSampler(num);
+ spot_atten = GetLutValue(lighting.lut_input.sp, lighting.abs_lut_input.disable_sp == 0,
+ lighting.lut_scale.sp, lut);
+ }
+
+ // Specular 0 component
+ float d0_lut_value = 1.0f;
+ if (lighting.config1.disable_lut_d0 == 0 &&
+ LightingRegs::IsLightingSamplerSupported(
+ lighting.config0.config, LightingRegs::LightingSampler::Distribution0)) {
+ d0_lut_value =
+ GetLutValue(lighting.lut_input.d0, lighting.abs_lut_input.disable_d0 == 0,
+ lighting.lut_scale.d0, LightingRegs::LightingSampler::Distribution0);
+ }
+
+ Math::Vec3<float> specular_0 = d0_lut_value * light_config.specular_0.ToVec3f();
+
+        // If enabled, look up ReflectRed value, otherwise, 1.0 is used
+ if (lighting.config1.disable_lut_rr == 0 &&
+ LightingRegs::IsLightingSamplerSupported(lighting.config0.config,
+ LightingRegs::LightingSampler::ReflectRed)) {
+ refl_value.x =
+ GetLutValue(lighting.lut_input.rr, lighting.abs_lut_input.disable_rr == 0,
+ lighting.lut_scale.rr, LightingRegs::LightingSampler::ReflectRed);
+ } else {
+ refl_value.x = 1.0f;
+ }
+
+        // If enabled, look up ReflectGreen value, otherwise, ReflectRed value is used
+ if (lighting.config1.disable_lut_rg == 0 &&
+ LightingRegs::IsLightingSamplerSupported(lighting.config0.config,
+ LightingRegs::LightingSampler::ReflectGreen)) {
+ refl_value.y =
+ GetLutValue(lighting.lut_input.rg, lighting.abs_lut_input.disable_rg == 0,
+ lighting.lut_scale.rg, LightingRegs::LightingSampler::ReflectGreen);
+ } else {
+ refl_value.y = refl_value.x;
+ }
+
+        // If enabled, look up ReflectBlue value, otherwise, ReflectRed value is used
+ if (lighting.config1.disable_lut_rb == 0 &&
+ LightingRegs::IsLightingSamplerSupported(lighting.config0.config,
+ LightingRegs::LightingSampler::ReflectBlue)) {
+ refl_value.z =
+ GetLutValue(lighting.lut_input.rb, lighting.abs_lut_input.disable_rb == 0,
+ lighting.lut_scale.rb, LightingRegs::LightingSampler::ReflectBlue);
+ } else {
+ refl_value.z = refl_value.x;
+ }
+
+ // Specular 1 component
+ float d1_lut_value = 1.0f;
+ if (lighting.config1.disable_lut_d1 == 0 &&
+ LightingRegs::IsLightingSamplerSupported(
+ lighting.config0.config, LightingRegs::LightingSampler::Distribution1)) {
+ d1_lut_value =
+ GetLutValue(lighting.lut_input.d1, lighting.abs_lut_input.disable_d1 == 0,
+ lighting.lut_scale.d1, LightingRegs::LightingSampler::Distribution1);
+ }
+
+ Math::Vec3<float> specular_1 =
+ d1_lut_value * refl_value * light_config.specular_1.ToVec3f();
+
+ // Fresnel
+ if (lighting.config1.disable_lut_fr == 0 &&
+ LightingRegs::IsLightingSamplerSupported(lighting.config0.config,
+ LightingRegs::LightingSampler::Fresnel)) {
+
+ float lut_value =
+ GetLutValue(lighting.lut_input.fr, lighting.abs_lut_input.disable_fr == 0,
+ lighting.lut_scale.fr, LightingRegs::LightingSampler::Fresnel);
+
+ // Enabled for diffuse lighting alpha component
+ if (lighting.config0.fresnel_selector ==
+ LightingRegs::LightingFresnelSelector::PrimaryAlpha ||
+ lighting.config0.fresnel_selector == LightingRegs::LightingFresnelSelector::Both) {
+ diffuse_sum.a() *= lut_value;
+ }
+
+ // Enabled for the specular lighting alpha component
+ if (lighting.config0.fresnel_selector ==
+ LightingRegs::LightingFresnelSelector::SecondaryAlpha ||
+ lighting.config0.fresnel_selector == LightingRegs::LightingFresnelSelector::Both) {
+ specular_sum.a() *= lut_value;
+ }
+ }
+
+ auto dot_product = Math::Dot(light_vector, normal);
+
+ // Calculate clamp highlights before applying the two-sided diffuse configuration to the dot
+ // product.
+ float clamp_highlights = 1.0f;
+ if (lighting.config0.clamp_highlights) {
+ if (dot_product <= 0.0f)
+ clamp_highlights = 0.0f;
+ else
+ clamp_highlights = 1.0f;
+ }
+
+ if (light_config.config.two_sided_diffuse)
+ dot_product = std::abs(dot_product);
+ else
+ dot_product = std::max(dot_product, 0.0f);
+
+ if (light_config.config.geometric_factor_0 || light_config.config.geometric_factor_1) {
+ float geo_factor = half_vector.Length2();
+ geo_factor = geo_factor == 0.0f ? 0.0f : std::min(dot_product / geo_factor, 1.0f);
+ if (light_config.config.geometric_factor_0) {
+ specular_0 *= geo_factor;
+ }
+ if (light_config.config.geometric_factor_1) {
+ specular_1 *= geo_factor;
+ }
+ }
+
+ auto diffuse =
+ light_config.diffuse.ToVec3f() * dot_product + light_config.ambient.ToVec3f();
+ diffuse_sum += Math::MakeVec(diffuse * dist_atten * spot_atten, 0.0f);
+
+ specular_sum += Math::MakeVec(
+ (specular_0 + specular_1) * clamp_highlights * dist_atten * spot_atten, 0.0f);
+ }
+
+ diffuse_sum += Math::MakeVec(lighting.global_ambient.ToVec3f(), 0.0f);
+
+ auto diffuse = Math::MakeVec<float>(MathUtil::Clamp(diffuse_sum.x, 0.0f, 1.0f) * 255,
+ MathUtil::Clamp(diffuse_sum.y, 0.0f, 1.0f) * 255,
+ MathUtil::Clamp(diffuse_sum.z, 0.0f, 1.0f) * 255,
+ MathUtil::Clamp(diffuse_sum.w, 0.0f, 1.0f) * 255)
+ .Cast<u8>();
+ auto specular = Math::MakeVec<float>(MathUtil::Clamp(specular_sum.x, 0.0f, 1.0f) * 255,
+ MathUtil::Clamp(specular_sum.y, 0.0f, 1.0f) * 255,
+ MathUtil::Clamp(specular_sum.z, 0.0f, 1.0f) * 255,
+ MathUtil::Clamp(specular_sum.w, 0.0f, 1.0f) * 255)
+ .Cast<u8>();
+ return std::make_tuple(diffuse, specular);
+}
+
+} // namespace Pica
diff --git a/src/video_core/swrasterizer/lighting.h b/src/video_core/swrasterizer/lighting.h
new file mode 100644
index 000000000..d807a3d94
--- /dev/null
+++ b/src/video_core/swrasterizer/lighting.h
@@ -0,0 +1,19 @@
+// Copyright 2017 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <tuple>
+#include "common/quaternion.h"
+#include "common/vector_math.h"
+#include "video_core/pica_state.h"
+
+namespace Pica {
+
+std::tuple<Math::Vec4<u8>, Math::Vec4<u8>> ComputeFragmentsColors(
+ const Pica::LightingRegs& lighting, const Pica::State::Lighting& lighting_state,
+ const Math::Quaternion<float>& normquat, const Math::Vec3<float>& view,
+ const Math::Vec4<u8> (&texture_color)[4]);
+
+} // namespace Pica
diff --git a/src/video_core/swrasterizer/rasterizer.cpp b/src/video_core/swrasterizer/rasterizer.cpp
index 512e81c08..862135614 100644
--- a/src/video_core/swrasterizer/rasterizer.cpp
+++ b/src/video_core/swrasterizer/rasterizer.cpp
@@ -13,6 +13,7 @@
#include "common/logging/log.h"
#include "common/math_util.h"
#include "common/microprofile.h"
+#include "common/quaternion.h"
#include "common/vector_math.h"
#include "core/hw/gpu.h"
#include "core/memory.h"
@@ -24,6 +25,7 @@
#include "video_core/regs_texturing.h"
#include "video_core/shader/shader.h"
#include "video_core/swrasterizer/framebuffer.h"
+#include "video_core/swrasterizer/lighting.h"
#include "video_core/swrasterizer/proctex.h"
#include "video_core/swrasterizer/rasterizer.h"
#include "video_core/swrasterizer/texturing.h"
@@ -419,6 +421,26 @@ static void ProcessTriangleInternal(const Vertex& v0, const Vertex& v1, const Ve
regs.texturing.tev_combiner_buffer_color.a,
};
+ Math::Vec4<u8> primary_fragment_color = {0, 0, 0, 0};
+ Math::Vec4<u8> secondary_fragment_color = {0, 0, 0, 0};
+
+ if (!g_state.regs.lighting.disable) {
+ Math::Quaternion<float> normquat = Math::Quaternion<float>{
+ {GetInterpolatedAttribute(v0.quat.x, v1.quat.x, v2.quat.x).ToFloat32(),
+ GetInterpolatedAttribute(v0.quat.y, v1.quat.y, v2.quat.y).ToFloat32(),
+ GetInterpolatedAttribute(v0.quat.z, v1.quat.z, v2.quat.z).ToFloat32()},
+ GetInterpolatedAttribute(v0.quat.w, v1.quat.w, v2.quat.w).ToFloat32(),
+ }.Normalized();
+
+ Math::Vec3<float> view{
+ GetInterpolatedAttribute(v0.view.x, v1.view.x, v2.view.x).ToFloat32(),
+ GetInterpolatedAttribute(v0.view.y, v1.view.y, v2.view.y).ToFloat32(),
+ GetInterpolatedAttribute(v0.view.z, v1.view.z, v2.view.z).ToFloat32(),
+ };
+ std::tie(primary_fragment_color, secondary_fragment_color) = ComputeFragmentsColors(
+ g_state.regs.lighting, g_state.lighting, normquat, view, texture_color);
+ }
+
for (unsigned tev_stage_index = 0; tev_stage_index < tev_stages.size();
++tev_stage_index) {
const auto& tev_stage = tev_stages[tev_stage_index];
@@ -427,14 +449,13 @@ static void ProcessTriangleInternal(const Vertex& v0, const Vertex& v1, const Ve
auto GetSource = [&](Source source) -> Math::Vec4<u8> {
switch (source) {
case Source::PrimaryColor:
+ return primary_color;
- // HACK: Until we implement fragment lighting, use primary_color
case Source::PrimaryFragmentColor:
- return primary_color;
+ return primary_fragment_color;
- // HACK: Until we implement fragment lighting, use zero
case Source::SecondaryFragmentColor:
- return {0, 0, 0, 0};
+ return secondary_fragment_color;
case Source::Texture0:
return texture_color[0];
diff --git a/src/video_core/swrasterizer/rasterizer.h b/src/video_core/swrasterizer/rasterizer.h
index 2f0877581..66cd6cfd4 100644
--- a/src/video_core/swrasterizer/rasterizer.h
+++ b/src/video_core/swrasterizer/rasterizer.h
@@ -19,10 +19,9 @@ struct Vertex : Shader::OutputVertex {
// Linear interpolation
// factor: 0=this, 1=vtx
+ // Note: This function cannot be called after perspective divide
void Lerp(float24 factor, const Vertex& vtx) {
pos = pos * factor + vtx.pos * (float24::FromFloat32(1) - factor);
-
- // TODO: Should perform perspective correct interpolation here...
quat = quat * factor + vtx.quat * (float24::FromFloat32(1) - factor);
color = color * factor + vtx.color * (float24::FromFloat32(1) - factor);
tc0 = tc0 * factor + vtx.tc0 * (float24::FromFloat32(1) - factor);
@@ -30,12 +29,11 @@ struct Vertex : Shader::OutputVertex {
tc0_w = tc0_w * factor + vtx.tc0_w * (float24::FromFloat32(1) - factor);
view = view * factor + vtx.view * (float24::FromFloat32(1) - factor);
tc2 = tc2 * factor + vtx.tc2 * (float24::FromFloat32(1) - factor);
-
- screenpos = screenpos * factor + vtx.screenpos * (float24::FromFloat32(1) - factor);
}
// Linear interpolation
// factor: 0=v0, 1=v1
+ // Note: This function cannot be called after perspective divide
static Vertex Lerp(float24 factor, const Vertex& v0, const Vertex& v1) {
Vertex ret = v0;
ret.Lerp(factor, v1);
diff --git a/src/video_core/swrasterizer/texturing.cpp b/src/video_core/swrasterizer/texturing.cpp
index 4f02b93f2..79b1ce841 100644
--- a/src/video_core/swrasterizer/texturing.cpp
+++ b/src/video_core/swrasterizer/texturing.cpp
@@ -89,6 +89,8 @@ Math::Vec3<u8> GetColorModifier(TevStageConfig::ColorModifier factor,
case ColorModifier::OneMinusSourceBlue:
return (Math::Vec3<u8>(255, 255, 255) - values.bbb()).Cast<u8>();
}
+
+ UNREACHABLE();
};
u8 GetAlphaModifier(TevStageConfig::AlphaModifier factor, const Math::Vec4<u8>& values) {
@@ -119,6 +121,8 @@ u8 GetAlphaModifier(TevStageConfig::AlphaModifier factor, const Math::Vec4<u8>&
case AlphaModifier::OneMinusSourceBlue:
return 255 - values.b();
}
+
+ UNREACHABLE();
};
Math::Vec3<u8> ColorCombine(TevStageConfig::Operation op, const Math::Vec3<u8> input[3]) {