14 files changed, 471 insertions, 248 deletions
diff --git a/src/video_core/pica_state.h b/src/video_core/pica_state.h
index f46db09fb..2d23d34e6 100644
--- a/src/video_core/pica_state.h
+++ b/src/video_core/pica_state.h
@@ -87,12 +87,18 @@ struct State {
             // LUT value, encoded as 12-bit fixed point, with 12 fraction bits
             BitField<0, 12, u32> value; // 0.0.12 fixed point
 
-            // Used by HW for efficient interpolation, Citra does not use these
-            BitField<12, 12, s32> difference; // 1.0.11 fixed point
+            // Used for efficient interpolation.
+            BitField<12, 11, u32> difference; // 0.0.11 fixed point
+            BitField<23, 1, u32> neg_difference;
 
-            float ToFloat() {
+            float ToFloat() const {
                 return static_cast<float>(value) / 4095.f;
             }
+
+            float DiffToFloat() const {
+                float diff = static_cast<float>(difference) / 2047.f; // 11-bit magnitude -> [0, 1]
+                return neg_difference ? -diff : diff; // sign is the separate neg_difference bit
+            }
         };
 
         std::array<std::array<LutEntry, 256>, 24> luts;
@@ -105,6 +111,14 @@ struct State {
 
             BitField<0, 13, s32> difference; // 1.1.11 fixed point
             BitField<13, 11, u32> value;     // 0.0.11 fixed point
+
+            float ToFloat() const {
+                return static_cast<float>(value) / 2047.0f; // value: unsigned 11-bit field
+            }
+
+            float DiffToFloat() const {
+                return static_cast<float>(difference) / 2047.0f; // difference: signed 13-bit field
+            }
         };
 
         std::array<LutEntry, 128> lut;
diff --git a/src/video_core/regs_lighting.h b/src/video_core/regs_lighting.h
index 6793405d9..b89709cfe 100644
--- a/src/video_core/regs_lighting.h
+++ b/src/video_core/regs_lighting.h
@@ -26,6 +26,18 @@ struct LightingRegs {
         DistanceAttenuation = 16,
     };
 
+    static constexpr unsigned NumLightingSampler = 24;
+
+    static LightingSampler SpotlightAttenuationSampler(unsigned index) {
+        return static_cast<LightingSampler>( // sampler id = SpotlightAttenuation + index
+            static_cast<unsigned>(LightingSampler::SpotlightAttenuation) + index);
+    }
+
+    static LightingSampler DistanceAttenuationSampler(unsigned index) {
+        return static_cast<LightingSampler>( // sampler id = DistanceAttenuation (16) + index
+            static_cast<unsigned>(LightingSampler::DistanceAttenuation) + index);
+    }
+
     /**
     * Pica fragment lighting supports using different LUTs for each lighting component:  Reflectance
     * R, G, and B channels, distribution function for specular components 0 and 1, fresnel factor,
@@ -73,6 +85,8 @@ struct LightingRegs {
         VH = 1, // Cosine of the angle between the view and half-angle vectors
         NV = 2, // Cosine of the angle between the normal and the view vector
         LN = 3, // Cosine of the angle between the light and the normal vectors
+        SP = 4, // Cosine of the angle between the light and the inverse spotlight vectors
+        CP = 5, // Cosine of the angle between the tangent and projection of half-angle vectors
     };
 
     enum class LightingBumpMode : u32 {
@@ -104,6 +118,9 @@ struct LightingRegs {
             return (config != LightingConfig::Config0) && (config != LightingConfig::Config1) &&
                    (config != LightingConfig::Config5);
 
+        case LightingSampler::SpotlightAttenuation:
+            return (config != LightingConfig::Config2) && (config != LightingConfig::Config3);
+
         case LightingSampler::Fresnel:
             return (config != LightingConfig::Config0) && (config != LightingConfig::Config2) &&
                    (config != LightingConfig::Config4);
@@ -116,11 +133,10 @@ struct LightingRegs {
             return (config == LightingConfig::Config4) || (config == LightingConfig::Config5) ||
                    (config == LightingConfig::Config7);
         default:
-            UNREACHABLE_MSG("Regs::IsLightingSamplerSupported: Reached "
-                            "unreachable section, sampler should be one "
-                            "of Distribution0, Distribution1, Fresnel, "
-                            "ReflectRed, ReflectGreen or ReflectBlue, instead "
-                            "got %i",
+            UNREACHABLE_MSG("Regs::IsLightingSamplerSupported: Reached unreachable section, "
+                            "sampler should be one of Distribution0, Distribution1, "
+                            "SpotlightAttenuation, Fresnel, ReflectRed, ReflectGreen or "
+                            "ReflectBlue, instead got %i",
                             static_cast<int>(config));
         }
     }
@@ -140,11 +156,22 @@ struct LightingRegs {
             BitField<0, 16, u32> z;
         };
 
-        INSERT_PADDING_WORDS(0x3);
+        // inverse spotlight direction vector, encoded as fixed1.1.11
+        union {
+            BitField<0, 13, s32> spot_x;
+            BitField<16, 13, s32> spot_y;
+        };
+        union {
+            BitField<0, 13, s32> spot_z;
+        };
+
+        INSERT_PADDING_WORDS(0x1);
 
         union {
             BitField<0, 1, u32> directional;
             BitField<1, 1, u32> two_sided_diffuse; // When disabled, clamp dot-product to 0
+            BitField<2, 1, u32> geometric_factor_0;
+            BitField<3, 1, u32> geometric_factor_1;
         } config;
 
         BitField<0, 20, u32> dist_atten_bias;
@@ -169,8 +196,16 @@ struct LightingRegs {
     } config0;
 
     union {
+        u32 raw;
+
+        // Each bit, when set, disables spot light attenuation for the corresponding light
+        // (bit 8 + N controls light N).
+        BitField<8, 8, u32> disable_spot_atten;
+
         BitField<16, 1, u32> disable_lut_d0;
         BitField<17, 1, u32> disable_lut_d1;
+        // Note: by intuition, BitField<18, 1, u32> should be disable_lut_sp, but it is actually a
+        // dummy bit which is always set to 1.
         BitField<19, 1, u32> disable_lut_fr;
         BitField<20, 1, u32> disable_lut_rr;
         BitField<21, 1, u32> disable_lut_rg;
@@ -178,23 +213,15 @@ struct LightingRegs {
 
         // Each bit specifies whether distance attenuation should be applied for the corresponding
         // light.
-        BitField<24, 1, u32> disable_dist_atten_light_0;
-        BitField<25, 1, u32> disable_dist_atten_light_1;
-        BitField<26, 1, u32> disable_dist_atten_light_2;
-        BitField<27, 1, u32> disable_dist_atten_light_3;
-        BitField<28, 1, u32> disable_dist_atten_light_4;
-        BitField<29, 1, u32> disable_dist_atten_light_5;
-        BitField<30, 1, u32> disable_dist_atten_light_6;
-        BitField<31, 1, u32> disable_dist_atten_light_7;
+        BitField<24, 8, u32> disable_dist_atten;
     } config1;
 
     bool IsDistAttenDisabled(unsigned index) const {
-        const unsigned disable[] = {
-            config1.disable_dist_atten_light_0, config1.disable_dist_atten_light_1,
-            config1.disable_dist_atten_light_2, config1.disable_dist_atten_light_3,
-            config1.disable_dist_atten_light_4, config1.disable_dist_atten_light_5,
-            config1.disable_dist_atten_light_6, config1.disable_dist_atten_light_7};
-        return disable[index] != 0;
+        return (config1.disable_dist_atten & (1 << index)) != 0;
+    }
+
+    bool IsSpotAttenDisabled(unsigned index) const {
+        return (config1.disable_spot_atten & (1 << index)) != 0; // true when bit `index` is set
     }
 
     union {
diff --git a/src/video_core/regs_texturing.h b/src/video_core/regs_texturing.h
index 3f5355fa9..0b09f2299 100644
--- a/src/video_core/regs_texturing.h
+++ b/src/video_core/regs_texturing.h
@@ -30,10 +30,10 @@ struct TexturingRegs {
             Repeat = 2,
             MirroredRepeat = 3,
             // Mode 4-7 produces some weird result and may be just invalid:
-            // 4: Positive coord: clamp to edge; negative coord: repeat
-            // 5: Positive coord: clamp to border; negative coord: repeat
-            // 6: Repeat
-            // 7: Repeat
+            ClampToEdge2 = 4,   // Positive coord: clamp to edge; negative coord: repeat
+            ClampToBorder2 = 5, // Positive coord: clamp to border; negative coord: repeat
+            Repeat2 = 6,        // Same as Repeat
+            Repeat3 = 7,        // Same as Repeat
         };
 
         enum TextureFilter : u32 {
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index aa9b831dd..ff3f69ba3 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -49,9 +49,7 @@ RasterizerOpenGL::RasterizerOpenGL() : shader_dirty(true) {
 
     uniform_block_data.dirty = true;
 
-    for (unsigned index = 0; index < lighting_luts.size(); index++) {
-        uniform_block_data.lut_dirty[index] = true;
-    }
+    uniform_block_data.lut_dirty.fill(true);
 
     uniform_block_data.fog_lut_dirty = true;
 
@@ -96,36 +94,32 @@ RasterizerOpenGL::RasterizerOpenGL() : shader_dirty(true) {
     framebuffer.Create();
 
     // Allocate and bind lighting lut textures
-    for (size_t i = 0; i < lighting_luts.size(); ++i) {
-        lighting_luts[i].Create();
-        state.lighting_luts[i].texture_1d = lighting_luts[i].handle;
-    }
+    lighting_lut.Create();
+    state.lighting_lut.texture_buffer = lighting_lut.handle;
     state.Apply();
-
-    for (size_t i = 0; i < lighting_luts.size(); ++i) {
-        glActiveTexture(static_cast<GLenum>(GL_TEXTURE3 + i));
-        glTexImage1D(GL_TEXTURE_1D, 0, GL_RGBA32F, 256, 0, GL_RGBA, GL_FLOAT, nullptr);
-        glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
-        glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
-    }
+    lighting_lut_buffer.Create();
+    glBindBuffer(GL_TEXTURE_BUFFER, lighting_lut_buffer.handle);
+    glBufferData(GL_TEXTURE_BUFFER,
+                 sizeof(GLfloat) * 2 * 256 * Pica::LightingRegs::NumLightingSampler, nullptr,
+                 GL_DYNAMIC_DRAW);
+    glActiveTexture(TextureUnits::LightingLUT.Enum());
+    glTexBuffer(GL_TEXTURE_BUFFER, GL_RG32F, lighting_lut_buffer.handle);
 
     // Setup the LUT for the fog
-    {
-        fog_lut.Create();
-        state.fog_lut.texture_1d = fog_lut.handle;
-    }
+    fog_lut.Create();
+    state.fog_lut.texture_buffer = fog_lut.handle;
     state.Apply();
-
-    glActiveTexture(GL_TEXTURE9);
-    glTexImage1D(GL_TEXTURE_1D, 0, GL_R32UI, 128, 0, GL_RED_INTEGER, GL_UNSIGNED_INT, nullptr);
-    glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
-    glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
+    fog_lut_buffer.Create();
+    glBindBuffer(GL_TEXTURE_BUFFER, fog_lut_buffer.handle);
+    glBufferData(GL_TEXTURE_BUFFER, sizeof(GLfloat) * 2 * 128, nullptr, GL_DYNAMIC_DRAW);
+    glActiveTexture(TextureUnits::FogLUT.Enum());
+    glTexBuffer(GL_TEXTURE_BUFFER, GL_RG32F, fog_lut_buffer.handle);
 
     // Setup the noise LUT for proctex
     proctex_noise_lut.Create();
     state.proctex_noise_lut.texture_1d = proctex_noise_lut.handle;
     state.Apply();
-    glActiveTexture(GL_TEXTURE10);
+    glActiveTexture(TextureUnits::ProcTexNoiseLUT.Enum());
     glTexImage1D(GL_TEXTURE_1D, 0, GL_RG32F, 128, 0, GL_RG, GL_FLOAT, nullptr);
     glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
     glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
@@ -134,7 +128,7 @@ RasterizerOpenGL::RasterizerOpenGL() : shader_dirty(true) {
     proctex_color_map.Create();
     state.proctex_color_map.texture_1d = proctex_color_map.handle;
     state.Apply();
-    glActiveTexture(GL_TEXTURE11);
+    glActiveTexture(TextureUnits::ProcTexColorMap.Enum());
     glTexImage1D(GL_TEXTURE_1D, 0, GL_RG32F, 128, 0, GL_RG, GL_FLOAT, nullptr);
     glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
     glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
@@ -143,7 +137,7 @@ RasterizerOpenGL::RasterizerOpenGL() : shader_dirty(true) {
     proctex_alpha_map.Create();
     state.proctex_alpha_map.texture_1d = proctex_alpha_map.handle;
     state.Apply();
-    glActiveTexture(GL_TEXTURE12);
+    glActiveTexture(TextureUnits::ProcTexAlphaMap.Enum());
     glTexImage1D(GL_TEXTURE_1D, 0, GL_RG32F, 128, 0, GL_RG, GL_FLOAT, nullptr);
     glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
     glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
@@ -152,7 +146,7 @@ RasterizerOpenGL::RasterizerOpenGL() : shader_dirty(true) {
     proctex_lut.Create();
     state.proctex_lut.texture_1d = proctex_lut.handle;
     state.Apply();
-    glActiveTexture(GL_TEXTURE13);
+    glActiveTexture(TextureUnits::ProcTexLUT.Enum());
     glTexImage1D(GL_TEXTURE_1D, 0, GL_RGBA32F, 256, 0, GL_RGBA, GL_FLOAT, nullptr);
     glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
     glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
@@ -161,7 +155,7 @@ RasterizerOpenGL::RasterizerOpenGL() : shader_dirty(true) {
     proctex_diff_lut.Create();
     state.proctex_diff_lut.texture_1d = proctex_diff_lut.handle;
     state.Apply();
-    glActiveTexture(GL_TEXTURE14);
+    glActiveTexture(TextureUnits::ProcTexDiffLUT.Enum());
     glTexImage1D(GL_TEXTURE_1D, 0, GL_RGBA32F, 256, 0, GL_RGBA, GL_FLOAT, nullptr);
     glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
     glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
@@ -182,19 +176,22 @@ RasterizerOpenGL::RasterizerOpenGL() : shader_dirty(true) {
 RasterizerOpenGL::~RasterizerOpenGL() {}
 
 /**
- * This is a helper function to resolve an issue with opposite quaternions being interpolated by
- * OpenGL. See below for a detailed description of this issue (yuriks):
+ * This is a helper function to resolve an issue when interpolating opposite quaternions. See below
+ * for a detailed description of this issue (yuriks):
  *
  * For any rotation, there are two quaternions Q, and -Q, that represent the same rotation. If you
  * interpolate two quaternions that are opposite, instead of going from one rotation to another
  * using the shortest path, you'll go around the longest path. You can test if two quaternions are
- * opposite by checking if Dot(Q1, W2) < 0. In that case, you can flip either of them, therefore
- * making Dot(-Q1, W2) positive.
+ * opposite by checking if Dot(Q1, Q2) < 0. In that case, you can flip either of them, therefore
+ * making Dot(Q1, -Q2) positive.
+ *
+ * This solution corrects this issue per-vertex before passing the quaternions to OpenGL. This is
+ * correct for most cases but can still rotate around the long way sometimes. An implementation
+ * which did `lerp(lerp(Q1, Q2), Q3)` (with proper weighting), applying the dot product check
+ * between each step would work for those cases at the cost of being more complex to implement.
  *
- * NOTE: This solution corrects this issue per-vertex before passing the quaternions to OpenGL. This
- * should be correct for nearly all cases, however a more correct implementation (but less trivial
- * and perhaps unnecessary) would be to handle this per-fragment, by interpolating the quaternions
- * manually using two Lerps, and doing this correction before each Lerp.
+ * Fortunately however, the 3DS hardware happens to also use this exact same logic to work around
+ * these issues, making this basic implementation actually more accurate to the hardware.
  */
 static bool AreQuaternionsOpposite(Math::Vec4<Pica::float24> qa, Math::Vec4<Pica::float24> qb) {
     Math::Vec4f a{qa.x.ToFloat32(), qa.y.ToFloat32(), qa.z.ToFloat32(), qa.w.ToFloat32()};
@@ -310,7 +307,7 @@ void RasterizerOpenGL::DrawTriangles() {
     }
 
     // Sync the lighting luts
-    for (unsigned index = 0; index < lighting_luts.size(); index++) {
+    for (unsigned index = 0; index < uniform_block_data.lut_dirty.size(); index++) {
         if (uniform_block_data.lut_dirty[index]) {
             SyncLightingLUT(index);
             uniform_block_data.lut_dirty[index] = false;
@@ -735,6 +732,40 @@ void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) {
         SyncLightPosition(7);
         break;
 
+    // Fragment spot lighting direction
+    case PICA_REG_INDEX_WORKAROUND(lighting.light[0].spot_x, 0x146 + 0 * 0x10):
+    case PICA_REG_INDEX_WORKAROUND(lighting.light[0].spot_z, 0x147 + 0 * 0x10):
+        SyncLightSpotDirection(0);
+        break;
+    case PICA_REG_INDEX_WORKAROUND(lighting.light[1].spot_x, 0x146 + 1 * 0x10):
+    case PICA_REG_INDEX_WORKAROUND(lighting.light[1].spot_z, 0x147 + 1 * 0x10):
+        SyncLightSpotDirection(1);
+        break;
+    case PICA_REG_INDEX_WORKAROUND(lighting.light[2].spot_x, 0x146 + 2 * 0x10):
+    case PICA_REG_INDEX_WORKAROUND(lighting.light[2].spot_z, 0x147 + 2 * 0x10):
+        SyncLightSpotDirection(2);
+        break;
+    case PICA_REG_INDEX_WORKAROUND(lighting.light[3].spot_x, 0x146 + 3 * 0x10):
+    case PICA_REG_INDEX_WORKAROUND(lighting.light[3].spot_z, 0x147 + 3 * 0x10):
+        SyncLightSpotDirection(3);
+        break;
+    case PICA_REG_INDEX_WORKAROUND(lighting.light[4].spot_x, 0x146 + 4 * 0x10):
+    case PICA_REG_INDEX_WORKAROUND(lighting.light[4].spot_z, 0x147 + 4 * 0x10):
+        SyncLightSpotDirection(4);
+        break;
+    case PICA_REG_INDEX_WORKAROUND(lighting.light[5].spot_x, 0x146 + 5 * 0x10):
+    case PICA_REG_INDEX_WORKAROUND(lighting.light[5].spot_z, 0x147 + 5 * 0x10):
+        SyncLightSpotDirection(5);
+        break;
+    case PICA_REG_INDEX_WORKAROUND(lighting.light[6].spot_x, 0x146 + 6 * 0x10):
+    case PICA_REG_INDEX_WORKAROUND(lighting.light[6].spot_z, 0x147 + 6 * 0x10):
+        SyncLightSpotDirection(6);
+        break;
+    case PICA_REG_INDEX_WORKAROUND(lighting.light[7].spot_x, 0x146 + 7 * 0x10):
+    case PICA_REG_INDEX_WORKAROUND(lighting.light[7].spot_z, 0x147 + 7 * 0x10):
+        SyncLightSpotDirection(7);
+        break;
+
     // Fragment lighting light source config
     case PICA_REG_INDEX_WORKAROUND(lighting.light[0].config, 0x149 + 0 * 0x10):
     case PICA_REG_INDEX_WORKAROUND(lighting.light[1].config, 0x149 + 1 * 0x10):
@@ -814,7 +845,7 @@ void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) {
     case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[6], 0x1ce):
     case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[7], 0x1cf): {
         auto& lut_config = regs.lighting.lut_config;
-        uniform_block_data.lut_dirty[lut_config.type / 4] = true;
+        uniform_block_data.lut_dirty[lut_config.type] = true;
         break;
     }
     }
@@ -1150,77 +1181,57 @@ void RasterizerOpenGL::SetShader() {
         state.Apply();
 
         // Set the texture samplers to correspond to different texture units
-        GLuint uniform_tex = glGetUniformLocation(shader->shader.handle, "tex[0]");
+        GLint uniform_tex = glGetUniformLocation(shader->shader.handle, "tex[0]");
         if (uniform_tex != -1) {
-            glUniform1i(uniform_tex, 0);
+            glUniform1i(uniform_tex, TextureUnits::PicaTexture(0).id);
         }
         uniform_tex = glGetUniformLocation(shader->shader.handle, "tex[1]");
         if (uniform_tex != -1) {
-            glUniform1i(uniform_tex, 1);
+            glUniform1i(uniform_tex, TextureUnits::PicaTexture(1).id);
         }
         uniform_tex = glGetUniformLocation(shader->shader.handle, "tex[2]");
         if (uniform_tex != -1) {
-            glUniform1i(uniform_tex, 2);
+            glUniform1i(uniform_tex, TextureUnits::PicaTexture(2).id);
         }
 
         // Set the texture samplers to correspond to different lookup table texture units
-        GLuint uniform_lut = glGetUniformLocation(shader->shader.handle, "lut[0]");
+        GLint uniform_lut = glGetUniformLocation(shader->shader.handle, "lighting_lut");
         if (uniform_lut != -1) {
-            glUniform1i(uniform_lut, 3);
-        }
-        uniform_lut = glGetUniformLocation(shader->shader.handle, "lut[1]");
-        if (uniform_lut != -1) {
-            glUniform1i(uniform_lut, 4);
-        }
-        uniform_lut = glGetUniformLocation(shader->shader.handle, "lut[2]");
-        if (uniform_lut != -1) {
-            glUniform1i(uniform_lut, 5);
-        }
-        uniform_lut = glGetUniformLocation(shader->shader.handle, "lut[3]");
-        if (uniform_lut != -1) {
-            glUniform1i(uniform_lut, 6);
-        }
-        uniform_lut = glGetUniformLocation(shader->shader.handle, "lut[4]");
-        if (uniform_lut != -1) {
-            glUniform1i(uniform_lut, 7);
-        }
-        uniform_lut = glGetUniformLocation(shader->shader.handle, "lut[5]");
-        if (uniform_lut != -1) {
-            glUniform1i(uniform_lut, 8);
+            glUniform1i(uniform_lut, TextureUnits::LightingLUT.id);
         }
 
-        GLuint uniform_fog_lut = glGetUniformLocation(shader->shader.handle, "fog_lut");
+        GLint uniform_fog_lut = glGetUniformLocation(shader->shader.handle, "fog_lut");
         if (uniform_fog_lut != -1) {
-            glUniform1i(uniform_fog_lut, 9);
+            glUniform1i(uniform_fog_lut, TextureUnits::FogLUT.id);
         }
 
-        GLuint uniform_proctex_noise_lut =
+        GLint uniform_proctex_noise_lut =
             glGetUniformLocation(shader->shader.handle, "proctex_noise_lut");
         if (uniform_proctex_noise_lut != -1) {
-            glUniform1i(uniform_proctex_noise_lut, 10);
+            glUniform1i(uniform_proctex_noise_lut, TextureUnits::ProcTexNoiseLUT.id);
         }
 
-        GLuint uniform_proctex_color_map =
+        GLint uniform_proctex_color_map =
             glGetUniformLocation(shader->shader.handle, "proctex_color_map");
         if (uniform_proctex_color_map != -1) {
-            glUniform1i(uniform_proctex_color_map, 11);
+            glUniform1i(uniform_proctex_color_map, TextureUnits::ProcTexColorMap.id);
         }
 
-        GLuint uniform_proctex_alpha_map =
+        GLint uniform_proctex_alpha_map =
             glGetUniformLocation(shader->shader.handle, "proctex_alpha_map");
         if (uniform_proctex_alpha_map != -1) {
-            glUniform1i(uniform_proctex_alpha_map, 12);
+            glUniform1i(uniform_proctex_alpha_map, TextureUnits::ProcTexAlphaMap.id);
         }
 
-        GLuint uniform_proctex_lut = glGetUniformLocation(shader->shader.handle, "proctex_lut");
+        GLint uniform_proctex_lut = glGetUniformLocation(shader->shader.handle, "proctex_lut");
         if (uniform_proctex_lut != -1) {
-            glUniform1i(uniform_proctex_lut, 13);
+            glUniform1i(uniform_proctex_lut, TextureUnits::ProcTexLUT.id);
         }
 
-        GLuint uniform_proctex_diff_lut =
+        GLint uniform_proctex_diff_lut =
             glGetUniformLocation(shader->shader.handle, "proctex_diff_lut");
         if (uniform_proctex_diff_lut != -1) {
-            glUniform1i(uniform_proctex_diff_lut, 14);
+            glUniform1i(uniform_proctex_diff_lut, TextureUnits::ProcTexDiffLUT.id);
         }
 
         current_shader = shader_cache.emplace(config, std::move(shader)).first->second.get();
@@ -1343,16 +1354,17 @@ void RasterizerOpenGL::SyncFogColor() {
 }
 
 void RasterizerOpenGL::SyncFogLUT() {
-    std::array<GLuint, 128> new_data;
+    std::array<GLvec2, 128> new_data;
 
     std::transform(Pica::g_state.fog.lut.begin(), Pica::g_state.fog.lut.end(), new_data.begin(),
-                   [](const auto& entry) { return entry.raw; });
+                   [](const auto& entry) {
+                       return GLvec2{entry.ToFloat(), entry.DiffToFloat()};
+                   });
 
     if (new_data != fog_lut_data) {
         fog_lut_data = new_data;
-        glActiveTexture(GL_TEXTURE9);
-        glTexSubImage1D(GL_TEXTURE_1D, 0, 0, 128, GL_RED_INTEGER, GL_UNSIGNED_INT,
-                        fog_lut_data.data());
+        glBindBuffer(GL_TEXTURE_BUFFER, fog_lut_buffer.handle);
+        glBufferSubData(GL_TEXTURE_BUFFER, 0, new_data.size() * sizeof(GLvec2), new_data.data());
     }
 }
 
@@ -1389,17 +1401,18 @@ static void SyncProcTexValueLUT(const std::array<Pica::State::ProcTex::ValueEntr
 }
 
 void RasterizerOpenGL::SyncProcTexNoiseLUT() {
-    SyncProcTexValueLUT(Pica::g_state.proctex.noise_table, proctex_noise_lut_data, GL_TEXTURE10);
+    SyncProcTexValueLUT(Pica::g_state.proctex.noise_table, proctex_noise_lut_data,
+                        TextureUnits::ProcTexNoiseLUT.Enum());
 }
 
 void RasterizerOpenGL::SyncProcTexColorMap() {
     SyncProcTexValueLUT(Pica::g_state.proctex.color_map_table, proctex_color_map_data,
-                        GL_TEXTURE11);
+                        TextureUnits::ProcTexColorMap.Enum());
 }
 
 void RasterizerOpenGL::SyncProcTexAlphaMap() {
     SyncProcTexValueLUT(Pica::g_state.proctex.alpha_map_table, proctex_alpha_map_data,
-                        GL_TEXTURE12);
+                        TextureUnits::ProcTexAlphaMap.Enum());
 }
 
 void RasterizerOpenGL::SyncProcTexLUT() {
@@ -1414,7 +1427,7 @@ void RasterizerOpenGL::SyncProcTexLUT() {
 
     if (new_data != proctex_lut_data) {
         proctex_lut_data = new_data;
-        glActiveTexture(GL_TEXTURE13);
+        glActiveTexture(TextureUnits::ProcTexLUT.Enum());
         glTexSubImage1D(GL_TEXTURE_1D, 0, 0, 256, GL_RGBA, GL_FLOAT, proctex_lut_data.data());
     }
 }
@@ -1431,7 +1444,7 @@ void RasterizerOpenGL::SyncProcTexDiffLUT() {
 
     if (new_data != proctex_diff_lut_data) {
         proctex_diff_lut_data = new_data;
-        glActiveTexture(GL_TEXTURE14);
+        glActiveTexture(TextureUnits::ProcTexDiffLUT.Enum());
         glTexSubImage1D(GL_TEXTURE_1D, 0, 0, 256, GL_RGBA, GL_FLOAT, proctex_diff_lut_data.data());
     }
 }
@@ -1534,20 +1547,17 @@ void RasterizerOpenGL::SyncGlobalAmbient() {
 }
 
 void RasterizerOpenGL::SyncLightingLUT(unsigned lut_index) {
-    std::array<GLvec4, 256> new_data;
-
-    for (unsigned offset = 0; offset < new_data.size(); ++offset) {
-        new_data[offset][0] = Pica::g_state.lighting.luts[(lut_index * 4) + 0][offset].ToFloat();
-        new_data[offset][1] = Pica::g_state.lighting.luts[(lut_index * 4) + 1][offset].ToFloat();
-        new_data[offset][2] = Pica::g_state.lighting.luts[(lut_index * 4) + 2][offset].ToFloat();
-        new_data[offset][3] = Pica::g_state.lighting.luts[(lut_index * 4) + 3][offset].ToFloat();
-    }
+    std::array<GLvec2, 256> new_data;
+    const auto& source_lut = Pica::g_state.lighting.luts[lut_index];
+    std::transform(source_lut.begin(), source_lut.end(), new_data.begin(), [](const auto& entry) {
+        return GLvec2{entry.ToFloat(), entry.DiffToFloat()};
+    });
 
     if (new_data != lighting_lut_data[lut_index]) {
         lighting_lut_data[lut_index] = new_data;
-        glActiveTexture(GL_TEXTURE3 + lut_index);
-        glTexSubImage1D(GL_TEXTURE_1D, 0, 0, 256, GL_RGBA, GL_FLOAT,
-                        lighting_lut_data[lut_index].data());
+        glBindBuffer(GL_TEXTURE_BUFFER, lighting_lut_buffer.handle);
+        glBufferSubData(GL_TEXTURE_BUFFER, lut_index * new_data.size() * sizeof(GLvec2),
+                        new_data.size() * sizeof(GLvec2), new_data.data());
     }
 }
 
@@ -1595,6 +1605,17 @@ void RasterizerOpenGL::SyncLightPosition(int light_index) {
     }
 }
 
+void RasterizerOpenGL::SyncLightSpotDirection(int light_index) {
+    const auto& light = Pica::g_state.regs.lighting.light[light_index];
+    GLvec3 spot_direction = {light.spot_x / 2047.0f, light.spot_y / 2047.0f,
+                             light.spot_z / 2047.0f}; // scale raw register fields to float
+    // Only touch the uniform block (and mark it dirty) when the cached value differs.
+    if (spot_direction != uniform_block_data.data.light_src[light_index].spot_direction) {
+        uniform_block_data.data.light_src[light_index].spot_direction = spot_direction;
+        uniform_block_data.dirty = true;
+    }
+}
+
 void RasterizerOpenGL::SyncLightDistanceAttenuationBias(int light_index) {
     GLfloat dist_atten_bias =
         Pica::float20::FromRaw(Pica::g_state.regs.lighting.light[light_index].dist_atten_bias)
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index a9ad7d660..a433c1d4a 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -125,6 +125,7 @@ private:
         alignas(16) GLvec3 diffuse;
         alignas(16) GLvec3 ambient;
         alignas(16) GLvec3 position;
+        alignas(16) GLvec3 spot_direction; // negated
         GLfloat dist_atten_bias;
         GLfloat dist_atten_scale;
     };
@@ -153,7 +154,7 @@ private:
     };
 
     static_assert(
-        sizeof(UniformData) == 0x3E0,
+        sizeof(UniformData) == 0x460,
         "The size of the UniformData structure has changed, update the structure in the shader");
     static_assert(sizeof(UniformData) < 16384,
                   "UniformData structure must be less than 16kb as per the OpenGL spec");
@@ -241,6 +242,9 @@ private:
     /// Syncs the specified light's position to match the PICA register
     void SyncLightPosition(int light_index);
 
+    /// Syncs the specified light's spot direction to match the PICA register
+    void SyncLightSpotDirection(int light_index);
+
     /// Syncs the specified light's distance attenuation bias to match the PICA register
     void SyncLightDistanceAttenuationBias(int light_index);
 
@@ -259,7 +263,7 @@ private:
 
     struct {
         UniformData data;
-        bool lut_dirty[6];
+        std::array<bool, Pica::LightingRegs::NumLightingSampler> lut_dirty;
         bool fog_lut_dirty;
         bool proctex_noise_lut_dirty;
         bool proctex_color_map_dirty;
@@ -275,11 +279,13 @@ private:
     OGLBuffer uniform_buffer;
     OGLFramebuffer framebuffer;
 
-    std::array<OGLTexture, 6> lighting_luts;
-    std::array<std::array<GLvec4, 256>, 6> lighting_lut_data{};
+    OGLBuffer lighting_lut_buffer;
+    OGLTexture lighting_lut;
+    std::array<std::array<GLvec2, 256>, Pica::LightingRegs::NumLightingSampler> lighting_lut_data{};
 
+    OGLBuffer fog_lut_buffer;
     OGLTexture fog_lut;
-    std::array<GLuint, 128> fog_lut_data{};
+    std::array<GLvec2, 128> fog_lut_data{};
 
     OGLTexture proctex_noise_lut;
     std::array<GLvec2, 128> proctex_noise_lut_data{};
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp
index ffe419863..c93b108fb 100644
--- a/src/video_core/renderer_opengl/gl_shader_gen.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp
@@ -73,8 +73,12 @@ PicaShaderConfig PicaShaderConfig::BuildFromRegs(const Pica::Regs& regs) {
         state.lighting.light[light_index].num = num;
         state.lighting.light[light_index].directional = light.config.directional != 0;
         state.lighting.light[light_index].two_sided_diffuse = light.config.two_sided_diffuse != 0;
+        state.lighting.light[light_index].geometric_factor_0 = light.config.geometric_factor_0 != 0;
+        state.lighting.light[light_index].geometric_factor_1 = light.config.geometric_factor_1 != 0;
         state.lighting.light[light_index].dist_atten_enable =
             !regs.lighting.IsDistAttenDisabled(num);
+        state.lighting.light[light_index].spot_atten_enable =
+            !regs.lighting.IsSpotAttenDisabled(num);
     }
 
     state.lighting.lut_d0.enable = regs.lighting.config1.disable_lut_d0 == 0;
@@ -87,6 +91,12 @@ PicaShaderConfig PicaShaderConfig::BuildFromRegs(const Pica::Regs& regs) {
     state.lighting.lut_d1.type = regs.lighting.lut_input.d1.Value();
     state.lighting.lut_d1.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.d1);
 
+    // this is a dummy field due to lack of the corresponding register
+    state.lighting.lut_sp.enable = true;
+    state.lighting.lut_sp.abs_input = regs.lighting.abs_lut_input.disable_sp == 0;
+    state.lighting.lut_sp.type = regs.lighting.lut_input.sp.Value();
+    state.lighting.lut_sp.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.sp);
+
     state.lighting.lut_fr.enable = regs.lighting.config1.disable_lut_fr == 0;
     state.lighting.lut_fr.abs_input = regs.lighting.abs_lut_input.disable_fr == 0;
     state.lighting.lut_fr.type = regs.lighting.lut_input.fr.Value();
@@ -509,14 +519,17 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) {
     out += "vec4 diffuse_sum = vec4(0.0, 0.0, 0.0, 1.0);\n"
            "vec4 specular_sum = vec4(0.0, 0.0, 0.0, 1.0);\n"
            "vec3 light_vector = vec3(0.0);\n"
-           "vec3 refl_value = vec3(0.0);\n";
-
-    // Compute fragment normals
+           "vec3 refl_value = vec3(0.0);\n"
+           "vec3 spot_dir = vec3(0.0);\n"
+           "vec3 half_vector = vec3(0.0);\n"
+           "float geo_factor = 1.0;\n";
+
+    // Compute fragment normals and tangents
+    const std::string pertubation =
+        "2.0 * (" + SampleTexture(config, lighting.bump_selector) + ").rgb - 1.0";
     if (lighting.bump_mode == LightingRegs::LightingBumpMode::NormalMap) {
-        // Bump mapping is enabled using a normal map, read perturbation vector from the selected
-        // texture
-        out += "vec3 surface_normal = 2.0 * (" + SampleTexture(config, lighting.bump_selector) +
-               ").rgb - 1.0;\n";
+        // Bump mapping is enabled using a normal map
+        out += "vec3 surface_normal = " + pertubation + ";\n";
 
         // Recompute Z-component of perturbation if 'renorm' is enabled, this provides a higher
         // precision result
@@ -525,31 +538,41 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) {
                 "(1.0 - (surface_normal.x*surface_normal.x + surface_normal.y*surface_normal.y))";
             out += "surface_normal.z = sqrt(max(" + val + ", 0.0));\n";
         }
+
+        // The tangent vector is not perturbed by the normal map and is just a unit vector.
+        out += "vec3 surface_tangent = vec3(1.0, 0.0, 0.0);\n";
     } else if (lighting.bump_mode == LightingRegs::LightingBumpMode::TangentMap) {
         // Bump mapping is enabled using a tangent map
-        LOG_CRITICAL(HW_GPU, "unimplemented bump mapping mode (tangent mapping)");
-        UNIMPLEMENTED();
+        out += "vec3 surface_tangent = " + pertubation + ";\n";
+        // Mathematically, recomputing Z-component of the tangent vector won't affect the relevant
+        // computation below, which is also confirmed on 3DS. So we don't bother recomputing here
+        // even if 'renorm' is enabled.
+
+        // The normal vector is not perturbed by the tangent map and is just a unit vector.
+        out += "vec3 surface_normal = vec3(0.0, 0.0, 1.0);\n";
     } else {
-        // No bump mapping - surface local normal is just a unit normal
+        // No bump mapping - surface local normal and tangent are just unit vectors
         out += "vec3 surface_normal = vec3(0.0, 0.0, 1.0);\n";
+        out += "vec3 surface_tangent = vec3(1.0, 0.0, 0.0);\n";
     }
 
     // Rotate the surface-local normal by the interpolated normal quaternion to convert it to
     // eyespace.
-    out += "vec3 normal = quaternion_rotate(normalize(normquat), surface_normal);\n";
+    out += "vec4 normalized_normquat = normalize(normquat);\n";
+    out += "vec3 normal = quaternion_rotate(normalized_normquat, surface_normal);\n";
+    out += "vec3 tangent = quaternion_rotate(normalized_normquat, surface_tangent);\n";
 
-    // Gets the index into the specified lookup table for specular lighting
-    auto GetLutIndex = [&lighting](unsigned light_num, LightingRegs::LightingLutInput input,
-                                   bool abs) {
-        const std::string half_angle = "normalize(normalize(view) + light_vector)";
+    // Samples the specified lookup table for specular lighting
+    auto GetLutValue = [&lighting](LightingRegs::LightingSampler sampler, unsigned light_num,
+                                   LightingRegs::LightingLutInput input, bool abs) {
         std::string index;
         switch (input) {
         case LightingRegs::LightingLutInput::NH:
-            index = "dot(normal, " + half_angle + ")";
+            index = "dot(normal, normalize(half_vector))";
             break;
 
         case LightingRegs::LightingLutInput::VH:
-            index = std::string("dot(normalize(view), " + half_angle + ")");
+            index = std::string("dot(normalize(view), normalize(half_vector))");
             break;
 
         case LightingRegs::LightingLutInput::NV:
@@ -560,6 +583,26 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) {
             index = std::string("dot(light_vector, normal)");
             break;
 
+        case LightingRegs::LightingLutInput::SP:
+            index = std::string("dot(light_vector, spot_dir)");
+            break;
+
+        case LightingRegs::LightingLutInput::CP:
+            // CP input is only available with configuration 7
+            if (lighting.config == LightingRegs::LightingConfig::Config7) {
+                // Note: even if the normal vector is modified by normal map, which is not the
+                // normal of the tangent plane anymore, the half angle vector is still projected
+                // using the modified normal vector.
+                std::string half_angle_proj = "normalize(half_vector) - normal / dot(normal, "
+                                              "normal) * dot(normal, normalize(half_vector))";
+                // Note: the half angle vector projection is confirmed not normalized before the dot
+                // product. The result is in fact not cos(phi) as the name suggests.
+                index = "dot(" + half_angle_proj + ", tangent)";
+            } else {
+                index = "0.0";
+            }
+            break;
+
         default:
             LOG_CRITICAL(HW_GPU, "Unknown lighting LUT input %d\n", (int)input);
             UNIMPLEMENTED();
@@ -567,22 +610,18 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) {
             break;
         }
 
+        std::string sampler_string = std::to_string(static_cast<unsigned>(sampler));
+
         if (abs) {
             // LUT index is in the range of (0.0, 1.0)
             index = lighting.light[light_num].two_sided_diffuse ? "abs(" + index + ")"
                                                                 : "max(" + index + ", 0.0)";
+            return "LookupLightingLUTUnsigned(" + sampler_string + ", " + index + ")";
         } else {
             // LUT index is in the range of (-1.0, 1.0)
-            index = "((" + index + " < 0) ? " + index + " + 2.0 : " + index + ") / 2.0";
+            return "LookupLightingLUTSigned(" + sampler_string + ", " + index + ")";
         }
 
-        return "(OFFSET_256 + SCALE_256 * clamp(" + index + ", 0.0, 1.0))";
-    };
-
-    // Gets the lighting lookup table value given the specified sampler and index
-    auto GetLutValue = [](LightingRegs::LightingSampler sampler, std::string lut_index) {
-        return std::string("texture(lut[" + std::to_string((unsigned)sampler / 4) + "], " +
-                           lut_index + ")[" + std::to_string((unsigned)sampler & 3) + "]");
     };
 
     // Write the code to emulate each enabled light
@@ -596,48 +635,71 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) {
         else
             out += "light_vector = normalize(" + light_src + ".position + view);\n";
 
+        out += "spot_dir = " + light_src + ".spot_direction;\n";
+        out += "half_vector = normalize(view) + light_vector;\n";
+
         // Compute dot product of light_vector and normal, adjust if lighting is one-sided or
         // two-sided
         std::string dot_product = light_config.two_sided_diffuse
                                       ? "abs(dot(light_vector, normal))"
                                       : "max(dot(light_vector, normal), 0.0)";
 
+        // If enabled, compute spot light attenuation value
+        std::string spot_atten = "1.0";
+        if (light_config.spot_atten_enable &&
+            LightingRegs::IsLightingSamplerSupported(
+                lighting.config, LightingRegs::LightingSampler::SpotlightAttenuation)) {
+            std::string value =
+                GetLutValue(LightingRegs::SpotlightAttenuationSampler(light_config.num),
+                            light_config.num, lighting.lut_sp.type, lighting.lut_sp.abs_input);
+            spot_atten = "(" + std::to_string(lighting.lut_sp.scale) + " * " + value + ")";
+        }
+
         // If enabled, compute distance attenuation value
         std::string dist_atten = "1.0";
         if (light_config.dist_atten_enable) {
-            std::string index = "(" + light_src + ".dist_atten_scale * length(-view - " +
-                                light_src + ".position) + " + light_src + ".dist_atten_bias)";
-            index = "(OFFSET_256 + SCALE_256 * clamp(" + index + ", 0.0, 1.0))";
-            const unsigned lut_num =
-                ((unsigned)LightingRegs::LightingSampler::DistanceAttenuation + light_config.num);
-            dist_atten = GetLutValue((LightingRegs::LightingSampler)lut_num, index);
+            std::string index = "clamp(" + light_src + ".dist_atten_scale * length(-view - " +
+                                light_src + ".position) + " + light_src +
+                                ".dist_atten_bias, 0.0, 1.0)";
+            auto sampler = LightingRegs::DistanceAttenuationSampler(light_config.num);
+            dist_atten = "LookupLightingLUTUnsigned(" +
+                         std::to_string(static_cast<unsigned>(sampler)) + "," + index + ")";
         }
 
         // If enabled, clamp specular component if lighting result is negative
         std::string clamp_highlights =
             lighting.clamp_highlights ? "(dot(light_vector, normal) <= 0.0 ? 0.0 : 1.0)" : "1.0";
 
+        if (light_config.geometric_factor_0 || light_config.geometric_factor_1) {
+            out += "geo_factor = dot(half_vector, half_vector);\n"
+                   "geo_factor = geo_factor == 0.0 ? 0.0 : min(" +
+                   dot_product + " / geo_factor, 1.0);\n";
+        }
+
         // Specular 0 component
         std::string d0_lut_value = "1.0";
         if (lighting.lut_d0.enable &&
             LightingRegs::IsLightingSamplerSupported(
                 lighting.config, LightingRegs::LightingSampler::Distribution0)) {
             // Lookup specular "distribution 0" LUT value
-            std::string index =
-                GetLutIndex(light_config.num, lighting.lut_d0.type, lighting.lut_d0.abs_input);
-            d0_lut_value = "(" + std::to_string(lighting.lut_d0.scale) + " * " +
-                           GetLutValue(LightingRegs::LightingSampler::Distribution0, index) + ")";
+            std::string value =
+                GetLutValue(LightingRegs::LightingSampler::Distribution0, light_config.num,
+                            lighting.lut_d0.type, lighting.lut_d0.abs_input);
+            d0_lut_value = "(" + std::to_string(lighting.lut_d0.scale) + " * " + value + ")";
         }
         std::string specular_0 = "(" + d0_lut_value + " * " + light_src + ".specular_0)";
+        if (light_config.geometric_factor_0) {
+            specular_0 = "(" + specular_0 + " * geo_factor)";
+        }
 
         // If enabled, lookup ReflectRed value, otherwise, 1.0 is used
         if (lighting.lut_rr.enable &&
             LightingRegs::IsLightingSamplerSupported(lighting.config,
                                                      LightingRegs::LightingSampler::ReflectRed)) {
-            std::string index =
-                GetLutIndex(light_config.num, lighting.lut_rr.type, lighting.lut_rr.abs_input);
-            std::string value = "(" + std::to_string(lighting.lut_rr.scale) + " * " +
-                                GetLutValue(LightingRegs::LightingSampler::ReflectRed, index) + ")";
+            std::string value =
+                GetLutValue(LightingRegs::LightingSampler::ReflectRed, light_config.num,
+                            lighting.lut_rr.type, lighting.lut_rr.abs_input);
+            value = "(" + std::to_string(lighting.lut_rr.scale) + " * " + value + ")";
             out += "refl_value.r = " + value + ";\n";
         } else {
             out += "refl_value.r = 1.0;\n";
@@ -647,11 +709,10 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) {
         if (lighting.lut_rg.enable &&
             LightingRegs::IsLightingSamplerSupported(lighting.config,
                                                      LightingRegs::LightingSampler::ReflectGreen)) {
-            std::string index =
-                GetLutIndex(light_config.num, lighting.lut_rg.type, lighting.lut_rg.abs_input);
-            std::string value = "(" + std::to_string(lighting.lut_rg.scale) + " * " +
-                                GetLutValue(LightingRegs::LightingSampler::ReflectGreen, index) +
-                                ")";
+            std::string value =
+                GetLutValue(LightingRegs::LightingSampler::ReflectGreen, light_config.num,
+                            lighting.lut_rg.type, lighting.lut_rg.abs_input);
+            value = "(" + std::to_string(lighting.lut_rg.scale) + " * " + value + ")";
             out += "refl_value.g = " + value + ";\n";
         } else {
             out += "refl_value.g = refl_value.r;\n";
@@ -661,11 +722,10 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) {
         if (lighting.lut_rb.enable &&
             LightingRegs::IsLightingSamplerSupported(lighting.config,
                                                      LightingRegs::LightingSampler::ReflectBlue)) {
-            std::string index =
-                GetLutIndex(light_config.num, lighting.lut_rb.type, lighting.lut_rb.abs_input);
-            std::string value = "(" + std::to_string(lighting.lut_rb.scale) + " * " +
-                                GetLutValue(LightingRegs::LightingSampler::ReflectBlue, index) +
-                                ")";
+            std::string value =
+                GetLutValue(LightingRegs::LightingSampler::ReflectBlue, light_config.num,
+                            lighting.lut_rb.type, lighting.lut_rb.abs_input);
+            value = "(" + std::to_string(lighting.lut_rb.scale) + " * " + value + ")";
             out += "refl_value.b = " + value + ";\n";
         } else {
             out += "refl_value.b = refl_value.r;\n";
@@ -677,23 +737,26 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) {
             LightingRegs::IsLightingSamplerSupported(
                 lighting.config, LightingRegs::LightingSampler::Distribution1)) {
             // Lookup specular "distribution 1" LUT value
-            std::string index =
-                GetLutIndex(light_config.num, lighting.lut_d1.type, lighting.lut_d1.abs_input);
-            d1_lut_value = "(" + std::to_string(lighting.lut_d1.scale) + " * " +
-                           GetLutValue(LightingRegs::LightingSampler::Distribution1, index) + ")";
+            std::string value =
+                GetLutValue(LightingRegs::LightingSampler::Distribution1, light_config.num,
+                            lighting.lut_d1.type, lighting.lut_d1.abs_input);
+            d1_lut_value = "(" + std::to_string(lighting.lut_d1.scale) + " * " + value + ")";
         }
         std::string specular_1 =
             "(" + d1_lut_value + " * refl_value * " + light_src + ".specular_1)";
+        if (light_config.geometric_factor_1) {
+            specular_1 = "(" + specular_1 + " * geo_factor)";
+        }
 
         // Fresnel
         if (lighting.lut_fr.enable &&
             LightingRegs::IsLightingSamplerSupported(lighting.config,
                                                      LightingRegs::LightingSampler::Fresnel)) {
             // Lookup fresnel LUT value
-            std::string index =
-                GetLutIndex(light_config.num, lighting.lut_fr.type, lighting.lut_fr.abs_input);
-            std::string value = "(" + std::to_string(lighting.lut_fr.scale) + " * " +
-                                GetLutValue(LightingRegs::LightingSampler::Fresnel, index) + ")";
+            std::string value =
+                GetLutValue(LightingRegs::LightingSampler::Fresnel, light_config.num,
+                            lighting.lut_fr.type, lighting.lut_fr.abs_input);
+            value = "(" + std::to_string(lighting.lut_fr.scale) + " * " + value + ")";
 
             // Enabled for difffuse lighting alpha component
             if (lighting.fresnel_selector == LightingRegs::LightingFresnelSelector::PrimaryAlpha ||
@@ -711,11 +774,11 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) {
 
         // Compute primary fragment color (diffuse lighting) function
         out += "diffuse_sum.rgb += ((" + light_src + ".diffuse * " + dot_product + ") + " +
-               light_src + ".ambient) * " + dist_atten + ";\n";
+               light_src + ".ambient) * " + dist_atten + " * " + spot_atten + ";\n";
 
         // Compute secondary fragment color (specular lighting) function
         out += "specular_sum.rgb += (" + specular_0 + " + " + specular_1 + ") * " +
-               clamp_highlights + " * " + dist_atten + ";\n";
+               clamp_highlights + " * " + dist_atten + " * " + spot_atten + ";\n";
     }
 
     // Sum final lighting result
@@ -947,10 +1010,6 @@ std::string GenerateFragmentShader(const PicaShaderConfig& config) {
 #define NUM_TEV_STAGES 6
 #define NUM_LIGHTS 8
 
-// Texture coordinate offsets and scales
-#define OFFSET_256 (0.5 / 256.0)
-#define SCALE_256 (255.0 / 256.0)
-
 in vec4 primary_color;
 in vec2 texcoord[3];
 in float texcoord0_w;
@@ -967,6 +1026,7 @@ struct LightSrc {
     vec3 diffuse;
     vec3 ambient;
     vec3 position;
+    vec3 spot_direction;
     float dist_atten_bias;
     float dist_atten_scale;
 };
@@ -991,8 +1051,8 @@ layout (std140) uniform shader_data {
 };
 
 uniform sampler2D tex[3];
-uniform sampler1D lut[6];
-uniform usampler1D fog_lut;
+uniform samplerBuffer lighting_lut;
+uniform samplerBuffer fog_lut;
 uniform sampler1D proctex_noise_lut;
 uniform sampler1D proctex_color_map;
 uniform sampler1D proctex_alpha_map;
@@ -1004,6 +1064,24 @@ vec3 quaternion_rotate(vec4 q, vec3 v) {
     return v + 2.0 * cross(q.xyz, cross(q.xyz, v) + q.w * v);
 }
 
+float LookupLightingLUT(int lut_index, int index, float delta) {
+    vec2 entry = texelFetch(lighting_lut, lut_index * 256 + index).rg;
+    return entry.r + entry.g * delta;
+}
+
+float LookupLightingLUTUnsigned(int lut_index, float pos) {
+    int index = clamp(int(pos * 256.0), 0, 255);
+    float delta = pos * 256.0 - index;
+    return LookupLightingLUT(lut_index, index, delta);
+}
+
+float LookupLightingLUTSigned(int lut_index, float pos) {
+    int index = clamp(int(floor(pos * 128.0)), -128, 127);
+    float delta = pos * 128.0 - index;
+    if (index < 0) index += 256;
+    return LookupLightingLUT(lut_index, index, delta);
+}
+
 )";
 
     if (config.state.proctex.enable)
@@ -1067,12 +1145,8 @@ vec4 secondary_fragment_color = vec4(0.0);
         // Generate clamped fog factor from LUT for given fog index
         out += "float fog_i = clamp(floor(fog_index), 0.0, 127.0);\n";
         out += "float fog_f = fog_index - fog_i;\n";
-        out += "uint fog_lut_entry = texelFetch(fog_lut, int(fog_i), 0).r;\n";
-        out += "float fog_lut_entry_difference = float(int((fog_lut_entry & 0x1FFFU) << 19U) >> "
-               "19);\n"; // Extract signed difference
-        out += "float fog_lut_entry_value = float((fog_lut_entry >> 13U) & 0x7FFU);\n";
-        out += "float fog_factor = (fog_lut_entry_value + fog_lut_entry_difference * fog_f) / "
-               "2047.0;\n";
+        out += "vec2 fog_lut_entry = texelFetch(fog_lut, int(fog_i)).rg;\n";
+        out += "float fog_factor = fog_lut_entry.r + fog_lut_entry.g * fog_f;\n";
         out += "fog_factor = clamp(fog_factor, 0.0, 1.0);\n";
 
         // Blend the fog
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.h b/src/video_core/renderer_opengl/gl_shader_gen.h
index ea6d216d1..2302ae453 100644
--- a/src/video_core/renderer_opengl/gl_shader_gen.h
+++ b/src/video_core/renderer_opengl/gl_shader_gen.h
@@ -93,6 +93,9 @@ union PicaShaderConfig {
                 bool directional;
                 bool two_sided_diffuse;
                 bool dist_atten_enable;
+                bool spot_atten_enable;
+                bool geometric_factor_0;
+                bool geometric_factor_1;
             } light[8];
 
             bool enable;
@@ -110,7 +113,7 @@ union PicaShaderConfig {
                 bool abs_input;
                 Pica::LightingRegs::LightingLutInput type;
                 float scale;
-            } lut_d0, lut_d1, lut_fr, lut_rr, lut_rg, lut_rb;
+            } lut_d0, lut_d1, lut_sp, lut_fr, lut_rr, lut_rg, lut_rb;
         } lighting;
 
         struct {
diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp
index bf837a7fb..eface2dea 100644
--- a/src/video_core/renderer_opengl/gl_state.cpp
+++ b/src/video_core/renderer_opengl/gl_state.cpp
@@ -52,11 +52,9 @@ OpenGLState::OpenGLState() {
         texture_unit.sampler = 0;
     }
 
-    for (auto& lut : lighting_luts) {
-        lut.texture_1d = 0;
-    }
+    lighting_lut.texture_buffer = 0;
 
-    fog_lut.texture_1d = 0;
+    fog_lut.texture_buffer = 0;
 
     proctex_lut.texture_1d = 0;
     proctex_diff_lut.texture_1d = 0;
@@ -185,7 +183,7 @@ void OpenGLState::Apply() const {
     // Textures
     for (unsigned i = 0; i < ARRAY_SIZE(texture_units); ++i) {
         if (texture_units[i].texture_2d != cur_state.texture_units[i].texture_2d) {
-            glActiveTexture(GL_TEXTURE0 + i);
+            glActiveTexture(TextureUnits::PicaTexture(i).Enum());
             glBindTexture(GL_TEXTURE_2D, texture_units[i].texture_2d);
         }
         if (texture_units[i].sampler != cur_state.texture_units[i].sampler) {
@@ -194,46 +192,44 @@ void OpenGLState::Apply() const {
     }
 
     // Lighting LUTs
-    for (unsigned i = 0; i < ARRAY_SIZE(lighting_luts); ++i) {
-        if (lighting_luts[i].texture_1d != cur_state.lighting_luts[i].texture_1d) {
-            glActiveTexture(GL_TEXTURE3 + i);
-            glBindTexture(GL_TEXTURE_1D, lighting_luts[i].texture_1d);
-        }
+    if (lighting_lut.texture_buffer != cur_state.lighting_lut.texture_buffer) {
+        glActiveTexture(TextureUnits::LightingLUT.Enum());
+        glBindTexture(GL_TEXTURE_BUFFER, lighting_lut.texture_buffer);
     }
 
     // Fog LUT
-    if (fog_lut.texture_1d != cur_state.fog_lut.texture_1d) {
-        glActiveTexture(GL_TEXTURE9);
-        glBindTexture(GL_TEXTURE_1D, fog_lut.texture_1d);
+    if (fog_lut.texture_buffer != cur_state.fog_lut.texture_buffer) {
+        glActiveTexture(TextureUnits::FogLUT.Enum());
+        glBindTexture(GL_TEXTURE_BUFFER, fog_lut.texture_buffer);
     }
 
     // ProcTex Noise LUT
     if (proctex_noise_lut.texture_1d != cur_state.proctex_noise_lut.texture_1d) {
-        glActiveTexture(GL_TEXTURE10);
+        glActiveTexture(TextureUnits::ProcTexNoiseLUT.Enum());
         glBindTexture(GL_TEXTURE_1D, proctex_noise_lut.texture_1d);
     }
 
     // ProcTex Color Map
     if (proctex_color_map.texture_1d != cur_state.proctex_color_map.texture_1d) {
-        glActiveTexture(GL_TEXTURE11);
+        glActiveTexture(TextureUnits::ProcTexColorMap.Enum());
         glBindTexture(GL_TEXTURE_1D, proctex_color_map.texture_1d);
     }
 
     // ProcTex Alpha Map
     if (proctex_alpha_map.texture_1d != cur_state.proctex_alpha_map.texture_1d) {
-        glActiveTexture(GL_TEXTURE12);
+        glActiveTexture(TextureUnits::ProcTexAlphaMap.Enum());
         glBindTexture(GL_TEXTURE_1D, proctex_alpha_map.texture_1d);
     }
 
     // ProcTex LUT
     if (proctex_lut.texture_1d != cur_state.proctex_lut.texture_1d) {
-        glActiveTexture(GL_TEXTURE13);
+        glActiveTexture(TextureUnits::ProcTexLUT.Enum());
         glBindTexture(GL_TEXTURE_1D, proctex_lut.texture_1d);
     }
 
     // ProcTex Diff LUT
     if (proctex_diff_lut.texture_1d != cur_state.proctex_diff_lut.texture_1d) {
-        glActiveTexture(GL_TEXTURE14);
+        glActiveTexture(TextureUnits::ProcTexDiffLUT.Enum());
         glBindTexture(GL_TEXTURE_1D, proctex_diff_lut.texture_1d);
     }
 
@@ -274,6 +270,20 @@ void OpenGLState::ResetTexture(GLuint handle) {
             unit.texture_2d = 0;
         }
     }
+    if (cur_state.lighting_lut.texture_buffer == handle)
+        cur_state.lighting_lut.texture_buffer = 0;
+    if (cur_state.fog_lut.texture_buffer == handle)
+        cur_state.fog_lut.texture_buffer = 0;
+    if (cur_state.proctex_noise_lut.texture_1d == handle)
+        cur_state.proctex_noise_lut.texture_1d = 0;
+    if (cur_state.proctex_color_map.texture_1d == handle)
+        cur_state.proctex_color_map.texture_1d = 0;
+    if (cur_state.proctex_alpha_map.texture_1d == handle)
+        cur_state.proctex_alpha_map.texture_1d = 0;
+    if (cur_state.proctex_lut.texture_1d == handle)
+        cur_state.proctex_lut.texture_1d = 0;
+    if (cur_state.proctex_diff_lut.texture_1d == handle)
+        cur_state.proctex_diff_lut.texture_1d = 0;
 }
 
 void OpenGLState::ResetSampler(GLuint handle) {
diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h
index 7dcc03bd5..1efcf0811 100644
--- a/src/video_core/renderer_opengl/gl_state.h
+++ b/src/video_core/renderer_opengl/gl_state.h
@@ -6,6 +6,29 @@
 
 #include <glad/glad.h>
 
+namespace TextureUnits {
+
+struct TextureUnit {
+    GLint id;
+    constexpr GLenum Enum() const {
+        return static_cast<GLenum>(GL_TEXTURE0 + id);
+    }
+};
+
+constexpr TextureUnit PicaTexture(int unit) {
+    return TextureUnit{unit};
+}
+
+constexpr TextureUnit LightingLUT{3};
+constexpr TextureUnit FogLUT{4};
+constexpr TextureUnit ProcTexNoiseLUT{5};
+constexpr TextureUnit ProcTexColorMap{6};
+constexpr TextureUnit ProcTexAlphaMap{7};
+constexpr TextureUnit ProcTexLUT{8};
+constexpr TextureUnit ProcTexDiffLUT{9};
+
+} // namespace TextureUnits
+
 class OpenGLState {
 public:
     struct {
@@ -64,11 +87,11 @@ public:
     } texture_units[3];
 
     struct {
-        GLuint texture_1d; // GL_TEXTURE_BINDING_1D
-    } lighting_luts[6];
+        GLuint texture_buffer; // GL_TEXTURE_BINDING_BUFFER
+    } lighting_lut;
 
     struct {
-        GLuint texture_1d; // GL_TEXTURE_BINDING_1D
+        GLuint texture_buffer; // GL_TEXTURE_BINDING_BUFFER
     } fog_lut;
 
     struct {
diff --git a/src/video_core/renderer_opengl/pica_to_gl.h b/src/video_core/renderer_opengl/pica_to_gl.h
index 93d7b0b71..c7fa1f873 100644
--- a/src/video_core/renderer_opengl/pica_to_gl.h
+++ b/src/video_core/renderer_opengl/pica_to_gl.h
@@ -12,6 +12,7 @@
 #include "common/common_funcs.h"
 #include "common/common_types.h"
 #include "common/logging/log.h"
+#include "core/core.h"
 #include "video_core/regs_framebuffer.h"
 #include "video_core/regs_lighting.h"
 #include "video_core/regs_texturing.h"
@@ -55,6 +56,12 @@ inline GLenum WrapMode(Pica::TexturingRegs::TextureConfig::WrapMode mode) {
         GL_CLAMP_TO_BORDER, // WrapMode::ClampToBorder
         GL_REPEAT,          // WrapMode::Repeat
         GL_MIRRORED_REPEAT, // WrapMode::MirroredRepeat
+        // TODO(wwylele): ClampToEdge2 and ClampToBorder2 are not properly implemented here. See the
+        // comments in enum WrapMode.
+        GL_CLAMP_TO_EDGE,   // WrapMode::ClampToEdge2
+        GL_CLAMP_TO_BORDER, // WrapMode::ClampToBorder2
+        GL_REPEAT,          // WrapMode::Repeat2
+        GL_REPEAT,          // WrapMode::Repeat3
     };
 
     // Range check table for input
@@ -65,6 +72,13 @@ inline GLenum WrapMode(Pica::TexturingRegs::TextureConfig::WrapMode mode) {
         return GL_CLAMP_TO_EDGE;
     }
 
+    if (static_cast<u32>(mode) > 3) {
+        Core::Telemetry().AddField(Telemetry::FieldType::Session,
+                                   "VideoCore_Pica_UnsupportedTextureWrapMode",
+                                   static_cast<u32>(mode));
+        LOG_WARNING(Render_OpenGL, "Using texture wrap mode %u", static_cast<u32>(mode));
+    }
+
     GLenum gl_mode = wrap_mode_table[mode];
 
     // Check for dummy values indicating an unknown mode
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index d90c776f9..65c18aecc 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -481,9 +481,18 @@ bool RendererOpenGL::Init() {
         glDebugMessageCallback(DebugHandler, nullptr);
     }
 
-    LOG_INFO(Render_OpenGL, "GL_VERSION: %s", glGetString(GL_VERSION));
-    LOG_INFO(Render_OpenGL, "GL_VENDOR: %s", glGetString(GL_VENDOR));
-    LOG_INFO(Render_OpenGL, "GL_RENDERER: %s", glGetString(GL_RENDERER));
+    const char* gl_version{reinterpret_cast<char const*>(glGetString(GL_VERSION))};
+    const char* gpu_vendor{reinterpret_cast<char const*>(glGetString(GL_VENDOR))};
+    const char* gpu_model{reinterpret_cast<char const*>(glGetString(GL_RENDERER))};
+
+    LOG_INFO(Render_OpenGL, "GL_VERSION: %s", gl_version);
+    LOG_INFO(Render_OpenGL, "GL_VENDOR: %s", gpu_vendor);
+    LOG_INFO(Render_OpenGL, "GL_RENDERER: %s", gpu_model);
+
+    Core::Telemetry().AddField(Telemetry::FieldType::UserSystem, "GPU_Vendor", gpu_vendor);
+    Core::Telemetry().AddField(Telemetry::FieldType::UserSystem, "GPU_Model", gpu_model);
+    Core::Telemetry().AddField(Telemetry::FieldType::UserSystem, "GPU_OpenGL_Version", gl_version);
+
     if (!GLAD_GL_VERSION_3_3) {
         return false;
     }
diff --git a/src/video_core/shader/shader_jit_x64_compiler.cpp b/src/video_core/shader/shader_jit_x64_compiler.cpp
index 5d9b6448c..42a57aab1 100644
--- a/src/video_core/shader/shader_jit_x64_compiler.cpp
+++ b/src/video_core/shader/shader_jit_x64_compiler.cpp
@@ -321,27 +321,27 @@ void JitShader::Compile_EvaluateCondition(Instruction instr) {
     case Instruction::FlowControlType::Or:
         mov(eax, COND0);
         mov(ebx, COND1);
-        xor(eax, (instr.flow_control.refx.Value() ^ 1));
-        xor(ebx, (instr.flow_control.refy.Value() ^ 1));
-        or (eax, ebx);
+        xor_(eax, (instr.flow_control.refx.Value() ^ 1));
+        xor_(ebx, (instr.flow_control.refy.Value() ^ 1));
+        or_(eax, ebx);
         break;
 
     case Instruction::FlowControlType::And:
         mov(eax, COND0);
         mov(ebx, COND1);
-        xor(eax, (instr.flow_control.refx.Value() ^ 1));
-        xor(ebx, (instr.flow_control.refy.Value() ^ 1));
-        and(eax, ebx);
+        xor_(eax, (instr.flow_control.refx.Value() ^ 1));
+        xor_(ebx, (instr.flow_control.refy.Value() ^ 1));
+        and_(eax, ebx);
         break;
 
     case Instruction::FlowControlType::JustX:
         mov(eax, COND0);
-        xor(eax, (instr.flow_control.refx.Value() ^ 1));
+        xor_(eax, (instr.flow_control.refx.Value() ^ 1));
         break;
 
     case Instruction::FlowControlType::JustY:
         mov(eax, COND1);
-        xor(eax, (instr.flow_control.refy.Value() ^ 1));
+        xor_(eax, (instr.flow_control.refy.Value() ^ 1));
         break;
     }
 }
@@ -734,10 +734,10 @@ void JitShader::Compile_LOOP(Instruction instr) {
     mov(LOOPCOUNT, dword[SETUP + offset]);
     mov(LOOPCOUNT_REG, LOOPCOUNT);
     shr(LOOPCOUNT_REG, 4);
-    and(LOOPCOUNT_REG, 0xFF0); // Y-component is the start
+    and_(LOOPCOUNT_REG, 0xFF0); // Y-component is the start
     mov(LOOPINC, LOOPCOUNT);
     shr(LOOPINC, 12);
-    and(LOOPINC, 0xFF0);                // Z-component is the incrementer
+    and_(LOOPINC, 0xFF0);               // Z-component is the incrementer
     movzx(LOOPCOUNT, LOOPCOUNT.cvt8()); // X-component is iteration count
     add(LOOPCOUNT, 1);                  // Iteration count is X-component + 1
 
@@ -858,9 +858,9 @@ void JitShader::Compile(const std::array<u32, MAX_PROGRAM_CODE_LENGTH>* program_
     mov(STATE, ABI_PARAM2);
 
     // Zero address/loop  registers
-    xor(ADDROFFS_REG_0.cvt32(), ADDROFFS_REG_0.cvt32());
-    xor(ADDROFFS_REG_1.cvt32(), ADDROFFS_REG_1.cvt32());
-    xor(LOOPCOUNT_REG, LOOPCOUNT_REG);
+    xor_(ADDROFFS_REG_0.cvt32(), ADDROFFS_REG_0.cvt32());
+    xor_(ADDROFFS_REG_1.cvt32(), ADDROFFS_REG_1.cvt32());
+    xor_(LOOPCOUNT_REG, LOOPCOUNT_REG);
 
     // Used to set a register to one
     static const __m128 one = {1.f, 1.f, 1.f, 1.f};
diff --git a/src/video_core/swrasterizer/rasterizer.cpp b/src/video_core/swrasterizer/rasterizer.cpp
index 8b7b1defb..512e81c08 100644
--- a/src/video_core/swrasterizer/rasterizer.cpp
+++ b/src/video_core/swrasterizer/rasterizer.cpp
@@ -357,10 +357,22 @@ static void ProcessTriangleInternal(const Vertex& v0, const Vertex& v1, const Ve
                 int t = (int)(v * float24::FromFloat32(static_cast<float>(texture.config.height)))
                             .ToFloat32();
 
-                if ((texture.config.wrap_s == TexturingRegs::TextureConfig::ClampToBorder &&
-                     (s < 0 || static_cast<u32>(s) >= texture.config.width)) ||
-                    (texture.config.wrap_t == TexturingRegs::TextureConfig::ClampToBorder &&
-                     (t < 0 || static_cast<u32>(t) >= texture.config.height))) {
+                bool use_border_s = false;
+                bool use_border_t = false;
+
+                if (texture.config.wrap_s == TexturingRegs::TextureConfig::ClampToBorder) {
+                    use_border_s = s < 0 || s >= static_cast<int>(texture.config.width);
+                } else if (texture.config.wrap_s == TexturingRegs::TextureConfig::ClampToBorder2) {
+                    use_border_s = s >= static_cast<int>(texture.config.width);
+                }
+
+                if (texture.config.wrap_t == TexturingRegs::TextureConfig::ClampToBorder) {
+                    use_border_t = t < 0 || t >= static_cast<int>(texture.config.height);
+                } else if (texture.config.wrap_t == TexturingRegs::TextureConfig::ClampToBorder2) {
+                    use_border_t = t >= static_cast<int>(texture.config.height);
+                }
+
+                if (use_border_s || use_border_t) {
                     auto border_color = texture.config.border_color;
                     texture_color[i] = {border_color.r, border_color.g, border_color.b,
                                         border_color.a};
@@ -572,8 +584,7 @@ static void ProcessTriangleInternal(const Vertex& v0, const Vertex& v1, const Ve
                 float fog_i = MathUtil::Clamp(floorf(fog_index), 0.0f, 127.0f);
                 float fog_f = fog_index - fog_i;
                 const auto& fog_lut_entry = g_state.fog.lut[static_cast<unsigned int>(fog_i)];
-                float fog_factor = (fog_lut_entry.value + fog_lut_entry.difference * fog_f) /
-                                   2047.0f; // This is signed fixed point 1.11
+                float fog_factor = fog_lut_entry.ToFloat() + fog_lut_entry.DiffToFloat() * fog_f;
                 fog_factor = MathUtil::Clamp(fog_factor, 0.0f, 1.0f);
 
                 // Blend the fog
diff --git a/src/video_core/swrasterizer/texturing.cpp b/src/video_core/swrasterizer/texturing.cpp
index aeb6aeb8c..4f02b93f2 100644
--- a/src/video_core/swrasterizer/texturing.cpp
+++ b/src/video_core/swrasterizer/texturing.cpp
@@ -18,22 +18,33 @@ using TevStageConfig = TexturingRegs::TevStageConfig;
 
 int GetWrappedTexCoord(TexturingRegs::TextureConfig::WrapMode mode, int val, unsigned size) {
     switch (mode) {
+    case TexturingRegs::TextureConfig::ClampToEdge2:
+        // For negative coordinates, ClampToEdge2 behaves the same as Repeat
+        if (val < 0) {
+            return static_cast<int>(static_cast<unsigned>(val) % size);
+        }
+    // [[fallthrough]]
     case TexturingRegs::TextureConfig::ClampToEdge:
         val = std::max(val, 0);
-        val = std::min(val, (int)size - 1);
+        val = std::min(val, static_cast<int>(size) - 1);
         return val;
 
     case TexturingRegs::TextureConfig::ClampToBorder:
         return val;
 
+    case TexturingRegs::TextureConfig::ClampToBorder2:
+    // For ClampToBorder2, positive coordinates beyond the texture size are already handled
+    // outside this function. Here we only handle negative coordinates, in the same way as Repeat.
+    case TexturingRegs::TextureConfig::Repeat2:
+    case TexturingRegs::TextureConfig::Repeat3:
     case TexturingRegs::TextureConfig::Repeat:
-        return (int)((unsigned)val % size);
+        return static_cast<int>(static_cast<unsigned>(val) % size);
 
     case TexturingRegs::TextureConfig::MirroredRepeat: {
-        unsigned int coord = ((unsigned)val % (2 * size));
+        unsigned int coord = (static_cast<unsigned>(val) % (2 * size));
         if (coord >= size)
             coord = 2 * size - 1 - coord;
-        return (int)coord;
+        return static_cast<int>(coord);
     }
 
     default: