about summary refs log tree commit diff
path: root/src/video_core/renderer_opengl
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core/renderer_opengl')
-rw-r--r-- src/video_core/renderer_opengl/gl_buffer_cache.cpp 10
-rw-r--r-- src/video_core/renderer_opengl/gl_buffer_cache.h 3
-rw-r--r-- src/video_core/renderer_opengl/gl_device.cpp 54
-rw-r--r-- src/video_core/renderer_opengl/gl_device.h 6
-rw-r--r-- src/video_core/renderer_opengl/gl_primitive_assembler.cpp 63
-rw-r--r-- src/video_core/renderer_opengl/gl_primitive_assembler.h 31
-rw-r--r-- src/video_core/renderer_opengl/gl_rasterizer.cpp 131
-rw-r--r-- src/video_core/renderer_opengl/gl_rasterizer.h 21
-rw-r--r-- src/video_core/renderer_opengl/gl_shader_cache.cpp 66
-rw-r--r-- src/video_core/renderer_opengl/gl_shader_cache.h 1
-rw-r--r-- src/video_core/renderer_opengl/gl_shader_decompiler.cpp 174
-rw-r--r-- src/video_core/renderer_opengl/gl_shader_gen.cpp 46
-rw-r--r-- src/video_core/renderer_opengl/gl_shader_manager.cpp 11
-rw-r--r-- src/video_core/renderer_opengl/gl_shader_manager.h 8
-rw-r--r-- src/video_core/renderer_opengl/gl_state.cpp 13
-rw-r--r-- src/video_core/renderer_opengl/gl_state.h 7
-rw-r--r-- src/video_core/renderer_opengl/maxwell_to_gl.h 9
-rw-r--r-- src/video_core/renderer_opengl/renderer_opengl.cpp 1
18 files changed, 284 insertions, 371 deletions
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
index 25652e794..48b86f3bd 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
@@ -71,16 +71,6 @@ GLintptr OGLBufferCache::UploadHostMemory(const void* raw_pointer, std::size_t s
return uploaded_offset;
}
-std::tuple<u8*, GLintptr> OGLBufferCache::ReserveMemory(std::size_t size, std::size_t alignment) {
- AlignBuffer(alignment);
- u8* const uploaded_ptr = buffer_ptr;
- const GLintptr uploaded_offset = buffer_offset;
-
- buffer_ptr += size;
- buffer_offset += size;
- return std::make_tuple(uploaded_ptr, uploaded_offset);
-}
-
bool OGLBufferCache::Map(std::size_t max_size) {
bool invalidate;
std::tie(buffer_ptr, buffer_offset_base, invalidate) =
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h
index f9247a40e..f2347581b 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.h
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.h
@@ -61,9 +61,6 @@ public:
/// Uploads from a host memory. Returns host's buffer offset where it's been allocated.
GLintptr UploadHostMemory(const void* raw_pointer, std::size_t size, std::size_t alignment = 4);
- /// Reserves memory to be used by host's CPU. Returns mapped address and offset.
- std::tuple<u8*, GLintptr> ReserveMemory(std::size_t size, std::size_t alignment = 4);
-
bool Map(std::size_t max_size);
void Unmap();
diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp
index 1d1581f49..65a88b06c 100644
--- a/src/video_core/renderer_opengl/gl_device.cpp
+++ b/src/video_core/renderer_opengl/gl_device.cpp
@@ -2,11 +2,14 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
+#include <array>
#include <cstddef>
#include <glad/glad.h>
#include "common/logging/log.h"
+#include "common/scope_exit.h"
#include "video_core/renderer_opengl/gl_device.h"
+#include "video_core/renderer_opengl/gl_resource_manager.h"
namespace OpenGL {
@@ -24,6 +27,7 @@ Device::Device() {
max_vertex_attributes = GetInteger<u32>(GL_MAX_VERTEX_ATTRIBS);
max_varyings = GetInteger<u32>(GL_MAX_VARYING_VECTORS);
has_variable_aoffi = TestVariableAoffi();
+ has_component_indexing_bug = TestComponentIndexingBug();
}
Device::Device(std::nullptr_t) {
@@ -31,6 +35,7 @@ Device::Device(std::nullptr_t) {
max_vertex_attributes = 16;
max_varyings = 15;
has_variable_aoffi = true;
+ has_component_indexing_bug = false;
}
bool Device::TestVariableAoffi() {
@@ -52,4 +57,53 @@ void main() {
return supported;
}
+bool Device::TestComponentIndexingBug() {
+ constexpr char log_message[] = "Renderer_ComponentIndexingBug: {}";
+ const GLchar* COMPONENT_TEST = R"(#version 430 core
+layout (std430, binding = 0) buffer OutputBuffer {
+ uint output_value;
+};
+layout (std140, binding = 0) uniform InputBuffer {
+ uvec4 input_value[4096];
+};
+layout (location = 0) uniform uint idx;
+void main() {
+ output_value = input_value[idx >> 2][idx & 3];
+})";
+ const GLuint shader{glCreateShaderProgramv(GL_VERTEX_SHADER, 1, &COMPONENT_TEST)};
+ SCOPE_EXIT({ glDeleteProgram(shader); });
+ glUseProgram(shader);
+
+ OGLVertexArray vao;
+ vao.Create();
+ glBindVertexArray(vao.handle);
+
+ constexpr std::array<GLuint, 8> values{0, 0, 0, 0, 0x1236327, 0x985482, 0x872753, 0x2378432};
+ OGLBuffer ubo;
+ ubo.Create();
+ glNamedBufferData(ubo.handle, sizeof(values), values.data(), GL_STATIC_DRAW);
+ glBindBufferBase(GL_UNIFORM_BUFFER, 0, ubo.handle);
+
+ OGLBuffer ssbo;
+ ssbo.Create();
+ glNamedBufferStorage(ssbo.handle, sizeof(GLuint), nullptr, GL_CLIENT_STORAGE_BIT);
+
+ for (GLuint index = 4; index < 8; ++index) {
+ glInvalidateBufferData(ssbo.handle);
+ glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, ssbo.handle);
+
+ glProgramUniform1ui(shader, 0, index);
+ glDrawArrays(GL_POINTS, 0, 1);
+
+ GLuint result;
+ glGetNamedBufferSubData(ssbo.handle, 0, sizeof(result), &result);
+ if (result != values.at(index)) {
+ LOG_INFO(Render_OpenGL, log_message, true);
+ return true;
+ }
+ }
+ LOG_INFO(Render_OpenGL, log_message, false);
+ return false;
+}
+
} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h
index de8490682..8c8c93760 100644
--- a/src/video_core/renderer_opengl/gl_device.h
+++ b/src/video_core/renderer_opengl/gl_device.h
@@ -30,13 +30,19 @@ public:
return has_variable_aoffi;
}
+ bool HasComponentIndexingBug() const {
+ return has_component_indexing_bug;
+ }
+
private:
static bool TestVariableAoffi();
+ static bool TestComponentIndexingBug();
std::size_t uniform_buffer_alignment{};
u32 max_vertex_attributes{};
u32 max_varyings{};
bool has_variable_aoffi{};
+ bool has_component_indexing_bug{};
};
} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_primitive_assembler.cpp b/src/video_core/renderer_opengl/gl_primitive_assembler.cpp
deleted file mode 100644
index c3e94d917..000000000
--- a/src/video_core/renderer_opengl/gl_primitive_assembler.cpp
+++ /dev/null
@@ -1,63 +0,0 @@
-// Copyright 2018 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include <algorithm>
-#include <array>
-#include "common/assert.h"
-#include "common/common_types.h"
-#include "core/core.h"
-#include "video_core/memory_manager.h"
-#include "video_core/renderer_opengl/gl_buffer_cache.h"
-#include "video_core/renderer_opengl/gl_primitive_assembler.h"
-
-namespace OpenGL {
-
-constexpr u32 TRIANGLES_PER_QUAD = 6;
-constexpr std::array<u32, TRIANGLES_PER_QUAD> QUAD_MAP = {0, 1, 2, 0, 2, 3};
-
-PrimitiveAssembler::PrimitiveAssembler(OGLBufferCache& buffer_cache) : buffer_cache(buffer_cache) {}
-
-PrimitiveAssembler::~PrimitiveAssembler() = default;
-
-std::size_t PrimitiveAssembler::CalculateQuadSize(u32 count) const {
- ASSERT_MSG(count % 4 == 0, "Quad count is expected to be a multiple of 4");
- return (count / 4) * TRIANGLES_PER_QUAD * sizeof(GLuint);
-}
-
-GLintptr PrimitiveAssembler::MakeQuadArray(u32 first, u32 count) {
- const std::size_t size{CalculateQuadSize(count)};
- auto [dst_pointer, index_offset] = buffer_cache.ReserveMemory(size);
-
- for (u32 primitive = 0; primitive < count / 4; ++primitive) {
- for (u32 i = 0; i < TRIANGLES_PER_QUAD; ++i) {
- const u32 index = first + primitive * 4 + QUAD_MAP[i];
- std::memcpy(dst_pointer, &index, sizeof(index));
- dst_pointer += sizeof(index);
- }
- }
-
- return index_offset;
-}
-
-GLintptr PrimitiveAssembler::MakeQuadIndexed(GPUVAddr gpu_addr, std::size_t index_size, u32 count) {
- const std::size_t map_size{CalculateQuadSize(count)};
- auto [dst_pointer, index_offset] = buffer_cache.ReserveMemory(map_size);
-
- auto& memory_manager = Core::System::GetInstance().GPU().MemoryManager();
- const u8* source{memory_manager.GetPointer(gpu_addr)};
-
- for (u32 primitive = 0; primitive < count / 4; ++primitive) {
- for (std::size_t i = 0; i < TRIANGLES_PER_QUAD; ++i) {
- const u32 index = primitive * 4 + QUAD_MAP[i];
- const u8* src_offset = source + (index * index_size);
-
- std::memcpy(dst_pointer, src_offset, index_size);
- dst_pointer += index_size;
- }
- }
-
- return index_offset;
-}
-
-} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_primitive_assembler.h b/src/video_core/renderer_opengl/gl_primitive_assembler.h
deleted file mode 100644
index 4e87ce4d6..000000000
--- a/src/video_core/renderer_opengl/gl_primitive_assembler.h
+++ /dev/null
@@ -1,31 +0,0 @@
-// Copyright 2018 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#pragma once
-
-#include <glad/glad.h>
-
-#include "common/common_types.h"
-
-namespace OpenGL {
-
-class OGLBufferCache;
-
-class PrimitiveAssembler {
-public:
- explicit PrimitiveAssembler(OGLBufferCache& buffer_cache);
- ~PrimitiveAssembler();
-
- /// Calculates the size required by MakeQuadArray and MakeQuadIndexed.
- std::size_t CalculateQuadSize(u32 count) const;
-
- GLintptr MakeQuadArray(u32 first, u32 count);
-
- GLintptr MakeQuadIndexed(GPUVAddr gpu_addr, std::size_t index_size, u32 count);
-
-private:
- OGLBufferCache& buffer_cache;
-};
-
-} // namespace OpenGL
\ No newline at end of file
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index f9b6dfeea..d77426067 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -246,29 +246,6 @@ DrawParameters RasterizerOpenGL::SetupDraw() {
DrawParameters params{};
params.current_instance = gpu.state.current_instance;
- if (regs.draw.topology == Maxwell::PrimitiveTopology::Quads) {
- MICROPROFILE_SCOPE(OpenGL_PrimitiveAssembly);
-
- params.use_indexed = true;
- params.primitive_mode = GL_TRIANGLES;
-
- if (is_indexed) {
- params.index_format = MaxwellToGL::IndexFormat(regs.index_array.format);
- params.count = (regs.index_array.count / 4) * 6;
- params.index_buffer_offset = primitive_assembler.MakeQuadIndexed(
- regs.index_array.IndexStart(), regs.index_array.FormatSizeInBytes(),
- regs.index_array.count);
- params.base_vertex = static_cast<GLint>(regs.vb_element_base);
- } else {
- // MakeQuadArray always generates u32 indexes
- params.index_format = GL_UNSIGNED_INT;
- params.count = (regs.vertex_buffer.count / 4) * 6;
- params.index_buffer_offset = primitive_assembler.MakeQuadArray(
- regs.vertex_buffer.first, regs.vertex_buffer.count);
- }
- return params;
- }
-
params.use_indexed = is_indexed;
params.primitive_mode = MaxwellToGL::PrimitiveTopology(regs.draw.topology);
@@ -345,9 +322,9 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
}
const auto stage_enum = static_cast<Maxwell::ShaderStage>(stage);
- SetupConstBuffers(stage_enum, shader, program_handle, base_bindings);
- SetupGlobalRegions(stage_enum, shader, program_handle, base_bindings);
- SetupTextures(stage_enum, shader, program_handle, base_bindings);
+ SetupDrawConstBuffers(stage_enum, shader);
+ SetupGlobalRegions(stage_enum, shader);
+ SetupTextures(stage_enum, shader, base_bindings);
// Workaround for Intel drivers.
// When a clip distance is enabled but not set in the shader it crops parts of the screen
@@ -686,30 +663,19 @@ void RasterizerOpenGL::DrawArrays() {
SyncCullMode();
SyncPrimitiveRestart();
SyncScissorTest(state);
- // Alpha Testing is synced on shaders.
SyncTransformFeedback();
SyncPointState();
- CheckAlphaTests();
SyncPolygonOffset();
- // TODO(bunnei): Sync framebuffer_scale uniform here
- // TODO(bunnei): Sync scissorbox uniform(s) here
+ SyncAlphaTest();
// Draw the vertex batch
const bool is_indexed = accelerate_draw == AccelDraw::Indexed;
std::size_t buffer_size = CalculateVertexArraysSize();
- // Add space for index buffer (keeping in mind non-core primitives)
- switch (regs.draw.topology) {
- case Maxwell::PrimitiveTopology::Quads:
- buffer_size = Common::AlignUp(buffer_size, 4) +
- primitive_assembler.CalculateQuadSize(regs.vertex_buffer.count);
- break;
- default:
- if (is_indexed) {
- buffer_size = Common::AlignUp(buffer_size, 4) + CalculateIndexBufferSize();
- }
- break;
+ // Add space for index buffer
+ if (is_indexed) {
+ buffer_size = Common::AlignUp(buffer_size, 4) + CalculateIndexBufferSize();
}
// Uniform space for the 5 shader stages
@@ -810,57 +776,55 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config,
return true;
}
-void RasterizerOpenGL::SetupConstBuffers(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage,
- const Shader& shader, GLuint program_handle,
- BaseBindings base_bindings) {
+void RasterizerOpenGL::SetupDrawConstBuffers(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage,
+ const Shader& shader) {
MICROPROFILE_SCOPE(OpenGL_UBO);
- const auto& gpu = system.GPU();
- const auto& maxwell3d = gpu.Maxwell3D();
- const auto& shader_stage = maxwell3d.state.shader_stages[static_cast<std::size_t>(stage)];
+ const auto stage_index = static_cast<std::size_t>(stage);
+ const auto& shader_stage = system.GPU().Maxwell3D().state.shader_stages[stage_index];
const auto& entries = shader->GetShaderEntries().const_buffers;
// Upload only the enabled buffers from the 16 constbuffers of each shader stage
for (u32 bindpoint = 0; bindpoint < entries.size(); ++bindpoint) {
- const auto& used_buffer = entries[bindpoint];
- const auto& buffer = shader_stage.const_buffers[used_buffer.GetIndex()];
-
- if (!buffer.enabled) {
- // Set values to zero to unbind buffers
- bind_ubo_pushbuffer.Push(0, 0, 0);
- continue;
- }
+ const auto& entry = entries[bindpoint];
+ SetupConstBuffer(shader_stage.const_buffers[entry.GetIndex()], entry);
+ }
+}
- std::size_t size = 0;
+void RasterizerOpenGL::SetupConstBuffer(const Tegra::Engines::ConstBufferInfo& buffer,
+ const GLShader::ConstBufferEntry& entry) {
+ if (!buffer.enabled) {
+ // Set values to zero to unbind buffers
+ bind_ubo_pushbuffer.Push(0, 0, 0);
+ return;
+ }
- if (used_buffer.IsIndirect()) {
- // Buffer is accessed indirectly, so upload the entire thing
- size = buffer.size;
+ std::size_t size;
+ if (entry.IsIndirect()) {
+ // Buffer is accessed indirectly, so upload the entire thing
+ size = buffer.size;
- if (size > MaxConstbufferSize) {
- LOG_WARNING(Render_OpenGL, "Indirect constbuffer size {} exceeds maximum {}", size,
- MaxConstbufferSize);
- size = MaxConstbufferSize;
- }
- } else {
- // Buffer is accessed directly, upload just what we use
- size = used_buffer.GetSize();
+ if (size > MaxConstbufferSize) {
+ LOG_WARNING(Render_OpenGL, "Indirect constbuffer size {} exceeds maximum {}", size,
+ MaxConstbufferSize);
+ size = MaxConstbufferSize;
}
+ } else {
+ // Buffer is accessed directly, upload just what we use
+ size = entry.GetSize();
+ }
- // Align the actual size so it ends up being a multiple of vec4 to meet the OpenGL std140
- // UBO alignment requirements.
- size = Common::AlignUp(size, sizeof(GLvec4));
- ASSERT_MSG(size <= MaxConstbufferSize, "Constbuffer too big");
-
- const GLintptr const_buffer_offset =
- buffer_cache.UploadMemory(buffer.address, size, device.GetUniformBufferAlignment());
+ // Align the actual size so it ends up being a multiple of vec4 to meet the OpenGL std140
+ // UBO alignment requirements.
+ size = Common::AlignUp(size, sizeof(GLvec4));
+ ASSERT_MSG(size <= MaxConstbufferSize, "Constant buffer is too big");
- bind_ubo_pushbuffer.Push(buffer_cache.GetHandle(), const_buffer_offset, size);
- }
+ const std::size_t alignment = device.GetUniformBufferAlignment();
+ const GLintptr offset = buffer_cache.UploadMemory(buffer.address, size, alignment);
+ bind_ubo_pushbuffer.Push(buffer_cache.GetHandle(), offset, size);
}
void RasterizerOpenGL::SetupGlobalRegions(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage,
- const Shader& shader, GLenum primitive_mode,
- BaseBindings base_bindings) {
+ const Shader& shader) {
const auto& entries = shader->GetShaderEntries().global_memory_entries;
for (std::size_t bindpoint = 0; bindpoint < entries.size(); ++bindpoint) {
const auto& entry{entries[bindpoint]};
@@ -874,7 +838,7 @@ void RasterizerOpenGL::SetupGlobalRegions(Tegra::Engines::Maxwell3D::Regs::Shade
}
void RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, const Shader& shader,
- GLuint program_handle, BaseBindings base_bindings) {
+ BaseBindings base_bindings) {
MICROPROFILE_SCOPE(OpenGL_Texture);
const auto& gpu = system.GPU();
const auto& maxwell3d = gpu.Maxwell3D();
@@ -1152,10 +1116,17 @@ void RasterizerOpenGL::SyncPolygonOffset() {
state.polygon_offset.clamp = regs.polygon_offset_clamp;
}
-void RasterizerOpenGL::CheckAlphaTests() {
+void RasterizerOpenGL::SyncAlphaTest() {
const auto& regs = system.GPU().Maxwell3D().regs;
UNIMPLEMENTED_IF_MSG(regs.alpha_test_enabled != 0 && regs.rt_control.count > 1,
"Alpha Testing is enabled with more than one rendertarget");
+
+ state.alpha_test.enabled = regs.alpha_test_enabled;
+ if (!state.alpha_test.enabled) {
+ return;
+ }
+ state.alpha_test.func = MaxwellToGL::ComparisonOp(regs.alpha_test_func);
+ state.alpha_test.ref = regs.alpha_test_ref;
}
} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index d78094138..f7671ff5d 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -17,17 +17,18 @@
#include <glad/glad.h>
#include "common/common_types.h"
+#include "video_core/engines/const_buffer_info.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/rasterizer_cache.h"
#include "video_core/rasterizer_interface.h"
#include "video_core/renderer_opengl/gl_buffer_cache.h"
#include "video_core/renderer_opengl/gl_device.h"
#include "video_core/renderer_opengl/gl_global_cache.h"
-#include "video_core/renderer_opengl/gl_primitive_assembler.h"
#include "video_core/renderer_opengl/gl_rasterizer_cache.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/gl_sampler_cache.h"
#include "video_core/renderer_opengl/gl_shader_cache.h"
+#include "video_core/renderer_opengl/gl_shader_decompiler.h"
#include "video_core/renderer_opengl/gl_shader_manager.h"
#include "video_core/renderer_opengl/gl_state.h"
#include "video_core/renderer_opengl/utils.h"
@@ -106,17 +107,20 @@ private:
bool preserve_contents = true, std::optional<std::size_t> single_color_target = {});
/// Configures the current constbuffers to use for the draw command.
- void SetupConstBuffers(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, const Shader& shader,
- GLuint program_handle, BaseBindings base_bindings);
+ void SetupDrawConstBuffers(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage,
+ const Shader& shader);
+
+ /// Configures a constant buffer.
+ void SetupConstBuffer(const Tegra::Engines::ConstBufferInfo& buffer,
+ const GLShader::ConstBufferEntry& entry);
/// Configures the current global memory entries to use for the draw command.
void SetupGlobalRegions(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage,
- const Shader& shader, GLenum primitive_mode,
- BaseBindings base_bindings);
+ const Shader& shader);
/// Configures the current textures to use for the draw command.
void SetupTextures(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, const Shader& shader,
- GLuint program_handle, BaseBindings base_bindings);
+ BaseBindings base_bindings);
/// Syncs the viewport and depth range to match the guest state
void SyncViewport(OpenGLState& current_state);
@@ -167,8 +171,8 @@ private:
/// Syncs the polygon offsets
void SyncPolygonOffset();
- /// Check asserts for alpha testing.
- void CheckAlphaTests();
+ /// Syncs the alpha test state to match the guest state
+ void SyncAlphaTest();
/// Check for extension that are not strictly required
/// but are needed for correct emulation
@@ -197,7 +201,6 @@ private:
static constexpr std::size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024;
OGLBufferCache buffer_cache;
- PrimitiveAssembler primitive_assembler{buffer_cache};
BindBuffersRangePushBuffer bind_ubo_pushbuffer{GL_UNIFORM_BUFFER};
BindBuffersRangePushBuffer bind_ssbo_pushbuffer{GL_SHADER_STORAGE_BUFFER};
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index d66252224..ac8a9e6b7 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -35,8 +35,8 @@ struct UnspecializedShader {
namespace {
/// Gets the address for the specified shader stage program
-GPUVAddr GetShaderAddress(Maxwell::ShaderProgram program) {
- const auto& gpu{Core::System::GetInstance().GPU().Maxwell3D()};
+GPUVAddr GetShaderAddress(Core::System& system, Maxwell::ShaderProgram program) {
+ const auto& gpu{system.GPU().Maxwell3D()};
const auto& shader_config{gpu.regs.shader_config[static_cast<std::size_t>(program)]};
return gpu.regs.code_address.CodeAddress() + shader_config.offset;
}
@@ -350,7 +350,8 @@ ShaderDiskCacheUsage CachedShader::GetUsage(GLenum primitive_mode,
ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system,
Core::Frontend::EmuWindow& emu_window, const Device& device)
- : RasterizerCache{rasterizer}, emu_window{emu_window}, device{device}, disk_cache{system} {}
+ : RasterizerCache{rasterizer}, system{system}, emu_window{emu_window}, device{device},
+ disk_cache{system} {}
void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
const VideoCore::DiskResourceLoadCallback& callback) {
@@ -546,42 +547,45 @@ std::unordered_map<u64, UnspecializedShader> ShaderCacheOpenGL::GenerateUnspecia
}
Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
- if (!Core::System::GetInstance().GPU().Maxwell3D().dirty_flags.shaders) {
- return last_shaders[static_cast<u32>(program)];
+ if (!system.GPU().Maxwell3D().dirty_flags.shaders) {
+ return last_shaders[static_cast<std::size_t>(program)];
}
- auto& memory_manager{Core::System::GetInstance().GPU().MemoryManager()};
- const GPUVAddr program_addr{GetShaderAddress(program)};
+ auto& memory_manager{system.GPU().MemoryManager()};
+ const GPUVAddr program_addr{GetShaderAddress(system, program)};
// Look up shader in the cache based on address
- const auto& host_ptr{memory_manager.GetPointer(program_addr)};
+ const auto host_ptr{memory_manager.GetPointer(program_addr)};
Shader shader{TryGet(host_ptr)};
+ if (shader) {
+ return last_shaders[static_cast<std::size_t>(program)] = shader;
+ }
- if (!shader) {
- // No shader found - create a new one
- ProgramCode program_code{GetShaderCode(memory_manager, program_addr, host_ptr)};
- ProgramCode program_code_b;
- if (program == Maxwell::ShaderProgram::VertexA) {
- const GPUVAddr program_addr_b{GetShaderAddress(Maxwell::ShaderProgram::VertexB)};
- program_code_b = GetShaderCode(memory_manager, program_addr_b,
- memory_manager.GetPointer(program_addr_b));
- }
- const u64 unique_identifier = GetUniqueIdentifier(program, program_code, program_code_b);
- const VAddr cpu_addr{*memory_manager.GpuToCpuAddress(program_addr)};
- const auto found = precompiled_shaders.find(unique_identifier);
- if (found != precompiled_shaders.end()) {
- shader =
- std::make_shared<CachedShader>(cpu_addr, unique_identifier, program, disk_cache,
- precompiled_programs, found->second, host_ptr);
- } else {
- shader = std::make_shared<CachedShader>(
- device, cpu_addr, unique_identifier, program, disk_cache, precompiled_programs,
- std::move(program_code), std::move(program_code_b), host_ptr);
- }
- Register(shader);
+ // No shader found - create a new one
+ ProgramCode program_code{GetShaderCode(memory_manager, program_addr, host_ptr)};
+ ProgramCode program_code_b;
+ if (program == Maxwell::ShaderProgram::VertexA) {
+ const GPUVAddr program_addr_b{GetShaderAddress(system, Maxwell::ShaderProgram::VertexB)};
+ program_code_b = GetShaderCode(memory_manager, program_addr_b,
+ memory_manager.GetPointer(program_addr_b));
+ }
+
+ const u64 unique_identifier = GetUniqueIdentifier(program, program_code, program_code_b);
+ const VAddr cpu_addr{*memory_manager.GpuToCpuAddress(program_addr)};
+ const auto found = precompiled_shaders.find(unique_identifier);
+ if (found != precompiled_shaders.end()) {
+ // Create a shader from the cache
+ shader = std::make_shared<CachedShader>(cpu_addr, unique_identifier, program, disk_cache,
+ precompiled_programs, found->second, host_ptr);
+ } else {
+ // Create a shader from guest memory
+ shader = std::make_shared<CachedShader>(
+ device, cpu_addr, unique_identifier, program, disk_cache, precompiled_programs,
+ std::move(program_code), std::move(program_code_b), host_ptr);
}
+ Register(shader);
- return last_shaders[static_cast<u32>(program)] = shader;
+ return last_shaders[static_cast<std::size_t>(program)] = shader;
}
} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h
index 64e5a5594..09bd0761d 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.h
+++ b/src/video_core/renderer_opengl/gl_shader_cache.h
@@ -137,6 +137,7 @@ private:
CachedProgram GeneratePrecompiledProgram(const ShaderDiskCacheDump& dump,
const std::set<GLenum>& supported_formats);
+ Core::System& system;
Core::Frontend::EmuWindow& emu_window;
const Device& device;
ShaderDiskCacheOpenGL disk_cache;
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index e9f8d40db..7dc2e0560 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -45,7 +45,6 @@ struct TextureAoffi {};
using TextureArgument = std::pair<Type, Node>;
using TextureIR = std::variant<TextureAoffi, TextureArgument>;
-enum : u32 { POSITION_VARYING_LOCATION = 0, GENERIC_VARYING_START_LOCATION = 1 };
constexpr u32 MAX_CONSTBUFFER_ELEMENTS =
static_cast<u32>(RasterizerOpenGL::MaxConstbufferSize) / (4 * sizeof(float));
@@ -124,8 +123,8 @@ bool IsPrecise(Operation operand) {
return false;
}
-bool IsPrecise(Node node) {
- if (const auto operation = std::get_if<OperationNode>(node)) {
+bool IsPrecise(const Node& node) {
+ if (const auto operation = std::get_if<OperationNode>(&*node)) {
return IsPrecise(*operation);
}
return false;
@@ -144,6 +143,24 @@ u32 GetGenericAttributeIndex(Attribute::Index index) {
return static_cast<u32>(index) - static_cast<u32>(Attribute::Index::Attribute_0);
}
+constexpr const char* GetFlowStackPrefix(MetaStackClass stack) {
+ switch (stack) {
+ case MetaStackClass::Ssy:
+ return "ssy";
+ case MetaStackClass::Pbk:
+ return "pbk";
+ }
+ return {};
+}
+
+std::string FlowStackName(MetaStackClass stack) {
+ return fmt::format("{}_flow_stack", GetFlowStackPrefix(stack));
+}
+
+std::string FlowStackTopName(MetaStackClass stack) {
+ return fmt::format("{}_flow_stack_top", GetFlowStackPrefix(stack));
+}
+
class GLSLDecompiler final {
public:
explicit GLSLDecompiler(const Device& device, const ShaderIR& ir, ShaderStage stage,
@@ -174,8 +191,10 @@ public:
// TODO(Subv): Figure out the actual depth of the flow stack, for now it seems
// unlikely that shaders will use 20 nested SSYs and PBKs.
constexpr u32 FLOW_STACK_SIZE = 20;
- code.AddLine("uint flow_stack[{}];", FLOW_STACK_SIZE);
- code.AddLine("uint flow_stack_top = 0u;");
+ for (const auto stack : std::array{MetaStackClass::Ssy, MetaStackClass::Pbk}) {
+ code.AddLine("uint {}[{}];", FlowStackName(stack), FLOW_STACK_SIZE);
+ code.AddLine("uint {} = 0u;", FlowStackTopName(stack));
+ }
code.AddLine("while (true) {{");
++code.scope;
@@ -247,6 +266,12 @@ private:
code.AddLine("layout ({}, max_vertices = {}) out;", topology, max_vertices);
code.AddNewLine();
+ code.AddLine("in gl_PerVertex {{");
+ ++code.scope;
+ code.AddLine("vec4 gl_Position;");
+ --code.scope;
+ code.AddLine("}} gl_in[];");
+
DeclareVertexRedeclarations();
}
@@ -349,7 +374,7 @@ private:
}
void DeclareInputAttribute(Attribute::Index index, bool skip_unused) {
- const u32 generic_index{GetGenericAttributeIndex(index)};
+ const u32 location{GetGenericAttributeIndex(index)};
std::string name{GetInputAttribute(index)};
if (stage == ShaderStage::Geometry) {
@@ -358,19 +383,13 @@ private:
std::string suffix;
if (stage == ShaderStage::Fragment) {
- const auto input_mode{header.ps.GetAttributeUse(generic_index)};
+ const auto input_mode{header.ps.GetAttributeUse(location)};
if (skip_unused && input_mode == AttributeUse::Unused) {
return;
}
suffix = GetInputFlags(input_mode);
}
- u32 location = generic_index;
- if (stage != ShaderStage::Vertex) {
- // If inputs are varyings, add an offset
- location += GENERIC_VARYING_START_LOCATION;
- }
-
code.AddLine("layout (location = {}) {} in vec4 {};", location, suffix, name);
}
@@ -395,7 +414,7 @@ private:
}
void DeclareOutputAttribute(Attribute::Index index) {
- const u32 location{GetGenericAttributeIndex(index) + GENERIC_VARYING_START_LOCATION};
+ const u32 location{GetGenericAttributeIndex(index)};
code.AddLine("layout (location = {}) out vec4 {};", location, GetOutputAttribute(index));
}
@@ -498,15 +517,15 @@ private:
}
void VisitBlock(const NodeBlock& bb) {
- for (const Node node : bb) {
+ for (const auto& node : bb) {
if (const std::string expr = Visit(node); !expr.empty()) {
code.AddLine(expr);
}
}
}
- std::string Visit(Node node) {
- if (const auto operation = std::get_if<OperationNode>(node)) {
+ std::string Visit(const Node& node) {
+ if (const auto operation = std::get_if<OperationNode>(&*node)) {
const auto operation_index = static_cast<std::size_t>(operation->GetCode());
if (operation_index >= operation_decompilers.size()) {
UNREACHABLE_MSG("Out of bounds operation: {}", operation_index);
@@ -520,7 +539,7 @@ private:
return (this->*decompiler)(*operation);
}
- if (const auto gpr = std::get_if<GprNode>(node)) {
+ if (const auto gpr = std::get_if<GprNode>(&*node)) {
const u32 index = gpr->GetIndex();
if (index == Register::ZeroIndex) {
return "0";
@@ -528,7 +547,7 @@ private:
return GetRegister(index);
}
- if (const auto immediate = std::get_if<ImmediateNode>(node)) {
+ if (const auto immediate = std::get_if<ImmediateNode>(&*node)) {
const u32 value = immediate->GetValue();
if (value < 10) {
// For eyecandy avoid using hex numbers on single digits
@@ -537,7 +556,7 @@ private:
return fmt::format("utof(0x{:x}u)", immediate->GetValue());
}
- if (const auto predicate = std::get_if<PredicateNode>(node)) {
+ if (const auto predicate = std::get_if<PredicateNode>(&*node)) {
const auto value = [&]() -> std::string {
switch (const auto index = predicate->GetIndex(); index) {
case Tegra::Shader::Pred::UnusedIndex:
@@ -554,7 +573,7 @@ private:
return value;
}
- if (const auto abuf = std::get_if<AbufNode>(node)) {
+ if (const auto abuf = std::get_if<AbufNode>(&*node)) {
UNIMPLEMENTED_IF_MSG(abuf->IsPhysicalBuffer() && stage == ShaderStage::Geometry,
"Physical attributes in geometry shaders are not implemented");
if (abuf->IsPhysicalBuffer()) {
@@ -564,9 +583,9 @@ private:
return ReadAttribute(abuf->GetIndex(), abuf->GetElement(), abuf->GetBuffer());
}
- if (const auto cbuf = std::get_if<CbufNode>(node)) {
+ if (const auto cbuf = std::get_if<CbufNode>(&*node)) {
const Node offset = cbuf->GetOffset();
- if (const auto immediate = std::get_if<ImmediateNode>(offset)) {
+ if (const auto immediate = std::get_if<ImmediateNode>(&*offset)) {
// Direct access
const u32 offset_imm = immediate->GetValue();
ASSERT_MSG(offset_imm % 4 == 0, "Unaligned cbuf direct access");
@@ -577,30 +596,47 @@ private:
if (std::holds_alternative<OperationNode>(*offset)) {
// Indirect access
const std::string final_offset = code.GenerateTemporary();
- code.AddLine("uint {} = (ftou({}) / 4);", final_offset, Visit(offset));
- return fmt::format("{}[{} / 4][{} % 4]", GetConstBuffer(cbuf->GetIndex()),
- final_offset, final_offset);
+ code.AddLine("uint {} = ftou({}) >> 2;", final_offset, Visit(offset));
+
+ if (!device.HasComponentIndexingBug()) {
+ return fmt::format("{}[{} >> 2][{} & 3]", GetConstBuffer(cbuf->GetIndex()),
+ final_offset, final_offset);
+ }
+
+ // AMD's proprietary GLSL compiler emits ill code for variable component access.
+ // To bypass this driver bug generate 4 ifs, one per each component.
+ const std::string pack = code.GenerateTemporary();
+ code.AddLine("vec4 {} = {}[{} >> 2];", pack, GetConstBuffer(cbuf->GetIndex()),
+ final_offset);
+
+ const std::string result = code.GenerateTemporary();
+ code.AddLine("float {};", result);
+ for (u32 swizzle = 0; swizzle < 4; ++swizzle) {
+ code.AddLine("if (({} & 3) == {}) {} = {}{};", final_offset, swizzle, result,
+ pack, GetSwizzle(swizzle));
+ }
+ return result;
}
UNREACHABLE_MSG("Unmanaged offset node type");
}
- if (const auto gmem = std::get_if<GmemNode>(node)) {
+ if (const auto gmem = std::get_if<GmemNode>(&*node)) {
const std::string real = Visit(gmem->GetRealAddress());
const std::string base = Visit(gmem->GetBaseAddress());
const std::string final_offset = fmt::format("(ftou({}) - ftou({})) / 4", real, base);
return fmt::format("{}[{}]", GetGlobalMemory(gmem->GetDescriptor()), final_offset);
}
- if (const auto lmem = std::get_if<LmemNode>(node)) {
+ if (const auto lmem = std::get_if<LmemNode>(&*node)) {
return fmt::format("{}[ftou({}) / 4]", GetLocalMemory(), Visit(lmem->GetAddress()));
}
- if (const auto internal_flag = std::get_if<InternalFlagNode>(node)) {
+ if (const auto internal_flag = std::get_if<InternalFlagNode>(&*node)) {
return GetInternalFlag(internal_flag->GetFlag());
}
- if (const auto conditional = std::get_if<ConditionalNode>(node)) {
+ if (const auto conditional = std::get_if<ConditionalNode>(&*node)) {
// It's invalid to call conditional on nested nodes, use an operation instead
code.AddLine("if ({}) {{", Visit(conditional->GetCondition()));
++code.scope;
@@ -612,7 +648,7 @@ private:
return {};
}
- if (const auto comment = std::get_if<CommentNode>(node)) {
+ if (const auto comment = std::get_if<CommentNode>(&*node)) {
return "// " + comment->GetText();
}
@@ -620,7 +656,7 @@ private:
return {};
}
- std::string ReadAttribute(Attribute::Index attribute, u32 element, Node buffer = {}) {
+ std::string ReadAttribute(Attribute::Index attribute, u32 element, const Node& buffer = {}) {
const auto GeometryPass = [&](std::string_view name) {
if (stage == ShaderStage::Geometry && buffer) {
// TODO(Rodrigo): Guard geometry inputs against out of bound reads. Some games
@@ -633,10 +669,14 @@ private:
switch (attribute) {
case Attribute::Index::Position:
- if (stage != ShaderStage::Fragment) {
- return GeometryPass("position") + GetSwizzle(element);
- } else {
+ switch (stage) {
+ case ShaderStage::Geometry:
+ return fmt::format("gl_in[ftou({})].gl_Position{}", Visit(buffer),
+ GetSwizzle(element));
+ case ShaderStage::Fragment:
return element == 3 ? "1.0f" : ("gl_FragCoord"s + GetSwizzle(element));
+ default:
+ UNREACHABLE();
}
case Attribute::Index::PointCoord:
switch (element) {
@@ -852,7 +892,7 @@ private:
std::string expr = ", ";
switch (type) {
case Type::Int:
- if (const auto immediate = std::get_if<ImmediateNode>(operand)) {
+ if (const auto immediate = std::get_if<ImmediateNode>(&*operand)) {
// Inline the string as an immediate integer in GLSL (some extra arguments are
// required to be constant)
expr += std::to_string(static_cast<s32>(immediate->GetValue()));
@@ -884,7 +924,7 @@ private:
for (std::size_t index = 0; index < aoffi.size(); ++index) {
const auto operand{aoffi.at(index)};
- if (const auto immediate = std::get_if<ImmediateNode>(operand)) {
+ if (const auto immediate = std::get_if<ImmediateNode>(&*operand)) {
// Inline the string as an immediate integer in GLSL (AOFFI arguments are required
// to be constant by the standard).
expr += std::to_string(static_cast<s32>(immediate->GetValue()));
@@ -905,23 +945,23 @@ private:
}
std::string Assign(Operation operation) {
- const Node dest = operation[0];
- const Node src = operation[1];
+ const Node& dest = operation[0];
+ const Node& src = operation[1];
std::string target;
- if (const auto gpr = std::get_if<GprNode>(dest)) {
+ if (const auto gpr = std::get_if<GprNode>(&*dest)) {
if (gpr->GetIndex() == Register::ZeroIndex) {
// Writing to Register::ZeroIndex is a no op
return {};
}
target = GetRegister(gpr->GetIndex());
- } else if (const auto abuf = std::get_if<AbufNode>(dest)) {
+ } else if (const auto abuf = std::get_if<AbufNode>(&*dest)) {
UNIMPLEMENTED_IF(abuf->IsPhysicalBuffer());
target = [&]() -> std::string {
switch (const auto attribute = abuf->GetIndex(); abuf->GetIndex()) {
case Attribute::Index::Position:
- return "position"s + GetSwizzle(abuf->GetElement());
+ return "gl_Position"s + GetSwizzle(abuf->GetElement());
case Attribute::Index::PointSize:
return "gl_PointSize";
case Attribute::Index::ClipDistances0123:
@@ -937,9 +977,9 @@ private:
return "0";
}
}();
- } else if (const auto lmem = std::get_if<LmemNode>(dest)) {
+ } else if (const auto lmem = std::get_if<LmemNode>(&*dest)) {
target = fmt::format("{}[ftou({}) / 4]", GetLocalMemory(), Visit(lmem->GetAddress()));
- } else if (const auto gmem = std::get_if<GmemNode>(dest)) {
+ } else if (const auto gmem = std::get_if<GmemNode>(&*dest)) {
const std::string real = Visit(gmem->GetRealAddress());
const std::string base = Visit(gmem->GetBaseAddress());
const std::string final_offset = fmt::format("(ftou({}) - ftou({})) / 4", real, base);
@@ -1216,12 +1256,12 @@ private:
}
std::string LogicalAssign(Operation operation) {
- const Node dest = operation[0];
- const Node src = operation[1];
+ const Node& dest = operation[0];
+ const Node& src = operation[1];
std::string target;
- if (const auto pred = std::get_if<PredicateNode>(dest)) {
+ if (const auto pred = std::get_if<PredicateNode>(&*dest)) {
ASSERT_MSG(!pred->IsNegated(), "Negating logical assignment");
const auto index = pred->GetIndex();
@@ -1232,7 +1272,7 @@ private:
return {};
}
target = GetPredicate(index);
- } else if (const auto flag = std::get_if<InternalFlagNode>(dest)) {
+ } else if (const auto flag = std::get_if<InternalFlagNode>(&*dest)) {
target = GetInternalFlag(flag->GetFlag());
}
@@ -1409,7 +1449,7 @@ private:
}
std::string Branch(Operation operation) {
- const auto target = std::get_if<ImmediateNode>(operation[0]);
+ const auto target = std::get_if<ImmediateNode>(&*operation[0]);
UNIMPLEMENTED_IF(!target);
code.AddLine("jmp_to = 0x{:x}u;", target->GetValue());
@@ -1418,15 +1458,18 @@ private:
}
std::string PushFlowStack(Operation operation) {
- const auto target = std::get_if<ImmediateNode>(operation[0]);
+ const auto stack = std::get<MetaStackClass>(operation.GetMeta());
+ const auto target = std::get_if<ImmediateNode>(&*operation[0]);
UNIMPLEMENTED_IF(!target);
- code.AddLine("flow_stack[flow_stack_top++] = 0x{:x}u;", target->GetValue());
+ code.AddLine("{}[{}++] = 0x{:x}u;", FlowStackName(stack), FlowStackTopName(stack),
+ target->GetValue());
return {};
}
std::string PopFlowStack(Operation operation) {
- code.AddLine("jmp_to = flow_stack[--flow_stack_top];");
+ const auto stack = std::get<MetaStackClass>(operation.GetMeta());
+ code.AddLine("jmp_to = {}[--{}];", FlowStackName(stack), FlowStackTopName(stack));
code.AddLine("break;");
return {};
}
@@ -1447,27 +1490,9 @@ private:
UNIMPLEMENTED_IF_MSG(header.ps.omap.sample_mask != 0, "Sample mask write is unimplemented");
- code.AddLine("if (alpha_test[0] != 0) {{");
- ++code.scope;
- // We start on the register containing the alpha value in the first RT.
- u32 current_reg = 3;
- for (u32 render_target = 0; render_target < Maxwell::NumRenderTargets; ++render_target) {
- // TODO(Blinkhawk): verify the behavior of alpha testing on hardware when
- // multiple render targets are used.
- if (header.ps.IsColorComponentOutputEnabled(render_target, 0) ||
- header.ps.IsColorComponentOutputEnabled(render_target, 1) ||
- header.ps.IsColorComponentOutputEnabled(render_target, 2) ||
- header.ps.IsColorComponentOutputEnabled(render_target, 3)) {
- code.AddLine("if (!AlphaFunc({})) discard;", SafeGetRegister(current_reg));
- current_reg += 4;
- }
- }
- --code.scope;
- code.AddLine("}}");
-
// Write the color outputs using the data in the shader registers, disabled
// rendertargets/components are skipped in the register assignment.
- current_reg = 0;
+ u32 current_reg = 0;
for (u32 render_target = 0; render_target < Maxwell::NumRenderTargets; ++render_target) {
// TODO(Subv): Figure out how dual-source blending is configured in the Switch.
for (u32 component = 0; component < 4; ++component) {
@@ -1506,9 +1531,7 @@ private:
// If a geometry shader is attached, it will always flip (it's the last stage before
// fragment). For more info about flipping, refer to gl_shader_gen.cpp.
- code.AddLine("position.xy *= viewport_flip.xy;");
- code.AddLine("gl_Position = position;");
- code.AddLine("position.w = 1.0;");
+ code.AddLine("gl_Position.xy *= viewport_flip.xy;");
code.AddLine("EmitVertex();");
return {};
}
@@ -1746,8 +1769,7 @@ private:
}
u32 GetNumPhysicalVaryings() const {
- return std::min<u32>(device.GetMaxVaryings() - GENERIC_VARYING_START_LOCATION,
- Maxwell::NumVaryings);
+ return std::min<u32>(device.GetMaxVaryings(), Maxwell::NumVaryings);
}
const Device& device;
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp
index d2bb705a9..9148629ec 100644
--- a/src/video_core/renderer_opengl/gl_shader_gen.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp
@@ -23,12 +23,9 @@ ProgramResult GenerateVertexShader(const Device& device, const ShaderSetup& setu
out += GetCommonDeclarations();
out += R"(
-layout (location = 0) out vec4 position;
-
layout (std140, binding = EMULATION_UBO_BINDING) uniform vs_config {
vec4 viewport_flip;
uvec4 config_pack; // instance_id, flip_stage, y_direction, padding
- uvec4 alpha_test;
};
)";
@@ -48,7 +45,6 @@ layout (std140, binding = EMULATION_UBO_BINDING) uniform vs_config {
out += R"(
void main() {
- position = vec4(0.0, 0.0, 0.0, 0.0);
execute_vertex();
)";
@@ -59,19 +55,12 @@ void main() {
out += R"(
// Set Position Y direction
- position.y *= utof(config_pack[2]);
+ gl_Position.y *= utof(config_pack[2]);
// Check if the flip stage is VertexB
// Config pack's second value is flip_stage
if (config_pack[1] == 1) {
// Viewport can be flipped, which is unsupported by glViewport
- position.xy *= viewport_flip.xy;
- }
- gl_Position = position;
-
- // TODO(bunnei): This is likely a hack, position.w should be interpolated as 1.0
- // For now, this is here to bring order in lieu of proper emulation
- if (config_pack[1] == 1) {
- position.w = 1.0;
+ gl_Position.xy *= viewport_flip.xy;
}
})";
@@ -85,13 +74,9 @@ ProgramResult GenerateGeometryShader(const Device& device, const ShaderSetup& se
out += GetCommonDeclarations();
out += R"(
-layout (location = 0) in vec4 gs_position[];
-layout (location = 0) out vec4 position;
-
layout (std140, binding = EMULATION_UBO_BINDING) uniform gs_config {
vec4 viewport_flip;
uvec4 config_pack; // instance_id, flip_stage, y_direction, padding
- uvec4 alpha_test;
};
)";
@@ -124,38 +109,11 @@ layout (location = 5) out vec4 FragColor5;
layout (location = 6) out vec4 FragColor6;
layout (location = 7) out vec4 FragColor7;
-layout (location = 0) in noperspective vec4 position;
-
layout (std140, binding = EMULATION_UBO_BINDING) uniform fs_config {
vec4 viewport_flip;
uvec4 config_pack; // instance_id, flip_stage, y_direction, padding
- uvec4 alpha_test;
};
-bool AlphaFunc(in float value) {
- float ref = uintBitsToFloat(alpha_test[2]);
- switch (alpha_test[1]) {
- case 1:
- return false;
- case 2:
- return value < ref;
- case 3:
- return value == ref;
- case 4:
- return value <= ref;
- case 5:
- return value > ref;
- case 6:
- return value != ref;
- case 7:
- return value >= ref;
- case 8:
- return true;
- default:
- return false;
- }
-}
-
)";
const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET);
ProgramResult program =
diff --git a/src/video_core/renderer_opengl/gl_shader_manager.cpp b/src/video_core/renderer_opengl/gl_shader_manager.cpp
index 05ab01dcb..b05f90f20 100644
--- a/src/video_core/renderer_opengl/gl_shader_manager.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_manager.cpp
@@ -48,17 +48,6 @@ void MaxwellUniformData::SetFromRegs(const Maxwell3D& maxwell, std::size_t shade
viewport_flip[0] = regs.viewport_transform[0].scale_x < 0.0 ? -1.0f : 1.0f;
viewport_flip[1] = regs.viewport_transform[0].scale_y < 0.0 ? -1.0f : 1.0f;
- auto func{static_cast<u32>(regs.alpha_test_func)};
- // Normalize the gl variants of opCompare to be the same as the normal variants
- const u32 op_gl_variant_base = static_cast<u32>(Maxwell3D::Regs::ComparisonOp::Never);
- if (func >= op_gl_variant_base) {
- func = func - op_gl_variant_base + 1U;
- }
-
- alpha_test.enabled = regs.alpha_test_enabled;
- alpha_test.func = func;
- alpha_test.ref = regs.alpha_test_ref;
-
instance_id = state.current_instance;
// Assign in which stage the position has to be flipped
diff --git a/src/video_core/renderer_opengl/gl_shader_manager.h b/src/video_core/renderer_opengl/gl_shader_manager.h
index cec18a832..6961e702a 100644
--- a/src/video_core/renderer_opengl/gl_shader_manager.h
+++ b/src/video_core/renderer_opengl/gl_shader_manager.h
@@ -27,14 +27,8 @@ struct MaxwellUniformData {
GLuint flip_stage;
GLfloat y_direction;
};
- struct alignas(16) {
- GLuint enabled;
- GLuint func;
- GLfloat ref;
- GLuint padding;
- } alpha_test;
};
-static_assert(sizeof(MaxwellUniformData) == 48, "MaxwellUniformData structure size is incorrect");
+static_assert(sizeof(MaxwellUniformData) == 32, "MaxwellUniformData structure size is incorrect");
static_assert(sizeof(MaxwellUniformData) < 16384,
"MaxwellUniformData structure must be less than 16kb as per the OpenGL spec");
diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp
index 7425fbe5d..d86e137ac 100644
--- a/src/video_core/renderer_opengl/gl_state.cpp
+++ b/src/video_core/renderer_opengl/gl_state.cpp
@@ -156,6 +156,10 @@ OpenGLState::OpenGLState() {
polygon_offset.factor = 0.0f;
polygon_offset.units = 0.0f;
polygon_offset.clamp = 0.0f;
+
+ alpha_test.enabled = false;
+ alpha_test.func = GL_ALWAYS;
+ alpha_test.ref = 0.0f;
}
void OpenGLState::ApplyDefaultState() {
@@ -461,6 +465,14 @@ void OpenGLState::ApplyPolygonOffset() const {
}
}
+void OpenGLState::ApplyAlphaTest() const {
+ Enable(GL_ALPHA_TEST, cur_state.alpha_test.enabled, alpha_test.enabled);
+ if (UpdateTie(std::tie(cur_state.alpha_test.func, cur_state.alpha_test.ref),
+ std::tie(alpha_test.func, alpha_test.ref))) {
+ glAlphaFunc(alpha_test.func, alpha_test.ref);
+ }
+}
+
void OpenGLState::ApplyTextures() const {
bool has_delta{};
std::size_t first{};
@@ -533,6 +545,7 @@ void OpenGLState::Apply() const {
ApplyTextures();
ApplySamplers();
ApplyPolygonOffset();
+ ApplyAlphaTest();
}
void OpenGLState::EmulateViewportWithScissor() {
diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h
index 41418a7b8..b0140495d 100644
--- a/src/video_core/renderer_opengl/gl_state.h
+++ b/src/video_core/renderer_opengl/gl_state.h
@@ -172,6 +172,12 @@ public:
GLfloat clamp;
} polygon_offset;
+ struct {
+ bool enabled; // GL_ALPHA_TEST
+ GLenum func; // GL_ALPHA_TEST_FUNC
+ GLfloat ref; // GL_ALPHA_TEST_REF
+ } alpha_test;
+
std::array<bool, 8> clip_distance; // GL_CLIP_DISTANCE
OpenGLState();
@@ -215,6 +221,7 @@ public:
void ApplySamplers() const;
void ApplyDepthClamp() const;
void ApplyPolygonOffset() const;
+ void ApplyAlphaTest() const;
/// Set the initial OpenGL state
static void ApplyDefaultState();
diff --git a/src/video_core/renderer_opengl/maxwell_to_gl.h b/src/video_core/renderer_opengl/maxwell_to_gl.h
index ed7b5cff0..ea77dd211 100644
--- a/src/video_core/renderer_opengl/maxwell_to_gl.h
+++ b/src/video_core/renderer_opengl/maxwell_to_gl.h
@@ -128,6 +128,8 @@ inline GLenum PrimitiveTopology(Maxwell::PrimitiveTopology topology) {
return GL_TRIANGLE_STRIP;
case Maxwell::PrimitiveTopology::TriangleFan:
return GL_TRIANGLE_FAN;
+ case Maxwell::PrimitiveTopology::Quads:
+ return GL_QUADS;
default:
LOG_CRITICAL(Render_OpenGL, "Unimplemented topology={}", static_cast<u32>(topology));
UNREACHABLE();
@@ -173,11 +175,8 @@ inline GLenum WrapMode(Tegra::Texture::WrapMode wrap_mode) {
return GL_CLAMP_TO_EDGE;
case Tegra::Texture::WrapMode::Border:
return GL_CLAMP_TO_BORDER;
- case Tegra::Texture::WrapMode::ClampOGL:
- // TODO(Subv): GL_CLAMP was removed as of OpenGL 3.1, to implement GL_CLAMP, we can use
- // GL_CLAMP_TO_BORDER to get the border color of the texture, and then sample the edge to
- // manually mix them. However the shader part of this is not yet implemented.
- return GL_CLAMP_TO_BORDER;
+ case Tegra::Texture::WrapMode::Clamp:
+ return GL_CLAMP;
case Tegra::Texture::WrapMode::MirrorOnceClampToEdge:
return GL_MIRROR_CLAMP_TO_EDGE;
case Tegra::Texture::WrapMode::MirrorOnceBorder:
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index 3451d321d..aafd6f31b 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -18,7 +18,6 @@
#include "core/perf_stats.h"
#include "core/settings.h"
#include "core/telemetry_session.h"
-#include "core/tracer/recorder.h"
#include "video_core/morton.h"
#include "video_core/renderer_opengl/gl_rasterizer.h"
#include "video_core/renderer_opengl/renderer_opengl.h"