diff options
Diffstat (limited to 'src/video_core/renderer_opengl')
| -rw-r--r-- | src/video_core/renderer_opengl/gl_device.cpp | 54 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_device.h | 6 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_cache.cpp | 66 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_cache.h | 1 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_decompiler.cpp | 61 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_gen.cpp | 19 |
6 files changed, 137 insertions, 70 deletions
diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index 1d1581f49..65a88b06c 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp @@ -2,11 +2,14 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include <array> #include <cstddef> #include <glad/glad.h> #include "common/logging/log.h" +#include "common/scope_exit.h" #include "video_core/renderer_opengl/gl_device.h" +#include "video_core/renderer_opengl/gl_resource_manager.h" namespace OpenGL { @@ -24,6 +27,7 @@ Device::Device() { max_vertex_attributes = GetInteger<u32>(GL_MAX_VERTEX_ATTRIBS); max_varyings = GetInteger<u32>(GL_MAX_VARYING_VECTORS); has_variable_aoffi = TestVariableAoffi(); + has_component_indexing_bug = TestComponentIndexingBug(); } Device::Device(std::nullptr_t) { @@ -31,6 +35,7 @@ Device::Device(std::nullptr_t) { max_vertex_attributes = 16; max_varyings = 15; has_variable_aoffi = true; + has_component_indexing_bug = false; } bool Device::TestVariableAoffi() { @@ -52,4 +57,53 @@ void main() { return supported; } +bool Device::TestComponentIndexingBug() { + constexpr char log_message[] = "Renderer_ComponentIndexingBug: {}"; + const GLchar* COMPONENT_TEST = R"(#version 430 core +layout (std430, binding = 0) buffer OutputBuffer { + uint output_value; +}; +layout (std140, binding = 0) uniform InputBuffer { + uvec4 input_value[4096]; +}; +layout (location = 0) uniform uint idx; +void main() { + output_value = input_value[idx >> 2][idx & 3]; +})"; + const GLuint shader{glCreateShaderProgramv(GL_VERTEX_SHADER, 1, &COMPONENT_TEST)}; + SCOPE_EXIT({ glDeleteProgram(shader); }); + glUseProgram(shader); + + OGLVertexArray vao; + vao.Create(); + glBindVertexArray(vao.handle); + + constexpr std::array<GLuint, 8> values{0, 0, 0, 0, 0x1236327, 0x985482, 0x872753, 0x2378432}; + OGLBuffer ubo; + ubo.Create(); + glNamedBufferData(ubo.handle, sizeof(values), values.data(), GL_STATIC_DRAW); + glBindBufferBase(GL_UNIFORM_BUFFER, 0, ubo.handle); + + OGLBuffer ssbo; + ssbo.Create(); + glNamedBufferStorage(ssbo.handle, sizeof(GLuint), nullptr, GL_CLIENT_STORAGE_BIT); + + for (GLuint index = 4; index < 8; ++index) { + glInvalidateBufferData(ssbo.handle); + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, ssbo.handle); + + glProgramUniform1ui(shader, 0, index); + glDrawArrays(GL_POINTS, 0, 1); + + GLuint result; + glGetNamedBufferSubData(ssbo.handle, 0, sizeof(result), &result); + if (result != values.at(index)) { + LOG_INFO(Render_OpenGL, log_message, true); + return true; + } + } + LOG_INFO(Render_OpenGL, log_message, false); + return false; +} + } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h index de8490682..8c8c93760 100644 --- a/src/video_core/renderer_opengl/gl_device.h +++ b/src/video_core/renderer_opengl/gl_device.h @@ -30,13 +30,19 @@ public: return has_variable_aoffi; } + bool HasComponentIndexingBug() const { + return has_component_indexing_bug; + } + private: static bool TestVariableAoffi(); + static bool TestComponentIndexingBug(); std::size_t uniform_buffer_alignment{}; u32 max_vertex_attributes{}; u32 max_varyings{}; bool has_variable_aoffi{}; + bool has_component_indexing_bug{}; }; } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index d66252224..ac8a9e6b7 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -35,8 +35,8 @@ struct UnspecializedShader { namespace { /// Gets the address for the specified shader stage program -GPUVAddr GetShaderAddress(Maxwell::ShaderProgram program) { - const auto& gpu{Core::System::GetInstance().GPU().Maxwell3D()}; +GPUVAddr GetShaderAddress(Core::System& system, Maxwell::ShaderProgram program) { + const auto& gpu{system.GPU().Maxwell3D()}; const auto& shader_config{gpu.regs.shader_config[static_cast<std::size_t>(program)]}; return gpu.regs.code_address.CodeAddress() + shader_config.offset; } @@ -350,7 +350,8 @@ ShaderDiskCacheUsage CachedShader::GetUsage(GLenum primitive_mode, ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system, Core::Frontend::EmuWindow& emu_window, const Device& device) - : RasterizerCache{rasterizer}, emu_window{emu_window}, device{device}, disk_cache{system} {} + : RasterizerCache{rasterizer}, system{system}, emu_window{emu_window}, device{device}, + disk_cache{system} {} void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading, const VideoCore::DiskResourceLoadCallback& callback) { @@ -546,42 +547,45 @@ std::unordered_map<u64, UnspecializedShader> ShaderCacheOpenGL::GenerateUnspecia } Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) { - if (!Core::System::GetInstance().GPU().Maxwell3D().dirty_flags.shaders) { - return last_shaders[static_cast<u32>(program)]; + if (!system.GPU().Maxwell3D().dirty_flags.shaders) { + return last_shaders[static_cast<std::size_t>(program)]; } - auto& memory_manager{Core::System::GetInstance().GPU().MemoryManager()}; - const GPUVAddr program_addr{GetShaderAddress(program)}; + auto& memory_manager{system.GPU().MemoryManager()}; + const GPUVAddr program_addr{GetShaderAddress(system, program)}; // Look up shader in the cache based on address - const auto& host_ptr{memory_manager.GetPointer(program_addr)}; + const auto host_ptr{memory_manager.GetPointer(program_addr)}; Shader shader{TryGet(host_ptr)}; + if (shader) { + return last_shaders[static_cast<std::size_t>(program)] = shader; + } - if (!shader) { - // No shader found - create a new one - ProgramCode program_code{GetShaderCode(memory_manager, program_addr, host_ptr)}; - ProgramCode program_code_b; - if (program == Maxwell::ShaderProgram::VertexA) { - const GPUVAddr program_addr_b{GetShaderAddress(Maxwell::ShaderProgram::VertexB)}; - program_code_b = GetShaderCode(memory_manager, program_addr_b, - memory_manager.GetPointer(program_addr_b)); - } - const u64 unique_identifier = GetUniqueIdentifier(program, program_code, program_code_b); - const VAddr cpu_addr{*memory_manager.GpuToCpuAddress(program_addr)}; - const auto found = precompiled_shaders.find(unique_identifier); - if (found != precompiled_shaders.end()) { - shader = - std::make_shared<CachedShader>(cpu_addr, unique_identifier, program, disk_cache, - precompiled_programs, found->second, host_ptr); - } else { - shader = std::make_shared<CachedShader>( - device, cpu_addr, unique_identifier, program, disk_cache, precompiled_programs, - std::move(program_code), std::move(program_code_b), host_ptr); - } - Register(shader); + // No shader found - create a new one + ProgramCode program_code{GetShaderCode(memory_manager, program_addr, host_ptr)}; + ProgramCode program_code_b; + if (program == Maxwell::ShaderProgram::VertexA) { + const GPUVAddr program_addr_b{GetShaderAddress(system, Maxwell::ShaderProgram::VertexB)}; + program_code_b = GetShaderCode(memory_manager, program_addr_b, + memory_manager.GetPointer(program_addr_b)); + } + + const u64 unique_identifier = GetUniqueIdentifier(program, program_code, program_code_b); + const VAddr cpu_addr{*memory_manager.GpuToCpuAddress(program_addr)}; + const auto found = precompiled_shaders.find(unique_identifier); + if (found != precompiled_shaders.end()) { + // Create a shader from the cache + shader = std::make_shared<CachedShader>(cpu_addr, unique_identifier, program, disk_cache, + precompiled_programs, found->second, host_ptr); + } else { + // Create a shader from guest memory + shader = std::make_shared<CachedShader>( + device, cpu_addr, unique_identifier, program, disk_cache, precompiled_programs, + std::move(program_code), std::move(program_code_b), host_ptr); } + Register(shader); - return last_shaders[static_cast<u32>(program)] = shader; + return last_shaders[static_cast<std::size_t>(program)] = shader; } } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h index 64e5a5594..09bd0761d 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_cache.h @@ -137,6 +137,7 @@ private: CachedProgram GeneratePrecompiledProgram(const ShaderDiskCacheDump& dump, const std::set<GLenum>& supported_formats); + Core::System& system; Core::Frontend::EmuWindow& emu_window; const Device& device; ShaderDiskCacheOpenGL disk_cache; diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index e9f8d40db..29de5c9db 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -45,7 +45,6 @@ struct TextureAoffi {}; using TextureArgument = std::pair<Type, Node>; using TextureIR = std::variant<TextureAoffi, TextureArgument>; -enum : u32 { POSITION_VARYING_LOCATION = 0, GENERIC_VARYING_START_LOCATION = 1 }; constexpr u32 MAX_CONSTBUFFER_ELEMENTS = static_cast<u32>(RasterizerOpenGL::MaxConstbufferSize) / (4 * sizeof(float)); @@ -247,6 +246,12 @@ private: code.AddLine("layout ({}, max_vertices = {}) out;", topology, max_vertices); code.AddNewLine(); + code.AddLine("in gl_PerVertex {{"); + ++code.scope; + code.AddLine("vec4 gl_Position;"); + --code.scope; + code.AddLine("}} gl_in[];"); + DeclareVertexRedeclarations(); } @@ -349,7 +354,7 @@ private: } void DeclareInputAttribute(Attribute::Index index, bool skip_unused) { - const u32 generic_index{GetGenericAttributeIndex(index)}; + const u32 location{GetGenericAttributeIndex(index)}; std::string name{GetInputAttribute(index)}; if (stage == ShaderStage::Geometry) { @@ -358,19 +363,13 @@ private: std::string suffix; if (stage == ShaderStage::Fragment) { - const auto input_mode{header.ps.GetAttributeUse(generic_index)}; + const auto input_mode{header.ps.GetAttributeUse(location)}; if (skip_unused && input_mode == AttributeUse::Unused) { return; } suffix = GetInputFlags(input_mode); } - u32 location = generic_index; - if (stage != ShaderStage::Vertex) { - // If inputs are varyings, add an offset - location += GENERIC_VARYING_START_LOCATION; - } - code.AddLine("layout (location = {}) {} in vec4 {};", location, suffix, name); } @@ -395,7 +394,7 @@ private: } void DeclareOutputAttribute(Attribute::Index index) { - const u32 location{GetGenericAttributeIndex(index) + GENERIC_VARYING_START_LOCATION}; + const u32 location{GetGenericAttributeIndex(index)}; code.AddLine("layout (location = {}) out vec4 {};", location, GetOutputAttribute(index)); } @@ -577,9 +576,26 @@ private: if (std::holds_alternative<OperationNode>(*offset)) { // Indirect access const std::string final_offset = code.GenerateTemporary(); - code.AddLine("uint {} = (ftou({}) / 4);", final_offset, Visit(offset)); - return fmt::format("{}[{} / 4][{} % 4]", GetConstBuffer(cbuf->GetIndex()), - final_offset, final_offset); + code.AddLine("uint {} = ftou({}) >> 2;", final_offset, Visit(offset)); + + if (!device.HasComponentIndexingBug()) { + return fmt::format("{}[{} >> 2][{} & 3]", GetConstBuffer(cbuf->GetIndex()), + final_offset, final_offset); + } + + // AMD's proprietary GLSL compiler emits ill code for variable component access. + // To bypass this driver bug generate 4 ifs, one per each component. + const std::string pack = code.GenerateTemporary(); + code.AddLine("vec4 {} = {}[{} >> 2];", pack, GetConstBuffer(cbuf->GetIndex()), + final_offset); + + const std::string result = code.GenerateTemporary(); + code.AddLine("float {};", result); + for (u32 swizzle = 0; swizzle < 4; ++swizzle) { + code.AddLine("if (({} & 3) == {}) {} = {}{};", final_offset, swizzle, result, + pack, GetSwizzle(swizzle)); + } + return result; } UNREACHABLE_MSG("Unmanaged offset node type"); @@ -633,10 +649,14 @@ private: switch (attribute) { case Attribute::Index::Position: - if (stage != ShaderStage::Fragment) { - return GeometryPass("position") + GetSwizzle(element); - } else { + switch (stage) { + case ShaderStage::Geometry: + return fmt::format("gl_in[ftou({})].gl_Position{}", Visit(buffer), + GetSwizzle(element)); + case ShaderStage::Fragment: return element == 3 ? "1.0f" : ("gl_FragCoord"s + GetSwizzle(element)); + default: + UNREACHABLE(); } case Attribute::Index::PointCoord: switch (element) { @@ -921,7 +941,7 @@ private: target = [&]() -> std::string { switch (const auto attribute = abuf->GetIndex(); abuf->GetIndex()) { case Attribute::Index::Position: - return "position"s + GetSwizzle(abuf->GetElement()); + return "gl_Position"s + GetSwizzle(abuf->GetElement()); case Attribute::Index::PointSize: return "gl_PointSize"; case Attribute::Index::ClipDistances0123: @@ -1506,9 +1526,7 @@ private: // If a geometry shader is attached, it will always flip (it's the last stage before // fragment). For more info about flipping, refer to gl_shader_gen.cpp. - code.AddLine("position.xy *= viewport_flip.xy;"); - code.AddLine("gl_Position = position;"); - code.AddLine("position.w = 1.0;"); + code.AddLine("gl_Position.xy *= viewport_flip.xy;"); code.AddLine("EmitVertex();"); return {}; } @@ -1746,8 +1764,7 @@ private: } u32 GetNumPhysicalVaryings() const { - return std::min<u32>(device.GetMaxVaryings() - GENERIC_VARYING_START_LOCATION, - Maxwell::NumVaryings); + return std::min<u32>(device.GetMaxVaryings(), Maxwell::NumVaryings); } const Device& device; diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp index d2bb705a9..c845b29aa 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.cpp +++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp @@ -23,8 +23,6 @@ ProgramResult GenerateVertexShader(const Device& device, const ShaderSetup& setu out += GetCommonDeclarations(); out += R"( -layout (location = 0) out vec4 position; - layout (std140, binding = EMULATION_UBO_BINDING) uniform vs_config { vec4 viewport_flip; uvec4 config_pack; // instance_id, flip_stage, y_direction, padding @@ -48,7 +46,6 @@ layout (std140, binding = EMULATION_UBO_BINDING) uniform vs_config { out += R"( void main() { - position = vec4(0.0, 0.0, 0.0, 0.0); execute_vertex(); )"; @@ -59,19 +56,12 @@ void main() { out += R"( // Set Position Y direction - position.y *= utof(config_pack[2]); + gl_Position.y *= utof(config_pack[2]); // Check if the flip stage is VertexB // Config pack's second value is flip_stage if (config_pack[1] == 1) { // Viewport can be flipped, which is unsupported by glViewport - position.xy *= viewport_flip.xy; - } - gl_Position = position; - - // TODO(bunnei): This is likely a hack, position.w should be interpolated as 1.0 - // For now, this is here to bring order in lieu of proper emulation - if (config_pack[1] == 1) { - position.w = 1.0; + gl_Position.xy *= viewport_flip.xy; } })"; @@ -85,9 +75,6 @@ ProgramResult GenerateGeometryShader(const Device& device, const ShaderSetup& se out += GetCommonDeclarations(); out += R"( -layout (location = 0) in vec4 gs_position[]; -layout (location = 0) out vec4 position; - layout (std140, binding = EMULATION_UBO_BINDING) uniform gs_config { vec4 viewport_flip; uvec4 config_pack; // instance_id, flip_stage, y_direction, padding @@ -124,8 +111,6 @@ layout (location = 5) out vec4 FragColor5; layout (location = 6) out vec4 FragColor6; layout (location = 7) out vec4 FragColor7; -layout (location = 0) in noperspective vec4 position; - layout (std140, binding = EMULATION_UBO_BINDING) uniform fs_config { vec4 viewport_flip; uvec4 config_pack; // instance_id, flip_stage, y_direction, padding |
