diff options
Diffstat (limited to 'src/video_core/renderer_opengl')
18 files changed, 284 insertions, 371 deletions
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp index 25652e794..48b86f3bd 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp @@ -71,16 +71,6 @@ GLintptr OGLBufferCache::UploadHostMemory(const void* raw_pointer, std::size_t s return uploaded_offset; } -std::tuple<u8*, GLintptr> OGLBufferCache::ReserveMemory(std::size_t size, std::size_t alignment) { - AlignBuffer(alignment); - u8* const uploaded_ptr = buffer_ptr; - const GLintptr uploaded_offset = buffer_offset; - - buffer_ptr += size; - buffer_offset += size; - return std::make_tuple(uploaded_ptr, uploaded_offset); -} - bool OGLBufferCache::Map(std::size_t max_size) { bool invalidate; std::tie(buffer_ptr, buffer_offset_base, invalidate) = diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h index f9247a40e..f2347581b 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.h +++ b/src/video_core/renderer_opengl/gl_buffer_cache.h @@ -61,9 +61,6 @@ public: /// Uploads from a host memory. Returns host's buffer offset where it's been allocated. GLintptr UploadHostMemory(const void* raw_pointer, std::size_t size, std::size_t alignment = 4); - /// Reserves memory to be used by host's CPU. Returns mapped address and offset. - std::tuple<u8*, GLintptr> ReserveMemory(std::size_t size, std::size_t alignment = 4); - bool Map(std::size_t max_size); void Unmap(); diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index 1d1581f49..65a88b06c 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp @@ -2,11 +2,14 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include <array> #include <cstddef> #include <glad/glad.h> #include "common/logging/log.h" +#include "common/scope_exit.h" #include "video_core/renderer_opengl/gl_device.h" +#include "video_core/renderer_opengl/gl_resource_manager.h" namespace OpenGL { @@ -24,6 +27,7 @@ Device::Device() { max_vertex_attributes = GetInteger<u32>(GL_MAX_VERTEX_ATTRIBS); max_varyings = GetInteger<u32>(GL_MAX_VARYING_VECTORS); has_variable_aoffi = TestVariableAoffi(); + has_component_indexing_bug = TestComponentIndexingBug(); } Device::Device(std::nullptr_t) { @@ -31,6 +35,7 @@ Device::Device(std::nullptr_t) { max_vertex_attributes = 16; max_varyings = 15; has_variable_aoffi = true; + has_component_indexing_bug = false; } bool Device::TestVariableAoffi() { @@ -52,4 +57,53 @@ void main() { return supported; } +bool Device::TestComponentIndexingBug() { + constexpr char log_message[] = "Renderer_ComponentIndexingBug: {}"; + const GLchar* COMPONENT_TEST = R"(#version 430 core +layout (std430, binding = 0) buffer OutputBuffer { + uint output_value; +}; +layout (std140, binding = 0) uniform InputBuffer { + uvec4 input_value[4096]; +}; +layout (location = 0) uniform uint idx; +void main() { + output_value = input_value[idx >> 2][idx & 3]; +})"; + const GLuint shader{glCreateShaderProgramv(GL_VERTEX_SHADER, 1, &COMPONENT_TEST)}; + SCOPE_EXIT({ glDeleteProgram(shader); }); + glUseProgram(shader); + + OGLVertexArray vao; + vao.Create(); + glBindVertexArray(vao.handle); + + constexpr std::array<GLuint, 8> values{0, 0, 0, 0, 0x1236327, 0x985482, 0x872753, 0x2378432}; + OGLBuffer ubo; + ubo.Create(); + glNamedBufferData(ubo.handle, sizeof(values), values.data(), GL_STATIC_DRAW); + glBindBufferBase(GL_UNIFORM_BUFFER, 0, ubo.handle); + + OGLBuffer ssbo; + ssbo.Create(); + glNamedBufferStorage(ssbo.handle, sizeof(GLuint), nullptr, GL_CLIENT_STORAGE_BIT); + + for (GLuint index = 4; index < 8; ++index) { + glInvalidateBufferData(ssbo.handle); + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, ssbo.handle); + + glProgramUniform1ui(shader, 0, index); + glDrawArrays(GL_POINTS, 0, 1); + + GLuint result; + glGetNamedBufferSubData(ssbo.handle, 0, sizeof(result), &result); + if (result != values.at(index)) { + LOG_INFO(Render_OpenGL, log_message, true); + return true; + } + } + LOG_INFO(Render_OpenGL, log_message, false); + return false; +} + } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h index de8490682..8c8c93760 100644 --- a/src/video_core/renderer_opengl/gl_device.h +++ b/src/video_core/renderer_opengl/gl_device.h @@ -30,13 +30,19 @@ public: return has_variable_aoffi; } + bool HasComponentIndexingBug() const { + return has_component_indexing_bug; + } + private: static bool TestVariableAoffi(); + static bool TestComponentIndexingBug(); std::size_t uniform_buffer_alignment{}; u32 max_vertex_attributes{}; u32 max_varyings{}; bool has_variable_aoffi{}; + bool has_component_indexing_bug{}; }; } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_primitive_assembler.cpp b/src/video_core/renderer_opengl/gl_primitive_assembler.cpp deleted file mode 100644 index c3e94d917..000000000 --- a/src/video_core/renderer_opengl/gl_primitive_assembler.cpp +++ /dev/null @@ -1,63 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include <algorithm> -#include <array> -#include "common/assert.h" -#include "common/common_types.h" -#include "core/core.h" -#include "video_core/memory_manager.h" -#include "video_core/renderer_opengl/gl_buffer_cache.h" -#include "video_core/renderer_opengl/gl_primitive_assembler.h" - -namespace OpenGL { - -constexpr u32 TRIANGLES_PER_QUAD = 6; -constexpr std::array<u32, TRIANGLES_PER_QUAD> QUAD_MAP = {0, 1, 2, 0, 2, 3}; - -PrimitiveAssembler::PrimitiveAssembler(OGLBufferCache& buffer_cache) : buffer_cache(buffer_cache) {} - -PrimitiveAssembler::~PrimitiveAssembler() = default; - -std::size_t PrimitiveAssembler::CalculateQuadSize(u32 count) const { - ASSERT_MSG(count % 4 == 0, "Quad count is expected to be a multiple of 4"); - return (count / 4) * TRIANGLES_PER_QUAD * sizeof(GLuint); -} - -GLintptr PrimitiveAssembler::MakeQuadArray(u32 first, u32 count) { - const std::size_t size{CalculateQuadSize(count)}; - auto [dst_pointer, index_offset] = buffer_cache.ReserveMemory(size); - - for (u32 primitive = 0; primitive < count / 4; ++primitive) { - for (u32 i = 0; i < TRIANGLES_PER_QUAD; ++i) { - const u32 index = first + primitive * 4 + QUAD_MAP[i]; - std::memcpy(dst_pointer, &index, sizeof(index)); - dst_pointer += sizeof(index); - } - } - - return index_offset; -} - -GLintptr PrimitiveAssembler::MakeQuadIndexed(GPUVAddr gpu_addr, std::size_t index_size, u32 count) { - const std::size_t map_size{CalculateQuadSize(count)}; - auto [dst_pointer, index_offset] = buffer_cache.ReserveMemory(map_size); - - auto& memory_manager = Core::System::GetInstance().GPU().MemoryManager(); - const u8* source{memory_manager.GetPointer(gpu_addr)}; - - for (u32 primitive = 0; primitive < count / 4; ++primitive) { - for (std::size_t i = 0; i < TRIANGLES_PER_QUAD; ++i) { - const u32 index = primitive * 4 + QUAD_MAP[i]; - const u8* src_offset = source + (index * index_size); - - std::memcpy(dst_pointer, src_offset, index_size); - dst_pointer += index_size; - } - } - - return index_offset; -} - -} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_primitive_assembler.h b/src/video_core/renderer_opengl/gl_primitive_assembler.h deleted file mode 100644 index 4e87ce4d6..000000000 --- a/src/video_core/renderer_opengl/gl_primitive_assembler.h +++ /dev/null @@ -1,31 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include <glad/glad.h> - -#include "common/common_types.h" - -namespace OpenGL { - -class OGLBufferCache; - -class PrimitiveAssembler { -public: - explicit PrimitiveAssembler(OGLBufferCache& buffer_cache); - ~PrimitiveAssembler(); - - /// Calculates the size required by MakeQuadArray and MakeQuadIndexed. - std::size_t CalculateQuadSize(u32 count) const; - - GLintptr MakeQuadArray(u32 first, u32 count); - - GLintptr MakeQuadIndexed(GPUVAddr gpu_addr, std::size_t index_size, u32 count); - -private: - OGLBufferCache& buffer_cache; -}; - -} // namespace OpenGL
\ No newline at end of file diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index f9b6dfeea..d77426067 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -246,29 +246,6 @@ DrawParameters RasterizerOpenGL::SetupDraw() { DrawParameters params{}; params.current_instance = gpu.state.current_instance; - if (regs.draw.topology == Maxwell::PrimitiveTopology::Quads) { - MICROPROFILE_SCOPE(OpenGL_PrimitiveAssembly); - - params.use_indexed = true; - params.primitive_mode = GL_TRIANGLES; - - if (is_indexed) { - params.index_format = MaxwellToGL::IndexFormat(regs.index_array.format); - params.count = (regs.index_array.count / 4) * 6; - params.index_buffer_offset = primitive_assembler.MakeQuadIndexed( - regs.index_array.IndexStart(), regs.index_array.FormatSizeInBytes(), - regs.index_array.count); - params.base_vertex = static_cast<GLint>(regs.vb_element_base); - } else { - // MakeQuadArray always generates u32 indexes - params.index_format = GL_UNSIGNED_INT; - params.count = (regs.vertex_buffer.count / 4) * 6; - params.index_buffer_offset = primitive_assembler.MakeQuadArray( - regs.vertex_buffer.first, regs.vertex_buffer.count); - } - return params; - } - params.use_indexed = is_indexed; params.primitive_mode = MaxwellToGL::PrimitiveTopology(regs.draw.topology); @@ -345,9 +322,9 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { } const auto stage_enum = static_cast<Maxwell::ShaderStage>(stage); - SetupConstBuffers(stage_enum, shader, program_handle, base_bindings); - SetupGlobalRegions(stage_enum, shader, program_handle, base_bindings); - SetupTextures(stage_enum, shader, program_handle, base_bindings); + SetupDrawConstBuffers(stage_enum, shader); + SetupGlobalRegions(stage_enum, shader); + SetupTextures(stage_enum, shader, base_bindings); // Workaround for Intel drivers. // When a clip distance is enabled but not set in the shader it crops parts of the screen @@ -686,30 +663,19 @@ void RasterizerOpenGL::DrawArrays() { SyncCullMode(); SyncPrimitiveRestart(); SyncScissorTest(state); - // Alpha Testing is synced on shaders. SyncTransformFeedback(); SyncPointState(); - CheckAlphaTests(); SyncPolygonOffset(); - // TODO(bunnei): Sync framebuffer_scale uniform here - // TODO(bunnei): Sync scissorbox uniform(s) here + SyncAlphaTest(); // Draw the vertex batch const bool is_indexed = accelerate_draw == AccelDraw::Indexed; std::size_t buffer_size = CalculateVertexArraysSize(); - // Add space for index buffer (keeping in mind non-core primitives) - switch (regs.draw.topology) { - case Maxwell::PrimitiveTopology::Quads: - buffer_size = Common::AlignUp(buffer_size, 4) + - primitive_assembler.CalculateQuadSize(regs.vertex_buffer.count); - break; - default: - if (is_indexed) { - buffer_size = Common::AlignUp(buffer_size, 4) + CalculateIndexBufferSize(); - } - break; + // Add space for index buffer + if (is_indexed) { + buffer_size = Common::AlignUp(buffer_size, 4) + CalculateIndexBufferSize(); } // Uniform space for the 5 shader stages @@ -810,57 +776,55 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config, return true; } -void RasterizerOpenGL::SetupConstBuffers(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, - const Shader& shader, GLuint program_handle, - BaseBindings base_bindings) { +void RasterizerOpenGL::SetupDrawConstBuffers(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, + const Shader& shader) { MICROPROFILE_SCOPE(OpenGL_UBO); - const auto& gpu = system.GPU(); - const auto& maxwell3d = gpu.Maxwell3D(); - const auto& shader_stage = maxwell3d.state.shader_stages[static_cast<std::size_t>(stage)]; + const auto stage_index = static_cast<std::size_t>(stage); + const auto& shader_stage = system.GPU().Maxwell3D().state.shader_stages[stage_index]; const auto& entries = shader->GetShaderEntries().const_buffers; // Upload only the enabled buffers from the 16 constbuffers of each shader stage for (u32 bindpoint = 0; bindpoint < entries.size(); ++bindpoint) { - const auto& used_buffer = entries[bindpoint]; - const auto& buffer = shader_stage.const_buffers[used_buffer.GetIndex()]; - - if (!buffer.enabled) { - // Set values to zero to unbind buffers - bind_ubo_pushbuffer.Push(0, 0, 0); - continue; - } + const auto& entry = entries[bindpoint]; + SetupConstBuffer(shader_stage.const_buffers[entry.GetIndex()], entry); + } +} - std::size_t size = 0; +void RasterizerOpenGL::SetupConstBuffer(const Tegra::Engines::ConstBufferInfo& buffer, + const GLShader::ConstBufferEntry& entry) { + if (!buffer.enabled) { + // Set values to zero to unbind buffers + bind_ubo_pushbuffer.Push(0, 0, 0); + return; + } - if (used_buffer.IsIndirect()) { - // Buffer is accessed indirectly, so upload the entire thing - size = buffer.size; + std::size_t size; + if (entry.IsIndirect()) { + // Buffer is accessed indirectly, so upload the entire thing + size = buffer.size; - if (size > MaxConstbufferSize) { - LOG_WARNING(Render_OpenGL, "Indirect constbuffer size {} exceeds maximum {}", size, - MaxConstbufferSize); - size = MaxConstbufferSize; - } - } else { - // Buffer is accessed directly, upload just what we use - size = used_buffer.GetSize(); + if (size > MaxConstbufferSize) { + LOG_WARNING(Render_OpenGL, "Indirect constbuffer size {} exceeds maximum {}", size, + MaxConstbufferSize); + size = MaxConstbufferSize; } + } else { + // Buffer is accessed directly, upload just what we use + size = entry.GetSize(); + } - // Align the actual size so it ends up being a multiple of vec4 to meet the OpenGL std140 - // UBO alignment requirements. - size = Common::AlignUp(size, sizeof(GLvec4)); - ASSERT_MSG(size <= MaxConstbufferSize, "Constbuffer too big"); - - const GLintptr const_buffer_offset = - buffer_cache.UploadMemory(buffer.address, size, device.GetUniformBufferAlignment()); + // Align the actual size so it ends up being a multiple of vec4 to meet the OpenGL std140 + // UBO alignment requirements. + size = Common::AlignUp(size, sizeof(GLvec4)); + ASSERT_MSG(size <= MaxConstbufferSize, "Constant buffer is too big"); - bind_ubo_pushbuffer.Push(buffer_cache.GetHandle(), const_buffer_offset, size); - } + const std::size_t alignment = device.GetUniformBufferAlignment(); + const GLintptr offset = buffer_cache.UploadMemory(buffer.address, size, alignment); + bind_ubo_pushbuffer.Push(buffer_cache.GetHandle(), offset, size); } void RasterizerOpenGL::SetupGlobalRegions(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, - const Shader& shader, GLenum primitive_mode, - BaseBindings base_bindings) { + const Shader& shader) { const auto& entries = shader->GetShaderEntries().global_memory_entries; for (std::size_t bindpoint = 0; bindpoint < entries.size(); ++bindpoint) { const auto& entry{entries[bindpoint]}; @@ -874,7 +838,7 @@ void RasterizerOpenGL::SetupGlobalRegions(Tegra::Engines::Maxwell3D::Regs::Shade } void RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, const Shader& shader, - GLuint program_handle, BaseBindings base_bindings) { + BaseBindings base_bindings) { MICROPROFILE_SCOPE(OpenGL_Texture); const auto& gpu = system.GPU(); const auto& maxwell3d = gpu.Maxwell3D(); @@ -1152,10 +1116,17 @@ void RasterizerOpenGL::SyncPolygonOffset() { state.polygon_offset.clamp = regs.polygon_offset_clamp; } -void RasterizerOpenGL::CheckAlphaTests() { +void RasterizerOpenGL::SyncAlphaTest() { const auto& regs = system.GPU().Maxwell3D().regs; UNIMPLEMENTED_IF_MSG(regs.alpha_test_enabled != 0 && regs.rt_control.count > 1, "Alpha Testing is enabled with more than one rendertarget"); + + state.alpha_test.enabled = regs.alpha_test_enabled; + if (!state.alpha_test.enabled) { + return; + } + state.alpha_test.func = MaxwellToGL::ComparisonOp(regs.alpha_test_func); + state.alpha_test.ref = regs.alpha_test_ref; } } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index d78094138..f7671ff5d 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -17,17 +17,18 @@ #include <glad/glad.h> #include "common/common_types.h" +#include "video_core/engines/const_buffer_info.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/rasterizer_cache.h" #include "video_core/rasterizer_interface.h" #include "video_core/renderer_opengl/gl_buffer_cache.h" #include "video_core/renderer_opengl/gl_device.h" #include "video_core/renderer_opengl/gl_global_cache.h" -#include "video_core/renderer_opengl/gl_primitive_assembler.h" #include "video_core/renderer_opengl/gl_rasterizer_cache.h" #include "video_core/renderer_opengl/gl_resource_manager.h" #include "video_core/renderer_opengl/gl_sampler_cache.h" #include "video_core/renderer_opengl/gl_shader_cache.h" +#include "video_core/renderer_opengl/gl_shader_decompiler.h" #include "video_core/renderer_opengl/gl_shader_manager.h" #include "video_core/renderer_opengl/gl_state.h" #include "video_core/renderer_opengl/utils.h" @@ -106,17 +107,20 @@ private: bool preserve_contents = true, std::optional<std::size_t> single_color_target = {}); /// Configures the current constbuffers to use for the draw command. - void SetupConstBuffers(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, const Shader& shader, - GLuint program_handle, BaseBindings base_bindings); + void SetupDrawConstBuffers(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, + const Shader& shader); + + /// Configures a constant buffer. + void SetupConstBuffer(const Tegra::Engines::ConstBufferInfo& buffer, + const GLShader::ConstBufferEntry& entry); /// Configures the current global memory entries to use for the draw command. void SetupGlobalRegions(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, - const Shader& shader, GLenum primitive_mode, - BaseBindings base_bindings); + const Shader& shader); /// Configures the current textures to use for the draw command. void SetupTextures(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, const Shader& shader, - GLuint program_handle, BaseBindings base_bindings); + BaseBindings base_bindings); /// Syncs the viewport and depth range to match the guest state void SyncViewport(OpenGLState& current_state); @@ -167,8 +171,8 @@ private: /// Syncs the polygon offsets void SyncPolygonOffset(); - /// Check asserts for alpha testing. - void CheckAlphaTests(); + /// Syncs the alpha test state to match the guest state + void SyncAlphaTest(); /// Check for extension that are not strictly required /// but are needed for correct emulation @@ -197,7 +201,6 @@ private: static constexpr std::size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024; OGLBufferCache buffer_cache; - PrimitiveAssembler primitive_assembler{buffer_cache}; BindBuffersRangePushBuffer bind_ubo_pushbuffer{GL_UNIFORM_BUFFER}; BindBuffersRangePushBuffer bind_ssbo_pushbuffer{GL_SHADER_STORAGE_BUFFER}; diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index d66252224..ac8a9e6b7 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -35,8 +35,8 @@ struct UnspecializedShader { namespace { /// Gets the address for the specified shader stage program -GPUVAddr GetShaderAddress(Maxwell::ShaderProgram program) { - const auto& gpu{Core::System::GetInstance().GPU().Maxwell3D()}; +GPUVAddr GetShaderAddress(Core::System& system, Maxwell::ShaderProgram program) { + const auto& gpu{system.GPU().Maxwell3D()}; const auto& shader_config{gpu.regs.shader_config[static_cast<std::size_t>(program)]}; return gpu.regs.code_address.CodeAddress() + shader_config.offset; } @@ -350,7 +350,8 @@ ShaderDiskCacheUsage CachedShader::GetUsage(GLenum primitive_mode, ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system, Core::Frontend::EmuWindow& emu_window, const Device& device) - : RasterizerCache{rasterizer}, emu_window{emu_window}, device{device}, disk_cache{system} {} + : RasterizerCache{rasterizer}, system{system}, emu_window{emu_window}, device{device}, + disk_cache{system} {} void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading, const VideoCore::DiskResourceLoadCallback& callback) { @@ -546,42 +547,45 @@ std::unordered_map<u64, UnspecializedShader> ShaderCacheOpenGL::GenerateUnspecia } Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) { - if (!Core::System::GetInstance().GPU().Maxwell3D().dirty_flags.shaders) { - return last_shaders[static_cast<u32>(program)]; + if (!system.GPU().Maxwell3D().dirty_flags.shaders) { + return last_shaders[static_cast<std::size_t>(program)]; } - auto& memory_manager{Core::System::GetInstance().GPU().MemoryManager()}; - const GPUVAddr program_addr{GetShaderAddress(program)}; + auto& memory_manager{system.GPU().MemoryManager()}; + const GPUVAddr program_addr{GetShaderAddress(system, program)}; // Look up shader in the cache based on address - const auto& host_ptr{memory_manager.GetPointer(program_addr)}; + const auto host_ptr{memory_manager.GetPointer(program_addr)}; Shader shader{TryGet(host_ptr)}; + if (shader) { + return last_shaders[static_cast<std::size_t>(program)] = shader; + } - if (!shader) { - // No shader found - create a new one - ProgramCode program_code{GetShaderCode(memory_manager, program_addr, host_ptr)}; - ProgramCode program_code_b; - if (program == Maxwell::ShaderProgram::VertexA) { - const GPUVAddr program_addr_b{GetShaderAddress(Maxwell::ShaderProgram::VertexB)}; - program_code_b = GetShaderCode(memory_manager, program_addr_b, - memory_manager.GetPointer(program_addr_b)); - } - const u64 unique_identifier = GetUniqueIdentifier(program, program_code, program_code_b); - const VAddr cpu_addr{*memory_manager.GpuToCpuAddress(program_addr)}; - const auto found = precompiled_shaders.find(unique_identifier); - if (found != precompiled_shaders.end()) { - shader = - std::make_shared<CachedShader>(cpu_addr, unique_identifier, program, disk_cache, - precompiled_programs, found->second, host_ptr); - } else { - shader = std::make_shared<CachedShader>( - device, cpu_addr, unique_identifier, program, disk_cache, precompiled_programs, - std::move(program_code), std::move(program_code_b), host_ptr); - } - Register(shader); + // No shader found - create a new one + ProgramCode program_code{GetShaderCode(memory_manager, program_addr, host_ptr)}; + ProgramCode program_code_b; + if (program == Maxwell::ShaderProgram::VertexA) { + const GPUVAddr program_addr_b{GetShaderAddress(system, Maxwell::ShaderProgram::VertexB)}; + program_code_b = GetShaderCode(memory_manager, program_addr_b, + memory_manager.GetPointer(program_addr_b)); + } + + const u64 unique_identifier = GetUniqueIdentifier(program, program_code, program_code_b); + const VAddr cpu_addr{*memory_manager.GpuToCpuAddress(program_addr)}; + const auto found = precompiled_shaders.find(unique_identifier); + if (found != precompiled_shaders.end()) { + // Create a shader from the cache + shader = std::make_shared<CachedShader>(cpu_addr, unique_identifier, program, disk_cache, + precompiled_programs, found->second, host_ptr); + } else { + // Create a shader from guest memory + shader = std::make_shared<CachedShader>( + device, cpu_addr, unique_identifier, program, disk_cache, precompiled_programs, + std::move(program_code), std::move(program_code_b), host_ptr); } + Register(shader); - return last_shaders[static_cast<u32>(program)] = shader; + return last_shaders[static_cast<std::size_t>(program)] = shader; } } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h index 64e5a5594..09bd0761d 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_cache.h @@ -137,6 +137,7 @@ private: CachedProgram GeneratePrecompiledProgram(const ShaderDiskCacheDump& dump, const std::set<GLenum>& supported_formats); + Core::System& system; Core::Frontend::EmuWindow& emu_window; const Device& device; ShaderDiskCacheOpenGL disk_cache; diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index e9f8d40db..7dc2e0560 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -45,7 +45,6 @@ struct TextureAoffi {}; using TextureArgument = std::pair<Type, Node>; using TextureIR = std::variant<TextureAoffi, TextureArgument>; -enum : u32 { POSITION_VARYING_LOCATION = 0, GENERIC_VARYING_START_LOCATION = 1 }; constexpr u32 MAX_CONSTBUFFER_ELEMENTS = static_cast<u32>(RasterizerOpenGL::MaxConstbufferSize) / (4 * sizeof(float)); @@ -124,8 +123,8 @@ bool IsPrecise(Operation operand) { return false; } -bool IsPrecise(Node node) { - if (const auto operation = std::get_if<OperationNode>(node)) { +bool IsPrecise(const Node& node) { + if (const auto operation = std::get_if<OperationNode>(&*node)) { return IsPrecise(*operation); } return false; @@ -144,6 +143,24 @@ u32 GetGenericAttributeIndex(Attribute::Index index) { return static_cast<u32>(index) - static_cast<u32>(Attribute::Index::Attribute_0); } +constexpr const char* GetFlowStackPrefix(MetaStackClass stack) { + switch (stack) { + case MetaStackClass::Ssy: + return "ssy"; + case MetaStackClass::Pbk: + return "pbk"; + } + return {}; +} + +std::string FlowStackName(MetaStackClass stack) { + return fmt::format("{}_flow_stack", GetFlowStackPrefix(stack)); +} + +std::string FlowStackTopName(MetaStackClass stack) { + return fmt::format("{}_flow_stack_top", GetFlowStackPrefix(stack)); +} + class GLSLDecompiler final { public: explicit GLSLDecompiler(const Device& device, const ShaderIR& ir, ShaderStage stage, @@ -174,8 +191,10 @@ public: // TODO(Subv): Figure out the actual depth of the flow stack, for now it seems // unlikely that shaders will use 20 nested SSYs and PBKs. constexpr u32 FLOW_STACK_SIZE = 20; - code.AddLine("uint flow_stack[{}];", FLOW_STACK_SIZE); - code.AddLine("uint flow_stack_top = 0u;"); + for (const auto stack : std::array{MetaStackClass::Ssy, MetaStackClass::Pbk}) { + code.AddLine("uint {}[{}];", FlowStackName(stack), FLOW_STACK_SIZE); + code.AddLine("uint {} = 0u;", FlowStackTopName(stack)); + } code.AddLine("while (true) {{"); ++code.scope; @@ -247,6 +266,12 @@ private: code.AddLine("layout ({}, max_vertices = {}) out;", topology, max_vertices); code.AddNewLine(); + code.AddLine("in gl_PerVertex {{"); + ++code.scope; + code.AddLine("vec4 gl_Position;"); + --code.scope; + code.AddLine("}} gl_in[];"); + DeclareVertexRedeclarations(); } @@ -349,7 +374,7 @@ private: } void DeclareInputAttribute(Attribute::Index index, bool skip_unused) { - const u32 generic_index{GetGenericAttributeIndex(index)}; + const u32 location{GetGenericAttributeIndex(index)}; std::string name{GetInputAttribute(index)}; if (stage == ShaderStage::Geometry) { @@ -358,19 +383,13 @@ private: std::string suffix; if (stage == ShaderStage::Fragment) { - const auto input_mode{header.ps.GetAttributeUse(generic_index)}; + const auto input_mode{header.ps.GetAttributeUse(location)}; if (skip_unused && input_mode == AttributeUse::Unused) { return; } suffix = GetInputFlags(input_mode); } - u32 location = generic_index; - if (stage != ShaderStage::Vertex) { - // If inputs are varyings, add an offset - location += GENERIC_VARYING_START_LOCATION; - } - code.AddLine("layout (location = {}) {} in vec4 {};", location, suffix, name); } @@ -395,7 +414,7 @@ private: } void DeclareOutputAttribute(Attribute::Index index) { - const u32 location{GetGenericAttributeIndex(index) + GENERIC_VARYING_START_LOCATION}; + const u32 location{GetGenericAttributeIndex(index)}; code.AddLine("layout (location = {}) out vec4 {};", location, GetOutputAttribute(index)); } @@ -498,15 +517,15 @@ private: } void VisitBlock(const NodeBlock& bb) { - for (const Node node : bb) { + for (const auto& node : bb) { if (const std::string expr = Visit(node); !expr.empty()) { code.AddLine(expr); } } } - std::string Visit(Node node) { - if (const auto operation = std::get_if<OperationNode>(node)) { + std::string Visit(const Node& node) { + if (const auto operation = std::get_if<OperationNode>(&*node)) { const auto operation_index = static_cast<std::size_t>(operation->GetCode()); if (operation_index >= operation_decompilers.size()) { UNREACHABLE_MSG("Out of bounds operation: {}", operation_index); @@ -520,7 +539,7 @@ private: return (this->*decompiler)(*operation); } - if (const auto gpr = std::get_if<GprNode>(node)) { + if (const auto gpr = std::get_if<GprNode>(&*node)) { const u32 index = gpr->GetIndex(); if (index == Register::ZeroIndex) { return "0"; @@ -528,7 +547,7 @@ private: return GetRegister(index); } - if (const auto immediate = std::get_if<ImmediateNode>(node)) { + if (const auto immediate = std::get_if<ImmediateNode>(&*node)) { const u32 value = immediate->GetValue(); if (value < 10) { // For eyecandy avoid using hex numbers on single digits @@ -537,7 +556,7 @@ private: return fmt::format("utof(0x{:x}u)", immediate->GetValue()); } - if (const auto predicate = std::get_if<PredicateNode>(node)) { + if (const auto predicate = std::get_if<PredicateNode>(&*node)) { const auto value = [&]() -> std::string { switch (const auto index = predicate->GetIndex(); index) { case Tegra::Shader::Pred::UnusedIndex: @@ -554,7 +573,7 @@ private: return value; } - if (const auto abuf = std::get_if<AbufNode>(node)) { + if (const auto abuf = std::get_if<AbufNode>(&*node)) { UNIMPLEMENTED_IF_MSG(abuf->IsPhysicalBuffer() && stage == ShaderStage::Geometry, "Physical attributes in geometry shaders are not implemented"); if (abuf->IsPhysicalBuffer()) { @@ -564,9 +583,9 @@ private: return ReadAttribute(abuf->GetIndex(), abuf->GetElement(), abuf->GetBuffer()); } - if (const auto cbuf = std::get_if<CbufNode>(node)) { + if (const auto cbuf = std::get_if<CbufNode>(&*node)) { const Node offset = cbuf->GetOffset(); - if (const auto immediate = std::get_if<ImmediateNode>(offset)) { + if (const auto immediate = std::get_if<ImmediateNode>(&*offset)) { // Direct access const u32 offset_imm = immediate->GetValue(); ASSERT_MSG(offset_imm % 4 == 0, "Unaligned cbuf direct access"); @@ -577,30 +596,47 @@ private: if (std::holds_alternative<OperationNode>(*offset)) { // Indirect access const std::string final_offset = code.GenerateTemporary(); - code.AddLine("uint {} = (ftou({}) / 4);", final_offset, Visit(offset)); - return fmt::format("{}[{} / 4][{} % 4]", GetConstBuffer(cbuf->GetIndex()), - final_offset, final_offset); + code.AddLine("uint {} = ftou({}) >> 2;", final_offset, Visit(offset)); + + if (!device.HasComponentIndexingBug()) { + return fmt::format("{}[{} >> 2][{} & 3]", GetConstBuffer(cbuf->GetIndex()), + final_offset, final_offset); + } + + // AMD's proprietary GLSL compiler emits ill code for variable component access. + // To bypass this driver bug generate 4 ifs, one per each component. + const std::string pack = code.GenerateTemporary(); + code.AddLine("vec4 {} = {}[{} >> 2];", pack, GetConstBuffer(cbuf->GetIndex()), + final_offset); + + const std::string result = code.GenerateTemporary(); + code.AddLine("float {};", result); + for (u32 swizzle = 0; swizzle < 4; ++swizzle) { + code.AddLine("if (({} & 3) == {}) {} = {}{};", final_offset, swizzle, result, + pack, GetSwizzle(swizzle)); + } + return result; } UNREACHABLE_MSG("Unmanaged offset node type"); } - if (const auto gmem = std::get_if<GmemNode>(node)) { + if (const auto gmem = std::get_if<GmemNode>(&*node)) { const std::string real = Visit(gmem->GetRealAddress()); const std::string base = Visit(gmem->GetBaseAddress()); const std::string final_offset = fmt::format("(ftou({}) - ftou({})) / 4", real, base); return fmt::format("{}[{}]", GetGlobalMemory(gmem->GetDescriptor()), final_offset); } - if (const auto lmem = std::get_if<LmemNode>(node)) { + if (const auto lmem = std::get_if<LmemNode>(&*node)) { return fmt::format("{}[ftou({}) / 4]", GetLocalMemory(), Visit(lmem->GetAddress())); } - if (const auto internal_flag = std::get_if<InternalFlagNode>(node)) { + if (const auto internal_flag = std::get_if<InternalFlagNode>(&*node)) { return GetInternalFlag(internal_flag->GetFlag()); } - if (const auto conditional = std::get_if<ConditionalNode>(node)) { + if (const auto conditional = std::get_if<ConditionalNode>(&*node)) { // It's invalid to call conditional on nested nodes, use an operation instead code.AddLine("if ({}) {{", Visit(conditional->GetCondition())); ++code.scope; @@ -612,7 +648,7 @@ private: return {}; } - if (const auto comment = std::get_if<CommentNode>(node)) { + if (const auto comment = std::get_if<CommentNode>(&*node)) { return "// " + comment->GetText(); } @@ -620,7 +656,7 @@ private: return {}; } - std::string ReadAttribute(Attribute::Index attribute, u32 element, Node buffer = {}) { + std::string ReadAttribute(Attribute::Index attribute, u32 element, const Node& buffer = {}) { const auto GeometryPass = [&](std::string_view name) { if (stage == ShaderStage::Geometry && buffer) { // TODO(Rodrigo): Guard geometry inputs against out of bound reads. Some games @@ -633,10 +669,14 @@ private: switch (attribute) { case Attribute::Index::Position: - if (stage != ShaderStage::Fragment) { - return GeometryPass("position") + GetSwizzle(element); - } else { + switch (stage) { + case ShaderStage::Geometry: + return fmt::format("gl_in[ftou({})].gl_Position{}", Visit(buffer), + GetSwizzle(element)); + case ShaderStage::Fragment: return element == 3 ? "1.0f" : ("gl_FragCoord"s + GetSwizzle(element)); + default: + UNREACHABLE(); } case Attribute::Index::PointCoord: switch (element) { @@ -852,7 +892,7 @@ private: std::string expr = ", "; switch (type) { case Type::Int: - if (const auto immediate = std::get_if<ImmediateNode>(operand)) { + if (const auto immediate = std::get_if<ImmediateNode>(&*operand)) { // Inline the string as an immediate integer in GLSL (some extra arguments are // required to be constant) expr += std::to_string(static_cast<s32>(immediate->GetValue())); @@ -884,7 +924,7 @@ private: for (std::size_t index = 0; index < aoffi.size(); ++index) { const auto operand{aoffi.at(index)}; - if (const auto immediate = std::get_if<ImmediateNode>(operand)) { + if (const auto immediate = std::get_if<ImmediateNode>(&*operand)) { // Inline the string as an immediate integer in GLSL (AOFFI arguments are required // to be constant by the standard). expr += std::to_string(static_cast<s32>(immediate->GetValue())); @@ -905,23 +945,23 @@ private: } std::string Assign(Operation operation) { - const Node dest = operation[0]; - const Node src = operation[1]; + const Node& dest = operation[0]; + const Node& src = operation[1]; std::string target; - if (const auto gpr = std::get_if<GprNode>(dest)) { + if (const auto gpr = std::get_if<GprNode>(&*dest)) { if (gpr->GetIndex() == Register::ZeroIndex) { // Writing to Register::ZeroIndex is a no op return {}; } target = GetRegister(gpr->GetIndex()); - } else if (const auto abuf = std::get_if<AbufNode>(dest)) { + } else if (const auto abuf = std::get_if<AbufNode>(&*dest)) { UNIMPLEMENTED_IF(abuf->IsPhysicalBuffer()); target = [&]() -> std::string { switch (const auto attribute = abuf->GetIndex(); abuf->GetIndex()) { case Attribute::Index::Position: - return "position"s + GetSwizzle(abuf->GetElement()); + return "gl_Position"s + GetSwizzle(abuf->GetElement()); case Attribute::Index::PointSize: return "gl_PointSize"; case Attribute::Index::ClipDistances0123: @@ -937,9 +977,9 @@ private: return "0"; } }(); - } else if (const auto lmem = std::get_if<LmemNode>(dest)) { + } else if (const auto lmem = std::get_if<LmemNode>(&*dest)) { target = fmt::format("{}[ftou({}) / 4]", GetLocalMemory(), Visit(lmem->GetAddress())); - } else if (const auto gmem = std::get_if<GmemNode>(dest)) { + } else if (const auto gmem = std::get_if<GmemNode>(&*dest)) { const std::string real = Visit(gmem->GetRealAddress()); const std::string base = Visit(gmem->GetBaseAddress()); const std::string final_offset = fmt::format("(ftou({}) - ftou({})) / 4", real, base); @@ -1216,12 +1256,12 @@ private: } std::string LogicalAssign(Operation operation) { - const Node dest = operation[0]; - const Node src = operation[1]; + const Node& dest = operation[0]; + const Node& src = operation[1]; std::string target; - if (const auto pred = std::get_if<PredicateNode>(dest)) { + if (const auto pred = std::get_if<PredicateNode>(&*dest)) { ASSERT_MSG(!pred->IsNegated(), "Negating logical assignment"); const auto index = pred->GetIndex(); @@ -1232,7 +1272,7 @@ private: return {}; } target = GetPredicate(index); - } else if (const auto flag = std::get_if<InternalFlagNode>(dest)) { + } else if (const auto flag = std::get_if<InternalFlagNode>(&*dest)) { target = GetInternalFlag(flag->GetFlag()); } @@ -1409,7 +1449,7 @@ private: } std::string Branch(Operation operation) { - const auto target = std::get_if<ImmediateNode>(operation[0]); + const auto target = std::get_if<ImmediateNode>(&*operation[0]); UNIMPLEMENTED_IF(!target); code.AddLine("jmp_to = 0x{:x}u;", target->GetValue()); @@ -1418,15 +1458,18 @@ private: } std::string PushFlowStack(Operation operation) { - const auto target = std::get_if<ImmediateNode>(operation[0]); + const auto stack = std::get<MetaStackClass>(operation.GetMeta()); + const auto target = std::get_if<ImmediateNode>(&*operation[0]); UNIMPLEMENTED_IF(!target); - code.AddLine("flow_stack[flow_stack_top++] = 0x{:x}u;", target->GetValue()); + code.AddLine("{}[{}++] = 0x{:x}u;", FlowStackName(stack), FlowStackTopName(stack), + target->GetValue()); return {}; } std::string PopFlowStack(Operation operation) { - code.AddLine("jmp_to = flow_stack[--flow_stack_top];"); + const auto stack = std::get<MetaStackClass>(operation.GetMeta()); + code.AddLine("jmp_to = {}[--{}];", FlowStackName(stack), FlowStackTopName(stack)); code.AddLine("break;"); return {}; } @@ -1447,27 +1490,9 @@ private: UNIMPLEMENTED_IF_MSG(header.ps.omap.sample_mask != 0, "Sample mask write is unimplemented"); - code.AddLine("if (alpha_test[0] != 0) {{"); - ++code.scope; - // We start on the register containing the alpha value in the first RT. - u32 current_reg = 3; - for (u32 render_target = 0; render_target < Maxwell::NumRenderTargets; ++render_target) { - // TODO(Blinkhawk): verify the behavior of alpha testing on hardware when - // multiple render targets are used. - if (header.ps.IsColorComponentOutputEnabled(render_target, 0) || - header.ps.IsColorComponentOutputEnabled(render_target, 1) || - header.ps.IsColorComponentOutputEnabled(render_target, 2) || - header.ps.IsColorComponentOutputEnabled(render_target, 3)) { - code.AddLine("if (!AlphaFunc({})) discard;", SafeGetRegister(current_reg)); - current_reg += 4; - } - } - --code.scope; - code.AddLine("}}"); - // Write the color outputs using the data in the shader registers, disabled // rendertargets/components are skipped in the register assignment. - current_reg = 0; + u32 current_reg = 0; for (u32 render_target = 0; render_target < Maxwell::NumRenderTargets; ++render_target) { // TODO(Subv): Figure out how dual-source blending is configured in the Switch. for (u32 component = 0; component < 4; ++component) { @@ -1506,9 +1531,7 @@ private: // If a geometry shader is attached, it will always flip (it's the last stage before // fragment). For more info about flipping, refer to gl_shader_gen.cpp. - code.AddLine("position.xy *= viewport_flip.xy;"); - code.AddLine("gl_Position = position;"); - code.AddLine("position.w = 1.0;"); + code.AddLine("gl_Position.xy *= viewport_flip.xy;"); code.AddLine("EmitVertex();"); return {}; } @@ -1746,8 +1769,7 @@ private: } u32 GetNumPhysicalVaryings() const { - return std::min<u32>(device.GetMaxVaryings() - GENERIC_VARYING_START_LOCATION, - Maxwell::NumVaryings); + return std::min<u32>(device.GetMaxVaryings(), Maxwell::NumVaryings); } const Device& device; diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp index d2bb705a9..9148629ec 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.cpp +++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp @@ -23,12 +23,9 @@ ProgramResult GenerateVertexShader(const Device& device, const ShaderSetup& setu out += GetCommonDeclarations(); out += R"( -layout (location = 0) out vec4 position; - layout (std140, binding = EMULATION_UBO_BINDING) uniform vs_config { vec4 viewport_flip; uvec4 config_pack; // instance_id, flip_stage, y_direction, padding - uvec4 alpha_test; }; )"; @@ -48,7 +45,6 @@ layout (std140, binding = EMULATION_UBO_BINDING) uniform vs_config { out += R"( void main() { - position = vec4(0.0, 0.0, 0.0, 0.0); execute_vertex(); )"; @@ -59,19 +55,12 @@ void main() { out += R"( // Set Position Y direction - position.y *= utof(config_pack[2]); + gl_Position.y *= utof(config_pack[2]); // Check if the flip stage is VertexB // Config pack's second value is flip_stage if (config_pack[1] == 1) { // Viewport can be flipped, which is unsupported by glViewport - position.xy *= viewport_flip.xy; - } - gl_Position = position; - - // TODO(bunnei): This is likely a hack, position.w should be interpolated as 1.0 - // For now, this is here to bring order in lieu of proper emulation - if (config_pack[1] == 1) { - position.w = 1.0; + gl_Position.xy *= viewport_flip.xy; } })"; @@ -85,13 +74,9 @@ ProgramResult GenerateGeometryShader(const Device& device, const ShaderSetup& se out += GetCommonDeclarations(); out += R"( -layout (location = 0) in vec4 gs_position[]; -layout (location = 0) out vec4 position; - layout (std140, binding = EMULATION_UBO_BINDING) uniform gs_config { vec4 viewport_flip; uvec4 config_pack; // instance_id, flip_stage, y_direction, padding - uvec4 alpha_test; }; )"; @@ -124,38 +109,11 @@ layout (location = 5) out vec4 FragColor5; layout (location = 6) out vec4 FragColor6; layout (location = 7) out vec4 FragColor7; -layout (location = 0) in noperspective vec4 position; - layout (std140, binding = EMULATION_UBO_BINDING) uniform fs_config { vec4 viewport_flip; uvec4 config_pack; // instance_id, flip_stage, y_direction, padding - uvec4 alpha_test; }; -bool AlphaFunc(in float value) { - float ref = uintBitsToFloat(alpha_test[2]); - switch (alpha_test[1]) { - case 1: - return false; - case 2: - return value < ref; - case 3: - return value == ref; - case 4: - return value <= ref; - case 5: - return value > ref; - case 6: - return value != ref; - case 7: - return value >= ref; - case 8: - return true; - default: - return false; - } -} - )"; const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET); ProgramResult program = diff --git a/src/video_core/renderer_opengl/gl_shader_manager.cpp b/src/video_core/renderer_opengl/gl_shader_manager.cpp index 05ab01dcb..b05f90f20 100644 --- a/src/video_core/renderer_opengl/gl_shader_manager.cpp +++ b/src/video_core/renderer_opengl/gl_shader_manager.cpp @@ -48,17 +48,6 @@ void MaxwellUniformData::SetFromRegs(const Maxwell3D& maxwell, std::size_t shade viewport_flip[0] = regs.viewport_transform[0].scale_x < 0.0 ? -1.0f : 1.0f; viewport_flip[1] = regs.viewport_transform[0].scale_y < 0.0 ? -1.0f : 1.0f; - auto func{static_cast<u32>(regs.alpha_test_func)}; - // Normalize the gl variants of opCompare to be the same as the normal variants - const u32 op_gl_variant_base = static_cast<u32>(Maxwell3D::Regs::ComparisonOp::Never); - if (func >= op_gl_variant_base) { - func = func - op_gl_variant_base + 1U; - } - - alpha_test.enabled = regs.alpha_test_enabled; - alpha_test.func = func; - alpha_test.ref = regs.alpha_test_ref; - instance_id = state.current_instance; // Assign in which stage the position has to be flipped diff --git a/src/video_core/renderer_opengl/gl_shader_manager.h b/src/video_core/renderer_opengl/gl_shader_manager.h index cec18a832..6961e702a 100644 --- a/src/video_core/renderer_opengl/gl_shader_manager.h +++ b/src/video_core/renderer_opengl/gl_shader_manager.h @@ -27,14 +27,8 @@ struct MaxwellUniformData { GLuint flip_stage; GLfloat y_direction; }; - struct alignas(16) { - GLuint enabled; - GLuint func; - GLfloat ref; - GLuint padding; - } alpha_test; }; -static_assert(sizeof(MaxwellUniformData) == 48, "MaxwellUniformData structure size is incorrect"); +static_assert(sizeof(MaxwellUniformData) == 32, "MaxwellUniformData structure size is incorrect"); static_assert(sizeof(MaxwellUniformData) < 16384, "MaxwellUniformData structure must be less than 16kb as per the OpenGL spec"); diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp index 7425fbe5d..d86e137ac 100644 --- a/src/video_core/renderer_opengl/gl_state.cpp +++ b/src/video_core/renderer_opengl/gl_state.cpp @@ -156,6 +156,10 @@ OpenGLState::OpenGLState() { polygon_offset.factor = 0.0f; polygon_offset.units = 0.0f; polygon_offset.clamp = 0.0f; + + alpha_test.enabled = false; + alpha_test.func = GL_ALWAYS; + alpha_test.ref = 0.0f; } void OpenGLState::ApplyDefaultState() { @@ -461,6 +465,14 @@ void OpenGLState::ApplyPolygonOffset() const { } } +void OpenGLState::ApplyAlphaTest() const { + Enable(GL_ALPHA_TEST, cur_state.alpha_test.enabled, alpha_test.enabled); + if (UpdateTie(std::tie(cur_state.alpha_test.func, cur_state.alpha_test.ref), + std::tie(alpha_test.func, alpha_test.ref))) { + glAlphaFunc(alpha_test.func, alpha_test.ref); + } +} + void OpenGLState::ApplyTextures() const { bool has_delta{}; std::size_t first{}; @@ -533,6 +545,7 @@ void OpenGLState::Apply() const { ApplyTextures(); ApplySamplers(); ApplyPolygonOffset(); + ApplyAlphaTest(); } void OpenGLState::EmulateViewportWithScissor() { diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h index 41418a7b8..b0140495d 100644 --- a/src/video_core/renderer_opengl/gl_state.h +++ b/src/video_core/renderer_opengl/gl_state.h @@ -172,6 +172,12 @@ public: GLfloat clamp; } polygon_offset; + struct { + bool enabled; // GL_ALPHA_TEST + GLenum func; // GL_ALPHA_TEST_FUNC + GLfloat ref; // GL_ALPHA_TEST_REF + } alpha_test; + std::array<bool, 8> clip_distance; // GL_CLIP_DISTANCE OpenGLState(); @@ -215,6 +221,7 @@ public: void ApplySamplers() const; void ApplyDepthClamp() const; void ApplyPolygonOffset() const; + void ApplyAlphaTest() const; /// Set the initial OpenGL state static void ApplyDefaultState(); diff --git a/src/video_core/renderer_opengl/maxwell_to_gl.h b/src/video_core/renderer_opengl/maxwell_to_gl.h index ed7b5cff0..ea77dd211 100644 --- a/src/video_core/renderer_opengl/maxwell_to_gl.h +++ b/src/video_core/renderer_opengl/maxwell_to_gl.h @@ -128,6 +128,8 @@ inline GLenum PrimitiveTopology(Maxwell::PrimitiveTopology topology) { return GL_TRIANGLE_STRIP; case Maxwell::PrimitiveTopology::TriangleFan: return GL_TRIANGLE_FAN; + case Maxwell::PrimitiveTopology::Quads: + return GL_QUADS; default: LOG_CRITICAL(Render_OpenGL, "Unimplemented topology={}", static_cast<u32>(topology)); UNREACHABLE(); @@ -173,11 +175,8 @@ inline GLenum WrapMode(Tegra::Texture::WrapMode wrap_mode) { return GL_CLAMP_TO_EDGE; case Tegra::Texture::WrapMode::Border: return GL_CLAMP_TO_BORDER; - case Tegra::Texture::WrapMode::ClampOGL: - // TODO(Subv): GL_CLAMP was removed as of OpenGL 3.1, to implement GL_CLAMP, we can use - // GL_CLAMP_TO_BORDER to get the border color of the texture, and then sample the edge to - // manually mix them. However the shader part of this is not yet implemented. - return GL_CLAMP_TO_BORDER; + case Tegra::Texture::WrapMode::Clamp: + return GL_CLAMP; case Tegra::Texture::WrapMode::MirrorOnceClampToEdge: return GL_MIRROR_CLAMP_TO_EDGE; case Tegra::Texture::WrapMode::MirrorOnceBorder: diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index 3451d321d..aafd6f31b 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -18,7 +18,6 @@ #include "core/perf_stats.h" #include "core/settings.h" #include "core/telemetry_session.h" -#include "core/tracer/recorder.h" #include "video_core/morton.h" #include "video_core/renderer_opengl/gl_rasterizer.h" #include "video_core/renderer_opengl/renderer_opengl.h" |
