diff options
Diffstat (limited to 'src/video_core/renderer_opengl')
29 files changed, 2449 insertions, 2633 deletions
diff --git a/src/video_core/renderer_opengl/gl_framebuffer_cache.cpp b/src/video_core/renderer_opengl/gl_framebuffer_cache.cpp index 874ed3c6e..b8a512cb6 100644 --- a/src/video_core/renderer_opengl/gl_framebuffer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_framebuffer_cache.cpp @@ -11,7 +11,6 @@ #include "common/common_types.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/renderer_opengl/gl_framebuffer_cache.h" -#include "video_core/renderer_opengl/gl_state.h" namespace OpenGL { @@ -36,8 +35,7 @@ OGLFramebuffer FramebufferCacheOpenGL::CreateFramebuffer(const FramebufferCacheK framebuffer.Create(); // TODO(Rodrigo): Use DSA here after Nvidia fixes their framebuffer DSA bugs. - local_state.draw.draw_framebuffer = framebuffer.handle; - local_state.ApplyFramebufferState(); + glBindFramebuffer(GL_DRAW_FRAMEBUFFER, framebuffer.handle); if (key.zeta) { const bool stencil = key.zeta->GetSurfaceParams().type == SurfaceType::DepthStencil; diff --git a/src/video_core/renderer_opengl/gl_framebuffer_cache.h b/src/video_core/renderer_opengl/gl_framebuffer_cache.h index 02ec80ae9..8f698fee0 100644 --- a/src/video_core/renderer_opengl/gl_framebuffer_cache.h +++ b/src/video_core/renderer_opengl/gl_framebuffer_cache.h @@ -13,7 +13,6 @@ #include "common/common_types.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/renderer_opengl/gl_resource_manager.h" -#include "video_core/renderer_opengl/gl_state.h" #include "video_core/renderer_opengl/gl_texture_cache.h" namespace OpenGL { @@ -63,7 +62,6 @@ public: private: OGLFramebuffer CreateFramebuffer(const FramebufferCacheKey& key); - OpenGLState local_state; std::unordered_map<FramebufferCacheKey, OGLFramebuffer> cache; }; diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index e1965fb21..826eee7df 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -28,7 +28,6 @@ #include "video_core/renderer_opengl/gl_query_cache.h" #include "video_core/renderer_opengl/gl_rasterizer.h" #include "video_core/renderer_opengl/gl_shader_cache.h" -#include "video_core/renderer_opengl/gl_shader_gen.h" #include "video_core/renderer_opengl/maxwell_to_gl.h" #include "video_core/renderer_opengl/renderer_opengl.h" @@ -36,6 +35,7 @@ namespace OpenGL { using Maxwell = Tegra::Engines::Maxwell3D::Regs; +using Tegra::Engines::ShaderType; using VideoCore::Surface::PixelFormat; using VideoCore::Surface::SurfaceTarget; using VideoCore::Surface::SurfaceType; @@ -54,10 +54,11 @@ MICROPROFILE_DEFINE(OpenGL_PrimitiveAssembly, "OpenGL", "Prim Asmbl", MP_RGB(255 namespace { +constexpr std::size_t NumSupportedVertexAttributes = 16; + template <typename Engine, typename Entry> Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry& entry, - Tegra::Engines::ShaderType shader_type, - std::size_t index = 0) { + ShaderType shader_type, std::size_t index = 0) { if (entry.IsBindless()) { const Tegra::Texture::TextureHandle tex_handle = engine.AccessConstBuffer32(shader_type, entry.GetBuffer(), entry.GetOffset()); @@ -74,7 +75,7 @@ Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry } std::size_t GetConstBufferSize(const Tegra::Engines::ConstBufferInfo& buffer, - const GLShader::ConstBufferEntry& entry) { + const ConstBufferEntry& entry) { if (!entry.IsIndirect()) { return entry.GetSize(); } @@ -88,18 +89,19 @@ std::size_t GetConstBufferSize(const Tegra::Engines::ConstBufferInfo& buffer, return buffer.size; } +void oglEnable(GLenum cap, bool state) { + (state ? glEnable : glDisable)(cap); +} + } // Anonymous namespace RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window, - ScreenInfo& info) - : RasterizerAccelerated{system.Memory()}, texture_cache{system, *this, device}, + ScreenInfo& info, GLShader::ProgramManager& program_manager, + StateTracker& state_tracker) + : RasterizerAccelerated{system.Memory()}, texture_cache{system, *this, device, state_tracker}, shader_cache{*this, system, emu_window, device}, query_cache{system, *this}, system{system}, - screen_info{info}, buffer_cache{*this, system, device, STREAM_BUFFER_SIZE} { - shader_program_manager = std::make_unique<GLShader::ProgramManager>(); - state.draw.shader_program = 0; - state.Apply(); - - LOG_DEBUG(Render_OpenGL, "Sync fixed function OpenGL state here"); + screen_info{info}, program_manager{program_manager}, state_tracker{state_tracker}, + buffer_cache{*this, system, device, STREAM_BUFFER_SIZE} { CheckExtensions(); } @@ -113,93 +115,72 @@ void RasterizerOpenGL::CheckExtensions() { } } -GLuint RasterizerOpenGL::SetupVertexFormat() { +void RasterizerOpenGL::SetupVertexFormat() { auto& gpu = system.GPU().Maxwell3D(); - const auto& regs = gpu.regs; - - if (!gpu.dirty.vertex_attrib_format) { - return state.draw.vertex_array; + auto& flags = gpu.dirty.flags; + if (!flags[Dirty::VertexFormats]) { + return; } - gpu.dirty.vertex_attrib_format = false; + flags[Dirty::VertexFormats] = false; MICROPROFILE_SCOPE(OpenGL_VAO); - auto [iter, is_cache_miss] = vertex_array_cache.try_emplace(regs.vertex_attrib_format); - auto& vao_entry = iter->second; - - if (is_cache_miss) { - vao_entry.Create(); - const GLuint vao = vao_entry.handle; - - // Eventhough we are using DSA to create this vertex array, there is a bug on Intel's blob - // that fails to properly create the vertex array if it's not bound even after creating it - // with glCreateVertexArrays - state.draw.vertex_array = vao; - state.ApplyVertexArrayState(); - - // Use the vertex array as-is, assumes that the data is formatted correctly for OpenGL. - // Enables the first 16 vertex attributes always, as we don't know which ones are actually - // used until shader time. Note, Tegra technically supports 32, but we're capping this to 16 - // for now to avoid OpenGL errors. - // TODO(Subv): Analyze the shader to identify which attributes are actually used and don't - // assume every shader uses them all. - for (u32 index = 0; index < 16; ++index) { - const auto& attrib = regs.vertex_attrib_format[index]; - - // Ignore invalid attributes. - if (!attrib.IsValid()) - continue; - - const auto& buffer = regs.vertex_array[attrib.buffer]; - LOG_TRACE(Render_OpenGL, - "vertex attrib {}, count={}, size={}, type={}, offset={}, normalize={}", - index, attrib.ComponentCount(), attrib.SizeString(), attrib.TypeString(), - attrib.offset.Value(), attrib.IsNormalized()); - - ASSERT(buffer.IsEnabled()); - - glEnableVertexArrayAttrib(vao, index); - if (attrib.type == Tegra::Engines::Maxwell3D::Regs::VertexAttribute::Type::SignedInt || - attrib.type == - Tegra::Engines::Maxwell3D::Regs::VertexAttribute::Type::UnsignedInt) { - glVertexArrayAttribIFormat(vao, index, attrib.ComponentCount(), - MaxwellToGL::VertexType(attrib), attrib.offset); - } else { - glVertexArrayAttribFormat( - vao, index, attrib.ComponentCount(), MaxwellToGL::VertexType(attrib), - attrib.IsNormalized() ? GL_TRUE : GL_FALSE, attrib.offset); - } - glVertexArrayAttribBinding(vao, index, attrib.buffer); + // Use the vertex array as-is, assumes that the data is formatted correctly for OpenGL. Enables + // the first 16 vertex attributes always, as we don't know which ones are actually used until + // shader time. Note, Tegra technically supports 32, but we're capping this to 16 for now to + // avoid OpenGL errors. + // TODO(Subv): Analyze the shader to identify which attributes are actually used and don't + // assume every shader uses them all. + for (std::size_t index = 0; index < NumSupportedVertexAttributes; ++index) { + if (!flags[Dirty::VertexFormat0 + index]) { + continue; } - } + flags[Dirty::VertexFormat0 + index] = false; + + const auto attrib = gpu.regs.vertex_attrib_format[index]; + const auto gl_index = static_cast<GLuint>(index); - // Rebinding the VAO invalidates the vertex buffer bindings. - gpu.dirty.ResetVertexArrays(); + // Ignore invalid attributes. + if (!attrib.IsValid()) { + glDisableVertexAttribArray(gl_index); + continue; + } + glEnableVertexAttribArray(gl_index); - state.draw.vertex_array = vao_entry.handle; - return vao_entry.handle; + if (attrib.type == Maxwell::VertexAttribute::Type::SignedInt || + attrib.type == Maxwell::VertexAttribute::Type::UnsignedInt) { + glVertexAttribIFormat(gl_index, attrib.ComponentCount(), + MaxwellToGL::VertexType(attrib), attrib.offset); + } else { + glVertexAttribFormat(gl_index, attrib.ComponentCount(), MaxwellToGL::VertexType(attrib), + attrib.IsNormalized() ? GL_TRUE : GL_FALSE, attrib.offset); + } + glVertexAttribBinding(gl_index, attrib.buffer); + } } -void RasterizerOpenGL::SetupVertexBuffer(GLuint vao) { +void RasterizerOpenGL::SetupVertexBuffer() { auto& gpu = system.GPU().Maxwell3D(); - if (!gpu.dirty.vertex_array_buffers) + auto& flags = gpu.dirty.flags; + if (!flags[Dirty::VertexBuffers]) { return; - gpu.dirty.vertex_array_buffers = false; - - const auto& regs = gpu.regs; + } + flags[Dirty::VertexBuffers] = false; MICROPROFILE_SCOPE(OpenGL_VB); // Upload all guest vertex arrays sequentially to our buffer - for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) { - if (!gpu.dirty.vertex_array[index]) + const auto& regs = gpu.regs; + for (std::size_t index = 0; index < Maxwell::NumVertexArrays; ++index) { + if (!flags[Dirty::VertexBuffer0 + index]) { continue; - gpu.dirty.vertex_array[index] = false; - gpu.dirty.vertex_instance[index] = false; + } + flags[Dirty::VertexBuffer0 + index] = false; const auto& vertex_array = regs.vertex_array[index]; - if (!vertex_array.IsEnabled()) + if (!vertex_array.IsEnabled()) { continue; + } const GPUVAddr start = vertex_array.StartAddress(); const GPUVAddr end = regs.vertex_array_limit[index].LimitAddress(); @@ -209,42 +190,30 @@ void RasterizerOpenGL::SetupVertexBuffer(GLuint vao) { const auto [vertex_buffer, vertex_buffer_offset] = buffer_cache.UploadMemory(start, size); // Bind the vertex array to the buffer at the current offset. - vertex_array_pushbuffer.SetVertexBuffer(index, vertex_buffer, vertex_buffer_offset, - vertex_array.stride); - - if (regs.instanced_arrays.IsInstancingEnabled(index) && vertex_array.divisor != 0) { - // Enable vertex buffer instancing with the specified divisor. - glVertexArrayBindingDivisor(vao, index, vertex_array.divisor); - } else { - // Disable the vertex buffer instancing. - glVertexArrayBindingDivisor(vao, index, 0); - } + vertex_array_pushbuffer.SetVertexBuffer(static_cast<GLuint>(index), vertex_buffer, + vertex_buffer_offset, vertex_array.stride); } } -void RasterizerOpenGL::SetupVertexInstances(GLuint vao) { +void RasterizerOpenGL::SetupVertexInstances() { auto& gpu = system.GPU().Maxwell3D(); - - if (!gpu.dirty.vertex_instances) + auto& flags = gpu.dirty.flags; + if (!flags[Dirty::VertexInstances]) { return; - gpu.dirty.vertex_instances = false; + } + flags[Dirty::VertexInstances] = false; const auto& regs = gpu.regs; - // Upload all guest vertex arrays sequentially to our buffer - for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) { - if (!gpu.dirty.vertex_instance[index]) + for (std::size_t index = 0; index < NumSupportedVertexAttributes; ++index) { + if (!flags[Dirty::VertexInstance0 + index]) { continue; - - gpu.dirty.vertex_instance[index] = false; - - if (regs.instanced_arrays.IsInstancingEnabled(index) && - regs.vertex_array[index].divisor != 0) { - // Enable vertex buffer instancing with the specified divisor. - glVertexArrayBindingDivisor(vao, index, regs.vertex_array[index].divisor); - } else { - // Disable the vertex buffer instancing. - glVertexArrayBindingDivisor(vao, index, 0); } + flags[Dirty::VertexInstance0 + index] = false; + + const auto gl_index = static_cast<GLuint>(index); + const bool instancing_enabled = regs.instanced_arrays.IsInstancingEnabled(gl_index); + const GLuint divisor = instancing_enabled ? regs.vertex_array[index].divisor : 0; + glVertexBindingDivisor(gl_index, divisor); } } @@ -260,8 +229,7 @@ GLintptr RasterizerOpenGL::SetupIndexBuffer() { void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { MICROPROFILE_SCOPE(OpenGL_Shader); auto& gpu = system.GPU().Maxwell3D(); - - std::array<bool, Maxwell::NumClipDistances> clip_distances{}; + u32 clip_distances = 0; for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { const auto& shader_config = gpu.regs.shader_config[index]; @@ -271,10 +239,10 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { if (!gpu.regs.IsShaderConfigEnabled(index)) { switch (program) { case Maxwell::ShaderProgram::Geometry: - shader_program_manager->UseTrivialGeometryShader(); + program_manager.UseGeometryShader(0); break; case Maxwell::ShaderProgram::Fragment: - shader_program_manager->UseTrivialFragmentShader(); + program_manager.UseFragmentShader(0); break; default: break; @@ -299,19 +267,17 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { SetupDrawTextures(stage, shader); SetupDrawImages(stage, shader); - const ProgramVariant variant(primitive_mode); - const auto program_handle = shader->GetHandle(variant); - + const GLuint program_handle = shader->GetHandle(); switch (program) { case Maxwell::ShaderProgram::VertexA: case Maxwell::ShaderProgram::VertexB: - shader_program_manager->UseProgrammableVertexShader(program_handle); + program_manager.UseVertexShader(program_handle); break; case Maxwell::ShaderProgram::Geometry: - shader_program_manager->UseProgrammableGeometryShader(program_handle); + program_manager.UseGeometryShader(program_handle); break; case Maxwell::ShaderProgram::Fragment: - shader_program_manager->UseProgrammableFragmentShader(program_handle); + program_manager.UseFragmentShader(program_handle); break; default: UNIMPLEMENTED_MSG("Unimplemented shader index={}, enable={}, offset=0x{:08X}", index, @@ -322,9 +288,7 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { // When a clip distance is enabled but not set in the shader it crops parts of the screen // (sometimes it's half the screen, sometimes three quarters). To avoid this, enable the // clip distances only when it's written by a shader stage. - for (std::size_t i = 0; i < Maxwell::NumClipDistances; ++i) { - clip_distances[i] = clip_distances[i] || shader->GetShaderEntries().clip_distances[i]; - } + clip_distances |= shader->GetEntries().clip_distances; // When VertexA is enabled, we have dual vertex shaders if (program == Maxwell::ShaderProgram::VertexA) { @@ -334,8 +298,7 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { } SyncClipEnabled(clip_distances); - - gpu.dirty.shaders = false; + gpu.dirty.flags[Dirty::Shaders] = false; } std::size_t RasterizerOpenGL::CalculateVertexArraysSize() const { @@ -368,20 +331,23 @@ void RasterizerOpenGL::LoadDiskResources(const std::atomic_bool& stop_loading, shader_cache.LoadDiskCache(stop_loading, callback); } +void RasterizerOpenGL::SetupDirtyFlags() { + state_tracker.Initialize(); +} + void RasterizerOpenGL::ConfigureFramebuffers() { MICROPROFILE_SCOPE(OpenGL_Framebuffer); auto& gpu = system.GPU().Maxwell3D(); - if (!gpu.dirty.render_settings) { + if (!gpu.dirty.flags[VideoCommon::Dirty::RenderTargets]) { return; } - gpu.dirty.render_settings = false; + gpu.dirty.flags[VideoCommon::Dirty::RenderTargets] = false; texture_cache.GuardRenderTargets(true); View depth_surface = texture_cache.GetDepthBufferSurface(true); const auto& regs = gpu.regs; - state.framebuffer_srgb.enabled = regs.framebuffer_srgb != 0; UNIMPLEMENTED_IF(regs.rt_separate_frag_data == 0); // Bind the framebuffer surfaces @@ -409,14 +375,11 @@ void RasterizerOpenGL::ConfigureFramebuffers() { texture_cache.GuardRenderTargets(false); - state.draw.draw_framebuffer = framebuffer_cache.GetFramebuffer(key); - SyncViewport(state); + glBindFramebuffer(GL_DRAW_FRAMEBUFFER, framebuffer_cache.GetFramebuffer(key)); } -void RasterizerOpenGL::ConfigureClearFramebuffer(OpenGLState& current_state, bool using_color_fb, - bool using_depth_fb, bool using_stencil_fb) { - using VideoCore::Surface::SurfaceType; - +void RasterizerOpenGL::ConfigureClearFramebuffer(bool using_color_fb, bool using_depth_fb, + bool using_stencil_fb) { auto& gpu = system.GPU().Maxwell3D(); const auto& regs = gpu.regs; @@ -435,80 +398,44 @@ void RasterizerOpenGL::ConfigureClearFramebuffer(OpenGLState& current_state, boo key.colors[0] = color_surface; key.zeta = depth_surface; - current_state.draw.draw_framebuffer = framebuffer_cache.GetFramebuffer(key); - current_state.ApplyFramebufferState(); + state_tracker.NotifyFramebuffer(); + glBindFramebuffer(GL_DRAW_FRAMEBUFFER, framebuffer_cache.GetFramebuffer(key)); } void RasterizerOpenGL::Clear() { - const auto& maxwell3d = system.GPU().Maxwell3D(); - - if (!maxwell3d.ShouldExecute()) { + const auto& gpu = system.GPU().Maxwell3D(); + if (!gpu.ShouldExecute()) { return; } - const auto& regs = maxwell3d.regs; + const auto& regs = gpu.regs; bool use_color{}; bool use_depth{}; bool use_stencil{}; - OpenGLState prev_state{OpenGLState::GetCurState()}; - SCOPE_EXIT({ - prev_state.AllDirty(); - prev_state.Apply(); - }); - - OpenGLState clear_state{OpenGLState::GetCurState()}; - clear_state.SetDefaultViewports(); if (regs.clear_buffers.R || regs.clear_buffers.G || regs.clear_buffers.B || regs.clear_buffers.A) { use_color = true; } if (use_color) { - clear_state.color_mask[0].red_enabled = regs.clear_buffers.R ? GL_TRUE : GL_FALSE; - clear_state.color_mask[0].green_enabled = regs.clear_buffers.G ? GL_TRUE : GL_FALSE; - clear_state.color_mask[0].blue_enabled = regs.clear_buffers.B ? GL_TRUE : GL_FALSE; - clear_state.color_mask[0].alpha_enabled = regs.clear_buffers.A ? GL_TRUE : GL_FALSE; + state_tracker.NotifyColorMask0(); + glColorMaski(0, regs.clear_buffers.R != 0, regs.clear_buffers.G != 0, + regs.clear_buffers.B != 0, regs.clear_buffers.A != 0); + + // TODO(Rodrigo): Determine if clamping is used on clears + SyncFragmentColorClampState(); + SyncFramebufferSRGB(); } if (regs.clear_buffers.Z) { ASSERT_MSG(regs.zeta_enable != 0, "Tried to clear Z but buffer is not enabled!"); use_depth = true; - // Always enable the depth write when clearing the depth buffer. The depth write mask is - // ignored when clearing the buffer in the Switch, but OpenGL obeys it so we set it to - // true. - clear_state.depth.test_enabled = true; - clear_state.depth.test_func = GL_ALWAYS; - clear_state.depth.write_mask = GL_TRUE; + state_tracker.NotifyDepthMask(); + glDepthMask(GL_TRUE); } if (regs.clear_buffers.S) { - ASSERT_MSG(regs.zeta_enable != 0, "Tried to clear stencil but buffer is not enabled!"); + ASSERT_MSG(regs.zeta_enable, "Tried to clear stencil but buffer is not enabled!"); use_stencil = true; - clear_state.stencil.test_enabled = true; - - if (regs.clear_flags.stencil) { - // Stencil affects the clear so fill it with the used masks - clear_state.stencil.front.test_func = GL_ALWAYS; - clear_state.stencil.front.test_mask = regs.stencil_front_func_mask; - clear_state.stencil.front.action_stencil_fail = GL_KEEP; - clear_state.stencil.front.action_depth_fail = GL_KEEP; - clear_state.stencil.front.action_depth_pass = GL_KEEP; - clear_state.stencil.front.write_mask = regs.stencil_front_mask; - if (regs.stencil_two_side_enable) { - clear_state.stencil.back.test_func = GL_ALWAYS; - clear_state.stencil.back.test_mask = regs.stencil_back_func_mask; - clear_state.stencil.back.action_stencil_fail = GL_KEEP; - clear_state.stencil.back.action_depth_fail = GL_KEEP; - clear_state.stencil.back.action_depth_pass = GL_KEEP; - clear_state.stencil.back.write_mask = regs.stencil_back_mask; - } else { - clear_state.stencil.back.test_func = GL_ALWAYS; - clear_state.stencil.back.test_mask = 0xFFFFFFFF; - clear_state.stencil.back.write_mask = 0xFFFFFFFF; - clear_state.stencil.back.action_stencil_fail = GL_KEEP; - clear_state.stencil.back.action_depth_fail = GL_KEEP; - clear_state.stencil.back.action_depth_pass = GL_KEEP; - } - } } if (!use_color && !use_depth && !use_stencil) { @@ -516,20 +443,18 @@ void RasterizerOpenGL::Clear() { return; } - ConfigureClearFramebuffer(clear_state, use_color, use_depth, use_stencil); + SyncRasterizeEnable(); - SyncViewport(clear_state); - SyncRasterizeEnable(clear_state); if (regs.clear_flags.scissor) { - SyncScissorTest(clear_state); + SyncScissorTest(); + } else { + state_tracker.NotifyScissor0(); + glDisablei(GL_SCISSOR_TEST, 0); } - if (regs.clear_flags.viewport) { - clear_state.EmulateViewportWithScissor(); - } + UNIMPLEMENTED_IF(regs.clear_flags.viewport); - clear_state.AllDirty(); - clear_state.Apply(); + ConfigureClearFramebuffer(use_color, use_depth, use_stencil); if (use_color) { glClearBufferfv(GL_COLOR, 0, regs.clear_color); @@ -549,25 +474,27 @@ void RasterizerOpenGL::Clear() { void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { MICROPROFILE_SCOPE(OpenGL_Drawing); auto& gpu = system.GPU().Maxwell3D(); - const auto& regs = gpu.regs; query_cache.UpdateCounters(); - SyncRasterizeEnable(state); + SyncViewport(); + SyncRasterizeEnable(); + SyncPolygonModes(); SyncColorMask(); SyncFragmentColorClampState(); SyncMultiSampleState(); SyncDepthTestState(); + SyncDepthClamp(); SyncStencilTestState(); SyncBlendState(); SyncLogicOpState(); SyncCullMode(); SyncPrimitiveRestart(); - SyncScissorTest(state); - SyncTransformFeedback(); + SyncScissorTest(); SyncPointState(); SyncPolygonOffset(); SyncAlphaTest(); + SyncFramebufferSRGB(); buffer_cache.Acquire(); @@ -591,14 +518,13 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { buffer_cache.Map(buffer_size); // Prepare vertex array format. - const GLuint vao = SetupVertexFormat(); - vertex_array_pushbuffer.Setup(vao); + SetupVertexFormat(); + vertex_array_pushbuffer.Setup(); // Upload vertex and index data. - SetupVertexBuffer(vao); - SetupVertexInstances(vao); - - GLintptr index_buffer_offset; + SetupVertexBuffer(); + SetupVertexInstances(); + GLintptr index_buffer_offset = 0; if (is_indexed) { index_buffer_offset = SetupIndexBuffer(); } @@ -624,27 +550,20 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { ConfigureFramebuffers(); // Signal the buffer cache that we are not going to upload more things. - const bool invalidate = buffer_cache.Unmap(); + buffer_cache.Unmap(); // Now that we are no longer uploading data, we can safely bind the buffers to OpenGL. vertex_array_pushbuffer.Bind(); bind_ubo_pushbuffer.Bind(); bind_ssbo_pushbuffer.Bind(); - if (invalidate) { - // As all cached buffers are invalidated, we need to recheck their state. - gpu.dirty.ResetVertexArrays(); - } - gpu.dirty.memory_general = false; - - shader_program_manager->ApplyTo(state); - state.Apply(); + program_manager.BindGraphicsPipeline(); if (texture_cache.TextureBarrier()) { glTextureBarrier(); } - ++num_queued_commands; + BeginTransformFeedback(primitive_mode); const GLuint base_instance = static_cast<GLuint>(gpu.regs.vb_base_instance); const GLsizei num_instances = @@ -683,6 +602,10 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { num_instances, base_instance); } } + + EndTransformFeedback(); + + ++num_queued_commands; } void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) { @@ -695,13 +618,7 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) { auto kernel = shader_cache.GetComputeKernel(code_addr); SetupComputeTextures(kernel); SetupComputeImages(kernel); - - const auto& launch_desc = system.GPU().KeplerCompute().launch_description; - const ProgramVariant variant(launch_desc.block_dim_x, launch_desc.block_dim_y, - launch_desc.block_dim_z, launch_desc.shared_alloc, - launch_desc.local_pos_alloc); - state.draw.shader_program = kernel->GetHandle(variant); - state.draw.program_pipeline = 0; + program_manager.BindComputeShader(kernel->GetHandle()); const std::size_t buffer_size = Tegra::Engines::KeplerCompute::NumConstBuffers * @@ -719,11 +636,7 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) { bind_ubo_pushbuffer.Bind(); bind_ssbo_pushbuffer.Bind(); - state.ApplyTextures(); - state.ApplyImages(); - state.ApplyShaderProgram(); - state.ApplyProgramPipeline(); - + const auto& launch_desc = system.GPU().KeplerCompute().launch_description; glDispatchCompute(launch_desc.grid_dim_x, launch_desc.grid_dim_y, launch_desc.grid_dim_z); ++num_queued_commands; } @@ -828,7 +741,7 @@ void RasterizerOpenGL::SetupDrawConstBuffers(std::size_t stage_index, const Shad const auto& shader_stage = stages[stage_index]; u32 binding = device.GetBaseBindings(stage_index).uniform_buffer; - for (const auto& entry : shader->GetShaderEntries().const_buffers) { + for (const auto& entry : shader->GetEntries().const_buffers) { const auto& buffer = shader_stage.const_buffers[entry.GetIndex()]; SetupConstBuffer(binding++, buffer, entry); } @@ -839,7 +752,7 @@ void RasterizerOpenGL::SetupComputeConstBuffers(const Shader& kernel) { const auto& launch_desc = system.GPU().KeplerCompute().launch_description; u32 binding = 0; - for (const auto& entry : kernel->GetShaderEntries().const_buffers) { + for (const auto& entry : kernel->GetEntries().const_buffers) { const auto& config = launch_desc.const_buffer_config[entry.GetIndex()]; const std::bitset<8> mask = launch_desc.const_buffer_enable_mask.Value(); Tegra::Engines::ConstBufferInfo buffer; @@ -851,7 +764,7 @@ void RasterizerOpenGL::SetupComputeConstBuffers(const Shader& kernel) { } void RasterizerOpenGL::SetupConstBuffer(u32 binding, const Tegra::Engines::ConstBufferInfo& buffer, - const GLShader::ConstBufferEntry& entry) { + const ConstBufferEntry& entry) { if (!buffer.enabled) { // Set values to zero to unbind buffers bind_ubo_pushbuffer.Push(binding, buffer_cache.GetEmptyBuffer(sizeof(float)), 0, @@ -875,7 +788,7 @@ void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, const Shad const auto cbufs{gpu.Maxwell3D().state.shader_stages[stage_index]}; u32 binding = device.GetBaseBindings(stage_index).shader_storage_buffer; - for (const auto& entry : shader->GetShaderEntries().global_memory_entries) { + for (const auto& entry : shader->GetEntries().global_memory_entries) { const auto addr{cbufs.const_buffers[entry.GetCbufIndex()].address + entry.GetCbufOffset()}; const auto gpu_addr{memory_manager.Read<u64>(addr)}; const auto size{memory_manager.Read<u32>(addr + 8)}; @@ -889,7 +802,7 @@ void RasterizerOpenGL::SetupComputeGlobalMemory(const Shader& kernel) { const auto cbufs{gpu.KeplerCompute().launch_description.const_buffer_config}; u32 binding = 0; - for (const auto& entry : kernel->GetShaderEntries().global_memory_entries) { + for (const auto& entry : kernel->GetEntries().global_memory_entries) { const auto addr{cbufs[entry.GetCbufIndex()].Address() + entry.GetCbufOffset()}; const auto gpu_addr{memory_manager.Read<u64>(addr)}; const auto size{memory_manager.Read<u32>(addr + 8)}; @@ -897,7 +810,7 @@ void RasterizerOpenGL::SetupComputeGlobalMemory(const Shader& kernel) { } } -void RasterizerOpenGL::SetupGlobalMemory(u32 binding, const GLShader::GlobalMemoryEntry& entry, +void RasterizerOpenGL::SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& entry, GPUVAddr gpu_addr, std::size_t size) { const auto alignment{device.GetShaderStorageBufferAlignment()}; const auto [ssbo, buffer_offset] = @@ -909,16 +822,11 @@ void RasterizerOpenGL::SetupDrawTextures(std::size_t stage_index, const Shader& MICROPROFILE_SCOPE(OpenGL_Texture); const auto& maxwell3d = system.GPU().Maxwell3D(); u32 binding = device.GetBaseBindings(stage_index).sampler; - for (const auto& entry : shader->GetShaderEntries().samplers) { - const auto shader_type = static_cast<Tegra::Engines::ShaderType>(stage_index); - if (!entry.IsIndexed()) { - const auto texture = GetTextureInfo(maxwell3d, entry, shader_type); + for (const auto& entry : shader->GetEntries().samplers) { + const auto shader_type = static_cast<ShaderType>(stage_index); + for (std::size_t i = 0; i < entry.Size(); ++i) { + const auto texture = GetTextureInfo(maxwell3d, entry, shader_type, i); SetupTexture(binding++, texture, entry); - } else { - for (std::size_t i = 0; i < entry.Size(); ++i) { - const auto texture = GetTextureInfo(maxwell3d, entry, shader_type, i); - SetupTexture(binding++, texture, entry); - } } } } @@ -927,46 +835,39 @@ void RasterizerOpenGL::SetupComputeTextures(const Shader& kernel) { MICROPROFILE_SCOPE(OpenGL_Texture); const auto& compute = system.GPU().KeplerCompute(); u32 binding = 0; - for (const auto& entry : kernel->GetShaderEntries().samplers) { - if (!entry.IsIndexed()) { - const auto texture = - GetTextureInfo(compute, entry, Tegra::Engines::ShaderType::Compute); + for (const auto& entry : kernel->GetEntries().samplers) { + for (std::size_t i = 0; i < entry.Size(); ++i) { + const auto texture = GetTextureInfo(compute, entry, ShaderType::Compute, i); SetupTexture(binding++, texture, entry); - } else { - for (std::size_t i = 0; i < entry.Size(); ++i) { - const auto texture = - GetTextureInfo(compute, entry, Tegra::Engines::ShaderType::Compute, i); - SetupTexture(binding++, texture, entry); - } } } } void RasterizerOpenGL::SetupTexture(u32 binding, const Tegra::Texture::FullTextureInfo& texture, - const GLShader::SamplerEntry& entry) { + const SamplerEntry& entry) { const auto view = texture_cache.GetTextureSurface(texture.tic, entry); if (!view) { // Can occur when texture addr is null or its memory is unmapped/invalid - state.samplers[binding] = 0; - state.textures[binding] = 0; + glBindSampler(binding, 0); + glBindTextureUnit(binding, 0); return; } - state.textures[binding] = view->GetTexture(); + glBindTextureUnit(binding, view->GetTexture()); if (view->GetSurfaceParams().IsBuffer()) { return; } - state.samplers[binding] = sampler_cache.GetSampler(texture.tsc); - // Apply swizzle to textures that are not buffers. view->ApplySwizzle(texture.tic.x_source, texture.tic.y_source, texture.tic.z_source, texture.tic.w_source); + + glBindSampler(binding, sampler_cache.GetSampler(texture.tsc)); } void RasterizerOpenGL::SetupDrawImages(std::size_t stage_index, const Shader& shader) { const auto& maxwell3d = system.GPU().Maxwell3D(); u32 binding = device.GetBaseBindings(stage_index).image; - for (const auto& entry : shader->GetShaderEntries().images) { + for (const auto& entry : shader->GetEntries().images) { const auto shader_type = static_cast<Tegra::Engines::ShaderType>(stage_index); const auto tic = GetTextureInfo(maxwell3d, entry, shader_type).tic; SetupImage(binding++, tic, entry); @@ -976,17 +877,17 @@ void RasterizerOpenGL::SetupDrawImages(std::size_t stage_index, const Shader& sh void RasterizerOpenGL::SetupComputeImages(const Shader& shader) { const auto& compute = system.GPU().KeplerCompute(); u32 binding = 0; - for (const auto& entry : shader->GetShaderEntries().images) { + for (const auto& entry : shader->GetEntries().images) { const auto tic = GetTextureInfo(compute, entry, Tegra::Engines::ShaderType::Compute).tic; SetupImage(binding++, tic, entry); } } void RasterizerOpenGL::SetupImage(u32 binding, const Tegra::Texture::TICEntry& tic, - const GLShader::ImageEntry& entry) { + const ImageEntry& entry) { const auto view = texture_cache.GetImageSurface(tic, entry); if (!view) { - state.images[binding] = 0; + glBindImageTexture(binding, 0, 0, GL_FALSE, 0, GL_READ_ONLY, GL_R8); return; } if (!tic.IsBuffer()) { @@ -995,55 +896,87 @@ void RasterizerOpenGL::SetupImage(u32 binding, const Tegra::Texture::TICEntry& t if (entry.IsWritten()) { view->MarkAsModified(texture_cache.Tick()); } - state.images[binding] = view->GetTexture(); + glBindImageTexture(binding, view->GetTexture(), 0, GL_TRUE, 0, GL_READ_WRITE, + view->GetFormat()); } -void RasterizerOpenGL::SyncViewport(OpenGLState& current_state) { - const auto& regs = system.GPU().Maxwell3D().regs; - const bool geometry_shaders_enabled = - regs.IsShaderConfigEnabled(static_cast<size_t>(Maxwell::ShaderProgram::Geometry)); - const std::size_t viewport_count = - geometry_shaders_enabled ? Tegra::Engines::Maxwell3D::Regs::NumViewports : 1; - for (std::size_t i = 0; i < viewport_count; i++) { - auto& viewport = current_state.viewports[i]; - const auto& src = regs.viewports[i]; - const Common::Rectangle<s32> viewport_rect{regs.viewport_transform[i].GetRect()}; - viewport.x = viewport_rect.left; - viewport.y = viewport_rect.bottom; - viewport.width = viewport_rect.GetWidth(); - viewport.height = viewport_rect.GetHeight(); - viewport.depth_range_far = src.depth_range_far; - viewport.depth_range_near = src.depth_range_near; - } - state.depth_clamp.far_plane = regs.view_volume_clip_control.depth_clamp_far != 0; - state.depth_clamp.near_plane = regs.view_volume_clip_control.depth_clamp_near != 0; - - bool flip_y = false; - if (regs.viewport_transform[0].scale_y < 0.0) { - flip_y = !flip_y; - } - if (regs.screen_y_control.y_negate != 0) { - flip_y = !flip_y; - } - state.clip_control.origin = flip_y ? GL_UPPER_LEFT : GL_LOWER_LEFT; - state.clip_control.depth_mode = - regs.depth_mode == Tegra::Engines::Maxwell3D::Regs::DepthMode::ZeroToOne - ? GL_ZERO_TO_ONE - : GL_NEGATIVE_ONE_TO_ONE; +void RasterizerOpenGL::SyncViewport() { + auto& gpu = system.GPU().Maxwell3D(); + auto& flags = gpu.dirty.flags; + const auto& regs = gpu.regs; + + const bool dirty_viewport = flags[Dirty::Viewports]; + if (dirty_viewport || flags[Dirty::ClipControl]) { + flags[Dirty::ClipControl] = false; + + bool flip_y = false; + if (regs.viewport_transform[0].scale_y < 0.0) { + flip_y = !flip_y; + } + if (regs.screen_y_control.y_negate != 0) { + flip_y = !flip_y; + } + glClipControl(flip_y ? GL_UPPER_LEFT : GL_LOWER_LEFT, + regs.depth_mode == Maxwell::DepthMode::ZeroToOne ? GL_ZERO_TO_ONE + : GL_NEGATIVE_ONE_TO_ONE); + } + + if (dirty_viewport) { + flags[Dirty::Viewports] = false; + + const bool force = flags[Dirty::ViewportTransform]; + flags[Dirty::ViewportTransform] = false; + + for (std::size_t i = 0; i < Maxwell::NumViewports; ++i) { + if (!force && !flags[Dirty::Viewport0 + i]) { + continue; + } + flags[Dirty::Viewport0 + i] = false; + + const auto& src = regs.viewport_transform[i]; + const Common::Rectangle<f32> rect{src.GetRect()}; + glViewportIndexedf(static_cast<GLuint>(i), rect.left, rect.bottom, rect.GetWidth(), + rect.GetHeight()); + + const GLdouble reduce_z = regs.depth_mode == Maxwell::DepthMode::MinusOneToOne; + const GLdouble near_depth = src.translate_z - src.scale_z * reduce_z; + const GLdouble far_depth = src.translate_z + src.scale_z; + glDepthRangeIndexed(static_cast<GLuint>(i), near_depth, far_depth); + } + } } -void RasterizerOpenGL::SyncClipEnabled( - const std::array<bool, Maxwell::Regs::NumClipDistances>& clip_mask) { +void RasterizerOpenGL::SyncDepthClamp() { + auto& gpu = system.GPU().Maxwell3D(); + auto& flags = gpu.dirty.flags; + if (!flags[Dirty::DepthClampEnabled]) { + return; + } + flags[Dirty::DepthClampEnabled] = false; - const auto& regs = system.GPU().Maxwell3D().regs; - const std::array<bool, Maxwell::Regs::NumClipDistances> reg_state{ - regs.clip_distance_enabled.c0 != 0, regs.clip_distance_enabled.c1 != 0, - regs.clip_distance_enabled.c2 != 0, regs.clip_distance_enabled.c3 != 0, - regs.clip_distance_enabled.c4 != 0, regs.clip_distance_enabled.c5 != 0, - regs.clip_distance_enabled.c6 != 0, regs.clip_distance_enabled.c7 != 0}; + const auto& state = gpu.regs.view_volume_clip_control; + UNIMPLEMENTED_IF_MSG(state.depth_clamp_far != state.depth_clamp_near, + "Unimplemented depth clamp separation!"); + + oglEnable(GL_DEPTH_CLAMP, state.depth_clamp_far || state.depth_clamp_near); +} + +void RasterizerOpenGL::SyncClipEnabled(u32 clip_mask) { + auto& gpu = system.GPU().Maxwell3D(); + auto& flags = gpu.dirty.flags; + if (!flags[Dirty::ClipDistances] && !flags[Dirty::Shaders]) { + return; + } + flags[Dirty::ClipDistances] = false; + + clip_mask &= gpu.regs.clip_distance_enabled; + if (clip_mask == last_clip_distance_mask) { + return; + } + last_clip_distance_mask = clip_mask; for (std::size_t i = 0; i < Maxwell::Regs::NumClipDistances; ++i) { - state.clip_distance[i] = reg_state[i] && clip_mask[i]; + oglEnable(static_cast<GLenum>(GL_CLIP_DISTANCE0 + i), (clip_mask >> i) & 1); } } @@ -1052,247 +985,442 @@ void RasterizerOpenGL::SyncClipCoef() { } void RasterizerOpenGL::SyncCullMode() { - const auto& regs = system.GPU().Maxwell3D().regs; + auto& gpu = system.GPU().Maxwell3D(); + auto& flags = gpu.dirty.flags; + const auto& regs = gpu.regs; - state.cull.enabled = regs.cull.enabled != 0; - if (state.cull.enabled) { - state.cull.mode = MaxwellToGL::CullFace(regs.cull.cull_face); + if (flags[Dirty::CullTest]) { + flags[Dirty::CullTest] = false; + + if (regs.cull_test_enabled) { + glEnable(GL_CULL_FACE); + glCullFace(MaxwellToGL::CullFace(regs.cull_face)); + } else { + glDisable(GL_CULL_FACE); + } } - state.cull.front_face = MaxwellToGL::FrontFace(regs.cull.front_face); + if (flags[Dirty::FrontFace]) { + flags[Dirty::FrontFace] = false; + glFrontFace(MaxwellToGL::FrontFace(regs.front_face)); + } } void RasterizerOpenGL::SyncPrimitiveRestart() { - const auto& regs = system.GPU().Maxwell3D().regs; + auto& gpu = system.GPU().Maxwell3D(); + auto& flags = gpu.dirty.flags; + if (!flags[Dirty::PrimitiveRestart]) { + return; + } + flags[Dirty::PrimitiveRestart] = false; - state.primitive_restart.enabled = regs.primitive_restart.enabled; - state.primitive_restart.index = regs.primitive_restart.index; + if (gpu.regs.primitive_restart.enabled) { + glEnable(GL_PRIMITIVE_RESTART); + glPrimitiveRestartIndex(gpu.regs.primitive_restart.index); + } else { + glDisable(GL_PRIMITIVE_RESTART); + } } void RasterizerOpenGL::SyncDepthTestState() { - const auto& regs = system.GPU().Maxwell3D().regs; - - state.depth.test_enabled = regs.depth_test_enable != 0; - state.depth.write_mask = regs.depth_write_enabled ? GL_TRUE : GL_FALSE; + auto& gpu = system.GPU().Maxwell3D(); + auto& flags = gpu.dirty.flags; - if (!state.depth.test_enabled) { - return; + const auto& regs = gpu.regs; + if (flags[Dirty::DepthMask]) { + flags[Dirty::DepthMask] = false; + glDepthMask(regs.depth_write_enabled ? GL_TRUE : GL_FALSE); } - state.depth.test_func = MaxwellToGL::ComparisonOp(regs.depth_test_func); + if (flags[Dirty::DepthTest]) { + flags[Dirty::DepthTest] = false; + if (regs.depth_test_enable) { + glEnable(GL_DEPTH_TEST); + glDepthFunc(MaxwellToGL::ComparisonOp(regs.depth_test_func)); + } else { + glDisable(GL_DEPTH_TEST); + } + } } void RasterizerOpenGL::SyncStencilTestState() { - auto& maxwell3d = system.GPU().Maxwell3D(); - if (!maxwell3d.dirty.stencil_test) { + auto& gpu = system.GPU().Maxwell3D(); + auto& flags = gpu.dirty.flags; + if (!flags[Dirty::StencilTest]) { return; } - maxwell3d.dirty.stencil_test = false; - - const auto& regs = maxwell3d.regs; - state.stencil.test_enabled = regs.stencil_enable != 0; - state.MarkDirtyStencilState(); + flags[Dirty::StencilTest] = false; + const auto& regs = gpu.regs; if (!regs.stencil_enable) { + glDisable(GL_STENCIL_TEST); return; } - state.stencil.front.test_func = MaxwellToGL::ComparisonOp(regs.stencil_front_func_func); - state.stencil.front.test_ref = regs.stencil_front_func_ref; - state.stencil.front.test_mask = regs.stencil_front_func_mask; - state.stencil.front.action_stencil_fail = MaxwellToGL::StencilOp(regs.stencil_front_op_fail); - state.stencil.front.action_depth_fail = MaxwellToGL::StencilOp(regs.stencil_front_op_zfail); - state.stencil.front.action_depth_pass = MaxwellToGL::StencilOp(regs.stencil_front_op_zpass); - state.stencil.front.write_mask = regs.stencil_front_mask; + glEnable(GL_STENCIL_TEST); + glStencilFuncSeparate(GL_FRONT, MaxwellToGL::ComparisonOp(regs.stencil_front_func_func), + regs.stencil_front_func_ref, regs.stencil_front_func_mask); + glStencilOpSeparate(GL_FRONT, MaxwellToGL::StencilOp(regs.stencil_front_op_fail), + MaxwellToGL::StencilOp(regs.stencil_front_op_zfail), + MaxwellToGL::StencilOp(regs.stencil_front_op_zpass)); + glStencilMaskSeparate(GL_FRONT, regs.stencil_front_mask); + if (regs.stencil_two_side_enable) { - state.stencil.back.test_func = MaxwellToGL::ComparisonOp(regs.stencil_back_func_func); - state.stencil.back.test_ref = regs.stencil_back_func_ref; - state.stencil.back.test_mask = regs.stencil_back_func_mask; - state.stencil.back.action_stencil_fail = MaxwellToGL::StencilOp(regs.stencil_back_op_fail); - state.stencil.back.action_depth_fail = MaxwellToGL::StencilOp(regs.stencil_back_op_zfail); - state.stencil.back.action_depth_pass = MaxwellToGL::StencilOp(regs.stencil_back_op_zpass); - state.stencil.back.write_mask = regs.stencil_back_mask; + glStencilFuncSeparate(GL_BACK, MaxwellToGL::ComparisonOp(regs.stencil_back_func_func), + regs.stencil_back_func_ref, regs.stencil_back_func_mask); + glStencilOpSeparate(GL_BACK, MaxwellToGL::StencilOp(regs.stencil_back_op_fail), + MaxwellToGL::StencilOp(regs.stencil_back_op_zfail), + MaxwellToGL::StencilOp(regs.stencil_back_op_zpass)); + glStencilMaskSeparate(GL_BACK, regs.stencil_back_mask); } else { - state.stencil.back.test_func = GL_ALWAYS; - state.stencil.back.test_ref = 0; - state.stencil.back.test_mask = 0xFFFFFFFF; - state.stencil.back.write_mask = 0xFFFFFFFF; - state.stencil.back.action_stencil_fail = GL_KEEP; - state.stencil.back.action_depth_fail = GL_KEEP; - state.stencil.back.action_depth_pass = GL_KEEP; + glStencilFuncSeparate(GL_BACK, GL_ALWAYS, 0, 0xFFFFFFFF); + glStencilOpSeparate(GL_BACK, GL_KEEP, GL_KEEP, GL_KEEP); + glStencilMaskSeparate(GL_BACK, 0xFFFFFFFF); } } -void RasterizerOpenGL::SyncRasterizeEnable(OpenGLState& current_state) { - const auto& regs = system.GPU().Maxwell3D().regs; - current_state.rasterizer_discard = regs.rasterize_enable == 0; +void RasterizerOpenGL::SyncRasterizeEnable() { + auto& gpu = system.GPU().Maxwell3D(); + auto& flags = gpu.dirty.flags; + if (!flags[Dirty::RasterizeEnable]) { + return; + } + flags[Dirty::RasterizeEnable] = false; + + oglEnable(GL_RASTERIZER_DISCARD, gpu.regs.rasterize_enable == 0); +} + +void RasterizerOpenGL::SyncPolygonModes() { + auto& gpu = system.GPU().Maxwell3D(); + auto& flags = gpu.dirty.flags; + if (!flags[Dirty::PolygonModes]) { + return; + } + flags[Dirty::PolygonModes] = false; + + if (gpu.regs.fill_rectangle) { + if (!GLAD_GL_NV_fill_rectangle) { + LOG_ERROR(Render_OpenGL, "GL_NV_fill_rectangle used and not supported"); + glPolygonMode(GL_FRONT_AND_BACK, GL_FILL); + return; + } + + flags[Dirty::PolygonModeFront] = true; + flags[Dirty::PolygonModeBack] = true; + glPolygonMode(GL_FRONT_AND_BACK, GL_FILL_RECTANGLE_NV); + return; + } + + if (gpu.regs.polygon_mode_front == gpu.regs.polygon_mode_back) { + flags[Dirty::PolygonModeFront] = false; + flags[Dirty::PolygonModeBack] = false; + glPolygonMode(GL_FRONT_AND_BACK, MaxwellToGL::PolygonMode(gpu.regs.polygon_mode_front)); + return; + } + + if (flags[Dirty::PolygonModeFront]) { + flags[Dirty::PolygonModeFront] = false; + glPolygonMode(GL_FRONT, MaxwellToGL::PolygonMode(gpu.regs.polygon_mode_front)); + } + + if (flags[Dirty::PolygonModeBack]) { + flags[Dirty::PolygonModeBack] = false; + glPolygonMode(GL_BACK, MaxwellToGL::PolygonMode(gpu.regs.polygon_mode_back)); + } } void RasterizerOpenGL::SyncColorMask() { - auto& maxwell3d = system.GPU().Maxwell3D(); - if (!maxwell3d.dirty.color_mask) { + auto& gpu = system.GPU().Maxwell3D(); + auto& flags = gpu.dirty.flags; + if (!flags[Dirty::ColorMasks]) { return; } - const auto& regs = maxwell3d.regs; + flags[Dirty::ColorMasks] = false; + + const bool force = flags[Dirty::ColorMaskCommon]; + flags[Dirty::ColorMaskCommon] = false; + + const auto& regs = gpu.regs; + if (regs.color_mask_common) { + if (!force && !flags[Dirty::ColorMask0]) { + return; + } + flags[Dirty::ColorMask0] = false; - const std::size_t count = - regs.independent_blend_enable ? Tegra::Engines::Maxwell3D::Regs::NumRenderTargets : 1; - for (std::size_t i = 0; i < count; i++) { - const auto& source = regs.color_mask[regs.color_mask_common ? 0 : i]; - auto& dest = state.color_mask[i]; - dest.red_enabled = (source.R == 0) ? GL_FALSE : GL_TRUE; - dest.green_enabled = (source.G == 0) ? GL_FALSE : GL_TRUE; - dest.blue_enabled = (source.B == 0) ? GL_FALSE : GL_TRUE; - dest.alpha_enabled = (source.A == 0) ? GL_FALSE : GL_TRUE; + auto& mask = regs.color_mask[0]; + glColorMask(mask.R != 0, mask.B != 0, mask.G != 0, mask.A != 0); + return; } - state.MarkDirtyColorMask(); - maxwell3d.dirty.color_mask = false; + // Path without color_mask_common set + for (std::size_t i = 0; i < Maxwell::NumRenderTargets; ++i) { + if (!force && !flags[Dirty::ColorMask0 + i]) { + continue; + } + flags[Dirty::ColorMask0 + i] = false; + + const auto& mask = regs.color_mask[i]; + glColorMaski(static_cast<GLuint>(i), mask.R != 0, mask.G != 0, mask.B != 0, mask.A != 0); + } } void RasterizerOpenGL::SyncMultiSampleState() { + auto& gpu = system.GPU().Maxwell3D(); + auto& flags = gpu.dirty.flags; + if (!flags[Dirty::MultisampleControl]) { + return; + } + flags[Dirty::MultisampleControl] = false; + const auto& regs = system.GPU().Maxwell3D().regs; - state.multisample_control.alpha_to_coverage = regs.multisample_control.alpha_to_coverage != 0; - state.multisample_control.alpha_to_one = regs.multisample_control.alpha_to_one != 0; + oglEnable(GL_SAMPLE_ALPHA_TO_COVERAGE, regs.multisample_control.alpha_to_coverage); + oglEnable(GL_SAMPLE_ALPHA_TO_ONE, regs.multisample_control.alpha_to_one); } void RasterizerOpenGL::SyncFragmentColorClampState() { - const auto& regs = system.GPU().Maxwell3D().regs; - state.fragment_color_clamp.enabled = regs.frag_color_clamp != 0; + auto& gpu = system.GPU().Maxwell3D(); + auto& flags = gpu.dirty.flags; + if (!flags[Dirty::FragmentClampColor]) { + return; + } + flags[Dirty::FragmentClampColor] = false; + + glClampColor(GL_CLAMP_FRAGMENT_COLOR, gpu.regs.frag_color_clamp ? GL_TRUE : GL_FALSE); } void RasterizerOpenGL::SyncBlendState() { - auto& maxwell3d = system.GPU().Maxwell3D(); - if (!maxwell3d.dirty.blend_state) { + auto& gpu = system.GPU().Maxwell3D(); + auto& flags = gpu.dirty.flags; + const auto& regs = gpu.regs; + + if (flags[Dirty::BlendColor]) { + flags[Dirty::BlendColor] = false; + glBlendColor(regs.blend_color.r, regs.blend_color.g, regs.blend_color.b, + regs.blend_color.a); + } + + // TODO(Rodrigo): Revisit blending, there are several registers we are not reading + + if (!flags[Dirty::BlendStates]) { return; } - const auto& regs = maxwell3d.regs; - - state.blend_color.red = regs.blend_color.r; - state.blend_color.green = regs.blend_color.g; - state.blend_color.blue = regs.blend_color.b; - state.blend_color.alpha = regs.blend_color.a; - - state.independant_blend.enabled = regs.independent_blend_enable; - if (!state.independant_blend.enabled) { - auto& blend = state.blend[0]; - const auto& src = regs.blend; - blend.enabled = src.enable[0] != 0; - if (blend.enabled) { - blend.rgb_equation = MaxwellToGL::BlendEquation(src.equation_rgb); - blend.src_rgb_func = MaxwellToGL::BlendFunc(src.factor_source_rgb); - blend.dst_rgb_func = MaxwellToGL::BlendFunc(src.factor_dest_rgb); - blend.a_equation = MaxwellToGL::BlendEquation(src.equation_a); - blend.src_a_func = MaxwellToGL::BlendFunc(src.factor_source_a); - blend.dst_a_func = MaxwellToGL::BlendFunc(src.factor_dest_a); - } - for (std::size_t i = 1; i < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; i++) { - state.blend[i].enabled = false; + flags[Dirty::BlendStates] = false; + + if (!regs.independent_blend_enable) { + if (!regs.blend.enable[0]) { + glDisable(GL_BLEND); + return; } - maxwell3d.dirty.blend_state = false; - state.MarkDirtyBlendState(); + glEnable(GL_BLEND); + glBlendFuncSeparate(MaxwellToGL::BlendFunc(regs.blend.factor_source_rgb), + MaxwellToGL::BlendFunc(regs.blend.factor_dest_rgb), + MaxwellToGL::BlendFunc(regs.blend.factor_source_a), + MaxwellToGL::BlendFunc(regs.blend.factor_dest_a)); + glBlendEquationSeparate(MaxwellToGL::BlendEquation(regs.blend.equation_rgb), + MaxwellToGL::BlendEquation(regs.blend.equation_a)); return; } - for (std::size_t i = 0; i < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; i++) { - auto& blend = state.blend[i]; - const auto& src = regs.independent_blend[i]; - blend.enabled = regs.blend.enable[i] != 0; - if (!blend.enabled) + const bool force = flags[Dirty::BlendIndependentEnabled]; + flags[Dirty::BlendIndependentEnabled] = false; + + for (std::size_t i = 0; i < Maxwell::NumRenderTargets; ++i) { + if (!force && !flags[Dirty::BlendState0 + i]) { continue; - blend.rgb_equation = MaxwellToGL::BlendEquation(src.equation_rgb); - blend.src_rgb_func = MaxwellToGL::BlendFunc(src.factor_source_rgb); - blend.dst_rgb_func = MaxwellToGL::BlendFunc(src.factor_dest_rgb); - blend.a_equation = MaxwellToGL::BlendEquation(src.equation_a); - blend.src_a_func = MaxwellToGL::BlendFunc(src.factor_source_a); - blend.dst_a_func = MaxwellToGL::BlendFunc(src.factor_dest_a); - } + } + flags[Dirty::BlendState0 + i] = false; + + if (!regs.blend.enable[i]) { + glDisablei(GL_BLEND, static_cast<GLuint>(i)); + continue; + } + glEnablei(GL_BLEND, static_cast<GLuint>(i)); - state.MarkDirtyBlendState(); - maxwell3d.dirty.blend_state = false; + const auto& src = regs.independent_blend[i]; + glBlendFuncSeparatei(static_cast<GLuint>(i), MaxwellToGL::BlendFunc(src.factor_source_rgb), + MaxwellToGL::BlendFunc(src.factor_dest_rgb), + MaxwellToGL::BlendFunc(src.factor_source_a), + MaxwellToGL::BlendFunc(src.factor_dest_a)); + glBlendEquationSeparatei(static_cast<GLuint>(i), + MaxwellToGL::BlendEquation(src.equation_rgb), + MaxwellToGL::BlendEquation(src.equation_a)); + } } void RasterizerOpenGL::SyncLogicOpState() { - const auto& regs = system.GPU().Maxwell3D().regs; + auto& gpu = system.GPU().Maxwell3D(); + auto& flags = gpu.dirty.flags; + if (!flags[Dirty::LogicOp]) { + return; + } + flags[Dirty::LogicOp] = false; - state.logic_op.enabled = regs.logic_op.enable != 0; + const auto& regs = gpu.regs; + if (regs.logic_op.enable) { + glEnable(GL_COLOR_LOGIC_OP); + glLogicOp(MaxwellToGL::LogicOp(regs.logic_op.operation)); + } else { + glDisable(GL_COLOR_LOGIC_OP); + } +} - if (!state.logic_op.enabled) +void RasterizerOpenGL::SyncScissorTest() { + auto& gpu = system.GPU().Maxwell3D(); + auto& flags = gpu.dirty.flags; + if (!flags[Dirty::Scissors]) { return; + } + flags[Dirty::Scissors] = false; - ASSERT_MSG(regs.blend.enable[0] == 0, - "Blending and logic op can't be enabled at the same time."); - - state.logic_op.operation = MaxwellToGL::LogicOp(regs.logic_op.operation); -} + const auto& regs = gpu.regs; + for (std::size_t index = 0; index < Maxwell::NumViewports; ++index) { + if (!flags[Dirty::Scissor0 + index]) { + continue; + } + flags[Dirty::Scissor0 + index] = false; -void RasterizerOpenGL::SyncScissorTest(OpenGLState& current_state) { - const auto& regs = system.GPU().Maxwell3D().regs; - const bool geometry_shaders_enabled = - regs.IsShaderConfigEnabled(static_cast<size_t>(Maxwell::ShaderProgram::Geometry)); - const std::size_t viewport_count = - geometry_shaders_enabled ? Tegra::Engines::Maxwell3D::Regs::NumViewports : 1; - for (std::size_t i = 0; i < viewport_count; i++) { - const auto& src = regs.scissor_test[i]; - auto& dst = current_state.viewports[i].scissor; - dst.enabled = (src.enable != 0); - if (dst.enabled == 0) { - return; + const auto& src = regs.scissor_test[index]; + if (src.enable) { + glEnablei(GL_SCISSOR_TEST, static_cast<GLuint>(index)); + glScissorIndexed(static_cast<GLuint>(index), src.min_x, src.min_y, + src.max_x - src.min_x, src.max_y - src.min_y); + } else { + glDisablei(GL_SCISSOR_TEST, static_cast<GLuint>(index)); } - const u32 width = src.max_x - src.min_x; - const u32 height = src.max_y - src.min_y; - dst.x = src.min_x; - dst.y = src.min_y; - dst.width = width; - dst.height = height; } } -void RasterizerOpenGL::SyncTransformFeedback() { - const auto& regs = system.GPU().Maxwell3D().regs; - UNIMPLEMENTED_IF_MSG(regs.tfb_enabled != 0, "Transform feedbacks are not implemented"); -} - void RasterizerOpenGL::SyncPointState() { - const auto& regs = system.GPU().Maxwell3D().regs; + auto& gpu = system.GPU().Maxwell3D(); + auto& flags = gpu.dirty.flags; + if (!flags[Dirty::PointSize]) { + return; + } + flags[Dirty::PointSize] = false; + + oglEnable(GL_POINT_SPRITE, gpu.regs.point_sprite_enable); + + if (gpu.regs.vp_point_size.enable) { + // By definition of GL_POINT_SIZE, it only matters if GL_PROGRAM_POINT_SIZE is disabled. + glEnable(GL_PROGRAM_POINT_SIZE); + return; + } + // Limit the point size to 1 since nouveau sometimes sets a point size of 0 (and that's invalid // in OpenGL). - state.point.program_control = regs.vp_point_size.enable != 0; - state.point.sprite = regs.point_sprite_enable != 0; - state.point.size = std::max(1.0f, regs.point_size); + glPointSize(std::max(1.0f, gpu.regs.point_size)); + glDisable(GL_PROGRAM_POINT_SIZE); } void RasterizerOpenGL::SyncPolygonOffset() { - auto& maxwell3d = system.GPU().Maxwell3D(); - if (!maxwell3d.dirty.polygon_offset) { + auto& gpu = system.GPU().Maxwell3D(); + auto& flags = gpu.dirty.flags; + if (!flags[Dirty::PolygonOffset]) { return; } - const auto& regs = maxwell3d.regs; - - state.polygon_offset.fill_enable = regs.polygon_offset_fill_enable != 0; - state.polygon_offset.line_enable = regs.polygon_offset_line_enable != 0; - state.polygon_offset.point_enable = regs.polygon_offset_point_enable != 0; + flags[Dirty::PolygonOffset] = false; - // Hardware divides polygon offset units by two - state.polygon_offset.units = regs.polygon_offset_units / 2.0f; - state.polygon_offset.factor = regs.polygon_offset_factor; - state.polygon_offset.clamp = regs.polygon_offset_clamp; + const auto& regs = gpu.regs; + oglEnable(GL_POLYGON_OFFSET_FILL, regs.polygon_offset_fill_enable); + oglEnable(GL_POLYGON_OFFSET_LINE, regs.polygon_offset_line_enable); + oglEnable(GL_POLYGON_OFFSET_POINT, regs.polygon_offset_point_enable); - state.MarkDirtyPolygonOffset(); - maxwell3d.dirty.polygon_offset = false; + if (regs.polygon_offset_fill_enable || regs.polygon_offset_line_enable || + regs.polygon_offset_point_enable) { + // Hardware divides polygon offset units by two + glPolygonOffsetClamp(regs.polygon_offset_factor, regs.polygon_offset_units / 2.0f, + regs.polygon_offset_clamp); + } } void RasterizerOpenGL::SyncAlphaTest() { + auto& gpu = system.GPU().Maxwell3D(); + auto& flags = gpu.dirty.flags; + if (!flags[Dirty::AlphaTest]) { + return; + } + flags[Dirty::AlphaTest] = false; + + const auto& regs = gpu.regs; + if (regs.alpha_test_enabled && regs.rt_control.count > 1) { + LOG_WARNING(Render_OpenGL, "Alpha testing with more than one render target is not tested"); + } + + if (regs.alpha_test_enabled) { + glEnable(GL_ALPHA_TEST); + glAlphaFunc(MaxwellToGL::ComparisonOp(regs.alpha_test_func), regs.alpha_test_ref); + } else { + glDisable(GL_ALPHA_TEST); + } +} + +void RasterizerOpenGL::SyncFramebufferSRGB() { + auto& gpu = system.GPU().Maxwell3D(); + auto& flags = gpu.dirty.flags; + if (!flags[Dirty::FramebufferSRGB]) { + return; + } + flags[Dirty::FramebufferSRGB] = false; + + oglEnable(GL_FRAMEBUFFER_SRGB, gpu.regs.framebuffer_srgb); +} + +void RasterizerOpenGL::BeginTransformFeedback(GLenum primitive_mode) { const auto& regs = system.GPU().Maxwell3D().regs; - UNIMPLEMENTED_IF_MSG(regs.alpha_test_enabled != 0 && regs.rt_control.count > 1, - "Alpha Testing is enabled with more than one rendertarget"); + if (regs.tfb_enabled == 0) { + return; + } + + UNIMPLEMENTED_IF(regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationControl) || + regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationEval) || + regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::Geometry)); - state.alpha_test.enabled = regs.alpha_test_enabled; - if (!state.alpha_test.enabled) { + for (std::size_t index = 0; index < Maxwell::NumTransformFeedbackBuffers; ++index) { + const auto& binding = regs.tfb_bindings[index]; + if (!binding.buffer_enable) { + if (enabled_transform_feedback_buffers[index]) { + glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER, static_cast<GLuint>(index), 0, 0, + 0); + } + enabled_transform_feedback_buffers[index] = false; + continue; + } + enabled_transform_feedback_buffers[index] = true; + + auto& tfb_buffer = transform_feedback_buffers[index]; + tfb_buffer.Create(); + + const GLuint handle = tfb_buffer.handle; + const std::size_t size = binding.buffer_size; + glNamedBufferData(handle, static_cast<GLsizeiptr>(size), nullptr, GL_STREAM_COPY); + glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER, static_cast<GLuint>(index), handle, 0, + static_cast<GLsizeiptr>(size)); + } + + glBeginTransformFeedback(GL_POINTS); +} + +void RasterizerOpenGL::EndTransformFeedback() { + const auto& regs = system.GPU().Maxwell3D().regs; + if (regs.tfb_enabled == 0) { return; } - state.alpha_test.func = MaxwellToGL::ComparisonOp(regs.alpha_test_func); - state.alpha_test.ref = regs.alpha_test_ref; + + glEndTransformFeedback(); + + for (std::size_t index = 0; index < Maxwell::NumTransformFeedbackBuffers; ++index) { + const auto& binding = regs.tfb_bindings[index]; + if (!binding.buffer_enable) { + continue; + } + UNIMPLEMENTED_IF(binding.buffer_offset != 0); + + const GLuint handle = transform_feedback_buffers[index].handle; + const GPUVAddr gpu_addr = binding.Address(); + const std::size_t size = binding.buffer_size; + const auto [dest_buffer, offset] = buffer_cache.UploadMemory(gpu_addr, size, 4, true); + glCopyNamedBufferSubData(handle, *dest_buffer, 0, offset, static_cast<GLsizeiptr>(size)); + } } } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 68abe9a21..2d3be2437 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -30,7 +30,7 @@ #include "video_core/renderer_opengl/gl_shader_cache.h" #include "video_core/renderer_opengl/gl_shader_decompiler.h" #include "video_core/renderer_opengl/gl_shader_manager.h" -#include "video_core/renderer_opengl/gl_state.h" +#include "video_core/renderer_opengl/gl_state_tracker.h" #include "video_core/renderer_opengl/gl_texture_cache.h" #include "video_core/renderer_opengl/utils.h" #include "video_core/textures/texture.h" @@ -55,7 +55,8 @@ struct DrawParameters; class RasterizerOpenGL : public VideoCore::RasterizerAccelerated { public: explicit RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window, - ScreenInfo& info); + ScreenInfo& info, GLShader::ProgramManager& program_manager, + StateTracker& state_tracker); ~RasterizerOpenGL() override; void Draw(bool is_indexed, bool is_instanced) override; @@ -76,6 +77,7 @@ public: u32 pixel_stride) override; void LoadDiskResources(const std::atomic_bool& stop_loading, const VideoCore::DiskResourceLoadCallback& callback) override; + void SetupDirtyFlags() override; /// Returns true when there are commands queued to the OpenGL server. bool AnyCommandQueued() const { @@ -86,8 +88,7 @@ private: /// Configures the color and depth framebuffer states. void ConfigureFramebuffers(); - void ConfigureClearFramebuffer(OpenGLState& current_state, bool using_color_fb, - bool using_depth_fb, bool using_stencil_fb); + void ConfigureClearFramebuffer(bool using_color_fb, bool using_depth_fb, bool using_stencil_fb); /// Configures the current constbuffers to use for the draw command. void SetupDrawConstBuffers(std::size_t stage_index, const Shader& shader); @@ -97,7 +98,7 @@ private: /// Configures a constant buffer. void SetupConstBuffer(u32 binding, const Tegra::Engines::ConstBufferInfo& buffer, - const GLShader::ConstBufferEntry& entry); + const ConstBufferEntry& entry); /// Configures the current global memory entries to use for the draw command. void SetupDrawGlobalMemory(std::size_t stage_index, const Shader& shader); @@ -106,7 +107,7 @@ private: void SetupComputeGlobalMemory(const Shader& kernel); /// Configures a constant buffer. - void SetupGlobalMemory(u32 binding, const GLShader::GlobalMemoryEntry& entry, GPUVAddr gpu_addr, + void SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& entry, GPUVAddr gpu_addr, std::size_t size); /// Configures the current textures to use for the draw command. @@ -117,7 +118,7 @@ private: /// Configures a texture. void SetupTexture(u32 binding, const Tegra::Texture::FullTextureInfo& texture, - const GLShader::SamplerEntry& entry); + const SamplerEntry& entry); /// Configures images in a graphics shader. void SetupDrawImages(std::size_t stage_index, const Shader& shader); @@ -126,15 +127,16 @@ private: void SetupComputeImages(const Shader& shader); /// Configures an image. - void SetupImage(u32 binding, const Tegra::Texture::TICEntry& tic, - const GLShader::ImageEntry& entry); + void SetupImage(u32 binding, const Tegra::Texture::TICEntry& tic, const ImageEntry& entry); /// Syncs the viewport and depth range to match the guest state - void SyncViewport(OpenGLState& current_state); + void SyncViewport(); + + /// Syncs the depth clamp state + void SyncDepthClamp(); /// Syncs the clip enabled status to match the guest state - void SyncClipEnabled( - const std::array<bool, Tegra::Engines::Maxwell3D::Regs::NumClipDistances>& clip_mask); + void SyncClipEnabled(u32 clip_mask); /// Syncs the clip coefficients to match the guest state void SyncClipCoef(); @@ -164,16 +166,16 @@ private: void SyncMultiSampleState(); /// Syncs the scissor test state to match the guest state - void SyncScissorTest(OpenGLState& current_state); - - /// Syncs the transform feedback state to match the guest state - void SyncTransformFeedback(); + void SyncScissorTest(); /// Syncs the point state to match the guest state void SyncPointState(); /// Syncs the rasterizer enable state to match the guest state - void SyncRasterizeEnable(OpenGLState& current_state); + void SyncRasterizeEnable(); + + /// Syncs polygon modes to match the guest state + void SyncPolygonModes(); /// Syncs Color Mask void SyncColorMask(); @@ -184,6 +186,15 @@ private: /// Syncs the alpha test state to match the guest state void SyncAlphaTest(); + /// Syncs the framebuffer sRGB state to match the guest state + void SyncFramebufferSRGB(); + + /// Begin a transform feedback + void BeginTransformFeedback(GLenum primitive_mode); + + /// End a transform feedback + void EndTransformFeedback(); + /// Check for extension that are not strictly required but are needed for correct emulation void CheckExtensions(); @@ -191,18 +202,17 @@ private: std::size_t CalculateIndexBufferSize() const; - /// Updates and returns a vertex array object representing current vertex format - GLuint SetupVertexFormat(); + /// Updates the current vertex format + void SetupVertexFormat(); - void SetupVertexBuffer(GLuint vao); - void SetupVertexInstances(GLuint vao); + void SetupVertexBuffer(); + void SetupVertexInstances(); GLintptr SetupIndexBuffer(); void SetupShaders(GLenum primitive_mode); const Device device; - OpenGLState state; TextureCacheOpenGL texture_cache; ShaderCacheOpenGL shader_cache; @@ -212,22 +222,25 @@ private: Core::System& system; ScreenInfo& screen_info; - - std::unique_ptr<GLShader::ProgramManager> shader_program_manager; - std::map<std::array<Tegra::Engines::Maxwell3D::Regs::VertexAttribute, - Tegra::Engines::Maxwell3D::Regs::NumVertexAttributes>, - OGLVertexArray> - vertex_array_cache; + GLShader::ProgramManager& program_manager; + StateTracker& state_tracker; static constexpr std::size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024; OGLBufferCache buffer_cache; - VertexArrayPushBuffer vertex_array_pushbuffer; + VertexArrayPushBuffer vertex_array_pushbuffer{state_tracker}; BindBuffersRangePushBuffer bind_ubo_pushbuffer{GL_UNIFORM_BUFFER}; BindBuffersRangePushBuffer bind_ssbo_pushbuffer{GL_SHADER_STORAGE_BUFFER}; + std::array<OGLBuffer, Tegra::Engines::Maxwell3D::Regs::NumTransformFeedbackBuffers> + transform_feedback_buffers; + std::bitset<Tegra::Engines::Maxwell3D::Regs::NumTransformFeedbackBuffers> + enabled_transform_feedback_buffers; + /// Number of commands queued to the OpenGL driver. Reseted on flush. std::size_t num_queued_commands = 0; + + u32 last_clip_distance_mask = 0; }; } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_resource_manager.cpp b/src/video_core/renderer_opengl/gl_resource_manager.cpp index f0ddfb276..97803d480 100644 --- a/src/video_core/renderer_opengl/gl_resource_manager.cpp +++ b/src/video_core/renderer_opengl/gl_resource_manager.cpp @@ -8,13 +8,29 @@ #include "common/microprofile.h" #include "video_core/renderer_opengl/gl_resource_manager.h" #include "video_core/renderer_opengl/gl_shader_util.h" -#include "video_core/renderer_opengl/gl_state.h" MICROPROFILE_DEFINE(OpenGL_ResourceCreation, "OpenGL", "Resource Creation", MP_RGB(128, 128, 192)); MICROPROFILE_DEFINE(OpenGL_ResourceDeletion, "OpenGL", "Resource Deletion", MP_RGB(128, 128, 192)); namespace OpenGL { +void OGLRenderbuffer::Create() { + if (handle != 0) + return; + + MICROPROFILE_SCOPE(OpenGL_ResourceCreation); + glCreateRenderbuffers(1, &handle); +} + +void OGLRenderbuffer::Release() { + if (handle == 0) + return; + + MICROPROFILE_SCOPE(OpenGL_ResourceDeletion); + glDeleteRenderbuffers(1, &handle); + handle = 0; +} + void OGLTexture::Create(GLenum target) { if (handle != 0) return; @@ -29,7 +45,6 @@ void OGLTexture::Release() { MICROPROFILE_SCOPE(OpenGL_ResourceDeletion); glDeleteTextures(1, &handle); - OpenGLState::GetCurState().UnbindTexture(handle).Apply(); handle = 0; } @@ -47,7 +62,6 @@ void OGLTextureView::Release() { MICROPROFILE_SCOPE(OpenGL_ResourceDeletion); glDeleteTextures(1, &handle); - OpenGLState::GetCurState().UnbindTexture(handle).Apply(); handle = 0; } @@ -65,7 +79,6 @@ void OGLSampler::Release() { MICROPROFILE_SCOPE(OpenGL_ResourceDeletion); glDeleteSamplers(1, &handle); - OpenGLState::GetCurState().ResetSampler(handle).Apply(); handle = 0; } @@ -109,7 +122,6 @@ void OGLProgram::Release() { MICROPROFILE_SCOPE(OpenGL_ResourceDeletion); glDeleteProgram(handle); - OpenGLState::GetCurState().ResetProgram(handle).Apply(); handle = 0; } @@ -127,7 +139,6 @@ void OGLPipeline::Release() { MICROPROFILE_SCOPE(OpenGL_ResourceDeletion); glDeleteProgramPipelines(1, &handle); - OpenGLState::GetCurState().ResetPipeline(handle).Apply(); handle = 0; } @@ -171,24 +182,6 @@ void OGLSync::Release() { handle = 0; } -void OGLVertexArray::Create() { - if (handle != 0) - return; - - MICROPROFILE_SCOPE(OpenGL_ResourceCreation); - glCreateVertexArrays(1, &handle); -} - -void OGLVertexArray::Release() { - if (handle == 0) - return; - - MICROPROFILE_SCOPE(OpenGL_ResourceDeletion); - glDeleteVertexArrays(1, &handle); - OpenGLState::GetCurState().ResetVertexArray(handle).Apply(); - handle = 0; -} - void OGLFramebuffer::Create() { if (handle != 0) return; @@ -203,7 +196,6 @@ void OGLFramebuffer::Release() { MICROPROFILE_SCOPE(OpenGL_ResourceDeletion); glDeleteFramebuffers(1, &handle); - OpenGLState::GetCurState().ResetFramebuffer(handle).Apply(); handle = 0; } diff --git a/src/video_core/renderer_opengl/gl_resource_manager.h b/src/video_core/renderer_opengl/gl_resource_manager.h index 514d1d165..de93f4212 100644 --- a/src/video_core/renderer_opengl/gl_resource_manager.h +++ b/src/video_core/renderer_opengl/gl_resource_manager.h @@ -11,6 +11,31 @@ namespace OpenGL { +class OGLRenderbuffer : private NonCopyable { +public: + OGLRenderbuffer() = default; + + OGLRenderbuffer(OGLRenderbuffer&& o) noexcept : handle(std::exchange(o.handle, 0)) {} + + ~OGLRenderbuffer() { + Release(); + } + + OGLRenderbuffer& operator=(OGLRenderbuffer&& o) noexcept { + Release(); + handle = std::exchange(o.handle, 0); + return *this; + } + + /// Creates a new internal OpenGL resource and stores the handle + void Create(); + + /// Deletes the internal OpenGL resource + void Release(); + + GLuint handle = 0; +}; + class OGLTexture : private NonCopyable { public: OGLTexture() = default; @@ -216,31 +241,6 @@ public: GLsync handle = 0; }; -class OGLVertexArray : private NonCopyable { -public: - OGLVertexArray() = default; - - OGLVertexArray(OGLVertexArray&& o) noexcept : handle(std::exchange(o.handle, 0)) {} - - ~OGLVertexArray() { - Release(); - } - - OGLVertexArray& operator=(OGLVertexArray&& o) noexcept { - Release(); - handle = std::exchange(o.handle, 0); - return *this; - } - - /// Creates a new internal OpenGL resource and stores the handle - void Create(); - - /// Deletes the internal OpenGL resource - void Release(); - - GLuint handle = 0; -}; - class OGLFramebuffer : private NonCopyable { public: OGLFramebuffer() = default; diff --git a/src/video_core/renderer_opengl/gl_sampler_cache.cpp b/src/video_core/renderer_opengl/gl_sampler_cache.cpp index 3ded5ecea..5c174879a 100644 --- a/src/video_core/renderer_opengl/gl_sampler_cache.cpp +++ b/src/video_core/renderer_opengl/gl_sampler_cache.cpp @@ -38,7 +38,7 @@ OGLSampler SamplerCacheOpenGL::CreateSampler(const Tegra::Texture::TSCEntry& tsc glSamplerParameterf(sampler_id, GL_TEXTURE_MAX_ANISOTROPY, tsc.GetMaxAnisotropy()); } else if (GLAD_GL_EXT_texture_filter_anisotropic) { glSamplerParameterf(sampler_id, GL_TEXTURE_MAX_ANISOTROPY_EXT, tsc.GetMaxAnisotropy()); - } else if (tsc.GetMaxAnisotropy() != 1) { + } else { LOG_WARNING(Render_OpenGL, "Anisotropy not supported by host GPU driver"); } diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 489eb143c..e3d31c3eb 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -2,12 +2,16 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include <atomic> +#include <functional> #include <mutex> #include <optional> #include <string> #include <thread> #include <unordered_set> + #include <boost/functional/hash.hpp> + #include "common/alignment.h" #include "common/assert.h" #include "common/logging/log.h" @@ -22,14 +26,16 @@ #include "video_core/renderer_opengl/gl_shader_cache.h" #include "video_core/renderer_opengl/gl_shader_decompiler.h" #include "video_core/renderer_opengl/gl_shader_disk_cache.h" +#include "video_core/renderer_opengl/gl_state_tracker.h" #include "video_core/renderer_opengl/utils.h" +#include "video_core/shader/registry.h" #include "video_core/shader/shader_ir.h" namespace OpenGL { using Tegra::Engines::ShaderType; -using VideoCommon::Shader::ConstBufferLocker; using VideoCommon::Shader::ProgramCode; +using VideoCommon::Shader::Registry; using VideoCommon::Shader::ShaderIR; namespace { @@ -55,7 +61,7 @@ constexpr bool IsSchedInstruction(std::size_t offset, std::size_t main_offset) { } /// Calculates the size of a program stream -std::size_t CalculateProgramSize(const GLShader::ProgramCode& program) { +std::size_t CalculateProgramSize(const ProgramCode& program) { constexpr std::size_t start_offset = 10; // This is the encoded version of BRA that jumps to itself. All Nvidia // shaders end with one. @@ -108,32 +114,9 @@ constexpr GLenum GetGLShaderType(ShaderType shader_type) { } } -/// Describes primitive behavior on geometry shaders -constexpr std::pair<const char*, u32> GetPrimitiveDescription(GLenum primitive_mode) { - switch (primitive_mode) { - case GL_POINTS: - return {"points", 1}; - case GL_LINES: - case GL_LINE_STRIP: - return {"lines", 2}; - case GL_LINES_ADJACENCY: - case GL_LINE_STRIP_ADJACENCY: - return {"lines_adjacency", 4}; - case GL_TRIANGLES: - case GL_TRIANGLE_STRIP: - case GL_TRIANGLE_FAN: - return {"triangles", 3}; - case GL_TRIANGLES_ADJACENCY: - case GL_TRIANGLE_STRIP_ADJACENCY: - return {"triangles_adjacency", 6}; - default: - return {"points", 1}; - } -} - /// Hashes one (or two) program streams u64 GetUniqueIdentifier(ShaderType shader_type, bool is_a, const ProgramCode& code, - const ProgramCode& code_b) { + const ProgramCode& code_b = {}) { u64 unique_identifier = boost::hash_value(code); if (is_a) { // VertexA programs include two programs @@ -142,24 +125,6 @@ u64 GetUniqueIdentifier(ShaderType shader_type, bool is_a, const ProgramCode& co return unique_identifier; } -/// Creates an unspecialized program from code streams -std::string GenerateGLSL(const Device& device, ShaderType shader_type, const ShaderIR& ir, - const std::optional<ShaderIR>& ir_b) { - switch (shader_type) { - case ShaderType::Vertex: - return GLShader::GenerateVertexShader(device, ir, ir_b ? &*ir_b : nullptr); - case ShaderType::Geometry: - return GLShader::GenerateGeometryShader(device, ir); - case ShaderType::Fragment: - return GLShader::GenerateFragmentShader(device, ir); - case ShaderType::Compute: - return GLShader::GenerateComputeShader(device, ir); - default: - UNIMPLEMENTED_MSG("Unimplemented shader_type={}", static_cast<u32>(shader_type)); - return {}; - } -} - constexpr const char* GetShaderTypeName(ShaderType shader_type) { switch (shader_type) { case ShaderType::Vertex: @@ -195,102 +160,38 @@ constexpr ShaderType GetShaderType(Maxwell::ShaderProgram program_type) { return {}; } -std::string GetShaderId(u64 unique_identifier, ShaderType shader_type) { +std::string MakeShaderID(u64 unique_identifier, ShaderType shader_type) { return fmt::format("{}{:016X}", GetShaderTypeName(shader_type), unique_identifier); } -Tegra::Engines::ConstBufferEngineInterface& GetConstBufferEngineInterface(Core::System& system, - ShaderType shader_type) { - if (shader_type == ShaderType::Compute) { - return system.GPU().KeplerCompute(); - } else { - return system.GPU().Maxwell3D(); - } -} - -std::unique_ptr<ConstBufferLocker> MakeLocker(Core::System& system, ShaderType shader_type) { - return std::make_unique<ConstBufferLocker>(shader_type, - GetConstBufferEngineInterface(system, shader_type)); -} - -void FillLocker(ConstBufferLocker& locker, const ShaderDiskCacheUsage& usage) { - locker.SetBoundBuffer(usage.bound_buffer); - for (const auto& key : usage.keys) { - const auto [buffer, offset] = key.first; - locker.InsertKey(buffer, offset, key.second); +std::shared_ptr<Registry> MakeRegistry(const ShaderDiskCacheEntry& entry) { + const VideoCore::GuestDriverProfile guest_profile{entry.texture_handler_size}; + const VideoCommon::Shader::SerializedRegistryInfo info{guest_profile, entry.bound_buffer, + entry.graphics_info, entry.compute_info}; + const auto registry = std::make_shared<Registry>(entry.type, info); + for (const auto& [address, value] : entry.keys) { + const auto [buffer, offset] = address; + registry->InsertKey(buffer, offset, value); } - for (const auto& [offset, sampler] : usage.bound_samplers) { - locker.InsertBoundSampler(offset, sampler); + for (const auto& [offset, sampler] : entry.bound_samplers) { + registry->InsertBoundSampler(offset, sampler); } - for (const auto& [key, sampler] : usage.bindless_samplers) { + for (const auto& [key, sampler] : entry.bindless_samplers) { const auto [buffer, offset] = key; - locker.InsertBindlessSampler(buffer, offset, sampler); + registry->InsertBindlessSampler(buffer, offset, sampler); } + return registry; } -CachedProgram BuildShader(const Device& device, u64 unique_identifier, ShaderType shader_type, - const ProgramCode& code, const ProgramCode& code_b, - ConstBufferLocker& locker, const ProgramVariant& variant, - bool hint_retrievable = false) { - LOG_INFO(Render_OpenGL, "called. {}", GetShaderId(unique_identifier, shader_type)); - - const bool is_compute = shader_type == ShaderType::Compute; - const u32 main_offset = is_compute ? KERNEL_MAIN_OFFSET : STAGE_MAIN_OFFSET; - const ShaderIR ir(code, main_offset, COMPILER_SETTINGS, locker); - std::optional<ShaderIR> ir_b; - if (!code_b.empty()) { - ir_b.emplace(code_b, main_offset, COMPILER_SETTINGS, locker); - } - - std::string source = fmt::format(R"(// {} -#version 430 core -#extension GL_ARB_separate_shader_objects : enable -)", - GetShaderId(unique_identifier, shader_type)); - if (device.HasShaderBallot()) { - source += "#extension GL_ARB_shader_ballot : require\n"; - } - if (device.HasVertexViewportLayer()) { - source += "#extension GL_ARB_shader_viewport_layer_array : require\n"; - } - if (device.HasImageLoadFormatted()) { - source += "#extension GL_EXT_shader_image_load_formatted : require\n"; - } - if (device.HasWarpIntrinsics()) { - source += "#extension GL_NV_gpu_shader5 : require\n" - "#extension GL_NV_shader_thread_group : require\n" - "#extension GL_NV_shader_thread_shuffle : require\n"; - } - // This pragma stops Nvidia's driver from over optimizing math (probably using fp16 operations) - // on places where we don't want to. - // Thanks to Ryujinx for finding this workaround. - source += "#pragma optionNV(fastmath off)\n"; - - if (shader_type == ShaderType::Geometry) { - const auto [glsl_topology, max_vertices] = GetPrimitiveDescription(variant.primitive_mode); - source += fmt::format("#define MAX_VERTEX_INPUT {}\n", max_vertices); - source += fmt::format("layout ({}) in;\n", glsl_topology); - } - if (shader_type == ShaderType::Compute) { - if (variant.local_memory_size > 0) { - source += fmt::format("#define LOCAL_MEMORY_SIZE {}\n", - Common::AlignUp(variant.local_memory_size, 4) / 4); - } - source += - fmt::format("layout (local_size_x = {}, local_size_y = {}, local_size_z = {}) in;\n", - variant.block_x, variant.block_y, variant.block_z); - - if (variant.shared_memory_size > 0) { - // shared_memory_size is described in number of words - source += fmt::format("shared uint smem[{}];\n", variant.shared_memory_size); - } - } - - source += '\n'; - source += GenerateGLSL(device, shader_type, ir, ir_b); +std::shared_ptr<OGLProgram> BuildShader(const Device& device, ShaderType shader_type, + u64 unique_identifier, const ShaderIR& ir, + const Registry& registry, bool hint_retrievable = false) { + const std::string shader_id = MakeShaderID(unique_identifier, shader_type); + LOG_INFO(Render_OpenGL, "{}", shader_id); + const std::string glsl = DecompileShader(device, ir, registry, shader_type, shader_id); OGLShader shader; - shader.Create(source.c_str(), GetGLShaderType(shader_type)); + shader.Create(glsl.c_str(), GetGLShaderType(shader_type)); auto program = std::make_shared<OGLProgram>(); program->Create(true, hint_retrievable, shader.handle); @@ -298,7 +199,7 @@ CachedProgram BuildShader(const Device& device, u64 unique_identifier, ShaderTyp } std::unordered_set<GLenum> GetSupportedFormats() { - GLint num_formats{}; + GLint num_formats; glGetIntegerv(GL_NUM_PROGRAM_BINARY_FORMATS, &num_formats); std::vector<GLint> formats(num_formats); @@ -313,115 +214,82 @@ std::unordered_set<GLenum> GetSupportedFormats() { } // Anonymous namespace -CachedShader::CachedShader(const ShaderParameters& params, ShaderType shader_type, - GLShader::ShaderEntries entries, ProgramCode code, ProgramCode code_b) - : RasterizerCacheObject{params.host_ptr}, system{params.system}, - disk_cache{params.disk_cache}, device{params.device}, cpu_addr{params.cpu_addr}, - unique_identifier{params.unique_identifier}, shader_type{shader_type}, - entries{std::move(entries)}, code{std::move(code)}, code_b{std::move(code_b)} { - if (!params.precompiled_variants) { - return; - } - for (const auto& pair : *params.precompiled_variants) { - auto locker = MakeLocker(system, shader_type); - const auto& usage = pair->first; - FillLocker(*locker, usage); - - std::unique_ptr<LockerVariant>* locker_variant = nullptr; - const auto it = - std::find_if(locker_variants.begin(), locker_variants.end(), [&](const auto& variant) { - return variant->locker->HasEqualKeys(*locker); - }); - if (it == locker_variants.end()) { - locker_variant = &locker_variants.emplace_back(); - *locker_variant = std::make_unique<LockerVariant>(); - locker_variant->get()->locker = std::move(locker); - } else { - locker_variant = &*it; - } - locker_variant->get()->programs.emplace(usage.variant, pair->second); - } +CachedShader::CachedShader(const u8* host_ptr, VAddr cpu_addr, std::size_t size_in_bytes, + std::shared_ptr<VideoCommon::Shader::Registry> registry, + ShaderEntries entries, std::shared_ptr<OGLProgram> program) + : RasterizerCacheObject{host_ptr}, registry{std::move(registry)}, entries{std::move(entries)}, + cpu_addr{cpu_addr}, size_in_bytes{size_in_bytes}, program{std::move(program)} {} + +CachedShader::~CachedShader() = default; + +GLuint CachedShader::GetHandle() const { + DEBUG_ASSERT(registry->IsConsistent()); + return program->handle; } Shader CachedShader::CreateStageFromMemory(const ShaderParameters& params, Maxwell::ShaderProgram program_type, ProgramCode code, ProgramCode code_b) { const auto shader_type = GetShaderType(program_type); - params.disk_cache.SaveRaw( - ShaderDiskCacheRaw(params.unique_identifier, shader_type, code, code_b)); + const std::size_t size_in_bytes = code.size() * sizeof(u64); - ConstBufferLocker locker(shader_type, params.system.GPU().Maxwell3D()); - const ShaderIR ir(code, STAGE_MAIN_OFFSET, COMPILER_SETTINGS, locker); + auto registry = std::make_shared<Registry>(shader_type, params.system.GPU().Maxwell3D()); + const ShaderIR ir(code, STAGE_MAIN_OFFSET, COMPILER_SETTINGS, *registry); // TODO(Rodrigo): Handle VertexA shaders // std::optional<ShaderIR> ir_b; // if (!code_b.empty()) { // ir_b.emplace(code_b, STAGE_MAIN_OFFSET); // } - return std::shared_ptr<CachedShader>(new CachedShader( - params, shader_type, GLShader::GetEntries(ir), std::move(code), std::move(code_b))); + auto program = BuildShader(params.device, shader_type, params.unique_identifier, ir, *registry); + + ShaderDiskCacheEntry entry; + entry.type = shader_type; + entry.code = std::move(code); + entry.code_b = std::move(code_b); + entry.unique_identifier = params.unique_identifier; + entry.bound_buffer = registry->GetBoundBuffer(); + entry.graphics_info = registry->GetGraphicsInfo(); + entry.keys = registry->GetKeys(); + entry.bound_samplers = registry->GetBoundSamplers(); + entry.bindless_samplers = registry->GetBindlessSamplers(); + params.disk_cache.SaveEntry(std::move(entry)); + + return std::shared_ptr<CachedShader>(new CachedShader(params.host_ptr, params.cpu_addr, + size_in_bytes, std::move(registry), + MakeEntries(ir), std::move(program))); } Shader CachedShader::CreateKernelFromMemory(const ShaderParameters& params, ProgramCode code) { - params.disk_cache.SaveRaw( - ShaderDiskCacheRaw(params.unique_identifier, ShaderType::Compute, code)); - - ConstBufferLocker locker(Tegra::Engines::ShaderType::Compute, - params.system.GPU().KeplerCompute()); - const ShaderIR ir(code, KERNEL_MAIN_OFFSET, COMPILER_SETTINGS, locker); - return std::shared_ptr<CachedShader>(new CachedShader( - params, ShaderType::Compute, GLShader::GetEntries(ir), std::move(code), {})); + const std::size_t size_in_bytes = code.size() * sizeof(u64); + + auto& engine = params.system.GPU().KeplerCompute(); + auto registry = std::make_shared<Registry>(ShaderType::Compute, engine); + const ShaderIR ir(code, KERNEL_MAIN_OFFSET, COMPILER_SETTINGS, *registry); + const u64 uid = params.unique_identifier; + auto program = BuildShader(params.device, ShaderType::Compute, uid, ir, *registry); + + ShaderDiskCacheEntry entry; + entry.type = ShaderType::Compute; + entry.code = std::move(code); + entry.unique_identifier = uid; + entry.bound_buffer = registry->GetBoundBuffer(); + entry.compute_info = registry->GetComputeInfo(); + entry.keys = registry->GetKeys(); + entry.bound_samplers = registry->GetBoundSamplers(); + entry.bindless_samplers = registry->GetBindlessSamplers(); + params.disk_cache.SaveEntry(std::move(entry)); + + return std::shared_ptr<CachedShader>(new CachedShader(params.host_ptr, params.cpu_addr, + size_in_bytes, std::move(registry), + MakeEntries(ir), std::move(program))); } Shader CachedShader::CreateFromCache(const ShaderParameters& params, - const UnspecializedShader& unspecialized) { - return std::shared_ptr<CachedShader>(new CachedShader(params, unspecialized.type, - unspecialized.entries, unspecialized.code, - unspecialized.code_b)); -} - -GLuint CachedShader::GetHandle(const ProgramVariant& variant) { - EnsureValidLockerVariant(); - - const auto [entry, is_cache_miss] = curr_locker_variant->programs.try_emplace(variant); - auto& program = entry->second; - if (!is_cache_miss) { - return program->handle; - } - - program = BuildShader(device, unique_identifier, shader_type, code, code_b, - *curr_locker_variant->locker, variant); - disk_cache.SaveUsage(GetUsage(variant, *curr_locker_variant->locker)); - - LabelGLObject(GL_PROGRAM, program->handle, cpu_addr); - return program->handle; -} - -bool CachedShader::EnsureValidLockerVariant() { - const auto previous_variant = curr_locker_variant; - if (curr_locker_variant && !curr_locker_variant->locker->IsConsistent()) { - curr_locker_variant = nullptr; - } - if (!curr_locker_variant) { - for (auto& variant : locker_variants) { - if (variant->locker->IsConsistent()) { - curr_locker_variant = variant.get(); - } - } - } - if (!curr_locker_variant) { - auto& new_variant = locker_variants.emplace_back(); - new_variant = std::make_unique<LockerVariant>(); - new_variant->locker = MakeLocker(system, shader_type); - curr_locker_variant = new_variant.get(); - } - return previous_variant == curr_locker_variant; -} - -ShaderDiskCacheUsage CachedShader::GetUsage(const ProgramVariant& variant, - const ConstBufferLocker& locker) const { - return ShaderDiskCacheUsage{unique_identifier, variant, - locker.GetBoundBuffer(), locker.GetKeys(), - locker.GetBoundSamplers(), locker.GetBindlessSamplers()}; + const PrecompiledShader& precompiled_shader, + std::size_t size_in_bytes) { + return std::shared_ptr<CachedShader>(new CachedShader( + params.host_ptr, params.cpu_addr, size_in_bytes, precompiled_shader.registry, + precompiled_shader.entries, precompiled_shader.program)); } ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system, @@ -431,16 +299,12 @@ ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading, const VideoCore::DiskResourceLoadCallback& callback) { - const auto transferable = disk_cache.LoadTransferable(); + const std::optional transferable = disk_cache.LoadTransferable(); if (!transferable) { return; } - const auto [raws, shader_usages] = *transferable; - if (!GenerateUnspecializedShaders(stop_loading, callback, raws) || stop_loading) { - return; - } - const auto dumps = disk_cache.LoadPrecompiled(); + const std::vector gl_cache = disk_cache.LoadPrecompiled(); const auto supported_formats = GetSupportedFormats(); // Track if precompiled cache was altered during loading to know if we have to @@ -449,77 +313,82 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading, // Inform the frontend about shader build initialization if (callback) { - callback(VideoCore::LoadCallbackStage::Build, 0, shader_usages.size()); + callback(VideoCore::LoadCallbackStage::Build, 0, transferable->size()); } std::mutex mutex; std::size_t built_shaders = 0; // It doesn't have be atomic since it's used behind a mutex - std::atomic_bool compilation_failed = false; + std::atomic_bool gl_cache_failed = false; - const auto Worker = [&](Core::Frontend::GraphicsContext* context, std::size_t begin, - std::size_t end, const std::vector<ShaderDiskCacheUsage>& shader_usages, - const ShaderDumpsMap& dumps) { + const auto find_precompiled = [&gl_cache](u64 id) { + return std::find_if(gl_cache.begin(), gl_cache.end(), + [id](const auto& entry) { return entry.unique_identifier == id; }); + }; + + const auto worker = [&](Core::Frontend::GraphicsContext* context, std::size_t begin, + std::size_t end) { context->MakeCurrent(); SCOPE_EXIT({ return context->DoneCurrent(); }); for (std::size_t i = begin; i < end; ++i) { - if (stop_loading || compilation_failed) { + if (stop_loading) { return; } - const auto& usage{shader_usages[i]}; - const auto& unspecialized{unspecialized_shaders.at(usage.unique_identifier)}; - const auto dump{dumps.find(usage)}; - - CachedProgram shader; - if (dump != dumps.end()) { - // If the shader is dumped, attempt to load it with - shader = GeneratePrecompiledProgram(dump->second, supported_formats); - if (!shader) { - compilation_failed = true; - return; + const auto& entry = (*transferable)[i]; + const u64 uid = entry.unique_identifier; + const auto it = find_precompiled(uid); + const auto precompiled_entry = it != gl_cache.end() ? &*it : nullptr; + + const bool is_compute = entry.type == ShaderType::Compute; + const u32 main_offset = is_compute ? KERNEL_MAIN_OFFSET : STAGE_MAIN_OFFSET; + auto registry = MakeRegistry(entry); + const ShaderIR ir(entry.code, main_offset, COMPILER_SETTINGS, *registry); + + std::shared_ptr<OGLProgram> program; + if (precompiled_entry) { + // If the shader is precompiled, attempt to load it with + program = GeneratePrecompiledProgram(entry, *precompiled_entry, supported_formats); + if (!program) { + gl_cache_failed = true; } } - if (!shader) { - auto locker{MakeLocker(system, unspecialized.type)}; - FillLocker(*locker, usage); - - shader = BuildShader(device, usage.unique_identifier, unspecialized.type, - unspecialized.code, unspecialized.code_b, *locker, - usage.variant, true); + if (!program) { + // Otherwise compile it from GLSL + program = BuildShader(device, entry.type, uid, ir, *registry, true); } + PrecompiledShader shader; + shader.program = std::move(program); + shader.registry = std::move(registry); + shader.entries = MakeEntries(ir); + std::scoped_lock lock{mutex}; if (callback) { callback(VideoCore::LoadCallbackStage::Build, ++built_shaders, - shader_usages.size()); + transferable->size()); } - - precompiled_programs.emplace(usage, std::move(shader)); - - // TODO(Rodrigo): Is there a better way to do this? - precompiled_variants[usage.unique_identifier].push_back( - precompiled_programs.find(usage)); + runtime_cache.emplace(entry.unique_identifier, std::move(shader)); } }; const auto num_workers{static_cast<std::size_t>(std::thread::hardware_concurrency() + 1ULL)}; - const std::size_t bucket_size{shader_usages.size() / num_workers}; + const std::size_t bucket_size{transferable->size() / num_workers}; std::vector<std::unique_ptr<Core::Frontend::GraphicsContext>> contexts(num_workers); std::vector<std::thread> threads(num_workers); for (std::size_t i = 0; i < num_workers; ++i) { const bool is_last_worker = i + 1 == num_workers; const std::size_t start{bucket_size * i}; - const std::size_t end{is_last_worker ? shader_usages.size() : start + bucket_size}; + const std::size_t end{is_last_worker ? transferable->size() : start + bucket_size}; // On some platforms the shared context has to be created from the GUI thread contexts[i] = emu_window.CreateSharedContext(); - threads[i] = std::thread(Worker, contexts[i].get(), start, end, shader_usages, dumps); + threads[i] = std::thread(worker, contexts[i].get(), start, end); } for (auto& thread : threads) { thread.join(); } - if (compilation_failed) { + if (gl_cache_failed) { // Invalidate the precompiled cache if a shader dumped shader was rejected disk_cache.InvalidatePrecompiled(); precompiled_cache_altered = true; @@ -532,11 +401,12 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading, // TODO(Rodrigo): Do state tracking for transferable shaders and do a dummy draw // before precompiling them - for (std::size_t i = 0; i < shader_usages.size(); ++i) { - const auto& usage{shader_usages[i]}; - if (dumps.find(usage) == dumps.end()) { - const auto& program{precompiled_programs.at(usage)}; - disk_cache.SaveDump(usage, program->handle); + for (std::size_t i = 0; i < transferable->size(); ++i) { + const u64 id = (*transferable)[i].unique_identifier; + const auto it = find_precompiled(id); + if (it == gl_cache.end()) { + const GLuint program = runtime_cache.at(id).program->handle; + disk_cache.SavePrecompiled(id, program); precompiled_cache_altered = true; } } @@ -546,84 +416,33 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading, } } -const PrecompiledVariants* ShaderCacheOpenGL::GetPrecompiledVariants(u64 unique_identifier) const { - const auto it = precompiled_variants.find(unique_identifier); - return it == precompiled_variants.end() ? nullptr : &it->second; -} - -CachedProgram ShaderCacheOpenGL::GeneratePrecompiledProgram( - const ShaderDiskCacheDump& dump, const std::unordered_set<GLenum>& supported_formats) { - if (supported_formats.find(dump.binary_format) == supported_formats.end()) { - LOG_INFO(Render_OpenGL, "Precompiled cache entry with unsupported format - removing"); +std::shared_ptr<OGLProgram> ShaderCacheOpenGL::GeneratePrecompiledProgram( + const ShaderDiskCacheEntry& entry, const ShaderDiskCachePrecompiled& precompiled_entry, + const std::unordered_set<GLenum>& supported_formats) { + if (supported_formats.find(precompiled_entry.binary_format) == supported_formats.end()) { + LOG_INFO(Render_OpenGL, "Precompiled cache entry with unsupported format, removing"); return {}; } - CachedProgram shader = std::make_shared<OGLProgram>(); - shader->handle = glCreateProgram(); - glProgramParameteri(shader->handle, GL_PROGRAM_SEPARABLE, GL_TRUE); - glProgramBinary(shader->handle, dump.binary_format, dump.binary.data(), - static_cast<GLsizei>(dump.binary.size())); - - GLint link_status{}; - glGetProgramiv(shader->handle, GL_LINK_STATUS, &link_status); + auto program = std::make_shared<OGLProgram>(); + program->handle = glCreateProgram(); + glProgramParameteri(program->handle, GL_PROGRAM_SEPARABLE, GL_TRUE); + glProgramBinary(program->handle, precompiled_entry.binary_format, + precompiled_entry.binary.data(), + static_cast<GLsizei>(precompiled_entry.binary.size())); + + GLint link_status; + glGetProgramiv(program->handle, GL_LINK_STATUS, &link_status); if (link_status == GL_FALSE) { - LOG_INFO(Render_OpenGL, "Precompiled cache rejected by the driver - removing"); + LOG_INFO(Render_OpenGL, "Precompiled cache rejected by the driver, removing"); return {}; } - return shader; -} - -bool ShaderCacheOpenGL::GenerateUnspecializedShaders( - const std::atomic_bool& stop_loading, const VideoCore::DiskResourceLoadCallback& callback, - const std::vector<ShaderDiskCacheRaw>& raws) { - if (callback) { - callback(VideoCore::LoadCallbackStage::Decompile, 0, raws.size()); - } - - for (std::size_t i = 0; i < raws.size(); ++i) { - if (stop_loading) { - return false; - } - const auto& raw{raws[i]}; - const u64 unique_identifier{raw.GetUniqueIdentifier()}; - const u64 calculated_hash{ - GetUniqueIdentifier(raw.GetType(), raw.HasProgramA(), raw.GetCode(), raw.GetCodeB())}; - if (unique_identifier != calculated_hash) { - LOG_ERROR(Render_OpenGL, - "Invalid hash in entry={:016x} (obtained hash={:016x}) - " - "removing shader cache", - raw.GetUniqueIdentifier(), calculated_hash); - disk_cache.InvalidateTransferable(); - return false; - } - - const u32 main_offset = - raw.GetType() == ShaderType::Compute ? KERNEL_MAIN_OFFSET : STAGE_MAIN_OFFSET; - ConstBufferLocker locker(raw.GetType()); - const ShaderIR ir(raw.GetCode(), main_offset, COMPILER_SETTINGS, locker); - // TODO(Rodrigo): Handle VertexA shaders - // std::optional<ShaderIR> ir_b; - // if (raw.HasProgramA()) { - // ir_b.emplace(raw.GetProgramCodeB(), main_offset); - // } - - UnspecializedShader unspecialized; - unspecialized.entries = GLShader::GetEntries(ir); - unspecialized.type = raw.GetType(); - unspecialized.code = raw.GetCode(); - unspecialized.code_b = raw.GetCodeB(); - unspecialized_shaders.emplace(raw.GetUniqueIdentifier(), unspecialized); - - if (callback) { - callback(VideoCore::LoadCallbackStage::Decompile, i, raws.size()); - } - } - return true; + return program; } Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) { - if (!system.GPU().Maxwell3D().dirty.shaders) { + if (!system.GPU().Maxwell3D().dirty.flags[Dirty::Shaders]) { return last_shaders[static_cast<std::size_t>(program)]; } @@ -647,17 +466,17 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) { const auto unique_identifier = GetUniqueIdentifier( GetShaderType(program), program == Maxwell::ShaderProgram::VertexA, code, code_b); - const auto precompiled_variants = GetPrecompiledVariants(unique_identifier); const auto cpu_addr{*memory_manager.GpuToCpuAddress(address)}; - const ShaderParameters params{system, disk_cache, precompiled_variants, device, + const ShaderParameters params{system, disk_cache, device, cpu_addr, host_ptr, unique_identifier}; - const auto found = unspecialized_shaders.find(unique_identifier); - if (found == unspecialized_shaders.end()) { + const auto found = runtime_cache.find(unique_identifier); + if (found == runtime_cache.end()) { shader = CachedShader::CreateStageFromMemory(params, program, std::move(code), std::move(code_b)); } else { - shader = CachedShader::CreateFromCache(params, found->second); + const std::size_t size_in_bytes = code.size() * sizeof(u64); + shader = CachedShader::CreateFromCache(params, found->second, size_in_bytes); } Register(shader); @@ -672,19 +491,19 @@ Shader ShaderCacheOpenGL::GetComputeKernel(GPUVAddr code_addr) { return kernel; } - // No kernel found - create a new one + // No kernel found, create a new one auto code{GetShaderCode(memory_manager, code_addr, host_ptr)}; - const auto unique_identifier{GetUniqueIdentifier(ShaderType::Compute, false, code, {})}; - const auto precompiled_variants = GetPrecompiledVariants(unique_identifier); + const auto unique_identifier{GetUniqueIdentifier(ShaderType::Compute, false, code)}; const auto cpu_addr{*memory_manager.GpuToCpuAddress(code_addr)}; - const ShaderParameters params{system, disk_cache, precompiled_variants, device, + const ShaderParameters params{system, disk_cache, device, cpu_addr, host_ptr, unique_identifier}; - const auto found = unspecialized_shaders.find(unique_identifier); - if (found == unspecialized_shaders.end()) { + const auto found = runtime_cache.find(unique_identifier); + if (found == runtime_cache.end()) { kernel = CachedShader::CreateKernelFromMemory(params, std::move(code)); } else { - kernel = CachedShader::CreateFromCache(params, found->second); + const std::size_t size_in_bytes = code.size() * sizeof(u64); + kernel = CachedShader::CreateFromCache(params, found->second, size_in_bytes); } Register(kernel); diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h index 7b1470db3..4935019fc 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_cache.h @@ -22,7 +22,7 @@ #include "video_core/renderer_opengl/gl_resource_manager.h" #include "video_core/renderer_opengl/gl_shader_decompiler.h" #include "video_core/renderer_opengl/gl_shader_disk_cache.h" -#include "video_core/shader/const_buffer_locker.h" +#include "video_core/shader/registry.h" #include "video_core/shader/shader_ir.h" namespace Core { @@ -41,22 +41,17 @@ class RasterizerOpenGL; struct UnspecializedShader; using Shader = std::shared_ptr<CachedShader>; -using CachedProgram = std::shared_ptr<OGLProgram>; using Maxwell = Tegra::Engines::Maxwell3D::Regs; -using PrecompiledPrograms = std::unordered_map<ShaderDiskCacheUsage, CachedProgram>; -using PrecompiledVariants = std::vector<PrecompiledPrograms::iterator>; - -struct UnspecializedShader { - GLShader::ShaderEntries entries; - Tegra::Engines::ShaderType type; - ProgramCode code; - ProgramCode code_b; + +struct PrecompiledShader { + std::shared_ptr<OGLProgram> program; + std::shared_ptr<VideoCommon::Shader::Registry> registry; + ShaderEntries entries; }; struct ShaderParameters { Core::System& system; ShaderDiskCacheOpenGL& disk_cache; - const PrecompiledVariants* precompiled_variants; const Device& device; VAddr cpu_addr; u8* host_ptr; @@ -65,61 +60,45 @@ struct ShaderParameters { class CachedShader final : public RasterizerCacheObject { public: - static Shader CreateStageFromMemory(const ShaderParameters& params, - Maxwell::ShaderProgram program_type, - ProgramCode program_code, ProgramCode program_code_b); - static Shader CreateKernelFromMemory(const ShaderParameters& params, ProgramCode code); + ~CachedShader(); - static Shader CreateFromCache(const ShaderParameters& params, - const UnspecializedShader& unspecialized); + /// Gets the GL program handle for the shader + GLuint GetHandle() const; + /// Returns the guest CPU address of the shader VAddr GetCpuAddr() const override { return cpu_addr; } + /// Returns the size in bytes of the shader std::size_t GetSizeInBytes() const override { - return code.size() * sizeof(u64); + return size_in_bytes; } /// Gets the shader entries for the shader - const GLShader::ShaderEntries& GetShaderEntries() const { + const ShaderEntries& GetEntries() const { return entries; } - /// Gets the GL program handle for the shader - GLuint GetHandle(const ProgramVariant& variant); - -private: - struct LockerVariant { - std::unique_ptr<VideoCommon::Shader::ConstBufferLocker> locker; - std::unordered_map<ProgramVariant, CachedProgram> programs; - }; - - explicit CachedShader(const ShaderParameters& params, Tegra::Engines::ShaderType shader_type, - GLShader::ShaderEntries entries, ProgramCode program_code, - ProgramCode program_code_b); - - bool EnsureValidLockerVariant(); - - ShaderDiskCacheUsage GetUsage(const ProgramVariant& variant, - const VideoCommon::Shader::ConstBufferLocker& locker) const; - - Core::System& system; - ShaderDiskCacheOpenGL& disk_cache; - const Device& device; - - VAddr cpu_addr{}; - - u64 unique_identifier{}; - Tegra::Engines::ShaderType shader_type{}; - - GLShader::ShaderEntries entries; + static Shader CreateStageFromMemory(const ShaderParameters& params, + Maxwell::ShaderProgram program_type, + ProgramCode program_code, ProgramCode program_code_b); + static Shader CreateKernelFromMemory(const ShaderParameters& params, ProgramCode code); - ProgramCode code; - ProgramCode code_b; + static Shader CreateFromCache(const ShaderParameters& params, + const PrecompiledShader& precompiled_shader, + std::size_t size_in_bytes); - LockerVariant* curr_locker_variant = nullptr; - std::vector<std::unique_ptr<LockerVariant>> locker_variants; +private: + explicit CachedShader(const u8* host_ptr, VAddr cpu_addr, std::size_t size_in_bytes, + std::shared_ptr<VideoCommon::Shader::Registry> registry, + ShaderEntries entries, std::shared_ptr<OGLProgram> program); + + std::shared_ptr<VideoCommon::Shader::Registry> registry; + ShaderEntries entries; + VAddr cpu_addr = 0; + std::size_t size_in_bytes = 0; + std::shared_ptr<OGLProgram> program; }; class ShaderCacheOpenGL final : public RasterizerCache<Shader> { @@ -142,25 +121,15 @@ protected: void FlushObjectInner(const Shader& object) override {} private: - bool GenerateUnspecializedShaders(const std::atomic_bool& stop_loading, - const VideoCore::DiskResourceLoadCallback& callback, - const std::vector<ShaderDiskCacheRaw>& raws); - - CachedProgram GeneratePrecompiledProgram(const ShaderDiskCacheDump& dump, - const std::unordered_set<GLenum>& supported_formats); - - const PrecompiledVariants* GetPrecompiledVariants(u64 unique_identifier) const; + std::shared_ptr<OGLProgram> GeneratePrecompiledProgram( + const ShaderDiskCacheEntry& entry, const ShaderDiskCachePrecompiled& precompiled_entry, + const std::unordered_set<GLenum>& supported_formats); Core::System& system; Core::Frontend::EmuWindow& emu_window; const Device& device; - ShaderDiskCacheOpenGL disk_cache; - - PrecompiledPrograms precompiled_programs; - std::unordered_map<u64, PrecompiledVariants> precompiled_variants; - - std::unordered_map<u64, UnspecializedShader> unspecialized_shaders; + std::unordered_map<u64, PrecompiledShader> runtime_cache; std::array<Shader, Maxwell::MaxShaderProgram> last_shaders; }; diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 4735000b5..8aa4a7ac9 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -23,8 +23,9 @@ #include "video_core/shader/ast.h" #include "video_core/shader/node.h" #include "video_core/shader/shader_ir.h" +#include "video_core/shader/transform_feedback.h" -namespace OpenGL::GLShader { +namespace OpenGL { namespace { @@ -36,6 +37,8 @@ using Tegra::Shader::IpaInterpMode; using Tegra::Shader::IpaMode; using Tegra::Shader::IpaSampleMode; using Tegra::Shader::Register; +using VideoCommon::Shader::BuildTransformFeedback; +using VideoCommon::Shader::Registry; using namespace std::string_literals; using namespace VideoCommon::Shader; @@ -48,6 +51,11 @@ class ExprDecompiler; enum class Type { Void, Bool, Bool2, Float, Int, Uint, HalfFloat }; +constexpr std::array FLOAT_TYPES{"float", "vec2", "vec3", "vec4"}; + +constexpr std::string_view INPUT_ATTRIBUTE_NAME = "in_attr"; +constexpr std::string_view OUTPUT_ATTRIBUTE_NAME = "out_attr"; + struct TextureOffset {}; struct TextureDerivates {}; using TextureArgument = std::pair<Type, Node>; @@ -56,6 +64,25 @@ using TextureIR = std::variant<TextureOffset, TextureDerivates, TextureArgument> constexpr u32 MAX_CONSTBUFFER_ELEMENTS = static_cast<u32>(Maxwell::MaxConstBufferSize) / (4 * sizeof(float)); +constexpr std::string_view CommonDeclarations = R"(#define ftoi floatBitsToInt +#define ftou floatBitsToUint +#define itof intBitsToFloat +#define utof uintBitsToFloat + +bvec2 HalfFloatNanComparison(bvec2 comparison, vec2 pair1, vec2 pair2) {{ + bvec2 is_nan1 = isnan(pair1); + bvec2 is_nan2 = isnan(pair2); + return bvec2(comparison.x || is_nan1.x || is_nan2.x, comparison.y || is_nan1.y || is_nan2.y); +}} + +const float fswzadd_modifiers_a[] = float[4](-1.0f, 1.0f, -1.0f, 0.0f ); +const float fswzadd_modifiers_b[] = float[4](-1.0f, -1.0f, 1.0f, -1.0f ); + +layout (std140, binding = {}) uniform vs_config {{ + float y_direction; +}}; +)"; + class ShaderWriter final { public: void AddExpression(std::string_view text) { @@ -269,12 +296,41 @@ const char* GetImageTypeDeclaration(Tegra::Shader::ImageType image_type) { } } +/// Describes primitive behavior on geometry shaders +std::pair<const char*, u32> GetPrimitiveDescription(Maxwell::PrimitiveTopology topology) { + switch (topology) { + case Maxwell::PrimitiveTopology::Points: + return {"points", 1}; + case Maxwell::PrimitiveTopology::Lines: + case Maxwell::PrimitiveTopology::LineStrip: + return {"lines", 2}; + case Maxwell::PrimitiveTopology::LinesAdjacency: + case Maxwell::PrimitiveTopology::LineStripAdjacency: + return {"lines_adjacency", 4}; + case Maxwell::PrimitiveTopology::Triangles: + case Maxwell::PrimitiveTopology::TriangleStrip: + case Maxwell::PrimitiveTopology::TriangleFan: + return {"triangles", 3}; + case Maxwell::PrimitiveTopology::TrianglesAdjacency: + case Maxwell::PrimitiveTopology::TriangleStripAdjacency: + return {"triangles_adjacency", 6}; + default: + UNIMPLEMENTED_MSG("topology={}", static_cast<int>(topology)); + return {"points", 1}; + } +} + /// Generates code to use for a swizzle operation. -constexpr const char* GetSwizzle(u32 element) { +constexpr const char* GetSwizzle(std::size_t element) { constexpr std::array swizzle = {".x", ".y", ".z", ".w"}; return swizzle.at(element); } +constexpr const char* GetColorSwizzle(std::size_t element) { + constexpr std::array swizzle = {".r", ".g", ".b", ".a"}; + return swizzle.at(element); +} + /// Translate topology std::string GetTopologyName(Tegra::Shader::OutputTopology topology) { switch (topology) { @@ -310,10 +366,19 @@ constexpr bool IsGenericAttribute(Attribute::Index index) { return index >= Attribute::Index::Attribute_0 && index <= Attribute::Index::Attribute_31; } +constexpr bool IsLegacyTexCoord(Attribute::Index index) { + return static_cast<int>(index) >= static_cast<int>(Attribute::Index::TexCoord_0) && + static_cast<int>(index) <= static_cast<int>(Attribute::Index::TexCoord_7); +} + constexpr Attribute::Index ToGenericAttribute(u64 value) { return static_cast<Attribute::Index>(value + static_cast<u64>(Attribute::Index::Attribute_0)); } +constexpr int GetLegacyTexCoordIndex(Attribute::Index index) { + return static_cast<int>(index) - static_cast<int>(Attribute::Index::TexCoord_0); +} + u32 GetGenericAttributeIndex(Attribute::Index index) { ASSERT(IsGenericAttribute(index)); return static_cast<u32>(index) - static_cast<u32>(Attribute::Index::Attribute_0); @@ -337,15 +402,66 @@ std::string FlowStackTopName(MetaStackClass stack) { return fmt::format("{}_flow_stack_top", GetFlowStackPrefix(stack)); } -[[deprecated]] constexpr bool IsVertexShader(ShaderType stage) { - return stage == ShaderType::Vertex; -} +struct GenericVaryingDescription { + std::string name; + u8 first_element = 0; + bool is_scalar = false; +}; class GLSLDecompiler final { public: - explicit GLSLDecompiler(const Device& device, const ShaderIR& ir, ShaderType stage, - std::string suffix) - : device{device}, ir{ir}, stage{stage}, suffix{suffix}, header{ir.GetHeader()} {} + explicit GLSLDecompiler(const Device& device, const ShaderIR& ir, const Registry& registry, + ShaderType stage, std::string_view identifier, std::string_view suffix) + : device{device}, ir{ir}, registry{registry}, stage{stage}, + identifier{identifier}, suffix{suffix}, header{ir.GetHeader()} { + if (stage != ShaderType::Compute) { + transform_feedback = BuildTransformFeedback(registry.GetGraphicsInfo()); + } + } + + void Decompile() { + DeclareHeader(); + DeclareVertex(); + DeclareGeometry(); + DeclareFragment(); + DeclareCompute(); + DeclareInputAttributes(); + DeclareOutputAttributes(); + DeclareImages(); + DeclareSamplers(); + DeclareGlobalMemory(); + DeclareConstantBuffers(); + DeclareLocalMemory(); + DeclareRegisters(); + DeclarePredicates(); + DeclareInternalFlags(); + DeclareCustomVariables(); + DeclarePhysicalAttributeReader(); + + code.AddLine("void main() {{"); + ++code.scope; + + if (stage == ShaderType::Vertex) { + code.AddLine("gl_Position = vec4(0.0f, 0.0f, 0.0f, 1.0f);"); + } + + if (ir.IsDecompiled()) { + DecompileAST(); + } else { + DecompileBranchMode(); + } + + --code.scope; + code.AddLine("}}"); + } + + std::string GetResult() { + return code.GetResult(); + } + +private: + friend class ASTDecompiler; + friend class ExprDecompiler; void DecompileBranchMode() { // VM's program counter @@ -387,46 +503,40 @@ public: void DecompileAST(); - void Decompile() { - DeclareVertex(); - DeclareGeometry(); - DeclareRegisters(); - DeclareCustomVariables(); - DeclarePredicates(); - DeclareLocalMemory(); - DeclareInternalFlags(); - DeclareInputAttributes(); - DeclareOutputAttributes(); - DeclareConstantBuffers(); - DeclareGlobalMemory(); - DeclareSamplers(); - DeclareImages(); - DeclarePhysicalAttributeReader(); - - code.AddLine("void execute_{}() {{", suffix); - ++code.scope; - - if (ir.IsDecompiled()) { - DecompileAST(); - } else { - DecompileBranchMode(); + void DeclareHeader() { + if (!identifier.empty()) { + code.AddLine("// {}", identifier); + } + code.AddLine("#version 440 {}", ir.UsesLegacyVaryings() ? "compatibility" : "core"); + code.AddLine("#extension GL_ARB_separate_shader_objects : enable"); + if (device.HasShaderBallot()) { + code.AddLine("#extension GL_ARB_shader_ballot : require"); + } + if (device.HasVertexViewportLayer()) { + code.AddLine("#extension GL_ARB_shader_viewport_layer_array : require"); + } + if (device.HasImageLoadFormatted()) { + code.AddLine("#extension GL_EXT_shader_image_load_formatted : require"); } + if (device.HasWarpIntrinsics()) { + code.AddLine("#extension GL_NV_gpu_shader5 : require"); + code.AddLine("#extension GL_NV_shader_thread_group : require"); + code.AddLine("#extension GL_NV_shader_thread_shuffle : require"); + } + // This pragma stops Nvidia's driver from over optimizing math (probably using fp16 + // operations) on places where we don't want to. + // Thanks to Ryujinx for finding this workaround. + code.AddLine("#pragma optionNV(fastmath off)"); - --code.scope; - code.AddLine("}}"); - } + code.AddNewLine(); - std::string GetResult() { - return code.GetResult(); + code.AddLine(CommonDeclarations, EmulationUniformBlockBinding); } -private: - friend class ASTDecompiler; - friend class ExprDecompiler; - void DeclareVertex() { - if (!IsVertexShader(stage)) + if (stage != ShaderType::Vertex) { return; + } DeclareVertexRedeclarations(); } @@ -436,9 +546,15 @@ private: return; } + const auto& info = registry.GetGraphicsInfo(); + const auto input_topology = info.primitive_topology; + const auto [glsl_topology, max_vertices] = GetPrimitiveDescription(input_topology); + max_input_vertices = max_vertices; + code.AddLine("layout ({}) in;", glsl_topology); + const auto topology = GetTopologyName(header.common3.output_topology); - const auto max_vertices = header.common4.max_output_vertices.Value(); - code.AddLine("layout ({}, max_vertices = {}) out;", topology, max_vertices); + const auto max_output_vertices = header.common4.max_output_vertices.Value(); + code.AddLine("layout ({}, max_vertices = {}) out;", topology, max_output_vertices); code.AddNewLine(); code.AddLine("in gl_PerVertex {{"); @@ -450,11 +566,50 @@ private: DeclareVertexRedeclarations(); } + void DeclareFragment() { + if (stage != ShaderType::Fragment) { + return; + } + if (ir.UsesLegacyVaryings()) { + code.AddLine("in gl_PerFragment {{"); + ++code.scope; + code.AddLine("vec4 gl_TexCoord[8];"); + code.AddLine("vec4 gl_Color;"); + code.AddLine("vec4 gl_SecondaryColor;"); + --code.scope; + code.AddLine("}};"); + } + + for (u32 rt = 0; rt < Maxwell::NumRenderTargets; ++rt) { + code.AddLine("layout (location = {}) out vec4 frag_color{};", rt, rt); + } + } + + void DeclareCompute() { + if (stage != ShaderType::Compute) { + return; + } + const auto& info = registry.GetComputeInfo(); + if (const u32 size = info.shared_memory_size_in_words; size > 0) { + code.AddLine("shared uint smem[{}];", size); + code.AddNewLine(); + } + code.AddLine("layout (local_size_x = {}, local_size_y = {}, local_size_z = {}) in;", + info.workgroup_size[0], info.workgroup_size[1], info.workgroup_size[2]); + code.AddNewLine(); + } + void DeclareVertexRedeclarations() { code.AddLine("out gl_PerVertex {{"); ++code.scope; - code.AddLine("vec4 gl_Position;"); + auto pos_xfb = GetTransformFeedbackDecoration(Attribute::Index::Position); + if (!pos_xfb.empty()) { + pos_xfb = fmt::format("layout ({}) ", pos_xfb); + } + const char* pos_type = + FLOAT_TYPES.at(GetNumComponents(Attribute::Index::Position).value_or(4) - 1); + code.AddLine("{}{} gl_Position;", pos_xfb, pos_type); for (const auto attribute : ir.GetOutputAttributes()) { if (attribute == Attribute::Index::ClipDistances0123 || @@ -463,14 +618,14 @@ private: break; } } - if (!IsVertexShader(stage) || device.HasVertexViewportLayer()) { + if (stage != ShaderType::Vertex || device.HasVertexViewportLayer()) { if (ir.UsesLayer()) { code.AddLine("int gl_Layer;"); } if (ir.UsesViewportIndex()) { code.AddLine("int gl_ViewportIndex;"); } - } else if ((ir.UsesLayer() || ir.UsesViewportIndex()) && IsVertexShader(stage) && + } else if ((ir.UsesLayer() || ir.UsesViewportIndex()) && stage == ShaderType::Vertex && !device.HasVertexViewportLayer()) { LOG_ERROR( Render_OpenGL, @@ -481,12 +636,12 @@ private: code.AddLine("float gl_PointSize;"); } - if (ir.UsesInstanceId()) { - code.AddLine("int gl_InstanceID;"); - } - - if (ir.UsesVertexId()) { - code.AddLine("int gl_VertexID;"); + if (ir.UsesLegacyVaryings()) { + code.AddLine("vec4 gl_TexCoord[8];"); + code.AddLine("vec4 gl_FrontColor;"); + code.AddLine("vec4 gl_FrontSecondaryColor;"); + code.AddLine("vec4 gl_BackColor;"); + code.AddLine("vec4 gl_BackSecondaryColor;"); } --code.scope; @@ -525,18 +680,16 @@ private: } void DeclareLocalMemory() { + u64 local_memory_size = 0; if (stage == ShaderType::Compute) { - code.AddLine("#ifdef LOCAL_MEMORY_SIZE"); - code.AddLine("uint {}[LOCAL_MEMORY_SIZE];", GetLocalMemory()); - code.AddLine("#endif"); - return; + local_memory_size = registry.GetComputeInfo().local_memory_size_in_words * 4ULL; + } else { + local_memory_size = header.GetLocalMemorySize(); } - - const u64 local_memory_size = header.GetLocalMemorySize(); if (local_memory_size == 0) { return; } - const auto element_count = Common::AlignUp(local_memory_size, 4) / 4; + const u64 element_count = Common::AlignUp(local_memory_size, 4) / 4; code.AddLine("uint {}[{}];", GetLocalMemory(), element_count); code.AddNewLine(); } @@ -589,7 +742,7 @@ private: void DeclareInputAttribute(Attribute::Index index, bool skip_unused) { const u32 location{GetGenericAttributeIndex(index)}; - std::string name{GetInputAttribute(index)}; + std::string name{GetGenericInputAttribute(index)}; if (stage == ShaderType::Geometry) { name = "gs_" + name + "[]"; } @@ -626,9 +779,59 @@ private: } } + std::optional<std::size_t> GetNumComponents(Attribute::Index index, u8 element = 0) const { + const u8 location = static_cast<u8>(static_cast<u32>(index) * 4 + element); + const auto it = transform_feedback.find(location); + if (it == transform_feedback.end()) { + return {}; + } + return it->second.components; + } + + std::string GetTransformFeedbackDecoration(Attribute::Index index, u8 element = 0) const { + const u8 location = static_cast<u8>(static_cast<u32>(index) * 4 + element); + const auto it = transform_feedback.find(location); + if (it == transform_feedback.end()) { + return {}; + } + + const VaryingTFB& tfb = it->second; + return fmt::format("xfb_buffer = {}, xfb_offset = {}, xfb_stride = {}", tfb.buffer, + tfb.offset, tfb.stride); + } + void DeclareOutputAttribute(Attribute::Index index) { - const u32 location{GetGenericAttributeIndex(index)}; - code.AddLine("layout (location = {}) out vec4 {};", location, GetOutputAttribute(index)); + static constexpr std::string_view swizzle = "xyzw"; + u8 element = 0; + while (element < 4) { + auto xfb = GetTransformFeedbackDecoration(index, element); + if (!xfb.empty()) { + xfb = fmt::format(", {}", xfb); + } + const std::size_t remainder = 4 - element; + const std::size_t num_components = GetNumComponents(index, element).value_or(remainder); + const char* const type = FLOAT_TYPES.at(num_components - 1); + + const u32 location = GetGenericAttributeIndex(index); + + GenericVaryingDescription description; + description.first_element = static_cast<u8>(element); + description.is_scalar = num_components == 1; + description.name = AppendSuffix(location, OUTPUT_ATTRIBUTE_NAME); + if (element != 0 || num_components != 4) { + const std::string_view name_swizzle = swizzle.substr(element, num_components); + description.name = fmt::format("{}_{}", description.name, name_swizzle); + } + for (std::size_t i = 0; i < num_components; ++i) { + const u8 offset = static_cast<u8>(location * 4 + element + i); + varying_description.insert({offset, description}); + } + + code.AddLine("layout (location = {}, component = {}{}) out {} {};", location, element, + xfb, type, description.name); + + element = static_cast<u8>(static_cast<std::size_t>(element) + num_components); + } } void DeclareConstantBuffers() { @@ -925,7 +1128,8 @@ private: // TODO(Rodrigo): Guard geometry inputs against out of bound reads. Some games // set an 0x80000000 index for those and the shader fails to build. Find out why // this happens and what's its intent. - return fmt::format("gs_{}[{} % MAX_VERTEX_INPUT]", name, Visit(buffer).AsUint()); + return fmt::format("gs_{}[{} % {}]", name, Visit(buffer).AsUint(), + max_input_vertices.value()); } return std::string(name); }; @@ -943,6 +1147,10 @@ private: default: UNREACHABLE(); } + case Attribute::Index::FrontColor: + return {"gl_Color"s + GetSwizzle(element), Type::Float}; + case Attribute::Index::FrontSecondaryColor: + return {"gl_SecondaryColor"s + GetSwizzle(element), Type::Float}; case Attribute::Index::PointCoord: switch (element) { case 0: @@ -959,7 +1167,7 @@ private: // TODO(Subv): Find out what the values are for the first two elements when inside a // vertex shader, and what's the value of the fourth element when inside a Tess Eval // shader. - ASSERT(IsVertexShader(stage)); + ASSERT(stage == ShaderType::Vertex); switch (element) { case 2: // Config pack's first value is instance_id. @@ -980,7 +1188,13 @@ private: return {"0", Type::Int}; default: if (IsGenericAttribute(attribute)) { - return {GeometryPass(GetInputAttribute(attribute)) + GetSwizzle(element), + return {GeometryPass(GetGenericInputAttribute(attribute)) + GetSwizzle(element), + Type::Float}; + } + if (IsLegacyTexCoord(attribute)) { + UNIMPLEMENTED_IF(stage == ShaderType::Geometry); + return {fmt::format("gl_TexCoord[{}]{}", GetLegacyTexCoordIndex(attribute), + GetSwizzle(element)), Type::Float}; } break; @@ -1021,21 +1235,22 @@ private: } std::optional<Expression> GetOutputAttribute(const AbufNode* abuf) { + const u32 element = abuf->GetElement(); switch (const auto attribute = abuf->GetIndex()) { case Attribute::Index::Position: - return {{"gl_Position"s + GetSwizzle(abuf->GetElement()), Type::Float}}; + return {{"gl_Position"s + GetSwizzle(element), Type::Float}}; case Attribute::Index::LayerViewportPointSize: - switch (abuf->GetElement()) { + switch (element) { case 0: UNIMPLEMENTED(); return {}; case 1: - if (IsVertexShader(stage) && !device.HasVertexViewportLayer()) { + if (stage == ShaderType::Vertex && !device.HasVertexViewportLayer()) { return {}; } return {{"gl_Layer", Type::Int}}; case 2: - if (IsVertexShader(stage) && !device.HasVertexViewportLayer()) { + if (stage == ShaderType::Vertex && !device.HasVertexViewportLayer()) { return {}; } return {{"gl_ViewportIndex", Type::Int}}; @@ -1043,14 +1258,26 @@ private: return {{"gl_PointSize", Type::Float}}; } return {}; + case Attribute::Index::FrontColor: + return {{"gl_FrontColor"s + GetSwizzle(element), Type::Float}}; + case Attribute::Index::FrontSecondaryColor: + return {{"gl_FrontSecondaryColor"s + GetSwizzle(element), Type::Float}}; + case Attribute::Index::BackColor: + return {{"gl_BackColor"s + GetSwizzle(element), Type::Float}}; + case Attribute::Index::BackSecondaryColor: + return {{"gl_BackSecondaryColor"s + GetSwizzle(element), Type::Float}}; case Attribute::Index::ClipDistances0123: - return {{fmt::format("gl_ClipDistance[{}]", abuf->GetElement()), Type::Float}}; + return {{fmt::format("gl_ClipDistance[{}]", element), Type::Float}}; case Attribute::Index::ClipDistances4567: - return {{fmt::format("gl_ClipDistance[{}]", abuf->GetElement() + 4), Type::Float}}; + return {{fmt::format("gl_ClipDistance[{}]", element + 4), Type::Float}}; default: if (IsGenericAttribute(attribute)) { - return { - {GetOutputAttribute(attribute) + GetSwizzle(abuf->GetElement()), Type::Float}}; + return {{GetGenericOutputAttribute(attribute, element), Type::Float}}; + } + if (IsLegacyTexCoord(attribute)) { + return {{fmt::format("gl_TexCoord[{}]{}", GetLegacyTexCoordIndex(attribute), + GetSwizzle(element)), + Type::Float}}; } UNIMPLEMENTED_MSG("Unhandled output attribute: {}", static_cast<u32>(attribute)); return {}; @@ -1822,16 +2049,19 @@ private: expr += GetSampler(meta->sampler); expr += ", "; - expr += constructors.at(operation.GetOperandsCount() - 1); + expr += constructors.at(operation.GetOperandsCount() + (meta->array ? 1 : 0) - 1); expr += '('; for (std::size_t i = 0; i < count; ++i) { - expr += VisitOperand(operation, i).AsInt(); - const std::size_t next = i + 1; - if (next == count) - expr += ')'; - else if (next < count) + if (i > 0) { expr += ", "; + } + expr += VisitOperand(operation, i).AsInt(); + } + if (meta->array) { + expr += ", "; + expr += Visit(meta->array).AsInt(); } + expr += ')'; if (meta->lod && !meta->sampler.IsBuffer()) { expr += ", "; @@ -1945,7 +2175,7 @@ private: // TODO(Subv): Figure out how dual-source blending is configured in the Switch. for (u32 component = 0; component < 4; ++component) { if (header.ps.IsColorComponentOutputEnabled(render_target, component)) { - code.AddLine("FragColor{}[{}] = {};", render_target, component, + code.AddLine("frag_color{}{} = {};", render_target, GetColorSwizzle(component), SafeGetRegister(current_reg).AsFloat()); ++current_reg; } @@ -2261,27 +2491,34 @@ private: static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount)); std::string GetRegister(u32 index) const { - return GetDeclarationWithSuffix(index, "gpr"); + return AppendSuffix(index, "gpr"); } std::string GetCustomVariable(u32 index) const { - return GetDeclarationWithSuffix(index, "custom_var"); + return AppendSuffix(index, "custom_var"); } std::string GetPredicate(Tegra::Shader::Pred pred) const { - return GetDeclarationWithSuffix(static_cast<u32>(pred), "pred"); + return AppendSuffix(static_cast<u32>(pred), "pred"); } - std::string GetInputAttribute(Attribute::Index attribute) const { - return GetDeclarationWithSuffix(GetGenericAttributeIndex(attribute), "input_attr"); + std::string GetGenericInputAttribute(Attribute::Index attribute) const { + return AppendSuffix(GetGenericAttributeIndex(attribute), INPUT_ATTRIBUTE_NAME); } - std::string GetOutputAttribute(Attribute::Index attribute) const { - return GetDeclarationWithSuffix(GetGenericAttributeIndex(attribute), "output_attr"); + std::unordered_map<u8, GenericVaryingDescription> varying_description; + + std::string GetGenericOutputAttribute(Attribute::Index attribute, std::size_t element) const { + const u8 offset = static_cast<u8>(GetGenericAttributeIndex(attribute) * 4 + element); + const auto& description = varying_description.at(offset); + if (description.is_scalar) { + return description.name; + } + return fmt::format("{}[{}]", description.name, element - description.first_element); } std::string GetConstBuffer(u32 index) const { - return GetDeclarationWithSuffix(index, "cbuf"); + return AppendSuffix(index, "cbuf"); } std::string GetGlobalMemory(const GlobalMemoryBase& descriptor) const { @@ -2294,11 +2531,15 @@ private: } std::string GetConstBufferBlock(u32 index) const { - return GetDeclarationWithSuffix(index, "cbuf_block"); + return AppendSuffix(index, "cbuf_block"); } std::string GetLocalMemory() const { - return "lmem_" + suffix; + if (suffix.empty()) { + return "lmem"; + } else { + return "lmem_" + std::string{suffix}; + } } std::string GetInternalFlag(InternalFlag flag) const { @@ -2307,23 +2548,31 @@ private: const auto index = static_cast<u32>(flag); ASSERT(index < static_cast<u32>(InternalFlag::Amount)); - return fmt::format("{}_{}", InternalFlagNames[index], suffix); + if (suffix.empty()) { + return InternalFlagNames[index]; + } else { + return fmt::format("{}_{}", InternalFlagNames[index], suffix); + } } std::string GetSampler(const Sampler& sampler) const { - return GetDeclarationWithSuffix(static_cast<u32>(sampler.GetIndex()), "sampler"); + return AppendSuffix(static_cast<u32>(sampler.GetIndex()), "sampler"); } std::string GetImage(const Image& image) const { - return GetDeclarationWithSuffix(static_cast<u32>(image.GetIndex()), "image"); + return AppendSuffix(static_cast<u32>(image.GetIndex()), "image"); } - std::string GetDeclarationWithSuffix(u32 index, std::string_view name) const { - return fmt::format("{}_{}_{}", name, index, suffix); + std::string AppendSuffix(u32 index, std::string_view name) const { + if (suffix.empty()) { + return fmt::format("{}{}", name, index); + } else { + return fmt::format("{}{}_{}", name, index, suffix); + } } u32 GetNumPhysicalInputAttributes() const { - return IsVertexShader(stage) ? GetNumPhysicalAttributes() : GetNumPhysicalVaryings(); + return stage == ShaderType::Vertex ? GetNumPhysicalAttributes() : GetNumPhysicalVaryings(); } u32 GetNumPhysicalAttributes() const { @@ -2334,17 +2583,31 @@ private: return std::min<u32>(device.GetMaxVaryings(), Maxwell::NumVaryings); } + bool IsRenderTargetEnabled(u32 render_target) const { + for (u32 component = 0; component < 4; ++component) { + if (header.ps.IsColorComponentOutputEnabled(render_target, component)) { + return true; + } + } + return false; + } + const Device& device; const ShaderIR& ir; + const Registry& registry; const ShaderType stage; - const std::string suffix; + const std::string_view identifier; + const std::string_view suffix; const Header header; + std::unordered_map<u8, VaryingTFB> transform_feedback; ShaderWriter code; + + std::optional<u32> max_input_vertices; }; -std::string GetFlowVariable(u32 i) { - return fmt::format("flow_var_{}", i); +std::string GetFlowVariable(u32 index) { + return fmt::format("flow_var{}", index); } class ExprDecompiler { @@ -2531,7 +2794,7 @@ void GLSLDecompiler::DecompileAST() { } // Anonymous namespace -ShaderEntries GetEntries(const VideoCommon::Shader::ShaderIR& ir) { +ShaderEntries MakeEntries(const VideoCommon::Shader::ShaderIR& ir) { ShaderEntries entries; for (const auto& cbuf : ir.GetConstantBuffers()) { entries.const_buffers.emplace_back(cbuf.second.GetMaxOffset(), cbuf.second.IsIndirect(), @@ -2547,33 +2810,20 @@ ShaderEntries GetEntries(const VideoCommon::Shader::ShaderIR& ir) { for (const auto& image : ir.GetImages()) { entries.images.emplace_back(image); } - entries.clip_distances = ir.GetClipDistances(); + const auto clip_distances = ir.GetClipDistances(); + for (std::size_t i = 0; i < std::size(clip_distances); ++i) { + entries.clip_distances = (clip_distances[i] ? 1U : 0U) << i; + } entries.shader_length = ir.GetLength(); return entries; } -std::string GetCommonDeclarations() { - return R"(#define ftoi floatBitsToInt -#define ftou floatBitsToUint -#define itof intBitsToFloat -#define utof uintBitsToFloat - -bvec2 HalfFloatNanComparison(bvec2 comparison, vec2 pair1, vec2 pair2) { - bvec2 is_nan1 = isnan(pair1); - bvec2 is_nan2 = isnan(pair2); - return bvec2(comparison.x || is_nan1.x || is_nan2.x, comparison.y || is_nan1.y || is_nan2.y); -} - -const float fswzadd_modifiers_a[] = float[4](-1.0f, 1.0f, -1.0f, 0.0f ); -const float fswzadd_modifiers_b[] = float[4](-1.0f, -1.0f, 1.0f, -1.0f ); -)"; -} - -std::string Decompile(const Device& device, const ShaderIR& ir, ShaderType stage, - const std::string& suffix) { - GLSLDecompiler decompiler(device, ir, stage, suffix); +std::string DecompileShader(const Device& device, const ShaderIR& ir, const Registry& registry, + ShaderType stage, std::string_view identifier, + std::string_view suffix) { + GLSLDecompiler decompiler(device, ir, registry, stage, identifier, suffix); decompiler.Decompile(); return decompiler.GetResult(); } -} // namespace OpenGL::GLShader +} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.h b/src/video_core/renderer_opengl/gl_shader_decompiler.h index 7876f48d6..e7dbd810c 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.h +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.h @@ -6,22 +6,18 @@ #include <array> #include <string> +#include <string_view> #include <utility> #include <vector> #include "common/common_types.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/engines/shader_type.h" +#include "video_core/shader/registry.h" #include "video_core/shader/shader_ir.h" -namespace VideoCommon::Shader { -class ShaderIR; -} - namespace OpenGL { -class Device; -} -namespace OpenGL::GLShader { +class Device; using Maxwell = Tegra::Engines::Maxwell3D::Regs; using SamplerEntry = VideoCommon::Shader::Sampler; @@ -74,15 +70,15 @@ struct ShaderEntries { std::vector<GlobalMemoryEntry> global_memory_entries; std::vector<SamplerEntry> samplers; std::vector<ImageEntry> images; - std::array<bool, Maxwell::NumClipDistances> clip_distances{}; + u32 clip_distances{}; std::size_t shader_length{}; }; -ShaderEntries GetEntries(const VideoCommon::Shader::ShaderIR& ir); - -std::string GetCommonDeclarations(); +ShaderEntries MakeEntries(const VideoCommon::Shader::ShaderIR& ir); -std::string Decompile(const Device& device, const VideoCommon::Shader::ShaderIR& ir, - Tegra::Engines::ShaderType stage, const std::string& suffix); +std::string DecompileShader(const Device& device, const VideoCommon::Shader::ShaderIR& ir, + const VideoCommon::Shader::Registry& registry, + Tegra::Engines::ShaderType stage, std::string_view identifier, + std::string_view suffix = {}); -} // namespace OpenGL::GLShader +} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp index 1fc204f6f..9e95a122b 100644 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp @@ -31,32 +31,24 @@ namespace { using ShaderCacheVersionHash = std::array<u8, 64>; -enum class TransferableEntryKind : u32 { - Raw, - Usage, -}; - struct ConstBufferKey { - u32 cbuf{}; - u32 offset{}; - u32 value{}; + u32 cbuf = 0; + u32 offset = 0; + u32 value = 0; }; struct BoundSamplerKey { - u32 offset{}; - Tegra::Engines::SamplerDescriptor sampler{}; + u32 offset = 0; + Tegra::Engines::SamplerDescriptor sampler; }; struct BindlessSamplerKey { - u32 cbuf{}; - u32 offset{}; - Tegra::Engines::SamplerDescriptor sampler{}; + u32 cbuf = 0; + u32 offset = 0; + Tegra::Engines::SamplerDescriptor sampler; }; -constexpr u32 NativeVersion = 12; - -// Making sure sizes doesn't change by accident -static_assert(sizeof(ProgramVariant) == 20); +constexpr u32 NativeVersion = 20; ShaderCacheVersionHash GetShaderCacheVersionHash() { ShaderCacheVersionHash hash{}; @@ -67,61 +59,124 @@ ShaderCacheVersionHash GetShaderCacheVersionHash() { } // Anonymous namespace -ShaderDiskCacheRaw::ShaderDiskCacheRaw(u64 unique_identifier, ShaderType type, ProgramCode code, - ProgramCode code_b) - : unique_identifier{unique_identifier}, type{type}, code{std::move(code)}, code_b{std::move( - code_b)} {} +ShaderDiskCacheEntry::ShaderDiskCacheEntry() = default; -ShaderDiskCacheRaw::ShaderDiskCacheRaw() = default; +ShaderDiskCacheEntry::~ShaderDiskCacheEntry() = default; -ShaderDiskCacheRaw::~ShaderDiskCacheRaw() = default; - -bool ShaderDiskCacheRaw::Load(FileUtil::IOFile& file) { - if (file.ReadBytes(&unique_identifier, sizeof(u64)) != sizeof(u64) || - file.ReadBytes(&type, sizeof(u32)) != sizeof(u32)) { +bool ShaderDiskCacheEntry::Load(FileUtil::IOFile& file) { + if (file.ReadBytes(&type, sizeof(u32)) != sizeof(u32)) { return false; } - u32 code_size{}; - u32 code_size_b{}; + u32 code_size; + u32 code_size_b; if (file.ReadBytes(&code_size, sizeof(u32)) != sizeof(u32) || file.ReadBytes(&code_size_b, sizeof(u32)) != sizeof(u32)) { return false; } - code.resize(code_size); code_b.resize(code_size_b); - if (file.ReadArray(code.data(), code_size) != code_size) + if (file.ReadArray(code.data(), code_size) != code_size) { return false; - + } if (HasProgramA() && file.ReadArray(code_b.data(), code_size_b) != code_size_b) { return false; } + + u8 is_texture_handler_size_known; + u32 texture_handler_size_value; + u32 num_keys; + u32 num_bound_samplers; + u32 num_bindless_samplers; + if (file.ReadArray(&unique_identifier, 1) != 1 || file.ReadArray(&bound_buffer, 1) != 1 || + file.ReadArray(&is_texture_handler_size_known, 1) != 1 || + file.ReadArray(&texture_handler_size_value, 1) != 1 || + file.ReadArray(&graphics_info, 1) != 1 || file.ReadArray(&compute_info, 1) != 1 || + file.ReadArray(&num_keys, 1) != 1 || file.ReadArray(&num_bound_samplers, 1) != 1 || + file.ReadArray(&num_bindless_samplers, 1) != 1) { + return false; + } + if (is_texture_handler_size_known) { + texture_handler_size = texture_handler_size_value; + } + + std::vector<ConstBufferKey> flat_keys(num_keys); + std::vector<BoundSamplerKey> flat_bound_samplers(num_bound_samplers); + std::vector<BindlessSamplerKey> flat_bindless_samplers(num_bindless_samplers); + if (file.ReadArray(flat_keys.data(), flat_keys.size()) != flat_keys.size() || + file.ReadArray(flat_bound_samplers.data(), flat_bound_samplers.size()) != + flat_bound_samplers.size() || + file.ReadArray(flat_bindless_samplers.data(), flat_bindless_samplers.size()) != + flat_bindless_samplers.size()) { + return false; + } + for (const auto& key : flat_keys) { + keys.insert({{key.cbuf, key.offset}, key.value}); + } + for (const auto& key : flat_bound_samplers) { + bound_samplers.emplace(key.offset, key.sampler); + } + for (const auto& key : flat_bindless_samplers) { + bindless_samplers.insert({{key.cbuf, key.offset}, key.sampler}); + } + return true; } -bool ShaderDiskCacheRaw::Save(FileUtil::IOFile& file) const { - if (file.WriteObject(unique_identifier) != 1 || file.WriteObject(static_cast<u32>(type)) != 1 || +bool ShaderDiskCacheEntry::Save(FileUtil::IOFile& file) const { + if (file.WriteObject(static_cast<u32>(type)) != 1 || file.WriteObject(static_cast<u32>(code.size())) != 1 || file.WriteObject(static_cast<u32>(code_b.size())) != 1) { return false; } - - if (file.WriteArray(code.data(), code.size()) != code.size()) + if (file.WriteArray(code.data(), code.size()) != code.size()) { return false; - + } if (HasProgramA() && file.WriteArray(code_b.data(), code_b.size()) != code_b.size()) { return false; } - return true; + + if (file.WriteObject(unique_identifier) != 1 || file.WriteObject(bound_buffer) != 1 || + file.WriteObject(static_cast<u8>(texture_handler_size.has_value())) != 1 || + file.WriteObject(texture_handler_size.value_or(0)) != 1 || + file.WriteObject(graphics_info) != 1 || file.WriteObject(compute_info) != 1 || + file.WriteObject(static_cast<u32>(keys.size())) != 1 || + file.WriteObject(static_cast<u32>(bound_samplers.size())) != 1 || + file.WriteObject(static_cast<u32>(bindless_samplers.size())) != 1) { + return false; + } + + std::vector<ConstBufferKey> flat_keys; + flat_keys.reserve(keys.size()); + for (const auto& [address, value] : keys) { + flat_keys.push_back(ConstBufferKey{address.first, address.second, value}); + } + + std::vector<BoundSamplerKey> flat_bound_samplers; + flat_bound_samplers.reserve(bound_samplers.size()); + for (const auto& [address, sampler] : bound_samplers) { + flat_bound_samplers.push_back(BoundSamplerKey{address, sampler}); + } + + std::vector<BindlessSamplerKey> flat_bindless_samplers; + flat_bindless_samplers.reserve(bindless_samplers.size()); + for (const auto& [address, sampler] : bindless_samplers) { + flat_bindless_samplers.push_back( + BindlessSamplerKey{address.first, address.second, sampler}); + } + + return file.WriteArray(flat_keys.data(), flat_keys.size()) == flat_keys.size() && + file.WriteArray(flat_bound_samplers.data(), flat_bound_samplers.size()) == + flat_bound_samplers.size() && + file.WriteArray(flat_bindless_samplers.data(), flat_bindless_samplers.size()) == + flat_bindless_samplers.size(); } ShaderDiskCacheOpenGL::ShaderDiskCacheOpenGL(Core::System& system) : system{system} {} ShaderDiskCacheOpenGL::~ShaderDiskCacheOpenGL() = default; -std::optional<std::pair<std::vector<ShaderDiskCacheRaw>, std::vector<ShaderDiskCacheUsage>>> -ShaderDiskCacheOpenGL::LoadTransferable() { +std::optional<std::vector<ShaderDiskCacheEntry>> ShaderDiskCacheOpenGL::LoadTransferable() { // Skip games without title id const bool has_title_id = system.CurrentProcess()->GetTitleID() != 0; if (!Settings::values.use_disk_shader_cache || !has_title_id) { @@ -130,17 +185,14 @@ ShaderDiskCacheOpenGL::LoadTransferable() { FileUtil::IOFile file(GetTransferablePath(), "rb"); if (!file.IsOpen()) { - LOG_INFO(Render_OpenGL, "No transferable shader cache found for game with title id={}", - GetTitleID()); + LOG_INFO(Render_OpenGL, "No transferable shader cache found"); is_usable = true; return {}; } u32 version{}; if (file.ReadBytes(&version, sizeof(version)) != sizeof(version)) { - LOG_ERROR(Render_OpenGL, - "Failed to get transferable cache version for title id={}, skipping", - GetTitleID()); + LOG_ERROR(Render_OpenGL, "Failed to get transferable cache version, skipping it"); return {}; } @@ -158,105 +210,42 @@ ShaderDiskCacheOpenGL::LoadTransferable() { } // Version is valid, load the shaders - constexpr const char error_loading[] = "Failed to load transferable raw entry, skipping"; - std::vector<ShaderDiskCacheRaw> raws; - std::vector<ShaderDiskCacheUsage> usages; + std::vector<ShaderDiskCacheEntry> entries; while (file.Tell() < file.GetSize()) { - TransferableEntryKind kind{}; - if (file.ReadBytes(&kind, sizeof(u32)) != sizeof(u32)) { - LOG_ERROR(Render_OpenGL, "Failed to read transferable file, skipping"); - return {}; - } - - switch (kind) { - case TransferableEntryKind::Raw: { - ShaderDiskCacheRaw entry; - if (!entry.Load(file)) { - LOG_ERROR(Render_OpenGL, error_loading); - return {}; - } - transferable.insert({entry.GetUniqueIdentifier(), {}}); - raws.push_back(std::move(entry)); - break; - } - case TransferableEntryKind::Usage: { - ShaderDiskCacheUsage usage; - - u32 num_keys{}; - u32 num_bound_samplers{}; - u32 num_bindless_samplers{}; - if (file.ReadArray(&usage.unique_identifier, 1) != 1 || - file.ReadArray(&usage.variant, 1) != 1 || - file.ReadArray(&usage.bound_buffer, 1) != 1 || file.ReadArray(&num_keys, 1) != 1 || - file.ReadArray(&num_bound_samplers, 1) != 1 || - file.ReadArray(&num_bindless_samplers, 1) != 1) { - LOG_ERROR(Render_OpenGL, error_loading); - return {}; - } - - std::vector<ConstBufferKey> keys(num_keys); - std::vector<BoundSamplerKey> bound_samplers(num_bound_samplers); - std::vector<BindlessSamplerKey> bindless_samplers(num_bindless_samplers); - if (file.ReadArray(keys.data(), keys.size()) != keys.size() || - file.ReadArray(bound_samplers.data(), bound_samplers.size()) != - bound_samplers.size() || - file.ReadArray(bindless_samplers.data(), bindless_samplers.size()) != - bindless_samplers.size()) { - LOG_ERROR(Render_OpenGL, error_loading); - return {}; - } - for (const auto& key : keys) { - usage.keys.insert({{key.cbuf, key.offset}, key.value}); - } - for (const auto& key : bound_samplers) { - usage.bound_samplers.emplace(key.offset, key.sampler); - } - for (const auto& key : bindless_samplers) { - usage.bindless_samplers.insert({{key.cbuf, key.offset}, key.sampler}); - } - - usages.push_back(std::move(usage)); - break; - } - default: - LOG_ERROR(Render_OpenGL, "Unknown transferable shader cache entry kind={}, skipping", - static_cast<u32>(kind)); + ShaderDiskCacheEntry& entry = entries.emplace_back(); + if (!entry.Load(file)) { + LOG_ERROR(Render_OpenGL, "Failed to load transferable raw entry, skipping"); return {}; } } is_usable = true; - return {{std::move(raws), std::move(usages)}}; + return {std::move(entries)}; } -std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump> -ShaderDiskCacheOpenGL::LoadPrecompiled() { +std::vector<ShaderDiskCachePrecompiled> ShaderDiskCacheOpenGL::LoadPrecompiled() { if (!is_usable) { return {}; } - std::string path = GetPrecompiledPath(); - FileUtil::IOFile file(path, "rb"); + FileUtil::IOFile file(GetPrecompiledPath(), "rb"); if (!file.IsOpen()) { - LOG_INFO(Render_OpenGL, "No precompiled shader cache found for game with title id={}", - GetTitleID()); + LOG_INFO(Render_OpenGL, "No precompiled shader cache found"); return {}; } - const auto result = LoadPrecompiledFile(file); - if (!result) { - LOG_INFO(Render_OpenGL, - "Failed to load precompiled cache for game with title id={}, removing", - GetTitleID()); - file.Close(); - InvalidatePrecompiled(); - return {}; + if (const auto result = LoadPrecompiledFile(file)) { + return *result; } - return *result; + + LOG_INFO(Render_OpenGL, "Failed to load precompiled cache"); + file.Close(); + InvalidatePrecompiled(); + return {}; } -std::optional<std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>> -ShaderDiskCacheOpenGL::LoadPrecompiledFile(FileUtil::IOFile& file) { +std::optional<std::vector<ShaderDiskCachePrecompiled>> ShaderDiskCacheOpenGL::LoadPrecompiledFile( + FileUtil::IOFile& file) { // Read compressed file from disk and decompress to virtual precompiled cache file std::vector<u8> compressed(file.GetSize()); file.ReadBytes(compressed.data(), compressed.size()); @@ -275,58 +264,22 @@ ShaderDiskCacheOpenGL::LoadPrecompiledFile(FileUtil::IOFile& file) { return {}; } - ShaderDumpsMap dumps; + std::vector<ShaderDiskCachePrecompiled> entries; while (precompiled_cache_virtual_file_offset < precompiled_cache_virtual_file.GetSize()) { - u32 num_keys{}; - u32 num_bound_samplers{}; - u32 num_bindless_samplers{}; - ShaderDiskCacheUsage usage; - if (!LoadObjectFromPrecompiled(usage.unique_identifier) || - !LoadObjectFromPrecompiled(usage.variant) || - !LoadObjectFromPrecompiled(usage.bound_buffer) || - !LoadObjectFromPrecompiled(num_keys) || - !LoadObjectFromPrecompiled(num_bound_samplers) || - !LoadObjectFromPrecompiled(num_bindless_samplers)) { - return {}; - } - std::vector<ConstBufferKey> keys(num_keys); - std::vector<BoundSamplerKey> bound_samplers(num_bound_samplers); - std::vector<BindlessSamplerKey> bindless_samplers(num_bindless_samplers); - if (!LoadArrayFromPrecompiled(keys.data(), keys.size()) || - !LoadArrayFromPrecompiled(bound_samplers.data(), bound_samplers.size()) != - bound_samplers.size() || - !LoadArrayFromPrecompiled(bindless_samplers.data(), bindless_samplers.size()) != - bindless_samplers.size()) { - return {}; - } - for (const auto& key : keys) { - usage.keys.insert({{key.cbuf, key.offset}, key.value}); - } - for (const auto& key : bound_samplers) { - usage.bound_samplers.emplace(key.offset, key.sampler); - } - for (const auto& key : bindless_samplers) { - usage.bindless_samplers.insert({{key.cbuf, key.offset}, key.sampler}); - } - - ShaderDiskCacheDump dump; - if (!LoadObjectFromPrecompiled(dump.binary_format)) { - return {}; - } - - u32 binary_length{}; - if (!LoadObjectFromPrecompiled(binary_length)) { + u32 binary_size; + auto& entry = entries.emplace_back(); + if (!LoadObjectFromPrecompiled(entry.unique_identifier) || + !LoadObjectFromPrecompiled(entry.binary_format) || + !LoadObjectFromPrecompiled(binary_size)) { return {}; } - dump.binary.resize(binary_length); - if (!LoadArrayFromPrecompiled(dump.binary.data(), dump.binary.size())) { + entry.binary.resize(binary_size); + if (!LoadArrayFromPrecompiled(entry.binary.data(), entry.binary.size())) { return {}; } - - dumps.emplace(std::move(usage), dump); } - return dumps; + return entries; } void ShaderDiskCacheOpenGL::InvalidateTransferable() { @@ -346,13 +299,13 @@ void ShaderDiskCacheOpenGL::InvalidatePrecompiled() { } } -void ShaderDiskCacheOpenGL::SaveRaw(const ShaderDiskCacheRaw& entry) { +void ShaderDiskCacheOpenGL::SaveEntry(const ShaderDiskCacheEntry& entry) { if (!is_usable) { return; } - const u64 id = entry.GetUniqueIdentifier(); - if (transferable.find(id) != transferable.end()) { + const u64 id = entry.unique_identifier; + if (stored_transferable.find(id) != stored_transferable.end()) { // The shader already exists return; } @@ -361,71 +314,17 @@ void ShaderDiskCacheOpenGL::SaveRaw(const ShaderDiskCacheRaw& entry) { if (!file.IsOpen()) { return; } - if (file.WriteObject(TransferableEntryKind::Raw) != 1 || !entry.Save(file)) { + if (!entry.Save(file)) { LOG_ERROR(Render_OpenGL, "Failed to save raw transferable cache entry, removing"); file.Close(); InvalidateTransferable(); return; } - transferable.insert({id, {}}); -} -void ShaderDiskCacheOpenGL::SaveUsage(const ShaderDiskCacheUsage& usage) { - if (!is_usable) { - return; - } - - const auto it = transferable.find(usage.unique_identifier); - ASSERT_MSG(it != transferable.end(), "Saving shader usage without storing raw previously"); - - auto& usages{it->second}; - if (usages.find(usage) != usages.end()) { - // Skip this variant since the shader is already stored. - return; - } - usages.insert(usage); - - FileUtil::IOFile file = AppendTransferableFile(); - if (!file.IsOpen()) - return; - const auto Close = [&] { - LOG_ERROR(Render_OpenGL, "Failed to save usage transferable cache entry, removing"); - file.Close(); - InvalidateTransferable(); - }; - - if (file.WriteObject(TransferableEntryKind::Usage) != 1 || - file.WriteObject(usage.unique_identifier) != 1 || file.WriteObject(usage.variant) != 1 || - file.WriteObject(usage.bound_buffer) != 1 || - file.WriteObject(static_cast<u32>(usage.keys.size())) != 1 || - file.WriteObject(static_cast<u32>(usage.bound_samplers.size())) != 1 || - file.WriteObject(static_cast<u32>(usage.bindless_samplers.size())) != 1) { - Close(); - return; - } - for (const auto& [pair, value] : usage.keys) { - const auto [cbuf, offset] = pair; - if (file.WriteObject(ConstBufferKey{cbuf, offset, value}) != 1) { - Close(); - return; - } - } - for (const auto& [offset, sampler] : usage.bound_samplers) { - if (file.WriteObject(BoundSamplerKey{offset, sampler}) != 1) { - Close(); - return; - } - } - for (const auto& [pair, sampler] : usage.bindless_samplers) { - const auto [cbuf, offset] = pair; - if (file.WriteObject(BindlessSamplerKey{cbuf, offset, sampler}) != 1) { - Close(); - return; - } - } + stored_transferable.insert(id); } -void ShaderDiskCacheOpenGL::SaveDump(const ShaderDiskCacheUsage& usage, GLuint program) { +void ShaderDiskCacheOpenGL::SavePrecompiled(u64 unique_identifier, GLuint program) { if (!is_usable) { return; } @@ -437,51 +336,19 @@ void ShaderDiskCacheOpenGL::SaveDump(const ShaderDiskCacheUsage& usage, GLuint p SavePrecompiledHeaderToVirtualPrecompiledCache(); } - GLint binary_length{}; + GLint binary_length; glGetProgramiv(program, GL_PROGRAM_BINARY_LENGTH, &binary_length); - GLenum binary_format{}; + GLenum binary_format; std::vector<u8> binary(binary_length); glGetProgramBinary(program, binary_length, nullptr, &binary_format, binary.data()); - const auto Close = [&] { + if (!SaveObjectToPrecompiled(unique_identifier) || !SaveObjectToPrecompiled(binary_format) || + !SaveObjectToPrecompiled(static_cast<u32>(binary.size())) || + !SaveArrayToPrecompiled(binary.data(), binary.size())) { LOG_ERROR(Render_OpenGL, "Failed to save binary program file in shader={:016X}, removing", - usage.unique_identifier); + unique_identifier); InvalidatePrecompiled(); - }; - - if (!SaveObjectToPrecompiled(usage.unique_identifier) || - !SaveObjectToPrecompiled(usage.variant) || !SaveObjectToPrecompiled(usage.bound_buffer) || - !SaveObjectToPrecompiled(static_cast<u32>(usage.keys.size())) || - !SaveObjectToPrecompiled(static_cast<u32>(usage.bound_samplers.size())) || - !SaveObjectToPrecompiled(static_cast<u32>(usage.bindless_samplers.size()))) { - Close(); - return; - } - for (const auto& [pair, value] : usage.keys) { - const auto [cbuf, offset] = pair; - if (SaveObjectToPrecompiled(ConstBufferKey{cbuf, offset, value}) != 1) { - Close(); - return; - } - } - for (const auto& [offset, sampler] : usage.bound_samplers) { - if (SaveObjectToPrecompiled(BoundSamplerKey{offset, sampler}) != 1) { - Close(); - return; - } - } - for (const auto& [pair, sampler] : usage.bindless_samplers) { - const auto [cbuf, offset] = pair; - if (SaveObjectToPrecompiled(BindlessSamplerKey{cbuf, offset, sampler}) != 1) { - Close(); - return; - } - } - if (!SaveObjectToPrecompiled(static_cast<u32>(binary_format)) || - !SaveObjectToPrecompiled(static_cast<u32>(binary_length)) || - !SaveArrayToPrecompiled(binary.data(), binary.size())) { - Close(); } } @@ -534,7 +401,6 @@ void ShaderDiskCacheOpenGL::SaveVirtualPrecompiledFile() { if (file.WriteBytes(compressed.data(), compressed.size()) != compressed.size()) { LOG_ERROR(Render_OpenGL, "Failed to write precompiled cache version in path={}", precompiled_path); - return; } } diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.h b/src/video_core/renderer_opengl/gl_shader_disk_cache.h index ef2371f6d..d5be52e40 100644 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.h @@ -19,8 +19,7 @@ #include "common/common_types.h" #include "core/file_sys/vfs_vector.h" #include "video_core/engines/shader_type.h" -#include "video_core/renderer_opengl/gl_shader_gen.h" -#include "video_core/shader/const_buffer_locker.h" +#include "video_core/shader/registry.h" namespace Core { class System; @@ -32,139 +31,39 @@ class IOFile; namespace OpenGL { -struct ShaderDiskCacheUsage; -struct ShaderDiskCacheDump; - using ProgramCode = std::vector<u64>; -using ShaderDumpsMap = std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>; - -/// Describes the different variants a program can be compiled with. -struct ProgramVariant final { - ProgramVariant() = default; - - /// Graphics constructor. - explicit constexpr ProgramVariant(GLenum primitive_mode) noexcept - : primitive_mode{primitive_mode} {} - - /// Compute constructor. - explicit constexpr ProgramVariant(u32 block_x, u32 block_y, u32 block_z, u32 shared_memory_size, - u32 local_memory_size) noexcept - : block_x{block_x}, block_y{static_cast<u16>(block_y)}, block_z{static_cast<u16>(block_z)}, - shared_memory_size{shared_memory_size}, local_memory_size{local_memory_size} {} - - // Graphics specific parameters. - GLenum primitive_mode{}; - - // Compute specific parameters. - u32 block_x{}; - u16 block_y{}; - u16 block_z{}; - u32 shared_memory_size{}; - u32 local_memory_size{}; - - bool operator==(const ProgramVariant& rhs) const noexcept { - return std::tie(primitive_mode, block_x, block_y, block_z, shared_memory_size, - local_memory_size) == std::tie(rhs.primitive_mode, rhs.block_x, rhs.block_y, - rhs.block_z, rhs.shared_memory_size, - rhs.local_memory_size); - } - - bool operator!=(const ProgramVariant& rhs) const noexcept { - return !operator==(rhs); - } -}; -static_assert(std::is_trivially_copyable_v<ProgramVariant>); - -/// Describes how a shader is used. -struct ShaderDiskCacheUsage { - u64 unique_identifier{}; - ProgramVariant variant; - u32 bound_buffer{}; - VideoCommon::Shader::KeyMap keys; - VideoCommon::Shader::BoundSamplerMap bound_samplers; - VideoCommon::Shader::BindlessSamplerMap bindless_samplers; - - bool operator==(const ShaderDiskCacheUsage& rhs) const { - return std::tie(unique_identifier, variant, keys, bound_samplers, bindless_samplers) == - std::tie(rhs.unique_identifier, rhs.variant, rhs.keys, rhs.bound_samplers, - rhs.bindless_samplers); - } - - bool operator!=(const ShaderDiskCacheUsage& rhs) const { - return !operator==(rhs); - } -}; - -} // namespace OpenGL - -namespace std { - -template <> -struct hash<OpenGL::ProgramVariant> { - std::size_t operator()(const OpenGL::ProgramVariant& variant) const noexcept { - return (static_cast<std::size_t>(variant.primitive_mode) << 6) ^ - static_cast<std::size_t>(variant.block_x) ^ - (static_cast<std::size_t>(variant.block_y) << 32) ^ - (static_cast<std::size_t>(variant.block_z) << 48) ^ - (static_cast<std::size_t>(variant.shared_memory_size) << 16) ^ - (static_cast<std::size_t>(variant.local_memory_size) << 36); - } -}; - -template <> -struct hash<OpenGL::ShaderDiskCacheUsage> { - std::size_t operator()(const OpenGL::ShaderDiskCacheUsage& usage) const noexcept { - return static_cast<std::size_t>(usage.unique_identifier) ^ - std::hash<OpenGL::ProgramVariant>{}(usage.variant); - } -}; - -} // namespace std - -namespace OpenGL { -/// Describes a shader how it's used by the guest GPU -class ShaderDiskCacheRaw { -public: - explicit ShaderDiskCacheRaw(u64 unique_identifier, Tegra::Engines::ShaderType type, - ProgramCode code, ProgramCode code_b = {}); - ShaderDiskCacheRaw(); - ~ShaderDiskCacheRaw(); +/// Describes a shader and how it's used by the guest GPU +struct ShaderDiskCacheEntry { + ShaderDiskCacheEntry(); + ~ShaderDiskCacheEntry(); bool Load(FileUtil::IOFile& file); bool Save(FileUtil::IOFile& file) const; - u64 GetUniqueIdentifier() const { - return unique_identifier; - } - bool HasProgramA() const { return !code.empty() && !code_b.empty(); } - Tegra::Engines::ShaderType GetType() const { - return type; - } - - const ProgramCode& GetCode() const { - return code; - } - - const ProgramCode& GetCodeB() const { - return code_b; - } - -private: - u64 unique_identifier{}; Tegra::Engines::ShaderType type{}; ProgramCode code; ProgramCode code_b; + + u64 unique_identifier = 0; + std::optional<u32> texture_handler_size; + u32 bound_buffer = 0; + VideoCommon::Shader::GraphicsInfo graphics_info; + VideoCommon::Shader::ComputeInfo compute_info; + VideoCommon::Shader::KeyMap keys; + VideoCommon::Shader::BoundSamplerMap bound_samplers; + VideoCommon::Shader::BindlessSamplerMap bindless_samplers; }; /// Contains an OpenGL dumped binary program -struct ShaderDiskCacheDump { - GLenum binary_format{}; +struct ShaderDiskCachePrecompiled { + u64 unique_identifier = 0; + GLenum binary_format = 0; std::vector<u8> binary; }; @@ -174,11 +73,10 @@ public: ~ShaderDiskCacheOpenGL(); /// Loads transferable cache. If file has a old version or on failure, it deletes the file. - std::optional<std::pair<std::vector<ShaderDiskCacheRaw>, std::vector<ShaderDiskCacheUsage>>> - LoadTransferable(); + std::optional<std::vector<ShaderDiskCacheEntry>> LoadTransferable(); /// Loads current game's precompiled cache. Invalidates on failure. - std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump> LoadPrecompiled(); + std::vector<ShaderDiskCachePrecompiled> LoadPrecompiled(); /// Removes the transferable (and precompiled) cache file. void InvalidateTransferable(); @@ -187,21 +85,18 @@ public: void InvalidatePrecompiled(); /// Saves a raw dump to the transferable file. Checks for collisions. - void SaveRaw(const ShaderDiskCacheRaw& entry); - - /// Saves shader usage to the transferable file. Does not check for collisions. - void SaveUsage(const ShaderDiskCacheUsage& usage); + void SaveEntry(const ShaderDiskCacheEntry& entry); /// Saves a dump entry to the precompiled file. Does not check for collisions. - void SaveDump(const ShaderDiskCacheUsage& usage, GLuint program); + void SavePrecompiled(u64 unique_identifier, GLuint program); /// Serializes virtual precompiled shader cache file to real file void SaveVirtualPrecompiledFile(); private: /// Loads the transferable cache. Returns empty on failure. - std::optional<std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>> - LoadPrecompiledFile(FileUtil::IOFile& file); + std::optional<std::vector<ShaderDiskCachePrecompiled>> LoadPrecompiledFile( + FileUtil::IOFile& file); /// Opens current game's transferable file and write it's header if it doesn't exist FileUtil::IOFile AppendTransferableFile() const; @@ -270,7 +165,7 @@ private: std::size_t precompiled_cache_virtual_file_offset = 0; // Stored transferable shaders - std::unordered_map<u64, std::unordered_set<ShaderDiskCacheUsage>> transferable; + std::unordered_set<u64> stored_transferable; // The cache has been loaded at boot bool is_usable{}; diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp deleted file mode 100644 index 34946fb47..000000000 --- a/src/video_core/renderer_opengl/gl_shader_gen.cpp +++ /dev/null @@ -1,109 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include <string> - -#include <fmt/format.h> - -#include "video_core/engines/maxwell_3d.h" -#include "video_core/engines/shader_type.h" -#include "video_core/renderer_opengl/gl_device.h" -#include "video_core/renderer_opengl/gl_shader_decompiler.h" -#include "video_core/renderer_opengl/gl_shader_gen.h" -#include "video_core/shader/shader_ir.h" - -namespace OpenGL::GLShader { - -using Tegra::Engines::Maxwell3D; -using Tegra::Engines::ShaderType; -using VideoCommon::Shader::CompileDepth; -using VideoCommon::Shader::CompilerSettings; -using VideoCommon::Shader::ProgramCode; -using VideoCommon::Shader::ShaderIR; - -std::string GenerateVertexShader(const Device& device, const ShaderIR& ir, const ShaderIR* ir_b) { - std::string out = GetCommonDeclarations(); - out += fmt::format(R"( -layout (std140, binding = {}) uniform vs_config {{ - float y_direction; -}}; - -)", - EmulationUniformBlockBinding); - out += Decompile(device, ir, ShaderType::Vertex, "vertex"); - if (ir_b) { - out += Decompile(device, *ir_b, ShaderType::Vertex, "vertex_b"); - } - - out += R"( -void main() { - gl_Position = vec4(0.0f, 0.0f, 0.0f, 1.0f); - execute_vertex(); -)"; - if (ir_b) { - out += " execute_vertex_b();"; - } - out += "}\n"; - return out; -} - -std::string GenerateGeometryShader(const Device& device, const ShaderIR& ir) { - std::string out = GetCommonDeclarations(); - out += fmt::format(R"( -layout (std140, binding = {}) uniform gs_config {{ - float y_direction; -}}; - -)", - EmulationUniformBlockBinding); - out += Decompile(device, ir, ShaderType::Geometry, "geometry"); - - out += R"( -void main() { - execute_geometry(); -} -)"; - return out; -} - -std::string GenerateFragmentShader(const Device& device, const ShaderIR& ir) { - std::string out = GetCommonDeclarations(); - out += fmt::format(R"( -layout (location = 0) out vec4 FragColor0; -layout (location = 1) out vec4 FragColor1; -layout (location = 2) out vec4 FragColor2; -layout (location = 3) out vec4 FragColor3; -layout (location = 4) out vec4 FragColor4; -layout (location = 5) out vec4 FragColor5; -layout (location = 6) out vec4 FragColor6; -layout (location = 7) out vec4 FragColor7; - -layout (std140, binding = {}) uniform fs_config {{ - float y_direction; -}}; - -)", - EmulationUniformBlockBinding); - out += Decompile(device, ir, ShaderType::Fragment, "fragment"); - - out += R"( -void main() { - execute_fragment(); -} -)"; - return out; -} - -std::string GenerateComputeShader(const Device& device, const ShaderIR& ir) { - std::string out = GetCommonDeclarations(); - out += Decompile(device, ir, ShaderType::Compute, "compute"); - out += R"( -void main() { - execute_compute(); -} -)"; - return out; -} - -} // namespace OpenGL::GLShader diff --git a/src/video_core/renderer_opengl/gl_shader_gen.h b/src/video_core/renderer_opengl/gl_shader_gen.h deleted file mode 100644 index cba2be9f9..000000000 --- a/src/video_core/renderer_opengl/gl_shader_gen.h +++ /dev/null @@ -1,34 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include <vector> - -#include "common/common_types.h" -#include "video_core/renderer_opengl/gl_shader_decompiler.h" -#include "video_core/shader/shader_ir.h" - -namespace OpenGL { -class Device; -} - -namespace OpenGL::GLShader { - -using VideoCommon::Shader::ProgramCode; -using VideoCommon::Shader::ShaderIR; - -/// Generates the GLSL vertex shader program source code for the given VS program -std::string GenerateVertexShader(const Device& device, const ShaderIR& ir, const ShaderIR* ir_b); - -/// Generates the GLSL geometry shader program source code for the given GS program -std::string GenerateGeometryShader(const Device& device, const ShaderIR& ir); - -/// Generates the GLSL fragment shader program source code for the given FS program -std::string GenerateFragmentShader(const Device& device, const ShaderIR& ir); - -/// Generates the GLSL compute shader program source code for the given CS program -std::string GenerateComputeShader(const Device& device, const ShaderIR& ir); - -} // namespace OpenGL::GLShader diff --git a/src/video_core/renderer_opengl/gl_shader_manager.cpp b/src/video_core/renderer_opengl/gl_shader_manager.cpp index 75d3fac04..9c7b0adbd 100644 --- a/src/video_core/renderer_opengl/gl_shader_manager.cpp +++ b/src/video_core/renderer_opengl/gl_shader_manager.cpp @@ -2,45 +2,52 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include <glad/glad.h> + #include "common/common_types.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/renderer_opengl/gl_shader_manager.h" namespace OpenGL::GLShader { -using Tegra::Engines::Maxwell3D; - -ProgramManager::ProgramManager() { - pipeline.Create(); -} +ProgramManager::ProgramManager() = default; ProgramManager::~ProgramManager() = default; -void ProgramManager::ApplyTo(OpenGLState& state) { - UpdatePipeline(); - state.draw.shader_program = 0; - state.draw.program_pipeline = pipeline.handle; +void ProgramManager::Create() { + graphics_pipeline.Create(); + glBindProgramPipeline(graphics_pipeline.handle); } -void ProgramManager::UpdatePipeline() { +void ProgramManager::BindGraphicsPipeline() { + if (!is_graphics_bound) { + is_graphics_bound = true; + glUseProgram(0); + } + // Avoid updating the pipeline when values have no changed if (old_state == current_state) { return; } // Workaround for AMD bug - constexpr GLenum all_used_stages{GL_VERTEX_SHADER_BIT | GL_GEOMETRY_SHADER_BIT | - GL_FRAGMENT_SHADER_BIT}; - glUseProgramStages(pipeline.handle, all_used_stages, 0); - - glUseProgramStages(pipeline.handle, GL_VERTEX_SHADER_BIT, current_state.vertex_shader); - glUseProgramStages(pipeline.handle, GL_GEOMETRY_SHADER_BIT, current_state.geometry_shader); - glUseProgramStages(pipeline.handle, GL_FRAGMENT_SHADER_BIT, current_state.fragment_shader); + static constexpr GLenum all_used_stages{GL_VERTEX_SHADER_BIT | GL_GEOMETRY_SHADER_BIT | + GL_FRAGMENT_SHADER_BIT}; + const GLuint handle = graphics_pipeline.handle; + glUseProgramStages(handle, all_used_stages, 0); + glUseProgramStages(handle, GL_VERTEX_SHADER_BIT, current_state.vertex_shader); + glUseProgramStages(handle, GL_GEOMETRY_SHADER_BIT, current_state.geometry_shader); + glUseProgramStages(handle, GL_FRAGMENT_SHADER_BIT, current_state.fragment_shader); old_state = current_state; } -void MaxwellUniformData::SetFromRegs(const Maxwell3D& maxwell) { +void ProgramManager::BindComputeShader(GLuint program) { + is_graphics_bound = false; + glUseProgram(program); +} + +void MaxwellUniformData::SetFromRegs(const Tegra::Engines::Maxwell3D& maxwell) { const auto& regs = maxwell.regs; // Y_NEGATE controls what value S2R returns for the Y_DIRECTION system value. diff --git a/src/video_core/renderer_opengl/gl_shader_manager.h b/src/video_core/renderer_opengl/gl_shader_manager.h index 478c165ce..d2e47f2a9 100644 --- a/src/video_core/renderer_opengl/gl_shader_manager.h +++ b/src/video_core/renderer_opengl/gl_shader_manager.h @@ -9,7 +9,6 @@ #include <glad/glad.h> #include "video_core/renderer_opengl/gl_resource_manager.h" -#include "video_core/renderer_opengl/gl_state.h" #include "video_core/renderer_opengl/maxwell_to_gl.h" namespace OpenGL::GLShader { @@ -32,49 +31,47 @@ public: explicit ProgramManager(); ~ProgramManager(); - void ApplyTo(OpenGLState& state); + void Create(); - void UseProgrammableVertexShader(GLuint program) { + /// Updates the graphics pipeline and binds it. + void BindGraphicsPipeline(); + + /// Binds a compute shader. + void BindComputeShader(GLuint program); + + void UseVertexShader(GLuint program) { current_state.vertex_shader = program; } - void UseProgrammableGeometryShader(GLuint program) { + void UseGeometryShader(GLuint program) { current_state.geometry_shader = program; } - void UseProgrammableFragmentShader(GLuint program) { + void UseFragmentShader(GLuint program) { current_state.fragment_shader = program; } - void UseTrivialGeometryShader() { - current_state.geometry_shader = 0; - } - - void UseTrivialFragmentShader() { - current_state.fragment_shader = 0; - } - private: struct PipelineState { - bool operator==(const PipelineState& rhs) const { + bool operator==(const PipelineState& rhs) const noexcept { return vertex_shader == rhs.vertex_shader && fragment_shader == rhs.fragment_shader && geometry_shader == rhs.geometry_shader; } - bool operator!=(const PipelineState& rhs) const { + bool operator!=(const PipelineState& rhs) const noexcept { return !operator==(rhs); } - GLuint vertex_shader{}; - GLuint fragment_shader{}; - GLuint geometry_shader{}; + GLuint vertex_shader = 0; + GLuint fragment_shader = 0; + GLuint geometry_shader = 0; }; - void UpdatePipeline(); - - OGLPipeline pipeline; + OGLPipeline graphics_pipeline; + OGLPipeline compute_pipeline; PipelineState current_state; PipelineState old_state; + bool is_graphics_bound = true; }; } // namespace OpenGL::GLShader diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp deleted file mode 100644 index ab1f7983c..000000000 --- a/src/video_core/renderer_opengl/gl_state.cpp +++ /dev/null @@ -1,554 +0,0 @@ -// Copyright 2015 Citra Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include <algorithm> -#include <iterator> -#include <glad/glad.h> -#include "common/assert.h" -#include "common/logging/log.h" -#include "common/microprofile.h" -#include "video_core/renderer_opengl/gl_state.h" - -MICROPROFILE_DEFINE(OpenGL_State, "OpenGL", "State Change", MP_RGB(192, 128, 128)); - -namespace OpenGL { - -using Maxwell = Tegra::Engines::Maxwell3D::Regs; - -OpenGLState OpenGLState::cur_state; - -namespace { - -template <typename T> -bool UpdateValue(T& current_value, const T new_value) { - const bool changed = current_value != new_value; - current_value = new_value; - return changed; -} - -template <typename T1, typename T2> -bool UpdateTie(T1 current_value, const T2 new_value) { - const bool changed = current_value != new_value; - current_value = new_value; - return changed; -} - -template <typename T> -std::optional<std::pair<GLuint, GLsizei>> UpdateArray(T& current_values, const T& new_values) { - std::optional<std::size_t> first; - std::size_t last; - for (std::size_t i = 0; i < std::size(current_values); ++i) { - if (!UpdateValue(current_values[i], new_values[i])) { - continue; - } - if (!first) { - first = i; - } - last = i; - } - if (!first) { - return std::nullopt; - } - return std::make_pair(static_cast<GLuint>(*first), static_cast<GLsizei>(last - *first + 1)); -} - -void Enable(GLenum cap, bool enable) { - if (enable) { - glEnable(cap); - } else { - glDisable(cap); - } -} - -void Enable(GLenum cap, GLuint index, bool enable) { - if (enable) { - glEnablei(cap, index); - } else { - glDisablei(cap, index); - } -} - -void Enable(GLenum cap, bool& current_value, bool new_value) { - if (UpdateValue(current_value, new_value)) { - Enable(cap, new_value); - } -} - -void Enable(GLenum cap, GLuint index, bool& current_value, bool new_value) { - if (UpdateValue(current_value, new_value)) { - Enable(cap, index, new_value); - } -} - -} // Anonymous namespace - -OpenGLState::OpenGLState() = default; - -void OpenGLState::SetDefaultViewports() { - viewports.fill(Viewport{}); - - depth_clamp.far_plane = false; - depth_clamp.near_plane = false; -} - -void OpenGLState::ApplyFramebufferState() { - if (UpdateValue(cur_state.draw.read_framebuffer, draw.read_framebuffer)) { - glBindFramebuffer(GL_READ_FRAMEBUFFER, draw.read_framebuffer); - } - if (UpdateValue(cur_state.draw.draw_framebuffer, draw.draw_framebuffer)) { - glBindFramebuffer(GL_DRAW_FRAMEBUFFER, draw.draw_framebuffer); - } -} - -void OpenGLState::ApplyVertexArrayState() { - if (UpdateValue(cur_state.draw.vertex_array, draw.vertex_array)) { - glBindVertexArray(draw.vertex_array); - } -} - -void OpenGLState::ApplyShaderProgram() { - if (UpdateValue(cur_state.draw.shader_program, draw.shader_program)) { - glUseProgram(draw.shader_program); - } -} - -void OpenGLState::ApplyProgramPipeline() { - if (UpdateValue(cur_state.draw.program_pipeline, draw.program_pipeline)) { - glBindProgramPipeline(draw.program_pipeline); - } -} - -void OpenGLState::ApplyClipDistances() { - for (std::size_t i = 0; i < clip_distance.size(); ++i) { - Enable(GL_CLIP_DISTANCE0 + static_cast<GLenum>(i), cur_state.clip_distance[i], - clip_distance[i]); - } -} - -void OpenGLState::ApplyPointSize() { - Enable(GL_PROGRAM_POINT_SIZE, cur_state.point.program_control, point.program_control); - Enable(GL_POINT_SPRITE, cur_state.point.sprite, point.sprite); - if (UpdateValue(cur_state.point.size, point.size)) { - glPointSize(point.size); - } -} - -void OpenGLState::ApplyFragmentColorClamp() { - if (UpdateValue(cur_state.fragment_color_clamp.enabled, fragment_color_clamp.enabled)) { - glClampColor(GL_CLAMP_FRAGMENT_COLOR_ARB, - fragment_color_clamp.enabled ? GL_TRUE : GL_FALSE); - } -} - -void OpenGLState::ApplyMultisample() { - Enable(GL_SAMPLE_ALPHA_TO_COVERAGE, cur_state.multisample_control.alpha_to_coverage, - multisample_control.alpha_to_coverage); - Enable(GL_SAMPLE_ALPHA_TO_ONE, cur_state.multisample_control.alpha_to_one, - multisample_control.alpha_to_one); -} - -void OpenGLState::ApplyDepthClamp() { - if (depth_clamp.far_plane == cur_state.depth_clamp.far_plane && - depth_clamp.near_plane == cur_state.depth_clamp.near_plane) { - return; - } - cur_state.depth_clamp = depth_clamp; - - UNIMPLEMENTED_IF_MSG(depth_clamp.far_plane != depth_clamp.near_plane, - "Unimplemented Depth Clamp Separation!"); - - Enable(GL_DEPTH_CLAMP, depth_clamp.far_plane || depth_clamp.near_plane); -} - -void OpenGLState::ApplySRgb() { - if (cur_state.framebuffer_srgb.enabled == framebuffer_srgb.enabled) - return; - cur_state.framebuffer_srgb.enabled = framebuffer_srgb.enabled; - if (framebuffer_srgb.enabled) { - glEnable(GL_FRAMEBUFFER_SRGB); - } else { - glDisable(GL_FRAMEBUFFER_SRGB); - } -} - -void OpenGLState::ApplyCulling() { - Enable(GL_CULL_FACE, cur_state.cull.enabled, cull.enabled); - - if (UpdateValue(cur_state.cull.mode, cull.mode)) { - glCullFace(cull.mode); - } - - if (UpdateValue(cur_state.cull.front_face, cull.front_face)) { - glFrontFace(cull.front_face); - } -} - -void OpenGLState::ApplyRasterizerDiscard() { - Enable(GL_RASTERIZER_DISCARD, cur_state.rasterizer_discard, rasterizer_discard); -} - -void OpenGLState::ApplyColorMask() { - if (!dirty.color_mask) { - return; - } - dirty.color_mask = false; - - for (std::size_t i = 0; i < Maxwell::NumRenderTargets; ++i) { - const auto& updated = color_mask[i]; - auto& current = cur_state.color_mask[i]; - if (updated.red_enabled != current.red_enabled || - updated.green_enabled != current.green_enabled || - updated.blue_enabled != current.blue_enabled || - updated.alpha_enabled != current.alpha_enabled) { - current = updated; - glColorMaski(static_cast<GLuint>(i), updated.red_enabled, updated.green_enabled, - updated.blue_enabled, updated.alpha_enabled); - } - } -} - -void OpenGLState::ApplyDepth() { - Enable(GL_DEPTH_TEST, cur_state.depth.test_enabled, depth.test_enabled); - - if (cur_state.depth.test_func != depth.test_func) { - cur_state.depth.test_func = depth.test_func; - glDepthFunc(depth.test_func); - } - - if (cur_state.depth.write_mask != depth.write_mask) { - cur_state.depth.write_mask = depth.write_mask; - glDepthMask(depth.write_mask); - } -} - -void OpenGLState::ApplyPrimitiveRestart() { - Enable(GL_PRIMITIVE_RESTART, cur_state.primitive_restart.enabled, primitive_restart.enabled); - - if (cur_state.primitive_restart.index != primitive_restart.index) { - cur_state.primitive_restart.index = primitive_restart.index; - glPrimitiveRestartIndex(primitive_restart.index); - } -} - -void OpenGLState::ApplyStencilTest() { - if (!dirty.stencil_state) { - return; - } - dirty.stencil_state = false; - - Enable(GL_STENCIL_TEST, cur_state.stencil.test_enabled, stencil.test_enabled); - - const auto ConfigStencil = [](GLenum face, const auto& config, auto& current) { - if (current.test_func != config.test_func || current.test_ref != config.test_ref || - current.test_mask != config.test_mask) { - current.test_func = config.test_func; - current.test_ref = config.test_ref; - current.test_mask = config.test_mask; - glStencilFuncSeparate(face, config.test_func, config.test_ref, config.test_mask); - } - if (current.action_depth_fail != config.action_depth_fail || - current.action_depth_pass != config.action_depth_pass || - current.action_stencil_fail != config.action_stencil_fail) { - current.action_depth_fail = config.action_depth_fail; - current.action_depth_pass = config.action_depth_pass; - current.action_stencil_fail = config.action_stencil_fail; - glStencilOpSeparate(face, config.action_stencil_fail, config.action_depth_fail, - config.action_depth_pass); - } - if (current.write_mask != config.write_mask) { - current.write_mask = config.write_mask; - glStencilMaskSeparate(face, config.write_mask); - } - }; - ConfigStencil(GL_FRONT, stencil.front, cur_state.stencil.front); - ConfigStencil(GL_BACK, stencil.back, cur_state.stencil.back); -} - -void OpenGLState::ApplyViewport() { - for (GLuint i = 0; i < static_cast<GLuint>(Maxwell::NumViewports); ++i) { - const auto& updated = viewports[i]; - auto& current = cur_state.viewports[i]; - - if (current.x != updated.x || current.y != updated.y || current.width != updated.width || - current.height != updated.height) { - current.x = updated.x; - current.y = updated.y; - current.width = updated.width; - current.height = updated.height; - glViewportIndexedf(i, static_cast<GLfloat>(updated.x), static_cast<GLfloat>(updated.y), - static_cast<GLfloat>(updated.width), - static_cast<GLfloat>(updated.height)); - } - if (current.depth_range_near != updated.depth_range_near || - current.depth_range_far != updated.depth_range_far) { - current.depth_range_near = updated.depth_range_near; - current.depth_range_far = updated.depth_range_far; - glDepthRangeIndexed(i, updated.depth_range_near, updated.depth_range_far); - } - - Enable(GL_SCISSOR_TEST, i, current.scissor.enabled, updated.scissor.enabled); - - if (current.scissor.x != updated.scissor.x || current.scissor.y != updated.scissor.y || - current.scissor.width != updated.scissor.width || - current.scissor.height != updated.scissor.height) { - current.scissor.x = updated.scissor.x; - current.scissor.y = updated.scissor.y; - current.scissor.width = updated.scissor.width; - current.scissor.height = updated.scissor.height; - glScissorIndexed(i, updated.scissor.x, updated.scissor.y, updated.scissor.width, - updated.scissor.height); - } - } -} - -void OpenGLState::ApplyGlobalBlending() { - const Blend& updated = blend[0]; - Blend& current = cur_state.blend[0]; - - Enable(GL_BLEND, current.enabled, updated.enabled); - - if (current.src_rgb_func != updated.src_rgb_func || - current.dst_rgb_func != updated.dst_rgb_func || current.src_a_func != updated.src_a_func || - current.dst_a_func != updated.dst_a_func) { - current.src_rgb_func = updated.src_rgb_func; - current.dst_rgb_func = updated.dst_rgb_func; - current.src_a_func = updated.src_a_func; - current.dst_a_func = updated.dst_a_func; - glBlendFuncSeparate(updated.src_rgb_func, updated.dst_rgb_func, updated.src_a_func, - updated.dst_a_func); - } - - if (current.rgb_equation != updated.rgb_equation || current.a_equation != updated.a_equation) { - current.rgb_equation = updated.rgb_equation; - current.a_equation = updated.a_equation; - glBlendEquationSeparate(updated.rgb_equation, updated.a_equation); - } -} - -void OpenGLState::ApplyTargetBlending(std::size_t target, bool force) { - const Blend& updated = blend[target]; - Blend& current = cur_state.blend[target]; - - if (current.enabled != updated.enabled || force) { - current.enabled = updated.enabled; - Enable(GL_BLEND, static_cast<GLuint>(target), updated.enabled); - } - - if (UpdateTie(std::tie(current.src_rgb_func, current.dst_rgb_func, current.src_a_func, - current.dst_a_func), - std::tie(updated.src_rgb_func, updated.dst_rgb_func, updated.src_a_func, - updated.dst_a_func))) { - glBlendFuncSeparatei(static_cast<GLuint>(target), updated.src_rgb_func, - updated.dst_rgb_func, updated.src_a_func, updated.dst_a_func); - } - - if (UpdateTie(std::tie(current.rgb_equation, current.a_equation), - std::tie(updated.rgb_equation, updated.a_equation))) { - glBlendEquationSeparatei(static_cast<GLuint>(target), updated.rgb_equation, - updated.a_equation); - } -} - -void OpenGLState::ApplyBlending() { - if (!dirty.blend_state) { - return; - } - dirty.blend_state = false; - - if (independant_blend.enabled) { - const bool force = independant_blend.enabled != cur_state.independant_blend.enabled; - for (std::size_t target = 0; target < Maxwell::NumRenderTargets; ++target) { - ApplyTargetBlending(target, force); - } - } else { - ApplyGlobalBlending(); - } - cur_state.independant_blend.enabled = independant_blend.enabled; - - if (UpdateTie( - std::tie(cur_state.blend_color.red, cur_state.blend_color.green, - cur_state.blend_color.blue, cur_state.blend_color.alpha), - std::tie(blend_color.red, blend_color.green, blend_color.blue, blend_color.alpha))) { - glBlendColor(blend_color.red, blend_color.green, blend_color.blue, blend_color.alpha); - } -} - -void OpenGLState::ApplyLogicOp() { - Enable(GL_COLOR_LOGIC_OP, cur_state.logic_op.enabled, logic_op.enabled); - - if (UpdateValue(cur_state.logic_op.operation, logic_op.operation)) { - glLogicOp(logic_op.operation); - } -} - -void OpenGLState::ApplyPolygonOffset() { - if (!dirty.polygon_offset) { - return; - } - dirty.polygon_offset = false; - - Enable(GL_POLYGON_OFFSET_FILL, cur_state.polygon_offset.fill_enable, - polygon_offset.fill_enable); - Enable(GL_POLYGON_OFFSET_LINE, cur_state.polygon_offset.line_enable, - polygon_offset.line_enable); - Enable(GL_POLYGON_OFFSET_POINT, cur_state.polygon_offset.point_enable, - polygon_offset.point_enable); - - if (UpdateTie(std::tie(cur_state.polygon_offset.factor, cur_state.polygon_offset.units, - cur_state.polygon_offset.clamp), - std::tie(polygon_offset.factor, polygon_offset.units, polygon_offset.clamp))) { - if (GLAD_GL_EXT_polygon_offset_clamp && polygon_offset.clamp != 0) { - glPolygonOffsetClamp(polygon_offset.factor, polygon_offset.units, polygon_offset.clamp); - } else { - UNIMPLEMENTED_IF_MSG(polygon_offset.clamp != 0, - "Unimplemented Depth polygon offset clamp."); - glPolygonOffset(polygon_offset.factor, polygon_offset.units); - } - } -} - -void OpenGLState::ApplyAlphaTest() { - Enable(GL_ALPHA_TEST, cur_state.alpha_test.enabled, alpha_test.enabled); - if (UpdateTie(std::tie(cur_state.alpha_test.func, cur_state.alpha_test.ref), - std::tie(alpha_test.func, alpha_test.ref))) { - glAlphaFunc(alpha_test.func, alpha_test.ref); - } -} - -void OpenGLState::ApplyClipControl() { - if (UpdateTie(std::tie(cur_state.clip_control.origin, cur_state.clip_control.depth_mode), - std::tie(clip_control.origin, clip_control.depth_mode))) { - glClipControl(clip_control.origin, clip_control.depth_mode); - } -} - -void OpenGLState::ApplyTextures() { - const std::size_t size = std::size(textures); - for (std::size_t i = 0; i < size; ++i) { - if (UpdateValue(cur_state.textures[i], textures[i])) { - // BindTextureUnit doesn't support binding null textures, skip those binds. - // TODO(Rodrigo): Stop using null textures - if (textures[i] != 0) { - glBindTextureUnit(static_cast<GLuint>(i), textures[i]); - } - } - } -} - -void OpenGLState::ApplySamplers() { - const std::size_t size = std::size(samplers); - for (std::size_t i = 0; i < size; ++i) { - if (UpdateValue(cur_state.samplers[i], samplers[i])) { - glBindSampler(static_cast<GLuint>(i), samplers[i]); - } - } -} - -void OpenGLState::ApplyImages() { - if (const auto update = UpdateArray(cur_state.images, images)) { - glBindImageTextures(update->first, update->second, images.data() + update->first); - } -} - -void OpenGLState::Apply() { - MICROPROFILE_SCOPE(OpenGL_State); - ApplyFramebufferState(); - ApplyVertexArrayState(); - ApplyShaderProgram(); - ApplyProgramPipeline(); - ApplyClipDistances(); - ApplyPointSize(); - ApplyFragmentColorClamp(); - ApplyMultisample(); - ApplyRasterizerDiscard(); - ApplyColorMask(); - ApplyDepthClamp(); - ApplyViewport(); - ApplyStencilTest(); - ApplySRgb(); - ApplyCulling(); - ApplyDepth(); - ApplyPrimitiveRestart(); - ApplyBlending(); - ApplyLogicOp(); - ApplyTextures(); - ApplySamplers(); - ApplyImages(); - ApplyPolygonOffset(); - ApplyAlphaTest(); - ApplyClipControl(); -} - -void OpenGLState::EmulateViewportWithScissor() { - auto& current = viewports[0]; - if (current.scissor.enabled) { - const GLint left = std::max(current.x, current.scissor.x); - const GLint right = - std::max(current.x + current.width, current.scissor.x + current.scissor.width); - const GLint bottom = std::max(current.y, current.scissor.y); - const GLint top = - std::max(current.y + current.height, current.scissor.y + current.scissor.height); - current.scissor.x = std::max(left, 0); - current.scissor.y = std::max(bottom, 0); - current.scissor.width = std::max(right - left, 0); - current.scissor.height = std::max(top - bottom, 0); - } else { - current.scissor.enabled = true; - current.scissor.x = current.x; - current.scissor.y = current.y; - current.scissor.width = current.width; - current.scissor.height = current.height; - } -} - -OpenGLState& OpenGLState::UnbindTexture(GLuint handle) { - for (auto& texture : textures) { - if (texture == handle) { - texture = 0; - } - } - return *this; -} - -OpenGLState& OpenGLState::ResetSampler(GLuint handle) { - for (auto& sampler : samplers) { - if (sampler == handle) { - sampler = 0; - } - } - return *this; -} - -OpenGLState& OpenGLState::ResetProgram(GLuint handle) { - if (draw.shader_program == handle) { - draw.shader_program = 0; - } - return *this; -} - -OpenGLState& OpenGLState::ResetPipeline(GLuint handle) { - if (draw.program_pipeline == handle) { - draw.program_pipeline = 0; - } - return *this; -} - -OpenGLState& OpenGLState::ResetVertexArray(GLuint handle) { - if (draw.vertex_array == handle) { - draw.vertex_array = 0; - } - return *this; -} - -OpenGLState& OpenGLState::ResetFramebuffer(GLuint handle) { - if (draw.read_framebuffer == handle) { - draw.read_framebuffer = 0; - } - if (draw.draw_framebuffer == handle) { - draw.draw_framebuffer = 0; - } - return *this; -} - -} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h deleted file mode 100644 index 4953eeda2..000000000 --- a/src/video_core/renderer_opengl/gl_state.h +++ /dev/null @@ -1,247 +0,0 @@ -// Copyright 2015 Citra Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include <array> -#include <type_traits> -#include <glad/glad.h> -#include "video_core/engines/maxwell_3d.h" - -namespace OpenGL { - -class OpenGLState { -public: - struct { - bool enabled = false; // GL_FRAMEBUFFER_SRGB - } framebuffer_srgb; - - struct { - bool alpha_to_coverage = false; // GL_ALPHA_TO_COVERAGE - bool alpha_to_one = false; // GL_ALPHA_TO_ONE - } multisample_control; - - struct { - bool enabled = false; // GL_CLAMP_FRAGMENT_COLOR_ARB - } fragment_color_clamp; - - struct { - bool far_plane = false; - bool near_plane = false; - } depth_clamp; // GL_DEPTH_CLAMP - - struct { - bool enabled = false; // GL_CULL_FACE - GLenum mode = GL_BACK; // GL_CULL_FACE_MODE - GLenum front_face = GL_CCW; // GL_FRONT_FACE - } cull; - - struct { - bool test_enabled = false; // GL_DEPTH_TEST - GLboolean write_mask = GL_TRUE; // GL_DEPTH_WRITEMASK - GLenum test_func = GL_LESS; // GL_DEPTH_FUNC - } depth; - - struct { - bool enabled = false; - GLuint index = 0; - } primitive_restart; // GL_PRIMITIVE_RESTART - - bool rasterizer_discard = false; // GL_RASTERIZER_DISCARD - - struct ColorMask { - GLboolean red_enabled = GL_TRUE; - GLboolean green_enabled = GL_TRUE; - GLboolean blue_enabled = GL_TRUE; - GLboolean alpha_enabled = GL_TRUE; - }; - std::array<ColorMask, Tegra::Engines::Maxwell3D::Regs::NumRenderTargets> - color_mask; // GL_COLOR_WRITEMASK - - struct { - bool test_enabled = false; // GL_STENCIL_TEST - struct { - GLenum test_func = GL_ALWAYS; // GL_STENCIL_FUNC - GLint test_ref = 0; // GL_STENCIL_REF - GLuint test_mask = 0xFFFFFFFF; // GL_STENCIL_VALUE_MASK - GLuint write_mask = 0xFFFFFFFF; // GL_STENCIL_WRITEMASK - GLenum action_stencil_fail = GL_KEEP; // GL_STENCIL_FAIL - GLenum action_depth_fail = GL_KEEP; // GL_STENCIL_PASS_DEPTH_FAIL - GLenum action_depth_pass = GL_KEEP; // GL_STENCIL_PASS_DEPTH_PASS - } front, back; - } stencil; - - struct Blend { - bool enabled = false; // GL_BLEND - GLenum rgb_equation = GL_FUNC_ADD; // GL_BLEND_EQUATION_RGB - GLenum a_equation = GL_FUNC_ADD; // GL_BLEND_EQUATION_ALPHA - GLenum src_rgb_func = GL_ONE; // GL_BLEND_SRC_RGB - GLenum dst_rgb_func = GL_ZERO; // GL_BLEND_DST_RGB - GLenum src_a_func = GL_ONE; // GL_BLEND_SRC_ALPHA - GLenum dst_a_func = GL_ZERO; // GL_BLEND_DST_ALPHA - }; - std::array<Blend, Tegra::Engines::Maxwell3D::Regs::NumRenderTargets> blend; - - struct { - bool enabled = false; - } independant_blend; - - struct { - GLclampf red = 0.0f; - GLclampf green = 0.0f; - GLclampf blue = 0.0f; - GLclampf alpha = 0.0f; - } blend_color; // GL_BLEND_COLOR - - struct { - bool enabled = false; // GL_LOGIC_OP_MODE - GLenum operation = GL_COPY; - } logic_op; - - static constexpr std::size_t NumSamplers = 32 * 5; - static constexpr std::size_t NumImages = 8 * 5; - std::array<GLuint, NumSamplers> textures = {}; - std::array<GLuint, NumSamplers> samplers = {}; - std::array<GLuint, NumImages> images = {}; - - struct { - GLuint read_framebuffer = 0; // GL_READ_FRAMEBUFFER_BINDING - GLuint draw_framebuffer = 0; // GL_DRAW_FRAMEBUFFER_BINDING - GLuint vertex_array = 0; // GL_VERTEX_ARRAY_BINDING - GLuint shader_program = 0; // GL_CURRENT_PROGRAM - GLuint program_pipeline = 0; // GL_PROGRAM_PIPELINE_BINDING - } draw; - - struct Viewport { - GLint x = 0; - GLint y = 0; - GLint width = 0; - GLint height = 0; - GLfloat depth_range_near = 0.0f; // GL_DEPTH_RANGE - GLfloat depth_range_far = 1.0f; // GL_DEPTH_RANGE - struct { - bool enabled = false; // GL_SCISSOR_TEST - GLint x = 0; - GLint y = 0; - GLsizei width = 0; - GLsizei height = 0; - } scissor; - }; - std::array<Viewport, Tegra::Engines::Maxwell3D::Regs::NumViewports> viewports; - - struct { - bool program_control = false; // GL_PROGRAM_POINT_SIZE - bool sprite = false; // GL_POINT_SPRITE - GLfloat size = 1.0f; // GL_POINT_SIZE - } point; - - struct { - bool point_enable = false; - bool line_enable = false; - bool fill_enable = false; - GLfloat units = 0.0f; - GLfloat factor = 0.0f; - GLfloat clamp = 0.0f; - } polygon_offset; - - struct { - bool enabled = false; // GL_ALPHA_TEST - GLenum func = GL_ALWAYS; // GL_ALPHA_TEST_FUNC - GLfloat ref = 0.0f; // GL_ALPHA_TEST_REF - } alpha_test; - - std::array<bool, 8> clip_distance = {}; // GL_CLIP_DISTANCE - - struct { - GLenum origin = GL_LOWER_LEFT; - GLenum depth_mode = GL_NEGATIVE_ONE_TO_ONE; - } clip_control; - - OpenGLState(); - - /// Get the currently active OpenGL state - static OpenGLState GetCurState() { - return cur_state; - } - - void SetDefaultViewports(); - /// Apply this state as the current OpenGL state - void Apply(); - - void ApplyFramebufferState(); - void ApplyVertexArrayState(); - void ApplyShaderProgram(); - void ApplyProgramPipeline(); - void ApplyClipDistances(); - void ApplyPointSize(); - void ApplyFragmentColorClamp(); - void ApplyMultisample(); - void ApplySRgb(); - void ApplyCulling(); - void ApplyRasterizerDiscard(); - void ApplyColorMask(); - void ApplyDepth(); - void ApplyPrimitiveRestart(); - void ApplyStencilTest(); - void ApplyViewport(); - void ApplyTargetBlending(std::size_t target, bool force); - void ApplyGlobalBlending(); - void ApplyBlending(); - void ApplyLogicOp(); - void ApplyTextures(); - void ApplySamplers(); - void ApplyImages(); - void ApplyDepthClamp(); - void ApplyPolygonOffset(); - void ApplyAlphaTest(); - void ApplyClipControl(); - - /// Resets any references to the given resource - OpenGLState& UnbindTexture(GLuint handle); - OpenGLState& ResetSampler(GLuint handle); - OpenGLState& ResetProgram(GLuint handle); - OpenGLState& ResetPipeline(GLuint handle); - OpenGLState& ResetVertexArray(GLuint handle); - OpenGLState& ResetFramebuffer(GLuint handle); - - /// Viewport does not affects glClearBuffer so emulate viewport using scissor test - void EmulateViewportWithScissor(); - - void MarkDirtyBlendState() { - dirty.blend_state = true; - } - - void MarkDirtyStencilState() { - dirty.stencil_state = true; - } - - void MarkDirtyPolygonOffset() { - dirty.polygon_offset = true; - } - - void MarkDirtyColorMask() { - dirty.color_mask = true; - } - - void AllDirty() { - dirty.blend_state = true; - dirty.stencil_state = true; - dirty.polygon_offset = true; - dirty.color_mask = true; - } - -private: - static OpenGLState cur_state; - - struct { - bool blend_state; - bool stencil_state; - bool viewport_state; - bool polygon_offset; - bool color_mask; - } dirty{}; -}; -static_assert(std::is_trivially_copyable_v<OpenGLState>); - -} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_state_tracker.cpp b/src/video_core/renderer_opengl/gl_state_tracker.cpp new file mode 100644 index 000000000..255ac3147 --- /dev/null +++ b/src/video_core/renderer_opengl/gl_state_tracker.cpp @@ -0,0 +1,247 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include <algorithm> +#include <array> +#include <cstddef> + +#include "common/common_types.h" +#include "core/core.h" +#include "video_core/engines/maxwell_3d.h" +#include "video_core/gpu.h" +#include "video_core/renderer_opengl/gl_state_tracker.h" + +#define OFF(field_name) MAXWELL3D_REG_INDEX(field_name) +#define NUM(field_name) (sizeof(Maxwell3D::Regs::field_name) / sizeof(u32)) + +namespace OpenGL { + +namespace { + +using namespace Dirty; +using namespace VideoCommon::Dirty; +using Tegra::Engines::Maxwell3D; +using Regs = Maxwell3D::Regs; +using Tables = Maxwell3D::DirtyState::Tables; +using Table = Maxwell3D::DirtyState::Table; + +void SetupDirtyColorMasks(Tables& tables) { + tables[0][OFF(color_mask_common)] = ColorMaskCommon; + for (std::size_t rt = 0; rt < Regs::NumRenderTargets; ++rt) { + const std::size_t offset = OFF(color_mask) + rt * NUM(color_mask[0]); + FillBlock(tables[0], offset, NUM(color_mask[0]), ColorMask0 + rt); + } + + FillBlock(tables[1], OFF(color_mask), NUM(color_mask), ColorMasks); +} + +void SetupDirtyVertexArrays(Tables& tables) { + static constexpr std::size_t num_array = 3; + static constexpr std::size_t instance_base_offset = 3; + for (std::size_t i = 0; i < Regs::NumVertexArrays; ++i) { + const std::size_t array_offset = OFF(vertex_array) + i * NUM(vertex_array[0]); + const std::size_t limit_offset = OFF(vertex_array_limit) + i * NUM(vertex_array_limit[0]); + + FillBlock(tables, array_offset, num_array, VertexBuffer0 + i, VertexBuffers); + FillBlock(tables, limit_offset, NUM(vertex_array_limit), VertexBuffer0 + i, VertexBuffers); + + const std::size_t instance_array_offset = array_offset + instance_base_offset; + tables[0][instance_array_offset] = static_cast<u8>(VertexInstance0 + i); + tables[1][instance_array_offset] = VertexInstances; + + const std::size_t instance_offset = OFF(instanced_arrays) + i; + tables[0][instance_offset] = static_cast<u8>(VertexInstance0 + i); + tables[1][instance_offset] = VertexInstances; + } +} + +void SetupDirtyVertexFormat(Tables& tables) { + for (std::size_t i = 0; i < Regs::NumVertexAttributes; ++i) { + const std::size_t offset = OFF(vertex_attrib_format) + i * NUM(vertex_attrib_format[0]); + FillBlock(tables[0], offset, NUM(vertex_attrib_format[0]), VertexFormat0 + i); + } + + FillBlock(tables[1], OFF(vertex_attrib_format), Regs::NumVertexAttributes, VertexFormats); +} + +void SetupDirtyViewports(Tables& tables) { + for (std::size_t i = 0; i < Regs::NumViewports; ++i) { + const std::size_t transf_offset = OFF(viewport_transform) + i * NUM(viewport_transform[0]); + const std::size_t viewport_offset = OFF(viewports) + i * NUM(viewports[0]); + + FillBlock(tables[0], transf_offset, NUM(viewport_transform[0]), Viewport0 + i); + FillBlock(tables[0], viewport_offset, NUM(viewports[0]), Viewport0 + i); + } + + FillBlock(tables[1], OFF(viewport_transform), NUM(viewport_transform), Viewports); + FillBlock(tables[1], OFF(viewports), NUM(viewports), Viewports); + + tables[0][OFF(viewport_transform_enabled)] = ViewportTransform; + tables[1][OFF(viewport_transform_enabled)] = Viewports; +} + +void SetupDirtyScissors(Tables& tables) { + for (std::size_t i = 0; i < Regs::NumViewports; ++i) { + const std::size_t offset = OFF(scissor_test) + i * NUM(scissor_test[0]); + FillBlock(tables[0], offset, NUM(scissor_test[0]), Scissor0 + i); + } + FillBlock(tables[1], OFF(scissor_test), NUM(scissor_test), Scissors); +} + +void SetupDirtyShaders(Tables& tables) { + FillBlock(tables[0], OFF(shader_config[0]), NUM(shader_config[0]) * Regs::MaxShaderProgram, + Shaders); +} + +void SetupDirtyPolygonModes(Tables& tables) { + tables[0][OFF(polygon_mode_front)] = PolygonModeFront; + tables[0][OFF(polygon_mode_back)] = PolygonModeBack; + + tables[1][OFF(polygon_mode_front)] = PolygonModes; + tables[1][OFF(polygon_mode_back)] = PolygonModes; + tables[0][OFF(fill_rectangle)] = PolygonModes; +} + +void SetupDirtyDepthTest(Tables& tables) { + auto& table = tables[0]; + table[OFF(depth_test_enable)] = DepthTest; + table[OFF(depth_write_enabled)] = DepthMask; + table[OFF(depth_test_func)] = DepthTest; +} + +void SetupDirtyStencilTest(Tables& tables) { + static constexpr std::array offsets = { + OFF(stencil_enable), OFF(stencil_front_func_func), OFF(stencil_front_func_ref), + OFF(stencil_front_func_mask), OFF(stencil_front_op_fail), OFF(stencil_front_op_zfail), + OFF(stencil_front_op_zpass), OFF(stencil_front_mask), OFF(stencil_two_side_enable), + OFF(stencil_back_func_func), OFF(stencil_back_func_ref), OFF(stencil_back_func_mask), + OFF(stencil_back_op_fail), OFF(stencil_back_op_zfail), OFF(stencil_back_op_zpass), + OFF(stencil_back_mask)}; + for (const auto offset : offsets) { + tables[0][offset] = StencilTest; + } +} + +void SetupDirtyAlphaTest(Tables& tables) { + auto& table = tables[0]; + table[OFF(alpha_test_ref)] = AlphaTest; + table[OFF(alpha_test_func)] = AlphaTest; + table[OFF(alpha_test_enabled)] = AlphaTest; +} + +void SetupDirtyBlend(Tables& tables) { + FillBlock(tables[0], OFF(blend_color), NUM(blend_color), BlendColor); + + tables[0][OFF(independent_blend_enable)] = BlendIndependentEnabled; + + for (std::size_t i = 0; i < Regs::NumRenderTargets; ++i) { + const std::size_t offset = OFF(independent_blend) + i * NUM(independent_blend[0]); + FillBlock(tables[0], offset, NUM(independent_blend[0]), BlendState0 + i); + + tables[0][OFF(blend.enable) + i] = static_cast<u8>(BlendState0 + i); + } + FillBlock(tables[1], OFF(independent_blend), NUM(independent_blend), BlendStates); + FillBlock(tables[1], OFF(blend), NUM(blend), BlendStates); +} + +void SetupDirtyPrimitiveRestart(Tables& tables) { + FillBlock(tables[0], OFF(primitive_restart), NUM(primitive_restart), PrimitiveRestart); +} + +void SetupDirtyPolygonOffset(Tables& tables) { + auto& table = tables[0]; + table[OFF(polygon_offset_fill_enable)] = PolygonOffset; + table[OFF(polygon_offset_line_enable)] = PolygonOffset; + table[OFF(polygon_offset_point_enable)] = PolygonOffset; + table[OFF(polygon_offset_factor)] = PolygonOffset; + table[OFF(polygon_offset_units)] = PolygonOffset; + table[OFF(polygon_offset_clamp)] = PolygonOffset; +} + +void SetupDirtyMultisampleControl(Tables& tables) { + FillBlock(tables[0], OFF(multisample_control), NUM(multisample_control), MultisampleControl); +} + +void SetupDirtyRasterizeEnable(Tables& tables) { + tables[0][OFF(rasterize_enable)] = RasterizeEnable; +} + +void SetupDirtyFramebufferSRGB(Tables& tables) { + tables[0][OFF(framebuffer_srgb)] = FramebufferSRGB; +} + +void SetupDirtyLogicOp(Tables& tables) { + FillBlock(tables[0], OFF(logic_op), NUM(logic_op), LogicOp); +} + +void SetupDirtyFragmentClampColor(Tables& tables) { + tables[0][OFF(frag_color_clamp)] = FragmentClampColor; +} + +void SetupDirtyPointSize(Tables& tables) { + tables[0][OFF(vp_point_size)] = PointSize; + tables[0][OFF(point_size)] = PointSize; + tables[0][OFF(point_sprite_enable)] = PointSize; +} + +void SetupDirtyClipControl(Tables& tables) { + auto& table = tables[0]; + table[OFF(screen_y_control)] = ClipControl; + table[OFF(depth_mode)] = ClipControl; +} + +void SetupDirtyDepthClampEnabled(Tables& tables) { + tables[0][OFF(view_volume_clip_control)] = DepthClampEnabled; +} + +void SetupDirtyMisc(Tables& tables) { + auto& table = tables[0]; + + table[OFF(clip_distance_enabled)] = ClipDistances; + + table[OFF(front_face)] = FrontFace; + + table[OFF(cull_test_enabled)] = CullTest; + table[OFF(cull_face)] = CullTest; +} + +} // Anonymous namespace + +StateTracker::StateTracker(Core::System& system) : system{system} {} + +void StateTracker::Initialize() { + auto& dirty = system.GPU().Maxwell3D().dirty; + auto& tables = dirty.tables; + SetupDirtyRenderTargets(tables); + SetupDirtyColorMasks(tables); + SetupDirtyViewports(tables); + SetupDirtyScissors(tables); + SetupDirtyVertexArrays(tables); + SetupDirtyVertexFormat(tables); + SetupDirtyShaders(tables); + SetupDirtyPolygonModes(tables); + SetupDirtyDepthTest(tables); + SetupDirtyStencilTest(tables); + SetupDirtyAlphaTest(tables); + SetupDirtyBlend(tables); + SetupDirtyPrimitiveRestart(tables); + SetupDirtyPolygonOffset(tables); + SetupDirtyMultisampleControl(tables); + SetupDirtyRasterizeEnable(tables); + SetupDirtyFramebufferSRGB(tables); + SetupDirtyLogicOp(tables); + SetupDirtyFragmentClampColor(tables); + SetupDirtyPointSize(tables); + SetupDirtyClipControl(tables); + SetupDirtyDepthClampEnabled(tables); + SetupDirtyMisc(tables); + + auto& store = dirty.on_write_stores; + store[VertexBuffers] = true; + for (std::size_t i = 0; i < Regs::NumVertexArrays; ++i) { + store[VertexBuffer0 + i] = true; + } +} + +} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_state_tracker.h b/src/video_core/renderer_opengl/gl_state_tracker.h new file mode 100644 index 000000000..b882d75c3 --- /dev/null +++ b/src/video_core/renderer_opengl/gl_state_tracker.h @@ -0,0 +1,215 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include <limits> + +#include <glad/glad.h> + +#include "common/common_types.h" +#include "core/core.h" +#include "video_core/dirty_flags.h" +#include "video_core/engines/maxwell_3d.h" + +namespace Core { +class System; +} + +namespace OpenGL { + +namespace Dirty { + +enum : u8 { + First = VideoCommon::Dirty::LastCommonEntry, + + VertexFormats, + VertexFormat0, + VertexFormat31 = VertexFormat0 + 31, + + VertexBuffers, + VertexBuffer0, + VertexBuffer31 = VertexBuffer0 + 31, + + VertexInstances, + VertexInstance0, + VertexInstance31 = VertexInstance0 + 31, + + ViewportTransform, + Viewports, + Viewport0, + Viewport15 = Viewport0 + 15, + + Scissors, + Scissor0, + Scissor15 = Scissor0 + 15, + + ColorMaskCommon, + ColorMasks, + ColorMask0, + ColorMask7 = ColorMask0 + 7, + + BlendColor, + BlendIndependentEnabled, + BlendStates, + BlendState0, + BlendState7 = BlendState0 + 7, + + Shaders, + ClipDistances, + + PolygonModes, + PolygonModeFront, + PolygonModeBack, + + ColorMask, + FrontFace, + CullTest, + DepthMask, + DepthTest, + StencilTest, + AlphaTest, + PrimitiveRestart, + PolygonOffset, + MultisampleControl, + RasterizeEnable, + FramebufferSRGB, + LogicOp, + FragmentClampColor, + PointSize, + ClipControl, + DepthClampEnabled, + + Last +}; +static_assert(Last <= std::numeric_limits<u8>::max()); + +} // namespace Dirty + +class StateTracker { +public: + explicit StateTracker(Core::System& system); + + void Initialize(); + + void BindIndexBuffer(GLuint new_index_buffer) { + if (index_buffer == new_index_buffer) { + return; + } + index_buffer = new_index_buffer; + glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, new_index_buffer); + } + + void NotifyScreenDrawVertexArray() { + auto& flags = system.GPU().Maxwell3D().dirty.flags; + flags[OpenGL::Dirty::VertexFormats] = true; + flags[OpenGL::Dirty::VertexFormat0 + 0] = true; + flags[OpenGL::Dirty::VertexFormat0 + 1] = true; + + flags[OpenGL::Dirty::VertexBuffers] = true; + flags[OpenGL::Dirty::VertexBuffer0] = true; + + flags[OpenGL::Dirty::VertexInstances] = true; + flags[OpenGL::Dirty::VertexInstance0 + 0] = true; + flags[OpenGL::Dirty::VertexInstance0 + 1] = true; + } + + void NotifyPolygonModes() { + auto& flags = system.GPU().Maxwell3D().dirty.flags; + flags[OpenGL::Dirty::PolygonModes] = true; + flags[OpenGL::Dirty::PolygonModeFront] = true; + flags[OpenGL::Dirty::PolygonModeBack] = true; + } + + void NotifyViewport0() { + auto& flags = system.GPU().Maxwell3D().dirty.flags; + flags[OpenGL::Dirty::Viewports] = true; + flags[OpenGL::Dirty::Viewport0] = true; + } + + void NotifyScissor0() { + auto& flags = system.GPU().Maxwell3D().dirty.flags; + flags[OpenGL::Dirty::Scissors] = true; + flags[OpenGL::Dirty::Scissor0] = true; + } + + void NotifyColorMask0() { + auto& flags = system.GPU().Maxwell3D().dirty.flags; + flags[OpenGL::Dirty::ColorMasks] = true; + flags[OpenGL::Dirty::ColorMask0] = true; + } + + void NotifyBlend0() { + auto& flags = system.GPU().Maxwell3D().dirty.flags; + flags[OpenGL::Dirty::BlendStates] = true; + flags[OpenGL::Dirty::BlendState0] = true; + } + + void NotifyFramebuffer() { + auto& flags = system.GPU().Maxwell3D().dirty.flags; + flags[VideoCommon::Dirty::RenderTargets] = true; + } + + void NotifyFrontFace() { + auto& flags = system.GPU().Maxwell3D().dirty.flags; + flags[OpenGL::Dirty::FrontFace] = true; + } + + void NotifyCullTest() { + auto& flags = system.GPU().Maxwell3D().dirty.flags; + flags[OpenGL::Dirty::CullTest] = true; + } + + void NotifyDepthMask() { + auto& flags = system.GPU().Maxwell3D().dirty.flags; + flags[OpenGL::Dirty::DepthMask] = true; + } + + void NotifyDepthTest() { + auto& flags = system.GPU().Maxwell3D().dirty.flags; + flags[OpenGL::Dirty::DepthTest] = true; + } + + void NotifyStencilTest() { + auto& flags = system.GPU().Maxwell3D().dirty.flags; + flags[OpenGL::Dirty::StencilTest] = true; + } + + void NotifyPolygonOffset() { + auto& flags = system.GPU().Maxwell3D().dirty.flags; + flags[OpenGL::Dirty::PolygonOffset] = true; + } + + void NotifyRasterizeEnable() { + auto& flags = system.GPU().Maxwell3D().dirty.flags; + flags[OpenGL::Dirty::RasterizeEnable] = true; + } + + void NotifyFramebufferSRGB() { + auto& flags = system.GPU().Maxwell3D().dirty.flags; + flags[OpenGL::Dirty::FramebufferSRGB] = true; + } + + void NotifyLogicOp() { + auto& flags = system.GPU().Maxwell3D().dirty.flags; + flags[OpenGL::Dirty::LogicOp] = true; + } + + void NotifyClipControl() { + auto& flags = system.GPU().Maxwell3D().dirty.flags; + flags[OpenGL::Dirty::ClipControl] = true; + } + + void NotifyAlphaTest() { + auto& flags = system.GPU().Maxwell3D().dirty.flags; + flags[OpenGL::Dirty::AlphaTest] = true; + } + +private: + Core::System& system; + + GLuint index_buffer = 0; +}; + +} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.cpp b/src/video_core/renderer_opengl/gl_stream_buffer.cpp index 35ba334e4..6ec328c53 100644 --- a/src/video_core/renderer_opengl/gl_stream_buffer.cpp +++ b/src/video_core/renderer_opengl/gl_stream_buffer.cpp @@ -7,7 +7,6 @@ #include "common/alignment.h" #include "common/assert.h" #include "common/microprofile.h" -#include "video_core/renderer_opengl/gl_state.h" #include "video_core/renderer_opengl/gl_stream_buffer.h" MICROPROFILE_DEFINE(OpenGL_StreamBuffer, "OpenGL", "Stream Buffer Orphaning", diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index 5c1ae1418..f424e3000 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -10,7 +10,7 @@ #include "core/core.h" #include "video_core/morton.h" #include "video_core/renderer_opengl/gl_resource_manager.h" -#include "video_core/renderer_opengl/gl_state.h" +#include "video_core/renderer_opengl/gl_state_tracker.h" #include "video_core/renderer_opengl/gl_texture_cache.h" #include "video_core/renderer_opengl/utils.h" #include "video_core/texture_cache/surface_base.h" @@ -53,6 +53,7 @@ constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> tex_format {GL_R8UI, GL_RED_INTEGER, GL_UNSIGNED_BYTE, false}, // R8UI {GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT, false}, // RGBA16F {GL_RGBA16, GL_RGBA, GL_UNSIGNED_SHORT, false}, // RGBA16U + {GL_RGBA16_SNORM, GL_RGBA, GL_SHORT, false}, // RGBA16S {GL_RGBA16UI, GL_RGBA_INTEGER, GL_UNSIGNED_SHORT, false}, // RGBA16UI {GL_R11F_G11F_B10F, GL_RGB, GL_UNSIGNED_INT_10F_11F_11F_REV, false}, // R11FG11FB10F {GL_RGBA32UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT, false}, // RGBA32UI @@ -87,6 +88,7 @@ constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> tex_format {GL_RG32UI, GL_RG_INTEGER, GL_UNSIGNED_INT, false}, // RG32UI {GL_RGB16F, GL_RGBA, GL_HALF_FLOAT, false}, // RGBX16F {GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT, false}, // R32UI + {GL_R32I, GL_RED_INTEGER, GL_INT, false}, // R32I {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_8X8 {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_8X5 {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_5X4 @@ -396,6 +398,7 @@ CachedSurfaceView::CachedSurfaceView(CachedSurface& surface, const ViewParams& p const bool is_proxy) : VideoCommon::ViewBase(params), surface{surface}, is_proxy{is_proxy} { target = GetTextureTarget(params.target); + format = GetFormatTuple(surface.GetSurfaceParams().pixel_format).internal_format; if (!is_proxy) { texture_view = CreateTextureView(); } @@ -405,24 +408,36 @@ CachedSurfaceView::CachedSurfaceView(CachedSurface& surface, const ViewParams& p CachedSurfaceView::~CachedSurfaceView() = default; void CachedSurfaceView::Attach(GLenum attachment, GLenum target) const { - ASSERT(params.num_layers == 1 && params.num_levels == 1); + ASSERT(params.num_levels == 1); - const auto& owner_params = surface.GetSurfaceParams(); + const GLuint texture = surface.GetTexture(); + if (params.num_layers > 1) { + // Layered framebuffer attachments + UNIMPLEMENTED_IF(params.base_layer != 0); - switch (owner_params.target) { + switch (params.target) { + case SurfaceTarget::Texture2DArray: + glFramebufferTexture(target, attachment, texture, params.base_level); + break; + default: + UNIMPLEMENTED(); + } + return; + } + + const GLenum view_target = surface.GetTarget(); + switch (surface.GetSurfaceParams().target) { case SurfaceTarget::Texture1D: - glFramebufferTexture1D(target, attachment, surface.GetTarget(), surface.GetTexture(), - params.base_level); + glFramebufferTexture1D(target, attachment, view_target, texture, params.base_level); break; case SurfaceTarget::Texture2D: - glFramebufferTexture2D(target, attachment, surface.GetTarget(), surface.GetTexture(), - params.base_level); + glFramebufferTexture2D(target, attachment, view_target, texture, params.base_level); break; case SurfaceTarget::Texture1DArray: case SurfaceTarget::Texture2DArray: case SurfaceTarget::TextureCubemap: case SurfaceTarget::TextureCubeArray: - glFramebufferTextureLayer(target, attachment, surface.GetTexture(), params.base_level, + glFramebufferTextureLayer(target, attachment, texture, params.base_level, params.base_layer); break; default: @@ -454,25 +469,20 @@ void CachedSurfaceView::ApplySwizzle(SwizzleSource x_source, SwizzleSource y_sou } OGLTextureView CachedSurfaceView::CreateTextureView() const { - const auto& owner_params = surface.GetSurfaceParams(); OGLTextureView texture_view; texture_view.Create(); - const GLuint handle{texture_view.handle}; - const FormatTuple& tuple{GetFormatTuple(owner_params.pixel_format)}; - - glTextureView(handle, target, surface.texture.handle, tuple.internal_format, params.base_level, + glTextureView(texture_view.handle, target, surface.texture.handle, format, params.base_level, params.num_levels, params.base_layer, params.num_layers); - - ApplyTextureDefaults(owner_params, handle); + ApplyTextureDefaults(surface.GetSurfaceParams(), texture_view.handle); return texture_view; } TextureCacheOpenGL::TextureCacheOpenGL(Core::System& system, VideoCore::RasterizerInterface& rasterizer, - const Device& device) - : TextureCacheBase{system, rasterizer} { + const Device& device, StateTracker& state_tracker) + : TextureCacheBase{system, rasterizer}, state_tracker{state_tracker} { src_framebuffer.Create(); dst_framebuffer.Create(); } @@ -506,25 +516,26 @@ void TextureCacheOpenGL::ImageBlit(View& src_view, View& dst_view, const Tegra::Engines::Fermi2D::Config& copy_config) { const auto& src_params{src_view->GetSurfaceParams()}; const auto& dst_params{dst_view->GetSurfaceParams()}; + UNIMPLEMENTED_IF(src_params.target == SurfaceTarget::Texture3D); + UNIMPLEMENTED_IF(dst_params.target == SurfaceTarget::Texture3D); - OpenGLState prev_state{OpenGLState::GetCurState()}; - SCOPE_EXIT({ - prev_state.AllDirty(); - prev_state.Apply(); - }); - - OpenGLState state; - state.draw.read_framebuffer = src_framebuffer.handle; - state.draw.draw_framebuffer = dst_framebuffer.handle; - state.framebuffer_srgb.enabled = dst_params.srgb_conversion; - state.AllDirty(); - state.Apply(); + state_tracker.NotifyScissor0(); + state_tracker.NotifyFramebuffer(); + state_tracker.NotifyRasterizeEnable(); + state_tracker.NotifyFramebufferSRGB(); - u32 buffers{}; + if (dst_params.srgb_conversion) { + glEnable(GL_FRAMEBUFFER_SRGB); + } else { + glDisable(GL_FRAMEBUFFER_SRGB); + } + glDisable(GL_RASTERIZER_DISCARD); + glDisablei(GL_SCISSOR_TEST, 0); - UNIMPLEMENTED_IF(src_params.target == SurfaceTarget::Texture3D); - UNIMPLEMENTED_IF(dst_params.target == SurfaceTarget::Texture3D); + glBindFramebuffer(GL_READ_FRAMEBUFFER, src_framebuffer.handle); + glBindFramebuffer(GL_DRAW_FRAMEBUFFER, dst_framebuffer.handle); + GLenum buffers = 0; if (src_params.type == SurfaceType::ColorTexture) { src_view->Attach(GL_COLOR_ATTACHMENT0, GL_READ_FRAMEBUFFER); glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h index 8e13ab38b..6658c6ffd 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.h +++ b/src/video_core/renderer_opengl/gl_texture_cache.h @@ -27,6 +27,7 @@ using VideoCommon::ViewParams; class CachedSurfaceView; class CachedSurface; class TextureCacheOpenGL; +class StateTracker; using Surface = std::shared_ptr<CachedSurface>; using View = std::shared_ptr<CachedSurfaceView>; @@ -96,6 +97,10 @@ public: return texture_view.handle; } + GLenum GetFormat() const { + return format; + } + const SurfaceParams& GetSurfaceParams() const { return surface.GetSurfaceParams(); } @@ -113,6 +118,7 @@ private: CachedSurface& surface; GLenum target{}; + GLenum format{}; OGLTextureView texture_view; u32 swizzle{}; @@ -122,7 +128,7 @@ private: class TextureCacheOpenGL final : public TextureCacheBase { public: explicit TextureCacheOpenGL(Core::System& system, VideoCore::RasterizerInterface& rasterizer, - const Device& device); + const Device& device, StateTracker& state_tracker); ~TextureCacheOpenGL(); protected: @@ -139,6 +145,8 @@ protected: private: GLuint FetchPBO(std::size_t buffer_size); + StateTracker& state_tracker; + OGLFramebuffer src_framebuffer; OGLFramebuffer dst_framebuffer; std::unordered_map<u32, OGLBuffer> copy_pbo_cache; diff --git a/src/video_core/renderer_opengl/maxwell_to_gl.h b/src/video_core/renderer_opengl/maxwell_to_gl.h index 7ed505628..89f0e04ef 100644 --- a/src/video_core/renderer_opengl/maxwell_to_gl.h +++ b/src/video_core/renderer_opengl/maxwell_to_gl.h @@ -92,8 +92,32 @@ inline GLenum VertexType(Maxwell::VertexAttribute attrib) { } case Maxwell::VertexAttribute::Type::UnsignedScaled: switch (attrib.size) { + case Maxwell::VertexAttribute::Size::Size_8: case Maxwell::VertexAttribute::Size::Size_8_8: + case Maxwell::VertexAttribute::Size::Size_8_8_8: + case Maxwell::VertexAttribute::Size::Size_8_8_8_8: return GL_UNSIGNED_BYTE; + case Maxwell::VertexAttribute::Size::Size_16: + case Maxwell::VertexAttribute::Size::Size_16_16: + case Maxwell::VertexAttribute::Size::Size_16_16_16: + case Maxwell::VertexAttribute::Size::Size_16_16_16_16: + return GL_UNSIGNED_SHORT; + default: + LOG_ERROR(Render_OpenGL, "Unimplemented vertex size={}", attrib.SizeString()); + return {}; + } + case Maxwell::VertexAttribute::Type::SignedScaled: + switch (attrib.size) { + case Maxwell::VertexAttribute::Size::Size_8: + case Maxwell::VertexAttribute::Size::Size_8_8: + case Maxwell::VertexAttribute::Size::Size_8_8_8: + case Maxwell::VertexAttribute::Size::Size_8_8_8_8: + return GL_BYTE; + case Maxwell::VertexAttribute::Size::Size_16: + case Maxwell::VertexAttribute::Size::Size_16_16: + case Maxwell::VertexAttribute::Size::Size_16_16_16: + case Maxwell::VertexAttribute::Size::Size_16_16_16_16: + return GL_SHORT; default: LOG_ERROR(Render_OpenGL, "Unimplemented vertex size={}", attrib.SizeString()); return {}; @@ -401,24 +425,24 @@ inline GLenum StencilOp(Maxwell::StencilOp stencil) { return GL_KEEP; } -inline GLenum FrontFace(Maxwell::Cull::FrontFace front_face) { +inline GLenum FrontFace(Maxwell::FrontFace front_face) { switch (front_face) { - case Maxwell::Cull::FrontFace::ClockWise: + case Maxwell::FrontFace::ClockWise: return GL_CW; - case Maxwell::Cull::FrontFace::CounterClockWise: + case Maxwell::FrontFace::CounterClockWise: return GL_CCW; } LOG_ERROR(Render_OpenGL, "Unimplemented front face cull={}", static_cast<u32>(front_face)); return GL_CCW; } -inline GLenum CullFace(Maxwell::Cull::CullFace cull_face) { +inline GLenum CullFace(Maxwell::CullFace cull_face) { switch (cull_face) { - case Maxwell::Cull::CullFace::Front: + case Maxwell::CullFace::Front: return GL_FRONT; - case Maxwell::Cull::CullFace::Back: + case Maxwell::CullFace::Back: return GL_BACK; - case Maxwell::Cull::CullFace::FrontAndBack: + case Maxwell::CullFace::FrontAndBack: return GL_FRONT_AND_BACK; } LOG_ERROR(Render_OpenGL, "Unimplemented cull face={}", static_cast<u32>(cull_face)); @@ -464,5 +488,18 @@ inline GLenum LogicOp(Maxwell::LogicOperation operation) { return GL_COPY; } +inline GLenum PolygonMode(Maxwell::PolygonMode polygon_mode) { + switch (polygon_mode) { + case Maxwell::PolygonMode::Point: + return GL_POINT; + case Maxwell::PolygonMode::Line: + return GL_LINE; + case Maxwell::PolygonMode::Fill: + return GL_FILL; + } + UNREACHABLE_MSG("Invalid polygon mode={}", static_cast<int>(polygon_mode)); + return GL_FILL; +} + } // namespace MaxwellToGL } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index bba16afaf..fca5e3ec0 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -5,30 +5,54 @@ #include <algorithm> #include <cstddef> #include <cstdlib> +#include <cstring> #include <memory> + #include <glad/glad.h> + #include "common/assert.h" #include "common/logging/log.h" +#include "common/microprofile.h" #include "common/telemetry.h" #include "core/core.h" #include "core/core_timing.h" #include "core/frontend/emu_window.h" -#include "core/frontend/scope_acquire_window_context.h" #include "core/memory.h" #include "core/perf_stats.h" #include "core/settings.h" #include "core/telemetry_session.h" #include "video_core/morton.h" #include "video_core/renderer_opengl/gl_rasterizer.h" +#include "video_core/renderer_opengl/gl_shader_manager.h" #include "video_core/renderer_opengl/renderer_opengl.h" namespace OpenGL { namespace { -constexpr char vertex_shader[] = R"( +// If the size of this is too small, it ends up creating a soft cap on FPS as the renderer will have +// to wait on available presentation frames. +constexpr std::size_t SWAP_CHAIN_SIZE = 3; + +struct Frame { + u32 width{}; /// Width of the frame (to detect resize) + u32 height{}; /// Height of the frame + bool color_reloaded{}; /// Texture attachment was recreated (ie: resized) + OpenGL::OGLRenderbuffer color{}; /// Buffer shared between the render/present FBO + OpenGL::OGLFramebuffer render{}; /// FBO created on the render thread + OpenGL::OGLFramebuffer present{}; /// FBO created on the present thread + GLsync render_fence{}; /// Fence created on the render thread + GLsync present_fence{}; /// Fence created on the presentation thread + bool is_srgb{}; /// Framebuffer is sRGB or RGB +}; + +constexpr char VERTEX_SHADER[] = R"( #version 430 core +out gl_PerVertex { + vec4 gl_Position; +}; + layout (location = 0) in vec2 vert_position; layout (location = 1) in vec2 vert_tex_coord; layout (location = 0) out vec2 frag_tex_coord; @@ -49,7 +73,7 @@ void main() { } )"; -constexpr char fragment_shader[] = R"( +constexpr char FRAGMENT_SHADER[] = R"( #version 430 core layout (location = 0) in vec2 frag_tex_coord; @@ -58,7 +82,7 @@ layout (location = 0) out vec4 color; layout (binding = 0) uniform sampler2D color_texture; void main() { - color = texture(color_texture, frag_tex_coord); + color = vec4(texture(color_texture, frag_tex_coord).rgb, 1.0f); } )"; @@ -67,13 +91,31 @@ constexpr GLint TexCoordLocation = 1; constexpr GLint ModelViewMatrixLocation = 0; struct ScreenRectVertex { - constexpr ScreenRectVertex(GLfloat x, GLfloat y, GLfloat u, GLfloat v) - : position{{x, y}}, tex_coord{{u, v}} {} + constexpr ScreenRectVertex(u32 x, u32 y, GLfloat u, GLfloat v) + : position{{static_cast<GLfloat>(x), static_cast<GLfloat>(y)}}, tex_coord{{u, v}} {} std::array<GLfloat, 2> position; std::array<GLfloat, 2> tex_coord; }; +/// Returns true if any debug tool is attached +bool HasDebugTool() { + const bool nsight = std::getenv("NVTX_INJECTION64_PATH") || std::getenv("NSIGHT_LAUNCHED"); + if (nsight) { + return true; + } + + GLint num_extensions; + glGetIntegerv(GL_NUM_EXTENSIONS, &num_extensions); + for (GLuint index = 0; index < static_cast<GLuint>(num_extensions); ++index) { + const auto name = reinterpret_cast<const char*>(glGetStringi(GL_EXTENSIONS, index)); + if (!std::strcmp(name, "GL_EXT_debug_tool")) { + return true; + } + } + return false; +} + /** * Defines a 1:1 pixel ortographic projection matrix with (0,0) on the top-left * corner and (width, height) on the lower-bottom. @@ -157,22 +199,229 @@ void APIENTRY DebugHandler(GLenum source, GLenum type, GLuint id, GLenum severit } // Anonymous namespace +/** + * For smooth Vsync rendering, we want to always present the latest frame that the core generates, + * but also make sure that rendering happens at the pace that the frontend dictates. This is a + * helper class that the renderer uses to sync frames between the render thread and the presentation + * thread + */ +class FrameMailbox { +public: + std::mutex swap_chain_lock; + std::condition_variable present_cv; + std::array<Frame, SWAP_CHAIN_SIZE> swap_chain{}; + std::queue<Frame*> free_queue; + std::deque<Frame*> present_queue; + Frame* previous_frame{}; + + FrameMailbox() : has_debug_tool{HasDebugTool()} { + for (auto& frame : swap_chain) { + free_queue.push(&frame); + } + } + + ~FrameMailbox() { + // lock the mutex and clear out the present and free_queues and notify any people who are + // blocked to prevent deadlock on shutdown + std::scoped_lock lock{swap_chain_lock}; + std::queue<Frame*>().swap(free_queue); + present_queue.clear(); + present_cv.notify_all(); + } + + void ReloadPresentFrame(Frame* frame, u32 height, u32 width) { + frame->present.Release(); + frame->present.Create(); + GLint previous_draw_fbo{}; + glGetIntegerv(GL_DRAW_FRAMEBUFFER_BINDING, &previous_draw_fbo); + glBindFramebuffer(GL_FRAMEBUFFER, frame->present.handle); + glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_RENDERBUFFER, + frame->color.handle); + if (glCheckFramebufferStatus(GL_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) { + LOG_CRITICAL(Render_OpenGL, "Failed to recreate present FBO!"); + } + glBindFramebuffer(GL_DRAW_FRAMEBUFFER, previous_draw_fbo); + frame->color_reloaded = false; + } + + void ReloadRenderFrame(Frame* frame, u32 width, u32 height) { + // Recreate the color texture attachment + frame->color.Release(); + frame->color.Create(); + const GLenum internal_format = frame->is_srgb ? GL_SRGB8 : GL_RGB8; + glNamedRenderbufferStorage(frame->color.handle, internal_format, width, height); + + // Recreate the FBO for the render target + frame->render.Release(); + frame->render.Create(); + glBindFramebuffer(GL_FRAMEBUFFER, frame->render.handle); + glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_RENDERBUFFER, + frame->color.handle); + if (glCheckFramebufferStatus(GL_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) { + LOG_CRITICAL(Render_OpenGL, "Failed to recreate render FBO!"); + } + + frame->width = width; + frame->height = height; + frame->color_reloaded = true; + } + + Frame* GetRenderFrame() { + std::unique_lock lock{swap_chain_lock}; + + // If theres no free frames, we will reuse the oldest render frame + if (free_queue.empty()) { + auto frame = present_queue.back(); + present_queue.pop_back(); + return frame; + } + + Frame* frame = free_queue.front(); + free_queue.pop(); + return frame; + } + + void ReleaseRenderFrame(Frame* frame) { + std::unique_lock lock{swap_chain_lock}; + present_queue.push_front(frame); + present_cv.notify_one(); + + DebugNotifyNextFrame(); + } + + Frame* TryGetPresentFrame(int timeout_ms) { + DebugWaitForNextFrame(); + + std::unique_lock lock{swap_chain_lock}; + // wait for new entries in the present_queue + present_cv.wait_for(lock, std::chrono::milliseconds(timeout_ms), + [&] { return !present_queue.empty(); }); + if (present_queue.empty()) { + // timed out waiting for a frame to draw so return the previous frame + return previous_frame; + } + + // free the previous frame and add it back to the free queue + if (previous_frame) { + free_queue.push(previous_frame); + } + + // the newest entries are pushed to the front of the queue + Frame* frame = present_queue.front(); + present_queue.pop_front(); + // remove all old entries from the present queue and move them back to the free_queue + for (auto f : present_queue) { + free_queue.push(f); + } + present_queue.clear(); + previous_frame = frame; + return frame; + } + +private: + std::mutex debug_synch_mutex; + std::condition_variable debug_synch_condition; + std::atomic_int frame_for_debug{}; + const bool has_debug_tool; // When true, using a GPU debugger, so keep frames in lock-step + + /// Signal that a new frame is available (called from GPU thread) + void DebugNotifyNextFrame() { + if (!has_debug_tool) { + return; + } + frame_for_debug++; + std::lock_guard lock{debug_synch_mutex}; + debug_synch_condition.notify_one(); + } + + /// Wait for a new frame to be available (called from presentation thread) + void DebugWaitForNextFrame() { + if (!has_debug_tool) { + return; + } + const int last_frame = frame_for_debug; + std::unique_lock lock{debug_synch_mutex}; + debug_synch_condition.wait(lock, + [this, last_frame] { return frame_for_debug > last_frame; }); + } +}; + RendererOpenGL::RendererOpenGL(Core::Frontend::EmuWindow& emu_window, Core::System& system) - : VideoCore::RendererBase{emu_window}, emu_window{emu_window}, system{system} {} + : VideoCore::RendererBase{emu_window}, emu_window{emu_window}, system{system}, + frame_mailbox{std::make_unique<FrameMailbox>()} {} RendererOpenGL::~RendererOpenGL() = default; +MICROPROFILE_DEFINE(OpenGL_RenderFrame, "OpenGL", "Render Frame", MP_RGB(128, 128, 64)); +MICROPROFILE_DEFINE(OpenGL_WaitPresent, "OpenGL", "Wait For Present", MP_RGB(128, 128, 128)); + void RendererOpenGL::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { - // Maintain the rasterizer's state as a priority - OpenGLState prev_state = OpenGLState::GetCurState(); - state.AllDirty(); - state.Apply(); + render_window.PollEvents(); + + if (!framebuffer) { + return; + } + + PrepareRendertarget(framebuffer); + RenderScreenshot(); + + Frame* frame; + { + MICROPROFILE_SCOPE(OpenGL_WaitPresent); + + frame = frame_mailbox->GetRenderFrame(); + + // Clean up sync objects before drawing + + // INTEL driver workaround. We can't delete the previous render sync object until we are + // sure that the presentation is done + if (frame->present_fence) { + glClientWaitSync(frame->present_fence, 0, GL_TIMEOUT_IGNORED); + } + + // delete the draw fence if the frame wasn't presented + if (frame->render_fence) { + glDeleteSync(frame->render_fence); + frame->render_fence = 0; + } + + // wait for the presentation to be done + if (frame->present_fence) { + glWaitSync(frame->present_fence, 0, GL_TIMEOUT_IGNORED); + glDeleteSync(frame->present_fence); + frame->present_fence = 0; + } + } + { + MICROPROFILE_SCOPE(OpenGL_RenderFrame); + const auto& layout = render_window.GetFramebufferLayout(); + + // Recreate the frame if the size of the window has changed + if (layout.width != frame->width || layout.height != frame->height || + screen_info.display_srgb != frame->is_srgb) { + LOG_DEBUG(Render_OpenGL, "Reloading render frame"); + frame->is_srgb = screen_info.display_srgb; + frame_mailbox->ReloadRenderFrame(frame, layout.width, layout.height); + } + glBindFramebuffer(GL_DRAW_FRAMEBUFFER, frame->render.handle); + DrawScreen(layout); + // Create a fence for the frontend to wait on and swap this frame to OffTex + frame->render_fence = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0); + glFlush(); + frame_mailbox->ReleaseRenderFrame(frame); + m_current_frame++; + rasterizer->TickFrame(); + } +} + +void RendererOpenGL::PrepareRendertarget(const Tegra::FramebufferConfig* framebuffer) { if (framebuffer) { // If framebuffer is provided, reload it from memory to a texture if (screen_info.texture.width != static_cast<GLsizei>(framebuffer->width) || screen_info.texture.height != static_cast<GLsizei>(framebuffer->height) || - screen_info.texture.pixel_format != framebuffer->pixel_format) { + screen_info.texture.pixel_format != framebuffer->pixel_format || + gl_framebuffer_data.empty()) { // Reallocate texture if the framebuffer size has changed. // This is expected to not happen very often and hence should not be a // performance problem. @@ -181,22 +430,7 @@ void RendererOpenGL::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { // Load the framebuffer from memory, draw it to the screen, and swap buffers LoadFBToScreenInfo(*framebuffer); - - if (renderer_settings.screenshot_requested) - CaptureScreenshot(); - - DrawScreen(render_window.GetFramebufferLayout()); - - rasterizer->TickFrame(); - - render_window.SwapBuffers(); } - - render_window.PollEvents(); - - // Restore the rasterizer state - prev_state.AllDirty(); - prev_state.Apply(); } void RendererOpenGL::LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuffer) { @@ -249,31 +483,24 @@ void RendererOpenGL::InitOpenGLObjects() { glClearColor(Settings::values.bg_red, Settings::values.bg_green, Settings::values.bg_blue, 0.0f); - // Link shaders and get variable locations - shader.CreateFromSource(vertex_shader, nullptr, fragment_shader); - state.draw.shader_program = shader.handle; - state.AllDirty(); - state.Apply(); + // Create shader programs + OGLShader vertex_shader; + vertex_shader.Create(VERTEX_SHADER, GL_VERTEX_SHADER); + + OGLShader fragment_shader; + fragment_shader.Create(FRAGMENT_SHADER, GL_FRAGMENT_SHADER); + + vertex_program.Create(true, false, vertex_shader.handle); + fragment_program.Create(true, false, fragment_shader.handle); + + // Create program pipeline + program_manager.Create(); // Generate VBO handle for drawing vertex_buffer.Create(); - // Generate VAO - vertex_array.Create(); - state.draw.vertex_array = vertex_array.handle; - // Attach vertex data to VAO glNamedBufferData(vertex_buffer.handle, sizeof(ScreenRectVertex) * 4, nullptr, GL_STREAM_DRAW); - glVertexArrayAttribFormat(vertex_array.handle, PositionLocation, 2, GL_FLOAT, GL_FALSE, - offsetof(ScreenRectVertex, position)); - glVertexArrayAttribFormat(vertex_array.handle, TexCoordLocation, 2, GL_FLOAT, GL_FALSE, - offsetof(ScreenRectVertex, tex_coord)); - glVertexArrayAttribBinding(vertex_array.handle, PositionLocation, 0); - glVertexArrayAttribBinding(vertex_array.handle, TexCoordLocation, 0); - glEnableVertexArrayAttrib(vertex_array.handle, PositionLocation); - glEnableVertexArrayAttrib(vertex_array.handle, TexCoordLocation); - glVertexArrayVertexBuffer(vertex_array.handle, 0, vertex_buffer.handle, 0, - sizeof(ScreenRectVertex)); // Allocate textures for the screen screen_info.texture.resource.Create(GL_TEXTURE_2D); @@ -306,7 +533,8 @@ void RendererOpenGL::CreateRasterizer() { if (rasterizer) { return; } - rasterizer = std::make_unique<RasterizerOpenGL>(system, emu_window, screen_info); + rasterizer = std::make_unique<RasterizerOpenGL>(system, emu_window, screen_info, + program_manager, state_tracker); } void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture, @@ -345,8 +573,19 @@ void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture, glTextureStorage2D(texture.resource.handle, 1, internal_format, texture.width, texture.height); } -void RendererOpenGL::DrawScreenTriangles(const ScreenInfo& screen_info, float x, float y, float w, - float h) { +void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) { + if (renderer_settings.set_background_color) { + // Update background color before drawing + glClearColor(Settings::values.bg_red, Settings::values.bg_green, Settings::values.bg_blue, + 0.0f); + } + + // Set projection matrix + const std::array ortho_matrix = + MakeOrthographicMatrix(static_cast<float>(layout.width), static_cast<float>(layout.height)); + glProgramUniformMatrix3x2fv(vertex_program.handle, ModelViewMatrixLocation, 1, GL_FALSE, + std::data(ortho_matrix)); + const auto& texcoords = screen_info.display_texcoords; auto left = texcoords.left; auto right = texcoords.right; @@ -378,60 +617,129 @@ void RendererOpenGL::DrawScreenTriangles(const ScreenInfo& screen_info, float x, static_cast<f32>(screen_info.texture.height); } + const auto& screen = layout.screen; const std::array vertices = { - ScreenRectVertex(x, y, texcoords.top * scale_u, left * scale_v), - ScreenRectVertex(x + w, y, texcoords.bottom * scale_u, left * scale_v), - ScreenRectVertex(x, y + h, texcoords.top * scale_u, right * scale_v), - ScreenRectVertex(x + w, y + h, texcoords.bottom * scale_u, right * scale_v), + ScreenRectVertex(screen.left, screen.top, texcoords.top * scale_u, left * scale_v), + ScreenRectVertex(screen.right, screen.top, texcoords.bottom * scale_u, left * scale_v), + ScreenRectVertex(screen.left, screen.bottom, texcoords.top * scale_u, right * scale_v), + ScreenRectVertex(screen.right, screen.bottom, texcoords.bottom * scale_u, right * scale_v), }; - - state.textures[0] = screen_info.display_texture; - state.framebuffer_srgb.enabled = screen_info.display_srgb; - state.AllDirty(); - state.Apply(); glNamedBufferSubData(vertex_buffer.handle, 0, sizeof(vertices), std::data(vertices)); - glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); - // Restore default state - state.framebuffer_srgb.enabled = false; - state.textures[0] = 0; - state.AllDirty(); - state.Apply(); -} -void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) { - if (renderer_settings.set_background_color) { - // Update background color before drawing - glClearColor(Settings::values.bg_red, Settings::values.bg_green, Settings::values.bg_blue, - 0.0f); + // TODO: Signal state tracker about these changes + state_tracker.NotifyScreenDrawVertexArray(); + state_tracker.NotifyPolygonModes(); + state_tracker.NotifyViewport0(); + state_tracker.NotifyScissor0(); + state_tracker.NotifyColorMask0(); + state_tracker.NotifyBlend0(); + state_tracker.NotifyFramebuffer(); + state_tracker.NotifyFrontFace(); + state_tracker.NotifyCullTest(); + state_tracker.NotifyDepthTest(); + state_tracker.NotifyStencilTest(); + state_tracker.NotifyPolygonOffset(); + state_tracker.NotifyRasterizeEnable(); + state_tracker.NotifyFramebufferSRGB(); + state_tracker.NotifyLogicOp(); + state_tracker.NotifyClipControl(); + state_tracker.NotifyAlphaTest(); + + program_manager.UseVertexShader(vertex_program.handle); + program_manager.UseGeometryShader(0); + program_manager.UseFragmentShader(fragment_program.handle); + program_manager.BindGraphicsPipeline(); + + glEnable(GL_CULL_FACE); + if (screen_info.display_srgb) { + glEnable(GL_FRAMEBUFFER_SRGB); + } else { + glDisable(GL_FRAMEBUFFER_SRGB); } + glDisable(GL_COLOR_LOGIC_OP); + glDisable(GL_DEPTH_TEST); + glDisable(GL_STENCIL_TEST); + glDisable(GL_POLYGON_OFFSET_FILL); + glDisable(GL_RASTERIZER_DISCARD); + glDisable(GL_ALPHA_TEST); + glDisablei(GL_BLEND, 0); + glDisablei(GL_SCISSOR_TEST, 0); + glPolygonMode(GL_FRONT_AND_BACK, GL_FILL); + glCullFace(GL_BACK); + glFrontFace(GL_CW); + glColorMaski(0, GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); + glClipControl(GL_LOWER_LEFT, GL_ZERO_TO_ONE); + glViewportIndexedf(0, 0.0f, 0.0f, static_cast<GLfloat>(layout.width), + static_cast<GLfloat>(layout.height)); + glDepthRangeIndexed(0, 0.0, 0.0); + + glEnableVertexAttribArray(PositionLocation); + glEnableVertexAttribArray(TexCoordLocation); + glVertexAttribDivisor(PositionLocation, 0); + glVertexAttribDivisor(TexCoordLocation, 0); + glVertexAttribFormat(PositionLocation, 2, GL_FLOAT, GL_FALSE, + offsetof(ScreenRectVertex, position)); + glVertexAttribFormat(TexCoordLocation, 2, GL_FLOAT, GL_FALSE, + offsetof(ScreenRectVertex, tex_coord)); + glVertexAttribBinding(PositionLocation, 0); + glVertexAttribBinding(TexCoordLocation, 0); + glBindVertexBuffer(0, vertex_buffer.handle, 0, sizeof(ScreenRectVertex)); + + glBindTextureUnit(0, screen_info.display_texture); + glBindSampler(0, 0); - const auto& screen = layout.screen; - - glViewport(0, 0, layout.width, layout.height); glClear(GL_COLOR_BUFFER_BIT); + glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); +} - // Set projection matrix - const std::array ortho_matrix = - MakeOrthographicMatrix(static_cast<float>(layout.width), static_cast<float>(layout.height)); - glUniformMatrix3x2fv(ModelViewMatrixLocation, 1, GL_FALSE, ortho_matrix.data()); +void RendererOpenGL::TryPresent(int timeout_ms) { + const auto& layout = render_window.GetFramebufferLayout(); + auto frame = frame_mailbox->TryGetPresentFrame(timeout_ms); + if (!frame) { + LOG_DEBUG(Render_OpenGL, "TryGetPresentFrame returned no frame to present"); + return; + } - DrawScreenTriangles(screen_info, static_cast<float>(screen.left), - static_cast<float>(screen.top), static_cast<float>(screen.GetWidth()), - static_cast<float>(screen.GetHeight())); + // Clearing before a full overwrite of a fbo can signal to drivers that they can avoid a + // readback since we won't be doing any blending + glClear(GL_COLOR_BUFFER_BIT); - m_current_frame++; + // Recreate the presentation FBO if the color attachment was changed + if (frame->color_reloaded) { + LOG_DEBUG(Render_OpenGL, "Reloading present frame"); + frame_mailbox->ReloadPresentFrame(frame, layout.width, layout.height); + } + glWaitSync(frame->render_fence, 0, GL_TIMEOUT_IGNORED); + // INTEL workaround. + // Normally we could just delete the draw fence here, but due to driver bugs, we can just delete + // it on the emulation thread without too much penalty + // glDeleteSync(frame.render_sync); + // frame.render_sync = 0; + + glBindFramebuffer(GL_READ_FRAMEBUFFER, frame->present.handle); + glBlitFramebuffer(0, 0, frame->width, frame->height, 0, 0, layout.width, layout.height, + GL_COLOR_BUFFER_BIT, GL_LINEAR); + + // Insert fence for the main thread to block on + frame->present_fence = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0); + glFlush(); + + glBindFramebuffer(GL_READ_FRAMEBUFFER, 0); } -void RendererOpenGL::UpdateFramerate() {} +void RendererOpenGL::RenderScreenshot() { + if (!renderer_settings.screenshot_requested) { + return; + } + + GLint old_read_fb; + GLint old_draw_fb; + glGetIntegerv(GL_READ_FRAMEBUFFER_BINDING, &old_read_fb); + glGetIntegerv(GL_DRAW_FRAMEBUFFER_BINDING, &old_draw_fb); -void RendererOpenGL::CaptureScreenshot() { // Draw the current frame to the screenshot framebuffer screenshot_framebuffer.Create(); - GLuint old_read_fb = state.draw.read_framebuffer; - GLuint old_draw_fb = state.draw.draw_framebuffer; - state.draw.read_framebuffer = state.draw.draw_framebuffer = screenshot_framebuffer.handle; - state.AllDirty(); - state.Apply(); + glBindFramebuffer(GL_FRAMEBUFFER, screenshot_framebuffer.handle); Layout::FramebufferLayout layout{renderer_settings.screenshot_framebuffer_layout}; @@ -448,19 +756,16 @@ void RendererOpenGL::CaptureScreenshot() { renderer_settings.screenshot_bits); screenshot_framebuffer.Release(); - state.draw.read_framebuffer = old_read_fb; - state.draw.draw_framebuffer = old_draw_fb; - state.AllDirty(); - state.Apply(); glDeleteRenderbuffers(1, &renderbuffer); + glBindFramebuffer(GL_READ_FRAMEBUFFER, old_read_fb); + glBindFramebuffer(GL_DRAW_FRAMEBUFFER, old_draw_fb); + renderer_settings.screenshot_complete_callback(); renderer_settings.screenshot_requested = false; } bool RendererOpenGL::Init() { - Core::Frontend::ScopeAcquireWindowContext acquire_context{render_window}; - if (GLAD_GL_KHR_debug) { glEnable(GL_DEBUG_OUTPUT); glDebugMessageCallback(DebugHandler, nullptr); diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h index b56328a7f..33073ce5b 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.h +++ b/src/video_core/renderer_opengl/renderer_opengl.h @@ -10,7 +10,8 @@ #include "common/math_util.h" #include "video_core/renderer_base.h" #include "video_core/renderer_opengl/gl_resource_manager.h" -#include "video_core/renderer_opengl/gl_state.h" +#include "video_core/renderer_opengl/gl_shader_manager.h" +#include "video_core/renderer_opengl/gl_state_tracker.h" namespace Core { class System; @@ -44,19 +45,23 @@ struct ScreenInfo { TextureInfo texture; }; +struct PresentationTexture { + u32 width = 0; + u32 height = 0; + OGLTexture texture; +}; + +class FrameMailbox; + class RendererOpenGL final : public VideoCore::RendererBase { public: explicit RendererOpenGL(Core::Frontend::EmuWindow& emu_window, Core::System& system); ~RendererOpenGL() override; - /// Swap buffers (render frame) - void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override; - - /// Initialize the renderer bool Init() override; - - /// Shutdown the renderer void ShutDown() override; + void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override; + void TryPresent(int timeout_ms) override; private: /// Initializes the OpenGL state and creates persistent objects. @@ -72,12 +77,7 @@ private: /// Draws the emulated screens to the emulator window. void DrawScreen(const Layout::FramebufferLayout& layout); - void DrawScreenTriangles(const ScreenInfo& screen_info, float x, float y, float w, float h); - - /// Updates the framerate. - void UpdateFramerate(); - - void CaptureScreenshot(); + void RenderScreenshot(); /// Loads framebuffer from emulated memory into the active OpenGL texture. void LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuffer); @@ -87,26 +87,34 @@ private: void LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color_b, u8 color_a, const TextureInfo& texture); + void PrepareRendertarget(const Tegra::FramebufferConfig* framebuffer); + Core::Frontend::EmuWindow& emu_window; Core::System& system; - OpenGLState state; + StateTracker state_tracker{system}; // OpenGL object IDs - OGLVertexArray vertex_array; OGLBuffer vertex_buffer; - OGLProgram shader; + OGLProgram vertex_program; + OGLProgram fragment_program; OGLFramebuffer screenshot_framebuffer; /// Display information for Switch screen ScreenInfo screen_info; + /// Global dummy shader pipeline + GLShader::ProgramManager program_manager; + /// OpenGL framebuffer data std::vector<u8> gl_framebuffer_data; /// Used for transforming the framebuffer orientation Tegra::FramebufferConfig::TransformFlags framebuffer_transform_flags; Common::Rectangle<int> framebuffer_crop_rect; + + /// Frame presentation mailbox + std::unique_ptr<FrameMailbox> frame_mailbox; }; } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/utils.cpp b/src/video_core/renderer_opengl/utils.cpp index ac99e6385..b751086fa 100644 --- a/src/video_core/renderer_opengl/utils.cpp +++ b/src/video_core/renderer_opengl/utils.cpp @@ -9,6 +9,7 @@ #include <glad/glad.h> #include "common/common_types.h" +#include "video_core/renderer_opengl/gl_state_tracker.h" #include "video_core/renderer_opengl/utils.h" namespace OpenGL { @@ -20,12 +21,12 @@ struct VertexArrayPushBuffer::Entry { GLsizei stride{}; }; -VertexArrayPushBuffer::VertexArrayPushBuffer() = default; +VertexArrayPushBuffer::VertexArrayPushBuffer(StateTracker& state_tracker) + : state_tracker{state_tracker} {} VertexArrayPushBuffer::~VertexArrayPushBuffer() = default; -void VertexArrayPushBuffer::Setup(GLuint vao_) { - vao = vao_; +void VertexArrayPushBuffer::Setup() { index_buffer = nullptr; vertex_buffers.clear(); } @@ -41,13 +42,11 @@ void VertexArrayPushBuffer::SetVertexBuffer(GLuint binding_index, const GLuint* void VertexArrayPushBuffer::Bind() { if (index_buffer) { - glVertexArrayElementBuffer(vao, *index_buffer); + state_tracker.BindIndexBuffer(*index_buffer); } - // TODO(Rodrigo): Find a way to ARB_multi_bind this for (const auto& entry : vertex_buffers) { - glVertexArrayVertexBuffer(vao, entry.binding_index, *entry.buffer, entry.offset, - entry.stride); + glBindVertexBuffer(entry.binding_index, *entry.buffer, entry.offset, entry.stride); } } diff --git a/src/video_core/renderer_opengl/utils.h b/src/video_core/renderer_opengl/utils.h index 3ad7c02d4..47ee3177b 100644 --- a/src/video_core/renderer_opengl/utils.h +++ b/src/video_core/renderer_opengl/utils.h @@ -11,12 +11,14 @@ namespace OpenGL { +class StateTracker; + class VertexArrayPushBuffer final { public: - explicit VertexArrayPushBuffer(); + explicit VertexArrayPushBuffer(StateTracker& state_tracker); ~VertexArrayPushBuffer(); - void Setup(GLuint vao_); + void Setup(); void SetIndexBuffer(const GLuint* buffer); @@ -28,7 +30,8 @@ public: private: struct Entry; - GLuint vao{}; + StateTracker& state_tracker; + const GLuint* index_buffer{}; std::vector<Entry> vertex_buffers; }; |
