diff options
Diffstat (limited to 'src/video_core/renderer_opengl')
| -rw-r--r-- | src/video_core/renderer_opengl/gl_buffer_cache.cpp | 2 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_device.cpp | 3 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_global_cache.cpp | 1 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.cpp | 80 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.h | 15 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_decompiler.cpp | 31 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/renderer_opengl.cpp | 1 |
7 files changed, 81 insertions, 52 deletions
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp index 48b86f3bd..2b9bd142e 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp @@ -23,6 +23,7 @@ OGLBufferCache::OGLBufferCache(RasterizerOpenGL& rasterizer, std::size_t size) GLintptr OGLBufferCache::UploadMemory(GPUVAddr gpu_addr, std::size_t size, std::size_t alignment, bool cache) { + std::lock_guard lock{mutex}; auto& memory_manager = Core::System::GetInstance().GPU().MemoryManager(); // Cache management is a big overhead, so only cache entries with a given size. @@ -62,6 +63,7 @@ GLintptr OGLBufferCache::UploadMemory(GPUVAddr gpu_addr, std::size_t size, std:: GLintptr OGLBufferCache::UploadHostMemory(const void* raw_pointer, std::size_t size, std::size_t alignment) { + std::lock_guard lock{mutex}; AlignBuffer(alignment); std::memcpy(buffer_ptr, raw_pointer, size); const GLintptr uploaded_offset = buffer_offset; diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index 65a88b06c..a48e14d2e 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp @@ -43,8 +43,9 @@ bool Device::TestVariableAoffi() { // This is a unit test, please ignore me on apitrace bug reports. uniform sampler2D tex; uniform ivec2 variable_offset; +out vec4 output_attribute; void main() { - gl_Position = textureOffset(tex, vec2(0), variable_offset); + output_attribute = textureOffset(tex, vec2(0), variable_offset); } )"; const GLuint shader{glCreateShaderProgramv(GL_VERTEX_SHADER, 1, &AOFFI_TEST)}; diff --git a/src/video_core/renderer_opengl/gl_global_cache.cpp b/src/video_core/renderer_opengl/gl_global_cache.cpp index ea4a593af..d5e385151 100644 --- a/src/video_core/renderer_opengl/gl_global_cache.cpp +++ b/src/video_core/renderer_opengl/gl_global_cache.cpp @@ -76,6 +76,7 @@ GlobalRegionCacheOpenGL::GlobalRegionCacheOpenGL(RasterizerOpenGL& rasterizer) GlobalRegion GlobalRegionCacheOpenGL::GetGlobalRegion( const GLShader::GlobalMemoryEntry& global_region, Tegra::Engines::Maxwell3D::Regs::ShaderStage stage) { + std::lock_guard lock{mutex}; auto& gpu{Core::System::GetInstance().GPU()}; auto& memory_manager{gpu.MemoryManager()}; diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index ca410287a..d77426067 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -322,9 +322,9 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { } const auto stage_enum = static_cast<Maxwell::ShaderStage>(stage); - SetupConstBuffers(stage_enum, shader, program_handle, base_bindings); - SetupGlobalRegions(stage_enum, shader, program_handle, base_bindings); - SetupTextures(stage_enum, shader, program_handle, base_bindings); + SetupDrawConstBuffers(stage_enum, shader); + SetupGlobalRegions(stage_enum, shader); + SetupTextures(stage_enum, shader, base_bindings); // Workaround for Intel drivers. // When a clip distance is enabled but not set in the shader it crops parts of the screen @@ -776,57 +776,55 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config, return true; } -void RasterizerOpenGL::SetupConstBuffers(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, - const Shader& shader, GLuint program_handle, - BaseBindings base_bindings) { +void RasterizerOpenGL::SetupDrawConstBuffers(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, + const Shader& shader) { MICROPROFILE_SCOPE(OpenGL_UBO); - const auto& gpu = system.GPU(); - const auto& maxwell3d = gpu.Maxwell3D(); - const auto& shader_stage = maxwell3d.state.shader_stages[static_cast<std::size_t>(stage)]; + const auto stage_index = static_cast<std::size_t>(stage); + const auto& shader_stage = system.GPU().Maxwell3D().state.shader_stages[stage_index]; const auto& entries = shader->GetShaderEntries().const_buffers; // Upload only the enabled buffers from the 16 constbuffers of each shader stage for (u32 bindpoint = 0; bindpoint < entries.size(); ++bindpoint) { - const auto& used_buffer = entries[bindpoint]; - const auto& buffer = shader_stage.const_buffers[used_buffer.GetIndex()]; - - if (!buffer.enabled) { - // Set values to zero to unbind buffers - bind_ubo_pushbuffer.Push(0, 0, 0); - continue; - } + const auto& entry = entries[bindpoint]; + SetupConstBuffer(shader_stage.const_buffers[entry.GetIndex()], entry); + } +} - std::size_t size = 0; +void RasterizerOpenGL::SetupConstBuffer(const Tegra::Engines::ConstBufferInfo& buffer, + const GLShader::ConstBufferEntry& entry) { + if (!buffer.enabled) { + // Set values to zero to unbind buffers + bind_ubo_pushbuffer.Push(0, 0, 0); + return; + } - if (used_buffer.IsIndirect()) { - // Buffer is accessed indirectly, so upload the entire thing - size = buffer.size; + std::size_t size; + if (entry.IsIndirect()) { + // Buffer is accessed indirectly, so upload the entire thing + size = buffer.size; - if (size > MaxConstbufferSize) { - LOG_WARNING(Render_OpenGL, "Indirect constbuffer size {} exceeds maximum {}", size, - MaxConstbufferSize); - size = MaxConstbufferSize; - } - } else { - // Buffer is accessed directly, upload just what we use - size = used_buffer.GetSize(); + if (size > MaxConstbufferSize) { + LOG_WARNING(Render_OpenGL, "Indirect constbuffer size {} exceeds maximum {}", size, + MaxConstbufferSize); + size = MaxConstbufferSize; } + } else { + // Buffer is accessed directly, upload just what we use + size = entry.GetSize(); + } - // Align the actual size so it ends up being a multiple of vec4 to meet the OpenGL std140 - // UBO alignment requirements. - size = Common::AlignUp(size, sizeof(GLvec4)); - ASSERT_MSG(size <= MaxConstbufferSize, "Constbuffer too big"); - - const GLintptr const_buffer_offset = - buffer_cache.UploadMemory(buffer.address, size, device.GetUniformBufferAlignment()); + // Align the actual size so it ends up being a multiple of vec4 to meet the OpenGL std140 + // UBO alignment requirements. + size = Common::AlignUp(size, sizeof(GLvec4)); + ASSERT_MSG(size <= MaxConstbufferSize, "Constant buffer is too big"); - bind_ubo_pushbuffer.Push(buffer_cache.GetHandle(), const_buffer_offset, size); - } + const std::size_t alignment = device.GetUniformBufferAlignment(); + const GLintptr offset = buffer_cache.UploadMemory(buffer.address, size, alignment); + bind_ubo_pushbuffer.Push(buffer_cache.GetHandle(), offset, size); } void RasterizerOpenGL::SetupGlobalRegions(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, - const Shader& shader, GLenum primitive_mode, - BaseBindings base_bindings) { + const Shader& shader) { const auto& entries = shader->GetShaderEntries().global_memory_entries; for (std::size_t bindpoint = 0; bindpoint < entries.size(); ++bindpoint) { const auto& entry{entries[bindpoint]}; @@ -840,7 +838,7 @@ void RasterizerOpenGL::SetupGlobalRegions(Tegra::Engines::Maxwell3D::Regs::Shade } void RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, const Shader& shader, - GLuint program_handle, BaseBindings base_bindings) { + BaseBindings base_bindings) { MICROPROFILE_SCOPE(OpenGL_Texture); const auto& gpu = system.GPU(); const auto& maxwell3d = gpu.Maxwell3D(); diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 2817f65c9..f7671ff5d 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -17,6 +17,7 @@ #include <glad/glad.h> #include "common/common_types.h" +#include "video_core/engines/const_buffer_info.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/rasterizer_cache.h" #include "video_core/rasterizer_interface.h" @@ -27,6 +28,7 @@ #include "video_core/renderer_opengl/gl_resource_manager.h" #include "video_core/renderer_opengl/gl_sampler_cache.h" #include "video_core/renderer_opengl/gl_shader_cache.h" +#include "video_core/renderer_opengl/gl_shader_decompiler.h" #include "video_core/renderer_opengl/gl_shader_manager.h" #include "video_core/renderer_opengl/gl_state.h" #include "video_core/renderer_opengl/utils.h" @@ -105,17 +107,20 @@ private: bool preserve_contents = true, std::optional<std::size_t> single_color_target = {}); /// Configures the current constbuffers to use for the draw command. - void SetupConstBuffers(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, const Shader& shader, - GLuint program_handle, BaseBindings base_bindings); + void SetupDrawConstBuffers(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, + const Shader& shader); + + /// Configures a constant buffer. + void SetupConstBuffer(const Tegra::Engines::ConstBufferInfo& buffer, + const GLShader::ConstBufferEntry& entry); /// Configures the current global memory entries to use for the draw command. void SetupGlobalRegions(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, - const Shader& shader, GLenum primitive_mode, - BaseBindings base_bindings); + const Shader& shader); /// Configures the current textures to use for the draw command. void SetupTextures(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, const Shader& shader, - GLuint program_handle, BaseBindings base_bindings); + BaseBindings base_bindings); /// Syncs the viewport and depth range to match the guest state void SyncViewport(OpenGLState& current_state); diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 739477cc9..7dc2e0560 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -143,6 +143,24 @@ u32 GetGenericAttributeIndex(Attribute::Index index) { return static_cast<u32>(index) - static_cast<u32>(Attribute::Index::Attribute_0); } +constexpr const char* GetFlowStackPrefix(MetaStackClass stack) { + switch (stack) { + case MetaStackClass::Ssy: + return "ssy"; + case MetaStackClass::Pbk: + return "pbk"; + } + return {}; +} + +std::string FlowStackName(MetaStackClass stack) { + return fmt::format("{}_flow_stack", GetFlowStackPrefix(stack)); +} + +std::string FlowStackTopName(MetaStackClass stack) { + return fmt::format("{}_flow_stack_top", GetFlowStackPrefix(stack)); +} + class GLSLDecompiler final { public: explicit GLSLDecompiler(const Device& device, const ShaderIR& ir, ShaderStage stage, @@ -173,8 +191,10 @@ public: // TODO(Subv): Figure out the actual depth of the flow stack, for now it seems // unlikely that shaders will use 20 nested SSYs and PBKs. constexpr u32 FLOW_STACK_SIZE = 20; - code.AddLine("uint flow_stack[{}];", FLOW_STACK_SIZE); - code.AddLine("uint flow_stack_top = 0u;"); + for (const auto stack : std::array{MetaStackClass::Ssy, MetaStackClass::Pbk}) { + code.AddLine("uint {}[{}];", FlowStackName(stack), FLOW_STACK_SIZE); + code.AddLine("uint {} = 0u;", FlowStackTopName(stack)); + } code.AddLine("while (true) {{"); ++code.scope; @@ -1438,15 +1458,18 @@ private: } std::string PushFlowStack(Operation operation) { + const auto stack = std::get<MetaStackClass>(operation.GetMeta()); const auto target = std::get_if<ImmediateNode>(&*operation[0]); UNIMPLEMENTED_IF(!target); - code.AddLine("flow_stack[flow_stack_top++] = 0x{:x}u;", target->GetValue()); + code.AddLine("{}[{}++] = 0x{:x}u;", FlowStackName(stack), FlowStackTopName(stack), + target->GetValue()); return {}; } std::string PopFlowStack(Operation operation) { - code.AddLine("jmp_to = flow_stack[--flow_stack_top];"); + const auto stack = std::get<MetaStackClass>(operation.GetMeta()); + code.AddLine("jmp_to = {}[--{}];", FlowStackName(stack), FlowStackTopName(stack)); code.AddLine("break;"); return {}; } diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index 3451d321d..aafd6f31b 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -18,7 +18,6 @@ #include "core/perf_stats.h" #include "core/settings.h" #include "core/telemetry_session.h" -#include "core/tracer/recorder.h" #include "video_core/morton.h" #include "video_core/renderer_opengl/gl_rasterizer.h" #include "video_core/renderer_opengl/renderer_opengl.h" |
