aboutsummaryrefslogtreecommitdiff
path: root/src/video_core/renderer_opengl
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core/renderer_opengl')
-rw-r--r--src/video_core/renderer_opengl/gl_buffer_cache.cpp2
-rw-r--r--src/video_core/renderer_opengl/gl_device.cpp3
-rw-r--r--src/video_core/renderer_opengl/gl_global_cache.cpp1
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.cpp80
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.h15
-rw-r--r--src/video_core/renderer_opengl/gl_shader_decompiler.cpp31
-rw-r--r--src/video_core/renderer_opengl/renderer_opengl.cpp1
7 files changed, 81 insertions, 52 deletions
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
index 48b86f3bd..2b9bd142e 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
@@ -23,6 +23,7 @@ OGLBufferCache::OGLBufferCache(RasterizerOpenGL& rasterizer, std::size_t size)
GLintptr OGLBufferCache::UploadMemory(GPUVAddr gpu_addr, std::size_t size, std::size_t alignment,
bool cache) {
+ std::lock_guard lock{mutex};
auto& memory_manager = Core::System::GetInstance().GPU().MemoryManager();
// Cache management is a big overhead, so only cache entries with a given size.
@@ -62,6 +63,7 @@ GLintptr OGLBufferCache::UploadMemory(GPUVAddr gpu_addr, std::size_t size, std::
GLintptr OGLBufferCache::UploadHostMemory(const void* raw_pointer, std::size_t size,
std::size_t alignment) {
+ std::lock_guard lock{mutex};
AlignBuffer(alignment);
std::memcpy(buffer_ptr, raw_pointer, size);
const GLintptr uploaded_offset = buffer_offset;
diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp
index 65a88b06c..a48e14d2e 100644
--- a/src/video_core/renderer_opengl/gl_device.cpp
+++ b/src/video_core/renderer_opengl/gl_device.cpp
@@ -43,8 +43,9 @@ bool Device::TestVariableAoffi() {
// This is a unit test, please ignore me on apitrace bug reports.
uniform sampler2D tex;
uniform ivec2 variable_offset;
+out vec4 output_attribute;
void main() {
- gl_Position = textureOffset(tex, vec2(0), variable_offset);
+ output_attribute = textureOffset(tex, vec2(0), variable_offset);
}
)";
const GLuint shader{glCreateShaderProgramv(GL_VERTEX_SHADER, 1, &AOFFI_TEST)};
diff --git a/src/video_core/renderer_opengl/gl_global_cache.cpp b/src/video_core/renderer_opengl/gl_global_cache.cpp
index ea4a593af..d5e385151 100644
--- a/src/video_core/renderer_opengl/gl_global_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_global_cache.cpp
@@ -76,6 +76,7 @@ GlobalRegionCacheOpenGL::GlobalRegionCacheOpenGL(RasterizerOpenGL& rasterizer)
GlobalRegion GlobalRegionCacheOpenGL::GetGlobalRegion(
const GLShader::GlobalMemoryEntry& global_region,
Tegra::Engines::Maxwell3D::Regs::ShaderStage stage) {
+ std::lock_guard lock{mutex};
auto& gpu{Core::System::GetInstance().GPU()};
auto& memory_manager{gpu.MemoryManager()};
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index ca410287a..d77426067 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -322,9 +322,9 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
}
const auto stage_enum = static_cast<Maxwell::ShaderStage>(stage);
- SetupConstBuffers(stage_enum, shader, program_handle, base_bindings);
- SetupGlobalRegions(stage_enum, shader, program_handle, base_bindings);
- SetupTextures(stage_enum, shader, program_handle, base_bindings);
+ SetupDrawConstBuffers(stage_enum, shader);
+ SetupGlobalRegions(stage_enum, shader);
+ SetupTextures(stage_enum, shader, base_bindings);
// Workaround for Intel drivers.
// When a clip distance is enabled but not set in the shader it crops parts of the screen
@@ -776,57 +776,55 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config,
return true;
}
-void RasterizerOpenGL::SetupConstBuffers(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage,
- const Shader& shader, GLuint program_handle,
- BaseBindings base_bindings) {
+void RasterizerOpenGL::SetupDrawConstBuffers(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage,
+ const Shader& shader) {
MICROPROFILE_SCOPE(OpenGL_UBO);
- const auto& gpu = system.GPU();
- const auto& maxwell3d = gpu.Maxwell3D();
- const auto& shader_stage = maxwell3d.state.shader_stages[static_cast<std::size_t>(stage)];
+ const auto stage_index = static_cast<std::size_t>(stage);
+ const auto& shader_stage = system.GPU().Maxwell3D().state.shader_stages[stage_index];
const auto& entries = shader->GetShaderEntries().const_buffers;
// Upload only the enabled buffers from the 16 constbuffers of each shader stage
for (u32 bindpoint = 0; bindpoint < entries.size(); ++bindpoint) {
- const auto& used_buffer = entries[bindpoint];
- const auto& buffer = shader_stage.const_buffers[used_buffer.GetIndex()];
-
- if (!buffer.enabled) {
- // Set values to zero to unbind buffers
- bind_ubo_pushbuffer.Push(0, 0, 0);
- continue;
- }
+ const auto& entry = entries[bindpoint];
+ SetupConstBuffer(shader_stage.const_buffers[entry.GetIndex()], entry);
+ }
+}
- std::size_t size = 0;
+void RasterizerOpenGL::SetupConstBuffer(const Tegra::Engines::ConstBufferInfo& buffer,
+ const GLShader::ConstBufferEntry& entry) {
+ if (!buffer.enabled) {
+ // Set values to zero to unbind buffers
+ bind_ubo_pushbuffer.Push(0, 0, 0);
+ return;
+ }
- if (used_buffer.IsIndirect()) {
- // Buffer is accessed indirectly, so upload the entire thing
- size = buffer.size;
+ std::size_t size;
+ if (entry.IsIndirect()) {
+ // Buffer is accessed indirectly, so upload the entire thing
+ size = buffer.size;
- if (size > MaxConstbufferSize) {
- LOG_WARNING(Render_OpenGL, "Indirect constbuffer size {} exceeds maximum {}", size,
- MaxConstbufferSize);
- size = MaxConstbufferSize;
- }
- } else {
- // Buffer is accessed directly, upload just what we use
- size = used_buffer.GetSize();
+ if (size > MaxConstbufferSize) {
+ LOG_WARNING(Render_OpenGL, "Indirect constbuffer size {} exceeds maximum {}", size,
+ MaxConstbufferSize);
+ size = MaxConstbufferSize;
}
+ } else {
+ // Buffer is accessed directly, upload just what we use
+ size = entry.GetSize();
+ }
- // Align the actual size so it ends up being a multiple of vec4 to meet the OpenGL std140
- // UBO alignment requirements.
- size = Common::AlignUp(size, sizeof(GLvec4));
- ASSERT_MSG(size <= MaxConstbufferSize, "Constbuffer too big");
-
- const GLintptr const_buffer_offset =
- buffer_cache.UploadMemory(buffer.address, size, device.GetUniformBufferAlignment());
+ // Align the actual size so it ends up being a multiple of vec4 to meet the OpenGL std140
+ // UBO alignment requirements.
+ size = Common::AlignUp(size, sizeof(GLvec4));
+ ASSERT_MSG(size <= MaxConstbufferSize, "Constant buffer is too big");
- bind_ubo_pushbuffer.Push(buffer_cache.GetHandle(), const_buffer_offset, size);
- }
+ const std::size_t alignment = device.GetUniformBufferAlignment();
+ const GLintptr offset = buffer_cache.UploadMemory(buffer.address, size, alignment);
+ bind_ubo_pushbuffer.Push(buffer_cache.GetHandle(), offset, size);
}
void RasterizerOpenGL::SetupGlobalRegions(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage,
- const Shader& shader, GLenum primitive_mode,
- BaseBindings base_bindings) {
+ const Shader& shader) {
const auto& entries = shader->GetShaderEntries().global_memory_entries;
for (std::size_t bindpoint = 0; bindpoint < entries.size(); ++bindpoint) {
const auto& entry{entries[bindpoint]};
@@ -840,7 +838,7 @@ void RasterizerOpenGL::SetupGlobalRegions(Tegra::Engines::Maxwell3D::Regs::Shade
}
void RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, const Shader& shader,
- GLuint program_handle, BaseBindings base_bindings) {
+ BaseBindings base_bindings) {
MICROPROFILE_SCOPE(OpenGL_Texture);
const auto& gpu = system.GPU();
const auto& maxwell3d = gpu.Maxwell3D();
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index 2817f65c9..f7671ff5d 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -17,6 +17,7 @@
#include <glad/glad.h>
#include "common/common_types.h"
+#include "video_core/engines/const_buffer_info.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/rasterizer_cache.h"
#include "video_core/rasterizer_interface.h"
@@ -27,6 +28,7 @@
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/gl_sampler_cache.h"
#include "video_core/renderer_opengl/gl_shader_cache.h"
+#include "video_core/renderer_opengl/gl_shader_decompiler.h"
#include "video_core/renderer_opengl/gl_shader_manager.h"
#include "video_core/renderer_opengl/gl_state.h"
#include "video_core/renderer_opengl/utils.h"
@@ -105,17 +107,20 @@ private:
bool preserve_contents = true, std::optional<std::size_t> single_color_target = {});
/// Configures the current constbuffers to use for the draw command.
- void SetupConstBuffers(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, const Shader& shader,
- GLuint program_handle, BaseBindings base_bindings);
+ void SetupDrawConstBuffers(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage,
+ const Shader& shader);
+
+ /// Configures a constant buffer.
+ void SetupConstBuffer(const Tegra::Engines::ConstBufferInfo& buffer,
+ const GLShader::ConstBufferEntry& entry);
/// Configures the current global memory entries to use for the draw command.
void SetupGlobalRegions(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage,
- const Shader& shader, GLenum primitive_mode,
- BaseBindings base_bindings);
+ const Shader& shader);
/// Configures the current textures to use for the draw command.
void SetupTextures(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, const Shader& shader,
- GLuint program_handle, BaseBindings base_bindings);
+ BaseBindings base_bindings);
/// Syncs the viewport and depth range to match the guest state
void SyncViewport(OpenGLState& current_state);
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index 739477cc9..7dc2e0560 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -143,6 +143,24 @@ u32 GetGenericAttributeIndex(Attribute::Index index) {
return static_cast<u32>(index) - static_cast<u32>(Attribute::Index::Attribute_0);
}
+constexpr const char* GetFlowStackPrefix(MetaStackClass stack) {
+ switch (stack) {
+ case MetaStackClass::Ssy:
+ return "ssy";
+ case MetaStackClass::Pbk:
+ return "pbk";
+ }
+ return {};
+}
+
+std::string FlowStackName(MetaStackClass stack) {
+ return fmt::format("{}_flow_stack", GetFlowStackPrefix(stack));
+}
+
+std::string FlowStackTopName(MetaStackClass stack) {
+ return fmt::format("{}_flow_stack_top", GetFlowStackPrefix(stack));
+}
+
class GLSLDecompiler final {
public:
explicit GLSLDecompiler(const Device& device, const ShaderIR& ir, ShaderStage stage,
@@ -173,8 +191,10 @@ public:
// TODO(Subv): Figure out the actual depth of the flow stack, for now it seems
// unlikely that shaders will use 20 nested SSYs and PBKs.
constexpr u32 FLOW_STACK_SIZE = 20;
- code.AddLine("uint flow_stack[{}];", FLOW_STACK_SIZE);
- code.AddLine("uint flow_stack_top = 0u;");
+ for (const auto stack : std::array{MetaStackClass::Ssy, MetaStackClass::Pbk}) {
+ code.AddLine("uint {}[{}];", FlowStackName(stack), FLOW_STACK_SIZE);
+ code.AddLine("uint {} = 0u;", FlowStackTopName(stack));
+ }
code.AddLine("while (true) {{");
++code.scope;
@@ -1438,15 +1458,18 @@ private:
}
std::string PushFlowStack(Operation operation) {
+ const auto stack = std::get<MetaStackClass>(operation.GetMeta());
const auto target = std::get_if<ImmediateNode>(&*operation[0]);
UNIMPLEMENTED_IF(!target);
- code.AddLine("flow_stack[flow_stack_top++] = 0x{:x}u;", target->GetValue());
+ code.AddLine("{}[{}++] = 0x{:x}u;", FlowStackName(stack), FlowStackTopName(stack),
+ target->GetValue());
return {};
}
std::string PopFlowStack(Operation operation) {
- code.AddLine("jmp_to = flow_stack[--flow_stack_top];");
+ const auto stack = std::get<MetaStackClass>(operation.GetMeta());
+ code.AddLine("jmp_to = {}[--{}];", FlowStackName(stack), FlowStackTopName(stack));
code.AddLine("break;");
return {};
}
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index 3451d321d..aafd6f31b 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -18,7 +18,6 @@
#include "core/perf_stats.h"
#include "core/settings.h"
#include "core/telemetry_session.h"
-#include "core/tracer/recorder.h"
#include "video_core/morton.h"
#include "video_core/renderer_opengl/gl_rasterizer.h"
#include "video_core/renderer_opengl/renderer_opengl.h"