aboutsummaryrefslogtreecommitdiff
path: root/src/video_core/renderer_opengl
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core/renderer_opengl')
-rw-r--r--src/video_core/renderer_opengl/gl_framebuffer_cache.cpp4
-rw-r--r--src/video_core/renderer_opengl/gl_framebuffer_cache.h2
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.cpp1042
-rw-r--r--src/video_core/renderer_opengl/gl_rasterizer.h71
-rw-r--r--src/video_core/renderer_opengl/gl_resource_manager.cpp42
-rw-r--r--src/video_core/renderer_opengl/gl_resource_manager.h50
-rw-r--r--src/video_core/renderer_opengl/gl_sampler_cache.cpp2
-rw-r--r--src/video_core/renderer_opengl/gl_shader_cache.cpp513
-rw-r--r--src/video_core/renderer_opengl/gl_shader_cache.h99
-rw-r--r--src/video_core/renderer_opengl/gl_shader_decompiler.cpp494
-rw-r--r--src/video_core/renderer_opengl/gl_shader_decompiler.h24
-rw-r--r--src/video_core/renderer_opengl/gl_shader_disk_cache.cpp404
-rw-r--r--src/video_core/renderer_opengl/gl_shader_disk_cache.h153
-rw-r--r--src/video_core/renderer_opengl/gl_shader_gen.cpp109
-rw-r--r--src/video_core/renderer_opengl/gl_shader_gen.h34
-rw-r--r--src/video_core/renderer_opengl/gl_shader_manager.cpp43
-rw-r--r--src/video_core/renderer_opengl/gl_shader_manager.h39
-rw-r--r--src/video_core/renderer_opengl/gl_state.cpp554
-rw-r--r--src/video_core/renderer_opengl/gl_state.h247
-rw-r--r--src/video_core/renderer_opengl/gl_state_tracker.cpp247
-rw-r--r--src/video_core/renderer_opengl/gl_state_tracker.h215
-rw-r--r--src/video_core/renderer_opengl/gl_stream_buffer.cpp1
-rw-r--r--src/video_core/renderer_opengl/gl_texture_cache.cpp77
-rw-r--r--src/video_core/renderer_opengl/gl_texture_cache.h10
-rw-r--r--src/video_core/renderer_opengl/maxwell_to_gl.h51
-rw-r--r--src/video_core/renderer_opengl/renderer_opengl.cpp493
-rw-r--r--src/video_core/renderer_opengl/renderer_opengl.h40
-rw-r--r--src/video_core/renderer_opengl/utils.cpp13
-rw-r--r--src/video_core/renderer_opengl/utils.h9
29 files changed, 2449 insertions, 2633 deletions
diff --git a/src/video_core/renderer_opengl/gl_framebuffer_cache.cpp b/src/video_core/renderer_opengl/gl_framebuffer_cache.cpp
index 874ed3c6e..b8a512cb6 100644
--- a/src/video_core/renderer_opengl/gl_framebuffer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_framebuffer_cache.cpp
@@ -11,7 +11,6 @@
#include "common/common_types.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/renderer_opengl/gl_framebuffer_cache.h"
-#include "video_core/renderer_opengl/gl_state.h"
namespace OpenGL {
@@ -36,8 +35,7 @@ OGLFramebuffer FramebufferCacheOpenGL::CreateFramebuffer(const FramebufferCacheK
framebuffer.Create();
// TODO(Rodrigo): Use DSA here after Nvidia fixes their framebuffer DSA bugs.
- local_state.draw.draw_framebuffer = framebuffer.handle;
- local_state.ApplyFramebufferState();
+ glBindFramebuffer(GL_DRAW_FRAMEBUFFER, framebuffer.handle);
if (key.zeta) {
const bool stencil = key.zeta->GetSurfaceParams().type == SurfaceType::DepthStencil;
diff --git a/src/video_core/renderer_opengl/gl_framebuffer_cache.h b/src/video_core/renderer_opengl/gl_framebuffer_cache.h
index 02ec80ae9..8f698fee0 100644
--- a/src/video_core/renderer_opengl/gl_framebuffer_cache.h
+++ b/src/video_core/renderer_opengl/gl_framebuffer_cache.h
@@ -13,7 +13,6 @@
#include "common/common_types.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
-#include "video_core/renderer_opengl/gl_state.h"
#include "video_core/renderer_opengl/gl_texture_cache.h"
namespace OpenGL {
@@ -63,7 +62,6 @@ public:
private:
OGLFramebuffer CreateFramebuffer(const FramebufferCacheKey& key);
- OpenGLState local_state;
std::unordered_map<FramebufferCacheKey, OGLFramebuffer> cache;
};
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index e1965fb21..826eee7df 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -28,7 +28,6 @@
#include "video_core/renderer_opengl/gl_query_cache.h"
#include "video_core/renderer_opengl/gl_rasterizer.h"
#include "video_core/renderer_opengl/gl_shader_cache.h"
-#include "video_core/renderer_opengl/gl_shader_gen.h"
#include "video_core/renderer_opengl/maxwell_to_gl.h"
#include "video_core/renderer_opengl/renderer_opengl.h"
@@ -36,6 +35,7 @@ namespace OpenGL {
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
+using Tegra::Engines::ShaderType;
using VideoCore::Surface::PixelFormat;
using VideoCore::Surface::SurfaceTarget;
using VideoCore::Surface::SurfaceType;
@@ -54,10 +54,11 @@ MICROPROFILE_DEFINE(OpenGL_PrimitiveAssembly, "OpenGL", "Prim Asmbl", MP_RGB(255
namespace {
+constexpr std::size_t NumSupportedVertexAttributes = 16;
+
template <typename Engine, typename Entry>
Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry& entry,
- Tegra::Engines::ShaderType shader_type,
- std::size_t index = 0) {
+ ShaderType shader_type, std::size_t index = 0) {
if (entry.IsBindless()) {
const Tegra::Texture::TextureHandle tex_handle =
engine.AccessConstBuffer32(shader_type, entry.GetBuffer(), entry.GetOffset());
@@ -74,7 +75,7 @@ Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry
}
std::size_t GetConstBufferSize(const Tegra::Engines::ConstBufferInfo& buffer,
- const GLShader::ConstBufferEntry& entry) {
+ const ConstBufferEntry& entry) {
if (!entry.IsIndirect()) {
return entry.GetSize();
}
@@ -88,18 +89,19 @@ std::size_t GetConstBufferSize(const Tegra::Engines::ConstBufferInfo& buffer,
return buffer.size;
}
+void oglEnable(GLenum cap, bool state) {
+ (state ? glEnable : glDisable)(cap);
+}
+
} // Anonymous namespace
RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window,
- ScreenInfo& info)
- : RasterizerAccelerated{system.Memory()}, texture_cache{system, *this, device},
+ ScreenInfo& info, GLShader::ProgramManager& program_manager,
+ StateTracker& state_tracker)
+ : RasterizerAccelerated{system.Memory()}, texture_cache{system, *this, device, state_tracker},
shader_cache{*this, system, emu_window, device}, query_cache{system, *this}, system{system},
- screen_info{info}, buffer_cache{*this, system, device, STREAM_BUFFER_SIZE} {
- shader_program_manager = std::make_unique<GLShader::ProgramManager>();
- state.draw.shader_program = 0;
- state.Apply();
-
- LOG_DEBUG(Render_OpenGL, "Sync fixed function OpenGL state here");
+ screen_info{info}, program_manager{program_manager}, state_tracker{state_tracker},
+ buffer_cache{*this, system, device, STREAM_BUFFER_SIZE} {
CheckExtensions();
}
@@ -113,93 +115,72 @@ void RasterizerOpenGL::CheckExtensions() {
}
}
-GLuint RasterizerOpenGL::SetupVertexFormat() {
+void RasterizerOpenGL::SetupVertexFormat() {
auto& gpu = system.GPU().Maxwell3D();
- const auto& regs = gpu.regs;
-
- if (!gpu.dirty.vertex_attrib_format) {
- return state.draw.vertex_array;
+ auto& flags = gpu.dirty.flags;
+ if (!flags[Dirty::VertexFormats]) {
+ return;
}
- gpu.dirty.vertex_attrib_format = false;
+ flags[Dirty::VertexFormats] = false;
MICROPROFILE_SCOPE(OpenGL_VAO);
- auto [iter, is_cache_miss] = vertex_array_cache.try_emplace(regs.vertex_attrib_format);
- auto& vao_entry = iter->second;
-
- if (is_cache_miss) {
- vao_entry.Create();
- const GLuint vao = vao_entry.handle;
-
- // Eventhough we are using DSA to create this vertex array, there is a bug on Intel's blob
- // that fails to properly create the vertex array if it's not bound even after creating it
- // with glCreateVertexArrays
- state.draw.vertex_array = vao;
- state.ApplyVertexArrayState();
-
- // Use the vertex array as-is, assumes that the data is formatted correctly for OpenGL.
- // Enables the first 16 vertex attributes always, as we don't know which ones are actually
- // used until shader time. Note, Tegra technically supports 32, but we're capping this to 16
- // for now to avoid OpenGL errors.
- // TODO(Subv): Analyze the shader to identify which attributes are actually used and don't
- // assume every shader uses them all.
- for (u32 index = 0; index < 16; ++index) {
- const auto& attrib = regs.vertex_attrib_format[index];
-
- // Ignore invalid attributes.
- if (!attrib.IsValid())
- continue;
-
- const auto& buffer = regs.vertex_array[attrib.buffer];
- LOG_TRACE(Render_OpenGL,
- "vertex attrib {}, count={}, size={}, type={}, offset={}, normalize={}",
- index, attrib.ComponentCount(), attrib.SizeString(), attrib.TypeString(),
- attrib.offset.Value(), attrib.IsNormalized());
-
- ASSERT(buffer.IsEnabled());
-
- glEnableVertexArrayAttrib(vao, index);
- if (attrib.type == Tegra::Engines::Maxwell3D::Regs::VertexAttribute::Type::SignedInt ||
- attrib.type ==
- Tegra::Engines::Maxwell3D::Regs::VertexAttribute::Type::UnsignedInt) {
- glVertexArrayAttribIFormat(vao, index, attrib.ComponentCount(),
- MaxwellToGL::VertexType(attrib), attrib.offset);
- } else {
- glVertexArrayAttribFormat(
- vao, index, attrib.ComponentCount(), MaxwellToGL::VertexType(attrib),
- attrib.IsNormalized() ? GL_TRUE : GL_FALSE, attrib.offset);
- }
- glVertexArrayAttribBinding(vao, index, attrib.buffer);
+ // Use the vertex array as-is, assumes that the data is formatted correctly for OpenGL. Enables
+ // the first 16 vertex attributes always, as we don't know which ones are actually used until
+ // shader time. Note, Tegra technically supports 32, but we're capping this to 16 for now to
+ // avoid OpenGL errors.
+ // TODO(Subv): Analyze the shader to identify which attributes are actually used and don't
+ // assume every shader uses them all.
+ for (std::size_t index = 0; index < NumSupportedVertexAttributes; ++index) {
+ if (!flags[Dirty::VertexFormat0 + index]) {
+ continue;
}
- }
+ flags[Dirty::VertexFormat0 + index] = false;
+
+ const auto attrib = gpu.regs.vertex_attrib_format[index];
+ const auto gl_index = static_cast<GLuint>(index);
- // Rebinding the VAO invalidates the vertex buffer bindings.
- gpu.dirty.ResetVertexArrays();
+ // Ignore invalid attributes.
+ if (!attrib.IsValid()) {
+ glDisableVertexAttribArray(gl_index);
+ continue;
+ }
+ glEnableVertexAttribArray(gl_index);
- state.draw.vertex_array = vao_entry.handle;
- return vao_entry.handle;
+ if (attrib.type == Maxwell::VertexAttribute::Type::SignedInt ||
+ attrib.type == Maxwell::VertexAttribute::Type::UnsignedInt) {
+ glVertexAttribIFormat(gl_index, attrib.ComponentCount(),
+ MaxwellToGL::VertexType(attrib), attrib.offset);
+ } else {
+ glVertexAttribFormat(gl_index, attrib.ComponentCount(), MaxwellToGL::VertexType(attrib),
+ attrib.IsNormalized() ? GL_TRUE : GL_FALSE, attrib.offset);
+ }
+ glVertexAttribBinding(gl_index, attrib.buffer);
+ }
}
-void RasterizerOpenGL::SetupVertexBuffer(GLuint vao) {
+void RasterizerOpenGL::SetupVertexBuffer() {
auto& gpu = system.GPU().Maxwell3D();
- if (!gpu.dirty.vertex_array_buffers)
+ auto& flags = gpu.dirty.flags;
+ if (!flags[Dirty::VertexBuffers]) {
return;
- gpu.dirty.vertex_array_buffers = false;
-
- const auto& regs = gpu.regs;
+ }
+ flags[Dirty::VertexBuffers] = false;
MICROPROFILE_SCOPE(OpenGL_VB);
// Upload all guest vertex arrays sequentially to our buffer
- for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) {
- if (!gpu.dirty.vertex_array[index])
+ const auto& regs = gpu.regs;
+ for (std::size_t index = 0; index < Maxwell::NumVertexArrays; ++index) {
+ if (!flags[Dirty::VertexBuffer0 + index]) {
continue;
- gpu.dirty.vertex_array[index] = false;
- gpu.dirty.vertex_instance[index] = false;
+ }
+ flags[Dirty::VertexBuffer0 + index] = false;
const auto& vertex_array = regs.vertex_array[index];
- if (!vertex_array.IsEnabled())
+ if (!vertex_array.IsEnabled()) {
continue;
+ }
const GPUVAddr start = vertex_array.StartAddress();
const GPUVAddr end = regs.vertex_array_limit[index].LimitAddress();
@@ -209,42 +190,30 @@ void RasterizerOpenGL::SetupVertexBuffer(GLuint vao) {
const auto [vertex_buffer, vertex_buffer_offset] = buffer_cache.UploadMemory(start, size);
// Bind the vertex array to the buffer at the current offset.
- vertex_array_pushbuffer.SetVertexBuffer(index, vertex_buffer, vertex_buffer_offset,
- vertex_array.stride);
-
- if (regs.instanced_arrays.IsInstancingEnabled(index) && vertex_array.divisor != 0) {
- // Enable vertex buffer instancing with the specified divisor.
- glVertexArrayBindingDivisor(vao, index, vertex_array.divisor);
- } else {
- // Disable the vertex buffer instancing.
- glVertexArrayBindingDivisor(vao, index, 0);
- }
+ vertex_array_pushbuffer.SetVertexBuffer(static_cast<GLuint>(index), vertex_buffer,
+ vertex_buffer_offset, vertex_array.stride);
}
}
-void RasterizerOpenGL::SetupVertexInstances(GLuint vao) {
+void RasterizerOpenGL::SetupVertexInstances() {
auto& gpu = system.GPU().Maxwell3D();
-
- if (!gpu.dirty.vertex_instances)
+ auto& flags = gpu.dirty.flags;
+ if (!flags[Dirty::VertexInstances]) {
return;
- gpu.dirty.vertex_instances = false;
+ }
+ flags[Dirty::VertexInstances] = false;
const auto& regs = gpu.regs;
- // Upload all guest vertex arrays sequentially to our buffer
- for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) {
- if (!gpu.dirty.vertex_instance[index])
+ for (std::size_t index = 0; index < NumSupportedVertexAttributes; ++index) {
+ if (!flags[Dirty::VertexInstance0 + index]) {
continue;
-
- gpu.dirty.vertex_instance[index] = false;
-
- if (regs.instanced_arrays.IsInstancingEnabled(index) &&
- regs.vertex_array[index].divisor != 0) {
- // Enable vertex buffer instancing with the specified divisor.
- glVertexArrayBindingDivisor(vao, index, regs.vertex_array[index].divisor);
- } else {
- // Disable the vertex buffer instancing.
- glVertexArrayBindingDivisor(vao, index, 0);
}
+ flags[Dirty::VertexInstance0 + index] = false;
+
+ const auto gl_index = static_cast<GLuint>(index);
+ const bool instancing_enabled = regs.instanced_arrays.IsInstancingEnabled(gl_index);
+ const GLuint divisor = instancing_enabled ? regs.vertex_array[index].divisor : 0;
+ glVertexBindingDivisor(gl_index, divisor);
}
}
@@ -260,8 +229,7 @@ GLintptr RasterizerOpenGL::SetupIndexBuffer() {
void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
MICROPROFILE_SCOPE(OpenGL_Shader);
auto& gpu = system.GPU().Maxwell3D();
-
- std::array<bool, Maxwell::NumClipDistances> clip_distances{};
+ u32 clip_distances = 0;
for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
const auto& shader_config = gpu.regs.shader_config[index];
@@ -271,10 +239,10 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
if (!gpu.regs.IsShaderConfigEnabled(index)) {
switch (program) {
case Maxwell::ShaderProgram::Geometry:
- shader_program_manager->UseTrivialGeometryShader();
+ program_manager.UseGeometryShader(0);
break;
case Maxwell::ShaderProgram::Fragment:
- shader_program_manager->UseTrivialFragmentShader();
+ program_manager.UseFragmentShader(0);
break;
default:
break;
@@ -299,19 +267,17 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
SetupDrawTextures(stage, shader);
SetupDrawImages(stage, shader);
- const ProgramVariant variant(primitive_mode);
- const auto program_handle = shader->GetHandle(variant);
-
+ const GLuint program_handle = shader->GetHandle();
switch (program) {
case Maxwell::ShaderProgram::VertexA:
case Maxwell::ShaderProgram::VertexB:
- shader_program_manager->UseProgrammableVertexShader(program_handle);
+ program_manager.UseVertexShader(program_handle);
break;
case Maxwell::ShaderProgram::Geometry:
- shader_program_manager->UseProgrammableGeometryShader(program_handle);
+ program_manager.UseGeometryShader(program_handle);
break;
case Maxwell::ShaderProgram::Fragment:
- shader_program_manager->UseProgrammableFragmentShader(program_handle);
+ program_manager.UseFragmentShader(program_handle);
break;
default:
UNIMPLEMENTED_MSG("Unimplemented shader index={}, enable={}, offset=0x{:08X}", index,
@@ -322,9 +288,7 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
// When a clip distance is enabled but not set in the shader it crops parts of the screen
// (sometimes it's half the screen, sometimes three quarters). To avoid this, enable the
// clip distances only when it's written by a shader stage.
- for (std::size_t i = 0; i < Maxwell::NumClipDistances; ++i) {
- clip_distances[i] = clip_distances[i] || shader->GetShaderEntries().clip_distances[i];
- }
+ clip_distances |= shader->GetEntries().clip_distances;
// When VertexA is enabled, we have dual vertex shaders
if (program == Maxwell::ShaderProgram::VertexA) {
@@ -334,8 +298,7 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
}
SyncClipEnabled(clip_distances);
-
- gpu.dirty.shaders = false;
+ gpu.dirty.flags[Dirty::Shaders] = false;
}
std::size_t RasterizerOpenGL::CalculateVertexArraysSize() const {
@@ -368,20 +331,23 @@ void RasterizerOpenGL::LoadDiskResources(const std::atomic_bool& stop_loading,
shader_cache.LoadDiskCache(stop_loading, callback);
}
+void RasterizerOpenGL::SetupDirtyFlags() {
+ state_tracker.Initialize();
+}
+
void RasterizerOpenGL::ConfigureFramebuffers() {
MICROPROFILE_SCOPE(OpenGL_Framebuffer);
auto& gpu = system.GPU().Maxwell3D();
- if (!gpu.dirty.render_settings) {
+ if (!gpu.dirty.flags[VideoCommon::Dirty::RenderTargets]) {
return;
}
- gpu.dirty.render_settings = false;
+ gpu.dirty.flags[VideoCommon::Dirty::RenderTargets] = false;
texture_cache.GuardRenderTargets(true);
View depth_surface = texture_cache.GetDepthBufferSurface(true);
const auto& regs = gpu.regs;
- state.framebuffer_srgb.enabled = regs.framebuffer_srgb != 0;
UNIMPLEMENTED_IF(regs.rt_separate_frag_data == 0);
// Bind the framebuffer surfaces
@@ -409,14 +375,11 @@ void RasterizerOpenGL::ConfigureFramebuffers() {
texture_cache.GuardRenderTargets(false);
- state.draw.draw_framebuffer = framebuffer_cache.GetFramebuffer(key);
- SyncViewport(state);
+ glBindFramebuffer(GL_DRAW_FRAMEBUFFER, framebuffer_cache.GetFramebuffer(key));
}
-void RasterizerOpenGL::ConfigureClearFramebuffer(OpenGLState& current_state, bool using_color_fb,
- bool using_depth_fb, bool using_stencil_fb) {
- using VideoCore::Surface::SurfaceType;
-
+void RasterizerOpenGL::ConfigureClearFramebuffer(bool using_color_fb, bool using_depth_fb,
+ bool using_stencil_fb) {
auto& gpu = system.GPU().Maxwell3D();
const auto& regs = gpu.regs;
@@ -435,80 +398,44 @@ void RasterizerOpenGL::ConfigureClearFramebuffer(OpenGLState& current_state, boo
key.colors[0] = color_surface;
key.zeta = depth_surface;
- current_state.draw.draw_framebuffer = framebuffer_cache.GetFramebuffer(key);
- current_state.ApplyFramebufferState();
+ state_tracker.NotifyFramebuffer();
+ glBindFramebuffer(GL_DRAW_FRAMEBUFFER, framebuffer_cache.GetFramebuffer(key));
}
void RasterizerOpenGL::Clear() {
- const auto& maxwell3d = system.GPU().Maxwell3D();
-
- if (!maxwell3d.ShouldExecute()) {
+ const auto& gpu = system.GPU().Maxwell3D();
+ if (!gpu.ShouldExecute()) {
return;
}
- const auto& regs = maxwell3d.regs;
+ const auto& regs = gpu.regs;
bool use_color{};
bool use_depth{};
bool use_stencil{};
- OpenGLState prev_state{OpenGLState::GetCurState()};
- SCOPE_EXIT({
- prev_state.AllDirty();
- prev_state.Apply();
- });
-
- OpenGLState clear_state{OpenGLState::GetCurState()};
- clear_state.SetDefaultViewports();
if (regs.clear_buffers.R || regs.clear_buffers.G || regs.clear_buffers.B ||
regs.clear_buffers.A) {
use_color = true;
}
if (use_color) {
- clear_state.color_mask[0].red_enabled = regs.clear_buffers.R ? GL_TRUE : GL_FALSE;
- clear_state.color_mask[0].green_enabled = regs.clear_buffers.G ? GL_TRUE : GL_FALSE;
- clear_state.color_mask[0].blue_enabled = regs.clear_buffers.B ? GL_TRUE : GL_FALSE;
- clear_state.color_mask[0].alpha_enabled = regs.clear_buffers.A ? GL_TRUE : GL_FALSE;
+ state_tracker.NotifyColorMask0();
+ glColorMaski(0, regs.clear_buffers.R != 0, regs.clear_buffers.G != 0,
+ regs.clear_buffers.B != 0, regs.clear_buffers.A != 0);
+
+ // TODO(Rodrigo): Determine if clamping is used on clears
+ SyncFragmentColorClampState();
+ SyncFramebufferSRGB();
}
if (regs.clear_buffers.Z) {
ASSERT_MSG(regs.zeta_enable != 0, "Tried to clear Z but buffer is not enabled!");
use_depth = true;
- // Always enable the depth write when clearing the depth buffer. The depth write mask is
- // ignored when clearing the buffer in the Switch, but OpenGL obeys it so we set it to
- // true.
- clear_state.depth.test_enabled = true;
- clear_state.depth.test_func = GL_ALWAYS;
- clear_state.depth.write_mask = GL_TRUE;
+ state_tracker.NotifyDepthMask();
+ glDepthMask(GL_TRUE);
}
if (regs.clear_buffers.S) {
- ASSERT_MSG(regs.zeta_enable != 0, "Tried to clear stencil but buffer is not enabled!");
+ ASSERT_MSG(regs.zeta_enable, "Tried to clear stencil but buffer is not enabled!");
use_stencil = true;
- clear_state.stencil.test_enabled = true;
-
- if (regs.clear_flags.stencil) {
- // Stencil affects the clear so fill it with the used masks
- clear_state.stencil.front.test_func = GL_ALWAYS;
- clear_state.stencil.front.test_mask = regs.stencil_front_func_mask;
- clear_state.stencil.front.action_stencil_fail = GL_KEEP;
- clear_state.stencil.front.action_depth_fail = GL_KEEP;
- clear_state.stencil.front.action_depth_pass = GL_KEEP;
- clear_state.stencil.front.write_mask = regs.stencil_front_mask;
- if (regs.stencil_two_side_enable) {
- clear_state.stencil.back.test_func = GL_ALWAYS;
- clear_state.stencil.back.test_mask = regs.stencil_back_func_mask;
- clear_state.stencil.back.action_stencil_fail = GL_KEEP;
- clear_state.stencil.back.action_depth_fail = GL_KEEP;
- clear_state.stencil.back.action_depth_pass = GL_KEEP;
- clear_state.stencil.back.write_mask = regs.stencil_back_mask;
- } else {
- clear_state.stencil.back.test_func = GL_ALWAYS;
- clear_state.stencil.back.test_mask = 0xFFFFFFFF;
- clear_state.stencil.back.write_mask = 0xFFFFFFFF;
- clear_state.stencil.back.action_stencil_fail = GL_KEEP;
- clear_state.stencil.back.action_depth_fail = GL_KEEP;
- clear_state.stencil.back.action_depth_pass = GL_KEEP;
- }
- }
}
if (!use_color && !use_depth && !use_stencil) {
@@ -516,20 +443,18 @@ void RasterizerOpenGL::Clear() {
return;
}
- ConfigureClearFramebuffer(clear_state, use_color, use_depth, use_stencil);
+ SyncRasterizeEnable();
- SyncViewport(clear_state);
- SyncRasterizeEnable(clear_state);
if (regs.clear_flags.scissor) {
- SyncScissorTest(clear_state);
+ SyncScissorTest();
+ } else {
+ state_tracker.NotifyScissor0();
+ glDisablei(GL_SCISSOR_TEST, 0);
}
- if (regs.clear_flags.viewport) {
- clear_state.EmulateViewportWithScissor();
- }
+ UNIMPLEMENTED_IF(regs.clear_flags.viewport);
- clear_state.AllDirty();
- clear_state.Apply();
+ ConfigureClearFramebuffer(use_color, use_depth, use_stencil);
if (use_color) {
glClearBufferfv(GL_COLOR, 0, regs.clear_color);
@@ -549,25 +474,27 @@ void RasterizerOpenGL::Clear() {
void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
MICROPROFILE_SCOPE(OpenGL_Drawing);
auto& gpu = system.GPU().Maxwell3D();
- const auto& regs = gpu.regs;
query_cache.UpdateCounters();
- SyncRasterizeEnable(state);
+ SyncViewport();
+ SyncRasterizeEnable();
+ SyncPolygonModes();
SyncColorMask();
SyncFragmentColorClampState();
SyncMultiSampleState();
SyncDepthTestState();
+ SyncDepthClamp();
SyncStencilTestState();
SyncBlendState();
SyncLogicOpState();
SyncCullMode();
SyncPrimitiveRestart();
- SyncScissorTest(state);
- SyncTransformFeedback();
+ SyncScissorTest();
SyncPointState();
SyncPolygonOffset();
SyncAlphaTest();
+ SyncFramebufferSRGB();
buffer_cache.Acquire();
@@ -591,14 +518,13 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
buffer_cache.Map(buffer_size);
// Prepare vertex array format.
- const GLuint vao = SetupVertexFormat();
- vertex_array_pushbuffer.Setup(vao);
+ SetupVertexFormat();
+ vertex_array_pushbuffer.Setup();
// Upload vertex and index data.
- SetupVertexBuffer(vao);
- SetupVertexInstances(vao);
-
- GLintptr index_buffer_offset;
+ SetupVertexBuffer();
+ SetupVertexInstances();
+ GLintptr index_buffer_offset = 0;
if (is_indexed) {
index_buffer_offset = SetupIndexBuffer();
}
@@ -624,27 +550,20 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
ConfigureFramebuffers();
// Signal the buffer cache that we are not going to upload more things.
- const bool invalidate = buffer_cache.Unmap();
+ buffer_cache.Unmap();
// Now that we are no longer uploading data, we can safely bind the buffers to OpenGL.
vertex_array_pushbuffer.Bind();
bind_ubo_pushbuffer.Bind();
bind_ssbo_pushbuffer.Bind();
- if (invalidate) {
- // As all cached buffers are invalidated, we need to recheck their state.
- gpu.dirty.ResetVertexArrays();
- }
- gpu.dirty.memory_general = false;
-
- shader_program_manager->ApplyTo(state);
- state.Apply();
+ program_manager.BindGraphicsPipeline();
if (texture_cache.TextureBarrier()) {
glTextureBarrier();
}
- ++num_queued_commands;
+ BeginTransformFeedback(primitive_mode);
const GLuint base_instance = static_cast<GLuint>(gpu.regs.vb_base_instance);
const GLsizei num_instances =
@@ -683,6 +602,10 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
num_instances, base_instance);
}
}
+
+ EndTransformFeedback();
+
+ ++num_queued_commands;
}
void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {
@@ -695,13 +618,7 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {
auto kernel = shader_cache.GetComputeKernel(code_addr);
SetupComputeTextures(kernel);
SetupComputeImages(kernel);
-
- const auto& launch_desc = system.GPU().KeplerCompute().launch_description;
- const ProgramVariant variant(launch_desc.block_dim_x, launch_desc.block_dim_y,
- launch_desc.block_dim_z, launch_desc.shared_alloc,
- launch_desc.local_pos_alloc);
- state.draw.shader_program = kernel->GetHandle(variant);
- state.draw.program_pipeline = 0;
+ program_manager.BindComputeShader(kernel->GetHandle());
const std::size_t buffer_size =
Tegra::Engines::KeplerCompute::NumConstBuffers *
@@ -719,11 +636,7 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {
bind_ubo_pushbuffer.Bind();
bind_ssbo_pushbuffer.Bind();
- state.ApplyTextures();
- state.ApplyImages();
- state.ApplyShaderProgram();
- state.ApplyProgramPipeline();
-
+ const auto& launch_desc = system.GPU().KeplerCompute().launch_description;
glDispatchCompute(launch_desc.grid_dim_x, launch_desc.grid_dim_y, launch_desc.grid_dim_z);
++num_queued_commands;
}
@@ -828,7 +741,7 @@ void RasterizerOpenGL::SetupDrawConstBuffers(std::size_t stage_index, const Shad
const auto& shader_stage = stages[stage_index];
u32 binding = device.GetBaseBindings(stage_index).uniform_buffer;
- for (const auto& entry : shader->GetShaderEntries().const_buffers) {
+ for (const auto& entry : shader->GetEntries().const_buffers) {
const auto& buffer = shader_stage.const_buffers[entry.GetIndex()];
SetupConstBuffer(binding++, buffer, entry);
}
@@ -839,7 +752,7 @@ void RasterizerOpenGL::SetupComputeConstBuffers(const Shader& kernel) {
const auto& launch_desc = system.GPU().KeplerCompute().launch_description;
u32 binding = 0;
- for (const auto& entry : kernel->GetShaderEntries().const_buffers) {
+ for (const auto& entry : kernel->GetEntries().const_buffers) {
const auto& config = launch_desc.const_buffer_config[entry.GetIndex()];
const std::bitset<8> mask = launch_desc.const_buffer_enable_mask.Value();
Tegra::Engines::ConstBufferInfo buffer;
@@ -851,7 +764,7 @@ void RasterizerOpenGL::SetupComputeConstBuffers(const Shader& kernel) {
}
void RasterizerOpenGL::SetupConstBuffer(u32 binding, const Tegra::Engines::ConstBufferInfo& buffer,
- const GLShader::ConstBufferEntry& entry) {
+ const ConstBufferEntry& entry) {
if (!buffer.enabled) {
// Set values to zero to unbind buffers
bind_ubo_pushbuffer.Push(binding, buffer_cache.GetEmptyBuffer(sizeof(float)), 0,
@@ -875,7 +788,7 @@ void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, const Shad
const auto cbufs{gpu.Maxwell3D().state.shader_stages[stage_index]};
u32 binding = device.GetBaseBindings(stage_index).shader_storage_buffer;
- for (const auto& entry : shader->GetShaderEntries().global_memory_entries) {
+ for (const auto& entry : shader->GetEntries().global_memory_entries) {
const auto addr{cbufs.const_buffers[entry.GetCbufIndex()].address + entry.GetCbufOffset()};
const auto gpu_addr{memory_manager.Read<u64>(addr)};
const auto size{memory_manager.Read<u32>(addr + 8)};
@@ -889,7 +802,7 @@ void RasterizerOpenGL::SetupComputeGlobalMemory(const Shader& kernel) {
const auto cbufs{gpu.KeplerCompute().launch_description.const_buffer_config};
u32 binding = 0;
- for (const auto& entry : kernel->GetShaderEntries().global_memory_entries) {
+ for (const auto& entry : kernel->GetEntries().global_memory_entries) {
const auto addr{cbufs[entry.GetCbufIndex()].Address() + entry.GetCbufOffset()};
const auto gpu_addr{memory_manager.Read<u64>(addr)};
const auto size{memory_manager.Read<u32>(addr + 8)};
@@ -897,7 +810,7 @@ void RasterizerOpenGL::SetupComputeGlobalMemory(const Shader& kernel) {
}
}
-void RasterizerOpenGL::SetupGlobalMemory(u32 binding, const GLShader::GlobalMemoryEntry& entry,
+void RasterizerOpenGL::SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& entry,
GPUVAddr gpu_addr, std::size_t size) {
const auto alignment{device.GetShaderStorageBufferAlignment()};
const auto [ssbo, buffer_offset] =
@@ -909,16 +822,11 @@ void RasterizerOpenGL::SetupDrawTextures(std::size_t stage_index, const Shader&
MICROPROFILE_SCOPE(OpenGL_Texture);
const auto& maxwell3d = system.GPU().Maxwell3D();
u32 binding = device.GetBaseBindings(stage_index).sampler;
- for (const auto& entry : shader->GetShaderEntries().samplers) {
- const auto shader_type = static_cast<Tegra::Engines::ShaderType>(stage_index);
- if (!entry.IsIndexed()) {
- const auto texture = GetTextureInfo(maxwell3d, entry, shader_type);
+ for (const auto& entry : shader->GetEntries().samplers) {
+ const auto shader_type = static_cast<ShaderType>(stage_index);
+ for (std::size_t i = 0; i < entry.Size(); ++i) {
+ const auto texture = GetTextureInfo(maxwell3d, entry, shader_type, i);
SetupTexture(binding++, texture, entry);
- } else {
- for (std::size_t i = 0; i < entry.Size(); ++i) {
- const auto texture = GetTextureInfo(maxwell3d, entry, shader_type, i);
- SetupTexture(binding++, texture, entry);
- }
}
}
}
@@ -927,46 +835,39 @@ void RasterizerOpenGL::SetupComputeTextures(const Shader& kernel) {
MICROPROFILE_SCOPE(OpenGL_Texture);
const auto& compute = system.GPU().KeplerCompute();
u32 binding = 0;
- for (const auto& entry : kernel->GetShaderEntries().samplers) {
- if (!entry.IsIndexed()) {
- const auto texture =
- GetTextureInfo(compute, entry, Tegra::Engines::ShaderType::Compute);
+ for (const auto& entry : kernel->GetEntries().samplers) {
+ for (std::size_t i = 0; i < entry.Size(); ++i) {
+ const auto texture = GetTextureInfo(compute, entry, ShaderType::Compute, i);
SetupTexture(binding++, texture, entry);
- } else {
- for (std::size_t i = 0; i < entry.Size(); ++i) {
- const auto texture =
- GetTextureInfo(compute, entry, Tegra::Engines::ShaderType::Compute, i);
- SetupTexture(binding++, texture, entry);
- }
}
}
}
void RasterizerOpenGL::SetupTexture(u32 binding, const Tegra::Texture::FullTextureInfo& texture,
- const GLShader::SamplerEntry& entry) {
+ const SamplerEntry& entry) {
const auto view = texture_cache.GetTextureSurface(texture.tic, entry);
if (!view) {
// Can occur when texture addr is null or its memory is unmapped/invalid
- state.samplers[binding] = 0;
- state.textures[binding] = 0;
+ glBindSampler(binding, 0);
+ glBindTextureUnit(binding, 0);
return;
}
- state.textures[binding] = view->GetTexture();
+ glBindTextureUnit(binding, view->GetTexture());
if (view->GetSurfaceParams().IsBuffer()) {
return;
}
- state.samplers[binding] = sampler_cache.GetSampler(texture.tsc);
-
// Apply swizzle to textures that are not buffers.
view->ApplySwizzle(texture.tic.x_source, texture.tic.y_source, texture.tic.z_source,
texture.tic.w_source);
+
+ glBindSampler(binding, sampler_cache.GetSampler(texture.tsc));
}
void RasterizerOpenGL::SetupDrawImages(std::size_t stage_index, const Shader& shader) {
const auto& maxwell3d = system.GPU().Maxwell3D();
u32 binding = device.GetBaseBindings(stage_index).image;
- for (const auto& entry : shader->GetShaderEntries().images) {
+ for (const auto& entry : shader->GetEntries().images) {
const auto shader_type = static_cast<Tegra::Engines::ShaderType>(stage_index);
const auto tic = GetTextureInfo(maxwell3d, entry, shader_type).tic;
SetupImage(binding++, tic, entry);
@@ -976,17 +877,17 @@ void RasterizerOpenGL::SetupDrawImages(std::size_t stage_index, const Shader& sh
void RasterizerOpenGL::SetupComputeImages(const Shader& shader) {
const auto& compute = system.GPU().KeplerCompute();
u32 binding = 0;
- for (const auto& entry : shader->GetShaderEntries().images) {
+ for (const auto& entry : shader->GetEntries().images) {
const auto tic = GetTextureInfo(compute, entry, Tegra::Engines::ShaderType::Compute).tic;
SetupImage(binding++, tic, entry);
}
}
void RasterizerOpenGL::SetupImage(u32 binding, const Tegra::Texture::TICEntry& tic,
- const GLShader::ImageEntry& entry) {
+ const ImageEntry& entry) {
const auto view = texture_cache.GetImageSurface(tic, entry);
if (!view) {
- state.images[binding] = 0;
+ glBindImageTexture(binding, 0, 0, GL_FALSE, 0, GL_READ_ONLY, GL_R8);
return;
}
if (!tic.IsBuffer()) {
@@ -995,55 +896,87 @@ void RasterizerOpenGL::SetupImage(u32 binding, const Tegra::Texture::TICEntry& t
if (entry.IsWritten()) {
view->MarkAsModified(texture_cache.Tick());
}
- state.images[binding] = view->GetTexture();
+ glBindImageTexture(binding, view->GetTexture(), 0, GL_TRUE, 0, GL_READ_WRITE,
+ view->GetFormat());
}
-void RasterizerOpenGL::SyncViewport(OpenGLState& current_state) {
- const auto& regs = system.GPU().Maxwell3D().regs;
- const bool geometry_shaders_enabled =
- regs.IsShaderConfigEnabled(static_cast<size_t>(Maxwell::ShaderProgram::Geometry));
- const std::size_t viewport_count =
- geometry_shaders_enabled ? Tegra::Engines::Maxwell3D::Regs::NumViewports : 1;
- for (std::size_t i = 0; i < viewport_count; i++) {
- auto& viewport = current_state.viewports[i];
- const auto& src = regs.viewports[i];
- const Common::Rectangle<s32> viewport_rect{regs.viewport_transform[i].GetRect()};
- viewport.x = viewport_rect.left;
- viewport.y = viewport_rect.bottom;
- viewport.width = viewport_rect.GetWidth();
- viewport.height = viewport_rect.GetHeight();
- viewport.depth_range_far = src.depth_range_far;
- viewport.depth_range_near = src.depth_range_near;
- }
- state.depth_clamp.far_plane = regs.view_volume_clip_control.depth_clamp_far != 0;
- state.depth_clamp.near_plane = regs.view_volume_clip_control.depth_clamp_near != 0;
-
- bool flip_y = false;
- if (regs.viewport_transform[0].scale_y < 0.0) {
- flip_y = !flip_y;
- }
- if (regs.screen_y_control.y_negate != 0) {
- flip_y = !flip_y;
- }
- state.clip_control.origin = flip_y ? GL_UPPER_LEFT : GL_LOWER_LEFT;
- state.clip_control.depth_mode =
- regs.depth_mode == Tegra::Engines::Maxwell3D::Regs::DepthMode::ZeroToOne
- ? GL_ZERO_TO_ONE
- : GL_NEGATIVE_ONE_TO_ONE;
+void RasterizerOpenGL::SyncViewport() {
+ auto& gpu = system.GPU().Maxwell3D();
+ auto& flags = gpu.dirty.flags;
+ const auto& regs = gpu.regs;
+
+ const bool dirty_viewport = flags[Dirty::Viewports];
+ if (dirty_viewport || flags[Dirty::ClipControl]) {
+ flags[Dirty::ClipControl] = false;
+
+ bool flip_y = false;
+ if (regs.viewport_transform[0].scale_y < 0.0) {
+ flip_y = !flip_y;
+ }
+ if (regs.screen_y_control.y_negate != 0) {
+ flip_y = !flip_y;
+ }
+ glClipControl(flip_y ? GL_UPPER_LEFT : GL_LOWER_LEFT,
+ regs.depth_mode == Maxwell::DepthMode::ZeroToOne ? GL_ZERO_TO_ONE
+ : GL_NEGATIVE_ONE_TO_ONE);
+ }
+
+ if (dirty_viewport) {
+ flags[Dirty::Viewports] = false;
+
+ const bool force = flags[Dirty::ViewportTransform];
+ flags[Dirty::ViewportTransform] = false;
+
+ for (std::size_t i = 0; i < Maxwell::NumViewports; ++i) {
+ if (!force && !flags[Dirty::Viewport0 + i]) {
+ continue;
+ }
+ flags[Dirty::Viewport0 + i] = false;
+
+ const auto& src = regs.viewport_transform[i];
+ const Common::Rectangle<f32> rect{src.GetRect()};
+ glViewportIndexedf(static_cast<GLuint>(i), rect.left, rect.bottom, rect.GetWidth(),
+ rect.GetHeight());
+
+ const GLdouble reduce_z = regs.depth_mode == Maxwell::DepthMode::MinusOneToOne;
+ const GLdouble near_depth = src.translate_z - src.scale_z * reduce_z;
+ const GLdouble far_depth = src.translate_z + src.scale_z;
+ glDepthRangeIndexed(static_cast<GLuint>(i), near_depth, far_depth);
+ }
+ }
}
-void RasterizerOpenGL::SyncClipEnabled(
- const std::array<bool, Maxwell::Regs::NumClipDistances>& clip_mask) {
+void RasterizerOpenGL::SyncDepthClamp() {
+ auto& gpu = system.GPU().Maxwell3D();
+ auto& flags = gpu.dirty.flags;
+ if (!flags[Dirty::DepthClampEnabled]) {
+ return;
+ }
+ flags[Dirty::DepthClampEnabled] = false;
- const auto& regs = system.GPU().Maxwell3D().regs;
- const std::array<bool, Maxwell::Regs::NumClipDistances> reg_state{
- regs.clip_distance_enabled.c0 != 0, regs.clip_distance_enabled.c1 != 0,
- regs.clip_distance_enabled.c2 != 0, regs.clip_distance_enabled.c3 != 0,
- regs.clip_distance_enabled.c4 != 0, regs.clip_distance_enabled.c5 != 0,
- regs.clip_distance_enabled.c6 != 0, regs.clip_distance_enabled.c7 != 0};
+ const auto& state = gpu.regs.view_volume_clip_control;
+ UNIMPLEMENTED_IF_MSG(state.depth_clamp_far != state.depth_clamp_near,
+ "Unimplemented depth clamp separation!");
+
+ oglEnable(GL_DEPTH_CLAMP, state.depth_clamp_far || state.depth_clamp_near);
+}
+
+void RasterizerOpenGL::SyncClipEnabled(u32 clip_mask) {
+ auto& gpu = system.GPU().Maxwell3D();
+ auto& flags = gpu.dirty.flags;
+ if (!flags[Dirty::ClipDistances] && !flags[Dirty::Shaders]) {
+ return;
+ }
+ flags[Dirty::ClipDistances] = false;
+
+ clip_mask &= gpu.regs.clip_distance_enabled;
+ if (clip_mask == last_clip_distance_mask) {
+ return;
+ }
+ last_clip_distance_mask = clip_mask;
for (std::size_t i = 0; i < Maxwell::Regs::NumClipDistances; ++i) {
- state.clip_distance[i] = reg_state[i] && clip_mask[i];
+ oglEnable(static_cast<GLenum>(GL_CLIP_DISTANCE0 + i), (clip_mask >> i) & 1);
}
}
@@ -1052,247 +985,442 @@ void RasterizerOpenGL::SyncClipCoef() {
}
void RasterizerOpenGL::SyncCullMode() {
- const auto& regs = system.GPU().Maxwell3D().regs;
+ auto& gpu = system.GPU().Maxwell3D();
+ auto& flags = gpu.dirty.flags;
+ const auto& regs = gpu.regs;
- state.cull.enabled = regs.cull.enabled != 0;
- if (state.cull.enabled) {
- state.cull.mode = MaxwellToGL::CullFace(regs.cull.cull_face);
+ if (flags[Dirty::CullTest]) {
+ flags[Dirty::CullTest] = false;
+
+ if (regs.cull_test_enabled) {
+ glEnable(GL_CULL_FACE);
+ glCullFace(MaxwellToGL::CullFace(regs.cull_face));
+ } else {
+ glDisable(GL_CULL_FACE);
+ }
}
- state.cull.front_face = MaxwellToGL::FrontFace(regs.cull.front_face);
+ if (flags[Dirty::FrontFace]) {
+ flags[Dirty::FrontFace] = false;
+ glFrontFace(MaxwellToGL::FrontFace(regs.front_face));
+ }
}
void RasterizerOpenGL::SyncPrimitiveRestart() {
- const auto& regs = system.GPU().Maxwell3D().regs;
+ auto& gpu = system.GPU().Maxwell3D();
+ auto& flags = gpu.dirty.flags;
+ if (!flags[Dirty::PrimitiveRestart]) {
+ return;
+ }
+ flags[Dirty::PrimitiveRestart] = false;
- state.primitive_restart.enabled = regs.primitive_restart.enabled;
- state.primitive_restart.index = regs.primitive_restart.index;
+ if (gpu.regs.primitive_restart.enabled) {
+ glEnable(GL_PRIMITIVE_RESTART);
+ glPrimitiveRestartIndex(gpu.regs.primitive_restart.index);
+ } else {
+ glDisable(GL_PRIMITIVE_RESTART);
+ }
}
void RasterizerOpenGL::SyncDepthTestState() {
- const auto& regs = system.GPU().Maxwell3D().regs;
-
- state.depth.test_enabled = regs.depth_test_enable != 0;
- state.depth.write_mask = regs.depth_write_enabled ? GL_TRUE : GL_FALSE;
+ auto& gpu = system.GPU().Maxwell3D();
+ auto& flags = gpu.dirty.flags;
- if (!state.depth.test_enabled) {
- return;
+ const auto& regs = gpu.regs;
+ if (flags[Dirty::DepthMask]) {
+ flags[Dirty::DepthMask] = false;
+ glDepthMask(regs.depth_write_enabled ? GL_TRUE : GL_FALSE);
}
- state.depth.test_func = MaxwellToGL::ComparisonOp(regs.depth_test_func);
+ if (flags[Dirty::DepthTest]) {
+ flags[Dirty::DepthTest] = false;
+ if (regs.depth_test_enable) {
+ glEnable(GL_DEPTH_TEST);
+ glDepthFunc(MaxwellToGL::ComparisonOp(regs.depth_test_func));
+ } else {
+ glDisable(GL_DEPTH_TEST);
+ }
+ }
}
void RasterizerOpenGL::SyncStencilTestState() {
- auto& maxwell3d = system.GPU().Maxwell3D();
- if (!maxwell3d.dirty.stencil_test) {
+ auto& gpu = system.GPU().Maxwell3D();
+ auto& flags = gpu.dirty.flags;
+ if (!flags[Dirty::StencilTest]) {
return;
}
- maxwell3d.dirty.stencil_test = false;
-
- const auto& regs = maxwell3d.regs;
- state.stencil.test_enabled = regs.stencil_enable != 0;
- state.MarkDirtyStencilState();
+ flags[Dirty::StencilTest] = false;
+ const auto& regs = gpu.regs;
if (!regs.stencil_enable) {
+ glDisable(GL_STENCIL_TEST);
return;
}
- state.stencil.front.test_func = MaxwellToGL::ComparisonOp(regs.stencil_front_func_func);
- state.stencil.front.test_ref = regs.stencil_front_func_ref;
- state.stencil.front.test_mask = regs.stencil_front_func_mask;
- state.stencil.front.action_stencil_fail = MaxwellToGL::StencilOp(regs.stencil_front_op_fail);
- state.stencil.front.action_depth_fail = MaxwellToGL::StencilOp(regs.stencil_front_op_zfail);
- state.stencil.front.action_depth_pass = MaxwellToGL::StencilOp(regs.stencil_front_op_zpass);
- state.stencil.front.write_mask = regs.stencil_front_mask;
+ glEnable(GL_STENCIL_TEST);
+ glStencilFuncSeparate(GL_FRONT, MaxwellToGL::ComparisonOp(regs.stencil_front_func_func),
+ regs.stencil_front_func_ref, regs.stencil_front_func_mask);
+ glStencilOpSeparate(GL_FRONT, MaxwellToGL::StencilOp(regs.stencil_front_op_fail),
+ MaxwellToGL::StencilOp(regs.stencil_front_op_zfail),
+ MaxwellToGL::StencilOp(regs.stencil_front_op_zpass));
+ glStencilMaskSeparate(GL_FRONT, regs.stencil_front_mask);
+
if (regs.stencil_two_side_enable) {
- state.stencil.back.test_func = MaxwellToGL::ComparisonOp(regs.stencil_back_func_func);
- state.stencil.back.test_ref = regs.stencil_back_func_ref;
- state.stencil.back.test_mask = regs.stencil_back_func_mask;
- state.stencil.back.action_stencil_fail = MaxwellToGL::StencilOp(regs.stencil_back_op_fail);
- state.stencil.back.action_depth_fail = MaxwellToGL::StencilOp(regs.stencil_back_op_zfail);
- state.stencil.back.action_depth_pass = MaxwellToGL::StencilOp(regs.stencil_back_op_zpass);
- state.stencil.back.write_mask = regs.stencil_back_mask;
+ glStencilFuncSeparate(GL_BACK, MaxwellToGL::ComparisonOp(regs.stencil_back_func_func),
+ regs.stencil_back_func_ref, regs.stencil_back_func_mask);
+ glStencilOpSeparate(GL_BACK, MaxwellToGL::StencilOp(regs.stencil_back_op_fail),
+ MaxwellToGL::StencilOp(regs.stencil_back_op_zfail),
+ MaxwellToGL::StencilOp(regs.stencil_back_op_zpass));
+ glStencilMaskSeparate(GL_BACK, regs.stencil_back_mask);
} else {
- state.stencil.back.test_func = GL_ALWAYS;
- state.stencil.back.test_ref = 0;
- state.stencil.back.test_mask = 0xFFFFFFFF;
- state.stencil.back.write_mask = 0xFFFFFFFF;
- state.stencil.back.action_stencil_fail = GL_KEEP;
- state.stencil.back.action_depth_fail = GL_KEEP;
- state.stencil.back.action_depth_pass = GL_KEEP;
+ glStencilFuncSeparate(GL_BACK, GL_ALWAYS, 0, 0xFFFFFFFF);
+ glStencilOpSeparate(GL_BACK, GL_KEEP, GL_KEEP, GL_KEEP);
+ glStencilMaskSeparate(GL_BACK, 0xFFFFFFFF);
}
}
-void RasterizerOpenGL::SyncRasterizeEnable(OpenGLState& current_state) {
- const auto& regs = system.GPU().Maxwell3D().regs;
- current_state.rasterizer_discard = regs.rasterize_enable == 0;
+void RasterizerOpenGL::SyncRasterizeEnable() {
+ auto& gpu = system.GPU().Maxwell3D();
+ auto& flags = gpu.dirty.flags;
+ if (!flags[Dirty::RasterizeEnable]) {
+ return;
+ }
+ flags[Dirty::RasterizeEnable] = false;
+
+ oglEnable(GL_RASTERIZER_DISCARD, gpu.regs.rasterize_enable == 0);
+}
+
+void RasterizerOpenGL::SyncPolygonModes() {
+ auto& gpu = system.GPU().Maxwell3D();
+ auto& flags = gpu.dirty.flags;
+ if (!flags[Dirty::PolygonModes]) {
+ return;
+ }
+ flags[Dirty::PolygonModes] = false;
+
+ if (gpu.regs.fill_rectangle) {
+ if (!GLAD_GL_NV_fill_rectangle) {
+ LOG_ERROR(Render_OpenGL, "GL_NV_fill_rectangle used and not supported");
+ glPolygonMode(GL_FRONT_AND_BACK, GL_FILL);
+ return;
+ }
+
+ flags[Dirty::PolygonModeFront] = true;
+ flags[Dirty::PolygonModeBack] = true;
+ glPolygonMode(GL_FRONT_AND_BACK, GL_FILL_RECTANGLE_NV);
+ return;
+ }
+
+ if (gpu.regs.polygon_mode_front == gpu.regs.polygon_mode_back) {
+ flags[Dirty::PolygonModeFront] = false;
+ flags[Dirty::PolygonModeBack] = false;
+ glPolygonMode(GL_FRONT_AND_BACK, MaxwellToGL::PolygonMode(gpu.regs.polygon_mode_front));
+ return;
+ }
+
+ if (flags[Dirty::PolygonModeFront]) {
+ flags[Dirty::PolygonModeFront] = false;
+ glPolygonMode(GL_FRONT, MaxwellToGL::PolygonMode(gpu.regs.polygon_mode_front));
+ }
+
+ if (flags[Dirty::PolygonModeBack]) {
+ flags[Dirty::PolygonModeBack] = false;
+ glPolygonMode(GL_BACK, MaxwellToGL::PolygonMode(gpu.regs.polygon_mode_back));
+ }
}
void RasterizerOpenGL::SyncColorMask() {
- auto& maxwell3d = system.GPU().Maxwell3D();
- if (!maxwell3d.dirty.color_mask) {
+ auto& gpu = system.GPU().Maxwell3D();
+ auto& flags = gpu.dirty.flags;
+ if (!flags[Dirty::ColorMasks]) {
return;
}
- const auto& regs = maxwell3d.regs;
+ flags[Dirty::ColorMasks] = false;
+
+ const bool force = flags[Dirty::ColorMaskCommon];
+ flags[Dirty::ColorMaskCommon] = false;
+
+ const auto& regs = gpu.regs;
+ if (regs.color_mask_common) {
+ if (!force && !flags[Dirty::ColorMask0]) {
+ return;
+ }
+ flags[Dirty::ColorMask0] = false;
- const std::size_t count =
- regs.independent_blend_enable ? Tegra::Engines::Maxwell3D::Regs::NumRenderTargets : 1;
- for (std::size_t i = 0; i < count; i++) {
- const auto& source = regs.color_mask[regs.color_mask_common ? 0 : i];
- auto& dest = state.color_mask[i];
- dest.red_enabled = (source.R == 0) ? GL_FALSE : GL_TRUE;
- dest.green_enabled = (source.G == 0) ? GL_FALSE : GL_TRUE;
- dest.blue_enabled = (source.B == 0) ? GL_FALSE : GL_TRUE;
- dest.alpha_enabled = (source.A == 0) ? GL_FALSE : GL_TRUE;
+ auto& mask = regs.color_mask[0];
+ glColorMask(mask.R != 0, mask.B != 0, mask.G != 0, mask.A != 0);
+ return;
}
- state.MarkDirtyColorMask();
- maxwell3d.dirty.color_mask = false;
+ // Path without color_mask_common set
+ for (std::size_t i = 0; i < Maxwell::NumRenderTargets; ++i) {
+ if (!force && !flags[Dirty::ColorMask0 + i]) {
+ continue;
+ }
+ flags[Dirty::ColorMask0 + i] = false;
+
+ const auto& mask = regs.color_mask[i];
+ glColorMaski(static_cast<GLuint>(i), mask.R != 0, mask.G != 0, mask.B != 0, mask.A != 0);
+ }
}
void RasterizerOpenGL::SyncMultiSampleState() {
+ auto& gpu = system.GPU().Maxwell3D();
+ auto& flags = gpu.dirty.flags;
+ if (!flags[Dirty::MultisampleControl]) {
+ return;
+ }
+ flags[Dirty::MultisampleControl] = false;
+
const auto& regs = system.GPU().Maxwell3D().regs;
- state.multisample_control.alpha_to_coverage = regs.multisample_control.alpha_to_coverage != 0;
- state.multisample_control.alpha_to_one = regs.multisample_control.alpha_to_one != 0;
+ oglEnable(GL_SAMPLE_ALPHA_TO_COVERAGE, regs.multisample_control.alpha_to_coverage);
+ oglEnable(GL_SAMPLE_ALPHA_TO_ONE, regs.multisample_control.alpha_to_one);
}
void RasterizerOpenGL::SyncFragmentColorClampState() {
- const auto& regs = system.GPU().Maxwell3D().regs;
- state.fragment_color_clamp.enabled = regs.frag_color_clamp != 0;
+ auto& gpu = system.GPU().Maxwell3D();
+ auto& flags = gpu.dirty.flags;
+ if (!flags[Dirty::FragmentClampColor]) {
+ return;
+ }
+ flags[Dirty::FragmentClampColor] = false;
+
+ glClampColor(GL_CLAMP_FRAGMENT_COLOR, gpu.regs.frag_color_clamp ? GL_TRUE : GL_FALSE);
}
void RasterizerOpenGL::SyncBlendState() {
- auto& maxwell3d = system.GPU().Maxwell3D();
- if (!maxwell3d.dirty.blend_state) {
+ auto& gpu = system.GPU().Maxwell3D();
+ auto& flags = gpu.dirty.flags;
+ const auto& regs = gpu.regs;
+
+ if (flags[Dirty::BlendColor]) {
+ flags[Dirty::BlendColor] = false;
+ glBlendColor(regs.blend_color.r, regs.blend_color.g, regs.blend_color.b,
+ regs.blend_color.a);
+ }
+
+ // TODO(Rodrigo): Revisit blending, there are several registers we are not reading
+
+ if (!flags[Dirty::BlendStates]) {
return;
}
- const auto& regs = maxwell3d.regs;
-
- state.blend_color.red = regs.blend_color.r;
- state.blend_color.green = regs.blend_color.g;
- state.blend_color.blue = regs.blend_color.b;
- state.blend_color.alpha = regs.blend_color.a;
-
- state.independant_blend.enabled = regs.independent_blend_enable;
- if (!state.independant_blend.enabled) {
- auto& blend = state.blend[0];
- const auto& src = regs.blend;
- blend.enabled = src.enable[0] != 0;
- if (blend.enabled) {
- blend.rgb_equation = MaxwellToGL::BlendEquation(src.equation_rgb);
- blend.src_rgb_func = MaxwellToGL::BlendFunc(src.factor_source_rgb);
- blend.dst_rgb_func = MaxwellToGL::BlendFunc(src.factor_dest_rgb);
- blend.a_equation = MaxwellToGL::BlendEquation(src.equation_a);
- blend.src_a_func = MaxwellToGL::BlendFunc(src.factor_source_a);
- blend.dst_a_func = MaxwellToGL::BlendFunc(src.factor_dest_a);
- }
- for (std::size_t i = 1; i < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; i++) {
- state.blend[i].enabled = false;
+ flags[Dirty::BlendStates] = false;
+
+ if (!regs.independent_blend_enable) {
+ if (!regs.blend.enable[0]) {
+ glDisable(GL_BLEND);
+ return;
}
- maxwell3d.dirty.blend_state = false;
- state.MarkDirtyBlendState();
+ glEnable(GL_BLEND);
+ glBlendFuncSeparate(MaxwellToGL::BlendFunc(regs.blend.factor_source_rgb),
+ MaxwellToGL::BlendFunc(regs.blend.factor_dest_rgb),
+ MaxwellToGL::BlendFunc(regs.blend.factor_source_a),
+ MaxwellToGL::BlendFunc(regs.blend.factor_dest_a));
+ glBlendEquationSeparate(MaxwellToGL::BlendEquation(regs.blend.equation_rgb),
+ MaxwellToGL::BlendEquation(regs.blend.equation_a));
return;
}
- for (std::size_t i = 0; i < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; i++) {
- auto& blend = state.blend[i];
- const auto& src = regs.independent_blend[i];
- blend.enabled = regs.blend.enable[i] != 0;
- if (!blend.enabled)
+ const bool force = flags[Dirty::BlendIndependentEnabled];
+ flags[Dirty::BlendIndependentEnabled] = false;
+
+ for (std::size_t i = 0; i < Maxwell::NumRenderTargets; ++i) {
+ if (!force && !flags[Dirty::BlendState0 + i]) {
continue;
- blend.rgb_equation = MaxwellToGL::BlendEquation(src.equation_rgb);
- blend.src_rgb_func = MaxwellToGL::BlendFunc(src.factor_source_rgb);
- blend.dst_rgb_func = MaxwellToGL::BlendFunc(src.factor_dest_rgb);
- blend.a_equation = MaxwellToGL::BlendEquation(src.equation_a);
- blend.src_a_func = MaxwellToGL::BlendFunc(src.factor_source_a);
- blend.dst_a_func = MaxwellToGL::BlendFunc(src.factor_dest_a);
- }
+ }
+ flags[Dirty::BlendState0 + i] = false;
+
+ if (!regs.blend.enable[i]) {
+ glDisablei(GL_BLEND, static_cast<GLuint>(i));
+ continue;
+ }
+ glEnablei(GL_BLEND, static_cast<GLuint>(i));
- state.MarkDirtyBlendState();
- maxwell3d.dirty.blend_state = false;
+ const auto& src = regs.independent_blend[i];
+ glBlendFuncSeparatei(static_cast<GLuint>(i), MaxwellToGL::BlendFunc(src.factor_source_rgb),
+ MaxwellToGL::BlendFunc(src.factor_dest_rgb),
+ MaxwellToGL::BlendFunc(src.factor_source_a),
+ MaxwellToGL::BlendFunc(src.factor_dest_a));
+ glBlendEquationSeparatei(static_cast<GLuint>(i),
+ MaxwellToGL::BlendEquation(src.equation_rgb),
+ MaxwellToGL::BlendEquation(src.equation_a));
+ }
}
void RasterizerOpenGL::SyncLogicOpState() {
- const auto& regs = system.GPU().Maxwell3D().regs;
+ auto& gpu = system.GPU().Maxwell3D();
+ auto& flags = gpu.dirty.flags;
+ if (!flags[Dirty::LogicOp]) {
+ return;
+ }
+ flags[Dirty::LogicOp] = false;
- state.logic_op.enabled = regs.logic_op.enable != 0;
+ const auto& regs = gpu.regs;
+ if (regs.logic_op.enable) {
+ glEnable(GL_COLOR_LOGIC_OP);
+ glLogicOp(MaxwellToGL::LogicOp(regs.logic_op.operation));
+ } else {
+ glDisable(GL_COLOR_LOGIC_OP);
+ }
+}
- if (!state.logic_op.enabled)
+void RasterizerOpenGL::SyncScissorTest() {
+ auto& gpu = system.GPU().Maxwell3D();
+ auto& flags = gpu.dirty.flags;
+ if (!flags[Dirty::Scissors]) {
return;
+ }
+ flags[Dirty::Scissors] = false;
- ASSERT_MSG(regs.blend.enable[0] == 0,
- "Blending and logic op can't be enabled at the same time.");
-
- state.logic_op.operation = MaxwellToGL::LogicOp(regs.logic_op.operation);
-}
+ const auto& regs = gpu.regs;
+ for (std::size_t index = 0; index < Maxwell::NumViewports; ++index) {
+ if (!flags[Dirty::Scissor0 + index]) {
+ continue;
+ }
+ flags[Dirty::Scissor0 + index] = false;
-void RasterizerOpenGL::SyncScissorTest(OpenGLState& current_state) {
- const auto& regs = system.GPU().Maxwell3D().regs;
- const bool geometry_shaders_enabled =
- regs.IsShaderConfigEnabled(static_cast<size_t>(Maxwell::ShaderProgram::Geometry));
- const std::size_t viewport_count =
- geometry_shaders_enabled ? Tegra::Engines::Maxwell3D::Regs::NumViewports : 1;
- for (std::size_t i = 0; i < viewport_count; i++) {
- const auto& src = regs.scissor_test[i];
- auto& dst = current_state.viewports[i].scissor;
- dst.enabled = (src.enable != 0);
- if (dst.enabled == 0) {
- return;
+ const auto& src = regs.scissor_test[index];
+ if (src.enable) {
+ glEnablei(GL_SCISSOR_TEST, static_cast<GLuint>(index));
+ glScissorIndexed(static_cast<GLuint>(index), src.min_x, src.min_y,
+ src.max_x - src.min_x, src.max_y - src.min_y);
+ } else {
+ glDisablei(GL_SCISSOR_TEST, static_cast<GLuint>(index));
}
- const u32 width = src.max_x - src.min_x;
- const u32 height = src.max_y - src.min_y;
- dst.x = src.min_x;
- dst.y = src.min_y;
- dst.width = width;
- dst.height = height;
}
}
-void RasterizerOpenGL::SyncTransformFeedback() {
- const auto& regs = system.GPU().Maxwell3D().regs;
- UNIMPLEMENTED_IF_MSG(regs.tfb_enabled != 0, "Transform feedbacks are not implemented");
-}
-
void RasterizerOpenGL::SyncPointState() {
- const auto& regs = system.GPU().Maxwell3D().regs;
+ auto& gpu = system.GPU().Maxwell3D();
+ auto& flags = gpu.dirty.flags;
+ if (!flags[Dirty::PointSize]) {
+ return;
+ }
+ flags[Dirty::PointSize] = false;
+
+ oglEnable(GL_POINT_SPRITE, gpu.regs.point_sprite_enable);
+
+ if (gpu.regs.vp_point_size.enable) {
+ // By definition of GL_POINT_SIZE, it only matters if GL_PROGRAM_POINT_SIZE is disabled.
+ glEnable(GL_PROGRAM_POINT_SIZE);
+ return;
+ }
+
// Limit the point size to 1 since nouveau sometimes sets a point size of 0 (and that's invalid
// in OpenGL).
- state.point.program_control = regs.vp_point_size.enable != 0;
- state.point.sprite = regs.point_sprite_enable != 0;
- state.point.size = std::max(1.0f, regs.point_size);
+ glPointSize(std::max(1.0f, gpu.regs.point_size));
+ glDisable(GL_PROGRAM_POINT_SIZE);
}
void RasterizerOpenGL::SyncPolygonOffset() {
- auto& maxwell3d = system.GPU().Maxwell3D();
- if (!maxwell3d.dirty.polygon_offset) {
+ auto& gpu = system.GPU().Maxwell3D();
+ auto& flags = gpu.dirty.flags;
+ if (!flags[Dirty::PolygonOffset]) {
return;
}
- const auto& regs = maxwell3d.regs;
-
- state.polygon_offset.fill_enable = regs.polygon_offset_fill_enable != 0;
- state.polygon_offset.line_enable = regs.polygon_offset_line_enable != 0;
- state.polygon_offset.point_enable = regs.polygon_offset_point_enable != 0;
+ flags[Dirty::PolygonOffset] = false;
- // Hardware divides polygon offset units by two
- state.polygon_offset.units = regs.polygon_offset_units / 2.0f;
- state.polygon_offset.factor = regs.polygon_offset_factor;
- state.polygon_offset.clamp = regs.polygon_offset_clamp;
+ const auto& regs = gpu.regs;
+ oglEnable(GL_POLYGON_OFFSET_FILL, regs.polygon_offset_fill_enable);
+ oglEnable(GL_POLYGON_OFFSET_LINE, regs.polygon_offset_line_enable);
+ oglEnable(GL_POLYGON_OFFSET_POINT, regs.polygon_offset_point_enable);
- state.MarkDirtyPolygonOffset();
- maxwell3d.dirty.polygon_offset = false;
+ if (regs.polygon_offset_fill_enable || regs.polygon_offset_line_enable ||
+ regs.polygon_offset_point_enable) {
+ // Hardware divides polygon offset units by two
+ glPolygonOffsetClamp(regs.polygon_offset_factor, regs.polygon_offset_units / 2.0f,
+ regs.polygon_offset_clamp);
+ }
}
void RasterizerOpenGL::SyncAlphaTest() {
+ auto& gpu = system.GPU().Maxwell3D();
+ auto& flags = gpu.dirty.flags;
+ if (!flags[Dirty::AlphaTest]) {
+ return;
+ }
+ flags[Dirty::AlphaTest] = false;
+
+ const auto& regs = gpu.regs;
+ if (regs.alpha_test_enabled && regs.rt_control.count > 1) {
+ LOG_WARNING(Render_OpenGL, "Alpha testing with more than one render target is not tested");
+ }
+
+ if (regs.alpha_test_enabled) {
+ glEnable(GL_ALPHA_TEST);
+ glAlphaFunc(MaxwellToGL::ComparisonOp(regs.alpha_test_func), regs.alpha_test_ref);
+ } else {
+ glDisable(GL_ALPHA_TEST);
+ }
+}
+
+void RasterizerOpenGL::SyncFramebufferSRGB() {
+ auto& gpu = system.GPU().Maxwell3D();
+ auto& flags = gpu.dirty.flags;
+ if (!flags[Dirty::FramebufferSRGB]) {
+ return;
+ }
+ flags[Dirty::FramebufferSRGB] = false;
+
+ oglEnable(GL_FRAMEBUFFER_SRGB, gpu.regs.framebuffer_srgb);
+}
+
+void RasterizerOpenGL::BeginTransformFeedback(GLenum primitive_mode) {
const auto& regs = system.GPU().Maxwell3D().regs;
- UNIMPLEMENTED_IF_MSG(regs.alpha_test_enabled != 0 && regs.rt_control.count > 1,
- "Alpha Testing is enabled with more than one rendertarget");
+ if (regs.tfb_enabled == 0) {
+ return;
+ }
+
+ UNIMPLEMENTED_IF(regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationControl) ||
+ regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationEval) ||
+ regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::Geometry));
- state.alpha_test.enabled = regs.alpha_test_enabled;
- if (!state.alpha_test.enabled) {
+ for (std::size_t index = 0; index < Maxwell::NumTransformFeedbackBuffers; ++index) {
+ const auto& binding = regs.tfb_bindings[index];
+ if (!binding.buffer_enable) {
+ if (enabled_transform_feedback_buffers[index]) {
+ glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER, static_cast<GLuint>(index), 0, 0,
+ 0);
+ }
+ enabled_transform_feedback_buffers[index] = false;
+ continue;
+ }
+ enabled_transform_feedback_buffers[index] = true;
+
+ auto& tfb_buffer = transform_feedback_buffers[index];
+ tfb_buffer.Create();
+
+ const GLuint handle = tfb_buffer.handle;
+ const std::size_t size = binding.buffer_size;
+ glNamedBufferData(handle, static_cast<GLsizeiptr>(size), nullptr, GL_STREAM_COPY);
+ glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER, static_cast<GLuint>(index), handle, 0,
+ static_cast<GLsizeiptr>(size));
+ }
+
+ glBeginTransformFeedback(GL_POINTS);
+}
+
+void RasterizerOpenGL::EndTransformFeedback() {
+ const auto& regs = system.GPU().Maxwell3D().regs;
+ if (regs.tfb_enabled == 0) {
return;
}
- state.alpha_test.func = MaxwellToGL::ComparisonOp(regs.alpha_test_func);
- state.alpha_test.ref = regs.alpha_test_ref;
+
+ glEndTransformFeedback();
+
+ for (std::size_t index = 0; index < Maxwell::NumTransformFeedbackBuffers; ++index) {
+ const auto& binding = regs.tfb_bindings[index];
+ if (!binding.buffer_enable) {
+ continue;
+ }
+ UNIMPLEMENTED_IF(binding.buffer_offset != 0);
+
+ const GLuint handle = transform_feedback_buffers[index].handle;
+ const GPUVAddr gpu_addr = binding.Address();
+ const std::size_t size = binding.buffer_size;
+ const auto [dest_buffer, offset] = buffer_cache.UploadMemory(gpu_addr, size, 4, true);
+ glCopyNamedBufferSubData(handle, *dest_buffer, 0, offset, static_cast<GLsizeiptr>(size));
+ }
}
} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index 68abe9a21..2d3be2437 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -30,7 +30,7 @@
#include "video_core/renderer_opengl/gl_shader_cache.h"
#include "video_core/renderer_opengl/gl_shader_decompiler.h"
#include "video_core/renderer_opengl/gl_shader_manager.h"
-#include "video_core/renderer_opengl/gl_state.h"
+#include "video_core/renderer_opengl/gl_state_tracker.h"
#include "video_core/renderer_opengl/gl_texture_cache.h"
#include "video_core/renderer_opengl/utils.h"
#include "video_core/textures/texture.h"
@@ -55,7 +55,8 @@ struct DrawParameters;
class RasterizerOpenGL : public VideoCore::RasterizerAccelerated {
public:
explicit RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window,
- ScreenInfo& info);
+ ScreenInfo& info, GLShader::ProgramManager& program_manager,
+ StateTracker& state_tracker);
~RasterizerOpenGL() override;
void Draw(bool is_indexed, bool is_instanced) override;
@@ -76,6 +77,7 @@ public:
u32 pixel_stride) override;
void LoadDiskResources(const std::atomic_bool& stop_loading,
const VideoCore::DiskResourceLoadCallback& callback) override;
+ void SetupDirtyFlags() override;
/// Returns true when there are commands queued to the OpenGL server.
bool AnyCommandQueued() const {
@@ -86,8 +88,7 @@ private:
/// Configures the color and depth framebuffer states.
void ConfigureFramebuffers();
- void ConfigureClearFramebuffer(OpenGLState& current_state, bool using_color_fb,
- bool using_depth_fb, bool using_stencil_fb);
+ void ConfigureClearFramebuffer(bool using_color_fb, bool using_depth_fb, bool using_stencil_fb);
/// Configures the current constbuffers to use for the draw command.
void SetupDrawConstBuffers(std::size_t stage_index, const Shader& shader);
@@ -97,7 +98,7 @@ private:
/// Configures a constant buffer.
void SetupConstBuffer(u32 binding, const Tegra::Engines::ConstBufferInfo& buffer,
- const GLShader::ConstBufferEntry& entry);
+ const ConstBufferEntry& entry);
/// Configures the current global memory entries to use for the draw command.
void SetupDrawGlobalMemory(std::size_t stage_index, const Shader& shader);
@@ -106,7 +107,7 @@ private:
void SetupComputeGlobalMemory(const Shader& kernel);
/// Configures a constant buffer.
- void SetupGlobalMemory(u32 binding, const GLShader::GlobalMemoryEntry& entry, GPUVAddr gpu_addr,
+ void SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& entry, GPUVAddr gpu_addr,
std::size_t size);
/// Configures the current textures to use for the draw command.
@@ -117,7 +118,7 @@ private:
/// Configures a texture.
void SetupTexture(u32 binding, const Tegra::Texture::FullTextureInfo& texture,
- const GLShader::SamplerEntry& entry);
+ const SamplerEntry& entry);
/// Configures images in a graphics shader.
void SetupDrawImages(std::size_t stage_index, const Shader& shader);
@@ -126,15 +127,16 @@ private:
void SetupComputeImages(const Shader& shader);
/// Configures an image.
- void SetupImage(u32 binding, const Tegra::Texture::TICEntry& tic,
- const GLShader::ImageEntry& entry);
+ void SetupImage(u32 binding, const Tegra::Texture::TICEntry& tic, const ImageEntry& entry);
/// Syncs the viewport and depth range to match the guest state
- void SyncViewport(OpenGLState& current_state);
+ void SyncViewport();
+
+ /// Syncs the depth clamp state
+ void SyncDepthClamp();
/// Syncs the clip enabled status to match the guest state
- void SyncClipEnabled(
- const std::array<bool, Tegra::Engines::Maxwell3D::Regs::NumClipDistances>& clip_mask);
+ void SyncClipEnabled(u32 clip_mask);
/// Syncs the clip coefficients to match the guest state
void SyncClipCoef();
@@ -164,16 +166,16 @@ private:
void SyncMultiSampleState();
/// Syncs the scissor test state to match the guest state
- void SyncScissorTest(OpenGLState& current_state);
-
- /// Syncs the transform feedback state to match the guest state
- void SyncTransformFeedback();
+ void SyncScissorTest();
/// Syncs the point state to match the guest state
void SyncPointState();
/// Syncs the rasterizer enable state to match the guest state
- void SyncRasterizeEnable(OpenGLState& current_state);
+ void SyncRasterizeEnable();
+
+ /// Syncs polygon modes to match the guest state
+ void SyncPolygonModes();
/// Syncs Color Mask
void SyncColorMask();
@@ -184,6 +186,15 @@ private:
/// Syncs the alpha test state to match the guest state
void SyncAlphaTest();
+ /// Syncs the framebuffer sRGB state to match the guest state
+ void SyncFramebufferSRGB();
+
+ /// Begin a transform feedback
+ void BeginTransformFeedback(GLenum primitive_mode);
+
+ /// End a transform feedback
+ void EndTransformFeedback();
+
/// Check for extension that are not strictly required but are needed for correct emulation
void CheckExtensions();
@@ -191,18 +202,17 @@ private:
std::size_t CalculateIndexBufferSize() const;
- /// Updates and returns a vertex array object representing current vertex format
- GLuint SetupVertexFormat();
+ /// Updates the current vertex format
+ void SetupVertexFormat();
- void SetupVertexBuffer(GLuint vao);
- void SetupVertexInstances(GLuint vao);
+ void SetupVertexBuffer();
+ void SetupVertexInstances();
GLintptr SetupIndexBuffer();
void SetupShaders(GLenum primitive_mode);
const Device device;
- OpenGLState state;
TextureCacheOpenGL texture_cache;
ShaderCacheOpenGL shader_cache;
@@ -212,22 +222,25 @@ private:
Core::System& system;
ScreenInfo& screen_info;
-
- std::unique_ptr<GLShader::ProgramManager> shader_program_manager;
- std::map<std::array<Tegra::Engines::Maxwell3D::Regs::VertexAttribute,
- Tegra::Engines::Maxwell3D::Regs::NumVertexAttributes>,
- OGLVertexArray>
- vertex_array_cache;
+ GLShader::ProgramManager& program_manager;
+ StateTracker& state_tracker;
static constexpr std::size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024;
OGLBufferCache buffer_cache;
- VertexArrayPushBuffer vertex_array_pushbuffer;
+ VertexArrayPushBuffer vertex_array_pushbuffer{state_tracker};
BindBuffersRangePushBuffer bind_ubo_pushbuffer{GL_UNIFORM_BUFFER};
BindBuffersRangePushBuffer bind_ssbo_pushbuffer{GL_SHADER_STORAGE_BUFFER};
+ std::array<OGLBuffer, Tegra::Engines::Maxwell3D::Regs::NumTransformFeedbackBuffers>
+ transform_feedback_buffers;
+ std::bitset<Tegra::Engines::Maxwell3D::Regs::NumTransformFeedbackBuffers>
+ enabled_transform_feedback_buffers;
+
/// Number of commands queued to the OpenGL driver. Reseted on flush.
std::size_t num_queued_commands = 0;
+
+ u32 last_clip_distance_mask = 0;
};
} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_resource_manager.cpp b/src/video_core/renderer_opengl/gl_resource_manager.cpp
index f0ddfb276..97803d480 100644
--- a/src/video_core/renderer_opengl/gl_resource_manager.cpp
+++ b/src/video_core/renderer_opengl/gl_resource_manager.cpp
@@ -8,13 +8,29 @@
#include "common/microprofile.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/gl_shader_util.h"
-#include "video_core/renderer_opengl/gl_state.h"
MICROPROFILE_DEFINE(OpenGL_ResourceCreation, "OpenGL", "Resource Creation", MP_RGB(128, 128, 192));
MICROPROFILE_DEFINE(OpenGL_ResourceDeletion, "OpenGL", "Resource Deletion", MP_RGB(128, 128, 192));
namespace OpenGL {
+void OGLRenderbuffer::Create() {
+ if (handle != 0)
+ return;
+
+ MICROPROFILE_SCOPE(OpenGL_ResourceCreation);
+ glCreateRenderbuffers(1, &handle);
+}
+
+void OGLRenderbuffer::Release() {
+ if (handle == 0)
+ return;
+
+ MICROPROFILE_SCOPE(OpenGL_ResourceDeletion);
+ glDeleteRenderbuffers(1, &handle);
+ handle = 0;
+}
+
void OGLTexture::Create(GLenum target) {
if (handle != 0)
return;
@@ -29,7 +45,6 @@ void OGLTexture::Release() {
MICROPROFILE_SCOPE(OpenGL_ResourceDeletion);
glDeleteTextures(1, &handle);
- OpenGLState::GetCurState().UnbindTexture(handle).Apply();
handle = 0;
}
@@ -47,7 +62,6 @@ void OGLTextureView::Release() {
MICROPROFILE_SCOPE(OpenGL_ResourceDeletion);
glDeleteTextures(1, &handle);
- OpenGLState::GetCurState().UnbindTexture(handle).Apply();
handle = 0;
}
@@ -65,7 +79,6 @@ void OGLSampler::Release() {
MICROPROFILE_SCOPE(OpenGL_ResourceDeletion);
glDeleteSamplers(1, &handle);
- OpenGLState::GetCurState().ResetSampler(handle).Apply();
handle = 0;
}
@@ -109,7 +122,6 @@ void OGLProgram::Release() {
MICROPROFILE_SCOPE(OpenGL_ResourceDeletion);
glDeleteProgram(handle);
- OpenGLState::GetCurState().ResetProgram(handle).Apply();
handle = 0;
}
@@ -127,7 +139,6 @@ void OGLPipeline::Release() {
MICROPROFILE_SCOPE(OpenGL_ResourceDeletion);
glDeleteProgramPipelines(1, &handle);
- OpenGLState::GetCurState().ResetPipeline(handle).Apply();
handle = 0;
}
@@ -171,24 +182,6 @@ void OGLSync::Release() {
handle = 0;
}
-void OGLVertexArray::Create() {
- if (handle != 0)
- return;
-
- MICROPROFILE_SCOPE(OpenGL_ResourceCreation);
- glCreateVertexArrays(1, &handle);
-}
-
-void OGLVertexArray::Release() {
- if (handle == 0)
- return;
-
- MICROPROFILE_SCOPE(OpenGL_ResourceDeletion);
- glDeleteVertexArrays(1, &handle);
- OpenGLState::GetCurState().ResetVertexArray(handle).Apply();
- handle = 0;
-}
-
void OGLFramebuffer::Create() {
if (handle != 0)
return;
@@ -203,7 +196,6 @@ void OGLFramebuffer::Release() {
MICROPROFILE_SCOPE(OpenGL_ResourceDeletion);
glDeleteFramebuffers(1, &handle);
- OpenGLState::GetCurState().ResetFramebuffer(handle).Apply();
handle = 0;
}
diff --git a/src/video_core/renderer_opengl/gl_resource_manager.h b/src/video_core/renderer_opengl/gl_resource_manager.h
index 514d1d165..de93f4212 100644
--- a/src/video_core/renderer_opengl/gl_resource_manager.h
+++ b/src/video_core/renderer_opengl/gl_resource_manager.h
@@ -11,6 +11,31 @@
namespace OpenGL {
+class OGLRenderbuffer : private NonCopyable {
+public:
+ OGLRenderbuffer() = default;
+
+ OGLRenderbuffer(OGLRenderbuffer&& o) noexcept : handle(std::exchange(o.handle, 0)) {}
+
+ ~OGLRenderbuffer() {
+ Release();
+ }
+
+ OGLRenderbuffer& operator=(OGLRenderbuffer&& o) noexcept {
+ Release();
+ handle = std::exchange(o.handle, 0);
+ return *this;
+ }
+
+ /// Creates a new internal OpenGL resource and stores the handle
+ void Create();
+
+ /// Deletes the internal OpenGL resource
+ void Release();
+
+ GLuint handle = 0;
+};
+
class OGLTexture : private NonCopyable {
public:
OGLTexture() = default;
@@ -216,31 +241,6 @@ public:
GLsync handle = 0;
};
-class OGLVertexArray : private NonCopyable {
-public:
- OGLVertexArray() = default;
-
- OGLVertexArray(OGLVertexArray&& o) noexcept : handle(std::exchange(o.handle, 0)) {}
-
- ~OGLVertexArray() {
- Release();
- }
-
- OGLVertexArray& operator=(OGLVertexArray&& o) noexcept {
- Release();
- handle = std::exchange(o.handle, 0);
- return *this;
- }
-
- /// Creates a new internal OpenGL resource and stores the handle
- void Create();
-
- /// Deletes the internal OpenGL resource
- void Release();
-
- GLuint handle = 0;
-};
-
class OGLFramebuffer : private NonCopyable {
public:
OGLFramebuffer() = default;
diff --git a/src/video_core/renderer_opengl/gl_sampler_cache.cpp b/src/video_core/renderer_opengl/gl_sampler_cache.cpp
index 3ded5ecea..5c174879a 100644
--- a/src/video_core/renderer_opengl/gl_sampler_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_sampler_cache.cpp
@@ -38,7 +38,7 @@ OGLSampler SamplerCacheOpenGL::CreateSampler(const Tegra::Texture::TSCEntry& tsc
glSamplerParameterf(sampler_id, GL_TEXTURE_MAX_ANISOTROPY, tsc.GetMaxAnisotropy());
} else if (GLAD_GL_EXT_texture_filter_anisotropic) {
glSamplerParameterf(sampler_id, GL_TEXTURE_MAX_ANISOTROPY_EXT, tsc.GetMaxAnisotropy());
- } else if (tsc.GetMaxAnisotropy() != 1) {
+ } else {
LOG_WARNING(Render_OpenGL, "Anisotropy not supported by host GPU driver");
}
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index 489eb143c..e3d31c3eb 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -2,12 +2,16 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
+#include <atomic>
+#include <functional>
#include <mutex>
#include <optional>
#include <string>
#include <thread>
#include <unordered_set>
+
#include <boost/functional/hash.hpp>
+
#include "common/alignment.h"
#include "common/assert.h"
#include "common/logging/log.h"
@@ -22,14 +26,16 @@
#include "video_core/renderer_opengl/gl_shader_cache.h"
#include "video_core/renderer_opengl/gl_shader_decompiler.h"
#include "video_core/renderer_opengl/gl_shader_disk_cache.h"
+#include "video_core/renderer_opengl/gl_state_tracker.h"
#include "video_core/renderer_opengl/utils.h"
+#include "video_core/shader/registry.h"
#include "video_core/shader/shader_ir.h"
namespace OpenGL {
using Tegra::Engines::ShaderType;
-using VideoCommon::Shader::ConstBufferLocker;
using VideoCommon::Shader::ProgramCode;
+using VideoCommon::Shader::Registry;
using VideoCommon::Shader::ShaderIR;
namespace {
@@ -55,7 +61,7 @@ constexpr bool IsSchedInstruction(std::size_t offset, std::size_t main_offset) {
}
/// Calculates the size of a program stream
-std::size_t CalculateProgramSize(const GLShader::ProgramCode& program) {
+std::size_t CalculateProgramSize(const ProgramCode& program) {
constexpr std::size_t start_offset = 10;
// This is the encoded version of BRA that jumps to itself. All Nvidia
// shaders end with one.
@@ -108,32 +114,9 @@ constexpr GLenum GetGLShaderType(ShaderType shader_type) {
}
}
-/// Describes primitive behavior on geometry shaders
-constexpr std::pair<const char*, u32> GetPrimitiveDescription(GLenum primitive_mode) {
- switch (primitive_mode) {
- case GL_POINTS:
- return {"points", 1};
- case GL_LINES:
- case GL_LINE_STRIP:
- return {"lines", 2};
- case GL_LINES_ADJACENCY:
- case GL_LINE_STRIP_ADJACENCY:
- return {"lines_adjacency", 4};
- case GL_TRIANGLES:
- case GL_TRIANGLE_STRIP:
- case GL_TRIANGLE_FAN:
- return {"triangles", 3};
- case GL_TRIANGLES_ADJACENCY:
- case GL_TRIANGLE_STRIP_ADJACENCY:
- return {"triangles_adjacency", 6};
- default:
- return {"points", 1};
- }
-}
-
/// Hashes one (or two) program streams
u64 GetUniqueIdentifier(ShaderType shader_type, bool is_a, const ProgramCode& code,
- const ProgramCode& code_b) {
+ const ProgramCode& code_b = {}) {
u64 unique_identifier = boost::hash_value(code);
if (is_a) {
// VertexA programs include two programs
@@ -142,24 +125,6 @@ u64 GetUniqueIdentifier(ShaderType shader_type, bool is_a, const ProgramCode& co
return unique_identifier;
}
-/// Creates an unspecialized program from code streams
-std::string GenerateGLSL(const Device& device, ShaderType shader_type, const ShaderIR& ir,
- const std::optional<ShaderIR>& ir_b) {
- switch (shader_type) {
- case ShaderType::Vertex:
- return GLShader::GenerateVertexShader(device, ir, ir_b ? &*ir_b : nullptr);
- case ShaderType::Geometry:
- return GLShader::GenerateGeometryShader(device, ir);
- case ShaderType::Fragment:
- return GLShader::GenerateFragmentShader(device, ir);
- case ShaderType::Compute:
- return GLShader::GenerateComputeShader(device, ir);
- default:
- UNIMPLEMENTED_MSG("Unimplemented shader_type={}", static_cast<u32>(shader_type));
- return {};
- }
-}
-
constexpr const char* GetShaderTypeName(ShaderType shader_type) {
switch (shader_type) {
case ShaderType::Vertex:
@@ -195,102 +160,38 @@ constexpr ShaderType GetShaderType(Maxwell::ShaderProgram program_type) {
return {};
}
-std::string GetShaderId(u64 unique_identifier, ShaderType shader_type) {
+std::string MakeShaderID(u64 unique_identifier, ShaderType shader_type) {
return fmt::format("{}{:016X}", GetShaderTypeName(shader_type), unique_identifier);
}
-Tegra::Engines::ConstBufferEngineInterface& GetConstBufferEngineInterface(Core::System& system,
- ShaderType shader_type) {
- if (shader_type == ShaderType::Compute) {
- return system.GPU().KeplerCompute();
- } else {
- return system.GPU().Maxwell3D();
- }
-}
-
-std::unique_ptr<ConstBufferLocker> MakeLocker(Core::System& system, ShaderType shader_type) {
- return std::make_unique<ConstBufferLocker>(shader_type,
- GetConstBufferEngineInterface(system, shader_type));
-}
-
-void FillLocker(ConstBufferLocker& locker, const ShaderDiskCacheUsage& usage) {
- locker.SetBoundBuffer(usage.bound_buffer);
- for (const auto& key : usage.keys) {
- const auto [buffer, offset] = key.first;
- locker.InsertKey(buffer, offset, key.second);
+std::shared_ptr<Registry> MakeRegistry(const ShaderDiskCacheEntry& entry) {
+ const VideoCore::GuestDriverProfile guest_profile{entry.texture_handler_size};
+ const VideoCommon::Shader::SerializedRegistryInfo info{guest_profile, entry.bound_buffer,
+ entry.graphics_info, entry.compute_info};
+ const auto registry = std::make_shared<Registry>(entry.type, info);
+ for (const auto& [address, value] : entry.keys) {
+ const auto [buffer, offset] = address;
+ registry->InsertKey(buffer, offset, value);
}
- for (const auto& [offset, sampler] : usage.bound_samplers) {
- locker.InsertBoundSampler(offset, sampler);
+ for (const auto& [offset, sampler] : entry.bound_samplers) {
+ registry->InsertBoundSampler(offset, sampler);
}
- for (const auto& [key, sampler] : usage.bindless_samplers) {
+ for (const auto& [key, sampler] : entry.bindless_samplers) {
const auto [buffer, offset] = key;
- locker.InsertBindlessSampler(buffer, offset, sampler);
+ registry->InsertBindlessSampler(buffer, offset, sampler);
}
+ return registry;
}
-CachedProgram BuildShader(const Device& device, u64 unique_identifier, ShaderType shader_type,
- const ProgramCode& code, const ProgramCode& code_b,
- ConstBufferLocker& locker, const ProgramVariant& variant,
- bool hint_retrievable = false) {
- LOG_INFO(Render_OpenGL, "called. {}", GetShaderId(unique_identifier, shader_type));
-
- const bool is_compute = shader_type == ShaderType::Compute;
- const u32 main_offset = is_compute ? KERNEL_MAIN_OFFSET : STAGE_MAIN_OFFSET;
- const ShaderIR ir(code, main_offset, COMPILER_SETTINGS, locker);
- std::optional<ShaderIR> ir_b;
- if (!code_b.empty()) {
- ir_b.emplace(code_b, main_offset, COMPILER_SETTINGS, locker);
- }
-
- std::string source = fmt::format(R"(// {}
-#version 430 core
-#extension GL_ARB_separate_shader_objects : enable
-)",
- GetShaderId(unique_identifier, shader_type));
- if (device.HasShaderBallot()) {
- source += "#extension GL_ARB_shader_ballot : require\n";
- }
- if (device.HasVertexViewportLayer()) {
- source += "#extension GL_ARB_shader_viewport_layer_array : require\n";
- }
- if (device.HasImageLoadFormatted()) {
- source += "#extension GL_EXT_shader_image_load_formatted : require\n";
- }
- if (device.HasWarpIntrinsics()) {
- source += "#extension GL_NV_gpu_shader5 : require\n"
- "#extension GL_NV_shader_thread_group : require\n"
- "#extension GL_NV_shader_thread_shuffle : require\n";
- }
- // This pragma stops Nvidia's driver from over optimizing math (probably using fp16 operations)
- // on places where we don't want to.
- // Thanks to Ryujinx for finding this workaround.
- source += "#pragma optionNV(fastmath off)\n";
-
- if (shader_type == ShaderType::Geometry) {
- const auto [glsl_topology, max_vertices] = GetPrimitiveDescription(variant.primitive_mode);
- source += fmt::format("#define MAX_VERTEX_INPUT {}\n", max_vertices);
- source += fmt::format("layout ({}) in;\n", glsl_topology);
- }
- if (shader_type == ShaderType::Compute) {
- if (variant.local_memory_size > 0) {
- source += fmt::format("#define LOCAL_MEMORY_SIZE {}\n",
- Common::AlignUp(variant.local_memory_size, 4) / 4);
- }
- source +=
- fmt::format("layout (local_size_x = {}, local_size_y = {}, local_size_z = {}) in;\n",
- variant.block_x, variant.block_y, variant.block_z);
-
- if (variant.shared_memory_size > 0) {
- // shared_memory_size is described in number of words
- source += fmt::format("shared uint smem[{}];\n", variant.shared_memory_size);
- }
- }
-
- source += '\n';
- source += GenerateGLSL(device, shader_type, ir, ir_b);
+std::shared_ptr<OGLProgram> BuildShader(const Device& device, ShaderType shader_type,
+ u64 unique_identifier, const ShaderIR& ir,
+ const Registry& registry, bool hint_retrievable = false) {
+ const std::string shader_id = MakeShaderID(unique_identifier, shader_type);
+ LOG_INFO(Render_OpenGL, "{}", shader_id);
+ const std::string glsl = DecompileShader(device, ir, registry, shader_type, shader_id);
OGLShader shader;
- shader.Create(source.c_str(), GetGLShaderType(shader_type));
+ shader.Create(glsl.c_str(), GetGLShaderType(shader_type));
auto program = std::make_shared<OGLProgram>();
program->Create(true, hint_retrievable, shader.handle);
@@ -298,7 +199,7 @@ CachedProgram BuildShader(const Device& device, u64 unique_identifier, ShaderTyp
}
std::unordered_set<GLenum> GetSupportedFormats() {
- GLint num_formats{};
+ GLint num_formats;
glGetIntegerv(GL_NUM_PROGRAM_BINARY_FORMATS, &num_formats);
std::vector<GLint> formats(num_formats);
@@ -313,115 +214,82 @@ std::unordered_set<GLenum> GetSupportedFormats() {
} // Anonymous namespace
-CachedShader::CachedShader(const ShaderParameters& params, ShaderType shader_type,
- GLShader::ShaderEntries entries, ProgramCode code, ProgramCode code_b)
- : RasterizerCacheObject{params.host_ptr}, system{params.system},
- disk_cache{params.disk_cache}, device{params.device}, cpu_addr{params.cpu_addr},
- unique_identifier{params.unique_identifier}, shader_type{shader_type},
- entries{std::move(entries)}, code{std::move(code)}, code_b{std::move(code_b)} {
- if (!params.precompiled_variants) {
- return;
- }
- for (const auto& pair : *params.precompiled_variants) {
- auto locker = MakeLocker(system, shader_type);
- const auto& usage = pair->first;
- FillLocker(*locker, usage);
-
- std::unique_ptr<LockerVariant>* locker_variant = nullptr;
- const auto it =
- std::find_if(locker_variants.begin(), locker_variants.end(), [&](const auto& variant) {
- return variant->locker->HasEqualKeys(*locker);
- });
- if (it == locker_variants.end()) {
- locker_variant = &locker_variants.emplace_back();
- *locker_variant = std::make_unique<LockerVariant>();
- locker_variant->get()->locker = std::move(locker);
- } else {
- locker_variant = &*it;
- }
- locker_variant->get()->programs.emplace(usage.variant, pair->second);
- }
+CachedShader::CachedShader(const u8* host_ptr, VAddr cpu_addr, std::size_t size_in_bytes,
+ std::shared_ptr<VideoCommon::Shader::Registry> registry,
+ ShaderEntries entries, std::shared_ptr<OGLProgram> program)
+ : RasterizerCacheObject{host_ptr}, registry{std::move(registry)}, entries{std::move(entries)},
+ cpu_addr{cpu_addr}, size_in_bytes{size_in_bytes}, program{std::move(program)} {}
+
+CachedShader::~CachedShader() = default;
+
+GLuint CachedShader::GetHandle() const {
+ DEBUG_ASSERT(registry->IsConsistent());
+ return program->handle;
}
Shader CachedShader::CreateStageFromMemory(const ShaderParameters& params,
Maxwell::ShaderProgram program_type, ProgramCode code,
ProgramCode code_b) {
const auto shader_type = GetShaderType(program_type);
- params.disk_cache.SaveRaw(
- ShaderDiskCacheRaw(params.unique_identifier, shader_type, code, code_b));
+ const std::size_t size_in_bytes = code.size() * sizeof(u64);
- ConstBufferLocker locker(shader_type, params.system.GPU().Maxwell3D());
- const ShaderIR ir(code, STAGE_MAIN_OFFSET, COMPILER_SETTINGS, locker);
+ auto registry = std::make_shared<Registry>(shader_type, params.system.GPU().Maxwell3D());
+ const ShaderIR ir(code, STAGE_MAIN_OFFSET, COMPILER_SETTINGS, *registry);
// TODO(Rodrigo): Handle VertexA shaders
// std::optional<ShaderIR> ir_b;
// if (!code_b.empty()) {
// ir_b.emplace(code_b, STAGE_MAIN_OFFSET);
// }
- return std::shared_ptr<CachedShader>(new CachedShader(
- params, shader_type, GLShader::GetEntries(ir), std::move(code), std::move(code_b)));
+ auto program = BuildShader(params.device, shader_type, params.unique_identifier, ir, *registry);
+
+ ShaderDiskCacheEntry entry;
+ entry.type = shader_type;
+ entry.code = std::move(code);
+ entry.code_b = std::move(code_b);
+ entry.unique_identifier = params.unique_identifier;
+ entry.bound_buffer = registry->GetBoundBuffer();
+ entry.graphics_info = registry->GetGraphicsInfo();
+ entry.keys = registry->GetKeys();
+ entry.bound_samplers = registry->GetBoundSamplers();
+ entry.bindless_samplers = registry->GetBindlessSamplers();
+ params.disk_cache.SaveEntry(std::move(entry));
+
+ return std::shared_ptr<CachedShader>(new CachedShader(params.host_ptr, params.cpu_addr,
+ size_in_bytes, std::move(registry),
+ MakeEntries(ir), std::move(program)));
}
Shader CachedShader::CreateKernelFromMemory(const ShaderParameters& params, ProgramCode code) {
- params.disk_cache.SaveRaw(
- ShaderDiskCacheRaw(params.unique_identifier, ShaderType::Compute, code));
-
- ConstBufferLocker locker(Tegra::Engines::ShaderType::Compute,
- params.system.GPU().KeplerCompute());
- const ShaderIR ir(code, KERNEL_MAIN_OFFSET, COMPILER_SETTINGS, locker);
- return std::shared_ptr<CachedShader>(new CachedShader(
- params, ShaderType::Compute, GLShader::GetEntries(ir), std::move(code), {}));
+ const std::size_t size_in_bytes = code.size() * sizeof(u64);
+
+ auto& engine = params.system.GPU().KeplerCompute();
+ auto registry = std::make_shared<Registry>(ShaderType::Compute, engine);
+ const ShaderIR ir(code, KERNEL_MAIN_OFFSET, COMPILER_SETTINGS, *registry);
+ const u64 uid = params.unique_identifier;
+ auto program = BuildShader(params.device, ShaderType::Compute, uid, ir, *registry);
+
+ ShaderDiskCacheEntry entry;
+ entry.type = ShaderType::Compute;
+ entry.code = std::move(code);
+ entry.unique_identifier = uid;
+ entry.bound_buffer = registry->GetBoundBuffer();
+ entry.compute_info = registry->GetComputeInfo();
+ entry.keys = registry->GetKeys();
+ entry.bound_samplers = registry->GetBoundSamplers();
+ entry.bindless_samplers = registry->GetBindlessSamplers();
+ params.disk_cache.SaveEntry(std::move(entry));
+
+ return std::shared_ptr<CachedShader>(new CachedShader(params.host_ptr, params.cpu_addr,
+ size_in_bytes, std::move(registry),
+ MakeEntries(ir), std::move(program)));
}
Shader CachedShader::CreateFromCache(const ShaderParameters& params,
- const UnspecializedShader& unspecialized) {
- return std::shared_ptr<CachedShader>(new CachedShader(params, unspecialized.type,
- unspecialized.entries, unspecialized.code,
- unspecialized.code_b));
-}
-
-GLuint CachedShader::GetHandle(const ProgramVariant& variant) {
- EnsureValidLockerVariant();
-
- const auto [entry, is_cache_miss] = curr_locker_variant->programs.try_emplace(variant);
- auto& program = entry->second;
- if (!is_cache_miss) {
- return program->handle;
- }
-
- program = BuildShader(device, unique_identifier, shader_type, code, code_b,
- *curr_locker_variant->locker, variant);
- disk_cache.SaveUsage(GetUsage(variant, *curr_locker_variant->locker));
-
- LabelGLObject(GL_PROGRAM, program->handle, cpu_addr);
- return program->handle;
-}
-
-bool CachedShader::EnsureValidLockerVariant() {
- const auto previous_variant = curr_locker_variant;
- if (curr_locker_variant && !curr_locker_variant->locker->IsConsistent()) {
- curr_locker_variant = nullptr;
- }
- if (!curr_locker_variant) {
- for (auto& variant : locker_variants) {
- if (variant->locker->IsConsistent()) {
- curr_locker_variant = variant.get();
- }
- }
- }
- if (!curr_locker_variant) {
- auto& new_variant = locker_variants.emplace_back();
- new_variant = std::make_unique<LockerVariant>();
- new_variant->locker = MakeLocker(system, shader_type);
- curr_locker_variant = new_variant.get();
- }
- return previous_variant == curr_locker_variant;
-}
-
-ShaderDiskCacheUsage CachedShader::GetUsage(const ProgramVariant& variant,
- const ConstBufferLocker& locker) const {
- return ShaderDiskCacheUsage{unique_identifier, variant,
- locker.GetBoundBuffer(), locker.GetKeys(),
- locker.GetBoundSamplers(), locker.GetBindlessSamplers()};
+ const PrecompiledShader& precompiled_shader,
+ std::size_t size_in_bytes) {
+ return std::shared_ptr<CachedShader>(new CachedShader(
+ params.host_ptr, params.cpu_addr, size_in_bytes, precompiled_shader.registry,
+ precompiled_shader.entries, precompiled_shader.program));
}
ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system,
@@ -431,16 +299,12 @@ ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System&
void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
const VideoCore::DiskResourceLoadCallback& callback) {
- const auto transferable = disk_cache.LoadTransferable();
+ const std::optional transferable = disk_cache.LoadTransferable();
if (!transferable) {
return;
}
- const auto [raws, shader_usages] = *transferable;
- if (!GenerateUnspecializedShaders(stop_loading, callback, raws) || stop_loading) {
- return;
- }
- const auto dumps = disk_cache.LoadPrecompiled();
+ const std::vector gl_cache = disk_cache.LoadPrecompiled();
const auto supported_formats = GetSupportedFormats();
// Track if precompiled cache was altered during loading to know if we have to
@@ -449,77 +313,82 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
// Inform the frontend about shader build initialization
if (callback) {
- callback(VideoCore::LoadCallbackStage::Build, 0, shader_usages.size());
+ callback(VideoCore::LoadCallbackStage::Build, 0, transferable->size());
}
std::mutex mutex;
std::size_t built_shaders = 0; // It doesn't have be atomic since it's used behind a mutex
- std::atomic_bool compilation_failed = false;
+ std::atomic_bool gl_cache_failed = false;
- const auto Worker = [&](Core::Frontend::GraphicsContext* context, std::size_t begin,
- std::size_t end, const std::vector<ShaderDiskCacheUsage>& shader_usages,
- const ShaderDumpsMap& dumps) {
+ const auto find_precompiled = [&gl_cache](u64 id) {
+ return std::find_if(gl_cache.begin(), gl_cache.end(),
+ [id](const auto& entry) { return entry.unique_identifier == id; });
+ };
+
+ const auto worker = [&](Core::Frontend::GraphicsContext* context, std::size_t begin,
+ std::size_t end) {
context->MakeCurrent();
SCOPE_EXIT({ return context->DoneCurrent(); });
for (std::size_t i = begin; i < end; ++i) {
- if (stop_loading || compilation_failed) {
+ if (stop_loading) {
return;
}
- const auto& usage{shader_usages[i]};
- const auto& unspecialized{unspecialized_shaders.at(usage.unique_identifier)};
- const auto dump{dumps.find(usage)};
-
- CachedProgram shader;
- if (dump != dumps.end()) {
- // If the shader is dumped, attempt to load it with
- shader = GeneratePrecompiledProgram(dump->second, supported_formats);
- if (!shader) {
- compilation_failed = true;
- return;
+ const auto& entry = (*transferable)[i];
+ const u64 uid = entry.unique_identifier;
+ const auto it = find_precompiled(uid);
+ const auto precompiled_entry = it != gl_cache.end() ? &*it : nullptr;
+
+ const bool is_compute = entry.type == ShaderType::Compute;
+ const u32 main_offset = is_compute ? KERNEL_MAIN_OFFSET : STAGE_MAIN_OFFSET;
+ auto registry = MakeRegistry(entry);
+ const ShaderIR ir(entry.code, main_offset, COMPILER_SETTINGS, *registry);
+
+ std::shared_ptr<OGLProgram> program;
+ if (precompiled_entry) {
+ // If the shader is precompiled, attempt to load it with
+ program = GeneratePrecompiledProgram(entry, *precompiled_entry, supported_formats);
+ if (!program) {
+ gl_cache_failed = true;
}
}
- if (!shader) {
- auto locker{MakeLocker(system, unspecialized.type)};
- FillLocker(*locker, usage);
-
- shader = BuildShader(device, usage.unique_identifier, unspecialized.type,
- unspecialized.code, unspecialized.code_b, *locker,
- usage.variant, true);
+ if (!program) {
+ // Otherwise compile it from GLSL
+ program = BuildShader(device, entry.type, uid, ir, *registry, true);
}
+ PrecompiledShader shader;
+ shader.program = std::move(program);
+ shader.registry = std::move(registry);
+ shader.entries = MakeEntries(ir);
+
std::scoped_lock lock{mutex};
if (callback) {
callback(VideoCore::LoadCallbackStage::Build, ++built_shaders,
- shader_usages.size());
+ transferable->size());
}
-
- precompiled_programs.emplace(usage, std::move(shader));
-
- // TODO(Rodrigo): Is there a better way to do this?
- precompiled_variants[usage.unique_identifier].push_back(
- precompiled_programs.find(usage));
+ runtime_cache.emplace(entry.unique_identifier, std::move(shader));
}
};
const auto num_workers{static_cast<std::size_t>(std::thread::hardware_concurrency() + 1ULL)};
- const std::size_t bucket_size{shader_usages.size() / num_workers};
+ const std::size_t bucket_size{transferable->size() / num_workers};
std::vector<std::unique_ptr<Core::Frontend::GraphicsContext>> contexts(num_workers);
std::vector<std::thread> threads(num_workers);
for (std::size_t i = 0; i < num_workers; ++i) {
const bool is_last_worker = i + 1 == num_workers;
const std::size_t start{bucket_size * i};
- const std::size_t end{is_last_worker ? shader_usages.size() : start + bucket_size};
+ const std::size_t end{is_last_worker ? transferable->size() : start + bucket_size};
// On some platforms the shared context has to be created from the GUI thread
contexts[i] = emu_window.CreateSharedContext();
- threads[i] = std::thread(Worker, contexts[i].get(), start, end, shader_usages, dumps);
+ threads[i] = std::thread(worker, contexts[i].get(), start, end);
}
for (auto& thread : threads) {
thread.join();
}
- if (compilation_failed) {
+ if (gl_cache_failed) {
// Invalidate the precompiled cache if a shader dumped shader was rejected
disk_cache.InvalidatePrecompiled();
precompiled_cache_altered = true;
@@ -532,11 +401,12 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
// TODO(Rodrigo): Do state tracking for transferable shaders and do a dummy draw
// before precompiling them
- for (std::size_t i = 0; i < shader_usages.size(); ++i) {
- const auto& usage{shader_usages[i]};
- if (dumps.find(usage) == dumps.end()) {
- const auto& program{precompiled_programs.at(usage)};
- disk_cache.SaveDump(usage, program->handle);
+ for (std::size_t i = 0; i < transferable->size(); ++i) {
+ const u64 id = (*transferable)[i].unique_identifier;
+ const auto it = find_precompiled(id);
+ if (it == gl_cache.end()) {
+ const GLuint program = runtime_cache.at(id).program->handle;
+ disk_cache.SavePrecompiled(id, program);
precompiled_cache_altered = true;
}
}
@@ -546,84 +416,33 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
}
}
-const PrecompiledVariants* ShaderCacheOpenGL::GetPrecompiledVariants(u64 unique_identifier) const {
- const auto it = precompiled_variants.find(unique_identifier);
- return it == precompiled_variants.end() ? nullptr : &it->second;
-}
-
-CachedProgram ShaderCacheOpenGL::GeneratePrecompiledProgram(
- const ShaderDiskCacheDump& dump, const std::unordered_set<GLenum>& supported_formats) {
- if (supported_formats.find(dump.binary_format) == supported_formats.end()) {
- LOG_INFO(Render_OpenGL, "Precompiled cache entry with unsupported format - removing");
+std::shared_ptr<OGLProgram> ShaderCacheOpenGL::GeneratePrecompiledProgram(
+ const ShaderDiskCacheEntry& entry, const ShaderDiskCachePrecompiled& precompiled_entry,
+ const std::unordered_set<GLenum>& supported_formats) {
+ if (supported_formats.find(precompiled_entry.binary_format) == supported_formats.end()) {
+ LOG_INFO(Render_OpenGL, "Precompiled cache entry with unsupported format, removing");
return {};
}
- CachedProgram shader = std::make_shared<OGLProgram>();
- shader->handle = glCreateProgram();
- glProgramParameteri(shader->handle, GL_PROGRAM_SEPARABLE, GL_TRUE);
- glProgramBinary(shader->handle, dump.binary_format, dump.binary.data(),
- static_cast<GLsizei>(dump.binary.size()));
-
- GLint link_status{};
- glGetProgramiv(shader->handle, GL_LINK_STATUS, &link_status);
+ auto program = std::make_shared<OGLProgram>();
+ program->handle = glCreateProgram();
+ glProgramParameteri(program->handle, GL_PROGRAM_SEPARABLE, GL_TRUE);
+ glProgramBinary(program->handle, precompiled_entry.binary_format,
+ precompiled_entry.binary.data(),
+ static_cast<GLsizei>(precompiled_entry.binary.size()));
+
+ GLint link_status;
+ glGetProgramiv(program->handle, GL_LINK_STATUS, &link_status);
if (link_status == GL_FALSE) {
- LOG_INFO(Render_OpenGL, "Precompiled cache rejected by the driver - removing");
+ LOG_INFO(Render_OpenGL, "Precompiled cache rejected by the driver, removing");
return {};
}
- return shader;
-}
-
-bool ShaderCacheOpenGL::GenerateUnspecializedShaders(
- const std::atomic_bool& stop_loading, const VideoCore::DiskResourceLoadCallback& callback,
- const std::vector<ShaderDiskCacheRaw>& raws) {
- if (callback) {
- callback(VideoCore::LoadCallbackStage::Decompile, 0, raws.size());
- }
-
- for (std::size_t i = 0; i < raws.size(); ++i) {
- if (stop_loading) {
- return false;
- }
- const auto& raw{raws[i]};
- const u64 unique_identifier{raw.GetUniqueIdentifier()};
- const u64 calculated_hash{
- GetUniqueIdentifier(raw.GetType(), raw.HasProgramA(), raw.GetCode(), raw.GetCodeB())};
- if (unique_identifier != calculated_hash) {
- LOG_ERROR(Render_OpenGL,
- "Invalid hash in entry={:016x} (obtained hash={:016x}) - "
- "removing shader cache",
- raw.GetUniqueIdentifier(), calculated_hash);
- disk_cache.InvalidateTransferable();
- return false;
- }
-
- const u32 main_offset =
- raw.GetType() == ShaderType::Compute ? KERNEL_MAIN_OFFSET : STAGE_MAIN_OFFSET;
- ConstBufferLocker locker(raw.GetType());
- const ShaderIR ir(raw.GetCode(), main_offset, COMPILER_SETTINGS, locker);
- // TODO(Rodrigo): Handle VertexA shaders
- // std::optional<ShaderIR> ir_b;
- // if (raw.HasProgramA()) {
- // ir_b.emplace(raw.GetProgramCodeB(), main_offset);
- // }
-
- UnspecializedShader unspecialized;
- unspecialized.entries = GLShader::GetEntries(ir);
- unspecialized.type = raw.GetType();
- unspecialized.code = raw.GetCode();
- unspecialized.code_b = raw.GetCodeB();
- unspecialized_shaders.emplace(raw.GetUniqueIdentifier(), unspecialized);
-
- if (callback) {
- callback(VideoCore::LoadCallbackStage::Decompile, i, raws.size());
- }
- }
- return true;
+ return program;
}
Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
- if (!system.GPU().Maxwell3D().dirty.shaders) {
+ if (!system.GPU().Maxwell3D().dirty.flags[Dirty::Shaders]) {
return last_shaders[static_cast<std::size_t>(program)];
}
@@ -647,17 +466,17 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
const auto unique_identifier = GetUniqueIdentifier(
GetShaderType(program), program == Maxwell::ShaderProgram::VertexA, code, code_b);
- const auto precompiled_variants = GetPrecompiledVariants(unique_identifier);
const auto cpu_addr{*memory_manager.GpuToCpuAddress(address)};
- const ShaderParameters params{system, disk_cache, precompiled_variants, device,
+ const ShaderParameters params{system, disk_cache, device,
cpu_addr, host_ptr, unique_identifier};
- const auto found = unspecialized_shaders.find(unique_identifier);
- if (found == unspecialized_shaders.end()) {
+ const auto found = runtime_cache.find(unique_identifier);
+ if (found == runtime_cache.end()) {
shader = CachedShader::CreateStageFromMemory(params, program, std::move(code),
std::move(code_b));
} else {
- shader = CachedShader::CreateFromCache(params, found->second);
+ const std::size_t size_in_bytes = code.size() * sizeof(u64);
+ shader = CachedShader::CreateFromCache(params, found->second, size_in_bytes);
}
Register(shader);
@@ -672,19 +491,19 @@ Shader ShaderCacheOpenGL::GetComputeKernel(GPUVAddr code_addr) {
return kernel;
}
- // No kernel found - create a new one
+ // No kernel found, create a new one
auto code{GetShaderCode(memory_manager, code_addr, host_ptr)};
- const auto unique_identifier{GetUniqueIdentifier(ShaderType::Compute, false, code, {})};
- const auto precompiled_variants = GetPrecompiledVariants(unique_identifier);
+ const auto unique_identifier{GetUniqueIdentifier(ShaderType::Compute, false, code)};
const auto cpu_addr{*memory_manager.GpuToCpuAddress(code_addr)};
- const ShaderParameters params{system, disk_cache, precompiled_variants, device,
+ const ShaderParameters params{system, disk_cache, device,
cpu_addr, host_ptr, unique_identifier};
- const auto found = unspecialized_shaders.find(unique_identifier);
- if (found == unspecialized_shaders.end()) {
+ const auto found = runtime_cache.find(unique_identifier);
+ if (found == runtime_cache.end()) {
kernel = CachedShader::CreateKernelFromMemory(params, std::move(code));
} else {
- kernel = CachedShader::CreateFromCache(params, found->second);
+ const std::size_t size_in_bytes = code.size() * sizeof(u64);
+ kernel = CachedShader::CreateFromCache(params, found->second, size_in_bytes);
}
Register(kernel);
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h
index 7b1470db3..4935019fc 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.h
+++ b/src/video_core/renderer_opengl/gl_shader_cache.h
@@ -22,7 +22,7 @@
#include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/gl_shader_decompiler.h"
#include "video_core/renderer_opengl/gl_shader_disk_cache.h"
-#include "video_core/shader/const_buffer_locker.h"
+#include "video_core/shader/registry.h"
#include "video_core/shader/shader_ir.h"
namespace Core {
@@ -41,22 +41,17 @@ class RasterizerOpenGL;
struct UnspecializedShader;
using Shader = std::shared_ptr<CachedShader>;
-using CachedProgram = std::shared_ptr<OGLProgram>;
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
-using PrecompiledPrograms = std::unordered_map<ShaderDiskCacheUsage, CachedProgram>;
-using PrecompiledVariants = std::vector<PrecompiledPrograms::iterator>;
-
-struct UnspecializedShader {
- GLShader::ShaderEntries entries;
- Tegra::Engines::ShaderType type;
- ProgramCode code;
- ProgramCode code_b;
+
+struct PrecompiledShader {
+ std::shared_ptr<OGLProgram> program;
+ std::shared_ptr<VideoCommon::Shader::Registry> registry;
+ ShaderEntries entries;
};
struct ShaderParameters {
Core::System& system;
ShaderDiskCacheOpenGL& disk_cache;
- const PrecompiledVariants* precompiled_variants;
const Device& device;
VAddr cpu_addr;
u8* host_ptr;
@@ -65,61 +60,45 @@ struct ShaderParameters {
class CachedShader final : public RasterizerCacheObject {
public:
- static Shader CreateStageFromMemory(const ShaderParameters& params,
- Maxwell::ShaderProgram program_type,
- ProgramCode program_code, ProgramCode program_code_b);
- static Shader CreateKernelFromMemory(const ShaderParameters& params, ProgramCode code);
+ ~CachedShader();
- static Shader CreateFromCache(const ShaderParameters& params,
- const UnspecializedShader& unspecialized);
+ /// Gets the GL program handle for the shader
+ GLuint GetHandle() const;
+ /// Returns the guest CPU address of the shader
VAddr GetCpuAddr() const override {
return cpu_addr;
}
+ /// Returns the size in bytes of the shader
std::size_t GetSizeInBytes() const override {
- return code.size() * sizeof(u64);
+ return size_in_bytes;
}
/// Gets the shader entries for the shader
- const GLShader::ShaderEntries& GetShaderEntries() const {
+ const ShaderEntries& GetEntries() const {
return entries;
}
- /// Gets the GL program handle for the shader
- GLuint GetHandle(const ProgramVariant& variant);
-
-private:
- struct LockerVariant {
- std::unique_ptr<VideoCommon::Shader::ConstBufferLocker> locker;
- std::unordered_map<ProgramVariant, CachedProgram> programs;
- };
-
- explicit CachedShader(const ShaderParameters& params, Tegra::Engines::ShaderType shader_type,
- GLShader::ShaderEntries entries, ProgramCode program_code,
- ProgramCode program_code_b);
-
- bool EnsureValidLockerVariant();
-
- ShaderDiskCacheUsage GetUsage(const ProgramVariant& variant,
- const VideoCommon::Shader::ConstBufferLocker& locker) const;
-
- Core::System& system;
- ShaderDiskCacheOpenGL& disk_cache;
- const Device& device;
-
- VAddr cpu_addr{};
-
- u64 unique_identifier{};
- Tegra::Engines::ShaderType shader_type{};
-
- GLShader::ShaderEntries entries;
+ static Shader CreateStageFromMemory(const ShaderParameters& params,
+ Maxwell::ShaderProgram program_type,
+ ProgramCode program_code, ProgramCode program_code_b);
+ static Shader CreateKernelFromMemory(const ShaderParameters& params, ProgramCode code);
- ProgramCode code;
- ProgramCode code_b;
+ static Shader CreateFromCache(const ShaderParameters& params,
+ const PrecompiledShader& precompiled_shader,
+ std::size_t size_in_bytes);
- LockerVariant* curr_locker_variant = nullptr;
- std::vector<std::unique_ptr<LockerVariant>> locker_variants;
+private:
+ explicit CachedShader(const u8* host_ptr, VAddr cpu_addr, std::size_t size_in_bytes,
+ std::shared_ptr<VideoCommon::Shader::Registry> registry,
+ ShaderEntries entries, std::shared_ptr<OGLProgram> program);
+
+ std::shared_ptr<VideoCommon::Shader::Registry> registry;
+ ShaderEntries entries;
+ VAddr cpu_addr = 0;
+ std::size_t size_in_bytes = 0;
+ std::shared_ptr<OGLProgram> program;
};
class ShaderCacheOpenGL final : public RasterizerCache<Shader> {
@@ -142,25 +121,15 @@ protected:
void FlushObjectInner(const Shader& object) override {}
private:
- bool GenerateUnspecializedShaders(const std::atomic_bool& stop_loading,
- const VideoCore::DiskResourceLoadCallback& callback,
- const std::vector<ShaderDiskCacheRaw>& raws);
-
- CachedProgram GeneratePrecompiledProgram(const ShaderDiskCacheDump& dump,
- const std::unordered_set<GLenum>& supported_formats);
-
- const PrecompiledVariants* GetPrecompiledVariants(u64 unique_identifier) const;
+ std::shared_ptr<OGLProgram> GeneratePrecompiledProgram(
+ const ShaderDiskCacheEntry& entry, const ShaderDiskCachePrecompiled& precompiled_entry,
+ const std::unordered_set<GLenum>& supported_formats);
Core::System& system;
Core::Frontend::EmuWindow& emu_window;
const Device& device;
-
ShaderDiskCacheOpenGL disk_cache;
-
- PrecompiledPrograms precompiled_programs;
- std::unordered_map<u64, PrecompiledVariants> precompiled_variants;
-
- std::unordered_map<u64, UnspecializedShader> unspecialized_shaders;
+ std::unordered_map<u64, PrecompiledShader> runtime_cache;
std::array<Shader, Maxwell::MaxShaderProgram> last_shaders;
};
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index 4735000b5..8aa4a7ac9 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -23,8 +23,9 @@
#include "video_core/shader/ast.h"
#include "video_core/shader/node.h"
#include "video_core/shader/shader_ir.h"
+#include "video_core/shader/transform_feedback.h"
-namespace OpenGL::GLShader {
+namespace OpenGL {
namespace {
@@ -36,6 +37,8 @@ using Tegra::Shader::IpaInterpMode;
using Tegra::Shader::IpaMode;
using Tegra::Shader::IpaSampleMode;
using Tegra::Shader::Register;
+using VideoCommon::Shader::BuildTransformFeedback;
+using VideoCommon::Shader::Registry;
using namespace std::string_literals;
using namespace VideoCommon::Shader;
@@ -48,6 +51,11 @@ class ExprDecompiler;
enum class Type { Void, Bool, Bool2, Float, Int, Uint, HalfFloat };
+constexpr std::array FLOAT_TYPES{"float", "vec2", "vec3", "vec4"};
+
+constexpr std::string_view INPUT_ATTRIBUTE_NAME = "in_attr";
+constexpr std::string_view OUTPUT_ATTRIBUTE_NAME = "out_attr";
+
struct TextureOffset {};
struct TextureDerivates {};
using TextureArgument = std::pair<Type, Node>;
@@ -56,6 +64,25 @@ using TextureIR = std::variant<TextureOffset, TextureDerivates, TextureArgument>
constexpr u32 MAX_CONSTBUFFER_ELEMENTS =
static_cast<u32>(Maxwell::MaxConstBufferSize) / (4 * sizeof(float));
+constexpr std::string_view CommonDeclarations = R"(#define ftoi floatBitsToInt
+#define ftou floatBitsToUint
+#define itof intBitsToFloat
+#define utof uintBitsToFloat
+
+bvec2 HalfFloatNanComparison(bvec2 comparison, vec2 pair1, vec2 pair2) {{
+ bvec2 is_nan1 = isnan(pair1);
+ bvec2 is_nan2 = isnan(pair2);
+ return bvec2(comparison.x || is_nan1.x || is_nan2.x, comparison.y || is_nan1.y || is_nan2.y);
+}}
+
+const float fswzadd_modifiers_a[] = float[4](-1.0f, 1.0f, -1.0f, 0.0f );
+const float fswzadd_modifiers_b[] = float[4](-1.0f, -1.0f, 1.0f, -1.0f );
+
+layout (std140, binding = {}) uniform vs_config {{
+ float y_direction;
+}};
+)";
+
class ShaderWriter final {
public:
void AddExpression(std::string_view text) {
@@ -269,12 +296,41 @@ const char* GetImageTypeDeclaration(Tegra::Shader::ImageType image_type) {
}
}
+/// Describes primitive behavior on geometry shaders
+std::pair<const char*, u32> GetPrimitiveDescription(Maxwell::PrimitiveTopology topology) {
+ switch (topology) {
+ case Maxwell::PrimitiveTopology::Points:
+ return {"points", 1};
+ case Maxwell::PrimitiveTopology::Lines:
+ case Maxwell::PrimitiveTopology::LineStrip:
+ return {"lines", 2};
+ case Maxwell::PrimitiveTopology::LinesAdjacency:
+ case Maxwell::PrimitiveTopology::LineStripAdjacency:
+ return {"lines_adjacency", 4};
+ case Maxwell::PrimitiveTopology::Triangles:
+ case Maxwell::PrimitiveTopology::TriangleStrip:
+ case Maxwell::PrimitiveTopology::TriangleFan:
+ return {"triangles", 3};
+ case Maxwell::PrimitiveTopology::TrianglesAdjacency:
+ case Maxwell::PrimitiveTopology::TriangleStripAdjacency:
+ return {"triangles_adjacency", 6};
+ default:
+ UNIMPLEMENTED_MSG("topology={}", static_cast<int>(topology));
+ return {"points", 1};
+ }
+}
+
/// Generates code to use for a swizzle operation.
-constexpr const char* GetSwizzle(u32 element) {
+constexpr const char* GetSwizzle(std::size_t element) {
constexpr std::array swizzle = {".x", ".y", ".z", ".w"};
return swizzle.at(element);
}
+constexpr const char* GetColorSwizzle(std::size_t element) {
+ constexpr std::array swizzle = {".r", ".g", ".b", ".a"};
+ return swizzle.at(element);
+}
+
/// Translate topology
std::string GetTopologyName(Tegra::Shader::OutputTopology topology) {
switch (topology) {
@@ -310,10 +366,19 @@ constexpr bool IsGenericAttribute(Attribute::Index index) {
return index >= Attribute::Index::Attribute_0 && index <= Attribute::Index::Attribute_31;
}
+constexpr bool IsLegacyTexCoord(Attribute::Index index) {
+ return static_cast<int>(index) >= static_cast<int>(Attribute::Index::TexCoord_0) &&
+ static_cast<int>(index) <= static_cast<int>(Attribute::Index::TexCoord_7);
+}
+
constexpr Attribute::Index ToGenericAttribute(u64 value) {
return static_cast<Attribute::Index>(value + static_cast<u64>(Attribute::Index::Attribute_0));
}
+constexpr int GetLegacyTexCoordIndex(Attribute::Index index) {
+ return static_cast<int>(index) - static_cast<int>(Attribute::Index::TexCoord_0);
+}
+
u32 GetGenericAttributeIndex(Attribute::Index index) {
ASSERT(IsGenericAttribute(index));
return static_cast<u32>(index) - static_cast<u32>(Attribute::Index::Attribute_0);
@@ -337,15 +402,66 @@ std::string FlowStackTopName(MetaStackClass stack) {
return fmt::format("{}_flow_stack_top", GetFlowStackPrefix(stack));
}
-[[deprecated]] constexpr bool IsVertexShader(ShaderType stage) {
- return stage == ShaderType::Vertex;
-}
+struct GenericVaryingDescription {
+ std::string name;
+ u8 first_element = 0;
+ bool is_scalar = false;
+};
class GLSLDecompiler final {
public:
- explicit GLSLDecompiler(const Device& device, const ShaderIR& ir, ShaderType stage,
- std::string suffix)
- : device{device}, ir{ir}, stage{stage}, suffix{suffix}, header{ir.GetHeader()} {}
+ explicit GLSLDecompiler(const Device& device, const ShaderIR& ir, const Registry& registry,
+ ShaderType stage, std::string_view identifier, std::string_view suffix)
+ : device{device}, ir{ir}, registry{registry}, stage{stage},
+ identifier{identifier}, suffix{suffix}, header{ir.GetHeader()} {
+ if (stage != ShaderType::Compute) {
+ transform_feedback = BuildTransformFeedback(registry.GetGraphicsInfo());
+ }
+ }
+
+ void Decompile() {
+ DeclareHeader();
+ DeclareVertex();
+ DeclareGeometry();
+ DeclareFragment();
+ DeclareCompute();
+ DeclareInputAttributes();
+ DeclareOutputAttributes();
+ DeclareImages();
+ DeclareSamplers();
+ DeclareGlobalMemory();
+ DeclareConstantBuffers();
+ DeclareLocalMemory();
+ DeclareRegisters();
+ DeclarePredicates();
+ DeclareInternalFlags();
+ DeclareCustomVariables();
+ DeclarePhysicalAttributeReader();
+
+ code.AddLine("void main() {{");
+ ++code.scope;
+
+ if (stage == ShaderType::Vertex) {
+ code.AddLine("gl_Position = vec4(0.0f, 0.0f, 0.0f, 1.0f);");
+ }
+
+ if (ir.IsDecompiled()) {
+ DecompileAST();
+ } else {
+ DecompileBranchMode();
+ }
+
+ --code.scope;
+ code.AddLine("}}");
+ }
+
+ std::string GetResult() {
+ return code.GetResult();
+ }
+
+private:
+ friend class ASTDecompiler;
+ friend class ExprDecompiler;
void DecompileBranchMode() {
// VM's program counter
@@ -387,46 +503,40 @@ public:
void DecompileAST();
- void Decompile() {
- DeclareVertex();
- DeclareGeometry();
- DeclareRegisters();
- DeclareCustomVariables();
- DeclarePredicates();
- DeclareLocalMemory();
- DeclareInternalFlags();
- DeclareInputAttributes();
- DeclareOutputAttributes();
- DeclareConstantBuffers();
- DeclareGlobalMemory();
- DeclareSamplers();
- DeclareImages();
- DeclarePhysicalAttributeReader();
-
- code.AddLine("void execute_{}() {{", suffix);
- ++code.scope;
-
- if (ir.IsDecompiled()) {
- DecompileAST();
- } else {
- DecompileBranchMode();
+ void DeclareHeader() {
+ if (!identifier.empty()) {
+ code.AddLine("// {}", identifier);
+ }
+ code.AddLine("#version 440 {}", ir.UsesLegacyVaryings() ? "compatibility" : "core");
+ code.AddLine("#extension GL_ARB_separate_shader_objects : enable");
+ if (device.HasShaderBallot()) {
+ code.AddLine("#extension GL_ARB_shader_ballot : require");
+ }
+ if (device.HasVertexViewportLayer()) {
+ code.AddLine("#extension GL_ARB_shader_viewport_layer_array : require");
+ }
+ if (device.HasImageLoadFormatted()) {
+ code.AddLine("#extension GL_EXT_shader_image_load_formatted : require");
}
+ if (device.HasWarpIntrinsics()) {
+ code.AddLine("#extension GL_NV_gpu_shader5 : require");
+ code.AddLine("#extension GL_NV_shader_thread_group : require");
+ code.AddLine("#extension GL_NV_shader_thread_shuffle : require");
+ }
+ // This pragma stops Nvidia's driver from over optimizing math (probably using fp16
+ // operations) on places where we don't want to.
+ // Thanks to Ryujinx for finding this workaround.
+ code.AddLine("#pragma optionNV(fastmath off)");
- --code.scope;
- code.AddLine("}}");
- }
+ code.AddNewLine();
- std::string GetResult() {
- return code.GetResult();
+ code.AddLine(CommonDeclarations, EmulationUniformBlockBinding);
}
-private:
- friend class ASTDecompiler;
- friend class ExprDecompiler;
-
void DeclareVertex() {
- if (!IsVertexShader(stage))
+ if (stage != ShaderType::Vertex) {
return;
+ }
DeclareVertexRedeclarations();
}
@@ -436,9 +546,15 @@ private:
return;
}
+ const auto& info = registry.GetGraphicsInfo();
+ const auto input_topology = info.primitive_topology;
+ const auto [glsl_topology, max_vertices] = GetPrimitiveDescription(input_topology);
+ max_input_vertices = max_vertices;
+ code.AddLine("layout ({}) in;", glsl_topology);
+
const auto topology = GetTopologyName(header.common3.output_topology);
- const auto max_vertices = header.common4.max_output_vertices.Value();
- code.AddLine("layout ({}, max_vertices = {}) out;", topology, max_vertices);
+ const auto max_output_vertices = header.common4.max_output_vertices.Value();
+ code.AddLine("layout ({}, max_vertices = {}) out;", topology, max_output_vertices);
code.AddNewLine();
code.AddLine("in gl_PerVertex {{");
@@ -450,11 +566,50 @@ private:
DeclareVertexRedeclarations();
}
+ void DeclareFragment() {
+ if (stage != ShaderType::Fragment) {
+ return;
+ }
+ if (ir.UsesLegacyVaryings()) {
+ code.AddLine("in gl_PerFragment {{");
+ ++code.scope;
+ code.AddLine("vec4 gl_TexCoord[8];");
+ code.AddLine("vec4 gl_Color;");
+ code.AddLine("vec4 gl_SecondaryColor;");
+ --code.scope;
+ code.AddLine("}};");
+ }
+
+ for (u32 rt = 0; rt < Maxwell::NumRenderTargets; ++rt) {
+ code.AddLine("layout (location = {}) out vec4 frag_color{};", rt, rt);
+ }
+ }
+
+ void DeclareCompute() {
+ if (stage != ShaderType::Compute) {
+ return;
+ }
+ const auto& info = registry.GetComputeInfo();
+ if (const u32 size = info.shared_memory_size_in_words; size > 0) {
+ code.AddLine("shared uint smem[{}];", size);
+ code.AddNewLine();
+ }
+ code.AddLine("layout (local_size_x = {}, local_size_y = {}, local_size_z = {}) in;",
+ info.workgroup_size[0], info.workgroup_size[1], info.workgroup_size[2]);
+ code.AddNewLine();
+ }
+
void DeclareVertexRedeclarations() {
code.AddLine("out gl_PerVertex {{");
++code.scope;
- code.AddLine("vec4 gl_Position;");
+ auto pos_xfb = GetTransformFeedbackDecoration(Attribute::Index::Position);
+ if (!pos_xfb.empty()) {
+ pos_xfb = fmt::format("layout ({}) ", pos_xfb);
+ }
+ const char* pos_type =
+ FLOAT_TYPES.at(GetNumComponents(Attribute::Index::Position).value_or(4) - 1);
+ code.AddLine("{}{} gl_Position;", pos_xfb, pos_type);
for (const auto attribute : ir.GetOutputAttributes()) {
if (attribute == Attribute::Index::ClipDistances0123 ||
@@ -463,14 +618,14 @@ private:
break;
}
}
- if (!IsVertexShader(stage) || device.HasVertexViewportLayer()) {
+ if (stage != ShaderType::Vertex || device.HasVertexViewportLayer()) {
if (ir.UsesLayer()) {
code.AddLine("int gl_Layer;");
}
if (ir.UsesViewportIndex()) {
code.AddLine("int gl_ViewportIndex;");
}
- } else if ((ir.UsesLayer() || ir.UsesViewportIndex()) && IsVertexShader(stage) &&
+ } else if ((ir.UsesLayer() || ir.UsesViewportIndex()) && stage == ShaderType::Vertex &&
!device.HasVertexViewportLayer()) {
LOG_ERROR(
Render_OpenGL,
@@ -481,12 +636,12 @@ private:
code.AddLine("float gl_PointSize;");
}
- if (ir.UsesInstanceId()) {
- code.AddLine("int gl_InstanceID;");
- }
-
- if (ir.UsesVertexId()) {
- code.AddLine("int gl_VertexID;");
+ if (ir.UsesLegacyVaryings()) {
+ code.AddLine("vec4 gl_TexCoord[8];");
+ code.AddLine("vec4 gl_FrontColor;");
+ code.AddLine("vec4 gl_FrontSecondaryColor;");
+ code.AddLine("vec4 gl_BackColor;");
+ code.AddLine("vec4 gl_BackSecondaryColor;");
}
--code.scope;
@@ -525,18 +680,16 @@ private:
}
void DeclareLocalMemory() {
+ u64 local_memory_size = 0;
if (stage == ShaderType::Compute) {
- code.AddLine("#ifdef LOCAL_MEMORY_SIZE");
- code.AddLine("uint {}[LOCAL_MEMORY_SIZE];", GetLocalMemory());
- code.AddLine("#endif");
- return;
+ local_memory_size = registry.GetComputeInfo().local_memory_size_in_words * 4ULL;
+ } else {
+ local_memory_size = header.GetLocalMemorySize();
}
-
- const u64 local_memory_size = header.GetLocalMemorySize();
if (local_memory_size == 0) {
return;
}
- const auto element_count = Common::AlignUp(local_memory_size, 4) / 4;
+ const u64 element_count = Common::AlignUp(local_memory_size, 4) / 4;
code.AddLine("uint {}[{}];", GetLocalMemory(), element_count);
code.AddNewLine();
}
@@ -589,7 +742,7 @@ private:
void DeclareInputAttribute(Attribute::Index index, bool skip_unused) {
const u32 location{GetGenericAttributeIndex(index)};
- std::string name{GetInputAttribute(index)};
+ std::string name{GetGenericInputAttribute(index)};
if (stage == ShaderType::Geometry) {
name = "gs_" + name + "[]";
}
@@ -626,9 +779,59 @@ private:
}
}
+ std::optional<std::size_t> GetNumComponents(Attribute::Index index, u8 element = 0) const {
+ const u8 location = static_cast<u8>(static_cast<u32>(index) * 4 + element);
+ const auto it = transform_feedback.find(location);
+ if (it == transform_feedback.end()) {
+ return {};
+ }
+ return it->second.components;
+ }
+
+ std::string GetTransformFeedbackDecoration(Attribute::Index index, u8 element = 0) const {
+ const u8 location = static_cast<u8>(static_cast<u32>(index) * 4 + element);
+ const auto it = transform_feedback.find(location);
+ if (it == transform_feedback.end()) {
+ return {};
+ }
+
+ const VaryingTFB& tfb = it->second;
+ return fmt::format("xfb_buffer = {}, xfb_offset = {}, xfb_stride = {}", tfb.buffer,
+ tfb.offset, tfb.stride);
+ }
+
void DeclareOutputAttribute(Attribute::Index index) {
- const u32 location{GetGenericAttributeIndex(index)};
- code.AddLine("layout (location = {}) out vec4 {};", location, GetOutputAttribute(index));
+ static constexpr std::string_view swizzle = "xyzw";
+ u8 element = 0;
+ while (element < 4) {
+ auto xfb = GetTransformFeedbackDecoration(index, element);
+ if (!xfb.empty()) {
+ xfb = fmt::format(", {}", xfb);
+ }
+ const std::size_t remainder = 4 - element;
+ const std::size_t num_components = GetNumComponents(index, element).value_or(remainder);
+ const char* const type = FLOAT_TYPES.at(num_components - 1);
+
+ const u32 location = GetGenericAttributeIndex(index);
+
+ GenericVaryingDescription description;
+ description.first_element = static_cast<u8>(element);
+ description.is_scalar = num_components == 1;
+ description.name = AppendSuffix(location, OUTPUT_ATTRIBUTE_NAME);
+ if (element != 0 || num_components != 4) {
+ const std::string_view name_swizzle = swizzle.substr(element, num_components);
+ description.name = fmt::format("{}_{}", description.name, name_swizzle);
+ }
+ for (std::size_t i = 0; i < num_components; ++i) {
+ const u8 offset = static_cast<u8>(location * 4 + element + i);
+ varying_description.insert({offset, description});
+ }
+
+ code.AddLine("layout (location = {}, component = {}{}) out {} {};", location, element,
+ xfb, type, description.name);
+
+ element = static_cast<u8>(static_cast<std::size_t>(element) + num_components);
+ }
}
void DeclareConstantBuffers() {
@@ -925,7 +1128,8 @@ private:
// TODO(Rodrigo): Guard geometry inputs against out of bound reads. Some games
// set an 0x80000000 index for those and the shader fails to build. Find out why
// this happens and what's its intent.
- return fmt::format("gs_{}[{} % MAX_VERTEX_INPUT]", name, Visit(buffer).AsUint());
+ return fmt::format("gs_{}[{} % {}]", name, Visit(buffer).AsUint(),
+ max_input_vertices.value());
}
return std::string(name);
};
@@ -943,6 +1147,10 @@ private:
default:
UNREACHABLE();
}
+ case Attribute::Index::FrontColor:
+ return {"gl_Color"s + GetSwizzle(element), Type::Float};
+ case Attribute::Index::FrontSecondaryColor:
+ return {"gl_SecondaryColor"s + GetSwizzle(element), Type::Float};
case Attribute::Index::PointCoord:
switch (element) {
case 0:
@@ -959,7 +1167,7 @@ private:
// TODO(Subv): Find out what the values are for the first two elements when inside a
// vertex shader, and what's the value of the fourth element when inside a Tess Eval
// shader.
- ASSERT(IsVertexShader(stage));
+ ASSERT(stage == ShaderType::Vertex);
switch (element) {
case 2:
// Config pack's first value is instance_id.
@@ -980,7 +1188,13 @@ private:
return {"0", Type::Int};
default:
if (IsGenericAttribute(attribute)) {
- return {GeometryPass(GetInputAttribute(attribute)) + GetSwizzle(element),
+ return {GeometryPass(GetGenericInputAttribute(attribute)) + GetSwizzle(element),
+ Type::Float};
+ }
+ if (IsLegacyTexCoord(attribute)) {
+ UNIMPLEMENTED_IF(stage == ShaderType::Geometry);
+ return {fmt::format("gl_TexCoord[{}]{}", GetLegacyTexCoordIndex(attribute),
+ GetSwizzle(element)),
Type::Float};
}
break;
@@ -1021,21 +1235,22 @@ private:
}
std::optional<Expression> GetOutputAttribute(const AbufNode* abuf) {
+ const u32 element = abuf->GetElement();
switch (const auto attribute = abuf->GetIndex()) {
case Attribute::Index::Position:
- return {{"gl_Position"s + GetSwizzle(abuf->GetElement()), Type::Float}};
+ return {{"gl_Position"s + GetSwizzle(element), Type::Float}};
case Attribute::Index::LayerViewportPointSize:
- switch (abuf->GetElement()) {
+ switch (element) {
case 0:
UNIMPLEMENTED();
return {};
case 1:
- if (IsVertexShader(stage) && !device.HasVertexViewportLayer()) {
+ if (stage == ShaderType::Vertex && !device.HasVertexViewportLayer()) {
return {};
}
return {{"gl_Layer", Type::Int}};
case 2:
- if (IsVertexShader(stage) && !device.HasVertexViewportLayer()) {
+ if (stage == ShaderType::Vertex && !device.HasVertexViewportLayer()) {
return {};
}
return {{"gl_ViewportIndex", Type::Int}};
@@ -1043,14 +1258,26 @@ private:
return {{"gl_PointSize", Type::Float}};
}
return {};
+ case Attribute::Index::FrontColor:
+ return {{"gl_FrontColor"s + GetSwizzle(element), Type::Float}};
+ case Attribute::Index::FrontSecondaryColor:
+ return {{"gl_FrontSecondaryColor"s + GetSwizzle(element), Type::Float}};
+ case Attribute::Index::BackColor:
+ return {{"gl_BackColor"s + GetSwizzle(element), Type::Float}};
+ case Attribute::Index::BackSecondaryColor:
+ return {{"gl_BackSecondaryColor"s + GetSwizzle(element), Type::Float}};
case Attribute::Index::ClipDistances0123:
- return {{fmt::format("gl_ClipDistance[{}]", abuf->GetElement()), Type::Float}};
+ return {{fmt::format("gl_ClipDistance[{}]", element), Type::Float}};
case Attribute::Index::ClipDistances4567:
- return {{fmt::format("gl_ClipDistance[{}]", abuf->GetElement() + 4), Type::Float}};
+ return {{fmt::format("gl_ClipDistance[{}]", element + 4), Type::Float}};
default:
if (IsGenericAttribute(attribute)) {
- return {
- {GetOutputAttribute(attribute) + GetSwizzle(abuf->GetElement()), Type::Float}};
+ return {{GetGenericOutputAttribute(attribute, element), Type::Float}};
+ }
+ if (IsLegacyTexCoord(attribute)) {
+ return {{fmt::format("gl_TexCoord[{}]{}", GetLegacyTexCoordIndex(attribute),
+ GetSwizzle(element)),
+ Type::Float}};
}
UNIMPLEMENTED_MSG("Unhandled output attribute: {}", static_cast<u32>(attribute));
return {};
@@ -1822,16 +2049,19 @@ private:
expr += GetSampler(meta->sampler);
expr += ", ";
- expr += constructors.at(operation.GetOperandsCount() - 1);
+ expr += constructors.at(operation.GetOperandsCount() + (meta->array ? 1 : 0) - 1);
expr += '(';
for (std::size_t i = 0; i < count; ++i) {
- expr += VisitOperand(operation, i).AsInt();
- const std::size_t next = i + 1;
- if (next == count)
- expr += ')';
- else if (next < count)
+ if (i > 0) {
expr += ", ";
+ }
+ expr += VisitOperand(operation, i).AsInt();
+ }
+ if (meta->array) {
+ expr += ", ";
+ expr += Visit(meta->array).AsInt();
}
+ expr += ')';
if (meta->lod && !meta->sampler.IsBuffer()) {
expr += ", ";
@@ -1945,7 +2175,7 @@ private:
// TODO(Subv): Figure out how dual-source blending is configured in the Switch.
for (u32 component = 0; component < 4; ++component) {
if (header.ps.IsColorComponentOutputEnabled(render_target, component)) {
- code.AddLine("FragColor{}[{}] = {};", render_target, component,
+ code.AddLine("frag_color{}{} = {};", render_target, GetColorSwizzle(component),
SafeGetRegister(current_reg).AsFloat());
++current_reg;
}
@@ -2261,27 +2491,34 @@ private:
static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount));
std::string GetRegister(u32 index) const {
- return GetDeclarationWithSuffix(index, "gpr");
+ return AppendSuffix(index, "gpr");
}
std::string GetCustomVariable(u32 index) const {
- return GetDeclarationWithSuffix(index, "custom_var");
+ return AppendSuffix(index, "custom_var");
}
std::string GetPredicate(Tegra::Shader::Pred pred) const {
- return GetDeclarationWithSuffix(static_cast<u32>(pred), "pred");
+ return AppendSuffix(static_cast<u32>(pred), "pred");
}
- std::string GetInputAttribute(Attribute::Index attribute) const {
- return GetDeclarationWithSuffix(GetGenericAttributeIndex(attribute), "input_attr");
+ std::string GetGenericInputAttribute(Attribute::Index attribute) const {
+ return AppendSuffix(GetGenericAttributeIndex(attribute), INPUT_ATTRIBUTE_NAME);
}
- std::string GetOutputAttribute(Attribute::Index attribute) const {
- return GetDeclarationWithSuffix(GetGenericAttributeIndex(attribute), "output_attr");
+ std::unordered_map<u8, GenericVaryingDescription> varying_description;
+
+ std::string GetGenericOutputAttribute(Attribute::Index attribute, std::size_t element) const {
+ const u8 offset = static_cast<u8>(GetGenericAttributeIndex(attribute) * 4 + element);
+ const auto& description = varying_description.at(offset);
+ if (description.is_scalar) {
+ return description.name;
+ }
+ return fmt::format("{}[{}]", description.name, element - description.first_element);
}
std::string GetConstBuffer(u32 index) const {
- return GetDeclarationWithSuffix(index, "cbuf");
+ return AppendSuffix(index, "cbuf");
}
std::string GetGlobalMemory(const GlobalMemoryBase& descriptor) const {
@@ -2294,11 +2531,15 @@ private:
}
std::string GetConstBufferBlock(u32 index) const {
- return GetDeclarationWithSuffix(index, "cbuf_block");
+ return AppendSuffix(index, "cbuf_block");
}
std::string GetLocalMemory() const {
- return "lmem_" + suffix;
+ if (suffix.empty()) {
+ return "lmem";
+ } else {
+ return "lmem_" + std::string{suffix};
+ }
}
std::string GetInternalFlag(InternalFlag flag) const {
@@ -2307,23 +2548,31 @@ private:
const auto index = static_cast<u32>(flag);
ASSERT(index < static_cast<u32>(InternalFlag::Amount));
- return fmt::format("{}_{}", InternalFlagNames[index], suffix);
+ if (suffix.empty()) {
+ return InternalFlagNames[index];
+ } else {
+ return fmt::format("{}_{}", InternalFlagNames[index], suffix);
+ }
}
std::string GetSampler(const Sampler& sampler) const {
- return GetDeclarationWithSuffix(static_cast<u32>(sampler.GetIndex()), "sampler");
+ return AppendSuffix(static_cast<u32>(sampler.GetIndex()), "sampler");
}
std::string GetImage(const Image& image) const {
- return GetDeclarationWithSuffix(static_cast<u32>(image.GetIndex()), "image");
+ return AppendSuffix(static_cast<u32>(image.GetIndex()), "image");
}
- std::string GetDeclarationWithSuffix(u32 index, std::string_view name) const {
- return fmt::format("{}_{}_{}", name, index, suffix);
+ std::string AppendSuffix(u32 index, std::string_view name) const {
+ if (suffix.empty()) {
+ return fmt::format("{}{}", name, index);
+ } else {
+ return fmt::format("{}{}_{}", name, index, suffix);
+ }
}
u32 GetNumPhysicalInputAttributes() const {
- return IsVertexShader(stage) ? GetNumPhysicalAttributes() : GetNumPhysicalVaryings();
+ return stage == ShaderType::Vertex ? GetNumPhysicalAttributes() : GetNumPhysicalVaryings();
}
u32 GetNumPhysicalAttributes() const {
@@ -2334,17 +2583,31 @@ private:
return std::min<u32>(device.GetMaxVaryings(), Maxwell::NumVaryings);
}
+ bool IsRenderTargetEnabled(u32 render_target) const {
+ for (u32 component = 0; component < 4; ++component) {
+ if (header.ps.IsColorComponentOutputEnabled(render_target, component)) {
+ return true;
+ }
+ }
+ return false;
+ }
+
const Device& device;
const ShaderIR& ir;
+ const Registry& registry;
const ShaderType stage;
- const std::string suffix;
+ const std::string_view identifier;
+ const std::string_view suffix;
const Header header;
+ std::unordered_map<u8, VaryingTFB> transform_feedback;
ShaderWriter code;
+
+ std::optional<u32> max_input_vertices;
};
-std::string GetFlowVariable(u32 i) {
- return fmt::format("flow_var_{}", i);
+std::string GetFlowVariable(u32 index) {
+ return fmt::format("flow_var{}", index);
}
class ExprDecompiler {
@@ -2531,7 +2794,7 @@ void GLSLDecompiler::DecompileAST() {
} // Anonymous namespace
-ShaderEntries GetEntries(const VideoCommon::Shader::ShaderIR& ir) {
+ShaderEntries MakeEntries(const VideoCommon::Shader::ShaderIR& ir) {
ShaderEntries entries;
for (const auto& cbuf : ir.GetConstantBuffers()) {
entries.const_buffers.emplace_back(cbuf.second.GetMaxOffset(), cbuf.second.IsIndirect(),
@@ -2547,33 +2810,20 @@ ShaderEntries GetEntries(const VideoCommon::Shader::ShaderIR& ir) {
for (const auto& image : ir.GetImages()) {
entries.images.emplace_back(image);
}
- entries.clip_distances = ir.GetClipDistances();
+ const auto clip_distances = ir.GetClipDistances();
+ for (std::size_t i = 0; i < std::size(clip_distances); ++i) {
+ entries.clip_distances = (clip_distances[i] ? 1U : 0U) << i;
+ }
entries.shader_length = ir.GetLength();
return entries;
}
-std::string GetCommonDeclarations() {
- return R"(#define ftoi floatBitsToInt
-#define ftou floatBitsToUint
-#define itof intBitsToFloat
-#define utof uintBitsToFloat
-
-bvec2 HalfFloatNanComparison(bvec2 comparison, vec2 pair1, vec2 pair2) {
- bvec2 is_nan1 = isnan(pair1);
- bvec2 is_nan2 = isnan(pair2);
- return bvec2(comparison.x || is_nan1.x || is_nan2.x, comparison.y || is_nan1.y || is_nan2.y);
-}
-
-const float fswzadd_modifiers_a[] = float[4](-1.0f, 1.0f, -1.0f, 0.0f );
-const float fswzadd_modifiers_b[] = float[4](-1.0f, -1.0f, 1.0f, -1.0f );
-)";
-}
-
-std::string Decompile(const Device& device, const ShaderIR& ir, ShaderType stage,
- const std::string& suffix) {
- GLSLDecompiler decompiler(device, ir, stage, suffix);
+std::string DecompileShader(const Device& device, const ShaderIR& ir, const Registry& registry,
+ ShaderType stage, std::string_view identifier,
+ std::string_view suffix) {
+ GLSLDecompiler decompiler(device, ir, registry, stage, identifier, suffix);
decompiler.Decompile();
return decompiler.GetResult();
}
-} // namespace OpenGL::GLShader
+} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.h b/src/video_core/renderer_opengl/gl_shader_decompiler.h
index 7876f48d6..e7dbd810c 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.h
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.h
@@ -6,22 +6,18 @@
#include <array>
#include <string>
+#include <string_view>
#include <utility>
#include <vector>
#include "common/common_types.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/engines/shader_type.h"
+#include "video_core/shader/registry.h"
#include "video_core/shader/shader_ir.h"
-namespace VideoCommon::Shader {
-class ShaderIR;
-}
-
namespace OpenGL {
-class Device;
-}
-namespace OpenGL::GLShader {
+class Device;
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
using SamplerEntry = VideoCommon::Shader::Sampler;
@@ -74,15 +70,15 @@ struct ShaderEntries {
std::vector<GlobalMemoryEntry> global_memory_entries;
std::vector<SamplerEntry> samplers;
std::vector<ImageEntry> images;
- std::array<bool, Maxwell::NumClipDistances> clip_distances{};
+ u32 clip_distances{};
std::size_t shader_length{};
};
-ShaderEntries GetEntries(const VideoCommon::Shader::ShaderIR& ir);
-
-std::string GetCommonDeclarations();
+ShaderEntries MakeEntries(const VideoCommon::Shader::ShaderIR& ir);
-std::string Decompile(const Device& device, const VideoCommon::Shader::ShaderIR& ir,
- Tegra::Engines::ShaderType stage, const std::string& suffix);
+std::string DecompileShader(const Device& device, const VideoCommon::Shader::ShaderIR& ir,
+ const VideoCommon::Shader::Registry& registry,
+ Tegra::Engines::ShaderType stage, std::string_view identifier,
+ std::string_view suffix = {});
-} // namespace OpenGL::GLShader
+} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
index 1fc204f6f..9e95a122b 100644
--- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
@@ -31,32 +31,24 @@ namespace {
using ShaderCacheVersionHash = std::array<u8, 64>;
-enum class TransferableEntryKind : u32 {
- Raw,
- Usage,
-};
-
struct ConstBufferKey {
- u32 cbuf{};
- u32 offset{};
- u32 value{};
+ u32 cbuf = 0;
+ u32 offset = 0;
+ u32 value = 0;
};
struct BoundSamplerKey {
- u32 offset{};
- Tegra::Engines::SamplerDescriptor sampler{};
+ u32 offset = 0;
+ Tegra::Engines::SamplerDescriptor sampler;
};
struct BindlessSamplerKey {
- u32 cbuf{};
- u32 offset{};
- Tegra::Engines::SamplerDescriptor sampler{};
+ u32 cbuf = 0;
+ u32 offset = 0;
+ Tegra::Engines::SamplerDescriptor sampler;
};
-constexpr u32 NativeVersion = 12;
-
-// Making sure sizes doesn't change by accident
-static_assert(sizeof(ProgramVariant) == 20);
+constexpr u32 NativeVersion = 20;
ShaderCacheVersionHash GetShaderCacheVersionHash() {
ShaderCacheVersionHash hash{};
@@ -67,61 +59,124 @@ ShaderCacheVersionHash GetShaderCacheVersionHash() {
} // Anonymous namespace
-ShaderDiskCacheRaw::ShaderDiskCacheRaw(u64 unique_identifier, ShaderType type, ProgramCode code,
- ProgramCode code_b)
- : unique_identifier{unique_identifier}, type{type}, code{std::move(code)}, code_b{std::move(
- code_b)} {}
+ShaderDiskCacheEntry::ShaderDiskCacheEntry() = default;
-ShaderDiskCacheRaw::ShaderDiskCacheRaw() = default;
+ShaderDiskCacheEntry::~ShaderDiskCacheEntry() = default;
-ShaderDiskCacheRaw::~ShaderDiskCacheRaw() = default;
-
-bool ShaderDiskCacheRaw::Load(FileUtil::IOFile& file) {
- if (file.ReadBytes(&unique_identifier, sizeof(u64)) != sizeof(u64) ||
- file.ReadBytes(&type, sizeof(u32)) != sizeof(u32)) {
+bool ShaderDiskCacheEntry::Load(FileUtil::IOFile& file) {
+ if (file.ReadBytes(&type, sizeof(u32)) != sizeof(u32)) {
return false;
}
- u32 code_size{};
- u32 code_size_b{};
+ u32 code_size;
+ u32 code_size_b;
if (file.ReadBytes(&code_size, sizeof(u32)) != sizeof(u32) ||
file.ReadBytes(&code_size_b, sizeof(u32)) != sizeof(u32)) {
return false;
}
-
code.resize(code_size);
code_b.resize(code_size_b);
- if (file.ReadArray(code.data(), code_size) != code_size)
+ if (file.ReadArray(code.data(), code_size) != code_size) {
return false;
-
+ }
if (HasProgramA() && file.ReadArray(code_b.data(), code_size_b) != code_size_b) {
return false;
}
+
+ u8 is_texture_handler_size_known;
+ u32 texture_handler_size_value;
+ u32 num_keys;
+ u32 num_bound_samplers;
+ u32 num_bindless_samplers;
+ if (file.ReadArray(&unique_identifier, 1) != 1 || file.ReadArray(&bound_buffer, 1) != 1 ||
+ file.ReadArray(&is_texture_handler_size_known, 1) != 1 ||
+ file.ReadArray(&texture_handler_size_value, 1) != 1 ||
+ file.ReadArray(&graphics_info, 1) != 1 || file.ReadArray(&compute_info, 1) != 1 ||
+ file.ReadArray(&num_keys, 1) != 1 || file.ReadArray(&num_bound_samplers, 1) != 1 ||
+ file.ReadArray(&num_bindless_samplers, 1) != 1) {
+ return false;
+ }
+ if (is_texture_handler_size_known) {
+ texture_handler_size = texture_handler_size_value;
+ }
+
+ std::vector<ConstBufferKey> flat_keys(num_keys);
+ std::vector<BoundSamplerKey> flat_bound_samplers(num_bound_samplers);
+ std::vector<BindlessSamplerKey> flat_bindless_samplers(num_bindless_samplers);
+ if (file.ReadArray(flat_keys.data(), flat_keys.size()) != flat_keys.size() ||
+ file.ReadArray(flat_bound_samplers.data(), flat_bound_samplers.size()) !=
+ flat_bound_samplers.size() ||
+ file.ReadArray(flat_bindless_samplers.data(), flat_bindless_samplers.size()) !=
+ flat_bindless_samplers.size()) {
+ return false;
+ }
+ for (const auto& key : flat_keys) {
+ keys.insert({{key.cbuf, key.offset}, key.value});
+ }
+ for (const auto& key : flat_bound_samplers) {
+ bound_samplers.emplace(key.offset, key.sampler);
+ }
+ for (const auto& key : flat_bindless_samplers) {
+ bindless_samplers.insert({{key.cbuf, key.offset}, key.sampler});
+ }
+
return true;
}
-bool ShaderDiskCacheRaw::Save(FileUtil::IOFile& file) const {
- if (file.WriteObject(unique_identifier) != 1 || file.WriteObject(static_cast<u32>(type)) != 1 ||
+bool ShaderDiskCacheEntry::Save(FileUtil::IOFile& file) const {
+ if (file.WriteObject(static_cast<u32>(type)) != 1 ||
file.WriteObject(static_cast<u32>(code.size())) != 1 ||
file.WriteObject(static_cast<u32>(code_b.size())) != 1) {
return false;
}
-
- if (file.WriteArray(code.data(), code.size()) != code.size())
+ if (file.WriteArray(code.data(), code.size()) != code.size()) {
return false;
-
+ }
if (HasProgramA() && file.WriteArray(code_b.data(), code_b.size()) != code_b.size()) {
return false;
}
- return true;
+
+ if (file.WriteObject(unique_identifier) != 1 || file.WriteObject(bound_buffer) != 1 ||
+ file.WriteObject(static_cast<u8>(texture_handler_size.has_value())) != 1 ||
+ file.WriteObject(texture_handler_size.value_or(0)) != 1 ||
+ file.WriteObject(graphics_info) != 1 || file.WriteObject(compute_info) != 1 ||
+ file.WriteObject(static_cast<u32>(keys.size())) != 1 ||
+ file.WriteObject(static_cast<u32>(bound_samplers.size())) != 1 ||
+ file.WriteObject(static_cast<u32>(bindless_samplers.size())) != 1) {
+ return false;
+ }
+
+ std::vector<ConstBufferKey> flat_keys;
+ flat_keys.reserve(keys.size());
+ for (const auto& [address, value] : keys) {
+ flat_keys.push_back(ConstBufferKey{address.first, address.second, value});
+ }
+
+ std::vector<BoundSamplerKey> flat_bound_samplers;
+ flat_bound_samplers.reserve(bound_samplers.size());
+ for (const auto& [address, sampler] : bound_samplers) {
+ flat_bound_samplers.push_back(BoundSamplerKey{address, sampler});
+ }
+
+ std::vector<BindlessSamplerKey> flat_bindless_samplers;
+ flat_bindless_samplers.reserve(bindless_samplers.size());
+ for (const auto& [address, sampler] : bindless_samplers) {
+ flat_bindless_samplers.push_back(
+ BindlessSamplerKey{address.first, address.second, sampler});
+ }
+
+ return file.WriteArray(flat_keys.data(), flat_keys.size()) == flat_keys.size() &&
+ file.WriteArray(flat_bound_samplers.data(), flat_bound_samplers.size()) ==
+ flat_bound_samplers.size() &&
+ file.WriteArray(flat_bindless_samplers.data(), flat_bindless_samplers.size()) ==
+ flat_bindless_samplers.size();
}
ShaderDiskCacheOpenGL::ShaderDiskCacheOpenGL(Core::System& system) : system{system} {}
ShaderDiskCacheOpenGL::~ShaderDiskCacheOpenGL() = default;
-std::optional<std::pair<std::vector<ShaderDiskCacheRaw>, std::vector<ShaderDiskCacheUsage>>>
-ShaderDiskCacheOpenGL::LoadTransferable() {
+std::optional<std::vector<ShaderDiskCacheEntry>> ShaderDiskCacheOpenGL::LoadTransferable() {
// Skip games without title id
const bool has_title_id = system.CurrentProcess()->GetTitleID() != 0;
if (!Settings::values.use_disk_shader_cache || !has_title_id) {
@@ -130,17 +185,14 @@ ShaderDiskCacheOpenGL::LoadTransferable() {
FileUtil::IOFile file(GetTransferablePath(), "rb");
if (!file.IsOpen()) {
- LOG_INFO(Render_OpenGL, "No transferable shader cache found for game with title id={}",
- GetTitleID());
+ LOG_INFO(Render_OpenGL, "No transferable shader cache found");
is_usable = true;
return {};
}
u32 version{};
if (file.ReadBytes(&version, sizeof(version)) != sizeof(version)) {
- LOG_ERROR(Render_OpenGL,
- "Failed to get transferable cache version for title id={}, skipping",
- GetTitleID());
+ LOG_ERROR(Render_OpenGL, "Failed to get transferable cache version, skipping it");
return {};
}
@@ -158,105 +210,42 @@ ShaderDiskCacheOpenGL::LoadTransferable() {
}
// Version is valid, load the shaders
- constexpr const char error_loading[] = "Failed to load transferable raw entry, skipping";
- std::vector<ShaderDiskCacheRaw> raws;
- std::vector<ShaderDiskCacheUsage> usages;
+ std::vector<ShaderDiskCacheEntry> entries;
while (file.Tell() < file.GetSize()) {
- TransferableEntryKind kind{};
- if (file.ReadBytes(&kind, sizeof(u32)) != sizeof(u32)) {
- LOG_ERROR(Render_OpenGL, "Failed to read transferable file, skipping");
- return {};
- }
-
- switch (kind) {
- case TransferableEntryKind::Raw: {
- ShaderDiskCacheRaw entry;
- if (!entry.Load(file)) {
- LOG_ERROR(Render_OpenGL, error_loading);
- return {};
- }
- transferable.insert({entry.GetUniqueIdentifier(), {}});
- raws.push_back(std::move(entry));
- break;
- }
- case TransferableEntryKind::Usage: {
- ShaderDiskCacheUsage usage;
-
- u32 num_keys{};
- u32 num_bound_samplers{};
- u32 num_bindless_samplers{};
- if (file.ReadArray(&usage.unique_identifier, 1) != 1 ||
- file.ReadArray(&usage.variant, 1) != 1 ||
- file.ReadArray(&usage.bound_buffer, 1) != 1 || file.ReadArray(&num_keys, 1) != 1 ||
- file.ReadArray(&num_bound_samplers, 1) != 1 ||
- file.ReadArray(&num_bindless_samplers, 1) != 1) {
- LOG_ERROR(Render_OpenGL, error_loading);
- return {};
- }
-
- std::vector<ConstBufferKey> keys(num_keys);
- std::vector<BoundSamplerKey> bound_samplers(num_bound_samplers);
- std::vector<BindlessSamplerKey> bindless_samplers(num_bindless_samplers);
- if (file.ReadArray(keys.data(), keys.size()) != keys.size() ||
- file.ReadArray(bound_samplers.data(), bound_samplers.size()) !=
- bound_samplers.size() ||
- file.ReadArray(bindless_samplers.data(), bindless_samplers.size()) !=
- bindless_samplers.size()) {
- LOG_ERROR(Render_OpenGL, error_loading);
- return {};
- }
- for (const auto& key : keys) {
- usage.keys.insert({{key.cbuf, key.offset}, key.value});
- }
- for (const auto& key : bound_samplers) {
- usage.bound_samplers.emplace(key.offset, key.sampler);
- }
- for (const auto& key : bindless_samplers) {
- usage.bindless_samplers.insert({{key.cbuf, key.offset}, key.sampler});
- }
-
- usages.push_back(std::move(usage));
- break;
- }
- default:
- LOG_ERROR(Render_OpenGL, "Unknown transferable shader cache entry kind={}, skipping",
- static_cast<u32>(kind));
+ ShaderDiskCacheEntry& entry = entries.emplace_back();
+ if (!entry.Load(file)) {
+ LOG_ERROR(Render_OpenGL, "Failed to load transferable raw entry, skipping");
return {};
}
}
is_usable = true;
- return {{std::move(raws), std::move(usages)}};
+ return {std::move(entries)};
}
-std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>
-ShaderDiskCacheOpenGL::LoadPrecompiled() {
+std::vector<ShaderDiskCachePrecompiled> ShaderDiskCacheOpenGL::LoadPrecompiled() {
if (!is_usable) {
return {};
}
- std::string path = GetPrecompiledPath();
- FileUtil::IOFile file(path, "rb");
+ FileUtil::IOFile file(GetPrecompiledPath(), "rb");
if (!file.IsOpen()) {
- LOG_INFO(Render_OpenGL, "No precompiled shader cache found for game with title id={}",
- GetTitleID());
+ LOG_INFO(Render_OpenGL, "No precompiled shader cache found");
return {};
}
- const auto result = LoadPrecompiledFile(file);
- if (!result) {
- LOG_INFO(Render_OpenGL,
- "Failed to load precompiled cache for game with title id={}, removing",
- GetTitleID());
- file.Close();
- InvalidatePrecompiled();
- return {};
+ if (const auto result = LoadPrecompiledFile(file)) {
+ return *result;
}
- return *result;
+
+ LOG_INFO(Render_OpenGL, "Failed to load precompiled cache");
+ file.Close();
+ InvalidatePrecompiled();
+ return {};
}
-std::optional<std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>>
-ShaderDiskCacheOpenGL::LoadPrecompiledFile(FileUtil::IOFile& file) {
+std::optional<std::vector<ShaderDiskCachePrecompiled>> ShaderDiskCacheOpenGL::LoadPrecompiledFile(
+ FileUtil::IOFile& file) {
// Read compressed file from disk and decompress to virtual precompiled cache file
std::vector<u8> compressed(file.GetSize());
file.ReadBytes(compressed.data(), compressed.size());
@@ -275,58 +264,22 @@ ShaderDiskCacheOpenGL::LoadPrecompiledFile(FileUtil::IOFile& file) {
return {};
}
- ShaderDumpsMap dumps;
+ std::vector<ShaderDiskCachePrecompiled> entries;
while (precompiled_cache_virtual_file_offset < precompiled_cache_virtual_file.GetSize()) {
- u32 num_keys{};
- u32 num_bound_samplers{};
- u32 num_bindless_samplers{};
- ShaderDiskCacheUsage usage;
- if (!LoadObjectFromPrecompiled(usage.unique_identifier) ||
- !LoadObjectFromPrecompiled(usage.variant) ||
- !LoadObjectFromPrecompiled(usage.bound_buffer) ||
- !LoadObjectFromPrecompiled(num_keys) ||
- !LoadObjectFromPrecompiled(num_bound_samplers) ||
- !LoadObjectFromPrecompiled(num_bindless_samplers)) {
- return {};
- }
- std::vector<ConstBufferKey> keys(num_keys);
- std::vector<BoundSamplerKey> bound_samplers(num_bound_samplers);
- std::vector<BindlessSamplerKey> bindless_samplers(num_bindless_samplers);
- if (!LoadArrayFromPrecompiled(keys.data(), keys.size()) ||
- !LoadArrayFromPrecompiled(bound_samplers.data(), bound_samplers.size()) !=
- bound_samplers.size() ||
- !LoadArrayFromPrecompiled(bindless_samplers.data(), bindless_samplers.size()) !=
- bindless_samplers.size()) {
- return {};
- }
- for (const auto& key : keys) {
- usage.keys.insert({{key.cbuf, key.offset}, key.value});
- }
- for (const auto& key : bound_samplers) {
- usage.bound_samplers.emplace(key.offset, key.sampler);
- }
- for (const auto& key : bindless_samplers) {
- usage.bindless_samplers.insert({{key.cbuf, key.offset}, key.sampler});
- }
-
- ShaderDiskCacheDump dump;
- if (!LoadObjectFromPrecompiled(dump.binary_format)) {
- return {};
- }
-
- u32 binary_length{};
- if (!LoadObjectFromPrecompiled(binary_length)) {
+ u32 binary_size;
+ auto& entry = entries.emplace_back();
+ if (!LoadObjectFromPrecompiled(entry.unique_identifier) ||
+ !LoadObjectFromPrecompiled(entry.binary_format) ||
+ !LoadObjectFromPrecompiled(binary_size)) {
return {};
}
- dump.binary.resize(binary_length);
- if (!LoadArrayFromPrecompiled(dump.binary.data(), dump.binary.size())) {
+ entry.binary.resize(binary_size);
+ if (!LoadArrayFromPrecompiled(entry.binary.data(), entry.binary.size())) {
return {};
}
-
- dumps.emplace(std::move(usage), dump);
}
- return dumps;
+ return entries;
}
void ShaderDiskCacheOpenGL::InvalidateTransferable() {
@@ -346,13 +299,13 @@ void ShaderDiskCacheOpenGL::InvalidatePrecompiled() {
}
}
-void ShaderDiskCacheOpenGL::SaveRaw(const ShaderDiskCacheRaw& entry) {
+void ShaderDiskCacheOpenGL::SaveEntry(const ShaderDiskCacheEntry& entry) {
if (!is_usable) {
return;
}
- const u64 id = entry.GetUniqueIdentifier();
- if (transferable.find(id) != transferable.end()) {
+ const u64 id = entry.unique_identifier;
+ if (stored_transferable.find(id) != stored_transferable.end()) {
// The shader already exists
return;
}
@@ -361,71 +314,17 @@ void ShaderDiskCacheOpenGL::SaveRaw(const ShaderDiskCacheRaw& entry) {
if (!file.IsOpen()) {
return;
}
- if (file.WriteObject(TransferableEntryKind::Raw) != 1 || !entry.Save(file)) {
+ if (!entry.Save(file)) {
LOG_ERROR(Render_OpenGL, "Failed to save raw transferable cache entry, removing");
file.Close();
InvalidateTransferable();
return;
}
- transferable.insert({id, {}});
-}
-void ShaderDiskCacheOpenGL::SaveUsage(const ShaderDiskCacheUsage& usage) {
- if (!is_usable) {
- return;
- }
-
- const auto it = transferable.find(usage.unique_identifier);
- ASSERT_MSG(it != transferable.end(), "Saving shader usage without storing raw previously");
-
- auto& usages{it->second};
- if (usages.find(usage) != usages.end()) {
- // Skip this variant since the shader is already stored.
- return;
- }
- usages.insert(usage);
-
- FileUtil::IOFile file = AppendTransferableFile();
- if (!file.IsOpen())
- return;
- const auto Close = [&] {
- LOG_ERROR(Render_OpenGL, "Failed to save usage transferable cache entry, removing");
- file.Close();
- InvalidateTransferable();
- };
-
- if (file.WriteObject(TransferableEntryKind::Usage) != 1 ||
- file.WriteObject(usage.unique_identifier) != 1 || file.WriteObject(usage.variant) != 1 ||
- file.WriteObject(usage.bound_buffer) != 1 ||
- file.WriteObject(static_cast<u32>(usage.keys.size())) != 1 ||
- file.WriteObject(static_cast<u32>(usage.bound_samplers.size())) != 1 ||
- file.WriteObject(static_cast<u32>(usage.bindless_samplers.size())) != 1) {
- Close();
- return;
- }
- for (const auto& [pair, value] : usage.keys) {
- const auto [cbuf, offset] = pair;
- if (file.WriteObject(ConstBufferKey{cbuf, offset, value}) != 1) {
- Close();
- return;
- }
- }
- for (const auto& [offset, sampler] : usage.bound_samplers) {
- if (file.WriteObject(BoundSamplerKey{offset, sampler}) != 1) {
- Close();
- return;
- }
- }
- for (const auto& [pair, sampler] : usage.bindless_samplers) {
- const auto [cbuf, offset] = pair;
- if (file.WriteObject(BindlessSamplerKey{cbuf, offset, sampler}) != 1) {
- Close();
- return;
- }
- }
+ stored_transferable.insert(id);
}
-void ShaderDiskCacheOpenGL::SaveDump(const ShaderDiskCacheUsage& usage, GLuint program) {
+void ShaderDiskCacheOpenGL::SavePrecompiled(u64 unique_identifier, GLuint program) {
if (!is_usable) {
return;
}
@@ -437,51 +336,19 @@ void ShaderDiskCacheOpenGL::SaveDump(const ShaderDiskCacheUsage& usage, GLuint p
SavePrecompiledHeaderToVirtualPrecompiledCache();
}
- GLint binary_length{};
+ GLint binary_length;
glGetProgramiv(program, GL_PROGRAM_BINARY_LENGTH, &binary_length);
- GLenum binary_format{};
+ GLenum binary_format;
std::vector<u8> binary(binary_length);
glGetProgramBinary(program, binary_length, nullptr, &binary_format, binary.data());
- const auto Close = [&] {
+ if (!SaveObjectToPrecompiled(unique_identifier) || !SaveObjectToPrecompiled(binary_format) ||
+ !SaveObjectToPrecompiled(static_cast<u32>(binary.size())) ||
+ !SaveArrayToPrecompiled(binary.data(), binary.size())) {
LOG_ERROR(Render_OpenGL, "Failed to save binary program file in shader={:016X}, removing",
- usage.unique_identifier);
+ unique_identifier);
InvalidatePrecompiled();
- };
-
- if (!SaveObjectToPrecompiled(usage.unique_identifier) ||
- !SaveObjectToPrecompiled(usage.variant) || !SaveObjectToPrecompiled(usage.bound_buffer) ||
- !SaveObjectToPrecompiled(static_cast<u32>(usage.keys.size())) ||
- !SaveObjectToPrecompiled(static_cast<u32>(usage.bound_samplers.size())) ||
- !SaveObjectToPrecompiled(static_cast<u32>(usage.bindless_samplers.size()))) {
- Close();
- return;
- }
- for (const auto& [pair, value] : usage.keys) {
- const auto [cbuf, offset] = pair;
- if (SaveObjectToPrecompiled(ConstBufferKey{cbuf, offset, value}) != 1) {
- Close();
- return;
- }
- }
- for (const auto& [offset, sampler] : usage.bound_samplers) {
- if (SaveObjectToPrecompiled(BoundSamplerKey{offset, sampler}) != 1) {
- Close();
- return;
- }
- }
- for (const auto& [pair, sampler] : usage.bindless_samplers) {
- const auto [cbuf, offset] = pair;
- if (SaveObjectToPrecompiled(BindlessSamplerKey{cbuf, offset, sampler}) != 1) {
- Close();
- return;
- }
- }
- if (!SaveObjectToPrecompiled(static_cast<u32>(binary_format)) ||
- !SaveObjectToPrecompiled(static_cast<u32>(binary_length)) ||
- !SaveArrayToPrecompiled(binary.data(), binary.size())) {
- Close();
}
}
@@ -534,7 +401,6 @@ void ShaderDiskCacheOpenGL::SaveVirtualPrecompiledFile() {
if (file.WriteBytes(compressed.data(), compressed.size()) != compressed.size()) {
LOG_ERROR(Render_OpenGL, "Failed to write precompiled cache version in path={}",
precompiled_path);
- return;
}
}
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.h b/src/video_core/renderer_opengl/gl_shader_disk_cache.h
index ef2371f6d..d5be52e40 100644
--- a/src/video_core/renderer_opengl/gl_shader_disk_cache.h
+++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.h
@@ -19,8 +19,7 @@
#include "common/common_types.h"
#include "core/file_sys/vfs_vector.h"
#include "video_core/engines/shader_type.h"
-#include "video_core/renderer_opengl/gl_shader_gen.h"
-#include "video_core/shader/const_buffer_locker.h"
+#include "video_core/shader/registry.h"
namespace Core {
class System;
@@ -32,139 +31,39 @@ class IOFile;
namespace OpenGL {
-struct ShaderDiskCacheUsage;
-struct ShaderDiskCacheDump;
-
using ProgramCode = std::vector<u64>;
-using ShaderDumpsMap = std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>;
-
-/// Describes the different variants a program can be compiled with.
-struct ProgramVariant final {
- ProgramVariant() = default;
-
- /// Graphics constructor.
- explicit constexpr ProgramVariant(GLenum primitive_mode) noexcept
- : primitive_mode{primitive_mode} {}
-
- /// Compute constructor.
- explicit constexpr ProgramVariant(u32 block_x, u32 block_y, u32 block_z, u32 shared_memory_size,
- u32 local_memory_size) noexcept
- : block_x{block_x}, block_y{static_cast<u16>(block_y)}, block_z{static_cast<u16>(block_z)},
- shared_memory_size{shared_memory_size}, local_memory_size{local_memory_size} {}
-
- // Graphics specific parameters.
- GLenum primitive_mode{};
-
- // Compute specific parameters.
- u32 block_x{};
- u16 block_y{};
- u16 block_z{};
- u32 shared_memory_size{};
- u32 local_memory_size{};
-
- bool operator==(const ProgramVariant& rhs) const noexcept {
- return std::tie(primitive_mode, block_x, block_y, block_z, shared_memory_size,
- local_memory_size) == std::tie(rhs.primitive_mode, rhs.block_x, rhs.block_y,
- rhs.block_z, rhs.shared_memory_size,
- rhs.local_memory_size);
- }
-
- bool operator!=(const ProgramVariant& rhs) const noexcept {
- return !operator==(rhs);
- }
-};
-static_assert(std::is_trivially_copyable_v<ProgramVariant>);
-
-/// Describes how a shader is used.
-struct ShaderDiskCacheUsage {
- u64 unique_identifier{};
- ProgramVariant variant;
- u32 bound_buffer{};
- VideoCommon::Shader::KeyMap keys;
- VideoCommon::Shader::BoundSamplerMap bound_samplers;
- VideoCommon::Shader::BindlessSamplerMap bindless_samplers;
-
- bool operator==(const ShaderDiskCacheUsage& rhs) const {
- return std::tie(unique_identifier, variant, keys, bound_samplers, bindless_samplers) ==
- std::tie(rhs.unique_identifier, rhs.variant, rhs.keys, rhs.bound_samplers,
- rhs.bindless_samplers);
- }
-
- bool operator!=(const ShaderDiskCacheUsage& rhs) const {
- return !operator==(rhs);
- }
-};
-
-} // namespace OpenGL
-
-namespace std {
-
-template <>
-struct hash<OpenGL::ProgramVariant> {
- std::size_t operator()(const OpenGL::ProgramVariant& variant) const noexcept {
- return (static_cast<std::size_t>(variant.primitive_mode) << 6) ^
- static_cast<std::size_t>(variant.block_x) ^
- (static_cast<std::size_t>(variant.block_y) << 32) ^
- (static_cast<std::size_t>(variant.block_z) << 48) ^
- (static_cast<std::size_t>(variant.shared_memory_size) << 16) ^
- (static_cast<std::size_t>(variant.local_memory_size) << 36);
- }
-};
-
-template <>
-struct hash<OpenGL::ShaderDiskCacheUsage> {
- std::size_t operator()(const OpenGL::ShaderDiskCacheUsage& usage) const noexcept {
- return static_cast<std::size_t>(usage.unique_identifier) ^
- std::hash<OpenGL::ProgramVariant>{}(usage.variant);
- }
-};
-
-} // namespace std
-
-namespace OpenGL {
-/// Describes a shader how it's used by the guest GPU
-class ShaderDiskCacheRaw {
-public:
- explicit ShaderDiskCacheRaw(u64 unique_identifier, Tegra::Engines::ShaderType type,
- ProgramCode code, ProgramCode code_b = {});
- ShaderDiskCacheRaw();
- ~ShaderDiskCacheRaw();
+/// Describes a shader and how it's used by the guest GPU
+struct ShaderDiskCacheEntry {
+ ShaderDiskCacheEntry();
+ ~ShaderDiskCacheEntry();
bool Load(FileUtil::IOFile& file);
bool Save(FileUtil::IOFile& file) const;
- u64 GetUniqueIdentifier() const {
- return unique_identifier;
- }
-
bool HasProgramA() const {
return !code.empty() && !code_b.empty();
}
- Tegra::Engines::ShaderType GetType() const {
- return type;
- }
-
- const ProgramCode& GetCode() const {
- return code;
- }
-
- const ProgramCode& GetCodeB() const {
- return code_b;
- }
-
-private:
- u64 unique_identifier{};
Tegra::Engines::ShaderType type{};
ProgramCode code;
ProgramCode code_b;
+
+ u64 unique_identifier = 0;
+ std::optional<u32> texture_handler_size;
+ u32 bound_buffer = 0;
+ VideoCommon::Shader::GraphicsInfo graphics_info;
+ VideoCommon::Shader::ComputeInfo compute_info;
+ VideoCommon::Shader::KeyMap keys;
+ VideoCommon::Shader::BoundSamplerMap bound_samplers;
+ VideoCommon::Shader::BindlessSamplerMap bindless_samplers;
};
/// Contains an OpenGL dumped binary program
-struct ShaderDiskCacheDump {
- GLenum binary_format{};
+struct ShaderDiskCachePrecompiled {
+ u64 unique_identifier = 0;
+ GLenum binary_format = 0;
std::vector<u8> binary;
};
@@ -174,11 +73,10 @@ public:
~ShaderDiskCacheOpenGL();
/// Loads transferable cache. If file has a old version or on failure, it deletes the file.
- std::optional<std::pair<std::vector<ShaderDiskCacheRaw>, std::vector<ShaderDiskCacheUsage>>>
- LoadTransferable();
+ std::optional<std::vector<ShaderDiskCacheEntry>> LoadTransferable();
/// Loads current game's precompiled cache. Invalidates on failure.
- std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump> LoadPrecompiled();
+ std::vector<ShaderDiskCachePrecompiled> LoadPrecompiled();
/// Removes the transferable (and precompiled) cache file.
void InvalidateTransferable();
@@ -187,21 +85,18 @@ public:
void InvalidatePrecompiled();
/// Saves a raw dump to the transferable file. Checks for collisions.
- void SaveRaw(const ShaderDiskCacheRaw& entry);
-
- /// Saves shader usage to the transferable file. Does not check for collisions.
- void SaveUsage(const ShaderDiskCacheUsage& usage);
+ void SaveEntry(const ShaderDiskCacheEntry& entry);
/// Saves a dump entry to the precompiled file. Does not check for collisions.
- void SaveDump(const ShaderDiskCacheUsage& usage, GLuint program);
+ void SavePrecompiled(u64 unique_identifier, GLuint program);
/// Serializes virtual precompiled shader cache file to real file
void SaveVirtualPrecompiledFile();
private:
/// Loads the transferable cache. Returns empty on failure.
- std::optional<std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>>
- LoadPrecompiledFile(FileUtil::IOFile& file);
+ std::optional<std::vector<ShaderDiskCachePrecompiled>> LoadPrecompiledFile(
+ FileUtil::IOFile& file);
/// Opens current game's transferable file and write it's header if it doesn't exist
FileUtil::IOFile AppendTransferableFile() const;
@@ -270,7 +165,7 @@ private:
std::size_t precompiled_cache_virtual_file_offset = 0;
// Stored transferable shaders
- std::unordered_map<u64, std::unordered_set<ShaderDiskCacheUsage>> transferable;
+ std::unordered_set<u64> stored_transferable;
// The cache has been loaded at boot
bool is_usable{};
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp
deleted file mode 100644
index 34946fb47..000000000
--- a/src/video_core/renderer_opengl/gl_shader_gen.cpp
+++ /dev/null
@@ -1,109 +0,0 @@
-// Copyright 2018 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include <string>
-
-#include <fmt/format.h>
-
-#include "video_core/engines/maxwell_3d.h"
-#include "video_core/engines/shader_type.h"
-#include "video_core/renderer_opengl/gl_device.h"
-#include "video_core/renderer_opengl/gl_shader_decompiler.h"
-#include "video_core/renderer_opengl/gl_shader_gen.h"
-#include "video_core/shader/shader_ir.h"
-
-namespace OpenGL::GLShader {
-
-using Tegra::Engines::Maxwell3D;
-using Tegra::Engines::ShaderType;
-using VideoCommon::Shader::CompileDepth;
-using VideoCommon::Shader::CompilerSettings;
-using VideoCommon::Shader::ProgramCode;
-using VideoCommon::Shader::ShaderIR;
-
-std::string GenerateVertexShader(const Device& device, const ShaderIR& ir, const ShaderIR* ir_b) {
- std::string out = GetCommonDeclarations();
- out += fmt::format(R"(
-layout (std140, binding = {}) uniform vs_config {{
- float y_direction;
-}};
-
-)",
- EmulationUniformBlockBinding);
- out += Decompile(device, ir, ShaderType::Vertex, "vertex");
- if (ir_b) {
- out += Decompile(device, *ir_b, ShaderType::Vertex, "vertex_b");
- }
-
- out += R"(
-void main() {
- gl_Position = vec4(0.0f, 0.0f, 0.0f, 1.0f);
- execute_vertex();
-)";
- if (ir_b) {
- out += " execute_vertex_b();";
- }
- out += "}\n";
- return out;
-}
-
-std::string GenerateGeometryShader(const Device& device, const ShaderIR& ir) {
- std::string out = GetCommonDeclarations();
- out += fmt::format(R"(
-layout (std140, binding = {}) uniform gs_config {{
- float y_direction;
-}};
-
-)",
- EmulationUniformBlockBinding);
- out += Decompile(device, ir, ShaderType::Geometry, "geometry");
-
- out += R"(
-void main() {
- execute_geometry();
-}
-)";
- return out;
-}
-
-std::string GenerateFragmentShader(const Device& device, const ShaderIR& ir) {
- std::string out = GetCommonDeclarations();
- out += fmt::format(R"(
-layout (location = 0) out vec4 FragColor0;
-layout (location = 1) out vec4 FragColor1;
-layout (location = 2) out vec4 FragColor2;
-layout (location = 3) out vec4 FragColor3;
-layout (location = 4) out vec4 FragColor4;
-layout (location = 5) out vec4 FragColor5;
-layout (location = 6) out vec4 FragColor6;
-layout (location = 7) out vec4 FragColor7;
-
-layout (std140, binding = {}) uniform fs_config {{
- float y_direction;
-}};
-
-)",
- EmulationUniformBlockBinding);
- out += Decompile(device, ir, ShaderType::Fragment, "fragment");
-
- out += R"(
-void main() {
- execute_fragment();
-}
-)";
- return out;
-}
-
-std::string GenerateComputeShader(const Device& device, const ShaderIR& ir) {
- std::string out = GetCommonDeclarations();
- out += Decompile(device, ir, ShaderType::Compute, "compute");
- out += R"(
-void main() {
- execute_compute();
-}
-)";
- return out;
-}
-
-} // namespace OpenGL::GLShader
diff --git a/src/video_core/renderer_opengl/gl_shader_gen.h b/src/video_core/renderer_opengl/gl_shader_gen.h
deleted file mode 100644
index cba2be9f9..000000000
--- a/src/video_core/renderer_opengl/gl_shader_gen.h
+++ /dev/null
@@ -1,34 +0,0 @@
-// Copyright 2018 yuzu Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#pragma once
-
-#include <vector>
-
-#include "common/common_types.h"
-#include "video_core/renderer_opengl/gl_shader_decompiler.h"
-#include "video_core/shader/shader_ir.h"
-
-namespace OpenGL {
-class Device;
-}
-
-namespace OpenGL::GLShader {
-
-using VideoCommon::Shader::ProgramCode;
-using VideoCommon::Shader::ShaderIR;
-
-/// Generates the GLSL vertex shader program source code for the given VS program
-std::string GenerateVertexShader(const Device& device, const ShaderIR& ir, const ShaderIR* ir_b);
-
-/// Generates the GLSL geometry shader program source code for the given GS program
-std::string GenerateGeometryShader(const Device& device, const ShaderIR& ir);
-
-/// Generates the GLSL fragment shader program source code for the given FS program
-std::string GenerateFragmentShader(const Device& device, const ShaderIR& ir);
-
-/// Generates the GLSL compute shader program source code for the given CS program
-std::string GenerateComputeShader(const Device& device, const ShaderIR& ir);
-
-} // namespace OpenGL::GLShader
diff --git a/src/video_core/renderer_opengl/gl_shader_manager.cpp b/src/video_core/renderer_opengl/gl_shader_manager.cpp
index 75d3fac04..9c7b0adbd 100644
--- a/src/video_core/renderer_opengl/gl_shader_manager.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_manager.cpp
@@ -2,45 +2,52 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
+#include <glad/glad.h>
+
#include "common/common_types.h"
#include "video_core/engines/maxwell_3d.h"
#include "video_core/renderer_opengl/gl_shader_manager.h"
namespace OpenGL::GLShader {
-using Tegra::Engines::Maxwell3D;
-
-ProgramManager::ProgramManager() {
- pipeline.Create();
-}
+ProgramManager::ProgramManager() = default;
ProgramManager::~ProgramManager() = default;
-void ProgramManager::ApplyTo(OpenGLState& state) {
- UpdatePipeline();
- state.draw.shader_program = 0;
- state.draw.program_pipeline = pipeline.handle;
+void ProgramManager::Create() {
+ graphics_pipeline.Create();
+ glBindProgramPipeline(graphics_pipeline.handle);
}
-void ProgramManager::UpdatePipeline() {
+void ProgramManager::BindGraphicsPipeline() {
+ if (!is_graphics_bound) {
+ is_graphics_bound = true;
+ glUseProgram(0);
+ }
+
// Avoid updating the pipeline when values have no changed
if (old_state == current_state) {
return;
}
// Workaround for AMD bug
- constexpr GLenum all_used_stages{GL_VERTEX_SHADER_BIT | GL_GEOMETRY_SHADER_BIT |
- GL_FRAGMENT_SHADER_BIT};
- glUseProgramStages(pipeline.handle, all_used_stages, 0);
-
- glUseProgramStages(pipeline.handle, GL_VERTEX_SHADER_BIT, current_state.vertex_shader);
- glUseProgramStages(pipeline.handle, GL_GEOMETRY_SHADER_BIT, current_state.geometry_shader);
- glUseProgramStages(pipeline.handle, GL_FRAGMENT_SHADER_BIT, current_state.fragment_shader);
+ static constexpr GLenum all_used_stages{GL_VERTEX_SHADER_BIT | GL_GEOMETRY_SHADER_BIT |
+ GL_FRAGMENT_SHADER_BIT};
+ const GLuint handle = graphics_pipeline.handle;
+ glUseProgramStages(handle, all_used_stages, 0);
+ glUseProgramStages(handle, GL_VERTEX_SHADER_BIT, current_state.vertex_shader);
+ glUseProgramStages(handle, GL_GEOMETRY_SHADER_BIT, current_state.geometry_shader);
+ glUseProgramStages(handle, GL_FRAGMENT_SHADER_BIT, current_state.fragment_shader);
old_state = current_state;
}
-void MaxwellUniformData::SetFromRegs(const Maxwell3D& maxwell) {
+void ProgramManager::BindComputeShader(GLuint program) {
+ is_graphics_bound = false;
+ glUseProgram(program);
+}
+
+void MaxwellUniformData::SetFromRegs(const Tegra::Engines::Maxwell3D& maxwell) {
const auto& regs = maxwell.regs;
// Y_NEGATE controls what value S2R returns for the Y_DIRECTION system value.
diff --git a/src/video_core/renderer_opengl/gl_shader_manager.h b/src/video_core/renderer_opengl/gl_shader_manager.h
index 478c165ce..d2e47f2a9 100644
--- a/src/video_core/renderer_opengl/gl_shader_manager.h
+++ b/src/video_core/renderer_opengl/gl_shader_manager.h
@@ -9,7 +9,6 @@
#include <glad/glad.h>
#include "video_core/renderer_opengl/gl_resource_manager.h"
-#include "video_core/renderer_opengl/gl_state.h"
#include "video_core/renderer_opengl/maxwell_to_gl.h"
namespace OpenGL::GLShader {
@@ -32,49 +31,47 @@ public:
explicit ProgramManager();
~ProgramManager();
- void ApplyTo(OpenGLState& state);
+ void Create();
- void UseProgrammableVertexShader(GLuint program) {
+ /// Updates the graphics pipeline and binds it.
+ void BindGraphicsPipeline();
+
+ /// Binds a compute shader.
+ void BindComputeShader(GLuint program);
+
+ void UseVertexShader(GLuint program) {
current_state.vertex_shader = program;
}
- void UseProgrammableGeometryShader(GLuint program) {
+ void UseGeometryShader(GLuint program) {
current_state.geometry_shader = program;
}
- void UseProgrammableFragmentShader(GLuint program) {
+ void UseFragmentShader(GLuint program) {
current_state.fragment_shader = program;
}
- void UseTrivialGeometryShader() {
- current_state.geometry_shader = 0;
- }
-
- void UseTrivialFragmentShader() {
- current_state.fragment_shader = 0;
- }
-
private:
struct PipelineState {
- bool operator==(const PipelineState& rhs) const {
+ bool operator==(const PipelineState& rhs) const noexcept {
return vertex_shader == rhs.vertex_shader && fragment_shader == rhs.fragment_shader &&
geometry_shader == rhs.geometry_shader;
}
- bool operator!=(const PipelineState& rhs) const {
+ bool operator!=(const PipelineState& rhs) const noexcept {
return !operator==(rhs);
}
- GLuint vertex_shader{};
- GLuint fragment_shader{};
- GLuint geometry_shader{};
+ GLuint vertex_shader = 0;
+ GLuint fragment_shader = 0;
+ GLuint geometry_shader = 0;
};
- void UpdatePipeline();
-
- OGLPipeline pipeline;
+ OGLPipeline graphics_pipeline;
+ OGLPipeline compute_pipeline;
PipelineState current_state;
PipelineState old_state;
+ bool is_graphics_bound = true;
};
} // namespace OpenGL::GLShader
diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp
deleted file mode 100644
index ab1f7983c..000000000
--- a/src/video_core/renderer_opengl/gl_state.cpp
+++ /dev/null
@@ -1,554 +0,0 @@
-// Copyright 2015 Citra Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#include <algorithm>
-#include <iterator>
-#include <glad/glad.h>
-#include "common/assert.h"
-#include "common/logging/log.h"
-#include "common/microprofile.h"
-#include "video_core/renderer_opengl/gl_state.h"
-
-MICROPROFILE_DEFINE(OpenGL_State, "OpenGL", "State Change", MP_RGB(192, 128, 128));
-
-namespace OpenGL {
-
-using Maxwell = Tegra::Engines::Maxwell3D::Regs;
-
-OpenGLState OpenGLState::cur_state;
-
-namespace {
-
-template <typename T>
-bool UpdateValue(T& current_value, const T new_value) {
- const bool changed = current_value != new_value;
- current_value = new_value;
- return changed;
-}
-
-template <typename T1, typename T2>
-bool UpdateTie(T1 current_value, const T2 new_value) {
- const bool changed = current_value != new_value;
- current_value = new_value;
- return changed;
-}
-
-template <typename T>
-std::optional<std::pair<GLuint, GLsizei>> UpdateArray(T& current_values, const T& new_values) {
- std::optional<std::size_t> first;
- std::size_t last;
- for (std::size_t i = 0; i < std::size(current_values); ++i) {
- if (!UpdateValue(current_values[i], new_values[i])) {
- continue;
- }
- if (!first) {
- first = i;
- }
- last = i;
- }
- if (!first) {
- return std::nullopt;
- }
- return std::make_pair(static_cast<GLuint>(*first), static_cast<GLsizei>(last - *first + 1));
-}
-
-void Enable(GLenum cap, bool enable) {
- if (enable) {
- glEnable(cap);
- } else {
- glDisable(cap);
- }
-}
-
-void Enable(GLenum cap, GLuint index, bool enable) {
- if (enable) {
- glEnablei(cap, index);
- } else {
- glDisablei(cap, index);
- }
-}
-
-void Enable(GLenum cap, bool& current_value, bool new_value) {
- if (UpdateValue(current_value, new_value)) {
- Enable(cap, new_value);
- }
-}
-
-void Enable(GLenum cap, GLuint index, bool& current_value, bool new_value) {
- if (UpdateValue(current_value, new_value)) {
- Enable(cap, index, new_value);
- }
-}
-
-} // Anonymous namespace
-
-OpenGLState::OpenGLState() = default;
-
-void OpenGLState::SetDefaultViewports() {
- viewports.fill(Viewport{});
-
- depth_clamp.far_plane = false;
- depth_clamp.near_plane = false;
-}
-
-void OpenGLState::ApplyFramebufferState() {
- if (UpdateValue(cur_state.draw.read_framebuffer, draw.read_framebuffer)) {
- glBindFramebuffer(GL_READ_FRAMEBUFFER, draw.read_framebuffer);
- }
- if (UpdateValue(cur_state.draw.draw_framebuffer, draw.draw_framebuffer)) {
- glBindFramebuffer(GL_DRAW_FRAMEBUFFER, draw.draw_framebuffer);
- }
-}
-
-void OpenGLState::ApplyVertexArrayState() {
- if (UpdateValue(cur_state.draw.vertex_array, draw.vertex_array)) {
- glBindVertexArray(draw.vertex_array);
- }
-}
-
-void OpenGLState::ApplyShaderProgram() {
- if (UpdateValue(cur_state.draw.shader_program, draw.shader_program)) {
- glUseProgram(draw.shader_program);
- }
-}
-
-void OpenGLState::ApplyProgramPipeline() {
- if (UpdateValue(cur_state.draw.program_pipeline, draw.program_pipeline)) {
- glBindProgramPipeline(draw.program_pipeline);
- }
-}
-
-void OpenGLState::ApplyClipDistances() {
- for (std::size_t i = 0; i < clip_distance.size(); ++i) {
- Enable(GL_CLIP_DISTANCE0 + static_cast<GLenum>(i), cur_state.clip_distance[i],
- clip_distance[i]);
- }
-}
-
-void OpenGLState::ApplyPointSize() {
- Enable(GL_PROGRAM_POINT_SIZE, cur_state.point.program_control, point.program_control);
- Enable(GL_POINT_SPRITE, cur_state.point.sprite, point.sprite);
- if (UpdateValue(cur_state.point.size, point.size)) {
- glPointSize(point.size);
- }
-}
-
-void OpenGLState::ApplyFragmentColorClamp() {
- if (UpdateValue(cur_state.fragment_color_clamp.enabled, fragment_color_clamp.enabled)) {
- glClampColor(GL_CLAMP_FRAGMENT_COLOR_ARB,
- fragment_color_clamp.enabled ? GL_TRUE : GL_FALSE);
- }
-}
-
-void OpenGLState::ApplyMultisample() {
- Enable(GL_SAMPLE_ALPHA_TO_COVERAGE, cur_state.multisample_control.alpha_to_coverage,
- multisample_control.alpha_to_coverage);
- Enable(GL_SAMPLE_ALPHA_TO_ONE, cur_state.multisample_control.alpha_to_one,
- multisample_control.alpha_to_one);
-}
-
-void OpenGLState::ApplyDepthClamp() {
- if (depth_clamp.far_plane == cur_state.depth_clamp.far_plane &&
- depth_clamp.near_plane == cur_state.depth_clamp.near_plane) {
- return;
- }
- cur_state.depth_clamp = depth_clamp;
-
- UNIMPLEMENTED_IF_MSG(depth_clamp.far_plane != depth_clamp.near_plane,
- "Unimplemented Depth Clamp Separation!");
-
- Enable(GL_DEPTH_CLAMP, depth_clamp.far_plane || depth_clamp.near_plane);
-}
-
-void OpenGLState::ApplySRgb() {
- if (cur_state.framebuffer_srgb.enabled == framebuffer_srgb.enabled)
- return;
- cur_state.framebuffer_srgb.enabled = framebuffer_srgb.enabled;
- if (framebuffer_srgb.enabled) {
- glEnable(GL_FRAMEBUFFER_SRGB);
- } else {
- glDisable(GL_FRAMEBUFFER_SRGB);
- }
-}
-
-void OpenGLState::ApplyCulling() {
- Enable(GL_CULL_FACE, cur_state.cull.enabled, cull.enabled);
-
- if (UpdateValue(cur_state.cull.mode, cull.mode)) {
- glCullFace(cull.mode);
- }
-
- if (UpdateValue(cur_state.cull.front_face, cull.front_face)) {
- glFrontFace(cull.front_face);
- }
-}
-
-void OpenGLState::ApplyRasterizerDiscard() {
- Enable(GL_RASTERIZER_DISCARD, cur_state.rasterizer_discard, rasterizer_discard);
-}
-
-void OpenGLState::ApplyColorMask() {
- if (!dirty.color_mask) {
- return;
- }
- dirty.color_mask = false;
-
- for (std::size_t i = 0; i < Maxwell::NumRenderTargets; ++i) {
- const auto& updated = color_mask[i];
- auto& current = cur_state.color_mask[i];
- if (updated.red_enabled != current.red_enabled ||
- updated.green_enabled != current.green_enabled ||
- updated.blue_enabled != current.blue_enabled ||
- updated.alpha_enabled != current.alpha_enabled) {
- current = updated;
- glColorMaski(static_cast<GLuint>(i), updated.red_enabled, updated.green_enabled,
- updated.blue_enabled, updated.alpha_enabled);
- }
- }
-}
-
-void OpenGLState::ApplyDepth() {
- Enable(GL_DEPTH_TEST, cur_state.depth.test_enabled, depth.test_enabled);
-
- if (cur_state.depth.test_func != depth.test_func) {
- cur_state.depth.test_func = depth.test_func;
- glDepthFunc(depth.test_func);
- }
-
- if (cur_state.depth.write_mask != depth.write_mask) {
- cur_state.depth.write_mask = depth.write_mask;
- glDepthMask(depth.write_mask);
- }
-}
-
-void OpenGLState::ApplyPrimitiveRestart() {
- Enable(GL_PRIMITIVE_RESTART, cur_state.primitive_restart.enabled, primitive_restart.enabled);
-
- if (cur_state.primitive_restart.index != primitive_restart.index) {
- cur_state.primitive_restart.index = primitive_restart.index;
- glPrimitiveRestartIndex(primitive_restart.index);
- }
-}
-
-void OpenGLState::ApplyStencilTest() {
- if (!dirty.stencil_state) {
- return;
- }
- dirty.stencil_state = false;
-
- Enable(GL_STENCIL_TEST, cur_state.stencil.test_enabled, stencil.test_enabled);
-
- const auto ConfigStencil = [](GLenum face, const auto& config, auto& current) {
- if (current.test_func != config.test_func || current.test_ref != config.test_ref ||
- current.test_mask != config.test_mask) {
- current.test_func = config.test_func;
- current.test_ref = config.test_ref;
- current.test_mask = config.test_mask;
- glStencilFuncSeparate(face, config.test_func, config.test_ref, config.test_mask);
- }
- if (current.action_depth_fail != config.action_depth_fail ||
- current.action_depth_pass != config.action_depth_pass ||
- current.action_stencil_fail != config.action_stencil_fail) {
- current.action_depth_fail = config.action_depth_fail;
- current.action_depth_pass = config.action_depth_pass;
- current.action_stencil_fail = config.action_stencil_fail;
- glStencilOpSeparate(face, config.action_stencil_fail, config.action_depth_fail,
- config.action_depth_pass);
- }
- if (current.write_mask != config.write_mask) {
- current.write_mask = config.write_mask;
- glStencilMaskSeparate(face, config.write_mask);
- }
- };
- ConfigStencil(GL_FRONT, stencil.front, cur_state.stencil.front);
- ConfigStencil(GL_BACK, stencil.back, cur_state.stencil.back);
-}
-
-void OpenGLState::ApplyViewport() {
- for (GLuint i = 0; i < static_cast<GLuint>(Maxwell::NumViewports); ++i) {
- const auto& updated = viewports[i];
- auto& current = cur_state.viewports[i];
-
- if (current.x != updated.x || current.y != updated.y || current.width != updated.width ||
- current.height != updated.height) {
- current.x = updated.x;
- current.y = updated.y;
- current.width = updated.width;
- current.height = updated.height;
- glViewportIndexedf(i, static_cast<GLfloat>(updated.x), static_cast<GLfloat>(updated.y),
- static_cast<GLfloat>(updated.width),
- static_cast<GLfloat>(updated.height));
- }
- if (current.depth_range_near != updated.depth_range_near ||
- current.depth_range_far != updated.depth_range_far) {
- current.depth_range_near = updated.depth_range_near;
- current.depth_range_far = updated.depth_range_far;
- glDepthRangeIndexed(i, updated.depth_range_near, updated.depth_range_far);
- }
-
- Enable(GL_SCISSOR_TEST, i, current.scissor.enabled, updated.scissor.enabled);
-
- if (current.scissor.x != updated.scissor.x || current.scissor.y != updated.scissor.y ||
- current.scissor.width != updated.scissor.width ||
- current.scissor.height != updated.scissor.height) {
- current.scissor.x = updated.scissor.x;
- current.scissor.y = updated.scissor.y;
- current.scissor.width = updated.scissor.width;
- current.scissor.height = updated.scissor.height;
- glScissorIndexed(i, updated.scissor.x, updated.scissor.y, updated.scissor.width,
- updated.scissor.height);
- }
- }
-}
-
-void OpenGLState::ApplyGlobalBlending() {
- const Blend& updated = blend[0];
- Blend& current = cur_state.blend[0];
-
- Enable(GL_BLEND, current.enabled, updated.enabled);
-
- if (current.src_rgb_func != updated.src_rgb_func ||
- current.dst_rgb_func != updated.dst_rgb_func || current.src_a_func != updated.src_a_func ||
- current.dst_a_func != updated.dst_a_func) {
- current.src_rgb_func = updated.src_rgb_func;
- current.dst_rgb_func = updated.dst_rgb_func;
- current.src_a_func = updated.src_a_func;
- current.dst_a_func = updated.dst_a_func;
- glBlendFuncSeparate(updated.src_rgb_func, updated.dst_rgb_func, updated.src_a_func,
- updated.dst_a_func);
- }
-
- if (current.rgb_equation != updated.rgb_equation || current.a_equation != updated.a_equation) {
- current.rgb_equation = updated.rgb_equation;
- current.a_equation = updated.a_equation;
- glBlendEquationSeparate(updated.rgb_equation, updated.a_equation);
- }
-}
-
-void OpenGLState::ApplyTargetBlending(std::size_t target, bool force) {
- const Blend& updated = blend[target];
- Blend& current = cur_state.blend[target];
-
- if (current.enabled != updated.enabled || force) {
- current.enabled = updated.enabled;
- Enable(GL_BLEND, static_cast<GLuint>(target), updated.enabled);
- }
-
- if (UpdateTie(std::tie(current.src_rgb_func, current.dst_rgb_func, current.src_a_func,
- current.dst_a_func),
- std::tie(updated.src_rgb_func, updated.dst_rgb_func, updated.src_a_func,
- updated.dst_a_func))) {
- glBlendFuncSeparatei(static_cast<GLuint>(target), updated.src_rgb_func,
- updated.dst_rgb_func, updated.src_a_func, updated.dst_a_func);
- }
-
- if (UpdateTie(std::tie(current.rgb_equation, current.a_equation),
- std::tie(updated.rgb_equation, updated.a_equation))) {
- glBlendEquationSeparatei(static_cast<GLuint>(target), updated.rgb_equation,
- updated.a_equation);
- }
-}
-
-void OpenGLState::ApplyBlending() {
- if (!dirty.blend_state) {
- return;
- }
- dirty.blend_state = false;
-
- if (independant_blend.enabled) {
- const bool force = independant_blend.enabled != cur_state.independant_blend.enabled;
- for (std::size_t target = 0; target < Maxwell::NumRenderTargets; ++target) {
- ApplyTargetBlending(target, force);
- }
- } else {
- ApplyGlobalBlending();
- }
- cur_state.independant_blend.enabled = independant_blend.enabled;
-
- if (UpdateTie(
- std::tie(cur_state.blend_color.red, cur_state.blend_color.green,
- cur_state.blend_color.blue, cur_state.blend_color.alpha),
- std::tie(blend_color.red, blend_color.green, blend_color.blue, blend_color.alpha))) {
- glBlendColor(blend_color.red, blend_color.green, blend_color.blue, blend_color.alpha);
- }
-}
-
-void OpenGLState::ApplyLogicOp() {
- Enable(GL_COLOR_LOGIC_OP, cur_state.logic_op.enabled, logic_op.enabled);
-
- if (UpdateValue(cur_state.logic_op.operation, logic_op.operation)) {
- glLogicOp(logic_op.operation);
- }
-}
-
-void OpenGLState::ApplyPolygonOffset() {
- if (!dirty.polygon_offset) {
- return;
- }
- dirty.polygon_offset = false;
-
- Enable(GL_POLYGON_OFFSET_FILL, cur_state.polygon_offset.fill_enable,
- polygon_offset.fill_enable);
- Enable(GL_POLYGON_OFFSET_LINE, cur_state.polygon_offset.line_enable,
- polygon_offset.line_enable);
- Enable(GL_POLYGON_OFFSET_POINT, cur_state.polygon_offset.point_enable,
- polygon_offset.point_enable);
-
- if (UpdateTie(std::tie(cur_state.polygon_offset.factor, cur_state.polygon_offset.units,
- cur_state.polygon_offset.clamp),
- std::tie(polygon_offset.factor, polygon_offset.units, polygon_offset.clamp))) {
- if (GLAD_GL_EXT_polygon_offset_clamp && polygon_offset.clamp != 0) {
- glPolygonOffsetClamp(polygon_offset.factor, polygon_offset.units, polygon_offset.clamp);
- } else {
- UNIMPLEMENTED_IF_MSG(polygon_offset.clamp != 0,
- "Unimplemented Depth polygon offset clamp.");
- glPolygonOffset(polygon_offset.factor, polygon_offset.units);
- }
- }
-}
-
-void OpenGLState::ApplyAlphaTest() {
- Enable(GL_ALPHA_TEST, cur_state.alpha_test.enabled, alpha_test.enabled);
- if (UpdateTie(std::tie(cur_state.alpha_test.func, cur_state.alpha_test.ref),
- std::tie(alpha_test.func, alpha_test.ref))) {
- glAlphaFunc(alpha_test.func, alpha_test.ref);
- }
-}
-
-void OpenGLState::ApplyClipControl() {
- if (UpdateTie(std::tie(cur_state.clip_control.origin, cur_state.clip_control.depth_mode),
- std::tie(clip_control.origin, clip_control.depth_mode))) {
- glClipControl(clip_control.origin, clip_control.depth_mode);
- }
-}
-
-void OpenGLState::ApplyTextures() {
- const std::size_t size = std::size(textures);
- for (std::size_t i = 0; i < size; ++i) {
- if (UpdateValue(cur_state.textures[i], textures[i])) {
- // BindTextureUnit doesn't support binding null textures, skip those binds.
- // TODO(Rodrigo): Stop using null textures
- if (textures[i] != 0) {
- glBindTextureUnit(static_cast<GLuint>(i), textures[i]);
- }
- }
- }
-}
-
-void OpenGLState::ApplySamplers() {
- const std::size_t size = std::size(samplers);
- for (std::size_t i = 0; i < size; ++i) {
- if (UpdateValue(cur_state.samplers[i], samplers[i])) {
- glBindSampler(static_cast<GLuint>(i), samplers[i]);
- }
- }
-}
-
-void OpenGLState::ApplyImages() {
- if (const auto update = UpdateArray(cur_state.images, images)) {
- glBindImageTextures(update->first, update->second, images.data() + update->first);
- }
-}
-
-void OpenGLState::Apply() {
- MICROPROFILE_SCOPE(OpenGL_State);
- ApplyFramebufferState();
- ApplyVertexArrayState();
- ApplyShaderProgram();
- ApplyProgramPipeline();
- ApplyClipDistances();
- ApplyPointSize();
- ApplyFragmentColorClamp();
- ApplyMultisample();
- ApplyRasterizerDiscard();
- ApplyColorMask();
- ApplyDepthClamp();
- ApplyViewport();
- ApplyStencilTest();
- ApplySRgb();
- ApplyCulling();
- ApplyDepth();
- ApplyPrimitiveRestart();
- ApplyBlending();
- ApplyLogicOp();
- ApplyTextures();
- ApplySamplers();
- ApplyImages();
- ApplyPolygonOffset();
- ApplyAlphaTest();
- ApplyClipControl();
-}
-
-void OpenGLState::EmulateViewportWithScissor() {
- auto& current = viewports[0];
- if (current.scissor.enabled) {
- const GLint left = std::max(current.x, current.scissor.x);
- const GLint right =
- std::max(current.x + current.width, current.scissor.x + current.scissor.width);
- const GLint bottom = std::max(current.y, current.scissor.y);
- const GLint top =
- std::max(current.y + current.height, current.scissor.y + current.scissor.height);
- current.scissor.x = std::max(left, 0);
- current.scissor.y = std::max(bottom, 0);
- current.scissor.width = std::max(right - left, 0);
- current.scissor.height = std::max(top - bottom, 0);
- } else {
- current.scissor.enabled = true;
- current.scissor.x = current.x;
- current.scissor.y = current.y;
- current.scissor.width = current.width;
- current.scissor.height = current.height;
- }
-}
-
-OpenGLState& OpenGLState::UnbindTexture(GLuint handle) {
- for (auto& texture : textures) {
- if (texture == handle) {
- texture = 0;
- }
- }
- return *this;
-}
-
-OpenGLState& OpenGLState::ResetSampler(GLuint handle) {
- for (auto& sampler : samplers) {
- if (sampler == handle) {
- sampler = 0;
- }
- }
- return *this;
-}
-
-OpenGLState& OpenGLState::ResetProgram(GLuint handle) {
- if (draw.shader_program == handle) {
- draw.shader_program = 0;
- }
- return *this;
-}
-
-OpenGLState& OpenGLState::ResetPipeline(GLuint handle) {
- if (draw.program_pipeline == handle) {
- draw.program_pipeline = 0;
- }
- return *this;
-}
-
-OpenGLState& OpenGLState::ResetVertexArray(GLuint handle) {
- if (draw.vertex_array == handle) {
- draw.vertex_array = 0;
- }
- return *this;
-}
-
-OpenGLState& OpenGLState::ResetFramebuffer(GLuint handle) {
- if (draw.read_framebuffer == handle) {
- draw.read_framebuffer = 0;
- }
- if (draw.draw_framebuffer == handle) {
- draw.draw_framebuffer = 0;
- }
- return *this;
-}
-
-} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h
deleted file mode 100644
index 4953eeda2..000000000
--- a/src/video_core/renderer_opengl/gl_state.h
+++ /dev/null
@@ -1,247 +0,0 @@
-// Copyright 2015 Citra Emulator Project
-// Licensed under GPLv2 or any later version
-// Refer to the license.txt file included.
-
-#pragma once
-
-#include <array>
-#include <type_traits>
-#include <glad/glad.h>
-#include "video_core/engines/maxwell_3d.h"
-
-namespace OpenGL {
-
-class OpenGLState {
-public:
- struct {
- bool enabled = false; // GL_FRAMEBUFFER_SRGB
- } framebuffer_srgb;
-
- struct {
- bool alpha_to_coverage = false; // GL_ALPHA_TO_COVERAGE
- bool alpha_to_one = false; // GL_ALPHA_TO_ONE
- } multisample_control;
-
- struct {
- bool enabled = false; // GL_CLAMP_FRAGMENT_COLOR_ARB
- } fragment_color_clamp;
-
- struct {
- bool far_plane = false;
- bool near_plane = false;
- } depth_clamp; // GL_DEPTH_CLAMP
-
- struct {
- bool enabled = false; // GL_CULL_FACE
- GLenum mode = GL_BACK; // GL_CULL_FACE_MODE
- GLenum front_face = GL_CCW; // GL_FRONT_FACE
- } cull;
-
- struct {
- bool test_enabled = false; // GL_DEPTH_TEST
- GLboolean write_mask = GL_TRUE; // GL_DEPTH_WRITEMASK
- GLenum test_func = GL_LESS; // GL_DEPTH_FUNC
- } depth;
-
- struct {
- bool enabled = false;
- GLuint index = 0;
- } primitive_restart; // GL_PRIMITIVE_RESTART
-
- bool rasterizer_discard = false; // GL_RASTERIZER_DISCARD
-
- struct ColorMask {
- GLboolean red_enabled = GL_TRUE;
- GLboolean green_enabled = GL_TRUE;
- GLboolean blue_enabled = GL_TRUE;
- GLboolean alpha_enabled = GL_TRUE;
- };
- std::array<ColorMask, Tegra::Engines::Maxwell3D::Regs::NumRenderTargets>
- color_mask; // GL_COLOR_WRITEMASK
-
- struct {
- bool test_enabled = false; // GL_STENCIL_TEST
- struct {
- GLenum test_func = GL_ALWAYS; // GL_STENCIL_FUNC
- GLint test_ref = 0; // GL_STENCIL_REF
- GLuint test_mask = 0xFFFFFFFF; // GL_STENCIL_VALUE_MASK
- GLuint write_mask = 0xFFFFFFFF; // GL_STENCIL_WRITEMASK
- GLenum action_stencil_fail = GL_KEEP; // GL_STENCIL_FAIL
- GLenum action_depth_fail = GL_KEEP; // GL_STENCIL_PASS_DEPTH_FAIL
- GLenum action_depth_pass = GL_KEEP; // GL_STENCIL_PASS_DEPTH_PASS
- } front, back;
- } stencil;
-
- struct Blend {
- bool enabled = false; // GL_BLEND
- GLenum rgb_equation = GL_FUNC_ADD; // GL_BLEND_EQUATION_RGB
- GLenum a_equation = GL_FUNC_ADD; // GL_BLEND_EQUATION_ALPHA
- GLenum src_rgb_func = GL_ONE; // GL_BLEND_SRC_RGB
- GLenum dst_rgb_func = GL_ZERO; // GL_BLEND_DST_RGB
- GLenum src_a_func = GL_ONE; // GL_BLEND_SRC_ALPHA
- GLenum dst_a_func = GL_ZERO; // GL_BLEND_DST_ALPHA
- };
- std::array<Blend, Tegra::Engines::Maxwell3D::Regs::NumRenderTargets> blend;
-
- struct {
- bool enabled = false;
- } independant_blend;
-
- struct {
- GLclampf red = 0.0f;
- GLclampf green = 0.0f;
- GLclampf blue = 0.0f;
- GLclampf alpha = 0.0f;
- } blend_color; // GL_BLEND_COLOR
-
- struct {
- bool enabled = false; // GL_LOGIC_OP_MODE
- GLenum operation = GL_COPY;
- } logic_op;
-
- static constexpr std::size_t NumSamplers = 32 * 5;
- static constexpr std::size_t NumImages = 8 * 5;
- std::array<GLuint, NumSamplers> textures = {};
- std::array<GLuint, NumSamplers> samplers = {};
- std::array<GLuint, NumImages> images = {};
-
- struct {
- GLuint read_framebuffer = 0; // GL_READ_FRAMEBUFFER_BINDING
- GLuint draw_framebuffer = 0; // GL_DRAW_FRAMEBUFFER_BINDING
- GLuint vertex_array = 0; // GL_VERTEX_ARRAY_BINDING
- GLuint shader_program = 0; // GL_CURRENT_PROGRAM
- GLuint program_pipeline = 0; // GL_PROGRAM_PIPELINE_BINDING
- } draw;
-
- struct Viewport {
- GLint x = 0;
- GLint y = 0;
- GLint width = 0;
- GLint height = 0;
- GLfloat depth_range_near = 0.0f; // GL_DEPTH_RANGE
- GLfloat depth_range_far = 1.0f; // GL_DEPTH_RANGE
- struct {
- bool enabled = false; // GL_SCISSOR_TEST
- GLint x = 0;
- GLint y = 0;
- GLsizei width = 0;
- GLsizei height = 0;
- } scissor;
- };
- std::array<Viewport, Tegra::Engines::Maxwell3D::Regs::NumViewports> viewports;
-
- struct {
- bool program_control = false; // GL_PROGRAM_POINT_SIZE
- bool sprite = false; // GL_POINT_SPRITE
- GLfloat size = 1.0f; // GL_POINT_SIZE
- } point;
-
- struct {
- bool point_enable = false;
- bool line_enable = false;
- bool fill_enable = false;
- GLfloat units = 0.0f;
- GLfloat factor = 0.0f;
- GLfloat clamp = 0.0f;
- } polygon_offset;
-
- struct {
- bool enabled = false; // GL_ALPHA_TEST
- GLenum func = GL_ALWAYS; // GL_ALPHA_TEST_FUNC
- GLfloat ref = 0.0f; // GL_ALPHA_TEST_REF
- } alpha_test;
-
- std::array<bool, 8> clip_distance = {}; // GL_CLIP_DISTANCE
-
- struct {
- GLenum origin = GL_LOWER_LEFT;
- GLenum depth_mode = GL_NEGATIVE_ONE_TO_ONE;
- } clip_control;
-
- OpenGLState();
-
- /// Get the currently active OpenGL state
- static OpenGLState GetCurState() {
- return cur_state;
- }
-
- void SetDefaultViewports();
- /// Apply this state as the current OpenGL state
- void Apply();
-
- void ApplyFramebufferState();
- void ApplyVertexArrayState();
- void ApplyShaderProgram();
- void ApplyProgramPipeline();
- void ApplyClipDistances();
- void ApplyPointSize();
- void ApplyFragmentColorClamp();
- void ApplyMultisample();
- void ApplySRgb();
- void ApplyCulling();
- void ApplyRasterizerDiscard();
- void ApplyColorMask();
- void ApplyDepth();
- void ApplyPrimitiveRestart();
- void ApplyStencilTest();
- void ApplyViewport();
- void ApplyTargetBlending(std::size_t target, bool force);
- void ApplyGlobalBlending();
- void ApplyBlending();
- void ApplyLogicOp();
- void ApplyTextures();
- void ApplySamplers();
- void ApplyImages();
- void ApplyDepthClamp();
- void ApplyPolygonOffset();
- void ApplyAlphaTest();
- void ApplyClipControl();
-
- /// Resets any references to the given resource
- OpenGLState& UnbindTexture(GLuint handle);
- OpenGLState& ResetSampler(GLuint handle);
- OpenGLState& ResetProgram(GLuint handle);
- OpenGLState& ResetPipeline(GLuint handle);
- OpenGLState& ResetVertexArray(GLuint handle);
- OpenGLState& ResetFramebuffer(GLuint handle);
-
- /// Viewport does not affects glClearBuffer so emulate viewport using scissor test
- void EmulateViewportWithScissor();
-
- void MarkDirtyBlendState() {
- dirty.blend_state = true;
- }
-
- void MarkDirtyStencilState() {
- dirty.stencil_state = true;
- }
-
- void MarkDirtyPolygonOffset() {
- dirty.polygon_offset = true;
- }
-
- void MarkDirtyColorMask() {
- dirty.color_mask = true;
- }
-
- void AllDirty() {
- dirty.blend_state = true;
- dirty.stencil_state = true;
- dirty.polygon_offset = true;
- dirty.color_mask = true;
- }
-
-private:
- static OpenGLState cur_state;
-
- struct {
- bool blend_state;
- bool stencil_state;
- bool viewport_state;
- bool polygon_offset;
- bool color_mask;
- } dirty{};
-};
-static_assert(std::is_trivially_copyable_v<OpenGLState>);
-
-} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_state_tracker.cpp b/src/video_core/renderer_opengl/gl_state_tracker.cpp
new file mode 100644
index 000000000..255ac3147
--- /dev/null
+++ b/src/video_core/renderer_opengl/gl_state_tracker.cpp
@@ -0,0 +1,247 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <algorithm>
+#include <array>
+#include <cstddef>
+
+#include "common/common_types.h"
+#include "core/core.h"
+#include "video_core/engines/maxwell_3d.h"
+#include "video_core/gpu.h"
+#include "video_core/renderer_opengl/gl_state_tracker.h"
+
+#define OFF(field_name) MAXWELL3D_REG_INDEX(field_name)
+#define NUM(field_name) (sizeof(Maxwell3D::Regs::field_name) / sizeof(u32))
+
+namespace OpenGL {
+
+namespace {
+
+using namespace Dirty;
+using namespace VideoCommon::Dirty;
+using Tegra::Engines::Maxwell3D;
+using Regs = Maxwell3D::Regs;
+using Tables = Maxwell3D::DirtyState::Tables;
+using Table = Maxwell3D::DirtyState::Table;
+
+void SetupDirtyColorMasks(Tables& tables) {
+ tables[0][OFF(color_mask_common)] = ColorMaskCommon;
+ for (std::size_t rt = 0; rt < Regs::NumRenderTargets; ++rt) {
+ const std::size_t offset = OFF(color_mask) + rt * NUM(color_mask[0]);
+ FillBlock(tables[0], offset, NUM(color_mask[0]), ColorMask0 + rt);
+ }
+
+ FillBlock(tables[1], OFF(color_mask), NUM(color_mask), ColorMasks);
+}
+
+void SetupDirtyVertexArrays(Tables& tables) {
+ static constexpr std::size_t num_array = 3;
+ static constexpr std::size_t instance_base_offset = 3;
+ for (std::size_t i = 0; i < Regs::NumVertexArrays; ++i) {
+ const std::size_t array_offset = OFF(vertex_array) + i * NUM(vertex_array[0]);
+ const std::size_t limit_offset = OFF(vertex_array_limit) + i * NUM(vertex_array_limit[0]);
+
+ FillBlock(tables, array_offset, num_array, VertexBuffer0 + i, VertexBuffers);
+ FillBlock(tables, limit_offset, NUM(vertex_array_limit), VertexBuffer0 + i, VertexBuffers);
+
+ const std::size_t instance_array_offset = array_offset + instance_base_offset;
+ tables[0][instance_array_offset] = static_cast<u8>(VertexInstance0 + i);
+ tables[1][instance_array_offset] = VertexInstances;
+
+ const std::size_t instance_offset = OFF(instanced_arrays) + i;
+ tables[0][instance_offset] = static_cast<u8>(VertexInstance0 + i);
+ tables[1][instance_offset] = VertexInstances;
+ }
+}
+
+void SetupDirtyVertexFormat(Tables& tables) {
+ for (std::size_t i = 0; i < Regs::NumVertexAttributes; ++i) {
+ const std::size_t offset = OFF(vertex_attrib_format) + i * NUM(vertex_attrib_format[0]);
+ FillBlock(tables[0], offset, NUM(vertex_attrib_format[0]), VertexFormat0 + i);
+ }
+
+ FillBlock(tables[1], OFF(vertex_attrib_format), Regs::NumVertexAttributes, VertexFormats);
+}
+
+void SetupDirtyViewports(Tables& tables) {
+ for (std::size_t i = 0; i < Regs::NumViewports; ++i) {
+ const std::size_t transf_offset = OFF(viewport_transform) + i * NUM(viewport_transform[0]);
+ const std::size_t viewport_offset = OFF(viewports) + i * NUM(viewports[0]);
+
+ FillBlock(tables[0], transf_offset, NUM(viewport_transform[0]), Viewport0 + i);
+ FillBlock(tables[0], viewport_offset, NUM(viewports[0]), Viewport0 + i);
+ }
+
+ FillBlock(tables[1], OFF(viewport_transform), NUM(viewport_transform), Viewports);
+ FillBlock(tables[1], OFF(viewports), NUM(viewports), Viewports);
+
+ tables[0][OFF(viewport_transform_enabled)] = ViewportTransform;
+ tables[1][OFF(viewport_transform_enabled)] = Viewports;
+}
+
+void SetupDirtyScissors(Tables& tables) {
+ for (std::size_t i = 0; i < Regs::NumViewports; ++i) {
+ const std::size_t offset = OFF(scissor_test) + i * NUM(scissor_test[0]);
+ FillBlock(tables[0], offset, NUM(scissor_test[0]), Scissor0 + i);
+ }
+ FillBlock(tables[1], OFF(scissor_test), NUM(scissor_test), Scissors);
+}
+
+void SetupDirtyShaders(Tables& tables) {
+ FillBlock(tables[0], OFF(shader_config[0]), NUM(shader_config[0]) * Regs::MaxShaderProgram,
+ Shaders);
+}
+
+void SetupDirtyPolygonModes(Tables& tables) {
+ tables[0][OFF(polygon_mode_front)] = PolygonModeFront;
+ tables[0][OFF(polygon_mode_back)] = PolygonModeBack;
+
+ tables[1][OFF(polygon_mode_front)] = PolygonModes;
+ tables[1][OFF(polygon_mode_back)] = PolygonModes;
+ tables[0][OFF(fill_rectangle)] = PolygonModes;
+}
+
+void SetupDirtyDepthTest(Tables& tables) {
+ auto& table = tables[0];
+ table[OFF(depth_test_enable)] = DepthTest;
+ table[OFF(depth_write_enabled)] = DepthMask;
+ table[OFF(depth_test_func)] = DepthTest;
+}
+
+void SetupDirtyStencilTest(Tables& tables) {
+ static constexpr std::array offsets = {
+ OFF(stencil_enable), OFF(stencil_front_func_func), OFF(stencil_front_func_ref),
+ OFF(stencil_front_func_mask), OFF(stencil_front_op_fail), OFF(stencil_front_op_zfail),
+ OFF(stencil_front_op_zpass), OFF(stencil_front_mask), OFF(stencil_two_side_enable),
+ OFF(stencil_back_func_func), OFF(stencil_back_func_ref), OFF(stencil_back_func_mask),
+ OFF(stencil_back_op_fail), OFF(stencil_back_op_zfail), OFF(stencil_back_op_zpass),
+ OFF(stencil_back_mask)};
+ for (const auto offset : offsets) {
+ tables[0][offset] = StencilTest;
+ }
+}
+
+void SetupDirtyAlphaTest(Tables& tables) {
+ auto& table = tables[0];
+ table[OFF(alpha_test_ref)] = AlphaTest;
+ table[OFF(alpha_test_func)] = AlphaTest;
+ table[OFF(alpha_test_enabled)] = AlphaTest;
+}
+
+void SetupDirtyBlend(Tables& tables) {
+ FillBlock(tables[0], OFF(blend_color), NUM(blend_color), BlendColor);
+
+ tables[0][OFF(independent_blend_enable)] = BlendIndependentEnabled;
+
+ for (std::size_t i = 0; i < Regs::NumRenderTargets; ++i) {
+ const std::size_t offset = OFF(independent_blend) + i * NUM(independent_blend[0]);
+ FillBlock(tables[0], offset, NUM(independent_blend[0]), BlendState0 + i);
+
+ tables[0][OFF(blend.enable) + i] = static_cast<u8>(BlendState0 + i);
+ }
+ FillBlock(tables[1], OFF(independent_blend), NUM(independent_blend), BlendStates);
+ FillBlock(tables[1], OFF(blend), NUM(blend), BlendStates);
+}
+
+void SetupDirtyPrimitiveRestart(Tables& tables) {
+ FillBlock(tables[0], OFF(primitive_restart), NUM(primitive_restart), PrimitiveRestart);
+}
+
+void SetupDirtyPolygonOffset(Tables& tables) {
+ auto& table = tables[0];
+ table[OFF(polygon_offset_fill_enable)] = PolygonOffset;
+ table[OFF(polygon_offset_line_enable)] = PolygonOffset;
+ table[OFF(polygon_offset_point_enable)] = PolygonOffset;
+ table[OFF(polygon_offset_factor)] = PolygonOffset;
+ table[OFF(polygon_offset_units)] = PolygonOffset;
+ table[OFF(polygon_offset_clamp)] = PolygonOffset;
+}
+
+void SetupDirtyMultisampleControl(Tables& tables) {
+ FillBlock(tables[0], OFF(multisample_control), NUM(multisample_control), MultisampleControl);
+}
+
+void SetupDirtyRasterizeEnable(Tables& tables) {
+ tables[0][OFF(rasterize_enable)] = RasterizeEnable;
+}
+
+void SetupDirtyFramebufferSRGB(Tables& tables) {
+ tables[0][OFF(framebuffer_srgb)] = FramebufferSRGB;
+}
+
+void SetupDirtyLogicOp(Tables& tables) {
+ FillBlock(tables[0], OFF(logic_op), NUM(logic_op), LogicOp);
+}
+
+void SetupDirtyFragmentClampColor(Tables& tables) {
+ tables[0][OFF(frag_color_clamp)] = FragmentClampColor;
+}
+
+void SetupDirtyPointSize(Tables& tables) {
+ tables[0][OFF(vp_point_size)] = PointSize;
+ tables[0][OFF(point_size)] = PointSize;
+ tables[0][OFF(point_sprite_enable)] = PointSize;
+}
+
+void SetupDirtyClipControl(Tables& tables) {
+ auto& table = tables[0];
+ table[OFF(screen_y_control)] = ClipControl;
+ table[OFF(depth_mode)] = ClipControl;
+}
+
+void SetupDirtyDepthClampEnabled(Tables& tables) {
+ tables[0][OFF(view_volume_clip_control)] = DepthClampEnabled;
+}
+
+void SetupDirtyMisc(Tables& tables) {
+ auto& table = tables[0];
+
+ table[OFF(clip_distance_enabled)] = ClipDistances;
+
+ table[OFF(front_face)] = FrontFace;
+
+ table[OFF(cull_test_enabled)] = CullTest;
+ table[OFF(cull_face)] = CullTest;
+}
+
+} // Anonymous namespace
+
+StateTracker::StateTracker(Core::System& system) : system{system} {}
+
+void StateTracker::Initialize() {
+ auto& dirty = system.GPU().Maxwell3D().dirty;
+ auto& tables = dirty.tables;
+ SetupDirtyRenderTargets(tables);
+ SetupDirtyColorMasks(tables);
+ SetupDirtyViewports(tables);
+ SetupDirtyScissors(tables);
+ SetupDirtyVertexArrays(tables);
+ SetupDirtyVertexFormat(tables);
+ SetupDirtyShaders(tables);
+ SetupDirtyPolygonModes(tables);
+ SetupDirtyDepthTest(tables);
+ SetupDirtyStencilTest(tables);
+ SetupDirtyAlphaTest(tables);
+ SetupDirtyBlend(tables);
+ SetupDirtyPrimitiveRestart(tables);
+ SetupDirtyPolygonOffset(tables);
+ SetupDirtyMultisampleControl(tables);
+ SetupDirtyRasterizeEnable(tables);
+ SetupDirtyFramebufferSRGB(tables);
+ SetupDirtyLogicOp(tables);
+ SetupDirtyFragmentClampColor(tables);
+ SetupDirtyPointSize(tables);
+ SetupDirtyClipControl(tables);
+ SetupDirtyDepthClampEnabled(tables);
+ SetupDirtyMisc(tables);
+
+ auto& store = dirty.on_write_stores;
+ store[VertexBuffers] = true;
+ for (std::size_t i = 0; i < Regs::NumVertexArrays; ++i) {
+ store[VertexBuffer0 + i] = true;
+ }
+}
+
+} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_state_tracker.h b/src/video_core/renderer_opengl/gl_state_tracker.h
new file mode 100644
index 000000000..b882d75c3
--- /dev/null
+++ b/src/video_core/renderer_opengl/gl_state_tracker.h
@@ -0,0 +1,215 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <limits>
+
+#include <glad/glad.h>
+
+#include "common/common_types.h"
+#include "core/core.h"
+#include "video_core/dirty_flags.h"
+#include "video_core/engines/maxwell_3d.h"
+
+namespace Core {
+class System;
+}
+
+namespace OpenGL {
+
+namespace Dirty {
+
+enum : u8 {
+ First = VideoCommon::Dirty::LastCommonEntry,
+
+ VertexFormats,
+ VertexFormat0,
+ VertexFormat31 = VertexFormat0 + 31,
+
+ VertexBuffers,
+ VertexBuffer0,
+ VertexBuffer31 = VertexBuffer0 + 31,
+
+ VertexInstances,
+ VertexInstance0,
+ VertexInstance31 = VertexInstance0 + 31,
+
+ ViewportTransform,
+ Viewports,
+ Viewport0,
+ Viewport15 = Viewport0 + 15,
+
+ Scissors,
+ Scissor0,
+ Scissor15 = Scissor0 + 15,
+
+ ColorMaskCommon,
+ ColorMasks,
+ ColorMask0,
+ ColorMask7 = ColorMask0 + 7,
+
+ BlendColor,
+ BlendIndependentEnabled,
+ BlendStates,
+ BlendState0,
+ BlendState7 = BlendState0 + 7,
+
+ Shaders,
+ ClipDistances,
+
+ PolygonModes,
+ PolygonModeFront,
+ PolygonModeBack,
+
+ ColorMask,
+ FrontFace,
+ CullTest,
+ DepthMask,
+ DepthTest,
+ StencilTest,
+ AlphaTest,
+ PrimitiveRestart,
+ PolygonOffset,
+ MultisampleControl,
+ RasterizeEnable,
+ FramebufferSRGB,
+ LogicOp,
+ FragmentClampColor,
+ PointSize,
+ ClipControl,
+ DepthClampEnabled,
+
+ Last
+};
+static_assert(Last <= std::numeric_limits<u8>::max());
+
+} // namespace Dirty
+
+class StateTracker {
+public:
+ explicit StateTracker(Core::System& system);
+
+ void Initialize();
+
+ void BindIndexBuffer(GLuint new_index_buffer) {
+ if (index_buffer == new_index_buffer) {
+ return;
+ }
+ index_buffer = new_index_buffer;
+ glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, new_index_buffer);
+ }
+
+ void NotifyScreenDrawVertexArray() {
+ auto& flags = system.GPU().Maxwell3D().dirty.flags;
+ flags[OpenGL::Dirty::VertexFormats] = true;
+ flags[OpenGL::Dirty::VertexFormat0 + 0] = true;
+ flags[OpenGL::Dirty::VertexFormat0 + 1] = true;
+
+ flags[OpenGL::Dirty::VertexBuffers] = true;
+ flags[OpenGL::Dirty::VertexBuffer0] = true;
+
+ flags[OpenGL::Dirty::VertexInstances] = true;
+ flags[OpenGL::Dirty::VertexInstance0 + 0] = true;
+ flags[OpenGL::Dirty::VertexInstance0 + 1] = true;
+ }
+
+ void NotifyPolygonModes() {
+ auto& flags = system.GPU().Maxwell3D().dirty.flags;
+ flags[OpenGL::Dirty::PolygonModes] = true;
+ flags[OpenGL::Dirty::PolygonModeFront] = true;
+ flags[OpenGL::Dirty::PolygonModeBack] = true;
+ }
+
+ void NotifyViewport0() {
+ auto& flags = system.GPU().Maxwell3D().dirty.flags;
+ flags[OpenGL::Dirty::Viewports] = true;
+ flags[OpenGL::Dirty::Viewport0] = true;
+ }
+
+ void NotifyScissor0() {
+ auto& flags = system.GPU().Maxwell3D().dirty.flags;
+ flags[OpenGL::Dirty::Scissors] = true;
+ flags[OpenGL::Dirty::Scissor0] = true;
+ }
+
+ void NotifyColorMask0() {
+ auto& flags = system.GPU().Maxwell3D().dirty.flags;
+ flags[OpenGL::Dirty::ColorMasks] = true;
+ flags[OpenGL::Dirty::ColorMask0] = true;
+ }
+
+ void NotifyBlend0() {
+ auto& flags = system.GPU().Maxwell3D().dirty.flags;
+ flags[OpenGL::Dirty::BlendStates] = true;
+ flags[OpenGL::Dirty::BlendState0] = true;
+ }
+
+ void NotifyFramebuffer() {
+ auto& flags = system.GPU().Maxwell3D().dirty.flags;
+ flags[VideoCommon::Dirty::RenderTargets] = true;
+ }
+
+ void NotifyFrontFace() {
+ auto& flags = system.GPU().Maxwell3D().dirty.flags;
+ flags[OpenGL::Dirty::FrontFace] = true;
+ }
+
+ void NotifyCullTest() {
+ auto& flags = system.GPU().Maxwell3D().dirty.flags;
+ flags[OpenGL::Dirty::CullTest] = true;
+ }
+
+ void NotifyDepthMask() {
+ auto& flags = system.GPU().Maxwell3D().dirty.flags;
+ flags[OpenGL::Dirty::DepthMask] = true;
+ }
+
+ void NotifyDepthTest() {
+ auto& flags = system.GPU().Maxwell3D().dirty.flags;
+ flags[OpenGL::Dirty::DepthTest] = true;
+ }
+
+ void NotifyStencilTest() {
+ auto& flags = system.GPU().Maxwell3D().dirty.flags;
+ flags[OpenGL::Dirty::StencilTest] = true;
+ }
+
+ void NotifyPolygonOffset() {
+ auto& flags = system.GPU().Maxwell3D().dirty.flags;
+ flags[OpenGL::Dirty::PolygonOffset] = true;
+ }
+
+ void NotifyRasterizeEnable() {
+ auto& flags = system.GPU().Maxwell3D().dirty.flags;
+ flags[OpenGL::Dirty::RasterizeEnable] = true;
+ }
+
+ void NotifyFramebufferSRGB() {
+ auto& flags = system.GPU().Maxwell3D().dirty.flags;
+ flags[OpenGL::Dirty::FramebufferSRGB] = true;
+ }
+
+ void NotifyLogicOp() {
+ auto& flags = system.GPU().Maxwell3D().dirty.flags;
+ flags[OpenGL::Dirty::LogicOp] = true;
+ }
+
+ void NotifyClipControl() {
+ auto& flags = system.GPU().Maxwell3D().dirty.flags;
+ flags[OpenGL::Dirty::ClipControl] = true;
+ }
+
+ void NotifyAlphaTest() {
+ auto& flags = system.GPU().Maxwell3D().dirty.flags;
+ flags[OpenGL::Dirty::AlphaTest] = true;
+ }
+
+private:
+ Core::System& system;
+
+ GLuint index_buffer = 0;
+};
+
+} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_stream_buffer.cpp b/src/video_core/renderer_opengl/gl_stream_buffer.cpp
index 35ba334e4..6ec328c53 100644
--- a/src/video_core/renderer_opengl/gl_stream_buffer.cpp
+++ b/src/video_core/renderer_opengl/gl_stream_buffer.cpp
@@ -7,7 +7,6 @@
#include "common/alignment.h"
#include "common/assert.h"
#include "common/microprofile.h"
-#include "video_core/renderer_opengl/gl_state.h"
#include "video_core/renderer_opengl/gl_stream_buffer.h"
MICROPROFILE_DEFINE(OpenGL_StreamBuffer, "OpenGL", "Stream Buffer Orphaning",
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp
index 5c1ae1418..f424e3000 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp
@@ -10,7 +10,7 @@
#include "core/core.h"
#include "video_core/morton.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
-#include "video_core/renderer_opengl/gl_state.h"
+#include "video_core/renderer_opengl/gl_state_tracker.h"
#include "video_core/renderer_opengl/gl_texture_cache.h"
#include "video_core/renderer_opengl/utils.h"
#include "video_core/texture_cache/surface_base.h"
@@ -53,6 +53,7 @@ constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> tex_format
{GL_R8UI, GL_RED_INTEGER, GL_UNSIGNED_BYTE, false}, // R8UI
{GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT, false}, // RGBA16F
{GL_RGBA16, GL_RGBA, GL_UNSIGNED_SHORT, false}, // RGBA16U
+ {GL_RGBA16_SNORM, GL_RGBA, GL_SHORT, false}, // RGBA16S
{GL_RGBA16UI, GL_RGBA_INTEGER, GL_UNSIGNED_SHORT, false}, // RGBA16UI
{GL_R11F_G11F_B10F, GL_RGB, GL_UNSIGNED_INT_10F_11F_11F_REV, false}, // R11FG11FB10F
{GL_RGBA32UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT, false}, // RGBA32UI
@@ -87,6 +88,7 @@ constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> tex_format
{GL_RG32UI, GL_RG_INTEGER, GL_UNSIGNED_INT, false}, // RG32UI
{GL_RGB16F, GL_RGBA, GL_HALF_FLOAT, false}, // RGBX16F
{GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT, false}, // R32UI
+ {GL_R32I, GL_RED_INTEGER, GL_INT, false}, // R32I
{GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_8X8
{GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_8X5
{GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE, false}, // ASTC_2D_5X4
@@ -396,6 +398,7 @@ CachedSurfaceView::CachedSurfaceView(CachedSurface& surface, const ViewParams& p
const bool is_proxy)
: VideoCommon::ViewBase(params), surface{surface}, is_proxy{is_proxy} {
target = GetTextureTarget(params.target);
+ format = GetFormatTuple(surface.GetSurfaceParams().pixel_format).internal_format;
if (!is_proxy) {
texture_view = CreateTextureView();
}
@@ -405,24 +408,36 @@ CachedSurfaceView::CachedSurfaceView(CachedSurface& surface, const ViewParams& p
CachedSurfaceView::~CachedSurfaceView() = default;
void CachedSurfaceView::Attach(GLenum attachment, GLenum target) const {
- ASSERT(params.num_layers == 1 && params.num_levels == 1);
+ ASSERT(params.num_levels == 1);
- const auto& owner_params = surface.GetSurfaceParams();
+ const GLuint texture = surface.GetTexture();
+ if (params.num_layers > 1) {
+ // Layered framebuffer attachments
+ UNIMPLEMENTED_IF(params.base_layer != 0);
- switch (owner_params.target) {
+ switch (params.target) {
+ case SurfaceTarget::Texture2DArray:
+ glFramebufferTexture(target, attachment, texture, params.base_level);
+ break;
+ default:
+ UNIMPLEMENTED();
+ }
+ return;
+ }
+
+ const GLenum view_target = surface.GetTarget();
+ switch (surface.GetSurfaceParams().target) {
case SurfaceTarget::Texture1D:
- glFramebufferTexture1D(target, attachment, surface.GetTarget(), surface.GetTexture(),
- params.base_level);
+ glFramebufferTexture1D(target, attachment, view_target, texture, params.base_level);
break;
case SurfaceTarget::Texture2D:
- glFramebufferTexture2D(target, attachment, surface.GetTarget(), surface.GetTexture(),
- params.base_level);
+ glFramebufferTexture2D(target, attachment, view_target, texture, params.base_level);
break;
case SurfaceTarget::Texture1DArray:
case SurfaceTarget::Texture2DArray:
case SurfaceTarget::TextureCubemap:
case SurfaceTarget::TextureCubeArray:
- glFramebufferTextureLayer(target, attachment, surface.GetTexture(), params.base_level,
+ glFramebufferTextureLayer(target, attachment, texture, params.base_level,
params.base_layer);
break;
default:
@@ -454,25 +469,20 @@ void CachedSurfaceView::ApplySwizzle(SwizzleSource x_source, SwizzleSource y_sou
}
OGLTextureView CachedSurfaceView::CreateTextureView() const {
- const auto& owner_params = surface.GetSurfaceParams();
OGLTextureView texture_view;
texture_view.Create();
- const GLuint handle{texture_view.handle};
- const FormatTuple& tuple{GetFormatTuple(owner_params.pixel_format)};
-
- glTextureView(handle, target, surface.texture.handle, tuple.internal_format, params.base_level,
+ glTextureView(texture_view.handle, target, surface.texture.handle, format, params.base_level,
params.num_levels, params.base_layer, params.num_layers);
-
- ApplyTextureDefaults(owner_params, handle);
+ ApplyTextureDefaults(surface.GetSurfaceParams(), texture_view.handle);
return texture_view;
}
TextureCacheOpenGL::TextureCacheOpenGL(Core::System& system,
VideoCore::RasterizerInterface& rasterizer,
- const Device& device)
- : TextureCacheBase{system, rasterizer} {
+ const Device& device, StateTracker& state_tracker)
+ : TextureCacheBase{system, rasterizer}, state_tracker{state_tracker} {
src_framebuffer.Create();
dst_framebuffer.Create();
}
@@ -506,25 +516,26 @@ void TextureCacheOpenGL::ImageBlit(View& src_view, View& dst_view,
const Tegra::Engines::Fermi2D::Config& copy_config) {
const auto& src_params{src_view->GetSurfaceParams()};
const auto& dst_params{dst_view->GetSurfaceParams()};
+ UNIMPLEMENTED_IF(src_params.target == SurfaceTarget::Texture3D);
+ UNIMPLEMENTED_IF(dst_params.target == SurfaceTarget::Texture3D);
- OpenGLState prev_state{OpenGLState::GetCurState()};
- SCOPE_EXIT({
- prev_state.AllDirty();
- prev_state.Apply();
- });
-
- OpenGLState state;
- state.draw.read_framebuffer = src_framebuffer.handle;
- state.draw.draw_framebuffer = dst_framebuffer.handle;
- state.framebuffer_srgb.enabled = dst_params.srgb_conversion;
- state.AllDirty();
- state.Apply();
+ state_tracker.NotifyScissor0();
+ state_tracker.NotifyFramebuffer();
+ state_tracker.NotifyRasterizeEnable();
+ state_tracker.NotifyFramebufferSRGB();
- u32 buffers{};
+ if (dst_params.srgb_conversion) {
+ glEnable(GL_FRAMEBUFFER_SRGB);
+ } else {
+ glDisable(GL_FRAMEBUFFER_SRGB);
+ }
+ glDisable(GL_RASTERIZER_DISCARD);
+ glDisablei(GL_SCISSOR_TEST, 0);
- UNIMPLEMENTED_IF(src_params.target == SurfaceTarget::Texture3D);
- UNIMPLEMENTED_IF(dst_params.target == SurfaceTarget::Texture3D);
+ glBindFramebuffer(GL_READ_FRAMEBUFFER, src_framebuffer.handle);
+ glBindFramebuffer(GL_DRAW_FRAMEBUFFER, dst_framebuffer.handle);
+ GLenum buffers = 0;
if (src_params.type == SurfaceType::ColorTexture) {
src_view->Attach(GL_COLOR_ATTACHMENT0, GL_READ_FRAMEBUFFER);
glFramebufferTexture2D(GL_READ_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0,
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h
index 8e13ab38b..6658c6ffd 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.h
+++ b/src/video_core/renderer_opengl/gl_texture_cache.h
@@ -27,6 +27,7 @@ using VideoCommon::ViewParams;
class CachedSurfaceView;
class CachedSurface;
class TextureCacheOpenGL;
+class StateTracker;
using Surface = std::shared_ptr<CachedSurface>;
using View = std::shared_ptr<CachedSurfaceView>;
@@ -96,6 +97,10 @@ public:
return texture_view.handle;
}
+ GLenum GetFormat() const {
+ return format;
+ }
+
const SurfaceParams& GetSurfaceParams() const {
return surface.GetSurfaceParams();
}
@@ -113,6 +118,7 @@ private:
CachedSurface& surface;
GLenum target{};
+ GLenum format{};
OGLTextureView texture_view;
u32 swizzle{};
@@ -122,7 +128,7 @@ private:
class TextureCacheOpenGL final : public TextureCacheBase {
public:
explicit TextureCacheOpenGL(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
- const Device& device);
+ const Device& device, StateTracker& state_tracker);
~TextureCacheOpenGL();
protected:
@@ -139,6 +145,8 @@ protected:
private:
GLuint FetchPBO(std::size_t buffer_size);
+ StateTracker& state_tracker;
+
OGLFramebuffer src_framebuffer;
OGLFramebuffer dst_framebuffer;
std::unordered_map<u32, OGLBuffer> copy_pbo_cache;
diff --git a/src/video_core/renderer_opengl/maxwell_to_gl.h b/src/video_core/renderer_opengl/maxwell_to_gl.h
index 7ed505628..89f0e04ef 100644
--- a/src/video_core/renderer_opengl/maxwell_to_gl.h
+++ b/src/video_core/renderer_opengl/maxwell_to_gl.h
@@ -92,8 +92,32 @@ inline GLenum VertexType(Maxwell::VertexAttribute attrib) {
}
case Maxwell::VertexAttribute::Type::UnsignedScaled:
switch (attrib.size) {
+ case Maxwell::VertexAttribute::Size::Size_8:
case Maxwell::VertexAttribute::Size::Size_8_8:
+ case Maxwell::VertexAttribute::Size::Size_8_8_8:
+ case Maxwell::VertexAttribute::Size::Size_8_8_8_8:
return GL_UNSIGNED_BYTE;
+ case Maxwell::VertexAttribute::Size::Size_16:
+ case Maxwell::VertexAttribute::Size::Size_16_16:
+ case Maxwell::VertexAttribute::Size::Size_16_16_16:
+ case Maxwell::VertexAttribute::Size::Size_16_16_16_16:
+ return GL_UNSIGNED_SHORT;
+ default:
+ LOG_ERROR(Render_OpenGL, "Unimplemented vertex size={}", attrib.SizeString());
+ return {};
+ }
+ case Maxwell::VertexAttribute::Type::SignedScaled:
+ switch (attrib.size) {
+ case Maxwell::VertexAttribute::Size::Size_8:
+ case Maxwell::VertexAttribute::Size::Size_8_8:
+ case Maxwell::VertexAttribute::Size::Size_8_8_8:
+ case Maxwell::VertexAttribute::Size::Size_8_8_8_8:
+ return GL_BYTE;
+ case Maxwell::VertexAttribute::Size::Size_16:
+ case Maxwell::VertexAttribute::Size::Size_16_16:
+ case Maxwell::VertexAttribute::Size::Size_16_16_16:
+ case Maxwell::VertexAttribute::Size::Size_16_16_16_16:
+ return GL_SHORT;
default:
LOG_ERROR(Render_OpenGL, "Unimplemented vertex size={}", attrib.SizeString());
return {};
@@ -401,24 +425,24 @@ inline GLenum StencilOp(Maxwell::StencilOp stencil) {
return GL_KEEP;
}
-inline GLenum FrontFace(Maxwell::Cull::FrontFace front_face) {
+inline GLenum FrontFace(Maxwell::FrontFace front_face) {
switch (front_face) {
- case Maxwell::Cull::FrontFace::ClockWise:
+ case Maxwell::FrontFace::ClockWise:
return GL_CW;
- case Maxwell::Cull::FrontFace::CounterClockWise:
+ case Maxwell::FrontFace::CounterClockWise:
return GL_CCW;
}
LOG_ERROR(Render_OpenGL, "Unimplemented front face cull={}", static_cast<u32>(front_face));
return GL_CCW;
}
-inline GLenum CullFace(Maxwell::Cull::CullFace cull_face) {
+inline GLenum CullFace(Maxwell::CullFace cull_face) {
switch (cull_face) {
- case Maxwell::Cull::CullFace::Front:
+ case Maxwell::CullFace::Front:
return GL_FRONT;
- case Maxwell::Cull::CullFace::Back:
+ case Maxwell::CullFace::Back:
return GL_BACK;
- case Maxwell::Cull::CullFace::FrontAndBack:
+ case Maxwell::CullFace::FrontAndBack:
return GL_FRONT_AND_BACK;
}
LOG_ERROR(Render_OpenGL, "Unimplemented cull face={}", static_cast<u32>(cull_face));
@@ -464,5 +488,18 @@ inline GLenum LogicOp(Maxwell::LogicOperation operation) {
return GL_COPY;
}
+inline GLenum PolygonMode(Maxwell::PolygonMode polygon_mode) {
+ switch (polygon_mode) {
+ case Maxwell::PolygonMode::Point:
+ return GL_POINT;
+ case Maxwell::PolygonMode::Line:
+ return GL_LINE;
+ case Maxwell::PolygonMode::Fill:
+ return GL_FILL;
+ }
+ UNREACHABLE_MSG("Invalid polygon mode={}", static_cast<int>(polygon_mode));
+ return GL_FILL;
+}
+
} // namespace MaxwellToGL
} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index bba16afaf..fca5e3ec0 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -5,30 +5,54 @@
#include <algorithm>
#include <cstddef>
#include <cstdlib>
+#include <cstring>
#include <memory>
+
#include <glad/glad.h>
+
#include "common/assert.h"
#include "common/logging/log.h"
+#include "common/microprofile.h"
#include "common/telemetry.h"
#include "core/core.h"
#include "core/core_timing.h"
#include "core/frontend/emu_window.h"
-#include "core/frontend/scope_acquire_window_context.h"
#include "core/memory.h"
#include "core/perf_stats.h"
#include "core/settings.h"
#include "core/telemetry_session.h"
#include "video_core/morton.h"
#include "video_core/renderer_opengl/gl_rasterizer.h"
+#include "video_core/renderer_opengl/gl_shader_manager.h"
#include "video_core/renderer_opengl/renderer_opengl.h"
namespace OpenGL {
namespace {
-constexpr char vertex_shader[] = R"(
+// If the size of this is too small, it ends up creating a soft cap on FPS as the renderer will have
+// to wait on available presentation frames.
+constexpr std::size_t SWAP_CHAIN_SIZE = 3;
+
+struct Frame {
+ u32 width{}; /// Width of the frame (to detect resize)
+ u32 height{}; /// Height of the frame
+ bool color_reloaded{}; /// Texture attachment was recreated (ie: resized)
+ OpenGL::OGLRenderbuffer color{}; /// Buffer shared between the render/present FBO
+ OpenGL::OGLFramebuffer render{}; /// FBO created on the render thread
+ OpenGL::OGLFramebuffer present{}; /// FBO created on the present thread
+ GLsync render_fence{}; /// Fence created on the render thread
+ GLsync present_fence{}; /// Fence created on the presentation thread
+ bool is_srgb{}; /// Framebuffer is sRGB or RGB
+};
+
+constexpr char VERTEX_SHADER[] = R"(
#version 430 core
+out gl_PerVertex {
+ vec4 gl_Position;
+};
+
layout (location = 0) in vec2 vert_position;
layout (location = 1) in vec2 vert_tex_coord;
layout (location = 0) out vec2 frag_tex_coord;
@@ -49,7 +73,7 @@ void main() {
}
)";
-constexpr char fragment_shader[] = R"(
+constexpr char FRAGMENT_SHADER[] = R"(
#version 430 core
layout (location = 0) in vec2 frag_tex_coord;
@@ -58,7 +82,7 @@ layout (location = 0) out vec4 color;
layout (binding = 0) uniform sampler2D color_texture;
void main() {
- color = texture(color_texture, frag_tex_coord);
+ color = vec4(texture(color_texture, frag_tex_coord).rgb, 1.0f);
}
)";
@@ -67,13 +91,31 @@ constexpr GLint TexCoordLocation = 1;
constexpr GLint ModelViewMatrixLocation = 0;
struct ScreenRectVertex {
- constexpr ScreenRectVertex(GLfloat x, GLfloat y, GLfloat u, GLfloat v)
- : position{{x, y}}, tex_coord{{u, v}} {}
+ constexpr ScreenRectVertex(u32 x, u32 y, GLfloat u, GLfloat v)
+ : position{{static_cast<GLfloat>(x), static_cast<GLfloat>(y)}}, tex_coord{{u, v}} {}
std::array<GLfloat, 2> position;
std::array<GLfloat, 2> tex_coord;
};
+/// Returns true if any debug tool is attached
+bool HasDebugTool() {
+ const bool nsight = std::getenv("NVTX_INJECTION64_PATH") || std::getenv("NSIGHT_LAUNCHED");
+ if (nsight) {
+ return true;
+ }
+
+ GLint num_extensions;
+ glGetIntegerv(GL_NUM_EXTENSIONS, &num_extensions);
+ for (GLuint index = 0; index < static_cast<GLuint>(num_extensions); ++index) {
+ const auto name = reinterpret_cast<const char*>(glGetStringi(GL_EXTENSIONS, index));
+ if (!std::strcmp(name, "GL_EXT_debug_tool")) {
+ return true;
+ }
+ }
+ return false;
+}
+
/**
* Defines a 1:1 pixel ortographic projection matrix with (0,0) on the top-left
* corner and (width, height) on the lower-bottom.
@@ -157,22 +199,229 @@ void APIENTRY DebugHandler(GLenum source, GLenum type, GLuint id, GLenum severit
} // Anonymous namespace
+/**
+ * For smooth Vsync rendering, we want to always present the latest frame that the core generates,
+ * but also make sure that rendering happens at the pace that the frontend dictates. This is a
+ * helper class that the renderer uses to sync frames between the render thread and the presentation
+ * thread
+ */
+class FrameMailbox {
+public:
+ std::mutex swap_chain_lock;
+ std::condition_variable present_cv;
+ std::array<Frame, SWAP_CHAIN_SIZE> swap_chain{};
+ std::queue<Frame*> free_queue;
+ std::deque<Frame*> present_queue;
+ Frame* previous_frame{};
+
+ FrameMailbox() : has_debug_tool{HasDebugTool()} {
+ for (auto& frame : swap_chain) {
+ free_queue.push(&frame);
+ }
+ }
+
+ ~FrameMailbox() {
+ // lock the mutex and clear out the present and free_queues and notify any people who are
+ // blocked to prevent deadlock on shutdown
+ std::scoped_lock lock{swap_chain_lock};
+ std::queue<Frame*>().swap(free_queue);
+ present_queue.clear();
+ present_cv.notify_all();
+ }
+
+ void ReloadPresentFrame(Frame* frame, u32 height, u32 width) {
+ frame->present.Release();
+ frame->present.Create();
+ GLint previous_draw_fbo{};
+ glGetIntegerv(GL_DRAW_FRAMEBUFFER_BINDING, &previous_draw_fbo);
+ glBindFramebuffer(GL_FRAMEBUFFER, frame->present.handle);
+ glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_RENDERBUFFER,
+ frame->color.handle);
+ if (glCheckFramebufferStatus(GL_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) {
+ LOG_CRITICAL(Render_OpenGL, "Failed to recreate present FBO!");
+ }
+ glBindFramebuffer(GL_DRAW_FRAMEBUFFER, previous_draw_fbo);
+ frame->color_reloaded = false;
+ }
+
+ void ReloadRenderFrame(Frame* frame, u32 width, u32 height) {
+ // Recreate the color texture attachment
+ frame->color.Release();
+ frame->color.Create();
+ const GLenum internal_format = frame->is_srgb ? GL_SRGB8 : GL_RGB8;
+ glNamedRenderbufferStorage(frame->color.handle, internal_format, width, height);
+
+ // Recreate the FBO for the render target
+ frame->render.Release();
+ frame->render.Create();
+ glBindFramebuffer(GL_FRAMEBUFFER, frame->render.handle);
+ glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_RENDERBUFFER,
+ frame->color.handle);
+ if (glCheckFramebufferStatus(GL_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) {
+ LOG_CRITICAL(Render_OpenGL, "Failed to recreate render FBO!");
+ }
+
+ frame->width = width;
+ frame->height = height;
+ frame->color_reloaded = true;
+ }
+
+ Frame* GetRenderFrame() {
+ std::unique_lock lock{swap_chain_lock};
+
+ // If theres no free frames, we will reuse the oldest render frame
+ if (free_queue.empty()) {
+ auto frame = present_queue.back();
+ present_queue.pop_back();
+ return frame;
+ }
+
+ Frame* frame = free_queue.front();
+ free_queue.pop();
+ return frame;
+ }
+
+ void ReleaseRenderFrame(Frame* frame) {
+ std::unique_lock lock{swap_chain_lock};
+ present_queue.push_front(frame);
+ present_cv.notify_one();
+
+ DebugNotifyNextFrame();
+ }
+
+ Frame* TryGetPresentFrame(int timeout_ms) {
+ DebugWaitForNextFrame();
+
+ std::unique_lock lock{swap_chain_lock};
+ // wait for new entries in the present_queue
+ present_cv.wait_for(lock, std::chrono::milliseconds(timeout_ms),
+ [&] { return !present_queue.empty(); });
+ if (present_queue.empty()) {
+ // timed out waiting for a frame to draw so return the previous frame
+ return previous_frame;
+ }
+
+ // free the previous frame and add it back to the free queue
+ if (previous_frame) {
+ free_queue.push(previous_frame);
+ }
+
+ // the newest entries are pushed to the front of the queue
+ Frame* frame = present_queue.front();
+ present_queue.pop_front();
+ // remove all old entries from the present queue and move them back to the free_queue
+ for (auto f : present_queue) {
+ free_queue.push(f);
+ }
+ present_queue.clear();
+ previous_frame = frame;
+ return frame;
+ }
+
+private:
+ std::mutex debug_synch_mutex;
+ std::condition_variable debug_synch_condition;
+ std::atomic_int frame_for_debug{};
+ const bool has_debug_tool; // When true, using a GPU debugger, so keep frames in lock-step
+
+ /// Signal that a new frame is available (called from GPU thread)
+ void DebugNotifyNextFrame() {
+ if (!has_debug_tool) {
+ return;
+ }
+ frame_for_debug++;
+ std::lock_guard lock{debug_synch_mutex};
+ debug_synch_condition.notify_one();
+ }
+
+ /// Wait for a new frame to be available (called from presentation thread)
+ void DebugWaitForNextFrame() {
+ if (!has_debug_tool) {
+ return;
+ }
+ const int last_frame = frame_for_debug;
+ std::unique_lock lock{debug_synch_mutex};
+ debug_synch_condition.wait(lock,
+ [this, last_frame] { return frame_for_debug > last_frame; });
+ }
+};
+
RendererOpenGL::RendererOpenGL(Core::Frontend::EmuWindow& emu_window, Core::System& system)
- : VideoCore::RendererBase{emu_window}, emu_window{emu_window}, system{system} {}
+ : VideoCore::RendererBase{emu_window}, emu_window{emu_window}, system{system},
+ frame_mailbox{std::make_unique<FrameMailbox>()} {}
RendererOpenGL::~RendererOpenGL() = default;
+MICROPROFILE_DEFINE(OpenGL_RenderFrame, "OpenGL", "Render Frame", MP_RGB(128, 128, 64));
+MICROPROFILE_DEFINE(OpenGL_WaitPresent, "OpenGL", "Wait For Present", MP_RGB(128, 128, 128));
+
void RendererOpenGL::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
- // Maintain the rasterizer's state as a priority
- OpenGLState prev_state = OpenGLState::GetCurState();
- state.AllDirty();
- state.Apply();
+ render_window.PollEvents();
+
+ if (!framebuffer) {
+ return;
+ }
+
+ PrepareRendertarget(framebuffer);
+ RenderScreenshot();
+
+ Frame* frame;
+ {
+ MICROPROFILE_SCOPE(OpenGL_WaitPresent);
+
+ frame = frame_mailbox->GetRenderFrame();
+
+ // Clean up sync objects before drawing
+
+ // INTEL driver workaround. We can't delete the previous render sync object until we are
+ // sure that the presentation is done
+ if (frame->present_fence) {
+ glClientWaitSync(frame->present_fence, 0, GL_TIMEOUT_IGNORED);
+ }
+
+ // delete the draw fence if the frame wasn't presented
+ if (frame->render_fence) {
+ glDeleteSync(frame->render_fence);
+ frame->render_fence = 0;
+ }
+
+ // wait for the presentation to be done
+ if (frame->present_fence) {
+ glWaitSync(frame->present_fence, 0, GL_TIMEOUT_IGNORED);
+ glDeleteSync(frame->present_fence);
+ frame->present_fence = 0;
+ }
+ }
+ {
+ MICROPROFILE_SCOPE(OpenGL_RenderFrame);
+ const auto& layout = render_window.GetFramebufferLayout();
+
+ // Recreate the frame if the size of the window has changed
+ if (layout.width != frame->width || layout.height != frame->height ||
+ screen_info.display_srgb != frame->is_srgb) {
+ LOG_DEBUG(Render_OpenGL, "Reloading render frame");
+ frame->is_srgb = screen_info.display_srgb;
+ frame_mailbox->ReloadRenderFrame(frame, layout.width, layout.height);
+ }
+ glBindFramebuffer(GL_DRAW_FRAMEBUFFER, frame->render.handle);
+ DrawScreen(layout);
+ // Create a fence for the frontend to wait on and swap this frame to OffTex
+ frame->render_fence = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
+ glFlush();
+ frame_mailbox->ReleaseRenderFrame(frame);
+ m_current_frame++;
+ rasterizer->TickFrame();
+ }
+}
+
+void RendererOpenGL::PrepareRendertarget(const Tegra::FramebufferConfig* framebuffer) {
if (framebuffer) {
// If framebuffer is provided, reload it from memory to a texture
if (screen_info.texture.width != static_cast<GLsizei>(framebuffer->width) ||
screen_info.texture.height != static_cast<GLsizei>(framebuffer->height) ||
- screen_info.texture.pixel_format != framebuffer->pixel_format) {
+ screen_info.texture.pixel_format != framebuffer->pixel_format ||
+ gl_framebuffer_data.empty()) {
// Reallocate texture if the framebuffer size has changed.
// This is expected to not happen very often and hence should not be a
// performance problem.
@@ -181,22 +430,7 @@ void RendererOpenGL::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) {
// Load the framebuffer from memory, draw it to the screen, and swap buffers
LoadFBToScreenInfo(*framebuffer);
-
- if (renderer_settings.screenshot_requested)
- CaptureScreenshot();
-
- DrawScreen(render_window.GetFramebufferLayout());
-
- rasterizer->TickFrame();
-
- render_window.SwapBuffers();
}
-
- render_window.PollEvents();
-
- // Restore the rasterizer state
- prev_state.AllDirty();
- prev_state.Apply();
}
void RendererOpenGL::LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuffer) {
@@ -249,31 +483,24 @@ void RendererOpenGL::InitOpenGLObjects() {
glClearColor(Settings::values.bg_red, Settings::values.bg_green, Settings::values.bg_blue,
0.0f);
- // Link shaders and get variable locations
- shader.CreateFromSource(vertex_shader, nullptr, fragment_shader);
- state.draw.shader_program = shader.handle;
- state.AllDirty();
- state.Apply();
+ // Create shader programs
+ OGLShader vertex_shader;
+ vertex_shader.Create(VERTEX_SHADER, GL_VERTEX_SHADER);
+
+ OGLShader fragment_shader;
+ fragment_shader.Create(FRAGMENT_SHADER, GL_FRAGMENT_SHADER);
+
+ vertex_program.Create(true, false, vertex_shader.handle);
+ fragment_program.Create(true, false, fragment_shader.handle);
+
+ // Create program pipeline
+ program_manager.Create();
// Generate VBO handle for drawing
vertex_buffer.Create();
- // Generate VAO
- vertex_array.Create();
- state.draw.vertex_array = vertex_array.handle;
-
// Attach vertex data to VAO
glNamedBufferData(vertex_buffer.handle, sizeof(ScreenRectVertex) * 4, nullptr, GL_STREAM_DRAW);
- glVertexArrayAttribFormat(vertex_array.handle, PositionLocation, 2, GL_FLOAT, GL_FALSE,
- offsetof(ScreenRectVertex, position));
- glVertexArrayAttribFormat(vertex_array.handle, TexCoordLocation, 2, GL_FLOAT, GL_FALSE,
- offsetof(ScreenRectVertex, tex_coord));
- glVertexArrayAttribBinding(vertex_array.handle, PositionLocation, 0);
- glVertexArrayAttribBinding(vertex_array.handle, TexCoordLocation, 0);
- glEnableVertexArrayAttrib(vertex_array.handle, PositionLocation);
- glEnableVertexArrayAttrib(vertex_array.handle, TexCoordLocation);
- glVertexArrayVertexBuffer(vertex_array.handle, 0, vertex_buffer.handle, 0,
- sizeof(ScreenRectVertex));
// Allocate textures for the screen
screen_info.texture.resource.Create(GL_TEXTURE_2D);
@@ -306,7 +533,8 @@ void RendererOpenGL::CreateRasterizer() {
if (rasterizer) {
return;
}
- rasterizer = std::make_unique<RasterizerOpenGL>(system, emu_window, screen_info);
+ rasterizer = std::make_unique<RasterizerOpenGL>(system, emu_window, screen_info,
+ program_manager, state_tracker);
}
void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture,
@@ -345,8 +573,19 @@ void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture,
glTextureStorage2D(texture.resource.handle, 1, internal_format, texture.width, texture.height);
}
-void RendererOpenGL::DrawScreenTriangles(const ScreenInfo& screen_info, float x, float y, float w,
- float h) {
+void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {
+ if (renderer_settings.set_background_color) {
+ // Update background color before drawing
+ glClearColor(Settings::values.bg_red, Settings::values.bg_green, Settings::values.bg_blue,
+ 0.0f);
+ }
+
+ // Set projection matrix
+ const std::array ortho_matrix =
+ MakeOrthographicMatrix(static_cast<float>(layout.width), static_cast<float>(layout.height));
+ glProgramUniformMatrix3x2fv(vertex_program.handle, ModelViewMatrixLocation, 1, GL_FALSE,
+ std::data(ortho_matrix));
+
const auto& texcoords = screen_info.display_texcoords;
auto left = texcoords.left;
auto right = texcoords.right;
@@ -378,60 +617,129 @@ void RendererOpenGL::DrawScreenTriangles(const ScreenInfo& screen_info, float x,
static_cast<f32>(screen_info.texture.height);
}
+ const auto& screen = layout.screen;
const std::array vertices = {
- ScreenRectVertex(x, y, texcoords.top * scale_u, left * scale_v),
- ScreenRectVertex(x + w, y, texcoords.bottom * scale_u, left * scale_v),
- ScreenRectVertex(x, y + h, texcoords.top * scale_u, right * scale_v),
- ScreenRectVertex(x + w, y + h, texcoords.bottom * scale_u, right * scale_v),
+ ScreenRectVertex(screen.left, screen.top, texcoords.top * scale_u, left * scale_v),
+ ScreenRectVertex(screen.right, screen.top, texcoords.bottom * scale_u, left * scale_v),
+ ScreenRectVertex(screen.left, screen.bottom, texcoords.top * scale_u, right * scale_v),
+ ScreenRectVertex(screen.right, screen.bottom, texcoords.bottom * scale_u, right * scale_v),
};
-
- state.textures[0] = screen_info.display_texture;
- state.framebuffer_srgb.enabled = screen_info.display_srgb;
- state.AllDirty();
- state.Apply();
glNamedBufferSubData(vertex_buffer.handle, 0, sizeof(vertices), std::data(vertices));
- glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
- // Restore default state
- state.framebuffer_srgb.enabled = false;
- state.textures[0] = 0;
- state.AllDirty();
- state.Apply();
-}
-void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) {
- if (renderer_settings.set_background_color) {
- // Update background color before drawing
- glClearColor(Settings::values.bg_red, Settings::values.bg_green, Settings::values.bg_blue,
- 0.0f);
+ // TODO: Signal state tracker about these changes
+ state_tracker.NotifyScreenDrawVertexArray();
+ state_tracker.NotifyPolygonModes();
+ state_tracker.NotifyViewport0();
+ state_tracker.NotifyScissor0();
+ state_tracker.NotifyColorMask0();
+ state_tracker.NotifyBlend0();
+ state_tracker.NotifyFramebuffer();
+ state_tracker.NotifyFrontFace();
+ state_tracker.NotifyCullTest();
+ state_tracker.NotifyDepthTest();
+ state_tracker.NotifyStencilTest();
+ state_tracker.NotifyPolygonOffset();
+ state_tracker.NotifyRasterizeEnable();
+ state_tracker.NotifyFramebufferSRGB();
+ state_tracker.NotifyLogicOp();
+ state_tracker.NotifyClipControl();
+ state_tracker.NotifyAlphaTest();
+
+ program_manager.UseVertexShader(vertex_program.handle);
+ program_manager.UseGeometryShader(0);
+ program_manager.UseFragmentShader(fragment_program.handle);
+ program_manager.BindGraphicsPipeline();
+
+ glEnable(GL_CULL_FACE);
+ if (screen_info.display_srgb) {
+ glEnable(GL_FRAMEBUFFER_SRGB);
+ } else {
+ glDisable(GL_FRAMEBUFFER_SRGB);
}
+ glDisable(GL_COLOR_LOGIC_OP);
+ glDisable(GL_DEPTH_TEST);
+ glDisable(GL_STENCIL_TEST);
+ glDisable(GL_POLYGON_OFFSET_FILL);
+ glDisable(GL_RASTERIZER_DISCARD);
+ glDisable(GL_ALPHA_TEST);
+ glDisablei(GL_BLEND, 0);
+ glDisablei(GL_SCISSOR_TEST, 0);
+ glPolygonMode(GL_FRONT_AND_BACK, GL_FILL);
+ glCullFace(GL_BACK);
+ glFrontFace(GL_CW);
+ glColorMaski(0, GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE);
+ glClipControl(GL_LOWER_LEFT, GL_ZERO_TO_ONE);
+ glViewportIndexedf(0, 0.0f, 0.0f, static_cast<GLfloat>(layout.width),
+ static_cast<GLfloat>(layout.height));
+ glDepthRangeIndexed(0, 0.0, 0.0);
+
+ glEnableVertexAttribArray(PositionLocation);
+ glEnableVertexAttribArray(TexCoordLocation);
+ glVertexAttribDivisor(PositionLocation, 0);
+ glVertexAttribDivisor(TexCoordLocation, 0);
+ glVertexAttribFormat(PositionLocation, 2, GL_FLOAT, GL_FALSE,
+ offsetof(ScreenRectVertex, position));
+ glVertexAttribFormat(TexCoordLocation, 2, GL_FLOAT, GL_FALSE,
+ offsetof(ScreenRectVertex, tex_coord));
+ glVertexAttribBinding(PositionLocation, 0);
+ glVertexAttribBinding(TexCoordLocation, 0);
+ glBindVertexBuffer(0, vertex_buffer.handle, 0, sizeof(ScreenRectVertex));
+
+ glBindTextureUnit(0, screen_info.display_texture);
+ glBindSampler(0, 0);
- const auto& screen = layout.screen;
-
- glViewport(0, 0, layout.width, layout.height);
glClear(GL_COLOR_BUFFER_BIT);
+ glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
+}
- // Set projection matrix
- const std::array ortho_matrix =
- MakeOrthographicMatrix(static_cast<float>(layout.width), static_cast<float>(layout.height));
- glUniformMatrix3x2fv(ModelViewMatrixLocation, 1, GL_FALSE, ortho_matrix.data());
+void RendererOpenGL::TryPresent(int timeout_ms) {
+ const auto& layout = render_window.GetFramebufferLayout();
+ auto frame = frame_mailbox->TryGetPresentFrame(timeout_ms);
+ if (!frame) {
+ LOG_DEBUG(Render_OpenGL, "TryGetPresentFrame returned no frame to present");
+ return;
+ }
- DrawScreenTriangles(screen_info, static_cast<float>(screen.left),
- static_cast<float>(screen.top), static_cast<float>(screen.GetWidth()),
- static_cast<float>(screen.GetHeight()));
+ // Clearing before a full overwrite of a fbo can signal to drivers that they can avoid a
+ // readback since we won't be doing any blending
+ glClear(GL_COLOR_BUFFER_BIT);
- m_current_frame++;
+ // Recreate the presentation FBO if the color attachment was changed
+ if (frame->color_reloaded) {
+ LOG_DEBUG(Render_OpenGL, "Reloading present frame");
+ frame_mailbox->ReloadPresentFrame(frame, layout.width, layout.height);
+ }
+ glWaitSync(frame->render_fence, 0, GL_TIMEOUT_IGNORED);
+ // INTEL workaround.
+ // Normally we could just delete the draw fence here, but due to driver bugs, we can just delete
+ // it on the emulation thread without too much penalty
+ // glDeleteSync(frame.render_sync);
+ // frame.render_sync = 0;
+
+ glBindFramebuffer(GL_READ_FRAMEBUFFER, frame->present.handle);
+ glBlitFramebuffer(0, 0, frame->width, frame->height, 0, 0, layout.width, layout.height,
+ GL_COLOR_BUFFER_BIT, GL_LINEAR);
+
+ // Insert fence for the main thread to block on
+ frame->present_fence = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
+ glFlush();
+
+ glBindFramebuffer(GL_READ_FRAMEBUFFER, 0);
}
-void RendererOpenGL::UpdateFramerate() {}
+void RendererOpenGL::RenderScreenshot() {
+ if (!renderer_settings.screenshot_requested) {
+ return;
+ }
+
+ GLint old_read_fb;
+ GLint old_draw_fb;
+ glGetIntegerv(GL_READ_FRAMEBUFFER_BINDING, &old_read_fb);
+ glGetIntegerv(GL_DRAW_FRAMEBUFFER_BINDING, &old_draw_fb);
-void RendererOpenGL::CaptureScreenshot() {
// Draw the current frame to the screenshot framebuffer
screenshot_framebuffer.Create();
- GLuint old_read_fb = state.draw.read_framebuffer;
- GLuint old_draw_fb = state.draw.draw_framebuffer;
- state.draw.read_framebuffer = state.draw.draw_framebuffer = screenshot_framebuffer.handle;
- state.AllDirty();
- state.Apply();
+ glBindFramebuffer(GL_FRAMEBUFFER, screenshot_framebuffer.handle);
Layout::FramebufferLayout layout{renderer_settings.screenshot_framebuffer_layout};
@@ -448,19 +756,16 @@ void RendererOpenGL::CaptureScreenshot() {
renderer_settings.screenshot_bits);
screenshot_framebuffer.Release();
- state.draw.read_framebuffer = old_read_fb;
- state.draw.draw_framebuffer = old_draw_fb;
- state.AllDirty();
- state.Apply();
glDeleteRenderbuffers(1, &renderbuffer);
+ glBindFramebuffer(GL_READ_FRAMEBUFFER, old_read_fb);
+ glBindFramebuffer(GL_DRAW_FRAMEBUFFER, old_draw_fb);
+
renderer_settings.screenshot_complete_callback();
renderer_settings.screenshot_requested = false;
}
bool RendererOpenGL::Init() {
- Core::Frontend::ScopeAcquireWindowContext acquire_context{render_window};
-
if (GLAD_GL_KHR_debug) {
glEnable(GL_DEBUG_OUTPUT);
glDebugMessageCallback(DebugHandler, nullptr);
diff --git a/src/video_core/renderer_opengl/renderer_opengl.h b/src/video_core/renderer_opengl/renderer_opengl.h
index b56328a7f..33073ce5b 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.h
+++ b/src/video_core/renderer_opengl/renderer_opengl.h
@@ -10,7 +10,8 @@
#include "common/math_util.h"
#include "video_core/renderer_base.h"
#include "video_core/renderer_opengl/gl_resource_manager.h"
-#include "video_core/renderer_opengl/gl_state.h"
+#include "video_core/renderer_opengl/gl_shader_manager.h"
+#include "video_core/renderer_opengl/gl_state_tracker.h"
namespace Core {
class System;
@@ -44,19 +45,23 @@ struct ScreenInfo {
TextureInfo texture;
};
+struct PresentationTexture {
+ u32 width = 0;
+ u32 height = 0;
+ OGLTexture texture;
+};
+
+class FrameMailbox;
+
class RendererOpenGL final : public VideoCore::RendererBase {
public:
explicit RendererOpenGL(Core::Frontend::EmuWindow& emu_window, Core::System& system);
~RendererOpenGL() override;
- /// Swap buffers (render frame)
- void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override;
-
- /// Initialize the renderer
bool Init() override;
-
- /// Shutdown the renderer
void ShutDown() override;
+ void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override;
+ void TryPresent(int timeout_ms) override;
private:
/// Initializes the OpenGL state and creates persistent objects.
@@ -72,12 +77,7 @@ private:
/// Draws the emulated screens to the emulator window.
void DrawScreen(const Layout::FramebufferLayout& layout);
- void DrawScreenTriangles(const ScreenInfo& screen_info, float x, float y, float w, float h);
-
- /// Updates the framerate.
- void UpdateFramerate();
-
- void CaptureScreenshot();
+ void RenderScreenshot();
/// Loads framebuffer from emulated memory into the active OpenGL texture.
void LoadFBToScreenInfo(const Tegra::FramebufferConfig& framebuffer);
@@ -87,26 +87,34 @@ private:
void LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color_b, u8 color_a,
const TextureInfo& texture);
+ void PrepareRendertarget(const Tegra::FramebufferConfig* framebuffer);
+
Core::Frontend::EmuWindow& emu_window;
Core::System& system;
- OpenGLState state;
+ StateTracker state_tracker{system};
// OpenGL object IDs
- OGLVertexArray vertex_array;
OGLBuffer vertex_buffer;
- OGLProgram shader;
+ OGLProgram vertex_program;
+ OGLProgram fragment_program;
OGLFramebuffer screenshot_framebuffer;
/// Display information for Switch screen
ScreenInfo screen_info;
+ /// Global dummy shader pipeline
+ GLShader::ProgramManager program_manager;
+
/// OpenGL framebuffer data
std::vector<u8> gl_framebuffer_data;
/// Used for transforming the framebuffer orientation
Tegra::FramebufferConfig::TransformFlags framebuffer_transform_flags;
Common::Rectangle<int> framebuffer_crop_rect;
+
+ /// Frame presentation mailbox
+ std::unique_ptr<FrameMailbox> frame_mailbox;
};
} // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/utils.cpp b/src/video_core/renderer_opengl/utils.cpp
index ac99e6385..b751086fa 100644
--- a/src/video_core/renderer_opengl/utils.cpp
+++ b/src/video_core/renderer_opengl/utils.cpp
@@ -9,6 +9,7 @@
#include <glad/glad.h>
#include "common/common_types.h"
+#include "video_core/renderer_opengl/gl_state_tracker.h"
#include "video_core/renderer_opengl/utils.h"
namespace OpenGL {
@@ -20,12 +21,12 @@ struct VertexArrayPushBuffer::Entry {
GLsizei stride{};
};
-VertexArrayPushBuffer::VertexArrayPushBuffer() = default;
+VertexArrayPushBuffer::VertexArrayPushBuffer(StateTracker& state_tracker)
+ : state_tracker{state_tracker} {}
VertexArrayPushBuffer::~VertexArrayPushBuffer() = default;
-void VertexArrayPushBuffer::Setup(GLuint vao_) {
- vao = vao_;
+void VertexArrayPushBuffer::Setup() {
index_buffer = nullptr;
vertex_buffers.clear();
}
@@ -41,13 +42,11 @@ void VertexArrayPushBuffer::SetVertexBuffer(GLuint binding_index, const GLuint*
void VertexArrayPushBuffer::Bind() {
if (index_buffer) {
- glVertexArrayElementBuffer(vao, *index_buffer);
+ state_tracker.BindIndexBuffer(*index_buffer);
}
- // TODO(Rodrigo): Find a way to ARB_multi_bind this
for (const auto& entry : vertex_buffers) {
- glVertexArrayVertexBuffer(vao, entry.binding_index, *entry.buffer, entry.offset,
- entry.stride);
+ glBindVertexBuffer(entry.binding_index, *entry.buffer, entry.offset, entry.stride);
}
}
diff --git a/src/video_core/renderer_opengl/utils.h b/src/video_core/renderer_opengl/utils.h
index 3ad7c02d4..47ee3177b 100644
--- a/src/video_core/renderer_opengl/utils.h
+++ b/src/video_core/renderer_opengl/utils.h
@@ -11,12 +11,14 @@
namespace OpenGL {
+class StateTracker;
+
class VertexArrayPushBuffer final {
public:
- explicit VertexArrayPushBuffer();
+ explicit VertexArrayPushBuffer(StateTracker& state_tracker);
~VertexArrayPushBuffer();
- void Setup(GLuint vao_);
+ void Setup();
void SetIndexBuffer(const GLuint* buffer);
@@ -28,7 +30,8 @@ public:
private:
struct Entry;
- GLuint vao{};
+ StateTracker& state_tracker;
+
const GLuint* index_buffer{};
std::vector<Entry> vertex_buffers;
};