diff options
Diffstat (limited to 'src/video_core/renderer_opengl')
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.cpp | 70 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_rasterizer.h | 14 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_shader_decompiler.cpp | 153 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/gl_texture_cache.cpp | 1 | ||||
| -rw-r--r-- | src/video_core/renderer_opengl/renderer_opengl.cpp | 288 |
5 files changed, 363 insertions, 163 deletions
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 8a2db8e36..1af4268a4 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -496,7 +496,6 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { SyncCullMode(); SyncPrimitiveRestart(); SyncScissorTest(); - SyncTransformFeedback(); SyncPointState(); SyncPolygonOffset(); SyncAlphaTest(); @@ -569,7 +568,7 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { glTextureBarrier(); } - ++num_queued_commands; + BeginTransformFeedback(primitive_mode); const GLuint base_instance = static_cast<GLuint>(gpu.regs.vb_base_instance); const GLsizei num_instances = @@ -608,6 +607,10 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { num_instances, base_instance); } } + + EndTransformFeedback(); + + ++num_queued_commands; } void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) { @@ -1290,11 +1293,6 @@ void RasterizerOpenGL::SyncScissorTest() { } } -void RasterizerOpenGL::SyncTransformFeedback() { - const auto& regs = system.GPU().Maxwell3D().regs; - UNIMPLEMENTED_IF_MSG(regs.tfb_enabled != 0, "Transform feedbacks are not implemented"); -} - void RasterizerOpenGL::SyncPointState() { auto& gpu = system.GPU().Maxwell3D(); auto& flags = gpu.dirty.flags; @@ -1370,4 +1368,62 @@ void RasterizerOpenGL::SyncFramebufferSRGB() { oglEnable(GL_FRAMEBUFFER_SRGB, gpu.regs.framebuffer_srgb); } +void RasterizerOpenGL::BeginTransformFeedback(GLenum primitive_mode) { + const auto& regs = system.GPU().Maxwell3D().regs; + if (regs.tfb_enabled == 0) { + return; + } + + UNIMPLEMENTED_IF(regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationControl) || + regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::TesselationEval) || + regs.IsShaderConfigEnabled(Maxwell::ShaderProgram::Geometry)); + + for (std::size_t index = 0; index < Maxwell::NumTransformFeedbackBuffers; ++index) { + const auto& binding = regs.tfb_bindings[index]; + if (!binding.buffer_enable) { + if (enabled_transform_feedback_buffers[index]) { + glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER, static_cast<GLuint>(index), 0, 0, + 0); + } + enabled_transform_feedback_buffers[index] = false; + continue; + } + enabled_transform_feedback_buffers[index] = true; + + auto& tfb_buffer = transform_feedback_buffers[index]; + tfb_buffer.Create(); + + const GLuint handle = tfb_buffer.handle; + const std::size_t size = binding.buffer_size; + glNamedBufferData(handle, static_cast<GLsizeiptr>(size), nullptr, GL_STREAM_COPY); + glBindBufferRange(GL_TRANSFORM_FEEDBACK_BUFFER, static_cast<GLuint>(index), handle, 0, + static_cast<GLsizeiptr>(size)); + } + + glBeginTransformFeedback(GL_POINTS); +} + +void RasterizerOpenGL::EndTransformFeedback() { + const auto& regs = system.GPU().Maxwell3D().regs; + if (regs.tfb_enabled == 0) { + return; + } + + glEndTransformFeedback(); + + for (std::size_t index = 0; index < Maxwell::NumTransformFeedbackBuffers; ++index) { + const auto& binding = regs.tfb_bindings[index]; + if (!binding.buffer_enable) { + continue; + } + UNIMPLEMENTED_IF(binding.buffer_offset != 0); + + const GLuint handle = transform_feedback_buffers[index].handle; + const GPUVAddr gpu_addr = binding.Address(); + const std::size_t size = binding.buffer_size; + const auto [dest_buffer, offset] = buffer_cache.UploadMemory(gpu_addr, size, 4, true); + glCopyNamedBufferSubData(handle, *dest_buffer, 0, offset, static_cast<GLsizeiptr>(size)); + } +} + } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index e6424f5d2..2d3be2437 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -168,9 +168,6 @@ private: /// Syncs the scissor test state to match the guest state void SyncScissorTest(); - /// Syncs the transform feedback state to match the guest state - void SyncTransformFeedback(); - /// Syncs the point state to match the guest state void SyncPointState(); @@ -192,6 +189,12 @@ private: /// Syncs the framebuffer sRGB state to match the guest state void SyncFramebufferSRGB(); + /// Begin a transform feedback + void BeginTransformFeedback(GLenum primitive_mode); + + /// End a transform feedback + void EndTransformFeedback(); + /// Check for extension that are not strictly required but are needed for correct emulation void CheckExtensions(); @@ -229,6 +232,11 @@ private: BindBuffersRangePushBuffer bind_ubo_pushbuffer{GL_UNIFORM_BUFFER}; BindBuffersRangePushBuffer bind_ssbo_pushbuffer{GL_SHADER_STORAGE_BUFFER}; + std::array<OGLBuffer, Tegra::Engines::Maxwell3D::Regs::NumTransformFeedbackBuffers> + transform_feedback_buffers; + std::bitset<Tegra::Engines::Maxwell3D::Regs::NumTransformFeedbackBuffers> + enabled_transform_feedback_buffers; + /// Number of commands queued to the OpenGL driver. Reseted on flush. std::size_t num_queued_commands = 0; diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 19d6f3dcb..849839fe3 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -23,6 +23,7 @@ #include "video_core/shader/ast.h" #include "video_core/shader/node.h" #include "video_core/shader/shader_ir.h" +#include "video_core/shader/transform_feedback.h" namespace OpenGL { @@ -36,6 +37,7 @@ using Tegra::Shader::IpaInterpMode; using Tegra::Shader::IpaMode; using Tegra::Shader::IpaSampleMode; using Tegra::Shader::Register; +using VideoCommon::Shader::BuildTransformFeedback; using VideoCommon::Shader::Registry; using namespace std::string_literals; @@ -49,6 +51,11 @@ class ExprDecompiler; enum class Type { Void, Bool, Bool2, Float, Int, Uint, HalfFloat }; +constexpr std::array FLOAT_TYPES{"float", "vec2", "vec3", "vec4"}; + +constexpr std::string_view INPUT_ATTRIBUTE_NAME = "in_attr"; +constexpr std::string_view OUTPUT_ATTRIBUTE_NAME = "out_attr"; + struct TextureOffset {}; struct TextureDerivates {}; using TextureArgument = std::pair<Type, Node>; @@ -390,12 +397,22 @@ std::string FlowStackTopName(MetaStackClass stack) { return stage == ShaderType::Vertex; } +struct GenericVaryingDescription { + std::string name; + u8 first_element = 0; + bool is_scalar = false; +}; + class GLSLDecompiler final { public: explicit GLSLDecompiler(const Device& device, const ShaderIR& ir, const Registry& registry, ShaderType stage, std::string_view identifier, std::string_view suffix) : device{device}, ir{ir}, registry{registry}, stage{stage}, - identifier{identifier}, suffix{suffix}, header{ir.GetHeader()} {} + identifier{identifier}, suffix{suffix}, header{ir.GetHeader()} { + if (stage != ShaderType::Compute) { + transform_feedback = BuildTransformFeedback(registry.GetGraphicsInfo()); + } + } void Decompile() { DeclareHeader(); @@ -403,17 +420,17 @@ public: DeclareGeometry(); DeclareFragment(); DeclareCompute(); - DeclareRegisters(); - DeclareCustomVariables(); - DeclarePredicates(); - DeclareLocalMemory(); - DeclareInternalFlags(); DeclareInputAttributes(); DeclareOutputAttributes(); - DeclareConstantBuffers(); - DeclareGlobalMemory(); - DeclareSamplers(); DeclareImages(); + DeclareSamplers(); + DeclareGlobalMemory(); + DeclareConstantBuffers(); + DeclareLocalMemory(); + DeclareRegisters(); + DeclarePredicates(); + DeclareInternalFlags(); + DeclareCustomVariables(); DeclarePhysicalAttributeReader(); code.AddLine("void main() {{"); @@ -485,7 +502,7 @@ private: if (!identifier.empty()) { code.AddLine("// {}", identifier); } - code.AddLine("#version 430 core"); + code.AddLine("#version 440 core"); code.AddLine("#extension GL_ARB_separate_shader_objects : enable"); if (device.HasShaderBallot()) { code.AddLine("#extension GL_ARB_shader_ballot : require"); @@ -570,7 +587,13 @@ private: code.AddLine("out gl_PerVertex {{"); ++code.scope; - code.AddLine("vec4 gl_Position;"); + auto pos_xfb = GetTransformFeedbackDecoration(Attribute::Index::Position); + if (!pos_xfb.empty()) { + pos_xfb = fmt::format("layout ({}) ", pos_xfb); + } + const char* pos_type = + FLOAT_TYPES.at(GetNumComponents(Attribute::Index::Position).value_or(4) - 1); + code.AddLine("{}{} gl_Position;", pos_xfb, pos_type); for (const auto attribute : ir.GetOutputAttributes()) { if (attribute == Attribute::Index::ClipDistances0123 || @@ -703,7 +726,7 @@ private: void DeclareInputAttribute(Attribute::Index index, bool skip_unused) { const u32 location{GetGenericAttributeIndex(index)}; - std::string name{GetInputAttribute(index)}; + std::string name{GetGenericInputAttribute(index)}; if (stage == ShaderType::Geometry) { name = "gs_" + name + "[]"; } @@ -740,9 +763,59 @@ private: } } + std::optional<std::size_t> GetNumComponents(Attribute::Index index, u8 element = 0) const { + const u8 location = static_cast<u8>(static_cast<u32>(index) * 4 + element); + const auto it = transform_feedback.find(location); + if (it == transform_feedback.end()) { + return {}; + } + return it->second.components; + } + + std::string GetTransformFeedbackDecoration(Attribute::Index index, u8 element = 0) const { + const u8 location = static_cast<u8>(static_cast<u32>(index) * 4 + element); + const auto it = transform_feedback.find(location); + if (it == transform_feedback.end()) { + return {}; + } + + const VaryingTFB& tfb = it->second; + return fmt::format("xfb_buffer = {}, xfb_offset = {}, xfb_stride = {}", tfb.buffer, + tfb.offset, tfb.stride); + } + void DeclareOutputAttribute(Attribute::Index index) { - const u32 location{GetGenericAttributeIndex(index)}; - code.AddLine("layout (location = {}) out vec4 {};", location, GetOutputAttribute(index)); + static constexpr std::string_view swizzle = "xyzw"; + u8 element = 0; + while (element < 4) { + auto xfb = GetTransformFeedbackDecoration(index, element); + if (!xfb.empty()) { + xfb = fmt::format(", {}", xfb); + } + const std::size_t remainder = 4 - element; + const std::size_t num_components = GetNumComponents(index, element).value_or(remainder); + const char* const type = FLOAT_TYPES.at(num_components - 1); + + const u32 location = GetGenericAttributeIndex(index); + + GenericVaryingDescription description; + description.first_element = static_cast<u8>(element); + description.is_scalar = num_components == 1; + description.name = AppendSuffix(location, OUTPUT_ATTRIBUTE_NAME); + if (element != 0 || num_components != 4) { + const std::string_view name_swizzle = swizzle.substr(element, num_components); + description.name = fmt::format("{}_{}", description.name, name_swizzle); + } + for (std::size_t i = 0; i < num_components; ++i) { + const u8 offset = static_cast<u8>(location * 4 + element + i); + varying_description.insert({offset, description}); + } + + code.AddLine("layout (location = {}, component = {}{}) out {} {};", location, element, + xfb, type, description.name); + + element = static_cast<u8>(static_cast<std::size_t>(element) + num_components); + } } void DeclareConstantBuffers() { @@ -1095,7 +1168,7 @@ private: return {"0", Type::Int}; default: if (IsGenericAttribute(attribute)) { - return {GeometryPass(GetInputAttribute(attribute)) + GetSwizzle(element), + return {GeometryPass(GetGenericInputAttribute(attribute)) + GetSwizzle(element), Type::Float}; } break; @@ -1164,8 +1237,7 @@ private: return {{fmt::format("gl_ClipDistance[{}]", abuf->GetElement() + 4), Type::Float}}; default: if (IsGenericAttribute(attribute)) { - return { - {GetOutputAttribute(attribute) + GetSwizzle(abuf->GetElement()), Type::Float}}; + return {{GetGenericOutputAttribute(attribute, abuf->GetElement()), Type::Float}}; } UNIMPLEMENTED_MSG("Unhandled output attribute: {}", static_cast<u32>(attribute)); return {}; @@ -1937,16 +2009,19 @@ private: expr += GetSampler(meta->sampler); expr += ", "; - expr += constructors.at(operation.GetOperandsCount() - 1); + expr += constructors.at(operation.GetOperandsCount() + (meta->array ? 1 : 0) - 1); expr += '('; for (std::size_t i = 0; i < count; ++i) { - expr += VisitOperand(operation, i).AsInt(); - const std::size_t next = i + 1; - if (next == count) - expr += ')'; - else if (next < count) + if (i > 0) { expr += ", "; + } + expr += VisitOperand(operation, i).AsInt(); + } + if (meta->array) { + expr += ", "; + expr += Visit(meta->array).AsInt(); } + expr += ')'; if (meta->lod && !meta->sampler.IsBuffer()) { expr += ", "; @@ -2376,27 +2451,34 @@ private: static_assert(operation_decompilers.size() == static_cast<std::size_t>(OperationCode::Amount)); std::string GetRegister(u32 index) const { - return GetDeclarationWithSuffix(index, "gpr"); + return AppendSuffix(index, "gpr"); } std::string GetCustomVariable(u32 index) const { - return GetDeclarationWithSuffix(index, "custom_var"); + return AppendSuffix(index, "custom_var"); } std::string GetPredicate(Tegra::Shader::Pred pred) const { - return GetDeclarationWithSuffix(static_cast<u32>(pred), "pred"); + return AppendSuffix(static_cast<u32>(pred), "pred"); } - std::string GetInputAttribute(Attribute::Index attribute) const { - return GetDeclarationWithSuffix(GetGenericAttributeIndex(attribute), "input_attr"); + std::string GetGenericInputAttribute(Attribute::Index attribute) const { + return AppendSuffix(GetGenericAttributeIndex(attribute), INPUT_ATTRIBUTE_NAME); } - std::string GetOutputAttribute(Attribute::Index attribute) const { - return GetDeclarationWithSuffix(GetGenericAttributeIndex(attribute), "output_attr"); + std::unordered_map<u8, GenericVaryingDescription> varying_description; + + std::string GetGenericOutputAttribute(Attribute::Index attribute, std::size_t element) const { + const u8 offset = static_cast<u8>(GetGenericAttributeIndex(attribute) * 4 + element); + const auto& description = varying_description.at(offset); + if (description.is_scalar) { + return description.name; + } + return fmt::format("{}[{}]", description.name, element - description.first_element); } std::string GetConstBuffer(u32 index) const { - return GetDeclarationWithSuffix(index, "cbuf"); + return AppendSuffix(index, "cbuf"); } std::string GetGlobalMemory(const GlobalMemoryBase& descriptor) const { @@ -2409,7 +2491,7 @@ private: } std::string GetConstBufferBlock(u32 index) const { - return GetDeclarationWithSuffix(index, "cbuf_block"); + return AppendSuffix(index, "cbuf_block"); } std::string GetLocalMemory() const { @@ -2434,14 +2516,14 @@ private: } std::string GetSampler(const Sampler& sampler) const { - return GetDeclarationWithSuffix(static_cast<u32>(sampler.GetIndex()), "sampler"); + return AppendSuffix(static_cast<u32>(sampler.GetIndex()), "sampler"); } std::string GetImage(const Image& image) const { - return GetDeclarationWithSuffix(static_cast<u32>(image.GetIndex()), "image"); + return AppendSuffix(static_cast<u32>(image.GetIndex()), "image"); } - std::string GetDeclarationWithSuffix(u32 index, std::string_view name) const { + std::string AppendSuffix(u32 index, std::string_view name) const { if (suffix.empty()) { return fmt::format("{}{}", name, index); } else { @@ -2477,6 +2559,7 @@ private: const std::string_view identifier; const std::string_view suffix; const Header header; + std::unordered_map<u8, VaryingTFB> transform_feedback; ShaderWriter code; diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index 2d3838a7a..f424e3000 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -53,6 +53,7 @@ constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> tex_format {GL_R8UI, GL_RED_INTEGER, GL_UNSIGNED_BYTE, false}, // R8UI {GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT, false}, // RGBA16F {GL_RGBA16, GL_RGBA, GL_UNSIGNED_SHORT, false}, // RGBA16U + {GL_RGBA16_SNORM, GL_RGBA, GL_SHORT, false}, // RGBA16S {GL_RGBA16UI, GL_RGBA_INTEGER, GL_UNSIGNED_SHORT, false}, // RGBA16UI {GL_R11F_G11F_B10F, GL_RGB, GL_UNSIGNED_INT_10F_11F_11F_REV, false}, // R11FG11FB10F {GL_RGBA32UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT, false}, // RGBA32UI diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index 12333e8c9..fca5e3ec0 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -5,8 +5,11 @@ #include <algorithm> #include <cstddef> #include <cstdlib> +#include <cstring> #include <memory> + #include <glad/glad.h> + #include "common/assert.h" #include "common/logging/log.h" #include "common/microprofile.h" @@ -25,6 +28,8 @@ namespace OpenGL { +namespace { + // If the size of this is too small, it ends up creating a soft cap on FPS as the renderer will have // to wait on available presentation frames. constexpr std::size_t SWAP_CHAIN_SIZE = 3; @@ -41,124 +46,6 @@ struct Frame { bool is_srgb{}; /// Framebuffer is sRGB or RGB }; -/** - * For smooth Vsync rendering, we want to always present the latest frame that the core generates, - * but also make sure that rendering happens at the pace that the frontend dictates. This is a - * helper class that the renderer uses to sync frames between the render thread and the presentation - * thread - */ -class FrameMailbox { -public: - std::mutex swap_chain_lock; - std::condition_variable present_cv; - std::array<Frame, SWAP_CHAIN_SIZE> swap_chain{}; - std::queue<Frame*> free_queue; - std::deque<Frame*> present_queue; - Frame* previous_frame{}; - - FrameMailbox() { - for (auto& frame : swap_chain) { - free_queue.push(&frame); - } - } - - ~FrameMailbox() { - // lock the mutex and clear out the present and free_queues and notify any people who are - // blocked to prevent deadlock on shutdown - std::scoped_lock lock{swap_chain_lock}; - std::queue<Frame*>().swap(free_queue); - present_queue.clear(); - present_cv.notify_all(); - } - - void ReloadPresentFrame(Frame* frame, u32 height, u32 width) { - frame->present.Release(); - frame->present.Create(); - GLint previous_draw_fbo{}; - glGetIntegerv(GL_DRAW_FRAMEBUFFER_BINDING, &previous_draw_fbo); - glBindFramebuffer(GL_FRAMEBUFFER, frame->present.handle); - glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_RENDERBUFFER, - frame->color.handle); - if (glCheckFramebufferStatus(GL_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) { - LOG_CRITICAL(Render_OpenGL, "Failed to recreate present FBO!"); - } - glBindFramebuffer(GL_DRAW_FRAMEBUFFER, previous_draw_fbo); - frame->color_reloaded = false; - } - - void ReloadRenderFrame(Frame* frame, u32 width, u32 height) { - // Recreate the color texture attachment - frame->color.Release(); - frame->color.Create(); - const GLenum internal_format = frame->is_srgb ? GL_SRGB8 : GL_RGB8; - glNamedRenderbufferStorage(frame->color.handle, internal_format, width, height); - - // Recreate the FBO for the render target - frame->render.Release(); - frame->render.Create(); - glBindFramebuffer(GL_FRAMEBUFFER, frame->render.handle); - glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_RENDERBUFFER, - frame->color.handle); - if (glCheckFramebufferStatus(GL_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) { - LOG_CRITICAL(Render_OpenGL, "Failed to recreate render FBO!"); - } - - frame->width = width; - frame->height = height; - frame->color_reloaded = true; - } - - Frame* GetRenderFrame() { - std::unique_lock lock{swap_chain_lock}; - - // If theres no free frames, we will reuse the oldest render frame - if (free_queue.empty()) { - auto frame = present_queue.back(); - present_queue.pop_back(); - return frame; - } - - Frame* frame = free_queue.front(); - free_queue.pop(); - return frame; - } - - void ReleaseRenderFrame(Frame* frame) { - std::unique_lock lock{swap_chain_lock}; - present_queue.push_front(frame); - present_cv.notify_one(); - } - - Frame* TryGetPresentFrame(int timeout_ms) { - std::unique_lock lock{swap_chain_lock}; - // wait for new entries in the present_queue - present_cv.wait_for(lock, std::chrono::milliseconds(timeout_ms), - [&] { return !present_queue.empty(); }); - if (present_queue.empty()) { - // timed out waiting for a frame to draw so return the previous frame - return previous_frame; - } - - // free the previous frame and add it back to the free queue - if (previous_frame) { - free_queue.push(previous_frame); - } - - // the newest entries are pushed to the front of the queue - Frame* frame = present_queue.front(); - present_queue.pop_front(); - // remove all old entries from the present queue and move them back to the free_queue - for (auto f : present_queue) { - free_queue.push(f); - } - present_queue.clear(); - previous_frame = frame; - return frame; - } -}; - -namespace { - constexpr char VERTEX_SHADER[] = R"( #version 430 core @@ -211,6 +98,24 @@ struct ScreenRectVertex { std::array<GLfloat, 2> tex_coord; }; +/// Returns true if any debug tool is attached +bool HasDebugTool() { + const bool nsight = std::getenv("NVTX_INJECTION64_PATH") || std::getenv("NSIGHT_LAUNCHED"); + if (nsight) { + return true; + } + + GLint num_extensions; + glGetIntegerv(GL_NUM_EXTENSIONS, &num_extensions); + for (GLuint index = 0; index < static_cast<GLuint>(num_extensions); ++index) { + const auto name = reinterpret_cast<const char*>(glGetStringi(GL_EXTENSIONS, index)); + if (!std::strcmp(name, "GL_EXT_debug_tool")) { + return true; + } + } + return false; +} + /** * Defines a 1:1 pixel ortographic projection matrix with (0,0) on the top-left * corner and (width, height) on the lower-bottom. @@ -294,6 +199,153 @@ void APIENTRY DebugHandler(GLenum source, GLenum type, GLuint id, GLenum severit } // Anonymous namespace +/** + * For smooth Vsync rendering, we want to always present the latest frame that the core generates, + * but also make sure that rendering happens at the pace that the frontend dictates. This is a + * helper class that the renderer uses to sync frames between the render thread and the presentation + * thread + */ +class FrameMailbox { +public: + std::mutex swap_chain_lock; + std::condition_variable present_cv; + std::array<Frame, SWAP_CHAIN_SIZE> swap_chain{}; + std::queue<Frame*> free_queue; + std::deque<Frame*> present_queue; + Frame* previous_frame{}; + + FrameMailbox() : has_debug_tool{HasDebugTool()} { + for (auto& frame : swap_chain) { + free_queue.push(&frame); + } + } + + ~FrameMailbox() { + // lock the mutex and clear out the present and free_queues and notify any people who are + // blocked to prevent deadlock on shutdown + std::scoped_lock lock{swap_chain_lock}; + std::queue<Frame*>().swap(free_queue); + present_queue.clear(); + present_cv.notify_all(); + } + + void ReloadPresentFrame(Frame* frame, u32 height, u32 width) { + frame->present.Release(); + frame->present.Create(); + GLint previous_draw_fbo{}; + glGetIntegerv(GL_DRAW_FRAMEBUFFER_BINDING, &previous_draw_fbo); + glBindFramebuffer(GL_FRAMEBUFFER, frame->present.handle); + glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_RENDERBUFFER, + frame->color.handle); + if (glCheckFramebufferStatus(GL_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) { + LOG_CRITICAL(Render_OpenGL, "Failed to recreate present FBO!"); + } + glBindFramebuffer(GL_DRAW_FRAMEBUFFER, previous_draw_fbo); + frame->color_reloaded = false; + } + + void ReloadRenderFrame(Frame* frame, u32 width, u32 height) { + // Recreate the color texture attachment + frame->color.Release(); + frame->color.Create(); + const GLenum internal_format = frame->is_srgb ? GL_SRGB8 : GL_RGB8; + glNamedRenderbufferStorage(frame->color.handle, internal_format, width, height); + + // Recreate the FBO for the render target + frame->render.Release(); + frame->render.Create(); + glBindFramebuffer(GL_FRAMEBUFFER, frame->render.handle); + glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_RENDERBUFFER, + frame->color.handle); + if (glCheckFramebufferStatus(GL_FRAMEBUFFER) != GL_FRAMEBUFFER_COMPLETE) { + LOG_CRITICAL(Render_OpenGL, "Failed to recreate render FBO!"); + } + + frame->width = width; + frame->height = height; + frame->color_reloaded = true; + } + + Frame* GetRenderFrame() { + std::unique_lock lock{swap_chain_lock}; + + // If theres no free frames, we will reuse the oldest render frame + if (free_queue.empty()) { + auto frame = present_queue.back(); + present_queue.pop_back(); + return frame; + } + + Frame* frame = free_queue.front(); + free_queue.pop(); + return frame; + } + + void ReleaseRenderFrame(Frame* frame) { + std::unique_lock lock{swap_chain_lock}; + present_queue.push_front(frame); + present_cv.notify_one(); + + DebugNotifyNextFrame(); + } + + Frame* TryGetPresentFrame(int timeout_ms) { + DebugWaitForNextFrame(); + + std::unique_lock lock{swap_chain_lock}; + // wait for new entries in the present_queue + present_cv.wait_for(lock, std::chrono::milliseconds(timeout_ms), + [&] { return !present_queue.empty(); }); + if (present_queue.empty()) { + // timed out waiting for a frame to draw so return the previous frame + return previous_frame; + } + + // free the previous frame and add it back to the free queue + if (previous_frame) { + free_queue.push(previous_frame); + } + + // the newest entries are pushed to the front of the queue + Frame* frame = present_queue.front(); + present_queue.pop_front(); + // remove all old entries from the present queue and move them back to the free_queue + for (auto f : present_queue) { + free_queue.push(f); + } + present_queue.clear(); + previous_frame = frame; + return frame; + } + +private: + std::mutex debug_synch_mutex; + std::condition_variable debug_synch_condition; + std::atomic_int frame_for_debug{}; + const bool has_debug_tool; // When true, using a GPU debugger, so keep frames in lock-step + + /// Signal that a new frame is available (called from GPU thread) + void DebugNotifyNextFrame() { + if (!has_debug_tool) { + return; + } + frame_for_debug++; + std::lock_guard lock{debug_synch_mutex}; + debug_synch_condition.notify_one(); + } + + /// Wait for a new frame to be available (called from presentation thread) + void DebugWaitForNextFrame() { + if (!has_debug_tool) { + return; + } + const int last_frame = frame_for_debug; + std::unique_lock lock{debug_synch_mutex}; + debug_synch_condition.wait(lock, + [this, last_frame] { return frame_for_debug > last_frame; }); + } +}; + RendererOpenGL::RendererOpenGL(Core::Frontend::EmuWindow& emu_window, Core::System& system) : VideoCore::RendererBase{emu_window}, emu_window{emu_window}, system{system}, frame_mailbox{std::make_unique<FrameMailbox>()} {} |
