diff options
| author | riperiperi <rhy3756547@hotmail.com> | 2024-07-18 00:21:32 +0100 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2024-07-17 20:21:32 -0300 |
| commit | 1a919e99b29fff4e2158e622cb3dfbee21293b6d (patch) | |
| tree | 00f1f3263411b96301be80b26a3b10967374328e /src/Ryujinx.Graphics.Vulkan/BarrierBatch.cs | |
| parent | f77bebac80bd2fcbee72b00845e56faf3de3bad6 (diff) | |
Vulkan: Defer guest barriers, and improve image barrier timings (#7012)
* More guarantees for buffer correct placement, defer guest requested buffers
* Split RP on indirect barrier rn
* Better handling for feedback loops.
* Qualcomm barriers suck too
* Fix condition
* Remove unused field
* Allow render pass barriers on turnip for now
Diffstat (limited to 'src/Ryujinx.Graphics.Vulkan/BarrierBatch.cs')
| -rw-r--r-- | src/Ryujinx.Graphics.Vulkan/BarrierBatch.cs | 260 |
1 file changed, 238 insertions, 22 deletions
diff --git a/src/Ryujinx.Graphics.Vulkan/BarrierBatch.cs b/src/Ryujinx.Graphics.Vulkan/BarrierBatch.cs index 24642af2..a6a006bb 100644 --- a/src/Ryujinx.Graphics.Vulkan/BarrierBatch.cs +++ b/src/Ryujinx.Graphics.Vulkan/BarrierBatch.cs @@ -1,6 +1,7 @@ using Silk.NET.Vulkan; using System; using System.Collections.Generic; +using System.Runtime.CompilerServices; namespace Ryujinx.Graphics.Vulkan { @@ -8,22 +9,64 @@ namespace Ryujinx.Graphics.Vulkan { private const int MaxBarriersPerCall = 16; + private const AccessFlags BaseAccess = AccessFlags.ShaderReadBit | AccessFlags.ShaderWriteBit; + private const AccessFlags BufferAccess = AccessFlags.IndexReadBit | AccessFlags.VertexAttributeReadBit | AccessFlags.UniformReadBit; + private const AccessFlags CommandBufferAccess = AccessFlags.IndirectCommandReadBit; + private readonly VulkanRenderer _gd; private readonly NativeArray<MemoryBarrier> _memoryBarrierBatch = new(MaxBarriersPerCall); private readonly NativeArray<BufferMemoryBarrier> _bufferBarrierBatch = new(MaxBarriersPerCall); private readonly NativeArray<ImageMemoryBarrier> _imageBarrierBatch = new(MaxBarriersPerCall); - private readonly List<BarrierWithStageFlags<MemoryBarrier>> _memoryBarriers = new(); - private readonly List<BarrierWithStageFlags<BufferMemoryBarrier>> _bufferBarriers = new(); - private readonly List<BarrierWithStageFlags<ImageMemoryBarrier>> _imageBarriers = new(); + private readonly List<BarrierWithStageFlags<MemoryBarrier, int>> _memoryBarriers = new(); + private readonly List<BarrierWithStageFlags<BufferMemoryBarrier, int>> _bufferBarriers = new(); + private readonly List<BarrierWithStageFlags<ImageMemoryBarrier, TextureStorage>> _imageBarriers = new(); private int _queuedBarrierCount; + private enum IncoherentBarrierType + { + None, + Texture, + All, + CommandBuffer + } + + private PipelineStageFlags _incoherentBufferWriteStages; + private PipelineStageFlags _incoherentTextureWriteStages; + private PipelineStageFlags _extraStages; + private 
IncoherentBarrierType _queuedIncoherentBarrier; + public BarrierBatch(VulkanRenderer gd) { _gd = gd; } + public static (AccessFlags Access, PipelineStageFlags Stages) GetSubpassAccessSuperset(VulkanRenderer gd) + { + AccessFlags access = BufferAccess; + PipelineStageFlags stages = PipelineStageFlags.AllGraphicsBit; + + if (gd.TransformFeedbackApi != null) + { + access |= AccessFlags.TransformFeedbackWriteBitExt; + stages |= PipelineStageFlags.TransformFeedbackBitExt; + } + + if (!gd.IsTBDR) + { + // Desktop GPUs can transform image barriers into memory barriers. + + access |= AccessFlags.DepthStencilAttachmentWriteBit | AccessFlags.ColorAttachmentWriteBit; + access |= AccessFlags.DepthStencilAttachmentReadBit | AccessFlags.ColorAttachmentReadBit; + + stages |= PipelineStageFlags.EarlyFragmentTestsBit | PipelineStageFlags.LateFragmentTestsBit; + stages |= PipelineStageFlags.ColorAttachmentOutputBit; + } + + return (access, stages); + } + private readonly record struct StageFlags : IEquatable<StageFlags> { public readonly PipelineStageFlags Source; @@ -36,47 +79,130 @@ namespace Ryujinx.Graphics.Vulkan } } - private readonly struct BarrierWithStageFlags<T> where T : unmanaged + private readonly struct BarrierWithStageFlags<T, T2> where T : unmanaged { public readonly StageFlags Flags; public readonly T Barrier; + public readonly T2 Resource; public BarrierWithStageFlags(StageFlags flags, T barrier) { Flags = flags; Barrier = barrier; + Resource = default; } - public BarrierWithStageFlags(PipelineStageFlags srcStageFlags, PipelineStageFlags dstStageFlags, T barrier) + public BarrierWithStageFlags(PipelineStageFlags srcStageFlags, PipelineStageFlags dstStageFlags, T barrier, T2 resource) { Flags = new StageFlags(srcStageFlags, dstStageFlags); Barrier = barrier; + Resource = resource; } } - private void QueueBarrier<T>(List<BarrierWithStageFlags<T>> list, T barrier, PipelineStageFlags srcStageFlags, PipelineStageFlags dstStageFlags) where T : unmanaged + private void 
QueueBarrier<T, T2>(List<BarrierWithStageFlags<T, T2>> list, T barrier, T2 resource, PipelineStageFlags srcStageFlags, PipelineStageFlags dstStageFlags) where T : unmanaged { - list.Add(new BarrierWithStageFlags<T>(srcStageFlags, dstStageFlags, barrier)); + list.Add(new BarrierWithStageFlags<T, T2>(srcStageFlags, dstStageFlags, barrier, resource)); _queuedBarrierCount++; } public void QueueBarrier(MemoryBarrier barrier, PipelineStageFlags srcStageFlags, PipelineStageFlags dstStageFlags) { - QueueBarrier(_memoryBarriers, barrier, srcStageFlags, dstStageFlags); + QueueBarrier(_memoryBarriers, barrier, default, srcStageFlags, dstStageFlags); } public void QueueBarrier(BufferMemoryBarrier barrier, PipelineStageFlags srcStageFlags, PipelineStageFlags dstStageFlags) { - QueueBarrier(_bufferBarriers, barrier, srcStageFlags, dstStageFlags); + QueueBarrier(_bufferBarriers, barrier, default, srcStageFlags, dstStageFlags); } - public void QueueBarrier(ImageMemoryBarrier barrier, PipelineStageFlags srcStageFlags, PipelineStageFlags dstStageFlags) + public void QueueBarrier(ImageMemoryBarrier barrier, TextureStorage resource, PipelineStageFlags srcStageFlags, PipelineStageFlags dstStageFlags) { - QueueBarrier(_imageBarriers, barrier, srcStageFlags, dstStageFlags); + QueueBarrier(_imageBarriers, barrier, resource, srcStageFlags, dstStageFlags); } - public unsafe void Flush(CommandBuffer cb, bool insideRenderPass, Action endRenderPass) + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public unsafe void FlushMemoryBarrier(ShaderCollection program, bool inRenderPass) { + if (_queuedIncoherentBarrier > IncoherentBarrierType.None) + { + // We should emit a memory barrier if there's a write access in the program (current program, or program since last barrier) + bool hasTextureWrite = _incoherentTextureWriteStages != PipelineStageFlags.None; + bool hasBufferWrite = _incoherentBufferWriteStages != PipelineStageFlags.None; + bool hasBufferBarrier = _queuedIncoherentBarrier > 
IncoherentBarrierType.Texture; + + if (hasTextureWrite || (hasBufferBarrier && hasBufferWrite)) + { + AccessFlags access = BaseAccess; + + PipelineStageFlags stages = inRenderPass ? PipelineStageFlags.AllGraphicsBit : PipelineStageFlags.AllCommandsBit; + + if (hasBufferBarrier && hasBufferWrite) + { + access |= BufferAccess; + + if (_gd.TransformFeedbackApi != null) + { + access |= AccessFlags.TransformFeedbackWriteBitExt; + stages |= PipelineStageFlags.TransformFeedbackBitExt; + } + } + + if (_queuedIncoherentBarrier == IncoherentBarrierType.CommandBuffer) + { + access |= CommandBufferAccess; + stages |= PipelineStageFlags.DrawIndirectBit; + } + + MemoryBarrier barrier = new MemoryBarrier() + { + SType = StructureType.MemoryBarrier, + SrcAccessMask = access, + DstAccessMask = access + }; + + QueueBarrier(barrier, stages, stages); + + _incoherentTextureWriteStages = program?.IncoherentTextureWriteStages ?? PipelineStageFlags.None; + + if (_queuedIncoherentBarrier > IncoherentBarrierType.Texture) + { + if (program != null) + { + _incoherentBufferWriteStages = program.IncoherentBufferWriteStages | _extraStages; + } + else + { + _incoherentBufferWriteStages = PipelineStageFlags.None; + } + } + + _queuedIncoherentBarrier = IncoherentBarrierType.None; + } + } + } + + public unsafe void Flush(CommandBufferScoped cbs, bool inRenderPass, RenderPassHolder rpHolder, Action endRenderPass) + { + Flush(cbs, null, inRenderPass, rpHolder, endRenderPass); + } + + public unsafe void Flush(CommandBufferScoped cbs, ShaderCollection program, bool inRenderPass, RenderPassHolder rpHolder, Action endRenderPass) + { + if (program != null) + { + _incoherentBufferWriteStages |= program.IncoherentBufferWriteStages | _extraStages; + _incoherentTextureWriteStages |= program.IncoherentTextureWriteStages; + } + + FlushMemoryBarrier(program, inRenderPass); + + if (!inRenderPass && rpHolder != null) + { + // Render pass is about to begin. Queue any fences that normally interrupt the pass. 
+ rpHolder.InsertForcedFences(cbs); + } + while (_queuedBarrierCount > 0) { int memoryCount = 0; @@ -86,20 +212,20 @@ namespace Ryujinx.Graphics.Vulkan bool hasBarrier = false; StageFlags flags = default; - static void AddBarriers<T>( + static void AddBarriers<T, T2>( Span<T> target, ref int queuedBarrierCount, ref bool hasBarrier, ref StageFlags flags, ref int count, - List<BarrierWithStageFlags<T>> list) where T : unmanaged + List<BarrierWithStageFlags<T, T2>> list) where T : unmanaged { int firstMatch = -1; int end = list.Count; for (int i = 0; i < list.Count; i++) { - BarrierWithStageFlags<T> barrier = list[i]; + BarrierWithStageFlags<T, T2> barrier = list[i]; if (!hasBarrier) { @@ -162,21 +288,60 @@ namespace Ryujinx.Graphics.Vulkan } } - if (insideRenderPass) + if (inRenderPass && _imageBarriers.Count > 0) { // Image barriers queued in the batch are meant to be globally scoped, // but inside a render pass they're scoped to just the range of the render pass. // On MoltenVK, we just break the rules and always use image barrier. // On desktop GPUs, all barriers are globally scoped, so we just replace it with a generic memory barrier. - // TODO: On certain GPUs, we need to split render pass so the barrier scope is global. When this is done, - // notify the resource that it should add a barrier as soon as a render pass ends to avoid this in future. + // Generally, we want to avoid this from happening in the future, so flag the texture to immediately + // emit a barrier whenever the current render pass is bound again. + + bool anyIsNonAttachment = false; + + foreach (BarrierWithStageFlags<ImageMemoryBarrier, TextureStorage> barrier in _imageBarriers) + { + // If the binding is an attachment, don't add it as a forced fence. 
+ bool isAttachment = rpHolder.ContainsAttachment(barrier.Resource); + + if (!isAttachment) + { + rpHolder.AddForcedFence(barrier.Resource, barrier.Flags.Dest); + anyIsNonAttachment = true; + } + } + + if (_gd.IsTBDR) + { + if (!_gd.IsMoltenVk) + { + if (!anyIsNonAttachment) + { + // This case is a feedback loop. To prevent this from causing an absolute performance disaster, + // remove the barriers entirely. + // If this is not here, there will be a lot of single draw render passes. + // TODO: explicit handling for feedback loops, likely outside this class. - if (!_gd.IsMoltenVk) + _queuedBarrierCount -= _imageBarriers.Count; + _imageBarriers.Clear(); + } + else + { + // TBDR GPUs are sensitive to barriers, so we need to end the pass to ensure the data is available. + // Metal already has hazard tracking so MVK doesn't need this. + endRenderPass(); + inRenderPass = false; + } + } + } + else { + // Generic pipeline memory barriers will work for desktop GPUs. + // They do require a few more access flags on the subpass dependency, though. 
foreach (var barrier in _imageBarriers) { - _memoryBarriers.Add(new BarrierWithStageFlags<MemoryBarrier>( + _memoryBarriers.Add(new BarrierWithStageFlags<MemoryBarrier, int>( barrier.Flags, new MemoryBarrier() { @@ -190,6 +355,22 @@ namespace Ryujinx.Graphics.Vulkan } } + if (inRenderPass && _memoryBarriers.Count > 0) + { + PipelineStageFlags allFlags = PipelineStageFlags.None; + + foreach (var barrier in _memoryBarriers) + { + allFlags |= barrier.Flags.Dest; + } + + if (allFlags.HasFlag(PipelineStageFlags.DrawIndirectBit) || !_gd.SupportsRenderPassBarrier(allFlags)) + { + endRenderPass(); + inRenderPass = false; + } + } + AddBarriers(_memoryBarrierBatch.AsSpan(), ref _queuedBarrierCount, ref hasBarrier, ref flags, ref memoryCount, _memoryBarriers); AddBarriers(_bufferBarrierBatch.AsSpan(), ref _queuedBarrierCount, ref hasBarrier, ref flags, ref bufferCount, _bufferBarriers); AddBarriers(_imageBarrierBatch.AsSpan(), ref _queuedBarrierCount, ref hasBarrier, ref flags, ref imageCount, _imageBarriers); @@ -198,14 +379,14 @@ namespace Ryujinx.Graphics.Vulkan { PipelineStageFlags srcStageFlags = flags.Source; - if (insideRenderPass) + if (inRenderPass) { // Inside a render pass, barrier stages can only be from rasterization. 
srcStageFlags &= ~PipelineStageFlags.ComputeShaderBit; } _gd.Api.CmdPipelineBarrier( - cb, + cbs.CommandBuffer, srcStageFlags, flags.Dest, 0, @@ -219,6 +400,41 @@ namespace Ryujinx.Graphics.Vulkan } } + private void QueueIncoherentBarrier(IncoherentBarrierType type) + { + if (type > _queuedIncoherentBarrier) + { + _queuedIncoherentBarrier = type; + } + } + + public void QueueTextureBarrier() + { + QueueIncoherentBarrier(IncoherentBarrierType.Texture); + } + + public void QueueMemoryBarrier() + { + QueueIncoherentBarrier(IncoherentBarrierType.All); + } + + public void QueueCommandBufferBarrier() + { + QueueIncoherentBarrier(IncoherentBarrierType.CommandBuffer); + } + + public void EnableTfbBarriers(bool enable) + { + if (enable) + { + _extraStages |= PipelineStageFlags.TransformFeedbackBitExt; + } + else + { + _extraStages &= ~PipelineStageFlags.TransformFeedbackBitExt; + } + } + public void Dispose() { _memoryBarrierBatch.Dispose(); |
