From d92fff541bf6fddadabf6ab628ddf8fec41cd52e Mon Sep 17 00:00:00 2001 From: riperiperi Date: Wed, 29 Sep 2021 01:27:03 +0100 Subject: Replace CacheResourceWrite with more general "precise" write (#2684) * Replace CacheResourceWrite with more general "precise" write The goal of CacheResourceWrite was to notify GPU resources when they were modified directly, by looking up the modified address/size in a structure and calling a method on each resource. The downside of this is that each resource cache has to be queried individually, they all have to implement their own way to do this, and it can only signal to resources using the same PhysicalMemory instance. This PR adds the ability to signal a write as "precise" on the tracking, which signals a special handler (if present) which can be used to avoid unnecessary flush actions, or maybe even more. For buffers, precise writes specifically do not flush, and instead punch a hole in the modified range list to indicate that the data on GPU has been replaced. The downside is that precise actions must ignore the page protection bits and always signal - as they need to notify the target resource to ignore the sequence number optimization. I had to reintroduce the sequence number increment after I2M, as removing it was causing issues in rabbids kingdom battle. However - all resources modified by I2M are notified directly to lower their sequence number, so the problem is likely that another unrelated resource is not being properly updated. Thankfully, doing this does not affect performance in the games I tested. This should fix regressions from #2624. Test any games that were broken by that. (RF4, rabbids kingdom battle) I've also added a sequence number increment to ThreedClass.IncrementSyncpoint, as it seems to fix buffer corruption in OpenGL homebrew. (this was a regression from removing sequence number increment from constant buffer update - another unrelated resource thing) * Add tests. 
* Add XML docs for GpuRegionHandle * Skip UpdateProtection if only precise actions were called This allows precise actions to skip reprotection costs. --- .../Engine/InlineToMemory/InlineToMemoryClass.cs | 3 +- Ryujinx.Graphics.Gpu/Engine/Threed/ThreedClass.cs | 1 + Ryujinx.Graphics.Gpu/Image/Pool.cs | 19 +++++++++++ Ryujinx.Graphics.Gpu/Image/SamplerPool.cs | 6 ++-- Ryujinx.Graphics.Gpu/Image/TextureCache.cs | 12 ------- Ryujinx.Graphics.Gpu/Image/TexturePool.cs | 5 ++- Ryujinx.Graphics.Gpu/Memory/Buffer.cs | 37 ++++++++++++++++++++++ Ryujinx.Graphics.Gpu/Memory/GpuRegionHandle.cs | 34 ++++++++++++++++++++ Ryujinx.Graphics.Gpu/Memory/MemoryManager.cs | 10 ++++++ Ryujinx.Graphics.Gpu/Memory/PhysicalMemory.cs | 33 +++++++------------ 10 files changed, 118 insertions(+), 42 deletions(-) (limited to 'Ryujinx.Graphics.Gpu') diff --git a/Ryujinx.Graphics.Gpu/Engine/InlineToMemory/InlineToMemoryClass.cs b/Ryujinx.Graphics.Gpu/Engine/InlineToMemory/InlineToMemoryClass.cs index e3e8d5ba..9649841f 100644 --- a/Ryujinx.Graphics.Gpu/Engine/InlineToMemory/InlineToMemoryClass.cs +++ b/Ryujinx.Graphics.Gpu/Engine/InlineToMemory/InlineToMemoryClass.cs @@ -171,7 +171,8 @@ namespace Ryujinx.Graphics.Gpu.Engine.InlineToMemory if (_isLinear && _lineCount == 1) { - memoryManager.Physical.CacheResourceWrite(memoryManager, _dstGpuVa, data); + memoryManager.WriteTrackedResource(_dstGpuVa, data); + _context.AdvanceSequence(); } else { diff --git a/Ryujinx.Graphics.Gpu/Engine/Threed/ThreedClass.cs b/Ryujinx.Graphics.Gpu/Engine/Threed/ThreedClass.cs index d4f228e9..52637c20 100644 --- a/Ryujinx.Graphics.Gpu/Engine/Threed/ThreedClass.cs +++ b/Ryujinx.Graphics.Gpu/Engine/Threed/ThreedClass.cs @@ -211,6 +211,7 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed { uint syncpointId = (uint)argument & 0xFFFF; + _context.AdvanceSequence(); _context.CreateHostSyncIfNeeded(); _context.Renderer.UpdateCounters(); // Poll the query counters, the game may want an updated result. 
_context.Synchronization.IncrementSyncpoint(syncpointId); diff --git a/Ryujinx.Graphics.Gpu/Image/Pool.cs b/Ryujinx.Graphics.Gpu/Image/Pool.cs index a06a7ccf..f54ce1d7 100644 --- a/Ryujinx.Graphics.Gpu/Image/Pool.cs +++ b/Ryujinx.Graphics.Gpu/Image/Pool.cs @@ -15,6 +15,7 @@ namespace Ryujinx.Graphics.Gpu.Image protected GpuContext Context; protected PhysicalMemory PhysicalMemory; + protected int SequenceNumber; protected T1[] Items; protected T2[] DescriptorCache; @@ -64,6 +65,7 @@ namespace Ryujinx.Graphics.Gpu.Image Size = size; _memoryTracking = physicalMemory.BeginGranularTracking(address, size); + _memoryTracking.RegisterPreciseAction(address, size, PreciseAction); _modifiedDelegate = RegionModified; } @@ -116,6 +118,23 @@ namespace Ryujinx.Graphics.Gpu.Image InvalidateRangeImpl(mAddress, mSize); } + /// + /// An action to be performed when a precise memory access occurs to this resource. + /// Makes sure that the dirty flags are checked. + /// + /// Address of the memory action + /// Size in bytes + /// True if the access was a write, false otherwise + private bool PreciseAction(ulong address, ulong size, bool write) + { + if (write && Context.SequenceNumber == SequenceNumber) + { + SequenceNumber--; + } + + return false; + } + protected abstract void InvalidateRangeImpl(ulong address, ulong size); protected abstract void Delete(T1 item); diff --git a/Ryujinx.Graphics.Gpu/Image/SamplerPool.cs b/Ryujinx.Graphics.Gpu/Image/SamplerPool.cs index aed6cb9c..5a84bd84 100644 --- a/Ryujinx.Graphics.Gpu/Image/SamplerPool.cs +++ b/Ryujinx.Graphics.Gpu/Image/SamplerPool.cs @@ -7,8 +7,6 @@ namespace Ryujinx.Graphics.Gpu.Image /// class SamplerPool : Pool { - private int _sequenceNumber; - /// /// Constructs a new instance of the sampler pool. 
/// @@ -30,9 +28,9 @@ namespace Ryujinx.Graphics.Gpu.Image return null; } - if (_sequenceNumber != Context.SequenceNumber) + if (SequenceNumber != Context.SequenceNumber) { - _sequenceNumber = Context.SequenceNumber; + SequenceNumber = Context.SequenceNumber; SynchronizeMemory(); } diff --git a/Ryujinx.Graphics.Gpu/Image/TextureCache.cs b/Ryujinx.Graphics.Gpu/Image/TextureCache.cs index cc6867a6..1aa09b90 100644 --- a/Ryujinx.Graphics.Gpu/Image/TextureCache.cs +++ b/Ryujinx.Graphics.Gpu/Image/TextureCache.cs @@ -99,18 +99,6 @@ namespace Ryujinx.Graphics.Gpu.Image return TextureScaleMode.Blacklisted; } - /// - /// Determines if any texture exists within the target memory range. - /// - /// The GPU memory manager - /// GPU virtual address to search for textures - /// The size of the range - /// True if any texture exists in the range, false otherwise - public bool IsTextureInRange(MemoryManager memoryManager, ulong gpuVa, ulong size) - { - return _textures.FindOverlaps(memoryManager.GetPhysicalRegions(gpuVa, size), ref _textureOverlaps) != 0; - } - /// /// Determines if a given texture is "safe" for upscaling from its info. /// Note that this is different from being compatible - this elilinates targets that would have detrimental effects when scaled. 
diff --git a/Ryujinx.Graphics.Gpu/Image/TexturePool.cs b/Ryujinx.Graphics.Gpu/Image/TexturePool.cs index 5b5c5ab0..66cd9d4d 100644 --- a/Ryujinx.Graphics.Gpu/Image/TexturePool.cs +++ b/Ryujinx.Graphics.Gpu/Image/TexturePool.cs @@ -12,7 +12,6 @@ namespace Ryujinx.Graphics.Gpu.Image /// class TexturePool : Pool { - private int _sequenceNumber; private readonly GpuChannel _channel; private readonly ConcurrentQueue _dereferenceQueue = new ConcurrentQueue(); @@ -45,9 +44,9 @@ namespace Ryujinx.Graphics.Gpu.Image return null; } - if (_sequenceNumber != Context.SequenceNumber) + if (SequenceNumber != Context.SequenceNumber) { - _sequenceNumber = Context.SequenceNumber; + SequenceNumber = Context.SequenceNumber; SynchronizeMemory(); } diff --git a/Ryujinx.Graphics.Gpu/Memory/Buffer.cs b/Ryujinx.Graphics.Gpu/Memory/Buffer.cs index af69e693..76125e31 100644 --- a/Ryujinx.Graphics.Gpu/Memory/Buffer.cs +++ b/Ryujinx.Graphics.Gpu/Memory/Buffer.cs @@ -1,3 +1,4 @@ +using Ryujinx.Common.Logging; using Ryujinx.Cpu.Tracking; using Ryujinx.Graphics.GAL; using Ryujinx.Memory.Range; @@ -104,6 +105,8 @@ namespace Ryujinx.Graphics.Gpu.Memory if (_useGranular) { _memoryTrackingGranular = physicalMemory.BeginGranularTracking(address, size, baseHandles); + + _memoryTrackingGranular.RegisterPreciseAction(address, size, PreciseAction); } else { @@ -123,6 +126,8 @@ namespace Ryujinx.Graphics.Gpu.Memory handle.Dispose(); } } + + _memoryTracking.RegisterPreciseAction(PreciseAction); } _externalFlushDelegate = new RegionSignal(ExternalFlush); @@ -452,6 +457,38 @@ namespace Ryujinx.Graphics.Gpu.Memory }, true); } + /// + /// An action to be performed when a precise memory access occurs to this resource. + /// For buffers, this skips flush-on-write by punching holes directly into the modified range list. 
+ /// + /// Address of the memory action + /// Size in bytes + /// True if the access was a write, false otherwise + private bool PreciseAction(ulong address, ulong size, bool write) + { + if (!write) + { + // We only want to skip flush-on-write. + return false; + } + + if (address < Address) + { + address = Address; + } + + ulong maxSize = Address + Size - address; + + if (size > maxSize) + { + size = maxSize; + } + + ForceDirty(address, size); + + return true; + } + /// /// Called when part of the memory for this buffer has been unmapped. /// Calls are from non-GPU threads. diff --git a/Ryujinx.Graphics.Gpu/Memory/GpuRegionHandle.cs b/Ryujinx.Graphics.Gpu/Memory/GpuRegionHandle.cs index 8a9c6767..bc07bfad 100644 --- a/Ryujinx.Graphics.Gpu/Memory/GpuRegionHandle.cs +++ b/Ryujinx.Graphics.Gpu/Memory/GpuRegionHandle.cs @@ -4,6 +4,9 @@ using System; namespace Ryujinx.Graphics.Gpu.Memory { + /// + /// A tracking handle for a region of GPU VA, represented by one or more tracking handles in CPU VA. + /// class GpuRegionHandle : IRegionHandle { private readonly CpuRegionHandle[] _cpuRegionHandles; @@ -28,11 +31,18 @@ namespace Ryujinx.Graphics.Gpu.Memory public ulong Size => throw new NotSupportedException(); public ulong EndAddress => throw new NotSupportedException(); + /// + /// Create a new GpuRegionHandle, made up of multiple CpuRegionHandles. + /// + /// The CpuRegionHandles that make up this handle public GpuRegionHandle(CpuRegionHandle[] cpuRegionHandles) { _cpuRegionHandles = cpuRegionHandles; } + /// + /// Dispose the child handles. + /// public void Dispose() { foreach (var regionHandle in _cpuRegionHandles) @@ -41,6 +51,11 @@ namespace Ryujinx.Graphics.Gpu.Memory } } + /// + /// Register an action to perform when the tracked region is read or written. + /// The action is automatically removed after it runs. 
+ /// + /// Action to call on read or write public void RegisterAction(RegionSignal action) { foreach (var regionHandle in _cpuRegionHandles) @@ -49,6 +64,22 @@ namespace Ryujinx.Graphics.Gpu.Memory } } + /// + /// Register an action to perform when a precise access occurs (one with exact address and size). + /// If the action returns true, read/write tracking are skipped. + /// + /// Action to call on read or write + public void RegisterPreciseAction(PreciseRegionSignal action) + { + foreach (var regionHandle in _cpuRegionHandles) + { + regionHandle.RegisterPreciseAction(action); + } + } + + /// + /// Consume the dirty flag for the handles, and reprotect so it can be set on the next write. + /// public void Reprotect(bool asDirty = false) { foreach (var regionHandle in _cpuRegionHandles) @@ -57,6 +88,9 @@ namespace Ryujinx.Graphics.Gpu.Memory } } + /// + /// Force the handles to be dirty, without reprotecting. + /// public void ForceDirty() { foreach (var regionHandle in _cpuRegionHandles) diff --git a/Ryujinx.Graphics.Gpu/Memory/MemoryManager.cs b/Ryujinx.Graphics.Gpu/Memory/MemoryManager.cs index 2dc1edd2..3968cb96 100644 --- a/Ryujinx.Graphics.Gpu/Memory/MemoryManager.cs +++ b/Ryujinx.Graphics.Gpu/Memory/MemoryManager.cs @@ -194,6 +194,16 @@ namespace Ryujinx.Graphics.Gpu.Memory WriteImpl(va, data, Physical.Write); } + /// + /// Writes data to GPU mapped memory, destined for a tracked resource. + /// + /// GPU virtual address to write the data into + /// The data to be written + public void WriteTrackedResource(ulong va, ReadOnlySpan data) + { + WriteImpl(va, data, Physical.WriteTrackedResource); + } + /// /// Writes data to GPU mapped memory without write tracking. 
/// diff --git a/Ryujinx.Graphics.Gpu/Memory/PhysicalMemory.cs b/Ryujinx.Graphics.Gpu/Memory/PhysicalMemory.cs index 0ec41a8f..d292fab0 100644 --- a/Ryujinx.Graphics.Gpu/Memory/PhysicalMemory.cs +++ b/Ryujinx.Graphics.Gpu/Memory/PhysicalMemory.cs @@ -80,28 +80,6 @@ namespace Ryujinx.Graphics.Gpu.Memory } } - /// - /// Write data to memory that is destined for a resource in a cache. - /// This avoids triggering write tracking when possible, which can avoid flushes and incrementing sequence number. - /// - /// The GPU memory manager - /// GPU virtual address to write the data into - /// The data to be written - public void CacheResourceWrite(MemoryManager memoryManager, ulong gpuVa, ReadOnlySpan data) - { - if (TextureCache.IsTextureInRange(memoryManager, gpuVa, (ulong)data.Length)) - { - // No fast path yet - copy the data back and trigger write tracking. - memoryManager.Write(gpuVa, data); - _context.AdvanceSequence(); - } - else - { - BufferCache.ForceDirty(memoryManager, gpuVa, (ulong)data.Length); - memoryManager.WriteUntracked(gpuVa, data); - } - } - /// /// Gets a span of data from the application process. /// @@ -179,6 +157,17 @@ namespace Ryujinx.Graphics.Gpu.Memory return _cpuMemory.ReadTracked(address); } + /// + /// Writes data to the application process, triggering a precise memory tracking event. + /// + /// Address to write into + /// Data to be written + public void WriteTrackedResource(ulong address, ReadOnlySpan data) + { + _cpuMemory.SignalMemoryTracking(address, (ulong)data.Length, true, precise: true); + _cpuMemory.WriteUntracked(address, data); + } + /// /// Writes data to the application process. /// -- cgit v1.2.3