diff options
| author | gdkchan <gab.dark.100@gmail.com> | 2021-09-28 19:43:40 -0300 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2021-09-29 00:43:40 +0200 |
| commit | f4f496cb48a59aae36e3252baa90396e1bfadd2e (patch) | |
| tree | 5594d76b3f1b552f1fecdeda37bd2f6667781a56 /Ryujinx.Graphics.Nvdec | |
| parent | 0d23504e30395ba20d1704da464b41f3fe539062 (diff) | |
NVDEC (H264): Use separate contexts per channel and decode frames in DTS order (#2671)
* Use separate NVDEC contexts per channel (for FFMPEG)
* Remove NVDEC -> VIC frame override hack
* Add missing bottom_field_pic_order_in_frame_present_flag
* Make FFMPEG logging static
* nit: Remove empty lines
* New FFMPEG decoding approach -- call h264_decode_frame directly, trim surface cache to reduce memory usage
* Fix case
* Silence warnings
* PR feedback
* Per-decoder rather than per-codec ownership of surfaces on the cache
Diffstat (limited to 'Ryujinx.Graphics.Nvdec')
| -rw-r--r-- | Ryujinx.Graphics.Nvdec/H264Decoder.cs | 21 | ||||
| -rw-r--r-- | Ryujinx.Graphics.Nvdec/Image/SurfaceCache.cs | 169 | ||||
| -rw-r--r-- | Ryujinx.Graphics.Nvdec/NvdecDecoderContext.cs | 21 | ||||
| -rw-r--r-- | Ryujinx.Graphics.Nvdec/NvdecDevice.cs | 45 | ||||
| -rw-r--r-- | Ryujinx.Graphics.Nvdec/NvdecStatus.cs | 16 | ||||
| -rw-r--r-- | Ryujinx.Graphics.Nvdec/Types/H264/PictureInfo.cs | 18 | ||||
| -rw-r--r-- | Ryujinx.Graphics.Nvdec/Types/H264/ReferenceFrame.cs | 13 | ||||
| -rw-r--r-- | Ryujinx.Graphics.Nvdec/Types/Vp9/FrameStats.cs | 1 | ||||
| -rw-r--r-- | Ryujinx.Graphics.Nvdec/Vp9Decoder.cs | 6 |
9 files changed, 196 insertions, 114 deletions
diff --git a/Ryujinx.Graphics.Nvdec/H264Decoder.cs b/Ryujinx.Graphics.Nvdec/H264Decoder.cs index 57ce12d0..1ee3997b 100644 --- a/Ryujinx.Graphics.Nvdec/H264Decoder.cs +++ b/Ryujinx.Graphics.Nvdec/H264Decoder.cs @@ -10,9 +10,7 @@ namespace Ryujinx.Graphics.Nvdec { private const int MbSizeInPixels = 16; - private static readonly Decoder _decoder = new Decoder(); - - public unsafe static void Decode(NvdecDevice device, ResourceManager rm, ref NvdecRegisters state) + public unsafe static void Decode(NvdecDecoderContext context, ResourceManager rm, ref NvdecRegisters state) { PictureInfo pictureInfo = rm.Gmm.DeviceRead<PictureInfo>(state.SetPictureInfoOffset); H264PictureInfo info = pictureInfo.Convert(); @@ -22,19 +20,18 @@ namespace Ryujinx.Graphics.Nvdec int width = (int)pictureInfo.PicWidthInMbs * MbSizeInPixels; int height = (int)pictureInfo.PicHeightInMbs * MbSizeInPixels; - ISurface outputSurface = rm.Cache.Get(_decoder, CodecId.H264, 0, 0, width, height); + int surfaceIndex = (int)pictureInfo.OutputSurfaceIndex; - if (_decoder.Decode(ref info, outputSurface, bitstream)) - { - int li = (int)pictureInfo.LumaOutputSurfaceIndex; - int ci = (int)pictureInfo.ChromaOutputSurfaceIndex; + uint lumaOffset = state.SetSurfaceLumaOffset[surfaceIndex]; + uint chromaOffset = state.SetSurfaceChromaOffset[surfaceIndex]; - uint lumaOffset = state.SetSurfaceLumaOffset[li]; - uint chromaOffset = state.SetSurfaceChromaOffset[ci]; + Decoder decoder = context.GetDecoder(); - SurfaceWriter.Write(rm.Gmm, outputSurface, lumaOffset, chromaOffset); + ISurface outputSurface = rm.Cache.Get(decoder, 0, 0, width, height); - device.OnFrameDecoded(CodecId.H264, lumaOffset, chromaOffset); + if (decoder.Decode(ref info, outputSurface, bitstream)) + { + SurfaceWriter.Write(rm.Gmm, outputSurface, lumaOffset, chromaOffset); } rm.Cache.Put(outputSurface); diff --git a/Ryujinx.Graphics.Nvdec/Image/SurfaceCache.cs b/Ryujinx.Graphics.Nvdec/Image/SurfaceCache.cs index c362185f..dc119673 100644 --- a/Ryujinx.Graphics.Nvdec/Image/SurfaceCache.cs +++ b/Ryujinx.Graphics.Nvdec/Image/SurfaceCache.cs @@ -21,7 +21,7 @@ namespace Ryujinx.Graphics.Nvdec.Image public uint ChromaOffset; public int Width; public int Height; - public CodecId CodecId; + public IDecoder Owner; public ISurface Surface; } @@ -34,104 +34,110 @@ namespace Ryujinx.Graphics.Nvdec.Image _gmm = gmm; } - public ISurface Get(IDecoder decoder, CodecId codecId, uint lumaOffset, uint chromaOffset, int width, int height) + public ISurface Get(IDecoder decoder, uint lumaOffset, uint chromaOffset, int width, int height) { - ISurface surface = null; - - // Try to find a compatible surface with same parameters, and same offsets. - for (int i = 0; i < MaxItems; i++) + lock (_pool) { - ref CacheItem item = ref _pool[i]; + ISurface surface = null; - if (item.LumaOffset == lumaOffset && - item.ChromaOffset == chromaOffset && - item.CodecId == codecId && - item.Width == width && - item.Height == height) - { - item.ReferenceCount++; - surface = item.Surface; - MoveToFront(i); - break; - } - } - - // If we failed to find a perfect match, now ignore the offsets. - // Search backwards to replace the oldest compatible surface, - // this avoids thrashing frquently used surfaces. - // Now we need to ensure that the surface is not in use, as we'll change the data. - if (surface == null) - { - for (int i = MaxItems - 1; i >= 0; i--) + // Try to find a compatible surface with same parameters, and same offsets. + for (int i = 0; i < MaxItems; i++) { ref CacheItem item = ref _pool[i]; - if (item.ReferenceCount == 0 && item.CodecId == codecId && item.Width == width && item.Height == height) + if (item.LumaOffset == lumaOffset && + item.ChromaOffset == chromaOffset && + item.Owner == decoder && + item.Width == width && + item.Height == height) { - item.ReferenceCount = 1; - item.LumaOffset = lumaOffset; - item.ChromaOffset = chromaOffset; + item.ReferenceCount++; surface = item.Surface; - - if ((lumaOffset | chromaOffset) != 0) - { - SurfaceReader.Read(_gmm, surface, lumaOffset, chromaOffset); - } - MoveToFront(i); break; } } - } - // If everything else failed, we try to create a new surface, - // and insert it on the pool. We replace the oldest item on the - // pool to avoid thrashing frequently used surfaces. - // If even the oldest item is in use, that means that the entire pool - // is in use, in that case we throw as there's no place to insert - // the new surface. - if (surface == null) - { - if (_pool[MaxItems - 1].ReferenceCount == 0) + // If we failed to find a perfect match, now ignore the offsets. + // Search backwards to replace the oldest compatible surface, + // this avoids thrashing frequently used surfaces. + // Now we need to ensure that the surface is not in use, as we'll change the data. + if (surface == null) { - surface = decoder.CreateSurface(width, height); - - if ((lumaOffset | chromaOffset) != 0) + for (int i = MaxItems - 1; i >= 0; i--) { - SurfaceReader.Read(_gmm, surface, lumaOffset, chromaOffset); - } + ref CacheItem item = ref _pool[i]; - MoveToFront(MaxItems - 1); - ref CacheItem item = ref _pool[0]; - item.Surface?.Dispose(); - item.ReferenceCount = 1; - item.LumaOffset = lumaOffset; - item.ChromaOffset = chromaOffset; - item.Width = width; - item.Height = height; - item.CodecId = codecId; - item.Surface = surface; + if (item.ReferenceCount == 0 && item.Owner == decoder && item.Width == width && item.Height == height) + { + item.ReferenceCount = 1; + item.LumaOffset = lumaOffset; + item.ChromaOffset = chromaOffset; + surface = item.Surface; + + if ((lumaOffset | chromaOffset) != 0) + { + SurfaceReader.Read(_gmm, surface, lumaOffset, chromaOffset); + } + + MoveToFront(i); + break; + } + } } - else + + // If everything else failed, we try to create a new surface, + // and insert it on the pool. We replace the oldest item on the + // pool to avoid thrashing frequently used surfaces. + // If even the oldest item is in use, that means that the entire pool + // is in use, in that case we throw as there's no place to insert + // the new surface. + if (surface == null) { - throw new InvalidOperationException("No free slot on the surface pool."); + if (_pool[MaxItems - 1].ReferenceCount == 0) + { + surface = decoder.CreateSurface(width, height); + + if ((lumaOffset | chromaOffset) != 0) + { + SurfaceReader.Read(_gmm, surface, lumaOffset, chromaOffset); + } + + MoveToFront(MaxItems - 1); + ref CacheItem item = ref _pool[0]; + item.Surface?.Dispose(); + item.ReferenceCount = 1; + item.LumaOffset = lumaOffset; + item.ChromaOffset = chromaOffset; + item.Width = width; + item.Height = height; + item.Owner = decoder; + item.Surface = surface; + } + else + { + throw new InvalidOperationException("No free slot on the surface pool."); + } } - } - return surface; + return surface; + } } public void Put(ISurface surface) { - for (int i = 0; i < MaxItems; i++) + lock (_pool) { - ref CacheItem item = ref _pool[i]; - - if (item.Surface == surface) + for (int i = 0; i < MaxItems; i++) { - item.ReferenceCount--; - Debug.Assert(item.ReferenceCount >= 0); - break; + ref CacheItem item = ref _pool[i]; + + if (item.Surface == surface) + { + item.ReferenceCount--; + Debug.Assert(item.ReferenceCount >= 0); + break; + } } } } @@ -147,5 +153,22 @@ namespace Ryujinx.Graphics.Nvdec.Image _pool[0] = temp; } } + + public void Trim() + { + lock (_pool) + { + for (int i = 0; i < MaxItems; i++) + { + ref CacheItem item = ref _pool[i]; + + if (item.ReferenceCount == 0) + { + item.Surface?.Dispose(); + item = default; + } + } + } + } } } diff --git a/Ryujinx.Graphics.Nvdec/NvdecDecoderContext.cs b/Ryujinx.Graphics.Nvdec/NvdecDecoderContext.cs new file mode 100644 index 00000000..90da0bee --- /dev/null +++ b/Ryujinx.Graphics.Nvdec/NvdecDecoderContext.cs @@ -0,0 +1,21 @@ +using Ryujinx.Graphics.Nvdec.H264; +using System; + +namespace Ryujinx.Graphics.Nvdec +{ + class NvdecDecoderContext : IDisposable + { + private Decoder _decoder; + + public Decoder GetDecoder() + { + return _decoder ??= new Decoder(); + } + + public void Dispose() + { + _decoder?.Dispose(); + _decoder = null; + } + } +}
\ No newline at end of file diff --git a/Ryujinx.Graphics.Nvdec/NvdecDevice.cs b/Ryujinx.Graphics.Nvdec/NvdecDevice.cs index 08f802a1..5319429b 100644 --- a/Ryujinx.Graphics.Nvdec/NvdecDevice.cs +++ b/Ryujinx.Graphics.Nvdec/NvdecDevice.cs @@ -2,17 +2,20 @@ using Ryujinx.Graphics.Device; using Ryujinx.Graphics.Gpu.Memory; using Ryujinx.Graphics.Nvdec.Image; -using System; +using System.Collections.Concurrent; using System.Collections.Generic; +using System.Threading; namespace Ryujinx.Graphics.Nvdec { - public class NvdecDevice : IDeviceState + public class NvdecDevice : IDeviceStateWithContext { private readonly ResourceManager _rm; private readonly DeviceState<NvdecRegisters> _state; - public event Action<FrameDecodedEventArgs> FrameDecoded; + private long _currentId; + private ConcurrentDictionary<long, NvdecDecoderContext> _contexts; + private NvdecDecoderContext _currentContext; public NvdecDevice(MemoryManager gmm) { @@ -21,6 +24,33 @@ namespace Ryujinx.Graphics.Nvdec { { nameof(NvdecRegisters.Execute), new RwCallback(Execute, null) } }); + _contexts = new ConcurrentDictionary<long, NvdecDecoderContext>(); + } + + public long CreateContext() + { + long id = Interlocked.Increment(ref _currentId); + _contexts.TryAdd(id, new NvdecDecoderContext()); + + return id; + } + + public void DestroyContext(long id) + { + if (_contexts.TryRemove(id, out var context)) + { + context.Dispose(); + } + + _rm.Cache.Trim(); + } + + public void BindContext(long id) + { + if (_contexts.TryGetValue(id, out var context)) + { + _currentContext = context; + } } public int Read(int offset) => _state.Read(offset); @@ -36,20 +66,15 @@ namespace Ryujinx.Graphics.Nvdec switch (codecId) { case CodecId.H264: - H264Decoder.Decode(this, _rm, ref _state.State); + H264Decoder.Decode(_currentContext, _rm, ref _state.State); break; case CodecId.Vp9: - Vp9Decoder.Decode(this, _rm, ref _state.State); + Vp9Decoder.Decode(_rm, ref _state.State); break; default: Logger.Error?.Print(LogClass.Nvdec, $"Unsupported codec \"{codecId}\"."); break; } } - - internal void OnFrameDecoded(CodecId codecId, uint lumaOffset, uint chromaOffset) - { - FrameDecoded?.Invoke(new FrameDecodedEventArgs(codecId, lumaOffset, chromaOffset)); - } } } diff --git a/Ryujinx.Graphics.Nvdec/NvdecStatus.cs b/Ryujinx.Graphics.Nvdec/NvdecStatus.cs new file mode 100644 index 00000000..0712af88 --- /dev/null +++ b/Ryujinx.Graphics.Nvdec/NvdecStatus.cs @@ -0,0 +1,16 @@ +using Ryujinx.Graphics.Nvdec.Types.Vp9; + +namespace Ryujinx.Graphics.Nvdec +{ + struct NvdecStatus + { +#pragma warning disable CS0649 + public uint MbsCorrectlyDecoded; + public uint MbsInError; + public uint Reserved; + public uint ErrorStatus; + public FrameStats Stats; + public uint SliceHeaderErrorCode; +#pragma warning restore CS0649 + } +}
\ No newline at end of file diff --git a/Ryujinx.Graphics.Nvdec/Types/H264/PictureInfo.cs b/Ryujinx.Graphics.Nvdec/Types/H264/PictureInfo.cs index 86570342..326c40ae 100644 --- a/Ryujinx.Graphics.Nvdec/Types/H264/PictureInfo.cs +++ b/Ryujinx.Graphics.Nvdec/Types/H264/PictureInfo.cs @@ -26,13 +26,13 @@ namespace Ryujinx.Graphics.Nvdec.Types.H264 public uint Transform8x8ModeFlag; public uint LumaPitch; public uint ChromaPitch; - public uint Unknown94; - public uint LumaSecondFieldOffset; - public uint Unknown9C; - public uint UnknownA0; - public uint ChromaSecondFieldOffset; - public uint UnknownA8; - public uint UnknownAC; + public uint LumaTopOffset; + public uint LumaBottomOffset; + public uint LumaFrameOffset; + public uint ChromaTopOffset; + public uint ChromaBottomFieldOffset; + public uint ChromaFrameOffset; + public uint HistBufferSize; public ulong Flags; public Array2<int> FieldOrderCnt; public Array16<ReferenceFrame> RefFrames; @@ -64,8 +64,8 @@ namespace Ryujinx.Graphics.Nvdec.Types.H264 public int ChromaQpIndexOffset => ExtractSx(Flags, 22, 5); public int SecondChromaQpIndexOffset => ExtractSx(Flags, 27, 5); public uint WeightedBipredIdc => (uint)(Flags >> 32) & 3; - public uint LumaOutputSurfaceIndex => (uint)(Flags >> 34) & 0x7f; - public uint ChromaOutputSurfaceIndex => (uint)(Flags >> 41) & 0x1f; + public uint OutputSurfaceIndex => (uint)(Flags >> 34) & 0x7f; + public uint ColIndex => (uint)(Flags >> 41) & 0x1f; public ushort FrameNum => (ushort)(Flags >> 46); public bool QpprimeYZeroTransformBypassFlag => (Flags2 & (1 << 1)) != 0; diff --git a/Ryujinx.Graphics.Nvdec/Types/H264/ReferenceFrame.cs b/Ryujinx.Graphics.Nvdec/Types/H264/ReferenceFrame.cs index 163a8783..d205a47a 100644 --- a/Ryujinx.Graphics.Nvdec/Types/H264/ReferenceFrame.cs +++ b/Ryujinx.Graphics.Nvdec/Types/H264/ReferenceFrame.cs @@ -1,12 +1,15 @@ -namespace Ryujinx.Graphics.Nvdec.Types.H264 +using Ryujinx.Common.Memory; + +namespace Ryujinx.Graphics.Nvdec.Types.H264 { struct ReferenceFrame { #pragma warning disable CS0649 - public uint Unknown0; - public uint Unknown4; - public uint Unknown8; - public uint UnknownC; + public uint Flags; + public Array2<uint> FieldOrderCnt; + public uint FrameNum; #pragma warning restore CS0649 + + public uint OutputSurfaceIndex => (uint)Flags & 0x7f; } } diff --git a/Ryujinx.Graphics.Nvdec/Types/Vp9/FrameStats.cs b/Ryujinx.Graphics.Nvdec/Types/Vp9/FrameStats.cs index c360d378..26aab506 100644 --- a/Ryujinx.Graphics.Nvdec/Types/Vp9/FrameStats.cs +++ b/Ryujinx.Graphics.Nvdec/Types/Vp9/FrameStats.cs @@ -16,7 +16,6 @@ public uint FrameStatusBwdMvyCnt; public uint ErrorCtbPos; public uint ErrorSlicePos; - public uint Unknown34; #pragma warning restore CS0649 } } diff --git a/Ryujinx.Graphics.Nvdec/Vp9Decoder.cs b/Ryujinx.Graphics.Nvdec/Vp9Decoder.cs index f05555c6..b56dc56e 100644 --- a/Ryujinx.Graphics.Nvdec/Vp9Decoder.cs +++ b/Ryujinx.Graphics.Nvdec/Vp9Decoder.cs @@ -15,14 +15,14 @@ namespace Ryujinx.Graphics.Nvdec { private static Decoder _decoder = new Decoder(); - public unsafe static void Decode(NvdecDevice device, ResourceManager rm, ref NvdecRegisters state) + public unsafe static void Decode(ResourceManager rm, ref NvdecRegisters state) { PictureInfo pictureInfo = rm.Gmm.DeviceRead<PictureInfo>(state.SetPictureInfoOffset); EntropyProbs entropy = rm.Gmm.DeviceRead<EntropyProbs>(state.SetVp9EntropyProbsOffset); ISurface Rent(uint lumaOffset, uint chromaOffset, FrameSize size) { - return rm.Cache.Get(_decoder, CodecId.Vp9, lumaOffset, chromaOffset, size.Width, size.Height); + return rm.Cache.Get(_decoder, lumaOffset, chromaOffset, size.Width, size.Height); } ISurface lastSurface = Rent(state.SetSurfaceLumaOffset[0], state.SetSurfaceChromaOffset[0], pictureInfo.LastFrameSize); @@ -60,8 +60,6 @@ namespace Ryujinx.Graphics.Nvdec if (_decoder.Decode(ref info, currentSurface, bitstream, mvsIn, mvsOut)) { SurfaceWriter.Write(rm.Gmm, currentSurface, lumaOffset, chromaOffset); - - device.OnFrameDecoded(CodecId.Vp9, lumaOffset, chromaOffset); } WriteBackwardUpdates(rm.Gmm, state.SetVp9BackwardUpdatesOffset, ref info.BackwardUpdateCounts); |
