diff options
| author | TSR Berry <20988865+TSRBerry@users.noreply.github.com> | 2023-04-08 01:22:00 +0200 |
|---|---|---|
| committer | Mary <thog@protonmail.com> | 2023-04-27 23:51:14 +0200 |
| commit | cee712105850ac3385cd0091a923438167433f9f (patch) | |
| tree | 4a5274b21d8b7f938c0d0ce18736d3f2993b11b1 /src/Ryujinx.Graphics.Nvdec | |
| parent | cd124bda587ef09668a971fa1cac1c3f0cfc9f21 (diff) | |
Move solution and projects to src
Diffstat (limited to 'src/Ryujinx.Graphics.Nvdec')
26 files changed, 1542 insertions, 0 deletions
diff --git a/src/Ryujinx.Graphics.Nvdec/ApplicationId.cs b/src/Ryujinx.Graphics.Nvdec/ApplicationId.cs new file mode 100644 index 00000000..ada12f8d --- /dev/null +++ b/src/Ryujinx.Graphics.Nvdec/ApplicationId.cs @@ -0,0 +1,14 @@ +namespace Ryujinx.Graphics.Nvdec +{ + public enum ApplicationId + { + Mpeg = 1, + Vc1 = 2, + H264 = 3, + Mpeg4 = 4, + Vp8 = 5, + Hevc = 7, + Vp9 = 9, + HevcParser = 12, + } +} diff --git a/src/Ryujinx.Graphics.Nvdec/H264Decoder.cs b/src/Ryujinx.Graphics.Nvdec/H264Decoder.cs new file mode 100644 index 00000000..ecc7dbc7 --- /dev/null +++ b/src/Ryujinx.Graphics.Nvdec/H264Decoder.cs @@ -0,0 +1,57 @@ +using Ryujinx.Graphics.Nvdec.FFmpeg.H264; +using Ryujinx.Graphics.Nvdec.Image; +using Ryujinx.Graphics.Nvdec.Types.H264; +using Ryujinx.Graphics.Video; +using System; + +namespace Ryujinx.Graphics.Nvdec +{ + static class H264Decoder + { + private const int MbSizeInPixels = 16; + + public static void Decode(NvdecDecoderContext context, ResourceManager rm, ref NvdecRegisters state) + { + PictureInfo pictureInfo = rm.Gmm.DeviceRead<PictureInfo>(state.SetDrvPicSetupOffset); + H264PictureInfo info = pictureInfo.Convert(); + + ReadOnlySpan<byte> bitstream = rm.Gmm.DeviceGetSpan(state.SetInBufBaseOffset, (int)pictureInfo.BitstreamSize); + + int width = (int)pictureInfo.PicWidthInMbs * MbSizeInPixels; + int height = (int)pictureInfo.PicHeightInMbs * MbSizeInPixels; + + int surfaceIndex = (int)pictureInfo.OutputSurfaceIndex; + + uint lumaOffset = state.SetPictureLumaOffset[surfaceIndex]; + uint chromaOffset = state.SetPictureChromaOffset[surfaceIndex]; + + Decoder decoder = context.GetH264Decoder(); + + ISurface outputSurface = rm.Cache.Get(decoder, 0, 0, width, height); + + if (decoder.Decode(ref info, outputSurface, bitstream)) + { + if (outputSurface.Field == FrameField.Progressive) + { + SurfaceWriter.Write( + rm.Gmm, + outputSurface, + lumaOffset + pictureInfo.LumaFrameOffset, + chromaOffset + pictureInfo.ChromaFrameOffset); + } + else + { + 
SurfaceWriter.WriteInterlaced( + rm.Gmm, + outputSurface, + lumaOffset + pictureInfo.LumaTopFieldOffset, + chromaOffset + pictureInfo.ChromaTopFieldOffset, + lumaOffset + pictureInfo.LumaBottomFieldOffset, + chromaOffset + pictureInfo.ChromaBottomFieldOffset); + } + } + + rm.Cache.Put(outputSurface); + } + } +} diff --git a/src/Ryujinx.Graphics.Nvdec/Image/SurfaceCache.cs b/src/Ryujinx.Graphics.Nvdec/Image/SurfaceCache.cs new file mode 100644 index 00000000..dc119673 --- /dev/null +++ b/src/Ryujinx.Graphics.Nvdec/Image/SurfaceCache.cs @@ -0,0 +1,174 @@ +using Ryujinx.Graphics.Gpu.Memory; +using Ryujinx.Graphics.Video; +using System; +using System.Diagnostics; + +namespace Ryujinx.Graphics.Nvdec.Image +{ + class SurfaceCache + { + // Must be equal to at least the maximum number of surfaces + // that can be in use simultaneously (which is 17, since H264 + // can have up to 16 reference frames, and we need another one + // for the current frame). + // Realistically, most codecs won't ever use more than 4 simultaneously. + private const int MaxItems = 17; + + private struct CacheItem + { + public int ReferenceCount; + public uint LumaOffset; + public uint ChromaOffset; + public int Width; + public int Height; + public IDecoder Owner; + public ISurface Surface; + } + + private readonly CacheItem[] _pool = new CacheItem[MaxItems]; + + private readonly MemoryManager _gmm; + + public SurfaceCache(MemoryManager gmm) + { + _gmm = gmm; + } + + public ISurface Get(IDecoder decoder, uint lumaOffset, uint chromaOffset, int width, int height) + { + lock (_pool) + { + ISurface surface = null; + + // Try to find a compatible surface with same parameters, and same offsets. 
+ for (int i = 0; i < MaxItems; i++) + { + ref CacheItem item = ref _pool[i]; + + if (item.LumaOffset == lumaOffset && + item.ChromaOffset == chromaOffset && + item.Owner == decoder && + item.Width == width && + item.Height == height) + { + item.ReferenceCount++; + surface = item.Surface; + MoveToFront(i); + break; + } + } + + // If we failed to find a perfect match, now ignore the offsets. + // Search backwards to replace the oldest compatible surface, + // this avoids thrashing frequently used surfaces. + // Now we need to ensure that the surface is not in use, as we'll change the data. + if (surface == null) + { + for (int i = MaxItems - 1; i >= 0; i--) + { + ref CacheItem item = ref _pool[i]; + + if (item.ReferenceCount == 0 && item.Owner == decoder && item.Width == width && item.Height == height) + { + item.ReferenceCount = 1; + item.LumaOffset = lumaOffset; + item.ChromaOffset = chromaOffset; + surface = item.Surface; + + if ((lumaOffset | chromaOffset) != 0) + { + SurfaceReader.Read(_gmm, surface, lumaOffset, chromaOffset); + } + + MoveToFront(i); + break; + } + } + } + + // If everything else failed, we try to create a new surface, + // and insert it on the pool. We replace the oldest item on the + // pool to avoid thrashing frequently used surfaces. + // If even the oldest item is in use, that means that the entire pool + // is in use, in that case we throw as there's no place to insert + // the new surface. 
+ if (surface == null) + { + if (_pool[MaxItems - 1].ReferenceCount == 0) + { + surface = decoder.CreateSurface(width, height); + + if ((lumaOffset | chromaOffset) != 0) + { + SurfaceReader.Read(_gmm, surface, lumaOffset, chromaOffset); + } + + MoveToFront(MaxItems - 1); + ref CacheItem item = ref _pool[0]; + item.Surface?.Dispose(); + item.ReferenceCount = 1; + item.LumaOffset = lumaOffset; + item.ChromaOffset = chromaOffset; + item.Width = width; + item.Height = height; + item.Owner = decoder; + item.Surface = surface; + } + else + { + throw new InvalidOperationException("No free slot on the surface pool."); + } + } + + return surface; + } + } + + public void Put(ISurface surface) + { + lock (_pool) + { + for (int i = 0; i < MaxItems; i++) + { + ref CacheItem item = ref _pool[i]; + + if (item.Surface == surface) + { + item.ReferenceCount--; + Debug.Assert(item.ReferenceCount >= 0); + break; + } + } + } + } + + private void MoveToFront(int index) + { + // If index is 0 we don't need to do anything, + // as it's already on the front. 
+ if (index != 0) + { + CacheItem temp = _pool[index]; + Array.Copy(_pool, 0, _pool, 1, index); + _pool[0] = temp; + } + } + + public void Trim() + { + lock (_pool) + { + for (int i = 0; i < MaxItems; i++) + { + ref CacheItem item = ref _pool[i]; + + if (item.ReferenceCount == 0) + { + item.Surface?.Dispose(); + item = default; + } + } + } + } + } +} diff --git a/src/Ryujinx.Graphics.Nvdec/Image/SurfaceCommon.cs b/src/Ryujinx.Graphics.Nvdec/Image/SurfaceCommon.cs new file mode 100644 index 00000000..6087f5b1 --- /dev/null +++ b/src/Ryujinx.Graphics.Nvdec/Image/SurfaceCommon.cs @@ -0,0 +1,26 @@ +using Ryujinx.Graphics.Texture; +using Ryujinx.Graphics.Video; +using System; + +namespace Ryujinx.Graphics.Nvdec.Image +{ + static class SurfaceCommon + { + public static int GetBlockLinearSize(int width, int height, int bytesPerPixel) + { + return SizeCalculator.GetBlockLinearTextureSize(width, height, 1, 1, 1, 1, 1, bytesPerPixel, 2, 1, 1).TotalSize; + } + + public static void Copy(ISurface src, ISurface dst) + { + src.YPlane.AsSpan().CopyTo(dst.YPlane.AsSpan()); + src.UPlane.AsSpan().CopyTo(dst.UPlane.AsSpan()); + src.VPlane.AsSpan().CopyTo(dst.VPlane.AsSpan()); + } + + public unsafe static Span<byte> AsSpan(this Plane plane) + { + return new Span<byte>((void*)plane.Pointer, plane.Length); + } + } +} diff --git a/src/Ryujinx.Graphics.Nvdec/Image/SurfaceReader.cs b/src/Ryujinx.Graphics.Nvdec/Image/SurfaceReader.cs new file mode 100644 index 00000000..039a2583 --- /dev/null +++ b/src/Ryujinx.Graphics.Nvdec/Image/SurfaceReader.cs @@ -0,0 +1,133 @@ +using Ryujinx.Common; +using Ryujinx.Graphics.Gpu.Memory; +using Ryujinx.Graphics.Texture; +using Ryujinx.Graphics.Video; +using System; +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.X86; +using static Ryujinx.Graphics.Nvdec.Image.SurfaceCommon; + +namespace Ryujinx.Graphics.Nvdec.Image +{ + static class SurfaceReader + { + public static void Read(MemoryManager gmm, ISurface surface, uint lumaOffset, uint 
chromaOffset) + { + int width = surface.Width; + int height = surface.Height; + int stride = surface.Stride; + + ReadOnlySpan<byte> luma = gmm.DeviceGetSpan(lumaOffset, GetBlockLinearSize(width, height, 1)); + + ReadLuma(surface.YPlane.AsSpan(), luma, stride, width, height); + + int uvWidth = surface.UvWidth; + int uvHeight = surface.UvHeight; + int uvStride = surface.UvStride; + + ReadOnlySpan<byte> chroma = gmm.DeviceGetSpan(chromaOffset, GetBlockLinearSize(uvWidth, uvHeight, 2)); + + ReadChroma(surface.UPlane.AsSpan(), surface.VPlane.AsSpan(), chroma, uvStride, uvWidth, uvHeight); + } + + private static void ReadLuma(Span<byte> dst, ReadOnlySpan<byte> src, int dstStride, int width, int height) + { + LayoutConverter.ConvertBlockLinearToLinear(dst, width, height, dstStride, 1, 2, src); + } + + private unsafe static void ReadChroma( + Span<byte> dstU, + Span<byte> dstV, + ReadOnlySpan<byte> src, + int dstStride, + int width, + int height) + { + OffsetCalculator calc = new OffsetCalculator(width, height, 0, false, 2, 2); + + if (Sse2.IsSupported) + { + int strideTrunc64 = BitUtils.AlignDown(width * 2, 64); + + int outStrideGap = dstStride - width; + + fixed (byte* dstUPtr = dstU, dstVPtr = dstV, dataPtr = src) + { + byte* uPtr = dstUPtr; + byte* vPtr = dstVPtr; + + for (int y = 0; y < height; y++) + { + calc.SetY(y); + + for (int x = 0; x < strideTrunc64; x += 64, uPtr += 32, vPtr += 32) + { + byte* offset = dataPtr + calc.GetOffsetWithLineOffset64(x); + byte* offset2 = offset + 0x20; + byte* offset3 = offset + 0x100; + byte* offset4 = offset + 0x120; + + Vector128<byte> value = *(Vector128<byte>*)offset; + Vector128<byte> value2 = *(Vector128<byte>*)offset2; + Vector128<byte> value3 = *(Vector128<byte>*)offset3; + Vector128<byte> value4 = *(Vector128<byte>*)offset4; + + Vector128<byte> u00 = Sse2.UnpackLow(value, value2); + Vector128<byte> v00 = Sse2.UnpackHigh(value, value2); + Vector128<byte> u01 = Sse2.UnpackLow(value3, value4); + Vector128<byte> v01 = 
Sse2.UnpackHigh(value3, value4); + + Vector128<byte> u10 = Sse2.UnpackLow(u00, v00); + Vector128<byte> v10 = Sse2.UnpackHigh(u00, v00); + Vector128<byte> u11 = Sse2.UnpackLow(u01, v01); + Vector128<byte> v11 = Sse2.UnpackHigh(u01, v01); + + Vector128<byte> u20 = Sse2.UnpackLow(u10, v10); + Vector128<byte> v20 = Sse2.UnpackHigh(u10, v10); + Vector128<byte> u21 = Sse2.UnpackLow(u11, v11); + Vector128<byte> v21 = Sse2.UnpackHigh(u11, v11); + + Vector128<byte> u30 = Sse2.UnpackLow(u20, v20); + Vector128<byte> v30 = Sse2.UnpackHigh(u20, v20); + Vector128<byte> u31 = Sse2.UnpackLow(u21, v21); + Vector128<byte> v31 = Sse2.UnpackHigh(u21, v21); + + *(Vector128<byte>*)uPtr = u30; + *(Vector128<byte>*)(uPtr + 16) = u31; + *(Vector128<byte>*)vPtr = v30; + *(Vector128<byte>*)(vPtr + 16) = v31; + } + + for (int x = strideTrunc64 / 2; x < width; x++, uPtr++, vPtr++) + { + byte* offset = dataPtr + calc.GetOffset(x); + + *uPtr = *offset; + *vPtr = *(offset + 1); + } + + uPtr += outStrideGap; + vPtr += outStrideGap; + } + } + } + else + { + for (int y = 0; y < height; y++) + { + int dstBaseOffset = y * dstStride; + + calc.SetY(y); + + for (int x = 0; x < width; x++) + { + int srcOffset = calc.GetOffset(x); + + dstU[dstBaseOffset + x] = src[srcOffset]; + dstV[dstBaseOffset + x] = src[srcOffset + 1]; + } + } + } + } + } +} diff --git a/src/Ryujinx.Graphics.Nvdec/Image/SurfaceWriter.cs b/src/Ryujinx.Graphics.Nvdec/Image/SurfaceWriter.cs new file mode 100644 index 00000000..cc5c251b --- /dev/null +++ b/src/Ryujinx.Graphics.Nvdec/Image/SurfaceWriter.cs @@ -0,0 +1,175 @@ +using Ryujinx.Common; +using Ryujinx.Graphics.Gpu.Memory; +using Ryujinx.Graphics.Texture; +using Ryujinx.Graphics.Video; +using System; +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.X86; +using static Ryujinx.Graphics.Nvdec.Image.SurfaceCommon; +using static Ryujinx.Graphics.Nvdec.MemoryExtensions; + +namespace Ryujinx.Graphics.Nvdec.Image +{ + static class SurfaceWriter + { + public static void 
Write(MemoryManager gmm, ISurface surface, uint lumaOffset, uint chromaOffset) + { + int lumaSize = GetBlockLinearSize(surface.Width, surface.Height, 1); + + using var luma = gmm.GetWritableRegion(ExtendOffset(lumaOffset), lumaSize); + + WriteLuma( + luma.Memory.Span, + surface.YPlane.AsSpan(), + surface.Stride, + surface.Width, + surface.Height); + + int chromaSize = GetBlockLinearSize(surface.UvWidth, surface.UvHeight, 2); + + using var chroma = gmm.GetWritableRegion(ExtendOffset(chromaOffset), chromaSize); + + WriteChroma( + chroma.Memory.Span, + surface.UPlane.AsSpan(), + surface.VPlane.AsSpan(), + surface.UvStride, + surface.UvWidth, + surface.UvHeight); + } + + public static void WriteInterlaced( + MemoryManager gmm, + ISurface surface, + uint lumaTopOffset, + uint chromaTopOffset, + uint lumaBottomOffset, + uint chromaBottomOffset) + { + int lumaSize = GetBlockLinearSize(surface.Width, surface.Height / 2, 1); + + using var lumaTop = gmm.GetWritableRegion(ExtendOffset(lumaTopOffset), lumaSize); + using var lumaBottom = gmm.GetWritableRegion(ExtendOffset(lumaBottomOffset), lumaSize); + + WriteLuma( + lumaTop.Memory.Span, + surface.YPlane.AsSpan(), + surface.Stride * 2, + surface.Width, + surface.Height / 2); + + WriteLuma( + lumaBottom.Memory.Span, + surface.YPlane.AsSpan().Slice(surface.Stride), + surface.Stride * 2, + surface.Width, + surface.Height / 2); + + int chromaSize = GetBlockLinearSize(surface.UvWidth, surface.UvHeight / 2, 2); + + using var chromaTop = gmm.GetWritableRegion(ExtendOffset(chromaTopOffset), chromaSize); + using var chromaBottom = gmm.GetWritableRegion(ExtendOffset(chromaBottomOffset), chromaSize); + + WriteChroma( + chromaTop.Memory.Span, + surface.UPlane.AsSpan(), + surface.VPlane.AsSpan(), + surface.UvStride * 2, + surface.UvWidth, + surface.UvHeight / 2); + + WriteChroma( + chromaBottom.Memory.Span, + surface.UPlane.AsSpan().Slice(surface.UvStride), + surface.VPlane.AsSpan().Slice(surface.UvStride), + surface.UvStride * 2, + 
surface.UvWidth, + surface.UvHeight / 2); + } + + private static void WriteLuma(Span<byte> dst, ReadOnlySpan<byte> src, int srcStride, int width, int height) + { + LayoutConverter.ConvertLinearToBlockLinear(dst, width, height, srcStride, 1, 2, src); + } + + private unsafe static void WriteChroma( + Span<byte> dst, + ReadOnlySpan<byte> srcU, + ReadOnlySpan<byte> srcV, + int srcStride, + int width, + int height) + { + OffsetCalculator calc = new OffsetCalculator(width, height, 0, false, 2, 2); + + if (Sse2.IsSupported) + { + int strideTrunc64 = BitUtils.AlignDown(width * 2, 64); + + int inStrideGap = srcStride - width; + + fixed (byte* outputPtr = dst, srcUPtr = srcU, srcVPtr = srcV) + { + byte* inUPtr = srcUPtr; + byte* inVPtr = srcVPtr; + + for (int y = 0; y < height; y++) + { + calc.SetY(y); + + for (int x = 0; x < strideTrunc64; x += 64, inUPtr += 32, inVPtr += 32) + { + byte* offset = outputPtr + calc.GetOffsetWithLineOffset64(x); + byte* offset2 = offset + 0x20; + byte* offset3 = offset + 0x100; + byte* offset4 = offset + 0x120; + + Vector128<byte> value = *(Vector128<byte>*)inUPtr; + Vector128<byte> value2 = *(Vector128<byte>*)inVPtr; + Vector128<byte> value3 = *(Vector128<byte>*)(inUPtr + 16); + Vector128<byte> value4 = *(Vector128<byte>*)(inVPtr + 16); + + Vector128<byte> uv0 = Sse2.UnpackLow(value, value2); + Vector128<byte> uv1 = Sse2.UnpackHigh(value, value2); + Vector128<byte> uv2 = Sse2.UnpackLow(value3, value4); + Vector128<byte> uv3 = Sse2.UnpackHigh(value3, value4); + + *(Vector128<byte>*)offset = uv0; + *(Vector128<byte>*)offset2 = uv1; + *(Vector128<byte>*)offset3 = uv2; + *(Vector128<byte>*)offset4 = uv3; + } + + for (int x = strideTrunc64 / 2; x < width; x++, inUPtr++, inVPtr++) + { + byte* offset = outputPtr + calc.GetOffset(x); + + *offset = *inUPtr; + *(offset + 1) = *inVPtr; + } + + inUPtr += inStrideGap; + inVPtr += inStrideGap; + } + } + } + else + { + for (int y = 0; y < height; y++) + { + int srcBaseOffset = y * srcStride; + + 
calc.SetY(y); + + for (int x = 0; x < width; x++) + { + int dstOffset = calc.GetOffset(x); + + dst[dstOffset + 0] = srcU[srcBaseOffset + x]; + dst[dstOffset + 1] = srcV[srcBaseOffset + x]; + } + } + } + } + } +} diff --git a/src/Ryujinx.Graphics.Nvdec/MemoryExtensions.cs b/src/Ryujinx.Graphics.Nvdec/MemoryExtensions.cs new file mode 100644 index 00000000..2855a8c7 --- /dev/null +++ b/src/Ryujinx.Graphics.Nvdec/MemoryExtensions.cs @@ -0,0 +1,28 @@ +using Ryujinx.Graphics.Gpu.Memory; +using System; + +namespace Ryujinx.Graphics.Nvdec +{ + static class MemoryExtensions + { + public static T DeviceRead<T>(this MemoryManager gmm, uint offset) where T : unmanaged + { + return gmm.Read<T>((ulong)offset << 8); + } + + public static ReadOnlySpan<byte> DeviceGetSpan(this MemoryManager gmm, uint offset, int size) + { + return gmm.GetSpan((ulong)offset << 8, size); + } + + public static void DeviceWrite(this MemoryManager gmm, uint offset, ReadOnlySpan<byte> data) + { + gmm.Write((ulong)offset << 8, data); + } + + public static ulong ExtendOffset(uint offset) + { + return (ulong)offset << 8; + } + } +} diff --git a/src/Ryujinx.Graphics.Nvdec/NvdecDecoderContext.cs b/src/Ryujinx.Graphics.Nvdec/NvdecDecoderContext.cs new file mode 100644 index 00000000..54934bc5 --- /dev/null +++ b/src/Ryujinx.Graphics.Nvdec/NvdecDecoderContext.cs @@ -0,0 +1,29 @@ +using System; + +namespace Ryujinx.Graphics.Nvdec +{ + class NvdecDecoderContext : IDisposable + { + private FFmpeg.H264.Decoder _h264Decoder; + private FFmpeg.Vp8.Decoder _vp8Decoder; + + public FFmpeg.H264.Decoder GetH264Decoder() + { + return _h264Decoder ??= new FFmpeg.H264.Decoder(); + } + + public FFmpeg.Vp8.Decoder GetVp8Decoder() + { + return _vp8Decoder ??= new FFmpeg.Vp8.Decoder(); + } + + public void Dispose() + { + _h264Decoder?.Dispose(); + _h264Decoder = null; + + _vp8Decoder?.Dispose(); + _vp8Decoder = null; + } + } +}
\ No newline at end of file diff --git a/src/Ryujinx.Graphics.Nvdec/NvdecDevice.cs b/src/Ryujinx.Graphics.Nvdec/NvdecDevice.cs new file mode 100644 index 00000000..ef8185f4 --- /dev/null +++ b/src/Ryujinx.Graphics.Nvdec/NvdecDevice.cs @@ -0,0 +1,83 @@ +using Ryujinx.Common.Logging; +using Ryujinx.Graphics.Device; +using Ryujinx.Graphics.Gpu.Memory; +using Ryujinx.Graphics.Nvdec.Image; +using System.Collections.Concurrent; +using System.Collections.Generic; +using System.Threading; + +namespace Ryujinx.Graphics.Nvdec +{ + public class NvdecDevice : IDeviceStateWithContext + { + private readonly ResourceManager _rm; + private readonly DeviceState<NvdecRegisters> _state; + + private long _currentId; + private ConcurrentDictionary<long, NvdecDecoderContext> _contexts; + private NvdecDecoderContext _currentContext; + + public NvdecDevice(MemoryManager gmm) + { + _rm = new ResourceManager(gmm, new SurfaceCache(gmm)); + _state = new DeviceState<NvdecRegisters>(new Dictionary<string, RwCallback> + { + { nameof(NvdecRegisters.Execute), new RwCallback(Execute, null) } + }); + _contexts = new ConcurrentDictionary<long, NvdecDecoderContext>(); + } + + public long CreateContext() + { + long id = Interlocked.Increment(ref _currentId); + _contexts.TryAdd(id, new NvdecDecoderContext()); + + return id; + } + + public void DestroyContext(long id) + { + if (_contexts.TryRemove(id, out var context)) + { + context.Dispose(); + } + + _rm.Cache.Trim(); + } + + public void BindContext(long id) + { + if (_contexts.TryGetValue(id, out var context)) + { + _currentContext = context; + } + } + + public int Read(int offset) => _state.Read(offset); + public void Write(int offset, int data) => _state.Write(offset, data); + + private void Execute(int data) + { + Decode((ApplicationId)_state.State.SetApplicationId); + } + + private void Decode(ApplicationId applicationId) + { + switch (applicationId) + { + case ApplicationId.H264: + H264Decoder.Decode(_currentContext, _rm, ref _state.State); + 
break; + case ApplicationId.Vp8: + Vp8Decoder.Decode(_currentContext, _rm, ref _state.State); + break; + case ApplicationId.Vp9: + Vp9Decoder.Decode(_rm, ref _state.State); + break; + default: + Logger.Error?.Print(LogClass.Nvdec, $"Unsupported codec \"{applicationId}\"."); + break; + } + } + } +} diff --git a/src/Ryujinx.Graphics.Nvdec/NvdecRegisters.cs b/src/Ryujinx.Graphics.Nvdec/NvdecRegisters.cs new file mode 100644 index 00000000..cf867783 --- /dev/null +++ b/src/Ryujinx.Graphics.Nvdec/NvdecRegisters.cs @@ -0,0 +1,63 @@ +using Ryujinx.Common.Memory; + +namespace Ryujinx.Graphics.Nvdec +{ + struct NvdecRegisters + { +#pragma warning disable CS0649 + public Array64<uint> Reserved0; + public uint Nop; + public Array63<uint> Reserved104; + public uint SetApplicationId; + public uint SetWatchdogTimer; + public Array14<uint> Reserved208; + public uint SemaphoreA; + public uint SemaphoreB; + public uint SemaphoreC; + public uint CtxSaveArea; + public Array44<uint> Reserved254; + public uint Execute; + public uint SemaphoreD; + public Array62<uint> Reserved308; + public uint SetControlParams; + public uint SetDrvPicSetupOffset; + public uint SetInBufBaseOffset; + public uint SetPictureIndex; + public uint SetSliceOffsetsBufOffset; // Also used by VC1 + public uint SetColocDataOffset; // Also used by VC1 + public uint SetHistoryOffset; // Used by VC1 + public uint SetDisplayBufSize; + public uint SetHistogramOffset; // Used by VC1 + public uint SetNvDecStatusOffset; + public uint SetDisplayBufLumaOffset; + public uint SetDisplayBufChromaOffset; + public Array17<uint> SetPictureLumaOffset; + public Array17<uint> SetPictureChromaOffset; + public uint SetPicScratchBufOffset; + public uint SetExternalMvBufferOffset; + public uint SetCryptoData0Offset; + public uint SetCryptoData1Offset; + public Array14<uint> Unknown4C8; + public uint H264SetMbHistBufOffset; + public Array15<uint> Unknown504; + public uint Vp8SetProbDataOffset; + public uint 
Vp8SetHeaderPartitionBufBaseOffset; + public Array14<uint> Unknown548; + public uint HevcSetScalingListOffset; + public uint HevcSetTileSizesOffset; + public uint HevcSetFilterBufferOffset; + public uint HevcSetSaoBufferOffset; + public uint HevcSetSliceInfoBufferOffset; + public uint HevcSetSliceGroupIndex; + public Array10<uint> Unknown598; + public uint Vp9SetProbTabBufOffset; + public uint Vp9SetCtxCounterBufOffset; + public uint Vp9SetSegmentReadBufOffset; + public uint Vp9SetSegmentWriteBufOffset; + public uint Vp9SetTileSizeBufOffset; + public uint Vp9SetColMvWriteBufOffset; + public uint Vp9SetColMvReadBufOffset; + public uint Vp9SetFilterBufferOffset; +#pragma warning restore CS0649 + } +} diff --git a/src/Ryujinx.Graphics.Nvdec/NvdecStatus.cs b/src/Ryujinx.Graphics.Nvdec/NvdecStatus.cs new file mode 100644 index 00000000..0712af88 --- /dev/null +++ b/src/Ryujinx.Graphics.Nvdec/NvdecStatus.cs @@ -0,0 +1,16 @@ +using Ryujinx.Graphics.Nvdec.Types.Vp9; + +namespace Ryujinx.Graphics.Nvdec +{ + struct NvdecStatus + { +#pragma warning disable CS0649 + public uint MbsCorrectlyDecoded; + public uint MbsInError; + public uint Reserved; + public uint ErrorStatus; + public FrameStats Stats; + public uint SliceHeaderErrorCode; +#pragma warning restore CS0649 + } +}
\ No newline at end of file diff --git a/src/Ryujinx.Graphics.Nvdec/ResourceManager.cs b/src/Ryujinx.Graphics.Nvdec/ResourceManager.cs new file mode 100644 index 00000000..08d24258 --- /dev/null +++ b/src/Ryujinx.Graphics.Nvdec/ResourceManager.cs @@ -0,0 +1,17 @@ +using Ryujinx.Graphics.Gpu.Memory; +using Ryujinx.Graphics.Nvdec.Image; + +namespace Ryujinx.Graphics.Nvdec +{ + readonly struct ResourceManager + { + public MemoryManager Gmm { get; } + public SurfaceCache Cache { get; } + + public ResourceManager(MemoryManager gmm, SurfaceCache cache) + { + Gmm = gmm; + Cache = cache; + } + } +} diff --git a/src/Ryujinx.Graphics.Nvdec/Ryujinx.Graphics.Nvdec.csproj b/src/Ryujinx.Graphics.Nvdec/Ryujinx.Graphics.Nvdec.csproj new file mode 100644 index 00000000..bfba98a7 --- /dev/null +++ b/src/Ryujinx.Graphics.Nvdec/Ryujinx.Graphics.Nvdec.csproj @@ -0,0 +1,18 @@ +<Project Sdk="Microsoft.NET.Sdk"> + + <PropertyGroup> + <TargetFramework>net7.0</TargetFramework> + <AllowUnsafeBlocks>true</AllowUnsafeBlocks> + </PropertyGroup> + + <ItemGroup> + <ProjectReference Include="..\Ryujinx.Common\Ryujinx.Common.csproj" /> + <ProjectReference Include="..\Ryujinx.Graphics.Device\Ryujinx.Graphics.Device.csproj" /> + <ProjectReference Include="..\Ryujinx.Graphics.Gpu\Ryujinx.Graphics.Gpu.csproj" /> + <ProjectReference Include="..\Ryujinx.Graphics.Nvdec.FFmpeg\Ryujinx.Graphics.Nvdec.FFmpeg.csproj" /> + <ProjectReference Include="..\Ryujinx.Graphics.Nvdec.Vp9\Ryujinx.Graphics.Nvdec.Vp9.csproj" /> + <ProjectReference Include="..\Ryujinx.Graphics.Texture\Ryujinx.Graphics.Texture.csproj" /> + <ProjectReference Include="..\Ryujinx.Graphics.Video\Ryujinx.Graphics.Video.csproj" /> + </ItemGroup> + +</Project> diff --git a/src/Ryujinx.Graphics.Nvdec/Types/H264/PictureInfo.cs b/src/Ryujinx.Graphics.Nvdec/Types/H264/PictureInfo.cs new file mode 100644 index 00000000..7c779dff --- /dev/null +++ b/src/Ryujinx.Graphics.Nvdec/Types/H264/PictureInfo.cs @@ -0,0 +1,122 @@ +using Ryujinx.Common.Memory; 
+using Ryujinx.Graphics.Video; + +namespace Ryujinx.Graphics.Nvdec.Types.H264 +{ + struct PictureInfo + { +#pragma warning disable CS0169, CS0649 + Array18<uint> Unknown0; + public uint BitstreamSize; + public uint NumSlices; + public uint Unknown50; + public uint Unknown54; + public uint Log2MaxPicOrderCntLsbMinus4; + public uint DeltaPicOrderAlwaysZeroFlag; + public uint FrameMbsOnlyFlag; + public uint PicWidthInMbs; + public uint PicHeightInMbs; + public uint BlockLayout; // Not supported on T210 + public uint EntropyCodingModeFlag; + public uint PicOrderPresentFlag; + public uint NumRefIdxL0ActiveMinus1; + public uint NumRefIdxL1ActiveMinus1; + public uint DeblockingFilterControlPresentFlag; + public uint RedundantPicCntPresentFlag; + public uint Transform8x8ModeFlag; + public uint LumaPitch; + public uint ChromaPitch; + public uint LumaTopFieldOffset; + public uint LumaBottomFieldOffset; + public uint LumaFrameOffset; + public uint ChromaTopFieldOffset; + public uint ChromaBottomFieldOffset; + public uint ChromaFrameOffset; + public uint HistBufferSize; + public ulong Flags; + public Array2<int> FieldOrderCnt; + public Array16<ReferenceFrame> RefFrames; + public Array6<Array16<byte>> ScalingLists4x4; + public Array2<Array64<byte>> ScalingLists8x8; + public byte MvcextNumInterViewRefsL0; + public byte MvcextNumInterViewRefsL1; + public ushort Padding2A2; + public uint Unknown2A4; + public uint Unknown2A8; + public uint Unknown2AC; + public Array16<byte> MvcextViewRefMasksL0; + public Array16<byte> MvcextViewRefMasksL1; + public uint Flags2; + public Array10<uint> Unknown2D4; +#pragma warning restore CS0169, CS0649 + + public bool MbAdaptiveFrameFieldFlag => (Flags & (1 << 0)) != 0; + public bool Direct8x8InferenceFlag => (Flags & (1 << 1)) != 0; + public bool WeightedPredFlag => (Flags & (1 << 2)) != 0; + public bool ConstrainedIntraPredFlag => (Flags & (1 << 3)) != 0; + public bool IsReference => (Flags & (1 << 4)) != 0; + public bool FieldPicFlag => (Flags & 
(1 << 5)) != 0; + public bool BottomFieldFlag => (Flags & (1 << 6)) != 0; + public uint Log2MaxFrameNumMinus4 => (uint)(Flags >> 8) & 0xf; + public ushort ChromaFormatIdc => (ushort)((Flags >> 12) & 3); + public uint PicOrderCntType => (uint)(Flags >> 14) & 3; + public int PicInitQpMinus26 => ExtractSx(Flags, 16, 6); + public int ChromaQpIndexOffset => ExtractSx(Flags, 22, 5); + public int SecondChromaQpIndexOffset => ExtractSx(Flags, 27, 5); + public uint WeightedBipredIdc => (uint)(Flags >> 32) & 3; + public uint OutputSurfaceIndex => (uint)(Flags >> 34) & 0x7f; + public uint ColIndex => (uint)(Flags >> 41) & 0x1f; + public ushort FrameNum => (ushort)(Flags >> 46); + public bool QpprimeYZeroTransformBypassFlag => (Flags2 & (1 << 1)) != 0; + + private static int ExtractSx(ulong packed, int lsb, int length) + { + return (int)((long)packed << (64 - (lsb + length)) >> (64 - length)); + } + + public H264PictureInfo Convert() + { + return new H264PictureInfo() + { + FieldOrderCnt = FieldOrderCnt, + IsReference = IsReference, + ChromaFormatIdc = ChromaFormatIdc, + FrameNum = FrameNum, + FieldPicFlag = FieldPicFlag, + BottomFieldFlag = BottomFieldFlag, + NumRefFrames = 0, + MbAdaptiveFrameFieldFlag = MbAdaptiveFrameFieldFlag, + ConstrainedIntraPredFlag = ConstrainedIntraPredFlag, + WeightedPredFlag = WeightedPredFlag, + WeightedBipredIdc = WeightedBipredIdc, + FrameMbsOnlyFlag = FrameMbsOnlyFlag != 0, + Transform8x8ModeFlag = Transform8x8ModeFlag != 0, + ChromaQpIndexOffset = ChromaQpIndexOffset, + SecondChromaQpIndexOffset = SecondChromaQpIndexOffset, + PicInitQpMinus26 = PicInitQpMinus26, + NumRefIdxL0ActiveMinus1 = NumRefIdxL0ActiveMinus1, + NumRefIdxL1ActiveMinus1 = NumRefIdxL1ActiveMinus1, + Log2MaxFrameNumMinus4 = Log2MaxFrameNumMinus4, + PicOrderCntType = PicOrderCntType, + Log2MaxPicOrderCntLsbMinus4 = Log2MaxPicOrderCntLsbMinus4, + DeltaPicOrderAlwaysZeroFlag = DeltaPicOrderAlwaysZeroFlag != 0, + Direct8x8InferenceFlag = Direct8x8InferenceFlag, + 
EntropyCodingModeFlag = EntropyCodingModeFlag != 0, + PicOrderPresentFlag = PicOrderPresentFlag != 0, + DeblockingFilterControlPresentFlag = DeblockingFilterControlPresentFlag != 0, + RedundantPicCntPresentFlag = RedundantPicCntPresentFlag != 0, + NumSliceGroupsMinus1 = 0, + SliceGroupMapType = 0, + SliceGroupChangeRateMinus1 = 0, + FmoAsoEnable = false, + ScalingMatrixPresent = true, + ScalingLists4x4 = ScalingLists4x4, + ScalingLists8x8 = ScalingLists8x8, + FrameType = 0, + PicWidthInMbsMinus1 = PicWidthInMbs - 1, + PicHeightInMapUnitsMinus1 = (PicHeightInMbs >> (FrameMbsOnlyFlag != 0 ? 0 : 1)) - 1, + QpprimeYZeroTransformBypassFlag = QpprimeYZeroTransformBypassFlag + }; + } + } +} diff --git a/src/Ryujinx.Graphics.Nvdec/Types/H264/ReferenceFrame.cs b/src/Ryujinx.Graphics.Nvdec/Types/H264/ReferenceFrame.cs new file mode 100644 index 00000000..d205a47a --- /dev/null +++ b/src/Ryujinx.Graphics.Nvdec/Types/H264/ReferenceFrame.cs @@ -0,0 +1,15 @@ +using Ryujinx.Common.Memory; + +namespace Ryujinx.Graphics.Nvdec.Types.H264 +{ + struct ReferenceFrame + { +#pragma warning disable CS0649 + public uint Flags; + public Array2<uint> FieldOrderCnt; + public uint FrameNum; +#pragma warning restore CS0649 + + public uint OutputSurfaceIndex => (uint)Flags & 0x7f; + } +} diff --git a/src/Ryujinx.Graphics.Nvdec/Types/Vp8/PictureInfo.cs b/src/Ryujinx.Graphics.Nvdec/Types/Vp8/PictureInfo.cs new file mode 100644 index 00000000..844f2103 --- /dev/null +++ b/src/Ryujinx.Graphics.Nvdec/Types/Vp8/PictureInfo.cs @@ -0,0 +1,75 @@ +using Ryujinx.Common.Memory; +using Ryujinx.Graphics.Video; + +namespace Ryujinx.Graphics.Nvdec.Types.Vp8 +{ + struct PictureInfo + { +#pragma warning disable CS0649 + public Array13<uint> Unknown0; + public uint GpTimerTimeoutValue; + public ushort FrameWidth; + public ushort FrameHeight; + public byte KeyFrame; // 1: key frame - 0: not + public byte Version; + public byte Flags0; + // TileFormat : 2 // 0: TBL; 1: KBL; + // GobHeight : 3 // Set GOB height, 0: 
GOB_2, 1: GOB_4, 2: GOB_8, 3: GOB_16, 4: GOB_32 (NVDEC3 onwards) + // ReserverdSurfaceFormat : 3 + public byte ErrorConcealOn; // 1: error conceal on - 0: off + public uint FirstPartSize; // the size of first partition (frame header and mb header partition) + public uint HistBufferSize; // in units of 256 + public uint VLDBufferSize; // in units of 1 + public Array2<uint> FrameStride; // [y_c] + public uint LumaTopOffset; // offset of luma top field in units of 256 + public uint LumaBotOffset; // offset of luma bottom field in units of 256 + public uint LumaFrameOffset; // offset of luma frame in units of 256 + public uint ChromaTopOffset; // offset of chroma top field in units of 256 + public uint ChromaBotOffset; // offset of chroma bottom field in units of 256 + public uint ChromaFrameOffset; // offset of chroma frame in units of 256 + public uint Flags1; + // EnableTFOutput : 1; // =1, enable dbfdma to output the display surface; if disable, then the following configure on tf is useless. + // Remap for VC1 + // VC1MapYFlag : 1 + // MapYValue : 3 + // VC1MapUVFlag : 1 + // MapUVValue : 3 + // TF + // OutStride : 8 + // TilingFormat : 3; + // OutputStructure : 1 // 0:frame, 1:field + // Reserved0 : 11 + public Array2<int> OutputTop; // in units of 256 + public Array2<int> OutputBottom; // in units of 256 + // Histogram + public uint Flags2; + // EnableHistogram : 1 // enable histogram info collection + // HistogramStartX : 12 // start X of Histogram window + // HistogramStartY : 12 // start Y of Histogram window + // Reserved1 : 7 + // HistogramEndX : 12 // end X of Histogram window + // HistogramEndY : 12 // end y of Histogram window + // Reserved2 : 8 + // Decode picture buffer related + public sbyte CurrentOutputMemoryLayout; + public Array3<sbyte> OutputMemoryLayout; // output NV12/NV24 setting. 
item 0:golden - 1: altref - 2: last + public byte SegmentationFeatureDataUpdate; + public Array3<byte> Reserved3; + public uint ResultValue; // ucode return result + public Array8<uint> PartitionOffset; + public Array3<uint> Reserved4; +#pragma warning restore CS0649 + + public Vp8PictureInfo Convert() + { + return new Vp8PictureInfo() + { + KeyFrame = KeyFrame != 0, + FirstPartSize = FirstPartSize, + Version = Version, + FrameWidth = FrameWidth, + FrameHeight = FrameHeight + }; + } + } +} diff --git a/src/Ryujinx.Graphics.Nvdec/Types/Vp9/BackwardUpdates.cs b/src/Ryujinx.Graphics.Nvdec/Types/Vp9/BackwardUpdates.cs new file mode 100644 index 00000000..661e6cdd --- /dev/null +++ b/src/Ryujinx.Graphics.Nvdec/Types/Vp9/BackwardUpdates.cs @@ -0,0 +1,72 @@ +using Ryujinx.Common.Memory; +using Ryujinx.Graphics.Video; + +namespace Ryujinx.Graphics.Nvdec.Types.Vp9 +{ + struct BackwardUpdates + { + public Array7<Array3<Array2<uint>>> InterModeCounts; + public Array4<Array10<uint>> YModeCounts; + public Array10<Array10<uint>> UvModeCounts; + public Array16<Array4<uint>> PartitionCounts; + public Array4<Array3<uint>> SwitchableInterpsCount; + public Array4<Array2<uint>> IntraInterCount; + public Array5<Array2<uint>> CompInterCount; + public Array5<Array2<Array2<uint>>> SingleRefCount; + public Array5<Array2<uint>> CompRefCount; + public Array2<Array4<uint>> Tx32x32; + public Array2<Array3<uint>> Tx16x16; + public Array2<Array2<uint>> Tx8x8; + public Array3<Array2<uint>> MbSkipCount; + public Array4<uint> Joints; + public Array2<Array2<uint>> Sign; + public Array2<Array11<uint>> Classes; + public Array2<Array2<uint>> Class0; + public Array2<Array10<Array2<uint>>> Bits; + public Array2<Array2<Array4<uint>>> Class0Fp; + public Array2<Array4<uint>> Fp; + public Array2<Array2<uint>> Class0Hp; + public Array2<Array2<uint>> Hp; + public Array4<Array2<Array2<Array6<Array6<Array4<uint>>>>>> CoefCounts; + public Array4<Array2<Array2<Array6<Array6<uint>>>>> EobCounts; + + public 
BackwardUpdates(ref Vp9BackwardUpdates counts) + { + InterModeCounts = new Array7<Array3<Array2<uint>>>(); + + for (int i = 0; i < 7; i++) + { + InterModeCounts[i][0][0] = counts.InterMode[i][2]; + InterModeCounts[i][0][1] = counts.InterMode[i][0] + counts.InterMode[i][1] + counts.InterMode[i][3]; + InterModeCounts[i][1][0] = counts.InterMode[i][0]; + InterModeCounts[i][1][1] = counts.InterMode[i][1] + counts.InterMode[i][3]; + InterModeCounts[i][2][0] = counts.InterMode[i][1]; + InterModeCounts[i][2][1] = counts.InterMode[i][3]; + } + + YModeCounts = counts.YMode; + UvModeCounts = counts.UvMode; + PartitionCounts = counts.Partition; + SwitchableInterpsCount = counts.SwitchableInterp; + IntraInterCount = counts.IntraInter; + CompInterCount = counts.CompInter; + SingleRefCount = counts.SingleRef; + CompRefCount = counts.CompRef; + Tx32x32 = counts.Tx32x32; + Tx16x16 = counts.Tx16x16; + Tx8x8 = counts.Tx8x8; + MbSkipCount = counts.Skip; + Joints = counts.Joints; + Sign = counts.Sign; + Classes = counts.Classes; + Class0 = counts.Class0; + Bits = counts.Bits; + Class0Fp = counts.Class0Fp; + Fp = counts.Fp; + Class0Hp = counts.Class0Hp; + Hp = counts.Hp; + CoefCounts = counts.Coef; + EobCounts = counts.EobBranch; + } + } +} diff --git a/src/Ryujinx.Graphics.Nvdec/Types/Vp9/EntropyProbs.cs b/src/Ryujinx.Graphics.Nvdec/Types/Vp9/EntropyProbs.cs new file mode 100644 index 00000000..b2858d2d --- /dev/null +++ b/src/Ryujinx.Graphics.Nvdec/Types/Vp9/EntropyProbs.cs @@ -0,0 +1,141 @@ +using Ryujinx.Common.Memory; +using Ryujinx.Graphics.Video; + +namespace Ryujinx.Graphics.Nvdec.Types.Vp9 +{ + struct EntropyProbs + { +#pragma warning disable CS0649 + public Array10<Array10<Array8<byte>>> KfYModeProbE0ToE7; + public Array10<Array10<byte>> KfYModeProbE8; + public Array3<byte> Padding384; + public Array7<byte> SegTreeProbs; + public Array3<byte> SegPredProbs; + public Array15<byte> Padding391; + public Array10<Array8<byte>> KfUvModeProbE0ToE7; + public Array10<byte> 
KfUvModeProbE8; + public Array6<byte> Padding3FA; + public Array7<Array4<byte>> InterModeProb; + public Array4<byte> IntraInterProb; + public Array10<Array8<byte>> UvModeProbE0ToE7; + public Array2<Array1<byte>> Tx8x8Prob; + public Array2<Array2<byte>> Tx16x16Prob; + public Array2<Array3<byte>> Tx32x32Prob; + public Array4<byte> YModeProbE8; + public Array4<Array8<byte>> YModeProbE0ToE7; + public Array16<Array4<byte>> KfPartitionProb; + public Array16<Array4<byte>> PartitionProb; + public Array10<byte> UvModeProbE8; + public Array4<Array2<byte>> SwitchableInterpProb; + public Array5<byte> CompInterProb; + public Array4<byte> SkipProbs; + public Array3<byte> Joints; + public Array2<byte> Sign; + public Array2<Array1<byte>> Class0; + public Array2<Array3<byte>> Fp; + public Array2<byte> Class0Hp; + public Array2<byte> Hp; + public Array2<Array10<byte>> Classes; + public Array2<Array2<Array3<byte>>> Class0Fp; + public Array2<Array10<byte>> Bits; + public Array5<Array2<byte>> SingleRefProb; + public Array5<byte> CompRefProb; + public Array17<byte> Padding58F; + public Array4<Array2<Array2<Array6<Array6<Array4<byte>>>>>> CoefProbs; +#pragma warning restore CS0649 + + public void Convert(ref Vp9EntropyProbs fc) + { + for (int i = 0; i < 10; i++) + { + for (int j = 0; j < 10; j++) + { + for (int k = 0; k < 9; k++) + { + fc.KfYModeProb[i][j][k] = k < 8 ? KfYModeProbE0ToE7[i][j][k] : KfYModeProbE8[i][j]; + } + } + } + + fc.SegTreeProb = SegTreeProbs; + fc.SegPredProb = SegPredProbs; + + for (int i = 0; i < 7; i++) + { + for (int j = 0; j < 3; j++) + { + fc.InterModeProb[i][j] = InterModeProb[i][j]; + } + } + + fc.IntraInterProb = IntraInterProb; + + for (int i = 0; i < 10; i++) + { + for (int j = 0; j < 9; j++) + { + fc.KfUvModeProb[i][j] = j < 8 ? KfUvModeProbE0ToE7[i][j] : KfUvModeProbE8[i]; + fc.UvModeProb[i][j] = j < 8 ? 
UvModeProbE0ToE7[i][j] : UvModeProbE8[i]; + } + } + + fc.Tx8x8Prob = Tx8x8Prob; + fc.Tx16x16Prob = Tx16x16Prob; + fc.Tx32x32Prob = Tx32x32Prob; + + for (int i = 0; i < 4; i++) + { + for (int j = 0; j < 9; j++) + { + fc.YModeProb[i][j] = j < 8 ? YModeProbE0ToE7[i][j] : YModeProbE8[i]; + } + } + + for (int i = 0; i < 16; i++) + { + for (int j = 0; j < 3; j++) + { + fc.KfPartitionProb[i][j] = KfPartitionProb[i][j]; + fc.PartitionProb[i][j] = PartitionProb[i][j]; + } + } + + fc.SwitchableInterpProb = SwitchableInterpProb; + fc.CompInterProb = CompInterProb; + fc.SkipProb[0] = SkipProbs[0]; + fc.SkipProb[1] = SkipProbs[1]; + fc.SkipProb[2] = SkipProbs[2]; + fc.Joints = Joints; + fc.Sign = Sign; + fc.Class0 = Class0; + fc.Fp = Fp; + fc.Class0Hp = Class0Hp; + fc.Hp = Hp; + fc.Classes = Classes; + fc.Class0Fp = Class0Fp; + fc.Bits = Bits; + fc.SingleRefProb = SingleRefProb; + fc.CompRefProb = CompRefProb; + + for (int i = 0; i < 4; i++) + { + for (int j = 0; j < 2; j++) + { + for (int k = 0; k < 2; k++) + { + for (int l = 0; l < 6; l++) + { + for (int m = 0; m < 6; m++) + { + for (int n = 0; n < 3; n++) + { + fc.CoefProbs[i][j][k][l][m][n] = CoefProbs[i][j][k][l][m][n]; + } + } + } + } + } + } + } + } +} diff --git a/src/Ryujinx.Graphics.Nvdec/Types/Vp9/FrameFlags.cs b/src/Ryujinx.Graphics.Nvdec/Types/Vp9/FrameFlags.cs new file mode 100644 index 00000000..88f1ac20 --- /dev/null +++ b/src/Ryujinx.Graphics.Nvdec/Types/Vp9/FrameFlags.cs @@ -0,0 +1,12 @@ +namespace Ryujinx.Graphics.Nvdec.Types.Vp9 +{ + enum FrameFlags : uint + { + IsKeyFrame = 1 << 0, + LastFrameIsKeyFrame = 1 << 1, + FrameSizeChanged = 1 << 2, + ErrorResilientMode = 1 << 3, + LastShowFrame = 1 << 4, + IntraOnly = 1 << 5 + } +} diff --git a/src/Ryujinx.Graphics.Nvdec/Types/Vp9/FrameSize.cs b/src/Ryujinx.Graphics.Nvdec/Types/Vp9/FrameSize.cs new file mode 100644 index 00000000..d449ec4d --- /dev/null +++ b/src/Ryujinx.Graphics.Nvdec/Types/Vp9/FrameSize.cs @@ -0,0 +1,12 @@ +namespace 
Ryujinx.Graphics.Nvdec.Types.Vp9 +{ + struct FrameSize + { +#pragma warning disable CS0649 + public ushort Width; + public ushort Height; + public ushort LumaPitch; + public ushort ChromaPitch; +#pragma warning restore CS0649 + } +} diff --git a/src/Ryujinx.Graphics.Nvdec/Types/Vp9/FrameStats.cs b/src/Ryujinx.Graphics.Nvdec/Types/Vp9/FrameStats.cs new file mode 100644 index 00000000..26aab506 --- /dev/null +++ b/src/Ryujinx.Graphics.Nvdec/Types/Vp9/FrameStats.cs @@ -0,0 +1,21 @@ +namespace Ryujinx.Graphics.Nvdec.Types.Vp9 +{ + struct FrameStats + { +#pragma warning disable CS0649 + public uint Unknown0; + public uint Unknown4; + public uint Pass2CycleCount; + public uint ErrorStatus; + public uint FrameStatusIntraCnt; + public uint FrameStatusInterCnt; + public uint FrameStatusSkipCtuCount; + public uint FrameStatusFwdMvxCnt; + public uint FrameStatusFwdMvyCnt; + public uint FrameStatusBwdMvxCnt; + public uint FrameStatusBwdMvyCnt; + public uint ErrorCtbPos; + public uint ErrorSlicePos; +#pragma warning restore CS0649 + } +} diff --git a/src/Ryujinx.Graphics.Nvdec/Types/Vp9/LoopFilter.cs b/src/Ryujinx.Graphics.Nvdec/Types/Vp9/LoopFilter.cs new file mode 100644 index 00000000..7cb0fd7a --- /dev/null +++ b/src/Ryujinx.Graphics.Nvdec/Types/Vp9/LoopFilter.cs @@ -0,0 +1,13 @@ +using Ryujinx.Common.Memory; + +namespace Ryujinx.Graphics.Nvdec.Types.Vp9 +{ + struct LoopFilter + { +#pragma warning disable CS0649 + public byte ModeRefDeltaEnabled; + public Array4<sbyte> RefDeltas; + public Array2<sbyte> ModeDeltas; +#pragma warning restore CS0649 + } +} diff --git a/src/Ryujinx.Graphics.Nvdec/Types/Vp9/PictureInfo.cs b/src/Ryujinx.Graphics.Nvdec/Types/Vp9/PictureInfo.cs new file mode 100644 index 00000000..7d06f747 --- /dev/null +++ b/src/Ryujinx.Graphics.Nvdec/Types/Vp9/PictureInfo.cs @@ -0,0 +1,87 @@ +using Ryujinx.Common.Memory; +using Ryujinx.Graphics.Video; + +namespace Ryujinx.Graphics.Nvdec.Types.Vp9 +{ + struct PictureInfo + { +#pragma warning disable CS0649 + public 
Array12<uint> Unknown0; + public uint BitstreamSize; + public uint IsEncrypted; + public uint Unknown38; + public uint Reserved3C; + public uint BlockLayout; // Not supported on T210 + public uint WorkBufferSizeShr8; + public FrameSize LastFrameSize; + public FrameSize GoldenFrameSize; + public FrameSize AltFrameSize; + public FrameSize CurrentFrameSize; + public FrameFlags Flags; + public Array4<sbyte> RefFrameSignBias; + public byte FirstLevel; + public byte SharpnessLevel; + public byte BaseQIndex; + public byte YDcDeltaQ; + public byte UvAcDeltaQ; + public byte UvDcDeltaQ; + public byte Lossless; + public byte TxMode; + public byte AllowHighPrecisionMv; + public byte InterpFilter; + public byte ReferenceMode; + public sbyte CompFixedRef; + public Array2<sbyte> CompVarRef; + public byte Log2TileCols; + public byte Log2TileRows; + public Segmentation Seg; + public LoopFilter Lf; + public byte PaddingEB; + public uint WorkBufferSizeShr8New; // Not supported on T210 + public uint SurfaceParams; // Not supported on T210 + public uint UnknownF4; + public uint UnknownF8; + public uint UnknownFC; +#pragma warning restore CS0649 + + public uint BitDepth => (SurfaceParams >> 1) & 0xf; + + public Vp9PictureInfo Convert() + { + return new Vp9PictureInfo() + { + IsKeyFrame = Flags.HasFlag(FrameFlags.IsKeyFrame), + IntraOnly = Flags.HasFlag(FrameFlags.IntraOnly), + UsePrevInFindMvRefs = + !Flags.HasFlag(FrameFlags.ErrorResilientMode) && + !Flags.HasFlag(FrameFlags.FrameSizeChanged) && + !Flags.HasFlag(FrameFlags.IntraOnly) && + Flags.HasFlag(FrameFlags.LastShowFrame) && + !Flags.HasFlag(FrameFlags.LastFrameIsKeyFrame), + RefFrameSignBias = RefFrameSignBias, + BaseQIndex = BaseQIndex, + YDcDeltaQ = YDcDeltaQ, + UvDcDeltaQ = UvDcDeltaQ, + UvAcDeltaQ = UvAcDeltaQ, + Lossless = Lossless != 0, + TransformMode = TxMode, + AllowHighPrecisionMv = AllowHighPrecisionMv != 0, + InterpFilter = InterpFilter, + ReferenceMode = ReferenceMode, + CompFixedRef = CompFixedRef, + CompVarRef = 
CompVarRef, + Log2TileCols = Log2TileCols, + Log2TileRows = Log2TileRows, + SegmentEnabled = Seg.Enabled != 0, + SegmentMapUpdate = Seg.UpdateMap != 0, + SegmentMapTemporalUpdate = Seg.TemporalUpdate != 0, + SegmentAbsDelta = Seg.AbsDelta, + SegmentFeatureEnable = Seg.FeatureMask, + SegmentFeatureData = Seg.FeatureData, + ModeRefDeltaEnabled = Lf.ModeRefDeltaEnabled != 0, + RefDeltas = Lf.RefDeltas, + ModeDeltas = Lf.ModeDeltas + }; + } + } +} diff --git a/src/Ryujinx.Graphics.Nvdec/Types/Vp9/Segmentation.cs b/src/Ryujinx.Graphics.Nvdec/Types/Vp9/Segmentation.cs new file mode 100644 index 00000000..f6c4f0b1 --- /dev/null +++ b/src/Ryujinx.Graphics.Nvdec/Types/Vp9/Segmentation.cs @@ -0,0 +1,16 @@ +using Ryujinx.Common.Memory; + +namespace Ryujinx.Graphics.Nvdec.Types.Vp9 +{ + struct Segmentation + { +#pragma warning disable CS0649 + public byte Enabled; + public byte UpdateMap; + public byte TemporalUpdate; + public byte AbsDelta; + public Array8<uint> FeatureMask; + public Array8<Array4<short>> FeatureData; +#pragma warning restore CS0649 + } +} diff --git a/src/Ryujinx.Graphics.Nvdec/Vp8Decoder.cs b/src/Ryujinx.Graphics.Nvdec/Vp8Decoder.cs new file mode 100644 index 00000000..cce9a574 --- /dev/null +++ b/src/Ryujinx.Graphics.Nvdec/Vp8Decoder.cs @@ -0,0 +1,33 @@ +using Ryujinx.Graphics.Nvdec.FFmpeg.Vp8; +using Ryujinx.Graphics.Nvdec.Image; +using Ryujinx.Graphics.Nvdec.Types.Vp8; +using Ryujinx.Graphics.Video; +using System; + +namespace Ryujinx.Graphics.Nvdec +{ + static class Vp8Decoder + { + public static void Decode(NvdecDecoderContext context, ResourceManager rm, ref NvdecRegisters state) + { + PictureInfo pictureInfo = rm.Gmm.DeviceRead<PictureInfo>(state.SetDrvPicSetupOffset); + ReadOnlySpan<byte> bitstream = rm.Gmm.DeviceGetSpan(state.SetInBufBaseOffset, (int)pictureInfo.VLDBufferSize); + + Decoder decoder = context.GetVp8Decoder(); + + ISurface outputSurface = rm.Cache.Get(decoder, 0, 0, pictureInfo.FrameWidth, pictureInfo.FrameHeight); + + Vp8PictureInfo 
info = pictureInfo.Convert(); + + uint lumaOffset = state.SetPictureLumaOffset[3]; + uint chromaOffset = state.SetPictureChromaOffset[3]; + + if (decoder.Decode(ref info, outputSurface, bitstream)) + { + SurfaceWriter.Write(rm.Gmm, outputSurface, lumaOffset, chromaOffset); + } + + rm.Cache.Put(outputSurface); + } + } +}
// File: src/Ryujinx.Graphics.Nvdec/Vp9Decoder.cs
using Ryujinx.Common;
using Ryujinx.Graphics.Gpu.Memory;
using Ryujinx.Graphics.Nvdec.Image;
using Ryujinx.Graphics.Nvdec.Types.Vp9;
using Ryujinx.Graphics.Nvdec.Vp9;
using Ryujinx.Graphics.Video;
using System;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using static Ryujinx.Graphics.Nvdec.MemoryExtensions;

namespace Ryujinx.Graphics.Nvdec
{
    /// <summary>
    /// Drives a VP9 frame decode from the NVDEC register state: reads the picture setup and
    /// entropy tables from device memory, runs the software decoder and writes the results
    /// (picture, motion vectors, backward update counters) back to device memory.
    /// </summary>
    static class Vp9Decoder
    {
        // Frame dimensions are converted to "mi" units by dividing (rounding up) by this value.
        private const int MiSizeInPixels = 8;

        // Bytes reserved in the motion vector buffers per mi unit.
        // NOTE(review): presumably equal to the size of Vp9MvRef; confirm against its definition.
        private const int MvBytesPerMiUnit = 16;

        // Single decoder instance shared by every call; it is never reassigned.
        private static readonly Decoder _decoder = new Decoder();

        /// <summary>
        /// Decodes the VP9 frame described by <paramref name="state"/>.
        /// </summary>
        /// <param name="rm">Provides the device memory manager (<c>Gmm</c>) and the surface cache (<c>Cache</c>).</param>
        /// <param name="state">NVDEC registers holding the offsets of all input and output buffers.</param>
        /// <remarks>
        /// Picture offsets 0, 1 and 2 are used for the last, golden and alt reference surfaces,
        /// and offset 3 for the surface being decoded.
        /// </remarks>
        public static void Decode(ResourceManager rm, ref NvdecRegisters state)
        {
            PictureInfo pictureInfo = rm.Gmm.DeviceRead<PictureInfo>(state.SetDrvPicSetupOffset);
            EntropyProbs entropy = rm.Gmm.DeviceRead<EntropyProbs>(state.Vp9SetProbTabBufOffset);

            ISurface Rent(uint luma, uint chroma, FrameSize size)
            {
                return rm.Cache.Get(_decoder, luma, chroma, size.Width, size.Height);
            }

            ISurface lastSurface = Rent(state.SetPictureLumaOffset[0], state.SetPictureChromaOffset[0], pictureInfo.LastFrameSize);
            ISurface goldenSurface = Rent(state.SetPictureLumaOffset[1], state.SetPictureChromaOffset[1], pictureInfo.GoldenFrameSize);
            ISurface altSurface = Rent(state.SetPictureLumaOffset[2], state.SetPictureChromaOffset[2], pictureInfo.AltFrameSize);
            ISurface currentSurface = Rent(state.SetPictureLumaOffset[3], state.SetPictureChromaOffset[3], pictureInfo.CurrentFrameSize);

            try
            {
                Vp9PictureInfo info = pictureInfo.Convert();

                info.LastReference = lastSurface;
                info.GoldenReference = goldenSurface;
                info.AltReference = altSurface;

                entropy.Convert(ref info.Entropy);

                ReadOnlySpan<byte> bitstream = rm.Gmm.DeviceGetSpan(state.SetInBufBaseOffset, (int)pictureInfo.BitstreamSize);

                // The previous frame's motion vectors are only read when the frame flags allow it.
                ReadOnlySpan<Vp9MvRef> mvsIn = info.UsePrevInFindMvRefs
                    ? GetMvsInput(rm.Gmm, pictureInfo.CurrentFrameSize, state.Vp9SetColMvReadBufOffset)
                    : ReadOnlySpan<Vp9MvRef>.Empty;

                using var mvsRegion = rm.Gmm.GetWritableRegion(
                    ExtendOffset(state.Vp9SetColMvWriteBufOffset),
                    GetMvBufferSize(pictureInfo.CurrentFrameSize));

                Span<Vp9MvRef> mvsOut = MemoryMarshal.Cast<byte, Vp9MvRef>(mvsRegion.Memory.Span);

                uint lumaOffset = state.SetPictureLumaOffset[3];
                uint chromaOffset = state.SetPictureChromaOffset[3];

                if (_decoder.Decode(ref info, currentSurface, bitstream, mvsIn, mvsOut))
                {
                    SurfaceWriter.Write(rm.Gmm, currentSurface, lumaOffset, chromaOffset);
                }

                // The counters are written back whether or not the decode succeeded.
                WriteBackwardUpdates(rm.Gmm, state.Vp9SetCtxCounterBufOffset, ref info.BackwardUpdateCounts);
            }
            finally
            {
                // Always hand the surfaces back, even if decoding or a write-back threw,
                // so that a failed frame cannot leave them rented forever.
                rm.Cache.Put(lastSurface);
                rm.Cache.Put(goldenSurface);
                rm.Cache.Put(altSurface);
                rm.Cache.Put(currentSurface);
            }
        }

        /// <summary>
        /// Computes the size in bytes of a motion vector buffer for a frame of the given size.
        /// </summary>
        /// <param name="size">Frame size; only <c>Width</c> and <c>Height</c> are used.</param>
        /// <returns>Rows times columns (both in mi units) times <see cref="MvBytesPerMiUnit"/>.</returns>
        private static int GetMvBufferSize(FrameSize size)
        {
            int miCols = BitUtils.DivRoundUp(size.Width, MiSizeInPixels);
            int miRows = BitUtils.DivRoundUp(size.Height, MiSizeInPixels);

            return miRows * miCols * MvBytesPerMiUnit;
        }

        /// <summary>
        /// Gets the motion vector input buffer at <paramref name="offset"/>, viewed as <see cref="Vp9MvRef"/> entries.
        /// </summary>
        /// <param name="gmm">Memory manager used to access device memory.</param>
        /// <param name="size">Frame size that determines how many bytes are read.</param>
        /// <param name="offset">Offset of the buffer, passed unchanged to <c>DeviceGetSpan</c>.</param>
        private static ReadOnlySpan<Vp9MvRef> GetMvsInput(MemoryManager gmm, FrameSize size, uint offset)
        {
            return MemoryMarshal.Cast<byte, Vp9MvRef>(gmm.DeviceGetSpan(offset, GetMvBufferSize(size)));
        }

        /// <summary>
        /// Converts <paramref name="counts"/> to the <see cref="BackwardUpdates"/> layout and stores it at <paramref name="offset"/>.
        /// </summary>
        /// <param name="gmm">Memory manager used to access device memory.</param>
        /// <param name="offset">Offset of the counter buffer, before <c>ExtendOffset</c> is applied.</param>
        /// <param name="counts">Counters produced by the decoder.</param>
        private static void WriteBackwardUpdates(MemoryManager gmm, uint offset, ref Vp9BackwardUpdates counts)
        {
            using var backwardUpdatesRegion = gmm.GetWritableRegion(ExtendOffset(offset), Unsafe.SizeOf<BackwardUpdates>());

            ref var backwardUpdates = ref MemoryMarshal.Cast<byte, BackwardUpdates>(backwardUpdatesRegion.Memory.Span)[0];

            backwardUpdates = new BackwardUpdates(ref counts);
        }
    }
}
