aboutsummaryrefslogtreecommitdiff
path: root/src/Ryujinx.Graphics.Nvdec
diff options
context:
space:
mode:
authorTSR Berry <20988865+TSRBerry@users.noreply.github.com>2023-04-08 01:22:00 +0200
committerMary <thog@protonmail.com>2023-04-27 23:51:14 +0200
commitcee712105850ac3385cd0091a923438167433f9f (patch)
tree4a5274b21d8b7f938c0d0ce18736d3f2993b11b1 /src/Ryujinx.Graphics.Nvdec
parentcd124bda587ef09668a971fa1cac1c3f0cfc9f21 (diff)
Move solution and projects to src
Diffstat (limited to 'src/Ryujinx.Graphics.Nvdec')
-rw-r--r--src/Ryujinx.Graphics.Nvdec/ApplicationId.cs14
-rw-r--r--src/Ryujinx.Graphics.Nvdec/H264Decoder.cs57
-rw-r--r--src/Ryujinx.Graphics.Nvdec/Image/SurfaceCache.cs174
-rw-r--r--src/Ryujinx.Graphics.Nvdec/Image/SurfaceCommon.cs26
-rw-r--r--src/Ryujinx.Graphics.Nvdec/Image/SurfaceReader.cs133
-rw-r--r--src/Ryujinx.Graphics.Nvdec/Image/SurfaceWriter.cs175
-rw-r--r--src/Ryujinx.Graphics.Nvdec/MemoryExtensions.cs28
-rw-r--r--src/Ryujinx.Graphics.Nvdec/NvdecDecoderContext.cs29
-rw-r--r--src/Ryujinx.Graphics.Nvdec/NvdecDevice.cs83
-rw-r--r--src/Ryujinx.Graphics.Nvdec/NvdecRegisters.cs63
-rw-r--r--src/Ryujinx.Graphics.Nvdec/NvdecStatus.cs16
-rw-r--r--src/Ryujinx.Graphics.Nvdec/ResourceManager.cs17
-rw-r--r--src/Ryujinx.Graphics.Nvdec/Ryujinx.Graphics.Nvdec.csproj18
-rw-r--r--src/Ryujinx.Graphics.Nvdec/Types/H264/PictureInfo.cs122
-rw-r--r--src/Ryujinx.Graphics.Nvdec/Types/H264/ReferenceFrame.cs15
-rw-r--r--src/Ryujinx.Graphics.Nvdec/Types/Vp8/PictureInfo.cs75
-rw-r--r--src/Ryujinx.Graphics.Nvdec/Types/Vp9/BackwardUpdates.cs72
-rw-r--r--src/Ryujinx.Graphics.Nvdec/Types/Vp9/EntropyProbs.cs141
-rw-r--r--src/Ryujinx.Graphics.Nvdec/Types/Vp9/FrameFlags.cs12
-rw-r--r--src/Ryujinx.Graphics.Nvdec/Types/Vp9/FrameSize.cs12
-rw-r--r--src/Ryujinx.Graphics.Nvdec/Types/Vp9/FrameStats.cs21
-rw-r--r--src/Ryujinx.Graphics.Nvdec/Types/Vp9/LoopFilter.cs13
-rw-r--r--src/Ryujinx.Graphics.Nvdec/Types/Vp9/PictureInfo.cs87
-rw-r--r--src/Ryujinx.Graphics.Nvdec/Types/Vp9/Segmentation.cs16
-rw-r--r--src/Ryujinx.Graphics.Nvdec/Vp8Decoder.cs33
-rw-r--r--src/Ryujinx.Graphics.Nvdec/Vp9Decoder.cs90
26 files changed, 1542 insertions, 0 deletions
diff --git a/src/Ryujinx.Graphics.Nvdec/ApplicationId.cs b/src/Ryujinx.Graphics.Nvdec/ApplicationId.cs
new file mode 100644
index 00000000..ada12f8d
--- /dev/null
+++ b/src/Ryujinx.Graphics.Nvdec/ApplicationId.cs
@@ -0,0 +1,14 @@
+namespace Ryujinx.Graphics.Nvdec
+{
+ /// <summary>
+ /// Codec identifiers used to select a decoder on the NVDEC device.
+ /// </summary>
+ /// <remarks>
+ /// NvdecDevice casts the value of the SetApplicationId register to this enum when a decode
+ /// is executed. Only H264, Vp8 and Vp9 are dispatched to a decoder there; any other value
+ /// is logged as an unsupported codec.
+ /// </remarks>
+ public enum ApplicationId
+ {
+ Mpeg = 1,
+ Vc1 = 2,
+ H264 = 3,
+ Mpeg4 = 4,
+ Vp8 = 5,
+ Hevc = 7,
+ Vp9 = 9,
+ HevcParser = 12,
+ }
+}
diff --git a/src/Ryujinx.Graphics.Nvdec/H264Decoder.cs b/src/Ryujinx.Graphics.Nvdec/H264Decoder.cs
new file mode 100644
index 00000000..ecc7dbc7
--- /dev/null
+++ b/src/Ryujinx.Graphics.Nvdec/H264Decoder.cs
@@ -0,0 +1,57 @@
+using Ryujinx.Graphics.Nvdec.FFmpeg.H264;
+using Ryujinx.Graphics.Nvdec.Image;
+using Ryujinx.Graphics.Nvdec.Types.H264;
+using Ryujinx.Graphics.Video;
+using System;
+
+namespace Ryujinx.Graphics.Nvdec
+{
+ static class H264Decoder
+ {
+ private const int MbSizeInPixels = 16;
+
+        /// <summary>
+        /// Decodes one H264 picture described by the current register state and, on success,
+        /// writes the decoded planes back to guest memory.
+        /// </summary>
+        /// <param name="context">Decoder context that supplies the (lazily created) H264 decoder.</param>
+        /// <param name="rm">Gives access to the GPU memory manager and the surface cache.</param>
+        /// <param name="state">NVDEC registers holding the picture setup, bitstream and output surface offsets.</param>
+        public static void Decode(NvdecDecoderContext context, ResourceManager rm, ref NvdecRegisters state)
+        {
+            PictureInfo pictureInfo = rm.Gmm.DeviceRead<PictureInfo>(state.SetDrvPicSetupOffset);
+            H264PictureInfo info = pictureInfo.Convert();
+
+            ReadOnlySpan<byte> bitstream = rm.Gmm.DeviceGetSpan(state.SetInBufBaseOffset, (int)pictureInfo.BitstreamSize);
+
+            // Picture dimensions are given in macroblocks.
+            int width = (int)pictureInfo.PicWidthInMbs * MbSizeInPixels;
+            int height = (int)pictureInfo.PicHeightInMbs * MbSizeInPixels;
+
+            int surfaceIndex = (int)pictureInfo.OutputSurfaceIndex;
+
+            uint lumaOffset = state.SetPictureLumaOffset[surfaceIndex];
+            uint chromaOffset = state.SetPictureChromaOffset[surfaceIndex];
+
+            Decoder decoder = context.GetH264Decoder();
+
+            // Offsets of 0 are passed on purpose: the cache only reads guest memory into the
+            // surface when an offset is non-zero, and here the surface is purely an output.
+            ISurface outputSurface = rm.Cache.Get(decoder, 0, 0, width, height);
+
+            try
+            {
+                if (decoder.Decode(ref info, outputSurface, bitstream))
+                {
+                    if (outputSurface.Field == FrameField.Progressive)
+                    {
+                        SurfaceWriter.Write(
+                            rm.Gmm,
+                            outputSurface,
+                            lumaOffset + pictureInfo.LumaFrameOffset,
+                            chromaOffset + pictureInfo.ChromaFrameOffset);
+                    }
+                    else
+                    {
+                        SurfaceWriter.WriteInterlaced(
+                            rm.Gmm,
+                            outputSurface,
+                            lumaOffset + pictureInfo.LumaTopFieldOffset,
+                            chromaOffset + pictureInfo.ChromaTopFieldOffset,
+                            lumaOffset + pictureInfo.LumaBottomFieldOffset,
+                            chromaOffset + pictureInfo.ChromaBottomFieldOffset);
+                    }
+                }
+            }
+            finally
+            {
+                // Always hand the surface back, even if decoding or the write-back throws.
+                // Otherwise its reference count never drops to zero and the slot can no longer
+                // be reused, which eventually exhausts the fixed-size surface pool.
+                rm.Cache.Put(outputSurface);
+            }
+        }
+ }
+}
diff --git a/src/Ryujinx.Graphics.Nvdec/Image/SurfaceCache.cs b/src/Ryujinx.Graphics.Nvdec/Image/SurfaceCache.cs
new file mode 100644
index 00000000..dc119673
--- /dev/null
+++ b/src/Ryujinx.Graphics.Nvdec/Image/SurfaceCache.cs
@@ -0,0 +1,174 @@
+using Ryujinx.Graphics.Gpu.Memory;
+using Ryujinx.Graphics.Video;
+using System;
+using System.Diagnostics;
+
+namespace Ryujinx.Graphics.Nvdec.Image
+{
+ class SurfaceCache
+ {
+ // Must be equal to at least the maximum number of surfaces
+ // that can be in use simultaneously (which is 17, since H264
+ // can have up to 16 reference frames, and we need another one
+ // for the current frame).
+ // Realistically, most codecs won't ever use more than 4 simultaneously.
+ private const int MaxItems = 17;
+
+ private struct CacheItem
+ {
+ public int ReferenceCount;
+ public uint LumaOffset;
+ public uint ChromaOffset;
+ public int Width;
+ public int Height;
+ public IDecoder Owner;
+ public ISurface Surface;
+ }
+
+ private readonly CacheItem[] _pool = new CacheItem[MaxItems];
+
+ private readonly MemoryManager _gmm;
+
+ public SurfaceCache(MemoryManager gmm)
+ {
+ _gmm = gmm;
+ }
+
+ /// <summary>
+ /// Gets a surface for the given decoder and dimensions, reusing a pooled one when possible.
+ /// The returned surface has its reference count incremented and must be released with <see cref="Put"/>.
+ /// </summary>
+ /// <remarks>
+ /// Lookup happens in three steps, all under the pool lock:
+ /// 1. an entry with the same owner, size and offsets (shared, even if already referenced);
+ /// 2. an unreferenced entry with the same owner and size (its offsets are replaced);
+ /// 3. a newly created surface that takes over the last pool slot.
+ /// In steps 2 and 3 the surface contents are read from guest memory when either offset is non-zero.
+ /// The entry that is used is always moved to the front of the pool.
+ /// </remarks>
+ /// <param name="decoder">Decoder that owns (and, if needed, creates) the surface.</param>
+ /// <param name="lumaOffset">Luma offset associated with the surface; passed to SurfaceReader.Read when non-zero.</param>
+ /// <param name="chromaOffset">Chroma offset associated with the surface; passed to SurfaceReader.Read when non-zero.</param>
+ /// <param name="width">Surface width.</param>
+ /// <param name="height">Surface height.</param>
+ /// <returns>The pooled or newly created surface.</returns>
+ /// <exception cref="InvalidOperationException">No compatible surface exists and the last pool slot is still referenced.</exception>
+ public ISurface Get(IDecoder decoder, uint lumaOffset, uint chromaOffset, int width, int height)
+ {
+ lock (_pool)
+ {
+ ISurface surface = null;
+
+ // Try to find a compatible surface with same parameters, and same offsets.
+ for (int i = 0; i < MaxItems; i++)
+ {
+ ref CacheItem item = ref _pool[i];
+
+ if (item.LumaOffset == lumaOffset &&
+ item.ChromaOffset == chromaOffset &&
+ item.Owner == decoder &&
+ item.Width == width &&
+ item.Height == height)
+ {
+ item.ReferenceCount++;
+ surface = item.Surface;
+ MoveToFront(i);
+ break;
+ }
+ }
+
+ // If we failed to find a perfect match, now ignore the offsets.
+ // Search backwards to replace the oldest compatible surface,
+ // this avoids thrashing frequently used surfaces.
+ // Now we need to ensure that the surface is not in use, as we'll change the data.
+ if (surface == null)
+ {
+ for (int i = MaxItems - 1; i >= 0; i--)
+ {
+ ref CacheItem item = ref _pool[i];
+
+ if (item.ReferenceCount == 0 && item.Owner == decoder && item.Width == width && item.Height == height)
+ {
+ item.ReferenceCount = 1;
+ item.LumaOffset = lumaOffset;
+ item.ChromaOffset = chromaOffset;
+ surface = item.Surface;
+
+ if ((lumaOffset | chromaOffset) != 0)
+ {
+ SurfaceReader.Read(_gmm, surface, lumaOffset, chromaOffset);
+ }
+
+ MoveToFront(i);
+ break;
+ }
+ }
+ }
+
+ // If everything else failed, we try to create a new surface,
+ // and insert it on the pool. We replace the oldest item on the
+ // pool to avoid thrashing frequently used surfaces.
+ // If even the oldest item is in use, that means that the entire pool
+ // is in use, in that case we throw as there's no place to insert
+ // the new surface.
+ if (surface == null)
+ {
+ if (_pool[MaxItems - 1].ReferenceCount == 0)
+ {
+ surface = decoder.CreateSurface(width, height);
+
+ if ((lumaOffset | chromaOffset) != 0)
+ {
+ SurfaceReader.Read(_gmm, surface, lumaOffset, chromaOffset);
+ }
+
+ // The last slot is rotated to index 0 first, so "item" below refers to the
+ // evicted entry; its old surface (if any) is disposed before being replaced.
+ MoveToFront(MaxItems - 1);
+ ref CacheItem item = ref _pool[0];
+ item.Surface?.Dispose();
+ item.ReferenceCount = 1;
+ item.LumaOffset = lumaOffset;
+ item.ChromaOffset = chromaOffset;
+ item.Width = width;
+ item.Height = height;
+ item.Owner = decoder;
+ item.Surface = surface;
+ }
+ else
+ {
+ throw new InvalidOperationException("No free slot on the surface pool.");
+ }
+ }
+
+ return surface;
+ }
+ }
+
+        /// <summary>
+        /// Releases one reference to a surface previously obtained from <see cref="Get"/>.
+        /// Surfaces that are not in the pool are ignored.
+        /// </summary>
+        /// <param name="surface">Surface to release.</param>
+        public void Put(ISurface surface)
+        {
+            lock (_pool)
+            {
+                // Locate the first slot that holds this exact surface instance.
+                int slot = 0;
+
+                while (slot < MaxItems && _pool[slot].Surface != surface)
+                {
+                    slot++;
+                }
+
+                if (slot == MaxItems)
+                {
+                    return;
+                }
+
+                ref CacheItem entry = ref _pool[slot];
+
+                entry.ReferenceCount--;
+                Debug.Assert(entry.ReferenceCount >= 0);
+            }
+        }
+
+        /// <summary>
+        /// Moves the pool entry at <paramref name="index"/> to slot 0, shifting the entries
+        /// that were in front of it one slot towards the back.
+        /// </summary>
+        /// <param name="index">Index of the entry to promote.</param>
+        private void MoveToFront(int index)
+        {
+            // Slot 0 is already the front, nothing to shift.
+            if (index == 0)
+            {
+                return;
+            }
+
+            CacheItem promoted = _pool[index];
+
+            Array.Copy(_pool, 0, _pool, 1, index);
+
+            _pool[0] = promoted;
+        }
+
+        /// <summary>
+        /// Disposes the surface of every pool entry that is currently unreferenced and
+        /// resets that entry to its default (empty) state.
+        /// </summary>
+        public void Trim()
+        {
+            lock (_pool)
+            {
+                for (int slot = 0; slot < MaxItems; slot++)
+                {
+                    // Entries that are still referenced must be left untouched.
+                    if (_pool[slot].ReferenceCount != 0)
+                    {
+                        continue;
+                    }
+
+                    _pool[slot].Surface?.Dispose();
+                    _pool[slot] = default;
+                }
+            }
+        }
+ }
+}
diff --git a/src/Ryujinx.Graphics.Nvdec/Image/SurfaceCommon.cs b/src/Ryujinx.Graphics.Nvdec/Image/SurfaceCommon.cs
new file mode 100644
index 00000000..6087f5b1
--- /dev/null
+++ b/src/Ryujinx.Graphics.Nvdec/Image/SurfaceCommon.cs
@@ -0,0 +1,26 @@
+using Ryujinx.Graphics.Texture;
+using Ryujinx.Graphics.Video;
+using System;
+
+namespace Ryujinx.Graphics.Nvdec.Image
+{
+    /// <summary>
+    /// Helpers shared by the surface reader and writer.
+    /// </summary>
+    static class SurfaceCommon
+    {
+        /// <summary>
+        /// Returns the total size reported by the block linear size calculator for a
+        /// plane of the given dimensions.
+        /// </summary>
+        public static int GetBlockLinearSize(int width, int height, int bytesPerPixel)
+            => SizeCalculator.GetBlockLinearTextureSize(width, height, 1, 1, 1, 1, 1, bytesPerPixel, 2, 1, 1).TotalSize;
+
+        /// <summary>
+        /// Copies the Y, U and V planes of <paramref name="src"/> into <paramref name="dst"/>, in that order.
+        /// </summary>
+        public static void Copy(ISurface src, ISurface dst)
+        {
+            static void CopyPlane(Plane from, Plane to) => from.AsSpan().CopyTo(to.AsSpan());
+
+            CopyPlane(src.YPlane, dst.YPlane);
+            CopyPlane(src.UPlane, dst.UPlane);
+            CopyPlane(src.VPlane, dst.VPlane);
+        }
+
+        /// <summary>
+        /// Wraps the memory described by a plane (pointer and length) in a span.
+        /// </summary>
+        public unsafe static Span<byte> AsSpan(this Plane plane)
+            => new Span<byte>((void*)plane.Pointer, plane.Length);
+    }
+}
diff --git a/src/Ryujinx.Graphics.Nvdec/Image/SurfaceReader.cs b/src/Ryujinx.Graphics.Nvdec/Image/SurfaceReader.cs
new file mode 100644
index 00000000..039a2583
--- /dev/null
+++ b/src/Ryujinx.Graphics.Nvdec/Image/SurfaceReader.cs
@@ -0,0 +1,133 @@
+using Ryujinx.Common;
+using Ryujinx.Graphics.Gpu.Memory;
+using Ryujinx.Graphics.Texture;
+using Ryujinx.Graphics.Video;
+using System;
+using System.Runtime.Intrinsics;
+using System.Runtime.Intrinsics.X86;
+using static Ryujinx.Graphics.Nvdec.Image.SurfaceCommon;
+
+namespace Ryujinx.Graphics.Nvdec.Image
+{
+ static class SurfaceReader
+ {
+        /// <summary>
+        /// Fills the planes of <paramref name="surface"/> from block linear data in guest memory.
+        /// </summary>
+        /// <param name="gmm">GPU memory manager to read from.</param>
+        /// <param name="surface">Destination surface.</param>
+        /// <param name="lumaOffset">Device offset of the luma data.</param>
+        /// <param name="chromaOffset">Device offset of the chroma data.</param>
+        public static void Read(MemoryManager gmm, ISurface surface, uint lumaOffset, uint chromaOffset)
+        {
+            // Luma: one byte per pixel.
+            int lumaSize = GetBlockLinearSize(surface.Width, surface.Height, 1);
+            ReadOnlySpan<byte> lumaData = gmm.DeviceGetSpan(lumaOffset, lumaSize);
+
+            ReadLuma(surface.YPlane.AsSpan(), lumaData, surface.Stride, surface.Width, surface.Height);
+
+            // Chroma: two bytes per pixel in the source, split into the U and V planes.
+            int chromaSize = GetBlockLinearSize(surface.UvWidth, surface.UvHeight, 2);
+            ReadOnlySpan<byte> chromaData = gmm.DeviceGetSpan(chromaOffset, chromaSize);
+
+            ReadChroma(
+                surface.UPlane.AsSpan(),
+                surface.VPlane.AsSpan(),
+                chromaData,
+                surface.UvStride,
+                surface.UvWidth,
+                surface.UvHeight);
+        }
+
+        /// <summary>
+        /// Converts a block linear luma plane (one byte per pixel) into the linear destination plane.
+        /// </summary>
+        private static void ReadLuma(Span<byte> dst, ReadOnlySpan<byte> src, int dstStride, int width, int height)
+            => LayoutConverter.ConvertBlockLinearToLinear(dst, width, height, dstStride, 1, 2, src);
+
+ /// <summary>
+ /// Reads chroma data from <paramref name="src"/> and splits it into separate U and V planes.
+ /// For every pixel the first source byte goes to U and the following byte goes to V.
+ /// </summary>
+ /// <param name="dstU">Destination U plane.</param>
+ /// <param name="dstV">Destination V plane.</param>
+ /// <param name="src">Source chroma data, addressed through OffsetCalculator.</param>
+ /// <param name="dstStride">Distance in bytes between two rows of a destination plane.</param>
+ /// <param name="width">Number of chroma pixels per row.</param>
+ /// <param name="height">Number of chroma rows.</param>
+ private unsafe static void ReadChroma(
+ Span<byte> dstU,
+ Span<byte> dstV,
+ ReadOnlySpan<byte> src,
+ int dstStride,
+ int width,
+ int height)
+ {
+ // NOTE(review): the last two arguments are presumably the GOB block height and the
+ // bytes per pixel (2, matching the size used by the caller) - confirm against OffsetCalculator.
+ OffsetCalculator calc = new OffsetCalculator(width, height, 0, false, 2, 2);
+
+ if (Sse2.IsSupported)
+ {
+ // Number of source bytes per row that can be handled in whole 64-byte steps.
+ int strideTrunc64 = BitUtils.AlignDown(width * 2, 64);
+
+ // The output pointers advance by "width" bytes per row; this skips the row padding.
+ int outStrideGap = dstStride - width;
+
+ fixed (byte* dstUPtr = dstU, dstVPtr = dstV, dataPtr = src)
+ {
+ byte* uPtr = dstUPtr;
+ byte* vPtr = dstVPtr;
+
+ for (int y = 0; y < height; y++)
+ {
+ calc.SetY(y);
+
+ // Each iteration consumes 64 source bytes (x is a byte offset here)
+ // and produces 32 U bytes and 32 V bytes.
+ for (int x = 0; x < strideTrunc64; x += 64, uPtr += 32, vPtr += 32)
+ {
+ // The 64 bytes are fetched as four 16-byte pieces at fixed distances
+ // from the computed offset (presumably the block linear placement of
+ // one 64-byte row segment - TODO confirm).
+ byte* offset = dataPtr + calc.GetOffsetWithLineOffset64(x);
+ byte* offset2 = offset + 0x20;
+ byte* offset3 = offset + 0x100;
+ byte* offset4 = offset + 0x120;
+
+ Vector128<byte> value = *(Vector128<byte>*)offset;
+ Vector128<byte> value2 = *(Vector128<byte>*)offset2;
+ Vector128<byte> value3 = *(Vector128<byte>*)offset3;
+ Vector128<byte> value4 = *(Vector128<byte>*)offset4;
+
+ // Four rounds of byte interleaving on each pair of vectors; the result
+ // gathers the even (U) bytes in u3x and the odd (V) bytes in v3x.
+ Vector128<byte> u00 = Sse2.UnpackLow(value, value2);
+ Vector128<byte> v00 = Sse2.UnpackHigh(value, value2);
+ Vector128<byte> u01 = Sse2.UnpackLow(value3, value4);
+ Vector128<byte> v01 = Sse2.UnpackHigh(value3, value4);
+
+ Vector128<byte> u10 = Sse2.UnpackLow(u00, v00);
+ Vector128<byte> v10 = Sse2.UnpackHigh(u00, v00);
+ Vector128<byte> u11 = Sse2.UnpackLow(u01, v01);
+ Vector128<byte> v11 = Sse2.UnpackHigh(u01, v01);
+
+ Vector128<byte> u20 = Sse2.UnpackLow(u10, v10);
+ Vector128<byte> v20 = Sse2.UnpackHigh(u10, v10);
+ Vector128<byte> u21 = Sse2.UnpackLow(u11, v11);
+ Vector128<byte> v21 = Sse2.UnpackHigh(u11, v11);
+
+ Vector128<byte> u30 = Sse2.UnpackLow(u20, v20);
+ Vector128<byte> v30 = Sse2.UnpackHigh(u20, v20);
+ Vector128<byte> u31 = Sse2.UnpackLow(u21, v21);
+ Vector128<byte> v31 = Sse2.UnpackHigh(u21, v21);
+
+ *(Vector128<byte>*)uPtr = u30;
+ *(Vector128<byte>*)(uPtr + 16) = u31;
+ *(Vector128<byte>*)vPtr = v30;
+ *(Vector128<byte>*)(vPtr + 16) = v31;
+ }
+
+ // Remaining pixels of the row, one at a time (x is a pixel index here,
+ // hence the byte count divided by 2).
+ for (int x = strideTrunc64 / 2; x < width; x++, uPtr++, vPtr++)
+ {
+ byte* offset = dataPtr + calc.GetOffset(x);
+
+ *uPtr = *offset;
+ *vPtr = *(offset + 1);
+ }
+
+ uPtr += outStrideGap;
+ vPtr += outStrideGap;
+ }
+ }
+ }
+ else
+ {
+ // Scalar fallback: same U/V split, one pixel at a time with bounds-checked spans.
+ for (int y = 0; y < height; y++)
+ {
+ int dstBaseOffset = y * dstStride;
+
+ calc.SetY(y);
+
+ for (int x = 0; x < width; x++)
+ {
+ int srcOffset = calc.GetOffset(x);
+
+ dstU[dstBaseOffset + x] = src[srcOffset];
+ dstV[dstBaseOffset + x] = src[srcOffset + 1];
+ }
+ }
+ }
+ }
+ }
+}
diff --git a/src/Ryujinx.Graphics.Nvdec/Image/SurfaceWriter.cs b/src/Ryujinx.Graphics.Nvdec/Image/SurfaceWriter.cs
new file mode 100644
index 00000000..cc5c251b
--- /dev/null
+++ b/src/Ryujinx.Graphics.Nvdec/Image/SurfaceWriter.cs
@@ -0,0 +1,175 @@
+using Ryujinx.Common;
+using Ryujinx.Graphics.Gpu.Memory;
+using Ryujinx.Graphics.Texture;
+using Ryujinx.Graphics.Video;
+using System;
+using System.Runtime.Intrinsics;
+using System.Runtime.Intrinsics.X86;
+using static Ryujinx.Graphics.Nvdec.Image.SurfaceCommon;
+using static Ryujinx.Graphics.Nvdec.MemoryExtensions;
+
+namespace Ryujinx.Graphics.Nvdec.Image
+{
+ static class SurfaceWriter
+ {
+        /// <summary>
+        /// Writes a progressive surface to guest memory: the Y plane at the luma offset and
+        /// the U/V planes, interleaved, at the chroma offset.
+        /// </summary>
+        /// <param name="gmm">GPU memory manager to write to.</param>
+        /// <param name="surface">Surface holding the decoded planes.</param>
+        /// <param name="lumaOffset">Device offset of the luma destination.</param>
+        /// <param name="chromaOffset">Device offset of the chroma destination.</param>
+        public static void Write(MemoryManager gmm, ISurface surface, uint lumaOffset, uint chromaOffset)
+        {
+            int width = surface.Width;
+            int height = surface.Height;
+
+            using var lumaRegion = gmm.GetWritableRegion(ExtendOffset(lumaOffset), GetBlockLinearSize(width, height, 1));
+
+            WriteLuma(lumaRegion.Memory.Span, surface.YPlane.AsSpan(), surface.Stride, width, height);
+
+            int uvWidth = surface.UvWidth;
+            int uvHeight = surface.UvHeight;
+
+            using var chromaRegion = gmm.GetWritableRegion(ExtendOffset(chromaOffset), GetBlockLinearSize(uvWidth, uvHeight, 2));
+
+            WriteChroma(
+                chromaRegion.Memory.Span,
+                surface.UPlane.AsSpan(),
+                surface.VPlane.AsSpan(),
+                surface.UvStride,
+                uvWidth,
+                uvHeight);
+        }
+
+        /// <summary>
+        /// Writes a surface to guest memory as two separate fields. The top field takes the
+        /// even rows of each plane and the bottom field the odd rows; this is done by doubling
+        /// the source stride and, for the bottom field, starting one row into the plane.
+        /// </summary>
+        /// <param name="gmm">GPU memory manager to write to.</param>
+        /// <param name="surface">Surface holding the decoded planes.</param>
+        /// <param name="lumaTopOffset">Device offset of the top field luma destination.</param>
+        /// <param name="chromaTopOffset">Device offset of the top field chroma destination.</param>
+        /// <param name="lumaBottomOffset">Device offset of the bottom field luma destination.</param>
+        /// <param name="chromaBottomOffset">Device offset of the bottom field chroma destination.</param>
+        public static void WriteInterlaced(
+            MemoryManager gmm,
+            ISurface surface,
+            uint lumaTopOffset,
+            uint chromaTopOffset,
+            uint lumaBottomOffset,
+            uint chromaBottomOffset)
+        {
+            int fieldHeight = surface.Height / 2;
+            int fieldStride = surface.Stride * 2;
+            int lumaFieldSize = GetBlockLinearSize(surface.Width, fieldHeight, 1);
+
+            using var lumaTop = gmm.GetWritableRegion(ExtendOffset(lumaTopOffset), lumaFieldSize);
+            using var lumaBottom = gmm.GetWritableRegion(ExtendOffset(lumaBottomOffset), lumaFieldSize);
+
+            Span<byte> yPlane = surface.YPlane.AsSpan();
+
+            WriteLuma(lumaTop.Memory.Span, yPlane, fieldStride, surface.Width, fieldHeight);
+            WriteLuma(lumaBottom.Memory.Span, yPlane.Slice(surface.Stride), fieldStride, surface.Width, fieldHeight);
+
+            int uvFieldHeight = surface.UvHeight / 2;
+            int uvFieldStride = surface.UvStride * 2;
+            int chromaFieldSize = GetBlockLinearSize(surface.UvWidth, uvFieldHeight, 2);
+
+            using var chromaTop = gmm.GetWritableRegion(ExtendOffset(chromaTopOffset), chromaFieldSize);
+            using var chromaBottom = gmm.GetWritableRegion(ExtendOffset(chromaBottomOffset), chromaFieldSize);
+
+            Span<byte> uPlane = surface.UPlane.AsSpan();
+            Span<byte> vPlane = surface.VPlane.AsSpan();
+
+            WriteChroma(chromaTop.Memory.Span, uPlane, vPlane, uvFieldStride, surface.UvWidth, uvFieldHeight);
+
+            WriteChroma(
+                chromaBottom.Memory.Span,
+                uPlane.Slice(surface.UvStride),
+                vPlane.Slice(surface.UvStride),
+                uvFieldStride,
+                surface.UvWidth,
+                uvFieldHeight);
+        }
+
+        /// <summary>
+        /// Converts a linear luma plane (one byte per pixel) into the block linear destination.
+        /// </summary>
+        private static void WriteLuma(Span<byte> dst, ReadOnlySpan<byte> src, int srcStride, int width, int height)
+            => LayoutConverter.ConvertLinearToBlockLinear(dst, width, height, srcStride, 1, 2, src);
+
+ /// <summary>
+ /// Interleaves separate U and V planes into <paramref name="dst"/>.
+ /// For every pixel the U byte is written first, immediately followed by the V byte.
+ /// </summary>
+ /// <param name="dst">Destination chroma data, addressed through OffsetCalculator.</param>
+ /// <param name="srcU">Source U plane.</param>
+ /// <param name="srcV">Source V plane.</param>
+ /// <param name="srcStride">Distance in bytes between two rows of a source plane.</param>
+ /// <param name="width">Number of chroma pixels per row.</param>
+ /// <param name="height">Number of chroma rows.</param>
+ private unsafe static void WriteChroma(
+ Span<byte> dst,
+ ReadOnlySpan<byte> srcU,
+ ReadOnlySpan<byte> srcV,
+ int srcStride,
+ int width,
+ int height)
+ {
+ // NOTE(review): the last two arguments are presumably the GOB block height and the
+ // bytes per pixel (2, matching the size used by the callers) - confirm against OffsetCalculator.
+ OffsetCalculator calc = new OffsetCalculator(width, height, 0, false, 2, 2);
+
+ if (Sse2.IsSupported)
+ {
+ // Number of destination bytes per row that can be handled in whole 64-byte steps.
+ int strideTrunc64 = BitUtils.AlignDown(width * 2, 64);
+
+ // The input pointers advance by "width" bytes per row; this skips the row padding.
+ int inStrideGap = srcStride - width;
+
+ fixed (byte* outputPtr = dst, srcUPtr = srcU, srcVPtr = srcV)
+ {
+ byte* inUPtr = srcUPtr;
+ byte* inVPtr = srcVPtr;
+
+ for (int y = 0; y < height; y++)
+ {
+ calc.SetY(y);
+
+ // Each iteration consumes 32 U bytes and 32 V bytes and produces
+ // 64 destination bytes (x is a byte offset here).
+ for (int x = 0; x < strideTrunc64; x += 64, inUPtr += 32, inVPtr += 32)
+ {
+ // The 64 bytes are stored as four 16-byte pieces at fixed distances
+ // from the computed offset (presumably the block linear placement of
+ // one 64-byte row segment - TODO confirm).
+ byte* offset = outputPtr + calc.GetOffsetWithLineOffset64(x);
+ byte* offset2 = offset + 0x20;
+ byte* offset3 = offset + 0x100;
+ byte* offset4 = offset + 0x120;
+
+ Vector128<byte> value = *(Vector128<byte>*)inUPtr;
+ Vector128<byte> value2 = *(Vector128<byte>*)inVPtr;
+ Vector128<byte> value3 = *(Vector128<byte>*)(inUPtr + 16);
+ Vector128<byte> value4 = *(Vector128<byte>*)(inVPtr + 16);
+
+ // A single byte interleave of U with V yields the U,V,U,V... ordering.
+ Vector128<byte> uv0 = Sse2.UnpackLow(value, value2);
+ Vector128<byte> uv1 = Sse2.UnpackHigh(value, value2);
+ Vector128<byte> uv2 = Sse2.UnpackLow(value3, value4);
+ Vector128<byte> uv3 = Sse2.UnpackHigh(value3, value4);
+
+ *(Vector128<byte>*)offset = uv0;
+ *(Vector128<byte>*)offset2 = uv1;
+ *(Vector128<byte>*)offset3 = uv2;
+ *(Vector128<byte>*)offset4 = uv3;
+ }
+
+ // Remaining pixels of the row, one at a time (x is a pixel index here,
+ // hence the byte count divided by 2).
+ for (int x = strideTrunc64 / 2; x < width; x++, inUPtr++, inVPtr++)
+ {
+ byte* offset = outputPtr + calc.GetOffset(x);
+
+ *offset = *inUPtr;
+ *(offset + 1) = *inVPtr;
+ }
+
+ inUPtr += inStrideGap;
+ inVPtr += inStrideGap;
+ }
+ }
+ }
+ else
+ {
+ // Scalar fallback: same U/V interleave, one pixel at a time with bounds-checked spans.
+ for (int y = 0; y < height; y++)
+ {
+ int srcBaseOffset = y * srcStride;
+
+ calc.SetY(y);
+
+ for (int x = 0; x < width; x++)
+ {
+ int dstOffset = calc.GetOffset(x);
+
+ dst[dstOffset + 0] = srcU[srcBaseOffset + x];
+ dst[dstOffset + 1] = srcV[srcBaseOffset + x];
+ }
+ }
+ }
+ }
+ }
+}
diff --git a/src/Ryujinx.Graphics.Nvdec/MemoryExtensions.cs b/src/Ryujinx.Graphics.Nvdec/MemoryExtensions.cs
new file mode 100644
index 00000000..2855a8c7
--- /dev/null
+++ b/src/Ryujinx.Graphics.Nvdec/MemoryExtensions.cs
@@ -0,0 +1,28 @@
+using Ryujinx.Graphics.Gpu.Memory;
+using System;
+
+namespace Ryujinx.Graphics.Nvdec
+{
+    /// <summary>
+    /// Memory manager helpers that take NVDEC device offsets. A device offset is a 32-bit
+    /// value that is shifted left by 8 bits (multiplied by 256) to form the address
+    /// passed to the memory manager.
+    /// </summary>
+    static class MemoryExtensions
+    {
+        /// <summary>Reads a value of type <typeparamref name="T"/> at the given device offset.</summary>
+        public static T DeviceRead<T>(this MemoryManager gmm, uint offset) where T : unmanaged
+            => gmm.Read<T>(ExtendOffset(offset));
+
+        /// <summary>Gets a read-only span of <paramref name="size"/> bytes at the given device offset.</summary>
+        public static ReadOnlySpan<byte> DeviceGetSpan(this MemoryManager gmm, uint offset, int size)
+            => gmm.GetSpan(ExtendOffset(offset), size);
+
+        /// <summary>Writes <paramref name="data"/> at the given device offset.</summary>
+        public static void DeviceWrite(this MemoryManager gmm, uint offset, ReadOnlySpan<byte> data)
+            => gmm.Write(ExtendOffset(offset), data);
+
+        /// <summary>Converts a device offset into a 64-bit address by shifting it left by 8 bits.</summary>
+        public static ulong ExtendOffset(uint offset)
+            => (ulong)offset << 8;
+    }
+}
diff --git a/src/Ryujinx.Graphics.Nvdec/NvdecDecoderContext.cs b/src/Ryujinx.Graphics.Nvdec/NvdecDecoderContext.cs
new file mode 100644
index 00000000..54934bc5
--- /dev/null
+++ b/src/Ryujinx.Graphics.Nvdec/NvdecDecoderContext.cs
@@ -0,0 +1,29 @@
+using System;
+
+namespace Ryujinx.Graphics.Nvdec
+{
+    /// <summary>
+    /// Holds the per-context FFmpeg decoders. Each decoder is created on first request
+    /// and released when the context is disposed.
+    /// </summary>
+    class NvdecDecoderContext : IDisposable
+    {
+        private FFmpeg.H264.Decoder _h264Decoder;
+        private FFmpeg.Vp8.Decoder _vp8Decoder;
+
+        /// <summary>Returns the H264 decoder of this context, creating it if needed.</summary>
+        public FFmpeg.H264.Decoder GetH264Decoder()
+        {
+            if (_h264Decoder is null)
+            {
+                _h264Decoder = new FFmpeg.H264.Decoder();
+            }
+
+            return _h264Decoder;
+        }
+
+        /// <summary>Returns the VP8 decoder of this context, creating it if needed.</summary>
+        public FFmpeg.Vp8.Decoder GetVp8Decoder()
+        {
+            if (_vp8Decoder is null)
+            {
+                _vp8Decoder = new FFmpeg.Vp8.Decoder();
+            }
+
+            return _vp8Decoder;
+        }
+
+        /// <summary>Disposes any decoder that was created and clears the fields.</summary>
+        public void Dispose()
+        {
+            if (_h264Decoder is not null)
+            {
+                _h264Decoder.Dispose();
+            }
+
+            _h264Decoder = null;
+
+            if (_vp8Decoder is not null)
+            {
+                _vp8Decoder.Dispose();
+            }
+
+            _vp8Decoder = null;
+        }
+    }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Nvdec/NvdecDevice.cs b/src/Ryujinx.Graphics.Nvdec/NvdecDevice.cs
new file mode 100644
index 00000000..ef8185f4
--- /dev/null
+++ b/src/Ryujinx.Graphics.Nvdec/NvdecDevice.cs
@@ -0,0 +1,83 @@
+using Ryujinx.Common.Logging;
+using Ryujinx.Graphics.Device;
+using Ryujinx.Graphics.Gpu.Memory;
+using Ryujinx.Graphics.Nvdec.Image;
+using System.Collections.Concurrent;
+using System.Collections.Generic;
+using System.Threading;
+
+namespace Ryujinx.Graphics.Nvdec
+{
+    /// <summary>
+    /// NVDEC device front-end: exposes the register file, manages decoder contexts and
+    /// runs a decode whenever the Execute register is written.
+    /// </summary>
+    public class NvdecDevice : IDeviceStateWithContext
+    {
+        private readonly ResourceManager _rm;
+        private readonly DeviceState<NvdecRegisters> _state;
+
+        private long _currentId;
+        private ConcurrentDictionary<long, NvdecDecoderContext> _contexts;
+        private NvdecDecoderContext _currentContext;
+
+        /// <summary>
+        /// Creates the device on top of the given GPU memory manager.
+        /// </summary>
+        public NvdecDevice(MemoryManager gmm)
+        {
+            // Writes to the Execute register trigger a decode; it has no read callback.
+            var callbacks = new Dictionary<string, RwCallback>
+            {
+                [nameof(NvdecRegisters.Execute)] = new RwCallback(Execute, null),
+            };
+
+            _rm = new ResourceManager(gmm, new SurfaceCache(gmm));
+            _state = new DeviceState<NvdecRegisters>(callbacks);
+            _contexts = new ConcurrentDictionary<long, NvdecDecoderContext>();
+        }
+
+        /// <summary>
+        /// Creates a new decoder context and returns its identifier.
+        /// </summary>
+        public long CreateContext()
+        {
+            long newId = Interlocked.Increment(ref _currentId);
+
+            _contexts.TryAdd(newId, new NvdecDecoderContext());
+
+            return newId;
+        }
+
+        /// <summary>
+        /// Removes and disposes the context with the given identifier (if it exists),
+        /// then trims unreferenced surfaces from the cache.
+        /// </summary>
+        public void DestroyContext(long id)
+        {
+            bool removed = _contexts.TryRemove(id, out NvdecDecoderContext removedContext);
+
+            if (removed)
+            {
+                removedContext.Dispose();
+            }
+
+            _rm.Cache.Trim();
+        }
+
+        /// <summary>
+        /// Makes the context with the given identifier the current one.
+        /// Unknown identifiers leave the current context unchanged.
+        /// </summary>
+        public void BindContext(long id)
+        {
+            if (!_contexts.TryGetValue(id, out NvdecDecoderContext found))
+            {
+                return;
+            }
+
+            _currentContext = found;
+        }
+
+        /// <summary>Reads the register at the given offset.</summary>
+        public int Read(int offset)
+        {
+            return _state.Read(offset);
+        }
+
+        /// <summary>Writes the register at the given offset.</summary>
+        public void Write(int offset, int data)
+        {
+            _state.Write(offset, data);
+        }
+
+        /// <summary>
+        /// Execute register callback: decodes with the codec selected by SetApplicationId.
+        /// </summary>
+        private void Execute(int data) => Decode((ApplicationId)_state.State.SetApplicationId);
+
+        /// <summary>
+        /// Dispatches to the decoder for the given codec; unsupported codecs are only logged.
+        /// </summary>
+        private void Decode(ApplicationId applicationId)
+        {
+            if (applicationId == ApplicationId.H264)
+            {
+                H264Decoder.Decode(_currentContext, _rm, ref _state.State);
+            }
+            else if (applicationId == ApplicationId.Vp8)
+            {
+                Vp8Decoder.Decode(_currentContext, _rm, ref _state.State);
+            }
+            else if (applicationId == ApplicationId.Vp9)
+            {
+                Vp9Decoder.Decode(_rm, ref _state.State);
+            }
+            else
+            {
+                Logger.Error?.Print(LogClass.Nvdec, $"Unsupported codec \"{applicationId}\".");
+            }
+        }
+    }
+}
diff --git a/src/Ryujinx.Graphics.Nvdec/NvdecRegisters.cs b/src/Ryujinx.Graphics.Nvdec/NvdecRegisters.cs
new file mode 100644
index 00000000..cf867783
--- /dev/null
+++ b/src/Ryujinx.Graphics.Nvdec/NvdecRegisters.cs
@@ -0,0 +1,63 @@
+using Ryujinx.Common.Memory;
+
+namespace Ryujinx.Graphics.Nvdec
+{
+ /// <summary>
+ /// NVDEC register file, used as the state type of the device's DeviceState.
+ /// </summary>
+ /// <remarks>
+ /// The field order defines the register layout, so fields must not be reordered, added or removed.
+ /// The hexadecimal suffix of the Reserved/Unknown fields appears to be the byte offset of the field,
+ /// assuming 4-byte registers and ArrayN types of exactly N elements - TODO confirm.
+ /// NOTE(review): under that assumption Reserved254 actually starts at 0x250, so its name looks off by 4.
+ /// Offsets stored in the "...Offset" registers are passed to the Device* memory helpers by the decoders,
+ /// which shift them left by 8 bits.
+ /// </remarks>
+ struct NvdecRegisters
+ {
+ // CS0649: the fields are never assigned from code in this file.
+#pragma warning disable CS0649
+ public Array64<uint> Reserved0;
+ public uint Nop;
+ public Array63<uint> Reserved104;
+ // Cast to ApplicationId by NvdecDevice to select the codec.
+ public uint SetApplicationId;
+ public uint SetWatchdogTimer;
+ public Array14<uint> Reserved208;
+ public uint SemaphoreA;
+ public uint SemaphoreB;
+ public uint SemaphoreC;
+ public uint CtxSaveArea;
+ public Array44<uint> Reserved254;
+ // NvdecDevice registers a write callback on this register that starts a decode.
+ public uint Execute;
+ public uint SemaphoreD;
+ public Array62<uint> Reserved308;
+ public uint SetControlParams;
+ public uint SetDrvPicSetupOffset;
+ public uint SetInBufBaseOffset;
+ public uint SetPictureIndex;
+ public uint SetSliceOffsetsBufOffset; // Also used by VC1
+ public uint SetColocDataOffset; // Also used by VC1
+ public uint SetHistoryOffset; // Used by VC1
+ public uint SetDisplayBufSize;
+ public uint SetHistogramOffset; // Used by VC1
+ public uint SetNvDecStatusOffset;
+ public uint SetDisplayBufLumaOffset;
+ public uint SetDisplayBufChromaOffset;
+ // One luma/chroma offset per surface slot (17 entries each).
+ public Array17<uint> SetPictureLumaOffset;
+ public Array17<uint> SetPictureChromaOffset;
+ public uint SetPicScratchBufOffset;
+ public uint SetExternalMvBufferOffset;
+ public uint SetCryptoData0Offset;
+ public uint SetCryptoData1Offset;
+ public Array14<uint> Unknown4C8;
+ public uint H264SetMbHistBufOffset;
+ public Array15<uint> Unknown504;
+ public uint Vp8SetProbDataOffset;
+ public uint Vp8SetHeaderPartitionBufBaseOffset;
+ public Array14<uint> Unknown548;
+ public uint HevcSetScalingListOffset;
+ public uint HevcSetTileSizesOffset;
+ public uint HevcSetFilterBufferOffset;
+ public uint HevcSetSaoBufferOffset;
+ public uint HevcSetSliceInfoBufferOffset;
+ public uint HevcSetSliceGroupIndex;
+ public Array10<uint> Unknown598;
+ public uint Vp9SetProbTabBufOffset;
+ public uint Vp9SetCtxCounterBufOffset;
+ public uint Vp9SetSegmentReadBufOffset;
+ public uint Vp9SetSegmentWriteBufOffset;
+ public uint Vp9SetTileSizeBufOffset;
+ public uint Vp9SetColMvWriteBufOffset;
+ public uint Vp9SetColMvReadBufOffset;
+ public uint Vp9SetFilterBufferOffset;
+#pragma warning restore CS0649
+ }
+}
diff --git a/src/Ryujinx.Graphics.Nvdec/NvdecStatus.cs b/src/Ryujinx.Graphics.Nvdec/NvdecStatus.cs
new file mode 100644
index 00000000..0712af88
--- /dev/null
+++ b/src/Ryujinx.Graphics.Nvdec/NvdecStatus.cs
@@ -0,0 +1,16 @@
+using Ryujinx.Graphics.Nvdec.Types.Vp9;
+
+namespace Ryujinx.Graphics.Nvdec
+{
+ /// <summary>
+ /// Decode status structure. The field order defines its layout, so fields must not be reordered.
+ /// </summary>
+ /// <remarks>
+ /// NOTE(review): presumably this is the structure located at the SetNvDecStatusOffset register
+ /// address - confirm against the code that uses it (not visible from this file).
+ /// </remarks>
+ struct NvdecStatus
+ {
+ // CS0649: the fields are never assigned from code in this file.
+#pragma warning disable CS0649
+ public uint MbsCorrectlyDecoded;
+ public uint MbsInError;
+ public uint Reserved;
+ public uint ErrorStatus;
+ // VP9 frame statistics (type comes from the Types.Vp9 namespace).
+ public FrameStats Stats;
+ public uint SliceHeaderErrorCode;
+#pragma warning restore CS0649
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Nvdec/ResourceManager.cs b/src/Ryujinx.Graphics.Nvdec/ResourceManager.cs
new file mode 100644
index 00000000..08d24258
--- /dev/null
+++ b/src/Ryujinx.Graphics.Nvdec/ResourceManager.cs
@@ -0,0 +1,17 @@
+using Ryujinx.Graphics.Gpu.Memory;
+using Ryujinx.Graphics.Nvdec.Image;
+
+namespace Ryujinx.Graphics.Nvdec
+{
+    /// <summary>
+    /// Bundles the resources shared by the decoders: the GPU memory manager and the surface cache.
+    /// </summary>
+    readonly struct ResourceManager
+    {
+        /// <summary>GPU memory manager used for guest memory access.</summary>
+        public MemoryManager Gmm { get; }
+
+        /// <summary>Cache of decoder surfaces.</summary>
+        public SurfaceCache Cache { get; }
+
+        /// <summary>
+        /// Creates a resource manager from the given memory manager and surface cache.
+        /// </summary>
+        public ResourceManager(MemoryManager gmm, SurfaceCache cache)
+        {
+            (Gmm, Cache) = (gmm, cache);
+        }
+    }
+}
diff --git a/src/Ryujinx.Graphics.Nvdec/Ryujinx.Graphics.Nvdec.csproj b/src/Ryujinx.Graphics.Nvdec/Ryujinx.Graphics.Nvdec.csproj
new file mode 100644
index 00000000..bfba98a7
--- /dev/null
+++ b/src/Ryujinx.Graphics.Nvdec/Ryujinx.Graphics.Nvdec.csproj
@@ -0,0 +1,18 @@
+<Project Sdk="Microsoft.NET.Sdk">
+
+ <PropertyGroup>
+ <TargetFramework>net7.0</TargetFramework>
+ <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
+ </PropertyGroup>
+
+ <ItemGroup>
+ <ProjectReference Include="..\Ryujinx.Common\Ryujinx.Common.csproj" />
+ <ProjectReference Include="..\Ryujinx.Graphics.Device\Ryujinx.Graphics.Device.csproj" />
+ <ProjectReference Include="..\Ryujinx.Graphics.Gpu\Ryujinx.Graphics.Gpu.csproj" />
+ <ProjectReference Include="..\Ryujinx.Graphics.Nvdec.FFmpeg\Ryujinx.Graphics.Nvdec.FFmpeg.csproj" />
+ <ProjectReference Include="..\Ryujinx.Graphics.Nvdec.Vp9\Ryujinx.Graphics.Nvdec.Vp9.csproj" />
+ <ProjectReference Include="..\Ryujinx.Graphics.Texture\Ryujinx.Graphics.Texture.csproj" />
+ <ProjectReference Include="..\Ryujinx.Graphics.Video\Ryujinx.Graphics.Video.csproj" />
+ </ItemGroup>
+
+</Project>
diff --git a/src/Ryujinx.Graphics.Nvdec/Types/H264/PictureInfo.cs b/src/Ryujinx.Graphics.Nvdec/Types/H264/PictureInfo.cs
new file mode 100644
index 00000000..7c779dff
--- /dev/null
+++ b/src/Ryujinx.Graphics.Nvdec/Types/H264/PictureInfo.cs
@@ -0,0 +1,122 @@
+using Ryujinx.Common.Memory;
+using Ryujinx.Graphics.Video;
+
+namespace Ryujinx.Graphics.Nvdec.Types.H264
+{
+ /// <summary>
+ /// H264 picture setup structure, read from guest memory by the H264 decoder
+ /// (at the address given by the SetDrvPicSetupOffset register).
+ /// </summary>
+ /// <remarks>
+ /// The field order defines the layout of the structure, so fields must not be reordered.
+ /// Several syntax elements are packed into <see cref="Flags"/> and <see cref="Flags2"/>
+ /// and exposed through the properties below.
+ /// </remarks>
+ struct PictureInfo
+ {
+ // CS0169/CS0649: fields are never assigned from code, and Unknown0 is never used.
+#pragma warning disable CS0169, CS0649
+ Array18<uint> Unknown0;
+ // Size passed to DeviceGetSpan when the decoder fetches the bitstream.
+ public uint BitstreamSize;
+ public uint NumSlices;
+ public uint Unknown50;
+ public uint Unknown54;
+ public uint Log2MaxPicOrderCntLsbMinus4;
+ public uint DeltaPicOrderAlwaysZeroFlag;
+ public uint FrameMbsOnlyFlag;
+ // Picture size in macroblocks; the decoder multiplies by 16 to get pixels.
+ public uint PicWidthInMbs;
+ public uint PicHeightInMbs;
+ public uint BlockLayout; // Not supported on T210
+ public uint EntropyCodingModeFlag;
+ public uint PicOrderPresentFlag;
+ public uint NumRefIdxL0ActiveMinus1;
+ public uint NumRefIdxL1ActiveMinus1;
+ public uint DeblockingFilterControlPresentFlag;
+ public uint RedundantPicCntPresentFlag;
+ public uint Transform8x8ModeFlag;
+ public uint LumaPitch;
+ public uint ChromaPitch;
+ // The following offsets are added to the per-surface luma/chroma register offsets by the decoder.
+ public uint LumaTopFieldOffset;
+ public uint LumaBottomFieldOffset;
+ public uint LumaFrameOffset;
+ public uint ChromaTopFieldOffset;
+ public uint ChromaBottomFieldOffset;
+ public uint ChromaFrameOffset;
+ public uint HistBufferSize;
+ // Packed bit fields, decoded by the properties below.
+ public ulong Flags;
+ public Array2<int> FieldOrderCnt;
+ public Array16<ReferenceFrame> RefFrames;
+ public Array6<Array16<byte>> ScalingLists4x4;
+ public Array2<Array64<byte>> ScalingLists8x8;
+ public byte MvcextNumInterViewRefsL0;
+ public byte MvcextNumInterViewRefsL1;
+ public ushort Padding2A2;
+ public uint Unknown2A4;
+ public uint Unknown2A8;
+ public uint Unknown2AC;
+ public Array16<byte> MvcextViewRefMasksL0;
+ public Array16<byte> MvcextViewRefMasksL1;
+ public uint Flags2;
+ public Array10<uint> Unknown2D4;
+#pragma warning restore CS0169, CS0649
+
+ // Single-bit flags: bits 0 to 6 of Flags.
+ public bool MbAdaptiveFrameFieldFlag => (Flags & (1 << 0)) != 0;
+ public bool Direct8x8InferenceFlag => (Flags & (1 << 1)) != 0;
+ public bool WeightedPredFlag => (Flags & (1 << 2)) != 0;
+ public bool ConstrainedIntraPredFlag => (Flags & (1 << 3)) != 0;
+ public bool IsReference => (Flags & (1 << 4)) != 0;
+ public bool FieldPicFlag => (Flags & (1 << 5)) != 0;
+ public bool BottomFieldFlag => (Flags & (1 << 6)) != 0;
+ // Unsigned fields: bits 8-11, 12-13 and 14-15 of Flags.
+ public uint Log2MaxFrameNumMinus4 => (uint)(Flags >> 8) & 0xf;
+ public ushort ChromaFormatIdc => (ushort)((Flags >> 12) & 3);
+ public uint PicOrderCntType => (uint)(Flags >> 14) & 3;
+ // Signed (sign-extended) fields: bits 16-21, 22-26 and 27-31 of Flags.
+ public int PicInitQpMinus26 => ExtractSx(Flags, 16, 6);
+ public int ChromaQpIndexOffset => ExtractSx(Flags, 22, 5);
+ public int SecondChromaQpIndexOffset => ExtractSx(Flags, 27, 5);
+ // Unsigned fields in the upper half of Flags: bits 32-33, 34-40, 41-45 and 46-61.
+ public uint WeightedBipredIdc => (uint)(Flags >> 32) & 3;
+ public uint OutputSurfaceIndex => (uint)(Flags >> 34) & 0x7f;
+ public uint ColIndex => (uint)(Flags >> 41) & 0x1f;
+ public ushort FrameNum => (ushort)(Flags >> 46);
+ // Bit 1 of Flags2.
+ public bool QpprimeYZeroTransformBypassFlag => (Flags2 & (1 << 1)) != 0;
+
+ /// <summary>
+ /// Extracts a signed bit field from a packed value and sign-extends it to 32 bits.
+ /// </summary>
+ /// <param name="packed">Packed value.</param>
+ /// <param name="lsb">Position of the least significant bit of the field.</param>
+ /// <param name="length">Width of the field in bits.</param>
+ /// <returns>The sign-extended field value.</returns>
+ private static int ExtractSx(ulong packed, int lsb, int length)
+ {
+ // Shift the field up so its top bit lands on bit 63, then use an arithmetic
+ // right shift on the signed value to bring it back down with sign extension.
+ return (int)((long)packed << (64 - (lsb + length)) >> (64 - length));
+ }
+
+ /// <summary>
+ /// Converts this structure into the <see cref="H264PictureInfo"/> consumed by the decoder.
+ /// </summary>
+ /// <remarks>
+ /// Values that are not taken from this structure are set to constants: no reference frame count,
+ /// no slice groups, FMO/ASO disabled, scaling matrix always present and frame type 0.
+ /// </remarks>
+ /// <returns>The converted picture information.</returns>
+ public H264PictureInfo Convert()
+ {
+ return new H264PictureInfo()
+ {
+ FieldOrderCnt = FieldOrderCnt,
+ IsReference = IsReference,
+ ChromaFormatIdc = ChromaFormatIdc,
+ FrameNum = FrameNum,
+ FieldPicFlag = FieldPicFlag,
+ BottomFieldFlag = BottomFieldFlag,
+ NumRefFrames = 0,
+ MbAdaptiveFrameFieldFlag = MbAdaptiveFrameFieldFlag,
+ ConstrainedIntraPredFlag = ConstrainedIntraPredFlag,
+ WeightedPredFlag = WeightedPredFlag,
+ WeightedBipredIdc = WeightedBipredIdc,
+ FrameMbsOnlyFlag = FrameMbsOnlyFlag != 0,
+ Transform8x8ModeFlag = Transform8x8ModeFlag != 0,
+ ChromaQpIndexOffset = ChromaQpIndexOffset,
+ SecondChromaQpIndexOffset = SecondChromaQpIndexOffset,
+ PicInitQpMinus26 = PicInitQpMinus26,
+ NumRefIdxL0ActiveMinus1 = NumRefIdxL0ActiveMinus1,
+ NumRefIdxL1ActiveMinus1 = NumRefIdxL1ActiveMinus1,
+ Log2MaxFrameNumMinus4 = Log2MaxFrameNumMinus4,
+ PicOrderCntType = PicOrderCntType,
+ Log2MaxPicOrderCntLsbMinus4 = Log2MaxPicOrderCntLsbMinus4,
+ DeltaPicOrderAlwaysZeroFlag = DeltaPicOrderAlwaysZeroFlag != 0,
+ Direct8x8InferenceFlag = Direct8x8InferenceFlag,
+ EntropyCodingModeFlag = EntropyCodingModeFlag != 0,
+ PicOrderPresentFlag = PicOrderPresentFlag != 0,
+ DeblockingFilterControlPresentFlag = DeblockingFilterControlPresentFlag != 0,
+ RedundantPicCntPresentFlag = RedundantPicCntPresentFlag != 0,
+ NumSliceGroupsMinus1 = 0,
+ SliceGroupMapType = 0,
+ SliceGroupChangeRateMinus1 = 0,
+ FmoAsoEnable = false,
+ ScalingMatrixPresent = true,
+ ScalingLists4x4 = ScalingLists4x4,
+ ScalingLists8x8 = ScalingLists8x8,
+ FrameType = 0,
+ PicWidthInMbsMinus1 = PicWidthInMbs - 1,
+ // The height in macroblocks is halved when the stream is not frame-MBs-only.
+ PicHeightInMapUnitsMinus1 = (PicHeightInMbs >> (FrameMbsOnlyFlag != 0 ? 0 : 1)) - 1,
+ QpprimeYZeroTransformBypassFlag = QpprimeYZeroTransformBypassFlag
+ };
+ }
+ }
+}
diff --git a/src/Ryujinx.Graphics.Nvdec/Types/H264/ReferenceFrame.cs b/src/Ryujinx.Graphics.Nvdec/Types/H264/ReferenceFrame.cs
new file mode 100644
index 00000000..d205a47a
--- /dev/null
+++ b/src/Ryujinx.Graphics.Nvdec/Types/H264/ReferenceFrame.cs
@@ -0,0 +1,15 @@
+using Ryujinx.Common.Memory;
+
+namespace Ryujinx.Graphics.Nvdec.Types.H264
+{
+ /// <summary>
+ /// One H264 reference frame entry: a flags word, two field order counts and a frame number.
+ /// </summary>
+ /// <remarks>
+ /// CS0649 is suppressed because no code assigns these fields.
+ /// NOTE(review): presumably the structure is filled by reinterpreting raw memory - confirm against the reader.
+ /// </remarks>
+ struct ReferenceFrame
+ {
+#pragma warning disable CS0649
+     public uint Flags;
+     public Array2<uint> FieldOrderCnt;
+     public uint FrameNum;
+#pragma warning restore CS0649
+
+     /// <summary>
+     /// The low seven bits of <see cref="Flags"/>.
+     /// </summary>
+     public uint OutputSurfaceIndex
+     {
+         get { return Flags & 0x7Fu; }
+     }
+ }
+}
diff --git a/src/Ryujinx.Graphics.Nvdec/Types/Vp8/PictureInfo.cs b/src/Ryujinx.Graphics.Nvdec/Types/Vp8/PictureInfo.cs
new file mode 100644
index 00000000..844f2103
--- /dev/null
+++ b/src/Ryujinx.Graphics.Nvdec/Types/Vp8/PictureInfo.cs
@@ -0,0 +1,75 @@
+using Ryujinx.Common.Memory;
+using Ryujinx.Graphics.Video;
+
+namespace Ryujinx.Graphics.Nvdec.Types.Vp8
+{
+ struct PictureInfo // VP8 picture setup block; Vp8Decoder obtains it with DeviceRead<PictureInfo> and only a handful of fields are consumed by the visible code
+ {
+#pragma warning disable CS0649
+ public Array13<uint> Unknown0;
+ public uint GpTimerTimeoutValue;
+ public ushort FrameWidth; // used by Vp8Decoder when requesting the output surface, and copied by Convert()
+ public ushort FrameHeight; // used by Vp8Decoder when requesting the output surface, and copied by Convert()
+ public byte KeyFrame; // 1: key frame - 0: not
+ public byte Version; // copied unchanged by Convert()
+ public byte Flags0;
+ // TileFormat : 2 // 0: TBL; 1: KBL;
+ // GobHeight : 3 // Set GOB height, 0: GOB_2, 1: GOB_4, 2: GOB_8, 3: GOB_16, 4: GOB_32 (NVDEC3 onwards)
+ // ReservedSurfaceFormat : 3
+ public byte ErrorConcealOn; // 1: error conceal on - 0: off
+ public uint FirstPartSize; // the size of first partition (frame header and mb header partition)
+ public uint HistBufferSize; // in units of 256
+ public uint VLDBufferSize; // in units of 1; Vp8Decoder uses it as the byte length of the input bitstream span
+ public Array2<uint> FrameStride; // [y_c]
+ public uint LumaTopOffset; // offset of luma top field in units of 256
+ public uint LumaBotOffset; // offset of luma bottom field in units of 256
+ public uint LumaFrameOffset; // offset of luma frame in units of 256
+ public uint ChromaTopOffset; // offset of chroma top field in units of 256
+ public uint ChromaBotOffset; // offset of chroma bottom field in units of 256
+ public uint ChromaFrameOffset; // offset of chroma frame in units of 256
+ public uint Flags1;
+ // EnableTFOutput : 1; // =1, enable dbfdma to output the display surface; if disabled, the following TF configuration is unused.
+ // Remap for VC1
+ // VC1MapYFlag : 1
+ // MapYValue : 3
+ // VC1MapUVFlag : 1
+ // MapUVValue : 3
+ // TF
+ // OutStride : 8
+ // TilingFormat : 3;
+ // OutputStructure : 1 // 0:frame, 1:field
+ // Reserved0 : 11
+ public Array2<int> OutputTop; // in units of 256
+ public Array2<int> OutputBottom; // in units of 256
+ // Histogram
+ public uint Flags2;
+ // EnableHistogram : 1 // enable histogram info collection
+ // HistogramStartX : 12 // start X of Histogram window
+ // HistogramStartY : 12 // start Y of Histogram window
+ // Reserved1 : 7
+ // HistogramEndX : 12 // end X of Histogram window
+ // HistogramEndY : 12 // end Y of Histogram window
+ // Reserved2 : 8
+ // Decode picture buffer related
+ public sbyte CurrentOutputMemoryLayout;
+ public Array3<sbyte> OutputMemoryLayout; // output NV12/NV24 setting. item 0:golden - 1: altref - 2: last
+ public byte SegmentationFeatureDataUpdate;
+ public Array3<byte> Reserved3;
+ public uint ResultValue; // ucode return result
+ public Array8<uint> PartitionOffset;
+ public Array3<uint> Reserved4;
+#pragma warning restore CS0649
+
+ public Vp8PictureInfo Convert() // builds the decoder-facing Vp8PictureInfo from five fields only; every other field is ignored here
+ {
+ return new Vp8PictureInfo()
+ {
+ KeyFrame = KeyFrame != 0, // byte flag becomes a boolean
+ FirstPartSize = FirstPartSize,
+ Version = Version,
+ FrameWidth = FrameWidth,
+ FrameHeight = FrameHeight
+ };
+ }
+ }
+}
diff --git a/src/Ryujinx.Graphics.Nvdec/Types/Vp9/BackwardUpdates.cs b/src/Ryujinx.Graphics.Nvdec/Types/Vp9/BackwardUpdates.cs
new file mode 100644
index 00000000..661e6cdd
--- /dev/null
+++ b/src/Ryujinx.Graphics.Nvdec/Types/Vp9/BackwardUpdates.cs
@@ -0,0 +1,72 @@
+using Ryujinx.Common.Memory;
+using Ryujinx.Graphics.Video;
+
+namespace Ryujinx.Graphics.Nvdec.Types.Vp9
+{
+ /// <summary>
+ /// VP9 backward adaptation counters in the layout written to guest memory.
+ /// </summary>
+ /// <remarks>
+ /// Field order is part of the layout and must not change: the structure is written by
+ /// reinterpreting a byte region as <see cref="BackwardUpdates"/>.
+ /// </remarks>
+ struct BackwardUpdates
+ {
+     public Array7<Array3<Array2<uint>>> InterModeCounts;
+     public Array4<Array10<uint>> YModeCounts;
+     public Array10<Array10<uint>> UvModeCounts;
+     public Array16<Array4<uint>> PartitionCounts;
+     public Array4<Array3<uint>> SwitchableInterpsCount;
+     public Array4<Array2<uint>> IntraInterCount;
+     public Array5<Array2<uint>> CompInterCount;
+     public Array5<Array2<Array2<uint>>> SingleRefCount;
+     public Array5<Array2<uint>> CompRefCount;
+     public Array2<Array4<uint>> Tx32x32;
+     public Array2<Array3<uint>> Tx16x16;
+     public Array2<Array2<uint>> Tx8x8;
+     public Array3<Array2<uint>> MbSkipCount;
+     public Array4<uint> Joints;
+     public Array2<Array2<uint>> Sign;
+     public Array2<Array11<uint>> Classes;
+     public Array2<Array2<uint>> Class0;
+     public Array2<Array10<Array2<uint>>> Bits;
+     public Array2<Array2<Array4<uint>>> Class0Fp;
+     public Array2<Array4<uint>> Fp;
+     public Array2<Array2<uint>> Class0Hp;
+     public Array2<Array2<uint>> Hp;
+     public Array4<Array2<Array2<Array6<Array6<Array4<uint>>>>>> CoefCounts;
+     public Array4<Array2<Array2<Array6<Array6<uint>>>>> EobCounts;
+
+     /// <summary>
+     /// Fills the structure from the decoder's counters.
+     /// </summary>
+     /// <param name="counts">Counters produced by the decoder; only read here.</param>
+     /// <remarks>
+     /// Every table except <see cref="InterModeCounts"/> is copied as-is. The inter-mode table is
+     /// rebuilt from the four per-context source counters as three pairs:
+     /// {c2, c0 + c1 + c3}, {c0, c1 + c3} and {c1, c3}.
+     /// </remarks>
+     public BackwardUpdates(ref Vp9BackwardUpdates counts)
+     {
+         InterModeCounts = new Array7<Array3<Array2<uint>>>();
+
+         for (int ctx = 0; ctx < 7; ctx++)
+         {
+             // Read the four source counters once, then derive the three pairs.
+             var c0 = counts.InterMode[ctx][0];
+             var c1 = counts.InterMode[ctx][1];
+             var c2 = counts.InterMode[ctx][2];
+             var c3 = counts.InterMode[ctx][3];
+
+             InterModeCounts[ctx][0][0] = c2;
+             InterModeCounts[ctx][0][1] = c0 + c1 + c3;
+             InterModeCounts[ctx][1][0] = c0;
+             InterModeCounts[ctx][1][1] = c1 + c3;
+             InterModeCounts[ctx][2][0] = c1;
+             InterModeCounts[ctx][2][1] = c3;
+         }
+
+         // Mode, partition and reference counters.
+         YModeCounts = counts.YMode;
+         UvModeCounts = counts.UvMode;
+         PartitionCounts = counts.Partition;
+         SwitchableInterpsCount = counts.SwitchableInterp;
+         IntraInterCount = counts.IntraInter;
+         CompInterCount = counts.CompInter;
+         SingleRefCount = counts.SingleRef;
+         CompRefCount = counts.CompRef;
+
+         // Transform size and skip counters.
+         Tx32x32 = counts.Tx32x32;
+         Tx16x16 = counts.Tx16x16;
+         Tx8x8 = counts.Tx8x8;
+         MbSkipCount = counts.Skip;
+
+         // Motion vector counters.
+         Joints = counts.Joints;
+         Sign = counts.Sign;
+         Classes = counts.Classes;
+         Class0 = counts.Class0;
+         Bits = counts.Bits;
+         Class0Fp = counts.Class0Fp;
+         Fp = counts.Fp;
+         Class0Hp = counts.Class0Hp;
+         Hp = counts.Hp;
+
+         // Coefficient counters.
+         CoefCounts = counts.Coef;
+         EobCounts = counts.EobBranch;
+     }
+ }
+}
diff --git a/src/Ryujinx.Graphics.Nvdec/Types/Vp9/EntropyProbs.cs b/src/Ryujinx.Graphics.Nvdec/Types/Vp9/EntropyProbs.cs
new file mode 100644
index 00000000..b2858d2d
--- /dev/null
+++ b/src/Ryujinx.Graphics.Nvdec/Types/Vp9/EntropyProbs.cs
@@ -0,0 +1,141 @@
+using Ryujinx.Common.Memory;
+using Ryujinx.Graphics.Video;
+
+namespace Ryujinx.Graphics.Nvdec.Types.Vp9
+{
+ /// <summary>
+ /// VP9 probability tables in the layout read from guest memory.
+ /// </summary>
+ /// <remarks>
+ /// Field order, including the padding members, is part of the layout and must not change.
+ /// CS0649 is suppressed because no code assigns these fields; Vp9Decoder obtains the
+ /// structure with <c>DeviceRead&lt;EntropyProbs&gt;</c>.
+ /// </remarks>
+ struct EntropyProbs
+ {
+#pragma warning disable CS0649
+     public Array10<Array10<Array8<byte>>> KfYModeProbE0ToE7;
+     public Array10<Array10<byte>> KfYModeProbE8;
+     public Array3<byte> Padding384;
+     public Array7<byte> SegTreeProbs;
+     public Array3<byte> SegPredProbs;
+     public Array15<byte> Padding391;
+     public Array10<Array8<byte>> KfUvModeProbE0ToE7;
+     public Array10<byte> KfUvModeProbE8;
+     public Array6<byte> Padding3FA;
+     public Array7<Array4<byte>> InterModeProb;
+     public Array4<byte> IntraInterProb;
+     public Array10<Array8<byte>> UvModeProbE0ToE7;
+     public Array2<Array1<byte>> Tx8x8Prob;
+     public Array2<Array2<byte>> Tx16x16Prob;
+     public Array2<Array3<byte>> Tx32x32Prob;
+     public Array4<byte> YModeProbE8;
+     public Array4<Array8<byte>> YModeProbE0ToE7;
+     public Array16<Array4<byte>> KfPartitionProb;
+     public Array16<Array4<byte>> PartitionProb;
+     public Array10<byte> UvModeProbE8;
+     public Array4<Array2<byte>> SwitchableInterpProb;
+     public Array5<byte> CompInterProb;
+     public Array4<byte> SkipProbs;
+     public Array3<byte> Joints;
+     public Array2<byte> Sign;
+     public Array2<Array1<byte>> Class0;
+     public Array2<Array3<byte>> Fp;
+     public Array2<byte> Class0Hp;
+     public Array2<byte> Hp;
+     public Array2<Array10<byte>> Classes;
+     public Array2<Array2<Array3<byte>>> Class0Fp;
+     public Array2<Array10<byte>> Bits;
+     public Array5<Array2<byte>> SingleRefProb;
+     public Array5<byte> CompRefProb;
+     public Array17<byte> Padding58F;
+     public Array4<Array2<Array2<Array6<Array6<Array4<byte>>>>>> CoefProbs;
+#pragma warning restore CS0649
+
+     /// <summary>
+     /// Copies these tables into the decoder's <see cref="Vp9EntropyProbs"/>.
+     /// </summary>
+     /// <param name="fc">Destination probability context; overwritten in place.</param>
+     /// <remarks>
+     /// Tables stored here as an eight-entry "E0ToE7" part plus a separate "E8" part are merged
+     /// into nine-entry rows. For the tables whose rows hold four entries here (inter mode,
+     /// partition, skip and coefficient probabilities) only the first three entries are copied.
+     /// </remarks>
+     public void Convert(ref Vp9EntropyProbs fc)
+     {
+         for (int above = 0; above < 10; above++)
+         {
+             for (int left = 0; left < 10; left++)
+             {
+                 for (int e = 0; e < 8; e++)
+                 {
+                     fc.KfYModeProb[above][left][e] = KfYModeProbE0ToE7[above][left][e];
+                 }
+
+                 // Ninth entry lives in its own table.
+                 fc.KfYModeProb[above][left][8] = KfYModeProbE8[above][left];
+             }
+         }
+
+         fc.SegTreeProb = SegTreeProbs;
+         fc.SegPredProb = SegPredProbs;
+
+         for (int ctx = 0; ctx < 7; ctx++)
+         {
+             for (int e = 0; e < 3; e++)
+             {
+                 fc.InterModeProb[ctx][e] = InterModeProb[ctx][e];
+             }
+         }
+
+         fc.IntraInterProb = IntraInterProb;
+
+         for (int mode = 0; mode < 10; mode++)
+         {
+             for (int e = 0; e < 8; e++)
+             {
+                 fc.KfUvModeProb[mode][e] = KfUvModeProbE0ToE7[mode][e];
+                 fc.UvModeProb[mode][e] = UvModeProbE0ToE7[mode][e];
+             }
+
+             // Ninth entry lives in its own table.
+             fc.KfUvModeProb[mode][8] = KfUvModeProbE8[mode];
+             fc.UvModeProb[mode][8] = UvModeProbE8[mode];
+         }
+
+         fc.Tx8x8Prob = Tx8x8Prob;
+         fc.Tx16x16Prob = Tx16x16Prob;
+         fc.Tx32x32Prob = Tx32x32Prob;
+
+         for (int group = 0; group < 4; group++)
+         {
+             for (int e = 0; e < 8; e++)
+             {
+                 fc.YModeProb[group][e] = YModeProbE0ToE7[group][e];
+             }
+
+             // Ninth entry lives in its own table.
+             fc.YModeProb[group][8] = YModeProbE8[group];
+         }
+
+         for (int ctx = 0; ctx < 16; ctx++)
+         {
+             for (int e = 0; e < 3; e++)
+             {
+                 fc.KfPartitionProb[ctx][e] = KfPartitionProb[ctx][e];
+                 fc.PartitionProb[ctx][e] = PartitionProb[ctx][e];
+             }
+         }
+
+         fc.SwitchableInterpProb = SwitchableInterpProb;
+         fc.CompInterProb = CompInterProb;
+
+         for (int ctx = 0; ctx < 3; ctx++)
+         {
+             fc.SkipProb[ctx] = SkipProbs[ctx];
+         }
+
+         fc.Joints = Joints;
+         fc.Sign = Sign;
+         fc.Class0 = Class0;
+         fc.Fp = Fp;
+         fc.Class0Hp = Class0Hp;
+         fc.Hp = Hp;
+         fc.Classes = Classes;
+         fc.Class0Fp = Class0Fp;
+         fc.Bits = Bits;
+         fc.SingleRefProb = SingleRefProb;
+         fc.CompRefProb = CompRefProb;
+
+         for (int a = 0; a < 4; a++)
+         {
+             for (int b = 0; b < 2; b++)
+             {
+                 for (int c = 0; c < 2; c++)
+                 {
+                     for (int d = 0; d < 6; d++)
+                     {
+                         for (int e = 0; e < 6; e++)
+                         {
+                             for (int f = 0; f < 3; f++)
+                             {
+                                 fc.CoefProbs[a][b][c][d][e][f] = CoefProbs[a][b][c][d][e][f];
+                             }
+                         }
+                     }
+                 }
+             }
+         }
+     }
+ }
+}
diff --git a/src/Ryujinx.Graphics.Nvdec/Types/Vp9/FrameFlags.cs b/src/Ryujinx.Graphics.Nvdec/Types/Vp9/FrameFlags.cs
new file mode 100644
index 00000000..88f1ac20
--- /dev/null
+++ b/src/Ryujinx.Graphics.Nvdec/Types/Vp9/FrameFlags.cs
@@ -0,0 +1,12 @@
+namespace Ryujinx.Graphics.Nvdec.Types.Vp9
+{
+ /// <summary>
+ /// Bit flags describing the current and previous VP9 frame.
+ /// </summary>
+ /// <remarks>
+ /// Each member occupies a single bit so values can be combined and tested with
+ /// <c>HasFlag</c> or bitwise operators. Marked with <see cref="System.FlagsAttribute"/>
+ /// so combined values format as a member list rather than a raw number.
+ /// </remarks>
+ [System.Flags]
+ enum FrameFlags : uint
+ {
+     IsKeyFrame = 1 << 0,
+     LastFrameIsKeyFrame = 1 << 1,
+     FrameSizeChanged = 1 << 2,
+     ErrorResilientMode = 1 << 3,
+     LastShowFrame = 1 << 4,
+     IntraOnly = 1 << 5
+ }
+}
diff --git a/src/Ryujinx.Graphics.Nvdec/Types/Vp9/FrameSize.cs b/src/Ryujinx.Graphics.Nvdec/Types/Vp9/FrameSize.cs
new file mode 100644
index 00000000..d449ec4d
--- /dev/null
+++ b/src/Ryujinx.Graphics.Nvdec/Types/Vp9/FrameSize.cs
@@ -0,0 +1,12 @@
+namespace Ryujinx.Graphics.Nvdec.Types.Vp9
+{
+ struct FrameSize // Size record for one VP9 frame; the VP9 PictureInfo embeds four of these (last, golden, alt, current)
+ {
+#pragma warning disable CS0649
+ public ushort Width; // passed to the surface cache by Vp9Decoder and divided by 8 (rounded up) to get the motion-vector buffer columns
+ public ushort Height; // passed to the surface cache by Vp9Decoder and divided by 8 (rounded up) to get the motion-vector buffer rows
+ public ushort LumaPitch; // not read by the visible code; NOTE(review): presumably the luma row pitch - confirm units
+ public ushort ChromaPitch; // not read by the visible code; NOTE(review): presumably the chroma row pitch - confirm units
+#pragma warning restore CS0649
+ }
+}
diff --git a/src/Ryujinx.Graphics.Nvdec/Types/Vp9/FrameStats.cs b/src/Ryujinx.Graphics.Nvdec/Types/Vp9/FrameStats.cs
new file mode 100644
index 00000000..26aab506
--- /dev/null
+++ b/src/Ryujinx.Graphics.Nvdec/Types/Vp9/FrameStats.cs
@@ -0,0 +1,21 @@
+namespace Ryujinx.Graphics.Nvdec.Types.Vp9
+{
+ struct FrameStats // Thirteen consecutive 32-bit counters/status words; no field is referenced by the visible code. NOTE(review): looks like a decoder status layout - confirm against the consumer
+ {
+#pragma warning disable CS0649
+ public uint Unknown0; // purpose unknown; the name suffix matches its byte offset (0x0)
+ public uint Unknown4; // purpose unknown; the name suffix matches its byte offset (0x4)
+ public uint Pass2CycleCount;
+ public uint ErrorStatus;
+ public uint FrameStatusIntraCnt;
+ public uint FrameStatusInterCnt;
+ public uint FrameStatusSkipCtuCount;
+ public uint FrameStatusFwdMvxCnt;
+ public uint FrameStatusFwdMvyCnt;
+ public uint FrameStatusBwdMvxCnt;
+ public uint FrameStatusBwdMvyCnt;
+ public uint ErrorCtbPos;
+ public uint ErrorSlicePos;
+#pragma warning restore CS0649
+ }
+}
diff --git a/src/Ryujinx.Graphics.Nvdec/Types/Vp9/LoopFilter.cs b/src/Ryujinx.Graphics.Nvdec/Types/Vp9/LoopFilter.cs
new file mode 100644
index 00000000..7cb0fd7a
--- /dev/null
+++ b/src/Ryujinx.Graphics.Nvdec/Types/Vp9/LoopFilter.cs
@@ -0,0 +1,13 @@
+using Ryujinx.Common.Memory;
+
+namespace Ryujinx.Graphics.Nvdec.Types.Vp9
+{
+ struct LoopFilter // Loop filter delta settings; embedded in the VP9 PictureInfo as the Lf field
+ {
+#pragma warning disable CS0649
+ public byte ModeRefDeltaEnabled; // PictureInfo.Convert() treats any non-zero value as true
+ public Array4<sbyte> RefDeltas; // copied unchanged into Vp9PictureInfo.RefDeltas
+ public Array2<sbyte> ModeDeltas; // copied unchanged into Vp9PictureInfo.ModeDeltas
+#pragma warning restore CS0649
+ }
+}
diff --git a/src/Ryujinx.Graphics.Nvdec/Types/Vp9/PictureInfo.cs b/src/Ryujinx.Graphics.Nvdec/Types/Vp9/PictureInfo.cs
new file mode 100644
index 00000000..7d06f747
--- /dev/null
+++ b/src/Ryujinx.Graphics.Nvdec/Types/Vp9/PictureInfo.cs
@@ -0,0 +1,87 @@
+using Ryujinx.Common.Memory;
+using Ryujinx.Graphics.Video;
+
+namespace Ryujinx.Graphics.Nvdec.Types.Vp9
+{
+ /// <summary>
+ /// VP9 picture setup block in the layout read from guest memory.
+ /// </summary>
+ /// <remarks>
+ /// Field order, including unknown and padding members, is part of the layout and must not
+ /// change. CS0649 is suppressed because no code assigns these fields; Vp9Decoder obtains the
+ /// structure with <c>DeviceRead&lt;PictureInfo&gt;</c>.
+ /// </remarks>
+ struct PictureInfo
+ {
+#pragma warning disable CS0649
+     public Array12<uint> Unknown0;
+     public uint BitstreamSize;
+     public uint IsEncrypted;
+     public uint Unknown38;
+     public uint Reserved3C;
+     public uint BlockLayout; // Not supported on T210
+     public uint WorkBufferSizeShr8;
+     public FrameSize LastFrameSize;
+     public FrameSize GoldenFrameSize;
+     public FrameSize AltFrameSize;
+     public FrameSize CurrentFrameSize;
+     public FrameFlags Flags;
+     public Array4<sbyte> RefFrameSignBias;
+     public byte FirstLevel;
+     public byte SharpnessLevel;
+     public byte BaseQIndex;
+     public byte YDcDeltaQ;
+     public byte UvAcDeltaQ;
+     public byte UvDcDeltaQ;
+     public byte Lossless;
+     public byte TxMode;
+     public byte AllowHighPrecisionMv;
+     public byte InterpFilter;
+     public byte ReferenceMode;
+     public sbyte CompFixedRef;
+     public Array2<sbyte> CompVarRef;
+     public byte Log2TileCols;
+     public byte Log2TileRows;
+     public Segmentation Seg;
+     public LoopFilter Lf;
+     public byte PaddingEB;
+     public uint WorkBufferSizeShr8New; // Not supported on T210
+     public uint SurfaceParams; // Not supported on T210
+     public uint UnknownF4;
+     public uint UnknownF8;
+     public uint UnknownFC;
+#pragma warning restore CS0649
+
+     /// <summary>
+     /// Bits 1 to 4 of <see cref="SurfaceParams"/>.
+     /// </summary>
+     public uint BitDepth
+     {
+         get { return (SurfaceParams >> 1) & 0xFu; }
+     }
+
+     /// <summary>
+     /// Builds the <see cref="Vp9PictureInfo"/> handed to the decoder from this structure.
+     /// </summary>
+     /// <remarks>
+     /// Byte flags become booleans through a non-zero test. <c>UsePrevInFindMvRefs</c> is true
+     /// only when <see cref="FrameFlags.LastShowFrame"/> is set and none of
+     /// <see cref="FrameFlags.ErrorResilientMode"/>, <see cref="FrameFlags.FrameSizeChanged"/>,
+     /// <see cref="FrameFlags.IntraOnly"/> or <see cref="FrameFlags.LastFrameIsKeyFrame"/> is set.
+     /// </remarks>
+     /// <returns>The populated <see cref="Vp9PictureInfo"/>.</returns>
+     public Vp9PictureInfo Convert()
+     {
+         // Any one of these bits rules out reusing the previous frame's motion vectors.
+         const FrameFlags PrevMvsBlockers =
+             FrameFlags.ErrorResilientMode |
+             FrameFlags.FrameSizeChanged |
+             FrameFlags.IntraOnly |
+             FrameFlags.LastFrameIsKeyFrame;
+
+         bool isKeyFrame = (Flags & FrameFlags.IsKeyFrame) != 0;
+         bool intraOnly = (Flags & FrameFlags.IntraOnly) != 0;
+         bool lastShown = (Flags & FrameFlags.LastShowFrame) != 0;
+         bool usePrevMvs = (Flags & PrevMvsBlockers) == 0 && lastShown;
+
+         Vp9PictureInfo converted = new Vp9PictureInfo()
+         {
+             IsKeyFrame = isKeyFrame,
+             IntraOnly = intraOnly,
+             UsePrevInFindMvRefs = usePrevMvs,
+             RefFrameSignBias = RefFrameSignBias,
+             BaseQIndex = BaseQIndex,
+             YDcDeltaQ = YDcDeltaQ,
+             UvDcDeltaQ = UvDcDeltaQ,
+             UvAcDeltaQ = UvAcDeltaQ,
+             Lossless = Lossless != 0,
+             TransformMode = TxMode,
+             AllowHighPrecisionMv = AllowHighPrecisionMv != 0,
+             InterpFilter = InterpFilter,
+             ReferenceMode = ReferenceMode,
+             CompFixedRef = CompFixedRef,
+             CompVarRef = CompVarRef,
+             Log2TileCols = Log2TileCols,
+             Log2TileRows = Log2TileRows,
+             SegmentEnabled = Seg.Enabled != 0,
+             SegmentMapUpdate = Seg.UpdateMap != 0,
+             SegmentMapTemporalUpdate = Seg.TemporalUpdate != 0,
+             SegmentAbsDelta = Seg.AbsDelta,
+             SegmentFeatureEnable = Seg.FeatureMask,
+             SegmentFeatureData = Seg.FeatureData,
+             ModeRefDeltaEnabled = Lf.ModeRefDeltaEnabled != 0,
+             RefDeltas = Lf.RefDeltas,
+             ModeDeltas = Lf.ModeDeltas
+         };
+
+         return converted;
+     }
+ }
+}
diff --git a/src/Ryujinx.Graphics.Nvdec/Types/Vp9/Segmentation.cs b/src/Ryujinx.Graphics.Nvdec/Types/Vp9/Segmentation.cs
new file mode 100644
index 00000000..f6c4f0b1
--- /dev/null
+++ b/src/Ryujinx.Graphics.Nvdec/Types/Vp9/Segmentation.cs
@@ -0,0 +1,16 @@
+using Ryujinx.Common.Memory;
+
+namespace Ryujinx.Graphics.Nvdec.Types.Vp9
+{
+ struct Segmentation // Segmentation settings; embedded in the VP9 PictureInfo as the Seg field
+ {
+#pragma warning disable CS0649
+ public byte Enabled; // PictureInfo.Convert() treats any non-zero value as true
+ public byte UpdateMap; // PictureInfo.Convert() treats any non-zero value as true
+ public byte TemporalUpdate; // PictureInfo.Convert() treats any non-zero value as true
+ public byte AbsDelta; // copied unchanged into Vp9PictureInfo.SegmentAbsDelta
+ public Array8<uint> FeatureMask; // copied unchanged into Vp9PictureInfo.SegmentFeatureEnable
+ public Array8<Array4<short>> FeatureData; // copied unchanged into Vp9PictureInfo.SegmentFeatureData
+#pragma warning restore CS0649
+ }
+}
diff --git a/src/Ryujinx.Graphics.Nvdec/Vp8Decoder.cs b/src/Ryujinx.Graphics.Nvdec/Vp8Decoder.cs
new file mode 100644
index 00000000..cce9a574
--- /dev/null
+++ b/src/Ryujinx.Graphics.Nvdec/Vp8Decoder.cs
@@ -0,0 +1,33 @@
+using Ryujinx.Graphics.Nvdec.FFmpeg.Vp8;
+using Ryujinx.Graphics.Nvdec.Image;
+using Ryujinx.Graphics.Nvdec.Types.Vp8;
+using Ryujinx.Graphics.Video;
+using System;
+
+namespace Ryujinx.Graphics.Nvdec
+{
+ /// <summary>
+ /// Decodes a single VP8 frame described by the NVDEC registers.
+ /// </summary>
+ static class Vp8Decoder
+ {
+     /// <summary>
+     /// Reads the picture setup block and bitstream, decodes the frame and, on success,
+     /// writes the decoded surface at the luma/chroma offsets taken from index 3 of the
+     /// picture offset registers.
+     /// </summary>
+     /// <param name="context">Decoder context that supplies the VP8 decoder instance.</param>
+     /// <param name="rm">Provides the memory manager and the surface cache.</param>
+     /// <param name="state">Register state holding the buffer offsets.</param>
+     public static void Decode(NvdecDecoderContext context, ResourceManager rm, ref NvdecRegisters state)
+     {
+         PictureInfo pictureInfo = rm.Gmm.DeviceRead<PictureInfo>(state.SetDrvPicSetupOffset);
+         ReadOnlySpan<byte> bitstream = rm.Gmm.DeviceGetSpan(state.SetInBufBaseOffset, (int)pictureInfo.VLDBufferSize);
+
+         Decoder decoder = context.GetVp8Decoder();
+
+         ISurface outputSurface = rm.Cache.Get(decoder, 0, 0, pictureInfo.FrameWidth, pictureInfo.FrameHeight);
+
+         try
+         {
+             Vp8PictureInfo info = pictureInfo.Convert();
+
+             uint lumaOffset = state.SetPictureLumaOffset[3];
+             uint chromaOffset = state.SetPictureChromaOffset[3];
+
+             if (decoder.Decode(ref info, outputSurface, bitstream))
+             {
+                 SurfaceWriter.Write(rm.Gmm, outputSurface, lumaOffset, chromaOffset);
+             }
+         }
+         finally
+         {
+             // Hand the surface back even when decoding or the write-back throws,
+             // so a failed frame does not leave it held by this call.
+             rm.Cache.Put(outputSurface);
+         }
+     }
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Nvdec/Vp9Decoder.cs b/src/Ryujinx.Graphics.Nvdec/Vp9Decoder.cs
new file mode 100644
index 00000000..9bb3529e
--- /dev/null
+++ b/src/Ryujinx.Graphics.Nvdec/Vp9Decoder.cs
@@ -0,0 +1,90 @@
+using Ryujinx.Common;
+using Ryujinx.Graphics.Gpu.Memory;
+using Ryujinx.Graphics.Nvdec.Image;
+using Ryujinx.Graphics.Nvdec.Types.Vp9;
+using Ryujinx.Graphics.Nvdec.Vp9;
+using Ryujinx.Graphics.Video;
+using System;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+using static Ryujinx.Graphics.Nvdec.MemoryExtensions;
+
+namespace Ryujinx.Graphics.Nvdec
+{
+ /// <summary>
+ /// Decodes a single VP9 frame described by the NVDEC registers.
+ /// </summary>
+ static class Vp9Decoder
+ {
+     // Bytes reserved in the motion-vector buffers per 8x8 unit of the frame.
+     // NOTE(review): presumably the size of Vp9MvRef - confirm before replacing with a SizeOf call.
+     private const int MvBytesPerUnit = 16;
+
+     // Single decoder instance shared by every call; never reassigned.
+     private static readonly Decoder _decoder = new Decoder();
+
+     /// <summary>
+     /// Reads the picture setup block, probability tables and bitstream, decodes the frame,
+     /// writes the decoded surface on success, and always writes the backward update counters.
+     /// </summary>
+     /// <param name="rm">Provides the memory manager and the surface cache.</param>
+     /// <param name="state">Register state holding the buffer offsets.</param>
+     public static unsafe void Decode(ResourceManager rm, ref NvdecRegisters state)
+     {
+         PictureInfo pictureInfo = rm.Gmm.DeviceRead<PictureInfo>(state.SetDrvPicSetupOffset);
+         EntropyProbs entropy = rm.Gmm.DeviceRead<EntropyProbs>(state.Vp9SetProbTabBufOffset);
+
+         ISurface Rent(uint lumaOffset, uint chromaOffset, FrameSize size)
+         {
+             return rm.Cache.Get(_decoder, lumaOffset, chromaOffset, size.Width, size.Height);
+         }
+
+         // Picture offset slots: 0 = last, 1 = golden, 2 = alt, 3 = current.
+         ISurface lastSurface = Rent(state.SetPictureLumaOffset[0], state.SetPictureChromaOffset[0], pictureInfo.LastFrameSize);
+         ISurface goldenSurface = Rent(state.SetPictureLumaOffset[1], state.SetPictureChromaOffset[1], pictureInfo.GoldenFrameSize);
+         ISurface altSurface = Rent(state.SetPictureLumaOffset[2], state.SetPictureChromaOffset[2], pictureInfo.AltFrameSize);
+         ISurface currentSurface = Rent(state.SetPictureLumaOffset[3], state.SetPictureChromaOffset[3], pictureInfo.CurrentFrameSize);
+
+         try
+         {
+             Vp9PictureInfo info = pictureInfo.Convert();
+
+             info.LastReference = lastSurface;
+             info.GoldenReference = goldenSurface;
+             info.AltReference = altSurface;
+
+             entropy.Convert(ref info.Entropy);
+
+             ReadOnlySpan<byte> bitstream = rm.Gmm.DeviceGetSpan(state.SetInBufBaseOffset, (int)pictureInfo.BitstreamSize);
+
+             // The previous frame's motion vectors are only read when the picture info allows it.
+             ReadOnlySpan<Vp9MvRef> mvsIn = ReadOnlySpan<Vp9MvRef>.Empty;
+
+             if (info.UsePrevInFindMvRefs)
+             {
+                 mvsIn = GetMvsInput(rm.Gmm, pictureInfo.CurrentFrameSize, state.Vp9SetColMvReadBufOffset);
+             }
+
+             int mvsSize = GetMvsBufferSize(pictureInfo.CurrentFrameSize);
+
+             using var mvsRegion = rm.Gmm.GetWritableRegion(ExtendOffset(state.Vp9SetColMvWriteBufOffset), mvsSize);
+
+             Span<Vp9MvRef> mvsOut = MemoryMarshal.Cast<byte, Vp9MvRef>(mvsRegion.Memory.Span);
+
+             uint lumaOffset = state.SetPictureLumaOffset[3];
+             uint chromaOffset = state.SetPictureChromaOffset[3];
+
+             if (_decoder.Decode(ref info, currentSurface, bitstream, mvsIn, mvsOut))
+             {
+                 SurfaceWriter.Write(rm.Gmm, currentSurface, lumaOffset, chromaOffset);
+             }
+
+             WriteBackwardUpdates(rm.Gmm, state.Vp9SetCtxCounterBufOffset, ref info.BackwardUpdateCounts);
+         }
+         finally
+         {
+             // Hand all four surfaces back even when decoding or a write-back throws,
+             // so a failed frame does not leave them held by this call.
+             rm.Cache.Put(lastSurface);
+             rm.Cache.Put(goldenSurface);
+             rm.Cache.Put(altSurface);
+             rm.Cache.Put(currentSurface);
+         }
+     }
+
+     /// <summary>
+     /// Size in bytes of a motion-vector buffer for a frame of the given size:
+     /// one <see cref="MvBytesPerUnit"/>-byte entry per 8x8 unit, with both dimensions rounded up.
+     /// </summary>
+     private static int GetMvsBufferSize(FrameSize size)
+     {
+         int miCols = BitUtils.DivRoundUp(size.Width, 8);
+         int miRows = BitUtils.DivRoundUp(size.Height, 8);
+
+         return miRows * miCols * MvBytesPerUnit;
+     }
+
+     /// <summary>
+     /// Returns the motion vectors stored at <paramref name="offset"/> as a read-only span.
+     /// </summary>
+     private static ReadOnlySpan<Vp9MvRef> GetMvsInput(MemoryManager gmm, FrameSize size, uint offset)
+     {
+         return MemoryMarshal.Cast<byte, Vp9MvRef>(gmm.DeviceGetSpan(offset, GetMvsBufferSize(size)));
+     }
+
+     /// <summary>
+     /// Converts the decoder's counters to the <see cref="BackwardUpdates"/> layout and stores
+     /// them in the writable region at <paramref name="offset"/>.
+     /// </summary>
+     private static void WriteBackwardUpdates(MemoryManager gmm, uint offset, ref Vp9BackwardUpdates counts)
+     {
+         using var backwardUpdatesRegion = gmm.GetWritableRegion(ExtendOffset(offset), Unsafe.SizeOf<BackwardUpdates>());
+
+         // Reinterpret the region's bytes as the structure and overwrite it in place.
+         ref var backwardUpdates = ref MemoryMarshal.Cast<byte, BackwardUpdates>(backwardUpdatesRegion.Memory.Span)[0];
+
+         backwardUpdates = new BackwardUpdates(ref counts);
+     }
+ }
+}