diff options
| author | TSR Berry <20988865+TSRBerry@users.noreply.github.com> | 2023-04-08 01:22:00 +0200 |
|---|---|---|
| committer | Mary <thog@protonmail.com> | 2023-04-27 23:51:14 +0200 |
| commit | cee712105850ac3385cd0091a923438167433f9f (patch) | |
| tree | 4a5274b21d8b7f938c0d0ce18736d3f2993b11b1 /Ryujinx.Graphics.Vic/Image | |
| parent | cd124bda587ef09668a971fa1cac1c3f0cfc9f21 (diff) | |
Move solution and projects to src
Diffstat (limited to 'Ryujinx.Graphics.Vic/Image')
| -rw-r--r-- | Ryujinx.Graphics.Vic/Image/BufferPool.cs | 103 | ||||
| -rw-r--r-- | Ryujinx.Graphics.Vic/Image/InputSurface.cs | 86 | ||||
| -rw-r--r-- | Ryujinx.Graphics.Vic/Image/Pixel.cs | 10 | ||||
| -rw-r--r-- | Ryujinx.Graphics.Vic/Image/Surface.cs | 46 | ||||
| -rw-r--r-- | Ryujinx.Graphics.Vic/Image/SurfaceCommon.cs | 33 | ||||
| -rw-r--r-- | Ryujinx.Graphics.Vic/Image/SurfaceReader.cs | 495 | ||||
| -rw-r--r-- | Ryujinx.Graphics.Vic/Image/SurfaceWriter.cs | 667 |
7 files changed, 0 insertions, 1440 deletions
using Ryujinx.Common;
using Ryujinx.Graphics.Texture;
using System;
using System.Runtime.CompilerServices;

namespace Ryujinx.Graphics.Vic.Image
{
    // ---- BufferPool.cs ----

    /// <summary>
    /// Small fixed-capacity pool of reusable arrays.
    /// Buffers are identified by their slot index, which must be handed back
    /// through <see cref="Return(int)"/> once the caller is done with them.
    /// </summary>
    /// <typeparam name="T">Element type of the pooled arrays</typeparam>
    class BufferPool<T>
    {
        /// <summary>
        /// Maximum number of buffers on the pool.
        /// </summary>
        private const int MaxBuffers = 4;

        /// <summary>
        /// Maximum length (in elements) of a buffer that can be added on the pool.
        /// If the required buffer is larger than this, it won't be
        /// added to the pool to avoid long term high memory usage.
        /// </summary>
        private const int MaxBufferSize = 2048 * 2048;

        private struct PoolItem
        {
            public bool InUse;
            public T[] Buffer;
        }

        private readonly PoolItem[] _pool = new PoolItem[MaxBuffers];

        /// <summary>
        /// Rents a buffer with the exact size requested.
        /// The buffer is not cleared and may contain data from a previous use.
        /// </summary>
        /// <param name="length">Size of the buffer</param>
        /// <param name="buffer">Span of the requested size</param>
        /// <returns>The index of the buffer on the pool, or -1 if the buffer is not pooled</returns>
        public int Rent(int length, out Span<T> buffer)
        {
            int index = RentMinimum(length, out T[] bufferArray);

            // The backing array may be longer than requested, expose only the requested range.
            buffer = bufferArray.AsSpan(0, length);

            return index;
        }

        /// <summary>
        /// Rents a buffer with a size greater than or equal to the requested size.
        /// The buffer is not cleared and may contain data from a previous use.
        /// </summary>
        /// <param name="length">Size of the buffer</param>
        /// <param name="buffer">Array with a length greater than or equal to the requested length</param>
        /// <returns>The index of the buffer on the pool, or -1 if the buffer is not pooled</returns>
        public int RentMinimum(int length, out T[] buffer)
        {
            // The unsigned comparison also routes negative lengths to this path,
            // where the array allocation below rejects them.
            if ((uint)length > MaxBufferSize)
            {
                buffer = new T[length];
                return -1;
            }

            // Try to find a free buffer that is at least as large as the requested one.
            // This will avoid an allocation.
            for (int i = 0; i < MaxBuffers; i++)
            {
                ref PoolItem item = ref _pool[i];

                if (!item.InUse && item.Buffer != null && item.Buffer.Length >= length)
                {
                    buffer = item.Buffer;
                    item.InUse = true;
                    return i;
                }
            }

            buffer = new T[length];

            // Try to add the new buffer to the pool.
            // We try to find a slot that is not in use, and replace the buffer in it.
            for (int i = 0; i < MaxBuffers; i++)
            {
                ref PoolItem item = ref _pool[i];

                if (!item.InUse)
                {
                    item.Buffer = buffer;
                    item.InUse = true;
                    return i;
                }
            }

            // All slots are busy, the caller gets an unpooled buffer.
            return -1;
        }

        /// <summary>
        /// Returns a buffer rented with <see cref="Rent(int, out Span{T})"/> or
        /// <see cref="RentMinimum(int, out T[])"/> to the pool.
        /// </summary>
        /// <param name="index">Index of the buffer on the pool. Negative values (unpooled buffers) are ignored</param>
        public void Return(int index)
        {
            if (index < 0)
            {
                return;
            }

            _pool[index].InUse = false;
        }
    }

    // ---- InputSurface.cs ----

    /// <summary>
    /// Byte buffer together with the pool slot index it was rented from.
    /// </summary>
    ref struct RentedBuffer
    {
        /// <summary>
        /// Buffer with no data that is not backed by any pool slot.
        /// </summary>
        public static RentedBuffer Empty => new RentedBuffer(Span<byte>.Empty, -1);

        public Span<byte> Data;
        public int Index;

        public RentedBuffer(Span<byte> data, int index)
        {
            Data = data;
            Index = index;
        }

        /// <summary>
        /// Releases the pool slot backing this buffer, if any.
        /// Calling it again on the same instance does nothing.
        /// </summary>
        /// <param name="pool">Pool the buffer was rented from</param>
        public void Return(BufferPool<byte> pool)
        {
            if (Index != -1)
            {
                pool.Return(Index);

                // Forget the slot so that a second call can't release a buffer
                // that has been rented by someone else in the meantime.
                Index = -1;
            }
        }
    }

    /// <summary>
    /// Up to three rented input buffers and the dimensions of the surface they hold.
    /// <see cref="Initialize"/> must be called before use, as the default index value (0)
    /// is a valid pool slot.
    /// </summary>
    ref struct InputSurface
    {
        public ReadOnlySpan<byte> Buffer0;
        public ReadOnlySpan<byte> Buffer1;
        public ReadOnlySpan<byte> Buffer2;

        public int Buffer0Index;
        public int Buffer1Index;
        public int Buffer2Index;

        public int Width;
        public int Height;

        public int UvWidth;
        public int UvHeight;

        /// <summary>
        /// Marks all three buffers as not backed by a pool slot.
        /// </summary>
        public void Initialize()
        {
            Buffer0Index = -1;
            Buffer1Index = -1;
            Buffer2Index = -1;
        }

        public void SetBuffer0(RentedBuffer buffer)
        {
            Buffer0 = buffer.Data;
            Buffer0Index = buffer.Index;
        }

        public void SetBuffer1(RentedBuffer buffer)
        {
            Buffer1 = buffer.Data;
            Buffer1Index = buffer.Index;
        }

        public void SetBuffer2(RentedBuffer buffer)
        {
            Buffer2 = buffer.Data;
            Buffer2Index = buffer.Index;
        }

        /// <summary>
        /// Releases the pool slots backing the buffers of this surface.
        /// Calling it again on the same instance does nothing.
        /// </summary>
        /// <param name="pool">Pool the buffers were rented from</param>
        public void Return(BufferPool<byte> pool)
        {
            if (Buffer0Index != -1)
            {
                pool.Return(Buffer0Index);
            }

            if (Buffer1Index != -1)
            {
                pool.Return(Buffer1Index);
            }

            if (Buffer2Index != -1)
            {
                pool.Return(Buffer2Index);
            }

            // Forget the slots so that a second call can't release buffers
            // that have been rented by someone else in the meantime.
            Initialize();
        }
    }

    // ---- Pixel.cs ----

    /// <summary>
    /// Pixel with four 16-bit components.
    /// </summary>
    struct Pixel
    {
        public ushort R;
        public ushort G;
        public ushort B;
        public ushort A;
    }

    // ---- Surface.cs ----

    /// <summary>
    /// Two dimensional array of <see cref="Pixel"/> backed by a pooled buffer.
    /// Disposing the surface hands the buffer back to the pool.
    /// </summary>
    readonly struct Surface : IDisposable
    {
        private readonly int _bufferIndex;

        private readonly BufferPool<Pixel> _pool;

        /// <summary>
        /// Pixel storage in row-major order (index = y * Width + x).
        /// The array can be longer than Width * Height.
        /// </summary>
        public Pixel[] Data { get; }

        public int Width { get; }
        public int Height { get; }

        /// <summary>
        /// Creates a new surface, renting its storage from the pool.
        /// The storage is not cleared.
        /// </summary>
        /// <param name="pool">Pool used to rent the pixel storage</param>
        /// <param name="width">Width of the surface in pixels</param>
        /// <param name="height">Height of the surface in pixels</param>
        public Surface(BufferPool<Pixel> pool, int width, int height)
        {
            // Checked, as a wrapped product would silently produce a buffer that is too small.
            _bufferIndex = pool.RentMinimum(checked(width * height), out Pixel[] data);
            _pool = pool;
            Data = data;
            Width = width;
            Height = height;
        }

        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public ushort GetR(int x, int y) => Data[y * Width + x].R;
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public ushort GetG(int x, int y) => Data[y * Width + x].G;
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public ushort GetB(int x, int y) => Data[y * Width + x].B;
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public ushort GetA(int x, int y) => Data[y * Width + x].A;

        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public void SetR(int x, int y, ushort value) => Data[y * Width + x].R = value;
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public void SetG(int x, int y, ushort value) => Data[y * Width + x].G = value;
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public void SetB(int x, int y, ushort value) => Data[y * Width + x].B = value;
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        public void SetA(int x, int y, ushort value) => Data[y * Width + x].A = value;

        /// <summary>
        /// Hands the pixel storage back to the pool.
        /// Does nothing on a default (never constructed) surface, which has no pool.
        /// </summary>
        public void Dispose() => _pool?.Return(_bufferIndex);
    }

    // ---- SurfaceCommon.cs ----

    /// <summary>
    /// Helpers shared by the surface reading and writing code.
    /// </summary>
    static class SurfaceCommon
    {
        /// <summary>
        /// Gets the size in bytes of a row, aligned up to 256 bytes.
        /// </summary>
        /// <param name="width">Width of the row in pixels</param>
        /// <param name="bytesPerPixel">Number of bytes per pixel</param>
        /// <returns>The aligned row size in bytes</returns>
        public static int GetPitch(int width, int bytesPerPixel)
        {
            return BitUtils.AlignUp(width * bytesPerPixel, 256);
        }

        /// <summary>
        /// Gets the total size reported by the size calculator for a block linear texture
        /// with the given dimensions, passing 1 for all the remaining arguments.
        /// </summary>
        /// <param name="width">Width in pixels</param>
        /// <param name="height">Height in pixels</param>
        /// <param name="bytesPerPixel">Number of bytes per pixel</param>
        /// <param name="gobBlocksInY">Block height, in GOBs</param>
        /// <returns>The total size in bytes</returns>
        public static int GetBlockLinearSize(int width, int height, int bytesPerPixel, int gobBlocksInY)
        {
            return SizeCalculator.GetBlockLinearTextureSize(width, height, 1, 1, 1, 1, 1, bytesPerPixel, gobBlocksInY, 1, 1).TotalSize;
        }

        /// <summary>
        /// Converts an offset expressed in units of 256 bytes into a 64-bit byte offset.
        /// </summary>
        /// <param name="offset">Offset in units of 256 bytes</param>
        /// <returns>The offset in bytes</returns>
        public static ulong ExtendOffset(uint offset)
        {
            return (ulong)offset << 8;
        }

        /// <summary>
        /// Widens a byte component by shifting it left by 2 bits.
        /// </summary>
        /// <param name="value">Component value</param>
        /// <returns>The value multiplied by 4</returns>
        public static ushort Upsample(byte value)
        {
            return (ushort)(value << 2);
        }

        /// <summary>
        /// Narrows a component to a byte by shifting it right by 2 bits.
        /// Bits that do not fit on a byte after the shift are discarded.
        /// </summary>
        /// <param name="value">Component value</param>
        /// <returns>The value divided by 4, truncated to 8 bits</returns>
        public static byte Downsample(ushort value)
        {
            return (byte)(value >> 2);
        }
    }
}
-using Ryujinx.Graphics.Texture; -using Ryujinx.Graphics.Vic.Types; -using System; -using System.Runtime.CompilerServices; -using System.Runtime.Intrinsics; -using System.Runtime.Intrinsics.Arm; -using System.Runtime.Intrinsics.X86; -using static Ryujinx.Graphics.Vic.Image.SurfaceCommon; - -namespace Ryujinx.Graphics.Vic.Image -{ - static class SurfaceReader - { - public static Surface Read( - ResourceManager rm, - ref SlotConfig config, - ref SlotSurfaceConfig surfaceConfig, - ref Array8<PlaneOffsets> offsets) - { - switch (surfaceConfig.SlotPixelFormat) - { - case PixelFormat.Y8___V8U8_N420: return ReadNv12(rm, ref config, ref surfaceConfig, ref offsets); - } - - Logger.Error?.Print(LogClass.Vic, $"Unsupported pixel format \"{surfaceConfig.SlotPixelFormat}\"."); - - int lw = surfaceConfig.SlotLumaWidth + 1; - int lh = surfaceConfig.SlotLumaHeight + 1; - - return new Surface(rm.SurfacePool, lw, lh); - } - - private unsafe static Surface ReadNv12( - ResourceManager rm, - ref SlotConfig config, - ref SlotSurfaceConfig surfaceConfig, - ref Array8<PlaneOffsets> offsets) - { - InputSurface input = ReadSurface(rm, ref config, ref surfaceConfig, ref offsets, 1, 2); - - int width = input.Width; - int height = input.Height; - - int yStride = GetPitch(width, 1); - int uvStride = GetPitch(input.UvWidth, 2); - - Surface output = new Surface(rm.SurfacePool, width, height); - - if (Sse41.IsSupported) - { - Vector128<byte> shufMask = Vector128.Create( - (byte)0, (byte)2, (byte)3, (byte)1, - (byte)4, (byte)6, (byte)7, (byte)5, - (byte)8, (byte)10, (byte)11, (byte)9, - (byte)12, (byte)14, (byte)15, (byte)13); - Vector128<short> alphaMask = Vector128.Create(0xff << 24).AsInt16(); - - int yStrideGap = yStride - width; - int uvStrideGap = uvStride - input.UvWidth; - - int widthTrunc = width & ~0xf; - - fixed (Pixel* dstPtr = output.Data) - { - Pixel* op = dstPtr; - - fixed (byte* src0Ptr = input.Buffer0, src1Ptr = input.Buffer1) - { - byte* i0p = src0Ptr; - - for (int y = 0; y < 
height; y++) - { - byte* i1p = src1Ptr + (y >> 1) * uvStride; - - int x = 0; - - for (; x < widthTrunc; x += 16, i0p += 16, i1p += 16) - { - Vector128<short> ya0 = Sse41.ConvertToVector128Int16(i0p); - Vector128<short> ya1 = Sse41.ConvertToVector128Int16(i0p + 8); - - Vector128<byte> uv = Sse2.LoadVector128(i1p); - - Vector128<short> uv0 = Sse2.UnpackLow(uv.AsInt16(), uv.AsInt16()); - Vector128<short> uv1 = Sse2.UnpackHigh(uv.AsInt16(), uv.AsInt16()); - - Vector128<short> rgba0 = Sse2.UnpackLow(ya0, uv0); - Vector128<short> rgba1 = Sse2.UnpackHigh(ya0, uv0); - Vector128<short> rgba2 = Sse2.UnpackLow(ya1, uv1); - Vector128<short> rgba3 = Sse2.UnpackHigh(ya1, uv1); - - rgba0 = Ssse3.Shuffle(rgba0.AsByte(), shufMask).AsInt16(); - rgba1 = Ssse3.Shuffle(rgba1.AsByte(), shufMask).AsInt16(); - rgba2 = Ssse3.Shuffle(rgba2.AsByte(), shufMask).AsInt16(); - rgba3 = Ssse3.Shuffle(rgba3.AsByte(), shufMask).AsInt16(); - - rgba0 = Sse2.Or(rgba0, alphaMask); - rgba1 = Sse2.Or(rgba1, alphaMask); - rgba2 = Sse2.Or(rgba2, alphaMask); - rgba3 = Sse2.Or(rgba3, alphaMask); - - Vector128<short> rgba16_0 = Sse41.ConvertToVector128Int16(rgba0.AsByte()); - Vector128<short> rgba16_1 = Sse41.ConvertToVector128Int16(HighToLow(rgba0.AsByte())); - Vector128<short> rgba16_2 = Sse41.ConvertToVector128Int16(rgba1.AsByte()); - Vector128<short> rgba16_3 = Sse41.ConvertToVector128Int16(HighToLow(rgba1.AsByte())); - Vector128<short> rgba16_4 = Sse41.ConvertToVector128Int16(rgba2.AsByte()); - Vector128<short> rgba16_5 = Sse41.ConvertToVector128Int16(HighToLow(rgba2.AsByte())); - Vector128<short> rgba16_6 = Sse41.ConvertToVector128Int16(rgba3.AsByte()); - Vector128<short> rgba16_7 = Sse41.ConvertToVector128Int16(HighToLow(rgba3.AsByte())); - - rgba16_0 = Sse2.ShiftLeftLogical(rgba16_0, 2); - rgba16_1 = Sse2.ShiftLeftLogical(rgba16_1, 2); - rgba16_2 = Sse2.ShiftLeftLogical(rgba16_2, 2); - rgba16_3 = Sse2.ShiftLeftLogical(rgba16_3, 2); - rgba16_4 = Sse2.ShiftLeftLogical(rgba16_4, 2); - rgba16_5 = 
Sse2.ShiftLeftLogical(rgba16_5, 2); - rgba16_6 = Sse2.ShiftLeftLogical(rgba16_6, 2); - rgba16_7 = Sse2.ShiftLeftLogical(rgba16_7, 2); - - Sse2.Store((short*)(op + (uint)x + 0), rgba16_0); - Sse2.Store((short*)(op + (uint)x + 2), rgba16_1); - Sse2.Store((short*)(op + (uint)x + 4), rgba16_2); - Sse2.Store((short*)(op + (uint)x + 6), rgba16_3); - Sse2.Store((short*)(op + (uint)x + 8), rgba16_4); - Sse2.Store((short*)(op + (uint)x + 10), rgba16_5); - Sse2.Store((short*)(op + (uint)x + 12), rgba16_6); - Sse2.Store((short*)(op + (uint)x + 14), rgba16_7); - } - - for (; x < width; x++, i1p += (x & 1) * 2) - { - Pixel* px = op + (uint)x; - - px->R = Upsample(*i0p++); - px->G = Upsample(*i1p); - px->B = Upsample(*(i1p + 1)); - px->A = 0x3ff; - } - - op += width; - i0p += yStrideGap; - i1p += uvStrideGap; - } - } - } - } - else if (AdvSimd.Arm64.IsSupported) - { - Vector128<int> alphaMask = Vector128.Create(0xffu << 24).AsInt32(); - - int yStrideGap = yStride - width; - int uvStrideGap = uvStride - input.UvWidth; - - int widthTrunc = width & ~0xf; - - fixed (Pixel* dstPtr = output.Data) - { - Pixel* op = dstPtr; - - fixed (byte* src0Ptr = input.Buffer0, src1Ptr = input.Buffer1) - { - byte* i0p = src0Ptr; - - for (int y = 0; y < height; y++) - { - byte* i1p = src1Ptr + (y >> 1) * uvStride; - - int x = 0; - - for (; x < widthTrunc; x += 16, i0p += 16, i1p += 16) - { - Vector128<byte> ya = AdvSimd.LoadVector128(i0p); - Vector128<byte> uv = AdvSimd.LoadVector128(i1p); - - Vector128<short> ya0 = AdvSimd.ZeroExtendWideningLower(ya.GetLower()).AsInt16(); - Vector128<short> ya1 = AdvSimd.ZeroExtendWideningUpper(ya).AsInt16(); - - Vector128<short> uv0 = AdvSimd.Arm64.ZipLow(uv.AsInt16(), uv.AsInt16()); - Vector128<short> uv1 = AdvSimd.Arm64.ZipHigh(uv.AsInt16(), uv.AsInt16()); - - ya0 = AdvSimd.ShiftLeftLogical(ya0, 8); - ya1 = AdvSimd.ShiftLeftLogical(ya1, 8); - - Vector128<short> rgba0 = AdvSimd.Arm64.ZipLow(ya0, uv0); - Vector128<short> rgba1 = AdvSimd.Arm64.ZipHigh(ya0, uv0); - 
Vector128<short> rgba2 = AdvSimd.Arm64.ZipLow(ya1, uv1); - Vector128<short> rgba3 = AdvSimd.Arm64.ZipHigh(ya1, uv1); - - rgba0 = AdvSimd.ShiftRightLogicalAdd(alphaMask, rgba0.AsInt32(), 8).AsInt16(); - rgba1 = AdvSimd.ShiftRightLogicalAdd(alphaMask, rgba1.AsInt32(), 8).AsInt16(); - rgba2 = AdvSimd.ShiftRightLogicalAdd(alphaMask, rgba2.AsInt32(), 8).AsInt16(); - rgba3 = AdvSimd.ShiftRightLogicalAdd(alphaMask, rgba3.AsInt32(), 8).AsInt16(); - - Vector128<short> rgba16_0 = AdvSimd.ZeroExtendWideningLower(rgba0.AsByte().GetLower()).AsInt16(); - Vector128<short> rgba16_1 = AdvSimd.ZeroExtendWideningUpper(rgba0.AsByte()).AsInt16(); - Vector128<short> rgba16_2 = AdvSimd.ZeroExtendWideningLower(rgba1.AsByte().GetLower()).AsInt16(); - Vector128<short> rgba16_3 = AdvSimd.ZeroExtendWideningUpper(rgba1.AsByte()).AsInt16(); - Vector128<short> rgba16_4 = AdvSimd.ZeroExtendWideningLower(rgba2.AsByte().GetLower()).AsInt16(); - Vector128<short> rgba16_5 = AdvSimd.ZeroExtendWideningUpper(rgba2.AsByte()).AsInt16(); - Vector128<short> rgba16_6 = AdvSimd.ZeroExtendWideningLower(rgba3.AsByte().GetLower()).AsInt16(); - Vector128<short> rgba16_7 = AdvSimd.ZeroExtendWideningUpper(rgba3.AsByte()).AsInt16(); - - rgba16_0 = AdvSimd.ShiftLeftLogical(rgba16_0, 2); - rgba16_1 = AdvSimd.ShiftLeftLogical(rgba16_1, 2); - rgba16_2 = AdvSimd.ShiftLeftLogical(rgba16_2, 2); - rgba16_3 = AdvSimd.ShiftLeftLogical(rgba16_3, 2); - rgba16_4 = AdvSimd.ShiftLeftLogical(rgba16_4, 2); - rgba16_5 = AdvSimd.ShiftLeftLogical(rgba16_5, 2); - rgba16_6 = AdvSimd.ShiftLeftLogical(rgba16_6, 2); - rgba16_7 = AdvSimd.ShiftLeftLogical(rgba16_7, 2); - - AdvSimd.Store((short*)(op + (uint)x + 0), rgba16_0); - AdvSimd.Store((short*)(op + (uint)x + 2), rgba16_1); - AdvSimd.Store((short*)(op + (uint)x + 4), rgba16_2); - AdvSimd.Store((short*)(op + (uint)x + 6), rgba16_3); - AdvSimd.Store((short*)(op + (uint)x + 8), rgba16_4); - AdvSimd.Store((short*)(op + (uint)x + 10), rgba16_5); - AdvSimd.Store((short*)(op + (uint)x + 12), 
rgba16_6); - AdvSimd.Store((short*)(op + (uint)x + 14), rgba16_7); - } - - for (; x < width; x++, i1p += (x & 1) * 2) - { - Pixel* px = op + (uint)x; - - px->R = Upsample(*i0p++); - px->G = Upsample(*i1p); - px->B = Upsample(*(i1p + 1)); - px->A = 0x3ff; - } - - op += width; - i0p += yStrideGap; - i1p += uvStrideGap; - } - } - } - } - else - { - for (int y = 0; y < height; y++) - { - int uvBase = (y >> 1) * uvStride; - - for (int x = 0; x < width; x++) - { - output.SetR(x, y, Upsample(input.Buffer0[y * yStride + x])); - - int uvOffs = uvBase + (x & ~1); - - output.SetG(x, y, Upsample(input.Buffer1[uvOffs])); - output.SetB(x, y, Upsample(input.Buffer1[uvOffs + 1])); - output.SetA(x, y, 0x3ff); - } - } - } - - input.Return(rm.BufferPool); - - return output; - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static Vector128<byte> HighToLow(Vector128<byte> value) - { - return Sse.MoveHighToLow(value.AsSingle(), value.AsSingle()).AsByte(); - } - - private static InputSurface ReadSurface( - ResourceManager rm, - ref SlotConfig config, - ref SlotSurfaceConfig surfaceConfig, - ref Array8<PlaneOffsets> offsets, - int bytesPerPixel, - int planes) - { - InputSurface surface = new InputSurface(); - - surface.Initialize(); - - int gobBlocksInY = 1 << surfaceConfig.SlotBlkHeight; - - bool linear = surfaceConfig.SlotBlkKind == 0; - - int lw = surfaceConfig.SlotLumaWidth + 1; - int lh = surfaceConfig.SlotLumaHeight + 1; - - int cw = surfaceConfig.SlotChromaWidth + 1; - int ch = surfaceConfig.SlotChromaHeight + 1; - - // Interlaced inputs have double the height when deinterlaced. - int heightShift = config.FrameFormat.IsField() ? 
1 : 0; - - surface.Width = lw; - surface.Height = lh << heightShift; - surface.UvWidth = cw; - surface.UvHeight = ch << heightShift; - - if (planes > 0) - { - surface.SetBuffer0(ReadBuffer(rm, ref config, ref offsets, linear, 0, lw, lh, bytesPerPixel, gobBlocksInY)); - } - - if (planes > 1) - { - surface.SetBuffer1(ReadBuffer(rm, ref config, ref offsets, linear, 1, cw, ch, planes == 2 ? 2 : 1, gobBlocksInY)); - } - - if (planes > 2) - { - surface.SetBuffer2(ReadBuffer(rm, ref config, ref offsets, linear, 2, cw, ch, 1, gobBlocksInY)); - } - - return surface; - } - - private static RentedBuffer ReadBuffer( - ResourceManager rm, - scoped ref SlotConfig config, - scoped ref Array8<PlaneOffsets> offsets, - bool linear, - int plane, - int width, - int height, - int bytesPerPixel, - int gobBlocksInY) - { - FrameFormat frameFormat = config.FrameFormat; - bool isLuma = plane == 0; - bool isField = frameFormat.IsField(); - bool isTopField = frameFormat.IsTopField(isLuma); - int stride = GetPitch(width, bytesPerPixel); - uint offset = GetOffset(ref offsets[0], plane); - - int dstStart = 0; - int dstStride = stride; - - if (isField) - { - dstStart = isTopField ? 
0 : stride; - dstStride = stride * 2; - } - - RentedBuffer buffer; - - if (linear) - { - buffer = ReadBufferLinear(rm, offset, width, height, dstStart, dstStride, bytesPerPixel); - } - else - { - buffer = ReadBufferBlockLinear(rm, offset, width, height, dstStart, dstStride, bytesPerPixel, gobBlocksInY); - } - - if (isField || frameFormat.IsInterlaced()) - { - RentedBuffer prevBuffer = RentedBuffer.Empty; - RentedBuffer nextBuffer = RentedBuffer.Empty; - - if (config.PrevFieldEnable) - { - prevBuffer = ReadBufferNoDeinterlace(rm, ref offsets[1], linear, plane, width, height, bytesPerPixel, gobBlocksInY); - } - - if (config.NextFieldEnable) - { - nextBuffer = ReadBufferNoDeinterlace(rm, ref offsets[2], linear, plane, width, height, bytesPerPixel, gobBlocksInY); - } - - int w = width * bytesPerPixel; - - switch (config.DeinterlaceMode) - { - case DeinterlaceMode.Weave: - Scaler.DeinterlaceWeave(buffer.Data, prevBuffer.Data, w, stride, isTopField); - break; - case DeinterlaceMode.BobField: - Scaler.DeinterlaceBob(buffer.Data, w, stride, isTopField); - break; - case DeinterlaceMode.Bob: - bool isCurrentTop = isLuma ? 
config.IsEven : config.ChromaEven; - Scaler.DeinterlaceBob(buffer.Data, w, stride, isCurrentTop ^ frameFormat.IsInterlacedBottomFirst()); - break; - case DeinterlaceMode.NewBob: - case DeinterlaceMode.Disi1: - Scaler.DeinterlaceMotionAdaptive(buffer.Data, prevBuffer.Data, nextBuffer.Data, w, stride, isTopField); - break; - case DeinterlaceMode.WeaveLumaBobFieldChroma: - if (isLuma) - { - Scaler.DeinterlaceWeave(buffer.Data, prevBuffer.Data, w, stride, isTopField); - } - else - { - Scaler.DeinterlaceBob(buffer.Data, w, stride, isTopField); - } - break; - default: - Logger.Error?.Print(LogClass.Vic, $"Unsupported deinterlace mode \"{config.DeinterlaceMode}\"."); - break; - } - - prevBuffer.Return(rm.BufferPool); - nextBuffer.Return(rm.BufferPool); - } - - return buffer; - } - - private static uint GetOffset(ref PlaneOffsets offsets, int plane) - { - return plane switch - { - 0 => offsets.LumaOffset, - 1 => offsets.ChromaUOffset, - 2 => offsets.ChromaVOffset, - _ => throw new ArgumentOutOfRangeException(nameof(plane)) - }; - } - - private static RentedBuffer ReadBufferNoDeinterlace( - ResourceManager rm, - ref PlaneOffsets offsets, - bool linear, - int plane, - int width, - int height, - int bytesPerPixel, - int gobBlocksInY) - { - int stride = GetPitch(width, bytesPerPixel); - uint offset = GetOffset(ref offsets, plane); - - if (linear) - { - return ReadBufferLinear(rm, offset, width, height, 0, stride, bytesPerPixel); - } - - return ReadBufferBlockLinear(rm, offset, width, height, 0, stride, bytesPerPixel, gobBlocksInY); - } - - private static RentedBuffer ReadBufferLinear( - ResourceManager rm, - uint offset, - int width, - int height, - int dstStart, - int dstStride, - int bytesPerPixel) - { - int srcStride = GetPitch(width, bytesPerPixel); - int inSize = srcStride * height; - - ReadOnlySpan<byte> src = rm.Gmm.GetSpan(ExtendOffset(offset), inSize); - - int outSize = dstStride * height; - int bufferIndex = rm.BufferPool.RentMinimum(outSize, out byte[] buffer); - 
Span<byte> dst = buffer; - dst = dst.Slice(0, outSize); - - for (int y = 0; y < height; y++) - { - src.Slice(y * srcStride, srcStride).CopyTo(dst.Slice(dstStart + y * dstStride, srcStride)); - } - - return new RentedBuffer(dst, bufferIndex); - } - - private static RentedBuffer ReadBufferBlockLinear( - ResourceManager rm, - uint offset, - int width, - int height, - int dstStart, - int dstStride, - int bytesPerPixel, - int gobBlocksInY) - { - int inSize = GetBlockLinearSize(width, height, bytesPerPixel, gobBlocksInY); - - ReadOnlySpan<byte> src = rm.Gmm.GetSpan(ExtendOffset(offset), inSize); - - int outSize = dstStride * height; - int bufferIndex = rm.BufferPool.RentMinimum(outSize, out byte[] buffer); - Span<byte> dst = buffer; - dst = dst.Slice(0, outSize); - - LayoutConverter.ConvertBlockLinearToLinear(dst.Slice(dstStart), width, height, dstStride, bytesPerPixel, gobBlocksInY, src); - - return new RentedBuffer(dst, bufferIndex); - } - } -} diff --git a/Ryujinx.Graphics.Vic/Image/SurfaceWriter.cs b/Ryujinx.Graphics.Vic/Image/SurfaceWriter.cs deleted file mode 100644 index 37d261f9..00000000 --- a/Ryujinx.Graphics.Vic/Image/SurfaceWriter.cs +++ /dev/null @@ -1,667 +0,0 @@ -using Ryujinx.Common.Logging; -using Ryujinx.Graphics.Texture; -using Ryujinx.Graphics.Vic.Types; -using System; -using System.Runtime.Intrinsics; -using System.Runtime.Intrinsics.Arm; -using System.Runtime.Intrinsics.X86; -using static Ryujinx.Graphics.Vic.Image.SurfaceCommon; - -namespace Ryujinx.Graphics.Vic.Image -{ - class SurfaceWriter - { - public static void Write(ResourceManager rm, Surface input, ref OutputSurfaceConfig config, ref PlaneOffsets offsets) - { - switch (config.OutPixelFormat) - { - case PixelFormat.A8B8G8R8: - case PixelFormat.X8B8G8R8: - WriteA8B8G8R8(rm, input, ref config, ref offsets); - break; - case PixelFormat.A8R8G8B8: - WriteA8R8G8B8(rm, input, ref config, ref offsets); - break; - case PixelFormat.Y8___V8U8_N420: - WriteNv12(rm, input, ref config, ref offsets); - 
break; - default: - Logger.Error?.Print(LogClass.Vic, $"Unsupported pixel format \"{config.OutPixelFormat}\"."); - break; - } - } - - private unsafe static void WriteA8B8G8R8(ResourceManager rm, Surface input, ref OutputSurfaceConfig config, ref PlaneOffsets offsets) - { - int width = input.Width; - int height = input.Height; - int stride = GetPitch(width, 4); - - int dstIndex = rm.BufferPool.Rent(height * stride, out Span<byte> dst); - - if (Sse2.IsSupported) - { - int widthTrunc = width & ~7; - int strideGap = stride - width * 4; - - fixed (Pixel* srcPtr = input.Data) - { - Pixel* ip = srcPtr; - - fixed (byte* dstPtr = dst) - { - byte* op = dstPtr; - - for (int y = 0; y < height; y++, ip += input.Width) - { - int x = 0; - - for (; x < widthTrunc; x += 8) - { - Vector128<ushort> pixel12 = Sse2.LoadVector128((ushort*)(ip + (uint)x)); - Vector128<ushort> pixel34 = Sse2.LoadVector128((ushort*)(ip + (uint)x + 2)); - Vector128<ushort> pixel56 = Sse2.LoadVector128((ushort*)(ip + (uint)x + 4)); - Vector128<ushort> pixel78 = Sse2.LoadVector128((ushort*)(ip + (uint)x + 6)); - - pixel12 = Sse2.ShiftRightLogical(pixel12, 2); - pixel34 = Sse2.ShiftRightLogical(pixel34, 2); - pixel56 = Sse2.ShiftRightLogical(pixel56, 2); - pixel78 = Sse2.ShiftRightLogical(pixel78, 2); - - Vector128<byte> pixel1234 = Sse2.PackUnsignedSaturate(pixel12.AsInt16(), pixel34.AsInt16()); - Vector128<byte> pixel5678 = Sse2.PackUnsignedSaturate(pixel56.AsInt16(), pixel78.AsInt16()); - - Sse2.Store(op + 0x00, pixel1234); - Sse2.Store(op + 0x10, pixel5678); - - op += 0x20; - } - - for (; x < width; x++) - { - Pixel* px = ip + (uint)x; - - *(op + 0) = Downsample(px->R); - *(op + 1) = Downsample(px->G); - *(op + 2) = Downsample(px->B); - *(op + 3) = Downsample(px->A); - - op += 4; - } - - op += strideGap; - } - } - } - } - else if (AdvSimd.IsSupported) - { - int widthTrunc = width & ~7; - int strideGap = stride - width * 4; - - fixed (Pixel* srcPtr = input.Data) - { - Pixel* ip = srcPtr; - - fixed (byte* 
dstPtr = dst) - { - byte* op = dstPtr; - - for (int y = 0; y < height; y++, ip += input.Width) - { - int x = 0; - - for (; x < widthTrunc; x += 8) - { - Vector128<ushort> pixel12 = AdvSimd.LoadVector128((ushort*)(ip + (uint)x)); - Vector128<ushort> pixel34 = AdvSimd.LoadVector128((ushort*)(ip + (uint)x + 2)); - Vector128<ushort> pixel56 = AdvSimd.LoadVector128((ushort*)(ip + (uint)x + 4)); - Vector128<ushort> pixel78 = AdvSimd.LoadVector128((ushort*)(ip + (uint)x + 6)); - - pixel12 = AdvSimd.ShiftRightLogical(pixel12, 2); - pixel34 = AdvSimd.ShiftRightLogical(pixel34, 2); - pixel56 = AdvSimd.ShiftRightLogical(pixel56, 2); - pixel78 = AdvSimd.ShiftRightLogical(pixel78, 2); - - Vector64<byte> lower12 = AdvSimd.ExtractNarrowingLower(pixel12.AsUInt16()); - Vector64<byte> lower56 = AdvSimd.ExtractNarrowingLower(pixel56.AsUInt16()); - - Vector128<byte> pixel1234 = AdvSimd.ExtractNarrowingUpper(lower12, pixel34.AsUInt16()); - Vector128<byte> pixel5678 = AdvSimd.ExtractNarrowingUpper(lower56, pixel78.AsUInt16()); - - AdvSimd.Store(op + 0x00, pixel1234); - AdvSimd.Store(op + 0x10, pixel5678); - - op += 0x20; - } - - for (; x < width; x++) - { - Pixel* px = ip + (uint)x; - - *(op + 0) = Downsample(px->R); - *(op + 1) = Downsample(px->G); - *(op + 2) = Downsample(px->B); - *(op + 3) = Downsample(px->A); - - op += 4; - } - - op += strideGap; - } - } - } - } - else - { - for (int y = 0; y < height; y++) - { - int baseOffs = y * stride; - - for (int x = 0; x < width; x++) - { - int offs = baseOffs + x * 4; - - dst[offs + 0] = Downsample(input.GetR(x, y)); - dst[offs + 1] = Downsample(input.GetG(x, y)); - dst[offs + 2] = Downsample(input.GetB(x, y)); - dst[offs + 3] = Downsample(input.GetA(x, y)); - } - } - } - - bool outLinear = config.OutBlkKind == 0; - - int gobBlocksInY = 1 << config.OutBlkHeight; - - WriteBuffer(rm, dst, offsets.LumaOffset, outLinear, width, height, 4, gobBlocksInY); - - rm.BufferPool.Return(dstIndex); - } - - private unsafe static void 
WriteA8R8G8B8(ResourceManager rm, Surface input, ref OutputSurfaceConfig config, ref PlaneOffsets offsets) - { - int width = input.Width; - int height = input.Height; - int stride = GetPitch(width, 4); - - int dstIndex = rm.BufferPool.Rent(height * stride, out Span<byte> dst); - - if (Ssse3.IsSupported) - { - Vector128<byte> shuffleMask = Vector128.Create( - (byte)2, (byte)1, (byte)0, (byte)3, - (byte)6, (byte)5, (byte)4, (byte)7, - (byte)10, (byte)9, (byte)8, (byte)11, - (byte)14, (byte)13, (byte)12, (byte)15); - - int widthTrunc = width & ~7; - int strideGap = stride - width * 4; - - fixed (Pixel* srcPtr = input.Data) - { - Pixel* ip = srcPtr; - - fixed (byte* dstPtr = dst) - { - byte* op = dstPtr; - - for (int y = 0; y < height; y++, ip += input.Width) - { - int x = 0; - - for (; x < widthTrunc; x += 8) - { - Vector128<ushort> pixel12 = Sse2.LoadVector128((ushort*)(ip + (uint)x)); - Vector128<ushort> pixel34 = Sse2.LoadVector128((ushort*)(ip + (uint)x + 2)); - Vector128<ushort> pixel56 = Sse2.LoadVector128((ushort*)(ip + (uint)x + 4)); - Vector128<ushort> pixel78 = Sse2.LoadVector128((ushort*)(ip + (uint)x + 6)); - - pixel12 = Sse2.ShiftRightLogical(pixel12, 2); - pixel34 = Sse2.ShiftRightLogical(pixel34, 2); - pixel56 = Sse2.ShiftRightLogical(pixel56, 2); - pixel78 = Sse2.ShiftRightLogical(pixel78, 2); - - Vector128<byte> pixel1234 = Sse2.PackUnsignedSaturate(pixel12.AsInt16(), pixel34.AsInt16()); - Vector128<byte> pixel5678 = Sse2.PackUnsignedSaturate(pixel56.AsInt16(), pixel78.AsInt16()); - - pixel1234 = Ssse3.Shuffle(pixel1234, shuffleMask); - pixel5678 = Ssse3.Shuffle(pixel5678, shuffleMask); - - Sse2.Store(op + 0x00, pixel1234); - Sse2.Store(op + 0x10, pixel5678); - - op += 0x20; - } - - for (; x < width; x++) - { - Pixel* px = ip + (uint)x; - - *(op + 0) = Downsample(px->B); - *(op + 1) = Downsample(px->G); - *(op + 2) = Downsample(px->R); - *(op + 3) = Downsample(px->A); - - op += 4; - } - - op += strideGap; - } - } - } - } - else - { - for (int y = 0; y 
< height; y++) - { - int baseOffs = y * stride; - - for (int x = 0; x < width; x++) - { - int offs = baseOffs + x * 4; - - dst[offs + 0] = Downsample(input.GetB(x, y)); - dst[offs + 1] = Downsample(input.GetG(x, y)); - dst[offs + 2] = Downsample(input.GetR(x, y)); - dst[offs + 3] = Downsample(input.GetA(x, y)); - } - } - } - - bool outLinear = config.OutBlkKind == 0; - - int gobBlocksInY = 1 << config.OutBlkHeight; - - WriteBuffer(rm, dst, offsets.LumaOffset, outLinear, width, height, 4, gobBlocksInY); - - rm.BufferPool.Return(dstIndex); - } - - private unsafe static void WriteNv12(ResourceManager rm, Surface input, ref OutputSurfaceConfig config, ref PlaneOffsets offsets) - { - int gobBlocksInY = 1 << config.OutBlkHeight; - - bool outLinear = config.OutBlkKind == 0; - - int width = Math.Min(config.OutLumaWidth + 1, input.Width); - int height = Math.Min(config.OutLumaHeight + 1, input.Height); - int yStride = GetPitch(config.OutLumaWidth + 1, 1); - - int dstYIndex = rm.BufferPool.Rent((config.OutLumaHeight + 1) * yStride, out Span<byte> dstY); - - if (Sse41.IsSupported) - { - Vector128<ushort> mask = Vector128.Create(0xffffUL).AsUInt16(); - - int widthTrunc = width & ~0xf; - int strideGap = yStride - width; - - fixed (Pixel* srcPtr = input.Data) - { - Pixel* ip = srcPtr; - - fixed (byte* dstPtr = dstY) - { - byte* op = dstPtr; - - for (int y = 0; y < height; y++, ip += input.Width) - { - int x = 0; - - for (; x < widthTrunc; x += 16) - { - byte* baseOffset = (byte*)(ip + (ulong)(uint)x); - - Vector128<ushort> pixelp1 = Sse2.LoadVector128((ushort*)baseOffset); - Vector128<ushort> pixelp2 = Sse2.LoadVector128((ushort*)(baseOffset + 0x10)); - Vector128<ushort> pixelp3 = Sse2.LoadVector128((ushort*)(baseOffset + 0x20)); - Vector128<ushort> pixelp4 = Sse2.LoadVector128((ushort*)(baseOffset + 0x30)); - Vector128<ushort> pixelp5 = Sse2.LoadVector128((ushort*)(baseOffset + 0x40)); - Vector128<ushort> pixelp6 = Sse2.LoadVector128((ushort*)(baseOffset + 0x50)); - 
Vector128<ushort> pixelp7 = Sse2.LoadVector128((ushort*)(baseOffset + 0x60)); - Vector128<ushort> pixelp8 = Sse2.LoadVector128((ushort*)(baseOffset + 0x70)); - - pixelp1 = Sse2.And(pixelp1, mask); - pixelp2 = Sse2.And(pixelp2, mask); - pixelp3 = Sse2.And(pixelp3, mask); - pixelp4 = Sse2.And(pixelp4, mask); - pixelp5 = Sse2.And(pixelp5, mask); - pixelp6 = Sse2.And(pixelp6, mask); - pixelp7 = Sse2.And(pixelp7, mask); - pixelp8 = Sse2.And(pixelp8, mask); - - Vector128<ushort> pixelq1 = Sse41.PackUnsignedSaturate(pixelp1.AsInt32(), pixelp2.AsInt32()); - Vector128<ushort> pixelq2 = Sse41.PackUnsignedSaturate(pixelp3.AsInt32(), pixelp4.AsInt32()); - Vector128<ushort> pixelq3 = Sse41.PackUnsignedSaturate(pixelp5.AsInt32(), pixelp6.AsInt32()); - Vector128<ushort> pixelq4 = Sse41.PackUnsignedSaturate(pixelp7.AsInt32(), pixelp8.AsInt32()); - - pixelq1 = Sse41.PackUnsignedSaturate(pixelq1.AsInt32(), pixelq2.AsInt32()); - pixelq2 = Sse41.PackUnsignedSaturate(pixelq3.AsInt32(), pixelq4.AsInt32()); - - pixelq1 = Sse2.ShiftRightLogical(pixelq1, 2); - pixelq2 = Sse2.ShiftRightLogical(pixelq2, 2); - - Vector128<byte> pixel = Sse2.PackUnsignedSaturate(pixelq1.AsInt16(), pixelq2.AsInt16()); - - Sse2.Store(op, pixel); - - op += 0x10; - } - - for (; x < width; x++) - { - Pixel* px = ip + (uint)x; - - *op++ = Downsample(px->R); - } - - op += strideGap; - } - } - } - } - else if (AdvSimd.IsSupported) - { - Vector128<ushort> mask = Vector128.Create(0xffffUL).AsUInt16(); - - int widthTrunc = width & ~0xf; - int strideGap = yStride - width; - - fixed (Pixel* srcPtr = input.Data) - { - Pixel* ip = srcPtr; - - fixed (byte* dstPtr = dstY) - { - byte* op = dstPtr; - - for (int y = 0; y < height; y++, ip += input.Width) - { - int x = 0; - - for (; x < widthTrunc; x += 16) - { - byte* baseOffset = (byte*)(ip + (ulong)(uint)x); - - Vector128<ushort> pixelp1 = AdvSimd.LoadVector128((ushort*)baseOffset); - Vector128<ushort> pixelp2 = AdvSimd.LoadVector128((ushort*)(baseOffset + 0x10)); - 
Vector128<ushort> pixelp3 = AdvSimd.LoadVector128((ushort*)(baseOffset + 0x20)); - Vector128<ushort> pixelp4 = AdvSimd.LoadVector128((ushort*)(baseOffset + 0x30)); - Vector128<ushort> pixelp5 = AdvSimd.LoadVector128((ushort*)(baseOffset + 0x40)); - Vector128<ushort> pixelp6 = AdvSimd.LoadVector128((ushort*)(baseOffset + 0x50)); - Vector128<ushort> pixelp7 = AdvSimd.LoadVector128((ushort*)(baseOffset + 0x60)); - Vector128<ushort> pixelp8 = AdvSimd.LoadVector128((ushort*)(baseOffset + 0x70)); - - pixelp1 = AdvSimd.And(pixelp1, mask); - pixelp2 = AdvSimd.And(pixelp2, mask); - pixelp3 = AdvSimd.And(pixelp3, mask); - pixelp4 = AdvSimd.And(pixelp4, mask); - pixelp5 = AdvSimd.And(pixelp5, mask); - pixelp6 = AdvSimd.And(pixelp6, mask); - pixelp7 = AdvSimd.And(pixelp7, mask); - pixelp8 = AdvSimd.And(pixelp8, mask); - - Vector64<ushort> lowerp1 = AdvSimd.ExtractNarrowingLower(pixelp1.AsUInt32()); - Vector64<ushort> lowerp3 = AdvSimd.ExtractNarrowingLower(pixelp3.AsUInt32()); - Vector64<ushort> lowerp5 = AdvSimd.ExtractNarrowingLower(pixelp5.AsUInt32()); - Vector64<ushort> lowerp7 = AdvSimd.ExtractNarrowingLower(pixelp7.AsUInt32()); - - Vector128<ushort> pixelq1 = AdvSimd.ExtractNarrowingUpper(lowerp1, pixelp2.AsUInt32()); - Vector128<ushort> pixelq2 = AdvSimd.ExtractNarrowingUpper(lowerp3, pixelp4.AsUInt32()); - Vector128<ushort> pixelq3 = AdvSimd.ExtractNarrowingUpper(lowerp5, pixelp6.AsUInt32()); - Vector128<ushort> pixelq4 = AdvSimd.ExtractNarrowingUpper(lowerp7, pixelp8.AsUInt32()); - - Vector64<ushort> lowerq1 = AdvSimd.ExtractNarrowingLower(pixelq1.AsUInt32()); - Vector64<ushort> lowerq3 = AdvSimd.ExtractNarrowingLower(pixelq3.AsUInt32()); - - pixelq1 = AdvSimd.ExtractNarrowingUpper(lowerq1, pixelq2.AsUInt32()); - pixelq2 = AdvSimd.ExtractNarrowingUpper(lowerq3, pixelq4.AsUInt32()); - - pixelq1 = AdvSimd.ShiftRightLogical(pixelq1, 2); - pixelq2 = AdvSimd.ShiftRightLogical(pixelq2, 2); - - Vector64<byte> pixelLower = AdvSimd.ExtractNarrowingLower(pixelq1.AsUInt16()); - 
- Vector128<byte> pixel = AdvSimd.ExtractNarrowingUpper(pixelLower, pixelq2.AsUInt16()); - - AdvSimd.Store(op, pixel); - - op += 0x10; - } - - for (; x < width; x++) - { - Pixel* px = ip + (uint)x; - - *op++ = Downsample(px->R); - } - - op += strideGap; - } - } - } - } - else - { - for (int y = 0; y < height; y++) - { - for (int x = 0; x < width; x++) - { - dstY[y * yStride + x] = Downsample(input.GetR(x, y)); - } - } - } - - WriteBuffer( - rm, - dstY, - offsets.LumaOffset, - outLinear, - config.OutLumaWidth + 1, - config.OutLumaHeight + 1, - 1, - gobBlocksInY); - - rm.BufferPool.Return(dstYIndex); - - int uvWidth = Math.Min(config.OutChromaWidth + 1, (width + 1) >> 1); - int uvHeight = Math.Min(config.OutChromaHeight + 1, (height + 1) >> 1); - int uvStride = GetPitch(config.OutChromaWidth + 1, 2); - - int dstUvIndex = rm.BufferPool.Rent((config.OutChromaHeight + 1) * uvStride, out Span<byte> dstUv); - - if (Sse2.IsSupported) - { - int widthTrunc = uvWidth & ~7; - int strideGap = uvStride - uvWidth * 2; - - fixed (Pixel* srcPtr = input.Data) - { - Pixel* ip = srcPtr; - - fixed (byte* dstPtr = dstUv) - { - byte* op = dstPtr; - - for (int y = 0; y < uvHeight; y++, ip += input.Width * 2) - { - int x = 0; - - for (; x < widthTrunc; x += 8) - { - byte* baseOffset = (byte*)ip + (ulong)(uint)x * 16; - - Vector128<uint> pixel1 = Sse2.LoadScalarVector128((uint*)(baseOffset + 0x02)); - Vector128<uint> pixel2 = Sse2.LoadScalarVector128((uint*)(baseOffset + 0x12)); - Vector128<uint> pixel3 = Sse2.LoadScalarVector128((uint*)(baseOffset + 0x22)); - Vector128<uint> pixel4 = Sse2.LoadScalarVector128((uint*)(baseOffset + 0x32)); - Vector128<uint> pixel5 = Sse2.LoadScalarVector128((uint*)(baseOffset + 0x42)); - Vector128<uint> pixel6 = Sse2.LoadScalarVector128((uint*)(baseOffset + 0x52)); - Vector128<uint> pixel7 = Sse2.LoadScalarVector128((uint*)(baseOffset + 0x62)); - Vector128<uint> pixel8 = Sse2.LoadScalarVector128((uint*)(baseOffset + 0x72)); - - Vector128<uint> pixel12 = 
Sse2.UnpackLow(pixel1, pixel2); - Vector128<uint> pixel34 = Sse2.UnpackLow(pixel3, pixel4); - Vector128<uint> pixel56 = Sse2.UnpackLow(pixel5, pixel6); - Vector128<uint> pixel78 = Sse2.UnpackLow(pixel7, pixel8); - - Vector128<ulong> pixel1234 = Sse2.UnpackLow(pixel12.AsUInt64(), pixel34.AsUInt64()); - Vector128<ulong> pixel5678 = Sse2.UnpackLow(pixel56.AsUInt64(), pixel78.AsUInt64()); - - pixel1234 = Sse2.ShiftRightLogical(pixel1234, 2); - pixel5678 = Sse2.ShiftRightLogical(pixel5678, 2); - - Vector128<byte> pixel = Sse2.PackUnsignedSaturate(pixel1234.AsInt16(), pixel5678.AsInt16()); - - Sse2.Store(op, pixel); - - op += 0x10; - } - - for (; x < uvWidth; x++) - { - Pixel* px = ip + (uint)(x << 1); - - *op++ = Downsample(px->G); - *op++ = Downsample(px->B); - } - - op += strideGap; - } - } - } - } - else if (AdvSimd.Arm64.IsSupported) - { - int widthTrunc = uvWidth & ~7; - int strideGap = uvStride - uvWidth * 2; - - fixed (Pixel* srcPtr = input.Data) - { - Pixel* ip = srcPtr; - - fixed (byte* dstPtr = dstUv) - { - byte* op = dstPtr; - - for (int y = 0; y < uvHeight; y++, ip += input.Width * 2) - { - int x = 0; - - for (; x < widthTrunc; x += 8) - { - byte* baseOffset = (byte*)ip + (ulong)(uint)x * 16; - - Vector128<uint> pixel1 = AdvSimd.LoadAndReplicateToVector128((uint*)(baseOffset + 0x02)); - Vector128<uint> pixel2 = AdvSimd.LoadAndReplicateToVector128((uint*)(baseOffset + 0x12)); - Vector128<uint> pixel3 = AdvSimd.LoadAndReplicateToVector128((uint*)(baseOffset + 0x22)); - Vector128<uint> pixel4 = AdvSimd.LoadAndReplicateToVector128((uint*)(baseOffset + 0x32)); - Vector128<uint> pixel5 = AdvSimd.LoadAndReplicateToVector128((uint*)(baseOffset + 0x42)); - Vector128<uint> pixel6 = AdvSimd.LoadAndReplicateToVector128((uint*)(baseOffset + 0x52)); - Vector128<uint> pixel7 = AdvSimd.LoadAndReplicateToVector128((uint*)(baseOffset + 0x62)); - Vector128<uint> pixel8 = AdvSimd.LoadAndReplicateToVector128((uint*)(baseOffset + 0x72)); - - Vector128<uint> pixel12 = 
AdvSimd.Arm64.ZipLow(pixel1, pixel2); - Vector128<uint> pixel34 = AdvSimd.Arm64.ZipLow(pixel3, pixel4); - Vector128<uint> pixel56 = AdvSimd.Arm64.ZipLow(pixel5, pixel6); - Vector128<uint> pixel78 = AdvSimd.Arm64.ZipLow(pixel7, pixel8); - - Vector128<ulong> pixel1234 = AdvSimd.Arm64.ZipLow(pixel12.AsUInt64(), pixel34.AsUInt64()); - Vector128<ulong> pixel5678 = AdvSimd.Arm64.ZipLow(pixel56.AsUInt64(), pixel78.AsUInt64()); - - pixel1234 = AdvSimd.ShiftRightLogical(pixel1234, 2); - pixel5678 = AdvSimd.ShiftRightLogical(pixel5678, 2); - - Vector64<byte> pixelLower = AdvSimd.ExtractNarrowingLower(pixel1234.AsUInt16()); - - Vector128<byte> pixel = AdvSimd.ExtractNarrowingUpper(pixelLower, pixel5678.AsUInt16()); - - AdvSimd.Store(op, pixel); - - op += 0x10; - } - - for (; x < uvWidth; x++) - { - Pixel* px = ip + (uint)(x << 1); - - *op++ = Downsample(px->G); - *op++ = Downsample(px->B); - } - - op += strideGap; - } - } - } - } - else - { - for (int y = 0; y < uvHeight; y++) - { - for (int x = 0; x < uvWidth; x++) - { - int xx = x << 1; - int yy = y << 1; - - int uvOffs = y * uvStride + xx; - - dstUv[uvOffs + 0] = Downsample(input.GetG(xx, yy)); - dstUv[uvOffs + 1] = Downsample(input.GetB(xx, yy)); - } - } - } - - WriteBuffer( - rm, - dstUv, - offsets.ChromaUOffset, - outLinear, - config.OutChromaWidth + 1, - config.OutChromaHeight + 1, 2, - gobBlocksInY); - - rm.BufferPool.Return(dstUvIndex); - } - - private static void WriteBuffer( - ResourceManager rm, - ReadOnlySpan<byte> src, - uint offset, - bool linear, - int width, - int height, - int bytesPerPixel, - int gobBlocksInY) - { - if (linear) - { - rm.Gmm.WriteMapped(ExtendOffset(offset), src); - return; - } - - WriteBuffer(rm, src, offset, width, height, bytesPerPixel, gobBlocksInY); - } - - private static void WriteBuffer( - ResourceManager rm, - ReadOnlySpan<byte> src, - uint offset, - int width, - int height, - int bytesPerPixel, - int gobBlocksInY) - { - int outSize = GetBlockLinearSize(width, height, bytesPerPixel, 
gobBlocksInY); - int dstStride = GetPitch(width, bytesPerPixel); - - int dstIndex = rm.BufferPool.Rent(outSize, out Span<byte> dst); - - LayoutConverter.ConvertLinearToBlockLinear(dst, width, height, dstStride, bytesPerPixel, gobBlocksInY, src); - - rm.Gmm.WriteMapped(ExtendOffset(offset), dst); - - rm.BufferPool.Return(dstIndex); - } - } -} |
