diff options
| author | gdkchan <gab.dark.100@gmail.com> | 2020-07-12 00:07:01 -0300 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2020-07-12 05:07:01 +0200 |
| commit | 4d02a2d2c0451b4de1f6de3bbce54c457cacebe2 (patch) | |
| tree | 120fe4fb8cfa1ac1c6ef4c97d92be47e955e8c0f /Ryujinx.Graphics.Texture | |
| parent | 38b26cf4242999fa7d8c550993ac0940cd03d55f (diff) | |
New NVDEC and VIC implementation (#1384)
* Initial NVDEC and VIC implementation
* Update FFmpeg.AutoGen to 4.3.0
* Add nvdec dependencies for Windows
* Unify some VP9 structures
* Rename VP9 structure fields
* Improvements to Video API
* XML docs for Common.Memory
* Remove now unused or redundant overloads from MemoryAccessor
* NVDEC UV surface read/write scalar paths
* Add FIXME comments about hacky things/stuff that will need to be fixed in the future
* Cleaned up VP9 memory allocation
* Remove some debug logs
* Rename some VP9 structs
* Remove unused struct
* No need to compile Ryujinx.Graphics.Host1x with unsafe anymore
* Name AsyncWorkQueue threads to make debugging easier
* Make Vp9PictureInfo a ref struct
* LayoutConverter no longer needs the depth argument (broken by rebase)
* Pooling of VP9 buffers, plus fix a memory leak on VP9
* Really wish VS could rename projects properly...
* Address feedback
* Remove using
* Catch OperationCanceledException
* Add licensing information
* Add THIRDPARTY.md to release too
Co-authored-by: Thog <me@thog.eu>
Diffstat (limited to 'Ryujinx.Graphics.Texture')
| -rw-r--r-- | Ryujinx.Graphics.Texture/LayoutConverter.cs | 168 | ||||
| -rw-r--r-- | Ryujinx.Graphics.Texture/OffsetCalculator.cs | 13 |
2 files changed, 181 insertions, 0 deletions
diff --git a/Ryujinx.Graphics.Texture/LayoutConverter.cs b/Ryujinx.Graphics.Texture/LayoutConverter.cs index 2ad57d59..ed046fb5 100644 --- a/Ryujinx.Graphics.Texture/LayoutConverter.cs +++ b/Ryujinx.Graphics.Texture/LayoutConverter.cs @@ -9,6 +9,90 @@ namespace Ryujinx.Graphics.Texture { private const int HostStrideAlignment = 4; + public static void ConvertBlockLinearToLinear( /* copies block-linear `data` into the linear buffer `dst`, one row at a time */ + Span<byte> dst, + int width, + int height, + int stride, + int bytesPerPixel, + int gobBlocksInY, + ReadOnlySpan<byte> data) + { + int gobHeight = gobBlocksInY * GobHeight; /* NOTE(review): gobHeight is never read in this method */ + + int strideTrunc = BitUtils.AlignDown(width * bytesPerPixel, 16); + int strideTrunc64 = BitUtils.AlignDown(width * bytesPerPixel, 64); + + int xStart = strideTrunc / bytesPerPixel; /* NOTE(review): integer division; for bytesPerPixel == 12 strideTrunc may not be a multiple of the pixel size - confirm the tail loop lines up with outPtr */ + + int outStrideGap = stride - width * bytesPerPixel; + + int alignment = GobStride / bytesPerPixel; + + int wAligned = BitUtils.AlignUp(width, alignment); + + BlockLinearLayout layoutConverter = new BlockLinearLayout(wAligned, height, gobBlocksInY, 1, bytesPerPixel); + + unsafe bool Convert<T>(Span<byte> output, ReadOnlySpan<byte> data) where T : unmanaged + { + fixed (byte* outputPtr = output, dataPtr = data) + { + byte* outPtr = outputPtr; + + for (int y = 0; y < height; y++) + { + layoutConverter.SetY(y); + + for (int x = 0; x < strideTrunc64; x += 64, outPtr += 64) /* 64 bytes per step: four 16-byte vectors read from offset, +0x20, +0x100 and +0x120 */ + { + byte* offset = dataPtr + layoutConverter.GetOffsetWithLineOffset64(x); + byte* offset2 = offset + 0x20; + byte* offset3 = offset + 0x100; + byte* offset4 = offset + 0x120; + + Vector128<byte> value = *(Vector128<byte>*)offset; + Vector128<byte> value2 = *(Vector128<byte>*)offset2; + Vector128<byte> value3 = *(Vector128<byte>*)offset3; + Vector128<byte> value4 = *(Vector128<byte>*)offset4; + + *(Vector128<byte>*)outPtr = value; + *(Vector128<byte>*)(outPtr + 16) = value2; + *(Vector128<byte>*)(outPtr + 32) = value3; + *(Vector128<byte>*)(outPtr + 48) = value4; + } + + for (int x = strideTrunc64; x < strideTrunc; x += 16, outPtr += 16) /* then one 16-byte vector per step, up to strideTrunc */ + { + byte* offset = dataPtr + 
layoutConverter.GetOffsetWithLineOffset16(x); + + *(Vector128<byte>*)outPtr = *(Vector128<byte>*)offset; + } + + for (int x = xStart; x < width; x++, outPtr += bytesPerPixel) /* remaining pixels are copied one T at a time */ + { + byte* offset = dataPtr + layoutConverter.GetOffset(x); + + *(T*)outPtr = *(T*)offset; + } + + outPtr += outStrideGap; /* skip the bytes between width * bytesPerPixel and stride */ + } + } + return true; + } + + bool _ = bytesPerPixel switch + { + 1 => Convert<byte>(dst, data), + 2 => Convert<ushort>(dst, data), + 4 => Convert<uint>(dst, data), + 8 => Convert<ulong>(dst, data), + 12 => Convert<Bpp12Pixel>(dst, data), + 16 => Convert<Vector128<byte>>(dst, data), + _ => throw new NotSupportedException($"Unable to convert {bytesPerPixel} bpp pixel format.") + }; + } + public static Span<byte> ConvertBlockLinearToLinear( int width, int height, @@ -190,6 +274,90 @@ namespace Ryujinx.Graphics.Texture return output; } + public static void ConvertLinearToBlockLinear( /* copies linear `data` into the block-linear buffer `dst`, one row at a time */ + Span<byte> dst, + int width, + int height, + int stride, + int bytesPerPixel, + int gobBlocksInY, + ReadOnlySpan<byte> data) + { + int gobHeight = gobBlocksInY * GobHeight; /* NOTE(review): gobHeight is never read in this method */ + + int strideTrunc = BitUtils.AlignDown(width * bytesPerPixel, 16); + int strideTrunc64 = BitUtils.AlignDown(width * bytesPerPixel, 64); + + int xStart = strideTrunc / bytesPerPixel; /* NOTE(review): integer division; for bytesPerPixel == 12 strideTrunc may not be a multiple of the pixel size - confirm the tail loop lines up with inPtr */ + + int inStrideGap = stride - width * bytesPerPixel; + + int alignment = GobStride / bytesPerPixel; + + int wAligned = BitUtils.AlignUp(width, alignment); + + BlockLinearLayout layoutConverter = new BlockLinearLayout(wAligned, height, gobBlocksInY, 1, bytesPerPixel); + + unsafe bool Convert<T>(Span<byte> output, ReadOnlySpan<byte> data) where T : unmanaged + { + fixed (byte* outputPtr = output, dataPtr = data) + { + byte* inPtr = dataPtr; + + for (int y = 0; y < height; y++) + { + layoutConverter.SetY(y); + + for (int x = 0; x < strideTrunc64; x += 64, inPtr += 64) /* 64 bytes per step: four 16-byte vectors written to offset, +0x20, +0x100 and +0x120 */ + { + byte* offset = outputPtr + layoutConverter.GetOffsetWithLineOffset64(x); + byte* offset2 = offset + 0x20; + byte* offset3 = offset + 0x100; + byte* offset4 = offset + 
0x120; + + Vector128<byte> value = *(Vector128<byte>*)inPtr; + Vector128<byte> value2 = *(Vector128<byte>*)(inPtr + 16); + Vector128<byte> value3 = *(Vector128<byte>*)(inPtr + 32); + Vector128<byte> value4 = *(Vector128<byte>*)(inPtr + 48); + + *(Vector128<byte>*)offset = value; + *(Vector128<byte>*)offset2 = value2; + *(Vector128<byte>*)offset3 = value3; + *(Vector128<byte>*)offset4 = value4; + } + + for (int x = strideTrunc64; x < strideTrunc; x += 16, inPtr += 16) /* then one 16-byte vector per step, up to strideTrunc */ + { + byte* offset = outputPtr + layoutConverter.GetOffsetWithLineOffset16(x); + + *(Vector128<byte>*)offset = *(Vector128<byte>*)inPtr; + } + + for (int x = xStart; x < width; x++, inPtr += bytesPerPixel) /* remaining pixels are copied one T at a time */ + { + byte* offset = outputPtr + layoutConverter.GetOffset(x); + + *(T*)offset = *(T*)inPtr; + } + + inPtr += inStrideGap; /* skip the bytes between width * bytesPerPixel and stride */ + } + } + return true; + } + + bool _ = bytesPerPixel switch + { + 1 => Convert<byte>(dst, data), + 2 => Convert<ushort>(dst, data), + 4 => Convert<uint>(dst, data), + 8 => Convert<ulong>(dst, data), + 12 => Convert<Bpp12Pixel>(dst, data), + 16 => Convert<Vector128<byte>>(dst, data), + _ => throw new NotSupportedException($"Unable to convert {bytesPerPixel} bpp pixel format.") + }; + } + public static Span<byte> ConvertLinearToBlockLinear( int width, int height, diff --git a/Ryujinx.Graphics.Texture/OffsetCalculator.cs b/Ryujinx.Graphics.Texture/OffsetCalculator.cs index 6d283954..dd4b6e7f 100644 --- a/Ryujinx.Graphics.Texture/OffsetCalculator.cs +++ b/Ryujinx.Graphics.Texture/OffsetCalculator.cs @@ -94,6 +94,19 @@ namespace Ryujinx.Graphics.Texture } } + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public int GetOffsetWithLineOffset64(int x) /* linear case returns x + _yPart; otherwise defers to _layoutConverter */ + { + if (_isLinear) + { + return x + _yPart; + } + else + { + return _layoutConverter.GetOffsetWithLineOffset64(x); + } + } + public (int offset, int size) GetRectangleRange(int x, int y, int width, int height) { if (_isLinear) |
