diff options
| author | riperiperi <rhy3756547@hotmail.com> | 2020-06-13 23:31:06 +0100 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2020-06-13 19:31:06 -0300 |
| commit | bea1fc2e8d40ec792964852f57e7b884dfbd8306 (patch) | |
| tree | 8cbbb5bb49d8c1ff635ddd1bb81e58f39a1b7eaf /Ryujinx.Graphics.Gpu/Engine/MethodCopyBuffer.cs | |
| parent | ce983f360b3353bbcd73f3e58c24a23a22e1c94d (diff) | |
Optimize texture format conversion, and MethodCopyBuffer (#1274)
* Improve performance when converting texture formats.
Still more work to do.
* Speed up buffer -> texture copies.
No longer copies byte by byte. Fast path when formats are identical.
* Fix a few things, 64 byte block fast copy.
* Spacing cleanup, unrelated change.
* Fix base offset calculation for region copies.
* Fix Linear -> BlockLinear
* Fix some nits. (part 1 of review feedback)
* Use a generic version of the Convert* functions rather than lambdas.
This is some real monkey's paw shit.
* Remove unnecessary span constructor.
* Revert "Use a generic version of the Convert* functions rather than lambdas."
This reverts commit aa43dcfbe8bba291eea4e10c68569af7a56a5851.
* Fix bug with rectangle destination writing, better rectangle calculation for linear textures.
Diffstat (limited to 'Ryujinx.Graphics.Gpu/Engine/MethodCopyBuffer.cs')
| -rw-r--r-- | Ryujinx.Graphics.Gpu/Engine/MethodCopyBuffer.cs | 56 |
1 file changed, 48 insertions, 8 deletions
```diff
diff --git a/Ryujinx.Graphics.Gpu/Engine/MethodCopyBuffer.cs b/Ryujinx.Graphics.Gpu/Engine/MethodCopyBuffer.cs
index 7244db32..2e6fe0ab 100644
--- a/Ryujinx.Graphics.Gpu/Engine/MethodCopyBuffer.cs
+++ b/Ryujinx.Graphics.Gpu/Engine/MethodCopyBuffer.cs
@@ -1,6 +1,7 @@
 using Ryujinx.Graphics.Gpu.State;
 using Ryujinx.Graphics.Texture;
 using System;
+using System.Runtime.Intrinsics;
 
 namespace Ryujinx.Graphics.Gpu.Engine
 {
@@ -56,19 +57,58 @@ namespace Ryujinx.Graphics.Gpu.Engine
                 ulong srcBaseAddress = _context.MemoryManager.Translate(cbp.SrcAddress.Pack());
                 ulong dstBaseAddress = _context.MemoryManager.Translate(cbp.DstAddress.Pack());
 
-                for (int y = 0; y < cbp.YCount; y++)
-                for (int x = 0; x < cbp.XCount; x++)
+                (int srcBaseOffset, int srcSize) = srcCalculator.GetRectangleRange(src.RegionX, src.RegionY, cbp.XCount, cbp.YCount);
+                (int dstBaseOffset, int dstSize) = dstCalculator.GetRectangleRange(dst.RegionX, dst.RegionY, cbp.XCount, cbp.YCount);
+
+                ReadOnlySpan<byte> srcSpan = _context.PhysicalMemory.GetSpan(srcBaseAddress + (ulong)srcBaseOffset, srcSize);
+                Span<byte> dstSpan = _context.PhysicalMemory.GetSpan(dstBaseAddress + (ulong)dstBaseOffset, dstSize).ToArray();
+
+                bool completeSource = src.RegionX == 0 && src.RegionY == 0 && src.Width == cbp.XCount && src.Height == cbp.YCount;
+                bool completeDest = dst.RegionX == 0 && dst.RegionY == 0 && dst.Width == cbp.XCount && dst.Height == cbp.YCount;
+
+                if (completeSource && completeDest && srcCalculator.LayoutMatches(dstCalculator))
                 {
-                    int srcOffset = srcCalculator.GetOffset(src.RegionX + x, src.RegionY + y);
-                    int dstOffset = dstCalculator.GetOffset(dst.RegionX + x, dst.RegionY + y);
+                    srcSpan.CopyTo(dstSpan); // No layout conversion has to be performed, just copy the data entirely.
+                }
+                else
+                {
+                    unsafe bool Convert<T>(Span<byte> dstSpan, ReadOnlySpan<byte> srcSpan) where T : unmanaged
+                    {
+                        fixed (byte* dstPtr = dstSpan, srcPtr = srcSpan)
+                        {
+                            byte* dstBase = dstPtr - dstBaseOffset; // Layout offset is relative to the base, so we need to subtract the span's offset.
+                            byte* srcBase = srcPtr - srcBaseOffset;
+
+                            for (int y = 0; y < cbp.YCount; y++)
+                            {
+                                srcCalculator.SetY(src.RegionY + y);
+                                dstCalculator.SetY(dst.RegionY + y);
 
-                    ulong srcAddress = srcBaseAddress + (ulong)srcOffset;
-                    ulong dstAddress = dstBaseAddress + (ulong)dstOffset;
+                                for (int x = 0; x < cbp.XCount; x++)
+                                {
+                                    int srcOffset = srcCalculator.GetOffset(src.RegionX + x);
+                                    int dstOffset = dstCalculator.GetOffset(dst.RegionX + x);
 
-                    ReadOnlySpan<byte> pixel = _context.PhysicalMemory.GetSpan(srcAddress, srcBpp);
+                                    *(T*)(dstBase + dstOffset) = *(T*)(srcBase + srcOffset);
+                                }
+                            }
+                        }
+                        return true;
+                    }
 
-                    _context.PhysicalMemory.Write(dstAddress, pixel);
+                    bool _ = srcBpp switch
+                    {
+                        1 => Convert<byte>(dstSpan, srcSpan),
+                        2 => Convert<ushort>(dstSpan, srcSpan),
+                        4 => Convert<uint>(dstSpan, srcSpan),
+                        8 => Convert<ulong>(dstSpan, srcSpan),
+                        12 => Convert<Bpp12Pixel>(dstSpan, srcSpan),
+                        16 => Convert<Vector128<byte>>(dstSpan, srcSpan),
+                        _ => throw new NotSupportedException($"Unable to copy ${srcBpp} bpp pixel format.")
+                    };
                 }
+
+                _context.PhysicalMemory.Write(dstBaseAddress + (ulong)dstBaseOffset, dstSpan);
             }
             else
             {
```
