From bea1fc2e8d40ec792964852f57e7b884dfbd8306 Mon Sep 17 00:00:00 2001 From: riperiperi Date: Sat, 13 Jun 2020 23:31:06 +0100 Subject: Optimize texture format conversion, and MethodCopyBuffer (#1274) * Improve performance when converting texture formats. Still more work to do. * Speed up buffer -> texture copies. No longer copies byte by byte. Fast path when formats are identical. * Fix a few things, 64 byte block fast copy. * Spacing cleanup, unrelated change. * Fix base offset calculation for region copies. * Fix Linear -> BlockLinear * Fix some nits. (part 1 of review feedback) * Use a generic version of the Convert* functions rather than lambdas. This is some real monkey's paw shit. * Remove unnecessary span constructor. * Revert "Use a generic version of the Convert* functions rather than lambdas." This reverts commit aa43dcfbe8bba291eea4e10c68569af7a56a5851. * Fix bug with rectangle destination writing, better rectangle calculation for linear textures. --- Ryujinx.Graphics.Texture/LayoutConverter.cs | 194 ++++++++++++++++++++-------- 1 file changed, 141 insertions(+), 53 deletions(-) (limited to 'Ryujinx.Graphics.Texture/LayoutConverter.cs') diff --git a/Ryujinx.Graphics.Texture/LayoutConverter.cs b/Ryujinx.Graphics.Texture/LayoutConverter.cs index ce2b37b5..525271c4 100644 --- a/Ryujinx.Graphics.Texture/LayoutConverter.cs +++ b/Ryujinx.Graphics.Texture/LayoutConverter.cs @@ -1,6 +1,6 @@ using Ryujinx.Common; using System; - +using System.Runtime.Intrinsics; using static Ryujinx.Graphics.Texture.BlockLinearConstants; namespace Ryujinx.Graphics.Texture @@ -64,11 +64,14 @@ namespace Ryujinx.Graphics.Texture } int strideTrunc = BitUtils.AlignDown(w * bytesPerPixel, 16); + int strideTrunc64 = BitUtils.AlignDown(w * bytesPerPixel, 64); int xStart = strideTrunc / bytesPerPixel; int stride = BitUtils.AlignUp(w * bytesPerPixel, HostStrideAlignment); + int outStrideGap = stride - w * bytesPerPixel; + int alignment = gobWidth; if (d < gobBlocksInZ || w <= gobWidth || h <= gobHeight) @@ -86,36 +89,74 @@ namespace Ryujinx.Graphics.Texture mipGobBlocksInZ, bytesPerPixel); - for (int layer = 0; layer < layers; layer++) + unsafe bool Convert(Span output, ReadOnlySpan data) where T : unmanaged { - int inBaseOffset = layer * sizeInfo.LayerSize + sizeInfo.GetMipOffset(level); - - for (int z = 0; z < d; z++) - for (int y = 0; y < h; y++) + fixed (byte* outputPtr = output, dataPtr = data) { - for (int x = 0; x < strideTrunc; x += 16) + byte* outPtr = outputPtr + outOffs; + for (int layer = 0; layer < layers; layer++) { - int offset = inBaseOffset + layoutConverter.GetOffsetWithLineOffset(x, y, z); - - Span dest = output.Slice(outOffs + x, 16); - - data.Slice(offset, 16).CopyTo(dest); - } - - for (int x = xStart; x < w; x++) - { - int offset = inBaseOffset + layoutConverter.GetOffset(x, y, z); - - Span dest = output.Slice(outOffs + x * bytesPerPixel, bytesPerPixel); - - data.Slice(offset, bytesPerPixel).CopyTo(dest); + byte* inBaseOffset = dataPtr + (layer * sizeInfo.LayerSize + sizeInfo.GetMipOffset(level)); + + for (int z = 0; z < d; z++) + { + layoutConverter.SetZ(z); + for (int y = 0; y < h; y++) + { + layoutConverter.SetY(y); + + for (int x = 0; x < strideTrunc64; x += 64, outPtr += 64) + { + byte* offset = inBaseOffset + layoutConverter.GetOffsetWithLineOffset64(x); + byte* offset2 = offset + 0x20; + byte* offset3 = offset + 0x100; + byte* offset4 = offset + 0x120; + + Vector128 value = *(Vector128*)offset; + Vector128 value2 = *(Vector128*)offset2; + Vector128 value3 = *(Vector128*)offset3; + Vector128 value4 = *(Vector128*)offset4; + + *(Vector128*)outPtr = value; + *(Vector128*)(outPtr + 16) = value2; + *(Vector128*)(outPtr + 32) = value3; + *(Vector128*)(outPtr + 48) = value4; + } + + for (int x = strideTrunc64; x < strideTrunc; x += 16, outPtr += 16) + { + byte* offset = inBaseOffset + layoutConverter.GetOffsetWithLineOffset16(x); + + *(Vector128*)outPtr = *(Vector128*)offset; + } + + for (int x = xStart; x < w; x++, outPtr += bytesPerPixel) + { + byte* offset = inBaseOffset + layoutConverter.GetOffset(x); + + *(T*)outPtr = *(T*)offset; + } + + outPtr += outStrideGap; + } + } } - - outOffs += stride; + outOffs += stride * h * d * layers; } + return true; } - } + bool _ = bytesPerPixel switch + { + 1 => Convert(output, data), + 2 => Convert(output, data), + 4 => Convert(output, data), + 8 => Convert(output, data), + 12 => Convert(output, data), + 16 => Convert>(output, data), + _ => throw new NotSupportedException($"Unable to convert ${bytesPerPixel} bpp pixel format.") + }; + } return output; } @@ -132,22 +173,18 @@ namespace Ryujinx.Graphics.Texture int h = BitUtils.DivRoundUp(height, blockHeight); int outStride = BitUtils.AlignUp(w * bytesPerPixel, HostStrideAlignment); + int lineSize = w * bytesPerPixel; Span output = new byte[h * outStride]; int outOffs = 0; + int inOffs = 0; for (int y = 0; y < h; y++) { - for (int x = 0; x < w; x++) - { - int offset = y * stride + x * bytesPerPixel; - - Span dest = output.Slice(outOffs + x * bytesPerPixel, bytesPerPixel); - - data.Slice(offset, bytesPerPixel).CopyTo(dest); - } + data.Slice(inOffs, lineSize).CopyTo(output.Slice(outOffs, lineSize)); + inOffs += stride; outOffs += outStride; } @@ -198,8 +235,15 @@ namespace Ryujinx.Graphics.Texture mipGobBlocksInZ >>= 1; } + int strideTrunc = BitUtils.AlignDown(w * bytesPerPixel, 16); + int strideTrunc64 = BitUtils.AlignDown(w * bytesPerPixel, 64); + + int xStart = strideTrunc / bytesPerPixel; + int stride = BitUtils.AlignUp(w * bytesPerPixel, HostStrideAlignment); + int inStrideGap = stride - w * bytesPerPixel; + int alignment = gobWidth; if (d < gobBlocksInZ || w <= gobWidth || h <= gobHeight) @@ -217,25 +261,73 @@ namespace Ryujinx.Graphics.Texture mipGobBlocksInZ, bytesPerPixel); - for (int layer = 0; layer < layers; layer++) + unsafe bool Convert(Span output, ReadOnlySpan data) where T : unmanaged { - int outBaseOffset = layer * sizeInfo.LayerSize + sizeInfo.GetMipOffset(level); - - for (int z = 0; z < d; z++) - for (int y = 0; y < h; y++) + fixed (byte* outputPtr = output, dataPtr = data) { - for (int x = 0; x < w; x++) + byte* inPtr = dataPtr + inOffs; + for (int layer = 0; layer < layers; layer++) { - int offset = outBaseOffset + layoutConverter.GetOffset(x, y, z); - - Span dest = output.Slice(offset, bytesPerPixel); - - data.Slice(inOffs + x * bytesPerPixel, bytesPerPixel).CopyTo(dest); + byte* outBaseOffset = outputPtr + (layer * sizeInfo.LayerSize + sizeInfo.GetMipOffset(level)); + + for (int z = 0; z < d; z++) + { + layoutConverter.SetZ(z); + for (int y = 0; y < h; y++) + { + layoutConverter.SetY(y); + + for (int x = 0; x < strideTrunc64; x += 64, inPtr += 64) + { + byte* offset = outBaseOffset + layoutConverter.GetOffsetWithLineOffset64(x); + byte* offset2 = offset + 0x20; + byte* offset3 = offset + 0x100; + byte* offset4 = offset + 0x120; + + Vector128 value = *(Vector128*)inPtr; + Vector128 value2 = *(Vector128*)(inPtr + 16); + Vector128 value3 = *(Vector128*)(inPtr + 32); + Vector128 value4 = *(Vector128*)(inPtr + 48); + + *(Vector128*)offset = value; + *(Vector128*)offset2 = value2; + *(Vector128*)offset3 = value3; + *(Vector128*)offset4 = value4; + } + + for (int x = strideTrunc64; x < strideTrunc; x += 16, inPtr += 16) + { + byte* offset = outBaseOffset + layoutConverter.GetOffsetWithLineOffset16(x); + + *(Vector128*)offset = *(Vector128*)inPtr; + } + + for (int x = xStart; x < w; x++, inPtr += bytesPerPixel) + { + byte* offset = outBaseOffset + layoutConverter.GetOffset(x); + + *(T*)offset = *(T*)inPtr; + } + + inPtr += inStrideGap; + } + } } - - inOffs += stride; + inOffs += stride * h * d * layers; } + return true; } + + bool _ = bytesPerPixel switch + { + 1 => Convert(output, data), + 2 => Convert(output, data), + 4 => Convert(output, data), + 8 => Convert(output, data), + 12 => Convert(output, data), + 16 => Convert>(output, data), + _ => throw new NotSupportedException($"Unable to convert ${bytesPerPixel} bpp pixel format.") + }; } return output; @@ -254,23 +346,19 @@ namespace Ryujinx.Graphics.Texture int h = BitUtils.DivRoundUp(height, blockHeight); int inStride = BitUtils.AlignUp(w * bytesPerPixel, HostStrideAlignment); + int lineSize = width * bytesPerPixel; Span output = new byte[h * stride]; int inOffs = 0; + int outOffs = 0; for (int y = 0; y < h; y++) { - for (int x = 0; x < w; x++) - { - int offset = y * stride + x * bytesPerPixel; - - Span dest = output.Slice(offset, bytesPerPixel); - - data.Slice(inOffs + x * bytesPerPixel, bytesPerPixel).CopyTo(dest); - } + data.Slice(inOffs, lineSize).CopyTo(output.Slice(outOffs, lineSize)); inOffs += inStride; + outOffs += stride; } return output; -- cgit v1.2.3