// Reconstructed from patch cee712105850ac3385cd0091a923438167433f9f
// ("Move solution and projects to src", TSR Berry, 2023-04-08), which creates
// src/Ryujinx.Graphics.Gpu/Engine/Dma/DmaClass.cs.
//
// NOTE(review): the extracted patch text had lost everything written between angle brackets
// (generic type arguments and XML documentation tags). They have been restored here. Arguments
// that could not be derived from the surrounding code are flagged with NOTE(review) comments
// and should be confirmed against the repository.

using Ryujinx.Common;
using Ryujinx.Graphics.Device;
using Ryujinx.Graphics.Gpu.Engine.Threed;
using Ryujinx.Graphics.Gpu.Memory;
using Ryujinx.Graphics.Texture;
using System;
using System.Collections.Generic;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using System.Runtime.Intrinsics;

namespace Ryujinx.Graphics.Gpu.Engine.Dma
{
    /// <summary>
    /// Represents a DMA copy engine class.
    /// </summary>
    class DmaClass : IDeviceState
    {
        private readonly GpuContext _context;
        private readonly GpuChannel _channel;
        private readonly ThreedClass _3dEngine;

        // NOTE(review): state type assumed to be DmaClassState (the constructor registers
        // nameof(DmaClassState.LaunchDma) and every register is read through _state.State).
        private readonly DeviceState<DmaClassState> _state;

        /// <summary>
        /// Copy flags passed on DMA launch.
        /// </summary>
        [Flags]
        private enum CopyFlags
        {
            SrcLinear = 1 << 7,
            DstLinear = 1 << 8,
            MultiLineEnable = 1 << 9,
            RemapEnable = 1 << 10
        }

        /// <summary>
        /// Texture parameters for copy.
        /// </summary>
        private struct TextureParams
        {
            /// <summary>
            /// Copy region X coordinate.
            /// </summary>
            public readonly int RegionX;

            /// <summary>
            /// Copy region Y coordinate.
            /// </summary>
            public readonly int RegionY;

            /// <summary>
            /// Offset from the base pointer of the data in memory.
            /// </summary>
            public readonly int BaseOffset;

            /// <summary>
            /// Bytes per pixel.
            /// </summary>
            public readonly int Bpp;

            /// <summary>
            /// Whether the texture is linear. If false, the texture is block linear.
            /// </summary>
            public readonly bool Linear;

            /// <summary>
            /// Pixel offset from XYZ coordinates calculator.
            /// </summary>
            public readonly OffsetCalculator Calculator;

            /// <summary>
            /// Creates texture parameters.
            /// </summary>
            /// <param name="regionX">Copy region X coordinate</param>
            /// <param name="regionY">Copy region Y coordinate</param>
            /// <param name="baseOffset">Offset from the base pointer of the data in memory</param>
            /// <param name="bpp">Bytes per pixel</param>
            /// <param name="linear">Whether the texture is linear. If false, the texture is block linear</param>
            /// <param name="calculator">Pixel offset from XYZ coordinates calculator</param>
            public TextureParams(int regionX, int regionY, int baseOffset, int bpp, bool linear, OffsetCalculator calculator)
            {
                RegionX = regionX;
                RegionY = regionY;
                BaseOffset = baseOffset;
                Bpp = bpp;
                Linear = linear;
                Calculator = calculator;
            }
        }

        /// <summary>
        /// Tightly packed 3-byte value, used as the element type when shuffling 3-byte components.
        /// </summary>
        [StructLayout(LayoutKind.Sequential, Size = 3, Pack = 1)]
        private struct UInt24
        {
            public byte Byte0;
            public byte Byte1;
            public byte Byte2;
        }

        /// <summary>
        /// Creates a new instance of the DMA copy engine class.
        /// </summary>
        /// <param name="context">GPU context</param>
        /// <param name="channel">GPU channel</param>
        /// <param name="threedEngine">3D engine</param>
        public DmaClass(GpuContext context, GpuChannel channel, ThreedClass threedEngine)
        {
            _context = context;
            _channel = channel;
            _3dEngine = threedEngine;

            // Only LaunchDma has a write callback; it has no read callback (null).
            _state = new DeviceState<DmaClassState>(new Dictionary<string, RwCallback>
            {
                { nameof(DmaClassState.LaunchDma), new RwCallback(LaunchDma, null) }
            });
        }

        /// <summary>
        /// Reads data from the class registers.
        /// </summary>
        /// <param name="offset">Register byte offset</param>
        /// <returns>Data at the specified offset</returns>
        public int Read(int offset) => _state.Read(offset);

        /// <summary>
        /// Writes data to the class registers.
        /// </summary>
        /// <param name="offset">Register byte offset</param>
        /// <param name="data">Data to be written</param>
        public void Write(int offset, int data) => _state.Write(offset, data);

        /// <summary>
        /// Determine if a buffer-to-texture region covers the entirety of a texture.
        /// </summary>
        /// <param name="tex">Texture to compare</param>
        /// <param name="linear">True if the texture is linear, false if block linear</param>
        /// <param name="bpp">Texture bytes per pixel</param>
        /// <param name="stride">Texture stride</param>
        /// <param name="xCount">Number of pixels to be copied</param>
        /// <param name="yCount">Number of lines to be copied</param>
        /// <returns>True if the copied region covers the whole texture, false otherwise</returns>
        private static bool IsTextureCopyComplete(DmaTexture tex, bool linear, int bpp, int stride, int xCount, int yCount)
        {
            if (linear)
            {
                // If the stride is negative, the texture has to be flipped, so
                // the fast copy is not trivial, use the slow path.
                if (stride <= 0)
                {
                    return false;
                }

                int alignWidth = Constants.StrideAlignment / bpp;
                return stride / bpp == BitUtils.AlignUp(xCount, alignWidth);
            }
            else
            {
                int alignWidth = Constants.GobAlignment / bpp;
                return tex.RegionX == 0 &&
                       tex.RegionY == 0 &&
                       tex.Width == BitUtils.AlignUp(xCount, alignWidth) &&
                       tex.Height == yCount;
            }
        }

        /// <summary>
        /// Releases a semaphore for a given LaunchDma method call.
        /// </summary>
        /// <param name="argument">The LaunchDma call argument</param>
        private void ReleaseSemaphore(int argument)
        {
            // The semaphore type is a 2-bit field starting at bit 3 of the argument.
            LaunchDmaSemaphoreType type = (LaunchDmaSemaphoreType)((argument >> 3) & 0x3);
            if (type != LaunchDmaSemaphoreType.None)
            {
                ulong address = ((ulong)_state.State.SetSemaphoreA << 32) | _state.State.SetSemaphoreB;
                if (type == LaunchDmaSemaphoreType.ReleaseOneWordSemaphore)
                {
                    _channel.MemoryManager.Write(address, _state.State.SetSemaphorePayload);
                }
                else /* if (type == LaunchDmaSemaphoreType.ReleaseFourWordSemaphore) */
                {
                    // The timestamp is written first, then the payload widened to 64 bits.
                    _channel.MemoryManager.Write(address + 8, _context.GetTimestamp());
                    _channel.MemoryManager.Write(address, (ulong)_state.State.SetSemaphorePayload);
                }
            }
        }

        /// <summary>
        /// Performs a buffer to buffer, or buffer to texture copy.
        /// </summary>
        /// <param name="argument">The LaunchDma call argument</param>
        /// <exception cref="NotSupportedException">
        /// Thrown when the source bytes per pixel or the remap component size has no matching copy routine
        /// </exception>
        private void DmaCopy(int argument)
        {
            var memoryManager = _channel.MemoryManager;

            CopyFlags copyFlags = (CopyFlags)argument;

            bool srcLinear = copyFlags.HasFlag(CopyFlags.SrcLinear);
            bool dstLinear = copyFlags.HasFlag(CopyFlags.DstLinear);
            bool copy2D = copyFlags.HasFlag(CopyFlags.MultiLineEnable);
            bool remap = copyFlags.HasFlag(CopyFlags.RemapEnable);

            uint size = _state.State.LineLengthIn;

            // Nothing to copy.
            if (size == 0)
            {
                return;
            }

            ulong srcGpuVa = ((ulong)_state.State.OffsetInUpperUpper << 32) | _state.State.OffsetInLower;
            ulong dstGpuVa = ((ulong)_state.State.OffsetOutUpperUpper << 32) | _state.State.OffsetOutLower;

            int xCount = (int)_state.State.LineLengthIn;
            int yCount = (int)_state.State.LineCount;

            _3dEngine.CreatePendingSyncs();
            _3dEngine.FlushUboDirty();

            if (copy2D)
            {
                // Buffer to texture copy.
                int componentSize = (int)_state.State.SetRemapComponentsComponentSize + 1;
                int srcComponents = (int)_state.State.SetRemapComponentsNumSrcComponents + 1;
                int dstComponents = (int)_state.State.SetRemapComponentsNumDstComponents + 1;
                int srcBpp = remap ? srcComponents * componentSize : 1;
                int dstBpp = remap ? dstComponents * componentSize : 1;

                // Reinterpret the register block starting at Set*BlockSize as a DmaTexture.
                // NOTE(review): the source type argument (uint) was lost in extraction - confirm.
                var dst = Unsafe.As<uint, DmaTexture>(ref _state.State.SetDstBlockSize);
                var src = Unsafe.As<uint, DmaTexture>(ref _state.State.SetSrcBlockSize);

                int srcRegionX = 0, srcRegionY = 0, dstRegionX = 0, dstRegionY = 0;

                // The region origin is only taken from the registers for block linear textures.
                if (!srcLinear)
                {
                    srcRegionX = src.RegionX;
                    srcRegionY = src.RegionY;
                }

                if (!dstLinear)
                {
                    dstRegionX = dst.RegionX;
                    dstRegionY = dst.RegionY;
                }

                int srcStride = (int)_state.State.PitchIn;
                int dstStride = (int)_state.State.PitchOut;

                var srcCalculator = new OffsetCalculator(
                    src.Width,
                    src.Height,
                    srcStride,
                    srcLinear,
                    src.MemoryLayout.UnpackGobBlocksInY(),
                    src.MemoryLayout.UnpackGobBlocksInZ(),
                    srcBpp);

                var dstCalculator = new OffsetCalculator(
                    dst.Width,
                    dst.Height,
                    dstStride,
                    dstLinear,
                    dst.MemoryLayout.UnpackGobBlocksInY(),
                    dst.MemoryLayout.UnpackGobBlocksInZ(),
                    dstBpp);

                (int srcBaseOffset, int srcSize) = srcCalculator.GetRectangleRange(srcRegionX, srcRegionY, xCount, yCount);
                (int dstBaseOffset, int dstSize) = dstCalculator.GetRectangleRange(dstRegionX, dstRegionY, xCount, yCount);

                // With a negative stride, move the base offset back by (yCount - 1) rows.
                if (srcLinear && srcStride < 0)
                {
                    srcBaseOffset += srcStride * (yCount - 1);
                }

                if (dstLinear && dstStride < 0)
                {
                    dstBaseOffset += dstStride * (yCount - 1);
                }

                ReadOnlySpan<byte> srcSpan = memoryManager.GetSpan(srcGpuVa + (ulong)srcBaseOffset, srcSize, true);

                bool completeSource = IsTextureCopyComplete(src, srcLinear, srcBpp, srcStride, xCount, yCount);
                bool completeDest = IsTextureCopyComplete(dst, dstLinear, dstBpp, dstStride, xCount, yCount);

                if (completeSource && completeDest)
                {
                    var target = memoryManager.Physical.TextureCache.FindTexture(
                        memoryManager,
                        dstGpuVa,
                        dstBpp,
                        dstStride,
                        dst.Height,
                        xCount,
                        yCount,
                        dstLinear,
                        dst.MemoryLayout.UnpackGobBlocksInY(),
                        dst.MemoryLayout.UnpackGobBlocksInZ());

                    if (target != null)
                    {
                        // A cached texture matches the destination: convert the source data
                        // to linear and upload it to that texture directly.
                        byte[] data;
                        if (srcLinear)
                        {
                            data = LayoutConverter.ConvertLinearStridedToLinear(
                                target.Info.Width,
                                target.Info.Height,
                                1,
                                1,
                                xCount * srcBpp,
                                srcStride,
                                target.Info.FormatInfo.BytesPerPixel,
                                srcSpan);
                        }
                        else
                        {
                            data = LayoutConverter.ConvertBlockLinearToLinear(
                                src.Width,
                                src.Height,
                                src.Depth,
                                1,
                                1,
                                1,
                                1,
                                1,
                                srcBpp,
                                src.MemoryLayout.UnpackGobBlocksInY(),
                                src.MemoryLayout.UnpackGobBlocksInZ(),
                                1,
                                new SizeInfo((int)target.Size),
                                srcSpan);
                        }

                        target.SynchronizeMemory();
                        target.SetData(data);
                        target.SignalModified();
                        return;
                    }
                    else if (srcCalculator.LayoutMatches(dstCalculator))
                    {
                        // No layout conversion has to be performed, just copy the data entirely.
                        memoryManager.Write(dstGpuVa + (ulong)dstBaseOffset, srcSpan);
                        return;
                    }
                }

                // OPT: This allocates a (potentially) huge temporary array and then copies an existing
                // region of memory into it, data that might get overwritten entirely anyways. Ideally this should
                // all be rewritten to use pooled arrays, but that gets complicated with packed data and strides
                Span<byte> dstSpan = memoryManager.GetSpan(dstGpuVa + (ulong)dstBaseOffset, dstSize).ToArray();

                TextureParams srcParams = new TextureParams(srcRegionX, srcRegionY, srcBaseOffset, srcBpp, srcLinear, srcCalculator);
                TextureParams dstParams = new TextureParams(dstRegionX, dstRegionY, dstBaseOffset, dstBpp, dstLinear, dstCalculator);

                // If remapping is disabled, we always copy the components directly, in order.
                // If it's enabled, but the mapping is just XYZW, we also copy them in order.
                bool isIdentityRemap = !remap ||
                    (_state.State.SetRemapComponentsDstX == SetRemapComponentsDst.SrcX &&
                    (dstComponents < 2 || _state.State.SetRemapComponentsDstY == SetRemapComponentsDst.SrcY) &&
                    (dstComponents < 3 || _state.State.SetRemapComponentsDstZ == SetRemapComponentsDst.SrcZ) &&
                    (dstComponents < 4 || _state.State.SetRemapComponentsDstW == SetRemapComponentsDst.SrcW));

                if (isIdentityRemap)
                {
                    // The order of the components doesn't change, so we can just copy directly
                    // (with layout conversion if necessary).

                    switch (srcBpp)
                    {
                        case 1: Copy<byte>(dstSpan, srcSpan, dstParams, srcParams); break;
                        case 2: Copy<ushort>(dstSpan, srcSpan, dstParams, srcParams); break;
                        case 4: Copy<uint>(dstSpan, srcSpan, dstParams, srcParams); break;
                        case 8: Copy<ulong>(dstSpan, srcSpan, dstParams, srcParams); break;
                        // NOTE(review): 12-byte pixel type assumed to be Bpp12Pixel from
                        // Ryujinx.Graphics.Texture; the type argument was lost in extraction - confirm.
                        case 12: Copy<Bpp12Pixel>(dstSpan, srcSpan, dstParams, srcParams); break;
                        case 16: Copy<Vector128<byte>>(dstSpan, srcSpan, dstParams, srcParams); break;
                        default: throw new NotSupportedException($"Unable to copy {srcBpp} bpp pixel format.");
                    }
                }
                else
                {
                    // The order or value of the components might change.

                    switch (componentSize)
                    {
                        case 1: CopyShuffle<byte>(dstSpan, srcSpan, dstParams, srcParams); break;
                        case 2: CopyShuffle<ushort>(dstSpan, srcSpan, dstParams, srcParams); break;
                        case 3: CopyShuffle<UInt24>(dstSpan, srcSpan, dstParams, srcParams); break;
                        case 4: CopyShuffle<uint>(dstSpan, srcSpan, dstParams, srcParams); break;
                        default: throw new NotSupportedException($"Unable to copy {componentSize} component size.");
                    }
                }

                memoryManager.Write(dstGpuVa + (ulong)dstBaseOffset, dstSpan);
            }
            else
            {
                if (remap &&
                    _state.State.SetRemapComponentsDstX == SetRemapComponentsDst.ConstA &&
                    _state.State.SetRemapComponentsDstY == SetRemapComponentsDst.ConstA &&
                    _state.State.SetRemapComponentsDstZ == SetRemapComponentsDst.ConstA &&
                    _state.State.SetRemapComponentsDstW == SetRemapComponentsDst.ConstA &&
                    _state.State.SetRemapComponentsNumSrcComponents == SetRemapComponentsNumComponents.One &&
                    _state.State.SetRemapComponentsNumDstComponents == SetRemapComponentsNumComponents.One &&
                    _state.State.SetRemapComponentsComponentSize == SetRemapComponentsComponentSize.Four)
                {
                    // Fast path for clears when remap is enabled.
                    // One 4-byte component per element, hence size * 4 bytes.
                    memoryManager.Physical.BufferCache.ClearBuffer(memoryManager, dstGpuVa, size * 4, _state.State.SetRemapConstA);
                }
                else
                {
                    // TODO: Implement remap functionality.
                    // Buffer to buffer copy.

                    bool srcIsPitchKind = memoryManager.GetKind(srcGpuVa).IsPitch();
                    bool dstIsPitchKind = memoryManager.GetKind(dstGpuVa).IsPitch();

                    if (!srcIsPitchKind && dstIsPitchKind)
                    {
                        CopyGobBlockLinearToLinear(memoryManager, srcGpuVa, dstGpuVa, size);
                    }
                    else if (srcIsPitchKind && !dstIsPitchKind)
                    {
                        CopyGobLinearToBlockLinear(memoryManager, srcGpuVa, dstGpuVa, size);
                    }
                    else
                    {
                        memoryManager.Physical.BufferCache.CopyBuffer(memoryManager, srcGpuVa, dstGpuVa, size);
                    }
                }
            }
        }

        /// <summary>
        /// Copies data from one texture to another, while performing layout conversion if necessary.
        /// </summary>
        /// <typeparam name="T">Pixel type</typeparam>
        /// <param name="dstSpan">Destination texture memory region</param>
        /// <param name="srcSpan">Source texture memory region</param>
        /// <param name="dst">Destination texture parameters</param>
        /// <param name="src">Source texture parameters</param>
        private unsafe void Copy<T>(Span<byte> dstSpan, ReadOnlySpan<byte> srcSpan, TextureParams dst, TextureParams src) where T : unmanaged
        {
            int xCount = (int)_state.State.LineLengthIn;
            int yCount = (int)_state.State.LineCount;

            if (src.Linear && dst.Linear && src.Bpp == dst.Bpp)
            {
                // Optimized path for purely linear copies - we don't need to calculate every single byte offset,
                // and we can make use of Span.CopyTo which is very very fast (even compared to pointers)
                for (int y = 0; y < yCount; y++)
                {
                    src.Calculator.SetY(src.RegionY + y);
                    dst.Calculator.SetY(dst.RegionY + y);
                    int srcOffset = src.Calculator.GetOffset(src.RegionX);
                    int dstOffset = dst.Calculator.GetOffset(dst.RegionX);
                    srcSpan.Slice(srcOffset - src.BaseOffset, xCount * src.Bpp)
                        .CopyTo(dstSpan.Slice(dstOffset - dst.BaseOffset, xCount * dst.Bpp));
                }
            }
            else
            {
                fixed (byte* dstPtr = dstSpan, srcPtr = srcSpan)
                {
                    byte* dstBase = dstPtr - dst.BaseOffset; // Layout offset is relative to the base, so we need to subtract the span's offset.
                    byte* srcBase = srcPtr - src.BaseOffset;

                    for (int y = 0; y < yCount; y++)
                    {
                        src.Calculator.SetY(src.RegionY + y);
                        dst.Calculator.SetY(dst.RegionY + y);

                        for (int x = 0; x < xCount; x++)
                        {
                            int srcOffset = src.Calculator.GetOffset(src.RegionX + x);
                            int dstOffset = dst.Calculator.GetOffset(dst.RegionX + x);

                            *(T*)(dstBase + dstOffset) = *(T*)(srcBase + srcOffset);
                        }
                    }
                }
            }
        }

        /// <summary>
        /// Sets texture pixel data to a constant value, while performing layout conversion if necessary.
        /// </summary>
        /// <typeparam name="T">Pixel type</typeparam>
        /// <param name="dstSpan">Destination texture memory region</param>
        /// <param name="dst">Destination texture parameters</param>
        /// <param name="fillValue">Constant pixel value to be set</param>
        private unsafe void Fill<T>(Span<byte> dstSpan, TextureParams dst, T fillValue) where T : unmanaged
        {
            int xCount = (int)_state.State.LineLengthIn;
            int yCount = (int)_state.State.LineCount;

            fixed (byte* dstPtr = dstSpan)
            {
                byte* dstBase = dstPtr - dst.BaseOffset; // Layout offset is relative to the base, so we need to subtract the span's offset.

                for (int y = 0; y < yCount; y++)
                {
                    dst.Calculator.SetY(dst.RegionY + y);

                    for (int x = 0; x < xCount; x++)
                    {
                        int dstOffset = dst.Calculator.GetOffset(dst.RegionX + x);

                        *(T*)(dstBase + dstOffset) = fillValue;
                    }
                }
            }
        }

        /// <summary>
        /// Copies data from one texture to another, while performing layout conversion and component shuffling if necessary.
        /// </summary>
        /// <typeparam name="T">Pixel type</typeparam>
        /// <param name="dstSpan">Destination texture memory region</param>
        /// <param name="srcSpan">Source texture memory region</param>
        /// <param name="dst">Destination texture parameters</param>
        /// <param name="src">Source texture parameters</param>
        private void CopyShuffle<T>(Span<byte> dstSpan, ReadOnlySpan<byte> srcSpan, TextureParams dst, TextureParams src) where T : unmanaged
        {
            int dstComponents = (int)_state.State.SetRemapComponentsNumDstComponents + 1;
            int componentSize = Unsafe.SizeOf<T>();

            for (int i = 0; i < dstComponents; i++)
            {
                SetRemapComponentsDst componentsDst = i switch
                {
                    0 => _state.State.SetRemapComponentsDstX,
                    1 => _state.State.SetRemapComponentsDstY,
                    2 => _state.State.SetRemapComponentsDstZ,
                    _ => _state.State.SetRemapComponentsDstW
                };

                // Component i of every destination pixel starts componentSize * i bytes into the pixel,
                // so each pass writes through a destination view shifted by that amount.
                Span<byte> dstComponent = dstSpan.Slice(componentSize * i);

                // Selectors not listed below leave the destination component untouched.
                // NOTE(review): the source type argument of Unsafe.As (uint) for the remap
                // constants was lost in extraction - confirm.
                switch (componentsDst)
                {
                    case SetRemapComponentsDst.SrcX:
                        Copy<T>(dstComponent, srcSpan, dst, src);
                        break;
                    case SetRemapComponentsDst.SrcY:
                        Copy<T>(dstComponent, srcSpan.Slice(componentSize), dst, src);
                        break;
                    case SetRemapComponentsDst.SrcZ:
                        Copy<T>(dstComponent, srcSpan.Slice(componentSize * 2), dst, src);
                        break;
                    case SetRemapComponentsDst.SrcW:
                        Copy<T>(dstComponent, srcSpan.Slice(componentSize * 3), dst, src);
                        break;
                    case SetRemapComponentsDst.ConstA:
                        Fill<T>(dstComponent, dst, Unsafe.As<uint, T>(ref _state.State.SetRemapConstA));
                        break;
                    case SetRemapComponentsDst.ConstB:
                        Fill<T>(dstComponent, dst, Unsafe.As<uint, T>(ref _state.State.SetRemapConstB));
                        break;
                }
            }
        }

        /// <summary>
        /// Copies block linear data with block linear GOBs to a block linear destination with linear GOBs.
        /// </summary>
        /// <param name="memoryManager">GPU memory manager</param>
        /// <param name="srcGpuVa">Source GPU virtual address</param>
        /// <param name="dstGpuVa">Destination GPU virtual address</param>
        /// <param name="size">Size in bytes of the copy</param>
        private static void CopyGobBlockLinearToLinear(MemoryManager memoryManager, ulong srcGpuVa, ulong dstGpuVa, ulong size)
        {
            // When both addresses and the size are 16-byte aligned, move 16 bytes at a time.
            if (((srcGpuVa | dstGpuVa | size) & 0xf) == 0)
            {
                for (ulong offset = 0; offset < size; offset += 16)
                {
                    Vector128<byte> data = memoryManager.Read<Vector128<byte>>(ConvertGobLinearToBlockLinearAddress(srcGpuVa + offset), true);
                    memoryManager.Write(dstGpuVa + offset, data);
                }
            }
            else
            {
                for (ulong offset = 0; offset < size; offset++)
                {
                    byte data = memoryManager.Read<byte>(ConvertGobLinearToBlockLinearAddress(srcGpuVa + offset), true);
                    memoryManager.Write(dstGpuVa + offset, data);
                }
            }
        }

        /// <summary>
        /// Copies block linear data with linear GOBs to a block linear destination with block linear GOBs.
        /// </summary>
        /// <param name="memoryManager">GPU memory manager</param>
        /// <param name="srcGpuVa">Source GPU virtual address</param>
        /// <param name="dstGpuVa">Destination GPU virtual address</param>
        /// <param name="size">Size in bytes of the copy</param>
        private static void CopyGobLinearToBlockLinear(MemoryManager memoryManager, ulong srcGpuVa, ulong dstGpuVa, ulong size)
        {
            // When both addresses and the size are 16-byte aligned, move 16 bytes at a time.
            if (((srcGpuVa | dstGpuVa | size) & 0xf) == 0)
            {
                for (ulong offset = 0; offset < size; offset += 16)
                {
                    Vector128<byte> data = memoryManager.Read<Vector128<byte>>(srcGpuVa + offset, true);
                    memoryManager.Write(ConvertGobLinearToBlockLinearAddress(dstGpuVa + offset), data);
                }
            }
            else
            {
                for (ulong offset = 0; offset < size; offset++)
                {
                    byte data = memoryManager.Read<byte>(srcGpuVa + offset, true);
                    memoryManager.Write(ConvertGobLinearToBlockLinearAddress(dstGpuVa + offset), data);
                }
            }
        }

        /// <summary>
        /// Calculates the GOB block linear address from a linear address.
        /// </summary>
        /// <param name="address">Linear address</param>
        /// <returns>Block linear address</returns>
        private static ulong ConvertGobLinearToBlockLinearAddress(ulong address)
        {
            // Bits 0-3 and bits 9 and above pass through unchanged; bits 4-8 are permuted:
            // y2 y1 y0 x5 x4 x3 x2 x1 x0 -> x5 y2 y1 x4 y0 x3 x2 x1 x0
            return (address & ~0x1f0UL) |
                ((address & 0x40) >> 2) |
                ((address & 0x10) << 1) |
                ((address & 0x180) >> 1) |
                ((address & 0x20) << 3);
        }

        /// <summary>
        /// Performs a buffer to buffer, or buffer to texture copy, then optionally releases a semaphore.
        /// </summary>
        /// <param name="argument">Method call argument</param>
        private void LaunchDma(int argument)
        {
            DmaCopy(argument);
            ReleaseSemaphore(argument);
        }
    }
}