diff options
| author | TSR Berry <20988865+TSRBerry@users.noreply.github.com> | 2023-04-08 01:22:00 +0200 |
|---|---|---|
| committer | Mary <thog@protonmail.com> | 2023-04-27 23:51:14 +0200 |
| commit | cee712105850ac3385cd0091a923438167433f9f (patch) | |
| tree | 4a5274b21d8b7f938c0d0ce18736d3f2993b11b1 /src/Ryujinx.Graphics.Gpu/Engine/InlineToMemory | |
| parent | cd124bda587ef09668a971fa1cac1c3f0cfc9f21 (diff) | |
Move solution and projects to src
Diffstat (limited to 'src/Ryujinx.Graphics.Gpu/Engine/InlineToMemory')
| -rw-r--r-- | src/Ryujinx.Graphics.Gpu/Engine/InlineToMemory/InlineToMemoryClass.cs | 273 | ||||
| -rw-r--r-- | src/Ryujinx.Graphics.Gpu/Engine/InlineToMemory/InlineToMemoryClassState.cs | 181 |
2 files changed, 454 insertions, 0 deletions
// File: src/Ryujinx.Graphics.Gpu/Engine/InlineToMemory/InlineToMemoryClass.cs
// (new file in this commit: mode 100644, index 00000000..e1d7e940)
using Ryujinx.Common;
using Ryujinx.Graphics.Device;
using Ryujinx.Graphics.Texture;
using System;
using System.Collections.Generic;
using System.Runtime.InteropServices;
using System.Runtime.Intrinsics;

namespace Ryujinx.Graphics.Gpu.Engine.InlineToMemory
{
    /// <summary>
    /// Represents an Inline-to-Memory engine class.
    /// </summary>
    /// <remarks>
    /// A transfer is started with <see cref="LaunchDma(ref InlineToMemoryClassState, int)"/>, which latches the
    /// destination parameters, and is then fed with <c>LoadInlineData</c> calls. Once enough words were
    /// received to cover the whole transfer, the accumulated data is written to the destination.
    /// </remarks>
    class InlineToMemoryClass : IDeviceState
    {
        private readonly GpuContext _context;
        private readonly GpuChannel _channel;

        // Only created when the class owns its own register state (see the constructor);
        // it is left null when this class is embedded in another engine.
        private readonly DeviceState<InlineToMemoryClassState> _state;

        private bool _isLinear;

        // _offset is counted in 32-bit words, _size is counted in bytes.
        private int _offset;
        private int _size;

        private ulong _dstGpuVa;
        private int _dstX;
        private int _dstY;
        private int _dstWidth;
        private int _dstHeight;
        private int _dstStride;
        private int _dstGobBlocksInY;
        private int _dstGobBlocksInZ;
        private int _lineLengthIn;
        private int _lineCount;

        // Starts as true: until LaunchDma is called there is no transfer in progress (and no buffer),
        // so any inline data pushed before the first launch must be ignored rather than dereference
        // a null buffer.
        private bool _finished = true;

        private int[] _buffer;

        /// <summary>
        /// Creates a new instance of the Inline-to-Memory engine class.
        /// </summary>
        /// <param name="context">GPU context</param>
        /// <param name="channel">GPU channel</param>
        /// <param name="initializeState">Indicates if the internal state should be initialized. Set to false if part of another engine</param>
        public InlineToMemoryClass(GpuContext context, GpuChannel channel, bool initializeState)
        {
            _context = context;
            _channel = channel;

            if (initializeState)
            {
                _state = new DeviceState<InlineToMemoryClassState>(new Dictionary<string, RwCallback>
                {
                    { nameof(InlineToMemoryClassState.LaunchDma), new RwCallback(LaunchDma, null) },
                    { nameof(InlineToMemoryClassState.LoadInlineData), new RwCallback(LoadInlineData, null) }
                });
            }
        }

        /// <summary>
        /// Creates a new instance of the inline-to-memory engine class.
        /// </summary>
        /// <param name="context">GPU context</param>
        /// <param name="channel">GPU channel</param>
        public InlineToMemoryClass(GpuContext context, GpuChannel channel) : this(context, channel, true)
        {
        }

        /// <summary>
        /// Reads data from the class registers.
        /// </summary>
        /// <param name="offset">Register byte offset</param>
        /// <returns>Data at the specified offset</returns>
        public int Read(int offset) => _state.Read(offset);

        /// <summary>
        /// Writes data to the class registers.
        /// </summary>
        /// <param name="offset">Register byte offset</param>
        /// <param name="data">Data to be written</param>
        public void Write(int offset, int data) => _state.Write(offset, data);

        /// <summary>
        /// Launches Inline-to-Memory engine DMA copy.
        /// </summary>
        /// <param name="argument">Method call argument</param>
        private void LaunchDma(int argument)
        {
            LaunchDma(ref _state.State, argument);
        }

        /// <summary>
        /// Launches Inline-to-Memory engine DMA copy.
        /// </summary>
        /// <param name="state">Current class state</param>
        /// <param name="argument">Method call argument</param>
        public void LaunchDma(ref InlineToMemoryClassState state, int argument)
        {
            // Bit 0 of the argument is the destination memory layout (0 = block linear, 1 = pitch).
            _isLinear = (argument & 1) != 0;

            _offset = 0;

            // Each line is padded to a multiple of 4 bytes, as the data arrives one word at a time.
            _size = (int)(BitUtils.AlignUp<uint>(state.LineLengthIn, 4) * state.LineCount);

            int count = _size / 4;

            // The staging buffer only grows; a larger buffer from a previous transfer is reused.
            if (_buffer == null || _buffer.Length < count)
            {
                _buffer = new int[count];
            }

            ulong dstGpuVa = ((ulong)state.OffsetOutUpperValue << 32) | state.OffsetOut;

            _dstGpuVa = dstGpuVa;
            _dstX = state.SetDstOriginBytesXV;
            _dstY = state.SetDstOriginSamplesYV;
            _dstWidth = (int)state.SetDstWidth;
            _dstHeight = (int)state.SetDstHeight;
            _dstStride = (int)state.PitchOut;
            _dstGobBlocksInY = 1 << (int)state.SetDstBlockSizeHeight;
            _dstGobBlocksInZ = 1 << (int)state.SetDstBlockSizeDepth;
            _lineLengthIn = (int)state.LineLengthIn;
            _lineCount = (int)state.LineCount;

            _finished = false;
        }

        /// <summary>
        /// Pushes a block of data to the Inline-to-Memory engine.
        /// </summary>
        /// <param name="data">Data to push</param>
        public void LoadInlineData(ReadOnlySpan<int> data)
        {
            if (!_finished)
            {
                // Never copy past the end of the staging buffer, even if more data than expected is pushed.
                int copySize = Math.Min(data.Length, _buffer.Length - _offset);
                data.Slice(0, copySize).CopyTo(new Span<int>(_buffer).Slice(_offset, copySize));

                _offset += copySize;

                if (_offset * 4 >= _size)
                {
                    FinishTransfer();
                }
            }
        }

        /// <summary>
        /// Pushes a word of data to the Inline-to-Memory engine.
        /// </summary>
        /// <param name="argument">Method call argument</param>
        public void LoadInlineData(int argument)
        {
            if (!_finished)
            {
                _buffer[_offset++] = argument;

                if (_offset * 4 >= _size)
                {
                    FinishTransfer();
                }
            }
        }

        /// <summary>
        /// Performs actual copy of the inline data after the transfer is finished.
        /// </summary>
        private void FinishTransfer()
        {
            var memoryManager = _channel.MemoryManager;

            var data = MemoryMarshal.Cast<int, byte>(_buffer).Slice(0, _size);

            if (_isLinear && _lineCount == 1)
            {
                // Single linear line: the data can be written to memory in one go.
                memoryManager.WriteTrackedResource(_dstGpuVa, data.Slice(0, _lineLengthIn));
                _context.AdvanceSequence();
            }
            else
            {
                // TODO: Verify if the destination X/Y and width/height are taken into account
                // for linear texture transfers. If not, we can use the fast path for that as well.
                // Right now the copy code at the bottom assumes that it is used on both which might be incorrect.
                if (!_isLinear)
                {
                    var target = memoryManager.Physical.TextureCache.FindTexture(
                        memoryManager,
                        _dstGpuVa,
                        1,
                        _dstStride,
                        _dstHeight,
                        _lineLengthIn,
                        _lineCount,
                        _isLinear,
                        _dstGobBlocksInY,
                        _dstGobBlocksInZ);

                    if (target != null)
                    {
                        target.SynchronizeMemory();
                        target.SetData(data, 0, 0, new GAL.Rectangle<int>(_dstX, _dstY, _lineLengthIn / target.Info.FormatInfo.BytesPerPixel, _lineCount));
                        target.SignalModified();

                        // The transfer is complete on this path too. Without this, any surplus word pushed
                        // afterwards would not be ignored and could index past the staging buffer or
                        // trigger the texture write a second time.
                        _finished = true;

                        return;
                    }
                }

                var dstCalculator = new OffsetCalculator(
                    _dstWidth,
                    _dstHeight,
                    _dstStride,
                    _isLinear,
                    _dstGobBlocksInY,
                    1);

                int srcOffset = 0;

                for (int y = _dstY; y < _dstY + _lineCount; y++)
                {
                    int x1 = _dstX;
                    int x2 = _dstX + _lineLengthIn;
                    int x1Round = BitUtils.AlignUp(_dstX, 16);
                    int x2Trunc = BitUtils.AlignDown(x2, 16);

                    int x = x1;

                    // Leading bytes, one at a time, up to the first 16-byte aligned X position.
                    if (x1Round <= x2)
                    {
                        for (; x < x1Round; x++, srcOffset++)
                        {
                            int dstOffset = dstCalculator.GetOffset(x, y);

                            ulong dstAddress = _dstGpuVa + (uint)dstOffset;

                            memoryManager.Write(dstAddress, data[srcOffset]);
                        }
                    }

                    // Aligned middle section, 16 bytes at a time.
                    for (; x < x2Trunc; x += 16, srcOffset += 16)
                    {
                        int dstOffset = dstCalculator.GetOffset(x, y);

                        ulong dstAddress = _dstGpuVa + (uint)dstOffset;

                        memoryManager.Write(dstAddress, MemoryMarshal.Cast<byte, Vector128<byte>>(data.Slice(srcOffset, 16))[0]);
                    }

                    // Trailing bytes, one at a time.
                    for (; x < x2; x++, srcOffset++)
                    {
                        int dstOffset = dstCalculator.GetOffset(x, y);

                        ulong dstAddress = _dstGpuVa + (uint)dstOffset;

                        memoryManager.Write(dstAddress, data[srcOffset]);
                    }

                    // All lines must be aligned to 4 bytes, as the data is pushed one word at a time.
                    // If our copy length is not a multiple of 4, then we need to skip the padding bytes here.
                    int misalignment = _lineLengthIn & 3;

                    if (misalignment != 0)
                    {
                        srcOffset += 4 - misalignment;
                    }
                }

                _context.AdvanceSequence();
            }

            _finished = true;
        }
    }
}

// File: src/Ryujinx.Graphics.Gpu/Engine/InlineToMemory/InlineToMemoryClassState.cs
// (new file in this commit: mode 100644, index 00000000..d0c82a5e)
// This file was auto-generated from NVIDIA official Maxwell definitions.

namespace Ryujinx.Graphics.Gpu.Engine.InlineToMemory
{
    /// <summary>
    /// Notify type.
    /// </summary>
    enum NotifyType
    {
        WriteOnly = 0,
        WriteThenAwaken = 1,
    }

    /// <summary>
    /// Width in GOBs of the destination texture.
    /// </summary>
    enum SetDstBlockSizeWidth
    {
        OneGob = 0,
    }

    /// <summary>
    /// Height in GOBs of the destination texture.
    /// </summary>
    enum SetDstBlockSizeHeight
    {
        OneGob = 0,
        TwoGobs = 1,
        FourGobs = 2,
        EightGobs = 3,
        SixteenGobs = 4,
        ThirtytwoGobs = 5,
    }

    /// <summary>
    /// Depth in GOBs of the destination texture.
    /// </summary>
    enum SetDstBlockSizeDepth
    {
        OneGob = 0,
        TwoGobs = 1,
        FourGobs = 2,
        EightGobs = 3,
        SixteenGobs = 4,
        ThirtytwoGobs = 5,
    }

    /// <summary>
    /// Memory layout of the destination texture.
    /// </summary>
    enum LaunchDmaDstMemoryLayout
    {
        Blocklinear = 0,
        Pitch = 1,
    }

    /// <summary>
    /// DMA completion type.
    /// </summary>
    enum LaunchDmaCompletionType
    {
        FlushDisable = 0,
        FlushOnly = 1,
        ReleaseSemaphore = 2,
    }

    /// <summary>
    /// DMA interrupt type.
    /// </summary>
    enum LaunchDmaInterruptType
    {
        None = 0,
        Interrupt = 1,
    }

    /// <summary>
    /// DMA semaphore structure size.
    /// </summary>
    enum LaunchDmaSemaphoreStructSize
    {
        FourWords = 0,
        OneWord = 1,
    }

    /// <summary>
    /// DMA semaphore reduction operation.
    /// </summary>
    enum LaunchDmaReductionOp
    {
        RedAdd = 0,
        RedMin = 1,
        RedMax = 2,
        RedInc = 3,
        RedDec = 4,
        RedAnd = 5,
        RedOr = 6,
        RedXor = 7,
    }

    /// <summary>
    /// DMA semaphore reduction format.
    /// </summary>
    enum LaunchDmaReductionFormat
    {
        Unsigned32 = 0,
        Signed32 = 1,
    }

    /// <summary>
    /// Inline-to-Memory class state.
    /// </summary>
    unsafe struct InlineToMemoryClassState
    {
#pragma warning disable CS0649
        public uint SetObject;
        public int SetObjectClassId => (int)((SetObject >> 0) & 0xFFFF);
        public int SetObjectEngineId => (int)((SetObject >> 16) & 0x1F);
        public fixed uint Reserved04[63];
        public uint NoOperation;
        public uint SetNotifyA;
        public int SetNotifyAAddressUpper => (int)((SetNotifyA >> 0) & 0xFF);
        public uint SetNotifyB;
        public uint Notify;
        public NotifyType NotifyType => (NotifyType)(Notify);
        public uint WaitForIdle;
        public fixed uint Reserved114[7];
        public uint SetGlobalRenderEnableA;
        public int SetGlobalRenderEnableAOffsetUpper => (int)((SetGlobalRenderEnableA >> 0) & 0xFF);
        public uint SetGlobalRenderEnableB;
        public uint SetGlobalRenderEnableC;
        public int SetGlobalRenderEnableCMode => (int)((SetGlobalRenderEnableC >> 0) & 0x7);
        public uint SendGoIdle;
        public uint PmTrigger;
        public uint PmTriggerWfi;
        public fixed uint Reserved148[2];
        public uint SetInstrumentationMethodHeader;
        public uint SetInstrumentationMethodData;
        public fixed uint Reserved158[10];
        public uint LineLengthIn;
        public uint LineCount;
        public uint OffsetOutUpper;
        public int OffsetOutUpperValue => (int)((OffsetOutUpper >> 0) & 0xFF);
        public uint OffsetOut;
        public uint PitchOut;
        public uint SetDstBlockSize;
        public SetDstBlockSizeWidth SetDstBlockSizeWidth => (SetDstBlockSizeWidth)((SetDstBlockSize >> 0) & 0xF);
        public SetDstBlockSizeHeight SetDstBlockSizeHeight => (SetDstBlockSizeHeight)((SetDstBlockSize >> 4) & 0xF);
        public SetDstBlockSizeDepth SetDstBlockSizeDepth => (SetDstBlockSizeDepth)((SetDstBlockSize >> 8) & 0xF);
        public uint SetDstWidth;
        public uint SetDstHeight;
        public uint SetDstDepth;
        public uint SetDstLayer;
        public uint SetDstOriginBytesX;
        public int SetDstOriginBytesXV => (int)((SetDstOriginBytesX >> 0) & 0xFFFFF);
        public uint SetDstOriginSamplesY;
        public int SetDstOriginSamplesYV => (int)((SetDstOriginSamplesY >> 0) & 0xFFFF);
        public uint LaunchDma;
        public LaunchDmaDstMemoryLayout LaunchDmaDstMemoryLayout => (LaunchDmaDstMemoryLayout)((LaunchDma >> 0) & 0x1);
        public LaunchDmaCompletionType LaunchDmaCompletionType => (LaunchDmaCompletionType)((LaunchDma >> 4) & 0x3);
        public LaunchDmaInterruptType LaunchDmaInterruptType => (LaunchDmaInterruptType)((LaunchDma >> 8) & 0x3);
        public LaunchDmaSemaphoreStructSize LaunchDmaSemaphoreStructSize => (LaunchDmaSemaphoreStructSize)((LaunchDma >> 12) & 0x1);
        public bool LaunchDmaReductionEnable => (LaunchDma & 0x2) != 0;
        public LaunchDmaReductionOp LaunchDmaReductionOp => (LaunchDmaReductionOp)((LaunchDma >> 13) & 0x7);
        public LaunchDmaReductionFormat LaunchDmaReductionFormat => (LaunchDmaReductionFormat)((LaunchDma >> 2) & 0x3);
        public bool LaunchDmaSysmembarDisable => (LaunchDma & 0x40) != 0;
        public uint LoadInlineData;
        public fixed uint Reserved1B8[9];
        public uint SetI2mSemaphoreA;
        public int SetI2mSemaphoreAOffsetUpper => (int)((SetI2mSemaphoreA >> 0) & 0xFF);
        public uint SetI2mSemaphoreB;
        public uint SetI2mSemaphoreC;
        public fixed uint Reserved1E8[2];
        public uint SetI2mSpareNoop00;
        public uint SetI2mSpareNoop01;
        public uint SetI2mSpareNoop02;
        public uint SetI2mSpareNoop03;
        public fixed uint Reserved200[3200];
        public MmeShadowScratch SetMmeShadowScratch;
#pragma warning restore CS0649
    }
}
