aboutsummaryrefslogtreecommitdiff
path: root/src/Ryujinx.Graphics.Gpu/Engine/InlineToMemory
diff options
context:
space:
mode:
authorTSR Berry <20988865+TSRBerry@users.noreply.github.com>2023-04-08 01:22:00 +0200
committerMary <thog@protonmail.com>2023-04-27 23:51:14 +0200
commitcee712105850ac3385cd0091a923438167433f9f (patch)
tree4a5274b21d8b7f938c0d0ce18736d3f2993b11b1 /src/Ryujinx.Graphics.Gpu/Engine/InlineToMemory
parentcd124bda587ef09668a971fa1cac1c3f0cfc9f21 (diff)
Move solution and projects to src
Diffstat (limited to 'src/Ryujinx.Graphics.Gpu/Engine/InlineToMemory')
-rw-r--r--src/Ryujinx.Graphics.Gpu/Engine/InlineToMemory/InlineToMemoryClass.cs273
-rw-r--r--src/Ryujinx.Graphics.Gpu/Engine/InlineToMemory/InlineToMemoryClassState.cs181
2 files changed, 454 insertions, 0 deletions
diff --git a/src/Ryujinx.Graphics.Gpu/Engine/InlineToMemory/InlineToMemoryClass.cs b/src/Ryujinx.Graphics.Gpu/Engine/InlineToMemory/InlineToMemoryClass.cs
new file mode 100644
index 00000000..e1d7e940
--- /dev/null
+++ b/src/Ryujinx.Graphics.Gpu/Engine/InlineToMemory/InlineToMemoryClass.cs
@@ -0,0 +1,273 @@
+using Ryujinx.Common;
+using Ryujinx.Graphics.Device;
+using Ryujinx.Graphics.Texture;
+using System;
+using System.Collections.Generic;
+using System.Runtime.InteropServices;
+using System.Runtime.Intrinsics;
+
+namespace Ryujinx.Graphics.Gpu.Engine.InlineToMemory
+{
+ /// <summary>
+ /// Represents an Inline-to-Memory engine class.
+ /// </summary>
+ class InlineToMemoryClass : IDeviceState
+ {
+ private readonly GpuContext _context;
+ private readonly GpuChannel _channel;
+ private readonly DeviceState<InlineToMemoryClassState> _state;
+
+ private bool _isLinear;
+
+ private int _offset;
+ private int _size;
+
+ private ulong _dstGpuVa;
+ private int _dstX;
+ private int _dstY;
+ private int _dstWidth;
+ private int _dstHeight;
+ private int _dstStride;
+ private int _dstGobBlocksInY;
+ private int _dstGobBlocksInZ;
+ private int _lineLengthIn;
+ private int _lineCount;
+
+ private bool _finished;
+
+ private int[] _buffer;
+
+ /// <summary>
+ /// Creates a new instance of the Inline-to-Memory engine class.
+ /// </summary>
+ /// <param name="context">GPU context</param>
+ /// <param name="channel">GPU channel</param>
+ /// <param name="initializeState">True to allocate the internal register state and hook up its method callbacks. Set to false if part of another engine</param>
+ public InlineToMemoryClass(GpuContext context, GpuChannel channel, bool initializeState)
+ {
+     _context = context;
+     _channel = channel;
+
+     // No private register state is created in this case; _state stays null.
+     if (!initializeState)
+     {
+         return;
+     }
+
+     var callbacks = new Dictionary<string, RwCallback>
+     {
+         [nameof(InlineToMemoryClassState.LaunchDma)] = new RwCallback(LaunchDma, null),
+         [nameof(InlineToMemoryClassState.LoadInlineData)] = new RwCallback(LoadInlineData, null),
+     };
+
+     _state = new DeviceState<InlineToMemoryClassState>(callbacks);
+ }
+
+ /// <summary>
+ /// Creates a new instance of the Inline-to-Memory engine class that allocates its own register state.
+ /// </summary>
+ /// <param name="context">GPU context</param>
+ /// <param name="channel">GPU channel</param>
+ public InlineToMemoryClass(GpuContext context, GpuChannel channel) : this(context, channel, initializeState: true)
+ {
+ }
+
+ /// <summary>
+ /// Reads a value from the class registers by forwarding to the internal state.
+ /// </summary>
+ /// <param name="offset">Register byte offset</param>
+ /// <returns>Data at the specified offset</returns>
+ public int Read(int offset)
+ {
+     return _state.Read(offset);
+ }
+
+ /// <summary>
+ /// Writes a value to the class registers by forwarding to the internal state.
+ /// </summary>
+ /// <param name="offset">Register byte offset</param>
+ /// <param name="data">Data to be written</param>
+ public void Write(int offset, int data)
+ {
+     _state.Write(offset, data);
+ }
+
+ /// <summary>
+ /// Callback registered for the LaunchDma register; launches the Inline-to-Memory engine DMA copy using the internal state.
+ /// </summary>
+ /// <param name="argument">Method call argument</param>
+ private void LaunchDma(int argument) => LaunchDma(ref _state.State, argument);
+
+ /// <summary>
+ /// Launches Inline-to-Memory engine DMA copy: captures the transfer parameters from the class state
+ /// and prepares the staging buffer that the pushed inline data is collected into.
+ /// </summary>
+ /// <param name="state">Current class state</param>
+ /// <param name="argument">Method call argument</param>
+ public void LaunchDma(ref InlineToMemoryClassState state, int argument)
+ {
+     // Bit 0 of the argument selects a linear destination.
+     _isLinear = (argument & 1) != 0;
+
+     // Each line is rounded up to a whole number of 32-bit words.
+     _size = (int)(BitUtils.AlignUp<uint>(state.LineLengthIn, 4) * state.LineCount);
+     _offset = 0;
+
+     int wordCount = _size / 4;
+
+     // Reuse the existing staging buffer unless it is missing or too small.
+     bool needsNewBuffer = _buffer == null || _buffer.Length < wordCount;
+
+     if (needsNewBuffer)
+     {
+         _buffer = new int[wordCount];
+     }
+
+     // Source dimensions.
+     _lineLengthIn = (int)state.LineLengthIn;
+     _lineCount = (int)state.LineCount;
+
+     // Destination address: upper bits come from OffsetOutUpper, lower 32 bits from OffsetOut.
+     _dstGpuVa = ((ulong)state.OffsetOutUpperValue << 32) | state.OffsetOut;
+
+     // Destination origin, dimensions and layout.
+     _dstX = state.SetDstOriginBytesXV;
+     _dstY = state.SetDstOriginSamplesYV;
+     _dstWidth = (int)state.SetDstWidth;
+     _dstHeight = (int)state.SetDstHeight;
+     _dstStride = (int)state.PitchOut;
+
+     // The block size registers hold the base-2 logarithm of the GOB count.
+     _dstGobBlocksInY = 1 << (int)state.SetDstBlockSizeHeight;
+     _dstGobBlocksInZ = 1 << (int)state.SetDstBlockSizeDepth;
+
+     _finished = false;
+ }
+
+ /// <summary>
+ /// Pushes a block of data to the Inline-to-Memory engine.
+ /// Data is ignored once the current transfer has been marked as finished.
+ /// </summary>
+ /// <param name="data">Data to push</param>
+ public void LoadInlineData(ReadOnlySpan<int> data)
+ {
+     if (_finished)
+     {
+         return;
+     }
+
+     // Never copy more words than the staging buffer still has room for.
+     int wordsToCopy = Math.Min(data.Length, _buffer.Length - _offset);
+     ReadOnlySpan<int> source = data.Slice(0, wordsToCopy);
+     Span<int> destination = new Span<int>(_buffer).Slice(_offset, wordsToCopy);
+
+     source.CopyTo(destination);
+     _offset += wordsToCopy;
+
+     // _offset counts words while _size counts bytes.
+     if (_offset * 4 >= _size)
+     {
+         FinishTransfer();
+     }
+ }
+
+ /// <summary>
+ /// Pushes a word of data to the Inline-to-Memory engine.
+ /// The word is ignored once the current transfer has been marked as finished.
+ /// </summary>
+ /// <param name="argument">Method call argument</param>
+ public void LoadInlineData(int argument)
+ {
+     if (_finished)
+     {
+         return;
+     }
+
+     _buffer[_offset++] = argument;
+
+     // _offset counts words while _size counts bytes.
+     bool allDataReceived = _offset * 4 >= _size;
+
+     if (allDataReceived)
+     {
+         FinishTransfer();
+     }
+ }
+
+ /// <summary>
+ /// Performs actual copy of the inline data after the transfer is finished.
+ /// </summary>
+ /// <remarks>
+ /// Three paths exist: a single tracked write for one-line linear transfers, an upload through a matching
+ /// cached texture for non-linear transfers, and a per-line copy through an offset calculator otherwise.
+ /// Every path that completes marks the transfer as finished, so data pushed afterwards is ignored
+ /// until the next DMA launch.
+ /// </remarks>
+ private void FinishTransfer()
+ {
+     var memoryManager = _channel.MemoryManager;
+
+     // View the word buffer as bytes; only the first _size bytes belong to this transfer.
+     var data = MemoryMarshal.Cast<int, byte>(_buffer).Slice(0, _size);
+
+     if (_isLinear && _lineCount == 1)
+     {
+         memoryManager.WriteTrackedResource(_dstGpuVa, data.Slice(0, _lineLengthIn));
+         _context.AdvanceSequence();
+     }
+     else
+     {
+         // TODO: Verify if the destination X/Y and width/height are taken into account
+         // for linear texture transfers. If not, we can use the fast path for that as well.
+         // Right now the copy code at the bottom assumes that it is used on both which might be incorrect.
+         if (!_isLinear)
+         {
+             var target = memoryManager.Physical.TextureCache.FindTexture(
+                 memoryManager,
+                 _dstGpuVa,
+                 1,
+                 _dstStride,
+                 _dstHeight,
+                 _lineLengthIn,
+                 _lineCount,
+                 _isLinear,
+                 _dstGobBlocksInY,
+                 _dstGobBlocksInZ);
+
+             if (target != null)
+             {
+                 target.SynchronizeMemory();
+                 target.SetData(data, 0, 0, new GAL.Rectangle<int>(_dstX, _dstY, _lineLengthIn / target.Info.FormatInfo.BytesPerPixel, _lineCount));
+                 target.SignalModified();
+
+                 // Mark the transfer complete on this path too. Previously this early return skipped it,
+                 // so a later LoadInlineData call (without a new launch) was not ignored and could index
+                 // past the staging buffer or run the transfer a second time.
+                 _finished = true;
+
+                 return;
+             }
+         }
+
+         var dstCalculator = new OffsetCalculator(
+             _dstWidth,
+             _dstHeight,
+             _dstStride,
+             _isLinear,
+             _dstGobBlocksInY,
+             1);
+
+         int srcOffset = 0;
+
+         for (int y = _dstY; y < _dstY + _lineCount; y++)
+         {
+             int x1 = _dstX;
+             int x2 = _dstX + _lineLengthIn;
+             int x1Round = BitUtils.AlignUp(_dstX, 16);
+             int x2Trunc = BitUtils.AlignDown(x2, 16);
+
+             int x = x1;
+
+             // Head: single bytes up to the first 16-byte aligned X position, but only when that
+             // position lies inside the line. Otherwise the tail loop below copies the whole line.
+             if (x1Round <= x2)
+             {
+                 for (; x < x1Round; x++, srcOffset++)
+                 {
+                     int dstOffset = dstCalculator.GetOffset(x, y);
+
+                     ulong dstAddress = _dstGpuVa + (uint)dstOffset;
+
+                     memoryManager.Write(dstAddress, data[srcOffset]);
+                 }
+             }
+
+             // Body: 16 bytes at a time while a full 16-byte group remains.
+             for (; x < x2Trunc; x += 16, srcOffset += 16)
+             {
+                 int dstOffset = dstCalculator.GetOffset(x, y);
+
+                 ulong dstAddress = _dstGpuVa + (uint)dstOffset;
+
+                 memoryManager.Write(dstAddress, MemoryMarshal.Cast<byte, Vector128<byte>>(data.Slice(srcOffset, 16))[0]);
+             }
+
+             // Tail: remaining single bytes up to the end of the line.
+             for (; x < x2; x++, srcOffset++)
+             {
+                 int dstOffset = dstCalculator.GetOffset(x, y);
+
+                 ulong dstAddress = _dstGpuVa + (uint)dstOffset;
+
+                 memoryManager.Write(dstAddress, data[srcOffset]);
+             }
+
+             // All lines must be aligned to 4 bytes, as the data is pushed one word at a time.
+             // If our copy length is not a multiple of 4, then we need to skip the padding bytes here.
+             int misalignment = _lineLengthIn & 3;
+
+             if (misalignment != 0)
+             {
+                 srcOffset += 4 - misalignment;
+             }
+         }
+
+         _context.AdvanceSequence();
+     }
+
+     _finished = true;
+ }
+ }
+}
diff --git a/src/Ryujinx.Graphics.Gpu/Engine/InlineToMemory/InlineToMemoryClassState.cs b/src/Ryujinx.Graphics.Gpu/Engine/InlineToMemory/InlineToMemoryClassState.cs
new file mode 100644
index 00000000..d0c82a5e
--- /dev/null
+++ b/src/Ryujinx.Graphics.Gpu/Engine/InlineToMemory/InlineToMemoryClassState.cs
@@ -0,0 +1,181 @@
+// This file was auto-generated from NVIDIA official Maxwell definitions.
+
+namespace Ryujinx.Graphics.Gpu.Engine.InlineToMemory
+{
+ /// <summary>
+ /// Notify type. InlineToMemoryClassState.NotifyType casts the raw Notify register value to this enum.
+ /// </summary>
+ enum NotifyType
+ {
+ WriteOnly = 0,
+ WriteThenAwaken = 1,
+ }
+
+ /// <summary>
+ /// Width in GOBs of the destination texture. Decoded from bits 0-3 of the SetDstBlockSize register.
+ /// </summary>
+ enum SetDstBlockSizeWidth
+ {
+ OneGob = 0, // Only value defined for the width.
+ }
+
+ /// <summary>
+ /// Height in GOBs of the destination texture. Decoded from bits 4-7 of the SetDstBlockSize register; the numeric value is the base-2 logarithm of the GOB count.
+ /// </summary>
+ enum SetDstBlockSizeHeight
+ {
+ OneGob = 0, // 1 << 0
+ TwoGobs = 1, // 1 << 1
+ FourGobs = 2, // 1 << 2
+ EightGobs = 3, // 1 << 3
+ SixteenGobs = 4, // 1 << 4
+ ThirtytwoGobs = 5, // 1 << 5
+ }
+
+ /// <summary>
+ /// Depth in GOBs of the destination texture. Decoded from bits 8-11 of the SetDstBlockSize register; the numeric value is the base-2 logarithm of the GOB count.
+ /// </summary>
+ enum SetDstBlockSizeDepth
+ {
+ OneGob = 0, // 1 << 0
+ TwoGobs = 1, // 1 << 1
+ FourGobs = 2, // 1 << 2
+ EightGobs = 3, // 1 << 3
+ SixteenGobs = 4, // 1 << 4
+ ThirtytwoGobs = 5, // 1 << 5
+ }
+
+ /// <summary>
+ /// Memory layout of the destination texture. Decoded from bit 0 of the LaunchDma register.
+ /// </summary>
+ enum LaunchDmaDstMemoryLayout
+ {
+ Blocklinear = 0,
+ Pitch = 1, // InlineToMemoryClass.LaunchDma treats a set bit 0 as a linear destination.
+ }
+
+ /// <summary>
+ /// DMA completion type. Decoded from bits 4-5 of the LaunchDma register.
+ /// </summary>
+ enum LaunchDmaCompletionType
+ {
+ FlushDisable = 0,
+ FlushOnly = 1,
+ ReleaseSemaphore = 2,
+ }
+
+ /// <summary>
+ /// DMA interrupt type. Decoded from bits 8-9 of the LaunchDma register.
+ /// </summary>
+ enum LaunchDmaInterruptType
+ {
+ None = 0,
+ Interrupt = 1,
+ }
+
+ /// <summary>
+ /// DMA semaphore structure size. Decoded from bit 12 of the LaunchDma register.
+ /// </summary>
+ enum LaunchDmaSemaphoreStructSize
+ {
+ FourWords = 0,
+ OneWord = 1,
+ }
+
+ /// <summary>
+ /// DMA semaphore reduction operation. Decoded from bits 13-15 of the LaunchDma register.
+ /// </summary>
+ enum LaunchDmaReductionOp
+ {
+ RedAdd = 0,
+ RedMin = 1,
+ RedMax = 2,
+ RedInc = 3,
+ RedDec = 4,
+ RedAnd = 5,
+ RedOr = 6,
+ RedXor = 7,
+ }
+
+ /// <summary>
+ /// DMA semaphore reduction format. Decoded from bits 2-3 of the LaunchDma register.
+ /// </summary>
+ enum LaunchDmaReductionFormat
+ {
+ Unsigned32 = 0,
+ Signed32 = 1,
+ }
+
+ /// <summary>
+ /// Inline-to-Memory class state. Used by InlineToMemoryClass as the register layout type of its DeviceState; ReservedXX field names appear to carry their byte offset in hex (consistent with 4 bytes per uint and no padding).
+ /// </summary>
+ unsafe struct InlineToMemoryClassState
+ {
+#pragma warning disable CS0649
+ public uint SetObject;
+ public int SetObjectClassId => (int)((SetObject >> 0) & 0xFFFF); // Bits 0-15 of SetObject.
+ public int SetObjectEngineId => (int)((SetObject >> 16) & 0x1F); // Bits 16-20 of SetObject.
+ public fixed uint Reserved04[63]; // Gap of 63 words before NoOperation.
+ public uint NoOperation;
+ public uint SetNotifyA;
+ public int SetNotifyAAddressUpper => (int)((SetNotifyA >> 0) & 0xFF); // Bits 0-7 of SetNotifyA.
+ public uint SetNotifyB;
+ public uint Notify;
+ public NotifyType NotifyType => (NotifyType)(Notify); // Whole register value, unmasked.
+ public uint WaitForIdle;
+ public fixed uint Reserved114[7]; // Gap of 7 words.
+ public uint SetGlobalRenderEnableA;
+ public int SetGlobalRenderEnableAOffsetUpper => (int)((SetGlobalRenderEnableA >> 0) & 0xFF); // Bits 0-7.
+ public uint SetGlobalRenderEnableB;
+ public uint SetGlobalRenderEnableC;
+ public int SetGlobalRenderEnableCMode => (int)((SetGlobalRenderEnableC >> 0) & 0x7); // Bits 0-2.
+ public uint SendGoIdle;
+ public uint PmTrigger;
+ public uint PmTriggerWfi;
+ public fixed uint Reserved148[2]; // Gap of 2 words.
+ public uint SetInstrumentationMethodHeader;
+ public uint SetInstrumentationMethodData;
+ public fixed uint Reserved158[10]; // Gap of 10 words.
+ public uint LineLengthIn; // Length of one line; InlineToMemoryClass rounds it up to a multiple of 4 when sizing the transfer.
+ public uint LineCount; // Number of lines in the transfer.
+ public uint OffsetOutUpper;
+ public int OffsetOutUpperValue => (int)((OffsetOutUpper >> 0) & 0xFF); // Bits 0-7; used as bits 32-39 of the destination GPU virtual address.
+ public uint OffsetOut; // Lower 32 bits of the destination GPU virtual address.
+ public uint PitchOut; // Used as the destination stride by InlineToMemoryClass.
+ public uint SetDstBlockSize;
+ public SetDstBlockSizeWidth SetDstBlockSizeWidth => (SetDstBlockSizeWidth)((SetDstBlockSize >> 0) & 0xF); // Bits 0-3.
+ public SetDstBlockSizeHeight SetDstBlockSizeHeight => (SetDstBlockSizeHeight)((SetDstBlockSize >> 4) & 0xF); // Bits 4-7.
+ public SetDstBlockSizeDepth SetDstBlockSizeDepth => (SetDstBlockSizeDepth)((SetDstBlockSize >> 8) & 0xF); // Bits 8-11.
+ public uint SetDstWidth;
+ public uint SetDstHeight;
+ public uint SetDstDepth;
+ public uint SetDstLayer;
+ public uint SetDstOriginBytesX;
+ public int SetDstOriginBytesXV => (int)((SetDstOriginBytesX >> 0) & 0xFFFFF); // Bits 0-19.
+ public uint SetDstOriginSamplesY;
+ public int SetDstOriginSamplesYV => (int)((SetDstOriginSamplesY >> 0) & 0xFFFF); // Bits 0-15.
+ public uint LaunchDma;
+ public LaunchDmaDstMemoryLayout LaunchDmaDstMemoryLayout => (LaunchDmaDstMemoryLayout)((LaunchDma >> 0) & 0x1); // Bit 0.
+ public LaunchDmaCompletionType LaunchDmaCompletionType => (LaunchDmaCompletionType)((LaunchDma >> 4) & 0x3); // Bits 4-5.
+ public LaunchDmaInterruptType LaunchDmaInterruptType => (LaunchDmaInterruptType)((LaunchDma >> 8) & 0x3); // Bits 8-9.
+ public LaunchDmaSemaphoreStructSize LaunchDmaSemaphoreStructSize => (LaunchDmaSemaphoreStructSize)((LaunchDma >> 12) & 0x1); // Bit 12.
+ public bool LaunchDmaReductionEnable => (LaunchDma & 0x2) != 0; // Bit 1.
+ public LaunchDmaReductionOp LaunchDmaReductionOp => (LaunchDmaReductionOp)((LaunchDma >> 13) & 0x7); // Bits 13-15.
+ public LaunchDmaReductionFormat LaunchDmaReductionFormat => (LaunchDmaReductionFormat)((LaunchDma >> 2) & 0x3); // Bits 2-3.
+ public bool LaunchDmaSysmembarDisable => (LaunchDma & 0x40) != 0; // Bit 6.
+ public uint LoadInlineData;
+ public fixed uint Reserved1B8[9]; // Gap of 9 words.
+ public uint SetI2mSemaphoreA;
+ public int SetI2mSemaphoreAOffsetUpper => (int)((SetI2mSemaphoreA >> 0) & 0xFF); // Bits 0-7.
+ public uint SetI2mSemaphoreB;
+ public uint SetI2mSemaphoreC;
+ public fixed uint Reserved1E8[2]; // Gap of 2 words.
+ public uint SetI2mSpareNoop00;
+ public uint SetI2mSpareNoop01;
+ public uint SetI2mSpareNoop02;
+ public uint SetI2mSpareNoop03;
+ public fixed uint Reserved200[3200]; // Gap of 3200 words before the MME shadow scratch area.
+ public MmeShadowScratch SetMmeShadowScratch;
+#pragma warning restore CS0649
+ }
+}