diff options
| author | TSR Berry <20988865+TSRBerry@users.noreply.github.com> | 2023-04-08 01:22:00 +0200 |
|---|---|---|
| committer | Mary <thog@protonmail.com> | 2023-04-27 23:51:14 +0200 |
| commit | cee712105850ac3385cd0091a923438167433f9f (patch) | |
| tree | 4a5274b21d8b7f938c0d0ce18736d3f2993b11b1 /src/Ryujinx.Graphics.Gpu/Engine/Compute | |
| parent | cd124bda587ef09668a971fa1cac1c3f0cfc9f21 (diff) | |
Move solution and projects to src
Diffstat (limited to 'src/Ryujinx.Graphics.Gpu/Engine/Compute')
3 files changed, 929 insertions, 0 deletions
diff --git a/src/Ryujinx.Graphics.Gpu/Engine/Compute/ComputeClass.cs b/src/Ryujinx.Graphics.Gpu/Engine/Compute/ComputeClass.cs new file mode 100644 index 00000000..2ac738fd --- /dev/null +++ b/src/Ryujinx.Graphics.Gpu/Engine/Compute/ComputeClass.cs @@ -0,0 +1,219 @@
using Ryujinx.Graphics.Device;
using Ryujinx.Graphics.GAL;
using Ryujinx.Graphics.Gpu.Engine.InlineToMemory;
using Ryujinx.Graphics.Gpu.Engine.Threed;
using Ryujinx.Graphics.Gpu.Engine.Types;
using Ryujinx.Graphics.Gpu.Image;
using Ryujinx.Graphics.Gpu.Shader;
using Ryujinx.Graphics.Shader;
using System;
using System.Collections.Generic;
using System.Runtime.CompilerServices;

namespace Ryujinx.Graphics.Gpu.Engine.Compute
{
    /// <summary>
    /// Compute engine class. Holds the compute register state, forwards inline-to-memory
    /// methods to an <see cref="InlineToMemoryClass"/> and performs compute dispatches.
    /// </summary>
    class ComputeClass : IDeviceState
    {
        // Byte offset, inside compute uniform buffer 0, of the storage buffer descriptor for slot 0.
        private const int StorageDescriptorsOffset = 0x310;

        // Byte offset, inside compute uniform buffer 0, of the descriptor for the first emulated constant buffer.
        private const int EmulatedUniformDescriptorsOffset = 0x260;

        // Distance in bytes between two consecutive descriptors in either table.
        private const int DescriptorStride = 0x10;

        // Constant buffer slots below this value are bound directly from the QMD.
        private const int FirstEmulatedUniformSlot = 8;

        private readonly GpuContext _context;
        private readonly GpuChannel _channel;
        private readonly ThreedClass _3dEngine;
        private readonly DeviceState<ComputeClassState> _state;

        private readonly InlineToMemoryClass _i2mClass;

        /// <summary>
        /// Builds the compute engine class and registers the write callbacks of the methods it handles.
        /// </summary>
        /// <param name="context">GPU context</param>
        /// <param name="channel">GPU channel</param>
        /// <param name="threedEngine">3D engine</param>
        public ComputeClass(GpuContext context, GpuChannel channel, ThreedClass threedEngine)
        {
            _context = context;
            _channel = channel;
            _3dEngine = threedEngine;

            // Only these three registers trigger work when written; none has a read callback.
            var callbacks = new Dictionary<string, RwCallback>
            {
                [nameof(ComputeClassState.LaunchDma)] = new RwCallback(LaunchDma, null),
                [nameof(ComputeClassState.LoadInlineData)] = new RwCallback(LoadInlineData, null),
                [nameof(ComputeClassState.SendSignalingPcasB)] = new RwCallback(SendSignalingPcasB, null)
            };

            _state = new DeviceState<ComputeClassState>(callbacks);

            _i2mClass = new InlineToMemoryClass(context, channel, initializeState: false);
        }

        /// <summary>
        /// Reads a value from the class registers.
        /// </summary>
        /// <param name="offset">Register byte offset</param>
        /// <returns>Data at the specified offset</returns>
        public int Read(int offset)
        {
            return _state.Read(offset);
        }

        /// <summary>
        /// Writes a value to the class registers.
        /// </summary>
        /// <param name="offset">Register byte offset</param>
        /// <param name="data">Data to be written</param>
        public void Write(int offset, int data)
        {
            _state.Write(offset, data);
        }

        /// <summary>
        /// Starts the Inline-to-Memory DMA copy, handing the compute register state to the
        /// Inline-to-Memory engine reinterpreted as its own state layout.
        /// </summary>
        /// <param name="argument">Method call argument</param>
        private void LaunchDma(int argument)
        {
            ref InlineToMemoryClassState i2mState = ref Unsafe.As<ComputeClassState, InlineToMemoryClassState>(ref _state.State);

            _i2mClass.LaunchDma(ref i2mState, argument);
        }

        /// <summary>
        /// Forwards a block of data to the Inline-to-Memory engine.
        /// </summary>
        /// <param name="data">Data to push</param>
        public void LoadInlineData(ReadOnlySpan<int> data)
        {
            _i2mClass.LoadInlineData(data);
        }

        /// <summary>
        /// Forwards a single word of data to the Inline-to-Memory engine.
        /// </summary>
        /// <param name="argument">Method call argument</param>
        private void LoadInlineData(int argument)
        {
            _i2mClass.LoadInlineData(argument);
        }

        /// <summary>
        /// Combines the upper and lower words of a register pair into a 64-bit GPU virtual address.
        /// </summary>
        /// <param name="upper">Upper address bits</param>
        /// <param name="lower">Lower 32 address bits</param>
        /// <returns>The combined address</returns>
        private static ulong PackGpuVa(int upper, uint lower)
        {
            return ((ulong)upper << 32) | lower;
        }

        /// <summary>
        /// Performs the compute dispatch described by the QMD referenced by the SendPcasA register.
        /// </summary>
        /// <param name="argument">Method call argument</param>
        private void SendSignalingPcasB(int argument)
        {
            var memoryManager = _channel.MemoryManager;

            // State is about to change, so pending instanced draws on the 3D engine must be completed first.
            _3dEngine.PerformDeferredDraws();

            // Pending uniform buffer data must reach memory before the dispatch reads it.
            _3dEngine.FlushUboDirty();

            ref ComputeClassState state = ref _state.State;

            // The register value is shifted left by 8 to form the GPU virtual address of the QMD.
            ulong qmdGpuVa = (ulong)state.SendPcasA << 8;

            ComputeQmd qmd = _channel.MemoryManager.Read<ComputeQmd>(qmdGpuVa);

            ulong shaderGpuVa = PackGpuVa(state.SetProgramRegionAAddressUpper, state.SetProgramRegionB) + (uint)qmd.ProgramOffset;

            int localMemorySize = qmd.ShaderLocalMemoryLowSize + qmd.ShaderLocalMemoryHighSize;

            int sharedMemorySize = Math.Min(qmd.SharedMemorySize, _context.Capabilities.MaximumComputeSharedMemorySize);

            // Bind every constant buffer that the QMD flags as valid.
            for (int slot = 0; slot < Constants.TotalCpUniformBuffers; slot++)
            {
                if (qmd.ConstantBufferValid(slot))
                {
                    ulong addressLow = (uint)qmd.ConstantBufferAddrLower(slot);
                    ulong addressHigh = (ulong)qmd.ConstantBufferAddrUpper(slot) << 32;
                    ulong bufferSize = (ulong)qmd.ConstantBufferSize(slot);

                    _channel.BufferManager.SetComputeUniformBuffer(slot, addressLow | addressHigh, bufferSize);
                }
            }

            ulong samplerPoolGpuVa = PackGpuVa(state.SetTexSamplerPoolAOffsetUpper, state.SetTexSamplerPoolB);
            ulong texturePoolGpuVa = PackGpuVa(state.SetTexHeaderPoolAOffsetUpper, state.SetTexHeaderPoolB);

            var poolState = new GpuChannelPoolState(
                texturePoolGpuVa,
                state.SetTexHeaderPoolCMaximumIndex,
                state.SetBindlessTextureConstantBufferSlotSelect);

            var computeState = new GpuChannelComputeState(
                qmd.CtaThreadDimension0,
                qmd.CtaThreadDimension1,
                qmd.CtaThreadDimension2,
                localMemorySize,
                sharedMemorySize,
                _channel.BufferManager.HasUnalignedStorageBuffers);

            CachedShaderProgram program = memoryManager.Physical.ShaderCache.GetComputeShader(_channel, poolState, computeState, shaderGpuVa);

            _context.Renderer.Pipeline.SetProgram(program.HostProgram);

            _channel.TextureManager.SetComputeSamplerPool(samplerPoolGpuVa, state.SetTexSamplerPoolCMaximumIndex, qmd.SamplerIndex);
            _channel.TextureManager.SetComputeTexturePool(texturePoolGpuVa, state.SetTexHeaderPoolCMaximumIndex);
            _channel.TextureManager.SetComputeTextureBufferIndex(state.SetBindlessTextureConstantBufferSlotSelect);

            ShaderProgramInfo shaderInfo = program.Shaders[0].Info;

            // Remember the alignment assumption the shader above was fetched with.
            bool hadUnaligned = _channel.BufferManager.HasUnalignedStorageBuffers;

            // Storage buffer descriptors are read from a table located inside compute uniform buffer 0.
            for (int i = 0; i < shaderInfo.SBuffers.Count; i++)
            {
                BufferDescriptor storage = shaderInfo.SBuffers[i];

                ulong descriptorAddress = _channel.BufferManager.GetComputeUniformBufferAddress(0);

                descriptorAddress += (ulong)(StorageDescriptorsOffset + storage.Slot * DescriptorStride);

                SbDescriptor descriptor = _channel.MemoryManager.Physical.Read<SbDescriptor>(descriptorAddress);

                _channel.BufferManager.SetComputeStorageBuffer(storage.Slot, descriptor.PackAddress(), (uint)descriptor.Size, storage.Flags);
            }

            if (hadUnaligned != _channel.BufferManager.HasUnalignedStorageBuffers)
            {
                // Binding the storage buffers changed the alignment assumption, so the shader has to be fetched again.
                program = memoryManager.Physical.ShaderCache.GetComputeShader(_channel, poolState, computeState, shaderGpuVa);

                _context.Renderer.Pipeline.SetProgram(program.HostProgram);

                shaderInfo = program.Shaders[0].Info;
            }

            for (int i = 0; i < shaderInfo.CBuffers.Count; i++)
            {
                BufferDescriptor uniform = shaderInfo.CBuffers[i];

                // NVN places anything below slot 8 in the "hardware" constant buffers, which were
                // bound from the QMD above. Slots from 8 upwards are emulated constant buffers
                // (implemented with global memory loads) and are described by a second table
                // inside compute uniform buffer 0.
                if (uniform.Slot >= FirstEmulatedUniformSlot)
                {
                    ulong descriptorAddress = _channel.BufferManager.GetComputeUniformBufferAddress(0);

                    descriptorAddress += (ulong)(EmulatedUniformDescriptorsOffset + (uniform.Slot - FirstEmulatedUniformSlot) * DescriptorStride);

                    SbDescriptor descriptor = _channel.MemoryManager.Physical.Read<SbDescriptor>(descriptorAddress);

                    _channel.BufferManager.SetComputeUniformBuffer(uniform.Slot, descriptor.PackAddress(), (uint)descriptor.Size);
                }
            }

            _channel.BufferManager.SetComputeBufferBindings(program.Bindings);

            _channel.TextureManager.SetComputeBindings(program.Bindings);

            // The shader was fetched above, so a specialization state mismatch (false result) is not expected here.
            _channel.TextureManager.CommitComputeBindings(program.SpecializationState);

            _channel.BufferManager.CommitComputeBindings();

            _context.Renderer.Pipeline.DispatchCompute(qmd.CtaRasterWidth, qmd.CtaRasterHeight, qmd.CtaRasterDepth);

            _3dEngine.ForceShaderUpdate();
        }
    }
}
diff --git a/src/Ryujinx.Graphics.Gpu/Engine/Compute/ComputeClassState.cs b/src/Ryujinx.Graphics.Gpu/Engine/Compute/ComputeClassState.cs new file mode 100644 index 00000000..5d81de5d --- /dev/null +++ b/src/Ryujinx.Graphics.Gpu/Engine/Compute/ComputeClassState.cs @@ -0,0 +1,435 @@
// This file was auto-generated from NVIDIA official Maxwell definitions.

using Ryujinx.Common.Memory;
using Ryujinx.Graphics.Gpu.Engine.InlineToMemory;

namespace Ryujinx.Graphics.Gpu.Engine.Compute
{
    /// <summary>
    /// Notify type.
    /// </summary>
    /// <remarks>Decoded from the whole value of the Notify register of the compute class state.</remarks>
    enum NotifyType
    {
        WriteOnly = 0,
        WriteThenAwaken = 1,
    }

    /// <summary>
    /// CWD control SM selection.
    /// </summary>
    /// <remarks>Decoded from bit 0 of the SetCwdControl register of the compute class state.</remarks>
    enum SetCwdControlSmSelection
    {
        LoadBalanced = 0,
        RoundRobin = 1,
    }

    /// <summary>
    /// Cache lines to invalidate.
    /// </summary>
    /// <remarks>Decoded from bit 0 of the sampler, texture header and texture data cache invalidation registers.</remarks>
    enum InvalidateCacheLines
    {
        All = 0,
        One = 1,
    }

    /// <summary>
    /// GWC SCG type.
/// </summary>
    /// <remarks>Decoded from bit 0 of the SetGwcScgType register of the compute class state.</remarks>
    enum SetGwcScgTypeScgType
    {
        GraphicsCompute0 = 0,
        Compute1 = 1,
    }

    /// <summary>
    /// Mode selected by the render enable override.
    /// </summary>
    /// <remarks>Decoded from bits 0-1 of the SetRenderEnableOverride register of the compute class state.</remarks>
    enum SetRenderEnableOverrideMode
    {
        UseRenderEnable = 0,
        AlwaysRender = 1,
        NeverRender = 2,
    }

    /// <summary>
    /// Operation requested by a semaphore report.
    /// </summary>
    /// <remarks>
    /// Decoded from bits 0-1 of the SetReportSemaphoreD register of the compute class state.
    /// Only the values 0 and 3 are named.
    /// </remarks>
    enum SetReportSemaphoreDOperation
    {
        Release = 0,
        Trap = 3,
    }

    /// <summary>
    /// Size of the structure used by a semaphore report.
    /// </summary>
    /// <remarks>Decoded from bit 28 of the SetReportSemaphoreD register of the compute class state.</remarks>
    enum SetReportSemaphoreDStructureSize
    {
        FourWords = 0,
        OneWord = 1,
    }

    /// <summary>
    /// Reduction operation applied by a semaphore report.
    /// </summary>
    /// <remarks>Decoded from bits 9-11 of the SetReportSemaphoreD register of the compute class state.</remarks>
    enum SetReportSemaphoreDReductionOp
    {
        RedAdd = 0,
        RedMin = 1,
        RedMax = 2,
        RedInc = 3,
        RedDec = 4,
        RedAnd = 5,
        RedOr = 6,
        RedXor = 7,
    }

    /// <summary>
    /// Operand format used by a semaphore report reduction.
    /// </summary>
    /// <remarks>Decoded from bits 17-18 of the SetReportSemaphoreD register of the compute class state.</remarks>
    enum SetReportSemaphoreDReductionFormat
    {
        Unsigned32 = 0,
        Signed32 = 1,
    }

    /// <summary>
    /// Compute class state.
+ /// </summary> + unsafe struct ComputeClassState + { +#pragma warning disable CS0649 + public uint SetObject; + public int SetObjectClassId => (int)((SetObject >> 0) & 0xFFFF); + public int SetObjectEngineId => (int)((SetObject >> 16) & 0x1F); + public fixed uint Reserved04[63]; + public uint NoOperation; + public uint SetNotifyA; + public int SetNotifyAAddressUpper => (int)((SetNotifyA >> 0) & 0xFF); + public uint SetNotifyB; + public uint Notify; + public NotifyType NotifyType => (NotifyType)(Notify); + public uint WaitForIdle; + public fixed uint Reserved114[7]; + public uint SetGlobalRenderEnableA; + public int SetGlobalRenderEnableAOffsetUpper => (int)((SetGlobalRenderEnableA >> 0) & 0xFF); + public uint SetGlobalRenderEnableB; + public uint SetGlobalRenderEnableC; + public int SetGlobalRenderEnableCMode => (int)((SetGlobalRenderEnableC >> 0) & 0x7); + public uint SendGoIdle; + public uint PmTrigger; + public uint PmTriggerWfi; + public fixed uint Reserved148[2]; + public uint SetInstrumentationMethodHeader; + public uint SetInstrumentationMethodData; + public fixed uint Reserved158[10]; + public uint LineLengthIn; + public uint LineCount; + public uint OffsetOutUpper; + public int OffsetOutUpperValue => (int)((OffsetOutUpper >> 0) & 0xFF); + public uint OffsetOut; + public uint PitchOut; + public uint SetDstBlockSize; + public SetDstBlockSizeWidth SetDstBlockSizeWidth => (SetDstBlockSizeWidth)((SetDstBlockSize >> 0) & 0xF); + public SetDstBlockSizeHeight SetDstBlockSizeHeight => (SetDstBlockSizeHeight)((SetDstBlockSize >> 4) & 0xF); + public SetDstBlockSizeDepth SetDstBlockSizeDepth => (SetDstBlockSizeDepth)((SetDstBlockSize >> 8) & 0xF); + public uint SetDstWidth; + public uint SetDstHeight; + public uint SetDstDepth; + public uint SetDstLayer; + public uint SetDstOriginBytesX; + public int SetDstOriginBytesXV => (int)((SetDstOriginBytesX >> 0) & 0xFFFFF); + public uint SetDstOriginSamplesY; + public int SetDstOriginSamplesYV => (int)((SetDstOriginSamplesY 
>> 0) & 0xFFFF); + public uint LaunchDma; + public LaunchDmaDstMemoryLayout LaunchDmaDstMemoryLayout => (LaunchDmaDstMemoryLayout)((LaunchDma >> 0) & 0x1); + public LaunchDmaCompletionType LaunchDmaCompletionType => (LaunchDmaCompletionType)((LaunchDma >> 4) & 0x3); + public LaunchDmaInterruptType LaunchDmaInterruptType => (LaunchDmaInterruptType)((LaunchDma >> 8) & 0x3); + public LaunchDmaSemaphoreStructSize LaunchDmaSemaphoreStructSize => (LaunchDmaSemaphoreStructSize)((LaunchDma >> 12) & 0x1); + public bool LaunchDmaReductionEnable => (LaunchDma & 0x2) != 0; + public LaunchDmaReductionOp LaunchDmaReductionOp => (LaunchDmaReductionOp)((LaunchDma >> 13) & 0x7); + public LaunchDmaReductionFormat LaunchDmaReductionFormat => (LaunchDmaReductionFormat)((LaunchDma >> 2) & 0x3); + public bool LaunchDmaSysmembarDisable => (LaunchDma & 0x40) != 0; + public uint LoadInlineData; + public fixed uint Reserved1B8[9]; + public uint SetI2mSemaphoreA; + public int SetI2mSemaphoreAOffsetUpper => (int)((SetI2mSemaphoreA >> 0) & 0xFF); + public uint SetI2mSemaphoreB; + public uint SetI2mSemaphoreC; + public fixed uint Reserved1E8[2]; + public uint SetI2mSpareNoop00; + public uint SetI2mSpareNoop01; + public uint SetI2mSpareNoop02; + public uint SetI2mSpareNoop03; + public uint SetValidSpanOverflowAreaA; + public int SetValidSpanOverflowAreaAAddressUpper => (int)((SetValidSpanOverflowAreaA >> 0) & 0xFF); + public uint SetValidSpanOverflowAreaB; + public uint SetValidSpanOverflowAreaC; + public uint SetCoalesceWaitingPeriodUnit; + public uint PerfmonTransfer; + public uint SetShaderSharedMemoryWindow; + public uint SetSelectMaxwellTextureHeaders; + public bool SetSelectMaxwellTextureHeadersV => (SetSelectMaxwellTextureHeaders & 0x1) != 0; + public uint InvalidateShaderCaches; + public bool InvalidateShaderCachesInstruction => (InvalidateShaderCaches & 0x1) != 0; + public bool InvalidateShaderCachesData => (InvalidateShaderCaches & 0x10) != 0; + public bool 
InvalidateShaderCachesConstant => (InvalidateShaderCaches & 0x1000) != 0; + public bool InvalidateShaderCachesLocks => (InvalidateShaderCaches & 0x2) != 0; + public bool InvalidateShaderCachesFlushData => (InvalidateShaderCaches & 0x4) != 0; + public uint SetReservedSwMethod00; + public uint SetReservedSwMethod01; + public uint SetReservedSwMethod02; + public uint SetReservedSwMethod03; + public uint SetReservedSwMethod04; + public uint SetReservedSwMethod05; + public uint SetReservedSwMethod06; + public uint SetReservedSwMethod07; + public uint SetCwdControl; + public SetCwdControlSmSelection SetCwdControlSmSelection => (SetCwdControlSmSelection)((SetCwdControl >> 0) & 0x1); + public uint InvalidateTextureHeaderCacheNoWfi; + public InvalidateCacheLines InvalidateTextureHeaderCacheNoWfiLines => (InvalidateCacheLines)((InvalidateTextureHeaderCacheNoWfi >> 0) & 0x1); + public int InvalidateTextureHeaderCacheNoWfiTag => (int)((InvalidateTextureHeaderCacheNoWfi >> 4) & 0x3FFFFF); + public uint SetCwdRefCounter; + public int SetCwdRefCounterSelect => (int)((SetCwdRefCounter >> 0) & 0x3F); + public int SetCwdRefCounterValue => (int)((SetCwdRefCounter >> 8) & 0xFFFF); + public uint SetReservedSwMethod08; + public uint SetReservedSwMethod09; + public uint SetReservedSwMethod10; + public uint SetReservedSwMethod11; + public uint SetReservedSwMethod12; + public uint SetReservedSwMethod13; + public uint SetReservedSwMethod14; + public uint SetReservedSwMethod15; + public uint SetGwcScgType; + public SetGwcScgTypeScgType SetGwcScgTypeScgType => (SetGwcScgTypeScgType)((SetGwcScgType >> 0) & 0x1); + public uint SetScgControl; + public int SetScgControlCompute1MaxSmCount => (int)((SetScgControl >> 0) & 0x1FF); + public uint InvalidateConstantBufferCacheA; + public int InvalidateConstantBufferCacheAAddressUpper => (int)((InvalidateConstantBufferCacheA >> 0) & 0xFF); + public uint InvalidateConstantBufferCacheB; + public uint InvalidateConstantBufferCacheC; + public int 
InvalidateConstantBufferCacheCByteCount => (int)((InvalidateConstantBufferCacheC >> 0) & 0x1FFFF); + public bool InvalidateConstantBufferCacheCThruL2 => (InvalidateConstantBufferCacheC & 0x80000000) != 0; + public uint SetComputeClassVersion; + public int SetComputeClassVersionCurrent => (int)((SetComputeClassVersion >> 0) & 0xFFFF); + public int SetComputeClassVersionOldestSupported => (int)((SetComputeClassVersion >> 16) & 0xFFFF); + public uint CheckComputeClassVersion; + public int CheckComputeClassVersionCurrent => (int)((CheckComputeClassVersion >> 0) & 0xFFFF); + public int CheckComputeClassVersionOldestSupported => (int)((CheckComputeClassVersion >> 16) & 0xFFFF); + public uint SetQmdVersion; + public int SetQmdVersionCurrent => (int)((SetQmdVersion >> 0) & 0xFFFF); + public int SetQmdVersionOldestSupported => (int)((SetQmdVersion >> 16) & 0xFFFF); + public uint SetWfiConfig; + public bool SetWfiConfigEnableScgTypeWfi => (SetWfiConfig & 0x1) != 0; + public uint CheckQmdVersion; + public int CheckQmdVersionCurrent => (int)((CheckQmdVersion >> 0) & 0xFFFF); + public int CheckQmdVersionOldestSupported => (int)((CheckQmdVersion >> 16) & 0xFFFF); + public uint WaitForIdleScgType; + public uint InvalidateSkedCaches; + public bool InvalidateSkedCachesV => (InvalidateSkedCaches & 0x1) != 0; + public uint SetScgRenderEnableControl; + public bool SetScgRenderEnableControlCompute1UsesRenderEnable => (SetScgRenderEnableControl & 0x1) != 0; + public fixed uint Reserved2A0[4]; + public uint SetCwdSlotCount; + public int SetCwdSlotCountV => (int)((SetCwdSlotCount >> 0) & 0xFF); + public uint SendPcasA; + public uint SendPcasB; + public int SendPcasBFrom => (int)((SendPcasB >> 0) & 0xFFFFFF); + public int SendPcasBDelta => (int)((SendPcasB >> 24) & 0xFF); + public uint SendSignalingPcasB; + public bool SendSignalingPcasBInvalidate => (SendSignalingPcasB & 0x1) != 0; + public bool SendSignalingPcasBSchedule => (SendSignalingPcasB & 0x2) != 0; + public fixed uint 
Reserved2C0[9]; + public uint SetShaderLocalMemoryNonThrottledA; + public int SetShaderLocalMemoryNonThrottledASizeUpper => (int)((SetShaderLocalMemoryNonThrottledA >> 0) & 0xFF); + public uint SetShaderLocalMemoryNonThrottledB; + public uint SetShaderLocalMemoryNonThrottledC; + public int SetShaderLocalMemoryNonThrottledCMaxSmCount => (int)((SetShaderLocalMemoryNonThrottledC >> 0) & 0x1FF); + public uint SetShaderLocalMemoryThrottledA; + public int SetShaderLocalMemoryThrottledASizeUpper => (int)((SetShaderLocalMemoryThrottledA >> 0) & 0xFF); + public uint SetShaderLocalMemoryThrottledB; + public uint SetShaderLocalMemoryThrottledC; + public int SetShaderLocalMemoryThrottledCMaxSmCount => (int)((SetShaderLocalMemoryThrottledC >> 0) & 0x1FF); + public fixed uint Reserved2FC[5]; + public uint SetSpaVersion; + public int SetSpaVersionMinor => (int)((SetSpaVersion >> 0) & 0xFF); + public int SetSpaVersionMajor => (int)((SetSpaVersion >> 8) & 0xFF); + public fixed uint Reserved314[123]; + public uint SetFalcon00; + public uint SetFalcon01; + public uint SetFalcon02; + public uint SetFalcon03; + public uint SetFalcon04; + public uint SetFalcon05; + public uint SetFalcon06; + public uint SetFalcon07; + public uint SetFalcon08; + public uint SetFalcon09; + public uint SetFalcon10; + public uint SetFalcon11; + public uint SetFalcon12; + public uint SetFalcon13; + public uint SetFalcon14; + public uint SetFalcon15; + public uint SetFalcon16; + public uint SetFalcon17; + public uint SetFalcon18; + public uint SetFalcon19; + public uint SetFalcon20; + public uint SetFalcon21; + public uint SetFalcon22; + public uint SetFalcon23; + public uint SetFalcon24; + public uint SetFalcon25; + public uint SetFalcon26; + public uint SetFalcon27; + public uint SetFalcon28; + public uint SetFalcon29; + public uint SetFalcon30; + public uint SetFalcon31; + public fixed uint Reserved580[127]; + public uint SetShaderLocalMemoryWindow; + public fixed uint Reserved780[4]; + public uint 
SetShaderLocalMemoryA; + public int SetShaderLocalMemoryAAddressUpper => (int)((SetShaderLocalMemoryA >> 0) & 0xFF); + public uint SetShaderLocalMemoryB; + public fixed uint Reserved798[383]; + public uint SetShaderCacheControl; + public bool SetShaderCacheControlIcachePrefetchEnable => (SetShaderCacheControl & 0x1) != 0; + public fixed uint ReservedD98[19]; + public uint SetSmTimeoutInterval; + public int SetSmTimeoutIntervalCounterBit => (int)((SetSmTimeoutInterval >> 0) & 0x3F); + public fixed uint ReservedDE8[87]; + public uint SetSpareNoop12; + public uint SetSpareNoop13; + public uint SetSpareNoop14; + public uint SetSpareNoop15; + public fixed uint ReservedF54[59]; + public uint SetSpareNoop00; + public uint SetSpareNoop01; + public uint SetSpareNoop02; + public uint SetSpareNoop03; + public uint SetSpareNoop04; + public uint SetSpareNoop05; + public uint SetSpareNoop06; + public uint SetSpareNoop07; + public uint SetSpareNoop08; + public uint SetSpareNoop09; + public uint SetSpareNoop10; + public uint SetSpareNoop11; + public fixed uint Reserved1070[103]; + public uint InvalidateSamplerCacheAll; + public bool InvalidateSamplerCacheAllV => (InvalidateSamplerCacheAll & 0x1) != 0; + public uint InvalidateTextureHeaderCacheAll; + public bool InvalidateTextureHeaderCacheAllV => (InvalidateTextureHeaderCacheAll & 0x1) != 0; + public fixed uint Reserved1214[29]; + public uint InvalidateTextureDataCacheNoWfi; + public InvalidateCacheLines InvalidateTextureDataCacheNoWfiLines => (InvalidateCacheLines)((InvalidateTextureDataCacheNoWfi >> 0) & 0x1); + public int InvalidateTextureDataCacheNoWfiTag => (int)((InvalidateTextureDataCacheNoWfi >> 4) & 0x3FFFFF); + public fixed uint Reserved128C[7]; + public uint ActivatePerfSettingsForComputeContext; + public bool ActivatePerfSettingsForComputeContextAll => (ActivatePerfSettingsForComputeContext & 0x1) != 0; + public fixed uint Reserved12AC[33]; + public uint InvalidateSamplerCache; + public InvalidateCacheLines 
InvalidateSamplerCacheLines => (InvalidateCacheLines)((InvalidateSamplerCache >> 0) & 0x1); + public int InvalidateSamplerCacheTag => (int)((InvalidateSamplerCache >> 4) & 0x3FFFFF); + public uint InvalidateTextureHeaderCache; + public InvalidateCacheLines InvalidateTextureHeaderCacheLines => (InvalidateCacheLines)((InvalidateTextureHeaderCache >> 0) & 0x1); + public int InvalidateTextureHeaderCacheTag => (int)((InvalidateTextureHeaderCache >> 4) & 0x3FFFFF); + public uint InvalidateTextureDataCache; + public InvalidateCacheLines InvalidateTextureDataCacheLines => (InvalidateCacheLines)((InvalidateTextureDataCache >> 0) & 0x1); + public int InvalidateTextureDataCacheTag => (int)((InvalidateTextureDataCache >> 4) & 0x3FFFFF); + public fixed uint Reserved133C[58]; + public uint InvalidateSamplerCacheNoWfi; + public InvalidateCacheLines InvalidateSamplerCacheNoWfiLines => (InvalidateCacheLines)((InvalidateSamplerCacheNoWfi >> 0) & 0x1); + public int InvalidateSamplerCacheNoWfiTag => (int)((InvalidateSamplerCacheNoWfi >> 4) & 0x3FFFFF); + public fixed uint Reserved1428[64]; + public uint SetShaderExceptions; + public bool SetShaderExceptionsEnable => (SetShaderExceptions & 0x1) != 0; + public fixed uint Reserved152C[9]; + public uint SetRenderEnableA; + public int SetRenderEnableAOffsetUpper => (int)((SetRenderEnableA >> 0) & 0xFF); + public uint SetRenderEnableB; + public uint SetRenderEnableC; + public int SetRenderEnableCMode => (int)((SetRenderEnableC >> 0) & 0x7); + public uint SetTexSamplerPoolA; + public int SetTexSamplerPoolAOffsetUpper => (int)((SetTexSamplerPoolA >> 0) & 0xFF); + public uint SetTexSamplerPoolB; + public uint SetTexSamplerPoolC; + public int SetTexSamplerPoolCMaximumIndex => (int)((SetTexSamplerPoolC >> 0) & 0xFFFFF); + public fixed uint Reserved1568[3]; + public uint SetTexHeaderPoolA; + public int SetTexHeaderPoolAOffsetUpper => (int)((SetTexHeaderPoolA >> 0) & 0xFF); + public uint SetTexHeaderPoolB; + public uint SetTexHeaderPoolC; + public 
int SetTexHeaderPoolCMaximumIndex => (int)((SetTexHeaderPoolC >> 0) & 0x3FFFFF); + public fixed uint Reserved1580[34]; + public uint SetProgramRegionA; + public int SetProgramRegionAAddressUpper => (int)((SetProgramRegionA >> 0) & 0xFF); + public uint SetProgramRegionB; + public fixed uint Reserved1610[34]; + public uint InvalidateShaderCachesNoWfi; + public bool InvalidateShaderCachesNoWfiInstruction => (InvalidateShaderCachesNoWfi & 0x1) != 0; + public bool InvalidateShaderCachesNoWfiGlobalData => (InvalidateShaderCachesNoWfi & 0x10) != 0; + public bool InvalidateShaderCachesNoWfiConstant => (InvalidateShaderCachesNoWfi & 0x1000) != 0; + public fixed uint Reserved169C[170]; + public uint SetRenderEnableOverride; + public SetRenderEnableOverrideMode SetRenderEnableOverrideMode => (SetRenderEnableOverrideMode)((SetRenderEnableOverride >> 0) & 0x3); + public fixed uint Reserved1948[57]; + public uint PipeNop; + public uint SetSpare00; + public uint SetSpare01; + public uint SetSpare02; + public uint SetSpare03; + public fixed uint Reserved1A40[48]; + public uint SetReportSemaphoreA; + public int SetReportSemaphoreAOffsetUpper => (int)((SetReportSemaphoreA >> 0) & 0xFF); + public uint SetReportSemaphoreB; + public uint SetReportSemaphoreC; + public uint SetReportSemaphoreD; + public SetReportSemaphoreDOperation SetReportSemaphoreDOperation => (SetReportSemaphoreDOperation)((SetReportSemaphoreD >> 0) & 0x3); + public bool SetReportSemaphoreDAwakenEnable => (SetReportSemaphoreD & 0x100000) != 0; + public SetReportSemaphoreDStructureSize SetReportSemaphoreDStructureSize => (SetReportSemaphoreDStructureSize)((SetReportSemaphoreD >> 28) & 0x1); + public bool SetReportSemaphoreDFlushDisable => (SetReportSemaphoreD & 0x4) != 0; + public bool SetReportSemaphoreDReductionEnable => (SetReportSemaphoreD & 0x8) != 0; + public SetReportSemaphoreDReductionOp SetReportSemaphoreDReductionOp => (SetReportSemaphoreDReductionOp)((SetReportSemaphoreD >> 9) & 0x7); + public 
SetReportSemaphoreDReductionFormat SetReportSemaphoreDReductionFormat => (SetReportSemaphoreDReductionFormat)((SetReportSemaphoreD >> 17) & 0x3); + public fixed uint Reserved1B10[702]; + public uint SetBindlessTexture; + public int SetBindlessTextureConstantBufferSlotSelect => (int)((SetBindlessTexture >> 0) & 0x7); + public uint SetTrapHandler; + public fixed uint Reserved2610[843]; + public Array8<uint> SetShaderPerformanceCounterValueUpper; + public Array8<uint> SetShaderPerformanceCounterValue; + public Array8<uint> SetShaderPerformanceCounterEvent; + public int SetShaderPerformanceCounterEventEvent(int i) => (int)((SetShaderPerformanceCounterEvent[i] >> 0) & 0xFF); + public Array8<uint> SetShaderPerformanceCounterControlA; + public int SetShaderPerformanceCounterControlAEvent0(int i) => (int)((SetShaderPerformanceCounterControlA[i] >> 0) & 0x3); + public int SetShaderPerformanceCounterControlABitSelect0(int i) => (int)((SetShaderPerformanceCounterControlA[i] >> 2) & 0x7); + public int SetShaderPerformanceCounterControlAEvent1(int i) => (int)((SetShaderPerformanceCounterControlA[i] >> 5) & 0x3); + public int SetShaderPerformanceCounterControlABitSelect1(int i) => (int)((SetShaderPerformanceCounterControlA[i] >> 7) & 0x7); + public int SetShaderPerformanceCounterControlAEvent2(int i) => (int)((SetShaderPerformanceCounterControlA[i] >> 10) & 0x3); + public int SetShaderPerformanceCounterControlABitSelect2(int i) => (int)((SetShaderPerformanceCounterControlA[i] >> 12) & 0x7); + public int SetShaderPerformanceCounterControlAEvent3(int i) => (int)((SetShaderPerformanceCounterControlA[i] >> 15) & 0x3); + public int SetShaderPerformanceCounterControlABitSelect3(int i) => (int)((SetShaderPerformanceCounterControlA[i] >> 17) & 0x7); + public int SetShaderPerformanceCounterControlAEvent4(int i) => (int)((SetShaderPerformanceCounterControlA[i] >> 20) & 0x3); + public int SetShaderPerformanceCounterControlABitSelect4(int i) => (int)((SetShaderPerformanceCounterControlA[i] 
>> 22) & 0x7); + public int SetShaderPerformanceCounterControlAEvent5(int i) => (int)((SetShaderPerformanceCounterControlA[i] >> 25) & 0x3); + public int SetShaderPerformanceCounterControlABitSelect5(int i) => (int)((SetShaderPerformanceCounterControlA[i] >> 27) & 0x7); + public int SetShaderPerformanceCounterControlASpare(int i) => (int)((SetShaderPerformanceCounterControlA[i] >> 30) & 0x3); + public Array8<uint> SetShaderPerformanceCounterControlB; + public bool SetShaderPerformanceCounterControlBEdge(int i) => (SetShaderPerformanceCounterControlB[i] & 0x1) != 0; + public int SetShaderPerformanceCounterControlBMode(int i) => (int)((SetShaderPerformanceCounterControlB[i] >> 1) & 0x3); + public bool SetShaderPerformanceCounterControlBWindowed(int i) => (SetShaderPerformanceCounterControlB[i] & 0x8) != 0; + public int SetShaderPerformanceCounterControlBFunc(int i) => (int)((SetShaderPerformanceCounterControlB[i] >> 4) & 0xFFFF); + public uint SetShaderPerformanceCounterTrapControl; + public int SetShaderPerformanceCounterTrapControlMask => (int)((SetShaderPerformanceCounterTrapControl >> 0) & 0xFF); + public uint StartShaderPerformanceCounter; + public int StartShaderPerformanceCounterCounterMask => (int)((StartShaderPerformanceCounter >> 0) & 0xFF); + public uint StopShaderPerformanceCounter; + public int StopShaderPerformanceCounterCounterMask => (int)((StopShaderPerformanceCounter >> 0) & 0xFF); + public fixed uint Reserved33E8[6]; + public MmeShadowScratch SetMmeShadowScratch; +#pragma warning restore CS0649 + } +} diff --git a/src/Ryujinx.Graphics.Gpu/Engine/Compute/ComputeQmd.cs b/src/Ryujinx.Graphics.Gpu/Engine/Compute/ComputeQmd.cs new file mode 100644 index 00000000..1b20e41c --- /dev/null +++ b/src/Ryujinx.Graphics.Gpu/Engine/Compute/ComputeQmd.cs @@ -0,0 +1,275 @@ +using Ryujinx.Graphics.Gpu.Engine.Types; +using System; +using System.Runtime.CompilerServices; + +namespace Ryujinx.Graphics.Gpu.Engine.Compute +{ + /// <summary> + /// Type of the dependent 
/// Queue Meta Data.
/// </summary>
/// <remarks>
/// Member values are spelled out because <see cref="ComputeQmd"/> produces this enum
/// by casting the raw value of a bit field.
/// </remarks>
enum DependentQmdType
{
    Queue = 0,
    Grid = 1
}

/// <summary>
/// Kind of memory barrier associated with a release.
/// </summary>
enum ReleaseMembarType
{
    FeNone = 0,
    FeSysmembar = 1
}

/// <summary>
/// Kind of CWD memory barrier.
/// </summary>
/// <remarks>
/// NOTE(review): the members are numbered consecutively (0 to 2), while the field decoded
/// into this enum is 2 bits wide; confirm the numbering against the hardware encoding.
/// </remarks>
enum CwdMembarType
{
    L1None = 0,
    L1Sysmembar = 1,
    L1Membar = 2
}

/// <summary>
/// How the shader treats NaN in 32-bit floating point operations.
/// </summary>
enum Fp32NanBehavior
{
    Legacy = 0,
    Fp64Compatible = 1
}

/// <summary>
/// How the shader treats NaN when converting a 32-bit float to an integer.
/// </summary>
enum Fp32F2iNanBehavior
{
    PassZero = 0,
    PassIndefinite = 1
}

/// <summary>
/// Call limit setting.
/// </summary>
enum ApiVisibleCallLimit
{
    _32 = 0,
    NoCheck = 1
}

/// <summary>
/// Bank mapping mode of the shared memory.
/// </summary>
enum SharedMemoryBankMapping
{
    FourBytesPerBank = 0,
    EightBytesPerBank = 1
}

/// <summary>
/// Denormal handling of 32-bit float narrowing instructions.
/// </summary>
enum Fp32NarrowInstruction
{
    KeepDenorms = 0,
    FlushDenorms = 1
}

/// <summary>
/// L1 cache configuration.
/// </summary>
/// <remarks>
/// NOTE(review): the members are numbered consecutively (0 to 2), while the field decoded
/// into this enum is 3 bits wide; confirm the numbering against the hardware encoding.
/// </remarks>
enum L1Configuration
{
    DirectlyAddressableMemorySize16kb = 0,
    DirectlyAddressableMemorySize32kb = 1,
    DirectlyAddressableMemorySize48kb = 2
}

/// <summary>
/// Operation applied by a reduction.
/// </summary>
enum ReductionOp
{
    RedAdd = 0,
    RedMin = 1,
    RedMax = 2,
    RedInc = 3,
    RedDec = 4,
    RedAnd = 5,
    RedOr = 6,
    RedXor = 7
}

/// <summary>
/// Data format used by a reduction.
/// </summary>
enum ReductionFormat
{
    Unsigned32 = 0,
    Signed32 = 1
}

/// <summary>
/// Structure size, expressed in words.
/// </summary>
enum StructureSize
{
    FourWords = 0,
    OneWord = 1
}

/// <summary>
/// Compute Queue Meta Data.
/// </summary>
/// <remarks>
/// The structure is a block of 64 32-bit words (2048 bits). Every accessor decodes one
/// field from that block; fields are identified by absolute, inclusive bit positions.
/// Accessors taking an index operate on one of 8 constant buffer slots and throw
/// <see cref="ArgumentOutOfRangeException"/> for an index outside of 0 to 7.
/// </remarks>
unsafe struct ComputeQmd
{
    // Size of the backing storage.
    private const int WordCount = 64;
    private const int BitsPerWord = 32;

    // There is one "valid" flag per constant buffer at bits 640..647 (bit 648 already
    // belongs to QmdReservedI), and one 64-bit descriptor per constant buffer at bits
    // 928..1439 (bit 1440 already belongs to ShaderLocalMemoryLowSize).
    private const int ConstantBufferCount = 8;
    private const int ConstantBufferValidBase = 640;
    private const int ConstantBufferDescriptorBits = 64;

    private fixed int _words[WordCount];

    // Words 0-3: queue put/get state.
    public int OuterPut => BitRange(30, 0);
    public bool OuterOverflow => Bit(31);
    public int OuterGet => BitRange(62, 32);
    public bool OuterStickyOverflow => Bit(63);
    public int InnerGet => BitRange(94, 64);
    public bool InnerOverflow => Bit(95);
    public int InnerPut => BitRange(126, 96);
    public bool InnerStickyOverflow => Bit(127);

    // Words 4-7: dependent QMD, scheduling flags and cache invalidation flags.
    public int QmdReservedAA => BitRange(159, 128);
    public int DependentQmdPointer => BitRange(191, 160);
    public int QmdGroupId => BitRange(197, 192);
    public bool SmGlobalCachingEnable => Bit(198);
    public bool RunCtaInOneSmPartition => Bit(199);
    public bool IsQueue => Bit(200);
    public bool AddToHeadOfQmdGroupLinkedList => Bit(201);
    public bool SemaphoreReleaseEnable0 => Bit(202);
    public bool SemaphoreReleaseEnable1 => Bit(203);
    public bool RequireSchedulingPcas => Bit(204);
    public bool DependentQmdScheduleEnable => Bit(205);
    public DependentQmdType DependentQmdType => (DependentQmdType)BitRange(206, 206);
    public bool DependentQmdFieldCopy => Bit(207);
    public int QmdReservedB => BitRange(223, 208);
    public int CircularQueueSize => BitRange(248, 224);
    public bool QmdReservedC => Bit(249);
    public bool InvalidateTextureHeaderCache => Bit(250);
    public bool InvalidateTextureSamplerCache => Bit(251);
    public bool InvalidateTextureDataCache => Bit(252);
    public bool InvalidateShaderDataCache => Bit(253);
    public bool InvalidateInstructionCache => Bit(254);
    public bool InvalidateShaderConstantCache => Bit(255);

    // Words 8-11: program offset, circular queue and shader behavior flags.
    public int ProgramOffset => BitRange(287, 256);
    public int CircularQueueAddrLower => BitRange(319, 288);
    public int CircularQueueAddrUpper => BitRange(327, 320);
    public int QmdReservedD => BitRange(335, 328);
    public int CircularQueueEntrySize => BitRange(351, 336);
    public int CwdReferenceCountId => BitRange(357, 352);
    public int CwdReferenceCountDeltaMinusOne => BitRange(365, 358);
    public ReleaseMembarType ReleaseMembarType => (ReleaseMembarType)BitRange(366, 366);
    public bool CwdReferenceCountIncrEnable => Bit(367);
    public CwdMembarType CwdMembarType => (CwdMembarType)BitRange(369, 368);
    public bool SequentiallyRunCtas => Bit(370);
    public bool CwdReferenceCountDecrEnable => Bit(371);
    public bool Throttled => Bit(372);
    public Fp32NanBehavior Fp32NanBehavior => (Fp32NanBehavior)BitRange(376, 376);
    public Fp32F2iNanBehavior Fp32F2iNanBehavior => (Fp32F2iNanBehavior)BitRange(377, 377);
    public ApiVisibleCallLimit ApiVisibleCallLimit => (ApiVisibleCallLimit)BitRange(378, 378);
    public SharedMemoryBankMapping SharedMemoryBankMapping => (SharedMemoryBankMapping)BitRange(379, 379);
    public SamplerIndex SamplerIndex => (SamplerIndex)BitRange(382, 382);
    public Fp32NarrowInstruction Fp32NarrowInstruction => (Fp32NarrowInstruction)BitRange(383, 383);

    // Words 12-19: CTA raster size, shared memory size, QMD version and CTA thread dimensions.
    public int CtaRasterWidth => BitRange(415, 384);
    public int CtaRasterHeight => BitRange(431, 416);
    public int CtaRasterDepth => BitRange(447, 432);
    public int CtaRasterWidthResume => BitRange(479, 448);
    public int CtaRasterHeightResume => BitRange(495, 480);
    public int CtaRasterDepthResume => BitRange(511, 496);
    public int QueueEntriesPerCtaMinusOne => BitRange(518, 512);
    public int CoalesceWaitingPeriod => BitRange(529, 522);
    public int SharedMemorySize => BitRange(561, 544);
    public int QmdReservedG => BitRange(575, 562);
    public int QmdVersion => BitRange(579, 576);
    public int QmdMajorVersion => BitRange(583, 580);
    public int QmdReservedH => BitRange(591, 584);
    public int CtaThreadDimension0 => BitRange(607, 592);
    public int CtaThreadDimension1 => BitRange(623, 608);
    public int CtaThreadDimension2 => BitRange(639, 624);

    // Words 20-22: constant buffer valid flags, L1 configuration and SM disable mask.

    /// <summary>
    /// Reads the "valid" flag of a constant buffer slot.
    /// </summary>
    /// <param name="i">Constant buffer slot, from 0 to 7</param>
    /// <returns>Value of the flag bit for the slot</returns>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="i"/> is not between 0 and 7</exception>
    public bool ConstantBufferValid(int i) => Bit(ConstantBufferValidBase + CheckConstantBufferIndex(i));

    public int QmdReservedI => BitRange(668, 648);
    public L1Configuration L1Configuration => (L1Configuration)BitRange(671, 669);
    public int SmDisableMaskLower => BitRange(703, 672);
    public int SmDisableMaskUpper => BitRange(735, 704);

    // Words 23-28: the two release descriptors.
    public int Release0AddressLower => BitRange(767, 736);
    public int Release0AddressUpper => BitRange(775, 768);
    public int QmdReservedJ => BitRange(783, 776);
    public ReductionOp Release0ReductionOp => (ReductionOp)BitRange(790, 788);
    public bool QmdReservedK => Bit(791);
    public ReductionFormat Release0ReductionFormat => (ReductionFormat)BitRange(793, 792);
    public bool Release0ReductionEnable => Bit(794);
    public StructureSize Release0StructureSize => (StructureSize)BitRange(799, 799);
    public int Release0Payload => BitRange(831, 800);
    public int Release1AddressLower => BitRange(863, 832);
    public int Release1AddressUpper => BitRange(871, 864);
    public int QmdReservedL => BitRange(879, 872);
    public ReductionOp Release1ReductionOp => (ReductionOp)BitRange(886, 884);
    public bool QmdReservedM => Bit(887);
    public ReductionFormat Release1ReductionFormat => (ReductionFormat)BitRange(889, 888);
    public bool Release1ReductionEnable => Bit(890);
    public StructureSize Release1StructureSize => (StructureSize)BitRange(895, 895);
    public int Release1Payload => BitRange(927, 896);

    // Words 29-44: constant buffer descriptors, 64 bits per slot.

    /// <summary>
    /// Reads the lower 32 bits of the address of a constant buffer slot.
    /// </summary>
    /// <param name="i">Constant buffer slot, from 0 to 7</param>
    /// <returns>Raw field value</returns>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="i"/> is not between 0 and 7</exception>
    public int ConstantBufferAddrLower(int i) => ConstantBufferRange(i, 959, 928);

    /// <summary>
    /// Reads the upper 8 bits of the address of a constant buffer slot.
    /// </summary>
    /// <param name="i">Constant buffer slot, from 0 to 7</param>
    /// <returns>Raw field value</returns>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="i"/> is not between 0 and 7</exception>
    public int ConstantBufferAddrUpper(int i) => ConstantBufferRange(i, 967, 960);

    /// <summary>
    /// Reads the reserved address bits of a constant buffer slot.
    /// </summary>
    /// <param name="i">Constant buffer slot, from 0 to 7</param>
    /// <returns>Raw field value</returns>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="i"/> is not between 0 and 7</exception>
    public int ConstantBufferReservedAddr(int i) => ConstantBufferRange(i, 973, 968);

    /// <summary>
    /// Reads the "invalidate" flag of a constant buffer slot.
    /// </summary>
    /// <param name="i">Constant buffer slot, from 0 to 7</param>
    /// <returns>Value of the flag bit for the slot</returns>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="i"/> is not between 0 and 7</exception>
    public bool ConstantBufferInvalidate(int i) => Bit(974 + CheckConstantBufferIndex(i) * ConstantBufferDescriptorBits);

    /// <summary>
    /// Reads the size field of a constant buffer slot.
    /// </summary>
    /// <param name="i">Constant buffer slot, from 0 to 7</param>
    /// <returns>Raw field value</returns>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="i"/> is not between 0 and 7</exception>
    public int ConstantBufferSize(int i) => ConstantBufferRange(i, 991, 975);

    // Words 45-47: local memory sizes, barrier and register counts.
    public int ShaderLocalMemoryLowSize => BitRange(1463, 1440);
    public int QmdReservedN => BitRange(1466, 1464);
    public int BarrierCount => BitRange(1471, 1467);
    public int ShaderLocalMemoryHighSize => BitRange(1495, 1472);
    public int RegisterCount => BitRange(1503, 1496);
    public int ShaderLocalMemoryCrsSize => BitRange(1527, 1504);
    public int SassVersion => BitRange(1535, 1528);

    // Words 48-51: "HwOnly" fields.
    public int HwOnlyInnerGet => BitRange(1566, 1536);
    public bool HwOnlyRequireSchedulingPcas => Bit(1567);
    public int HwOnlyInnerPut => BitRange(1598, 1568);
    public bool HwOnlyScgType => Bit(1599);
    public int HwOnlySpanListHeadIndex => BitRange(1629, 1600);
    public bool QmdReservedQ => Bit(1630);
    public bool HwOnlySpanListHeadIndexValid => Bit(1631);
    public int HwOnlySkedNextQmdPointer => BitRange(1663, 1632);

    // Words 52-63: spare words and debug id.
    public int QmdSpareE => BitRange(1695, 1664);
    public int QmdSpareF => BitRange(1727, 1696);
    public int QmdSpareG => BitRange(1759, 1728);
    public int QmdSpareH => BitRange(1791, 1760);
    public int QmdSpareI => BitRange(1823, 1792);
    public int QmdSpareJ => BitRange(1855, 1824);
    public int QmdSpareK => BitRange(1887, 1856);
    public int QmdSpareL => BitRange(1919, 1888);
    public int QmdSpareM => BitRange(1951, 1920);
    public int QmdSpareN => BitRange(1983, 1952);
    public int DebugIdUpper => BitRange(2015, 1984);
    public int DebugIdLower => BitRange(2047, 2016);

    /// <summary>
    /// Validates a constant buffer slot index.
    /// </summary>
    /// <param name="i">Constant buffer slot</param>
    /// <returns>The same index, once validated</returns>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="i"/> is not between 0 and 7</exception>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    private static int CheckConstantBufferIndex(int i)
    {
        // Without this check, an out of range slot would still land inside of the
        // structure and return the bits of an unrelated field instead of failing.
        // The unsigned comparison rejects negative values too.
        if ((uint)i >= ConstantBufferCount)
        {
            throw new ArgumentOutOfRangeException(nameof(i));
        }

        return i;
    }

    /// <summary>
    /// Reads a field of the descriptor of a constant buffer slot.
    /// </summary>
    /// <param name="i">Constant buffer slot, from 0 to 7</param>
    /// <param name="upper">Last bit of the field inside of the descriptor of slot 0</param>
    /// <param name="lower">First bit of the field inside of the descriptor of slot 0</param>
    /// <returns>Raw field value</returns>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="i"/> is not between 0 and 7</exception>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    private int ConstantBufferRange(int i, int upper, int lower)
    {
        int offset = CheckConstantBufferIndex(i) * ConstantBufferDescriptorBits;

        return BitRange(upper + offset, lower + offset);
    }

    /// <summary>
    /// Reads a single bit of the structure.
    /// </summary>
    /// <param name="bit">Absolute bit position</param>
    /// <returns>True if the bit is set, false otherwise</returns>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="bit"/> is outside of the structure</exception>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    private bool Bit(int bit)
    {
        // The unsigned comparison rejects negative positions too.
        if ((uint)bit >= WordCount * BitsPerWord)
        {
            throw new ArgumentOutOfRangeException(nameof(bit));
        }

        return (_words[bit >> 5] & (1 << (bit & 31))) != 0;
    }

    /// <summary>
    /// Reads a range of bits of the structure.
    /// </summary>
    /// <remarks>
    /// Only the word containing <paramref name="lower"/> is read, so the whole range must
    /// be inside of a single 32-bit word and <paramref name="upper"/> must not be less
    /// than <paramref name="lower"/>. This is not checked at runtime.
    /// </remarks>
    /// <param name="upper">Absolute position of the last bit of the range (inclusive)</param>
    /// <param name="lower">Absolute position of the first bit of the range</param>
    /// <returns>Value of the range, shifted down so that its first bit is bit 0</returns>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="lower"/> is outside of the structure</exception>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    private int BitRange(int upper, int lower)
    {
        // The unsigned comparison rejects negative positions too.
        if ((uint)lower >= WordCount * BitsPerWord)
        {
            throw new ArgumentOutOfRangeException(nameof(lower));
        }

        // Mask with one bit set per bit of the range. A full 32-bit range gives a shift
        // by zero, which keeps all bits.
        int mask = (int)(uint.MaxValue >> (32 - (upper - lower + 1)));

        // The shift is arithmetic, the mask discards any sign bits it brings in.
        return (_words[lower >> 5] >> (lower & 31)) & mask;
    }
}
} // Closes the enclosing namespace.
\ No newline at end of file |
