diff options
| author | gdkchan <gab.dark.100@gmail.com> | 2023-06-15 17:31:53 -0300 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2023-06-15 17:31:53 -0300 |
| commit | f92921a6d118aa9c6acdb3ecaa3cd61a19fe341e (patch) | |
| tree | 6cba0d6ad1dc27df5750cf671cd75f709082203d /src/Ryujinx.Graphics.Shader/Translation | |
| parent | 32d21ddf17ff7d61d8185a79bec3f5d02706109b (diff) | |
Implement Load/Store Local/Shared and Atomic shared using new instructions (#5241)
* Implement Load/Store Local/Shared and Atomic shared using new instructions
* Remove now unused code
* Fix base offset register overwrite
* Fix missing storage buffer set index when generating GLSL for Vulkan
* Shader cache version bump
* Remove more unused code
* Some PR feedback
Diffstat (limited to 'src/Ryujinx.Graphics.Shader/Translation')
8 files changed, 264 insertions, 56 deletions
diff --git a/src/Ryujinx.Graphics.Shader/Translation/EmitterContextInsts.cs b/src/Ryujinx.Graphics.Shader/Translation/EmitterContextInsts.cs index be0cba80..0ba26107 100644 --- a/src/Ryujinx.Graphics.Shader/Translation/EmitterContextInsts.cs +++ b/src/Ryujinx.Graphics.Shader/Translation/EmitterContextInsts.cs @@ -67,6 +67,11 @@ namespace Ryujinx.Graphics.Shader.Translation return context.Add(Instruction.AtomicAnd, storageKind, Local(), Const(binding), e0, e1, value); } + public static Operand AtomicCompareAndSwap(this EmitterContext context, StorageKind storageKind, int binding, Operand e0, Operand compare, Operand value) + { + return context.Add(Instruction.AtomicCompareAndSwap, storageKind, Local(), Const(binding), e0, compare, value); + } + public static Operand AtomicCompareAndSwap(this EmitterContext context, StorageKind storageKind, int binding, Operand e0, Operand e1, Operand compare, Operand value) { return context.Add(Instruction.AtomicCompareAndSwap, storageKind, Local(), Const(binding), e0, e1, compare, value); @@ -661,16 +666,6 @@ namespace Ryujinx.Graphics.Shader.Translation : context.Load(storageKind, (int)ioVariable, arrayIndex, elemIndex); } - public static Operand LoadLocal(this EmitterContext context, Operand a) - { - return context.Add(Instruction.LoadLocal, Local(), a); - } - - public static Operand LoadShared(this EmitterContext context, Operand a) - { - return context.Add(Instruction.LoadShared, Local(), a); - } - public static Operand MemoryBarrier(this EmitterContext context) { return context.Add(Instruction.MemoryBarrier); @@ -753,6 +748,11 @@ namespace Ryujinx.Graphics.Shader.Translation return context.Add(Instruction.Store, storageKind, null, e0, e1, value); } + public static Operand Store(this EmitterContext context, StorageKind storageKind, int binding, Operand e0, Operand value) + { + return context.Add(Instruction.Store, storageKind, null, Const(binding), e0, value); + } + public static Operand Store(this EmitterContext context, StorageKind storageKind, int binding, Operand e0, Operand e1, Operand value) { return context.Add(Instruction.Store, storageKind, null, Const(binding), e0, e1, value); @@ -797,26 +797,6 @@ namespace Ryujinx.Graphics.Shader.Translation : context.Add(Instruction.Store, storageKind, null, Const((int)ioVariable), arrayIndex, elemIndex, value); } - public static Operand StoreLocal(this EmitterContext context, Operand a, Operand b) - { - return context.Add(Instruction.StoreLocal, null, a, b); - } - - public static Operand StoreShared(this EmitterContext context, Operand a, Operand b) - { - return context.Add(Instruction.StoreShared, null, a, b); - } - - public static Operand StoreShared16(this EmitterContext context, Operand a, Operand b) - { - return context.Add(Instruction.StoreShared16, null, a, b); - } - - public static Operand StoreShared8(this EmitterContext context, Operand a, Operand b) - { - return context.Add(Instruction.StoreShared8, null, a, b); - } - public static Operand UnpackDouble2x32High(this EmitterContext context, Operand a) { return UnpackDouble2x32(context, a, 1); diff --git a/src/Ryujinx.Graphics.Shader/Translation/HelperFunctionManager.cs b/src/Ryujinx.Graphics.Shader/Translation/HelperFunctionManager.cs index 6958b86f..51a39682 100644 --- a/src/Ryujinx.Graphics.Shader/Translation/HelperFunctionManager.cs +++ b/src/Ryujinx.Graphics.Shader/Translation/HelperFunctionManager.cs @@ -9,13 +9,13 @@ namespace Ryujinx.Graphics.Shader.Translation class HelperFunctionManager { private readonly List<Function> _functionList; - private readonly Dictionary<HelperFunctionName, int> _functionIds; + private readonly Dictionary<int, int> _functionIds; private readonly ShaderStage _stage; public HelperFunctionManager(List<Function> functionList, ShaderStage stage) { _functionList = functionList; - _functionIds = new Dictionary<HelperFunctionName, int>(); + _functionIds = new Dictionary<int, int>(); _stage = stage; } @@ -29,14 +29,30 @@ namespace Ryujinx.Graphics.Shader.Translation public int GetOrCreateFunctionId(HelperFunctionName functionName) { - if (_functionIds.TryGetValue(functionName, out int functionId)) + if (_functionIds.TryGetValue((int)functionName, out int functionId)) { return functionId; } Function function = GenerateFunction(functionName); functionId = AddFunction(function); - _functionIds.Add(functionName, functionId); + _functionIds.Add((int)functionName, functionId); + + return functionId; + } + + public int GetOrCreateFunctionId(HelperFunctionName functionName, int id) + { + int key = (int)functionName | (id << 16); + + if (_functionIds.TryGetValue(key, out int functionId)) + { + return functionId; + } + + Function function = GenerateFunction(functionName, id); + functionId = AddFunction(function); + _functionIds.Add(key, functionId); return functionId; } @@ -140,6 +156,67 @@ namespace Ryujinx.Graphics.Shader.Translation return new Function(ControlFlowGraph.Create(context.GetOperations()).Blocks, "ConvertFloatToDouble", false, 1, 2); } + private static Function GenerateFunction(HelperFunctionName functionName, int id) + { + return functionName switch + { + HelperFunctionName.SharedAtomicMaxS32 => GenerateSharedAtomicSigned(id, isMin: false), + HelperFunctionName.SharedAtomicMinS32 => GenerateSharedAtomicSigned(id, isMin: true), + HelperFunctionName.SharedStore8 => GenerateSharedStore8(id), + HelperFunctionName.SharedStore16 => GenerateSharedStore16(id), + _ => throw new ArgumentException($"Invalid function name {functionName}") + }; + } + + private static Function GenerateSharedAtomicSigned(int id, bool isMin) + { + EmitterContext context = new EmitterContext(); + + Operand wordOffset = Argument(0); + Operand value = Argument(1); + + Operand result = GenerateSharedAtomicCasLoop(context, wordOffset, id, (memValue) => + { + return isMin + ? context.IMinimumS32(memValue, value) + : context.IMaximumS32(memValue, value); + }); + + context.Return(result); + + return new Function(ControlFlowGraph.Create(context.GetOperations()).Blocks, $"SharedAtomic{(isMin ? "Min" : "Max")}_{id}", true, 2, 0); + } + + private static Function GenerateSharedStore8(int id) + { + return GenerateSharedStore(id, 8); + } + + private static Function GenerateSharedStore16(int id) + { + return GenerateSharedStore(id, 16); + } + + private static Function GenerateSharedStore(int id, int bitSize) + { + EmitterContext context = new EmitterContext(); + + Operand offset = Argument(0); + Operand value = Argument(1); + + Operand wordOffset = context.ShiftRightU32(offset, Const(2)); + Operand bitOffset = GetBitOffset(context, offset); + + GenerateSharedAtomicCasLoop(context, wordOffset, id, (memValue) => + { + return context.BitfieldInsert(memValue, value, bitOffset, Const(bitSize)); + }); + + context.Return(); + + return new Function(ControlFlowGraph.Create(context.GetOperations()).Blocks, $"SharedStore{bitSize}_{id}", false, 2, 0); + } + private Function GenerateTexelFetchScaleFunction() { EmitterContext context = new EmitterContext(); @@ -226,5 +303,29 @@ namespace Ryujinx.Graphics.Shader.Translation return context.IAdd(Const(1), index); } } + + public static Operand GetBitOffset(EmitterContext context, Operand offset) + { + return context.ShiftLeft(context.BitwiseAnd(offset, Const(3)), Const(3)); + } + + private static Operand GenerateSharedAtomicCasLoop(EmitterContext context, Operand wordOffset, int id, Func<Operand, Operand> opCallback) + { + Operand lblLoopHead = Label(); + + context.MarkLabel(lblLoopHead); + + Operand oldValue = context.Load(StorageKind.SharedMemory, id, wordOffset); + Operand newValue = opCallback(oldValue); + + Operand casResult = context.AtomicCompareAndSwap(StorageKind.SharedMemory, id, wordOffset, oldValue, newValue); + + Operand casFail = context.ICompareNotEqual(casResult, oldValue); + + context.BranchIfTrue(lblLoopHead, casFail); + + return oldValue; + } + } }
\ No newline at end of file diff --git a/src/Ryujinx.Graphics.Shader/Translation/HelperFunctionName.cs b/src/Ryujinx.Graphics.Shader/Translation/HelperFunctionName.cs index 8c37c34c..984f2d04 100644 --- a/src/Ryujinx.Graphics.Shader/Translation/HelperFunctionName.cs +++ b/src/Ryujinx.Graphics.Shader/Translation/HelperFunctionName.cs @@ -4,6 +4,10 @@ namespace Ryujinx.Graphics.Shader.Translation { ConvertDoubleToFloat, ConvertFloatToDouble, + SharedAtomicMaxS32, + SharedAtomicMinS32, + SharedStore8, + SharedStore16, TexelFetchScale, TextureSizeUnscale } diff --git a/src/Ryujinx.Graphics.Shader/Translation/Optimizations/GlobalToStorage.cs b/src/Ryujinx.Graphics.Shader/Translation/Optimizations/GlobalToStorage.cs index 14904b26..9d260c67 100644 --- a/src/Ryujinx.Graphics.Shader/Translation/Optimizations/GlobalToStorage.cs +++ b/src/Ryujinx.Graphics.Shader/Translation/Optimizations/GlobalToStorage.cs @@ -244,7 +244,9 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations node = nextNode; } } - else if (operation.Inst == Instruction.StoreShared || operation.Inst == Instruction.StoreLocal) + else if (operation.Inst == Instruction.Store && + (operation.StorageKind == StorageKind.SharedMemory || + operation.StorageKind == StorageKind.LocalMemory)) { // The NVIDIA compiler can sometimes use shared or local memory as temporary // storage to place the base address and size on, so we need @@ -874,7 +876,7 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations if (bitSize < 32) { - Operand bitOffset = GetBitOffset(context, offset); + Operand bitOffset = HelperFunctionManager.GetBitOffset(context, offset); GenerateAtomicCasLoop(context, wordOffset, binding, (memValue) => { @@ -892,7 +894,7 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations if (IsSmallInt(storageKind)) { - Operand bitOffset = GetBitOffset(context, offset); + Operand bitOffset = HelperFunctionManager.GetBitOffset(context, offset); switch (storageKind) { @@ -921,11 +923,6 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations return true; } - private static Operand GetBitOffset(EmitterContext context, Operand offset) - { - return context.ShiftLeft(context.BitwiseAnd(offset, Const(3)), Const(3)); - } - private static Operand GenerateAtomicCasLoop(EmitterContext context, Operand wordOffset, int binding, Func<Operand, Operand> opCallback) { Operand lblLoopHead = Label(); @@ -1070,15 +1067,18 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations { baseOffset = null; - if (operation.Inst == Instruction.LoadShared || operation.Inst == Instruction.StoreShared) - { - type = LsMemoryType.Shared; - return TryGetSharedMemoryOffsets(operation, out baseOffset, out constOffset); - } - else if (operation.Inst == Instruction.LoadLocal || operation.Inst == Instruction.StoreLocal) + if (operation.Inst == Instruction.Load || operation.Inst == Instruction.Store) { - type = LsMemoryType.Local; - return TryGetLocalMemoryOffset(operation, out constOffset); + if (operation.StorageKind == StorageKind.SharedMemory) + { + type = LsMemoryType.Shared; + return TryGetSharedMemoryOffsets(operation, out baseOffset, out constOffset); + } + else if (operation.StorageKind == StorageKind.LocalMemory) + { + type = LsMemoryType.Local; + return TryGetLocalMemoryOffset(operation, out constOffset); + } } type = default; diff --git a/src/Ryujinx.Graphics.Shader/Translation/ResourceManager.cs b/src/Ryujinx.Graphics.Shader/Translation/ResourceManager.cs index 2d19a5a7..c58e4828 100644 --- a/src/Ryujinx.Graphics.Shader/Translation/ResourceManager.cs +++ b/src/Ryujinx.Graphics.Shader/Translation/ResourceManager.cs @@ -1,3 +1,4 @@ +using Ryujinx.Common; using Ryujinx.Graphics.Shader.StructuredIr; using System; using System.Collections.Generic; @@ -22,9 +23,12 @@ namespace Ryujinx.Graphics.Shader.Translation private readonly HashSet<int> _usedConstantBufferBindings; + public int LocalMemoryId { get; } + public int SharedMemoryId { get; } + public ShaderProperties Properties => _properties; - public ResourceManager(ShaderStage stage, IGpuAccessor gpuAccessor, ShaderProperties properties) + public ResourceManager(ShaderStage stage, IGpuAccessor gpuAccessor, ShaderProperties properties, int localMemorySize) { _gpuAccessor = gpuAccessor; _properties = properties; @@ -41,6 +45,25 @@ namespace Ryujinx.Graphics.Shader.Translation _usedConstantBufferBindings = new HashSet<int>(); properties.AddConstantBuffer(0, new BufferDefinition(BufferLayout.Std140, 0, 0, "support_buffer", SupportBuffer.GetStructureType())); + + LocalMemoryId = -1; + SharedMemoryId = -1; + + if (localMemorySize != 0) + { + var lmem = new MemoryDefinition("local_memory", AggregateType.Array | AggregateType.U32, BitUtils.DivRoundUp(localMemorySize, sizeof(uint))); + + LocalMemoryId = properties.AddLocalMemory(lmem); + } + + int sharedMemorySize = stage == ShaderStage.Compute ? gpuAccessor.QueryComputeSharedMemorySize() : 0; + + if (sharedMemorySize != 0) + { + var smem = new MemoryDefinition("shared_memory", AggregateType.Array | AggregateType.U32, BitUtils.DivRoundUp(sharedMemorySize, sizeof(uint))); + + SharedMemoryId = properties.AddSharedMemory(smem); + } } public int GetConstantBufferBinding(int slot) diff --git a/src/Ryujinx.Graphics.Shader/Translation/Rewriter.cs b/src/Ryujinx.Graphics.Shader/Translation/Rewriter.cs index baa88251..f5a524a0 100644 --- a/src/Ryujinx.Graphics.Shader/Translation/Rewriter.cs +++ b/src/Ryujinx.Graphics.Shader/Translation/Rewriter.cs @@ -1,6 +1,8 @@ using Ryujinx.Graphics.Shader.IntermediateRepresentation; using Ryujinx.Graphics.Shader.StructuredIr; +using Ryujinx.Graphics.Shader.Translation.Optimizations; using System.Collections.Generic; +using System.Diagnostics; using System.Linq; using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper; @@ -70,6 +72,15 @@ namespace Ryujinx.Graphics.Shader.Translation } } } + else + { + node = InsertSharedStoreSmallInt(hfm, node); + + if (config.Options.TargetLanguage != TargetLanguage.Spirv) + { + node = InsertSharedAtomicSigned(hfm, node); + } + } } } } @@ -171,6 +182,87 @@ namespace Ryujinx.Graphics.Shader.Translation operation.TurnIntoCopy(result); } + private static LinkedListNode<INode> InsertSharedStoreSmallInt(HelperFunctionManager hfm, LinkedListNode<INode> node) + { + Operation operation = (Operation)node.Value; + HelperFunctionName name; + + if (operation.StorageKind == StorageKind.SharedMemory8) + { + name = HelperFunctionName.SharedStore8; + } + else if (operation.StorageKind == StorageKind.SharedMemory16) + { + name = HelperFunctionName.SharedStore16; + } + else + { + return node; + } + + if (operation.Inst != Instruction.Store) + { + return node; + } + + Operand memoryId = operation.GetSource(0); + Operand byteOffset = operation.GetSource(1); + Operand value = operation.GetSource(2); + + Debug.Assert(memoryId.Type == OperandType.Constant); + + int functionId = hfm.GetOrCreateFunctionId(name, memoryId.Value); + + Operand[] callArgs = new Operand[] { Const(functionId), byteOffset, value }; + + LinkedListNode<INode> newNode = node.List.AddBefore(node, new Operation(Instruction.Call, 0, (Operand)null, callArgs)); + + Utils.DeleteNode(node, operation); + + return newNode; + } + + private static LinkedListNode<INode> InsertSharedAtomicSigned(HelperFunctionManager hfm, LinkedListNode<INode> node) + { + Operation operation = (Operation)node.Value; + HelperFunctionName name; + + if (operation.Inst == Instruction.AtomicMaxS32) + { + name = HelperFunctionName.SharedAtomicMaxS32; + } + else if (operation.Inst == Instruction.AtomicMinS32) + { + name = HelperFunctionName.SharedAtomicMinS32; + } + else + { + return node; + } + + if (operation.StorageKind != StorageKind.SharedMemory) + { + return node; + } + + Operand result = operation.Dest; + Operand memoryId = operation.GetSource(0); + Operand byteOffset = operation.GetSource(1); + Operand value = operation.GetSource(2); + + Debug.Assert(memoryId.Type == OperandType.Constant); + + int functionId = hfm.GetOrCreateFunctionId(name, memoryId.Value); + + Operand[] callArgs = new Operand[] { Const(functionId), byteOffset, value }; + + LinkedListNode<INode> newNode = node.List.AddBefore(node, new Operation(Instruction.Call, 0, result, callArgs)); + + Utils.DeleteNode(node, operation); + + return newNode; + } + private static LinkedListNode<INode> InsertTexelFetchScale(HelperFunctionManager hfm, LinkedListNode<INode> node, ShaderConfig config) { TextureOperation texOp = (TextureOperation)node.Value; diff --git a/src/Ryujinx.Graphics.Shader/Translation/ShaderConfig.cs b/src/Ryujinx.Graphics.Shader/Translation/ShaderConfig.cs index 534bda70..fa125002 100644 --- a/src/Ryujinx.Graphics.Shader/Translation/ShaderConfig.cs +++ b/src/Ryujinx.Graphics.Shader/Translation/ShaderConfig.cs @@ -124,7 +124,7 @@ namespace Ryujinx.Graphics.Shader.Translation private TextureDescriptor[] _cachedTextureDescriptors; private TextureDescriptor[] _cachedImageDescriptors; - public ShaderConfig(ShaderStage stage, IGpuAccessor gpuAccessor, TranslationOptions options) + public ShaderConfig(ShaderStage stage, IGpuAccessor gpuAccessor, TranslationOptions options, int localMemorySize) { Stage = stage; GpuAccessor = gpuAccessor; @@ -143,7 +143,7 @@ namespace Ryujinx.Graphics.Shader.Translation _usedTextures = new Dictionary<TextureInfo, TextureMeta>(); _usedImages = new Dictionary<TextureInfo, TextureMeta>(); - ResourceManager = new ResourceManager(stage, gpuAccessor, new ShaderProperties()); + ResourceManager = new ResourceManager(stage, gpuAccessor, new ShaderProperties(), localMemorySize); if (!gpuAccessor.QueryHostSupportsTransformFeedback() && gpuAccessor.QueryTransformFeedbackEnabled()) { @@ -176,14 +176,17 @@ namespace Ryujinx.Graphics.Shader.Translation OutputTopology outputTopology, int maxOutputVertices, IGpuAccessor gpuAccessor, - TranslationOptions options) : this(stage, gpuAccessor, options) + TranslationOptions options) : this(stage, gpuAccessor, options, 0) { ThreadsPerInputPrimitive = 1; OutputTopology = outputTopology; MaxOutputVertices = maxOutputVertices; } - public ShaderConfig(ShaderHeader header, IGpuAccessor gpuAccessor, TranslationOptions options) : this(header.Stage, gpuAccessor, options) + public ShaderConfig( + ShaderHeader header, + IGpuAccessor gpuAccessor, + TranslationOptions options) : this(header.Stage, gpuAccessor, options, GetLocalMemorySize(header)) { GpPassthrough = header.Stage == ShaderStage.Geometry && header.GpPassthrough; ThreadsPerInputPrimitive = header.ThreadsPerInputPrimitive; @@ -197,6 +200,11 @@ namespace Ryujinx.Graphics.Shader.Translation LastInVertexPipeline = header.Stage < ShaderStage.Fragment; } + private static int GetLocalMemorySize(ShaderHeader header) + { + return header.ShaderLocalMemoryLowSize + header.ShaderLocalMemoryHighSize + (header.ShaderLocalMemoryCrsSize / ThreadsPerWarp); + } + private void EnsureTransformFeedbackInitialized() { if (HasTransformFeedbackOutputs() && _transformFeedbackOutputs == null) diff --git a/src/Ryujinx.Graphics.Shader/Translation/Translator.cs b/src/Ryujinx.Graphics.Shader/Translation/Translator.cs index c0212a5b..b44d6daa 100644 --- a/src/Ryujinx.Graphics.Shader/Translation/Translator.cs +++ b/src/Ryujinx.Graphics.Shader/Translation/Translator.cs @@ -107,7 +107,7 @@ namespace Ryujinx.Graphics.Shader.Translation if (options.Flags.HasFlag(TranslationFlags.Compute)) { - config = new ShaderConfig(ShaderStage.Compute, gpuAccessor, options); + config = new ShaderConfig(ShaderStage.Compute, gpuAccessor, options, gpuAccessor.QueryComputeLocalMemorySize()); program = Decoder.Decode(config, address); } |
