diff options
| author | gdkchan <gab.dark.100@gmail.com> | 2023-06-03 20:12:18 -0300 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2023-06-03 20:12:18 -0300 |
| commit | 21c9ac6240a3db3300143d1d0dd4a1070d4f576f (patch) | |
| tree | 1d3fbafa1861368efe7cf8c923752cb0b621f717 /src/Ryujinx.Graphics.Shader/Translation | |
| parent | 81c9052847f1aa4a70010fefa8e6ee38b5ace612 (diff) | |
Implement shader storage buffer operations using new Load/Store instructions (#4993)
* Implement storage buffer operations using new Load/Store instructions
* Extend GenerateMultiTargetStorageOp to also match accesses with a constant offset, and add logging and comments
* Remove now unused code
* Catch more complex cases of global memory usage
* Shader cache version bump
* Extend global access elimination to work with more shared memory cases
* Change alignment requirement from 16 bytes to 8 bytes, handle cases where we need more than 16 storage buffers
* Tweak preferencing to catch more cases
* Enable CB0 elimination even when host storage buffer alignment is > 16 (for Intel)
* Fix storage buffer bindings
* Simplify some code
* Shader cache version bump
* Fix typo
* Extend global memory elimination to handle shared memory with multiple possible offsets and local memory
Diffstat (limited to 'src/Ryujinx.Graphics.Shader/Translation')
12 files changed, 1168 insertions, 697 deletions
diff --git a/src/Ryujinx.Graphics.Shader/Translation/EmitterContextInsts.cs b/src/Ryujinx.Graphics.Shader/Translation/EmitterContextInsts.cs index 6d4104ce..be0cba80 100644 --- a/src/Ryujinx.Graphics.Shader/Translation/EmitterContextInsts.cs +++ b/src/Ryujinx.Graphics.Shader/Translation/EmitterContextInsts.cs @@ -57,6 +57,56 @@ namespace Ryujinx.Graphics.Shader.Translation return context.Add(Instruction.AtomicXor, storageKind, Local(), a, b, c); } + public static Operand AtomicAdd(this EmitterContext context, StorageKind storageKind, int binding, Operand e0, Operand e1, Operand value) + { + return context.Add(Instruction.AtomicAdd, storageKind, Local(), Const(binding), e0, e1, value); + } + + public static Operand AtomicAnd(this EmitterContext context, StorageKind storageKind, int binding, Operand e0, Operand e1, Operand value) + { + return context.Add(Instruction.AtomicAnd, storageKind, Local(), Const(binding), e0, e1, value); + } + + public static Operand AtomicCompareAndSwap(this EmitterContext context, StorageKind storageKind, int binding, Operand e0, Operand e1, Operand compare, Operand value) + { + return context.Add(Instruction.AtomicCompareAndSwap, storageKind, Local(), Const(binding), e0, e1, compare, value); + } + + public static Operand AtomicMaxS32(this EmitterContext context, StorageKind storageKind, int binding, Operand e0, Operand e1, Operand value) + { + return context.Add(Instruction.AtomicMaxS32, storageKind, Local(), Const(binding), e0, e1, value); + } + + public static Operand AtomicMaxU32(this EmitterContext context, StorageKind storageKind, int binding, Operand e0, Operand e1, Operand value) + { + return context.Add(Instruction.AtomicMaxU32, storageKind, Local(), Const(binding), e0, e1, value); + } + + public static Operand AtomicMinS32(this EmitterContext context, StorageKind storageKind, int binding, Operand e0, Operand e1, Operand value) + { + return context.Add(Instruction.AtomicMinS32, storageKind, Local(), Const(binding), e0, e1, value); 
+ } + + public static Operand AtomicMinU32(this EmitterContext context, StorageKind storageKind, int binding, Operand e0, Operand e1, Operand value) + { + return context.Add(Instruction.AtomicMinU32, storageKind, Local(), Const(binding), e0, e1, value); + } + + public static Operand AtomicOr(this EmitterContext context, StorageKind storageKind, int binding, Operand e0, Operand e1, Operand value) + { + return context.Add(Instruction.AtomicOr, storageKind, Local(), Const(binding), e0, e1, value); + } + + public static Operand AtomicSwap(this EmitterContext context, StorageKind storageKind, int binding, Operand e0, Operand e1, Operand value) + { + return context.Add(Instruction.AtomicSwap, storageKind, Local(), Const(binding), e0, e1, value); + } + + public static Operand AtomicXor(this EmitterContext context, StorageKind storageKind, int binding, Operand e0, Operand e1, Operand value) + { + return context.Add(Instruction.AtomicXor, storageKind, Local(), Const(binding), e0, e1, value); + } + public static Operand Ballot(this EmitterContext context, Operand a) { return context.Add(Instruction.Ballot, Local(), a); @@ -554,6 +604,11 @@ namespace Ryujinx.Graphics.Shader.Translation return context.Add(fpType | Instruction.IsNan, Local(), a); } + public static Operand Load(this EmitterContext context, StorageKind storageKind, Operand e0, Operand e1) + { + return context.Add(Instruction.Load, storageKind, Local(), e0, e1); + } + public static Operand Load(this EmitterContext context, StorageKind storageKind, int binding) { return context.Add(Instruction.Load, storageKind, Local(), Const(binding)); @@ -606,11 +661,6 @@ namespace Ryujinx.Graphics.Shader.Translation : context.Load(storageKind, (int)ioVariable, arrayIndex, elemIndex); } - public static Operand LoadGlobal(this EmitterContext context, Operand a, Operand b) - { - return context.Add(Instruction.LoadGlobal, Local(), a, b); - } - public static Operand LoadLocal(this EmitterContext context, Operand a) { return 
context.Add(Instruction.LoadLocal, Local(), a); @@ -655,7 +705,6 @@ namespace Ryujinx.Graphics.Shader.Translation public static void Return(this EmitterContext context) { - context.PrepareForReturn(); context.Add(Instruction.Return); } @@ -699,6 +748,16 @@ namespace Ryujinx.Graphics.Shader.Translation return context.Add(Instruction.ShuffleXor, (Local(), Local()), a, b, c); } + public static Operand Store(this EmitterContext context, StorageKind storageKind, Operand e0, Operand e1, Operand value) + { + return context.Add(Instruction.Store, storageKind, null, e0, e1, value); + } + + public static Operand Store(this EmitterContext context, StorageKind storageKind, int binding, Operand e0, Operand e1, Operand value) + { + return context.Add(Instruction.Store, storageKind, null, Const(binding), e0, e1, value); + } + public static Operand Store( this EmitterContext context, StorageKind storageKind, @@ -738,21 +797,6 @@ namespace Ryujinx.Graphics.Shader.Translation : context.Add(Instruction.Store, storageKind, null, Const((int)ioVariable), arrayIndex, elemIndex, value); } - public static Operand StoreGlobal(this EmitterContext context, Operand a, Operand b, Operand c) - { - return context.Add(Instruction.StoreGlobal, null, a, b, c); - } - - public static Operand StoreGlobal16(this EmitterContext context, Operand a, Operand b, Operand c) - { - return context.Add(Instruction.StoreGlobal16, null, a, b, c); - } - - public static Operand StoreGlobal8(this EmitterContext context, Operand a, Operand b, Operand c) - { - return context.Add(Instruction.StoreGlobal8, null, a, b, c); - } - public static Operand StoreLocal(this EmitterContext context, Operand a, Operand b) { return context.Add(Instruction.StoreLocal, null, a, b); diff --git a/src/Ryujinx.Graphics.Shader/Translation/GlobalMemory.cs b/src/Ryujinx.Graphics.Shader/Translation/GlobalMemory.cs deleted file mode 100644 index a81d0fc4..00000000 --- a/src/Ryujinx.Graphics.Shader/Translation/GlobalMemory.cs +++ /dev/null @@ 
-1,54 +0,0 @@ -using Ryujinx.Graphics.Shader.IntermediateRepresentation; - -namespace Ryujinx.Graphics.Shader.Translation -{ - static class GlobalMemory - { - private const int StorageDescsBaseOffset = 0x44; // In words. - - public const int StorageDescSize = 4; // In words. - public const int StorageMaxCount = 16; - - public const int StorageDescsSize = StorageDescSize * StorageMaxCount; - - public const int UbeBaseOffset = 0x98; // In words. - public const int UbeMaxCount = 9; - public const int UbeDescsSize = StorageDescSize * UbeMaxCount; - public const int UbeFirstCbuf = 8; - - public const int DriverReservedCb = 0; - - public static bool UsesGlobalMemory(Instruction inst, StorageKind storageKind) - { - return (inst.IsAtomic() && storageKind == StorageKind.GlobalMemory) || - inst == Instruction.LoadGlobal || - inst == Instruction.StoreGlobal || - inst == Instruction.StoreGlobal16 || - inst == Instruction.StoreGlobal8; - } - - public static int GetStorageCbOffset(ShaderStage stage, int slot) - { - return GetStorageBaseCbOffset(stage) + slot * StorageDescSize; - } - - public static int GetStorageBaseCbOffset(ShaderStage stage) - { - return stage switch - { - ShaderStage.Compute => StorageDescsBaseOffset + 2 * StorageDescsSize, - ShaderStage.Vertex => StorageDescsBaseOffset, - ShaderStage.TessellationControl => StorageDescsBaseOffset + 1 * StorageDescsSize, - ShaderStage.TessellationEvaluation => StorageDescsBaseOffset + 2 * StorageDescsSize, - ShaderStage.Geometry => StorageDescsBaseOffset + 3 * StorageDescsSize, - ShaderStage.Fragment => StorageDescsBaseOffset + 4 * StorageDescsSize, - _ => 0 - }; - } - - public static int GetConstantUbeOffset(int slot) - { - return UbeBaseOffset + slot * StorageDescSize; - } - } -}
\ No newline at end of file diff --git a/src/Ryujinx.Graphics.Shader/Translation/HelperFunctionManager.cs b/src/Ryujinx.Graphics.Shader/Translation/HelperFunctionManager.cs index 206facd4..7dd267f3 100644 --- a/src/Ryujinx.Graphics.Shader/Translation/HelperFunctionManager.cs +++ b/src/Ryujinx.Graphics.Shader/Translation/HelperFunctionManager.cs @@ -19,6 +19,14 @@ namespace Ryujinx.Graphics.Shader.Translation _stage = stage; } + public int AddFunction(Function function) + { + int functionId = _functionList.Count; + _functionList.Add(function); + + return functionId; + } + public int GetOrCreateFunctionId(HelperFunctionName functionName) { if (_functionIds.TryGetValue(functionName, out int functionId)) @@ -27,8 +35,7 @@ namespace Ryujinx.Graphics.Shader.Translation } Function function = GenerateFunction(functionName); - functionId = _functionList.Count; - _functionList.Add(function); + functionId = AddFunction(function); _functionIds.Add(functionName, functionId); return functionId; diff --git a/src/Ryujinx.Graphics.Shader/Translation/Optimizations/GlobalToStorage.cs b/src/Ryujinx.Graphics.Shader/Translation/Optimizations/GlobalToStorage.cs index 7758b4c6..14904b26 100644 --- a/src/Ryujinx.Graphics.Shader/Translation/Optimizations/GlobalToStorage.cs +++ b/src/Ryujinx.Graphics.Shader/Translation/Optimizations/GlobalToStorage.cs @@ -1,483 +1,1140 @@ using Ryujinx.Graphics.Shader.IntermediateRepresentation; +using System; using System.Collections.Generic; +using System.Linq; using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper; -using static Ryujinx.Graphics.Shader.Translation.GlobalMemory; namespace Ryujinx.Graphics.Shader.Translation.Optimizations { static class GlobalToStorage { - private struct SearchResult + private const int DriverReservedCb = 0; + + enum LsMemoryType { - public static SearchResult NotFound => new SearchResult(-1, 0); - public bool Found => SbCbSlot != -1; - public int SbCbSlot { get; } - public int SbCbOffset { get; } + 
Local, + Shared + } - public SearchResult(int sbCbSlot, int sbCbOffset) + private class GtsContext + { + private struct Entry { - SbCbSlot = sbCbSlot; - SbCbOffset = sbCbOffset; + public readonly int FunctionId; + public readonly Instruction Inst; + public readonly StorageKind StorageKind; + public readonly bool IsMultiTarget; + public readonly IReadOnlyList<uint> TargetCbs; + + public Entry( + int functionId, + Instruction inst, + StorageKind storageKind, + bool isMultiTarget, + IReadOnlyList<uint> targetCbs) + { + FunctionId = functionId; + Inst = inst; + StorageKind = storageKind; + IsMultiTarget = isMultiTarget; + TargetCbs = targetCbs; + } } - } - public static void RunPass(BasicBlock block, ShaderConfig config, ref int sbUseMask, ref int ubeUseMask) - { - int sbStart = GetStorageBaseCbOffset(config.Stage); - int sbEnd = sbStart + StorageDescsSize; + private struct LsKey : IEquatable<LsKey> + { + public readonly Operand BaseOffset; + public readonly int ConstOffset; + public readonly LsMemoryType Type; - int ubeStart = UbeBaseOffset; - int ubeEnd = UbeBaseOffset + UbeDescsSize; + public LsKey(Operand baseOffset, int constOffset, LsMemoryType type) + { + BaseOffset = baseOffset; + ConstOffset = constOffset; + Type = type; + } - for (LinkedListNode<INode> node = block.Operations.First; node != null; node = node.Next) - { - for (int index = 0; index < node.Value.SourcesCount; index++) + public override int GetHashCode() + { + return HashCode.Combine(BaseOffset, ConstOffset, Type); + } + + public override bool Equals(object obj) { - Operand src = node.Value.GetSource(index); + return obj is LsKey other && Equals(other); + } + + public bool Equals(LsKey other) + { + return other.BaseOffset == BaseOffset && other.ConstOffset == ConstOffset && other.Type == Type; + } + } + + private readonly List<Entry> _entries; + private readonly Dictionary<LsKey, Dictionary<uint, SearchResult>> _sharedEntries; + private readonly HelperFunctionManager _hfm; + + public 
GtsContext(HelperFunctionManager hfm) + { + _entries = new List<Entry>(); + _sharedEntries = new Dictionary<LsKey, Dictionary<uint, SearchResult>>(); + _hfm = hfm; + } + + public int AddFunction(Operation baseOp, bool isMultiTarget, IReadOnlyList<uint> targetCbs, Function function) + { + int functionId = _hfm.AddFunction(function); - int storageIndex = GetStorageIndex(src, sbStart, sbEnd); + _entries.Add(new Entry(functionId, baseOp.Inst, baseOp.StorageKind, isMultiTarget, targetCbs)); - if (storageIndex >= 0) + return functionId; + } + + public bool TryGetFunctionId(Operation baseOp, bool isMultiTarget, IReadOnlyList<uint> targetCbs, out int functionId) + { + foreach (Entry entry in _entries) + { + if (entry.Inst != baseOp.Inst || + entry.StorageKind != baseOp.StorageKind || + entry.IsMultiTarget != isMultiTarget || + entry.TargetCbs.Count != targetCbs.Count) { - sbUseMask |= 1 << storageIndex; + continue; } - if (config.Stage == ShaderStage.Compute) - { - int constantIndex = GetStorageIndex(src, ubeStart, ubeEnd); + bool allEqual = true; - if (constantIndex >= 0) + for (int index = 0; index < targetCbs.Count; index++) + { + if (targetCbs[index] != entry.TargetCbs[index]) { - ubeUseMask |= 1 << constantIndex; + allEqual = false; + break; } } + + if (allEqual) + { + functionId = entry.FunctionId; + return true; + } + } + + functionId = -1; + return false; + } + + public void AddMemoryTargetCb(LsMemoryType type, Operand baseOffset, int constOffset, uint targetCb, SearchResult result) + { + LsKey key = new LsKey(baseOffset, constOffset, type); + + if (!_sharedEntries.TryGetValue(key, out Dictionary<uint, SearchResult> targetCbs)) + { + // No entry with this base offset, create a new one. 
+ + targetCbs = new Dictionary<uint, SearchResult>() { { targetCb, result } }; + + _sharedEntries.Add(key, targetCbs); } + else if (targetCbs.TryGetValue(targetCb, out SearchResult existingResult)) + { + // If our entry already exists, but does not match the new result, + // we set the offset to null to indicate there are multiple possible offsets. + // This will be used on the multi-target access that does not need to know the offset. - if (!(node.Value is Operation operation)) + if (existingResult.Offset != null && + (existingResult.Offset != result.Offset || + existingResult.ConstOffset != result.ConstOffset)) + { + targetCbs[targetCb] = new SearchResult(result.SbCbSlot, result.SbCbOffset); + } + } + else { - continue; + // An entry for this base offset already exists, but not for the specified + // constant buffer region where the storage buffer base address and size + // comes from. + + targetCbs.Add(targetCb, result); } + } + + public bool TryGetMemoryTargetCb(LsMemoryType type, Operand baseOffset, int constOffset, out SearchResult result) + { + LsKey key = new LsKey(baseOffset, constOffset, type); - if (UsesGlobalMemory(operation.Inst, operation.StorageKind)) + if (_sharedEntries.TryGetValue(key, out Dictionary<uint, SearchResult> targetCbs) && targetCbs.Count == 1) { - Operand source = operation.GetSource(0); + SearchResult candidateResult = targetCbs.Values.First(); - var result = SearchForStorageBase(config, block, source); - if (!result.Found) + if (candidateResult.Found) + { + result = candidateResult; + + return true; + } + } + + result = default; + + return false; + } + } + + private struct SearchResult + { + public static SearchResult NotFound => new SearchResult(-1, 0); + public bool Found => SbCbSlot != -1; + public int SbCbSlot { get; } + public int SbCbOffset { get; } + public Operand Offset { get; } + public int ConstOffset { get; } + + public SearchResult(int sbCbSlot, int sbCbOffset) + { + SbCbSlot = sbCbSlot; + SbCbOffset = sbCbOffset; + } + 
+ public SearchResult(int sbCbSlot, int sbCbOffset, Operand offset, int constOffset = 0) + { + SbCbSlot = sbCbSlot; + SbCbOffset = sbCbOffset; + Offset = offset; + ConstOffset = constOffset; + } + } + + public static void RunPass(HelperFunctionManager hfm, BasicBlock[] blocks, ShaderConfig config) + { + GtsContext gtsContext = new GtsContext(hfm); + + foreach (BasicBlock block in blocks) + { + for (LinkedListNode<INode> node = block.Operations.First; node != null; node = node.Next) + { + if (!(node.Value is Operation operation)) { continue; } - if (config.Stage == ShaderStage.Compute && - operation.Inst == Instruction.LoadGlobal && - result.SbCbSlot == DriverReservedCb && - result.SbCbOffset >= UbeBaseOffset && - result.SbCbOffset < UbeBaseOffset + UbeDescsSize) + if (IsGlobalMemory(operation.StorageKind)) { - // Here we effectively try to replace a LDG instruction with LDC. - // The hardware only supports a limited amount of constant buffers - // so NVN "emulates" more constant buffers using global memory access. - // Here we try to replace the global access back to a constant buffer - // load. - node = ReplaceLdgWithLdc(node, config, (result.SbCbOffset - UbeBaseOffset) / StorageDescSize); + LinkedListNode<INode> nextNode = ReplaceGlobalMemoryWithStorage(gtsContext, config, block, node); + + if (nextNode == null) + { + // The returned value being null means that the global memory replacement failed, + // so we just make loads read 0 and stores do nothing. + + config.GpuAccessor.Log($"Failed to reserve storage buffer for global memory operation \"{operation.Inst}\"."); + + if (operation.Dest != null) + { + operation.TurnIntoCopy(Const(0)); + } + else + { + Utils.DeleteNode(node, operation); + } + } + else + { + node = nextNode; + } } - else + else if (operation.Inst == Instruction.StoreShared || operation.Inst == Instruction.StoreLocal) { - // Storage buffers are implemented using global memory access. 
- // If we know from where the base address of the access is loaded, - // we can guess which storage buffer it is accessing. - // We can then replace the global memory access with a storage - // buffer access. - node = ReplaceGlobalWithStorage(block, node, config, config.GetSbSlot((byte)result.SbCbSlot, (ushort)result.SbCbOffset)); + // The NVIDIA compiler can sometimes use shared or local memory as temporary + // storage to place the base address and size on, so we need + // to be able to find such information stored in memory too. + + if (TryGetMemoryOffsets(operation, out LsMemoryType type, out Operand baseOffset, out int constOffset)) + { + Operand value = operation.GetSource(operation.SourcesCount - 1); + + var result = FindUniqueBaseAddressCb(gtsContext, block, value, needsOffset: false); + if (result.Found) + { + uint targetCb = PackCbSlotAndOffset(result.SbCbSlot, result.SbCbOffset); + gtsContext.AddMemoryTargetCb(type, baseOffset, constOffset, targetCb, result); + } + } } } } + } - config.SetAccessibleBufferMasks(sbUseMask, ubeUseMask); + private static bool IsGlobalMemory(StorageKind storageKind) + { + return storageKind == StorageKind.GlobalMemory || + storageKind == StorageKind.GlobalMemoryS8 || + storageKind == StorageKind.GlobalMemoryS16 || + storageKind == StorageKind.GlobalMemoryU8 || + storageKind == StorageKind.GlobalMemoryU16; } - private static LinkedListNode<INode> ReplaceGlobalWithStorage(BasicBlock block, LinkedListNode<INode> node, ShaderConfig config, int storageIndex) + private static bool IsSmallInt(StorageKind storageKind) { - Operation operation = (Operation)node.Value; + return storageKind == StorageKind.GlobalMemoryS8 || + storageKind == StorageKind.GlobalMemoryS16 || + storageKind == StorageKind.GlobalMemoryU8 || + storageKind == StorageKind.GlobalMemoryU16; + } - bool isAtomic = operation.Inst.IsAtomic(); - bool isStg16Or8 = operation.Inst == Instruction.StoreGlobal16 || operation.Inst == Instruction.StoreGlobal8; - bool isWrite = 
isAtomic || operation.Inst == Instruction.StoreGlobal || isStg16Or8; + private static LinkedListNode<INode> ReplaceGlobalMemoryWithStorage( + GtsContext gtsContext, + ShaderConfig config, + BasicBlock block, + LinkedListNode<INode> node) + { + Operation operation = node.Value as Operation; + Operand globalAddress = operation.GetSource(0); + SearchResult result = FindUniqueBaseAddressCb(gtsContext, block, globalAddress, needsOffset: true); - config.SetUsedStorageBuffer(storageIndex, isWrite); + if (result.Found) + { + // We found the storage buffer that is being accessed. + // There are two possible paths here, if the operation is simple enough, + // we just generate the storage access code inline. + // Otherwise, we generate a function call (and the function if necessary). - Operand[] sources = new Operand[operation.SourcesCount]; + Operand offset = result.Offset; - sources[0] = Const(storageIndex); - sources[1] = GetStorageOffset(block, node, config, storageIndex, operation.GetSource(0), isStg16Or8); + bool storageUnaligned = config.GpuAccessor.QueryHasUnalignedStorageBuffer(); - for (int index = 2; index < operation.SourcesCount; index++) - { - sources[index] = operation.GetSource(index); + if (storageUnaligned) + { + Operand baseAddress = Cbuf(result.SbCbSlot, result.SbCbOffset); + + Operand baseAddressMasked = Local(); + Operand hostOffset = Local(); + + int alignment = config.GpuAccessor.QueryHostStorageBufferOffsetAlignment(); + + Operation maskOp = new Operation(Instruction.BitwiseAnd, baseAddressMasked, new[] { baseAddress, Const(-alignment) }); + Operation subOp = new Operation(Instruction.Subtract, hostOffset, new[] { globalAddress, baseAddressMasked }); + + node.List.AddBefore(node, maskOp); + node.List.AddBefore(node, subOp); + + offset = hostOffset; + } + else if (result.ConstOffset != 0) + { + Operand newOffset = Local(); + + Operation addOp = new Operation(Instruction.Add, newOffset, new[] { offset, Const(result.ConstOffset) }); + + 
node.List.AddBefore(node, addOp); + + offset = newOffset; + } + + if (CanUseInlineStorageOp(operation, config.Options.TargetLanguage)) + { + return GenerateInlineStorageOp(config, node, operation, offset, result); + } + else + { + if (!TryGenerateSingleTargetStorageOp(gtsContext, config, operation, result, out int functionId)) + { + return null; + } + + return GenerateCallStorageOp(node, operation, offset, functionId); + } } + else + { + // Failed to find the storage buffer directly. + // Try to walk through Phi chains and find all possible constant buffers where + // the base address might be stored. + // Generate a helper function that will check all possible storage buffers and use the right one. - Operation storageOp; + if (!TryGenerateMultiTargetStorageOp(gtsContext, config, block, operation, out int functionId)) + { + return null; + } + + return GenerateCallStorageOp(node, operation, null, functionId); + } + } - if (isAtomic) + private static bool CanUseInlineStorageOp(Operation operation, TargetLanguage targetLanguage) + { + if (operation.StorageKind != StorageKind.GlobalMemory) { - storageOp = new Operation(operation.Inst, StorageKind.StorageBuffer, operation.Dest, sources); + return false; } - else if (operation.Inst == Instruction.LoadGlobal) + + return (operation.Inst != Instruction.AtomicMaxS32 && + operation.Inst != Instruction.AtomicMinS32) || targetLanguage == TargetLanguage.Spirv; + } + + private static LinkedListNode<INode> GenerateInlineStorageOp( + ShaderConfig config, + LinkedListNode<INode> node, + Operation operation, + Operand offset, + SearchResult result) + { + bool isStore = operation.Inst == Instruction.Store || operation.Inst.IsAtomic(); + if (!config.ResourceManager.TryGetStorageBufferBinding(result.SbCbSlot, result.SbCbOffset, isStore, out int binding)) { - storageOp = new Operation(Instruction.LoadStorage, operation.Dest, sources); + return null; } - else + + Operand wordOffset = Local(); + + Operand[] sources; + + if (operation.Inst 
== Instruction.AtomicCompareAndSwap) { - Instruction storeInst = operation.Inst switch + sources = new Operand[] { - Instruction.StoreGlobal16 => Instruction.StoreStorage16, - Instruction.StoreGlobal8 => Instruction.StoreStorage8, - _ => Instruction.StoreStorage + Const(binding), + Const(0), + wordOffset, + operation.GetSource(operation.SourcesCount - 2), + operation.GetSource(operation.SourcesCount - 1) }; - - storageOp = new Operation(storeInst, null, sources); } - - for (int index = 0; index < operation.SourcesCount; index++) + else if (isStore) + { + sources = new Operand[] { Const(binding), Const(0), wordOffset, operation.GetSource(operation.SourcesCount - 1) }; + } + else { - operation.SetSource(index, null); + sources = new Operand[] { Const(binding), Const(0), wordOffset }; } - LinkedListNode<INode> oldNode = node; + Operation shiftOp = new Operation(Instruction.ShiftRightU32, wordOffset, new[] { offset, Const(2) }); + Operation storageOp = new Operation(operation.Inst, StorageKind.StorageBuffer, operation.Dest, sources); - node = node.List.AddBefore(node, storageOp); + node.List.AddBefore(node, shiftOp); + LinkedListNode<INode> newNode = node.List.AddBefore(node, storageOp); - node.List.Remove(oldNode); + Utils.DeleteNode(node, operation); - return node; + return newNode; } - private static Operand GetStorageOffset( - BasicBlock block, - LinkedListNode<INode> node, - ShaderConfig config, - int storageIndex, - Operand addrLow, - bool isStg16Or8) + private static LinkedListNode<INode> GenerateCallStorageOp(LinkedListNode<INode> node, Operation operation, Operand offset, int functionId) { - (int sbCbSlot, int sbCbOffset) = config.GetSbCbInfo(storageIndex); + // Generate call to a helper function that will perform the storage buffer operation. 
- bool storageAligned = !(config.GpuAccessor.QueryHasUnalignedStorageBuffer() || config.GpuAccessor.QueryHostStorageBufferOffsetAlignment() > Constants.StorageAlignment); + Operand[] sources = new Operand[operation.SourcesCount - 1 + (offset == null ? 2 : 1)]; - (Operand byteOffset, int constantOffset) = storageAligned ? - GetStorageOffset(block, Utils.FindLastOperation(addrLow, block), sbCbSlot, sbCbOffset) : - (null, 0); + sources[0] = Const(functionId); - if (byteOffset != null) + if (offset != null) { - ReplaceAddressAlignment(node.List, addrLow, byteOffset, constantOffset); + // If the offset was supplised, we use that and skip the global address. + + sources[1] = offset; + + for (int srcIndex = 2; srcIndex < operation.SourcesCount; srcIndex++) + { + sources[srcIndex] = operation.GetSource(srcIndex); + } } + else + { + // Use the 64-bit global address which is split in 2 32-bit arguments. - if (byteOffset == null) + for (int srcIndex = 0; srcIndex < operation.SourcesCount; srcIndex++) + { + sources[srcIndex + 1] = operation.GetSource(srcIndex); + } + } + + bool returnsValue = operation.Dest != null; + Operand returnValue = returnsValue ? 
Local() : null; + + Operation callOp = new Operation(Instruction.Call, returnValue, sources); + + LinkedListNode<INode> newNode = node.List.AddBefore(node, callOp); + + if (returnsValue) { - Operand baseAddrLow = Cbuf(sbCbSlot, sbCbOffset); - Operand baseAddrTrunc = Local(); + operation.TurnIntoCopy(returnValue); - Operand alignMask = Const(-config.GpuAccessor.QueryHostStorageBufferOffsetAlignment()); + return node; + } + else + { + Utils.DeleteNode(node, operation); - Operation andOp = new Operation(Instruction.BitwiseAnd, baseAddrTrunc, baseAddrLow, alignMask); + return newNode; + } + } - node.List.AddBefore(node, andOp); + private static bool TryGenerateSingleTargetStorageOp( + GtsContext gtsContext, + ShaderConfig config, + Operation operation, + SearchResult result, + out int functionId) + { + List<uint> targetCbs = new List<uint>() { PackCbSlotAndOffset(result.SbCbSlot, result.SbCbOffset) }; - Operand offset = Local(); - Operation subOp = new Operation(Instruction.Subtract, offset, addrLow, baseAddrTrunc); + if (gtsContext.TryGetFunctionId(operation, isMultiTarget: false, targetCbs, out functionId)) + { + return true; + } - node.List.AddBefore(node, subOp); + int inArgumentsCount = 1; - byteOffset = offset; + if (operation.Inst == Instruction.AtomicCompareAndSwap) + { + inArgumentsCount = 3; } - else if (constantOffset != 0) + else if (operation.Inst == Instruction.Store || operation.Inst.IsAtomic()) { - Operand offset = Local(); - Operation addOp = new Operation(Instruction.Add, offset, byteOffset, Const(constantOffset)); + inArgumentsCount = 2; + } + + EmitterContext context = new EmitterContext(); + + Operand offset = Argument(0); + Operand compare = null; + Operand value = null; - node.List.AddBefore(node, addOp); + if (inArgumentsCount == 3) + { + compare = Argument(1); + value = Argument(2); + } + else if (inArgumentsCount == 2) + { + value = Argument(1); + } - byteOffset = offset; + if (!TryGenerateStorageOp( + config, + context, + operation.Inst, + 
operation.StorageKind, + offset, + compare, + value, + result, + out Operand resultValue)) + { + functionId = 0; + return false; } - if (isStg16Or8) + bool returnsValue = resultValue != null; + + if (returnsValue) { - return byteOffset; + context.Return(resultValue); + } + else + { + context.Return(); } - Operand wordOffset = Local(); - Operation shrOp = new Operation(Instruction.ShiftRightU32, wordOffset, byteOffset, Const(2)); + string functionName = GetFunctionName(operation, isMultiTarget: false, targetCbs); - node.List.AddBefore(node, shrOp); + Function function = new Function( + ControlFlowGraph.Create(context.GetOperations()).Blocks, + functionName, + returnsValue, + inArgumentsCount, + 0); - return wordOffset; - } + functionId = gtsContext.AddFunction(operation, isMultiTarget: false, targetCbs, function); - private static bool IsCbOffset(Operand operand, int slot, int offset) - { - return operand.Type == OperandType.ConstantBuffer && operand.GetCbufSlot() == slot && operand.GetCbufOffset() == offset; + return true; } - private static void ReplaceAddressAlignment(LinkedList<INode> list, Operand address, Operand byteOffset, int constantOffset) + private static bool TryGenerateMultiTargetStorageOp( + GtsContext gtsContext, + ShaderConfig config, + BasicBlock block, + Operation operation, + out int functionId) { - // When we emit 16/8-bit LDG, we add extra code to determine the address alignment. - // Eliminate the storage buffer base address from this too, leaving only the byte offset. 
+ Queue<PhiNode> phis = new Queue<PhiNode>(); + HashSet<PhiNode> visited = new HashSet<PhiNode>(); + List<uint> targetCbs = new List<uint>(); + + Operand globalAddress = operation.GetSource(0); - foreach (INode useNode in address.UseOps) + if (globalAddress.AsgOp is Operation addOp && addOp.Inst == Instruction.Add) { - if (useNode is Operation op && op.Inst == Instruction.BitwiseAnd) + Operand src1 = addOp.GetSource(0); + Operand src2 = addOp.GetSource(1); + + if (src1.Type == OperandType.Constant && src2.Type == OperandType.LocalVariable) { - Operand src1 = op.GetSource(0); - Operand src2 = op.GetSource(1); + globalAddress = src2; + } + else if (src1.Type == OperandType.LocalVariable && src2.Type == OperandType.Constant) + { + globalAddress = src1; + } + } + + if (globalAddress.AsgOp is PhiNode phi && visited.Add(phi)) + { + phis.Enqueue(phi); + } + else + { + SearchResult result = FindUniqueBaseAddressCb(gtsContext, block, operation.GetSource(0), needsOffset: false); - int addressIndex = -1; + if (result.Found) + { + targetCbs.Add(PackCbSlotAndOffset(result.SbCbSlot, result.SbCbOffset)); + } + } - if (src1 == address && src2.Type == OperandType.Constant && src2.Value == 3) - { - addressIndex = 0; - } - else if (src2 == address && src1.Type == OperandType.Constant && src1.Value == 3) - { - addressIndex = 1; - } + while (phis.TryDequeue(out phi)) + { + for (int srcIndex = 0; srcIndex < phi.SourcesCount; srcIndex++) + { + BasicBlock phiBlock = phi.GetBlock(srcIndex); + Operand phiSource = phi.GetSource(srcIndex); - if (addressIndex != -1) + SearchResult result = FindUniqueBaseAddressCb(gtsContext, phiBlock, phiSource, needsOffset: false); + + if (result.Found) { - LinkedListNode<INode> node = list.Find(op); + uint targetCb = PackCbSlotAndOffset(result.SbCbSlot, result.SbCbOffset); - // Add offset calculation before the use. Needs to be on the same block. 
- if (node != null) + if (!targetCbs.Contains(targetCb)) { - Operand offset = Local(); - Operation addOp = new Operation(Instruction.Add, offset, byteOffset, Const(constantOffset)); - list.AddBefore(node, addOp); - - op.SetSource(addressIndex, offset); + targetCbs.Add(targetCb); } } + else if (phiSource.AsgOp is PhiNode phi2 && visited.Add(phi2)) + { + phis.Enqueue(phi2); + } } } - } - private static (Operand, int) GetStorageOffset(BasicBlock block, Operand address, int cbSlot, int baseAddressCbOffset) - { - if (IsCbOffset(address, cbSlot, baseAddressCbOffset)) + targetCbs.Sort(); + + if (targetCbs.Count == 0) { - // Direct offset: zero. - return (Const(0), 0); + config.GpuAccessor.Log($"Failed to find storage buffer for global memory operation \"{operation.Inst}\"."); } - (address, int constantOffset) = GetStorageConstantOffset(block, address); + if (gtsContext.TryGetFunctionId(operation, isMultiTarget: true, targetCbs, out functionId)) + { + return true; + } - address = Utils.FindLastOperation(address, block); + int inArgumentsCount = 2; - if (IsCbOffset(address, cbSlot, baseAddressCbOffset)) + if (operation.Inst == Instruction.AtomicCompareAndSwap) + { + inArgumentsCount = 4; + } + else if (operation.Inst == Instruction.Store || operation.Inst.IsAtomic()) { - // Only constant offset - return (Const(0), constantOffset); + inArgumentsCount = 3; } - if (!(address.AsgOp is Operation offsetAdd) || offsetAdd.Inst != Instruction.Add) + EmitterContext context = new EmitterContext(); + + Operand globalAddressLow = Argument(0); + Operand globalAddressHigh = Argument(1); + + foreach (uint targetCb in targetCbs) { - return (null, 0); + (int sbCbSlot, int sbCbOffset) = UnpackCbSlotAndOffset(targetCb); + + Operand baseAddrLow = Cbuf(sbCbSlot, sbCbOffset); + Operand baseAddrHigh = Cbuf(sbCbSlot, sbCbOffset + 1); + Operand size = Cbuf(sbCbSlot, sbCbOffset + 2); + + Operand offset = context.ISubtract(globalAddressLow, baseAddrLow); + Operand borrow = 
context.ICompareLessUnsigned(globalAddressLow, baseAddrLow); + + Operand inRangeLow = context.ICompareLessUnsigned(offset, size); + + Operand addrHighBorrowed = context.IAdd(globalAddressHigh, borrow); + + Operand inRangeHigh = context.ICompareEqual(addrHighBorrowed, baseAddrHigh); + + Operand inRange = context.BitwiseAnd(inRangeLow, inRangeHigh); + + Operand lblSkip = Label(); + context.BranchIfFalse(lblSkip, inRange); + + Operand compare = null; + Operand value = null; + + if (inArgumentsCount == 4) + { + compare = Argument(2); + value = Argument(3); + } + else if (inArgumentsCount == 3) + { + value = Argument(2); + } + + SearchResult result = new SearchResult(sbCbSlot, sbCbOffset); + + int alignment = config.GpuAccessor.QueryHostStorageBufferOffsetAlignment(); + + Operand baseAddressMasked = context.BitwiseAnd(baseAddrLow, Const(-alignment)); + Operand hostOffset = context.ISubtract(globalAddressLow, baseAddressMasked); + + if (!TryGenerateStorageOp( + config, + context, + operation.Inst, + operation.StorageKind, + hostOffset, + compare, + value, + result, + out Operand resultValue)) + { + functionId = 0; + return false; + } + + if (resultValue != null) + { + context.Return(resultValue); + } + else + { + context.Return(); + } + + context.MarkLabel(lblSkip); } - Operand src1 = offsetAdd.GetSource(0); - Operand src2 = Utils.FindLastOperation(offsetAdd.GetSource(1), block); + bool returnsValue = operation.Dest != null; - if (IsCbOffset(src2, cbSlot, baseAddressCbOffset)) + if (returnsValue) { - return (src1, constantOffset); + context.Return(Const(0)); } - else if (IsCbOffset(src1, cbSlot, baseAddressCbOffset)) + else { - return (src2, constantOffset); + context.Return(); } - return (null, 0); + string functionName = GetFunctionName(operation, isMultiTarget: true, targetCbs); + + Function function = new Function( + ControlFlowGraph.Create(context.GetOperations()).Blocks, + functionName, + returnsValue, + inArgumentsCount, + 0); + + functionId = 
gtsContext.AddFunction(operation, isMultiTarget: true, targetCbs, function); + + return true; } - private static (Operand, int) GetStorageConstantOffset(BasicBlock block, Operand address) + private static uint PackCbSlotAndOffset(int cbSlot, int cbOffset) { - if (!(address.AsgOp is Operation offsetAdd) || offsetAdd.Inst != Instruction.Add) + return (uint)((ushort)cbSlot | ((ushort)cbOffset << 16)); + } + + private static (int, int) UnpackCbSlotAndOffset(uint packed) + { + return ((ushort)packed, (ushort)(packed >> 16)); + } + + private static string GetFunctionName(Operation baseOp, bool isMultiTarget, IReadOnlyList<uint> targetCbs) + { + string name = baseOp.Inst.ToString(); + + name += baseOp.StorageKind switch + { + StorageKind.GlobalMemoryS8 => "S8", + StorageKind.GlobalMemoryS16 => "S16", + StorageKind.GlobalMemoryU8 => "U8", + StorageKind.GlobalMemoryU16 => "U16", + _ => string.Empty + }; + + if (isMultiTarget) { - return (address, 0); + name += "Multi"; } - Operand src1 = offsetAdd.GetSource(0); - Operand src2 = offsetAdd.GetSource(1); - - if (src2.Type != OperandType.Constant) + foreach (uint targetCb in targetCbs) { - return (address, 0); + (int sbCbSlot, int sbCbOffset) = UnpackCbSlotAndOffset(targetCb); + + name += $"_c{sbCbSlot}o{sbCbOffset}"; } - return (src1, src2.Value); + return name; } - private static LinkedListNode<INode> ReplaceLdgWithLdc(LinkedListNode<INode> node, ShaderConfig config, int storageIndex) + private static bool TryGenerateStorageOp( + ShaderConfig config, + EmitterContext context, + Instruction inst, + StorageKind storageKind, + Operand offset, + Operand compare, + Operand value, + SearchResult result, + out Operand resultValue) { - Operation operation = (Operation)node.Value; + resultValue = null; + bool isStore = inst.IsAtomic() || inst == Instruction.Store; - Operand GetCbufOffset() + if (!config.ResourceManager.TryGetStorageBufferBinding(result.SbCbSlot, result.SbCbOffset, isStore, out int binding)) { - Operand addrLow = 
operation.GetSource(0); - - Operand baseAddrLow = Cbuf(0, UbeBaseOffset + storageIndex * StorageDescSize); + return false; + } - Operand baseAddrTrunc = Local(); + Operand wordOffset = context.ShiftRightU32(offset, Const(2)); - Operand alignMask = Const(-config.GpuAccessor.QueryHostStorageBufferOffsetAlignment()); + if (inst.IsAtomic()) + { + if (IsSmallInt(storageKind)) + { + throw new NotImplementedException(); + } - Operation andOp = new Operation(Instruction.BitwiseAnd, baseAddrTrunc, baseAddrLow, alignMask); + switch (inst) + { + case Instruction.AtomicAdd: + resultValue = context.AtomicAdd(StorageKind.StorageBuffer, binding, Const(0), wordOffset, value); + break; + case Instruction.AtomicAnd: + resultValue = context.AtomicAnd(StorageKind.StorageBuffer, binding, Const(0), wordOffset, value); + break; + case Instruction.AtomicCompareAndSwap: + resultValue = context.AtomicCompareAndSwap(StorageKind.StorageBuffer, binding, Const(0), wordOffset, compare, value); + break; + case Instruction.AtomicMaxS32: + if (config.Options.TargetLanguage == TargetLanguage.Spirv) + { + resultValue = context.AtomicMaxS32(StorageKind.StorageBuffer, binding, Const(0), wordOffset, value); + } + else + { + resultValue = GenerateAtomicCasLoop(context, wordOffset, binding, (memValue) => + { + return context.IMaximumS32(memValue, value); + }); + } + break; + case Instruction.AtomicMaxU32: + resultValue = context.AtomicMaxU32(StorageKind.StorageBuffer, binding, Const(0), wordOffset, value); + break; + case Instruction.AtomicMinS32: + if (config.Options.TargetLanguage == TargetLanguage.Spirv) + { + resultValue = context.AtomicMinS32(StorageKind.StorageBuffer, binding, Const(0), wordOffset, value); + } + else + { + resultValue = GenerateAtomicCasLoop(context, wordOffset, binding, (memValue) => + { + return context.IMinimumS32(memValue, value); + }); + } + break; + case Instruction.AtomicMinU32: + resultValue = context.AtomicMinU32(StorageKind.StorageBuffer, binding, Const(0), wordOffset, 
value); + break; + case Instruction.AtomicOr: + resultValue = context.AtomicOr(StorageKind.StorageBuffer, binding, Const(0), wordOffset, value); + break; + case Instruction.AtomicSwap: + resultValue = context.AtomicSwap(StorageKind.StorageBuffer, binding, Const(0), wordOffset, value); + break; + case Instruction.AtomicXor: + resultValue = context.AtomicXor(StorageKind.StorageBuffer, binding, Const(0), wordOffset, value); + break; + } + } + else if (inst == Instruction.Store) + { + int bitSize = storageKind switch + { + StorageKind.GlobalMemoryS8 or + StorageKind.GlobalMemoryU8 => 8, + StorageKind.GlobalMemoryS16 or + StorageKind.GlobalMemoryU16 => 16, + _ => 32 + }; - node.List.AddBefore(node, andOp); + if (bitSize < 32) + { + Operand bitOffset = GetBitOffset(context, offset); - Operand byteOffset = Local(); - Operand wordOffset = Local(); + GenerateAtomicCasLoop(context, wordOffset, binding, (memValue) => + { + return context.BitfieldInsert(memValue, value, bitOffset, Const(bitSize)); + }); + } + else + { + context.Store(StorageKind.StorageBuffer, binding, Const(0), wordOffset, value); + } + } + else + { + value = context.Load(StorageKind.StorageBuffer, binding, Const(0), wordOffset); - Operation subOp = new Operation(Instruction.Subtract, byteOffset, addrLow, baseAddrTrunc); - Operation shrOp = new Operation(Instruction.ShiftRightU32, wordOffset, byteOffset, Const(2)); + if (IsSmallInt(storageKind)) + { + Operand bitOffset = GetBitOffset(context, offset); - node.List.AddBefore(node, subOp); - node.List.AddBefore(node, shrOp); + switch (storageKind) + { + case StorageKind.GlobalMemoryS8: + value = context.ShiftRightS32(value, bitOffset); + value = context.BitfieldExtractS32(value, Const(0), Const(8)); + break; + case StorageKind.GlobalMemoryS16: + value = context.ShiftRightS32(value, bitOffset); + value = context.BitfieldExtractS32(value, Const(0), Const(16)); + break; + case StorageKind.GlobalMemoryU8: + value = context.ShiftRightU32(value, bitOffset); + value = 
context.BitwiseAnd(value, Const(byte.MaxValue)); + break; + case StorageKind.GlobalMemoryU16: + value = context.ShiftRightU32(value, bitOffset); + value = context.BitwiseAnd(value, Const(ushort.MaxValue)); + break; + } + } - return wordOffset; + resultValue = value; } - Operand cbufOffset = GetCbufOffset(); - Operand vecIndex = Local(); - Operand elemIndex = Local(); - - node.List.AddBefore(node, new Operation(Instruction.ShiftRightU32, 0, vecIndex, cbufOffset, Const(2))); - node.List.AddBefore(node, new Operation(Instruction.BitwiseAnd, 0, elemIndex, cbufOffset, Const(3))); + return true; + } - Operand[] sources = new Operand[4]; + private static Operand GetBitOffset(EmitterContext context, Operand offset) + { + return context.ShiftLeft(context.BitwiseAnd(offset, Const(3)), Const(3)); + } - int cbSlot = UbeFirstCbuf + storageIndex; + private static Operand GenerateAtomicCasLoop(EmitterContext context, Operand wordOffset, int binding, Func<Operand, Operand> opCallback) + { + Operand lblLoopHead = Label(); - sources[0] = Const(config.ResourceManager.GetConstantBufferBinding(cbSlot)); - sources[1] = Const(0); - sources[2] = vecIndex; - sources[3] = elemIndex; + context.MarkLabel(lblLoopHead); - Operation ldcOp = new Operation(Instruction.Load, StorageKind.ConstantBuffer, operation.Dest, sources); + Operand oldValue = context.Load(StorageKind.StorageBuffer, binding, Const(0), wordOffset); + Operand newValue = opCallback(oldValue); - for (int index = 0; index < operation.SourcesCount; index++) - { - operation.SetSource(index, null); - } + Operand casResult = context.AtomicCompareAndSwap( + StorageKind.StorageBuffer, + binding, + Const(0), + wordOffset, + oldValue, + newValue); - LinkedListNode<INode> oldNode = node; + Operand casFail = context.ICompareNotEqual(casResult, oldValue); - node = node.List.AddBefore(node, ldcOp); + context.BranchIfTrue(lblLoopHead, casFail); - node.List.Remove(oldNode); - - return node; + return oldValue; } - private static SearchResult 
SearchForStorageBase(ShaderConfig config, BasicBlock block, Operand globalAddress) + private static SearchResult FindUniqueBaseAddressCb(GtsContext gtsContext, BasicBlock block, Operand globalAddress, bool needsOffset) { globalAddress = Utils.FindLastOperation(globalAddress, block); if (globalAddress.Type == OperandType.ConstantBuffer) { - return GetStorageIndex(config, globalAddress); + return GetBaseAddressCbWithOffset(globalAddress, Const(0), 0); } Operation operation = globalAddress.AsgOp as Operation; if (operation == null || operation.Inst != Instruction.Add) { - return SearchResult.NotFound; + return FindBaseAddressCbFromMemory(gtsContext, operation, 0, needsOffset); } Operand src1 = operation.GetSource(0); Operand src2 = operation.GetSource(1); + int constOffset = 0; + if ((src1.Type == OperandType.LocalVariable && src2.Type == OperandType.Constant) || (src2.Type == OperandType.LocalVariable && src1.Type == OperandType.Constant)) { Operand baseAddr; + Operand offset; if (src1.Type == OperandType.LocalVariable) { baseAddr = Utils.FindLastOperation(src1, block); + offset = src2; } else { baseAddr = Utils.FindLastOperation(src2, block); + offset = src1; } - var result = GetStorageIndex(config, baseAddr); + var result = GetBaseAddressCbWithOffset(baseAddr, offset, 0); if (result.Found) { return result; } + constOffset = offset.Value; operation = baseAddr.AsgOp as Operation; if (operation == null || operation.Inst != Instruction.Add) { - return SearchResult.NotFound; + return FindBaseAddressCbFromMemory(gtsContext, operation, constOffset, needsOffset); } } - var selectedResult = SearchResult.NotFound; + src1 = operation.GetSource(0); + src2 = operation.GetSource(1); + + // If we have two possible results, we give preference to the ones from + // the driver reserved constant buffer, as those are the ones that + // contains the base address. 
- for (int index = 0; index < operation.SourcesCount; index++) + // If both are constant buffer, give preference to the second operand, + // because constant buffer are always encoded as the second operand, + // so the second operand will always be the one from the last instruction. + + if (src1.Type != OperandType.ConstantBuffer || + (src1.Type == OperandType.ConstantBuffer && src2.Type == OperandType.ConstantBuffer) || + (src2.Type == OperandType.ConstantBuffer && src2.GetCbufSlot() == DriverReservedCb)) { - Operand source = operation.GetSource(index); + return GetBaseAddressCbWithOffset(src2, src1, constOffset); + } - var result = GetStorageIndex(config, source); + return GetBaseAddressCbWithOffset(src1, src2, constOffset); + } - // If we already have a result, we give preference to the ones from - // the driver reserved constant buffer, as those are the ones that - // contains the base address. - if (result.Found && (!selectedResult.Found || result.SbCbSlot == GlobalMemory.DriverReservedCb)) + private static SearchResult FindBaseAddressCbFromMemory(GtsContext gtsContext, Operation operation, int constOffset, bool needsOffset) + { + if (operation != null) + { + if (TryGetMemoryOffsets(operation, out LsMemoryType type, out Operand bo, out int co) && + gtsContext.TryGetMemoryTargetCb(type, bo, co, out SearchResult result) && + (result.Offset != null || !needsOffset)) { - selectedResult = result; + if (constOffset != 0) + { + return new SearchResult( + result.SbCbSlot, + result.SbCbOffset, + result.Offset, + result.ConstOffset + constOffset); + } + + return result; } } - return selectedResult; + return SearchResult.NotFound; } - private static SearchResult GetStorageIndex(ShaderConfig config, Operand operand) + private static SearchResult GetBaseAddressCbWithOffset(Operand baseAddress, Operand offset, int constOffset) { - if (operand.Type == OperandType.ConstantBuffer) + if (baseAddress.Type == OperandType.ConstantBuffer) { - int slot = operand.GetCbufSlot(); - int 
offset = operand.GetCbufOffset(); + int sbCbSlot = baseAddress.GetCbufSlot(); + int sbCbOffset = baseAddress.GetCbufOffset(); - if ((offset & 3) == 0) + // We require the offset to be aligned to 1 word (64 bits), + // since the address size is 64-bit and the GPU only supports aligned memory access. + if ((sbCbOffset & 1) == 0) { - return new SearchResult(slot, offset); + return new SearchResult(sbCbSlot, sbCbOffset, offset, constOffset); } } return SearchResult.NotFound; } - private static int GetStorageIndex(Operand operand, int sbStart, int sbEnd) + private static bool TryGetMemoryOffsets(Operation operation, out LsMemoryType type, out Operand baseOffset, out int constOffset) { - if (operand.Type == OperandType.ConstantBuffer) + baseOffset = null; + + if (operation.Inst == Instruction.LoadShared || operation.Inst == Instruction.StoreShared) + { + type = LsMemoryType.Shared; + return TryGetSharedMemoryOffsets(operation, out baseOffset, out constOffset); + } + else if (operation.Inst == Instruction.LoadLocal || operation.Inst == Instruction.StoreLocal) { - int slot = operand.GetCbufSlot(); - int offset = operand.GetCbufOffset(); + type = LsMemoryType.Local; + return TryGetLocalMemoryOffset(operation, out constOffset); + } - if (slot == 0 && offset >= sbStart && offset < sbEnd) - { - int storageIndex = (offset - sbStart) / StorageDescSize; + type = default; + constOffset = 0; + return false; + } + + private static bool TryGetSharedMemoryOffsets(Operation operation, out Operand baseOffset, out int constOffset) + { + baseOffset = null; + constOffset = 0; + + // The byte offset is right shifted by 2 to get the 32-bit word offset, + // so we want to get the byte offset back, since each one of those word + // offsets are a new "local variable" which will not match. 
- return storageIndex; + if (operation.GetSource(0).AsgOp is Operation shiftRightOp && + shiftRightOp.Inst == Instruction.ShiftRightU32 && + shiftRightOp.GetSource(1).Type == OperandType.Constant && + shiftRightOp.GetSource(1).Value == 2) + { + baseOffset = shiftRightOp.GetSource(0); + } + + // Check if we have a constant offset being added to the base offset. + + if (baseOffset?.AsgOp is Operation addOp && addOp.Inst == Instruction.Add) + { + Operand src1 = addOp.GetSource(0); + Operand src2 = addOp.GetSource(1); + + if (src1.Type == OperandType.Constant && src2.Type == OperandType.LocalVariable) + { + constOffset = src1.Value; + baseOffset = src2; } + else if (src1.Type == OperandType.LocalVariable && src2.Type == OperandType.Constant) + { + baseOffset = src1; + constOffset = src2.Value; + } + } + + return baseOffset != null && baseOffset.Type == OperandType.LocalVariable; + } + + private static bool TryGetLocalMemoryOffset(Operation operation, out int constOffset) + { + if (operation.GetSource(0).Type == OperandType.Constant) + { + constOffset = operation.GetSource(0).Value; + return true; } - return -1; + constOffset = 0; + return false; } } }
\ No newline at end of file diff --git a/src/Ryujinx.Graphics.Shader/Translation/Optimizations/Optimizer.cs b/src/Ryujinx.Graphics.Shader/Translation/Optimizations/Optimizer.cs index b126e2c4..bdb3a62e 100644 --- a/src/Ryujinx.Graphics.Shader/Translation/Optimizations/Optimizer.cs +++ b/src/Ryujinx.Graphics.Shader/Translation/Optimizations/Optimizer.cs @@ -7,17 +7,15 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations { static class Optimizer { - public static void RunPass(BasicBlock[] blocks, ShaderConfig config) + public static void RunPass(HelperFunctionManager hfm, BasicBlock[] blocks, ShaderConfig config) { RunOptimizationPasses(blocks, config); - int sbUseMask = 0; - int ubeUseMask = 0; + GlobalToStorage.RunPass(hfm, blocks, config); // Those passes are looking for specific patterns and only needs to run once. for (int blkIndex = 0; blkIndex < blocks.Length; blkIndex++) { - GlobalToStorage.RunPass(blocks[blkIndex], config, ref sbUseMask, ref ubeUseMask); BindlessToIndexed.RunPass(blocks[blkIndex], config); BindlessElimination.RunPass(blocks[blkIndex], config); @@ -28,8 +26,6 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations } } - config.SetAccessibleBufferMasks(sbUseMask, ubeUseMask); - // Run optimizations one last time to remove any code that is now optimizable after above passes. 
RunOptimizationPasses(blocks, config); } diff --git a/src/Ryujinx.Graphics.Shader/Translation/Optimizations/Simplification.cs b/src/Ryujinx.Graphics.Shader/Translation/Optimizations/Simplification.cs index 8d05f99a..9b78c8aa 100644 --- a/src/Ryujinx.Graphics.Shader/Translation/Optimizations/Simplification.cs +++ b/src/Ryujinx.Graphics.Shader/Translation/Optimizations/Simplification.cs @@ -13,7 +13,6 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations switch (operation.Inst) { case Instruction.Add: - case Instruction.BitwiseExclusiveOr: TryEliminateBinaryOpCommutative(operation, 0); break; @@ -21,6 +20,13 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations TryEliminateBitwiseAnd(operation); break; + case Instruction.BitwiseExclusiveOr: + if (!TryEliminateXorSwap(operation)) + { + TryEliminateBinaryOpCommutative(operation, 0); + } + break; + case Instruction.BitwiseOr: TryEliminateBitwiseOr(operation); break; @@ -49,8 +55,9 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations private static void TryEliminateBitwiseAnd(Operation operation) { // Try to recognize and optimize those 3 patterns (in order): - // x & 0xFFFFFFFF == x, 0xFFFFFFFF & y == y, - // x & 0x00000000 == 0x00000000, 0x00000000 & y == 0x00000000 + // x & 0xFFFFFFFF == x, 0xFFFFFFFF & y == y, + // x & 0x00000000 == 0x00000000, 0x00000000 & y == 0x00000000 + Operand x = operation.GetSource(0); Operand y = operation.GetSource(1); @@ -68,11 +75,62 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations } } + private static bool TryEliminateXorSwap(Operation xCopyOp) + { + // Try to recognize XOR swap pattern: + // x = x ^ y + // y = x ^ y + // x = x ^ y + // Or, in SSA: + // x2 = x ^ y + // y2 = x2 ^ y + // x3 = x2 ^ y2 + // Transform it into something more sane: + // temp = y + // y = x + // x = temp + + // Note that because XOR is commutative, there are actually + // multiple possible combinations of this pattern, for + // simplicity this only catches one of them. 
+ + Operand x = xCopyOp.GetSource(0); + Operand y = xCopyOp.GetSource(1); + + if (x.AsgOp is not Operation tCopyOp || tCopyOp.Inst != Instruction.BitwiseExclusiveOr || + y.AsgOp is not Operation yCopyOp || yCopyOp.Inst != Instruction.BitwiseExclusiveOr) + { + return false; + } + + if (tCopyOp == yCopyOp) + { + return false; + } + + if (yCopyOp.GetSource(0) != x || + yCopyOp.GetSource(1) != tCopyOp.GetSource(1) || + x.UseOps.Count != 2) + { + return false; + } + + x = tCopyOp.GetSource(0); + y = tCopyOp.GetSource(1); + + tCopyOp.TurnIntoCopy(y); // Temp = Y + yCopyOp.TurnIntoCopy(x); // Y = X + xCopyOp.TurnIntoCopy(tCopyOp.Dest); // X = Temp + + return true; + } + private static void TryEliminateBitwiseOr(Operation operation) { // Try to recognize and optimize those 3 patterns (in order): - // x | 0x00000000 == x, 0x00000000 | y == y, - // x | 0xFFFFFFFF == 0xFFFFFFFF, 0xFFFFFFFF | y == 0xFFFFFFFF + // x | 0x00000000 == x, 0x00000000 | y == y, + // x | 0xFFFFFFFF == 0xFFFFFFFF, 0xFFFFFFFF | y == 0xFFFFFFFF + Operand x = operation.GetSource(0); Operand y = operation.GetSource(1); diff --git a/src/Ryujinx.Graphics.Shader/Translation/Optimizations/Utils.cs b/src/Ryujinx.Graphics.Shader/Translation/Optimizations/Utils.cs index a0d58d07..ffbd16f8 100644 --- a/src/Ryujinx.Graphics.Shader/Translation/Optimizations/Utils.cs +++ b/src/Ryujinx.Graphics.Shader/Translation/Optimizations/Utils.cs @@ -1,4 +1,5 @@ using Ryujinx.Graphics.Shader.IntermediateRepresentation; +using System.Collections.Generic; namespace Ryujinx.Graphics.Shader.Translation.Optimizations { @@ -93,5 +94,17 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations return source; } + + public static void DeleteNode(LinkedListNode<INode> node, Operation operation) + { + node.List.Remove(node); + + for (int srcIndex = 0; srcIndex < operation.SourcesCount; srcIndex++) + { + operation.SetSource(srcIndex, null); + } + + operation.Dest = null; + } } } diff --git 
a/src/Ryujinx.Graphics.Shader/Translation/ResourceManager.cs b/src/Ryujinx.Graphics.Shader/Translation/ResourceManager.cs index a2cfbe22..2d19a5a7 100644 --- a/src/Ryujinx.Graphics.Shader/Translation/ResourceManager.cs +++ b/src/Ryujinx.Graphics.Shader/Translation/ResourceManager.cs @@ -14,6 +14,11 @@ namespace Ryujinx.Graphics.Shader.Translation private readonly string _stagePrefix; private readonly int[] _cbSlotToBindingMap; + private readonly int[] _sbSlotToBindingMap; + private uint _sbSlotWritten; + + private readonly Dictionary<int, int> _sbSlots; + private readonly Dictionary<int, int> _sbSlotsReverse; private readonly HashSet<int> _usedConstantBufferBindings; @@ -26,7 +31,12 @@ namespace Ryujinx.Graphics.Shader.Translation _stagePrefix = GetShaderStagePrefix(stage); _cbSlotToBindingMap = new int[18]; + _sbSlotToBindingMap = new int[16]; _cbSlotToBindingMap.AsSpan().Fill(-1); + _sbSlotToBindingMap.AsSpan().Fill(-1); + + _sbSlots = new Dictionary<int, int>(); + _sbSlotsReverse = new Dictionary<int, int>(); _usedConstantBufferBindings = new HashSet<int>(); @@ -47,6 +57,52 @@ namespace Ryujinx.Graphics.Shader.Translation return binding; } + public bool TryGetStorageBufferBinding(int sbCbSlot, int sbCbOffset, bool write, out int binding) + { + if (!TryGetSbSlot((byte)sbCbSlot, (ushort)sbCbOffset, out int slot)) + { + binding = 0; + return false; + } + + binding = _sbSlotToBindingMap[slot]; + + if (binding < 0) + { + binding = _gpuAccessor.QueryBindingStorageBuffer(slot); + _sbSlotToBindingMap[slot] = binding; + string slotNumber = slot.ToString(CultureInfo.InvariantCulture); + AddNewStorageBuffer(binding, $"{_stagePrefix}_s{slotNumber}"); + } + + if (write) + { + _sbSlotWritten |= 1u << slot; + } + + return true; + } + + private bool TryGetSbSlot(byte sbCbSlot, ushort sbCbOffset, out int slot) + { + int key = PackSbCbInfo(sbCbSlot, sbCbOffset); + + if (!_sbSlots.TryGetValue(key, out slot)) + { + slot = _sbSlots.Count; + + if (slot >= _sbSlotToBindingMap.Length) 
+ { + return false; + } + + _sbSlots.Add(key, slot); + _sbSlotsReverse.Add(slot, key); + } + + return true; + } + public bool TryGetConstantBufferSlot(int binding, out int slot) { for (slot = 0; slot < _cbSlotToBindingMap.Length; slot++) @@ -90,6 +146,34 @@ namespace Ryujinx.Graphics.Shader.Translation return descriptors; } + public BufferDescriptor[] GetStorageBufferDescriptors() + { + var descriptors = new BufferDescriptor[_sbSlots.Count]; + + int descriptorIndex = 0; + + foreach ((int key, int slot) in _sbSlots) + { + int binding = _sbSlotToBindingMap[slot]; + + if (binding >= 0) + { + (int sbCbSlot, int sbCbOffset) = UnpackSbCbInfo(key); + descriptors[descriptorIndex++] = new BufferDescriptor(binding, slot, sbCbSlot, sbCbOffset) + { + Flags = (_sbSlotWritten & (1u << slot)) != 0 ? BufferUsageFlags.Write : BufferUsageFlags.None + }; + } + } + + if (descriptors.Length != descriptorIndex) + { + Array.Resize(ref descriptors, descriptorIndex); + } + + return descriptors; + } + private void AddNewConstantBuffer(int binding, string name) { StructureType type = new StructureType(new[] @@ -100,6 +184,16 @@ namespace Ryujinx.Graphics.Shader.Translation _properties.AddConstantBuffer(binding, new BufferDefinition(BufferLayout.Std140, 0, binding, name, type)); } + private void AddNewStorageBuffer(int binding, string name) + { + StructureType type = new StructureType(new[] + { + new StructureField(AggregateType.Array | AggregateType.U32, "data", 0) + }); + + _properties.AddStorageBuffer(binding, new BufferDefinition(BufferLayout.Std430, 1, binding, name, type)); + } + public static string GetShaderStagePrefix(ShaderStage stage) { uint index = (uint)stage; @@ -111,5 +205,15 @@ namespace Ryujinx.Graphics.Shader.Translation return _stagePrefixes[index]; } + + private static int PackSbCbInfo(int sbCbSlot, int sbCbOffset) + { + return sbCbOffset | ((int)sbCbSlot << 16); + } + + private static (int, int) UnpackSbCbInfo(int key) + { + return ((byte)(key >> 16), (ushort)key); + } } }
\ No newline at end of file diff --git a/src/Ryujinx.Graphics.Shader/Translation/Rewriter.cs b/src/Ryujinx.Graphics.Shader/Translation/Rewriter.cs index 866ae522..baa88251 100644 --- a/src/Ryujinx.Graphics.Shader/Translation/Rewriter.cs +++ b/src/Ryujinx.Graphics.Shader/Translation/Rewriter.cs @@ -2,10 +2,8 @@ using Ryujinx.Graphics.Shader.IntermediateRepresentation; using Ryujinx.Graphics.Shader.StructuredIr; using System.Collections.Generic; using System.Linq; -using System.Numerics; using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper; -using static Ryujinx.Graphics.Shader.Translation.GlobalMemory; namespace Ryujinx.Graphics.Shader.Translation { @@ -23,11 +21,10 @@ namespace Ryujinx.Graphics.Shader.Translation { BasicBlock block = blocks[blkIndex]; - for (LinkedListNode<INode> node = block.Operations.First; node != null;) + for (LinkedListNode<INode> node = block.Operations.First; node != null; node = node.Next) { if (node.Value is not Operation operation) { - node = node.Next; continue; } @@ -56,8 +53,6 @@ namespace Ryujinx.Graphics.Shader.Translation InsertVectorComponentSelect(node, config); } - LinkedListNode<INode> nextNode = node.Next; - if (operation is TextureOperation texOp) { node = InsertTexelFetchScale(hfm, node, config); @@ -74,15 +69,7 @@ namespace Ryujinx.Graphics.Shader.Translation node = InsertSnormNormalization(node, config); } } - - nextNode = node.Next; - } - else if (UsesGlobalMemory(operation.Inst, operation.StorageKind)) - { - nextNode = RewriteGlobalAccess(node, config)?.Next ?? 
nextNode; } - - node = nextNode; } } } @@ -184,196 +171,6 @@ namespace Ryujinx.Graphics.Shader.Translation operation.TurnIntoCopy(result); } - private static LinkedListNode<INode> RewriteGlobalAccess(LinkedListNode<INode> node, ShaderConfig config) - { - Operation operation = (Operation)node.Value; - - bool isAtomic = operation.Inst.IsAtomic(); - bool isStg16Or8 = operation.Inst == Instruction.StoreGlobal16 || operation.Inst == Instruction.StoreGlobal8; - bool isWrite = isAtomic || operation.Inst == Instruction.StoreGlobal || isStg16Or8; - - Operation storageOp = null; - - Operand PrependOperation(Instruction inst, params Operand[] sources) - { - Operand local = Local(); - - node.List.AddBefore(node, new Operation(inst, local, sources)); - - return local; - } - - Operand PrependStorageOperation(Instruction inst, StorageKind storageKind, params Operand[] sources) - { - Operand local = Local(); - - node.List.AddBefore(node, new Operation(inst, storageKind, local, sources)); - - return local; - } - - Operand PrependExistingOperation(Operation operation) - { - Operand local = Local(); - - operation.Dest = local; - node.List.AddBefore(node, operation); - - return local; - } - - Operand addrLow = operation.GetSource(0); - Operand addrHigh = operation.GetSource(1); - - Operand sbBaseAddrLow = Const(0); - Operand sbSlot = Const(0); - - Operand alignMask = Const(-config.GpuAccessor.QueryHostStorageBufferOffsetAlignment()); - - Operand BindingRangeCheck(int cbOffset, out Operand baseAddrLow) - { - baseAddrLow = Cbuf(DriverReservedCb, cbOffset); - Operand baseAddrHigh = Cbuf(DriverReservedCb, cbOffset + 1); - Operand size = Cbuf(DriverReservedCb, cbOffset + 2); - - Operand offset = PrependOperation(Instruction.Subtract, addrLow, baseAddrLow); - Operand borrow = PrependOperation(Instruction.CompareLessU32, addrLow, baseAddrLow); - - Operand inRangeLow = PrependOperation(Instruction.CompareLessU32, offset, size); - - Operand addrHighBorrowed = PrependOperation(Instruction.Add, 
addrHigh, borrow); - - Operand inRangeHigh = PrependOperation(Instruction.CompareEqual, addrHighBorrowed, baseAddrHigh); - - return PrependOperation(Instruction.BitwiseAnd, inRangeLow, inRangeHigh); - } - - int sbUseMask = config.AccessibleStorageBuffersMask; - - while (sbUseMask != 0) - { - int slot = BitOperations.TrailingZeroCount(sbUseMask); - - sbUseMask &= ~(1 << slot); - - int cbOffset = GetStorageCbOffset(config.Stage, slot); - slot = config.GetSbSlot(DriverReservedCb, (ushort)cbOffset); - - config.SetUsedStorageBuffer(slot, isWrite); - - Operand inRange = BindingRangeCheck(cbOffset, out Operand baseAddrLow); - - sbBaseAddrLow = PrependOperation(Instruction.ConditionalSelect, inRange, baseAddrLow, sbBaseAddrLow); - sbSlot = PrependOperation(Instruction.ConditionalSelect, inRange, Const(slot), sbSlot); - } - - if (config.AccessibleStorageBuffersMask != 0) - { - Operand baseAddrTrunc = PrependOperation(Instruction.BitwiseAnd, sbBaseAddrLow, alignMask); - Operand byteOffset = PrependOperation(Instruction.Subtract, addrLow, baseAddrTrunc); - - Operand[] sources = new Operand[operation.SourcesCount]; - - sources[0] = sbSlot; - - if (isStg16Or8) - { - sources[1] = byteOffset; - } - else - { - sources[1] = PrependOperation(Instruction.ShiftRightU32, byteOffset, Const(2)); - } - - for (int index = 2; index < operation.SourcesCount; index++) - { - sources[index] = operation.GetSource(index); - } - - if (isAtomic) - { - storageOp = new Operation(operation.Inst, StorageKind.StorageBuffer, operation.Dest, sources); - } - else if (operation.Inst == Instruction.LoadGlobal) - { - storageOp = new Operation(Instruction.LoadStorage, operation.Dest, sources); - } - else - { - Instruction storeInst = operation.Inst switch - { - Instruction.StoreGlobal16 => Instruction.StoreStorage16, - Instruction.StoreGlobal8 => Instruction.StoreStorage8, - _ => Instruction.StoreStorage - }; - - storageOp = new Operation(storeInst, null, sources); - } - } - else if (operation.Dest != null) - 
{ - storageOp = new Operation(Instruction.Copy, operation.Dest, Const(0)); - } - - if (operation.Inst == Instruction.LoadGlobal) - { - int cbeUseMask = config.AccessibleConstantBuffersMask; - - while (cbeUseMask != 0) - { - int slot = BitOperations.TrailingZeroCount(cbeUseMask); - int cbSlot = UbeFirstCbuf + slot; - - cbeUseMask &= ~(1 << slot); - - Operand previousResult = PrependExistingOperation(storageOp); - - int cbOffset = GetConstantUbeOffset(slot); - - Operand inRange = BindingRangeCheck(cbOffset, out Operand baseAddrLow); - - Operand baseAddrTruncConst = PrependOperation(Instruction.BitwiseAnd, baseAddrLow, alignMask); - Operand byteOffsetConst = PrependOperation(Instruction.Subtract, addrLow, baseAddrTruncConst); - - Operand cbIndex = PrependOperation(Instruction.ShiftRightU32, byteOffsetConst, Const(2)); - Operand vecIndex = PrependOperation(Instruction.ShiftRightU32, cbIndex, Const(2)); - Operand elemIndex = PrependOperation(Instruction.BitwiseAnd, cbIndex, Const(3)); - - Operand[] sourcesCb = new Operand[4]; - - sourcesCb[0] = Const(config.ResourceManager.GetConstantBufferBinding(cbSlot)); - sourcesCb[1] = Const(0); - sourcesCb[2] = vecIndex; - sourcesCb[3] = elemIndex; - - Operand ldcResult = PrependStorageOperation(Instruction.Load, StorageKind.ConstantBuffer, sourcesCb); - - storageOp = new Operation(Instruction.ConditionalSelect, operation.Dest, inRange, ldcResult, previousResult); - } - } - - for (int index = 0; index < operation.SourcesCount; index++) - { - operation.SetSource(index, null); - } - - LinkedListNode<INode> oldNode = node; - LinkedList<INode> oldNodeList = oldNode.List; - - if (storageOp != null) - { - node = node.List.AddBefore(node, storageOp); - } - else - { - node = null; - } - - oldNodeList.Remove(oldNode); - - return node; - } - private static LinkedListNode<INode> InsertTexelFetchScale(HelperFunctionManager hfm, LinkedListNode<INode> node, ShaderConfig config) { TextureOperation texOp = (TextureOperation)node.Value; diff --git 
a/src/Ryujinx.Graphics.Shader/Translation/ShaderConfig.cs b/src/Ryujinx.Graphics.Shader/Translation/ShaderConfig.cs index 40a32e2d..5c0a1fb6 100644 --- a/src/Ryujinx.Graphics.Shader/Translation/ShaderConfig.cs +++ b/src/Ryujinx.Graphics.Shader/Translation/ShaderConfig.cs @@ -110,12 +110,6 @@ namespace Ryujinx.Graphics.Shader.Translation public UInt128 NextInputAttributesComponents { get; private set; } public UInt128 ThisInputAttributesComponents { get; private set; } - public int AccessibleStorageBuffersMask { get; private set; } - public int AccessibleConstantBuffersMask { get; private set; } - - private int _usedStorageBuffers; - private int _usedStorageBuffersWrite; - private readonly record struct TextureInfo(int CbufSlot, int Handle, bool Indexed, TextureFormat Format); private struct TextureMeta @@ -127,18 +121,9 @@ namespace Ryujinx.Graphics.Shader.Translation private readonly Dictionary<TextureInfo, TextureMeta> _usedTextures; private readonly Dictionary<TextureInfo, TextureMeta> _usedImages; - - private readonly Dictionary<int, int> _sbSlots; - private readonly Dictionary<int, int> _sbSlotsReverse; - - private BufferDescriptor[] _cachedStorageBufferDescriptors; private TextureDescriptor[] _cachedTextureDescriptors; private TextureDescriptor[] _cachedImageDescriptors; - private int _firstStorageBufferBinding; - - public int FirstStorageBufferBinding => _firstStorageBufferBinding; - public ShaderConfig(ShaderStage stage, IGpuAccessor gpuAccessor, TranslationOptions options) { Stage = stage; @@ -147,18 +132,12 @@ namespace Ryujinx.Graphics.Shader.Translation _transformFeedbackDefinitions = new Dictionary<TransformFeedbackVariable, TransformFeedbackOutput>(); - AccessibleStorageBuffersMask = (1 << GlobalMemory.StorageMaxCount) - 1; - AccessibleConstantBuffersMask = (1 << GlobalMemory.UbeMaxCount) - 1; - UsedInputAttributesPerPatch = new HashSet<int>(); UsedOutputAttributesPerPatch = new HashSet<int>(); _usedTextures = new Dictionary<TextureInfo, 
TextureMeta>(); _usedImages = new Dictionary<TextureInfo, TextureMeta>(); - _sbSlots = new Dictionary<int, int>(); - _sbSlotsReverse = new Dictionary<int, int>(); - ResourceManager = new ResourceManager(stage, gpuAccessor, new ShaderProperties()); } @@ -173,11 +152,6 @@ namespace Ryujinx.Graphics.Shader.Translation OutputTopology = outputTopology; MaxOutputVertices = maxOutputVertices; TransformFeedbackEnabled = gpuAccessor.QueryTransformFeedbackEnabled(); - - if (Stage != ShaderStage.Compute) - { - AccessibleConstantBuffersMask = 0; - } } public ShaderConfig(ShaderHeader header, IGpuAccessor gpuAccessor, TranslationOptions options) : this(header.Stage, gpuAccessor, options) @@ -433,8 +407,6 @@ namespace Ryujinx.Graphics.Shader.Translation UsedInputAttributes |= other.UsedInputAttributes; UsedOutputAttributes |= other.UsedOutputAttributes; - _usedStorageBuffers |= other._usedStorageBuffers; - _usedStorageBuffersWrite |= other._usedStorageBuffersWrite; foreach (var kv in other._usedTextures) { @@ -634,23 +606,6 @@ namespace Ryujinx.Graphics.Shader.Translation UsedFeatures |= flags; } - public void SetAccessibleBufferMasks(int sbMask, int ubeMask) - { - AccessibleStorageBuffersMask = sbMask; - AccessibleConstantBuffersMask = ubeMask; - } - - public void SetUsedStorageBuffer(int slot, bool write) - { - int mask = 1 << slot; - _usedStorageBuffers |= mask; - - if (write) - { - _usedStorageBuffersWrite |= mask; - } - } - public void SetUsedTexture( Instruction inst, SamplerType type, @@ -756,76 +711,6 @@ namespace Ryujinx.Graphics.Shader.Translation return meta; } - public BufferDescriptor[] GetStorageBufferDescriptors() - { - if (_cachedStorageBufferDescriptors != null) - { - return _cachedStorageBufferDescriptors; - } - - return _cachedStorageBufferDescriptors = GetStorageBufferDescriptors( - _usedStorageBuffers, - _usedStorageBuffersWrite, - true, - out _firstStorageBufferBinding, - GpuAccessor.QueryBindingStorageBuffer); - } - - private BufferDescriptor[] 
GetStorageBufferDescriptors( - int usedMask, - int writtenMask, - bool isArray, - out int firstBinding, - Func<int, int> getBindingCallback) - { - firstBinding = 0; - bool hasFirstBinding = false; - var descriptors = new BufferDescriptor[BitOperations.PopCount((uint)usedMask)]; - - int lastSlot = -1; - - for (int i = 0; i < descriptors.Length; i++) - { - int slot = BitOperations.TrailingZeroCount(usedMask); - - if (isArray) - { - // The next array entries also consumes bindings, even if they are unused. - for (int j = lastSlot + 1; j < slot; j++) - { - int binding = getBindingCallback(j); - - if (!hasFirstBinding) - { - firstBinding = binding; - hasFirstBinding = true; - } - } - } - - lastSlot = slot; - - (int sbCbSlot, int sbCbOffset) = GetSbCbInfo(slot); - - descriptors[i] = new BufferDescriptor(getBindingCallback(slot), slot, sbCbSlot, sbCbOffset); - - if (!hasFirstBinding) - { - firstBinding = descriptors[i].Binding; - hasFirstBinding = true; - } - - if ((writtenMask & (1 << slot)) != 0) - { - descriptors[i].SetFlag(BufferUsageFlags.Write); - } - - usedMask &= ~(1 << slot); - } - - return descriptors; - } - public TextureDescriptor[] GetTextureDescriptors() { return _cachedTextureDescriptors ??= GetTextureOrImageDescriptors(_usedTextures, GpuAccessor.QueryBindingTexture); @@ -922,45 +807,11 @@ namespace Ryujinx.Graphics.Shader.Translation return FindDescriptorIndex(GetImageDescriptors(), texOp); } - public int GetSbSlot(byte sbCbSlot, ushort sbCbOffset) - { - int key = PackSbCbInfo(sbCbSlot, sbCbOffset); - - if (!_sbSlots.TryGetValue(key, out int slot)) - { - slot = _sbSlots.Count; - _sbSlots.Add(key, slot); - _sbSlotsReverse.Add(slot, key); - } - - return slot; - } - - public (int, int) GetSbCbInfo(int slot) - { - if (_sbSlotsReverse.TryGetValue(slot, out int key)) - { - return UnpackSbCbInfo(key); - } - - throw new ArgumentException($"Invalid slot {slot}.", nameof(slot)); - } - - private static int PackSbCbInfo(int sbCbSlot, int sbCbOffset) - { - return 
sbCbOffset | ((int)sbCbSlot << 16); - } - - private static (int, int) UnpackSbCbInfo(int key) - { - return ((byte)(key >> 16), (ushort)key); - } - public ShaderProgramInfo CreateProgramInfo(ShaderIdentification identification = ShaderIdentification.None) { return new ShaderProgramInfo( ResourceManager.GetConstantBufferDescriptors(), - GetStorageBufferDescriptors(), + ResourceManager.GetStorageBufferDescriptors(), GetTextureDescriptors(), GetImageDescriptors(), identification, diff --git a/src/Ryujinx.Graphics.Shader/Translation/ShaderIdentifier.cs b/src/Ryujinx.Graphics.Shader/Translation/ShaderIdentifier.cs index 867e2437..68400437 100644 --- a/src/Ryujinx.Graphics.Shader/Translation/ShaderIdentifier.cs +++ b/src/Ryujinx.Graphics.Shader/Translation/ShaderIdentifier.cs @@ -48,7 +48,7 @@ namespace Ryujinx.Graphics.Shader.Translation continue; } - if (IsResourceWrite(operation.Inst)) + if (IsResourceWrite(operation.Inst, operation.StorageKind)) { return false; } @@ -154,7 +154,7 @@ namespace Ryujinx.Graphics.Shader.Translation return totalVerticesCount + verticesCount == 3 && writesLayer; } - private static bool IsResourceWrite(Instruction inst) + private static bool IsResourceWrite(Instruction inst, StorageKind storageKind) { switch (inst) { @@ -170,13 +170,11 @@ namespace Ryujinx.Graphics.Shader.Translation case Instruction.AtomicXor: case Instruction.ImageAtomic: case Instruction.ImageStore: - case Instruction.StoreGlobal: - case Instruction.StoreGlobal16: - case Instruction.StoreGlobal8: - case Instruction.StoreStorage: - case Instruction.StoreStorage16: - case Instruction.StoreStorage8: return true; + case Instruction.Store: + return storageKind == StorageKind.StorageBuffer || + storageKind == StorageKind.SharedMemory || + storageKind == StorageKind.LocalMemory; } return false; diff --git a/src/Ryujinx.Graphics.Shader/Translation/Translator.cs b/src/Ryujinx.Graphics.Shader/Translation/Translator.cs index 5bbc0009..c0212a5b 100644 --- 
a/src/Ryujinx.Graphics.Shader/Translation/Translator.cs +++ b/src/Ryujinx.Graphics.Shader/Translation/Translator.cs @@ -78,7 +78,7 @@ namespace Ryujinx.Graphics.Shader.Translation Ssa.Rename(cfg.Blocks); - Optimizer.RunPass(cfg.Blocks, config); + Optimizer.RunPass(hfm, cfg.Blocks, config); Rewriter.RunPass(hfm, cfg.Blocks, config); } |
