diff options
Diffstat (limited to 'Ryujinx.Graphics.Shader')
| -rw-r--r-- | Ryujinx.Graphics.Shader/Decoders/Block.cs | 36 | ||||
| -rw-r--r-- | Ryujinx.Graphics.Shader/Decoders/Decoder.cs | 319 | ||||
| -rw-r--r-- | Ryujinx.Graphics.Shader/IGpuAccessor.cs | 5 | ||||
| -rw-r--r-- | Ryujinx.Graphics.Shader/Instructions/InstEmitFlow.cs | 6 |
4 files changed, 257 insertions, 109 deletions
diff --git a/Ryujinx.Graphics.Shader/Decoders/Block.cs b/Ryujinx.Graphics.Shader/Decoders/Block.cs index e1470237..69cb55b9 100644 --- a/Ryujinx.Graphics.Shader/Decoders/Block.cs +++ b/Ryujinx.Graphics.Shader/Decoders/Block.cs @@ -8,10 +8,38 @@ namespace Ryujinx.Graphics.Shader.Decoders public ulong Address { get; set; } public ulong EndAddress { get; set; } - public Block Next { get; set; } - public Block Branch { get; set; } + private Block _next; + private Block _branch; - public OpCodeBranchIndir BrIndir { get; set; } + public Block Next + { + get + { + return _next; + } + set + { + _next?.Predecessors.Remove(this); + value?.Predecessors.Add(this); + _next = value; + } + } + + public Block Branch + { + get + { + return _branch; + } + set + { + _branch?.Predecessors.Remove(this); + value?.Predecessors.Add(this); + _branch = value; + } + } + + public HashSet<Block> Predecessors { get; } public List<OpCode> OpCodes { get; } public List<OpCodePush> PushOpCodes { get; } @@ -20,6 +48,8 @@ namespace Ryujinx.Graphics.Shader.Decoders { Address = address; + Predecessors = new HashSet<Block>(); + OpCodes = new List<OpCode>(); PushOpCodes = new List<OpCodePush>(); } diff --git a/Ryujinx.Graphics.Shader/Decoders/Decoder.cs b/Ryujinx.Graphics.Shader/Decoders/Decoder.cs index 9ca58177..c916935e 100644 --- a/Ryujinx.Graphics.Shader/Decoders/Decoder.cs +++ b/Ryujinx.Graphics.Shader/Decoders/Decoder.cs @@ -9,8 +9,6 @@ namespace Ryujinx.Graphics.Shader.Decoders { static class Decoder { - public const ulong ShaderEndDelimiter = 0xe2400fffff87000f; - public static Block[][] Decode(IGpuAccessor gpuAccessor, ulong startAddress, out bool hasBindless) { hasBindless = false; @@ -51,130 +49,139 @@ namespace Ryujinx.Graphics.Shader.Decoders GetBlock(funcAddress); - while (workQueue.TryDequeue(out Block currBlock)) + bool hasNewTarget; + + do { - // Check if the current block is inside another block. - if (BinarySearch(blocks, currBlock.Address, out int nBlkIndex)) + while (workQueue.TryDequeue(out Block currBlock)) { - Block nBlock = blocks[nBlkIndex]; - - if (nBlock.Address == currBlock.Address) + // Check if the current block is inside another block. + if (BinarySearch(blocks, currBlock.Address, out int nBlkIndex)) { - throw new InvalidOperationException("Found duplicate block address on the list."); - } + Block nBlock = blocks[nBlkIndex]; - nBlock.Split(currBlock); - blocks.Insert(nBlkIndex + 1, currBlock); - - continue; - } + if (nBlock.Address == currBlock.Address) + { + throw new InvalidOperationException("Found duplicate block address on the list."); + } - // If we have a block after the current one, set the limit address. - ulong limitAddress = ulong.MaxValue; + nBlock.Split(currBlock); + blocks.Insert(nBlkIndex + 1, currBlock); - if (nBlkIndex != blocks.Count) - { - Block nBlock = blocks[nBlkIndex]; + continue; + } - int nextIndex = nBlkIndex + 1; + // If we have a block after the current one, set the limit address. + ulong limitAddress = ulong.MaxValue; - if (nBlock.Address < currBlock.Address && nextIndex < blocks.Count) + if (nBlkIndex != blocks.Count) { - limitAddress = blocks[nextIndex].Address; - } - else if (nBlock.Address > currBlock.Address) - { - limitAddress = blocks[nBlkIndex].Address; - } - } + Block nBlock = blocks[nBlkIndex]; - FillBlock(gpuAccessor, currBlock, limitAddress, startAddress, out bool blockHasBindless); - hasBindless |= blockHasBindless; + int nextIndex = nBlkIndex + 1; - if (currBlock.OpCodes.Count != 0) - { - // We should have blocks for all possible branch targets, - // including those from SSY/PBK instructions. - foreach (OpCodePush pushOp in currBlock.PushOpCodes) - { - GetBlock(pushOp.GetAbsoluteAddress()); + if (nBlock.Address < currBlock.Address && nextIndex < blocks.Count) + { + limitAddress = blocks[nextIndex].Address; + } + else if (nBlock.Address > currBlock.Address) + { + limitAddress = blocks[nBlkIndex].Address; + } } - // Set child blocks. "Branch" is the block the branch instruction - // points to (when taken), "Next" is the block at the next address, - // executed when the branch is not taken. For Unconditional Branches - // or end of program, Next is null. - OpCode lastOp = currBlock.GetLastOp(); + FillBlock(gpuAccessor, currBlock, limitAddress, startAddress, out bool blockHasBindless); + hasBindless |= blockHasBindless; - if (lastOp is OpCodeBranch opBr) + if (currBlock.OpCodes.Count != 0) { - if (lastOp.Emitter == InstEmit.Cal) + // We should have blocks for all possible branch targets, + // including those from SSY/PBK instructions. + foreach (OpCodePush pushOp in currBlock.PushOpCodes) + { + GetBlock(pushOp.GetAbsoluteAddress()); + } + + // Set child blocks. "Branch" is the block the branch instruction + // points to (when taken), "Next" is the block at the next address, + // executed when the branch is not taken. For Unconditional Branches + // or end of program, Next is null. + OpCode lastOp = currBlock.GetLastOp(); + + if (lastOp is OpCodeBranch opBr) { - EnqueueFunction(opBr.GetAbsoluteAddress()); + if (lastOp.Emitter == InstEmit.Cal) + { + EnqueueFunction(opBr.GetAbsoluteAddress()); + } + else + { + currBlock.Branch = GetBlock(opBr.GetAbsoluteAddress()); + } } - else + + if (!IsUnconditionalBranch(lastOp)) { - currBlock.Branch = GetBlock(opBr.GetAbsoluteAddress()); + currBlock.Next = GetBlock(currBlock.EndAddress); } } - else if (lastOp is OpCodeBranchIndir opBrIndir) + + // Insert the new block on the list (sorted by address). + if (blocks.Count != 0) { - // An indirect branch could go anywhere, we don't know the target. - // Those instructions are usually used on a switch to jump table - // compiler optimization, and in those cases the possible targets - // seems to be always right after the BRX itself. We can assume - // that the possible targets are all the blocks in-between the - // instruction right after the BRX, and the common target that - // all the "cases" should eventually jump to, acting as the - // switch break. - Block firstTarget = GetBlock(currBlock.EndAddress); - - firstTarget.BrIndir = opBrIndir; - - opBrIndir.PossibleTargets.Add(firstTarget); - } + Block nBlock = blocks[nBlkIndex]; - if (!IsUnconditionalBranch(lastOp)) + blocks.Insert(nBlkIndex + (nBlock.Address < currBlock.Address ? 1 : 0), currBlock); + } + else { - currBlock.Next = GetBlock(currBlock.EndAddress); + blocks.Add(currBlock); } } - // Insert the new block on the list (sorted by address). - if (blocks.Count != 0) - { - Block nBlock = blocks[nBlkIndex]; - - blocks.Insert(nBlkIndex + (nBlock.Address < currBlock.Address ? 1 : 0), currBlock); - } - else + // Propagate SSY/PBK addresses into their uses (SYNC/BRK). + foreach (Block block in blocks.Where(x => x.PushOpCodes.Count != 0)) { - blocks.Add(currBlock); + for (int pushOpIndex = 0; pushOpIndex < block.PushOpCodes.Count; pushOpIndex++) + { + PropagatePushOp(visited, block, pushOpIndex); + } } - // Do we have a block after the current one? - if (currBlock.BrIndir != null && HasBlockAfter(gpuAccessor, currBlock, startAddress)) + // Try to find target for BRX (indirect branch) instructions. + hasNewTarget = false; + + foreach (Block block in blocks) { - bool targetVisited = visited.ContainsKey(currBlock.EndAddress); + if (block.GetLastOp() is OpCodeBranchIndir opBrIndir && opBrIndir.PossibleTargets.Count == 0) + { + ulong baseOffset = opBrIndir.Address + 8 + (ulong)opBrIndir.Offset; - Block possibleTarget = GetBlock(currBlock.EndAddress); + // An indirect branch could go anywhere, + // try to get the possible target offsets from the constant buffer. + (int cbBaseOffset, int cbOffsetsCount) = FindBrxTargetRange(block, opBrIndir.Ra.Index); - currBlock.BrIndir.PossibleTargets.Add(possibleTarget); + if (cbOffsetsCount != 0) + { + hasNewTarget = true; + } - if (!targetVisited) - { - possibleTarget.BrIndir = currBlock.BrIndir; + for (int i = 0; i < cbOffsetsCount; i++) + { + uint targetOffset = gpuAccessor.ConstantBuffer1Read(cbBaseOffset + i * 4); + Block target = GetBlock(baseOffset + targetOffset); + opBrIndir.PossibleTargets.Add(target); + target.Predecessors.Add(block); + } } } - } - foreach (Block block in blocks.Where(x => x.PushOpCodes.Count != 0)) - { - for (int pushOpIndex = 0; pushOpIndex < block.PushOpCodes.Count; pushOpIndex++) - { - PropagatePushOp(visited, block, pushOpIndex); - } + // If we discovered new branch targets from the BRX instruction, + // we need another round of decoding to decode the new blocks. + // Additionally, we may have more SSY/PBK targets to propagate, + // and new BRX instructions. } + while (hasNewTarget); funcs.Add(blocks.ToArray()); } @@ -182,19 +189,6 @@ namespace Ryujinx.Graphics.Shader.Decoders return funcs.ToArray(); } - private static bool HasBlockAfter(IGpuAccessor gpuAccessor, Block currBlock, ulong startAdddress) - { - if (!gpuAccessor.MemoryMapped(startAdddress + currBlock.EndAddress) || - !gpuAccessor.MemoryMapped(startAdddress + currBlock.EndAddress + 7)) - { - return false; - } - - ulong inst = gpuAccessor.MemoryRead<ulong>(startAdddress + currBlock.EndAddress); - - return inst != 0UL && inst != ShaderEndDelimiter; - } - private static bool BinarySearch(List<Block> blocks, ulong address, out int index) { index = 0; @@ -320,6 +314,115 @@ namespace Ryujinx.Graphics.Shader.Decoders opCode is OpCodeExit; } + private static (int, int) FindBrxTargetRange(Block block, int brxReg) + { + // Try to match the following pattern: + // + // IMNMX.U32 Rx, Rx, UpperBound, PT + // SHL Rx, Rx, 0x2 + // LDC Rx, c[0x1][Rx+BaseOffset] + // + // Here, Rx is an arbitrary register, "UpperBound" and "BaseOffset" are constants. + // The above pattern is assumed to be generated by the compiler before BRX, + // as the instruction is usually used to implement jump tables for switch statement optimizations. + // On a successful match, "BaseOffset" is the offset in bytes where the jump offsets are + // located on the constant buffer, and "UpperBound" is the total number of offsets for the BRX, minus 1. + + HashSet<Block> visited = new HashSet<Block>(); + + var ldcLocation = FindFirstRegWrite(visited, new BlockLocation(block, block.OpCodes.Count - 1), brxReg); + if (ldcLocation.Block == null || ldcLocation.Block.OpCodes[ldcLocation.Index] is not OpCodeLdc opLdc) + { + return (0, 0); + } + + if (opLdc.Slot != 1 || opLdc.IndexMode != CbIndexMode.Default) + { + return (0, 0); + } + + var shlLocation = FindFirstRegWrite(visited, ldcLocation, opLdc.Ra.Index); + if (shlLocation.Block == null || shlLocation.Block.OpCodes[shlLocation.Index] is not OpCodeAluImm opShl) + { + return (0, 0); + } + + if (opShl.Emitter != InstEmit.Shl || opShl.Immediate != 2) + { + return (0, 0); + } + + var imnmxLocation = FindFirstRegWrite(visited, shlLocation, opShl.Ra.Index); + if (imnmxLocation.Block == null || imnmxLocation.Block.OpCodes[imnmxLocation.Index] is not OpCodeAluImm opImnmx) + { + return (0, 0); + } + + bool isImnmxS32 = opImnmx.RawOpCode.Extract(48); + + if (opImnmx.Emitter != InstEmit.Imnmx || isImnmxS32 || !opImnmx.Predicate39.IsPT || opImnmx.InvertP) + { + return (0, 0); + } + + return (opLdc.Offset, opImnmx.Immediate + 1); + } + + private struct BlockLocation + { + public Block Block { get; } + public int Index { get; } + + public BlockLocation(Block block, int index) + { + Block = block; + Index = index; + } + } + + private static BlockLocation FindFirstRegWrite(HashSet<Block> visited, BlockLocation location, int regIndex) + { + Queue<BlockLocation> toVisit = new Queue<BlockLocation>(); + toVisit.Enqueue(location); + visited.Add(location.Block); + + while (toVisit.TryDequeue(out var currentLocation)) + { + Block block = currentLocation.Block; + for (int i = currentLocation.Index - 1; i >= 0; i--) + { + if (WritesToRegister(block.OpCodes[i], regIndex)) + { + return new BlockLocation(block, i); + } + } + + foreach (Block predecessor in block.Predecessors) + { + if (visited.Add(predecessor)) + { + toVisit.Enqueue(new BlockLocation(predecessor, predecessor.OpCodes.Count)); + } + } + } + + return new BlockLocation(null, 0); + } + + private static bool WritesToRegister(OpCode opCode, int regIndex) + { + // Predicate instruction only ever writes to predicate, so we shouldn't check those. + if (opCode.Emitter == InstEmit.Fsetp || + opCode.Emitter == InstEmit.Hsetp2 || + opCode.Emitter == InstEmit.Isetp || + opCode.Emitter == InstEmit.R2p) + { + return false; + } + + return opCode is IOpCodeRd opRd && opRd.Rd.Index == regIndex; + } + private enum MergeType { Brk = 0, @@ -388,6 +491,8 @@ namespace Ryujinx.Graphics.Shader.Decoders { OpCodePush pushOp = currBlock.PushOpCodes[pushOpIndex]; + Block target = blocks[pushOp.GetAbsoluteAddress()]; + Stack<PathBlockState> workQueue = new Stack<PathBlockState>(); HashSet<Block> visited = new HashSet<Block>(); @@ -497,10 +602,12 @@ namespace Ryujinx.Graphics.Shader.Decoders if (branchStack.Count == 0) { // If the entire stack was consumed, then the current pop instruction - // just consumed the address from out push instruction. - op.Targets.Add(pushOp, op.Targets.Count); - - pushOp.PopOps.TryAdd(op, Local()); + // just consumed the address from our push instruction. + if (op.Targets.TryAdd(pushOp, op.Targets.Count)) + { + pushOp.PopOps.Add(op, Local()); + target.Predecessors.Add(current); + } } else { diff --git a/Ryujinx.Graphics.Shader/IGpuAccessor.cs b/Ryujinx.Graphics.Shader/IGpuAccessor.cs index 26a8cafd..04f23061 100644 --- a/Ryujinx.Graphics.Shader/IGpuAccessor.cs +++ b/Ryujinx.Graphics.Shader/IGpuAccessor.cs @@ -7,6 +7,11 @@ // No default log output. } + uint ConstantBuffer1Read(int offset) + { + return 0; + } + T MemoryRead<T>(ulong address) where T : unmanaged; bool MemoryMapped(ulong address) diff --git a/Ryujinx.Graphics.Shader/Instructions/InstEmitFlow.cs b/Ryujinx.Graphics.Shader/Instructions/InstEmitFlow.cs index d4ab5955..1f5bf35b 100644 --- a/Ryujinx.Graphics.Shader/Instructions/InstEmitFlow.cs +++ b/Ryujinx.Graphics.Shader/Instructions/InstEmitFlow.cs @@ -25,6 +25,12 @@ namespace Ryujinx.Graphics.Shader.Instructions { OpCodeBranchIndir op = (OpCodeBranchIndir)context.CurrOp; + if (op.PossibleTargets.Count == 0) + { + context.Config.GpuAccessor.Log($"Failed to find targets for BRX instruction at 0x{op.Address:X}."); + return; + } + int offset = (int)op.Address + 8 + op.Offset; Operand address = context.IAdd(Register(op.Ra), Const(offset)); |
