diff options
Diffstat (limited to 'src/Ryujinx.Graphics.Shader/Translation')
29 files changed, 7954 insertions, 0 deletions
// src/Ryujinx.Graphics.Shader/Translation/AggregateType.cs
namespace Ryujinx.Graphics.Shader.Translation
{
    /// <summary>
    /// Bit-packed type descriptor. As laid out by the masks below, the element type
    /// lives in the low byte, the element count selector in bits 8-9, and bit 10
    /// marks an array.
    /// </summary>
    enum AggregateType
    {
        // Element types (extract with ElementTypeMask).
        Invalid = 0,
        Void = 1,
        Bool = 2,
        FP32 = 3,
        FP64 = 4,
        S32 = 5,
        U32 = 6,

        ElementTypeMask = 0xff,

        // Element count selector (extract with ElementCountMask).
        ElementCountShift = 8,
        ElementCountMask = 3 << ElementCountShift,

        // Note: Scalar is numerically zero, so it is the absence of any count bit.
        Scalar = 0 << ElementCountShift,
        Vector2 = 1 << ElementCountShift,
        Vector3 = 2 << ElementCountShift,
        Vector4 = 3 << ElementCountShift,

        // Independent flag, combinable with an element type and count.
        Array = 1 << 10
    }
}

// src/Ryujinx.Graphics.Shader/Translation/AttributeConsts.cs
namespace Ryujinx.Graphics.Shader.Translation
{
    /// <summary>
    /// Numeric identifiers of shader attributes.
    /// NOTE(review): the values look like byte offsets into an attribute map
    /// (scalars are 4 apart, vectors 16 apart) - confirm against the decoder.
    /// </summary>
    static class AttributeConsts
    {
        public const int PrimitiveId = 0x060;
        public const int Layer = 0x064;

        public const int PositionX = 0x070;
        public const int PositionY = PositionX + 0x4;

        public const int FrontColorDiffuseR = 0x280;
        public const int BackColorDiffuseR = 0x2a0;

        // Eight consecutive scalar slots, 4 apart.
        public const int ClipDistance0 = 0x2c0;
        public const int ClipDistance1 = ClipDistance0 + 0x04;
        public const int ClipDistance2 = ClipDistance0 + 0x08;
        public const int ClipDistance3 = ClipDistance0 + 0x0c;
        public const int ClipDistance4 = ClipDistance0 + 0x10;
        public const int ClipDistance5 = ClipDistance0 + 0x14;
        public const int ClipDistance6 = ClipDistance0 + 0x18;
        public const int ClipDistance7 = ClipDistance0 + 0x1c;

        public const int FogCoord = 0x2e8;

        public const int TessCoordX = 0x2f0;
        public const int TessCoordY = TessCoordX + 0x4;

        public const int InstanceId = 0x2f8;
        public const int VertexId = 0x2fc;

        // Texture coordinates: TexCoordCount entries of 16 each, [TexCoordBase, TexCoordEnd).
        public const int TexCoordCount = 10;
        public const int TexCoordBase = 0x300;
        public const int TexCoordEnd = TexCoordBase + TexCoordCount * 16;

        public const int FrontFacing = 0x3fc;

        // User attributes: UserAttributesCount entries of 16 each, [UserAttributeBase, UserAttributeEnd).
        public const int UserAttributesCount = 32;
        public const int UserAttributeBase = 0x80;
        public const int UserAttributeEnd = UserAttributeBase + UserAttributesCount * 16;

        // Per-patch user attribute range.
        public const int UserAttributePerPatchBase = 0x18;
        public const int UserAttributePerPatchEnd = 0x200;
    }
}
// src/Ryujinx.Graphics.Shader/Translation/ControlFlowGraph.cs
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using System.Collections.Generic;

namespace Ryujinx.Graphics.Shader.Translation
{
    /// <summary>
    /// A set of basic blocks together with a post-order numbering of the blocks
    /// reachable from the entry block (<c>Blocks[0]</c>).
    /// </summary>
    class ControlFlowGraph
    {
        /// <summary>All blocks of the graph; index 0 is the entry block.</summary>
        public BasicBlock[] Blocks { get; }

        /// <summary>Blocks reachable from the entry, in depth-first post-order.</summary>
        public BasicBlock[] PostOrderBlocks { get; }

        /// <summary>Maps a block's <c>Index</c> to its position inside <see cref="PostOrderBlocks"/>.</summary>
        public int[] PostOrderMap { get; }

        /// <summary>
        /// Builds the graph and computes the post-order with an explicit stack
        /// (no recursion). The fall-through successor (<c>Next</c>) is explored
        /// before the branch target (<c>Branch</c>).
        /// </summary>
        /// <param name="blocks">Blocks of the function; must contain at least the entry block.</param>
        public ControlFlowGraph(BasicBlock[] blocks)
        {
            Blocks = blocks;
            PostOrderMap = new int[blocks.Length];

            var seen = new HashSet<BasicBlock>();
            var pending = new Stack<BasicBlock>();
            var ordered = new List<BasicBlock>(blocks.Length);

            BasicBlock entry = blocks[0];

            seen.Add(entry);
            pending.Push(entry);

            while (pending.TryPop(out BasicBlock current))
            {
                // Pick the first successor that has not been discovered yet, if any.
                BasicBlock successor = null;

                if (current.Next != null && seen.Add(current.Next))
                {
                    successor = current.Next;
                }
                else if (current.Branch != null && seen.Add(current.Branch))
                {
                    successor = current.Branch;
                }

                if (successor != null)
                {
                    // Revisit the current block once the successor is fully processed.
                    pending.Push(current);
                    pending.Push(successor);

                    continue;
                }

                // Every successor is done: the block gets the next post-order number.
                PostOrderMap[current.Index] = ordered.Count;
                ordered.Add(current);
            }

            PostOrderBlocks = ordered.ToArray();
        }

        /// <summary>
        /// Splits a linear list of operations into basic blocks, links them through
        /// <c>Next</c>/<c>Branch</c>, drops blocks without predecessors and returns
        /// the resulting graph.
        /// </summary>
        /// <param name="operations">Operations in program order.</param>
        /// <returns>The control flow graph for <paramref name="operations"/>.</returns>
        public static ControlFlowGraph Create(Operation[] operations)
        {
            var labelBlocks = new Dictionary<Operand, BasicBlock>();
            var blocks = new List<BasicBlock>();

            BasicBlock currentBlock = null;

            // Makes 'nextBlock' current, adding a fall-through edge from the previous
            // block unless that block ends with an unconditional control transfer.
            void LinkAndAdvance(BasicBlock nextBlock)
            {
                if (currentBlock != null && !EndsWithUnconditionalInst(currentBlock.GetLastOp()))
                {
                    currentBlock.Next = nextBlock;
                }

                currentBlock = nextBlock;
            }

            // Appends a brand new block to the list and makes it current.
            void AppendFreshBlock()
            {
                BasicBlock fresh = new BasicBlock(blocks.Count);

                blocks.Add(fresh);

                LinkAndAdvance(fresh);
            }

            // True when the previous operation was a branch, meaning that the next
            // non-label operation has to open a new block.
            bool needsNewBlock = true;

            foreach (Operation operation in operations)
            {
                if (operation.Inst == Instruction.MarkLabel)
                {
                    Operand label = operation.Dest;

                    if (labelBlocks.TryGetValue(label, out BasicBlock labelled))
                    {
                        // The label was already referenced by an earlier branch;
                        // its placeholder block now gets a position in the list.
                        labelled.Index = blocks.Count;

                        blocks.Add(labelled);

                        LinkAndAdvance(labelled);
                    }
                    else
                    {
                        AppendFreshBlock();

                        labelBlocks.Add(label, currentBlock);
                    }
                }
                else
                {
                    if (needsNewBlock)
                    {
                        AppendFreshBlock();
                    }

                    currentBlock.Operations.AddLast(operation);
                }

                switch (operation.Inst)
                {
                    case Instruction.Branch:
                    case Instruction.BranchIfTrue:
                    case Instruction.BranchIfFalse:
                        needsNewBlock = true;
                        break;
                    default:
                        needsNewBlock = false;
                        break;
                }

                if (needsNewBlock)
                {
                    Operand target = operation.Dest;

                    if (!labelBlocks.TryGetValue(target, out BasicBlock branchBlock))
                    {
                        // Forward reference: create a placeholder that is placed
                        // in the list when its MarkLabel is reached.
                        branchBlock = new BasicBlock();

                        labelBlocks.Add(target, branchBlock);
                    }

                    currentBlock.Branch = branchBlock;
                }
            }

            // Remove unreachable blocks. Clearing Next/Branch of a removed block may
            // leave further blocks without predecessors (presumably the setters keep
            // the predecessor lists in sync), so repeat until nothing changes.
            bool removedAny;

            do
            {
                removedAny = false;

                // The entry block (index 0) is always kept.
                int position = 1;

                while (position < blocks.Count)
                {
                    BasicBlock block = blocks[position];

                    if (block.Predecessors.Count != 0)
                    {
                        block.Index = position;
                        position++;

                        continue;
                    }

                    block.Next = null;
                    block.Branch = null;
                    blocks.RemoveAt(position);
                    removedAny = true;
                }
            }
            while (removedAny);

            return new ControlFlowGraph(blocks.ToArray());
        }

        /// <summary>
        /// Checks whether <paramref name="node"/> is an operation after which
        /// execution never falls through (branch, discard or return).
        /// </summary>
        private static bool EndsWithUnconditionalInst(INode node)
        {
            if (!(node is Operation operation))
            {
                return false;
            }

            return operation.Inst == Instruction.Branch ||
                   operation.Inst == Instruction.Discard ||
                   operation.Inst == Instruction.Return;
        }
    }
}
// src/Ryujinx.Graphics.Shader/Translation/Dominance.cs
using Ryujinx.Graphics.Shader.IntermediateRepresentation;

namespace Ryujinx.Graphics.Shader.Translation
{
    static class Dominance
    {
        // Those methods are an implementation of the algorithms on "A Simple, Fast Dominance Algorithm".
        // https://www.cs.rice.edu/~keith/EMBED/dom.pdf

        /// <summary>
        /// Computes <c>ImmediateDominator</c> for the blocks listed in
        /// <c>cfg.PostOrderBlocks</c>. The entry block is its own immediate dominator.
        /// </summary>
        /// <param name="cfg">Graph whose post-order numbering is already computed.</param>
        public static void FindDominators(ControlFlowGraph cfg)
        {
            int PostOrderOf(BasicBlock block)
            {
                return cfg.PostOrderMap[block.Index];
            }

            // Walks both blocks up their dominator chains until they meet; the block
            // with the lower post-order number is always the one moved up.
            BasicBlock Intersect(BasicBlock first, BasicBlock second)
            {
                while (first != second)
                {
                    while (PostOrderOf(first) < PostOrderOf(second))
                    {
                        first = first.ImmediateDominator;
                    }

                    while (PostOrderOf(second) < PostOrderOf(first))
                    {
                        second = second.ImmediateDominator;
                    }
                }

                return first;
            }

            BasicBlock entry = cfg.Blocks[0];

            entry.ImmediateDominator = entry;

            bool changed;

            do
            {
                changed = false;

                // Reverse post-order, skipping the entry (last element of the post-order).
                for (int order = cfg.PostOrderBlocks.Length - 2; order >= 0; order--)
                {
                    BasicBlock block = cfg.PostOrderBlocks[order];

                    BasicBlock candidate = null;

                    foreach (BasicBlock predecessor in block.Predecessors)
                    {
                        // Predecessors not processed yet have no dominator information.
                        if (predecessor.ImmediateDominator == null)
                        {
                            continue;
                        }

                        candidate = candidate != null ? Intersect(predecessor, candidate) : predecessor;
                    }

                    if (block.ImmediateDominator != candidate)
                    {
                        block.ImmediateDominator = candidate;

                        changed = true;
                    }
                }
            }
            while (changed);
        }

        /// <summary>
        /// Fills <c>DominanceFrontiers</c> of every block. Requires the immediate
        /// dominators to be computed already.
        /// </summary>
        /// <param name="blocks">All blocks of the graph.</param>
        public static void FindDominanceFrontiers(BasicBlock[] blocks)
        {
            foreach (BasicBlock block in blocks)
            {
                // Only join points (two or more predecessors) can be in a frontier.
                if (block.Predecessors.Count < 2)
                {
                    continue;
                }

                for (int predIndex = 0; predIndex < block.Predecessors.Count; predIndex++)
                {
                    // Every block on the dominator chain of the predecessor, up to (but
                    // excluding) the immediate dominator of the join, has it in its frontier.
                    for (BasicBlock runner = block.Predecessors[predIndex];
                         runner != block.ImmediateDominator;
                         runner = runner.ImmediateDominator)
                    {
                        runner.DominanceFrontiers.Add(block);
                    }
                }
            }
        }
    }
}
// src/Ryujinx.Graphics.Shader/Translation/EmitterContext.cs
using Ryujinx.Graphics.Shader.Decoders;
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using System.Collections.Generic;
using System.Diagnostics;
using System.Numerics;
using System.Runtime.CompilerServices;

using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;

namespace Ryujinx.Graphics.Shader.Translation
{
    /// <summary>
    /// Accumulates the list of IR operations emitted while translating one function
    /// and keeps track of the labels associated with guest addresses.
    /// </summary>
    class EmitterContext
    {
        public DecodedProgram Program { get; }
        public ShaderConfig Config { get; }

        public bool IsNonMain { get; }

        public Block CurrBlock { get; set; }
        public InstOp CurrOp { get; set; }

        /// <summary>Number of operations emitted so far.</summary>
        public int OperationsCount => _operations.Count;

        // Description of a comparison emitted on block entry (see EnterBlock):
        // when Selector differs from ExpectedValue, control goes to NextTargetAddress.
        private readonly struct BrxTarget
        {
            public readonly Operand Selector;
            public readonly int ExpectedValue;
            public readonly ulong NextTargetAddress;

            public BrxTarget(Operand selector, int expectedValue, ulong nextTargetAddress)
            {
                Selector = selector;
                ExpectedValue = expectedValue;
                NextTargetAddress = nextTargetAddress;
            }
        }

        // Label operand of a block plus its optional BRX target (Selector is null when unset).
        private class BlockLabel
        {
            public readonly Operand Label;
            public BrxTarget BrxTarget;

            public BlockLabel(Operand label)
            {
                Label = label;
            }
        }

        private readonly List<Operation> _operations;
        private readonly Dictionary<ulong, BlockLabel> _labels;

        /// <summary>
        /// Creates the context and immediately emits the function prologue.
        /// </summary>
        public EmitterContext(DecodedProgram program, ShaderConfig config, bool isNonMain)
        {
            Program = program;
            Config = config;
            IsNonMain = isNonMain;

            _operations = new List<Operation>();
            _labels = new Dictionary<ulong, BlockLabel>();

            EmitStart();
        }

        // Emits the operations that must come before any translated code.
        private void EmitStart()
        {
            if (Config.Stage != ShaderStage.Vertex)
            {
                return;
            }

            if (Config.Options.TargetApi != TargetApi.Vulkan)
            {
                return;
            }

            if ((Config.Options.Flags & TranslationFlags.VertexA) != 0)
            {
                return;
            }

            // Vulkan requires the point size to be always written on the shader if the primitive topology is points.
            this.Store(StorageKind.Output, IoVariable.PointSize, null, ConstF(Config.GpuAccessor.QueryPointSize()));
        }

        /// <summary>
        /// Reinterprets the raw opcode of the current instruction as <typeparamref name="T"/>.
        /// </summary>
        /// <typeparam name="T">Unmanaged type that must be exactly 8 bytes in size.</typeparam>
        public T GetOp<T>() where T : unmanaged
        {
            Debug.Assert(Unsafe.SizeOf<T>() == sizeof(ulong));

            // Copy to a local, as a reference to a property value can't be taken.
            ulong rawOpCode = CurrOp.RawOpCode;

            return Unsafe.As<ulong, T>(ref rawOpCode);
        }

        /// <summary>Emits an operation and returns its destination operand.</summary>
        public Operand Add(Instruction inst, Operand dest = null, params Operand[] sources)
        {
            Add(new Operation(inst, dest, sources));

            return dest;
        }

        /// <summary>Emits an operation with a storage kind and returns its destination operand.</summary>
        public Operand Add(Instruction inst, StorageKind storageKind, Operand dest = null, params Operand[] sources)
        {
            Add(new Operation(inst, storageKind, dest, sources));

            return dest;
        }

        /// <summary>Emits an operation with two destinations and returns them.</summary>
        public (Operand, Operand) Add(Instruction inst, (Operand, Operand) dest, params Operand[] sources)
        {
            Operand[] dests = { dest.Item1, dest.Item2 };

            Add(new Operation(inst, 0, dests, sources));

            return dest;
        }

        /// <summary>Appends an already built operation to the list.</summary>
        public void Add(Operation operation)
        {
            _operations.Add(operation);
        }

        /// <summary>
        /// Creates a texture operation with an unknown format.
        /// The operation is returned, not added to the list.
        /// </summary>
        public TextureOperation CreateTextureOperation(
            Instruction inst,
            SamplerType type,
            TextureFlags flags,
            int handle,
            int compIndex,
            Operand[] dests,
            params Operand[] sources)
        {
            return CreateTextureOperation(inst, type, TextureFormat.Unknown, flags, handle, compIndex, dests, sources);
        }

        /// <summary>
        /// Creates a texture operation, registering the texture as used on the
        /// config unless it is bindless. The operation is returned, not added to the list.
        /// </summary>
        public TextureOperation CreateTextureOperation(
            Instruction inst,
            SamplerType type,
            TextureFormat format,
            TextureFlags flags,
            int handle,
            int compIndex,
            Operand[] dests,
            params Operand[] sources)
        {
            bool isBindless = (flags & TextureFlags.Bindless) == TextureFlags.Bindless;

            if (!isBindless)
            {
                Config.SetUsedTexture(inst, type, format, flags, TextureOperation.DefaultCbufSlot, handle);
            }

            return new TextureOperation(inst, type, format, flags, handle, compIndex, dests, sources);
        }

        /// <summary>Records the feature flags implied by reading <paramref name="attribute"/>.</summary>
        public void FlagAttributeRead(int attribute)
        {
            switch (Config.Stage)
            {
                case ShaderStage.Vertex:
                    if (attribute == AttributeConsts.InstanceId)
                    {
                        Config.SetUsedFeature(FeatureFlags.InstanceId);
                    }
                    break;

                case ShaderStage.Fragment:
                    if (attribute == AttributeConsts.PositionX || attribute == AttributeConsts.PositionY)
                    {
                        Config.SetUsedFeature(FeatureFlags.FragCoordXY);
                    }
                    break;
            }
        }

        /// <summary>Records the state implied by writing <paramref name="attribute"/>.</summary>
        public void FlagAttributeWritten(int attribute)
        {
            if (Config.Stage == ShaderStage.Vertex)
            {
                // The clip distances are 8 values spaced 4 apart, only exact matches count.
                bool isClipDistance =
                    attribute >= AttributeConsts.ClipDistance0 &&
                    attribute <= AttributeConsts.ClipDistance7 &&
                    (attribute - AttributeConsts.ClipDistance0) % 4 == 0;

                if (isClipDistance)
                {
                    Config.SetClipDistanceWritten((attribute - AttributeConsts.ClipDistance0) / 4);
                }
            }

            if (Config.Stage != ShaderStage.Fragment && attribute == AttributeConsts.Layer)
            {
                Config.SetUsedFeature(FeatureFlags.RtLayer);
            }
        }

        /// <summary>Emits a label marker for <paramref name="label"/>.</summary>
        public void MarkLabel(Operand label)
        {
            Add(Instruction.MarkLabel, label);
        }

        /// <summary>Gets the label for <paramref name="address"/>, creating it on first use.</summary>
        public Operand GetLabel(ulong address)
        {
            return EnsureBlockLabel(address).Label;
        }

        /// <summary>
        /// Attaches a BRX target to the block at <paramref name="address"/>.
        /// A block is expected to have at most one (checked on debug builds only).
        /// </summary>
        public void SetBrxTarget(ulong address, Operand selector, int targetValue, ulong nextTargetAddress)
        {
            BlockLabel entry = EnsureBlockLabel(address);

            Debug.Assert(entry.BrxTarget.Selector == null);

            entry.BrxTarget = new BrxTarget(selector, targetValue, nextTargetAddress);
        }

        /// <summary>
        /// Marks the start of the block at <paramref name="address"/>. If the block has a
        /// BRX target, also emits the branch to the next candidate taken when the
        /// selector does not match the expected value.
        /// </summary>
        public void EnterBlock(ulong address)
        {
            BlockLabel entry = EnsureBlockLabel(address);

            MarkLabel(entry.Label);

            BrxTarget target = entry.BrxTarget;

            if (target.Selector == null)
            {
                return;
            }

            Operand nextCandidate = GetLabel(target.NextTargetAddress);
            Operand selectorMatches = this.ICompareEqual(target.Selector, Const(target.ExpectedValue));

            this.BranchIfFalse(nextCandidate, selectorMatches);
        }

        // Looks up the label entry of an address, creating it when missing.
        private BlockLabel EnsureBlockLabel(ulong address)
        {
            if (_labels.TryGetValue(address, out BlockLabel existing))
            {
                return existing;
            }

            BlockLabel created = new BlockLabel(Label());

            _labels.Add(address, created);

            return created;
        }

        /// <summary>
        /// Emits the output fix-ups needed before the last vertex pipeline stage returns:
        /// position X/Y rescale, depth remap and layer forwarding, each one only
        /// when the matching condition holds.
        /// </summary>
        public void PrepareForVertexReturn()
        {
            if (Config.GpuAccessor.QueryViewportTransformDisable())
            {
                EmitViewportInverseTransform();
            }

            if (Config.Options.TargetApi == TargetApi.Vulkan && Config.GpuAccessor.QueryTransformDepthMinusOneToOne())
            {
                EmitDepthRemap();
            }

            if (Config.Stage != ShaderStage.Geometry && Config.HasLayerInputAttribute)
            {
                EmitLayerFromUserAttribute();
            }
        }

        // position.xy = position.xy * viewInverse.xy - 1
        private void EmitViewportInverseTransform()
        {
            Operand posX = this.Load(StorageKind.Output, IoVariable.Position, null, Const(0));
            Operand posY = this.Load(StorageKind.Output, IoVariable.Position, null, Const(1));
            Operand scaleX = this.Load(StorageKind.Input, IoVariable.SupportBlockViewInverse, null, Const(0));
            Operand scaleY = this.Load(StorageKind.Input, IoVariable.SupportBlockViewInverse, null, Const(1));
            Operand minusOne = ConstF(-1.0f);

            this.Store(StorageKind.Output, IoVariable.Position, null, Const(0), this.FPFusedMultiplyAdd(posX, scaleX, minusOne));
            this.Store(StorageKind.Output, IoVariable.Position, null, Const(1), this.FPFusedMultiplyAdd(posY, scaleY, minusOne));
        }

        // position.z = position.z * 0.5 + position.w * 0.5
        private void EmitDepthRemap()
        {
            Operand posZ = this.Load(StorageKind.Output, IoVariable.Position, null, Const(2));
            Operand posW = this.Load(StorageKind.Output, IoVariable.Position, null, Const(3));
            Operand halfW = this.FPMultiply(posW, ConstF(0.5f));

            this.Store(StorageKind.Output, IoVariable.Position, null, Const(2), this.FPFusedMultiplyAdd(posZ, ConstF(0.5f), halfW));
        }

        // Copies the user attribute component selected by GpLayerInputAttribute into the layer output.
        private void EmitLayerFromUserAttribute()
        {
            Config.SetUsedFeature(FeatureFlags.RtLayer);

            // The upper bits select the vector, the lower two bits the component.
            int vectorIndex = Config.GpLayerInputAttribute >> 2;
            int componentIndex = Config.GpLayerInputAttribute & 3;

            Operand layerValue = this.Load(StorageKind.Output, IoVariable.UserDefined, null, Const(vectorIndex), Const(componentIndex));

            this.Store(StorageKind.Output, IoVariable.Layer, null, layerValue);
        }

        /// <summary>
        /// Same as <see cref="PrepareForVertexReturn()"/>, but first copies the position
        /// components about to be modified into fresh locals.
        /// </summary>
        /// <param name="oldXLocal">Copy of position X, or null when X/Y are not modified.</param>
        /// <param name="oldYLocal">Copy of position Y, or null when X/Y are not modified.</param>
        /// <param name="oldZLocal">Copy of position Z, or null when Z is not modified.</param>
        public void PrepareForVertexReturn(out Operand oldXLocal, out Operand oldYLocal, out Operand oldZLocal)
        {
            oldXLocal = null;
            oldYLocal = null;
            oldZLocal = null;

            if (Config.GpuAccessor.QueryViewportTransformDisable())
            {
                oldXLocal = Local();
                this.Copy(oldXLocal, this.Load(StorageKind.Output, IoVariable.Position, null, Const(0)));
                oldYLocal = Local();
                this.Copy(oldYLocal, this.Load(StorageKind.Output, IoVariable.Position, null, Const(1)));
            }

            if (Config.Options.TargetApi == TargetApi.Vulkan && Config.GpuAccessor.QueryTransformDepthMinusOneToOne())
            {
                oldZLocal = Local();
                this.Copy(oldZLocal, this.Load(StorageKind.Output, IoVariable.Position, null, Const(2)));
            }

            PrepareForVertexReturn();
        }

        /// <summary>
        /// Emits the stage specific epilogue that must run before the main function
        /// returns. Does nothing for non-main functions.
        /// </summary>
        public void PrepareForReturn()
        {
            if (IsNonMain)
            {
                return;
            }

            bool isLastVertexStage =
                Config.LastInVertexPipeline &&
                (Config.Stage == ShaderStage.Vertex || Config.Stage == ShaderStage.TessellationEvaluation) &&
                (Config.Options.Flags & TranslationFlags.VertexA) == 0;

            if (isLastVertexStage)
            {
                PrepareForVertexReturn();
            }
            else if (Config.Stage == ShaderStage.Geometry)
            {
                EmitGeometryPassthrough();
            }
            else if (Config.Stage == ShaderStage.Fragment)
            {
                EmitFragmentEpilogue();
            }
        }

        // Emulates geometry shader passthrough when the host lacks support for it:
        // every input vertex is re-emitted with its position and passthrough attributes.
        private void EmitGeometryPassthrough()
        {
            if (!Config.GpPassthrough || Config.GpuAccessor.QueryHostSupportsGeometryShaderPassthrough())
            {
                return;
            }

            int vertexCount = Config.GpuAccessor.QueryPrimitiveTopology().ToInputVertices();

            for (int vertex = 0; vertex < vertexCount; vertex++)
            {
                CopyPositionInputToOutput(vertex);

                // Visit the set bits of the mask, from the lowest to the highest.
                int pendingAttributes = Config.PassthroughAttributes;

                while (pendingAttributes != 0)
                {
                    int attrIndex = BitOperations.TrailingZeroCount(pendingAttributes);

                    CopyUserDefinedInputToOutput(attrIndex, vertex);
                    Config.SetOutputUserAttribute(attrIndex);

                    pendingAttributes &= ~(1 << attrIndex);
                }

                this.EmitVertex();
            }

            this.EndPrimitive();
        }

        // Loads the 4 position components of an input vertex, then stores them on the output.
        private void CopyPositionInputToOutput(int primIndex)
        {
            Operand[] components = new Operand[4];

            for (int component = 0; component < components.Length; component++)
            {
                components[component] = this.Load(StorageKind.Input, IoVariable.Position, Const(primIndex), Const(component));
            }

            for (int component = 0; component < components.Length; component++)
            {
                this.Store(StorageKind.Output, IoVariable.Position, null, Const(component), components[component]);
            }
        }

        // Loads the 4 components of a user attribute of an input vertex, then stores them on the output.
        private void CopyUserDefinedInputToOutput(int index, int primIndex)
        {
            Operand[] components = new Operand[4];

            for (int component = 0; component < components.Length; component++)
            {
                components[component] = this.Load(StorageKind.Input, IoVariable.UserDefined, Const(index), Const(primIndex), Const(component));
            }

            for (int component = 0; component < components.Length; component++)
            {
                this.Store(StorageKind.Output, IoVariable.UserDefined, null, Const(index), Const(component), components[component]);
            }
        }

        // Fragment stage epilogue: alpha to coverage dither, depth output, alpha test and color outputs.
        private void EmitFragmentEpilogue()
        {
            GenerateAlphaToCoverageDitherDiscard();

            bool supportsBgra = Config.GpuAccessor.QueryHostSupportsBgraFormat();

            if (Config.OmapDepth)
            {
                Operand depth = Register(Config.GetDepthRegister(), RegisterType.Gpr);

                this.Store(StorageKind.Output, IoVariable.FragmentOutputDepth, null, depth);
            }

            EmitAlphaTest();

            // Index of the first register of the current target, only enabled targets consume registers.
            int regIndexBase = 0;

            for (int rtIndex = 0; rtIndex < 8; rtIndex++)
            {
                for (int component = 0; component < 4; component++)
                {
                    int componentBit = 1 << (rtIndex * 4 + component);

                    if ((Config.OmapTargets & componentBit) == 0)
                    {
                        continue;
                    }

                    Operand value = Register(regIndexBase + component, RegisterType.Gpr);

                    EmitColorStore(rtIndex, component, value, supportsBgra);
                }

                if ((Config.OmapTargets & (0xf << (rtIndex * 4))) != 0)
                {
                    Config.SetOutputUserAttribute(rtIndex);
                    regIndexBase += 4;
                }
            }
        }

        // Emits the alpha test: the fragment is discarded when the comparison of register 3
        // against the reference value fails. Only done when bit 3 of OmapTargets is set.
        private void EmitAlphaTest()
        {
            AlphaTestOp alphaTestOp = Config.GpuAccessor.QueryAlphaTestCompare();

            if (alphaTestOp == AlphaTestOp.Always || (Config.OmapTargets & 8) == 0)
            {
                return;
            }

            if (alphaTestOp == AlphaTestOp.Never)
            {
                this.Discard();

                return;
            }

            Instruction comparator;

            switch (alphaTestOp)
            {
                case AlphaTestOp.Equal:
                    comparator = Instruction.CompareEqual;
                    break;
                case AlphaTestOp.Greater:
                    comparator = Instruction.CompareGreater;
                    break;
                case AlphaTestOp.GreaterOrEqual:
                    comparator = Instruction.CompareGreaterOrEqual;
                    break;
                case AlphaTestOp.Less:
                    comparator = Instruction.CompareLess;
                    break;
                case AlphaTestOp.LessOrEqual:
                    comparator = Instruction.CompareLessOrEqual;
                    break;
                case AlphaTestOp.NotEqual:
                    comparator = Instruction.CompareNotEqual;
                    break;
                default:
                    comparator = 0;
                    break;
            }

            Debug.Assert(comparator != 0, $"Invalid alpha test operation \"{alphaTestOp}\".");

            Operand alpha = Register(3, RegisterType.Gpr);
            Operand reference = ConstF(Config.GpuAccessor.QueryAlphaTestReference());
            Operand passed = Add(Instruction.FP32 | comparator, Local(), alpha, reference);
            Operand passedLabel = Label();

            this.BranchIfTrue(passedLabel, passed);
            this.Discard();
            this.MarkLabel(passedLabel);
        }

        // Stores one color component on a render target output.
        private void EmitColorStore(int rtIndex, int component, Operand src, bool supportsBgra)
        {
            // Perform B <-> R swap if needed, for BGRA formats (not supported on OpenGL).
            bool mayNeedSwap = !supportsBgra && (component == 0 || component == 2);

            if (!mayNeedSwap)
            {
                this.Store(StorageKind.Output, IoVariable.FragmentOutputColor, null, Const(rtIndex), Const(component), src);

                return;
            }

            Operand isBgra = this.Load(StorageKind.Input, IoVariable.FragmentOutputIsBgra, null, Const(rtIndex));

            Operand swappedLabel = Label();
            Operand doneLabel = Label();

            this.BranchIfTrue(swappedLabel, isBgra);

            this.Store(StorageKind.Output, IoVariable.FragmentOutputColor, null, Const(rtIndex), Const(component), src);
            this.Branch(doneLabel);

            MarkLabel(swappedLabel);

            // 2 - component maps 0 to 2 and 2 to 0.
            this.Store(StorageKind.Output, IoVariable.FragmentOutputColor, null, Const(rtIndex), Const(2 - component), src);

            MarkLabel(doneLabel);
        }

        // Discards the fragment according to a dither pattern selected by the
        // quantized alpha value and the parity of the fragment coordinates.
        private void GenerateAlphaToCoverageDitherDiscard()
        {
            // If the feature is disabled, or alpha is not written, then we're done.
            if (!Config.GpuAccessor.QueryAlphaToCoverageDitherEnable())
            {
                return;
            }

            if ((Config.OmapTargets & 8) == 0)
            {
                return;
            }

            // 11 11 11 10 10 10 10 00
            // 11 01 01 01 01 00 00 00
            Operand ditherMask = Const(unchecked((int)0xfbb99110u));

            Operand fragCoordX = this.Load(StorageKind.Input, IoVariable.FragmentCoord, null, Const(0));
            Operand fragCoordY = this.Load(StorageKind.Input, IoVariable.FragmentCoord, null, Const(1));

            // 2-bit index inside the 2x2 pixel quad.
            Operand xParity = this.BitwiseAnd(this.FP32ConvertToU32(fragCoordX), Const(1));
            Operand yParity = this.BitwiseAnd(this.FP32ConvertToU32(fragCoordY), Const(1));
            Operand quadIndex = this.BitwiseOr(xParity, this.ShiftLeft(yParity, Const(1)));

            // Alpha quantized to the 0-7 range selects a group of 4 bits on the mask.
            Operand alpha = Register(3, RegisterType.Gpr);
            Operand scaledAlpha = this.FPMultiply(this.FPSaturate(alpha), ConstF(8));
            Operand alphaLevel = this.IMinimumU32(this.FP32ConvertToU32(scaledAlpha), Const(7));
            Operand bitIndex = this.BitwiseOr(this.ShiftLeft(alphaLevel, Const(2)), quadIndex);
            Operand covered = this.BitwiseAnd(this.ShiftRightU32(ditherMask, bitIndex), Const(1));

            Operand keepLabel = Label();

            this.BranchIfTrue(keepLabel, covered);
            this.Discard();
            this.MarkLabel(keepLabel);
        }

        /// <summary>Returns a snapshot of the operations emitted so far.</summary>
        public Operation[] GetOperations()
        {
            return _operations.ToArray();
        }
    }
}
\ No newline at end of file diff --git a/src/Ryujinx.Graphics.Shader/Translation/EmitterContextInsts.cs b/src/Ryujinx.Graphics.Shader/Translation/EmitterContextInsts.cs new file mode 100644 index 00000000..93748249 --- /dev/null +++ b/src/Ryujinx.Graphics.Shader/Translation/EmitterContextInsts.cs @@ -0,0 +1,819 @@ +using Ryujinx.Graphics.Shader.IntermediateRepresentation; +using System; + +using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper; + +namespace Ryujinx.Graphics.Shader.Translation +{ + static class EmitterContextInsts + { + public static Operand AtomicAdd(this EmitterContext context, StorageKind storageKind, Operand a, Operand b, Operand c) + { + return context.Add(Instruction.AtomicAdd, storageKind, Local(), a, b, c); + } + + public static Operand AtomicAnd(this EmitterContext context, StorageKind storageKind, Operand a, Operand b, Operand c) + { + return context.Add(Instruction.AtomicAnd, storageKind, Local(), a, b, c); + } + + public static Operand AtomicCompareAndSwap(this EmitterContext context, StorageKind storageKind, Operand a, Operand b, Operand c, Operand d) + { + return context.Add(Instruction.AtomicCompareAndSwap, storageKind, Local(), a, b, c, d); + } + + public static Operand AtomicMaxS32(this EmitterContext context, StorageKind storageKind, Operand a, Operand b, Operand c) + { + return context.Add(Instruction.AtomicMaxS32, storageKind, Local(), a, b, c); + } + + public static Operand AtomicMaxU32(this EmitterContext context, StorageKind storageKind, Operand a, Operand b, Operand c) + { + return context.Add(Instruction.AtomicMaxU32, storageKind, Local(), a, b, c); + } + + public static Operand AtomicMinS32(this EmitterContext context, StorageKind storageKind, Operand a, Operand b, Operand c) + { + return context.Add(Instruction.AtomicMinS32, storageKind, Local(), a, b, c); + } + + public static Operand AtomicMinU32(this EmitterContext context, StorageKind storageKind, Operand a, Operand b, Operand c) + { + return 
context.Add(Instruction.AtomicMinU32, storageKind, Local(), a, b, c); + } + + public static Operand AtomicOr(this EmitterContext context, StorageKind storageKind, Operand a, Operand b, Operand c) + { + return context.Add(Instruction.AtomicOr, storageKind, Local(), a, b, c); + } + + public static Operand AtomicSwap(this EmitterContext context, StorageKind storageKind, Operand a, Operand b, Operand c) + { + return context.Add(Instruction.AtomicSwap, storageKind, Local(), a, b, c); + } + + public static Operand AtomicXor(this EmitterContext context, StorageKind storageKind, Operand a, Operand b, Operand c) + { + return context.Add(Instruction.AtomicXor, storageKind, Local(), a, b, c); + } + + public static Operand Ballot(this EmitterContext context, Operand a) + { + return context.Add(Instruction.Ballot, Local(), a); + } + + public static Operand Barrier(this EmitterContext context) + { + return context.Add(Instruction.Barrier); + } + + public static Operand BitCount(this EmitterContext context, Operand a) + { + return context.Add(Instruction.BitCount, Local(), a); + } + + public static Operand BitfieldExtractS32(this EmitterContext context, Operand a, Operand b, Operand c) + { + return context.Add(Instruction.BitfieldExtractS32, Local(), a, b, c); + } + + public static Operand BitfieldExtractU32(this EmitterContext context, Operand a, Operand b, Operand c) + { + return context.Add(Instruction.BitfieldExtractU32, Local(), a, b, c); + } + + public static Operand BitfieldInsert(this EmitterContext context, Operand a, Operand b, Operand c, Operand d) + { + return context.Add(Instruction.BitfieldInsert, Local(), a, b, c, d); + } + + public static Operand BitfieldReverse(this EmitterContext context, Operand a) + { + return context.Add(Instruction.BitfieldReverse, Local(), a); + } + + public static Operand BitwiseAnd(this EmitterContext context, Operand a, Operand b) + { + return context.Add(Instruction.BitwiseAnd, Local(), a, b); + } + + public static Operand 
BitwiseExclusiveOr(this EmitterContext context, Operand a, Operand b) + { + return context.Add(Instruction.BitwiseExclusiveOr, Local(), a, b); + } + + public static Operand BitwiseNot(this EmitterContext context, Operand a, bool invert) + { + if (invert) + { + a = context.BitwiseNot(a); + } + + return a; + } + + public static Operand BitwiseNot(this EmitterContext context, Operand a) + { + return context.Add(Instruction.BitwiseNot, Local(), a); + } + + public static Operand BitwiseOr(this EmitterContext context, Operand a, Operand b) + { + return context.Add(Instruction.BitwiseOr, Local(), a, b); + } + + public static Operand Branch(this EmitterContext context, Operand d) + { + return context.Add(Instruction.Branch, d); + } + + public static Operand BranchIfFalse(this EmitterContext context, Operand d, Operand a) + { + return context.Add(Instruction.BranchIfFalse, d, a); + } + + public static Operand BranchIfTrue(this EmitterContext context, Operand d, Operand a) + { + return context.Add(Instruction.BranchIfTrue, d, a); + } + + public static Operand Call(this EmitterContext context, int funcId, bool returns, params Operand[] args) + { + Operand[] args2 = new Operand[args.Length + 1]; + + args2[0] = Const(funcId); + args.CopyTo(args2, 1); + + return context.Add(Instruction.Call, returns ? 
Local() : null, args2);
        }

        // Every helper below appends one operation to the emitter context through
        // context.Add and hands back whatever context.Add returns. Unless a helper
        // says otherwise, a fresh Local() operand is passed in the destination slot.

        // ---- Selection, copies and simple control operations ----

        public static Operand ConditionalSelect(this EmitterContext context, Operand a, Operand b, Operand c)
            => context.Add(Instruction.ConditionalSelect, Local(), a, b, c);

        public static Operand Copy(this EmitterContext context, Operand a)
            => context.Add(Instruction.Copy, Local(), a);

        /// <summary>
        /// Copies <paramref name="a"/> into <paramref name="d"/>.
        /// Nothing is emitted when <paramref name="d"/> is a constant operand.
        /// </summary>
        public static void Copy(this EmitterContext context, Operand d, Operand a)
        {
            if (d.Type != OperandType.Constant)
            {
                context.Add(Instruction.Copy, d, a);
            }
        }

        public static Operand Discard(this EmitterContext context)
            => context.Add(Instruction.Discard);

        public static Operand EmitVertex(this EmitterContext context)
            => context.Add(Instruction.EmitVertex);

        public static Operand EndPrimitive(this EmitterContext context)
            => context.Add(Instruction.EndPrimitive);

        // ---- Bit scan ----

        public static Operand FindLSB(this EmitterContext context, Operand a)
            => context.Add(Instruction.FindLSB, Local(), a);

        public static Operand FindMSBS32(this EmitterContext context, Operand a)
            => context.Add(Instruction.FindMSBS32, Local(), a);

        public static Operand FindMSBU32(this EmitterContext context, Operand a)
            => context.Add(Instruction.FindMSBU32, Local(), a);

        // ---- Floating point ----
        // Helpers taking an fpType parameter OR it into the instruction; the others
        // always use Instruction.FP32.

        public static Operand FP32ConvertToFP64(this EmitterContext context, Operand a)
            => context.Add(Instruction.ConvertFP32ToFP64, Local(), a);

        public static Operand FP64ConvertToFP32(this EmitterContext context, Operand a)
            => context.Add(Instruction.ConvertFP64ToFP32, Local(), a);

        /// <summary>Applies the optional absolute value first, then the optional negation.</summary>
        public static Operand FPAbsNeg(this EmitterContext context, Operand a, bool abs, bool neg, Instruction fpType = Instruction.FP32)
            => context.FPNegate(context.FPAbsolute(a, abs, fpType), neg, fpType);

        /// <summary>Returns <paramref name="a"/> untouched unless <paramref name="abs"/> is set.</summary>
        public static Operand FPAbsolute(this EmitterContext context, Operand a, bool abs, Instruction fpType = Instruction.FP32)
            => abs ? context.FPAbsolute(a, fpType) : a;

        public static Operand FPAbsolute(this EmitterContext context, Operand a, Instruction fpType = Instruction.FP32)
            => context.Add(fpType | Instruction.Absolute, Local(), a);

        public static Operand FPAdd(this EmitterContext context, Operand a, Operand b, Instruction fpType = Instruction.FP32)
            => context.Add(fpType | Instruction.Add, Local(), a, b);

        public static Operand FPCeiling(this EmitterContext context, Operand a, Instruction fpType = Instruction.FP32)
            => context.Add(fpType | Instruction.Ceiling, Local(), a);

        public static Operand FPCompareEqual(this EmitterContext context, Operand a, Operand b, Instruction fpType = Instruction.FP32)
            => context.Add(fpType | Instruction.CompareEqual, Local(), a, b);

        public static Operand FPCompareLess(this EmitterContext context, Operand a, Operand b, Instruction fpType = Instruction.FP32)
            => context.Add(fpType | Instruction.CompareLess, Local(), a, b);

        public static Operand FP32ConvertToS32(this EmitterContext context, Operand a)
            => context.Add(Instruction.ConvertFP32ToS32, Local(), a);

        public static Operand FP32ConvertToU32(this EmitterContext context, Operand a)
            => context.Add(Instruction.ConvertFP32ToU32, Local(), a);

        public static Operand FP64ConvertToS32(this EmitterContext context, Operand a)
            => context.Add(Instruction.ConvertFP64ToS32, Local(), a);

        public static Operand FP64ConvertToU32(this EmitterContext context, Operand a)
            => context.Add(Instruction.ConvertFP64ToU32, Local(), a);

        public static Operand FPCosine(this EmitterContext context, Operand a)
            => context.Add(Instruction.FP32 | Instruction.Cosine, Local(), a);

        public static Operand FPDivide(this EmitterContext context, Operand a, Operand b, Instruction fpType = Instruction.FP32)
            => context.Add(fpType | Instruction.Divide, Local(), a, b);

        public static Operand FPExponentB2(this EmitterContext context, Operand a)
            => context.Add(Instruction.FP32 | Instruction.ExponentB2, Local(), a);

        public static Operand FPFloor(this EmitterContext context, Operand a, Instruction fpType = Instruction.FP32)
            => context.Add(fpType | Instruction.Floor, Local(), a);

        public static Operand FPFusedMultiplyAdd(this EmitterContext context, Operand a, Operand b, Operand c, Instruction fpType = Instruction.FP32)
            => context.Add(fpType | Instruction.FusedMultiplyAdd, Local(), a, b, c);

        public static Operand FPLogarithmB2(this EmitterContext context, Operand a)
            => context.Add(Instruction.FP32 | Instruction.LogarithmB2, Local(), a);

        public static Operand FPMaximum(this EmitterContext context, Operand a, Operand b, Instruction fpType = Instruction.FP32)
            => context.Add(fpType | Instruction.Maximum, Local(), a, b);

        public static Operand FPMinimum(this EmitterContext context, Operand a, Operand b, Instruction fpType = Instruction.FP32)
            => context.Add(fpType | Instruction.Minimum, Local(), a, b);

        public static Operand FPMultiply(this EmitterContext context, Operand a, Operand b, Instruction fpType = Instruction.FP32)
            => context.Add(fpType | Instruction.Multiply, Local(), a, b);

        /// <summary>Returns <paramref name="a"/> untouched unless <paramref name="neg"/> is set.</summary>
        public static Operand FPNegate(this EmitterContext context, Operand a, bool neg, Instruction fpType = Instruction.FP32)
            => neg ? context.FPNegate(a, fpType) : a;

        public static Operand FPNegate(this EmitterContext context, Operand a, Instruction fpType = Instruction.FP32)
            => context.Add(fpType | Instruction.Negate, Local(), a);

        /// <summary>
        /// Emits 1 / <paramref name="a"/> as a division. For FP64 the constant one is
        /// built with PackDouble2x32(1.0); otherwise ConstF(1) is used.
        /// </summary>
        public static Operand FPReciprocal(this EmitterContext context, Operand a, Instruction fpType = Instruction.FP32)
        {
            Operand one = fpType == Instruction.FP64 ? context.PackDouble2x32(1.0) : ConstF(1);

            return context.FPDivide(one, a, fpType);
        }

        public static Operand FPReciprocalSquareRoot(this EmitterContext context, Operand a, Instruction fpType = Instruction.FP32)
            => context.Add(fpType | Instruction.ReciprocalSquareRoot, Local(), a);

        public static Operand FPRound(this EmitterContext context, Operand a, Instruction fpType = Instruction.FP32)
            => context.Add(fpType | Instruction.Round, Local(), a);

        /// <summary>Returns <paramref name="a"/> untouched unless <paramref name="sat"/> is set.</summary>
        public static Operand FPSaturate(this EmitterContext context, Operand a, bool sat, Instruction fpType = Instruction.FP32)
            => sat ? context.FPSaturate(a, fpType) : a;

        /// <summary>
        /// Emits a clamp of <paramref name="a"/> between zero and one. The bounds are
        /// packed doubles for FP64 and ConstF values otherwise.
        /// </summary>
        public static Operand FPSaturate(this EmitterContext context, Operand a, Instruction fpType = Instruction.FP32)
        {
            if (fpType == Instruction.FP64)
            {
                return context.Add(fpType | Instruction.Clamp, Local(), a, context.PackDouble2x32(0.0), context.PackDouble2x32(1.0));
            }

            return context.Add(fpType | Instruction.Clamp, Local(), a, ConstF(0), ConstF(1));
        }

        public static Operand FPSine(this EmitterContext context, Operand a)
            => context.Add(Instruction.FP32 | Instruction.Sine, Local(), a);

        public static Operand FPSquareRoot(this EmitterContext context, Operand a)
            => context.Add(Instruction.FP32 | Instruction.SquareRoot, Local(), a);

        public static Operand FPTruncate(this EmitterContext context, Operand a, Instruction fpType = Instruction.FP32)
            => context.Add(fpType | Instruction.Truncate, Local(), a);

        /// <summary>Emits SwizzleAdd with <paramref name="mask"/> passed as a constant operand.</summary>
        public static Operand FPSwizzleAdd(this EmitterContext context, Operand a, Operand b, int mask)
            => context.Add(Instruction.SwizzleAdd, Local(), a, b, Const(mask));

        // ---- FSI markers and barriers ----

        public static void FSIBegin(this EmitterContext context)
            => context.Add(Instruction.FSIBegin);

        public static void FSIEnd(this EmitterContext context)
            => context.Add(Instruction.FSIEnd);

        public static Operand GroupMemoryBarrier(this EmitterContext context)
            => context.Add(Instruction.GroupMemoryBarrier);

        // ---- Integer ----

        /// <summary>Applies the optional absolute value first, then the optional negation.</summary>
        public static Operand IAbsNeg(this EmitterContext context, Operand a, bool abs, bool neg)
            => context.INegate(context.IAbsolute(a, abs), neg);

        /// <summary>Returns <paramref name="a"/> untouched unless <paramref name="abs"/> is set.</summary>
        public static Operand IAbsolute(this EmitterContext context, Operand a, bool abs)
            => abs ? context.IAbsolute(a) : a;

        public static Operand IAbsolute(this EmitterContext context, Operand a)
            => context.Add(Instruction.Absolute, Local(), a);

        public static Operand IAdd(this EmitterContext context, Operand a, Operand b)
            => context.Add(Instruction.Add, Local(), a, b);

        public static Operand IClampS32(this EmitterContext context, Operand a, Operand b, Operand c)
            => context.Add(Instruction.Clamp, Local(), a, b, c);

        public static Operand IClampU32(this EmitterContext context, Operand a, Operand b, Operand c)
            => context.Add(Instruction.ClampU32, Local(), a, b, c);

        public static Operand ICompareEqual(this EmitterContext context, Operand a, Operand b)
            => context.Add(Instruction.CompareEqual, Local(), a, b);

        public static Operand ICompareGreater(this EmitterContext context, Operand a, Operand b)
            => context.Add(Instruction.CompareGreater, Local(), a, b);

        public static Operand ICompareGreaterOrEqual(this EmitterContext context, Operand a, Operand b)
            => context.Add(Instruction.CompareGreaterOrEqual, Local(), a, b);

        public static Operand ICompareGreaterOrEqualUnsigned(this EmitterContext context, Operand a, Operand b)
            => context.Add(Instruction.CompareGreaterOrEqualU32, Local(), a, b);

        public static Operand ICompareGreaterUnsigned(this EmitterContext context, Operand a, Operand b)
            => context.Add(Instruction.CompareGreaterU32, Local(), a, b);

        public static Operand ICompareLess(this EmitterContext context, Operand a, Operand b)
            => context.Add(Instruction.CompareLess, Local(), a, b);

        public static Operand ICompareLessOrEqual(this EmitterContext context, Operand a, Operand b)
            => context.Add(Instruction.CompareLessOrEqual, Local(), a, b);

        public static Operand ICompareLessOrEqualUnsigned(this EmitterContext context, Operand a, Operand b)
            => context.Add(Instruction.CompareLessOrEqualU32, Local(), a, b);

        public static Operand ICompareLessUnsigned(this EmitterContext context, Operand a, Operand b)
            => context.Add(Instruction.CompareLessU32, Local(), a, b);

        public static Operand ICompareNotEqual(this EmitterContext context, Operand a, Operand b)
            => context.Add(Instruction.CompareNotEqual, Local(), a, b);

        public static Operand IConvertS32ToFP32(this EmitterContext context, Operand a)
            => context.Add(Instruction.ConvertS32ToFP32, Local(), a);

        public static Operand IConvertS32ToFP64(this EmitterContext context, Operand a)
            => context.Add(Instruction.ConvertS32ToFP64, Local(), a);

        public static Operand IConvertU32ToFP32(this EmitterContext context, Operand a)
            => context.Add(Instruction.ConvertU32ToFP32, Local(), a);

        public static Operand IConvertU32ToFP64(this EmitterContext context, Operand a)
            => context.Add(Instruction.ConvertU32ToFP64, Local(), a);

        public static Operand IMaximumS32(this EmitterContext context, Operand a, Operand b)
            => context.Add(Instruction.Maximum, Local(), a, b);

        public static Operand IMaximumU32(this EmitterContext context, Operand a, Operand b)
            => context.Add(Instruction.MaximumU32, Local(), a, b);

        public static Operand IMinimumS32(this EmitterContext context, Operand a, Operand b)
            => context.Add(Instruction.Minimum, Local(), a, b);

        public static Operand IMinimumU32(this EmitterContext context, Operand a, Operand b)
            => context.Add(Instruction.MinimumU32, Local(), a, b);

        public static Operand IMultiply(this EmitterContext context, Operand a, Operand b)
            => context.Add(Instruction.Multiply, Local(), a, b);

        /// <summary>Returns <paramref name="a"/> untouched unless <paramref name="neg"/> is set.</summary>
        public static Operand INegate(this EmitterContext context, Operand a, bool neg)
            => neg ? context.INegate(a) : a;

        public static Operand INegate(this EmitterContext context, Operand a)
            => context.Add(Instruction.Negate, Local(), a);

        public static Operand ISubtract(this EmitterContext context, Operand a, Operand b)
            => context.Add(Instruction.Subtract, Local(), a, b);

        public static Operand IsNan(this EmitterContext context, Operand a, Instruction fpType = Instruction.FP32)
            => context.Add(fpType | Instruction.IsNan, Local(), a);

        // ---- Loads ----
        // The IoVariable is always passed as an integer constant operand. The
        // primVertex operand is only included in the source list when it is non-null.

        public static Operand Load(this EmitterContext context, StorageKind storageKind, IoVariable ioVariable, Operand primVertex = null)
        {
            if (primVertex != null)
            {
                return context.Add(Instruction.Load, storageKind, Local(), Const((int)ioVariable), primVertex);
            }

            return context.Add(Instruction.Load, storageKind, Local(), Const((int)ioVariable));
        }

        public static Operand Load(
            this EmitterContext context,
            StorageKind storageKind,
            IoVariable ioVariable,
            Operand primVertex,
            Operand elemIndex)
        {
            if (primVertex != null)
            {
                return context.Add(Instruction.Load, storageKind, Local(), Const((int)ioVariable), primVertex, elemIndex);
            }

            return context.Add(Instruction.Load, storageKind, Local(), Const((int)ioVariable), elemIndex);
        }

        public static Operand Load(
            this EmitterContext context,
            StorageKind storageKind,
            IoVariable ioVariable,
            Operand primVertex,
            Operand arrayIndex,
            Operand elemIndex)
        {
            if (primVertex != null)
            {
                return context.Add(Instruction.Load, storageKind, Local(), Const((int)ioVariable), primVertex, arrayIndex, elemIndex);
            }

            return context.Add(Instruction.Load, storageKind, Local(), Const((int)ioVariable), arrayIndex, elemIndex);
        }

        /// <summary>
        /// Emits LoadConstant on (a, b). Before emitting, the shader config is told
        /// either which constant buffer is used (when <paramref name="a"/> is a
        /// constant operand) or that the CbIndexing feature is needed.
        /// </summary>
        public static Operand LoadConstant(this EmitterContext context, Operand a, Operand b)
        {
            bool slotIsConstant = a.Type == OperandType.Constant;

            if (slotIsConstant)
            {
                context.Config.SetUsedConstantBuffer(a.Value);
            }
            else
            {
                context.Config.SetUsedFeature(FeatureFlags.CbIndexing);
            }

            return context.Add(Instruction.LoadConstant, Local(), a, b);
        }

        public static Operand LoadGlobal(this EmitterContext context, Operand a, Operand b)
            => context.Add(Instruction.LoadGlobal, Local(), a, b);

        public static Operand LoadLocal(this EmitterContext context, Operand a)
            => context.Add(Instruction.LoadLocal, Local(), a);

        public static Operand LoadShared(this EmitterContext context, Operand a)
            => context.Add(Instruction.LoadShared, Local(), a);

        public static Operand MemoryBarrier(this EmitterContext context)
            => context.Add(Instruction.MemoryBarrier);

        public static Operand MultiplyHighS32(this EmitterContext context, Operand a, Operand b)
            => context.Add(Instruction.MultiplyHighS32, Local(), a, b);

        public static Operand MultiplyHighU32(this EmitterContext context, Operand a, Operand b)
            => context.Add(Instruction.MultiplyHighU32, Local(), a, b);

        // ---- Packing ----

        /// <summary>
        /// Emits PackDouble2x32 for a literal double: the low 32 bits of its bit
        /// pattern are passed first, the high 32 bits second.
        /// </summary>
        public static Operand PackDouble2x32(this EmitterContext context, double value)
        {
            long bits = BitConverter.DoubleToInt64Bits(value);

            Operand low = Const((int)bits);
            Operand high = Const((int)(bits >> 32));

            return context.Add(Instruction.PackDouble2x32, Local(), low, high);
        }

        public static Operand PackDouble2x32(this EmitterContext context, Operand a, Operand b)
            => context.Add(Instruction.PackDouble2x32, Local(), a, b);

        public static Operand PackHalf2x16(this EmitterContext context, Operand a, Operand b)
            => context.Add(Instruction.PackHalf2x16, Local(), a, b);

        // ---- Returns ----
        // Both overloads call context.PrepareForReturn() before emitting Return.

        public static void Return(this EmitterContext context)
        {
            context.PrepareForReturn();
            context.Add(Instruction.Return);
        }

        public static void Return(this EmitterContext context, Operand returnValue)
        {
            context.PrepareForReturn();
            context.Add(Instruction.Return, null, returnValue);
        }

        // ---- Shifts and shuffles ----

        public static Operand ShiftLeft(this EmitterContext context, Operand a, Operand b)
            => context.Add(Instruction.ShiftLeft, Local(), a, b);

        public static Operand ShiftRightS32(this EmitterContext context, Operand a, Operand b)
            => context.Add(Instruction.ShiftRightS32, Local(), a, b);

        public static Operand ShiftRightU32(this EmitterContext context, Operand a, Operand b)
            => context.Add(Instruction.ShiftRightU32, Local(), a, b);

        // The shuffle helpers pass a pair of fresh locals and return a pair.

        public static (Operand, Operand) Shuffle(this EmitterContext context, Operand a, Operand b, Operand c)
            => context.Add(Instruction.Shuffle, (Local(), Local()), a, b, c);

        public static (Operand, Operand) ShuffleDown(this EmitterContext context, Operand a, Operand b, Operand c)
            => context.Add(Instruction.ShuffleDown, (Local(), Local()), a, b, c);

        public static (Operand, Operand) ShuffleUp(this EmitterContext context, Operand a, Operand b, Operand c)
            => context.Add(Instruction.ShuffleUp, (Local(), Local()), a, b, c);

        public static (Operand, Operand) ShuffleXor(this EmitterContext context, Operand a, Operand b, Operand c)
            => context.Add(Instruction.ShuffleXor, (Local(), Local()), a, b, c);

        // ---- Stores ----
        // Stores pass null in the destination slot. The invocationId operand is only
        // included in the source list when it is non-null.

        public static Operand Store(
            this EmitterContext context,
            StorageKind storageKind,
            IoVariable ioVariable,
            Operand invocationId,
            Operand value)
        {
            if (invocationId != null)
            {
                return context.Add(Instruction.Store, storageKind, null, Const((int)ioVariable), invocationId, value);
            }

            return context.Add(Instruction.Store, storageKind, null, Const((int)ioVariable), value);
        }

        public static Operand Store(
            this EmitterContext context,
            StorageKind storageKind,
            IoVariable ioVariable,
            Operand invocationId,
            Operand elemIndex,
            Operand value)
        {
            if (invocationId != null)
            {
                return context.Add(Instruction.Store, storageKind, null, Const((int)ioVariable), invocationId, elemIndex, value);
            }

            return context.Add(Instruction.Store, storageKind, null, Const((int)ioVariable), elemIndex, value);
        }

        public static Operand Store(
            this EmitterContext context,
            StorageKind storageKind,
            IoVariable ioVariable,
            Operand invocationId,
            Operand arrayIndex,
            Operand elemIndex,
            Operand value)
        {
            if (invocationId != null)
            {
                return context.Add(Instruction.Store, storageKind, null, Const((int)ioVariable), invocationId, arrayIndex, elemIndex, value);
            }

            return context.Add(Instruction.Store, storageKind, null, Const((int)ioVariable), arrayIndex, elemIndex, value);
        }

        public static Operand StoreGlobal(this EmitterContext context, Operand a, Operand b, Operand c)
            => context.Add(Instruction.StoreGlobal, null, a, b, c);

        public static Operand StoreGlobal16(this EmitterContext context, Operand a, Operand b, Operand c)
            => context.Add(Instruction.StoreGlobal16, null, a, b, c);

        public static Operand StoreGlobal8(this EmitterContext context, Operand a, Operand b, Operand c)
            => context.Add(Instruction.StoreGlobal8, null, a, b, c);

        public static Operand StoreLocal(this EmitterContext context, Operand a, Operand b)
            => context.Add(Instruction.StoreLocal, null, a, b);

        public static Operand StoreShared(this EmitterContext context, Operand a, Operand b)
            => context.Add(Instruction.StoreShared, null, a, b);

        public static Operand StoreShared16(this EmitterContext context, Operand a, Operand b)
            => context.Add(Instruction.StoreShared16, null, a, b);

        public static Operand StoreShared8(this EmitterContext context, Operand a, Operand b)
            => context.Add(Instruction.StoreShared8, null, a, b);

        // ---- Unpacking ----
        // The High variants request index 1 and the Low variants index 0.

        public static Operand UnpackDouble2x32High(this EmitterContext context, Operand a)
            => UnpackDouble2x32(context, a, 1);

        public static Operand UnpackDouble2x32Low(this EmitterContext context, Operand a)
            => UnpackDouble2x32(context, a, 0);

        /// <summary>
        /// Adds an explicitly constructed UnpackDouble2x32 operation carrying
        /// <paramref name="index"/> and returns its destination local.
        /// </summary>
        private static Operand UnpackDouble2x32(this EmitterContext context, Operand a, int index)
        {
            Operand result = Local();

            context.Add(new Operation(Instruction.UnpackDouble2x32, index, result, a));

            return result;
        }

        public static Operand UnpackHalf2x16High(this EmitterContext context, Operand a)
            => UnpackHalf2x16(context, a, 1);

        public static Operand UnpackHalf2x16Low(this EmitterContext context, Operand a)
            => UnpackHalf2x16(context, a, 0);

        /// <summary>
        /// Adds an explicitly constructed UnpackHalf2x16 operation carrying
        /// <paramref name="index"/> and returns its destination local.
        /// </summary>
        private static Operand UnpackHalf2x16(this EmitterContext context, Operand a, int index)
        {
            Operand result = Local();

            context.Add(new Operation(Instruction.UnpackHalf2x16, index, result, a));

            return result;
        }

        // ---- Votes ----

        public static Operand VoteAll(this EmitterContext context, Operand a)
            => context.Add(Instruction.VoteAll, Local(), a);

        public static Operand VoteAllEqual(this EmitterContext context, Operand a)
            => context.Add(Instruction.VoteAllEqual, Local(), a);

        public static Operand VoteAny(this EmitterContext context, Operand a)
            => context.Add(Instruction.VoteAny, Local(), a);
    }
}
\ No newline at end of file diff --git a/src/Ryujinx.Graphics.Shader/Translation/FeatureFlags.cs b/src/Ryujinx.Graphics.Shader/Translation/FeatureFlags.cs new file mode 100644 index 00000000..c035f212 --- /dev/null +++ b/src/Ryujinx.Graphics.Shader/Translation/FeatureFlags.cs @@ -0,0 +1,27 @@
using System;

namespace Ryujinx.Graphics.Shader.Translation
{
    /// <summary>
    /// Features used by the shader that are important for the code generator to know in advance.
    /// These typically change the declarations in the shader header.
    /// </summary>
    [Flags]
    public enum FeatureFlags
    {
        None = 0,

        // Affected by resolution scaling.
        IntegerSampling = 1 << 0,
        FragCoordXY = 1 << 1,

        Bindless = 1 << 2,
        InstanceId = 1 << 3,
        DrawParameters = 1 << 4,
        RtLayer = 1 << 5,
        CbIndexing = 1 << 6,
        IaIndexing = 1 << 7,
        OaIndexing = 1 << 8,
        FixedFuncAttr = 1 << 9
    }
}
diff --git a/src/Ryujinx.Graphics.Shader/Translation/FunctionMatch.cs b/src/Ryujinx.Graphics.Shader/Translation/FunctionMatch.cs new file mode 100644 index 00000000..073e120a --- /dev/null +++ b/src/Ryujinx.Graphics.Shader/Translation/FunctionMatch.cs @@ -0,0 +1,866 @@
using Ryujinx.Graphics.Shader.Decoders;
using System;
using System.Collections.Generic;
using System.Runtime.CompilerServices;

namespace Ryujinx.Graphics.Shader.Translation
{
    /// <summary>
    /// Recognizes decoded shader functions whose instruction trees match a set of
    /// fixed FSI patterns and tags them with the corresponding <see cref="FunctionType"/>.
    /// </summary>
    static class FunctionMatch
    {
        private static IPatternTreeNode[] _fsiGetAddressTree = PatternTrees.GetFsiGetAddress();
        private static IPatternTreeNode[] _fsiGetAddressV2Tree = PatternTrees.GetFsiGetAddressV2();
        private static IPatternTreeNode[] _fsiIsLastWarpThreadPatternTree = PatternTrees.GetFsiIsLastWarpThread();
        private static IPatternTreeNode[] _fsiBeginPatternTree = PatternTrees.GetFsiBeginPattern();
        private static IPatternTreeNode[] _fsiEndPatternTree = PatternTrees.GetFsiEndPattern();

        /// <summary>
        /// Runs the matching pass over <paramref name="program"/>.
        /// </summary>
        /// <remarks>
        /// First, every function except the main one is compared against the two
        /// "get address" patterns. The first function whose destination registers
        /// also pass the allocation check is marked <see cref="FunctionType.Unused"/>
        /// and the search stops. Only if such a function was found are the remaining
        /// functions tested against the FSI begin and end patterns.
        /// </remarks>
        /// <param name="program">Decoded program whose functions are inspected and tagged</param>
        public static void RunPass(DecodedProgram program)
        {
            // Slot 0 is filled in by MatchesFsi; slots 1 to 3 come from the get-address match.
            byte[] externalRegs = new byte[4];
            bool hasGetAddress = false;

            foreach (DecodedFunction function in program)
            {
                if (function == program.MainFunction)
                {
                    continue;
                }

                int externalReg4 = 0;

                TreeNode[] functionTree = BuildTree(function.Blocks);

                if (Matches(_fsiGetAddressTree, functionTree))
                {
                    externalRegs[1] = functionTree[0].GetRd();
                    externalRegs[2] = functionTree[2].GetRd();
                    externalRegs[3] = functionTree[1].GetRd();
                    externalReg4 = functionTree[3].GetRd();
                }
                else if (Matches(_fsiGetAddressV2Tree, functionTree))
                {
                    externalRegs[1] = functionTree[2].GetRd();
                    externalRegs[2] = functionTree[1].GetRd();
                    externalRegs[3] = functionTree[0].GetRd();
                    externalReg4 = functionTree[3].GetRd();
                }

                // Ensure the register allocation is valid.
                // If so, then we have a match.
                int reg1 = externalRegs[1];
                int reg2 = externalRegs[2];
                int reg3 = externalRegs[3];

                bool allDistinct = reg1 != reg2 && reg2 != reg3 && reg1 != reg3;

                // The register right after reg1 must be the fourth register and nothing else.
                bool nextRegIsReg4Only = reg1 + 1 != reg2 && reg1 + 1 != reg3 && reg1 + 1 == externalReg4;

                bool noneIsZeroReg =
                    reg2 != RegisterConsts.RegisterZeroIndex &&
                    reg3 != RegisterConsts.RegisterZeroIndex &&
                    externalReg4 != RegisterConsts.RegisterZeroIndex;

                if (allDistinct && nextRegIsReg4Only && noneIsZeroReg)
                {
                    hasGetAddress = true;
                    function.Type = FunctionType.Unused;
                    break;
                }
            }

            foreach (DecodedFunction function in program)
            {
                if (function.IsCompilerGenerated || function == program.MainFunction || !hasGetAddress)
                {
                    continue;
                }

                TreeNode[] functionTree = BuildTree(function.Blocks);

                // The begin pattern is always tried before the end pattern.
                if (MatchesFsi(_fsiBeginPatternTree, program, function, functionTree, externalRegs))
                {
                    function.Type = FunctionType.BuiltInFSIBegin;
                }
                else if (MatchesFsi(_fsiEndPatternTree, program, function, functionTree, externalRegs))
                {
                    function.Type = FunctionType.BuiltInFSIEnd;
                }
            }
        }

        /// <summary>
        /// Reference from one tree node to the value produced by another node.
        /// </summary>
        /// <remarks>
        /// When <see cref="Node"/> is null the use does not refer to a node built from
        /// the function; <see cref="Index"/> is then negative (see <c>BuildTree</c>).
        /// </remarks>
        private readonly struct TreeNodeUse
        {
            public TreeNode Node { get; }
            public int Index { get; }
            public bool Inverted { get; }

            private TreeNodeUse(int index, bool inverted, TreeNode node)
            {
                Index = index;
                Inverted = inverted;
                Node = node;
            }

            public TreeNodeUse(int index, TreeNode node) : this(index, false, node)
            {
            }

            /// <summary>Returns a copy of this use with the inverted flag toggled.</summary>
            public TreeNodeUse Flip() => new TreeNodeUse(Index, !Inverted, Node);
        }

        private enum TreeNodeType : byte
        {
            Op,
            Label
        }

        /// <summary>
        /// Node of the tree built from a function: either an instruction with its
        /// uses, or a label (which has no uses list and a default <see cref="Op"/>).
        /// </summary>
        private class TreeNode
        {
            public readonly InstOp Op;
            public readonly List<TreeNodeUse> Uses;
            public TreeNodeType Type { get; }
            public byte Order { get; }

            /// <summary>Creates a label node.</summary>
            public TreeNode(byte order)
            {
                Type = TreeNodeType.Label;
                Order = order;
            }

            /// <summary>Creates an instruction node with an empty uses list.</summary>
            public TreeNode(InstOp op, byte order)
            {
                Op = op;
                Uses = new List<TreeNodeUse>();
                Type = TreeNodeType.Op;
                Order = order;
            }

            /// <summary>Extracts bits 3 to 5 of the raw opcode.</summary>
            public byte GetPd() => (byte)((Op.RawOpCode >> 3) & 7);

            /// <summary>Extracts the low 8 bits of the raw opcode.</summary>
            public byte GetRd() => (byte)Op.RawOpCode;
        }

        /// <summary>
        /// Converts the blocks of a function into an array of root tree nodes.
        /// </summary>
        /// <remarks>
        /// Each instruction becomes a node. When an instruction reads a predicate or
        /// register whose latest definition is an earlier node, that earlier node is
        /// removed from the root list and linked as a use instead. Reads without a
        /// recorded definition get a use with a null node and index
        /// <c>-(registerIndex + 2)</c>; reads of the predicate-true or zero register
        /// get index <c>-1</c>. A label node is inserted in front of every block that
        /// has more than one predecessor.
        /// </remarks>
        /// <param name="blocks">Blocks of the function, in the order they should be visited</param>
        /// <returns>The nodes left in the root list after all uses were resolved</returns>
        private static TreeNode[] BuildTree(Block[] blocks)
        {
            List<TreeNode> nodes = new List<TreeNode>();

            Dictionary<ulong, TreeNode> labels = new Dictionary<ulong, TreeNode>();

            // Latest definition seen so far for each predicate and general purpose register.
            TreeNodeUse[] predDefs = new TreeNodeUse[RegisterConsts.PredsCount];
            TreeNodeUse[] gprDefs = new TreeNodeUse[RegisterConsts.GprsCount];

            void DefPred(byte predIndex, int index, TreeNode node)
            {
                if (predIndex == RegisterConsts.PredicateTrueIndex)
                {
                    return;
                }

                predDefs[predIndex] = new TreeNodeUse(index, node);
            }

            void DefGpr(byte regIndex, int index, TreeNode node)
            {
                if (regIndex == RegisterConsts.RegisterZeroIndex)
                {
                    return;
                }

                gprDefs[regIndex] = new TreeNodeUse(index, node);
            }

            TreeNodeUse UsePred(byte predIndex, bool predInv)
            {
                if (predIndex == RegisterConsts.PredicateTrueIndex)
                {
                    return new TreeNodeUse(-1, null);
                }

                TreeNodeUse use = predDefs[predIndex];

                if (use.Node != null)
                {
                    // The defining node now hangs off its user rather than the root list.
                    nodes.Remove(use.Node);
                }
                else
                {
                    use = new TreeNodeUse(-(predIndex + 2), null);
                }

                return predInv ? use.Flip() : use;
            }

            TreeNodeUse UseGpr(byte regIndex)
            {
                if (regIndex == RegisterConsts.RegisterZeroIndex)
                {
                    return new TreeNodeUse(-1, null);
                }

                TreeNodeUse use = gprDefs[regIndex];

                if (use.Node != null)
                {
                    // The defining node now hangs off its user rather than the root list.
                    nodes.Remove(use.Node);
                }
                else
                {
                    use = new TreeNodeUse(-(regIndex + 2), null);
                }

                return use;
            }

            byte order = 0;

            foreach (Block block in blocks)
            {
                if (block.Predecessors.Count > 1)
                {
                    TreeNode blockLabel = new TreeNode(order++);
                    nodes.Add(blockLabel);
                    labels.Add(block.Address, blockLabel);
                }

                for (int opIndex = 0; opIndex < block.OpCodes.Count; opIndex++)
                {
                    InstOp op = block.OpCodes[opIndex];
                    ulong raw = op.RawOpCode;
                    InstProps props = op.Props;

                    TreeNode node = new TreeNode(op, IsOrderDependant(op.Name) ? order : (byte)0);

                    // Add uses.

                    if (!props.HasFlag(InstProps.NoPred))
                    {
                        byte predIndex = (byte)((raw >> 16) & 7);
                        bool predInv = (raw & 0x80000) != 0;
                        node.Uses.Add(UsePred(predIndex, predInv));
                    }

                    if (props.HasFlag(InstProps.Ps))
                    {
                        byte predIndex = (byte)((raw >> 39) & 7);
                        bool predInv = (raw & 0x40000000000) != 0;
                        node.Uses.Add(UsePred(predIndex, predInv));
                    }

                    if (props.HasFlag(InstProps.Ra))
                    {
                        byte ra = (byte)(raw >> 8);
                        node.Uses.Add(UseGpr(ra));
                    }

                    if ((props & (InstProps.Rb | InstProps.Rb2)) != 0)
                    {
                        // Rb2 reads the register index from the low byte instead of bit 20.
                        byte rb = props.HasFlag(InstProps.Rb2) ? (byte)raw : (byte)(raw >> 20);
                        node.Uses.Add(UseGpr(rb));
                    }

                    if (props.HasFlag(InstProps.Rc))
                    {
                        byte rc = (byte)(raw >> 39);
                        node.Uses.Add(UseGpr(rc));
                    }

                    // Only branches to a label that was already created get a label use.
                    if (op.Name == InstName.Bra && labels.TryGetValue(op.GetAbsoluteAddress(), out TreeNode targetLabel))
                    {
                        node.Uses.Add(new TreeNodeUse(0, targetLabel));
                    }

                    // Make definitions.

                    int defIndex = 0;

                    InstProps pdType = props & InstProps.PdMask;

                    if (pdType != 0)
                    {
                        int bit = pdType switch
                        {
                            InstProps.Pd => 3,
                            InstProps.LPd => 48,
                            InstProps.SPd => 30,
                            InstProps.TPd => 51,
                            InstProps.VPd => 45,
                            _ => throw new InvalidOperationException($"Table has unknown predicate destination {pdType}.")
                        };

                        byte predIndex = (byte)((raw >> bit) & 7);
                        DefPred(predIndex, defIndex++, node);
                    }

                    if (props.HasFlag(InstProps.Rd))
                    {
                        byte rd = (byte)raw;
                        DefGpr(rd, defIndex++, node);
                    }

                    nodes.Add(node);
                }
            }

            return nodes.ToArray();
        }

        /// <summary>
        /// Tells whether nodes for <paramref name="name"/> record the current order
        /// value in <c>BuildTree</c> (true) or always get order zero (false).
        /// </summary>
        private static bool IsOrderDependant(InstName name)
        {
            return name switch
            {
                InstName.Atom or
                InstName.AtomCas or
                InstName.Atoms or
                InstName.AtomsCas or
                InstName.Ld or
                InstName.Ldg or
                InstName.Ldl or
                InstName.Lds or
                InstName.Suatom or
                InstName.SuatomB or
                InstName.SuatomB2 or
                InstName.SuatomCas or
                InstName.SuatomCasB or
                InstName.Suld or
                InstName.SuldB or
                InstName.SuldD or
                InstName.SuldDB => true,
                _ => false
            };
        }

        /// <summary>Node of a pattern that function trees are compared against.</summary>
        private interface IPatternTreeNode
        {
            List<PatternTreeNodeUse> Uses { get; }
            InstName Name { get; }
            TreeNodeType Type { get; }
            byte Order { get; }
            bool IsImm { get; }
            bool Matches(in InstOp opInfo);
        }

        /// <summary>Pattern-side counterpart of <see cref="TreeNodeUse"/>.</summary>
        private readonly struct PatternTreeNodeUse
        {
            public IPatternTreeNode Node { get; }
            public int Index { get; }
            public bool Inverted { get; }

            /// <summary>Same use with the inverted flag toggled.</summary>
            public PatternTreeNodeUse Inv => new PatternTreeNodeUse(Index, !Inverted, Node);

            private PatternTreeNodeUse(int index, bool inverted, IPatternTreeNode node)
            {
                Index = index;
                Inverted = inverted;
                Node = node;
            }

            public PatternTreeNodeUse(int index, IPatternTreeNode node) : this(index, false, node)
            {
            }
        }

        /// <summary>
        /// Pattern node whose predicate inspects the raw opcode reinterpreted as
        /// <typeparamref name="T"/>.
        /// </summary>
        private class PatternTreeNode<T> : IPatternTreeNode
        {
            public List<PatternTreeNodeUse> Uses { get; }
            private readonly Func<T, bool> _match;

            public InstName Name { get; }
            public TreeNodeType Type { get; }
            public byte Order { get; }
            public bool IsImm { get; }

            /// <summary>Use referring to output 0 of this node.</summary>
            public PatternTreeNodeUse Out => new PatternTreeNodeUse(0, this);

            public PatternTreeNode(InstName name, Func<T, bool> match, TreeNodeType type = TreeNodeType.Op, byte order = 0, bool isImm = false)
            {
                Name = name;
                _match = match;
                Type = type;
                Order = order;
                IsImm = isImm;
                Uses = new List<PatternTreeNodeUse>();
            }

            /// <summary>Appends a use and returns this node so calls can be chained.</summary>
            public PatternTreeNode<T> Use(PatternTreeNodeUse use)
            {
                Uses.Add(use);
                return this;
            }

            /// <summary>Use referring to output <paramref name="index"/> of this node.</summary>
            public PatternTreeNodeUse OutAt(int index) => new PatternTreeNodeUse(index, this);

            /// <summary>
            /// True when the instruction name equals <see cref="Name"/> and the
            /// predicate accepts the raw opcode viewed as <typeparamref name="T"/>.
            /// </summary>
            public bool Matches(in InstOp opInfo)
            {
                if (opInfo.Name != Name)
                {
                    return false;
                }

                ulong rawOp = opInfo.RawOpCode;

                return _match(Unsafe.As<ulong, T>(ref rawOp));
            }
        }

        /// <summary>
        /// Checks a function against an FSI begin/end pattern.
        /// </summary>
        /// <remarks>
        /// The first block must end in a call whose target matches the
        /// "is last warp thread" pattern. The predicate destination of that target's
        /// first node is stored in <c>externalRegs[0]</c> before the function tree is
        /// compared with <paramref name="pattern"/>. On success the function is
        /// removed from the call target's callers.
        /// </remarks>
        /// <returns>True if <paramref name="function"/> matches <paramref name="pattern"/></returns>
        private static bool MatchesFsi(
            IPatternTreeNode[] pattern,
            DecodedProgram program,
            DecodedFunction function,
            TreeNode[] functionTree,
            byte[] externalRegs)
        {
            if (function.Blocks.Length == 0)
            {
                return false;
            }

            InstOp callOp = function.Blocks[0].GetLastOp();

            if (callOp.Name != InstName.Cal)
            {
                return false;
            }

            DecodedFunction callTarget = program.GetFunctionByAddress(callOp.GetAbsoluteAddress());

            if (callTarget == null)
            {
                return false;
            }

            TreeNode[] callTargetTree = BuildTree(callTarget.Blocks);

            if (!Matches(_fsiIsLastWarpThreadPatternTree, callTargetTree))
            {
                return false;
            }

            externalRegs[0] = callTargetTree[0].GetPd();

            if (!Matches(pattern, functionTree, externalRegs))
            {
                return false;
            }

            callTarget.RemoveCaller(function);

            return true;
        }

        /// <summary>
        /// Compares two root lists: they must have the same length and match pairwise.
        /// </summary>
        private static bool Matches(IPatternTreeNode[] pTree, TreeNode[] cTree, byte[] externalRegs = null)
        {
            if (pTree.Length != cTree.Length)
            {
                return false;
            }

            for (int index = 0; index < pTree.Length; index++)
            {
                if (!Matches(pTree[index], cTree[index], externalRegs))
                {
                    return false;
                }
            }

            return true;
        }

        /// <summary>
        /// Recursively compares a pattern node with a function tree node.
        /// </summary>
        /// <remarks>
        /// A pattern use with index of -2 or below is looked up in
        /// <paramref name="externalRegs"/> at position <c>-index - 2</c> and compared
        /// against the register encoded by the function's use; any other index must
        /// be equal on both sides.
        /// </remarks>
        private static bool Matches(IPatternTreeNode pTreeNode, TreeNode cTreeNode, byte[] externalRegs)
        {
            if (!pTreeNode.Matches(in cTreeNode.Op))
            {
                return false;
            }

            if (pTreeNode.Type != cTreeNode.Type || pTreeNode.Order != cTreeNode.Order)
            {
                return false;
            }

            if (pTreeNode.IsImm != cTreeNode.Op.Props.HasFlag(InstProps.Ib))
            {
                return false;
            }

            if (pTreeNode.Type != TreeNodeType.Op)
            {
                return true;
            }

            if (pTreeNode.Uses.Count != cTreeNode.Uses.Count)
            {
                return false;
            }

            for (int index = 0; index < pTreeNode.Uses.Count; index++)
            {
                var pUse = pTreeNode.Uses[index];
                var cUse = cTreeNode.Uses[index];

                if (pUse.Index <= -2)
                {
                    if (externalRegs[-pUse.Index - 2] != (-cUse.Index - 2))
                    {
                        return false;
                    }
                }
                else if (pUse.Index != cUse.Index)
                {
                    return false;
                }

                if (pUse.Inverted != cUse.Inverted)
                {
                    return false;
                }

                // Either both sides refer to a node, or neither does.
                if ((pUse.Node == null) != (cUse.Node == null))
                {
                    return false;
                }

                if (pUse.Node != null && !Matches(pUse.Node, cUse.Node, externalRegs))
                {
                    return false;
                }
            }

            return true;
        }

        /// <summary>Builders for the fixed patterns used by the matching pass.</summary>
        private static class PatternTrees
        {
            public static IPatternTreeNode[] GetFsiGetAddress()
            {
                var affinityValue = S2r(SReg.Affinity).Use(PT).Out;
                var orderingTicketValue = S2r(SReg.OrderingTicket).Use(PT).Out;

                return new IPatternTreeNode[]
                {
                    Iscadd(cc: true, 2, 0, 404)
                        .Use(PT)
                        .Use(Iscadd(cc: false, 8)
                            .Use(PT)
                            .Use(Lop32i(LogicOp.And, 0xff)
                                .Use(PT)
                                .Use(affinityValue).Out)
                            .Use(Lop32i(LogicOp.And, 0xff)
                                .Use(PT)
                                .Use(orderingTicketValue).Out).Out),
                    ShrU32W(16)
                        .Use(PT)
                        .Use(orderingTicketValue),
                    Iadd32i(0x200)
                        .Use(PT)
                        .Use(Lop32i(LogicOp.And, 0xfe00)
                            .Use(PT)
                            .Use(orderingTicketValue).Out),
                    Iadd(x: true, 0, 405).Use(PT).Use(RZ),
                    Ret().Use(PT)
                };
            }

            public static IPatternTreeNode[] GetFsiGetAddressV2()
            {
                var affinityValue = S2r(SReg.Affinity).Use(PT).Out;
                var orderingTicketValue = S2r(SReg.OrderingTicket).Use(PT).Out;

                return new IPatternTreeNode[]
                {
                    ShrU32W(16)
                        .Use(PT)
                        .Use(orderingTicketValue),
                    Iadd32i(0x200)
                        .Use(PT)
                        .Use(Lop32i(LogicOp.And, 0xfe00)
                            .Use(PT)
                            .Use(orderingTicketValue).Out),
                    Iscadd(cc: true, 2, 0, 404)
                        .Use(PT)
                        .Use(Bfi(0x808)
                            .Use(PT)
                            .Use(affinityValue)
                            .Use(Lop32i(LogicOp.And, 0xff)
                                .Use(PT)
                                .Use(orderingTicketValue).Out).Out),
                    Iadd(x: true, 0, 405).Use(PT).Use(RZ),
                    Ret().Use(PT)
                };
            }

            public static IPatternTreeNode[] GetFsiIsLastWarpThread()
            {
                var threadKillValue = S2r(SReg.ThreadKill).Use(PT).Out;
                var laneIdValue = S2r(SReg.LaneId).Use(PT).Out;

                return new IPatternTreeNode[]
                {
                    IsetpU32(IComp.Eq)
                        .Use(PT)
                        .Use(PT)
                        .Use(FloU32()
                            .Use(PT)
                            .Use(Vote(VoteMode.Any)
                                .Use(PT)
                                .Use(IsetpU32(IComp.Ne)
                                    .Use(PT)
                                    .Use(PT)
                                    .Use(Lop(negB: true, LogicOp.PassB)
                                        .Use(PT)
                                        .Use(RZ)
                                        .Use(threadKillValue).OutAt(1))
                                    .Use(RZ).Out).OutAt(1)).Out)
                        .Use(laneIdValue),
                    Ret().Use(PT)
                };
            }

            public static IPatternTreeNode[] GetFsiBeginPattern()
            {
                var addressLowValue = CallArg(1);

                // Pattern for: (x >> 16) compared for equality with call argument 3.
                static PatternTreeNodeUse HighU16Equals(PatternTreeNodeUse x)
                {
                    var expectedValue = CallArg(3);

                    return IsetpU32(IComp.Eq)
                        .Use(PT)
                        .Use(PT)
                        .Use(ShrU32W(16).Use(PT).Use(x).Out)
                        .Use(expectedValue).Out;
                }

                // The same label node is both a root entry and the target used by the branch.
                PatternTreeNode<byte> label = Label();

                return new IPatternTreeNode[]
                {
                    Cal(),
                    Ret().Use(CallArg(0).Inv),
                    Ret()
                        .Use(HighU16Equals(LdgE(CacheOpLd.Cg, LsSize.B32)
                            .Use(PT)
                            .Use(addressLowValue).Out)),
                    label,
                    Bra()
                        .Use(HighU16Equals(LdgE(CacheOpLd.Cg, LsSize.B32, 1)
                            .Use(PT)
                            .Use(addressLowValue).Out).Inv)
                        .Use(label.Out),
                    Ret().Use(PT)
                };
            }

            public static IPatternTreeNode[] GetFsiEndPattern()
            {
                var voteResult = Vote(VoteMode.All).Use(PT).Use(PT).OutAt(1);
                var popcResult = Popc().Use(PT).Use(voteResult).Out;
                var threadKillValue = S2r(SReg.ThreadKill).Use(PT).Out;
                var laneIdValue = S2r(SReg.LaneId).Use(PT).Out;

                var addressLowValue = CallArg(1);
                var incrementValue = CallArg(2);

                return new IPatternTreeNode[]
                {
                    Cal(),
                    Ret().Use(CallArg(0).Inv),
                    Membar(Decoders.Membar.Vc).Use(PT),
                    Ret().Use(IsetpU32(IComp.Ne)
                        .Use(PT)
                        .Use(PT)
                        .Use(threadKillValue)
                        .Use(RZ).Out),
                    RedE(RedOp.Add, AtomSize.U32)
                        .Use(IsetpU32(IComp.Eq)
                            .Use(PT)
                            .Use(PT)
                            .Use(FloU32()
                                .Use(PT)
                                .Use(voteResult).Out)
                            .Use(laneIdValue).Out)
                        .Use(addressLowValue)
                        .Use(Xmad(XmadCop.Cbcc, psl: true, hiloA: true, hiloB: true)
                            .Use(PT)
                            .Use(incrementValue)
                            .Use(Xmad(XmadCop.Cfull, mrg: true, hiloB: true)
                                .Use(PT)
                                .Use(incrementValue)
                                .Use(popcResult)
                                .Use(RZ).Out)
                            .Use(Xmad(XmadCop.Cfull)
                                .Use(PT)
                                .Use(incrementValue)
                                .Use(popcResult)
                                .Use(RZ).Out).Out),
                    Ret().Use(PT)
                };
            }

            // Factories for single pattern nodes. Each lambda states the opcode
            // fields an instruction must have for the node to accept it.

            private static PatternTreeNode<InstBfiI> Bfi(int imm)
                => new(InstName.Bfi, (op) => !op.WriteCC && op.Imm20 == imm, isImm: true);

            private static PatternTreeNode<InstBra> Bra()
                => new(InstName.Bra, (op) => op.Ccc == Ccc.T && !op.Ca);

            private static PatternTreeNode<InstCal> Cal()
                => new(InstName.Cal, (op) => !op.Ca && op.Inc);

            private static PatternTreeNode<InstFloR> FloU32()
                => new(InstName.Flo, (op) => !op.Signed && !op.Sh && !op.NegB && !op.WriteCC);

            private static PatternTreeNode<InstIaddC> Iadd(bool x, int cbufSlot, int cbufOffset)
                => new(InstName.Iadd, (op) =>
                    !op.Sat &&
                    !op.WriteCC &&
                    op.X == x &&
                    op.AvgMode == AvgMode.NoNeg &&
                    op.CbufSlot == cbufSlot &&
                    op.CbufOffset == cbufOffset);

            private static PatternTreeNode<InstIadd32i> Iadd32i(int imm)
                => new(InstName.Iadd32i, (op) => !op.Sat && !op.WriteCC && !op.X && op.AvgMode == AvgMode.NoNeg && op.Imm32 == imm);

            private static PatternTreeNode<InstIscaddR> Iscadd(bool cc, int imm)
                => new(InstName.Iscadd, (op) => op.WriteCC == cc && op.AvgMode == AvgMode.NoNeg && op.Imm5 == imm);

            private static PatternTreeNode<InstIscaddC> Iscadd(bool cc, int imm, int cbufSlot, int cbufOffset)
                => new(InstName.Iscadd, (op) =>
                    op.WriteCC == cc &&
                    op.AvgMode == AvgMode.NoNeg &&
                    op.Imm5 == imm &&
                    op.CbufSlot == cbufSlot &&
                    op.CbufOffset == cbufOffset);

            private static PatternTreeNode<InstIsetpR> IsetpU32(IComp comp)
                => new(InstName.Isetp, (op) => !op.Signed && op.IComp == comp && op.Bop == BoolOp.And);

            // Label nodes accept anything; only the node type and order are compared.
            private static PatternTreeNode<byte> Label()
                => new(InstName.Invalid, (op) => true, type: TreeNodeType.Label);

            private static PatternTreeNode<InstLopR> Lop(bool negB, LogicOp logicOp)
                => new(InstName.Lop, (op) => !op.NegA && op.NegB == negB && !op.WriteCC && !op.X && op.Lop == logicOp && op.PredicateOp == PredicateOp.F);

            private static PatternTreeNode<InstLop32i> Lop32i(LogicOp logicOp, int imm)
                => new(InstName.Lop32i, (op) => !op.NegA && !op.NegB && !op.X && !op.WriteCC && op.LogicOp == logicOp && op.Imm32 == imm);

            private static PatternTreeNode<InstMembar> Membar(Membar membar)
                => new(InstName.Membar, (op) => op.Membar == membar);

            private static PatternTreeNode<InstPopcR> Popc()
                => new(InstName.Popc, (op) => !op.NegB);

            private static PatternTreeNode<InstRet> Ret()
                => new(InstName.Ret, (op) => op.Ccc == Ccc.T);

            private static PatternTreeNode<InstS2r> S2r(SReg reg)
                => new(InstName.S2r, (op) => op.SReg == reg);

            private static PatternTreeNode<InstShrI> ShrU32W(int imm)
                => new(InstName.Shr, (op) => !op.Signed && !op.Brev && op.M && op.XMode == 0 && op.Imm20 == imm, isImm: true);

            private static PatternTreeNode<InstLdg> LdgE(CacheOpLd cacheOp, LsSize size, byte order = 0)
                => new(InstName.Ldg, (op) => op.E && op.CacheOp == cacheOp && op.LsSize == size, order: order);

            private static PatternTreeNode<InstRed> RedE(RedOp redOp, AtomSize size, byte order = 0)
            {
                return new(InstName.Red, (op) => op.E && op.RedOp == redOp && op.RedSize == size, order: 
order); + } + + private static PatternTreeNode<InstVote> Vote(VoteMode mode) + { + return new(InstName.Vote, (op) => op.VoteMode == mode); + } + + private static PatternTreeNode<InstXmadR> Xmad(XmadCop cop, bool psl = false, bool mrg = false, bool hiloA = false, bool hiloB = false) + { + return new(InstName.Xmad, (op) => op.XmadCop == cop && op.Psl == psl && op.Mrg == mrg && op.HiloA == hiloA && op.HiloB == hiloB); + } + + private static PatternTreeNodeUse PT => PTOrRZ(); + private static PatternTreeNodeUse RZ => PTOrRZ(); + private static PatternTreeNodeUse Undef => new PatternTreeNodeUse(0, null); + + private static PatternTreeNodeUse CallArg(int index) + { + return new PatternTreeNodeUse(-(index + 2), null); + } + + private static PatternTreeNodeUse PTOrRZ() + { + return new PatternTreeNodeUse(-1, null); + } + } + + private static void PrintTreeNode(TreeNode node, string indentation) + { + Console.WriteLine($" {node.Op.Name}"); + + for (int i = 0; i < node.Uses.Count; i++) + { + TreeNodeUse use = node.Uses[i]; + bool last = i == node.Uses.Count - 1; + char separator = last ? '`' : '|'; + + if (use.Node != null) + { + Console.Write($"{indentation} {separator}- ({(use.Inverted ? "INV " : "")}{use.Index})"); + PrintTreeNode(use.Node, indentation + (last ? " " : " | ")); + } + else + { + Console.WriteLine($"{indentation} {separator}- ({(use.Inverted ? "INV " : "")}{use.Index}) NULL"); + } + } + } + + private static void PrintTreeNode(IPatternTreeNode node, string indentation) + { + Console.WriteLine($" {node.Name}"); + + for (int i = 0; i < node.Uses.Count; i++) + { + PatternTreeNodeUse use = node.Uses[i]; + bool last = i == node.Uses.Count - 1; + char separator = last ? '`' : '|'; + + if (use.Node != null) + { + Console.Write($"{indentation} {separator}- ({(use.Inverted ? "INV " : "")}{use.Index})"); + PrintTreeNode(use.Node, indentation + (last ? " " : " | ")); + } + else + { + Console.WriteLine($"{indentation} {separator}- ({(use.Inverted ? 
"INV " : "")}{use.Index}) NULL"); + } + } + } + } +} diff --git a/src/Ryujinx.Graphics.Shader/Translation/GlobalMemory.cs b/src/Ryujinx.Graphics.Shader/Translation/GlobalMemory.cs new file mode 100644 index 00000000..774a128d --- /dev/null +++ b/src/Ryujinx.Graphics.Shader/Translation/GlobalMemory.cs @@ -0,0 +1,52 @@ +using Ryujinx.Graphics.Shader.IntermediateRepresentation; + +namespace Ryujinx.Graphics.Shader.Translation +{ + static class GlobalMemory + { + private const int StorageDescsBaseOffset = 0x44; // In words. + + public const int StorageDescSize = 4; // In words. + public const int StorageMaxCount = 16; + + public const int StorageDescsSize = StorageDescSize * StorageMaxCount; + + public const int UbeBaseOffset = 0x98; // In words. + public const int UbeMaxCount = 9; + public const int UbeDescsSize = StorageDescSize * UbeMaxCount; + public const int UbeFirstCbuf = 8; + + public static bool UsesGlobalMemory(Instruction inst, StorageKind storageKind) + { + return (inst.IsAtomic() && storageKind == StorageKind.GlobalMemory) || + inst == Instruction.LoadGlobal || + inst == Instruction.StoreGlobal || + inst == Instruction.StoreGlobal16 || + inst == Instruction.StoreGlobal8; + } + + public static int GetStorageCbOffset(ShaderStage stage, int slot) + { + return GetStorageBaseCbOffset(stage) + slot * StorageDescSize; + } + + public static int GetStorageBaseCbOffset(ShaderStage stage) + { + return stage switch + { + ShaderStage.Compute => StorageDescsBaseOffset + 2 * StorageDescsSize, + ShaderStage.Vertex => StorageDescsBaseOffset, + ShaderStage.TessellationControl => StorageDescsBaseOffset + 1 * StorageDescsSize, + ShaderStage.TessellationEvaluation => StorageDescsBaseOffset + 2 * StorageDescsSize, + ShaderStage.Geometry => StorageDescsBaseOffset + 3 * StorageDescsSize, + ShaderStage.Fragment => StorageDescsBaseOffset + 4 * StorageDescsSize, + _ => 0 + }; + } + + public static int GetConstantUbeOffset(int slot) + { + return UbeBaseOffset + slot * 
StorageDescSize; + } + } +}
// \ No newline at end of file
// diff --git a/src/Ryujinx.Graphics.Shader/Translation/Optimizations/BindlessElimination.cs b/src/Ryujinx.Graphics.Shader/Translation/Optimizations/BindlessElimination.cs
// new file mode 100644
// index 00000000..0c196c4d
// --- /dev/null
// +++ b/src/Ryujinx.Graphics.Shader/Translation/Optimizations/BindlessElimination.cs
// @@ -0,0 +1,263 @@
using Ryujinx.Graphics.Shader.Instructions;
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using System.Collections.Generic;

namespace Ryujinx.Graphics.Shader.Translation.Optimizations
{
    // Optimization pass that rewrites bindless texture/image operations into regular
    // (bound) accesses when the handle can be traced back to constant buffer values.
    // The class only holds static members, so it is declared static like the other passes.
    static class BindlessElimination
    {
        // Visits every bindless texture operation of the block and tries to replace its
        // handle with a constant buffer offset/slot pair.
        public static void RunPass(BasicBlock block, ShaderConfig config)
        {
            // We can turn a bindless access into a regular access by recognizing the pattern
            // produced by the compiler for separate texture and sampler.
            // We check for the following conditions:
            // - The handle is a constant buffer value.
            // - The handle is the result of a bitwise OR logical operation.
            // - Both sources of the OR operation come from a constant buffer.
            for (LinkedListNode<INode> node = block.Operations.First; node != null; node = node.Next)
            {
                if (node.Value is not TextureOperation texOp || (texOp.Flags & TextureFlags.Bindless) == 0)
                {
                    continue;
                }

                switch (texOp.Inst)
                {
                    case Instruction.Lod:
                    case Instruction.TextureSample:
                    case Instruction.TextureSize:
                        RewriteTextureHandle(block, config, texOp);
                        break;

                    case Instruction.ImageLoad:
                    case Instruction.ImageStore:
                    case Instruction.ImageAtomic:
                        RewriteImageHandle(block, config, texOp);
                        break;
                }
            }
        }

        // Handles Lod/TextureSample/TextureSize: matches the handle against the direct,
        // masked, shifted and constant-sampler patterns and calls SetHandle on a match.
        // Returns without touching the operation when no pattern matches.
        private static void RewriteTextureHandle(BasicBlock block, ShaderConfig config, TextureOperation texOp)
        {
            Operand bindlessHandle = Utils.FindLastOperation(texOp.GetSource(0), block);

            // Some instructions do not encode an accurate sampler type:
            // - Most instructions use the same type for 1D and Buffer.
            // - Query instructions may not have any type.
            // For those cases, we need to try getting the type from current GPU state,
            // as long as bindless elimination is successful and we know where the texture descriptor is located.
            bool rewriteSamplerType =
                texOp.Type == SamplerType.TextureBuffer ||
                texOp.Inst == Instruction.TextureSize;

            if (bindlessHandle.Type == OperandType.ConstantBuffer)
            {
                SetHandle(config, texOp, bindlessHandle.GetCbufOffset(), bindlessHandle.GetCbufSlot(), rewriteSamplerType, isImage: false);

                return;
            }

            if (bindlessHandle.AsgOp is not Operation handleCombineOp || handleCombineOp.Inst != Instruction.BitwiseOr)
            {
                return;
            }

            Operand src0 = Utils.FindLastOperation(handleCombineOp.GetSource(0), block);
            Operand src1 = Utils.FindLastOperation(handleCombineOp.GetSource(1), block);

            // For cases where we have a constant, ensure that the constant is always
            // the second operand.
            // Since this is a commutative operation, both are fine,
            // and having a "canonical" representation simplifies some checks below.
            if (src0.Type == OperandType.Constant && src1.Type != OperandType.Constant)
            {
                (src0, src1) = (src1, src0);
            }

            TextureHandleType handleType = TextureHandleType.SeparateSamplerHandle;

            // Try to match the following patterns:
            // Masked pattern:
            // - samplerHandle = samplerHandle & 0xFFF00000;
            // - textureHandle = textureHandle & 0xFFFFF;
            // - combinedHandle = samplerHandle | textureHandle;
            // Where samplerHandle and textureHandle come from a constant buffer.
            // Shifted pattern:
            // - samplerHandle = samplerId << 20;
            // - combinedHandle = samplerHandle | textureHandle;
            // Where samplerId and textureHandle come from a constant buffer.
            // Constant pattern:
            // - combinedHandle = samplerHandleConstant | textureHandle;
            // Where samplerHandleConstant is a constant value, and textureHandle comes from a constant buffer.
            if (src0.AsgOp is Operation src0AsgOp)
            {
                if (src1.AsgOp is Operation src1AsgOp &&
                    src0AsgOp.Inst == Instruction.BitwiseAnd &&
                    src1AsgOp.Inst == Instruction.BitwiseAnd)
                {
                    src0 = GetSourceForMaskedHandle(src0AsgOp, 0xFFFFF);
                    src1 = GetSourceForMaskedHandle(src1AsgOp, 0xFFF00000);

                    // The OR operation is commutative, so we can also try to swap the operands to get a match.
                    if (src0 == null || src1 == null)
                    {
                        src0 = GetSourceForMaskedHandle(src1AsgOp, 0xFFFFF);
                        src1 = GetSourceForMaskedHandle(src0AsgOp, 0xFFF00000);
                    }

                    if (src0 == null || src1 == null)
                    {
                        return;
                    }
                }
                else if (src0AsgOp.Inst == Instruction.ShiftLeft)
                {
                    Operand shift = src0AsgOp.GetSource(1);

                    if (shift.Type == OperandType.Constant && shift.Value == 20)
                    {
                        // The shifted value is the sampler ID, so the other OR operand becomes the texture handle.
                        src0 = src1;
                        src1 = src0AsgOp.GetSource(0);
                        handleType = TextureHandleType.SeparateSamplerId;
                    }
                }
            }
            else if (src1.AsgOp is Operation src1AsgOp && src1AsgOp.Inst == Instruction.ShiftLeft)
            {
                Operand shift = src1AsgOp.GetSource(1);

                if (shift.Type == OperandType.Constant && shift.Value == 20)
                {
                    src1 = src1AsgOp.GetSource(0);
                    handleType = TextureHandleType.SeparateSamplerId;
                }
            }
            else if (src1.Type == OperandType.Constant && (src1.Value & 0xfffff) == 0)
            {
                handleType = TextureHandleType.SeparateConstantSamplerHandle;
            }

            if (src0.Type != OperandType.ConstantBuffer)
            {
                return;
            }

            if (handleType == TextureHandleType.SeparateConstantSamplerHandle)
            {
                SetHandle(
                    config,
                    texOp,
                    TextureHandle.PackOffsets(src0.GetCbufOffset(), ((src1.Value >> 20) & 0xfff), handleType),
                    TextureHandle.PackSlots(src0.GetCbufSlot(), 0),
                    rewriteSamplerType,
                    isImage: false);
            }
            else if (src1.Type == OperandType.ConstantBuffer)
            {
                SetHandle(
                    config,
                    texOp,
                    TextureHandle.PackOffsets(src0.GetCbufOffset(), src1.GetCbufOffset(), handleType),
                    TextureHandle.PackSlots(src0.GetCbufSlot(), src1.GetCbufSlot()),
                    rewriteSamplerType,
                    isImage: false);
            }
        }

        // Handles ImageLoad/ImageStore/ImageAtomic: only a handle that comes directly from
        // a constant buffer is rewritten. An unknown format is queried from the config first.
        private static void RewriteImageHandle(BasicBlock block, ShaderConfig config, TextureOperation texOp)
        {
            Operand src0 = Utils.FindLastOperation(texOp.GetSource(0), block);

            if (src0.Type != OperandType.ConstantBuffer)
            {
                return;
            }

            int cbufOffset = src0.GetCbufOffset();
            int cbufSlot = src0.GetCbufSlot();

            if (texOp.Format == TextureFormat.Unknown)
            {
                if (texOp.Inst == Instruction.ImageAtomic)
                {
                    texOp.Format = config.GetTextureFormatAtomic(cbufOffset, cbufSlot);
                }
                else
                {
                    texOp.Format = config.GetTextureFormat(cbufOffset, cbufSlot);
                }
            }

            bool rewriteSamplerType = texOp.Type == SamplerType.TextureBuffer;

            SetHandle(config, texOp, cbufOffset, cbufSlot, rewriteSamplerType, isImage: true);
        }

        // Returns the constant buffer operand of a bitwise AND whose other operand is the
        // expected mask, or null when the operation does not have that shape.
        private static Operand GetSourceForMaskedHandle(Operation asgOp, uint mask)
        {
            // Assume it was already checked that the operation is bitwise AND.
            Operand src0 = asgOp.GetSource(0);
            Operand src1 = asgOp.GetSource(1);

            if (src0.Type == OperandType.ConstantBuffer && src1.Type == OperandType.ConstantBuffer)
            {
                // We can't check if the mask matches here as both operands are from a constant buffer.
                // Be optimistic and assume it matches. Avoid constant buffer 1 as official drivers
                // use this one to store compiler constants.
                return src0.GetCbufSlot() == 1 ? src1 : src0;
            }
            else if (src0.Type == OperandType.ConstantBuffer && src1.Type == OperandType.Constant)
            {
                if ((uint)src1.Value == mask)
                {
                    return src0;
                }
            }
            else if (src0.Type == OperandType.Constant && src1.Type == OperandType.ConstantBuffer)
            {
                if ((uint)src0.Value == mask)
                {
                    return src1;
                }
            }

            return null;
        }

        // Stores the resolved handle on the operation, optionally fixes up the sampler type
        // using the type reported by the GPU accessor, and registers the texture as used.
        private static void SetHandle(ShaderConfig config, TextureOperation texOp, int cbufOffset, int cbufSlot, bool rewriteSamplerType, bool isImage)
        {
            texOp.SetHandle(cbufOffset, cbufSlot);

            if (rewriteSamplerType)
            {
                SamplerType newType = config.GpuAccessor.QuerySamplerType(cbufOffset, cbufSlot);

                if (texOp.Inst.IsTextureQuery())
                {
                    texOp.Type = newType;
                }
                else if (texOp.Type == SamplerType.TextureBuffer && newType == SamplerType.Texture1D)
                {
                    int coordsCount = 1;

                    if (InstEmit.Sample1DAs2D)
                    {
                        // The 1D texture is sampled as 2D, so a zero second coordinate is inserted.
                        newType = SamplerType.Texture2D;
                        texOp.InsertSource(coordsCount++, OperandHelper.Const(0));
                    }

                    if (!isImage &&
                        (texOp.Flags & TextureFlags.IntCoords) != 0 &&
                        (texOp.Flags & TextureFlags.LodLevel) == 0)
                    {
                        // IntCoords textures must always have explicit LOD.
                        texOp.SetLodLevelFlag();
                        texOp.InsertSource(coordsCount, OperandHelper.Const(0));
                    }

                    texOp.Type = newType;
                }
            }

            config.SetUsedTexture(texOp.Inst, texOp.Type, texOp.Format, texOp.Flags, cbufSlot, cbufOffset);
        }
    }
}
// diff --git a/src/Ryujinx.Graphics.Shader/Translation/Optimizations/BindlessToIndexed.cs b/src/Ryujinx.Graphics.Shader/Translation/Optimizations/BindlessToIndexed.cs
// new file mode 100644
// index 00000000..ca46a1f5
// --- /dev/null
// +++ b/src/Ryujinx.Graphics.Shader/Translation/Optimizations/BindlessToIndexed.cs
// @@ -0,0 +1,85 @@
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using System.Collections.Generic;

using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;

namespace Ryujinx.Graphics.Shader.Translation.Optimizations
{
    // Optimization pass that rewrites bindless texture accesses whose handle is loaded
    // from constant buffer 2 at "index + constant" into indexed texture accesses.
    static class BindlessToIndexed
    {
        public static void RunPass(BasicBlock block, ShaderConfig config)
        {
            // We can turn a bindless texture access into an indexed access,
            // as long as the following conditions are true:
            // - The handle is loaded using a LDC instruction.
            // - The handle is loaded from the constant buffer with the handles (CB2 for NVN).
            // - The load has a constant offset.
            // The base offset of the array of handles on the constant buffer is the constant offset.
            for (LinkedListNode<INode> node = block.Operations.First; node != null; node = node.Next)
            {
                if (node.Value is not TextureOperation texOp || (texOp.Flags & TextureFlags.Bindless) == 0)
                {
                    continue;
                }

                if (texOp.GetSource(0).AsgOp is not Operation handleAsgOp || handleAsgOp.Inst != Instruction.LoadConstant)
                {
                    continue;
                }

                Operand ldcSrc0 = handleAsgOp.GetSource(0);
                Operand ldcSrc1 = handleAsgOp.GetSource(1);

                if (ldcSrc0.Type != OperandType.Constant || ldcSrc0.Value != 2)
                {
                    continue;
                }

                if (ldcSrc1.AsgOp is not Operation shrOp || shrOp.Inst != Instruction.ShiftRightU32)
                {
                    continue;
                }

                if (shrOp.GetSource(0).AsgOp is not Operation addOp || addOp.Inst != Instruction.Add)
                {
                    continue;
                }

                Operand addSrc1 = addOp.GetSource(1);

                if (addSrc1.Type != OperandType.Constant)
                {
                    continue;
                }

                // NOTE(review): the division by 4 presumably converts a byte offset into a word offset - confirm.
                TurnIntoIndexed(config, texOp, addSrc1.Value / 4);

                // The texture index is the non-constant part of the address shifted right by 3.
                Operand index = Local();

                Operand source = addOp.GetSource(0);

                Operation shrBy3 = new Operation(Instruction.ShiftRightU32, index, source, Const(3));

                block.Operations.AddBefore(node, shrBy3);

                texOp.SetSource(0, index);
            }
        }

        // Converts the operation to its indexed form and registers the texture as used.
        private static void TurnIntoIndexed(ShaderConfig config, TextureOperation texOp, int handle)
        {
            texOp.TurnIntoIndexed(handle);
            config.SetUsedTexture(texOp.Inst, texOp.Type, texOp.Format, texOp.Flags, texOp.CbufSlot, handle);
        }
    }
}
// \ No newline at end of file
// diff --git a/src/Ryujinx.Graphics.Shader/Translation/Optimizations/BranchElimination.cs b/src/Ryujinx.Graphics.Shader/Translation/Optimizations/BranchElimination.cs
// new file mode 100644
// index 00000000..c87d1474
// --- /dev/null
// +++ b/src/Ryujinx.Graphics.Shader/Translation/Optimizations/BranchElimination.cs
// @@ -0,0 +1,64 @@
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using System;

namespace Ryujinx.Graphics.Shader.Translation.Optimizations
{
    // Optimization pass that drops a block's branch when the next non-empty block
    // starts with a branch to the same destination.
    static class BranchElimination
    {
        // Returns true when the branch of the block was removed.
        public static bool RunPass(BasicBlock block)
        {
            if (!block.HasBranch)
            {
                return false;
            }

            Operation lastOp = (Operation)block.GetLastOp();

            if (!IsRedundantBranch(lastOp, Next(block)))
            {
                return false;
            }

            block.Branch = null;

            return true;
        }

        private static bool IsRedundantBranch(Operation current, BasicBlock nextBlock)
        {
            // The branch that ends the current block is redundant when:
            // - There is a next block.
            // - The first operation of the next block is a branch (Instruction.Branch).
            // - Both branches have the same destination.
            // In this case, the branch on the current block can be removed,
            // as the next block is going to jump to the same place anyway.
            return nextBlock?.Operations.First?.Value is Operation { Inst: Instruction.Branch } next &&
                   current.Dest == next.Dest;
        }

        // Finds the first block after the given one that has at least one operation.
        // Throws if an empty block claiming to have a branch is found along the way.
        private static BasicBlock Next(BasicBlock block)
        {
            for (BasicBlock candidate = block.Next; candidate != null; candidate = candidate.Next)
            {
                if (candidate.Operations.Count != 0)
                {
                    return candidate;
                }

                if (candidate.HasBranch)
                {
                    throw new InvalidOperationException("Found a bogus empty block that \"ends with a branch\".");
                }
            }

            return null;
        }
    }
}
// \ No newline at end of file
// diff --git a/src/Ryujinx.Graphics.Shader/Translation/Optimizations/ConstantFolding.cs b/src/Ryujinx.Graphics.Shader/Translation/Optimizations/ConstantFolding.cs
// new file mode 100644
// index 00000000..6729f077
// --- /dev/null
// +++ b/src/Ryujinx.Graphics.Shader/Translation/Optimizations/ConstantFolding.cs
// @@ -0,0 +1,346 @@
using Ryujinx.Common.Utilities;
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using System;
using System.Numerics;

using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;

namespace Ryujinx.Graphics.Shader.Translation.Optimizations
{
    // Optimization pass that evaluates operations whose sources are all constants
    // and replaces them with a copy of the computed constant.
    static class ConstantFolding
    {
        // Folds the operation in place when every source is a constant and the
        // instruction is one of the supported ones. Other operations are left untouched.
        public static void RunPass(Operation operation)
        {
            if (!AreAllSourcesConstant(operation))
            {
                return;
            }

            switch (operation.Inst)
            {
                case Instruction.Add:
                    EvaluateBinary(operation, (x, y) => x + y);
                    break;

                case Instruction.BitCount:
                    EvaluateUnary(operation, (x) => BitCount(x));
                    break;

                case Instruction.BitwiseAnd:
                    EvaluateBinary(operation, (x, y) => x & y);
                    break;

                case Instruction.BitwiseExclusiveOr:
                    EvaluateBinary(operation, (x, y) => x ^ y);
                    break;

                case Instruction.BitwiseNot:
                    EvaluateUnary(operation, (x) => ~x);
                    break;

                case Instruction.BitwiseOr:
                    EvaluateBinary(operation, (x, y) => x | y);
                    break;

                case Instruction.BitfieldExtractS32:
                    BitfieldExtractS32(operation);
                    break;

                case Instruction.BitfieldExtractU32:
                    BitfieldExtractU32(operation);
                    break;

                case Instruction.Clamp:
                    EvaluateTernary(operation, (x, y, z) => ClampNoThrow(x, y, z));
                    break;

                case Instruction.ClampU32:
                    EvaluateTernary(operation, (x, y, z) => (int)ClampNoThrow((uint)x, (uint)y, (uint)z));
                    break;

                case Instruction.CompareEqual:
                    EvaluateBinary(operation, (x, y) => x == y);
                    break;

                case Instruction.CompareGreater:
                    EvaluateBinary(operation, (x, y) => x > y);
                    break;

                case Instruction.CompareGreaterOrEqual:
                    EvaluateBinary(operation, (x, y) => x >= y);
                    break;

                case Instruction.CompareGreaterOrEqualU32:
                    EvaluateBinary(operation, (x, y) => (uint)x >= (uint)y);
                    break;

                case Instruction.CompareGreaterU32:
                    EvaluateBinary(operation, (x, y) => (uint)x > (uint)y);
                    break;

                case Instruction.CompareLess:
                    EvaluateBinary(operation, (x, y) => x < y);
                    break;

                case Instruction.CompareLessOrEqual:
                    EvaluateBinary(operation, (x, y) => x <= y);
                    break;

                case Instruction.CompareLessOrEqualU32:
                    EvaluateBinary(operation, (x, y) => (uint)x <= (uint)y);
                    break;

                case Instruction.CompareLessU32:
                    EvaluateBinary(operation, (x, y) => (uint)x < (uint)y);
                    break;

                case Instruction.CompareNotEqual:
                    EvaluateBinary(operation, (x, y) => x != y);
                    break;

                case Instruction.Divide:
                    EvaluateBinary(operation, (x, y) => DivideS32(x, y));
                    break;

                case Instruction.FP32 | Instruction.Add:
                    EvaluateFPBinary(operation, (x, y) => x + y);
                    break;

                case Instruction.FP32 | Instruction.Clamp:
                    EvaluateFPTernary(operation, (x, y, z) => ClampNoThrow(x, y, z));
                    break;

                case Instruction.FP32 | Instruction.CompareEqual:
                    EvaluateFPBinary(operation, (x, y) => x == y);
                    break;

                case Instruction.FP32 | Instruction.CompareGreater:
                    EvaluateFPBinary(operation, (x, y) => x > y);
                    break;

                case Instruction.FP32 | Instruction.CompareGreaterOrEqual:
                    EvaluateFPBinary(operation, (x, y) => x >= y);
                    break;

                case Instruction.FP32 | Instruction.CompareLess:
                    EvaluateFPBinary(operation, (x, y) => x < y);
                    break;

                case Instruction.FP32 | Instruction.CompareLessOrEqual:
                    EvaluateFPBinary(operation, (x, y) => x <= y);
                    break;

                case Instruction.FP32 | Instruction.CompareNotEqual:
                    EvaluateFPBinary(operation, (x, y) => x != y);
                    break;

                case Instruction.FP32 | Instruction.Divide:
                    EvaluateFPBinary(operation, (x, y) => x / y);
                    break;

                case Instruction.FP32 | Instruction.Multiply:
                    EvaluateFPBinary(operation, (x, y) => x * y);
                    break;

                case Instruction.FP32 | Instruction.Negate:
                    EvaluateFPUnary(operation, (x) => -x);
                    break;

                case Instruction.FP32 | Instruction.Subtract:
                    EvaluateFPBinary(operation, (x, y) => x - y);
                    break;

                case Instruction.IsNan:
                    EvaluateFPUnary(operation, (x) => float.IsNaN(x));
                    break;

                case Instruction.LoadConstant:
                    // Both the slot and the offset are constants, so the load becomes a direct constant buffer operand.
                    operation.TurnIntoCopy(Cbuf(operation.GetSource(0).Value, operation.GetSource(1).Value));
                    break;

                case Instruction.Maximum:
                    EvaluateBinary(operation, (x, y) => Math.Max(x, y));
                    break;

                case Instruction.MaximumU32:
                    EvaluateBinary(operation, (x, y) => (int)Math.Max((uint)x, (uint)y));
                    break;

                case Instruction.Minimum:
                    EvaluateBinary(operation, (x, y) => Math.Min(x, y));
                    break;

                case Instruction.MinimumU32:
                    EvaluateBinary(operation, (x, y) => (int)Math.Min((uint)x, (uint)y));
                    break;

                case Instruction.Multiply:
                    EvaluateBinary(operation, (x, y) => x * y);
                    break;

                case Instruction.Negate:
                    EvaluateUnary(operation, (x) => -x);
                    break;

                case Instruction.ShiftLeft:
                    EvaluateBinary(operation, (x, y) => x << y);
                    break;

                case Instruction.ShiftRightS32:
                    EvaluateBinary(operation, (x, y) => x >> y);
                    break;

                case Instruction.ShiftRightU32:
                    EvaluateBinary(operation, (x, y) => (int)((uint)x >> y));
                    break;

                case Instruction.Subtract:
                    EvaluateBinary(operation, (x, y) => x - y);
                    break;

                case Instruction.UnpackHalf2x16:
                    UnpackHalf2x16(operation);
                    break;
            }
        }

        // Returns true when every source operand of the operation is a constant.
        private static bool AreAllSourcesConstant(Operation operation)
        {
            for (int index = 0; index < operation.SourcesCount; index++)
            {
                if (operation.GetSource(index).Type != OperandType.Constant)
                {
                    return false;
                }
            }

            return true;
        }

        // Number of set bits in the 32-bit value.
        private static int BitCount(int value)
        {
            return BitOperations.PopCount(unchecked((uint)value));
        }

        // Signed division that never throws: division by zero folds to 0 (as before),
        // and int.MinValue / -1, which raises OverflowException in .NET, wraps around instead.
        private static int DivideS32(int x, int y)
        {
            if (y == 0)
            {
                return 0;
            }

            if (y == -1)
            {
                return unchecked(-x);
            }

            return x / y;
        }

        // The ClampNoThrow overloads perform the same comparisons as Math.Clamp, but do not
        // throw ArgumentException when min is greater than max, so a shader with such a
        // constant clamp can not abort the pass.
        private static int ClampNoThrow(int value, int min, int max)
        {
            return value < min ? min : (value > max ? max : value);
        }

        private static uint ClampNoThrow(uint value, uint min, uint max)
        {
            return value < min ? min : (value > max ? max : value);
        }

        private static float ClampNoThrow(float value, float min, float max)
        {
            return value < min ? min : (value > max ? max : value);
        }

        // Folds a signed bitfield extract: extracts the field, then sign extends it
        // from its length (source 2) to 32 bits.
        private static void BitfieldExtractS32(Operation operation)
        {
            int value = GetBitfieldExtractValue(operation);

            int shift = 32 - operation.GetSource(2).Value;

            value = (value << shift) >> shift;

            operation.TurnIntoCopy(Const(value));
        }

        // Folds an unsigned bitfield extract.
        private static void BitfieldExtractU32(Operation operation)
        {
            operation.TurnIntoCopy(Const(GetBitfieldExtractValue(operation)));
        }

        // Extracts "length" (source 2) bits starting at bit "lsb" (source 1) from source 0.
        private static int GetBitfieldExtractValue(Operation operation)
        {
            int value = operation.GetSource(0).Value;
            int lsb = operation.GetSource(1).Value;
            int length = operation.GetSource(2).Value;

            return value.Extract(lsb, length);
        }

        // Folds the half selected by operation.Index (16 bits each) into a FP32 constant.
        private static void UnpackHalf2x16(Operation operation)
        {
            int value = operation.GetSource(0).Value;

            value = (value >> (operation.Index * 16)) & 0xffff;

            operation.TurnIntoCopy(ConstF((float)BitConverter.UInt16BitsToHalf((ushort)value)));
        }

        private static void EvaluateUnary(Operation operation, Func<int, int> op)
        {
            int x = operation.GetSource(0).Value;

            operation.TurnIntoCopy(Const(op(x)));
        }

        private static void EvaluateFPUnary(Operation operation, Func<float, float> op)
        {
            float x = operation.GetSource(0).AsFloat();

            operation.TurnIntoCopy(ConstF(op(x)));
        }

        // Boolean results are folded to the IR true/false constants.
        private static void EvaluateFPUnary(Operation operation, Func<float, bool> op)
        {
            float x = operation.GetSource(0).AsFloat();

            operation.TurnIntoCopy(Const(op(x) ? IrConsts.True : IrConsts.False));
        }

        private static void EvaluateBinary(Operation operation, Func<int, int, int> op)
        {
            int x = operation.GetSource(0).Value;
            int y = operation.GetSource(1).Value;

            operation.TurnIntoCopy(Const(op(x, y)));
        }

        private static void EvaluateBinary(Operation operation, Func<int, int, bool> op)
        {
            int x = operation.GetSource(0).Value;
            int y = operation.GetSource(1).Value;

            operation.TurnIntoCopy(Const(op(x, y) ? IrConsts.True : IrConsts.False));
        }

        private static void EvaluateFPBinary(Operation operation, Func<float, float, float> op)
        {
            float x = operation.GetSource(0).AsFloat();
            float y = operation.GetSource(1).AsFloat();

            operation.TurnIntoCopy(ConstF(op(x, y)));
        }

        private static void EvaluateFPBinary(Operation operation, Func<float, float, bool> op)
        {
            float x = operation.GetSource(0).AsFloat();
            float y = operation.GetSource(1).AsFloat();

            operation.TurnIntoCopy(Const(op(x, y) ? IrConsts.True : IrConsts.False));
        }

        private static void EvaluateTernary(Operation operation, Func<int, int, int, int> op)
        {
            int x = operation.GetSource(0).Value;
            int y = operation.GetSource(1).Value;
            int z = operation.GetSource(2).Value;

            operation.TurnIntoCopy(Const(op(x, y, z)));
        }

        private static void EvaluateFPTernary(Operation operation, Func<float, float, float, float> op)
        {
            float x = operation.GetSource(0).AsFloat();
            float y = operation.GetSource(1).AsFloat();
            float z = operation.GetSource(2).AsFloat();

            operation.TurnIntoCopy(ConstF(op(x, y, z)));
        }
    }
}
// \ No newline at end of file
// diff --git a/src/Ryujinx.Graphics.Shader/Translation/Optimizations/GlobalToStorage.cs b/src/Ryujinx.Graphics.Shader/Translation/Optimizations/GlobalToStorage.cs
// new file mode 100644
// index 00000000..2a4070e0
// --- /dev/null
// +++ b/src/Ryujinx.Graphics.Shader/Translation/Optimizations/GlobalToStorage.cs
// @@ -0,0 +1,433 @@
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using System.Collections.Generic;

using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
using static Ryujinx.Graphics.Shader.Translation.GlobalMemory;

namespace Ryujinx.Graphics.Shader.Translation.Optimizations
{
    // Optimization pass that replaces global memory accesses with storage buffer accesses
    // (or, on compute, constant buffer loads) when the base address can be traced back
    // to a known descriptor on constant buffer 0.
    static class GlobalToStorage
    {
        // Scans the block, accumulates the masks of referenced storage/constant descriptors
        // into sbUseMask/ubeUseMask, rewrites the global accesses it can resolve, and
        // finally reports both masks to the config.
        public static void RunPass(BasicBlock block, ShaderConfig config, ref int sbUseMask, ref int ubeUseMask)
        {
            int sbStart = GetStorageBaseCbOffset(config.Stage);
            int sbEnd = sbStart + StorageDescsSize;

            int ubeStart = UbeBaseOffset;
            int ubeEnd = UbeBaseOffset + UbeDescsSize;

            for (LinkedListNode<INode> node = block.Operations.First; node != null; node = node.Next)
            {
                // Record every descriptor that is referenced by any source of the node.
                for (int index = 0; index < node.Value.SourcesCount; index++)
                {
                    Operand src = node.Value.GetSource(index);

                    int storageIndex = GetStorageIndex(src, sbStart, sbEnd);

                    if (storageIndex >= 0)
                    {
                        sbUseMask |= 1 << storageIndex;
                    }

                    if (config.Stage == ShaderStage.Compute)
                    {
                        int constantIndex = GetStorageIndex(src, ubeStart, ubeEnd);

                        if (constantIndex >= 0)
                        {
                            ubeUseMask |= 1 << constantIndex;
                        }
                    }
                }

                if (!(node.Value is Operation operation))
                {
                    continue;
                }

                if (UsesGlobalMemory(operation.Inst, operation.StorageKind))
                {
                    Operand source = operation.GetSource(0);

                    int storageIndex = SearchForStorageBase(block, source, sbStart, sbEnd);

                    if (storageIndex >= 0)
                    {
                        // Storage buffers are implemented using global memory access.
                        // If we know from where the base address of the access is loaded,
                        // we can guess which storage buffer it is accessing.
                        // We can then replace the global memory access with a storage
                        // buffer access.
                        node = ReplaceGlobalWithStorage(block, node, config, storageIndex);
                    }
                    else if (config.Stage == ShaderStage.Compute && operation.Inst == Instruction.LoadGlobal)
                    {
                        // Here we effectively try to replace a LDG instruction with LDC.
                        // The hardware only supports a limited amount of constant buffers
                        // so NVN "emulates" more constant buffers using global memory access.
                        // Here we try to replace the global access back to a constant buffer
                        // load.
                        // The search range ends at ubeEnd, which already includes ubeStart
                        // (same range used to build ubeUseMask above).
                        storageIndex = SearchForStorageBase(block, source, ubeStart, ubeEnd);

                        if (storageIndex >= 0)
                        {
                            node = ReplaceLdgWithLdc(node, config, storageIndex);
                        }
                    }
                }
            }

            config.SetAccessibleBufferMasks(sbUseMask, ubeUseMask);
        }

        // Replaces the global memory operation on the node with the equivalent storage buffer
        // operation for the given storage index, and returns the node of the new operation.
        private static LinkedListNode<INode> ReplaceGlobalWithStorage(BasicBlock block, LinkedListNode<INode> node, ShaderConfig config, int storageIndex)
        {
            Operation operation = (Operation)node.Value;

            bool isAtomic = operation.Inst.IsAtomic();
            bool isStg16Or8 = operation.Inst == Instruction.StoreGlobal16 || operation.Inst == Instruction.StoreGlobal8;
            bool isWrite = isAtomic || operation.Inst == Instruction.StoreGlobal || isStg16Or8;

            config.SetUsedStorageBuffer(storageIndex, isWrite);

            Operand[] sources = new Operand[operation.SourcesCount];

            // The first two sources become the storage index and the offset inside the buffer;
            // the remaining sources are carried over unchanged.
            sources[0] = Const(storageIndex);
            sources[1] = GetStorageOffset(block, node, config, storageIndex, operation.GetSource(0), isStg16Or8);

            for (int index = 2; index < operation.SourcesCount; index++)
            {
                sources[index] = operation.GetSource(index);
            }

            Operation storageOp;

            if (isAtomic)
            {
                storageOp = new Operation(operation.Inst, StorageKind.StorageBuffer, operation.Dest, sources);
            }
            else if (operation.Inst == Instruction.LoadGlobal)
            {
                storageOp = new Operation(Instruction.LoadStorage, operation.Dest, sources);
            }
            else
            {
                Instruction storeInst = operation.Inst switch
                {
                    Instruction.StoreGlobal16 => Instruction.StoreStorage16,
                    Instruction.StoreGlobal8 => Instruction.StoreStorage8,
                    _ => Instruction.StoreStorage
                };

                storageOp = new Operation(storeInst, null, sources);
            }

            // Clear the sources of the old operation before it is removed from the list.
            for (int index = 0; index < operation.SourcesCount; index++)
            {
                operation.SetSource(index, null);
            }

            LinkedListNode<INode> oldNode = node;

            node = node.List.AddBefore(node, storageOp);

            node.List.Remove(oldNode);

            return node;
        }

        // Emits, before the node, the operations that compute the offset of the access inside
        // the storage buffer, and returns the operand holding it. The offset is in bytes for
        // 16/8-bit stores and in words (bytes >> 2) otherwise.
        private static Operand GetStorageOffset(
            BasicBlock block,
            LinkedListNode<INode> node,
            ShaderConfig config,
            int storageIndex,
            Operand addrLow,
            bool isStg16Or8)
        {
            int baseAddressCbOffset = GetStorageCbOffset(config.Stage, storageIndex);

            bool storageAligned = !(config.GpuAccessor.QueryHasUnalignedStorageBuffer() || config.GpuAccessor.QueryHostStorageBufferOffsetAlignment() > Constants.StorageAlignment);

            // Only try to recover the offset directly from the address calculation when
            // the storage buffers are known to be aligned.
            (Operand byteOffset, int constantOffset) = storageAligned ?
                GetStorageOffset(block, Utils.FindLastOperation(addrLow, block), baseAddressCbOffset) :
                (null, 0);

            if (byteOffset != null)
            {
                ReplaceAddressAlignment(node.List, addrLow, byteOffset, constantOffset);
            }

            if (byteOffset == null)
            {
                // Fallback: subtract the base address, truncated to the host alignment, from the address.
                Operand baseAddrLow = Cbuf(0, baseAddressCbOffset);
                Operand baseAddrTrunc = Local();

                Operand alignMask = Const(-config.GpuAccessor.QueryHostStorageBufferOffsetAlignment());

                Operation andOp = new Operation(Instruction.BitwiseAnd, baseAddrTrunc, baseAddrLow, alignMask);

                node.List.AddBefore(node, andOp);

                Operand offset = Local();
                Operation subOp = new Operation(Instruction.Subtract, offset, addrLow, baseAddrTrunc);

                node.List.AddBefore(node, subOp);

                byteOffset = offset;
            }
            else if (constantOffset != 0)
            {
                Operand offset = Local();
                Operation addOp = new Operation(Instruction.Add, offset, byteOffset, Const(constantOffset));

                node.List.AddBefore(node, addOp);

                byteOffset = offset;
            }

            if (isStg16Or8)
            {
                return byteOffset;
            }

            Operand wordOffset = Local();
            Operation shrOp = new Operation(Instruction.ShiftRightU32, wordOffset, byteOffset, Const(2));

            node.List.AddBefore(node, shrOp);

            return wordOffset;
        }

        // Returns true when the operand is the constant buffer 0 value at the given offset.
        private static bool IsCb0Offset(Operand operand, int offset)
        {
            return operand.Type == OperandType.ConstantBuffer && operand.GetCbufSlot() == 0 && operand.GetCbufOffset() == offset;
        }

        private static void ReplaceAddressAlignment(LinkedList<INode> list, Operand address, Operand byteOffset, int constantOffset)
        {
            // When we emit 16/8-bit LDG, we add extra code to determine the address alignment.
            // Eliminate the storage buffer base address from this too, leaving only the byte offset.

            foreach (INode useNode in address.UseOps)
            {
                if (useNode is Operation op && op.Inst == Instruction.BitwiseAnd)
                {
                    Operand src1 = op.GetSource(0);
                    Operand src2 = op.GetSource(1);

                    // Index of the source that holds the address in an "address & 3" operation, or -1.
                    int addressIndex = -1;

                    if (src1 == address && src2.Type == OperandType.Constant && src2.Value == 3)
                    {
                        addressIndex = 0;
                    }
                    else if (src2 == address && src1.Type == OperandType.Constant && src1.Value == 3)
                    {
                        addressIndex = 1;
                    }

                    if (addressIndex != -1)
                    {
                        LinkedListNode<INode> node = list.Find(op);

                        // Add offset calculation before the use. Needs to be on the same block.
                        if (node != null)
                        {
                            Operand offset = Local();
                            Operation addOp = new Operation(Instruction.Add, offset, byteOffset, Const(constantOffset));
                            list.AddBefore(node, addOp);

                            op.SetSource(addressIndex, offset);
                        }
                    }
                }
            }
        }

        private static (Operand, int) GetStorageOffset(BasicBlock block, Operand address, int baseAddressCbOffset)
        {
            if (IsCb0Offset(address, baseAddressCbOffset))
            {
                // Direct offset: zero.
+ return (Const(0), 0); + } + + (address, int constantOffset) = GetStorageConstantOffset(block, address); + + address = Utils.FindLastOperation(address, block); + + if (IsCb0Offset(address, baseAddressCbOffset)) + { + // Only constant offset + return (Const(0), constantOffset); + } + + if (!(address.AsgOp is Operation offsetAdd) || offsetAdd.Inst != Instruction.Add) + { + return (null, 0); + } + + Operand src1 = offsetAdd.GetSource(0); + Operand src2 = Utils.FindLastOperation(offsetAdd.GetSource(1), block); + + if (IsCb0Offset(src2, baseAddressCbOffset)) + { + return (src1, constantOffset); + } + else if (IsCb0Offset(src1, baseAddressCbOffset)) + { + return (src2, constantOffset); + } + + return (null, 0); + } + + private static (Operand, int) GetStorageConstantOffset(BasicBlock block, Operand address) + { + if (!(address.AsgOp is Operation offsetAdd) || offsetAdd.Inst != Instruction.Add) + { + return (address, 0); + } + + Operand src1 = offsetAdd.GetSource(0); + Operand src2 = offsetAdd.GetSource(1); + + if (src2.Type != OperandType.Constant) + { + return (address, 0); + } + + return (src1, src2.Value); + } + + private static LinkedListNode<INode> ReplaceLdgWithLdc(LinkedListNode<INode> node, ShaderConfig config, int storageIndex) + { + Operation operation = (Operation)node.Value; + + Operand GetCbufOffset() + { + Operand addrLow = operation.GetSource(0); + + Operand baseAddrLow = Cbuf(0, UbeBaseOffset + storageIndex * StorageDescSize); + + Operand baseAddrTrunc = Local(); + + Operand alignMask = Const(-config.GpuAccessor.QueryHostStorageBufferOffsetAlignment()); + + Operation andOp = new Operation(Instruction.BitwiseAnd, baseAddrTrunc, baseAddrLow, alignMask); + + node.List.AddBefore(node, andOp); + + Operand byteOffset = Local(); + Operand wordOffset = Local(); + + Operation subOp = new Operation(Instruction.Subtract, byteOffset, addrLow, baseAddrTrunc); + Operation shrOp = new Operation(Instruction.ShiftRightU32, wordOffset, byteOffset, Const(2)); + + 
node.List.AddBefore(node, subOp); + node.List.AddBefore(node, shrOp); + + return wordOffset; + } + + Operand[] sources = new Operand[operation.SourcesCount]; + + int cbSlot = UbeFirstCbuf + storageIndex; + + sources[0] = Const(cbSlot); + sources[1] = GetCbufOffset(); + + config.SetUsedConstantBuffer(cbSlot); + + for (int index = 2; index < operation.SourcesCount; index++) + { + sources[index] = operation.GetSource(index); + } + + Operation ldcOp = new Operation(Instruction.LoadConstant, operation.Dest, sources); + + for (int index = 0; index < operation.SourcesCount; index++) + { + operation.SetSource(index, null); + } + + LinkedListNode<INode> oldNode = node; + + node = node.List.AddBefore(node, ldcOp); + + node.List.Remove(oldNode); + + return node; + } + + private static int SearchForStorageBase(BasicBlock block, Operand globalAddress, int sbStart, int sbEnd) + { + globalAddress = Utils.FindLastOperation(globalAddress, block); + + if (globalAddress.Type == OperandType.ConstantBuffer) + { + return GetStorageIndex(globalAddress, sbStart, sbEnd); + } + + Operation operation = globalAddress.AsgOp as Operation; + + if (operation == null || operation.Inst != Instruction.Add) + { + return -1; + } + + Operand src1 = operation.GetSource(0); + Operand src2 = operation.GetSource(1); + + if ((src1.Type == OperandType.LocalVariable && src2.Type == OperandType.Constant) || + (src2.Type == OperandType.LocalVariable && src1.Type == OperandType.Constant)) + { + if (src1.Type == OperandType.LocalVariable) + { + operation = Utils.FindLastOperation(src1, block).AsgOp as Operation; + } + else + { + operation = Utils.FindLastOperation(src2, block).AsgOp as Operation; + } + + if (operation == null || operation.Inst != Instruction.Add) + { + return -1; + } + } + + for (int index = 0; index < operation.SourcesCount; index++) + { + Operand source = operation.GetSource(index); + + int storageIndex = GetStorageIndex(source, sbStart, sbEnd); + + if (storageIndex != -1) + { + return 
storageIndex; + } + } + + return -1; + } + + private static int GetStorageIndex(Operand operand, int sbStart, int sbEnd) + { + if (operand.Type == OperandType.ConstantBuffer) + { + int slot = operand.GetCbufSlot(); + int offset = operand.GetCbufOffset(); + + if (slot == 0 && offset >= sbStart && offset < sbEnd) + { + int storageIndex = (offset - sbStart) / StorageDescSize; + + return storageIndex; + } + } + + return -1; + } + } +}
// diff --git a/src/Ryujinx.Graphics.Shader/Translation/Optimizations/Optimizer.cs
// (new file mode 100644, index 00000000..bae774ee)
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;

namespace Ryujinx.Graphics.Shader.Translation.Optimizations
{
    /// <summary>
    /// Driver for the IR optimization passes.
    /// </summary>
    static class Optimizer
    {
        /// <summary>
        /// Runs all optimizations over the blocks of a function.
        /// </summary>
        /// <param name="blocks">Blocks to optimize, modified in place</param>
        /// <param name="config">Shader configuration, updated by the pattern matching passes</param>
        public static void RunPass(BasicBlock[] blocks, ShaderConfig config)
        {
            RunOptimizationPasses(blocks);

            int sbUseMask = 0;
            int ubeUseMask = 0;

            // Those passes are looking for specific patterns and only needs to run once.
            for (int blkIndex = 0; blkIndex < blocks.Length; blkIndex++)
            {
                GlobalToStorage.RunPass(blocks[blkIndex], config, ref sbUseMask, ref ubeUseMask);
                BindlessToIndexed.RunPass(blocks[blkIndex], config);
                BindlessElimination.RunPass(blocks[blkIndex], config);
            }

            config.SetAccessibleBufferMasks(sbUseMask, ubeUseMask);

            // Run optimizations one last time to remove any code that is now optimizable after above passes.
            RunOptimizationPasses(blocks);
        }

        /// <summary>
        /// Repeats dead code removal, constant folding, simplification, copy/pack/phi propagation
        /// and branch elimination until a full iteration makes no changes.
        /// </summary>
        private static void RunOptimizationPasses(BasicBlock[] blocks)
        {
            bool modified;

            do
            {
                modified = false;

                for (int blkIndex = 0; blkIndex < blocks.Length; blkIndex++)
                {
                    BasicBlock block = blocks[blkIndex];

                    LinkedListNode<INode> node = block.Operations.First;

                    while (node != null)
                    {
                        // Grab the next node first, the current one may be removed below.
                        LinkedListNode<INode> nextNode = node.Next;

                        bool isUnused = IsUnused(node.Value);

                        if (!(node.Value is Operation operation) || isUnused)
                        {
                            if (node.Value is PhiNode phi && !isUnused)
                            {
                                isUnused = PropagatePhi(phi);
                            }

                            if (isUnused)
                            {
                                RemoveNode(block, node);

                                modified = true;
                            }

                            node = nextNode;

                            continue;
                        }

                        ConstantFolding.RunPass(operation);

                        Simplification.RunPass(operation);

                        if (DestIsLocalVar(operation))
                        {
                            if (operation.Inst == Instruction.Copy)
                            {
                                PropagateCopy(operation);

                                RemoveNode(block, node);

                                modified = true;
                            }
                            else if ((operation.Inst == Instruction.PackHalf2x16 && PropagatePack(operation)) ||
                                     (operation.Inst == Instruction.ShuffleXor && MatchDdxOrDdy(operation)))
                            {
                                if (DestHasNoUses(operation))
                                {
                                    RemoveNode(block, node);
                                }

                                modified = true;
                            }
                        }

                        node = nextNode;
                    }

                    if (BranchElimination.RunPass(block))
                    {
                        RemoveNode(block, block.Operations.Last);

                        modified = true;
                    }
                }
            }
            while (modified);
        }

        /// <summary>
        /// Replaces every use of the copy destination with the copy source.
        /// </summary>
        private static void PropagateCopy(Operation copyOp)
        {
            // Propagate copy source operand to all uses of
            // the destination operand.

            Operand dest = copyOp.Dest;
            Operand src = copyOp.GetSource(0);

            // Snapshot the uses, as they are being replaced while we iterate.
            INode[] uses = dest.UseOps.ToArray();

            foreach (INode useNode in uses)
            {
                for (int index = 0; index < useNode.SourcesCount; index++)
                {
                    if (useNode.GetSource(index) == dest)
                    {
                        useNode.SetSource(index, src);
                    }
                }
            }
        }

        /// <summary>
        /// Propagates the phi source to all uses of the phi destination when all sources are the same.
        /// </summary>
        /// <returns>True if the value was propagated (the phi can then be removed), false otherwise</returns>
        private static bool PropagatePhi(PhiNode phi)
        {
            // If all phi sources are the same, we can propagate it and remove the phi.

            Operand firstSrc = phi.GetSource(0);

            for (int index = 1; index < phi.SourcesCount; index++)
            {
                if (!IsSameOperand(firstSrc, phi.GetSource(index)))
                {
                    return false;
                }
            }

            // All sources are equal, we can propagate the value.

            Operand dest = phi.Dest;

            INode[] uses = dest.UseOps.ToArray();

            foreach (INode useNode in uses)
            {
                for (int index = 0; index < useNode.SourcesCount; index++)
                {
                    if (useNode.GetSource(index) == dest)
                    {
                        useNode.SetSource(index, firstSrc);
                    }
                }
            }

            return true;
        }

        /// <summary>
        /// Checks if two operands are constants or constant buffer loads with the same type and value.
        /// </summary>
        private static bool IsSameOperand(Operand x, Operand y)
        {
            if (x.Type != y.Type || x.Value != y.Value)
            {
                return false;
            }

            // TODO: Handle Load operations with the same storage and the same constant parameters.
            return x.Type == OperandType.Constant || x.Type == OperandType.ConstantBuffer;
        }

        /// <summary>
        /// Turns unpack operations that consume the pack result into copies of the pack sources.
        /// </summary>
        /// <returns>True if any use was replaced, false otherwise</returns>
        private static bool PropagatePack(Operation packOp)
        {
            // Propagate pack source operands to uses by unpack
            // instruction. The source depends on the unpack instruction.
            bool modified = false;

            Operand dest = packOp.Dest;
            Operand src0 = packOp.GetSource(0);
            Operand src1 = packOp.GetSource(1);

            INode[] uses = dest.UseOps.ToArray();

            foreach (INode useNode in uses)
            {
                if (!(useNode is Operation operation) || operation.Inst != Instruction.UnpackHalf2x16)
                {
                    continue;
                }

                if (operation.GetSource(0) == dest)
                {
                    operation.TurnIntoCopy(operation.Index == 1 ? src1 : src0);

                    modified = true;
                }
            }

            return modified;
        }

        /// <summary>
        /// Replaces shuffle + swizzle add sequences that compute derivatives with DDX/DDY.
        /// </summary>
        /// <param name="operation">Shuffle operation, must be <see cref="Instruction.ShuffleXor"/></param>
        /// <returns>True if any use was replaced, false otherwise</returns>
        public static bool MatchDdxOrDdy(Operation operation)
        {
            // It's assumed that "operation.Inst" is ShuffleXor,
            // that should be checked before calling this method.
            Debug.Assert(operation.Inst == Instruction.ShuffleXor);

            bool modified = false;

            Operand src2 = operation.GetSource(1);
            Operand src3 = operation.GetSource(2);

            if (src2.Type != OperandType.Constant || (src2.Value != 1 && src2.Value != 2))
            {
                return false;
            }

            if (src3.Type != OperandType.Constant || src3.Value != 0x1c03)
            {
                return false;
            }

            bool isDdy = src2.Value == 2;
            bool isDdx = !isDdy;

            // We can replace any use by a FSWZADD with DDX/DDY, when
            // the following conditions are true:
            // - The mask should be 0b10100101 for DDY, or 0b10011001 for DDX.
            // - The first source operand must be the shuffle output.
            // - The second source operand must be the shuffle first source operand.
            INode[] uses = operation.Dest.UseOps.ToArray();

            foreach (INode use in uses)
            {
                if (!(use is Operation useOp) || useOp.Inst != Instruction.SwizzleAdd)
                {
                    continue;
                }

                Operand fswzaddSrc1 = useOp.GetSource(0);
                Operand fswzaddSrc2 = useOp.GetSource(1);
                Operand fswzaddSrc3 = useOp.GetSource(2);

                if (fswzaddSrc1 != operation.Dest)
                {
                    continue;
                }

                if (fswzaddSrc2 != operation.GetSource(0))
                {
                    continue;
                }

                if (fswzaddSrc3.Type != OperandType.Constant)
                {
                    continue;
                }

                int mask = fswzaddSrc3.Value;

                if ((isDdx && mask != 0b10011001) ||
                    (isDdy && mask != 0b10100101))
                {
                    continue;
                }

                useOp.TurnInto(isDdx ? Instruction.Ddx : Instruction.Ddy, fswzaddSrc2);

                modified = true;
            }

            return modified;
        }

        /// <summary>
        /// Removes a node from the block and from the use lists of its local variable sources.
        /// </summary>
        private static void RemoveNode(BasicBlock block, LinkedListNode<INode> llNode)
        {
            // Remove a node from the nodes list, and also remove itself
            // from all the use lists on the operands that this node uses.
            block.Operations.Remove(llNode);

            Queue<INode> nodes = new Queue<INode>();

            nodes.Enqueue(llNode.Value);

            while (nodes.TryDequeue(out INode node))
            {
                for (int index = 0; index < node.SourcesCount; index++)
                {
                    Operand src = node.GetSource(index);

                    if (src.Type != OperandType.LocalVariable)
                    {
                        continue;
                    }

                    // When the last use of a local goes away, the node that assigns it is
                    // also processed, so that it stops being counted as a use of its own sources.
                    if (src.UseOps.Remove(node) && src.UseOps.Count == 0)
                    {
                        Debug.Assert(src.AsgOp != null);
                        nodes.Enqueue(src.AsgOp);
                    }
                }
            }
        }

        /// <summary>
        /// Checks if a node has no side effects and only writes local variables that are never used.
        /// </summary>
        private static bool IsUnused(INode node)
        {
            return !HasSideEffects(node) && DestIsLocalVar(node) && DestHasNoUses(node);
        }

        /// <summary>
        /// Checks if a node is an atomic, image atomic or call operation, which must be kept
        /// even if the result is unused.
        /// </summary>
        private static bool HasSideEffects(INode node)
        {
            if (node is Operation operation)
            {
                switch (operation.Inst & Instruction.Mask)
                {
                    case Instruction.AtomicAdd:
                    case Instruction.AtomicAnd:
                    case Instruction.AtomicCompareAndSwap:
                    case Instruction.AtomicMaxS32:
                    case Instruction.AtomicMaxU32:
                    case Instruction.AtomicMinS32:
                    case Instruction.AtomicMinU32:
                    case Instruction.AtomicOr:
                    case Instruction.AtomicSwap:
                    case Instruction.AtomicXor:
                    case Instruction.Call:
                    case Instruction.ImageAtomic:
                        return true;
                }
            }

            return false;
        }

        /// <summary>
        /// Checks if a node has at least one destination slot, and all non-null destinations are local variables.
        /// </summary>
        private static bool DestIsLocalVar(INode node)
        {
            if (node.DestsCount == 0)
            {
                return false;
            }

            for (int index = 0; index < node.DestsCount; index++)
            {
                Operand dest = node.GetDest(index);

                if (dest != null && dest.Type != OperandType.LocalVariable)
                {
                    return false;
                }
            }

            return true;
        }

        /// <summary>
        /// Checks if none of the non-null destinations of a node has any use.
        /// </summary>
        private static bool DestHasNoUses(INode node)
        {
            for (int index = 0; index < node.DestsCount; index++)
            {
                Operand dest = node.GetDest(index);

                if (dest != null && dest.UseOps.Count != 0)
                {
                    return false;
                }
            }

            return true;
        }
    }
}
// diff --git a/src/Ryujinx.Graphics.Shader/Translation/Optimizations/Simplification.cs
// (new file mode 100644, index 00000000..8d05f99a)
using Ryujinx.Graphics.Shader.IntermediateRepresentation;

using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;

namespace Ryujinx.Graphics.Shader.Translation.Optimizations
{
    /// <summary>
    /// Algebraic simplifications: operations with an identity or absorbing
    /// constant operand are turned into plain copies.
    /// </summary>
    static class Simplification
    {
        private const int AllOnes = ~0;

        /// <summary>
        /// Tries to simplify a single operation in place.
        /// </summary>
        /// <param name="operation">Operation to simplify; may be turned into a copy</param>
        public static void RunPass(Operation operation)
        {
            switch (operation.Inst)
            {
                // x + 0 == x, 0 + y == y, x ^ 0 == x, 0 ^ y == y
                case Instruction.Add:
                case Instruction.BitwiseExclusiveOr:
                    TryEliminateBinaryOpCommutative(operation, 0);
                    break;

                // x * 1 == x, 1 * y == y
                case Instruction.Multiply:
                    TryEliminateBinaryOpCommutative(operation, 1);
                    break;

                // x / 1 == x
                case Instruction.Divide:
                    TryEliminateBinaryOpY(operation, 1);
                    break;

                // x << 0 == x, x >> 0 == x, x - 0 == x
                case Instruction.ShiftLeft:
                case Instruction.ShiftRightS32:
                case Instruction.ShiftRightU32:
                case Instruction.Subtract:
                    TryEliminateBinaryOpY(operation, 0);
                    break;

                case Instruction.BitwiseAnd:
                    TryEliminateBitwiseAnd(operation);
                    break;

                case Instruction.BitwiseOr:
                    TryEliminateBitwiseOr(operation);
                    break;

                case Instruction.ConditionalSelect:
                    TryEliminateConditionalSelect(operation);
                    break;
            }
        }

        private static void TryEliminateBitwiseAnd(Operation operation)
        {
            // Patterns, tested in this order:
            // 0xFFFFFFFF & y == y, x & 0xFFFFFFFF == x,
            // 0x00000000 & y == 0x00000000, x & 0x00000000 == 0x00000000
            Operand lhs = operation.GetSource(0);
            Operand rhs = operation.GetSource(1);

            if (IsConstEqual(lhs, AllOnes))
            {
                operation.TurnIntoCopy(rhs);
                return;
            }

            if (IsConstEqual(rhs, AllOnes))
            {
                operation.TurnIntoCopy(lhs);
                return;
            }

            bool anyZero = IsConstEqual(lhs, 0) || IsConstEqual(rhs, 0);

            if (anyZero)
            {
                operation.TurnIntoCopy(Const(0));
            }
        }

        private static void TryEliminateBitwiseOr(Operation operation)
        {
            // Patterns, tested in this order:
            // 0x00000000 | y == y, x | 0x00000000 == x,
            // 0xFFFFFFFF | y == 0xFFFFFFFF, x | 0xFFFFFFFF == 0xFFFFFFFF
            Operand lhs = operation.GetSource(0);
            Operand rhs = operation.GetSource(1);

            if (IsConstEqual(lhs, 0))
            {
                operation.TurnIntoCopy(rhs);
                return;
            }

            if (IsConstEqual(rhs, 0))
            {
                operation.TurnIntoCopy(lhs);
                return;
            }

            bool anyAllOnes = IsConstEqual(lhs, AllOnes) || IsConstEqual(rhs, AllOnes);

            if (anyAllOnes)
            {
                operation.TurnIntoCopy(Const(AllOnes));
            }
        }

        private static void TryEliminateBinaryOpY(Operation operation, int comparand)
        {
            // Only the second operand is tested, the operation is not commutative.
            Operand lhs = operation.GetSource(0);
            Operand rhs = operation.GetSource(1);

            if (!IsConstEqual(rhs, comparand))
            {
                return;
            }

            operation.TurnIntoCopy(lhs);
        }

        private static void TryEliminateBinaryOpCommutative(Operation operation, int comparand)
        {
            // Either operand may be the identity constant; the first one is tested first.
            Operand lhs = operation.GetSource(0);
            Operand rhs = operation.GetSource(1);

            if (IsConstEqual(lhs, comparand))
            {
                operation.TurnIntoCopy(rhs);
                return;
            }

            if (IsConstEqual(rhs, comparand))
            {
                operation.TurnIntoCopy(lhs);
            }
        }

        private static void TryEliminateConditionalSelect(Operation operation)
        {
            Operand condition = operation.GetSource(0);

            if (condition.Type == OperandType.Constant)
            {
                // With a constant condition the result is always the same source:
                // source 1 when the condition is non-zero, source 2 otherwise.
                Operand selected = condition.Value != 0
                    ? operation.GetSource(1)
                    : operation.GetSource(2);

                operation.TurnIntoCopy(selected);
            }
        }

        private static bool IsConstEqual(Operand operand, int comparand)
        {
            return operand.Type == OperandType.Constant && operand.Value == comparand;
        }
    }
}
// This span covers two files of the diff. The using directives of both were
// hoisted to the top so that the span is a valid compilation unit on its own.
using Ryujinx.Graphics.Shader.Decoders;
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Numerics;

// diff --git a/src/Ryujinx.Graphics.Shader/Translation/Optimizations/Utils.cs
// (new file mode 100644, index 00000000..4ca6d687)
namespace Ryujinx.Graphics.Shader.Translation.Optimizations
{
    /// <summary>
    /// Helpers shared by the optimization passes.
    /// </summary>
    static class Utils
    {
        /// <summary>
        /// Finds the conditional branch, at the end of a predecessor, that falls through to the given block.
        /// </summary>
        /// <returns>The branch operation, or null if there is none</returns>
        private static Operation FindBranchSource(BasicBlock block)
        {
            foreach (BasicBlock sourceBlock in block.Predecessors)
            {
                if (sourceBlock.Operations.Count > 0)
                {
                    if (sourceBlock.GetLastOp() is Operation lastOp && IsConditionalBranch(lastOp.Inst) && sourceBlock.Next == block)
                    {
                        return lastOp;
                    }
                }
            }

            return null;
        }

        private static bool IsConditionalBranch(Instruction inst)
        {
            return inst == Instruction.BranchIfFalse || inst == Instruction.BranchIfTrue;
        }

        private static bool BlockConditionsMatch(BasicBlock currentBlock, BasicBlock queryBlock)
        {
            // Check if all the conditions for the query block are satisfied by the current block.
            // Just checks the top-most conditional for now.

            Operation currentBranch = FindBranchSource(currentBlock);
            Operation queryBranch = FindBranchSource(queryBlock);

            Operand currentCondition = currentBranch?.GetSource(0);
            Operand queryCondition = queryBranch?.GetSource(0);

            // The condition should be the same operand instance.

            return currentBranch != null && queryBranch != null &&
                   currentBranch.Inst == queryBranch.Inst &&
                   currentCondition == queryCondition;
        }

        /// <summary>
        /// If the operand is assigned by a phi node, returns the phi source coming from a block
        /// that is guarded by the same condition as <paramref name="block"/>.
        /// </summary>
        /// <param name="source">Operand to look through</param>
        /// <param name="block">Block where the operand is used</param>
        /// <returns>The matching phi source, or <paramref name="source"/> itself if there is none</returns>
        public static Operand FindLastOperation(Operand source, BasicBlock block)
        {
            if (source.AsgOp is PhiNode phiNode)
            {
                // This source can have a different value depending on a previous branch.
                // Ensure that conditions met for that branch are also met for the current one.
                // Prefer the latest sources for the phi node.

                for (int i = phiNode.SourcesCount - 1; i >= 0; i--)
                {
                    BasicBlock phiBlock = phiNode.GetBlock(i);

                    if (BlockConditionsMatch(block, phiBlock))
                    {
                        return phiNode.GetSource(i);
                    }
                }
            }

            return source;
        }
    }
}

// diff --git a/src/Ryujinx.Graphics.Shader/Translation/RegisterUsage.cs
// (new file mode 100644, index 00000000..9e31831d)
namespace Ryujinx.Graphics.Shader.Translation
{
    /// <summary>
    /// Computes which registers a function reads before writing (inputs) and which it
    /// writes (outputs), and inserts the copies between registers and function arguments.
    /// </summary>
    static class RegisterUsage
    {
        private const int RegsCount = 256;
        private const int RegsMask = RegsCount - 1;

        private const int GprMasks = 4;
        private const int PredMasks = 1;
        private const int FlagMasks = 1;
        private const int TotalMasks = GprMasks + PredMasks + FlagMasks;

        /// <summary>
        /// Set of registers, stored as one bit per register: four 64-bit masks for
        /// general purpose registers, one for predicates and one for flags.
        /// </summary>
        private readonly struct RegisterMask : IEquatable<RegisterMask>
        {
            public long GprMask0 { get; }
            public long GprMask1 { get; }
            public long GprMask2 { get; }
            public long GprMask3 { get; }
            public long PredMask { get; }
            public long FlagMask { get; }

            public RegisterMask(long gprMask0, long gprMask1, long gprMask2, long gprMask3, long predMask, long flagMask)
            {
                GprMask0 = gprMask0;
                GprMask1 = gprMask1;
                GprMask2 = gprMask2;
                GprMask3 = gprMask3;
                PredMask = predMask;
                FlagMask = flagMask;
            }

            /// <summary>
            /// Gets one of the masks by index: 0 to 3 for general purpose registers, 4 for predicates, 5 for flags.
            /// </summary>
            /// <exception cref="ArgumentOutOfRangeException">The index is not in the 0 to 5 range</exception>
            public long GetMask(int index)
            {
                return index switch
                {
                    0 => GprMask0,
                    1 => GprMask1,
                    2 => GprMask2,
                    3 => GprMask3,
                    4 => PredMask,
                    5 => FlagMask,
                    _ => throw new ArgumentOutOfRangeException(nameof(index))
                };
            }

            public static RegisterMask operator &(RegisterMask x, RegisterMask y)
            {
                return new RegisterMask(
                    x.GprMask0 & y.GprMask0,
                    x.GprMask1 & y.GprMask1,
                    x.GprMask2 & y.GprMask2,
                    x.GprMask3 & y.GprMask3,
                    x.PredMask & y.PredMask,
                    x.FlagMask & y.FlagMask);
            }

            public static RegisterMask operator |(RegisterMask x, RegisterMask y)
            {
                return new RegisterMask(
                    x.GprMask0 | y.GprMask0,
                    x.GprMask1 | y.GprMask1,
                    x.GprMask2 | y.GprMask2,
                    x.GprMask3 | y.GprMask3,
                    x.PredMask | y.PredMask,
                    x.FlagMask | y.FlagMask);
            }

            public static RegisterMask operator ~(RegisterMask x)
            {
                return new RegisterMask(
                    ~x.GprMask0,
                    ~x.GprMask1,
                    ~x.GprMask2,
                    ~x.GprMask3,
                    ~x.PredMask,
                    ~x.FlagMask);
            }

            public static bool operator ==(RegisterMask x, RegisterMask y)
            {
                return x.Equals(y);
            }

            public static bool operator !=(RegisterMask x, RegisterMask y)
            {
                return !x.Equals(y);
            }

            public override bool Equals(object obj)
            {
                return obj is RegisterMask regMask && Equals(regMask);
            }

            public bool Equals(RegisterMask other)
            {
                return GprMask0 == other.GprMask0 &&
                       GprMask1 == other.GprMask1 &&
                       GprMask2 == other.GprMask2 &&
                       GprMask3 == other.GprMask3 &&
                       PredMask == other.PredMask &&
                       FlagMask == other.FlagMask;
            }

            public override int GetHashCode()
            {
                return HashCode.Combine(GprMask0, GprMask1, GprMask2, GprMask3, PredMask, FlagMask);
            }
        }

        /// <summary>
        /// Registers that a function receives as input arguments and returns as output arguments.
        /// </summary>
        public readonly struct FunctionRegisterUsage
        {
            public Register[] InArguments { get; }
            public Register[] OutArguments { get; }

            public FunctionRegisterUsage(Register[] inArguments, Register[] outArguments)
            {
                InArguments = inArguments;
                OutArguments = outArguments;
            }
        }

        /// <summary>
        /// Computes the register inputs and outputs of a function, and inserts the copies from the
        /// arguments on the entry block, and to the arguments on blocks that end with a return.
        /// </summary>
        /// <param name="cfg">Control flow graph of the function, modified in place</param>
        /// <returns>Registers passed in and out of the function</returns>
        public static FunctionRegisterUsage RunPass(ControlFlowGraph cfg)
        {
            List<Register> inArguments = new List<Register>();
            List<Register> outArguments = new List<Register>();

            // Compute local register inputs and outputs used inside blocks.
            RegisterMask[] localInputs = new RegisterMask[cfg.Blocks.Length];
            RegisterMask[] localOutputs = new RegisterMask[cfg.Blocks.Length];

            foreach (BasicBlock block in cfg.Blocks)
            {
                for (LinkedListNode<INode> node = block.Operations.First; node != null; node = node.Next)
                {
                    // Skip anything that is not an operation, rather than dereferencing a failed cast.
                    if (!(node.Value is Operation operation))
                    {
                        continue;
                    }

                    for (int srcIndex = 0; srcIndex < operation.SourcesCount; srcIndex++)
                    {
                        Operand source = operation.GetSource(srcIndex);

                        if (source.Type != OperandType.Register)
                        {
                            continue;
                        }

                        Register register = source.GetRegister();

                        // Only a read that is not preceded by a write on the same block is an input.
                        localInputs[block.Index] |= GetMask(register) & ~localOutputs[block.Index];
                    }

                    if (operation.Dest != null && operation.Dest.Type == OperandType.Register)
                    {
                        localOutputs[block.Index] |= GetMask(operation.Dest.GetRegister());
                    }
                }
            }

            // Compute global register inputs and outputs used across blocks.
            RegisterMask[] globalCmnOutputs = new RegisterMask[cfg.Blocks.Length];

            RegisterMask[] globalInputs = new RegisterMask[cfg.Blocks.Length];
            RegisterMask[] globalOutputs = new RegisterMask[cfg.Blocks.Length];

            RegisterMask allOutputs = new RegisterMask();
            RegisterMask allCmnOutputs = new RegisterMask(-1L, -1L, -1L, -1L, -1L, -1L);

            bool modified;

            bool firstPass = true;

            do
            {
                modified = false;

                // Compute register outputs.
                for (int index = cfg.PostOrderBlocks.Length - 1; index >= 0; index--)
                {
                    BasicBlock block = cfg.PostOrderBlocks[index];

                    if (block.Predecessors.Count != 0)
                    {
                        BasicBlock predecessor = block.Predecessors[0];

                        // cmnOutputs: registers written on all predecessors.
                        // outputs: registers written on any predecessor.
                        RegisterMask cmnOutputs = localOutputs[predecessor.Index] | globalCmnOutputs[predecessor.Index];

                        RegisterMask outputs = globalOutputs[predecessor.Index];

                        for (int pIndex = 1; pIndex < block.Predecessors.Count; pIndex++)
                        {
                            predecessor = block.Predecessors[pIndex];

                            cmnOutputs &= localOutputs[predecessor.Index] | globalCmnOutputs[predecessor.Index];

                            outputs |= globalOutputs[predecessor.Index];
                        }

                        globalInputs[block.Index] |= outputs & ~cmnOutputs;

                        if (!firstPass)
                        {
                            cmnOutputs &= globalCmnOutputs[block.Index];
                        }

                        if (EndsWithReturn(block))
                        {
                            allCmnOutputs &= cmnOutputs | localOutputs[block.Index];
                        }

                        if (Exchange(globalCmnOutputs, block.Index, cmnOutputs))
                        {
                            modified = true;
                        }

                        outputs |= localOutputs[block.Index];

                        if (Exchange(globalOutputs, block.Index, globalOutputs[block.Index] | outputs))
                        {
                            allOutputs |= outputs;
                            modified = true;
                        }
                    }
                    else if (Exchange(globalOutputs, block.Index, localOutputs[block.Index]))
                    {
                        allOutputs |= localOutputs[block.Index];
                        modified = true;
                    }
                }

                // Compute register inputs.
                for (int index = 0; index < cfg.PostOrderBlocks.Length; index++)
                {
                    BasicBlock block = cfg.PostOrderBlocks[index];

                    RegisterMask inputs = localInputs[block.Index];

                    if (block.Next != null)
                    {
                        inputs |= globalInputs[block.Next.Index];
                    }

                    if (block.Branch != null)
                    {
                        inputs |= globalInputs[block.Branch.Index];
                    }

                    inputs &= ~globalCmnOutputs[block.Index];

                    if (Exchange(globalInputs, block.Index, globalInputs[block.Index] | inputs))
                    {
                        modified = true;
                    }
                }

                firstPass = false;
            }
            while (modified);

            // Insert load and store context instructions where needed.
            foreach (BasicBlock block in cfg.Blocks)
            {
                // The only block without any predecessor should be the entry block.
                // It always needs a context load as it is the first block to run.
                if (block.Predecessors.Count == 0)
                {
                    RegisterMask inputs = globalInputs[block.Index] | (allOutputs & ~allCmnOutputs);

                    LoadLocals(block, inputs, inArguments);
                }

                if (EndsWithReturn(block))
                {
                    StoreLocals(block, allOutputs, inArguments.Count, outArguments);
                }
            }

            return new FunctionRegisterUsage(inArguments.ToArray(), outArguments.ToArray());
        }

        /// <summary>
        /// Appends the input and output registers of the called function to every call operation.
        /// </summary>
        /// <param name="blocks">Blocks to scan for call operations</param>
        /// <param name="frus">Register usage of all functions, indexed by the function id used on the call</param>
        public static void FixupCalls(BasicBlock[] blocks, FunctionRegisterUsage[] frus)
        {
            foreach (BasicBlock block in blocks)
            {
                for (LinkedListNode<INode> node = block.Operations.First; node != null; node = node.Next)
                {
                    // Skip anything that is not a call operation, rather than dereferencing a failed cast.
                    if (!(node.Value is Operation operation) || operation.Inst != Instruction.Call)
                    {
                        continue;
                    }

                    Operand funcId = operation.GetSource(0);

                    Debug.Assert(funcId.Type == OperandType.Constant);

                    var fru = frus[funcId.Value];

                    Operand[] inRegs = new Operand[fru.InArguments.Length];

                    for (int i = 0; i < fru.InArguments.Length; i++)
                    {
                        inRegs[i] = OperandHelper.Register(fru.InArguments[i]);
                    }

                    operation.AppendSources(inRegs);

                    // The first element is left null on purpose, output registers start at index 1.
                    Operand[] outRegs = new Operand[1 + fru.OutArguments.Length];

                    for (int i = 0; i < fru.OutArguments.Length; i++)
                    {
                        outRegs[1 + i] = OperandHelper.Register(fru.OutArguments[i]);
                    }

                    operation.AppendDests(outRegs);
                }
            }
        }

        private static bool StartsWith(BasicBlock block, Instruction inst)
        {
            if (block.Operations.Count == 0)
            {
                return false;
            }

            return block.Operations.First.Value is Operation operation && operation.Inst == inst;
        }

        private static bool EndsWith(BasicBlock block, Instruction inst)
        {
            if (block.Operations.Count == 0)
            {
                return false;
            }

            return block.Operations.Last.Value is Operation operation && operation.Inst == inst;
        }

        /// <summary>
        /// Creates a mask with only the bit of the specified register set.
        /// </summary>
        private static RegisterMask GetMask(Register register)
        {
            Span<long> gprMasks = stackalloc long[GprMasks];
            long predMask = 0;
            long flagMask = 0;

            switch (register.Type)
            {
                case RegisterType.Gpr:
                    // 64 registers per mask: upper bits pick the mask, lower 6 bits pick the bit.
                    gprMasks[register.Index >> 6] = 1L << (register.Index & 0x3f);
                    break;
                case RegisterType.Predicate:
                    predMask = 1L << register.Index;
                    break;
                case RegisterType.Flag:
                    flagMask = 1L << register.Index;
                    break;
            }

            return new RegisterMask(gprMasks[0], gprMasks[1], gprMasks[2], gprMasks[3], predMask, flagMask);
        }

        /// <summary>
        /// Stores a new mask on the array.
        /// </summary>
        /// <returns>True if the new mask is different from the old one, false otherwise</returns>
        private static bool Exchange(RegisterMask[] masks, int blkIndex, RegisterMask value)
        {
            RegisterMask oldValue = masks[blkIndex];

            masks[blkIndex] = value;

            return oldValue != value;
        }

        /// <summary>
        /// Inserts, at the start of the block, one copy from a function argument for each register on the mask.
        /// </summary>
        /// <param name="block">Block where the copies are inserted</param>
        /// <param name="masks">Registers to load</param>
        /// <param name="inArguments">List that receives the loaded registers, only filled if it is empty on entry</param>
        private static void LoadLocals(BasicBlock block, RegisterMask masks, List<Register> inArguments)
        {
            bool fillArgsList = inArguments.Count == 0;
            LinkedListNode<INode> node = null;
            int argIndex = 0;

            for (int i = 0; i < TotalMasks; i++)
            {
                (RegisterType regType, int baseRegIndex) = GetRegTypeAndBaseIndex(i);
                long mask = masks.GetMask(i);

                while (mask != 0)
                {
                    int bit = BitOperations.TrailingZeroCount(mask);

                    mask &= ~(1L << bit);

                    Register register = new Register(baseRegIndex + bit, regType);

                    if (fillArgsList)
                    {
                        inArguments.Add(register);
                    }

                    Operation copyOp = new Operation(Instruction.Copy, OperandHelper.Register(register), OperandHelper.Argument(argIndex++));

                    // Keep the copies in argument order at the start of the block.
                    if (node == null)
                    {
                        node = block.Operations.AddFirst(copyOp);
                    }
                    else
                    {
                        node = block.Operations.AddAfter(node, copyOp);
                    }
                }
            }

            Debug.Assert(argIndex <= inArguments.Count);
        }

        /// <summary>
        /// Inserts, before the last operation of the block, one copy to a function argument for each register on the mask.
        /// </summary>
        /// <param name="block">Block where the copies are inserted</param>
        /// <param name="masks">Registers to store</param>
        /// <param name="inArgumentsCount">Number of input arguments; output arguments are numbered after them</param>
        /// <param name="outArguments">List that receives the stored registers, only filled if it is empty on entry</param>
        private static void StoreLocals(BasicBlock block, RegisterMask masks, int inArgumentsCount, List<Register> outArguments)
        {
            LinkedListNode<INode> node = null;
            int argIndex = inArgumentsCount;
            bool fillArgsList = outArguments.Count == 0;

            for (int i = 0; i < TotalMasks; i++)
            {
                (RegisterType regType, int baseRegIndex) = GetRegTypeAndBaseIndex(i);
                long mask = masks.GetMask(i);

                while (mask != 0)
                {
                    int bit = BitOperations.TrailingZeroCount(mask);

                    mask &= ~(1L << bit);

                    Register register = new Register(baseRegIndex + bit, regType);

                    if (fillArgsList)
                    {
                        outArguments.Add(register);
                    }

                    Operation copyOp = new Operation(Instruction.Copy, OperandHelper.Argument(argIndex++), OperandHelper.Register(register));

                    // Keep the copies in argument order, right before the last operation of the block.
                    if (node == null)
                    {
                        node = block.Operations.AddBefore(block.Operations.Last, copyOp);
                    }
                    else
                    {
                        node = block.Operations.AddAfter(node, copyOp);
                    }
                }
            }

            Debug.Assert(argIndex <= inArgumentsCount + outArguments.Count);
        }

        /// <summary>
        /// Gets the register type and the index of the first register for a mask index.
        /// </summary>
        private static (RegisterType RegType, int BaseRegIndex) GetRegTypeAndBaseIndex(int i)
        {
            RegisterType regType = RegisterType.Gpr;
            int baseRegIndex = 0;

            if (i < GprMasks)
            {
                baseRegIndex = i * sizeof(long) * 8;
            }
            else if (i == GprMasks)
            {
                regType = RegisterType.Predicate;
            }
            else
            {
                regType = RegisterType.Flag;
            }

            return (regType, baseRegIndex);
        }

        private static bool EndsWithReturn(BasicBlock block)
        {
            if (!(block.GetLastOp() is Operation operation))
            {
                return false;
            }

            return operation.Inst == Instruction.Return;
        }
    }
}
\ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/Translation/Rewriter.cs b/src/Ryujinx.Graphics.Shader/Translation/Rewriter.cs
new file mode 100644
index 00000000..91e7ace1
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/Translation/Rewriter.cs
@@ -0,0 +1,768 @@
+using Ryujinx.Graphics.Shader.Decoders;
+using Ryujinx.Graphics.Shader.IntermediateRepresentation;
+using System.Collections.Generic;
+using System.Diagnostics;
+using System.Linq;
+using System.Numerics;
+
+using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
+using static Ryujinx.Graphics.Shader.Translation.GlobalMemory;
+
+namespace Ryujinx.Graphics.Shader.Translation
+{
+    /// <summary>
+    /// IR rewrite pass. It flags or replaces constant buffer reads of draw parameters on vertex shaders,
+    /// rewrites texture sample operations (non-normalized coordinates, non-constant offsets,
+    /// SNORM buffer textures) and replaces global memory operations with storage buffer and
+    /// constant buffer operations.
+    /// </summary>
+    static class Rewriter
+    {
+        /// <summary>
+        /// Runs the rewrite pass over every operation of every basic block.
+        /// </summary>
+        /// <param name="blocks">Blocks whose operation lists are rewritten in place</param>
+        /// <param name="config">Shader configuration, queried for host capabilities and updated with the used features and resources</param>
+        public static void RunPass(BasicBlock[] blocks, ShaderConfig config)
+        {
+            bool isVertexShader = config.Stage == ShaderStage.Vertex;
+            bool hasConstantBufferDrawParameters = config.GpuAccessor.QueryHasConstantBufferDrawParameters();
+            bool supportsSnormBufferTextureFormat = config.GpuAccessor.QueryHostSupportsSnormBufferTextureFormat();
+
+            for (int blkIndex = 0; blkIndex < blocks.Length; blkIndex++)
+            {
+                BasicBlock block = blocks[blkIndex];
+
+                // The loop has no increment expression: the next node is picked inside the body,
+                // because the rewrites below may replace or remove the current node.
+                for (LinkedListNode<INode> node = block.Operations.First; node != null;)
+                {
+                    if (node.Value is not Operation operation)
+                    {
+                        node = node.Next;
+                        continue;
+                    }
+
+                    if (isVertexShader)
+                    {
+                        if (hasConstantBufferDrawParameters)
+                        {
+                            if (ReplaceConstantBufferWithDrawParameters(node, operation))
+                            {
+                                config.SetUsedFeature(FeatureFlags.DrawParameters);
+                            }
+                        }
+                        else if (HasConstantBufferDrawParameters(operation))
+                        {
+                            config.SetUsedFeature(FeatureFlags.DrawParameters);
+                        }
+                    }
+
+                    // Captured before any rewrite; used as fallback when the global access rewrite returns null.
+                    LinkedListNode<INode> nextNode = node.Next;
+
+                    if (operation is TextureOperation texOp)
+                    {
+                        if (texOp.Inst == Instruction.TextureSample)
+                        {
+                            node = RewriteTextureSample(node, config);
+
+                            if (texOp.Type == SamplerType.TextureBuffer && !supportsSnormBufferTextureFormat)
+                            {
+                                node = InsertSnormNormalization(node, config);
+                            }
+                        }
+
+                        // Continue after the last node produced by the rewrites above.
+                        nextNode = node.Next;
+                    }
+                    else if (UsesGlobalMemory(operation.Inst, operation.StorageKind))
+                    {
+                        nextNode = RewriteGlobalAccess(node, config)?.Next ?? nextNode;
+                    }
+
+                    node = nextNode;
+                }
+            }
+        }
+
+        /// <summary>
+        /// Replaces a global memory operation with an operation on the storage buffer whose address range
+        /// contains the accessed address. For global loads, the constant buffers enabled on the
+        /// accessible constant buffers mask are also checked.
+        /// The buffer base addresses and sizes are read from constant buffer 0.
+        /// </summary>
+        /// <param name="node">Node of the global memory operation, it is removed from its list</param>
+        /// <param name="config">Shader configuration</param>
+        /// <returns>Node of the replacement operation, or null if the operation was removed without a replacement</returns>
+        private static LinkedListNode<INode> RewriteGlobalAccess(LinkedListNode<INode> node, ShaderConfig config)
+        {
+            Operation operation = (Operation)node.Value;
+
+            bool isAtomic = operation.Inst.IsAtomic();
+            bool isStg16Or8 = operation.Inst == Instruction.StoreGlobal16 || operation.Inst == Instruction.StoreGlobal8;
+            bool isWrite = isAtomic || operation.Inst == Instruction.StoreGlobal || isStg16Or8;
+
+            Operation storageOp = null;
+
+            // Inserts a new operation before the global access and returns the local that holds its result.
+            Operand PrependOperation(Instruction inst, params Operand[] sources)
+            {
+                Operand local = Local();
+
+                node.List.AddBefore(node, new Operation(inst, local, sources));
+
+                return local;
+            }
+
+            // Inserts an already created operation before the global access,
+            // redirecting its destination to a new local that is returned.
+            // Note that the parameter shadows the outer "operation" variable.
+            Operand PrependExistingOperation(Operation operation)
+            {
+                Operand local = Local();
+
+                operation.Dest = local;
+                node.List.AddBefore(node, operation);
+
+                return local;
+            }
+
+            Operand addrLow = operation.GetSource(0);
+            Operand addrHigh = operation.GetSource(1);
+
+            Operand sbBaseAddrLow = Const(0);
+            Operand sbSlot = Const(0);
+
+            // Negated alignment, used below as an AND mask for the base address.
+            // NOTE(review): this only clears the low bits if the alignment is a power of two, confirm.
+            Operand alignMask = Const(-config.GpuAccessor.QueryHostStorageBufferOffsetAlignment());
+
+            // Emits the check for the address being inside the buffer described by the 3 words at cbOffset
+            // on constant buffer 0 (base address low, base address high and size, in this order).
+            // Returns the operand with the result of the check.
+            Operand BindingRangeCheck(int cbOffset, out Operand baseAddrLow)
+            {
+                baseAddrLow = Cbuf(0, cbOffset);
+                Operand baseAddrHigh = Cbuf(0, cbOffset + 1);
+                Operand size = Cbuf(0, cbOffset + 2);
+
+                Operand offset = PrependOperation(Instruction.Subtract, addrLow, baseAddrLow);
+                Operand borrow = PrependOperation(Instruction.CompareLessU32, addrLow, baseAddrLow);
+
+                Operand inRangeLow = PrependOperation(Instruction.CompareLessU32, offset, size);
+
+                // NOTE(review): presumably the comparison produces -1 when true,
+                // so this add propagates the borrow of the low word subtraction, confirm.
+                Operand addrHighBorrowed = PrependOperation(Instruction.Add, addrHigh, borrow);
+
+                Operand inRangeHigh = PrependOperation(Instruction.CompareEqual, addrHighBorrowed, baseAddrHigh);
+
+                return PrependOperation(Instruction.BitwiseAnd, inRangeLow, inRangeHigh);
+            }
+
+            int sbUseMask = config.AccessibleStorageBuffersMask;
+
+            // For each accessible storage buffer, select its base address and slot if the address is in range,
+            // otherwise keep the values selected by the previous iterations.
+            while (sbUseMask != 0)
+            {
+                int slot = BitOperations.TrailingZeroCount(sbUseMask);
+
+                sbUseMask &= ~(1 << slot);
+
+                config.SetUsedStorageBuffer(slot, isWrite);
+
+                int cbOffset = GetStorageCbOffset(config.Stage, slot);
+
+                Operand inRange = BindingRangeCheck(cbOffset, out Operand baseAddrLow);
+
+                sbBaseAddrLow = PrependOperation(Instruction.ConditionalSelect, inRange, baseAddrLow, sbBaseAddrLow);
+                sbSlot = PrependOperation(Instruction.ConditionalSelect, inRange, Const(slot), sbSlot);
+            }
+
+            if (config.AccessibleStorageBuffersMask != 0)
+            {
+                Operand baseAddrTrunc = PrependOperation(Instruction.BitwiseAnd, sbBaseAddrLow, alignMask);
+                Operand byteOffset = PrependOperation(Instruction.Subtract, addrLow, baseAddrTrunc);
+
+                Operand[] sources = new Operand[operation.SourcesCount];
+
+                sources[0] = sbSlot;
+
+                // 8 and 16 bits stores take the byte offset, everything else takes
+                // the byte offset divided by 4.
+                if (isStg16Or8)
+                {
+                    sources[1] = byteOffset;
+                }
+                else
+                {
+                    sources[1] = PrependOperation(Instruction.ShiftRightU32, byteOffset, Const(2));
+                }
+
+                for (int index = 2; index < operation.SourcesCount; index++)
+                {
+                    sources[index] = operation.GetSource(index);
+                }
+
+                if (isAtomic)
+                {
+                    storageOp = new Operation(operation.Inst, StorageKind.StorageBuffer, operation.Dest, sources);
+                }
+                else if (operation.Inst == Instruction.LoadGlobal)
+                {
+                    storageOp = new Operation(Instruction.LoadStorage, operation.Dest, sources);
+                }
+                else
+                {
+                    Instruction storeInst = operation.Inst switch
+                    {
+                        Instruction.StoreGlobal16 => Instruction.StoreStorage16,
+                        Instruction.StoreGlobal8 => Instruction.StoreStorage8,
+                        _ => Instruction.StoreStorage
+                    };
+
+                    storageOp = new Operation(storeInst, null, sources);
+                }
+            }
+            else if (operation.Dest != null)
+            {
+                // No storage buffer is accessible, the result of the operation is replaced with constant 0.
+                storageOp = new Operation(Instruction.Copy, operation.Dest, Const(0));
+            }
+
+            if (operation.Inst == Instruction.LoadGlobal)
+            {
+                int cbeUseMask = config.AccessibleConstantBuffersMask;
+
+                // For each accessible constant buffer, the result so far is inserted on the list,
+                // and the new result selects between a load from the constant buffer
+                // (if the address is in range) and the previous result.
+                while (cbeUseMask != 0)
+                {
+                    int slot = BitOperations.TrailingZeroCount(cbeUseMask);
+                    int cbSlot = UbeFirstCbuf + slot;
+
+                    cbeUseMask &= ~(1 << slot);
+
+                    config.SetUsedConstantBuffer(cbSlot);
+
+                    Operand previousResult = PrependExistingOperation(storageOp);
+
+                    int cbOffset = GetConstantUbeOffset(slot);
+
+                    Operand inRange = BindingRangeCheck(cbOffset, out Operand baseAddrLow);
+
+                    Operand baseAddrTruncConst = PrependOperation(Instruction.BitwiseAnd, baseAddrLow, alignMask);
+                    Operand byteOffsetConst = PrependOperation(Instruction.Subtract, addrLow, baseAddrTruncConst);
+
+                    Operand cbIndex = PrependOperation(Instruction.ShiftRightU32, byteOffsetConst, Const(2));
+
+                    Operand[] sourcesCb = new Operand[operation.SourcesCount];
+
+                    sourcesCb[0] = Const(cbSlot);
+                    sourcesCb[1] = cbIndex;
+
+                    for (int index = 2; index < operation.SourcesCount; index++)
+                    {
+                        sourcesCb[index] = operation.GetSource(index);
+                    }
+
+                    Operand ldcResult = PrependOperation(Instruction.LoadConstant, sourcesCb);
+
+                    storageOp = new Operation(Instruction.ConditionalSelect, operation.Dest, inRange, ldcResult, previousResult);
+                }
+            }
+
+            // NOTE(review): presumably this removes the old operation from the uses of its sources, confirm.
+            for (int index = 0; index < operation.SourcesCount; index++)
+            {
+                operation.SetSource(index, null);
+            }
+
+            LinkedListNode<INode> oldNode = node;
+            LinkedList<INode> oldNodeList = oldNode.List;
+
+            if (storageOp != null)
+            {
+                node = node.List.AddBefore(node, storageOp);
+            }
+            else
+            {
+                node = null;
+            }
+
+            oldNodeList.Remove(oldNode);
+
+            return node;
+        }
+
+        /// <summary>
+        /// Rewrites a texture sample operation that uses non-normalized coordinates, or texture offsets
+        /// that are not constant when the host does not support them.
+        /// The coordinates are divided by the texture size, and the offsets are added to the coordinates.
+        /// </summary>
+        /// <param name="node">Node of the texture sample operation</param>
+        /// <param name="config">Shader configuration</param>
+        /// <returns>The same node if nothing was changed, otherwise the node of the last texture operation inserted as replacement</returns>
+        private static LinkedListNode<INode> RewriteTextureSample(LinkedListNode<INode> node, ShaderConfig config)
+        {
+            TextureOperation texOp = (TextureOperation)node.Value;
+
+            bool hasOffset = (texOp.Flags & TextureFlags.Offset) != 0;
+            bool hasOffsets = (texOp.Flags & TextureFlags.Offsets) != 0;
+
+            bool hasInvalidOffset = (hasOffset || hasOffsets) && !config.GpuAccessor.QueryHostSupportsNonConstantTextureOffset();
+
+            bool isBindless = (texOp.Flags & TextureFlags.Bindless) != 0;
+
+            // Bindless textures are always treated as having normalized coordinates here.
+            bool isCoordNormalized = isBindless || config.GpuAccessor.QueryTextureCoordNormalized(texOp.Handle, texOp.CbufSlot);
+
+            if (!hasInvalidOffset && isCoordNormalized)
+            {
+                return node;
+            }
+
+            bool isGather = (texOp.Flags & TextureFlags.Gather) != 0;
+            bool hasDerivatives = (texOp.Flags & TextureFlags.Derivatives) != 0;
+            bool intCoords = (texOp.Flags & TextureFlags.IntCoords) != 0;
+            bool hasLodBias = (texOp.Flags & TextureFlags.LodBias) != 0;
+            bool hasLodLevel = (texOp.Flags & TextureFlags.LodLevel) != 0;
+
+            bool isArray = (texOp.Type & SamplerType.Array) != 0;
+            bool isIndexed = (texOp.Type & SamplerType.Indexed) != 0;
+            bool isMultisample = (texOp.Type & SamplerType.Multisample) != 0;
+            bool isShadow = (texOp.Type & SamplerType.Shadow) != 0;
+
+            int coordsCount = texOp.Type.GetDimensions();
+
+            int offsetsCount;
+
+            if (hasOffsets)
+            {
+                offsetsCount = coordsCount * 4;
+            }
+            else if (hasOffset)
+            {
+                offsetsCount = coordsCount;
+            }
+            else
+            {
+                offsetsCount = 0;
+            }
+
+            // The offsets are split from the remaining sources, as the new operation will not have them.
+            Operand[] offsets = new Operand[offsetsCount];
+            Operand[] sources = new Operand[texOp.SourcesCount - offsetsCount];
+
+            // Number of sources before the offsets, that are copied without changes.
+            int copyCount = 0;
+
+            if (isBindless || isIndexed)
+            {
+                copyCount++;
+            }
+
+            // Sources for the Lod operation: the first source (if bindless or indexed) followed by the coordinates.
+            Operand[] lodSources = new Operand[copyCount + coordsCount];
+
+            for (int index = 0; index < lodSources.Length; index++)
+            {
+                lodSources[index] = texOp.GetSource(index);
+            }
+
+            copyCount += coordsCount;
+
+            if (isArray)
+            {
+                copyCount++;
+            }
+
+            if (isShadow)
+            {
+                copyCount++;
+            }
+
+            if (hasDerivatives)
+            {
+                copyCount += coordsCount * 2;
+            }
+
+            if (isMultisample)
+            {
+                copyCount++;
+            }
+            else if (hasLodLevel)
+            {
+                copyCount++;
+            }
+
+            int srcIndex = 0;
+            int dstIndex = 0;
+
+            for (int index = 0; index < copyCount; index++)
+            {
+                sources[dstIndex++] = texOp.GetSource(srcIndex++);
+            }
+
+            bool areAllOffsetsConstant = true;
+
+            for (int index = 0; index < offsetsCount; index++)
+            {
+                Operand offset = texOp.GetSource(srcIndex++);
+
+                areAllOffsetsConstant &= offset.Type == OperandType.Constant;
+
+                offsets[index] = offset;
+            }
+
+            // Constant offsets do not need to be replaced.
+            hasInvalidOffset &= !areAllOffsetsConstant;
+
+            if (!hasInvalidOffset && isCoordNormalized)
+            {
+                return node;
+            }
+
+            if (hasLodBias)
+            {
+                sources[dstIndex++] = texOp.GetSource(srcIndex++);
+            }
+
+            if (isGather && !isShadow)
+            {
+                sources[dstIndex++] = texOp.GetSource(srcIndex++);
+            }
+
+            int coordsIndex = isBindless || isIndexed ? 1 : 0;
+
+            int componentIndex = texOp.Index;
+
+            // Inserts a conversion from S32 to FP32 before the current node and returns the converted value.
+            // It reads the "node" variable at call time, which is reassigned on the gather path below.
+            Operand Float(Operand value)
+            {
+                Operand res = Local();
+
+                node.List.AddBefore(node, new Operation(Instruction.ConvertS32ToFP32, res, value));
+
+                return res;
+            }
+
+            // Emulate non-normalized coordinates by normalizing the coordinates on the shader.
+            // Without normalization, the coordinates are expected to be in the [0, W or H] range,
+            // and otherwise, it is expected to be in the [0, 1] range.
+            // We normalize by dividing the coords by the texture size.
+            if (!isCoordNormalized && !intCoords)
+            {
+                config.SetUsedFeature(FeatureFlags.IntegerSampling);
+
+                // For cube textures, only the first 2 coordinates are normalized.
+                int normCoordsCount = (texOp.Type & SamplerType.Mask) == SamplerType.TextureCube ? 2 : coordsCount;
+
+                for (int index = 0; index < normCoordsCount; index++)
+                {
+                    Operand coordSize = Local();
+
+                    Operand[] texSizeSources;
+
+                    if (isBindless || isIndexed)
+                    {
+                        texSizeSources = new Operand[] { sources[0], Const(0) };
+                    }
+                    else
+                    {
+                        texSizeSources = new Operand[] { Const(0) };
+                    }
+
+                    node.List.AddBefore(node, new TextureOperation(
+                        Instruction.TextureSize,
+                        texOp.Type,
+                        texOp.Format,
+                        texOp.Flags,
+                        texOp.CbufSlot,
+                        texOp.Handle,
+                        index,
+                        new[] { coordSize },
+                        texSizeSources));
+
+                    config.SetUsedTexture(Instruction.TextureSize, texOp.Type, texOp.Format, texOp.Flags, texOp.CbufSlot, texOp.Handle);
+
+                    Operand source = sources[coordsIndex + index];
+
+                    Operand coordNormalized = Local();
+
+                    node.List.AddBefore(node, new Operation(Instruction.FP32 | Instruction.Divide, coordNormalized, source, Float(coordSize)));
+
+                    sources[coordsIndex + index] = coordNormalized;
+                }
+            }
+
+            Operand[] dests = new Operand[texOp.DestsCount];
+
+            for (int i = 0; i < texOp.DestsCount; i++)
+            {
+                dests[i] = texOp.GetDest(i);
+            }
+
+            Operand bindlessHandle = isBindless || isIndexed ? sources[0] : null;
+
+            LinkedListNode<INode> oldNode = node;
+
+            // Technically, non-constant texture offsets are not allowed (according to the spec),
+            // however some GPUs do support that.
+            // For GPUs where it is not supported, we can replace the instruction with the following:
+            // For texture*Offset, we replace it by texture*, and add the offset to the P coords.
+            // The offset can be calculated as offset / textureSize(lod), where lod = textureQueryLod(coords).
+            // For texelFetchOffset, we replace it by texelFetch and add the offset to the P coords directly.
+            // For textureGatherOffset, we split the operation into up to 4 operations, one for each component
+            // that is accessed, where each textureGather operation has a different offset for each pixel.
+            if (hasInvalidOffset && isGather && !isShadow)
+            {
+                config.SetUsedFeature(FeatureFlags.IntegerSampling);
+
+                // The same array is updated and passed to each one of the new operations.
+                // NOTE(review): this relies on the operation not keeping a reference to the array, confirm.
+                Operand[] newSources = new Operand[sources.Length];
+
+                sources.CopyTo(newSources, 0);
+
+                Operand[] texSizes = InsertTextureSize(node, texOp, lodSources, bindlessHandle, coordsCount);
+
+                int destIndex = 0;
+
+                for (int compIndex = 0; compIndex < 4; compIndex++)
+                {
+                    if (((texOp.Index >> compIndex) & 1) == 0)
+                    {
+                        continue;
+                    }
+
+                    for (int index = 0; index < coordsCount; index++)
+                    {
+                        config.SetUsedTexture(Instruction.TextureSize, texOp.Type, texOp.Format, texOp.Flags, texOp.CbufSlot, texOp.Handle);
+
+                        Operand offset = Local();
+
+                        Operand intOffset = offsets[index + (hasOffsets ? compIndex * coordsCount : 0)];
+
+                        node.List.AddBefore(node, new Operation(Instruction.FP32 | Instruction.Divide, offset, Float(intOffset), Float(texSizes[index])));
+
+                        Operand source = sources[coordsIndex + index];
+
+                        Operand coordPlusOffset = Local();
+
+                        node.List.AddBefore(node, new Operation(Instruction.FP32 | Instruction.Add, coordPlusOffset, source, offset));
+
+                        newSources[coordsIndex + index] = coordPlusOffset;
+                    }
+
+                    TextureOperation newTexOp = new TextureOperation(
+                        Instruction.TextureSample,
+                        texOp.Type,
+                        texOp.Format,
+                        texOp.Flags & ~(TextureFlags.Offset | TextureFlags.Offsets),
+                        texOp.CbufSlot,
+                        texOp.Handle,
+                        1,
+                        new[] { dests[destIndex++] },
+                        newSources);
+
+                    // From now on, "node" is the new operation, so the operations
+                    // of the next component are inserted before it.
+                    node = node.List.AddBefore(node, newTexOp);
+                }
+            }
+            else
+            {
+                if (hasInvalidOffset)
+                {
+                    if (intCoords)
+                    {
+                        for (int index = 0; index < coordsCount; index++)
+                        {
+                            Operand source = sources[coordsIndex + index];
+
+                            Operand coordPlusOffset = Local();
+
+                            node.List.AddBefore(node, new Operation(Instruction.Add, coordPlusOffset, source, offsets[index]));
+
+                            sources[coordsIndex + index] = coordPlusOffset;
+                        }
+                    }
+                    else
+                    {
+                        config.SetUsedFeature(FeatureFlags.IntegerSampling);
+
+                        Operand[] texSizes = InsertTextureSize(node, texOp, lodSources, bindlessHandle, coordsCount);
+
+                        for (int index = 0; index < coordsCount; index++)
+                        {
+                            config.SetUsedTexture(Instruction.TextureSize, texOp.Type, texOp.Format, texOp.Flags, texOp.CbufSlot, texOp.Handle);
+
+                            Operand offset = Local();
+
+                            Operand intOffset = offsets[index];
+
+                            node.List.AddBefore(node, new Operation(Instruction.FP32 | Instruction.Divide, offset, Float(intOffset), Float(texSizes[index])));
+
+                            Operand source = sources[coordsIndex + index];
+
+                            Operand coordPlusOffset = Local();
+
+                            node.List.AddBefore(node, new Operation(Instruction.FP32 | Instruction.Add, coordPlusOffset, source, offset));
+
+                            sources[coordsIndex + index] = coordPlusOffset;
+                        }
+                    }
+                }
+
+                TextureOperation newTexOp = new TextureOperation(
+                    Instruction.TextureSample,
+                    texOp.Type,
+                    texOp.Format,
+                    texOp.Flags & ~(TextureFlags.Offset | TextureFlags.Offsets),
+                    texOp.CbufSlot,
+                    texOp.Handle,
+                    componentIndex,
+                    dests,
+                    sources);
+
+                node = node.List.AddBefore(node, newTexOp);
+            }
+
+            node.List.Remove(oldNode);
+
+            for (int index = 0; index < texOp.SourcesCount; index++)
+            {
+                texOp.SetSource(index, null);
+            }
+
+            return node;
+        }
+
+        /// <summary>
+        /// Inserts, before the specified node, a Lod operation followed by one TextureSize operation
+        /// for each coordinate, using the result of the Lod operation converted to integer as source.
+        /// </summary>
+        /// <param name="node">Node that the new operations are inserted before</param>
+        /// <param name="texOp">Texture operation that the type, format, flags and handle are taken from</param>
+        /// <param name="lodSources">Sources of the Lod operation</param>
+        /// <param name="bindlessHandle">First source of the TextureSize operations, or null to omit it</param>
+        /// <param name="coordsCount">Number of TextureSize operations to insert</param>
+        /// <returns>Destination of each one of the TextureSize operations</returns>
+        private static Operand[] InsertTextureSize(
+            LinkedListNode<INode> node,
+            TextureOperation texOp,
+            Operand[] lodSources,
+            Operand bindlessHandle,
+            int coordsCount)
+        {
+            // Inserts a conversion from FP32 to S32 before the node and returns the converted value.
+            Operand Int(Operand value)
+            {
+                Operand res = Local();
+
+                node.List.AddBefore(node, new Operation(Instruction.ConvertFP32ToS32, res, value));
+
+                return res;
+            }
+
+            Operand[] texSizes = new Operand[coordsCount];
+
+            Operand lod = Local();
+
+            node.List.AddBefore(node, new TextureOperation(
+                Instruction.Lod,
+                texOp.Type,
+                texOp.Format,
+                texOp.Flags,
+                texOp.CbufSlot,
+                texOp.Handle,
+                0,
+                new[] { lod },
+                lodSources));
+
+            for (int index = 0; index < coordsCount; index++)
+            {
+                texSizes[index] = Local();
+
+                Operand[] texSizeSources;
+
+                // A new conversion of the lod is inserted for each coordinate.
+                if (bindlessHandle != null)
+                {
+                    texSizeSources = new Operand[] { bindlessHandle, Int(lod) };
+                }
+                else
+                {
+                    texSizeSources = new Operand[] { Int(lod) };
+                }
+
+                node.List.AddBefore(node, new TextureOperation(
+                    Instruction.TextureSize,
+                    texOp.Type,
+                    texOp.Format,
+                    texOp.Flags,
+                    texOp.CbufSlot,
+                    texOp.Handle,
+                    index,
+                    new[] { texSizes[index] },
+                    texSizeSources));
+            }
+
+            return texSizes;
+        }
+
+        /// <summary>
+        /// Inserts, after a texture operation with a SNORM format, the conversion of each one of its
+        /// results to float and the multiplication by the inverse of the maximum positive value of
+        /// the format, and replaces the existing uses of the results with the normalized values.
+        /// Nothing is done for bindless textures or formats that are not 8 or 16 bits SNORM.
+        /// </summary>
+        /// <param name="node">Node of the texture operation</param>
+        /// <param name="config">Shader configuration</param>
+        /// <returns>The node of the last inserted operation, or the same node if nothing was inserted</returns>
+        private static LinkedListNode<INode> InsertSnormNormalization(LinkedListNode<INode> node, ShaderConfig config)
+        {
+            TextureOperation texOp = (TextureOperation)node.Value;
+
+            // We can't query the format of a bindless texture,
+            // because the handle is unknown, it can have any format.
+            if (texOp.Flags.HasFlag(TextureFlags.Bindless))
+            {
+                return node;
+            }
+
+            TextureFormat format = config.GpuAccessor.QueryTextureFormat(texOp.Handle, texOp.CbufSlot);
+
+            int maxPositive = format switch
+            {
+                TextureFormat.R8Snorm => sbyte.MaxValue,
+                TextureFormat.R8G8Snorm => sbyte.MaxValue,
+                TextureFormat.R8G8B8A8Snorm => sbyte.MaxValue,
+                TextureFormat.R16Snorm => short.MaxValue,
+                TextureFormat.R16G16Snorm => short.MaxValue,
+                TextureFormat.R16G16B16A16Snorm => short.MaxValue,
+                _ => 0
+            };
+
+            // The value being 0 means that the format is not a SNORM format,
+            // so there's nothing to do here.
+            if (maxPositive == 0)
+            {
+                return node;
+            }
+
+            // Do normalization. We assume SINT formats are being used
+            // as replacement for SNORM (which is not supported).
+            for (int i = 0; i < texOp.DestsCount; i++)
+            {
+                Operand dest = texOp.GetDest(i);
+
+                // The uses are copied before the conversion operation, which also has dest as source, is created.
+                // NOTE(review): presumably this is what keeps the conversion itself from being replaced below, confirm.
+                INode[] uses = dest.UseOps.ToArray();
+
+                Operation convOp = new Operation(Instruction.ConvertS32ToFP32, Local(), dest);
+                Operation normOp = new Operation(Instruction.FP32 | Instruction.Multiply, Local(), convOp.Dest, ConstF(1f / maxPositive));
+
+                node = node.List.AddAfter(node, convOp);
+                node = node.List.AddAfter(node, normOp);
+
+                foreach (INode useOp in uses)
+                {
+                    if (useOp is not Operation op)
+                    {
+                        continue;
+                    }
+
+                    // Replace all uses of the texture pixel value with the normalized value.
+                    for (int index = 0; index < op.SourcesCount; index++)
+                    {
+                        if (op.GetSource(index) == dest)
+                        {
+                            op.SetSource(index, normOp.Dest);
+                        }
+                    }
+                }
+            }
+
+            return node;
+        }
+
+        /// <summary>
+        /// Replaces the sources of the operation that read the base vertex, base instance or draw index
+        /// from constant buffer 0 with loads of the matching input variable, inserted before the node.
+        /// </summary>
+        /// <param name="node">Node of the operation</param>
+        /// <param name="operation">Operation that will have the sources replaced</param>
+        /// <returns>True if any source was replaced, false otherwise</returns>
+        private static bool ReplaceConstantBufferWithDrawParameters(LinkedListNode<INode> node, Operation operation)
+        {
+            // Inserts a load of the input variable before the node and returns the loaded value.
+            Operand GenerateLoad(IoVariable ioVariable)
+            {
+                Operand value = Local();
+                node.List.AddBefore(node, new Operation(Instruction.Load, StorageKind.Input, value, Const((int)ioVariable)));
+                return value;
+            }
+
+            bool modified = false;
+
+            for (int srcIndex = 0; srcIndex < operation.SourcesCount; srcIndex++)
+            {
+                Operand src = operation.GetSource(srcIndex);
+
+                if (src.Type == OperandType.ConstantBuffer && src.GetCbufSlot() == 0)
+                {
+                    // The constants are byte offsets, they are divided by 4 to be compared with the constant buffer offset.
+                    switch (src.GetCbufOffset())
+                    {
+                        case Constants.NvnBaseVertexByteOffset / 4:
+                            operation.SetSource(srcIndex, GenerateLoad(IoVariable.BaseVertex));
+                            modified = true;
+                            break;
+                        case Constants.NvnBaseInstanceByteOffset / 4:
+                            operation.SetSource(srcIndex, GenerateLoad(IoVariable.BaseInstance));
+                            modified = true;
+                            break;
+                        case Constants.NvnDrawIndexByteOffset / 4:
+                            operation.SetSource(srcIndex, GenerateLoad(IoVariable.DrawIndex));
+                            modified = true;
+                            break;
+                    }
+                }
+            }
+
+            return modified;
+        }
+
+        /// <summary>
+        /// Checks if any source of the operation reads the base vertex, base instance or draw index
+        /// from constant buffer 0.
+        /// </summary>
+        /// <param name="operation">Operation to be checked</param>
+        /// <returns>True if the operation has such a source, false otherwise</returns>
+        private static bool HasConstantBufferDrawParameters(Operation operation)
+        {
+            for (int srcIndex = 0; srcIndex < operation.SourcesCount; srcIndex++)
+            {
+                Operand src = operation.GetSource(srcIndex);
+
+                if (src.Type == OperandType.ConstantBuffer && src.GetCbufSlot() == 0)
+                {
+                    switch (src.GetCbufOffset())
+                    {
+                        case Constants.NvnBaseVertexByteOffset / 4:
+                        case Constants.NvnBaseInstanceByteOffset / 4:
+                        case Constants.NvnDrawIndexByteOffset / 4:
+                            return true;
+                    }
+                }
+            }
+
+            return false;
+        }
+    }
+}
\ No newline at end of file diff --git a/src/Ryujinx.Graphics.Shader/Translation/ShaderConfig.cs b/src/Ryujinx.Graphics.Shader/Translation/ShaderConfig.cs new file mode 100644 index 00000000..22f5a671 --- /dev/null +++ b/src/Ryujinx.Graphics.Shader/Translation/ShaderConfig.cs @@ -0,0 +1,944 @@ +using Ryujinx.Graphics.Shader.IntermediateRepresentation; +using Ryujinx.Graphics.Shader.StructuredIr; +using System; +using System.Collections.Generic; +using System.Linq; +using System.Numerics; + +namespace Ryujinx.Graphics.Shader.Translation +{ + class ShaderConfig + { + // TODO: Non-hardcoded array size. + public const int SamplerArraySize = 4; + + private const int ThreadsPerWarp = 32; + + public ShaderStage Stage { get; } + + public bool GpPassthrough { get; } + public bool LastInVertexPipeline { get; private set; } + + public bool HasLayerInputAttribute { get; private set; } + public int GpLayerInputAttribute { get; private set; } + public int ThreadsPerInputPrimitive { get; } + + public OutputTopology OutputTopology { get; } + + public int MaxOutputVertices { get; } + + public int LocalMemorySize { get; } + + public ImapPixelType[] ImapTypes { get; } + + public int OmapTargets { get; } + public bool OmapSampleMask { get; } + public bool OmapDepth { get; } + + public IGpuAccessor GpuAccessor { get; } + + public TranslationOptions Options { get; } + + public bool TransformFeedbackEnabled { get; } + + private TransformFeedbackOutput[] _transformFeedbackOutputs; + + readonly struct TransformFeedbackVariable : IEquatable<TransformFeedbackVariable> + { + public IoVariable IoVariable { get; } + public int Location { get; } + public int Component { get; } + + public TransformFeedbackVariable(IoVariable ioVariable, int location = 0, int component = 0) + { + IoVariable = ioVariable; + Location = location; + Component = component; + } + + public override bool Equals(object other) + { + return other is TransformFeedbackVariable tfbVar && Equals(tfbVar); + } + + public bool 
Equals(TransformFeedbackVariable other) + { + return IoVariable == other.IoVariable && + Location == other.Location && + Component == other.Component; + } + + public override int GetHashCode() + { + return (int)IoVariable | (Location << 8) | (Component << 16); + } + + public override string ToString() + { + return $"{IoVariable}.{Location}.{Component}"; + } + } + + private readonly Dictionary<TransformFeedbackVariable, TransformFeedbackOutput> _transformFeedbackDefinitions; + + public int Size { get; private set; } + + public byte ClipDistancesWritten { get; private set; } + + public FeatureFlags UsedFeatures { get; private set; } + + public int Cb1DataSize { get; private set; } + + public bool LayerOutputWritten { get; private set; } + public int LayerOutputAttribute { get; private set; } + + public bool NextUsesFixedFuncAttributes { get; private set; } + public int UsedInputAttributes { get; private set; } + public int UsedOutputAttributes { get; private set; } + public HashSet<int> UsedInputAttributesPerPatch { get; } + public HashSet<int> UsedOutputAttributesPerPatch { get; } + public HashSet<int> NextUsedInputAttributesPerPatch { get; private set; } + public int PassthroughAttributes { get; private set; } + private int _nextUsedInputAttributes; + private int _thisUsedInputAttributes; + private Dictionary<int, int> _perPatchAttributeLocations; + + public UInt128 NextInputAttributesComponents { get; private set; } + public UInt128 ThisInputAttributesComponents { get; private set; } + + public int AccessibleStorageBuffersMask { get; private set; } + public int AccessibleConstantBuffersMask { get; private set; } + + private int _usedConstantBuffers; + private int _usedStorageBuffers; + private int _usedStorageBuffersWrite; + + private readonly record struct TextureInfo(int CbufSlot, int Handle, bool Indexed, TextureFormat Format); + + private struct TextureMeta + { + public bool AccurateType; + public SamplerType Type; + public TextureUsageFlags UsageFlags; + } + 
+ private readonly Dictionary<TextureInfo, TextureMeta> _usedTextures; + private readonly Dictionary<TextureInfo, TextureMeta> _usedImages; + + private BufferDescriptor[] _cachedConstantBufferDescriptors; + private BufferDescriptor[] _cachedStorageBufferDescriptors; + private TextureDescriptor[] _cachedTextureDescriptors; + private TextureDescriptor[] _cachedImageDescriptors; + + private int _firstConstantBufferBinding; + private int _firstStorageBufferBinding; + + public int FirstConstantBufferBinding => _firstConstantBufferBinding; + public int FirstStorageBufferBinding => _firstStorageBufferBinding; + + public ShaderConfig(IGpuAccessor gpuAccessor, TranslationOptions options) + { + Stage = ShaderStage.Compute; + GpuAccessor = gpuAccessor; + Options = options; + + _transformFeedbackDefinitions = new Dictionary<TransformFeedbackVariable, TransformFeedbackOutput>(); + + AccessibleStorageBuffersMask = (1 << GlobalMemory.StorageMaxCount) - 1; + AccessibleConstantBuffersMask = (1 << GlobalMemory.UbeMaxCount) - 1; + + UsedInputAttributesPerPatch = new HashSet<int>(); + UsedOutputAttributesPerPatch = new HashSet<int>(); + + _usedTextures = new Dictionary<TextureInfo, TextureMeta>(); + _usedImages = new Dictionary<TextureInfo, TextureMeta>(); + } + + public ShaderConfig( + ShaderStage stage, + OutputTopology outputTopology, + int maxOutputVertices, + IGpuAccessor gpuAccessor, + TranslationOptions options) : this(gpuAccessor, options) + { + Stage = stage; + ThreadsPerInputPrimitive = 1; + OutputTopology = outputTopology; + MaxOutputVertices = maxOutputVertices; + TransformFeedbackEnabled = gpuAccessor.QueryTransformFeedbackEnabled(); + + if (Stage != ShaderStage.Compute) + { + AccessibleConstantBuffersMask = 0; + } + } + + public ShaderConfig(ShaderHeader header, IGpuAccessor gpuAccessor, TranslationOptions options) : this(gpuAccessor, options) + { + Stage = header.Stage; + GpPassthrough = header.Stage == ShaderStage.Geometry && header.GpPassthrough; + 
ThreadsPerInputPrimitive = header.ThreadsPerInputPrimitive; + OutputTopology = header.OutputTopology; + MaxOutputVertices = header.MaxOutputVertexCount; + LocalMemorySize = header.ShaderLocalMemoryLowSize + header.ShaderLocalMemoryHighSize + (header.ShaderLocalMemoryCrsSize / ThreadsPerWarp); + ImapTypes = header.ImapTypes; + OmapTargets = header.OmapTargets; + OmapSampleMask = header.OmapSampleMask; + OmapDepth = header.OmapDepth; + TransformFeedbackEnabled = gpuAccessor.QueryTransformFeedbackEnabled(); + LastInVertexPipeline = header.Stage < ShaderStage.Fragment; + } + + private void EnsureTransformFeedbackInitialized() + { + if (HasTransformFeedbackOutputs() && _transformFeedbackOutputs == null) + { + TransformFeedbackOutput[] transformFeedbackOutputs = new TransformFeedbackOutput[0xc0]; + ulong vecMap = 0UL; + + for (int tfbIndex = 0; tfbIndex < 4; tfbIndex++) + { + var locations = GpuAccessor.QueryTransformFeedbackVaryingLocations(tfbIndex); + var stride = GpuAccessor.QueryTransformFeedbackStride(tfbIndex); + + for (int i = 0; i < locations.Length; i++) + { + byte wordOffset = locations[i]; + if (wordOffset < 0xc0) + { + transformFeedbackOutputs[wordOffset] = new TransformFeedbackOutput(tfbIndex, i * 4, stride); + vecMap |= 1UL << (wordOffset / 4); + } + } + } + + _transformFeedbackOutputs = transformFeedbackOutputs; + + while (vecMap != 0) + { + int vecIndex = BitOperations.TrailingZeroCount(vecMap); + + for (int subIndex = 0; subIndex < 4; subIndex++) + { + int wordOffset = vecIndex * 4 + subIndex; + int byteOffset = wordOffset * 4; + + if (transformFeedbackOutputs[wordOffset].Valid) + { + IoVariable ioVariable = Instructions.AttributeMap.GetIoVariable(this, byteOffset, out int location); + int component = 0; + + if (HasPerLocationInputOrOutputComponent(ioVariable, location, subIndex, isOutput: true)) + { + component = subIndex; + } + + var transformFeedbackVariable = new TransformFeedbackVariable(ioVariable, location, component); + 
_transformFeedbackDefinitions.TryAdd(transformFeedbackVariable, transformFeedbackOutputs[wordOffset]); + } + } + + vecMap &= ~(1UL << vecIndex); + } + } + } + + public TransformFeedbackOutput[] GetTransformFeedbackOutputs() + { + EnsureTransformFeedbackInitialized(); + return _transformFeedbackOutputs; + } + + public bool TryGetTransformFeedbackOutput(IoVariable ioVariable, int location, int component, out TransformFeedbackOutput transformFeedbackOutput) + { + EnsureTransformFeedbackInitialized(); + var transformFeedbackVariable = new TransformFeedbackVariable(ioVariable, location, component); + return _transformFeedbackDefinitions.TryGetValue(transformFeedbackVariable, out transformFeedbackOutput); + } + + private bool HasTransformFeedbackOutputs() + { + return TransformFeedbackEnabled && (LastInVertexPipeline || Stage == ShaderStage.Fragment); + } + + public bool HasTransformFeedbackOutputs(bool isOutput) + { + return TransformFeedbackEnabled && ((isOutput && LastInVertexPipeline) || (!isOutput && Stage == ShaderStage.Fragment)); + } + + public bool HasPerLocationInputOrOutput(IoVariable ioVariable, bool isOutput) + { + if (ioVariable == IoVariable.UserDefined) + { + return (!isOutput && !UsedFeatures.HasFlag(FeatureFlags.IaIndexing)) || + (isOutput && !UsedFeatures.HasFlag(FeatureFlags.OaIndexing)); + } + + return ioVariable == IoVariable.FragmentOutputColor; + } + + public bool HasPerLocationInputOrOutputComponent(IoVariable ioVariable, int location, int component, bool isOutput) + { + if (ioVariable != IoVariable.UserDefined || !HasTransformFeedbackOutputs(isOutput)) + { + return false; + } + + return GetTransformFeedbackOutputComponents(location, component) == 1; + } + + public TransformFeedbackOutput GetTransformFeedbackOutput(int wordOffset) + { + EnsureTransformFeedbackInitialized(); + + return _transformFeedbackOutputs[wordOffset]; + } + + public TransformFeedbackOutput GetTransformFeedbackOutput(int location, int component) + { + return 
GetTransformFeedbackOutput((AttributeConsts.UserAttributeBase / 4) + location * 4 + component); + } + + public int GetTransformFeedbackOutputComponents(int location, int component) + { + EnsureTransformFeedbackInitialized(); + + int baseIndex = (AttributeConsts.UserAttributeBase / 4) + location * 4; + int index = baseIndex + component; + int count = 1; + + for (; count < 4; count++) + { + ref var prev = ref _transformFeedbackOutputs[baseIndex + count - 1]; + ref var curr = ref _transformFeedbackOutputs[baseIndex + count]; + + int prevOffset = prev.Offset; + int currOffset = curr.Offset; + + if (!prev.Valid || !curr.Valid || prevOffset + 4 != currOffset) + { + break; + } + } + + if (baseIndex + count <= index) + { + return 1; + } + + return count; + } + + public AggregateType GetFragmentOutputColorType(int location) + { + return AggregateType.Vector4 | GpuAccessor.QueryFragmentOutputType(location).ToAggregateType(); + } + + public AggregateType GetUserDefinedType(int location, bool isOutput) + { + if ((!isOutput && UsedFeatures.HasFlag(FeatureFlags.IaIndexing)) || + (isOutput && UsedFeatures.HasFlag(FeatureFlags.OaIndexing))) + { + return AggregateType.Array | AggregateType.Vector4 | AggregateType.FP32; + } + + AggregateType type = AggregateType.Vector4; + + if (Stage == ShaderStage.Vertex && !isOutput) + { + type |= GpuAccessor.QueryAttributeType(location).ToAggregateType(); + } + else + { + type |= AggregateType.FP32; + } + + return type; + } + + public int GetDepthRegister() + { + // The depth register is always two registers after the last color output. 
+ return BitOperations.PopCount((uint)OmapTargets) + 1; + } + + public uint ConstantBuffer1Read(int offset) + { + if (Cb1DataSize < offset + 4) + { + Cb1DataSize = offset + 4; + } + + return GpuAccessor.ConstantBuffer1Read(offset); + } + + public TextureFormat GetTextureFormat(int handle, int cbufSlot = -1) + { + // When the formatted load extension is supported, we don't need to + // specify a format, we can just declare it without a format and the GPU will handle it. + if (GpuAccessor.QueryHostSupportsImageLoadFormatted()) + { + return TextureFormat.Unknown; + } + + var format = GpuAccessor.QueryTextureFormat(handle, cbufSlot); + + if (format == TextureFormat.Unknown) + { + GpuAccessor.Log($"Unknown format for texture {handle}."); + + format = TextureFormat.R8G8B8A8Unorm; + } + + return format; + } + + private static bool FormatSupportsAtomic(TextureFormat format) + { + return format == TextureFormat.R32Sint || format == TextureFormat.R32Uint; + } + + public TextureFormat GetTextureFormatAtomic(int handle, int cbufSlot = -1) + { + // Atomic image instructions do not support GL_EXT_shader_image_load_formatted, + // and must have a type specified. Default to R32Sint if not available. 
+ + var format = GpuAccessor.QueryTextureFormat(handle, cbufSlot); + + if (!FormatSupportsAtomic(format)) + { + GpuAccessor.Log($"Unsupported format for texture {handle}: {format}."); + + format = TextureFormat.R32Sint; + } + + return format; + } + + public void SizeAdd(int size) + { + Size += size; + } + + public void InheritFrom(ShaderConfig other) + { + ClipDistancesWritten |= other.ClipDistancesWritten; + UsedFeatures |= other.UsedFeatures; + + UsedInputAttributes |= other.UsedInputAttributes; + UsedOutputAttributes |= other.UsedOutputAttributes; + _usedConstantBuffers |= other._usedConstantBuffers; + _usedStorageBuffers |= other._usedStorageBuffers; + _usedStorageBuffersWrite |= other._usedStorageBuffersWrite; + + foreach (var kv in other._usedTextures) + { + if (!_usedTextures.TryAdd(kv.Key, kv.Value)) + { + _usedTextures[kv.Key] = MergeTextureMeta(kv.Value, _usedTextures[kv.Key]); + } + } + + foreach (var kv in other._usedImages) + { + if (!_usedImages.TryAdd(kv.Key, kv.Value)) + { + _usedImages[kv.Key] = MergeTextureMeta(kv.Value, _usedImages[kv.Key]); + } + } + } + + public void SetLayerOutputAttribute(int attr) + { + LayerOutputWritten = true; + LayerOutputAttribute = attr; + } + + public void SetGeometryShaderLayerInputAttribute(int attr) + { + HasLayerInputAttribute = true; + GpLayerInputAttribute = attr; + } + + public void SetLastInVertexPipeline() + { + LastInVertexPipeline = true; + } + + public void SetInputUserAttributeFixedFunc(int index) + { + UsedInputAttributes |= 1 << index; + } + + public void SetOutputUserAttributeFixedFunc(int index) + { + UsedOutputAttributes |= 1 << index; + } + + public void SetInputUserAttribute(int index, int component) + { + int mask = 1 << index; + + UsedInputAttributes |= mask; + _thisUsedInputAttributes |= mask; + ThisInputAttributesComponents |= UInt128.One << (index * 4 + component); + } + + public void SetInputUserAttributePerPatch(int index) + { + UsedInputAttributesPerPatch.Add(index); + } + + public void 
SetOutputUserAttribute(int index) + { + UsedOutputAttributes |= 1 << index; + } + + public void SetOutputUserAttributePerPatch(int index) + { + UsedOutputAttributesPerPatch.Add(index); + } + + public void MergeFromtNextStage(ShaderConfig config) + { + NextInputAttributesComponents = config.ThisInputAttributesComponents; + NextUsedInputAttributesPerPatch = config.UsedInputAttributesPerPatch; + NextUsesFixedFuncAttributes = config.UsedFeatures.HasFlag(FeatureFlags.FixedFuncAttr); + MergeOutputUserAttributes(config.UsedInputAttributes, config.UsedInputAttributesPerPatch); + + if (UsedOutputAttributesPerPatch.Count != 0) + { + // Regular and per-patch input/output locations can't overlap, + // so we must assign on our location using unused regular input/output locations. + + Dictionary<int, int> locationsMap = new Dictionary<int, int>(); + + int freeMask = ~UsedOutputAttributes; + + foreach (int attr in UsedOutputAttributesPerPatch) + { + int location = BitOperations.TrailingZeroCount(freeMask); + if (location == 32) + { + config.GpuAccessor.Log($"No enough free locations for patch input/output 0x{attr:X}."); + break; + } + + locationsMap.Add(attr, location); + freeMask &= ~(1 << location); + } + + // Both stages must agree on the locations, so use the same "map" for both. + _perPatchAttributeLocations = locationsMap; + config._perPatchAttributeLocations = locationsMap; + } + + // We don't consider geometry shaders using the geometry shader passthrough feature + // as being the last because when this feature is used, it can't actually modify any of the outputs, + // so the stage that comes before it is the last one that can do modifications. 
+ if (config.Stage != ShaderStage.Fragment && (config.Stage != ShaderStage.Geometry || !config.GpPassthrough)) + { + LastInVertexPipeline = false; + } + } + + public void MergeOutputUserAttributes(int mask, IEnumerable<int> perPatch) + { + _nextUsedInputAttributes = mask; + + if (GpPassthrough) + { + PassthroughAttributes = mask & ~UsedOutputAttributes; + } + else + { + UsedOutputAttributes |= mask; + UsedOutputAttributesPerPatch.UnionWith(perPatch); + } + } + + public int GetPerPatchAttributeLocation(int index) + { + if (_perPatchAttributeLocations == null || !_perPatchAttributeLocations.TryGetValue(index, out int location)) + { + return index; + } + + return location; + } + + public bool IsUsedOutputAttribute(int attr) + { + // The check for fixed function attributes on the next stage is conservative, + // returning false if the output is just not used by the next stage is also valid. + if (NextUsesFixedFuncAttributes && + attr >= AttributeConsts.UserAttributeBase && + attr < AttributeConsts.UserAttributeEnd) + { + int index = (attr - AttributeConsts.UserAttributeBase) >> 4; + return (_nextUsedInputAttributes & (1 << index)) != 0; + } + + return true; + } + + public int GetFreeUserAttribute(bool isOutput, int index) + { + int useMask = isOutput ? 
_nextUsedInputAttributes : _thisUsedInputAttributes; + int bit = -1; + + while (useMask != -1) + { + bit = BitOperations.TrailingZeroCount(~useMask); + + if (bit == 32) + { + bit = -1; + break; + } + else if (index < 1) + { + break; + } + + useMask |= 1 << bit; + index--; + } + + return bit; + } + + public void SetAllInputUserAttributes() + { + UsedInputAttributes |= Constants.AllAttributesMask; + ThisInputAttributesComponents |= ~UInt128.Zero >> (128 - Constants.MaxAttributes * 4); + } + + public void SetAllOutputUserAttributes() + { + UsedOutputAttributes |= Constants.AllAttributesMask; + } + + public void SetClipDistanceWritten(int index) + { + ClipDistancesWritten |= (byte)(1 << index); + } + + public void SetUsedFeature(FeatureFlags flags) + { + UsedFeatures |= flags; + } + + public void SetAccessibleBufferMasks(int sbMask, int ubeMask) + { + AccessibleStorageBuffersMask = sbMask; + AccessibleConstantBuffersMask = ubeMask; + } + + public void SetUsedConstantBuffer(int slot) + { + _usedConstantBuffers |= 1 << slot; + } + + public void SetUsedStorageBuffer(int slot, bool write) + { + int mask = 1 << slot; + _usedStorageBuffers |= mask; + + if (write) + { + _usedStorageBuffersWrite |= mask; + } + } + + public void SetUsedTexture( + Instruction inst, + SamplerType type, + TextureFormat format, + TextureFlags flags, + int cbufSlot, + int handle) + { + inst &= Instruction.Mask; + bool isImage = inst == Instruction.ImageLoad || inst == Instruction.ImageStore || inst == Instruction.ImageAtomic; + bool isWrite = inst == Instruction.ImageStore || inst == Instruction.ImageAtomic; + bool accurateType = inst != Instruction.Lod && inst != Instruction.TextureSize; + bool coherent = flags.HasFlag(TextureFlags.Coherent); + + if (isImage) + { + SetUsedTextureOrImage(_usedImages, cbufSlot, handle, type, format, true, isWrite, false, coherent); + } + else + { + bool intCoords = flags.HasFlag(TextureFlags.IntCoords) || inst == Instruction.TextureSize; + 
SetUsedTextureOrImage(_usedTextures, cbufSlot, handle, type, TextureFormat.Unknown, intCoords, false, accurateType, coherent); + } + + GpuAccessor.RegisterTexture(handle, cbufSlot); + } + + private void SetUsedTextureOrImage( + Dictionary<TextureInfo, TextureMeta> dict, + int cbufSlot, + int handle, + SamplerType type, + TextureFormat format, + bool intCoords, + bool write, + bool accurateType, + bool coherent) + { + var dimensions = type.GetDimensions(); + var isIndexed = type.HasFlag(SamplerType.Indexed); + + var usageFlags = TextureUsageFlags.None; + + if (intCoords) + { + usageFlags |= TextureUsageFlags.NeedsScaleValue; + + var canScale = Stage.SupportsRenderScale() && !isIndexed && !write && dimensions == 2; + + if (!canScale) + { + // Resolution scaling cannot be applied to this texture right now. + // Flag so that we know to blacklist scaling on related textures when binding them. + usageFlags |= TextureUsageFlags.ResScaleUnsupported; + } + } + + if (write) + { + usageFlags |= TextureUsageFlags.ImageStore; + } + + if (coherent) + { + usageFlags |= TextureUsageFlags.ImageCoherent; + } + + int arraySize = isIndexed ? SamplerArraySize : 1; + + for (int layer = 0; layer < arraySize; layer++) + { + var info = new TextureInfo(cbufSlot, handle + layer * 2, isIndexed, format); + var meta = new TextureMeta() + { + AccurateType = accurateType, + Type = type, + UsageFlags = usageFlags + }; + + if (dict.TryGetValue(info, out var existingMeta)) + { + dict[info] = MergeTextureMeta(meta, existingMeta); + } + else + { + dict.Add(info, meta); + } + } + } + + private static TextureMeta MergeTextureMeta(TextureMeta meta, TextureMeta existingMeta) + { + meta.UsageFlags |= existingMeta.UsageFlags; + + // If the texture we have has inaccurate type information, then + // we prefer the most accurate one. 
+ if (existingMeta.AccurateType) + { + meta.AccurateType = true; + meta.Type = existingMeta.Type; + } + + return meta; + } + + public BufferDescriptor[] GetConstantBufferDescriptors() + { + if (_cachedConstantBufferDescriptors != null) + { + return _cachedConstantBufferDescriptors; + } + + int usedMask = _usedConstantBuffers; + + if (UsedFeatures.HasFlag(FeatureFlags.CbIndexing)) + { + usedMask |= (int)GpuAccessor.QueryConstantBufferUse(); + } + + return _cachedConstantBufferDescriptors = GetBufferDescriptors( + usedMask, + 0, + UsedFeatures.HasFlag(FeatureFlags.CbIndexing), + out _firstConstantBufferBinding, + GpuAccessor.QueryBindingConstantBuffer); + } + + public BufferDescriptor[] GetStorageBufferDescriptors() + { + if (_cachedStorageBufferDescriptors != null) + { + return _cachedStorageBufferDescriptors; + } + + return _cachedStorageBufferDescriptors = GetBufferDescriptors( + _usedStorageBuffers, + _usedStorageBuffersWrite, + true, + out _firstStorageBufferBinding, + GpuAccessor.QueryBindingStorageBuffer); + } + + private static BufferDescriptor[] GetBufferDescriptors( + int usedMask, + int writtenMask, + bool isArray, + out int firstBinding, + Func<int, int> getBindingCallback) + { + firstBinding = 0; + bool hasFirstBinding = false; + var descriptors = new BufferDescriptor[BitOperations.PopCount((uint)usedMask)]; + + int lastSlot = -1; + + for (int i = 0; i < descriptors.Length; i++) + { + int slot = BitOperations.TrailingZeroCount(usedMask); + + if (isArray) + { + // The next array entries also consumes bindings, even if they are unused. 
+ for (int j = lastSlot + 1; j < slot; j++) + { + int binding = getBindingCallback(j); + + if (!hasFirstBinding) + { + firstBinding = binding; + hasFirstBinding = true; + } + } + } + + lastSlot = slot; + + descriptors[i] = new BufferDescriptor(getBindingCallback(slot), slot); + + if (!hasFirstBinding) + { + firstBinding = descriptors[i].Binding; + hasFirstBinding = true; + } + + if ((writtenMask & (1 << slot)) != 0) + { + descriptors[i].SetFlag(BufferUsageFlags.Write); + } + + usedMask &= ~(1 << slot); + } + + return descriptors; + } + + public TextureDescriptor[] GetTextureDescriptors() + { + return _cachedTextureDescriptors ??= GetTextureOrImageDescriptors(_usedTextures, GpuAccessor.QueryBindingTexture); + } + + public TextureDescriptor[] GetImageDescriptors() + { + return _cachedImageDescriptors ??= GetTextureOrImageDescriptors(_usedImages, GpuAccessor.QueryBindingImage); + } + + private static TextureDescriptor[] GetTextureOrImageDescriptors(Dictionary<TextureInfo, TextureMeta> dict, Func<int, bool, int> getBindingCallback) + { + var descriptors = new TextureDescriptor[dict.Count]; + + int i = 0; + foreach (var kv in dict.OrderBy(x => x.Key.Indexed).OrderBy(x => x.Key.Handle)) + { + var info = kv.Key; + var meta = kv.Value; + + bool isBuffer = (meta.Type & SamplerType.Mask) == SamplerType.TextureBuffer; + int binding = getBindingCallback(i, isBuffer); + + descriptors[i] = new TextureDescriptor(binding, meta.Type, info.Format, info.CbufSlot, info.Handle); + descriptors[i].SetFlag(meta.UsageFlags); + i++; + } + + return descriptors; + } + + public (TextureDescriptor, int) FindTextureDescriptor(AstTextureOperation texOp) + { + TextureDescriptor[] descriptors = GetTextureDescriptors(); + + for (int i = 0; i < descriptors.Length; i++) + { + var descriptor = descriptors[i]; + + if (descriptor.CbufSlot == texOp.CbufSlot && + descriptor.HandleIndex == texOp.Handle && + descriptor.Format == texOp.Format) + { + return (descriptor, i); + } + } + + return (default, -1); + 
} + + private static int FindDescriptorIndex(TextureDescriptor[] array, AstTextureOperation texOp) + { + for (int i = 0; i < array.Length; i++) + { + var descriptor = array[i]; + + if (descriptor.Type == texOp.Type && + descriptor.CbufSlot == texOp.CbufSlot && + descriptor.HandleIndex == texOp.Handle && + descriptor.Format == texOp.Format) + { + return i; + } + } + + return -1; + } + + public int FindTextureDescriptorIndex(AstTextureOperation texOp) + { + return FindDescriptorIndex(GetTextureDescriptors(), texOp); + } + + public int FindImageDescriptorIndex(AstTextureOperation texOp) + { + return FindDescriptorIndex(GetImageDescriptors(), texOp); + } + + public ShaderProgramInfo CreateProgramInfo(ShaderIdentification identification = ShaderIdentification.None) + { + return new ShaderProgramInfo( + GetConstantBufferDescriptors(), + GetStorageBufferDescriptors(), + GetTextureDescriptors(), + GetImageDescriptors(), + identification, + GpLayerInputAttribute, + Stage, + UsedFeatures.HasFlag(FeatureFlags.InstanceId), + UsedFeatures.HasFlag(FeatureFlags.DrawParameters), + UsedFeatures.HasFlag(FeatureFlags.RtLayer), + ClipDistancesWritten, + OmapTargets); + } + } +}
// src/Ryujinx.Graphics.Shader/Translation/ShaderHeader.cs
using Ryujinx.Common.Utilities;
using System;
using System.Runtime.InteropServices;

namespace Ryujinx.Graphics.Shader.Translation
{
    /// <summary>
    /// Value of one 2-bit input map (IMAP) field read from the shader header.
    /// </summary>
    enum PixelImap
    {
        Unused = 0,
        Constant = 1,
        Perspective = 2,
        ScreenLinear = 3
    }

    /// <summary>
    /// The four IMAP fields (X, Y, Z, W) that are packed into a single header byte.
    /// </summary>
    readonly struct ImapPixelType
    {
        public PixelImap X { get; }
        public PixelImap Y { get; }
        public PixelImap Z { get; }
        public PixelImap W { get; }

        public ImapPixelType(PixelImap x, PixelImap y, PixelImap z, PixelImap w)
        {
            X = x;
            Y = y;
            Z = z;
            W = w;
        }

        /// <summary>
        /// Returns the first of X, Y and Z that is not <see cref="PixelImap.Unused"/>.
        /// When all three are unused, W is returned as is (it may itself be unused).
        /// </summary>
        public PixelImap GetFirstUsedType()
        {
            return X != PixelImap.Unused ? X
                 : Y != PixelImap.Unused ? Y
                 : Z != PixelImap.Unused ? Z
                 : W;
        }
    }

    /// <summary>
    /// Fields decoded from the 0x50-byte header that is read at the shader address.
    /// </summary>
    class ShaderHeader
    {
        // First 32-bit word of the IMAP byte table, and the number of entries decoded from it.
        private const int ImapFirstWord = 6;
        private const int ImapEntries = 32;

        public int SphType { get; }
        public int Version { get; }

        public ShaderStage Stage { get; }

        public bool MrtEnable { get; }

        public bool KillsPixels { get; }

        public bool DoesGlobalStore { get; }

        public int SassVersion { get; }

        public bool GpPassthrough { get; }

        public bool DoesLoadOrStore { get; }
        public bool DoesFp64 { get; }

        public int StreamOutMask { get; }

        public int ShaderLocalMemoryLowSize { get; }

        public int PerPatchAttributeCount { get; }

        public int ShaderLocalMemoryHighSize { get; }

        public int ThreadsPerInputPrimitive { get; }

        public int ShaderLocalMemoryCrsSize { get; }

        public OutputTopology OutputTopology { get; }

        public int MaxOutputVertexCount { get; }

        public int StoreReqStart { get; }
        public int StoreReqEnd { get; }

        public ImapPixelType[] ImapTypes { get; }

        public int OmapTargets { get; }
        public bool OmapSampleMask { get; }
        public bool OmapDepth { get; }

        /// <summary>
        /// Reads 0x50 bytes of code at <paramref name="address"/> and decodes them as 32-bit words.
        /// </summary>
        /// <param name="gpuAccessor">Accessor used to fetch the header bytes.</param>
        /// <param name="address">Address passed to <c>GetCode</c>.</param>
        /// <remarks>
        /// NOTE(review): the two-argument <c>Extract</c> calls look like (bit offset, bit count)
        /// and the one-argument form like a single-bit test; confirm against the extension's definition.
        /// </remarks>
        public ShaderHeader(IGpuAccessor gpuAccessor, ulong address)
        {
            ReadOnlySpan<int> words = MemoryMarshal.Cast<ulong, int>(gpuAccessor.GetCode(address, 0x50));

            // The five common words are fetched up front, before any field is decoded.
            int word0 = words[0];
            int word1 = words[1];
            int word2 = words[2];
            int word3 = words[3];
            int word4 = words[4];

            SphType = word0.Extract(0, 5);
            Version = word0.Extract(5, 5);

            // A header that reports the compute stage is invalid here; fall back to vertex.
            ShaderStage decodedStage = (ShaderStage)word0.Extract(10, 4);
            Stage = decodedStage == ShaderStage.Compute ? ShaderStage.Vertex : decodedStage;

            MrtEnable = word0.Extract(14);
            KillsPixels = word0.Extract(15);
            DoesGlobalStore = word0.Extract(16);
            SassVersion = word0.Extract(17, 4);
            GpPassthrough = word0.Extract(24);
            DoesLoadOrStore = word0.Extract(26);
            DoesFp64 = word0.Extract(27);
            StreamOutMask = word0.Extract(28, 4);

            ShaderLocalMemoryLowSize = word1.Extract(0, 24);
            PerPatchAttributeCount = word1.Extract(24, 8);

            ShaderLocalMemoryHighSize = word2.Extract(0, 24);
            ThreadsPerInputPrimitive = word2.Extract(24, 8);

            ShaderLocalMemoryCrsSize = word3.Extract(0, 24);
            OutputTopology = (OutputTopology)word3.Extract(24, 4);

            MaxOutputVertexCount = word4.Extract(0, 12);
            StoreReqStart = word4.Extract(12, 8);
            StoreReqEnd = word4.Extract(24, 8);

            ImapTypes = new ImapPixelType[ImapEntries];

            for (int entry = 0; entry < ImapEntries; entry++)
            {
                // Four entries per 32-bit word, one byte each, lowest byte first.
                int word = words[ImapFirstWord + entry / 4];
                int shift = (entry % 4) * 8;

                ImapTypes[entry] = UnpackImap((byte)(word >> shift));
            }

            int omapTargetsWord = words[18];
            int omapFlagsWord = words[19];

            OmapTargets = omapTargetsWord;
            OmapSampleMask = omapFlagsWord.Extract(0);
            OmapDepth = omapFlagsWord.Extract(1);
        }

        // Splits one IMAP byte into its four 2-bit fields: X in bits 0-1, up to W in bits 6-7.
        private static ImapPixelType UnpackImap(byte packed)
        {
            return new ImapPixelType(
                (PixelImap)(packed & 3),
                (PixelImap)((packed >> 2) & 3),
                (PixelImap)((packed >> 4) & 3),
                (PixelImap)((packed >> 6) & 3));
        }
    }
}
// src/Ryujinx.Graphics.Shader/Translation/ShaderIdentifier.cs
// src/Ryujinx.Graphics.Shader/Translation/Ssa.cs
using Ryujinx.Graphics.Shader.Decoders;
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using System.Collections.Generic;

using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;

// ---- ShaderIdentifier.cs ----
namespace Ryujinx.Graphics.Shader.Translation
{
    /// <summary>
    /// Recognizes shaders that match a known pattern.
    /// </summary>
    static class ShaderIdentifier
    {
        /// <summary>
        /// Classifies the shader. A geometry shader fed with triangles, on a host that reports
        /// no geometry shader support, is reported as a layer passthrough when its single
        /// function matches the passthrough pattern. In that case the layer input attribute
        /// is also stored on <paramref name="config"/>.
        /// </summary>
        /// <param name="functions">Functions of the translated shader.</param>
        /// <param name="config">Shader configuration, queried and possibly updated.</param>
        /// <returns>The identification, or <see cref="ShaderIdentification.None"/>.</returns>
        public static ShaderIdentification Identify(Function[] functions, ShaderConfig config)
        {
            // The checks are kept in this order so the accessor is only queried when needed.
            bool isLayerPassthrough =
                config.Stage == ShaderStage.Geometry &&
                config.GpuAccessor.QueryPrimitiveTopology() == InputTopology.Triangles &&
                !config.GpuAccessor.QueryHostSupportsGeometryShader() &&
                IsLayerPassthroughGeometryShader(functions, out int layerInputAttr);

            if (!isLayerPassthrough)
            {
                return ShaderIdentification.None;
            }

            config.SetGeometryShaderLayerInputAttribute(layerInputAttr);

            return ShaderIdentification.GeometryLayerPassthrough;
        }

        /// <summary>
        /// Checks that the shader has one function, no backward branches and no resource writes,
        /// that every output store is an accepted passthrough or constant, that the layer output
        /// is written from a user defined input, and that exactly three vertices are emitted.
        /// </summary>
        /// <param name="functions">Functions of the shader.</param>
        /// <param name="layerInputAttr">
        /// Location * 4 + component of the input last stored to the layer output; 0 when none was seen.
        /// </param>
        private static bool IsLayerPassthroughGeometryShader(Function[] functions, out int layerInputAttr)
        {
            layerInputAttr = 0;

            if (functions.Length != 1)
            {
                return false;
            }

            bool writesLayer = false;
            int pendingVertices = 0;
            int finishedVertices = 0;

            foreach (BasicBlock block in functions[0].Blocks)
            {
                // We are not expecting loops or any complex control flow here, so fail in those cases.
                if (block.Branch != null && block.Branch.Index <= block.Index)
                {
                    return false;
                }

                foreach (INode node in block.Operations)
                {
                    if (node is not Operation operation)
                    {
                        continue;
                    }

                    if (IsResourceWrite(operation.Inst))
                    {
                        return false;
                    }

                    if (operation.Inst == Instruction.Store && operation.StorageKind == StorageKind.Output)
                    {
                        if (!IsAcceptedOutputStore(operation, ref writesLayer, ref layerInputAttr))
                        {
                            return false;
                        }
                    }
                    else if (operation.Inst == Instruction.EmitVertex)
                    {
                        pendingVertices++;
                    }
                    else if (operation.Inst == Instruction.EndPrimitive)
                    {
                        finishedVertices += pendingVertices;
                        pendingVertices = 0;
                    }
                }
            }

            return finishedVertices + pendingVertices == 3 && writesLayer;
        }

        // Accepts an output store whose value is either loaded from an input (see
        // IsInputPassthrough) or a constant equal to 1.0 for component index 3 and 0.0 otherwise.
        private static bool IsAcceptedOutputStore(Operation store, ref bool writesLayer, ref int layerInputAttr)
        {
            Operand value = store.GetSource(store.SourcesCount - 1);

            if (value.Type == OperandType.LocalVariable &&
                value.AsgOp is Operation load &&
                load.Inst == Instruction.Load &&
                load.StorageKind.IsInputOrOutput())
            {
                if (load.StorageKind != StorageKind.Input)
                {
                    return false;
                }

                return IsInputPassthrough(store, load, ref writesLayer, ref layerInputAttr);
            }

            if (value.Type != OperandType.Constant)
            {
                return false;
            }

            int dstComponent = store.GetSource(store.SourcesCount - 2).Value;
            float expectedValue = dstComponent == 3 ? 1f : 0f;

            return value.AsFloat() == expectedValue;
        }

        // Accepts a store of a loaded input when it either writes the layer output from a user
        // defined input (recorded through the ref parameters), or copies an input to the same
        // output variable using identical constant indices.
        private static bool IsInputPassthrough(Operation store, Operation load, ref bool writesLayer, ref int layerInputAttr)
        {
            IoVariable dstAttribute = (IoVariable)store.GetSource(0).Value;
            IoVariable srcAttribute = (IoVariable)load.GetSource(0).Value;

            if (dstAttribute == IoVariable.Layer && srcAttribute == IoVariable.UserDefined)
            {
                if (load.SourcesCount != 4)
                {
                    return false;
                }

                writesLayer = true;
                layerInputAttr = load.GetSource(1).Value * 4 + load.GetSource(3).Value;

                return true;
            }

            if (dstAttribute != srcAttribute)
            {
                return false;
            }

            int inputsCount = store.SourcesCount - 2;

            if (dstAttribute == IoVariable.UserDefined)
            {
                if (store.GetSource(1).Value != load.GetSource(1).Value)
                {
                    return false;
                }

                inputsCount--;
            }

            // Compare the trailing index operands of both operations, walking backwards.
            for (int i = 0; i < inputsCount; i++)
            {
                int dstIndex = store.SourcesCount - 2 - i;
                int srcIndex = load.SourcesCount - 1 - i;

                if (dstIndex < 0 || srcIndex < 0)
                {
                    return false;
                }

                Operand dstOperand = store.GetSource(dstIndex);
                Operand srcOperand = load.GetSource(srcIndex);

                if (dstOperand.Type != OperandType.Constant ||
                    srcOperand.Type != OperandType.Constant ||
                    dstOperand.Value != srcOperand.Value)
                {
                    return false;
                }
            }

            return true;
        }

        // True for the atomic, image store and global/storage store instructions listed below.
        private static bool IsResourceWrite(Instruction inst)
        {
            return inst is
                Instruction.AtomicAdd or
                Instruction.AtomicAnd or
                Instruction.AtomicCompareAndSwap or
                Instruction.AtomicMaxS32 or
                Instruction.AtomicMaxU32 or
                Instruction.AtomicMinS32 or
                Instruction.AtomicMinU32 or
                Instruction.AtomicOr or
                Instruction.AtomicSwap or
                Instruction.AtomicXor or
                Instruction.ImageAtomic or
                Instruction.ImageStore or
                Instruction.StoreGlobal or
                Instruction.StoreGlobal16 or
                Instruction.StoreGlobal8 or
                Instruction.StoreStorage or
                Instruction.StoreStorage16 or
                Instruction.StoreStorage8;
        }
    }
}

// ---- Ssa.cs ----
namespace Ryujinx.Graphics.Shader.Translation
{
    /// <summary>
    /// Renames register operands to local variables, inserting phi nodes where definitions
    /// from several blocks meet.
    /// </summary>
    static class Ssa
    {
        private const int GprsAndPredsCount = RegisterConsts.GprsCount + RegisterConsts.PredsCount;

        /// <summary>
        /// Per-block state: the local recorded for each register, plus a bit per register
        /// telling whether the block needs a phi for it.
        /// </summary>
        private class DefMap
        {
            private readonly Dictionary<Register, Operand> _map = new Dictionary<Register, Operand>();

            private readonly long[] _phiMasks = new long[(RegisterConsts.TotalCount + 63) / 64];

            public bool TryAddOperand(Register reg, Operand operand)
            {
                return _map.TryAdd(reg, operand);
            }

            public bool TryGetOperand(Register reg, out Operand operand)
            {
                return _map.TryGetValue(reg, out operand);
            }

            // Marks the register as needing a phi. Returns false when it was already marked.
            public bool AddPhi(Register reg)
            {
                ref long word = ref GetPhiWord(reg, out long mask);

                if ((word & mask) != 0)
                {
                    return false;
                }

                word |= mask;

                return true;
            }

            public bool HasPhi(Register reg)
            {
                return (GetPhiWord(reg, out long mask) & mask) != 0;
            }

            // Locates the 64-bit word and the bit mask that hold the phi flag of a register.
            private ref long GetPhiWord(Register reg, out long mask)
            {
                int key = GetKeyFromRegister(reg);

                mask = 1L << (key & 63);

                return ref _phiMasks[key / 64];
            }
        }

        /// <summary>
        /// Register key to operand map for the block being processed. It remembers which keys
        /// were set so that it can be enumerated and cleared without scanning every slot.
        /// </summary>
        private class LocalDefMap
        {
            private readonly Operand[] _map = new Operand[RegisterConsts.TotalCount];
            private readonly int[] _uses = new int[RegisterConsts.TotalCount];

            public int UseCount { get; private set; }

            public Operand Get(int key)
            {
                return _map[key];
            }

            public void Add(int key, Operand operand)
            {
                // Only the first assignment of a key is recorded in the use list.
                if (_map[key] == null)
                {
                    _uses[UseCount++] = key;
                }

                _map[key] = operand;
            }

            public Operand GetUse(int index, out int key)
            {
                key = _uses[index];

                return _map[key];
            }

            public void Clear()
            {
                for (int use = 0; use < UseCount; use++)
                {
                    _map[_uses[use]] = null;
                }

                UseCount = 0;
            }
        }

        // A definition together with the block it was found in.
        private readonly struct Definition
        {
            public BasicBlock Block { get; }
            public Operand Local { get; }

            public Definition(BasicBlock block, Operand local)
            {
                Block = block;
                Local = local;
            }
        }

        /// <summary>
        /// Rewrites all register operands in <paramref name="blocks"/>.
        /// The first pass handles each block on its own: register sources that were defined
        /// earlier in the same block are replaced, every register destination gets a fresh local,
        /// the last local per register is recorded for the block, and phi flags are propagated
        /// through the dominance frontiers. The second pass resolves the remaining register
        /// sources from other blocks, materializing phi nodes on demand.
        /// </summary>
        /// <param name="blocks">Blocks of one function; dominance information is read from them.</param>
        public static void Rename(BasicBlock[] blocks)
        {
            DefMap[] globalDefs = new DefMap[blocks.Length];
            LocalDefMap localDefs = new LocalDefMap();

            for (int i = 0; i < globalDefs.Length; i++)
            {
                globalDefs[i] = new DefMap();
            }

            Queue<BasicBlock> pendingFrontiers = new Queue<BasicBlock>();

            // First pass, get all defs and locals uses.
            foreach (BasicBlock block in blocks)
            {
                for (LinkedListNode<INode> node = block.Operations.First; node != null; node = node.Next)
                {
                    if (node.Value is not Operation operation)
                    {
                        continue;
                    }

                    for (int srcIndex = 0; srcIndex < operation.SourcesCount; srcIndex++)
                    {
                        operation.SetSource(srcIndex, RenameLocal(operation.GetSource(srcIndex)));
                    }

                    for (int dstIndex = 0; dstIndex < operation.DestsCount; dstIndex++)
                    {
                        Operand dest = operation.GetDest(dstIndex);

                        if (dest != null && dest.Type == OperandType.Register)
                        {
                            Operand local = Local();

                            localDefs.Add(GetKeyFromRegister(dest.GetRegister()), local);

                            operation.SetDest(dstIndex, local);
                        }
                    }
                }

                int definedCount = localDefs.UseCount;

                for (int use = 0; use < definedCount; use++)
                {
                    Operand lastLocal = localDefs.GetUse(use, out int key);

                    Register reg = GetRegisterFromKey(key);

                    globalDefs[block.Index].TryAddOperand(reg, lastLocal);

                    // Flag a phi for this register on every block of the iterated dominance frontier.
                    pendingFrontiers.Enqueue(block);

                    while (pendingFrontiers.TryDequeue(out BasicBlock current))
                    {
                        foreach (BasicBlock frontier in current.DominanceFrontiers)
                        {
                            if (globalDefs[frontier.Index].AddPhi(reg))
                            {
                                pendingFrontiers.Enqueue(frontier);
                            }
                        }
                    }
                }

                localDefs.Clear();
            }

            // Second pass, rename variables with definitions on different blocks.
            for (int blkIndex = 0; blkIndex < blocks.Length; blkIndex++)
            {
                BasicBlock block = blocks[blkIndex];

                // The list is walked node by node because resolving a source may insert
                // phi nodes into the operation list of this block.
                LinkedListNode<INode> node = block.Operations.First;

                while (node != null)
                {
                    if (node.Value is Operation operation)
                    {
                        for (int srcIndex = 0; srcIndex < operation.SourcesCount; srcIndex++)
                        {
                            operation.SetSource(srcIndex, RenameGlobal(block, operation.GetSource(srcIndex)));
                        }
                    }

                    node = node.Next;
                }

                if (blkIndex < blocks.Length - 1)
                {
                    localDefs.Clear();
                }
            }

            // Replaces a register operand with the local defined earlier in the current block, if any.
            Operand RenameLocal(Operand operand)
            {
                if (operand != null && operand.Type == OperandType.Register)
                {
                    Operand local = localDefs.Get(GetKeyFromRegister(operand.GetRegister()));

                    operand = local ?? operand;
                }

                return operand;
            }

            // Replaces a register operand with its reaching definition, cached per block in localDefs.
            Operand RenameGlobal(BasicBlock block, Operand operand)
            {
                if (operand != null && operand.Type == OperandType.Register)
                {
                    int key = GetKeyFromRegister(operand.GetRegister());

                    Operand cached = localDefs.Get(key);

                    if (cached != null)
                    {
                        return cached;
                    }

                    operand = FindDefinitionForCurr(globalDefs, block, operand.GetRegister());

                    localDefs.Add(key, operand);
                }

                return operand;
            }
        }

        // Definition reaching the start of "current": its own phi when one is flagged, otherwise
        // whatever the immediate dominator chain provides, or an undefined value at the root.
        private static Operand FindDefinitionForCurr(DefMap[] globalDefs, BasicBlock current, Register reg)
        {
            if (globalDefs[current.Index].HasPhi(reg))
            {
                return InsertPhi(globalDefs, current, reg);
            }

            if (current == current.ImmediateDominator)
            {
                return Undef();
            }

            return FindDefinition(globalDefs, current.ImmediateDominator, reg).Local;
        }

        // Walks "current" and its dominators, returning the first recorded local or flagged phi.
        // When nothing is found the result is an undefined value attributed to "current".
        private static Definition FindDefinition(DefMap[] globalDefs, BasicBlock current, Register reg)
        {
            foreach (BasicBlock candidate in SelfAndImmediateDominators(current))
            {
                DefMap defs = globalDefs[candidate.Index];

                if (defs.TryGetOperand(reg, out Operand lastDef))
                {
                    return new Definition(candidate, lastDef);
                }

                if (defs.HasPhi(reg))
                {
                    return new Definition(candidate, InsertPhi(globalDefs, candidate, reg));
                }
            }

            return new Definition(current, Undef());
        }

        // Yields the block and then each immediate dominator, stopping at the block that is its own dominator.
        private static IEnumerable<BasicBlock> SelfAndImmediateDominators(BasicBlock block)
        {
            while (block != block.ImmediateDominator)
            {
                yield return block;

                block = block.ImmediateDominator;
            }

            yield return block;
        }

        private static Operand InsertPhi(DefMap[] globalDefs, BasicBlock block, Register reg)
        {
            // This block has a Phi that has not been materialized yet, but that
            // would define a new version of the variable we're looking for. We need
            // to materialize the Phi, add all the block/operand pairs into the Phi, and
            // then use the definition from that Phi.
            Operand phiLocal = Local();

            PhiNode phi = new PhiNode(phiLocal);

            AddPhi(block, phi);

            // Recorded before the predecessors are searched, so a lookup that reaches this block
            // again through FindDefinition gets this local instead of inserting another phi.
            globalDefs[block.Index].TryAddOperand(reg, phiLocal);

            foreach (BasicBlock predecessor in block.Predecessors)
            {
                Definition incoming = FindDefinition(globalDefs, predecessor, reg);

                phi.AddSource(incoming.Block, incoming.Local);
            }

            return phiLocal;
        }

        // Inserts the phi after the phi nodes already present, or first when the scan does not end on one.
        private static void AddPhi(BasicBlock block, PhiNode phi)
        {
            LinkedListNode<INode> cursor = block.Operations.First;

            while (cursor?.Next?.Value is PhiNode)
            {
                cursor = cursor.Next;
            }

            if (cursor?.Value is PhiNode)
            {
                block.Operations.AddAfter(cursor, phi);
            }
            else
            {
                block.Operations.AddFirst(phi);
            }
        }

        // Flattens a register into one index space: GPRs first, then predicates, then everything else.
        private static int GetKeyFromRegister(Register reg)
        {
            return reg.Type switch
            {
                RegisterType.Gpr => reg.Index,
                RegisterType.Predicate => RegisterConsts.GprsCount + reg.Index,
                _ => GprsAndPredsCount + reg.Index /* RegisterType.Flag */
            };
        }

        // Inverse of GetKeyFromRegister; keys past the predicate range map to flag registers.
        private static Register GetRegisterFromKey(int key)
        {
            if (key < RegisterConsts.GprsCount)
            {
                return new Register(key, RegisterType.Gpr);
            }

            if (key < GprsAndPredsCount)
            {
                return new Register(key - RegisterConsts.GprsCount, RegisterType.Predicate);
            }

            /* key < RegisterConsts.TotalCount */
            return new Register(key - GprsAndPredsCount, RegisterType.Flag);
        }
    }
}
// src/Ryujinx.Graphics.Shader/Translation/TargetApi.cs
// src/Ryujinx.Graphics.Shader/Translation/TargetLanguage.cs
// src/Ryujinx.Graphics.Shader/Translation/TranslationFlags.cs
using System;

// ---- TargetApi.cs ----
namespace Ryujinx.Graphics.Shader.Translation
{
    /// <summary>
    /// API selected for a translation; carried by <c>TranslationOptions.TargetApi</c>.
    /// </summary>
    public enum TargetApi
    {
        OpenGL = 0,
        Vulkan = 1
    }
}

// ---- TargetLanguage.cs ----
namespace Ryujinx.Graphics.Shader.Translation
{
    /// <summary>
    /// Output language selected for a translation. <c>Translator.Translate</c> generates code
    /// for <see cref="Glsl"/> and <see cref="Spirv"/>; any other value makes it throw
    /// <see cref="NotImplementedException"/>.
    /// </summary>
    public enum TargetLanguage
    {
        Glsl = 0,
        Spirv = 1,
        Arb = 2
    }
}

// ---- TranslationFlags.cs ----
namespace Ryujinx.Graphics.Shader.Translation
{
    /// <summary>
    /// Bit flags carried by <c>TranslationOptions.Flags</c>.
    /// </summary>
    [Flags]
    public enum TranslationFlags
    {
        None = 0,

        VertexA = 1 << 0,

        // When set, the shader is decoded without reading a ShaderHeader first.
        Compute = 1 << 1,

        DebugMode = 1 << 2
    }
}
// Files:
//   src/Ryujinx.Graphics.Shader/Translation/TranslationOptions.cs
//   src/Ryujinx.Graphics.Shader/Translation/Translator.cs
using Ryujinx.Graphics.Shader.CodeGen.Glsl;
using Ryujinx.Graphics.Shader.CodeGen.Spirv;
using Ryujinx.Graphics.Shader.Decoders;
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using Ryujinx.Graphics.Shader.StructuredIr;
using Ryujinx.Graphics.Shader.Translation.Optimizations;
using System;
using System.Linq;
using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;

namespace Ryujinx.Graphics.Shader.Translation
{
    /// <summary>
    /// Immutable bundle of the target language, target API and flags used for a translation.
    /// </summary>
    public readonly struct TranslationOptions
    {
        public TargetLanguage TargetLanguage { get; }
        public TargetApi TargetApi { get; }
        public TranslationFlags Flags { get; }

        /// <summary>
        /// Creates a new set of translation options.
        /// </summary>
        /// <param name="targetLanguage">Language the shader is translated to</param>
        /// <param name="targetApi">API the translated shader is meant for</param>
        /// <param name="flags">Translation flags</param>
        public TranslationOptions(TargetLanguage targetLanguage, TargetApi targetApi, TranslationFlags flags)
        {
            (TargetLanguage, TargetApi, Flags) = (targetLanguage, targetApi, flags);
        }
    }
}

namespace Ryujinx.Graphics.Shader.Translation
{
    /// <summary>
    /// Decodes guest shader code, emits its intermediate representation and
    /// generates the final program in the configured target language.
    /// </summary>
    public static class Translator
    {
        // Non-compute shaders start with a header; their code is decoded starting this many bytes after the base address.
        private const int HeaderSize = 0x50;

        /// <summary>
        /// Operations emitted for a single function.
        /// </summary>
        internal readonly struct FunctionCode
        {
            public Operation[] Code { get; }

            public FunctionCode(Operation[] code) => Code = code;
        }

        /// <summary>
        /// Decodes the shader at the given address and wraps the result in a translator context.
        /// </summary>
        /// <param name="address">Address of the shader</param>
        /// <param name="gpuAccessor">GPU state accessor</param>
        /// <param name="options">Translation options</param>
        /// <returns>Context that can be used to translate the decoded shader</returns>
        public static TranslatorContext CreateContext(ulong address, IGpuAccessor gpuAccessor, TranslationOptions options)
            => DecodeShader(address, gpuAccessor, options);

        /// <summary>
        /// Runs the IR passes over every function and generates the program for the configured target language.
        /// </summary>
        /// <param name="functions">Emitted code, one entry per function (index 0 is the entry point)</param>
        /// <param name="config">Shader configuration</param>
        /// <returns>Generated shader program</returns>
        /// <exception cref="NotImplementedException">The target language is neither GLSL nor SPIR-V</exception>
        internal static ShaderProgram Translate(FunctionCode[] functions, ShaderConfig config)
        {
            int count = functions.Length;

            ControlFlowGraph[] graphs = new ControlFlowGraph[count];
            RegisterUsage.FunctionRegisterUsage[] usages = new RegisterUsage.FunctionRegisterUsage[count];

            for (int i = 0; i < count; i++)
            {
                graphs[i] = ControlFlowGraph.Create(functions[i].Code);

                // Register usage is only computed for the non-entry functions.
                if (i != 0)
                {
                    usages[i] = RegisterUsage.RunPass(graphs[i]);
                }
            }

            Function[] translated = new Function[count];

            for (int i = 0; i < count; i++)
            {
                ControlFlowGraph graph = graphs[i];

                // The entry point (index 0) takes no arguments.
                int inArgumentsCount = i != 0 ? usages[i].InArguments.Length : 0;
                int outArgumentsCount = i != 0 ? usages[i].OutArguments.Length : 0;

                if (graph.Blocks.Length != 0)
                {
                    RegisterUsage.FixupCalls(graph.Blocks, usages);

                    Dominance.FindDominators(graph);
                    Dominance.FindDominanceFrontiers(graph.Blocks);

                    Ssa.Rename(graph.Blocks);

                    Optimizer.RunPass(graph.Blocks, config);
                    Rewriter.RunPass(graph.Blocks, config);
                }

                translated[i] = new Function(graph.Blocks, $"fun{i}", false, inArgumentsCount, outArgumentsCount);
            }

            var identification = ShaderIdentifier.Identify(translated, config);

            var structuredInfo = StructuredProgram.MakeStructuredProgram(translated, config);

            var programInfo = config.CreateProgramInfo(identification);

            TargetLanguage language = config.Options.TargetLanguage;

            switch (language)
            {
                case TargetLanguage.Glsl:
                    return new ShaderProgram(programInfo, TargetLanguage.Glsl, GlslGenerator.Generate(structuredInfo, config));
                case TargetLanguage.Spirv:
                    return new ShaderProgram(programInfo, TargetLanguage.Spirv, SpirvGenerator.Generate(structuredInfo, config));
                default:
                    throw new NotImplementedException(language.ToString());
            }
        }

        /// <summary>
        /// Creates the shader configuration, decodes the program and records its size on the configuration.
        /// </summary>
        private static TranslatorContext DecodeShader(ulong address, IGpuAccessor gpuAccessor, TranslationOptions options)
        {
            bool isCompute = options.Flags.HasFlag(TranslationFlags.Compute);

            ShaderConfig config;
            DecodedProgram program;

            if (isCompute)
            {
                config = new ShaderConfig(gpuAccessor, options);

                program = Decoder.Decode(config, address);
            }
            else
            {
                // The header is read from the base address, the code follows it.
                config = new ShaderConfig(new ShaderHeader(gpuAccessor, address), gpuAccessor, options);

                program = Decoder.Decode(config, address + HeaderSize);
            }

            // Find the highest end address among all decoded blocks.
            ulong maxEndAddress = 0;

            foreach (DecodedFunction function in program)
            {
                foreach (Block block in function.Blocks)
                {
                    if (block.EndAddress > maxEndAddress)
                    {
                        maxEndAddress = block.EndAddress;
                    }
                }
            }

            config.SizeAdd((int)maxEndAddress + (isCompute ? 0 : HeaderSize));

            return new TranslatorContext(address, program, config);
        }

        /// <summary>
        /// Emits the intermediate representation of every function of the decoded program.
        /// </summary>
        /// <param name="program">Decoded program</param>
        /// <param name="config">Shader configuration</param>
        /// <param name="initializeOutputs">True to emit output initialization code at the start of the entry point</param>
        /// <param name="initializationOperations">Operation count of the context right after the output initialization, or 0 if none was emitted</param>
        /// <returns>Emitted code, one entry per function with an ID</returns>
        internal static FunctionCode[] EmitShader(DecodedProgram program, ShaderConfig config, bool initializeOutputs, out int initializationOperations)
        {
            initializationOperations = 0;

            FunctionMatch.RunPass(program);

            // IDs are handed out in address order, compiler generated functions are left out.
            foreach (DecodedFunction function in program.OrderBy(f => f.Address).Where(f => !f.IsCompilerGenerated))
            {
                program.AddFunctionAndSetId(function);
            }

            FunctionCode[] emitted = new FunctionCode[program.FunctionsWithIdCount];

            for (int id = 0; id < emitted.Length; id++)
            {
                bool isEntryPoint = id == 0;

                EmitterContext context = new EmitterContext(program, config, !isEntryPoint);

                if (isEntryPoint && initializeOutputs)
                {
                    EmitOutputsInitialization(context, config);
                    initializationOperations = context.OperationsCount;
                }

                foreach (Block block in program.GetFunctionById(id).Blocks)
                {
                    context.CurrBlock = block;

                    context.EnterBlock(block.Address);

                    EmitOps(context, block);
                }

                emitted[id] = new FunctionCode(context.GetOperations());
            }

            return emitted;
        }

        /// <summary>
        /// Emits stores that give a default value to the outputs consumed by the next stage.
        /// </summary>
        private static void EmitOutputsInitialization(EmitterContext context, ShaderConfig config)
        {
            // Compute has no output attributes, and fragment is the last stage, so we
            // don't need to initialize outputs on those stages.
            if (config.Stage is ShaderStage.Compute or ShaderStage.Fragment)
            {
                return;
            }

            if (config.Stage == ShaderStage.Vertex)
            {
                InitializePositionOutput(context);
            }

            // One bit per attribute component, 4 components per attribute location.
            UInt128 pendingComponents = context.Config.NextInputAttributesComponents;

            while (pendingComponents != UInt128.Zero)
            {
                int componentBit = (int)UInt128.TrailingZeroCount(pendingComponents);
                int location = componentBit / 4;

                pendingComponents &= ~(UInt128.One << componentBit);

                // We don't need to initialize passthrough attributes.
                bool isPassthrough = (context.Config.PassthroughAttributes & (1 << location)) != 0;

                if (!isPassthrough)
                {
                    InitializeOutputComponent(context, location, componentBit & 3, perPatch: false);
                }
            }

            if (context.Config.NextUsedInputAttributesPerPatch != null)
            {
                foreach (int location in context.Config.NextUsedInputAttributesPerPatch.Order())
                {
                    InitializeOutput(context, location, perPatch: true);
                }
            }

            if (!config.NextUsesFixedFuncAttributes)
            {
                return;
            }

            bool supportsLayerFromVertexOrTess = config.GpuAccessor.QueryHostSupportsLayerVertexTessellation();
            int firstFixedAttr = supportsLayerFromVertexOrTess ? 0 : 1;
            int endFixedAttr = firstFixedAttr + 5 + AttributeConsts.TexCoordCount;

            for (int fixedAttr = firstFixedAttr; fixedAttr < endFixedAttr; fixedAttr++)
            {
                int location = config.GetFreeUserAttribute(isOutput: true, fixedAttr);

                // A negative location means that no free user attribute was returned, stop here.
                if (location < 0)
                {
                    break;
                }

                InitializeOutput(context, location, perPatch: false);

                config.SetOutputUserAttributeFixedFunc(location);
            }
        }

        /// <summary>
        /// Emits stores that set the position output to (0, 0, 0, 1).
        /// </summary>
        private static void InitializePositionOutput(EmitterContext context)
        {
            for (int component = 0; component < 4; component++)
            {
                float initialValue = component == 3 ? 1f : 0f;

                context.Store(StorageKind.Output, IoVariable.Position, null, Const(component), ConstF(initialValue));
            }
        }

        /// <summary>
        /// Emits the default value stores for the 4 components of a user defined output.
        /// </summary>
        private static void InitializeOutput(EmitterContext context, int location, bool perPatch)
        {
            for (int component = 0; component < 4; component++)
            {
                InitializeOutputComponent(context, location, component, perPatch);
            }
        }

        /// <summary>
        /// Emits a store of the default value (1 for component 3, 0 otherwise) to one component of a user defined output.
        /// </summary>
        /// <param name="context">Emitter context</param>
        /// <param name="location">Location of the user defined output</param>
        /// <param name="c">Component index, from 0 to 3</param>
        /// <param name="perPatch">True to target the per-patch output storage</param>
        private static void InitializeOutputComponent(EmitterContext context, int location, int c, bool perPatch)
        {
            StorageKind storageKind = perPatch ? StorageKind.OutputPerPatch : StorageKind.Output;

            // On tessellation control, non per-patch outputs are additionally addressed with the invocation ID.
            bool usesInvocationId = context.Config.Stage == ShaderStage.TessellationControl && !perPatch;

            float initialValue = c == 3 ? 1f : 0f;

            if (context.Config.UsedFeatures.HasFlag(FeatureFlags.OaIndexing))
            {
                Operand invocationId = usesInvocationId
                    ? context.Load(StorageKind.Input, IoVariable.InvocationId)
                    : null;

                // With this feature, a single flat component index is used instead of location + component.
                int flatIndex = location * 4 + c;

                context.Store(storageKind, IoVariable.UserDefined, invocationId, Const(flatIndex), ConstF(initialValue));
            }
            else if (usesInvocationId)
            {
                Operand invocationId = context.Load(StorageKind.Input, IoVariable.InvocationId);

                // Argument order is intentionally kept as is: the location goes first, followed by the invocation ID.
                context.Store(storageKind, IoVariable.UserDefined, Const(location), invocationId, Const(c), ConstF(initialValue));
            }
            else
            {
                context.Store(storageKind, IoVariable.UserDefined, null, Const(location), Const(c), ConstF(initialValue));
            }
        }

        /// <summary>
        /// Adds a comment node with the address, raw encoding and name of the instruction,
        /// logging a message when the instruction has no emitter.
        /// </summary>
        private static void AddDebugComment(EmitterContext context, InstOp op)
        {
            string instName = "???";

            if (op.Emitter != null)
            {
                instName = op.Name.ToString();
            }
            else
            {
                context.Config.GpuAccessor.Log($"Invalid instruction at 0x{op.Address:X6} (0x{op.RawOpCode:X16}).");
            }

            context.Add(new CommentNode($"0x{op.Address:X6}: 0x{op.RawOpCode:X16} {instName}"));
        }

        /// <summary>
        /// Emits the operations for all instructions of a block, including the predicate checks that guard them.
        /// </summary>
        private static void EmitOps(EmitterContext context, Block block)
        {
            for (int opIndex = 0; opIndex < block.OpCodes.Count; opIndex++)
            {
                InstOp op = block.OpCodes[opIndex];

                if (context.Config.Options.Flags.HasFlag(TranslationFlags.DebugMode))
                {
                    AddDebugComment(context, op);
                }

                InstConditional conditional = new InstConditional(op.RawOpCode);

                bool skipPredCheck = op.Props.HasFlag(InstProps.NoPred);

                // Predicated on the inverted "true" predicate: the instruction is skipped entirely.
                if (!skipPredCheck && conditional.Pred == RegisterConsts.PredicateTrueIndex && conditional.PredInv)
                {
                    continue;
                }

                if (Decoder.IsPopBranch(op.Name))
                {
                    // If the instruction is a SYNC or BRK instruction with only one
                    // possible target address, then the instruction is basically
                    // just a simple branch, we can generate code similar to branch
                    // instructions, with the condition check on the branch itself.
                    skipPredCheck = block.SyncTargets.Count <= 1;
                }
                else if (op.Name == InstName.Bra)
                {
                    skipPredCheck = true;
                }

                // Label placed right after the instruction, only set when one had to be created here.
                Operand skipLabel = null;

                if (conditional.Pred != RegisterConsts.PredicateTrueIndex && !skipPredCheck)
                {
                    Operand target;

                    bool isLastOp = opIndex == block.OpCodes.Count - 1;

                    if (isLastOp && block.HasNext())
                    {
                        // Skipping the last instruction is the same as jumping to the first successor.
                        target = context.GetLabel(block.Successors[0].Address);
                    }
                    else
                    {
                        target = Label();

                        skipLabel = target;
                    }

                    Operand predicate = Register(conditional.Pred, RegisterType.Predicate);

                    if (conditional.PredInv)
                    {
                        context.BranchIfTrue(target, predicate);
                    }
                    else
                    {
                        context.BranchIfFalse(target, predicate);
                    }
                }

                context.CurrOp = op;

                op.Emitter?.Invoke(context);

                if (skipLabel != null)
                {
                    context.MarkLabel(skipLabel);
                }
            }
        }
    }
}
// File: src/Ryujinx.Graphics.Shader/Translation/TranslatorContext.cs
using Ryujinx.Graphics.Shader.CodeGen.Glsl;
using Ryujinx.Graphics.Shader.CodeGen.Spirv;
using Ryujinx.Graphics.Shader.Decoders;
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using Ryujinx.Graphics.Shader.StructuredIr;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Numerics;

using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
using static Ryujinx.Graphics.Shader.Translation.Translator;

namespace Ryujinx.Graphics.Shader.Translation
{
    /// <summary>
    /// Holds a decoded shader program together with its configuration, and translates it on request.
    /// </summary>
    public class TranslatorContext
    {
        private readonly DecodedProgram _program;
        private ShaderConfig _config;

        public ulong Address { get; }

        public ShaderStage Stage => _config.Stage;
        public int Size => _config.Size;
        public int Cb1DataSize => _config.Cb1DataSize;
        public bool LayerOutputWritten => _config.LayerOutputWritten;

        public IGpuAccessor GpuAccessor => _config.GpuAccessor;

        internal TranslatorContext(ulong address, DecodedProgram program, ShaderConfig config)
        {
            Address = address;
            _program = program;
            _config = config;
        }

        /// <summary>
        /// Checks if the operation is the given instruction with a first source that selects the user defined I/O variable.
        /// </summary>
        private static bool IsUserDefinedAccess(Operation operation, Instruction instruction)
        {
            return operation.Inst == instruction && (IoVariable)operation.GetSource(0).Value == IoVariable.UserDefined;
        }

        private static bool IsLoadUserDefined(Operation operation)
        {
            // TODO: Check if sources count match and all sources are constant.
            return IsUserDefinedAccess(operation, Instruction.Load);
        }

        private static bool IsStoreUserDefined(Operation operation)
        {
            // TODO: Check if sources count match and all sources are constant.
            return IsUserDefinedAccess(operation, Instruction.Store);
        }

        /// <summary>
        /// Merges the entry points of two shaders into a single function, with shader A running before shader B.
        /// </summary>
        /// <param name="a">Functions of shader A</param>
        /// <param name="b">Functions of shader B</param>
        /// <param name="aStart">Index of the first operation of the entry point of shader A that is kept</param>
        /// <returns>Merged entry point, followed by the remaining functions of A and then those of B</returns>
        private static FunctionCode[] Combine(FunctionCode[] a, FunctionCode[] b, int aStart)
        {
            // Here we combine two shaders.
            // For shader A:
            // - All user attribute stores on shader A are turned into copies to a
            // temporary variable. It's assumed that shader B will consume them.
            // - All return instructions are turned into branch instructions, the
            // branch target being the start of the shader B code.
            // For shader B:
            // - All user attribute loads on shader B are turned into copies from a
            // temporary variable, as long that attribute is written by shader A.
            FunctionCode[] combined = new FunctionCode[a.Length + b.Length - 1];

            List<Operation> entryOps = new List<Operation>(a.Length + b.Length);

            // One temporary per user attribute component, created on the first store from shader A.
            Operand[] componentTemps = new Operand[AttributeConsts.UserAttributesCount * 4];

            Operand startOfB = Label();

            static int GetTempIndex(Operation operation)
            {
                return operation.GetSource(1).Value * 4 + operation.GetSource(2).Value;
            }

            Operation[] codeA = a[0].Code;

            for (int i = aStart; i < codeA.Length; i++)
            {
                Operation operation = codeA[i];

                if (IsStoreUserDefined(operation))
                {
                    int tempIndex = GetTempIndex(operation);

                    Operand temp = componentTemps[tempIndex];

                    if (temp == null)
                    {
                        temp = Local();

                        componentTemps[tempIndex] = temp;
                    }

                    // The stored value is the last source of the operation.
                    operation.Dest = temp;
                    operation.TurnIntoCopy(operation.GetSource(operation.SourcesCount - 1));
                }

                entryOps.Add(operation.Inst == Instruction.Return
                    ? new Operation(Instruction.Branch, startOfB)
                    : operation);
            }

            entryOps.Add(new Operation(Instruction.MarkLabel, startOfB));

            foreach (Operation operation in b[0].Code)
            {
                if (IsLoadUserDefined(operation))
                {
                    Operand temp = componentTemps[GetTempIndex(operation)];

                    // Loads of components that shader A never wrote are left untouched.
                    if (temp != null)
                    {
                        operation.TurnIntoCopy(temp);
                    }
                }

                entryOps.Add(operation);
            }

            combined[0] = new FunctionCode(entryOps.ToArray());

            // The non-entry functions are appended unchanged, A first and then B.
            Array.Copy(a, 1, combined, 1, a.Length - 1);
            Array.Copy(b, 1, combined, a.Length, b.Length - 1);

            return combined;
        }

        public void SetNextStage(TranslatorContext nextStage)
        {
            _config.MergeFromtNextStage(nextStage._config);
        }

        public void SetGeometryShaderLayerInputAttribute(int attr)
        {
            _config.SetGeometryShaderLayerInputAttribute(attr);
        }

        public void SetLastInVertexPipeline()
        {
            _config.SetLastInVertexPipeline();
        }

        /// <summary>
        /// Translates the shader, optionally combining it with another shader that runs before it.
        /// </summary>
        /// <param name="other">Optional shader whose code is placed before the code of this shader</param>
        /// <returns>Translated shader program</returns>
        public ShaderProgram Translate(TranslatorContext other = null)
        {
            bool isStandalone = other is null;

            // When combining, the output initialization is emitted by the other shader instead.
            FunctionCode[] code = EmitShader(_program, _config, initializeOutputs: isStandalone, out _);

            if (!isStandalone)
            {
                other._config.MergeOutputUserAttributes(_config.UsedOutputAttributes, Enumerable.Empty<int>());

                FunctionCode[] otherCode = EmitShader(other._program, other._config, initializeOutputs: true, out int aStart);

                code = Combine(otherCode, code, aStart);

                _config.InheritFrom(other._config);
            }

            return Translator.Translate(code, _config);
        }

        /// <summary>
        /// Generates a geometry shader that copies the used output attributes and the position of
        /// every input vertex to its outputs, writing the layer output for the layer attribute.
        /// </summary>
        /// <returns>Generated geometry shader program</returns>
        /// <exception cref="NotImplementedException">The target language is neither GLSL nor SPIR-V</exception>
        public ShaderProgram GenerateGeometryPassthrough()
        {
            int usedOutputsMask = _config.UsedOutputAttributes;
            int layerOutputAttr = _config.LayerOutputAttribute;

            (OutputTopology outputTopology, int maxOutputVertices) = GpuAccessor.QueryPrimitiveTopology() switch
            {
                InputTopology.Points => (OutputTopology.PointList, 1),
                InputTopology.Lines or InputTopology.LinesAdjacency => (OutputTopology.LineStrip, 2),
                _ => (OutputTopology.TriangleStrip, 3)
            };

            ShaderConfig config = new ShaderConfig(ShaderStage.Geometry, outputTopology, maxOutputVertices, GpuAccessor, _config.Options);

            EmitterContext context = new EmitterContext(default, config, false);

            for (int vertex = 0; vertex < maxOutputVertices; vertex++)
            {
                // Visit every set bit of the mask, from the lowest to the highest.
                for (int pending = usedOutputsMask; pending != 0;)
                {
                    int location = BitOperations.TrailingZeroCount(pending);

                    pending &= ~(1 << location);

                    for (int component = 0; component < 4; component++)
                    {
                        int attr = AttributeConsts.UserAttributeBase + location * 16 + component * 4;

                        Operand value = context.Load(StorageKind.Input, IoVariable.UserDefined, Const(location), Const(vertex), Const(component));

                        if (attr != layerOutputAttr)
                        {
                            context.Store(StorageKind.Output, IoVariable.UserDefined, null, Const(location), Const(component), value);
                            config.SetOutputUserAttribute(location);
                        }
                        else
                        {
                            context.Store(StorageKind.Output, IoVariable.Layer, null, value);
                        }

                        config.SetInputUserAttribute(location, component);
                    }
                }

                for (int component = 0; component < 4; component++)
                {
                    Operand value = context.Load(StorageKind.Input, IoVariable.Position, Const(vertex), Const(component));

                    context.Store(StorageKind.Output, IoVariable.Position, null, Const(component), value);
                }

                context.EmitVertex();
            }

            context.EndPrimitive();

            var operations = context.GetOperations();
            var graph = ControlFlowGraph.Create(operations);
            var mainFunction = new Function(graph.Blocks, "main", false, 0, 0);

            var structuredInfo = StructuredProgram.MakeStructuredProgram(new[] { mainFunction }, config);

            var programInfo = config.CreateProgramInfo();

            TargetLanguage language = config.Options.TargetLanguage;

            switch (language)
            {
                case TargetLanguage.Glsl:
                    return new ShaderProgram(programInfo, TargetLanguage.Glsl, GlslGenerator.Generate(structuredInfo, config));
                case TargetLanguage.Spirv:
                    return new ShaderProgram(programInfo, TargetLanguage.Spirv, SpirvGenerator.Generate(structuredInfo, config));
                default:
                    throw new NotImplementedException(language.ToString());
            }
        }
    }
}
