aboutsummaryrefslogtreecommitdiff
path: root/src/Ryujinx.Graphics.Shader/Translation
diff options
context:
space:
mode:
Diffstat (limited to 'src/Ryujinx.Graphics.Shader/Translation')
-rw-r--r--src/Ryujinx.Graphics.Shader/Translation/AggregateType.cs25
-rw-r--r--src/Ryujinx.Graphics.Shader/Translation/AttributeConsts.cs36
-rw-r--r--src/Ryujinx.Graphics.Shader/Translation/ControlFlowGraph.cs176
-rw-r--r--src/Ryujinx.Graphics.Shader/Translation/Dominance.cs94
-rw-r--r--src/Ryujinx.Graphics.Shader/Translation/EmitterContext.cs492
-rw-r--r--src/Ryujinx.Graphics.Shader/Translation/EmitterContextInsts.cs819
-rw-r--r--src/Ryujinx.Graphics.Shader/Translation/FeatureFlags.cs27
-rw-r--r--src/Ryujinx.Graphics.Shader/Translation/FunctionMatch.cs866
-rw-r--r--src/Ryujinx.Graphics.Shader/Translation/GlobalMemory.cs52
-rw-r--r--src/Ryujinx.Graphics.Shader/Translation/Optimizations/BindlessElimination.cs263
-rw-r--r--src/Ryujinx.Graphics.Shader/Translation/Optimizations/BindlessToIndexed.cs85
-rw-r--r--src/Ryujinx.Graphics.Shader/Translation/Optimizations/BranchElimination.cs64
-rw-r--r--src/Ryujinx.Graphics.Shader/Translation/Optimizations/ConstantFolding.cs346
-rw-r--r--src/Ryujinx.Graphics.Shader/Translation/Optimizations/GlobalToStorage.cs433
-rw-r--r--src/Ryujinx.Graphics.Shader/Translation/Optimizations/Optimizer.cs380
-rw-r--r--src/Ryujinx.Graphics.Shader/Translation/Optimizations/Simplification.cs147
-rw-r--r--src/Ryujinx.Graphics.Shader/Translation/Optimizations/Utils.cs68
-rw-r--r--src/Ryujinx.Graphics.Shader/Translation/RegisterUsage.cs486
-rw-r--r--src/Ryujinx.Graphics.Shader/Translation/Rewriter.cs768
-rw-r--r--src/Ryujinx.Graphics.Shader/Translation/ShaderConfig.cs944
-rw-r--r--src/Ryujinx.Graphics.Shader/Translation/ShaderHeader.cs158
-rw-r--r--src/Ryujinx.Graphics.Shader/Translation/ShaderIdentifier.cs185
-rw-r--r--src/Ryujinx.Graphics.Shader/Translation/Ssa.cs376
-rw-r--r--src/Ryujinx.Graphics.Shader/Translation/TargetApi.cs8
-rw-r--r--src/Ryujinx.Graphics.Shader/Translation/TargetLanguage.cs9
-rw-r--r--src/Ryujinx.Graphics.Shader/Translation/TranslationFlags.cs14
-rw-r--r--src/Ryujinx.Graphics.Shader/Translation/TranslationOptions.cs16
-rw-r--r--src/Ryujinx.Graphics.Shader/Translation/Translator.cs362
-rw-r--r--src/Ryujinx.Graphics.Shader/Translation/TranslatorContext.cs255
29 files changed, 7954 insertions, 0 deletions
diff --git a/src/Ryujinx.Graphics.Shader/Translation/AggregateType.cs b/src/Ryujinx.Graphics.Shader/Translation/AggregateType.cs
new file mode 100644
index 00000000..24993e00
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/Translation/AggregateType.cs
@@ -0,0 +1,25 @@
+namespace Ryujinx.Graphics.Shader.Translation
+{
+    /// <summary>
+    /// Type descriptor made of an element type, an element count field and an array bit.
+    /// </summary>
+    enum AggregateType
+    {
+        // Element types, they all fit inside ElementTypeMask.
+        Invalid = 0,
+        Void = 1,
+        Bool = 2,
+        FP32 = 3,
+        FP64 = 4,
+        S32 = 5,
+        U32 = 6,
+
+        ElementTypeMask = 0xff,
+
+        // Element count field, two bits wide, located right above the element type.
+        ElementCountShift = 8,
+        ElementCountMask = 0x300,
+
+        Scalar = 0x000,
+        Vector2 = 0x100,
+        Vector3 = 0x200,
+        Vector4 = 0x300,
+
+        // Bit 10, located right above the element count field.
+        Array = 0x400
+    }
+}
diff --git a/src/Ryujinx.Graphics.Shader/Translation/AttributeConsts.cs b/src/Ryujinx.Graphics.Shader/Translation/AttributeConsts.cs
new file mode 100644
index 00000000..683b0d8a
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/Translation/AttributeConsts.cs
@@ -0,0 +1,36 @@
+namespace Ryujinx.Graphics.Shader.Translation
+{
+    /// <summary>
+    /// Offsets and ranges used to identify shader attributes.
+    /// </summary>
+    static class AttributeConsts
+    {
+        public const int PrimitiveId = 0x060;
+        public const int Layer = 0x064;
+
+        // Consecutive components are 4 units apart.
+        public const int PositionX = 0x070;
+        public const int PositionY = PositionX + 4;
+
+        public const int FrontColorDiffuseR = 0x280;
+        public const int BackColorDiffuseR = 0x2a0;
+
+        // The 8 clip distances are laid out contiguously, 4 units apart.
+        public const int ClipDistance0 = 0x2c0;
+        public const int ClipDistance1 = ClipDistance0 + 4 * 1;
+        public const int ClipDistance2 = ClipDistance0 + 4 * 2;
+        public const int ClipDistance3 = ClipDistance0 + 4 * 3;
+        public const int ClipDistance4 = ClipDistance0 + 4 * 4;
+        public const int ClipDistance5 = ClipDistance0 + 4 * 5;
+        public const int ClipDistance6 = ClipDistance0 + 4 * 6;
+        public const int ClipDistance7 = ClipDistance0 + 4 * 7;
+
+        public const int FogCoord = 0x2e8;
+        public const int TessCoordX = 0x2f0;
+        public const int TessCoordY = TessCoordX + 4;
+        public const int InstanceId = 0x2f8;
+        public const int VertexId = 0x2fc;
+
+        // Texture coordinates, each entry spans 16 units.
+        public const int TexCoordCount = 10;
+        public const int TexCoordBase = 0x300;
+        public const int TexCoordEnd = TexCoordBase + TexCoordCount * 16;
+
+        public const int FrontFacing = 0x3fc;
+
+        // User defined attributes, each entry spans 16 units.
+        public const int UserAttributesCount = 32;
+        public const int UserAttributeBase = 0x80;
+        public const int UserAttributeEnd = UserAttributeBase + UserAttributesCount * 16;
+
+        // Per-patch user defined attributes range.
+        public const int UserAttributePerPatchBase = 0x18;
+        public const int UserAttributePerPatchEnd = 0x200;
+    }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/Translation/ControlFlowGraph.cs b/src/Ryujinx.Graphics.Shader/Translation/ControlFlowGraph.cs
new file mode 100644
index 00000000..65328fd7
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/Translation/ControlFlowGraph.cs
@@ -0,0 +1,176 @@
+using Ryujinx.Graphics.Shader.IntermediateRepresentation;
+using System.Collections.Generic;
+
+namespace Ryujinx.Graphics.Shader.Translation
+{
+ class ControlFlowGraph
+ {
+ public BasicBlock[] Blocks { get; }
+ public BasicBlock[] PostOrderBlocks { get; }
+ public int[] PostOrderMap { get; }
+
+        /// <summary>
+        /// Creates a control flow graph for the given blocks and computes a post order
+        /// traversal of the blocks reachable from the first one.
+        /// </summary>
+        /// <param name="blocks">Blocks of the graph, the first one is the traversal root</param>
+        public ControlFlowGraph(BasicBlock[] blocks)
+        {
+            Blocks = blocks;
+            PostOrderMap = new int[blocks.Length];
+
+            List<BasicBlock> ordered = new List<BasicBlock>(blocks.Length);
+            Stack<BasicBlock> pending = new Stack<BasicBlock>();
+
+            BasicBlock root = blocks[0];
+
+            HashSet<BasicBlock> seen = new HashSet<BasicBlock> { root };
+
+            pending.Push(root);
+
+            while (pending.Count != 0)
+            {
+                BasicBlock current = pending.Pop();
+
+                // Select the first successor not seen yet, Next is tried before Branch.
+                BasicBlock successor = null;
+
+                if (current.Next != null && seen.Add(current.Next))
+                {
+                    successor = current.Next;
+                }
+                else if (current.Branch != null && seen.Add(current.Branch))
+                {
+                    successor = current.Branch;
+                }
+
+                if (successor != null)
+                {
+                    // The current block is pushed back so that it is looked at again
+                    // after the successor has been processed.
+                    pending.Push(current);
+                    pending.Push(successor);
+
+                    continue;
+                }
+
+                // No unseen successors left, the block can be numbered.
+                PostOrderMap[current.Index] = ordered.Count;
+
+                ordered.Add(current);
+            }
+
+            PostOrderBlocks = ordered.ToArray();
+        }
+
+ /// <summary>
+ /// Builds a control flow graph from a linear list of operations.
+ /// A MarkLabel operation starts a new block, and a Branch, BranchIfTrue or BranchIfFalse
+ /// operation makes the next non-label operation start a new block.
+ /// Blocks (other than the first) that end up without predecessors are removed.
+ /// </summary>
+ /// <param name="operations">Operations to split into basic blocks</param>
+ /// <returns>Control flow graph containing the remaining blocks</returns>
+ public static ControlFlowGraph Create(Operation[] operations)
+ {
+ // Maps a label operand to the block that starts (or will start) at that label.
+ Dictionary<Operand, BasicBlock> labels = new Dictionary<Operand, BasicBlock>();
+
+ List<BasicBlock> blocks = new List<BasicBlock>();
+
+ BasicBlock currentBlock = null;
+
+ // Makes nextBlock the current block, linking it as the fall through successor
+ // of the previous block unless that block ends with an unconditional instruction.
+ void NextBlock(BasicBlock nextBlock)
+ {
+ if (currentBlock != null && !EndsWithUnconditionalInst(currentBlock.GetLastOp()))
+ {
+ currentBlock.Next = nextBlock;
+ }
+
+ currentBlock = nextBlock;
+ }
+
+ // Appends a brand new block to the list and makes it the current block.
+ void NewNextBlock()
+ {
+ BasicBlock block = new BasicBlock(blocks.Count);
+
+ blocks.Add(block);
+
+ NextBlock(block);
+ }
+
+ // Starts as true so that the first non-label operation creates the first block.
+ bool needsNewBlock = true;
+
+ for (int index = 0; index < operations.Length; index++)
+ {
+ Operation operation = operations[index];
+
+ if (operation.Inst == Instruction.MarkLabel)
+ {
+ Operand label = operation.Dest;
+
+ if (labels.TryGetValue(label, out BasicBlock nextBlock))
+ {
+ // The block was already created by an earlier branch to this label,
+ // it only gets its index and position on the list now.
+ nextBlock.Index = blocks.Count;
+
+ blocks.Add(nextBlock);
+
+ NextBlock(nextBlock);
+ }
+ else
+ {
+ NewNextBlock();
+
+ labels.Add(label, currentBlock);
+ }
+ }
+ else
+ {
+ if (needsNewBlock)
+ {
+ NewNextBlock();
+ }
+
+ currentBlock.Operations.AddLast(operation);
+ }
+
+ // A branch ends the current block, the next non-label operation will start a new one.
+ needsNewBlock = operation.Inst == Instruction.Branch ||
+ operation.Inst == Instruction.BranchIfTrue ||
+ operation.Inst == Instruction.BranchIfFalse;
+
+ if (needsNewBlock)
+ {
+ Operand label = operation.Dest;
+
+ if (!labels.TryGetValue(label, out BasicBlock branchBlock))
+ {
+ // Branch to a label that was not marked yet, create the target block ahead of time.
+ // It is added to the block list when the matching MarkLabel is found.
+ branchBlock = new BasicBlock();
+
+ labels.Add(label, branchBlock);
+ }
+
+ currentBlock.Branch = branchBlock;
+ }
+ }
+
+ // Remove unreachable blocks.
+ // NOTE(review): the scan repeats until nothing is removed, presumably because clearing
+ // Next/Branch updates the Predecessors of the successors (BasicBlock is not visible here),
+ // which can leave further blocks without predecessors - confirm against BasicBlock.
+ bool hasUnreachable;
+
+ do
+ {
+ hasUnreachable = false;
+
+ // The first block is never removed, so the scan starts at index 1.
+ for (int blkIndex = 1; blkIndex < blocks.Count; blkIndex++)
+ {
+ BasicBlock block = blocks[blkIndex];
+
+ if (block.Predecessors.Count == 0)
+ {
+ block.Next = null;
+ block.Branch = null;
+ blocks.RemoveAt(blkIndex--);
+ hasUnreachable = true;
+ }
+ else
+ {
+ // Keep the index in sync with the position on the list after removals.
+ block.Index = blkIndex;
+ }
+ }
+ } while (hasUnreachable);
+
+ return new ControlFlowGraph(blocks.ToArray());
+ }
+
+        /// <summary>
+        /// Checks if the node is a Branch, Discard or Return operation.
+        /// </summary>
+        /// <param name="node">Node to check</param>
+        /// <returns>True for one of those operations, false for anything else</returns>
+        private static bool EndsWithUnconditionalInst(INode node)
+        {
+            return node is Operation operation &&
+                (operation.Inst == Instruction.Branch ||
+                 operation.Inst == Instruction.Discard ||
+                 operation.Inst == Instruction.Return);
+        }
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/Translation/Dominance.cs b/src/Ryujinx.Graphics.Shader/Translation/Dominance.cs
new file mode 100644
index 00000000..09c2eb0f
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/Translation/Dominance.cs
@@ -0,0 +1,94 @@
+using Ryujinx.Graphics.Shader.IntermediateRepresentation;
+
+namespace Ryujinx.Graphics.Shader.Translation
+{
+ static class Dominance
+ {
+        // These methods implement the algorithms described in "A Simple, Fast Dominance Algorithm".
+        // https://www.cs.rice.edu/~keith/EMBED/dom.pdf
+
+        /// <summary>
+        /// Computes the immediate dominator of the blocks present on the post order list of the graph.
+        /// </summary>
+        /// <param name="cfg">Control flow graph with the blocks to be updated</param>
+        public static void FindDominators(ControlFlowGraph cfg)
+        {
+            int[] postOrderMap = cfg.PostOrderMap;
+
+            // Walks both blocks up the dominator tree until they meet.
+            BasicBlock Intersect(BasicBlock first, BasicBlock second)
+            {
+                while (first != second)
+                {
+                    while (postOrderMap[first.Index] < postOrderMap[second.Index])
+                    {
+                        first = first.ImmediateDominator;
+                    }
+
+                    while (postOrderMap[second.Index] < postOrderMap[first.Index])
+                    {
+                        second = second.ImmediateDominator;
+                    }
+                }
+
+                return first;
+            }
+
+            BasicBlock[] postOrder = cfg.PostOrderBlocks;
+
+            // The first block is its own immediate dominator.
+            BasicBlock entry = cfg.Blocks[0];
+
+            entry.ImmediateDominator = entry;
+
+            bool changed = true;
+
+            // Iterate until no immediate dominator changes.
+            while (changed)
+            {
+                changed = false;
+
+                // Visit the blocks in reverse post order, skipping the last post order entry.
+                for (int index = postOrder.Length - 2; index >= 0; index--)
+                {
+                    BasicBlock block = postOrder[index];
+
+                    BasicBlock candidate = null;
+
+                    foreach (BasicBlock predecessor in block.Predecessors)
+                    {
+                        // Predecessors without a dominator were not processed yet.
+                        if (predecessor.ImmediateDominator == null)
+                        {
+                            continue;
+                        }
+
+                        candidate = candidate == null ? predecessor : Intersect(predecessor, candidate);
+                    }
+
+                    if (block.ImmediateDominator != candidate)
+                    {
+                        block.ImmediateDominator = candidate;
+
+                        changed = true;
+                    }
+                }
+            }
+        }
+
+        /// <summary>
+        /// Fills the dominance frontiers of the blocks, using the immediate dominators already set on them.
+        /// </summary>
+        /// <param name="blocks">Blocks to process</param>
+        public static void FindDominanceFrontiers(BasicBlock[] blocks)
+        {
+            foreach (BasicBlock block in blocks)
+            {
+                // Only blocks with 2 or more predecessors are added to a frontier.
+                if (block.Predecessors.Count >= 2)
+                {
+                    foreach (BasicBlock predecessor in block.Predecessors)
+                    {
+                        // Walk up from the predecessor until the immediate dominator of the block is reached.
+                        for (BasicBlock runner = predecessor; runner != block.ImmediateDominator; runner = runner.ImmediateDominator)
+                        {
+                            runner.DominanceFrontiers.Add(block);
+                        }
+                    }
+                }
+            }
+        }
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/Translation/EmitterContext.cs b/src/Ryujinx.Graphics.Shader/Translation/EmitterContext.cs
new file mode 100644
index 00000000..112baccf
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/Translation/EmitterContext.cs
@@ -0,0 +1,492 @@
+using Ryujinx.Graphics.Shader.Decoders;
+using Ryujinx.Graphics.Shader.IntermediateRepresentation;
+using System.Collections.Generic;
+using System.Diagnostics;
+using System.Numerics;
+using System.Runtime.CompilerServices;
+
+using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
+
+namespace Ryujinx.Graphics.Shader.Translation
+{
+ class EmitterContext
+ {
+ public DecodedProgram Program { get; }
+ public ShaderConfig Config { get; }
+
+ public bool IsNonMain { get; }
+
+ public Block CurrBlock { get; set; }
+ public InstOp CurrOp { get; set; }
+
+ public int OperationsCount => _operations.Count;
+
+        /// <summary>
+        /// Selector, expected selector value and next target address associated with a block label.
+        /// </summary>
+        private readonly struct BrxTarget
+        {
+            public readonly Operand Selector;
+            public readonly int ExpectedValue;
+            public readonly ulong NextTargetAddress;
+
+            public BrxTarget(Operand selector, int expectedValue, ulong nextTargetAddress)
+            {
+                (Selector, ExpectedValue, NextTargetAddress) = (selector, expectedValue, nextTargetAddress);
+            }
+        }
+
+        /// <summary>
+        /// Label operand of a block, along with its (initially default) <see cref="BrxTarget"/>.
+        /// </summary>
+        private class BlockLabel
+        {
+            public readonly Operand Label;
+            public BrxTarget BrxTarget;
+
+            public BlockLabel(Operand label) => Label = label;
+        }
+
+ private readonly List<Operation> _operations;
+ private readonly Dictionary<ulong, BlockLabel> _labels;
+
+        /// <summary>
+        /// Creates a new emitter context with empty operation and label lists, then emits the start code.
+        /// </summary>
+        /// <param name="program">Decoded program</param>
+        /// <param name="config">Shader configuration</param>
+        /// <param name="isNonMain">Value exposed through <see cref="IsNonMain"/></param>
+        public EmitterContext(DecodedProgram program, ShaderConfig config, bool isNonMain)
+        {
+            _operations = new List<Operation>();
+            _labels = new Dictionary<ulong, BlockLabel>();
+
+            Program = program;
+            Config = config;
+            IsNonMain = isNonMain;
+
+            // Must come last, it reads the configuration and adds operations.
+            EmitStart();
+        }
+
+        /// <summary>
+        /// Emits the code placed before everything else on the shader.
+        /// </summary>
+        private void EmitStart()
+        {
+            // Only vertex shaders targeting Vulkan without the VertexA flag need the store below.
+            if (Config.Stage != ShaderStage.Vertex ||
+                Config.Options.TargetApi != TargetApi.Vulkan ||
+                (Config.Options.Flags & TranslationFlags.VertexA) != 0)
+            {
+                return;
+            }
+
+            // Vulkan requires the point size to be always written on the shader if the primitive topology is points.
+            Operand pointSize = ConstF(Config.GpuAccessor.QueryPointSize());
+
+            this.Store(StorageKind.Output, IoVariable.PointSize, null, pointSize);
+        }
+
+        /// <summary>
+        /// Reinterprets the raw opcode of the current instruction as a value of type T.
+        /// </summary>
+        /// <typeparam name="T">Unmanaged type, asserted to have the same size as a ulong</typeparam>
+        /// <returns>The raw opcode bits viewed as T</returns>
+        public T GetOp<T>() where T : unmanaged
+        {
+            Debug.Assert(Unsafe.SizeOf<T>() == sizeof(ulong));
+
+            // A local copy is needed to be able to take a reference to the value.
+            ulong rawOpCode = CurrOp.RawOpCode;
+
+            return Unsafe.As<ulong, T>(ref rawOpCode);
+        }
+
+        /// <summary>
+        /// Appends a new operation to the operation list.
+        /// </summary>
+        /// <param name="inst">Instruction of the operation</param>
+        /// <param name="dest">Destination operand, can be null</param>
+        /// <param name="sources">Source operands</param>
+        /// <returns>The destination operand that was passed in</returns>
+        public Operand Add(Instruction inst, Operand dest = null, params Operand[] sources)
+        {
+            _operations.Add(new Operation(inst, dest, sources));
+
+            return dest;
+        }
+
+        /// <summary>
+        /// Appends a new operation with a storage kind to the operation list.
+        /// </summary>
+        /// <param name="inst">Instruction of the operation</param>
+        /// <param name="storageKind">Storage kind of the operation</param>
+        /// <param name="dest">Destination operand, can be null</param>
+        /// <param name="sources">Source operands</param>
+        /// <returns>The destination operand that was passed in</returns>
+        public Operand Add(Instruction inst, StorageKind storageKind, Operand dest = null, params Operand[] sources)
+        {
+            _operations.Add(new Operation(inst, storageKind, dest, sources));
+
+            return dest;
+        }
+
+        /// <summary>
+        /// Appends a new operation with two destination operands to the operation list.
+        /// </summary>
+        /// <param name="inst">Instruction of the operation</param>
+        /// <param name="dest">Pair of destination operands</param>
+        /// <param name="sources">Source operands</param>
+        /// <returns>The destination pair that was passed in</returns>
+        public (Operand, Operand) Add(Instruction inst, (Operand, Operand) dest, params Operand[] sources)
+        {
+            (Operand first, Operand second) = dest;
+
+            _operations.Add(new Operation(inst, 0, new[] { first, second }, sources));
+
+            return dest;
+        }
+
+        /// <summary>
+        /// Appends an existing operation to the operation list.
+        /// </summary>
+        /// <param name="operation">Operation to append</param>
+        public void Add(Operation operation) => _operations.Add(operation);
+
+        /// <summary>
+        /// Creates a texture operation using <see cref="TextureFormat.Unknown"/> as the format.
+        /// </summary>
+        /// <returns>The new texture operation, it is not added to the operation list</returns>
+        public TextureOperation CreateTextureOperation(
+            Instruction inst,
+            SamplerType type,
+            TextureFlags flags,
+            int handle,
+            int compIndex,
+            Operand[] dests,
+            params Operand[] sources)
+            => CreateTextureOperation(inst, type, TextureFormat.Unknown, flags, handle, compIndex, dests, sources);
+
+        /// <summary>
+        /// Creates a texture operation, flagging the texture as used on the configuration
+        /// when the operation is not bindless.
+        /// </summary>
+        /// <returns>The new texture operation, it is not added to the operation list</returns>
+        public TextureOperation CreateTextureOperation(
+            Instruction inst,
+            SamplerType type,
+            TextureFormat format,
+            TextureFlags flags,
+            int handle,
+            int compIndex,
+            Operand[] dests,
+            params Operand[] sources)
+        {
+            bool isBindless = flags.HasFlag(TextureFlags.Bindless);
+
+            if (!isBindless)
+            {
+                Config.SetUsedTexture(inst, type, format, flags, TextureOperation.DefaultCbufSlot, handle);
+            }
+
+            TextureOperation operation = new TextureOperation(inst, type, format, flags, handle, compIndex, dests, sources);
+
+            return operation;
+        }
+
+        /// <summary>
+        /// Flags the features used when a given attribute is read.
+        /// </summary>
+        /// <param name="attribute">Attribute offset, see <see cref="AttributeConsts"/></param>
+        public void FlagAttributeRead(int attribute)
+        {
+            bool isVertexInstanceId = Config.Stage == ShaderStage.Vertex && attribute == AttributeConsts.InstanceId;
+
+            if (isVertexInstanceId)
+            {
+                Config.SetUsedFeature(FeatureFlags.InstanceId);
+
+                return;
+            }
+
+            if (Config.Stage != ShaderStage.Fragment)
+            {
+                return;
+            }
+
+            if (attribute == AttributeConsts.PositionX || attribute == AttributeConsts.PositionY)
+            {
+                Config.SetUsedFeature(FeatureFlags.FragCoordXY);
+            }
+        }
+
+        /// <summary>
+        /// Flags the clip distances and features used when a given attribute is written.
+        /// </summary>
+        /// <param name="attribute">Attribute offset, see <see cref="AttributeConsts"/></param>
+        public void FlagAttributeWritten(int attribute)
+        {
+            if (Config.Stage == ShaderStage.Vertex)
+            {
+                const int LastClipDistanceOffset = AttributeConsts.ClipDistance7 - AttributeConsts.ClipDistance0;
+
+                int clipDistanceOffset = attribute - AttributeConsts.ClipDistance0;
+
+                // Matches exactly ClipDistance0 to ClipDistance7, which are 4 units apart.
+                bool isClipDistance = clipDistanceOffset >= 0 &&
+                                      clipDistanceOffset <= LastClipDistanceOffset &&
+                                      (clipDistanceOffset & 3) == 0;
+
+                if (isClipDistance)
+                {
+                    Config.SetClipDistanceWritten(clipDistanceOffset / 4);
+                }
+            }
+
+            bool isLayerWrite = Config.Stage != ShaderStage.Fragment && attribute == AttributeConsts.Layer;
+
+            if (isLayerWrite)
+            {
+                Config.SetUsedFeature(FeatureFlags.RtLayer);
+            }
+        }
+
+        /// <summary>
+        /// Appends a MarkLabel operation for the given label.
+        /// </summary>
+        /// <param name="label">Label operand, passed as the destination of the operation</param>
+        public void MarkLabel(Operand label) => Add(Instruction.MarkLabel, label);
+
+        /// <summary>
+        /// Gets the label operand for an address, creating it if it does not exist yet.
+        /// </summary>
+        /// <param name="address">Address used as key for the label</param>
+        /// <returns>Label operand for the address</returns>
+        public Operand GetLabel(ulong address) => EnsureBlockLabel(address).Label;
+
+        /// <summary>
+        /// Sets the BRX target information of the block label at a given address.
+        /// </summary>
+        /// <param name="address">Address of the block label to update</param>
+        /// <param name="selector">Selector operand</param>
+        /// <param name="targetValue">Value stored as the expected value of the selector</param>
+        /// <param name="nextTargetAddress">Address stored as the next target</param>
+        public void SetBrxTarget(ulong address, Operand selector, int targetValue, ulong nextTargetAddress)
+        {
+            BlockLabel target = EnsureBlockLabel(address);
+
+            // The target is expected to be set only once per label.
+            Debug.Assert(target.BrxTarget.Selector == null);
+
+            target.BrxTarget = new BrxTarget(selector, targetValue, nextTargetAddress);
+        }
+
+        /// <summary>
+        /// Marks the label of the block at a given address. If a BRX target was set for it, this also emits
+        /// a branch to the next target that is taken when the selector does not match the expected value.
+        /// </summary>
+        /// <param name="address">Address of the block being entered</param>
+        public void EnterBlock(ulong address)
+        {
+            BlockLabel entered = EnsureBlockLabel(address);
+
+            MarkLabel(entered.Label);
+
+            BrxTarget target = entered.BrxTarget;
+
+            if (target.Selector == null)
+            {
+                return;
+            }
+
+            Operand nextTargetLabel = GetLabel(target.NextTargetAddress);
+            Operand selectorMatches = this.ICompareEqual(target.Selector, Const(target.ExpectedValue));
+
+            this.BranchIfFalse(nextTargetLabel, selectorMatches);
+        }
+
+        /// <summary>
+        /// Gets the block label registered for an address, registering a new one if needed.
+        /// </summary>
+        /// <param name="address">Address used as key for the label</param>
+        /// <returns>Existing or newly created block label</returns>
+        private BlockLabel EnsureBlockLabel(ulong address)
+        {
+            if (_labels.TryGetValue(address, out BlockLabel existing))
+            {
+                return existing;
+            }
+
+            BlockLabel created = new BlockLabel(Label());
+
+            _labels.Add(address, created);
+
+            return created;
+        }
+
+        /// <summary>
+        /// Emits the position and layer output adjustments that depend on the configuration
+        /// and on the state queried from the GPU accessor.
+        /// </summary>
+        public void PrepareForVertexReturn()
+        {
+            if (Config.GpuAccessor.QueryViewportTransformDisable())
+            {
+                Operand posX = this.Load(StorageKind.Output, IoVariable.Position, null, Const(0));
+                Operand posY = this.Load(StorageKind.Output, IoVariable.Position, null, Const(1));
+                Operand inverseX = this.Load(StorageKind.Input, IoVariable.SupportBlockViewInverse, null, Const(0));
+                Operand inverseY = this.Load(StorageKind.Input, IoVariable.SupportBlockViewInverse, null, Const(1));
+                Operand minusOne = ConstF(-1.0f);
+
+                // Rewrites position X and Y from the loaded position, the view inverse values and -1.
+                this.Store(StorageKind.Output, IoVariable.Position, null, Const(0), this.FPFusedMultiplyAdd(posX, inverseX, minusOne));
+                this.Store(StorageKind.Output, IoVariable.Position, null, Const(1), this.FPFusedMultiplyAdd(posY, inverseY, minusOne));
+            }
+
+            if (Config.Options.TargetApi == TargetApi.Vulkan && Config.GpuAccessor.QueryTransformDepthMinusOneToOne())
+            {
+                Operand posZ = this.Load(StorageKind.Output, IoVariable.Position, null, Const(2));
+                Operand posW = this.Load(StorageKind.Output, IoVariable.Position, null, Const(3));
+                Operand halfPosW = this.FPMultiply(posW, ConstF(0.5f));
+
+                // Rewrites position Z from the loaded Z, 0.5 and half of W.
+                this.Store(StorageKind.Output, IoVariable.Position, null, Const(2), this.FPFusedMultiplyAdd(posZ, ConstF(0.5f), halfPosW));
+            }
+
+            if (Config.Stage != ShaderStage.Geometry && Config.HasLayerInputAttribute)
+            {
+                Config.SetUsedFeature(FeatureFlags.RtLayer);
+
+                // The upper bits select the vector, the 2 lower bits select the component.
+                int vectorIndex = Config.GpLayerInputAttribute >> 2;
+                int componentIndex = Config.GpLayerInputAttribute & 3;
+
+                Operand layerValue = this.Load(StorageKind.Output, IoVariable.UserDefined, null, Const(vectorIndex), Const(componentIndex));
+
+                this.Store(StorageKind.Output, IoVariable.Layer, null, layerValue);
+            }
+        }
+
+        /// <summary>
+        /// Copies the position components that <see cref="PrepareForVertexReturn()"/> may overwrite into new locals,
+        /// and then calls it.
+        /// </summary>
+        /// <param name="oldXLocal">Local with the previous X, or null if the viewport transform disable query is false</param>
+        /// <param name="oldYLocal">Local with the previous Y, or null if the viewport transform disable query is false</param>
+        /// <param name="oldZLocal">Local with the previous Z, or null if the depth adjustment does not apply</param>
+        public void PrepareForVertexReturn(out Operand oldXLocal, out Operand oldYLocal, out Operand oldZLocal)
+        {
+            oldXLocal = null;
+            oldYLocal = null;
+            oldZLocal = null;
+
+            if (Config.GpuAccessor.QueryViewportTransformDisable())
+            {
+                oldXLocal = Local();
+                this.Copy(oldXLocal, this.Load(StorageKind.Output, IoVariable.Position, null, Const(0)));
+
+                oldYLocal = Local();
+                this.Copy(oldYLocal, this.Load(StorageKind.Output, IoVariable.Position, null, Const(1)));
+            }
+
+            if (Config.Options.TargetApi == TargetApi.Vulkan && Config.GpuAccessor.QueryTransformDepthMinusOneToOne())
+            {
+                oldZLocal = Local();
+                this.Copy(oldZLocal, this.Load(StorageKind.Output, IoVariable.Position, null, Const(2)));
+            }
+
+            PrepareForVertexReturn();
+        }
+
+ /// <summary>
+ /// Emits the code that must run before returning from the shader, which depends on the shader stage.
+ /// Nothing is emitted when <see cref="IsNonMain"/> is set.
+ /// </summary>
+ public void PrepareForReturn()
+ {
+ if (IsNonMain)
+ {
+ return;
+ }
+
+ if (Config.LastInVertexPipeline &&
+ (Config.Stage == ShaderStage.Vertex || Config.Stage == ShaderStage.TessellationEvaluation) &&
+ (Config.Options.Flags & TranslationFlags.VertexA) == 0)
+ {
+ PrepareForVertexReturn();
+ }
+ else if (Config.Stage == ShaderStage.Geometry)
+ {
+ // Copies the 4 components of the input position of a given vertex to the output position.
+ void WritePositionOutput(int primIndex)
+ {
+ Operand x = this.Load(StorageKind.Input, IoVariable.Position, Const(primIndex), Const(0));
+ Operand y = this.Load(StorageKind.Input, IoVariable.Position, Const(primIndex), Const(1));
+ Operand z = this.Load(StorageKind.Input, IoVariable.Position, Const(primIndex), Const(2));
+ Operand w = this.Load(StorageKind.Input, IoVariable.Position, Const(primIndex), Const(3));
+
+ this.Store(StorageKind.Output, IoVariable.Position, null, Const(0), x);
+ this.Store(StorageKind.Output, IoVariable.Position, null, Const(1), y);
+ this.Store(StorageKind.Output, IoVariable.Position, null, Const(2), z);
+ this.Store(StorageKind.Output, IoVariable.Position, null, Const(3), w);
+ }
+
+ // Copies the 4 components of a user defined input of a given vertex to the matching output.
+ void WriteUserDefinedOutput(int index, int primIndex)
+ {
+ Operand x = this.Load(StorageKind.Input, IoVariable.UserDefined, Const(index), Const(primIndex), Const(0));
+ Operand y = this.Load(StorageKind.Input, IoVariable.UserDefined, Const(index), Const(primIndex), Const(1));
+ Operand z = this.Load(StorageKind.Input, IoVariable.UserDefined, Const(index), Const(primIndex), Const(2));
+ Operand w = this.Load(StorageKind.Input, IoVariable.UserDefined, Const(index), Const(primIndex), Const(3));
+
+ this.Store(StorageKind.Output, IoVariable.UserDefined, null, Const(index), Const(0), x);
+ this.Store(StorageKind.Output, IoVariable.UserDefined, null, Const(index), Const(1), y);
+ this.Store(StorageKind.Output, IoVariable.UserDefined, null, Const(index), Const(2), z);
+ this.Store(StorageKind.Output, IoVariable.UserDefined, null, Const(index), Const(3), w);
+ }
+
+ // When the shader uses passthrough but the host does not support it,
+ // the inputs are copied to the outputs for every input vertex instead.
+ if (Config.GpPassthrough && !Config.GpuAccessor.QueryHostSupportsGeometryShaderPassthrough())
+ {
+ int inputVertices = Config.GpuAccessor.QueryPrimitiveTopology().ToInputVertices();
+
+ for (int primIndex = 0; primIndex < inputVertices; primIndex++)
+ {
+ WritePositionOutput(primIndex);
+
+ // Each set bit of the mask is the index of an attribute to copy.
+ int passthroughAttributes = Config.PassthroughAttributes;
+ while (passthroughAttributes != 0)
+ {
+ int index = BitOperations.TrailingZeroCount(passthroughAttributes);
+ WriteUserDefinedOutput(index, primIndex);
+ Config.SetOutputUserAttribute(index);
+ passthroughAttributes &= ~(1 << index);
+ }
+
+ this.EmitVertex();
+ }
+
+ this.EndPrimitive();
+ }
+ }
+ else if (Config.Stage == ShaderStage.Fragment)
+ {
+ GenerateAlphaToCoverageDitherDiscard();
+
+ bool supportsBgra = Config.GpuAccessor.QueryHostSupportsBgraFormat();
+
+ if (Config.OmapDepth)
+ {
+ Operand src = Register(Config.GetDepthRegister(), RegisterType.Gpr);
+
+ this.Store(StorageKind.Output, IoVariable.FragmentOutputDepth, null, src);
+ }
+
+ AlphaTestOp alphaTestOp = Config.GpuAccessor.QueryAlphaTestCompare();
+
+ // Bit 3 of the mask is component 3 of render target 0 (same bit layout used by the loop below).
+ if (alphaTestOp != AlphaTestOp.Always && (Config.OmapTargets & 8) != 0)
+ {
+ if (alphaTestOp == AlphaTestOp.Never)
+ {
+ this.Discard();
+ }
+ else
+ {
+ // 0 is used as a marker for alpha test operations not handled here.
+ Instruction comparator = alphaTestOp switch
+ {
+ AlphaTestOp.Equal => Instruction.CompareEqual,
+ AlphaTestOp.Greater => Instruction.CompareGreater,
+ AlphaTestOp.GreaterOrEqual => Instruction.CompareGreaterOrEqual,
+ AlphaTestOp.Less => Instruction.CompareLess,
+ AlphaTestOp.LessOrEqual => Instruction.CompareLessOrEqual,
+ AlphaTestOp.NotEqual => Instruction.CompareNotEqual,
+ _ => 0
+ };
+
+ Debug.Assert(comparator != 0, $"Invalid alpha test operation \"{alphaTestOp}\".");
+
+ Operand alpha = Register(3, RegisterType.Gpr);
+ Operand alphaRef = ConstF(Config.GpuAccessor.QueryAlphaTestReference());
+ Operand alphaPass = Add(Instruction.FP32 | comparator, Local(), alpha, alphaRef);
+ Operand alphaPassLabel = Label();
+
+ // The discard is skipped when the comparison passes.
+ this.BranchIfTrue(alphaPassLabel, alphaPass);
+ this.Discard();
+ this.MarkLabel(alphaPassLabel);
+ }
+ }
+
+ // Index of the first register of the current render target,
+ // only advanced for render targets with at least one component enabled.
+ int regIndexBase = 0;
+
+ for (int rtIndex = 0; rtIndex < 8; rtIndex++)
+ {
+ for (int component = 0; component < 4; component++)
+ {
+ // The mask has 4 bits per render target, one for each component.
+ bool componentEnabled = (Config.OmapTargets & (1 << (rtIndex * 4 + component))) != 0;
+ if (!componentEnabled)
+ {
+ continue;
+ }
+
+ Operand src = Register(regIndexBase + component, RegisterType.Gpr);
+
+ // Perform B <-> R swap if needed, for BGRA formats (not supported on OpenGL).
+ if (!supportsBgra && (component == 0 || component == 2))
+ {
+ Operand isBgra = this.Load(StorageKind.Input, IoVariable.FragmentOutputIsBgra, null, Const(rtIndex));
+
+ Operand lblIsBgra = Label();
+ Operand lblEnd = Label();
+
+ this.BranchIfTrue(lblIsBgra, isBgra);
+
+ this.Store(StorageKind.Output, IoVariable.FragmentOutputColor, null, Const(rtIndex), Const(component), src);
+ this.Branch(lblEnd);
+
+ MarkLabel(lblIsBgra);
+
+ // 2 - component maps component 0 to 2 and component 2 to 0.
+ this.Store(StorageKind.Output, IoVariable.FragmentOutputColor, null, Const(rtIndex), Const(2 - component), src);
+
+ MarkLabel(lblEnd);
+ }
+ else
+ {
+ this.Store(StorageKind.Output, IoVariable.FragmentOutputColor, null, Const(rtIndex), Const(component), src);
+ }
+ }
+
+ bool targetEnabled = (Config.OmapTargets & (0xf << (rtIndex * 4))) != 0;
+ if (targetEnabled)
+ {
+ Config.SetOutputUserAttribute(rtIndex);
+ regIndexBase += 4;
+ }
+ }
+ }
+ }
+
+        /// <summary>
+        /// Emits a discard that is skipped when the dither mask bit, selected from the fragment
+        /// coordinates and the alpha value, is set.
+        /// </summary>
+        private void GenerateAlphaToCoverageDitherDiscard()
+        {
+            // If the feature is disabled, or alpha is not written, then we're done.
+            if (!Config.GpuAccessor.QueryAlphaToCoverageDitherEnable() || (Config.OmapTargets & 8) == 0)
+            {
+                return;
+            }
+
+            // 11 11 11 10 10 10 10 00
+            // 11 01 01 01 01 00 00 00
+            Operand mask = Const(unchecked((int)0xfbb99110u));
+
+            Operand coordX = this.Load(StorageKind.Input, IoVariable.FragmentCoord, null, Const(0));
+            Operand coordY = this.Load(StorageKind.Input, IoVariable.FragmentCoord, null, Const(1));
+
+            // 2 bits with the parity of the fragment coordinates, X on bit 0 and Y on bit 1.
+            Operand parityX = this.BitwiseAnd(this.FP32ConvertToU32(coordX), Const(1));
+            Operand parityY = this.BitwiseAnd(this.FP32ConvertToU32(coordY), Const(1));
+            Operand parity = this.BitwiseOr(parityX, this.ShiftLeft(parityY, Const(1)));
+
+            // Alpha is saturated, scaled by 8 and limited to 7, producing a 3 bits level.
+            Operand alphaValue = Register(3, RegisterType.Gpr);
+            Operand alphaTimes8 = this.FPMultiply(this.FPSaturate(alphaValue), ConstF(8));
+            Operand alphaLevel = this.IMinimumU32(this.FP32ConvertToU32(alphaTimes8), Const(7));
+
+            // The mask bit index is the alpha level times 4, plus the coordinate parity bits.
+            Operand bitIndex = this.BitwiseOr(this.ShiftLeft(alphaLevel, Const(2)), parity);
+            Operand isOpaque = this.BitwiseAnd(this.ShiftRightU32(mask, bitIndex), Const(1));
+
+            Operand endLabel = Label();
+
+            this.BranchIfTrue(endLabel, isOpaque);
+            this.Discard();
+            this.MarkLabel(endLabel);
+        }
+
+        /// <summary>
+        /// Gets a copy of the operation list as an array.
+        /// </summary>
+        /// <returns>New array with all the operations added so far</returns>
+        public Operation[] GetOperations() => _operations.ToArray();
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/Translation/EmitterContextInsts.cs b/src/Ryujinx.Graphics.Shader/Translation/EmitterContextInsts.cs
new file mode 100644
index 00000000..93748249
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/Translation/EmitterContextInsts.cs
@@ -0,0 +1,819 @@
+using Ryujinx.Graphics.Shader.IntermediateRepresentation;
+using System;
+
+using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
+
+namespace Ryujinx.Graphics.Shader.Translation
+{
+ /// <summary>
+ /// Extension methods on <see cref="EmitterContext"/>, one per IR instruction.
+ /// Each helper forwards to <c>context.Add</c>; helpers that produce a value pass a
+ /// fresh <c>Local()</c> operand as the destination.
+ /// </summary>
+ static class EmitterContextInsts
+ {
+     public static Operand AtomicAdd(this EmitterContext context, StorageKind storageKind, Operand a, Operand b, Operand c)
+         => context.Add(Instruction.AtomicAdd, storageKind, Local(), a, b, c);
+
+     public static Operand AtomicAnd(this EmitterContext context, StorageKind storageKind, Operand a, Operand b, Operand c)
+         => context.Add(Instruction.AtomicAnd, storageKind, Local(), a, b, c);
+
+     public static Operand AtomicCompareAndSwap(this EmitterContext context, StorageKind storageKind, Operand a, Operand b, Operand c, Operand d)
+         => context.Add(Instruction.AtomicCompareAndSwap, storageKind, Local(), a, b, c, d);
+
+     public static Operand AtomicMaxS32(this EmitterContext context, StorageKind storageKind, Operand a, Operand b, Operand c)
+         => context.Add(Instruction.AtomicMaxS32, storageKind, Local(), a, b, c);
+
+     public static Operand AtomicMaxU32(this EmitterContext context, StorageKind storageKind, Operand a, Operand b, Operand c)
+         => context.Add(Instruction.AtomicMaxU32, storageKind, Local(), a, b, c);
+
+     public static Operand AtomicMinS32(this EmitterContext context, StorageKind storageKind, Operand a, Operand b, Operand c)
+         => context.Add(Instruction.AtomicMinS32, storageKind, Local(), a, b, c);
+
+     public static Operand AtomicMinU32(this EmitterContext context, StorageKind storageKind, Operand a, Operand b, Operand c)
+         => context.Add(Instruction.AtomicMinU32, storageKind, Local(), a, b, c);
+
+     public static Operand AtomicOr(this EmitterContext context, StorageKind storageKind, Operand a, Operand b, Operand c)
+         => context.Add(Instruction.AtomicOr, storageKind, Local(), a, b, c);
+
+     public static Operand AtomicSwap(this EmitterContext context, StorageKind storageKind, Operand a, Operand b, Operand c)
+         => context.Add(Instruction.AtomicSwap, storageKind, Local(), a, b, c);
+
+     public static Operand AtomicXor(this EmitterContext context, StorageKind storageKind, Operand a, Operand b, Operand c)
+         => context.Add(Instruction.AtomicXor, storageKind, Local(), a, b, c);
+
+     public static Operand Ballot(this EmitterContext context, Operand a)
+         => context.Add(Instruction.Ballot, Local(), a);
+
+     public static Operand Barrier(this EmitterContext context)
+         => context.Add(Instruction.Barrier);
+
+     public static Operand BitCount(this EmitterContext context, Operand a)
+         => context.Add(Instruction.BitCount, Local(), a);
+
+     public static Operand BitfieldExtractS32(this EmitterContext context, Operand a, Operand b, Operand c)
+         => context.Add(Instruction.BitfieldExtractS32, Local(), a, b, c);
+
+     public static Operand BitfieldExtractU32(this EmitterContext context, Operand a, Operand b, Operand c)
+         => context.Add(Instruction.BitfieldExtractU32, Local(), a, b, c);
+
+     public static Operand BitfieldInsert(this EmitterContext context, Operand a, Operand b, Operand c, Operand d)
+         => context.Add(Instruction.BitfieldInsert, Local(), a, b, c, d);
+
+     public static Operand BitfieldReverse(this EmitterContext context, Operand a)
+         => context.Add(Instruction.BitfieldReverse, Local(), a);
+
+     public static Operand BitwiseAnd(this EmitterContext context, Operand a, Operand b)
+         => context.Add(Instruction.BitwiseAnd, Local(), a, b);
+
+     public static Operand BitwiseExclusiveOr(this EmitterContext context, Operand a, Operand b)
+         => context.Add(Instruction.BitwiseExclusiveOr, Local(), a, b);
+
+     // Applies a bitwise NOT only when "invert" is set; otherwise "a" is returned untouched.
+     public static Operand BitwiseNot(this EmitterContext context, Operand a, bool invert)
+         => invert ? context.BitwiseNot(a) : a;
+
+     public static Operand BitwiseNot(this EmitterContext context, Operand a)
+         => context.Add(Instruction.BitwiseNot, Local(), a);
+
+     public static Operand BitwiseOr(this EmitterContext context, Operand a, Operand b)
+         => context.Add(Instruction.BitwiseOr, Local(), a, b);
+
+     public static Operand Branch(this EmitterContext context, Operand d)
+         => context.Add(Instruction.Branch, d);
+
+     public static Operand BranchIfFalse(this EmitterContext context, Operand d, Operand a)
+         => context.Add(Instruction.BranchIfFalse, d, a);
+
+     public static Operand BranchIfTrue(this EmitterContext context, Operand d, Operand a)
+         => context.Add(Instruction.BranchIfTrue, d, a);
+
+     // Emits a call whose first source is the function id constant, followed by "args".
+     // A destination local is only supplied when "returns" is set.
+     public static Operand Call(this EmitterContext context, int funcId, bool returns, params Operand[] args)
+     {
+         Operand[] sources = new Operand[args.Length + 1];
+
+         sources[0] = Const(funcId);
+         Array.Copy(args, 0, sources, 1, args.Length);
+
+         Operand dest = returns ? Local() : null;
+
+         return context.Add(Instruction.Call, dest, sources);
+     }
+
+     public static Operand ConditionalSelect(this EmitterContext context, Operand a, Operand b, Operand c)
+         => context.Add(Instruction.ConditionalSelect, Local(), a, b, c);
+
+     public static Operand Copy(this EmitterContext context, Operand a)
+         => context.Add(Instruction.Copy, Local(), a);
+
+     // Copies "a" into "d"; nothing is emitted when the destination is a constant operand.
+     public static void Copy(this EmitterContext context, Operand d, Operand a)
+     {
+         if (d.Type != OperandType.Constant)
+         {
+             context.Add(Instruction.Copy, d, a);
+         }
+     }
+
+     public static Operand Discard(this EmitterContext context)
+         => context.Add(Instruction.Discard);
+
+     public static Operand EmitVertex(this EmitterContext context)
+         => context.Add(Instruction.EmitVertex);
+
+     public static Operand EndPrimitive(this EmitterContext context)
+         => context.Add(Instruction.EndPrimitive);
+
+     public static Operand FindLSB(this EmitterContext context, Operand a)
+         => context.Add(Instruction.FindLSB, Local(), a);
+
+     public static Operand FindMSBS32(this EmitterContext context, Operand a)
+         => context.Add(Instruction.FindMSBS32, Local(), a);
+
+     public static Operand FindMSBU32(this EmitterContext context, Operand a)
+         => context.Add(Instruction.FindMSBU32, Local(), a);
+
+     public static Operand FP32ConvertToFP64(this EmitterContext context, Operand a)
+         => context.Add(Instruction.ConvertFP32ToFP64, Local(), a);
+
+     public static Operand FP64ConvertToFP32(this EmitterContext context, Operand a)
+         => context.Add(Instruction.ConvertFP64ToFP32, Local(), a);
+
+     // Optional absolute value first, then optional negation.
+     public static Operand FPAbsNeg(this EmitterContext context, Operand a, bool abs, bool neg, Instruction fpType = Instruction.FP32)
+         => context.FPNegate(context.FPAbsolute(a, abs, fpType), neg, fpType);
+
+     public static Operand FPAbsolute(this EmitterContext context, Operand a, bool abs, Instruction fpType = Instruction.FP32)
+         => abs ? context.FPAbsolute(a, fpType) : a;
+
+     public static Operand FPAbsolute(this EmitterContext context, Operand a, Instruction fpType = Instruction.FP32)
+         => context.Add(fpType | Instruction.Absolute, Local(), a);
+
+     public static Operand FPAdd(this EmitterContext context, Operand a, Operand b, Instruction fpType = Instruction.FP32)
+         => context.Add(fpType | Instruction.Add, Local(), a, b);
+
+     public static Operand FPCeiling(this EmitterContext context, Operand a, Instruction fpType = Instruction.FP32)
+         => context.Add(fpType | Instruction.Ceiling, Local(), a);
+
+     public static Operand FPCompareEqual(this EmitterContext context, Operand a, Operand b, Instruction fpType = Instruction.FP32)
+         => context.Add(fpType | Instruction.CompareEqual, Local(), a, b);
+
+     public static Operand FPCompareLess(this EmitterContext context, Operand a, Operand b, Instruction fpType = Instruction.FP32)
+         => context.Add(fpType | Instruction.CompareLess, Local(), a, b);
+
+     public static Operand FP32ConvertToS32(this EmitterContext context, Operand a)
+         => context.Add(Instruction.ConvertFP32ToS32, Local(), a);
+
+     public static Operand FP32ConvertToU32(this EmitterContext context, Operand a)
+         => context.Add(Instruction.ConvertFP32ToU32, Local(), a);
+
+     public static Operand FP64ConvertToS32(this EmitterContext context, Operand a)
+         => context.Add(Instruction.ConvertFP64ToS32, Local(), a);
+
+     public static Operand FP64ConvertToU32(this EmitterContext context, Operand a)
+         => context.Add(Instruction.ConvertFP64ToU32, Local(), a);
+
+     public static Operand FPCosine(this EmitterContext context, Operand a)
+         => context.Add(Instruction.FP32 | Instruction.Cosine, Local(), a);
+
+     public static Operand FPDivide(this EmitterContext context, Operand a, Operand b, Instruction fpType = Instruction.FP32)
+         => context.Add(fpType | Instruction.Divide, Local(), a, b);
+
+     public static Operand FPExponentB2(this EmitterContext context, Operand a)
+         => context.Add(Instruction.FP32 | Instruction.ExponentB2, Local(), a);
+
+     public static Operand FPFloor(this EmitterContext context, Operand a, Instruction fpType = Instruction.FP32)
+         => context.Add(fpType | Instruction.Floor, Local(), a);
+
+     public static Operand FPFusedMultiplyAdd(this EmitterContext context, Operand a, Operand b, Operand c, Instruction fpType = Instruction.FP32)
+         => context.Add(fpType | Instruction.FusedMultiplyAdd, Local(), a, b, c);
+
+     public static Operand FPLogarithmB2(this EmitterContext context, Operand a)
+         => context.Add(Instruction.FP32 | Instruction.LogarithmB2, Local(), a);
+
+     public static Operand FPMaximum(this EmitterContext context, Operand a, Operand b, Instruction fpType = Instruction.FP32)
+         => context.Add(fpType | Instruction.Maximum, Local(), a, b);
+
+     public static Operand FPMinimum(this EmitterContext context, Operand a, Operand b, Instruction fpType = Instruction.FP32)
+         => context.Add(fpType | Instruction.Minimum, Local(), a, b);
+
+     public static Operand FPMultiply(this EmitterContext context, Operand a, Operand b, Instruction fpType = Instruction.FP32)
+         => context.Add(fpType | Instruction.Multiply, Local(), a, b);
+
+     public static Operand FPNegate(this EmitterContext context, Operand a, bool neg, Instruction fpType = Instruction.FP32)
+         => neg ? context.FPNegate(a, fpType) : a;
+
+     public static Operand FPNegate(this EmitterContext context, Operand a, Instruction fpType = Instruction.FP32)
+         => context.Add(fpType | Instruction.Negate, Local(), a);
+
+     // Emitted as a division of 1 by "a"; for FP64 the constant 1.0 is packed from two 32-bit halves.
+     public static Operand FPReciprocal(this EmitterContext context, Operand a, Instruction fpType = Instruction.FP32)
+     {
+         Operand one = fpType == Instruction.FP64 ? context.PackDouble2x32(1.0) : ConstF(1);
+
+         return context.FPDivide(one, a, fpType);
+     }
+
+     public static Operand FPReciprocalSquareRoot(this EmitterContext context, Operand a, Instruction fpType = Instruction.FP32)
+         => context.Add(fpType | Instruction.ReciprocalSquareRoot, Local(), a);
+
+     public static Operand FPRound(this EmitterContext context, Operand a, Instruction fpType = Instruction.FP32)
+         => context.Add(fpType | Instruction.Round, Local(), a);
+
+     public static Operand FPSaturate(this EmitterContext context, Operand a, bool sat, Instruction fpType = Instruction.FP32)
+         => sat ? context.FPSaturate(a, fpType) : a;
+
+     // Emitted as a clamp between 0 and 1; for FP64 both bounds are packed doubles.
+     public static Operand FPSaturate(this EmitterContext context, Operand a, Instruction fpType = Instruction.FP32)
+     {
+         if (fpType == Instruction.FP64)
+         {
+             return context.Add(fpType | Instruction.Clamp, Local(), a, context.PackDouble2x32(0.0), context.PackDouble2x32(1.0));
+         }
+
+         return context.Add(fpType | Instruction.Clamp, Local(), a, ConstF(0), ConstF(1));
+     }
+
+     public static Operand FPSine(this EmitterContext context, Operand a)
+         => context.Add(Instruction.FP32 | Instruction.Sine, Local(), a);
+
+     public static Operand FPSquareRoot(this EmitterContext context, Operand a)
+         => context.Add(Instruction.FP32 | Instruction.SquareRoot, Local(), a);
+
+     public static Operand FPTruncate(this EmitterContext context, Operand a, Instruction fpType = Instruction.FP32)
+         => context.Add(fpType | Instruction.Truncate, Local(), a);
+
+     public static Operand FPSwizzleAdd(this EmitterContext context, Operand a, Operand b, int mask)
+         => context.Add(Instruction.SwizzleAdd, Local(), a, b, Const(mask));
+
+     public static void FSIBegin(this EmitterContext context)
+         => context.Add(Instruction.FSIBegin);
+
+     public static void FSIEnd(this EmitterContext context)
+         => context.Add(Instruction.FSIEnd);
+
+     public static Operand GroupMemoryBarrier(this EmitterContext context)
+         => context.Add(Instruction.GroupMemoryBarrier);
+
+     // Optional absolute value first, then optional negation.
+     public static Operand IAbsNeg(this EmitterContext context, Operand a, bool abs, bool neg)
+         => context.INegate(context.IAbsolute(a, abs), neg);
+
+     public static Operand IAbsolute(this EmitterContext context, Operand a, bool abs)
+         => abs ? context.IAbsolute(a) : a;
+
+     public static Operand IAbsolute(this EmitterContext context, Operand a)
+         => context.Add(Instruction.Absolute, Local(), a);
+
+     public static Operand IAdd(this EmitterContext context, Operand a, Operand b)
+         => context.Add(Instruction.Add, Local(), a, b);
+
+     public static Operand IClampS32(this EmitterContext context, Operand a, Operand b, Operand c)
+         => context.Add(Instruction.Clamp, Local(), a, b, c);
+
+     public static Operand IClampU32(this EmitterContext context, Operand a, Operand b, Operand c)
+         => context.Add(Instruction.ClampU32, Local(), a, b, c);
+
+     public static Operand ICompareEqual(this EmitterContext context, Operand a, Operand b)
+         => context.Add(Instruction.CompareEqual, Local(), a, b);
+
+     public static Operand ICompareGreater(this EmitterContext context, Operand a, Operand b)
+         => context.Add(Instruction.CompareGreater, Local(), a, b);
+
+     public static Operand ICompareGreaterOrEqual(this EmitterContext context, Operand a, Operand b)
+         => context.Add(Instruction.CompareGreaterOrEqual, Local(), a, b);
+
+     public static Operand ICompareGreaterOrEqualUnsigned(this EmitterContext context, Operand a, Operand b)
+         => context.Add(Instruction.CompareGreaterOrEqualU32, Local(), a, b);
+
+     public static Operand ICompareGreaterUnsigned(this EmitterContext context, Operand a, Operand b)
+         => context.Add(Instruction.CompareGreaterU32, Local(), a, b);
+
+     public static Operand ICompareLess(this EmitterContext context, Operand a, Operand b)
+         => context.Add(Instruction.CompareLess, Local(), a, b);
+
+     public static Operand ICompareLessOrEqual(this EmitterContext context, Operand a, Operand b)
+         => context.Add(Instruction.CompareLessOrEqual, Local(), a, b);
+
+     public static Operand ICompareLessOrEqualUnsigned(this EmitterContext context, Operand a, Operand b)
+         => context.Add(Instruction.CompareLessOrEqualU32, Local(), a, b);
+
+     public static Operand ICompareLessUnsigned(this EmitterContext context, Operand a, Operand b)
+         => context.Add(Instruction.CompareLessU32, Local(), a, b);
+
+     public static Operand ICompareNotEqual(this EmitterContext context, Operand a, Operand b)
+         => context.Add(Instruction.CompareNotEqual, Local(), a, b);
+
+     public static Operand IConvertS32ToFP32(this EmitterContext context, Operand a)
+         => context.Add(Instruction.ConvertS32ToFP32, Local(), a);
+
+     public static Operand IConvertS32ToFP64(this EmitterContext context, Operand a)
+         => context.Add(Instruction.ConvertS32ToFP64, Local(), a);
+
+     public static Operand IConvertU32ToFP32(this EmitterContext context, Operand a)
+         => context.Add(Instruction.ConvertU32ToFP32, Local(), a);
+
+     public static Operand IConvertU32ToFP64(this EmitterContext context, Operand a)
+         => context.Add(Instruction.ConvertU32ToFP64, Local(), a);
+
+     public static Operand IMaximumS32(this EmitterContext context, Operand a, Operand b)
+         => context.Add(Instruction.Maximum, Local(), a, b);
+
+     public static Operand IMaximumU32(this EmitterContext context, Operand a, Operand b)
+         => context.Add(Instruction.MaximumU32, Local(), a, b);
+
+     public static Operand IMinimumS32(this EmitterContext context, Operand a, Operand b)
+         => context.Add(Instruction.Minimum, Local(), a, b);
+
+     public static Operand IMinimumU32(this EmitterContext context, Operand a, Operand b)
+         => context.Add(Instruction.MinimumU32, Local(), a, b);
+
+     public static Operand IMultiply(this EmitterContext context, Operand a, Operand b)
+         => context.Add(Instruction.Multiply, Local(), a, b);
+
+     public static Operand INegate(this EmitterContext context, Operand a, bool neg)
+         => neg ? context.INegate(a) : a;
+
+     public static Operand INegate(this EmitterContext context, Operand a)
+         => context.Add(Instruction.Negate, Local(), a);
+
+     public static Operand ISubtract(this EmitterContext context, Operand a, Operand b)
+         => context.Add(Instruction.Subtract, Local(), a, b);
+
+     public static Operand IsNan(this EmitterContext context, Operand a, Instruction fpType = Instruction.FP32)
+         => context.Add(fpType | Instruction.IsNan, Local(), a);
+
+     // Load overloads: the I/O variable id is always the first source. "primVertex" is
+     // only included as a source when it is not null.
+     public static Operand Load(this EmitterContext context, StorageKind storageKind, IoVariable ioVariable, Operand primVertex = null)
+     {
+         if (primVertex == null)
+         {
+             return context.Add(Instruction.Load, storageKind, Local(), Const((int)ioVariable));
+         }
+
+         return context.Add(Instruction.Load, storageKind, Local(), Const((int)ioVariable), primVertex);
+     }
+
+     public static Operand Load(this EmitterContext context, StorageKind storageKind, IoVariable ioVariable, Operand primVertex, Operand elemIndex)
+     {
+         if (primVertex == null)
+         {
+             return context.Add(Instruction.Load, storageKind, Local(), Const((int)ioVariable), elemIndex);
+         }
+
+         return context.Add(Instruction.Load, storageKind, Local(), Const((int)ioVariable), primVertex, elemIndex);
+     }
+
+     public static Operand Load(this EmitterContext context, StorageKind storageKind, IoVariable ioVariable, Operand primVertex, Operand arrayIndex, Operand elemIndex)
+     {
+         if (primVertex == null)
+         {
+             return context.Add(Instruction.Load, storageKind, Local(), Const((int)ioVariable), arrayIndex, elemIndex);
+         }
+
+         return context.Add(Instruction.Load, storageKind, Local(), Const((int)ioVariable), primVertex, arrayIndex, elemIndex);
+     }
+
+     // Records the constant buffer usage on the config before emitting the load:
+     // a constant slot is marked as used, anything else flags constant buffer indexing.
+     public static Operand LoadConstant(this EmitterContext context, Operand a, Operand b)
+     {
+         if (a.Type != OperandType.Constant)
+         {
+             context.Config.SetUsedFeature(FeatureFlags.CbIndexing);
+         }
+         else
+         {
+             context.Config.SetUsedConstantBuffer(a.Value);
+         }
+
+         return context.Add(Instruction.LoadConstant, Local(), a, b);
+     }
+
+     public static Operand LoadGlobal(this EmitterContext context, Operand a, Operand b)
+         => context.Add(Instruction.LoadGlobal, Local(), a, b);
+
+     public static Operand LoadLocal(this EmitterContext context, Operand a)
+         => context.Add(Instruction.LoadLocal, Local(), a);
+
+     public static Operand LoadShared(this EmitterContext context, Operand a)
+         => context.Add(Instruction.LoadShared, Local(), a);
+
+     public static Operand MemoryBarrier(this EmitterContext context)
+         => context.Add(Instruction.MemoryBarrier);
+
+     public static Operand MultiplyHighS32(this EmitterContext context, Operand a, Operand b)
+         => context.Add(Instruction.MultiplyHighS32, Local(), a, b);
+
+     public static Operand MultiplyHighU32(this EmitterContext context, Operand a, Operand b)
+         => context.Add(Instruction.MultiplyHighU32, Local(), a, b);
+
+     // Splits the raw 64-bit pattern of "value" into low and high 32-bit constants and packs them.
+     public static Operand PackDouble2x32(this EmitterContext context, double value)
+     {
+         long bits = BitConverter.DoubleToInt64Bits(value);
+
+         int lowWord = (int)bits;
+         int highWord = (int)(bits >> 32);
+
+         return context.Add(Instruction.PackDouble2x32, Local(), Const(lowWord), Const(highWord));
+     }
+
+     public static Operand PackDouble2x32(this EmitterContext context, Operand a, Operand b)
+         => context.Add(Instruction.PackDouble2x32, Local(), a, b);
+
+     public static Operand PackHalf2x16(this EmitterContext context, Operand a, Operand b)
+         => context.Add(Instruction.PackHalf2x16, Local(), a, b);
+
+     // Both Return overloads call PrepareForReturn on the context before emitting the return.
+     public static void Return(this EmitterContext context)
+     {
+         context.PrepareForReturn();
+         context.Add(Instruction.Return);
+     }
+
+     public static void Return(this EmitterContext context, Operand returnValue)
+     {
+         context.PrepareForReturn();
+         context.Add(Instruction.Return, null, returnValue);
+     }
+
+     public static Operand ShiftLeft(this EmitterContext context, Operand a, Operand b)
+         => context.Add(Instruction.ShiftLeft, Local(), a, b);
+
+     public static Operand ShiftRightS32(this EmitterContext context, Operand a, Operand b)
+         => context.Add(Instruction.ShiftRightS32, Local(), a, b);
+
+     public static Operand ShiftRightU32(this EmitterContext context, Operand a, Operand b)
+         => context.Add(Instruction.ShiftRightU32, Local(), a, b);
+
+     // The shuffle family passes a pair of fresh locals as destinations.
+     public static (Operand, Operand) Shuffle(this EmitterContext context, Operand a, Operand b, Operand c)
+         => context.Add(Instruction.Shuffle, (Local(), Local()), a, b, c);
+
+     public static (Operand, Operand) ShuffleDown(this EmitterContext context, Operand a, Operand b, Operand c)
+         => context.Add(Instruction.ShuffleDown, (Local(), Local()), a, b, c);
+
+     public static (Operand, Operand) ShuffleUp(this EmitterContext context, Operand a, Operand b, Operand c)
+         => context.Add(Instruction.ShuffleUp, (Local(), Local()), a, b, c);
+
+     public static (Operand, Operand) ShuffleXor(this EmitterContext context, Operand a, Operand b, Operand c)
+         => context.Add(Instruction.ShuffleXor, (Local(), Local()), a, b, c);
+
+     // Store overloads: no destination operand; the I/O variable id is the first source
+     // and "invocationId" is only included as a source when it is not null.
+     public static Operand Store(this EmitterContext context, StorageKind storageKind, IoVariable ioVariable, Operand invocationId, Operand value)
+     {
+         if (invocationId == null)
+         {
+             return context.Add(Instruction.Store, storageKind, null, Const((int)ioVariable), value);
+         }
+
+         return context.Add(Instruction.Store, storageKind, null, Const((int)ioVariable), invocationId, value);
+     }
+
+     public static Operand Store(this EmitterContext context, StorageKind storageKind, IoVariable ioVariable, Operand invocationId, Operand elemIndex, Operand value)
+     {
+         if (invocationId == null)
+         {
+             return context.Add(Instruction.Store, storageKind, null, Const((int)ioVariable), elemIndex, value);
+         }
+
+         return context.Add(Instruction.Store, storageKind, null, Const((int)ioVariable), invocationId, elemIndex, value);
+     }
+
+     public static Operand Store(this EmitterContext context, StorageKind storageKind, IoVariable ioVariable, Operand invocationId, Operand arrayIndex, Operand elemIndex, Operand value)
+     {
+         if (invocationId == null)
+         {
+             return context.Add(Instruction.Store, storageKind, null, Const((int)ioVariable), arrayIndex, elemIndex, value);
+         }
+
+         return context.Add(Instruction.Store, storageKind, null, Const((int)ioVariable), invocationId, arrayIndex, elemIndex, value);
+     }
+
+     public static Operand StoreGlobal(this EmitterContext context, Operand a, Operand b, Operand c)
+         => context.Add(Instruction.StoreGlobal, null, a, b, c);
+
+     public static Operand StoreGlobal16(this EmitterContext context, Operand a, Operand b, Operand c)
+         => context.Add(Instruction.StoreGlobal16, null, a, b, c);
+
+     public static Operand StoreGlobal8(this EmitterContext context, Operand a, Operand b, Operand c)
+         => context.Add(Instruction.StoreGlobal8, null, a, b, c);
+
+     public static Operand StoreLocal(this EmitterContext context, Operand a, Operand b)
+         => context.Add(Instruction.StoreLocal, null, a, b);
+
+     public static Operand StoreShared(this EmitterContext context, Operand a, Operand b)
+         => context.Add(Instruction.StoreShared, null, a, b);
+
+     public static Operand StoreShared16(this EmitterContext context, Operand a, Operand b)
+         => context.Add(Instruction.StoreShared16, null, a, b);
+
+     public static Operand StoreShared8(this EmitterContext context, Operand a, Operand b)
+         => context.Add(Instruction.StoreShared8, null, a, b);
+
+     public static Operand UnpackDouble2x32High(this EmitterContext context, Operand a)
+         => UnpackDouble2x32(context, a, 1);
+
+     public static Operand UnpackDouble2x32Low(this EmitterContext context, Operand a)
+         => UnpackDouble2x32(context, a, 0);
+
+     // Shared implementation for the Low/High variants: index 0 is used for Low, 1 for High.
+     private static Operand UnpackDouble2x32(this EmitterContext context, Operand a, int index)
+     {
+         Operand result = Local();
+         Operation unpack = new Operation(Instruction.UnpackDouble2x32, index, result, a);
+
+         context.Add(unpack);
+
+         return result;
+     }
+
+     public static Operand UnpackHalf2x16High(this EmitterContext context, Operand a)
+         => UnpackHalf2x16(context, a, 1);
+
+     public static Operand UnpackHalf2x16Low(this EmitterContext context, Operand a)
+         => UnpackHalf2x16(context, a, 0);
+
+     // Shared implementation for the Low/High variants: index 0 is used for Low, 1 for High.
+     private static Operand UnpackHalf2x16(this EmitterContext context, Operand a, int index)
+     {
+         Operand result = Local();
+         Operation unpack = new Operation(Instruction.UnpackHalf2x16, index, result, a);
+
+         context.Add(unpack);
+
+         return result;
+     }
+
+     public static Operand VoteAll(this EmitterContext context, Operand a)
+         => context.Add(Instruction.VoteAll, Local(), a);
+
+     public static Operand VoteAllEqual(this EmitterContext context, Operand a)
+         => context.Add(Instruction.VoteAllEqual, Local(), a);
+
+     public static Operand VoteAny(this EmitterContext context, Operand a)
+         => context.Add(Instruction.VoteAny, Local(), a);
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/Translation/FeatureFlags.cs b/src/Ryujinx.Graphics.Shader/Translation/FeatureFlags.cs
new file mode 100644
index 00000000..c035f212
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/Translation/FeatureFlags.cs
@@ -0,0 +1,27 @@
+using System;
+
+namespace Ryujinx.Graphics.Shader.Translation
+{
+ /// <summary>
+ /// Shader features that the code generator needs to be aware of ahead of time.
+ /// They usually affect what is declared in the shader header.
+ /// </summary>
+ [Flags]
+ public enum FeatureFlags
+ {
+     None = 0x000,
+
+     // Affected by resolution scaling.
+     IntegerSampling = 0x001,
+     FragCoordXY = 0x002,
+
+     Bindless = 0x004,
+     InstanceId = 0x008,
+     DrawParameters = 0x010,
+     RtLayer = 0x020,
+     CbIndexing = 0x040,
+     IaIndexing = 0x080,
+     OaIndexing = 0x100,
+     FixedFuncAttr = 0x200
+ }
+}
diff --git a/src/Ryujinx.Graphics.Shader/Translation/FunctionMatch.cs b/src/Ryujinx.Graphics.Shader/Translation/FunctionMatch.cs
new file mode 100644
index 00000000..073e120a
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/Translation/FunctionMatch.cs
@@ -0,0 +1,866 @@
+using Ryujinx.Graphics.Shader.Decoders;
+using System;
+using System.Collections.Generic;
+using System.Runtime.CompilerServices;
+
+namespace Ryujinx.Graphics.Shader.Translation
+{
+ static class FunctionMatch
+ {
+ // Reference pattern trees that decoded functions are compared against.
+ // They are created by the static field initializers and never reassigned,
+ // so they are readonly to rule out accidental replacement.
+ private static readonly IPatternTreeNode[] _fsiGetAddressTree = PatternTrees.GetFsiGetAddress();
+ private static readonly IPatternTreeNode[] _fsiGetAddressV2Tree = PatternTrees.GetFsiGetAddressV2();
+ private static readonly IPatternTreeNode[] _fsiIsLastWarpThreadPatternTree = PatternTrees.GetFsiIsLastWarpThread();
+ private static readonly IPatternTreeNode[] _fsiBeginPatternTree = PatternTrees.GetFsiBeginPattern();
+ private static readonly IPatternTreeNode[] _fsiEndPatternTree = PatternTrees.GetFsiEndPattern();
+
+ /// <summary>
+ /// Scans the non-main functions of <paramref name="program"/> for the FSI helper patterns
+ /// and tags the functions that match.
+ /// </summary>
+ /// <remarks>
+ /// The pass has two stages. First it looks for a function matching one of the two
+ /// "get address" patterns with a valid register allocation; that function is marked
+ /// <see cref="FunctionType.Unused"/> and its output registers are recorded.
+ /// Only if such a function exists, the remaining functions are tested against the
+ /// FSI begin/end patterns using the recorded registers.
+ /// </remarks>
+ /// <param name="program">Decoded program whose functions are inspected and re-typed</param>
+ public static void RunPass(DecodedProgram program)
+ {
+     // Index 0 is filled by MatchesFsi, indices 1 to 3 by the "get address" match below.
+     byte[] externalRegs = new byte[4];
+     bool hasGetAddress = false;
+
+     foreach (DecodedFunction function in program)
+     {
+         if (function == program.MainFunction)
+         {
+             continue;
+         }
+
+         TreeNode[] functionTree = BuildTree(function.Blocks);
+
+         int externalReg4;
+
+         if (Matches(_fsiGetAddressTree, functionTree))
+         {
+             externalRegs[1] = functionTree[0].GetRd();
+             externalRegs[2] = functionTree[2].GetRd();
+             externalRegs[3] = functionTree[1].GetRd();
+             externalReg4 = functionTree[3].GetRd();
+         }
+         else if (Matches(_fsiGetAddressV2Tree, functionTree))
+         {
+             externalRegs[1] = functionTree[2].GetRd();
+             externalRegs[2] = functionTree[1].GetRd();
+             externalRegs[3] = functionTree[0].GetRd();
+             externalReg4 = functionTree[3].GetRd();
+         }
+         else
+         {
+             // Neither pattern matched, so there is no register allocation to validate.
+             continue;
+         }
+
+         // Ensure the register allocation is valid.
+         // If so, then we have a match.
+         if (externalRegs[1] != externalRegs[2] &&
+             externalRegs[2] != externalRegs[3] &&
+             externalRegs[1] != externalRegs[3] &&
+             externalRegs[1] + 1 != externalRegs[2] &&
+             externalRegs[1] + 1 != externalRegs[3] &&
+             externalRegs[1] + 1 == externalReg4 &&
+             externalRegs[2] != RegisterConsts.RegisterZeroIndex &&
+             externalRegs[3] != RegisterConsts.RegisterZeroIndex &&
+             externalReg4 != RegisterConsts.RegisterZeroIndex)
+         {
+             hasGetAddress = true;
+             function.Type = FunctionType.Unused;
+             break;
+         }
+     }
+
+     // The begin/end patterns depend on the registers found above;
+     // without a "get address" function nothing else can match.
+     if (!hasGetAddress)
+     {
+         return;
+     }
+
+     foreach (DecodedFunction function in program)
+     {
+         if (function.IsCompilerGenerated || function == program.MainFunction)
+         {
+             continue;
+         }
+
+         TreeNode[] functionTree = BuildTree(function.Blocks);
+
+         if (MatchesFsi(_fsiBeginPatternTree, program, function, functionTree, externalRegs))
+         {
+             function.Type = FunctionType.BuiltInFSIBegin;
+         }
+         else if (MatchesFsi(_fsiEndPatternTree, program, function, functionTree, externalRegs))
+         {
+             function.Type = FunctionType.BuiltInFSIEnd;
+         }
+     }
+ }
+
+ /// <summary>
+ /// Reference from a tree node to one of its inputs: the producing node (if any),
+ /// an index value, and whether the input is consumed inverted.
+ /// </summary>
+ private readonly struct TreeNodeUse
+ {
+     public TreeNode Node { get; }
+     public int Index { get; }
+     public bool Inverted { get; }
+
+     /// <summary>
+     /// Creates a non-inverted use.
+     /// </summary>
+     public TreeNodeUse(int index, TreeNode node) : this(index, false, node)
+     {
+     }
+
+     private TreeNodeUse(int index, bool inverted, TreeNode node)
+     {
+         Node = node;
+         Index = index;
+         Inverted = inverted;
+     }
+
+     /// <summary>
+     /// Returns a copy of this use with the inverted flag toggled.
+     /// </summary>
+     public TreeNodeUse Flip() => new TreeNodeUse(Index, !Inverted, Node);
+ }
+
+ // Kind of a tree node, compared between pattern and code nodes during matching.
+ private enum TreeNodeType : byte
+ {
+ // Node that wraps a decoded instruction.
+ Op,
+ // Node created for a block with more than one predecessor; it carries no instruction.
+ Label
+ }
+
+ /// <summary>
+ /// Node of the tree built from decoded shader code: either an instruction with
+ /// its list of uses, or a label.
+ /// </summary>
+ private class TreeNode
+ {
+     public readonly InstOp Op;
+     public readonly List<TreeNodeUse> Uses;
+     public TreeNodeType Type { get; }
+     public byte Order { get; }
+
+     /// <summary>
+     /// Creates a label node. The instruction and the uses list are left at their defaults.
+     /// </summary>
+     public TreeNode(byte order)
+     {
+         Order = order;
+         Type = TreeNodeType.Label;
+     }
+
+     /// <summary>
+     /// Creates an instruction node with an empty uses list.
+     /// </summary>
+     public TreeNode(InstOp op, byte order)
+     {
+         Order = order;
+         Type = TreeNodeType.Op;
+         Op = op;
+         Uses = new List<TreeNodeUse>();
+     }
+
+     /// <summary>
+     /// Extracts the 3-bit field at bit 3 of the raw opcode.
+     /// </summary>
+     public byte GetPd() => (byte)((Op.RawOpCode >> 3) & 7);
+
+     /// <summary>
+     /// Extracts the low 8 bits of the raw opcode.
+     /// </summary>
+     public byte GetRd() => (byte)Op.RawOpCode;
+ }
+
+ // Builds the list of root tree nodes for the given blocks.
+ // Every instruction becomes a node whose uses point at the node that last defined
+ // each predicate/register it reads. A node whose result is used by a later
+ // instruction is removed from the root list, so it is only reachable through its user.
+ // Returns the nodes that remain in the root list, in insertion order.
+ private static TreeNode[] BuildTree(Block[] blocks)
+ {
+ List<TreeNode> nodes = new List<TreeNode>();
+
+ // Labels created so far, keyed by block address.
+ Dictionary<ulong, TreeNode> labels = new Dictionary<ulong, TreeNode>();
+
+ // Last definition seen for each predicate and general purpose register.
+ // Entries with a null Node mean "not defined by any instruction visited so far".
+ TreeNodeUse[] predDefs = new TreeNodeUse[RegisterConsts.PredsCount];
+ TreeNodeUse[] gprDefs = new TreeNodeUse[RegisterConsts.GprsCount];
+
+ // Records node as the latest definition of a predicate; writes to the "true" predicate index are ignored.
+ void DefPred(byte predIndex, int index, TreeNode node)
+ {
+ if (predIndex != RegisterConsts.PredicateTrueIndex)
+ {
+ predDefs[predIndex] = new TreeNodeUse(index, node);
+ }
+ }
+
+ // Records node as the latest definition of a register; writes to the zero register index are ignored.
+ void DefGpr(byte regIndex, int index, TreeNode node)
+ {
+ if (regIndex != RegisterConsts.RegisterZeroIndex)
+ {
+ gprDefs[regIndex] = new TreeNodeUse(index, node);
+ }
+ }
+
+ // Produces the use for a predicate read:
+ // - the "true" predicate index yields index -1 with no node;
+ // - a predicate with a known definition yields that definition, and the defining
+ //   node is taken out of the root list;
+ // - otherwise the predicate number is encoded as -(predIndex + 2) with no node.
+ TreeNodeUse UsePred(byte predIndex, bool predInv)
+ {
+ if (predIndex != RegisterConsts.PredicateTrueIndex)
+ {
+ TreeNodeUse use = predDefs[predIndex];
+
+ if (use.Node != null)
+ {
+ nodes.Remove(use.Node);
+ }
+ else
+ {
+ use = new TreeNodeUse(-(predIndex + 2), null);
+ }
+
+ return predInv ? use.Flip() : use;
+ }
+
+ return new TreeNodeUse(-1, null);
+ }
+
+ // Same as UsePred, for general purpose registers (the zero register index yields index -1).
+ TreeNodeUse UseGpr(byte regIndex)
+ {
+ if (regIndex != RegisterConsts.RegisterZeroIndex)
+ {
+ TreeNodeUse use = gprDefs[regIndex];
+
+ if (use.Node != null)
+ {
+ nodes.Remove(use.Node);
+ }
+ else
+ {
+ use = new TreeNodeUse(-(regIndex + 2), null);
+ }
+
+ return use;
+ }
+
+ return new TreeNodeUse(-1, null);
+ }
+
+ // Counter handed to label nodes and to order dependent instructions.
+ byte order = 0;
+
+ for (int index = 0; index < blocks.Length; index++)
+ {
+ Block block = blocks[index];
+
+ // A block with several predecessors gets a label node; the label takes the
+ // current order value and the counter is then incremented.
+ if (block.Predecessors.Count > 1)
+ {
+ TreeNode label = new TreeNode(order++);
+ nodes.Add(label);
+ labels.Add(block.Address, label);
+ }
+
+ for (int opIndex = 0; opIndex < block.OpCodes.Count; opIndex++)
+ {
+ InstOp op = block.OpCodes[opIndex];
+
+ // Only order dependent instructions carry the current order value, all others use 0.
+ TreeNode node = new TreeNode(op, IsOrderDependant(op.Name) ? order : (byte)0);
+
+ // Add uses.
+ // The order in which uses are appended matters, the matcher compares them by position.
+
+ if (!op.Props.HasFlag(InstProps.NoPred))
+ {
+ byte predIndex = (byte)((op.RawOpCode >> 16) & 7);
+ bool predInv = (op.RawOpCode & 0x80000) != 0;
+ node.Uses.Add(UsePred(predIndex, predInv));
+ }
+
+ if (op.Props.HasFlag(InstProps.Ps))
+ {
+ byte predIndex = (byte)((op.RawOpCode >> 39) & 7);
+ bool predInv = (op.RawOpCode & 0x40000000000) != 0;
+ node.Uses.Add(UsePred(predIndex, predInv));
+ }
+
+ if (op.Props.HasFlag(InstProps.Ra))
+ {
+ byte ra = (byte)(op.RawOpCode >> 8);
+ node.Uses.Add(UseGpr(ra));
+ }
+
+ if ((op.Props & (InstProps.Rb | InstProps.Rb2)) != 0)
+ {
+ // With Rb2 the register number is read from the low 8 bits, otherwise from bit 20.
+ byte rb = op.Props.HasFlag(InstProps.Rb2) ? (byte)op.RawOpCode : (byte)(op.RawOpCode >> 20);
+ node.Uses.Add(UseGpr(rb));
+ }
+
+ if (op.Props.HasFlag(InstProps.Rc))
+ {
+ byte rc = (byte)(op.RawOpCode >> 39);
+ node.Uses.Add(UseGpr(rc));
+ }
+
+ // A branch whose target already has a label (i.e. the target block was visited earlier) uses that label.
+ if (op.Name == InstName.Bra && labels.TryGetValue(op.GetAbsoluteAddress(), out TreeNode label))
+ {
+ node.Uses.Add(new TreeNodeUse(0, label));
+ }
+
+ // Make definitions.
+
+ // Position of each output of this node; the predicate output (if any) comes before the register output.
+ int defIndex = 0;
+
+ InstProps pdType = op.Props & InstProps.PdMask;
+
+ if (pdType != 0)
+ {
+ // Bit position of the destination predicate field for each predicate destination kind.
+ int bit = pdType switch
+ {
+ InstProps.Pd => 3,
+ InstProps.LPd => 48,
+ InstProps.SPd => 30,
+ InstProps.TPd => 51,
+ InstProps.VPd => 45,
+ _ => throw new InvalidOperationException($"Table has unknown predicate destination {pdType}.")
+ };
+
+ byte predIndex = (byte)((op.RawOpCode >> bit) & 7);
+ DefPred(predIndex, defIndex++, node);
+ }
+
+ if (op.Props.HasFlag(InstProps.Rd))
+ {
+ byte rd = (byte)op.RawOpCode;
+ DefGpr(rd, defIndex++, node);
+ }
+
+ nodes.Add(node);
+ }
+ }
+
+ return nodes.ToArray();
+ }
+
+ /// <summary>
+ /// Tells whether the instruction belongs to the set that receives an order value
+ /// in the tree (the Atom*, Ld*, Suatom* and Suld* instructions listed below).
+ /// </summary>
+ /// <param name="name">Instruction name to test</param>
+ /// <returns>True for the listed instructions, false for everything else</returns>
+ private static bool IsOrderDependant(InstName name)
+ {
+     return name is
+         InstName.Atom or
+         InstName.AtomCas or
+         InstName.Atoms or
+         InstName.AtomsCas or
+         InstName.Ld or
+         InstName.Ldg or
+         InstName.Ldl or
+         InstName.Lds or
+         InstName.Suatom or
+         InstName.SuatomB or
+         InstName.SuatomB2 or
+         InstName.SuatomCas or
+         InstName.SuatomCasB or
+         InstName.Suld or
+         InstName.SuldB or
+         InstName.SuldD or
+         InstName.SuldDB;
+ }
+
+ // Node of a reference pattern that code tree nodes are matched against.
+ private interface IPatternTreeNode
+ {
+ // Expected inputs, compared position by position with the uses of the code node.
+ List<PatternTreeNodeUse> Uses { get; }
+ // Instruction name the code node must have.
+ InstName Name { get; }
+ // Node kind the code node must have.
+ TreeNodeType Type { get; }
+ // Order value the code node must have.
+ byte Order { get; }
+ // Must be equal to whether the code instruction has the InstProps.Ib property.
+ bool IsImm { get; }
+ // Checks the instruction itself (name and encoding), without looking at its uses.
+ bool Matches(in InstOp opInfo);
+ }
+
+ /// <summary>
+ /// Expected input of a pattern node: the pattern node producing it (if any),
+ /// an index value, and whether the input is expected to be inverted.
+ /// </summary>
+ private readonly struct PatternTreeNodeUse
+ {
+     public IPatternTreeNode Node { get; }
+     public int Index { get; }
+     public bool Inverted { get; }
+
+     /// <summary>
+     /// Same use with the inverted flag toggled.
+     /// </summary>
+     public PatternTreeNodeUse Inv
+     {
+         get
+         {
+             return new PatternTreeNodeUse(Index, !Inverted, Node);
+         }
+     }
+
+     /// <summary>
+     /// Creates a non-inverted use.
+     /// </summary>
+     public PatternTreeNodeUse(int index, IPatternTreeNode node) : this(index, false, node)
+     {
+     }
+
+     private PatternTreeNodeUse(int index, bool inverted, IPatternTreeNode node)
+     {
+         Node = node;
+         Index = index;
+         Inverted = inverted;
+     }
+ }
+
+ /// <summary>
+ /// Pattern node for one instruction. The raw opcode of a candidate instruction is
+ /// reinterpreted as <typeparamref name="T"/> and handed to a predicate.
+ /// </summary>
+ /// <typeparam name="T">Type the raw 64-bit opcode is reinterpreted as</typeparam>
+ private class PatternTreeNode<T> : IPatternTreeNode
+ {
+     private readonly Func<T, bool> _match;
+
+     public List<PatternTreeNodeUse> Uses { get; }
+     public InstName Name { get; }
+     public TreeNodeType Type { get; }
+     public byte Order { get; }
+     public bool IsImm { get; }
+
+     /// <summary>
+     /// Use that refers to output 0 of this node.
+     /// </summary>
+     public PatternTreeNodeUse Out => OutAt(0);
+
+     public PatternTreeNode(InstName name, Func<T, bool> match, TreeNodeType type = TreeNodeType.Op, byte order = 0, bool isImm = false)
+     {
+         Uses = new List<PatternTreeNodeUse>();
+         Name = name;
+         Type = type;
+         Order = order;
+         IsImm = isImm;
+         _match = match;
+     }
+
+     /// <summary>
+     /// Appends an expected input and returns this node so calls can be chained.
+     /// </summary>
+     public PatternTreeNode<T> Use(PatternTreeNodeUse use)
+     {
+         Uses.Add(use);
+
+         return this;
+     }
+
+     /// <summary>
+     /// Use that refers to the output of this node at the given index.
+     /// </summary>
+     public PatternTreeNodeUse OutAt(int index)
+     {
+         return new PatternTreeNodeUse(index, this);
+     }
+
+     /// <summary>
+     /// Checks the instruction name first, then runs the predicate on the reinterpreted opcode.
+     /// </summary>
+     public bool Matches(in InstOp opInfo)
+     {
+         if (opInfo.Name != Name)
+         {
+             return false;
+         }
+
+         // Unsafe.As needs a ref to a writable location, so the opcode is copied to a local first.
+         ulong rawOp = opInfo.RawOpCode;
+
+         return _match(Unsafe.As<ulong, T>(ref rawOp));
+     }
+ }
+
+ /// <summary>
+ /// Tests a function against an FSI pattern. The first block of the function must end
+ /// with a call to a function matching the "is last warp thread" pattern.
+ /// </summary>
+ /// <remarks>
+ /// On the way, <c>externalRegs[0]</c> is overwritten with the predicate taken from the
+ /// call target. When the pattern matches, the function is removed from the call target's callers.
+ /// </remarks>
+ /// <returns>True if <paramref name="functionTree"/> matches <paramref name="pattern"/></returns>
+ private static bool MatchesFsi(
+     IPatternTreeNode[] pattern,
+     DecodedProgram program,
+     DecodedFunction function,
+     TreeNode[] functionTree,
+     byte[] externalRegs)
+ {
+     if (function.Blocks.Length == 0)
+     {
+         return false;
+     }
+
+     InstOp lastOp = function.Blocks[0].GetLastOp();
+
+     if (lastOp.Name != InstName.Cal)
+     {
+         return false;
+     }
+
+     DecodedFunction callee = program.GetFunctionByAddress(lastOp.GetAbsoluteAddress());
+
+     if (callee == null)
+     {
+         return false;
+     }
+
+     TreeNode[] calleeTree = BuildTree(callee.Blocks);
+
+     if (!Matches(_fsiIsLastWarpThreadPatternTree, calleeTree))
+     {
+         return false;
+     }
+
+     externalRegs[0] = calleeTree[0].GetPd();
+
+     bool matched = Matches(pattern, functionTree, externalRegs);
+
+     if (matched)
+     {
+         callee.RemoveCaller(function);
+     }
+
+     return matched;
+ }
+
+ /// <summary>
+ /// Compares a pattern with a code tree root by root. Both must have the same
+ /// number of roots and every pair must match.
+ /// </summary>
+ /// <param name="externalRegs">Register table forwarded to the per-node comparison, may be null</param>
+ private static bool Matches(IPatternTreeNode[] pTree, TreeNode[] cTree, byte[] externalRegs = null)
+ {
+     int count = pTree.Length;
+
+     if (count != cTree.Length)
+     {
+         return false;
+     }
+
+     int position = 0;
+
+     // Stop at the first pair that does not match.
+     while (position < count && Matches(pTree[position], cTree[position], externalRegs))
+     {
+         position++;
+     }
+
+     return position == count;
+ }
+
+ /// <summary>
+ /// Recursively compares one pattern node with one code node, including all their uses.
+ /// </summary>
+ /// <remarks>
+ /// A pattern use with index -2 or lower refers to entry <c>-index - 2</c> of
+ /// <paramref name="externalRegs"/>; the code use must encode that same register number.
+ /// </remarks>
+ private static bool Matches(IPatternTreeNode pTreeNode, TreeNode cTreeNode, byte[] externalRegs)
+ {
+     bool sameInstruction =
+         pTreeNode.Matches(in cTreeNode.Op) &&
+         pTreeNode.Type == cTreeNode.Type &&
+         pTreeNode.Order == cTreeNode.Order &&
+         pTreeNode.IsImm == cTreeNode.Op.Props.HasFlag(InstProps.Ib);
+
+     if (!sameInstruction)
+     {
+         return false;
+     }
+
+     // Only instruction nodes have uses to compare.
+     if (pTreeNode.Type != TreeNodeType.Op)
+     {
+         return true;
+     }
+
+     if (pTreeNode.Uses.Count != cTreeNode.Uses.Count)
+     {
+         return false;
+     }
+
+     for (int i = 0; i < pTreeNode.Uses.Count; i++)
+     {
+         PatternTreeNodeUse expected = pTreeNode.Uses[i];
+         TreeNodeUse actual = cTreeNode.Uses[i];
+
+         bool indexMatches = expected.Index <= -2
+             ? externalRegs[-expected.Index - 2] == (-actual.Index - 2)
+             : expected.Index == actual.Index;
+
+         if (!indexMatches)
+         {
+             return false;
+         }
+
+         bool expectsNode = expected.Node != null;
+         bool hasNode = actual.Node != null;
+
+         if (expected.Inverted != actual.Inverted || expectsNode != hasNode)
+         {
+             return false;
+         }
+
+         if (expectsNode && !Matches(expected.Node, actual.Node, externalRegs))
+         {
+             return false;
+         }
+     }
+
+     return true;
+ }
+
+ // Factory for the reference pattern trees and for the per-instruction pattern nodes they are made of.
+ // Each Get* method returns the expected root nodes in the order they must appear;
+ // the uses chained on a node are listed in the order the tree builder appends them.
+ private static class PatternTrees
+ {
+ // First variant of the "get address" pattern.
+ // RunPass reads the destination registers of roots 0 to 3 of a function matching it.
+ public static IPatternTreeNode[] GetFsiGetAddress()
+ {
+ var affinityValue = S2r(SReg.Affinity).Use(PT).Out;
+ var orderingTicketValue = S2r(SReg.OrderingTicket).Use(PT).Out;
+
+ return new IPatternTreeNode[]
+ {
+ Iscadd(cc: true, 2, 0, 404)
+ .Use(PT)
+ .Use(Iscadd(cc: false, 8)
+ .Use(PT)
+ .Use(Lop32i(LogicOp.And, 0xff)
+ .Use(PT)
+ .Use(affinityValue).Out)
+ .Use(Lop32i(LogicOp.And, 0xff)
+ .Use(PT)
+ .Use(orderingTicketValue).Out).Out),
+ ShrU32W(16)
+ .Use(PT)
+ .Use(orderingTicketValue),
+ Iadd32i(0x200)
+ .Use(PT)
+ .Use(Lop32i(LogicOp.And, 0xfe00)
+ .Use(PT)
+ .Use(orderingTicketValue).Out),
+ Iadd(x: true, 0, 405).Use(PT).Use(RZ),
+ Ret().Use(PT)
+ };
+ }
+
+ // Second variant of the "get address" pattern: the roots come in a different order
+ // and a single Bfi replaces the inner Iscadd of the first variant.
+ public static IPatternTreeNode[] GetFsiGetAddressV2()
+ {
+ var affinityValue = S2r(SReg.Affinity).Use(PT).Out;
+ var orderingTicketValue = S2r(SReg.OrderingTicket).Use(PT).Out;
+
+ return new IPatternTreeNode[]
+ {
+ ShrU32W(16)
+ .Use(PT)
+ .Use(orderingTicketValue),
+ Iadd32i(0x200)
+ .Use(PT)
+ .Use(Lop32i(LogicOp.And, 0xfe00)
+ .Use(PT)
+ .Use(orderingTicketValue).Out),
+ Iscadd(cc: true, 2, 0, 404)
+ .Use(PT)
+ .Use(Bfi(0x808)
+ .Use(PT)
+ .Use(affinityValue)
+ .Use(Lop32i(LogicOp.And, 0xff)
+ .Use(PT)
+ .Use(orderingTicketValue).Out).Out),
+ Iadd(x: true, 0, 405).Use(PT).Use(RZ),
+ Ret().Use(PT)
+ };
+ }
+
+ // Pattern of the helper called at the start of the FSI begin/end functions.
+ // MatchesFsi takes the destination predicate of root 0 of a function matching it.
+ public static IPatternTreeNode[] GetFsiIsLastWarpThread()
+ {
+ var threadKillValue = S2r(SReg.ThreadKill).Use(PT).Out;
+ var laneIdValue = S2r(SReg.LaneId).Use(PT).Out;
+
+ return new IPatternTreeNode[]
+ {
+ IsetpU32(IComp.Eq)
+ .Use(PT)
+ .Use(PT)
+ .Use(FloU32()
+ .Use(PT)
+ .Use(Vote(VoteMode.Any)
+ .Use(PT)
+ .Use(IsetpU32(IComp.Ne)
+ .Use(PT)
+ .Use(PT)
+ .Use(Lop(negB: true, LogicOp.PassB)
+ .Use(PT)
+ .Use(RZ)
+ .Use(threadKillValue).OutAt(1))
+ .Use(RZ).Out).OutAt(1)).Out)
+ .Use(laneIdValue),
+ Ret().Use(PT)
+ };
+ }
+
+ // Pattern of the FSI begin function. CallArg(n) refers to entry n of the external
+ // register table passed to the matcher.
+ public static IPatternTreeNode[] GetFsiBeginPattern()
+ {
+ var addressLowValue = CallArg(1);
+
+ // Expects "(x >> 16) == CallArg(3)" as an unsigned compare.
+ static PatternTreeNodeUse HighU16Equals(PatternTreeNodeUse x)
+ {
+ var expectedValue = CallArg(3);
+
+ return IsetpU32(IComp.Eq)
+ .Use(PT)
+ .Use(PT)
+ .Use(ShrU32W(16).Use(PT).Use(x).Out)
+ .Use(expectedValue).Out;
+ }
+
+ PatternTreeNode<byte> label;
+
+ return new IPatternTreeNode[]
+ {
+ Cal(),
+ Ret().Use(CallArg(0).Inv),
+ Ret()
+ .Use(HighU16Equals(LdgE(CacheOpLd.Cg, LsSize.B32)
+ .Use(PT)
+ .Use(addressLowValue).Out)),
+ label = Label(),
+ // The second load comes after the label, hence the order value of 1.
+ Bra()
+ .Use(HighU16Equals(LdgE(CacheOpLd.Cg, LsSize.B32, 1)
+ .Use(PT)
+ .Use(addressLowValue).Out).Inv)
+ .Use(label.Out),
+ Ret().Use(PT)
+ };
+ }
+
+ // Pattern of the FSI end function. CallArg(n) refers to entry n of the external
+ // register table passed to the matcher.
+ public static IPatternTreeNode[] GetFsiEndPattern()
+ {
+ var voteResult = Vote(VoteMode.All).Use(PT).Use(PT).OutAt(1);
+ var popcResult = Popc().Use(PT).Use(voteResult).Out;
+ var threadKillValue = S2r(SReg.ThreadKill).Use(PT).Out;
+ var laneIdValue = S2r(SReg.LaneId).Use(PT).Out;
+
+ var addressLowValue = CallArg(1);
+ var incrementValue = CallArg(2);
+
+ return new IPatternTreeNode[]
+ {
+ Cal(),
+ Ret().Use(CallArg(0).Inv),
+ Membar(Decoders.Membar.Vc).Use(PT),
+ Ret().Use(IsetpU32(IComp.Ne)
+ .Use(PT)
+ .Use(PT)
+ .Use(threadKillValue)
+ .Use(RZ).Out),
+ RedE(RedOp.Add, AtomSize.U32)
+ .Use(IsetpU32(IComp.Eq)
+ .Use(PT)
+ .Use(PT)
+ .Use(FloU32()
+ .Use(PT)
+ .Use(voteResult).Out)
+ .Use(laneIdValue).Out)
+ .Use(addressLowValue)
+ .Use(Xmad(XmadCop.Cbcc, psl: true, hiloA: true, hiloB: true)
+ .Use(PT)
+ .Use(incrementValue)
+ .Use(Xmad(XmadCop.Cfull, mrg: true, hiloB: true)
+ .Use(PT)
+ .Use(incrementValue)
+ .Use(popcResult)
+ .Use(RZ).Out)
+ .Use(Xmad(XmadCop.Cfull)
+ .Use(PT)
+ .Use(incrementValue)
+ .Use(popcResult)
+ .Use(RZ).Out).Out),
+ Ret().Use(PT)
+ };
+ }
+
+ // The methods below create a pattern node for a single instruction.
+ // The lambda is the predicate applied to the raw opcode reinterpreted as the instruction struct.
+
+ private static PatternTreeNode<InstBfiI> Bfi(int imm)
+ {
+ return new(InstName.Bfi, (op) => !op.WriteCC && op.Imm20 == imm, isImm: true);
+ }
+
+ private static PatternTreeNode<InstBra> Bra()
+ {
+ return new(InstName.Bra, (op) => op.Ccc == Ccc.T && !op.Ca);
+ }
+
+ private static PatternTreeNode<InstCal> Cal()
+ {
+ return new(InstName.Cal, (op) => !op.Ca && op.Inc);
+ }
+
+ private static PatternTreeNode<InstFloR> FloU32()
+ {
+ return new(InstName.Flo, (op) => !op.Signed && !op.Sh && !op.NegB && !op.WriteCC);
+ }
+
+ private static PatternTreeNode<InstIaddC> Iadd(bool x, int cbufSlot, int cbufOffset)
+ {
+ return new(InstName.Iadd, (op) =>
+ !op.Sat &&
+ !op.WriteCC &&
+ op.X == x &&
+ op.AvgMode == AvgMode.NoNeg &&
+ op.CbufSlot == cbufSlot &&
+ op.CbufOffset == cbufOffset);
+ }
+
+ private static PatternTreeNode<InstIadd32i> Iadd32i(int imm)
+ {
+ return new(InstName.Iadd32i, (op) => !op.Sat && !op.WriteCC && !op.X && op.AvgMode == AvgMode.NoNeg && op.Imm32 == imm);
+ }
+
+ // Register form of Iscadd.
+ private static PatternTreeNode<InstIscaddR> Iscadd(bool cc, int imm)
+ {
+ return new(InstName.Iscadd, (op) => op.WriteCC == cc && op.AvgMode == AvgMode.NoNeg && op.Imm5 == imm);
+ }
+
+ // Constant buffer form of Iscadd.
+ private static PatternTreeNode<InstIscaddC> Iscadd(bool cc, int imm, int cbufSlot, int cbufOffset)
+ {
+ return new(InstName.Iscadd, (op) =>
+ op.WriteCC == cc &&
+ op.AvgMode == AvgMode.NoNeg &&
+ op.Imm5 == imm &&
+ op.CbufSlot == cbufSlot &&
+ op.CbufOffset == cbufOffset);
+ }
+
+ private static PatternTreeNode<InstIsetpR> IsetpU32(IComp comp)
+ {
+ return new(InstName.Isetp, (op) => !op.Signed && op.IComp == comp && op.Bop == BoolOp.And);
+ }
+
+ // Label node: the predicate accepts anything, only the name (Invalid) and the node type are checked.
+ private static PatternTreeNode<byte> Label()
+ {
+ return new(InstName.Invalid, (op) => true, type: TreeNodeType.Label);
+ }
+
+ private static PatternTreeNode<InstLopR> Lop(bool negB, LogicOp logicOp)
+ {
+ return new(InstName.Lop, (op) => !op.NegA && op.NegB == negB && !op.WriteCC && !op.X && op.Lop == logicOp && op.PredicateOp == PredicateOp.F);
+ }
+
+ private static PatternTreeNode<InstLop32i> Lop32i(LogicOp logicOp, int imm)
+ {
+ return new(InstName.Lop32i, (op) => !op.NegA && !op.NegB && !op.X && !op.WriteCC && op.LogicOp == logicOp && op.Imm32 == imm);
+ }
+
+ private static PatternTreeNode<InstMembar> Membar(Membar membar)
+ {
+ return new(InstName.Membar, (op) => op.Membar == membar);
+ }
+
+ private static PatternTreeNode<InstPopcR> Popc()
+ {
+ return new(InstName.Popc, (op) => !op.NegB);
+ }
+
+ private static PatternTreeNode<InstRet> Ret()
+ {
+ return new(InstName.Ret, (op) => op.Ccc == Ccc.T);
+ }
+
+ private static PatternTreeNode<InstS2r> S2r(SReg reg)
+ {
+ return new(InstName.S2r, (op) => op.SReg == reg);
+ }
+
+ private static PatternTreeNode<InstShrI> ShrU32W(int imm)
+ {
+ return new(InstName.Shr, (op) => !op.Signed && !op.Brev && op.M && op.XMode == 0 && op.Imm20 == imm, isImm: true);
+ }
+
+ private static PatternTreeNode<InstLdg> LdgE(CacheOpLd cacheOp, LsSize size, byte order = 0)
+ {
+ return new(InstName.Ldg, (op) => op.E && op.CacheOp == cacheOp && op.LsSize == size, order: order);
+ }
+
+ private static PatternTreeNode<InstRed> RedE(RedOp redOp, AtomSize size, byte order = 0)
+ {
+ return new(InstName.Red, (op) => op.E && op.RedOp == redOp && op.RedSize == size, order: order);
+ }
+
+ private static PatternTreeNode<InstVote> Vote(VoteMode mode)
+ {
+ return new(InstName.Vote, (op) => op.VoteMode == mode);
+ }
+
+ private static PatternTreeNode<InstXmadR> Xmad(XmadCop cop, bool psl = false, bool mrg = false, bool hiloA = false, bool hiloB = false)
+ {
+ return new(InstName.Xmad, (op) => op.XmadCop == cop && op.Psl == psl && op.Mrg == mrg && op.HiloA == hiloA && op.HiloB == hiloB);
+ }
+
+ // PT and RZ produce the same use (index -1, no node); the two names only document
+ // whether a predicate or a register operand is expected at that position.
+ private static PatternTreeNodeUse PT => PTOrRZ();
+ private static PatternTreeNodeUse RZ => PTOrRZ();
+ // Use with index 0 and no producing node.
+ private static PatternTreeNodeUse Undef => new PatternTreeNodeUse(0, null);
+
+ // Use that refers to entry "index" of the external register table.
+ // It is encoded as -(index + 2), so it never collides with the -1 used by PT/RZ.
+ private static PatternTreeNodeUse CallArg(int index)
+ {
+ return new PatternTreeNodeUse(-(index + 2), null);
+ }
+
+ // Use with index -1 and no producing node.
+ private static PatternTreeNodeUse PTOrRZ()
+ {
+ return new PatternTreeNodeUse(-1, null);
+ }
+ }
+
+ /// <summary>
+ /// Debug helper that writes a code tree node and, recursively, all its uses to the console.
+ /// </summary>
+ /// <param name="node">Node to print</param>
+ /// <param name="indentation">Prefix written before every use line of this node</param>
+ private static void PrintTreeNode(TreeNode node, string indentation)
+ {
+     if (node.Type == TreeNodeType.Label)
+     {
+         // Label nodes carry no instruction and their Uses list is never allocated,
+         // so they are printed as a leaf instead of being enumerated.
+         Console.WriteLine(" Label");
+
+         return;
+     }
+
+     Console.WriteLine($" {node.Op.Name}");
+
+     for (int i = 0; i < node.Uses.Count; i++)
+     {
+         TreeNodeUse use = node.Uses[i];
+         bool last = i == node.Uses.Count - 1;
+         char separator = last ? '`' : '|';
+
+         if (use.Node != null)
+         {
+             Console.Write($"{indentation} {separator}- ({(use.Inverted ? "INV " : "")}{use.Index})");
+             PrintTreeNode(use.Node, indentation + (last ? " " : " | "));
+         }
+         else
+         {
+             Console.WriteLine($"{indentation} {separator}- ({(use.Inverted ? "INV " : "")}{use.Index}) NULL");
+         }
+     }
+ }
+
+ /// <summary>
+ /// Debug helper that writes a pattern node and, recursively, all its uses to the console.
+ /// </summary>
+ /// <param name="node">Pattern node to print</param>
+ /// <param name="indentation">Prefix written before every use line of this node</param>
+ private static void PrintTreeNode(IPatternTreeNode node, string indentation)
+ {
+     Console.WriteLine($" {node.Name}");
+
+     int lastIndex = node.Uses.Count - 1;
+
+     for (int useIndex = 0; useIndex <= lastIndex; useIndex++)
+     {
+         PatternTreeNodeUse current = node.Uses[useIndex];
+         bool isLast = useIndex == lastIndex;
+         char separator = isLast ? '`' : '|';
+
+         if (current.Node == null)
+         {
+             Console.WriteLine($"{indentation} {separator}- ({(current.Inverted ? "INV " : "")}{current.Index}) NULL");
+             continue;
+         }
+
+         Console.Write($"{indentation} {separator}- ({(current.Inverted ? "INV " : "")}{current.Index})");
+         PrintTreeNode(current.Node, indentation + (isLast ? " " : " | "));
+     }
+ }
+ }
+}
diff --git a/src/Ryujinx.Graphics.Shader/Translation/GlobalMemory.cs b/src/Ryujinx.Graphics.Shader/Translation/GlobalMemory.cs
new file mode 100644
index 00000000..774a128d
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/Translation/GlobalMemory.cs
@@ -0,0 +1,52 @@
+using Ryujinx.Graphics.Shader.IntermediateRepresentation;
+
+namespace Ryujinx.Graphics.Shader.Translation
+{
+ /// <summary>
+ /// Constants and helpers giving the constant buffer offsets (in words) of the
+ /// storage descriptors and of the "Ube" descriptors.
+ /// </summary>
+ static class GlobalMemory
+ {
+     private const int StorageDescsBaseOffset = 0x44; // In words.
+
+     public const int StorageDescSize = 4; // In words.
+     public const int StorageMaxCount = 16;
+
+     public const int StorageDescsSize = StorageDescSize * StorageMaxCount;
+
+     public const int UbeBaseOffset = 0x98; // In words.
+     public const int UbeMaxCount = 9;
+     public const int UbeDescsSize = StorageDescSize * UbeMaxCount;
+     public const int UbeFirstCbuf = 8;
+
+     /// <summary>
+     /// Tells whether the instruction accesses global memory: an atomic with the
+     /// global memory storage kind, or any of the global load/store instructions.
+     /// </summary>
+     public static bool UsesGlobalMemory(Instruction inst, StorageKind storageKind)
+     {
+         if (inst.IsAtomic() && storageKind == StorageKind.GlobalMemory)
+         {
+             return true;
+         }
+
+         return inst == Instruction.LoadGlobal ||
+                inst == Instruction.StoreGlobal ||
+                inst == Instruction.StoreGlobal16 ||
+                inst == Instruction.StoreGlobal8;
+     }
+
+     /// <summary>
+     /// Offset of the storage descriptor at <paramref name="slot"/> for the given stage.
+     /// </summary>
+     public static int GetStorageCbOffset(ShaderStage stage, int slot)
+         => GetStorageBaseCbOffset(stage) + slot * StorageDescSize;
+
+     /// <summary>
+     /// Offset of the first storage descriptor of the given stage, or 0 for a stage not listed.
+     /// Compute and TessellationEvaluation map to the same offset.
+     /// </summary>
+     public static int GetStorageBaseCbOffset(ShaderStage stage)
+     {
+         // Index of the descriptor region used by the stage.
+         int region;
+
+         switch (stage)
+         {
+             case ShaderStage.Vertex:
+                 region = 0;
+                 break;
+             case ShaderStage.TessellationControl:
+                 region = 1;
+                 break;
+             case ShaderStage.Compute:
+             case ShaderStage.TessellationEvaluation:
+                 region = 2;
+                 break;
+             case ShaderStage.Geometry:
+                 region = 3;
+                 break;
+             case ShaderStage.Fragment:
+                 region = 4;
+                 break;
+             default:
+                 return 0;
+         }
+
+         return StorageDescsBaseOffset + region * StorageDescsSize;
+     }
+
+     /// <summary>
+     /// Offset of the "Ube" descriptor at <paramref name="slot"/>.
+     /// </summary>
+     public static int GetConstantUbeOffset(int slot)
+         => UbeBaseOffset + slot * StorageDescSize;
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/Translation/Optimizations/BindlessElimination.cs b/src/Ryujinx.Graphics.Shader/Translation/Optimizations/BindlessElimination.cs
new file mode 100644
index 00000000..0c196c4d
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/Translation/Optimizations/BindlessElimination.cs
@@ -0,0 +1,263 @@
+using Ryujinx.Graphics.Shader.Instructions;
+using Ryujinx.Graphics.Shader.IntermediateRepresentation;
+using System.Collections.Generic;
+
+namespace Ryujinx.Graphics.Shader.Translation.Optimizations
+{
+ class BindlessElimination
+ {
+ /// <summary>
+ /// Rewrites bindless texture and image operations of the block into regular (bound)
+ /// accesses whenever the handle can be traced back to constant buffer values.
+ /// </summary>
+ /// <param name="block">Block whose operations are inspected and modified in place</param>
+ /// <param name="config">Shader configuration used to query formats and to record the used textures</param>
+ public static void RunPass(BasicBlock block, ShaderConfig config)
+ {
+ // We can turn a bindless into regular access by recognizing the pattern
+ // produced by the compiler for separate texture and sampler.
+ // We check for the following conditions:
+ // - The handle is a constant buffer value.
+ // - The handle is the result of a bitwise OR logical operation.
+ // - Both sources of the OR operation come from a constant buffer.
+ for (LinkedListNode<INode> node = block.Operations.First; node != null; node = node.Next)
+ {
+ if (!(node.Value is TextureOperation texOp))
+ {
+ continue;
+ }
+
+ if ((texOp.Flags & TextureFlags.Bindless) == 0)
+ {
+ continue;
+ }
+
+ if (texOp.Inst == Instruction.Lod ||
+ texOp.Inst == Instruction.TextureSample ||
+ texOp.Inst == Instruction.TextureSize)
+ {
+ Operand bindlessHandle = Utils.FindLastOperation(texOp.GetSource(0), block);
+
+ // Some instructions do not encode an accurate sampler type:
+ // - Most instructions use the same type for 1D and Buffer.
+ // - Query instructions may not have any type.
+ // For those cases, we need to try getting the type from current GPU state,
+ // as long as bindless elimination is successful and we know where the texture descriptor is located.
+ bool rewriteSamplerType =
+ texOp.Type == SamplerType.TextureBuffer ||
+ texOp.Inst == Instruction.TextureSize;
+
+ // Simplest case: the handle is read directly from a constant buffer.
+ if (bindlessHandle.Type == OperandType.ConstantBuffer)
+ {
+ SetHandle(config, texOp, bindlessHandle.GetCbufOffset(), bindlessHandle.GetCbufSlot(), rewriteSamplerType, isImage: false);
+ continue;
+ }
+
+ if (!(bindlessHandle.AsgOp is Operation handleCombineOp))
+ {
+ continue;
+ }
+
+ if (handleCombineOp.Inst != Instruction.BitwiseOr)
+ {
+ continue;
+ }
+
+ Operand src0 = Utils.FindLastOperation(handleCombineOp.GetSource(0), block);
+ Operand src1 = Utils.FindLastOperation(handleCombineOp.GetSource(1), block);
+
+ // For cases where we have a constant, ensure that the constant is always
+ // the second operand.
+ // Since this is a commutative operation, both are fine,
+ // and having a "canonical" representation simplifies some checks below.
+ if (src0.Type == OperandType.Constant && src1.Type != OperandType.Constant)
+ {
+ Operand temp = src1;
+ src1 = src0;
+ src0 = temp;
+ }
+
+ TextureHandleType handleType = TextureHandleType.SeparateSamplerHandle;
+
+ // Try to match the following patterns:
+ // Masked pattern:
+ // - samplerHandle = samplerHandle & 0xFFF00000;
+ // - textureHandle = textureHandle & 0xFFFFF;
+ // - combinedHandle = samplerHandle | textureHandle;
+ // Where samplerHandle and textureHandle come from a constant buffer.
+ // Shifted pattern:
+ // - samplerHandle = samplerId << 20;
+ // - combinedHandle = samplerHandle | textureHandle;
+ // Where samplerId and textureHandle come from a constant buffer.
+ // Constant pattern:
+ // - combinedHandle = samplerHandleConstant | textureHandle;
+ // Where samplerHandleConstant is a constant value, and textureHandle comes from a constant buffer.
+ // After this block, src0 is expected to hold the texture part and src1 the sampler part.
+ if (src0.AsgOp is Operation src0AsgOp)
+ {
+ if (src1.AsgOp is Operation src1AsgOp &&
+ src0AsgOp.Inst == Instruction.BitwiseAnd &&
+ src1AsgOp.Inst == Instruction.BitwiseAnd)
+ {
+ src0 = GetSourceForMaskedHandle(src0AsgOp, 0xFFFFF);
+ src1 = GetSourceForMaskedHandle(src1AsgOp, 0xFFF00000);
+
+ // The OR operation is commutative, so we can also try to swap the operands to get a match.
+ if (src0 == null || src1 == null)
+ {
+ src0 = GetSourceForMaskedHandle(src1AsgOp, 0xFFFFF);
+ src1 = GetSourceForMaskedHandle(src0AsgOp, 0xFFF00000);
+ }
+
+ if (src0 == null || src1 == null)
+ {
+ continue;
+ }
+ }
+ else if (src0AsgOp.Inst == Instruction.ShiftLeft)
+ {
+ Operand shift = src0AsgOp.GetSource(1);
+
+ if (shift.Type == OperandType.Constant && shift.Value == 20)
+ {
+ // The shifted value was the first operand: move the other operand to src0
+ // and keep the value before the shift as src1.
+ src0 = src1;
+ src1 = src0AsgOp.GetSource(0);
+ handleType = TextureHandleType.SeparateSamplerId;
+ }
+ }
+ }
+ else if (src1.AsgOp is Operation src1AsgOp && src1AsgOp.Inst == Instruction.ShiftLeft)
+ {
+ Operand shift = src1AsgOp.GetSource(1);
+
+ if (shift.Type == OperandType.Constant && shift.Value == 20)
+ {
+ src1 = src1AsgOp.GetSource(0);
+ handleType = TextureHandleType.SeparateSamplerId;
+ }
+ }
+ else if (src1.Type == OperandType.Constant && (src1.Value & 0xfffff) == 0)
+ {
+ // The constant has its low 20 bits clear, so it only contributes the upper bits of the handle.
+ handleType = TextureHandleType.SeparateConstantSamplerHandle;
+ }
+
+ if (src0.Type != OperandType.ConstantBuffer)
+ {
+ continue;
+ }
+
+ if (handleType == TextureHandleType.SeparateConstantSamplerHandle)
+ {
+ // The upper 12 bits of the constant are packed in place of a second constant buffer offset.
+ SetHandle(
+ config,
+ texOp,
+ TextureHandle.PackOffsets(src0.GetCbufOffset(), ((src1.Value >> 20) & 0xfff), handleType),
+ TextureHandle.PackSlots(src0.GetCbufSlot(), 0),
+ rewriteSamplerType,
+ isImage: false);
+ }
+ else if (src1.Type == OperandType.ConstantBuffer)
+ {
+ SetHandle(
+ config,
+ texOp,
+ TextureHandle.PackOffsets(src0.GetCbufOffset(), src1.GetCbufOffset(), handleType),
+ TextureHandle.PackSlots(src0.GetCbufSlot(), src1.GetCbufSlot()),
+ rewriteSamplerType,
+ isImage: false);
+ }
+ }
+ else if (texOp.Inst == Instruction.ImageLoad ||
+ texOp.Inst == Instruction.ImageStore ||
+ texOp.Inst == Instruction.ImageAtomic)
+ {
+ // Images are only handled when the handle is read directly from a constant buffer.
+ Operand src0 = Utils.FindLastOperation(texOp.GetSource(0), block);
+
+ if (src0.Type == OperandType.ConstantBuffer)
+ {
+ int cbufOffset = src0.GetCbufOffset();
+ int cbufSlot = src0.GetCbufSlot();
+
+ // When the instruction carries no format, ask the configuration for it.
+ if (texOp.Format == TextureFormat.Unknown)
+ {
+ if (texOp.Inst == Instruction.ImageAtomic)
+ {
+ texOp.Format = config.GetTextureFormatAtomic(cbufOffset, cbufSlot);
+ }
+ else
+ {
+ texOp.Format = config.GetTextureFormat(cbufOffset, cbufSlot);
+ }
+ }
+
+ bool rewriteSamplerType = texOp.Type == SamplerType.TextureBuffer;
+
+ SetHandle(config, texOp, cbufOffset, cbufSlot, rewriteSamplerType, isImage: true);
+ }
+ }
+ }
+ }
+
+ /// <summary>
+ /// Given a bitwise AND operation, returns the constant buffer operand being masked
+ /// when the other operand is compatible with <paramref name="mask"/>.
+ /// </summary>
+ /// <param name="asgOp">Operation already known to be a bitwise AND</param>
+ /// <param name="mask">Mask value the constant operand must be equal to</param>
+ /// <returns>The masked constant buffer operand, or null if the operation does not fit</returns>
+ private static Operand GetSourceForMaskedHandle(Operation asgOp, uint mask)
+ {
+     Operand lhs = asgOp.GetSource(0);
+     Operand rhs = asgOp.GetSource(1);
+
+     bool lhsIsCbuf = lhs.Type == OperandType.ConstantBuffer;
+     bool rhsIsCbuf = rhs.Type == OperandType.ConstantBuffer;
+
+     if (lhsIsCbuf && rhsIsCbuf)
+     {
+         // With both operands coming from a constant buffer the mask value can't be verified,
+         // so a match is assumed. Constant buffer 1 is avoided as official drivers
+         // use it to store compiler constants.
+         if (lhs.GetCbufSlot() == 1)
+         {
+             return rhs;
+         }
+
+         return lhs;
+     }
+
+     if (lhsIsCbuf && rhs.Type == OperandType.Constant)
+     {
+         return (uint)rhs.Value == mask ? lhs : null;
+     }
+
+     if (rhsIsCbuf && lhs.Type == OperandType.Constant)
+     {
+         return (uint)lhs.Value == mask ? rhs : null;
+     }
+
+     return null;
+ }
+
+ /// <summary>
+ /// Stores the resolved handle on the texture operation, optionally replaces its sampler
+ /// type with the one queried from the GPU accessor, and records the texture as used.
+ /// </summary>
+ /// <param name="config">Shader configuration providing the GPU accessor and receiving the used texture</param>
+ /// <param name="texOp">Texture operation to update</param>
+ /// <param name="cbufOffset">Constant buffer offset (possibly packed) of the handle</param>
+ /// <param name="cbufSlot">Constant buffer slot (possibly packed) of the handle</param>
+ /// <param name="rewriteSamplerType">True to query the sampler type and apply it where applicable</param>
+ /// <param name="isImage">True if the operation is an image operation</param>
+ private static void SetHandle(ShaderConfig config, TextureOperation texOp, int cbufOffset, int cbufSlot, bool rewriteSamplerType, bool isImage)
+ {
+     texOp.SetHandle(cbufOffset, cbufSlot);
+
+     if (rewriteSamplerType)
+     {
+         SamplerType queriedType = config.GpuAccessor.QuerySamplerType(cbufOffset, cbufSlot);
+
+         if (texOp.Inst.IsTextureQuery())
+         {
+             texOp.Type = queriedType;
+         }
+         else if (texOp.Type == SamplerType.TextureBuffer && queriedType == SamplerType.Texture1D)
+         {
+             // Position where the next extra source is inserted, right after the single coordinate.
+             int insertAt = 1;
+
+             if (InstEmit.Sample1DAs2D)
+             {
+                 queriedType = SamplerType.Texture2D;
+                 texOp.InsertSource(insertAt, OperandHelper.Const(0));
+                 insertAt++;
+             }
+
+             bool lacksExplicitLod =
+                 (texOp.Flags & TextureFlags.IntCoords) != 0 &&
+                 (texOp.Flags & TextureFlags.LodLevel) == 0;
+
+             if (!isImage && lacksExplicitLod)
+             {
+                 // IntCoords textures must always have explicit LOD.
+                 texOp.SetLodLevelFlag();
+                 texOp.InsertSource(insertAt, OperandHelper.Const(0));
+             }
+
+             texOp.Type = queriedType;
+         }
+     }
+
+     config.SetUsedTexture(texOp.Inst, texOp.Type, texOp.Format, texOp.Flags, cbufSlot, cbufOffset);
+ }
+ }
+}
diff --git a/src/Ryujinx.Graphics.Shader/Translation/Optimizations/BindlessToIndexed.cs b/src/Ryujinx.Graphics.Shader/Translation/Optimizations/BindlessToIndexed.cs
new file mode 100644
index 00000000..ca46a1f5
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/Translation/Optimizations/BindlessToIndexed.cs
@@ -0,0 +1,85 @@
+using Ryujinx.Graphics.Shader.IntermediateRepresentation;
+using System.Collections.Generic;
+
+using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
+
+namespace Ryujinx.Graphics.Shader.Translation.Optimizations
+{
+    // Pass that rewrites bindless texture accesses into indexed accesses when the
+    // handle is loaded from the handles constant buffer with a constant base offset.
+    static class BindlessToIndexed
+    {
+        public static void RunPass(BasicBlock block, ShaderConfig config)
+        {
+            // A bindless texture access can be turned into an indexed access
+            // as long as all of the following conditions hold:
+            // - The handle is loaded using a LDC instruction.
+            // - The handle is loaded from the constant buffer with the handles (CB2 for NVN).
+            // - The load has a constant offset.
+            // The base offset of the array of handles on the constant buffer is the constant offset.
+            for (LinkedListNode<INode> current = block.Operations.First; current != null; current = current.Next)
+            {
+                if (current.Value is not TextureOperation texOp || (texOp.Flags & TextureFlags.Bindless) == 0)
+                {
+                    continue;
+                }
+
+                if (texOp.GetSource(0).AsgOp is not Operation loadOp || loadOp.Inst != Instruction.LoadConstant)
+                {
+                    continue;
+                }
+
+                Operand loadSlot = loadOp.GetSource(0);
+                Operand loadOffset = loadOp.GetSource(1);
+
+                bool loadsFromHandlesBuffer = loadSlot.Type == OperandType.Constant && loadSlot.Value == 2;
+
+                if (!loadsFromHandlesBuffer)
+                {
+                    continue;
+                }
+
+                // The load offset must be "(index + constant) >> shift".
+                if (loadOffset.AsgOp is not Operation shiftOp || shiftOp.Inst != Instruction.ShiftRightU32)
+                {
+                    continue;
+                }
+
+                if (shiftOp.GetSource(0).AsgOp is not Operation addOp || addOp.Inst != Instruction.Add)
+                {
+                    continue;
+                }
+
+                Operand baseOffset = addOp.GetSource(1);
+
+                if (baseOffset.Type != OperandType.Constant)
+                {
+                    continue;
+                }
+
+                TurnIntoIndexed(config, texOp, baseOffset.Value / 4);
+
+                // The new first source is the variable part of the add, shifted right by 3.
+                Operand index = Local();
+
+                Operation shrBy3 = new Operation(Instruction.ShiftRightU32, index, addOp.GetSource(0), Const(3));
+
+                block.Operations.AddBefore(current, shrBy3);
+
+                texOp.SetSource(0, index);
+            }
+        }
+
+        // Converts the operation to an indexed access with the given handle and registers the texture as used.
+        private static void TurnIntoIndexed(ShaderConfig config, TextureOperation texOp, int handle)
+        {
+            texOp.TurnIntoIndexed(handle);
+
+            config.SetUsedTexture(
+                texOp.Inst,
+                texOp.Type,
+                texOp.Format,
+                texOp.Flags,
+                texOp.CbufSlot,
+                handle);
+        }
+    }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/Translation/Optimizations/BranchElimination.cs b/src/Ryujinx.Graphics.Shader/Translation/Optimizations/BranchElimination.cs
new file mode 100644
index 00000000..c87d1474
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/Translation/Optimizations/BranchElimination.cs
@@ -0,0 +1,64 @@
+using Ryujinx.Graphics.Shader.IntermediateRepresentation;
+using System;
+
+namespace Ryujinx.Graphics.Shader.Translation.Optimizations
+{
+    // Pass that drops a block's trailing branch when the following block
+    // unconditionally branches to the same destination anyway.
+    static class BranchElimination
+    {
+        // Returns true when the branch of the block was found redundant and cleared.
+        public static bool RunPass(BasicBlock block)
+        {
+            if (!block.HasBranch)
+            {
+                return false;
+            }
+
+            Operation lastOp = (Operation)block.GetLastOp();
+
+            if (!IsRedundantBranch(lastOp, Next(block)))
+            {
+                return false;
+            }
+
+            block.Branch = null;
+
+            return true;
+        }
+
+        private static bool IsRedundantBranch(Operation current, BasicBlock nextBlock)
+        {
+            // The branch ending the current block is redundant when:
+            // - There is a next block, and its first operation is a branch.
+            // - That branch is unconditional.
+            // - Both branches jump to the same location.
+            // The next block is going to jump to the same place anyway,
+            // so the branch on the current block can be removed.
+            return nextBlock?.Operations.First?.Value is Operation next &&
+                   next.Inst == Instruction.Branch &&
+                   current.Dest == next.Dest;
+        }
+
+        // Finds the first non-empty block after the given one, or null if there is none.
+        private static BasicBlock Next(BasicBlock block)
+        {
+            for (BasicBlock candidate = block.Next; candidate != null; candidate = candidate.Next)
+            {
+                if (candidate.Operations.Count != 0)
+                {
+                    return candidate;
+                }
+
+                if (candidate.HasBranch)
+                {
+                    throw new InvalidOperationException("Found a bogus empty block that \"ends with a branch\".");
+                }
+            }
+
+            return null;
+        }
+    }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/Translation/Optimizations/ConstantFolding.cs b/src/Ryujinx.Graphics.Shader/Translation/Optimizations/ConstantFolding.cs
new file mode 100644
index 00000000..6729f077
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/Translation/Optimizations/ConstantFolding.cs
@@ -0,0 +1,346 @@
+using Ryujinx.Common.Utilities;
+using Ryujinx.Graphics.Shader.IntermediateRepresentation;
+using System;
+
+using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
+
+namespace Ryujinx.Graphics.Shader.Translation.Optimizations
+{
+ static class ConstantFolding
+ {
+        // Folds the operation into a copy of a constant when all of its sources are constants
+        // and the instruction is one of the supported ones. Other operations are left untouched.
+        //
+        // Folding must never throw on shader-supplied constants, so:
+        // - Clamp variants use plain comparisons rather than Math.Clamp, which throws
+        //   ArgumentException when min > max. Results for valid ranges are unchanged.
+        // - Integer division guards both the zero divisor and the int.MinValue / -1 case,
+        //   which would otherwise raise an arithmetic exception. The latter wraps around.
+        public static void RunPass(Operation operation)
+        {
+            if (!AreAllSourcesConstant(operation))
+            {
+                return;
+            }
+
+            switch (operation.Inst)
+            {
+                case Instruction.Add:
+                    EvaluateBinary(operation, (x, y) => x + y);
+                    break;
+
+                case Instruction.BitCount:
+                    EvaluateUnary(operation, (x) => BitCount(x));
+                    break;
+
+                case Instruction.BitwiseAnd:
+                    EvaluateBinary(operation, (x, y) => x & y);
+                    break;
+
+                case Instruction.BitwiseExclusiveOr:
+                    EvaluateBinary(operation, (x, y) => x ^ y);
+                    break;
+
+                case Instruction.BitwiseNot:
+                    EvaluateUnary(operation, (x) => ~x);
+                    break;
+
+                case Instruction.BitwiseOr:
+                    EvaluateBinary(operation, (x, y) => x | y);
+                    break;
+
+                case Instruction.BitfieldExtractS32:
+                    BitfieldExtractS32(operation);
+                    break;
+
+                case Instruction.BitfieldExtractU32:
+                    BitfieldExtractU32(operation);
+                    break;
+
+                case Instruction.Clamp:
+                    // Same result as Math.Clamp for min <= max, but never throws.
+                    EvaluateTernary(operation, (x, y, z) => x < y ? y : (x > z ? z : x));
+                    break;
+
+                case Instruction.ClampU32:
+                    // Unsigned comparison; the selected operand keeps its original bits.
+                    EvaluateTernary(operation, (x, y, z) => (uint)x < (uint)y ? y : ((uint)x > (uint)z ? z : x));
+                    break;
+
+                case Instruction.CompareEqual:
+                    EvaluateBinary(operation, (x, y) => x == y);
+                    break;
+
+                case Instruction.CompareGreater:
+                    EvaluateBinary(operation, (x, y) => x > y);
+                    break;
+
+                case Instruction.CompareGreaterOrEqual:
+                    EvaluateBinary(operation, (x, y) => x >= y);
+                    break;
+
+                case Instruction.CompareGreaterOrEqualU32:
+                    EvaluateBinary(operation, (x, y) => (uint)x >= (uint)y);
+                    break;
+
+                case Instruction.CompareGreaterU32:
+                    EvaluateBinary(operation, (x, y) => (uint)x > (uint)y);
+                    break;
+
+                case Instruction.CompareLess:
+                    EvaluateBinary(operation, (x, y) => x < y);
+                    break;
+
+                case Instruction.CompareLessOrEqual:
+                    EvaluateBinary(operation, (x, y) => x <= y);
+                    break;
+
+                case Instruction.CompareLessOrEqualU32:
+                    EvaluateBinary(operation, (x, y) => (uint)x <= (uint)y);
+                    break;
+
+                case Instruction.CompareLessU32:
+                    EvaluateBinary(operation, (x, y) => (uint)x < (uint)y);
+                    break;
+
+                case Instruction.CompareNotEqual:
+                    EvaluateBinary(operation, (x, y) => x != y);
+                    break;
+
+                case Instruction.Divide:
+                    // Division by zero folds to 0. Division by -1 is computed as a wrapping
+                    // negation, since int.MinValue / -1 would throw.
+                    EvaluateBinary(operation, (x, y) => y == 0 ? 0 : (y == -1 ? unchecked(-x) : x / y));
+                    break;
+
+                case Instruction.FP32 | Instruction.Add:
+                    EvaluateFPBinary(operation, (x, y) => x + y);
+                    break;
+
+                case Instruction.FP32 | Instruction.Clamp:
+                    // Same result as Math.Clamp for min <= max, but never throws.
+                    EvaluateFPTernary(operation, (x, y, z) => x < y ? y : (x > z ? z : x));
+                    break;
+
+                case Instruction.FP32 | Instruction.CompareEqual:
+                    EvaluateFPBinary(operation, (x, y) => x == y);
+                    break;
+
+                case Instruction.FP32 | Instruction.CompareGreater:
+                    EvaluateFPBinary(operation, (x, y) => x > y);
+                    break;
+
+                case Instruction.FP32 | Instruction.CompareGreaterOrEqual:
+                    EvaluateFPBinary(operation, (x, y) => x >= y);
+                    break;
+
+                case Instruction.FP32 | Instruction.CompareLess:
+                    EvaluateFPBinary(operation, (x, y) => x < y);
+                    break;
+
+                case Instruction.FP32 | Instruction.CompareLessOrEqual:
+                    EvaluateFPBinary(operation, (x, y) => x <= y);
+                    break;
+
+                case Instruction.FP32 | Instruction.CompareNotEqual:
+                    EvaluateFPBinary(operation, (x, y) => x != y);
+                    break;
+
+                case Instruction.FP32 | Instruction.Divide:
+                    EvaluateFPBinary(operation, (x, y) => x / y);
+                    break;
+
+                case Instruction.FP32 | Instruction.Multiply:
+                    EvaluateFPBinary(operation, (x, y) => x * y);
+                    break;
+
+                case Instruction.FP32 | Instruction.Negate:
+                    EvaluateFPUnary(operation, (x) => -x);
+                    break;
+
+                case Instruction.FP32 | Instruction.Subtract:
+                    EvaluateFPBinary(operation, (x, y) => x - y);
+                    break;
+
+                case Instruction.IsNan:
+                    EvaluateFPUnary(operation, (x) => float.IsNaN(x));
+                    break;
+
+                case Instruction.LoadConstant:
+                    // A load with constant slot and offset becomes a direct constant buffer operand.
+                    operation.TurnIntoCopy(Cbuf(operation.GetSource(0).Value, operation.GetSource(1).Value));
+                    break;
+
+                case Instruction.Maximum:
+                    EvaluateBinary(operation, (x, y) => Math.Max(x, y));
+                    break;
+
+                case Instruction.MaximumU32:
+                    EvaluateBinary(operation, (x, y) => (int)Math.Max((uint)x, (uint)y));
+                    break;
+
+                case Instruction.Minimum:
+                    EvaluateBinary(operation, (x, y) => Math.Min(x, y));
+                    break;
+
+                case Instruction.MinimumU32:
+                    EvaluateBinary(operation, (x, y) => (int)Math.Min((uint)x, (uint)y));
+                    break;
+
+                case Instruction.Multiply:
+                    EvaluateBinary(operation, (x, y) => x * y);
+                    break;
+
+                case Instruction.Negate:
+                    EvaluateUnary(operation, (x) => -x);
+                    break;
+
+                case Instruction.ShiftLeft:
+                    EvaluateBinary(operation, (x, y) => x << y);
+                    break;
+
+                case Instruction.ShiftRightS32:
+                    EvaluateBinary(operation, (x, y) => x >> y);
+                    break;
+
+                case Instruction.ShiftRightU32:
+                    EvaluateBinary(operation, (x, y) => (int)((uint)x >> y));
+                    break;
+
+                case Instruction.Subtract:
+                    EvaluateBinary(operation, (x, y) => x - y);
+                    break;
+
+                case Instruction.UnpackHalf2x16:
+                    UnpackHalf2x16(operation);
+                    break;
+            }
+        }
+
+        // Returns true when every source operand of the operation is a constant
+        // (which is trivially the case for an operation without sources).
+        private static bool AreAllSourcesConstant(Operation operation)
+        {
+            int checkedCount = 0;
+
+            // Stop at the first source that is not a constant.
+            while (checkedCount < operation.SourcesCount &&
+                   operation.GetSource(checkedCount).Type == OperandType.Constant)
+            {
+                checkedCount++;
+            }
+
+            return checkedCount == operation.SourcesCount;
+        }
+
+        // Counts for how many of the 32 bit positions value.Extract(position) reports true.
+        private static int BitCount(int value)
+        {
+            int setBits = 0;
+            int position = 32;
+
+            while (position-- > 0)
+            {
+                setBits += value.Extract(position) ? 1 : 0;
+            }
+
+            return setBits;
+        }
+
+        // Folds a signed bitfield extract: the extracted field is sign extended
+        // from its length (third source) to 32 bits.
+        private static void BitfieldExtractS32(Operation operation)
+        {
+            int unusedBits = 32 - operation.GetSource(2).Value;
+
+            int extracted = GetBitfieldExtractValue(operation);
+
+            // Move the field to the top and shift back arithmetically to replicate its sign bit.
+            int signExtended = (extracted << unusedBits) >> unusedBits;
+
+            operation.TurnIntoCopy(Const(signExtended));
+        }
+
+        // Folds an unsigned bitfield extract into a constant holding the extracted field.
+        private static void BitfieldExtractU32(Operation operation)
+        {
+            int extracted = GetBitfieldExtractValue(operation);
+
+            operation.TurnIntoCopy(Const(extracted));
+        }
+
+        // Extracts a bitfield from the first source, using the second source
+        // as the least significant bit and the third source as the length.
+        private static int GetBitfieldExtractValue(Operation operation)
+        {
+            return operation.GetSource(0).Value.Extract(
+                operation.GetSource(1).Value,
+                operation.GetSource(2).Value);
+        }
+
+        // Folds an unpack of one half-precision value out of a packed 32-bit constant.
+        // The operation index selects which 16-bit half is taken.
+        private static void UnpackHalf2x16(Operation operation)
+        {
+            int shift = operation.Index * 16;
+
+            ushort halfBits = (ushort)((operation.GetSource(0).Value >> shift) & 0xffff);
+
+            float unpacked = (float)BitConverter.UInt16BitsToHalf(halfBits);
+
+            operation.TurnIntoCopy(ConstF(unpacked));
+        }
+
+        // Folds a floating-point negation of a constant source.
+        // NOTE(review): nothing in this class calls this method; RunPass handles
+        // FP32 negation through EvaluateFPUnary instead.
+        private static void FPNegate(Operation operation)
+        {
+            operation.TurnIntoCopy(ConstF(-operation.GetSource(0).AsFloat()));
+        }
+
+        // Applies an integer unary function to the first source and turns
+        // the operation into a copy of the resulting constant.
+        private static void EvaluateUnary(Operation operation, Func<int, int> op)
+        {
+            int result = op(operation.GetSource(0).Value);
+
+            operation.TurnIntoCopy(Const(result));
+        }
+
+        // Applies a float unary function to the first source and turns
+        // the operation into a copy of the resulting float constant.
+        private static void EvaluateFPUnary(Operation operation, Func<float, float> op)
+        {
+            float result = op(operation.GetSource(0).AsFloat());
+
+            operation.TurnIntoCopy(ConstF(result));
+        }
+
+        // Applies a float predicate to the first source and turns the operation
+        // into a copy of the IR boolean constant matching the outcome.
+        private static void EvaluateFPUnary(Operation operation, Func<float, bool> op)
+        {
+            bool result = op(operation.GetSource(0).AsFloat());
+
+            operation.TurnIntoCopy(Const(result ? IrConsts.True : IrConsts.False));
+        }
+
+        // Applies an integer binary function to the first two sources and turns
+        // the operation into a copy of the resulting constant.
+        private static void EvaluateBinary(Operation operation, Func<int, int, int> op)
+        {
+            int result = op(operation.GetSource(0).Value, operation.GetSource(1).Value);
+
+            operation.TurnIntoCopy(Const(result));
+        }
+
+        // Applies an integer binary predicate to the first two sources and turns the
+        // operation into a copy of the IR boolean constant matching the outcome.
+        private static void EvaluateBinary(Operation operation, Func<int, int, bool> op)
+        {
+            bool result = op(operation.GetSource(0).Value, operation.GetSource(1).Value);
+
+            operation.TurnIntoCopy(Const(result ? IrConsts.True : IrConsts.False));
+        }
+
+        // Applies a float binary function to the first two sources and turns
+        // the operation into a copy of the resulting float constant.
+        private static void EvaluateFPBinary(Operation operation, Func<float, float, float> op)
+        {
+            float result = op(operation.GetSource(0).AsFloat(), operation.GetSource(1).AsFloat());
+
+            operation.TurnIntoCopy(ConstF(result));
+        }
+
+        // Applies a float binary predicate to the first two sources and turns the
+        // operation into a copy of the IR boolean constant matching the outcome.
+        private static void EvaluateFPBinary(Operation operation, Func<float, float, bool> op)
+        {
+            bool result = op(operation.GetSource(0).AsFloat(), operation.GetSource(1).AsFloat());
+
+            operation.TurnIntoCopy(Const(result ? IrConsts.True : IrConsts.False));
+        }
+
+        // Applies an integer ternary function to the first three sources and turns
+        // the operation into a copy of the resulting constant.
+        private static void EvaluateTernary(Operation operation, Func<int, int, int, int> op)
+        {
+            int result = op(
+                operation.GetSource(0).Value,
+                operation.GetSource(1).Value,
+                operation.GetSource(2).Value);
+
+            operation.TurnIntoCopy(Const(result));
+        }
+
+        // Applies a float ternary function to the first three sources and turns
+        // the operation into a copy of the resulting float constant.
+        private static void EvaluateFPTernary(Operation operation, Func<float, float, float, float> op)
+        {
+            float result = op(
+                operation.GetSource(0).AsFloat(),
+                operation.GetSource(1).AsFloat(),
+                operation.GetSource(2).AsFloat());
+
+            operation.TurnIntoCopy(ConstF(result));
+        }
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/Translation/Optimizations/GlobalToStorage.cs b/src/Ryujinx.Graphics.Shader/Translation/Optimizations/GlobalToStorage.cs
new file mode 100644
index 00000000..2a4070e0
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/Translation/Optimizations/GlobalToStorage.cs
@@ -0,0 +1,433 @@
+using Ryujinx.Graphics.Shader.IntermediateRepresentation;
+using System.Collections.Generic;
+
+using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
+using static Ryujinx.Graphics.Shader.Translation.GlobalMemory;
+
+namespace Ryujinx.Graphics.Shader.Translation.Optimizations
+{
+ static class GlobalToStorage
+ {
+        // Scans a block for uses of the storage buffer (and, on compute, UBE) descriptors that
+        // live on constant buffer 0, accumulating them on the use masks, and replaces global
+        // memory accesses whose base address can be traced back to one of those descriptors
+        // with storage buffer or constant buffer accesses.
+        //
+        // sbUseMask/ubeUseMask are accumulated across calls (one bit per descriptor index)
+        // and reported to the config at the end of each call.
+        public static void RunPass(BasicBlock block, ShaderConfig config, ref int sbUseMask, ref int ubeUseMask)
+        {
+            int sbStart = GetStorageBaseCbOffset(config.Stage);
+            int sbEnd = sbStart + StorageDescsSize;
+
+            int ubeStart = UbeBaseOffset;
+            int ubeEnd = UbeBaseOffset + UbeDescsSize;
+
+            for (LinkedListNode<INode> node = block.Operations.First; node != null; node = node.Next)
+            {
+                // Any direct use of a descriptor marks the matching buffer as accessible.
+                for (int index = 0; index < node.Value.SourcesCount; index++)
+                {
+                    Operand src = node.Value.GetSource(index);
+
+                    int storageIndex = GetStorageIndex(src, sbStart, sbEnd);
+
+                    if (storageIndex >= 0)
+                    {
+                        sbUseMask |= 1 << storageIndex;
+                    }
+
+                    if (config.Stage == ShaderStage.Compute)
+                    {
+                        int constantIndex = GetStorageIndex(src, ubeStart, ubeEnd);
+
+                        if (constantIndex >= 0)
+                        {
+                            ubeUseMask |= 1 << constantIndex;
+                        }
+                    }
+                }
+
+                if (!(node.Value is Operation operation))
+                {
+                    continue;
+                }
+
+                if (UsesGlobalMemory(operation.Inst, operation.StorageKind))
+                {
+                    Operand source = operation.GetSource(0);
+
+                    int storageIndex = SearchForStorageBase(block, source, sbStart, sbEnd);
+
+                    if (storageIndex >= 0)
+                    {
+                        // Storage buffers are implemented using global memory access.
+                        // If we know from where the base address of the access is loaded,
+                        // we can guess which storage buffer it is accessing.
+                        // We can then replace the global memory access with a storage
+                        // buffer access.
+                        node = ReplaceGlobalWithStorage(block, node, config, storageIndex);
+                    }
+                    else if (config.Stage == ShaderStage.Compute && operation.Inst == Instruction.LoadGlobal)
+                    {
+                        // Here we effectively try to replace a LDG instruction with LDC.
+                        // The hardware only supports a limited amount of constant buffers
+                        // so NVN "emulates" more constant buffers using global memory access.
+                        // Here we try to replace the global access back to a constant buffer
+                        // load.
+                        // ubeEnd is already an absolute offset, so it is used as the end
+                        // of the search range as is; adding ubeStart to it again would
+                        // accept offsets past the end of the UBE descriptors.
+                        storageIndex = SearchForStorageBase(block, source, ubeStart, ubeEnd);
+
+                        if (storageIndex >= 0)
+                        {
+                            node = ReplaceLdgWithLdc(node, config, storageIndex);
+                        }
+                    }
+                }
+            }
+
+            config.SetAccessibleBufferMasks(sbUseMask, ubeUseMask);
+        }
+
+        // Replaces the global memory operation on the given node with the equivalent storage
+        // buffer operation for the given storage buffer index. Returns the list node of the
+        // new operation, which takes the place of the removed one.
+        private static LinkedListNode<INode> ReplaceGlobalWithStorage(BasicBlock block, LinkedListNode<INode> node, ShaderConfig config, int storageIndex)
+        {
+            Operation globalOp = (Operation)node.Value;
+
+            bool isAtomic = globalOp.Inst.IsAtomic();
+            bool isStg16Or8 = globalOp.Inst == Instruction.StoreGlobal16 || globalOp.Inst == Instruction.StoreGlobal8;
+            bool isWrite = isAtomic || globalOp.Inst == Instruction.StoreGlobal || isStg16Or8;
+
+            config.SetUsedStorageBuffer(storageIndex, isWrite);
+
+            // The first two sources become the buffer index and the offset into the buffer,
+            // remaining sources are carried over unchanged.
+            int sourcesCount = globalOp.SourcesCount;
+
+            Operand[] newSources = new Operand[sourcesCount];
+
+            newSources[0] = Const(storageIndex);
+            newSources[1] = GetStorageOffset(block, node, config, storageIndex, globalOp.GetSource(0), isStg16Or8);
+
+            for (int srcIndex = 2; srcIndex < sourcesCount; srcIndex++)
+            {
+                newSources[srcIndex] = globalOp.GetSource(srcIndex);
+            }
+
+            Operation storageOp;
+
+            if (isAtomic)
+            {
+                // Atomics keep their instruction and only change storage kind.
+                storageOp = new Operation(globalOp.Inst, StorageKind.StorageBuffer, globalOp.Dest, newSources);
+            }
+            else if (globalOp.Inst == Instruction.LoadGlobal)
+            {
+                storageOp = new Operation(Instruction.LoadStorage, globalOp.Dest, newSources);
+            }
+            else
+            {
+                Instruction storeInst = Instruction.StoreStorage;
+
+                if (globalOp.Inst == Instruction.StoreGlobal16)
+                {
+                    storeInst = Instruction.StoreStorage16;
+                }
+                else if (globalOp.Inst == Instruction.StoreGlobal8)
+                {
+                    storeInst = Instruction.StoreStorage8;
+                }
+
+                storageOp = new Operation(storeInst, null, newSources);
+            }
+
+            // Detach the old operation from all of its sources before dropping it.
+            for (int srcIndex = 0; srcIndex < sourcesCount; srcIndex++)
+            {
+                globalOp.SetSource(srcIndex, null);
+            }
+
+            LinkedListNode<INode> newNode = node.List.AddBefore(node, storageOp);
+
+            newNode.List.Remove(node);
+
+            return newNode;
+        }
+
+        // Builds the operand holding the offset of a global access inside the given storage
+        // buffer, inserting any required operations before the node. The result is a byte
+        // offset for 16/8-bit stores, and a word offset otherwise.
+        private static Operand GetStorageOffset(
+            BasicBlock block,
+            LinkedListNode<INode> node,
+            ShaderConfig config,
+            int storageIndex,
+            Operand addrLow,
+            bool isStg16Or8)
+        {
+            int baseAddressCbOffset = GetStorageCbOffset(config.Stage, storageIndex);
+
+            bool storageAligned =
+                !config.GpuAccessor.QueryHasUnalignedStorageBuffer() &&
+                config.GpuAccessor.QueryHostStorageBufferOffsetAlignment() <= Constants.StorageAlignment;
+
+            Operand byteOffset = null;
+            int constantOffset = 0;
+
+            if (storageAligned)
+            {
+                // Try to find the offset that was added to the base address on the original code.
+                (byteOffset, constantOffset) = GetStorageOffset(block, Utils.FindLastOperation(addrLow, block), baseAddressCbOffset);
+            }
+
+            if (byteOffset == null)
+            {
+                // Offset not found (or not looked for): compute it by subtracting
+                // the base address, truncated to the host alignment, from the address.
+                Operand baseAddrLow = Cbuf(0, baseAddressCbOffset);
+                Operand baseAddrTrunc = Local();
+
+                Operand alignMask = Const(-config.GpuAccessor.QueryHostStorageBufferOffsetAlignment());
+
+                node.List.AddBefore(node, new Operation(Instruction.BitwiseAnd, baseAddrTrunc, baseAddrLow, alignMask));
+
+                Operand difference = Local();
+
+                node.List.AddBefore(node, new Operation(Instruction.Subtract, difference, addrLow, baseAddrTrunc));
+
+                byteOffset = difference;
+            }
+            else
+            {
+                ReplaceAddressAlignment(node.List, addrLow, byteOffset, constantOffset);
+
+                if (constantOffset != 0)
+                {
+                    Operand sum = Local();
+
+                    node.List.AddBefore(node, new Operation(Instruction.Add, sum, byteOffset, Const(constantOffset)));
+
+                    byteOffset = sum;
+                }
+            }
+
+            if (isStg16Or8)
+            {
+                return byteOffset;
+            }
+
+            Operand wordOffset = Local();
+
+            node.List.AddBefore(node, new Operation(Instruction.ShiftRightU32, wordOffset, byteOffset, Const(2)));
+
+            return wordOffset;
+        }
+
+        // Checks if the operand is a constant buffer operand at the given offset of constant buffer 0.
+        private static bool IsCb0Offset(Operand operand, int offset)
+        {
+            if (operand.Type != OperandType.ConstantBuffer)
+            {
+                return false;
+            }
+
+            return operand.GetCbufSlot() == 0 && operand.GetCbufOffset() == offset;
+        }
+
+        // Rewrites "address & 3" alignment computations found on the given list so that
+        // they use the offset inside the storage buffer (byteOffset + constantOffset)
+        // rather than the full address.
+        private static void ReplaceAddressAlignment(LinkedList<INode> list, Operand address, Operand byteOffset, int constantOffset)
+        {
+            // When we emit 16/8-bit LDG, we add extra code to determine the address alignment.
+            // Eliminate the storage buffer base address from this too, leaving only the byte offset.
+
+            // Replacing a source below changes which operations use the address, so iterate
+            // over a snapshot of the uses rather than the live collection (the optimizer
+            // passes take the same precaution before calling SetSource).
+            List<INode> uses = new List<INode>(address.UseOps);
+
+            foreach (INode useNode in uses)
+            {
+                if (!(useNode is Operation op) || op.Inst != Instruction.BitwiseAnd)
+                {
+                    continue;
+                }
+
+                Operand src1 = op.GetSource(0);
+                Operand src2 = op.GetSource(1);
+
+                int addressIndex = -1;
+
+                if (src1 == address && src2.Type == OperandType.Constant && src2.Value == 3)
+                {
+                    addressIndex = 0;
+                }
+                else if (src2 == address && src1.Type == OperandType.Constant && src1.Value == 3)
+                {
+                    addressIndex = 1;
+                }
+
+                if (addressIndex == -1)
+                {
+                    continue;
+                }
+
+                LinkedListNode<INode> node = list.Find(op);
+
+                // Add offset calculation before the use. Needs to be on the same block.
+                if (node != null)
+                {
+                    Operand offset = Local();
+                    Operation addOp = new Operation(Instruction.Add, offset, byteOffset, Const(constantOffset));
+                    list.AddBefore(node, addOp);
+
+                    op.SetSource(addressIndex, offset);
+                }
+            }
+        }
+
+        // Tries to split an address into the storage buffer base address (loaded from constant
+        // buffer 0 at baseAddressCbOffset), a variable byte offset and a constant offset.
+        // Returns (null, 0) when the address does not match any of the recognized forms.
+        private static (Operand, int) GetStorageOffset(BasicBlock block, Operand address, int baseAddressCbOffset)
+        {
+            if (IsCb0Offset(address, baseAddressCbOffset))
+            {
+                // The address is the base address itself, the offset is zero.
+                return (Const(0), 0);
+            }
+
+            (Operand remaining, int constantOffset) = GetStorageConstantOffset(block, address);
+
+            remaining = Utils.FindLastOperation(remaining, block);
+
+            if (IsCb0Offset(remaining, baseAddressCbOffset))
+            {
+                // Base address plus a constant, there is no variable part.
+                return (Const(0), constantOffset);
+            }
+
+            if (remaining.AsgOp is not Operation offsetAdd || offsetAdd.Inst != Instruction.Add)
+            {
+                return (null, 0);
+            }
+
+            Operand lhs = offsetAdd.GetSource(0);
+            Operand rhs = Utils.FindLastOperation(offsetAdd.GetSource(1), block);
+
+            // Whichever side is not the base address is the variable offset.
+            if (IsCb0Offset(rhs, baseAddressCbOffset))
+            {
+                return (lhs, constantOffset);
+            }
+
+            if (IsCb0Offset(lhs, baseAddressCbOffset))
+            {
+                return (rhs, constantOffset);
+            }
+
+            return (null, 0);
+        }
+
+        // If the address is the result of an add whose second source is a constant, returns
+        // the first source of the add along with that constant. Otherwise, returns the address
+        // unchanged with a constant offset of zero. The block parameter is not used here.
+        private static (Operand, int) GetStorageConstantOffset(BasicBlock block, Operand address)
+        {
+            if (address.AsgOp is Operation offsetAdd && offsetAdd.Inst == Instruction.Add)
+            {
+                Operand baseOperand = offsetAdd.GetSource(0);
+                Operand offsetOperand = offsetAdd.GetSource(1);
+
+                if (offsetOperand.Type == OperandType.Constant)
+                {
+                    return (baseOperand, offsetOperand.Value);
+                }
+            }
+
+            return (address, 0);
+        }
+
+        // Replaces the global load on the given node with a constant buffer load from the
+        // constant buffer that matches the given UBE descriptor index. Returns the list node
+        // of the new load, which takes the place of the removed one.
+        private static LinkedListNode<INode> ReplaceLdgWithLdc(LinkedListNode<INode> node, ShaderConfig config, int storageIndex)
+        {
+            Operation ldgOp = (Operation)node.Value;
+
+            int sourcesCount = ldgOp.SourcesCount;
+
+            Operand[] newSources = new Operand[sourcesCount];
+
+            int cbSlot = UbeFirstCbuf + storageIndex;
+
+            newSources[0] = Const(cbSlot);
+
+            // The word offset inside the constant buffer is the distance between the accessed
+            // address and the buffer base address (truncated to the host alignment), divided by 4.
+            Operand addrLow = ldgOp.GetSource(0);
+            Operand baseAddrLow = Cbuf(0, UbeBaseOffset + storageIndex * StorageDescSize);
+            Operand baseAddrTrunc = Local();
+            Operand alignMask = Const(-config.GpuAccessor.QueryHostStorageBufferOffsetAlignment());
+
+            node.List.AddBefore(node, new Operation(Instruction.BitwiseAnd, baseAddrTrunc, baseAddrLow, alignMask));
+
+            Operand byteOffset = Local();
+            Operand wordOffset = Local();
+
+            Operation subOp = new Operation(Instruction.Subtract, byteOffset, addrLow, baseAddrTrunc);
+            Operation shrOp = new Operation(Instruction.ShiftRightU32, wordOffset, byteOffset, Const(2));
+
+            node.List.AddBefore(node, subOp);
+            node.List.AddBefore(node, shrOp);
+
+            newSources[1] = wordOffset;
+
+            config.SetUsedConstantBuffer(cbSlot);
+
+            for (int srcIndex = 2; srcIndex < sourcesCount; srcIndex++)
+            {
+                newSources[srcIndex] = ldgOp.GetSource(srcIndex);
+            }
+
+            Operation ldcOp = new Operation(Instruction.LoadConstant, ldgOp.Dest, newSources);
+
+            // Detach the old operation from all of its sources before dropping it.
+            for (int srcIndex = 0; srcIndex < sourcesCount; srcIndex++)
+            {
+                ldgOp.SetSource(srcIndex, null);
+            }
+
+            LinkedListNode<INode> newNode = node.List.AddBefore(node, ldcOp);
+
+            newNode.List.Remove(node);
+
+            return newNode;
+        }
+
+        // Tries to find from which descriptor, inside the [sbStart, sbEnd) range of constant
+        // buffer 0, the base of the given global address was loaded. Returns the descriptor
+        // index, or -1 if the address does not match any of the recognized forms.
+        private static int SearchForStorageBase(BasicBlock block, Operand globalAddress, int sbStart, int sbEnd)
+        {
+            Operand resolved = Utils.FindLastOperation(globalAddress, block);
+
+            if (resolved.Type == OperandType.ConstantBuffer)
+            {
+                return GetStorageIndex(resolved, sbStart, sbEnd);
+            }
+
+            if (resolved.AsgOp is not Operation addOp || addOp.Inst != Instruction.Add)
+            {
+                return -1;
+            }
+
+            Operand lhs = addOp.GetSource(0);
+            Operand rhs = addOp.GetSource(1);
+
+            bool lhsIsLocalPlusConst = lhs.Type == OperandType.LocalVariable && rhs.Type == OperandType.Constant;
+            bool rhsIsLocalPlusConst = rhs.Type == OperandType.LocalVariable && lhs.Type == OperandType.Constant;
+
+            if (lhsIsLocalPlusConst || rhsIsLocalPlusConst)
+            {
+                // Local plus constant: look through it, the local must come from another add.
+                Operand local = lhsIsLocalPlusConst ? lhs : rhs;
+
+                addOp = Utils.FindLastOperation(local, block).AsgOp as Operation;
+
+                if (addOp == null || addOp.Inst != Instruction.Add)
+                {
+                    return -1;
+                }
+            }
+
+            // The first source of the add that is a descriptor in range wins.
+            for (int srcIndex = 0; srcIndex < addOp.SourcesCount; srcIndex++)
+            {
+                int storageIndex = GetStorageIndex(addOp.GetSource(srcIndex), sbStart, sbEnd);
+
+                if (storageIndex != -1)
+                {
+                    return storageIndex;
+                }
+            }
+
+            return -1;
+        }
+
+        // Gets the index of the descriptor that the operand refers to, when the operand is on
+        // constant buffer 0 with an offset inside the [sbStart, sbEnd) range. Returns -1 otherwise.
+        private static int GetStorageIndex(Operand operand, int sbStart, int sbEnd)
+        {
+            if (operand.Type != OperandType.ConstantBuffer)
+            {
+                return -1;
+            }
+
+            int slot = operand.GetCbufSlot();
+            int offset = operand.GetCbufOffset();
+
+            bool isInRange = slot == 0 && offset >= sbStart && offset < sbEnd;
+
+            return isInRange ? (offset - sbStart) / StorageDescSize : -1;
+        }
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/Translation/Optimizations/Optimizer.cs b/src/Ryujinx.Graphics.Shader/Translation/Optimizations/Optimizer.cs
new file mode 100644
index 00000000..bae774ee
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/Translation/Optimizations/Optimizer.cs
@@ -0,0 +1,380 @@
+using Ryujinx.Graphics.Shader.IntermediateRepresentation;
+using System.Collections.Generic;
+using System.Diagnostics;
+using System.Linq;
+
+namespace Ryujinx.Graphics.Shader.Translation.Optimizations
+{
+ static class Optimizer
+ {
+        // Runs all optimizations over the blocks: the general passes until nothing changes,
+        // then the pattern based passes once per block, then the general passes again.
+        public static void RunPass(BasicBlock[] blocks, ShaderConfig config)
+        {
+            RunOptimizationPasses(blocks);
+
+            int sbUseMask = 0;
+            int ubeUseMask = 0;
+
+            // These passes look for specific patterns and only need to run once.
+            foreach (BasicBlock block in blocks)
+            {
+                GlobalToStorage.RunPass(block, config, ref sbUseMask, ref ubeUseMask);
+                BindlessToIndexed.RunPass(block, config);
+                BindlessElimination.RunPass(block, config);
+            }
+
+            config.SetAccessibleBufferMasks(sbUseMask, ubeUseMask);
+
+            // The passes above may have exposed more code to the general optimizations.
+            RunOptimizationPasses(blocks);
+        }
+
+        // Repeatedly runs dead code removal, phi/copy/pack propagation, constant folding,
+        // simplification, DDX/DDY matching and branch elimination over all blocks,
+        // until a full iteration completes without modifying anything.
+        private static void RunOptimizationPasses(BasicBlock[] blocks)
+        {
+            bool changed = true;
+
+            while (changed)
+            {
+                changed = false;
+
+                foreach (BasicBlock block in blocks)
+                {
+                    // The next node is captured up front as the current one may get removed.
+                    for (LinkedListNode<INode> current = block.Operations.First, next; current != null; current = next)
+                    {
+                        next = current.Next;
+
+                        INode value = current.Value;
+
+                        bool unused = IsUnused(value);
+
+                        if (value is not Operation operation || unused)
+                        {
+                            // A phi that is still in use may be removed if all its sources are the same.
+                            if (!unused && value is PhiNode phi)
+                            {
+                                unused = PropagatePhi(phi);
+                            }
+
+                            if (unused)
+                            {
+                                RemoveNode(block, current);
+
+                                changed = true;
+                            }
+
+                            continue;
+                        }
+
+                        ConstantFolding.RunPass(operation);
+
+                        Simplification.RunPass(operation);
+
+                        if (!DestIsLocalVar(operation))
+                        {
+                            continue;
+                        }
+
+                        if (operation.Inst == Instruction.Copy)
+                        {
+                            PropagateCopy(operation);
+
+                            RemoveNode(block, current);
+
+                            changed = true;
+
+                            continue;
+                        }
+
+                        bool propagated =
+                            (operation.Inst == Instruction.PackHalf2x16 && PropagatePack(operation)) ||
+                            (operation.Inst == Instruction.ShuffleXor && MatchDdxOrDdy(operation));
+
+                        if (propagated)
+                        {
+                            if (DestHasNoUses(operation))
+                            {
+                                RemoveNode(block, current);
+                            }
+
+                            changed = true;
+                        }
+                    }
+
+                    if (BranchElimination.RunPass(block))
+                    {
+                        // The branch was found redundant, remove the operation from the block too.
+                        RemoveNode(block, block.Operations.Last);
+
+                        changed = true;
+                    }
+                }
+            }
+        }
+
+        // Makes every user of the copy destination use the copy source directly.
+        private static void PropagateCopy(Operation copyOp)
+        {
+            Operand copyDest = copyOp.Dest;
+            Operand copySrc = copyOp.GetSource(0);
+
+            // Iterate over a snapshot of the uses, as they are replaced inside the loop.
+            foreach (INode user in copyDest.UseOps.ToArray())
+            {
+                int sourcesCount = user.SourcesCount;
+
+                for (int srcIndex = 0; srcIndex < sourcesCount; srcIndex++)
+                {
+                    if (user.GetSource(srcIndex) != copyDest)
+                    {
+                        continue;
+                    }
+
+                    user.SetSource(srcIndex, copySrc);
+                }
+            }
+        }
+
+        // If all sources of the phi are the same operand (as defined by IsSameOperand), makes
+        // every user of the phi destination use the first source directly and returns true,
+        // meaning that the phi can be removed. Returns false without changes otherwise.
+        private static bool PropagatePhi(PhiNode phi)
+        {
+            Operand commonSrc = phi.GetSource(0);
+
+            int phiSourcesCount = phi.SourcesCount;
+
+            for (int srcIndex = 1; srcIndex < phiSourcesCount; srcIndex++)
+            {
+                if (!IsSameOperand(commonSrc, phi.GetSource(srcIndex)))
+                {
+                    return false;
+                }
+            }
+
+            // All sources are equal, the value can be propagated.
+            Operand phiDest = phi.Dest;
+
+            // Iterate over a snapshot of the uses, as they are replaced inside the loop.
+            foreach (INode user in phiDest.UseOps.ToArray())
+            {
+                for (int srcIndex = 0; srcIndex < user.SourcesCount; srcIndex++)
+                {
+                    if (user.GetSource(srcIndex) != phiDest)
+                    {
+                        continue;
+                    }
+
+                    user.SetSource(srcIndex, commonSrc);
+                }
+            }
+
+            return true;
+        }
+
+        // Two operands are considered the same when they have the same type and value,
+        // and are either constants or constant buffer operands.
+        private static bool IsSameOperand(Operand x, Operand y)
+        {
+            // TODO: Handle Load operations with the same storage and the same constant parameters.
+            bool isComparableType = x.Type == OperandType.Constant || x.Type == OperandType.ConstantBuffer;
+
+            return x.Type == y.Type && x.Value == y.Value && isComparableType;
+        }
+
+        // Turns every half unpack of the pack result into a copy of the pack source
+        // that it selects. Returns true if at least one unpack was replaced.
+        private static bool PropagatePack(Operation packOp)
+        {
+            Operand packed = packOp.Dest;
+            Operand lowSrc = packOp.GetSource(0);
+            Operand highSrc = packOp.GetSource(1);
+
+            bool anyReplaced = false;
+
+            // Iterate over a snapshot of the uses, as they are replaced inside the loop.
+            foreach (INode user in packed.UseOps.ToArray())
+            {
+                bool isUnpackOfPacked =
+                    user is Operation candidate &&
+                    candidate.Inst == Instruction.UnpackHalf2x16 &&
+                    candidate.GetSource(0) == packed;
+
+                if (!isUnpackOfPacked)
+                {
+                    continue;
+                }
+
+                Operation unpackOp = (Operation)user;
+
+                // Index 1 selects the second pack source, anything else selects the first one.
+                unpackOp.TurnIntoCopy(unpackOp.Index == 1 ? highSrc : lowSrc);
+
+                anyReplaced = true;
+            }
+
+            return anyReplaced;
+        }
+
+        // Replaces swizzled adds that, combined with the given shuffle, compute a derivative
+        // with DDX/DDY operations. Returns true if at least one use was replaced.
+        public static bool MatchDdxOrDdy(Operation operation)
+        {
+            // It's assumed that "operation.Inst" is ShuffleXor,
+            // that should be checked before calling this method.
+            Debug.Assert(operation.Inst == Instruction.ShuffleXor);
+
+            bool modified = false;
+
+            Operand src2 = operation.GetSource(1);
+            Operand src3 = operation.GetSource(2);
+
+            if (src2.Type != OperandType.Constant || (src2.Value != 1 && src2.Value != 2))
+            {
+                return false;
+            }
+
+            if (src3.Type != OperandType.Constant || src3.Value != 0x1c03)
+            {
+                return false;
+            }
+
+            bool isDdy = src2.Value == 2;
+            bool isDdx = !isDdy;
+
+            // We can replace any use by a FSWZADD with DDX/DDY, when
+            // the following conditions are true:
+            // - The mask should be 0b10100101 for DDY, or 0b10011001 for DDX.
+            // - The first source operand must be the shuffle output.
+            // - The second source operand must be the shuffle first source operand.
+            // A snapshot of the uses is taken as the uses change when a match is replaced.
+            INode[] uses = operation.Dest.UseOps.ToArray();
+
+            foreach (INode use in uses)
+            {
+                if (!(use is Operation useOp) || useOp.Inst != Instruction.SwizzleAdd)
+                {
+                    continue;
+                }
+
+                Operand fswzaddSrc1 = useOp.GetSource(0);
+                Operand fswzaddSrc2 = useOp.GetSource(1);
+                Operand fswzaddSrc3 = useOp.GetSource(2);
+
+                if (fswzaddSrc1 != operation.Dest)
+                {
+                    continue;
+                }
+
+                if (fswzaddSrc2 != operation.GetSource(0))
+                {
+                    continue;
+                }
+
+                if (fswzaddSrc3.Type != OperandType.Constant)
+                {
+                    continue;
+                }
+
+                int mask = fswzaddSrc3.Value;
+
+                if ((isDdx && mask != 0b10011001) ||
+                    (isDdy && mask != 0b10100101))
+                {
+                    continue;
+                }
+
+                useOp.TurnInto(isDdx ? Instruction.Ddx : Instruction.Ddy, fswzaddSrc2);
+
+                modified = true;
+            }
+
+            return modified;
+        }
+
+        // Removes the node from the block, and removes it from the use list of every local
+        // variable that it uses. When a local variable is left without uses, the same is done
+        // for the uses of the node that assigns it (that node is not removed from its block here).
+        private static void RemoveNode(BasicBlock block, LinkedListNode<INode> llNode)
+        {
+            block.Operations.Remove(llNode);
+
+            Queue<INode> pending = new Queue<INode>();
+
+            pending.Enqueue(llNode.Value);
+
+            while (pending.Count > 0)
+            {
+                INode current = pending.Dequeue();
+
+                for (int srcIndex = 0; srcIndex < current.SourcesCount; srcIndex++)
+                {
+                    Operand src = current.GetSource(srcIndex);
+
+                    if (src.Type == OperandType.LocalVariable &&
+                        src.UseOps.Remove(current) &&
+                        src.UseOps.Count == 0)
+                    {
+                        Debug.Assert(src.AsgOp != null);
+                        pending.Enqueue(src.AsgOp);
+                    }
+                }
+            }
+        }
+
+        // A node is unused when it has no side effects, and all its
+        // destinations are local variables without any use.
+        private static bool IsUnused(INode node)
+        {
+            if (HasSideEffects(node))
+            {
+                return false;
+            }
+
+            return DestIsLocalVar(node) && DestHasNoUses(node);
+        }
+
+        // Checks if the node is an operation that must be kept even when its result
+        // is not used: atomic operations, image atomics and function calls.
+        private static bool HasSideEffects(INode node)
+        {
+            if (node is not Operation operation)
+            {
+                return false;
+            }
+
+            return (operation.Inst & Instruction.Mask) switch
+            {
+                Instruction.AtomicAdd or
+                Instruction.AtomicAnd or
+                Instruction.AtomicCompareAndSwap or
+                Instruction.AtomicMaxS32 or
+                Instruction.AtomicMaxU32 or
+                Instruction.AtomicMinS32 or
+                Instruction.AtomicMinU32 or
+                Instruction.AtomicOr or
+                Instruction.AtomicSwap or
+                Instruction.AtomicXor or
+                Instruction.Call or
+                Instruction.ImageAtomic => true,
+                _ => false
+            };
+        }
+
+        // Returns true if the node has at least one destination slot, and every
+        // destination that is set (not null) is a local variable.
+        private static bool DestIsLocalVar(INode node)
+        {
+            int destsCount = node.DestsCount;
+
+            if (destsCount == 0)
+            {
+                return false;
+            }
+
+            for (int destIndex = 0; destIndex < destsCount; destIndex++)
+            {
+                if (node.GetDest(destIndex) is Operand dest && dest.Type != OperandType.LocalVariable)
+                {
+                    return false;
+                }
+            }
+
+            return true;
+        }
+
+        // Returns true if none of the destinations that are set (not null) has any use.
+        private static bool DestHasNoUses(INode node)
+        {
+            int destsCount = node.DestsCount;
+
+            for (int destIndex = 0; destIndex < destsCount; destIndex++)
+            {
+                if (node.GetDest(destIndex) is Operand dest && dest.UseOps.Count != 0)
+                {
+                    return false;
+                }
+            }
+
+            return true;
+        }
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/Translation/Optimizations/Simplification.cs b/src/Ryujinx.Graphics.Shader/Translation/Optimizations/Simplification.cs
new file mode 100644
index 00000000..8d05f99a
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/Translation/Optimizations/Simplification.cs
@@ -0,0 +1,147 @@
+using Ryujinx.Graphics.Shader.IntermediateRepresentation;
+
+using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
+
+namespace Ryujinx.Graphics.Shader.Translation.Optimizations
+{
+    /// <summary>
+    /// Simplifications that turn an operation into a plain copy when one of its
+    /// operands is a constant that makes the result known.
+    /// </summary>
+    static class Simplification
+    {
+        private const int AllOnes = ~0;
+
+        /// <summary>
+        /// Tries to simplify a single operation in place. Only the exact instruction
+        /// values listed in the switch are considered; anything else is left untouched.
+        /// </summary>
+        public static void RunPass(Operation operation)
+        {
+            switch (operation.Inst)
+            {
+                // x + 0 == x and x ^ 0 == x, with the constant on either side.
+                case Instruction.Add:
+                case Instruction.BitwiseExclusiveOr:
+                    TryEliminateBinaryOpCommutative(operation, 0);
+                    break;
+
+                // x * 1 == x, with the constant on either side.
+                case Instruction.Multiply:
+                    TryEliminateBinaryOpCommutative(operation, 1);
+                    break;
+
+                // x / 1 == x, only when the constant is the second operand.
+                case Instruction.Divide:
+                    TryEliminateBinaryOpY(operation, 1);
+                    break;
+
+                // x << 0, x >> 0 and x - 0 are all x, only when the constant is the second operand.
+                case Instruction.ShiftLeft:
+                case Instruction.ShiftRightS32:
+                case Instruction.ShiftRightU32:
+                case Instruction.Subtract:
+                    TryEliminateBinaryOpY(operation, 0);
+                    break;
+
+                case Instruction.BitwiseAnd:
+                    TryEliminateBitwiseAnd(operation);
+                    break;
+
+                case Instruction.BitwiseOr:
+                    TryEliminateBitwiseOr(operation);
+                    break;
+
+                case Instruction.ConditionalSelect:
+                    TryEliminateConditionalSelect(operation);
+                    break;
+            }
+        }
+
+        /// <summary>
+        /// Recognizes, in order: 0xFFFFFFFF &amp; y == y, x &amp; 0xFFFFFFFF == x,
+        /// and 0 &amp; y == x &amp; 0 == 0.
+        /// </summary>
+        private static void TryEliminateBitwiseAnd(Operation operation)
+        {
+            TryEliminateBitwiseOp(operation, AllOnes, 0);
+        }
+
+        /// <summary>
+        /// Recognizes, in order: 0 | y == y, x | 0 == x,
+        /// and 0xFFFFFFFF | y == x | 0xFFFFFFFF == 0xFFFFFFFF.
+        /// </summary>
+        private static void TryEliminateBitwiseOr(Operation operation)
+        {
+            TryEliminateBitwiseOp(operation, 0, AllOnes);
+        }
+
+        /// <summary>
+        /// Shared logic for the bitwise AND/OR cases. <paramref name="identity"/> is the constant
+        /// that leaves the other operand unchanged; <paramref name="absorbing"/> is the constant
+        /// that forces the result to be itself. The identity checks take priority.
+        /// </summary>
+        private static void TryEliminateBitwiseOp(Operation operation, int identity, int absorbing)
+        {
+            Operand lhs = operation.GetSource(0);
+            Operand rhs = operation.GetSource(1);
+
+            if (IsConstEqual(lhs, identity))
+            {
+                operation.TurnIntoCopy(rhs);
+                return;
+            }
+
+            if (IsConstEqual(rhs, identity))
+            {
+                operation.TurnIntoCopy(lhs);
+                return;
+            }
+
+            if (IsConstEqual(lhs, absorbing) || IsConstEqual(rhs, absorbing))
+            {
+                operation.TurnIntoCopy(Const(absorbing));
+            }
+        }
+
+        /// <summary>
+        /// Turns the operation into a copy of its first source when the second source
+        /// is a constant equal to <paramref name="comparand"/>.
+        /// </summary>
+        private static void TryEliminateBinaryOpY(Operation operation, int comparand)
+        {
+            Operand lhs = operation.GetSource(0);
+            Operand rhs = operation.GetSource(1);
+
+            if (!IsConstEqual(rhs, comparand))
+            {
+                return;
+            }
+
+            operation.TurnIntoCopy(lhs);
+        }
+
+        /// <summary>
+        /// Turns the operation into a copy of the other source when either source is a
+        /// constant equal to <paramref name="comparand"/>. The first source is checked first.
+        /// </summary>
+        private static void TryEliminateBinaryOpCommutative(Operation operation, int comparand)
+        {
+            Operand lhs = operation.GetSource(0);
+            Operand rhs = operation.GetSource(1);
+
+            if (IsConstEqual(lhs, comparand))
+            {
+                operation.TurnIntoCopy(rhs);
+                return;
+            }
+
+            if (IsConstEqual(rhs, comparand))
+            {
+                operation.TurnIntoCopy(lhs);
+            }
+        }
+
+        /// <summary>
+        /// When the condition (source 0) is a constant, turns the select into a copy of
+        /// source 1 (condition non-zero) or source 2 (condition zero).
+        /// </summary>
+        private static void TryEliminateConditionalSelect(Operation operation)
+        {
+            Operand condition = operation.GetSource(0);
+
+            if (condition.Type != OperandType.Constant)
+            {
+                return;
+            }
+
+            int selectedIndex;
+
+            if (condition.Value != 0)
+            {
+                selectedIndex = 1;
+            }
+            else
+            {
+                selectedIndex = 2;
+            }
+
+            operation.TurnIntoCopy(operation.GetSource(selectedIndex));
+        }
+
+        /// <summary>
+        /// Returns true when the operand is a constant whose value equals <paramref name="comparand"/>.
+        /// </summary>
+        private static bool IsConstEqual(Operand operand, int comparand)
+        {
+            return operand.Type == OperandType.Constant && operand.Value == comparand;
+        }
+    }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/Translation/Optimizations/Utils.cs b/src/Ryujinx.Graphics.Shader/Translation/Optimizations/Utils.cs
new file mode 100644
index 00000000..4ca6d687
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/Translation/Optimizations/Utils.cs
@@ -0,0 +1,68 @@
+using Ryujinx.Graphics.Shader.IntermediateRepresentation;
+
+namespace Ryujinx.Graphics.Shader.Translation.Optimizations
+{
+    /// <summary>
+    /// Helpers shared by the optimization passes.
+    /// </summary>
+    static class Utils
+    {
+        /// <summary>
+        /// Looks for a predecessor that ends with a conditional branch and whose fall-through
+        /// successor (Next) is <paramref name="block"/>. Returns that branch, or null if none is found.
+        /// </summary>
+        private static Operation FindBranchSource(BasicBlock block)
+        {
+            foreach (BasicBlock predecessor in block.Predecessors)
+            {
+                if (predecessor.Operations.Count <= 0)
+                {
+                    continue;
+                }
+
+                if (!(predecessor.GetLastOp() is Operation branch))
+                {
+                    continue;
+                }
+
+                if (IsConditionalBranch(branch.Inst) && predecessor.Next == block)
+                {
+                    return branch;
+                }
+            }
+
+            return null;
+        }
+
+        /// <summary>
+        /// Returns true for BranchIfFalse and BranchIfTrue.
+        /// </summary>
+        private static bool IsConditionalBranch(Instruction inst)
+        {
+            switch (inst)
+            {
+                case Instruction.BranchIfFalse:
+                case Instruction.BranchIfTrue:
+                    return true;
+
+                default:
+                    return false;
+            }
+        }
+
+        /// <summary>
+        /// Checks whether both blocks are guarded by the same kind of conditional branch on the
+        /// very same condition operand instance. Only the closest conditional is examined.
+        /// </summary>
+        private static bool BlockConditionsMatch(BasicBlock currentBlock, BasicBlock queryBlock)
+        {
+            Operation currentBranch = FindBranchSource(currentBlock);
+            Operation queryBranch = FindBranchSource(queryBlock);
+
+            if (currentBranch == null || queryBranch == null)
+            {
+                return false;
+            }
+
+            if (currentBranch.Inst != queryBranch.Inst)
+            {
+                return false;
+            }
+
+            // The condition must be the same operand instance on both branches.
+            return currentBranch.GetSource(0) == queryBranch.GetSource(0);
+        }
+
+        /// <summary>
+        /// When <paramref name="source"/> is assigned by a phi node, returns the phi source whose
+        /// block is guarded by the same condition as <paramref name="block"/>, preferring the
+        /// latest phi sources. Otherwise (or when no phi source matches) returns
+        /// <paramref name="source"/> unchanged.
+        /// </summary>
+        public static Operand FindLastOperation(Operand source, BasicBlock block)
+        {
+            if (!(source.AsgOp is PhiNode phi))
+            {
+                return source;
+            }
+
+            // The phi value depends on which branch was taken, so only accept a source
+            // whose block conditions are also met by the current block.
+            int candidate = phi.SourcesCount;
+
+            while (--candidate >= 0)
+            {
+                if (BlockConditionsMatch(block, phi.GetBlock(candidate)))
+                {
+                    return phi.GetSource(candidate);
+                }
+            }
+
+            return source;
+        }
+    }
+}
diff --git a/src/Ryujinx.Graphics.Shader/Translation/RegisterUsage.cs b/src/Ryujinx.Graphics.Shader/Translation/RegisterUsage.cs
new file mode 100644
index 00000000..9e31831d
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/Translation/RegisterUsage.cs
@@ -0,0 +1,486 @@
+using Ryujinx.Graphics.Shader.Decoders;
+using Ryujinx.Graphics.Shader.IntermediateRepresentation;
+using System;
+using System.Collections.Generic;
+using System.Diagnostics;
+using System.Numerics;
+
+namespace Ryujinx.Graphics.Shader.Translation
+{
+ static class RegisterUsage
+ {
+ private const int RegsCount = 256;
+ private const int RegsMask = RegsCount - 1;
+
+ private const int GprMasks = 4;
+ private const int PredMasks = 1;
+ private const int FlagMasks = 1;
+ private const int TotalMasks = GprMasks + PredMasks + FlagMasks;
+
+        /// <summary>
+        /// Immutable set of registers stored as six 64-bit masks: four for general purpose
+        /// registers, one for predicates and one for flags. The bitwise operators combine
+        /// the masks field by field.
+        /// </summary>
+        private readonly struct RegisterMask : IEquatable<RegisterMask>
+        {
+            // The properties are get-only: every mask is produced through the constructor
+            // or the operators below, so the struct never needs to be mutated in place.
+            public long GprMask0 { get; }
+            public long GprMask1 { get; }
+            public long GprMask2 { get; }
+            public long GprMask3 { get; }
+            public long PredMask { get; }
+            public long FlagMask { get; }
+
+            public RegisterMask(long gprMask0, long gprMask1, long gprMask2, long gprMask3, long predMask, long flagMask)
+            {
+                GprMask0 = gprMask0;
+                GprMask1 = gprMask1;
+                GprMask2 = gprMask2;
+                GprMask3 = gprMask3;
+                PredMask = predMask;
+                FlagMask = flagMask;
+            }
+
+            /// <summary>
+            /// Returns the mask at the given position: 0 to 3 are the general purpose masks,
+            /// 4 is the predicate mask and 5 is the flag mask.
+            /// </summary>
+            /// <exception cref="ArgumentOutOfRangeException">The index is not in the 0 to 5 range.</exception>
+            public long GetMask(int index)
+            {
+                return index switch
+                {
+                    0 => GprMask0,
+                    1 => GprMask1,
+                    2 => GprMask2,
+                    3 => GprMask3,
+                    4 => PredMask,
+                    5 => FlagMask,
+                    _ => throw new ArgumentOutOfRangeException(nameof(index))
+                };
+            }
+
+            public static RegisterMask operator &(RegisterMask x, RegisterMask y)
+            {
+                return new RegisterMask(
+                    x.GprMask0 & y.GprMask0,
+                    x.GprMask1 & y.GprMask1,
+                    x.GprMask2 & y.GprMask2,
+                    x.GprMask3 & y.GprMask3,
+                    x.PredMask & y.PredMask,
+                    x.FlagMask & y.FlagMask);
+            }
+
+            public static RegisterMask operator |(RegisterMask x, RegisterMask y)
+            {
+                return new RegisterMask(
+                    x.GprMask0 | y.GprMask0,
+                    x.GprMask1 | y.GprMask1,
+                    x.GprMask2 | y.GprMask2,
+                    x.GprMask3 | y.GprMask3,
+                    x.PredMask | y.PredMask,
+                    x.FlagMask | y.FlagMask);
+            }
+
+            public static RegisterMask operator ~(RegisterMask x)
+            {
+                return new RegisterMask(
+                    ~x.GprMask0,
+                    ~x.GprMask1,
+                    ~x.GprMask2,
+                    ~x.GprMask3,
+                    ~x.PredMask,
+                    ~x.FlagMask);
+            }
+
+            public static bool operator ==(RegisterMask x, RegisterMask y)
+            {
+                return x.Equals(y);
+            }
+
+            public static bool operator !=(RegisterMask x, RegisterMask y)
+            {
+                return !x.Equals(y);
+            }
+
+            public override bool Equals(object obj)
+            {
+                return obj is RegisterMask regMask && Equals(regMask);
+            }
+
+            public bool Equals(RegisterMask other)
+            {
+                return GprMask0 == other.GprMask0 &&
+                       GprMask1 == other.GprMask1 &&
+                       GprMask2 == other.GprMask2 &&
+                       GprMask3 == other.GprMask3 &&
+                       PredMask == other.PredMask &&
+                       FlagMask == other.FlagMask;
+            }
+
+            public override int GetHashCode()
+            {
+                return HashCode.Combine(GprMask0, GprMask1, GprMask2, GprMask3, PredMask, FlagMask);
+            }
+        }
+
+        /// <summary>
+        /// Pair of register lists describing a function: the registers passed in as
+        /// arguments and the registers passed back out.
+        /// </summary>
+        public readonly struct FunctionRegisterUsage
+        {
+            public Register[] InArguments { get; }
+            public Register[] OutArguments { get; }
+
+            public FunctionRegisterUsage(Register[] inArguments, Register[] outArguments)
+            {
+                (InArguments, OutArguments) = (inArguments, outArguments);
+            }
+        }
+
+ public static FunctionRegisterUsage RunPass(ControlFlowGraph cfg)
+ { // Computes the register masks read and written across the function's blocks, inserts argument copies at the entry block and before returns, and returns both register lists.
+ List<Register> inArguments = new List<Register>();
+ List<Register> outArguments = new List<Register>();
+
+ // Compute local register inputs and outputs used inside blocks.
+ RegisterMask[] localInputs = new RegisterMask[cfg.Blocks.Length];
+ RegisterMask[] localOutputs = new RegisterMask[cfg.Blocks.Length];
+
+ foreach (BasicBlock block in cfg.Blocks)
+ {
+ for (LinkedListNode<INode> node = block.Operations.First; node != null; node = node.Next)
+ {
+ Operation operation = node.Value as Operation; // NOTE(review): dereferenced below without a null check; assumes every node here is an Operation - confirm.
+
+ for (int srcIndex = 0; srcIndex < operation.SourcesCount; srcIndex++)
+ {
+ Operand source = operation.GetSource(srcIndex);
+
+ if (source.Type != OperandType.Register)
+ {
+ continue;
+ }
+
+ Register register = source.GetRegister();
+ // A register read only counts as a block input if the block has not written it earlier.
+ localInputs[block.Index] |= GetMask(register) & ~localOutputs[block.Index];
+ }
+
+ if (operation.Dest != null && operation.Dest.Type == OperandType.Register)
+ {
+ localOutputs[block.Index] |= GetMask(operation.Dest.GetRegister());
+ }
+ }
+ }
+
+ // Compute global register inputs and outputs used across blocks.
+ RegisterMask[] globalCmnOutputs = new RegisterMask[cfg.Blocks.Length];
+
+ RegisterMask[] globalInputs = new RegisterMask[cfg.Blocks.Length];
+ RegisterMask[] globalOutputs = new RegisterMask[cfg.Blocks.Length];
+ // allOutputs starts empty and is only ORed into; allCmnOutputs starts with every bit set and is only ANDed into.
+ RegisterMask allOutputs = new RegisterMask();
+ RegisterMask allCmnOutputs = new RegisterMask(-1L, -1L, -1L, -1L, -1L, -1L);
+
+ bool modified;
+
+ bool firstPass = true;
+ // Iterate both passes below until no per-block mask changes.
+ do
+ {
+ modified = false;
+
+ // Compute register outputs, visiting PostOrderBlocks from last to first.
+ for (int index = cfg.PostOrderBlocks.Length - 1; index >= 0; index--)
+ {
+ BasicBlock block = cfg.PostOrderBlocks[index];
+
+ if (block.Predecessors.Count != 0)
+ {
+ BasicBlock predecessor = block.Predecessors[0];
+ // cmnOutputs is the intersection over all predecessors (written on every incoming path); outputs is the union (written on any incoming path).
+ RegisterMask cmnOutputs = localOutputs[predecessor.Index] | globalCmnOutputs[predecessor.Index];
+
+ RegisterMask outputs = globalOutputs[predecessor.Index];
+
+ for (int pIndex = 1; pIndex < block.Predecessors.Count; pIndex++)
+ {
+ predecessor = block.Predecessors[pIndex];
+
+ cmnOutputs &= localOutputs[predecessor.Index] | globalCmnOutputs[predecessor.Index];
+
+ outputs |= globalOutputs[predecessor.Index];
+ }
+ // Registers written on some incoming paths but not on all of them are treated as inputs of this block.
+ globalInputs[block.Index] |= outputs & ~cmnOutputs;
+ // After the first pass, the common outputs are intersected with the previous result, so they can only shrink.
+ if (!firstPass)
+ {
+ cmnOutputs &= globalCmnOutputs[block.Index];
+ }
+
+ if (EndsWithReturn(block))
+ {
+ allCmnOutputs &= cmnOutputs | localOutputs[block.Index];
+ }
+
+ if (Exchange(globalCmnOutputs, block.Index, cmnOutputs))
+ {
+ modified = true;
+ }
+
+ outputs |= localOutputs[block.Index];
+
+ if (Exchange(globalOutputs, block.Index, globalOutputs[block.Index] | outputs))
+ {
+ allOutputs |= outputs;
+ modified = true;
+ }
+ }
+ else if (Exchange(globalOutputs, block.Index, localOutputs[block.Index]))
+ {
+ allOutputs |= localOutputs[block.Index];
+ modified = true;
+ }
+ }
+
+ // Compute register inputs, visiting PostOrderBlocks from first to last.
+ for (int index = 0; index < cfg.PostOrderBlocks.Length; index++)
+ {
+ BasicBlock block = cfg.PostOrderBlocks[index];
+
+ RegisterMask inputs = localInputs[block.Index];
+ // Inputs of both successors (fall-through and branch target) propagate to this block.
+ if (block.Next != null)
+ {
+ inputs |= globalInputs[block.Next.Index];
+ }
+
+ if (block.Branch != null)
+ {
+ inputs |= globalInputs[block.Branch.Index];
+ }
+ // Registers in this block's common outputs mask are removed from its inputs.
+ inputs &= ~globalCmnOutputs[block.Index];
+
+ if (Exchange(globalInputs, block.Index, globalInputs[block.Index] | inputs))
+ {
+ modified = true;
+ }
+ }
+
+ firstPass = false;
+ }
+ while (modified);
+
+ // Insert load and store context instructions where needed.
+ foreach (BasicBlock block in cfg.Blocks)
+ {
+ // The only block without any predecessor should be the entry block.
+ // It always needs a context load as it is the first block to run.
+ if (block.Predecessors.Count == 0)
+ {
+ RegisterMask inputs = globalInputs[block.Index] | (allOutputs & ~allCmnOutputs); // Also loads registers that are stored on return but not written on every path to a return.
+
+ LoadLocals(block, inputs, inArguments);
+ }
+
+ if (EndsWithReturn(block))
+ {
+ StoreLocals(block, allOutputs, inArguments.Count, outArguments);
+ }
+ }
+
+ return new FunctionRegisterUsage(inArguments.ToArray(), outArguments.ToArray());
+ }
+
+        /// <summary>
+        /// Appends the register arguments of the callee to every call operation found in
+        /// <paramref name="blocks"/>: the callee input registers become extra sources, and
+        /// the callee output registers become extra destinations.
+        /// </summary>
+        /// <param name="blocks">Blocks whose operations are scanned for calls.</param>
+        /// <param name="frus">Register usage of each function, indexed by the constant function id (source 0 of the call).</param>
+        public static void FixupCalls(BasicBlock[] blocks, FunctionRegisterUsage[] frus)
+        {
+            foreach (BasicBlock block in blocks)
+            {
+                for (LinkedListNode<INode> node = block.Operations.First; node != null; node = node.Next)
+                {
+                    // Pattern matching skips nodes that are not operations, instead of
+                    // dereferencing the null produced by an unchecked "as" cast.
+                    if (node.Value is not Operation operation || operation.Inst != Instruction.Call)
+                    {
+                        continue;
+                    }
+
+                    Operand funcId = operation.GetSource(0);
+
+                    Debug.Assert(funcId.Type == OperandType.Constant);
+
+                    var fru = frus[funcId.Value];
+
+                    Operand[] inRegs = new Operand[fru.InArguments.Length];
+
+                    for (int i = 0; i < fru.InArguments.Length; i++)
+                    {
+                        inRegs[i] = OperandHelper.Register(fru.InArguments[i]);
+                    }
+
+                    operation.AppendSources(inRegs);
+
+                    // Element 0 is intentionally left null; the output registers start at index 1.
+                    Operand[] outRegs = new Operand[1 + fru.OutArguments.Length];
+
+                    for (int i = 0; i < fru.OutArguments.Length; i++)
+                    {
+                        outRegs[1 + i] = OperandHelper.Register(fru.OutArguments[i]);
+                    }
+
+                    operation.AppendDests(outRegs);
+                }
+            }
+        }
+
+        /// <summary>
+        /// Returns true when the block is not empty and its first node is an operation
+        /// with the given instruction.
+        /// </summary>
+        private static bool StartsWith(BasicBlock block, Instruction inst)
+        {
+            return block.Operations.Count != 0 &&
+                   block.Operations.First.Value is Operation first &&
+                   first.Inst == inst;
+        }
+
+        /// <summary>
+        /// Returns true when the block is not empty and its last node is an operation
+        /// with the given instruction.
+        /// </summary>
+        private static bool EndsWith(BasicBlock block, Instruction inst)
+        {
+            return block.Operations.Count != 0 &&
+                   block.Operations.Last.Value is Operation last &&
+                   last.Inst == inst;
+        }
+
+        /// <summary>
+        /// Builds a mask with a single bit set for the given register. General purpose
+        /// registers are spread over four 64-bit words (index / 64 selects the word,
+        /// index % 64 the bit). Predicates and flags use their own word. Any other
+        /// register type yields an empty mask.
+        /// </summary>
+        private static RegisterMask GetMask(Register register)
+        {
+            if (register.Type == RegisterType.Predicate)
+            {
+                return new RegisterMask(0, 0, 0, 0, 1L << register.Index, 0);
+            }
+
+            if (register.Type == RegisterType.Flag)
+            {
+                return new RegisterMask(0, 0, 0, 0, 0, 1L << register.Index);
+            }
+
+            Span<long> gprWords = stackalloc long[4];
+
+            if (register.Type == RegisterType.Gpr)
+            {
+                gprWords[register.Index >> 6] = 1L << (register.Index & 0x3f);
+            }
+
+            return new RegisterMask(gprWords[0], gprWords[1], gprWords[2], gprWords[3], 0, 0);
+        }
+
+        /// <summary>
+        /// Stores <paramref name="value"/> at <paramref name="blkIndex"/> and reports
+        /// whether it differs from the value that was there before.
+        /// </summary>
+        private static bool Exchange(RegisterMask[] masks, int blkIndex, RegisterMask value)
+        {
+            bool changed = masks[blkIndex] != value;
+
+            masks[blkIndex] = value;
+
+            return changed;
+        }
+
+        /// <summary>
+        /// Inserts, at the start of the block, one copy per register set in <paramref name="masks"/>.
+        /// Each copy reads the next function argument (starting at argument 0) into that register.
+        /// When <paramref name="inArguments"/> is empty on entry, the registers are also appended
+        /// to it in the same order.
+        /// </summary>
+        private static void LoadLocals(BasicBlock block, RegisterMask masks, List<Register> inArguments)
+        {
+            bool recordArguments = inArguments.Count == 0;
+            LinkedListNode<INode> lastInserted = null;
+            int nextArgument = 0;
+
+            for (int maskIndex = 0; maskIndex < TotalMasks; maskIndex++)
+            {
+                var (regType, baseRegIndex) = GetRegTypeAndBaseIndex(maskIndex);
+
+                for (long pending = masks.GetMask(maskIndex); pending != 0;)
+                {
+                    // Take the lowest set bit and clear it from the pending set.
+                    int bit = BitOperations.TrailingZeroCount(pending);
+
+                    pending &= ~(1L << bit);
+
+                    Register register = new Register(baseRegIndex + bit, regType);
+
+                    if (recordArguments)
+                    {
+                        inArguments.Add(register);
+                    }
+
+                    Operand destination = OperandHelper.Register(register);
+                    Operand argument = OperandHelper.Argument(nextArgument++);
+
+                    Operation copyOp = new Operation(Instruction.Copy, destination, argument);
+
+                    // The first copy goes to the front of the block, the rest follow it in order.
+                    lastInserted = lastInserted == null
+                        ? block.Operations.AddFirst(copyOp)
+                        : block.Operations.AddAfter(lastInserted, copyOp);
+                }
+            }
+
+            Debug.Assert(nextArgument <= inArguments.Count);
+        }
+
+        /// <summary>
+        /// Inserts, right before the last operation of the block, one copy per register set in
+        /// <paramref name="masks"/>. Each copy writes the register into the next function argument,
+        /// starting at argument index <paramref name="inArgumentsCount"/>. When
+        /// <paramref name="outArguments"/> is empty on entry, the registers are also appended to it
+        /// in the same order.
+        /// </summary>
+        private static void StoreLocals(BasicBlock block, RegisterMask masks, int inArgumentsCount, List<Register> outArguments)
+        {
+            LinkedListNode<INode> lastInserted = null;
+            int nextArgument = inArgumentsCount;
+            bool recordArguments = outArguments.Count == 0;
+
+            for (int maskIndex = 0; maskIndex < TotalMasks; maskIndex++)
+            {
+                var (regType, baseRegIndex) = GetRegTypeAndBaseIndex(maskIndex);
+
+                for (long pending = masks.GetMask(maskIndex); pending != 0;)
+                {
+                    // Take the lowest set bit and clear it from the pending set.
+                    int bit = BitOperations.TrailingZeroCount(pending);
+
+                    pending &= ~(1L << bit);
+
+                    Register register = new Register(baseRegIndex + bit, regType);
+
+                    if (recordArguments)
+                    {
+                        outArguments.Add(register);
+                    }
+
+                    Operand argument = OperandHelper.Argument(nextArgument++);
+                    Operand source = OperandHelper.Register(register);
+
+                    Operation copyOp = new Operation(Instruction.Copy, argument, source);
+
+                    // The first copy goes before the block's last operation, the rest follow it in order.
+                    lastInserted = lastInserted == null
+                        ? block.Operations.AddBefore(block.Operations.Last, copyOp)
+                        : block.Operations.AddAfter(lastInserted, copyOp);
+                }
+            }
+
+            Debug.Assert(nextArgument <= inArgumentsCount + outArguments.Count);
+        }
+
+        /// <summary>
+        /// Maps a mask position to the register type it holds and the index of the register
+        /// stored in its bit 0. Positions below GprMasks are general purpose words of 64
+        /// registers each, position GprMasks is the predicate word, anything above is the flag word.
+        /// </summary>
+        private static (RegisterType RegType, int BaseRegIndex) GetRegTypeAndBaseIndex(int i)
+        {
+            if (i < GprMasks)
+            {
+                return (RegisterType.Gpr, i * sizeof(long) * 8);
+            }
+
+            return (i == GprMasks ? RegisterType.Predicate : RegisterType.Flag, 0);
+        }
+
+        /// <summary>
+        /// Returns true when the last node of the block is an operation whose instruction is Return.
+        /// </summary>
+        private static bool EndsWithReturn(BasicBlock block)
+        {
+            return block.GetLastOp() is Operation lastOp && lastOp.Inst == Instruction.Return;
+        }
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/Translation/Rewriter.cs b/src/Ryujinx.Graphics.Shader/Translation/Rewriter.cs
new file mode 100644
index 00000000..91e7ace1
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/Translation/Rewriter.cs
@@ -0,0 +1,768 @@
+using Ryujinx.Graphics.Shader.Decoders;
+using Ryujinx.Graphics.Shader.IntermediateRepresentation;
+using System.Collections.Generic;
+using System.Diagnostics;
+using System.Linq;
+using System.Numerics;
+
+using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
+using static Ryujinx.Graphics.Shader.Translation.GlobalMemory;
+
+namespace Ryujinx.Graphics.Shader.Translation
+{
+ static class Rewriter
+ {
+        /// <summary>
+        /// Walks every operation of every block and rewrites the ones that need it:
+        /// constant buffer draw parameter accesses (vertex stage only), texture samples
+        /// and global memory accesses. Nodes that are not operations are skipped.
+        /// </summary>
+        public static void RunPass(BasicBlock[] blocks, ShaderConfig config)
+        {
+            bool isVertexShader = config.Stage == ShaderStage.Vertex;
+            bool cbDrawParameters = config.GpuAccessor.QueryHasConstantBufferDrawParameters();
+            bool hostHasSnormBufferFormat = config.GpuAccessor.QueryHostSupportsSnormBufferTextureFormat();
+
+            foreach (BasicBlock block in blocks)
+            {
+                LinkedListNode<INode> node = block.Operations.First;
+
+                while (node != null)
+                {
+                    if (node.Value is not Operation operation)
+                    {
+                        node = node.Next;
+                        continue;
+                    }
+
+                    if (isVertexShader)
+                    {
+                        // Either replace the access, or only detect it, depending on the query result.
+                        bool usesDrawParameters = cbDrawParameters
+                            ? ReplaceConstantBufferWithDrawParameters(node, operation)
+                            : HasConstantBufferDrawParameters(operation);
+
+                        if (usesDrawParameters)
+                        {
+                            config.SetUsedFeature(FeatureFlags.DrawParameters);
+                        }
+                    }
+
+                    // Captured before rewriting, since the rewrites below may replace the current node.
+                    LinkedListNode<INode> following = node.Next;
+
+                    if (operation is TextureOperation texOp)
+                    {
+                        if (texOp.Inst == Instruction.TextureSample)
+                        {
+                            node = RewriteTextureSample(node, config);
+
+                            bool needsSnormFixup = texOp.Type == SamplerType.TextureBuffer && !hostHasSnormBufferFormat;
+
+                            if (needsSnormFixup)
+                            {
+                                node = InsertSnormNormalization(node, config);
+                            }
+                        }
+
+                        following = node.Next;
+                    }
+                    else if (UsesGlobalMemory(operation.Inst, operation.StorageKind))
+                    {
+                        LinkedListNode<INode> rewritten = RewriteGlobalAccess(node, config);
+
+                        following = rewritten?.Next ?? following;
+                    }
+
+                    node = following;
+                }
+            }
+        }
+
+ private static LinkedListNode<INode> RewriteGlobalAccess(LinkedListNode<INode> node, ShaderConfig config)
+ { // Replaces a global memory operation with a storage buffer access (plus constant buffer fallbacks for loads), removes the original node, and returns the replacement node, or null when nothing was inserted.
+ Operation operation = (Operation)node.Value;
+
+ bool isAtomic = operation.Inst.IsAtomic();
+ bool isStg16Or8 = operation.Inst == Instruction.StoreGlobal16 || operation.Inst == Instruction.StoreGlobal8;
+ bool isWrite = isAtomic || operation.Inst == Instruction.StoreGlobal || isStg16Or8;
+
+ Operation storageOp = null;
+ // Inserts a new operation before the current node and returns its fresh local destination.
+ Operand PrependOperation(Instruction inst, params Operand[] sources)
+ {
+ Operand local = Local();
+
+ node.List.AddBefore(node, new Operation(inst, local, sources));
+
+ return local;
+ }
+ // Retargets an existing operation to a fresh local, inserts it before the current node, and returns that local.
+ Operand PrependExistingOperation(Operation operation)
+ {
+ Operand local = Local();
+
+ operation.Dest = local;
+ node.List.AddBefore(node, operation);
+
+ return local;
+ }
+ // Sources 0 and 1 of the original operation are the low and high halves of the address.
+ Operand addrLow = operation.GetSource(0);
+ Operand addrHigh = operation.GetSource(1);
+ // Defaults used when no range check below succeeds: base address 0 and slot 0.
+ Operand sbBaseAddrLow = Const(0);
+ Operand sbSlot = Const(0);
+ // Negated alignment, used below as an AND mask (presumably the alignment is a power of two - confirm).
+ Operand alignMask = Const(-config.GpuAccessor.QueryHostStorageBufferOffsetAlignment());
+ // Emits a check that the address lies inside the range described by three consecutive Cbuf(0, ...) entries at cbOffset (base low, base high, size).
+ Operand BindingRangeCheck(int cbOffset, out Operand baseAddrLow)
+ {
+ baseAddrLow = Cbuf(0, cbOffset);
+ Operand baseAddrHigh = Cbuf(0, cbOffset + 1);
+ Operand size = Cbuf(0, cbOffset + 2);
+
+ Operand offset = PrependOperation(Instruction.Subtract, addrLow, baseAddrLow);
+ Operand borrow = PrependOperation(Instruction.CompareLessU32, addrLow, baseAddrLow);
+
+ Operand inRangeLow = PrependOperation(Instruction.CompareLessU32, offset, size);
+ // NOTE(review): presumably the comparison yields -1 when true, so this Add subtracts the borrow from the high half - confirm.
+ Operand addrHighBorrowed = PrependOperation(Instruction.Add, addrHigh, borrow);
+
+ Operand inRangeHigh = PrependOperation(Instruction.CompareEqual, addrHighBorrowed, baseAddrHigh);
+
+ return PrependOperation(Instruction.BitwiseAnd, inRangeLow, inRangeHigh);
+ }
+
+ int sbUseMask = config.AccessibleStorageBuffersMask;
+ // For every accessible storage buffer slot, emit a range check and select that slot and its base address when the check passes.
+ while (sbUseMask != 0)
+ {
+ int slot = BitOperations.TrailingZeroCount(sbUseMask);
+
+ sbUseMask &= ~(1 << slot);
+
+ config.SetUsedStorageBuffer(slot, isWrite);
+
+ int cbOffset = GetStorageCbOffset(config.Stage, slot);
+
+ Operand inRange = BindingRangeCheck(cbOffset, out Operand baseAddrLow);
+
+ sbBaseAddrLow = PrependOperation(Instruction.ConditionalSelect, inRange, baseAddrLow, sbBaseAddrLow);
+ sbSlot = PrependOperation(Instruction.ConditionalSelect, inRange, Const(slot), sbSlot);
+ }
+
+ if (config.AccessibleStorageBuffersMask != 0)
+ {
+ Operand baseAddrTrunc = PrependOperation(Instruction.BitwiseAnd, sbBaseAddrLow, alignMask);
+ Operand byteOffset = PrependOperation(Instruction.Subtract, addrLow, baseAddrTrunc);
+
+ Operand[] sources = new Operand[operation.SourcesCount];
+ // The two address sources are replaced by the selected slot and an offset; remaining sources are copied as is.
+ sources[0] = sbSlot;
+
+ if (isStg16Or8)
+ {
+ sources[1] = byteOffset; // 8 and 16-bit stores keep the byte offset.
+ }
+ else
+ {
+ sources[1] = PrependOperation(Instruction.ShiftRightU32, byteOffset, Const(2)); // Other accesses use the byte offset divided by 4.
+ }
+
+ for (int index = 2; index < operation.SourcesCount; index++)
+ {
+ sources[index] = operation.GetSource(index);
+ }
+
+ if (isAtomic)
+ {
+ storageOp = new Operation(operation.Inst, StorageKind.StorageBuffer, operation.Dest, sources);
+ }
+ else if (operation.Inst == Instruction.LoadGlobal)
+ {
+ storageOp = new Operation(Instruction.LoadStorage, operation.Dest, sources);
+ }
+ else
+ {
+ Instruction storeInst = operation.Inst switch
+ {
+ Instruction.StoreGlobal16 => Instruction.StoreStorage16,
+ Instruction.StoreGlobal8 => Instruction.StoreStorage8,
+ _ => Instruction.StoreStorage
+ };
+
+ storageOp = new Operation(storeInst, null, sources);
+ }
+ }
+ else if (operation.Dest != null)
+ {
+ storageOp = new Operation(Instruction.Copy, operation.Dest, Const(0)); // No storage buffer is accessible: the result becomes a copy of constant 0.
+ }
+
+ if (operation.Inst == Instruction.LoadGlobal)
+ {
+ int cbeUseMask = config.AccessibleConstantBuffersMask;
+ // Loads additionally check each accessible constant buffer range and select the constant buffer value when the address is inside it.
+ while (cbeUseMask != 0)
+ {
+ int slot = BitOperations.TrailingZeroCount(cbeUseMask);
+ int cbSlot = UbeFirstCbuf + slot;
+
+ cbeUseMask &= ~(1 << slot);
+
+ config.SetUsedConstantBuffer(cbSlot);
+ // NOTE(review): storageOp is assumed non-null here (a LoadGlobal is expected to have a destination) - confirm.
+ Operand previousResult = PrependExistingOperation(storageOp);
+
+ int cbOffset = GetConstantUbeOffset(slot);
+
+ Operand inRange = BindingRangeCheck(cbOffset, out Operand baseAddrLow);
+
+ Operand baseAddrTruncConst = PrependOperation(Instruction.BitwiseAnd, baseAddrLow, alignMask);
+ Operand byteOffsetConst = PrependOperation(Instruction.Subtract, addrLow, baseAddrTruncConst);
+
+ Operand cbIndex = PrependOperation(Instruction.ShiftRightU32, byteOffsetConst, Const(2));
+
+ Operand[] sourcesCb = new Operand[operation.SourcesCount];
+
+ sourcesCb[0] = Const(cbSlot);
+ sourcesCb[1] = cbIndex;
+
+ for (int index = 2; index < operation.SourcesCount; index++)
+ {
+ sourcesCb[index] = operation.GetSource(index);
+ }
+
+ Operand ldcResult = PrependOperation(Instruction.LoadConstant, sourcesCb);
+ // The select becomes the new pending result; it is either prepended by the next iteration or inserted at the end.
+ storageOp = new Operation(Instruction.ConditionalSelect, operation.Dest, inRange, ldcResult, previousResult);
+ }
+ }
+ // Clear the sources of the old operation (presumably to drop its uses of those operands - confirm).
+ for (int index = 0; index < operation.SourcesCount; index++)
+ {
+ operation.SetSource(index, null);
+ }
+
+ LinkedListNode<INode> oldNode = node;
+ LinkedList<INode> oldNodeList = oldNode.List;
+
+ if (storageOp != null)
+ {
+ node = node.List.AddBefore(node, storageOp);
+ }
+ else
+ {
+ node = null;
+ }
+
+ oldNodeList.Remove(oldNode);
+
+ return node;
+ }
+
+ private static LinkedListNode<INode> RewriteTextureSample(LinkedListNode<INode> node, ShaderConfig config)
+ { // Rewrites a texture sample that uses non-normalized coordinates or unsupported non-constant offsets into equivalent operations; returns the original node when nothing needs to change, otherwise the replacement node.
+ TextureOperation texOp = (TextureOperation)node.Value;
+
+ bool hasOffset = (texOp.Flags & TextureFlags.Offset) != 0;
+ bool hasOffsets = (texOp.Flags & TextureFlags.Offsets) != 0;
+ // Provisional: refined further below once it is known whether all offsets are constants.
+ bool hasInvalidOffset = (hasOffset || hasOffsets) && !config.GpuAccessor.QueryHostSupportsNonConstantTextureOffset();
+
+ bool isBindless = (texOp.Flags & TextureFlags.Bindless) != 0;
+ // Bindless textures are always treated as having normalized coordinates.
+ bool isCoordNormalized = isBindless || config.GpuAccessor.QueryTextureCoordNormalized(texOp.Handle, texOp.CbufSlot);
+
+ if (!hasInvalidOffset && isCoordNormalized)
+ {
+ return node;
+ }
+
+ bool isGather = (texOp.Flags & TextureFlags.Gather) != 0;
+ bool hasDerivatives = (texOp.Flags & TextureFlags.Derivatives) != 0;
+ bool intCoords = (texOp.Flags & TextureFlags.IntCoords) != 0;
+ bool hasLodBias = (texOp.Flags & TextureFlags.LodBias) != 0;
+ bool hasLodLevel = (texOp.Flags & TextureFlags.LodLevel) != 0;
+
+ bool isArray = (texOp.Type & SamplerType.Array) != 0;
+ bool isIndexed = (texOp.Type & SamplerType.Indexed) != 0;
+ bool isMultisample = (texOp.Type & SamplerType.Multisample) != 0;
+ bool isShadow = (texOp.Type & SamplerType.Shadow) != 0;
+
+ int coordsCount = texOp.Type.GetDimensions();
+
+ int offsetsCount;
+ // With the Offsets flag there are 4 offset vectors, with the Offset flag a single one.
+ if (hasOffsets)
+ {
+ offsetsCount = coordsCount * 4;
+ }
+ else if (hasOffset)
+ {
+ offsetsCount = coordsCount;
+ }
+ else
+ {
+ offsetsCount = 0;
+ }
+ // The offsets are split out of the source list; "sources" holds everything else.
+ Operand[] offsets = new Operand[offsetsCount];
+ Operand[] sources = new Operand[texOp.SourcesCount - offsetsCount];
+ // copyCount counts the leading sources that come before the offsets and are copied unchanged.
+ int copyCount = 0;
+
+ if (isBindless || isIndexed)
+ {
+ copyCount++;
+ }
+ // lodSources holds the optional leading source plus the coordinates; it is passed to InsertTextureSize below.
+ Operand[] lodSources = new Operand[copyCount + coordsCount];
+
+ for (int index = 0; index < lodSources.Length; index++)
+ {
+ lodSources[index] = texOp.GetSource(index);
+ }
+
+ copyCount += coordsCount;
+
+ if (isArray)
+ {
+ copyCount++;
+ }
+
+ if (isShadow)
+ {
+ copyCount++;
+ }
+
+ if (hasDerivatives)
+ {
+ copyCount += coordsCount * 2;
+ }
+
+ if (isMultisample)
+ {
+ copyCount++;
+ }
+ else if (hasLodLevel)
+ {
+ copyCount++;
+ }
+
+ int srcIndex = 0;
+ int dstIndex = 0;
+
+ for (int index = 0; index < copyCount; index++)
+ {
+ sources[dstIndex++] = texOp.GetSource(srcIndex++);
+ }
+
+ bool areAllOffsetsConstant = true;
+
+ for (int index = 0; index < offsetsCount; index++)
+ {
+ Operand offset = texOp.GetSource(srcIndex++);
+
+ areAllOffsetsConstant &= offset.Type == OperandType.Constant;
+
+ offsets[index] = offset;
+ }
+ // Offsets only need emulation when at least one of them is not a constant.
+ hasInvalidOffset &= !areAllOffsetsConstant;
+
+ if (!hasInvalidOffset && isCoordNormalized)
+ {
+ return node;
+ }
+ // The sources that follow the offsets (LOD bias, and one more source for non-shadow gathers) are copied as is.
+ if (hasLodBias)
+ {
+ sources[dstIndex++] = texOp.GetSource(srcIndex++);
+ }
+
+ if (isGather && !isShadow)
+ {
+ sources[dstIndex++] = texOp.GetSource(srcIndex++);
+ }
+ // The coordinates start after the optional leading source used by bindless/indexed accesses.
+ int coordsIndex = isBindless || isIndexed ? 1 : 0;
+
+ int componentIndex = texOp.Index;
+ // Inserts a signed integer to FP32 conversion before the current node and returns its result.
+ Operand Float(Operand value)
+ {
+ Operand res = Local();
+
+ node.List.AddBefore(node, new Operation(Instruction.ConvertS32ToFP32, res, value));
+
+ return res;
+ }
+
+ // Emulate non-normalized coordinates by normalizing the coordinates on the shader.
+ // Without normalization, the coordinates are expected to be in the [0, W or H] range,
+ // and otherwise, it is expected to be in the [0, 1] range.
+ // We normalize by dividing the coords by the texture size.
+ if (!isCoordNormalized && !intCoords)
+ {
+ config.SetUsedFeature(FeatureFlags.IntegerSampling);
+ // Cube textures only have their first 2 coordinates normalized.
+ int normCoordsCount = (texOp.Type & SamplerType.Mask) == SamplerType.TextureCube ? 2 : coordsCount;
+
+ for (int index = 0; index < normCoordsCount; index++)
+ {
+ Operand coordSize = Local();
+
+ Operand[] texSizeSources;
+
+ if (isBindless || isIndexed)
+ {
+ texSizeSources = new Operand[] { sources[0], Const(0) };
+ }
+ else
+ {
+ texSizeSources = new Operand[] { Const(0) };
+ }
+ // One TextureSize operation per coordinate; the coordinate index is passed as the operation's index argument.
+ node.List.AddBefore(node, new TextureOperation(
+ Instruction.TextureSize,
+ texOp.Type,
+ texOp.Format,
+ texOp.Flags,
+ texOp.CbufSlot,
+ texOp.Handle,
+ index,
+ new[] { coordSize },
+ texSizeSources));
+
+ config.SetUsedTexture(Instruction.TextureSize, texOp.Type, texOp.Format, texOp.Flags, texOp.CbufSlot, texOp.Handle);
+
+ Operand source = sources[coordsIndex + index];
+
+ Operand coordNormalized = Local();
+
+ node.List.AddBefore(node, new Operation(Instruction.FP32 | Instruction.Divide, coordNormalized, source, Float(coordSize)));
+
+ sources[coordsIndex + index] = coordNormalized;
+ }
+ }
+
+ Operand[] dests = new Operand[texOp.DestsCount];
+
+ for (int i = 0; i < texOp.DestsCount; i++)
+ {
+ dests[i] = texOp.GetDest(i);
+ }
+
+ Operand bindlessHandle = isBindless || isIndexed ? sources[0] : null;
+
+ LinkedListNode<INode> oldNode = node;
+
+ // Technically, non-constant texture offsets are not allowed (according to the spec),
+ // however some GPUs do support that.
+ // For GPUs where it is not supported, we can replace the instruction with the following:
+ // For texture*Offset, we replace it by texture*, and add the offset to the P coords.
+ // The offset can be calculated as offset / textureSize(lod), where lod = textureQueryLod(coords).
+ // For texelFetchOffset, we replace it by texelFetch and add the offset to the P coords directly.
+ // For textureGatherOffset, we split the operation into up to 4 operations, one for each component
+ // that is accessed, where each textureGather operation has a different offset for each pixel.
+ if (hasInvalidOffset && isGather && !isShadow)
+ {
+ config.SetUsedFeature(FeatureFlags.IntegerSampling);
+
+ Operand[] newSources = new Operand[sources.Length];
+
+ sources.CopyTo(newSources, 0);
+
+ Operand[] texSizes = InsertTextureSize(node, texOp, lodSources, bindlessHandle, coordsCount);
+
+ int destIndex = 0;
+ // Emit one sample per component bit set in texOp.Index, each writing a single destination.
+ for (int compIndex = 0; compIndex < 4; compIndex++)
+ {
+ if (((texOp.Index >> compIndex) & 1) == 0)
+ {
+ continue;
+ }
+
+ for (int index = 0; index < coordsCount; index++)
+ {
+ config.SetUsedTexture(Instruction.TextureSize, texOp.Type, texOp.Format, texOp.Flags, texOp.CbufSlot, texOp.Handle);
+
+ Operand offset = Local();
+ // With per-component offsets (Offsets flag), each component uses its own offset vector.
+ Operand intOffset = offsets[index + (hasOffsets ? compIndex * coordsCount : 0)];
+
+ node.List.AddBefore(node, new Operation(Instruction.FP32 | Instruction.Divide, offset, Float(intOffset), Float(texSizes[index])));
+
+ Operand source = sources[coordsIndex + index];
+
+ Operand coordPlusOffset = Local();
+
+ node.List.AddBefore(node, new Operation(Instruction.FP32 | Instruction.Add, coordPlusOffset, source, offset));
+
+ newSources[coordsIndex + index] = coordPlusOffset;
+ }
+
+ TextureOperation newTexOp = new TextureOperation(
+ Instruction.TextureSample,
+ texOp.Type,
+ texOp.Format,
+ texOp.Flags & ~(TextureFlags.Offset | TextureFlags.Offsets),
+ texOp.CbufSlot,
+ texOp.Handle,
+ 1,
+ new[] { dests[destIndex++] },
+ newSources);
+
+ node = node.List.AddBefore(node, newTexOp);
+ }
+ }
+ else
+ {
+ if (hasInvalidOffset)
+ {
+ if (intCoords)
+ {
+ for (int index = 0; index < coordsCount; index++) // Integer coordinates: the offset is added to the coordinate directly.
+ {
+ Operand source = sources[coordsIndex + index];
+
+ Operand coordPlusOffset = Local();
+
+ node.List.AddBefore(node, new Operation(Instruction.Add, coordPlusOffset, source, offsets[index]));
+
+ sources[coordsIndex + index] = coordPlusOffset;
+ }
+ }
+ else
+ {
+ config.SetUsedFeature(FeatureFlags.IntegerSampling);
+
+ Operand[] texSizes = InsertTextureSize(node, texOp, lodSources, bindlessHandle, coordsCount);
+
+ for (int index = 0; index < coordsCount; index++) // Float coordinates: the offset is divided by the texture size before being added.
+ {
+ config.SetUsedTexture(Instruction.TextureSize, texOp.Type, texOp.Format, texOp.Flags, texOp.CbufSlot, texOp.Handle);
+
+ Operand offset = Local();
+
+ Operand intOffset = offsets[index];
+
+ node.List.AddBefore(node, new Operation(Instruction.FP32 | Instruction.Divide, offset, Float(intOffset), Float(texSizes[index])));
+
+ Operand source = sources[coordsIndex + index];
+
+ Operand coordPlusOffset = Local();
+
+ node.List.AddBefore(node, new Operation(Instruction.FP32 | Instruction.Add, coordPlusOffset, source, offset));
+
+ sources[coordsIndex + index] = coordPlusOffset;
+ }
+ }
+ }
+ // The replacement sample drops the offset flags, since offsets were either folded into the coordinates or not emulated.
+ TextureOperation newTexOp = new TextureOperation(
+ Instruction.TextureSample,
+ texOp.Type,
+ texOp.Format,
+ texOp.Flags & ~(TextureFlags.Offset | TextureFlags.Offsets),
+ texOp.CbufSlot,
+ texOp.Handle,
+ componentIndex,
+ dests,
+ sources);
+
+ node = node.List.AddBefore(node, newTexOp);
+ }
+
+ node.List.Remove(oldNode);
+ // Clear the sources of the old operation (presumably to drop its uses of those operands - confirm).
+ for (int index = 0; index < texOp.SourcesCount; index++)
+ {
+ texOp.SetSource(index, null);
+ }
+
+ return node;
+ }
+
+ /// <summary>
+ /// Emits a texture LOD query followed by one texture size query per coordinate,
+ /// all inserted before <paramref name="node"/>.
+ /// </summary>
+ /// <param name="node">Node that the new operations are inserted before</param>
+ /// <param name="texOp">Texture operation whose type, format, flags, slot and handle are reused by the queries</param>
+ /// <param name="lodSources">Sources passed to the LOD query</param>
+ /// <param name="bindlessHandle">Handle placed first in the size query sources, or null when there is none</param>
+ /// <param name="coordsCount">Number of size components to query</param>
+ /// <returns>One operand per coordinate, each being the destination of its size query</returns>
+ private static Operand[] InsertTextureSize(
+ LinkedListNode<INode> node,
+ TextureOperation texOp,
+ Operand[] lodSources,
+ Operand bindlessHandle,
+ int coordsCount)
+ {
+ // Inserts a FP32 to S32 conversion of the value before the node and returns its destination.
+ Operand ConvertToS32(Operand value)
+ {
+ Operand converted = Local();
+ Operation conversion = new Operation(Instruction.ConvertFP32ToS32, converted, value);
+
+ node.List.AddBefore(node, conversion);
+
+ return converted;
+ }
+
+ Operand[] sizes = new Operand[coordsCount];
+ Operand lodResult = Local();
+
+ TextureOperation lodQuery = new TextureOperation(
+ Instruction.Lod,
+ texOp.Type,
+ texOp.Format,
+ texOp.Flags,
+ texOp.CbufSlot,
+ texOp.Handle,
+ 0,
+ new[] { lodResult },
+ lodSources);
+
+ node.List.AddBefore(node, lodQuery);
+
+ for (int component = 0; component < coordsCount; component++)
+ {
+ Operand size = Local();
+ sizes[component] = size;
+
+ // The LOD is converted once per component, each size query reads its own converted operand.
+ Operand intLod = ConvertToS32(lodResult);
+
+ Operand[] sizeSources = bindlessHandle != null
+ ? new Operand[] { bindlessHandle, intLod }
+ : new Operand[] { intLod };
+
+ TextureOperation sizeQuery = new TextureOperation(
+ Instruction.TextureSize,
+ texOp.Type,
+ texOp.Format,
+ texOp.Flags,
+ texOp.CbufSlot,
+ texOp.Handle,
+ component,
+ new[] { size },
+ sizeSources);
+
+ node.List.AddBefore(node, sizeQuery);
+ }
+
+ return sizes;
+ }
+
+ /// <summary>
+ /// When the queried format of the texture is one of the listed SNORM formats, inserts after the
+ /// texture operation a conversion to FP32 and a multiplication by 1 / max for every destination,
+ /// and makes the existing users of each destination read the multiplied value instead.
+ /// </summary>
+ /// <param name="node">Node holding the texture operation</param>
+ /// <param name="config">Shader configuration used to query the texture format</param>
+ /// <returns>The last node inserted, or <paramref name="node"/> itself when nothing was inserted</returns>
+ private static LinkedListNode<INode> InsertSnormNormalization(LinkedListNode<INode> node, ShaderConfig config)
+ {
+ TextureOperation texOp = (TextureOperation)node.Value;
+
+ // The handle of a bindless texture is unknown, so its format can't be queried.
+ if (texOp.Flags.HasFlag(TextureFlags.Bindless))
+ {
+ return node;
+ }
+
+ int maxPositive;
+
+ switch (config.GpuAccessor.QueryTextureFormat(texOp.Handle, texOp.CbufSlot))
+ {
+ case TextureFormat.R8Snorm:
+ case TextureFormat.R8G8Snorm:
+ case TextureFormat.R8G8B8A8Snorm:
+ maxPositive = sbyte.MaxValue;
+ break;
+ case TextureFormat.R16Snorm:
+ case TextureFormat.R16G16Snorm:
+ case TextureFormat.R16G16B16A16Snorm:
+ maxPositive = short.MaxValue;
+ break;
+ default:
+ // Not a SNORM format, nothing to normalize.
+ return node;
+ }
+
+ // SINT formats are assumed to be in use as replacement for SNORM (which is not supported),
+ // so the integer value is converted and scaled here.
+ for (int destIndex = 0; destIndex < texOp.DestsCount; destIndex++)
+ {
+ Operand texel = texOp.GetDest(destIndex);
+
+ // Take the list of users before creating the conversion, which also reads the raw value.
+ INode[] previousUses = texel.UseOps.ToArray();
+
+ Operation convOp = new Operation(Instruction.ConvertS32ToFP32, Local(), texel);
+ Operation normOp = new Operation(Instruction.FP32 | Instruction.Multiply, Local(), convOp.Dest, ConstF(1f / maxPositive));
+
+ node = node.List.AddAfter(node, convOp);
+ node = node.List.AddAfter(node, normOp);
+
+ foreach (INode user in previousUses)
+ {
+ // Only users that are operations are patched.
+ if (user is Operation userOp)
+ {
+ for (int srcIndex = 0; srcIndex < userOp.SourcesCount; srcIndex++)
+ {
+ if (userOp.GetSource(srcIndex) == texel)
+ {
+ userOp.SetSource(srcIndex, normOp.Dest);
+ }
+ }
+ }
+ }
+ }
+
+ return node;
+ }
+
+ /// <summary>
+ /// Replaces sources that read constant buffer slot 0 at the base vertex, base instance or
+ /// draw index offsets with the result of an input load of the matching variable.
+ /// The load is inserted before <paramref name="node"/>.
+ /// </summary>
+ /// <param name="node">Node that the loads are inserted before</param>
+ /// <param name="operation">Operation whose sources are inspected and replaced</param>
+ /// <returns>True if at least one source was replaced</returns>
+ private static bool ReplaceConstantBufferWithDrawParameters(LinkedListNode<INode> node, Operation operation)
+ {
+ bool modified = false;
+
+ for (int srcIndex = 0; srcIndex < operation.SourcesCount; srcIndex++)
+ {
+ Operand src = operation.GetSource(srcIndex);
+
+ if (src.Type != OperandType.ConstantBuffer || src.GetCbufSlot() != 0)
+ {
+ continue;
+ }
+
+ IoVariable ioVariable;
+
+ switch (src.GetCbufOffset())
+ {
+ case Constants.NvnBaseVertexByteOffset / 4:
+ ioVariable = IoVariable.BaseVertex;
+ break;
+ case Constants.NvnBaseInstanceByteOffset / 4:
+ ioVariable = IoVariable.BaseInstance;
+ break;
+ case Constants.NvnDrawIndexByteOffset / 4:
+ ioVariable = IoVariable.DrawIndex;
+ break;
+ default:
+ // Any other offset is left untouched.
+ continue;
+ }
+
+ Operand loaded = Local();
+
+ node.List.AddBefore(node, new Operation(Instruction.Load, StorageKind.Input, loaded, Const((int)ioVariable)));
+ operation.SetSource(srcIndex, loaded);
+
+ modified = true;
+ }
+
+ return modified;
+ }
+
+ /// <summary>
+ /// Checks if any source of the operation reads constant buffer slot 0 at the
+ /// base vertex, base instance or draw index offsets.
+ /// </summary>
+ /// <param name="operation">Operation whose sources are inspected</param>
+ /// <returns>True if such a source exists, false otherwise</returns>
+ private static bool HasConstantBufferDrawParameters(Operation operation)
+ {
+ int sourcesCount = operation.SourcesCount;
+
+ for (int srcIndex = 0; srcIndex < sourcesCount; srcIndex++)
+ {
+ Operand src = operation.GetSource(srcIndex);
+
+ if (src.Type != OperandType.ConstantBuffer || src.GetCbufSlot() != 0)
+ {
+ continue;
+ }
+
+ int wordOffset = src.GetCbufOffset();
+
+ if (wordOffset == Constants.NvnBaseVertexByteOffset / 4 ||
+ wordOffset == Constants.NvnBaseInstanceByteOffset / 4 ||
+ wordOffset == Constants.NvnDrawIndexByteOffset / 4)
+ {
+ return true;
+ }
+ }
+
+ return false;
+ }
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/Translation/ShaderConfig.cs b/src/Ryujinx.Graphics.Shader/Translation/ShaderConfig.cs
new file mode 100644
index 00000000..22f5a671
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/Translation/ShaderConfig.cs
@@ -0,0 +1,944 @@
+using Ryujinx.Graphics.Shader.IntermediateRepresentation;
+using Ryujinx.Graphics.Shader.StructuredIr;
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Numerics;
+
+namespace Ryujinx.Graphics.Shader.Translation
+{
+ class ShaderConfig
+ {
+ // TODO: Non-hardcoded array size.
+ public const int SamplerArraySize = 4;
+
+ private const int ThreadsPerWarp = 32;
+
+ public ShaderStage Stage { get; }
+
+ public bool GpPassthrough { get; }
+ public bool LastInVertexPipeline { get; private set; }
+
+ public bool HasLayerInputAttribute { get; private set; }
+ public int GpLayerInputAttribute { get; private set; }
+ public int ThreadsPerInputPrimitive { get; }
+
+ public OutputTopology OutputTopology { get; }
+
+ public int MaxOutputVertices { get; }
+
+ public int LocalMemorySize { get; }
+
+ public ImapPixelType[] ImapTypes { get; }
+
+ public int OmapTargets { get; }
+ public bool OmapSampleMask { get; }
+ public bool OmapDepth { get; }
+
+ public IGpuAccessor GpuAccessor { get; }
+
+ public TranslationOptions Options { get; }
+
+ public bool TransformFeedbackEnabled { get; }
+
+ private TransformFeedbackOutput[] _transformFeedbackOutputs;
+
+ // Key of the transform feedback definitions dictionary: a variable plus its location and component.
+ readonly struct TransformFeedbackVariable : IEquatable<TransformFeedbackVariable>
+ {
+ public IoVariable IoVariable { get; }
+ public int Location { get; }
+ public int Component { get; }
+
+ public TransformFeedbackVariable(IoVariable ioVariable, int location = 0, int component = 0)
+ {
+ Component = component;
+ Location = location;
+ IoVariable = ioVariable;
+ }
+
+ public override bool Equals(object other)
+ {
+ if (other is TransformFeedbackVariable tfbVar)
+ {
+ return Equals(tfbVar);
+ }
+
+ return false;
+ }
+
+ public bool Equals(TransformFeedbackVariable other)
+ {
+ if (IoVariable != other.IoVariable)
+ {
+ return false;
+ }
+
+ return Location == other.Location && Component == other.Component;
+ }
+
+ public override int GetHashCode()
+ {
+ // Variable in the low bits, location from bit 8, component from bit 16.
+ int hash = (int)IoVariable;
+
+ hash |= Location << 8;
+ hash |= Component << 16;
+
+ return hash;
+ }
+
+ public override string ToString() => $"{IoVariable}.{Location}.{Component}";
+ }
+
+ private readonly Dictionary<TransformFeedbackVariable, TransformFeedbackOutput> _transformFeedbackDefinitions;
+
+ public int Size { get; private set; }
+
+ public byte ClipDistancesWritten { get; private set; }
+
+ public FeatureFlags UsedFeatures { get; private set; }
+
+ public int Cb1DataSize { get; private set; }
+
+ public bool LayerOutputWritten { get; private set; }
+ public int LayerOutputAttribute { get; private set; }
+
+ public bool NextUsesFixedFuncAttributes { get; private set; }
+ public int UsedInputAttributes { get; private set; }
+ public int UsedOutputAttributes { get; private set; }
+ public HashSet<int> UsedInputAttributesPerPatch { get; }
+ public HashSet<int> UsedOutputAttributesPerPatch { get; }
+ public HashSet<int> NextUsedInputAttributesPerPatch { get; private set; }
+ public int PassthroughAttributes { get; private set; }
+ private int _nextUsedInputAttributes;
+ private int _thisUsedInputAttributes;
+ private Dictionary<int, int> _perPatchAttributeLocations;
+
+ public UInt128 NextInputAttributesComponents { get; private set; }
+ public UInt128 ThisInputAttributesComponents { get; private set; }
+
+ public int AccessibleStorageBuffersMask { get; private set; }
+ public int AccessibleConstantBuffersMask { get; private set; }
+
+ private int _usedConstantBuffers;
+ private int _usedStorageBuffers;
+ private int _usedStorageBuffersWrite;
+
+ private readonly record struct TextureInfo(int CbufSlot, int Handle, bool Indexed, TextureFormat Format);
+
+ private struct TextureMeta // Value stored per entry of the used textures and used images dictionaries.
+ {
+ public bool AccurateType; // When merging two entries, the Type of an entry with this set is the one kept.
+ public SamplerType Type; // Sampler type recorded for the entry.
+ public TextureUsageFlags UsageFlags; // Usage flags; merging two entries ORs them together.
+ }
+
+ private readonly Dictionary<TextureInfo, TextureMeta> _usedTextures;
+ private readonly Dictionary<TextureInfo, TextureMeta> _usedImages;
+
+ private BufferDescriptor[] _cachedConstantBufferDescriptors;
+ private BufferDescriptor[] _cachedStorageBufferDescriptors;
+ private TextureDescriptor[] _cachedTextureDescriptors;
+ private TextureDescriptor[] _cachedImageDescriptors;
+
+ private int _firstConstantBufferBinding;
+ private int _firstStorageBufferBinding;
+
+ public int FirstConstantBufferBinding => _firstConstantBufferBinding;
+ public int FirstStorageBufferBinding => _firstStorageBufferBinding;
+
+ /// <summary>
+ /// Creates a configuration with the stage set to compute. The other constructors chain to this one
+ /// and overwrite the stage.
+ /// </summary>
+ /// <param name="gpuAccessor">GPU accessor stored on the configuration</param>
+ /// <param name="options">Translation options stored on the configuration</param>
+ public ShaderConfig(IGpuAccessor gpuAccessor, TranslationOptions options)
+ {
+ GpuAccessor = gpuAccessor;
+ Options = options;
+ Stage = ShaderStage.Compute;
+
+ // All storage and constant buffer slots start out accessible.
+ AccessibleStorageBuffersMask = (1 << GlobalMemory.StorageMaxCount) - 1;
+ AccessibleConstantBuffersMask = (1 << GlobalMemory.UbeMaxCount) - 1;
+
+ _transformFeedbackDefinitions = new();
+
+ _usedTextures = new();
+ _usedImages = new();
+
+ UsedInputAttributesPerPatch = new();
+ UsedOutputAttributesPerPatch = new();
+ }
+
+ /// <summary>
+ /// Creates a configuration from an explicit stage and output description, with one thread per input primitive.
+ /// </summary>
+ /// <param name="stage">Shader stage</param>
+ /// <param name="outputTopology">Output topology</param>
+ /// <param name="maxOutputVertices">Maximum number of output vertices</param>
+ /// <param name="gpuAccessor">GPU accessor, also queried for the transform feedback state</param>
+ /// <param name="options">Translation options</param>
+ public ShaderConfig(
+ ShaderStage stage,
+ OutputTopology outputTopology,
+ int maxOutputVertices,
+ IGpuAccessor gpuAccessor,
+ TranslationOptions options) : this(gpuAccessor, options)
+ {
+ Stage = stage;
+ OutputTopology = outputTopology;
+ MaxOutputVertices = maxOutputVertices;
+ ThreadsPerInputPrimitive = 1;
+ TransformFeedbackEnabled = gpuAccessor.QueryTransformFeedbackEnabled();
+
+ // The chained constructor enabled all constant buffer slots, that only stays for compute.
+ if (stage != ShaderStage.Compute)
+ {
+ AccessibleConstantBuffersMask = 0;
+ }
+ }
+
+ /// <summary>
+ /// Creates a configuration from the values of a shader header.
+ /// </summary>
+ /// <param name="header">Header that the stage, output and memory information are copied from</param>
+ /// <param name="gpuAccessor">GPU accessor, also queried for the transform feedback state</param>
+ /// <param name="options">Translation options</param>
+ public ShaderConfig(ShaderHeader header, IGpuAccessor gpuAccessor, TranslationOptions options) : this(gpuAccessor, options)
+ {
+ ShaderStage stage = header.Stage;
+
+ Stage = stage;
+ GpPassthrough = stage == ShaderStage.Geometry && header.GpPassthrough;
+ LastInVertexPipeline = stage < ShaderStage.Fragment;
+
+ ThreadsPerInputPrimitive = header.ThreadsPerInputPrimitive;
+ OutputTopology = header.OutputTopology;
+ MaxOutputVertices = header.MaxOutputVertexCount;
+
+ // Low and high sizes are added as is, the CRS size is divided by the threads per warp.
+ int crsSizePerThread = header.ShaderLocalMemoryCrsSize / ThreadsPerWarp;
+ LocalMemorySize = header.ShaderLocalMemoryLowSize + header.ShaderLocalMemoryHighSize + crsSizePerThread;
+
+ ImapTypes = header.ImapTypes;
+ OmapTargets = header.OmapTargets;
+ OmapSampleMask = header.OmapSampleMask;
+ OmapDepth = header.OmapDepth;
+
+ TransformFeedbackEnabled = gpuAccessor.QueryTransformFeedbackEnabled();
+ }
+
+ /// <summary>
+ /// Builds the transform feedback output table and the per-variable definitions the first time
+ /// it is called while transform feedback outputs are available. Does nothing otherwise.
+ /// </summary>
+ private void EnsureTransformFeedbackInitialized()
+ {
+ if (!HasTransformFeedbackOutputs() || _transformFeedbackOutputs != null)
+ {
+ return;
+ }
+
+ const int OutputsCount = 0xc0;
+
+ var outputs = new TransformFeedbackOutput[OutputsCount];
+
+ // One bit per group of 4 consecutive word offsets that has at least one output.
+ ulong usedVectors = 0UL;
+
+ for (int bufferIndex = 0; bufferIndex < 4; bufferIndex++)
+ {
+ var locations = GpuAccessor.QueryTransformFeedbackVaryingLocations(bufferIndex);
+ var stride = GpuAccessor.QueryTransformFeedbackStride(bufferIndex);
+
+ for (int i = 0; i < locations.Length; i++)
+ {
+ byte wordOffset = locations[i];
+
+ if (wordOffset >= OutputsCount)
+ {
+ continue;
+ }
+
+ outputs[wordOffset] = new TransformFeedbackOutput(bufferIndex, i * 4, stride);
+ usedVectors |= 1UL << (wordOffset / 4);
+ }
+ }
+
+ // The field must be set before the loop below, which reads it back through
+ // HasPerLocationInputOrOutputComponent.
+ _transformFeedbackOutputs = outputs;
+
+ while (usedVectors != 0)
+ {
+ int vecIndex = BitOperations.TrailingZeroCount(usedVectors);
+
+ usedVectors &= ~(1UL << vecIndex);
+
+ for (int subIndex = 0; subIndex < 4; subIndex++)
+ {
+ int wordOffset = vecIndex * 4 + subIndex;
+
+ if (!outputs[wordOffset].Valid)
+ {
+ continue;
+ }
+
+ IoVariable ioVariable = Instructions.AttributeMap.GetIoVariable(this, wordOffset * 4, out int location);
+
+ bool perComponent = HasPerLocationInputOrOutputComponent(ioVariable, location, subIndex, isOutput: true);
+ int component = perComponent ? subIndex : 0;
+
+ // The first definition registered for a variable is kept, later ones are ignored.
+ _transformFeedbackDefinitions.TryAdd(
+ new TransformFeedbackVariable(ioVariable, location, component),
+ outputs[wordOffset]);
+ }
+ }
+ }
+
+ /// <summary>
+ /// Gets the transform feedback output table, building it first if needed.
+ /// </summary>
+ /// <returns>The table, or null if it was not built because transform feedback outputs are not available</returns>
+ public TransformFeedbackOutput[] GetTransformFeedbackOutputs()
+ {
+ EnsureTransformFeedbackInitialized();
+
+ TransformFeedbackOutput[] outputs = _transformFeedbackOutputs;
+
+ return outputs;
+ }
+
+ /// <summary>
+ /// Looks up the transform feedback output registered for a variable, location and component.
+ /// </summary>
+ /// <param name="ioVariable">Variable to look up</param>
+ /// <param name="location">Location of the variable</param>
+ /// <param name="component">Component of the variable</param>
+ /// <param name="transformFeedbackOutput">Output registered for the key, if any</param>
+ /// <returns>True if a definition exists for the key, false otherwise</returns>
+ public bool TryGetTransformFeedbackOutput(IoVariable ioVariable, int location, int component, out TransformFeedbackOutput transformFeedbackOutput)
+ {
+ EnsureTransformFeedbackInitialized();
+
+ return _transformFeedbackDefinitions.TryGetValue(
+ new TransformFeedbackVariable(ioVariable, location, component),
+ out transformFeedbackOutput);
+ }
+
+ /// <summary>
+ /// Checks if transform feedback is enabled and this stage is either the last of the
+ /// vertex pipeline or the fragment stage.
+ /// </summary>
+ private bool HasTransformFeedbackOutputs()
+ {
+ if (!TransformFeedbackEnabled)
+ {
+ return false;
+ }
+
+ return LastInVertexPipeline || Stage == ShaderStage.Fragment;
+ }
+
+ /// <summary>
+ /// Checks if transform feedback is enabled and applies to the given direction: outputs of the
+ /// last vertex pipeline stage, or inputs of the fragment stage.
+ /// </summary>
+ /// <param name="isOutput">True to check outputs, false to check inputs</param>
+ public bool HasTransformFeedbackOutputs(bool isOutput)
+ {
+ if (!TransformFeedbackEnabled)
+ {
+ return false;
+ }
+
+ return isOutput ? LastInVertexPipeline : Stage == ShaderStage.Fragment;
+ }
+
+ /// <summary>
+ /// Checks if a variable is per-location: fragment output colors always are, user defined
+ /// variables are unless the indexing feature of the matching direction is in use.
+ /// </summary>
+ /// <param name="ioVariable">Variable to check</param>
+ /// <param name="isOutput">True for an output, false for an input</param>
+ public bool HasPerLocationInputOrOutput(IoVariable ioVariable, bool isOutput)
+ {
+ if (ioVariable != IoVariable.UserDefined)
+ {
+ return ioVariable == IoVariable.FragmentOutputColor;
+ }
+
+ FeatureFlags indexingFlag = isOutput ? FeatureFlags.OaIndexing : FeatureFlags.IaIndexing;
+
+ return !UsedFeatures.HasFlag(indexingFlag);
+ }
+
+ /// <summary>
+ /// Checks if a user defined variable component is handled on its own, which is the case when
+ /// transform feedback applies to the direction and the component count reported for it is 1.
+ /// </summary>
+ /// <param name="ioVariable">Variable to check, anything other than user defined yields false</param>
+ /// <param name="location">Location of the variable</param>
+ /// <param name="component">Component of the variable</param>
+ /// <param name="isOutput">True for an output, false for an input</param>
+ public bool HasPerLocationInputOrOutputComponent(IoVariable ioVariable, int location, int component, bool isOutput)
+ {
+ return ioVariable == IoVariable.UserDefined &&
+ HasTransformFeedbackOutputs(isOutput) &&
+ GetTransformFeedbackOutputComponents(location, component) == 1;
+ }
+
+ /// <summary>
+ /// Gets the transform feedback output table entry at a word offset, building the table first if needed.
+ /// </summary>
+ /// <param name="wordOffset">Index into the output table</param>
+ public TransformFeedbackOutput GetTransformFeedbackOutput(int wordOffset)
+ {
+ EnsureTransformFeedbackInitialized();
+
+ TransformFeedbackOutput[] outputs = _transformFeedbackOutputs;
+
+ return outputs[wordOffset];
+ }
+
+ /// <summary>
+ /// Gets the transform feedback output table entry of a user attribute component.
+ /// </summary>
+ /// <param name="location">User attribute location</param>
+ /// <param name="component">Component within the location</param>
+ public TransformFeedbackOutput GetTransformFeedbackOutput(int location, int component)
+ {
+ // User attributes start at UserAttributeBase / 4 in the table, with 4 entries per location.
+ int wordOffset = (AttributeConsts.UserAttributeBase / 4) + location * 4 + component;
+
+ return GetTransformFeedbackOutput(wordOffset);
+ }
+
+ /// <summary>
+ /// Counts how many entries of a user attribute location, starting from its first component, are
+ /// valid and placed at consecutive offsets 4 apart.
+ /// </summary>
+ /// <param name="location">User attribute location</param>
+ /// <param name="component">Component being queried</param>
+ /// <returns>The length of that run, or 1 if <paramref name="component"/> lies outside of it</returns>
+ public int GetTransformFeedbackOutputComponents(int location, int component)
+ {
+ EnsureTransformFeedbackInitialized();
+
+ int baseIndex = (AttributeConsts.UserAttributeBase / 4) + location * 4;
+ int count = 1;
+
+ while (count < 4)
+ {
+ ref var prev = ref _transformFeedbackOutputs[baseIndex + count - 1];
+ ref var curr = ref _transformFeedbackOutputs[baseIndex + count];
+
+ bool contiguous = prev.Valid && curr.Valid && prev.Offset + 4 == curr.Offset;
+
+ if (!contiguous)
+ {
+ break;
+ }
+
+ count++;
+ }
+
+ // A component past the end of the run is reported on its own.
+ return count <= component ? 1 : count;
+ }
+
+ /// <summary>
+ /// Gets the type of a fragment output color: a 4-component vector of the element type
+ /// queried from the GPU accessor for the location.
+ /// </summary>
+ /// <param name="location">Fragment output location</param>
+ public AggregateType GetFragmentOutputColorType(int location)
+ {
+ AggregateType elementType = GpuAccessor.QueryFragmentOutputType(location).ToAggregateType();
+
+ return AggregateType.Vector4 | elementType;
+ }
+
+ /// <summary>
+ /// Gets the type of a user defined input or output.
+ /// </summary>
+ /// <param name="location">Location of the variable, used to query vertex input types</param>
+ /// <param name="isOutput">True for an output, false for an input</param>
+ /// <returns>
+ /// An array of FP32 4-component vectors when the indexing feature of the direction is in use,
+ /// otherwise a 4-component vector whose element type is queried for vertex inputs and FP32 for the rest
+ /// </returns>
+ public AggregateType GetUserDefinedType(int location, bool isOutput)
+ {
+ FeatureFlags indexingFlag = isOutput ? FeatureFlags.OaIndexing : FeatureFlags.IaIndexing;
+
+ if (UsedFeatures.HasFlag(indexingFlag))
+ {
+ return AggregateType.Array | AggregateType.Vector4 | AggregateType.FP32;
+ }
+
+ bool isVertexInput = Stage == ShaderStage.Vertex && !isOutput;
+
+ AggregateType elementType = isVertexInput
+ ? GpuAccessor.QueryAttributeType(location).ToAggregateType()
+ : AggregateType.FP32;
+
+ return AggregateType.Vector4 | elementType;
+ }
+
+ /// <summary>
+ /// Gets the register holding the fragment depth output.
+ /// </summary>
+ /// <returns>The number of bits set on the output targets mask, plus one</returns>
+ public int GetDepthRegister()
+ {
+ int enabledColorOutputs = BitOperations.PopCount((uint)OmapTargets);
+
+ // The depth register is always two registers after the last color output.
+ return enabledColorOutputs + 1;
+ }
+
+ /// <summary>
+ /// Reads a word from constant buffer 1 through the GPU accessor, growing the
+ /// recorded data size so that it covers the 4 bytes starting at the offset.
+ /// </summary>
+ /// <param name="offset">Offset passed to the GPU accessor</param>
+ /// <returns>The value returned by the GPU accessor</returns>
+ public uint ConstantBuffer1Read(int offset)
+ {
+ int requiredSize = offset + 4;
+
+ Cb1DataSize = Math.Max(Cb1DataSize, requiredSize);
+
+ return GpuAccessor.ConstantBuffer1Read(offset);
+ }
+
+ /// <summary>
+ /// Gets the format to declare for a texture.
+ /// </summary>
+ /// <param name="handle">Texture handle</param>
+ /// <param name="cbufSlot">Constant buffer slot passed to the format query</param>
+ /// <returns>
+ /// Unknown when the host supports formatted image loads, otherwise the queried format,
+ /// replaced by R8G8B8A8Unorm (with a log message) when the query yields Unknown
+ /// </returns>
+ public TextureFormat GetTextureFormat(int handle, int cbufSlot = -1)
+ {
+ // With the formatted load extension no format has to be specified,
+ // the image is declared without one and the GPU handles it.
+ if (GpuAccessor.QueryHostSupportsImageLoadFormatted())
+ {
+ return TextureFormat.Unknown;
+ }
+
+ TextureFormat queried = GpuAccessor.QueryTextureFormat(handle, cbufSlot);
+
+ if (queried != TextureFormat.Unknown)
+ {
+ return queried;
+ }
+
+ GpuAccessor.Log($"Unknown format for texture {handle}.");
+
+ return TextureFormat.R8G8B8A8Unorm;
+ }
+
+ /// <summary>
+ /// Checks if a format is one of the two accepted for atomic image operations (R32Sint or R32Uint).
+ /// </summary>
+ private static bool FormatSupportsAtomic(TextureFormat format)
+ {
+ return format is TextureFormat.R32Sint or TextureFormat.R32Uint;
+ }
+
+ /// <summary>
+ /// Gets the format to declare for an image used with atomic operations.
+ /// </summary>
+ /// <param name="handle">Texture handle</param>
+ /// <param name="cbufSlot">Constant buffer slot passed to the format query</param>
+ /// <returns>The queried format if it supports atomics, otherwise R32Sint (with a log message)</returns>
+ public TextureFormat GetTextureFormatAtomic(int handle, int cbufSlot = -1)
+ {
+ // Atomic image instructions do not support GL_EXT_shader_image_load_formatted,
+ // and must have a type specified. R32Sint is the fallback.
+ TextureFormat queried = GpuAccessor.QueryTextureFormat(handle, cbufSlot);
+
+ if (FormatSupportsAtomic(queried))
+ {
+ return queried;
+ }
+
+ GpuAccessor.Log($"Unsupported format for texture {handle}: {queried}.");
+
+ return TextureFormat.R32Sint;
+ }
+
+ /// <summary>
+ /// Adds a value to the accumulated size.
+ /// </summary>
+ /// <param name="size">Amount to add</param>
+ public void SizeAdd(int size) => Size += size;
+
+ /// <summary>
+ /// Merges the usage information of another configuration into this one: clip distances, features,
+ /// attribute and buffer masks are ORed, used textures and images are merged entry by entry.
+ /// </summary>
+ /// <param name="other">Configuration to take the information from</param>
+ public void InheritFrom(ShaderConfig other)
+ {
+ // Copies entries missing from the target, merges the metadata of the ones present on both.
+ static void MergeInto(Dictionary<TextureInfo, TextureMeta> target, Dictionary<TextureInfo, TextureMeta> source)
+ {
+ foreach (var (info, meta) in source)
+ {
+ if (target.TryGetValue(info, out TextureMeta existing))
+ {
+ target[info] = MergeTextureMeta(meta, existing);
+ }
+ else
+ {
+ target.Add(info, meta);
+ }
+ }
+ }
+
+ ClipDistancesWritten |= other.ClipDistancesWritten;
+ UsedFeatures |= other.UsedFeatures;
+
+ UsedInputAttributes |= other.UsedInputAttributes;
+ UsedOutputAttributes |= other.UsedOutputAttributes;
+
+ _usedConstantBuffers |= other._usedConstantBuffers;
+ _usedStorageBuffers |= other._usedStorageBuffers;
+ _usedStorageBuffersWrite |= other._usedStorageBuffersWrite;
+
+ MergeInto(_usedTextures, other._usedTextures);
+ MergeInto(_usedImages, other._usedImages);
+ }
+
+ /// <summary>
+ /// Records the attribute used for the layer output and flags the layer output as written.
+ /// </summary>
+ /// <param name="attr">Attribute value to record</param>
+ public void SetLayerOutputAttribute(int attr)
+ {
+ LayerOutputAttribute = attr;
+ LayerOutputWritten = true;
+ }
+
+ /// <summary>
+ /// Records the attribute used as layer input and flags that a layer input attribute exists.
+ /// </summary>
+ /// <param name="attr">Attribute value to record</param>
+ public void SetGeometryShaderLayerInputAttribute(int attr)
+ {
+ GpLayerInputAttribute = attr;
+ HasLayerInputAttribute = true;
+ }
+
+ /// <summary>
+ /// Flags this stage as the last one of the vertex pipeline.
+ /// </summary>
+ public void SetLastInVertexPipeline() => LastInVertexPipeline = true;
+
+ /// <summary>
+ /// Sets the bit of an attribute on the used input attributes mask only.
+ /// </summary>
+ /// <param name="index">Bit index of the attribute</param>
+ public void SetInputUserAttributeFixedFunc(int index)
+ {
+ int bit = 1 << index;
+
+ UsedInputAttributes |= bit;
+ }
+
+ /// <summary>
+ /// Sets the bit of an attribute on the used output attributes mask.
+ /// </summary>
+ /// <param name="index">Bit index of the attribute</param>
+ public void SetOutputUserAttributeFixedFunc(int index)
+ {
+ int bit = 1 << index;
+
+ UsedOutputAttributes |= bit;
+ }
+
+ /// <summary>
+ /// Flags an input attribute as used, both on the overall mask and on the mask of attributes used
+ /// by this stage, and flags the component on the per-component mask (4 bits per attribute).
+ /// </summary>
+ /// <param name="index">Bit index of the attribute</param>
+ /// <param name="component">Component of the attribute</param>
+ public void SetInputUserAttribute(int index, int component)
+ {
+ int attributeBit = 1 << index;
+ int componentBit = index * 4 + component;
+
+ ThisInputAttributesComponents |= UInt128.One << componentBit;
+ _thisUsedInputAttributes |= attributeBit;
+ UsedInputAttributes |= attributeBit;
+ }
+
+ /// <summary>
+ /// Adds an attribute to the set of used per-patch input attributes.
+ /// </summary>
+ /// <param name="index">Attribute to add</param>
+ public void SetInputUserAttributePerPatch(int index) => UsedInputAttributesPerPatch.Add(index);
+
+ /// <summary>
+ /// Sets the bit of an attribute on the used output attributes mask.
+ /// </summary>
+ /// <param name="index">Bit index of the attribute</param>
+ public void SetOutputUserAttribute(int index)
+ {
+ int bit = 1 << index;
+
+ UsedOutputAttributes |= bit;
+ }
+
+ /// <summary>
+ /// Adds an attribute to the set of used per-patch output attributes.
+ /// </summary>
+ /// <param name="index">Attribute to add</param>
+ public void SetOutputUserAttributePerPatch(int index) => UsedOutputAttributesPerPatch.Add(index);
+
+ /// <summary>
+ /// Merges information from the configuration of the stage that comes after this one:
+ /// the inputs it uses become outputs of this stage, per-patch outputs get locations shared
+ /// by both stages, and this stage stops being the last of the vertex pipeline when applicable.
+ /// </summary>
+ /// <param name="config">Configuration of the next stage</param>
+ public void MergeFromtNextStage(ShaderConfig config)
+ {
+ NextInputAttributesComponents = config.ThisInputAttributesComponents;
+ NextUsedInputAttributesPerPatch = config.UsedInputAttributesPerPatch;
+ NextUsesFixedFuncAttributes = config.UsedFeatures.HasFlag(FeatureFlags.FixedFuncAttr);
+ MergeOutputUserAttributes(config.UsedInputAttributes, config.UsedInputAttributesPerPatch);
+
+ if (UsedOutputAttributesPerPatch.Count != 0)
+ {
+ // Regular and per-patch input/output locations can't overlap,
+ // so each per-patch attribute is assigned a regular location that is still unused.
+
+ Dictionary<int, int> locationsMap = new Dictionary<int, int>();
+
+ int freeMask = ~UsedOutputAttributes;
+
+ foreach (int attr in UsedOutputAttributesPerPatch)
+ {
+ int location = BitOperations.TrailingZeroCount(freeMask);
+ if (location == 32)
+ {
+ // All 32 locations are taken, the remaining attributes are left without a mapping.
+ config.GpuAccessor.Log($"Not enough free locations for patch input/output 0x{attr:X}.");
+ break;
+ }
+
+ locationsMap.Add(attr, location);
+ freeMask &= ~(1 << location);
+ }
+
+ // Both stages must agree on the locations, so use the same "map" for both.
+ _perPatchAttributeLocations = locationsMap;
+ config._perPatchAttributeLocations = locationsMap;
+ }
+
+ // We don't consider geometry shaders using the geometry shader passthrough feature
+ // as being the last because when this feature is used, it can't actually modify any of the outputs,
+ // so the stage that comes before it is the last one that can do modifications.
+ if (config.Stage != ShaderStage.Fragment && (config.Stage != ShaderStage.Geometry || !config.GpPassthrough))
+ {
+ LastInVertexPipeline = false;
+ }
+ }
+
+ /// <summary>
+ /// Records the input attributes used by the next stage. With geometry passthrough, the ones not
+ /// written by this stage become passthrough attributes; otherwise they are added to the used outputs.
+ /// </summary>
+ /// <param name="mask">Mask of the input attributes used by the next stage</param>
+ /// <param name="perPatch">Per-patch attributes added to the used per-patch outputs (ignored with passthrough)</param>
+ public void MergeOutputUserAttributes(int mask, IEnumerable<int> perPatch)
+ {
+ _nextUsedInputAttributes = mask;
+
+ if (!GpPassthrough)
+ {
+ UsedOutputAttributes |= mask;
+ UsedOutputAttributesPerPatch.UnionWith(perPatch);
+
+ return;
+ }
+
+ PassthroughAttributes = mask & ~UsedOutputAttributes;
+ }
+
+ /// <summary>
+ /// Gets the location assigned to a per-patch attribute.
+ /// </summary>
+ /// <param name="index">Per-patch attribute</param>
+ /// <returns>The mapped location, or <paramref name="index"/> itself when no mapping exists for it</returns>
+ public int GetPerPatchAttributeLocation(int index)
+ {
+ if (_perPatchAttributeLocations != null && _perPatchAttributeLocations.TryGetValue(index, out int location))
+ {
+ return location;
+ }
+
+ return index;
+ }
+
+ /// <summary>
+ /// Checks if an output attribute should be considered used.
+ /// </summary>
+ /// <param name="attr">Attribute offset</param>
+ /// <returns>
+ /// For user attributes when the next stage uses fixed function attributes, whether the next stage
+ /// reads the attribute; true in every other case
+ /// </returns>
+ public bool IsUsedOutputAttribute(int attr)
+ {
+ bool isUserAttribute = attr >= AttributeConsts.UserAttributeBase && attr < AttributeConsts.UserAttributeEnd;
+
+ // The check for fixed function attributes on the next stage is conservative,
+ // returning false if the output is just not used by the next stage is also valid.
+ if (!NextUsesFixedFuncAttributes || !isUserAttribute)
+ {
+ return true;
+ }
+
+ // Each attribute spans 16 bytes.
+ int attributeBit = 1 << ((attr - AttributeConsts.UserAttributeBase) >> 4);
+
+ return (_nextUsedInputAttributes & attributeBit) != 0;
+ }
+
+ /// <summary>
+ /// Gets the location of the N-th user attribute that is not in use, counting from the lowest one.
+ /// The attributes considered in use are the inputs of the next stage for outputs,
+ /// and the inputs of this stage for inputs.
+ /// </summary>
+ /// <param name="isOutput">True to search among outputs, false to search among inputs</param>
+ /// <param name="index">Zero-based position among the free attributes, values below 1 select the first one</param>
+ /// <returns>The attribute location, or -1 if there are not enough free attributes</returns>
+ public int GetFreeUserAttribute(bool isOutput, int index)
+ {
+ int freeMask = ~(isOutput ? _nextUsedInputAttributes : _thisUsedInputAttributes);
+
+ while (freeMask != 0)
+ {
+ int bit = BitOperations.TrailingZeroCount(freeMask);
+
+ if (index < 1)
+ {
+ return bit;
+ }
+
+ // Skip this free location and keep looking for the next one.
+ freeMask &= ~(1 << bit);
+ index--;
+ }
+
+ // Ran out of free locations before reaching the requested position. Returning the last
+ // location found here would hand out the same location as a lower index.
+ return -1;
+ }
+
+ /// <summary>
+ /// Flags every input attribute, and every component of each of them, as used.
+ /// </summary>
+ public void SetAllInputUserAttributes()
+ {
+ // 4 component bits per attribute, starting at bit 0.
+ int componentBits = Constants.MaxAttributes * 4;
+
+ ThisInputAttributesComponents |= UInt128.MaxValue >> (128 - componentBits);
+ UsedInputAttributes |= Constants.AllAttributesMask;
+ }
+
+ /// <summary>
+ /// Flags every output attribute as used.
+ /// </summary>
+ public void SetAllOutputUserAttributes() => UsedOutputAttributes |= Constants.AllAttributesMask;
+
+ /// <summary>
+ /// Sets the bit of a clip distance on the mask of written clip distances.
+ /// </summary>
+ /// <param name="index">Bit index of the clip distance</param>
+ public void SetClipDistanceWritten(int index) => ClipDistancesWritten |= (byte)(1 << index);
+
+ /// <summary>
+ /// Adds flags to the set of features used by the shader.
+ /// </summary>
+ /// <param name="flags">Flags to add</param>
+ public void SetUsedFeature(FeatureFlags flags) => UsedFeatures |= flags;
+
+ /// <summary>
+ /// Replaces the masks of accessible storage and constant buffers.
+ /// </summary>
+ /// <param name="sbMask">New mask of accessible storage buffers</param>
+ /// <param name="ubeMask">New mask of accessible constant buffers</param>
+ public void SetAccessibleBufferMasks(int sbMask, int ubeMask)
+ {
+ AccessibleConstantBuffersMask = ubeMask;
+ AccessibleStorageBuffersMask = sbMask;
+ }
+
+ /// <summary>
+ /// Sets the bit of a slot on the mask of used constant buffers.
+ /// </summary>
+ /// <param name="slot">Bit index of the slot</param>
+ public void SetUsedConstantBuffer(int slot)
+ {
+ int bit = 1 << slot;
+
+ _usedConstantBuffers |= bit;
+ }
+
+ /// <summary>
+ /// Sets the bit of a slot on the mask of used storage buffers, and also on the mask of
+ /// written storage buffers when <paramref name="write"/> is true.
+ /// </summary>
+ /// <param name="slot">Bit index of the slot</param>
+ /// <param name="write">True if the buffer is written</param>
+ public void SetUsedStorageBuffer(int slot, bool write)
+ {
+ int bit = 1 << slot;
+
+ _usedStorageBuffers |= bit;
+ _usedStorageBuffersWrite |= write ? bit : 0;
+ }
+
+ /// <summary>
+ /// Records the use of a texture or image by an instruction, and registers the texture on the GPU accessor.
+ /// Image load, store and atomic instructions are recorded as images, everything else as textures.
+ /// </summary>
+ /// <param name="inst">Instruction using the texture, only the bits covered by the instruction mask are considered</param>
+ /// <param name="type">Sampler type</param>
+ /// <param name="format">Format, recorded for images only (textures are recorded with an unknown format)</param>
+ /// <param name="flags">Texture flags, the coherent and integer coordinates flags are inspected</param>
+ /// <param name="cbufSlot">Constant buffer slot of the handle</param>
+ /// <param name="handle">Texture handle</param>
+ public void SetUsedTexture(
+ Instruction inst,
+ SamplerType type,
+ TextureFormat format,
+ TextureFlags flags,
+ int cbufSlot,
+ int handle)
+ {
+ Instruction baseInst = inst & Instruction.Mask;
+
+ bool coherent = flags.HasFlag(TextureFlags.Coherent);
+ bool isWrite = baseInst == Instruction.ImageStore || baseInst == Instruction.ImageAtomic;
+ bool isImage = isWrite || baseInst == Instruction.ImageLoad;
+
+ if (isImage)
+ {
+ SetUsedTextureOrImage(
+ _usedImages,
+ cbufSlot,
+ handle,
+ type,
+ format,
+ intCoords: true,
+ write: isWrite,
+ accurateType: false,
+ coherent: coherent);
+ }
+ else
+ {
+ bool isSizeQuery = baseInst == Instruction.TextureSize;
+
+ // LOD and size queries don't provide accurate type information.
+ bool accurateType = baseInst != Instruction.Lod && !isSizeQuery;
+ bool intCoords = flags.HasFlag(TextureFlags.IntCoords) || isSizeQuery;
+
+ SetUsedTextureOrImage(
+ _usedTextures,
+ cbufSlot,
+ handle,
+ type,
+ TextureFormat.Unknown,
+ intCoords: intCoords,
+ write: false,
+ accurateType: accurateType,
+ coherent: coherent);
+ }
+
+ GpuAccessor.RegisterTexture(handle, cbufSlot);
+ }
+
+ /// <summary>
+ /// Adds a texture or image use to a dictionary, merging with the entry already there if any.
+ /// Indexed sampler types add one entry per array element, with handles 2 apart.
+ /// </summary>
+ /// <param name="dict">Dictionary receiving the entries</param>
+ /// <param name="cbufSlot">Constant buffer slot of the handle</param>
+ /// <param name="handle">Texture handle of the first entry</param>
+ /// <param name="type">Sampler type</param>
+ /// <param name="format">Format stored on the entry key</param>
+ /// <param name="intCoords">True if the access uses integer coordinates, which flags the entry as needing a scale value</param>
+ /// <param name="write">True if the access writes, which flags the entry as image store</param>
+ /// <param name="accurateType">True if <paramref name="type"/> is accurate</param>
+ /// <param name="coherent">True if the access is coherent, which flags the entry as image coherent</param>
+ private void SetUsedTextureOrImage(
+ Dictionary<TextureInfo, TextureMeta> dict,
+ int cbufSlot,
+ int handle,
+ SamplerType type,
+ TextureFormat format,
+ bool intCoords,
+ bool write,
+ bool accurateType,
+ bool coherent)
+ {
+ var dimensions = type.GetDimensions();
+ bool isIndexed = type.HasFlag(SamplerType.Indexed);
+
+ TextureUsageFlags usageFlags = TextureUsageFlags.None;
+
+ if (intCoords)
+ {
+ usageFlags |= TextureUsageFlags.NeedsScaleValue;
+
+ bool canScale = Stage.SupportsRenderScale() && !isIndexed && !write && dimensions == 2;
+
+ // When resolution scaling cannot be applied to this texture, flag it
+ // so that scaling can be blacklisted on related textures when binding them.
+ if (!canScale)
+ {
+ usageFlags |= TextureUsageFlags.ResScaleUnsupported;
+ }
+ }
+
+ if (write)
+ {
+ usageFlags |= TextureUsageFlags.ImageStore;
+ }
+
+ if (coherent)
+ {
+ usageFlags |= TextureUsageFlags.ImageCoherent;
+ }
+
+ int entriesCount = isIndexed ? SamplerArraySize : 1;
+
+ for (int element = 0; element < entriesCount; element++)
+ {
+ var info = new TextureInfo(cbufSlot, handle + element * 2, isIndexed, format);
+ var meta = new TextureMeta()
+ {
+ AccurateType = accurateType,
+ Type = type,
+ UsageFlags = usageFlags
+ };
+
+ dict[info] = dict.TryGetValue(info, out var existingMeta)
+ ? MergeTextureMeta(meta, existingMeta)
+ : meta;
+ }
+ }
+
+ /// <summary>
+ /// Merges the metadata of two uses of the same texture.
+ /// </summary>
+ /// <param name="meta">Metadata of the new use</param>
+ /// <param name="existingMeta">Metadata already recorded</param>
+ /// <returns>
+ /// Metadata with the usage flags of both, and the type of the recorded one when that type
+ /// is accurate (otherwise the type and accuracy of the new use are kept)
+ /// </returns>
+ private static TextureMeta MergeTextureMeta(TextureMeta meta, TextureMeta existingMeta)
+ {
+ bool preferExisting = existingMeta.AccurateType;
+
+ return new TextureMeta()
+ {
+ AccurateType = preferExisting || meta.AccurateType,
+ Type = preferExisting ? existingMeta.Type : meta.Type,
+ UsageFlags = meta.UsageFlags | existingMeta.UsageFlags
+ };
+ }
+
+ /// <summary>
+ /// Gets the descriptors of the used constant buffers. They are built on the first call and cached.
+ /// When constant buffer indexing is in use, the buffers reported by the GPU accessor are included too.
+ /// </summary>
+ public BufferDescriptor[] GetConstantBufferDescriptors()
+ {
+ if (_cachedConstantBufferDescriptors == null)
+ {
+ bool cbIndexing = UsedFeatures.HasFlag(FeatureFlags.CbIndexing);
+
+ int usedMask = _usedConstantBuffers;
+
+ if (cbIndexing)
+ {
+ usedMask |= (int)GpuAccessor.QueryConstantBufferUse();
+ }
+
+ _cachedConstantBufferDescriptors = GetBufferDescriptors(
+ usedMask,
+ 0,
+ cbIndexing,
+ out _firstConstantBufferBinding,
+ GpuAccessor.QueryBindingConstantBuffer);
+ }
+
+ return _cachedConstantBufferDescriptors;
+ }
+
+ /// <summary>
+ /// Gets the descriptors of the used storage buffers. They are built on the first call and cached.
+ /// Storage buffers are always treated as an array.
+ /// </summary>
+ public BufferDescriptor[] GetStorageBufferDescriptors()
+ {
+ if (_cachedStorageBufferDescriptors == null)
+ {
+ _cachedStorageBufferDescriptors = GetBufferDescriptors(
+ _usedStorageBuffers,
+ _usedStorageBuffersWrite,
+ true,
+ out _firstStorageBufferBinding,
+ GpuAccessor.QueryBindingStorageBuffer);
+ }
+
+ return _cachedStorageBufferDescriptors;
+ }
+
+ /// <summary>
+ /// Builds one descriptor per bit set on the used mask, from the lowest to the highest slot,
+ /// asking the callback for the binding of each slot.
+ /// </summary>
+ /// <param name="usedMask">Mask of used slots</param>
+ /// <param name="writtenMask">Mask of slots that get the write flag</param>
+ /// <param name="isArray">True to also request a binding for every unused slot below a used one</param>
+ /// <param name="firstBinding">Result of the first callback invocation, or 0 if the callback was never invoked</param>
+ /// <param name="getBindingCallback">Callback returning the binding of a slot</param>
+ /// <returns>The descriptors, ordered by slot</returns>
+ private static BufferDescriptor[] GetBufferDescriptors(
+ int usedMask,
+ int writtenMask,
+ bool isArray,
+ out int firstBinding,
+ Func<int, int> getBindingCallback)
+ {
+ var descriptors = new BufferDescriptor[BitOperations.PopCount((uint)usedMask)];
+
+ int? first = null;
+ int previousSlot = -1;
+
+ for (int i = 0; i < descriptors.Length; i++)
+ {
+ int slot = BitOperations.TrailingZeroCount(usedMask);
+
+ usedMask &= ~(1 << slot);
+
+ if (isArray)
+ {
+ // The array entries in between also consume bindings, even if they are unused.
+ for (int gapSlot = previousSlot + 1; gapSlot < slot; gapSlot++)
+ {
+ int gapBinding = getBindingCallback(gapSlot);
+
+ first ??= gapBinding;
+ }
+ }
+
+ previousSlot = slot;
+
+ var descriptor = new BufferDescriptor(getBindingCallback(slot), slot);
+
+ first ??= descriptor.Binding;
+
+ if ((writtenMask & (1 << slot)) != 0)
+ {
+ descriptor.SetFlag(BufferUsageFlags.Write);
+ }
+
+ descriptors[i] = descriptor;
+ }
+
+ firstBinding = first ?? 0;
+
+ return descriptors;
+ }
+
+ /// <summary>
+ /// Gets the descriptors of the used textures. They are built on the first call and cached.
+ /// </summary>
+ public TextureDescriptor[] GetTextureDescriptors()
+ {
+ if (_cachedTextureDescriptors == null)
+ {
+ _cachedTextureDescriptors = GetTextureOrImageDescriptors(_usedTextures, GpuAccessor.QueryBindingTexture);
+ }
+
+ return _cachedTextureDescriptors;
+ }
+
+ /// <summary>
+ /// Gets the descriptors of the used images. They are built on the first call and cached.
+ /// </summary>
+ public TextureDescriptor[] GetImageDescriptors()
+ {
+ if (_cachedImageDescriptors == null)
+ {
+ _cachedImageDescriptors = GetTextureOrImageDescriptors(_usedImages, GpuAccessor.QueryBindingImage);
+ }
+
+ return _cachedImageDescriptors;
+ }
+
+ /// <summary>
+ /// Builds one descriptor per dictionary entry, ordered by handle (non-indexed before indexed
+ /// for equal handles), asking the callback for the binding of each entry in that order.
+ /// </summary>
+ /// <param name="dict">Used textures or images</param>
+ /// <param name="getBindingCallback">Callback returning the binding for a descriptor index and a buffer texture flag</param>
+ /// <returns>The descriptors, in binding request order</returns>
+ private static TextureDescriptor[] GetTextureOrImageDescriptors(Dictionary<TextureInfo, TextureMeta> dict, Func<int, bool, int> getBindingCallback)
+ {
+ var descriptors = new TextureDescriptor[dict.Count];
+
+ // The handle is the primary key and the indexed flag only breaks ties. A single
+ // OrderBy/ThenBy states that directly; because the sort is stable it produces the same
+ // order as sorting by the indexed flag and then sorting again by handle, with one sort.
+ var ordered = dict.OrderBy(x => x.Key.Handle).ThenBy(x => x.Key.Indexed);
+
+ int i = 0;
+ foreach (var kv in ordered)
+ {
+ var info = kv.Key;
+ var meta = kv.Value;
+
+ bool isBuffer = (meta.Type & SamplerType.Mask) == SamplerType.TextureBuffer;
+ int binding = getBindingCallback(i, isBuffer);
+
+ descriptors[i] = new TextureDescriptor(binding, meta.Type, info.Format, info.CbufSlot, info.Handle);
+ descriptors[i].SetFlag(meta.UsageFlags);
+ i++;
+ }
+
+ return descriptors;
+ }
+
+ /// <summary>
+ /// Finds the first texture descriptor with the constant buffer slot, handle and format of a texture operation.
+ /// The sampler type is not compared.
+ /// </summary>
+ /// <param name="texOp">Texture operation to find the descriptor for</param>
+ /// <returns>The descriptor and its index, or a default descriptor and -1 if none matches</returns>
+ public (TextureDescriptor, int) FindTextureDescriptor(AstTextureOperation texOp)
+ {
+ TextureDescriptor[] descriptors = GetTextureDescriptors();
+
+ int index = Array.FindIndex(descriptors, candidate =>
+ candidate.CbufSlot == texOp.CbufSlot &&
+ candidate.HandleIndex == texOp.Handle &&
+ candidate.Format == texOp.Format);
+
+ if (index < 0)
+ {
+ return (default, -1);
+ }
+
+ return (descriptors[index], index);
+ }
+
+ /// <summary>
+ /// Finds the index of the first descriptor with the sampler type, constant buffer slot,
+ /// handle and format of a texture operation.
+ /// </summary>
+ /// <param name="array">Descriptors to search</param>
+ /// <param name="texOp">Texture operation to find the descriptor for</param>
+ /// <returns>The index of the descriptor, or -1 if none matches</returns>
+ private static int FindDescriptorIndex(TextureDescriptor[] array, AstTextureOperation texOp)
+ {
+ return Array.FindIndex(array, candidate =>
+ candidate.Type == texOp.Type &&
+ candidate.CbufSlot == texOp.CbufSlot &&
+ candidate.HandleIndex == texOp.Handle &&
+ candidate.Format == texOp.Format);
+ }
+
+ /// <summary>
+ /// Finds the index of the texture descriptor matching a texture operation, or -1 if there is none.
+ /// </summary>
+ /// <param name="texOp">Texture operation to find the descriptor for</param>
+ public int FindTextureDescriptorIndex(AstTextureOperation texOp)
+ {
+ TextureDescriptor[] textures = GetTextureDescriptors();
+
+ return FindDescriptorIndex(textures, texOp);
+ }
+
+ /// <summary>
+ /// Finds the index of the image descriptor matching a texture operation, or -1 if there is none.
+ /// </summary>
+ /// <param name="texOp">Texture operation to find the descriptor for</param>
+ public int FindImageDescriptorIndex(AstTextureOperation texOp)
+ {
+ TextureDescriptor[] images = GetImageDescriptors();
+
+ return FindDescriptorIndex(images, texOp);
+ }
+
+ /// <summary>
+ /// Creates the program information from the current state of the configuration.
+ /// </summary>
+ /// <param name="identification">Shader identification stored on the information</param>
+ public ShaderProgramInfo CreateProgramInfo(ShaderIdentification identification = ShaderIdentification.None)
+ {
+ // The descriptors are requested in this fixed order: constant buffers, storage buffers, textures, images.
+ BufferDescriptor[] constantBuffers = GetConstantBufferDescriptors();
+ BufferDescriptor[] storageBuffers = GetStorageBufferDescriptors();
+ TextureDescriptor[] textures = GetTextureDescriptors();
+ TextureDescriptor[] images = GetImageDescriptors();
+
+ bool usesInstanceId = UsedFeatures.HasFlag(FeatureFlags.InstanceId);
+ bool usesDrawParameters = UsedFeatures.HasFlag(FeatureFlags.DrawParameters);
+ bool usesRtLayer = UsedFeatures.HasFlag(FeatureFlags.RtLayer);
+
+ return new ShaderProgramInfo(
+ constantBuffers,
+ storageBuffers,
+ textures,
+ images,
+ identification,
+ GpLayerInputAttribute,
+ Stage,
+ usesInstanceId,
+ usesDrawParameters,
+ usesRtLayer,
+ ClipDistancesWritten,
+ OmapTargets);
+ }
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/Translation/ShaderHeader.cs b/src/Ryujinx.Graphics.Shader/Translation/ShaderHeader.cs
new file mode 100644
index 00000000..01f7f08a
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/Translation/ShaderHeader.cs
@@ -0,0 +1,158 @@
+using Ryujinx.Common.Utilities;
+using System;
+using System.Runtime.InteropServices;
+
+namespace Ryujinx.Graphics.Shader.Translation
+{
+ /// <summary>
+ /// Two-bit per-component value decoded from the imap bytes of the shader header.
+ /// </summary>
+ enum PixelImap
+ {
+     Unused = 0,
+     Constant = 1,
+     Perspective = 2,
+     ScreenLinear = 3,
+ }
+
+ /// <summary>
+ /// The <see cref="PixelImap"/> values of the four components (X, Y, Z, W) of one header imap entry.
+ /// </summary>
+ readonly struct ImapPixelType
+ {
+     public PixelImap X { get; }
+     public PixelImap Y { get; }
+     public PixelImap Z { get; }
+     public PixelImap W { get; }
+
+     public ImapPixelType(PixelImap x, PixelImap y, PixelImap z, PixelImap w)
+     {
+         (X, Y, Z, W) = (x, y, z, w);
+     }
+
+     /// <summary>
+     /// Gets the value of the first component, in X, Y, Z order, that is not <see cref="PixelImap.Unused"/>.
+     /// </summary>
+     /// <returns>The first used value, or the value of W if X, Y and Z are all unused</returns>
+     public PixelImap GetFirstUsedType()
+     {
+         if (X != PixelImap.Unused)
+         {
+             return X;
+         }
+
+         if (Y != PixelImap.Unused)
+         {
+             return Y;
+         }
+
+         return Z != PixelImap.Unused ? Z : W;
+     }
+ }
+
+ /// <summary>
+ /// Fields decoded from the 0x50 bytes of header data read at a shader program's address.
+ /// </summary>
+ class ShaderHeader
+ {
+     // Decoded from common word 0.
+     public int SphType { get; }
+     public int Version { get; }
+
+     public ShaderStage Stage { get; }
+
+     public bool MrtEnable { get; }
+
+     public bool KillsPixels { get; }
+
+     public bool DoesGlobalStore { get; }
+
+     public int SassVersion { get; }
+
+     public bool GpPassthrough { get; }
+
+     public bool DoesLoadOrStore { get; }
+     public bool DoesFp64 { get; }
+
+     public int StreamOutMask { get; }
+
+     // Decoded from common word 1.
+     public int ShaderLocalMemoryLowSize { get; }
+
+     public int PerPatchAttributeCount { get; }
+
+     // Decoded from common word 2.
+     public int ShaderLocalMemoryHighSize { get; }
+
+     public int ThreadsPerInputPrimitive { get; }
+
+     // Decoded from common word 3.
+     public int ShaderLocalMemoryCrsSize { get; }
+
+     public OutputTopology OutputTopology { get; }
+
+     // Decoded from common word 4.
+     public int MaxOutputVertexCount { get; }
+
+     public int StoreReqStart { get; }
+     public int StoreReqEnd { get; }
+
+     // One entry per byte of header words 6 to 13.
+     public ImapPixelType[] ImapTypes { get; }
+
+     // Decoded from header words 18 and 19.
+     public int OmapTargets { get; }
+     public bool OmapSampleMask { get; }
+     public bool OmapDepth { get; }
+
+     /// <summary>
+     /// Reads and decodes the header located at the given address.
+     /// </summary>
+     /// <param name="gpuAccessor">Accessor used to read the header data</param>
+     /// <param name="address">Address of the header</param>
+     public ShaderHeader(IGpuAccessor gpuAccessor, ulong address)
+     {
+         ReadOnlySpan<int> words = MemoryMarshal.Cast<ulong, int>(gpuAccessor.GetCode(address, 0x50));
+
+         int word0 = words[0];
+         int word1 = words[1];
+         int word2 = words[2];
+         int word3 = words[3];
+         int word4 = words[4];
+
+         SphType = word0.Extract(0, 5);
+         Version = word0.Extract(5, 5);
+
+         // The compute stage value is invalid here, it is replaced with vertex.
+         ShaderStage stage = (ShaderStage)word0.Extract(10, 4);
+         Stage = stage == ShaderStage.Compute ? ShaderStage.Vertex : stage;
+
+         MrtEnable = word0.Extract(14);
+         KillsPixels = word0.Extract(15);
+         DoesGlobalStore = word0.Extract(16);
+         SassVersion = word0.Extract(17, 4);
+         GpPassthrough = word0.Extract(24);
+         DoesLoadOrStore = word0.Extract(26);
+         DoesFp64 = word0.Extract(27);
+         StreamOutMask = word0.Extract(28, 4);
+
+         ShaderLocalMemoryLowSize = word1.Extract(0, 24);
+         PerPatchAttributeCount = word1.Extract(24, 8);
+
+         ShaderLocalMemoryHighSize = word2.Extract(0, 24);
+         ThreadsPerInputPrimitive = word2.Extract(24, 8);
+
+         ShaderLocalMemoryCrsSize = word3.Extract(0, 24);
+         OutputTopology = (OutputTopology)word3.Extract(24, 4);
+
+         MaxOutputVertexCount = word4.Extract(0, 12);
+         StoreReqStart = word4.Extract(12, 8);
+         StoreReqEnd = word4.Extract(24, 8);
+
+         ImapTypes = new ImapPixelType[32];
+
+         for (int entry = 0; entry < 32; entry++)
+         {
+             // Four entries are packed in each word, one byte per entry and two bits per component.
+             int packed = words[6 + entry / 4];
+             byte imap = (byte)(packed >> (entry % 4 * 8));
+
+             PixelImap x = (PixelImap)(imap & 3);
+             PixelImap y = (PixelImap)((imap >> 2) & 3);
+             PixelImap z = (PixelImap)((imap >> 4) & 3);
+             PixelImap w = (PixelImap)((imap >> 6) & 3);
+
+             ImapTypes[entry] = new ImapPixelType(x, y, z, w);
+         }
+
+         int omapTargetWord = words[18];
+         int omapWord = words[19];
+
+         OmapTargets = omapTargetWord;
+         OmapSampleMask = omapWord.Extract(0);
+         OmapDepth = omapWord.Extract(1);
+     }
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/Translation/ShaderIdentifier.cs b/src/Ryujinx.Graphics.Shader/Translation/ShaderIdentifier.cs
new file mode 100644
index 00000000..53f1e847
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/Translation/ShaderIdentifier.cs
@@ -0,0 +1,185 @@
+using Ryujinx.Graphics.Shader.IntermediateRepresentation;
+using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
+
+namespace Ryujinx.Graphics.Shader.Translation
+{
+ static class ShaderIdentifier
+ {
+ /// <summary>
+ /// Checks whether the shader matches a known pattern.
+ /// </summary>
+ /// <remarks>
+ /// The only pattern checked is a geometry shader with triangles input that passes its inputs through
+ /// and writes the layer, on a host that reports no geometry shader support. When that pattern is found,
+ /// the layer input attribute is also stored on <paramref name="config"/>.
+ /// </remarks>
+ /// <param name="functions">Functions of the shader</param>
+ /// <param name="config">Shader configuration</param>
+ /// <returns>The identification result, or <see cref="ShaderIdentification.None"/> if nothing matched</returns>
+ public static ShaderIdentification Identify(Function[] functions, ShaderConfig config)
+ {
+     if (config.Stage != ShaderStage.Geometry)
+     {
+         return ShaderIdentification.None;
+     }
+
+     if (config.GpuAccessor.QueryPrimitiveTopology() != InputTopology.Triangles)
+     {
+         return ShaderIdentification.None;
+     }
+
+     if (config.GpuAccessor.QueryHostSupportsGeometryShader())
+     {
+         return ShaderIdentification.None;
+     }
+
+     if (!IsLayerPassthroughGeometryShader(functions, out int layerInputAttr))
+     {
+         return ShaderIdentification.None;
+     }
+
+     config.SetGeometryShaderLayerInputAttribute(layerInputAttr);
+
+     return ShaderIdentification.GeometryLayerPassthrough;
+ }
+
+ /// <summary>
+ /// Checks if the shader only copies its inputs to the matching outputs, writes the layer
+ /// from a user defined input, and emits exactly 3 vertices.
+ /// </summary>
+ /// <remarks>
+ /// The check fails if the shader has more than one function, has a backwards branch,
+ /// writes to any resource, or stores anything to an output other than the matching input
+ /// or a constant 0 (1 for component 3).
+ /// </remarks>
+ /// <param name="functions">Functions of the shader</param>
+ /// <param name="layerInputAttr">Component index of the user defined input written to the layer, 0 if none was found</param>
+ /// <returns>True if the shader matches the pattern, false otherwise</returns>
+ private static bool IsLayerPassthroughGeometryShader(Function[] functions, out int layerInputAttr)
+ {
+     bool writesLayer = false;
+     layerInputAttr = 0;
+
+     if (functions.Length != 1)
+     {
+         return false;
+     }
+
+     int verticesCount = 0;
+     int totalVerticesCount = 0;
+
+     foreach (BasicBlock block in functions[0].Blocks)
+     {
+         // We are not expecting loops or any complex control flow here, so fail in those cases.
+         if (block.Branch != null && block.Branch.Index <= block.Index)
+         {
+             return false;
+         }
+
+         foreach (INode node in block.Operations)
+         {
+             if (node is not Operation operation)
+             {
+                 continue;
+             }
+
+             if (IsResourceWrite(operation.Inst))
+             {
+                 return false;
+             }
+
+             if (operation.Inst == Instruction.Store && operation.StorageKind == StorageKind.Output)
+             {
+                 // The stored value is always the last source of the store.
+                 Operand src = operation.GetSource(operation.SourcesCount - 1);
+                 Operation srcAttributeAsgOp = null;
+
+                 if (src.Type == OperandType.LocalVariable &&
+                     src.AsgOp is Operation asgOp &&
+                     asgOp.Inst == Instruction.Load &&
+                     asgOp.StorageKind.IsInputOrOutput())
+                 {
+                     // Values loaded from anything other than an input are not a passthrough.
+                     if (asgOp.StorageKind != StorageKind.Input)
+                     {
+                         return false;
+                     }
+
+                     srcAttributeAsgOp = asgOp;
+                 }
+
+                 if (srcAttributeAsgOp != null)
+                 {
+                     IoVariable dstAttribute = (IoVariable)operation.GetSource(0).Value;
+                     IoVariable srcAttribute = (IoVariable)srcAttributeAsgOp.GetSource(0).Value;
+
+                     if (dstAttribute == IoVariable.Layer && srcAttribute == IoVariable.UserDefined)
+                     {
+                         if (srcAttributeAsgOp.SourcesCount != 4)
+                         {
+                             return false;
+                         }
+
+                         writesLayer = true;
+
+                         // Sources 1 and 3 of the load are combined into a flat component index.
+                         layerInputAttr = srcAttributeAsgOp.GetSource(1).Value * 4 + srcAttributeAsgOp.GetSource(3).Value;
+                     }
+                     else
+                     {
+                         if (dstAttribute != srcAttribute)
+                         {
+                             return false;
+                         }
+
+                         int inputsCount = operation.SourcesCount - 2;
+
+                         if (dstAttribute == IoVariable.UserDefined)
+                         {
+                             if (operation.GetSource(1).Value != srcAttributeAsgOp.GetSource(1).Value)
+                             {
+                                 return false;
+                             }
+
+                             inputsCount--;
+                         }
+
+                         // Compare the remaining index sources of the store and of the load, starting from the last ones.
+                         for (int i = 0; i < inputsCount; i++)
+                         {
+                             int dstIndex = operation.SourcesCount - 2 - i;
+                             int srcIndex = srcAttributeAsgOp.SourcesCount - 1 - i;
+
+                             if ((dstIndex | srcIndex) < 0)
+                             {
+                                 return false;
+                             }
+
+                             if (operation.GetSource(dstIndex).Type != OperandType.Constant ||
+                                 srcAttributeAsgOp.GetSource(srcIndex).Type != OperandType.Constant ||
+                                 operation.GetSource(dstIndex).Value != srcAttributeAsgOp.GetSource(srcIndex).Value)
+                             {
+                                 return false;
+                             }
+                         }
+                     }
+                 }
+                 else if (src.Type == OperandType.Constant)
+                 {
+                     // Constant stores are only accepted if they store 0, or 1 on component 3.
+                     int dstComponent = operation.GetSource(operation.SourcesCount - 2).Value;
+                     float expectedValue = dstComponent == 3 ? 1f : 0f;
+
+                     if (src.AsFloat() != expectedValue)
+                     {
+                         return false;
+                     }
+                 }
+                 else
+                 {
+                     return false;
+                 }
+             }
+             else if (operation.Inst == Instruction.EmitVertex)
+             {
+                 verticesCount++;
+             }
+             else if (operation.Inst == Instruction.EndPrimitive)
+             {
+                 totalVerticesCount += verticesCount;
+                 verticesCount = 0;
+             }
+         }
+     }
+
+     return totalVerticesCount + verticesCount == 3 && writesLayer;
+ }
+
+ /// <summary>
+ /// Checks if an instruction is an atomic operation, an image store or a global/storage memory store.
+ /// </summary>
+ /// <param name="inst">Instruction to check</param>
+ /// <returns>True if the instruction is one of the listed writes, false otherwise</returns>
+ private static bool IsResourceWrite(Instruction inst)
+ {
+     return inst is
+         Instruction.AtomicAdd or
+         Instruction.AtomicAnd or
+         Instruction.AtomicCompareAndSwap or
+         Instruction.AtomicMaxS32 or
+         Instruction.AtomicMaxU32 or
+         Instruction.AtomicMinS32 or
+         Instruction.AtomicMinU32 or
+         Instruction.AtomicOr or
+         Instruction.AtomicSwap or
+         Instruction.AtomicXor or
+         Instruction.ImageAtomic or
+         Instruction.ImageStore or
+         Instruction.StoreGlobal or
+         Instruction.StoreGlobal16 or
+         Instruction.StoreGlobal8 or
+         Instruction.StoreStorage or
+         Instruction.StoreStorage16 or
+         Instruction.StoreStorage8;
+ }
+ }
+}
diff --git a/src/Ryujinx.Graphics.Shader/Translation/Ssa.cs b/src/Ryujinx.Graphics.Shader/Translation/Ssa.cs
new file mode 100644
index 00000000..16b8b924
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/Translation/Ssa.cs
@@ -0,0 +1,376 @@
+using Ryujinx.Graphics.Shader.Decoders;
+using Ryujinx.Graphics.Shader.IntermediateRepresentation;
+using System.Collections.Generic;
+
+using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
+
+namespace Ryujinx.Graphics.Shader.Translation
+{
+ static class Ssa
+ {
+ private const int GprsAndPredsCount = RegisterConsts.GprsCount + RegisterConsts.PredsCount;
+
+ /// <summary>
+ /// Per-block record of the operand defining each register, plus a bit set of
+ /// the registers that have a phi on the block.
+ /// </summary>
+ private class DefMap
+ {
+     private readonly Dictionary<Register, Operand> _definitions;
+
+     // One bit per register key, 64 keys per element.
+     private readonly long[] _phiBits;
+
+     public DefMap()
+     {
+         _definitions = new Dictionary<Register, Operand>();
+
+         _phiBits = new long[(RegisterConsts.TotalCount + 63) / 64];
+     }
+
+     /// <summary>
+     /// Records the operand defining a register, unless one is already recorded.
+     /// </summary>
+     /// <returns>True if the operand was recorded, false if the register already had one</returns>
+     public bool TryAddOperand(Register reg, Operand operand)
+     {
+         return _definitions.TryAdd(reg, operand);
+     }
+
+     /// <summary>
+     /// Gets the operand recorded for a register, if any.
+     /// </summary>
+     public bool TryGetOperand(Register reg, out Operand operand)
+     {
+         return _definitions.TryGetValue(reg, out operand);
+     }
+
+     /// <summary>
+     /// Marks a register as having a phi on the block.
+     /// </summary>
+     /// <returns>True if the register was not marked before, false otherwise</returns>
+     public bool AddPhi(Register reg)
+     {
+         int key = GetKeyFromRegister(reg);
+
+         ref long word = ref _phiBits[key / 64];
+         long mask = 1L << (key & 63);
+
+         bool isNew = (word & mask) == 0;
+
+         if (isNew)
+         {
+             word |= mask;
+         }
+
+         return isNew;
+     }
+
+     /// <summary>
+     /// Checks if a register was marked as having a phi on the block.
+     /// </summary>
+     public bool HasPhi(Register reg)
+     {
+         int key = GetKeyFromRegister(reg);
+
+         long word = _phiBits[key / 64];
+         long mask = 1L << (key & 63);
+
+         return (word & mask) != 0;
+     }
+ }
+
+ /// <summary>
+ /// Array-backed map from register key to operand that also tracks which keys
+ /// are in use, so that it can be cleared without touching every slot.
+ /// </summary>
+ private class LocalDefMap
+ {
+     private readonly Operand[] _operands;
+     private readonly int[] _usedKeys;
+
+     /// <summary>
+     /// Number of keys currently holding an operand.
+     /// </summary>
+     public int UseCount { get; private set; }
+
+     public LocalDefMap()
+     {
+         _operands = new Operand[RegisterConsts.TotalCount];
+         _usedKeys = new int[RegisterConsts.TotalCount];
+     }
+
+     /// <summary>
+     /// Gets the operand stored for a key, or null if there is none.
+     /// </summary>
+     public Operand Get(int key)
+     {
+         return _operands[key];
+     }
+
+     /// <summary>
+     /// Stores the operand for a key, replacing any previous one.
+     /// </summary>
+     public void Add(int key, Operand operand)
+     {
+         bool isFirstForKey = _operands[key] == null;
+
+         if (isFirstForKey)
+         {
+             // Each key is recorded once, the first time it gets an operand.
+             _usedKeys[UseCount++] = key;
+         }
+
+         _operands[key] = operand;
+     }
+
+     /// <summary>
+     /// Gets the key at the given position of the used keys list, and its current operand.
+     /// </summary>
+     public Operand GetUse(int index, out int key)
+     {
+         key = _usedKeys[index];
+
+         return _operands[key];
+     }
+
+     /// <summary>
+     /// Removes all operands and resets the used keys count to zero.
+     /// </summary>
+     public void Clear()
+     {
+         while (UseCount > 0)
+         {
+             _operands[_usedKeys[--UseCount]] = null;
+         }
+     }
+ }
+
+ /// <summary>
+ /// A defining operand paired with the block it was found on.
+ /// </summary>
+ private readonly struct Definition
+ {
+     public BasicBlock Block { get; }
+     public Operand Local { get; }
+
+     public Definition(BasicBlock block, Operand local)
+     {
+         (Block, Local) = (block, local);
+     }
+ }
+
+ public static void Rename(BasicBlock[] blocks) // Replaces register operands on all the blocks with local variables, in two passes.
+ {
+ DefMap[] globalDefs = new DefMap[blocks.Length]; // One definition map per block.
+ LocalDefMap localDefs = new LocalDefMap(); // Shared by both passes, cleared between blocks.
+
+ for (int blkIndex = 0; blkIndex < blocks.Length; blkIndex++)
+ {
+ globalDefs[blkIndex] = new DefMap();
+ }
+
+ Queue<BasicBlock> dfPhiBlocks = new Queue<BasicBlock>();
+
+ // First pass, get all defs and locals uses.
+ for (int blkIndex = 0; blkIndex < blocks.Length; blkIndex++)
+ {
+ Operand RenameLocal(Operand operand) // Returns the local defined earlier on this block for the register, if any.
+ {
+ if (operand != null && operand.Type == OperandType.Register)
+ {
+ Operand local = localDefs.Get(GetKeyFromRegister(operand.GetRegister()));
+
+ operand = local ?? operand; // Left as a register if this block has not defined it yet.
+ }
+
+ return operand;
+ }
+
+ BasicBlock block = blocks[blkIndex];
+
+ LinkedListNode<INode> node = block.Operations.First;
+
+ while (node != null)
+ {
+ if (node.Value is Operation operation)
+ {
+ for (int index = 0; index < operation.SourcesCount; index++) // Sources are renamed before the destinations of the same operation.
+ {
+ operation.SetSource(index, RenameLocal(operation.GetSource(index)));
+ }
+
+ for (int index = 0; index < operation.DestsCount; index++)
+ {
+ Operand dest = operation.GetDest(index);
+
+ if (dest != null && dest.Type == OperandType.Register)
+ {
+ Operand local = Local(); // Every register write gets a new local.
+
+ localDefs.Add(GetKeyFromRegister(dest.GetRegister()), local);
+
+ operation.SetDest(index, local);
+ }
+ }
+ }
+
+ node = node.Next;
+ }
+
+ int localUses = localDefs.UseCount;
+ for (int index = 0; index < localUses; index++) // Visits each register written on this block.
+ {
+ Operand local = localDefs.GetUse(index, out int key); // Last local assigned to the register on this block.
+
+ Register reg = GetRegisterFromKey(key);
+
+ globalDefs[block.Index].TryAddOperand(reg, local);
+
+ dfPhiBlocks.Enqueue(block);
+
+ while (dfPhiBlocks.TryDequeue(out BasicBlock dfPhiBlock)) // Worklist over dominance frontiers.
+ {
+ foreach (BasicBlock domFrontier in dfPhiBlock.DominanceFrontiers)
+ {
+ if (globalDefs[domFrontier.Index].AddPhi(reg)) // Only newly marked blocks are queued.
+ {
+ dfPhiBlocks.Enqueue(domFrontier);
+ }
+ }
+ }
+ }
+
+ localDefs.Clear();
+ }
+
+ // Second pass, rename variables with definitions on different blocks.
+ for (int blkIndex = 0; blkIndex < blocks.Length; blkIndex++)
+ {
+ BasicBlock block = blocks[blkIndex];
+
+ Operand RenameGlobal(Operand operand) // Resolves a register that is still unrenamed after the first pass.
+ {
+ if (operand != null && operand.Type == OperandType.Register)
+ {
+ int key = GetKeyFromRegister(operand.GetRegister());
+
+ Operand local = localDefs.Get(key);
+
+ if (local != null) // Already resolved for this block.
+ {
+ return local;
+ }
+
+ operand = FindDefinitionForCurr(globalDefs, block, operand.GetRegister());
+
+ localDefs.Add(key, operand); // Cached so that the lookup runs once per register and block.
+ }
+
+ return operand;
+ }
+
+ for (LinkedListNode<INode> node = block.Operations.First; node != null; node = node.Next)
+ {
+ if (node.Value is Operation operation)
+ {
+ for (int index = 0; index < operation.SourcesCount; index++)
+ {
+ operation.SetSource(index, RenameGlobal(operation.GetSource(index)));
+ }
+ }
+ }
+
+ if (blkIndex < blocks.Length - 1) // The map is not cleared after the last block.
+ {
+ localDefs.Clear();
+ }
+ }
+ }
+
+ /// <summary>
+ /// Finds the operand that holds the value of a register at the start of a block.
+ /// </summary>
+ /// <remarks>
+ /// The definitions recorded for the block itself are not used here. The value comes from a phi
+ /// on the block, or from the immediate dominator chain, or is undefined if the block is its own
+ /// immediate dominator.
+ /// </remarks>
+ /// <param name="globalDefs">Definition maps of all blocks, indexed by block index</param>
+ /// <param name="current">Block where the register is used</param>
+ /// <param name="reg">Register to find the definition of</param>
+ /// <returns>Operand holding the register value</returns>
+ private static Operand FindDefinitionForCurr(DefMap[] globalDefs, BasicBlock current, Register reg)
+ {
+     bool hasPhi = globalDefs[current.Index].HasPhi(reg);
+
+     if (hasPhi)
+     {
+         return InsertPhi(globalDefs, current, reg);
+     }
+
+     BasicBlock dominator = current.ImmediateDominator;
+
+     return current != dominator
+         ? FindDefinition(globalDefs, dominator, reg).Local
+         : Undef();
+ }
+
+ /// <summary>
+ /// Finds the closest definition of a register, starting at a block and moving up
+ /// through its immediate dominators.
+ /// </summary>
+ /// <remarks>
+ /// On each block, a recorded definition takes priority over a phi. A phi that
+ /// is found is inserted on that block.
+ /// </remarks>
+ /// <param name="globalDefs">Definition maps of all blocks, indexed by block index</param>
+ /// <param name="current">Block where the search starts</param>
+ /// <param name="reg">Register to find the definition of</param>
+ /// <returns>The definition found, or an undefined value paired with <paramref name="current"/></returns>
+ private static Definition FindDefinition(DefMap[] globalDefs, BasicBlock current, Register reg)
+ {
+     foreach (BasicBlock candidate in SelfAndImmediateDominators(current))
+     {
+         DefMap candidateDefs = globalDefs[candidate.Index];
+
+         if (!candidateDefs.TryGetOperand(reg, out Operand definition))
+         {
+             if (!candidateDefs.HasPhi(reg))
+             {
+                 continue;
+             }
+
+             definition = InsertPhi(globalDefs, candidate, reg);
+         }
+
+         return new Definition(candidate, definition);
+     }
+
+     return new Definition(current, Undef());
+ }
+
+ /// <summary>
+ /// Enumerates a block followed by its chain of immediate dominators, ending
+ /// with the block that is its own immediate dominator.
+ /// </summary>
+ /// <param name="block">First block of the sequence</param>
+ /// <returns>The block and its immediate dominators, closest first</returns>
+ private static IEnumerable<BasicBlock> SelfAndImmediateDominators(BasicBlock block)
+ {
+     BasicBlock cursor = block;
+
+     while (true)
+     {
+         yield return cursor;
+
+         BasicBlock dominator = cursor.ImmediateDominator;
+
+         if (cursor == dominator)
+         {
+             yield break;
+         }
+
+         cursor = dominator;
+     }
+ }
+
+ /// <summary>
+ /// Creates the phi node of a register on a block, and fills it with the
+ /// definition found for each predecessor of the block.
+ /// </summary>
+ /// <param name="globalDefs">Definition maps of all blocks, indexed by block index</param>
+ /// <param name="block">Block that gets the phi node</param>
+ /// <param name="reg">Register the phi node is for</param>
+ /// <returns>The local variable defined by the phi node</returns>
+ private static Operand InsertPhi(DefMap[] globalDefs, BasicBlock block, Register reg)
+ {
+     // The block was marked as needing a phi for this register, but the phi
+     // node itself does not exist yet. It is created now, and its result
+     // becomes the definition of the register on this block.
+     Operand phiResult = Local();
+     PhiNode phiNode = new PhiNode(phiResult);
+
+     AddPhi(block, phiNode);
+
+     // The result is recorded before the predecessors are visited, so searches
+     // that reach this block again find it.
+     globalDefs[block.Index].TryAddOperand(reg, phiResult);
+
+     foreach (BasicBlock predecessor in block.Predecessors)
+     {
+         Definition incoming = FindDefinition(globalDefs, predecessor, reg);
+
+         phiNode.AddSource(incoming.Block, incoming.Local);
+     }
+
+     return phiResult;
+ }
+
+ /// <summary>
+ /// Adds a phi node to the operations of a block.
+ /// </summary>
+ /// <remarks>
+ /// Starting from the first operation, the list is walked for as long as the next node is a phi node.
+ /// The new node goes after the node where the walk stops if that node is a phi node, otherwise it
+ /// goes at the start of the list.
+ /// </remarks>
+ /// <param name="block">Block that gets the phi node</param>
+ /// <param name="phi">Phi node to add</param>
+ private static void AddPhi(BasicBlock block, PhiNode phi)
+ {
+     LinkedListNode<INode> anchor = block.Operations.First;
+
+     while (anchor?.Next?.Value is PhiNode)
+     {
+         anchor = anchor.Next;
+     }
+
+     if (anchor?.Value is PhiNode)
+     {
+         block.Operations.AddAfter(anchor, phi);
+
+         return;
+     }
+
+     block.Operations.AddFirst(phi);
+ }
+
+ /// <summary>
+ /// Maps a register to a unique integer key.
+ /// </summary>
+ /// <remarks>
+ /// General purpose registers come first, then predicates, then every other type (flags).
+ /// </remarks>
+ /// <param name="reg">Register to get the key of</param>
+ /// <returns>Key of the register</returns>
+ private static int GetKeyFromRegister(Register reg)
+ {
+     return reg.Type switch
+     {
+         RegisterType.Gpr => reg.Index,
+         RegisterType.Predicate => RegisterConsts.GprsCount + reg.Index,
+         _ /* RegisterType.Flag */ => GprsAndPredsCount + reg.Index
+     };
+ }
+
+ /// <summary>
+ /// Maps an integer key back to the register it was created from.
+ /// </summary>
+ /// <param name="key">Key returned by <see cref="GetKeyFromRegister"/></param>
+ /// <returns>Register with the type and index encoded on the key</returns>
+ private static Register GetRegisterFromKey(int key)
+ {
+     return key switch
+     {
+         < RegisterConsts.GprsCount => new Register(key, RegisterType.Gpr),
+         < GprsAndPredsCount => new Register(key - RegisterConsts.GprsCount, RegisterType.Predicate),
+         _ /* < RegisterConsts.TotalCount */ => new Register(key - GprsAndPredsCount, RegisterType.Flag)
+     };
+ }
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/Translation/TargetApi.cs b/src/Ryujinx.Graphics.Shader/Translation/TargetApi.cs
new file mode 100644
index 00000000..6ac235a4
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/Translation/TargetApi.cs
@@ -0,0 +1,8 @@
+namespace Ryujinx.Graphics.Shader.Translation
+{
+ /// <summary>
+ /// Graphics API selected on the translation options.
+ /// </summary>
+ public enum TargetApi
+ {
+     OpenGL = 0,
+     Vulkan = 1,
+ }
+}
diff --git a/src/Ryujinx.Graphics.Shader/Translation/TargetLanguage.cs b/src/Ryujinx.Graphics.Shader/Translation/TargetLanguage.cs
new file mode 100644
index 00000000..8314b223
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/Translation/TargetLanguage.cs
@@ -0,0 +1,9 @@
+namespace Ryujinx.Graphics.Shader.Translation
+{
+ /// <summary>
+ /// Language selected on the translation options for the generated shader code.
+ /// </summary>
+ public enum TargetLanguage
+ {
+     Glsl = 0,
+     Spirv = 1,
+     Arb = 2,
+ }
+}
diff --git a/src/Ryujinx.Graphics.Shader/Translation/TranslationFlags.cs b/src/Ryujinx.Graphics.Shader/Translation/TranslationFlags.cs
new file mode 100644
index 00000000..1874dec3
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/Translation/TranslationFlags.cs
@@ -0,0 +1,14 @@
+using System;
+
+namespace Ryujinx.Graphics.Shader.Translation
+{
+ /// <summary>
+ /// Bit flags that control how a shader is decoded and translated.
+ /// </summary>
+ [Flags]
+ public enum TranslationFlags
+ {
+     None = 0x0,
+
+     VertexA = 0x1,
+     Compute = 0x2,
+     DebugMode = 0x4,
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/Translation/TranslationOptions.cs b/src/Ryujinx.Graphics.Shader/Translation/TranslationOptions.cs
new file mode 100644
index 00000000..d9829ac4
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/Translation/TranslationOptions.cs
@@ -0,0 +1,16 @@
+namespace Ryujinx.Graphics.Shader.Translation
+{
+ /// <summary>
+ /// Target language, target API and flags used for a shader translation.
+ /// </summary>
+ public readonly struct TranslationOptions
+ {
+     public TargetLanguage TargetLanguage { get; }
+     public TargetApi TargetApi { get; }
+     public TranslationFlags Flags { get; }
+
+     /// <summary>
+     /// Creates a new set of translation options.
+     /// </summary>
+     /// <param name="targetLanguage">Language of the generated code</param>
+     /// <param name="targetApi">API the generated code is for</param>
+     /// <param name="flags">Translation flags</param>
+     public TranslationOptions(TargetLanguage targetLanguage, TargetApi targetApi, TranslationFlags flags)
+     {
+         (TargetLanguage, TargetApi, Flags) = (targetLanguage, targetApi, flags);
+     }
+ }
+}
diff --git a/src/Ryujinx.Graphics.Shader/Translation/Translator.cs b/src/Ryujinx.Graphics.Shader/Translation/Translator.cs
new file mode 100644
index 00000000..77d3b568
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/Translation/Translator.cs
@@ -0,0 +1,362 @@
+using Ryujinx.Graphics.Shader.CodeGen.Glsl;
+using Ryujinx.Graphics.Shader.CodeGen.Spirv;
+using Ryujinx.Graphics.Shader.Decoders;
+using Ryujinx.Graphics.Shader.IntermediateRepresentation;
+using Ryujinx.Graphics.Shader.StructuredIr;
+using Ryujinx.Graphics.Shader.Translation.Optimizations;
+using System;
+using System.Linq;
+using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
+
+namespace Ryujinx.Graphics.Shader.Translation
+{
+ public static class Translator
+ {
+ private const int HeaderSize = 0x50;
+
+ /// <summary>
+ /// Operations emitted for a single shader function.
+ /// </summary>
+ internal readonly struct FunctionCode
+ {
+     public Operation[] Code { get; }
+
+     public FunctionCode(Operation[] code) => Code = code;
+ }
+
+ /// <summary>
+ /// Decodes the shader at the given address and creates a translation context for it.
+ /// </summary>
+ /// <param name="address">Address of the shader</param>
+ /// <param name="gpuAccessor">Accessor used to read the shader</param>
+ /// <param name="options">Translation options</param>
+ /// <returns>Translation context of the decoded shader</returns>
+ public static TranslatorContext CreateContext(ulong address, IGpuAccessor gpuAccessor, TranslationOptions options)
+     => DecodeShader(address, gpuAccessor, options);
+
+ /// <summary>
+ /// Runs the translation passes on the emitted functions and generates the final shader program.
+ /// </summary>
+ /// <param name="functions">Emitted code of each function, the first one being the entry point</param>
+ /// <param name="config">Shader configuration</param>
+ /// <returns>The generated shader program</returns>
+ /// <exception cref="NotImplementedException">The target language is neither GLSL nor SPIR-V</exception>
+ internal static ShaderProgram Translate(FunctionCode[] functions, ShaderConfig config)
+ {
+     int count = functions.Length;
+
+     var graphs = new ControlFlowGraph[count];
+     var usages = new RegisterUsage.FunctionRegisterUsage[count];
+
+     // All the graphs are built first, as fixing up the calls of a function needs
+     // the register usage of the functions it calls.
+     for (int index = 0; index < count; index++)
+     {
+         graphs[index] = ControlFlowGraph.Create(functions[index].Code);
+
+         // Register usage is not calculated for the first function.
+         if (index != 0)
+         {
+             usages[index] = RegisterUsage.RunPass(graphs[index]);
+         }
+     }
+
+     Function[] translated = new Function[count];
+
+     for (int index = 0; index < count; index++)
+     {
+         ControlFlowGraph graph = graphs[index];
+
+         int inArgumentsCount = index != 0 ? usages[index].InArguments.Length : 0;
+         int outArgumentsCount = index != 0 ? usages[index].OutArguments.Length : 0;
+
+         if (graph.Blocks.Length != 0)
+         {
+             RegisterUsage.FixupCalls(graph.Blocks, usages);
+
+             Dominance.FindDominators(graph);
+             Dominance.FindDominanceFrontiers(graph.Blocks);
+
+             Ssa.Rename(graph.Blocks);
+
+             Optimizer.RunPass(graph.Blocks, config);
+             Rewriter.RunPass(graph.Blocks, config);
+         }
+
+         translated[index] = new Function(graph.Blocks, $"fun{index}", false, inArgumentsCount, outArgumentsCount);
+     }
+
+     var identification = ShaderIdentifier.Identify(translated, config);
+
+     var structuredInfo = StructuredProgram.MakeStructuredProgram(translated, config);
+
+     var programInfo = config.CreateProgramInfo(identification);
+
+     TargetLanguage language = config.Options.TargetLanguage;
+
+     switch (language)
+     {
+         case TargetLanguage.Glsl:
+             return new ShaderProgram(programInfo, TargetLanguage.Glsl, GlslGenerator.Generate(structuredInfo, config));
+         case TargetLanguage.Spirv:
+             return new ShaderProgram(programInfo, TargetLanguage.Spirv, SpirvGenerator.Generate(structuredInfo, config));
+         default:
+             throw new NotImplementedException(language.ToString());
+     }
+ }
+
+ /// <summary>
+ /// Decodes a shader and creates the translation context for it.
+ /// </summary>
+ /// <remarks>
+ /// When the compute flag is not set, a header is read at the address and
+ /// the code is decoded starting right after it.
+ /// </remarks>
+ /// <param name="address">Address of the shader</param>
+ /// <param name="gpuAccessor">Accessor used to read the shader</param>
+ /// <param name="options">Translation options</param>
+ /// <returns>Translation context of the decoded shader</returns>
+ private static TranslatorContext DecodeShader(ulong address, IGpuAccessor gpuAccessor, TranslationOptions options)
+ {
+     bool isCompute = options.Flags.HasFlag(TranslationFlags.Compute);
+
+     ShaderConfig config = isCompute
+         ? new ShaderConfig(gpuAccessor, options)
+         : new ShaderConfig(new ShaderHeader(gpuAccessor, address), gpuAccessor, options);
+
+     DecodedProgram program = Decoder.Decode(config, isCompute ? address : address + HeaderSize);
+
+     // The highest block end address is used as the code size.
+     ulong highestEndAddress = 0;
+
+     foreach (DecodedFunction function in program)
+     {
+         foreach (Block block in function.Blocks)
+         {
+             if (block.EndAddress > highestEndAddress)
+             {
+                 highestEndAddress = block.EndAddress;
+             }
+         }
+     }
+
+     int headerSize = isCompute ? 0 : HeaderSize;
+
+     config.SizeAdd((int)highestEndAddress + headerSize);
+
+     return new TranslatorContext(address, program, config);
+ }
+
+ /// <summary>
+ /// Emits the operations of every function of a decoded program.
+ /// </summary>
+ /// <param name="program">Decoded program</param>
+ /// <param name="config">Shader configuration</param>
+ /// <param name="initializeOutputs">True to emit the output initialization at the start of the first function</param>
+ /// <param name="initializationOperations">Operation count of the first function after the output initialization, 0 if it was not emitted</param>
+ /// <returns>Emitted code, indexed by function ID</returns>
+ internal static FunctionCode[] EmitShader(DecodedProgram program, ShaderConfig config, bool initializeOutputs, out int initializationOperations)
+ {
+     initializationOperations = 0;
+
+     FunctionMatch.RunPass(program);
+
+     // IDs are assigned in address order, to functions that are not compiler generated.
+     var functionsWithCode = program.OrderBy(x => x.Address).Where(x => !x.IsCompilerGenerated);
+
+     foreach (DecodedFunction function in functionsWithCode)
+     {
+         program.AddFunctionAndSetId(function);
+     }
+
+     FunctionCode[] emitted = new FunctionCode[program.FunctionsWithIdCount];
+
+     for (int id = 0; id < emitted.Length; id++)
+     {
+         bool isEntryPoint = id == 0;
+
+         EmitterContext context = new EmitterContext(program, config, !isEntryPoint);
+
+         if (initializeOutputs && isEntryPoint)
+         {
+             EmitOutputsInitialization(context, config);
+             initializationOperations = context.OperationsCount;
+         }
+
+         DecodedFunction function = program.GetFunctionById(id);
+
+         foreach (Block block in function.Blocks)
+         {
+             context.CurrBlock = block;
+
+             context.EnterBlock(block.Address);
+
+             EmitOps(context, block);
+         }
+
+         emitted[id] = new FunctionCode(context.GetOperations());
+     }
+
+     return emitted;
+ }
+
+ /// <summary>
+ /// Emits stores that give an initial value to the shader outputs.
+ /// </summary>
+ /// <param name="context">Context where the stores are emitted</param>
+ /// <param name="config">Shader configuration</param>
+ private static void EmitOutputsInitialization(EmitterContext context, ShaderConfig config)
+ {
+     switch (config.Stage)
+     {
+         // Compute has no output attributes, and fragment is the last stage, so we
+         // don't need to initialize outputs on those stages.
+         case ShaderStage.Compute:
+         case ShaderStage.Fragment:
+             return;
+         case ShaderStage.Vertex:
+             InitializePositionOutput(context);
+             break;
+     }
+
+     // Each set bit is one component of an input used by the next stage.
+     UInt128 pendingComponents = context.Config.NextInputAttributesComponents;
+
+     while (pendingComponents != UInt128.Zero)
+     {
+         int component = (int)UInt128.TrailingZeroCount(pendingComponents);
+         int location = component / 4;
+
+         pendingComponents &= ~(UInt128.One << component);
+
+         // We don't need to initialize passthrough attributes.
+         bool isPassthrough = (context.Config.PassthroughAttributes & (1 << location)) != 0;
+
+         if (!isPassthrough)
+         {
+             InitializeOutputComponent(context, location, component & 3, perPatch: false);
+         }
+     }
+
+     var perPatchLocations = context.Config.NextUsedInputAttributesPerPatch;
+
+     if (perPatchLocations != null)
+     {
+         foreach (int location in perPatchLocations.Order())
+         {
+             InitializeOutput(context, location, perPatch: true);
+         }
+     }
+
+     if (!config.NextUsesFixedFuncAttributes)
+     {
+         return;
+     }
+
+     bool supportsLayerFromVertexOrTess = config.GpuAccessor.QueryHostSupportsLayerVertexTessellation();
+
+     int firstFixedAttr = supportsLayerFromVertexOrTess ? 0 : 1;
+     int endFixedAttr = firstFixedAttr + 5 + AttributeConsts.TexCoordCount;
+
+     for (int attr = firstFixedAttr; attr < endFixedAttr; attr++)
+     {
+         int location = config.GetFreeUserAttribute(isOutput: true, attr);
+
+         // Stop at the first attribute that has no free location.
+         if (location < 0)
+         {
+             break;
+         }
+
+         InitializeOutput(context, location, perPatch: false);
+
+         config.SetOutputUserAttributeFixedFunc(location);
+     }
+ }
+
+ /// <summary>
+ /// Emits stores that set the position output to (0, 0, 0, 1).
+ /// </summary>
+ /// <param name="context">Context where the stores are emitted</param>
+ private static void InitializePositionOutput(EmitterContext context)
+ {
+     for (int component = 0; component < 4; component++)
+     {
+         float initialValue = component == 3 ? 1f : 0f;
+
+         context.Store(StorageKind.Output, IoVariable.Position, null, Const(component), ConstF(initialValue));
+     }
+ }
+
+ /// <summary>
+ /// Emits the initialization of all 4 components of a user defined output.
+ /// </summary>
+ /// <param name="context">Context where the stores are emitted</param>
+ /// <param name="location">Location of the output</param>
+ /// <param name="perPatch">True for a per-patch output, false otherwise</param>
+ private static void InitializeOutput(EmitterContext context, int location, bool perPatch)
+ {
+     int component = 0;
+
+     while (component < 4)
+     {
+         InitializeOutputComponent(context, location, component, perPatch);
+
+         component++;
+     }
+ }
+
+ /// <summary>
+ /// Emits a store that sets one component of a user defined output to 0, or 1 for component 3.
+ /// </summary>
+ /// <param name="context">Context where the store is emitted</param>
+ /// <param name="location">Location of the output</param>
+ /// <param name="c">Component of the output</param>
+ /// <param name="perPatch">True for a per-patch output, false otherwise</param>
+ private static void InitializeOutputComponent(EmitterContext context, int location, int c, bool perPatch)
+ {
+     StorageKind storageKind = perPatch ? StorageKind.OutputPerPatch : StorageKind.Output;
+
+     bool usesOaIndexing = context.Config.UsedFeatures.HasFlag(FeatureFlags.OaIndexing);
+
+     // Tessellation control outputs that are not per-patch also take the invocation ID.
+     bool needsInvocationId = context.Config.Stage == ShaderStage.TessellationControl && !perPatch;
+
+     if (usesOaIndexing)
+     {
+         Operand invocationId = needsInvocationId
+             ? context.Load(StorageKind.Input, IoVariable.InvocationId)
+             : null;
+
+         // With this feature the output is addressed by a single flat component index.
+         int index = location * 4 + c;
+
+         context.Store(storageKind, IoVariable.UserDefined, invocationId, Const(index), ConstF(c == 3 ? 1f : 0f));
+
+         return;
+     }
+
+     if (needsInvocationId)
+     {
+         Operand invocationId = context.Load(StorageKind.Input, IoVariable.InvocationId);
+
+         // Here the location is passed before the invocation ID.
+         context.Store(storageKind, IoVariable.UserDefined, Const(location), invocationId, Const(c), ConstF(c == 3 ? 1f : 0f));
+
+         return;
+     }
+
+     context.Store(storageKind, IoVariable.UserDefined, null, Const(location), Const(c), ConstF(c == 3 ? 1f : 0f));
+ }
+
+ private static void EmitOps(EmitterContext context, Block block) // Emits the operations of every instruction of the block, wrapping predicated ones in a conditional branch.
+ {
+ for (int opIndex = 0; opIndex < block.OpCodes.Count; opIndex++)
+ {
+ InstOp op = block.OpCodes[opIndex];
+
+ if (context.Config.Options.Flags.HasFlag(TranslationFlags.DebugMode)) // Debug mode adds one comment node per instruction.
+ {
+ string instName;
+
+ if (op.Emitter != null)
+ {
+ instName = op.Name.ToString();
+ }
+ else
+ {
+ instName = "???"; // No emitter is available for this instruction.
+
+ context.Config.GpuAccessor.Log($"Invalid instruction at 0x{op.Address:X6} (0x{op.RawOpCode:X16}).");
+ }
+
+ string dbgComment = $"0x{op.Address:X6}: 0x{op.RawOpCode:X16} {instName}";
+
+ context.Add(new CommentNode(dbgComment));
+ }
+
+ InstConditional opConditional = new InstConditional(op.RawOpCode); // Gives access to the predicate fields of the opcode.
+
+ bool noPred = op.Props.HasFlag(InstProps.NoPred);
+ if (!noPred && opConditional.Pred == RegisterConsts.PredicateTrueIndex && opConditional.PredInv) // Inverted "true" predicate, nothing is emitted.
+ {
+ continue;
+ }
+
+ Operand predSkipLbl = null; // Label placed after the instruction, when one is needed.
+
+ if (Decoder.IsPopBranch(op.Name))
+ {
+ // If the instruction is a SYNC or BRK instruction with only one
+ // possible target address, then the instruction is basically
+ // just a simple branch, we can generate code similar to branch
+ // instructions, with the condition check on the branch itself.
+ noPred = block.SyncTargets.Count <= 1;
+ }
+ else if (op.Name == InstName.Bra)
+ {
+ noPred = true; // No predicate branch is emitted here for BRA.
+ }
+
+ if (!(opConditional.Pred == RegisterConsts.PredicateTrueIndex || noPred)) // The instruction needs a predicate check around it.
+ {
+ Operand label;
+
+ if (opIndex == block.OpCodes.Count - 1 && block.HasNext()) // Last instruction, the next block label is the skip target.
+ {
+ label = context.GetLabel(block.Successors[0].Address);
+ }
+ else
+ {
+ label = Label();
+
+ predSkipLbl = label;
+ }
+
+ Operand pred = Register(opConditional.Pred, RegisterType.Predicate);
+
+ if (opConditional.PredInv) // The branch skips the instruction, so its condition is the opposite of the predicate.
+ {
+ context.BranchIfTrue(label, pred);
+ }
+ else
+ {
+ context.BranchIfFalse(label, pred);
+ }
+ }
+
+ context.CurrOp = op;
+
+ op.Emitter?.Invoke(context); // Instructions without an emitter produce no operations.
+
+ if (predSkipLbl != null)
+ {
+ context.MarkLabel(predSkipLbl); // The skip branch lands right after the instruction.
+ }
+ }
+ }
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/Translation/TranslatorContext.cs b/src/Ryujinx.Graphics.Shader/Translation/TranslatorContext.cs
new file mode 100644
index 00000000..4b4cc8d9
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/Translation/TranslatorContext.cs
@@ -0,0 +1,255 @@
+using Ryujinx.Graphics.Shader.CodeGen.Glsl;
+using Ryujinx.Graphics.Shader.CodeGen.Spirv;
+using Ryujinx.Graphics.Shader.Decoders;
+using Ryujinx.Graphics.Shader.IntermediateRepresentation;
+using Ryujinx.Graphics.Shader.StructuredIr;
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Numerics;
+
+using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
+using static Ryujinx.Graphics.Shader.Translation.Translator;
+
+namespace Ryujinx.Graphics.Shader.Translation
+{
+ public class TranslatorContext
+ {
+        private readonly DecodedProgram _program;
+        private ShaderConfig _config;
+
+        /// <summary>
+        /// Address that was supplied when this context was created.
+        /// </summary>
+        public ulong Address { get; }
+
+        /// <summary>
+        /// Shader stage, as reported by the shader configuration.
+        /// </summary>
+        public ShaderStage Stage
+        {
+            get { return _config.Stage; }
+        }
+
+        /// <summary>
+        /// Size value, as reported by the shader configuration.
+        /// </summary>
+        public int Size
+        {
+            get { return _config.Size; }
+        }
+
+        /// <summary>
+        /// Constant buffer 1 data size, as reported by the shader configuration.
+        /// </summary>
+        public int Cb1DataSize
+        {
+            get { return _config.Cb1DataSize; }
+        }
+
+        /// <summary>
+        /// Layer output written flag, as reported by the shader configuration.
+        /// </summary>
+        public bool LayerOutputWritten
+        {
+            get { return _config.LayerOutputWritten; }
+        }
+
+        /// <summary>
+        /// GPU accessor held by the shader configuration.
+        /// </summary>
+        public IGpuAccessor GpuAccessor
+        {
+            get { return _config.GpuAccessor; }
+        }
+
+        /// <summary>
+        /// Creates a translator context for an already decoded program.
+        /// </summary>
+        /// <param name="address">Value exposed through <see cref="Address"/></param>
+        /// <param name="program">Decoded program that this context translates</param>
+        /// <param name="config">Shader configuration used while translating</param>
+        internal TranslatorContext(ulong address, DecodedProgram program, ShaderConfig config)
+        {
+            (Address, _program, _config) = (address, program, config);
+        }
+
+        /// <summary>
+        /// Checks whether an operation is a load whose first source selects <see cref="IoVariable.UserDefined"/>.
+        /// </summary>
+        /// <param name="operation">Operation to inspect</param>
+        /// <returns>True for a user defined load, false otherwise</returns>
+        private static bool IsLoadUserDefined(Operation operation)
+        {
+            // TODO: Check if sources count match and all sources are constant.
+            if (operation.Inst != Instruction.Load)
+            {
+                return false;
+            }
+
+            // The first source is only read once the instruction is known to be a load.
+            IoVariable ioVariable = (IoVariable)operation.GetSource(0).Value;
+
+            return ioVariable == IoVariable.UserDefined;
+        }
+
+        /// <summary>
+        /// Checks whether an operation is a store whose first source selects <see cref="IoVariable.UserDefined"/>.
+        /// </summary>
+        /// <param name="operation">Operation to inspect</param>
+        /// <returns>True for a user defined store, false otherwise</returns>
+        private static bool IsStoreUserDefined(Operation operation)
+        {
+            // TODO: Check if sources count match and all sources are constant.
+            if (operation.Inst != Instruction.Store)
+            {
+                return false;
+            }
+
+            // The first source is only read once the instruction is known to be a store.
+            IoVariable ioVariable = (IoVariable)operation.GetSource(0).Value;
+
+            return ioVariable == IoVariable.UserDefined;
+        }
+
+        /// <summary>
+        /// Combines the code of two shaders into a single program where shader A runs before shader B.
+        /// </summary>
+        /// <param name="a">Functions of shader A, the first element being the one merged into the combined entry function</param>
+        /// <param name="b">Functions of shader B, the first element being the one merged into the combined entry function</param>
+        /// <param name="aStart">Index of the first operation of the first function of shader A that is copied to the output</param>
+        /// <returns>
+        /// The combined entry function at index 0, followed by the remaining functions of shader A
+        /// and then the remaining functions of shader B
+        /// </returns>
+        private static FunctionCode[] Combine(FunctionCode[] a, FunctionCode[] b, int aStart)
+        {
+            // Here we combine two shaders.
+            // For shader A:
+            // - All user attribute stores on shader A are turned into copies to a
+            // temporary variable. It's assumed that shader B will consume them.
+            // - All return instructions are turned into branch instructions, the
+            // branch target being the start of the shader B code.
+            // For shader B:
+            // - All user attribute loads on shader B are turned into copies from a
+            // temporary variable, as long that attribute is written by shader A.
+            FunctionCode[] output = new FunctionCode[a.Length + b.Length - 1];
+
+            var codeA = a[0].Code;
+            var codeB = b[0].Code;
+
+            // Reserve room for the operations that are actually appended below (plus the label),
+            // instead of sizing the list from the number of functions.
+            List<Operation> ops = new List<Operation>(Math.Max(0, codeA.Length - aStart) + codeB.Length + 1);
+
+            // One temporary slot per pair of second and third source values of a user defined access
+            // (presumably attribute location and component - confirm against the emitter).
+            Operand[] temps = new Operand[AttributeConsts.UserAttributesCount * 4];
+
+            Operand lblB = Label();
+
+            for (int index = aStart; index < codeA.Length; index++)
+            {
+                Operation operation = codeA[index];
+
+                if (IsStoreUserDefined(operation))
+                {
+                    int tIndex = operation.GetSource(1).Value * 4 + operation.GetSource(2).Value;
+
+                    Operand temp = temps[tIndex];
+
+                    if (temp == null)
+                    {
+                        temp = Local();
+
+                        temps[tIndex] = temp;
+                    }
+
+                    // The last source of the store is the value being written.
+                    operation.Dest = temp;
+                    operation.TurnIntoCopy(operation.GetSource(operation.SourcesCount - 1));
+                }
+
+                if (operation.Inst == Instruction.Return)
+                {
+                    ops.Add(new Operation(Instruction.Branch, lblB));
+                }
+                else
+                {
+                    ops.Add(operation);
+                }
+            }
+
+            ops.Add(new Operation(Instruction.MarkLabel, lblB));
+
+            for (int index = 0; index < codeB.Length; index++)
+            {
+                Operation operation = codeB[index];
+
+                if (IsLoadUserDefined(operation))
+                {
+                    int tIndex = operation.GetSource(1).Value * 4 + operation.GetSource(2).Value;
+
+                    Operand temp = temps[tIndex];
+
+                    // Loads of slots never stored by shader A are left untouched.
+                    if (temp != null)
+                    {
+                        operation.TurnIntoCopy(temp);
+                    }
+                }
+
+                ops.Add(operation);
+            }
+
+            output[0] = new FunctionCode(ops.ToArray());
+
+            // Functions other than the first one are carried over unchanged, A's first and then B's.
+            Array.Copy(a, 1, output, 1, a.Length - 1);
+            Array.Copy(b, 1, output, a.Length, b.Length - 1);
+
+            return output;
+        }
+
+        /// <summary>
+        /// Passes the configuration of the next stage to the configuration of this context,
+        /// through <c>MergeFromtNextStage</c>.
+        /// </summary>
+        /// <param name="nextStage">Translator context of the next stage</param>
+        public void SetNextStage(TranslatorContext nextStage) => _config.MergeFromtNextStage(nextStage._config);
+
+        /// <summary>
+        /// Forwards the geometry shader layer input attribute to the shader configuration.
+        /// </summary>
+        /// <param name="attr">Attribute value handed to the configuration</param>
+        public void SetGeometryShaderLayerInputAttribute(int attr) => _config.SetGeometryShaderLayerInputAttribute(attr);
+
+        /// <summary>
+        /// Forwards the "last in vertex pipeline" notification to the shader configuration.
+        /// </summary>
+        public void SetLastInVertexPipeline() => _config.SetLastInVertexPipeline();
+
+        /// <summary>
+        /// Translates the program of this context, optionally combined with the program of another context.
+        /// </summary>
+        /// <param name="other">
+        /// Optional context whose code is placed before the code of this context.
+        /// When null, only the program of this context is translated
+        /// </param>
+        /// <returns>Translated shader program</returns>
+        public ShaderProgram Translate(TranslatorContext other = null)
+        {
+            bool hasOther = other != null;
+
+            // Outputs are only initialized here when no other shader runs before this one.
+            FunctionCode[] ownCode = EmitShader(_program, _config, initializeOutputs: !hasOther, out _);
+
+            if (!hasOther)
+            {
+                return Translator.Translate(ownCode, _config);
+            }
+
+            other._config.MergeOutputUserAttributes(_config.UsedOutputAttributes, Enumerable.Empty<int>());
+
+            FunctionCode[] otherCode = EmitShader(other._program, other._config, initializeOutputs: true, out int otherStart);
+
+            // The other shader is passed as shader A, so its code comes first on the combined program.
+            FunctionCode[] combinedCode = Combine(otherCode, ownCode, otherStart);
+
+            _config.InheritFrom(other._config);
+
+            return Translator.Translate(combinedCode, _config);
+        }
+
+        /// <summary>
+        /// Generates a geometry shader that copies the inputs of every vertex of the input primitive to its outputs.
+        /// </summary>
+        /// <remarks>
+        /// For each vertex, all components of the user attributes flagged on the used output attributes mask
+        /// of this context are loaded and stored again, followed by the position. A component whose attribute
+        /// offset matches the layer output attribute is stored on the layer output rather than on a user
+        /// defined output.
+        /// </remarks>
+        /// <returns>Geometry shader program for the target language selected on the translation options</returns>
+        /// <exception cref="NotImplementedException">Thrown when the target language is neither GLSL nor SPIR-V</exception>
+        public ShaderProgram GenerateGeometryPassthrough()
+        {
+            int usedOutputsMask = _config.UsedOutputAttributes;
+            int layerAttr = _config.LayerOutputAttribute;
+
+            // The output topology and the amount of emitted vertices follow the input topology.
+            (OutputTopology topology, int vertexCount) = GpuAccessor.QueryPrimitiveTopology() switch
+            {
+                InputTopology.Points => (OutputTopology.PointList, 1),
+                InputTopology.Lines => (OutputTopology.LineStrip, 2),
+                InputTopology.LinesAdjacency => (OutputTopology.LineStrip, 2),
+                _ => (OutputTopology.TriangleStrip, 3)
+            };
+
+            ShaderConfig passthroughConfig = new ShaderConfig(ShaderStage.Geometry, topology, vertexCount, GpuAccessor, _config.Options);
+
+            EmitterContext emitter = new EmitterContext(default, passthroughConfig, false);
+
+            for (int vertex = 0; vertex < vertexCount; vertex++)
+            {
+                // Visit the set bits of the mask, from the lowest to the highest.
+                for (int pendingMask = usedOutputsMask; pendingMask != 0;)
+                {
+                    int location = BitOperations.TrailingZeroCount(pendingMask);
+
+                    pendingMask &= ~(1 << location);
+
+                    int attrBase = AttributeConsts.UserAttributeBase + location * 16;
+
+                    for (int component = 0; component < 4; component++)
+                    {
+                        Operand value = emitter.Load(StorageKind.Input, IoVariable.UserDefined, Const(location), Const(vertex), Const(component));
+
+                        if (attrBase + component * 4 != layerAttr)
+                        {
+                            emitter.Store(StorageKind.Output, IoVariable.UserDefined, null, Const(location), Const(component), value);
+                            passthroughConfig.SetOutputUserAttribute(location);
+                        }
+                        else
+                        {
+                            // This component carries the layer, so it is redirected to the layer output.
+                            emitter.Store(StorageKind.Output, IoVariable.Layer, null, value);
+                        }
+
+                        passthroughConfig.SetInputUserAttribute(location, component);
+                    }
+                }
+
+                for (int component = 0; component < 4; component++)
+                {
+                    Operand position = emitter.Load(StorageKind.Input, IoVariable.Position, Const(vertex), Const(component));
+
+                    emitter.Store(StorageKind.Output, IoVariable.Position, null, Const(component), position);
+                }
+
+                emitter.EmitVertex();
+            }
+
+            emitter.EndPrimitive();
+
+            var emittedOperations = emitter.GetOperations();
+            var graph = ControlFlowGraph.Create(emittedOperations);
+            var mainFunction = new Function(graph.Blocks, "main", false, 0, 0);
+
+            var structuredInfo = StructuredProgram.MakeStructuredProgram(new[] { mainFunction }, passthroughConfig);
+
+            var programInfo = passthroughConfig.CreateProgramInfo();
+
+            switch (passthroughConfig.Options.TargetLanguage)
+            {
+                case TargetLanguage.Glsl:
+                    return new ShaderProgram(programInfo, TargetLanguage.Glsl, GlslGenerator.Generate(structuredInfo, passthroughConfig));
+                case TargetLanguage.Spirv:
+                    return new ShaderProgram(programInfo, TargetLanguage.Spirv, SpirvGenerator.Generate(structuredInfo, passthroughConfig));
+                default:
+                    throw new NotImplementedException(passthroughConfig.Options.TargetLanguage.ToString());
+            }
+        }
+ }
+}