aboutsummaryrefslogtreecommitdiff
path: root/src/Ryujinx.Graphics.Shader
diff options
context:
space:
mode:
author TSR Berry <20988865+TSRBerry@users.noreply.github.com> 2023-04-08 01:22:00 +0200
committer Mary <thog@protonmail.com> 2023-04-27 23:51:14 +0200
commit cee712105850ac3385cd0091a923438167433f9f (patch)
tree 4a5274b21d8b7f938c0d0ce18736d3f2993b11b1 /src/Ryujinx.Graphics.Shader
parent cd124bda587ef09668a971fa1cac1c3f0cfc9f21 (diff)
Move solution and projects to src
Diffstat (limited to 'src/Ryujinx.Graphics.Shader')
-rw-r--r--src/Ryujinx.Graphics.Shader/AlphaTestOp.cs14
-rw-r--r--src/Ryujinx.Graphics.Shader/AttributeType.cs38
-rw-r--r--src/Ryujinx.Graphics.Shader/BufferDescriptor.cs26
-rw-r--r--src/Ryujinx.Graphics.Shader/BufferUsageFlags.cs18
-rw-r--r--src/Ryujinx.Graphics.Shader/CodeGen/Glsl/CodeGenContext.cs95
-rw-r--r--src/Ryujinx.Graphics.Shader/CodeGen/Glsl/Declarations.cs818
-rw-r--r--src/Ryujinx.Graphics.Shader/CodeGen/Glsl/DefaultNames.cs37
-rw-r--r--src/Ryujinx.Graphics.Shader/CodeGen/Glsl/GlslGenerator.cs154
-rw-r--r--src/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/AtomicMinMaxS32Shared.glsl21
-rw-r--r--src/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/AtomicMinMaxS32Storage.glsl21
-rw-r--r--src/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/HelperFunctionNames.cs22
-rw-r--r--src/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/MultiplyHighS32.glsl7
-rw-r--r--src/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/MultiplyHighU32.glsl7
-rw-r--r--src/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/Shuffle.glsl11
-rw-r--r--src/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/ShuffleDown.glsl11
-rw-r--r--src/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/ShuffleUp.glsl9
-rw-r--r--src/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/ShuffleXor.glsl11
-rw-r--r--src/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/StoreSharedSmallInt.glsl23
-rw-r--r--src/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/StoreStorageSmallInt.glsl23
-rw-r--r--src/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/SwizzleAdd.glsl7
-rw-r--r--src/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/TexelFetchScale_cp.glsl19
-rw-r--r--src/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/TexelFetchScale_fp.glsl26
-rw-r--r--src/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/TexelFetchScale_vp.glsl20
-rw-r--r--src/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGen.cs238
-rw-r--r--src/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGenBallot.cs27
-rw-r--r--src/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGenCall.cs29
-rw-r--r--src/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGenFSI.cs29
-rw-r--r--src/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGenHelper.cs231
-rw-r--r--src/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGenMemory.cs939
-rw-r--r--src/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGenPacking.cs56
-rw-r--r--src/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGenVector.cs32
-rw-r--r--src/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstInfo.cs18
-rw-r--r--src/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstType.cs33
-rw-r--r--src/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/IoMap.cs145
-rw-r--r--src/Ryujinx.Graphics.Shader/CodeGen/Glsl/NumberFormatter.cs104
-rw-r--r--src/Ryujinx.Graphics.Shader/CodeGen/Glsl/OperandManager.cs254
-rw-r--r--src/Ryujinx.Graphics.Shader/CodeGen/Glsl/TypeConversion.cs87
-rw-r--r--src/Ryujinx.Graphics.Shader/CodeGen/Spirv/CodeGenContext.cs409
-rw-r--r--src/Ryujinx.Graphics.Shader/CodeGen/Spirv/Declarations.cs615
-rw-r--r--src/Ryujinx.Graphics.Shader/CodeGen/Spirv/EnumConversion.cs22
-rw-r--r--src/Ryujinx.Graphics.Shader/CodeGen/Spirv/Instructions.cs2480
-rw-r--r--src/Ryujinx.Graphics.Shader/CodeGen/Spirv/IoMap.cs86
-rw-r--r--src/Ryujinx.Graphics.Shader/CodeGen/Spirv/OperationResult.cs19
-rw-r--r--src/Ryujinx.Graphics.Shader/CodeGen/Spirv/ScalingHelpers.cs227
-rw-r--r--src/Ryujinx.Graphics.Shader/CodeGen/Spirv/SpirvDelegates.cs226
-rw-r--r--src/Ryujinx.Graphics.Shader/CodeGen/Spirv/SpirvGenerator.cs415
-rw-r--r--src/Ryujinx.Graphics.Shader/CodeGen/Spirv/TextureMeta.cs4
-rw-r--r--src/Ryujinx.Graphics.Shader/Constants.cs16
-rw-r--r--src/Ryujinx.Graphics.Shader/Decoders/Block.cs168
-rw-r--r--src/Ryujinx.Graphics.Shader/Decoders/DecodedFunction.cs48
-rw-r--r--src/Ryujinx.Graphics.Shader/Decoders/DecodedProgram.cs57
-rw-r--r--src/Ryujinx.Graphics.Shader/Decoders/Decoder.cs765
-rw-r--r--src/Ryujinx.Graphics.Shader/Decoders/FunctionType.cs10
-rw-r--r--src/Ryujinx.Graphics.Shader/Decoders/InstDecoders.cs5383
-rw-r--r--src/Ryujinx.Graphics.Shader/Decoders/InstName.cs188
-rw-r--r--src/Ryujinx.Graphics.Shader/Decoders/InstOp.cs27
-rw-r--r--src/Ryujinx.Graphics.Shader/Decoders/InstProps.cs28
-rw-r--r--src/Ryujinx.Graphics.Shader/Decoders/InstTable.cs390
-rw-r--r--src/Ryujinx.Graphics.Shader/Decoders/Register.cs36
-rw-r--r--src/Ryujinx.Graphics.Shader/Decoders/RegisterConsts.cs13
-rw-r--r--src/Ryujinx.Graphics.Shader/Decoders/RegisterType.cs9
-rw-r--r--src/Ryujinx.Graphics.Shader/IGpuAccessor.cs528
-rw-r--r--src/Ryujinx.Graphics.Shader/InputTopology.cs40
-rw-r--r--src/Ryujinx.Graphics.Shader/Instructions/AttributeMap.cs351
-rw-r--r--src/Ryujinx.Graphics.Shader/Instructions/InstEmit.cs379
-rw-r--r--src/Ryujinx.Graphics.Shader/Instructions/InstEmitAluHelper.cs160
-rw-r--r--src/Ryujinx.Graphics.Shader/Instructions/InstEmitAttribute.cs383
-rw-r--r--src/Ryujinx.Graphics.Shader/Instructions/InstEmitBarrier.cs44
-rw-r--r--src/Ryujinx.Graphics.Shader/Instructions/InstEmitBitfield.cs194
-rw-r--r--src/Ryujinx.Graphics.Shader/Instructions/InstEmitConditionCode.cs87
-rw-r--r--src/Ryujinx.Graphics.Shader/Instructions/InstEmitConversion.cs425
-rw-r--r--src/Ryujinx.Graphics.Shader/Instructions/InstEmitFloatArithmetic.cs532
-rw-r--r--src/Ryujinx.Graphics.Shader/Instructions/InstEmitFloatComparison.cs575
-rw-r--r--src/Ryujinx.Graphics.Shader/Instructions/InstEmitFloatMinMax.cs106
-rw-r--r--src/Ryujinx.Graphics.Shader/Instructions/InstEmitFlowControl.cs322
-rw-r--r--src/Ryujinx.Graphics.Shader/Instructions/InstEmitHelper.cs266
-rw-r--r--src/Ryujinx.Graphics.Shader/Instructions/InstEmitIntegerArithmetic.cs699
-rw-r--r--src/Ryujinx.Graphics.Shader/Instructions/InstEmitIntegerComparison.cs310
-rw-r--r--src/Ryujinx.Graphics.Shader/Instructions/InstEmitIntegerLogical.cs167
-rw-r--r--src/Ryujinx.Graphics.Shader/Instructions/InstEmitIntegerMinMax.cs71
-rw-r--r--src/Ryujinx.Graphics.Shader/Instructions/InstEmitMemory.cs541
-rw-r--r--src/Ryujinx.Graphics.Shader/Instructions/InstEmitMove.cs237
-rw-r--r--src/Ryujinx.Graphics.Shader/Instructions/InstEmitMultifunction.cs97
-rw-r--r--src/Ryujinx.Graphics.Shader/Instructions/InstEmitNop.cs15
-rw-r--r--src/Ryujinx.Graphics.Shader/Instructions/InstEmitPredicate.cs54
-rw-r--r--src/Ryujinx.Graphics.Shader/Instructions/InstEmitShift.cs249
-rw-r--r--src/Ryujinx.Graphics.Shader/Instructions/InstEmitSurface.cs796
-rw-r--r--src/Ryujinx.Graphics.Shader/Instructions/InstEmitTexture.cs1312
-rw-r--r--src/Ryujinx.Graphics.Shader/Instructions/InstEmitVideoArithmetic.cs118
-rw-r--r--src/Ryujinx.Graphics.Shader/Instructions/InstEmitVideoMinMax.cs183
-rw-r--r--src/Ryujinx.Graphics.Shader/Instructions/InstEmitWarp.cs84
-rw-r--r--src/Ryujinx.Graphics.Shader/Instructions/InstEmitter.cs6
-rw-r--r--src/Ryujinx.Graphics.Shader/Instructions/Lop3Expression.cs141
-rw-r--r--src/Ryujinx.Graphics.Shader/IntermediateRepresentation/BasicBlock.cs91
-rw-r--r--src/Ryujinx.Graphics.Shader/IntermediateRepresentation/CommentNode.cs12
-rw-r--r--src/Ryujinx.Graphics.Shader/IntermediateRepresentation/Function.cs23
-rw-r--r--src/Ryujinx.Graphics.Shader/IntermediateRepresentation/INode.cs15
-rw-r--r--src/Ryujinx.Graphics.Shader/IntermediateRepresentation/Instruction.cs178
-rw-r--r--src/Ryujinx.Graphics.Shader/IntermediateRepresentation/IoVariable.cs51
-rw-r--r--src/Ryujinx.Graphics.Shader/IntermediateRepresentation/IrConsts.cs8
-rw-r--r--src/Ryujinx.Graphics.Shader/IntermediateRepresentation/Operand.cs79
-rw-r--r--src/Ryujinx.Graphics.Shader/IntermediateRepresentation/OperandHelper.cs62
-rw-r--r--src/Ryujinx.Graphics.Shader/IntermediateRepresentation/OperandType.cs13
-rw-r--r--src/Ryujinx.Graphics.Shader/IntermediateRepresentation/Operation.cs257
-rw-r--r--src/Ryujinx.Graphics.Shader/IntermediateRepresentation/PhiNode.cs107
-rw-r--r--src/Ryujinx.Graphics.Shader/IntermediateRepresentation/StorageKind.cs39
-rw-r--r--src/Ryujinx.Graphics.Shader/IntermediateRepresentation/TextureFlags.cs32
-rw-r--r--src/Ryujinx.Graphics.Shader/IntermediateRepresentation/TextureOperation.cs69
-rw-r--r--src/Ryujinx.Graphics.Shader/OutputTopology.cs24
-rw-r--r--src/Ryujinx.Graphics.Shader/Ryujinx.Graphics.Shader.csproj33
-rw-r--r--src/Ryujinx.Graphics.Shader/SamplerType.cs100
-rw-r--r--src/Ryujinx.Graphics.Shader/ShaderIdentification.cs8
-rw-r--r--src/Ryujinx.Graphics.Shader/ShaderProgram.cs35
-rw-r--r--src/Ryujinx.Graphics.Shader/ShaderProgramInfo.cs51
-rw-r--r--src/Ryujinx.Graphics.Shader/ShaderStage.cs27
-rw-r--r--src/Ryujinx.Graphics.Shader/StructuredIr/AstAssignment.cs35
-rw-r--r--src/Ryujinx.Graphics.Shader/StructuredIr/AstBlock.cs117
-rw-r--r--src/Ryujinx.Graphics.Shader/StructuredIr/AstBlockType.cs12
-rw-r--r--src/Ryujinx.Graphics.Shader/StructuredIr/AstBlockVisitor.cs68
-rw-r--r--src/Ryujinx.Graphics.Shader/StructuredIr/AstComment.cs12
-rw-r--r--src/Ryujinx.Graphics.Shader/StructuredIr/AstHelper.cs74
-rw-r--r--src/Ryujinx.Graphics.Shader/StructuredIr/AstNode.cs11
-rw-r--r--src/Ryujinx.Graphics.Shader/StructuredIr/AstOperand.cs50
-rw-r--r--src/Ryujinx.Graphics.Shader/StructuredIr/AstOperation.cs80
-rw-r--r--src/Ryujinx.Graphics.Shader/StructuredIr/AstOptimizer.cs155
-rw-r--r--src/Ryujinx.Graphics.Shader/StructuredIr/AstTextureOperation.cs36
-rw-r--r--src/Ryujinx.Graphics.Shader/StructuredIr/GotoElimination.cs459
-rw-r--r--src/Ryujinx.Graphics.Shader/StructuredIr/GotoStatement.cs23
-rw-r--r--src/Ryujinx.Graphics.Shader/StructuredIr/HelperFunctionsMask.cs21
-rw-r--r--src/Ryujinx.Graphics.Shader/StructuredIr/IAstNode.cs11
-rw-r--r--src/Ryujinx.Graphics.Shader/StructuredIr/InstructionInfo.cs216
-rw-r--r--src/Ryujinx.Graphics.Shader/StructuredIr/IoDefinition.cs44
-rw-r--r--src/Ryujinx.Graphics.Shader/StructuredIr/OperandInfo.cs33
-rw-r--r--src/Ryujinx.Graphics.Shader/StructuredIr/PhiFunctions.cs45
-rw-r--r--src/Ryujinx.Graphics.Shader/StructuredIr/StructuredFunction.cs42
-rw-r--r--src/Ryujinx.Graphics.Shader/StructuredIr/StructuredProgram.cs421
-rw-r--r--src/Ryujinx.Graphics.Shader/StructuredIr/StructuredProgramContext.cs330
-rw-r--r--src/Ryujinx.Graphics.Shader/StructuredIr/StructuredProgramInfo.cs36
-rw-r--r--src/Ryujinx.Graphics.Shader/SupportBuffer.cs58
-rw-r--r--src/Ryujinx.Graphics.Shader/TessPatchType.cs22
-rw-r--r--src/Ryujinx.Graphics.Shader/TessSpacing.cs22
-rw-r--r--src/Ryujinx.Graphics.Shader/TextureDescriptor.cs34
-rw-r--r--src/Ryujinx.Graphics.Shader/TextureFormat.cs128
-rw-r--r--src/Ryujinx.Graphics.Shader/TextureHandle.cs124
-rw-r--r--src/Ryujinx.Graphics.Shader/TextureUsageFlags.cs19
-rw-r--r--src/Ryujinx.Graphics.Shader/Translation/AggregateType.cs25
-rw-r--r--src/Ryujinx.Graphics.Shader/Translation/AttributeConsts.cs36
-rw-r--r--src/Ryujinx.Graphics.Shader/Translation/ControlFlowGraph.cs176
-rw-r--r--src/Ryujinx.Graphics.Shader/Translation/Dominance.cs94
-rw-r--r--src/Ryujinx.Graphics.Shader/Translation/EmitterContext.cs492
-rw-r--r--src/Ryujinx.Graphics.Shader/Translation/EmitterContextInsts.cs819
-rw-r--r--src/Ryujinx.Graphics.Shader/Translation/FeatureFlags.cs27
-rw-r--r--src/Ryujinx.Graphics.Shader/Translation/FunctionMatch.cs866
-rw-r--r--src/Ryujinx.Graphics.Shader/Translation/GlobalMemory.cs52
-rw-r--r--src/Ryujinx.Graphics.Shader/Translation/Optimizations/BindlessElimination.cs263
-rw-r--r--src/Ryujinx.Graphics.Shader/Translation/Optimizations/BindlessToIndexed.cs85
-rw-r--r--src/Ryujinx.Graphics.Shader/Translation/Optimizations/BranchElimination.cs64
-rw-r--r--src/Ryujinx.Graphics.Shader/Translation/Optimizations/ConstantFolding.cs346
-rw-r--r--src/Ryujinx.Graphics.Shader/Translation/Optimizations/GlobalToStorage.cs433
-rw-r--r--src/Ryujinx.Graphics.Shader/Translation/Optimizations/Optimizer.cs380
-rw-r--r--src/Ryujinx.Graphics.Shader/Translation/Optimizations/Simplification.cs147
-rw-r--r--src/Ryujinx.Graphics.Shader/Translation/Optimizations/Utils.cs68
-rw-r--r--src/Ryujinx.Graphics.Shader/Translation/RegisterUsage.cs486
-rw-r--r--src/Ryujinx.Graphics.Shader/Translation/Rewriter.cs768
-rw-r--r--src/Ryujinx.Graphics.Shader/Translation/ShaderConfig.cs944
-rw-r--r--src/Ryujinx.Graphics.Shader/Translation/ShaderHeader.cs158
-rw-r--r--src/Ryujinx.Graphics.Shader/Translation/ShaderIdentifier.cs185
-rw-r--r--src/Ryujinx.Graphics.Shader/Translation/Ssa.cs376
-rw-r--r--src/Ryujinx.Graphics.Shader/Translation/TargetApi.cs8
-rw-r--r--src/Ryujinx.Graphics.Shader/Translation/TargetLanguage.cs9
-rw-r--r--src/Ryujinx.Graphics.Shader/Translation/TranslationFlags.cs14
-rw-r--r--src/Ryujinx.Graphics.Shader/Translation/TranslationOptions.cs16
-rw-r--r--src/Ryujinx.Graphics.Shader/Translation/Translator.cs362
-rw-r--r--src/Ryujinx.Graphics.Shader/Translation/TranslatorContext.cs255
174 files changed, 36779 insertions, 0 deletions
diff --git a/src/Ryujinx.Graphics.Shader/AlphaTestOp.cs b/src/Ryujinx.Graphics.Shader/AlphaTestOp.cs
new file mode 100644
index 00000000..57c0d131
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/AlphaTestOp.cs
@@ -0,0 +1,14 @@
+namespace Ryujinx.Graphics.Shader
+{
+    /// <summary>
+    /// Operations selectable for the alpha test.
+    /// Members are numbered consecutively, starting at 1.
+    /// </summary>
+    public enum AlphaTestOp
+    {
+        Never = 1,
+        Less = 2,
+        Equal = 3,
+        LessOrEqual = 4,
+        Greater = 5,
+        NotEqual = 6,
+        GreaterOrEqual = 7,
+        Always = 8
+    }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/AttributeType.cs b/src/Ryujinx.Graphics.Shader/AttributeType.cs
new file mode 100644
index 00000000..4e6cad59
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/AttributeType.cs
@@ -0,0 +1,38 @@
+using Ryujinx.Graphics.Shader.Translation;
+using System;
+
+namespace Ryujinx.Graphics.Shader
+{
+    /// <summary>
+    /// Component type of an attribute. Stored as a byte, numbered from zero in declaration order.
+    /// </summary>
+    public enum AttributeType : byte
+    {
+        // Generic types.
+        Float = 0,
+        Sint = 1,
+        Uint = 2
+    }
+
+    /// <summary>
+    /// Conversions from <see cref="AttributeType"/> to other representations.
+    /// </summary>
+    static class AttributeTypeExtensions
+    {
+        /// <summary>
+        /// Gets the name of the four component vector type matching the attribute type.
+        /// </summary>
+        /// <param name="type">Attribute type to convert</param>
+        /// <returns>"vec4", "ivec4" or "uvec4"</returns>
+        /// <exception cref="ArgumentException">Thrown when <paramref name="type"/> is not a known attribute type</exception>
+        public static string ToVec4Type(this AttributeType type)
+        {
+            switch (type)
+            {
+                case AttributeType.Float:
+                    return "vec4";
+                case AttributeType.Sint:
+                    return "ivec4";
+                case AttributeType.Uint:
+                    return "uvec4";
+                default:
+                    throw new ArgumentException($"Invalid attribute type \"{type}\".");
+            }
+        }
+
+        /// <summary>
+        /// Gets the scalar <see cref="AggregateType"/> matching the attribute type.
+        /// </summary>
+        /// <param name="type">Attribute type to convert</param>
+        /// <returns>FP32, S32 or U32 aggregate type</returns>
+        /// <exception cref="ArgumentException">Thrown when <paramref name="type"/> is not a known attribute type</exception>
+        public static AggregateType ToAggregateType(this AttributeType type)
+        {
+            switch (type)
+            {
+                case AttributeType.Float:
+                    return AggregateType.FP32;
+                case AttributeType.Sint:
+                    return AggregateType.S32;
+                case AttributeType.Uint:
+                    return AggregateType.U32;
+                default:
+                    throw new ArgumentException($"Invalid attribute type \"{type}\".");
+            }
+        }
+    }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/BufferDescriptor.cs b/src/Ryujinx.Graphics.Shader/BufferDescriptor.cs
new file mode 100644
index 00000000..4ce8a896
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/BufferDescriptor.cs
@@ -0,0 +1,26 @@
+namespace Ryujinx.Graphics.Shader
+{
+    /// <summary>
+    /// Describes a buffer by its binding and slot, together with the usage flags set on it.
+    /// </summary>
+    public struct BufferDescriptor
+    {
+        // New fields should be added to the end of the struct to keep disk shader cache compatibility.
+
+        public readonly int Binding;
+        public readonly int Slot;
+        public BufferUsageFlags Flags;
+
+        /// <summary>
+        /// Creates a descriptor with no usage flags set.
+        /// </summary>
+        /// <param name="binding">Value stored in <see cref="Binding"/></param>
+        /// <param name="slot">Value stored in <see cref="Slot"/></param>
+        public BufferDescriptor(int binding, int slot)
+        {
+            Flags = BufferUsageFlags.None;
+            Slot = slot;
+            Binding = binding;
+        }
+
+        /// <summary>
+        /// Adds a usage flag to this descriptor.
+        /// </summary>
+        /// <param name="flag">Flag (or flags) to combine into <see cref="Flags"/></param>
+        /// <returns>A copy of the descriptor after the flag has been added</returns>
+        public BufferDescriptor SetFlag(BufferUsageFlags flag)
+        {
+            // This is a struct: the instance is updated in place and the return value is a copy of it.
+            Flags = Flags | flag;
+
+            return this;
+        }
+    }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/BufferUsageFlags.cs b/src/Ryujinx.Graphics.Shader/BufferUsageFlags.cs
new file mode 100644
index 00000000..657546cb
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/BufferUsageFlags.cs
@@ -0,0 +1,18 @@
+using System;
+
+namespace Ryujinx.Graphics.Shader
+{
+    /// <summary>
+    /// Describes the ways a shader uses a buffer.
+    /// </summary>
+    [Flags]
+    public enum BufferUsageFlags
+    {
+        /// <summary>
+        /// No usage flag is set.
+        /// </summary>
+        None = 0x0,
+
+        /// <summary>
+        /// The shader writes to the buffer.
+        /// </summary>
+        Write = 0x1
+    }
+}
diff --git a/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/CodeGenContext.cs b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/CodeGenContext.cs
new file mode 100644
index 00000000..9eb20f6f
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/CodeGenContext.cs
@@ -0,0 +1,95 @@
+using Ryujinx.Graphics.Shader.StructuredIr;
+using Ryujinx.Graphics.Shader.Translation;
+using System.Text;
+
+namespace Ryujinx.Graphics.Shader.CodeGen.Glsl
+{
+    /// <summary>
+    /// Collects the generated shader source text and keeps track of the current
+    /// scope depth so that appended lines are indented consistently.
+    /// </summary>
+    class CodeGenContext
+    {
+        /// <summary>
+        /// Text added once per scope level in front of every appended line.
+        /// </summary>
+        public const string Tab = " ";
+
+        public StructuredFunction CurrentFunction { get; set; }
+
+        public StructuredProgramInfo Info { get; }
+
+        public ShaderConfig Config { get; }
+
+        public OperandManager OperandManager { get; }
+
+        private readonly StringBuilder _sb;
+
+        private int _level;
+
+        private string _indentation;
+
+        /// <summary>
+        /// Creates a new context with an empty output buffer at scope level zero.
+        /// </summary>
+        /// <param name="info">Structured program information exposed through <see cref="Info"/></param>
+        /// <param name="config">Shader configuration exposed through <see cref="Config"/></param>
+        public CodeGenContext(StructuredProgramInfo info, ShaderConfig config)
+        {
+            Info = info;
+            Config = config;
+
+            OperandManager = new OperandManager();
+
+            _sb = new StringBuilder();
+
+            // Start from an explicit empty indentation rather than relying on null being treated as empty.
+            _indentation = string.Empty;
+        }
+
+        /// <summary>
+        /// Appends an empty line (no indentation is written).
+        /// </summary>
+        public void AppendLine()
+        {
+            _sb.AppendLine();
+        }
+
+        /// <summary>
+        /// Appends a line of text prefixed with the indentation of the current scope level.
+        /// </summary>
+        /// <param name="str">Text of the line, without indentation</param>
+        public void AppendLine(string str)
+        {
+            // Appending both parts directly avoids building a temporary concatenated string.
+            _sb.Append(_indentation).AppendLine(str);
+        }
+
+        /// <summary>
+        /// Gets all the text appended so far.
+        /// </summary>
+        /// <returns>The accumulated text</returns>
+        public string GetCode()
+        {
+            return _sb.ToString();
+        }
+
+        /// <summary>
+        /// Writes an opening brace at the current level, then increases the scope level by one.
+        /// </summary>
+        public void EnterScope()
+        {
+            AppendLine("{");
+
+            _level++;
+
+            UpdateIndentation();
+        }
+
+        /// <summary>
+        /// Decreases the scope level by one, then writes a closing brace followed by the suffix.
+        /// Nothing is written when no scope is currently open.
+        /// </summary>
+        /// <param name="suffix">Text written right after the closing brace</param>
+        public void LeaveScope(string suffix = "")
+        {
+            if (_level == 0)
+            {
+                return;
+            }
+
+            _level--;
+
+            UpdateIndentation();
+
+            AppendLine("}" + suffix);
+        }
+
+        /// <summary>
+        /// Gets a function of the program by its index in <see cref="Info"/>.
+        /// </summary>
+        /// <param name="id">Index into the function list</param>
+        /// <returns>The function stored at that index</returns>
+        public StructuredFunction GetFunction(int id)
+        {
+            return Info.Functions[id];
+        }
+
+        /// <summary>
+        /// Recomputes the cached indentation string after the scope level has changed.
+        /// </summary>
+        private void UpdateIndentation()
+        {
+            _indentation = GetIndentation(_level);
+        }
+
+        /// <summary>
+        /// Builds the indentation for a scope level: <see cref="Tab"/> repeated once per level.
+        /// </summary>
+        /// <param name="level">Scope level; zero or less produces an empty string</param>
+        /// <returns>The indentation text</returns>
+        private static string GetIndentation(int level)
+        {
+            if (level <= 0)
+            {
+                return string.Empty;
+            }
+
+            // Sized up front so the result is built with a single allocation,
+            // instead of creating a new string on every loop iteration.
+            StringBuilder builder = new StringBuilder(Tab.Length * level);
+
+            for (int index = 0; index < level; index++)
+            {
+                builder.Append(Tab);
+            }
+
+            return builder.ToString();
+        }
+    }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/Declarations.cs b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/Declarations.cs
new file mode 100644
index 00000000..81b79ec4
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/Declarations.cs
@@ -0,0 +1,818 @@
+using Ryujinx.Common;
+using Ryujinx.Graphics.Shader.StructuredIr;
+using Ryujinx.Graphics.Shader.Translation;
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Numerics;
+
+namespace Ryujinx.Graphics.Shader.CodeGen.Glsl
+{
+ static class Declarations
+ {
+ /// <summary>
+ /// Emits the shader header and global declarations: version and extension directives,
+ /// memory arrays, constant/storage buffers, samplers, images, stage layout qualifiers,
+ /// input and output attributes, the support uniform block, and the helper functions
+ /// flagged in the helper functions mask of <paramref name="info"/>.
+ /// </summary>
+ /// <param name="context">Code generation context that receives the emitted lines</param>
+ /// <param name="info">Structured program information (attributes and helper functions mask)</param>
+ public static void Declare(CodeGenContext context, StructuredProgramInfo info)
+ {
+ // The GLSL version depends on the target API.
+ context.AppendLine(context.Config.Options.TargetApi == TargetApi.Vulkan ? "#version 460 core" : "#version 450 core");
+ context.AppendLine("#extension GL_ARB_gpu_shader_int64 : enable");
+
+ // Use the ARB ballot extension when the host reports support, the KHR subgroup extensions otherwise.
+ if (context.Config.GpuAccessor.QueryHostSupportsShaderBallot())
+ {
+ context.AppendLine("#extension GL_ARB_shader_ballot : enable");
+ }
+ else
+ {
+ context.AppendLine("#extension GL_KHR_shader_subgroup_basic : enable");
+ context.AppendLine("#extension GL_KHR_shader_subgroup_ballot : enable");
+ }
+
+ context.AppendLine("#extension GL_ARB_shader_group_vote : enable");
+ context.AppendLine("#extension GL_EXT_shader_image_load_formatted : enable");
+ context.AppendLine("#extension GL_EXT_texture_shadow_lod : enable");
+
+ // Stage specific extensions.
+ if (context.Config.Stage == ShaderStage.Compute)
+ {
+ context.AppendLine("#extension GL_ARB_compute_shader : enable");
+ }
+ else if (context.Config.Stage == ShaderStage.Fragment)
+ {
+ if (context.Config.GpuAccessor.QueryHostSupportsFragmentShaderInterlock())
+ {
+ context.AppendLine("#extension GL_ARB_fragment_shader_interlock : enable");
+ }
+ else if (context.Config.GpuAccessor.QueryHostSupportsFragmentShaderOrderingIntel())
+ {
+ context.AppendLine("#extension GL_INTEL_fragment_shader_ordering : enable");
+ }
+ }
+ else
+ {
+ if (context.Config.Stage == ShaderStage.Vertex)
+ {
+ context.AppendLine("#extension GL_ARB_shader_draw_parameters : enable");
+ }
+
+ context.AppendLine("#extension GL_ARB_shader_viewport_layer_array : enable");
+ }
+
+ if (context.Config.GpPassthrough && context.Config.GpuAccessor.QueryHostSupportsGeometryShaderPassthrough())
+ {
+ context.AppendLine("#extension GL_NV_geometry_shader_passthrough : enable");
+ }
+
+ if (context.Config.GpuAccessor.QueryHostSupportsViewportMask())
+ {
+ context.AppendLine("#extension GL_NV_viewport_array2 : enable");
+ }
+
+ context.AppendLine("#pragma optionNV(fastmath off)");
+ context.AppendLine();
+
+ context.AppendLine($"const int {DefaultNames.UndefinedName} = 0;");
+ context.AppendLine();
+
+ // Memory arrays are declared as uint, so the queried sizes are converted to an
+ // element count (size / 4, rounded up). Arrays are only declared when non-empty.
+ if (context.Config.Stage == ShaderStage.Compute)
+ {
+ int localMemorySize = BitUtils.DivRoundUp(context.Config.GpuAccessor.QueryComputeLocalMemorySize(), 4);
+
+ if (localMemorySize != 0)
+ {
+ string localMemorySizeStr = NumberFormatter.FormatInt(localMemorySize);
+
+ context.AppendLine($"uint {DefaultNames.LocalMemoryName}[{localMemorySizeStr}];");
+ context.AppendLine();
+ }
+
+ int sharedMemorySize = BitUtils.DivRoundUp(context.Config.GpuAccessor.QueryComputeSharedMemorySize(), 4);
+
+ if (sharedMemorySize != 0)
+ {
+ string sharedMemorySizeStr = NumberFormatter.FormatInt(sharedMemorySize);
+
+ context.AppendLine($"shared uint {DefaultNames.SharedMemoryName}[{sharedMemorySizeStr}];");
+ context.AppendLine();
+ }
+ }
+ else if (context.Config.LocalMemorySize != 0)
+ {
+ int localMemorySize = BitUtils.DivRoundUp(context.Config.LocalMemorySize, 4);
+
+ string localMemorySizeStr = NumberFormatter.FormatInt(localMemorySize);
+
+ context.AppendLine($"uint {DefaultNames.LocalMemoryName}[{localMemorySizeStr}];");
+ context.AppendLine();
+ }
+
+ // Resource declarations; each group is skipped when it has no descriptors.
+ var cBufferDescriptors = context.Config.GetConstantBufferDescriptors();
+ if (cBufferDescriptors.Length != 0)
+ {
+ DeclareUniforms(context, cBufferDescriptors);
+
+ context.AppendLine();
+ }
+
+ var sBufferDescriptors = context.Config.GetStorageBufferDescriptors();
+ if (sBufferDescriptors.Length != 0)
+ {
+ DeclareStorages(context, sBufferDescriptors);
+
+ context.AppendLine();
+ }
+
+ var textureDescriptors = context.Config.GetTextureDescriptors();
+ if (textureDescriptors.Length != 0)
+ {
+ DeclareSamplers(context, textureDescriptors);
+
+ context.AppendLine();
+ }
+
+ var imageDescriptors = context.Config.GetImageDescriptors();
+ if (imageDescriptors.Length != 0)
+ {
+ DeclareImages(context, imageDescriptors);
+
+ context.AppendLine();
+ }
+
+ // Non-compute stages declare stage layouts and attributes; compute declares its local size instead.
+ if (context.Config.Stage != ShaderStage.Compute)
+ {
+ if (context.Config.Stage == ShaderStage.Geometry)
+ {
+ InputTopology inputTopology = context.Config.GpuAccessor.QueryPrimitiveTopology();
+ string inPrimitive = inputTopology.ToGlslString();
+
+ context.AppendLine($"layout (invocations = {context.Config.ThreadsPerInputPrimitive}, {inPrimitive}) in;");
+
+ if (context.Config.GpPassthrough && context.Config.GpuAccessor.QueryHostSupportsGeometryShaderPassthrough())
+ {
+ context.AppendLine($"layout (passthrough) in gl_PerVertex");
+ context.EnterScope();
+ context.AppendLine("vec4 gl_Position;");
+ context.AppendLine("float gl_PointSize;");
+ context.AppendLine("float gl_ClipDistance[];");
+ context.LeaveScope(";");
+ }
+ else
+ {
+ string outPrimitive = context.Config.OutputTopology.ToGlslString();
+
+ // This branch is also taken when passthrough is requested but the host lacks support;
+ // in that case the vertex count of the input topology is used as the output limit.
+ int maxOutputVertices = context.Config.GpPassthrough
+ ? inputTopology.ToInputVertices()
+ : context.Config.MaxOutputVertices;
+
+ context.AppendLine($"layout ({outPrimitive}, max_vertices = {maxOutputVertices}) out;");
+ }
+
+ context.AppendLine();
+ }
+ else if (context.Config.Stage == ShaderStage.TessellationControl)
+ {
+ int threadsPerInputPrimitive = context.Config.ThreadsPerInputPrimitive;
+
+ context.AppendLine($"layout (vertices = {threadsPerInputPrimitive}) out;");
+ context.AppendLine();
+ }
+ else if (context.Config.Stage == ShaderStage.TessellationEvaluation)
+ {
+ bool tessCw = context.Config.GpuAccessor.QueryTessCw();
+
+ if (context.Config.Options.TargetApi == TargetApi.Vulkan)
+ {
+ // We invert the front face on Vulkan backend, so we need to do that here as well.
+ tessCw = !tessCw;
+ }
+
+ string patchType = context.Config.GpuAccessor.QueryTessPatchType().ToGlsl();
+ string spacing = context.Config.GpuAccessor.QueryTessSpacing().ToGlsl();
+ string windingOrder = tessCw ? "cw" : "ccw";
+
+ context.AppendLine($"layout ({patchType}, {spacing}, {windingOrder}) in;");
+ context.AppendLine();
+ }
+
+ if (context.Config.UsedInputAttributes != 0 || context.Config.GpPassthrough)
+ {
+ DeclareInputAttributes(context, info);
+
+ context.AppendLine();
+ }
+
+ if (context.Config.UsedOutputAttributes != 0 || context.Config.Stage != ShaderStage.Fragment)
+ {
+ DeclareOutputAttributes(context, info);
+
+ context.AppendLine();
+ }
+
+ if (context.Config.UsedInputAttributesPerPatch.Count != 0)
+ {
+ DeclareInputAttributesPerPatch(context, context.Config.UsedInputAttributesPerPatch);
+
+ context.AppendLine();
+ }
+
+ if (context.Config.UsedOutputAttributesPerPatch.Count != 0)
+ {
+ DeclareUsedOutputAttributesPerPatch(context, context.Config.UsedOutputAttributesPerPatch);
+
+ context.AppendLine();
+ }
+
+ // Redeclare gl_PerVertex with transform feedback qualifiers when there is
+ // a valid transform feedback output for the position.
+ if (context.Config.TransformFeedbackEnabled && context.Config.LastInVertexPipeline)
+ {
+ var tfOutput = context.Config.GetTransformFeedbackOutput(AttributeConsts.PositionX);
+ if (tfOutput.Valid)
+ {
+ context.AppendLine($"layout (xfb_buffer = {tfOutput.Buffer}, xfb_offset = {tfOutput.Offset}, xfb_stride = {tfOutput.Stride}) out gl_PerVertex");
+ context.EnterScope();
+ context.AppendLine("vec4 gl_Position;");
+ context.LeaveScope(context.Config.Stage == ShaderStage.TessellationControl ? " gl_out[];" : ";");
+ }
+ }
+ }
+ else
+ {
+ string localSizeX = NumberFormatter.FormatInt(context.Config.GpuAccessor.QueryComputeLocalSizeX());
+ string localSizeY = NumberFormatter.FormatInt(context.Config.GpuAccessor.QueryComputeLocalSizeY());
+ string localSizeZ = NumberFormatter.FormatInt(context.Config.GpuAccessor.QueryComputeLocalSizeZ());
+
+ context.AppendLine(
+ "layout (" +
+ $"local_size_x = {localSizeX}, " +
+ $"local_size_y = {localSizeY}, " +
+ $"local_size_z = {localSizeZ}) in;");
+ context.AppendLine();
+ }
+
+ bool isFragment = context.Config.Stage == ShaderStage.Fragment;
+
+ // Support uniform block, declared for fragment, compute and vertex stages only.
+ if (isFragment || context.Config.Stage == ShaderStage.Compute || context.Config.Stage == ShaderStage.Vertex)
+ {
+ if (isFragment && context.Config.GpuAccessor.QueryEarlyZForce())
+ {
+ context.AppendLine("layout(early_fragment_tests) in;");
+ context.AppendLine();
+ }
+
+ if ((context.Config.UsedFeatures & (FeatureFlags.FragCoordXY | FeatureFlags.IntegerSampling)) != 0)
+ {
+ string stage = OperandManager.GetShaderStagePrefix(context.Config.Stage);
+
+ // One scale element per texture and per image descriptor.
+ int scaleElements = context.Config.GetTextureDescriptors().Length + context.Config.GetImageDescriptors().Length;
+
+ if (isFragment)
+ {
+ scaleElements++; // Also includes render target scale, for gl_FragCoord.
+ }
+
+ DeclareSupportUniformBlock(context, context.Config.Stage, scaleElements);
+
+ if (context.Config.UsedFeatures.HasFlag(FeatureFlags.IntegerSampling) && scaleElements != 0)
+ {
+ AppendHelperFunction(context, $"Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/TexelFetchScale_{stage}.glsl");
+ context.AppendLine();
+ }
+ }
+ else if (isFragment || context.Config.Stage == ShaderStage.Vertex)
+ {
+ // No scale elements needed; compute gets no support block in this case.
+ DeclareSupportUniformBlock(context, context.Config.Stage, 0);
+ }
+ }
+
+ // Append the source of every helper function flagged in the mask.
+ if ((info.HelperFunctionsMask & HelperFunctionsMask.AtomicMinMaxS32Shared) != 0)
+ {
+ AppendHelperFunction(context, "Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/AtomicMinMaxS32Shared.glsl");
+ }
+
+ if ((info.HelperFunctionsMask & HelperFunctionsMask.AtomicMinMaxS32Storage) != 0)
+ {
+ AppendHelperFunction(context, "Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/AtomicMinMaxS32Storage.glsl");
+ }
+
+ if ((info.HelperFunctionsMask & HelperFunctionsMask.MultiplyHighS32) != 0)
+ {
+ AppendHelperFunction(context, "Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/MultiplyHighS32.glsl");
+ }
+
+ if ((info.HelperFunctionsMask & HelperFunctionsMask.MultiplyHighU32) != 0)
+ {
+ AppendHelperFunction(context, "Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/MultiplyHighU32.glsl");
+ }
+
+ if ((info.HelperFunctionsMask & HelperFunctionsMask.Shuffle) != 0)
+ {
+ AppendHelperFunction(context, "Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/Shuffle.glsl");
+ }
+
+ if ((info.HelperFunctionsMask & HelperFunctionsMask.ShuffleDown) != 0)
+ {
+ AppendHelperFunction(context, "Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/ShuffleDown.glsl");
+ }
+
+ if ((info.HelperFunctionsMask & HelperFunctionsMask.ShuffleUp) != 0)
+ {
+ AppendHelperFunction(context, "Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/ShuffleUp.glsl");
+ }
+
+ if ((info.HelperFunctionsMask & HelperFunctionsMask.ShuffleXor) != 0)
+ {
+ AppendHelperFunction(context, "Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/ShuffleXor.glsl");
+ }
+
+ if ((info.HelperFunctionsMask & HelperFunctionsMask.StoreSharedSmallInt) != 0)
+ {
+ AppendHelperFunction(context, "Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/StoreSharedSmallInt.glsl");
+ }
+
+ if ((info.HelperFunctionsMask & HelperFunctionsMask.StoreStorageSmallInt) != 0)
+ {
+ AppendHelperFunction(context, "Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/StoreStorageSmallInt.glsl");
+ }
+
+ if ((info.HelperFunctionsMask & HelperFunctionsMask.SwizzleAdd) != 0)
+ {
+ AppendHelperFunction(context, "Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/SwizzleAdd.glsl");
+ }
+ }
+
+        /// <summary>
+        /// Builds the transform feedback layout qualifier for an output.
+        /// </summary>
+        /// <param name="tfOutput">Transform feedback output to describe</param>
+        /// <returns>The layout qualifier followed by a space, or an empty string when the output is not valid</returns>
+        private static string GetTfLayout(TransformFeedbackOutput tfOutput)
+        {
+            return tfOutput.Valid
+                ? $"layout (xfb_buffer = {tfOutput.Buffer}, xfb_offset = {tfOutput.Offset}, xfb_stride = {tfOutput.Stride}) "
+                : string.Empty;
+        }
+
+        /// <summary>
+        /// Emits one variable declaration for every local of a function.
+        /// </summary>
+        /// <param name="context">Code generation context that receives the declarations</param>
+        /// <param name="function">Function whose locals are declared</param>
+        public static void DeclareLocals(CodeGenContext context, StructuredFunction function)
+        {
+            foreach (AstOperand local in function.Locals)
+            {
+                // The local is registered with the operand manager first, which yields its name.
+                string localName = context.OperandManager.DeclareLocal(local);
+                string typeName = GetVarTypeName(context, local.VarType);
+
+                context.AppendLine($"{typeName} {localName};");
+            }
+        }
+
+        /// <summary>
+        /// Gets the type name used to declare a variable of the given aggregate type.
+        /// </summary>
+        /// <param name="context">Code generation context, used to query the host precision setting</param>
+        /// <param name="type">Aggregate type to name</param>
+        /// <param name="precise">Requests the "precise" qualifier on FP32 types; ignored when the host reports reduced precision</param>
+        /// <returns>The type name, including the "precise" qualifier where it applies</returns>
+        /// <exception cref="ArgumentException">Thrown when <paramref name="type"/> has no matching type name</exception>
+        public static string GetVarTypeName(CodeGenContext context, AggregateType type, bool precise = true)
+        {
+            // The host query is always performed, whatever the value of "precise" is.
+            bool hostReducedPrecision = context.Config.GpuAccessor.QueryHostReducedPrecision();
+
+            // Only the 32-bit float types take the qualifier.
+            string fp32Qualifier = !hostReducedPrecision && precise ? "precise " : string.Empty;
+
+            switch (type)
+            {
+                case AggregateType.Void:
+                    return "void";
+                case AggregateType.Bool:
+                    return "bool";
+                case AggregateType.FP32:
+                    return fp32Qualifier + "float";
+                case AggregateType.FP64:
+                    return "double";
+                case AggregateType.S32:
+                    return "int";
+                case AggregateType.U32:
+                    return "uint";
+
+                case AggregateType.Vector2 | AggregateType.Bool:
+                    return "bvec2";
+                case AggregateType.Vector2 | AggregateType.FP32:
+                    return fp32Qualifier + "vec2";
+                case AggregateType.Vector2 | AggregateType.FP64:
+                    return "dvec2";
+                case AggregateType.Vector2 | AggregateType.S32:
+                    return "ivec2";
+                case AggregateType.Vector2 | AggregateType.U32:
+                    return "uvec2";
+
+                case AggregateType.Vector3 | AggregateType.Bool:
+                    return "bvec3";
+                case AggregateType.Vector3 | AggregateType.FP32:
+                    return fp32Qualifier + "vec3";
+                case AggregateType.Vector3 | AggregateType.FP64:
+                    return "dvec3";
+                case AggregateType.Vector3 | AggregateType.S32:
+                    return "ivec3";
+                case AggregateType.Vector3 | AggregateType.U32:
+                    return "uvec3";
+
+                case AggregateType.Vector4 | AggregateType.Bool:
+                    return "bvec4";
+                case AggregateType.Vector4 | AggregateType.FP32:
+                    return fp32Qualifier + "vec4";
+                case AggregateType.Vector4 | AggregateType.FP64:
+                    return "dvec4";
+                case AggregateType.Vector4 | AggregateType.S32:
+                    return "ivec4";
+                case AggregateType.Vector4 | AggregateType.U32:
+                    return "uvec4";
+
+                default:
+                    throw new ArgumentException($"Invalid variable type \"{type}\".");
+            }
+        }
+
+        /// <summary>
+        /// Emits the uniform block declarations for the constant buffers.
+        /// When the constant buffer indexing feature is used, a single block array is declared;
+        /// otherwise one block is declared per descriptor.
+        /// </summary>
+        /// <param name="context">Code generation context that receives the declarations</param>
+        /// <param name="descriptors">Constant buffer descriptors; expected to be non-empty</param>
+        private static void DeclareUniforms(CodeGenContext context, BufferDescriptor[] descriptors)
+        {
+            // Each block holds an array of vec4, so the element count is the buffer size divided by 16.
+            string ubSize = $"[{NumberFormatter.FormatInt(Constants.ConstantBufferSize / 16)}]";
+
+            if (!context.Config.UsedFeatures.HasFlag(FeatureFlags.CbIndexing))
+            {
+                foreach (BufferDescriptor descriptor in descriptors)
+                {
+                    string stagePrefix = OperandManager.GetShaderStagePrefix(context.Config.Stage);
+                    string ubName = $"{stagePrefix}_{DefaultNames.UniformNamePrefix}{descriptor.Slot}";
+
+                    context.AppendLine($"layout (binding = {descriptor.Binding}, std140) uniform {ubName}");
+                    context.EnterScope();
+
+                    string memberName = OperandManager.GetUbName(context.Config.Stage, descriptor.Slot, false);
+
+                    context.AppendLine($"vec4 {memberName}{ubSize};");
+                    context.LeaveScope(";");
+                }
+
+                return;
+            }
+
+            string arrayName = $"{OperandManager.GetShaderStagePrefix(context.Config.Stage)}_{DefaultNames.UniformNamePrefix}";
+            string blockName = $"{arrayName}_{DefaultNames.BlockSuffix}";
+
+            context.AppendLine($"layout (binding = {context.Config.FirstConstantBufferBinding}, std140) uniform {blockName}");
+            context.EnterScope();
+            context.AppendLine($"vec4 {DefaultNames.DataName}{ubSize};");
+
+            // The block array must be large enough to be indexed by the highest slot in use.
+            string arrayLength = NumberFormatter.FormatInt(descriptors.Max(x => x.Slot) + 1);
+
+            context.LeaveScope($" {arrayName}[{arrayLength}];");
+        }
+
+ /// <summary>
+ /// Declares the std430 storage buffer block array used by the shader.
+ /// </summary>
+ /// <param name="context">Code generation context that receives the declaration</param>
+ /// <param name="descriptors">Descriptors of the storage buffers; the highest slot defines the array length</param>
+ private static void DeclareStorages(CodeGenContext context, BufferDescriptor[] descriptors)
+ {
+     string arrayName = $"{OperandManager.GetShaderStagePrefix(context.Config.Stage)}_{DefaultNames.StorageNamePrefix}";
+
+     // Only the Vulkan target gets an explicit descriptor set qualifier.
+     string setQualifier = string.Empty;
+
+     if (context.Config.Options.TargetApi == TargetApi.Vulkan)
+     {
+         setQualifier = ", set = 1";
+     }
+
+     context.AppendLine($"layout (binding = {context.Config.FirstStorageBufferBinding}{setQualifier}, std430) buffer {arrayName}_{DefaultNames.BlockSuffix}");
+     context.EnterScope();
+     context.AppendLine($"uint {DefaultNames.DataName}[];");
+     context.LeaveScope($" {arrayName}[{NumberFormatter.FormatInt(descriptors.Max(x => x.Slot) + 1)}];");
+ }
+
+ // Declares one GLSL sampler uniform per texture descriptor. Descriptors flagged as
+ // SamplerType.Indexed share a countdown so that a run of them produces a single declaration.
+ private static void DeclareSamplers(CodeGenContext context, TextureDescriptor[] descriptors)
+ {
+ // Countdown over the descriptors of an indexed sampler array; 0 means "not inside an array".
+ int arraySize = 0;
+ foreach (var descriptor in descriptors)
+ {
+ if (descriptor.Type.HasFlag(SamplerType.Indexed))
+ {
+ if (arraySize == 0)
+ {
+ // Counter at zero: this descriptor is declared, using the full array size.
+ arraySize = ShaderConfig.SamplerArraySize;
+ }
+ else if (--arraySize != 0)
+ {
+ // Counter still running: no declaration is emitted for this descriptor.
+ // NOTE(review): the counter starts at SamplerArraySize and is decremented once per following
+ // indexed descriptor, so after SamplerArraySize indexed descriptors it is 1; the next indexed
+ // descriptor would then fall through with a size of 0. Confirm this is intended when more
+ // than one indexed array is present.
+ continue;
+ }
+ }
+
+ // Array size as text; passed to GetSamplerName together with the indexed flag.
+ string indexExpr = NumberFormatter.FormatInt(arraySize);
+
+ string samplerName = OperandManager.GetSamplerName(
+ context.Config.Stage,
+ descriptor.CbufSlot,
+ descriptor.HandleIndex,
+ descriptor.Type.HasFlag(SamplerType.Indexed),
+ indexExpr);
+
+ string samplerTypeName = descriptor.Type.ToGlslSamplerType();
+
+ string layout = string.Empty;
+
+ // Only the Vulkan target gets an explicit descriptor set qualifier.
+ if (context.Config.Options.TargetApi == TargetApi.Vulkan)
+ {
+ layout = ", set = 2";
+ }
+
+ context.AppendLine($"layout (binding = {descriptor.Binding}{layout}) uniform {samplerTypeName} {samplerName};");
+ }
+ }
+
+ // Declares one GLSL image uniform per texture descriptor. Descriptors flagged as
+ // SamplerType.Indexed share a countdown so that a run of them produces a single declaration.
+ private static void DeclareImages(CodeGenContext context, TextureDescriptor[] descriptors)
+ {
+ // Countdown over the descriptors of an indexed image array; 0 means "not inside an array".
+ int arraySize = 0;
+ foreach (var descriptor in descriptors)
+ {
+ if (descriptor.Type.HasFlag(SamplerType.Indexed))
+ {
+ if (arraySize == 0)
+ {
+ // Counter at zero: this descriptor is declared, using the full array size.
+ arraySize = ShaderConfig.SamplerArraySize;
+ }
+ else if (--arraySize != 0)
+ {
+ // Counter still running: no declaration is emitted for this descriptor.
+ // NOTE(review): after SamplerArraySize indexed descriptors the counter is left at 1, so the
+ // next indexed descriptor would fall through with a size of 0. Confirm this is intended
+ // when more than one indexed array is present.
+ continue;
+ }
+ }
+
+ // Array size as text; passed to GetImageName together with the indexed flag.
+ string indexExpr = NumberFormatter.FormatInt(arraySize);
+
+ string imageName = OperandManager.GetImageName(
+ context.Config.Stage,
+ descriptor.CbufSlot,
+ descriptor.HandleIndex,
+ descriptor.Format,
+ descriptor.Type.HasFlag(SamplerType.Indexed),
+ indexExpr);
+
+ string imageTypeName = descriptor.Type.ToGlslImageType(descriptor.Format.GetComponentType());
+
+ // Images used with the ImageCoherent flag are declared with the "coherent" qualifier.
+ if (descriptor.Flags.HasFlag(TextureUsageFlags.ImageCoherent))
+ {
+ imageTypeName = "coherent " + imageTypeName;
+ }
+
+ // Format qualifier, if the format maps to one; it is appended after the binding.
+ string layout = descriptor.Format.ToGlslFormat();
+
+ if (!string.IsNullOrEmpty(layout))
+ {
+ layout = ", " + layout;
+ }
+
+ // On the Vulkan target the descriptor set qualifier goes before the format qualifier.
+ if (context.Config.Options.TargetApi == TargetApi.Vulkan)
+ {
+ layout = $", set = 3{layout}";
+ }
+
+ context.AppendLine($"layout (binding = {descriptor.Binding}{layout}) uniform {imageTypeName} {imageName};");
+ }
+ }
+
+ /// <summary>
+ /// Declares the user input attributes, either as one indexable array or one by one.
+ /// </summary>
+ /// <param name="context">Code generation context that receives the declarations</param>
+ /// <param name="info">Structured program information, forwarded to the per-attribute declaration</param>
+ private static void DeclareInputAttributes(CodeGenContext context, StructuredProgramInfo info)
+ {
+     if (!context.Config.UsedFeatures.HasFlag(FeatureFlags.IaIndexing))
+     {
+         // Walk the set bits of the combined used and passthrough mask, lowest location first.
+         for (int remaining = context.Config.UsedInputAttributes | context.Config.PassthroughAttributes; remaining != 0;)
+         {
+             int location = BitOperations.TrailingZeroCount(remaining);
+
+             remaining &= ~(1 << location);
+
+             DeclareInputAttribute(context, info, location);
+         }
+
+         return;
+     }
+
+     // Indexed access: all attributes live in a single array; geometry inputs get an extra dimension.
+     string extraDimension = context.Config.Stage == ShaderStage.Geometry ? "[]" : string.Empty;
+
+     context.AppendLine($"layout (location = 0) in vec4 {DefaultNames.IAttributePrefix}{extraDimension}[{Constants.MaxAttributes}];");
+ }
+
+ /// <summary>
+ /// Declares the given per-patch input attributes in ascending attribute order.
+ /// </summary>
+ /// <param name="context">Code generation context that receives the declarations</param>
+ /// <param name="attrs">Set of per-patch attribute indices to declare</param>
+ private static void DeclareInputAttributesPerPatch(CodeGenContext context, HashSet<int> attrs)
+ {
+     // The set is unordered, so sort it to make the declaration order ascending.
+     foreach (int patchAttr in attrs.OrderBy(index => index))
+     {
+         DeclareInputAttributePerPatch(context, patchAttr);
+     }
+ }
+
+ // Declares the input attribute at location "attr". The "info" parameter is not read by this method.
+ private static void DeclareInputAttribute(CodeGenContext context, StructuredProgramInfo info, int attr)
+ {
+ // "[]" is appended for the stages that IsArrayAttributeGlsl reports as having arrayed inputs.
+ string suffix = IsArrayAttributeGlsl(context.Config.Stage, isOutAttr: false) ? "[]" : string.Empty;
+ // Interpolation qualifier (with trailing space); only set for fragment shaders.
+ string iq = string.Empty;
+
+ if (context.Config.Stage == ShaderStage.Fragment)
+ {
+ iq = context.Config.ImapTypes[attr].GetFirstUsedType() switch
+ {
+ PixelImap.Constant => "flat ",
+ PixelImap.ScreenLinear => "noperspective ",
+ _ => string.Empty
+ };
+ }
+
+ string name = $"{DefaultNames.IAttributePrefix}{attr}";
+
+ if (context.Config.TransformFeedbackEnabled && context.Config.Stage == ShaderStage.Fragment)
+ {
+ // Fragment inputs are split per component here; the split mirrors the one used for the
+ // outputs in DeclareOutputAttribute (same GetTransformFeedbackOutputComponents query).
+ int components = context.Config.GetTransformFeedbackOutputComponents(attr, 0);
+
+ if (components > 1)
+ {
+ string type = components switch
+ {
+ 2 => "vec2",
+ 3 => "vec3",
+ 4 => "vec4",
+ _ => "float"
+ };
+
+ // NOTE(review): unlike the per-component declarations below, this one does not include
+ // the interpolation qualifier (iq) nor the array suffix. Confirm this is intended.
+ context.AppendLine($"layout (location = {attr}) in {type} {name};");
+ }
+
+ // Remaining components (all four when no vector was declared) become individual floats.
+ for (int c = components > 1 ? components : 0; c < 4; c++)
+ {
+ char swzMask = "xyzw"[c];
+
+ context.AppendLine($"layout (location = {attr}, component = {c}) {iq}in float {name}_{swzMask}{suffix};");
+ }
+ }
+ else
+ {
+ // The host is only queried for passthrough support when the attribute is in the passthrough mask.
+ bool passthrough = (context.Config.PassthroughAttributes & (1 << attr)) != 0;
+ string pass = passthrough && context.Config.GpuAccessor.QueryHostSupportsGeometryShaderPassthrough() ? "passthrough, " : string.Empty;
+ string type;
+
+ // Vertex inputs take their type from the attribute type reported by the GPU accessor;
+ // every other stage uses the float vector type.
+ if (context.Config.Stage == ShaderStage.Vertex)
+ {
+ type = context.Config.GpuAccessor.QueryAttributeType(attr).ToVec4Type();
+ }
+ else
+ {
+ type = AttributeType.Float.ToVec4Type();
+ }
+
+ context.AppendLine($"layout ({pass}location = {attr}) {iq}in {type} {name}{suffix};");
+ }
+ }
+
+ /// <summary>
+ /// Declares a single per-patch input attribute as a "patch in vec4".
+ /// </summary>
+ /// <param name="context">Code generation context that receives the declaration</param>
+ /// <param name="attr">Per-patch attribute index; its location comes from the shader configuration</param>
+ private static void DeclareInputAttributePerPatch(CodeGenContext context, int attr)
+ {
+     context.AppendLine(
+         $"layout (location = {context.Config.GetPerPatchAttributeLocation(attr)}) patch in vec4 {DefaultNames.PerPatchAttributePrefix}{attr};");
+ }
+
+ /// <summary>
+ /// Declares the user output attributes, either as one indexable array or one by one.
+ /// </summary>
+ /// <param name="context">Code generation context that receives the declarations</param>
+ /// <param name="info">Structured program information (not read by this method)</param>
+ private static void DeclareOutputAttributes(CodeGenContext context, StructuredProgramInfo info)
+ {
+     if (context.Config.UsedFeatures.HasFlag(FeatureFlags.OaIndexing))
+     {
+         // Indexed access: all attributes live in a single array.
+         context.AppendLine($"layout (location = 0) out vec4 {DefaultNames.OAttributePrefix}[{Constants.MaxAttributes}];");
+
+         return;
+     }
+
+     int remaining = context.Config.UsedOutputAttributes;
+
+     if (context.Config.Stage == ShaderStage.Fragment && context.Config.GpuAccessor.QueryDualSourceBlendEnable())
+     {
+         // With dual source blending, the first used output and the one right after it are declared
+         // as an indexed pair, but only when both of them are actually used.
+         int first = BitOperations.TrailingZeroCount(remaining);
+         int pairMask = 3 << first;
+         bool bothUsed = (remaining & pairMask) == pairMask;
+
+         if (bothUsed)
+         {
+             remaining &= ~pairMask;
+             DeclareOutputDualSourceBlendAttribute(context, first);
+         }
+     }
+
+     // Declare whatever is left, lowest location first.
+     for (; remaining != 0;)
+     {
+         int location = BitOperations.TrailingZeroCount(remaining);
+
+         DeclareOutputAttribute(context, location);
+
+         remaining &= ~(1 << location);
+     }
+ }
+
+ // Declares the output attribute at location "attr", including transform feedback qualifiers
+ // when transform feedback is enabled and this is the last stage of the vertex pipeline.
+ private static void DeclareOutputAttribute(CodeGenContext context, int attr)
+ {
+ // "[]" is appended for the stages that IsArrayAttributeGlsl reports as having arrayed outputs.
+ string suffix = IsArrayAttributeGlsl(context.Config.Stage, isOutAttr: true) ? "[]" : string.Empty;
+ string name = $"{DefaultNames.OAttributePrefix}{attr}{suffix}";
+
+ if (context.Config.TransformFeedbackEnabled && context.Config.LastInVertexPipeline)
+ {
+ // presumably the number of leading components captured together as one vector - TODO confirm
+ int components = context.Config.GetTransformFeedbackOutputComponents(attr, 0);
+
+ if (components > 1)
+ {
+ string type = components switch
+ {
+ 2 => "vec2",
+ 3 => "vec3",
+ 4 => "vec4",
+ _ => "float"
+ };
+
+ string xfb = string.Empty;
+
+ // Transform feedback qualifiers are only emitted when the output at component 0 is valid.
+ var tfOutput = context.Config.GetTransformFeedbackOutput(attr, 0);
+ if (tfOutput.Valid)
+ {
+ xfb = $", xfb_buffer = {tfOutput.Buffer}, xfb_offset = {tfOutput.Offset}, xfb_stride = {tfOutput.Stride}";
+ }
+
+ context.AppendLine($"layout (location = {attr}{xfb}) out {type} {name};");
+ }
+
+ // Remaining components (all four when no vector was declared) become individual floats,
+ // each with its own transform feedback qualifiers if valid.
+ for (int c = components > 1 ? components : 0; c < 4; c++)
+ {
+ char swzMask = "xyzw"[c];
+
+ string xfb = string.Empty;
+
+ var tfOutput = context.Config.GetTransformFeedbackOutput(attr, c);
+ if (tfOutput.Valid)
+ {
+ xfb = $", xfb_buffer = {tfOutput.Buffer}, xfb_offset = {tfOutput.Offset}, xfb_stride = {tfOutput.Stride}";
+ }
+
+ // NOTE(review): "name" already contains the array suffix, so an arrayed output would be named
+ // "<name>[]_<component>" here. Confirm that arrayed outputs never reach this path.
+ context.AppendLine($"layout (location = {attr}, component = {c}{xfb}) out float {name}_{swzMask};");
+ }
+ }
+ else
+ {
+ // Non-fragment stages always output vec4; fragment outputs follow the type reported by the GPU accessor.
+ string type = context.Config.Stage != ShaderStage.Fragment ? "vec4" :
+ context.Config.GpuAccessor.QueryFragmentOutputType(attr) switch
+ {
+ AttributeType.Sint => "ivec4",
+ AttributeType.Uint => "uvec4",
+ _ => "vec4"
+ };
+
+ // On reduced precision hosts, vertex shader output 0 is additionally declared "invariant".
+ if (context.Config.GpuAccessor.QueryHostReducedPrecision() && context.Config.Stage == ShaderStage.Vertex && attr == 0)
+ {
+ context.AppendLine($"layout (location = {attr}) invariant out {type} {name};");
+ }
+ else
+ {
+ context.AppendLine($"layout (location = {attr}) out {type} {name};");
+ }
+ }
+ }
+
+ /// <summary>
+ /// Declares the pair of fragment outputs used for dual source blending.
+ /// </summary>
+ /// <param name="context">Code generation context that receives the declarations</param>
+ /// <param name="attr">Index of the first output; the second output is named after the next index</param>
+ private static void DeclareOutputDualSourceBlendAttribute(CodeGenContext context, int attr)
+ {
+     // Both outputs share the location of the first one and are told apart by "index".
+     for (int blendIndex = 0; blendIndex < 2; blendIndex++)
+     {
+         context.AppendLine($"layout (location = {attr}, index = {blendIndex}) out vec4 {DefaultNames.OAttributePrefix}{attr + blendIndex};");
+     }
+ }
+
+ /// <summary>
+ /// Checks if the attributes of a stage must be declared as arrays in GLSL.
+ /// </summary>
+ /// <param name="stage">Shader stage being generated</param>
+ /// <param name="isOutAttr">True for output attributes, false for input attributes</param>
+ /// <returns>True if the attribute declaration needs an array suffix</returns>
+ private static bool IsArrayAttributeGlsl(ShaderStage stage, bool isOutAttr)
+ {
+     bool isTessControl = stage == ShaderStage.TessellationControl;
+
+     // Outputs are only arrayed on tessellation control.
+     if (isOutAttr)
+     {
+         return isTessControl;
+     }
+
+     // Inputs are arrayed on both tessellation stages and on geometry.
+     return isTessControl ||
+         stage == ShaderStage.TessellationEvaluation ||
+         stage == ShaderStage.Geometry;
+ }
+
+ /// <summary>
+ /// Declares the given per-patch output attributes in ascending attribute order.
+ /// </summary>
+ /// <param name="context">Code generation context that receives the declarations</param>
+ /// <param name="attrs">Set of per-patch attribute indices to declare</param>
+ private static void DeclareUsedOutputAttributesPerPatch(CodeGenContext context, HashSet<int> attrs)
+ {
+     // The set is unordered, so sort it to make the declaration order ascending.
+     foreach (int patchAttr in attrs.OrderBy(index => index))
+     {
+         DeclareOutputAttributePerPatch(context, patchAttr);
+     }
+ }
+
+ /// <summary>
+ /// Declares a single per-patch output attribute as a "patch out vec4".
+ /// </summary>
+ /// <param name="context">Code generation context that receives the declaration</param>
+ /// <param name="attr">Per-patch attribute index; its location comes from the shader configuration</param>
+ private static void DeclareOutputAttributePerPatch(CodeGenContext context, int attr)
+ {
+     context.AppendLine(
+         $"layout (location = {context.Config.GetPerPatchAttributeLocation(attr)}) patch out vec4 {DefaultNames.PerPatchAttributePrefix}{attr};");
+ }
+
+ /// <summary>
+ /// Declares the support uniform block (binding 0) when the shader needs it.
+ /// </summary>
+ /// <param name="context">Code generation context that receives the declaration</param>
+ /// <param name="stage">Shader stage being generated; selects the fields placed before the render scale array</param>
+ /// <param name="scaleElements">Number of render scale elements used; zero means none</param>
+ private static void DeclareSupportUniformBlock(CodeGenContext context, ShaderStage stage, int scaleElements)
+ {
+     bool isFragment = stage == ShaderStage.Fragment;
+
+     // Fragment shaders always need the block; the last vertex pipeline stage needs it
+     // when the viewport transform is disabled.
+     bool needsSupportBlock = isFragment ||
+         (context.Config.LastInVertexPipeline && context.Config.GpuAccessor.QueryViewportTransformDisable());
+
+     if (scaleElements == 0 && !needsSupportBlock)
+     {
+         return;
+     }
+
+     context.AppendLine($"layout (binding = 0, std140) uniform {DefaultNames.SupportBlockName}");
+     context.EnterScope();
+
+     if (isFragment || stage == ShaderStage.Vertex)
+     {
+         context.AppendLine($"uint {DefaultNames.SupportBlockAlphaTestName};");
+         context.AppendLine($"bool {DefaultNames.SupportBlockIsBgraName}[{SupportBuffer.FragmentIsBgraCount}];");
+         context.AppendLine($"vec4 {DefaultNames.SupportBlockViewportInverse};");
+         context.AppendLine($"int {DefaultNames.SupportBlockFragmentScaleCount};");
+     }
+     else if (stage == ShaderStage.Compute)
+     {
+         // Compute only reads the render scale, so the fields before it are declared as padding.
+         context.AppendLine($"uint s_reserved[{SupportBuffer.ComputeRenderScaleOffset / SupportBuffer.FieldSize}];");
+     }
+
+     context.AppendLine($"float {DefaultNames.SupportBlockRenderScaleName}[{SupportBuffer.RenderScaleMaxCount}];");
+
+     context.LeaveScope(";");
+     context.AppendLine();
+ }
+
+ /// <summary>
+ /// Appends the code of an embedded GLSL helper function, after replacing its placeholders.
+ /// </summary>
+ /// <param name="context">Code generation context that receives the helper code</param>
+ /// <param name="filename">Name of the embedded resource with the helper function template</param>
+ private static void AppendHelperFunction(CodeGenContext context, string filename)
+ {
+     // Replacements are applied in a fixed order: tabs, memory names, then subgroup built-ins.
+     string code = EmbeddedResources.ReadAllText(filename)
+         .Replace("\t", CodeGenContext.Tab)
+         .Replace("$SHARED_MEM$", DefaultNames.SharedMemoryName)
+         .Replace("$STORAGE_MEM$", OperandManager.GetShaderStagePrefix(context.Config.Stage) + "_" + DefaultNames.StorageNamePrefix);
+
+     // The subgroup built-ins depend on whether the host supports the shader ballot extension.
+     string invocationName;
+     string broadcastName;
+
+     if (context.Config.GpuAccessor.QueryHostSupportsShaderBallot())
+     {
+         invocationName = "gl_SubGroupInvocationARB";
+         broadcastName = "readInvocationARB";
+     }
+     else
+     {
+         invocationName = "gl_SubgroupInvocationID";
+         broadcastName = "subgroupBroadcast";
+     }
+
+     code = code
+         .Replace("$SUBGROUP_INVOCATION$", invocationName)
+         .Replace("$SUBGROUP_BROADCAST$", broadcastName);
+
+     context.AppendLine(code);
+     context.AppendLine();
+ }
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/DefaultNames.cs b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/DefaultNames.cs
new file mode 100644
index 00000000..3ab4814c
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/DefaultNames.cs
@@ -0,0 +1,37 @@
+namespace Ryujinx.Graphics.Shader.CodeGen.Glsl
+{
+ /// <summary>
+ /// Names and name prefixes used for the identifiers emitted by the GLSL code generator.
+ /// </summary>
+ static class DefaultNames
+ {
+ public const string LocalNamePrefix = "temp";
+
+ public const string SamplerNamePrefix = "tex";
+ public const string ImageNamePrefix = "img";
+
+ // Prefixes of the input, output and per-patch attribute variables; the attribute index is appended.
+ public const string PerPatchAttributePrefix = "patch_attr_";
+ public const string IAttributePrefix = "in_attr";
+ public const string OAttributePrefix = "out_attr";
+
+ // Storage buffer block array name, appended to the shader stage prefix.
+ public const string StorageNamePrefix = "s";
+
+ // Name of the data array member declared inside uniform and storage blocks.
+ public const string DataName = "data";
+
+ // Support uniform block and its members.
+ public const string SupportBlockName = "support_block";
+ public const string SupportBlockAlphaTestName = "s_alpha_test";
+ public const string SupportBlockIsBgraName = "s_is_bgra";
+ public const string SupportBlockViewportInverse = "s_viewport_inverse";
+ public const string SupportBlockFragmentScaleCount = "s_frag_scale_count";
+ public const string SupportBlockRenderScaleName = "s_render_scale";
+
+ // Suffix appended to the block type name of uniform and storage block arrays.
+ public const string BlockSuffix = "block";
+
+ // Uniform block name prefix, appended to the shader stage prefix.
+ public const string UniformNamePrefix = "c";
+ public const string UniformNameSuffix = "data";
+
+ public const string LocalMemoryName = "local_mem";
+ // Substituted for the $SHARED_MEM$ placeholder of the helper function templates.
+ public const string SharedMemoryName = "shared_mem";
+
+ public const string ArgumentNamePrefix = "a";
+
+ public const string UndefinedName = "undef";
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/GlslGenerator.cs b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/GlslGenerator.cs
new file mode 100644
index 00000000..751d0350
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/GlslGenerator.cs
@@ -0,0 +1,154 @@
+using Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions;
+using Ryujinx.Graphics.Shader.StructuredIr;
+using Ryujinx.Graphics.Shader.Translation;
+using System;
+
+using static Ryujinx.Graphics.Shader.CodeGen.Glsl.TypeConversion;
+
+namespace Ryujinx.Graphics.Shader.CodeGen.Glsl
+{
+ static class GlslGenerator
+ {
+ private const string MainFunctionName = "main";
+
+ /// <summary>
+ /// Generates the GLSL source code for a structured shader program.
+ /// </summary>
+ /// <param name="info">Structured program; function 0 is emitted as the entry point</param>
+ /// <param name="config">Shader configuration used to create the code generation context</param>
+ /// <returns>Generated GLSL code</returns>
+ public static string Generate(StructuredProgramInfo info, ShaderConfig config)
+ {
+     CodeGenContext context = new CodeGenContext(info, config);
+
+     Declarations.Declare(context, info);
+
+     var functions = info.Functions;
+
+     if (functions.Count != 0)
+     {
+         // Prototypes first, so that the functions can call each other regardless of order.
+         int index = 1;
+
+         while (index < functions.Count)
+         {
+             context.AppendLine(GetFunctionSignature(context, functions[index]) + ";");
+             index++;
+         }
+
+         context.AppendLine();
+
+         // Then the bodies of every function other than the entry point.
+         index = 1;
+
+         while (index < functions.Count)
+         {
+             PrintFunction(context, info, functions[index]);
+
+             context.AppendLine();
+             index++;
+         }
+     }
+
+     // The entry point is always printed last, under the fixed main function name.
+     PrintFunction(context, info, functions[0], MainFunctionName);
+
+     return context.GetCode();
+ }
+
+ // Emits a complete function: signature, local declarations and body, wrapped in a scope.
+ // When funcName is null, the function's own name is used in the signature.
+ // The "info" parameter is not read by this method.
+ private static void PrintFunction(CodeGenContext context, StructuredProgramInfo info, StructuredFunction function, string funcName = null)
+ {
+ // Recorded on the context before any code for the function is generated.
+ context.CurrentFunction = function;
+
+ context.AppendLine(GetFunctionSignature(context, function, funcName));
+ context.EnterScope();
+
+ Declarations.DeclareLocals(context, function);
+
+ PrintBlock(context, function.MainBlock);
+
+ context.LeaveScope();
+ }
+
+ /// <summary>
+ /// Builds the GLSL signature of a function, without a trailing semicolon or body.
+ /// </summary>
+ /// <param name="context">Code generation context</param>
+ /// <param name="function">Function to build the signature for</param>
+ /// <param name="funcName">Name to use instead of the function's own name, or null to keep it</param>
+ /// <returns>Signature in the form "type name(args)"</returns>
+ private static string GetFunctionSignature(CodeGenContext context, StructuredFunction function, string funcName = null)
+ {
+     int inputCount = function.InArguments.Length;
+
+     string[] parameters = new string[inputCount + function.OutArguments.Length];
+
+     // Inputs come first; outputs follow and continue the same argument numbering.
+     for (int index = 0; index < parameters.Length; index++)
+     {
+         bool isOutput = index >= inputCount;
+
+         var argType = isOutput ? function.OutArguments[index - inputCount] : function.InArguments[index];
+
+         string typeName = Declarations.GetVarTypeName(context, argType);
+         string argName = OperandManager.GetArgumentName(index);
+
+         parameters[index] = isOutput ? $"out {typeName} {argName}" : $"{typeName} {argName}";
+     }
+
+     string returnTypeName = Declarations.GetVarTypeName(context, function.ReturnType);
+
+     return $"{returnTypeName} {funcName ?? function.Name}({string.Join(", ", parameters)})";
+ }
+
+ // Emits the GLSL code for a block and everything nested in it, using a visitor whose
+ // enter/leave events produce the control flow statements and scopes.
+ private static void PrintBlock(CodeGenContext context, AstBlock block)
+ {
+ AstBlockVisitor visitor = new AstBlockVisitor(block);
+
+ // On entering a nested block: emit its header, then open a scope.
+ visitor.BlockEntered += (sender, e) =>
+ {
+ switch (e.Block.Type)
+ {
+ case AstBlockType.DoWhile:
+ context.AppendLine("do");
+ break;
+
+ case AstBlockType.Else:
+ context.AppendLine("else");
+ break;
+
+ case AstBlockType.ElseIf:
+ context.AppendLine($"else if ({GetCondExpr(context, e.Block.Condition)})");
+ break;
+
+ case AstBlockType.If:
+ context.AppendLine($"if ({GetCondExpr(context, e.Block.Condition)})");
+ break;
+
+ default: throw new InvalidOperationException($"Found unexpected block type \"{e.Block.Type}\".");
+ }
+
+ context.EnterScope();
+ };
+
+ // On leaving a nested block: close the scope; do-while loops get their condition after it.
+ visitor.BlockLeft += (sender, e) =>
+ {
+ context.LeaveScope();
+
+ if (e.Block.Type == AstBlockType.DoWhile)
+ {
+ context.AppendLine($"while ({GetCondExpr(context, e.Block.Condition)});");
+ }
+ };
+
+ foreach (IAstNode node in visitor.Visit())
+ {
+ if (node is AstOperation operation)
+ {
+ string expr = InstGen.GetExpression(context, operation);
+
+ // A null expression produces no statement at all.
+ if (expr != null)
+ {
+ context.AppendLine(expr + ";");
+ }
+ }
+ else if (node is AstAssignment assignment)
+ {
+ // The source is reinterpreted to the destination type before being assigned.
+ AggregateType dstType = OperandManager.GetNodeDestType(context, assignment.Destination);
+ AggregateType srcType = OperandManager.GetNodeDestType(context, assignment.Source);
+
+ string dest = InstGen.GetExpression(context, assignment.Destination);
+ string src = ReinterpretCast(context, assignment.Source, srcType, dstType);
+
+ context.AppendLine(dest + " = " + src + ";");
+ }
+ else if (node is AstComment comment)
+ {
+ context.AppendLine("// " + comment.Comment);
+ }
+ else
+ {
+ throw new InvalidOperationException($"Found unexpected node type \"{node?.GetType().Name ?? "null"}\".");
+ }
+ }
+ }
+
+ /// <summary>
+ /// Gets the expression of a condition node, reinterpreted as a boolean.
+ /// </summary>
+ /// <param name="context">Code generation context</param>
+ /// <param name="cond">Condition node</param>
+ /// <returns>Boolean GLSL expression for the condition</returns>
+ private static string GetCondExpr(CodeGenContext context, IAstNode cond)
+ {
+     return ReinterpretCast(context, cond, OperandManager.GetNodeDestType(context, cond), AggregateType.Bool);
+ }
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/AtomicMinMaxS32Shared.glsl b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/AtomicMinMaxS32Shared.glsl
new file mode 100644
index 00000000..82b76bcc
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/AtomicMinMaxS32Shared.glsl
@@ -0,0 +1,21 @@
+// Atomic signed maximum on the shared memory word at "offset".
+// Returns the value that was stored before the operation.
+int Helper_AtomicMaxS32(int offset, int value)
+{
+ uint oldValue, newValue;
+ // Compare-and-swap loop: retry until the word was not modified between the read and the swap.
+ do
+ {
+ oldValue = $SHARED_MEM$[offset];
+ // The comparison is done as signed; the result is stored back as unsigned.
+ newValue = uint(max(int(oldValue), value));
+ } while (atomicCompSwap($SHARED_MEM$[offset], oldValue, newValue) != oldValue);
+ return int(oldValue);
+}
+
+// Atomic signed minimum on the shared memory word at "offset".
+// Returns the value that was stored before the operation.
+int Helper_AtomicMinS32(int offset, int value)
+{
+ uint oldValue, newValue;
+ // Compare-and-swap loop: retry until the word was not modified between the read and the swap.
+ do
+ {
+ oldValue = $SHARED_MEM$[offset];
+ // The comparison is done as signed; the result is stored back as unsigned.
+ newValue = uint(min(int(oldValue), value));
+ } while (atomicCompSwap($SHARED_MEM$[offset], oldValue, newValue) != oldValue);
+ return int(oldValue);
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/AtomicMinMaxS32Storage.glsl b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/AtomicMinMaxS32Storage.glsl
new file mode 100644
index 00000000..0862a71b
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/AtomicMinMaxS32Storage.glsl
@@ -0,0 +1,21 @@
+// Atomic signed maximum on word "offset" of the storage buffer selected by "index".
+// Returns the value that was stored before the operation.
+int Helper_AtomicMaxS32(int index, int offset, int value)
+{
+ uint oldValue, newValue;
+ // Compare-and-swap loop: retry until the word was not modified between the read and the swap.
+ do
+ {
+ oldValue = $STORAGE_MEM$[index].data[offset];
+ // The comparison is done as signed; the result is stored back as unsigned.
+ newValue = uint(max(int(oldValue), value));
+ } while (atomicCompSwap($STORAGE_MEM$[index].data[offset], oldValue, newValue) != oldValue);
+ return int(oldValue);
+}
+
+// Atomic signed minimum on word "offset" of the storage buffer selected by "index".
+// Returns the value that was stored before the operation.
+int Helper_AtomicMinS32(int index, int offset, int value)
+{
+ uint oldValue, newValue;
+ // Compare-and-swap loop: retry until the word was not modified between the read and the swap.
+ do
+ {
+ oldValue = $STORAGE_MEM$[index].data[offset];
+ // The comparison is done as signed; the result is stored back as unsigned.
+ newValue = uint(min(int(oldValue), value));
+ } while (atomicCompSwap($STORAGE_MEM$[index].data[offset], oldValue, newValue) != oldValue);
+ return int(oldValue);
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/HelperFunctionNames.cs b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/HelperFunctionNames.cs
new file mode 100644
index 00000000..54f35b15
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/HelperFunctionNames.cs
@@ -0,0 +1,22 @@
+namespace Ryujinx.Graphics.Shader.CodeGen.Glsl
+{
+ /// <summary>
+ /// Names of the GLSL helper functions defined by the embedded helper function templates.
+ /// </summary>
+ /// <remarks>
+ /// The names are compile-time constants, since they must always match the function names
+ /// written in the templates and must never be reassigned at runtime.
+ /// </remarks>
+ static class HelperFunctionNames
+ {
+     public const string AtomicMaxS32 = "Helper_AtomicMaxS32";
+     public const string AtomicMinS32 = "Helper_AtomicMinS32";
+
+     public const string MultiplyHighS32 = "Helper_MultiplyHighS32";
+     public const string MultiplyHighU32 = "Helper_MultiplyHighU32";
+
+     public const string Shuffle = "Helper_Shuffle";
+     public const string ShuffleDown = "Helper_ShuffleDown";
+     public const string ShuffleUp = "Helper_ShuffleUp";
+     public const string ShuffleXor = "Helper_ShuffleXor";
+     public const string SwizzleAdd = "Helper_SwizzleAdd";
+
+     public const string StoreShared16 = "Helper_StoreShared16";
+     public const string StoreShared8 = "Helper_StoreShared8";
+     public const string StoreStorage16 = "Helper_StoreStorage16";
+     public const string StoreStorage8 = "Helper_StoreStorage8";
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/MultiplyHighS32.glsl b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/MultiplyHighS32.glsl
new file mode 100644
index 00000000..caad6f56
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/MultiplyHighS32.glsl
@@ -0,0 +1,7 @@
+// Returns the most significant 32 bits of the signed product of x and y.
+int Helper_MultiplyHighS32(int x, int y)
+{
+ int msb;
+ int lsb;
+ // imulExtended produces both halves of the product; only the high half is needed.
+ imulExtended(x, y, msb, lsb);
+ return msb;
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/MultiplyHighU32.glsl b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/MultiplyHighU32.glsl
new file mode 100644
index 00000000..617a925f
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/MultiplyHighU32.glsl
@@ -0,0 +1,7 @@
+// Returns the most significant 32 bits of the unsigned product of x and y.
+uint Helper_MultiplyHighU32(uint x, uint y)
+{
+ uint msb;
+ uint lsb;
+ // umulExtended produces both halves of the product; only the high half is needed.
+ umulExtended(x, y, msb, lsb);
+ return msb;
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/Shuffle.glsl b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/Shuffle.glsl
new file mode 100644
index 00000000..7cb4764d
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/Shuffle.glsl
@@ -0,0 +1,11 @@
+// Reads x from the thread selected by "index" inside the current segment.
+// "valid" is set to false when the source thread is above the segment limit,
+// in which case the caller's own x is returned.
+float Helper_Shuffle(float x, uint index, uint mask, out bool valid)
+{
+ // mask bits 0-4: clamp value, mask bits 8-12: segment mask.
+ uint clamp = mask & 0x1fu;
+ uint segMask = (mask >> 8) & 0x1fu;
+ uint minThreadId = $SUBGROUP_INVOCATION$ & segMask;
+ uint maxThreadId = minThreadId | (clamp & ~segMask);
+ // The index only selects the bits outside the segment mask; the rest comes from this thread.
+ uint srcThreadId = (index & ~segMask) | minThreadId;
+ valid = srcThreadId <= maxThreadId;
+ float v = $SUBGROUP_BROADCAST$(x, srcThreadId);
+ return valid ? v : x;
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/ShuffleDown.glsl b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/ShuffleDown.glsl
new file mode 100644
index 00000000..71d901d5
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/ShuffleDown.glsl
@@ -0,0 +1,11 @@
+// Reads x from the thread "index" positions above the current one.
+// "valid" is set to false when the source thread is above the segment limit,
+// in which case the caller's own x is returned.
+float Helper_ShuffleDown(float x, uint index, uint mask, out bool valid)
+{
+ // mask bits 0-4: clamp value, mask bits 8-12: segment mask.
+ uint clamp = mask & 0x1fu;
+ uint segMask = (mask >> 8) & 0x1fu;
+ uint minThreadId = $SUBGROUP_INVOCATION$ & segMask;
+ uint maxThreadId = minThreadId | (clamp & ~segMask);
+ uint srcThreadId = $SUBGROUP_INVOCATION$ + index;
+ valid = srcThreadId <= maxThreadId;
+ float v = $SUBGROUP_BROADCAST$(x, srcThreadId);
+ return valid ? v : x;
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/ShuffleUp.glsl b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/ShuffleUp.glsl
new file mode 100644
index 00000000..ae264d87
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/ShuffleUp.glsl
@@ -0,0 +1,9 @@
+// Reads x from the thread "index" positions below the current one.
+// "valid" is set to false when the source thread is below the start of the segment,
+// in which case the caller's own x is returned.
+float Helper_ShuffleUp(float x, uint index, uint mask, out bool valid)
+{
+ // mask bits 8-12: segment mask.
+ uint segMask = (mask >> 8) & 0x1fu;
+ uint minThreadId = $SUBGROUP_INVOCATION$ & segMask;
+ uint srcThreadId = $SUBGROUP_INVOCATION$ - index;
+ // Compared as signed so that an unsigned wrap below zero counts as out of range.
+ valid = int(srcThreadId) >= int(minThreadId);
+ float v = $SUBGROUP_BROADCAST$(x, srcThreadId);
+ return valid ? v : x;
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/ShuffleXor.glsl b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/ShuffleXor.glsl
new file mode 100644
index 00000000..789089d6
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/ShuffleXor.glsl
@@ -0,0 +1,11 @@
+// Reads x from the thread whose id is the current thread id XOR "index".
+// "valid" is set to false when the source thread is above the segment limit,
+// in which case the caller's own x is returned.
+float Helper_ShuffleXor(float x, uint index, uint mask, out bool valid)
+{
+ // mask bits 0-4: clamp value, mask bits 8-12: segment mask.
+ uint clamp = mask & 0x1fu;
+ uint segMask = (mask >> 8) & 0x1fu;
+ uint minThreadId = $SUBGROUP_INVOCATION$ & segMask;
+ uint maxThreadId = minThreadId | (clamp & ~segMask);
+ uint srcThreadId = $SUBGROUP_INVOCATION$ ^ index;
+ valid = srcThreadId <= maxThreadId;
+ float v = $SUBGROUP_BROADCAST$(x, srcThreadId);
+ return valid ? v : x;
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/StoreSharedSmallInt.glsl b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/StoreSharedSmallInt.glsl
new file mode 100644
index 00000000..2f57b5ff
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/StoreSharedSmallInt.glsl
@@ -0,0 +1,23 @@
+// Stores the low 16 bits of "value" into shared memory at byte offset "offset",
+// leaving the other bits of the containing 32-bit word untouched.
+void Helper_StoreShared16(int offset, uint value)
+{
+ // Split the byte offset into a word index and a bit position inside that word.
+ int wordOffset = offset >> 2;
+ int bitOffset = (offset & 3) * 8;
+ uint oldValue, newValue;
+ // Compare-and-swap loop: retry until the word was not modified between the read and the swap.
+ do
+ {
+ oldValue = $SHARED_MEM$[wordOffset];
+ newValue = bitfieldInsert(oldValue, value, bitOffset, 16);
+ } while (atomicCompSwap($SHARED_MEM$[wordOffset], oldValue, newValue) != oldValue);
+}
+
+// Stores the low 8 bits of "value" into shared memory at byte offset "offset",
+// leaving the other bits of the containing 32-bit word untouched.
+void Helper_StoreShared8(int offset, uint value)
+{
+ // Split the byte offset into a word index and a bit position inside that word.
+ int wordOffset = offset >> 2;
+ int bitOffset = (offset & 3) * 8;
+ uint oldValue, newValue;
+ // Compare-and-swap loop: retry until the word was not modified between the read and the swap.
+ do
+ {
+ oldValue = $SHARED_MEM$[wordOffset];
+ newValue = bitfieldInsert(oldValue, value, bitOffset, 8);
+ } while (atomicCompSwap($SHARED_MEM$[wordOffset], oldValue, newValue) != oldValue);
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/StoreStorageSmallInt.glsl b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/StoreStorageSmallInt.glsl
new file mode 100644
index 00000000..f2253a79
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/StoreStorageSmallInt.glsl
@@ -0,0 +1,23 @@
+// Stores the low 16 bits of "value" at byte offset "offset" of the storage buffer selected
+// by "index", leaving the other bits of the containing 32-bit word untouched.
+void Helper_StoreStorage16(int index, int offset, uint value)
+{
+ // Split the byte offset into a word index and a bit position inside that word.
+ int wordOffset = offset >> 2;
+ int bitOffset = (offset & 3) * 8;
+ uint oldValue, newValue;
+ // Compare-and-swap loop: retry until the word was not modified between the read and the swap.
+ do
+ {
+ oldValue = $STORAGE_MEM$[index].data[wordOffset];
+ newValue = bitfieldInsert(oldValue, value, bitOffset, 16);
+ } while (atomicCompSwap($STORAGE_MEM$[index].data[wordOffset], oldValue, newValue) != oldValue);
+}
+
+// Stores the low 8 bits of "value" at byte offset "offset" of the storage buffer selected
+// by "index", leaving the other bits of the containing 32-bit word untouched.
+void Helper_StoreStorage8(int index, int offset, uint value)
+{
+ // Split the byte offset into a word index and a bit position inside that word.
+ int wordOffset = offset >> 2;
+ int bitOffset = (offset & 3) * 8;
+ uint oldValue, newValue;
+ // Compare-and-swap loop: retry until the word was not modified between the read and the swap.
+ do
+ {
+ oldValue = $STORAGE_MEM$[index].data[wordOffset];
+ newValue = bitfieldInsert(oldValue, value, bitOffset, 8);
+ } while (atomicCompSwap($STORAGE_MEM$[index].data[wordOffset], oldValue, newValue) != oldValue);
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/SwizzleAdd.glsl b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/SwizzleAdd.glsl
new file mode 100644
index 00000000..057cb6ca
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/SwizzleAdd.glsl
@@ -0,0 +1,7 @@
+// Returns x * a + y * b, where the factors a and b are picked from two lookup tables
+// using a 2-bit field of "mask" selected by the two low bits of the invocation index.
+float Helper_SwizzleAdd(float x, float y, int mask)
+{
+ vec4 xLut = vec4(1.0, -1.0, 1.0, 0.0);
+ vec4 yLut = vec4(1.0, 1.0, -1.0, 1.0);
+ // Each of the 4 possible values of (invocation & 3) owns 2 bits of the mask.
+ int lutIdx = (mask >> (int($SUBGROUP_INVOCATION$ & 3u) * 2)) & 3;
+ return x * xLut[lutIdx] + y * yLut[lutIdx];
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/TexelFetchScale_cp.glsl b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/TexelFetchScale_cp.glsl
new file mode 100644
index 00000000..4ebade5e
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/TexelFetchScale_cp.glsl
@@ -0,0 +1,19 @@
+// Scales texel coordinates by the render scale stored for "samplerIndex".
+// Coordinates are returned unchanged when the scale is exactly 1.0.
+ivec2 Helper_TexelFetchScale(ivec2 inputVec, int samplerIndex)
+{
+ float scale = s_render_scale[samplerIndex];
+ if (scale == 1.0)
+ {
+ return inputVec;
+ }
+ return ivec2(vec2(inputVec) * scale);
+}
+
+// Divides a texture size by the render scale stored for "samplerIndex".
+// The size is returned unchanged when the scale is exactly 1.0.
+int Helper_TextureSizeUnscale(int size, int samplerIndex)
+{
+ float scale = s_render_scale[samplerIndex];
+ if (scale == 1.0)
+ {
+ return size;
+ }
+ return int(float(size) / scale);
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/TexelFetchScale_fp.glsl b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/TexelFetchScale_fp.glsl
new file mode 100644
index 00000000..6c670f91
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/TexelFetchScale_fp.glsl
@@ -0,0 +1,26 @@
+// Scales texel coordinates by the render scale stored at entry (1 + samplerIndex).
+// Coordinates are returned unchanged when the scale is exactly 1.0; a negative scale
+// selects the variant that also adds an offset derived from the fragment position.
+ivec2 Helper_TexelFetchScale(ivec2 inputVec, int samplerIndex)
+{
+ float scale = s_render_scale[1 + samplerIndex];
+ if (scale == 1.0)
+ {
+ return inputVec;
+ }
+ if (scale < 0.0) // If less than 0, try interpolate between texels by using the screen position.
+ {
+ // The magnitude of the scale is used; the sign only selects this path.
+ return ivec2(vec2(inputVec) * (-scale) + mod(gl_FragCoord.xy, 0.0 - scale));
+ }
+ else
+ {
+ return ivec2(vec2(inputVec) * scale);
+ }
+}
+
+// Divides a texture size by the magnitude of the render scale stored at entry (1 + samplerIndex).
+// The size is returned unchanged when that magnitude is exactly 1.0.
+int Helper_TextureSizeUnscale(int size, int samplerIndex)
+{
+ // abs() because the stored scale may be negative (the sign is used as a flag by the fetch helper).
+ float scale = abs(s_render_scale[1 + samplerIndex]);
+ if (scale == 1.0)
+ {
+ return size;
+ }
+ return int(float(size) / scale);
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/TexelFetchScale_vp.glsl b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/TexelFetchScale_vp.glsl
new file mode 100644
index 00000000..19eb119d
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/TexelFetchScale_vp.glsl
@@ -0,0 +1,20 @@
+// Scales texel coordinates by the magnitude of the render scale stored at entry
+// (1 + samplerIndex + s_frag_scale_count). Coordinates are returned unchanged when
+// that magnitude is exactly 1.0.
+ivec2 Helper_TexelFetchScale(ivec2 inputVec, int samplerIndex)
+{
+ // The entries for this stage start after the first s_frag_scale_count entries.
+ float scale = abs(s_render_scale[1 + samplerIndex + s_frag_scale_count]);
+ if (scale == 1.0)
+ {
+ return inputVec;
+ }
+
+ return ivec2(vec2(inputVec) * scale);
+}
+
+// Divides a texture size by the magnitude of the render scale stored at entry
+// (1 + samplerIndex + s_frag_scale_count). The size is returned unchanged when
+// that magnitude is exactly 1.0.
+int Helper_TextureSizeUnscale(int size, int samplerIndex)
+{
+ // The entries for this stage start after the first s_frag_scale_count entries.
+ float scale = abs(s_render_scale[1 + samplerIndex + s_frag_scale_count]);
+ if (scale == 1.0)
+ {
+ return size;
+ }
+ return int(float(size) / scale);
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGen.cs b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGen.cs
new file mode 100644
index 00000000..01bd11e5
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGen.cs
@@ -0,0 +1,238 @@
+using Ryujinx.Graphics.Shader.IntermediateRepresentation;
+using Ryujinx.Graphics.Shader.StructuredIr;
+using Ryujinx.Graphics.Shader.Translation;
+using System;
+
+using static Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions.InstGenBallot;
+using static Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions.InstGenCall;
+using static Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions.InstGenFSI;
+using static Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions.InstGenHelper;
+using static Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions.InstGenMemory;
+using static Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions.InstGenPacking;
+using static Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions.InstGenVector;
+using static Ryujinx.Graphics.Shader.StructuredIr.InstructionInfo;
+
+namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions
+{
+ static class InstGen
+ {
+        /// <summary>
+        /// Produces the GLSL expression text for an AST node.
+        /// </summary>
+        /// <param name="context">Code generation context</param>
+        /// <param name="node">Node to convert, either an operation or an operand</param>
+        /// <returns>GLSL expression text for the node</returns>
+        /// <exception cref="ArgumentException">The node is null or of an unsupported type</exception>
+        public static string GetExpression(CodeGenContext context, IAstNode node)
+        {
+            return node switch
+            {
+                AstOperation operation => GetExpression(context, operation),
+                AstOperand operand => context.OperandManager.GetExpression(context, operand),
+                _ => throw new ArgumentException($"Invalid node type \"{node?.GetType().Name ?? "null"}\".")
+            };
+        }
+
+        /// <summary>
+        /// Generates a negation of the first source, written as a subtraction from zero.
+        /// </summary>
+        /// <param name="context">Code generation context</param>
+        /// <param name="operation">Negate operation</param>
+        /// <param name="info">Instruction information used to decide whether the operand needs parenthesis</param>
+        /// <returns>Expression of the form "zero - operand"</returns>
+        public static string Negate(CodeGenContext context, AstOperation operation, InstInfo info)
+        {
+            IAstNode operand = operation.GetSource(0);
+            AggregateType operandType = GetSrcVarType(operation.Inst, 0);
+            string operandExpr = GetSoureExpr(context, operand, operandType);
+
+            string zeroLiteral;
+
+            if (operandType != AggregateType.FP64)
+            {
+                NumberFormatter.TryFormat(0, operandType, out zeroLiteral);
+            }
+            else
+            {
+                zeroLiteral = "0.0";
+            }
+
+            // Starting in the 496.13 NVIDIA driver, there's an issue with assigning variables to negated expressions.
+            // (-expr) does not work, but (0.0 - expr) does. This should be removed once the issue is resolved.
+            string enclosedExpr = Enclose(operandExpr, operand, operation.Inst, info, false);
+
+            return $"{zeroLiteral} - {enclosedExpr}";
+        }
+
+ private static string GetExpression(CodeGenContext context, AstOperation operation) // Builds the GLSL text for one operation, choosing call, operator or special-case syntax from its InstInfo flags.
+ {
+ Instruction inst = operation.Inst;
+
+ InstInfo info = GetInstructionInfo(inst);
+
+ if ((info.Type & InstType.Call) != 0) // Function call syntax: OpName(arg, arg, ...).
+ {
+ bool atomic = (info.Type & InstType.Atomic) != 0;
+
+ int arity = (int)(info.Type & InstType.ArityMask); // The argument count is stored in the type flags under ArityMask.
+
+ string args = string.Empty;
+
+ for (int argIndex = 0; argIndex < arity; argIndex++)
+ {
+ // For shared memory access, the second argument is unused and should be ignored.
+ // It is there to make both storage and shared access have the same number of arguments.
+ // For storage, both inputs are consumed when the argument index is 0, so we should skip it here.
+ if (argIndex == 1 && (atomic || operation.StorageKind == StorageKind.SharedMemory))
+ {
+ continue;
+ }
+
+ if (argIndex != 0)
+ {
+ args += ", ";
+ }
+
+ if (argIndex == 0 && atomic) // The first argument of an atomic is the memory location being modified.
+ {
+ switch (operation.StorageKind)
+ {
+ case StorageKind.SharedMemory: args += LoadShared(context, operation); break;
+ case StorageKind.StorageBuffer: args += LoadStorage(context, operation); break;
+
+ default: throw new InvalidOperationException($"Invalid storage kind \"{operation.StorageKind}\".");
+ }
+ }
+ else
+ {
+ AggregateType dstType = GetSrcVarType(inst, argIndex);
+
+ args += GetSoureExpr(context, operation.GetSource(argIndex), dstType);
+ }
+ }
+
+ return info.OpName + '(' + args + ')';
+ }
+ else if ((info.Type & InstType.Op) != 0) // Operator syntax: keyword, prefix, infix or ternary.
+ {
+ string op = info.OpName;
+
+ // Return may optionally have a return value (and in this case it is unary).
+ if (inst == Instruction.Return && operation.SourcesCount != 0)
+ {
+ return $"{op} {GetSoureExpr(context, operation.GetSource(0), context.CurrentFunction.ReturnType)}";
+ }
+
+ int arity = (int)(info.Type & InstType.ArityMask);
+
+ string[] expr = new string[arity];
+
+ for (int index = 0; index < arity; index++)
+ {
+ IAstNode src = operation.GetSource(index);
+
+ string srcExpr = GetSoureExpr(context, src, GetSrcVarType(inst, index));
+
+ bool isLhs = arity == 2 && index == 0; // Only the left operand of a binary operator is flagged as LHS for the parenthesis check.
+
+ expr[index] = Enclose(srcExpr, src, inst, info, isLhs);
+ }
+
+ switch (arity)
+ {
+ case 0:
+ return op;
+
+ case 1:
+ return op + expr[0];
+
+ case 2:
+ return $"{expr[0]} {op} {expr[1]}";
+
+ case 3:
+ return $"{expr[0]} {op[0]} {expr[1]} {op[1]} {expr[2]}"; // OpName holds two characters here, one placed between each pair of operands.
+ }
+ }
+ else if ((info.Type & InstType.Special) != 0) // Instructions generated by dedicated methods.
+ {
+ switch (inst & Instruction.Mask)
+ {
+ case Instruction.Ballot:
+ return Ballot(context, operation);
+
+ case Instruction.Call:
+ return Call(context, operation);
+
+ case Instruction.FSIBegin:
+ return FSIBegin(context);
+
+ case Instruction.FSIEnd:
+ return FSIEnd(context);
+
+ case Instruction.ImageLoad:
+ case Instruction.ImageStore:
+ case Instruction.ImageAtomic:
+ return ImageLoadOrStore(context, operation);
+
+ case Instruction.Load:
+ return Load(context, operation);
+
+ case Instruction.LoadConstant:
+ return LoadConstant(context, operation);
+
+ case Instruction.LoadLocal:
+ return LoadLocal(context, operation);
+
+ case Instruction.LoadShared:
+ return LoadShared(context, operation);
+
+ case Instruction.LoadStorage:
+ return LoadStorage(context, operation);
+
+ case Instruction.Lod:
+ return Lod(context, operation);
+
+ case Instruction.Negate:
+ return Negate(context, operation, info);
+
+ case Instruction.PackDouble2x32:
+ return PackDouble2x32(context, operation);
+
+ case Instruction.PackHalf2x16:
+ return PackHalf2x16(context, operation);
+
+ case Instruction.Store:
+ return Store(context, operation);
+
+ case Instruction.StoreLocal:
+ return StoreLocal(context, operation);
+
+ case Instruction.StoreShared:
+ return StoreShared(context, operation);
+
+ case Instruction.StoreShared16:
+ return StoreShared16(context, operation);
+
+ case Instruction.StoreShared8:
+ return StoreShared8(context, operation);
+
+ case Instruction.StoreStorage:
+ return StoreStorage(context, operation);
+
+ case Instruction.StoreStorage16:
+ return StoreStorage16(context, operation);
+
+ case Instruction.StoreStorage8:
+ return StoreStorage8(context, operation);
+
+ case Instruction.TextureSample:
+ return TextureSample(context, operation);
+
+ case Instruction.TextureSize:
+ return TextureSize(context, operation);
+
+ case Instruction.UnpackDouble2x32:
+ return UnpackDouble2x32(context, operation);
+
+ case Instruction.UnpackHalf2x16:
+ return UnpackHalf2x16(context, operation);
+
+ case Instruction.VectorExtract:
+ return VectorExtract(context, operation);
+ }
+ }
+
+ throw new InvalidOperationException($"Unexpected instruction type \"{info.Type}\"."); // Reached when no branch above returned: unknown flags, an unhandled special instruction, or an operator arity above 3.
+ }
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGenBallot.cs b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGenBallot.cs
new file mode 100644
index 00000000..68793c5d
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGenBallot.cs
@@ -0,0 +1,27 @@
+using Ryujinx.Graphics.Shader.StructuredIr;
+using Ryujinx.Graphics.Shader.Translation;
+
+using static Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions.InstGenHelper;
+using static Ryujinx.Graphics.Shader.StructuredIr.InstructionInfo;
+
+namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions
+{
+    /// <summary>
+    /// GLSL code generation for the ballot instruction.
+    /// </summary>
+    static class InstGenBallot
+    {
+        /// <summary>
+        /// Generates the expression yielding the first component of a ballot over the first source.
+        /// </summary>
+        /// <param name="context">Code generation context</param>
+        /// <param name="operation">Ballot operation</param>
+        /// <returns>Ballot expression, using the ARB function when the host reports support for it</returns>
+        public static string Ballot(CodeGenContext context, AstOperation operation)
+        {
+            AggregateType srcType = GetSrcVarType(operation.Inst, 0);
+            string predicate = GetSoureExpr(context, operation.GetSource(0), srcType);
+
+            bool hostHasShaderBallot = context.Config.GpuAccessor.QueryHostSupportsShaderBallot();
+
+            // ballotARB result is unpacked into a uvec2 before taking .x,
+            // while subgroupBallot has .x taken directly.
+            return hostHasShaderBallot
+                ? $"unpackUint2x32(ballotARB({predicate})).x"
+                : $"subgroupBallot({predicate}).x";
+        }
+    }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGenCall.cs b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGenCall.cs
new file mode 100644
index 00000000..2df6960d
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGenCall.cs
@@ -0,0 +1,29 @@
+using Ryujinx.Graphics.Shader.IntermediateRepresentation;
+using Ryujinx.Graphics.Shader.StructuredIr;
+using System.Diagnostics;
+
+using static Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions.InstGenHelper;
+
+namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions
+{
+    /// <summary>
+    /// GLSL code generation for function calls.
+    /// </summary>
+    static class InstGenCall
+    {
+        /// <summary>
+        /// Generates a call to a shader function.
+        /// The first source is the constant function id, the remaining sources are the arguments.
+        /// </summary>
+        /// <param name="context">Code generation context</param>
+        /// <param name="operation">Call operation</param>
+        /// <returns>Call expression of the form "name(arg, arg, ...)"</returns>
+        public static string Call(CodeGenContext context, AstOperation operation)
+        {
+            AstOperand funcId = (AstOperand)operation.GetSource(0);
+
+            Debug.Assert(funcId.Type == OperandType.Constant);
+
+            var callee = context.GetFunction(funcId.Value);
+
+            int argCount = operation.SourcesCount - 1;
+            string[] argExprs = new string[argCount];
+
+            // Source N + 1 of the operation maps to argument N of the callee.
+            for (int srcIndex = 1; srcIndex <= argCount; srcIndex++)
+            {
+                int argIndex = srcIndex - 1;
+
+                argExprs[argIndex] = GetSoureExpr(context, operation.GetSource(srcIndex), callee.GetArgumentType(argIndex));
+            }
+
+            string argList = string.Join(", ", argExprs);
+
+            return $"{callee.Name}({argList})";
+        }
+    }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGenFSI.cs b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGenFSI.cs
new file mode 100644
index 00000000..f61a53cb
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGenFSI.cs
@@ -0,0 +1,29 @@
+namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions
+{
+    /// <summary>
+    /// GLSL code generation for the fragment shader interlock begin/end instructions.
+    /// </summary>
+    static class InstGenFSI
+    {
+        /// <summary>
+        /// Generates the call that opens the interlocked region.
+        /// </summary>
+        /// <param name="context">Code generation context</param>
+        /// <returns>The ARB or INTEL begin call depending on host support, or null if neither is supported</returns>
+        public static string FSIBegin(CodeGenContext context)
+        {
+            var gpuAccessor = context.Config.GpuAccessor;
+
+            // The ARB interlock is checked first; the INTEL ordering query only runs when it is missing.
+            if (gpuAccessor.QueryHostSupportsFragmentShaderInterlock())
+            {
+                return "beginInvocationInterlockARB()";
+            }
+
+            return gpuAccessor.QueryHostSupportsFragmentShaderOrderingIntel()
+                ? "beginFragmentShaderOrderingINTEL()"
+                : null;
+        }
+
+        /// <summary>
+        /// Generates the call that closes the interlocked region.
+        /// </summary>
+        /// <param name="context">Code generation context</param>
+        /// <returns>The ARB end call when the host supports it, otherwise null (no end call is emitted for the INTEL path)</returns>
+        public static string FSIEnd(CodeGenContext context)
+        {
+            bool hasInterlock = context.Config.GpuAccessor.QueryHostSupportsFragmentShaderInterlock();
+
+            return hasInterlock ? "endInvocationInterlockARB()" : null;
+        }
+    }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGenHelper.cs b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGenHelper.cs
new file mode 100644
index 00000000..00478f6a
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGenHelper.cs
@@ -0,0 +1,231 @@
+using Ryujinx.Graphics.Shader.IntermediateRepresentation;
+using Ryujinx.Graphics.Shader.StructuredIr;
+using Ryujinx.Graphics.Shader.Translation;
+
+using static Ryujinx.Graphics.Shader.CodeGen.Glsl.TypeConversion;
+
+namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions
+{
+ static class InstGenHelper
+ {
+ private static readonly InstInfo[] _infoTable;
+
+ static InstGenHelper() // Fills the per-instruction table with the GLSL syntax kind, the name or operator text, and the operator precedence.
+ {
+ _infoTable = new InstInfo[(int)Instruction.Count]; // Sized by Instruction.Count and indexed by the instruction value.
+
+ Add(Instruction.AtomicAdd, InstType.AtomicBinary, "atomicAdd");
+ Add(Instruction.AtomicAnd, InstType.AtomicBinary, "atomicAnd");
+ Add(Instruction.AtomicCompareAndSwap, InstType.AtomicTernary, "atomicCompSwap");
+ Add(Instruction.AtomicMaxS32, InstType.CallTernary, HelperFunctionNames.AtomicMaxS32);
+ Add(Instruction.AtomicMaxU32, InstType.AtomicBinary, "atomicMax");
+ Add(Instruction.AtomicMinS32, InstType.CallTernary, HelperFunctionNames.AtomicMinS32);
+ Add(Instruction.AtomicMinU32, InstType.AtomicBinary, "atomicMin");
+ Add(Instruction.AtomicOr, InstType.AtomicBinary, "atomicOr");
+ Add(Instruction.AtomicSwap, InstType.AtomicBinary, "atomicExchange");
+ Add(Instruction.AtomicXor, InstType.AtomicBinary, "atomicXor");
+ Add(Instruction.Absolute, InstType.CallUnary, "abs");
+ Add(Instruction.Add, InstType.OpBinaryCom, "+", 2); // The last argument is the operator precedence consulted by NeedsParenthesis.
+ Add(Instruction.Ballot, InstType.Special); // Special entries carry no name or precedence.
+ Add(Instruction.Barrier, InstType.CallNullary, "barrier");
+ Add(Instruction.BitCount, InstType.CallUnary, "bitCount");
+ Add(Instruction.BitfieldExtractS32, InstType.CallTernary, "bitfieldExtract");
+ Add(Instruction.BitfieldExtractU32, InstType.CallTernary, "bitfieldExtract");
+ Add(Instruction.BitfieldInsert, InstType.CallQuaternary, "bitfieldInsert");
+ Add(Instruction.BitfieldReverse, InstType.CallUnary, "bitfieldReverse");
+ Add(Instruction.BitwiseAnd, InstType.OpBinaryCom, "&", 6);
+ Add(Instruction.BitwiseExclusiveOr, InstType.OpBinaryCom, "^", 7);
+ Add(Instruction.BitwiseNot, InstType.OpUnary, "~", 0);
+ Add(Instruction.BitwiseOr, InstType.OpBinaryCom, "|", 8);
+ Add(Instruction.Call, InstType.Special);
+ Add(Instruction.Ceiling, InstType.CallUnary, "ceil");
+ Add(Instruction.Clamp, InstType.CallTernary, "clamp");
+ Add(Instruction.ClampU32, InstType.CallTernary, "clamp");
+ Add(Instruction.CompareEqual, InstType.OpBinaryCom, "==", 5);
+ Add(Instruction.CompareGreater, InstType.OpBinary, ">", 4);
+ Add(Instruction.CompareGreaterOrEqual, InstType.OpBinary, ">=", 4);
+ Add(Instruction.CompareGreaterOrEqualU32, InstType.OpBinary, ">=", 4);
+ Add(Instruction.CompareGreaterU32, InstType.OpBinary, ">", 4);
+ Add(Instruction.CompareLess, InstType.OpBinary, "<", 4);
+ Add(Instruction.CompareLessOrEqual, InstType.OpBinary, "<=", 4);
+ Add(Instruction.CompareLessOrEqualU32, InstType.OpBinary, "<=", 4);
+ Add(Instruction.CompareLessU32, InstType.OpBinary, "<", 4);
+ Add(Instruction.CompareNotEqual, InstType.OpBinaryCom, "!=", 5);
+ Add(Instruction.ConditionalSelect, InstType.OpTernary, "?:", 12); // Two-character operator text; each character is a separate symbol of the ternary operator.
+ Add(Instruction.ConvertFP32ToFP64, InstType.CallUnary, "double");
+ Add(Instruction.ConvertFP64ToFP32, InstType.CallUnary, "float");
+ Add(Instruction.ConvertFP32ToS32, InstType.CallUnary, "int");
+ Add(Instruction.ConvertFP32ToU32, InstType.CallUnary, "uint");
+ Add(Instruction.ConvertFP64ToS32, InstType.CallUnary, "int");
+ Add(Instruction.ConvertFP64ToU32, InstType.CallUnary, "uint");
+ Add(Instruction.ConvertS32ToFP32, InstType.CallUnary, "float");
+ Add(Instruction.ConvertS32ToFP64, InstType.CallUnary, "double");
+ Add(Instruction.ConvertU32ToFP32, InstType.CallUnary, "float");
+ Add(Instruction.ConvertU32ToFP64, InstType.CallUnary, "double");
+ Add(Instruction.Cosine, InstType.CallUnary, "cos");
+ Add(Instruction.Ddx, InstType.CallUnary, "dFdx");
+ Add(Instruction.Ddy, InstType.CallUnary, "dFdy");
+ Add(Instruction.Discard, InstType.OpNullary, "discard");
+ Add(Instruction.Divide, InstType.OpBinary, "/", 1);
+ Add(Instruction.EmitVertex, InstType.CallNullary, "EmitVertex");
+ Add(Instruction.EndPrimitive, InstType.CallNullary, "EndPrimitive");
+ Add(Instruction.ExponentB2, InstType.CallUnary, "exp2");
+ Add(Instruction.FSIBegin, InstType.Special);
+ Add(Instruction.FSIEnd, InstType.Special);
+ Add(Instruction.FindLSB, InstType.CallUnary, "findLSB");
+ Add(Instruction.FindMSBS32, InstType.CallUnary, "findMSB");
+ Add(Instruction.FindMSBU32, InstType.CallUnary, "findMSB");
+ Add(Instruction.Floor, InstType.CallUnary, "floor");
+ Add(Instruction.FusedMultiplyAdd, InstType.CallTernary, "fma");
+ Add(Instruction.GroupMemoryBarrier, InstType.CallNullary, "groupMemoryBarrier");
+ Add(Instruction.ImageLoad, InstType.Special);
+ Add(Instruction.ImageStore, InstType.Special);
+ Add(Instruction.ImageAtomic, InstType.Special);
+ Add(Instruction.IsNan, InstType.CallUnary, "isnan");
+ Add(Instruction.Load, InstType.Special);
+ Add(Instruction.LoadConstant, InstType.Special);
+ Add(Instruction.LoadLocal, InstType.Special);
+ Add(Instruction.LoadShared, InstType.Special);
+ Add(Instruction.LoadStorage, InstType.Special);
+ Add(Instruction.Lod, InstType.Special);
+ Add(Instruction.LogarithmB2, InstType.CallUnary, "log2");
+ Add(Instruction.LogicalAnd, InstType.OpBinaryCom, "&&", 9);
+ Add(Instruction.LogicalExclusiveOr, InstType.OpBinaryCom, "^^", 10);
+ Add(Instruction.LogicalNot, InstType.OpUnary, "!", 0);
+ Add(Instruction.LogicalOr, InstType.OpBinaryCom, "||", 11);
+ Add(Instruction.LoopBreak, InstType.OpNullary, "break");
+ Add(Instruction.LoopContinue, InstType.OpNullary, "continue");
+ Add(Instruction.PackDouble2x32, InstType.Special);
+ Add(Instruction.PackHalf2x16, InstType.Special);
+ Add(Instruction.Maximum, InstType.CallBinary, "max");
+ Add(Instruction.MaximumU32, InstType.CallBinary, "max");
+ Add(Instruction.MemoryBarrier, InstType.CallNullary, "memoryBarrier");
+ Add(Instruction.Minimum, InstType.CallBinary, "min");
+ Add(Instruction.MinimumU32, InstType.CallBinary, "min");
+ Add(Instruction.Multiply, InstType.OpBinaryCom, "*", 1);
+ Add(Instruction.MultiplyHighS32, InstType.CallBinary, HelperFunctionNames.MultiplyHighS32);
+ Add(Instruction.MultiplyHighU32, InstType.CallBinary, HelperFunctionNames.MultiplyHighU32);
+ Add(Instruction.Negate, InstType.Special);
+ Add(Instruction.ReciprocalSquareRoot, InstType.CallUnary, "inversesqrt");
+ Add(Instruction.Return, InstType.OpNullary, "return");
+ Add(Instruction.Round, InstType.CallUnary, "roundEven");
+ Add(Instruction.ShiftLeft, InstType.OpBinary, "<<", 3);
+ Add(Instruction.ShiftRightS32, InstType.OpBinary, ">>", 3);
+ Add(Instruction.ShiftRightU32, InstType.OpBinary, ">>", 3);
+ Add(Instruction.Shuffle, InstType.CallQuaternary, HelperFunctionNames.Shuffle);
+ Add(Instruction.ShuffleDown, InstType.CallQuaternary, HelperFunctionNames.ShuffleDown);
+ Add(Instruction.ShuffleUp, InstType.CallQuaternary, HelperFunctionNames.ShuffleUp);
+ Add(Instruction.ShuffleXor, InstType.CallQuaternary, HelperFunctionNames.ShuffleXor);
+ Add(Instruction.Sine, InstType.CallUnary, "sin");
+ Add(Instruction.SquareRoot, InstType.CallUnary, "sqrt");
+ Add(Instruction.Store, InstType.Special);
+ Add(Instruction.StoreLocal, InstType.Special);
+ Add(Instruction.StoreShared, InstType.Special);
+ Add(Instruction.StoreShared16, InstType.Special);
+ Add(Instruction.StoreShared8, InstType.Special);
+ Add(Instruction.StoreStorage, InstType.Special);
+ Add(Instruction.StoreStorage16, InstType.Special);
+ Add(Instruction.StoreStorage8, InstType.Special);
+ Add(Instruction.Subtract, InstType.OpBinary, "-", 2);
+ Add(Instruction.SwizzleAdd, InstType.CallTernary, HelperFunctionNames.SwizzleAdd);
+ Add(Instruction.TextureSample, InstType.Special);
+ Add(Instruction.TextureSize, InstType.Special);
+ Add(Instruction.Truncate, InstType.CallUnary, "trunc");
+ Add(Instruction.UnpackDouble2x32, InstType.Special);
+ Add(Instruction.UnpackHalf2x16, InstType.Special);
+ Add(Instruction.VectorExtract, InstType.Special);
+ Add(Instruction.VoteAll, InstType.CallUnary, "allInvocationsARB");
+ Add(Instruction.VoteAllEqual, InstType.CallUnary, "allInvocationsEqualARB");
+ Add(Instruction.VoteAny, InstType.CallUnary, "anyInvocationARB");
+ }
+
+        /// <summary>
+        /// Registers the code generation information for an instruction.
+        /// </summary>
+        /// <param name="inst">Instruction being described, used as the table index</param>
+        /// <param name="flags">Syntax kind and arity flags</param>
+        /// <param name="opName">Function name or operator text, if any</param>
+        /// <param name="precedence">Operator precedence, used when deciding on parenthesis</param>
+        private static void Add(Instruction inst, InstType flags, string opName = null, int precedence = 0)
+        {
+            var entry = new InstInfo(flags, opName, precedence);
+
+            _infoTable[(int)inst] = entry;
+        }
+
+        /// <summary>
+        /// Looks up the code generation information of an instruction.
+        /// </summary>
+        /// <param name="inst">Instruction to look up; bits outside of <see cref="Instruction.Mask"/> are ignored</param>
+        /// <returns>Table entry for the instruction</returns>
+        public static InstInfo GetInstructionInfo(Instruction inst)
+        {
+            int tableIndex = (int)(inst & Instruction.Mask);
+
+            return _infoTable[tableIndex];
+        }
+
+        /// <summary>
+        /// Generates the expression for a source node, reinterpreted from its own type to the requested type.
+        /// </summary>
+        /// <param name="context">Code generation context</param>
+        /// <param name="node">Source node</param>
+        /// <param name="dstType">Type the consumer of the expression expects</param>
+        /// <returns>Source expression, as produced by the reinterpret cast helper</returns>
+        public static string GetSoureExpr(CodeGenContext context, IAstNode node, AggregateType dstType)
+        {
+            AggregateType srcType = OperandManager.GetNodeDestType(context, node);
+
+            return ReinterpretCast(context, node, srcType, dstType);
+        }
+
+        /// <summary>
+        /// Surrounds an expression with parenthesis if the parent instruction requires it.
+        /// The information of the parent instruction is looked up from the table.
+        /// </summary>
+        /// <param name="expr">Expression text of the node</param>
+        /// <param name="node">Node that produced the expression</param>
+        /// <param name="pInst">Parent instruction consuming the expression</param>
+        /// <param name="isLhs">True if the expression is the left hand side operand of the parent</param>
+        /// <returns>The expression, enclosed in parenthesis when needed</returns>
+        public static string Enclose(string expr, IAstNode node, Instruction pInst, bool isLhs)
+            => Enclose(expr, node, pInst, GetInstructionInfo(pInst), isLhs);
+
+        /// <summary>
+        /// Surrounds an expression with parenthesis if the parent instruction requires it.
+        /// </summary>
+        /// <param name="expr">Expression text of the node</param>
+        /// <param name="node">Node that produced the expression</param>
+        /// <param name="pInst">Parent instruction consuming the expression</param>
+        /// <param name="pInfo">Code generation information of the parent instruction</param>
+        /// <param name="isLhs">True if the expression is the left hand side operand of the parent</param>
+        /// <returns>The expression, enclosed in parenthesis when needed</returns>
+        public static string Enclose(string expr, IAstNode node, Instruction pInst, InstInfo pInfo, bool isLhs = false)
+        {
+            return NeedsParenthesis(node, pInst, pInfo, isLhs) ? $"({expr})" : expr;
+        }
+
+        /// <summary>
+        /// Checks if the expression of a node must be enclosed in parenthesis when used by a parent instruction.
+        /// </summary>
+        /// <param name="node">Node that produced the expression</param>
+        /// <param name="pInst">Parent instruction consuming the expression</param>
+        /// <param name="pInfo">Code generation information of the parent instruction</param>
+        /// <param name="isLhs">True if the expression is the left hand side operand of the parent</param>
+        /// <returns>True if parenthesis are required, false otherwise</returns>
+        public static bool NeedsParenthesis(IAstNode node, Instruction pInst, InstInfo pInfo, bool isLhs)
+        {
+            const InstType CallOrSpecial = InstType.Call | InstType.Special;
+
+            if (!(node is AstOperation operation))
+            {
+                // Anything that is not an operation can only be an operand, which normally
+                // needs no parenthesis. The exception is a negative constant consumed by a
+                // unary operator, as a sequence like --2 or ~-1 is not valid GLSL.
+                return IsNegativeConst(node) && pInfo.Type == InstType.OpUnary;
+            }
+
+            // Arguments of calls and special instructions are never enclosed.
+            if ((pInfo.Type & CallOrSpecial) != 0)
+            {
+                return false;
+            }
+
+            InstInfo info = GetInstructionInfo(operation.Inst);
+
+            // Likewise, a child that is itself a call or special instruction is never enclosed.
+            if ((info.Type & CallOrSpecial) != 0)
+            {
+                return false;
+            }
+
+            // A child with a lower precedence value than its parent needs no parenthesis.
+            if (info.Precedence < pInfo.Precedence)
+            {
+                return false;
+            }
+
+            // With equal precedence, only the left hand side operand can go without parenthesis.
+            if (isLhs && info.Precedence == pInfo.Precedence)
+            {
+                return false;
+            }
+
+            // Nesting the same commutative binary instruction needs no parenthesis either.
+            bool sameCommutativeOp = pInst == operation.Inst && info.Type == InstType.OpBinaryCom;
+
+            return !sameCommutativeOp;
+        }
+
+        /// <summary>
+        /// Checks if a node is a constant operand with a negative value.
+        /// </summary>
+        /// <param name="node">Node to check</param>
+        /// <returns>True if the node is a constant operand whose value is below zero, false otherwise</returns>
+        private static bool IsNegativeConst(IAstNode node)
+        {
+            return node is AstOperand operand &&
+                   operand.Type == OperandType.Constant &&
+                   operand.Value < 0;
+        }
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGenMemory.cs b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGenMemory.cs
new file mode 100644
index 00000000..99519837
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGenMemory.cs
@@ -0,0 +1,939 @@
+using Ryujinx.Graphics.Shader.IntermediateRepresentation;
+using Ryujinx.Graphics.Shader.StructuredIr;
+using Ryujinx.Graphics.Shader.Translation;
+using System;
+using System.Text;
+
+using static Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions.InstGenHelper;
+using static Ryujinx.Graphics.Shader.StructuredIr.InstructionInfo;
+
+namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions
+{
+ static class InstGenMemory
+ {
+ public static string ImageLoadOrStore(CodeGenContext context, AstOperation operation) // Generates the imageLoad, imageStore or imageAtomic* call for an image operation.
+ {
+ AstTextureOperation texOp = (AstTextureOperation)operation;
+
+ bool isBindless = (texOp.Flags & TextureFlags.Bindless) != 0;
+
+ // TODO: Bindless texture support. For now we just return 0/do nothing.
+ if (isBindless)
+ {
+ switch (texOp.Inst)
+ {
+ case Instruction.ImageStore:
+ return "// imageStore(bindless)";
+ case Instruction.ImageLoad:
+ AggregateType componentType = texOp.Format.GetComponentType();
+
+ NumberFormatter.TryFormat(0, componentType, out string imageConst);
+
+ AggregateType outputType = texOp.GetVectorType(componentType);
+
+ if ((outputType & AggregateType.ElementCountMask) != 0) // Vector results are built from the zero constant through the type constructor.
+ {
+ return $"{Declarations.GetVarTypeName(context, outputType, precise: false)}({imageConst})";
+ }
+
+ return imageConst;
+ default:
+ return NumberFormatter.FormatInt(0);
+ }
+ }
+
+ bool isArray = (texOp.Type & SamplerType.Array) != 0;
+ bool isIndexed = (texOp.Type & SamplerType.Indexed) != 0;
+
+ var texCallBuilder = new StringBuilder();
+
+ if (texOp.Inst == Instruction.ImageAtomic) // The atomic function name is selected from the atomic bits of the flags.
+ {
+ texCallBuilder.Append((texOp.Flags & TextureFlags.AtomicMask) switch {
+ TextureFlags.Add => "imageAtomicAdd",
+ TextureFlags.Minimum => "imageAtomicMin",
+ TextureFlags.Maximum => "imageAtomicMax",
+ TextureFlags.Increment => "imageAtomicAdd", // TODO: Clamp value.
+ TextureFlags.Decrement => "imageAtomicAdd", // TODO: Clamp value.
+ TextureFlags.BitwiseAnd => "imageAtomicAnd",
+ TextureFlags.BitwiseOr => "imageAtomicOr",
+ TextureFlags.BitwiseXor => "imageAtomicXor",
+ TextureFlags.Swap => "imageAtomicExchange",
+ TextureFlags.CAS => "imageAtomicCompSwap",
+ _ => "imageAtomicAdd",
+ });
+ }
+ else
+ {
+ texCallBuilder.Append(texOp.Inst == Instruction.ImageLoad ? "imageLoad" : "imageStore");
+ }
+
+ int srcIndex = isBindless ? 1 : 0; // isBindless is always false here because of the early return above, so this starts at 0.
+
+ string Src(AggregateType type) // Consumes the next source operand on each call.
+ {
+ return GetSoureExpr(context, texOp.GetSource(srcIndex++), type);
+ }
+
+ string indexExpr = null;
+
+ if (isIndexed) // For indexed images, the first source is passed to GetImageName as the index expression.
+ {
+ indexExpr = Src(AggregateType.S32);
+ }
+
+ string imageName = OperandManager.GetImageName(context.Config.Stage, texOp, indexExpr);
+
+ texCallBuilder.Append('(');
+ texCallBuilder.Append(imageName);
+
+ int coordsCount = texOp.Type.GetDimensions();
+
+ int pCount = coordsCount + (isArray ? 1 : 0); // Coordinate components, plus one for the array index when the image is an array.
+
+ void Append(string str) // Adds one more argument to the call being built.
+ {
+ texCallBuilder.Append(", ");
+ texCallBuilder.Append(str);
+ }
+
+ string ApplyScaling(string vector) // Wraps the coordinates in the texel fetch scale helper for non-indexed image loads on stages that support render scale.
+ {
+ if (context.Config.Stage.SupportsRenderScale() &&
+ texOp.Inst == Instruction.ImageLoad &&
+ !isBindless &&
+ !isIndexed)
+ {
+ // Image scales start after texture ones.
+ int scaleIndex = context.Config.GetTextureDescriptors().Length + context.Config.FindImageDescriptorIndex(texOp);
+
+ if (pCount == 3 && isArray)
+ {
+ // The array index is not scaled, just x and y.
+ vector = $"ivec3(Helper_TexelFetchScale(({vector}).xy, {scaleIndex}), ({vector}).z)";
+ }
+ else if (pCount == 2 && !isArray)
+ {
+ vector = $"Helper_TexelFetchScale({vector}, {scaleIndex})";
+ }
+ }
+
+ return vector;
+ }
+
+ if (pCount > 1)
+ {
+ string[] elems = new string[pCount];
+
+ for (int index = 0; index < pCount; index++)
+ {
+ elems[index] = Src(AggregateType.S32);
+ }
+
+ Append(ApplyScaling($"ivec{pCount}({string.Join(", ", elems)})"));
+ }
+ else
+ {
+ Append(Src(AggregateType.S32)); // A single coordinate is passed as a scalar and is never scaled.
+ }
+
+ if (texOp.Inst == Instruction.ImageStore)
+ {
+ AggregateType type = texOp.Format.GetComponentType();
+
+ string[] cElems = new string[4];
+
+ for (int index = 0; index < 4; index++)
+ {
+ if (srcIndex < texOp.SourcesCount)
+ {
+ cElems[index] = Src(type);
+ }
+ else // Components without a source are filled with zero of the component type.
+ {
+ cElems[index] = type switch
+ {
+ AggregateType.S32 => NumberFormatter.FormatInt(0),
+ AggregateType.U32 => NumberFormatter.FormatUint(0),
+ _ => NumberFormatter.FormatFloat(0)
+ };
+ }
+ }
+
+ string prefix = type switch
+ {
+ AggregateType.S32 => "i",
+ AggregateType.U32 => "u",
+ _ => string.Empty
+ };
+
+ Append($"{prefix}vec4({string.Join(", ", cElems)})");
+ }
+
+ if (texOp.Inst == Instruction.ImageAtomic)
+ {
+ AggregateType type = texOp.Format.GetComponentType();
+
+ if ((texOp.Flags & TextureFlags.AtomicMask) == TextureFlags.CAS)
+ {
+ Append(Src(type)); // Compare value.
+ }
+
+ string value = (texOp.Flags & TextureFlags.AtomicMask) switch
+ {
+ TextureFlags.Increment => NumberFormatter.FormatInt(1, type), // TODO: Clamp value
+ TextureFlags.Decrement => NumberFormatter.FormatInt(-1, type), // TODO: Clamp value
+ _ => Src(type)
+ };
+
+ Append(value);
+
+ texCallBuilder.Append(')');
+
+ if (type != AggregateType.S32) // The whole call is wrapped in int(...) when the component type is not S32.
+ {
+ texCallBuilder
+ .Insert(0, "int(")
+ .Append(')');
+ }
+ }
+ else
+ {
+ texCallBuilder.Append(')');
+
+ if (texOp.Inst == Instruction.ImageLoad)
+ {
+ texCallBuilder.Append(GetMaskMultiDest(texOp.Index)); // Loads get the mask text from GetMaskMultiDest appended after the call.
+ }
+ }
+
+ return texCallBuilder.ToString();
+ }
+
+        /// <summary>
+        /// Generates code for a load operation.
+        /// </summary>
+        /// <param name="context">Code generation context</param>
+        /// <param name="operation">Load operation</param>
+        /// <returns>Load expression</returns>
+        public static string Load(CodeGenContext context, AstOperation operation)
+            => GenerateLoadOrStore(context, operation, isStore: false);
+
+        /// <summary>
+        /// Generates a constant buffer read.
+        /// The first source is the buffer slot, the second source is the offset.
+        /// </summary>
+        /// <param name="context">Code generation context</param>
+        /// <param name="operation">Constant buffer load operation</param>
+        /// <returns>Constant buffer access expression</returns>
+        public static string LoadConstant(CodeGenContext context, AstOperation operation)
+        {
+            IAstNode slotNode = operation.GetSource(0);
+            IAstNode offsetNode = operation.GetSource(1);
+
+            // The offset is enclosed as if it were the left operand of a signed right shift.
+            // NOTE(review): presumably because the name helper shifts the offset - confirm.
+            string offsetExpr = Enclose(
+                GetSoureExpr(context, offsetNode, GetSrcVarType(operation.Inst, 1)),
+                offsetNode,
+                Instruction.ShiftRightS32,
+                isLhs: true);
+
+            var config = context.Config;
+            bool indexElement = !config.GpuAccessor.QueryHostHasVectorIndexingBug();
+
+            // A constant slot is passed by value, together with the constant buffer indexing feature flag.
+            if (slotNode is AstOperand slotOperand && slotOperand.Type == OperandType.Constant)
+            {
+                bool cbIndexable = config.UsedFeatures.HasFlag(Translation.FeatureFlags.CbIndexing);
+
+                return OperandManager.GetConstantBufferName(slotOperand.Value, offsetExpr, config.Stage, cbIndexable, indexElement);
+            }
+
+            // Otherwise the slot is passed as an expression.
+            string slotExpr = GetSoureExpr(context, slotNode, GetSrcVarType(operation.Inst, 0));
+
+            return OperandManager.GetConstantBufferName(slotExpr, offsetExpr, config.Stage, indexElement);
+        }
+
+        /// <summary>
+        /// Generates a read from the local memory array.
+        /// </summary>
+        /// <param name="context">Code generation context</param>
+        /// <param name="operation">Local memory load operation</param>
+        /// <returns>Array access expression</returns>
+        public static string LoadLocal(CodeGenContext context, AstOperation operation)
+            => LoadLocalOrShared(context, operation, DefaultNames.LocalMemoryName);
+
+        /// <summary>
+        /// Generates a read from the shared memory array.
+        /// </summary>
+        /// <param name="context">Code generation context</param>
+        /// <param name="operation">Shared memory load operation</param>
+        /// <returns>Array access expression</returns>
+        public static string LoadShared(CodeGenContext context, AstOperation operation)
+            => LoadLocalOrShared(context, operation, DefaultNames.SharedMemoryName);
+
+        /// <summary>
+        /// Generates an indexed read from a memory array, using the first source as the index.
+        /// </summary>
+        /// <param name="context">Code generation context</param>
+        /// <param name="operation">Load operation</param>
+        /// <param name="arrayName">Name of the array being read</param>
+        /// <returns>Array access expression of the form "name[offset]"</returns>
+        private static string LoadLocalOrShared(CodeGenContext context, AstOperation operation, string arrayName)
+        {
+            IAstNode offsetNode = operation.GetSource(0);
+            AggregateType offsetType = GetSrcVarType(operation.Inst, 0);
+
+            string offsetExpr = GetSoureExpr(context, offsetNode, offsetType);
+
+            return arrayName + "[" + offsetExpr + "]";
+        }
+
+        /// <summary>
+        /// Generates a storage buffer read.
+        /// The first source is the buffer index, the second source is the offset.
+        /// </summary>
+        /// <param name="context">Code generation context</param>
+        /// <param name="operation">Storage buffer load operation</param>
+        /// <returns>Storage buffer access expression</returns>
+        public static string LoadStorage(CodeGenContext context, AstOperation operation)
+        {
+            IAstNode indexNode = operation.GetSource(0);
+            IAstNode offsetNode = operation.GetSource(1);
+
+            AggregateType indexType = GetSrcVarType(operation.Inst, 0);
+            string indexExpr = GetSoureExpr(context, indexNode, indexType);
+
+            AggregateType offsetType = GetSrcVarType(operation.Inst, 1);
+            string offsetExpr = GetSoureExpr(context, offsetNode, offsetType);
+
+            return GetStorageBufferAccessor(indexExpr, offsetExpr, context.Config.Stage);
+        }
+
+        /// <summary>
+        /// Generates a texture level of detail query.
+        /// </summary>
+        /// <param name="context">Code generation context</param>
+        /// <param name="operation">Texture operation for the query</param>
+        /// <returns>textureQueryLod call followed by a component mask, or a zero constant for bindless textures</returns>
+        public static string Lod(CodeGenContext context, AstOperation operation)
+        {
+            AstTextureOperation texOp = (AstTextureOperation)operation;
+
+            int dimensions = texOp.Type.GetDimensions();
+
+            // TODO: Bindless texture support. For now we just return 0.
+            if ((texOp.Flags & TextureFlags.Bindless) != 0)
+            {
+                return NumberFormatter.FormatFloat(0);
+            }
+
+            // Indexed samplers have the index as their first source, ahead of the coordinates.
+            int firstCoordIndex = 0;
+            string indexExpr = null;
+
+            if ((texOp.Type & SamplerType.Indexed) != 0)
+            {
+                indexExpr = GetSoureExpr(context, texOp.GetSource(0), AggregateType.S32);
+                firstCoordIndex = 1;
+            }
+
+            string samplerName = OperandManager.GetSamplerName(context.Config.Stage, texOp, indexExpr);
+
+            string coordsExpr;
+
+            if (dimensions <= 1)
+            {
+                // A single coordinate is passed as a scalar.
+                coordsExpr = GetSoureExpr(context, texOp.GetSource(firstCoordIndex), AggregateType.FP32);
+            }
+            else
+            {
+                string[] components = new string[dimensions];
+
+                for (int component = 0; component < dimensions; component++)
+                {
+                    components[component] = GetSoureExpr(context, texOp.GetSource(firstCoordIndex + component), AggregateType.FP32);
+                }
+
+                coordsExpr = $"vec{dimensions}({string.Join(", ", components)})";
+            }
+
+            return $"textureQueryLod({samplerName}, {coordsExpr}){GetMask(texOp.Index)}";
+        }
+
+        /// <summary>
+        /// Generates code for a store operation.
+        /// </summary>
+        /// <param name="context">Code generation context</param>
+        /// <param name="operation">Store operation</param>
+        /// <returns>Store statement</returns>
+        public static string Store(CodeGenContext context, AstOperation operation)
+            => GenerateLoadOrStore(context, operation, isStore: true);
+
+        /// <summary>
+        /// Generates a write to the local memory array.
+        /// </summary>
+        /// <param name="context">Code generation context</param>
+        /// <param name="operation">Local memory store operation</param>
+        /// <returns>Assignment statement</returns>
+        public static string StoreLocal(CodeGenContext context, AstOperation operation)
+            => StoreLocalOrShared(context, operation, DefaultNames.LocalMemoryName);
+
+        /// <summary>
+        /// Generates a write to the shared memory array.
+        /// </summary>
+        /// <param name="context">Code generation context</param>
+        /// <param name="operation">Shared memory store operation</param>
+        /// <returns>Assignment statement</returns>
+        public static string StoreShared(CodeGenContext context, AstOperation operation)
+            => StoreLocalOrShared(context, operation, DefaultNames.SharedMemoryName);
+
+        /// <summary>
+        /// Generates an indexed write to a memory array.
+        /// The first source is the index, the second source is the value, which is reinterpreted as U32.
+        /// </summary>
+        /// <param name="context">Code generation context</param>
+        /// <param name="operation">Store operation</param>
+        /// <param name="arrayName">Name of the array being written</param>
+        /// <returns>Assignment of the form "name[offset] = value"</returns>
+        private static string StoreLocalOrShared(CodeGenContext context, AstOperation operation, string arrayName)
+        {
+            IAstNode offsetNode = operation.GetSource(0);
+            IAstNode valueNode = operation.GetSource(1);
+
+            string offsetExpr = GetSoureExpr(context, offsetNode, GetSrcVarType(operation.Inst, 0));
+
+            string valueExpr = TypeConversion.ReinterpretCast(
+                context,
+                valueNode,
+                OperandManager.GetNodeDestType(context, valueNode),
+                AggregateType.U32);
+
+            return arrayName + "[" + offsetExpr + "] = " + valueExpr;
+        }
+
+        /// <summary>
+        /// Generates a call to the 16-bit shared memory store helper.
+        /// The first source is the offset, the second source is the value, which is reinterpreted as U32.
+        /// </summary>
+        /// <param name="context">Code generation context</param>
+        /// <param name="operation">Shared memory store operation</param>
+        /// <returns>Helper function call</returns>
+        public static string StoreShared16(CodeGenContext context, AstOperation operation)
+        {
+            IAstNode offsetNode = operation.GetSource(0);
+            IAstNode valueNode = operation.GetSource(1);
+
+            string offsetExpr = GetSoureExpr(context, offsetNode, GetSrcVarType(operation.Inst, 0));
+
+            string valueExpr = TypeConversion.ReinterpretCast(
+                context,
+                valueNode,
+                OperandManager.GetNodeDestType(context, valueNode),
+                AggregateType.U32);
+
+            return HelperFunctionNames.StoreShared16 + "(" + offsetExpr + ", " + valueExpr + ")";
+        }
+
+        /// <summary>
+        /// Generates a call to the 8-bit shared memory store helper.
+        /// The first source is the offset, the second source is the value, which is reinterpreted as U32.
+        /// </summary>
+        /// <param name="context">Code generation context</param>
+        /// <param name="operation">Shared memory store operation</param>
+        /// <returns>Helper function call</returns>
+        public static string StoreShared8(CodeGenContext context, AstOperation operation)
+        {
+            IAstNode offsetNode = operation.GetSource(0);
+            IAstNode valueNode = operation.GetSource(1);
+
+            string offsetExpr = GetSoureExpr(context, offsetNode, GetSrcVarType(operation.Inst, 0));
+
+            string valueExpr = TypeConversion.ReinterpretCast(
+                context,
+                valueNode,
+                OperandManager.GetNodeDestType(context, valueNode),
+                AggregateType.U32);
+
+            return HelperFunctionNames.StoreShared8 + "(" + offsetExpr + ", " + valueExpr + ")";
+        }
+
+        /// <summary>
+        /// Generates a storage buffer write.
+        /// The sources are, in order, the buffer index, the offset and the value, which is reinterpreted as U32.
+        /// </summary>
+        /// <param name="context">Code generation context</param>
+        /// <param name="operation">Storage buffer store operation</param>
+        /// <returns>Assignment to the storage buffer location</returns>
+        public static string StoreStorage(CodeGenContext context, AstOperation operation)
+        {
+            IAstNode indexNode = operation.GetSource(0);
+            IAstNode offsetNode = operation.GetSource(1);
+            IAstNode valueNode = operation.GetSource(2);
+
+            string indexExpr = GetSoureExpr(context, indexNode, GetSrcVarType(operation.Inst, 0));
+            string offsetExpr = GetSoureExpr(context, offsetNode, GetSrcVarType(operation.Inst, 1));
+
+            string valueExpr = TypeConversion.ReinterpretCast(
+                context,
+                valueNode,
+                OperandManager.GetNodeDestType(context, valueNode),
+                AggregateType.U32);
+
+            string target = GetStorageBufferAccessor(indexExpr, offsetExpr, context.Config.Stage);
+
+            return target + " = " + valueExpr;
+        }
+
+ /// <summary>
+ /// Generates the call to the 16-bit storage buffer store helper.
+ /// Source 0 is the buffer slot, source 1 the offset and source 2 the value (reinterpreted as U32).
+ /// </summary>
+ public static string StoreStorage16(CodeGenContext context, AstOperation operation)
+ {
+     IAstNode src1 = operation.GetSource(0);
+     IAstNode src2 = operation.GetSource(1);
+     IAstNode src3 = operation.GetSource(2);
+
+     string indexExpr = GetSoureExpr(context, src1, GetSrcVarType(operation.Inst, 0));
+     string offsetExpr = GetSoureExpr(context, src2, GetSrcVarType(operation.Inst, 1));
+
+     AggregateType srcType = OperandManager.GetNodeDestType(context, src3);
+
+     string src = TypeConversion.ReinterpretCast(context, src3, srcType, AggregateType.U32);
+
+     // The helper receives the slot and offset directly, so no storage buffer
+     // accessor expression is needed here (the previously computed one was never used).
+     return $"{HelperFunctionNames.StoreStorage16}({indexExpr}, {offsetExpr}, {src})";
+ }
+
+ /// <summary>
+ /// Generates the call to the 8-bit storage buffer store helper.
+ /// Source 0 is the buffer slot, source 1 the offset and source 2 the value (reinterpreted as U32).
+ /// </summary>
+ public static string StoreStorage8(CodeGenContext context, AstOperation operation)
+ {
+     IAstNode src1 = operation.GetSource(0);
+     IAstNode src2 = operation.GetSource(1);
+     IAstNode src3 = operation.GetSource(2);
+
+     string indexExpr = GetSoureExpr(context, src1, GetSrcVarType(operation.Inst, 0));
+     string offsetExpr = GetSoureExpr(context, src2, GetSrcVarType(operation.Inst, 1));
+
+     AggregateType srcType = OperandManager.GetNodeDestType(context, src3);
+
+     string src = TypeConversion.ReinterpretCast(context, src3, srcType, AggregateType.U32);
+
+     // The helper receives the slot and offset directly, so no storage buffer
+     // accessor expression is needed here (the previously computed one was never used).
+     return $"{HelperFunctionNames.StoreStorage8}({indexExpr}, {offsetExpr}, {src})";
+ }
+
+ public static string TextureSample(CodeGenContext context, AstOperation operation)
+ { // Builds the GLSL texture sampling expression (texture*/texelFetch* variants) for a texture operation; the operation is cast to AstTextureOperation.
+ AstTextureOperation texOp = (AstTextureOperation)operation;
+ // Decode the operation flags that select the GLSL function variant and its extra arguments.
+ bool isBindless = (texOp.Flags & TextureFlags.Bindless) != 0;
+ bool isGather = (texOp.Flags & TextureFlags.Gather) != 0;
+ bool hasDerivatives = (texOp.Flags & TextureFlags.Derivatives) != 0;
+ bool intCoords = (texOp.Flags & TextureFlags.IntCoords) != 0;
+ bool hasLodBias = (texOp.Flags & TextureFlags.LodBias) != 0;
+ bool hasLodLevel = (texOp.Flags & TextureFlags.LodLevel) != 0;
+ bool hasOffset = (texOp.Flags & TextureFlags.Offset) != 0;
+ bool hasOffsets = (texOp.Flags & TextureFlags.Offsets) != 0;
+ // Decode the sampler type modifiers.
+ bool isArray = (texOp.Type & SamplerType.Array) != 0;
+ bool isIndexed = (texOp.Type & SamplerType.Indexed) != 0;
+ bool isMultisample = (texOp.Type & SamplerType.Multisample) != 0;
+ bool isShadow = (texOp.Type & SamplerType.Shadow) != 0;
+ // Only vector results get a component swizzle appended at the end; non-gather shadow sampling is treated as scalar.
+ bool colorIsVector = isGather || !isShadow;
+ // Isolate the base sampler type (compared against Texture2D/TextureCube below).
+ SamplerType type = texOp.Type & SamplerType.Mask;
+
+ bool is2D = type == SamplerType.Texture2D;
+ bool isCube = type == SamplerType.TextureCube;
+
+ // 2D array and cube array shadow samplers with LOD level or bias require an extension.
+ // If the extension is not supported, just remove the LOD parameter.
+ if (isArray && isShadow && (is2D || isCube) && !context.Config.GpuAccessor.QueryHostSupportsTextureShadowLod())
+ {
+ hasLodBias = false;
+ hasLodLevel = false;
+ }
+
+ // Cube shadow samplers with LOD level require an extension.
+ // If the extension is not supported, just remove the LOD level parameter.
+ if (isShadow && isCube && !context.Config.GpuAccessor.QueryHostSupportsTextureShadowLod())
+ {
+ hasLodLevel = false;
+ }
+
+ // TODO: Bindless texture support. For now we just return 0.
+ if (isBindless)
+ {
+ string scalarValue = NumberFormatter.FormatFloat(0);
+
+ if (colorIsVector)
+ {
+ AggregateType outputType = texOp.GetVectorType(AggregateType.FP32);
+ // A vector result needs the zero wrapped in a constructor of the matching vector type.
+ if ((outputType & AggregateType.ElementCountMask) != 0)
+ {
+ return $"{Declarations.GetVarTypeName(context, outputType, precise: false)}({scalarValue})";
+ }
+ }
+
+ return scalarValue;
+ }
+ // Pick the base function name; suffixes are appended below (Gather/Grad/Lod first, then Offset/Offsets).
+ string texCall = intCoords ? "texelFetch" : "texture";
+
+ if (isGather)
+ {
+ texCall += "Gather";
+ }
+ else if (hasDerivatives)
+ {
+ texCall += "Grad";
+ }
+ else if (hasLodLevel && !intCoords)
+ {
+ texCall += "Lod";
+ }
+
+ if (hasOffset)
+ {
+ texCall += "Offset";
+ }
+ else if (hasOffsets)
+ {
+ texCall += "Offsets";
+ }
+ // Sources are consumed strictly in order through Src(); isBindless is always false here (early return above), so this starts at 0.
+ int srcIndex = isBindless ? 1 : 0;
+ // Returns the expression for the next unconsumed source and advances srcIndex.
+ string Src(AggregateType type)
+ {
+ return GetSoureExpr(context, texOp.GetSource(srcIndex++), type);
+ }
+ // Indexed samplers take the sampler array index as their first source.
+ string indexExpr = null;
+
+ if (isIndexed)
+ {
+ indexExpr = Src(AggregateType.S32);
+ }
+
+ string samplerName = OperandManager.GetSamplerName(context.Config.Stage, texOp, indexExpr);
+
+ texCall += "(" + samplerName;
+ // pCount is the element count of the coordinate vector (P), which may also carry the array index and the shadow compare value.
+ int coordsCount = texOp.Type.GetDimensions();
+
+ int pCount = coordsCount;
+
+ int arrayIndexElem = -1;
+
+ if (isArray)
+ {
+ arrayIndexElem = pCount++;
+ }
+
+ // The sampler 1D shadow overload expects a
+ // dummy value in the middle of the vector.
+ bool hasDummy1DShadowElem = texOp.Type == (SamplerType.Texture1D | SamplerType.Shadow);
+
+ if (hasDummy1DShadowElem)
+ {
+ pCount++;
+ }
+ // For non-gather shadow sampling, the compare value is the last element of P.
+ if (isShadow && !isGather)
+ {
+ pCount++;
+ }
+
+ // On textureGather*, the comparison value is
+ // always specified as an extra argument.
+ bool hasExtraCompareArg = isShadow && isGather;
+ // With 5 elements, P is limited to 4 and the compare value is passed as a separate argument instead.
+ if (pCount == 5)
+ {
+ pCount = 4;
+
+ hasExtraCompareArg = true;
+ }
+ // Appends one more argument to the call being built.
+ void Append(string str)
+ {
+ texCall += ", " + str;
+ }
+ // Integer-coordinate (texelFetch) calls take integer coordinates, all others take floats.
+ AggregateType coordType = intCoords ? AggregateType.S32 : AggregateType.FP32;
+ // Builds the P vector, taking each element from the next source; the array index is read as S32 and wrapped in float() for float coordinates.
+ string AssemblePVector(int count)
+ {
+ if (count > 1)
+ {
+ string[] elems = new string[count];
+
+ for (int index = 0; index < count; index++)
+ {
+ if (arrayIndexElem == index)
+ {
+ elems[index] = Src(AggregateType.S32);
+
+ if (!intCoords)
+ {
+ elems[index] = "float(" + elems[index] + ")";
+ }
+ }
+ else if (index == 1 && hasDummy1DShadowElem)
+ {
+ elems[index] = NumberFormatter.FormatFloat(0);
+ }
+ else
+ {
+ elems[index] = Src(coordType);
+ }
+ }
+
+ string prefix = intCoords ? "i" : string.Empty;
+
+ return prefix + "vec" + count + "(" + string.Join(", ", elems) + ")";
+ }
+ else
+ {
+ return Src(coordType);
+ }
+ }
+ // Wraps integer coordinates with the Helper_TexelFetchScale helper when the stage supports render scale and the texture is not indexed.
+ string ApplyScaling(string vector)
+ {
+ if (intCoords)
+ {
+ if (context.Config.Stage.SupportsRenderScale() &&
+ !isBindless &&
+ !isIndexed)
+ {
+ int index = context.Config.FindTextureDescriptorIndex(texOp);
+
+ if (pCount == 3 && isArray)
+ {
+ // The array index is not scaled, just x and y.
+ vector = "ivec3(Helper_TexelFetchScale((" + vector + ").xy, " + index + "), (" + vector + ").z)";
+ }
+ else if (pCount == 2 && !isArray)
+ {
+ vector = "Helper_TexelFetchScale(" + vector + ", " + index + ")";
+ }
+ }
+ }
+
+ return vector;
+ }
+ // Adds a small offset to gather coordinates when the host reports a non-zero gather bias precision.
+ string ApplyBias(string vector)
+ {
+ int gatherBiasPrecision = context.Config.GpuAccessor.QueryHostGatherBiasPrecision();
+ if (isGather && gatherBiasPrecision != 0)
+ {
+ // GPU requires texture gather to be slightly offset to match NVIDIA behaviour when point is exactly between two texels.
+ // Offset by the gather precision divided by 2 to correct for rounding.
+ // NOTE(review): "xyz".Substring(0, pCount) throws if pCount is 4 (e.g. a cube array gather) - confirm whether that case can reach here.
+ if (pCount == 1)
+ {
+ vector = $"{vector} + (1.0 / (float(textureSize({samplerName}, 0)) * float({1 << (gatherBiasPrecision + 1)})))";
+ }
+ else
+ {
+ vector = $"{vector} + (1.0 / (vec{pCount}(textureSize({samplerName}, 0).{"xyz".Substring(0, pCount)}) * float({1 << (gatherBiasPrecision + 1)})))";
+ }
+ }
+
+ return vector;
+ }
+ // The coordinate vector is always the first argument after the sampler.
+ Append(ApplyBias(ApplyScaling(AssemblePVector(pCount))));
+ // Builds one derivative vector from the next 'count' sources.
+ string AssembleDerivativesVector(int count)
+ {
+ if (count > 1)
+ {
+ string[] elems = new string[count];
+
+ for (int index = 0; index < count; index++)
+ {
+ elems[index] = Src(AggregateType.FP32);
+ }
+
+ return "vec" + count + "(" + string.Join(", ", elems) + ")";
+ }
+ else
+ {
+ return Src(AggregateType.FP32);
+ }
+ }
+ // The remaining arguments are appended in this exact order, since each Src() call consumes the next source.
+ if (hasExtraCompareArg)
+ {
+ Append(Src(AggregateType.FP32));
+ }
+
+ if (hasDerivatives)
+ {
+ Append(AssembleDerivativesVector(coordsCount)); // dPdx
+ Append(AssembleDerivativesVector(coordsCount)); // dPdy
+ }
+ // Sample index for multisample textures, otherwise the explicit LOD level if present.
+ if (isMultisample)
+ {
+ Append(Src(AggregateType.S32));
+ }
+ else if (hasLodLevel)
+ {
+ Append(Src(coordType));
+ }
+ // Builds one integer offset vector from the next 'count' sources.
+ string AssembleOffsetVector(int count)
+ {
+ if (count > 1)
+ {
+ string[] elems = new string[count];
+
+ for (int index = 0; index < count; index++)
+ {
+ elems[index] = Src(AggregateType.S32);
+ }
+
+ return "ivec" + count + "(" + string.Join(", ", elems) + ")";
+ }
+ else
+ {
+ return Src(AggregateType.S32);
+ }
+ }
+ // Either a single offset, or an array of 4 offsets for the *Offsets variant.
+ if (hasOffset)
+ {
+ Append(AssembleOffsetVector(coordsCount));
+ }
+ else if (hasOffsets)
+ {
+ texCall += $", ivec{coordsCount}[4](";
+
+ texCall += AssembleOffsetVector(coordsCount) + ", ";
+ texCall += AssembleOffsetVector(coordsCount) + ", ";
+ texCall += AssembleOffsetVector(coordsCount) + ", ";
+ texCall += AssembleOffsetVector(coordsCount) + ")";
+ }
+ // Optional LOD bias.
+ if (hasLodBias)
+ {
+ Append(Src(AggregateType.FP32));
+ }
+
+ // textureGather* optional extra component index,
+ // not needed for shadow samplers.
+ if (isGather && !isShadow)
+ {
+ Append(Src(AggregateType.S32));
+ }
+ // Close the call and, for vector results, select the components given by the texOp.Index bit mask.
+ texCall += ")" + (colorIsVector ? GetMaskMultiDest(texOp.Index) : "");
+
+ return texCall;
+ }
+
+ /// <summary>
+ /// Generates the GLSL expression for a texture size query.
+ /// Component 3 maps to textureQueryLevels, the other components to a swizzled textureSize call.
+ /// Bindless textures are not supported yet and always yield 0.
+ /// </summary>
+ public static string TextureSize(CodeGenContext context, AstOperation operation)
+ {
+     AstTextureOperation texOp = (AstTextureOperation)operation;
+
+     // TODO: Bindless texture support. For now we just return 0.
+     if ((texOp.Flags & TextureFlags.Bindless) != 0)
+     {
+         return NumberFormatter.FormatInt(0);
+     }
+
+     bool isIndexed = (texOp.Type & SamplerType.Indexed) != 0;
+
+     // Indexed samplers use their first source as the sampler array index.
+     string indexExpr = isIndexed
+         ? GetSoureExpr(context, texOp.GetSource(0), AggregateType.S32)
+         : null;
+
+     string samplerName = OperandManager.GetSamplerName(context.Config.Stage, texOp, indexExpr);
+
+     if (texOp.Index == 3)
+     {
+         return $"textureQueryLevels({samplerName})";
+     }
+
+     (TextureDescriptor descriptor, int descriptorIndex) = context.Config.FindTextureDescriptor(texOp);
+
+     // Multisample and buffer textures are queried without a LOD argument.
+     bool isMultisample = (descriptor.Type & SamplerType.Multisample) == SamplerType.Multisample;
+     bool hasLod = !isMultisample && descriptor.Type != SamplerType.TextureBuffer;
+
+     string sizeArgs = samplerName;
+
+     if (hasLod)
+     {
+         // The LOD follows the sampler index source, when there is one.
+         int lodSrcIndex = isIndexed ? 1 : 0;
+         IAstNode lod = operation.GetSource(lodSrcIndex);
+
+         sizeArgs += ", " + GetSoureExpr(context, lod, GetSrcVarType(operation.Inst, lodSrcIndex));
+     }
+
+     string texCall = "textureSize(" + sizeArgs + ")" + GetMask(texOp.Index);
+
+     // Width and height (and depth, for 3D textures) are passed through the unscale helper
+     // when the stage supports render scale; indexed samplers are excluded.
+     bool isScaledComponent = texOp.Index < 2 || (texOp.Type & SamplerType.Mask) == SamplerType.Texture3D;
+
+     if (context.Config.Stage.SupportsRenderScale() && isScaledComponent && !isIndexed)
+     {
+         texCall = $"Helper_TextureSizeUnscale({texCall}, {descriptorIndex})";
+     }
+
+     return texCall;
+ }
+
+ private static string GenerateLoadOrStore(CodeGenContext context, AstOperation operation, bool isStore)
+ { // Builds the GLSL expression that loads from, or (when isStore is true) assigns to, an input/output variable; any other storage kind throws.
+ StorageKind storageKind = operation.StorageKind;
+
+ string varName;
+ AggregateType varType;
+ int srcIndex = 0;
+ // srcIndex counts the sources consumed so far while resolving the variable.
+ switch (storageKind)
+ {
+ case StorageKind.Input:
+ case StorageKind.InputPerPatch:
+ case StorageKind.Output:
+ case StorageKind.OutputPerPatch:
+ if (!(operation.GetSource(srcIndex++) is AstOperand varId) || varId.Type != OperandType.Constant)
+ {
+ throw new InvalidOperationException($"First input of {operation.Inst} with {storageKind} storage must be a constant operand.");
+ }
+ // The first source is a constant holding the IoVariable identifier.
+ IoVariable ioVariable = (IoVariable)varId.Value;
+ bool isOutput = storageKind.IsOutput();
+ bool isPerPatch = storageKind.IsPerPatch();
+ int location = -1;
+ int component = 0;
+ // Per-location variables take a constant location next, optionally followed by a constant component.
+ if (context.Config.HasPerLocationInputOrOutput(ioVariable, isOutput))
+ {
+ if (!(operation.GetSource(srcIndex++) is AstOperand vecIndex) || vecIndex.Type != OperandType.Constant)
+ {
+ throw new InvalidOperationException($"Second input of {operation.Inst} with {storageKind} storage must be a constant operand.");
+ }
+
+ location = vecIndex.Value;
+ // The component source is only consumed here when the config reports a per-component variable for it.
+ if (operation.SourcesCount > srcIndex &&
+ operation.GetSource(srcIndex) is AstOperand elemIndex &&
+ elemIndex.Type == OperandType.Constant &&
+ context.Config.HasPerLocationInputOrOutputComponent(ioVariable, location, elemIndex.Value, isOutput))
+ {
+ component = elemIndex.Value;
+ srcIndex++;
+ }
+ }
+ // Resolve the GLSL variable name and its type.
+ (varName, varType) = IoMap.GetGlslVariable(context.Config, ioVariable, location, component, isOutput, isPerPatch);
+
+ if (IoMap.IsPerVertexBuiltIn(context.Config.Stage, ioVariable, isOutput))
+ {
+ // Since those exist both as input and output on geometry and tessellation shaders,
+ // we need the gl_in and gl_out prefixes to disambiguate.
+ // The next source is the vertex index into gl_in/gl_out.
+ if (storageKind == StorageKind.Input)
+ {
+ string expr = GetSoureExpr(context, operation.GetSource(srcIndex++), AggregateType.S32);
+ varName = $"gl_in[{expr}].{varName}";
+ }
+ else if (storageKind == StorageKind.Output)
+ {
+ string expr = GetSoureExpr(context, operation.GetSource(srcIndex++), AggregateType.S32);
+ varName = $"gl_out[{expr}].{varName}";
+ }
+ }
+ // The remaining sources (excluding the stored value, for stores) are array indices or a vector element selector.
+ int firstSrcIndex = srcIndex;
+ int inputsCount = isStore ? operation.SourcesCount - 1 : operation.SourcesCount;
+
+ for (; srcIndex < inputsCount; srcIndex++)
+ {
+ IAstNode src = operation.GetSource(srcIndex);
+ // A constant last index on a vector-typed variable becomes a component swizzle instead of an array index.
+ if ((varType & AggregateType.ElementCountMask) != 0 &&
+ srcIndex == inputsCount - 1 &&
+ src is AstOperand elementIndex &&
+ elementIndex.Type == OperandType.Constant)
+ {
+ varName += "." + "xyzw"[elementIndex.Value & 3];
+ }
+ else if (srcIndex == firstSrcIndex && context.Config.Stage == ShaderStage.TessellationControl && storageKind == StorageKind.Output)
+ {
+ // GLSL requires that for tessellation control shader outputs,
+ // that the index expression must be *exactly* "gl_InvocationID",
+ // otherwise the compilation fails.
+ // TODO: Get rid of this and use expression propagation to make sure we generate the correct code from IR.
+ varName += "[gl_InvocationID]";
+ }
+ else
+ {
+ varName += $"[{GetSoureExpr(context, src, AggregateType.S32)}]";
+ }
+ }
+ break;
+
+ default:
+ throw new InvalidOperationException($"Invalid storage kind {storageKind}.");
+ }
+ // For stores, the last source is the value; its expression is requested with the variable's element type.
+ if (isStore)
+ {
+ varType &= AggregateType.ElementTypeMask;
+ varName = $"{varName} = {GetSoureExpr(context, operation.GetSource(srcIndex), varType)}";
+ }
+
+ return varName;
+ }
+
+ /// <summary>
+ /// Builds the expression that addresses one element of a storage buffer:
+ /// the stage-prefixed storage buffer array indexed by slot, then its data array indexed by offset.
+ /// </summary>
+ private static string GetStorageBufferAccessor(string slotExpr, string offsetExpr, ShaderStage stage)
+ {
+     string stagePrefix = OperandManager.GetShaderStagePrefix(stage);
+
+     return $"{stagePrefix}_{DefaultNames.StorageNamePrefix}[{slotExpr}].{DefaultNames.DataName}[{offsetExpr}]";
+ }
+
+ /// <summary>
+ /// Returns the single-component swizzle (".r", ".g", ".b" or ".a") for a component index in the 0-3 range.
+ /// </summary>
+ private static string GetMask(int index)
+ {
+     const string Components = "rgba";
+
+     return "." + Components.Substring(index, 1);
+ }
+
+ /// <summary>
+ /// Returns a swizzle selecting every component whose bit is set in the mask
+ /// (bit 0 is x, bit 1 is y, bit 2 is z, bit 3 is w). A mask with none of those bits set yields just ".".
+ /// </summary>
+ private static string GetMaskMultiDest(int mask)
+ {
+     const string Components = "xyzw";
+
+     string result = ".";
+
+     for (int bit = 0; bit < Components.Length; bit++)
+     {
+         bool isSelected = ((mask >> bit) & 1) != 0;
+
+         if (isSelected)
+         {
+             result += Components[bit];
+         }
+     }
+
+     return result;
+ }
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGenPacking.cs b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGenPacking.cs
new file mode 100644
index 00000000..5a888e9c
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGenPacking.cs
@@ -0,0 +1,56 @@
+using Ryujinx.Graphics.Shader.StructuredIr;
+using System;
+
+using static Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions.InstGenHelper;
+using static Ryujinx.Graphics.Shader.StructuredIr.InstructionInfo;
+
+namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions
+{
+ // Generates GLSL code for the pack and unpack instructions.
+ static class InstGenPacking
+ {
+     // Packs two 32-bit unsigned sources into a double.
+     public static string PackDouble2x32(CodeGenContext context, AstOperation operation)
+     {
+         return GeneratePack2(context, operation, "packDouble2x32", "uvec2");
+     }
+
+     // Packs two float sources into a pair of half floats.
+     public static string PackHalf2x16(CodeGenContext context, AstOperation operation)
+     {
+         return GeneratePack2(context, operation, "packHalf2x16", "vec2");
+     }
+
+     // Unpacks a double and selects the element given by the operation index.
+     public static string UnpackDouble2x32(CodeGenContext context, AstOperation operation)
+     {
+         return GenerateUnpack2(context, operation, "unpackDouble2x32");
+     }
+
+     // Unpacks a pair of half floats and selects the element given by the operation index.
+     public static string UnpackHalf2x16(CodeGenContext context, AstOperation operation)
+     {
+         return GenerateUnpack2(context, operation, "unpackHalf2x16");
+     }
+
+     // Emits "function(vectorType(src0, src1))" for a two source pack operation.
+     private static string GeneratePack2(CodeGenContext context, AstOperation operation, string function, string vectorType)
+     {
+         IAstNode first = operation.GetSource(0);
+         IAstNode second = operation.GetSource(1);
+
+         string firstExpr = GetSoureExpr(context, first, GetSrcVarType(operation.Inst, 0));
+         string secondExpr = GetSoureExpr(context, second, GetSrcVarType(operation.Inst, 1));
+
+         return function + "(" + vectorType + "(" + firstExpr + ", " + secondExpr + "))";
+     }
+
+     // Emits "function(src).x" or "function(src).y" for a single source unpack operation.
+     private static string GenerateUnpack2(CodeGenContext context, AstOperation operation, string function)
+     {
+         IAstNode packed = operation.GetSource(0);
+
+         string packedExpr = GetSoureExpr(context, packed, GetSrcVarType(operation.Inst, 0));
+
+         return function + "(" + packedExpr + ")" + GetMask(operation.Index);
+     }
+
+     // Returns ".x" for index 0 and ".y" for index 1.
+     private static string GetMask(int index)
+     {
+         const string Components = "xy";
+
+         return "." + Components.Substring(index, 1);
+     }
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGenVector.cs b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGenVector.cs
new file mode 100644
index 00000000..f09ea2e8
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGenVector.cs
@@ -0,0 +1,32 @@
+using Ryujinx.Graphics.Shader.IntermediateRepresentation;
+using Ryujinx.Graphics.Shader.StructuredIr;
+
+using static Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions.InstGenHelper;
+using static Ryujinx.Graphics.Shader.StructuredIr.InstructionInfo;
+
+namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions
+{
+ // Generates GLSL code for vector instructions.
+ static class InstGenVector
+ {
+     // Extracts one element of a vector: source 0 is the vector, source 1 the element index.
+     // A constant index becomes a component swizzle, anything else becomes an array style access.
+     public static string VectorExtract(CodeGenContext context, AstOperation operation)
+     {
+         IAstNode vectorNode = operation.GetSource(0);
+         IAstNode indexNode = operation.GetSource(1);
+
+         string vectorExpr = GetSoureExpr(context, vectorNode, OperandManager.GetNodeDestType(context, vectorNode));
+
+         if (!(indexNode is AstOperand constantIndex) || constantIndex.Type != OperandType.Constant)
+         {
+             string dynamicIndex = GetSoureExpr(context, indexNode, GetSrcVarType(operation.Inst, 1));
+
+             return vectorExpr + "[" + dynamicIndex + "]";
+         }
+
+         char component = "xyzw"[constantIndex.Value];
+
+         return vectorExpr + "." + component;
+     }
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstInfo.cs b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstInfo.cs
new file mode 100644
index 00000000..7b2a6b46
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstInfo.cs
@@ -0,0 +1,18 @@
+namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions
+{
+ // Immutable description of how an instruction is emitted: its kind, its GLSL name and its precedence.
+ readonly struct InstInfo
+ {
+     // Kind of the instruction.
+     public InstType Type { get; }
+
+     // Operator or function name used in the generated code.
+     public string OpName { get; }
+
+     // Precedence value associated with the instruction.
+     public int Precedence { get; }
+
+     public InstInfo(InstType type, string opName, int precedence)
+         => (Type, OpName, Precedence) = (type, opName, precedence);
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstType.cs b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstType.cs
new file mode 100644
index 00000000..84e36cdd
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstType.cs
@@ -0,0 +1,33 @@
+using System;
+
+namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions
+{
+ [Flags]
+ enum InstType
+ { // Each value combines category bits (bit 8 and above) with a small number in the low byte (see ArityMask).
+ OpNullary = Op | 0, // Same numeric value as Op.
+ OpUnary = Op | 1,
+ OpBinary = Op | 2,
+ OpBinaryCom = Op | 2 | Commutative, // Binary operator that is also flagged as commutative.
+ OpTernary = Op | 3,
+ // Call forms, with 0 to 4 in the low byte.
+ CallNullary = Call | 0, // Same numeric value as Call.
+ CallUnary = Call | 1,
+ CallBinary = Call | 2,
+ CallTernary = Call | 3,
+ CallQuaternary = Call | 4,
+
+ // The atomic instructions have one extra operand,
+ // for the storage slot and offset pair.
+ AtomicBinary = Call | Atomic | 3,
+ AtomicTernary = Call | Atomic | 4,
+ // Category bits.
+ Commutative = 1 << 8,
+ Op = 1 << 9,
+ Call = 1 << 10,
+ Atomic = 1 << 11,
+ Special = 1 << 12,
+ // Mask covering the low byte, where the number combined with Op/Call above is stored.
+ ArityMask = 0xff
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/IoMap.cs b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/IoMap.cs
new file mode 100644
index 00000000..093ee232
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/IoMap.cs
@@ -0,0 +1,145 @@
+using Ryujinx.Graphics.Shader.IntermediateRepresentation;
+using Ryujinx.Graphics.Shader.Translation;
+using System.Globalization;
+
+namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions
+{
+ // Maps IR input/output variables to GLSL variable names and types.
+ static class IoMap
+ {
+     // Returns the GLSL name and type of an IO variable, or (null, Invalid) when there is no mapping.
+     public static (string, AggregateType) GetGlslVariable(
+         ShaderConfig config,
+         IoVariable ioVariable,
+         int location,
+         int component,
+         bool isOutput,
+         bool isPerPatch) => ioVariable switch
+     {
+         IoVariable.BackColorDiffuse => ("gl_BackColor", AggregateType.Vector4 | AggregateType.FP32), // Deprecated.
+         IoVariable.BackColorSpecular => ("gl_BackSecondaryColor", AggregateType.Vector4 | AggregateType.FP32), // Deprecated.
+         IoVariable.BaseInstance => ("gl_BaseInstanceARB", AggregateType.S32),
+         IoVariable.BaseVertex => ("gl_BaseVertexARB", AggregateType.S32),
+         IoVariable.ClipDistance => ("gl_ClipDistance", AggregateType.Array | AggregateType.FP32),
+         IoVariable.CtaId => ("gl_WorkGroupID", AggregateType.Vector3 | AggregateType.U32),
+         IoVariable.DrawIndex => ("gl_DrawIDARB", AggregateType.S32),
+         IoVariable.FogCoord => ("gl_FogFragCoord", AggregateType.FP32), // Deprecated.
+         IoVariable.FragmentCoord => ("gl_FragCoord", AggregateType.Vector4 | AggregateType.FP32),
+         IoVariable.FragmentOutputColor => GetFragmentOutputColorVariableName(config, location),
+         IoVariable.FragmentOutputDepth => ("gl_FragDepth", AggregateType.FP32),
+         IoVariable.FragmentOutputIsBgra => (DefaultNames.SupportBlockIsBgraName, AggregateType.Array | AggregateType.Bool),
+         IoVariable.FrontColorDiffuse => ("gl_FrontColor", AggregateType.Vector4 | AggregateType.FP32), // Deprecated.
+         IoVariable.FrontColorSpecular => ("gl_FrontSecondaryColor", AggregateType.Vector4 | AggregateType.FP32), // Deprecated.
+         IoVariable.FrontFacing => ("gl_FrontFacing", AggregateType.Bool),
+         IoVariable.InstanceId => ("gl_InstanceID", AggregateType.S32),
+         IoVariable.InstanceIndex => ("gl_InstanceIndex", AggregateType.S32),
+         IoVariable.InvocationId => ("gl_InvocationID", AggregateType.S32),
+         IoVariable.Layer => ("gl_Layer", AggregateType.S32),
+         IoVariable.PatchVertices => ("gl_PatchVerticesIn", AggregateType.S32),
+         IoVariable.PointCoord => ("gl_PointCoord", AggregateType.Vector2 | AggregateType.FP32),
+         IoVariable.PointSize => ("gl_PointSize", AggregateType.FP32),
+         IoVariable.Position => ("gl_Position", AggregateType.Vector4 | AggregateType.FP32),
+         IoVariable.PrimitiveId => GetPrimitiveIdVariableName(config.Stage, isOutput),
+         IoVariable.SubgroupEqMask => GetSubgroupMaskVariableName(config, "Eq"),
+         IoVariable.SubgroupGeMask => GetSubgroupMaskVariableName(config, "Ge"),
+         IoVariable.SubgroupGtMask => GetSubgroupMaskVariableName(config, "Gt"),
+         IoVariable.SubgroupLaneId => GetSubgroupInvocationIdVariableName(config),
+         IoVariable.SubgroupLeMask => GetSubgroupMaskVariableName(config, "Le"),
+         IoVariable.SubgroupLtMask => GetSubgroupMaskVariableName(config, "Lt"),
+         IoVariable.SupportBlockRenderScale => (DefaultNames.SupportBlockRenderScaleName, AggregateType.Array | AggregateType.FP32),
+         IoVariable.SupportBlockViewInverse => (DefaultNames.SupportBlockViewportInverse, AggregateType.Vector2 | AggregateType.FP32),
+         IoVariable.TessellationCoord => ("gl_TessCoord", AggregateType.Vector3 | AggregateType.FP32),
+         IoVariable.TessellationLevelInner => ("gl_TessLevelInner", AggregateType.Array | AggregateType.FP32),
+         IoVariable.TessellationLevelOuter => ("gl_TessLevelOuter", AggregateType.Array | AggregateType.FP32),
+         IoVariable.TextureCoord => ("gl_TexCoord", AggregateType.Array | AggregateType.Vector4 | AggregateType.FP32), // Deprecated.
+         IoVariable.ThreadId => ("gl_LocalInvocationID", AggregateType.Vector3 | AggregateType.U32),
+         IoVariable.ThreadKill => ("gl_HelperInvocation", AggregateType.Bool),
+         IoVariable.UserDefined => GetUserDefinedVariableName(config, location, component, isOutput, isPerPatch),
+         IoVariable.VertexId => ("gl_VertexID", AggregateType.S32),
+         IoVariable.VertexIndex => ("gl_VertexIndex", AggregateType.S32),
+         IoVariable.ViewportIndex => ("gl_ViewportIndex", AggregateType.S32),
+         IoVariable.ViewportMask => ("gl_ViewportMask", AggregateType.Array | AggregateType.S32),
+         _ => (null, AggregateType.Invalid)
+     };
+
+     // Tells whether the variable is one of the listed built-ins and the stage is one where
+     // callers address it per vertex: tessellation control for outputs; tessellation control,
+     // tessellation evaluation or geometry for inputs.
+     public static bool IsPerVertexBuiltIn(ShaderStage stage, IoVariable ioVariable, bool isOutput)
+     {
+         bool isListedBuiltIn =
+             ioVariable == IoVariable.Layer ||
+             ioVariable == IoVariable.ViewportIndex ||
+             ioVariable == IoVariable.PointSize ||
+             ioVariable == IoVariable.Position ||
+             ioVariable == IoVariable.ClipDistance ||
+             ioVariable == IoVariable.PointCoord ||
+             ioVariable == IoVariable.ViewportMask;
+
+         if (!isListedBuiltIn)
+         {
+             return false;
+         }
+
+         bool isTessControl = stage == ShaderStage.TessellationControl;
+
+         if (isOutput)
+         {
+             return isTessControl;
+         }
+
+         return isTessControl ||
+                stage == ShaderStage.TessellationEvaluation ||
+                stage == ShaderStage.Geometry;
+     }
+
+     // Fragment color outputs are named with the output prefix followed by the location;
+     // a negative location yields the bare prefix with the type of location 0.
+     private static (string, AggregateType) GetFragmentOutputColorVariableName(ShaderConfig config, int location)
+     {
+         bool hasLocation = location >= 0;
+
+         string name = hasLocation
+             ? DefaultNames.OAttributePrefix + location.ToString(CultureInfo.InvariantCulture)
+             : DefaultNames.OAttributePrefix;
+
+         return (name, config.GetFragmentOutputColorType(hasLocation ? location : 0));
+     }
+
+     private static (string, AggregateType) GetPrimitiveIdVariableName(ShaderStage stage, bool isOutput)
+     {
+         // The geometry stage has an additional gl_PrimitiveIDIn variable, used for its input.
+         bool isGeometryInput = !isOutput && stage == ShaderStage.Geometry;
+
+         string name = isGeometryInput ? "gl_PrimitiveIDIn" : "gl_PrimitiveID";
+
+         return (name, AggregateType.S32);
+     }
+
+     // Picks the ARB ballot form (unpacked into two 32-bit values) when the host supports it,
+     // otherwise the four component gl_Subgroup*Mask form.
+     private static (string, AggregateType) GetSubgroupMaskVariableName(ShaderConfig config, string cc)
+     {
+         if (config.GpuAccessor.QueryHostSupportsShaderBallot())
+         {
+             return ($"unpackUint2x32(gl_SubGroup{cc}MaskARB)", AggregateType.Vector2 | AggregateType.U32);
+         }
+
+         return ($"gl_Subgroup{cc}Mask", AggregateType.Vector4 | AggregateType.U32);
+     }
+
+     // Picks the ARB ballot invocation variable when the host supports it, otherwise gl_SubgroupInvocationID.
+     private static (string, AggregateType) GetSubgroupInvocationIdVariableName(ShaderConfig config)
+     {
+         bool useBallot = config.GpuAccessor.QueryHostSupportsShaderBallot();
+
+         string name = useBallot ? "gl_SubGroupInvocationARB" : "gl_SubgroupInvocationID";
+
+         return (name, AggregateType.U32);
+     }
+
+     // User defined attributes are named prefix + location, plus a "_x/_y/_z/_w" suffix when the
+     // config reports a per-component variable; a negative location yields the bare prefix.
+     private static (string, AggregateType) GetUserDefinedVariableName(ShaderConfig config, int location, int component, bool isOutput, bool isPerPatch)
+     {
+         string name;
+
+         if (isPerPatch)
+         {
+             name = DefaultNames.PerPatchAttributePrefix;
+         }
+         else if (isOutput)
+         {
+             name = DefaultNames.OAttributePrefix;
+         }
+         else
+         {
+             name = DefaultNames.IAttributePrefix;
+         }
+
+         if (location < 0)
+         {
+             return (name, config.GetUserDefinedType(0, isOutput));
+         }
+
+         name += location.ToString(CultureInfo.InvariantCulture);
+
+         bool hasComponentVariable = config.HasPerLocationInputOrOutputComponent(IoVariable.UserDefined, location, component, isOutput);
+
+         if (hasComponentVariable)
+         {
+             name += "_" + "xyzw"[component & 3];
+         }
+
+         return (name, config.GetUserDefinedType(location, isOutput));
+     }
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/NumberFormatter.cs b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/NumberFormatter.cs
new file mode 100644
index 00000000..eb27e9bf
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/NumberFormatter.cs
@@ -0,0 +1,104 @@
+using Ryujinx.Graphics.Shader.Translation;
+using System;
+using System.Globalization;
+
+namespace Ryujinx.Graphics.Shader.CodeGen.Glsl
+{
+ // Formats numeric constants as GLSL literals.
+ static class NumberFormatter
+ {
+     // Integers with a magnitude up to this value are written in decimal, larger ones in hexadecimal.
+     private const int MaxDecimal = 256;
+
+     // Formats the raw 32-bit value as a literal of the given type.
+     // Returns false only for FP32 values that are NaN or infinite; throws for unsupported types.
+     public static bool TryFormat(int value, AggregateType dstType, out string formatted)
+     {
+         switch (dstType)
+         {
+             case AggregateType.FP32:
+                 return TryFormatFloat(BitConverter.Int32BitsToSingle(value), out formatted);
+
+             case AggregateType.S32:
+                 formatted = FormatInt(value);
+                 return true;
+
+             case AggregateType.U32:
+                 formatted = FormatUint((uint)value);
+                 return true;
+
+             case AggregateType.Bool:
+                 formatted = value != 0 ? "true" : "false";
+                 return true;
+
+             default:
+                 throw new ArgumentException($"Invalid variable type \"{dstType}\".");
+         }
+     }
+
+     // Formats a float literal, throwing when the value is NaN or infinite.
+     public static string FormatFloat(float value)
+     {
+         return TryFormatFloat(value, out string formatted)
+             ? formatted
+             : throw new ArgumentException("Failed to convert float value to string.");
+     }
+
+     // Formats a float literal with 9 significant digits using the invariant culture.
+     // Fails for NaN and infinity.
+     public static bool TryFormatFloat(float value, out string formatted)
+     {
+         if (float.IsNaN(value) || float.IsInfinity(value))
+         {
+             formatted = null;
+
+             return false;
+         }
+
+         string text = value.ToString("G9", CultureInfo.InvariantCulture);
+
+         // Append ".0" when the text has neither a decimal point nor an exponent.
+         bool hasPointOrExponent = text.IndexOfAny(new[] { '.', 'e', 'E' }) >= 0;
+
+         formatted = hasPointOrExponent ? text : text + ".0";
+
+         return true;
+     }
+
+     // Formats an integer literal as signed or unsigned depending on the type; throws for other types.
+     public static string FormatInt(int value, AggregateType dstType)
+     {
+         switch (dstType)
+         {
+             case AggregateType.S32:
+                 return FormatInt(value);
+
+             case AggregateType.U32:
+                 return FormatUint((uint)value);
+
+             default:
+                 throw new ArgumentException($"Invalid variable type \"{dstType}\".");
+         }
+     }
+
+     // Formats a signed integer literal.
+     public static string FormatInt(int value)
+     {
+         if (value < -MaxDecimal || value > MaxDecimal)
+         {
+             return "0x" + value.ToString("X", CultureInfo.InvariantCulture);
+         }
+
+         return value.ToString(CultureInfo.InvariantCulture);
+     }
+
+     // Formats an unsigned integer literal, which always carries the "u" suffix.
+     public static string FormatUint(uint value)
+     {
+         string digits = value > MaxDecimal
+             ? "0x" + value.ToString("X", CultureInfo.InvariantCulture)
+             : value.ToString(CultureInfo.InvariantCulture);
+
+         return digits + "u";
+     }
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/OperandManager.cs b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/OperandManager.cs
new file mode 100644
index 00000000..92e83358
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/OperandManager.cs
@@ -0,0 +1,254 @@
+using Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions;
+using Ryujinx.Graphics.Shader.IntermediateRepresentation;
+using Ryujinx.Graphics.Shader.StructuredIr;
+using Ryujinx.Graphics.Shader.Translation;
+using System;
+using System.Collections.Generic;
+using System.Diagnostics;
+
+using static Ryujinx.Graphics.Shader.StructuredIr.InstructionInfo;
+
+namespace Ryujinx.Graphics.Shader.CodeGen.Glsl
+{
+ class OperandManager
+ {
+ // Name prefixes per shader stage, indexed by the integer value of ShaderStage.
+ private static readonly string[] _stagePrefixes = new string[] { "cp", "vp", "tcp", "tep", "gp", "fp" };
+
+ // Names assigned to declared local variables, keyed by the operand they were declared for.
+ // Only assigned in the constructor, hence readonly.
+ private readonly Dictionary<AstOperand, string> _locals;
+
+ /// <summary>
+ /// Creates a new operand manager with no declared locals.
+ /// </summary>
+ public OperandManager()
+ {
+     _locals = new Dictionary<AstOperand, string>();
+ }
+
+ /// <summary>
+ /// Assigns a name to a local variable operand and remembers it.
+ /// The name is the local prefix followed by the number of locals declared so far.
+ /// </summary>
+ /// <param name="operand">Local variable operand</param>
+ /// <returns>The name assigned to the local</returns>
+ public string DeclareLocal(AstOperand operand)
+ {
+     int ordinal = _locals.Count;
+     string localName = $"{DefaultNames.LocalNamePrefix}_{ordinal}";
+
+     _locals.Add(operand, localName);
+
+     return localName;
+ }
+
+ /// <summary>
+ /// Gets the GLSL expression text for an operand.
+ /// </summary>
+ /// <param name="context">Code generation context</param>
+ /// <param name="operand">Operand to get the expression for</param>
+ /// <returns>The expression string</returns>
+ /// <exception cref="ArgumentException">Thrown when the operand type is not handled</exception>
+ public string GetExpression(CodeGenContext context, AstOperand operand)
+ {
+     switch (operand.Type)
+     {
+         case OperandType.Argument:
+             return GetArgumentName(operand.Value);
+         case OperandType.Constant:
+             return NumberFormatter.FormatInt(operand.Value);
+         case OperandType.ConstantBuffer:
+             return GetConstantBufferName(operand, context.Config);
+         case OperandType.LocalVariable:
+             return _locals[operand];
+         case OperandType.Undefined:
+             return DefaultNames.UndefinedName;
+         default:
+             throw new ArgumentException($"Invalid operand type \"{operand.Type}\".");
+     }
+ }
+
+ /// <summary>
+ /// Gets the constant buffer access expression for a constant buffer operand.
+ /// </summary>
+ /// <param name="operand">Operand providing the constant buffer slot and offset</param>
+ /// <param name="config">Shader configuration providing the stage and used features</param>
+ /// <returns>The access expression</returns>
+ private static string GetConstantBufferName(AstOperand operand, ShaderConfig config)
+ {
+     bool cbIndexable = config.UsedFeatures.HasFlag(FeatureFlags.CbIndexing);
+
+     return GetConstantBufferName(operand.CbufSlot, operand.CbufOffset, config.Stage, cbIndexable);
+ }
+
+ /// <summary>
+ /// Gets the constant buffer access expression for a constant slot and offset.
+ /// The upper bits of the offset select the array element, the low 2 bits select the component.
+ /// </summary>
+ /// <param name="slot">Constant buffer slot</param>
+ /// <param name="offset">Offset inside the constant buffer</param>
+ /// <param name="stage">Shader stage</param>
+ /// <param name="cbIndexable">True to use the indexable uniform buffer naming</param>
+ /// <returns>The access expression</returns>
+ public static string GetConstantBufferName(int slot, int offset, ShaderStage stage, bool cbIndexable)
+ {
+     string bufferName = GetUbName(stage, slot, cbIndexable);
+     int elementIndex = offset >> 2;
+     char component = GetSwizzleMask(offset & 3);
+
+     return $"{bufferName}[{elementIndex}].{component}";
+ }
+
+ /// <summary>
+ /// Builds an expression that selects one component of a 4-component vector using an index expression.
+ /// </summary>
+ /// <param name="vectorName">Vector expression</param>
+ /// <param name="indexExpr">Component index expression</param>
+ /// <param name="indexElement">True to index the vector directly, false to emit a chain of conditional selects</param>
+ /// <returns>The component selection expression</returns>
+ private static string GetVec4Indexed(string vectorName, string indexExpr, bool indexElement)
+ {
+     if (!indexElement)
+     {
+         // Start with the x component as the fallback, then wrap it in one conditional per remaining component.
+         string selection = $"{vectorName}.x";
+
+         for (int component = 1; component <= 3; component++)
+         {
+             char mask = GetSwizzleMask(component);
+
+             selection = $"(({indexExpr}) == {component}) ? ({vectorName}.{mask}) : ({selection})";
+         }
+
+         return $"({selection})";
+     }
+
+     return $"{vectorName}[{indexExpr}]";
+ }
+
+ /// <summary>
+ /// Gets the constant buffer access expression for a constant slot and a dynamic offset expression.
+ /// </summary>
+ /// <param name="slot">Constant buffer slot</param>
+ /// <param name="offsetExpr">Offset expression</param>
+ /// <param name="stage">Shader stage</param>
+ /// <param name="cbIndexable">True to use the indexable uniform buffer naming</param>
+ /// <param name="indexElement">True to index the vector component directly</param>
+ /// <returns>The access expression</returns>
+ public static string GetConstantBufferName(int slot, string offsetExpr, ShaderStage stage, bool cbIndexable, bool indexElement)
+ {
+     string vectorExpr = GetUbName(stage, slot, cbIndexable) + $"[{offsetExpr} >> 2]";
+     string componentExpr = offsetExpr + " & 3";
+
+     return GetVec4Indexed(vectorExpr, componentExpr, indexElement);
+ }
+
+ /// <summary>
+ /// Gets the constant buffer access expression for a dynamic slot expression and a dynamic offset expression.
+ /// </summary>
+ /// <param name="slotExpr">Constant buffer slot expression</param>
+ /// <param name="offsetExpr">Offset expression</param>
+ /// <param name="stage">Shader stage</param>
+ /// <param name="indexElement">True to index the vector component directly</param>
+ /// <returns>The access expression</returns>
+ public static string GetConstantBufferName(string slotExpr, string offsetExpr, ShaderStage stage, bool indexElement)
+ {
+     string vectorExpr = GetUbName(stage, slotExpr) + $"[{offsetExpr} >> 2]";
+     string componentExpr = offsetExpr + " & 3";
+
+     return GetVec4Indexed(vectorExpr, componentExpr, indexElement);
+ }
+
+ /// <summary>
+ /// Gets the name of the uniform buffer for a constant slot.
+ /// </summary>
+ /// <param name="stage">Shader stage</param>
+ /// <param name="slot">Constant buffer slot</param>
+ /// <param name="cbIndexable">True to access the buffer as an element of the uniform buffer array</param>
+ /// <returns>The uniform buffer name</returns>
+ public static string GetUbName(ShaderStage stage, int slot, bool cbIndexable)
+ {
+     if (!cbIndexable)
+     {
+         return $"{GetShaderStagePrefix(stage)}_{DefaultNames.UniformNamePrefix}{slot}_{DefaultNames.UniformNameSuffix}";
+     }
+
+     string slotLiteral = NumberFormatter.FormatInt(slot, AggregateType.S32);
+
+     return GetUbName(stage, slotLiteral);
+ }
+
+ /// <summary>
+ /// Gets the access expression for an element of the uniform buffer array, selected by a slot expression.
+ /// </summary>
+ /// <param name="stage">Shader stage</param>
+ /// <param name="slotExpr">Constant buffer slot expression</param>
+ /// <returns>The access expression</returns>
+ private static string GetUbName(ShaderStage stage, string slotExpr)
+ {
+     string stagePrefix = GetShaderStagePrefix(stage);
+
+     return $"{stagePrefix}_{DefaultNames.UniformNamePrefix}[{slotExpr}].{DefaultNames.DataName}";
+ }
+
+ /// <summary>
+ /// Gets the sampler name for a texture operation.
+ /// </summary>
+ /// <param name="stage">Shader stage</param>
+ /// <param name="texOp">Texture operation providing the constant buffer slot, handle and sampler type</param>
+ /// <param name="indexExpr">Index expression, used when the sampler type is indexed</param>
+ /// <returns>The sampler name</returns>
+ public static string GetSamplerName(ShaderStage stage, AstTextureOperation texOp, string indexExpr)
+ {
+     bool isIndexed = texOp.Type.HasFlag(SamplerType.Indexed);
+
+     return GetSamplerName(stage, texOp.CbufSlot, texOp.Handle, isIndexed, indexExpr);
+ }
+
+ /// <summary>
+ /// Gets the sampler name for a constant buffer slot and handle.
+ /// </summary>
+ /// <param name="stage">Shader stage</param>
+ /// <param name="cbufSlot">Constant buffer slot; a negative value selects the "tcb" naming</param>
+ /// <param name="handle">Texture handle, written in hexadecimal</param>
+ /// <param name="indexed">True to append an array access using <paramref name="indexExpr"/></param>
+ /// <param name="indexExpr">Array index expression</param>
+ /// <returns>The sampler name</returns>
+ public static string GetSamplerName(ShaderStage stage, int cbufSlot, int handle, bool indexed, string indexExpr)
+ {
+     string suffix;
+
+     if (cbufSlot < 0)
+     {
+         suffix = $"_tcb_{handle:X}";
+     }
+     else
+     {
+         suffix = $"_cb{cbufSlot}_{handle:X}";
+     }
+
+     if (indexed)
+     {
+         suffix = suffix + $"a[{indexExpr}]";
+     }
+
+     return $"{GetShaderStagePrefix(stage)}_{DefaultNames.SamplerNamePrefix}{suffix}";
+ }
+
+ /// <summary>
+ /// Gets the image name for a texture operation.
+ /// </summary>
+ /// <param name="stage">Shader stage</param>
+ /// <param name="texOp">Texture operation providing the constant buffer slot, handle, format and sampler type</param>
+ /// <param name="indexExpr">Index expression, used when the sampler type is indexed</param>
+ /// <returns>The image name</returns>
+ public static string GetImageName(ShaderStage stage, AstTextureOperation texOp, string indexExpr)
+ {
+     bool isIndexed = texOp.Type.HasFlag(SamplerType.Indexed);
+
+     return GetImageName(stage, texOp.CbufSlot, texOp.Handle, texOp.Format, isIndexed, indexExpr);
+ }
+
+ /// <summary>
+ /// Gets the image name for a constant buffer slot, handle and format.
+ /// </summary>
+ /// <param name="stage">Shader stage</param>
+ /// <param name="cbufSlot">Constant buffer slot; a negative value selects the "tcb" naming</param>
+ /// <param name="handle">Texture handle, written in hexadecimal</param>
+ /// <param name="format">Image format, appended to the name in its GLSL form</param>
+ /// <param name="indexed">True to append an array access using <paramref name="indexExpr"/></param>
+ /// <param name="indexExpr">Array index expression</param>
+ /// <returns>The image name</returns>
+ public static string GetImageName(
+     ShaderStage stage,
+     int cbufSlot,
+     int handle,
+     TextureFormat format,
+     bool indexed,
+     string indexExpr)
+ {
+     string glslFormat = format.ToGlslFormat();
+     string suffix;
+
+     if (cbufSlot < 0)
+     {
+         suffix = $"_tcb_{handle:X}_{glslFormat}";
+     }
+     else
+     {
+         suffix = $"_cb{cbufSlot}_{handle:X}_{glslFormat}";
+     }
+
+     if (indexed)
+     {
+         suffix = suffix + $"a[{indexExpr}]";
+     }
+
+     return $"{GetShaderStagePrefix(stage)}_{DefaultNames.ImageNamePrefix}{suffix}";
+ }
+
+ /// <summary>
+ /// Gets the name prefix used for a shader stage.
+ /// </summary>
+ /// <param name="stage">Shader stage</param>
+ /// <returns>The stage prefix, or "invalid" when the stage value has no entry in the prefix table</returns>
+ public static string GetShaderStagePrefix(ShaderStage stage)
+ {
+     // The unsigned comparison rejects negative values and values past the end of the table at once.
+     uint tableIndex = (uint)(int)stage;
+
+     return tableIndex < (uint)_stagePrefixes.Length ? _stagePrefixes[tableIndex] : "invalid";
+ }
+
+ /// <summary>
+ /// Gets the swizzle character (x, y, z or w) for a vector component index.
+ /// </summary>
+ /// <param name="value">Component index, from 0 to 3</param>
+ /// <returns>The swizzle character</returns>
+ private static char GetSwizzleMask(int value)
+ {
+     const string Components = "xyzw";
+
+     return Components[value];
+ }
+
+ /// <summary>
+ /// Gets the name of a function argument from its index.
+ /// </summary>
+ /// <param name="argIndex">Argument index</param>
+ /// <returns>The argument prefix followed by the index</returns>
+ public static string GetArgumentName(int argIndex) => $"{DefaultNames.ArgumentNamePrefix}{argIndex}";
+
+ /// <summary>
+ /// Determines the type of the value produced by an AST node.
+ /// </summary>
+ /// <param name="context">Code generation context</param>
+ /// <param name="node">Operation or operand node</param>
+ /// <returns>The type of the node result</returns>
+ /// <exception cref="InvalidOperationException">Thrown when an input/output load does not use constant operands where they are required</exception>
+ /// <exception cref="ArgumentException">Thrown when the node is neither an operation nor an operand</exception>
+ public static AggregateType GetNodeDestType(CodeGenContext context, IAstNode node)
+ {
+ // TODO: Get rid of that function entirely and return the type from the operation generation
+ // functions directly, like SPIR-V does.
+
+ if (node is AstOperation operation)
+ {
+ if (operation.Inst == Instruction.Load)
+ {
+ // Only input/output loads are resolved here; any other storage kind
+ // falls through to the generic per-instruction type at the end of this branch.
+ switch (operation.StorageKind)
+ {
+ case StorageKind.Input:
+ case StorageKind.InputPerPatch:
+ case StorageKind.Output:
+ case StorageKind.OutputPerPatch:
+ // The first source is a constant holding the IoVariable being accessed.
+ if (!(operation.GetSource(0) is AstOperand varId) || varId.Type != OperandType.Constant)
+ {
+ throw new InvalidOperationException($"First input of {operation.Inst} with {operation.StorageKind} storage must be a constant operand.");
+ }
+
+ IoVariable ioVariable = (IoVariable)varId.Value;
+ bool isOutput = operation.StorageKind == StorageKind.Output || operation.StorageKind == StorageKind.OutputPerPatch;
+ bool isPerPatch = operation.StorageKind == StorageKind.InputPerPatch || operation.StorageKind == StorageKind.OutputPerPatch;
+ int location = 0;
+ int component = 0;
+
+ if (context.Config.HasPerLocationInputOrOutput(ioVariable, isOutput))
+ {
+ // For per-location variables, the second source is a constant holding the location.
+ if (!(operation.GetSource(1) is AstOperand vecIndex) || vecIndex.Type != OperandType.Constant)
+ {
+ throw new InvalidOperationException($"Second input of {operation.Inst} with {operation.StorageKind} storage must be a constant operand.");
+ }
+
+ location = vecIndex.Value;
+
+ // The component stays 0 unless a constant third source names a per-component variable.
+ if (operation.SourcesCount > 2 &&
+ operation.GetSource(2) is AstOperand elemIndex &&
+ elemIndex.Type == OperandType.Constant &&
+ context.Config.HasPerLocationInputOrOutputComponent(ioVariable, location, elemIndex.Value, isOutput))
+ {
+ component = elemIndex.Value;
+ }
+ }
+
+ (_, AggregateType varType) = IoMap.GetGlslVariable(context.Config, ioVariable, location, component, isOutput, isPerPatch);
+
+ // Only the bits selected by ElementTypeMask are returned.
+ return varType & AggregateType.ElementTypeMask;
+ }
+ }
+ else if (operation.Inst == Instruction.Call)
+ {
+ // The first source of a call is the constant index of the called function.
+ AstOperand funcId = (AstOperand)operation.GetSource(0);
+
+ Debug.Assert(funcId.Type == OperandType.Constant);
+
+ return context.GetFunction(funcId.Value).ReturnType;
+ }
+ else if (operation.Inst == Instruction.VectorExtract)
+ {
+ // Type of the source vector with the element count bits cleared.
+ return GetNodeDestType(context, operation.GetSource(0)) & ~AggregateType.ElementCountMask;
+ }
+ else if (operation is AstTextureOperation texOp)
+ {
+ if (texOp.Inst == Instruction.ImageLoad ||
+ texOp.Inst == Instruction.ImageStore ||
+ texOp.Inst == Instruction.ImageAtomic)
+ {
+ // Image operations take their component type from the image format.
+ return texOp.GetVectorType(texOp.Format.GetComponentType());
+ }
+ else if (texOp.Inst == Instruction.TextureSample)
+ {
+ return texOp.GetVectorType(GetDestVarType(operation.Inst));
+ }
+ }
+
+ // Everything else uses the destination type associated with the instruction.
+ return GetDestVarType(operation.Inst);
+ }
+ else if (node is AstOperand operand)
+ {
+ if (operand.Type == OperandType.Argument)
+ {
+ // Argument operands take their type from the current function signature.
+ int argIndex = operand.Value;
+
+ return context.CurrentFunction.GetArgumentType(argIndex);
+ }
+
+ return OperandInfo.GetVarType(operand);
+ }
+ else
+ {
+ throw new ArgumentException($"Invalid node type \"{node?.GetType().Name ?? "null"}\".");
+ }
+ }
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/TypeConversion.cs b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/TypeConversion.cs
new file mode 100644
index 00000000..22c8623c
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/TypeConversion.cs
@@ -0,0 +1,87 @@
+using Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions;
+using Ryujinx.Graphics.Shader.IntermediateRepresentation;
+using Ryujinx.Graphics.Shader.StructuredIr;
+using Ryujinx.Graphics.Shader.Translation;
+using System;
+
+namespace Ryujinx.Graphics.Shader.CodeGen.Glsl
+{
+ static class TypeConversion
+ {
+ /// <summary>
+ /// Gets the expression for a node, reinterpreted from one type to another.
+ /// Constant operands are emitted directly as a literal of the destination type when they can be formatted.
+ /// </summary>
+ /// <param name="context">Code generation context</param>
+ /// <param name="node">Node to get the expression for</param>
+ /// <param name="srcType">Type of the node expression</param>
+ /// <param name="dstType">Type to reinterpret the expression as</param>
+ /// <returns>The reinterpreted expression</returns>
+ public static string ReinterpretCast(
+     CodeGenContext context,
+     IAstNode node,
+     AggregateType srcType,
+     AggregateType dstType)
+ {
+     if (node is AstOperand constant &&
+         constant.Type == OperandType.Constant &&
+         NumberFormatter.TryFormat(constant.Value, dstType, out string literal))
+     {
+         return literal;
+     }
+
+     return ReinterpretCast(InstGen.GetExpression(context, node), node, srcType, dstType);
+ }
+
+ /// <summary>
+ /// Wraps an expression so that its bits are reinterpreted from the source type as the destination type.
+ /// </summary>
+ /// <param name="expr">Expression to reinterpret</param>
+ /// <param name="node">Node that produced the expression, used to decide whether it needs enclosing</param>
+ /// <param name="srcType">Type of the expression</param>
+ /// <param name="dstType">Type to reinterpret the expression as</param>
+ /// <returns>The reinterpreted expression</returns>
+ /// <exception cref="ArgumentException">Thrown when the type combination is not supported</exception>
+ private static string ReinterpretCast(string expr, IAstNode node, AggregateType srcType, AggregateType dstType)
+ {
+     if (srcType == dstType)
+     {
+         return expr;
+     }
+
+     if (srcType == AggregateType.FP32)
+     {
+         if (dstType == AggregateType.Bool)
+         {
+             return $"(floatBitsToInt({expr}) != 0)";
+         }
+
+         if (dstType == AggregateType.S32)
+         {
+             return $"floatBitsToInt({expr})";
+         }
+
+         if (dstType == AggregateType.U32)
+         {
+             return $"floatBitsToUint({expr})";
+         }
+     }
+     else if (dstType == AggregateType.FP32)
+     {
+         if (srcType == AggregateType.Bool)
+         {
+             return $"intBitsToFloat({ReinterpretBoolToInt(expr, node, AggregateType.S32)})";
+         }
+
+         if (srcType == AggregateType.S32)
+         {
+             return $"intBitsToFloat({expr})";
+         }
+
+         if (srcType == AggregateType.U32)
+         {
+             return $"uintBitsToFloat({expr})";
+         }
+     }
+     else if (srcType == AggregateType.Bool)
+     {
+         return ReinterpretBoolToInt(expr, node, dstType);
+     }
+     else
+     {
+         switch (dstType)
+         {
+             case AggregateType.Bool:
+                 string enclosed = InstGenHelper.Enclose(expr, node, Instruction.CompareNotEqual, isLhs: true);
+
+                 return $"({enclosed} != 0)";
+             case AggregateType.S32:
+                 return $"int({expr})";
+             case AggregateType.U32:
+                 return $"uint({expr})";
+         }
+     }
+
+     throw new ArgumentException($"Invalid reinterpret cast from \"{srcType}\" to \"{dstType}\".");
+ }
+
+ /// <summary>
+ /// Converts a boolean expression into a select between the IR true and false integer constants.
+ /// </summary>
+ /// <param name="expr">Boolean expression</param>
+ /// <param name="node">Node that produced the expression, used to decide whether it needs enclosing</param>
+ /// <param name="dstType">Integer type of the resulting constants</param>
+ /// <returns>The conditional expression</returns>
+ private static string ReinterpretBoolToInt(string expr, IAstNode node, AggregateType dstType)
+ {
+     string whenTrue = NumberFormatter.FormatInt(IrConsts.True, dstType);
+     string whenFalse = NumberFormatter.FormatInt(IrConsts.False, dstType);
+     string condition = InstGenHelper.Enclose(expr, node, Instruction.ConditionalSelect, isLhs: false);
+
+     return $"({condition} ? {whenTrue} : {whenFalse})";
+ }
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/CodeGenContext.cs b/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/CodeGenContext.cs
new file mode 100644
index 00000000..ed292ef1
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/CodeGenContext.cs
@@ -0,0 +1,409 @@
+using Ryujinx.Graphics.Shader.StructuredIr;
+using Ryujinx.Graphics.Shader.Translation;
+using Spv.Generator;
+using System;
+using System.Collections.Generic;
+using static Spv.Specification;
+
+namespace Ryujinx.Graphics.Shader.CodeGen.Spirv
+{
+ using IrConsts = IntermediateRepresentation.IrConsts;
+ using IrOperandType = IntermediateRepresentation.OperandType;
+
+ partial class CodeGenContext : Module
+ {
+ // SPIR-V version passed to the base module, packed as (major << 16) | (minor << 8) | revision.
+ private const uint SpirvVersionMajor = 1;
+ private const uint SpirvVersionMinor = 3;
+ private const uint SpirvVersionRevision = 0;
+ private const uint SpirvVersionPacked = (SpirvVersionMajor << 16) | (SpirvVersionMinor << 8) | SpirvVersionRevision;
+
+ public StructuredProgramInfo Info { get; }
+
+ public ShaderConfig Config { get; }
+
+ // Only assigned by the constructor for geometry shaders; left at the default value otherwise.
+ public int InputVertices { get; }
+
+ // Declared variables, filled in while the shader resources are being declared.
+ public Dictionary<int, Instruction> UniformBuffers { get; } = new Dictionary<int, Instruction>();
+ public Instruction SupportBuffer { get; set; }
+ public Instruction UniformBuffersArray { get; set; }
+ public Instruction StorageBuffersArray { get; set; }
+ public Instruction LocalMemory { get; set; }
+ public Instruction SharedMemory { get; set; }
+ public Dictionary<TextureMeta, SamplerType> SamplersTypes { get; } = new Dictionary<TextureMeta, SamplerType>();
+ public Dictionary<TextureMeta, (Instruction, Instruction, Instruction)> Samplers { get; } = new Dictionary<TextureMeta, (Instruction, Instruction, Instruction)>();
+ public Dictionary<TextureMeta, (Instruction, Instruction)> Images { get; } = new Dictionary<TextureMeta, (Instruction, Instruction)>();
+ public Dictionary<IoDefinition, Instruction> Inputs { get; } = new Dictionary<IoDefinition, Instruction>();
+ public Dictionary<IoDefinition, Instruction> Outputs { get; } = new Dictionary<IoDefinition, Instruction>();
+ public Dictionary<IoDefinition, Instruction> InputsPerPatch { get; } = new Dictionary<IoDefinition, Instruction>();
+ public Dictionary<IoDefinition, Instruction> OutputsPerPatch { get; } = new Dictionary<IoDefinition, Instruction>();
+
+ public Instruction CoordTemp { get; set; }
+ // Per-function state (_locals, _localForArgs, _funcArgs) is cleared by StartFunction;
+ // _functions is keyed by function index and is not cleared there.
+ private readonly Dictionary<AstOperand, Instruction> _locals = new Dictionary<AstOperand, Instruction>();
+ private readonly Dictionary<int, Instruction[]> _localForArgs = new Dictionary<int, Instruction[]>();
+ private readonly Dictionary<int, Instruction> _funcArgs = new Dictionary<int, Instruction>();
+ private readonly Dictionary<int, (StructuredFunction, Instruction)> _functions = new Dictionary<int, (StructuredFunction, Instruction)>();
+
+ /// <summary>
+ /// Labels created for a block, together with a counter of how many of them were consumed on entry.
+ /// </summary>
+ private class BlockState
+ {
+     private int _entryCount;
+     private readonly List<Instruction> _labels = new List<Instruction>();
+
+     /// <summary>
+     /// Gets the label at the current entry count, without advancing the count.
+     /// </summary>
+     public Instruction GetNextLabel(CodeGenContext context) => GetLabel(context, _entryCount);
+
+     /// <summary>
+     /// Gets the label at the current entry count, then advances the count.
+     /// </summary>
+     public Instruction GetNextLabelAutoIncrement(CodeGenContext context)
+     {
+         int current = _entryCount;
+
+         _entryCount = current + 1;
+
+         return GetLabel(context, current);
+     }
+
+     /// <summary>
+     /// Gets the label at the given index, creating any labels up to that index that do not exist yet.
+     /// </summary>
+     public Instruction GetLabel(CodeGenContext context, int index)
+     {
+         for (int count = _labels.Count; count <= index; count++)
+         {
+             _labels.Add(context.Label());
+         }
+
+         return _labels[index];
+     }
+ }
+
+ // Label state of each block, created on first use by GetBlockStateLazy.
+ private readonly Dictionary<AstBlock, BlockState> _labels = new Dictionary<AstBlock, BlockState>();
+
+ public Dictionary<AstBlock, (Instruction, Instruction)> LoopTargets { get; set; }
+
+ // Block most recently passed to EnterBlock.
+ public AstBlock CurrentBlock { get; private set; }
+
+ public SpirvDelegates Delegates { get; }
+
+ /// <summary>
+ /// Creates a new SPIR-V code generation context.
+ /// </summary>
+ /// <param name="info">Structured program information</param>
+ /// <param name="config">Shader configuration</param>
+ /// <param name="instPool">Instruction pool passed to the base module</param>
+ /// <param name="integerPool">Literal integer pool passed to the base module</param>
+ /// <exception cref="InvalidOperationException">Thrown for a geometry shader with an unhandled input topology</exception>
+ public CodeGenContext(
+     StructuredProgramInfo info,
+     ShaderConfig config,
+     GeneratorPool<Instruction> instPool,
+     GeneratorPool<LiteralInteger> integerPool) : base(SpirvVersionPacked, instPool, integerPool)
+ {
+     Info = info;
+     Config = config;
+
+     if (config.Stage == ShaderStage.Geometry)
+     {
+         InputTopology topology = config.GpuAccessor.QueryPrimitiveTopology();
+
+         // NOTE(review): the adjacency topologies use the same counts as their non-adjacency
+         // forms (2 and 3); confirm this is intended rather than the full 4 and 6 vertices.
+         switch (topology)
+         {
+             case InputTopology.Points:
+                 InputVertices = 1;
+                 break;
+             case InputTopology.Lines:
+             case InputTopology.LinesAdjacency:
+                 InputVertices = 2;
+                 break;
+             case InputTopology.Triangles:
+             case InputTopology.TrianglesAdjacency:
+                 InputVertices = 3;
+                 break;
+             default:
+                 throw new InvalidOperationException($"Invalid input topology \"{topology}\".");
+         }
+     }
+
+     AddCapability(Capability.Shader);
+     AddCapability(Capability.Float64);
+
+     SetMemoryModel(AddressingModel.Logical, MemoryModel.GLSL450);
+
+     Delegates = new SpirvDelegates(this);
+ }
+
+ /// <summary>
+ /// Resets the per-function state: argument pointers, argument locals and local variables.
+ /// </summary>
+ public void StartFunction()
+ {
+     _funcArgs.Clear();
+     _localForArgs.Clear();
+     _locals.Clear();
+ }
+
+ /// <summary>
+ /// Makes a block the current block and emits its next pending label.
+ /// </summary>
+ /// <param name="block">Block being entered</param>
+ public void EnterBlock(AstBlock block)
+ {
+     CurrentBlock = block;
+
+     BlockState state = GetBlockStateLazy(block);
+     Instruction entryLabel = state.GetNextLabelAutoIncrement(this);
+
+     AddLabel(entryLabel);
+ }
+
+ /// <summary>
+ /// Gets the first label of a block, creating it if needed.
+ /// </summary>
+ /// <param name="block">Block to get the label for</param>
+ /// <returns>The label at index 0 of the block</returns>
+ public Instruction GetFirstLabel(AstBlock block) => GetBlockStateLazy(block).GetLabel(this, 0);
+
+ /// <summary>
+ /// Gets the label that the next entry into a block will use, creating it if needed.
+ /// </summary>
+ /// <param name="block">Block to get the label for</param>
+ /// <returns>The next label of the block</returns>
+ public Instruction GetNextLabel(AstBlock block) => GetBlockStateLazy(block).GetNextLabel(this);
+
+ /// <summary>
+ /// Gets the label state of a block, creating and registering it on first use.
+ /// </summary>
+ /// <param name="block">Block to get the state for</param>
+ /// <returns>The label state of the block</returns>
+ private BlockState GetBlockStateLazy(AstBlock block)
+ {
+     if (_labels.TryGetValue(block, out BlockState existing))
+     {
+         return existing;
+     }
+
+     BlockState created = new BlockState();
+
+     _labels.Add(block, created);
+
+     return created;
+ }
+
+ /// <summary>
+ /// Starts a new block: creates a label, branches to it, then emits it.
+ /// </summary>
+ /// <returns>The label of the new block</returns>
+ public Instruction NewBlock()
+ {
+     Instruction blockLabel = Label();
+
+     Branch(blockLabel);
+     AddLabel(blockLabel);
+
+     return blockLabel;
+ }
+
+ /// <summary>
+ /// Collects all declared input and output variables into a single array.
+ /// The order is inputs, outputs, per-patch inputs, then per-patch outputs.
+ /// </summary>
+ /// <returns>Array with all interface variables</returns>
+ public Instruction[] GetMainInterface()
+ {
+     int total = Inputs.Count + Outputs.Count + InputsPerPatch.Count + OutputsPerPatch.Count;
+     Instruction[] variables = new Instruction[total];
+     int offset = 0;
+
+     Inputs.Values.CopyTo(variables, offset);
+     offset += Inputs.Count;
+
+     Outputs.Values.CopyTo(variables, offset);
+     offset += Outputs.Count;
+
+     InputsPerPatch.Values.CopyTo(variables, offset);
+     offset += InputsPerPatch.Count;
+
+     OutputsPerPatch.Values.CopyTo(variables, offset);
+
+     return variables;
+ }
+
+ /// <summary>
+ /// Registers the SPIR-V variable that backs a local variable operand.
+ /// </summary>
+ /// <param name="local">Local variable operand</param>
+ /// <param name="spvLocal">SPIR-V variable for the local</param>
+ public void DeclareLocal(AstOperand local, Instruction spvLocal) => _locals.Add(local, spvLocal);
+
+ /// <summary>
+ /// Registers the SPIR-V variables associated with the arguments of a function.
+ /// </summary>
+ /// <param name="funcIndex">Index of the function</param>
+ /// <param name="spvLocals">SPIR-V variables, one per argument</param>
+ public void DeclareLocalForArgs(int funcIndex, Instruction[] spvLocals) => _localForArgs.Add(funcIndex, spvLocals);
+
+ /// <summary>
+ /// Registers the SPIR-V function parameter for an argument of the current function.
+ /// </summary>
+ /// <param name="argIndex">Index of the argument</param>
+ /// <param name="spvLocal">SPIR-V function parameter</param>
+ public void DeclareArgument(int argIndex, Instruction spvLocal) => _funcArgs.Add(argIndex, spvLocal);
+
+ /// <summary>
+ /// Registers a function together with its SPIR-V function instruction.
+ /// </summary>
+ /// <param name="funcIndex">Index of the function</param>
+ /// <param name="function">Structured function</param>
+ /// <param name="spvFunc">SPIR-V function instruction</param>
+ public void DeclareFunction(int funcIndex, StructuredFunction function, Instruction spvFunc)
+ {
+     (StructuredFunction, Instruction) entry = (function, spvFunc);
+
+     _functions.Add(funcIndex, entry);
+ }
+
+ /// <summary>
+ /// Gets the value of a node as FP32.
+ /// </summary>
+ /// <param name="node">Node to get the value of</param>
+ /// <returns>The value</returns>
+ public Instruction GetFP32(IAstNode node) => Get(AggregateType.FP32, node);
+
+ /// <summary>
+ /// Gets the value of a node as FP64.
+ /// </summary>
+ /// <param name="node">Node to get the value of</param>
+ /// <returns>The value</returns>
+ public Instruction GetFP64(IAstNode node) => Get(AggregateType.FP64, node);
+
+ /// <summary>
+ /// Gets the value of a node as S32.
+ /// </summary>
+ /// <param name="node">Node to get the value of</param>
+ /// <returns>The value</returns>
+ public Instruction GetS32(IAstNode node) => Get(AggregateType.S32, node);
+
+ /// <summary>
+ /// Gets the value of a node as U32.
+ /// </summary>
+ /// <param name="node">Node to get the value of</param>
+ /// <returns>The value</returns>
+ public Instruction GetU32(IAstNode node) => Get(AggregateType.U32, node);
+
+ /// <summary>
+ /// Gets the value of a node, converted to the requested type.
+ /// Operations are generated and their result converted; operands are loaded according to their kind.
+ /// </summary>
+ /// <param name="type">Type the value should have</param>
+ /// <param name="node">Node to get the value of</param>
+ /// <returns>The value</returns>
+ /// <exception cref="ArgumentException">Thrown when the operand type is not handled</exception>
+ /// <exception cref="NotImplementedException">Thrown when the node is null or of an unsupported kind</exception>
+ public Instruction Get(AggregateType type, IAstNode node)
+ {
+     if (node is AstOperation operation)
+     {
+         var opResult = Instructions.Generate(this, operation);
+         return BitcastIfNeeded(type, opResult.Type, opResult.Value);
+     }
+     else if (node is AstOperand operand)
+     {
+         return operand.Type switch
+         {
+             IrOperandType.Argument => GetArgument(type, operand),
+             IrOperandType.Constant => GetConstant(type, operand),
+             IrOperandType.ConstantBuffer => GetConstantBuffer(type, operand),
+             IrOperandType.LocalVariable => GetLocal(type, operand),
+             IrOperandType.Undefined => GetUndefined(type),
+             _ => throw new ArgumentException($"Invalid operand type \"{operand.Type}\".")
+         };
+     }
+
+     // A null node also reaches this point; it must not be dereferenced while building the message.
+     throw new NotImplementedException(node?.GetType().Name ?? "null");
+ }
+
+ /// <summary>
+ /// Gets the value of a node in its own type, without any conversion.
+ /// Only operations and local variable operands are supported.
+ /// </summary>
+ /// <param name="node">Node to get the value of</param>
+ /// <param name="type">Type of the returned value</param>
+ /// <returns>The value</returns>
+ /// <exception cref="ArgumentException">Thrown when the operand is not a local variable</exception>
+ /// <exception cref="NotImplementedException">Thrown when the node is null or of an unsupported kind</exception>
+ public Instruction GetWithType(IAstNode node, out AggregateType type)
+ {
+     if (node is AstOperation operation)
+     {
+         var opResult = Instructions.Generate(this, operation);
+         type = opResult.Type;
+         return opResult.Value;
+     }
+     else if (node is AstOperand operand)
+     {
+         switch (operand.Type)
+         {
+             case IrOperandType.LocalVariable:
+                 type = operand.VarType;
+                 return GetLocal(type, operand);
+             default:
+                 throw new ArgumentException($"Invalid operand type \"{operand.Type}\".");
+         }
+     }
+
+     // A null node also reaches this point; it must not be dereferenced while building the message.
+     throw new NotImplementedException(node?.GetType().Name ?? "null");
+ }
+
+ /// <summary>
+ /// Gets the value used for an undefined operand: false for booleans and zero for every other type.
+ /// </summary>
+ /// <param name="type">Type of the value</param>
+ /// <returns>The constant</returns>
+ private Instruction GetUndefined(AggregateType type)
+ {
+     switch (type)
+     {
+         case AggregateType.Bool:
+             return ConstantFalse(TypeBool());
+         case AggregateType.FP32:
+             return Constant(TypeFP32(), 0f);
+         case AggregateType.FP64:
+             return Constant(TypeFP64(), 0d);
+         default:
+             return Constant(GetType(type), 0);
+     }
+ }
+
+ /// <summary>
+ /// Gets a constant operand as a SPIR-V constant of the requested type.
+ /// For the floating point types, the operand value is read as the bits of a single-precision float.
+ /// </summary>
+ /// <param name="type">Type of the constant</param>
+ /// <param name="operand">Constant operand</param>
+ /// <returns>The constant</returns>
+ /// <exception cref="ArgumentException">Thrown when the type is not supported</exception>
+ public Instruction GetConstant(AggregateType type, AstOperand operand)
+ {
+     switch (type)
+     {
+         case AggregateType.Bool:
+             return operand.Value != 0 ? ConstantTrue(TypeBool()) : ConstantFalse(TypeBool());
+         case AggregateType.FP32:
+             return Constant(TypeFP32(), BitConverter.Int32BitsToSingle(operand.Value));
+         case AggregateType.FP64:
+             return Constant(TypeFP64(), (double)BitConverter.Int32BitsToSingle(operand.Value));
+         case AggregateType.S32:
+             return Constant(TypeS32(), operand.Value);
+         case AggregateType.U32:
+             return Constant(TypeU32(), (uint)operand.Value);
+         default:
+             throw new ArgumentException($"Invalid type \"{type}\".");
+     }
+ }
+
+ /// <summary>
+ /// Loads a constant buffer operand and converts it to the requested type.
+ /// </summary>
+ /// <param name="type">Type the value should have</param>
+ /// <param name="operand">Constant buffer operand providing the slot and offset</param>
+ /// <returns>The loaded value</returns>
+ public Instruction GetConstantBuffer(AggregateType type, AstOperand operand)
+ {
+     // Access chain indices: struct member 0, then the vector at (offset >> 2), then the component (offset & 3).
+     var memberIndex = Constant(TypeS32(), 0);
+     var vectorIndex = Constant(TypeS32(), operand.CbufOffset >> 2);
+     var componentIndex = Constant(TypeU32(), operand.CbufOffset & 3);
+
+     Instruction pointer;
+
+     if (UniformBuffersArray == null)
+     {
+         var buffer = UniformBuffers[operand.CbufSlot];
+
+         pointer = AccessChain(TypePointer(StorageClass.Uniform, TypeFP32()), buffer, memberIndex, vectorIndex, componentIndex);
+     }
+     else
+     {
+         // With a uniform buffer array, the slot becomes the leading index.
+         var buffers = UniformBuffersArray;
+         var slotIndex = Constant(TypeS32(), operand.CbufSlot);
+
+         pointer = AccessChain(TypePointer(StorageClass.Uniform, TypeFP32()), buffers, slotIndex, memberIndex, vectorIndex, componentIndex);
+     }
+
+     var loaded = Load(TypeFP32(), pointer);
+
+     return BitcastIfNeeded(type, AggregateType.FP32, loaded);
+ }
+
+ /// <summary>
+ /// Gets the SPIR-V variable registered for a local variable operand.
+ /// </summary>
+ /// <param name="local">Local variable operand</param>
+ /// <returns>The SPIR-V variable</returns>
+ public Instruction GetLocalPointer(AstOperand local) => _locals[local];
+
+ /// <summary>
+ /// Gets the SPIR-V variables registered for the arguments of a function.
+ /// </summary>
+ /// <param name="funcIndex">Index of the function</param>
+ /// <returns>The SPIR-V variables</returns>
+ public Instruction[] GetLocalForArgsPointers(int funcIndex) => _localForArgs[funcIndex];
+
+ /// <summary>
+ /// Gets the SPIR-V function parameter registered for an argument operand.
+ /// </summary>
+ /// <param name="funcArg">Argument operand, whose value is the argument index</param>
+ /// <returns>The SPIR-V function parameter</returns>
+ public Instruction GetArgumentPointer(AstOperand funcArg) => _funcArgs[funcArg.Value];
+
+ /// <summary>
+ /// Loads a local variable and converts it to the requested type.
+ /// </summary>
+ /// <param name="dstType">Type the value should have</param>
+ /// <param name="local">Local variable operand</param>
+ /// <returns>The loaded value</returns>
+ public Instruction GetLocal(AggregateType dstType, AstOperand local)
+ {
+     AggregateType storedType = local.VarType;
+     Instruction loaded = Load(GetType(storedType), GetLocalPointer(local));
+
+     return BitcastIfNeeded(dstType, storedType, loaded);
+ }
+
+ /// <summary>
+ /// Loads a function argument and converts it to the requested type.
+ /// </summary>
+ /// <param name="dstType">Type the value should have</param>
+ /// <param name="funcArg">Argument operand</param>
+ /// <returns>The loaded value</returns>
+ public Instruction GetArgument(AggregateType dstType, AstOperand funcArg)
+ {
+     AggregateType storedType = funcArg.VarType;
+     Instruction loaded = Load(GetType(storedType), GetArgumentPointer(funcArg));
+
+     return BitcastIfNeeded(dstType, storedType, loaded);
+ }
+
+ /// <summary>
+ /// Gets a registered function together with its SPIR-V function instruction.
+ /// </summary>
+ /// <param name="funcIndex">Index of the function</param>
+ /// <returns>The structured function and its SPIR-V instruction</returns>
+ public (StructuredFunction, Instruction) GetFunction(int funcIndex) => _functions[funcIndex];
+
+ /// <summary>
+ /// Gets the SPIR-V type for an aggregate type.
+ /// Array and vector types are built recursively from their element type.
+ /// </summary>
+ /// <param name="type">Aggregate type</param>
+ /// <param name="length">Array length, used only when the type has the array flag</param>
+ /// <returns>The SPIR-V type</returns>
+ /// <exception cref="ArgumentException">Thrown when the scalar type is not supported</exception>
+ public Instruction GetType(AggregateType type, int length = 1)
+ {
+     if ((type & AggregateType.Array) != 0)
+     {
+         return TypeArray(GetType(type & ~AggregateType.Array), Constant(TypeU32(), length));
+     }
+
+     AggregateType countBits = type & AggregateType.ElementCountMask;
+
+     if (countBits != 0)
+     {
+         int vectorLength;
+
+         switch (countBits)
+         {
+             case AggregateType.Vector2:
+                 vectorLength = 2;
+                 break;
+             case AggregateType.Vector3:
+                 vectorLength = 3;
+                 break;
+             case AggregateType.Vector4:
+                 vectorLength = 4;
+                 break;
+             default:
+                 vectorLength = 1;
+                 break;
+         }
+
+         return TypeVector(GetType(type & ~AggregateType.ElementCountMask), vectorLength);
+     }
+
+     switch (type)
+     {
+         case AggregateType.Void:
+             return TypeVoid();
+         case AggregateType.Bool:
+             return TypeBool();
+         case AggregateType.FP32:
+             return TypeFP32();
+         case AggregateType.FP64:
+             return TypeFP64();
+         case AggregateType.S32:
+             return TypeS32();
+         case AggregateType.U32:
+             return TypeU32();
+         default:
+             throw new ArgumentException($"Invalid attribute type \"{type}\".");
+     }
+ }
+
+ /// <summary>
+ /// Converts a value from the source type to the destination type, when they differ.
+ /// Booleans are converted through S32: to boolean by comparing against zero,
+ /// from boolean by selecting between the IR true and false constants.
+ /// </summary>
+ /// <param name="dstType">Type the value should have</param>
+ /// <param name="srcType">Current type of the value</param>
+ /// <param name="value">Value to convert</param>
+ /// <returns>The converted value, or the same value if the types match</returns>
+ public Instruction BitcastIfNeeded(AggregateType dstType, AggregateType srcType, Instruction value)
+ {
+     if (dstType == srcType)
+     {
+         return value;
+     }
+
+     if (dstType == AggregateType.Bool)
+     {
+         var boolType = TypeBool();
+         var asInt = BitcastIfNeeded(AggregateType.S32, srcType, value);
+         var zero = Constant(TypeS32(), 0);
+
+         return INotEqual(boolType, asInt, zero);
+     }
+
+     if (srcType == AggregateType.Bool)
+     {
+         var whenTrue = Constant(TypeS32(), IrConsts.True);
+         var whenFalse = Constant(TypeS32(), IrConsts.False);
+         var selected = Select(TypeS32(), value, whenTrue, whenFalse);
+
+         return BitcastIfNeeded(dstType, AggregateType.S32, selected);
+     }
+
+     return Bitcast(GetType(dstType, 1), value);
+ }
+
+ /// <summary>
+ /// Gets the 32-bit signed integer type.
+ /// </summary>
+ /// <returns>The SPIR-V type</returns>
+ public Instruction TypeS32() => TypeInt(32, true);
+
+ /// <summary>
+ /// Gets the 32-bit unsigned integer type.
+ /// </summary>
+ /// <returns>The SPIR-V type</returns>
+ public Instruction TypeU32() => TypeInt(32, false);
+
+ /// <summary>
+ /// Gets the 32-bit floating point type.
+ /// </summary>
+ /// <returns>The SPIR-V type</returns>
+ public Instruction TypeFP32() => TypeFloat(32);
+
+ /// <summary>
+ /// Gets the 64-bit floating point type.
+ /// </summary>
+ /// <returns>The SPIR-V type</returns>
+ public Instruction TypeFP64() => TypeFloat(64);
+ }
+}
diff --git a/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/Declarations.cs b/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/Declarations.cs
new file mode 100644
index 00000000..821da477
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/Declarations.cs
@@ -0,0 +1,615 @@
+using Ryujinx.Common;
+using Ryujinx.Graphics.Shader.IntermediateRepresentation;
+using Ryujinx.Graphics.Shader.StructuredIr;
+using Ryujinx.Graphics.Shader.Translation;
+using Spv.Generator;
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Numerics;
+using static Spv.Specification;
+using SpvInstruction = Spv.Generator.Instruction;
+
+namespace Ryujinx.Graphics.Shader.CodeGen.Spirv
+{
+ static class Declarations
+ {
+ // Name prefixes used when naming declared variables, one entry per shader stage.
+ private static readonly string[] StagePrefixes = new string[] { "cp", "vp", "tcp", "tep", "gp", "fp" };
+
+ /// <summary>
+ /// Declares the parameters of a function: input arguments first, then output arguments.
+ /// Output arguments are numbered after the last input argument.
+ /// </summary>
+ /// <param name="context">Code generation context</param>
+ /// <param name="function">Function to declare the parameters for</param>
+ public static void DeclareParameters(CodeGenContext context, StructuredFunction function)
+ {
+     DeclareParameters(context, function.InArguments, argIndex: 0);
+
+     int firstOutIndex = function.InArguments.Length;
+
+     DeclareParameters(context, function.OutArguments, argIndex: firstOutIndex);
+ }
+
+ /// <summary>
+ /// Declares one function parameter per argument type and registers each under consecutive indices.
+ /// </summary>
+ /// <param name="context">Code generation context</param>
+ /// <param name="argTypes">Types of the arguments</param>
+ /// <param name="argIndex">Index given to the first argument</param>
+ private static void DeclareParameters(CodeGenContext context, IEnumerable<AggregateType> argTypes, int argIndex)
+ {
+     int nextIndex = argIndex;
+
+     foreach (AggregateType argType in argTypes)
+     {
+         // Arguments are declared as pointers with the Function storage class.
+         var pointerType = context.TypePointer(StorageClass.Function, context.GetType(argType));
+         var parameter = context.FunctionParameter(pointerType);
+
+         context.DeclareArgument(nextIndex, parameter);
+         nextIndex++;
+     }
+ }
+
+ /// <summary>
+ /// Declares the local variables of a function, followed by the 2-component integer
+ /// vector stored in <c>context.CoordTemp</c>.
+ /// </summary>
+ /// <param name="context">Code generation context</param>
+ /// <param name="function">Function to declare the locals for</param>
+ public static void DeclareLocals(CodeGenContext context, StructuredFunction function)
+ {
+     foreach (AstOperand localOperand in function.Locals)
+     {
+         var pointerType = context.TypePointer(StorageClass.Function, context.GetType(localOperand.VarType));
+         var variable = context.Variable(pointerType, StorageClass.Function);
+
+         context.AddLocalVariable(variable);
+         context.DeclareLocal(localOperand, variable);
+     }
+
+     var tempType = context.TypeVector(context.TypeS32(), 2);
+     var tempPointerType = context.TypePointer(StorageClass.Function, tempType);
+     var tempVariable = context.Variable(tempPointerType, StorageClass.Function);
+
+     context.AddLocalVariable(tempVariable);
+     context.CoordTemp = tempVariable;
+ }
+
+ /// <summary>
+ /// Declares, for every function, one local variable per input argument,
+ /// and registers them under the function index.
+ /// </summary>
+ /// <param name="context">Code generation context</param>
+ /// <param name="functions">All functions of the program</param>
+ public static void DeclareLocalForArgs(CodeGenContext context, List<StructuredFunction> functions)
+ {
+     for (int funcIndex = 0; funcIndex < functions.Count; funcIndex++)
+     {
+         StructuredFunction target = functions[funcIndex];
+         int inputCount = target.InArguments.Length;
+         SpvInstruction[] argLocals = new SpvInstruction[inputCount];
+
+         for (int argIndex = 0; argIndex < inputCount; argIndex++)
+         {
+             var argType = target.GetArgumentType(argIndex);
+             var pointerType = context.TypePointer(StorageClass.Function, context.GetType(argType));
+             var variable = context.Variable(pointerType, StorageClass.Function);
+
+             context.AddLocalVariable(variable);
+
+             argLocals[argIndex] = variable;
+         }
+
+         context.DeclareLocalForArgs(funcIndex, argLocals);
+     }
+ }
+
+ /// <summary>
+ /// Declares all global resources used by the shader: local and shared memory,
+ /// the support buffer, uniform and storage buffers, samplers, images, and inputs/outputs.
+ /// </summary>
+ /// <param name="context">Code generation context</param>
+ /// <param name="info">Structured program information</param>
+ public static void DeclareAll(CodeGenContext context, StructuredProgramInfo info)
+ {
+     // Memory sizes are divided by 4, rounding up, because memory is declared as an array of 32-bit words.
+     if (context.Config.Stage != ShaderStage.Compute)
+     {
+         if (context.Config.LocalMemorySize != 0)
+         {
+             DeclareLocalMemory(context, BitUtils.DivRoundUp(context.Config.LocalMemorySize, 4));
+         }
+     }
+     else
+     {
+         int localWords = BitUtils.DivRoundUp(context.Config.GpuAccessor.QueryComputeLocalMemorySize(), 4);
+
+         if (localWords != 0)
+         {
+             DeclareLocalMemory(context, localWords);
+         }
+
+         int sharedWords = BitUtils.DivRoundUp(context.Config.GpuAccessor.QueryComputeSharedMemorySize(), 4);
+
+         if (sharedWords != 0)
+         {
+             DeclareSharedMemory(context, sharedWords);
+         }
+     }
+
+     DeclareSupportBuffer(context);
+     DeclareUniformBuffers(context, context.Config.GetConstantBufferDescriptors());
+     DeclareStorageBuffers(context, context.Config.GetStorageBufferDescriptors());
+     DeclareSamplers(context, context.Config.GetTextureDescriptors());
+     DeclareImages(context, context.Config.GetImageDescriptors());
+     DeclareInputsAndOutputs(context, info);
+ }
+
+ /// <summary>
+ /// Declares local memory with the Private storage class and stores it in the context.
+ /// </summary>
+ /// <param name="context">Code generation context</param>
+ /// <param name="size">Number of array elements</param>
+ private static void DeclareLocalMemory(CodeGenContext context, int size)
+ {
+     SpvInstruction memory = DeclareMemory(context, StorageClass.Private, size);
+
+     context.LocalMemory = memory;
+ }
+
+ /// <summary>
+ /// Declares shared memory with the Workgroup storage class and stores it in the context.
+ /// </summary>
+ /// <param name="context">Code generation context</param>
+ /// <param name="size">Number of array elements</param>
+ private static void DeclareSharedMemory(CodeGenContext context, int size)
+ {
+     SpvInstruction memory = DeclareMemory(context, StorageClass.Workgroup, size);
+
+     context.SharedMemory = memory;
+ }
+
+ /// <summary>
+ /// Declares a global array of 32-bit unsigned integers in the given storage class.
+ /// </summary>
+ /// <param name="context">Code generation context</param>
+ /// <param name="storage">Storage class of the variable</param>
+ /// <param name="size">Number of array elements</param>
+ /// <returns>The declared variable</returns>
+ private static SpvInstruction DeclareMemory(CodeGenContext context, StorageClass storage, int size)
+ {
+     var wordArrayType = context.TypeArray(context.TypeU32(), context.Constant(context.TypeU32(), size));
+     var memory = context.Variable(context.TypePointer(storage, wordArrayType), storage);
+
+     context.AddGlobalVariable(memory);
+
+     return memory;
+ }
+
+ /// <summary>
+ /// Declares the support buffer uniform block and stores it in <c>context.SupportBuffer</c>.
+ /// The block has five members, placed at the offsets defined by <c>SupportBuffer</c>.
+ /// </summary>
+ /// <param name="context">Code generation context</param>
+ private static void DeclareSupportBuffer(CodeGenContext context)
+ {
+ // The buffer is only declared when the stage supports render scale, or when this is the
+ // last vertex pipeline stage and the viewport transform is disabled.
+ if (!context.Config.Stage.SupportsRenderScale() && !(context.Config.LastInVertexPipeline && context.Config.GpuAccessor.QueryViewportTransformDisable()))
+ {
+ return;
+ }
+
+ var isBgraArrayType = context.TypeArray(context.TypeU32(), context.Constant(context.TypeU32(), SupportBuffer.FragmentIsBgraCount));
+ var viewportInverseVectorType = context.TypeVector(context.TypeFP32(), 4);
+ var renderScaleArrayType = context.TypeArray(context.TypeFP32(), context.Constant(context.TypeU32(), SupportBuffer.RenderScaleMaxCount));
+
+ // Both arrays use the support buffer field size as their element stride.
+ context.Decorate(isBgraArrayType, Decoration.ArrayStride, (LiteralInteger)SupportBuffer.FieldSize);
+ context.Decorate(renderScaleArrayType, Decoration.ArrayStride, (LiteralInteger)SupportBuffer.FieldSize);
+
+ var supportBufferStructType = context.TypeStruct(false, context.TypeU32(), isBgraArrayType, viewportInverseVectorType, context.TypeS32(), renderScaleArrayType);
+
+ // Member order must match the struct type above.
+ context.MemberDecorate(supportBufferStructType, 0, Decoration.Offset, (LiteralInteger)SupportBuffer.FragmentAlphaTestOffset);
+ context.MemberDecorate(supportBufferStructType, 1, Decoration.Offset, (LiteralInteger)SupportBuffer.FragmentIsBgraOffset);
+ context.MemberDecorate(supportBufferStructType, 2, Decoration.Offset, (LiteralInteger)SupportBuffer.ViewportInverseOffset);
+ context.MemberDecorate(supportBufferStructType, 3, Decoration.Offset, (LiteralInteger)SupportBuffer.FragmentRenderScaleCountOffset);
+ context.MemberDecorate(supportBufferStructType, 4, Decoration.Offset, (LiteralInteger)SupportBuffer.GraphicsRenderScaleOffset);
+ context.Decorate(supportBufferStructType, Decoration.Block);
+
+ var supportBufferPointerType = context.TypePointer(StorageClass.Uniform, supportBufferStructType);
+ var supportBufferVariable = context.Variable(supportBufferPointerType, StorageClass.Uniform);
+
+ // The support buffer is bound at descriptor set 0, binding 0.
+ context.Decorate(supportBufferVariable, Decoration.DescriptorSet, (LiteralInteger)0);
+ context.Decorate(supportBufferVariable, Decoration.Binding, (LiteralInteger)0);
+
+ context.AddGlobalVariable(supportBufferVariable);
+
+ context.SupportBuffer = supportBufferVariable;
+ }
+
+ /// <summary>
+ /// Declares the uniform buffers used by the shader.
+ /// With constant buffer indexing, a single array of buffers is declared and stored in
+ /// <c>context.UniformBuffersArray</c>; otherwise one variable per descriptor is added to
+ /// <c>context.UniformBuffers</c>, keyed by slot.
+ /// </summary>
+ /// <param name="context">Code generation context</param>
+ /// <param name="descriptors">Constant buffer descriptors</param>
+ private static void DeclareUniformBuffers(CodeGenContext context, BufferDescriptor[] descriptors)
+ {
+ if (descriptors.Length == 0)
+ {
+ return;
+ }
+
+ // Each buffer is an array of 4-component float vectors (16 bytes per element).
+ uint ubSize = Constants.ConstantBufferSize / 16;
+
+ var ubArrayType = context.TypeArray(context.TypeVector(context.TypeFP32(), 4), context.Constant(context.TypeU32(), ubSize), true);
+ context.Decorate(ubArrayType, Decoration.ArrayStride, (LiteralInteger)16);
+ var ubStructType = context.TypeStruct(true, ubArrayType);
+ context.Decorate(ubStructType, Decoration.Block);
+ context.MemberDecorate(ubStructType, 0, Decoration.Offset, (LiteralInteger)0);
+
+ if (context.Config.UsedFeatures.HasFlag(FeatureFlags.CbIndexing))
+ {
+ // The array must be large enough to be indexed by the highest used slot.
+ int count = descriptors.Max(x => x.Slot) + 1;
+
+ var ubStructArrayType = context.TypeArray(ubStructType, context.Constant(context.TypeU32(), count));
+ var ubPointerType = context.TypePointer(StorageClass.Uniform, ubStructArrayType);
+ var ubVariable = context.Variable(ubPointerType, StorageClass.Uniform);
+
+ context.Name(ubVariable, $"{GetStagePrefix(context.Config.Stage)}_u");
+ context.Decorate(ubVariable, Decoration.DescriptorSet, (LiteralInteger)0);
+ context.Decorate(ubVariable, Decoration.Binding, (LiteralInteger)context.Config.FirstConstantBufferBinding);
+ context.AddGlobalVariable(ubVariable);
+
+ context.UniformBuffersArray = ubVariable;
+ }
+ else
+ {
+ var ubPointerType = context.TypePointer(StorageClass.Uniform, ubStructType);
+
+ // One variable per descriptor, each using the binding from its descriptor.
+ foreach (var descriptor in descriptors)
+ {
+ var ubVariable = context.Variable(ubPointerType, StorageClass.Uniform);
+
+ context.Name(ubVariable, $"{GetStagePrefix(context.Config.Stage)}_c{descriptor.Slot}");
+ context.Decorate(ubVariable, Decoration.DescriptorSet, (LiteralInteger)0);
+ context.Decorate(ubVariable, Decoration.Binding, (LiteralInteger)descriptor.Binding);
+ context.AddGlobalVariable(ubVariable);
+ context.UniformBuffers.Add(descriptor.Slot, ubVariable);
+ }
+ }
+ }
+
+ /// <summary>
+ /// Declares the storage buffers used by the shader as a single array of buffers,
+ /// and stores it in <c>context.StorageBuffersArray</c>.
+ /// </summary>
+ /// <param name="context">Code generation context</param>
+ /// <param name="descriptors">Storage buffer descriptors</param>
+ private static void DeclareStorageBuffers(CodeGenContext context, BufferDescriptor[] descriptors)
+ {
+ if (descriptors.Length == 0)
+ {
+ return;
+ }
+
+ // Descriptor set 1 is used when targeting Vulkan, set 0 otherwise.
+ int setIndex = context.Config.Options.TargetApi == TargetApi.Vulkan ? 1 : 0;
+ // The array must be large enough to be indexed by the highest used slot.
+ int count = descriptors.Max(x => x.Slot) + 1;
+
+ // Each buffer is a runtime-sized array of 32-bit unsigned integers (4 bytes per element).
+ var sbArrayType = context.TypeRuntimeArray(context.TypeU32());
+ context.Decorate(sbArrayType, Decoration.ArrayStride, (LiteralInteger)4);
+ var sbStructType = context.TypeStruct(true, sbArrayType);
+ context.Decorate(sbStructType, Decoration.BufferBlock);
+ context.MemberDecorate(sbStructType, 0, Decoration.Offset, (LiteralInteger)0);
+ var sbStructArrayType = context.TypeArray(sbStructType, context.Constant(context.TypeU32(), count));
+ var sbPointerType = context.TypePointer(StorageClass.Uniform, sbStructArrayType);
+ var sbVariable = context.Variable(sbPointerType, StorageClass.Uniform);
+
+ context.Name(sbVariable, $"{GetStagePrefix(context.Config.Stage)}_s");
+ context.Decorate(sbVariable, Decoration.DescriptorSet, (LiteralInteger)setIndex);
+ context.Decorate(sbVariable, Decoration.Binding, (LiteralInteger)context.Config.FirstStorageBufferBinding);
+ context.AddGlobalVariable(sbVariable);
+
+ context.StorageBuffersArray = sbVariable;
+ }
+
+ /// <summary>
+ /// Declares one sampled image variable for every distinct texture
+ /// (constant buffer slot, handle and format) found on the descriptors.
+ /// </summary>
+ /// <param name="context">Code generation context receiving the declarations</param>
+ /// <param name="descriptors">Texture descriptors to declare</param>
+ /// <exception cref="InvalidOperationException">The sampler type of a descriptor is not a known dimension</exception>
+ private static void DeclareSamplers(CodeGenContext context, TextureDescriptor[] descriptors)
+ {
+     foreach (var descriptor in descriptors)
+     {
+         var meta = new TextureMeta(descriptor.CbufSlot, descriptor.HandleIndex, descriptor.Format);
+
+         // A texture that was already declared must not be declared again.
+         if (context.Samplers.ContainsKey(meta))
+         {
+             continue;
+         }
+
+         int descriptorSet = context.Config.Options.TargetApi == TargetApi.Vulkan ? 2 : 0;
+
+         SamplerType baseType = descriptor.Type & SamplerType.Mask;
+
+         var dim = baseType switch
+         {
+             SamplerType.Texture1D => Dim.Dim1D,
+             SamplerType.Texture2D => Dim.Dim2D,
+             SamplerType.Texture3D => Dim.Dim3D,
+             SamplerType.TextureCube => Dim.Cube,
+             SamplerType.TextureBuffer => Dim.Buffer,
+             _ => throw new InvalidOperationException($"Invalid sampler type \"{baseType}\".")
+         };
+
+         var sampledType = context.TypeFP32();
+         bool isShadow = descriptor.Type.HasFlag(SamplerType.Shadow);
+         bool isArray = descriptor.Type.HasFlag(SamplerType.Array);
+         bool isMultisample = descriptor.Type.HasFlag(SamplerType.Multisample);
+
+         var imageType = context.TypeImage(sampledType, dim, isShadow, isArray, isMultisample, 1, ImageFormat.Unknown);
+
+         // A negative constant buffer slot gets the "tcb" suffix form instead of a slot number.
+         string nameSuffix;
+
+         if (meta.CbufSlot < 0)
+         {
+             nameSuffix = $"_tcb_{meta.Handle:X}";
+         }
+         else
+         {
+             nameSuffix = $"_cb{meta.CbufSlot}_{meta.Handle:X}";
+         }
+
+         var sampledImageType = context.TypeSampledImage(imageType);
+         var pointerType = context.TypePointer(StorageClass.UniformConstant, sampledImageType);
+         var variable = context.Variable(pointerType, StorageClass.UniformConstant);
+
+         context.Samplers.Add(meta, (imageType, sampledImageType, variable));
+         context.SamplersTypes.Add(meta, descriptor.Type);
+
+         context.Name(variable, $"{GetStagePrefix(context.Config.Stage)}_tex{nameSuffix}");
+         context.Decorate(variable, Decoration.DescriptorSet, (LiteralInteger)descriptorSet);
+         context.Decorate(variable, Decoration.Binding, (LiteralInteger)descriptor.Binding);
+         context.AddGlobalVariable(variable);
+     }
+ }
+
+ /// <summary>
+ /// Declares one image variable for every distinct image
+ /// (constant buffer slot, handle and format) found on the descriptors.
+ /// </summary>
+ /// <param name="context">Code generation context receiving the declarations</param>
+ /// <param name="descriptors">Image descriptors to declare</param>
+ private static void DeclareImages(CodeGenContext context, TextureDescriptor[] descriptors)
+ {
+     foreach (var descriptor in descriptors)
+     {
+         var meta = new TextureMeta(descriptor.CbufSlot, descriptor.HandleIndex, descriptor.Format);
+
+         // An image that was already declared must not be declared again.
+         if (context.Images.ContainsKey(meta))
+         {
+             continue;
+         }
+
+         int descriptorSet = context.Config.Options.TargetApi == TargetApi.Vulkan ? 3 : 0;
+
+         var dim = GetDim(descriptor.Type);
+
+         var componentType = context.GetType(meta.Format.GetComponentType());
+         bool isShadow = descriptor.Type.HasFlag(SamplerType.Shadow);
+         bool isArray = descriptor.Type.HasFlag(SamplerType.Array);
+         bool isMultisample = descriptor.Type.HasFlag(SamplerType.Multisample);
+         var imageFormat = GetImageFormat(meta.Format);
+
+         // NOTE(review): AccessQualifier.ReadWrite is passed where DeclareSamplers passes the literal 1;
+         // confirm the meaning of that parameter against the TypeImage signature.
+         var imageType = context.TypeImage(
+             componentType,
+             dim,
+             isShadow,
+             isArray,
+             isMultisample,
+             AccessQualifier.ReadWrite,
+             imageFormat);
+
+         // A negative constant buffer slot gets the "tcb" prefix form instead of a slot number.
+         string slotPart = meta.CbufSlot < 0 ? "_tcb" : $"_cb{meta.CbufSlot}";
+         string nameSuffix = $"{slotPart}_{meta.Handle:X}_{meta.Format.ToGlslFormat()}";
+
+         var pointerType = context.TypePointer(StorageClass.UniformConstant, imageType);
+         var variable = context.Variable(pointerType, StorageClass.UniformConstant);
+
+         context.Images.Add(meta, (imageType, variable));
+
+         context.Name(variable, $"{GetStagePrefix(context.Config.Stage)}_img{nameSuffix}");
+         context.Decorate(variable, Decoration.DescriptorSet, (LiteralInteger)descriptorSet);
+         context.Decorate(variable, Decoration.Binding, (LiteralInteger)descriptor.Binding);
+
+         bool isCoherent = descriptor.Flags.HasFlag(TextureUsageFlags.ImageCoherent);
+
+         if (isCoherent)
+         {
+             context.Decorate(variable, Decoration.Coherent);
+         }
+
+         context.AddGlobalVariable(variable);
+     }
+ }
+
+ /// <summary>
+ /// Maps the dimension part of a sampler type (the bits selected by
+ /// <see cref="SamplerType.Mask"/>) to the matching SPIR-V <see cref="Dim"/>.
+ /// </summary>
+ /// <param name="type">Sampler type; bits outside the mask are ignored</param>
+ /// <returns>The SPIR-V image dimension</returns>
+ /// <exception cref="ArgumentException">The masked sampler type is not one of the handled values</exception>
+ private static Dim GetDim(SamplerType type) => (type & SamplerType.Mask) switch
+ {
+     SamplerType.Texture1D => Dim.Dim1D,
+     SamplerType.Texture2D => Dim.Dim2D,
+     SamplerType.Texture3D => Dim.Dim3D,
+     SamplerType.TextureCube => Dim.Cube,
+     SamplerType.TextureBuffer => Dim.Buffer,
+     _ => throw new ArgumentException($"Invalid sampler type \"{type & SamplerType.Mask}\".")
+ };
+
+ /// <summary>
+ /// Maps a texture format to the matching SPIR-V image format.
+ /// </summary>
+ /// <param name="format">Texture format to convert</param>
+ /// <returns>The SPIR-V image format</returns>
+ /// <exception cref="ArgumentException">The texture format has no entry on the table below</exception>
+ private static ImageFormat GetImageFormat(TextureFormat format) => format switch
+ {
+     TextureFormat.Unknown => ImageFormat.Unknown,
+
+     // Single component formats.
+     TextureFormat.R8Unorm => ImageFormat.R8,
+     TextureFormat.R8Snorm => ImageFormat.R8Snorm,
+     TextureFormat.R8Uint => ImageFormat.R8ui,
+     TextureFormat.R8Sint => ImageFormat.R8i,
+     TextureFormat.R16Float => ImageFormat.R16f,
+     TextureFormat.R16Unorm => ImageFormat.R16,
+     TextureFormat.R16Snorm => ImageFormat.R16Snorm,
+     TextureFormat.R16Uint => ImageFormat.R16ui,
+     TextureFormat.R16Sint => ImageFormat.R16i,
+     TextureFormat.R32Float => ImageFormat.R32f,
+     TextureFormat.R32Uint => ImageFormat.R32ui,
+     TextureFormat.R32Sint => ImageFormat.R32i,
+
+     // Two component formats.
+     TextureFormat.R8G8Unorm => ImageFormat.Rg8,
+     TextureFormat.R8G8Snorm => ImageFormat.Rg8Snorm,
+     TextureFormat.R8G8Uint => ImageFormat.Rg8ui,
+     TextureFormat.R8G8Sint => ImageFormat.Rg8i,
+     TextureFormat.R16G16Float => ImageFormat.Rg16f,
+     TextureFormat.R16G16Unorm => ImageFormat.Rg16,
+     TextureFormat.R16G16Snorm => ImageFormat.Rg16Snorm,
+     TextureFormat.R16G16Uint => ImageFormat.Rg16ui,
+     TextureFormat.R16G16Sint => ImageFormat.Rg16i,
+     TextureFormat.R32G32Float => ImageFormat.Rg32f,
+     TextureFormat.R32G32Uint => ImageFormat.Rg32ui,
+     TextureFormat.R32G32Sint => ImageFormat.Rg32i,
+
+     // Four component formats.
+     TextureFormat.R8G8B8A8Unorm => ImageFormat.Rgba8,
+     TextureFormat.R8G8B8A8Snorm => ImageFormat.Rgba8Snorm,
+     TextureFormat.R8G8B8A8Uint => ImageFormat.Rgba8ui,
+     TextureFormat.R8G8B8A8Sint => ImageFormat.Rgba8i,
+     TextureFormat.R16G16B16A16Float => ImageFormat.Rgba16f,
+     TextureFormat.R16G16B16A16Unorm => ImageFormat.Rgba16,
+     TextureFormat.R16G16B16A16Snorm => ImageFormat.Rgba16Snorm,
+     TextureFormat.R16G16B16A16Uint => ImageFormat.Rgba16ui,
+     TextureFormat.R16G16B16A16Sint => ImageFormat.Rgba16i,
+     TextureFormat.R32G32B32A32Float => ImageFormat.Rgba32f,
+     TextureFormat.R32G32B32A32Uint => ImageFormat.Rgba32ui,
+     TextureFormat.R32G32B32A32Sint => ImageFormat.Rgba32i,
+
+     // Packed formats.
+     TextureFormat.R10G10B10A2Unorm => ImageFormat.Rgb10A2,
+     TextureFormat.R10G10B10A2Uint => ImageFormat.Rgb10a2ui,
+     TextureFormat.R11G11B10Float => ImageFormat.R11fG11fB10f,
+
+     _ => throw new ArgumentException($"Invalid texture format \"{format}\".")
+ };
+
+ /// <summary>
+ /// Declares every input and output listed on the program info, picking the
+ /// interpolation qualifier for fragment shader variables along the way.
+ /// </summary>
+ /// <param name="context">Code generation context receiving the declarations</param>
+ /// <param name="info">Structured program information holding the I/O definitions</param>
+ private static void DeclareInputsAndOutputs(CodeGenContext context, StructuredProgramInfo info)
+ {
+     foreach (var ioDefinition in info.IoDefinitions)
+     {
+         var ioVariable = ioDefinition.IoVariable;
+
+         // Those are actually from constant buffer, rather than being actual inputs or outputs,
+         // so we must ignore them here as they are declared as part of the support buffer.
+         // TODO: Delete this after we represent this properly on the IR (as a constant buffer rather than "input").
+         bool isSupportBufferField =
+             ioVariable == IoVariable.FragmentOutputIsBgra ||
+             ioVariable == IoVariable.SupportBlockRenderScale ||
+             ioVariable == IoVariable.SupportBlockViewInverse;
+
+         if (isSupportBufferField)
+         {
+             continue;
+         }
+
+         PixelImap interpolation = PixelImap.Unused;
+
+         if (context.Config.Stage == ShaderStage.Fragment)
+         {
+             if (ioVariable != IoVariable.UserDefined)
+             {
+                 // Built-ins with a signed or unsigned integer element type are declared as constant.
+                 (_, AggregateType builtInType) = IoMap.GetSpirvBuiltIn(ioVariable);
+                 AggregateType elementType = builtInType & AggregateType.ElementTypeMask;
+                 bool isInteger = elementType == AggregateType.S32 || elementType == AggregateType.U32;
+
+                 if (isInteger)
+                 {
+                     interpolation = PixelImap.Constant;
+                 }
+             }
+             else
+             {
+                 interpolation = context.Config.ImapTypes[ioDefinition.Location].GetFirstUsedType();
+             }
+         }
+
+         DeclareInputOrOutput(
+             context,
+             ioDefinition,
+             ioDefinition.StorageKind.IsOutput(),
+             ioDefinition.StorageKind.IsPerPatch(),
+             interpolation);
+     }
+ }
+
+ // Declares a single shader input or output as a global variable, applies its decorations,
+ // and records it on the context dictionary matching its direction and per-patch state.
+ //
+ // context:      Code generation context receiving the declaration.
+ // ioDefinition: Variable to declare (I/O variable kind, location and component).
+ // isOutput:     True to declare an output, false to declare an input.
+ // isPerPatch:   True if the variable is a per-patch variable.
+ // iq:           Interpolation qualifier, only applied to inputs.
+ //
+ // Throws InvalidOperationException if a built-in variable has no known type.
+ private static void DeclareInputOrOutput(CodeGenContext context, IoDefinition ioDefinition, bool isOutput, bool isPerPatch, PixelImap iq = PixelImap.Unused)
+ {
+ IoVariable ioVariable = ioDefinition.IoVariable;
+ var storageClass = isOutput ? StorageClass.Output : StorageClass.Input;
+
+ bool isBuiltIn;
+ BuiltIn builtIn = default;
+ AggregateType varType;
+
+ // User defined variables and fragment output colors get their type from the shader config,
+ // everything else is handled as a built-in whose type comes from the I/O map.
+ if (ioVariable == IoVariable.UserDefined)
+ {
+ varType = context.Config.GetUserDefinedType(ioDefinition.Location, isOutput);
+ isBuiltIn = false;
+ }
+ else if (ioVariable == IoVariable.FragmentOutputColor)
+ {
+ varType = context.Config.GetFragmentOutputColorType(ioDefinition.Location);
+ isBuiltIn = false;
+ }
+ else
+ {
+ (builtIn, varType) = IoMap.GetSpirvBuiltIn(ioVariable);
+ isBuiltIn = true;
+
+ if (varType == AggregateType.Invalid)
+ {
+ throw new InvalidOperationException($"Unknown variable {ioVariable}.");
+ }
+ }
+
+ bool hasComponent = context.Config.HasPerLocationInputOrOutputComponent(ioVariable, ioDefinition.Location, ioDefinition.Component, isOutput);
+
+ if (hasComponent)
+ {
+ // Declared per component: keep only the element type bits of the type.
+ varType &= AggregateType.ElementTypeMask;
+ }
+ else if (ioVariable == IoVariable.UserDefined && context.Config.HasTransformFeedbackOutputs(isOutput))
+ {
+ // With transform feedback, the vector size is replaced by the component count
+ // reported by the config (any count other than 2, 3 or 4 ORs in AggregateType.Invalid).
+ varType &= AggregateType.ElementTypeMask;
+ varType |= context.Config.GetTransformFeedbackOutputComponents(ioDefinition.Location, ioDefinition.Component) switch
+ {
+ 2 => AggregateType.Vector2,
+ 3 => AggregateType.Vector3,
+ 4 => AggregateType.Vector4,
+ _ => AggregateType.Invalid
+ };
+ }
+
+ var spvType = context.GetType(varType, IoMap.GetSpirvBuiltInArrayLength(ioVariable));
+ bool builtInPassthrough = false;
+
+ // Per-vertex variables are wrapped in an array, with one element per input vertex
+ // on geometry shaders and 32 elements on all other stages.
+ if (!isPerPatch && IoMap.IsPerVertex(ioVariable, context.Config.Stage, isOutput))
+ {
+ int arraySize = context.Config.Stage == ShaderStage.Geometry ? context.InputVertices : 32;
+ spvType = context.TypeArray(spvType, context.Constant(context.TypeU32(), (LiteralInteger)arraySize));
+
+ if (context.Config.GpPassthrough && context.Config.GpuAccessor.QueryHostSupportsGeometryShaderPassthrough())
+ {
+ builtInPassthrough = true;
+ }
+ }
+
+ // Tessellation control outputs that are not per-patch are wrapped in an array
+ // with ThreadsPerInputPrimitive elements.
+ if (context.Config.Stage == ShaderStage.TessellationControl && isOutput && !isPerPatch)
+ {
+ spvType = context.TypeArray(spvType, context.Constant(context.TypeU32(), context.Config.ThreadsPerInputPrimitive));
+ }
+
+ var spvPointerType = context.TypePointer(storageClass, spvType);
+ var spvVar = context.Variable(spvPointerType, storageClass);
+
+ if (builtInPassthrough)
+ {
+ context.Decorate(spvVar, Decoration.PassthroughNV);
+ }
+
+ // Exactly one of the branches below runs: built-in, per-patch, user defined or fragment output color.
+ if (isBuiltIn)
+ {
+ if (isPerPatch)
+ {
+ context.Decorate(spvVar, Decoration.Patch);
+ }
+
+ // Position is decorated as invariant when the accessor reports reduced precision on the host.
+ if (context.Config.GpuAccessor.QueryHostReducedPrecision() && ioVariable == IoVariable.Position)
+ {
+ context.Decorate(spvVar, Decoration.Invariant);
+ }
+
+ context.Decorate(spvVar, Decoration.BuiltIn, (LiteralInteger)builtIn);
+ }
+ else if (isPerPatch)
+ {
+ context.Decorate(spvVar, Decoration.Patch);
+
+ if (ioVariable == IoVariable.UserDefined)
+ {
+ // Per-patch attributes use a location provided by the config rather than the definition location.
+ int location = context.Config.GetPerPatchAttributeLocation(ioDefinition.Location);
+
+ context.Decorate(spvVar, Decoration.Location, (LiteralInteger)location);
+ }
+ }
+ else if (ioVariable == IoVariable.UserDefined)
+ {
+ context.Decorate(spvVar, Decoration.Location, (LiteralInteger)ioDefinition.Location);
+
+ if (hasComponent)
+ {
+ context.Decorate(spvVar, Decoration.Component, (LiteralInteger)ioDefinition.Component);
+ }
+
+ // Inputs whose location bit is set on PassthroughAttributes are decorated as passthrough
+ // if the host supports it.
+ // NOTE: isPerPatch is always false here, the per-patch case is handled by the previous branch.
+ if (!isOutput &&
+ !isPerPatch &&
+ (context.Config.PassthroughAttributes & (1 << ioDefinition.Location)) != 0 &&
+ context.Config.GpuAccessor.QueryHostSupportsGeometryShaderPassthrough())
+ {
+ context.Decorate(spvVar, Decoration.PassthroughNV);
+ }
+ }
+ else if (ioVariable == IoVariable.FragmentOutputColor)
+ {
+ int location = ioDefinition.Location;
+
+ if (context.Config.Stage == ShaderStage.Fragment && context.Config.GpuAccessor.QueryDualSourceBlendEnable())
+ {
+ // With dual source blend, if the lowest used output and the one right after it are both used,
+ // they share the first location and are told apart by the Index decoration (0 or 1).
+ int firstLocation = BitOperations.TrailingZeroCount(context.Config.UsedOutputAttributes);
+ int index = location - firstLocation;
+ int mask = 3 << firstLocation;
+
+ // The unsigned compare also rejects locations below the first one (negative index).
+ if ((uint)index < 2 && (context.Config.UsedOutputAttributes & mask) == mask)
+ {
+ context.Decorate(spvVar, Decoration.Location, (LiteralInteger)firstLocation);
+ context.Decorate(spvVar, Decoration.Index, (LiteralInteger)index);
+ }
+ else
+ {
+ context.Decorate(spvVar, Decoration.Location, (LiteralInteger)location);
+ }
+ }
+ else
+ {
+ context.Decorate(spvVar, Decoration.Location, (LiteralInteger)location);
+ }
+ }
+
+ // Inputs may get an interpolation decoration, outputs may get transform feedback decorations.
+ if (!isOutput)
+ {
+ switch (iq)
+ {
+ case PixelImap.Constant:
+ context.Decorate(spvVar, Decoration.Flat);
+ break;
+ case PixelImap.ScreenLinear:
+ context.Decorate(spvVar, Decoration.NoPerspective);
+ break;
+ }
+ }
+ else if (context.Config.TryGetTransformFeedbackOutput(
+ ioVariable,
+ ioDefinition.Location,
+ ioDefinition.Component,
+ out var transformFeedbackOutput))
+ {
+ context.Decorate(spvVar, Decoration.XfbBuffer, (LiteralInteger)transformFeedbackOutput.Buffer);
+ context.Decorate(spvVar, Decoration.XfbStride, (LiteralInteger)transformFeedbackOutput.Stride);
+ context.Decorate(spvVar, Decoration.Offset, (LiteralInteger)transformFeedbackOutput.Offset);
+ }
+
+ context.AddGlobalVariable(spvVar);
+
+ // Record the variable on the dictionary matching its direction and per-patch state.
+ var dict = isPerPatch
+ ? (isOutput ? context.OutputsPerPatch : context.InputsPerPatch)
+ : (isOutput ? context.Outputs : context.Inputs);
+ dict.Add(ioDefinition, spvVar);
+ }
+
+ /// <summary>
+ /// Gets the name prefix for a shader stage, by using the stage value
+ /// as an index into <c>StagePrefixes</c>.
+ /// </summary>
+ /// <param name="stage">Shader stage</param>
+ /// <returns>The prefix string for the stage</returns>
+ private static string GetStagePrefix(ShaderStage stage)
+ {
+     int stageIndex = (int)stage;
+
+     return StagePrefixes[stageIndex];
+ }
+ }
+}
diff --git a/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/EnumConversion.cs b/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/EnumConversion.cs
new file mode 100644
index 00000000..72541774
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/EnumConversion.cs
@@ -0,0 +1,22 @@
+using System;
+using static Spv.Specification;
+
+namespace Ryujinx.Graphics.Shader.CodeGen.Spirv
+{
+ /// <summary>
+ /// Conversions from shader translator enums to their SPIR-V equivalents.
+ /// </summary>
+ static class EnumConversion
+ {
+     /// <summary>
+     /// Converts a shader stage into the matching SPIR-V execution model.
+     /// </summary>
+     /// <param name="stage">Shader stage to convert</param>
+     /// <returns>The SPIR-V execution model</returns>
+     /// <exception cref="ArgumentException">The shader stage is not one of the handled values</exception>
+     public static ExecutionModel Convert(this ShaderStage stage) => stage switch
+     {
+         ShaderStage.Compute => ExecutionModel.GLCompute,
+         ShaderStage.Vertex => ExecutionModel.Vertex,
+         ShaderStage.TessellationControl => ExecutionModel.TessellationControl,
+         ShaderStage.TessellationEvaluation => ExecutionModel.TessellationEvaluation,
+         ShaderStage.Geometry => ExecutionModel.Geometry,
+         ShaderStage.Fragment => ExecutionModel.Fragment,
+         _ => throw new ArgumentException($"Invalid shader stage \"{stage}\".")
+     };
+ }
+}
diff --git a/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/Instructions.cs b/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/Instructions.cs
new file mode 100644
index 00000000..b6ffdb7a
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/Instructions.cs
@@ -0,0 +1,2480 @@
+using Ryujinx.Graphics.Shader.IntermediateRepresentation;
+using Ryujinx.Graphics.Shader.StructuredIr;
+using Ryujinx.Graphics.Shader.Translation;
+using System;
+using System.Collections.Generic;
+using System.Diagnostics;
+using System.Linq;
+using System.Numerics;
+using static Spv.Specification;
+
+namespace Ryujinx.Graphics.Shader.CodeGen.Spirv
+{
+ using SpvInstruction = Spv.Generator.Instruction;
+ using SpvLiteralInteger = Spv.Generator.LiteralInteger;
+
+ static class Instructions
+ {
+ // Memory semantics mask combining the image, atomic counter, workgroup and uniform
+ // memory bits with acquire/release ordering.
+ private const MemorySemanticsMask DefaultMemorySemantics =
+ MemorySemanticsMask.ImageMemory |
+ MemorySemanticsMask.AtomicCounterMemory |
+ MemorySemanticsMask.WorkgroupMemory |
+ MemorySemanticsMask.UniformMemory |
+ MemorySemanticsMask.AcquireRelease;
+
+ // Code generation handlers, indexed by the instruction value masked with Instruction.Mask.
+ // Entries without a registered handler are left as null.
+ private static readonly Func<CodeGenContext, AstOperation, OperationResult>[] InstTable;
+
+ // Allocates the handler table with one slot per instruction (Instruction.Count),
+ // then registers the code generation handler of every supported instruction.
+ static Instructions()
+ {
+ InstTable = new Func<CodeGenContext, AstOperation, OperationResult>[(int)Instruction.Count];
+
+ Add(Instruction.Absolute, GenerateAbsolute);
+ Add(Instruction.Add, GenerateAdd);
+ Add(Instruction.AtomicAdd, GenerateAtomicAdd);
+ Add(Instruction.AtomicAnd, GenerateAtomicAnd);
+ Add(Instruction.AtomicCompareAndSwap, GenerateAtomicCompareAndSwap);
+ Add(Instruction.AtomicMinS32, GenerateAtomicMinS32);
+ Add(Instruction.AtomicMinU32, GenerateAtomicMinU32);
+ Add(Instruction.AtomicMaxS32, GenerateAtomicMaxS32);
+ Add(Instruction.AtomicMaxU32, GenerateAtomicMaxU32);
+ Add(Instruction.AtomicOr, GenerateAtomicOr);
+ Add(Instruction.AtomicSwap, GenerateAtomicSwap);
+ Add(Instruction.AtomicXor, GenerateAtomicXor);
+ Add(Instruction.Ballot, GenerateBallot);
+ Add(Instruction.Barrier, GenerateBarrier);
+ Add(Instruction.BitCount, GenerateBitCount);
+ Add(Instruction.BitfieldExtractS32, GenerateBitfieldExtractS32);
+ Add(Instruction.BitfieldExtractU32, GenerateBitfieldExtractU32);
+ Add(Instruction.BitfieldInsert, GenerateBitfieldInsert);
+ Add(Instruction.BitfieldReverse, GenerateBitfieldReverse);
+ Add(Instruction.BitwiseAnd, GenerateBitwiseAnd);
+ Add(Instruction.BitwiseExclusiveOr, GenerateBitwiseExclusiveOr);
+ Add(Instruction.BitwiseNot, GenerateBitwiseNot);
+ Add(Instruction.BitwiseOr, GenerateBitwiseOr);
+ Add(Instruction.Call, GenerateCall);
+ Add(Instruction.Ceiling, GenerateCeiling);
+ Add(Instruction.Clamp, GenerateClamp);
+ Add(Instruction.ClampU32, GenerateClampU32);
+ Add(Instruction.Comment, GenerateComment);
+ Add(Instruction.CompareEqual, GenerateCompareEqual);
+ Add(Instruction.CompareGreater, GenerateCompareGreater);
+ Add(Instruction.CompareGreaterOrEqual, GenerateCompareGreaterOrEqual);
+ Add(Instruction.CompareGreaterOrEqualU32, GenerateCompareGreaterOrEqualU32);
+ Add(Instruction.CompareGreaterU32, GenerateCompareGreaterU32);
+ Add(Instruction.CompareLess, GenerateCompareLess);
+ Add(Instruction.CompareLessOrEqual, GenerateCompareLessOrEqual);
+ Add(Instruction.CompareLessOrEqualU32, GenerateCompareLessOrEqualU32);
+ Add(Instruction.CompareLessU32, GenerateCompareLessU32);
+ Add(Instruction.CompareNotEqual, GenerateCompareNotEqual);
+ Add(Instruction.ConditionalSelect, GenerateConditionalSelect);
+ Add(Instruction.ConvertFP32ToFP64, GenerateConvertFP32ToFP64);
+ Add(Instruction.ConvertFP32ToS32, GenerateConvertFP32ToS32);
+ Add(Instruction.ConvertFP32ToU32, GenerateConvertFP32ToU32);
+ Add(Instruction.ConvertFP64ToFP32, GenerateConvertFP64ToFP32);
+ Add(Instruction.ConvertFP64ToS32, GenerateConvertFP64ToS32);
+ Add(Instruction.ConvertFP64ToU32, GenerateConvertFP64ToU32);
+ Add(Instruction.ConvertS32ToFP32, GenerateConvertS32ToFP32);
+ Add(Instruction.ConvertS32ToFP64, GenerateConvertS32ToFP64);
+ Add(Instruction.ConvertU32ToFP32, GenerateConvertU32ToFP32);
+ Add(Instruction.ConvertU32ToFP64, GenerateConvertU32ToFP64);
+ Add(Instruction.Cosine, GenerateCosine);
+ Add(Instruction.Ddx, GenerateDdx);
+ Add(Instruction.Ddy, GenerateDdy);
+ Add(Instruction.Discard, GenerateDiscard);
+ Add(Instruction.Divide, GenerateDivide);
+ Add(Instruction.EmitVertex, GenerateEmitVertex);
+ Add(Instruction.EndPrimitive, GenerateEndPrimitive);
+ Add(Instruction.ExponentB2, GenerateExponentB2);
+ Add(Instruction.FSIBegin, GenerateFSIBegin);
+ Add(Instruction.FSIEnd, GenerateFSIEnd);
+ Add(Instruction.FindLSB, GenerateFindLSB);
+ Add(Instruction.FindMSBS32, GenerateFindMSBS32);
+ Add(Instruction.FindMSBU32, GenerateFindMSBU32);
+ Add(Instruction.Floor, GenerateFloor);
+ Add(Instruction.FusedMultiplyAdd, GenerateFusedMultiplyAdd);
+ Add(Instruction.GroupMemoryBarrier, GenerateGroupMemoryBarrier);
+ Add(Instruction.ImageAtomic, GenerateImageAtomic);
+ Add(Instruction.ImageLoad, GenerateImageLoad);
+ Add(Instruction.ImageStore, GenerateImageStore);
+ Add(Instruction.IsNan, GenerateIsNan);
+ Add(Instruction.Load, GenerateLoad);
+ Add(Instruction.LoadConstant, GenerateLoadConstant);
+ Add(Instruction.LoadLocal, GenerateLoadLocal);
+ Add(Instruction.LoadShared, GenerateLoadShared);
+ Add(Instruction.LoadStorage, GenerateLoadStorage);
+ Add(Instruction.Lod, GenerateLod);
+ Add(Instruction.LogarithmB2, GenerateLogarithmB2);
+ Add(Instruction.LogicalAnd, GenerateLogicalAnd);
+ Add(Instruction.LogicalExclusiveOr, GenerateLogicalExclusiveOr);
+ Add(Instruction.LogicalNot, GenerateLogicalNot);
+ Add(Instruction.LogicalOr, GenerateLogicalOr);
+ Add(Instruction.LoopBreak, GenerateLoopBreak);
+ Add(Instruction.LoopContinue, GenerateLoopContinue);
+ Add(Instruction.Maximum, GenerateMaximum);
+ Add(Instruction.MaximumU32, GenerateMaximumU32);
+ Add(Instruction.MemoryBarrier, GenerateMemoryBarrier);
+ Add(Instruction.Minimum, GenerateMinimum);
+ Add(Instruction.MinimumU32, GenerateMinimumU32);
+ Add(Instruction.Multiply, GenerateMultiply);
+ Add(Instruction.MultiplyHighS32, GenerateMultiplyHighS32);
+ Add(Instruction.MultiplyHighU32, GenerateMultiplyHighU32);
+ Add(Instruction.Negate, GenerateNegate);
+ Add(Instruction.PackDouble2x32, GeneratePackDouble2x32);
+ Add(Instruction.PackHalf2x16, GeneratePackHalf2x16);
+ Add(Instruction.ReciprocalSquareRoot, GenerateReciprocalSquareRoot);
+ Add(Instruction.Return, GenerateReturn);
+ Add(Instruction.Round, GenerateRound);
+ Add(Instruction.ShiftLeft, GenerateShiftLeft);
+ Add(Instruction.ShiftRightS32, GenerateShiftRightS32);
+ Add(Instruction.ShiftRightU32, GenerateShiftRightU32);
+ Add(Instruction.Shuffle, GenerateShuffle);
+ Add(Instruction.ShuffleDown, GenerateShuffleDown);
+ Add(Instruction.ShuffleUp, GenerateShuffleUp);
+ Add(Instruction.ShuffleXor, GenerateShuffleXor);
+ Add(Instruction.Sine, GenerateSine);
+ Add(Instruction.SquareRoot, GenerateSquareRoot);
+ Add(Instruction.Store, GenerateStore);
+ Add(Instruction.StoreLocal, GenerateStoreLocal);
+ Add(Instruction.StoreShared, GenerateStoreShared);
+ Add(Instruction.StoreShared16, GenerateStoreShared16);
+ Add(Instruction.StoreShared8, GenerateStoreShared8);
+ Add(Instruction.StoreStorage, GenerateStoreStorage);
+ Add(Instruction.StoreStorage16, GenerateStoreStorage16);
+ Add(Instruction.StoreStorage8, GenerateStoreStorage8);
+ Add(Instruction.Subtract, GenerateSubtract);
+ Add(Instruction.SwizzleAdd, GenerateSwizzleAdd);
+ Add(Instruction.TextureSample, GenerateTextureSample);
+ Add(Instruction.TextureSize, GenerateTextureSize);
+ Add(Instruction.Truncate, GenerateTruncate);
+ Add(Instruction.UnpackDouble2x32, GenerateUnpackDouble2x32);
+ Add(Instruction.UnpackHalf2x16, GenerateUnpackHalf2x16);
+ Add(Instruction.VectorExtract, GenerateVectorExtract);
+ Add(Instruction.VoteAll, GenerateVoteAll);
+ Add(Instruction.VoteAllEqual, GenerateVoteAllEqual);
+ Add(Instruction.VoteAny, GenerateVoteAny);
+ }
+
+ /// <summary>
+ /// Registers the code generation handler for an instruction.
+ /// </summary>
+ /// <param name="inst">Instruction to register; only the bits selected by <see cref="Instruction.Mask"/> are used</param>
+ /// <param name="handler">Function that generates the code for the instruction</param>
+ private static void Add(Instruction inst, Func<CodeGenContext, AstOperation, OperationResult> handler)
+ {
+     int tableIndex = (int)(inst & Instruction.Mask);
+
+     InstTable[tableIndex] = handler;
+ }
+
+ /// <summary>
+ /// Generates the code for an operation, using the handler registered for its instruction.
+ /// </summary>
+ /// <param name="context">Code generation context</param>
+ /// <param name="operation">Operation to generate code for</param>
+ /// <returns>The result returned by the instruction handler</returns>
+ /// <exception cref="NotImplementedException">No handler is registered for the instruction</exception>
+ public static OperationResult Generate(CodeGenContext context, AstOperation operation)
+ {
+     int tableIndex = (int)(operation.Inst & Instruction.Mask);
+     var generator = InstTable[tableIndex];
+
+     if (generator == null)
+     {
+         throw new NotImplementedException(operation.Inst.ToString());
+     }
+
+     return generator(context, operation);
+ }
+
+ /// <summary>
+ /// Handler for <see cref="Instruction.Absolute"/>.
+ /// Forwards to GenerateUnary with the GlslFAbs and GlslSAbs delegates.
+ /// </summary>
+ private static OperationResult GenerateAbsolute(CodeGenContext context, AstOperation operation)
+     => GenerateUnary(context, operation, context.Delegates.GlslFAbs, context.Delegates.GlslSAbs);
+
+ /// <summary>
+ /// Handler for <see cref="Instruction.Add"/>.
+ /// Forwards to GenerateBinary with the FAdd and IAdd delegates.
+ /// </summary>
+ private static OperationResult GenerateAdd(CodeGenContext context, AstOperation operation)
+     => GenerateBinary(context, operation, context.Delegates.FAdd, context.Delegates.IAdd);
+
+ /// <summary>
+ /// Handler for <see cref="Instruction.AtomicAdd"/>.
+ /// Forwards to GenerateAtomicMemoryBinary with the AtomicIAdd delegate.
+ /// </summary>
+ private static OperationResult GenerateAtomicAdd(CodeGenContext context, AstOperation operation)
+     => GenerateAtomicMemoryBinary(context, operation, context.Delegates.AtomicIAdd);
+
+ /// <summary>
+ /// Handler for <see cref="Instruction.AtomicAnd"/>.
+ /// Forwards to GenerateAtomicMemoryBinary with the AtomicAnd delegate.
+ /// </summary>
+ private static OperationResult GenerateAtomicAnd(CodeGenContext context, AstOperation operation)
+     => GenerateAtomicMemoryBinary(context, operation, context.Delegates.AtomicAnd);
+
+ /// <summary>
+ /// Handler for <see cref="Instruction.AtomicCompareAndSwap"/>.
+ /// Forwards to GenerateAtomicMemoryCas.
+ /// </summary>
+ private static OperationResult GenerateAtomicCompareAndSwap(CodeGenContext context, AstOperation operation)
+     => GenerateAtomicMemoryCas(context, operation);
+
+ /// <summary>
+ /// Handler for <see cref="Instruction.AtomicMinS32"/>.
+ /// Forwards to GenerateAtomicMemoryBinary with the AtomicSMin delegate.
+ /// </summary>
+ private static OperationResult GenerateAtomicMinS32(CodeGenContext context, AstOperation operation)
+     => GenerateAtomicMemoryBinary(context, operation, context.Delegates.AtomicSMin);
+
+ /// <summary>
+ /// Handler for <see cref="Instruction.AtomicMinU32"/>.
+ /// Forwards to GenerateAtomicMemoryBinary with the AtomicUMin delegate.
+ /// </summary>
+ private static OperationResult GenerateAtomicMinU32(CodeGenContext context, AstOperation operation)
+     => GenerateAtomicMemoryBinary(context, operation, context.Delegates.AtomicUMin);
+
+ /// <summary>
+ /// Handler for <see cref="Instruction.AtomicMaxS32"/>.
+ /// Forwards to GenerateAtomicMemoryBinary with the AtomicSMax delegate.
+ /// </summary>
+ private static OperationResult GenerateAtomicMaxS32(CodeGenContext context, AstOperation operation)
+     => GenerateAtomicMemoryBinary(context, operation, context.Delegates.AtomicSMax);
+
+ /// <summary>
+ /// Handler for <see cref="Instruction.AtomicMaxU32"/>.
+ /// Forwards to GenerateAtomicMemoryBinary with the AtomicUMax delegate.
+ /// </summary>
+ private static OperationResult GenerateAtomicMaxU32(CodeGenContext context, AstOperation operation)
+     => GenerateAtomicMemoryBinary(context, operation, context.Delegates.AtomicUMax);
+
+ /// <summary>
+ /// Handler for <see cref="Instruction.AtomicOr"/>.
+ /// Forwards to GenerateAtomicMemoryBinary with the AtomicOr delegate.
+ /// </summary>
+ private static OperationResult GenerateAtomicOr(CodeGenContext context, AstOperation operation)
+     => GenerateAtomicMemoryBinary(context, operation, context.Delegates.AtomicOr);
+
+ /// <summary>
+ /// Handler for <see cref="Instruction.AtomicSwap"/>.
+ /// Forwards to GenerateAtomicMemoryBinary with the AtomicExchange delegate.
+ /// </summary>
+ private static OperationResult GenerateAtomicSwap(CodeGenContext context, AstOperation operation)
+     => GenerateAtomicMemoryBinary(context, operation, context.Delegates.AtomicExchange);
+
+ /// <summary>
+ /// Handler for <see cref="Instruction.AtomicXor"/>.
+ /// Forwards to GenerateAtomicMemoryBinary with the AtomicXor delegate.
+ /// </summary>
+ private static OperationResult GenerateAtomicXor(CodeGenContext context, AstOperation operation)
+     => GenerateAtomicMemoryBinary(context, operation, context.Delegates.AtomicXor);
+
+ /// <summary>
+ /// Handler for <see cref="Instruction.Ballot"/>.
+ /// Emits a subgroup ballot of the boolean source and returns the first
+ /// component of the resulting 4 component vector as an unsigned integer.
+ /// </summary>
+ private static OperationResult GenerateBallot(CodeGenContext context, AstOperation operation)
+ {
+     var predicateNode = operation.GetSource(0);
+
+     var ballotType = context.TypeVector(context.TypeU32(), 4);
+     var subgroupScope = context.Constant(context.TypeU32(), Scope.Subgroup);
+     var predicate = context.Get(AggregateType.Bool, predicateNode);
+
+     var ballot = context.GroupNonUniformBallot(ballotType, subgroupScope, predicate);
+
+     // Only component 0 of the ballot vector is returned.
+     var firstWord = context.CompositeExtract(context.TypeU32(), ballot, (SpvLiteralInteger)0);
+
+     return new OperationResult(AggregateType.U32, firstWord);
+ }
+
+ /// <summary>
+ /// Handler for <see cref="Instruction.Barrier"/>.
+ /// Emits a control barrier with workgroup execution and memory scopes,
+ /// and workgroup memory acquire/release semantics. Produces no value.
+ /// </summary>
+ private static OperationResult GenerateBarrier(CodeGenContext context, AstOperation operation)
+ {
+     var executionScope = context.Constant(context.TypeU32(), Scope.Workgroup);
+     var memoryScope = context.Constant(context.TypeU32(), Scope.Workgroup);
+     var semantics = context.Constant(
+         context.TypeU32(),
+         MemorySemanticsMask.WorkgroupMemory | MemorySemanticsMask.AcquireRelease);
+
+     context.ControlBarrier(executionScope, memoryScope, semantics);
+
+     return OperationResult.Invalid;
+ }
+
+ /// <summary>
+ /// Handler for <see cref="Instruction.BitCount"/>.
+ /// Forwards to GenerateUnaryS32 with the BitCount delegate.
+ /// </summary>
+ private static OperationResult GenerateBitCount(CodeGenContext context, AstOperation operation)
+     => GenerateUnaryS32(context, operation, context.Delegates.BitCount);
+
+ /// <summary>
+ /// Handler for <see cref="Instruction.BitfieldExtractS32"/>.
+ /// Forwards to the GenerateBitfieldExtractS32 overload taking an emitter, with the BitFieldSExtract delegate.
+ /// </summary>
+ private static OperationResult GenerateBitfieldExtractS32(CodeGenContext context, AstOperation operation)
+     => GenerateBitfieldExtractS32(context, operation, context.Delegates.BitFieldSExtract);
+
+ /// <summary>
+ /// Handler for <see cref="Instruction.BitfieldExtractU32"/>.
+ /// Forwards to GenerateTernaryU32 with the BitFieldUExtract delegate.
+ /// </summary>
+ private static OperationResult GenerateBitfieldExtractU32(CodeGenContext context, AstOperation operation)
+     => GenerateTernaryU32(context, operation, context.Delegates.BitFieldUExtract);
+
+ /// <summary>
+ /// Handler for <see cref="Instruction.BitfieldInsert"/>.
+ /// Forwards to the GenerateBitfieldInsert overload taking an emitter, with the BitFieldInsert delegate.
+ /// </summary>
+ private static OperationResult GenerateBitfieldInsert(CodeGenContext context, AstOperation operation)
+     => GenerateBitfieldInsert(context, operation, context.Delegates.BitFieldInsert);
+
+ /// <summary>
+ /// Handler for <see cref="Instruction.BitfieldReverse"/>.
+ /// Forwards to GenerateUnaryS32 with the BitReverse delegate.
+ /// </summary>
+ private static OperationResult GenerateBitfieldReverse(CodeGenContext context, AstOperation operation)
+     => GenerateUnaryS32(context, operation, context.Delegates.BitReverse);
+
+ /// <summary>
+ /// Handler for <see cref="Instruction.BitwiseAnd"/>.
+ /// Forwards to GenerateBinaryS32 with the BitwiseAnd delegate.
+ /// </summary>
+ private static OperationResult GenerateBitwiseAnd(CodeGenContext context, AstOperation operation)
+     => GenerateBinaryS32(context, operation, context.Delegates.BitwiseAnd);
+
+ /// <summary>
+ /// Handler for <see cref="Instruction.BitwiseExclusiveOr"/>.
+ /// Forwards to GenerateBinaryS32 with the BitwiseXor delegate.
+ /// </summary>
+ private static OperationResult GenerateBitwiseExclusiveOr(CodeGenContext context, AstOperation operation)
+     => GenerateBinaryS32(context, operation, context.Delegates.BitwiseXor);
+
+ /// <summary>
+ /// Handler for <see cref="Instruction.BitwiseNot"/>.
+ /// Forwards to GenerateUnaryS32 with the Not delegate.
+ /// </summary>
+ private static OperationResult GenerateBitwiseNot(CodeGenContext context, AstOperation operation)
+     => GenerateUnaryS32(context, operation, context.Delegates.Not);
+
+ /// <summary>
+ /// Handler for <see cref="Instruction.BitwiseOr"/>.
+ /// Forwards to GenerateBinaryS32 with the BitwiseOr delegate.
+ /// </summary>
+ private static OperationResult GenerateBitwiseOr(CodeGenContext context, AstOperation operation)
+     => GenerateBinaryS32(context, operation, context.Delegates.BitwiseOr);
+
+ /// <summary>
+ /// Handler for <see cref="Instruction.Call"/>.
+ /// The first source is the constant function id, the remaining sources are the call arguments.
+ /// Input argument values are stored into the locals reserved for the callee arguments,
+ /// and those locals are passed; the remaining arguments are passed as pointers to the operand locals.
+ /// </summary>
+ private static OperationResult GenerateCall(CodeGenContext context, AstOperation operation)
+ {
+     AstOperand funcId = (AstOperand)operation.GetSource(0);
+
+     Debug.Assert(funcId.Type == OperandType.Constant);
+
+     var (callee, calleeId) = context.GetFunction(funcId.Value);
+
+     int argsCount = operation.SourcesCount - 1;
+     var callArgs = new SpvInstruction[argsCount];
+     var argLocals = context.GetLocalForArgsPointers(funcId.Value);
+
+     for (int index = 0; index < callArgs.Length; index++)
+     {
+         // Source 0 is the function id, so arguments start at source 1.
+         var operand = (AstOperand)operation.GetSource(index + 1);
+
+         if (index < callee.InArguments.Length)
+         {
+             // Input argument: copy the value into its reserved local and pass that local.
+             var argType = callee.GetArgumentType(index);
+             var argValue = context.Get(argType, operand);
+             var argLocal = argLocals[index];
+
+             context.Store(argLocal, argValue);
+
+             callArgs[index] = argLocal;
+         }
+         else
+         {
+             // Past the input arguments: pass the pointer of the operand local directly.
+             callArgs[index] = context.GetLocalPointer(operand);
+         }
+     }
+
+     var returnType = callee.ReturnType;
+     var callResult = context.FunctionCall(context.GetType(returnType), calleeId, callArgs);
+
+     return new OperationResult(returnType, callResult);
+ }
+
+ /// <summary>
+ /// Handler for <see cref="Instruction.Ceiling"/>.
+ /// Forwards to GenerateUnary with the GlslCeil delegate, and null as the second delegate.
+ /// </summary>
+ private static OperationResult GenerateCeiling(CodeGenContext context, AstOperation operation)
+     => GenerateUnary(context, operation, context.Delegates.GlslCeil, null);
+
+ /// <summary>
+ /// Handler for <see cref="Instruction.Clamp"/>.
+ /// Forwards to GenerateTernary with the GlslFClamp and GlslSClamp delegates.
+ /// </summary>
+ private static OperationResult GenerateClamp(CodeGenContext context, AstOperation operation)
+     => GenerateTernary(context, operation, context.Delegates.GlslFClamp, context.Delegates.GlslSClamp);
+
+ /// <summary>
+ /// Handler for <see cref="Instruction.ClampU32"/>.
+ /// Forwards to GenerateTernaryU32 with the GlslUClamp delegate.
+ /// </summary>
+ private static OperationResult GenerateClampU32(CodeGenContext context, AstOperation operation)
+     => GenerateTernaryU32(context, operation, context.Delegates.GlslUClamp);
+
+ /// <summary>
+ /// Handler for <see cref="Instruction.Comment"/>.
+ /// Generates no code and returns <see cref="OperationResult.Invalid"/>.
+ /// </summary>
+ private static OperationResult GenerateComment(CodeGenContext context, AstOperation operation)
+     => OperationResult.Invalid;
+
+ /// <summary>
+ /// Handler for <see cref="Instruction.CompareEqual"/>.
+ /// Forwards to GenerateCompare with the FOrdEqual and IEqual delegates.
+ /// </summary>
+ private static OperationResult GenerateCompareEqual(CodeGenContext context, AstOperation operation)
+     => GenerateCompare(context, operation, context.Delegates.FOrdEqual, context.Delegates.IEqual);
+
+ /// <summary>
+ /// Handler for <see cref="Instruction.CompareGreater"/>.
+ /// Forwards to GenerateCompare with the FOrdGreaterThan and SGreaterThan delegates.
+ /// </summary>
+ private static OperationResult GenerateCompareGreater(CodeGenContext context, AstOperation operation)
+     => GenerateCompare(context, operation, context.Delegates.FOrdGreaterThan, context.Delegates.SGreaterThan);
+
+ /// <summary>
+ /// Handler for <see cref="Instruction.CompareGreaterOrEqual"/>.
+ /// Forwards to GenerateCompare with the FOrdGreaterThanEqual and SGreaterThanEqual delegates.
+ /// </summary>
+ private static OperationResult GenerateCompareGreaterOrEqual(CodeGenContext context, AstOperation operation)
+     => GenerateCompare(context, operation, context.Delegates.FOrdGreaterThanEqual, context.Delegates.SGreaterThanEqual);
+
+ /// <summary>
+ /// Handler for <see cref="Instruction.CompareGreaterOrEqualU32"/>.
+ /// Forwards to GenerateCompareU32 with the UGreaterThanEqual delegate.
+ /// </summary>
+ private static OperationResult GenerateCompareGreaterOrEqualU32(CodeGenContext context, AstOperation operation)
+     => GenerateCompareU32(context, operation, context.Delegates.UGreaterThanEqual);
+
+ /// <summary>
+ /// Handler for <see cref="Instruction.CompareGreaterU32"/>.
+ /// Forwards to GenerateCompareU32 with the UGreaterThan delegate.
+ /// </summary>
+ private static OperationResult GenerateCompareGreaterU32(CodeGenContext context, AstOperation operation)
+     => GenerateCompareU32(context, operation, context.Delegates.UGreaterThan);
+
+ /// <summary>
+ /// Handler for <see cref="Instruction.CompareLess"/>.
+ /// Forwards to GenerateCompare with the FOrdLessThan and SLessThan delegates.
+ /// </summary>
+ private static OperationResult GenerateCompareLess(CodeGenContext context, AstOperation operation)
+     => GenerateCompare(context, operation, context.Delegates.FOrdLessThan, context.Delegates.SLessThan);
+
+ /// <summary>
+ /// Handler for <see cref="Instruction.CompareLessOrEqual"/>.
+ /// Forwards to GenerateCompare with the FOrdLessThanEqual and SLessThanEqual delegates.
+ /// </summary>
+ private static OperationResult GenerateCompareLessOrEqual(CodeGenContext context, AstOperation operation)
+     => GenerateCompare(context, operation, context.Delegates.FOrdLessThanEqual, context.Delegates.SLessThanEqual);
+
+ /// <summary>
+ /// Handler for <see cref="Instruction.CompareLessOrEqualU32"/>.
+ /// Forwards to GenerateCompareU32 with the ULessThanEqual delegate.
+ /// </summary>
+ private static OperationResult GenerateCompareLessOrEqualU32(CodeGenContext context, AstOperation operation)
+     => GenerateCompareU32(context, operation, context.Delegates.ULessThanEqual);
+
+ /// <summary>
+ /// Handler for <see cref="Instruction.CompareLessU32"/>.
+ /// Forwards to GenerateCompareU32 with the ULessThan delegate.
+ /// </summary>
+ private static OperationResult GenerateCompareLessU32(CodeGenContext context, AstOperation operation)
+     => GenerateCompareU32(context, operation, context.Delegates.ULessThan);
+
+ /// <summary>
+ /// Handler for <see cref="Instruction.CompareNotEqual"/>.
+ /// Forwards to GenerateCompare with the FOrdNotEqual and INotEqual delegates.
+ /// </summary>
+ private static OperationResult GenerateCompareNotEqual(CodeGenContext context, AstOperation operation)
+     => GenerateCompare(context, operation, context.Delegates.FOrdNotEqual, context.Delegates.INotEqual);
+
+ /// <summary>
+ /// Handler for <see cref="Instruction.ConditionalSelect"/>.
+ /// Selects between the second and third sources using the first source as a boolean condition.
+ /// The result is FP64 or FP32 when the instruction has the matching flag, and S32 otherwise.
+ /// </summary>
+ private static OperationResult GenerateConditionalSelect(CodeGenContext context, AstOperation operation)
+ {
+     var conditionNode = operation.GetSource(0);
+     var trueNode = operation.GetSource(1);
+     var falseNode = operation.GetSource(2);
+
+     var condition = context.Get(AggregateType.Bool, conditionNode);
+
+     // The FP64 flag is checked before the FP32 flag.
+     if (operation.Inst.HasFlag(Instruction.FP64))
+     {
+         var fp64Type = context.TypeFP64();
+         var whenTrue = context.GetFP64(trueNode);
+         var whenFalse = context.GetFP64(falseNode);
+
+         return new OperationResult(AggregateType.FP64, context.Select(fp64Type, condition, whenTrue, whenFalse));
+     }
+
+     if (operation.Inst.HasFlag(Instruction.FP32))
+     {
+         var fp32Type = context.TypeFP32();
+         var whenTrue = context.GetFP32(trueNode);
+         var whenFalse = context.GetFP32(falseNode);
+
+         return new OperationResult(AggregateType.FP32, context.Select(fp32Type, condition, whenTrue, whenFalse));
+     }
+
+     var s32Type = context.TypeS32();
+     var intWhenTrue = context.GetS32(trueNode);
+     var intWhenFalse = context.GetS32(falseNode);
+
+     return new OperationResult(AggregateType.S32, context.Select(s32Type, condition, intWhenTrue, intWhenFalse));
+ }
+
+ /// <summary>
+ /// Converts the first source, read as FP32, to FP64 using <c>FConvert</c>.
+ /// </summary>
+ private static OperationResult GenerateConvertFP32ToFP64(CodeGenContext context, AstOperation operation)
+ {
+     var operand = operation.GetSource(0);
+     var targetType = context.TypeFP64();
+     var converted = context.FConvert(targetType, context.GetFP32(operand));
+
+     return new OperationResult(AggregateType.FP64, converted);
+ }
+
+ /// <summary>
+ /// Converts the first source, read as FP32, to S32 using <c>ConvertFToS</c>.
+ /// </summary>
+ private static OperationResult GenerateConvertFP32ToS32(CodeGenContext context, AstOperation operation)
+ {
+     var operand = operation.GetSource(0);
+     var targetType = context.TypeS32();
+     var converted = context.ConvertFToS(targetType, context.GetFP32(operand));
+
+     return new OperationResult(AggregateType.S32, converted);
+ }
+
+ /// <summary>
+ /// Converts the first source, read as FP32, to U32 using <c>ConvertFToU</c>.
+ /// </summary>
+ private static OperationResult GenerateConvertFP32ToU32(CodeGenContext context, AstOperation operation)
+ {
+     var operand = operation.GetSource(0);
+     var targetType = context.TypeU32();
+     var converted = context.ConvertFToU(targetType, context.GetFP32(operand));
+
+     return new OperationResult(AggregateType.U32, converted);
+ }
+
+ /// <summary>
+ /// Converts the first source, read as FP64, to FP32 using <c>FConvert</c>.
+ /// </summary>
+ private static OperationResult GenerateConvertFP64ToFP32(CodeGenContext context, AstOperation operation)
+ {
+     var operand = operation.GetSource(0);
+     var targetType = context.TypeFP32();
+     var converted = context.FConvert(targetType, context.GetFP64(operand));
+
+     return new OperationResult(AggregateType.FP32, converted);
+ }
+
+ /// <summary>
+ /// Converts the first source, read as FP64, to S32 using <c>ConvertFToS</c>.
+ /// </summary>
+ private static OperationResult GenerateConvertFP64ToS32(CodeGenContext context, AstOperation operation)
+ {
+     var operand = operation.GetSource(0);
+     var targetType = context.TypeS32();
+     var converted = context.ConvertFToS(targetType, context.GetFP64(operand));
+
+     return new OperationResult(AggregateType.S32, converted);
+ }
+
+ /// <summary>
+ /// Converts the first source, read as FP64, to U32 using <c>ConvertFToU</c>.
+ /// </summary>
+ private static OperationResult GenerateConvertFP64ToU32(CodeGenContext context, AstOperation operation)
+ {
+     var operand = operation.GetSource(0);
+     var targetType = context.TypeU32();
+     var converted = context.ConvertFToU(targetType, context.GetFP64(operand));
+
+     return new OperationResult(AggregateType.U32, converted);
+ }
+
+ /// <summary>
+ /// Converts the first source, read as S32, to FP32 using <c>ConvertSToF</c>.
+ /// </summary>
+ private static OperationResult GenerateConvertS32ToFP32(CodeGenContext context, AstOperation operation)
+ {
+     var operand = operation.GetSource(0);
+     var targetType = context.TypeFP32();
+     var converted = context.ConvertSToF(targetType, context.GetS32(operand));
+
+     return new OperationResult(AggregateType.FP32, converted);
+ }
+
+ /// <summary>
+ /// Converts the first source, read as S32, to FP64 using <c>ConvertSToF</c>.
+ /// </summary>
+ private static OperationResult GenerateConvertS32ToFP64(CodeGenContext context, AstOperation operation)
+ {
+     var operand = operation.GetSource(0);
+     var targetType = context.TypeFP64();
+     var converted = context.ConvertSToF(targetType, context.GetS32(operand));
+
+     return new OperationResult(AggregateType.FP64, converted);
+ }
+
+ /// <summary>
+ /// Converts the first source, read as U32, to FP32 using <c>ConvertUToF</c>.
+ /// </summary>
+ private static OperationResult GenerateConvertU32ToFP32(CodeGenContext context, AstOperation operation)
+ {
+     var operand = operation.GetSource(0);
+     var targetType = context.TypeFP32();
+     var converted = context.ConvertUToF(targetType, context.GetU32(operand));
+
+     return new OperationResult(AggregateType.FP32, converted);
+ }
+
+ /// <summary>
+ /// Converts the first source, read as U32, to FP64 using <c>ConvertUToF</c>.
+ /// </summary>
+ private static OperationResult GenerateConvertU32ToFP64(CodeGenContext context, AstOperation operation)
+ {
+     var operand = operation.GetSource(0);
+     var targetType = context.TypeFP64();
+     var converted = context.ConvertUToF(targetType, context.GetU32(operand));
+
+     return new OperationResult(AggregateType.FP64, converted);
+ }
+
+ /// <summary>
+ /// Generates a cosine by forwarding to <c>GenerateUnary</c> with the <c>GlslCos</c>
+ /// delegate; no second delegate is supplied.
+ /// </summary>
+ private static OperationResult GenerateCosine(CodeGenContext context, AstOperation operation)
+     => GenerateUnary(context, operation, context.Delegates.GlslCos, null);
+
+ /// <summary>
+ /// Generates the operation by forwarding to <c>GenerateUnaryFP32</c> with the <c>DPdx</c> delegate.
+ /// </summary>
+ private static OperationResult GenerateDdx(CodeGenContext context, AstOperation operation)
+     => GenerateUnaryFP32(context, operation, context.Delegates.DPdx);
+
+ /// <summary>
+ /// Generates the operation by forwarding to <c>GenerateUnaryFP32</c> with the <c>DPdy</c> delegate.
+ /// </summary>
+ private static OperationResult GenerateDdy(CodeGenContext context, AstOperation operation)
+     => GenerateUnaryFP32(context, operation, context.Delegates.DPdy);
+
+ /// <summary>
+ /// Emits a kill through <c>context.Kill()</c>.
+ /// </summary>
+ /// <returns><see cref="OperationResult.Invalid"/>, as no value is produced</returns>
+ private static OperationResult GenerateDiscard(CodeGenContext context, AstOperation operation)
+ {
+     context.Kill();
+
+     return OperationResult.Invalid;
+ }
+
+ /// <summary>
+ /// Generates a division by forwarding to <c>GenerateBinary</c>
+ /// with the <c>FDiv</c> and <c>SDiv</c> delegates.
+ /// </summary>
+ private static OperationResult GenerateDivide(CodeGenContext context, AstOperation operation)
+     => GenerateBinary(context, operation, context.Delegates.FDiv, context.Delegates.SDiv);
+
+ /// <summary>
+ /// Emits a vertex through <c>context.EmitVertex()</c>.
+ /// </summary>
+ /// <returns><see cref="OperationResult.Invalid"/>, as no value is produced</returns>
+ private static OperationResult GenerateEmitVertex(CodeGenContext context, AstOperation operation)
+ {
+     context.EmitVertex();
+     return OperationResult.Invalid;
+ }
+
+ /// <summary>
+ /// Ends the current primitive through <c>context.EndPrimitive()</c>.
+ /// </summary>
+ /// <returns><see cref="OperationResult.Invalid"/>, as no value is produced</returns>
+ private static OperationResult GenerateEndPrimitive(CodeGenContext context, AstOperation operation)
+ {
+     context.EndPrimitive();
+     return OperationResult.Invalid;
+ }
+
+ /// <summary>
+ /// Generates a base 2 exponential by forwarding to <c>GenerateUnary</c> with the
+ /// <c>GlslExp2</c> delegate; no second delegate is supplied.
+ /// </summary>
+ private static OperationResult GenerateExponentB2(CodeGenContext context, AstOperation operation)
+     => GenerateUnary(context, operation, context.Delegates.GlslExp2, null);
+
+ /// <summary>
+ /// Emits <c>BeginInvocationInterlockEXT</c> when the GPU accessor reports that the host
+ /// supports fragment shader interlock; otherwise nothing is emitted.
+ /// </summary>
+ /// <returns><see cref="OperationResult.Invalid"/>, as no value is produced</returns>
+ private static OperationResult GenerateFSIBegin(CodeGenContext context, AstOperation operation)
+ {
+     if (!context.Config.GpuAccessor.QueryHostSupportsFragmentShaderInterlock())
+     {
+         return OperationResult.Invalid;
+     }
+
+     context.BeginInvocationInterlockEXT();
+
+     return OperationResult.Invalid;
+ }
+
+ /// <summary>
+ /// Emits <c>EndInvocationInterlockEXT</c> when the GPU accessor reports that the host
+ /// supports fragment shader interlock; otherwise nothing is emitted.
+ /// </summary>
+ /// <returns><see cref="OperationResult.Invalid"/>, as no value is produced</returns>
+ private static OperationResult GenerateFSIEnd(CodeGenContext context, AstOperation operation)
+ {
+     if (!context.Config.GpuAccessor.QueryHostSupportsFragmentShaderInterlock())
+     {
+         return OperationResult.Invalid;
+     }
+
+     context.EndInvocationInterlockEXT();
+
+     return OperationResult.Invalid;
+ }
+
+ /// <summary>
+ /// Applies <c>GlslFindILsb</c> to the first source, read as U32, producing a U32 result.
+ /// </summary>
+ private static OperationResult GenerateFindLSB(CodeGenContext context, AstOperation operation)
+ {
+     var operand = operation.GetSource(0);
+     var bits = context.GetU32(operand);
+     var position = context.GlslFindILsb(context.TypeU32(), bits);
+
+     return new OperationResult(AggregateType.U32, position);
+ }
+
+ /// <summary>
+ /// Applies <c>GlslFindSMsb</c> to the first source, read as S32, producing a U32 result.
+ /// </summary>
+ private static OperationResult GenerateFindMSBS32(CodeGenContext context, AstOperation operation)
+ {
+     var operand = operation.GetSource(0);
+     var bits = context.GetS32(operand);
+     var position = context.GlslFindSMsb(context.TypeU32(), bits);
+
+     return new OperationResult(AggregateType.U32, position);
+ }
+
+ /// <summary>
+ /// Applies <c>GlslFindUMsb</c> to the first source, read as U32, producing a U32 result.
+ /// </summary>
+ private static OperationResult GenerateFindMSBU32(CodeGenContext context, AstOperation operation)
+ {
+     var operand = operation.GetSource(0);
+     var bits = context.GetU32(operand);
+     var position = context.GlslFindUMsb(context.TypeU32(), bits);
+
+     return new OperationResult(AggregateType.U32, position);
+ }
+
+ /// <summary>
+ /// Generates a floor by forwarding to <c>GenerateUnary</c> with the <c>GlslFloor</c>
+ /// delegate; no second delegate is supplied.
+ /// </summary>
+ private static OperationResult GenerateFloor(CodeGenContext context, AstOperation operation)
+     => GenerateUnary(context, operation, context.Delegates.GlslFloor, null);
+
+ /// <summary>
+ /// Generates a fused multiply-add by forwarding to <c>GenerateTernary</c> with the
+ /// <c>GlslFma</c> delegate; no second delegate is supplied.
+ /// </summary>
+ private static OperationResult GenerateFusedMultiplyAdd(CodeGenContext context, AstOperation operation)
+     => GenerateTernary(context, operation, context.Delegates.GlslFma, null);
+
+ /// <summary>
+ /// Emits a memory barrier with <c>Scope.Workgroup</c> as the scope operand and
+ /// <c>DefaultMemorySemantics</c> as the semantics operand.
+ /// </summary>
+ /// <returns><see cref="OperationResult.Invalid"/>, as no value is produced</returns>
+ private static OperationResult GenerateGroupMemoryBarrier(CodeGenContext context, AstOperation operation)
+ {
+     var scope = context.Constant(context.TypeU32(), Scope.Workgroup);
+     var semantics = context.Constant(context.TypeU32(), DefaultMemorySemantics);
+
+     context.MemoryBarrier(scope, semantics);
+
+     return OperationResult.Invalid;
+ }
+
+ /// <summary>
+ /// Generates an atomic operation on an image texel.
+ /// </summary>
+ /// <remarks>
+ /// Sources are consumed in order: the optional index (indexed sampler types), the integer
+ /// coordinates (plus one extra component for array types), the value operand and, for
+ /// compare-and-swap only, one additional operand.
+ /// Bindless operations are not supported yet and yield a zero constant of the component type.
+ /// </remarks>
+ /// <param name="context">Code generation context that receives the instructions</param>
+ /// <param name="operation">Operation to generate, must be an <see cref="AstTextureOperation"/></param>
+ /// <returns>The result of the atomic instruction, typed with the image format component type</returns>
+ private static OperationResult GenerateImageAtomic(CodeGenContext context, AstOperation operation)
+ {
+     AstTextureOperation texOp = (AstTextureOperation)operation;
+
+     bool isBindless = (texOp.Flags & TextureFlags.Bindless) != 0;
+
+     var componentType = texOp.Format.GetComponentType();
+
+     // TODO: Bindless texture support. For now we just return 0/do nothing.
+     if (isBindless)
+     {
+         return new OperationResult(componentType, componentType switch
+         {
+             AggregateType.S32 => context.Constant(context.TypeS32(), 0),
+             AggregateType.U32 => context.Constant(context.TypeU32(), 0u),
+             _ => context.Constant(context.TypeFP32(), 0f),
+         });
+     }
+
+     bool isArray = (texOp.Type & SamplerType.Array) != 0;
+     bool isIndexed = (texOp.Type & SamplerType.Indexed) != 0;
+
+     // Bindless operations returned above, so the first source is always at index 0.
+     int srcIndex = 0;
+
+     SpvInstruction Src(AggregateType type)
+     {
+         return context.Get(type, texOp.GetSource(srcIndex++));
+     }
+
+     if (isIndexed)
+     {
+         // The index value is not used here, but the source still has to be consumed
+         // so that the coordinates are read from the right position.
+         Src(AggregateType.S32);
+     }
+
+     int coordsCount = texOp.Type.GetDimensions();
+
+     int pCount = coordsCount + (isArray ? 1 : 0);
+
+     SpvInstruction pCoords;
+
+     if (pCount > 1)
+     {
+         SpvInstruction[] elems = new SpvInstruction[pCount];
+
+         for (int i = 0; i < pCount; i++)
+         {
+             elems[i] = Src(AggregateType.S32);
+         }
+
+         var vectorType = context.TypeVector(context.TypeS32(), pCount);
+         pCoords = context.CompositeConstruct(vectorType, elems);
+     }
+     else
+     {
+         pCoords = Src(AggregateType.S32);
+     }
+
+     SpvInstruction value = Src(componentType);
+
+     (var imageType, var imageVariable) = context.Images[new TextureMeta(texOp.CbufSlot, texOp.Handle, texOp.Format)];
+
+     // The loaded image is not referenced below (the texel pointer is built from the variable);
+     // the load is kept so that the emitted instructions stay the same.
+     context.Load(imageType, imageVariable);
+
+     SpvInstruction resultType = context.GetType(componentType);
+     SpvInstruction imagePointerType = context.TypePointer(StorageClass.Image, resultType);
+
+     var pointer = context.ImageTexelPointer(imagePointerType, imageVariable, pCoords, context.Constant(context.TypeU32(), 0));
+
+     // Operands passed to every atomic instruction right after the pointer.
+     var one = context.Constant(context.TypeU32(), 1);
+     var zero = context.Constant(context.TypeU32(), 0);
+
+     var result = (texOp.Flags & TextureFlags.AtomicMask) switch
+     {
+         TextureFlags.Add => context.AtomicIAdd(resultType, pointer, one, zero, value),
+         TextureFlags.Minimum => componentType == AggregateType.S32
+             ? context.AtomicSMin(resultType, pointer, one, zero, value)
+             : context.AtomicUMin(resultType, pointer, one, zero, value),
+         TextureFlags.Maximum => componentType == AggregateType.S32
+             ? context.AtomicSMax(resultType, pointer, one, zero, value)
+             : context.AtomicUMax(resultType, pointer, one, zero, value),
+         TextureFlags.Increment => context.AtomicIIncrement(resultType, pointer, one, zero),
+         TextureFlags.Decrement => context.AtomicIDecrement(resultType, pointer, one, zero),
+         TextureFlags.BitwiseAnd => context.AtomicAnd(resultType, pointer, one, zero, value),
+         TextureFlags.BitwiseOr => context.AtomicOr(resultType, pointer, one, zero, value),
+         TextureFlags.BitwiseXor => context.AtomicXor(resultType, pointer, one, zero, value),
+         TextureFlags.Swap => context.AtomicExchange(resultType, pointer, one, zero, value),
+         // Compare-and-swap reads one more source, after the value operand.
+         TextureFlags.CAS => context.AtomicCompareExchange(resultType, pointer, one, zero, zero, Src(componentType), value),
+         // Any other flag combination is treated as an add.
+         _ => context.AtomicIAdd(resultType, pointer, one, zero, value),
+     };
+
+     return new OperationResult(componentType, result);
+ }
+
+ /// <summary>
+ /// Generates an image read at integer coordinates.
+ /// </summary>
+ /// <remarks>
+ /// Sources are consumed in order: the optional index (indexed sampler types) followed by the
+ /// integer coordinates (plus one extra component for array types). The coordinates go through
+ /// <c>ScalingHelpers.ApplyScaling</c> before the read. A 4 component texel is read, then
+ /// passed to <c>GetSwizzledResult</c> with the operation index.
+ /// Bindless operations are not supported yet and yield the result of <c>GetZeroOperationResult</c>.
+ /// </remarks>
+ /// <param name="context">Code generation context that receives the instructions</param>
+ /// <param name="operation">Operation to generate, must be an <see cref="AstTextureOperation"/></param>
+ /// <returns>The swizzled texel, typed with the image format component type</returns>
+ private static OperationResult GenerateImageLoad(CodeGenContext context, AstOperation operation)
+ {
+     AstTextureOperation texOp = (AstTextureOperation)operation;
+
+     bool isBindless = (texOp.Flags & TextureFlags.Bindless) != 0;
+
+     var componentType = texOp.Format.GetComponentType();
+
+     // TODO: Bindless texture support. For now we just return 0/do nothing.
+     if (isBindless)
+     {
+         return GetZeroOperationResult(context, texOp, componentType, isVector: true);
+     }
+
+     bool isArray = (texOp.Type & SamplerType.Array) != 0;
+     bool isIndexed = (texOp.Type & SamplerType.Indexed) != 0;
+
+     // Bindless operations returned above, so the first source is always at index 0.
+     int srcIndex = 0;
+
+     SpvInstruction Src(AggregateType type)
+     {
+         return context.Get(type, texOp.GetSource(srcIndex++));
+     }
+
+     if (isIndexed)
+     {
+         // The index value is not used here, but the source still has to be consumed
+         // so that the coordinates are read from the right position.
+         Src(AggregateType.S32);
+     }
+
+     int coordsCount = texOp.Type.GetDimensions();
+
+     int pCount = coordsCount + (isArray ? 1 : 0);
+
+     SpvInstruction pCoords;
+
+     if (pCount > 1)
+     {
+         SpvInstruction[] elems = new SpvInstruction[pCount];
+
+         for (int i = 0; i < pCount; i++)
+         {
+             elems[i] = Src(AggregateType.S32);
+         }
+
+         var vectorType = context.TypeVector(context.TypeS32(), pCount);
+         pCoords = context.CompositeConstruct(vectorType, elems);
+     }
+     else
+     {
+         pCoords = Src(AggregateType.S32);
+     }
+
+     pCoords = ScalingHelpers.ApplyScaling(context, texOp, pCoords, intCoords: true, isBindless, isIndexed, isArray, pCount);
+
+     (var imageType, var imageVariable) = context.Images[new TextureMeta(texOp.CbufSlot, texOp.Handle, texOp.Format)];
+
+     var image = context.Load(imageType, imageVariable);
+     var imageComponentType = context.GetType(componentType);
+     var swizzledResultType = texOp.GetVectorType(componentType);
+
+     // The read always produces 4 components; the requested ones are picked afterwards.
+     var texel = context.ImageRead(context.TypeVector(imageComponentType, 4), image, pCoords, ImageOperandsMask.MaskNone);
+     var result = GetSwizzledResult(context, texel, swizzledResultType, texOp.Index);
+
+     return new OperationResult(componentType, result);
+ }
+
+ /// <summary>
+ /// Generates an image write at integer coordinates.
+ /// </summary>
+ /// <remarks>
+ /// Sources are consumed in order: the optional index (indexed sampler types), the integer
+ /// coordinates (plus one extra component for array types), then up to 4 texel components.
+ /// Components for which no source remains are filled with a zero constant of the component type.
+ /// Bindless operations are not supported yet and generate nothing.
+ /// </remarks>
+ /// <param name="context">Code generation context that receives the instructions</param>
+ /// <param name="operation">Operation to generate, must be an <see cref="AstTextureOperation"/></param>
+ /// <returns><see cref="OperationResult.Invalid"/>, as no value is produced</returns>
+ private static OperationResult GenerateImageStore(CodeGenContext context, AstOperation operation)
+ {
+     AstTextureOperation texOp = (AstTextureOperation)operation;
+
+     bool isBindless = (texOp.Flags & TextureFlags.Bindless) != 0;
+
+     // TODO: Bindless texture support. For now we just return 0/do nothing.
+     if (isBindless)
+     {
+         return OperationResult.Invalid;
+     }
+
+     bool isArray = (texOp.Type & SamplerType.Array) != 0;
+     bool isIndexed = (texOp.Type & SamplerType.Indexed) != 0;
+
+     // Bindless operations returned above, so the first source is always at index 0.
+     int srcIndex = 0;
+
+     SpvInstruction Src(AggregateType type)
+     {
+         return context.Get(type, texOp.GetSource(srcIndex++));
+     }
+
+     if (isIndexed)
+     {
+         // The index value is not used here, but the source still has to be consumed
+         // so that the coordinates are read from the right position.
+         Src(AggregateType.S32);
+     }
+
+     int coordsCount = texOp.Type.GetDimensions();
+
+     int pCount = coordsCount + (isArray ? 1 : 0);
+
+     SpvInstruction pCoords;
+
+     if (pCount > 1)
+     {
+         SpvInstruction[] elems = new SpvInstruction[pCount];
+
+         for (int i = 0; i < pCount; i++)
+         {
+             elems[i] = Src(AggregateType.S32);
+         }
+
+         var vectorType = context.TypeVector(context.TypeS32(), pCount);
+         pCoords = context.CompositeConstruct(vectorType, elems);
+     }
+     else
+     {
+         pCoords = Src(AggregateType.S32);
+     }
+
+     var componentType = texOp.Format.GetComponentType();
+
+     const int ComponentsCount = 4;
+
+     SpvInstruction[] cElems = new SpvInstruction[ComponentsCount];
+
+     for (int i = 0; i < ComponentsCount; i++)
+     {
+         if (srcIndex < texOp.SourcesCount)
+         {
+             cElems[i] = Src(componentType);
+         }
+         else
+         {
+             // No source left for this component, pad the texel with zero.
+             cElems[i] = componentType switch
+             {
+                 AggregateType.S32 => context.Constant(context.TypeS32(), 0),
+                 AggregateType.U32 => context.Constant(context.TypeU32(), 0u),
+                 _ => context.Constant(context.TypeFP32(), 0f),
+             };
+         }
+     }
+
+     var texel = context.CompositeConstruct(context.TypeVector(context.GetType(componentType), ComponentsCount), cElems);
+
+     (var imageType, var imageVariable) = context.Images[new TextureMeta(texOp.CbufSlot, texOp.Handle, texOp.Format)];
+
+     var image = context.Load(imageType, imageVariable);
+
+     context.ImageWrite(image, pCoords, texel, ImageOperandsMask.MaskNone);
+
+     return OperationResult.Invalid;
+ }
+
+ /// <summary>
+ /// Generates an <c>IsNan</c> test on the first source, read as FP64 when the instruction
+ /// carries the FP64 flag and as FP32 otherwise. The result is a boolean.
+ /// </summary>
+ private static OperationResult GenerateIsNan(CodeGenContext context, AstOperation operation)
+ {
+     var operand = operation.GetSource(0);
+     bool isDouble = operation.Inst.HasFlag(Instruction.FP64);
+
+     var boolType = context.TypeBool();
+
+     SpvInstruction value = isDouble
+         ? context.GetFP64(operand)
+         : context.GetFP32(operand);
+
+     return new OperationResult(AggregateType.Bool, context.IsNan(boolType, value));
+ }
+
+ /// <summary>
+ /// Generates a load by forwarding to <c>GenerateLoadOrStore</c> with <c>isStore</c> set to false.
+ /// </summary>
+ private static OperationResult GenerateLoad(CodeGenContext context, AstOperation operation)
+     => GenerateLoadOrStore(context, operation, isStore: false);
+
+ /// <summary>
+ /// Generates an FP32 load from a uniform buffer.
+ /// </summary>
+ /// <remarks>
+ /// The first source selects the buffer: it is read as an S32 index when
+ /// <c>context.UniformBuffersArray</c> is set, otherwise its operand value is used as the key
+ /// into <c>context.UniformBuffers</c>. The second source is an S32 offset; the access chain
+ /// uses constant 0, then <c>offset &gt;&gt; 2</c>, then <c>offset &amp; 3</c> as indices.
+ /// When the host reports the vector indexing bug, the last index is replaced by each of the
+ /// constants 0 to 3 in turn and the matching load is picked with a chain of selects.
+ /// </remarks>
+ /// <param name="context">Code generation context that receives the instructions</param>
+ /// <param name="operation">Operation to generate</param>
+ /// <returns>The loaded value, typed as FP32</returns>
+ private static OperationResult GenerateLoadConstant(CodeGenContext context, AstOperation operation)
+ {
+     var bufferSource = operation.GetSource(0);
+     var offset = context.Get(AggregateType.S32, operation.GetSource(1));
+
+     var memberIndex = context.Constant(context.TypeS32(), 0);
+     var outerIndex = context.ShiftRightArithmetic(context.TypeS32(), offset, context.Constant(context.TypeS32(), 2));
+     var innerIndex = context.BitwiseAnd(context.TypeS32(), offset, context.Constant(context.TypeS32(), 3));
+
+     // Builds the pointer to the element addressed by the given last index.
+     SpvInstruction ElemPointer(SpvInstruction lastIndex)
+     {
+         if (context.UniformBuffersArray != null)
+         {
+             var buffers = context.UniformBuffersArray;
+             var bufferIndex = context.Get(AggregateType.S32, bufferSource);
+
+             return context.AccessChain(context.TypePointer(StorageClass.Uniform, context.TypeFP32()), buffers, bufferIndex, memberIndex, outerIndex, lastIndex);
+         }
+
+         var buffer = context.UniformBuffers[((AstOperand)bufferSource).Value];
+
+         return context.AccessChain(context.TypePointer(StorageClass.Uniform, context.TypeFP32()), buffer, memberIndex, outerIndex, lastIndex);
+     }
+
+     if (!context.Config.GpuAccessor.QueryHostHasVectorIndexingBug())
+     {
+         var direct = context.Load(context.TypeFP32(), ElemPointer(innerIndex));
+
+         return new OperationResult(AggregateType.FP32, direct);
+     }
+
+     SpvInstruction selected = null;
+
+     // Test for each component individually.
+     for (int c = 0; c < 4; c++)
+     {
+         var constIndex = context.Constant(context.TypeS32(), c);
+
+         SpvInstruction candidate = context.Load(context.TypeFP32(), ElemPointer(constIndex));
+
+         if (selected == null)
+         {
+             selected = candidate;
+         }
+         else
+         {
+             selected = context.Select(context.TypeFP32(), context.IEqual(context.TypeBool(), innerIndex, constIndex), candidate, selected);
+         }
+     }
+
+     return new OperationResult(AggregateType.FP32, selected);
+ }
+
+ /// <summary>
+ /// Generates a load from <c>context.LocalMemory</c> using the <c>Private</c> storage class.
+ /// </summary>
+ private static OperationResult GenerateLoadLocal(CodeGenContext context, AstOperation operation)
+     => GenerateLoadLocalOrShared(context, operation, StorageClass.Private, context.LocalMemory);
+
+ /// <summary>
+ /// Generates a load from <c>context.SharedMemory</c> using the <c>Workgroup</c> storage class.
+ /// </summary>
+ private static OperationResult GenerateLoadShared(CodeGenContext context, AstOperation operation)
+     => GenerateLoadLocalOrShared(context, operation, StorageClass.Workgroup, context.SharedMemory);
+
+ /// <summary>
+ /// Generates a U32 load from the element of <paramref name="memory"/> selected by the first source.
+ /// </summary>
+ /// <param name="context">Code generation context that receives the instructions</param>
+ /// <param name="operation">Operation whose first source, read as S32, indexes the memory</param>
+ /// <param name="storageClass">Storage class used for the element pointer type</param>
+ /// <param name="memory">Variable that the access chain starts from</param>
+ /// <returns>The loaded value, typed as U32</returns>
+ private static OperationResult GenerateLoadLocalOrShared(
+     CodeGenContext context,
+     AstOperation operation,
+     StorageClass storageClass,
+     SpvInstruction memory)
+ {
+     var elemIndex = context.Get(AggregateType.S32, operation.GetSource(0));
+
+     var pointerType = context.TypePointer(storageClass, context.TypeU32());
+     var pointer = context.AccessChain(pointerType, memory, elemIndex);
+     var loaded = context.Load(context.TypeU32(), pointer);
+
+     return new OperationResult(AggregateType.U32, loaded);
+ }
+
+ /// <summary>
+ /// Generates a U32 load through the pointer returned by <c>GetStorageElemPointer</c>.
+ /// </summary>
+ private static OperationResult GenerateLoadStorage(CodeGenContext context, AstOperation operation)
+ {
+     var pointer = GetStorageElemPointer(context, operation);
+     var loaded = context.Load(context.TypeU32(), pointer);
+
+     return new OperationResult(AggregateType.U32, loaded);
+ }
+
+ /// <summary>
+ /// Generates an image LOD query.
+ /// </summary>
+ /// <remarks>
+ /// Sources are consumed in order: the optional index (indexed sampler types) followed by the
+ /// FP32 coordinates. The query yields a 2 component FP32 vector, from which the component
+ /// selected by the operation index is extracted.
+ /// Bindless operations are not supported yet and yield an S32 zero constant.
+ /// </remarks>
+ /// <param name="context">Code generation context that receives the instructions</param>
+ /// <param name="operation">Operation to generate, must be an <see cref="AstTextureOperation"/></param>
+ /// <returns>The extracted component, typed as FP32</returns>
+ private static OperationResult GenerateLod(CodeGenContext context, AstOperation operation)
+ {
+     AstTextureOperation texOp = (AstTextureOperation)operation;
+
+     bool isBindless = (texOp.Flags & TextureFlags.Bindless) != 0;
+
+     bool isIndexed = (texOp.Type & SamplerType.Indexed) != 0;
+
+     // TODO: Bindless texture support. For now we just return 0.
+     if (isBindless)
+     {
+         return new OperationResult(AggregateType.S32, context.Constant(context.TypeS32(), 0));
+     }
+
+     int srcIndex = 0;
+
+     SpvInstruction Src(AggregateType type)
+     {
+         return context.Get(type, texOp.GetSource(srcIndex++));
+     }
+
+     if (isIndexed)
+     {
+         // The index value is not used here, but the source still has to be consumed
+         // so that the coordinates are read from the right position.
+         Src(AggregateType.S32);
+     }
+
+     int pCount = texOp.Type.GetDimensions();
+
+     SpvInstruction pCoords;
+
+     if (pCount > 1)
+     {
+         SpvInstruction[] elems = new SpvInstruction[pCount];
+
+         for (int i = 0; i < pCount; i++)
+         {
+             elems[i] = Src(AggregateType.FP32);
+         }
+
+         var vectorType = context.TypeVector(context.TypeFP32(), pCount);
+         pCoords = context.CompositeConstruct(vectorType, elems);
+     }
+     else
+     {
+         pCoords = Src(AggregateType.FP32);
+     }
+
+     var meta = new TextureMeta(texOp.CbufSlot, texOp.Handle, texOp.Format);
+
+     (_, var sampledImageType, var sampledImageVariable) = context.Samplers[meta];
+
+     var image = context.Load(sampledImageType, sampledImageVariable);
+
+     var resultType = context.TypeVector(context.TypeFP32(), 2);
+     var packed = context.ImageQueryLod(resultType, image, pCoords);
+     var result = context.CompositeExtract(context.TypeFP32(), packed, (SpvLiteralInteger)texOp.Index);
+
+     return new OperationResult(AggregateType.FP32, result);
+ }
+
+ /// <summary>
+ /// Generates a base 2 logarithm by forwarding to <c>GenerateUnary</c> with the
+ /// <c>GlslLog2</c> delegate; no second delegate is supplied.
+ /// </summary>
+ private static OperationResult GenerateLogarithmB2(CodeGenContext context, AstOperation operation)
+     => GenerateUnary(context, operation, context.Delegates.GlslLog2, null);
+
+ /// <summary>
+ /// Generates a logical AND by forwarding to <c>GenerateBinaryBool</c> with the <c>LogicalAnd</c> delegate.
+ /// </summary>
+ private static OperationResult GenerateLogicalAnd(CodeGenContext context, AstOperation operation)
+     => GenerateBinaryBool(context, operation, context.Delegates.LogicalAnd);
+
+ /// <summary>
+ /// Generates a logical exclusive OR by forwarding to <c>GenerateBinaryBool</c> with the
+ /// <c>LogicalNotEqual</c> delegate.
+ /// </summary>
+ private static OperationResult GenerateLogicalExclusiveOr(CodeGenContext context, AstOperation operation)
+     => GenerateBinaryBool(context, operation, context.Delegates.LogicalNotEqual);
+
+ /// <summary>
+ /// Generates a logical NOT by forwarding to <c>GenerateUnaryBool</c> with the <c>LogicalNot</c> delegate.
+ /// </summary>
+ private static OperationResult GenerateLogicalNot(CodeGenContext context, AstOperation operation)
+     => GenerateUnaryBool(context, operation, context.Delegates.LogicalNot);
+
+ /// <summary>
+ /// Generates a logical OR by forwarding to <c>GenerateBinaryBool</c> with the <c>LogicalOr</c> delegate.
+ /// </summary>
+ private static OperationResult GenerateLogicalOr(CodeGenContext context, AstOperation operation)
+     => GenerateBinaryBool(context, operation, context.Delegates.LogicalOr);
+
+ /// <summary>
+ /// Generates a branch out of the enclosing do-while block, to the label returned by
+ /// <c>context.GetNextLabel</c> for the parent of that block.
+ /// </summary>
+ /// <returns><see cref="OperationResult.Invalid"/>, as no value is produced</returns>
+ private static OperationResult GenerateLoopBreak(CodeGenContext context, AstOperation operation)
+ {
+     // Walk up from the current block until the enclosing do-while block is found.
+     AstBlock enclosingLoop = context.CurrentBlock;
+
+     while (enclosingLoop.Type != AstBlockType.DoWhile)
+     {
+         enclosingLoop = enclosingLoop.Parent;
+     }
+
+     var exitLabel = context.GetNextLabel(enclosingLoop.Parent);
+
+     context.Branch(exitLabel);
+
+     return OperationResult.Invalid;
+ }
+
+ /// <summary>
+ /// Generates a branch to the continue target registered in <c>context.LoopTargets</c>
+ /// for the enclosing do-while block.
+ /// </summary>
+ /// <returns><see cref="OperationResult.Invalid"/>, as no value is produced</returns>
+ private static OperationResult GenerateLoopContinue(CodeGenContext context, AstOperation operation)
+ {
+     // Walk up from the current block until the enclosing do-while block is found.
+     AstBlock loopBlock = context.CurrentBlock;
+     while (loopBlock.Type != AstBlockType.DoWhile)
+     {
+         loopBlock = loopBlock.Parent;
+     }
+
+     // Only the continue target is needed, the first element of the pair is discarded.
+     (_, var continueTarget) = context.LoopTargets[loopBlock];
+
+     context.Branch(continueTarget);
+
+     return OperationResult.Invalid;
+ }
+
+ /// <summary>
+ /// Generates a maximum by forwarding to <c>GenerateBinary</c>
+ /// with the <c>GlslFMax</c> and <c>GlslSMax</c> delegates.
+ /// </summary>
+ private static OperationResult GenerateMaximum(CodeGenContext context, AstOperation operation)
+     => GenerateBinary(context, operation, context.Delegates.GlslFMax, context.Delegates.GlslSMax);
+
+ /// <summary>
+ /// Generates an unsigned maximum by forwarding to <c>GenerateBinaryU32</c> with the <c>GlslUMax</c> delegate.
+ /// </summary>
+ private static OperationResult GenerateMaximumU32(CodeGenContext context, AstOperation operation)
+     => GenerateBinaryU32(context, operation, context.Delegates.GlslUMax);
+
+ /// <summary>
+ /// Emits a memory barrier with <c>Scope.Device</c> as the scope operand and
+ /// <c>DefaultMemorySemantics</c> as the semantics operand.
+ /// </summary>
+ /// <returns><see cref="OperationResult.Invalid"/>, as no value is produced</returns>
+ private static OperationResult GenerateMemoryBarrier(CodeGenContext context, AstOperation operation)
+ {
+     var scope = context.Constant(context.TypeU32(), Scope.Device);
+     var semantics = context.Constant(context.TypeU32(), DefaultMemorySemantics);
+
+     context.MemoryBarrier(scope, semantics);
+
+     return OperationResult.Invalid;
+ }
+
+ /// <summary>
+ /// Generates a minimum by forwarding to <c>GenerateBinary</c>
+ /// with the <c>GlslFMin</c> and <c>GlslSMin</c> delegates.
+ /// </summary>
+ private static OperationResult GenerateMinimum(CodeGenContext context, AstOperation operation)
+     => GenerateBinary(context, operation, context.Delegates.GlslFMin, context.Delegates.GlslSMin);
+
+ /// <summary>
+ /// Generates an unsigned minimum by forwarding to <c>GenerateBinaryU32</c> with the <c>GlslUMin</c> delegate.
+ /// </summary>
+ private static OperationResult GenerateMinimumU32(CodeGenContext context, AstOperation operation)
+     => GenerateBinaryU32(context, operation, context.Delegates.GlslUMin);
+
+ /// <summary>
+ /// Generates a multiplication by forwarding to <c>GenerateBinary</c>
+ /// with the <c>FMul</c> and <c>IMul</c> delegates.
+ /// </summary>
+ private static OperationResult GenerateMultiply(CodeGenContext context, AstOperation operation)
+     => GenerateBinary(context, operation, context.Delegates.FMul, context.Delegates.IMul);
+
+ /// <summary>
+ /// Multiplies the two sources, read as S32, with <c>SMulExtended</c> and returns
+ /// member 1 of the resulting two member struct.
+ /// </summary>
+ private static OperationResult GenerateMultiplyHighS32(CodeGenContext context, AstOperation operation)
+ {
+     var lhs = operation.GetSource(0);
+     var rhs = operation.GetSource(1);
+
+     var pairType = context.TypeStruct(false, context.TypeS32(), context.TypeS32());
+     var extended = context.SMulExtended(pairType, context.GetS32(lhs), context.GetS32(rhs));
+     var high = context.CompositeExtract(context.TypeS32(), extended, 1);
+
+     return new OperationResult(AggregateType.S32, high);
+ }
+
+ /// <summary>
+ /// Multiplies the two sources, read as U32, with <c>UMulExtended</c> and returns
+ /// member 1 of the resulting two member struct.
+ /// </summary>
+ private static OperationResult GenerateMultiplyHighU32(CodeGenContext context, AstOperation operation)
+ {
+     var lhs = operation.GetSource(0);
+     var rhs = operation.GetSource(1);
+
+     var pairType = context.TypeStruct(false, context.TypeU32(), context.TypeU32());
+     var extended = context.UMulExtended(pairType, context.GetU32(lhs), context.GetU32(rhs));
+     var high = context.CompositeExtract(context.TypeU32(), extended, 1);
+
+     return new OperationResult(AggregateType.U32, high);
+ }
+
+ /// <summary>
+ /// Generates a negation by forwarding to <c>GenerateUnary</c>
+ /// with the <c>FNegate</c> and <c>SNegate</c> delegates.
+ /// </summary>
+ private static OperationResult GenerateNegate(CodeGenContext context, AstOperation operation)
+     => GenerateUnary(context, operation, context.Delegates.FNegate, context.Delegates.SNegate);
+
+ /// <summary>
+ /// Builds a 2 component U32 vector from the two sources and packs it into an FP64
+ /// value with <c>GlslPackDouble2x32</c>.
+ /// </summary>
+ private static OperationResult GeneratePackDouble2x32(CodeGenContext context, AstOperation operation)
+ {
+     var first = context.GetU32(operation.GetSource(0));
+     var second = context.GetU32(operation.GetSource(1));
+
+     var pairType = context.TypeVector(context.TypeU32(), 2);
+     var pair = context.CompositeConstruct(pairType, first, second);
+     var packed = context.GlslPackDouble2x32(context.TypeFP64(), pair);
+
+     return new OperationResult(AggregateType.FP64, packed);
+ }
+
+ /// <summary>
+ /// Builds a 2 component FP32 vector from the two sources and packs it into a U32
+ /// value with <c>GlslPackHalf2x16</c>.
+ /// </summary>
+ private static OperationResult GeneratePackHalf2x16(CodeGenContext context, AstOperation operation)
+ {
+     var first = context.GetFP32(operation.GetSource(0));
+     var second = context.GetFP32(operation.GetSource(1));
+
+     var pairType = context.TypeVector(context.TypeFP32(), 2);
+     var pair = context.CompositeConstruct(pairType, first, second);
+     var packed = context.GlslPackHalf2x16(context.TypeU32(), pair);
+
+     return new OperationResult(AggregateType.U32, packed);
+ }
+
+ /// <summary>
+ /// Generates a reciprocal square root by forwarding to <c>GenerateUnary</c> with the
+ /// <c>GlslInverseSqrt</c> delegate; no second delegate is supplied.
+ /// </summary>
+ private static OperationResult GenerateReciprocalSquareRoot(CodeGenContext context, AstOperation operation)
+     => GenerateUnary(context, operation, context.Delegates.GlslInverseSqrt, null);
+
+ /// <summary>
+ /// Emits a return through <c>context.Return()</c>.
+ /// </summary>
+ /// <returns><see cref="OperationResult.Invalid"/>, as no value is produced</returns>
+ private static OperationResult GenerateReturn(CodeGenContext context, AstOperation operation)
+ {
+     context.Return();
+
+     return OperationResult.Invalid;
+ }
+
+ /// <summary>
+ /// Generates a rounding by forwarding to <c>GenerateUnary</c> with the
+ /// <c>GlslRoundEven</c> delegate; no second delegate is supplied.
+ /// </summary>
+ private static OperationResult GenerateRound(CodeGenContext context, AstOperation operation)
+     => GenerateUnary(context, operation, context.Delegates.GlslRoundEven, null);
+
+ /// <summary>
+ /// Generates a left shift by forwarding to <c>GenerateBinaryS32</c> with the
+ /// <c>ShiftLeftLogical</c> delegate.
+ /// </summary>
+ private static OperationResult GenerateShiftLeft(CodeGenContext context, AstOperation operation)
+     => GenerateBinaryS32(context, operation, context.Delegates.ShiftLeftLogical);
+
+ /// <summary>
+ /// Generates a right shift by forwarding to <c>GenerateBinaryS32</c> with the
+ /// <c>ShiftRightArithmetic</c> delegate.
+ /// </summary>
+ private static OperationResult GenerateShiftRightS32(CodeGenContext context, AstOperation operation)
+     => GenerateBinaryS32(context, operation, context.Delegates.ShiftRightArithmetic);
+
+ /// <summary>
+ /// Generates a right shift by forwarding to <c>GenerateBinaryS32</c> with the
+ /// <c>ShiftRightLogical</c> delegate.
+ /// </summary>
+ private static OperationResult GenerateShiftRightU32(CodeGenContext context, AstOperation operation)
+     => GenerateBinaryS32(context, operation, context.Delegates.ShiftRightLogical);
+
+ /// <summary>
+ /// Generates a subgroup shuffle of the first source (FP32) from a computed source lane.
+ /// </summary>
+ /// <remarks>
+ /// With <c>segMask = (mask &gt;&gt; 8) &amp; 31</c> and <c>clamp = mask &amp; 31</c>, the source lane is
+ /// <c>(index &amp; ~segMask) | (laneId &amp; segMask)</c>. It is valid when it does not exceed
+ /// <c>(laneId &amp; segMask) | (clamp &amp; ~segMask)</c>. When invalid, the input value is returned
+ /// unchanged. The validity flag is stored into the local variable given by the fourth source.
+ /// </remarks>
+ /// <param name="context">Code generation context that receives the instructions</param>
+ /// <param name="operation">Operation with sources: value, index, mask and the local that receives the flag</param>
+ /// <returns>The shuffled (or unchanged) value, typed as FP32</returns>
+ private static OperationResult GenerateShuffle(CodeGenContext context, AstOperation operation)
+ {
+     var input = context.GetFP32(operation.GetSource(0));
+     var laneIndex = context.GetU32(operation.GetSource(1));
+     var laneMask = context.GetU32(operation.GetSource(2));
+
+     var c31 = context.Constant(context.TypeU32(), 31);
+     var c8 = context.Constant(context.TypeU32(), 8);
+
+     var clampBits = context.BitwiseAnd(context.TypeU32(), laneMask, c31);
+     var shiftedMask = context.ShiftRightLogical(context.TypeU32(), laneMask, c8);
+     var segmentMask = context.BitwiseAnd(context.TypeU32(), shiftedMask, c31);
+     var invSegmentMask = context.Not(context.TypeU32(), segmentMask);
+     var clampOutsideSegment = context.BitwiseAnd(context.TypeU32(), clampBits, invSegmentMask);
+     var indexOutsideSegment = context.BitwiseAnd(context.TypeU32(), laneIndex, invSegmentMask);
+
+     var laneId = GetScalarInput(context, IoVariable.SubgroupLaneId);
+
+     var lowestLane = context.BitwiseAnd(context.TypeU32(), laneId, segmentMask);
+     var highestLane = context.BitwiseOr(context.TypeU32(), lowestLane, clampOutsideSegment);
+     var sourceLane = context.BitwiseOr(context.TypeU32(), indexOutsideSegment, lowestLane);
+     var inRange = context.ULessThanEqual(context.TypeBool(), sourceLane, highestLane);
+     var shuffled = context.GroupNonUniformShuffle(context.TypeFP32(), context.Constant(context.TypeU32(), (int)Scope.Subgroup), input, sourceLane);
+     var selected = context.Select(context.TypeFP32(), inRange, shuffled, input);
+
+     // Write the validity flag to the local supplied as the last source.
+     var flagLocal = (AstOperand)operation.GetSource(3);
+     var flagPointer = context.GetLocalPointer(flagLocal);
+     var flagValue = context.BitcastIfNeeded(flagLocal.VarType, AggregateType.Bool, inRange);
+
+     context.Store(flagPointer, flagValue);
+
+     return new OperationResult(AggregateType.FP32, selected);
+ }
+
+ /// <summary>
+ /// Generates a subgroup shuffle of the first source (FP32) from the lane at <c>laneId + index</c>.
+ /// </summary>
+ /// <remarks>
+ /// With <c>segMask = (mask &gt;&gt; 8) &amp; 31</c> and <c>clamp = mask &amp; 31</c>, the source lane is valid
+ /// when it does not exceed <c>(laneId &amp; segMask) | (clamp &amp; ~segMask)</c>. When invalid, the
+ /// input value is returned unchanged. The validity flag is stored into the local variable
+ /// given by the fourth source.
+ /// </remarks>
+ /// <param name="context">Code generation context that receives the instructions</param>
+ /// <param name="operation">Operation with sources: value, index, mask and the local that receives the flag</param>
+ /// <returns>The shuffled (or unchanged) value, typed as FP32</returns>
+ private static OperationResult GenerateShuffleDown(CodeGenContext context, AstOperation operation)
+ {
+     var input = context.GetFP32(operation.GetSource(0));
+     var laneOffset = context.GetU32(operation.GetSource(1));
+     var laneMask = context.GetU32(operation.GetSource(2));
+
+     var c31 = context.Constant(context.TypeU32(), 31);
+     var c8 = context.Constant(context.TypeU32(), 8);
+
+     var clampBits = context.BitwiseAnd(context.TypeU32(), laneMask, c31);
+     var shiftedMask = context.ShiftRightLogical(context.TypeU32(), laneMask, c8);
+     var segmentMask = context.BitwiseAnd(context.TypeU32(), shiftedMask, c31);
+     var invSegmentMask = context.Not(context.TypeU32(), segmentMask);
+     var clampOutsideSegment = context.BitwiseAnd(context.TypeU32(), clampBits, invSegmentMask);
+
+     var laneId = GetScalarInput(context, IoVariable.SubgroupLaneId);
+
+     var lowestLane = context.BitwiseAnd(context.TypeU32(), laneId, segmentMask);
+     var highestLane = context.BitwiseOr(context.TypeU32(), lowestLane, clampOutsideSegment);
+     var sourceLane = context.IAdd(context.TypeU32(), laneId, laneOffset);
+     var inRange = context.ULessThanEqual(context.TypeBool(), sourceLane, highestLane);
+     var shuffled = context.GroupNonUniformShuffle(context.TypeFP32(), context.Constant(context.TypeU32(), (int)Scope.Subgroup), input, sourceLane);
+     var selected = context.Select(context.TypeFP32(), inRange, shuffled, input);
+
+     // Write the validity flag to the local supplied as the last source.
+     var flagLocal = (AstOperand)operation.GetSource(3);
+     var flagPointer = context.GetLocalPointer(flagLocal);
+     var flagValue = context.BitcastIfNeeded(flagLocal.VarType, AggregateType.Bool, inRange);
+
+     context.Store(flagPointer, flagValue);
+
+     return new OperationResult(AggregateType.FP32, selected);
+ }
+
+ /// <summary>
+ /// Generates a subgroup shuffle of the first source (FP32) from the lane at <c>laneId - index</c>.
+ /// </summary>
+ /// <remarks>
+ /// With <c>segMask = (mask &gt;&gt; 8) &amp; 31</c>, the source lane is valid when a signed comparison
+ /// finds it greater than or equal to <c>laneId &amp; segMask</c>. When invalid, the input value is
+ /// returned unchanged. The validity flag is stored into the local variable given by the
+ /// fourth source.
+ /// </remarks>
+ /// <param name="context">Code generation context that receives the instructions</param>
+ /// <param name="operation">Operation with sources: value, index, mask and the local that receives the flag</param>
+ /// <returns>The shuffled (or unchanged) value, typed as FP32</returns>
+ private static OperationResult GenerateShuffleUp(CodeGenContext context, AstOperation operation)
+ {
+     var input = context.GetFP32(operation.GetSource(0));
+     var laneOffset = context.GetU32(operation.GetSource(1));
+     var laneMask = context.GetU32(operation.GetSource(2));
+
+     var c31 = context.Constant(context.TypeU32(), 31);
+     var c8 = context.Constant(context.TypeU32(), 8);
+
+     var shiftedMask = context.ShiftRightLogical(context.TypeU32(), laneMask, c8);
+     var segmentMask = context.BitwiseAnd(context.TypeU32(), shiftedMask, c31);
+
+     var laneId = GetScalarInput(context, IoVariable.SubgroupLaneId);
+
+     var lowestLane = context.BitwiseAnd(context.TypeU32(), laneId, segmentMask);
+     var sourceLane = context.ISub(context.TypeU32(), laneId, laneOffset);
+     var inRange = context.SGreaterThanEqual(context.TypeBool(), sourceLane, lowestLane);
+     var shuffled = context.GroupNonUniformShuffle(context.TypeFP32(), context.Constant(context.TypeU32(), (int)Scope.Subgroup), input, sourceLane);
+     var selected = context.Select(context.TypeFP32(), inRange, shuffled, input);
+
+     // Write the validity flag to the local supplied as the last source.
+     var flagLocal = (AstOperand)operation.GetSource(3);
+     var flagPointer = context.GetLocalPointer(flagLocal);
+     var flagValue = context.BitcastIfNeeded(flagLocal.VarType, AggregateType.Bool, inRange);
+
+     context.Store(flagPointer, flagValue);
+
+     return new OperationResult(AggregateType.FP32, selected);
+ }
+
+ /// <summary>
+ /// Generates a subgroup shuffle of the first source (FP32) from the lane at <c>laneId ^ index</c>.
+ /// </summary>
+ /// <remarks>
+ /// With <c>segMask = (mask &gt;&gt; 8) &amp; 31</c> and <c>clamp = mask &amp; 31</c>, the source lane is valid
+ /// when it does not exceed <c>(laneId &amp; segMask) | (clamp &amp; ~segMask)</c>. When invalid, the
+ /// input value is returned unchanged. The validity flag is stored into the local variable
+ /// given by the fourth source.
+ /// </remarks>
+ /// <param name="context">Code generation context that receives the instructions</param>
+ /// <param name="operation">Operation with sources: value, index, mask and the local that receives the flag</param>
+ /// <returns>The shuffled (or unchanged) value, typed as FP32</returns>
+ private static OperationResult GenerateShuffleXor(CodeGenContext context, AstOperation operation)
+ {
+     var input = context.GetFP32(operation.GetSource(0));
+     var laneBits = context.GetU32(operation.GetSource(1));
+     var laneMask = context.GetU32(operation.GetSource(2));
+
+     var c31 = context.Constant(context.TypeU32(), 31);
+     var c8 = context.Constant(context.TypeU32(), 8);
+
+     var clampBits = context.BitwiseAnd(context.TypeU32(), laneMask, c31);
+     var shiftedMask = context.ShiftRightLogical(context.TypeU32(), laneMask, c8);
+     var segmentMask = context.BitwiseAnd(context.TypeU32(), shiftedMask, c31);
+     var invSegmentMask = context.Not(context.TypeU32(), segmentMask);
+     var clampOutsideSegment = context.BitwiseAnd(context.TypeU32(), clampBits, invSegmentMask);
+
+     var laneId = GetScalarInput(context, IoVariable.SubgroupLaneId);
+
+     var lowestLane = context.BitwiseAnd(context.TypeU32(), laneId, segmentMask);
+     var highestLane = context.BitwiseOr(context.TypeU32(), lowestLane, clampOutsideSegment);
+     var sourceLane = context.BitwiseXor(context.TypeU32(), laneId, laneBits);
+     var inRange = context.ULessThanEqual(context.TypeBool(), sourceLane, highestLane);
+     var shuffled = context.GroupNonUniformShuffle(context.TypeFP32(), context.Constant(context.TypeU32(), (int)Scope.Subgroup), input, sourceLane);
+     var selected = context.Select(context.TypeFP32(), inRange, shuffled, input);
+
+     // Write the validity flag to the local supplied as the last source.
+     var flagLocal = (AstOperand)operation.GetSource(3);
+     var flagPointer = context.GetLocalPointer(flagLocal);
+     var flagValue = context.BitcastIfNeeded(flagLocal.VarType, AggregateType.Bool, inRange);
+
+     context.Store(flagPointer, flagValue);
+
+     return new OperationResult(AggregateType.FP32, selected);
+ }
+
+ // Forwards to GenerateUnary with the GlslSin delegate; no integer emitter is supplied.
+ private static OperationResult GenerateSine(CodeGenContext context, AstOperation operation)
+     => GenerateUnary(context, operation, context.Delegates.GlslSin, emitI: null);
+
+ // Forwards to GenerateUnary with the GlslSqrt delegate; no integer emitter is supplied.
+ private static OperationResult GenerateSquareRoot(CodeGenContext context, AstOperation operation)
+     => GenerateUnary(context, operation, context.Delegates.GlslSqrt, emitI: null);
+
+ // Store entry point: delegates to the shared load/store generator in store mode.
+ private static OperationResult GenerateStore(CodeGenContext context, AstOperation operation)
+     => GenerateLoadOrStore(context, operation, isStore: true);
+
+ // Stores a 32-bit value into context.LocalMemory using the Private storage class.
+ private static OperationResult GenerateStoreLocal(CodeGenContext context, AstOperation operation)
+     => GenerateStoreLocalOrShared(
+         context,
+         operation,
+         storageClass: StorageClass.Private,
+         memory: context.LocalMemory);
+
+ // Stores a 32-bit value into context.SharedMemory using the Workgroup storage class.
+ private static OperationResult GenerateStoreShared(CodeGenContext context, AstOperation operation)
+     => GenerateStoreLocalOrShared(
+         context,
+         operation,
+         storageClass: StorageClass.Workgroup,
+         memory: context.SharedMemory);
+
+ // Shared implementation of the local and shared memory stores.
+ // Source 0 is the element index into the memory array and source 1 the 32-bit value;
+ // the value is written through a pointer of the given storage class.
+ // Always returns OperationResult.Invalid since a store produces no value.
+ private static OperationResult GenerateStoreLocalOrShared(
+     CodeGenContext context,
+     AstOperation operation,
+     StorageClass storageClass,
+     SpvInstruction memory)
+ {
+     var elementIndex = context.Get(AggregateType.S32, operation.GetSource(0));
+     var data = context.Get(AggregateType.U32, operation.GetSource(1));
+
+     var pointerType = context.TypePointer(storageClass, context.TypeU32());
+     var destination = context.AccessChain(pointerType, memory, elementIndex);
+
+     context.Store(destination, data);
+
+     return OperationResult.Invalid;
+ }
+
+ // Stores a 16-bit value into shared memory through the small-integer store helper.
+ private static OperationResult GenerateStoreShared16(CodeGenContext context, AstOperation operation)
+ {
+     GenerateStoreSharedSmallInt(context, operation, bitSize: 16);
+
+     return OperationResult.Invalid;
+ }
+
+ // Stores an 8-bit value into shared memory through the small-integer store helper.
+ private static OperationResult GenerateStoreShared8(CodeGenContext context, AstOperation operation)
+ {
+     GenerateStoreSharedSmallInt(context, operation, bitSize: 8);
+
+     return OperationResult.Invalid;
+ }
+
+ // Stores the 32-bit value given by source 2 into the storage buffer element
+ // addressed by sources 0 and 1 (see GetStorageElemPointer).
+ private static OperationResult GenerateStoreStorage(CodeGenContext context, AstOperation operation)
+ {
+     var destination = GetStorageElemPointer(context, operation);
+     var data = context.Get(AggregateType.U32, operation.GetSource(2));
+
+     context.Store(destination, data);
+
+     return OperationResult.Invalid;
+ }
+
+ // Stores a 16-bit value into a storage buffer through the small-integer store helper.
+ private static OperationResult GenerateStoreStorage16(CodeGenContext context, AstOperation operation)
+ {
+     GenerateStoreStorageSmallInt(context, operation, bitSize: 16);
+
+     return OperationResult.Invalid;
+ }
+
+ // Stores an 8-bit value into a storage buffer through the small-integer store helper.
+ private static OperationResult GenerateStoreStorage8(CodeGenContext context, AstOperation operation)
+ {
+     GenerateStoreStorageSmallInt(context, operation, bitSize: 8);
+
+     return OperationResult.Invalid;
+ }
+
+ // Subtraction: FSub for floating-point operations, ISub for integer ones.
+ private static OperationResult GenerateSubtract(CodeGenContext context, AstOperation operation)
+     => GenerateBinary(
+         context,
+         operation,
+         emitF: context.Delegates.FSub,
+         emitI: context.Delegates.ISub);
+
+ // Computes x * xLut[i] + y * yLut[i], where i is a 2-bit field taken from the mask
+ // (source 2). The field is selected by the low two bits of the subgroup lane id:
+ // i = (mask >> ((laneId & 3) * 2)) & 3.
+ private static OperationResult GenerateSwizzleAdd(CodeGenContext context, AstOperation operation)
+ {
+     var lhs = context.Get(AggregateType.FP32, operation.GetSource(0));
+     var rhs = context.Get(AggregateType.FP32, operation.GetSource(1));
+     var selector = context.Get(AggregateType.U32, operation.GetSource(2));
+
+     var f32 = context.TypeFP32();
+     var vec4Type = context.TypeVector(f32, 4);
+
+     var plusOne = context.Constant(f32, 1.0f);
+     var negOne = context.Constant(f32, -1.0f);
+     var nought = context.Constant(f32, 0.0f);
+
+     // Per-entry multipliers applied to x and y respectively.
+     var lhsFactors = context.ConstantComposite(vec4Type, plusOne, negOne, plusOne, nought);
+     var rhsFactors = context.ConstantComposite(vec4Type, plusOne, plusOne, negOne, plusOne);
+
+     var u32 = context.TypeU32();
+     var twoBitMask = context.Constant(u32, 3);
+
+     var laneId = GetScalarInput(context, IoVariable.SubgroupLaneId);
+
+     // Each lane (modulo 4) owns a 2-bit field of the selector.
+     var fieldShift = context.BitwiseAnd(u32, laneId, twoBitMask);
+     fieldShift = context.ShiftLeftLogical(u32, fieldShift, context.Constant(u32, 1));
+
+     var tableIndex = context.ShiftRightLogical(u32, selector, fieldShift);
+     tableIndex = context.BitwiseAnd(u32, tableIndex, twoBitMask);
+
+     var lhsFactor = context.VectorExtractDynamic(f32, lhsFactors, tableIndex);
+     var rhsFactor = context.VectorExtractDynamic(f32, rhsFactors, tableIndex);
+
+     var lhsTerm = context.FMul(f32, lhs, lhsFactor);
+     var rhsTerm = context.FMul(f32, rhs, rhsFactor);
+     var sum = context.FAdd(f32, lhsTerm, rhsTerm);
+
+     return new OperationResult(AggregateType.FP32, sum);
+ }
+
+ // Emits the SPIR-V image instruction (fetch, gather, depth-compare sample or regular
+ // sample) for a texture operation.
+ //
+ // The operation sources are consumed strictly in order through the local Src() helper,
+ // which advances srcIndex on every call: optional index, coordinates (plus array layer),
+ // optional depth reference, optional derivatives, optional sample index or LOD,
+ // optional offset(s), optional LOD bias and optional gather component.
+ // Bindless operations are not implemented and produce a zero result.
+ private static OperationResult GenerateTextureSample(CodeGenContext context, AstOperation operation)
+ {
+ AstTextureOperation texOp = (AstTextureOperation)operation;
+
+ // Decode the operation flags.
+ bool isBindless = (texOp.Flags & TextureFlags.Bindless) != 0;
+ bool isGather = (texOp.Flags & TextureFlags.Gather) != 0;
+ bool hasDerivatives = (texOp.Flags & TextureFlags.Derivatives) != 0;
+ bool intCoords = (texOp.Flags & TextureFlags.IntCoords) != 0;
+ bool hasLodBias = (texOp.Flags & TextureFlags.LodBias) != 0;
+ bool hasLodLevel = (texOp.Flags & TextureFlags.LodLevel) != 0;
+ bool hasOffset = (texOp.Flags & TextureFlags.Offset) != 0;
+ bool hasOffsets = (texOp.Flags & TextureFlags.Offsets) != 0;
+
+ // Decode the sampler type modifiers.
+ bool isArray = (texOp.Type & SamplerType.Array) != 0;
+ bool isIndexed = (texOp.Type & SamplerType.Indexed) != 0;
+ bool isMultisample = (texOp.Type & SamplerType.Multisample) != 0;
+ bool isShadow = (texOp.Type & SamplerType.Shadow) != 0;
+
+ // Only non-gather shadow samples produce a scalar; everything else yields a 4-component vector.
+ bool colorIsVector = isGather || !isShadow;
+
+ // TODO: Bindless texture support. For now we just return 0.
+ if (isBindless)
+ {
+ return GetZeroOperationResult(context, texOp, AggregateType.FP32, colorIsVector);
+ }
+
+ // This combination is valid, but not available on GLSL.
+ // For now, ignore the LOD level and do a normal sample.
+ // TODO: How to implement it properly?
+ if (hasLodLevel && isArray && isShadow)
+ {
+ hasLodLevel = false;
+ }
+
+ // isBindless is always false here because of the early return above, so this starts at 0.
+ int srcIndex = isBindless ? 1 : 0;
+
+ // Reads the next source as the requested type and advances the source cursor.
+ SpvInstruction Src(AggregateType type)
+ {
+ return context.Get(type, texOp.GetSource(srcIndex++));
+ }
+
+ // The index value is read to consume its source; it is not referenced again in this method.
+ SpvInstruction index = null;
+
+ if (isIndexed)
+ {
+ index = Src(AggregateType.S32);
+ }
+
+ int coordsCount = texOp.Type.GetDimensions();
+
+ // pCount is the size of the coordinate vector, including the array layer if present.
+ int pCount = coordsCount;
+
+ int arrayIndexElem = -1;
+
+ if (isArray)
+ {
+ arrayIndexElem = pCount++;
+ }
+
+ AggregateType coordType = intCoords ? AggregateType.S32 : AggregateType.FP32;
+
+ // Builds the coordinate vector (or scalar when count is 1) from the next sources.
+ // The array layer is read as an integer and converted to float for non-integer coordinates.
+ SpvInstruction AssemblePVector(int count)
+ {
+ if (count > 1)
+ {
+ SpvInstruction[] elems = new SpvInstruction[count];
+
+ for (int index = 0; index < count; index++)
+ {
+ if (arrayIndexElem == index)
+ {
+ elems[index] = Src(AggregateType.S32);
+
+ if (!intCoords)
+ {
+ elems[index] = context.ConvertSToF(context.TypeFP32(), elems[index]);
+ }
+ }
+ else
+ {
+ elems[index] = Src(coordType);
+ }
+ }
+
+ var vectorType = context.TypeVector(intCoords ? context.TypeS32() : context.TypeFP32(), count);
+ return context.CompositeConstruct(vectorType, elems);
+ }
+ else
+ {
+ return Src(coordType);
+ }
+ }
+
+ // For gathers on hosts reporting a non-zero gather bias precision, adds
+ // 1 / (textureSize * 2^(precision + 1)) to the coordinates; otherwise returns them unchanged.
+ SpvInstruction ApplyBias(SpvInstruction vector, SpvInstruction image)
+ {
+ int gatherBiasPrecision = context.Config.GpuAccessor.QueryHostGatherBiasPrecision();
+ if (isGather && gatherBiasPrecision != 0)
+ {
+ // GPU requires texture gather to be slightly offset to match NVIDIA behaviour when point is exactly between two texels.
+ // Offset by the gather precision divided by 2 to correct for rounding.
+ var sizeType = pCount == 1 ? context.TypeS32() : context.TypeVector(context.TypeS32(), pCount);
+ var pVectorType = pCount == 1 ? context.TypeFP32() : context.TypeVector(context.TypeFP32(), pCount);
+
+ var bias = context.Constant(context.TypeFP32(), (float)(1 << (gatherBiasPrecision + 1)));
+ var biasVector = context.CompositeConstruct(pVectorType, Enumerable.Repeat(bias, pCount).ToArray());
+
+ var one = context.Constant(context.TypeFP32(), 1f);
+ var oneVector = context.CompositeConstruct(pVectorType, Enumerable.Repeat(one, pCount).ToArray());
+
+ var divisor = context.FMul(
+ pVectorType,
+ context.ConvertSToF(pVectorType, context.ImageQuerySize(sizeType, image)),
+ biasVector);
+
+ vector = context.FAdd(pVectorType, vector, context.FDiv(pVectorType, oneVector, divisor));
+ }
+
+ return vector;
+ }
+
+ SpvInstruction pCoords = AssemblePVector(pCount);
+ pCoords = ScalingHelpers.ApplyScaling(context, texOp, pCoords, intCoords, isBindless, isIndexed, isArray, pCount);
+
+ // Builds one derivative vector (or scalar when count is 1) from the next FP32 sources.
+ SpvInstruction AssembleDerivativesVector(int count)
+ {
+ if (count > 1)
+ {
+ SpvInstruction[] elems = new SpvInstruction[count];
+
+ for (int index = 0; index < count; index++)
+ {
+ elems[index] = Src(AggregateType.FP32);
+ }
+
+ var vectorType = context.TypeVector(context.TypeFP32(), count);
+ return context.CompositeConstruct(vectorType, elems);
+ }
+ else
+ {
+ return Src(AggregateType.FP32);
+ }
+ }
+
+ // Depth reference value, only present for shadow samplers.
+ SpvInstruction dRef = null;
+
+ if (isShadow)
+ {
+ dRef = Src(AggregateType.FP32);
+ }
+
+ SpvInstruction[] derivatives = null;
+
+ if (hasDerivatives)
+ {
+ derivatives = new[]
+ {
+ AssembleDerivativesVector(coordsCount), // dPdx
+ AssembleDerivativesVector(coordsCount) // dPdy
+ };
+ }
+
+ SpvInstruction sample = null;
+ SpvInstruction lod = null;
+
+ // NOTE(review): when hasLodLevel was cleared above (array shadow sampler), the LOD source
+ // is not consumed here, so any later Src() call would read it instead of its own source.
+ // Confirm whether offsets/bias can follow the LOD for that combination.
+ if (isMultisample)
+ {
+ sample = Src(AggregateType.S32);
+ }
+ else if (hasLodLevel)
+ {
+ lod = Src(coordType);
+ }
+
+ // Builds one texel offset vector (or scalar when count is 1) from the next S32 sources.
+ // NOTE(review): the vector is built with ConstantComposite, which presumably requires the
+ // sources to be constants - verify against the callers producing these operations.
+ SpvInstruction AssembleOffsetVector(int count)
+ {
+ if (count > 1)
+ {
+ SpvInstruction[] elems = new SpvInstruction[count];
+
+ for (int index = 0; index < count; index++)
+ {
+ elems[index] = Src(AggregateType.S32);
+ }
+
+ var vectorType = context.TypeVector(context.TypeS32(), count);
+
+ return context.ConstantComposite(vectorType, elems);
+ }
+ else
+ {
+ return Src(AggregateType.S32);
+ }
+ }
+
+ // Either a single offset, or four offsets (one per gathered texel), or none.
+ SpvInstruction[] offsets = null;
+
+ if (hasOffset)
+ {
+ offsets = new[] { AssembleOffsetVector(coordsCount) };
+ }
+ else if (hasOffsets)
+ {
+ offsets = new[]
+ {
+ AssembleOffsetVector(coordsCount),
+ AssembleOffsetVector(coordsCount),
+ AssembleOffsetVector(coordsCount),
+ AssembleOffsetVector(coordsCount)
+ };
+ }
+
+ SpvInstruction lodBias = null;
+
+ if (hasLodBias)
+ {
+ lodBias = Src(AggregateType.FP32);
+ }
+
+ SpvInstruction compIdx = null;
+
+ // textureGather* optional extra component index,
+ // not needed for shadow samplers.
+ if (isGather && !isShadow)
+ {
+ compIdx = Src(AggregateType.S32);
+ }
+
+ // Build the optional image operands. They are appended in the order
+ // Bias, Lod, Grad, ConstOffset/ConstOffsets, Sample, together with the matching mask bits.
+ var operandsList = new List<SpvInstruction>();
+ var operandsMask = ImageOperandsMask.MaskNone;
+
+ if (hasLodBias)
+ {
+ operandsMask |= ImageOperandsMask.Bias;
+ operandsList.Add(lodBias);
+ }
+
+ if (!isMultisample && hasLodLevel)
+ {
+ operandsMask |= ImageOperandsMask.Lod;
+ operandsList.Add(lod);
+ }
+
+ if (hasDerivatives)
+ {
+ operandsMask |= ImageOperandsMask.Grad;
+ operandsList.Add(derivatives[0]);
+ operandsList.Add(derivatives[1]);
+ }
+
+ if (hasOffset)
+ {
+ operandsMask |= ImageOperandsMask.ConstOffset;
+ operandsList.Add(offsets[0]);
+ }
+ else if (hasOffsets)
+ {
+ // The four offsets are packed into a constant array of four 2-component integer vectors.
+ operandsMask |= ImageOperandsMask.ConstOffsets;
+ SpvInstruction arrayv2 = context.TypeArray(context.TypeVector(context.TypeS32(), 2), context.Constant(context.TypeU32(), 4));
+ operandsList.Add(context.ConstantComposite(arrayv2, offsets[0], offsets[1], offsets[2], offsets[3]));
+ }
+
+ if (isMultisample)
+ {
+ operandsMask |= ImageOperandsMask.Sample;
+ operandsList.Add(sample);
+ }
+
+ var resultType = colorIsVector ? context.TypeVector(context.TypeFP32(), 4) : context.TypeFP32();
+
+ // Look up the sampler variable declared for this texture.
+ var meta = new TextureMeta(texOp.CbufSlot, texOp.Handle, texOp.Format);
+
+ (var imageType, var sampledImageType, var sampledImageVariable) = context.Samplers[meta];
+
+ var image = context.Load(sampledImageType, sampledImageVariable);
+
+ // Integer coordinates use ImageFetch below, which is given the image extracted from the sampled image.
+ if (intCoords)
+ {
+ image = context.Image(imageType, image);
+ }
+
+ pCoords = ApplyBias(pCoords, image);
+
+ var operands = operandsList.ToArray();
+
+ SpvInstruction result;
+
+ // Select the instruction: fetch, gather (with or without depth compare),
+ // depth-compare sample, or regular sample with explicit or implicit LOD.
+ if (intCoords)
+ {
+ result = context.ImageFetch(resultType, image, pCoords, operandsMask, operands);
+ }
+ else if (isGather)
+ {
+ if (isShadow)
+ {
+ result = context.ImageDrefGather(resultType, image, pCoords, dRef, operandsMask, operands);
+ }
+ else
+ {
+ result = context.ImageGather(resultType, image, pCoords, compIdx, operandsMask, operands);
+ }
+ }
+ else if (isShadow)
+ {
+ if (hasLodLevel)
+ {
+ result = context.ImageSampleDrefExplicitLod(resultType, image, pCoords, dRef, operandsMask, operands);
+ }
+ else
+ {
+ result = context.ImageSampleDrefImplicitLod(resultType, image, pCoords, dRef, operandsMask, operands);
+ }
+ }
+ else if (hasDerivatives || hasLodLevel)
+ {
+ result = context.ImageSampleExplicitLod(resultType, image, pCoords, operandsMask, operands);
+ }
+ else
+ {
+ result = context.ImageSampleImplicitLod(resultType, image, pCoords, operandsMask, operands);
+ }
+
+ var swizzledResultType = AggregateType.FP32;
+
+ // Vector results are reduced to the components selected by the texOp.Index mask.
+ if (colorIsVector)
+ {
+ swizzledResultType = texOp.GetVectorType(swizzledResultType);
+
+ result = GetSwizzledResult(context, result, swizzledResultType, texOp.Index);
+ }
+
+ return new OperationResult(swizzledResultType, result);
+ }
+
+ // Emits a texture size query. texOp.Index selects what is returned: 3 yields the number
+ // of mip levels, any other value yields that component of the size vector.
+ // Bindless operations are not implemented and return the constant 0.
+ private static OperationResult GenerateTextureSize(CodeGenContext context, AstOperation operation)
+ {
+     var texOp = (AstTextureOperation)operation;
+
+     bool isBindless = (texOp.Flags & TextureFlags.Bindless) != 0;
+
+     // TODO: Bindless texture support. For now we just return 0.
+     if (isBindless)
+     {
+         var zero = context.Constant(context.TypeS32(), 0);
+
+         return new OperationResult(AggregateType.S32, zero);
+     }
+
+     bool isIndexed = (texOp.Type & SamplerType.Indexed) != 0;
+
+     if (isIndexed)
+     {
+         // The index source is evaluated here, but its value is not used by the rest of this method.
+         _ = context.GetS32(texOp.GetSource(0));
+     }
+
+     var meta = new TextureMeta(texOp.CbufSlot, texOp.Handle, texOp.Format);
+     var (imageType, sampledImageType, sampledImageVariable) = context.Samplers[meta];
+
+     var sampledImage = context.Load(sampledImageType, sampledImageVariable);
+     var image = context.Image(imageType, sampledImage);
+
+     if (texOp.Index == 3)
+     {
+         var levels = context.ImageQueryLevels(context.TypeS32(), image);
+
+         return new OperationResult(AggregateType.S32, levels);
+     }
+
+     var type = context.SamplersTypes[meta];
+
+     // Multisample textures and texture buffers are queried without a LOD argument.
+     bool hasLod = !type.HasFlag(SamplerType.Multisample) && type != SamplerType.TextureBuffer;
+
+     // Cube textures report a 2-component size; array textures add one component.
+     bool isCube = (type & SamplerType.Mask) == SamplerType.TextureCube;
+     int dimensions = isCube ? 2 : type.GetDimensions();
+
+     if (type.HasFlag(SamplerType.Array))
+     {
+         dimensions++;
+     }
+
+     SpvInstruction sizeType;
+
+     if (dimensions == 1)
+     {
+         sizeType = context.TypeS32();
+     }
+     else
+     {
+         sizeType = context.TypeVector(context.TypeS32(), dimensions);
+     }
+
+     SpvInstruction size;
+
+     if (hasLod)
+     {
+         // The LOD follows the index source when one is present.
+         int lodSrcIndex = (isBindless || isIndexed) ? 1 : 0;
+         var lod = context.GetS32(operation.GetSource(lodSrcIndex));
+
+         size = context.ImageQuerySizeLod(sizeType, image, lod);
+     }
+     else
+     {
+         size = context.ImageQuerySize(sizeType, image);
+     }
+
+     if (dimensions != 1)
+     {
+         size = context.CompositeExtract(context.TypeS32(), size, (SpvLiteralInteger)texOp.Index);
+     }
+
+     // Unscaling is applied to the first two components, or to any component of a 3D texture.
+     bool is3D = (type & SamplerType.Mask) == SamplerType.Texture3D;
+
+     if (texOp.Index < 2 || is3D)
+     {
+         size = ScalingHelpers.ApplyUnscaling(context, texOp.WithType(type), size, isBindless, isIndexed);
+     }
+
+     return new OperationResult(AggregateType.S32, size);
+ }
+
+ // Forwards to GenerateUnary with the GlslTrunc delegate; no integer emitter is supplied.
+ private static OperationResult GenerateTruncate(CodeGenContext context, AstOperation operation)
+     => GenerateUnary(context, operation, context.Delegates.GlslTrunc, emitI: null);
+
+ // Unpacks a 64-bit float into two 32-bit unsigned integers and returns the one
+ // selected by operation.Index.
+ private static OperationResult GenerateUnpackDouble2x32(CodeGenContext context, AstOperation operation)
+ {
+     var packed = context.GetFP64(operation.GetSource(0));
+
+     var pairType = context.TypeVector(context.TypeU32(), 2);
+     var halves = context.GlslUnpackDouble2x32(pairType, packed);
+     var selected = context.CompositeExtract(context.TypeU32(), halves, operation.Index);
+
+     return new OperationResult(AggregateType.U32, selected);
+ }
+
+ // Unpacks a 32-bit unsigned integer into two 32-bit floats with GlslUnpackHalf2x16
+ // and returns the one selected by operation.Index.
+ private static OperationResult GenerateUnpackHalf2x16(CodeGenContext context, AstOperation operation)
+ {
+     var packed = context.GetU32(operation.GetSource(0));
+
+     var pairType = context.TypeVector(context.TypeFP32(), 2);
+     var halves = context.GlslUnpackHalf2x16(pairType, packed);
+     var selected = context.CompositeExtract(context.TypeFP32(), halves, operation.Index);
+
+     return new OperationResult(AggregateType.FP32, selected);
+ }
+
+ // Extracts one element from a vector (source 0). A constant index operand (source 1)
+ // uses CompositeExtract with a literal index; any other index uses VectorExtractDynamic.
+ private static OperationResult GenerateVectorExtract(CodeGenContext context, AstOperation operation)
+ {
+     var vector = context.GetWithType(operation.GetSource(0), out AggregateType vectorType);
+
+     // Dropping the element count bits leaves the scalar element type.
+     var elementType = vectorType & ~AggregateType.ElementCountMask;
+     var spvElementType = context.GetType(elementType);
+
+     var indexSource = operation.GetSource(1);
+
+     if (indexSource is AstOperand { Type: OperandType.Constant } constantIndex)
+     {
+         var literalIndex = (SpvLiteralInteger)constantIndex.Value;
+
+         return new OperationResult(elementType, context.CompositeExtract(spvElementType, vector, literalIndex));
+     }
+
+     var dynamicIndex = context.Get(AggregateType.S32, indexSource);
+
+     return new OperationResult(elementType, context.VectorExtractDynamic(spvElementType, vector, dynamicIndex));
+ }
+
+ // Emits GroupNonUniformAll over the subgroup scope for the boolean in source 0.
+ private static OperationResult GenerateVoteAll(CodeGenContext context, AstOperation operation)
+ {
+     var subgroupScope = context.Constant(context.TypeU32(), Scope.Subgroup);
+
+     var boolType = context.TypeBool();
+     var predicate = context.Get(AggregateType.Bool, operation.GetSource(0));
+     var vote = context.GroupNonUniformAll(boolType, subgroupScope, predicate);
+
+     return new OperationResult(AggregateType.Bool, vote);
+ }
+
+ // Emits GroupNonUniformAllEqual over the subgroup scope for the boolean in source 0.
+ private static OperationResult GenerateVoteAllEqual(CodeGenContext context, AstOperation operation)
+ {
+     var subgroupScope = context.Constant(context.TypeU32(), Scope.Subgroup);
+
+     var boolType = context.TypeBool();
+     var predicate = context.Get(AggregateType.Bool, operation.GetSource(0));
+     var vote = context.GroupNonUniformAllEqual(boolType, subgroupScope, predicate);
+
+     return new OperationResult(AggregateType.Bool, vote);
+ }
+
+ // Emits GroupNonUniformAny over the subgroup scope for the boolean in source 0.
+ private static OperationResult GenerateVoteAny(CodeGenContext context, AstOperation operation)
+ {
+     var subgroupScope = context.Constant(context.TypeU32(), Scope.Subgroup);
+
+     var boolType = context.TypeBool();
+     var predicate = context.Get(AggregateType.Bool, operation.GetSource(0));
+     var vote = context.GroupNonUniformAny(boolType, subgroupScope, predicate);
+
+     return new OperationResult(AggregateType.Bool, vote);
+ }
+
+ // Emits a two-operand comparison producing a boolean.
+ // The FP64 flag takes precedence over FP32; both use emitF with operands read as the
+ // matching float type. Without either flag, emitI is used with signed 32-bit operands.
+ private static OperationResult GenerateCompare(
+     CodeGenContext context,
+     AstOperation operation,
+     Func<SpvInstruction, SpvInstruction, SpvInstruction, SpvInstruction> emitF,
+     Func<SpvInstruction, SpvInstruction, SpvInstruction, SpvInstruction> emitI)
+ {
+     var lhs = operation.GetSource(0);
+     var rhs = operation.GetSource(1);
+
+     if (operation.Inst.HasFlag(Instruction.FP64))
+     {
+         var fp64Compare = emitF(context.TypeBool(), context.GetFP64(lhs), context.GetFP64(rhs));
+
+         return new OperationResult(AggregateType.Bool, fp64Compare);
+     }
+
+     if (operation.Inst.HasFlag(Instruction.FP32))
+     {
+         var fp32Compare = emitF(context.TypeBool(), context.GetFP32(lhs), context.GetFP32(rhs));
+
+         return new OperationResult(AggregateType.Bool, fp32Compare);
+     }
+
+     var intCompare = emitI(context.TypeBool(), context.GetS32(lhs), context.GetS32(rhs));
+
+     return new OperationResult(AggregateType.Bool, intCompare);
+ }
+
+ // Emits a two-operand comparison on unsigned 32-bit operands, producing a boolean.
+ private static OperationResult GenerateCompareU32(
+     CodeGenContext context,
+     AstOperation operation,
+     Func<SpvInstruction, SpvInstruction, SpvInstruction, SpvInstruction> emitU)
+ {
+     var lhsNode = operation.GetSource(0);
+     var rhsNode = operation.GetSource(1);
+
+     var boolType = context.TypeBool();
+     var lhs = context.GetU32(lhsNode);
+     var rhs = context.GetU32(rhsNode);
+
+     return new OperationResult(AggregateType.Bool, emitU(boolType, lhs, rhs));
+ }
+
+ // Emits an atomic read-modify-write on a 32-bit word of a storage buffer or of shared
+ // memory. Source 2 is the operand value; the target element is addressed by sources 0
+ // and 1 for storage buffers, or by source 0 alone for shared memory.
+ // Throws InvalidOperationException for any other storage kind.
+ private static OperationResult GenerateAtomicMemoryBinary(
+     CodeGenContext context,
+     AstOperation operation,
+     Func<SpvInstruction, SpvInstruction, SpvInstruction, SpvInstruction, SpvInstruction, SpvInstruction> emitU)
+ {
+     var operand = context.GetU32(operation.GetSource(2));
+
+     SpvInstruction target;
+
+     switch (operation.StorageKind)
+     {
+         case StorageKind.StorageBuffer:
+             target = GetStorageElemPointer(context, operation);
+             break;
+
+         case StorageKind.SharedMemory:
+             var wordIndex = context.GetU32(operation.GetSource(0));
+             var pointerType = context.TypePointer(StorageClass.Workgroup, context.TypeU32());
+             target = context.AccessChain(pointerType, context.SharedMemory, wordIndex);
+             break;
+
+         default:
+             throw new InvalidOperationException($"Invalid storage kind \"{operation.StorageKind}\".");
+     }
+
+     // Constants 1 and 0 are passed as the two arguments between the pointer and the value.
+     var constOne = context.Constant(context.TypeU32(), 1);
+     var constZero = context.Constant(context.TypeU32(), 0);
+
+     var previous = emitU(context.TypeU32(), target, constOne, constZero, operand);
+
+     return new OperationResult(AggregateType.U32, previous);
+ }
+
+ // Emits an atomic compare-exchange on a 32-bit word of a storage buffer or of shared
+ // memory. Sources 2 and 3 are forwarded to AtomicCompareExchange as its last and
+ // second-to-last arguments respectively.
+ // Throws InvalidOperationException for any other storage kind.
+ private static OperationResult GenerateAtomicMemoryCas(CodeGenContext context, AstOperation operation)
+ {
+     var source2 = context.GetU32(operation.GetSource(2));
+     var source3 = context.GetU32(operation.GetSource(3));
+
+     SpvInstruction target;
+
+     switch (operation.StorageKind)
+     {
+         case StorageKind.StorageBuffer:
+             target = GetStorageElemPointer(context, operation);
+             break;
+
+         case StorageKind.SharedMemory:
+             var wordIndex = context.GetU32(operation.GetSource(0));
+             var pointerType = context.TypePointer(StorageClass.Workgroup, context.TypeU32());
+             target = context.AccessChain(pointerType, context.SharedMemory, wordIndex);
+             break;
+
+         default:
+             throw new InvalidOperationException($"Invalid storage kind \"{operation.StorageKind}\".");
+     }
+
+     var constOne = context.Constant(context.TypeU32(), 1);
+     var constZero = context.Constant(context.TypeU32(), 0);
+
+     var previous = context.AtomicCompareExchange(
+         context.TypeU32(),
+         target,
+         constOne,
+         constZero,
+         constZero,
+         source3,
+         source2);
+
+     return new OperationResult(AggregateType.U32, previous);
+ }
+
+ // Emits a load from, or a store to, a shader input/output variable.
+ //
+ // Sources are consumed in order: the IoVariable id (constant), then for per-location
+ // variables the location (constant) and optionally the component (constant), then any
+ // remaining access-chain indices, and finally, for stores, the value to write.
+ // Returns the loaded value, or OperationResult.Invalid for stores.
+ // Throws InvalidOperationException when a required constant operand is missing or the
+ // storage kind is not an input/output kind.
+ private static OperationResult GenerateLoadOrStore(CodeGenContext context, AstOperation operation, bool isStore)
+ {
+ StorageKind storageKind = operation.StorageKind;
+
+ SpvInstruction pointer;
+ AggregateType varType;
+ int srcIndex = 0;
+
+ switch (storageKind)
+ {
+ case StorageKind.Input:
+ case StorageKind.InputPerPatch:
+ case StorageKind.Output:
+ case StorageKind.OutputPerPatch:
+ // The first source identifies the variable and must be a constant.
+ if (!(operation.GetSource(srcIndex++) is AstOperand varId) || varId.Type != OperandType.Constant)
+ {
+ throw new InvalidOperationException($"First input of {operation.Inst} with {storageKind} storage must be a constant operand.");
+ }
+
+ IoVariable ioVariable = (IoVariable)varId.Value;
+ bool isOutput = storageKind.IsOutput();
+ bool isPerPatch = storageKind.IsPerPatch();
+ int location = 0;
+ int component = 0;
+
+ if (context.Config.HasPerLocationInputOrOutput(ioVariable, isOutput))
+ {
+ // Per-location variables carry a constant location as the second source.
+ if (!(operation.GetSource(srcIndex++) is AstOperand vecIndex) || vecIndex.Type != OperandType.Constant)
+ {
+ throw new InvalidOperationException($"Second input of {operation.Inst} with {storageKind} storage must be a constant operand.");
+ }
+
+ location = vecIndex.Value;
+
+ // The next source is only consumed as a component when it is a constant and the
+ // configuration reports a per-component variable for it; otherwise it is left
+ // to be used as an access-chain index below.
+ if (operation.SourcesCount > srcIndex &&
+ operation.GetSource(srcIndex) is AstOperand elemIndex &&
+ elemIndex.Type == OperandType.Constant &&
+ context.Config.HasPerLocationInputOrOutputComponent(ioVariable, location, elemIndex.Value, isOutput))
+ {
+ component = elemIndex.Value;
+ srcIndex++;
+ }
+ }
+
+ // Resolve the variable type. The three support-buffer variables below build their
+ // pointer directly into context.SupportBuffer and leave the switch early.
+ if (ioVariable == IoVariable.UserDefined)
+ {
+ varType = context.Config.GetUserDefinedType(location, isOutput);
+ }
+ else if (ioVariable == IoVariable.FragmentOutputColor)
+ {
+ varType = context.Config.GetFragmentOutputColorType(location);
+ }
+ else if (ioVariable == IoVariable.FragmentOutputIsBgra)
+ {
+ // Support buffer member 1, indexed by the next source.
+ var pointerType = context.TypePointer(StorageClass.Uniform, context.TypeU32());
+ var elemIndex = context.Get(AggregateType.S32, operation.GetSource(srcIndex++));
+ pointer = context.AccessChain(pointerType, context.SupportBuffer, context.Constant(context.TypeU32(), 1), elemIndex);
+ varType = AggregateType.U32;
+
+ break;
+ }
+ else if (ioVariable == IoVariable.SupportBlockRenderScale)
+ {
+ // Support buffer member 4, indexed by the next source.
+ var pointerType = context.TypePointer(StorageClass.Uniform, context.TypeFP32());
+ var elemIndex = context.Get(AggregateType.S32, operation.GetSource(srcIndex++));
+ pointer = context.AccessChain(pointerType, context.SupportBuffer, context.Constant(context.TypeU32(), 4), elemIndex);
+ varType = AggregateType.FP32;
+
+ break;
+ }
+ else if (ioVariable == IoVariable.SupportBlockViewInverse)
+ {
+ // Support buffer member 2, indexed by the next source.
+ var pointerType = context.TypePointer(StorageClass.Uniform, context.TypeFP32());
+ var elemIndex = context.Get(AggregateType.S32, operation.GetSource(srcIndex++));
+ pointer = context.AccessChain(pointerType, context.SupportBuffer, context.Constant(context.TypeU32(), 2), elemIndex);
+ varType = AggregateType.FP32;
+
+ break;
+ }
+ else
+ {
+ (_, varType) = IoMap.GetSpirvBuiltIn(ioVariable);
+ }
+
+ // Keep only the element type bits of the variable type.
+ varType &= AggregateType.ElementTypeMask;
+
+ // Number of remaining sources used as access-chain indices; for stores the last
+ // source is the value being written and is excluded.
+ int inputsCount = (isStore ? operation.SourcesCount - 1 : operation.SourcesCount) - srcIndex;
+ var storageClass = isOutput ? StorageClass.Output : StorageClass.Input;
+
+ // Look up the declared variable in the dictionary matching its direction and patch-ness.
+ var ioDefinition = new IoDefinition(storageKind, ioVariable, location, component);
+ var dict = isPerPatch
+ ? (isOutput ? context.OutputsPerPatch : context.InputsPerPatch)
+ : (isOutput ? context.Outputs : context.Inputs);
+
+ SpvInstruction baseObj = dict[ioDefinition];
+ SpvInstruction e0, e1, e2;
+
+ // With no indices the variable itself is the pointer; otherwise build an access
+ // chain with one to three indices, or an index array for larger counts.
+ switch (inputsCount)
+ {
+ case 0:
+ pointer = baseObj;
+ break;
+ case 1:
+ e0 = context.Get(AggregateType.S32, operation.GetSource(srcIndex++));
+ pointer = context.AccessChain(context.TypePointer(storageClass, context.GetType(varType)), baseObj, e0);
+ break;
+ case 2:
+ e0 = context.Get(AggregateType.S32, operation.GetSource(srcIndex++));
+ e1 = context.Get(AggregateType.S32, operation.GetSource(srcIndex++));
+ pointer = context.AccessChain(context.TypePointer(storageClass, context.GetType(varType)), baseObj, e0, e1);
+ break;
+ case 3:
+ e0 = context.Get(AggregateType.S32, operation.GetSource(srcIndex++));
+ e1 = context.Get(AggregateType.S32, operation.GetSource(srcIndex++));
+ e2 = context.Get(AggregateType.S32, operation.GetSource(srcIndex++));
+ pointer = context.AccessChain(context.TypePointer(storageClass, context.GetType(varType)), baseObj, e0, e1, e2);
+ break;
+ default:
+ var indexes = new SpvInstruction[inputsCount];
+ int index = 0;
+
+ for (; index < inputsCount; srcIndex++, index++)
+ {
+ indexes[index] = context.Get(AggregateType.S32, operation.GetSource(srcIndex));
+ }
+
+ pointer = context.AccessChain(context.TypePointer(storageClass, context.GetType(varType)), baseObj, indexes);
+ break;
+ }
+ break;
+
+ default:
+ throw new InvalidOperationException($"Invalid storage kind {storageKind}.");
+ }
+
+ // At this point srcIndex refers to the value source for stores.
+ if (isStore)
+ {
+ context.Store(pointer, context.Get(varType, operation.GetSource(srcIndex)));
+ return OperationResult.Invalid;
+ }
+ else
+ {
+ var result = context.Load(context.GetType(varType), pointer);
+ return new OperationResult(varType, result);
+ }
+ }
+
+ // Loads the value of a built-in input variable. The type reported by
+ // IoMap.GetSpirvBuiltIn is reduced to its element type before loading.
+ private static SpvInstruction GetScalarInput(CodeGenContext context, IoVariable ioVariable)
+ {
+     var (_, builtInType) = IoMap.GetSpirvBuiltIn(ioVariable);
+     var elementType = builtInType & AggregateType.ElementTypeMask;
+
+     var definition = new IoDefinition(StorageKind.Input, ioVariable);
+
+     var spvType = context.GetType(elementType);
+     var variable = context.Inputs[definition];
+
+     return context.Load(spvType, variable);
+ }
+
+ // Stores a bitSize-wide integer into shared memory.
+ // Source 0 is split into a word index (offset >> 2) and a bit position within that
+ // word ((offset & 3) << 3); source 1 is the value to insert.
+ private static void GenerateStoreSharedSmallInt(CodeGenContext context, AstOperation operation, int bitSize)
+ {
+     var address = context.Get(AggregateType.U32, operation.GetSource(0));
+     var data = context.Get(AggregateType.U32, operation.GetSource(1));
+
+     var u32 = context.TypeU32();
+
+     var wordIndex = context.ShiftRightLogical(u32, address, context.Constant(u32, 2));
+     var bitPosition = context.BitwiseAnd(u32, address, context.Constant(u32, 3));
+     bitPosition = context.ShiftLeftLogical(u32, bitPosition, context.Constant(u32, 3));
+
+     var sharedMemory = context.SharedMemory;
+
+     var pointerType = context.TypePointer(StorageClass.Workgroup, context.TypeU32());
+     var wordPointer = context.AccessChain(pointerType, sharedMemory, wordIndex);
+
+     GenerateStoreSmallInt(context, wordPointer, bitPosition, data, bitSize);
+ }
+
+ // Stores a bitSize-wide integer into a storage buffer.
+ // Source 0 indexes context.StorageBuffersArray, source 1 is split into a word index
+ // (offset >> 2) and a bit position within that word ((offset & 3) << 3), and
+ // source 2 is the value to insert.
+ private static void GenerateStoreStorageSmallInt(CodeGenContext context, AstOperation operation, int bitSize)
+ {
+     var bufferIndex = context.Get(AggregateType.S32, operation.GetSource(0));
+     var address = context.Get(AggregateType.U32, operation.GetSource(1));
+     var data = context.Get(AggregateType.U32, operation.GetSource(2));
+
+     var u32 = context.TypeU32();
+
+     var wordIndex = context.ShiftRightLogical(u32, address, context.Constant(u32, 2));
+     var bitPosition = context.BitwiseAnd(u32, address, context.Constant(u32, 3));
+     bitPosition = context.ShiftLeftLogical(u32, bitPosition, context.Constant(u32, 3));
+
+     var buffers = context.StorageBuffersArray;
+
+     // Constant 0 is the middle index of the access chain.
+     var memberIndex = context.Constant(context.TypeS32(), 0);
+
+     var pointerType = context.TypePointer(StorageClass.Uniform, context.TypeU32());
+     var wordPointer = context.AccessChain(pointerType, buffers, bufferIndex, memberIndex, wordIndex);
+
+     GenerateStoreSmallInt(context, wordPointer, bitPosition, data, bitSize);
+ }
+
+ // Writes bitSize bits of value at bitOffset into the 32-bit word at elemPointer.
+ // The word is loaded, the bits are inserted, and the result is written back with an
+ // atomic compare-exchange; the loop repeats while the exchange returns something
+ // other than the value that was loaded.
+ private static void GenerateStoreSmallInt(
+     CodeGenContext context,
+     SpvInstruction elemPointer,
+     SpvInstruction bitOffset,
+     SpvInstruction value,
+     int bitSize)
+ {
+     var retryLabel = context.Label();
+     var doneLabel = context.Label();
+
+     context.Branch(retryLabel);
+     context.AddLabel(retryLabel);
+
+     var current = context.Load(context.TypeU32(), elemPointer);
+     var width = context.Constant(context.TypeU32(), bitSize);
+     var updated = context.BitFieldInsert(context.TypeU32(), current, value, bitOffset, width);
+
+     var constOne = context.Constant(context.TypeU32(), 1);
+     var constZero = context.Constant(context.TypeU32(), 0);
+
+     var observed = context.AtomicCompareExchange(
+         context.TypeU32(),
+         elemPointer,
+         constOne,
+         constZero,
+         constZero,
+         updated,
+         current);
+     var mustRetry = context.INotEqual(context.TypeBool(), observed, current);
+
+     context.LoopMerge(doneLabel, retryLabel, LoopControlMask.MaskNone);
+     context.BranchConditional(mustRetry, retryLabel, doneLabel);
+
+     context.AddLabel(doneLabel);
+ }
+
+ // Builds an all-zero result for a texture operation.
+ // The zero constant is S32, U32 or (for any other scalar type) FP32. When isVector is
+ // set and the texture operation's vector type has more than one element, the result is
+ // a constant vector with one zero per bit set in texOp.Index; otherwise it is the scalar.
+ private static OperationResult GetZeroOperationResult(
+     CodeGenContext context,
+     AstTextureOperation texOp,
+     AggregateType scalarType,
+     bool isVector)
+ {
+     SpvInstruction zero;
+
+     switch (scalarType)
+     {
+         case AggregateType.S32:
+             zero = context.Constant(context.TypeS32(), 0);
+             break;
+         case AggregateType.U32:
+             zero = context.Constant(context.TypeU32(), 0u);
+             break;
+         default:
+             zero = context.Constant(context.TypeFP32(), 0f);
+             break;
+     }
+
+     if (!isVector)
+     {
+         return new OperationResult(scalarType, zero);
+     }
+
+     AggregateType vectorType = texOp.GetVectorType(scalarType);
+
+     if ((vectorType & AggregateType.ElementCountMask) == 0)
+     {
+         return new OperationResult(scalarType, zero);
+     }
+
+     var zeros = new SpvInstruction[BitOperations.PopCount((uint)texOp.Index)];
+
+     Array.Fill(zeros, zero);
+
+     return new OperationResult(vectorType, context.ConstantComposite(context.GetType(vectorType), zeros));
+ }
+
+ // Reduces a vector to the components selected by the low four bits of mask.
+ // When swizzledResultType has no element count bits set, the component of the lowest
+ // set bit is extracted as a scalar; otherwise the selected components are gathered,
+ // in ascending order, with a vector shuffle.
+ private static SpvInstruction GetSwizzledResult(CodeGenContext context, SpvInstruction vector, AggregateType swizzledResultType, int mask)
+ {
+     bool isScalarResult = (swizzledResultType & AggregateType.ElementCountMask) == 0;
+
+     if (isScalarResult)
+     {
+         int firstSelected = (int)BitOperations.TrailingZeroCount(mask);
+
+         return context.CompositeExtract(context.GetType(swizzledResultType), vector, (SpvLiteralInteger)firstSelected);
+     }
+
+     var selected = new SpvLiteralInteger[BitOperations.PopCount((uint)mask)];
+
+     for (int bit = 0, next = 0; bit < 4; bit++)
+     {
+         if (((mask >> bit) & 1) != 0)
+         {
+             selected[next++] = bit;
+         }
+     }
+
+     return context.VectorShuffle(context.GetType(swizzledResultType), vector, vector, selected);
+ }
+
+ // Builds a Uniform-class pointer to a 32-bit word inside context.StorageBuffersArray.
+ // The access chain indices are: source 0, the constant 0, and source 1.
+ private static SpvInstruction GetStorageElemPointer(CodeGenContext context, AstOperation operation)
+ {
+     var buffers = context.StorageBuffersArray;
+
+     var bufferIndex = context.Get(AggregateType.S32, operation.GetSource(0));
+     var memberIndex = context.Constant(context.TypeS32(), 0);
+     var wordIndex = context.Get(AggregateType.S32, operation.GetSource(1));
+
+     var pointerType = context.TypePointer(StorageClass.Uniform, context.TypeU32());
+
+     return context.AccessChain(pointerType, buffers, bufferIndex, memberIndex, wordIndex);
+ }
+
+ // Emits a one-operand operation on source 0.
+ // The FP64 flag takes precedence over FP32; both use emitF with the matching float
+ // type. Without either flag, emitI is used with a signed 32-bit operand.
+ //
+ // Either delegate may be null when the caller has no implementation for that kind of
+ // operand (several callers pass null for emitI). If the operation then selects the
+ // missing delegate, an InvalidOperationException naming the instruction is thrown
+ // instead of failing with a NullReferenceException.
+ private static OperationResult GenerateUnary(
+     CodeGenContext context,
+     AstOperation operation,
+     Func<SpvInstruction, SpvInstruction, SpvInstruction> emitF,
+     Func<SpvInstruction, SpvInstruction, SpvInstruction> emitI)
+ {
+     var source = operation.GetSource(0);
+
+     bool isFP64 = operation.Inst.HasFlag(Instruction.FP64);
+     bool isFP32 = !isFP64 && operation.Inst.HasFlag(Instruction.FP32);
+
+     if (isFP64 || isFP32)
+     {
+         if (emitF == null)
+         {
+             throw new InvalidOperationException($"Operation {operation.Inst} has no floating-point implementation.");
+         }
+
+         if (isFP64)
+         {
+             return new OperationResult(AggregateType.FP64, emitF(context.TypeFP64(), context.GetFP64(source)));
+         }
+
+         return new OperationResult(AggregateType.FP32, emitF(context.TypeFP32(), context.GetFP32(source)));
+     }
+
+     if (emitI == null)
+     {
+         throw new InvalidOperationException($"Operation {operation.Inst} has no integer implementation.");
+     }
+
+     return new OperationResult(AggregateType.S32, emitI(context.TypeS32(), context.GetS32(source)));
+ }
+
+ /// <summary>
+ /// Emits a unary operation on a boolean operand.
+ /// </summary>
+ /// <param name="context">Code generation context.</param>
+ /// <param name="operation">Operation whose first source is the operand.</param>
+ /// <param name="emitB">Emitter invoked with the bool type and the operand read as a bool.</param>
+ /// <returns>The emitted value, typed as bool.</returns>
+ private static OperationResult GenerateUnaryBool(
+     CodeGenContext context,
+     AstOperation operation,
+     Func<SpvInstruction, SpvInstruction, SpvInstruction> emitB)
+ {
+     var operand = operation.GetSource(0);
+     var value = emitB(context.TypeBool(), context.Get(AggregateType.Bool, operand));
+
+     return new OperationResult(AggregateType.Bool, value);
+ }
+
+ /// <summary>
+ /// Emits a unary operation on an FP32 operand.
+ /// </summary>
+ /// <param name="context">Code generation context.</param>
+ /// <param name="operation">Operation whose first source is the operand.</param>
+ /// <param name="emit">Emitter invoked with the FP32 type and the operand read as FP32.</param>
+ /// <returns>The emitted value, typed as FP32.</returns>
+ private static OperationResult GenerateUnaryFP32(
+     CodeGenContext context,
+     AstOperation operation,
+     Func<SpvInstruction, SpvInstruction, SpvInstruction> emit)
+ {
+     var operand = operation.GetSource(0);
+     var value = emit(context.TypeFP32(), context.GetFP32(operand));
+
+     return new OperationResult(AggregateType.FP32, value);
+ }
+
+ /// <summary>
+ /// Emits a unary operation on an S32 operand.
+ /// </summary>
+ /// <param name="context">Code generation context.</param>
+ /// <param name="operation">Operation whose first source is the operand.</param>
+ /// <param name="emitS">Emitter invoked with the S32 type and the operand read as S32.</param>
+ /// <returns>The emitted value, typed as S32.</returns>
+ private static OperationResult GenerateUnaryS32(
+     CodeGenContext context,
+     AstOperation operation,
+     Func<SpvInstruction, SpvInstruction, SpvInstruction> emitS)
+ {
+     var operand = operation.GetSource(0);
+     var value = emitS(context.TypeS32(), context.GetS32(operand));
+
+     return new OperationResult(AggregateType.S32, value);
+ }
+
+ /// <summary>
+ /// Emits a binary operation, picking the FP64, FP32 or S32 form from the flags on the operation instruction.
+ /// </summary>
+ /// <remarks>
+ /// Floating-point results are decorated with NoContraction unless the GPU accessor reports that the host
+ /// uses reduced precision.
+ /// </remarks>
+ /// <param name="context">Code generation context.</param>
+ /// <param name="operation">Operation whose first two sources are the operands.</param>
+ /// <param name="emitF">Emitter invoked with the floating-point type and operands.</param>
+ /// <param name="emitI">Emitter invoked with the S32 type and operands when no floating-point flag is set.</param>
+ /// <returns>The emitted value together with its aggregate type.</returns>
+ private static OperationResult GenerateBinary(
+     CodeGenContext context,
+     AstOperation operation,
+     Func<SpvInstruction, SpvInstruction, SpvInstruction, SpvInstruction> emitF,
+     Func<SpvInstruction, SpvInstruction, SpvInstruction, SpvInstruction> emitI)
+ {
+     var lhs = operation.GetSource(0);
+     var rhs = operation.GetSource(1);
+
+     if (operation.Inst.HasFlag(Instruction.FP64))
+     {
+         var doubleValue = emitF(context.TypeFP64(), context.GetFP64(lhs), context.GetFP64(rhs));
+
+         DecorateNoContraction(doubleValue);
+
+         return new OperationResult(AggregateType.FP64, doubleValue);
+     }
+
+     if (operation.Inst.HasFlag(Instruction.FP32))
+     {
+         var floatValue = emitF(context.TypeFP32(), context.GetFP32(lhs), context.GetFP32(rhs));
+
+         DecorateNoContraction(floatValue);
+
+         return new OperationResult(AggregateType.FP32, floatValue);
+     }
+
+     var intValue = emitI(context.TypeS32(), context.GetS32(lhs), context.GetS32(rhs));
+
+     return new OperationResult(AggregateType.S32, intValue);
+
+     // Applies the NoContraction decoration unless the host uses reduced precision.
+     void DecorateNoContraction(SpvInstruction value)
+     {
+         if (context.Config.GpuAccessor.QueryHostReducedPrecision())
+         {
+             return;
+         }
+
+         context.Decorate(value, Decoration.NoContraction);
+     }
+ }
+
+ /// <summary>
+ /// Emits a binary operation on two boolean operands.
+ /// </summary>
+ /// <param name="context">Code generation context.</param>
+ /// <param name="operation">Operation whose first two sources are the operands.</param>
+ /// <param name="emitB">Emitter invoked with the bool type and both operands read as bools.</param>
+ /// <returns>The emitted value, typed as bool.</returns>
+ private static OperationResult GenerateBinaryBool(
+     CodeGenContext context,
+     AstOperation operation,
+     Func<SpvInstruction, SpvInstruction, SpvInstruction, SpvInstruction> emitB)
+ {
+     var lhs = operation.GetSource(0);
+     var rhs = operation.GetSource(1);
+
+     var value = emitB(
+         context.TypeBool(),
+         context.Get(AggregateType.Bool, lhs),
+         context.Get(AggregateType.Bool, rhs));
+
+     return new OperationResult(AggregateType.Bool, value);
+ }
+
+ /// <summary>
+ /// Emits a binary operation on two S32 operands.
+ /// </summary>
+ /// <param name="context">Code generation context.</param>
+ /// <param name="operation">Operation whose first two sources are the operands.</param>
+ /// <param name="emitS">Emitter invoked with the S32 type and both operands read as S32.</param>
+ /// <returns>The emitted value, typed as S32.</returns>
+ private static OperationResult GenerateBinaryS32(
+     CodeGenContext context,
+     AstOperation operation,
+     Func<SpvInstruction, SpvInstruction, SpvInstruction, SpvInstruction> emitS)
+ {
+     var lhs = operation.GetSource(0);
+     var rhs = operation.GetSource(1);
+
+     var value = emitS(context.TypeS32(), context.GetS32(lhs), context.GetS32(rhs));
+
+     return new OperationResult(AggregateType.S32, value);
+ }
+
+ /// <summary>
+ /// Emits a binary operation on two U32 operands.
+ /// </summary>
+ /// <param name="context">Code generation context.</param>
+ /// <param name="operation">Operation whose first two sources are the operands.</param>
+ /// <param name="emitU">Emitter invoked with the U32 type and both operands read as U32.</param>
+ /// <returns>The emitted value, typed as U32.</returns>
+ private static OperationResult GenerateBinaryU32(
+     CodeGenContext context,
+     AstOperation operation,
+     Func<SpvInstruction, SpvInstruction, SpvInstruction, SpvInstruction> emitU)
+ {
+     var lhs = operation.GetSource(0);
+     var rhs = operation.GetSource(1);
+
+     var value = emitU(context.TypeU32(), context.GetU32(lhs), context.GetU32(rhs));
+
+     return new OperationResult(AggregateType.U32, value);
+ }
+
+ /// <summary>
+ /// Emits a ternary operation, picking the FP64, FP32 or S32 form from the flags on the operation instruction.
+ /// </summary>
+ /// <remarks>
+ /// Floating-point results are decorated with NoContraction unless the GPU accessor reports that the host
+ /// uses reduced precision.
+ /// </remarks>
+ /// <param name="context">Code generation context.</param>
+ /// <param name="operation">Operation whose first three sources are the operands.</param>
+ /// <param name="emitF">Emitter invoked with the floating-point type and operands.</param>
+ /// <param name="emitI">Emitter invoked with the S32 type and operands when no floating-point flag is set.</param>
+ /// <returns>The emitted value together with its aggregate type.</returns>
+ private static OperationResult GenerateTernary(
+     CodeGenContext context,
+     AstOperation operation,
+     Func<SpvInstruction, SpvInstruction, SpvInstruction, SpvInstruction, SpvInstruction> emitF,
+     Func<SpvInstruction, SpvInstruction, SpvInstruction, SpvInstruction, SpvInstruction> emitI)
+ {
+     var first = operation.GetSource(0);
+     var second = operation.GetSource(1);
+     var third = operation.GetSource(2);
+
+     if (operation.Inst.HasFlag(Instruction.FP64))
+     {
+         var doubleValue = emitF(
+             context.TypeFP64(),
+             context.GetFP64(first),
+             context.GetFP64(second),
+             context.GetFP64(third));
+
+         DecorateNoContraction(doubleValue);
+
+         return new OperationResult(AggregateType.FP64, doubleValue);
+     }
+
+     if (operation.Inst.HasFlag(Instruction.FP32))
+     {
+         var floatValue = emitF(
+             context.TypeFP32(),
+             context.GetFP32(first),
+             context.GetFP32(second),
+             context.GetFP32(third));
+
+         DecorateNoContraction(floatValue);
+
+         return new OperationResult(AggregateType.FP32, floatValue);
+     }
+
+     var intValue = emitI(
+         context.TypeS32(),
+         context.GetS32(first),
+         context.GetS32(second),
+         context.GetS32(third));
+
+     return new OperationResult(AggregateType.S32, intValue);
+
+     // Applies the NoContraction decoration unless the host uses reduced precision.
+     void DecorateNoContraction(SpvInstruction value)
+     {
+         if (context.Config.GpuAccessor.QueryHostReducedPrecision())
+         {
+             return;
+         }
+
+         context.Decorate(value, Decoration.NoContraction);
+     }
+ }
+
+ /// <summary>
+ /// Emits a ternary operation on three U32 operands.
+ /// </summary>
+ /// <param name="context">Code generation context.</param>
+ /// <param name="operation">Operation whose first three sources are the operands.</param>
+ /// <param name="emitU">Emitter invoked with the U32 type and all operands read as U32.</param>
+ /// <returns>The emitted value, typed as U32.</returns>
+ private static OperationResult GenerateTernaryU32(
+     CodeGenContext context,
+     AstOperation operation,
+     Func<SpvInstruction, SpvInstruction, SpvInstruction, SpvInstruction, SpvInstruction> emitU)
+ {
+     var first = operation.GetSource(0);
+     var second = operation.GetSource(1);
+     var third = operation.GetSource(2);
+
+     var value = emitU(context.TypeU32(), context.GetU32(first), context.GetU32(second), context.GetU32(third));
+
+     return new OperationResult(AggregateType.U32, value);
+ }
+
+ /// <summary>
+ /// Emits a three-operand bitfield extract whose result is S32.
+ /// </summary>
+ /// <remarks>
+ /// The first source is read as S32; the second and third sources are read as U32.
+ /// </remarks>
+ /// <param name="context">Code generation context.</param>
+ /// <param name="operation">Operation whose first three sources are the operands.</param>
+ /// <param name="emitS">Emitter invoked with the S32 type and the three operands.</param>
+ /// <returns>The emitted value, typed as S32.</returns>
+ private static OperationResult GenerateBitfieldExtractS32(
+     CodeGenContext context,
+     AstOperation operation,
+     Func<SpvInstruction, SpvInstruction, SpvInstruction, SpvInstruction, SpvInstruction> emitS)
+ {
+     var first = operation.GetSource(0);
+     var second = operation.GetSource(1);
+     var third = operation.GetSource(2);
+
+     var value = emitS(context.TypeS32(), context.GetS32(first), context.GetU32(second), context.GetU32(third));
+
+     return new OperationResult(AggregateType.S32, value);
+ }
+
+ /// <summary>
+ /// Emits a four-operand bitfield insert whose operands and result are all U32.
+ /// </summary>
+ /// <param name="context">Code generation context.</param>
+ /// <param name="operation">Operation whose first four sources are the operands.</param>
+ /// <param name="emitS">Emitter invoked with the U32 type and the four operands read as U32.</param>
+ /// <returns>The emitted value, typed as U32.</returns>
+ private static OperationResult GenerateBitfieldInsert(
+     CodeGenContext context,
+     AstOperation operation,
+     Func<SpvInstruction, SpvInstruction, SpvInstruction, SpvInstruction, SpvInstruction, SpvInstruction> emitS)
+ {
+     var first = operation.GetSource(0);
+     var second = operation.GetSource(1);
+     var third = operation.GetSource(2);
+     var fourth = operation.GetSource(3);
+
+     var value = emitS(
+         context.TypeU32(),
+         context.GetU32(first),
+         context.GetU32(second),
+         context.GetU32(third),
+         context.GetU32(fourth));
+
+     return new OperationResult(AggregateType.U32, value);
+ }
+ }
+}
diff --git a/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/IoMap.cs b/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/IoMap.cs
new file mode 100644
index 00000000..d2ff0085
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/IoMap.cs
@@ -0,0 +1,86 @@
+using Ryujinx.Graphics.Shader.IntermediateRepresentation;
+using Ryujinx.Graphics.Shader.Translation;
+using static Spv.Specification;
+
+namespace Ryujinx.Graphics.Shader.CodeGen.Spirv
+{
+ /// <summary>
+ /// Maps shader IO variables to SPIR-V built-ins and describes their array lengths and per-vertex status.
+ /// </summary>
+ static class IoMap
+ {
+     // At least 16 attributes are guaranteed by the spec.
+     private const int MaxAttributes = 16;
+
+     /// <summary>
+     /// Gets the SPIR-V built-in and the value type used for an IO variable.
+     /// </summary>
+     /// <param name="ioVariable">IO variable to look up.</param>
+     /// <returns>
+     /// The built-in and its type, or a default built-in paired with <see cref="AggregateType.Invalid"/>
+     /// when the variable has no entry in the table.
+     /// </returns>
+     public static (BuiltIn, AggregateType) GetSpirvBuiltIn(IoVariable ioVariable) => ioVariable switch
+     {
+         IoVariable.BaseInstance => (BuiltIn.BaseInstance, AggregateType.S32),
+         IoVariable.BaseVertex => (BuiltIn.BaseVertex, AggregateType.S32),
+         IoVariable.ClipDistance => (BuiltIn.ClipDistance, AggregateType.Array | AggregateType.FP32),
+         IoVariable.CtaId => (BuiltIn.WorkgroupId, AggregateType.Vector3 | AggregateType.U32),
+         IoVariable.DrawIndex => (BuiltIn.DrawIndex, AggregateType.S32),
+         IoVariable.FragmentCoord => (BuiltIn.FragCoord, AggregateType.Vector4 | AggregateType.FP32),
+         IoVariable.FragmentOutputDepth => (BuiltIn.FragDepth, AggregateType.FP32),
+         IoVariable.FrontFacing => (BuiltIn.FrontFacing, AggregateType.Bool),
+         IoVariable.InstanceId => (BuiltIn.InstanceId, AggregateType.S32),
+         IoVariable.InstanceIndex => (BuiltIn.InstanceIndex, AggregateType.S32),
+         IoVariable.InvocationId => (BuiltIn.InvocationId, AggregateType.S32),
+         IoVariable.Layer => (BuiltIn.Layer, AggregateType.S32),
+         IoVariable.PatchVertices => (BuiltIn.PatchVertices, AggregateType.S32),
+         IoVariable.PointCoord => (BuiltIn.PointCoord, AggregateType.Vector2 | AggregateType.FP32),
+         IoVariable.PointSize => (BuiltIn.PointSize, AggregateType.FP32),
+         IoVariable.Position => (BuiltIn.Position, AggregateType.Vector4 | AggregateType.FP32),
+         IoVariable.PrimitiveId => (BuiltIn.PrimitiveId, AggregateType.S32),
+         IoVariable.SubgroupEqMask => (BuiltIn.SubgroupEqMask, AggregateType.Vector4 | AggregateType.U32),
+         IoVariable.SubgroupGeMask => (BuiltIn.SubgroupGeMask, AggregateType.Vector4 | AggregateType.U32),
+         IoVariable.SubgroupGtMask => (BuiltIn.SubgroupGtMask, AggregateType.Vector4 | AggregateType.U32),
+         IoVariable.SubgroupLaneId => (BuiltIn.SubgroupLocalInvocationId, AggregateType.U32),
+         IoVariable.SubgroupLeMask => (BuiltIn.SubgroupLeMask, AggregateType.Vector4 | AggregateType.U32),
+         IoVariable.SubgroupLtMask => (BuiltIn.SubgroupLtMask, AggregateType.Vector4 | AggregateType.U32),
+         IoVariable.TessellationCoord => (BuiltIn.TessCoord, AggregateType.Vector3 | AggregateType.FP32),
+         IoVariable.TessellationLevelInner => (BuiltIn.TessLevelInner, AggregateType.Array | AggregateType.FP32),
+         IoVariable.TessellationLevelOuter => (BuiltIn.TessLevelOuter, AggregateType.Array | AggregateType.FP32),
+         IoVariable.ThreadId => (BuiltIn.LocalInvocationId, AggregateType.Vector3 | AggregateType.U32),
+         IoVariable.ThreadKill => (BuiltIn.HelperInvocation, AggregateType.Bool),
+         IoVariable.VertexId => (BuiltIn.VertexId, AggregateType.S32),
+         IoVariable.VertexIndex => (BuiltIn.VertexIndex, AggregateType.S32),
+         IoVariable.ViewportIndex => (BuiltIn.ViewportIndex, AggregateType.S32),
+         IoVariable.ViewportMask => (BuiltIn.ViewportMaskNV, AggregateType.Array | AggregateType.S32),
+         _ => (default, AggregateType.Invalid)
+     };
+
+     /// <summary>
+     /// Gets the array length used for an IO variable.
+     /// </summary>
+     /// <param name="ioVariable">IO variable to look up.</param>
+     /// <returns>The array length listed for the variable, or 1 when it has no entry.</returns>
+     public static int GetSpirvBuiltInArrayLength(IoVariable ioVariable) => ioVariable switch
+     {
+         IoVariable.ClipDistance => 8,
+         IoVariable.TessellationLevelInner => 2,
+         IoVariable.TessellationLevelOuter => 4,
+         IoVariable.ViewportMask => 1,
+         IoVariable.UserDefined => MaxAttributes,
+         _ => 1
+     };
+
+     /// <summary>
+     /// Checks whether an IO variable is treated as per-vertex for the given stage and direction.
+     /// </summary>
+     /// <param name="ioVariable">IO variable to check.</param>
+     /// <param name="stage">Shader stage that accesses the variable.</param>
+     /// <param name="isOutput">True when the variable is an output; outputs always yield false.</param>
+     /// <returns>
+     /// True only for inputs of the listed variables on the tessellation control, tessellation evaluation
+     /// or geometry stages.
+     /// </returns>
+     public static bool IsPerVertex(IoVariable ioVariable, ShaderStage stage, bool isOutput)
+     {
+         bool isListedVariable = ioVariable is
+             IoVariable.Layer or
+             IoVariable.ViewportIndex or
+             IoVariable.PointSize or
+             IoVariable.Position or
+             IoVariable.UserDefined or
+             IoVariable.ClipDistance or
+             IoVariable.PointCoord or
+             IoVariable.ViewportMask;
+
+         if (!isListedVariable || isOutput)
+         {
+             return false;
+         }
+
+         return stage is
+             ShaderStage.TessellationControl or
+             ShaderStage.TessellationEvaluation or
+             ShaderStage.Geometry;
+     }
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/OperationResult.cs b/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/OperationResult.cs
new file mode 100644
index 00000000..f80c8110
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/OperationResult.cs
@@ -0,0 +1,19 @@
+using Ryujinx.Graphics.Shader.Translation;
+using Spv.Generator;
+
+namespace Ryujinx.Graphics.Shader.CodeGen.Spirv
+{
+ /// <summary>
+ /// Pairs a generated SPIR-V instruction with the aggregate type of the value it produces.
+ /// </summary>
+ readonly struct OperationResult
+ {
+     /// <summary>
+     /// Gets a result with <see cref="AggregateType.Invalid"/> as its type and no value.
+     /// </summary>
+     public static OperationResult Invalid => new(AggregateType.Invalid, null);
+
+     /// <summary>
+     /// Type of the value.
+     /// </summary>
+     public AggregateType Type { get; }
+
+     /// <summary>
+     /// Instruction that produces the value.
+     /// </summary>
+     public Instruction Value { get; }
+
+     /// <summary>
+     /// Creates a new operation result.
+     /// </summary>
+     /// <param name="type">Type of the value.</param>
+     /// <param name="value">Instruction that produces the value.</param>
+     public OperationResult(AggregateType type, Instruction value)
+     {
+         Type = type;
+         Value = value;
+     }
+ }
+}
diff --git a/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/ScalingHelpers.cs b/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/ScalingHelpers.cs
new file mode 100644
index 00000000..f6c218c6
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/ScalingHelpers.cs
@@ -0,0 +1,227 @@
+using Ryujinx.Graphics.Shader.IntermediateRepresentation;
+using Ryujinx.Graphics.Shader.StructuredIr;
+using Ryujinx.Graphics.Shader.Translation;
+using static Spv.Specification;
+
+namespace Ryujinx.Graphics.Shader.CodeGen.Spirv
+{
+ using SpvInstruction = Spv.Generator.Instruction;
+
+ static class ScalingHelpers
+ {
+ /// <summary>
+ /// Applies render scaling to integer texture coordinates when the operation is eligible for it.
+ /// </summary>
+ /// <remarks>
+ /// Scaling is only emitted for integer coordinates on stages that support render scale, when the access is
+ /// neither bindless nor indexed, and only for 2D (two coordinates, non-array) or 2D array (three coordinates,
+ /// array) accesses. In every other case the input vector is returned unchanged.
+ /// </remarks>
+ /// <param name="context">Code generation context.</param>
+ /// <param name="texOp">Texture operation the coordinates belong to.</param>
+ /// <param name="vector">Coordinate vector.</param>
+ /// <param name="intCoords">True when the coordinates are integers.</param>
+ /// <param name="isBindless">True when the texture access is bindless.</param>
+ /// <param name="isIndexed">True when the texture access is indexed.</param>
+ /// <param name="isArray">True when the texture is an array texture.</param>
+ /// <param name="pCount">Number of coordinate components.</param>
+ /// <returns>The scaled coordinates, or <paramref name="vector"/> when no scaling applies.</returns>
+ public static SpvInstruction ApplyScaling(
+     CodeGenContext context,
+     AstTextureOperation texOp,
+     SpvInstruction vector,
+     bool intCoords,
+     bool isBindless,
+     bool isIndexed,
+     bool isArray,
+     int pCount)
+ {
+     bool canScale = intCoords &&
+         context.Config.Stage.SupportsRenderScale() &&
+         !isBindless &&
+         !isIndexed;
+
+     if (!canScale)
+     {
+         return vector;
+     }
+
+     int scaleSlot;
+
+     if (texOp.Inst == Instruction.ImageLoad)
+     {
+         // Image slots are placed after all texture slots.
+         scaleSlot = context.Config.GetTextureDescriptors().Length + context.Config.FindImageDescriptorIndex(texOp);
+     }
+     else
+     {
+         scaleSlot = context.Config.FindTextureDescriptorIndex(texOp);
+     }
+
+     if (pCount == 3 && isArray)
+     {
+         return ApplyScaling2DArray(context, vector, scaleSlot);
+     }
+
+     if (pCount == 2 && !isArray)
+     {
+         return ApplyScaling2D(context, vector, scaleSlot);
+     }
+
+     return vector;
+ }
+
+ /// <summary>
+ /// Applies render scaling to the x and y components of a three-component integer coordinate vector.
+ /// </summary>
+ /// <param name="context">Code generation context.</param>
+ /// <param name="vector">Coordinate vector holding x, y and the array index.</param>
+ /// <param name="index">Scale index forwarded to the 2D scaling routine.</param>
+ /// <returns>A three-component vector with scaled x and y and the original third component.</returns>
+ private static SpvInstruction ApplyScaling2DArray(CodeGenContext context, SpvInstruction vector, int index)
+ {
+     // Only x and y are scaled; the array index (z) is carried over untouched.
+     var coordsXY = context.VectorShuffle(context.TypeVector(context.TypeS32(), 2), vector, vector, 0, 1);
+     var arrayIndex = context.CompositeExtract(context.TypeS32(), vector, 2);
+
+     var scaledXY = ApplyScaling2D(context, coordsXY, index);
+
+     return context.CompositeConstruct(context.TypeVector(context.TypeS32(), 3), scaledXY, arrayIndex);
+ }
+
+ /// <summary>
+ /// Emits code that applies the render scale selected by <paramref name="index"/> to a 2D integer coordinate vector.
+ /// </summary>
+ /// <remarks>
+ /// The scale is loaded from member 4 of the support buffer at element <c>index + 1</c>; on the vertex stage the
+ /// value loaded from member 3 of the support buffer is added to the element index first. The result is stored
+ /// to the coordinate temporary inside the emitted branches and loaded back once control flow has merged.
+ /// </remarks>
+ /// <param name="context">Code generation context.</param>
+ /// <param name="vector">Two-component integer coordinate vector.</param>
+ /// <param name="index">Scale index of the texture or image.</param>
+ /// <returns>The value loaded from the coordinate temporary after scaling.</returns>
+ private static SpvInstruction ApplyScaling2D(CodeGenContext context, SpvInstruction vector, int index)
+ {
+     var scalePointerType = context.TypePointer(StorageClass.Uniform, context.TypeFP32());
+     var scalesMember = context.Constant(context.TypeU32(), 4);
+     var elementIndex = context.Constant(context.TypeU32(), index);
+
+     if (context.Config.Stage == ShaderStage.Vertex)
+     {
+         var countPointerType = context.TypePointer(StorageClass.Uniform, context.TypeS32());
+         var countPointer = context.AccessChain(countPointerType, context.SupportBuffer, context.Constant(context.TypeU32(), 3));
+         var count = context.Load(context.TypeS32(), countPointer);
+
+         elementIndex = context.IAdd(context.TypeU32(), elementIndex, count);
+     }
+
+     elementIndex = context.IAdd(context.TypeU32(), elementIndex, context.Constant(context.TypeU32(), 1));
+
+     var scalePointer = context.AccessChain(scalePointerType, context.SupportBuffer, scalesMember, elementIndex);
+     var scale = context.Load(context.TypeFP32(), scalePointer);
+
+     var resultType = context.TypeVector(context.TypeS32(), 2);
+     var scaledCoords = context.CoordTemp;
+
+     var isUnscaled = context.FOrdEqual(context.TypeBool(), scale, context.Constant(context.TypeFP32(), 1f));
+
+     var mergeLabel = context.Label();
+
+     if (context.Config.Stage == ShaderStage.Fragment)
+     {
+         var interpolatedLabel = context.Label();
+         var plainLabel = context.Label();
+
+         var isNegativeScale = context.FOrdLessThan(context.TypeBool(), scale, context.Constant(context.TypeFP32(), 0f));
+
+         context.SelectionMerge(mergeLabel, SelectionControlMask.MaskNone);
+         context.BranchConditional(isNegativeScale, interpolatedLabel, plainLabel);
+
+         // scale < 0.0
+         context.AddLabel(interpolatedLabel);
+
+         ApplyScalingInterpolated(context, scaledCoords, vector, scale);
+         context.Branch(mergeLabel);
+
+         // scale >= 0.0
+         context.AddLabel(plainLabel);
+
+         ApplyScalingNoInterpolation(context, scaledCoords, vector, scale);
+         context.Branch(mergeLabel);
+
+         context.AddLabel(mergeLabel);
+
+         // A second selection overwrites the temporary with the unscaled input when scale == 1.0.
+         var unscaledLabel = context.Label();
+         var finalMergeLabel = context.Label();
+
+         context.SelectionMerge(finalMergeLabel, SelectionControlMask.MaskNone);
+         context.BranchConditional(isUnscaled, unscaledLabel, finalMergeLabel);
+
+         context.AddLabel(unscaledLabel);
+
+         context.Store(scaledCoords, vector);
+         context.Branch(finalMergeLabel);
+
+         context.AddLabel(finalMergeLabel);
+     }
+     else
+     {
+         var unscaledLabel = context.Label();
+         var scaledLabel = context.Label();
+
+         context.SelectionMerge(mergeLabel, SelectionControlMask.MaskNone);
+         context.BranchConditional(isUnscaled, unscaledLabel, scaledLabel);
+
+         // scale == 1.0
+         context.AddLabel(unscaledLabel);
+
+         context.Store(scaledCoords, vector);
+         context.Branch(mergeLabel);
+
+         // scale != 1.0
+         context.AddLabel(scaledLabel);
+
+         ApplyScalingNoInterpolation(context, scaledCoords, vector, scale);
+         context.Branch(mergeLabel);
+
+         context.AddLabel(mergeLabel);
+     }
+
+     // Both paths finish by reading back whatever was stored to the temporary.
+     return context.Load(resultType, scaledCoords);
+ }
+
+ /// <summary>
+ /// Emits code that scales integer coordinates by the negated scale and adds the fragment coordinate
+ /// modulo that negated scale, storing the integer result to <paramref name="output"/>.
+ /// </summary>
+ /// <param name="context">Code generation context.</param>
+ /// <param name="output">Pointer that receives the scaled two-component integer vector.</param>
+ /// <param name="vector">Two-component integer coordinate vector.</param>
+ /// <param name="scale">Scale value; it is negated before use.</param>
+ private static void ApplyScalingInterpolated(CodeGenContext context, SpvInstruction output, SpvInstruction vector, SpvInstruction scale)
+ {
+     var float2Type = context.TypeVector(context.TypeFP32(), 2);
+
+     var positiveScale = context.FNegate(context.TypeFP32(), scale);
+     var positiveScale2 = context.CompositeConstruct(float2Type, positiveScale, positiveScale);
+
+     var coordsFloat = context.ConvertSToF(float2Type, vector);
+     var coordsScaled = context.VectorTimesScalar(float2Type, coordsFloat, positiveScale);
+
+     var fragCoordVariable = context.Inputs[new IoDefinition(StorageKind.Input, IoVariable.FragmentCoord)];
+     var fragCoordValue = context.Load(context.TypeVector(context.TypeFP32(), 4), fragCoordVariable);
+     var fragCoordXY = context.VectorShuffle(float2Type, fragCoordValue, fragCoordValue, 0, 1);
+
+     var offsetInPixel = context.FMod(float2Type, fragCoordXY, positiveScale2);
+     var coordsWithOffset = context.FAdd(float2Type, coordsScaled, offsetInPixel);
+
+     var int2Type = context.TypeVector(context.TypeS32(), 2);
+     var coordsInt = context.ConvertFToS(int2Type, coordsWithOffset);
+
+     context.Store(output, coordsInt);
+ }
+
+ /// <summary>
+ /// Emits code that multiplies integer coordinates by the scale and stores the integer result to
+ /// <paramref name="output"/>.
+ /// </summary>
+ /// <param name="context">Code generation context.</param>
+ /// <param name="output">Pointer that receives the scaled two-component integer vector.</param>
+ /// <param name="vector">Two-component integer coordinate vector.</param>
+ /// <param name="scale">Scale value; its absolute value is used on the vertex stage.</param>
+ private static void ApplyScalingNoInterpolation(CodeGenContext context, SpvInstruction output, SpvInstruction vector, SpvInstruction scale)
+ {
+     bool isVertexStage = context.Config.Stage == ShaderStage.Vertex;
+
+     if (isVertexStage)
+     {
+         scale = context.GlslFAbs(context.TypeFP32(), scale);
+     }
+
+     var float2Type = context.TypeVector(context.TypeFP32(), 2);
+
+     var coordsFloat = context.ConvertSToF(float2Type, vector);
+     var coordsScaled = context.VectorTimesScalar(float2Type, coordsFloat, scale);
+
+     var int2Type = context.TypeVector(context.TypeS32(), 2);
+     var coordsInt = context.ConvertFToS(int2Type, coordsScaled);
+
+     context.Store(output, coordsInt);
+ }
+
+ /// <summary>
+ /// Divides an integer size by the absolute render scale of the texture when the operation is eligible for it.
+ /// </summary>
+ /// <remarks>
+ /// The size is returned unchanged when the stage does not support render scale or when the access is bindless
+ /// or indexed. Otherwise the scale is loaded from member 4 of the support buffer at element <c>index + 1</c>
+ /// (offset by the value loaded from member 3 on the vertex stage), and the emitted code selects the original
+ /// size when the absolute scale equals 1.0.
+ /// </remarks>
+ /// <param name="context">Code generation context.</param>
+ /// <param name="texOp">Texture operation the size belongs to.</param>
+ /// <param name="size">Integer size value.</param>
+ /// <param name="isBindless">True when the texture access is bindless.</param>
+ /// <param name="isIndexed">True when the texture access is indexed.</param>
+ /// <returns>The unscaled size, or <paramref name="size"/> when no unscaling applies.</returns>
+ public static SpvInstruction ApplyUnscaling(
+     CodeGenContext context,
+     AstTextureOperation texOp,
+     SpvInstruction size,
+     bool isBindless,
+     bool isIndexed)
+ {
+     bool canUnscale = context.Config.Stage.SupportsRenderScale() &&
+         !isBindless &&
+         !isIndexed;
+
+     if (!canUnscale)
+     {
+         return size;
+     }
+
+     int index = context.Config.FindTextureDescriptorIndex(texOp);
+
+     var scalePointerType = context.TypePointer(StorageClass.Uniform, context.TypeFP32());
+     var scalesMember = context.Constant(context.TypeU32(), 4);
+     var elementIndex = context.Constant(context.TypeU32(), index);
+
+     if (context.Config.Stage == ShaderStage.Vertex)
+     {
+         var countPointerType = context.TypePointer(StorageClass.Uniform, context.TypeS32());
+         var countPointer = context.AccessChain(countPointerType, context.SupportBuffer, context.Constant(context.TypeU32(), 3));
+         var count = context.Load(context.TypeS32(), countPointer);
+
+         elementIndex = context.IAdd(context.TypeU32(), elementIndex, count);
+     }
+
+     elementIndex = context.IAdd(context.TypeU32(), elementIndex, context.Constant(context.TypeU32(), 1));
+
+     var scalePointer = context.AccessChain(scalePointerType, context.SupportBuffer, scalesMember, elementIndex);
+     var absoluteScale = context.GlslFAbs(context.TypeFP32(), context.Load(context.TypeFP32(), scalePointer));
+
+     var isUnscaled = context.FOrdEqual(context.TypeBool(), absoluteScale, context.Constant(context.TypeFP32(), 1f));
+
+     var sizeAsFloat = context.ConvertSToF(context.TypeFP32(), size);
+     var dividedSize = context.FDiv(context.TypeFP32(), sizeAsFloat, absoluteScale);
+     var dividedSizeInt = context.ConvertFToS(context.TypeS32(), dividedSize);
+
+     // Keep the exact input size when no scaling is in effect.
+     return context.Select(context.TypeS32(), isUnscaled, size, dividedSizeInt);
+ }
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/SpirvDelegates.cs b/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/SpirvDelegates.cs
new file mode 100644
index 00000000..3ccfd7f5
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/SpirvDelegates.cs
@@ -0,0 +1,226 @@
+using FuncBinaryInstruction = System.Func<Spv.Generator.Instruction, Spv.Generator.Instruction, Spv.Generator.Instruction, Spv.Generator.Instruction>;
+using FuncQuaternaryInstruction = System.Func<Spv.Generator.Instruction, Spv.Generator.Instruction, Spv.Generator.Instruction, Spv.Generator.Instruction, Spv.Generator.Instruction, Spv.Generator.Instruction>;
+using FuncTernaryInstruction = System.Func<Spv.Generator.Instruction, Spv.Generator.Instruction, Spv.Generator.Instruction, Spv.Generator.Instruction, Spv.Generator.Instruction>;
+using FuncUnaryInstruction = System.Func<Spv.Generator.Instruction, Spv.Generator.Instruction, Spv.Generator.Instruction>;
+
+namespace Ryujinx.Graphics.Shader.CodeGen.Spirv
+{
+ /// <summary>
+ /// Delegate cache for SPIR-V instruction generators. Avoids delegate allocation when passing generators as arguments. Each field is bound in the constructor to the method of the same name on the supplied context.
+ /// </summary>
+ internal readonly struct SpirvDelegates
+ {
+ // Unary (FuncUnaryInstruction: two instruction arguments, one instruction result)
+ public readonly FuncUnaryInstruction GlslFAbs;
+ public readonly FuncUnaryInstruction GlslSAbs;
+ public readonly FuncUnaryInstruction GlslCeil;
+ public readonly FuncUnaryInstruction GlslCos;
+ public readonly FuncUnaryInstruction GlslExp2;
+ public readonly FuncUnaryInstruction GlslFloor;
+ public readonly FuncUnaryInstruction GlslLog2;
+ public readonly FuncUnaryInstruction FNegate;
+ public readonly FuncUnaryInstruction SNegate;
+ public readonly FuncUnaryInstruction GlslInverseSqrt;
+ public readonly FuncUnaryInstruction GlslRoundEven;
+ public readonly FuncUnaryInstruction GlslSin;
+ public readonly FuncUnaryInstruction GlslSqrt;
+ public readonly FuncUnaryInstruction GlslTrunc;
+
+ // UnaryBool
+ public readonly FuncUnaryInstruction LogicalNot;
+
+ // UnaryFP32
+ public readonly FuncUnaryInstruction DPdx;
+ public readonly FuncUnaryInstruction DPdy;
+
+ // UnaryS32
+ public readonly FuncUnaryInstruction BitCount;
+ public readonly FuncUnaryInstruction BitReverse;
+ public readonly FuncUnaryInstruction Not;
+
+ // Compare (FuncBinaryInstruction: three instruction arguments, one instruction result)
+ public readonly FuncBinaryInstruction FOrdEqual;
+ public readonly FuncBinaryInstruction IEqual;
+ public readonly FuncBinaryInstruction FOrdGreaterThan;
+ public readonly FuncBinaryInstruction SGreaterThan;
+ public readonly FuncBinaryInstruction FOrdGreaterThanEqual;
+ public readonly FuncBinaryInstruction SGreaterThanEqual;
+ public readonly FuncBinaryInstruction FOrdLessThan;
+ public readonly FuncBinaryInstruction SLessThan;
+ public readonly FuncBinaryInstruction FOrdLessThanEqual;
+ public readonly FuncBinaryInstruction SLessThanEqual;
+ public readonly FuncBinaryInstruction FOrdNotEqual;
+ public readonly FuncBinaryInstruction INotEqual;
+
+ // CompareU32
+ public readonly FuncBinaryInstruction UGreaterThanEqual;
+ public readonly FuncBinaryInstruction UGreaterThan;
+ public readonly FuncBinaryInstruction ULessThanEqual;
+ public readonly FuncBinaryInstruction ULessThan;
+
+ // Binary
+ public readonly FuncBinaryInstruction FAdd;
+ public readonly FuncBinaryInstruction IAdd;
+ public readonly FuncBinaryInstruction FDiv;
+ public readonly FuncBinaryInstruction SDiv;
+ public readonly FuncBinaryInstruction GlslFMax;
+ public readonly FuncBinaryInstruction GlslSMax;
+ public readonly FuncBinaryInstruction GlslFMin;
+ public readonly FuncBinaryInstruction GlslSMin;
+ public readonly FuncBinaryInstruction FMul;
+ public readonly FuncBinaryInstruction IMul;
+ public readonly FuncBinaryInstruction FSub;
+ public readonly FuncBinaryInstruction ISub;
+
+ // BinaryBool
+ public readonly FuncBinaryInstruction LogicalAnd;
+ public readonly FuncBinaryInstruction LogicalNotEqual;
+ public readonly FuncBinaryInstruction LogicalOr;
+
+ // BinaryS32
+ public readonly FuncBinaryInstruction BitwiseAnd;
+ public readonly FuncBinaryInstruction BitwiseXor;
+ public readonly FuncBinaryInstruction BitwiseOr;
+ public readonly FuncBinaryInstruction ShiftLeftLogical;
+ public readonly FuncBinaryInstruction ShiftRightArithmetic;
+ public readonly FuncBinaryInstruction ShiftRightLogical;
+
+ // BinaryU32
+ public readonly FuncBinaryInstruction GlslUMax;
+ public readonly FuncBinaryInstruction GlslUMin;
+
+ // AtomicMemoryBinary (FuncQuaternaryInstruction: five instruction arguments, one instruction result)
+ public readonly FuncQuaternaryInstruction AtomicIAdd;
+ public readonly FuncQuaternaryInstruction AtomicAnd;
+ public readonly FuncQuaternaryInstruction AtomicSMin;
+ public readonly FuncQuaternaryInstruction AtomicUMin;
+ public readonly FuncQuaternaryInstruction AtomicSMax;
+ public readonly FuncQuaternaryInstruction AtomicUMax;
+ public readonly FuncQuaternaryInstruction AtomicOr;
+ public readonly FuncQuaternaryInstruction AtomicExchange;
+ public readonly FuncQuaternaryInstruction AtomicXor;
+
+ // Ternary (FuncTernaryInstruction: four instruction arguments, one instruction result)
+ public readonly FuncTernaryInstruction GlslFClamp;
+ public readonly FuncTernaryInstruction GlslSClamp;
+ public readonly FuncTernaryInstruction GlslFma;
+
+ // TernaryS32
+ public readonly FuncTernaryInstruction BitFieldSExtract;
+ public readonly FuncTernaryInstruction BitFieldUExtract;
+
+ // TernaryU32
+ public readonly FuncTernaryInstruction GlslUClamp;
+
+ // QuaternaryS32
+ public readonly FuncQuaternaryInstruction BitFieldInsert;
+
+ public SpirvDelegates(CodeGenContext context) // Binds every field to the same-named method group on context.
+ {
+ // Unary
+ GlslFAbs = context.GlslFAbs;
+ GlslSAbs = context.GlslSAbs;
+ GlslCeil = context.GlslCeil;
+ GlslCos = context.GlslCos;
+ GlslExp2 = context.GlslExp2;
+ GlslFloor = context.GlslFloor;
+ GlslLog2 = context.GlslLog2;
+ FNegate = context.FNegate;
+ SNegate = context.SNegate;
+ GlslInverseSqrt = context.GlslInverseSqrt;
+ GlslRoundEven = context.GlslRoundEven;
+ GlslSin = context.GlslSin;
+ GlslSqrt = context.GlslSqrt;
+ GlslTrunc = context.GlslTrunc;
+
+ // UnaryBool
+ LogicalNot = context.LogicalNot;
+
+ // UnaryFP32
+ DPdx = context.DPdx;
+ DPdy = context.DPdy;
+
+ // UnaryS32
+ BitCount = context.BitCount;
+ BitReverse = context.BitReverse;
+ Not = context.Not;
+
+ // Compare
+ FOrdEqual = context.FOrdEqual;
+ IEqual = context.IEqual;
+ FOrdGreaterThan = context.FOrdGreaterThan;
+ SGreaterThan = context.SGreaterThan;
+ FOrdGreaterThanEqual = context.FOrdGreaterThanEqual;
+ SGreaterThanEqual = context.SGreaterThanEqual;
+ FOrdLessThan = context.FOrdLessThan;
+ SLessThan = context.SLessThan;
+ FOrdLessThanEqual = context.FOrdLessThanEqual;
+ SLessThanEqual = context.SLessThanEqual;
+ FOrdNotEqual = context.FOrdNotEqual;
+ INotEqual = context.INotEqual;
+
+ // CompareU32
+ UGreaterThanEqual = context.UGreaterThanEqual;
+ UGreaterThan = context.UGreaterThan;
+ ULessThanEqual = context.ULessThanEqual;
+ ULessThan = context.ULessThan;
+
+ // Binary
+ FAdd = context.FAdd;
+ IAdd = context.IAdd;
+ FDiv = context.FDiv;
+ SDiv = context.SDiv;
+ GlslFMax = context.GlslFMax;
+ GlslSMax = context.GlslSMax;
+ GlslFMin = context.GlslFMin;
+ GlslSMin = context.GlslSMin;
+ FMul = context.FMul;
+ IMul = context.IMul;
+ FSub = context.FSub;
+ ISub = context.ISub;
+
+ // BinaryBool
+ LogicalAnd = context.LogicalAnd;
+ LogicalNotEqual = context.LogicalNotEqual;
+ LogicalOr = context.LogicalOr;
+
+ // BinaryS32
+ BitwiseAnd = context.BitwiseAnd;
+ BitwiseXor = context.BitwiseXor;
+ BitwiseOr = context.BitwiseOr;
+ ShiftLeftLogical = context.ShiftLeftLogical;
+ ShiftRightArithmetic = context.ShiftRightArithmetic;
+ ShiftRightLogical = context.ShiftRightLogical;
+
+ // BinaryU32
+ GlslUMax = context.GlslUMax;
+ GlslUMin = context.GlslUMin;
+
+ // AtomicMemoryBinary
+ AtomicIAdd = context.AtomicIAdd;
+ AtomicAnd = context.AtomicAnd;
+ AtomicSMin = context.AtomicSMin;
+ AtomicUMin = context.AtomicUMin;
+ AtomicSMax = context.AtomicSMax;
+ AtomicUMax = context.AtomicUMax;
+ AtomicOr = context.AtomicOr;
+ AtomicExchange = context.AtomicExchange;
+ AtomicXor = context.AtomicXor;
+
+ // Ternary
+ GlslFClamp = context.GlslFClamp;
+ GlslSClamp = context.GlslSClamp;
+ GlslFma = context.GlslFma;
+
+ // TernaryS32
+ BitFieldSExtract = context.BitFieldSExtract;
+ BitFieldUExtract = context.BitFieldUExtract;
+
+ // TernaryU32
+ GlslUClamp = context.GlslUClamp;
+
+ // QuaternaryS32
+ BitFieldInsert = context.BitFieldInsert;
+ }
+ }
+}
diff --git a/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/SpirvGenerator.cs b/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/SpirvGenerator.cs
new file mode 100644
index 00000000..3e11a974
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/SpirvGenerator.cs
@@ -0,0 +1,415 @@
+using Ryujinx.Common;
+using Ryujinx.Graphics.Shader.IntermediateRepresentation;
+using Ryujinx.Graphics.Shader.StructuredIr;
+using Ryujinx.Graphics.Shader.Translation;
+using System;
+using System.Collections.Generic;
+using static Spv.Specification;
+
+namespace Ryujinx.Graphics.Shader.CodeGen.Spirv
+{
+ using SpvInstruction = Spv.Generator.Instruction;
+ using SpvInstructionPool = Spv.Generator.GeneratorPool<Spv.Generator.Instruction>;
+ using SpvLiteralInteger = Spv.Generator.LiteralInteger;
+ using SpvLiteralIntegerPool = Spv.Generator.GeneratorPool<Spv.Generator.LiteralInteger>;
+
+ static class SpirvGenerator
+ {
+        // Resource pools for SPIR-V generation. Note: Increase count when more threads are being used.
+        private const int GeneratorPoolCount = 1;
+
+        // The pools and the lock guarding them are assigned exactly once, by the static constructor.
+        // They are readonly so that the object used in lock (PoolLock) can never be swapped out,
+        // which would silently break the mutual exclusion between concurrent Generate calls.
+        private static readonly ObjectPool<SpvInstructionPool> InstructionPool;
+        private static readonly ObjectPool<SpvLiteralIntegerPool> IntegerPool;
+        private static readonly object PoolLock;
+
+        static SpirvGenerator()
+        {
+            InstructionPool = new (() => new SpvInstructionPool(), GeneratorPoolCount);
+            IntegerPool = new (() => new SpvLiteralIntegerPool(), GeneratorPoolCount);
+            PoolLock = new object();
+        }
+
+        // Helper functions that make Generate register the SubgroupLaneId input definition.
+        private const HelperFunctionsMask NeedsInvocationIdMask =
+            HelperFunctionsMask.Shuffle |
+            HelperFunctionsMask.ShuffleDown |
+            HelperFunctionsMask.ShuffleUp |
+            HelperFunctionsMask.ShuffleXor |
+            HelperFunctionsMask.SwizzleAdd;
+
+ public static byte[] Generate(StructuredProgramInfo info, ShaderConfig config) // Builds a SPIR-V module for the structured program and returns the bytes produced by context.Generate().
+ {
+ SpvInstructionPool instPool;
+ SpvLiteralIntegerPool integerPool;
+
+ lock (PoolLock) // Pool allocation (here) and release (at the end) are both done while holding PoolLock.
+ {
+ instPool = InstructionPool.Allocate();
+ integerPool = IntegerPool.Allocate();
+ }
+
+ CodeGenContext context = new CodeGenContext(info, config, instPool, integerPool);
+
+ context.AddCapability(Capability.GroupNonUniformBallot); // The following capabilities are declared for every shader, regardless of stage.
+ context.AddCapability(Capability.GroupNonUniformShuffle);
+ context.AddCapability(Capability.GroupNonUniformVote);
+ context.AddCapability(Capability.ImageBuffer);
+ context.AddCapability(Capability.ImageGatherExtended);
+ context.AddCapability(Capability.ImageQuery);
+ context.AddCapability(Capability.SampledBuffer);
+
+ if (config.TransformFeedbackEnabled && config.LastInVertexPipeline) // Same condition gates ExecutionMode.Xfb on the entry point.
+ {
+ context.AddCapability(Capability.TransformFeedback);
+ }
+
+ if (config.Stage == ShaderStage.Fragment) // Stage-specific capabilities and extensions.
+ {
+ if (context.Info.IoDefinitions.Contains(new IoDefinition(StorageKind.Input, IoVariable.Layer))) // Reading Layer as a fragment input adds the Geometry capability.
+ {
+ context.AddCapability(Capability.Geometry);
+ }
+
+ if (context.Config.GpuAccessor.QueryHostSupportsFragmentShaderInterlock()) // Declared whenever the host supports it, even if the shader does not use interlock.
+ {
+ context.AddCapability(Capability.FragmentShaderPixelInterlockEXT);
+ context.AddExtension("SPV_EXT_fragment_shader_interlock");
+ }
+ }
+ else if (config.Stage == ShaderStage.Geometry)
+ {
+ context.AddCapability(Capability.Geometry);
+
+ if (config.GpPassthrough && context.Config.GpuAccessor.QueryHostSupportsGeometryShaderPassthrough()) // Passthrough needs both the shader flag and host support.
+ {
+ context.AddExtension("SPV_NV_geometry_shader_passthrough");
+ context.AddCapability(Capability.GeometryShaderPassthroughNV);
+ }
+ }
+ else if (config.Stage == ShaderStage.TessellationControl || config.Stage == ShaderStage.TessellationEvaluation)
+ {
+ context.AddCapability(Capability.Tessellation);
+ }
+ else if (config.Stage == ShaderStage.Vertex)
+ {
+ context.AddCapability(Capability.DrawParameters);
+ }
+
+ if (context.Info.IoDefinitions.Contains(new IoDefinition(StorageKind.Output, IoVariable.ViewportMask))) // Checked for all stages.
+ {
+ context.AddExtension("SPV_NV_viewport_array2");
+ context.AddCapability(Capability.ShaderViewportMaskNV);
+ }
+
+ if ((info.HelperFunctionsMask & NeedsInvocationIdMask) != 0) // Any shuffle/swizzle helper in use.
+ {
+ info.IoDefinitions.Add(new IoDefinition(StorageKind.Input, IoVariable.SubgroupLaneId)); // Mutates the caller's info before declarations are emitted.
+ }
+
+ Declarations.DeclareAll(context, info);
+
+ for (int funcIndex = 0; funcIndex < info.Functions.Count; funcIndex++) // First pass: create and register a SPIR-V function for every entry, before any body is emitted.
+ {
+ var function = info.Functions[funcIndex];
+ var retType = context.GetType(function.ReturnType);
+
+ var funcArgs = new SpvInstruction[function.InArguments.Length + function.OutArguments.Length]; // One slot per argument, inputs and outputs together.
+
+ for (int argIndex = 0; argIndex < funcArgs.Length; argIndex++)
+ {
+ var argType = context.GetType(function.GetArgumentType(argIndex));
+ var argPointerType = context.TypePointer(StorageClass.Function, argType); // Every argument is passed as a Function-storage pointer.
+ funcArgs[argIndex] = argPointerType;
+ }
+
+ var funcType = context.TypeFunction(retType, false, funcArgs);
+ var spvFunc = context.Function(retType, FunctionControlMask.MaskNone, funcType);
+
+ context.DeclareFunction(funcIndex, function, spvFunc);
+ }
+
+ for (int funcIndex = 0; funcIndex < info.Functions.Count; funcIndex++) // Second pass: emit the bodies.
+ {
+ Generate(context, info, funcIndex);
+ }
+
+ byte[] result = context.Generate();
+
+ lock (PoolLock) // NOTE(review): not reached if anything above throws, so the pooled objects are then never released - confirm this is acceptable.
+ {
+ InstructionPool.Release(instPool);
+ IntegerPool.Release(integerPool);
+ }
+
+ return result;
+ }
+
+ private static void Generate(CodeGenContext context, StructuredProgramInfo info, int funcIndex) // Emits the body of function funcIndex and, for function 0, the entry point plus its stage-specific execution modes.
+ {
+ var function = info.Functions[funcIndex];
+
+ (_, var spvFunc) = context.GetFunction(funcIndex); // NOTE(review): presumably the instruction registered through context.DeclareFunction by the public Generate - confirm.
+
+ context.AddFunction(spvFunc);
+ context.StartFunction();
+
+ Declarations.DeclareParameters(context, function);
+
+ context.EnterBlock(function.MainBlock);
+
+ Declarations.DeclareLocals(context, function);
+ Declarations.DeclareLocalForArgs(context, info.Functions);
+
+ Generate(context, function.MainBlock);
+
+ // Functions must always end with a return.
+ if (!(function.MainBlock.Last is AstOperation operation) || // Emit a return unless the last node is already a Return or Discard operation.
+ (operation.Inst != Instruction.Return && operation.Inst != Instruction.Discard))
+ {
+ context.Return();
+ }
+
+ context.FunctionEnd();
+
+ if (funcIndex == 0) // Function 0 is the one exported as the "main" entry point.
+ {
+ context.AddEntryPoint(context.Config.Stage.Convert(), spvFunc, "main", context.GetMainInterface());
+
+ if (context.Config.Stage == ShaderStage.TessellationControl)
+ {
+ context.AddExecutionMode(spvFunc, ExecutionMode.OutputVertices, (SpvLiteralInteger)context.Config.ThreadsPerInputPrimitive);
+ }
+ else if (context.Config.Stage == ShaderStage.TessellationEvaluation)
+ {
+ switch (context.Config.GpuAccessor.QueryTessPatchType()) // No default case: an unlisted patch type adds no execution mode.
+ {
+ case TessPatchType.Isolines:
+ context.AddExecutionMode(spvFunc, ExecutionMode.Isolines);
+ break;
+ case TessPatchType.Triangles:
+ context.AddExecutionMode(spvFunc, ExecutionMode.Triangles);
+ break;
+ case TessPatchType.Quads:
+ context.AddExecutionMode(spvFunc, ExecutionMode.Quads);
+ break;
+ }
+
+ switch (context.Config.GpuAccessor.QueryTessSpacing()) // No default case here either.
+ {
+ case TessSpacing.EqualSpacing:
+ context.AddExecutionMode(spvFunc, ExecutionMode.SpacingEqual);
+ break;
+ case TessSpacing.FractionalEventSpacing:
+ context.AddExecutionMode(spvFunc, ExecutionMode.SpacingFractionalEven);
+ break;
+ case TessSpacing.FractionalOddSpacing:
+ context.AddExecutionMode(spvFunc, ExecutionMode.SpacingFractionalOdd);
+ break;
+ }
+
+ bool tessCw = context.Config.GpuAccessor.QueryTessCw();
+
+ if (context.Config.Options.TargetApi == TargetApi.Vulkan)
+ {
+ // We invert the front face on Vulkan backend, so we need to do that here as well.
+ tessCw = !tessCw;
+ }
+
+ if (tessCw)
+ {
+ context.AddExecutionMode(spvFunc, ExecutionMode.VertexOrderCw);
+ }
+ else
+ {
+ context.AddExecutionMode(spvFunc, ExecutionMode.VertexOrderCcw);
+ }
+ }
+ else if (context.Config.Stage == ShaderStage.Geometry)
+ {
+ InputTopology inputTopology = context.Config.GpuAccessor.QueryPrimitiveTopology();
+
+ context.AddExecutionMode(spvFunc, inputTopology switch // Input primitive mode; unknown topologies throw.
+ {
+ InputTopology.Points => ExecutionMode.InputPoints,
+ InputTopology.Lines => ExecutionMode.InputLines,
+ InputTopology.LinesAdjacency => ExecutionMode.InputLinesAdjacency,
+ InputTopology.Triangles => ExecutionMode.Triangles, // Same ExecutionMode.Triangles value as the tessellation case above; this enum has no Input-prefixed triangles member used here.
+ InputTopology.TrianglesAdjacency => ExecutionMode.InputTrianglesAdjacency,
+ _ => throw new InvalidOperationException($"Invalid input topology \"{inputTopology}\".")
+ });
+
+ context.AddExecutionMode(spvFunc, ExecutionMode.Invocations, (SpvLiteralInteger)context.Config.ThreadsPerInputPrimitive);
+
+ context.AddExecutionMode(spvFunc, context.Config.OutputTopology switch // Output primitive mode; unknown topologies throw.
+ {
+ OutputTopology.PointList => ExecutionMode.OutputPoints,
+ OutputTopology.LineStrip => ExecutionMode.OutputLineStrip,
+ OutputTopology.TriangleStrip => ExecutionMode.OutputTriangleStrip,
+ _ => throw new InvalidOperationException($"Invalid output topology \"{context.Config.OutputTopology}\".")
+ });
+
+ int maxOutputVertices = context.Config.GpPassthrough ? context.InputVertices : context.Config.MaxOutputVertices; // With passthrough the count comes from context.InputVertices instead of the configured maximum.
+
+ context.AddExecutionMode(spvFunc, ExecutionMode.OutputVertices, (SpvLiteralInteger)maxOutputVertices);
+ }
+ else if (context.Config.Stage == ShaderStage.Fragment)
+ {
+ context.AddExecutionMode(spvFunc, context.Config.Options.TargetApi == TargetApi.Vulkan // Origin is upper-left for Vulkan and lower-left for any other target API.
+ ? ExecutionMode.OriginUpperLeft
+ : ExecutionMode.OriginLowerLeft);
+
+ if (context.Info.IoDefinitions.Contains(new IoDefinition(StorageKind.Output, IoVariable.FragmentOutputDepth))) // The shader writes fragment depth.
+ {
+ context.AddExecutionMode(spvFunc, ExecutionMode.DepthReplacing);
+ }
+
+ if (context.Config.GpuAccessor.QueryEarlyZForce())
+ {
+ context.AddExecutionMode(spvFunc, ExecutionMode.EarlyFragmentTests);
+ }
+
+ if ((info.HelperFunctionsMask & HelperFunctionsMask.FSI) != 0 && // Only when the FSI helper flag is set and the host supports interlock.
+ context.Config.GpuAccessor.QueryHostSupportsFragmentShaderInterlock())
+ {
+ context.AddExecutionMode(spvFunc, ExecutionMode.PixelInterlockOrderedEXT);
+ }
+ }
+ else if (context.Config.Stage == ShaderStage.Compute)
+ {
+ var localSizeX = (SpvLiteralInteger)context.Config.GpuAccessor.QueryComputeLocalSizeX();
+ var localSizeY = (SpvLiteralInteger)context.Config.GpuAccessor.QueryComputeLocalSizeY();
+ var localSizeZ = (SpvLiteralInteger)context.Config.GpuAccessor.QueryComputeLocalSizeZ();
+
+ context.AddExecutionMode(
+ spvFunc,
+ ExecutionMode.LocalSize,
+ localSizeX,
+ localSizeY,
+ localSizeZ);
+ }
+
+ if (context.Config.TransformFeedbackEnabled && context.Config.LastInVertexPipeline) // Applies to any stage; same condition as the TransformFeedback capability.
+ {
+ context.AddExecutionMode(spvFunc, ExecutionMode.Xfb);
+ }
+ }
+ }
+
+ private static void Generate(CodeGenContext context, AstBlock block) // Walks the AST under block with AstBlockVisitor, emitting control flow on block enter/leave and code for every visited node.
+ {
+ AstBlockVisitor visitor = new AstBlockVisitor(block);
+
+ var loopTargets = new Dictionary<AstBlock, (SpvInstruction, SpvInstruction)>(); // Per do-while block: (value returned by context.NewBlock(), continue target label).
+
+ context.LoopTargets = loopTargets; // NOTE(review): presumably read when generating LoopBreak/LoopContinue - confirm.
+
+ visitor.BlockEntered += (sender, e) =>
+ {
+ AstBlock mergeBlock = e.Block.Parent; // Labels following the parent are used as merge targets below.
+
+ if (e.Block.Type == AstBlockType.If)
+ {
+ AstBlock ifTrueBlock = e.Block;
+ AstBlock ifFalseBlock;
+
+ if (AstHelper.Next(e.Block) is AstBlock nextBlock && nextBlock.Type == AstBlockType.Else)
+ {
+ ifFalseBlock = nextBlock;
+ }
+ else
+ {
+ ifFalseBlock = mergeBlock; // No else block follows: the false edge uses the same block as the merge.
+ }
+
+ var condition = context.Get(AggregateType.Bool, e.Block.Condition);
+
+ context.SelectionMerge(context.GetNextLabel(mergeBlock), SelectionControlMask.MaskNone);
+ context.BranchConditional(condition, context.GetNextLabel(ifTrueBlock), context.GetNextLabel(ifFalseBlock));
+ }
+ else if (e.Block.Type == AstBlockType.DoWhile)
+ {
+ var continueTarget = context.Label();
+
+ loopTargets.Add(e.Block, (context.NewBlock(), continueTarget)); // Looked up again when the loop block is left.
+
+ context.LoopMerge(context.GetNextLabel(mergeBlock), continueTarget, LoopControlMask.MaskNone);
+ context.Branch(context.GetFirstLabel(e.Block));
+ }
+
+ context.EnterBlock(e.Block);
+ };
+
+ visitor.BlockLeft += (sender, e) =>
+ {
+ if (e.Block.Parent != null) // Nothing is emitted when leaving a block that has no parent.
+ {
+ if (e.Block.Type == AstBlockType.DoWhile)
+ {
+ // This is a loop, we need to jump back to the loop header
+ // if the condition is true.
+ AstBlock mergeBlock = e.Block.Parent;
+
+ (var loopTarget, var continueTarget) = loopTargets[e.Block];
+
+ context.Branch(continueTarget);
+ context.AddLabel(continueTarget); // The loop condition is evaluated inside the continue block.
+
+ var condition = context.Get(AggregateType.Bool, e.Block.Condition);
+
+ context.BranchConditional(condition, loopTarget, context.GetNextLabel(mergeBlock));
+ }
+ else
+ {
+ // We only need a branch if the last instruction didn't
+ // already cause the program to exit or jump elsewhere.
+ bool lastIsCf = e.Block.Last is AstOperation lastOp &&
+ (lastOp.Inst == Instruction.Discard ||
+ lastOp.Inst == Instruction.LoopBreak ||
+ lastOp.Inst == Instruction.LoopContinue ||
+ lastOp.Inst == Instruction.Return);
+
+ if (!lastIsCf)
+ {
+ context.Branch(context.GetNextLabel(e.Block.Parent));
+ }
+ }
+
+ bool hasElse = AstHelper.Next(e.Block) is AstBlock nextBlock && // Checks Else and ElseIf, whereas BlockEntered only looks for Else.
+ (nextBlock.Type == AstBlockType.Else ||
+ nextBlock.Type == AstBlockType.ElseIf);
+
+ // Re-enter the parent block.
+ if (e.Block.Parent != null && !hasElse) // The Parent != null part is already guaranteed by the enclosing if.
+ {
+ context.EnterBlock(e.Block.Parent);
+ }
+ }
+ };
+
+ foreach (IAstNode node in visitor.Visit()) // Nodes other than assignments and operations are ignored.
+ {
+ if (node is AstAssignment assignment)
+ {
+ var dest = (AstOperand)assignment.Destination; // Throws InvalidCastException if the destination is not an AstOperand.
+
+ if (dest.Type == OperandType.LocalVariable)
+ {
+ var source = context.Get(dest.VarType, assignment.Source);
+ context.Store(context.GetLocalPointer(dest), source);
+ }
+ else if (dest.Type == OperandType.Argument)
+ {
+ var source = context.Get(dest.VarType, assignment.Source);
+ context.Store(context.GetArgumentPointer(dest), source);
+ }
+ else
+ {
+ throw new NotImplementedException(dest.Type.ToString()); // Only local variables and arguments are supported as destinations.
+ }
+ }
+ else if (node is AstOperation operation)
+ {
+ Instructions.Generate(context, operation);
+ }
+ }
+ }
+ }
+}
diff --git a/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/TextureMeta.cs b/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/TextureMeta.cs
new file mode 100644
index 00000000..4de05603
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/TextureMeta.cs
@@ -0,0 +1,4 @@
+namespace Ryujinx.Graphics.Shader.CodeGen.Spirv
+{
+ readonly record struct TextureMeta(int CbufSlot, int Handle, TextureFormat Format); // Immutable value with record (member-wise) equality over CbufSlot, Handle and Format. NOTE(review): presumably used as a lookup key for declared textures - confirm.
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/Constants.cs b/src/Ryujinx.Graphics.Shader/Constants.cs
new file mode 100644
index 00000000..c6f9ef49
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/Constants.cs
@@ -0,0 +1,16 @@
+namespace Ryujinx.Graphics.Shader
+{
+ static class Constants // Compile-time constants for the shader project.
+ {
+ public const int ConstantBufferSize = 0x10000; // In bytes
+
+ public const int MaxAttributes = 16;
+ public const int AllAttributesMask = (int)(uint.MaxValue >> (32 - MaxAttributes)); // Low MaxAttributes bits set: 0xFFFF for 16 attributes.
+
+ public const int NvnBaseVertexByteOffset = 0x640; // NOTE(review): the three Nvn* values are consecutive 4-byte offsets; presumably locations of draw parameters inside a constant buffer - confirm.
+ public const int NvnBaseInstanceByteOffset = 0x644;
+ public const int NvnDrawIndexByteOffset = 0x648;
+
+ public const int StorageAlignment = 16; // NOTE(review): presumably in bytes - confirm.
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/Decoders/Block.cs b/src/Ryujinx.Graphics.Shader/Decoders/Block.cs
new file mode 100644
index 00000000..7d94e3f9
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/Decoders/Block.cs
@@ -0,0 +1,168 @@
+using Ryujinx.Graphics.Shader.IntermediateRepresentation;
+using System;
+using System.Collections.Generic;
+using System.Linq;
+
+namespace Ryujinx.Graphics.Shader.Decoders
+{
+    /// <summary>
+    /// Pairs an instruction recorded through <c>Block.AddPushOp</c> with a per-block operand map.
+    /// </summary>
+    class PushOpInfo
+    {
+        public InstOp Op { get; }
+
+        // Starts empty; keyed by block.
+        public Dictionary<Block, Operand> Consumers = new Dictionary<Block, Operand>();
+
+        public PushOpInfo(InstOp op) => Op = op;
+    }
+
+    /// <summary>
+    /// Immutable pair of a <see cref="PushOpInfo"/> and an integer push op id.
+    /// </summary>
+    readonly struct SyncTarget
+    {
+        public PushOpInfo PushOpInfo { get; }
+        public int PushOpId { get; }
+
+        public SyncTarget(PushOpInfo pushOpInfo, int pushOpId)
+        {
+            (PushOpInfo, PushOpId) = (pushOpInfo, pushOpId);
+        }
+    }
+
+ class Block
+ {
+        // Start address and exclusive end address of the block.
+        public ulong Address { get; set; }
+        public ulong EndAddress { get; set; }
+
+        // Control flow edges; both lists start empty.
+        public List<Block> Predecessors { get; } = new List<Block>();
+        public List<Block> Successors { get; } = new List<Block>();
+
+        // Decoded instructions, the subset recorded through AddPushOp, and the sync target table.
+        public List<InstOp> OpCodes { get; } = new List<InstOp>();
+        public List<PushOpInfo> PushOpCodes { get; } = new List<PushOpInfo>();
+        public Dictionary<ulong, SyncTarget> SyncTargets { get; } = new Dictionary<ulong, SyncTarget>();
+
+        /// <summary>
+        /// Creates an empty block starting at <paramref name="address"/>.
+        /// </summary>
+        public Block(ulong address) => Address = address;
+
+        /// <summary>
+        /// Splits this block in two at the address of <paramref name="rightBlock"/>.
+        /// This block keeps the instructions below that address. <paramref name="rightBlock"/> receives the
+        /// remaining instructions, this block's end address, successors and sync targets, plus the push ops
+        /// at or above the split address, and becomes this block's only successor.
+        /// </summary>
+        /// <param name="rightBlock">Block whose address lies inside this block</param>
+        /// <exception cref="ArgumentException">This block has no instruction at or after the split address</exception>
+        public void Split(Block rightBlock)
+        {
+            // An empty block has nothing to hand over. Fail with the same exception as any other
+            // impossible split instead of an index error from the element access below.
+            if (OpCodes.Count == 0)
+            {
+                throw new ArgumentException("Can't split at right block address.");
+            }
+
+            int splitIndex = BinarySearch(OpCodes, rightBlock.Address);
+
+            // The search returns the last probed index, which may be the instruction just below the address.
+            if (OpCodes[splitIndex].Address < rightBlock.Address)
+            {
+                splitIndex++;
+            }
+
+            int splitCount = OpCodes.Count - splitIndex;
+            if (splitCount <= 0)
+            {
+                throw new ArgumentException("Can't split at right block address.");
+            }
+
+            rightBlock.EndAddress = EndAddress;
+            rightBlock.Successors.AddRange(Successors);
+            rightBlock.Predecessors.Add(this);
+
+            EndAddress = rightBlock.Address;
+
+            Successors.Clear();
+            Successors.Add(rightBlock);
+
+            // Move ops.
+            rightBlock.OpCodes.AddRange(OpCodes.GetRange(splitIndex, splitCount));
+
+            OpCodes.RemoveRange(splitIndex, splitCount);
+
+            // Update push consumers that point to this block.
+            foreach (SyncTarget syncTarget in SyncTargets.Values)
+            {
+                PushOpInfo pushOpInfo = syncTarget.PushOpInfo;
+
+                Operand local = pushOpInfo.Consumers[this];
+                pushOpInfo.Consumers.Remove(this);
+                pushOpInfo.Consumers.Add(rightBlock, local);
+            }
+
+            // All sync targets move to the right block.
+            foreach ((ulong key, SyncTarget value) in SyncTargets)
+            {
+                rightBlock.SyncTargets.Add(key, value);
+            }
+
+            SyncTargets.Clear();
+
+            // Move push ops: everything from the first push op at or above the split address onwards.
+            int firstMovedPushOp = PushOpCodes.FindIndex(pushOp => pushOp.Op.Address >= rightBlock.Address);
+
+            if (firstMovedPushOp >= 0)
+            {
+                int movedCount = PushOpCodes.Count - firstMovedPushOp;
+
+                rightBlock.PushOpCodes.AddRange(PushOpCodes.GetRange(firstMovedPushOp, movedCount));
+                PushOpCodes.RemoveRange(firstMovedPushOp, movedCount);
+            }
+        }
+
+        /// <summary>
+        /// Binary search over <paramref name="opCodes"/> by instruction address.
+        /// Returns the index of the exact match, otherwise the last index that was probed (0 for an empty list).
+        /// </summary>
+        private static int BinarySearch(List<InstOp> opCodes, ulong address)
+        {
+            int low = 0;
+            int high = opCodes.Count - 1;
+            int probe = 0;
+
+            while (low <= high)
+            {
+                probe = low + ((high - low) >> 1);
+
+                var probeAddress = opCodes[probe].Address;
+
+                if (probeAddress == address)
+                {
+                    return probe;
+                }
+
+                if (address < probeAddress)
+                {
+                    high = probe - 1;
+                }
+                else
+                {
+                    low = probe + 1;
+                }
+            }
+
+            return probe;
+        }
+
+        /// <summary>
+        /// Returns the last instruction of the block, or the default <see cref="InstOp"/> value when the block is empty.
+        /// </summary>
+        public InstOp GetLastOp()
+        {
+            int count = OpCodes.Count;
+
+            return count == 0 ? default : OpCodes[count - 1];
+        }
+
+        /// <summary>
+        /// True when the block is not empty and its last instruction is not an unconditional branch.
+        /// </summary>
+        public bool HasNext()
+        {
+            if (OpCodes.Count == 0)
+            {
+                return false;
+            }
+
+            InstOp lastOp = GetLastOp();
+
+            return !Decoder.IsUnconditionalBranch(ref lastOp);
+        }
+
+        /// <summary>
+        /// Wraps <paramref name="op"/> in a new <see cref="PushOpInfo"/> and appends it to <see cref="PushOpCodes"/>.
+        /// </summary>
+        public void AddPushOp(InstOp op)
+        {
+            PushOpInfo pushOpInfo = new PushOpInfo(op);
+
+            PushOpCodes.Add(pushOpInfo);
+        }
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/Decoders/DecodedFunction.cs b/src/Ryujinx.Graphics.Shader/Decoders/DecodedFunction.cs
new file mode 100644
index 00000000..7a172fe6
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/Decoders/DecodedFunction.cs
@@ -0,0 +1,48 @@
+using System;
+using System.Collections.Generic;
+
+namespace Ryujinx.Graphics.Shader.Decoders
+{
+    /// <summary>
+    /// A decoded function: its start address, its blocks (set once) and the set of functions calling it.
+    /// </summary>
+    class DecodedFunction
+    {
+        private readonly HashSet<DecodedFunction> _callers = new HashSet<DecodedFunction>();
+
+        public bool IsCompilerGenerated => Type != FunctionType.User;
+        public FunctionType Type { get; set; } = FunctionType.User;
+
+        // -1 until an id is assigned from outside.
+        public int Id { get; set; } = -1;
+
+        public ulong Address { get; }
+        public Block[] Blocks { get; private set; }
+
+        public DecodedFunction(ulong address) => Address = address;
+
+        /// <summary>
+        /// Assigns the blocks of the function. May only be called while <see cref="Blocks"/> is still null.
+        /// </summary>
+        /// <exception cref="InvalidOperationException">Blocks were already assigned</exception>
+        public void SetBlocks(Block[] blocks)
+        {
+            if (Blocks is not null)
+            {
+                throw new InvalidOperationException("Blocks have already been set.");
+            }
+
+            Blocks = blocks;
+        }
+
+        /// <summary>
+        /// Registers <paramref name="caller"/> as a caller of this function.
+        /// </summary>
+        public void AddCaller(DecodedFunction caller) => _callers.Add(caller);
+
+        /// <summary>
+        /// Unregisters <paramref name="caller"/>; when that removes the last registered caller,
+        /// the function type becomes <see cref="FunctionType.Unused"/>.
+        /// </summary>
+        public void RemoveCaller(DecodedFunction caller)
+        {
+            bool wasRegistered = _callers.Remove(caller);
+
+            if (wasRegistered && _callers.Count == 0)
+            {
+                Type = FunctionType.Unused;
+            }
+        }
+    }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/Decoders/DecodedProgram.cs b/src/Ryujinx.Graphics.Shader/Decoders/DecodedProgram.cs
new file mode 100644
index 00000000..2dd60155
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/Decoders/DecodedProgram.cs
@@ -0,0 +1,57 @@
+using System;
+using System.Collections;
+using System.Collections.Generic;
+
+namespace Ryujinx.Graphics.Shader.Decoders
+{
+    /// <summary>
+    /// Result of decoding: the main function, all functions indexed by address, and the list of
+    /// functions that were given an id. Enumerating the program yields every function of the address map.
+    /// </summary>
+    readonly struct DecodedProgram : IEnumerable<DecodedFunction>
+    {
+        private readonly IReadOnlyDictionary<ulong, DecodedFunction> _functions;
+        private readonly List<DecodedFunction> _functionsWithId;
+
+        public DecodedFunction MainFunction { get; }
+        public int FunctionsWithIdCount => _functionsWithId.Count;
+
+        public DecodedProgram(DecodedFunction mainFunction, IReadOnlyDictionary<ulong, DecodedFunction> functions)
+        {
+            _functions = functions;
+            _functionsWithId = new List<DecodedFunction>();
+            MainFunction = mainFunction;
+        }
+
+        /// <summary>
+        /// Returns the function starting at <paramref name="address"/>, or null when there is none.
+        /// </summary>
+        public DecodedFunction GetFunctionByAddress(ulong address)
+        {
+            return _functions.TryGetValue(address, out DecodedFunction found) ? found : null;
+        }
+
+        /// <summary>
+        /// Returns the function that was assigned <paramref name="id"/>.
+        /// </summary>
+        /// <exception cref="ArgumentOutOfRangeException">The id is negative or was never assigned</exception>
+        public DecodedFunction GetFunctionById(int id)
+        {
+            bool inRange = id >= 0 && id < _functionsWithId.Count;
+
+            if (!inRange)
+            {
+                throw new ArgumentOutOfRangeException(nameof(id));
+            }
+
+            return _functionsWithId[id];
+        }
+
+        /// <summary>
+        /// Gives <paramref name="function"/> the next free id (its index in the id list) and stores it.
+        /// </summary>
+        public void AddFunctionAndSetId(DecodedFunction function)
+        {
+            int nextId = _functionsWithId.Count;
+
+            function.Id = nextId;
+            _functionsWithId.Add(function);
+        }
+
+        public IEnumerator<DecodedFunction> GetEnumerator() => _functions.Values.GetEnumerator();
+
+        IEnumerator IEnumerable.GetEnumerator() => GetEnumerator();
+    }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/Decoders/Decoder.cs b/src/Ryujinx.Graphics.Shader/Decoders/Decoder.cs
new file mode 100644
index 00000000..c619b9bb
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/Decoders/Decoder.cs
@@ -0,0 +1,765 @@
+using Ryujinx.Graphics.Shader.Translation;
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Runtime.CompilerServices;
+
+using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
+
+namespace Ryujinx.Graphics.Shader.Decoders
+{
+ static class Decoder
+ {
+ public static DecodedProgram Decode(ShaderConfig config, ulong startAddress) // Decodes the function at address 0 and every function reached through CAL, and returns them as a DecodedProgram.
+ {
+ Queue<DecodedFunction> functionsQueue = new Queue<DecodedFunction>();
+ Dictionary<ulong, DecodedFunction> functionsVisited = new Dictionary<ulong, DecodedFunction>();
+
+ DecodedFunction EnqueueFunction(ulong address) // Returns the function for the address, creating and queueing it the first time it is seen.
+ {
+ if (!functionsVisited.TryGetValue(address, out DecodedFunction function))
+ {
+ functionsVisited.Add(address, function = new DecodedFunction(address));
+ functionsQueue.Enqueue(function);
+ }
+
+ return function;
+ }
+
+ DecodedFunction mainFunction = EnqueueFunction(0); // The main function is always at address 0; startAddress is only forwarded to FillBlock.
+
+ while (functionsQueue.TryDequeue(out DecodedFunction currentFunction))
+ {
+ List<Block> blocks = new List<Block>(); // Kept sorted by address (see the insertion code below).
+ Queue<Block> workQueue = new Queue<Block>();
+ Dictionary<ulong, Block> visited = new Dictionary<ulong, Block>();
+
+ Block GetBlock(ulong blkAddress) // Returns the block for the address, creating and queueing it the first time it is requested.
+ {
+ if (!visited.TryGetValue(blkAddress, out Block block))
+ {
+ block = new Block(blkAddress);
+
+ workQueue.Enqueue(block);
+ visited.Add(blkAddress, block);
+ }
+
+ return block;
+ }
+
+ GetBlock(currentFunction.Address); // Seeds the work queue with the function entry block.
+
+ bool hasNewTarget;
+
+ do
+ {
+ while (workQueue.TryDequeue(out Block currBlock))
+ {
+ // Check if the current block is inside another block.
+ if (BinarySearch(blocks, currBlock.Address, out int nBlkIndex)) // nBlkIndex is the last probed index whether or not a containing block was found.
+ {
+ Block nBlock = blocks[nBlkIndex];
+
+ if (nBlock.Address == currBlock.Address)
+ {
+ throw new InvalidOperationException("Found duplicate block address on the list.");
+ }
+
+ nBlock.Split(currBlock); // currBlock takes over the tail of the containing block, so it is not decoded again.
+ blocks.Insert(nBlkIndex + 1, currBlock);
+
+ continue;
+ }
+
+ // If we have a block after the current one, set the limit address.
+ ulong limitAddress = ulong.MaxValue;
+
+ if (nBlkIndex != blocks.Count) // Only equal when the list is empty (index 0, count 0).
+ {
+ Block nBlock = blocks[nBlkIndex];
+
+ int nextIndex = nBlkIndex + 1;
+
+ if (nBlock.Address < currBlock.Address && nextIndex < blocks.Count)
+ {
+ limitAddress = blocks[nextIndex].Address;
+ }
+ else if (nBlock.Address > currBlock.Address)
+ {
+ limitAddress = blocks[nBlkIndex].Address;
+ }
+ }
+
+ FillBlock(config, currBlock, limitAddress, startAddress);
+
+ if (currBlock.OpCodes.Count != 0)
+ {
+ // We should have blocks for all possible branch targets,
+ // including those from PBK/PCNT/SSY instructions.
+ foreach (PushOpInfo pushOp in currBlock.PushOpCodes)
+ {
+ GetBlock(pushOp.Op.GetAbsoluteAddress());
+ }
+
+ // Set child blocks. "Branch" is the block the branch instruction
+ // points to (when taken), "Next" is the block at the next address,
+ // executed when the branch is not taken. For Unconditional Branches
+ // or end of program, Next is null.
+ InstOp lastOp = currBlock.GetLastOp();
+
+ if (lastOp.Name == InstName.Cal)
+ {
+ EnqueueFunction(lastOp.GetAbsoluteAddress()).AddCaller(currentFunction); // The callee is decoded as a separate function; no successor edge is added for the call target.
+ }
+ else if (lastOp.Name == InstName.Bra)
+ {
+ Block succBlock = GetBlock(lastOp.GetAbsoluteAddress());
+ currBlock.Successors.Add(succBlock);
+ succBlock.Predecessors.Add(currBlock);
+ }
+
+ if (!IsUnconditionalBranch(ref lastOp))
+ {
+ Block succBlock = GetBlock(currBlock.EndAddress);
+ currBlock.Successors.Insert(0, succBlock); // The fall-through block is always placed first in Successors.
+ succBlock.Predecessors.Add(currBlock);
+ }
+ }
+
+ // Insert the new block on the list (sorted by address).
+ if (blocks.Count != 0)
+ {
+ Block nBlock = blocks[nBlkIndex];
+
+ blocks.Insert(nBlkIndex + (nBlock.Address < currBlock.Address ? 1 : 0), currBlock); // After the probed block when that one starts lower, otherwise in its place.
+ }
+ else
+ {
+ blocks.Add(currBlock);
+ }
+ }
+
+ // Propagate SSY/PBK addresses into their uses (SYNC/BRK).
+ foreach (Block block in blocks.Where(x => x.PushOpCodes.Count != 0))
+ {
+ for (int pushOpIndex = 0; pushOpIndex < block.PushOpCodes.Count; pushOpIndex++)
+ {
+ PropagatePushOp(visited, block, pushOpIndex);
+ }
+ }
+
+ // Try to find targets for BRX (indirect branch) instructions.
+ hasNewTarget = FindBrxTargets(config, blocks, GetBlock); // GetBlock queues any newly discovered target on workQueue.
+
+ // If we discovered new branch targets from the BRX instruction,
+ // we need another round of decoding to decode the new blocks.
+ // Additionally, we may have more SSY/PBK targets to propagate,
+ // and new BRX instructions.
+ }
+ while (hasNewTarget);
+
+ currentFunction.SetBlocks(blocks.ToArray());
+ }
+
+ return new DecodedProgram(mainFunction, functionsVisited); // The program shares the dictionary of every function visited.
+ }
+
+        /// <summary>
+        /// Searches the address-sorted <paramref name="blocks"/> for the block whose
+        /// [Address, EndAddress) range contains <paramref name="address"/>.
+        /// </summary>
+        /// <param name="index">Last probed index (0 for an empty list); the containing block on success</param>
+        /// <returns>True when a containing block was found</returns>
+        private static bool BinarySearch(List<Block> blocks, ulong address, out int index)
+        {
+            index = 0;
+
+            int low = 0;
+            int high = blocks.Count - 1;
+
+            while (low <= high)
+            {
+                index = low + ((high - low) >> 1);
+
+                Block candidate = blocks[index];
+
+                if (address < candidate.Address)
+                {
+                    high = index - 1;
+                }
+                else if (address < candidate.EndAddress)
+                {
+                    return true;
+                }
+                else
+                {
+                    low = index + 1;
+                }
+            }
+
+            return false;
+        }
+
+ private static void FillBlock(ShaderConfig config, Block block, ulong limitAddress, ulong startAddress) // Decodes instructions into block from block.Address until a branch-flagged instruction or limitAddress, then sets block.EndAddress.
+ {
+ IGpuAccessor gpuAccessor = config.GpuAccessor;
+
+ ulong address = block.Address;
+ int bufferOffset = 0;
+ ReadOnlySpan<ulong> buffer = ReadOnlySpan<ulong>.Empty; // Empty at first, so the first decoded instruction always triggers a GetCode call.
+
+ InstOp op = default;
+
+ do
+ {
+ if (address + 7 >= limitAddress) // Stop before an instruction whose 8 bytes would reach limitAddress.
+ {
+ break;
+ }
+
+ // Ignore scheduling instructions, which are written every 32 bytes.
+ if ((address & 0x1f) == 0)
+ {
+ address += 8;
+ bufferOffset++; // Keeps the buffer position in step with the skipped 8-byte word.
+ continue; // In a do-while this jumps to the loop condition, which tests the previously decoded op.
+ }
+
+ if (bufferOffset >= buffer.Length)
+ {
+ buffer = gpuAccessor.GetCode(startAddress + address, 8); // NOTE(review): the second argument is presumably a minimum size in bytes - confirm against IGpuAccessor.GetCode.
+ bufferOffset = 0;
+ }
+
+ ulong opCode = buffer[bufferOffset++];
+
+ op = InstTable.GetOp(address, opCode);
+
+ if (op.Props.HasFlag(InstProps.TexB)) // Side effect on config: records that the shader uses the Bindless feature.
+ {
+ config.SetUsedFeature(FeatureFlags.Bindless);
+ }
+
+ if (op.Name == InstName.Ald || op.Name == InstName.Ast || op.Name == InstName.Ipa) // Attribute accesses are recorded on config while decoding.
+ {
+ SetUserAttributeUses(config, op.Name, opCode);
+ }
+ else if (op.Name == InstName.Pbk || op.Name == InstName.Pcnt || op.Name == InstName.Ssy) // Push ops are tracked separately in addition to being added to OpCodes below.
+ {
+ block.AddPushOp(op);
+ }
+
+ block.OpCodes.Add(op);
+
+ address += 8;
+ }
+ while (!op.Props.HasFlag(InstProps.Bra)); // The block ends at the first instruction flagged with InstProps.Bra.
+
+ block.EndAddress = address; // Exclusive end: the address just past the last consumed 8-byte word.
+ }
+
+        /// <summary>
+        /// Records on <paramref name="config"/> which attributes an AST, ALD or IPA instruction touches.
+        /// Indexed accesses mark all input or output user attributes as used; direct accesses mark
+        /// each accessed element individually.
+        /// </summary>
+        /// <param name="config">Shader configuration that receives the usage information</param>
+        /// <param name="name">Instruction name; anything other than AST or ALD is decoded as IPA</param>
+        /// <param name="opCode">Raw instruction encoding</param>
+        private static void SetUserAttributeUses(ShaderConfig config, InstName name, ulong opCode)
+        {
+            int offset;
+            int count = 1;
+            bool isStore = false;
+            bool indexed;
+            bool perPatch = false;
+
+            switch (name)
+            {
+                case InstName.Ast:
+                    InstAst opAst = new InstAst(opCode);
+                    count = (int)opAst.AlSize + 1;
+                    offset = opAst.Imm11;
+                    indexed = opAst.Phys;
+                    perPatch = opAst.P;
+                    isStore = true;
+                    break;
+                case InstName.Ald:
+                    InstAld opAld = new InstAld(opCode);
+                    count = (int)opAld.AlSize + 1;
+                    offset = opAld.Imm11;
+                    indexed = opAld.Phys;
+                    perPatch = opAld.P;
+                    isStore = opAld.O;
+                    break;
+                default: // InstName.Ipa
+                    InstIpa opIpa = new InstIpa(opCode);
+                    offset = opIpa.Imm10;
+                    indexed = opIpa.Idx;
+                    break;
+            }
+
+            if (indexed)
+            {
+                // The accessed attribute is not known statically, so all of them are marked as used.
+                if (isStore)
+                {
+                    config.SetAllOutputUserAttributes();
+                    config.SetUsedFeature(FeatureFlags.OaIndexing);
+                }
+                else
+                {
+                    config.SetAllInputUserAttributes();
+                    config.SetUsedFeature(FeatureFlags.IaIndexing);
+                }
+
+                return;
+            }
+
+            for (int elemIndex = 0; elemIndex < count; elemIndex++)
+            {
+                // Each element is 4 bytes wide.
+                int attr = offset + elemIndex * 4;
+
+                if (perPatch)
+                {
+                    bool isPerPatchUserAttr =
+                        attr >= AttributeConsts.UserAttributePerPatchBase &&
+                        attr < AttributeConsts.UserAttributePerPatchEnd;
+
+                    if (isPerPatchUserAttr)
+                    {
+                        int location = (attr - AttributeConsts.UserAttributePerPatchBase) / 16;
+
+                        if (isStore)
+                        {
+                            config.SetOutputUserAttributePerPatch(location);
+                        }
+                        else
+                        {
+                            config.SetInputUserAttributePerPatch(location);
+                        }
+                    }
+                }
+                else if (attr >= AttributeConsts.UserAttributeBase && attr < AttributeConsts.UserAttributeEnd)
+                {
+                    int userAttr = attr - AttributeConsts.UserAttributeBase;
+                    int location = userAttr / 16;
+
+                    if (isStore)
+                    {
+                        config.SetOutputUserAttribute(location);
+                    }
+                    else
+                    {
+                        // The second argument is the component (0-3) inside the 16-byte attribute.
+                        config.SetInputUserAttribute(location, (userAttr >> 2) & 3);
+                    }
+                }
+
+                if (isStore)
+                {
+                    continue;
+                }
+
+                bool isFixedFuncAttr =
+                    attr == AttributeConsts.FogCoord ||
+                    (attr >= AttributeConsts.FrontColorDiffuseR && attr < AttributeConsts.ClipDistance0) ||
+                    (attr >= AttributeConsts.TexCoordBase && attr < AttributeConsts.TexCoordEnd);
+
+                if (isFixedFuncAttr)
+                {
+                    config.SetUsedFeature(FeatureFlags.FixedFuncAttr);
+                }
+            }
+        }
+
+        /// <summary>
+        /// True when <paramref name="op"/> is unconditional and carries the <see cref="InstProps.Bra"/> flag.
+        /// </summary>
+        public static bool IsUnconditionalBranch(ref InstOp op)
+        {
+            if (!IsUnconditional(ref op))
+            {
+                return false;
+            }
+
+            return op.Props.HasFlag(InstProps.Bra);
+        }
+
+        /// <summary>
+        /// True when <paramref name="op"/> always executes: its predicate is the non-inverted "true" predicate
+        /// and, for BRA and EXIT, its condition code is <see cref="Ccc.T"/>.
+        /// </summary>
+        private static bool IsUnconditional(ref InstOp op)
+        {
+            InstConditional conditional = new InstConditional(op.RawOpCode);
+
+            // Only BRA and EXIT have their condition code checked.
+            bool checksConditionCode = op.Name == InstName.Bra || op.Name == InstName.Exit;
+
+            if (checksConditionCode && conditional.Ccc != Ccc.T)
+            {
+                return false;
+            }
+
+            bool predicateAlwaysTrue = conditional.Pred == RegisterConsts.PredicateTrueIndex;
+
+            return predicateAlwaysTrue && !conditional.PredInv;
+        }
+
+        /// <summary>
+        /// Resolves the possible targets of BRX (indirect branch) instructions that end a block and have not
+        /// been resolved yet, linking each target block as a successor of the branching block.
+        /// </summary>
+        /// <param name="config">Shader configuration, used to read jump offsets from constant buffer 1</param>
+        /// <param name="blocks">Blocks decoded so far</param>
+        /// <param name="getBlock">Returns the block at an address, creating it when needed</param>
+        /// <returns>True when at least one target edge was added, meaning another decoding round is needed</returns>
+        private static bool FindBrxTargets(ShaderConfig config, IEnumerable<Block> blocks, Func<ulong, Block> getBlock)
+        {
+            bool hasNewTarget = false;
+
+            foreach (Block block in blocks)
+            {
+                InstOp lastOp = block.GetLastOp();
+                bool hasNext = block.HasNext();
+
+                // A BRX block that already has more successors than its fall-through was resolved earlier.
+                if (lastOp.Name != InstName.Brx || block.Successors.Count != (hasNext ? 1 : 0))
+                {
+                    continue;
+                }
+
+                HashSet<ulong> visited = new HashSet<ulong>();
+
+                InstBrx opBrx = new InstBrx(lastOp.RawOpCode);
+                ulong baseOffset = lastOp.GetAbsoluteAddress();
+
+                // An indirect branch could go anywhere,
+                // try to get the possible target offsets from the constant buffer.
+                (int cbBaseOffset, int cbOffsetsCount) = FindBrxTargetRange(block, opBrx.SrcA);
+
+                for (int i = 0; i < cbOffsetsCount; i++)
+                {
+                    uint targetOffset = config.ConstantBuffer1Read(cbBaseOffset + i * 4);
+                    ulong targetAddress = baseOffset + targetOffset;
+
+                    if (visited.Add(targetAddress))
+                    {
+                        Block target = getBlock(targetAddress);
+                        target.Predecessors.Add(block);
+                        block.Successors.Add(target);
+
+                        // Only report progress when an edge was really added. A non-zero count that
+                        // yields no target would otherwise make the caller repeat its decoding loop
+                        // forever, because this block would keep looking unresolved.
+                        hasNewTarget = true;
+                    }
+                }
+            }
+
+            return hasNewTarget;
+        }
+
+        /// <summary>
+        /// Tries to locate the jump table used by a BRX instruction by following register writes
+        /// backwards from the last instruction of <paramref name="block"/>.
+        /// </summary>
+        /// <param name="block">Block ending with the BRX instruction</param>
+        /// <param name="brxReg">Register holding the branch offset</param>
+        /// <returns>Constant buffer byte offset of the table and its entry count, or (0, 0) when the pattern does not match</returns>
+        private static (int, int) FindBrxTargetRange(Block block, int brxReg)
+        {
+            // The instruction sequence being matched is:
+            //
+            // IMNMX.U32 Rx, Rx, UpperBound, PT
+            // SHL Rx, Rx, 0x2
+            // LDC Rx, c[0x1][Rx+BaseOffset]
+            //
+            // Rx is any register, "UpperBound" and "BaseOffset" are constants.
+            // The sequence is assumed to be what the compiler emits before BRX, which is usually
+            // used to implement jump tables for switch statement optimizations.
+            // When it matches, "BaseOffset" is the byte offset of the jump offsets in the constant
+            // buffer and "UpperBound" is the number of offsets for the BRX, minus 1.
+
+            (int, int) noMatch = (0, 0);
+
+            HashSet<Block> seenBlocks = new HashSet<Block>();
+
+            // Step 1: the branch register must be written by an LDC from constant buffer 1.
+            BlockLocation brxLocation = new BlockLocation(block, block.OpCodes.Count - 1);
+            BlockLocation ldcLocation = FindFirstRegWrite(seenBlocks, brxLocation, brxReg);
+
+            bool isLdc = ldcLocation.Block != null &&
+                ldcLocation.Block.OpCodes[ldcLocation.Index].Name == InstName.Ldc;
+
+            if (!isLdc)
+            {
+                return noMatch;
+            }
+
+            GetOp<InstLdc>(ldcLocation, out var opLdc);
+
+            if (opLdc.CbufSlot != 1 || opLdc.AddressMode != 0)
+            {
+                return noMatch;
+            }
+
+            // Step 2: the LDC address register must come from a shift left by 2 (index * 4).
+            BlockLocation shlLocation = FindFirstRegWrite(seenBlocks, ldcLocation, opLdc.SrcA);
+
+            bool isShlImm = shlLocation.Block != null && shlLocation.IsImmInst(InstName.Shl);
+
+            if (!isShlImm)
+            {
+                return noMatch;
+            }
+
+            GetOp<InstShlI>(shlLocation, out var opShl);
+
+            if (opShl.Imm20 != 2)
+            {
+                return noMatch;
+            }
+
+            // Step 3: the shifted register must come from an unsigned, unpredicated IMNMX with an immediate bound.
+            BlockLocation imnmxLocation = FindFirstRegWrite(seenBlocks, shlLocation, opShl.SrcA);
+
+            bool isImnmxImm = imnmxLocation.Block != null && imnmxLocation.IsImmInst(InstName.Imnmx);
+
+            if (!isImnmxImm)
+            {
+                return noMatch;
+            }
+
+            GetOp<InstImnmxI>(imnmxLocation, out var opImnmx);
+
+            if (opImnmx.Signed || opImnmx.SrcPred != RegisterConsts.PredicateTrueIndex || opImnmx.SrcPredInv)
+            {
+                return noMatch;
+            }
+
+            return (opLdc.CbufOffset, opImnmx.Imm20 + 1);
+        }
+
+ private static void GetOp<T>(BlockLocation location, out T op) where T : unmanaged
+ {
+ // Reinterprets the raw 64-bit encoding of the instruction at "location" as the decoder struct T.
+ // NOTE(review): assumes T is not larger than a ulong (the decoder structs wrap a single ulong) - confirm for new callers.
+ InstOp instruction = location.Block.OpCodes[location.Index];
+ ulong encoding = instruction.RawOpCode;
+ op = Unsafe.As<ulong, T>(ref encoding);
+ }
+
+ private readonly struct BlockLocation
+ {
+ // Block holding the instruction. FindFirstRegWrite returns a location with a null block when nothing matched.
+ public Block Block { get; }
+
+ // Position inside Block.OpCodes. FindFirstRegWrite also passes OpCodes.Count here as a one-past-the-end start position.
+ public int Index { get; }
+
+ public BlockLocation(Block block, int index)
+ {
+ Index = index;
+ Block = block;
+ }
+
+ // True when the instruction at this location has the given name and carries the InstProps.Ib property flag.
+ public bool IsImmInst(InstName name)
+ {
+ InstOp candidate = Block.OpCodes[Index];
+
+ if (candidate.Name != name)
+ {
+ return false;
+ }
+
+ return candidate.Props.HasFlag(InstProps.Ib);
+ }
+ }
+
+ private static BlockLocation FindFirstRegWrite(HashSet<Block> visited, BlockLocation location, int regIndex)
+ {
+ // Breadth-first backwards search for an instruction that writes register "regIndex".
+ // The scan starts right before "location" and then moves on to predecessor blocks.
+ // "visited" belongs to the caller, so blocks recorded by an earlier search are not entered again.
+ // A location with a null block is returned when no write is found.
+ Queue<BlockLocation> pending = new Queue<BlockLocation>();
+ pending.Enqueue(location);
+ visited.Add(location.Block);
+
+ while (pending.Count != 0)
+ {
+ BlockLocation cursor = pending.Dequeue();
+ Block scanned = cursor.Block;
+
+ int opIndex = cursor.Index;
+ while (--opIndex >= 0)
+ {
+ if (WritesToRegister(scanned.OpCodes[opIndex], regIndex))
+ {
+ return new BlockLocation(scanned, opIndex);
+ }
+ }
+
+ foreach (Block pred in scanned.Predecessors)
+ {
+ if (!visited.Add(pred))
+ {
+ continue;
+ }
+
+ // Start one past the last instruction, so the scan above begins at the block's final op.
+ pending.Enqueue(new BlockLocation(pred, pred.OpCodes.Count));
+ }
+ }
+
+ return new BlockLocation(null, 0);
+ }
+
+ private static bool WritesToRegister(InstOp op, int regIndex)
+ {
+ // Instructions without the Rd or Rd2 property (e.g. predicate-only instructions)
+ // do not write to a general register, so they can never match.
+ InstProps destProps = op.Props & (InstProps.Rd | InstProps.Rd2);
+ if (destProps == 0)
+ {
+ return false;
+ }
+
+ // With Rd2, a second destination index is encoded in the byte starting at bit 28.
+ bool matchesSecondDest = op.Props.HasFlag(InstProps.Rd2) && (byte)(op.RawOpCode >> 28) == regIndex;
+
+ // The lowest byte of the opcode is compared in every remaining case.
+ return matchesSecondDest || (byte)op.RawOpCode == regIndex;
+ }
+
+ // Kind of entry on the simulated branch stack used by PropagatePushOp.
+ // GetMergeTypeFromPush maps PBK to Brk and PCNT to Cont, and every other push to Sync;
+ // GetMergeTypeFromPop maps BRK to Brk and CONT to Cont, and every other pop to Sync.
+ private enum MergeType
+ {
+ Brk,
+ Cont,
+ Sync
+ }
+
+ // Work item for PropagatePushOp: either a block to visit, or a "restore" entry that
+ // undoes branch stack changes once the traversal returns from a block.
+ private struct PathBlockState
+ {
+ // Block to visit, null for restore entries.
+ public Block Block { get; }
+
+ private enum RestoreType
+ {
+ None,
+ PopPushOp,
+ PushBranchOp
+ }
+
+ private RestoreType _restoreType;
+
+ // Old stack size for PopPushOp, branch target address for PushBranchOp.
+ private ulong _restoreValue;
+ private MergeType _restoreMergeType;
+
+ public bool ReturningFromVisit
+ {
+ get { return _restoreType != RestoreType.None; }
+ }
+
+ // Creates a regular entry that visits "block".
+ public PathBlockState(Block block)
+ {
+ _restoreType = RestoreType.None;
+ _restoreValue = 0;
+ _restoreMergeType = default;
+ Block = block;
+ }
+
+ // Creates a restore entry that pops the branch stack back down to "oldStackSize" entries.
+ public PathBlockState(int oldStackSize)
+ {
+ _restoreType = RestoreType.PopPushOp;
+ _restoreValue = (ulong)oldStackSize;
+ _restoreMergeType = default;
+ Block = null;
+ }
+
+ // Creates a restore entry that pushes ("syncAddress", "mergeType") back on the branch stack.
+ public PathBlockState(ulong syncAddress, MergeType mergeType)
+ {
+ _restoreType = RestoreType.PushBranchOp;
+ _restoreValue = syncAddress;
+ _restoreMergeType = mergeType;
+ Block = null;
+ }
+
+ // Applies this restore entry to "branchStack"; does nothing for regular block entries.
+ public void RestoreStackState(Stack<(ulong, MergeType)> branchStack)
+ {
+ switch (_restoreType)
+ {
+ case RestoreType.PushBranchOp:
+ branchStack.Push((_restoreValue, _restoreMergeType));
+ break;
+
+ case RestoreType.PopPushOp:
+ while (branchStack.Count > (uint)_restoreValue)
+ {
+ branchStack.Pop();
+ }
+ break;
+ }
+ }
+ }
+
+ // Finds the pop branches (BRK/CONT/SYNC) that consume the address pushed by the push
+ // instruction at currBlock.PushOpCodes[pushOpIndex], and links them to the push target block.
+ // The control flow is walked depth-first from currBlock while a branch stack is simulated;
+ // a pop branch that empties the simulated stack is the one consuming our push.
+ private static void PropagatePushOp(Dictionary<ulong, Block> blocks, Block currBlock, int pushOpIndex)
+ {
+ PushOpInfo pushOpInfo = currBlock.PushOpCodes[pushOpIndex];
+ InstOp pushOp = pushOpInfo.Op;
+
+ Block target = blocks[pushOp.GetAbsoluteAddress()];
+
+ // Despite its name, workQueue is a stack (LIFO): the entry pushed last is processed first.
+ Stack<PathBlockState> workQueue = new Stack<PathBlockState>();
+ HashSet<Block> visited = new HashSet<Block>();
+ Stack<(ulong, MergeType)> branchStack = new Stack<(ulong, MergeType)>();
+
+ void Push(PathBlockState pbs)
+ {
+ // When block is null, this means we are pushing a restore operation.
+ // Restore operations are used to undo the work done inside a block
+ // when we return from it, for example it pops addresses pushed by
+ // SSY/PBK instructions inside the block, and pushes addresses popped
+ // by SYNC/BRK.
+ // For blocks, if it's already visited, we just ignore to avoid going
+ // around in circles and getting stuck here.
+ if (pbs.Block == null || !visited.Contains(pbs.Block))
+ {
+ workQueue.Push(pbs);
+ }
+ }
+
+ Push(new PathBlockState(currBlock));
+
+ while (workQueue.TryPop(out PathBlockState pbs))
+ {
+ if (pbs.ReturningFromVisit)
+ {
+ pbs.RestoreStackState(branchStack);
+
+ continue;
+ }
+
+ Block current = pbs.Block;
+
+ // If the block was already processed, we just ignore it, otherwise
+ // we would push the same child blocks of an already processed block,
+ // and go around in circles until memory is exhausted.
+ if (!visited.Add(current))
+ {
+ continue;
+ }
+
+ int pushOpsCount = current.PushOpCodes.Count;
+ if (pushOpsCount != 0)
+ {
+ // Queue a restore entry first, so the pushes below are undone after the paths
+ // reachable from this block have been processed.
+ Push(new PathBlockState(branchStack.Count));
+
+ for (int index = pushOpIndex; index < pushOpsCount; index++)
+ {
+ InstOp currentPushOp = current.PushOpCodes[index].Op;
+ MergeType pushMergeType = GetMergeTypeFromPush(currentPushOp.Name);
+ branchStack.Push((currentPushOp.GetAbsoluteAddress(), pushMergeType));
+ }
+ }
+
+ // Only the starting block skips the push instructions that come before ours;
+ // every block visited afterwards contributes all of its push instructions.
+ pushOpIndex = 0;
+
+ bool hasNext = current.HasNext();
+ if (hasNext)
+ {
+ Push(new PathBlockState(current.Successors[0]));
+ }
+
+ InstOp lastOp = current.GetLastOp();
+ if (IsPopBranch(lastOp.Name))
+ {
+ MergeType popMergeType = GetMergeTypeFromPop(lastOp.Name);
+
+ bool found = true;
+ ulong targetAddress = 0UL;
+ MergeType mergeType;
+
+ // Pop entries until one with the merge type of this pop instruction is found,
+ // or until the stack runs out (in which case nothing is linked).
+ do
+ {
+ if (branchStack.Count == 0)
+ {
+ found = false;
+ break;
+ }
+
+ (targetAddress, mergeType) = branchStack.Pop();
+
+ // Push the target address (this will be used to push the address
+ // back into the PBK/PCNT/SSY stack when we return from that block).
+ Push(new PathBlockState(targetAddress, mergeType));
+ }
+ while (mergeType != popMergeType);
+
+ // Make sure we found the correct address,
+ // the push and pop instruction types must match, so:
+ // - BRK can only consume addresses pushed by PBK.
+ // - CONT can only consume addresses pushed by PCNT.
+ // - SYNC can only consume addresses pushed by SSY.
+ if (found)
+ {
+ if (branchStack.Count == 0)
+ {
+ // If the entire stack was consumed, then the current pop instruction
+ // just consumed the address from our push instruction.
+ if (current.SyncTargets.TryAdd(pushOp.Address, new SyncTarget(pushOpInfo, current.SyncTargets.Count)))
+ {
+ pushOpInfo.Consumers.Add(current, Local());
+ target.Predecessors.Add(current);
+ current.Successors.Add(target);
+ }
+ }
+ else
+ {
+ // Push the block itself into the work queue for processing.
+ Push(new PathBlockState(blocks[targetAddress]));
+ }
+ }
+ }
+ else
+ {
+ // By adding them in descending order (sorted by address), we process the blocks
+ // in order (of ascending address), since we work with a LIFO.
+ foreach (Block possibleTarget in current.Successors.OrderByDescending(x => x.Address))
+ {
+ // The fall-through successor (Successors[0] when hasNext) was already pushed above.
+ if (!hasNext || possibleTarget != current.Successors[0])
+ {
+ Push(new PathBlockState(possibleTarget));
+ }
+ }
+ }
+ }
+ }
+
+ // Tells whether "name" is one of the branches that pop a target address: BRK, CONT or SYNC.
+ public static bool IsPopBranch(InstName name)
+ {
+ return name switch
+ {
+ InstName.Brk => true,
+ InstName.Cont => true,
+ InstName.Sync => true,
+ _ => false
+ };
+ }
+
+ // Maps a push instruction to the merge type of the stack entry it creates:
+ // PBK gives Brk, PCNT gives Cont, anything else is treated as Sync.
+ private static MergeType GetMergeTypeFromPush(InstName name)
+ {
+ if (name == InstName.Pbk)
+ {
+ return MergeType.Brk;
+ }
+
+ if (name == InstName.Pcnt)
+ {
+ return MergeType.Cont;
+ }
+
+ return MergeType.Sync;
+ }
+
+ // Maps a pop branch to the merge type of the stack entry it consumes:
+ // BRK gives Brk, CONT gives Cont, anything else is treated as Sync.
+ private static MergeType GetMergeTypeFromPop(InstName name)
+ {
+ if (name == InstName.Brk)
+ {
+ return MergeType.Brk;
+ }
+
+ if (name == InstName.Cont)
+ {
+ return MergeType.Cont;
+ }
+
+ return MergeType.Sync;
+ }
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/Decoders/FunctionType.cs b/src/Ryujinx.Graphics.Shader/Decoders/FunctionType.cs
new file mode 100644
index 00000000..6ea6a82a
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/Decoders/FunctionType.cs
@@ -0,0 +1,10 @@
+namespace Ryujinx.Graphics.Shader.Decoders
+{
+ // Classification of a decoded function, stored as a single byte.
+ // NOTE(review): member names suggest user code, unused code, and the begin/end built-ins
+ // for fragment shader interlock - confirm at the places that assign these values.
+ enum FunctionType : byte
+ {
+ User,
+ Unused,
+ BuiltInFSIBegin,
+ BuiltInFSIEnd
+ }
+}
diff --git a/src/Ryujinx.Graphics.Shader/Decoders/InstDecoders.cs b/src/Ryujinx.Graphics.Shader/Decoders/InstDecoders.cs
new file mode 100644
index 00000000..0c22ddc0
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/Decoders/InstDecoders.cs
@@ -0,0 +1,5383 @@
+namespace Ryujinx.Graphics.Shader.Decoders
+{
+ // Values of the 2-bit AlSize field read by InstAl2p, InstAld and InstAst (opcode bits 47-48).
+ // NOTE(review): member names look like access widths in bits - confirm.
+ enum AlSize
+ {
+ _32 = 0,
+ _64 = 1,
+ _96 = 2,
+ _128 = 3,
+ }
+
+ // Values of the 3-bit field read by InstAtom.Size (opcode bits 49-51); 6 and 7 have no named member.
+ enum AtomSize
+ {
+ U32 = 0,
+ S32 = 1,
+ U64 = 2,
+ F32FtzRn = 3,
+ F16x2FtzRn = 4,
+ S64 = 5,
+ }
+
+ // Values of the 4-bit field read by InstAtom.Op and InstAtoms.AtomOp (opcode bits 52-55).
+ // The value 9 has no named member.
+ enum AtomOp
+ {
+ Add = 0,
+ Min = 1,
+ Max = 2,
+ Inc = 3,
+ Dec = 4,
+ And = 5,
+ Or = 6,
+ Xor = 7,
+ Exch = 8,
+ Safeadd = 10,
+ }
+
+ // Values of the 2-bit field read by InstAtoms.AtomsSize (opcode bits 28-29).
+ enum AtomsSize
+ {
+ U32 = 0,
+ S32 = 1,
+ U64 = 2,
+ S64 = 3,
+ }
+
+ // Values of the 2-bit field read by InstB2r.Mode (opcode bits 32-33); 3 has no named member.
+ enum BarMode
+ {
+ Bar = 0,
+ Result = 1,
+ Warp = 2,
+ }
+
+ // Values of the 3-bit field read by InstBar.BarOp (opcode bits 32-34); 5 to 7 have no named member.
+ enum BarOp
+ {
+ Sync = 0,
+ Arv = 1,
+ Red = 2,
+ Scan = 3,
+ SyncAll = 4,
+ }
+
+ // Values of the 2-bit field read by InstBar.BarRedOp (opcode bits 35-36); 3 has no named member.
+ enum BarRedOp
+ {
+ Popc = 0,
+ And = 1,
+ Or = 2,
+ }
+
+ // Values of the 3-bit field read by InstBpt.Bpt (opcode bits 6-8); 6 and 7 have no named member.
+ enum Bpt
+ {
+ DrainIllegal = 0,
+ Cal = 1,
+ Pause = 2,
+ Trap = 3,
+ Int = 4,
+ Drain = 5,
+ }
+
+ // Values of the 5-bit Ccc field read by the branch and condition decoders in this file
+ // (opcode bits 0-4 for InstConditional/InstBra/InstBrk/InstBrx/InstCont, bits 8-12 for InstCset/InstCsetp).
+ // All 32 possible values have a named member.
+ // NOTE(review): presumably a condition code test - confirm against the instruction emitters.
+ enum Ccc
+ {
+ F = 0,
+ Lt = 1,
+ Eq = 2,
+ Le = 3,
+ Gt = 4,
+ Ne = 5,
+ Ge = 6,
+ Num = 7,
+ Nan = 8,
+ Ltu = 9,
+ Equ = 10,
+ Leu = 11,
+ Gtu = 12,
+ Neu = 13,
+ Geu = 14,
+ T = 15,
+ Off = 16,
+ Lo = 17,
+ Sff = 18,
+ Ls = 19,
+ Hi = 20,
+ Sft = 21,
+ Hs = 22,
+ Oft = 23,
+ CsmTa = 24,
+ CsmTr = 25,
+ CsmMx = 26,
+ FcsmTa = 27,
+ FcsmTr = 28,
+ FcsmMx = 29,
+ Rle = 30,
+ Rgt = 31,
+ }
+
+ // Values of the 3-bit field read by InstCctl.Cache (opcode bits 4-6); 0 has no named member.
+ enum CacheType
+ {
+ U = 1,
+ C = 2,
+ I = 3,
+ Crs = 4,
+ }
+
+ // Values of the 4-bit field read by InstCctl.CctlOp and InstCctll.CctlOp (opcode bits 0-3).
+ // The values 0 and 8 have no named member.
+ enum CctlOp
+ {
+ Pf1 = 1,
+ Pf1_5 = 2,
+ Pf2 = 3,
+ Wb = 4,
+ Iv = 5,
+ Ivall = 6,
+ Rs = 7,
+ Rslb = 9,
+ }
+
+ // Values of the 2-bit field read by InstCctlt.CctltOp and InstCctltR.CctltOp (opcode bits 0-1); only 1 is named.
+ enum CctltOp
+ {
+ Ivth = 1,
+ }
+
+ // Values of the 2-bit field read by InstCset.Bop and InstCsetp.Bop (opcode bits 45-46); 3 has no named member.
+ enum BoolOp
+ {
+ And = 0,
+ Or = 1,
+ Xor = 2,
+ }
+
+ // Values of the 8-bit field read by InstCs2r.SReg (opcode bits 20-27).
+ // The numbering is sparse: many values in the 0-255 range have no named member.
+ // NOTE(review): presumably special register indices - confirm against the instruction emitters.
+ enum SReg
+ {
+ LaneId = 0,
+ Clock = 1,
+ VirtCfg = 2,
+ VirtId = 3,
+ Pm0 = 4,
+ Pm1 = 5,
+ Pm2 = 6,
+ Pm3 = 7,
+ Pm4 = 8,
+ Pm5 = 9,
+ Pm6 = 10,
+ Pm7 = 11,
+ OrderingTicket = 15,
+ PrimType = 16,
+ InvocationId = 17,
+ YDirection = 18,
+ ThreadKill = 19,
+ ShaderType = 20,
+ DirectCbeWriteAddressLow = 21,
+ DirectCbeWriteAddressHigh = 22,
+ DirectCbeWriteEnabled = 23,
+ MachineId0 = 24,
+ MachineId1 = 25,
+ MachineId2 = 26,
+ MachineId3 = 27,
+ Affinity = 28,
+ InvocationInfo = 29,
+ WScaleFactorXY = 30,
+ WScaleFactorZ = 31,
+ TId = 32,
+ TIdX = 33,
+ TIdY = 34,
+ TIdZ = 35,
+ CtaParam = 36,
+ CtaIdX = 37,
+ CtaIdY = 38,
+ CtaIdZ = 39,
+ Ntid = 40,
+ CirQueueIncrMinusOne = 41,
+ Nlatc = 42,
+ Swinlo = 48,
+ Swinsz = 49,
+ Smemsz = 50,
+ Smembanks = 51,
+ LWinLo = 52,
+ LWinSz = 53,
+ LMemLoSz = 54,
+ LMemHiOff = 55,
+ EqMask = 56,
+ LtMask = 57,
+ LeMask = 58,
+ GtMask = 59,
+ GeMask = 60,
+ RegAlloc = 61,
+ CtxAddr = 62,
+ GlobalErrorStatus = 64,
+ WarpErrorStatus = 66,
+ WarpErrorStatusClear = 67,
+ PmHi0 = 72,
+ PmHi1 = 73,
+ PmHi2 = 74,
+ PmHi3 = 75,
+ PmHi4 = 76,
+ PmHi5 = 77,
+ PmHi6 = 78,
+ PmHi7 = 79,
+ ClockLo = 80,
+ ClockHi = 81,
+ GlobalTimerLo = 82,
+ GlobalTimerHi = 83,
+ HwTaskId = 96,
+ CircularQueueEntryIndex = 97,
+ CircularQueueEntryAddressLow = 98,
+ CircularQueueEntryAddressHigh = 99,
+ }
+
+ // Values of the 2-bit field read by InstDaddR/InstDaddI/InstDaddC.RoundMode (opcode bits 39-40).
+ enum RoundMode
+ {
+ Rn = 0,
+ Rm = 1,
+ Rp = 2,
+ Rz = 3,
+ }
+
+ // Named raw encodings of the FComp instruction field; the members match the first 16 members of Ccc.
+ // NOTE(review): confirm bit position and width at the decoder that casts to this enum.
+ enum FComp
+ {
+ F = 0,
+ Lt = 1,
+ Eq = 2,
+ Le = 3,
+ Gt = 4,
+ Ne = 5,
+ Ge = 6,
+ Num = 7,
+ Nan = 8,
+ Ltu = 9,
+ Equ = 10,
+ Leu = 11,
+ Gtu = 12,
+ Neu = 13,
+ Geu = 14,
+ T = 15,
+ }
+
+ // Named raw encodings of the IntegerRound instruction field; 0, 2 and 3 have no named member.
+ // NOTE(review): confirm bit position and width at the decoder that casts to this enum.
+ enum IntegerRound
+ {
+ Pass = 1,
+ Round = 4,
+ Floor = 5,
+ Ceil = 6,
+ Trunc = 7,
+ }
+
+ // Named raw encodings of the IDstFmt instruction field; 0 and 4 have no named member.
+ // NOTE(review): confirm bit position and width at the decoder that casts to this enum.
+ enum IDstFmt
+ {
+ U16 = 1,
+ U32 = 2,
+ U64 = 3,
+ S16 = 5,
+ S32 = 6,
+ S64 = 7,
+ }
+
+ // Named raw encodings of the ISrcFmt instruction field; the S* members are the U* members plus 4.
+ // NOTE(review): confirm bit position and width at the decoder that casts to this enum.
+ enum ISrcFmt
+ {
+ U8 = 0,
+ U16 = 1,
+ U32 = 2,
+ U64 = 3,
+ S8 = 4,
+ S16 = 5,
+ S32 = 6,
+ S64 = 7,
+ }
+
+ // Named raw encodings of the ISrcDstFmt instruction field; 3 and 7 have no named member.
+ // NOTE(review): confirm bit position and width at the decoder that casts to this enum.
+ enum ISrcDstFmt
+ {
+ U8 = 0,
+ U16 = 1,
+ U32 = 2,
+ S8 = 4,
+ S16 = 5,
+ S32 = 6,
+ }
+
+ // Named raw encodings of the RoundMode2 instruction field (distinct from RoundMode, which uses Rn/Rm/Rp/Rz names).
+ // NOTE(review): confirm bit position and width at the decoder that casts to this enum.
+ enum RoundMode2
+ {
+ Round = 0,
+ Floor = 1,
+ Ceil = 2,
+ Trunc = 3,
+ }
+
+ // Named raw encodings of the ChkModeF instruction field; only the value 0 is named.
+ // NOTE(review): confirm bit position and width at the decoder that casts to this enum.
+ enum ChkModeF
+ {
+ Divide = 0,
+ }
+
+ // Named raw encodings of the Fmz instruction field; 0 has no named member.
+ // NOTE(review): confirm bit position and width at the decoder that casts to this enum.
+ enum Fmz
+ {
+ Ftz = 1,
+ Fmz = 2,
+ }
+
+ // Named raw encodings of the MultiplyScale instruction field.
+ // NOTE(review): D2/D4/D8 and M2/M4/M8 presumably mean divide/multiply by that factor - confirm at the use site.
+ enum MultiplyScale
+ {
+ NoScale = 0,
+ D2 = 1,
+ D4 = 2,
+ D8 = 3,
+ M8 = 4,
+ M4 = 5,
+ M2 = 6,
+ }
+
+ // Named raw encodings of the OFmt instruction field.
+ // NOTE(review): confirm bit position and width at the decoder that casts to this enum.
+ enum OFmt
+ {
+ F16 = 0,
+ F32 = 1,
+ MrgH0 = 2,
+ MrgH1 = 3,
+ }
+
+ // Named raw encodings of the HalfSwizzle instruction field.
+ // NOTE(review): confirm bit position and width at the decoder that casts to this enum.
+ enum HalfSwizzle
+ {
+ F16 = 0,
+ F32 = 1,
+ H0H0 = 2,
+ H1H1 = 3,
+ }
+
+ // Named raw encodings of the ByteSel instruction field.
+ // NOTE(review): presumably selects one of four bytes - confirm at the decoder that casts to this enum.
+ enum ByteSel
+ {
+ B0 = 0,
+ B1 = 1,
+ B2 = 2,
+ B3 = 3,
+ }
+
+ // Named raw encodings of the DstFmt instruction field; 0 has no named member.
+ // NOTE(review): confirm bit position and width at the decoder that casts to this enum.
+ enum DstFmt
+ {
+ F16 = 1,
+ F32 = 2,
+ F64 = 3,
+ }
+
+ // Named raw encodings of the AvgMode instruction field.
+ // NOTE(review): confirm bit position and width at the decoder that casts to this enum.
+ enum AvgMode
+ {
+ NoNeg = 0,
+ NegB = 1,
+ NegA = 2,
+ PlusOne = 3,
+ }
+
+ // Named raw encodings of the Lrs instruction field (no shift, right shift, left shift).
+ // NOTE(review): confirm bit position and width at the decoder that casts to this enum.
+ enum Lrs
+ {
+ None = 0,
+ RightShift = 1,
+ LeftShift = 2,
+ }
+
+ // Named raw encodings of the HalfSelect instruction field.
+ // NOTE(review): confirm bit position and width at the decoder that casts to this enum.
+ enum HalfSelect
+ {
+ B32 = 0,
+ H0 = 1,
+ H1 = 2,
+ }
+
+ // Named raw encodings of the IComp instruction field; values 0-6 use the same names and numbers as FComp.
+ // NOTE(review): confirm bit position and width at the decoder that casts to this enum.
+ enum IComp
+ {
+ F = 0,
+ Lt = 1,
+ Eq = 2,
+ Le = 3,
+ Gt = 4,
+ Ne = 5,
+ Ge = 6,
+ T = 7,
+ }
+
+ // Named raw encodings of the XMode instruction field; 0 has no named member.
+ // NOTE(review): confirm bit position and width at the decoder that casts to this enum.
+ enum XMode
+ {
+ Xlo = 1,
+ Xmed = 2,
+ Xhi = 3,
+ }
+
+ // Named raw encodings of the IpaOp instruction field.
+ // NOTE(review): confirm bit position and width at the decoder that casts to this enum.
+ enum IpaOp
+ {
+ Pass = 0,
+ Multiply = 1,
+ Constant = 2,
+ Sc = 3,
+ }
+
+ // Named raw encodings of the IBase instruction field; 0 has no named member.
+ // NOTE(review): confirm bit position and width at the decoder that casts to this enum.
+ enum IBase
+ {
+ Patch = 1,
+ Prim = 2,
+ Attr = 3,
+ }
+
+ // Named raw encodings of the CacheOpLd instruction field.
+ // NOTE(review): presumably the cache operation of load instructions - confirm at the decoder that casts to this enum.
+ enum CacheOpLd
+ {
+ Ca = 0,
+ Cg = 1,
+ Ci = 2,
+ Cv = 3,
+ }
+
+ // Named raw encodings of the CacheOpSt instruction field.
+ // NOTE(review): presumably the cache operation of store instructions - confirm at the decoder that casts to this enum.
+ enum CacheOpSt
+ {
+ Wb = 0,
+ Cg = 1,
+ Ci = 2,
+ Wt = 3,
+ }
+
+ // Named raw encodings of the LsSize instruction field; same as LsSize2 plus the UB128 member.
+ // NOTE(review): confirm bit position and width at the decoder that casts to this enum.
+ enum LsSize
+ {
+ U8 = 0,
+ S8 = 1,
+ U16 = 2,
+ S16 = 3,
+ B32 = 4,
+ B64 = 5,
+ B128 = 6,
+ UB128 = 7,
+ }
+
+ // Named raw encodings of the LsSize2 instruction field; same as LsSize without the UB128 member.
+ // NOTE(review): confirm bit position and width at the decoder that casts to this enum.
+ enum LsSize2
+ {
+ U8 = 0,
+ S8 = 1,
+ U16 = 2,
+ S16 = 3,
+ B32 = 4,
+ B64 = 5,
+ B128 = 6,
+ }
+
+ // Named raw encodings of the AddressMode instruction field; 0 has no named member.
+ // NOTE(review): confirm bit position and width at the decoder that casts to this enum.
+ enum AddressMode
+ {
+ Il = 1,
+ Is = 2,
+ Isl = 3,
+ }
+
+ // Named raw encodings of the CacheOp2 instruction field; 0 has no named member.
+ // NOTE(review): confirm bit position and width at the decoder that casts to this enum.
+ enum CacheOp2
+ {
+ Lu = 1,
+ Ci = 2,
+ Cv = 3,
+ }
+
+ // Named raw encodings of the PredicateOp instruction field.
+ // NOTE(review): confirm bit position and width at the decoder that casts to this enum.
+ enum PredicateOp
+ {
+ F = 0,
+ T = 1,
+ Z = 2,
+ Nz = 3,
+ }
+
+ // Named raw encodings of the LogicOp instruction field; values 0-2 match BoolOp, 3 adds PassB.
+ // NOTE(review): confirm bit position and width at the decoder that casts to this enum.
+ enum LogicOp
+ {
+ And = 0,
+ Or = 1,
+ Xor = 2,
+ PassB = 3,
+ }
+
+ // Named raw encodings of the Membar instruction field.
+ // NOTE(review): confirm bit position and width at the decoder that casts to this enum.
+ enum Membar
+ {
+ Cta = 0,
+ Gl = 1,
+ Sys = 2,
+ Vc = 3,
+ }
+
+ // Named raw encodings of the Ivall instruction field; 0 has no named member.
+ // NOTE(review): confirm bit position and width at the decoder that casts to this enum.
+ enum Ivall
+ {
+ Ivalld = 1,
+ Ivallt = 2,
+ Ivalltd = 3,
+ }
+
+ // Named raw encodings of the MufuOp instruction field.
+ // NOTE(review): confirm bit position and width at the decoder that casts to this enum.
+ enum MufuOp
+ {
+ Cos = 0,
+ Sin = 1,
+ Ex2 = 2,
+ Lg2 = 3,
+ Rcp = 4,
+ Rsq = 5,
+ Rcp64h = 6,
+ Rsq64h = 7,
+ Sqrt = 8,
+ }
+
+ // Named raw encodings of the OutType instruction field; EmitThenCut (3) equals Emit | Cut, 0 has no named member.
+ // NOTE(review): confirm bit position and width at the decoder that casts to this enum.
+ enum OutType
+ {
+ Emit = 1,
+ Cut = 2,
+ EmitThenCut = 3,
+ }
+
+ // Named raw encodings of the PixMode instruction field; 0 has no named member.
+ // NOTE(review): confirm bit position and width at the decoder that casts to this enum.
+ enum PixMode
+ {
+ Covmask = 1,
+ Covered = 2,
+ Offset = 3,
+ CentroidOffset = 4,
+ MyIndex = 5,
+ }
+
+ // Named raw encodings of the PMode instruction field; 0 has no named member.
+ // NOTE(review): confirm bit position and width at the decoder that casts to this enum.
+ enum PMode
+ {
+ F4e = 1,
+ B4e = 2,
+ Rc8 = 3,
+ Ecl = 4,
+ Ecr = 5,
+ Rc16 = 6,
+ }
+
+ // Named raw encodings of the RedOp instruction field; same names and numbers as AtomOp values 0-7.
+ // NOTE(review): confirm bit position and width at the decoder that casts to this enum.
+ enum RedOp
+ {
+ Add = 0,
+ Min = 1,
+ Max = 2,
+ Inc = 3,
+ Dec = 4,
+ And = 5,
+ Or = 6,
+ Xor = 7,
+ }
+
+ // Named raw encodings of the XModeShf instruction field; 0 has no named member.
+ // NOTE(review): confirm bit position and width at the decoder that casts to this enum.
+ enum XModeShf
+ {
+ Hi = 1,
+ X = 2,
+ Xhi = 3,
+ }
+
+ // Named raw encodings of the MaxShift instruction field; 0 and 1 have no named member.
+ // NOTE(review): confirm bit position and width at the decoder that casts to this enum.
+ enum MaxShift
+ {
+ U64 = 2,
+ S64 = 3,
+ }
+
+ // Named raw encodings of the ShflMode instruction field.
+ // NOTE(review): confirm bit position and width at the decoder that casts to this enum.
+ enum ShflMode
+ {
+ Idx = 0,
+ Up = 1,
+ Down = 2,
+ Bfly = 3,
+ }
+
+ // Named raw encodings of the Clamp instruction field; 1 has no named member.
+ // NOTE(review): confirm bit position and width at the decoder that casts to this enum.
+ enum Clamp
+ {
+ Ign = 0,
+ Trap = 2,
+ }
+
+ // Named raw encodings of the SuatomSize instruction field; values 0-5 match AtomSize, 6 and 7 add Sd32/Sd64.
+ // NOTE(review): confirm bit position and width at the decoder that casts to this enum.
+ enum SuatomSize
+ {
+ U32 = 0,
+ S32 = 1,
+ U64 = 2,
+ F32FtzRn = 3,
+ F16x2FtzRn = 4,
+ S64 = 5,
+ Sd32 = 6,
+ Sd64 = 7,
+ }
+
+ // Named raw encodings of the SuDim instruction field.
+ // NOTE(review): presumably the surface dimensionality - confirm at the decoder that casts to this enum.
+ enum SuDim
+ {
+ _1d = 0,
+ _1dBuffer = 1,
+ _1dArray = 2,
+ _2d = 3,
+ _2dArray = 4,
+ _3d = 5,
+ }
+
+ // Named raw encodings of the SuatomOp instruction field; same names and numbers as AtomOp values 0-8.
+ // NOTE(review): confirm bit position and width at the decoder that casts to this enum.
+ enum SuatomOp
+ {
+ Add = 0,
+ Min = 1,
+ Max = 2,
+ Inc = 3,
+ Dec = 4,
+ And = 5,
+ Or = 6,
+ Xor = 7,
+ Exch = 8,
+ }
+
+ // Named raw encodings of the SuSize instruction field; same names and numbers as LsSize.
+ // NOTE(review): confirm bit position and width at the decoder that casts to this enum.
+ enum SuSize
+ {
+ U8 = 0,
+ S8 = 1,
+ U16 = 2,
+ S16 = 3,
+ B32 = 4,
+ B64 = 5,
+ B128 = 6,
+ UB128 = 7,
+ }
+
+ // Named raw encodings of the SuRgba instruction field. The values form a 4-bit mask:
+ // R = 1, G = 2, B = 4, A = 8, and the other members are the sums of their letters. 0 has no named member.
+ // NOTE(review): confirm bit position at the decoder that casts to this enum.
+ enum SuRgba
+ {
+ R = 1,
+ G = 2,
+ Rg = 3,
+ B = 4,
+ Rb = 5,
+ Gb = 6,
+ Rgb = 7,
+ A = 8,
+ Ra = 9,
+ Ga = 10,
+ Rga = 11,
+ Ba = 12,
+ Rba = 13,
+ Gba = 14,
+ Rgba = 15,
+ }
+
+ // Named raw encodings of the Lod instruction field; 0, 4 and 5 have no named member.
+ // NOTE(review): confirm bit position and width at the decoder that casts to this enum.
+ enum Lod
+ {
+ Lz = 1,
+ Lb = 2,
+ Ll = 3,
+ Lba = 6,
+ Lla = 7,
+ }
+
+ // Named raw encodings of the TexDim instruction field; each odd value is the array form of the preceding even value.
+ // NOTE(review): confirm bit position and width at the decoder that casts to this enum.
+ enum TexDim
+ {
+ _1d = 0,
+ Array1d = 1,
+ _2d = 2,
+ Array2d = 3,
+ _3d = 4,
+ Array3d = 5,
+ Cube = 6,
+ ArrayCube = 7,
+ }
+
+ // Named raw encodings of the TexsTarget instruction field (values 0-13).
+ // NOTE(review): each member appears to combine a texture type with LOD / depth compare options - confirm at the use site.
+ enum TexsTarget
+ {
+ Texture1DLodZero = 0,
+ Texture2D = 1,
+ Texture2DLodZero = 2,
+ Texture2DLodLevel = 3,
+ Texture2DDepthCompare = 4,
+ Texture2DLodLevelDepthCompare = 5,
+ Texture2DLodZeroDepthCompare = 6,
+ Texture2DArray = 7,
+ Texture2DArrayLodZero = 8,
+ Texture2DArrayLodZeroDepthCompare = 9,
+ Texture3D = 10,
+ Texture3DLodZero = 11,
+ TextureCube = 12,
+ TextureCubeLodLevel = 13,
+ }
+
+ // Named raw encodings of the TldsTarget instruction field, written in hexadecimal.
+ // The numbering is sparse: 0x3 and 0x9-0xb have no named member.
+ // NOTE(review): confirm bit position and width at the decoder that casts to this enum.
+ enum TldsTarget
+ {
+ Texture1DLodZero = 0x0,
+ Texture1DLodLevel = 0x1,
+ Texture2DLodZero = 0x2,
+ Texture2DLodZeroOffset = 0x4,
+ Texture2DLodLevel = 0x5,
+ Texture2DLodZeroMultisample = 0x6,
+ Texture3DLodZero = 0x7,
+ Texture2DArrayLodZero = 0x8,
+ Texture2DLodLevelOffset = 0xc
+ }
+
+ // Named raw encodings of the TexComp instruction field (one of the R, G, B, A components).
+ // NOTE(review): confirm bit position and width at the decoder that casts to this enum.
+ enum TexComp
+ {
+ R = 0,
+ G = 1,
+ B = 2,
+ A = 3,
+ }
+
+ // Named raw encodings of the TexOffset instruction field.
+ // NOTE(review): confirm bit position and width at the decoder that casts to this enum.
+ enum TexOffset
+ {
+ None = 0,
+ Aoffi = 1,
+ Ptp = 2,
+ }
+
+ // Named raw encodings of the TexQuery instruction field; the numbering is sparse (1, 2, 5, 16, 18, 20, 22).
+ // NOTE(review): confirm bit position and width at the decoder that casts to this enum.
+ enum TexQuery
+ {
+ TexHeaderDimension = 1,
+ TexHeaderTextureType = 2,
+ TexHeaderSamplerPos = 5,
+ TexSamplerFilter = 16,
+ TexSamplerLod = 18,
+ TexSamplerWrap = 20,
+ TexSamplerBorderColor = 22,
+ }
+
+ // Named raw encodings of the VectorSelect instruction field.
+ // Each S* member is the matching U* member plus 8; 7 and 15 have no named member.
+ // NOTE(review): confirm bit position and width at the decoder that casts to this enum.
+ enum VectorSelect
+ {
+ U8B0 = 0,
+ U8B1 = 1,
+ U8B2 = 2,
+ U8B3 = 3,
+ U16H0 = 4,
+ U16H1 = 5,
+ U32 = 6,
+ S8B0 = 8,
+ S8B1 = 9,
+ S8B2 = 10,
+ S8B3 = 11,
+ S16H0 = 12,
+ S16H1 = 13,
+ S32 = 14,
+ }
+
+ // Named raw encodings of the VideoOp instruction field.
+ // NOTE(review): confirm bit position and width at the decoder that casts to this enum.
+ enum VideoOp
+ {
+ Mrg16h = 0,
+ Mrg16l = 1,
+ Mrg8b0 = 2,
+ Mrg8b2 = 3,
+ Acc = 4,
+ Min = 5,
+ Max = 6,
+ }
+
+ // Named raw encodings of the VideoRed instruction field; only the value 1 is named.
+ // NOTE(review): confirm bit position and width at the decoder that casts to this enum.
+ enum VideoRed
+ {
+ Acc = 1,
+ }
+
+ // Named raw encodings of the LaneMask4 instruction field. The values form a 4-bit mask:
+ // Z = 1, W = 2, X = 4, Y = 8, and the other members are the sums of their letters. 0 has no named member.
+ // NOTE(review): confirm bit position at the decoder that casts to this enum.
+ enum LaneMask4
+ {
+ Z = 1,
+ W = 2,
+ Zw = 3,
+ X = 4,
+ Xz = 5,
+ Xw = 6,
+ Xzw = 7,
+ Y = 8,
+ Yz = 9,
+ Yw = 10,
+ Yzw = 11,
+ Xy = 12,
+ Xyz = 13,
+ Xyw = 14,
+ Xyzw = 15,
+ }
+
+ // Named raw encodings of the ASelect4 instruction field; 5 has no named member.
+ // NOTE(review): the digit names presumably list the selected source byte per lane - confirm at the use site.
+ enum ASelect4
+ {
+ _0000 = 0,
+ _1111 = 1,
+ _2222 = 2,
+ _3333 = 3,
+ _3210 = 4,
+ _5432 = 6,
+ _6543 = 7,
+ _3201 = 8,
+ _3012 = 9,
+ _0213 = 10,
+ _3120 = 11,
+ _1230 = 12,
+ _2310 = 13,
+ }
+
+ // Named raw encodings of the BSelect4 instruction field; 5 has no named member.
+ // NOTE(review): the digit names presumably list the selected source byte per lane - confirm at the use site.
+ enum BSelect4
+ {
+ _4444 = 0,
+ _5555 = 1,
+ _6666 = 2,
+ _7777 = 3,
+ _7654 = 4,
+ _5432 = 6,
+ _4321 = 7,
+ _4567 = 8,
+ _6745 = 9,
+ _5476 = 10,
+ }
+
+ // Named raw encodings of the VideoScale instruction field; 0 has no named member.
+ // NOTE(review): names suggest a right shift by 7 or 15 - confirm at the decoder that casts to this enum.
+ enum VideoScale
+ {
+ Shr7 = 1,
+ Shr15 = 2,
+ }
+
+ // Named raw encodings of the VoteMode instruction field.
+ // NOTE(review): confirm bit position and width at the decoder that casts to this enum.
+ enum VoteMode
+ {
+ All = 0,
+ Any = 1,
+ Eq = 2,
+ }
+
+ // Named raw encodings of the XmadCop instruction field; same as XmadCop2 plus the Cbcc member.
+ // NOTE(review): confirm bit position and width at the decoder that casts to this enum.
+ enum XmadCop
+ {
+ Cfull = 0,
+ Clo = 1,
+ Chi = 2,
+ Csfu = 3,
+ Cbcc = 4,
+ }
+
+ // Named raw encodings of the XmadCop2 instruction field; same as XmadCop without the Cbcc member.
+ // NOTE(review): confirm bit position and width at the decoder that casts to this enum.
+ enum XmadCop2
+ {
+ Cfull = 0,
+ Clo = 1,
+ Chi = 2,
+ Csfu = 3,
+ }
+
+ // Named raw encodings of the ImadspASelect instruction field; each odd value is the signed form of the preceding even value.
+ // NOTE(review): confirm bit position and width at the decoder that casts to this enum.
+ enum ImadspASelect
+ {
+ U32 = 0,
+ S32 = 1,
+ U24 = 2,
+ S24 = 3,
+ U16h0 = 4,
+ S16h0 = 5,
+ U16h1 = 6,
+ S16h1 = 7,
+ }
+
+ // Named raw encodings of the ImadspBSelect instruction field; each odd value is the signed form of the preceding even value.
+ // NOTE(review): confirm bit position and width at the decoder that casts to this enum.
+ enum ImadspBSelect
+ {
+ U24 = 0,
+ S24 = 1,
+ U16h0 = 2,
+ S16h0 = 3,
+ }
+
+ // Decoder view exposing only the predicate and condition fields of a raw 64-bit opcode:
+ // Pred is bits 16-18, PredInv is bit 19 and Ccc is bits 0-4.
+ struct InstConditional
+ {
+ private ulong _opcode;
+ public InstConditional(ulong opcode) => _opcode = opcode;
+ public int Pred => (int)((_opcode >> 16) & 0x7);
+ public bool PredInv => (_opcode & 0x80000) != 0;
+ public Ccc Ccc => (Ccc)((_opcode >> 0) & 0x1F);
+ }
+
+ // Decoder view for the Al2p encoding: wraps the raw 64-bit opcode and extracts each field by shift and mask.
+ // Dest is bits 0-7, SrcA bits 8-15, Pred bits 16-18, PredInv bit 19, Imm11 bits 20-30,
+ // Aio bit 32, DestPred bits 44-46 and AlSize bits 47-48.
+ struct InstAl2p
+ {
+ private ulong _opcode;
+ public InstAl2p(ulong opcode) => _opcode = opcode;
+ public int Dest => (int)((_opcode >> 0) & 0xFF);
+ public int SrcA => (int)((_opcode >> 8) & 0xFF);
+ public int Pred => (int)((_opcode >> 16) & 0x7);
+ public bool PredInv => (_opcode & 0x80000) != 0;
+ public AlSize AlSize => (AlSize)((_opcode >> 47) & 0x3);
+ public bool Aio => (_opcode & 0x100000000) != 0;
+ public int Imm11 => (int)((_opcode >> 20) & 0x7FF);
+ public int DestPred => (int)((_opcode >> 44) & 0x7);
+ }
+
+ // Decoder view for the Ald encoding: wraps the raw 64-bit opcode and extracts each field by shift and mask.
+ // P is bit 31, O is bit 32, SrcB is bits 39-46 and AlSize is bits 47-48.
+ struct InstAld
+ {
+ private ulong _opcode;
+ public InstAld(ulong opcode) => _opcode = opcode;
+ public int Dest => (int)((_opcode >> 0) & 0xFF);
+ public int SrcA => (int)((_opcode >> 8) & 0xFF);
+ public int SrcB => (int)((_opcode >> 39) & 0xFF);
+ public int Pred => (int)((_opcode >> 16) & 0x7);
+ public bool PredInv => (_opcode & 0x80000) != 0;
+ public int Imm11 => (int)((_opcode >> 20) & 0x7FF);
+ public bool P => (_opcode & 0x80000000) != 0;
+ public bool O => (_opcode & 0x100000000) != 0;
+ public AlSize AlSize => (AlSize)((_opcode >> 47) & 0x3);
+ // Derived flag, not an encoded bit: set when P is clear, the immediate is zero
+ // and SrcA is not the zero register.
+ public bool Phys => !P && Imm11 == 0 && SrcA != RegisterConsts.RegisterZeroIndex;
+ }
+
+ // Decoder view for the Ast encoding: wraps the raw 64-bit opcode and extracts each field by shift and mask.
+ // Unlike most decoders here, bits 0-7 are exposed as a source (SrcB) rather than a destination.
+ struct InstAst
+ {
+ private ulong _opcode;
+ public InstAst(ulong opcode) => _opcode = opcode;
+ public int SrcA => (int)((_opcode >> 8) & 0xFF);
+ public int SrcB => (int)((_opcode >> 0) & 0xFF);
+ public int SrcC => (int)((_opcode >> 39) & 0xFF);
+ public int Pred => (int)((_opcode >> 16) & 0x7);
+ public bool PredInv => (_opcode & 0x80000) != 0;
+ public int Imm11 => (int)((_opcode >> 20) & 0x7FF);
+ public bool P => (_opcode & 0x80000000) != 0;
+ public AlSize AlSize => (AlSize)((_opcode >> 47) & 0x3);
+ // Derived flag, not an encoded bit: set when P is clear, the immediate is zero
+ // and SrcA is not the zero register.
+ public bool Phys => !P && Imm11 == 0 && SrcA != RegisterConsts.RegisterZeroIndex;
+ }
+
+ // Decoder view for the Atom encoding: wraps the raw 64-bit opcode and extracts each field by shift and mask.
+ // Imm20 is bits 28-47 (returned without sign extension), E is bit 48, Size is bits 49-51 and Op is bits 52-55.
+ struct InstAtom
+ {
+ private ulong _opcode;
+ public InstAtom(ulong opcode) => _opcode = opcode;
+ public int Dest => (int)((_opcode >> 0) & 0xFF);
+ public int SrcA => (int)((_opcode >> 8) & 0xFF);
+ public int SrcB => (int)((_opcode >> 20) & 0xFF);
+ public int Pred => (int)((_opcode >> 16) & 0x7);
+ public bool PredInv => (_opcode & 0x80000) != 0;
+ public int Imm20 => (int)((_opcode >> 28) & 0xFFFFF);
+ public AtomSize Size => (AtomSize)((_opcode >> 49) & 0x7);
+ public AtomOp Op => (AtomOp)((_opcode >> 52) & 0xF);
+ public bool E => (_opcode & 0x1000000000000) != 0;
+ }
+
+ // Decoder view for the AtomCas encoding: wraps the raw 64-bit opcode and extracts each field by shift and mask.
+ // E is bit 48 and BcRz is bits 50-51.
+ struct InstAtomCas
+ {
+ private ulong _opcode;
+ public InstAtomCas(ulong opcode) => _opcode = opcode;
+ public int Dest => (int)((_opcode >> 0) & 0xFF);
+ public int SrcA => (int)((_opcode >> 8) & 0xFF);
+ public int SrcB => (int)((_opcode >> 20) & 0xFF);
+ public int Pred => (int)((_opcode >> 16) & 0x7);
+ public bool PredInv => (_opcode & 0x80000) != 0;
+ public int BcRz => (int)((_opcode >> 50) & 0x3);
+ public bool E => (_opcode & 0x1000000000000) != 0;
+ }
+
+ // Decoder view for the Atoms encoding: wraps the raw 64-bit opcode and extracts each field by shift and mask.
+ // AtomsSize is bits 28-29, Imm22 is bits 30-51 (returned without sign extension) and AtomOp is bits 52-55.
+ struct InstAtoms
+ {
+ private ulong _opcode;
+ public InstAtoms(ulong opcode) => _opcode = opcode;
+ public int Dest => (int)((_opcode >> 0) & 0xFF);
+ public int SrcA => (int)((_opcode >> 8) & 0xFF);
+ public int SrcB => (int)((_opcode >> 20) & 0xFF);
+ public int Pred => (int)((_opcode >> 16) & 0x7);
+ public bool PredInv => (_opcode & 0x80000) != 0;
+ public int Imm22 => (int)((_opcode >> 30) & 0x3FFFFF);
+ public AtomsSize AtomsSize => (AtomsSize)((_opcode >> 28) & 0x3);
+ public AtomOp AtomOp => (AtomOp)((_opcode >> 52) & 0xF);
+ }
+
+ // Decoder view for the AtomsCas encoding: wraps the raw 64-bit opcode and extracts each field by shift and mask.
+ // AtomsBcRz is bits 28-29.
+ struct InstAtomsCas
+ {
+ private ulong _opcode;
+ public InstAtomsCas(ulong opcode) => _opcode = opcode;
+ public int Dest => (int)((_opcode >> 0) & 0xFF);
+ public int SrcA => (int)((_opcode >> 8) & 0xFF);
+ public int SrcB => (int)((_opcode >> 20) & 0xFF);
+ public int Pred => (int)((_opcode >> 16) & 0x7);
+ public bool PredInv => (_opcode & 0x80000) != 0;
+ public int AtomsBcRz => (int)((_opcode >> 28) & 0x3);
+ }
+
+ // Decoder view for the B2r encoding: wraps the raw 64-bit opcode and extracts each field by shift and mask.
+ // Mode is bits 32-33 and DestPred is bits 45-47.
+ struct InstB2r
+ {
+ private ulong _opcode;
+ public InstB2r(ulong opcode) => _opcode = opcode;
+ public int Dest => (int)((_opcode >> 0) & 0xFF);
+ public int DestPred => (int)((_opcode >> 45) & 0x7);
+ public int SrcA => (int)((_opcode >> 8) & 0xFF);
+ public int Pred => (int)((_opcode >> 16) & 0x7);
+ public bool PredInv => (_opcode & 0x80000) != 0;
+ public BarMode Mode => (BarMode)((_opcode >> 32) & 0x3);
+ }
+
+ // Decoder view for the Bar encoding: wraps the raw 64-bit opcode and extracts each field by shift and mask.
+ // Imm12 is bits 20-31, BarOp bits 32-34, BarRedOp bits 35-36, SrcPred bits 39-41,
+ // SrcPredInv bit 42, BFixBar bit 43 and AFixBar bit 44.
+ struct InstBar
+ {
+ private ulong _opcode;
+ public InstBar(ulong opcode) => _opcode = opcode;
+ public int SrcA => (int)((_opcode >> 8) & 0xFF);
+ public int Imm12 => (int)((_opcode >> 20) & 0xFFF);
+ public int Pred => (int)((_opcode >> 16) & 0x7);
+ public bool PredInv => (_opcode & 0x80000) != 0;
+ public bool SrcPredInv => (_opcode & 0x40000000000) != 0;
+ public int SrcPred => (int)((_opcode >> 39) & 0x7);
+ public BarOp BarOp => (BarOp)((_opcode >> 32) & 0x7);
+ public BarRedOp BarRedOp => (BarRedOp)((_opcode >> 35) & 0x3);
+ public bool AFixBar => (_opcode & 0x100000000000) != 0;
+ public bool BFixBar => (_opcode & 0x80000000000) != 0;
+ }
+
+ // Decoder view for the Bfe encoding whose second operand is a register index (SrcB, bits 20-27).
+ // Brev is bit 40, WriteCC is bit 47 and Signed is bit 48.
+ struct InstBfeR
+ {
+ private ulong _opcode;
+ public InstBfeR(ulong opcode) => _opcode = opcode;
+ public int Dest => (int)((_opcode >> 0) & 0xFF);
+ public int SrcA => (int)((_opcode >> 8) & 0xFF);
+ public int SrcB => (int)((_opcode >> 20) & 0xFF);
+ public int Pred => (int)((_opcode >> 16) & 0x7);
+ public bool PredInv => (_opcode & 0x80000) != 0;
+ public bool WriteCC => (_opcode & 0x800000000000) != 0;
+ public bool Signed => (_opcode & 0x1000000000000) != 0;
+ public bool Brev => (_opcode & 0x10000000000) != 0;
+ }
+
+ // Decoder view for the Bfe encoding whose second operand is a 20-bit immediate.
+ // Brev is bit 40, WriteCC is bit 47 and Signed is bit 48.
+ struct InstBfeI
+ {
+ private ulong _opcode;
+ public InstBfeI(ulong opcode) => _opcode = opcode;
+ public int Dest => (int)((_opcode >> 0) & 0xFF);
+ public int SrcA => (int)((_opcode >> 8) & 0xFF);
+ // The low 19 bits come from opcode bits 20-38; bit 19 of the result is opcode bit 56.
+ // The value is returned as is, without sign extension.
+ public int Imm20 => (int)((_opcode >> 37) & 0x80000) | (int)((_opcode >> 20) & 0x7FFFF);
+ public int Pred => (int)((_opcode >> 16) & 0x7);
+ public bool PredInv => (_opcode & 0x80000) != 0;
+ public bool WriteCC => (_opcode & 0x800000000000) != 0;
+ public bool Signed => (_opcode & 0x1000000000000) != 0;
+ public bool Brev => (_opcode & 0x10000000000) != 0;
+ }
+
+ // Decoder view for the Bfe encoding whose second operand is a constant buffer reference:
+ // CbufOffset is bits 20-33 and CbufSlot is bits 34-38.
+ // NOTE(review): the unit of CbufOffset (bytes or words) is not visible here - confirm at the use site.
+ struct InstBfeC
+ {
+ private ulong _opcode;
+ public InstBfeC(ulong opcode) => _opcode = opcode;
+ public int Dest => (int)((_opcode >> 0) & 0xFF);
+ public int SrcA => (int)((_opcode >> 8) & 0xFF);
+ public int CbufSlot => (int)((_opcode >> 34) & 0x1F);
+ public int CbufOffset => (int)((_opcode >> 20) & 0x3FFF);
+ public int Pred => (int)((_opcode >> 16) & 0x7);
+ public bool PredInv => (_opcode & 0x80000) != 0;
+ public bool WriteCC => (_opcode & 0x800000000000) != 0;
+ public bool Signed => (_opcode & 0x1000000000000) != 0;
+ public bool Brev => (_opcode & 0x10000000000) != 0;
+ }
+
+ // Decoder view for the Bfi encoding with register operands: SrcB is bits 20-27 and SrcC is bits 39-46.
+ // WriteCC is bit 47.
+ struct InstBfiR
+ {
+ private ulong _opcode;
+ public InstBfiR(ulong opcode) => _opcode = opcode;
+ public int Dest => (int)((_opcode >> 0) & 0xFF);
+ public int SrcA => (int)((_opcode >> 8) & 0xFF);
+ public int SrcB => (int)((_opcode >> 20) & 0xFF);
+ public int SrcC => (int)((_opcode >> 39) & 0xFF);
+ public int Pred => (int)((_opcode >> 16) & 0x7);
+ public bool PredInv => (_opcode & 0x80000) != 0;
+ public bool WriteCC => (_opcode & 0x800000000000) != 0;
+ }
+
+ // Decoder view for the Bfi encoding whose second operand is a 20-bit immediate; SrcC is bits 39-46.
+ // WriteCC is bit 47.
+ struct InstBfiI
+ {
+ private ulong _opcode;
+ public InstBfiI(ulong opcode) => _opcode = opcode;
+ public int Dest => (int)((_opcode >> 0) & 0xFF);
+ public int SrcA => (int)((_opcode >> 8) & 0xFF);
+ // The low 19 bits come from opcode bits 20-38; bit 19 of the result is opcode bit 56.
+ // The value is returned as is, without sign extension.
+ public int Imm20 => (int)((_opcode >> 37) & 0x80000) | (int)((_opcode >> 20) & 0x7FFFF);
+ public int SrcC => (int)((_opcode >> 39) & 0xFF);
+ public int Pred => (int)((_opcode >> 16) & 0x7);
+ public bool PredInv => (_opcode & 0x80000) != 0;
+ public bool WriteCC => (_opcode & 0x800000000000) != 0;
+ }
+
+ // Decoder view for the Bfi encoding whose second operand is a constant buffer reference:
+ // CbufOffset is bits 20-33, CbufSlot is bits 34-38 and SrcC is bits 39-46.
+ struct InstBfiC
+ {
+ private ulong _opcode;
+ public InstBfiC(ulong opcode) => _opcode = opcode;
+ public int Dest => (int)((_opcode >> 0) & 0xFF);
+ public int SrcA => (int)((_opcode >> 8) & 0xFF);
+ public int CbufSlot => (int)((_opcode >> 34) & 0x1F);
+ public int CbufOffset => (int)((_opcode >> 20) & 0x3FFF);
+ public int SrcC => (int)((_opcode >> 39) & 0xFF);
+ public int Pred => (int)((_opcode >> 16) & 0x7);
+ public bool PredInv => (_opcode & 0x80000) != 0;
+ public bool WriteCC => (_opcode & 0x800000000000) != 0;
+ }
+
+ // Decoder view for the Bfi "Rc" encoding. It exposes the same fields at the same bit positions as InstBfiC.
+ // NOTE(review): presumably the operand order differs (register before constant buffer) - confirm at the emitter.
+ struct InstBfiRc
+ {
+ private ulong _opcode;
+ public InstBfiRc(ulong opcode) => _opcode = opcode;
+ public int Dest => (int)((_opcode >> 0) & 0xFF);
+ public int SrcA => (int)((_opcode >> 8) & 0xFF);
+ public int SrcC => (int)((_opcode >> 39) & 0xFF);
+ public int CbufSlot => (int)((_opcode >> 34) & 0x1F);
+ public int CbufOffset => (int)((_opcode >> 20) & 0x3FFF);
+ public int Pred => (int)((_opcode >> 16) & 0x7);
+ public bool PredInv => (_opcode & 0x80000) != 0;
+ public bool WriteCC => (_opcode & 0x800000000000) != 0;
+ }
+
+ // Decoder view for the Bpt encoding: Bpt is bits 6-8 and Imm20 is bits 20-39 (returned without sign extension).
+ // No predicate fields are exposed by this decoder.
+ struct InstBpt
+ {
+ private ulong _opcode;
+ public InstBpt(ulong opcode) => _opcode = opcode;
+ public int Imm20 => (int)((_opcode >> 20) & 0xFFFFF);
+ public Bpt Bpt => (Bpt)((_opcode >> 6) & 0x7);
+ }
+
+ // Decoder view for the Bra encoding: Ccc is bits 0-4, Ca bit 5, Lmt bit 6, U bit 7,
+ // Pred bits 16-18, PredInv bit 19 and Imm24 bits 20-43 (returned without sign extension).
+ struct InstBra
+ {
+ private ulong _opcode;
+ public InstBra(ulong opcode) => _opcode = opcode;
+ public int Pred => (int)((_opcode >> 16) & 0x7);
+ public bool PredInv => (_opcode & 0x80000) != 0;
+ public Ccc Ccc => (Ccc)((_opcode >> 0) & 0x1F);
+ public int Imm24 => (int)((_opcode >> 20) & 0xFFFFFF);
+ public bool Ca => (_opcode & 0x20) != 0;
+ public bool Lmt => (_opcode & 0x40) != 0;
+ public bool U => (_opcode & 0x80) != 0;
+ }
+
+ // Decoder view for the Brk encoding; exposes the same three fields as InstConditional
+ // (Pred bits 16-18, PredInv bit 19, Ccc bits 0-4).
+ struct InstBrk
+ {
+ private ulong _opcode;
+ public InstBrk(ulong opcode) => _opcode = opcode;
+ public int Pred => (int)((_opcode >> 16) & 0x7);
+ public bool PredInv => (_opcode & 0x80000) != 0;
+ public Ccc Ccc => (Ccc)((_opcode >> 0) & 0x1F);
+ }
+
+ // Decoder view for the Brx (indirect branch) encoding: Ccc is bits 0-4, Ca bit 5, Lmt bit 6,
+ // SrcA bits 8-15, Pred bits 16-18, PredInv bit 19 and Imm24 bits 20-43 (returned without sign extension).
+ // SrcA is the register whose last write FindBrxTargetRange traces to locate the jump table.
+ struct InstBrx
+ {
+ private ulong _opcode;
+ public InstBrx(ulong opcode) => _opcode = opcode;
+ public int SrcA => (int)((_opcode >> 8) & 0xFF);
+ public int Pred => (int)((_opcode >> 16) & 0x7);
+ public bool PredInv => (_opcode & 0x80000) != 0;
+ public Ccc Ccc => (Ccc)((_opcode >> 0) & 0x1F);
+ public int Imm24 => (int)((_opcode >> 20) & 0xFFFFFF);
+ public bool Ca => (_opcode & 0x20) != 0;
+ public bool Lmt => (_opcode & 0x40) != 0;
+ }
+
+ // Decoder view for the Cal encoding: Ca is bit 5, Inc is bit 6 and Imm24 is bits 20-43
+ // (returned without sign extension). No predicate fields are exposed by this decoder.
+ struct InstCal
+ {
+ private ulong _opcode;
+ public InstCal(ulong opcode) => _opcode = opcode;
+ public bool Ca => (_opcode & 0x20) != 0;
+ public int Imm24 => (int)((_opcode >> 20) & 0xFFFFFF);
+ public bool Inc => (_opcode & 0x40) != 0;
+ }
+
+ // Decoder view for the Cctl encoding: CctlOp is bits 0-3, Cache bits 4-6, SrcA bits 8-15,
+ // Imm30 bits 22-51 (returned without sign extension) and E bit 52.
+ struct InstCctl
+ {
+ private ulong _opcode;
+ public InstCctl(ulong opcode) => _opcode = opcode;
+ public int SrcA => (int)((_opcode >> 8) & 0xFF);
+ public int Pred => (int)((_opcode >> 16) & 0x7);
+ public bool PredInv => (_opcode & 0x80000) != 0;
+ public int Imm30 => (int)((_opcode >> 22) & 0x3FFFFFFF);
+ public bool E => (_opcode & 0x10000000000000) != 0;
+ public CacheType Cache => (CacheType)((_opcode >> 4) & 0x7);
+ public CctlOp CctlOp => (CctlOp)((_opcode >> 0) & 0xF);
+ }
+
+ // Decoder view for the Cctll encoding: CctlOp is bits 0-3, Cache bits 4-5 (a plain int here,
+ // not CacheType), SrcA bits 8-15 and Imm22 bits 22-43 (returned without sign extension).
+ struct InstCctll
+ {
+ private ulong _opcode;
+ public InstCctll(ulong opcode) => _opcode = opcode;
+ public int SrcA => (int)((_opcode >> 8) & 0xFF);
+ public int Pred => (int)((_opcode >> 16) & 0x7);
+ public bool PredInv => (_opcode & 0x80000) != 0;
+ public int Imm22 => (int)((_opcode >> 22) & 0x3FFFFF);
+ public int Cache => (int)((_opcode >> 4) & 0x3);
+ public CctlOp CctlOp => (CctlOp)((_opcode >> 0) & 0xF);
+ }
+
+ // Decoder view for the Cctlt encoding: CctltOp is bits 0-1, Pred bits 16-18, PredInv bit 19
+ // and TsIdx13 bits 36-48.
+ struct InstCctlt
+ {
+ private ulong _opcode;
+ public InstCctlt(ulong opcode) => _opcode = opcode;
+ public int Pred => (int)((_opcode >> 16) & 0x7);
+ public bool PredInv => (_opcode & 0x80000) != 0;
+ public int TsIdx13 => (int)((_opcode >> 36) & 0x1FFF);
+ public CctltOp CctltOp => (CctltOp)((_opcode >> 0) & 0x3);
+ }
+
+ // Decoder view for the register form of the Cctlt encoding: CctltOp is bits 0-1 and SrcC is bits 39-46.
+ // No predicate fields are exposed by this decoder.
+ struct InstCctltR
+ {
+ private ulong _opcode;
+ public InstCctltR(ulong opcode) => _opcode = opcode;
+ public int SrcC => (int)((_opcode >> 39) & 0xFF);
+ public CctltOp CctltOp => (CctltOp)((_opcode >> 0) & 0x3);
+ }
+
+ // Decoder view for the Cont encoding; exposes the same three fields as InstConditional
+ // (Pred bits 16-18, PredInv bit 19, Ccc bits 0-4).
+ struct InstCont
+ {
+ private ulong _opcode;
+ public InstCont(ulong opcode) => _opcode = opcode;
+ public int Pred => (int)((_opcode >> 16) & 0x7);
+ public bool PredInv => (_opcode & 0x80000) != 0;
+ public Ccc Ccc => (Ccc)((_opcode >> 0) & 0x1F);
+ }
+
+ // Decoder view for the Cset encoding: Dest is bits 0-7, Ccc bits 8-12 (not bits 0-4 as in the branch decoders),
+ // SrcPred bits 39-41, SrcPredInv bit 42, BVal bit 44, Bop bits 45-46 and WriteCC bit 47.
+ struct InstCset
+ {
+ private ulong _opcode;
+ public InstCset(ulong opcode) => _opcode = opcode;
+ public int Dest => (int)((_opcode >> 0) & 0xFF);
+ public int Pred => (int)((_opcode >> 16) & 0x7);
+ public bool PredInv => (_opcode & 0x80000) != 0;
+ public bool WriteCC => (_opcode & 0x800000000000) != 0;
+ public Ccc Ccc => (Ccc)((_opcode >> 8) & 0x1F);
+ public int SrcPred => (int)((_opcode >> 39) & 0x7);
+ public bool SrcPredInv => (_opcode & 0x40000000000) != 0;
+ public bool BVal => (_opcode & 0x100000000000) != 0;
+ public BoolOp Bop => (BoolOp)((_opcode >> 45) & 0x3);
+ }
+
+ // Decoder view for the Csetp encoding: Ccc is bits 8-12, SrcPred bits 39-41, SrcPredInv bit 42,
+ // Bop bits 45-46 and WriteCC bit 47.
+ struct InstCsetp
+ {
+ private ulong _opcode;
+ public InstCsetp(ulong opcode) => _opcode = opcode;
+ public int Pred => (int)((_opcode >> 16) & 0x7);
+ public bool PredInv => (_opcode & 0x80000) != 0;
+ public bool WriteCC => (_opcode & 0x800000000000) != 0;
+ public Ccc Ccc => (Ccc)((_opcode >> 8) & 0x1F);
+ // Two 3-bit predicate indices: DestPred is bits 3-5 and DestPredInv is bits 0-2.
+ // Despite the "Inv" suffix, DestPredInv is an index, not a boolean flag.
+ public int DestPred => (int)((_opcode >> 3) & 0x7);
+ public int DestPredInv => (int)((_opcode >> 0) & 0x7);
+ public int SrcPred => (int)((_opcode >> 39) & 0x7);
+ public bool SrcPredInv => (_opcode & 0x40000000000) != 0;
+ public BoolOp Bop => (BoolOp)((_opcode >> 45) & 0x3);
+ }
+
+ // Decoder view for the Cs2r encoding: Dest is bits 0-7, Pred bits 16-18, PredInv bit 19
+ // and SReg bits 20-27.
+ struct InstCs2r
+ {
+ private ulong _opcode;
+ public InstCs2r(ulong opcode) => _opcode = opcode;
+ public int Dest => (int)((_opcode >> 0) & 0xFF);
+ public int Pred => (int)((_opcode >> 16) & 0x7);
+ public bool PredInv => (_opcode & 0x80000) != 0;
+ public SReg SReg => (SReg)((_opcode >> 20) & 0xFF);
+ }
+
+ // Decoder view for the Dadd encoding whose second operand is a register index (SrcB, bits 20-27).
+ // RoundMode is bits 39-40, NegB bit 45, AbsA bit 46, WriteCC bit 47, NegA bit 48 and AbsB bit 49.
+ struct InstDaddR
+ {
+ private ulong _opcode;
+ public InstDaddR(ulong opcode) => _opcode = opcode;
+ public int Dest => (int)((_opcode >> 0) & 0xFF);
+ public int SrcA => (int)((_opcode >> 8) & 0xFF);
+ public int SrcB => (int)((_opcode >> 20) & 0xFF);
+ public int Pred => (int)((_opcode >> 16) & 0x7);
+ public bool PredInv => (_opcode & 0x80000) != 0;
+ public bool WriteCC => (_opcode & 0x800000000000) != 0;
+ public bool AbsB => (_opcode & 0x2000000000000) != 0;
+ public bool NegA => (_opcode & 0x1000000000000) != 0;
+ public bool AbsA => (_opcode & 0x400000000000) != 0;
+ public bool NegB => (_opcode & 0x200000000000) != 0;
+ public RoundMode RoundMode => (RoundMode)((_opcode >> 39) & 0x3);
+ }
+
+ /// <summary>
+ /// Read-only bit-field view over the raw 64-bit instruction word passed to the constructor.
+ /// This variant exposes a 20-bit immediate (<see cref="Imm20"/>) instead of a SrcB index.
+ /// </summary>
+ readonly struct InstDaddI
+ {
+ private readonly ulong _opcode;
+ public InstDaddI(ulong opcode) => _opcode = opcode;
+ public int Dest => (int)((_opcode >> 0) & 0xFF);
+ public int SrcA => (int)((_opcode >> 8) & 0xFF);
+ // Bits 20-38 become the low 19 bits; bit 56 is moved down to bit 19. No sign extension is done here.
+ public int Imm20 => (int)((_opcode >> 37) & 0x80000) | (int)((_opcode >> 20) & 0x7FFFF);
+ public int Pred => (int)((_opcode >> 16) & 0x7);
+ public bool PredInv => (_opcode & 0x80000) != 0;
+ public bool WriteCC => (_opcode & 0x800000000000) != 0;
+ public bool AbsB => (_opcode & 0x2000000000000) != 0;
+ public bool NegA => (_opcode & 0x1000000000000) != 0;
+ public bool AbsA => (_opcode & 0x400000000000) != 0;
+ public bool NegB => (_opcode & 0x200000000000) != 0;
+ public RoundMode RoundMode => (RoundMode)((_opcode >> 39) & 0x3);
+ }
+
+ /// <summary>
+ /// Read-only bit-field view over the raw 64-bit instruction word passed to the constructor.
+ /// This variant exposes a 5-bit CbufSlot (bits 34-38) and a 14-bit CbufOffset (bits 20-33).
+ /// </summary>
+ readonly struct InstDaddC
+ {
+ private readonly ulong _opcode;
+ public InstDaddC(ulong opcode) => _opcode = opcode;
+ public int Dest => (int)((_opcode >> 0) & 0xFF);
+ public int SrcA => (int)((_opcode >> 8) & 0xFF);
+ public int CbufSlot => (int)((_opcode >> 34) & 0x1F);
+ public int CbufOffset => (int)((_opcode >> 20) & 0x3FFF);
+ public int Pred => (int)((_opcode >> 16) & 0x7);
+ public bool PredInv => (_opcode & 0x80000) != 0;
+ public bool WriteCC => (_opcode & 0x800000000000) != 0;
+ public bool AbsB => (_opcode & 0x2000000000000) != 0;
+ public bool NegA => (_opcode & 0x1000000000000) != 0;
+ public bool AbsA => (_opcode & 0x400000000000) != 0;
+ public bool NegB => (_opcode & 0x200000000000) != 0;
+ public RoundMode RoundMode => (RoundMode)((_opcode >> 39) & 0x3);
+ }
+
+ /// <summary>
+ /// Read-only bit-field view over the raw 64-bit instruction word passed to the constructor.
+ /// Every property is a pure shift-and-mask extraction of one field from that word.
+ /// </summary>
+ readonly struct InstDepbar
+ {
+ private readonly ulong _opcode;
+ public InstDepbar(ulong opcode) => _opcode = opcode;
+ public int Pred => (int)((_opcode >> 16) & 0x7);
+ public bool PredInv => (_opcode & 0x80000) != 0;
+ public bool Le => (_opcode & 0x20000000) != 0;
+ public int Sbid => (int)((_opcode >> 26) & 0x7);
+ public int PendCnt => (int)((_opcode >> 20) & 0x3F);
+ public int Imm6 => (int)((_opcode >> 0) & 0x3F);
+ }
+
+ /// <summary>
+ /// Read-only bit-field view over the raw 64-bit instruction word passed to the constructor.
+ /// This variant exposes three 8-bit source indices: SrcA (bits 8-15), SrcB (bits 20-27), SrcC (bits 39-46).
+ /// </summary>
+ readonly struct InstDfmaR
+ {
+ private readonly ulong _opcode;
+ public InstDfmaR(ulong opcode) => _opcode = opcode;
+ public int Dest => (int)((_opcode >> 0) & 0xFF);
+ public int SrcA => (int)((_opcode >> 8) & 0xFF);
+ public int SrcB => (int)((_opcode >> 20) & 0xFF);
+ public int SrcC => (int)((_opcode >> 39) & 0xFF);
+ public int Pred => (int)((_opcode >> 16) & 0x7);
+ public bool PredInv => (_opcode & 0x80000) != 0;
+ public bool WriteCC => (_opcode & 0x800000000000) != 0;
+ public RoundMode RoundMode => (RoundMode)((_opcode >> 50) & 0x3);
+ public bool NegC => (_opcode & 0x2000000000000) != 0;
+ public bool NegA => (_opcode & 0x1000000000000) != 0;
+ }
+
+ /// <summary>
+ /// Read-only bit-field view over the raw 64-bit instruction word passed to the constructor.
+ /// This variant exposes a 20-bit immediate (<see cref="Imm20"/>) alongside SrcA and SrcC.
+ /// </summary>
+ readonly struct InstDfmaI
+ {
+ private readonly ulong _opcode;
+ public InstDfmaI(ulong opcode) => _opcode = opcode;
+ public int Dest => (int)((_opcode >> 0) & 0xFF);
+ public int SrcA => (int)((_opcode >> 8) & 0xFF);
+ // Bits 20-38 become the low 19 bits; bit 56 is moved down to bit 19. No sign extension is done here.
+ public int Imm20 => (int)((_opcode >> 37) & 0x80000) | (int)((_opcode >> 20) & 0x7FFFF);
+ public int SrcC => (int)((_opcode >> 39) & 0xFF);
+ public int Pred => (int)((_opcode >> 16) & 0x7);
+ public bool PredInv => (_opcode & 0x80000) != 0;
+ public bool WriteCC => (_opcode & 0x800000000000) != 0;
+ public RoundMode RoundMode => (RoundMode)((_opcode >> 50) & 0x3);
+ public bool NegC => (_opcode & 0x2000000000000) != 0;
+ public bool NegA => (_opcode & 0x1000000000000) != 0;
+ }
+
+ /// <summary>
+ /// Read-only bit-field view over the raw 64-bit instruction word passed to the constructor.
+ /// This variant exposes a CbufSlot/CbufOffset pair (bits 34-38 and 20-33) alongside SrcA and SrcC.
+ /// </summary>
+ readonly struct InstDfmaC
+ {
+ private readonly ulong _opcode;
+ public InstDfmaC(ulong opcode) => _opcode = opcode;
+ public int Dest => (int)((_opcode >> 0) & 0xFF);
+ public int SrcA => (int)((_opcode >> 8) & 0xFF);
+ public int CbufSlot => (int)((_opcode >> 34) & 0x1F);
+ public int CbufOffset => (int)((_opcode >> 20) & 0x3FFF);
+ public int SrcC => (int)((_opcode >> 39) & 0xFF);
+ public int Pred => (int)((_opcode >> 16) & 0x7);
+ public bool PredInv => (_opcode & 0x80000) != 0;
+ public bool WriteCC => (_opcode & 0x800000000000) != 0;
+ public RoundMode RoundMode => (RoundMode)((_opcode >> 50) & 0x3);
+ public bool NegC => (_opcode & 0x2000000000000) != 0;
+ public bool NegA => (_opcode & 0x1000000000000) != 0;
+ }
+
+ /// <summary>
+ /// Read-only bit-field view over the raw 64-bit instruction word passed to the constructor.
+ /// Exposes the same fields, at the same bit positions, as the InstDfmaC view; only the declaration order differs.
+ /// </summary>
+ readonly struct InstDfmaRc
+ {
+ private readonly ulong _opcode;
+ public InstDfmaRc(ulong opcode) => _opcode = opcode;
+ public int Dest => (int)((_opcode >> 0) & 0xFF);
+ public int SrcA => (int)((_opcode >> 8) & 0xFF);
+ public int SrcC => (int)((_opcode >> 39) & 0xFF);
+ public int CbufSlot => (int)((_opcode >> 34) & 0x1F);
+ public int CbufOffset => (int)((_opcode >> 20) & 0x3FFF);
+ public int Pred => (int)((_opcode >> 16) & 0x7);
+ public bool PredInv => (_opcode & 0x80000) != 0;
+ public bool WriteCC => (_opcode & 0x800000000000) != 0;
+ public RoundMode RoundMode => (RoundMode)((_opcode >> 50) & 0x3);
+ public bool NegC => (_opcode & 0x2000000000000) != 0;
+ public bool NegA => (_opcode & 0x1000000000000) != 0;
+ }
+
+ /// <summary>
+ /// Read-only bit-field view over the raw 64-bit instruction word passed to the constructor.
+ /// This variant exposes both sources as 8-bit indices plus a source predicate (SrcPred / SrcPredInv).
+ /// </summary>
+ readonly struct InstDmnmxR
+ {
+ private readonly ulong _opcode;
+ public InstDmnmxR(ulong opcode) => _opcode = opcode;
+ public int Dest => (int)((_opcode >> 0) & 0xFF);
+ public int SrcA => (int)((_opcode >> 8) & 0xFF);
+ public int SrcB => (int)((_opcode >> 20) & 0xFF);
+ public int Pred => (int)((_opcode >> 16) & 0x7);
+ public bool PredInv => (_opcode & 0x80000) != 0;
+ public bool WriteCC => (_opcode & 0x800000000000) != 0;
+ public bool AbsB => (_opcode & 0x2000000000000) != 0;
+ public bool NegA => (_opcode & 0x1000000000000) != 0;
+ public bool AbsA => (_opcode & 0x400000000000) != 0;
+ public bool NegB => (_opcode & 0x200000000000) != 0;
+ public int SrcPred => (int)((_opcode >> 39) & 0x7);
+ public bool SrcPredInv => (_opcode & 0x40000000000) != 0;
+ }
+
+ /// <summary>
+ /// Read-only bit-field view over the raw 64-bit instruction word passed to the constructor.
+ /// This variant exposes a 20-bit immediate (<see cref="Imm20"/>) instead of a SrcB index.
+ /// </summary>
+ readonly struct InstDmnmxI
+ {
+ private readonly ulong _opcode;
+ public InstDmnmxI(ulong opcode) => _opcode = opcode;
+ public int Dest => (int)((_opcode >> 0) & 0xFF);
+ public int SrcA => (int)((_opcode >> 8) & 0xFF);
+ // Bits 20-38 become the low 19 bits; bit 56 is moved down to bit 19. No sign extension is done here.
+ public int Imm20 => (int)((_opcode >> 37) & 0x80000) | (int)((_opcode >> 20) & 0x7FFFF);
+ public int Pred => (int)((_opcode >> 16) & 0x7);
+ public bool PredInv => (_opcode & 0x80000) != 0;
+ public bool WriteCC => (_opcode & 0x800000000000) != 0;
+ public bool AbsB => (_opcode & 0x2000000000000) != 0;
+ public bool NegA => (_opcode & 0x1000000000000) != 0;
+ public bool AbsA => (_opcode & 0x400000000000) != 0;
+ public bool NegB => (_opcode & 0x200000000000) != 0;
+ public int SrcPred => (int)((_opcode >> 39) & 0x7);
+ public bool SrcPredInv => (_opcode & 0x40000000000) != 0;
+ }
+
+ /// <summary>
+ /// Read-only bit-field view over the raw 64-bit instruction word passed to the constructor.
+ /// This variant exposes a 5-bit CbufSlot (bits 34-38) and a 14-bit CbufOffset (bits 20-33).
+ /// </summary>
+ readonly struct InstDmnmxC
+ {
+ private readonly ulong _opcode;
+ public InstDmnmxC(ulong opcode) => _opcode = opcode;
+ public int Dest => (int)((_opcode >> 0) & 0xFF);
+ public int SrcA => (int)((_opcode >> 8) & 0xFF);
+ public int CbufSlot => (int)((_opcode >> 34) & 0x1F);
+ public int CbufOffset => (int)((_opcode >> 20) & 0x3FFF);
+ public int Pred => (int)((_opcode >> 16) & 0x7);
+ public bool PredInv => (_opcode & 0x80000) != 0;
+ public bool WriteCC => (_opcode & 0x800000000000) != 0;
+ public bool AbsB => (_opcode & 0x2000000000000) != 0;
+ public bool NegA => (_opcode & 0x1000000000000) != 0;
+ public bool AbsA => (_opcode & 0x400000000000) != 0;
+ public bool NegB => (_opcode & 0x200000000000) != 0;
+ public int SrcPred => (int)((_opcode >> 39) & 0x7);
+ public bool SrcPredInv => (_opcode & 0x40000000000) != 0;
+ }
+
+ /// <summary>
+ /// Read-only bit-field view over the raw 64-bit instruction word passed to the constructor.
+ /// This variant exposes both sources as 8-bit indices (SrcA at bits 8-15, SrcB at bits 20-27).
+ /// </summary>
+ readonly struct InstDmulR
+ {
+ private readonly ulong _opcode;
+ public InstDmulR(ulong opcode) => _opcode = opcode;
+ public int Dest => (int)((_opcode >> 0) & 0xFF);
+ public int SrcA => (int)((_opcode >> 8) & 0xFF);
+ public int SrcB => (int)((_opcode >> 20) & 0xFF);
+ public int Pred => (int)((_opcode >> 16) & 0x7);
+ public bool PredInv => (_opcode & 0x80000) != 0;
+ public bool WriteCC => (_opcode & 0x800000000000) != 0;
+ public RoundMode RoundMode => (RoundMode)((_opcode >> 39) & 0x3);
+ public bool NegA => (_opcode & 0x1000000000000) != 0;
+ }
+
+ /// <summary>
+ /// Read-only bit-field view over the raw 64-bit instruction word passed to the constructor.
+ /// This variant exposes a 20-bit immediate (<see cref="Imm20"/>) instead of a SrcB index.
+ /// </summary>
+ readonly struct InstDmulI
+ {
+ private readonly ulong _opcode;
+ public InstDmulI(ulong opcode) => _opcode = opcode;
+ public int Dest => (int)((_opcode >> 0) & 0xFF);
+ public int SrcA => (int)((_opcode >> 8) & 0xFF);
+ // Bits 20-38 become the low 19 bits; bit 56 is moved down to bit 19. No sign extension is done here.
+ public int Imm20 => (int)((_opcode >> 37) & 0x80000) | (int)((_opcode >> 20) & 0x7FFFF);
+ public int Pred => (int)((_opcode >> 16) & 0x7);
+ public bool PredInv => (_opcode & 0x80000) != 0;
+ public bool WriteCC => (_opcode & 0x800000000000) != 0;
+ public RoundMode RoundMode => (RoundMode)((_opcode >> 39) & 0x3);
+ public bool NegA => (_opcode & 0x1000000000000) != 0;
+ }
+
+ /// <summary>
+ /// Read-only bit-field view over the raw 64-bit instruction word passed to the constructor.
+ /// This variant exposes a 5-bit CbufSlot (bits 34-38) and a 14-bit CbufOffset (bits 20-33).
+ /// </summary>
+ readonly struct InstDmulC
+ {
+ private readonly ulong _opcode;
+ public InstDmulC(ulong opcode) => _opcode = opcode;
+ public int Dest => (int)((_opcode >> 0) & 0xFF);
+ public int SrcA => (int)((_opcode >> 8) & 0xFF);
+ public int CbufSlot => (int)((_opcode >> 34) & 0x1F);
+ public int CbufOffset => (int)((_opcode >> 20) & 0x3FFF);
+ public int Pred => (int)((_opcode >> 16) & 0x7);
+ public bool PredInv => (_opcode & 0x80000) != 0;
+ public bool WriteCC => (_opcode & 0x800000000000) != 0;
+ public RoundMode RoundMode => (RoundMode)((_opcode >> 39) & 0x3);
+ public bool NegA => (_opcode & 0x1000000000000) != 0;
+ }
+
+ /// <summary>
+ /// Read-only bit-field view over the raw 64-bit instruction word passed to the constructor.
+ /// This variant exposes both sources as 8-bit indices, a 4-bit FComp selector and a 2-bit BoolOp
+ /// combined with a source predicate.
+ /// </summary>
+ readonly struct InstDsetR
+ {
+ private readonly ulong _opcode;
+ public InstDsetR(ulong opcode) => _opcode = opcode;
+ public int Dest => (int)((_opcode >> 0) & 0xFF);
+ public int SrcA => (int)((_opcode >> 8) & 0xFF);
+ public int SrcB => (int)((_opcode >> 20) & 0xFF);
+ public int Pred => (int)((_opcode >> 16) & 0x7);
+ public bool PredInv => (_opcode & 0x80000) != 0;
+ public bool WriteCC => (_opcode & 0x800000000000) != 0;
+ public bool AbsA => (_opcode & 0x40000000000000) != 0;
+ public bool NegB => (_opcode & 0x20000000000000) != 0;
+ public bool BVal => (_opcode & 0x10000000000000) != 0;
+ public FComp FComp => (FComp)((_opcode >> 48) & 0xF);
+ public BoolOp Bop => (BoolOp)((_opcode >> 45) & 0x3);
+ public bool AbsB => (_opcode & 0x100000000000) != 0;
+ public bool NegA => (_opcode & 0x80000000000) != 0;
+ public int SrcPred => (int)((_opcode >> 39) & 0x7);
+ public bool SrcPredInv => (_opcode & 0x40000000000) != 0;
+ }
+
+ /// <summary>
+ /// Read-only bit-field view over the raw 64-bit instruction word passed to the constructor.
+ /// This variant exposes a 20-bit immediate (<see cref="Imm20"/>) instead of a SrcB index.
+ /// </summary>
+ readonly struct InstDsetI
+ {
+ private readonly ulong _opcode;
+ public InstDsetI(ulong opcode) => _opcode = opcode;
+ public int Dest => (int)((_opcode >> 0) & 0xFF);
+ public int SrcA => (int)((_opcode >> 8) & 0xFF);
+ // Bits 20-38 become the low 19 bits; bit 56 is moved down to bit 19. No sign extension is done here.
+ public int Imm20 => (int)((_opcode >> 37) & 0x80000) | (int)((_opcode >> 20) & 0x7FFFF);
+ public int Pred => (int)((_opcode >> 16) & 0x7);
+ public bool PredInv => (_opcode & 0x80000) != 0;
+ public bool WriteCC => (_opcode & 0x800000000000) != 0;
+ public bool AbsA => (_opcode & 0x40000000000000) != 0;
+ public bool NegB => (_opcode & 0x20000000000000) != 0;
+ public bool BVal => (_opcode & 0x10000000000000) != 0;
+ public FComp FComp => (FComp)((_opcode >> 48) & 0xF);
+ public BoolOp Bop => (BoolOp)((_opcode >> 45) & 0x3);
+ public bool AbsB => (_opcode & 0x100000000000) != 0;
+ public bool NegA => (_opcode & 0x80000000000) != 0;
+ public int SrcPred => (int)((_opcode >> 39) & 0x7);
+ public bool SrcPredInv => (_opcode & 0x40000000000) != 0;
+ }
+
+ /// <summary>
+ /// Read-only bit-field view over the raw 64-bit instruction word passed to the constructor.
+ /// This variant exposes a 5-bit CbufSlot (bits 34-38) and a 14-bit CbufOffset (bits 20-33).
+ /// </summary>
+ readonly struct InstDsetC
+ {
+ private readonly ulong _opcode;
+ public InstDsetC(ulong opcode) => _opcode = opcode;
+ public int Dest => (int)((_opcode >> 0) & 0xFF);
+ public int SrcA => (int)((_opcode >> 8) & 0xFF);
+ public int CbufSlot => (int)((_opcode >> 34) & 0x1F);
+ public int CbufOffset => (int)((_opcode >> 20) & 0x3FFF);
+ public int Pred => (int)((_opcode >> 16) & 0x7);
+ public bool PredInv => (_opcode & 0x80000) != 0;
+ public bool WriteCC => (_opcode & 0x800000000000) != 0;
+ public bool AbsA => (_opcode & 0x40000000000000) != 0;
+ public bool NegB => (_opcode & 0x20000000000000) != 0;
+ public bool BVal => (_opcode & 0x10000000000000) != 0;
+ public FComp FComp => (FComp)((_opcode >> 48) & 0xF);
+ public BoolOp Bop => (BoolOp)((_opcode >> 45) & 0x3);
+ public bool AbsB => (_opcode & 0x100000000000) != 0;
+ public bool NegA => (_opcode & 0x80000000000) != 0;
+ public int SrcPred => (int)((_opcode >> 39) & 0x7);
+ public bool SrcPredInv => (_opcode & 0x40000000000) != 0;
+ }
+
+ /// <summary>
+ /// Read-only bit-field view over the raw 64-bit instruction word passed to the constructor.
+ /// This variant exposes both sources as 8-bit indices and two 3-bit destination predicate fields
+ /// in place of a Dest index.
+ /// </summary>
+ readonly struct InstDsetpR
+ {
+ private readonly ulong _opcode;
+ public InstDsetpR(ulong opcode) => _opcode = opcode;
+ public int SrcA => (int)((_opcode >> 8) & 0xFF);
+ public int SrcB => (int)((_opcode >> 20) & 0xFF);
+ public int Pred => (int)((_opcode >> 16) & 0x7);
+ public bool PredInv => (_opcode & 0x80000) != 0;
+ public FComp FComp => (FComp)((_opcode >> 48) & 0xF);
+ public BoolOp Bop => (BoolOp)((_opcode >> 45) & 0x3);
+ public bool AbsB => (_opcode & 0x100000000000) != 0;
+ public bool NegA => (_opcode & 0x80000000000) != 0;
+ public bool SrcPredInv => (_opcode & 0x40000000000) != 0;
+ public int SrcPred => (int)((_opcode >> 39) & 0x7);
+ public bool AbsA => (_opcode & 0x80) != 0;
+ public bool NegB => (_opcode & 0x40) != 0;
+ public int DestPred => (int)((_opcode >> 3) & 0x7);
+ // NOTE(review): a 3-bit integer taken from bits 0-2 (same width as DestPred), not a flag,
+ // despite the "Inv" suffix - confirm the intended meaning against the callers.
+ public int DestPredInv => (int)((_opcode >> 0) & 0x7);
+ }
+
+ /// <summary>
+ /// Read-only bit-field view over the raw 64-bit instruction word passed to the constructor.
+ /// This variant exposes a 20-bit immediate (<see cref="Imm20"/>) and two 3-bit destination predicate fields.
+ /// </summary>
+ readonly struct InstDsetpI
+ {
+ private readonly ulong _opcode;
+ public InstDsetpI(ulong opcode) => _opcode = opcode;
+ public int SrcA => (int)((_opcode >> 8) & 0xFF);
+ // Bits 20-38 become the low 19 bits; bit 56 is moved down to bit 19. No sign extension is done here.
+ public int Imm20 => (int)((_opcode >> 37) & 0x80000) | (int)((_opcode >> 20) & 0x7FFFF);
+ public int Pred => (int)((_opcode >> 16) & 0x7);
+ public bool PredInv => (_opcode & 0x80000) != 0;
+ public FComp FComp => (FComp)((_opcode >> 48) & 0xF);
+ public BoolOp Bop => (BoolOp)((_opcode >> 45) & 0x3);
+ public bool AbsB => (_opcode & 0x100000000000) != 0;
+ public bool NegA => (_opcode & 0x80000000000) != 0;
+ public bool SrcPredInv => (_opcode & 0x40000000000) != 0;
+ public int SrcPred => (int)((_opcode >> 39) & 0x7);
+ public bool AbsA => (_opcode & 0x80) != 0;
+ public bool NegB => (_opcode & 0x40) != 0;
+ public int DestPred => (int)((_opcode >> 3) & 0x7);
+ // NOTE(review): a 3-bit integer taken from bits 0-2 (same width as DestPred), not a flag,
+ // despite the "Inv" suffix - confirm the intended meaning against the callers.
+ public int DestPredInv => (int)((_opcode >> 0) & 0x7);
+ }
+
+ /// <summary>
+ /// Read-only bit-field view over the raw 64-bit instruction word passed to the constructor.
+ /// This variant exposes a CbufSlot/CbufOffset pair (bits 34-38 and 20-33) and two 3-bit
+ /// destination predicate fields.
+ /// </summary>
+ readonly struct InstDsetpC
+ {
+ private readonly ulong _opcode;
+ public InstDsetpC(ulong opcode) => _opcode = opcode;
+ public int SrcA => (int)((_opcode >> 8) & 0xFF);
+ public int CbufSlot => (int)((_opcode >> 34) & 0x1F);
+ public int CbufOffset => (int)((_opcode >> 20) & 0x3FFF);
+ public int Pred => (int)((_opcode >> 16) & 0x7);
+ public bool PredInv => (_opcode & 0x80000) != 0;
+ public FComp FComp => (FComp)((_opcode >> 48) & 0xF);
+ public BoolOp Bop => (BoolOp)((_opcode >> 45) & 0x3);
+ public bool AbsB => (_opcode & 0x100000000000) != 0;
+ public bool NegA => (_opcode & 0x80000000000) != 0;
+ public bool SrcPredInv => (_opcode & 0x40000000000) != 0;
+ public int SrcPred => (int)((_opcode >> 39) & 0x7);
+ public bool AbsA => (_opcode & 0x80) != 0;
+ public bool NegB => (_opcode & 0x40) != 0;
+ public int DestPred => (int)((_opcode >> 3) & 0x7);
+ // NOTE(review): a 3-bit integer taken from bits 0-2 (same width as DestPred), not a flag,
+ // despite the "Inv" suffix - confirm the intended meaning against the callers.
+ public int DestPredInv => (int)((_opcode >> 0) & 0x7);
+ }
+
+ /// <summary>
+ /// Read-only bit-field view over the raw 64-bit instruction word passed to the constructor.
+ /// Exposes the guard predicate fields, a 5-bit Ccc selector (bits 0-4) and the KeepRefCnt flag (bit 5).
+ /// </summary>
+ readonly struct InstExit
+ {
+ private readonly ulong _opcode;
+ public InstExit(ulong opcode) => _opcode = opcode;
+ public int Pred => (int)((_opcode >> 16) & 0x7);
+ public bool PredInv => (_opcode & 0x80000) != 0;
+ public Ccc Ccc => (Ccc)((_opcode >> 0) & 0x1F);
+ public bool KeepRefCnt => (_opcode & 0x20) != 0;
+ }
+
+ /// <summary>
+ /// Read-only bit-field view over the raw 64-bit instruction word passed to the constructor.
+ /// This variant exposes the source as an 8-bit SrcB index (bits 20-27).
+ /// </summary>
+ readonly struct InstF2fR
+ {
+ private readonly ulong _opcode;
+ public InstF2fR(ulong opcode) => _opcode = opcode;
+ public int Dest => (int)((_opcode >> 0) & 0xFF);
+ public int SrcB => (int)((_opcode >> 20) & 0xFF);
+ public int Pred => (int)((_opcode >> 16) & 0x7);
+ public bool PredInv => (_opcode & 0x80000) != 0;
+ public bool WriteCC => (_opcode & 0x800000000000) != 0;
+ public bool AbsB => (_opcode & 0x2000000000000) != 0;
+ public bool NegB => (_opcode & 0x200000000000) != 0;
+ public bool Ftz => (_opcode & 0x100000000000) != 0;
+ public DstFmt DstFmt => (DstFmt)((_opcode >> 8) & 0x3);
+ public DstFmt SrcFmt => (DstFmt)((_opcode >> 10) & 0x3);
+ // Split field: bit 42 supplies bit 2 of the value, bits 39-40 supply bits 0-1.
+ public IntegerRound RoundMode => (IntegerRound)((int)((_opcode >> 40) & 0x4) | (int)((_opcode >> 39) & 0x3));
+ public bool Sh => (_opcode & 0x20000000000) != 0;
+ public bool Sat => (_opcode & 0x4000000000000) != 0;
+ }
+
+ /// <summary>
+ /// Read-only bit-field view over the raw 64-bit instruction word passed to the constructor.
+ /// This variant exposes the source as a 20-bit immediate (<see cref="Imm20"/>).
+ /// </summary>
+ readonly struct InstF2fI
+ {
+ private readonly ulong _opcode;
+ public InstF2fI(ulong opcode) => _opcode = opcode;
+ public int Dest => (int)((_opcode >> 0) & 0xFF);
+ // Bits 20-38 become the low 19 bits; bit 56 is moved down to bit 19. No sign extension is done here.
+ public int Imm20 => (int)((_opcode >> 37) & 0x80000) | (int)((_opcode >> 20) & 0x7FFFF);
+ public int Pred => (int)((_opcode >> 16) & 0x7);
+ public bool PredInv => (_opcode & 0x80000) != 0;
+ public bool WriteCC => (_opcode & 0x800000000000) != 0;
+ public bool AbsB => (_opcode & 0x2000000000000) != 0;
+ public bool NegB => (_opcode & 0x200000000000) != 0;
+ public bool Ftz => (_opcode & 0x100000000000) != 0;
+ public DstFmt DstFmt => (DstFmt)((_opcode >> 8) & 0x3);
+ public DstFmt SrcFmt => (DstFmt)((_opcode >> 10) & 0x3);
+ // Split field: bit 42 supplies bit 2 of the value, bits 39-40 supply bits 0-1.
+ public IntegerRound RoundMode => (IntegerRound)((int)((_opcode >> 40) & 0x4) | (int)((_opcode >> 39) & 0x3));
+ public bool Sh => (_opcode & 0x20000000000) != 0;
+ public bool Sat => (_opcode & 0x4000000000000) != 0;
+ }
+
+ /// <summary>
+ /// Read-only bit-field view over the raw 64-bit instruction word passed to the constructor.
+ /// This variant exposes the source as a CbufSlot/CbufOffset pair (bits 34-38 and 20-33).
+ /// </summary>
+ readonly struct InstF2fC
+ {
+ private readonly ulong _opcode;
+ public InstF2fC(ulong opcode) => _opcode = opcode;
+ public int Dest => (int)((_opcode >> 0) & 0xFF);
+ public int CbufSlot => (int)((_opcode >> 34) & 0x1F);
+ public int CbufOffset => (int)((_opcode >> 20) & 0x3FFF);
+ public int Pred => (int)((_opcode >> 16) & 0x7);
+ public bool PredInv => (_opcode & 0x80000) != 0;
+ public bool WriteCC => (_opcode & 0x800000000000) != 0;
+ public bool AbsB => (_opcode & 0x2000000000000) != 0;
+ public bool NegB => (_opcode & 0x200000000000) != 0;
+ public bool Ftz => (_opcode & 0x100000000000) != 0;
+ public DstFmt DstFmt => (DstFmt)((_opcode >> 8) & 0x3);
+ public DstFmt SrcFmt => (DstFmt)((_opcode >> 10) & 0x3);
+ // Split field: bit 42 supplies bit 2 of the value, bits 39-40 supply bits 0-1.
+ public IntegerRound RoundMode => (IntegerRound)((int)((_opcode >> 40) & 0x4) | (int)((_opcode >> 39) & 0x3));
+ public bool Sh => (_opcode & 0x20000000000) != 0;
+ public bool Sat => (_opcode & 0x4000000000000) != 0;
+ }
+
+ /// <summary>
+ /// Read-only bit-field view over the raw 64-bit instruction word passed to the constructor.
+ /// This variant exposes the source as an 8-bit SrcB index (bits 20-27).
+ /// </summary>
+ readonly struct InstF2iR
+ {
+ private readonly ulong _opcode;
+ public InstF2iR(ulong opcode) => _opcode = opcode;
+ public int Dest => (int)((_opcode >> 0) & 0xFF);
+ public int SrcB => (int)((_opcode >> 20) & 0xFF);
+ public int Pred => (int)((_opcode >> 16) & 0x7);
+ public bool PredInv => (_opcode & 0x80000) != 0;
+ public bool WriteCC => (_opcode & 0x800000000000) != 0;
+ public bool AbsB => (_opcode & 0x2000000000000) != 0;
+ public bool NegB => (_opcode & 0x200000000000) != 0;
+ public bool Ftz => (_opcode & 0x100000000000) != 0;
+ public bool Sh => (_opcode & 0x20000000000) != 0;
+ // Split field: bit 12 supplies bit 2 of the value, bits 8-9 supply bits 0-1 (bits 10-11 belong to SrcFmt).
+ public IDstFmt IDstFmt => (IDstFmt)((int)((_opcode >> 10) & 0x4) | (int)((_opcode >> 8) & 0x3));
+ public DstFmt SrcFmt => (DstFmt)((_opcode >> 10) & 0x3);
+ public RoundMode2 RoundMode => (RoundMode2)((_opcode >> 39) & 0x3);
+ }
+
+ /// <summary>
+ /// Read-only bit-field view over the raw 64-bit instruction word passed to the constructor.
+ /// This variant exposes the source as a 20-bit immediate (<see cref="Imm20"/>).
+ /// </summary>
+ readonly struct InstF2iI
+ {
+ private readonly ulong _opcode;
+ public InstF2iI(ulong opcode) => _opcode = opcode;
+ public int Dest => (int)((_opcode >> 0) & 0xFF);
+ // Bits 20-38 become the low 19 bits; bit 56 is moved down to bit 19. No sign extension is done here.
+ public int Imm20 => (int)((_opcode >> 37) & 0x80000) | (int)((_opcode >> 20) & 0x7FFFF);
+ public int Pred => (int)((_opcode >> 16) & 0x7);
+ public bool PredInv => (_opcode & 0x80000) != 0;
+ public bool WriteCC => (_opcode & 0x800000000000) != 0;
+ public bool AbsB => (_opcode & 0x2000000000000) != 0;
+ public bool NegB => (_opcode & 0x200000000000) != 0;
+ public bool Ftz => (_opcode & 0x100000000000) != 0;
+ public bool Sh => (_opcode & 0x20000000000) != 0;
+ // Split field: bit 12 supplies bit 2 of the value, bits 8-9 supply bits 0-1 (bits 10-11 belong to SrcFmt).
+ public IDstFmt IDstFmt => (IDstFmt)((int)((_opcode >> 10) & 0x4) | (int)((_opcode >> 8) & 0x3));
+ public DstFmt SrcFmt => (DstFmt)((_opcode >> 10) & 0x3);
+ public RoundMode2 RoundMode => (RoundMode2)((_opcode >> 39) & 0x3);
+ }
+
+ /// <summary>
+ /// Read-only bit-field view over the raw 64-bit instruction word passed to the constructor.
+ /// This variant exposes the source as a CbufSlot/CbufOffset pair (bits 34-38 and 20-33).
+ /// </summary>
+ readonly struct InstF2iC
+ {
+ private readonly ulong _opcode;
+ public InstF2iC(ulong opcode) => _opcode = opcode;
+ public int Dest => (int)((_opcode >> 0) & 0xFF);
+ public int CbufSlot => (int)((_opcode >> 34) & 0x1F);
+ public int CbufOffset => (int)((_opcode >> 20) & 0x3FFF);
+ public int Pred => (int)((_opcode >> 16) & 0x7);
+ public bool PredInv => (_opcode & 0x80000) != 0;
+ public bool WriteCC => (_opcode & 0x800000000000) != 0;
+ public bool AbsB => (_opcode & 0x2000000000000) != 0;
+ public bool NegB => (_opcode & 0x200000000000) != 0;
+ public bool Ftz => (_opcode & 0x100000000000) != 0;
+ public bool Sh => (_opcode & 0x20000000000) != 0;
+ // Split field: bit 12 supplies bit 2 of the value, bits 8-9 supply bits 0-1 (bits 10-11 belong to SrcFmt).
+ public IDstFmt IDstFmt => (IDstFmt)((int)((_opcode >> 10) & 0x4) | (int)((_opcode >> 8) & 0x3));
+ public DstFmt SrcFmt => (DstFmt)((_opcode >> 10) & 0x3);
+ public RoundMode2 RoundMode => (RoundMode2)((_opcode >> 39) & 0x3);
+ }
+
+ /// <summary>
+ /// Read-only bit-field view over the raw 64-bit instruction word passed to the constructor.
+ /// This variant exposes both sources as 8-bit indices (SrcA at bits 8-15, SrcB at bits 20-27).
+ /// </summary>
+ readonly struct InstFaddR
+ {
+ private readonly ulong _opcode;
+ public InstFaddR(ulong opcode) => _opcode = opcode;
+ public int Dest => (int)((_opcode >> 0) & 0xFF);
+ public int SrcA => (int)((_opcode >> 8) & 0xFF);
+ public int SrcB => (int)((_opcode >> 20) & 0xFF);
+ public int Pred => (int)((_opcode >> 16) & 0x7);
+ public bool PredInv => (_opcode & 0x80000) != 0;
+ public bool WriteCC => (_opcode & 0x800000000000) != 0;
+ public bool Sat => (_opcode & 0x4000000000000) != 0;
+ public bool AbsB => (_opcode & 0x2000000000000) != 0;
+ public bool NegA => (_opcode & 0x1000000000000) != 0;
+ public bool AbsA => (_opcode & 0x400000000000) != 0;
+ public bool NegB => (_opcode & 0x200000000000) != 0;
+ public bool Ftz => (_opcode & 0x100000000000) != 0;
+ public RoundMode RoundMode => (RoundMode)((_opcode >> 39) & 0x3);
+ }
+
+ /// <summary>
+ /// Read-only bit-field view over the raw 64-bit instruction word passed to the constructor.
+ /// This variant exposes a 20-bit immediate (<see cref="Imm20"/>) instead of a SrcB index.
+ /// </summary>
+ readonly struct InstFaddI
+ {
+ private readonly ulong _opcode;
+ public InstFaddI(ulong opcode) => _opcode = opcode;
+ public int Dest => (int)((_opcode >> 0) & 0xFF);
+ public int SrcA => (int)((_opcode >> 8) & 0xFF);
+ // Bits 20-38 become the low 19 bits; bit 56 is moved down to bit 19. No sign extension is done here.
+ public int Imm20 => (int)((_opcode >> 37) & 0x80000) | (int)((_opcode >> 20) & 0x7FFFF);
+ public int Pred => (int)((_opcode >> 16) & 0x7);
+ public bool PredInv => (_opcode & 0x80000) != 0;
+ public bool WriteCC => (_opcode & 0x800000000000) != 0;
+ public bool Sat => (_opcode & 0x4000000000000) != 0;
+ public bool AbsB => (_opcode & 0x2000000000000) != 0;
+ public bool NegA => (_opcode & 0x1000000000000) != 0;
+ public bool AbsA => (_opcode & 0x400000000000) != 0;
+ public bool NegB => (_opcode & 0x200000000000) != 0;
+ public bool Ftz => (_opcode & 0x100000000000) != 0;
+ public RoundMode RoundMode => (RoundMode)((_opcode >> 39) & 0x3);
+ }
+
+ /// <summary>
+ /// Read-only bit-field view over the raw 64-bit instruction word passed to the constructor.
+ /// This variant exposes a 5-bit CbufSlot (bits 34-38) and a 14-bit CbufOffset (bits 20-33).
+ /// </summary>
+ readonly struct InstFaddC
+ {
+ private readonly ulong _opcode;
+ public InstFaddC(ulong opcode) => _opcode = opcode;
+ public int Dest => (int)((_opcode >> 0) & 0xFF);
+ public int SrcA => (int)((_opcode >> 8) & 0xFF);
+ public int CbufSlot => (int)((_opcode >> 34) & 0x1F);
+ public int CbufOffset => (int)((_opcode >> 20) & 0x3FFF);
+ public int Pred => (int)((_opcode >> 16) & 0x7);
+ public bool PredInv => (_opcode & 0x80000) != 0;
+ public bool WriteCC => (_opcode & 0x800000000000) != 0;
+ public bool Sat => (_opcode & 0x4000000000000) != 0;
+ public bool AbsB => (_opcode & 0x2000000000000) != 0;
+ public bool NegA => (_opcode & 0x1000000000000) != 0;
+ public bool AbsA => (_opcode & 0x400000000000) != 0;
+ public bool NegB => (_opcode & 0x200000000000) != 0;
+ public bool Ftz => (_opcode & 0x100000000000) != 0;
+ public RoundMode RoundMode => (RoundMode)((_opcode >> 39) & 0x3);
+ }
+
+ /// <summary>
+ /// Read-only bit-field view over the raw 64-bit instruction word passed to the constructor.
+ /// This variant exposes a full 32-bit immediate (<see cref="Imm32"/>); its flag bits sit at
+ /// bits 52-57, above the immediate.
+ /// </summary>
+ readonly struct InstFadd32i
+ {
+ private readonly ulong _opcode;
+ public InstFadd32i(ulong opcode) => _opcode = opcode;
+ public int Dest => (int)((_opcode >> 0) & 0xFF);
+ public int SrcA => (int)((_opcode >> 8) & 0xFF);
+ public int Pred => (int)((_opcode >> 16) & 0x7);
+ public bool PredInv => (_opcode & 0x80000) != 0;
+ public bool WriteCC => (_opcode & 0x10000000000000) != 0;
+ // Keeps bits 20-51 of the word. The cast deliberately truncates the upper bits, so it is marked
+ // unchecked to stay valid even when the project is compiled with overflow checking enabled.
+ public int Imm32 => unchecked((int)(_opcode >> 20));
+ public bool AbsB => (_opcode & 0x200000000000000) != 0;
+ public bool NegA => (_opcode & 0x100000000000000) != 0;
+ public bool Ftz => (_opcode & 0x80000000000000) != 0;
+ public bool AbsA => (_opcode & 0x40000000000000) != 0;
+ public bool NegB => (_opcode & 0x20000000000000) != 0;
+ }
+
+ /// <summary>
+ /// Read-only bit-field view over the raw 64-bit instruction word passed to the constructor.
+ /// This variant exposes both sources as 8-bit indices and writes to a 3-bit DestPred field (bits 3-5).
+ /// </summary>
+ readonly struct InstFchkR
+ {
+ private readonly ulong _opcode;
+ public InstFchkR(ulong opcode) => _opcode = opcode;
+ public int DestPred => (int)((_opcode >> 3) & 0x7);
+ public int SrcA => (int)((_opcode >> 8) & 0xFF);
+ public int SrcB => (int)((_opcode >> 20) & 0xFF);
+ public int Pred => (int)((_opcode >> 16) & 0x7);
+ public bool PredInv => (_opcode & 0x80000) != 0;
+ public bool AbsB => (_opcode & 0x2000000000000) != 0;
+ public bool NegA => (_opcode & 0x1000000000000) != 0;
+ public bool AbsA => (_opcode & 0x400000000000) != 0;
+ public bool NegB => (_opcode & 0x200000000000) != 0;
+ public ChkModeF ChkModeF => (ChkModeF)((_opcode >> 39) & 0x3F);
+ }
+
+ /// <summary>
+ /// Read-only bit-field view over the raw 64-bit instruction word passed to the constructor.
+ /// This variant exposes a 20-bit immediate (<see cref="Imm20"/>) instead of a SrcB index.
+ /// </summary>
+ readonly struct InstFchkI
+ {
+ private readonly ulong _opcode;
+ public InstFchkI(ulong opcode) => _opcode = opcode;
+ public int DestPred => (int)((_opcode >> 3) & 0x7);
+ public int SrcA => (int)((_opcode >> 8) & 0xFF);
+ // Bits 20-38 become the low 19 bits; bit 56 is moved down to bit 19. No sign extension is done here.
+ public int Imm20 => (int)((_opcode >> 37) & 0x80000) | (int)((_opcode >> 20) & 0x7FFFF);
+ public int Pred => (int)((_opcode >> 16) & 0x7);
+ public bool PredInv => (_opcode & 0x80000) != 0;
+ public bool AbsB => (_opcode & 0x2000000000000) != 0;
+ public bool NegA => (_opcode & 0x1000000000000) != 0;
+ public bool AbsA => (_opcode & 0x400000000000) != 0;
+ public bool NegB => (_opcode & 0x200000000000) != 0;
+ public ChkModeF ChkModeF => (ChkModeF)((_opcode >> 39) & 0x3F);
+ }
+
+ /// <summary>
+ /// Read-only bit-field view over the raw 64-bit instruction word passed to the constructor.
+ /// This variant exposes a 5-bit CbufSlot (bits 34-38) and a 14-bit CbufOffset (bits 20-33).
+ /// </summary>
+ readonly struct InstFchkC
+ {
+ private readonly ulong _opcode;
+ public InstFchkC(ulong opcode) => _opcode = opcode;
+ public int DestPred => (int)((_opcode >> 3) & 0x7);
+ public int SrcA => (int)((_opcode >> 8) & 0xFF);
+ public int CbufSlot => (int)((_opcode >> 34) & 0x1F);
+ public int CbufOffset => (int)((_opcode >> 20) & 0x3FFF);
+ public int Pred => (int)((_opcode >> 16) & 0x7);
+ public bool PredInv => (_opcode & 0x80000) != 0;
+ public bool AbsB => (_opcode & 0x2000000000000) != 0;
+ public bool NegA => (_opcode & 0x1000000000000) != 0;
+ public bool AbsA => (_opcode & 0x400000000000) != 0;
+ public bool NegB => (_opcode & 0x200000000000) != 0;
+ public ChkModeF ChkModeF => (ChkModeF)((_opcode >> 39) & 0x3F);
+ }
+
+ /// <summary>
+ /// Read-only bit-field view over the raw 64-bit instruction word passed to the constructor.
+ /// This variant exposes three 8-bit source indices: SrcA (bits 8-15), SrcB (bits 20-27), SrcC (bits 39-46).
+ /// </summary>
+ readonly struct InstFcmpR
+ {
+ private readonly ulong _opcode;
+ public InstFcmpR(ulong opcode) => _opcode = opcode;
+ public int Dest => (int)((_opcode >> 0) & 0xFF);
+ public int SrcA => (int)((_opcode >> 8) & 0xFF);
+ public int SrcB => (int)((_opcode >> 20) & 0xFF);
+ public int SrcC => (int)((_opcode >> 39) & 0xFF);
+ public int Pred => (int)((_opcode >> 16) & 0x7);
+ public bool PredInv => (_opcode & 0x80000) != 0;
+ public FComp FComp => (FComp)((_opcode >> 48) & 0xF);
+ public bool Ftz => (_opcode & 0x800000000000) != 0;
+ }
+
+ /// <summary>
+ /// Read-only bit-field view over the raw 64-bit instruction word passed to the constructor.
+ /// This variant exposes a 20-bit immediate (<see cref="Imm20"/>) alongside SrcA and SrcC.
+ /// </summary>
+ readonly struct InstFcmpI
+ {
+ private readonly ulong _opcode;
+ public InstFcmpI(ulong opcode) => _opcode = opcode;
+ public int Dest => (int)((_opcode >> 0) & 0xFF);
+ public int SrcA => (int)((_opcode >> 8) & 0xFF);
+ // Bits 20-38 become the low 19 bits; bit 56 is moved down to bit 19. No sign extension is done here.
+ public int Imm20 => (int)((_opcode >> 37) & 0x80000) | (int)((_opcode >> 20) & 0x7FFFF);
+ public int SrcC => (int)((_opcode >> 39) & 0xFF);
+ public int Pred => (int)((_opcode >> 16) & 0x7);
+ public bool PredInv => (_opcode & 0x80000) != 0;
+ public FComp FComp => (FComp)((_opcode >> 48) & 0xF);
+ public bool Ftz => (_opcode & 0x800000000000) != 0;
+ }
+
+ /// <summary>
+ /// Read-only bit-field view over the raw 64-bit instruction word passed to the constructor.
+ /// This variant exposes a CbufSlot/CbufOffset pair (bits 34-38 and 20-33) alongside SrcA and SrcC.
+ /// </summary>
+ readonly struct InstFcmpC
+ {
+ private readonly ulong _opcode;
+ public InstFcmpC(ulong opcode) => _opcode = opcode;
+ public int Dest => (int)((_opcode >> 0) & 0xFF);
+ public int SrcA => (int)((_opcode >> 8) & 0xFF);
+ public int CbufSlot => (int)((_opcode >> 34) & 0x1F);
+ public int CbufOffset => (int)((_opcode >> 20) & 0x3FFF);
+ public int SrcC => (int)((_opcode >> 39) & 0xFF);
+ public int Pred => (int)((_opcode >> 16) & 0x7);
+ public bool PredInv => (_opcode & 0x80000) != 0;
+ public FComp FComp => (FComp)((_opcode >> 48) & 0xF);
+ public bool Ftz => (_opcode & 0x800000000000) != 0;
+ }
+
+ /// <summary>
+ /// Read-only bit-field view over the raw 64-bit instruction word passed to the constructor.
+ /// Exposes the same fields, at the same bit positions, as the InstFcmpC view; only the declaration order differs.
+ /// </summary>
+ readonly struct InstFcmpRc
+ {
+ private readonly ulong _opcode;
+ public InstFcmpRc(ulong opcode) => _opcode = opcode;
+ public int Dest => (int)((_opcode >> 0) & 0xFF);
+ public int SrcA => (int)((_opcode >> 8) & 0xFF);
+ public int SrcC => (int)((_opcode >> 39) & 0xFF);
+ public int CbufSlot => (int)((_opcode >> 34) & 0x1F);
+ public int CbufOffset => (int)((_opcode >> 20) & 0x3FFF);
+ public int Pred => (int)((_opcode >> 16) & 0x7);
+ public bool PredInv => (_opcode & 0x80000) != 0;
+ public FComp FComp => (FComp)((_opcode >> 48) & 0xF);
+ public bool Ftz => (_opcode & 0x800000000000) != 0;
+ }
+
+ /// <summary>
+ /// Read-only bit-field view over the raw 64-bit instruction word passed to the constructor.
+ /// This variant exposes three 8-bit source indices: SrcA (bits 8-15), SrcB (bits 20-27), SrcC (bits 39-46).
+ /// </summary>
+ readonly struct InstFfmaR
+ {
+ private readonly ulong _opcode;
+ public InstFfmaR(ulong opcode) => _opcode = opcode;
+ public int Dest => (int)((_opcode >> 0) & 0xFF);
+ public int SrcA => (int)((_opcode >> 8) & 0xFF);
+ public int SrcB => (int)((_opcode >> 20) & 0xFF);
+ public int SrcC => (int)((_opcode >> 39) & 0xFF);
+ public int Pred => (int)((_opcode >> 16) & 0x7);
+ public bool PredInv => (_opcode & 0x80000) != 0;
+ public bool WriteCC => (_opcode & 0x800000000000) != 0;
+ public bool NegA => (_opcode & 0x1000000000000) != 0;
+ public bool NegC => (_opcode & 0x2000000000000) != 0;
+ public bool Sat => (_opcode & 0x4000000000000) != 0;
+ public RoundMode RoundMode => (RoundMode)((_opcode >> 51) & 0x3);
+ public Fmz Fmz => (Fmz)((_opcode >> 53) & 0x3);
+ }
+
+ /// <summary>
+ /// Read-only bit-field view over the raw 64-bit instruction word passed to the constructor.
+ /// This variant exposes a 20-bit immediate (<see cref="Imm20"/>) alongside SrcA and SrcC.
+ /// </summary>
+ readonly struct InstFfmaI
+ {
+ private readonly ulong _opcode;
+ public InstFfmaI(ulong opcode) => _opcode = opcode;
+ public int Dest => (int)((_opcode >> 0) & 0xFF);
+ public int SrcA => (int)((_opcode >> 8) & 0xFF);
+ // Bits 20-38 become the low 19 bits; bit 56 is moved down to bit 19. No sign extension is done here.
+ public int Imm20 => (int)((_opcode >> 37) & 0x80000) | (int)((_opcode >> 20) & 0x7FFFF);
+ public int SrcC => (int)((_opcode >> 39) & 0xFF);
+ public int Pred => (int)((_opcode >> 16) & 0x7);
+ public bool PredInv => (_opcode & 0x80000) != 0;
+ public bool WriteCC => (_opcode & 0x800000000000) != 0;
+ public bool NegA => (_opcode & 0x1000000000000) != 0;
+ public bool NegC => (_opcode & 0x2000000000000) != 0;
+ public bool Sat => (_opcode & 0x4000000000000) != 0;
+ public RoundMode RoundMode => (RoundMode)((_opcode >> 51) & 0x3);
+ public Fmz Fmz => (Fmz)((_opcode >> 53) & 0x3);
+ }
+
+ /// <summary>
+ /// Read-only bit-field view over the raw 64-bit instruction word passed to the constructor.
+ /// This variant exposes a CbufSlot/CbufOffset pair (bits 34-38 and 20-33) alongside SrcA and SrcC.
+ /// </summary>
+ readonly struct InstFfmaC
+ {
+ private readonly ulong _opcode;
+ public InstFfmaC(ulong opcode) => _opcode = opcode;
+ public int Dest => (int)((_opcode >> 0) & 0xFF);
+ public int SrcA => (int)((_opcode >> 8) & 0xFF);
+ public int CbufSlot => (int)((_opcode >> 34) & 0x1F);
+ public int CbufOffset => (int)((_opcode >> 20) & 0x3FFF);
+ public int SrcC => (int)((_opcode >> 39) & 0xFF);
+ public int Pred => (int)((_opcode >> 16) & 0x7);
+ public bool PredInv => (_opcode & 0x80000) != 0;
+ public bool WriteCC => (_opcode & 0x800000000000) != 0;
+ public bool NegA => (_opcode & 0x1000000000000) != 0;
+ public bool NegC => (_opcode & 0x2000000000000) != 0;
+ public bool Sat => (_opcode & 0x4000000000000) != 0;
+ public RoundMode RoundMode => (RoundMode)((_opcode >> 51) & 0x3);
+ public Fmz Fmz => (Fmz)((_opcode >> 53) & 0x3);
+ }
+
+ /// <summary>
+ /// Read-only bit-field view over the raw 64-bit instruction word passed to the constructor.
+ /// Exposes the same fields, at the same bit positions, as the InstFfmaC view; only the declaration order differs.
+ /// </summary>
+ readonly struct InstFfmaRc
+ {
+ private readonly ulong _opcode;
+ public InstFfmaRc(ulong opcode) => _opcode = opcode;
+ public int Dest => (int)((_opcode >> 0) & 0xFF);
+ public int SrcA => (int)((_opcode >> 8) & 0xFF);
+ public int SrcC => (int)((_opcode >> 39) & 0xFF);
+ public int CbufSlot => (int)((_opcode >> 34) & 0x1F);
+ public int CbufOffset => (int)((_opcode >> 20) & 0x3FFF);
+ public int Pred => (int)((_opcode >> 16) & 0x7);
+ public bool PredInv => (_opcode & 0x80000) != 0;
+ public bool WriteCC => (_opcode & 0x800000000000) != 0;
+ public bool NegA => (_opcode & 0x1000000000000) != 0;
+ public bool NegC => (_opcode & 0x2000000000000) != 0;
+ public bool Sat => (_opcode & 0x4000000000000) != 0;
+ public RoundMode RoundMode => (RoundMode)((_opcode >> 51) & 0x3);
+ public Fmz Fmz => (Fmz)((_opcode >> 53) & 0x3);
+ }
+
+ /// <summary>
+ /// Read-only bit-field view over the raw 64-bit instruction word passed to the constructor.
+ /// This variant exposes a full 32-bit immediate (<see cref="Imm32"/>); its flag bits sit at
+ /// bits 52-57, above the immediate.
+ /// </summary>
+ readonly struct InstFfma32i
+ {
+ private readonly ulong _opcode;
+ public InstFfma32i(ulong opcode) => _opcode = opcode;
+ public int Dest => (int)((_opcode >> 0) & 0xFF);
+ public int SrcA => (int)((_opcode >> 8) & 0xFF);
+ // Keeps bits 20-51 of the word. The cast deliberately truncates the upper bits, so it is marked
+ // unchecked to stay valid even when the project is compiled with overflow checking enabled.
+ public int Imm32 => unchecked((int)(_opcode >> 20));
+ public int Pred => (int)((_opcode >> 16) & 0x7);
+ public bool PredInv => (_opcode & 0x80000) != 0;
+ public bool NegC => (_opcode & 0x200000000000000) != 0;
+ public bool NegA => (_opcode & 0x100000000000000) != 0;
+ public bool Sat => (_opcode & 0x80000000000000) != 0;
+ public bool WriteCC => (_opcode & 0x10000000000000) != 0;
+ public Fmz Fmz => (Fmz)((_opcode >> 53) & 0x3);
+ }
+
+ /// <summary>
+ /// Read-only bit-field view over the raw 64-bit instruction word passed to the constructor.
+ /// This variant exposes the source as an 8-bit SrcB index (bits 20-27).
+ /// </summary>
+ readonly struct InstFloR
+ {
+ private readonly ulong _opcode;
+ public InstFloR(ulong opcode) => _opcode = opcode;
+ public int Dest => (int)((_opcode >> 0) & 0xFF);
+ public int SrcB => (int)((_opcode >> 20) & 0xFF);
+ public int Pred => (int)((_opcode >> 16) & 0x7);
+ public bool PredInv => (_opcode & 0x80000) != 0;
+ public bool WriteCC => (_opcode & 0x800000000000) != 0;
+ public bool Signed => (_opcode & 0x1000000000000) != 0;
+ public bool Sh => (_opcode & 0x20000000000) != 0;
+ public bool NegB => (_opcode & 0x10000000000) != 0;
+ }
+
+ /// <summary>Immutable bit-field view over one raw 64-bit opcode word; the second operand is a 20-bit immediate.</summary>
+ readonly struct InstFloI
+ {
+     // Set once by the constructor and never written again, hence readonly.
+     private readonly ulong _opcode;
+     public InstFloI(ulong opcode) => _opcode = opcode;
+     public int Dest => (int)((_opcode >> 0) & 0xFF);
+     // Split field: word bit 56 becomes bit 19, word bits 20-38 become bits 0-18.
+     public int Imm20 => (int)((_opcode >> 37) & 0x80000) | (int)((_opcode >> 20) & 0x7FFFF);
+     public int Pred => (int)((_opcode >> 16) & 0x7);
+     public bool PredInv => (_opcode & 0x80000) != 0;
+     public bool WriteCC => (_opcode & 0x800000000000) != 0;
+     public bool Signed => (_opcode & 0x1000000000000) != 0;
+     public bool Sh => (_opcode & 0x20000000000) != 0;
+     public bool NegB => (_opcode & 0x10000000000) != 0;
+ }
+
+ /// <summary>Immutable bit-field view over one raw 64-bit opcode word; the second operand is a constant-buffer slot/offset pair.</summary>
+ readonly struct InstFloC
+ {
+     // Set once by the constructor and never written again, hence readonly.
+     private readonly ulong _opcode;
+     public InstFloC(ulong opcode) => _opcode = opcode;
+     public int Dest => (int)((_opcode >> 0) & 0xFF);
+     // Constant-buffer operand: slot in bits 34-38, offset in bits 20-33.
+     public int CbufSlot => (int)((_opcode >> 34) & 0x1F);
+     public int CbufOffset => (int)((_opcode >> 20) & 0x3FFF);
+     public int Pred => (int)((_opcode >> 16) & 0x7);
+     public bool PredInv => (_opcode & 0x80000) != 0;
+     public bool WriteCC => (_opcode & 0x800000000000) != 0;
+     public bool Signed => (_opcode & 0x1000000000000) != 0;
+     public bool Sh => (_opcode & 0x20000000000) != 0;
+     public bool NegB => (_opcode & 0x10000000000) != 0;
+ }
+
+ /// <summary>Immutable bit-field view over one raw 64-bit opcode word; the second operand is a register index (bits 20-27).</summary>
+ readonly struct InstFmnmxR
+ {
+     // Set once by the constructor and never written again, hence readonly.
+     private readonly ulong _opcode;
+     public InstFmnmxR(ulong opcode) => _opcode = opcode;
+     public int Dest => (int)((_opcode >> 0) & 0xFF);
+     public int SrcA => (int)((_opcode >> 8) & 0xFF);
+     public int SrcB => (int)((_opcode >> 20) & 0xFF);
+     public int Pred => (int)((_opcode >> 16) & 0x7);
+     public bool PredInv => (_opcode & 0x80000) != 0;
+     public bool WriteCC => (_opcode & 0x800000000000) != 0;
+     public bool AbsB => (_opcode & 0x2000000000000) != 0;
+     public bool NegA => (_opcode & 0x1000000000000) != 0;
+     public bool AbsA => (_opcode & 0x400000000000) != 0;
+     public bool NegB => (_opcode & 0x200000000000) != 0;
+     public bool Ftz => (_opcode & 0x100000000000) != 0;
+     // Source predicate index (bits 39-41) and its inversion flag (bit 42).
+     public int SrcPred => (int)((_opcode >> 39) & 0x7);
+     public bool SrcPredInv => (_opcode & 0x40000000000) != 0;
+ }
+
+ /// <summary>Immutable bit-field view over one raw 64-bit opcode word; the second operand is a 20-bit immediate.</summary>
+ readonly struct InstFmnmxI
+ {
+     // Set once by the constructor and never written again, hence readonly.
+     private readonly ulong _opcode;
+     public InstFmnmxI(ulong opcode) => _opcode = opcode;
+     public int Dest => (int)((_opcode >> 0) & 0xFF);
+     public int SrcA => (int)((_opcode >> 8) & 0xFF);
+     // Split field: word bit 56 becomes bit 19, word bits 20-38 become bits 0-18.
+     public int Imm20 => (int)((_opcode >> 37) & 0x80000) | (int)((_opcode >> 20) & 0x7FFFF);
+     public int Pred => (int)((_opcode >> 16) & 0x7);
+     public bool PredInv => (_opcode & 0x80000) != 0;
+     public bool WriteCC => (_opcode & 0x800000000000) != 0;
+     public bool AbsB => (_opcode & 0x2000000000000) != 0;
+     public bool NegA => (_opcode & 0x1000000000000) != 0;
+     public bool AbsA => (_opcode & 0x400000000000) != 0;
+     public bool NegB => (_opcode & 0x200000000000) != 0;
+     public bool Ftz => (_opcode & 0x100000000000) != 0;
+     // Source predicate index (bits 39-41) and its inversion flag (bit 42).
+     public int SrcPred => (int)((_opcode >> 39) & 0x7);
+     public bool SrcPredInv => (_opcode & 0x40000000000) != 0;
+ }
+
+ /// <summary>Immutable bit-field view over one raw 64-bit opcode word; the second operand is a constant-buffer slot/offset pair.</summary>
+ readonly struct InstFmnmxC
+ {
+     // Set once by the constructor and never written again, hence readonly.
+     private readonly ulong _opcode;
+     public InstFmnmxC(ulong opcode) => _opcode = opcode;
+     public int Dest => (int)((_opcode >> 0) & 0xFF);
+     public int SrcA => (int)((_opcode >> 8) & 0xFF);
+     // Constant-buffer operand: slot in bits 34-38, offset in bits 20-33.
+     public int CbufSlot => (int)((_opcode >> 34) & 0x1F);
+     public int CbufOffset => (int)((_opcode >> 20) & 0x3FFF);
+     public int Pred => (int)((_opcode >> 16) & 0x7);
+     public bool PredInv => (_opcode & 0x80000) != 0;
+     public bool WriteCC => (_opcode & 0x800000000000) != 0;
+     public bool AbsB => (_opcode & 0x2000000000000) != 0;
+     public bool NegA => (_opcode & 0x1000000000000) != 0;
+     public bool AbsA => (_opcode & 0x400000000000) != 0;
+     public bool NegB => (_opcode & 0x200000000000) != 0;
+     public bool Ftz => (_opcode & 0x100000000000) != 0;
+     // Source predicate index (bits 39-41) and its inversion flag (bit 42).
+     public int SrcPred => (int)((_opcode >> 39) & 0x7);
+     public bool SrcPredInv => (_opcode & 0x40000000000) != 0;
+ }
+
+ /// <summary>Immutable bit-field view over one raw 64-bit opcode word; the second operand is a register index (bits 20-27).</summary>
+ readonly struct InstFmulR
+ {
+     // Set once by the constructor and never written again, hence readonly.
+     private readonly ulong _opcode;
+     public InstFmulR(ulong opcode) => _opcode = opcode;
+     public int Dest => (int)((_opcode >> 0) & 0xFF);
+     public int SrcA => (int)((_opcode >> 8) & 0xFF);
+     public int SrcB => (int)((_opcode >> 20) & 0xFF);
+     public int Pred => (int)((_opcode >> 16) & 0x7);
+     public bool PredInv => (_opcode & 0x80000) != 0;
+     public bool WriteCC => (_opcode & 0x800000000000) != 0;
+     public RoundMode RoundMode => (RoundMode)((_opcode >> 39) & 0x3);
+     public Fmz Fmz => (Fmz)((_opcode >> 44) & 0x3);
+     // Three-bit field in bits 41-43.
+     public MultiplyScale Scale => (MultiplyScale)((_opcode >> 41) & 0x7);
+     public bool NegA => (_opcode & 0x1000000000000) != 0;
+     public bool Sat => (_opcode & 0x4000000000000) != 0;
+ }
+
+ /// <summary>Immutable bit-field view over one raw 64-bit opcode word; the second operand is a 20-bit immediate.</summary>
+ readonly struct InstFmulI
+ {
+     // Set once by the constructor and never written again, hence readonly.
+     private readonly ulong _opcode;
+     public InstFmulI(ulong opcode) => _opcode = opcode;
+     public int Dest => (int)((_opcode >> 0) & 0xFF);
+     public int SrcA => (int)((_opcode >> 8) & 0xFF);
+     // Split field: word bit 56 becomes bit 19, word bits 20-38 become bits 0-18.
+     public int Imm20 => (int)((_opcode >> 37) & 0x80000) | (int)((_opcode >> 20) & 0x7FFFF);
+     public int Pred => (int)((_opcode >> 16) & 0x7);
+     public bool PredInv => (_opcode & 0x80000) != 0;
+     public bool WriteCC => (_opcode & 0x800000000000) != 0;
+     public RoundMode RoundMode => (RoundMode)((_opcode >> 39) & 0x3);
+     public Fmz Fmz => (Fmz)((_opcode >> 44) & 0x3);
+     // Three-bit field in bits 41-43.
+     public MultiplyScale Scale => (MultiplyScale)((_opcode >> 41) & 0x7);
+     public bool NegA => (_opcode & 0x1000000000000) != 0;
+     public bool Sat => (_opcode & 0x4000000000000) != 0;
+ }
+
+ /// <summary>Immutable bit-field view over one raw 64-bit opcode word; the second operand is a constant-buffer slot/offset pair.</summary>
+ readonly struct InstFmulC
+ {
+     // Set once by the constructor and never written again, hence readonly.
+     private readonly ulong _opcode;
+     public InstFmulC(ulong opcode) => _opcode = opcode;
+     public int Dest => (int)((_opcode >> 0) & 0xFF);
+     public int SrcA => (int)((_opcode >> 8) & 0xFF);
+     // Constant-buffer operand: slot in bits 34-38, offset in bits 20-33.
+     public int CbufSlot => (int)((_opcode >> 34) & 0x1F);
+     public int CbufOffset => (int)((_opcode >> 20) & 0x3FFF);
+     public int Pred => (int)((_opcode >> 16) & 0x7);
+     public bool PredInv => (_opcode & 0x80000) != 0;
+     public bool WriteCC => (_opcode & 0x800000000000) != 0;
+     public RoundMode RoundMode => (RoundMode)((_opcode >> 39) & 0x3);
+     public Fmz Fmz => (Fmz)((_opcode >> 44) & 0x3);
+     // Three-bit field in bits 41-43.
+     public MultiplyScale Scale => (MultiplyScale)((_opcode >> 41) & 0x7);
+     public bool NegA => (_opcode & 0x1000000000000) != 0;
+     public bool Sat => (_opcode & 0x4000000000000) != 0;
+ }
+
+ /// <summary>Immutable bit-field view over one raw 64-bit opcode word carrying a 32-bit immediate.</summary>
+ readonly struct InstFmul32i
+ {
+     // Set once by the constructor and never written again, hence readonly.
+     private readonly ulong _opcode;
+     public InstFmul32i(ulong opcode) => _opcode = opcode;
+     public int Dest => (int)((_opcode >> 0) & 0xFF);
+     public int SrcA => (int)((_opcode >> 8) & 0xFF);
+     public int Pred => (int)((_opcode >> 16) & 0x7);
+     public bool PredInv => (_opcode & 0x80000) != 0;
+     // Truncating cast: keeps bits 20-51 of the word (assumes the default unchecked context).
+     public int Imm32 => (int)(_opcode >> 20);
+     public bool Sat => (_opcode & 0x80000000000000) != 0;
+     public Fmz Fmz => (Fmz)((_opcode >> 53) & 0x3);
+     public bool WriteCC => (_opcode & 0x10000000000000) != 0;
+ }
+
+ /// <summary>Immutable bit-field view over one raw 64-bit opcode word; the second operand is a register index (bits 20-27).</summary>
+ readonly struct InstFsetR
+ {
+     // Set once by the constructor and never written again, hence readonly.
+     private readonly ulong _opcode;
+     public InstFsetR(ulong opcode) => _opcode = opcode;
+     public int Dest => (int)((_opcode >> 0) & 0xFF);
+     public int SrcA => (int)((_opcode >> 8) & 0xFF);
+     public int SrcB => (int)((_opcode >> 20) & 0xFF);
+     public int Pred => (int)((_opcode >> 16) & 0x7);
+     public bool PredInv => (_opcode & 0x80000) != 0;
+     public bool WriteCC => (_opcode & 0x800000000000) != 0;
+     public bool NegA => (_opcode & 0x80000000000) != 0;
+     public bool NegB => (_opcode & 0x20000000000000) != 0;
+     public bool AbsA => (_opcode & 0x40000000000000) != 0;
+     public bool AbsB => (_opcode & 0x100000000000) != 0;
+     // Four-bit comparison selector in bits 48-51.
+     public FComp FComp => (FComp)((_opcode >> 48) & 0xF);
+     // Source predicate index (bits 39-41) and its inversion flag (bit 42).
+     public int SrcPred => (int)((_opcode >> 39) & 0x7);
+     public bool SrcPredInv => (_opcode & 0x40000000000) != 0;
+     public BoolOp Bop => (BoolOp)((_opcode >> 45) & 0x3);
+     public bool Ftz => (_opcode & 0x80000000000000) != 0;
+     public bool BVal => (_opcode & 0x10000000000000) != 0;
+ }
+
+ /// <summary>Immutable bit-field view over one raw 64-bit opcode word; the second operand is a constant-buffer slot/offset pair.</summary>
+ readonly struct InstFsetC
+ {
+     // Set once by the constructor and never written again, hence readonly.
+     private readonly ulong _opcode;
+     public InstFsetC(ulong opcode) => _opcode = opcode;
+     public int Dest => (int)((_opcode >> 0) & 0xFF);
+     public int SrcA => (int)((_opcode >> 8) & 0xFF);
+     // Constant-buffer operand: slot in bits 34-38, offset in bits 20-33.
+     public int CbufSlot => (int)((_opcode >> 34) & 0x1F);
+     public int CbufOffset => (int)((_opcode >> 20) & 0x3FFF);
+     public int Pred => (int)((_opcode >> 16) & 0x7);
+     public bool PredInv => (_opcode & 0x80000) != 0;
+     public bool WriteCC => (_opcode & 0x800000000000) != 0;
+     public bool NegA => (_opcode & 0x80000000000) != 0;
+     public bool NegB => (_opcode & 0x20000000000000) != 0;
+     public bool AbsA => (_opcode & 0x40000000000000) != 0;
+     public bool AbsB => (_opcode & 0x100000000000) != 0;
+     // Four-bit comparison selector in bits 48-51.
+     public FComp FComp => (FComp)((_opcode >> 48) & 0xF);
+     // Source predicate index (bits 39-41) and its inversion flag (bit 42).
+     public int SrcPred => (int)((_opcode >> 39) & 0x7);
+     public bool SrcPredInv => (_opcode & 0x40000000000) != 0;
+     public BoolOp Bop => (BoolOp)((_opcode >> 45) & 0x3);
+     public bool Ftz => (_opcode & 0x80000000000000) != 0;
+     public bool BVal => (_opcode & 0x10000000000000) != 0;
+ }
+
+ /// <summary>Immutable bit-field view over one raw 64-bit opcode word; the second operand is a 20-bit immediate.</summary>
+ readonly struct InstFsetI
+ {
+     // Set once by the constructor and never written again, hence readonly.
+     private readonly ulong _opcode;
+     public InstFsetI(ulong opcode) => _opcode = opcode;
+     public int Dest => (int)((_opcode >> 0) & 0xFF);
+     public int SrcA => (int)((_opcode >> 8) & 0xFF);
+     // Split field: word bit 56 becomes bit 19, word bits 20-38 become bits 0-18.
+     public int Imm20 => (int)((_opcode >> 37) & 0x80000) | (int)((_opcode >> 20) & 0x7FFFF);
+     public int Pred => (int)((_opcode >> 16) & 0x7);
+     public bool PredInv => (_opcode & 0x80000) != 0;
+     public bool WriteCC => (_opcode & 0x800000000000) != 0;
+     public bool NegA => (_opcode & 0x80000000000) != 0;
+     public bool NegB => (_opcode & 0x20000000000000) != 0;
+     public bool AbsA => (_opcode & 0x40000000000000) != 0;
+     public bool AbsB => (_opcode & 0x100000000000) != 0;
+     // Four-bit comparison selector in bits 48-51.
+     public FComp FComp => (FComp)((_opcode >> 48) & 0xF);
+     // Source predicate index (bits 39-41) and its inversion flag (bit 42).
+     public int SrcPred => (int)((_opcode >> 39) & 0x7);
+     public bool SrcPredInv => (_opcode & 0x40000000000) != 0;
+     public BoolOp Bop => (BoolOp)((_opcode >> 45) & 0x3);
+     public bool Ftz => (_opcode & 0x80000000000000) != 0;
+     public bool BVal => (_opcode & 0x10000000000000) != 0;
+ }
+
+ /// <summary>Immutable bit-field view over one raw 64-bit opcode word; the low bits hold two predicate indices instead of a destination register.</summary>
+ readonly struct InstFsetpR
+ {
+     // Set once by the constructor and never written again, hence readonly.
+     private readonly ulong _opcode;
+     public InstFsetpR(ulong opcode) => _opcode = opcode;
+     // Two 3-bit predicate indices: bits 3-5 and bits 0-2.
+     public int DestPred => (int)((_opcode >> 3) & 0x7);
+     public int DestPredInv => (int)((_opcode >> 0) & 0x7);
+     public int SrcA => (int)((_opcode >> 8) & 0xFF);
+     public int SrcB => (int)((_opcode >> 20) & 0xFF);
+     public int Pred => (int)((_opcode >> 16) & 0x7);
+     public bool PredInv => (_opcode & 0x80000) != 0;
+     // NOTE(review): WriteCC and Ftz below test the same mask (0x800000000000); confirm which meaning applies.
+     public bool WriteCC => (_opcode & 0x800000000000) != 0;
+     public bool NegA => (_opcode & 0x80000000000) != 0;
+     public bool NegB => (_opcode & 0x40) != 0;
+     public bool AbsA => (_opcode & 0x80) != 0;
+     public bool AbsB => (_opcode & 0x100000000000) != 0;
+     // Four-bit comparison selector in bits 48-51.
+     public FComp FComp => (FComp)((_opcode >> 48) & 0xF);
+     // Source predicate index (bits 39-41) and its inversion flag (bit 42).
+     public int SrcPred => (int)((_opcode >> 39) & 0x7);
+     public bool SrcPredInv => (_opcode & 0x40000000000) != 0;
+     public BoolOp Bop => (BoolOp)((_opcode >> 45) & 0x3);
+     public bool Ftz => (_opcode & 0x800000000000) != 0;
+ }
+
+ /// <summary>Immutable bit-field view over one raw 64-bit opcode word; two predicate indices in the low bits, second operand a 20-bit immediate.</summary>
+ readonly struct InstFsetpI
+ {
+     // Set once by the constructor and never written again, hence readonly.
+     private readonly ulong _opcode;
+     public InstFsetpI(ulong opcode) => _opcode = opcode;
+     // Two 3-bit predicate indices: bits 3-5 and bits 0-2.
+     public int DestPred => (int)((_opcode >> 3) & 0x7);
+     public int DestPredInv => (int)((_opcode >> 0) & 0x7);
+     public int SrcA => (int)((_opcode >> 8) & 0xFF);
+     // Split field: word bit 56 becomes bit 19, word bits 20-38 become bits 0-18.
+     public int Imm20 => (int)((_opcode >> 37) & 0x80000) | (int)((_opcode >> 20) & 0x7FFFF);
+     public int Pred => (int)((_opcode >> 16) & 0x7);
+     public bool PredInv => (_opcode & 0x80000) != 0;
+     // NOTE(review): WriteCC and Ftz below test the same mask (0x800000000000); confirm which meaning applies.
+     public bool WriteCC => (_opcode & 0x800000000000) != 0;
+     public bool NegA => (_opcode & 0x80000000000) != 0;
+     public bool NegB => (_opcode & 0x40) != 0;
+     public bool AbsA => (_opcode & 0x80) != 0;
+     public bool AbsB => (_opcode & 0x100000000000) != 0;
+     // Four-bit comparison selector in bits 48-51.
+     public FComp FComp => (FComp)((_opcode >> 48) & 0xF);
+     // Source predicate index (bits 39-41) and its inversion flag (bit 42).
+     public int SrcPred => (int)((_opcode >> 39) & 0x7);
+     public bool SrcPredInv => (_opcode & 0x40000000000) != 0;
+     public BoolOp Bop => (BoolOp)((_opcode >> 45) & 0x3);
+     public bool Ftz => (_opcode & 0x800000000000) != 0;
+ }
+
+ /// <summary>Immutable bit-field view over one raw 64-bit opcode word; two predicate indices in the low bits, second operand a constant-buffer slot/offset pair.</summary>
+ readonly struct InstFsetpC
+ {
+     // Set once by the constructor and never written again, hence readonly.
+     private readonly ulong _opcode;
+     public InstFsetpC(ulong opcode) => _opcode = opcode;
+     // Two 3-bit predicate indices: bits 3-5 and bits 0-2.
+     public int DestPred => (int)((_opcode >> 3) & 0x7);
+     public int DestPredInv => (int)((_opcode >> 0) & 0x7);
+     public int SrcA => (int)((_opcode >> 8) & 0xFF);
+     // Constant-buffer operand: slot in bits 34-38, offset in bits 20-33.
+     public int CbufSlot => (int)((_opcode >> 34) & 0x1F);
+     public int CbufOffset => (int)((_opcode >> 20) & 0x3FFF);
+     public int Pred => (int)((_opcode >> 16) & 0x7);
+     public bool PredInv => (_opcode & 0x80000) != 0;
+     // NOTE(review): WriteCC and Ftz below test the same mask (0x800000000000); confirm which meaning applies.
+     public bool WriteCC => (_opcode & 0x800000000000) != 0;
+     public bool NegA => (_opcode & 0x80000000000) != 0;
+     public bool NegB => (_opcode & 0x40) != 0;
+     public bool AbsA => (_opcode & 0x80) != 0;
+     public bool AbsB => (_opcode & 0x100000000000) != 0;
+     // Four-bit comparison selector in bits 48-51.
+     public FComp FComp => (FComp)((_opcode >> 48) & 0xF);
+     // Source predicate index (bits 39-41) and its inversion flag (bit 42).
+     public int SrcPred => (int)((_opcode >> 39) & 0x7);
+     public bool SrcPredInv => (_opcode & 0x40000000000) != 0;
+     public BoolOp Bop => (BoolOp)((_opcode >> 45) & 0x3);
+     public bool Ftz => (_opcode & 0x800000000000) != 0;
+ }
+
+ /// <summary>Immutable bit-field view over one raw 64-bit opcode word; every property extracts a single field with a shift/mask.</summary>
+ readonly struct InstFswzadd
+ {
+     // Set once by the constructor and never written again, hence readonly.
+     private readonly ulong _opcode;
+     public InstFswzadd(ulong opcode) => _opcode = opcode;
+     public int Dest => (int)((_opcode >> 0) & 0xFF);
+     public int SrcA => (int)((_opcode >> 8) & 0xFF);
+     public int SrcB => (int)((_opcode >> 20) & 0xFF);
+     public int Pred => (int)((_opcode >> 16) & 0x7);
+     public bool PredInv => (_opcode & 0x80000) != 0;
+     public bool WriteCC => (_opcode & 0x800000000000) != 0;
+     public bool Ftz => (_opcode & 0x100000000000) != 0;
+     public RoundMode RoundMode => (RoundMode)((_opcode >> 39) & 0x3);
+     public bool Ndv => (_opcode & 0x4000000000) != 0;
+     // Eight-bit field in bits 28-35, directly above SrcB.
+     public int PnWord => (int)((_opcode >> 28) & 0xFF);
+ }
+
+ /// <summary>Immutable view over one raw 64-bit opcode word that exposes only the destination field (bits 0-7).</summary>
+ readonly struct InstGetcrsptr
+ {
+     // Set once by the constructor and never written again, hence readonly.
+     private readonly ulong _opcode;
+     public InstGetcrsptr(ulong opcode) => _opcode = opcode;
+     public int Dest => (int)((_opcode >> 0) & 0xFF);
+ }
+
+ /// <summary>Immutable view over one raw 64-bit opcode word that exposes only the destination field (bits 0-7).</summary>
+ readonly struct InstGetlmembase
+ {
+     // Set once by the constructor and never written again, hence readonly.
+     private readonly ulong _opcode;
+     public InstGetlmembase(ulong opcode) => _opcode = opcode;
+     public int Dest => (int)((_opcode >> 0) & 0xFF);
+ }
+
+ /// <summary>Immutable bit-field view over one raw 64-bit opcode word; the second operand is a register index (bits 20-27) with its own swizzle.</summary>
+ readonly struct InstHadd2R
+ {
+     // Set once by the constructor and never written again, hence readonly.
+     private readonly ulong _opcode;
+     public InstHadd2R(ulong opcode) => _opcode = opcode;
+     public int Dest => (int)((_opcode >> 0) & 0xFF);
+     public int SrcA => (int)((_opcode >> 8) & 0xFF);
+     public int SrcB => (int)((_opcode >> 20) & 0xFF);
+     public OFmt OFmt => (OFmt)((_opcode >> 49) & 0x3);
+     // Per-operand swizzle selectors: A in bits 47-48, B in bits 28-29.
+     public HalfSwizzle ASwizzle => (HalfSwizzle)((_opcode >> 47) & 0x3);
+     public HalfSwizzle BSwizzle => (HalfSwizzle)((_opcode >> 28) & 0x3);
+     public int Pred => (int)((_opcode >> 16) & 0x7);
+     public bool PredInv => (_opcode & 0x80000) != 0;
+     public bool NegA => (_opcode & 0x80000000000) != 0;
+     public bool NegB => (_opcode & 0x80000000) != 0;
+     public bool AbsA => (_opcode & 0x100000000000) != 0;
+     public bool AbsB => (_opcode & 0x40000000) != 0;
+     public bool Sat => (_opcode & 0x100000000) != 0;
+     public bool Ftz => (_opcode & 0x8000000000) != 0;
+ }
+
+ /// <summary>Immutable bit-field view over one raw 64-bit opcode word; the second operand is a pair of 10-bit immediates.</summary>
+ readonly struct InstHadd2I
+ {
+     // Set once by the constructor and never written again, hence readonly.
+     private readonly ulong _opcode;
+     public InstHadd2I(ulong opcode) => _opcode = opcode;
+     public int Dest => (int)((_opcode >> 0) & 0xFF);
+     public int SrcA => (int)((_opcode >> 8) & 0xFF);
+     // First immediate: word bits 20-29.
+     public int BimmH0 => (int)((_opcode >> 20) & 0x3FF);
+     // Second immediate is split: word bit 56 becomes bit 9, word bits 30-38 become bits 0-8.
+     public int BimmH1 => (int)((_opcode >> 47) & 0x200) | (int)((_opcode >> 30) & 0x1FF);
+     public OFmt OFmt => (OFmt)((_opcode >> 49) & 0x3);
+     public HalfSwizzle ASwizzle => (HalfSwizzle)((_opcode >> 47) & 0x3);
+     public int Pred => (int)((_opcode >> 16) & 0x7);
+     public bool PredInv => (_opcode & 0x80000) != 0;
+     public bool NegA => (_opcode & 0x80000000000) != 0;
+     public bool AbsA => (_opcode & 0x100000000000) != 0;
+     public bool Sat => (_opcode & 0x10000000000000) != 0;
+     public bool Ftz => (_opcode & 0x8000000000) != 0;
+ }
+
+ /// <summary>Immutable bit-field view over one raw 64-bit opcode word; the second operand is a constant-buffer slot/offset pair.</summary>
+ readonly struct InstHadd2C
+ {
+     // Set once by the constructor and never written again, hence readonly.
+     private readonly ulong _opcode;
+     public InstHadd2C(ulong opcode) => _opcode = opcode;
+     public int Dest => (int)((_opcode >> 0) & 0xFF);
+     public int SrcA => (int)((_opcode >> 8) & 0xFF);
+     // Constant-buffer operand: slot in bits 34-38, offset in bits 20-33.
+     public int CbufSlot => (int)((_opcode >> 34) & 0x1F);
+     public int CbufOffset => (int)((_opcode >> 20) & 0x3FFF);
+     public OFmt OFmt => (OFmt)((_opcode >> 49) & 0x3);
+     public HalfSwizzle ASwizzle => (HalfSwizzle)((_opcode >> 47) & 0x3);
+     public int Pred => (int)((_opcode >> 16) & 0x7);
+     public bool PredInv => (_opcode & 0x80000) != 0;
+     public bool NegA => (_opcode & 0x80000000000) != 0;
+     public bool NegB => (_opcode & 0x100000000000000) != 0;
+     public bool AbsA => (_opcode & 0x100000000000) != 0;
+     public bool AbsB => (_opcode & 0x40000000000000) != 0;
+     public bool Sat => (_opcode & 0x10000000000000) != 0;
+     public bool Ftz => (_opcode & 0x8000000000) != 0;
+ }
+
+ /// <summary>Immutable bit-field view over one raw 64-bit opcode word carrying a 32-bit immediate.</summary>
+ readonly struct InstHadd232i
+ {
+     // Set once by the constructor and never written again, hence readonly.
+     private readonly ulong _opcode;
+     public InstHadd232i(ulong opcode) => _opcode = opcode;
+     public int Dest => (int)((_opcode >> 0) & 0xFF);
+     public int SrcA => (int)((_opcode >> 8) & 0xFF);
+     // Truncating cast: keeps bits 20-51 of the word (assumes the default unchecked context).
+     public int Imm => (int)(_opcode >> 20);
+     public HalfSwizzle ASwizzle => (HalfSwizzle)((_opcode >> 53) & 0x3);
+     public int Pred => (int)((_opcode >> 16) & 0x7);
+     public bool PredInv => (_opcode & 0x80000) != 0;
+     public bool NegA => (_opcode & 0x100000000000000) != 0;
+     public bool Sat => (_opcode & 0x10000000000000) != 0;
+     public bool Ftz => (_opcode & 0x80000000000000) != 0;
+ }
+
+ /// <summary>Immutable bit-field view over one raw 64-bit opcode word with three register operands, each with its own swizzle.</summary>
+ readonly struct InstHfma2R
+ {
+     // Set once by the constructor and never written again, hence readonly.
+     private readonly ulong _opcode;
+     public InstHfma2R(ulong opcode) => _opcode = opcode;
+     public int Dest => (int)((_opcode >> 0) & 0xFF);
+     public int SrcA => (int)((_opcode >> 8) & 0xFF);
+     public int SrcB => (int)((_opcode >> 20) & 0xFF);
+     public int SrcC => (int)((_opcode >> 39) & 0xFF);
+     public OFmt OFmt => (OFmt)((_opcode >> 49) & 0x3);
+     // Per-operand swizzle selectors: A in bits 47-48, B in bits 28-29, C in bits 35-36.
+     public HalfSwizzle ASwizzle => (HalfSwizzle)((_opcode >> 47) & 0x3);
+     public HalfSwizzle BSwizzle => (HalfSwizzle)((_opcode >> 28) & 0x3);
+     public HalfSwizzle CSwizzle => (HalfSwizzle)((_opcode >> 35) & 0x3);
+     public int Pred => (int)((_opcode >> 16) & 0x7);
+     public bool PredInv => (_opcode & 0x80000) != 0;
+     public bool NegA => (_opcode & 0x80000000) != 0;
+     public bool NegC => (_opcode & 0x40000000) != 0;
+     public bool Sat => (_opcode & 0x100000000) != 0;
+     public Fmz Fmz => (Fmz)((_opcode >> 37) & 0x3);
+ }
+
+ /// <summary>Immutable bit-field view over one raw 64-bit opcode word; the second operand is a pair of 10-bit immediates.</summary>
+ readonly struct InstHfma2I
+ {
+     // Set once by the constructor and never written again, hence readonly.
+     private readonly ulong _opcode;
+     public InstHfma2I(ulong opcode) => _opcode = opcode;
+     public int Dest => (int)((_opcode >> 0) & 0xFF);
+     public int SrcA => (int)((_opcode >> 8) & 0xFF);
+     // First immediate: word bits 20-29.
+     public int BimmH0 => (int)((_opcode >> 20) & 0x3FF);
+     // Second immediate is split: word bit 56 becomes bit 9, word bits 30-38 become bits 0-8.
+     public int BimmH1 => (int)((_opcode >> 47) & 0x200) | (int)((_opcode >> 30) & 0x1FF);
+     public int SrcC => (int)((_opcode >> 39) & 0xFF);
+     public OFmt OFmt => (OFmt)((_opcode >> 49) & 0x3);
+     public HalfSwizzle ASwizzle => (HalfSwizzle)((_opcode >> 47) & 0x3);
+     public HalfSwizzle CSwizzle => (HalfSwizzle)((_opcode >> 53) & 0x3);
+     public int Pred => (int)((_opcode >> 16) & 0x7);
+     public bool PredInv => (_opcode & 0x80000) != 0;
+     public bool NegC => (_opcode & 0x8000000000000) != 0;
+     public bool Sat => (_opcode & 0x10000000000000) != 0;
+     public Fmz Fmz => (Fmz)((_opcode >> 57) & 0x3);
+ }
+
+ /// <summary>Immutable bit-field view over one raw 64-bit opcode word; one operand is a constant-buffer slot/offset pair, another the SrcC register.</summary>
+ readonly struct InstHfma2C
+ {
+     // Set once by the constructor and never written again, hence readonly.
+     private readonly ulong _opcode;
+     public InstHfma2C(ulong opcode) => _opcode = opcode;
+     public int Dest => (int)((_opcode >> 0) & 0xFF);
+     public int SrcA => (int)((_opcode >> 8) & 0xFF);
+     // Constant-buffer operand: slot in bits 34-38, offset in bits 20-33.
+     public int CbufSlot => (int)((_opcode >> 34) & 0x1F);
+     public int CbufOffset => (int)((_opcode >> 20) & 0x3FFF);
+     public int SrcC => (int)((_opcode >> 39) & 0xFF);
+     public OFmt OFmt => (OFmt)((_opcode >> 49) & 0x3);
+     public HalfSwizzle ASwizzle => (HalfSwizzle)((_opcode >> 47) & 0x3);
+     public HalfSwizzle CSwizzle => (HalfSwizzle)((_opcode >> 53) & 0x3);
+     public int Pred => (int)((_opcode >> 16) & 0x7);
+     public bool PredInv => (_opcode & 0x80000) != 0;
+     public bool NegA => (_opcode & 0x100000000000000) != 0;
+     public bool NegC => (_opcode & 0x8000000000000) != 0;
+     public bool Sat => (_opcode & 0x10000000000000) != 0;
+     public Fmz Fmz => (Fmz)((_opcode >> 57) & 0x3);
+ }
+
+ /// <summary>Immutable bit-field view over one raw 64-bit opcode word; one operand is a constant-buffer slot/offset pair, another the SrcC register.</summary>
+ readonly struct InstHfma2Rc
+ {
+     // Set once by the constructor and never written again, hence readonly.
+     private readonly ulong _opcode;
+     public InstHfma2Rc(ulong opcode) => _opcode = opcode;
+     public int Dest => (int)((_opcode >> 0) & 0xFF);
+     public int SrcA => (int)((_opcode >> 8) & 0xFF);
+     // Constant-buffer operand: slot in bits 34-38, offset in bits 20-33.
+     public int CbufSlot => (int)((_opcode >> 34) & 0x1F);
+     public int CbufOffset => (int)((_opcode >> 20) & 0x3FFF);
+     public int SrcC => (int)((_opcode >> 39) & 0xFF);
+     public OFmt OFmt => (OFmt)((_opcode >> 49) & 0x3);
+     public HalfSwizzle ASwizzle => (HalfSwizzle)((_opcode >> 47) & 0x3);
+     public HalfSwizzle CSwizzle => (HalfSwizzle)((_opcode >> 53) & 0x3);
+     public int Pred => (int)((_opcode >> 16) & 0x7);
+     public bool PredInv => (_opcode & 0x80000) != 0;
+     public bool NegA => (_opcode & 0x100000000000000) != 0;
+     public bool NegC => (_opcode & 0x8000000000000) != 0;
+     public bool Sat => (_opcode & 0x10000000000000) != 0;
+     public Fmz Fmz => (Fmz)((_opcode >> 57) & 0x3);
+ }
+
+ /// <summary>Immutable bit-field view over one raw 64-bit opcode word carrying a 32-bit immediate.</summary>
+ readonly struct InstHfma232i
+ {
+     // Set once by the constructor and never written again, hence readonly.
+     private readonly ulong _opcode;
+     public InstHfma232i(ulong opcode) => _opcode = opcode;
+     public int Dest => (int)((_opcode >> 0) & 0xFF);
+     public int SrcA => (int)((_opcode >> 8) & 0xFF);
+     // Truncating cast: keeps bits 20-51 of the word (assumes the default unchecked context).
+     public int Imm => (int)(_opcode >> 20);
+     // NOTE(review): ASwizzle (bits 47-48) and NegC (bit 51) lie inside the bit range Imm reads (20-51); confirm the overlap is intended.
+     public HalfSwizzle ASwizzle => (HalfSwizzle)((_opcode >> 47) & 0x3);
+     public int Pred => (int)((_opcode >> 16) & 0x7);
+     public bool PredInv => (_opcode & 0x80000) != 0;
+     public bool NegC => (_opcode & 0x8000000000000) != 0;
+     public Fmz Fmz => (Fmz)((_opcode >> 57) & 0x3);
+ }
+
+ /// <summary>Immutable bit-field view over one raw 64-bit opcode word; the second operand is a register index (bits 20-27) with its own swizzle.</summary>
+ readonly struct InstHmul2R
+ {
+     // Set once by the constructor and never written again, hence readonly.
+     private readonly ulong _opcode;
+     public InstHmul2R(ulong opcode) => _opcode = opcode;
+     public int Dest => (int)((_opcode >> 0) & 0xFF);
+     public int SrcA => (int)((_opcode >> 8) & 0xFF);
+     public int SrcB => (int)((_opcode >> 20) & 0xFF);
+     public OFmt OFmt => (OFmt)((_opcode >> 49) & 0x3);
+     // Per-operand swizzle selectors: A in bits 47-48, B in bits 28-29.
+     public HalfSwizzle ASwizzle => (HalfSwizzle)((_opcode >> 47) & 0x3);
+     public HalfSwizzle BSwizzle => (HalfSwizzle)((_opcode >> 28) & 0x3);
+     public int Pred => (int)((_opcode >> 16) & 0x7);
+     public bool PredInv => (_opcode & 0x80000) != 0;
+     public bool NegA => (_opcode & 0x80000000) != 0;
+     public bool AbsA => (_opcode & 0x100000000000) != 0;
+     public bool AbsB => (_opcode & 0x40000000) != 0;
+     public bool Sat => (_opcode & 0x100000000) != 0;
+     public Fmz Fmz => (Fmz)((_opcode >> 39) & 0x3);
+ }
+
+ /// <summary>Immutable bit-field view over one raw 64-bit opcode word; the second operand is a pair of 10-bit immediates.</summary>
+ readonly struct InstHmul2I
+ {
+     // Set once by the constructor and never written again, hence readonly.
+     private readonly ulong _opcode;
+     public InstHmul2I(ulong opcode) => _opcode = opcode;
+     public int Dest => (int)((_opcode >> 0) & 0xFF);
+     public int SrcA => (int)((_opcode >> 8) & 0xFF);
+     // First immediate: word bits 20-29.
+     public int BimmH0 => (int)((_opcode >> 20) & 0x3FF);
+     // Second immediate is split: word bit 56 becomes bit 9, word bits 30-38 become bits 0-8.
+     public int BimmH1 => (int)((_opcode >> 47) & 0x200) | (int)((_opcode >> 30) & 0x1FF);
+     public OFmt OFmt => (OFmt)((_opcode >> 49) & 0x3);
+     public HalfSwizzle ASwizzle => (HalfSwizzle)((_opcode >> 47) & 0x3);
+     public int Pred => (int)((_opcode >> 16) & 0x7);
+     public bool PredInv => (_opcode & 0x80000) != 0;
+     public bool NegA => (_opcode & 0x80000000000) != 0;
+     public bool AbsA => (_opcode & 0x100000000000) != 0;
+     public bool Sat => (_opcode & 0x10000000000000) != 0;
+     public Fmz Fmz => (Fmz)((_opcode >> 39) & 0x3);
+ }
+
+ /// <summary>Immutable bit-field view over one raw 64-bit opcode word; the second operand is a constant-buffer slot/offset pair.</summary>
+ readonly struct InstHmul2C
+ {
+     // Set once by the constructor and never written again, hence readonly.
+     private readonly ulong _opcode;
+     public InstHmul2C(ulong opcode) => _opcode = opcode;
+     public int Dest => (int)((_opcode >> 0) & 0xFF);
+     public int SrcA => (int)((_opcode >> 8) & 0xFF);
+     // Constant-buffer operand: slot in bits 34-38, offset in bits 20-33.
+     public int CbufSlot => (int)((_opcode >> 34) & 0x1F);
+     public int CbufOffset => (int)((_opcode >> 20) & 0x3FFF);
+     public OFmt OFmt => (OFmt)((_opcode >> 49) & 0x3);
+     public HalfSwizzle ASwizzle => (HalfSwizzle)((_opcode >> 47) & 0x3);
+     public int Pred => (int)((_opcode >> 16) & 0x7);
+     public bool PredInv => (_opcode & 0x80000) != 0;
+     public bool NegA => (_opcode & 0x80000000000) != 0;
+     public bool AbsA => (_opcode & 0x100000000000) != 0;
+     public bool AbsB => (_opcode & 0x40000000000000) != 0;
+     public bool Sat => (_opcode & 0x10000000000000) != 0;
+     public Fmz Fmz => (Fmz)((_opcode >> 39) & 0x3);
+ }
+
+ /// <summary>Immutable bit-field view over one raw 64-bit opcode word carrying a 32-bit immediate.</summary>
+ readonly struct InstHmul232i
+ {
+     // Set once by the constructor and never written again, hence readonly.
+     private readonly ulong _opcode;
+     public InstHmul232i(ulong opcode) => _opcode = opcode;
+     public int Dest => (int)((_opcode >> 0) & 0xFF);
+     public int SrcA => (int)((_opcode >> 8) & 0xFF);
+     // Truncating cast: keeps bits 20-51 of the word (assumes the default unchecked context).
+     public int Imm32 => (int)(_opcode >> 20);
+     public HalfSwizzle ASwizzle => (HalfSwizzle)((_opcode >> 53) & 0x3);
+     public int Pred => (int)((_opcode >> 16) & 0x7);
+     public bool PredInv => (_opcode & 0x80000) != 0;
+     public bool Sat => (_opcode & 0x10000000000000) != 0;
+     public Fmz Fmz => (Fmz)((_opcode >> 55) & 0x3);
+ }
+
+ /// <summary>Immutable bit-field view over one raw 64-bit opcode word; the second operand is a register index (bits 20-27) with its own swizzle.</summary>
+ readonly struct InstHset2R
+ {
+     // Set once by the constructor and never written again, hence readonly.
+     private readonly ulong _opcode;
+     public InstHset2R(ulong opcode) => _opcode = opcode;
+     public int Dest => (int)((_opcode >> 0) & 0xFF);
+     public int SrcA => (int)((_opcode >> 8) & 0xFF);
+     public int SrcB => (int)((_opcode >> 20) & 0xFF);
+     // Per-operand swizzle selectors: A in bits 47-48, B in bits 28-29.
+     public HalfSwizzle ASwizzle => (HalfSwizzle)((_opcode >> 47) & 0x3);
+     public HalfSwizzle BSwizzle => (HalfSwizzle)((_opcode >> 28) & 0x3);
+     public int Pred => (int)((_opcode >> 16) & 0x7);
+     public bool PredInv => (_opcode & 0x80000) != 0;
+     public bool NegA => (_opcode & 0x80000000000) != 0;
+     public bool AbsA => (_opcode & 0x100000000000) != 0;
+     public bool NegB => (_opcode & 0x80000000) != 0;
+     public bool AbsB => (_opcode & 0x40000000) != 0;
+     public bool Bval => (_opcode & 0x2000000000000) != 0;
+     // Four-bit comparison selector in bits 35-38.
+     public FComp Cmp => (FComp)((_opcode >> 35) & 0xF);
+     public BoolOp Bop => (BoolOp)((_opcode >> 45) & 0x3);
+     // Source predicate index (bits 39-41) and its inversion flag (bit 42).
+     public int SrcPred => (int)((_opcode >> 39) & 0x7);
+     public bool SrcPredInv => (_opcode & 0x40000000000) != 0;
+     public bool Ftz => (_opcode & 0x4000000000000) != 0;
+ }
+
+ /// <summary>Immutable bit-field view over one raw 64-bit opcode word; the second operand is a pair of 10-bit immediates.</summary>
+ readonly struct InstHset2I
+ {
+     // Set once by the constructor and never written again, hence readonly.
+     private readonly ulong _opcode;
+     public InstHset2I(ulong opcode) => _opcode = opcode;
+     public int Dest => (int)((_opcode >> 0) & 0xFF);
+     public int SrcA => (int)((_opcode >> 8) & 0xFF);
+     // First immediate: word bits 20-29.
+     public int BimmH0 => (int)((_opcode >> 20) & 0x3FF);
+     // Second immediate is split: word bit 56 becomes bit 9, word bits 30-38 become bits 0-8.
+     public int BimmH1 => (int)((_opcode >> 47) & 0x200) | (int)((_opcode >> 30) & 0x1FF);
+     public HalfSwizzle ASwizzle => (HalfSwizzle)((_opcode >> 47) & 0x3);
+     public int Pred => (int)((_opcode >> 16) & 0x7);
+     public bool PredInv => (_opcode & 0x80000) != 0;
+     public bool NegA => (_opcode & 0x80000000000) != 0;
+     public bool AbsA => (_opcode & 0x100000000000) != 0;
+     public bool Bval => (_opcode & 0x20000000000000) != 0;
+     // Four-bit comparison selector in bits 49-52.
+     public FComp Cmp => (FComp)((_opcode >> 49) & 0xF);
+     public BoolOp Bop => (BoolOp)((_opcode >> 45) & 0x3);
+     // Source predicate index (bits 39-41) and its inversion flag (bit 42).
+     public int SrcPred => (int)((_opcode >> 39) & 0x7);
+     public bool SrcPredInv => (_opcode & 0x40000000000) != 0;
+     public bool Ftz => (_opcode & 0x40000000000000) != 0;
+ }
+
+ /// <summary>Immutable bit-field view over one raw 64-bit opcode word; the second operand is a constant-buffer slot/offset pair.</summary>
+ readonly struct InstHset2C
+ {
+     // Set once by the constructor and never written again, hence readonly.
+     private readonly ulong _opcode;
+     public InstHset2C(ulong opcode) => _opcode = opcode;
+     public int Dest => (int)((_opcode >> 0) & 0xFF);
+     public int SrcA => (int)((_opcode >> 8) & 0xFF);
+     // Constant-buffer operand: slot in bits 34-38, offset in bits 20-33.
+     public int CbufSlot => (int)((_opcode >> 34) & 0x1F);
+     public int CbufOffset => (int)((_opcode >> 20) & 0x3FFF);
+     public HalfSwizzle ASwizzle => (HalfSwizzle)((_opcode >> 47) & 0x3);
+     public int Pred => (int)((_opcode >> 16) & 0x7);
+     public bool PredInv => (_opcode & 0x80000) != 0;
+     public bool NegA => (_opcode & 0x80000000000) != 0;
+     public bool AbsA => (_opcode & 0x100000000000) != 0;
+     public bool NegB => (_opcode & 0x100000000000000) != 0;
+     public bool Bval => (_opcode & 0x20000000000000) != 0;
+     // Four-bit comparison selector in bits 49-52.
+     public FComp Cmp => (FComp)((_opcode >> 49) & 0xF);
+     public BoolOp Bop => (BoolOp)((_opcode >> 45) & 0x3);
+     // Source predicate index (bits 39-41) and its inversion flag (bit 42).
+     public int SrcPred => (int)((_opcode >> 39) & 0x7);
+     public bool SrcPredInv => (_opcode & 0x40000000000) != 0;
+     public bool Ftz => (_opcode & 0x40000000000000) != 0;
+ }
+
+ /// <summary>Immutable bit-field view over one raw 64-bit opcode word; two predicate indices in the low bits, second operand a register index (bits 20-27).</summary>
+ readonly struct InstHsetp2R
+ {
+     // Set once by the constructor and never written again, hence readonly.
+     private readonly ulong _opcode;
+     public InstHsetp2R(ulong opcode) => _opcode = opcode;
+     // Two 3-bit predicate indices: bits 3-5 and bits 0-2.
+     public int DestPred => (int)((_opcode >> 3) & 0x7);
+     public int DestPredInv => (int)((_opcode >> 0) & 0x7);
+     public int SrcA => (int)((_opcode >> 8) & 0xFF);
+     public int SrcB => (int)((_opcode >> 20) & 0xFF);
+     public int Pred => (int)((_opcode >> 16) & 0x7);
+     public bool PredInv => (_opcode & 0x80000) != 0;
+     public bool NegA => (_opcode & 0x80000000000) != 0;
+     public bool NegB => (_opcode & 0x80000000) != 0;
+     public bool AbsA => (_opcode & 0x100000000000) != 0;
+     public bool AbsB => (_opcode & 0x40000000) != 0;
+     // Four-bit comparison selector in bits 35-38.
+     public FComp FComp2 => (FComp)((_opcode >> 35) & 0xF);
+     // Source predicate index (bits 39-41) and its inversion flag (bit 42).
+     public int SrcPred => (int)((_opcode >> 39) & 0x7);
+     public bool SrcPredInv => (_opcode & 0x40000000000) != 0;
+     public BoolOp Bop => (BoolOp)((_opcode >> 45) & 0x3);
+     public bool Ftz => (_opcode & 0x40) != 0;
+     public bool HAnd => (_opcode & 0x2000000000000) != 0;
+     // Per-operand swizzle selectors: A in bits 47-48, B in bits 28-29.
+     public HalfSwizzle ASwizzle => (HalfSwizzle)((_opcode >> 47) & 0x3);
+     public HalfSwizzle BSwizzle => (HalfSwizzle)((_opcode >> 28) & 0x3);
+ }
+
+ /// <summary>Immutable bit-field view over one raw 64-bit opcode word; two predicate indices in the low bits, second operand a pair of 10-bit immediates.</summary>
+ readonly struct InstHsetp2I
+ {
+     // Set once by the constructor and never written again, hence readonly.
+     private readonly ulong _opcode;
+     public InstHsetp2I(ulong opcode) => _opcode = opcode;
+     // Two 3-bit predicate indices: bits 3-5 and bits 0-2.
+     public int DestPred => (int)((_opcode >> 3) & 0x7);
+     public int DestPredInv => (int)((_opcode >> 0) & 0x7);
+     public int SrcA => (int)((_opcode >> 8) & 0xFF);
+     public int Pred => (int)((_opcode >> 16) & 0x7);
+     public bool PredInv => (_opcode & 0x80000) != 0;
+     // First immediate: word bits 20-29.
+     public int BimmH0 => (int)((_opcode >> 20) & 0x3FF);
+     // Second immediate is split: word bit 56 becomes bit 9, word bits 30-38 become bits 0-8.
+     public int BimmH1 => (int)((_opcode >> 47) & 0x200) | (int)((_opcode >> 30) & 0x1FF);
+     public bool NegA => (_opcode & 0x80000000000) != 0;
+     public bool AbsA => (_opcode & 0x100000000000) != 0;
+     // Four-bit comparison selector in bits 49-52.
+     public FComp FComp => (FComp)((_opcode >> 49) & 0xF);
+     // Source predicate index (bits 39-41) and its inversion flag (bit 42).
+     public int SrcPred => (int)((_opcode >> 39) & 0x7);
+     public bool SrcPredInv => (_opcode & 0x40000000000) != 0;
+     public BoolOp Bop => (BoolOp)((_opcode >> 45) & 0x3);
+     public bool Ftz => (_opcode & 0x40) != 0;
+     public bool HAnd => (_opcode & 0x20000000000000) != 0;
+     public HalfSwizzle ASwizzle => (HalfSwizzle)((_opcode >> 47) & 0x3);
+ }
+
+ /// <summary>Immutable bit-field view over one raw 64-bit opcode word; two predicate indices in the low bits, second operand a constant-buffer slot/offset pair.</summary>
+ readonly struct InstHsetp2C
+ {
+     // Set once by the constructor and never written again, hence readonly.
+     private readonly ulong _opcode;
+     public InstHsetp2C(ulong opcode) => _opcode = opcode;
+     // Two 3-bit predicate indices: bits 3-5 and bits 0-2.
+     public int DestPred => (int)((_opcode >> 3) & 0x7);
+     public int DestPredInv => (int)((_opcode >> 0) & 0x7);
+     public int SrcA => (int)((_opcode >> 8) & 0xFF);
+     // Constant-buffer operand: slot in bits 34-38, offset in bits 20-33.
+     public int CbufSlot => (int)((_opcode >> 34) & 0x1F);
+     public int CbufOffset => (int)((_opcode >> 20) & 0x3FFF);
+     public int Pred => (int)((_opcode >> 16) & 0x7);
+     public bool PredInv => (_opcode & 0x80000) != 0;
+     public bool NegA => (_opcode & 0x80000000000) != 0;
+     public bool NegB => (_opcode & 0x100000000000000) != 0;
+     public bool AbsA => (_opcode & 0x100000000000) != 0;
+     public bool AbsB => (_opcode & 0x40000000000000) != 0;
+     // Four-bit comparison selector in bits 49-52.
+     public FComp FComp => (FComp)((_opcode >> 49) & 0xF);
+     // Source predicate index (bits 39-41) and its inversion flag (bit 42).
+     public int SrcPred => (int)((_opcode >> 39) & 0x7);
+     public bool SrcPredInv => (_opcode & 0x40000000000) != 0;
+     public BoolOp Bop => (BoolOp)((_opcode >> 45) & 0x3);
+     public bool Ftz => (_opcode & 0x40) != 0;
+     public bool HAnd => (_opcode & 0x20000000000000) != 0;
+     public HalfSwizzle ASwizzle => (HalfSwizzle)((_opcode >> 47) & 0x3);
+ }
+
+ /// <summary>Immutable bit-field view over one raw 64-bit opcode word; the source operand is a register index (bits 20-27).</summary>
+ readonly struct InstI2fR
+ {
+     // Set once by the constructor and never written again, hence readonly.
+     private readonly ulong _opcode;
+     public InstI2fR(ulong opcode) => _opcode = opcode;
+     public int Dest => (int)((_opcode >> 0) & 0xFF);
+     public int SrcB => (int)((_opcode >> 20) & 0xFF);
+     public int Pred => (int)((_opcode >> 16) & 0x7);
+     public bool PredInv => (_opcode & 0x80000) != 0;
+     public bool WriteCC => (_opcode & 0x800000000000) != 0;
+     public bool AbsB => (_opcode & 0x2000000000000) != 0;
+     public bool NegB => (_opcode & 0x200000000000) != 0;
+     public ByteSel ByteSel => (ByteSel)((_opcode >> 41) & 0x3);
+     public RoundMode RoundMode => (RoundMode)((_opcode >> 39) & 0x3);
+     // Split field: word bit 13 becomes bit 2, word bits 10-11 become bits 0-1.
+     public ISrcFmt ISrcFmt => (ISrcFmt)((int)((_opcode >> 11) & 0x4) | (int)((_opcode >> 10) & 0x3));
+     public DstFmt DstFmt => (DstFmt)((_opcode >> 8) & 0x3);
+ }
+
+ /// <summary>Immutable bit-field view over one raw 64-bit opcode word; the source operand is a 20-bit immediate.</summary>
+ readonly struct InstI2fI
+ {
+     // Set once by the constructor and never written again, hence readonly.
+     private readonly ulong _opcode;
+     public InstI2fI(ulong opcode) => _opcode = opcode;
+     public int Dest => (int)((_opcode >> 0) & 0xFF);
+     // Split field: word bit 56 becomes bit 19, word bits 20-38 become bits 0-18.
+     public int Imm20 => (int)((_opcode >> 37) & 0x80000) | (int)((_opcode >> 20) & 0x7FFFF);
+     public int Pred => (int)((_opcode >> 16) & 0x7);
+     public bool PredInv => (_opcode & 0x80000) != 0;
+     public bool WriteCC => (_opcode & 0x800000000000) != 0;
+     public bool AbsB => (_opcode & 0x2000000000000) != 0;
+     public bool NegB => (_opcode & 0x200000000000) != 0;
+     public ByteSel ByteSel => (ByteSel)((_opcode >> 41) & 0x3);
+     public RoundMode RoundMode => (RoundMode)((_opcode >> 39) & 0x3);
+     // Split field: word bit 13 becomes bit 2, word bits 10-11 become bits 0-1.
+     public ISrcFmt ISrcFmt => (ISrcFmt)((int)((_opcode >> 11) & 0x4) | (int)((_opcode >> 10) & 0x3));
+     public DstFmt DstFmt => (DstFmt)((_opcode >> 8) & 0x3);
+ }
+
+ /// <summary>Immutable bit-field view over one raw 64-bit opcode word; the source operand is a constant-buffer slot/offset pair.</summary>
+ readonly struct InstI2fC
+ {
+     // Set once by the constructor and never written again, hence readonly.
+     private readonly ulong _opcode;
+     public InstI2fC(ulong opcode) => _opcode = opcode;
+     public int Dest => (int)((_opcode >> 0) & 0xFF);
+     // Constant-buffer operand: slot in bits 34-38, offset in bits 20-33.
+     public int CbufSlot => (int)((_opcode >> 34) & 0x1F);
+     public int CbufOffset => (int)((_opcode >> 20) & 0x3FFF);
+     public int Pred => (int)((_opcode >> 16) & 0x7);
+     public bool PredInv => (_opcode & 0x80000) != 0;
+     public bool WriteCC => (_opcode & 0x800000000000) != 0;
+     public bool AbsB => (_opcode & 0x2000000000000) != 0;
+     public bool NegB => (_opcode & 0x200000000000) != 0;
+     public ByteSel ByteSel => (ByteSel)((_opcode >> 41) & 0x3);
+     public RoundMode RoundMode => (RoundMode)((_opcode >> 39) & 0x3);
+     // Split field: word bit 13 becomes bit 2, word bits 10-11 become bits 0-1.
+     public ISrcFmt ISrcFmt => (ISrcFmt)((int)((_opcode >> 11) & 0x4) | (int)((_opcode >> 10) & 0x3));
+     public DstFmt DstFmt => (DstFmt)((_opcode >> 8) & 0x3);
+ }
+
/// <summary>
/// Read-only field accessors over the raw 64-bit opcode of the <c>InstI2iR</c> encoding.
/// Every property is a plain shift/mask of the stored word; nothing is validated.
/// </summary>
/// <remarks>
/// Both format fields are 3-bit values stitched together from non-adjacent bits:
/// <c>IDstFmt</c> takes opcode bit 12 as its bit 2 and opcode bits 8-9 as its bits 0-1;
/// <c>ISrcFmt</c> takes opcode bit 13 as its bit 2 and opcode bits 10-11 as its bits 0-1.
/// </remarks>
readonly struct InstI2iR
{
    private readonly ulong _opcode;

    public InstI2iR(ulong opcode) => _opcode = opcode;

    public int Dest => (int)((_opcode >> 0) & 0xFF);
    public int SrcB => (int)((_opcode >> 20) & 0xFF);
    public int Pred => (int)((_opcode >> 16) & 0x7);
    public bool PredInv => (_opcode & 0x80000) != 0;
    public bool WriteCC => (_opcode & 0x800000000000) != 0;
    public bool Sat => (_opcode & 0x4000000000000) != 0;
    public bool AbsB => (_opcode & 0x2000000000000) != 0;
    public bool NegB => (_opcode & 0x200000000000) != 0;
    public ByteSel ByteSel => (ByteSel)((_opcode >> 41) & 0x3);
    public ISrcDstFmt IDstFmt => (ISrcDstFmt)((int)((_opcode >> 10) & 0x4) | (int)((_opcode >> 8) & 0x3));
    public ISrcDstFmt ISrcFmt => (ISrcDstFmt)((int)((_opcode >> 11) & 0x4) | (int)((_opcode >> 10) & 0x3));
}
+
/// <summary>
/// Read-only field accessors over the raw 64-bit opcode of the <c>InstI2iI</c> encoding.
/// Every property is a plain shift/mask of the stored word; nothing is validated.
/// </summary>
/// <remarks>
/// <c>Imm20</c> is a 20-bit value: bits 0-18 come from opcode bits 20-38 and bit 19 comes from
/// opcode bit 56; it is not sign-extended here.
/// <c>IDstFmt</c> is built from opcode bit 12 (as bit 2) and bits 8-9; <c>ISrcFmt</c> from
/// opcode bit 13 (as bit 2) and bits 10-11.
/// </remarks>
readonly struct InstI2iI
{
    private readonly ulong _opcode;

    public InstI2iI(ulong opcode) => _opcode = opcode;

    public int Dest => (int)((_opcode >> 0) & 0xFF);
    public int Imm20 => (int)((_opcode >> 37) & 0x80000) | (int)((_opcode >> 20) & 0x7FFFF);
    public int Pred => (int)((_opcode >> 16) & 0x7);
    public bool PredInv => (_opcode & 0x80000) != 0;
    public bool WriteCC => (_opcode & 0x800000000000) != 0;
    public bool Sat => (_opcode & 0x4000000000000) != 0;
    public bool AbsB => (_opcode & 0x2000000000000) != 0;
    public bool NegB => (_opcode & 0x200000000000) != 0;
    public ByteSel ByteSel => (ByteSel)((_opcode >> 41) & 0x3);
    public ISrcDstFmt IDstFmt => (ISrcDstFmt)((int)((_opcode >> 10) & 0x4) | (int)((_opcode >> 8) & 0x3));
    public ISrcDstFmt ISrcFmt => (ISrcDstFmt)((int)((_opcode >> 11) & 0x4) | (int)((_opcode >> 10) & 0x3));
}
+
/// <summary>
/// Read-only field accessors over the raw 64-bit opcode of the <c>InstI2iC</c> encoding.
/// Every property is a plain shift/mask of the stored word; nothing is validated.
/// </summary>
/// <remarks>
/// This variant exposes <c>CbufSlot</c> (opcode bits 34-38) and <c>CbufOffset</c> (opcode bits 20-33).
/// <c>IDstFmt</c> is built from opcode bit 12 (as bit 2) and bits 8-9; <c>ISrcFmt</c> from
/// opcode bit 13 (as bit 2) and bits 10-11.
/// </remarks>
readonly struct InstI2iC
{
    private readonly ulong _opcode;

    public InstI2iC(ulong opcode) => _opcode = opcode;

    public int Dest => (int)((_opcode >> 0) & 0xFF);
    public int CbufSlot => (int)((_opcode >> 34) & 0x1F);
    public int CbufOffset => (int)((_opcode >> 20) & 0x3FFF);
    public int Pred => (int)((_opcode >> 16) & 0x7);
    public bool PredInv => (_opcode & 0x80000) != 0;
    public bool WriteCC => (_opcode & 0x800000000000) != 0;
    public bool Sat => (_opcode & 0x4000000000000) != 0;
    public bool AbsB => (_opcode & 0x2000000000000) != 0;
    public bool NegB => (_opcode & 0x200000000000) != 0;
    public ByteSel ByteSel => (ByteSel)((_opcode >> 41) & 0x3);
    public ISrcDstFmt IDstFmt => (ISrcDstFmt)((int)((_opcode >> 10) & 0x4) | (int)((_opcode >> 8) & 0x3));
    public ISrcDstFmt ISrcFmt => (ISrcDstFmt)((int)((_opcode >> 11) & 0x4) | (int)((_opcode >> 10) & 0x3));
}
+
/// <summary>
/// Read-only field accessors over the raw 64-bit opcode of the <c>InstIaddR</c> encoding.
/// Every property is a plain shift/mask of the stored word; nothing is validated.
/// </summary>
/// <remarks>
/// Register fields are 8 bits wide (<c>Dest</c> bits 0-7, <c>SrcA</c> bits 8-15, <c>SrcB</c> bits 20-27);
/// <c>AvgMode</c> is the 2-bit value at opcode bits 48-49.
/// </remarks>
readonly struct InstIaddR
{
    private readonly ulong _opcode;

    public InstIaddR(ulong opcode) => _opcode = opcode;

    public int Dest => (int)((_opcode >> 0) & 0xFF);
    public int SrcA => (int)((_opcode >> 8) & 0xFF);
    public int SrcB => (int)((_opcode >> 20) & 0xFF);
    public int Pred => (int)((_opcode >> 16) & 0x7);
    public bool PredInv => (_opcode & 0x80000) != 0;
    public bool WriteCC => (_opcode & 0x800000000000) != 0;
    public bool Sat => (_opcode & 0x4000000000000) != 0;
    public AvgMode AvgMode => (AvgMode)((_opcode >> 48) & 0x3);
    public bool X => (_opcode & 0x80000000000) != 0;
}
+
/// <summary>
/// Read-only field accessors over the raw 64-bit opcode of the <c>InstIaddI</c> encoding.
/// Every property is a plain shift/mask of the stored word; nothing is validated.
/// </summary>
/// <remarks>
/// <c>Imm20</c> is a 20-bit value: bits 0-18 come from opcode bits 20-38 and bit 19 comes from
/// opcode bit 56; it is not sign-extended here.
/// </remarks>
readonly struct InstIaddI
{
    private readonly ulong _opcode;

    public InstIaddI(ulong opcode) => _opcode = opcode;

    public int Dest => (int)((_opcode >> 0) & 0xFF);
    public int SrcA => (int)((_opcode >> 8) & 0xFF);
    public int Imm20 => (int)((_opcode >> 37) & 0x80000) | (int)((_opcode >> 20) & 0x7FFFF);
    public int Pred => (int)((_opcode >> 16) & 0x7);
    public bool PredInv => (_opcode & 0x80000) != 0;
    public bool WriteCC => (_opcode & 0x800000000000) != 0;
    public bool Sat => (_opcode & 0x4000000000000) != 0;
    public AvgMode AvgMode => (AvgMode)((_opcode >> 48) & 0x3);
    public bool X => (_opcode & 0x80000000000) != 0;
}
+
/// <summary>
/// Read-only field accessors over the raw 64-bit opcode of the <c>InstIaddC</c> encoding.
/// Every property is a plain shift/mask of the stored word; nothing is validated.
/// </summary>
/// <remarks>
/// This variant exposes <c>CbufSlot</c> (opcode bits 34-38) and <c>CbufOffset</c> (opcode bits 20-33).
/// </remarks>
readonly struct InstIaddC
{
    private readonly ulong _opcode;

    public InstIaddC(ulong opcode) => _opcode = opcode;

    public int Dest => (int)((_opcode >> 0) & 0xFF);
    public int SrcA => (int)((_opcode >> 8) & 0xFF);
    public int CbufSlot => (int)((_opcode >> 34) & 0x1F);
    public int CbufOffset => (int)((_opcode >> 20) & 0x3FFF);
    public int Pred => (int)((_opcode >> 16) & 0x7);
    public bool PredInv => (_opcode & 0x80000) != 0;
    public bool WriteCC => (_opcode & 0x800000000000) != 0;
    public bool Sat => (_opcode & 0x4000000000000) != 0;
    public AvgMode AvgMode => (AvgMode)((_opcode >> 48) & 0x3);
    public bool X => (_opcode & 0x80000000000) != 0;
}
+
/// <summary>
/// Read-only field accessors over the raw 64-bit opcode of the <c>InstIadd32i</c> encoding.
/// Every property is a plain shift/mask of the stored word; nothing is validated.
/// </summary>
/// <remarks>
/// <c>Imm32</c> shifts the opcode right by 20 and casts to <c>int</c>; with overflow checking off
/// (the C# default) that keeps opcode bits 20-51. The flag bits in this encoding sit above that
/// window: <c>WriteCC</c> bit 52, <c>X</c> bit 53, <c>Sat</c> bit 54, <c>AvgMode</c> bits 55-56.
/// </remarks>
readonly struct InstIadd32i
{
    private readonly ulong _opcode;

    public InstIadd32i(ulong opcode) => _opcode = opcode;

    public int Dest => (int)((_opcode >> 0) & 0xFF);
    public int SrcA => (int)((_opcode >> 8) & 0xFF);
    public int Pred => (int)((_opcode >> 16) & 0x7);
    public bool PredInv => (_opcode & 0x80000) != 0;
    public int Imm32 => (int)(_opcode >> 20);
    public AvgMode AvgMode => (AvgMode)((_opcode >> 55) & 0x3);
    public bool Sat => (_opcode & 0x40000000000000) != 0;
    public bool WriteCC => (_opcode & 0x10000000000000) != 0;
    public bool X => (_opcode & 0x20000000000000) != 0;
}
+
/// <summary>
/// Read-only field accessors over the raw 64-bit opcode of the <c>InstIadd3R</c> encoding.
/// Every property is a plain shift/mask of the stored word; nothing is validated.
/// </summary>
/// <remarks>
/// Three 8-bit source fields are exposed (<c>SrcA</c> bits 8-15, <c>SrcB</c> bits 20-27,
/// <c>SrcC</c> bits 39-46). The 2-bit selectors are packed downward from bit 38:
/// <c>Lrs</c> bits 37-38, <c>Apart</c> bits 35-36, <c>Bpart</c> bits 33-34, <c>Cpart</c> bits 31-32.
/// </remarks>
readonly struct InstIadd3R
{
    private readonly ulong _opcode;

    public InstIadd3R(ulong opcode) => _opcode = opcode;

    public int Dest => (int)((_opcode >> 0) & 0xFF);
    public int SrcA => (int)((_opcode >> 8) & 0xFF);
    public int SrcB => (int)((_opcode >> 20) & 0xFF);
    public int SrcC => (int)((_opcode >> 39) & 0xFF);
    public int Pred => (int)((_opcode >> 16) & 0x7);
    public bool PredInv => (_opcode & 0x80000) != 0;
    public bool WriteCC => (_opcode & 0x800000000000) != 0;
    public bool NegA => (_opcode & 0x8000000000000) != 0;
    public bool NegB => (_opcode & 0x4000000000000) != 0;
    public bool NegC => (_opcode & 0x2000000000000) != 0;
    public bool X => (_opcode & 0x1000000000000) != 0;
    public Lrs Lrs => (Lrs)((_opcode >> 37) & 0x3);
    public HalfSelect Apart => (HalfSelect)((_opcode >> 35) & 0x3);
    public HalfSelect Bpart => (HalfSelect)((_opcode >> 33) & 0x3);
    public HalfSelect Cpart => (HalfSelect)((_opcode >> 31) & 0x3);
}
+
/// <summary>
/// Read-only field accessors over the raw 64-bit opcode of the <c>InstIadd3I</c> encoding.
/// Every property is a plain shift/mask of the stored word; nothing is validated.
/// </summary>
/// <remarks>
/// <c>Imm20</c> is a 20-bit value: bits 0-18 come from opcode bits 20-38 and bit 19 comes from
/// opcode bit 56; it is not sign-extended here. The negate/extend flags occupy opcode bits
/// 48-51 (<c>X</c>, <c>NegC</c>, <c>NegB</c>, <c>NegA</c> in ascending bit order).
/// </remarks>
readonly struct InstIadd3I
{
    private readonly ulong _opcode;

    public InstIadd3I(ulong opcode) => _opcode = opcode;

    public int Dest => (int)((_opcode >> 0) & 0xFF);
    public int SrcA => (int)((_opcode >> 8) & 0xFF);
    public int Imm20 => (int)((_opcode >> 37) & 0x80000) | (int)((_opcode >> 20) & 0x7FFFF);
    public int SrcC => (int)((_opcode >> 39) & 0xFF);
    public int Pred => (int)((_opcode >> 16) & 0x7);
    public bool PredInv => (_opcode & 0x80000) != 0;
    public bool WriteCC => (_opcode & 0x800000000000) != 0;
    public bool NegA => (_opcode & 0x8000000000000) != 0;
    public bool NegB => (_opcode & 0x4000000000000) != 0;
    public bool NegC => (_opcode & 0x2000000000000) != 0;
    public bool X => (_opcode & 0x1000000000000) != 0;
}
+
/// <summary>
/// Read-only field accessors over the raw 64-bit opcode of the <c>InstIadd3C</c> encoding.
/// Every property is a plain shift/mask of the stored word; nothing is validated.
/// </summary>
/// <remarks>
/// This variant exposes <c>CbufSlot</c> (opcode bits 34-38) and <c>CbufOffset</c> (opcode bits 20-33).
/// The negate/extend flags occupy opcode bits 48-51 (<c>X</c>, <c>NegC</c>, <c>NegB</c>, <c>NegA</c>
/// in ascending bit order).
/// </remarks>
readonly struct InstIadd3C
{
    private readonly ulong _opcode;

    public InstIadd3C(ulong opcode) => _opcode = opcode;

    public int Dest => (int)((_opcode >> 0) & 0xFF);
    public int SrcA => (int)((_opcode >> 8) & 0xFF);
    public int CbufSlot => (int)((_opcode >> 34) & 0x1F);
    public int CbufOffset => (int)((_opcode >> 20) & 0x3FFF);
    public int SrcC => (int)((_opcode >> 39) & 0xFF);
    public int Pred => (int)((_opcode >> 16) & 0x7);
    public bool PredInv => (_opcode & 0x80000) != 0;
    public bool WriteCC => (_opcode & 0x800000000000) != 0;
    public bool NegA => (_opcode & 0x8000000000000) != 0;
    public bool NegB => (_opcode & 0x4000000000000) != 0;
    public bool NegC => (_opcode & 0x2000000000000) != 0;
    public bool X => (_opcode & 0x1000000000000) != 0;
}
+
/// <summary>
/// Read-only field accessors over the raw 64-bit opcode of the <c>InstIcmpR</c> encoding.
/// Every property is a plain shift/mask of the stored word; nothing is validated.
/// </summary>
/// <remarks>
/// <c>IComp</c> is the 3-bit value at opcode bits 49-51 and <c>Signed</c> is opcode bit 48.
/// </remarks>
readonly struct InstIcmpR
{
    private readonly ulong _opcode;

    public InstIcmpR(ulong opcode) => _opcode = opcode;

    public int Dest => (int)((_opcode >> 0) & 0xFF);
    public int SrcA => (int)((_opcode >> 8) & 0xFF);
    public int SrcB => (int)((_opcode >> 20) & 0xFF);
    public int SrcC => (int)((_opcode >> 39) & 0xFF);
    public int Pred => (int)((_opcode >> 16) & 0x7);
    public bool PredInv => (_opcode & 0x80000) != 0;
    public IComp IComp => (IComp)((_opcode >> 49) & 0x7);
    public bool Signed => (_opcode & 0x1000000000000) != 0;
}
+
/// <summary>
/// Read-only field accessors over the raw 64-bit opcode of the <c>InstIcmpI</c> encoding.
/// Every property is a plain shift/mask of the stored word; nothing is validated.
/// </summary>
/// <remarks>
/// <c>Imm20</c> is a 20-bit value: bits 0-18 come from opcode bits 20-38 and bit 19 comes from
/// opcode bit 56; it is not sign-extended here.
/// </remarks>
readonly struct InstIcmpI
{
    private readonly ulong _opcode;

    public InstIcmpI(ulong opcode) => _opcode = opcode;

    public int Dest => (int)((_opcode >> 0) & 0xFF);
    public int SrcA => (int)((_opcode >> 8) & 0xFF);
    public int Imm20 => (int)((_opcode >> 37) & 0x80000) | (int)((_opcode >> 20) & 0x7FFFF);
    public int SrcC => (int)((_opcode >> 39) & 0xFF);
    public int Pred => (int)((_opcode >> 16) & 0x7);
    public bool PredInv => (_opcode & 0x80000) != 0;
    public IComp IComp => (IComp)((_opcode >> 49) & 0x7);
    public bool Signed => (_opcode & 0x1000000000000) != 0;
}
+
/// <summary>
/// Read-only field accessors over the raw 64-bit opcode of the <c>InstIcmpC</c> encoding.
/// Every property is a plain shift/mask of the stored word; nothing is validated.
/// </summary>
/// <remarks>
/// This variant exposes <c>CbufSlot</c> (opcode bits 34-38) and <c>CbufOffset</c> (opcode bits 20-33).
/// </remarks>
readonly struct InstIcmpC
{
    private readonly ulong _opcode;

    public InstIcmpC(ulong opcode) => _opcode = opcode;

    public int Dest => (int)((_opcode >> 0) & 0xFF);
    public int SrcA => (int)((_opcode >> 8) & 0xFF);
    public int CbufSlot => (int)((_opcode >> 34) & 0x1F);
    public int CbufOffset => (int)((_opcode >> 20) & 0x3FFF);
    public int SrcC => (int)((_opcode >> 39) & 0xFF);
    public int Pred => (int)((_opcode >> 16) & 0x7);
    public bool PredInv => (_opcode & 0x80000) != 0;
    public IComp IComp => (IComp)((_opcode >> 49) & 0x7);
    public bool Signed => (_opcode & 0x1000000000000) != 0;
}
+
/// <summary>
/// Read-only field accessors over the raw 64-bit opcode of the <c>InstIcmpRc</c> encoding.
/// Every property is a plain shift/mask of the stored word; nothing is validated.
/// </summary>
/// <remarks>
/// The members and bit positions are the same as in the sibling <c>InstIcmpC</c> struct; only the
/// declaration order of <c>SrcC</c> and the constant-buffer fields differs.
/// </remarks>
readonly struct InstIcmpRc
{
    private readonly ulong _opcode;

    public InstIcmpRc(ulong opcode) => _opcode = opcode;

    public int Dest => (int)((_opcode >> 0) & 0xFF);
    public int SrcA => (int)((_opcode >> 8) & 0xFF);
    public int SrcC => (int)((_opcode >> 39) & 0xFF);
    public int CbufSlot => (int)((_opcode >> 34) & 0x1F);
    public int CbufOffset => (int)((_opcode >> 20) & 0x3FFF);
    public int Pred => (int)((_opcode >> 16) & 0x7);
    public bool PredInv => (_opcode & 0x80000) != 0;
    public IComp IComp => (IComp)((_opcode >> 49) & 0x7);
    public bool Signed => (_opcode & 0x1000000000000) != 0;
}
+
/// <summary>
/// Read-only field accessors over the raw 64-bit opcode of the <c>InstIde</c> encoding.
/// </summary>
/// <remarks>
/// Only two fields are exposed: the 16-bit <c>Imm16</c> at opcode bits 20-35 and the
/// single-bit <c>Di</c> flag at opcode bit 5.
/// </remarks>
readonly struct InstIde
{
    private readonly ulong _opcode;

    public InstIde(ulong opcode) => _opcode = opcode;

    public int Imm16 => (int)((_opcode >> 20) & 0xFFFF);
    public bool Di => (_opcode & 0x20) != 0;
}
+
/// <summary>
/// Read-only field accessors over the raw 64-bit opcode of the <c>InstIdpR</c> encoding.
/// Every property is a plain shift/mask of the stored word; nothing is validated.
/// </summary>
/// <remarks>
/// The four mode flags are contiguous: <c>SrcBSign</c> bit 47, <c>IsDp</c> bit 48,
/// <c>SrcASign</c> bit 49, <c>IsHi</c> bit 50.
/// </remarks>
readonly struct InstIdpR
{
    private readonly ulong _opcode;

    public InstIdpR(ulong opcode) => _opcode = opcode;

    public int Dest => (int)((_opcode >> 0) & 0xFF);
    public int SrcA => (int)((_opcode >> 8) & 0xFF);
    public int SrcB => (int)((_opcode >> 20) & 0xFF);
    public int SrcC => (int)((_opcode >> 39) & 0xFF);
    public int Pred => (int)((_opcode >> 16) & 0x7);
    public bool PredInv => (_opcode & 0x80000) != 0;
    public bool IsHi => (_opcode & 0x4000000000000) != 0;
    public bool SrcASign => (_opcode & 0x2000000000000) != 0;
    public bool IsDp => (_opcode & 0x1000000000000) != 0;
    public bool SrcBSign => (_opcode & 0x800000000000) != 0;
}
+
/// <summary>
/// Read-only field accessors over the raw 64-bit opcode of the <c>InstIdpC</c> encoding.
/// Every property is a plain shift/mask of the stored word; nothing is validated.
/// </summary>
/// <remarks>
/// This variant exposes <c>CbufSlot</c> (opcode bits 34-38) and <c>CbufOffset</c> (opcode bits 20-33).
/// The four mode flags are contiguous: <c>SrcBSign</c> bit 47, <c>IsDp</c> bit 48,
/// <c>SrcASign</c> bit 49, <c>IsHi</c> bit 50.
/// </remarks>
readonly struct InstIdpC
{
    private readonly ulong _opcode;

    public InstIdpC(ulong opcode) => _opcode = opcode;

    public int Dest => (int)((_opcode >> 0) & 0xFF);
    public int SrcA => (int)((_opcode >> 8) & 0xFF);
    public int CbufSlot => (int)((_opcode >> 34) & 0x1F);
    public int CbufOffset => (int)((_opcode >> 20) & 0x3FFF);
    public int SrcC => (int)((_opcode >> 39) & 0xFF);
    public int Pred => (int)((_opcode >> 16) & 0x7);
    public bool PredInv => (_opcode & 0x80000) != 0;
    public bool IsHi => (_opcode & 0x4000000000000) != 0;
    public bool SrcASign => (_opcode & 0x2000000000000) != 0;
    public bool IsDp => (_opcode & 0x1000000000000) != 0;
    public bool SrcBSign => (_opcode & 0x800000000000) != 0;
}
+
/// <summary>
/// Read-only field accessors over the raw 64-bit opcode of the <c>InstImadR</c> encoding.
/// Every property is a plain shift/mask of the stored word; nothing is validated.
/// </summary>
/// <remarks>
/// Modifier layout above the operand fields: <c>ASigned</c> bit 48, <c>X</c> bit 49, <c>Sat</c> bit 50,
/// <c>AvgMode</c> bits 51-52, <c>BSigned</c> bit 53, <c>Hilo</c> bit 54.
/// </remarks>
readonly struct InstImadR
{
    private readonly ulong _opcode;

    public InstImadR(ulong opcode) => _opcode = opcode;

    public int Dest => (int)((_opcode >> 0) & 0xFF);
    public int SrcA => (int)((_opcode >> 8) & 0xFF);
    public int SrcB => (int)((_opcode >> 20) & 0xFF);
    public int SrcC => (int)((_opcode >> 39) & 0xFF);
    public int Pred => (int)((_opcode >> 16) & 0x7);
    public bool PredInv => (_opcode & 0x80000) != 0;
    public bool WriteCC => (_opcode & 0x800000000000) != 0;
    public bool Hilo => (_opcode & 0x40000000000000) != 0;
    public bool BSigned => (_opcode & 0x20000000000000) != 0;
    public AvgMode AvgMode => (AvgMode)((_opcode >> 51) & 0x3);
    public bool Sat => (_opcode & 0x4000000000000) != 0;
    public bool X => (_opcode & 0x2000000000000) != 0;
    public bool ASigned => (_opcode & 0x1000000000000) != 0;
}
+
/// <summary>
/// Read-only field accessors over the raw 64-bit opcode of the <c>InstImadI</c> encoding.
/// Every property is a plain shift/mask of the stored word; nothing is validated.
/// </summary>
/// <remarks>
/// <c>Imm20</c> is a 20-bit value: bits 0-18 come from opcode bits 20-38 and bit 19 comes from
/// opcode bit 56; it is not sign-extended here.
/// Modifier layout: <c>ASigned</c> bit 48, <c>X</c> bit 49, <c>Sat</c> bit 50,
/// <c>AvgMode</c> bits 51-52, <c>BSigned</c> bit 53, <c>Hilo</c> bit 54.
/// </remarks>
readonly struct InstImadI
{
    private readonly ulong _opcode;

    public InstImadI(ulong opcode) => _opcode = opcode;

    public int Dest => (int)((_opcode >> 0) & 0xFF);
    public int SrcA => (int)((_opcode >> 8) & 0xFF);
    public int Imm20 => (int)((_opcode >> 37) & 0x80000) | (int)((_opcode >> 20) & 0x7FFFF);
    public int SrcC => (int)((_opcode >> 39) & 0xFF);
    public int Pred => (int)((_opcode >> 16) & 0x7);
    public bool PredInv => (_opcode & 0x80000) != 0;
    public bool WriteCC => (_opcode & 0x800000000000) != 0;
    public bool Hilo => (_opcode & 0x40000000000000) != 0;
    public bool BSigned => (_opcode & 0x20000000000000) != 0;
    public AvgMode AvgMode => (AvgMode)((_opcode >> 51) & 0x3);
    public bool Sat => (_opcode & 0x4000000000000) != 0;
    public bool X => (_opcode & 0x2000000000000) != 0;
    public bool ASigned => (_opcode & 0x1000000000000) != 0;
}
+
/// <summary>
/// Read-only field accessors over the raw 64-bit opcode of the <c>InstImadC</c> encoding.
/// Every property is a plain shift/mask of the stored word; nothing is validated.
/// </summary>
/// <remarks>
/// This variant exposes <c>CbufSlot</c> (opcode bits 34-38) and <c>CbufOffset</c> (opcode bits 20-33).
/// Modifier layout: <c>ASigned</c> bit 48, <c>X</c> bit 49, <c>Sat</c> bit 50,
/// <c>AvgMode</c> bits 51-52, <c>BSigned</c> bit 53, <c>Hilo</c> bit 54.
/// </remarks>
readonly struct InstImadC
{
    private readonly ulong _opcode;

    public InstImadC(ulong opcode) => _opcode = opcode;

    public int Dest => (int)((_opcode >> 0) & 0xFF);
    public int SrcA => (int)((_opcode >> 8) & 0xFF);
    public int CbufSlot => (int)((_opcode >> 34) & 0x1F);
    public int CbufOffset => (int)((_opcode >> 20) & 0x3FFF);
    public int SrcC => (int)((_opcode >> 39) & 0xFF);
    public int Pred => (int)((_opcode >> 16) & 0x7);
    public bool PredInv => (_opcode & 0x80000) != 0;
    public bool WriteCC => (_opcode & 0x800000000000) != 0;
    public bool Hilo => (_opcode & 0x40000000000000) != 0;
    public bool BSigned => (_opcode & 0x20000000000000) != 0;
    public AvgMode AvgMode => (AvgMode)((_opcode >> 51) & 0x3);
    public bool Sat => (_opcode & 0x4000000000000) != 0;
    public bool X => (_opcode & 0x2000000000000) != 0;
    public bool ASigned => (_opcode & 0x1000000000000) != 0;
}
+
/// <summary>
/// Read-only field accessors over the raw 64-bit opcode of the <c>InstImadRc</c> encoding.
/// Every property is a plain shift/mask of the stored word; nothing is validated.
/// </summary>
/// <remarks>
/// The members and bit positions are the same as in the sibling <c>InstImadC</c> struct; only the
/// declaration order of <c>SrcC</c> and the constant-buffer fields differs.
/// </remarks>
readonly struct InstImadRc
{
    private readonly ulong _opcode;

    public InstImadRc(ulong opcode) => _opcode = opcode;

    public int Dest => (int)((_opcode >> 0) & 0xFF);
    public int SrcA => (int)((_opcode >> 8) & 0xFF);
    public int SrcC => (int)((_opcode >> 39) & 0xFF);
    public int CbufSlot => (int)((_opcode >> 34) & 0x1F);
    public int CbufOffset => (int)((_opcode >> 20) & 0x3FFF);
    public int Pred => (int)((_opcode >> 16) & 0x7);
    public bool PredInv => (_opcode & 0x80000) != 0;
    public bool WriteCC => (_opcode & 0x800000000000) != 0;
    public bool Hilo => (_opcode & 0x40000000000000) != 0;
    public bool BSigned => (_opcode & 0x20000000000000) != 0;
    public AvgMode AvgMode => (AvgMode)((_opcode >> 51) & 0x3);
    public bool Sat => (_opcode & 0x4000000000000) != 0;
    public bool X => (_opcode & 0x2000000000000) != 0;
    public bool ASigned => (_opcode & 0x1000000000000) != 0;
}
+
/// <summary>
/// Read-only field accessors over the raw 64-bit opcode of the <c>InstImad32i</c> encoding.
/// Every property is a plain shift/mask of the stored word; nothing is validated.
/// </summary>
/// <remarks>
/// <c>Imm32</c> shifts the opcode right by 20 and casts to <c>int</c>; with overflow checking off
/// (the C# default) that keeps opcode bits 20-51. Modifier layout above that window:
/// <c>WriteCC</c> bit 52, <c>Hilo</c> bit 53, <c>ASigned</c> bit 54, <c>AvgMode</c> bits 55-56,
/// <c>BSigned</c> bit 57.
/// </remarks>
readonly struct InstImad32i
{
    private readonly ulong _opcode;

    public InstImad32i(ulong opcode) => _opcode = opcode;

    public int Dest => (int)((_opcode >> 0) & 0xFF);
    public int SrcA => (int)((_opcode >> 8) & 0xFF);
    public int Pred => (int)((_opcode >> 16) & 0x7);
    public bool PredInv => (_opcode & 0x80000) != 0;
    public int Imm32 => (int)(_opcode >> 20);
    public bool BSigned => (_opcode & 0x200000000000000) != 0;
    public AvgMode AvgMode => (AvgMode)((_opcode >> 55) & 0x3);
    public bool ASigned => (_opcode & 0x40000000000000) != 0;
    public bool WriteCC => (_opcode & 0x10000000000000) != 0;
    public bool Hilo => (_opcode & 0x20000000000000) != 0;
}
+
/// <summary>
/// Read-only field accessors over the raw 64-bit opcode of the <c>InstImadspR</c> encoding.
/// Every property is a plain shift/mask of the stored word; nothing is validated.
/// </summary>
/// <remarks>
/// <c>ASelect</c> is opcode bits 48-50 and <c>BSelect</c> is opcode bits 53-54.
/// <c>CSelect</c> is stitched together: opcode bits 51-52 become its bits 1-2 and opcode bit 48
/// becomes its bit 0, so it shares bit 48 with <c>ASelect</c>.
/// </remarks>
readonly struct InstImadspR
{
    private readonly ulong _opcode;

    public InstImadspR(ulong opcode) => _opcode = opcode;

    public int Dest => (int)((_opcode >> 0) & 0xFF);
    public int SrcA => (int)((_opcode >> 8) & 0xFF);
    public int SrcB => (int)((_opcode >> 20) & 0xFF);
    public int SrcC => (int)((_opcode >> 39) & 0xFF);
    public int Pred => (int)((_opcode >> 16) & 0x7);
    public bool PredInv => (_opcode & 0x80000) != 0;
    public ImadspASelect ASelect => (ImadspASelect)((_opcode >> 48) & 0x7);
    public ImadspBSelect BSelect => (ImadspBSelect)((_opcode >> 53) & 0x3);
    public ImadspASelect CSelect => (ImadspASelect)((int)((_opcode >> 50) & 0x6) | (int)((_opcode >> 48) & 0x1));
}
+
/// <summary>
/// Read-only field accessors over the raw 64-bit opcode of the <c>InstImadspI</c> encoding.
/// Every property is a plain shift/mask of the stored word; nothing is validated.
/// </summary>
/// <remarks>
/// <c>Imm20</c> is a 20-bit value: bits 0-18 come from opcode bits 20-38 and bit 19 comes from
/// opcode bit 56; it is not sign-extended here.
/// <c>CSelect</c> is stitched together: opcode bits 51-52 become its bits 1-2 and opcode bit 48
/// becomes its bit 0, so it shares bit 48 with <c>ASelect</c> (opcode bits 48-50).
/// </remarks>
readonly struct InstImadspI
{
    private readonly ulong _opcode;

    public InstImadspI(ulong opcode) => _opcode = opcode;

    public int Dest => (int)((_opcode >> 0) & 0xFF);
    public int SrcA => (int)((_opcode >> 8) & 0xFF);
    public int Imm20 => (int)((_opcode >> 37) & 0x80000) | (int)((_opcode >> 20) & 0x7FFFF);
    public int SrcC => (int)((_opcode >> 39) & 0xFF);
    public int Pred => (int)((_opcode >> 16) & 0x7);
    public bool PredInv => (_opcode & 0x80000) != 0;
    public ImadspASelect ASelect => (ImadspASelect)((_opcode >> 48) & 0x7);
    public ImadspBSelect BSelect => (ImadspBSelect)((_opcode >> 53) & 0x3);
    public ImadspASelect CSelect => (ImadspASelect)((int)((_opcode >> 50) & 0x6) | (int)((_opcode >> 48) & 0x1));
}
+
/// <summary>
/// Read-only field accessors over the raw 64-bit opcode of the <c>InstImadspC</c> encoding.
/// Every property is a plain shift/mask of the stored word; nothing is validated.
/// </summary>
/// <remarks>
/// This variant exposes <c>CbufSlot</c> (opcode bits 34-38) and <c>CbufOffset</c> (opcode bits 20-33).
/// <c>CSelect</c> is stitched together: opcode bits 51-52 become its bits 1-2 and opcode bit 48
/// becomes its bit 0, so it shares bit 48 with <c>ASelect</c> (opcode bits 48-50).
/// </remarks>
readonly struct InstImadspC
{
    private readonly ulong _opcode;

    public InstImadspC(ulong opcode) => _opcode = opcode;

    public int Dest => (int)((_opcode >> 0) & 0xFF);
    public int SrcA => (int)((_opcode >> 8) & 0xFF);
    public int CbufSlot => (int)((_opcode >> 34) & 0x1F);
    public int CbufOffset => (int)((_opcode >> 20) & 0x3FFF);
    public int SrcC => (int)((_opcode >> 39) & 0xFF);
    public int Pred => (int)((_opcode >> 16) & 0x7);
    public bool PredInv => (_opcode & 0x80000) != 0;
    public ImadspASelect ASelect => (ImadspASelect)((_opcode >> 48) & 0x7);
    public ImadspBSelect BSelect => (ImadspBSelect)((_opcode >> 53) & 0x3);
    public ImadspASelect CSelect => (ImadspASelect)((int)((_opcode >> 50) & 0x6) | (int)((_opcode >> 48) & 0x1));
}
+
/// <summary>
/// Read-only field accessors over the raw 64-bit opcode of the <c>InstImadspRc</c> encoding.
/// Every property is a plain shift/mask of the stored word; nothing is validated.
/// </summary>
/// <remarks>
/// The members and bit positions are the same as in the sibling <c>InstImadspC</c> struct; only
/// the declaration order of <c>SrcC</c> and the constant-buffer fields differs.
/// <c>CSelect</c> takes opcode bits 51-52 as its bits 1-2 and opcode bit 48 as its bit 0.
/// </remarks>
readonly struct InstImadspRc
{
    private readonly ulong _opcode;

    public InstImadspRc(ulong opcode) => _opcode = opcode;

    public int Dest => (int)((_opcode >> 0) & 0xFF);
    public int SrcA => (int)((_opcode >> 8) & 0xFF);
    public int SrcC => (int)((_opcode >> 39) & 0xFF);
    public int CbufSlot => (int)((_opcode >> 34) & 0x1F);
    public int CbufOffset => (int)((_opcode >> 20) & 0x3FFF);
    public int Pred => (int)((_opcode >> 16) & 0x7);
    public bool PredInv => (_opcode & 0x80000) != 0;
    public ImadspASelect ASelect => (ImadspASelect)((_opcode >> 48) & 0x7);
    public ImadspBSelect BSelect => (ImadspBSelect)((_opcode >> 53) & 0x3);
    public ImadspASelect CSelect => (ImadspASelect)((int)((_opcode >> 50) & 0x6) | (int)((_opcode >> 48) & 0x1));
}
+
/// <summary>
/// Read-only field accessors over the raw 64-bit opcode of the <c>InstImnmxR</c> encoding.
/// Every property is a plain shift/mask of the stored word; nothing is validated.
/// </summary>
/// <remarks>
/// <c>SrcPred</c> is the 3-bit value at opcode bits 39-41 with its <c>SrcPredInv</c> flag at bit 42;
/// <c>XMode</c> is opcode bits 43-44 and <c>Signed</c> is opcode bit 48.
/// </remarks>
readonly struct InstImnmxR
{
    private readonly ulong _opcode;

    public InstImnmxR(ulong opcode) => _opcode = opcode;

    public int Dest => (int)((_opcode >> 0) & 0xFF);
    public int SrcA => (int)((_opcode >> 8) & 0xFF);
    public int SrcB => (int)((_opcode >> 20) & 0xFF);
    public int Pred => (int)((_opcode >> 16) & 0x7);
    public bool PredInv => (_opcode & 0x80000) != 0;
    public bool WriteCC => (_opcode & 0x800000000000) != 0;
    public bool Signed => (_opcode & 0x1000000000000) != 0;
    public XMode XMode => (XMode)((_opcode >> 43) & 0x3);
    public int SrcPred => (int)((_opcode >> 39) & 0x7);
    public bool SrcPredInv => (_opcode & 0x40000000000) != 0;
}
+
/// <summary>
/// Read-only field accessors over the raw 64-bit opcode of the <c>InstImnmxI</c> encoding.
/// Every property is a plain shift/mask of the stored word; nothing is validated.
/// </summary>
/// <remarks>
/// <c>Imm20</c> is a 20-bit value: bits 0-18 come from opcode bits 20-38 and bit 19 comes from
/// opcode bit 56; it is not sign-extended here.
/// <c>SrcPred</c> is opcode bits 39-41 with its <c>SrcPredInv</c> flag at bit 42.
/// </remarks>
readonly struct InstImnmxI
{
    private readonly ulong _opcode;

    public InstImnmxI(ulong opcode) => _opcode = opcode;

    public int Dest => (int)((_opcode >> 0) & 0xFF);
    public int SrcA => (int)((_opcode >> 8) & 0xFF);
    public int Imm20 => (int)((_opcode >> 37) & 0x80000) | (int)((_opcode >> 20) & 0x7FFFF);
    public int Pred => (int)((_opcode >> 16) & 0x7);
    public bool PredInv => (_opcode & 0x80000) != 0;
    public bool WriteCC => (_opcode & 0x800000000000) != 0;
    public bool Signed => (_opcode & 0x1000000000000) != 0;
    public XMode XMode => (XMode)((_opcode >> 43) & 0x3);
    public int SrcPred => (int)((_opcode >> 39) & 0x7);
    public bool SrcPredInv => (_opcode & 0x40000000000) != 0;
}
+
/// <summary>
/// Read-only field accessors over the raw 64-bit opcode of the <c>InstImnmxC</c> encoding.
/// Every property is a plain shift/mask of the stored word; nothing is validated.
/// </summary>
/// <remarks>
/// This variant exposes <c>CbufSlot</c> (opcode bits 34-38) and <c>CbufOffset</c> (opcode bits 20-33).
/// <c>SrcPred</c> is opcode bits 39-41 with its <c>SrcPredInv</c> flag at bit 42.
/// </remarks>
readonly struct InstImnmxC
{
    private readonly ulong _opcode;

    public InstImnmxC(ulong opcode) => _opcode = opcode;

    public int Dest => (int)((_opcode >> 0) & 0xFF);
    public int SrcA => (int)((_opcode >> 8) & 0xFF);
    public int CbufSlot => (int)((_opcode >> 34) & 0x1F);
    public int CbufOffset => (int)((_opcode >> 20) & 0x3FFF);
    public int Pred => (int)((_opcode >> 16) & 0x7);
    public bool PredInv => (_opcode & 0x80000) != 0;
    public bool WriteCC => (_opcode & 0x800000000000) != 0;
    public bool Signed => (_opcode & 0x1000000000000) != 0;
    public XMode XMode => (XMode)((_opcode >> 43) & 0x3);
    public int SrcPred => (int)((_opcode >> 39) & 0x7);
    public bool SrcPredInv => (_opcode & 0x40000000000) != 0;
}
+
/// <summary>
/// Read-only field accessors over the raw 64-bit opcode of the <c>InstImulR</c> encoding.
/// Every property is a plain shift/mask of the stored word; nothing is validated.
/// </summary>
/// <remarks>
/// The three mode flags are contiguous: <c>Hilo</c> bit 39, <c>ASigned</c> bit 40,
/// <c>BSigned</c> bit 41. <c>WriteCC</c> is opcode bit 47.
/// </remarks>
readonly struct InstImulR
{
    private readonly ulong _opcode;

    public InstImulR(ulong opcode) => _opcode = opcode;

    public int Dest => (int)((_opcode >> 0) & 0xFF);
    public int SrcA => (int)((_opcode >> 8) & 0xFF);
    public int SrcB => (int)((_opcode >> 20) & 0xFF);
    public int Pred => (int)((_opcode >> 16) & 0x7);
    public bool PredInv => (_opcode & 0x80000) != 0;
    public bool WriteCC => (_opcode & 0x800000000000) != 0;
    public bool ASigned => (_opcode & 0x10000000000) != 0;
    public bool BSigned => (_opcode & 0x20000000000) != 0;
    public bool Hilo => (_opcode & 0x8000000000) != 0;
}
+
/// <summary>
/// Read-only field accessors over the raw 64-bit opcode of the <c>InstImulI</c> encoding.
/// Every property is a plain shift/mask of the stored word; nothing is validated.
/// </summary>
/// <remarks>
/// <c>Imm20</c> is a 20-bit value: bits 0-18 come from opcode bits 20-38 and bit 19 comes from
/// opcode bit 56; it is not sign-extended here.
/// Mode flags: <c>Hilo</c> bit 39, <c>ASigned</c> bit 40, <c>BSigned</c> bit 41.
/// </remarks>
readonly struct InstImulI
{
    private readonly ulong _opcode;

    public InstImulI(ulong opcode) => _opcode = opcode;

    public int Dest => (int)((_opcode >> 0) & 0xFF);
    public int SrcA => (int)((_opcode >> 8) & 0xFF);
    public int Imm20 => (int)((_opcode >> 37) & 0x80000) | (int)((_opcode >> 20) & 0x7FFFF);
    public int Pred => (int)((_opcode >> 16) & 0x7);
    public bool PredInv => (_opcode & 0x80000) != 0;
    public bool WriteCC => (_opcode & 0x800000000000) != 0;
    public bool ASigned => (_opcode & 0x10000000000) != 0;
    public bool BSigned => (_opcode & 0x20000000000) != 0;
    public bool Hilo => (_opcode & 0x8000000000) != 0;
}
+
/// <summary>
/// Read-only field accessors over the raw 64-bit opcode of the <c>InstImulC</c> encoding.
/// Every property is a plain shift/mask of the stored word; nothing is validated.
/// </summary>
/// <remarks>
/// This variant exposes <c>CbufSlot</c> (opcode bits 34-38) and <c>CbufOffset</c> (opcode bits 20-33).
/// Mode flags start directly above the slot field: <c>Hilo</c> bit 39, <c>ASigned</c> bit 40,
/// <c>BSigned</c> bit 41.
/// </remarks>
readonly struct InstImulC
{
    private readonly ulong _opcode;

    public InstImulC(ulong opcode) => _opcode = opcode;

    public int Dest => (int)((_opcode >> 0) & 0xFF);
    public int SrcA => (int)((_opcode >> 8) & 0xFF);
    public int CbufSlot => (int)((_opcode >> 34) & 0x1F);
    public int CbufOffset => (int)((_opcode >> 20) & 0x3FFF);
    public int Pred => (int)((_opcode >> 16) & 0x7);
    public bool PredInv => (_opcode & 0x80000) != 0;
    public bool WriteCC => (_opcode & 0x800000000000) != 0;
    public bool ASigned => (_opcode & 0x10000000000) != 0;
    public bool BSigned => (_opcode & 0x20000000000) != 0;
    public bool Hilo => (_opcode & 0x8000000000) != 0;
}
+
/// <summary>
/// Read-only field accessors over the raw 64-bit opcode of the <c>InstImul32i</c> encoding.
/// Every property is a plain shift/mask of the stored word; nothing is validated.
/// </summary>
/// <remarks>
/// <c>Imm32</c> shifts the opcode right by 20 and casts to <c>int</c>; with overflow checking off
/// (the C# default) that keeps opcode bits 20-51. Flags above that window:
/// <c>WriteCC</c> bit 52, <c>Hilo</c> bit 53, <c>ASigned</c> bit 54, <c>BSigned</c> bit 55.
/// </remarks>
readonly struct InstImul32i
{
    private readonly ulong _opcode;

    public InstImul32i(ulong opcode) => _opcode = opcode;

    public int Dest => (int)((_opcode >> 0) & 0xFF);
    public int SrcA => (int)((_opcode >> 8) & 0xFF);
    public int Pred => (int)((_opcode >> 16) & 0x7);
    public bool PredInv => (_opcode & 0x80000) != 0;
    public int Imm32 => (int)(_opcode >> 20);
    public bool ASigned => (_opcode & 0x40000000000000) != 0;
    public bool BSigned => (_opcode & 0x80000000000000) != 0;
    public bool Hilo => (_opcode & 0x20000000000000) != 0;
    public bool WriteCC => (_opcode & 0x10000000000000) != 0;
}
+
/// <summary>
/// Read-only field accessors over the raw 64-bit opcode of the <c>InstIpa</c> encoding.
/// Every property is a plain shift/mask of the stored word; nothing is validated.
/// </summary>
/// <remarks>
/// Besides the usual register and predicate fields this encoding exposes <c>Imm10</c>
/// (opcode bits 28-37), <c>Idx</c> (bit 38), <c>SrcPred</c> (bits 47-49) with <c>SrcPredInv</c>
/// (bit 50), <c>Sat</c> (bit 51), <c>Msi</c> (bits 52-53) and <c>IpaOp</c> (bits 54-55).
/// </remarks>
readonly struct InstIpa
{
    private readonly ulong _opcode;

    public InstIpa(ulong opcode) => _opcode = opcode;

    public int Dest => (int)((_opcode >> 0) & 0xFF);
    public int SrcA => (int)((_opcode >> 8) & 0xFF);
    public int SrcB => (int)((_opcode >> 20) & 0xFF);
    public int SrcC => (int)((_opcode >> 39) & 0xFF);
    public int Pred => (int)((_opcode >> 16) & 0x7);
    public bool PredInv => (_opcode & 0x80000) != 0;
    public IpaOp IpaOp => (IpaOp)((_opcode >> 54) & 0x3);
    public int Msi => (int)((_opcode >> 52) & 0x3);
    public bool Sat => (_opcode & 0x8000000000000) != 0;
    public bool Idx => (_opcode & 0x4000000000) != 0;
    public int Imm10 => (int)((_opcode >> 28) & 0x3FF);
    public int SrcPred => (int)((_opcode >> 47) & 0x7);
    public bool SrcPredInv => (_opcode & 0x4000000000000) != 0;
}
+
/// <summary>
/// Read-only field accessors over the raw 64-bit opcode of the <c>InstIsberd</c> encoding.
/// Every property is a plain shift/mask of the stored word; nothing is validated.
/// </summary>
/// <remarks>
/// <c>P</c> is opcode bit 31, <c>O</c> is opcode bit 32, <c>IBase</c> is opcode bits 33-34 and
/// <c>AlSize</c> is opcode bits 47-48.
/// </remarks>
readonly struct InstIsberd
{
    private readonly ulong _opcode;

    public InstIsberd(ulong opcode) => _opcode = opcode;

    public int Dest => (int)((_opcode >> 0) & 0xFF);
    public int SrcA => (int)((_opcode >> 8) & 0xFF);
    public int Pred => (int)((_opcode >> 16) & 0x7);
    public bool PredInv => (_opcode & 0x80000) != 0;
    public AlSize AlSize => (AlSize)((_opcode >> 47) & 0x3);
    public IBase IBase => (IBase)((_opcode >> 33) & 0x3);
    public bool O => (_opcode & 0x100000000) != 0;
    public bool P => (_opcode & 0x80000000) != 0;
}
+
/// <summary>
/// Read-only field accessors over the raw 64-bit opcode of the <c>InstIscaddR</c> encoding.
/// Every property is a plain shift/mask of the stored word; nothing is validated.
/// </summary>
/// <remarks>
/// <c>Imm5</c> is the 5-bit value at opcode bits 39-43 and <c>AvgMode</c> is opcode bits 48-49.
/// </remarks>
readonly struct InstIscaddR
{
    private readonly ulong _opcode;

    public InstIscaddR(ulong opcode) => _opcode = opcode;

    public int Dest => (int)((_opcode >> 0) & 0xFF);
    public int SrcA => (int)((_opcode >> 8) & 0xFF);
    public int SrcB => (int)((_opcode >> 20) & 0xFF);
    public int Pred => (int)((_opcode >> 16) & 0x7);
    public bool PredInv => (_opcode & 0x80000) != 0;
    public bool WriteCC => (_opcode & 0x800000000000) != 0;
    public int Imm5 => (int)((_opcode >> 39) & 0x1F);
    public AvgMode AvgMode => (AvgMode)((_opcode >> 48) & 0x3);
}
+
/// <summary>
/// Read-only field accessors over the raw 64-bit opcode of the <c>InstIscaddI</c> encoding.
/// Every property is a plain shift/mask of the stored word; nothing is validated.
/// </summary>
/// <remarks>
/// <c>Imm20</c> is a 20-bit value: bits 0-18 come from opcode bits 20-38 and bit 19 comes from
/// opcode bit 56; it is not sign-extended here. <c>Imm5</c> is opcode bits 39-43.
/// </remarks>
readonly struct InstIscaddI
{
    private readonly ulong _opcode;

    public InstIscaddI(ulong opcode) => _opcode = opcode;

    public int Dest => (int)((_opcode >> 0) & 0xFF);
    public int SrcA => (int)((_opcode >> 8) & 0xFF);
    public int Imm20 => (int)((_opcode >> 37) & 0x80000) | (int)((_opcode >> 20) & 0x7FFFF);
    public int Pred => (int)((_opcode >> 16) & 0x7);
    public bool PredInv => (_opcode & 0x80000) != 0;
    public bool WriteCC => (_opcode & 0x800000000000) != 0;
    public int Imm5 => (int)((_opcode >> 39) & 0x1F);
    public AvgMode AvgMode => (AvgMode)((_opcode >> 48) & 0x3);
}
+
/// <summary>
/// Read-only field accessors over the raw 64-bit opcode of the <c>InstIscaddC</c> encoding.
/// Every property is a plain shift/mask of the stored word; nothing is validated.
/// </summary>
/// <remarks>
/// This variant exposes <c>CbufSlot</c> (opcode bits 34-38) and <c>CbufOffset</c> (opcode bits 20-33);
/// <c>Imm5</c> sits directly above the slot field at opcode bits 39-43.
/// </remarks>
readonly struct InstIscaddC
{
    private readonly ulong _opcode;

    public InstIscaddC(ulong opcode) => _opcode = opcode;

    public int Dest => (int)((_opcode >> 0) & 0xFF);
    public int SrcA => (int)((_opcode >> 8) & 0xFF);
    public int CbufSlot => (int)((_opcode >> 34) & 0x1F);
    public int CbufOffset => (int)((_opcode >> 20) & 0x3FFF);
    public int Pred => (int)((_opcode >> 16) & 0x7);
    public bool PredInv => (_opcode & 0x80000) != 0;
    public bool WriteCC => (_opcode & 0x800000000000) != 0;
    public int Imm5 => (int)((_opcode >> 39) & 0x1F);
    public AvgMode AvgMode => (AvgMode)((_opcode >> 48) & 0x3);
}
+
/// <summary>
/// Read-only field accessors over the raw 64-bit opcode of the <c>InstIscadd32i</c> encoding.
/// Every property is a plain shift/mask of the stored word; nothing is validated.
/// </summary>
/// <remarks>
/// <c>Imm32</c> shifts the opcode right by 20 and casts to <c>int</c>; with overflow checking off
/// (the C# default) that keeps opcode bits 20-51. <c>WriteCC</c> is opcode bit 52 and
/// <c>Imm5</c> is opcode bits 53-57.
/// </remarks>
readonly struct InstIscadd32i
{
    private readonly ulong _opcode;

    public InstIscadd32i(ulong opcode) => _opcode = opcode;

    public int Dest => (int)((_opcode >> 0) & 0xFF);
    public int SrcA => (int)((_opcode >> 8) & 0xFF);
    public int Pred => (int)((_opcode >> 16) & 0x7);
    public bool PredInv => (_opcode & 0x80000) != 0;
    public int Imm32 => (int)(_opcode >> 20);
    public bool WriteCC => (_opcode & 0x10000000000000) != 0;
    public int Imm5 => (int)((_opcode >> 53) & 0x1F);
}
+
/// <summary>
/// Read-only field accessors over the raw 64-bit opcode of the <c>InstIsetR</c> encoding.
/// Every property is a plain shift/mask of the stored word; nothing is validated.
/// </summary>
/// <remarks>
/// Layout above the operand fields: <c>SrcPred</c> bits 39-41, <c>SrcPredInv</c> bit 42,
/// <c>X</c> bit 43, <c>BVal</c> bit 44, <c>Bop</c> bits 45-46, <c>WriteCC</c> bit 47,
/// <c>Signed</c> bit 48, <c>IComp</c> bits 49-51.
/// </remarks>
readonly struct InstIsetR
{
    private readonly ulong _opcode;

    public InstIsetR(ulong opcode) => _opcode = opcode;

    public int Dest => (int)((_opcode >> 0) & 0xFF);
    public int SrcA => (int)((_opcode >> 8) & 0xFF);
    public int SrcB => (int)((_opcode >> 20) & 0xFF);
    public int Pred => (int)((_opcode >> 16) & 0x7);
    public bool PredInv => (_opcode & 0x80000) != 0;
    public bool WriteCC => (_opcode & 0x800000000000) != 0;
    public IComp IComp => (IComp)((_opcode >> 49) & 0x7);
    public bool Signed => (_opcode & 0x1000000000000) != 0;
    public int SrcPred => (int)((_opcode >> 39) & 0x7);
    public bool SrcPredInv => (_opcode & 0x40000000000) != 0;
    public bool BVal => (_opcode & 0x100000000000) != 0;
    public BoolOp Bop => (BoolOp)((_opcode >> 45) & 0x3);
    public bool X => (_opcode & 0x80000000000) != 0;
}
+
/// <summary>
/// Read-only field accessors over the raw 64-bit opcode of the <c>InstIsetI</c> encoding.
/// Every property is a plain shift/mask of the stored word; nothing is validated.
/// </summary>
/// <remarks>
/// <c>Imm20</c> is a 20-bit value: bits 0-18 come from opcode bits 20-38 and bit 19 comes from
/// opcode bit 56; it is not sign-extended here.
/// Layout above it: <c>SrcPred</c> bits 39-41, <c>SrcPredInv</c> bit 42, <c>X</c> bit 43,
/// <c>BVal</c> bit 44, <c>Bop</c> bits 45-46, <c>WriteCC</c> bit 47, <c>Signed</c> bit 48,
/// <c>IComp</c> bits 49-51.
/// </remarks>
readonly struct InstIsetI
{
    private readonly ulong _opcode;

    public InstIsetI(ulong opcode) => _opcode = opcode;

    public int Dest => (int)((_opcode >> 0) & 0xFF);
    public int SrcA => (int)((_opcode >> 8) & 0xFF);
    public int Imm20 => (int)((_opcode >> 37) & 0x80000) | (int)((_opcode >> 20) & 0x7FFFF);
    public int Pred => (int)((_opcode >> 16) & 0x7);
    public bool PredInv => (_opcode & 0x80000) != 0;
    public bool WriteCC => (_opcode & 0x800000000000) != 0;
    public IComp IComp => (IComp)((_opcode >> 49) & 0x7);
    public bool Signed => (_opcode & 0x1000000000000) != 0;
    public int SrcPred => (int)((_opcode >> 39) & 0x7);
    public bool SrcPredInv => (_opcode & 0x40000000000) != 0;
    public bool BVal => (_opcode & 0x100000000000) != 0;
    public BoolOp Bop => (BoolOp)((_opcode >> 45) & 0x3);
    public bool X => (_opcode & 0x80000000000) != 0;
}
+
/// <summary>
/// Read-only field accessors over the raw 64-bit opcode of the <c>InstIsetC</c> encoding.
/// Every property is a plain shift/mask of the stored word; nothing is validated.
/// </summary>
/// <remarks>
/// This variant exposes <c>CbufSlot</c> (opcode bits 34-38) and <c>CbufOffset</c> (opcode bits 20-33).
/// Layout above them: <c>SrcPred</c> bits 39-41, <c>SrcPredInv</c> bit 42, <c>X</c> bit 43,
/// <c>BVal</c> bit 44, <c>Bop</c> bits 45-46, <c>WriteCC</c> bit 47, <c>Signed</c> bit 48,
/// <c>IComp</c> bits 49-51.
/// </remarks>
readonly struct InstIsetC
{
    private readonly ulong _opcode;

    public InstIsetC(ulong opcode) => _opcode = opcode;

    public int Dest => (int)((_opcode >> 0) & 0xFF);
    public int SrcA => (int)((_opcode >> 8) & 0xFF);
    public int CbufSlot => (int)((_opcode >> 34) & 0x1F);
    public int CbufOffset => (int)((_opcode >> 20) & 0x3FFF);
    public int Pred => (int)((_opcode >> 16) & 0x7);
    public bool PredInv => (_opcode & 0x80000) != 0;
    public bool WriteCC => (_opcode & 0x800000000000) != 0;
    public IComp IComp => (IComp)((_opcode >> 49) & 0x7);
    public bool Signed => (_opcode & 0x1000000000000) != 0;
    public int SrcPred => (int)((_opcode >> 39) & 0x7);
    public bool SrcPredInv => (_opcode & 0x40000000000) != 0;
    public bool BVal => (_opcode & 0x100000000000) != 0;
    public BoolOp Bop => (BoolOp)((_opcode >> 45) & 0x3);
    public bool X => (_opcode & 0x80000000000) != 0;
}
+
/// <summary>
/// Read-only field accessors over the raw 64-bit opcode of the <c>InstIsetpR</c> encoding.
/// Every property is a plain shift/mask of the stored word; nothing is validated.
/// </summary>
/// <remarks>
/// This encoding has no <c>Dest</c> register field; the low bits instead hold two 3-bit values:
/// <c>DestPredInv</c> at opcode bits 0-2 and <c>DestPred</c> at opcode bits 3-5.
/// Despite its name, <c>DestPredInv</c> is a 3-bit integer, not a flag.
/// NOTE(review): presumably the index of the predicate that receives the inverted result;
/// confirm against the instruction emitter.
/// </remarks>
readonly struct InstIsetpR
{
    private readonly ulong _opcode;

    public InstIsetpR(ulong opcode) => _opcode = opcode;

    public int SrcA => (int)((_opcode >> 8) & 0xFF);
    public int SrcB => (int)((_opcode >> 20) & 0xFF);
    public int Pred => (int)((_opcode >> 16) & 0x7);
    public bool PredInv => (_opcode & 0x80000) != 0;
    public IComp IComp => (IComp)((_opcode >> 49) & 0x7);
    public bool Signed => (_opcode & 0x1000000000000) != 0;
    public BoolOp Bop => (BoolOp)((_opcode >> 45) & 0x3);
    public bool X => (_opcode & 0x80000000000) != 0;
    public int SrcPred => (int)((_opcode >> 39) & 0x7);
    public bool SrcPredInv => (_opcode & 0x40000000000) != 0;
    public int DestPred => (int)((_opcode >> 3) & 0x7);
    public int DestPredInv => (int)((_opcode >> 0) & 0x7);
}
+
/// <summary>
/// Read-only field accessors over the raw 64-bit opcode of the <c>InstIsetpI</c> encoding.
/// Every property is a plain shift/mask of the stored word; nothing is validated.
/// </summary>
/// <remarks>
/// <c>Imm20</c> is a 20-bit value: bits 0-18 come from opcode bits 20-38 and bit 19 comes from
/// opcode bit 56; it is not sign-extended here.
/// The low bits hold two 3-bit values: <c>DestPredInv</c> at opcode bits 0-2 and <c>DestPred</c>
/// at opcode bits 3-5. Despite its name, <c>DestPredInv</c> is a 3-bit integer, not a flag.
/// NOTE(review): presumably the index of the predicate that receives the inverted result; confirm.
/// </remarks>
readonly struct InstIsetpI
{
    private readonly ulong _opcode;

    public InstIsetpI(ulong opcode) => _opcode = opcode;

    public int SrcA => (int)((_opcode >> 8) & 0xFF);
    public int Imm20 => (int)((_opcode >> 37) & 0x80000) | (int)((_opcode >> 20) & 0x7FFFF);
    public int Pred => (int)((_opcode >> 16) & 0x7);
    public bool PredInv => (_opcode & 0x80000) != 0;
    public IComp IComp => (IComp)((_opcode >> 49) & 0x7);
    public bool Signed => (_opcode & 0x1000000000000) != 0;
    public BoolOp Bop => (BoolOp)((_opcode >> 45) & 0x3);
    public bool X => (_opcode & 0x80000000000) != 0;
    public int SrcPred => (int)((_opcode >> 39) & 0x7);
    public bool SrcPredInv => (_opcode & 0x40000000000) != 0;
    public int DestPred => (int)((_opcode >> 3) & 0x7);
    public int DestPredInv => (int)((_opcode >> 0) & 0x7);
}
+
/// <summary>
/// Read-only field accessors over the raw 64-bit opcode of the <c>InstIsetpC</c> encoding.
/// Every property is a plain shift/mask of the stored word; nothing is validated.
/// </summary>
/// <remarks>
/// This variant exposes <c>CbufSlot</c> (opcode bits 34-38) and <c>CbufOffset</c> (opcode bits 20-33).
/// The low bits hold two 3-bit values: <c>DestPredInv</c> at opcode bits 0-2 and <c>DestPred</c>
/// at opcode bits 3-5. Despite its name, <c>DestPredInv</c> is a 3-bit integer, not a flag.
/// NOTE(review): presumably the index of the predicate that receives the inverted result; confirm.
/// </remarks>
readonly struct InstIsetpC
{
    private readonly ulong _opcode;

    public InstIsetpC(ulong opcode) => _opcode = opcode;

    public int SrcA => (int)((_opcode >> 8) & 0xFF);
    public int CbufSlot => (int)((_opcode >> 34) & 0x1F);
    public int CbufOffset => (int)((_opcode >> 20) & 0x3FFF);
    public int Pred => (int)((_opcode >> 16) & 0x7);
    public bool PredInv => (_opcode & 0x80000) != 0;
    public IComp IComp => (IComp)((_opcode >> 49) & 0x7);
    public bool Signed => (_opcode & 0x1000000000000) != 0;
    public BoolOp Bop => (BoolOp)((_opcode >> 45) & 0x3);
    public bool X => (_opcode & 0x80000000000) != 0;
    public int SrcPred => (int)((_opcode >> 39) & 0x7);
    public bool SrcPredInv => (_opcode & 0x40000000000) != 0;
    public int DestPred => (int)((_opcode >> 3) & 0x7);
    public int DestPredInv => (int)((_opcode >> 0) & 0x7);
}
+
/// <summary>
/// Read-only field accessors over the raw 64-bit opcode of the <c>InstJcal</c> encoding.
/// </summary>
/// <remarks>
/// <c>Imm32</c> shifts the opcode right by 20 and casts to <c>int</c>; with overflow checking off
/// (the C# default) that keeps opcode bits 20-51. <c>Ca</c> is opcode bit 5 and <c>Inc</c> is
/// opcode bit 6.
/// </remarks>
readonly struct InstJcal
{
    private readonly ulong _opcode;

    public InstJcal(ulong opcode) => _opcode = opcode;

    public int Imm32 => (int)(_opcode >> 20);
    public bool Ca => (_opcode & 0x20) != 0;
    public bool Inc => (_opcode & 0x40) != 0;
}
+
+ /// <summary>
+ /// Read-only bit-field accessors for the layout named by <c>InstJmp</c>.
+ /// Each property extracts its bits from the 64-bit word passed to the constructor.
+ /// </summary>
+ readonly struct InstJmp
+ {
+     private readonly ulong _opcode;
+     public InstJmp(ulong opcode) => _opcode = opcode;
+     public int SrcA => (int)((_opcode >> 8) & 0xFF);
+     public int Pred => (int)((_opcode >> 16) & 0x7);
+     public bool PredInv => (_opcode & 0x80000) != 0; // bit 19
+     public Ccc Ccc => (Ccc)((_opcode >> 0) & 0x1F);
+     public bool Ca => (_opcode & 0x20) != 0; // bit 5
+     // Keeps the low 32 bits of the shifted word (bits 20..51); unchecked so the truncation never throws in a checked build.
+     public int Imm32 => unchecked((int)(_opcode >> 20));
+     public bool Lmt => (_opcode & 0x40) != 0; // bit 6
+     public bool U => (_opcode & 0x80) != 0; // bit 7
+ }
+
+ /// <summary>
+ /// Read-only bit-field accessors for the layout named by <c>InstJmx</c>.
+ /// Each property extracts its bits from the 64-bit word passed to the constructor.
+ /// </summary>
+ readonly struct InstJmx
+ {
+     private readonly ulong _opcode;
+     public InstJmx(ulong opcode) => _opcode = opcode;
+     public int SrcA => (int)((_opcode >> 8) & 0xFF);
+     public int Pred => (int)((_opcode >> 16) & 0x7);
+     public bool PredInv => (_opcode & 0x80000) != 0; // bit 19
+     public Ccc Ccc => (Ccc)((_opcode >> 0) & 0x1F);
+     public bool Ca => (_opcode & 0x20) != 0; // bit 5
+     // Keeps the low 32 bits of the shifted word (bits 20..51); unchecked so the truncation never throws in a checked build.
+     public int Imm32 => unchecked((int)(_opcode >> 20));
+     public bool Lmt => (_opcode & 0x40) != 0; // bit 6
+ }
+
+ /// <summary>
+ /// Read-only bit-field accessors for the layout named by <c>InstKil</c>.
+ /// Each property extracts its bits from the 64-bit word passed to the constructor.
+ /// </summary>
+ readonly struct InstKil
+ {
+     private readonly ulong _opcode;
+     public InstKil(ulong opcode) => _opcode = opcode;
+     public int Pred => (int)((_opcode >> 16) & 0x7);
+     public bool PredInv => (_opcode & 0x80000) != 0; // bit 19
+     public Ccc Ccc => (Ccc)((_opcode >> 0) & 0x1F);
+ }
+
+ /// <summary>
+ /// Read-only bit-field accessors for the layout named by <c>InstLd</c>.
+ /// Each property extracts its bits from the 64-bit word passed to the constructor.
+ /// </summary>
+ readonly struct InstLd
+ {
+     private readonly ulong _opcode;
+     public InstLd(ulong opcode) => _opcode = opcode;
+     public int Dest => (int)((_opcode >> 0) & 0xFF);
+     public int SrcA => (int)((_opcode >> 8) & 0xFF);
+     public int Pred => (int)((_opcode >> 16) & 0x7);
+     public bool PredInv => (_opcode & 0x80000) != 0; // bit 19
+     public int SrcPred => (int)((_opcode >> 58) & 0x7);
+     public CacheOpLd CacheOp => (CacheOpLd)((_opcode >> 56) & 0x3);
+     public LsSize LsSize => (LsSize)((_opcode >> 53) & 0x7);
+     public bool E => (_opcode & 0x10000000000000) != 0; // bit 52
+     // Keeps the low 32 bits of the shifted word (bits 20..51); unchecked so the truncation never throws in a checked build.
+     public int Imm32 => unchecked((int)(_opcode >> 20));
+ }
+
+ /// <summary>
+ /// Read-only bit-field accessors for the layout named by <c>InstLdc</c>.
+ /// Each property extracts its bits from the 64-bit word passed to the constructor.
+ /// </summary>
+ readonly struct InstLdc
+ {
+     private readonly ulong _opcode;
+     public InstLdc(ulong opcode) => _opcode = opcode;
+     public int Dest => (int)((_opcode >> 0) & 0xFF);
+     public int SrcA => (int)((_opcode >> 8) & 0xFF);
+     public int Pred => (int)((_opcode >> 16) & 0x7);
+     public bool PredInv => (_opcode & 0x80000) != 0; // bit 19
+     public LsSize2 LsSize => (LsSize2)((_opcode >> 48) & 0x7);
+     public AddressMode AddressMode => (AddressMode)((_opcode >> 44) & 0x3);
+     // Note: this layout reads the slot from bit 36 and a 16-bit offset, unlike the 14-bit offset / bit-34 slot used by the *C structs in this file.
+     public int CbufSlot => (int)((_opcode >> 36) & 0x1F);
+     public int CbufOffset => (int)((_opcode >> 20) & 0xFFFF);
+ }
+
+ /// <summary>
+ /// Read-only bit-field accessors for the layout named by <c>InstLdg</c>.
+ /// Each property extracts its bits from the 64-bit word passed to the constructor.
+ /// </summary>
+ readonly struct InstLdg
+ {
+     private readonly ulong _opcode;
+     public InstLdg(ulong opcode) => _opcode = opcode;
+     public int Dest => (int)((_opcode >> 0) & 0xFF);
+     public int SrcA => (int)((_opcode >> 8) & 0xFF);
+     public int Pred => (int)((_opcode >> 16) & 0x7);
+     public bool PredInv => (_opcode & 0x80000) != 0; // bit 19
+     public LsSize LsSize => (LsSize)((_opcode >> 48) & 0x7);
+     public CacheOpLd CacheOp => (CacheOpLd)((_opcode >> 46) & 0x3);
+     public bool E => (_opcode & 0x200000000000) != 0; // bit 45
+     // Raw 24-bit field (bits 20..43), returned without sign extension.
+     public int Imm24 => (int)((_opcode >> 20) & 0xFFFFFF);
+ }
+
+ /// <summary>
+ /// Read-only bit-field accessors for the layout named by <c>InstLdl</c>.
+ /// Each property extracts its bits from the 64-bit word passed to the constructor.
+ /// </summary>
+ readonly struct InstLdl
+ {
+     private readonly ulong _opcode;
+     public InstLdl(ulong opcode) => _opcode = opcode;
+     public int Dest => (int)((_opcode >> 0) & 0xFF);
+     public int SrcA => (int)((_opcode >> 8) & 0xFF);
+     public int Pred => (int)((_opcode >> 16) & 0x7);
+     public bool PredInv => (_opcode & 0x80000) != 0; // bit 19
+     public LsSize2 LsSize => (LsSize2)((_opcode >> 48) & 0x7);
+     public CacheOp2 CacheOp => (CacheOp2)((_opcode >> 44) & 0x3);
+     // Raw 24-bit field (bits 20..43), returned without sign extension.
+     public int Imm24 => (int)((_opcode >> 20) & 0xFFFFFF);
+ }
+
+ /// <summary>
+ /// Read-only bit-field accessors for the layout named by <c>InstLds</c>.
+ /// Each property extracts its bits from the 64-bit word passed to the constructor.
+ /// </summary>
+ readonly struct InstLds
+ {
+     private readonly ulong _opcode;
+     public InstLds(ulong opcode) => _opcode = opcode;
+     public int Dest => (int)((_opcode >> 0) & 0xFF);
+     public int SrcA => (int)((_opcode >> 8) & 0xFF);
+     public int Pred => (int)((_opcode >> 16) & 0x7);
+     public bool PredInv => (_opcode & 0x80000) != 0; // bit 19
+     public LsSize2 LsSize => (LsSize2)((_opcode >> 48) & 0x7);
+     public bool U => (_opcode & 0x100000000000) != 0; // bit 44
+     // Raw 24-bit field (bits 20..43), returned without sign extension.
+     public int Imm24 => (int)((_opcode >> 20) & 0xFFFFFF);
+ }
+
+ /// <summary>
+ /// Read-only bit-field accessors for the layout named by <c>InstLeaR</c>.
+ /// Each property extracts its bits from the 64-bit word passed to the constructor.
+ /// </summary>
+ readonly struct InstLeaR
+ {
+     private readonly ulong _opcode;
+     public InstLeaR(ulong opcode) => _opcode = opcode;
+     public int Dest => (int)((_opcode >> 0) & 0xFF);
+     public int SrcA => (int)((_opcode >> 8) & 0xFF);
+     public int SrcB => (int)((_opcode >> 20) & 0xFF);
+     public int Pred => (int)((_opcode >> 16) & 0x7);
+     public bool PredInv => (_opcode & 0x80000) != 0; // bit 19
+     public bool WriteCC => (_opcode & 0x800000000000) != 0; // bit 47
+     public bool X => (_opcode & 0x400000000000) != 0; // bit 46
+     public bool NegA => (_opcode & 0x200000000000) != 0; // bit 45
+     public int ImmU5 => (int)((_opcode >> 39) & 0x1F);
+     public int DestPred => (int)((_opcode >> 48) & 0x7);
+ }
+
+ /// <summary>
+ /// Read-only bit-field accessors for the layout named by <c>InstLeaI</c>.
+ /// Each property extracts its bits from the 64-bit word passed to the constructor.
+ /// </summary>
+ readonly struct InstLeaI
+ {
+     private readonly ulong _opcode;
+     public InstLeaI(ulong opcode) => _opcode = opcode;
+     public int Dest => (int)((_opcode >> 0) & 0xFF);
+     public int SrcA => (int)((_opcode >> 8) & 0xFF);
+     // Bit 56 supplies bit 19 of the result, bits 20..38 supply bits 0..18; no sign extension is applied here.
+     public int Imm20 => (int)((_opcode >> 37) & 0x80000) | (int)((_opcode >> 20) & 0x7FFFF);
+     public int Pred => (int)((_opcode >> 16) & 0x7);
+     public bool PredInv => (_opcode & 0x80000) != 0; // bit 19
+     public bool WriteCC => (_opcode & 0x800000000000) != 0; // bit 47
+     public bool X => (_opcode & 0x400000000000) != 0; // bit 46
+     public bool NegA => (_opcode & 0x200000000000) != 0; // bit 45
+     public int ImmU5 => (int)((_opcode >> 39) & 0x1F);
+     public int DestPred => (int)((_opcode >> 48) & 0x7);
+ }
+
+ /// <summary>
+ /// Read-only bit-field accessors for the layout named by <c>InstLeaC</c>.
+ /// Each property extracts its bits from the 64-bit word passed to the constructor.
+ /// </summary>
+ readonly struct InstLeaC
+ {
+     private readonly ulong _opcode;
+     public InstLeaC(ulong opcode) => _opcode = opcode;
+     public int Dest => (int)((_opcode >> 0) & 0xFF);
+     public int SrcA => (int)((_opcode >> 8) & 0xFF);
+     public int CbufSlot => (int)((_opcode >> 34) & 0x1F);
+     public int CbufOffset => (int)((_opcode >> 20) & 0x3FFF);
+     public int Pred => (int)((_opcode >> 16) & 0x7);
+     public bool PredInv => (_opcode & 0x80000) != 0; // bit 19
+     public bool WriteCC => (_opcode & 0x800000000000) != 0; // bit 47
+     public bool X => (_opcode & 0x400000000000) != 0; // bit 46
+     public bool NegA => (_opcode & 0x200000000000) != 0; // bit 45
+     public int ImmU5 => (int)((_opcode >> 39) & 0x1F);
+     public int DestPred => (int)((_opcode >> 48) & 0x7);
+ }
+
+ /// <summary>
+ /// Read-only bit-field accessors for the layout named by <c>InstLeaHiR</c>.
+ /// Each property extracts its bits from the 64-bit word passed to the constructor.
+ /// </summary>
+ readonly struct InstLeaHiR
+ {
+     private readonly ulong _opcode;
+     public InstLeaHiR(ulong opcode) => _opcode = opcode;
+     public int Dest => (int)((_opcode >> 0) & 0xFF);
+     public int SrcA => (int)((_opcode >> 8) & 0xFF);
+     public int SrcB => (int)((_opcode >> 20) & 0xFF);
+     public int SrcC => (int)((_opcode >> 39) & 0xFF);
+     public int Pred => (int)((_opcode >> 16) & 0x7);
+     public bool PredInv => (_opcode & 0x80000) != 0; // bit 19
+     public bool WriteCC => (_opcode & 0x800000000000) != 0; // bit 47
+     public bool X => (_opcode & 0x4000000000) != 0; // bit 38
+     public bool NegA => (_opcode & 0x2000000000) != 0; // bit 37
+     public int ImmU5 => (int)((_opcode >> 28) & 0x1F);
+     public int DestPred => (int)((_opcode >> 48) & 0x7);
+ }
+
+ /// <summary>
+ /// Read-only bit-field accessors for the layout named by <c>InstLeaHiC</c>.
+ /// Each property extracts its bits from the 64-bit word passed to the constructor.
+ /// </summary>
+ readonly struct InstLeaHiC
+ {
+     private readonly ulong _opcode;
+     public InstLeaHiC(ulong opcode) => _opcode = opcode;
+     public int Dest => (int)((_opcode >> 0) & 0xFF);
+     public int SrcA => (int)((_opcode >> 8) & 0xFF);
+     public int CbufSlot => (int)((_opcode >> 34) & 0x1F);
+     public int CbufOffset => (int)((_opcode >> 20) & 0x3FFF);
+     public int SrcC => (int)((_opcode >> 39) & 0xFF);
+     public int Pred => (int)((_opcode >> 16) & 0x7);
+     public bool PredInv => (_opcode & 0x80000) != 0; // bit 19
+     public bool WriteCC => (_opcode & 0x800000000000) != 0; // bit 47
+     public bool X => (_opcode & 0x200000000000000) != 0; // bit 57
+     public bool NegA => (_opcode & 0x100000000000000) != 0; // bit 56
+     public int ImmU5 => (int)((_opcode >> 51) & 0x1F);
+     public int DestPred => (int)((_opcode >> 48) & 0x7);
+ }
+
+ /// <summary>
+ /// Read-only wrapper around a raw 64-bit word for the layout named by <c>InstLepc</c>; it exposes no fields.
+ /// </summary>
+ readonly struct InstLepc
+ {
+     // Stored but never read by this type.
+     // NOTE(review): presumably kept so the struct stays the size of the raw word like its siblings; confirm before removing.
+     private readonly ulong _opcode;
+     public InstLepc(ulong opcode) => _opcode = opcode;
+ }
+
+ /// <summary>
+ /// Read-only bit-field accessors for the layout named by <c>InstLongjmp</c>.
+ /// The single property is extracted from the 64-bit word passed to the constructor.
+ /// </summary>
+ readonly struct InstLongjmp
+ {
+     private readonly ulong _opcode;
+     public InstLongjmp(ulong opcode) => _opcode = opcode;
+     public Ccc Ccc => (Ccc)((_opcode >> 0) & 0x1F);
+ }
+
+ /// <summary>
+ /// Read-only bit-field accessors for the layout named by <c>InstLopR</c>.
+ /// Each property extracts its bits from the 64-bit word passed to the constructor.
+ /// </summary>
+ readonly struct InstLopR
+ {
+     private readonly ulong _opcode;
+     public InstLopR(ulong opcode) => _opcode = opcode;
+     public int Dest => (int)((_opcode >> 0) & 0xFF);
+     public int SrcA => (int)((_opcode >> 8) & 0xFF);
+     public int SrcB => (int)((_opcode >> 20) & 0xFF);
+     public int Pred => (int)((_opcode >> 16) & 0x7);
+     public bool PredInv => (_opcode & 0x80000) != 0; // bit 19
+     public bool WriteCC => (_opcode & 0x800000000000) != 0; // bit 47
+     public int DestPred => (int)((_opcode >> 48) & 0x7);
+     public PredicateOp PredicateOp => (PredicateOp)((_opcode >> 44) & 0x3);
+     public bool X => (_opcode & 0x80000000000) != 0; // bit 43
+     // Same bits as the property called LogicOp in InstLopI/InstLopC; the shorter name is kept so existing callers compile.
+     public LogicOp Lop => (LogicOp)((_opcode >> 41) & 0x3);
+     public bool NegA => (_opcode & 0x8000000000) != 0; // bit 39
+     public bool NegB => (_opcode & 0x10000000000) != 0; // bit 40
+ }
+
+ /// <summary>
+ /// Read-only bit-field accessors for the layout named by <c>InstLopI</c>.
+ /// Each property extracts its bits from the 64-bit word passed to the constructor.
+ /// </summary>
+ readonly struct InstLopI
+ {
+     private readonly ulong _opcode;
+     public InstLopI(ulong opcode) => _opcode = opcode;
+     public int Dest => (int)((_opcode >> 0) & 0xFF);
+     public int SrcA => (int)((_opcode >> 8) & 0xFF);
+     // Bit 56 supplies bit 19 of the result, bits 20..38 supply bits 0..18; no sign extension is applied here.
+     public int Imm20 => (int)((_opcode >> 37) & 0x80000) | (int)((_opcode >> 20) & 0x7FFFF);
+     public int Pred => (int)((_opcode >> 16) & 0x7);
+     public bool PredInv => (_opcode & 0x80000) != 0; // bit 19
+     public bool WriteCC => (_opcode & 0x800000000000) != 0; // bit 47
+     public int DestPred => (int)((_opcode >> 48) & 0x7);
+     public PredicateOp PredicateOp => (PredicateOp)((_opcode >> 44) & 0x3);
+     public bool X => (_opcode & 0x80000000000) != 0; // bit 43
+     public LogicOp LogicOp => (LogicOp)((_opcode >> 41) & 0x3);
+     public bool NegA => (_opcode & 0x8000000000) != 0; // bit 39
+     public bool NegB => (_opcode & 0x10000000000) != 0; // bit 40
+ }
+
+ /// <summary>
+ /// Read-only bit-field accessors for the layout named by <c>InstLopC</c>.
+ /// Each property extracts its bits from the 64-bit word passed to the constructor.
+ /// </summary>
+ readonly struct InstLopC
+ {
+     private readonly ulong _opcode;
+     public InstLopC(ulong opcode) => _opcode = opcode;
+     public int Dest => (int)((_opcode >> 0) & 0xFF);
+     public int SrcA => (int)((_opcode >> 8) & 0xFF);
+     public int CbufSlot => (int)((_opcode >> 34) & 0x1F);
+     public int CbufOffset => (int)((_opcode >> 20) & 0x3FFF);
+     public int Pred => (int)((_opcode >> 16) & 0x7);
+     public bool PredInv => (_opcode & 0x80000) != 0; // bit 19
+     public bool WriteCC => (_opcode & 0x800000000000) != 0; // bit 47
+     public int DestPred => (int)((_opcode >> 48) & 0x7);
+     public PredicateOp PredicateOp => (PredicateOp)((_opcode >> 44) & 0x3);
+     public bool X => (_opcode & 0x80000000000) != 0; // bit 43
+     public LogicOp LogicOp => (LogicOp)((_opcode >> 41) & 0x3);
+     public bool NegA => (_opcode & 0x8000000000) != 0; // bit 39
+     public bool NegB => (_opcode & 0x10000000000) != 0; // bit 40
+ }
+
+ /// <summary>
+ /// Read-only bit-field accessors for the layout named by <c>InstLop3R</c>.
+ /// Each property extracts its bits from the 64-bit word passed to the constructor.
+ /// </summary>
+ readonly struct InstLop3R
+ {
+     private readonly ulong _opcode;
+     public InstLop3R(ulong opcode) => _opcode = opcode;
+     public int Dest => (int)((_opcode >> 0) & 0xFF);
+     public int SrcA => (int)((_opcode >> 8) & 0xFF);
+     public int SrcB => (int)((_opcode >> 20) & 0xFF);
+     public int SrcC => (int)((_opcode >> 39) & 0xFF);
+     public int Pred => (int)((_opcode >> 16) & 0x7);
+     public bool PredInv => (_opcode & 0x80000) != 0; // bit 19
+     public bool WriteCC => (_opcode & 0x800000000000) != 0; // bit 47
+     public int DestPred => (int)((_opcode >> 48) & 0x7);
+     public PredicateOp PredicateOp => (PredicateOp)((_opcode >> 36) & 0x3);
+     public bool X => (_opcode & 0x4000000000) != 0; // bit 38
+     public int Imm => (int)((_opcode >> 28) & 0xFF);
+ }
+
+ /// <summary>
+ /// Read-only bit-field accessors for the layout named by <c>InstLop3I</c>.
+ /// Each property extracts its bits from the 64-bit word passed to the constructor.
+ /// </summary>
+ readonly struct InstLop3I
+ {
+     private readonly ulong _opcode;
+     public InstLop3I(ulong opcode) => _opcode = opcode;
+     public int Dest => (int)((_opcode >> 0) & 0xFF);
+     public int SrcA => (int)((_opcode >> 8) & 0xFF);
+     // Bit 56 supplies bit 19 of the result, bits 20..38 supply bits 0..18; no sign extension is applied here.
+     public int Imm20 => (int)((_opcode >> 37) & 0x80000) | (int)((_opcode >> 20) & 0x7FFFF);
+     public int SrcC => (int)((_opcode >> 39) & 0xFF);
+     public int Pred => (int)((_opcode >> 16) & 0x7);
+     public bool PredInv => (_opcode & 0x80000) != 0; // bit 19
+     public bool WriteCC => (_opcode & 0x800000000000) != 0; // bit 47
+     public bool X => (_opcode & 0x200000000000000) != 0; // bit 57
+     public int Imm => (int)((_opcode >> 48) & 0xFF);
+ }
+
+ /// <summary>
+ /// Read-only bit-field accessors for the layout named by <c>InstLop3C</c>.
+ /// Each property extracts its bits from the 64-bit word passed to the constructor.
+ /// </summary>
+ readonly struct InstLop3C
+ {
+     private readonly ulong _opcode;
+     public InstLop3C(ulong opcode) => _opcode = opcode;
+     public int Dest => (int)((_opcode >> 0) & 0xFF);
+     public int SrcA => (int)((_opcode >> 8) & 0xFF);
+     public int CbufSlot => (int)((_opcode >> 34) & 0x1F);
+     public int CbufOffset => (int)((_opcode >> 20) & 0x3FFF);
+     public int SrcC => (int)((_opcode >> 39) & 0xFF);
+     public int Pred => (int)((_opcode >> 16) & 0x7);
+     public bool PredInv => (_opcode & 0x80000) != 0; // bit 19
+     public bool WriteCC => (_opcode & 0x800000000000) != 0; // bit 47
+     public bool X => (_opcode & 0x100000000000000) != 0; // bit 56
+     public int Imm => (int)((_opcode >> 48) & 0xFF);
+ }
+
+ /// <summary>
+ /// Read-only bit-field accessors for the layout named by <c>InstLop32i</c>.
+ /// Each property extracts its bits from the 64-bit word passed to the constructor.
+ /// </summary>
+ readonly struct InstLop32i
+ {
+     private readonly ulong _opcode;
+     public InstLop32i(ulong opcode) => _opcode = opcode;
+     public int Dest => (int)((_opcode >> 0) & 0xFF);
+     public int SrcA => (int)((_opcode >> 8) & 0xFF);
+     public int Pred => (int)((_opcode >> 16) & 0x7);
+     public bool PredInv => (_opcode & 0x80000) != 0; // bit 19
+     public bool WriteCC => (_opcode & 0x10000000000000) != 0; // bit 52
+     // Keeps the low 32 bits of the shifted word (bits 20..51); unchecked so the truncation never throws in a checked build.
+     public int Imm32 => unchecked((int)(_opcode >> 20));
+     public bool X => (_opcode & 0x200000000000000) != 0; // bit 57
+     public LogicOp LogicOp => (LogicOp)((_opcode >> 53) & 0x3);
+     public bool NegA => (_opcode & 0x80000000000000) != 0; // bit 55
+     public bool NegB => (_opcode & 0x100000000000000) != 0; // bit 56
+ }
+
+ /// <summary>
+ /// Read-only bit-field accessors for the layout named by <c>InstMembar</c>.
+ /// Each property extracts its bits from the 64-bit word passed to the constructor.
+ /// </summary>
+ readonly struct InstMembar
+ {
+     private readonly ulong _opcode;
+     public InstMembar(ulong opcode) => _opcode = opcode;
+     public int Pred => (int)((_opcode >> 16) & 0x7);
+     public bool PredInv => (_opcode & 0x80000) != 0; // bit 19
+     public Membar Membar => (Membar)((_opcode >> 8) & 0x3);
+     public Ivall Ivall => (Ivall)((_opcode >> 0) & 0x3);
+ }
+
+ /// <summary>
+ /// Read-only bit-field accessors for the layout named by <c>InstMovR</c>.
+ /// Each property extracts its bits from the 64-bit word passed to the constructor.
+ /// </summary>
+ readonly struct InstMovR
+ {
+     private readonly ulong _opcode;
+     public InstMovR(ulong opcode) => _opcode = opcode;
+     public int Dest => (int)((_opcode >> 0) & 0xFF);
+     // Read from bits 20..27 here, not from bits 8..15 where most structs in this file place SrcA.
+     public int SrcA => (int)((_opcode >> 20) & 0xFF);
+     public int Pred => (int)((_opcode >> 16) & 0x7);
+     public bool PredInv => (_opcode & 0x80000) != 0; // bit 19
+     public int QuadMask => (int)((_opcode >> 39) & 0xF);
+ }
+
+ /// <summary>
+ /// Read-only bit-field accessors for the layout named by <c>InstMovI</c>.
+ /// Each property extracts its bits from the 64-bit word passed to the constructor.
+ /// </summary>
+ readonly struct InstMovI
+ {
+     private readonly ulong _opcode;
+     public InstMovI(ulong opcode) => _opcode = opcode;
+     public int Dest => (int)((_opcode >> 0) & 0xFF);
+     // Bit 56 supplies bit 19 of the result, bits 20..38 supply bits 0..18; no sign extension is applied here.
+     public int Imm20 => (int)((_opcode >> 37) & 0x80000) | (int)((_opcode >> 20) & 0x7FFFF);
+     public int Pred => (int)((_opcode >> 16) & 0x7);
+     public bool PredInv => (_opcode & 0x80000) != 0; // bit 19
+     public int QuadMask => (int)((_opcode >> 39) & 0xF);
+ }
+
+ /// <summary>
+ /// Read-only bit-field accessors for the layout named by <c>InstMovC</c>.
+ /// Each property extracts its bits from the 64-bit word passed to the constructor.
+ /// </summary>
+ readonly struct InstMovC
+ {
+     private readonly ulong _opcode;
+     public InstMovC(ulong opcode) => _opcode = opcode;
+     public int Dest => (int)((_opcode >> 0) & 0xFF);
+     public int CbufSlot => (int)((_opcode >> 34) & 0x1F);
+     public int CbufOffset => (int)((_opcode >> 20) & 0x3FFF);
+     public int Pred => (int)((_opcode >> 16) & 0x7);
+     public bool PredInv => (_opcode & 0x80000) != 0; // bit 19
+     public int QuadMask => (int)((_opcode >> 39) & 0xF);
+ }
+
+ /// <summary>
+ /// Read-only bit-field accessors for the layout named by <c>InstMov32i</c>.
+ /// Each property extracts its bits from the 64-bit word passed to the constructor.
+ /// </summary>
+ readonly struct InstMov32i
+ {
+     private readonly ulong _opcode;
+     public InstMov32i(ulong opcode) => _opcode = opcode;
+     public int Dest => (int)((_opcode >> 0) & 0xFF);
+     // Keeps the low 32 bits of the shifted word (bits 20..51); unchecked so the truncation never throws in a checked build.
+     public int Imm32 => unchecked((int)(_opcode >> 20));
+     public int Pred => (int)((_opcode >> 16) & 0x7);
+     public bool PredInv => (_opcode & 0x80000) != 0; // bit 19
+     // Read from bits 12..15 in this layout.
+     public int QuadMask => (int)((_opcode >> 12) & 0xF);
+ }
+
+ /// <summary>
+ /// Read-only bit-field accessors for the layout named by <c>InstMufu</c>.
+ /// Each property extracts its bits from the 64-bit word passed to the constructor.
+ /// </summary>
+ readonly struct InstMufu
+ {
+     private readonly ulong _opcode;
+     public InstMufu(ulong opcode) => _opcode = opcode;
+     public int Dest => (int)((_opcode >> 0) & 0xFF);
+     public int SrcA => (int)((_opcode >> 8) & 0xFF);
+     public int Pred => (int)((_opcode >> 16) & 0x7);
+     public bool PredInv => (_opcode & 0x80000) != 0; // bit 19
+     public MufuOp MufuOp => (MufuOp)((_opcode >> 20) & 0xF);
+     public bool AbsA => (_opcode & 0x400000000000) != 0; // bit 46
+     public bool NegA => (_opcode & 0x1000000000000) != 0; // bit 48
+     public bool Sat => (_opcode & 0x4000000000000) != 0; // bit 50
+ }
+
+ /// <summary>
+ /// Read-only bit-field accessors for the layout named by <c>InstNop</c>.
+ /// Each property extracts its bits from the 64-bit word passed to the constructor.
+ /// </summary>
+ readonly struct InstNop
+ {
+     private readonly ulong _opcode;
+     public InstNop(ulong opcode) => _opcode = opcode;
+     public int Pred => (int)((_opcode >> 16) & 0x7);
+     public bool PredInv => (_opcode & 0x80000) != 0; // bit 19
+     public int Imm16 => (int)((_opcode >> 20) & 0xFFFF);
+     public bool Trig => (_opcode & 0x2000) != 0; // bit 13
+     // Read from bits 8..12 here, whereas the other structs in this file that expose Ccc read bits 0..4.
+     public Ccc Ccc => (Ccc)((_opcode >> 8) & 0x1F);
+ }
+
+ /// <summary>
+ /// Read-only bit-field accessors for the layout named by <c>InstOutR</c>.
+ /// Each property extracts its bits from the 64-bit word passed to the constructor.
+ /// </summary>
+ readonly struct InstOutR
+ {
+     private readonly ulong _opcode;
+     public InstOutR(ulong opcode) => _opcode = opcode;
+     public int Dest => (int)((_opcode >> 0) & 0xFF);
+     public int SrcA => (int)((_opcode >> 8) & 0xFF);
+     public int SrcB => (int)((_opcode >> 20) & 0xFF);
+     public int Pred => (int)((_opcode >> 16) & 0x7);
+     public bool PredInv => (_opcode & 0x80000) != 0; // bit 19
+     public OutType OutType => (OutType)((_opcode >> 39) & 0x3);
+ }
+
+ /// <summary>
+ /// Read-only bit-field accessors for the layout named by <c>InstOutI</c>.
+ /// Each property extracts its bits from the 64-bit word passed to the constructor.
+ /// </summary>
+ readonly struct InstOutI
+ {
+     private readonly ulong _opcode;
+     public InstOutI(ulong opcode) => _opcode = opcode;
+     public int Dest => (int)((_opcode >> 0) & 0xFF);
+     public int SrcA => (int)((_opcode >> 8) & 0xFF);
+     // Bit 56 supplies bit 19 of the result, bits 20..38 supply bits 0..18; no sign extension is applied here.
+     public int Imm20 => (int)((_opcode >> 37) & 0x80000) | (int)((_opcode >> 20) & 0x7FFFF);
+     public int Pred => (int)((_opcode >> 16) & 0x7);
+     public bool PredInv => (_opcode & 0x80000) != 0; // bit 19
+     public OutType OutType => (OutType)((_opcode >> 39) & 0x3);
+ }
+
+ /// <summary>
+ /// Read-only bit-field accessors for the layout named by <c>InstOutC</c>.
+ /// Each property extracts its bits from the 64-bit word passed to the constructor.
+ /// </summary>
+ readonly struct InstOutC
+ {
+     private readonly ulong _opcode;
+     public InstOutC(ulong opcode) => _opcode = opcode;
+     public int Dest => (int)((_opcode >> 0) & 0xFF);
+     public int SrcA => (int)((_opcode >> 8) & 0xFF);
+     public int CbufSlot => (int)((_opcode >> 34) & 0x1F);
+     public int CbufOffset => (int)((_opcode >> 20) & 0x3FFF);
+     public int Pred => (int)((_opcode >> 16) & 0x7);
+     public bool PredInv => (_opcode & 0x80000) != 0; // bit 19
+     public OutType OutType => (OutType)((_opcode >> 39) & 0x3);
+ }
+
+ /// <summary>
+ /// Read-only bit-field accessors for the layout named by <c>InstP2rR</c>.
+ /// Each property extracts its bits from the 64-bit word passed to the constructor.
+ /// </summary>
+ readonly struct InstP2rR
+ {
+     private readonly ulong _opcode;
+     public InstP2rR(ulong opcode) => _opcode = opcode;
+     public int Dest => (int)((_opcode >> 0) & 0xFF);
+     public int SrcA => (int)((_opcode >> 8) & 0xFF);
+     public int SrcB => (int)((_opcode >> 20) & 0xFF);
+     public int Pred => (int)((_opcode >> 16) & 0x7);
+     public bool PredInv => (_opcode & 0x80000) != 0; // bit 19
+     public ByteSel ByteSel => (ByteSel)((_opcode >> 41) & 0x3);
+     public bool Ccpr => (_opcode & 0x10000000000) != 0; // bit 40
+ }
+
+ /// <summary>
+ /// Read-only bit-field accessors for the layout named by <c>InstP2rI</c>.
+ /// Each property extracts its bits from the 64-bit word passed to the constructor.
+ /// </summary>
+ readonly struct InstP2rI
+ {
+     private readonly ulong _opcode;
+     public InstP2rI(ulong opcode) => _opcode = opcode;
+     public int Dest => (int)((_opcode >> 0) & 0xFF);
+     public int SrcA => (int)((_opcode >> 8) & 0xFF);
+     // Bit 56 supplies bit 19 of the result, bits 20..38 supply bits 0..18; no sign extension is applied here.
+     public int Imm20 => (int)((_opcode >> 37) & 0x80000) | (int)((_opcode >> 20) & 0x7FFFF);
+     public int Pred => (int)((_opcode >> 16) & 0x7);
+     public bool PredInv => (_opcode & 0x80000) != 0; // bit 19
+     public ByteSel ByteSel => (ByteSel)((_opcode >> 41) & 0x3);
+     public bool Ccpr => (_opcode & 0x10000000000) != 0; // bit 40
+ }
+
+ /// <summary>
+ /// Read-only bit-field accessors for the layout named by <c>InstP2rC</c>.
+ /// Each property extracts its bits from the 64-bit word passed to the constructor.
+ /// </summary>
+ readonly struct InstP2rC
+ {
+     private readonly ulong _opcode;
+     public InstP2rC(ulong opcode) => _opcode = opcode;
+     public int Dest => (int)((_opcode >> 0) & 0xFF);
+     public int SrcA => (int)((_opcode >> 8) & 0xFF);
+     public int CbufSlot => (int)((_opcode >> 34) & 0x1F);
+     public int CbufOffset => (int)((_opcode >> 20) & 0x3FFF);
+     public int Pred => (int)((_opcode >> 16) & 0x7);
+     public bool PredInv => (_opcode & 0x80000) != 0; // bit 19
+     public ByteSel ByteSel => (ByteSel)((_opcode >> 41) & 0x3);
+     public bool Ccpr => (_opcode & 0x10000000000) != 0; // bit 40
+ }
+
+ /// <summary>
+ /// Read-only bit-field accessors for the layout named by <c>InstPbk</c>.
+ /// Each property extracts its bits from the 64-bit word passed to the constructor.
+ /// </summary>
+ readonly struct InstPbk
+ {
+     private readonly ulong _opcode;
+     public InstPbk(ulong opcode) => _opcode = opcode;
+     // Raw 24-bit field (bits 20..43), returned without sign extension.
+     public int Imm24 => (int)((_opcode >> 20) & 0xFFFFFF);
+     public bool Ca => (_opcode & 0x20) != 0; // bit 5
+ }
+
+ /// <summary>
+ /// Read-only bit-field accessors for the layout named by <c>InstPcnt</c>.
+ /// Each property extracts its bits from the 64-bit word passed to the constructor.
+ /// </summary>
+ readonly struct InstPcnt
+ {
+     private readonly ulong _opcode;
+     public InstPcnt(ulong opcode) => _opcode = opcode;
+     // Raw 24-bit field (bits 20..43), returned without sign extension.
+     public int Imm24 => (int)((_opcode >> 20) & 0xFFFFFF);
+     public bool Ca => (_opcode & 0x20) != 0; // bit 5
+ }
+
+ /// <summary>
+ /// Read-only bit-field accessors for the layout named by <c>InstPexit</c>.
+ /// The single property is extracted from the 64-bit word passed to the constructor.
+ /// </summary>
+ readonly struct InstPexit
+ {
+     private readonly ulong _opcode;
+     public InstPexit(ulong opcode) => _opcode = opcode;
+     // Raw 24-bit field (bits 20..43), returned without sign extension.
+     public int Imm24 => (int)((_opcode >> 20) & 0xFFFFFF);
+ }
+
+ /// <summary>
+ /// Read-only bit-field accessors for the layout named by <c>InstPixld</c>.
+ /// Each property extracts its bits from the 64-bit word passed to the constructor.
+ /// </summary>
+ readonly struct InstPixld
+ {
+     private readonly ulong _opcode;
+     public InstPixld(ulong opcode) => _opcode = opcode;
+     public int Dest => (int)((_opcode >> 0) & 0xFF);
+     public int SrcA => (int)((_opcode >> 8) & 0xFF);
+     public int Pred => (int)((_opcode >> 16) & 0x7);
+     public bool PredInv => (_opcode & 0x80000) != 0; // bit 19
+     public int DestPred => (int)((_opcode >> 45) & 0x7);
+     public PixMode PixMode => (PixMode)((_opcode >> 31) & 0x7);
+     public int Imm8 => (int)((_opcode >> 20) & 0xFF);
+ }
+
+ /// <summary>
+ /// Read-only bit-field accessors for the layout named by <c>InstPlongjmp</c>.
+ /// Each property extracts its bits from the 64-bit word passed to the constructor.
+ /// </summary>
+ readonly struct InstPlongjmp
+ {
+     private readonly ulong _opcode;
+     public InstPlongjmp(ulong opcode) => _opcode = opcode;
+     // Raw 24-bit field (bits 20..43), returned without sign extension.
+     public int Imm24 => (int)((_opcode >> 20) & 0xFFFFFF);
+     public bool Ca => (_opcode & 0x20) != 0; // bit 5
+ }
+
+ /// <summary>
+ /// Read-only bit-field accessors for the layout named by <c>InstPopcR</c>.
+ /// Each property extracts its bits from the 64-bit word passed to the constructor.
+ /// </summary>
+ readonly struct InstPopcR
+ {
+     private readonly ulong _opcode;
+     public InstPopcR(ulong opcode) => _opcode = opcode;
+     public int Dest => (int)((_opcode >> 0) & 0xFF);
+     public int SrcB => (int)((_opcode >> 20) & 0xFF);
+     public int Pred => (int)((_opcode >> 16) & 0x7);
+     public bool PredInv => (_opcode & 0x80000) != 0; // bit 19
+     public bool NegB => (_opcode & 0x10000000000) != 0; // bit 40
+ }
+
+ /// <summary>
+ /// Read-only bit-field accessors for the layout named by <c>InstPopcI</c>.
+ /// Each property extracts its bits from the 64-bit word passed to the constructor.
+ /// </summary>
+ readonly struct InstPopcI
+ {
+     private readonly ulong _opcode;
+     public InstPopcI(ulong opcode) => _opcode = opcode;
+     public int Dest => (int)((_opcode >> 0) & 0xFF);
+     // Bit 56 supplies bit 19 of the result, bits 20..38 supply bits 0..18; no sign extension is applied here.
+     public int Imm20 => (int)((_opcode >> 37) & 0x80000) | (int)((_opcode >> 20) & 0x7FFFF);
+     public int Pred => (int)((_opcode >> 16) & 0x7);
+     public bool PredInv => (_opcode & 0x80000) != 0; // bit 19
+     public bool NegB => (_opcode & 0x10000000000) != 0; // bit 40
+ }
+
+ /// <summary>
+ /// Read-only bit-field accessors for the layout named by <c>InstPopcC</c>.
+ /// Each property extracts its bits from the 64-bit word passed to the constructor.
+ /// </summary>
+ readonly struct InstPopcC
+ {
+     private readonly ulong _opcode;
+     public InstPopcC(ulong opcode) => _opcode = opcode;
+     public int Dest => (int)((_opcode >> 0) & 0xFF);
+     public int CbufSlot => (int)((_opcode >> 34) & 0x1F);
+     public int CbufOffset => (int)((_opcode >> 20) & 0x3FFF);
+     public int Pred => (int)((_opcode >> 16) & 0x7);
+     public bool PredInv => (_opcode & 0x80000) != 0; // bit 19
+     public bool NegB => (_opcode & 0x10000000000) != 0; // bit 40
+ }
+
+ /// <summary>
+ /// Read-only bit-field accessors for the layout named by <c>InstPret</c>.
+ /// Each property extracts its bits from the 64-bit word passed to the constructor.
+ /// </summary>
+ readonly struct InstPret
+ {
+     private readonly ulong _opcode;
+     public InstPret(ulong opcode) => _opcode = opcode;
+     public bool Ca => (_opcode & 0x20) != 0; // bit 5
+     // Raw 24-bit field (bits 20..43), returned without sign extension.
+     public int Imm24 => (int)((_opcode >> 20) & 0xFFFFFF);
+     public bool Inc => (_opcode & 0x40) != 0; // bit 6
+ }
+
+ /// <summary>
+ /// Read-only bit-field accessors for the layout named by <c>InstPrmtR</c>.
+ /// Each property extracts its bits from the 64-bit word passed to the constructor.
+ /// </summary>
+ readonly struct InstPrmtR
+ {
+     private readonly ulong _opcode;
+     public InstPrmtR(ulong opcode) => _opcode = opcode;
+     public int Dest => (int)((_opcode >> 0) & 0xFF);
+     public int SrcA => (int)((_opcode >> 8) & 0xFF);
+     public int SrcB => (int)((_opcode >> 20) & 0xFF);
+     public int SrcC => (int)((_opcode >> 39) & 0xFF);
+     public int Pred => (int)((_opcode >> 16) & 0x7);
+     public bool PredInv => (_opcode & 0x80000) != 0; // bit 19
+     public PMode PMode => (PMode)((_opcode >> 48) & 0xF);
+ }
+
+ /// <summary>
+ /// Read-only bit-field accessors for the layout named by <c>InstPrmtI</c>.
+ /// Each property extracts its bits from the 64-bit word passed to the constructor.
+ /// </summary>
+ readonly struct InstPrmtI
+ {
+     private readonly ulong _opcode;
+     public InstPrmtI(ulong opcode) => _opcode = opcode;
+     public int Dest => (int)((_opcode >> 0) & 0xFF);
+     public int SrcA => (int)((_opcode >> 8) & 0xFF);
+     // Bit 56 supplies bit 19 of the result, bits 20..38 supply bits 0..18; no sign extension is applied here.
+     public int Imm20 => (int)((_opcode >> 37) & 0x80000) | (int)((_opcode >> 20) & 0x7FFFF);
+     public int SrcC => (int)((_opcode >> 39) & 0xFF);
+     public int Pred => (int)((_opcode >> 16) & 0x7);
+     public bool PredInv => (_opcode & 0x80000) != 0; // bit 19
+     public PMode PMode => (PMode)((_opcode >> 48) & 0xF);
+ }
+
+ /// <summary>
+ /// Read-only bit-field accessors for the layout named by <c>InstPrmtC</c>.
+ /// Each property extracts its bits from the 64-bit word passed to the constructor.
+ /// </summary>
+ readonly struct InstPrmtC
+ {
+     private readonly ulong _opcode;
+     public InstPrmtC(ulong opcode) => _opcode = opcode;
+     public int Dest => (int)((_opcode >> 0) & 0xFF);
+     public int SrcA => (int)((_opcode >> 8) & 0xFF);
+     public int CbufSlot => (int)((_opcode >> 34) & 0x1F);
+     public int CbufOffset => (int)((_opcode >> 20) & 0x3FFF);
+     public int SrcC => (int)((_opcode >> 39) & 0xFF);
+     public int Pred => (int)((_opcode >> 16) & 0x7);
+     public bool PredInv => (_opcode & 0x80000) != 0; // bit 19
+     public PMode PMode => (PMode)((_opcode >> 48) & 0xF);
+ }
+
+ /// <summary>
+ /// Read-only bit-field accessors for the layout named by <c>InstPrmtRc</c>.
+ /// Each property extracts its bits from the 64-bit word passed to the constructor.
+ /// </summary>
+ readonly struct InstPrmtRc
+ {
+     private readonly ulong _opcode;
+     public InstPrmtRc(ulong opcode) => _opcode = opcode;
+     public int Dest => (int)((_opcode >> 0) & 0xFF);
+     public int SrcA => (int)((_opcode >> 8) & 0xFF);
+     public int SrcC => (int)((_opcode >> 39) & 0xFF);
+     public int CbufSlot => (int)((_opcode >> 34) & 0x1F);
+     public int CbufOffset => (int)((_opcode >> 20) & 0x3FFF);
+     public int Pred => (int)((_opcode >> 16) & 0x7);
+     public bool PredInv => (_opcode & 0x80000) != 0; // bit 19
+     public PMode PMode => (PMode)((_opcode >> 48) & 0xF);
+ }
+
+ /// <summary>
+ /// Read-only bit-field accessors for the layout named by <c>InstPset</c>.
+ /// Each property extracts its bits from the 64-bit word passed to the constructor.
+ /// </summary>
+ readonly struct InstPset
+ {
+     private readonly ulong _opcode;
+     public InstPset(ulong opcode) => _opcode = opcode;
+     public int Dest => (int)((_opcode >> 0) & 0xFF);
+     public int Pred => (int)((_opcode >> 16) & 0x7);
+     public bool PredInv => (_opcode & 0x80000) != 0; // bit 19
+     public bool WriteCC => (_opcode & 0x800000000000) != 0; // bit 47
+     public int Src2Pred => (int)((_opcode >> 12) & 0x7);
+     public bool Src2PredInv => (_opcode & 0x8000) != 0; // bit 15
+     public int Src1Pred => (int)((_opcode >> 29) & 0x7);
+     public bool Src1PredInv => (_opcode & 0x100000000) != 0; // bit 32
+     public int SrcPred => (int)((_opcode >> 39) & 0x7);
+     public bool SrcPredInv => (_opcode & 0x40000000000) != 0; // bit 42
+     public BoolOp BoolOpAB => (BoolOp)((_opcode >> 24) & 0x3);
+     public BoolOp BoolOpC => (BoolOp)((_opcode >> 45) & 0x3);
+     public bool BVal => (_opcode & 0x100000000000) != 0; // bit 44
+ }
+
+ /// <summary>
+ /// Read-only bit-field accessors for the layout named by <c>InstPsetp</c>.
+ /// Each property extracts its bits from the 64-bit word passed to the constructor.
+ /// </summary>
+ readonly struct InstPsetp
+ {
+     private readonly ulong _opcode;
+     public InstPsetp(ulong opcode) => _opcode = opcode;
+     public int DestPred => (int)((_opcode >> 3) & 0x7);
+     public int Pred => (int)((_opcode >> 16) & 0x7);
+     public bool PredInv => (_opcode & 0x80000) != 0; // bit 19
+     // Despite the "Inv" suffix this is a 3-bit value (bits 0..2), not a flag.
+     // NOTE(review): presumably a second predicate index; confirm against callers.
+     public int DestPredInv => (int)((_opcode >> 0) & 0x7);
+     public int Src2Pred => (int)((_opcode >> 12) & 0x7);
+     public bool Src2PredInv => (_opcode & 0x8000) != 0; // bit 15
+     public int Src1Pred => (int)((_opcode >> 29) & 0x7);
+     public bool Src1PredInv => (_opcode & 0x100000000) != 0; // bit 32
+     public int SrcPred => (int)((_opcode >> 39) & 0x7);
+     public bool SrcPredInv => (_opcode & 0x40000000000) != 0; // bit 42
+     public BoolOp BoolOpAB => (BoolOp)((_opcode >> 24) & 0x3);
+     public BoolOp BoolOpC => (BoolOp)((_opcode >> 45) & 0x3);
+ }
+
+ /// <summary>
+ /// Read-only bit-field accessors for the layout named by <c>InstR2b</c>.
+ /// Each property extracts its bits from the 64-bit word passed to the constructor.
+ /// </summary>
+ readonly struct InstR2b
+ {
+     private readonly ulong _opcode;
+     public InstR2b(ulong opcode) => _opcode = opcode;
+     // Bits 20..27; this overlaps nothing else here because Name starts at bit 28.
+     public int SrcB => (int)((_opcode >> 20) & 0xFF);
+     public int Pred => (int)((_opcode >> 16) & 0x7);
+     public bool PredInv => (_opcode & 0x80000) != 0; // bit 19
+     public BarMode Mode => (BarMode)((_opcode >> 32) & 0x3);
+     public int Name => (int)((_opcode >> 28) & 0xF);
+ }
+
+ /// <summary>
+ /// Read-only bit-field accessors for the layout named by <c>InstR2pR</c>.
+ /// Each property extracts its bits from the 64-bit word passed to the constructor.
+ /// </summary>
+ readonly struct InstR2pR
+ {
+     private readonly ulong _opcode;
+     public InstR2pR(ulong opcode) => _opcode = opcode;
+     public int SrcA => (int)((_opcode >> 8) & 0xFF);
+     public int SrcB => (int)((_opcode >> 20) & 0xFF);
+     public int Pred => (int)((_opcode >> 16) & 0x7);
+     public bool PredInv => (_opcode & 0x80000) != 0; // bit 19
+     public ByteSel ByteSel => (ByteSel)((_opcode >> 41) & 0x3);
+     public bool Ccpr => (_opcode & 0x10000000000) != 0; // bit 40
+ }
+
+ /// <summary>
+ /// Read-only bit-field accessors for the layout named by <c>InstR2pI</c>.
+ /// Each property extracts its bits from the 64-bit word passed to the constructor.
+ /// </summary>
+ readonly struct InstR2pI
+ {
+     private readonly ulong _opcode;
+     public InstR2pI(ulong opcode) => _opcode = opcode;
+     public int SrcA => (int)((_opcode >> 8) & 0xFF);
+     // Bit 56 supplies bit 19 of the result, bits 20..38 supply bits 0..18; no sign extension is applied here.
+     public int Imm20 => (int)((_opcode >> 37) & 0x80000) | (int)((_opcode >> 20) & 0x7FFFF);
+     public int Pred => (int)((_opcode >> 16) & 0x7);
+     public bool PredInv => (_opcode & 0x80000) != 0; // bit 19
+     public ByteSel ByteSel => (ByteSel)((_opcode >> 41) & 0x3);
+     public bool Ccpr => (_opcode & 0x10000000000) != 0; // bit 40
+ }
+
+ /// <summary>
+ /// Read-only bit-field accessors for the layout named by <c>InstR2pC</c>.
+ /// Each property extracts its bits from the 64-bit word passed to the constructor.
+ /// </summary>
+ readonly struct InstR2pC
+ {
+     private readonly ulong _opcode;
+     public InstR2pC(ulong opcode) => _opcode = opcode;
+     public int SrcA => (int)((_opcode >> 8) & 0xFF);
+     public int CbufSlot => (int)((_opcode >> 34) & 0x1F);
+     public int CbufOffset => (int)((_opcode >> 20) & 0x3FFF);
+     public int Pred => (int)((_opcode >> 16) & 0x7);
+     public bool PredInv => (_opcode & 0x80000) != 0; // bit 19
+     public ByteSel ByteSel => (ByteSel)((_opcode >> 41) & 0x3);
+     public bool Ccpr => (_opcode & 0x10000000000) != 0; // bit 40
+ }
+
+ /// <summary>
+ /// Read-only wrapper around a raw 64-bit word for the layout named by <c>InstRam</c>; it exposes no fields.
+ /// </summary>
+ readonly struct InstRam
+ {
+     // Stored but never read by this type.
+     // NOTE(review): presumably kept so the struct stays the size of the raw word like its siblings; confirm before removing.
+     private readonly ulong _opcode;
+     public InstRam(ulong opcode) => _opcode = opcode;
+ }
+
+ /// <summary>
+ /// Read-only bit-field accessors for the layout named by <c>InstRed</c>.
+ /// Each property extracts its bits from the 64-bit word passed to the constructor.
+ /// </summary>
+ readonly struct InstRed
+ {
+     private readonly ulong _opcode;
+     public InstRed(ulong opcode) => _opcode = opcode;
+     public int SrcA => (int)((_opcode >> 8) & 0xFF);
+     // Bits 0..7, the position most other structs in this file expose as Dest.
+     public int SrcB => (int)((_opcode >> 0) & 0xFF);
+     public int Pred => (int)((_opcode >> 16) & 0x7);
+     public bool PredInv => (_opcode & 0x80000) != 0; // bit 19
+     // Contiguous 20-bit field at bits 28..47, unlike the split Imm20 used by the *I structs in this file.
+     public int Imm20 => (int)((_opcode >> 28) & 0xFFFFF);
+     public AtomSize RedSize => (AtomSize)((_opcode >> 20) & 0x7);
+     public RedOp RedOp => (RedOp)((_opcode >> 23) & 0x7);
+     public bool E => (_opcode & 0x1000000000000) != 0; // bit 48
+ }
+
+ /// <summary>
+ /// Read-only bit-field accessors for the layout named by <c>InstRet</c>.
+ /// Each property extracts its bits from the 64-bit word passed to the constructor.
+ /// </summary>
+ readonly struct InstRet
+ {
+     private readonly ulong _opcode;
+     public InstRet(ulong opcode) => _opcode = opcode;
+     public int Pred => (int)((_opcode >> 16) & 0x7);
+     public bool PredInv => (_opcode & 0x80000) != 0; // bit 19
+     public Ccc Ccc => (Ccc)((_opcode >> 0) & 0x1F);
+ }
+
+ /// <summary>
+ /// Read-only bit-field accessors for the layout named by <c>InstRroR</c>.
+ /// Each property extracts its bits from the 64-bit word passed to the constructor.
+ /// </summary>
+ readonly struct InstRroR
+ {
+     private readonly ulong _opcode;
+     public InstRroR(ulong opcode) => _opcode = opcode;
+     public int Dest => (int)((_opcode >> 0) & 0xFF);
+     public int SrcB => (int)((_opcode >> 20) & 0xFF);
+     public int Pred => (int)((_opcode >> 16) & 0x7);
+     public bool PredInv => (_opcode & 0x80000) != 0; // bit 19
+     public bool AbsB => (_opcode & 0x2000000000000) != 0; // bit 49
+     public bool NegB => (_opcode & 0x200000000000) != 0; // bit 45
+     public bool RroOp => (_opcode & 0x8000000000) != 0; // bit 39
+ }
+
+ /// <summary>
+ /// Read-only bit-field accessors for the layout named by <c>InstRroI</c>.
+ /// Each property extracts its bits from the 64-bit word passed to the constructor.
+ /// </summary>
+ readonly struct InstRroI
+ {
+     private readonly ulong _opcode;
+     public InstRroI(ulong opcode) => _opcode = opcode;
+     public int Dest => (int)((_opcode >> 0) & 0xFF);
+     // Bit 56 supplies bit 19 of the result, bits 20..38 supply bits 0..18; no sign extension is applied here.
+     public int Imm20 => (int)((_opcode >> 37) & 0x80000) | (int)((_opcode >> 20) & 0x7FFFF);
+     public int Pred => (int)((_opcode >> 16) & 0x7);
+     public bool PredInv => (_opcode & 0x80000) != 0; // bit 19
+     public bool AbsB => (_opcode & 0x2000000000000) != 0; // bit 49
+     public bool NegB => (_opcode & 0x200000000000) != 0; // bit 45
+     public bool RroOp => (_opcode & 0x8000000000) != 0; // bit 39
+ }
+
+ /// <summary>
+ /// Read-only bit-field accessors for the layout named by <c>InstRroC</c>.
+ /// Each property extracts its bits from the 64-bit word passed to the constructor.
+ /// </summary>
+ readonly struct InstRroC
+ {
+     private readonly ulong _opcode;
+     public InstRroC(ulong opcode) => _opcode = opcode;
+     public int Dest => (int)((_opcode >> 0) & 0xFF);
+     public int CbufSlot => (int)((_opcode >> 34) & 0x1F);
+     public int CbufOffset => (int)((_opcode >> 20) & 0x3FFF);
+     public int Pred => (int)((_opcode >> 16) & 0x7);
+     public bool PredInv => (_opcode & 0x80000) != 0; // bit 19
+     public bool AbsB => (_opcode & 0x2000000000000) != 0; // bit 49
+     public bool NegB => (_opcode & 0x200000000000) != 0; // bit 45
+     public bool RroOp => (_opcode & 0x8000000000) != 0; // bit 39
+ }
+
+ /// <summary>
+ /// Read-only wrapper around a raw 64-bit word for the layout named by <c>InstRtt</c>; it exposes no fields.
+ /// </summary>
+ readonly struct InstRtt
+ {
+     // Stored but never read by this type.
+     // NOTE(review): presumably kept so the struct stays the size of the raw word like its siblings; confirm before removing.
+     private readonly ulong _opcode;
+     public InstRtt(ulong opcode) => _opcode = opcode;
+ }
+
+ /// <summary>
+ /// Read-only bit-field accessors for the layout named by <c>InstS2r</c>.
+ /// Each property extracts its bits from the 64-bit word passed to the constructor.
+ /// </summary>
+ readonly struct InstS2r
+ {
+     private readonly ulong _opcode;
+     public InstS2r(ulong opcode) => _opcode = opcode;
+     public int Dest => (int)((_opcode >> 0) & 0xFF);
+     public int Pred => (int)((_opcode >> 16) & 0x7);
+     public bool PredInv => (_opcode & 0x80000) != 0; // bit 19
+     public SReg SReg => (SReg)((_opcode >> 20) & 0xFF);
+ }
+
+ /// <summary>
+ /// Read-only wrapper around a raw 64-bit word for the layout named by <c>InstSam</c>; it exposes no fields.
+ /// </summary>
+ readonly struct InstSam
+ {
+     // Stored but never read by this type.
+     // NOTE(review): presumably kept so the struct stays the size of the raw word like its siblings; confirm before removing.
+     private readonly ulong _opcode;
+     public InstSam(ulong opcode) => _opcode = opcode;
+ }
+
+ /// <summary>
+ /// Read-only bit-field accessors for the layout named by <c>InstSelR</c>.
+ /// Each property extracts its bits from the 64-bit word passed to the constructor.
+ /// </summary>
+ readonly struct InstSelR
+ {
+     private readonly ulong _opcode;
+     public InstSelR(ulong opcode) => _opcode = opcode;
+     public int Dest => (int)((_opcode >> 0) & 0xFF);
+     public int SrcA => (int)((_opcode >> 8) & 0xFF);
+     public int SrcB => (int)((_opcode >> 20) & 0xFF);
+     public int Pred => (int)((_opcode >> 16) & 0x7);
+     public bool PredInv => (_opcode & 0x80000) != 0; // bit 19
+     public int SrcPred => (int)((_opcode >> 39) & 0x7);
+     public bool SrcPredInv => (_opcode & 0x40000000000) != 0; // bit 42
+ }
+
+ /// <summary>
+ /// Read-only bit-field accessors for the layout named by <c>InstSelI</c>.
+ /// Each property extracts its bits from the 64-bit word passed to the constructor.
+ /// </summary>
+ readonly struct InstSelI
+ {
+     private readonly ulong _opcode;
+     public InstSelI(ulong opcode) => _opcode = opcode;
+     public int Dest => (int)((_opcode >> 0) & 0xFF);
+     public int SrcA => (int)((_opcode >> 8) & 0xFF);
+     // Bit 56 supplies bit 19 of the result, bits 20..38 supply bits 0..18; no sign extension is applied here.
+     public int Imm20 => (int)((_opcode >> 37) & 0x80000) | (int)((_opcode >> 20) & 0x7FFFF);
+     public int Pred => (int)((_opcode >> 16) & 0x7);
+     public bool PredInv => (_opcode & 0x80000) != 0; // bit 19
+     public int SrcPred => (int)((_opcode >> 39) & 0x7);
+     public bool SrcPredInv => (_opcode & 0x40000000000) != 0; // bit 42
+ }
+
+ /// <summary>
+ /// Read-only bit-field accessors for the layout named by <c>InstSelC</c>.
+ /// Each property extracts its bits from the 64-bit word passed to the constructor.
+ /// </summary>
+ readonly struct InstSelC
+ {
+     private readonly ulong _opcode;
+     public InstSelC(ulong opcode) => _opcode = opcode;
+     public int Dest => (int)((_opcode >> 0) & 0xFF);
+     public int SrcA => (int)((_opcode >> 8) & 0xFF);
+     public int CbufSlot => (int)((_opcode >> 34) & 0x1F);
+     public int CbufOffset => (int)((_opcode >> 20) & 0x3FFF);
+     public int Pred => (int)((_opcode >> 16) & 0x7);
+     public bool PredInv => (_opcode & 0x80000) != 0; // bit 19
+     public int SrcPred => (int)((_opcode >> 39) & 0x7);
+     public bool SrcPredInv => (_opcode & 0x40000000000) != 0; // bit 42
+ }
+
+ /// <summary>
+ /// Read-only bit-field accessors for the layout named by <c>InstSetcrsptr</c>.
+ /// The single property is extracted from the 64-bit word passed to the constructor.
+ /// </summary>
+ readonly struct InstSetcrsptr
+ {
+     private readonly ulong _opcode;
+     public InstSetcrsptr(ulong opcode) => _opcode = opcode;
+     public int SrcA => (int)((_opcode >> 8) & 0xFF);
+ }
+
+ /// <summary>
+ /// Read-only bit-field accessors for the layout named by <c>InstSetlmembase</c>.
+ /// The single property is extracted from the 64-bit word passed to the constructor.
+ /// </summary>
+ readonly struct InstSetlmembase
+ {
+     private readonly ulong _opcode;
+     public InstSetlmembase(ulong opcode) => _opcode = opcode;
+     public int SrcA => (int)((_opcode >> 8) & 0xFF);
+ }
+
+ /// <summary>
+ /// Read-only bit-field accessors for the layout named by <c>InstShfLR</c>.
+ /// Each property extracts its bits from the 64-bit word passed to the constructor.
+ /// </summary>
+ readonly struct InstShfLR
+ {
+     private readonly ulong _opcode;
+     public InstShfLR(ulong opcode) => _opcode = opcode;
+     public int Dest => (int)((_opcode >> 0) & 0xFF);
+     public int SrcA => (int)((_opcode >> 8) & 0xFF);
+     public int SrcB => (int)((_opcode >> 20) & 0xFF);
+     public int SrcC => (int)((_opcode >> 39) & 0xFF);
+     public int Pred => (int)((_opcode >> 16) & 0x7);
+     public bool PredInv => (_opcode & 0x80000) != 0; // bit 19
+     public bool WriteCC => (_opcode & 0x800000000000) != 0; // bit 47
+     public bool M => (_opcode & 0x4000000000000) != 0; // bit 50
+     public XModeShf XModeShf => (XModeShf)((_opcode >> 48) & 0x3);
+     public MaxShift MaxShift => (MaxShift)((_opcode >> 37) & 0x3);
+ }
+
+ /// <summary>
+ /// Read-only bit-field accessors for the layout named by <c>InstShfRR</c>.
+ /// Each property extracts its bits from the 64-bit word passed to the constructor.
+ /// </summary>
+ readonly struct InstShfRR
+ {
+     private readonly ulong _opcode;
+     public InstShfRR(ulong opcode) => _opcode = opcode;
+     public int Dest => (int)((_opcode >> 0) & 0xFF);
+     public int SrcA => (int)((_opcode >> 8) & 0xFF);
+     public int SrcB => (int)((_opcode >> 20) & 0xFF);
+     public int SrcC => (int)((_opcode >> 39) & 0xFF);
+     public int Pred => (int)((_opcode >> 16) & 0x7);
+     public bool PredInv => (_opcode & 0x80000) != 0; // bit 19
+     public bool WriteCC => (_opcode & 0x800000000000) != 0; // bit 47
+     public bool M => (_opcode & 0x4000000000000) != 0; // bit 50
+     public XModeShf XModeShf => (XModeShf)((_opcode >> 48) & 0x3);
+     public MaxShift MaxShift => (MaxShift)((_opcode >> 37) & 0x3);
+ }
+
+ /// <summary>
+ /// Immutable bit-field view over a raw 64-bit opcode word. Besides the register fields
+ /// (Dest bits 0-7, SrcA bits 8-15, SrcC bits 39-46) it exposes a 6-bit immediate, Imm6, in bits 20-25.
+ /// </summary>
+ readonly struct InstShfLI
+ {
+ // Captured once by the constructor and never modified afterwards.
+ private readonly ulong _opcode;
+ public InstShfLI(ulong opcode) => _opcode = opcode;
+ public int Dest => (int)((_opcode >> 0) & 0xFF);
+ public int SrcA => (int)((_opcode >> 8) & 0xFF);
+ public int SrcC => (int)((_opcode >> 39) & 0xFF);
+ public int Pred => (int)((_opcode >> 16) & 0x7);
+ public bool PredInv => (_opcode & 0x80000) != 0;
+ public bool WriteCC => (_opcode & 0x800000000000) != 0;
+ public bool M => (_opcode & 0x4000000000000) != 0;
+ public XModeShf XModeShf => (XModeShf)((_opcode >> 48) & 0x3);
+ public MaxShift MaxShift => (MaxShift)((_opcode >> 37) & 0x3);
+ public int Imm6 => (int)((_opcode >> 20) & 0x3F);
+ }
+
+ /// <summary>
+ /// Immutable bit-field view over a raw 64-bit opcode word. Besides the register fields
+ /// (Dest bits 0-7, SrcA bits 8-15, SrcC bits 39-46) it exposes a 6-bit immediate, Imm6, in bits 20-25.
+ /// </summary>
+ readonly struct InstShfRI
+ {
+ // Captured once by the constructor and never modified afterwards.
+ private readonly ulong _opcode;
+ public InstShfRI(ulong opcode) => _opcode = opcode;
+ public int Dest => (int)((_opcode >> 0) & 0xFF);
+ public int SrcA => (int)((_opcode >> 8) & 0xFF);
+ public int SrcC => (int)((_opcode >> 39) & 0xFF);
+ public int Pred => (int)((_opcode >> 16) & 0x7);
+ public bool PredInv => (_opcode & 0x80000) != 0;
+ public bool WriteCC => (_opcode & 0x800000000000) != 0;
+ public bool M => (_opcode & 0x4000000000000) != 0;
+ public XModeShf XModeShf => (XModeShf)((_opcode >> 48) & 0x3);
+ public MaxShift MaxShift => (MaxShift)((_opcode >> 37) & 0x3);
+ public int Imm6 => (int)((_opcode >> 20) & 0x3F);
+ }
+
+ /// <summary>
+ /// Immutable bit-field view over a raw 64-bit opcode word.
+ /// SrcB (bits 20-27) overlaps SrcBImm (bits 20-24), and SrcC (bits 39-46) overlaps SrcCImm (bits 34-46).
+ /// NOTE(review): presumably BFixShfl (bit 28) and CFixShfl (bit 29) select which reading applies -
+ /// confirm against the code that consumes this struct.
+ /// </summary>
+ readonly struct InstShfl
+ {
+ // Captured once by the constructor and never modified afterwards.
+ private readonly ulong _opcode;
+ public InstShfl(ulong opcode) => _opcode = opcode;
+ public int Dest => (int)((_opcode >> 0) & 0xFF);
+ public int SrcA => (int)((_opcode >> 8) & 0xFF);
+ public int SrcB => (int)((_opcode >> 20) & 0xFF);
+ public int SrcC => (int)((_opcode >> 39) & 0xFF);
+ public int SrcBImm => (int)((_opcode >> 20) & 0x1F);
+ public int SrcCImm => (int)((_opcode >> 34) & 0x1FFF);
+ public int Pred => (int)((_opcode >> 16) & 0x7);
+ public bool PredInv => (_opcode & 0x80000) != 0;
+ public ShflMode ShflMode => (ShflMode)((_opcode >> 30) & 0x3);
+ public bool CFixShfl => (_opcode & 0x20000000) != 0;
+ public bool BFixShfl => (_opcode & 0x10000000) != 0;
+ public int DestPred => (int)((_opcode >> 48) & 0x7);
+ }
+
+ /// <summary>
+ /// Immutable bit-field view over a raw 64-bit opcode word.
+ /// Layout read by the properties: Dest bits 0-7, SrcA bits 8-15, Pred bits 16-18, PredInv bit 19,
+ /// SrcB bits 20-27, M bit 39, X bit 43, WriteCC bit 47.
+ /// </summary>
+ readonly struct InstShlR
+ {
+ // Captured once by the constructor and never modified afterwards.
+ private readonly ulong _opcode;
+ public InstShlR(ulong opcode) => _opcode = opcode;
+ public int Dest => (int)((_opcode >> 0) & 0xFF);
+ public int SrcA => (int)((_opcode >> 8) & 0xFF);
+ public int SrcB => (int)((_opcode >> 20) & 0xFF);
+ public int Pred => (int)((_opcode >> 16) & 0x7);
+ public bool PredInv => (_opcode & 0x80000) != 0;
+ public bool WriteCC => (_opcode & 0x800000000000) != 0;
+ public bool X => (_opcode & 0x80000000000) != 0;
+ public bool M => (_opcode & 0x8000000000) != 0;
+ }
+
+ /// <summary>
+ /// Immutable bit-field view over a raw 64-bit opcode word.
+ /// Imm20 is assembled from two places: its low 19 bits come from opcode bits 20-38 and its bit 19
+ /// comes from opcode bit 56. The result is not sign-extended by this accessor.
+ /// Note that M reads bit 39, directly above the low immediate bits.
+ /// </summary>
+ readonly struct InstShlI
+ {
+ // Captured once by the constructor and never modified afterwards.
+ private readonly ulong _opcode;
+ public InstShlI(ulong opcode) => _opcode = opcode;
+ public int Dest => (int)((_opcode >> 0) & 0xFF);
+ public int SrcA => (int)((_opcode >> 8) & 0xFF);
+ public int Imm20 => (int)((_opcode >> 37) & 0x80000) | (int)((_opcode >> 20) & 0x7FFFF);
+ public int Pred => (int)((_opcode >> 16) & 0x7);
+ public bool PredInv => (_opcode & 0x80000) != 0;
+ public bool WriteCC => (_opcode & 0x800000000000) != 0;
+ public bool X => (_opcode & 0x80000000000) != 0;
+ public bool M => (_opcode & 0x8000000000) != 0;
+ }
+
+ /// <summary>
+ /// Immutable bit-field view over a raw 64-bit opcode word.
+ /// Layout read by the properties: Dest bits 0-7, SrcA bits 8-15, Pred bits 16-18, PredInv bit 19,
+ /// CbufOffset bits 20-33, CbufSlot bits 34-38, M bit 39, X bit 43, WriteCC bit 47.
+ /// </summary>
+ readonly struct InstShlC
+ {
+ // Captured once by the constructor and never modified afterwards.
+ private readonly ulong _opcode;
+ public InstShlC(ulong opcode) => _opcode = opcode;
+ public int Dest => (int)((_opcode >> 0) & 0xFF);
+ public int SrcA => (int)((_opcode >> 8) & 0xFF);
+ public int CbufSlot => (int)((_opcode >> 34) & 0x1F);
+ public int CbufOffset => (int)((_opcode >> 20) & 0x3FFF);
+ public int Pred => (int)((_opcode >> 16) & 0x7);
+ public bool PredInv => (_opcode & 0x80000) != 0;
+ public bool WriteCC => (_opcode & 0x800000000000) != 0;
+ public bool X => (_opcode & 0x80000000000) != 0;
+ public bool M => (_opcode & 0x8000000000) != 0;
+ }
+
+ /// <summary>
+ /// Immutable bit-field view over a raw 64-bit opcode word.
+ /// Flag positions read here: M bit 39, Brev bit 40, WriteCC bit 47, Signed bit 48; XMode occupies bits 43-44.
+ /// </summary>
+ readonly struct InstShrR
+ {
+ // Captured once by the constructor and never modified afterwards.
+ private readonly ulong _opcode;
+ public InstShrR(ulong opcode) => _opcode = opcode;
+ public int Dest => (int)((_opcode >> 0) & 0xFF);
+ public int SrcA => (int)((_opcode >> 8) & 0xFF);
+ public int SrcB => (int)((_opcode >> 20) & 0xFF);
+ public int Pred => (int)((_opcode >> 16) & 0x7);
+ public bool PredInv => (_opcode & 0x80000) != 0;
+ public bool WriteCC => (_opcode & 0x800000000000) != 0;
+ public bool Signed => (_opcode & 0x1000000000000) != 0;
+ public XMode XMode => (XMode)((_opcode >> 43) & 0x3);
+ public bool Brev => (_opcode & 0x10000000000) != 0;
+ public bool M => (_opcode & 0x8000000000) != 0;
+ }
+
+ /// <summary>
+ /// Immutable bit-field view over a raw 64-bit opcode word.
+ /// Imm20 is assembled from opcode bits 20-38 (low 19 bits) plus opcode bit 56 (moved to bit 19);
+ /// it is not sign-extended by this accessor.
+ /// </summary>
+ readonly struct InstShrI
+ {
+ // Captured once by the constructor and never modified afterwards.
+ private readonly ulong _opcode;
+ public InstShrI(ulong opcode) => _opcode = opcode;
+ public int Dest => (int)((_opcode >> 0) & 0xFF);
+ public int SrcA => (int)((_opcode >> 8) & 0xFF);
+ public int Imm20 => (int)((_opcode >> 37) & 0x80000) | (int)((_opcode >> 20) & 0x7FFFF);
+ public int Pred => (int)((_opcode >> 16) & 0x7);
+ public bool PredInv => (_opcode & 0x80000) != 0;
+ public bool WriteCC => (_opcode & 0x800000000000) != 0;
+ public bool Signed => (_opcode & 0x1000000000000) != 0;
+ public XMode XMode => (XMode)((_opcode >> 43) & 0x3);
+ public bool Brev => (_opcode & 0x10000000000) != 0;
+ public bool M => (_opcode & 0x8000000000) != 0;
+ }
+
+ /// <summary>
+ /// Immutable bit-field view over a raw 64-bit opcode word.
+ /// The second operand is described by CbufOffset (bits 20-33) and CbufSlot (bits 34-38);
+ /// flags: M bit 39, Brev bit 40, WriteCC bit 47, Signed bit 48; XMode occupies bits 43-44.
+ /// </summary>
+ readonly struct InstShrC
+ {
+ // Captured once by the constructor and never modified afterwards.
+ private readonly ulong _opcode;
+ public InstShrC(ulong opcode) => _opcode = opcode;
+ public int Dest => (int)((_opcode >> 0) & 0xFF);
+ public int SrcA => (int)((_opcode >> 8) & 0xFF);
+ public int CbufSlot => (int)((_opcode >> 34) & 0x1F);
+ public int CbufOffset => (int)((_opcode >> 20) & 0x3FFF);
+ public int Pred => (int)((_opcode >> 16) & 0x7);
+ public bool PredInv => (_opcode & 0x80000) != 0;
+ public bool WriteCC => (_opcode & 0x800000000000) != 0;
+ public bool Signed => (_opcode & 0x1000000000000) != 0;
+ public XMode XMode => (XMode)((_opcode >> 43) & 0x3);
+ public bool Brev => (_opcode & 0x10000000000) != 0;
+ public bool M => (_opcode & 0x8000000000) != 0;
+ }
+
+ /// <summary>
+ /// Immutable bit-field view over a raw 64-bit opcode word: Imm24 is the unsigned value of
+ /// bits 20-43 (no sign extension is applied here) and Ca is bit 5.
+ /// </summary>
+ readonly struct InstSsy
+ {
+ // Captured once by the constructor and never modified afterwards.
+ private readonly ulong _opcode;
+ public InstSsy(ulong opcode) => _opcode = opcode;
+ public int Imm24 => (int)((_opcode >> 20) & 0xFFFFFF);
+ public bool Ca => (_opcode & 0x20) != 0;
+ }
+
+ /// <summary>
+ /// Immutable bit-field view over a raw 64-bit opcode word.
+ /// Imm32 shifts the word right by 20 and casts to int, so it carries opcode bits 20-51
+ /// and may be negative when bit 51 is set.
+ /// Other fields: E bit 52, LsSize bits 53-55, CacheOp bits 56-57, SrcPred bits 58-60.
+ /// </summary>
+ readonly struct InstSt
+ {
+ // Captured once by the constructor and never modified afterwards.
+ private readonly ulong _opcode;
+ public InstSt(ulong opcode) => _opcode = opcode;
+ public int Dest => (int)((_opcode >> 0) & 0xFF);
+ public int SrcA => (int)((_opcode >> 8) & 0xFF);
+ public int Pred => (int)((_opcode >> 16) & 0x7);
+ public bool PredInv => (_opcode & 0x80000) != 0;
+ public int SrcPred => (int)((_opcode >> 58) & 0x7);
+ public CacheOpSt CacheOp => (CacheOpSt)((_opcode >> 56) & 0x3);
+ public LsSize LsSize => (LsSize)((_opcode >> 53) & 0x7);
+ public bool E => (_opcode & 0x10000000000000) != 0;
+ public int Imm32 => (int)(_opcode >> 20);
+ }
+
+ /// <summary>
+ /// Immutable bit-field view over a raw 64-bit opcode word.
+ /// Imm24 is the unsigned value of bits 20-43; E is bit 45, CacheOp bits 46-47, LsSize bits 48-50.
+ /// </summary>
+ readonly struct InstStg
+ {
+ // Captured once by the constructor and never modified afterwards.
+ private readonly ulong _opcode;
+ public InstStg(ulong opcode) => _opcode = opcode;
+ public int Dest => (int)((_opcode >> 0) & 0xFF);
+ public int SrcA => (int)((_opcode >> 8) & 0xFF);
+ public int Pred => (int)((_opcode >> 16) & 0x7);
+ public bool PredInv => (_opcode & 0x80000) != 0;
+ public LsSize2 LsSize => (LsSize2)((_opcode >> 48) & 0x7);
+ public CacheOpSt CacheOp => (CacheOpSt)((_opcode >> 46) & 0x3);
+ public bool E => (_opcode & 0x200000000000) != 0;
+ public int Imm24 => (int)((_opcode >> 20) & 0xFFFFFF);
+ }
+
+ /// <summary>
+ /// Immutable bit-field view over a raw 64-bit opcode word.
+ /// Imm24 is the unsigned value of bits 20-43; CacheOp is read from bits 44-45 and LsSize from bits 48-50.
+ /// </summary>
+ readonly struct InstStl
+ {
+ // Captured once by the constructor and never modified afterwards.
+ private readonly ulong _opcode;
+ public InstStl(ulong opcode) => _opcode = opcode;
+ public int Dest => (int)((_opcode >> 0) & 0xFF);
+ public int SrcA => (int)((_opcode >> 8) & 0xFF);
+ public int Pred => (int)((_opcode >> 16) & 0x7);
+ public bool PredInv => (_opcode & 0x80000) != 0;
+ public LsSize2 LsSize => (LsSize2)((_opcode >> 48) & 0x7);
+ public CacheOpSt CacheOp => (CacheOpSt)((_opcode >> 44) & 0x3);
+ public int Imm24 => (int)((_opcode >> 20) & 0xFFFFFF);
+ }
+
+ /// <summary>
+ /// Immutable bit-field view over a raw 64-bit opcode word: Imm8 is bits 20-27 and Wait is bit 31.
+ /// </summary>
+ readonly struct InstStp
+ {
+ // Captured once by the constructor and never modified afterwards.
+ private readonly ulong _opcode;
+ public InstStp(ulong opcode) => _opcode = opcode;
+ public bool Wait => (_opcode & 0x80000000) != 0;
+ public int Imm8 => (int)((_opcode >> 20) & 0xFF);
+ }
+
+ /// <summary>
+ /// Immutable bit-field view over a raw 64-bit opcode word.
+ /// Imm24 is the unsigned value of bits 20-43 and LsSize is read from bits 48-50.
+ /// </summary>
+ readonly struct InstSts
+ {
+ // Captured once by the constructor and never modified afterwards.
+ private readonly ulong _opcode;
+ public InstSts(ulong opcode) => _opcode = opcode;
+ public int Dest => (int)((_opcode >> 0) & 0xFF);
+ public int SrcA => (int)((_opcode >> 8) & 0xFF);
+ public int Pred => (int)((_opcode >> 16) & 0x7);
+ public bool PredInv => (_opcode & 0x80000) != 0;
+ public LsSize2 LsSize => (LsSize2)((_opcode >> 48) & 0x7);
+ public int Imm24 => (int)((_opcode >> 20) & 0xFFFFFF);
+ }
+
+ /// <summary>
+ /// Immutable bit-field view over a raw 64-bit opcode word.
+ /// Besides the register fields it decodes Ba (bit 28), Op (bits 29-32), Dim (bits 33-35),
+ /// Size (bits 36-38), SrcC (bits 39-46) and Clamp (bits 49-50).
+ /// </summary>
+ readonly struct InstSuatomB
+ {
+ // Captured once by the constructor and never modified afterwards.
+ private readonly ulong _opcode;
+ public InstSuatomB(ulong opcode) => _opcode = opcode;
+ public int Dest => (int)((_opcode >> 0) & 0xFF);
+ public int SrcA => (int)((_opcode >> 8) & 0xFF);
+ public int SrcB => (int)((_opcode >> 20) & 0xFF);
+ public int SrcC => (int)((_opcode >> 39) & 0xFF);
+ public int Pred => (int)((_opcode >> 16) & 0x7);
+ public bool PredInv => (_opcode & 0x80000) != 0;
+ public Clamp Clamp => (Clamp)((_opcode >> 49) & 0x3);
+ public SuatomSize Size => (SuatomSize)((_opcode >> 36) & 0x7);
+ public SuDim Dim => (SuDim)((_opcode >> 33) & 0x7);
+ public SuatomOp Op => (SuatomOp)((_opcode >> 29) & 0xF);
+ public bool Ba => (_opcode & 0x10000000) != 0;
+ }
+
+ /// <summary>
+ /// Immutable bit-field view over a raw 64-bit opcode word.
+ /// This layout carries a 13-bit TidB field in bits 36-48 and reads Size from bits 51-53;
+ /// Ba is bit 28, Op bits 29-32, Dim bits 33-35, Clamp bits 49-50.
+ /// </summary>
+ readonly struct InstSuatom
+ {
+ // Captured once by the constructor and never modified afterwards.
+ private readonly ulong _opcode;
+ public InstSuatom(ulong opcode) => _opcode = opcode;
+ public int Dest => (int)((_opcode >> 0) & 0xFF);
+ public int SrcA => (int)((_opcode >> 8) & 0xFF);
+ public int SrcB => (int)((_opcode >> 20) & 0xFF);
+ public int Pred => (int)((_opcode >> 16) & 0x7);
+ public bool PredInv => (_opcode & 0x80000) != 0;
+ public SuatomSize Size => (SuatomSize)((_opcode >> 51) & 0x7);
+ public Clamp Clamp => (Clamp)((_opcode >> 49) & 0x3);
+ public int TidB => (int)((_opcode >> 36) & 0x1FFF);
+ public SuDim Dim => (SuDim)((_opcode >> 33) & 0x7);
+ public SuatomOp Op => (SuatomOp)((_opcode >> 29) & 0xF);
+ public bool Ba => (_opcode & 0x10000000) != 0;
+ }
+
+ /// <summary>
+ /// Immutable bit-field view over a raw 64-bit opcode word.
+ /// Decodes a DestPred field in bits 51-53 in addition to Ba (bit 28), Op (bits 29-32),
+ /// Dim (bits 33-35), Size (bits 36-38), SrcC (bits 39-46) and Clamp (bits 49-50).
+ /// </summary>
+ readonly struct InstSuatomB2
+ {
+ // Captured once by the constructor and never modified afterwards.
+ private readonly ulong _opcode;
+ public InstSuatomB2(ulong opcode) => _opcode = opcode;
+ public int Dest => (int)((_opcode >> 0) & 0xFF);
+ public int SrcA => (int)((_opcode >> 8) & 0xFF);
+ public int SrcB => (int)((_opcode >> 20) & 0xFF);
+ public int SrcC => (int)((_opcode >> 39) & 0xFF);
+ public int Pred => (int)((_opcode >> 16) & 0x7);
+ public bool PredInv => (_opcode & 0x80000) != 0;
+ public int DestPred => (int)((_opcode >> 51) & 0x7);
+ public Clamp Clamp => (Clamp)((_opcode >> 49) & 0x3);
+ public SuatomSize Size => (SuatomSize)((_opcode >> 36) & 0x7);
+ public SuDim Dim => (SuDim)((_opcode >> 33) & 0x7);
+ public SuatomOp Op => (SuatomOp)((_opcode >> 29) & 0xF);
+ public bool Ba => (_opcode & 0x10000000) != 0;
+ }
+
+ /// <summary>
+ /// Immutable bit-field view over a raw 64-bit opcode word.
+ /// DestPred is read from bits 30-32; Ba is bit 28, Dim bits 33-35, Size bits 36-38,
+ /// SrcC bits 39-46 and Clamp bits 49-50.
+ /// </summary>
+ readonly struct InstSuatomCasB
+ {
+ // Captured once by the constructor and never modified afterwards.
+ private readonly ulong _opcode;
+ public InstSuatomCasB(ulong opcode) => _opcode = opcode;
+ public int Dest => (int)((_opcode >> 0) & 0xFF);
+ public int SrcA => (int)((_opcode >> 8) & 0xFF);
+ public int SrcB => (int)((_opcode >> 20) & 0xFF);
+ public int SrcC => (int)((_opcode >> 39) & 0xFF);
+ public int Pred => (int)((_opcode >> 16) & 0x7);
+ public bool PredInv => (_opcode & 0x80000) != 0;
+ public Clamp Clamp => (Clamp)((_opcode >> 49) & 0x3);
+ public SuatomSize Size => (SuatomSize)((_opcode >> 36) & 0x7);
+ public SuDim Dim => (SuDim)((_opcode >> 33) & 0x7);
+ public int DestPred => (int)((_opcode >> 30) & 0x7);
+ public bool Ba => (_opcode & 0x10000000) != 0;
+ }
+
+ /// <summary>
+ /// Immutable bit-field view over a raw 64-bit opcode word.
+ /// Carries a 13-bit TidB field in bits 36-48, Size in bits 51-53 and DestPred in bits 30-32;
+ /// Ba is bit 28, Dim bits 33-35, Clamp bits 49-50.
+ /// </summary>
+ readonly struct InstSuatomCas
+ {
+ // Captured once by the constructor and never modified afterwards.
+ private readonly ulong _opcode;
+ public InstSuatomCas(ulong opcode) => _opcode = opcode;
+ public int Dest => (int)((_opcode >> 0) & 0xFF);
+ public int SrcA => (int)((_opcode >> 8) & 0xFF);
+ public int SrcB => (int)((_opcode >> 20) & 0xFF);
+ public int Pred => (int)((_opcode >> 16) & 0x7);
+ public bool PredInv => (_opcode & 0x80000) != 0;
+ public SuatomSize Size => (SuatomSize)((_opcode >> 51) & 0x7);
+ public Clamp Clamp => (Clamp)((_opcode >> 49) & 0x3);
+ public int TidB => (int)((_opcode >> 36) & 0x1FFF);
+ public SuDim Dim => (SuDim)((_opcode >> 33) & 0x7);
+ public int DestPred => (int)((_opcode >> 30) & 0x7);
+ public bool Ba => (_opcode & 0x10000000) != 0;
+ }
+
+ /// <summary>
+ /// Immutable bit-field view over a raw 64-bit opcode word.
+ /// Size is bits 20-22, Ba bit 23, CacheOp bits 24-25, DestPred2 bits 30-32, Dim bits 33-35,
+ /// SrcC bits 39-46 and Clamp bits 49-50.
+ /// </summary>
+ readonly struct InstSuldDB
+ {
+ // Captured once by the constructor and never modified afterwards.
+ private readonly ulong _opcode;
+ public InstSuldDB(ulong opcode) => _opcode = opcode;
+ public int Dest => (int)((_opcode >> 0) & 0xFF);
+ public int SrcA => (int)((_opcode >> 8) & 0xFF);
+ public int SrcC => (int)((_opcode >> 39) & 0xFF);
+ public int Pred => (int)((_opcode >> 16) & 0x7);
+ public bool PredInv => (_opcode & 0x80000) != 0;
+ public Clamp Clamp => (Clamp)((_opcode >> 49) & 0x3);
+ public SuDim Dim => (SuDim)((_opcode >> 33) & 0x7);
+ public int DestPred2 => (int)((_opcode >> 30) & 0x7);
+ public CacheOpLd CacheOp => (CacheOpLd)((_opcode >> 24) & 0x3);
+ public bool Ba => (_opcode & 0x800000) != 0;
+ public SuSize Size => (SuSize)((_opcode >> 20) & 0x7);
+ }
+
+ /// <summary>
+ /// Immutable bit-field view over a raw 64-bit opcode word.
+ /// Size is bits 20-22, Ba bit 23, CacheOp bits 24-25, DestPred2 bits 30-32, Dim bits 33-35,
+ /// TidB bits 36-48 and Clamp bits 49-50.
+ /// </summary>
+ readonly struct InstSuldD
+ {
+ // Captured once by the constructor and never modified afterwards.
+ private readonly ulong _opcode;
+ public InstSuldD(ulong opcode) => _opcode = opcode;
+ public int Dest => (int)((_opcode >> 0) & 0xFF);
+ public int SrcA => (int)((_opcode >> 8) & 0xFF);
+ public int Pred => (int)((_opcode >> 16) & 0x7);
+ public bool PredInv => (_opcode & 0x80000) != 0;
+ public Clamp Clamp => (Clamp)((_opcode >> 49) & 0x3);
+ public int TidB => (int)((_opcode >> 36) & 0x1FFF);
+ public SuDim Dim => (SuDim)((_opcode >> 33) & 0x7);
+ public int DestPred2 => (int)((_opcode >> 30) & 0x7);
+ public CacheOpLd CacheOp => (CacheOpLd)((_opcode >> 24) & 0x3);
+ public bool Ba => (_opcode & 0x800000) != 0;
+ public SuSize Size => (SuSize)((_opcode >> 20) & 0x7);
+ }
+
+ /// <summary>
+ /// Immutable bit-field view over a raw 64-bit opcode word.
+ /// Rgba is the 4-bit field in bits 20-23; CacheOp bits 24-25, DestPred2 bits 30-32,
+ /// Dim bits 33-35, SrcC bits 39-46 and Clamp bits 49-50.
+ /// </summary>
+ readonly struct InstSuldB
+ {
+ // Captured once by the constructor and never modified afterwards.
+ private readonly ulong _opcode;
+ public InstSuldB(ulong opcode) => _opcode = opcode;
+ public int Dest => (int)((_opcode >> 0) & 0xFF);
+ public int SrcA => (int)((_opcode >> 8) & 0xFF);
+ public int SrcC => (int)((_opcode >> 39) & 0xFF);
+ public int Pred => (int)((_opcode >> 16) & 0x7);
+ public bool PredInv => (_opcode & 0x80000) != 0;
+ public Clamp Clamp => (Clamp)((_opcode >> 49) & 0x3);
+ public SuDim Dim => (SuDim)((_opcode >> 33) & 0x7);
+ public int DestPred2 => (int)((_opcode >> 30) & 0x7);
+ public CacheOpLd CacheOp => (CacheOpLd)((_opcode >> 24) & 0x3);
+ public SuRgba Rgba => (SuRgba)((_opcode >> 20) & 0xF);
+ }
+
+ /// <summary>
+ /// Immutable bit-field view over a raw 64-bit opcode word.
+ /// Rgba is the 4-bit field in bits 20-23; CacheOp bits 24-25, DestPred2 bits 30-32,
+ /// Dim bits 33-35, TidB bits 36-48 and Clamp bits 49-50.
+ /// </summary>
+ readonly struct InstSuld
+ {
+ // Captured once by the constructor and never modified afterwards.
+ private readonly ulong _opcode;
+ public InstSuld(ulong opcode) => _opcode = opcode;
+ public int Dest => (int)((_opcode >> 0) & 0xFF);
+ public int SrcA => (int)((_opcode >> 8) & 0xFF);
+ public int Pred => (int)((_opcode >> 16) & 0x7);
+ public bool PredInv => (_opcode & 0x80000) != 0;
+ public Clamp Clamp => (Clamp)((_opcode >> 49) & 0x3);
+ public int TidB => (int)((_opcode >> 36) & 0x1FFF);
+ public SuDim Dim => (SuDim)((_opcode >> 33) & 0x7);
+ public int DestPred2 => (int)((_opcode >> 30) & 0x7);
+ public CacheOpLd CacheOp => (CacheOpLd)((_opcode >> 24) & 0x3);
+ public SuRgba Rgba => (SuRgba)((_opcode >> 20) & 0xF);
+ }
+
+ /// <summary>
+ /// Immutable bit-field view over a raw 64-bit opcode word.
+ /// Size is bits 20-22, Ba bit 23, Op bits 24-26, Dim bits 33-35, SrcC bits 39-46 and Clamp bits 49-50.
+ /// </summary>
+ readonly struct InstSuredB
+ {
+ // Captured once by the constructor and never modified afterwards.
+ private readonly ulong _opcode;
+ public InstSuredB(ulong opcode) => _opcode = opcode;
+ public int Dest => (int)((_opcode >> 0) & 0xFF);
+ public int SrcA => (int)((_opcode >> 8) & 0xFF);
+ public int SrcC => (int)((_opcode >> 39) & 0xFF);
+ public int Pred => (int)((_opcode >> 16) & 0x7);
+ public bool PredInv => (_opcode & 0x80000) != 0;
+ public Clamp Clamp => (Clamp)((_opcode >> 49) & 0x3);
+ public SuDim Dim => (SuDim)((_opcode >> 33) & 0x7);
+ public RedOp Op => (RedOp)((_opcode >> 24) & 0x7);
+ public bool Ba => (_opcode & 0x800000) != 0;
+ public SuatomSize Size => (SuatomSize)((_opcode >> 20) & 0x7);
+ }
+
+ /// <summary>
+ /// Immutable bit-field view over a raw 64-bit opcode word.
+ /// Size is bits 20-22, Ba bit 23, Op bits 24-26, Dim bits 33-35, TidB bits 36-48 and Clamp bits 49-50.
+ /// </summary>
+ readonly struct InstSured
+ {
+ // Captured once by the constructor and never modified afterwards.
+ private readonly ulong _opcode;
+ public InstSured(ulong opcode) => _opcode = opcode;
+ public int Dest => (int)((_opcode >> 0) & 0xFF);
+ public int SrcA => (int)((_opcode >> 8) & 0xFF);
+ public int Pred => (int)((_opcode >> 16) & 0x7);
+ public bool PredInv => (_opcode & 0x80000) != 0;
+ public Clamp Clamp => (Clamp)((_opcode >> 49) & 0x3);
+ public int TidB => (int)((_opcode >> 36) & 0x1FFF);
+ public SuDim Dim => (SuDim)((_opcode >> 33) & 0x7);
+ public RedOp Op => (RedOp)((_opcode >> 24) & 0x7);
+ public bool Ba => (_opcode & 0x800000) != 0;
+ public SuatomSize Size => (SuatomSize)((_opcode >> 20) & 0x7);
+ }
+
+ /// <summary>
+ /// Immutable bit-field view over a raw 64-bit opcode word.
+ /// Size is bits 20-22, Ba bit 23, CacheOp bits 24-25, Dim bits 33-35, SrcC bits 39-46 and Clamp bits 49-50.
+ /// </summary>
+ readonly struct InstSustDB
+ {
+ // Captured once by the constructor and never modified afterwards.
+ private readonly ulong _opcode;
+ public InstSustDB(ulong opcode) => _opcode = opcode;
+ public int Dest => (int)((_opcode >> 0) & 0xFF);
+ public int SrcA => (int)((_opcode >> 8) & 0xFF);
+ public int SrcC => (int)((_opcode >> 39) & 0xFF);
+ public int Pred => (int)((_opcode >> 16) & 0x7);
+ public bool PredInv => (_opcode & 0x80000) != 0;
+ public Clamp Clamp => (Clamp)((_opcode >> 49) & 0x3);
+ public SuDim Dim => (SuDim)((_opcode >> 33) & 0x7);
+ public CacheOpSt CacheOp => (CacheOpSt)((_opcode >> 24) & 0x3);
+ public bool Ba => (_opcode & 0x800000) != 0;
+ public SuSize Size => (SuSize)((_opcode >> 20) & 0x7);
+ }
+
+ /// <summary>
+ /// Immutable bit-field view over a raw 64-bit opcode word.
+ /// Size is bits 20-22, Ba bit 23, CacheOp bits 24-25, Dim bits 33-35, TidB bits 36-48 and Clamp bits 49-50.
+ /// </summary>
+ readonly struct InstSustD
+ {
+ // Captured once by the constructor and never modified afterwards.
+ private readonly ulong _opcode;
+ public InstSustD(ulong opcode) => _opcode = opcode;
+ public int Dest => (int)((_opcode >> 0) & 0xFF);
+ public int SrcA => (int)((_opcode >> 8) & 0xFF);
+ public int Pred => (int)((_opcode >> 16) & 0x7);
+ public bool PredInv => (_opcode & 0x80000) != 0;
+ public Clamp Clamp => (Clamp)((_opcode >> 49) & 0x3);
+ public int TidB => (int)((_opcode >> 36) & 0x1FFF);
+ public SuDim Dim => (SuDim)((_opcode >> 33) & 0x7);
+ public CacheOpSt CacheOp => (CacheOpSt)((_opcode >> 24) & 0x3);
+ public bool Ba => (_opcode & 0x800000) != 0;
+ public SuSize Size => (SuSize)((_opcode >> 20) & 0x7);
+ }
+
+ /// <summary>
+ /// Immutable bit-field view over a raw 64-bit opcode word.
+ /// Rgba is the 4-bit field in bits 20-23; CacheOp bits 24-25, Dim bits 33-35,
+ /// SrcC bits 39-46 and Clamp bits 49-50.
+ /// </summary>
+ readonly struct InstSustB
+ {
+ // Captured once by the constructor and never modified afterwards.
+ private readonly ulong _opcode;
+ public InstSustB(ulong opcode) => _opcode = opcode;
+ public int Dest => (int)((_opcode >> 0) & 0xFF);
+ public int SrcA => (int)((_opcode >> 8) & 0xFF);
+ public int SrcC => (int)((_opcode >> 39) & 0xFF);
+ public int Pred => (int)((_opcode >> 16) & 0x7);
+ public bool PredInv => (_opcode & 0x80000) != 0;
+ public Clamp Clamp => (Clamp)((_opcode >> 49) & 0x3);
+ public SuDim Dim => (SuDim)((_opcode >> 33) & 0x7);
+ public CacheOpSt CacheOp => (CacheOpSt)((_opcode >> 24) & 0x3);
+ public SuRgba Rgba => (SuRgba)((_opcode >> 20) & 0xF);
+ }
+
+ /// <summary>
+ /// Immutable bit-field view over a raw 64-bit opcode word.
+ /// Rgba is the 4-bit field in bits 20-23; CacheOp bits 24-25, Dim bits 33-35,
+ /// TidB bits 36-48 and Clamp bits 49-50.
+ /// </summary>
+ readonly struct InstSust
+ {
+ // Captured once by the constructor and never modified afterwards.
+ private readonly ulong _opcode;
+ public InstSust(ulong opcode) => _opcode = opcode;
+ public int Dest => (int)((_opcode >> 0) & 0xFF);
+ public int SrcA => (int)((_opcode >> 8) & 0xFF);
+ public int Pred => (int)((_opcode >> 16) & 0x7);
+ public bool PredInv => (_opcode & 0x80000) != 0;
+ public Clamp Clamp => (Clamp)((_opcode >> 49) & 0x3);
+ public int TidB => (int)((_opcode >> 36) & 0x1FFF);
+ public SuDim Dim => (SuDim)((_opcode >> 33) & 0x7);
+ public CacheOpSt CacheOp => (CacheOpSt)((_opcode >> 24) & 0x3);
+ public SuRgba Rgba => (SuRgba)((_opcode >> 20) & 0xF);
+ }
+
+ /// <summary>
+ /// Immutable bit-field view over a raw 64-bit opcode word: Ccc is the 5-bit field in bits 0-4,
+ /// Pred is bits 16-18 and PredInv is bit 19.
+ /// </summary>
+ readonly struct InstSync
+ {
+ // Captured once by the constructor and never modified afterwards.
+ private readonly ulong _opcode;
+ public InstSync(ulong opcode) => _opcode = opcode;
+ public int Pred => (int)((_opcode >> 16) & 0x7);
+ public bool PredInv => (_opcode & 0x80000) != 0;
+ public Ccc Ccc => (Ccc)((_opcode >> 0) & 0x1F);
+ }
+
+ /// <summary>
+ /// Immutable bit-field view over a raw 64-bit opcode word.
+ /// Field positions read here: Dim bits 28-30, WMask bits 31-34, Ndv bit 35, TidB bits 36-48,
+ /// Nodep bit 49, Dc bit 50, DestPred bits 51-53, Aoffi bit 54, Lod bits 55-57, Lc bit 58.
+ /// </summary>
+ readonly struct InstTex
+ {
+ // Captured once by the constructor and never modified afterwards.
+ private readonly ulong _opcode;
+ public InstTex(ulong opcode) => _opcode = opcode;
+ public int Dest => (int)((_opcode >> 0) & 0xFF);
+ public int SrcA => (int)((_opcode >> 8) & 0xFF);
+ public int SrcB => (int)((_opcode >> 20) & 0xFF);
+ public int Pred => (int)((_opcode >> 16) & 0x7);
+ public bool PredInv => (_opcode & 0x80000) != 0;
+ public bool Lc => (_opcode & 0x400000000000000) != 0;
+ public int DestPred => (int)((_opcode >> 51) & 0x7);
+ public int TidB => (int)((_opcode >> 36) & 0x1FFF);
+ public Lod Lod => (Lod)((_opcode >> 55) & 0x7);
+ public bool Aoffi => (_opcode & 0x40000000000000) != 0;
+ public bool Dc => (_opcode & 0x4000000000000) != 0;
+ public bool Ndv => (_opcode & 0x800000000) != 0;
+ public TexDim Dim => (TexDim)((_opcode >> 28) & 0x7);
+ public int WMask => (int)((_opcode >> 31) & 0xF);
+ public bool Nodep => (_opcode & 0x2000000000000) != 0;
+ }
+
+ /// <summary>
+ /// Immutable bit-field view over a raw 64-bit opcode word.
+ /// Field positions read here: Dim bits 28-30, WMask bits 31-34, Ndv bit 35, Aoffib bit 36,
+ /// Lodb bits 37-39, Lcb bit 40, Nodep bit 49, Dc bit 50, DestPred bits 51-53.
+ /// </summary>
+ readonly struct InstTexB
+ {
+ // Captured once by the constructor and never modified afterwards.
+ private readonly ulong _opcode;
+ public InstTexB(ulong opcode) => _opcode = opcode;
+ public int Dest => (int)((_opcode >> 0) & 0xFF);
+ public int SrcA => (int)((_opcode >> 8) & 0xFF);
+ public int SrcB => (int)((_opcode >> 20) & 0xFF);
+ public int Pred => (int)((_opcode >> 16) & 0x7);
+ public bool PredInv => (_opcode & 0x80000) != 0;
+ public bool Lcb => (_opcode & 0x10000000000) != 0;
+ public int DestPred => (int)((_opcode >> 51) & 0x7);
+ public Lod Lodb => (Lod)((_opcode >> 37) & 0x7);
+ public bool Aoffib => (_opcode & 0x1000000000) != 0;
+ public bool Dc => (_opcode & 0x4000000000000) != 0;
+ public bool Ndv => (_opcode & 0x800000000) != 0;
+ public TexDim Dim => (TexDim)((_opcode >> 28) & 0x7);
+ public int WMask => (int)((_opcode >> 31) & 0xF);
+ public bool Nodep => (_opcode & 0x2000000000000) != 0;
+ }
+
+ /// <summary>
+ /// Immutable bit-field view over a raw 64-bit opcode word.
+ /// Exposes a second destination field, Dest2, in bits 28-35; TidB is bits 36-48, Nodep bit 49,
+ /// WMask a 3-bit field in bits 50-52 and Target a 4-bit field in bits 53-56.
+ /// </summary>
+ readonly struct InstTexs
+ {
+ // Captured once by the constructor and never modified afterwards.
+ private readonly ulong _opcode;
+ public InstTexs(ulong opcode) => _opcode = opcode;
+ public int Dest => (int)((_opcode >> 0) & 0xFF);
+ public int SrcA => (int)((_opcode >> 8) & 0xFF);
+ public int SrcB => (int)((_opcode >> 20) & 0xFF);
+ public int Pred => (int)((_opcode >> 16) & 0x7);
+ public bool PredInv => (_opcode & 0x80000) != 0;
+ public int TidB => (int)((_opcode >> 36) & 0x1FFF);
+ public TexsTarget Target => (TexsTarget)((_opcode >> 53) & 0xF);
+ public int WMask => (int)((_opcode >> 50) & 0x7);
+ public bool Nodep => (_opcode & 0x2000000000000) != 0;
+ public int Dest2 => (int)((_opcode >> 28) & 0xFF);
+ }
+
+ /// <summary>
+ /// Immutable bit-field view over a raw 64-bit opcode word.
+ /// Field positions read here: Dim bits 28-30, WMask bits 31-34, Toff bit 35, TidB bits 36-48,
+ /// Nodep bit 49, Ms bit 50, DestPred bits 51-53, Cl bit 54, Lod bit 55.
+ /// </summary>
+ readonly struct InstTld
+ {
+ // Captured once by the constructor and never modified afterwards.
+ private readonly ulong _opcode;
+ public InstTld(ulong opcode) => _opcode = opcode;
+ public int Dest => (int)((_opcode >> 0) & 0xFF);
+ public int SrcA => (int)((_opcode >> 8) & 0xFF);
+ public int SrcB => (int)((_opcode >> 20) & 0xFF);
+ public int Pred => (int)((_opcode >> 16) & 0x7);
+ public bool PredInv => (_opcode & 0x80000) != 0;
+ public int TidB => (int)((_opcode >> 36) & 0x1FFF);
+ public int WMask => (int)((_opcode >> 31) & 0xF);
+ public bool Lod => (_opcode & 0x80000000000000) != 0;
+ public bool Toff => (_opcode & 0x800000000) != 0;
+ public bool Ms => (_opcode & 0x4000000000000) != 0;
+ public bool Cl => (_opcode & 0x40000000000000) != 0;
+ public bool Nodep => (_opcode & 0x2000000000000) != 0;
+ public int DestPred => (int)((_opcode >> 51) & 0x7);
+ public TexDim Dim => (TexDim)((_opcode >> 28) & 0x7);
+ }
+
+ /// <summary>
+ /// Immutable bit-field view over a raw 64-bit opcode word.
+ /// Field positions read here: Dim bits 28-30, WMask bits 31-34, Toff bit 35, Nodep bit 49,
+ /// Ms bit 50, DestPred bits 51-53, Cl bit 54, Lod bit 55.
+ /// </summary>
+ readonly struct InstTldB
+ {
+ // Captured once by the constructor and never modified afterwards.
+ private readonly ulong _opcode;
+ public InstTldB(ulong opcode) => _opcode = opcode;
+ public int Dest => (int)((_opcode >> 0) & 0xFF);
+ public int SrcA => (int)((_opcode >> 8) & 0xFF);
+ public int SrcB => (int)((_opcode >> 20) & 0xFF);
+ public int Pred => (int)((_opcode >> 16) & 0x7);
+ public bool PredInv => (_opcode & 0x80000) != 0;
+ public int WMask => (int)((_opcode >> 31) & 0xF);
+ public bool Lod => (_opcode & 0x80000000000000) != 0;
+ public bool Toff => (_opcode & 0x800000000) != 0;
+ public bool Ms => (_opcode & 0x4000000000000) != 0;
+ public bool Cl => (_opcode & 0x40000000000000) != 0;
+ public bool Nodep => (_opcode & 0x2000000000000) != 0;
+ public int DestPred => (int)((_opcode >> 51) & 0x7);
+ public TexDim Dim => (TexDim)((_opcode >> 28) & 0x7);
+ }
+
+ /// <summary>
+ /// Immutable bit-field view over a raw 64-bit opcode word.
+ /// Exposes a second destination field, Dest2, in bits 28-35; TidB is bits 36-48, Nodep bit 49,
+ /// WMask a 3-bit field in bits 50-52 and Target a 4-bit field in bits 53-56.
+ /// </summary>
+ readonly struct InstTlds
+ {
+ // Captured once by the constructor and never modified afterwards.
+ private readonly ulong _opcode;
+ public InstTlds(ulong opcode) => _opcode = opcode;
+ public int Dest => (int)((_opcode >> 0) & 0xFF);
+ public int SrcA => (int)((_opcode >> 8) & 0xFF);
+ public int SrcB => (int)((_opcode >> 20) & 0xFF);
+ public int Pred => (int)((_opcode >> 16) & 0x7);
+ public bool PredInv => (_opcode & 0x80000) != 0;
+ public int TidB => (int)((_opcode >> 36) & 0x1FFF);
+ public TldsTarget Target => (TldsTarget)((_opcode >> 53) & 0xF);
+ public int WMask => (int)((_opcode >> 50) & 0x7);
+ public bool Nodep => (_opcode & 0x2000000000000) != 0;
+ public int Dest2 => (int)((_opcode >> 28) & 0xFF);
+ }
+
+ /// <summary>
+ /// Immutable bit-field view over a raw 64-bit opcode word.
+ /// Field positions read here: Dim bits 28-30, WMask bits 31-34, Ndv bit 35, TidB bits 36-48,
+ /// Nodep bit 49, Dc bit 50, DestPred bits 51-53, Toff bits 54-55, TexComp bits 56-57, Lc bit 58.
+ /// </summary>
+ readonly struct InstTld4
+ {
+ // Captured once by the constructor and never modified afterwards.
+ private readonly ulong _opcode;
+ public InstTld4(ulong opcode) => _opcode = opcode;
+ public int Dest => (int)((_opcode >> 0) & 0xFF);
+ public int SrcA => (int)((_opcode >> 8) & 0xFF);
+ public int SrcB => (int)((_opcode >> 20) & 0xFF);
+ public int Pred => (int)((_opcode >> 16) & 0x7);
+ public bool PredInv => (_opcode & 0x80000) != 0;
+ public bool Lc => (_opcode & 0x400000000000000) != 0;
+ public int DestPred => (int)((_opcode >> 51) & 0x7);
+ public int TidB => (int)((_opcode >> 36) & 0x1FFF);
+ public TexComp TexComp => (TexComp)((_opcode >> 56) & 0x3);
+ public TexOffset Toff => (TexOffset)((_opcode >> 54) & 0x3);
+ public bool Dc => (_opcode & 0x4000000000000) != 0;
+ public bool Ndv => (_opcode & 0x800000000) != 0;
+ public TexDim Dim => (TexDim)((_opcode >> 28) & 0x7);
+ public int WMask => (int)((_opcode >> 31) & 0xF);
+ public bool Nodep => (_opcode & 0x2000000000000) != 0;
+ }
+
+ /// <summary>
+ /// Immutable bit-field view over a raw 64-bit opcode word.
+ /// Field positions read here: Dim bits 28-30, WMask bits 31-34, Ndv bit 35, Toff bits 36-37,
+ /// TexComp bits 38-39, Lc bit 40, Nodep bit 49, Dc bit 50, DestPred bits 51-53.
+ /// </summary>
+ readonly struct InstTld4B
+ {
+ // Captured once by the constructor and never modified afterwards.
+ private readonly ulong _opcode;
+ public InstTld4B(ulong opcode) => _opcode = opcode;
+ public int Dest => (int)((_opcode >> 0) & 0xFF);
+ public int SrcA => (int)((_opcode >> 8) & 0xFF);
+ public int SrcB => (int)((_opcode >> 20) & 0xFF);
+ public int Pred => (int)((_opcode >> 16) & 0x7);
+ public bool PredInv => (_opcode & 0x80000) != 0;
+ public bool Lc => (_opcode & 0x10000000000) != 0;
+ public int DestPred => (int)((_opcode >> 51) & 0x7);
+ public TexComp TexComp => (TexComp)((_opcode >> 38) & 0x3);
+ public TexOffset Toff => (TexOffset)((_opcode >> 36) & 0x3);
+ public bool Dc => (_opcode & 0x4000000000000) != 0;
+ public bool Ndv => (_opcode & 0x800000000) != 0;
+ public TexDim Dim => (TexDim)((_opcode >> 28) & 0x7);
+ public int WMask => (int)((_opcode >> 31) & 0xF);
+ public bool Nodep => (_opcode & 0x2000000000000) != 0;
+ }
+
+ /// <summary>
+ /// Immutable bit-field view over a raw 64-bit opcode word.
+ /// Exposes a second destination field, Dest2, in bits 28-35; TidB is bits 36-48, Nodep bit 49,
+ /// Dc bit 50, Aoffi bit 51 and TexComp bits 52-53.
+ /// </summary>
+ readonly struct InstTld4s
+ {
+ // Captured once by the constructor and never modified afterwards.
+ private readonly ulong _opcode;
+ public InstTld4s(ulong opcode) => _opcode = opcode;
+ public int Dest => (int)((_opcode >> 0) & 0xFF);
+ public int SrcA => (int)((_opcode >> 8) & 0xFF);
+ public int SrcB => (int)((_opcode >> 20) & 0xFF);
+ public int Pred => (int)((_opcode >> 16) & 0x7);
+ public bool PredInv => (_opcode & 0x80000) != 0;
+ public int TidB => (int)((_opcode >> 36) & 0x1FFF);
+ public TexComp TexComp => (TexComp)((_opcode >> 52) & 0x3);
+ public bool Aoffi => (_opcode & 0x8000000000000) != 0;
+ public bool Dc => (_opcode & 0x4000000000000) != 0;
+ public bool Nodep => (_opcode & 0x2000000000000) != 0;
+ public int Dest2 => (int)((_opcode >> 28) & 0xFF);
+ }
+
+ /// <summary>
+ /// Immutable bit-field view over a raw 64-bit opcode word.
+ /// Field positions read here: Dim bits 28-30, WMask bits 31-34, Ndv bit 35, TidB bits 36-48, Nodep bit 49.
+ /// </summary>
+ readonly struct InstTmml
+ {
+ // Captured once by the constructor and never modified afterwards.
+ private readonly ulong _opcode;
+ public InstTmml(ulong opcode) => _opcode = opcode;
+ public int Dest => (int)((_opcode >> 0) & 0xFF);
+ public int SrcA => (int)((_opcode >> 8) & 0xFF);
+ public int SrcB => (int)((_opcode >> 20) & 0xFF);
+ public int Pred => (int)((_opcode >> 16) & 0x7);
+ public bool PredInv => (_opcode & 0x80000) != 0;
+ public bool Nodep => (_opcode & 0x2000000000000) != 0;
+ public int TidB => (int)((_opcode >> 36) & 0x1FFF);
+ public bool Ndv => (_opcode & 0x800000000) != 0;
+ public int WMask => (int)((_opcode >> 31) & 0xF);
+ public TexDim Dim => (TexDim)((_opcode >> 28) & 0x7);
+ }
+
+ /// <summary>
+ /// Immutable bit-field view over a raw 64-bit opcode word.
+ /// Field positions read here: Dim bits 28-30, WMask bits 31-34, Ndv bit 35, Nodep bit 49.
+ /// </summary>
+ readonly struct InstTmmlB
+ {
+ // Captured once by the constructor and never modified afterwards.
+ private readonly ulong _opcode;
+ public InstTmmlB(ulong opcode) => _opcode = opcode;
+ public int Dest => (int)((_opcode >> 0) & 0xFF);
+ public int SrcA => (int)((_opcode >> 8) & 0xFF);
+ public int SrcB => (int)((_opcode >> 20) & 0xFF);
+ public int Pred => (int)((_opcode >> 16) & 0x7);
+ public bool PredInv => (_opcode & 0x80000) != 0;
+ public bool Nodep => (_opcode & 0x2000000000000) != 0;
+ public bool Ndv => (_opcode & 0x800000000) != 0;
+ public int WMask => (int)((_opcode >> 31) & 0xF);
+ public TexDim Dim => (TexDim)((_opcode >> 28) & 0x7);
+ }
+
+ /// <summary>
+ /// Immutable bit-field view over a raw 64-bit opcode word.
+ /// Layout read by the properties: Dest bits 0-7, SrcA bits 8-15, Pred bits 16-18, PredInv bit 19,
+ /// WMask bits 31-34, Ndv bit 35, TidB bits 36-48, Nodep bit 49.
+ /// </summary>
+ readonly struct InstTxa
+ {
+ // Captured once by the constructor and never modified afterwards.
+ private readonly ulong _opcode;
+ public InstTxa(ulong opcode) => _opcode = opcode;
+ public int Dest => (int)((_opcode >> 0) & 0xFF);
+ public int SrcA => (int)((_opcode >> 8) & 0xFF);
+ public int Pred => (int)((_opcode >> 16) & 0x7);
+ public bool PredInv => (_opcode & 0x80000) != 0;
+ public bool Nodep => (_opcode & 0x2000000000000) != 0;
+ public int TidB => (int)((_opcode >> 36) & 0x1FFF);
+ public bool Ndv => (_opcode & 0x800000000) != 0;
+ public int WMask => (int)((_opcode >> 31) & 0xF);
+ }
+
+ /// <summary>
+ /// Immutable bit-field view over a raw 64-bit opcode word.
+ /// Field positions read here: Dim bits 28-30, WMask bits 31-34, Toff bit 35, TidB bits 36-48,
+ /// Nodep bit 49, Lc bit 50, DestPred bits 51-53.
+ /// </summary>
+ readonly struct InstTxd
+ {
+ // Captured once by the constructor and never modified afterwards.
+ private readonly ulong _opcode;
+ public InstTxd(ulong opcode) => _opcode = opcode;
+ public int Dest => (int)((_opcode >> 0) & 0xFF);
+ public int SrcA => (int)((_opcode >> 8) & 0xFF);
+ public int SrcB => (int)((_opcode >> 20) & 0xFF);
+ public int Pred => (int)((_opcode >> 16) & 0x7);
+ public bool PredInv => (_opcode & 0x80000) != 0;
+ public int DestPred => (int)((_opcode >> 51) & 0x7);
+ public bool Lc => (_opcode & 0x4000000000000) != 0;
+ public bool Nodep => (_opcode & 0x2000000000000) != 0;
+ public int TidB => (int)((_opcode >> 36) & 0x1FFF);
+ public bool Toff => (_opcode & 0x800000000) != 0;
+ public int WMask => (int)((_opcode >> 31) & 0xF);
+ public TexDim Dim => (TexDim)((_opcode >> 28) & 0x7);
+ }
+
+ /// <summary>
+ /// Immutable bit-field view over a raw 64-bit opcode word.
+ /// Field positions read here: Dim bits 28-30, WMask bits 31-34, Toff bit 35,
+ /// Nodep bit 49, Lc bit 50, DestPred bits 51-53.
+ /// </summary>
+ readonly struct InstTxdB
+ {
+ // Captured once by the constructor and never modified afterwards.
+ private readonly ulong _opcode;
+ public InstTxdB(ulong opcode) => _opcode = opcode;
+ public int Dest => (int)((_opcode >> 0) & 0xFF);
+ public int SrcA => (int)((_opcode >> 8) & 0xFF);
+ public int SrcB => (int)((_opcode >> 20) & 0xFF);
+ public int Pred => (int)((_opcode >> 16) & 0x7);
+ public bool PredInv => (_opcode & 0x80000) != 0;
+ public int DestPred => (int)((_opcode >> 51) & 0x7);
+ public bool Lc => (_opcode & 0x4000000000000) != 0;
+ public bool Nodep => (_opcode & 0x2000000000000) != 0;
+ public bool Toff => (_opcode & 0x800000000) != 0;
+ public int WMask => (int)((_opcode >> 31) & 0xF);
+ public TexDim Dim => (TexDim)((_opcode >> 28) & 0x7);
+ }
+
+ /// <summary>
+ /// Immutable bit-field view over a raw 64-bit opcode word.
+ /// TexQuery is the 6-bit field in bits 22-27; WMask is bits 31-34, TidB bits 36-48 and Nodep bit 49.
+ /// </summary>
+ readonly struct InstTxq
+ {
+ // Captured once by the constructor and never modified afterwards.
+ private readonly ulong _opcode;
+ public InstTxq(ulong opcode) => _opcode = opcode;
+ public int Dest => (int)((_opcode >> 0) & 0xFF);
+ public int SrcA => (int)((_opcode >> 8) & 0xFF);
+ public int Pred => (int)((_opcode >> 16) & 0x7);
+ public bool PredInv => (_opcode & 0x80000) != 0;
+ public bool Nodep => (_opcode & 0x2000000000000) != 0;
+ public int TidB => (int)((_opcode >> 36) & 0x1FFF);
+ public int WMask => (int)((_opcode >> 31) & 0xF);
+ public TexQuery TexQuery => (TexQuery)((_opcode >> 22) & 0x3F);
+ }
+
+ /// <summary>
+ /// Immutable bit-field view over a raw 64-bit opcode word.
+ /// TexQuery is the 6-bit field in bits 22-27; WMask is bits 31-34 and Nodep bit 49.
+ /// </summary>
+ readonly struct InstTxqB
+ {
+ // Captured once by the constructor and never modified afterwards.
+ private readonly ulong _opcode;
+ public InstTxqB(ulong opcode) => _opcode = opcode;
+ public int Dest => (int)((_opcode >> 0) & 0xFF);
+ public int SrcA => (int)((_opcode >> 8) & 0xFF);
+ public int Pred => (int)((_opcode >> 16) & 0x7);
+ public bool PredInv => (_opcode & 0x80000) != 0;
+ public bool Nodep => (_opcode & 0x2000000000000) != 0;
+ public int WMask => (int)((_opcode >> 31) & 0xF);
+ public TexQuery TexQuery => (TexQuery)((_opcode >> 22) & 0x3F);
+ }
+
+ /// <summary>
+ /// Immutable bit-field view over a raw 64-bit opcode word.
+ /// ASelect and BSelect are 4-bit values stitched together from two places:
+ /// ASelect takes its low 3 bits from opcode bits 36-38 and its bit 3 from opcode bit 48;
+ /// BSelect takes its low 3 bits from opcode bits 28-30 and its bit 3 from opcode bit 49.
+ /// Other flags: WriteCC bit 47, BVideo bit 50, VideoOp bits 51-53, DFormat bit 54, Sat bit 55.
+ /// </summary>
+ readonly struct InstVabsdiff
+ {
+ // Captured once by the constructor and never modified afterwards.
+ private readonly ulong _opcode;
+ public InstVabsdiff(ulong opcode) => _opcode = opcode;
+ public int Dest => (int)((_opcode >> 0) & 0xFF);
+ public int SrcA => (int)((_opcode >> 8) & 0xFF);
+ public int SrcB => (int)((_opcode >> 20) & 0xFF);
+ public int SrcC => (int)((_opcode >> 39) & 0xFF);
+ public int Pred => (int)((_opcode >> 16) & 0x7);
+ public bool PredInv => (_opcode & 0x80000) != 0;
+ public bool WriteCC => (_opcode & 0x800000000000) != 0;
+ public bool DFormat => (_opcode & 0x40000000000000) != 0;
+ public VectorSelect ASelect => (VectorSelect)((int)((_opcode >> 45) & 0x8) | (int)((_opcode >> 36) & 0x7));
+ public VectorSelect BSelect => (VectorSelect)((int)((_opcode >> 46) & 0x8) | (int)((_opcode >> 28) & 0x7));
+ public bool Sat => (_opcode & 0x80000000000000) != 0;
+ public VideoOp VideoOp => (VideoOp)((_opcode >> 51) & 0x7);
+ public bool BVideo => (_opcode & 0x4000000000000) != 0;
+ }
+
+ /// <summary>
+ /// Immutable bit-field view over a raw 64-bit opcode word.
+ /// LaneMask4 is a 4-bit value stitched together from opcode bits 36-37 (low 2 bits)
+ /// and opcode bits 51-52 (high 2 bits).
+ /// Other fields: Bsel4 bits 28-31, Asel4 bits 32-35, DFormat bit 38, WriteCC bit 47,
+ /// SrcAFmt bit 48, SrcBFmt bit 49, Sat bit 50, VRed bits 53-54.
+ /// </summary>
+ readonly struct InstVabsdiff4
+ {
+ // Captured once by the constructor and never modified afterwards.
+ private readonly ulong _opcode;
+ public InstVabsdiff4(ulong opcode) => _opcode = opcode;
+ public int Dest => (int)((_opcode >> 0) & 0xFF);
+ public int SrcA => (int)((_opcode >> 8) & 0xFF);
+ public int SrcB => (int)((_opcode >> 20) & 0xFF);
+ public int SrcC => (int)((_opcode >> 39) & 0xFF);
+ public int Pred => (int)((_opcode >> 16) & 0x7);
+ public bool PredInv => (_opcode & 0x80000) != 0;
+ public bool WriteCC => (_opcode & 0x800000000000) != 0;
+ public VideoRed VRed => (VideoRed)((_opcode >> 53) & 0x3);
+ public LaneMask4 LaneMask4 => (LaneMask4)((int)((_opcode >> 49) & 0xC) | (int)((_opcode >> 36) & 0x3));
+ public bool Sat => (_opcode & 0x4000000000000) != 0;
+ public bool SrcBFmt => (_opcode & 0x2000000000000) != 0;
+ public bool SrcAFmt => (_opcode & 0x1000000000000) != 0;
+ public bool DFormat => (_opcode & 0x4000000000) != 0;
+ public ASelect4 Asel4 => (ASelect4)((_opcode >> 32) & 0xF);
+ public BSelect4 Bsel4 => (BSelect4)((_opcode >> 28) & 0xF);
+ }
+
+    /// <summary>
+    /// Read-only view over a raw 64-bit opcode word that exposes the fields of the Vadd encoding.
+    /// Only the raw value is stored; every property re-extracts its field with a shift and/or mask.
+    /// </summary>
+    readonly struct InstVadd
+    {
+        private readonly ulong _opcode;
+        public InstVadd(ulong opcode) => _opcode = opcode;
+
+        // 8-bit operand fields.
+        public int Dest => (int)((_opcode >> 0) & 0xFF);
+        public int SrcA => (int)((_opcode >> 8) & 0xFF);
+        public int SrcB => (int)((_opcode >> 20) & 0xFF);
+        public int SrcC => (int)((_opcode >> 39) & 0xFF);
+
+        // 3-bit predicate field and its single-bit inversion flag.
+        public int Pred => (int)((_opcode >> 16) & 0x7);
+        public bool PredInv => (_opcode & 0x80000) != 0;
+
+        // 16-bit immediate; it starts at the same bit offset (20) as SrcB, so the two fields overlap.
+        public int Imm16 => (int)((_opcode >> 20) & 0xFFFF);
+
+        public bool WriteCC => (_opcode & 0x800000000000) != 0;
+        public AvgMode AvgMode => (AvgMode)((_opcode >> 56) & 0x3);
+        public bool DFormat => (_opcode & 0x40000000000000) != 0;
+
+        // 4-bit selectors assembled from two separate places: one high bit (opcode bit 48 for A,
+        // bit 49 for B) becomes value bit 3, and a 3-bit field (bits 36..38 for A, 28..30 for B)
+        // supplies value bits 0..2.
+        public VectorSelect ASelect => (VectorSelect)((int)((_opcode >> 45) & 0x8) | (int)((_opcode >> 36) & 0x7));
+        public VectorSelect BSelect => (VectorSelect)((int)((_opcode >> 46) & 0x8) | (int)((_opcode >> 28) & 0x7));
+
+        public bool Sat => (_opcode & 0x80000000000000) != 0;
+        public VideoOp VideoOp => (VideoOp)((_opcode >> 51) & 0x7);
+        public bool BVideo => (_opcode & 0x4000000000000) != 0;
+    }
+
+    /// <summary>
+    /// Read-only view over a raw 64-bit opcode word that exposes the fields of the Vmad encoding.
+    /// Only the raw value is stored; every property re-extracts its field with a shift and/or mask.
+    /// </summary>
+    readonly struct InstVmad
+    {
+        private readonly ulong _opcode;
+        public InstVmad(ulong opcode) => _opcode = opcode;
+
+        // 8-bit operand fields.
+        public int Dest => (int)((_opcode >> 0) & 0xFF);
+        public int SrcA => (int)((_opcode >> 8) & 0xFF);
+        public int SrcB => (int)((_opcode >> 20) & 0xFF);
+        public int SrcC => (int)((_opcode >> 39) & 0xFF);
+
+        // 3-bit predicate field and its single-bit inversion flag.
+        public int Pred => (int)((_opcode >> 16) & 0x7);
+        public bool PredInv => (_opcode & 0x80000) != 0;
+
+        // 16-bit immediate; it starts at the same bit offset (20) as SrcB, so the two fields overlap.
+        public int Imm16 => (int)((_opcode >> 20) & 0xFFFF);
+
+        public bool WriteCC => (_opcode & 0x800000000000) != 0;
+
+        // 4-bit selectors assembled from two separate places: one high bit (opcode bit 48 for A,
+        // bit 49 for B) becomes value bit 3, and a 3-bit field (bits 36..38 for A, 28..30 for B)
+        // supplies value bits 0..2.
+        public VectorSelect ASelect => (VectorSelect)((int)((_opcode >> 45) & 0x8) | (int)((_opcode >> 36) & 0x7));
+        public VectorSelect BSelect => (VectorSelect)((int)((_opcode >> 46) & 0x8) | (int)((_opcode >> 28) & 0x7));
+
+        public bool Sat => (_opcode & 0x80000000000000) != 0;
+
+        // Two adjacent 2-bit fields: bits 53..54 and bits 51..52.
+        public AvgMode AvgMode => (AvgMode)((_opcode >> 53) & 0x3);
+        public VideoScale VideoScale => (VideoScale)((_opcode >> 51) & 0x3);
+
+        public bool BVideo => (_opcode & 0x4000000000000) != 0;
+    }
+
+    /// <summary>
+    /// Read-only view over a raw 64-bit opcode word that exposes the fields of the Vmnmx encoding.
+    /// Only the raw value is stored; every property re-extracts its field with a shift and/or mask.
+    /// </summary>
+    readonly struct InstVmnmx
+    {
+        private readonly ulong _opcode;
+        public InstVmnmx(ulong opcode) => _opcode = opcode;
+
+        // 8-bit operand fields.
+        public int Dest => (int)((_opcode >> 0) & 0xFF);
+        public int SrcA => (int)((_opcode >> 8) & 0xFF);
+        public int SrcB => (int)((_opcode >> 20) & 0xFF);
+        public int SrcC => (int)((_opcode >> 39) & 0xFF);
+
+        // 3-bit predicate field and its single-bit inversion flag.
+        public int Pred => (int)((_opcode >> 16) & 0x7);
+        public bool PredInv => (_opcode & 0x80000) != 0;
+
+        // 16-bit immediate; it starts at the same bit offset (20) as SrcB, so the two fields overlap.
+        public int Imm16 => (int)((_opcode >> 20) & 0xFFFF);
+
+        public bool WriteCC => (_opcode & 0x800000000000) != 0;
+        public bool DFormat => (_opcode & 0x40000000000000) != 0;
+
+        // 4-bit selectors assembled from two separate places: one high bit (opcode bit 48 for A,
+        // bit 49 for B) becomes value bit 3, and a 3-bit field (bits 36..38 for A, 28..30 for B)
+        // supplies value bits 0..2.
+        public VectorSelect ASelect => (VectorSelect)((int)((_opcode >> 45) & 0x8) | (int)((_opcode >> 36) & 0x7));
+        public VectorSelect BSelect => (VectorSelect)((int)((_opcode >> 46) & 0x8) | (int)((_opcode >> 28) & 0x7));
+
+        public bool Sat => (_opcode & 0x80000000000000) != 0;
+        public VideoOp VideoOp => (VideoOp)((_opcode >> 51) & 0x7);
+        public bool Mn => (_opcode & 0x100000000000000) != 0;
+        public bool BVideo => (_opcode & 0x4000000000000) != 0;
+    }
+
+    /// <summary>
+    /// Read-only view over a raw 64-bit opcode word that exposes the fields of the Vote encoding.
+    /// Only the raw value is stored; every property re-extracts its field with a shift and/or mask.
+    /// </summary>
+    readonly struct InstVote
+    {
+        private readonly ulong _opcode;
+        public InstVote(ulong opcode) => _opcode = opcode;
+
+        // 8-bit destination field.
+        public int Dest => (int)((_opcode >> 0) & 0xFF);
+
+        // 3-bit predicate field and its single-bit inversion flag.
+        public int Pred => (int)((_opcode >> 16) & 0x7);
+        public bool PredInv => (_opcode & 0x80000) != 0;
+
+        // 3-bit source predicate field (bits 39..41) and its single-bit inversion flag.
+        public int SrcPred => (int)((_opcode >> 39) & 0x7);
+        public bool SrcPredInv => (_opcode & 0x40000000000) != 0;
+
+        public VoteMode VoteMode => (VoteMode)((_opcode >> 48) & 0x3);
+
+        // 3-bit field at bits 45..47.
+        public int VpDest => (int)((_opcode >> 45) & 0x7);
+    }
+
+    /// <summary>
+    /// Read-only view over a raw 64-bit opcode word that exposes the fields of the Votevtg encoding.
+    /// Only the raw value is stored; every property re-extracts its field with a shift and/or mask.
+    /// </summary>
+    readonly struct InstVotevtg
+    {
+        private readonly ulong _opcode;
+        public InstVotevtg(ulong opcode) => _opcode = opcode;
+
+        // 3-bit predicate field and its single-bit inversion flag.
+        public int Pred => (int)((_opcode >> 16) & 0x7);
+        public bool PredInv => (_opcode & 0x80000) != 0;
+
+        public VoteMode VoteMode => (VoteMode)((_opcode >> 48) & 0x3);
+
+        // 28-bit immediate starting at bit 20; returned zero-extended (no sign extension is applied).
+        public int Imm28 => (int)((_opcode >> 20) & 0xFFFFFFF);
+    }
+
+    /// <summary>
+    /// Read-only view over a raw 64-bit opcode word that exposes the fields of the Vset encoding.
+    /// Only the raw value is stored; every property re-extracts its field with a shift and/or mask.
+    /// </summary>
+    readonly struct InstVset
+    {
+        private readonly ulong _opcode;
+        public InstVset(ulong opcode) => _opcode = opcode;
+
+        // 8-bit operand fields.
+        public int Dest => (int)((_opcode >> 0) & 0xFF);
+        public int SrcA => (int)((_opcode >> 8) & 0xFF);
+        public int SrcB => (int)((_opcode >> 20) & 0xFF);
+        public int SrcC => (int)((_opcode >> 39) & 0xFF);
+
+        // 3-bit predicate field and its single-bit inversion flag.
+        public int Pred => (int)((_opcode >> 16) & 0x7);
+        public bool PredInv => (_opcode & 0x80000) != 0;
+
+        public bool WriteCC => (_opcode & 0x800000000000) != 0;
+
+        // 3-bit comparison selector at bits 54..56.
+        public IComp VComp => (IComp)((_opcode >> 54) & 0x7);
+
+        // 4-bit selectors assembled from two separate places: one high bit (opcode bit 48 for A,
+        // bit 49 for B) becomes value bit 3, and a 3-bit field (bits 36..38 for A, 28..30 for B)
+        // supplies value bits 0..2.
+        public VectorSelect ASelect => (VectorSelect)((int)((_opcode >> 45) & 0x8) | (int)((_opcode >> 36) & 0x7));
+        public VectorSelect BSelect => (VectorSelect)((int)((_opcode >> 46) & 0x8) | (int)((_opcode >> 28) & 0x7));
+
+        public VideoOp VideoOp => (VideoOp)((_opcode >> 51) & 0x7);
+        public bool BVideo => (_opcode & 0x4000000000000) != 0;
+    }
+
+    /// <summary>
+    /// Read-only view over a raw 64-bit opcode word that exposes the fields of the Vsetp encoding.
+    /// Only the raw value is stored; every property re-extracts its field with a shift and/or mask.
+    /// </summary>
+    readonly struct InstVsetp
+    {
+        private readonly ulong _opcode;
+        public InstVsetp(ulong opcode) => _opcode = opcode;
+
+        // 8-bit operand fields.
+        public int SrcA => (int)((_opcode >> 8) & 0xFF);
+        public int SrcB => (int)((_opcode >> 20) & 0xFF);
+
+        // 3-bit predicate field and its single-bit inversion flag.
+        public int Pred => (int)((_opcode >> 16) & 0x7);
+        public bool PredInv => (_opcode & 0x80000) != 0;
+
+        // 16-bit immediate; it starts at the same bit offset (20) as SrcB, so the two fields overlap.
+        public int Imm16 => (int)((_opcode >> 20) & 0xFFFF);
+
+        // 4-bit selectors assembled from two separate places: one high bit (opcode bit 48 for A,
+        // bit 49 for B) becomes value bit 3, and a 3-bit field (bits 36..38 for A, 28..30 for B)
+        // supplies value bits 0..2.
+        public VectorSelect ASelect => (VectorSelect)((int)((_opcode >> 45) & 0x8) | (int)((_opcode >> 36) & 0x7));
+        public VectorSelect BSelect => (VectorSelect)((int)((_opcode >> 46) & 0x8) | (int)((_opcode >> 28) & 0x7));
+
+        // 3-bit comparison selector split around BoolOp: opcode bit 47 becomes value bit 2,
+        // opcode bits 43..44 become value bits 0..1. BoolOp occupies bits 45..46 in between.
+        public IComp VComp => (IComp)((int)((_opcode >> 45) & 0x4) | (int)((_opcode >> 43) & 0x3));
+        public BoolOp BoolOp => (BoolOp)((_opcode >> 45) & 0x3);
+
+        // 3-bit source predicate field (bits 39..41) and its single-bit inversion flag.
+        public int SrcPred => (int)((_opcode >> 39) & 0x7);
+        public bool SrcPredInv => (_opcode & 0x40000000000) != 0;
+
+        // Two 3-bit destination fields at bits 3..5 and 0..2.
+        // NOTE(review): despite the "Inv" suffix, DestPredInv is a 3-bit integer field and not a flag;
+        // presumably it names the predicate that receives the inverted result - confirm against callers.
+        public int DestPred => (int)((_opcode >> 3) & 0x7);
+        public int DestPredInv => (int)((_opcode >> 0) & 0x7);
+
+        public bool BVideo => (_opcode & 0x4000000000000) != 0;
+    }
+
+    /// <summary>
+    /// Read-only view over a raw 64-bit opcode word that exposes the fields of the Vshl encoding.
+    /// Only the raw value is stored; every property re-extracts its field with a shift and/or mask.
+    /// </summary>
+    readonly struct InstVshl
+    {
+        private readonly ulong _opcode;
+        public InstVshl(ulong opcode) => _opcode = opcode;
+
+        // 8-bit operand fields.
+        public int Dest => (int)((_opcode >> 0) & 0xFF);
+        public int SrcA => (int)((_opcode >> 8) & 0xFF);
+        public int SrcB => (int)((_opcode >> 20) & 0xFF);
+        public int SrcC => (int)((_opcode >> 39) & 0xFF);
+
+        // 3-bit predicate field and its single-bit inversion flag.
+        public int Pred => (int)((_opcode >> 16) & 0x7);
+        public bool PredInv => (_opcode & 0x80000) != 0;
+
+        public bool WriteCC => (_opcode & 0x800000000000) != 0;
+        public bool Mv => (_opcode & 0x2000000000000) != 0;
+        public bool DFormat => (_opcode & 0x40000000000000) != 0;
+
+        // ASelect is 4 bits wide: opcode bit 48 becomes value bit 3 and bits 36..38 supply value bits 0..2.
+        // BSelect here is only the 3-bit field at bits 28..30; no high bit is merged in.
+        public VectorSelect ASelect => (VectorSelect)((int)((_opcode >> 45) & 0x8) | (int)((_opcode >> 36) & 0x7));
+        public VectorSelect BSelect => (VectorSelect)((_opcode >> 28) & 0x7);
+
+        public bool Sat => (_opcode & 0x80000000000000) != 0;
+        public VideoOp VideoOp => (VideoOp)((_opcode >> 51) & 0x7);
+        public bool BVideo => (_opcode & 0x4000000000000) != 0;
+    }
+
+    /// <summary>
+    /// Read-only view over a raw 64-bit opcode word that exposes the fields of the Vshr encoding.
+    /// Only the raw value is stored; every property re-extracts its field with a shift and/or mask.
+    /// </summary>
+    readonly struct InstVshr
+    {
+        private readonly ulong _opcode;
+        public InstVshr(ulong opcode) => _opcode = opcode;
+
+        // 8-bit operand fields.
+        public int Dest => (int)((_opcode >> 0) & 0xFF);
+        public int SrcA => (int)((_opcode >> 8) & 0xFF);
+        public int SrcB => (int)((_opcode >> 20) & 0xFF);
+        public int SrcC => (int)((_opcode >> 39) & 0xFF);
+
+        // 3-bit predicate field and its single-bit inversion flag.
+        public int Pred => (int)((_opcode >> 16) & 0x7);
+        public bool PredInv => (_opcode & 0x80000) != 0;
+
+        public bool WriteCC => (_opcode & 0x800000000000) != 0;
+        public bool Mv => (_opcode & 0x2000000000000) != 0;
+        public bool DFormat => (_opcode & 0x40000000000000) != 0;
+
+        // ASelect is 4 bits wide: opcode bit 48 becomes value bit 3 and bits 36..38 supply value bits 0..2.
+        // BSelect here is only the 3-bit field at bits 28..30; no high bit is merged in.
+        public VectorSelect ASelect => (VectorSelect)((int)((_opcode >> 45) & 0x8) | (int)((_opcode >> 36) & 0x7));
+        public VectorSelect BSelect => (VectorSelect)((_opcode >> 28) & 0x7);
+
+        public bool Sat => (_opcode & 0x80000000000000) != 0;
+        public VideoOp VideoOp => (VideoOp)((_opcode >> 51) & 0x7);
+        public bool BVideo => (_opcode & 0x4000000000000) != 0;
+    }
+
+    /// <summary>
+    /// Read-only view over a raw 64-bit opcode word that exposes the fields of the XmadR encoding.
+    /// Only the raw value is stored; every property re-extracts its field with a shift and/or mask.
+    /// </summary>
+    readonly struct InstXmadR
+    {
+        private readonly ulong _opcode;
+        public InstXmadR(ulong opcode) => _opcode = opcode;
+
+        // 8-bit operand fields.
+        public int Dest => (int)((_opcode >> 0) & 0xFF);
+        public int SrcA => (int)((_opcode >> 8) & 0xFF);
+        public int SrcB => (int)((_opcode >> 20) & 0xFF);
+        public int SrcC => (int)((_opcode >> 39) & 0xFF);
+
+        // 3-bit predicate field and its single-bit inversion flag.
+        public int Pred => (int)((_opcode >> 16) & 0x7);
+        public bool PredInv => (_opcode & 0x80000) != 0;
+
+        public bool WriteCC => (_opcode & 0x800000000000) != 0;
+        public bool HiloA => (_opcode & 0x20000000000000) != 0;
+
+        // 3-bit mode field at bits 50..52.
+        public XmadCop XmadCop => (XmadCop)((_opcode >> 50) & 0x7);
+
+        public bool BSigned => (_opcode & 0x2000000000000) != 0;
+        public bool ASigned => (_opcode & 0x1000000000000) != 0;
+
+        // Four single-bit flags.
+        public bool X => (_opcode & 0x4000000000) != 0;
+        public bool Mrg => (_opcode & 0x2000000000) != 0;
+        public bool Psl => (_opcode & 0x1000000000) != 0;
+        public bool HiloB => (_opcode & 0x800000000) != 0;
+    }
+
+    /// <summary>
+    /// Read-only view over a raw 64-bit opcode word that exposes the fields of the XmadI encoding.
+    /// Only the raw value is stored; every property re-extracts its field with a shift and/or mask.
+    /// </summary>
+    readonly struct InstXmadI
+    {
+        private readonly ulong _opcode;
+        public InstXmadI(ulong opcode) => _opcode = opcode;
+
+        // 8-bit operand fields.
+        public int Dest => (int)((_opcode >> 0) & 0xFF);
+        public int SrcA => (int)((_opcode >> 8) & 0xFF);
+        public int SrcC => (int)((_opcode >> 39) & 0xFF);
+
+        // 3-bit predicate field and its single-bit inversion flag.
+        public int Pred => (int)((_opcode >> 16) & 0x7);
+        public bool PredInv => (_opcode & 0x80000) != 0;
+
+        public bool WriteCC => (_opcode & 0x800000000000) != 0;
+
+        // 16-bit immediate at bits 20..35; returned zero-extended.
+        public int Imm16 => (int)((_opcode >> 20) & 0xFFFF);
+
+        public bool HiloA => (_opcode & 0x20000000000000) != 0;
+
+        // 3-bit mode field at bits 50..52.
+        public XmadCop XmadCop => (XmadCop)((_opcode >> 50) & 0x7);
+
+        public bool BSigned => (_opcode & 0x2000000000000) != 0;
+        public bool ASigned => (_opcode & 0x1000000000000) != 0;
+
+        // Three single-bit flags.
+        public bool X => (_opcode & 0x4000000000) != 0;
+        public bool Mrg => (_opcode & 0x2000000000) != 0;
+        public bool Psl => (_opcode & 0x1000000000) != 0;
+    }
+
+    /// <summary>
+    /// Read-only view over a raw 64-bit opcode word that exposes the fields of the XmadC encoding.
+    /// Only the raw value is stored; every property re-extracts its field with a shift and/or mask.
+    /// </summary>
+    readonly struct InstXmadC
+    {
+        private readonly ulong _opcode;
+        public InstXmadC(ulong opcode) => _opcode = opcode;
+
+        // 8-bit operand fields.
+        public int Dest => (int)((_opcode >> 0) & 0xFF);
+        public int SrcA => (int)((_opcode >> 8) & 0xFF);
+
+        // 5-bit slot at bits 34..38 and 14-bit offset at bits 20..33.
+        public int CbufSlot => (int)((_opcode >> 34) & 0x1F);
+        public int CbufOffset => (int)((_opcode >> 20) & 0x3FFF);
+
+        public int SrcC => (int)((_opcode >> 39) & 0xFF);
+
+        // 3-bit predicate field and its single-bit inversion flag.
+        public int Pred => (int)((_opcode >> 16) & 0x7);
+        public bool PredInv => (_opcode & 0x80000) != 0;
+
+        public bool WriteCC => (_opcode & 0x800000000000) != 0;
+
+        // Five single-bit flags.
+        public bool Mrg => (_opcode & 0x100000000000000) != 0;
+        public bool Psl => (_opcode & 0x80000000000000) != 0;
+        public bool X => (_opcode & 0x40000000000000) != 0;
+        public bool HiloA => (_opcode & 0x20000000000000) != 0;
+        public bool HiloB => (_opcode & 0x10000000000000) != 0;
+
+        // 2-bit mode field at bits 50..51 (narrower than the 3-bit XmadCop used by the R/I forms).
+        public XmadCop2 XmadCop => (XmadCop2)((_opcode >> 50) & 0x3);
+
+        public bool BSigned => (_opcode & 0x2000000000000) != 0;
+        public bool ASigned => (_opcode & 0x1000000000000) != 0;
+    }
+
+    /// <summary>
+    /// Read-only view over a raw 64-bit opcode word that exposes the fields of the XmadRc encoding.
+    /// Only the raw value is stored; every property re-extracts its field with a shift and/or mask.
+    /// </summary>
+    readonly struct InstXmadRc
+    {
+        private readonly ulong _opcode;
+        public InstXmadRc(ulong opcode) => _opcode = opcode;
+
+        // 8-bit operand fields.
+        public int Dest => (int)((_opcode >> 0) & 0xFF);
+        public int SrcA => (int)((_opcode >> 8) & 0xFF);
+        public int SrcC => (int)((_opcode >> 39) & 0xFF);
+
+        // 5-bit slot at bits 34..38 and 14-bit offset at bits 20..33.
+        public int CbufSlot => (int)((_opcode >> 34) & 0x1F);
+        public int CbufOffset => (int)((_opcode >> 20) & 0x3FFF);
+
+        // 3-bit predicate field and its single-bit inversion flag.
+        public int Pred => (int)((_opcode >> 16) & 0x7);
+        public bool PredInv => (_opcode & 0x80000) != 0;
+
+        public bool WriteCC => (_opcode & 0x800000000000) != 0;
+
+        // Three single-bit flags.
+        public bool X => (_opcode & 0x40000000000000) != 0;
+        public bool HiloA => (_opcode & 0x20000000000000) != 0;
+        public bool HiloB => (_opcode & 0x10000000000000) != 0;
+
+        // 2-bit mode field at bits 50..51.
+        public XmadCop2 XmadCop => (XmadCop2)((_opcode >> 50) & 0x3);
+
+        public bool BSigned => (_opcode & 0x2000000000000) != 0;
+        public bool ASigned => (_opcode & 0x1000000000000) != 0;
+    }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/Decoders/InstName.cs b/src/Ryujinx.Graphics.Shader/Decoders/InstName.cs
new file mode 100644
index 00000000..9c79b7a5
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/Decoders/InstName.cs
@@ -0,0 +1,188 @@
+namespace Ryujinx.Graphics.Shader.Decoders
+{
+ // Names of the instructions known to the decoder, stored in a single byte.
+ // Only Invalid has an explicit value (0); every other member takes the next
+ // implicit value in declaration order, so inserting, removing or reordering
+ // members changes the numeric value of everything that follows.
+ enum InstName : byte
+ {
+ // Explicit zero, so a default-initialized InstName is Invalid.
+ Invalid = 0,
+
+ Al2p,
+ Ald,
+ Ast,
+ Atom,
+ AtomCas,
+ Atoms,
+ AtomsCas,
+ B2r,
+ Bar,
+ Bfe,
+ Bfi,
+ Bpt,
+ Bra,
+ Brk,
+ Brx,
+ Cal,
+ Cctl,
+ Cctll,
+ Cctlt,
+ Cont,
+ Cset,
+ Csetp,
+ Cs2r,
+ Dadd,
+ Depbar,
+ Dfma,
+ Dmnmx,
+ Dmul,
+ Dset,
+ Dsetp,
+ Exit,
+ F2f,
+ F2i,
+ Fadd,
+ Fadd32i,
+ Fchk,
+ Fcmp,
+ Ffma,
+ Ffma32i,
+ Flo,
+ Fmnmx,
+ Fmul,
+ Fmul32i,
+ Fset,
+ Fsetp,
+ Fswzadd,
+ Getcrsptr,
+ Getlmembase,
+ Hadd2,
+ Hadd232i,
+ Hfma2,
+ Hmul2,
+ Hmul232i,
+ Hset2,
+ Hsetp2,
+ I2f,
+ I2i,
+ Iadd,
+ Iadd32i,
+ Iadd3,
+ Icmp,
+ Ide,
+ Idp,
+ Imad,
+ Imad32i,
+ Imadsp,
+ Imnmx,
+ Imul,
+ Imul32i,
+ Ipa,
+ Isberd,
+ Iscadd,
+ Iscadd32i,
+ Iset,
+ Isetp,
+ Jcal,
+ Jmp,
+ Jmx,
+ Kil,
+ Ld,
+ Ldc,
+ Ldg,
+ Ldl,
+ Lds,
+ Lea,
+ LeaHi,
+ Lepc,
+ Longjmp,
+ Lop,
+ Lop3,
+ Lop32i,
+ Membar,
+ Mov,
+ Mov32i,
+ Mufu,
+ Nop,
+ Out,
+ P2r,
+ Pbk,
+ Pcnt,
+ Pexit,
+ Pixld,
+ Plongjmp,
+ Popc,
+ Pret,
+ Prmt,
+ Pset,
+ Psetp,
+ R2b,
+ R2p,
+ Ram,
+ Red,
+ Ret,
+ Rro,
+ Rtt,
+ S2r,
+ Sam,
+ Sel,
+ Setcrsptr,
+ Setlmembase,
+ // NOTE(review): Shf_2..Shf_4 are presumably additional variants of Shf; confirm against the instruction table.
+ Shf,
+ Shf_2,
+ Shf_3,
+ Shf_4,
+ Shfl,
+ Shl,
+ Shr,
+ Ssy,
+ St,
+ Stg,
+ Stl,
+ Stp,
+ Sts,
+ SuatomB,
+ Suatom,
+ SuatomB2,
+ SuatomCasB,
+ SuatomCas,
+ SuldDB,
+ SuldD,
+ SuldB,
+ Suld,
+ SuredB,
+ Sured,
+ SustDB,
+ SustD,
+ SustB,
+ Sust,
+ Sync,
+ Tex,
+ TexB,
+ Texs,
+ TexsF16,
+ Tld,
+ TldB,
+ Tlds,
+ TldsF16,
+ Tld4,
+ Tld4B,
+ Tld4s,
+ Tld4sF16,
+ Tmml,
+ TmmlB,
+ Txa,
+ Txd,
+ TxdB,
+ Txq,
+ TxqB,
+ Vabsdiff,
+ Vabsdiff4,
+ Vadd,
+ Vmad,
+ Vmnmx,
+ Vote,
+ Votevtg,
+ Vset,
+ Vsetp,
+ Vshl,
+ Vshr,
+ // The instruction table registers several encodings (e.g. InstEmit.BfeR for Bfe) under one name.
+ Xmad,
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/Decoders/InstOp.cs b/src/Ryujinx.Graphics.Shader/Decoders/InstOp.cs
new file mode 100644
index 00000000..39244e64
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/Decoders/InstOp.cs
@@ -0,0 +1,27 @@
+using Ryujinx.Graphics.Shader.Instructions;
+
+namespace Ryujinx.Graphics.Shader.Decoders
+{
+    /// <summary>
+    /// Immutable record of one decoded instruction: where it was found, its raw opcode word,
+    /// and the name, emitter and property flags associated with it.
+    /// </summary>
+    readonly struct InstOp
+    {
+        public readonly ulong Address;
+        public readonly ulong RawOpCode;
+        public readonly InstEmitter Emitter;
+        public readonly InstProps Props;
+        public readonly InstName Name;
+
+        /// <summary>
+        /// Stores the supplied values as-is; nothing is validated or derived here.
+        /// </summary>
+        public InstOp(ulong address, ulong rawOpCode, InstName name, InstEmitter emitter, InstProps props)
+        {
+            Props = props;
+            Emitter = emitter;
+            Name = name;
+            RawOpCode = rawOpCode;
+            Address = address;
+        }
+
+        /// <summary>
+        /// Computes <see cref="Address"/> plus a signed 24-bit offset read from opcode bits 20..43, plus 8.
+        /// </summary>
+        /// <returns>The resulting address, reinterpreted as unsigned.</returns>
+        public ulong GetAbsoluteAddress()
+        {
+            // Drop the low 20 bits and keep the next 32 as a signed int.
+            int relative = (int)(RawOpCode >> 20);
+
+            // Shifting left then arithmetically right by 8 sign-extends the low 24 bits.
+            relative <<= 8;
+            relative >>= 8;
+
+            // NOTE(review): the constant 8 is presumably the size of one instruction word - confirm.
+            long target = (long)Address + relative + 8;
+
+            return (ulong)target;
+        }
+    }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/Decoders/InstProps.cs b/src/Ryujinx.Graphics.Shader/Decoders/InstProps.cs
new file mode 100644
index 00000000..1af94ab5
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/Decoders/InstProps.cs
@@ -0,0 +1,28 @@
+namespace Ryujinx.Graphics.Shader.Decoders
+{
+    /// <summary>
+    /// 16-bit set of per-instruction properties. Most members are single-bit flags; the Pd group
+    /// is a 3-bit numeric field (bits 7..9) holding one of several values, extracted with PdMask.
+    /// </summary>
+    enum InstProps : ushort
+    {
+        None = 0x0000,
+
+        // Single-bit flags, bits 0..6.
+        Rd = 0x0001,
+        Rd2 = 0x0002,
+        Ra = 0x0004,
+        Rb = 0x0008,
+        Rb2 = 0x0010,
+        Ib = 0x0020,
+        Rc = 0x0040,
+
+        // Values 1..5 of the 3-bit field stored at bits 7..9; these are mutually exclusive, not flags.
+        Pd = 0x0080,
+        LPd = 0x0100,
+        SPd = 0x0180,
+        TPd = 0x0200,
+        VPd = 0x0280,
+        PdMask = 0x0380,
+
+        // Single-bit flags, bits 10..15.
+        Pdn = 0x0400,
+        Ps = 0x0800,
+        Tex = 0x1000,
+        TexB = 0x2000,
+        Bra = 0x4000,
+        NoPred = 0x8000
+    }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/Decoders/InstTable.cs b/src/Ryujinx.Graphics.Shader/Decoders/InstTable.cs
new file mode 100644
index 00000000..eaa77930
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/Decoders/InstTable.cs
@@ -0,0 +1,390 @@
+using Ryujinx.Graphics.Shader.Instructions;
+using System;
+
+namespace Ryujinx.Graphics.Shader.Decoders
+{
+ static class InstTable
+ {
+ private const int EncodingBits = 14;
+
+        /// <summary>
+        /// Immutable entry stored in the opcode table: the instruction name, emitter and
+        /// properties, plus an integer <see cref="XBits"/> value.
+        /// </summary>
+        private readonly struct TableEntry
+        {
+            /// <summary>Name of the instruction this entry describes.</summary>
+            public InstName Name { get; }
+
+            /// <summary>Emitter associated with the instruction.</summary>
+            public InstEmitter Emitter { get; }
+
+            /// <summary>Property bits associated with the instruction.</summary>
+            public InstProps Props { get; }
+
+            // NOTE(review): presumably the number of wildcard ('x') bits in the encoding pattern
+            // this entry was registered with - confirm against the code that creates entries.
+            public int XBits { get; }
+
+            /// <summary>
+            /// Creates a new table entry.
+            /// </summary>
+            /// <param name="name">Name of the instruction</param>
+            /// <param name="emitter">Emitter associated with the instruction</param>
+            /// <param name="props">Property bits associated with the instruction</param>
+            /// <param name="xBits">Value stored in <see cref="XBits"/></param>
+            public TableEntry(InstName name, InstEmitter emitter, InstProps props, int xBits)
+            {
+                (Name, Emitter, Props, XBits) = (name, emitter, props, xBits);
+            }
+        }
+
+ private static TableEntry[] _opCodes;
+
+ static InstTable()
+ {
+ _opCodes = new TableEntry[1 << EncodingBits];
+
+ #region Instructions
+ Add("1110111110100xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Al2p, InstEmit.Al2p, InstProps.Rd | InstProps.Ra);
+ Add("1110111111011xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Ald, InstEmit.Ald, InstProps.Rd | InstProps.Ra);
+ Add("1110111111110xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Ast, InstEmit.Ast, InstProps.Ra | InstProps.Rb2 | InstProps.Rc);
+ Add("11101101xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Atom, InstEmit.Atom, InstProps.Rd | InstProps.Ra | InstProps.Rb);
+ Add("111011101111xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.AtomCas, InstEmit.AtomCas, InstProps.Rd | InstProps.Ra | InstProps.Rb);
+ Add("11101100xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Atoms, InstEmit.Atoms, InstProps.Rd | InstProps.Ra | InstProps.Rb);
+ Add("111011100xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.AtomsCas, InstEmit.AtomsCas, InstProps.Rd | InstProps.Ra | InstProps.Rb);
+ Add("1111000010111xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.B2r, InstEmit.B2r, InstProps.Rd | InstProps.Ra | InstProps.VPd);
+ Add("1111000010101xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Bar, InstEmit.Bar, InstProps.Ra | InstProps.Ps);
+ Add("0101110000000xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Bfe, InstEmit.BfeR, InstProps.Rd | InstProps.Ra | InstProps.Rb);
+ Add("0011100x00000xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Bfe, InstEmit.BfeI, InstProps.Rd | InstProps.Ra | InstProps.Ib);
+ Add("0100110000000xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Bfe, InstEmit.BfeC, InstProps.Rd | InstProps.Ra);
+ Add("0101101111110xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Bfi, InstEmit.BfiR, InstProps.Rd | InstProps.Ra | InstProps.Rb | InstProps.Rc);
+ Add("0011011x11110xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Bfi, InstEmit.BfiI, InstProps.Rd | InstProps.Ra | InstProps.Ib | InstProps.Rc);
+ Add("0100101111110xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Bfi, InstEmit.BfiC, InstProps.Rd | InstProps.Ra | InstProps.Rc);
+ Add("0101001111110xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Bfi, InstEmit.BfiRc, InstProps.Rd | InstProps.Ra | InstProps.Rc);
+ Add("111000111010xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Bpt, InstEmit.Bpt, InstProps.NoPred);
+ Add("111000100100xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Bra, InstEmit.Bra, InstProps.Bra);
+ Add("111000110100xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Brk, InstEmit.Brk, InstProps.Bra);
+ Add("111000100101xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Brx, InstEmit.Brx, InstProps.Ra | InstProps.Bra);
+ Add("111000100110xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Cal, InstEmit.Cal, InstProps.Bra | InstProps.NoPred);
+ Add("11101111011xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Cctl, InstEmit.Cctl, InstProps.Ra);
+ Add("1110111110000xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Cctll, InstEmit.Cctll, InstProps.Ra);
+ Add("1110101111110xx0000000000000xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Cctlt, InstEmit.Cctlt);
+ Add("1110101111101xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Cctlt, InstEmit.Cctlt, InstProps.Rc);
+ Add("111000110101xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Cont, InstEmit.Cont, InstProps.Bra);
+ Add("0101000010011xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Cset, InstEmit.Cset, InstProps.Rd | InstProps.Ps);
+ Add("0101000010100xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Csetp, InstEmit.Csetp, InstProps.Pd | InstProps.Pdn | InstProps.Ps);
+ Add("0101000011001xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Cs2r, InstEmit.Cs2r, InstProps.Rd);
+ Add("0101110001110xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Dadd, InstEmit.DaddR, InstProps.Rd | InstProps.Ra | InstProps.Rb);
+ Add("0011100x01110xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Dadd, InstEmit.DaddI, InstProps.Rd | InstProps.Ra | InstProps.Ib);
+ Add("0100110001110xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Dadd, InstEmit.DaddC, InstProps.Rd | InstProps.Ra);
+ Add("1111000011110xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Depbar, InstEmit.Depbar);
+ Add("010110110111xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Dfma, InstEmit.DfmaR, InstProps.Rd | InstProps.Ra | InstProps.Rb | InstProps.Rc);
+ Add("0011011x0111xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Dfma, InstEmit.DfmaI, InstProps.Rd | InstProps.Ra | InstProps.Ib | InstProps.Rc);
+ Add("010010110111xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Dfma, InstEmit.DfmaC, InstProps.Rd | InstProps.Ra | InstProps.Rc);
+ Add("010100110111xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Dfma, InstEmit.DfmaRc, InstProps.Rd | InstProps.Ra | InstProps.Rc);
+ Add("0101110001010xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Dmnmx, InstEmit.DmnmxR, InstProps.Rd | InstProps.Ra | InstProps.Rb | InstProps.Ps);
+ Add("0011100x01010xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Dmnmx, InstEmit.DmnmxI, InstProps.Rd | InstProps.Ra | InstProps.Ib | InstProps.Ps);
+ Add("0100110001010xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Dmnmx, InstEmit.DmnmxC, InstProps.Rd | InstProps.Ra | InstProps.Ps);
+ Add("0101110010000xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Dmul, InstEmit.DmulR, InstProps.Rd | InstProps.Ra | InstProps.Rb);
+ Add("0011100x10000xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Dmul, InstEmit.DmulI, InstProps.Rd | InstProps.Ra | InstProps.Ib);
+ Add("0100110010000xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Dmul, InstEmit.DmulC, InstProps.Rd | InstProps.Ra);
+ Add("010110010xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Dset, InstEmit.DsetR, InstProps.Rd | InstProps.Ra | InstProps.Rb | InstProps.Ps);
+ Add("0011001x0xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Dset, InstEmit.DsetI, InstProps.Rd | InstProps.Ra | InstProps.Ib | InstProps.Ps);
+ Add("010010010xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Dset, InstEmit.DsetC, InstProps.Rd | InstProps.Ra | InstProps.Ps);
+ Add("010110111000xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Dsetp, InstEmit.DsetpR, InstProps.Ra | InstProps.Rb | InstProps.Pd | InstProps.Pdn | InstProps.Ps);
+ Add("0011011x1000xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Dsetp, InstEmit.DsetpI, InstProps.Ra | InstProps.Ib | InstProps.Pd | InstProps.Pdn | InstProps.Ps);
+ Add("010010111000xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Dsetp, InstEmit.DsetpC, InstProps.Ra | InstProps.Pd | InstProps.Pdn | InstProps.Ps);
+ Add("111000110000xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Exit, InstEmit.Exit, InstProps.Bra);
+ Add("0101110010101xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.F2f, InstEmit.F2fR, InstProps.Rd | InstProps.Rb);
+ Add("0011100x10101xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.F2f, InstEmit.F2fI, InstProps.Rd | InstProps.Ib);
+ Add("0100110010101xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.F2f, InstEmit.F2fC, InstProps.Rd);
+ Add("0101110010110xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.F2i, InstEmit.F2iR, InstProps.Rd | InstProps.Rb);
+ Add("0011100x10110xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.F2i, InstEmit.F2iI, InstProps.Rd | InstProps.Ib);
+ Add("0100110010110xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.F2i, InstEmit.F2iC, InstProps.Rd);
+ Add("0101110001011xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Fadd, InstEmit.FaddR, InstProps.Rd | InstProps.Ra | InstProps.Rb);
+ Add("0011100x01011xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Fadd, InstEmit.FaddI, InstProps.Rd | InstProps.Ra | InstProps.Ib);
+ Add("0100110001011xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Fadd, InstEmit.FaddC, InstProps.Rd | InstProps.Ra);
+ Add("000010xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Fadd32i, InstEmit.Fadd32i, InstProps.Rd | InstProps.Ra);
+ Add("0101110010001xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Fchk, InstEmit.FchkR, InstProps.Ra | InstProps.Rb | InstProps.Pd);
+ Add("0011100x10001xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Fchk, InstEmit.FchkI, InstProps.Ra | InstProps.Ib | InstProps.Pd);
+ Add("0100110010001xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Fchk, InstEmit.FchkC, InstProps.Ra | InstProps.Pd);
+ Add("010110111010xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Fcmp, InstEmit.FcmpR, InstProps.Rd | InstProps.Ra | InstProps.Rb | InstProps.Rc);
+ Add("0011011x1010xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Fcmp, InstEmit.FcmpI, InstProps.Rd | InstProps.Ra | InstProps.Ib | InstProps.Rc);
+ Add("010010111010xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Fcmp, InstEmit.FcmpC, InstProps.Rd | InstProps.Ra | InstProps.Rc);
+ Add("010100111010xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Fcmp, InstEmit.FcmpRc, InstProps.Rd | InstProps.Ra | InstProps.Rc);
+ Add("010110011xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Ffma, InstEmit.FfmaR, InstProps.Rd | InstProps.Ra | InstProps.Rb | InstProps.Rc);
+ Add("0011001x1xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Ffma, InstEmit.FfmaI, InstProps.Rd | InstProps.Ra | InstProps.Ib | InstProps.Rc);
+ Add("010010011xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Ffma, InstEmit.FfmaC, InstProps.Rd | InstProps.Ra | InstProps.Rc);
+ Add("010100011xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Ffma, InstEmit.FfmaRc, InstProps.Rd | InstProps.Ra | InstProps.Rc);
+ Add("000011xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Ffma32i, InstEmit.Ffma32i, InstProps.Rd | InstProps.Ra);
+ Add("0101110000110xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Flo, InstEmit.FloR, InstProps.Rd | InstProps.Rb);
+ Add("0011100x00110xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Flo, InstEmit.FloI, InstProps.Rd | InstProps.Ib);
+ Add("0100110000110xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Flo, InstEmit.FloC, InstProps.Rd);
+ Add("0101110001100xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Fmnmx, InstEmit.FmnmxR, InstProps.Rd | InstProps.Ra | InstProps.Rb | InstProps.Ps);
+ Add("0011100x01100xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Fmnmx, InstEmit.FmnmxI, InstProps.Rd | InstProps.Ra | InstProps.Ib | InstProps.Ps);
+ Add("0100110001100xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Fmnmx, InstEmit.FmnmxC, InstProps.Rd | InstProps.Ra | InstProps.Ps);
+ Add("0101110001101xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Fmul, InstEmit.FmulR, InstProps.Rd | InstProps.Ra | InstProps.Rb);
+ Add("0011100x01101xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Fmul, InstEmit.FmulI, InstProps.Rd | InstProps.Ra | InstProps.Ib);
+ Add("0100110001101xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Fmul, InstEmit.FmulC, InstProps.Rd | InstProps.Ra);
+ Add("00011110xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Fmul32i, InstEmit.Fmul32i, InstProps.Rd | InstProps.Ra);
+ Add("01011000xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Fset, InstEmit.FsetR, InstProps.Rd | InstProps.Ra | InstProps.Rb | InstProps.Ps);
+ Add("0011000xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Fset, InstEmit.FsetI, InstProps.Rd | InstProps.Ra | InstProps.Ib | InstProps.Ps);
+ Add("01001000xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Fset, InstEmit.FsetC, InstProps.Rd | InstProps.Ra | InstProps.Ps);
+ Add("010110111011xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Fsetp, InstEmit.FsetpR, InstProps.Ra | InstProps.Rb | InstProps.Pd | InstProps.Pdn | InstProps.Ps);
+ Add("0011011x1011xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Fsetp, InstEmit.FsetpI, InstProps.Ra | InstProps.Ib | InstProps.Pd | InstProps.Pdn | InstProps.Ps);
+ Add("010010111011xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Fsetp, InstEmit.FsetpC, InstProps.Ra | InstProps.Pd | InstProps.Pdn | InstProps.Ps);
+ Add("0101000011111xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Fswzadd, InstEmit.Fswzadd, InstProps.Rd | InstProps.Ra | InstProps.Rb);
+ Add("111000101100xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Getcrsptr, InstEmit.Getcrsptr, InstProps.Rd | InstProps.NoPred);
+ Add("111000101101xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Getlmembase, InstEmit.Getlmembase, InstProps.Rd | InstProps.NoPred);
+ Add("0101110100010xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Hadd2, InstEmit.Hadd2R, InstProps.Rd | InstProps.Ra);
+ Add("0111101x0xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Hadd2, InstEmit.Hadd2I, InstProps.Rd | InstProps.Ra | InstProps.Ib);
+ Add("0111101x1xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Hadd2, InstEmit.Hadd2C, InstProps.Rd | InstProps.Ra);
+ Add("0010110xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Hadd232i, InstEmit.Hadd232i, InstProps.Rd | InstProps.Ra);
+ Add("0101110100000xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Hfma2, InstEmit.Hfma2R, InstProps.Rd | InstProps.Ra | InstProps.Rb | InstProps.Rc);
+ Add("01110xxx0xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Hfma2, InstEmit.Hfma2I, InstProps.Rd | InstProps.Ra | InstProps.Ib | InstProps.Rc);
+ Add("01110xxx1xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Hfma2, InstEmit.Hfma2C, InstProps.Rd | InstProps.Ra | InstProps.Rc);
+ Add("01100xxx1xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Hfma2, InstEmit.Hfma2Rc, InstProps.Rd | InstProps.Ra | InstProps.Rc);
+ Add("0010100xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Hfma2, InstEmit.Hfma232i, InstProps.Rd | InstProps.Ra);
+ Add("0101110100001xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Hmul2, InstEmit.Hmul2R, InstProps.Rd | InstProps.Ra | InstProps.Rb);
+ Add("0111100x0xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Hmul2, InstEmit.Hmul2I, InstProps.Rd | InstProps.Ra | InstProps.Ib);
+ Add("0111100x1xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Hmul2, InstEmit.Hmul2C, InstProps.Rd | InstProps.Ra);
+ Add("0010101xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Hmul232i, InstEmit.Hmul232i, InstProps.Rd | InstProps.Ra);
+ Add("0101110100011xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Hset2, InstEmit.Hset2R, InstProps.Rd | InstProps.Ra | InstProps.Rb | InstProps.Ps);
+ Add("0111110x0xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Hset2, InstEmit.Hset2I, InstProps.Rd | InstProps.Ra | InstProps.Ib | InstProps.Ps);
+ Add("0111110x1xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Hset2, InstEmit.Hset2C, InstProps.Rd | InstProps.Ra | InstProps.Ps);
+ Add("0101110100100xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Hsetp2, InstEmit.Hsetp2R, InstProps.Ra | InstProps.Rb | InstProps.Pd | InstProps.Pdn | InstProps.Ps);
+ Add("0111111x0xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Hsetp2, InstEmit.Hsetp2I, InstProps.Ra | InstProps.Ib | InstProps.Pd | InstProps.Pdn | InstProps.Ps);
+ Add("0111111x1xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Hsetp2, InstEmit.Hsetp2C, InstProps.Ra | InstProps.Pd | InstProps.Pdn | InstProps.Ps);
+ Add("0101110010111xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.I2f, InstEmit.I2fR, InstProps.Rd | InstProps.Rb);
+ Add("0011100x10111xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.I2f, InstEmit.I2fI, InstProps.Rd | InstProps.Ib);
+ Add("0100110010111xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.I2f, InstEmit.I2fC, InstProps.Rd);
+ Add("0101110011100xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.I2i, InstEmit.I2iR, InstProps.Rd | InstProps.Rb);
+ Add("0011100x11100xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.I2i, InstEmit.I2iI, InstProps.Rd | InstProps.Ib);
+ Add("0100110011100xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.I2i, InstEmit.I2iC, InstProps.Rd);
+ Add("0101110000010xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Iadd, InstEmit.IaddR, InstProps.Rd | InstProps.Ra | InstProps.Rb);
+ Add("0011100x00010xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Iadd, InstEmit.IaddI, InstProps.Rd | InstProps.Ra | InstProps.Ib);
+ Add("0100110000010xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Iadd, InstEmit.IaddC, InstProps.Rd | InstProps.Ra);
+ Add("0001110xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Iadd32i, InstEmit.Iadd32i, InstProps.Rd | InstProps.Ra);
+ Add("010111001100xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Iadd3, InstEmit.Iadd3R, InstProps.Rd | InstProps.Ra | InstProps.Rb | InstProps.Rc);
+ Add("0011100x1100xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Iadd3, InstEmit.Iadd3I, InstProps.Rd | InstProps.Ra | InstProps.Ib | InstProps.Rc);
+ Add("010011001100xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Iadd3, InstEmit.Iadd3C, InstProps.Rd | InstProps.Ra | InstProps.Rc);
+ Add("010110110100xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Icmp, InstEmit.IcmpR, InstProps.Rd | InstProps.Ra | InstProps.Rb | InstProps.Rc);
+ Add("0011011x0100xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Icmp, InstEmit.IcmpI, InstProps.Rd | InstProps.Ra | InstProps.Ib | InstProps.Rc);
+ Add("010010110100xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Icmp, InstEmit.IcmpC, InstProps.Rd | InstProps.Ra | InstProps.Rc);
+ Add("010100110100xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Icmp, InstEmit.IcmpRc, InstProps.Rd | InstProps.Ra | InstProps.Rc);
+ Add("111000111001xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Ide, InstEmit.Ide, InstProps.NoPred);
+ Add("0101001111111xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Idp, InstEmit.IdpR, InstProps.Rd | InstProps.Ra | InstProps.Rb | InstProps.Rc);
+ Add("0101001111011xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Idp, InstEmit.IdpC, InstProps.Rd | InstProps.Ra | InstProps.Rc);
+ Add("010110100xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Imad, InstEmit.ImadR, InstProps.Rd | InstProps.Ra | InstProps.Rb | InstProps.Rc);
+ Add("0011010x0xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Imad, InstEmit.ImadI, InstProps.Rd | InstProps.Ra | InstProps.Ib | InstProps.Rc);
+ Add("010010100xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Imad, InstEmit.ImadC, InstProps.Rd | InstProps.Ra | InstProps.Rc);
+ Add("010100100xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Imad, InstEmit.ImadRc, InstProps.Rd | InstProps.Ra | InstProps.Rc);
+ Add("000100xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Imad32i, InstEmit.Imad32i, InstProps.Rd | InstProps.Ra);
+ Add("010110101xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Imadsp, InstEmit.ImadspR, InstProps.Rd | InstProps.Ra | InstProps.Rb | InstProps.Rc);
+ Add("0011010x1xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Imadsp, InstEmit.ImadspI, InstProps.Rd | InstProps.Ra | InstProps.Ib | InstProps.Rc);
+ Add("010010101xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Imadsp, InstEmit.ImadspC, InstProps.Rd | InstProps.Ra | InstProps.Rc);
+ Add("010100101xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Imadsp, InstEmit.ImadspRc, InstProps.Rd | InstProps.Ra | InstProps.Rc);
+ Add("0101110000100xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Imnmx, InstEmit.ImnmxR, InstProps.Rd | InstProps.Ra | InstProps.Rb | InstProps.Ps);
+ Add("0011100x00100xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Imnmx, InstEmit.ImnmxI, InstProps.Rd | InstProps.Ra | InstProps.Ib | InstProps.Ps);
+ Add("0100110000100xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Imnmx, InstEmit.ImnmxC, InstProps.Rd | InstProps.Ra | InstProps.Ps);
+ Add("0101110000111xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Imul, InstEmit.ImulR, InstProps.Rd | InstProps.Ra | InstProps.Rb);
+ Add("0011100x00111xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Imul, InstEmit.ImulI, InstProps.Rd | InstProps.Ra | InstProps.Ib);
+ Add("0100110000111xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Imul, InstEmit.ImulC, InstProps.Rd | InstProps.Ra);
+ Add("00011111xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Imul32i, InstEmit.Imul32i, InstProps.Rd | InstProps.Ra);
+ Add("11100000xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Ipa, InstEmit.Ipa, InstProps.Rd | InstProps.Ra | InstProps.Rb | InstProps.Rc);
+ Add("1110111111010xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Isberd, InstEmit.Isberd, InstProps.Rd | InstProps.Ra);
+ Add("0101110000011xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Iscadd, InstEmit.IscaddR, InstProps.Rd | InstProps.Ra | InstProps.Rb);
+ Add("0011100x00011xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Iscadd, InstEmit.IscaddI, InstProps.Rd | InstProps.Ra | InstProps.Ib);
+ Add("0100110000011xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Iscadd, InstEmit.IscaddC, InstProps.Rd | InstProps.Ra);
+ Add("000101xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Iscadd32i, InstEmit.Iscadd32i, InstProps.Rd | InstProps.Ra);
+ Add("010110110101xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Iset, InstEmit.IsetR, InstProps.Rd | InstProps.Ra | InstProps.Rb | InstProps.Ps);
+ Add("0011011x0101xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Iset, InstEmit.IsetI, InstProps.Rd | InstProps.Ra | InstProps.Ib | InstProps.Ps);
+ Add("010010110101xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Iset, InstEmit.IsetC, InstProps.Rd | InstProps.Ra | InstProps.Ps);
+ Add("010110110110xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Isetp, InstEmit.IsetpR, InstProps.Ra | InstProps.Rb | InstProps.Pd | InstProps.Pdn | InstProps.Ps);
+ Add("0011011x0110xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Isetp, InstEmit.IsetpI, InstProps.Ra | InstProps.Ib | InstProps.Pd | InstProps.Pdn | InstProps.Ps);
+ Add("010010110110xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Isetp, InstEmit.IsetpC, InstProps.Ra | InstProps.Pd | InstProps.Pdn | InstProps.Ps);
+ Add("111000100010xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Jcal, InstEmit.Jcal, InstProps.Bra);
+ Add("111000100001xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Jmp, InstEmit.Jmp, InstProps.Ra | InstProps.Bra);
+ Add("111000100000xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Jmx, InstEmit.Jmx, InstProps.Ra | InstProps.Bra);
+ Add("111000110011xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Kil, InstEmit.Kil, InstProps.Bra);
+ Add("100xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Ld, InstEmit.Ld, InstProps.Rd | InstProps.Ra);
+ Add("1110111110010xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Ldc, InstEmit.Ldc, InstProps.Rd | InstProps.Ra);
+ Add("1110111011010xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Ldg, InstEmit.Ldg, InstProps.Rd | InstProps.Ra);
+ Add("1110111101000xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Ldl, InstEmit.Ldl, InstProps.Rd | InstProps.Ra);
+ Add("1110111101001xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Lds, InstEmit.Lds, InstProps.Rd | InstProps.Ra);
+ Add("0101101111010xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Lea, InstEmit.LeaR, InstProps.Rd | InstProps.Ra | InstProps.Rb | InstProps.LPd);
+ Add("0011011x11010xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Lea, InstEmit.LeaI, InstProps.Rd | InstProps.Ra | InstProps.Ib | InstProps.LPd);
+ Add("0100101111010xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Lea, InstEmit.LeaC, InstProps.Rd | InstProps.Ra | InstProps.LPd);
+ Add("0101101111011xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.LeaHi, InstEmit.LeaHiR, InstProps.Rd | InstProps.Ra | InstProps.Rb | InstProps.Rc | InstProps.LPd);
+ Add("000110xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.LeaHi, InstEmit.LeaHiC, InstProps.Rd | InstProps.Ra | InstProps.Rc | InstProps.LPd);
+ Add("0101000011010xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Lepc, InstEmit.Lepc);
+ Add("111000110001xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Longjmp, InstEmit.Longjmp, InstProps.Bra);
+ Add("0101110001000xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Lop, InstEmit.LopR, InstProps.Rd | InstProps.Ra | InstProps.Rb | InstProps.LPd);
+ Add("0011100x01000xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Lop, InstEmit.LopI, InstProps.Rd | InstProps.Ra | InstProps.Ib | InstProps.LPd);
+ Add("0100110001000xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Lop, InstEmit.LopC, InstProps.Rd | InstProps.Ra | InstProps.LPd);
+ Add("0101101111100xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Lop3, InstEmit.Lop3R, InstProps.Rd | InstProps.Ra | InstProps.Rb | InstProps.Rc | InstProps.LPd);
+ Add("001111xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Lop3, InstEmit.Lop3I, InstProps.Rd | InstProps.Ra | InstProps.Ib | InstProps.Rc);
+ Add("0000001xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Lop3, InstEmit.Lop3C, InstProps.Rd | InstProps.Ra | InstProps.Rc);
+ Add("000001xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Lop32i, InstEmit.Lop32i, InstProps.Rd | InstProps.Ra);
+ Add("1110111110011xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Membar, InstEmit.Membar);
+ Add("0101110010011xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Mov, InstEmit.MovR, InstProps.Rd | InstProps.Ra);
+ Add("0011100x10011xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Mov, InstEmit.MovI, InstProps.Rd | InstProps.Ib);
+ Add("0100110010011xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Mov, InstEmit.MovC, InstProps.Rd);
+ Add("000000010000xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Mov32i, InstEmit.Mov32i, InstProps.Rd);
+ Add("0101000010000xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Mufu, InstEmit.Mufu, InstProps.Rd | InstProps.Ra);
+ Add("0101000010110xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Nop, InstEmit.Nop);
+ Add("1111101111100xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Out, InstEmit.OutR, InstProps.Rd | InstProps.Ra | InstProps.Rb);
+ Add("1111011x11100xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Out, InstEmit.OutI, InstProps.Rd | InstProps.Ra | InstProps.Ib);
+ Add("1110101111100xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Out, InstEmit.OutC, InstProps.Rd | InstProps.Ra);
+ Add("0101110011101xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.P2r, InstEmit.P2rR, InstProps.Rd | InstProps.Ra | InstProps.Rb);
+ Add("0011100x11101xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.P2r, InstEmit.P2rI, InstProps.Rd | InstProps.Ra | InstProps.Ib);
+ Add("0100110011101xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.P2r, InstEmit.P2rC, InstProps.Rd | InstProps.Ra);
+ Add("111000101010xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Pbk, InstEmit.Pbk, InstProps.NoPred);
+ Add("111000101011xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Pcnt, InstEmit.Pcnt, InstProps.NoPred);
+ Add("111000100011xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Pexit, InstEmit.Pexit);
+ Add("1110111111101xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Pixld, InstEmit.Pixld, InstProps.Rd | InstProps.Ra | InstProps.VPd);
+ Add("111000101000xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Plongjmp, InstEmit.Plongjmp, InstProps.Bra | InstProps.NoPred);
+ Add("0101110000001xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Popc, InstEmit.PopcR, InstProps.Rd | InstProps.Rb);
+ Add("0011100x00001xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Popc, InstEmit.PopcI, InstProps.Rd | InstProps.Ib);
+ Add("0100110000001xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Popc, InstEmit.PopcC, InstProps.Rd);
+ Add("111000100111xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Pret, InstEmit.Pret, InstProps.NoPred);
+ Add("010110111100xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Prmt, InstEmit.PrmtR, InstProps.Rd | InstProps.Ra | InstProps.Rb | InstProps.Rc);
+ Add("0011011x1100xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Prmt, InstEmit.PrmtI, InstProps.Rd | InstProps.Ra | InstProps.Ib | InstProps.Rc);
+ Add("010010111100xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Prmt, InstEmit.PrmtC, InstProps.Rd | InstProps.Ra | InstProps.Rc);
+ Add("010100111100xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Prmt, InstEmit.PrmtRc, InstProps.Rd | InstProps.Ra | InstProps.Rc);
+ Add("0101000010001xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Pset, InstEmit.Pset, InstProps.Rd | InstProps.Ps);
+ Add("0101000010010xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Psetp, InstEmit.Psetp, InstProps.Pd | InstProps.Pdn | InstProps.Ps);
+ Add("1111000011000xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.R2b, InstEmit.R2b, InstProps.Rb);
+ Add("0101110011110xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.R2p, InstEmit.R2pR, InstProps.Ra | InstProps.Rb);
+ Add("0011100x11110xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.R2p, InstEmit.R2pI, InstProps.Ra | InstProps.Ib);
+ Add("0100110011110xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.R2p, InstEmit.R2pC, InstProps.Ra);
+ Add("111000111000xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Ram, InstEmit.Ram, InstProps.NoPred);
+ Add("1110101111111xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Red, InstEmit.Red, InstProps.Ra | InstProps.Rb2);
+ Add("111000110010xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Ret, InstEmit.Ret, InstProps.Bra);
+ Add("0101110010010xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Rro, InstEmit.RroR, InstProps.Rd | InstProps.Rb);
+ Add("0011100x10010xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Rro, InstEmit.RroI, InstProps.Rd | InstProps.Ib);
+ Add("0100110010010xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Rro, InstEmit.RroC, InstProps.Rd);
+ Add("111000110110xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Rtt, InstEmit.Rtt, InstProps.NoPred);
+ Add("1111000011001xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.S2r, InstEmit.S2r, InstProps.Rd);
+ Add("111000110111xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Sam, InstEmit.Sam, InstProps.NoPred);
+ Add("0101110010100xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Sel, InstEmit.SelR, InstProps.Rd | InstProps.Ra | InstProps.Rb | InstProps.Ps);
+ Add("0011100x10100xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Sel, InstEmit.SelI, InstProps.Rd | InstProps.Ra | InstProps.Ib | InstProps.Ps);
+ Add("0100110010100xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Sel, InstEmit.SelC, InstProps.Rd | InstProps.Ra | InstProps.Ps);
+ Add("111000101110xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Setcrsptr, InstEmit.Setcrsptr, InstProps.Ra | InstProps.NoPred);
+ Add("111000101111xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Setlmembase, InstEmit.Setlmembase, InstProps.Ra | InstProps.NoPred);
+ Add("0101101111111xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Shf, InstEmit.ShfLR, InstProps.Rd | InstProps.Ra | InstProps.Rb | InstProps.Rc);
+ Add("0101110011111xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Shf, InstEmit.ShfRR, InstProps.Rd | InstProps.Ra | InstProps.Rb | InstProps.Rc);
+ Add("0011011x11111xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Shf, InstEmit.ShfLI, InstProps.Rd | InstProps.Ra | InstProps.Ib | InstProps.Rc);
+ Add("0011100x11111xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Shf, InstEmit.ShfRI, InstProps.Rd | InstProps.Ra | InstProps.Ib | InstProps.Rc);
+ Add("1110111100010xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Shfl, InstEmit.Shfl, InstProps.Rd | InstProps.Ra | InstProps.Rb | InstProps.Rc | InstProps.LPd);
+ Add("0101110001001xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Shl, InstEmit.ShlR, InstProps.Rd | InstProps.Ra | InstProps.Rb);
+ Add("0011100x01001xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Shl, InstEmit.ShlI, InstProps.Rd | InstProps.Ra | InstProps.Ib);
+ Add("0100110001001xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Shl, InstEmit.ShlC, InstProps.Rd | InstProps.Ra);
+ Add("0101110000101xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Shr, InstEmit.ShrR, InstProps.Rd | InstProps.Ra | InstProps.Rb);
+ Add("0011100x00101xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Shr, InstEmit.ShrI, InstProps.Rd | InstProps.Ra | InstProps.Ib);
+ Add("0100110000101xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Shr, InstEmit.ShrC, InstProps.Rd | InstProps.Ra);
+ Add("111000101001xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Ssy, InstEmit.Ssy, InstProps.NoPred);
+ Add("101xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.St, InstEmit.St, InstProps.Rd | InstProps.Ra);
+ Add("1110111011011xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Stg, InstEmit.Stg, InstProps.Rd | InstProps.Ra);
+ Add("1110111101010xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Stl, InstEmit.Stl, InstProps.Rd | InstProps.Ra);
+ Add("1110111010100xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Stp, InstEmit.Stp, InstProps.NoPred);
+ Add("1110111101011xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Sts, InstEmit.Sts, InstProps.Rd | InstProps.Ra);
+ Add("1110101001110xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.SuatomB, InstEmit.SuatomB, InstProps.Rd | InstProps.Ra | InstProps.Rb | InstProps.Rc);
+ Add("11101010x0xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Suatom, InstEmit.Suatom, InstProps.Rd | InstProps.Ra | InstProps.Rb);
+ Add("1110101110xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.SuatomB2, InstEmit.SuatomB2, InstProps.Rd | InstProps.Ra | InstProps.Rb | InstProps.Rc);
+ Add("1110101011010xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.SuatomCasB, InstEmit.SuatomCasB, InstProps.Rd | InstProps.Ra | InstProps.Rb | InstProps.Rc | InstProps.SPd);
+ Add("1110101x1xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.SuatomCas, InstEmit.SuatomCas, InstProps.Rd | InstProps.Ra | InstProps.Rb | InstProps.SPd);
+ Add("1110101100010xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.SuldDB, InstEmit.SuldDB, InstProps.Rd | InstProps.Ra | InstProps.Rc | InstProps.SPd | InstProps.TexB);
+ Add("1110101100011xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.SuldD, InstEmit.SuldD, InstProps.Rd | InstProps.Ra | InstProps.SPd | InstProps.Tex);
+ Add("1110101100000xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.SuldB, InstEmit.SuldB, InstProps.Rd | InstProps.Ra | InstProps.Rc | InstProps.SPd | InstProps.TexB);
+ Add("1110101100001xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Suld, InstEmit.Suld, InstProps.Rd | InstProps.Ra | InstProps.SPd | InstProps.Tex);
+ Add("1110101101010xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.SuredB, InstEmit.SuredB, InstProps.Rd | InstProps.Ra | InstProps.Rc);
+ Add("1110101101011xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Sured, InstEmit.Sured, InstProps.Rd | InstProps.Ra);
+ Add("1110101100110xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.SustDB, InstEmit.SustDB, InstProps.Rd | InstProps.Ra | InstProps.Rc | InstProps.TexB);
+ Add("1110101100111xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.SustD, InstEmit.SustD, InstProps.Rd | InstProps.Ra | InstProps.Tex);
+ Add("1110101100100xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.SustB, InstEmit.SustB, InstProps.Rd | InstProps.Ra | InstProps.Rc | InstProps.TexB);
+ Add("1110101100101xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Sust, InstEmit.Sust, InstProps.Rd | InstProps.Ra | InstProps.Tex);
+ Add("1111000011111xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Sync, InstEmit.Sync, InstProps.Bra);
+ Add("11000xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Tex, InstEmit.Tex, InstProps.Rd | InstProps.Ra | InstProps.Rb | InstProps.TPd | InstProps.Tex);
+ Add("1101111010xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.TexB, InstEmit.TexB, InstProps.Rd | InstProps.Ra | InstProps.Rb | InstProps.TPd | InstProps.TexB);
+ Add("1101100xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Texs, InstEmit.Texs, InstProps.Rd | InstProps.Rd2 | InstProps.Ra | InstProps.Rb | InstProps.Tex);
+ Add("1101000xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.TexsF16, InstEmit.TexsF16, InstProps.Rd | InstProps.Rd2 | InstProps.Ra | InstProps.Rb | InstProps.Tex);
+ Add("11011100xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Tld, InstEmit.Tld, InstProps.Rd | InstProps.Ra | InstProps.Rb | InstProps.TPd | InstProps.Tex);
+ Add("11011101xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.TldB, InstEmit.TldB, InstProps.Rd | InstProps.Ra | InstProps.Rb | InstProps.TPd | InstProps.TexB);
+ Add("1101101xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Tlds, InstEmit.Tlds, InstProps.Rd | InstProps.Rd2 | InstProps.Ra | InstProps.Rb | InstProps.Tex);
+ Add("1101001xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.TldsF16, InstEmit.TldsF16, InstProps.Rd | InstProps.Rd2 | InstProps.Ra | InstProps.Rb | InstProps.Tex);
+ Add("110010xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Tld4, InstEmit.Tld4, InstProps.Rd | InstProps.Ra | InstProps.Rb | InstProps.TPd | InstProps.Tex);
+ Add("1101111011xxxxxxxxxxxxx0xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Tld4B, InstEmit.Tld4B, InstProps.Rd | InstProps.Ra | InstProps.Rb | InstProps.TPd | InstProps.TexB);
+ Add("1101111100xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Tld4s, InstEmit.Tld4s, InstProps.Rd | InstProps.Rd2 | InstProps.Ra | InstProps.Rb | InstProps.Tex);
+ Add("1101111110xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Tld4sF16, InstEmit.Tld4sF16, InstProps.Rd | InstProps.Rd2 | InstProps.Ra | InstProps.Rb | InstProps.Tex);
+ Add("1101111101011xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Tmml, InstEmit.Tmml, InstProps.Rd | InstProps.Ra | InstProps.Rb | InstProps.Tex);
+ Add("1101111101100xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.TmmlB, InstEmit.TmmlB, InstProps.Rd | InstProps.Ra | InstProps.Rb | InstProps.TexB);
+ Add("1101111101000xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Txa, InstEmit.Txa, InstProps.Rd | InstProps.Ra | InstProps.Tex);
+ Add("110111100xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Txd, InstEmit.Txd, InstProps.Rd | InstProps.Ra | InstProps.Rb | InstProps.TPd | InstProps.Tex);
+ Add("1101111001xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.TxdB, InstEmit.TxdB, InstProps.Rd | InstProps.Ra | InstProps.Rb | InstProps.TPd | InstProps.TexB);
+ Add("1101111101001xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Txq, InstEmit.Txq, InstProps.Rd | InstProps.Ra | InstProps.Tex);
+ Add("1101111101010xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.TxqB, InstEmit.TxqB, InstProps.Rd | InstProps.Ra | InstProps.TexB);
+ Add("01010100xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Vabsdiff, InstEmit.Vabsdiff, InstProps.Rd | InstProps.Ra | InstProps.Rb | InstProps.Rc);
+ Add("010100000xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Vabsdiff4, InstEmit.Vabsdiff4, InstProps.Rd | InstProps.Ra | InstProps.Rb | InstProps.Rc);
+ Add("001000xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Vadd, InstEmit.Vadd, InstProps.Rd | InstProps.Ra | InstProps.Rb | InstProps.Rc);
+ Add("01011111xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Vmad, InstEmit.Vmad, InstProps.Rd | InstProps.Ra | InstProps.Rb | InstProps.Rc);
+ Add("0011101xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Vmnmx, InstEmit.Vmnmx, InstProps.Rd | InstProps.Ra | InstProps.Rb | InstProps.Rc);
+ Add("0101000011011xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Vote, InstEmit.Vote, InstProps.Rd | InstProps.VPd | InstProps.Ps);
+ Add("0101000011100xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Votevtg, InstEmit.Votevtg);
+ Add("0100000xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Vset, InstEmit.Vset, InstProps.Rd | InstProps.Ra | InstProps.Rb | InstProps.Rc);
+ Add("0101000011110xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Vsetp, InstEmit.Vsetp, InstProps.Ra | InstProps.Rb | InstProps.Pd | InstProps.Pdn | InstProps.Ps);
+ Add("01010111xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Vshl, InstEmit.Vshl, InstProps.Rd | InstProps.Ra | InstProps.Rb | InstProps.Rc);
+ Add("01010110xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Vshr, InstEmit.Vshr, InstProps.Rd | InstProps.Ra | InstProps.Rb | InstProps.Rc);
+ Add("0101101100xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Xmad, InstEmit.XmadR, InstProps.Rd | InstProps.Ra | InstProps.Rb | InstProps.Rc);
+ Add("0011011x00xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Xmad, InstEmit.XmadI, InstProps.Rd | InstProps.Ra | InstProps.Ib | InstProps.Rc);
+ Add("0100111xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Xmad, InstEmit.XmadC, InstProps.Rd | InstProps.Ra | InstProps.Rc);
+ Add("010100010xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", InstName.Xmad, InstEmit.XmadRc, InstProps.Rd | InstProps.Ra | InstProps.Rc);
+ #endregion
+ }
+
+ /// <summary>
+ /// Registers an instruction encoding on the opcode lookup table.
+ /// </summary>
+ /// <remarks>
+ /// Only the first <see cref="EncodingBits"/> characters of <paramref name="encoding"/> are looked at.
+ /// A '1' is a bit that must be set, a 'x' is a wildcard bit, and any other character is a bit that must be clear.
+ /// Every combination of the wildcard bits gets a table slot. A slot that is already in use is only
+ /// overwritten when its current entry has more wildcard bits than the encoding being added.
+ /// </remarks>
+ /// <param name="encoding">Bit pattern of the instruction, most significant bit first</param>
+ /// <param name="name">Name of the instruction</param>
+ /// <param name="emitter">Emitter stored on the table entry</param>
+ /// <param name="props">Properties stored on the table entry</param>
+ private static void Add(string encoding, InstName name, InstEmitter emitter, InstProps props = InstProps.None)
+ {
+     ReadOnlySpan<char> pattern = encoding.AsSpan(0, EncodingBits);
+
+     int fixedBits = 0;
+     int wildcardCount = 0;
+     int[] wildcardBits = new int[pattern.Length];
+
+     // The first character of the pattern maps to the most significant bit.
+     for (int charIndex = 0; charIndex < pattern.Length; charIndex++)
+     {
+         int bit = pattern.Length - 1 - charIndex;
+
+         switch (pattern[charIndex])
+         {
+             case '1':
+                 fixedBits |= 1 << bit;
+                 break;
+             case 'x':
+                 wildcardBits[wildcardCount++] = bit;
+                 break;
+         }
+     }
+
+     TableEntry entry = new TableEntry(name, emitter, props, wildcardCount);
+
+     int combinations = 1 << wildcardCount;
+
+     for (int combination = 0; combination < combinations; combination++)
+     {
+         // Scatter the bits of the combination counter over the wildcard positions.
+         int opCode = fixedBits;
+
+         for (int wildcard = 0; wildcard < wildcardCount; wildcard++)
+         {
+             opCode |= ((combination >> wildcard) & 1) << wildcardBits[wildcard];
+         }
+
+         ref TableEntry slot = ref _opCodes[opCode];
+
+         // Encodings with fewer wildcard bits take priority over the ones with more.
+         if (slot.Emitter == null || slot.XBits > wildcardCount)
+         {
+             slot = entry;
+         }
+     }
+ }
+
+ /// <summary>
+ /// Creates the instruction operation for an opcode, using the table entry selected by its top <see cref="EncodingBits"/> bits.
+ /// </summary>
+ /// <param name="address">Address passed along to the returned operation</param>
+ /// <param name="opCode">64-bit instruction opcode</param>
+ /// <returns>
+ /// Operation built from the matching table entry, or an operation named <see cref="InstName.Invalid"/>
+ /// with a null emitter if the entry has no emitter
+ /// </returns>
+ public static InstOp GetOp(ulong address, ulong opCode)
+ {
+     ref TableEntry match = ref _opCodes[opCode >> (64 - EncodingBits)];
+
+     if (match.Emitter == null)
+     {
+         return new InstOp(address, opCode, InstName.Invalid, null, InstProps.None);
+     }
+
+     return new InstOp(address, opCode, match.Name, match.Emitter, match.Props);
+ }
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/Decoders/Register.cs b/src/Ryujinx.Graphics.Shader/Decoders/Register.cs
new file mode 100644
index 00000000..e375096d
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/Decoders/Register.cs
@@ -0,0 +1,36 @@
+using System;
+
+namespace Ryujinx.Graphics.Shader.Decoders
+{
+ /// <summary>
+ /// Register, identified by its index and type.
+ /// </summary>
+ readonly struct Register : IEquatable<Register>
+ {
+     /// <summary>
+     /// Index of the register.
+     /// </summary>
+     public int Index { get; }
+
+     /// <summary>
+     /// Type of the register.
+     /// </summary>
+     public RegisterType Type { get; }
+
+     /// <summary>
+     /// True for the general purpose register at <see cref="RegisterConsts.RegisterZeroIndex"/>.
+     /// </summary>
+     public bool IsRZ => Index == RegisterConsts.RegisterZeroIndex && Type == RegisterType.Gpr;
+
+     /// <summary>
+     /// True for the predicate register at <see cref="RegisterConsts.PredicateTrueIndex"/>.
+     /// </summary>
+     public bool IsPT => Index == RegisterConsts.PredicateTrueIndex && Type == RegisterType.Predicate;
+
+     /// <summary>
+     /// Creates a new register.
+     /// </summary>
+     /// <param name="index">Index of the register</param>
+     /// <param name="type">Type of the register</param>
+     public Register(int index, RegisterType type)
+     {
+         Index = index;
+         Type = type;
+     }
+
+     /// <summary>
+     /// Packs the low 16 bits of the type (upper half) and of the index (lower half) into the hash code.
+     /// </summary>
+     /// <returns>Hash code of the register</returns>
+     public override int GetHashCode() => ((ushort)Type << 16) | (ushort)Index;
+
+     /// <summary>
+     /// Checks if the object is a register with the same index and type as this one.
+     /// </summary>
+     /// <param name="obj">Object to compare with</param>
+     /// <returns>True if the object is an equal register, false otherwise</returns>
+     public override bool Equals(object obj) => obj is Register other && Equals(other);
+
+     /// <summary>
+     /// Checks if the other register has the same index and type as this one.
+     /// </summary>
+     /// <param name="other">Register to compare with</param>
+     /// <returns>True if both registers are equal, false otherwise</returns>
+     public bool Equals(Register other) => Index == other.Index && Type == other.Type;
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/Decoders/RegisterConsts.cs b/src/Ryujinx.Graphics.Shader/Decoders/RegisterConsts.cs
new file mode 100644
index 00000000..d381f954
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/Decoders/RegisterConsts.cs
@@ -0,0 +1,13 @@
+namespace Ryujinx.Graphics.Shader.Decoders
+{
+ /// <summary>
+ /// Register counts, and the indices derived from them.
+ /// </summary>
+ static class RegisterConsts
+ {
+     // Amount of registers of each type.
+     public const int GprsCount = 255;
+     public const int PredsCount = 7;
+     public const int FlagsCount = 4;
+
+     // Sum of the counts of all register types.
+     public const int TotalCount = FlagsCount + PredsCount + GprsCount;
+
+     // These indices are one past the last index covered by the matching count.
+     public const int PredicateTrueIndex = PredsCount;
+     public const int RegisterZeroIndex = GprsCount;
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/Decoders/RegisterType.cs b/src/Ryujinx.Graphics.Shader/Decoders/RegisterType.cs
new file mode 100644
index 00000000..648f816a
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/Decoders/RegisterType.cs
@@ -0,0 +1,9 @@
+namespace Ryujinx.Graphics.Shader.Decoders
+{
+ /// <summary>
+ /// Type of a register.
+ /// </summary>
+ enum RegisterType
+ {
+     Flag = 0,
+     Gpr = 1,
+     Predicate = 2,
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/IGpuAccessor.cs b/src/Ryujinx.Graphics.Shader/IGpuAccessor.cs
new file mode 100644
index 00000000..2207156c
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/IGpuAccessor.cs
@@ -0,0 +1,528 @@
+using System;
+
+namespace Ryujinx.Graphics.Shader
+{
+ /// <summary>
+ /// GPU state access interface.
+ /// Every member except <see cref="GetCode"/> has a default implementation.
+ /// </summary>
+ public interface IGpuAccessor
+ {
+     /// <summary>
+     /// Prints a log message.
+     /// </summary>
+     /// <param name="message">Message to print</param>
+     void Log(string message)
+     {
+         // No default log output.
+     }
+
+     /// <summary>
+     /// Reads data from the constant buffer 1.
+     /// </summary>
+     /// <param name="offset">Offset in bytes to read from</param>
+     /// <returns>Value at the given offset</returns>
+     uint ConstantBuffer1Read(int offset) => 0;
+
+     /// <summary>
+     /// Gets a span of the specified memory location, containing shader code.
+     /// </summary>
+     /// <param name="address">GPU virtual address of the data</param>
+     /// <param name="minimumSize">Minimum size that the returned span may have</param>
+     /// <returns>Span of the memory location</returns>
+     ReadOnlySpan<ulong> GetCode(ulong address, int minimumSize);
+
+     /// <summary>
+     /// Queries the alpha test comparison operator that is currently in use.
+     /// If alpha test is disabled, it should be set to <see cref="AlphaTestOp.Always"/>.
+     /// </summary>
+     /// <returns>Current alpha test comparison</returns>
+     AlphaTestOp QueryAlphaTestCompare() => AlphaTestOp.Always;
+
+     /// <summary>
+     /// Queries the current alpha test reference value used by the comparison.
+     /// </summary>
+     /// <returns>Current alpha test reference value</returns>
+     float QueryAlphaTestReference() => 0f;
+
+     /// <summary>
+     /// Queries the type of the vertex shader input attribute at the specified <paramref name="location"/>.
+     /// </summary>
+     /// <param name="location">Location of the input attribute</param>
+     /// <returns>Input type</returns>
+     AttributeType QueryAttributeType(int location) => AttributeType.Float;
+
+     /// <summary>
+     /// Queries whether the alpha-to-coverage dithering feature is enabled.
+     /// </summary>
+     /// <returns>True if the feature is enabled, false otherwise</returns>
+     bool QueryAlphaToCoverageDitherEnable() => false;
+
+     /// <summary>
+     /// Queries the binding number of a constant buffer.
+     /// </summary>
+     /// <param name="index">Constant buffer index</param>
+     /// <returns>Binding number</returns>
+     int QueryBindingConstantBuffer(int index) => index;
+
+     /// <summary>
+     /// Queries the binding number of a storage buffer.
+     /// </summary>
+     /// <param name="index">Storage buffer index</param>
+     /// <returns>Binding number</returns>
+     int QueryBindingStorageBuffer(int index) => index;
+
+     /// <summary>
+     /// Queries the binding number of a texture.
+     /// </summary>
+     /// <param name="index">Texture index</param>
+     /// <param name="isBuffer">Indicates if the texture is a buffer texture</param>
+     /// <returns>Binding number</returns>
+     int QueryBindingTexture(int index, bool isBuffer) => index;
+
+     /// <summary>
+     /// Queries the binding number of an image.
+     /// </summary>
+     /// <param name="index">Image index</param>
+     /// <param name="isBuffer">Indicates if the image is a buffer image</param>
+     /// <returns>Binding number</returns>
+     int QueryBindingImage(int index, bool isBuffer) => index;
+
+     /// <summary>
+     /// Queries output type for fragment shaders.
+     /// </summary>
+     /// <param name="location">Location of the fragment output</param>
+     /// <returns>Output type</returns>
+     AttributeType QueryFragmentOutputType(int location) => AttributeType.Float;
+
+     /// <summary>
+     /// Queries Local Size X for compute shaders.
+     /// </summary>
+     /// <returns>Local Size X</returns>
+     int QueryComputeLocalSizeX() => 1;
+
+     /// <summary>
+     /// Queries Local Size Y for compute shaders.
+     /// </summary>
+     /// <returns>Local Size Y</returns>
+     int QueryComputeLocalSizeY() => 1;
+
+     /// <summary>
+     /// Queries Local Size Z for compute shaders.
+     /// </summary>
+     /// <returns>Local Size Z</returns>
+     int QueryComputeLocalSizeZ() => 1;
+
+     /// <summary>
+     /// Queries Local Memory size in bytes for compute shaders.
+     /// </summary>
+     /// <returns>Local Memory size in bytes</returns>
+     int QueryComputeLocalMemorySize() => 0x1000;
+
+     /// <summary>
+     /// Queries Shared Memory size in bytes for compute shaders.
+     /// </summary>
+     /// <returns>Shared Memory size in bytes</returns>
+     int QueryComputeSharedMemorySize() => 0xc000;
+
+     /// <summary>
+     /// Queries Constant Buffer usage information.
+     /// </summary>
+     /// <returns>A mask where each bit set indicates a bound constant buffer</returns>
+     uint QueryConstantBufferUse() => 0;
+
+     /// <summary>
+     /// Queries whether the current draw has written the base vertex and base instance into Constant Buffer 0.
+     /// </summary>
+     /// <returns>True if the shader translator can assume that the constant buffer contains the base IDs, false otherwise</returns>
+     bool QueryHasConstantBufferDrawParameters() => false;
+
+     /// <summary>
+     /// Queries whether the current draw uses unaligned storage buffer addresses.
+     /// </summary>
+     /// <returns>True if any storage buffer address is not aligned to 16 bytes, false otherwise</returns>
+     bool QueryHasUnalignedStorageBuffer() => false;
+
+     /// <summary>
+     /// Queries host's gather operation precision bits for biasing their coordinates. Zero means no bias.
+     /// </summary>
+     /// <returns>Bits of gather operation precision to use for coordinate bias</returns>
+     int QueryHostGatherBiasPrecision() => 0;
+
+     /// <summary>
+     /// Queries host about whether to reduce precision to improve performance.
+     /// </summary>
+     /// <returns>True if precision is limited to vertex position, false otherwise</returns>
+     bool QueryHostReducedPrecision() => false;
+
+     /// <summary>
+     /// Queries dual source blend state.
+     /// </summary>
+     /// <returns>True if blending is enabled with a dual source blend equation, false otherwise</returns>
+     bool QueryDualSourceBlendEnable() => false;
+
+     /// <summary>
+     /// Queries host about the presence of the FrontFacing built-in variable bug.
+     /// </summary>
+     /// <returns>True if the bug is present on the host device used, false otherwise</returns>
+     bool QueryHostHasFrontFacingBug() => false;
+
+     /// <summary>
+     /// Queries host about the presence of the vector indexing bug.
+     /// </summary>
+     /// <returns>True if the bug is present on the host device used, false otherwise</returns>
+     bool QueryHostHasVectorIndexingBug() => false;
+
+     /// <summary>
+     /// Queries the storage buffer offset alignment required by the host.
+     /// </summary>
+     /// <returns>Host storage buffer alignment in bytes</returns>
+     int QueryHostStorageBufferOffsetAlignment() => 16;
+
+     /// <summary>
+     /// Queries host support for texture formats with BGRA component order (such as BGRA8).
+     /// </summary>
+     /// <returns>True if BGRA formats are supported, false otherwise</returns>
+     bool QueryHostSupportsBgraFormat() => true;
+
+     /// <summary>
+     /// Queries host support for fragment shader ordering critical sections on the shader code.
+     /// </summary>
+     /// <returns>True if fragment shader interlock is supported, false otherwise</returns>
+     bool QueryHostSupportsFragmentShaderInterlock() => true;
+
+     /// <summary>
+     /// Queries host support for fragment shader ordering scoped critical sections on the shader code.
+     /// </summary>
+     /// <returns>True if fragment shader ordering is supported, false otherwise</returns>
+     bool QueryHostSupportsFragmentShaderOrderingIntel() => false;
+
+     /// <summary>
+     /// Queries host GPU geometry shader support.
+     /// </summary>
+     /// <returns>True if the GPU and driver supports geometry shaders, false otherwise</returns>
+     bool QueryHostSupportsGeometryShader() => true;
+
+     /// <summary>
+     /// Queries host GPU geometry shader passthrough support.
+     /// </summary>
+     /// <returns>True if the GPU and driver supports geometry shader passthrough, false otherwise</returns>
+     bool QueryHostSupportsGeometryShaderPassthrough() => true;
+
+     /// <summary>
+     /// Queries host support for readable images without an explicit format declaration on the shader.
+     /// </summary>
+     /// <returns>True if formatted image load is supported, false otherwise</returns>
+     bool QueryHostSupportsImageLoadFormatted() => true;
+
+     /// <summary>
+     /// Queries host support for writes to the layer from vertex or tessellation shader stages.
+     /// </summary>
+     /// <returns>True if writes to the layer from vertex or tessellation are supported, false otherwise</returns>
+     bool QueryHostSupportsLayerVertexTessellation() => true;
+
+     /// <summary>
+     /// Queries host GPU non-constant texture offset support.
+     /// </summary>
+     /// <returns>True if the GPU and driver supports non-constant texture offsets, false otherwise</returns>
+     bool QueryHostSupportsNonConstantTextureOffset() => true;
+
+     /// <summary>
+     /// Queries host GPU shader ballot support.
+     /// </summary>
+     /// <returns>True if the GPU and driver supports shader ballot, false otherwise</returns>
+     bool QueryHostSupportsShaderBallot() => true;
+
+     /// <summary>
+     /// Queries host GPU support for signed normalized buffer texture formats.
+     /// </summary>
+     /// <returns>True if the GPU and driver supports the formats, false otherwise</returns>
+     bool QueryHostSupportsSnormBufferTextureFormat() => true;
+
+     /// <summary>
+     /// Queries host GPU texture shadow LOD support.
+     /// </summary>
+     /// <returns>True if the GPU and driver supports texture shadow LOD, false otherwise</returns>
+     bool QueryHostSupportsTextureShadowLod() => true;
+
+     /// <summary>
+     /// Queries host support for writes to the viewport index from vertex or tessellation shader stages.
+     /// </summary>
+     /// <returns>True if writes to the viewport index from vertex or tessellation are supported, false otherwise</returns>
+     bool QueryHostSupportsViewportIndexVertexTessellation() => true;
+
+     /// <summary>
+     /// Queries host GPU shader viewport mask output support.
+     /// </summary>
+     /// <returns>True if the GPU and driver supports shader viewport mask output, false otherwise</returns>
+     bool QueryHostSupportsViewportMask() => true;
+
+     /// <summary>
+     /// Queries the point size from the GPU state, used when it is not explicitly set on the shader.
+     /// </summary>
+     /// <returns>Current point size</returns>
+     float QueryPointSize() => 1f;
+
+     /// <summary>
+     /// Queries the state that indicates if the program point size should be explicitly set on the shader
+     /// or read from the GPU state.
+     /// </summary>
+     /// <returns>True if the shader is expected to set the point size explicitly, false otherwise</returns>
+     bool QueryProgramPointSize() => true;
+
+     /// <summary>
+     /// Queries sampler type information.
+     /// </summary>
+     /// <param name="handle">Texture handle</param>
+     /// <param name="cbufSlot">Constant buffer slot for the texture handle</param>
+     /// <returns>The sampler type value for the given handle</returns>
+     SamplerType QuerySamplerType(int handle, int cbufSlot = -1) => SamplerType.Texture2D;
+
+     /// <summary>
+     /// Queries texture coordinate normalization information.
+     /// </summary>
+     /// <param name="handle">Texture handle</param>
+     /// <param name="cbufSlot">Constant buffer slot for the texture handle</param>
+     /// <returns>True if the coordinates are normalized, false otherwise</returns>
+     bool QueryTextureCoordNormalized(int handle, int cbufSlot = -1) => true;
+
+     /// <summary>
+     /// Queries current primitive topology for geometry shaders.
+     /// </summary>
+     /// <returns>Current primitive topology</returns>
+     InputTopology QueryPrimitiveTopology() => InputTopology.Points;
+
+     /// <summary>
+     /// Queries the tessellation evaluation shader primitive winding order.
+     /// </summary>
+     /// <returns>True if the primitive winding order is clockwise, false if counter-clockwise</returns>
+     bool QueryTessCw() => false;
+
+     /// <summary>
+     /// Queries the tessellation evaluation shader abstract patch type.
+     /// </summary>
+     /// <returns>Abstract patch type</returns>
+     TessPatchType QueryTessPatchType() => TessPatchType.Triangles;
+
+     /// <summary>
+     /// Queries the tessellation evaluation shader spacing between tessellated vertices of the patch.
+     /// </summary>
+     /// <returns>Spacing between tessellated vertices of the patch</returns>
+     TessSpacing QueryTessSpacing() => TessSpacing.EqualSpacing;
+
+     /// <summary>
+     /// Queries texture format information, for shaders using image load or store.
+     /// </summary>
+     /// <remarks>
+     /// This only returns non-compressed color formats.
+     /// If the format of the texture is a compressed, depth or unsupported format, then a default value is returned.
+     /// </remarks>
+     /// <param name="handle">Texture handle</param>
+     /// <param name="cbufSlot">Constant buffer slot for the texture handle</param>
+     /// <returns>Color format of the non-compressed texture</returns>
+     TextureFormat QueryTextureFormat(int handle, int cbufSlot = -1) => TextureFormat.R8G8B8A8Unorm;
+
+     /// <summary>
+     /// Queries depth mode information from the GPU state.
+     /// </summary>
+     /// <returns>True if current depth mode is -1 to 1, false if 0 to 1</returns>
+     bool QueryTransformDepthMinusOneToOne() => false;
+
+     /// <summary>
+     /// Queries transform feedback enable state.
+     /// </summary>
+     /// <returns>True if the shader uses transform feedback, false otherwise</returns>
+     bool QueryTransformFeedbackEnabled() => false;
+
+     /// <summary>
+     /// Queries the varying locations that should be written to the transform feedback buffer.
+     /// </summary>
+     /// <param name="bufferIndex">Index of the transform feedback buffer</param>
+     /// <returns>Varying locations for the specified buffer</returns>
+     ReadOnlySpan<byte> QueryTransformFeedbackVaryingLocations(int bufferIndex) => ReadOnlySpan<byte>.Empty;
+
+     /// <summary>
+     /// Queries the stride (in bytes) of the per vertex data written into the transform feedback buffer.
+     /// </summary>
+     /// <param name="bufferIndex">Index of the transform feedback buffer</param>
+     /// <returns>Stride for the specified buffer</returns>
+     int QueryTransformFeedbackStride(int bufferIndex) => 0;
+
+     /// <summary>
+     /// Queries if host state forces early depth testing.
+     /// </summary>
+     /// <returns>True if early depth testing is forced</returns>
+     bool QueryEarlyZForce() => false;
+
+     /// <summary>
+     /// Queries if host state disables the viewport transform.
+     /// </summary>
+     /// <returns>True if the viewport transform is disabled</returns>
+     bool QueryViewportTransformDisable() => false;
+
+     /// <summary>
+     /// Registers a texture used by the shader.
+     /// </summary>
+     /// <param name="handle">Texture handle word offset</param>
+     /// <param name="cbufSlot">Constant buffer slot where the texture handle is located</param>
+     void RegisterTexture(int handle, int cbufSlot)
+     {
+         // Only useful when recording information for a disk shader cache.
+     }
+ }
+}
diff --git a/src/Ryujinx.Graphics.Shader/InputTopology.cs b/src/Ryujinx.Graphics.Shader/InputTopology.cs
new file mode 100644
index 00000000..da332909
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/InputTopology.cs
@@ -0,0 +1,40 @@
+namespace Ryujinx.Graphics.Shader
+{
+    /// <summary>
+    /// Primitive topology of the shader input, stored as a single byte.
+    /// </summary>
+    public enum InputTopology : byte
+    {
+        Points,
+        Lines,
+        LinesAdjacency,
+        Triangles,
+        TrianglesAdjacency
+    }
+
+    /// <summary>
+    /// Conversion helpers for <see cref="InputTopology"/>.
+    /// </summary>
+    static class InputTopologyExtensions
+    {
+        /// <summary>
+        /// Gets the GLSL input primitive layout qualifier name for a topology.
+        /// </summary>
+        /// <param name="topology">Topology to convert</param>
+        /// <returns>The GLSL qualifier name; unknown values fall back to "points"</returns>
+        public static string ToGlslString(this InputTopology topology)
+        {
+            return topology switch
+            {
+                InputTopology.Points => "points",
+                InputTopology.Lines => "lines",
+                InputTopology.LinesAdjacency => "lines_adjacency",
+                InputTopology.Triangles => "triangles",
+                InputTopology.TrianglesAdjacency => "triangles_adjacency",
+                _ => "points"
+            };
+        }
+
+        /// <summary>
+        /// Gets the number of input vertices that make up one primitive of the given topology.
+        /// </summary>
+        /// <remarks>
+        /// Adjacency topologies include the adjacent vertices in each primitive, so a line with
+        /// adjacency has 4 vertices and a triangle with adjacency has 6, not 2 and 3.
+        /// </remarks>
+        /// <param name="topology">Topology to query</param>
+        /// <returns>Vertices per input primitive; unknown values fall back to 1</returns>
+        public static int ToInputVertices(this InputTopology topology)
+        {
+            return topology switch
+            {
+                InputTopology.Points => 1,
+                InputTopology.Lines => 2,
+                InputTopology.LinesAdjacency => 4,
+                InputTopology.Triangles => 3,
+                InputTopology.TrianglesAdjacency => 6,
+                _ => 1
+            };
+        }
+    }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/Instructions/AttributeMap.cs b/src/Ryujinx.Graphics.Shader/Instructions/AttributeMap.cs
new file mode 100644
index 00000000..562fb8d5
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/Instructions/AttributeMap.cs
@@ -0,0 +1,351 @@
+using Ryujinx.Graphics.Shader.IntermediateRepresentation;
+using Ryujinx.Graphics.Shader.Translation;
+using System.Collections.Generic;
+
+using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
+
+namespace Ryujinx.Graphics.Shader.Instructions
+{
+ /// <summary>
+ /// Maps shader attribute offsets to the I/O variables they address, and emits
+ /// the loads and stores used to access those variables.
+ /// </summary>
+ static class AttributeMap
+ {
+     /// <summary>
+     /// Set of shader stages, with one bit per stage. The bit position of each
+     /// stage is the numeric value of the matching <see cref="ShaderStage"/> member.
+     /// </summary>
+     [System.Flags]
+     private enum StagesMask : byte
+     {
+         None = 0,
+         Compute = 1 << (int)ShaderStage.Compute,
+         Vertex = 1 << (int)ShaderStage.Vertex,
+         TessellationControl = 1 << (int)ShaderStage.TessellationControl,
+         TessellationEvaluation = 1 << (int)ShaderStage.TessellationEvaluation,
+         Geometry = 1 << (int)ShaderStage.Geometry,
+         Fragment = 1 << (int)ShaderStage.Fragment,
+
+         Tessellation = TessellationControl | TessellationEvaluation,
+         VertexTessellationGeometry = Vertex | Tessellation | Geometry,
+         TessellationGeometryFragment = Tessellation | Geometry | Fragment,
+         AllGraphics = Vertex | Tessellation | Geometry | Fragment
+     }
+
+     /// <summary>
+     /// Immutable description of the attribute that a 4-byte word belongs to.
+     /// </summary>
+     private readonly struct AttributeEntry
+     {
+         /// <summary>Offset of the first word of the range registered together with this entry.</summary>
+         public int BaseOffset { get; }
+
+         /// <summary>Type the attribute was registered with.</summary>
+         public AggregateType Type { get; }
+
+         /// <summary>I/O variable the attribute was registered with.</summary>
+         public IoVariable IoVariable { get; }
+
+         /// <summary>Stages where the attribute may be used as an input.</summary>
+         public StagesMask InputMask { get; }
+
+         /// <summary>Stages where the attribute may be used as an output.</summary>
+         public StagesMask OutputMask { get; }
+
+         public AttributeEntry(
+             int baseOffset,
+             AggregateType type,
+             IoVariable ioVariable,
+             StagesMask inputMask,
+             StagesMask outputMask)
+         {
+             BaseOffset = baseOffset;
+             Type = type;
+             IoVariable = ioVariable;
+             InputMask = inputMask;
+             OutputMask = outputMask;
+         }
+     }
+
+     private static readonly IReadOnlyDictionary<int, AttributeEntry> _attributes;
+     private static readonly IReadOnlyDictionary<int, AttributeEntry> _attributesPerPatch;
+
+     static AttributeMap()
+     {
+         _attributes = CreateMap();
+         _attributesPerPatch = CreatePerPatchMap();
+     }
+
+     /// <summary>
+     /// Builds the table of regular (non per-patch) attributes, keyed by word offset.
+     /// </summary>
+     /// <returns>The attribute table</returns>
+     private static IReadOnlyDictionary<int, AttributeEntry> CreateMap()
+     {
+         var map = new Dictionary<int, AttributeEntry>();
+
+         Add(map, 0x060, AggregateType.S32, IoVariable.PrimitiveId, StagesMask.TessellationGeometryFragment, StagesMask.Geometry);
+         Add(map, 0x064, AggregateType.S32, IoVariable.Layer, StagesMask.Fragment, StagesMask.VertexTessellationGeometry);
+         Add(map, 0x068, AggregateType.S32, IoVariable.ViewportIndex, StagesMask.Fragment, StagesMask.VertexTessellationGeometry);
+         Add(map, 0x06c, AggregateType.FP32, IoVariable.PointSize, StagesMask.None, StagesMask.VertexTessellationGeometry);
+         Add(map, 0x070, AggregateType.Vector4 | AggregateType.FP32, IoVariable.Position, StagesMask.TessellationGeometryFragment, StagesMask.VertexTessellationGeometry);
+         Add(map, 0x080, AggregateType.Vector4 | AggregateType.FP32, IoVariable.UserDefined, StagesMask.AllGraphics, StagesMask.VertexTessellationGeometry, 32);
+         Add(map, 0x280, AggregateType.Vector4 | AggregateType.FP32, IoVariable.FrontColorDiffuse, StagesMask.TessellationGeometryFragment, StagesMask.VertexTessellationGeometry);
+         Add(map, 0x290, AggregateType.Vector4 | AggregateType.FP32, IoVariable.FrontColorSpecular, StagesMask.TessellationGeometryFragment, StagesMask.VertexTessellationGeometry);
+         Add(map, 0x2a0, AggregateType.Vector4 | AggregateType.FP32, IoVariable.BackColorDiffuse, StagesMask.TessellationGeometryFragment, StagesMask.VertexTessellationGeometry);
+         Add(map, 0x2b0, AggregateType.Vector4 | AggregateType.FP32, IoVariable.BackColorSpecular, StagesMask.TessellationGeometryFragment, StagesMask.VertexTessellationGeometry);
+         Add(map, 0x2c0, AggregateType.Array | AggregateType.FP32, IoVariable.ClipDistance, StagesMask.TessellationGeometryFragment, StagesMask.VertexTessellationGeometry, 8);
+         Add(map, 0x2e0, AggregateType.Vector2 | AggregateType.FP32, IoVariable.PointCoord, StagesMask.Fragment, StagesMask.None);
+         Add(map, 0x2e8, AggregateType.FP32, IoVariable.FogCoord, StagesMask.TessellationGeometryFragment, StagesMask.VertexTessellationGeometry);
+         Add(map, 0x2f0, AggregateType.Vector2 | AggregateType.FP32, IoVariable.TessellationCoord, StagesMask.TessellationEvaluation, StagesMask.None);
+         Add(map, 0x2f8, AggregateType.S32, IoVariable.InstanceId, StagesMask.Vertex, StagesMask.None);
+         Add(map, 0x2fc, AggregateType.S32, IoVariable.VertexId, StagesMask.Vertex, StagesMask.None);
+         Add(map, 0x300, AggregateType.Vector4 | AggregateType.FP32, IoVariable.TextureCoord, StagesMask.TessellationGeometryFragment, StagesMask.VertexTessellationGeometry);
+         Add(map, 0x3a0, AggregateType.Array | AggregateType.S32, IoVariable.ViewportMask, StagesMask.Fragment, StagesMask.VertexTessellationGeometry);
+         Add(map, 0x3fc, AggregateType.Bool, IoVariable.FrontFacing, StagesMask.Fragment, StagesMask.None);
+
+         return map;
+     }
+
+     /// <summary>
+     /// Builds the table of per-patch attributes, keyed by word offset.
+     /// </summary>
+     /// <returns>The per-patch attribute table</returns>
+     private static IReadOnlyDictionary<int, AttributeEntry> CreatePerPatchMap()
+     {
+         var map = new Dictionary<int, AttributeEntry>();
+
+         Add(map, 0x000, AggregateType.Vector4 | AggregateType.FP32, IoVariable.TessellationLevelOuter, StagesMask.TessellationEvaluation, StagesMask.TessellationControl);
+         Add(map, 0x010, AggregateType.Vector2 | AggregateType.FP32, IoVariable.TessellationLevelInner, StagesMask.TessellationEvaluation, StagesMask.TessellationControl);
+         Add(map, 0x018, AggregateType.Vector4 | AggregateType.FP32, IoVariable.UserDefined, StagesMask.TessellationEvaluation, StagesMask.TessellationControl, 31, 0x200);
+
+         return map;
+     }
+
+     /// <summary>
+     /// Registers an attribute range. One entry is added for every 4-byte word of
+     /// <paramref name="count"/> consecutive attributes of the given type, and all of
+     /// those entries share the same base offset.
+     /// </summary>
+     /// <param name="attributes">Table receiving the entries</param>
+     /// <param name="offset">Offset of the first word of the range</param>
+     /// <param name="type">Type of each attribute in the range</param>
+     /// <param name="ioVariable">I/O variable the range maps to</param>
+     /// <param name="inputMask">Stages where the range is valid as an input</param>
+     /// <param name="outputMask">Stages where the range is valid as an output</param>
+     /// <param name="count">Number of consecutive attributes in the range</param>
+     /// <param name="upperBound">Registration stops once the next word offset reaches this value</param>
+     private static void Add(
+         Dictionary<int, AttributeEntry> attributes,
+         int offset,
+         AggregateType type,
+         IoVariable ioVariable,
+         StagesMask inputMask,
+         StagesMask outputMask,
+         int count = 1,
+         int upperBound = 0x400)
+     {
+         int baseOffset = offset;
+
+         int elementsCount = GetElementCount(type);
+
+         for (int index = 0; index < count; index++)
+         {
+             for (int elementIndex = 0; elementIndex < elementsCount; elementIndex++)
+             {
+                 attributes.Add(offset, new AttributeEntry(baseOffset, type, ioVariable, inputMask, outputMask));
+
+                 offset += 4;
+
+                 if (offset >= upperBound)
+                 {
+                     return;
+                 }
+             }
+         }
+     }
+
+     /// <summary>
+     /// Emits a load of the attribute word located at the given offset.
+     /// </summary>
+     /// <remarks>
+     /// A message is logged and a constant zero is returned when the offset is not in the table,
+     /// when the attribute is not valid for the current stage and direction, or when the host
+     /// does not support it.
+     /// </remarks>
+     /// <param name="context">Emitter context</param>
+     /// <param name="primVertex">Vertex index operand, or null; replaced by the invocation ID when <see cref="HasInvocationId"/> applies to a non per-patch access</param>
+     /// <param name="offset">Attribute word offset</param>
+     /// <param name="isOutput">True to load from an output, false to load from an input</param>
+     /// <param name="isPerPatch">True to look the offset up in the per-patch table</param>
+     /// <returns>The loaded value, or a constant zero on failure</returns>
+     public static Operand GenerateAttributeLoad(EmitterContext context, Operand primVertex, int offset, bool isOutput, bool isPerPatch)
+     {
+         if (!(isPerPatch ? _attributesPerPatch : _attributes).TryGetValue(offset, out AttributeEntry entry))
+         {
+             context.Config.GpuAccessor.Log($"Attribute offset 0x{offset:X} is not valid.");
+             return Const(0);
+         }
+
+         StagesMask validUseMask = isOutput ? entry.OutputMask : entry.InputMask;
+
+         if (((StagesMask)(1 << (int)context.Config.Stage) & validUseMask) == StagesMask.None)
+         {
+             context.Config.GpuAccessor.Log($"Attribute offset 0x{offset:X} ({entry.IoVariable}) is not valid for stage {context.Config.Stage}.");
+             return Const(0);
+         }
+
+         if (!IsSupportedByHost(context.Config.GpuAccessor, context.Config.Stage, entry.IoVariable))
+         {
+             context.Config.GpuAccessor.Log($"Attribute offset 0x{offset:X} ({entry.IoVariable}) is not supported by the host for stage {context.Config.Stage}.");
+             return Const(0);
+         }
+
+         if (HasInvocationId(context.Config.Stage, isOutput) && !isPerPatch)
+         {
+             primVertex = context.Load(StorageKind.Input, IoVariable.InvocationId);
+         }
+
+         // Word index of the access relative to the start of the registered range.
+         int innerOffset = offset - entry.BaseOffset;
+         int innerIndex = innerOffset / 4;
+
+         StorageKind storageKind = isPerPatch
+             ? (isOutput ? StorageKind.OutputPerPatch : StorageKind.InputPerPatch)
+             : (isOutput ? StorageKind.Output : StorageKind.Input);
+         IoVariable ioVariable = GetIoVariable(context.Config.Stage, in entry);
+         AggregateType type = GetType(context.Config, isOutput, innerIndex, in entry);
+         int elementCount = GetElementCount(type);
+
+         bool isArray = type.HasFlag(AggregateType.Array);
+         bool hasArrayIndex = isArray || context.Config.HasPerLocationInputOrOutput(ioVariable, isOutput);
+
+         bool hasElementIndex = elementCount > 1;
+
+         if (hasArrayIndex && hasElementIndex)
+         {
+             int arrayIndex = innerIndex / elementCount;
+             int elementIndex = innerIndex - (arrayIndex * elementCount);
+
+             // For array types the vertex index comes before the array index;
+             // for per-location variables the location comes first.
+             return primVertex == null || isArray
+                 ? context.Load(storageKind, ioVariable, primVertex, Const(arrayIndex), Const(elementIndex))
+                 : context.Load(storageKind, ioVariable, Const(arrayIndex), primVertex, Const(elementIndex));
+         }
+         else if (hasArrayIndex || hasElementIndex)
+         {
+             return primVertex == null || isArray || !hasArrayIndex
+                 ? context.Load(storageKind, ioVariable, primVertex, Const(innerIndex))
+                 : context.Load(storageKind, ioVariable, Const(innerIndex), primVertex);
+         }
+         else
+         {
+             return context.Load(storageKind, ioVariable, primVertex);
+         }
+     }
+
+     /// <summary>
+     /// Emits a store to the output attribute word located at the given offset.
+     /// </summary>
+     /// <remarks>
+     /// A message is logged and nothing is stored when the offset is not in the table, when the
+     /// attribute is not a valid output for the current stage, or when the host does not support it.
+     /// </remarks>
+     /// <param name="context">Emitter context</param>
+     /// <param name="offset">Attribute word offset</param>
+     /// <param name="isPerPatch">True to look the offset up in the per-patch table</param>
+     /// <param name="value">Value to store</param>
+     public static void GenerateAttributeStore(EmitterContext context, int offset, bool isPerPatch, Operand value)
+     {
+         if (!(isPerPatch ? _attributesPerPatch : _attributes).TryGetValue(offset, out AttributeEntry entry))
+         {
+             context.Config.GpuAccessor.Log($"Attribute offset 0x{offset:X} is not valid.");
+             return;
+         }
+
+         if (((StagesMask)(1 << (int)context.Config.Stage) & entry.OutputMask) == StagesMask.None)
+         {
+             context.Config.GpuAccessor.Log($"Attribute offset 0x{offset:X} ({entry.IoVariable}) is not valid for stage {context.Config.Stage}.");
+             return;
+         }
+
+         if (!IsSupportedByHost(context.Config.GpuAccessor, context.Config.Stage, entry.IoVariable))
+         {
+             context.Config.GpuAccessor.Log($"Attribute offset 0x{offset:X} ({entry.IoVariable}) is not supported by the host for stage {context.Config.Stage}.");
+             return;
+         }
+
+         Operand invocationId = null;
+
+         if (HasInvocationId(context.Config.Stage, isOutput: true) && !isPerPatch)
+         {
+             invocationId = context.Load(StorageKind.Input, IoVariable.InvocationId);
+         }
+
+         // Word index of the access relative to the start of the registered range.
+         int innerOffset = offset - entry.BaseOffset;
+         int innerIndex = innerOffset / 4;
+
+         StorageKind storageKind = isPerPatch ? StorageKind.OutputPerPatch : StorageKind.Output;
+         IoVariable ioVariable = GetIoVariable(context.Config.Stage, in entry);
+         AggregateType type = GetType(context.Config, isOutput: true, innerIndex, in entry);
+         int elementCount = GetElementCount(type);
+
+         bool isArray = type.HasFlag(AggregateType.Array);
+         bool hasArrayIndex = isArray || context.Config.HasPerLocationInputOrOutput(ioVariable, isOutput: true);
+
+         bool hasElementIndex = elementCount > 1;
+
+         if (hasArrayIndex && hasElementIndex)
+         {
+             int arrayIndex = innerIndex / elementCount;
+             int elementIndex = innerIndex - (arrayIndex * elementCount);
+
+             // Same operand ordering rule as in GenerateAttributeLoad.
+             if (invocationId == null || isArray)
+             {
+                 context.Store(storageKind, ioVariable, invocationId, Const(arrayIndex), Const(elementIndex), value);
+             }
+             else
+             {
+                 context.Store(storageKind, ioVariable, Const(arrayIndex), invocationId, Const(elementIndex), value);
+             }
+         }
+         else if (hasArrayIndex || hasElementIndex)
+         {
+             if (invocationId == null || isArray || !hasArrayIndex)
+             {
+                 context.Store(storageKind, ioVariable, invocationId, Const(innerIndex), value);
+             }
+             else
+             {
+                 context.Store(storageKind, ioVariable, Const(innerIndex), invocationId, value);
+             }
+         }
+         else
+         {
+             context.Store(storageKind, ioVariable, invocationId, value);
+         }
+     }
+
+     /// <summary>
+     /// Checks host support for the variables that need it: the viewport index outside of the
+     /// geometry and fragment stages, and the viewport mask. Everything else is reported as supported.
+     /// </summary>
+     /// <param name="gpuAccessor">Accessor used to query host capabilities</param>
+     /// <param name="stage">Current shader stage</param>
+     /// <param name="ioVariable">Variable being accessed</param>
+     /// <returns>True if the variable can be used on the host for the given stage</returns>
+     private static bool IsSupportedByHost(IGpuAccessor gpuAccessor, ShaderStage stage, IoVariable ioVariable)
+     {
+         if (ioVariable == IoVariable.ViewportIndex && stage != ShaderStage.Geometry && stage != ShaderStage.Fragment)
+         {
+             return gpuAccessor.QueryHostSupportsViewportIndexVertexTessellation();
+         }
+         else if (ioVariable == IoVariable.ViewportMask)
+         {
+             return gpuAccessor.QueryHostSupportsViewportMask();
+         }
+
+         return true;
+     }
+
+     /// <summary>
+     /// Gets the output variable that a regular (non per-patch) attribute offset maps to.
+     /// </summary>
+     /// <param name="config">Shader configuration</param>
+     /// <param name="offset">Attribute word offset</param>
+     /// <param name="location">Index of the 16-byte slot inside the range for per-location variables, zero otherwise</param>
+     /// <returns>The variable, or <see cref="IoVariable.Invalid"/> if the offset is unknown or is not an output of the current stage</returns>
+     public static IoVariable GetIoVariable(ShaderConfig config, int offset, out int location)
+     {
+         location = 0;
+
+         if (!_attributes.TryGetValue(offset, out AttributeEntry entry))
+         {
+             return IoVariable.Invalid;
+         }
+
+         if (((StagesMask)(1 << (int)config.Stage) & entry.OutputMask) == StagesMask.None)
+         {
+             return IoVariable.Invalid;
+         }
+
+         if (config.HasPerLocationInputOrOutput(entry.IoVariable, isOutput: true))
+         {
+             location = (offset - entry.BaseOffset) / 16;
+         }
+
+         return GetIoVariable(config.Stage, in entry);
+     }
+
+     /// <summary>
+     /// Gets the variable of an entry for a stage; the position attribute is
+     /// the fragment coordinate on the fragment stage.
+     /// </summary>
+     private static IoVariable GetIoVariable(ShaderStage stage, in AttributeEntry entry)
+     {
+         if (entry.IoVariable == IoVariable.Position && stage == ShaderStage.Fragment)
+         {
+             return IoVariable.FragmentCoord;
+         }
+
+         return entry.IoVariable;
+     }
+
+     /// <summary>
+     /// Gets the type used to access an entry. User defined variables and fragment output
+     /// colors take their type from the shader configuration, indexed by the 4-word slot
+     /// that <paramref name="innerIndex"/> falls in; other variables use the registered type.
+     /// </summary>
+     private static AggregateType GetType(ShaderConfig config, bool isOutput, int innerIndex, in AttributeEntry entry)
+     {
+         AggregateType type = entry.Type;
+
+         if (entry.IoVariable == IoVariable.UserDefined)
+         {
+             type = config.GetUserDefinedType(innerIndex / 4, isOutput);
+         }
+         else if (entry.IoVariable == IoVariable.FragmentOutputColor)
+         {
+             type = config.GetFragmentOutputColorType(innerIndex / 4);
+         }
+
+         return type;
+     }
+
+     /// <summary>
+     /// Checks if accesses need a vertex index: true only for inputs of the
+     /// tessellation control, tessellation evaluation and geometry stages.
+     /// </summary>
+     /// <param name="stage">Shader stage</param>
+     /// <param name="isOutput">True for output accesses</param>
+     /// <returns>True if a vertex index is used</returns>
+     public static bool HasPrimitiveVertex(ShaderStage stage, bool isOutput)
+     {
+         if (isOutput)
+         {
+             return false;
+         }
+
+         return stage == ShaderStage.TessellationControl ||
+                stage == ShaderStage.TessellationEvaluation ||
+                stage == ShaderStage.Geometry;
+     }
+
+     /// <summary>
+     /// Checks if accesses are indexed by the invocation ID: true only for
+     /// outputs of the tessellation control stage.
+     /// </summary>
+     /// <param name="stage">Shader stage</param>
+     /// <param name="isOutput">True for output accesses</param>
+     /// <returns>True if the invocation ID is used as index</returns>
+     public static bool HasInvocationId(ShaderStage stage, bool isOutput)
+     {
+         return isOutput && stage == ShaderStage.TessellationControl;
+     }
+
+     /// <summary>
+     /// Gets the number of vector elements of a type (1 when the type is not a vector).
+     /// </summary>
+     private static int GetElementCount(AggregateType type)
+     {
+         return (type & AggregateType.ElementCountMask) switch
+         {
+             AggregateType.Vector2 => 2,
+             AggregateType.Vector3 => 3,
+             AggregateType.Vector4 => 4,
+             _ => 1
+         };
+     }
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/Instructions/InstEmit.cs b/src/Ryujinx.Graphics.Shader/Instructions/InstEmit.cs
new file mode 100644
index 00000000..3a9e658a
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/Instructions/InstEmit.cs
@@ -0,0 +1,379 @@
+using Ryujinx.Graphics.Shader.Decoders;
+using Ryujinx.Graphics.Shader.Translation;
+
+namespace Ryujinx.Graphics.Shader.Instructions
+{
+ /// <summary>
+ /// Emitters for shader instructions that are not implemented. Each one reads
+ /// the decoded operation and logs a message through the GPU accessor.
+ /// </summary>
+ /// <remarks>
+ /// The decoded operation is read but deliberately discarded, since nothing is
+ /// emitted for these instructions. The read is kept so that whatever the decode
+ /// call does still happens.
+ /// </remarks>
+ static partial class InstEmit
+ {
+     public static void AtomCas(EmitterContext context)
+     {
+         _ = context.GetOp<InstAtomCas>();
+
+         context.Config.GpuAccessor.Log("Shader instruction AtomCas is not implemented.");
+     }
+
+     public static void AtomsCas(EmitterContext context)
+     {
+         _ = context.GetOp<InstAtomsCas>();
+
+         context.Config.GpuAccessor.Log("Shader instruction AtomsCas is not implemented.");
+     }
+
+     public static void B2r(EmitterContext context)
+     {
+         _ = context.GetOp<InstB2r>();
+
+         context.Config.GpuAccessor.Log("Shader instruction B2r is not implemented.");
+     }
+
+     public static void Bpt(EmitterContext context)
+     {
+         _ = context.GetOp<InstBpt>();
+
+         context.Config.GpuAccessor.Log("Shader instruction Bpt is not implemented.");
+     }
+
+     public static void Cctl(EmitterContext context)
+     {
+         _ = context.GetOp<InstCctl>();
+
+         context.Config.GpuAccessor.Log("Shader instruction Cctl is not implemented.");
+     }
+
+     public static void Cctll(EmitterContext context)
+     {
+         _ = context.GetOp<InstCctll>();
+
+         context.Config.GpuAccessor.Log("Shader instruction Cctll is not implemented.");
+     }
+
+     public static void Cctlt(EmitterContext context)
+     {
+         _ = context.GetOp<InstCctlt>();
+
+         context.Config.GpuAccessor.Log("Shader instruction Cctlt is not implemented.");
+     }
+
+     public static void Cs2r(EmitterContext context)
+     {
+         _ = context.GetOp<InstCs2r>();
+
+         context.Config.GpuAccessor.Log("Shader instruction Cs2r is not implemented.");
+     }
+
+     public static void FchkR(EmitterContext context)
+     {
+         _ = context.GetOp<InstFchkR>();
+
+         context.Config.GpuAccessor.Log("Shader instruction FchkR is not implemented.");
+     }
+
+     public static void FchkI(EmitterContext context)
+     {
+         _ = context.GetOp<InstFchkI>();
+
+         context.Config.GpuAccessor.Log("Shader instruction FchkI is not implemented.");
+     }
+
+     public static void FchkC(EmitterContext context)
+     {
+         _ = context.GetOp<InstFchkC>();
+
+         context.Config.GpuAccessor.Log("Shader instruction FchkC is not implemented.");
+     }
+
+     public static void Getcrsptr(EmitterContext context)
+     {
+         _ = context.GetOp<InstGetcrsptr>();
+
+         context.Config.GpuAccessor.Log("Shader instruction Getcrsptr is not implemented.");
+     }
+
+     public static void Getlmembase(EmitterContext context)
+     {
+         _ = context.GetOp<InstGetlmembase>();
+
+         context.Config.GpuAccessor.Log("Shader instruction Getlmembase is not implemented.");
+     }
+
+     public static void Ide(EmitterContext context)
+     {
+         _ = context.GetOp<InstIde>();
+
+         context.Config.GpuAccessor.Log("Shader instruction Ide is not implemented.");
+     }
+
+     public static void IdpR(EmitterContext context)
+     {
+         _ = context.GetOp<InstIdpR>();
+
+         context.Config.GpuAccessor.Log("Shader instruction IdpR is not implemented.");
+     }
+
+     public static void IdpC(EmitterContext context)
+     {
+         _ = context.GetOp<InstIdpC>();
+
+         context.Config.GpuAccessor.Log("Shader instruction IdpC is not implemented.");
+     }
+
+     public static void ImadspR(EmitterContext context)
+     {
+         _ = context.GetOp<InstImadspR>();
+
+         context.Config.GpuAccessor.Log("Shader instruction ImadspR is not implemented.");
+     }
+
+     public static void ImadspI(EmitterContext context)
+     {
+         _ = context.GetOp<InstImadspI>();
+
+         context.Config.GpuAccessor.Log("Shader instruction ImadspI is not implemented.");
+     }
+
+     public static void ImadspC(EmitterContext context)
+     {
+         _ = context.GetOp<InstImadspC>();
+
+         context.Config.GpuAccessor.Log("Shader instruction ImadspC is not implemented.");
+     }
+
+     public static void ImadspRc(EmitterContext context)
+     {
+         _ = context.GetOp<InstImadspRc>();
+
+         context.Config.GpuAccessor.Log("Shader instruction ImadspRc is not implemented.");
+     }
+
+     public static void Jcal(EmitterContext context)
+     {
+         _ = context.GetOp<InstJcal>();
+
+         context.Config.GpuAccessor.Log("Shader instruction Jcal is not implemented.");
+     }
+
+     public static void Jmp(EmitterContext context)
+     {
+         _ = context.GetOp<InstJmp>();
+
+         context.Config.GpuAccessor.Log("Shader instruction Jmp is not implemented.");
+     }
+
+     public static void Jmx(EmitterContext context)
+     {
+         _ = context.GetOp<InstJmx>();
+
+         context.Config.GpuAccessor.Log("Shader instruction Jmx is not implemented.");
+     }
+
+     public static void Ld(EmitterContext context)
+     {
+         _ = context.GetOp<InstLd>();
+
+         context.Config.GpuAccessor.Log("Shader instruction Ld is not implemented.");
+     }
+
+     public static void Lepc(EmitterContext context)
+     {
+         _ = context.GetOp<InstLepc>();
+
+         context.Config.GpuAccessor.Log("Shader instruction Lepc is not implemented.");
+     }
+
+     public static void Longjmp(EmitterContext context)
+     {
+         _ = context.GetOp<InstLongjmp>();
+
+         context.Config.GpuAccessor.Log("Shader instruction Longjmp is not implemented.");
+     }
+
+     public static void P2rR(EmitterContext context)
+     {
+         _ = context.GetOp<InstP2rR>();
+
+         context.Config.GpuAccessor.Log("Shader instruction P2rR is not implemented.");
+     }
+
+     public static void P2rI(EmitterContext context)
+     {
+         _ = context.GetOp<InstP2rI>();
+
+         context.Config.GpuAccessor.Log("Shader instruction P2rI is not implemented.");
+     }
+
+     public static void P2rC(EmitterContext context)
+     {
+         _ = context.GetOp<InstP2rC>();
+
+         context.Config.GpuAccessor.Log("Shader instruction P2rC is not implemented.");
+     }
+
+     public static void Pexit(EmitterContext context)
+     {
+         _ = context.GetOp<InstPexit>();
+
+         context.Config.GpuAccessor.Log("Shader instruction Pexit is not implemented.");
+     }
+
+     public static void Pixld(EmitterContext context)
+     {
+         _ = context.GetOp<InstPixld>();
+
+         context.Config.GpuAccessor.Log("Shader instruction Pixld is not implemented.");
+     }
+
+     public static void Plongjmp(EmitterContext context)
+     {
+         _ = context.GetOp<InstPlongjmp>();
+
+         context.Config.GpuAccessor.Log("Shader instruction Plongjmp is not implemented.");
+     }
+
+     public static void Pret(EmitterContext context)
+     {
+         _ = context.GetOp<InstPret>();
+
+         context.Config.GpuAccessor.Log("Shader instruction Pret is not implemented.");
+     }
+
+     public static void PrmtR(EmitterContext context)
+     {
+         _ = context.GetOp<InstPrmtR>();
+
+         context.Config.GpuAccessor.Log("Shader instruction PrmtR is not implemented.");
+     }
+
+     public static void PrmtI(EmitterContext context)
+     {
+         _ = context.GetOp<InstPrmtI>();
+
+         context.Config.GpuAccessor.Log("Shader instruction PrmtI is not implemented.");
+     }
+
+     public static void PrmtC(EmitterContext context)
+     {
+         _ = context.GetOp<InstPrmtC>();
+
+         context.Config.GpuAccessor.Log("Shader instruction PrmtC is not implemented.");
+     }
+
+     public static void PrmtRc(EmitterContext context)
+     {
+         _ = context.GetOp<InstPrmtRc>();
+
+         context.Config.GpuAccessor.Log("Shader instruction PrmtRc is not implemented.");
+     }
+
+     public static void R2b(EmitterContext context)
+     {
+         _ = context.GetOp<InstR2b>();
+
+         context.Config.GpuAccessor.Log("Shader instruction R2b is not implemented.");
+     }
+
+     public static void Ram(EmitterContext context)
+     {
+         _ = context.GetOp<InstRam>();
+
+         context.Config.GpuAccessor.Log("Shader instruction Ram is not implemented.");
+     }
+
+     public static void Rtt(EmitterContext context)
+     {
+         _ = context.GetOp<InstRtt>();
+
+         context.Config.GpuAccessor.Log("Shader instruction Rtt is not implemented.");
+     }
+
+     public static void Sam(EmitterContext context)
+     {
+         _ = context.GetOp<InstSam>();
+
+         context.Config.GpuAccessor.Log("Shader instruction Sam is not implemented.");
+     }
+
+     public static void Setcrsptr(EmitterContext context)
+     {
+         _ = context.GetOp<InstSetcrsptr>();
+
+         context.Config.GpuAccessor.Log("Shader instruction Setcrsptr is not implemented.");
+     }
+
+     public static void Setlmembase(EmitterContext context)
+     {
+         _ = context.GetOp<InstSetlmembase>();
+
+         context.Config.GpuAccessor.Log("Shader instruction Setlmembase is not implemented.");
+     }
+
+     public static void St(EmitterContext context)
+     {
+         _ = context.GetOp<InstSt>();
+
+         context.Config.GpuAccessor.Log("Shader instruction St is not implemented.");
+     }
+
+     public static void Stp(EmitterContext context)
+     {
+         _ = context.GetOp<InstStp>();
+
+         context.Config.GpuAccessor.Log("Shader instruction Stp is not implemented.");
+     }
+
+     public static void Txa(EmitterContext context)
+     {
+         _ = context.GetOp<InstTxa>();
+
+         context.Config.GpuAccessor.Log("Shader instruction Txa is not implemented.");
+     }
+
+     public static void Vabsdiff(EmitterContext context)
+     {
+         _ = context.GetOp<InstVabsdiff>();
+
+         context.Config.GpuAccessor.Log("Shader instruction Vabsdiff is not implemented.");
+     }
+
+     public static void Vabsdiff4(EmitterContext context)
+     {
+         _ = context.GetOp<InstVabsdiff4>();
+
+         context.Config.GpuAccessor.Log("Shader instruction Vabsdiff4 is not implemented.");
+     }
+
+     public static void Vadd(EmitterContext context)
+     {
+         _ = context.GetOp<InstVadd>();
+
+         context.Config.GpuAccessor.Log("Shader instruction Vadd is not implemented.");
+     }
+
+     public static void Votevtg(EmitterContext context)
+     {
+         _ = context.GetOp<InstVotevtg>();
+
+         context.Config.GpuAccessor.Log("Shader instruction Votevtg is not implemented.");
+     }
+
+     public static void Vset(EmitterContext context)
+     {
+         _ = context.GetOp<InstVset>();
+
+         context.Config.GpuAccessor.Log("Shader instruction Vset is not implemented.");
+     }
+
+     public static void Vshl(EmitterContext context)
+     {
+         _ = context.GetOp<InstVshl>();
+
+         context.Config.GpuAccessor.Log("Shader instruction Vshl is not implemented.");
+     }
+
+     public static void Vshr(EmitterContext context)
+     {
+         _ = context.GetOp<InstVshr>();
+
+         context.Config.GpuAccessor.Log("Shader instruction Vshr is not implemented.");
+     }
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/Instructions/InstEmitAluHelper.cs b/src/Ryujinx.Graphics.Shader/Instructions/InstEmitAluHelper.cs
new file mode 100644
index 00000000..879075ba
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/Instructions/InstEmitAluHelper.cs
@@ -0,0 +1,160 @@
+using Ryujinx.Graphics.Shader.Decoders;
+using Ryujinx.Graphics.Shader.IntermediateRepresentation;
+using Ryujinx.Graphics.Shader.Translation;
+using System;
+
+using static Ryujinx.Graphics.Shader.Instructions.InstEmitHelper;
+using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
+
+namespace Ryujinx.Graphics.Shader.Instructions
+{
+ /// <summary>
+ /// Helpers shared by the emitters of arithmetic and logic instructions.
+ /// </summary>
+ static class InstEmitAluHelper
+ {
+     /// <summary>
+     /// Gets the lowest value representable by an integer destination format.
+     /// </summary>
+     /// <param name="type">Integer format</param>
+     /// <returns>Minimum value of the format</returns>
+     /// <exception cref="ArgumentException">The format is not one of the handled integer formats</exception>
+     public static long GetIntMin(IDstFmt type) => type switch
+     {
+         IDstFmt.U16 => ushort.MinValue,
+         IDstFmt.S16 => short.MinValue,
+         IDstFmt.U32 => uint.MinValue,
+         IDstFmt.S32 => int.MinValue,
+         _ => throw new ArgumentException($"The type \"{type}\" is not a supported integer type.")
+     };
+
+     /// <summary>
+     /// Gets the highest value representable by an integer destination format.
+     /// </summary>
+     /// <param name="type">Integer format</param>
+     /// <returns>Maximum value of the format</returns>
+     /// <exception cref="ArgumentException">The format is not one of the handled integer formats</exception>
+     public static long GetIntMax(IDstFmt type) => type switch
+     {
+         IDstFmt.U16 => ushort.MaxValue,
+         IDstFmt.S16 => short.MaxValue,
+         IDstFmt.U32 => uint.MaxValue,
+         IDstFmt.S32 => int.MaxValue,
+         _ => throw new ArgumentException($"The type \"{type}\" is not a supported integer type.")
+     };
+
+     /// <summary>
+     /// Gets the lowest value representable by an integer source/destination format.
+     /// </summary>
+     /// <param name="type">Integer format</param>
+     /// <returns>Minimum value of the format</returns>
+     /// <exception cref="ArgumentException">The format is not one of the handled integer formats</exception>
+     public static long GetIntMin(ISrcDstFmt type) => type switch
+     {
+         ISrcDstFmt.U8 => byte.MinValue,
+         ISrcDstFmt.S8 => sbyte.MinValue,
+         ISrcDstFmt.U16 => ushort.MinValue,
+         ISrcDstFmt.S16 => short.MinValue,
+         ISrcDstFmt.U32 => uint.MinValue,
+         ISrcDstFmt.S32 => int.MinValue,
+         _ => throw new ArgumentException($"The type \"{type}\" is not a supported integer type.")
+     };
+
+     /// <summary>
+     /// Gets the highest value representable by an integer source/destination format.
+     /// </summary>
+     /// <param name="type">Integer format</param>
+     /// <returns>Maximum value of the format</returns>
+     /// <exception cref="ArgumentException">The format is not one of the handled integer formats</exception>
+     public static long GetIntMax(ISrcDstFmt type) => type switch
+     {
+         ISrcDstFmt.U8 => byte.MaxValue,
+         ISrcDstFmt.S8 => sbyte.MaxValue,
+         ISrcDstFmt.U16 => ushort.MaxValue,
+         ISrcDstFmt.S16 => short.MaxValue,
+         ISrcDstFmt.U32 => uint.MaxValue,
+         ISrcDstFmt.S32 => int.MaxValue,
+         _ => throw new ArgumentException($"The type \"{type}\" is not a supported integer type.")
+     };
+
+     /// <summary>
+     /// Combines a value with a predicate using the given boolean operation.
+     /// </summary>
+     /// <param name="context">Emitter context</param>
+     /// <param name="logicOp">Operation to apply</param>
+     /// <param name="input">Value to combine</param>
+     /// <param name="pred">Predicate to combine with</param>
+     /// <returns>The combined value, or <paramref name="input"/> unchanged for any other operation</returns>
+     public static Operand GetPredLogicalOp(EmitterContext context, BoolOp logicOp, Operand input, Operand pred)
+     {
+         switch (logicOp)
+         {
+             case BoolOp.And:
+                 return context.BitwiseAnd(input, pred);
+             case BoolOp.Or:
+                 return context.BitwiseOr(input, pred);
+             case BoolOp.Xor:
+                 return context.BitwiseExclusiveOr(input, pred);
+             default:
+                 return input;
+         }
+     }
+
+     /// <summary>
+     /// Selects a byte or half-word of a 32-bit value and zero or sign extends it to 32 bits.
+     /// </summary>
+     /// <param name="context">Emitter context</param>
+     /// <param name="src">Source value</param>
+     /// <param name="type">Which part to select, and whether it is signed</param>
+     /// <returns>The extended part, or <paramref name="src"/> unchanged for any other selection</returns>
+     public static Operand Extend(EmitterContext context, Operand src, VectorSelect type)
+     {
+         // Each case shifts the selected part down to bit 0, then extends it from its width.
+         switch (type)
+         {
+             case VectorSelect.U8B0:
+                 return ZeroExtendTo32(context, context.ShiftRightU32(src, Const(0)), 8);
+             case VectorSelect.U8B1:
+                 return ZeroExtendTo32(context, context.ShiftRightU32(src, Const(8)), 8);
+             case VectorSelect.U8B2:
+                 return ZeroExtendTo32(context, context.ShiftRightU32(src, Const(16)), 8);
+             case VectorSelect.U8B3:
+                 return ZeroExtendTo32(context, context.ShiftRightU32(src, Const(24)), 8);
+             case VectorSelect.U16H0:
+                 return ZeroExtendTo32(context, context.ShiftRightU32(src, Const(0)), 16);
+             case VectorSelect.U16H1:
+                 return ZeroExtendTo32(context, context.ShiftRightU32(src, Const(16)), 16);
+             case VectorSelect.S8B0:
+                 return SignExtendTo32(context, context.ShiftRightU32(src, Const(0)), 8);
+             case VectorSelect.S8B1:
+                 return SignExtendTo32(context, context.ShiftRightU32(src, Const(8)), 8);
+             case VectorSelect.S8B2:
+                 return SignExtendTo32(context, context.ShiftRightU32(src, Const(16)), 8);
+             case VectorSelect.S8B3:
+                 return SignExtendTo32(context, context.ShiftRightU32(src, Const(24)), 8);
+             case VectorSelect.S16H0:
+                 return SignExtendTo32(context, context.ShiftRightU32(src, Const(0)), 16);
+             case VectorSelect.S16H1:
+                 return SignExtendTo32(context, context.ShiftRightU32(src, Const(16)), 16);
+             default:
+                 return src;
+         }
+     }
+
+     /// <summary>
+     /// Updates the zero and negative flags from an integer result.
+     /// </summary>
+     /// <param name="context">Emitter context</param>
+     /// <param name="dest">Result of the operation</param>
+     /// <param name="setCC">False to leave the flags untouched</param>
+     /// <param name="extended">True if the operation is one word of a wider, multi-word operation</param>
+     public static void SetZnFlags(EmitterContext context, Operand dest, bool setCC, bool extended = false)
+     {
+         if (setCC)
+         {
+             if (!extended)
+             {
+                 context.Copy(GetZF(), context.ICompareEqual(dest, Const(0)));
+             }
+             else
+             {
+                 // An extended operation works on one word of a wider value, so the
+                 // zero flag is only kept set if the previous words were zero too:
+                 // the new zero test is ANDed with the existing flag.
+                 Operand previousZero = GetZF();
+
+                 Operand allZero = context.BitwiseAnd(context.ICompareEqual(dest, Const(0)), previousZero);
+
+                 context.Copy(GetZF(), allZero);
+             }
+
+             context.Copy(GetNF(), context.ICompareLess(dest, Const(0)));
+         }
+     }
+
+     /// <summary>
+     /// Updates the zero and negative flags from a floating point result.
+     /// </summary>
+     /// <param name="context">Emitter context</param>
+     /// <param name="dest">Result of the operation</param>
+     /// <param name="setCC">False to leave the flags untouched</param>
+     /// <param name="fpType">Floating point type of the result</param>
+     public static void SetFPZnFlags(EmitterContext context, Operand dest, bool setCC, Instruction fpType = Instruction.FP32)
+     {
+         if (!setCC)
+         {
+             return;
+         }
+
+         // The comparison constant is built as FP32 and widened when comparing doubles.
+         Operand zero = fpType == Instruction.FP64
+             ? context.FP32ConvertToFP64(ConstF(0))
+             : ConstF(0);
+
+         context.Copy(GetZF(), context.FPCompareEqual(dest, zero, fpType));
+         context.Copy(GetNF(), context.FPCompareLess(dest, zero, fpType));
+     }
+
+     /// <summary>
+     /// Negates a value held as two words: both words are inverted, one is added
+     /// to the low word, and the resulting carry is added to the high word.
+     /// </summary>
+     /// <param name="context">Emitter context</param>
+     /// <param name="low">Low word</param>
+     /// <param name="high">High word</param>
+     /// <returns>Low and high words of the negated value</returns>
+     public static (Operand, Operand) NegateLong(EmitterContext context, Operand low, Operand high)
+     {
+         Operand invertedLow = context.BitwiseNot(low);
+         Operand invertedHigh = context.BitwiseNot(high);
+
+         Operand negatedLow = AddWithCarry(context, invertedLow, Const(1), out Operand carry);
+         Operand negatedHigh = context.IAdd(invertedHigh, carry);
+
+         return (negatedLow, negatedHigh);
+     }
+
+     /// <summary>
+     /// Adds two values and produces the carry out of the addition.
+     /// </summary>
+     /// <param name="context">Emitter context</param>
+     /// <param name="lhs">First value</param>
+     /// <param name="rhs">Second value</param>
+     /// <param name="carryOut">Negated result of the unsigned test "sum is less than <paramref name="lhs"/>"</param>
+     /// <returns>The sum</returns>
+     public static Operand AddWithCarry(EmitterContext context, Operand lhs, Operand rhs, out Operand carryOut)
+     {
+         Operand sum = context.IAdd(lhs, rhs);
+
+         // The addition wrapped around if the sum ended up below the first operand.
+         Operand wrapped = context.ICompareLessUnsigned(sum, lhs);
+
+         carryOut = context.INegate(wrapped);
+
+         return sum;
+     }
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/Instructions/InstEmitAttribute.cs b/src/Ryujinx.Graphics.Shader/Instructions/InstEmitAttribute.cs
new file mode 100644
index 00000000..1df38761
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/Instructions/InstEmitAttribute.cs
@@ -0,0 +1,383 @@
+using Ryujinx.Graphics.Shader.Decoders;
+using Ryujinx.Graphics.Shader.IntermediateRepresentation;
+using Ryujinx.Graphics.Shader.Translation;
+
+using static Ryujinx.Graphics.Shader.Instructions.InstEmitHelper;
+using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
+
+namespace Ryujinx.Graphics.Shader.Instructions
+{
+ static partial class InstEmit
+ {
+ /// <summary>
+ /// Emits AL2P: the destination receives source register A plus the 11-bit immediate.
+ /// </summary>
+ public static void Al2p(EmitterContext context)
+ {
+     InstAl2p inst = context.GetOp<InstAl2p>();
+
+     Operand destination = GetDest(inst.Dest);
+     Operand address = context.IAdd(GetSrcReg(context, inst.SrcA), Const(inst.Imm11));
+
+     context.Copy(destination, address);
+ }
+
+ /// <summary>
+ /// Emits ALD (attribute load): loads up to AlSize + 1 consecutive attribute
+ /// components into consecutive destination registers, stopping at RZ.
+ /// </summary>
+ public static void Ald(EmitterContext context)
+ {
+     InstAld op = context.GetOp<InstAld>();
+
+     // Some attributes are per invocation, so any primitive
+     // vertex indexing has to be ignored for those.
+     bool indexedByVertex = AttributeMap.HasPrimitiveVertex(context.Config.Stage, op.O) && !op.P;
+
+     if (!op.Phys)
+     {
+         indexedByVertex &= HasPrimitiveVertex(op.Imm11);
+     }
+
+     Operand primVertex = null;
+
+     if (indexedByVertex)
+     {
+         primVertex = context.Copy(GetSrcReg(context, op.SrcB));
+     }
+
+     int count = (int)op.AlSize + 1;
+
+     for (int index = 0; index < count; index++)
+     {
+         Register rd = new Register(op.Dest + index, RegisterType.Gpr);
+
+         if (rd.IsRZ)
+         {
+             break;
+         }
+
+         if (op.Phys)
+         {
+             // Physical access: the byte offset comes from a register and is
+             // split into a vector index (offset / 16) and an element index ((offset / 4) & 3).
+             Operand offset = context.ISubtract(GetSrcReg(context, op.SrcA), Const(AttributeConsts.UserAttributeBase));
+             Operand vecIndex = context.ShiftRightU32(offset, Const(4));
+             Operand elemIndex = context.BitwiseAnd(context.ShiftRightU32(offset, Const(2)), Const(3));
+
+             StorageKind storageKind = op.O ? StorageKind.Output : StorageKind.Input;
+
+             context.Copy(Register(rd), context.Load(storageKind, IoVariable.UserDefined, primVertex, vecIndex, elemIndex));
+
+             continue;
+         }
+
+         int attrOffset = FixedFuncToUserAttribute(context.Config, op.Imm11 + index * 4, op.O);
+
+         context.FlagAttributeRead(attrOffset);
+
+         bool isOutput = op.O && CanLoadOutput(attrOffset);
+
+         // The Vulkan id to index conversion is only attempted for non per-patch
+         // input loads that do not use a vertex index register.
+         bool noVertexRegister = op.SrcB == RegisterConsts.RegisterZeroIndex;
+
+         if (noVertexRegister && !op.P && !isOutput && TryConvertIdToIndexForVulkan(context, attrOffset, out Operand value))
+         {
+             context.Copy(Register(rd), value);
+         }
+         else
+         {
+             context.Copy(Register(rd), AttributeMap.GenerateAttributeLoad(context, primVertex, attrOffset, isOutput, op.P));
+         }
+     }
+ }
+
+ /// <summary>
+ /// Emits AST (attribute store): stores up to AlSize + 1 consecutive source
+ /// registers into consecutive attribute components.
+ /// </summary>
+ public static void Ast(EmitterContext context)
+ {
+     InstAst op = context.GetOp<InstAst>();
+
+     int count = (int)op.AlSize + 1;
+
+     for (int index = 0; index < count; index++)
+     {
+         int srcIndex = op.SrcB + index;
+
+         // Stop once the source register index goes past RZ.
+         if (srcIndex > RegisterConsts.RegisterZeroIndex)
+         {
+             break;
+         }
+
+         Register rd = new Register(srcIndex, RegisterType.Gpr);
+
+         if (!op.Phys)
+         {
+             // TODO: Support indirect stores using Ra.
+
+             int attrOffset = op.Imm11 + index * 4;
+
+             // NOTE(review): this leaves the whole method, so the components after
+             // an unused one are not stored either - confirm this is intended.
+             if (!context.Config.IsUsedOutputAttribute(attrOffset))
+             {
+                 return;
+             }
+
+             attrOffset = FixedFuncToUserAttribute(context.Config, attrOffset, isOutput: true);
+
+             context.FlagAttributeWritten(attrOffset);
+
+             AttributeMap.GenerateAttributeStore(context, attrOffset, op.P, Register(rd));
+         }
+         else
+         {
+             // Physical access: the byte offset comes from a register and is
+             // split into a vector index (offset / 16) and an element index ((offset / 4) & 3).
+             Operand offset = context.ISubtract(GetSrcReg(context, op.SrcA), Const(AttributeConsts.UserAttributeBase));
+             Operand vecIndex = context.ShiftRightU32(offset, Const(4));
+             Operand elemIndex = context.BitwiseAnd(context.ShiftRightU32(offset, Const(2)), Const(3));
+
+             Operand invocationId = null;
+
+             if (AttributeMap.HasInvocationId(context.Config.Stage, isOutput: true))
+             {
+                 invocationId = context.Load(StorageKind.Input, IoVariable.InvocationId);
+             }
+
+             context.Store(StorageKind.Output, IoVariable.UserDefined, invocationId, vecIndex, elemIndex, Register(rd));
+         }
+     }
+ }
+
+ /// <summary>
+ /// Emits IPA: loads an input attribute (directly by the immediate offset, or
+ /// indexed by register when Idx is set), applies the per-attribute adjustments
+ /// below, optionally multiplies by source register B, saturates and writes the destination.
+ /// </summary>
+ public static void Ipa(EmitterContext context)
+ {
+ InstIpa op = context.GetOp<InstIpa>();
+
+ context.FlagAttributeRead(op.Imm10);
+
+ Operand res;
+
+ bool isFixedFunc = false;
+
+ if (op.Idx)
+ {
+ // Indexed access: split the register byte offset into a vector index (offset / 16)
+ // and an element index ((offset / 4) & 3), then multiply the loaded value by
+ // element 3 of FragmentCoord (presumably the perspective W factor - confirm).
+ Operand offset = context.ISubtract(GetSrcReg(context, op.SrcA), Const(AttributeConsts.UserAttributeBase));
+ Operand vecIndex = context.ShiftRightU32(offset, Const(4));
+ Operand elemIndex = context.BitwiseAnd(context.ShiftRightU32(offset, Const(2)), Const(3));
+
+ res = context.Load(StorageKind.Input, IoVariable.UserDefined, null, vecIndex, elemIndex);
+ res = context.FPMultiply(res, context.Load(StorageKind.Input, IoVariable.FragmentCoord, null, Const(3)));
+ }
+ else
+ {
+ isFixedFunc = TryFixedFuncToUserAttributeIpa(context, op.Imm10, out res);
+
+ if (op.Imm10 >= AttributeConsts.UserAttributeBase && op.Imm10 < AttributeConsts.UserAttributeEnd)
+ {
+ // User attributes are 16 bytes each, so this is the attribute index.
+ int index = (op.Imm10 - AttributeConsts.UserAttributeBase) >> 4;
+
+ if (context.Config.ImapTypes[index].GetFirstUsedType() == PixelImap.Perspective)
+ {
+ res = context.FPMultiply(res, context.Load(StorageKind.Input, IoVariable.FragmentCoord, null, Const(3)));
+ }
+ }
+ else if (op.Imm10 == AttributeConsts.PositionX || op.Imm10 == AttributeConsts.PositionY)
+ {
+ // FragCoord X/Y must be divided by the render target scale, if resolution scaling is active,
+ // because the shader code is not expecting scaled values.
+ res = context.FPDivide(res, context.Load(StorageKind.Input, IoVariable.SupportBlockRenderScale, null, Const(0)));
+ }
+ else if (op.Imm10 == AttributeConsts.FrontFacing && context.Config.GpuAccessor.QueryHostHasFrontFacingBug())
+ {
+ // gl_FrontFacing sometimes has incorrect (flipped) values depending how it is accessed on Intel GPUs.
+ // This weird trick makes it behave.
+ res = context.ICompareLess(context.INegate(context.IConvertS32ToFP32(res)), Const(0));
+ }
+ }
+
+ // The multiplication by source B is skipped for remapped fixed-function attributes.
+ if (op.IpaOp == IpaOp.Multiply && !isFixedFunc)
+ {
+ Operand srcB = GetSrcReg(context, op.SrcB);
+
+ res = context.FPMultiply(res, srcB);
+ }
+
+ res = context.FPSaturate(res, op.Sat);
+
+ context.Copy(GetDest(op.Dest), res);
+ }
+
+ /// <summary>
+ /// Emits ISBERD by copying source register A to the destination.
+ /// </summary>
+ public static void Isberd(EmitterContext context)
+ {
+     InstIsberd inst = context.GetOp<InstIsberd>();
+
+     // ISBERD loads from ISBE (Internal Stage Buffer Entry) memory.
+     // Only the offset is propagated here, since the result is usually consumed
+     // by ALD to perform a vertex load on geometry or tessellation shaders.
+     // The offset is (PrimitiveIndex * VerticesPerPrimitive) + VertexIndex, and as
+     // PrimitiveIndex is hardcoded to zero, the offset is just VertexIndex.
+     Operand vertexIndex = GetSrcReg(context, inst.SrcA);
+
+     context.Copy(GetDest(inst.Dest), vertexIndex);
+ }
+
+ /// <summary>
+ /// Emits OUT (register form) according to the Emit and Cut flags of the instruction.
+ /// </summary>
+ public static void OutR(EmitterContext context)
+ {
+     InstOutR inst = context.GetOp<InstOutR>();
+
+     bool emit = inst.OutType.HasFlag(OutType.Emit);
+     bool cut = inst.OutType.HasFlag(OutType.Cut);
+
+     EmitOut(context, emit, cut);
+ }
+
+ /// <summary>
+ /// Emits OUT (immediate form) according to the Emit and Cut flags of the instruction.
+ /// </summary>
+ public static void OutI(EmitterContext context)
+ {
+     InstOutI inst = context.GetOp<InstOutI>();
+
+     bool emit = inst.OutType.HasFlag(OutType.Emit);
+     bool cut = inst.OutType.HasFlag(OutType.Cut);
+
+     EmitOut(context, emit, cut);
+ }
+
+ /// <summary>
+ /// Emits OUT (constant buffer form) according to the Emit and Cut flags of the instruction.
+ /// </summary>
+ public static void OutC(EmitterContext context)
+ {
+     InstOutC inst = context.GetOp<InstOutC>();
+
+     bool emit = inst.OutType.HasFlag(OutType.Emit);
+     bool cut = inst.OutType.HasFlag(OutType.Cut);
+
+     EmitOut(context, emit, cut);
+ }
+
+ /// <summary>
+ /// Emits the vertex emission and/or primitive cut requested by an OUT instruction.
+ /// </summary>
+ /// <param name="context">Emitter context that receives the generated operations</param>
+ /// <param name="emit">True to emit a vertex</param>
+ /// <param name="cut">True to end the current primitive</param>
+ private static void EmitOut(EmitterContext context, bool emit, bool cut)
+ {
+     if (!(emit || cut))
+     {
+         context.Config.GpuAccessor.Log("Invalid OUT encoding.");
+     }
+
+     if (emit)
+     {
+         if (context.Config.LastInVertexPipeline)
+         {
+             context.PrepareForVertexReturn(out var tempXLocal, out var tempYLocal, out var tempZLocal);
+
+             context.EmitVertex();
+
+             // Restore output position value before transformation.
+             // The saved components must be written back with a store to the output:
+             // copying into the temporary produced by a load would leave the
+             // position output itself unchanged.
+
+             if (tempXLocal != null)
+             {
+                 context.Store(StorageKind.Output, IoVariable.Position, null, Const(0), tempXLocal);
+             }
+
+             if (tempYLocal != null)
+             {
+                 context.Store(StorageKind.Output, IoVariable.Position, null, Const(1), tempYLocal);
+             }
+
+             if (tempZLocal != null)
+             {
+                 context.Store(StorageKind.Output, IoVariable.Position, null, Const(2), tempZLocal);
+             }
+         }
+         else
+         {
+             context.EmitVertex();
+         }
+     }
+
+     if (cut)
+     {
+         context.EndPrimitive();
+     }
+ }
+
+ /// <summary>
+ /// Checks if an attribute is indexed by primitive vertex. This is false for
+ /// PrimitiveId, TessCoordX and TessCoordY, and true for every other offset.
+ /// </summary>
+ private static bool HasPrimitiveVertex(int attr)
+ {
+     bool isPerInvocation =
+         attr == AttributeConsts.PrimitiveId ||
+         attr == AttributeConsts.TessCoordX ||
+         attr == AttributeConsts.TessCoordY;
+
+     return !isPerInvocation;
+ }
+
+ /// <summary>
+ /// Checks if an attribute may be loaded as an output. This is false for
+ /// TessCoordX and TessCoordY, and true for every other offset.
+ /// </summary>
+ private static bool CanLoadOutput(int attr)
+ {
+     return !(attr == AttributeConsts.TessCoordX || attr == AttributeConsts.TessCoordY);
+ }
+
+ /// <summary>
+ /// Produces the value loaded by IPA for an attribute, handling fixed-function attributes.
+ /// </summary>
+ /// <param name="context">Emitter context that receives the generated operations</param>
+ /// <param name="attr">Attribute offset</param>
+ /// <param name="selectedAttr">Loaded (or constant) attribute value</param>
+ /// <returns>True if the offset was handled as a fixed-function attribute, false otherwise</returns>
+ private static bool TryFixedFuncToUserAttributeIpa(EmitterContext context, int attr, out Operand selectedAttr)
+ {
+     bool isFrontColor = attr >= AttributeConsts.FrontColorDiffuseR && attr < AttributeConsts.BackColorDiffuseR;
+
+     // TODO: If two sided rendering is enabled, then front color should return
+     // FrontColor if the fragment is front facing, and back color otherwise.
+     // TODO: We likely need to emulate the fixed-function functionality for FogCoord here.
+     if (isFrontColor || attr == AttributeConsts.FogCoord)
+     {
+         selectedAttr = GenerateIpaLoad(context, FixedFuncToUserAttribute(context.Config, attr, isOutput: false));
+         return true;
+     }
+
+     if (attr >= AttributeConsts.BackColorDiffuseR && attr < AttributeConsts.ClipDistance0)
+     {
+         // Back colors are replaced by a constant: 1 for the last
+         // component of each vector, 0 for the other components.
+         bool isLastComponent = ((attr >> 2) & 3) == 3;
+
+         selectedAttr = ConstF(isLastComponent ? 1f : 0f);
+         return true;
+     }
+
+     if (attr >= AttributeConsts.TexCoordBase && attr < AttributeConsts.TexCoordEnd)
+     {
+         selectedAttr = GenerateIpaLoad(context, FixedFuncToUserAttribute(context.Config, attr, isOutput: false));
+         return true;
+     }
+
+     selectedAttr = GenerateIpaLoad(context, attr);
+     return false;
+ }
+
+ /// <summary>
+ /// Generates an input attribute load without vertex index, that is not per-patch.
+ /// </summary>
+ private static Operand GenerateIpaLoad(EmitterContext context, int offset)
+     => AttributeMap.GenerateAttributeLoad(context, null, offset, isOutput: false, isPerPatch: false);
+
+ /// <summary>
+ /// Remaps a fixed-function attribute offset (layer, fog coordinate, colors and
+ /// texture coordinates) to a user attribute offset. Other offsets are returned unchanged.
+ /// </summary>
+ /// <param name="config">Shader configuration</param>
+ /// <param name="attr">Attribute offset</param>
+ /// <param name="isOutput">True for output attributes, false for input attributes</param>
+ /// <returns>The remapped offset, or <paramref name="attr"/> when no remap applies</returns>
+ private static int FixedFuncToUserAttribute(ShaderConfig config, int attr, bool isOutput)
+ {
+     bool layerSupported = config.GpuAccessor.QueryHostSupportsLayerVertexTessellation();
+
+     // Index 0 is used by the layer remap when the host can't write layer from vertex/tessellation.
+     int firstFixedIndex = layerSupported ? 0 : 1;
+
+     if (attr == AttributeConsts.Layer && config.Stage != ShaderStage.Geometry && !layerSupported)
+     {
+         int layerAttr = FixedFuncToUserAttribute(config, attr, AttributeConsts.Layer, 0, isOutput);
+
+         config.SetLayerOutputAttribute(layerAttr);
+
+         return layerAttr;
+     }
+
+     if (attr == AttributeConsts.FogCoord)
+     {
+         return FixedFuncToUserAttribute(config, attr, AttributeConsts.FogCoord, firstFixedIndex, isOutput);
+     }
+
+     if (attr >= AttributeConsts.FrontColorDiffuseR && attr < AttributeConsts.ClipDistance0)
+     {
+         return FixedFuncToUserAttribute(config, attr, AttributeConsts.FrontColorDiffuseR, firstFixedIndex + 1, isOutput);
+     }
+
+     if (attr >= AttributeConsts.TexCoordBase && attr < AttributeConsts.TexCoordEnd)
+     {
+         return FixedFuncToUserAttribute(config, attr, AttributeConsts.TexCoordBase, firstFixedIndex + 5, isOutput);
+     }
+
+     return attr;
+ }
+
+ /// <summary>
+ /// Remaps a fixed-function attribute offset to a free user attribute.
+ /// </summary>
+ /// <param name="config">Shader configuration</param>
+ /// <param name="attr">Attribute offset</param>
+ /// <param name="baseAttr">Offset of the first attribute of the fixed-function group</param>
+ /// <param name="baseIndex">Index passed to the free user attribute lookup for the first attribute of the group</param>
+ /// <param name="isOutput">True for output attributes, false for input attributes</param>
+ /// <returns>The user attribute offset, or <paramref name="attr"/> unchanged if none is available</returns>
+ private static int FixedFuncToUserAttribute(ShaderConfig config, int attr, int baseAttr, int baseIndex, bool isOutput)
+ {
+     // Each attribute is a 16 bytes vector.
+     int vectorIndex = (attr - baseAttr) >> 4;
+     int userAttrIndex = config.GetFreeUserAttribute(isOutput, baseIndex + vectorIndex);
+
+     // The unsigned comparison also rejects negative indices.
+     if ((uint)userAttrIndex >= Constants.MaxAttributes)
+     {
+         config.GpuAccessor.Log($"No enough user attributes for fixed attribute offset 0x{attr:X}.");
+
+         return attr;
+     }
+
+     // Keep the component offset inside of the vector (low 4 bits).
+     int userAttr = AttributeConsts.UserAttributeBase + userAttrIndex * 16 + (attr & 0xf);
+
+     if (isOutput)
+     {
+         config.SetOutputUserAttributeFixedFunc(userAttrIndex);
+     }
+     else
+     {
+         config.SetInputUserAttributeFixedFunc(userAttrIndex);
+     }
+
+     return userAttr;
+ }
+
+ /// <summary>
+ /// When targeting Vulkan, replaces InstanceId with (InstanceIndex - BaseInstance)
+ /// and VertexId with VertexIndex.
+ /// </summary>
+ /// <param name="context">Emitter context that receives the generated operations</param>
+ /// <param name="attr">Attribute offset</param>
+ /// <param name="value">Replacement value, or null if there was no conversion</param>
+ /// <returns>True if the attribute was converted, false otherwise</returns>
+ private static bool TryConvertIdToIndexForVulkan(EmitterContext context, int attr, out Operand value)
+ {
+     value = null;
+
+     if (context.Config.Options.TargetApi != TargetApi.Vulkan)
+     {
+         return false;
+     }
+
+     if (attr == AttributeConsts.InstanceId)
+     {
+         Operand instanceIndex = context.Load(StorageKind.Input, IoVariable.InstanceIndex);
+         Operand baseInstance = context.Load(StorageKind.Input, IoVariable.BaseInstance);
+
+         value = context.ISubtract(instanceIndex, baseInstance);
+     }
+     else if (attr == AttributeConsts.VertexId)
+     {
+         value = context.Load(StorageKind.Input, IoVariable.VertexIndex);
+     }
+
+     return value != null;
+ }
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/Instructions/InstEmitBarrier.cs b/src/Ryujinx.Graphics.Shader/Instructions/InstEmitBarrier.cs
new file mode 100644
index 00000000..f3114c6e
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/Instructions/InstEmitBarrier.cs
@@ -0,0 +1,44 @@
+using Ryujinx.Graphics.Shader.Decoders;
+using Ryujinx.Graphics.Shader.Translation;
+
+namespace Ryujinx.Graphics.Shader.Instructions
+{
+ static partial class InstEmit
+ {
+ /// <summary>
+ /// Emits BAR. Only the Sync mode produces a barrier, other modes are logged.
+ /// </summary>
+ public static void Bar(EmitterContext context)
+ {
+     InstBar inst = context.GetOp<InstBar>();
+
+     // TODO: Support other modes.
+     if (inst.BarOp != BarOp.Sync)
+     {
+         context.Config.GpuAccessor.Log($"Invalid barrier mode: {inst.BarOp}.");
+
+         return;
+     }
+
+     context.Barrier();
+ }
+
+ /// <summary>
+ /// Handles DEPBAR. The instruction is decoded, but nothing is emitted for it.
+ /// </summary>
+ public static void Depbar(EmitterContext context)
+ {
+     // The decoded instruction is not needed, the call is only kept so that
+     // the current operation is still checked against the expected type.
+     _ = context.GetOp<InstDepbar>();
+
+     // No operation.
+ }
+
+ /// <summary>
+ /// Emits MEMBAR: a group memory barrier for the Cta level, a memory barrier otherwise.
+ /// </summary>
+ public static void Membar(EmitterContext context)
+ {
+     InstMembar inst = context.GetOp<InstMembar>();
+
+     bool isCtaLevel = inst.Membar == Decoders.Membar.Cta;
+
+     if (!isCtaLevel)
+     {
+         context.MemoryBarrier();
+
+         return;
+     }
+
+     context.GroupMemoryBarrier();
+ }
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/Instructions/InstEmitBitfield.cs b/src/Ryujinx.Graphics.Shader/Instructions/InstEmitBitfield.cs
new file mode 100644
index 00000000..71925269
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/Instructions/InstEmitBitfield.cs
@@ -0,0 +1,194 @@
+using Ryujinx.Graphics.Shader.Decoders;
+using Ryujinx.Graphics.Shader.IntermediateRepresentation;
+using Ryujinx.Graphics.Shader.Translation;
+
+using static Ryujinx.Graphics.Shader.Instructions.InstEmitHelper;
+using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
+
+namespace Ryujinx.Graphics.Shader.Instructions
+{
+ static partial class InstEmit
+ {
+ /// <summary>
+ /// Emits BFE with the second operand read with GetSrcReg.
+ /// </summary>
+ public static void BfeR(EmitterContext context)
+ {
+     InstBfeR inst = context.GetOp<InstBfeR>();
+
+     Operand value = GetSrcReg(context, inst.SrcA);
+     Operand positionAndSize = GetSrcReg(context, inst.SrcB);
+
+     EmitBfe(context, value, positionAndSize, inst.Dest, inst.Brev, inst.Signed);
+ }
+
+ /// <summary>
+ /// Emits BFE with the second operand read with GetSrcImm (20-bit signed immediate).
+ /// </summary>
+ public static void BfeI(EmitterContext context)
+ {
+     InstBfeI inst = context.GetOp<InstBfeI>();
+
+     Operand value = GetSrcReg(context, inst.SrcA);
+     Operand positionAndSize = GetSrcImm(context, Imm20ToSInt(inst.Imm20));
+
+     EmitBfe(context, value, positionAndSize, inst.Dest, inst.Brev, inst.Signed);
+ }
+
+ /// <summary>
+ /// Emits BFE with the second operand read with GetSrcCbuf.
+ /// </summary>
+ public static void BfeC(EmitterContext context)
+ {
+     InstBfeC inst = context.GetOp<InstBfeC>();
+
+     Operand value = GetSrcReg(context, inst.SrcA);
+     Operand positionAndSize = GetSrcCbuf(context, inst.CbufSlot, inst.CbufOffset);
+
+     EmitBfe(context, value, positionAndSize, inst.Dest, inst.Brev, inst.Signed);
+ }
+
+ /// <summary>
+ /// Emits BFI with the position/size operand read with GetSrcReg.
+ /// </summary>
+ public static void BfiR(EmitterContext context)
+ {
+     InstBfiR inst = context.GetOp<InstBfiR>();
+
+     Operand insert = GetSrcReg(context, inst.SrcA);
+     Operand positionAndSize = GetSrcReg(context, inst.SrcB);
+     Operand target = GetSrcReg(context, inst.SrcC);
+
+     EmitBfi(context, insert, positionAndSize, target, inst.Dest);
+ }
+
+ /// <summary>
+ /// Emits BFI with the position/size operand read with GetSrcImm (20-bit signed immediate).
+ /// </summary>
+ public static void BfiI(EmitterContext context)
+ {
+     InstBfiI inst = context.GetOp<InstBfiI>();
+
+     Operand insert = GetSrcReg(context, inst.SrcA);
+     Operand positionAndSize = GetSrcImm(context, Imm20ToSInt(inst.Imm20));
+     Operand target = GetSrcReg(context, inst.SrcC);
+
+     EmitBfi(context, insert, positionAndSize, target, inst.Dest);
+ }
+
+ /// <summary>
+ /// Emits BFI with the position/size operand read with GetSrcCbuf.
+ /// </summary>
+ public static void BfiC(EmitterContext context)
+ {
+     InstBfiC inst = context.GetOp<InstBfiC>();
+
+     Operand insert = GetSrcReg(context, inst.SrcA);
+     Operand positionAndSize = GetSrcCbuf(context, inst.CbufSlot, inst.CbufOffset);
+     Operand target = GetSrcReg(context, inst.SrcC);
+
+     EmitBfi(context, insert, positionAndSize, target, inst.Dest);
+ }
+
+ /// <summary>
+ /// Emits BFI with the position/size operand read from register C,
+ /// and the value receiving the insertion read with GetSrcCbuf.
+ /// </summary>
+ public static void BfiRc(EmitterContext context)
+ {
+     InstBfiRc inst = context.GetOp<InstBfiRc>();
+
+     Operand insert = GetSrcReg(context, inst.SrcA);
+     Operand positionAndSize = GetSrcReg(context, inst.SrcC);
+     Operand target = GetSrcCbuf(context, inst.CbufSlot, inst.CbufOffset);
+
+     EmitBfi(context, insert, positionAndSize, target, inst.Dest);
+ }
+
+ /// <summary>
+ /// Emits FLO with the source read with GetSrcReg.
+ /// </summary>
+ public static void FloR(EmitterContext context)
+ {
+     InstFloR inst = context.GetOp<InstFloR>();
+
+     Operand source = GetSrcReg(context, inst.SrcB);
+
+     EmitFlo(context, source, inst.Dest, inst.NegB, inst.Sh, inst.Signed);
+ }
+
+ /// <summary>
+ /// Emits FLO with the source read with GetSrcImm (20-bit signed immediate).
+ /// </summary>
+ public static void FloI(EmitterContext context)
+ {
+     InstFloI inst = context.GetOp<InstFloI>();
+
+     Operand source = GetSrcImm(context, Imm20ToSInt(inst.Imm20));
+
+     EmitFlo(context, source, inst.Dest, inst.NegB, inst.Sh, inst.Signed);
+ }
+
+ /// <summary>
+ /// Emits FLO with the source read with GetSrcCbuf.
+ /// </summary>
+ public static void FloC(EmitterContext context)
+ {
+     InstFloC inst = context.GetOp<InstFloC>();
+
+     Operand source = GetSrcCbuf(context, inst.CbufSlot, inst.CbufOffset);
+
+     EmitFlo(context, source, inst.Dest, inst.NegB, inst.Sh, inst.Signed);
+ }
+
+ /// <summary>
+ /// Emits POPC with the source read with GetSrcReg.
+ /// </summary>
+ public static void PopcR(EmitterContext context)
+ {
+     InstPopcR inst = context.GetOp<InstPopcR>();
+
+     Operand source = GetSrcReg(context, inst.SrcB);
+
+     EmitPopc(context, source, inst.Dest, inst.NegB);
+ }
+
+ /// <summary>
+ /// Emits POPC with the source read with GetSrcImm (20-bit signed immediate).
+ /// </summary>
+ public static void PopcI(EmitterContext context)
+ {
+     InstPopcI inst = context.GetOp<InstPopcI>();
+
+     Operand source = GetSrcImm(context, Imm20ToSInt(inst.Imm20));
+
+     EmitPopc(context, source, inst.Dest, inst.NegB);
+ }
+
+ /// <summary>
+ /// Emits POPC with the source read with GetSrcCbuf.
+ /// </summary>
+ public static void PopcC(EmitterContext context)
+ {
+     InstPopcC inst = context.GetOp<InstPopcC>();
+
+     Operand source = GetSrcCbuf(context, inst.CbufSlot, inst.CbufOffset);
+
+     EmitPopc(context, source, inst.Dest, inst.NegB);
+ }
+
+ /// <summary>
+ /// Emits a bitfield extract of <paramref name="srcA"/>. The field position is
+ /// taken from bits 0-7 of <paramref name="srcB"/> and the field size from bits 8-15.
+ /// </summary>
+ /// <param name="context">Emitter context that receives the generated operations</param>
+ /// <param name="srcA">Value the field is extracted from</param>
+ /// <param name="srcB">Packed field position and size</param>
+ /// <param name="rd">Destination register index</param>
+ /// <param name="bitReverse">True to reverse the bits of <paramref name="srcA"/> before extracting</param>
+ /// <param name="isSigned">True for a signed extract, false for an unsigned extract</param>
+ private static void EmitBfe(
+     EmitterContext context,
+     Operand srcA,
+     Operand srcB,
+     int rd,
+     bool bitReverse,
+     bool isSigned)
+ {
+     Operand value = bitReverse ? context.BitfieldReverse(srcA) : srcA;
+
+     Operand fieldPosition = context.BitwiseAnd(srcB, Const(0xff));
+     Operand fieldSize = context.BitfieldExtractU32(srcB, Const(8), Const(8));
+
+     Operand extracted;
+
+     if (isSigned)
+     {
+         extracted = context.BitfieldExtractS32(value, fieldPosition, fieldSize);
+     }
+     else
+     {
+         extracted = context.BitfieldExtractU32(value, fieldPosition, fieldSize);
+     }
+
+     context.Copy(GetDest(rd), extracted);
+
+     // TODO: CC, X, corner cases.
+ }
+
+ /// <summary>
+ /// Emits a bitfield insert of <paramref name="srcA"/> into <paramref name="srcC"/>.
+ /// The field position is taken from bits 0-7 of <paramref name="srcB"/> and the field size from bits 8-15.
+ /// </summary>
+ private static void EmitBfi(EmitterContext context, Operand srcA, Operand srcB, Operand srcC, int rd)
+ {
+     Operand fieldPosition = context.BitwiseAnd(srcB, Const(0xff));
+     Operand fieldSize = context.BitfieldExtractU32(srcB, Const(8), Const(8));
+
+     Operand inserted = context.BitfieldInsert(srcC, srcA, fieldPosition, fieldSize);
+
+     context.Copy(GetDest(rd), inserted);
+ }
+
+ /// <summary>
+ /// Emits FLO. With <paramref name="sh"/> set, the result is FindLSB of the
+ /// bit-reversed source, otherwise it is the signed or unsigned FindMSB of the source.
+ /// </summary>
+ /// <param name="context">Emitter context that receives the generated operations</param>
+ /// <param name="src">Source value</param>
+ /// <param name="rd">Destination register index</param>
+ /// <param name="invert">Passed to BitwiseNot to conditionally invert the source</param>
+ /// <param name="sh">Selects the FindLSB of reversed bits variant</param>
+ /// <param name="isSigned">Selects the signed FindMSB variant (unused when <paramref name="sh"/> is set)</param>
+ private static void EmitFlo(EmitterContext context, Operand src, int rd, bool invert, bool sh, bool isSigned)
+ {
+     Operand value = context.BitwiseNot(src, invert);
+
+     Operand result;
+
+     if (!sh)
+     {
+         result = isSigned ? context.FindMSBS32(value) : context.FindMSBU32(value);
+     }
+     else
+     {
+         result = context.FindLSB(context.BitfieldReverse(value));
+     }
+
+     context.Copy(GetDest(rd), result);
+ }
+
+ /// <summary>
+ /// Emits a bit count of the source, which is first passed through BitwiseNot
+ /// with the <paramref name="invert"/> flag.
+ /// </summary>
+ private static void EmitPopc(EmitterContext context, Operand src, int rd, bool invert)
+ {
+     Operand value = context.BitwiseNot(src, invert);
+     Operand bitCount = context.BitCount(value);
+
+     context.Copy(GetDest(rd), bitCount);
+ }
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/Instructions/InstEmitConditionCode.cs b/src/Ryujinx.Graphics.Shader/Instructions/InstEmitConditionCode.cs
new file mode 100644
index 00000000..74ac7602
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/Instructions/InstEmitConditionCode.cs
@@ -0,0 +1,87 @@
+using Ryujinx.Graphics.Shader.Decoders;
+using Ryujinx.Graphics.Shader.IntermediateRepresentation;
+using Ryujinx.Graphics.Shader.Translation;
+
+using static Ryujinx.Graphics.Shader.Instructions.InstEmitAluHelper;
+using static Ryujinx.Graphics.Shader.Instructions.InstEmitHelper;
+using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
+
+namespace Ryujinx.Graphics.Shader.Instructions
+{
+ static partial class InstEmit
+ {
+ /// <summary>
+ /// Emits CSET: evaluates a condition code test, combines it with a predicate and
+ /// writes the result to the destination register (as 1.0f or 0 when BVal is set).
+ /// </summary>
+ public static void Cset(EmitterContext context)
+ {
+     InstCset inst = context.GetOp<InstCset>();
+
+     Operand condition = GetCondition(context, inst.Ccc);
+     Operand predicate = GetPredicate(context, inst.SrcPred, inst.SrcPredInv);
+
+     Operand combined = GetPredLogicalOp(context, inst.Bop, condition, predicate);
+
+     Operand destination = GetDest(inst.Dest);
+
+     Operand value = inst.BVal
+         ? context.ConditionalSelect(combined, ConstF(1), Const(0))
+         : combined;
+
+     context.Copy(destination, value);
+
+     // TODO: CC.
+ }
+
+ /// <summary>
+ /// Emits CSETP: evaluates a condition code test and its inverse, combines each
+ /// with a predicate, and writes them to the two destination predicates.
+ /// </summary>
+ public static void Csetp(EmitterContext context)
+ {
+     InstCsetp inst = context.GetOp<InstCsetp>();
+
+     Operand condition = GetCondition(context, inst.Ccc);
+     Operand invCondition = context.BitwiseNot(condition);
+     Operand predicate = GetPredicate(context, inst.SrcPred, inst.SrcPredInv);
+
+     Operand result = GetPredLogicalOp(context, inst.Bop, condition, predicate);
+     Operand invResult = GetPredLogicalOp(context, inst.Bop, invCondition, predicate);
+
+     context.Copy(Register(inst.DestPred, RegisterType.Predicate), result);
+     context.Copy(Register(inst.DestPredInv, RegisterType.Predicate), invResult);
+
+     // TODO: CC.
+ }
+
+ /// <summary>
+ /// Builds the operand that evaluates a condition code test, as a logical
+ /// combination of the NF, ZF, VF and CF flags.
+ /// </summary>
+ /// <param name="context">Emitter context that receives the generated operations</param>
+ /// <param name="cond">Condition code test to evaluate</param>
+ /// <param name="defaultCond">Constant used when <paramref name="cond"/> matches none of the tests below</param>
+ /// <returns>Operand with the result of the test</returns>
+ private static Operand GetCondition(EmitterContext context, Ccc cond, int defaultCond = IrConsts.True)
+ {
+ return cond switch
+ {
+ Ccc.F => Const(IrConsts.False),
+ Ccc.Lt => context.BitwiseExclusiveOr(context.BitwiseAnd(GetNF(), context.BitwiseNot(GetZF())), GetVF()),
+ Ccc.Eq => context.BitwiseAnd(context.BitwiseNot(GetNF()), GetZF()),
+ Ccc.Le => context.BitwiseExclusiveOr(GetNF(), context.BitwiseOr(GetZF(), GetVF())),
+ Ccc.Gt => context.BitwiseNot(context.BitwiseOr(context.BitwiseExclusiveOr(GetNF(), GetVF()), GetZF())),
+ Ccc.Ne => context.BitwiseNot(GetZF()),
+ Ccc.Ge => context.BitwiseNot(context.BitwiseExclusiveOr(GetNF(), GetVF())),
+ // NF and ZF both set is the combination tested by Nan; Num is its inverse.
+ Ccc.Num => context.BitwiseNot(context.BitwiseAnd(GetNF(), GetZF())),
+ Ccc.Nan => context.BitwiseAnd(GetNF(), GetZF()),
+ Ccc.Ltu => context.BitwiseExclusiveOr(GetNF(), GetVF()),
+ Ccc.Equ => GetZF(),
+ Ccc.Leu => context.BitwiseOr(context.BitwiseExclusiveOr(GetNF(), GetVF()), GetZF()),
+ Ccc.Gtu => context.BitwiseExclusiveOr(context.BitwiseNot(GetNF()), context.BitwiseOr(GetVF(), GetZF())),
+ Ccc.Neu => context.BitwiseOr(GetNF(), context.BitwiseNot(GetZF())),
+ Ccc.Geu => context.BitwiseExclusiveOr(context.BitwiseOr(context.BitwiseNot(GetNF()), GetZF()), GetVF()),
+ Ccc.T => Const(IrConsts.True),
+ // The tests below read a single flag each, or a pair combined with OR/AND.
+ Ccc.Off => context.BitwiseNot(GetVF()),
+ Ccc.Lo => context.BitwiseNot(GetCF()),
+ Ccc.Sff => context.BitwiseNot(GetNF()),
+ Ccc.Ls => context.BitwiseOr(GetZF(), context.BitwiseNot(GetCF())),
+ Ccc.Hi => context.BitwiseAnd(GetCF(), context.BitwiseNot(GetZF())),
+ Ccc.Sft => GetNF(),
+ Ccc.Hs => GetCF(),
+ Ccc.Oft => GetVF(),
+ Ccc.Rle => context.BitwiseOr(GetNF(), GetZF()),
+ Ccc.Rgt => context.BitwiseNot(context.BitwiseOr(GetNF(), GetZF())),
+ _ => Const(defaultCond)
+ };
+ }
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/Instructions/InstEmitConversion.cs b/src/Ryujinx.Graphics.Shader/Instructions/InstEmitConversion.cs
new file mode 100644
index 00000000..bebd96dd
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/Instructions/InstEmitConversion.cs
@@ -0,0 +1,425 @@
+using Ryujinx.Graphics.Shader.Decoders;
+using Ryujinx.Graphics.Shader.IntermediateRepresentation;
+using Ryujinx.Graphics.Shader.Translation;
+using System;
+
+using static Ryujinx.Graphics.Shader.Instructions.InstEmitAluHelper;
+using static Ryujinx.Graphics.Shader.Instructions.InstEmitHelper;
+using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
+
+namespace Ryujinx.Graphics.Shader.Instructions
+{
+ static partial class InstEmit
+ {
+ /// <summary>
+ /// Emits F2F with the source unpacked with UnpackReg.
+ /// </summary>
+ public static void F2fR(EmitterContext context)
+ {
+     InstF2fR inst = context.GetOp<InstF2fR>();
+
+     Operand source = UnpackReg(context, inst.SrcFmt, inst.Sh, inst.SrcB);
+
+     EmitF2F(context, inst.SrcFmt, inst.DstFmt, inst.RoundMode, source, inst.Dest, inst.AbsB, inst.NegB, inst.Sat);
+ }
+
+ /// <summary>
+ /// Emits F2F with the source unpacked with UnpackImm (20-bit float immediate).
+ /// </summary>
+ public static void F2fI(EmitterContext context)
+ {
+     InstF2fI inst = context.GetOp<InstF2fI>();
+
+     Operand source = UnpackImm(context, inst.SrcFmt, inst.Sh, Imm20ToFloat(inst.Imm20));
+
+     EmitF2F(context, inst.SrcFmt, inst.DstFmt, inst.RoundMode, source, inst.Dest, inst.AbsB, inst.NegB, inst.Sat);
+ }
+
+ /// <summary>
+ /// Emits F2F with the source unpacked with UnpackCbuf.
+ /// </summary>
+ public static void F2fC(EmitterContext context)
+ {
+     InstF2fC inst = context.GetOp<InstF2fC>();
+
+     Operand source = UnpackCbuf(context, inst.SrcFmt, inst.Sh, inst.CbufSlot, inst.CbufOffset);
+
+     EmitF2F(context, inst.SrcFmt, inst.DstFmt, inst.RoundMode, source, inst.Dest, inst.AbsB, inst.NegB, inst.Sat);
+ }
+
+ /// <summary>
+ /// Emits F2I with the source unpacked with UnpackReg.
+ /// </summary>
+ public static void F2iR(EmitterContext context)
+ {
+     InstF2iR inst = context.GetOp<InstF2iR>();
+
+     Operand source = UnpackReg(context, inst.SrcFmt, inst.Sh, inst.SrcB);
+
+     EmitF2I(context, inst.SrcFmt, inst.IDstFmt, inst.RoundMode, source, inst.Dest, inst.AbsB, inst.NegB);
+ }
+
+ /// <summary>
+ /// Emits F2I with the source unpacked with UnpackImm (20-bit float immediate).
+ /// </summary>
+ public static void F2iI(EmitterContext context)
+ {
+     InstF2iI inst = context.GetOp<InstF2iI>();
+
+     Operand source = UnpackImm(context, inst.SrcFmt, inst.Sh, Imm20ToFloat(inst.Imm20));
+
+     EmitF2I(context, inst.SrcFmt, inst.IDstFmt, inst.RoundMode, source, inst.Dest, inst.AbsB, inst.NegB);
+ }
+
+ /// <summary>
+ /// Emits F2I with the source unpacked with UnpackCbuf.
+ /// </summary>
+ public static void F2iC(EmitterContext context)
+ {
+     InstF2iC inst = context.GetOp<InstF2iC>();
+
+     Operand source = UnpackCbuf(context, inst.SrcFmt, inst.Sh, inst.CbufSlot, inst.CbufOffset);
+
+     EmitF2I(context, inst.SrcFmt, inst.IDstFmt, inst.RoundMode, source, inst.Dest, inst.AbsB, inst.NegB);
+ }
+
+ /// <summary>
+ /// Emits I2F with the source read with GetSrcReg.
+ /// </summary>
+ public static void I2fR(EmitterContext context)
+ {
+     InstI2fR inst = context.GetOp<InstI2fR>();
+
+     Operand source = GetSrcReg(context, inst.SrcB);
+
+     EmitI2F(context, inst.ISrcFmt, inst.DstFmt, source, inst.ByteSel, inst.Dest, inst.AbsB, inst.NegB);
+ }
+
+ /// <summary>
+ /// Emits I2F with the source read with GetSrcImm (20-bit signed immediate).
+ /// </summary>
+ public static void I2fI(EmitterContext context)
+ {
+     InstI2fI inst = context.GetOp<InstI2fI>();
+
+     Operand source = GetSrcImm(context, Imm20ToSInt(inst.Imm20));
+
+     EmitI2F(context, inst.ISrcFmt, inst.DstFmt, source, inst.ByteSel, inst.Dest, inst.AbsB, inst.NegB);
+ }
+
+ /// <summary>
+ /// Emits I2F with the source read with GetSrcCbuf.
+ /// </summary>
+ public static void I2fC(EmitterContext context)
+ {
+     InstI2fC inst = context.GetOp<InstI2fC>();
+
+     Operand source = GetSrcCbuf(context, inst.CbufSlot, inst.CbufOffset);
+
+     EmitI2F(context, inst.ISrcFmt, inst.DstFmt, source, inst.ByteSel, inst.Dest, inst.AbsB, inst.NegB);
+ }
+
+ /// <summary>
+ /// Emits I2I with the source read with GetSrcReg.
+ /// </summary>
+ public static void I2iR(EmitterContext context)
+ {
+     InstI2iR inst = context.GetOp<InstI2iR>();
+
+     Operand source = GetSrcReg(context, inst.SrcB);
+
+     EmitI2I(context, inst.ISrcFmt, inst.IDstFmt, source, inst.ByteSel, inst.Dest, inst.AbsB, inst.NegB, inst.Sat, inst.WriteCC);
+ }
+
+ /// <summary>
+ /// Emits I2I with the source read with GetSrcImm (20-bit signed immediate).
+ /// </summary>
+ public static void I2iI(EmitterContext context)
+ {
+     InstI2iI inst = context.GetOp<InstI2iI>();
+
+     Operand source = GetSrcImm(context, Imm20ToSInt(inst.Imm20));
+
+     EmitI2I(context, inst.ISrcFmt, inst.IDstFmt, source, inst.ByteSel, inst.Dest, inst.AbsB, inst.NegB, inst.Sat, inst.WriteCC);
+ }
+
+ /// <summary>
+ /// Emits I2I with the source read with GetSrcCbuf.
+ /// </summary>
+ public static void I2iC(EmitterContext context)
+ {
+     InstI2iC inst = context.GetOp<InstI2iC>();
+
+     Operand source = GetSrcCbuf(context, inst.CbufSlot, inst.CbufOffset);
+
+     EmitI2I(context, inst.ISrcFmt, inst.IDstFmt, source, inst.ByteSel, inst.Dest, inst.AbsB, inst.NegB, inst.Sat, inst.WriteCC);
+ }
+
+ /// <summary>
+ /// Emits a float to float conversion: abs/neg modifiers, rounding to integral
+ /// (only when source and destination formats match), FP32/FP64 conversion,
+ /// saturation and the destination write.
+ /// </summary>
+ /// <param name="context">Emitter context that receives the generated operations</param>
+ /// <param name="srcType">Source floating point format</param>
+ /// <param name="dstType">Destination floating point format</param>
+ /// <param name="roundingMode">Rounding applied when both formats are the same</param>
+ /// <param name="src">Source value</param>
+ /// <param name="rd">Destination register index</param>
+ /// <param name="absolute">True to take the absolute value of the source</param>
+ /// <param name="negate">True to negate the source</param>
+ /// <param name="saturate">True to saturate the result</param>
+ private static void EmitF2F(
+     EmitterContext context,
+     DstFmt srcType,
+     DstFmt dstType,
+     IntegerRound roundingMode,
+     Operand src,
+     int rd,
+     bool absolute,
+     bool negate,
+     bool saturate)
+ {
+     Instruction srcFpType = srcType.ToInstFPType();
+
+     Operand value = context.FPAbsNeg(src, absolute, negate, srcFpType);
+
+     if (srcType == dstType)
+     {
+         switch (roundingMode)
+         {
+             case IntegerRound.Round:
+                 value = context.FPRound(value, srcFpType);
+                 break;
+             case IntegerRound.Floor:
+                 value = context.FPFloor(value, srcFpType);
+                 break;
+             case IntegerRound.Ceil:
+                 value = context.FPCeiling(value, srcFpType);
+                 break;
+             case IntegerRound.Trunc:
+                 value = context.FPTruncate(value, srcFpType);
+                 break;
+         }
+     }
+
+     // Conversions between FP16 <-> FP32 need no handling here,
+     // as FP16 operations are done as FP32 directly.
+     // FP16 <-> FP64 conversions are invalid.
+     bool widen = srcType == DstFmt.F32 && dstType == DstFmt.F64;
+     bool narrow = srcType == DstFmt.F64 && dstType == DstFmt.F32;
+
+     if (widen)
+     {
+         value = context.FP32ConvertToFP64(value);
+     }
+     else if (narrow)
+     {
+         value = context.FP64ConvertToFP32(value);
+     }
+
+     value = context.FPSaturate(value, saturate, dstType.ToInstFPType());
+
+     WriteFP(context, dstType, value, rd);
+
+     // TODO: CC.
+ }
+
+ /// <summary>
+ /// Emits a float to integer conversion: abs/neg modifiers, rounding, clamping
+ /// of negative values for unsigned destinations, conversion to a 32-bit integer,
+ /// and clamping to the 16-bit range for small destinations.
+ /// </summary>
+ /// <remarks>
+ /// 64-bit destinations are not implemented: the value is still converted to 32 bits
+ /// and only one destination register is written, so a message is logged for them.
+ /// </remarks>
+ /// <param name="context">Emitter context that receives the generated operations</param>
+ /// <param name="srcType">Source floating point format</param>
+ /// <param name="dstType">Destination integer format</param>
+ /// <param name="roundingMode">Rounding applied before the conversion</param>
+ /// <param name="src">Source value</param>
+ /// <param name="rd">Destination register index</param>
+ /// <param name="absolute">True to take the absolute value of the source</param>
+ /// <param name="negate">True to negate the source</param>
+ private static void EmitF2I(
+     EmitterContext context,
+     DstFmt srcType,
+     IDstFmt dstType,
+     RoundMode2 roundingMode,
+     Operand src,
+     int rd,
+     bool absolute,
+     bool negate)
+ {
+     // Both 64-bit destination formats take the 32-bit path below, so report both.
+     if (dstType == IDstFmt.U64 || dstType == IDstFmt.S64)
+     {
+         context.Config.GpuAccessor.Log("Unimplemented 64-bits F2I.");
+     }
+
+     Instruction fpType = srcType.ToInstFPType();
+
+     bool isSignedInt = dstType == IDstFmt.S16 || dstType == IDstFmt.S32 || dstType == IDstFmt.S64;
+     bool isSmallInt = dstType == IDstFmt.U16 || dstType == IDstFmt.S16;
+
+     Operand srcB = context.FPAbsNeg(src, absolute, negate, fpType);
+
+     srcB = roundingMode switch
+     {
+         RoundMode2.Round => context.FPRound(srcB, fpType),
+         RoundMode2.Floor => context.FPFloor(srcB, fpType),
+         RoundMode2.Ceil => context.FPCeiling(srcB, fpType),
+         RoundMode2.Trunc => context.FPTruncate(srcB, fpType),
+         _ => srcB
+     };
+
+     if (!isSignedInt)
+     {
+         // Negative float to uint cast is undefined, so we clamp the value before conversion.
+         Operand c0 = srcType == DstFmt.F64 ? context.PackDouble2x32(0.0) : ConstF(0);
+
+         srcB = context.FPMaximum(srcB, c0, fpType);
+     }
+
+     if (srcType == DstFmt.F64)
+     {
+         srcB = isSignedInt
+             ? context.FP64ConvertToS32(srcB)
+             : context.FP64ConvertToU32(srcB);
+     }
+     else
+     {
+         srcB = isSignedInt
+             ? context.FP32ConvertToS32(srcB)
+             : context.FP32ConvertToU32(srcB);
+     }
+
+     if (isSmallInt)
+     {
+         // The conversion produces a 32-bit value, clamp it to the destination range.
+         int min = (int)GetIntMin(dstType);
+         int max = (int)GetIntMax(dstType);
+
+         srcB = isSignedInt
+             ? context.IClampS32(srcB, Const(min), Const(max))
+             : context.IClampU32(srcB, Const(min), Const(max));
+     }
+
+     Operand dest = GetDest(rd);
+
+     context.Copy(dest, srcB);
+
+     // TODO: CC.
+ }
+
+ /// <summary>
+ /// Emits an integer to float conversion: abs/neg modifiers, extraction of the
+ /// selected byte/half for small sources, conversion and the destination write.
+ /// </summary>
+ /// <param name="context">Emitter context that receives the generated operations</param>
+ /// <param name="srcType">Source integer format</param>
+ /// <param name="dstType">Destination floating point format</param>
+ /// <param name="src">Source value</param>
+ /// <param name="byteSelection">Index of the first byte extracted for 8 and 16-bit sources</param>
+ /// <param name="rd">Destination register index</param>
+ /// <param name="absolute">True to take the absolute value of the source</param>
+ /// <param name="negate">True to negate the source</param>
+ private static void EmitI2F(
+     EmitterContext context,
+     ISrcFmt srcType,
+     DstFmt dstType,
+     Operand src,
+     ByteSel byteSelection,
+     int rd,
+     bool absolute,
+     bool negate)
+ {
+     bool is8Bits = srcType == ISrcFmt.U8 || srcType == ISrcFmt.S8;
+     bool is16Bits = srcType == ISrcFmt.U16 || srcType == ISrcFmt.S16;
+
+     bool isSigned =
+         srcType == ISrcFmt.S8 ||
+         srcType == ISrcFmt.S16 ||
+         srcType == ISrcFmt.S32 ||
+         srcType == ISrcFmt.S64;
+
+     // TODO: Handle S/U64.
+
+     Operand value = context.IAbsNeg(src, absolute, negate);
+
+     if (is16Bits || is8Bits)
+     {
+         int bitSize = is16Bits ? 16 : 8;
+
+         if (isSigned)
+         {
+             value = context.BitfieldExtractS32(value, Const((int)byteSelection * 8), Const(bitSize));
+         }
+         else
+         {
+             value = context.BitfieldExtractU32(value, Const((int)byteSelection * 8), Const(bitSize));
+         }
+     }
+
+     if (dstType != DstFmt.F64)
+     {
+         value = isSigned
+             ? context.IConvertS32ToFP32(value)
+             : context.IConvertU32ToFP32(value);
+     }
+     else
+     {
+         value = isSigned
+             ? context.IConvertS32ToFP64(value)
+             : context.IConvertU32ToFP64(value);
+     }
+
+     WriteFP(context, dstType, value, rd);
+
+     // TODO: CC.
+ }
+
+ /// <summary>
+ /// Emits an integer to integer conversion: extraction of the selected byte/half
+ /// for small sources, abs/neg modifiers, optional saturation to the destination
+ /// range, the destination write and the ZF/NF update.
+ /// </summary>
+ /// <param name="context">Emitter context that receives the generated operations</param>
+ /// <param name="srcType">Source integer format</param>
+ /// <param name="dstType">Destination integer format</param>
+ /// <param name="src">Source value</param>
+ /// <param name="byteSelection">Index of the first byte extracted for 8 and 16-bit sources</param>
+ /// <param name="rd">Destination register index</param>
+ /// <param name="absolute">True to take the absolute value of the source</param>
+ /// <param name="negate">True to negate the source</param>
+ /// <param name="saturate">True to clamp the result to the destination format range</param>
+ /// <param name="writeCC">True to update the ZF and NF flags</param>
+ private static void EmitI2I(
+     EmitterContext context,
+     ISrcDstFmt srcType,
+     ISrcDstFmt dstType,
+     Operand src,
+     ByteSel byteSelection,
+     int rd,
+     bool absolute,
+     bool negate,
+     bool saturate,
+     bool writeCC)
+ {
+     // Formats above U32 (ignoring the S8 bits) are rejected, and nothing is emitted.
+     bool srcInvalid = (srcType & ~ISrcDstFmt.S8) > ISrcDstFmt.U32;
+     bool dstInvalid = (dstType & ~ISrcDstFmt.S8) > ISrcDstFmt.U32;
+
+     if (srcInvalid || dstInvalid)
+     {
+         context.Config.GpuAccessor.Log("Invalid I2I encoding.");
+         return;
+     }
+
+     bool srcSigned = srcType == ISrcDstFmt.S8 || srcType == ISrcDstFmt.S16 || srcType == ISrcDstFmt.S32;
+     bool dstSigned = dstType == ISrcDstFmt.S8 || dstType == ISrcDstFmt.S16 || dstType == ISrcDstFmt.S32;
+
+     bool srcIs16Bits = srcType == ISrcDstFmt.U16 || srcType == ISrcDstFmt.S16;
+     bool srcIs8Bits = srcType == ISrcDstFmt.U8 || srcType == ISrcDstFmt.S8;
+
+     Operand value = src;
+
+     if (srcIs16Bits || srcIs8Bits)
+     {
+         int bitSize = srcIs16Bits ? 16 : 8;
+
+         if (srcSigned)
+         {
+             value = context.BitfieldExtractS32(value, Const((int)byteSelection * 8), Const(bitSize));
+         }
+         else
+         {
+             value = context.BitfieldExtractU32(value, Const((int)byteSelection * 8), Const(bitSize));
+         }
+     }
+
+     value = context.IAbsNeg(value, absolute, negate);
+
+     if (saturate)
+     {
+         int lowest = (int)GetIntMin(dstType);
+         int highest = (int)GetIntMax(dstType);
+
+         if (dstSigned)
+         {
+             value = context.IClampS32(value, Const(lowest), Const(highest));
+         }
+         else
+         {
+             value = context.IClampU32(value, Const(lowest), Const(highest));
+         }
+     }
+
+     context.Copy(GetDest(rd), value);
+
+     SetZnFlags(context, value, writeCC);
+ }
+
+ /// <summary>
+ /// Reads a floating point source of the given format from a register.
+ /// For F16, <paramref name="h"/> selects element 1 (true) or 0 (false) of the unpacked pair.
+ /// </summary>
+ /// <exception cref="ArgumentException">Thrown when <paramref name="floatType"/> is not F16, F32 or F64</exception>
+ private static Operand UnpackReg(EmitterContext context, DstFmt floatType, bool h, int reg)
+ {
+     switch (floatType)
+     {
+         case DstFmt.F32:
+             return GetSrcReg(context, reg);
+         case DstFmt.F16:
+             return GetHalfUnpacked(context, GetSrcReg(context, reg), HalfSwizzle.F16)[h ? 1 : 0];
+         case DstFmt.F64:
+             return GetSrcReg(context, reg, isFP64: true);
+         default:
+             throw new ArgumentException($"Invalid floating point type \"{floatType}\".");
+     }
+ }
+
+ /// <summary>
+ /// Reads a floating point source of the given format from a constant buffer.
+ /// For F16, <paramref name="h"/> selects element 1 (true) or 0 (false) of the unpacked pair.
+ /// </summary>
+ /// <exception cref="ArgumentException">Thrown when <paramref name="floatType"/> is not F16, F32 or F64</exception>
+ private static Operand UnpackCbuf(EmitterContext context, DstFmt floatType, bool h, int cbufSlot, int cbufOffset)
+ {
+     switch (floatType)
+     {
+         case DstFmt.F32:
+             return GetSrcCbuf(context, cbufSlot, cbufOffset);
+         case DstFmt.F16:
+             return GetHalfUnpacked(context, GetSrcCbuf(context, cbufSlot, cbufOffset), HalfSwizzle.F16)[h ? 1 : 0];
+         case DstFmt.F64:
+             return GetSrcCbuf(context, cbufSlot, cbufOffset, isFP64: true);
+         default:
+             throw new ArgumentException($"Invalid floating point type \"{floatType}\".");
+     }
+ }
+
+ /// <summary>
+ /// Reads a floating point source of the given format from an immediate.
+ /// For F16, <paramref name="h"/> selects element 1 (true) or 0 (false) of the unpacked pair.
+ /// </summary>
+ /// <exception cref="ArgumentException">Thrown when <paramref name="floatType"/> is not F16, F32 or F64</exception>
+ private static Operand UnpackImm(EmitterContext context, DstFmt floatType, bool h, int imm)
+ {
+     switch (floatType)
+     {
+         case DstFmt.F32:
+             return GetSrcImm(context, imm);
+         case DstFmt.F16:
+             return GetHalfUnpacked(context, GetSrcImm(context, imm), HalfSwizzle.F16)[h ? 1 : 0];
+         case DstFmt.F64:
+             return GetSrcImm(context, imm, isFP64: true);
+         default:
+             throw new ArgumentException($"Invalid floating point type \"{floatType}\".");
+     }
+ }
+
+ /// <summary>
+ /// Writes a floating point result to the destination register(s) for the given format.
+ /// F32 is copied as is, F16 is packed with a zero second half, and any other format
+ /// is written as the low and high words of a double to two destination registers.
+ /// </summary>
+ private static void WriteFP(EmitterContext context, DstFmt type, Operand srcB, int rd)
+ {
+     Operand dest = GetDest(rd);
+
+     switch (type)
+     {
+         case DstFmt.F32:
+             context.Copy(dest, srcB);
+             break;
+         case DstFmt.F16:
+             context.Copy(dest, context.PackHalf2x16(srcB, ConstF(0)));
+             break;
+         default: /* DstFmt.F64 */
+             Operand destHigh = GetDest2(rd);
+
+             context.Copy(dest, context.UnpackDouble2x32Low(srcB));
+             context.Copy(destHigh, context.UnpackDouble2x32High(srcB));
+             break;
+     }
+ }
+
+ /// <summary>
+ /// Gets the instruction floating point type flag for a format: FP64 for F64, FP32 for everything else.
+ /// </summary>
+ private static Instruction ToInstFPType(this DstFmt type)
+ {
+     if (type == DstFmt.F64)
+     {
+         return Instruction.FP64;
+     }
+
+     return Instruction.FP32;
+ }
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/Instructions/InstEmitFloatArithmetic.cs b/src/Ryujinx.Graphics.Shader/Instructions/InstEmitFloatArithmetic.cs
new file mode 100644
index 00000000..29803c31
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/Instructions/InstEmitFloatArithmetic.cs
@@ -0,0 +1,532 @@
+using Ryujinx.Graphics.Shader.Decoders;
+using Ryujinx.Graphics.Shader.IntermediateRepresentation;
+using Ryujinx.Graphics.Shader.Translation;
+
+using static Ryujinx.Graphics.Shader.Instructions.InstEmitAluHelper;
+using static Ryujinx.Graphics.Shader.Instructions.InstEmitHelper;
+using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
+
+namespace Ryujinx.Graphics.Shader.Instructions
+{
+ static partial class InstEmit
+ {
+        // FP64 add (InstDaddR): both operands are read from registers.
+        public static void DaddR(EmitterContext context)
+        {
+            var inst = context.GetOp<InstDaddR>();
+
+            Operand lhs = GetSrcReg(context, inst.SrcA, isFP64: true);
+            Operand rhs = GetSrcReg(context, inst.SrcB, isFP64: true);
+
+            EmitFadd(context, Instruction.FP64, lhs, rhs, inst.Dest, inst.NegA, inst.NegB, inst.AbsA, inst.AbsB, saturate: false, writeCC: inst.WriteCC);
+        }
+
+        // FP64 add (InstDaddI): second operand is the Imm20 field converted through Imm20ToFloat.
+        public static void DaddI(EmitterContext context)
+        {
+            var inst = context.GetOp<InstDaddI>();
+
+            Operand lhs = GetSrcReg(context, inst.SrcA, isFP64: true);
+            Operand rhs = GetSrcImm(context, Imm20ToFloat(inst.Imm20), isFP64: true);
+
+            EmitFadd(context, Instruction.FP64, lhs, rhs, inst.Dest, inst.NegA, inst.NegB, inst.AbsA, inst.AbsB, saturate: false, writeCC: inst.WriteCC);
+        }
+
+        // FP64 add (InstDaddC): second operand is read from a constant buffer slot/offset.
+        public static void DaddC(EmitterContext context)
+        {
+            var inst = context.GetOp<InstDaddC>();
+
+            Operand lhs = GetSrcReg(context, inst.SrcA, isFP64: true);
+            Operand rhs = GetSrcCbuf(context, inst.CbufSlot, inst.CbufOffset, isFP64: true);
+
+            EmitFadd(context, Instruction.FP64, lhs, rhs, inst.Dest, inst.NegA, inst.NegB, inst.AbsA, inst.AbsB, saturate: false, writeCC: inst.WriteCC);
+        }
+
+        // FP64 fused multiply-add (InstDfmaR): all three operands are registers.
+        // The instruction's NegA flag is forwarded as EmitFfma's "negateB" argument.
+        public static void DfmaR(EmitterContext context)
+        {
+            var inst = context.GetOp<InstDfmaR>();
+
+            Operand mulA = GetSrcReg(context, inst.SrcA, isFP64: true);
+            Operand mulB = GetSrcReg(context, inst.SrcB, isFP64: true);
+            Operand addend = GetSrcReg(context, inst.SrcC, isFP64: true);
+
+            EmitFfma(context, Instruction.FP64, mulA, mulB, addend, inst.Dest, negateB: inst.NegA, negateC: inst.NegC, saturate: false, writeCC: inst.WriteCC);
+        }
+
+        // FP64 fused multiply-add (InstDfmaI): second multiplicand is the converted Imm20 field.
+        // The instruction's NegA flag is forwarded as EmitFfma's "negateB" argument.
+        public static void DfmaI(EmitterContext context)
+        {
+            var inst = context.GetOp<InstDfmaI>();
+
+            Operand mulA = GetSrcReg(context, inst.SrcA, isFP64: true);
+            Operand mulB = GetSrcImm(context, Imm20ToFloat(inst.Imm20), isFP64: true);
+            Operand addend = GetSrcReg(context, inst.SrcC, isFP64: true);
+
+            EmitFfma(context, Instruction.FP64, mulA, mulB, addend, inst.Dest, negateB: inst.NegA, negateC: inst.NegC, saturate: false, writeCC: inst.WriteCC);
+        }
+
+        // FP64 fused multiply-add (InstDfmaC): second multiplicand comes from a constant buffer.
+        // The instruction's NegA flag is forwarded as EmitFfma's "negateB" argument.
+        public static void DfmaC(EmitterContext context)
+        {
+            var inst = context.GetOp<InstDfmaC>();
+
+            Operand mulA = GetSrcReg(context, inst.SrcA, isFP64: true);
+            Operand mulB = GetSrcCbuf(context, inst.CbufSlot, inst.CbufOffset, isFP64: true);
+            Operand addend = GetSrcReg(context, inst.SrcC, isFP64: true);
+
+            EmitFfma(context, Instruction.FP64, mulA, mulB, addend, inst.Dest, negateB: inst.NegA, negateC: inst.NegC, saturate: false, writeCC: inst.WriteCC);
+        }
+
+        // FP64 fused multiply-add (InstDfmaRc): the SrcC register is the second multiplicand
+        // and the constant buffer value is the addend.
+        public static void DfmaRc(EmitterContext context)
+        {
+            var inst = context.GetOp<InstDfmaRc>();
+
+            Operand mulA = GetSrcReg(context, inst.SrcA, isFP64: true);
+            Operand mulB = GetSrcReg(context, inst.SrcC, isFP64: true);
+            Operand addend = GetSrcCbuf(context, inst.CbufSlot, inst.CbufOffset, isFP64: true);
+
+            EmitFfma(context, Instruction.FP64, mulA, mulB, addend, inst.Dest, negateB: inst.NegA, negateC: inst.NegC, saturate: false, writeCC: inst.WriteCC);
+        }
+
+        // FP64 multiply (InstDmulR): both operands are registers; no scale and no saturation.
+        public static void DmulR(EmitterContext context)
+        {
+            var inst = context.GetOp<InstDmulR>();
+
+            Operand lhs = GetSrcReg(context, inst.SrcA, isFP64: true);
+            Operand rhs = GetSrcReg(context, inst.SrcB, isFP64: true);
+
+            EmitFmul(context, Instruction.FP64, MultiplyScale.NoScale, lhs, rhs, inst.Dest, negateB: inst.NegA, saturate: false, writeCC: inst.WriteCC);
+        }
+
+        // FP64 multiply (InstDmulI): second operand is the converted Imm20 field; no scale and no saturation.
+        public static void DmulI(EmitterContext context)
+        {
+            var inst = context.GetOp<InstDmulI>();
+
+            Operand lhs = GetSrcReg(context, inst.SrcA, isFP64: true);
+            Operand rhs = GetSrcImm(context, Imm20ToFloat(inst.Imm20), isFP64: true);
+
+            EmitFmul(context, Instruction.FP64, MultiplyScale.NoScale, lhs, rhs, inst.Dest, negateB: inst.NegA, saturate: false, writeCC: inst.WriteCC);
+        }
+
+        // FP64 multiply (InstDmulC): second operand comes from a constant buffer; no scale and no saturation.
+        public static void DmulC(EmitterContext context)
+        {
+            var inst = context.GetOp<InstDmulC>();
+
+            Operand lhs = GetSrcReg(context, inst.SrcA, isFP64: true);
+            Operand rhs = GetSrcCbuf(context, inst.CbufSlot, inst.CbufOffset, isFP64: true);
+
+            EmitFmul(context, Instruction.FP64, MultiplyScale.NoScale, lhs, rhs, inst.Dest, negateB: inst.NegA, saturate: false, writeCC: inst.WriteCC);
+        }
+
+        // FP32 add (InstFaddR): both operands are registers.
+        public static void FaddR(EmitterContext context)
+        {
+            var inst = context.GetOp<InstFaddR>();
+
+            Operand lhs = GetSrcReg(context, inst.SrcA);
+            Operand rhs = GetSrcReg(context, inst.SrcB);
+
+            EmitFadd(context, Instruction.FP32, lhs, rhs, inst.Dest, inst.NegA, inst.NegB, inst.AbsA, inst.AbsB, saturate: inst.Sat, writeCC: inst.WriteCC);
+        }
+
+        // FP32 add (InstFaddI): second operand is the Imm20 field converted through Imm20ToFloat.
+        public static void FaddI(EmitterContext context)
+        {
+            var inst = context.GetOp<InstFaddI>();
+
+            Operand lhs = GetSrcReg(context, inst.SrcA);
+            Operand rhs = GetSrcImm(context, Imm20ToFloat(inst.Imm20));
+
+            EmitFadd(context, Instruction.FP32, lhs, rhs, inst.Dest, inst.NegA, inst.NegB, inst.AbsA, inst.AbsB, saturate: inst.Sat, writeCC: inst.WriteCC);
+        }
+
+        // FP32 add (InstFaddC): second operand comes from a constant buffer.
+        public static void FaddC(EmitterContext context)
+        {
+            var inst = context.GetOp<InstFaddC>();
+
+            Operand lhs = GetSrcReg(context, inst.SrcA);
+            Operand rhs = GetSrcCbuf(context, inst.CbufSlot, inst.CbufOffset);
+
+            EmitFadd(context, Instruction.FP32, lhs, rhs, inst.Dest, inst.NegA, inst.NegB, inst.AbsA, inst.AbsB, saturate: inst.Sat, writeCC: inst.WriteCC);
+        }
+
+        // FP32 add (InstFadd32i): second operand is the Imm32 field, passed unconverted; never saturates.
+        public static void Fadd32i(EmitterContext context)
+        {
+            var inst = context.GetOp<InstFadd32i>();
+
+            Operand lhs = GetSrcReg(context, inst.SrcA);
+            Operand rhs = GetSrcImm(context, inst.Imm32);
+
+            EmitFadd(context, Instruction.FP32, lhs, rhs, inst.Dest, inst.NegA, inst.NegB, inst.AbsA, inst.AbsB, saturate: false, writeCC: inst.WriteCC);
+        }
+
+        // FP32 fused multiply-add (InstFfmaR): all three operands are registers.
+        // The instruction's NegA flag is forwarded as EmitFfma's "negateB" argument.
+        public static void FfmaR(EmitterContext context)
+        {
+            var inst = context.GetOp<InstFfmaR>();
+
+            Operand mulA = GetSrcReg(context, inst.SrcA);
+            Operand mulB = GetSrcReg(context, inst.SrcB);
+            Operand addend = GetSrcReg(context, inst.SrcC);
+
+            EmitFfma(context, Instruction.FP32, mulA, mulB, addend, inst.Dest, negateB: inst.NegA, negateC: inst.NegC, saturate: inst.Sat, writeCC: inst.WriteCC);
+        }
+
+        // FP32 fused multiply-add (InstFfmaI): second multiplicand is the converted Imm20 field.
+        // The instruction's NegA flag is forwarded as EmitFfma's "negateB" argument.
+        public static void FfmaI(EmitterContext context)
+        {
+            var inst = context.GetOp<InstFfmaI>();
+
+            Operand mulA = GetSrcReg(context, inst.SrcA);
+            Operand mulB = GetSrcImm(context, Imm20ToFloat(inst.Imm20));
+            Operand addend = GetSrcReg(context, inst.SrcC);
+
+            EmitFfma(context, Instruction.FP32, mulA, mulB, addend, inst.Dest, negateB: inst.NegA, negateC: inst.NegC, saturate: inst.Sat, writeCC: inst.WriteCC);
+        }
+
+        // FP32 fused multiply-add (InstFfmaC): second multiplicand comes from a constant buffer.
+        // The instruction's NegA flag is forwarded as EmitFfma's "negateB" argument.
+        public static void FfmaC(EmitterContext context)
+        {
+            var inst = context.GetOp<InstFfmaC>();
+
+            Operand mulA = GetSrcReg(context, inst.SrcA);
+            Operand mulB = GetSrcCbuf(context, inst.CbufSlot, inst.CbufOffset);
+            Operand addend = GetSrcReg(context, inst.SrcC);
+
+            EmitFfma(context, Instruction.FP32, mulA, mulB, addend, inst.Dest, negateB: inst.NegA, negateC: inst.NegC, saturate: inst.Sat, writeCC: inst.WriteCC);
+        }
+
+        // FP32 fused multiply-add (InstFfmaRc): the SrcC register is the second multiplicand
+        // and the constant buffer value is the addend.
+        public static void FfmaRc(EmitterContext context)
+        {
+            var inst = context.GetOp<InstFfmaRc>();
+
+            Operand mulA = GetSrcReg(context, inst.SrcA);
+            Operand mulB = GetSrcReg(context, inst.SrcC);
+            Operand addend = GetSrcCbuf(context, inst.CbufSlot, inst.CbufOffset);
+
+            EmitFfma(context, Instruction.FP32, mulA, mulB, addend, inst.Dest, negateB: inst.NegA, negateC: inst.NegC, saturate: inst.Sat, writeCC: inst.WriteCC);
+        }
+
+        // FP32 fused multiply-add (InstFfma32i): second multiplicand is the Imm32 field and the
+        // addend is read from the destination register itself.
+        public static void Ffma32i(EmitterContext context)
+        {
+            var inst = context.GetOp<InstFfma32i>();
+
+            Operand mulA = GetSrcReg(context, inst.SrcA);
+            Operand mulB = GetSrcImm(context, inst.Imm32);
+            Operand addend = GetSrcReg(context, inst.Dest);
+
+            EmitFfma(context, Instruction.FP32, mulA, mulB, addend, inst.Dest, negateB: inst.NegA, negateC: inst.NegC, saturate: inst.Sat, writeCC: inst.WriteCC);
+        }
+
+        // FP32 multiply (InstFmulR): both operands are registers; the instruction's Scale is applied.
+        public static void FmulR(EmitterContext context)
+        {
+            var inst = context.GetOp<InstFmulR>();
+
+            Operand lhs = GetSrcReg(context, inst.SrcA);
+            Operand rhs = GetSrcReg(context, inst.SrcB);
+
+            EmitFmul(context, Instruction.FP32, inst.Scale, lhs, rhs, inst.Dest, negateB: inst.NegA, saturate: inst.Sat, writeCC: inst.WriteCC);
+        }
+
+        // FP32 multiply (InstFmulI): second operand is the converted Imm20 field; the instruction's Scale is applied.
+        public static void FmulI(EmitterContext context)
+        {
+            var inst = context.GetOp<InstFmulI>();
+
+            Operand lhs = GetSrcReg(context, inst.SrcA);
+            Operand rhs = GetSrcImm(context, Imm20ToFloat(inst.Imm20));
+
+            EmitFmul(context, Instruction.FP32, inst.Scale, lhs, rhs, inst.Dest, negateB: inst.NegA, saturate: inst.Sat, writeCC: inst.WriteCC);
+        }
+
+        // FP32 multiply (InstFmulC): second operand comes from a constant buffer; the instruction's Scale is applied.
+        public static void FmulC(EmitterContext context)
+        {
+            var inst = context.GetOp<InstFmulC>();
+
+            Operand lhs = GetSrcReg(context, inst.SrcA);
+            Operand rhs = GetSrcCbuf(context, inst.CbufSlot, inst.CbufOffset);
+
+            EmitFmul(context, Instruction.FP32, inst.Scale, lhs, rhs, inst.Dest, negateB: inst.NegA, saturate: inst.Sat, writeCC: inst.WriteCC);
+        }
+
+        // FP32 multiply (InstFmul32i): second operand is the Imm32 field; no scale and no negation.
+        public static void Fmul32i(EmitterContext context)
+        {
+            var inst = context.GetOp<InstFmul32i>();
+
+            Operand lhs = GetSrcReg(context, inst.SrcA);
+            Operand rhs = GetSrcImm(context, inst.Imm32);
+
+            EmitFmul(context, Instruction.FP32, MultiplyScale.NoScale, lhs, rhs, inst.Dest, negateB: false, saturate: inst.Sat, writeCC: inst.WriteCC);
+        }
+
+        // Half-precision pair add (InstHadd2R): both operands are swizzled registers.
+        public static void Hadd2R(EmitterContext context)
+        {
+            var inst = context.GetOp<InstHadd2R>();
+
+            Operand[] lhs = GetHalfSrc(context, inst.ASwizzle, inst.SrcA, inst.NegA, inst.AbsA);
+            Operand[] rhs = GetHalfSrc(context, inst.BSwizzle, inst.SrcB, inst.NegB, inst.AbsB);
+
+            EmitHadd2Hmul2(context, inst.OFmt, lhs, rhs, isAdd: true, rd: inst.Dest, saturate: inst.Sat);
+        }
+
+        // Half-precision pair add (InstHadd2I): second operand is built from the BimmH0/BimmH1 immediates.
+        public static void Hadd2I(EmitterContext context)
+        {
+            var inst = context.GetOp<InstHadd2I>();
+
+            Operand[] lhs = GetHalfSrc(context, inst.ASwizzle, inst.SrcA, inst.NegA, inst.AbsA);
+            Operand[] rhs = GetHalfSrc(context, inst.BimmH0, inst.BimmH1);
+
+            EmitHadd2Hmul2(context, inst.OFmt, lhs, rhs, isAdd: true, rd: inst.Dest, saturate: inst.Sat);
+        }
+
+        // Half-precision pair add (InstHadd2C): second operand is a constant buffer value read with HalfSwizzle.F32.
+        public static void Hadd2C(EmitterContext context)
+        {
+            var inst = context.GetOp<InstHadd2C>();
+
+            Operand[] lhs = GetHalfSrc(context, inst.ASwizzle, inst.SrcA, inst.NegA, inst.AbsA);
+            Operand[] rhs = GetHalfSrc(context, HalfSwizzle.F32, inst.CbufSlot, inst.CbufOffset, inst.NegB, inst.AbsB);
+
+            EmitHadd2Hmul2(context, inst.OFmt, lhs, rhs, isAdd: true, rd: inst.Dest, saturate: inst.Sat);
+        }
+
+        // Half-precision pair add (InstHadd232i): second operand is the Imm field; output format is fixed to OFmt.F16
+        // and no absolute value is applied to the first operand.
+        public static void Hadd232i(EmitterContext context)
+        {
+            var inst = context.GetOp<InstHadd232i>();
+
+            Operand[] lhs = GetHalfSrc(context, inst.ASwizzle, inst.SrcA, inst.NegA, false);
+            Operand[] rhs = GetHalfSrc(context, inst.Imm);
+
+            EmitHadd2Hmul2(context, OFmt.F16, lhs, rhs, isAdd: true, rd: inst.Dest, saturate: inst.Sat);
+        }
+
+        // Half-precision pair fused multiply-add (InstHfma2R): all operands are swizzled registers.
+        // The instruction's NegA flag is applied while loading the second multiplicand.
+        public static void Hfma2R(EmitterContext context)
+        {
+            var inst = context.GetOp<InstHfma2R>();
+
+            Operand[] mulA = GetHalfSrc(context, inst.ASwizzle, inst.SrcA, false, false);
+            Operand[] mulB = GetHalfSrc(context, inst.BSwizzle, inst.SrcB, inst.NegA, false);
+            Operand[] addend = GetHalfSrc(context, inst.CSwizzle, inst.SrcC, inst.NegC, false);
+
+            EmitHfma2(context, inst.OFmt, mulA, mulB, addend, rd: inst.Dest, saturate: inst.Sat);
+        }
+
+        // Half-precision pair fused multiply-add (InstHfma2I): second multiplicand is built from BimmH0/BimmH1.
+        public static void Hfma2I(EmitterContext context)
+        {
+            var inst = context.GetOp<InstHfma2I>();
+
+            Operand[] mulA = GetHalfSrc(context, inst.ASwizzle, inst.SrcA, false, false);
+            Operand[] mulB = GetHalfSrc(context, inst.BimmH0, inst.BimmH1);
+            Operand[] addend = GetHalfSrc(context, inst.CSwizzle, inst.SrcC, inst.NegC, false);
+
+            EmitHfma2(context, inst.OFmt, mulA, mulB, addend, rd: inst.Dest, saturate: inst.Sat);
+        }
+
+        // Half-precision pair fused multiply-add (InstHfma2C): second multiplicand is a constant buffer value
+        // read with HalfSwizzle.F32; the instruction's NegA flag is applied to it.
+        public static void Hfma2C(EmitterContext context)
+        {
+            var inst = context.GetOp<InstHfma2C>();
+
+            Operand[] mulA = GetHalfSrc(context, inst.ASwizzle, inst.SrcA, false, false);
+            Operand[] mulB = GetHalfSrc(context, HalfSwizzle.F32, inst.CbufSlot, inst.CbufOffset, inst.NegA, false);
+            Operand[] addend = GetHalfSrc(context, inst.CSwizzle, inst.SrcC, inst.NegC, false);
+
+            EmitHfma2(context, inst.OFmt, mulA, mulB, addend, rd: inst.Dest, saturate: inst.Sat);
+        }
+
+        // Half-precision pair fused multiply-add (InstHfma2Rc): the SrcC register is the second multiplicand
+        // and the constant buffer value (read with HalfSwizzle.F32) is the addend.
+        public static void Hfma2Rc(EmitterContext context)
+        {
+            var inst = context.GetOp<InstHfma2Rc>();
+
+            Operand[] mulA = GetHalfSrc(context, inst.ASwizzle, inst.SrcA, false, false);
+            Operand[] mulB = GetHalfSrc(context, inst.CSwizzle, inst.SrcC, inst.NegA, false);
+            Operand[] addend = GetHalfSrc(context, HalfSwizzle.F32, inst.CbufSlot, inst.CbufOffset, inst.NegC, false);
+
+            EmitHfma2(context, inst.OFmt, mulA, mulB, addend, rd: inst.Dest, saturate: inst.Sat);
+        }
+
+        // Half-precision pair fused multiply-add (InstHfma232i): second multiplicand is the Imm field and the
+        // addend is the destination register read with HalfSwizzle.F16. Output is OFmt.F16, never saturated.
+        public static void Hfma232i(EmitterContext context)
+        {
+            var inst = context.GetOp<InstHfma232i>();
+
+            Operand[] mulA = GetHalfSrc(context, inst.ASwizzle, inst.SrcA, false, false);
+            Operand[] mulB = GetHalfSrc(context, inst.Imm);
+            Operand[] addend = GetHalfSrc(context, HalfSwizzle.F16, inst.Dest, inst.NegC, false);
+
+            EmitHfma2(context, OFmt.F16, mulA, mulB, addend, rd: inst.Dest, saturate: false);
+        }
+
+        // Half-precision pair multiply (InstHmul2R): both operands are swizzled registers.
+        // The instruction's NegA flag is applied while loading the second operand.
+        public static void Hmul2R(EmitterContext context)
+        {
+            var inst = context.GetOp<InstHmul2R>();
+
+            Operand[] lhs = GetHalfSrc(context, inst.ASwizzle, inst.SrcA, false, inst.AbsA);
+            Operand[] rhs = GetHalfSrc(context, inst.BSwizzle, inst.SrcB, inst.NegA, inst.AbsB);
+
+            EmitHadd2Hmul2(context, inst.OFmt, lhs, rhs, isAdd: false, rd: inst.Dest, saturate: inst.Sat);
+        }
+
+        // Half-precision pair multiply (InstHmul2I): second operand is built from BimmH0/BimmH1;
+        // here the NegA flag is applied to the first operand.
+        public static void Hmul2I(EmitterContext context)
+        {
+            var inst = context.GetOp<InstHmul2I>();
+
+            Operand[] lhs = GetHalfSrc(context, inst.ASwizzle, inst.SrcA, inst.NegA, inst.AbsA);
+            Operand[] rhs = GetHalfSrc(context, inst.BimmH0, inst.BimmH1);
+
+            EmitHadd2Hmul2(context, inst.OFmt, lhs, rhs, isAdd: false, rd: inst.Dest, saturate: inst.Sat);
+        }
+
+        // Half-precision pair multiply (InstHmul2C): second operand is a constant buffer value read with
+        // HalfSwizzle.F32; the instruction's NegA flag is applied to it.
+        public static void Hmul2C(EmitterContext context)
+        {
+            var inst = context.GetOp<InstHmul2C>();
+
+            Operand[] lhs = GetHalfSrc(context, inst.ASwizzle, inst.SrcA, false, inst.AbsA);
+            Operand[] rhs = GetHalfSrc(context, HalfSwizzle.F32, inst.CbufSlot, inst.CbufOffset, inst.NegA, inst.AbsB);
+
+            EmitHadd2Hmul2(context, inst.OFmt, lhs, rhs, isAdd: false, rd: inst.Dest, saturate: inst.Sat);
+        }
+
+        // Half-precision pair multiply (InstHmul232i): second operand is the Imm32 field; output is OFmt.F16
+        // and no negate/absolute modifiers are applied to the first operand.
+        public static void Hmul232i(EmitterContext context)
+        {
+            var inst = context.GetOp<InstHmul232i>();
+
+            Operand[] lhs = GetHalfSrc(context, inst.ASwizzle, inst.SrcA, false, false);
+            Operand[] rhs = GetHalfSrc(context, inst.Imm32);
+
+            EmitHadd2Hmul2(context, OFmt.F16, lhs, rhs, isAdd: false, rd: inst.Dest, saturate: inst.Sat);
+        }
+
+        // Shared emitter for floating point adds.
+        // Applies the abs/neg modifiers to each source, adds them, optionally saturates the sum,
+        // stores it to register "rd" and then updates the FP Z/N flags (gated by "writeCC").
+        // "fpType" is Instruction.FP32 or Instruction.FP64 and is forwarded to every FP operation.
+        private static void EmitFadd(
+            EmitterContext context,
+            Instruction fpType,
+            Operand srcA,
+            Operand srcB,
+            int rd,
+            bool negateA,
+            bool negateB,
+            bool absoluteA,
+            bool absoluteB,
+            bool saturate,
+            bool writeCC)
+        {
+            Operand lhs = context.FPAbsNeg(srcA, absoluteA, negateA, fpType);
+            Operand rhs = context.FPAbsNeg(srcB, absoluteB, negateB, fpType);
+
+            Operand sum = context.FPAdd(lhs, rhs, fpType);
+            Operand result = context.FPSaturate(sum, saturate, fpType);
+
+            SetDest(context, result, rd, fpType == Instruction.FP64);
+
+            SetFPZnFlags(context, result, writeCC, fpType);
+        }
+
+        // Shared emitter for floating point fused multiply-adds.
+        // Optionally negates srcB and srcC, emits the fused multiply-add of (srcA, srcB, srcC), optionally
+        // saturates it, stores it to register "rd" and then updates the FP Z/N flags (gated by "writeCC").
+        private static void EmitFfma(
+            EmitterContext context,
+            Instruction fpType,
+            Operand srcA,
+            Operand srcB,
+            Operand srcC,
+            int rd,
+            bool negateB,
+            bool negateC,
+            bool saturate,
+            bool writeCC)
+        {
+            Operand mulB = context.FPNegate(srcB, negateB, fpType);
+            Operand addend = context.FPNegate(srcC, negateC, fpType);
+
+            Operand fused = context.FPFusedMultiplyAdd(srcA, mulB, addend, fpType);
+            Operand result = context.FPSaturate(fused, saturate, fpType);
+
+            SetDest(context, result, rd, fpType == Instruction.FP64);
+
+            SetFPZnFlags(context, result, writeCC, fpType);
+        }
+
+        // Shared emitter for floating point multiplies.
+        // Optionally negates srcB, pre-multiplies srcA by the requested power-of-two scale, multiplies the
+        // operands, optionally saturates, stores the result to "rd" and updates the FP Z/N flags.
+        // An unrecognized scale is logged and behaves as a multiplication by 1.
+        private static void EmitFmul(
+            EmitterContext context,
+            Instruction fpType,
+            MultiplyScale scale,
+            Operand srcA,
+            Operand srcB,
+            int rd,
+            bool negateB,
+            bool saturate,
+            bool writeCC)
+        {
+            bool isDouble = fpType == Instruction.FP64;
+
+            srcB = context.FPNegate(srcB, negateB, fpType);
+
+            if (scale != MultiplyScale.NoScale)
+            {
+                float factor = scale switch
+                {
+                    MultiplyScale.D2 => 0.5f,
+                    MultiplyScale.D4 => 0.25f,
+                    MultiplyScale.D8 => 0.125f,
+                    MultiplyScale.M2 => 2f,
+                    MultiplyScale.M4 => 4f,
+                    MultiplyScale.M8 => 8f,
+                    _ => 1f // Invalid, behave as if it had no scale.
+                };
+
+                Operand scaleConst = ConstF(factor);
+
+                // A factor of 1 can only come from the fallback arm above.
+                if (factor == 1f)
+                {
+                    context.Config.GpuAccessor.Log($"Invalid FP multiply scale \"{scale}\".");
+                }
+
+                // The scale constant is created as FP32, widen it for double precision multiplies.
+                if (isDouble)
+                {
+                    scaleConst = context.FP32ConvertToFP64(scaleConst);
+                }
+
+                srcA = context.FPMultiply(srcA, scaleConst, fpType);
+            }
+
+            Operand product = context.FPMultiply(srcA, srcB, fpType);
+            Operand result = context.FPSaturate(product, saturate, fpType);
+
+            SetDest(context, result, rd, isDouble);
+
+            SetFPZnFlags(context, result, writeCC, fpType);
+        }
+
+        // Shared emitter for the two-element half precision add and multiply instructions.
+        // Each of the two element pairs is added ("isAdd" true) or multiplied, optionally saturated,
+        // and the pair of results is packed through GetHalfPacked and copied to register "rd".
+        private static void EmitHadd2Hmul2(
+            EmitterContext context,
+            OFmt swizzle,
+            Operand[] srcA,
+            Operand[] srcB,
+            bool isAdd,
+            int rd,
+            bool saturate)
+        {
+            Operand[] results = new Operand[2];
+
+            for (int i = 0; i < results.Length; i++)
+            {
+                Operand value = isAdd
+                    ? context.FPAdd(srcA[i], srcB[i])
+                    : context.FPMultiply(srcA[i], srcB[i]);
+
+                results[i] = context.FPSaturate(value, saturate);
+            }
+
+            Operand dest = GetDest(rd);
+            Operand packed = GetHalfPacked(context, swizzle, results, rd);
+
+            context.Copy(dest, packed);
+        }
+
+        // Shared emitter for the two-element half precision fused multiply-add instructions.
+        // Each element triple goes through a fused multiply-add and optional saturation, then the
+        // pair of results is packed through GetHalfPacked and copied to register "rd".
+        public static void EmitHfma2(
+            EmitterContext context,
+            OFmt swizzle,
+            Operand[] srcA,
+            Operand[] srcB,
+            Operand[] srcC,
+            int rd,
+            bool saturate)
+        {
+            Operand[] results = new Operand[2];
+
+            for (int i = 0; i < results.Length; i++)
+            {
+                Operand fused = context.FPFusedMultiplyAdd(srcA[i], srcB[i], srcC[i]);
+
+                results[i] = context.FPSaturate(fused, saturate);
+            }
+
+            Operand dest = GetDest(rd);
+            Operand packed = GetHalfPacked(context, swizzle, results, rd);
+
+            context.Copy(dest, packed);
+        }
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/Instructions/InstEmitFloatComparison.cs b/src/Ryujinx.Graphics.Shader/Instructions/InstEmitFloatComparison.cs
new file mode 100644
index 00000000..8f99ddb3
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/Instructions/InstEmitFloatComparison.cs
@@ -0,0 +1,575 @@
+using Ryujinx.Graphics.Shader.Decoders;
+using Ryujinx.Graphics.Shader.IntermediateRepresentation;
+using Ryujinx.Graphics.Shader.Translation;
+using System;
+
+using static Ryujinx.Graphics.Shader.Instructions.InstEmitAluHelper;
+using static Ryujinx.Graphics.Shader.Instructions.InstEmitHelper;
+using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
+
+namespace Ryujinx.Graphics.Shader.Instructions
+{
+ static partial class InstEmit
+ {
+        // FP64 compare-and-set to register (InstDsetR): both operands are registers.
+        public static void DsetR(EmitterContext context)
+        {
+            var inst = context.GetOp<InstDsetR>();
+
+            Operand lhs = GetSrcReg(context, inst.SrcA, isFP64: true);
+            Operand rhs = GetSrcReg(context, inst.SrcB, isFP64: true);
+
+            EmitFset(
+                context, inst.FComp, inst.Bop, lhs, rhs, inst.SrcPred, inst.SrcPredInv, inst.Dest,
+                inst.AbsA, inst.AbsB, inst.NegA, inst.NegB, inst.BVal, inst.WriteCC, isFP64: true);
+        }
+
+        // FP64 compare-and-set to register (InstDsetI): second operand is the converted Imm20 field.
+        public static void DsetI(EmitterContext context)
+        {
+            var inst = context.GetOp<InstDsetI>();
+
+            Operand lhs = GetSrcReg(context, inst.SrcA, isFP64: true);
+            Operand rhs = GetSrcImm(context, Imm20ToFloat(inst.Imm20), isFP64: true);
+
+            EmitFset(
+                context, inst.FComp, inst.Bop, lhs, rhs, inst.SrcPred, inst.SrcPredInv, inst.Dest,
+                inst.AbsA, inst.AbsB, inst.NegA, inst.NegB, inst.BVal, inst.WriteCC, isFP64: true);
+        }
+
+        // FP64 compare-and-set to register (InstDsetC): second operand comes from a constant buffer.
+        public static void DsetC(EmitterContext context)
+        {
+            var inst = context.GetOp<InstDsetC>();
+
+            Operand lhs = GetSrcReg(context, inst.SrcA, isFP64: true);
+            Operand rhs = GetSrcCbuf(context, inst.CbufSlot, inst.CbufOffset, isFP64: true);
+
+            EmitFset(
+                context, inst.FComp, inst.Bop, lhs, rhs, inst.SrcPred, inst.SrcPredInv, inst.Dest,
+                inst.AbsA, inst.AbsB, inst.NegA, inst.NegB, inst.BVal, inst.WriteCC, isFP64: true);
+        }
+
+        // FP64 compare-and-set to predicates (InstDsetpR): both operands are registers.
+        public static void DsetpR(EmitterContext context)
+        {
+            var inst = context.GetOp<InstDsetpR>();
+
+            Operand lhs = GetSrcReg(context, inst.SrcA, isFP64: true);
+            Operand rhs = GetSrcReg(context, inst.SrcB, isFP64: true);
+
+            EmitFsetp(
+                context, inst.FComp, inst.Bop, lhs, rhs, inst.SrcPred, inst.SrcPredInv, inst.DestPred, inst.DestPredInv,
+                inst.AbsA, inst.AbsB, inst.NegA, inst.NegB, writeCC: false, isFP64: true);
+        }
+
+        // FP64 compare-and-set to predicates (InstDsetpI): second operand is the converted Imm20 field.
+        public static void DsetpI(EmitterContext context)
+        {
+            var inst = context.GetOp<InstDsetpI>();
+
+            Operand lhs = GetSrcReg(context, inst.SrcA, isFP64: true);
+            Operand rhs = GetSrcImm(context, Imm20ToFloat(inst.Imm20), isFP64: true);
+
+            EmitFsetp(
+                context, inst.FComp, inst.Bop, lhs, rhs, inst.SrcPred, inst.SrcPredInv, inst.DestPred, inst.DestPredInv,
+                inst.AbsA, inst.AbsB, inst.NegA, inst.NegB, writeCC: false, isFP64: true);
+        }
+
+        // FP64 compare-and-set to predicates (InstDsetpC): second operand comes from a constant buffer.
+        public static void DsetpC(EmitterContext context)
+        {
+            var inst = context.GetOp<InstDsetpC>();
+
+            Operand lhs = GetSrcReg(context, inst.SrcA, isFP64: true);
+            Operand rhs = GetSrcCbuf(context, inst.CbufSlot, inst.CbufOffset, isFP64: true);
+
+            EmitFsetp(
+                context, inst.FComp, inst.Bop, lhs, rhs, inst.SrcPred, inst.SrcPredInv, inst.DestPred, inst.DestPredInv,
+                inst.AbsA, inst.AbsB, inst.NegA, inst.NegB, writeCC: false, isFP64: true);
+        }
+
+        // FP32 compare-and-select (InstFcmpR): all three operands are registers.
+        public static void FcmpR(EmitterContext context)
+        {
+            var inst = context.GetOp<InstFcmpR>();
+
+            Operand whenTrue = GetSrcReg(context, inst.SrcA);
+            Operand whenFalse = GetSrcReg(context, inst.SrcB);
+            Operand tested = GetSrcReg(context, inst.SrcC);
+
+            EmitFcmp(context, inst.FComp, whenTrue, whenFalse, tested, rd: inst.Dest);
+        }
+
+        // FP32 compare-and-select (InstFcmpI): the second selectable value is the converted Imm20 field.
+        public static void FcmpI(EmitterContext context)
+        {
+            var inst = context.GetOp<InstFcmpI>();
+
+            Operand whenTrue = GetSrcReg(context, inst.SrcA);
+            Operand whenFalse = GetSrcImm(context, Imm20ToFloat(inst.Imm20));
+            Operand tested = GetSrcReg(context, inst.SrcC);
+
+            EmitFcmp(context, inst.FComp, whenTrue, whenFalse, tested, rd: inst.Dest);
+        }
+
+        // FP32 compare-and-select (InstFcmpC): the second selectable value comes from a constant buffer.
+        public static void FcmpC(EmitterContext context)
+        {
+            var inst = context.GetOp<InstFcmpC>();
+
+            Operand whenTrue = GetSrcReg(context, inst.SrcA);
+            Operand whenFalse = GetSrcCbuf(context, inst.CbufSlot, inst.CbufOffset);
+            Operand tested = GetSrcReg(context, inst.SrcC);
+
+            EmitFcmp(context, inst.FComp, whenTrue, whenFalse, tested, rd: inst.Dest);
+        }
+
+        // FP32 compare-and-select (InstFcmpRc): the SrcC register is the second selectable value and the
+        // constant buffer value is the one tested against zero.
+        public static void FcmpRc(EmitterContext context)
+        {
+            var inst = context.GetOp<InstFcmpRc>();
+
+            Operand whenTrue = GetSrcReg(context, inst.SrcA);
+            Operand whenFalse = GetSrcReg(context, inst.SrcC);
+            Operand tested = GetSrcCbuf(context, inst.CbufSlot, inst.CbufOffset);
+
+            EmitFcmp(context, inst.FComp, whenTrue, whenFalse, tested, rd: inst.Dest);
+        }
+
+        // FP32 compare-and-set to register (InstFsetR): both operands are registers.
+        public static void FsetR(EmitterContext context)
+        {
+            var inst = context.GetOp<InstFsetR>();
+
+            Operand lhs = GetSrcReg(context, inst.SrcA);
+            Operand rhs = GetSrcReg(context, inst.SrcB);
+
+            EmitFset(
+                context, inst.FComp, inst.Bop, lhs, rhs, inst.SrcPred, inst.SrcPredInv, inst.Dest,
+                inst.AbsA, inst.AbsB, inst.NegA, inst.NegB, inst.BVal, inst.WriteCC);
+        }
+
+        // FP32 compare-and-set to register (InstFsetC): second operand comes from a constant buffer.
+        public static void FsetC(EmitterContext context)
+        {
+            var inst = context.GetOp<InstFsetC>();
+
+            Operand lhs = GetSrcReg(context, inst.SrcA);
+            Operand rhs = GetSrcCbuf(context, inst.CbufSlot, inst.CbufOffset);
+
+            EmitFset(
+                context, inst.FComp, inst.Bop, lhs, rhs, inst.SrcPred, inst.SrcPredInv, inst.Dest,
+                inst.AbsA, inst.AbsB, inst.NegA, inst.NegB, inst.BVal, inst.WriteCC);
+        }
+
+        // FP32 compare-and-set to register (InstFsetI): second operand is the converted Imm20 field.
+        public static void FsetI(EmitterContext context)
+        {
+            var inst = context.GetOp<InstFsetI>();
+
+            Operand lhs = GetSrcReg(context, inst.SrcA);
+            Operand rhs = GetSrcImm(context, Imm20ToFloat(inst.Imm20));
+
+            EmitFset(
+                context, inst.FComp, inst.Bop, lhs, rhs, inst.SrcPred, inst.SrcPredInv, inst.Dest,
+                inst.AbsA, inst.AbsB, inst.NegA, inst.NegB, inst.BVal, inst.WriteCC);
+        }
+
+        // FP32 compare-and-set to predicates (InstFsetpR): both operands are registers.
+        public static void FsetpR(EmitterContext context)
+        {
+            var inst = context.GetOp<InstFsetpR>();
+
+            Operand lhs = GetSrcReg(context, inst.SrcA);
+            Operand rhs = GetSrcReg(context, inst.SrcB);
+
+            EmitFsetp(
+                context, inst.FComp, inst.Bop, lhs, rhs, inst.SrcPred, inst.SrcPredInv, inst.DestPred, inst.DestPredInv,
+                inst.AbsA, inst.AbsB, inst.NegA, inst.NegB, inst.WriteCC);
+        }
+
+        // FP32 compare-and-set to predicates (InstFsetpI): second operand is the converted Imm20 field.
+        public static void FsetpI(EmitterContext context)
+        {
+            var inst = context.GetOp<InstFsetpI>();
+
+            Operand lhs = GetSrcReg(context, inst.SrcA);
+            Operand rhs = GetSrcImm(context, Imm20ToFloat(inst.Imm20));
+
+            EmitFsetp(
+                context, inst.FComp, inst.Bop, lhs, rhs, inst.SrcPred, inst.SrcPredInv, inst.DestPred, inst.DestPredInv,
+                inst.AbsA, inst.AbsB, inst.NegA, inst.NegB, inst.WriteCC);
+        }
+
+        // FP32 compare-and-set to predicates (InstFsetpC): second operand comes from a constant buffer.
+        public static void FsetpC(EmitterContext context)
+        {
+            var inst = context.GetOp<InstFsetpC>();
+
+            Operand lhs = GetSrcReg(context, inst.SrcA);
+            Operand rhs = GetSrcCbuf(context, inst.CbufSlot, inst.CbufOffset);
+
+            EmitFsetp(
+                context, inst.FComp, inst.Bop, lhs, rhs, inst.SrcPred, inst.SrcPredInv, inst.DestPred, inst.DestPredInv,
+                inst.AbsA, inst.AbsB, inst.NegA, inst.NegB, inst.WriteCC);
+        }
+
+        // Half-precision pair compare-and-set to register (InstHset2R): both operands are swizzled registers.
+        public static void Hset2R(EmitterContext context)
+        {
+            var inst = context.GetOp<InstHset2R>();
+
+            Operand[] lhs = GetHalfSrc(context, inst.ASwizzle, inst.SrcA, inst.NegA, inst.AbsA);
+            Operand[] rhs = GetHalfSrc(context, inst.BSwizzle, inst.SrcB, inst.NegB, inst.AbsB);
+
+            EmitHset2(context, inst.Cmp, inst.Bop, lhs, rhs, inst.SrcPred, inst.SrcPredInv, rd: inst.Dest, boolFloat: inst.Bval);
+        }
+
+        // Half-precision pair compare-and-set to register (InstHset2I): second operand is built from BimmH0/BimmH1.
+        public static void Hset2I(EmitterContext context)
+        {
+            var inst = context.GetOp<InstHset2I>();
+
+            Operand[] lhs = GetHalfSrc(context, inst.ASwizzle, inst.SrcA, inst.NegA, inst.AbsA);
+            Operand[] rhs = GetHalfSrc(context, inst.BimmH0, inst.BimmH1);
+
+            EmitHset2(context, inst.Cmp, inst.Bop, lhs, rhs, inst.SrcPred, inst.SrcPredInv, rd: inst.Dest, boolFloat: inst.Bval);
+        }
+
+        // Half-precision pair compare-and-set to register (InstHset2C): second operand is a constant buffer
+        // value read with HalfSwizzle.F32; no absolute value is applied to it.
+        public static void Hset2C(EmitterContext context)
+        {
+            var inst = context.GetOp<InstHset2C>();
+
+            Operand[] lhs = GetHalfSrc(context, inst.ASwizzle, inst.SrcA, inst.NegA, inst.AbsA);
+            Operand[] rhs = GetHalfSrc(context, HalfSwizzle.F32, inst.CbufSlot, inst.CbufOffset, inst.NegB, false);
+
+            EmitHset2(context, inst.Cmp, inst.Bop, lhs, rhs, inst.SrcPred, inst.SrcPredInv, rd: inst.Dest, boolFloat: inst.Bval);
+        }
+
+        // Half-precision pair compare-and-set to predicates (InstHsetp2R): both operands are swizzled registers.
+        // Unlike the immediate and constant buffer forms, the condition is read from the FComp2 field.
+        public static void Hsetp2R(EmitterContext context)
+        {
+            var inst = context.GetOp<InstHsetp2R>();
+
+            Operand[] lhs = GetHalfSrc(context, inst.ASwizzle, inst.SrcA, inst.NegA, inst.AbsA);
+            Operand[] rhs = GetHalfSrc(context, inst.BSwizzle, inst.SrcB, inst.NegB, inst.AbsB);
+
+            EmitHsetp2(
+                context, inst.FComp2, inst.Bop, lhs, rhs, inst.SrcPred, inst.SrcPredInv,
+                inst.DestPred, inst.DestPredInv, hAnd: inst.HAnd);
+        }
+
+        // Half-precision pair compare-and-set to predicates (InstHsetp2I): second operand is built from BimmH0/BimmH1.
+        public static void Hsetp2I(EmitterContext context)
+        {
+            var inst = context.GetOp<InstHsetp2I>();
+
+            Operand[] lhs = GetHalfSrc(context, inst.ASwizzle, inst.SrcA, inst.NegA, inst.AbsA);
+            Operand[] rhs = GetHalfSrc(context, inst.BimmH0, inst.BimmH1);
+
+            EmitHsetp2(
+                context, inst.FComp, inst.Bop, lhs, rhs, inst.SrcPred, inst.SrcPredInv,
+                inst.DestPred, inst.DestPredInv, hAnd: inst.HAnd);
+        }
+
+        // Half-precision pair compare-and-set to predicates (InstHsetp2C): second operand is a constant
+        // buffer value read with HalfSwizzle.F32.
+        public static void Hsetp2C(EmitterContext context)
+        {
+            var inst = context.GetOp<InstHsetp2C>();
+
+            Operand[] lhs = GetHalfSrc(context, inst.ASwizzle, inst.SrcA, inst.NegA, inst.AbsA);
+            Operand[] rhs = GetHalfSrc(context, HalfSwizzle.F32, inst.CbufSlot, inst.CbufOffset, inst.NegB, inst.AbsB);
+
+            EmitHsetp2(
+                context, inst.FComp, inst.Bop, lhs, rhs, inst.SrcPred, inst.SrcPredInv,
+                inst.DestPred, inst.DestPredInv, hAnd: inst.HAnd);
+        }
+
+        // Shared emitter for FP compare-and-select: compares srcC against the float constant 0 using
+        // "cmpOp" and writes srcA to register "rd" when the comparison holds, srcB otherwise.
+        private static void EmitFcmp(EmitterContext context, FComp cmpOp, Operand srcA, Operand srcB, Operand srcC, int rd)
+        {
+            Operand condition = GetFPComparison(context, cmpOp, srcC, ConstF(0));
+            Operand selected = context.ConditionalSelect(condition, srcA, srcB);
+
+            Operand dest = GetDest(rd);
+
+            context.Copy(dest, selected);
+        }
+
+        // Shared emitter for FP compare-and-set to a register.
+        // Applies abs/neg modifiers, compares the sources with "cmpOp", combines the outcome with the
+        // source predicate through "logicOp" and writes the result to register "rd".
+        // When "boolFloat" is set the result is first converted to ConstF(1) / Const(0) and the FP flags
+        // are updated; otherwise the raw comparison result is stored and the integer Z/N flags are updated.
+        private static void EmitFset(
+            EmitterContext context,
+            FComp cmpOp,
+            BoolOp logicOp,
+            Operand srcA,
+            Operand srcB,
+            int srcPred,
+            bool srcPredInv,
+            int rd,
+            bool absoluteA,
+            bool absoluteB,
+            bool negateA,
+            bool negateB,
+            bool boolFloat,
+            bool writeCC,
+            bool isFP64 = false)
+        {
+            Instruction fpType = Instruction.FP32;
+
+            if (isFP64)
+            {
+                fpType = Instruction.FP64;
+            }
+
+            Operand lhs = context.FPAbsNeg(srcA, absoluteA, negateA, fpType);
+            Operand rhs = context.FPAbsNeg(srcB, absoluteB, negateB, fpType);
+
+            Operand comparison = GetFPComparison(context, cmpOp, lhs, rhs, fpType);
+            Operand pred = GetPredicate(context, srcPred, srcPredInv);
+
+            Operand result = GetPredLogicalOp(context, logicOp, comparison, pred);
+
+            Operand dest = GetDest(rd);
+
+            if (!boolFloat)
+            {
+                context.Copy(dest, result);
+
+                SetZnFlags(context, result, writeCC, extended: false);
+
+                return;
+            }
+
+            result = context.ConditionalSelect(result, ConstF(1), Const(0));
+
+            context.Copy(dest, result);
+
+            SetFPZnFlags(context, result, writeCC);
+        }
+
+        // Shared emitter for FP compare-and-set to predicates.
+        // Applies abs/neg modifiers, compares the sources with "cmpOp", then writes the comparison and its
+        // bitwise complement, each combined with the source predicate through "logicOp", to the
+        // "destPred" and "destPredInv" predicate registers respectively.
+        // Note: "writeCC" is not read by this method.
+        private static void EmitFsetp(
+            EmitterContext context,
+            FComp cmpOp,
+            BoolOp logicOp,
+            Operand srcA,
+            Operand srcB,
+            int srcPred,
+            bool srcPredInv,
+            int destPred,
+            int destPredInv,
+            bool absoluteA,
+            bool absoluteB,
+            bool negateA,
+            bool negateB,
+            bool writeCC,
+            bool isFP64 = false)
+        {
+            Instruction fpType = Instruction.FP32;
+
+            if (isFP64)
+            {
+                fpType = Instruction.FP64;
+            }
+
+            Operand lhs = context.FPAbsNeg(srcA, absoluteA, negateA, fpType);
+            Operand rhs = context.FPAbsNeg(srcB, absoluteB, negateB, fpType);
+
+            Operand matched = GetFPComparison(context, cmpOp, lhs, rhs, fpType);
+            Operand unmatched = context.BitwiseNot(matched);
+            Operand pred = GetPredicate(context, srcPred, srcPredInv);
+
+            matched = GetPredLogicalOp(context, logicOp, matched, pred);
+            unmatched = GetPredLogicalOp(context, logicOp, unmatched, pred);
+
+            context.Copy(Register(destPred, RegisterType.Predicate), matched);
+            context.Copy(Register(destPredInv, RegisterType.Predicate), unmatched);
+        }
+
+        // Shared emitter for half-precision pair compare-and-set to a register.
+        // Compares both element pairs with "cmpOp" and combines each outcome with the source predicate.
+        // With "boolFloat" the outcomes become ConstF(1) / Const(0) and are packed with PackHalf2x16;
+        // otherwise element 0 is masked to its low 16 bits and element 1 is shifted left by 16 before
+        // the two are OR-ed together. The packed value is copied to register "rd".
+        private static void EmitHset2(
+            EmitterContext context,
+            FComp cmpOp,
+            BoolOp logicOp,
+            Operand[] srcA,
+            Operand[] srcB,
+            int srcPred,
+            bool srcPredInv,
+            int rd,
+            bool boolFloat)
+        {
+            Operand first = GetFPComparison(context, cmpOp, srcA[0], srcB[0]);
+            Operand second = GetFPComparison(context, cmpOp, srcA[1], srcB[1]);
+
+            Operand pred = GetPredicate(context, srcPred, srcPredInv);
+
+            first = GetPredLogicalOp(context, logicOp, first, pred);
+            second = GetPredLogicalOp(context, logicOp, second, pred);
+
+            if (!boolFloat)
+            {
+                Operand lowBits = context.BitwiseAnd(first, Const(0xffff));
+                Operand highBits = context.ShiftLeft(second, Const(16));
+
+                Operand merged = context.BitwiseOr(lowBits, highBits);
+
+                context.Copy(GetDest(rd), merged);
+
+                return;
+            }
+
+            first = context.ConditionalSelect(first, ConstF(1), Const(0));
+            second = context.ConditionalSelect(second, ConstF(1), Const(0));
+
+            context.Copy(GetDest(rd), context.PackHalf2x16(first, second));
+        }
+
+        // Shared emitter for half-precision pair compare-and-set to predicates.
+        // Without "hAnd", the comparison of element 0 goes to "destPred" and that of element 1 to "destPredInv".
+        // With "hAnd", "destPred" receives the AND of both comparisons and "destPredInv" its complement.
+        // Both values are combined with the source predicate through "logicOp" before being written.
+        private static void EmitHsetp2(
+            EmitterContext context,
+            FComp cmpOp,
+            BoolOp logicOp,
+            Operand[] srcA,
+            Operand[] srcB,
+            int srcPred,
+            bool srcPredInv,
+            int destPred,
+            int destPredInv,
+            bool hAnd)
+        {
+            Operand first = GetFPComparison(context, cmpOp, srcA[0], srcB[0]);
+            Operand second = GetFPComparison(context, cmpOp, srcA[1], srcB[1]);
+
+            if (hAnd)
+            {
+                first = context.BitwiseAnd(first, second);
+                second = context.BitwiseNot(first);
+            }
+
+            Operand pred = GetPredicate(context, srcPred, srcPredInv);
+
+            first = GetPredLogicalOp(context, logicOp, first, pred);
+            second = GetPredLogicalOp(context, logicOp, second, pred);
+
+            Operand firstDest = Register(destPred, RegisterType.Predicate);
+            context.Copy(firstDest, first);
+
+            Operand secondDest = Register(destPredInv, RegisterType.Predicate);
+            context.Copy(secondDest, second);
+        }
+
+        // Builds the IR for a floating point comparison of srcA and srcB under condition "cond".
+        // T and F yield the IR true/false constants; Nan is true when either operand is NaN and Num is its
+        // complement. Every other condition maps (with the Nan bit masked off) to an ordered comparison
+        // instruction; when the Nan bit is set the result is additionally OR-ed with the NaN test of each operand.
+        // Throws ArgumentException when the masked condition is not one of Lt, Eq, Le, Gt, Ne or Ge.
+        private static Operand GetFPComparison(EmitterContext context, FComp cond, Operand srcA, Operand srcB, Instruction fpType = Instruction.FP32)
+        {
+            if (cond == FComp.T)
+            {
+                return Const(IrConsts.True);
+            }
+
+            if (cond == FComp.F)
+            {
+                return Const(IrConsts.False);
+            }
+
+            if (cond == FComp.Nan || cond == FComp.Num)
+            {
+                Operand anyNan = context.BitwiseOr(context.IsNan(srcA, fpType), context.IsNan(srcB, fpType));
+
+                return cond == FComp.Num ? context.BitwiseNot(anyNan) : anyNan;
+            }
+
+            Instruction compareInst = (cond & ~FComp.Nan) switch
+            {
+                FComp.Lt => Instruction.CompareLess,
+                FComp.Eq => Instruction.CompareEqual,
+                FComp.Le => Instruction.CompareLessOrEqual,
+                FComp.Gt => Instruction.CompareGreater,
+                FComp.Ne => Instruction.CompareNotEqual,
+                FComp.Ge => Instruction.CompareGreaterOrEqual,
+                _ => throw new ArgumentException($"Unexpected condition \"{cond}\".")
+            };
+
+            Operand result = context.Add(compareInst | fpType, Local(), srcA, srcB);
+
+            // Conditions carrying the Nan bit also hold when either operand is NaN.
+            if ((cond & FComp.Nan) != 0)
+            {
+                result = context.BitwiseOr(result, context.IsNan(srcA, fpType));
+                result = context.BitwiseOr(result, context.IsNan(srcB, fpType));
+            }
+
+            return result;
+        }
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/Instructions/InstEmitFloatMinMax.cs b/src/Ryujinx.Graphics.Shader/Instructions/InstEmitFloatMinMax.cs
new file mode 100644
index 00000000..412a5305
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/Instructions/InstEmitFloatMinMax.cs
@@ -0,0 +1,106 @@
+using Ryujinx.Graphics.Shader.Decoders;
+using Ryujinx.Graphics.Shader.IntermediateRepresentation;
+using Ryujinx.Graphics.Shader.Translation;
+
+using static Ryujinx.Graphics.Shader.Instructions.InstEmitAluHelper;
+using static Ryujinx.Graphics.Shader.Instructions.InstEmitHelper;
+
+namespace Ryujinx.Graphics.Shader.Instructions
+{
+ static partial class InstEmit
+ {
+ /// <summary>
+ /// Handles <see cref="InstDmnmxR"/>: FP64 min/max with both sources read from registers.
+ /// </summary>
+ public static void DmnmxR(EmitterContext context)
+ {
+ InstDmnmxR op = context.GetOp<InstDmnmxR>();
+
+ Operand lhs = GetSrcReg(context, op.SrcA, isFP64: true);
+ Operand rhs = GetSrcReg(context, op.SrcB, isFP64: true);
+ Operand selector = GetPredicate(context, op.SrcPred, op.SrcPredInv);
+
+ EmitFmnmx(context, lhs, rhs, selector, op.Dest, op.AbsA, op.AbsB, op.NegA, op.NegB, op.WriteCC, isFP64: true);
+ }
+
+ /// <summary>
+ /// Handles <see cref="InstDmnmxI"/>: FP64 min/max of a register source and a 20-bit immediate.
+ /// </summary>
+ public static void DmnmxI(EmitterContext context)
+ {
+ InstDmnmxI op = context.GetOp<InstDmnmxI>();
+
+ Operand lhs = GetSrcReg(context, op.SrcA, isFP64: true);
+ Operand rhs = GetSrcImm(context, Imm20ToFloat(op.Imm20), isFP64: true);
+ Operand selector = GetPredicate(context, op.SrcPred, op.SrcPredInv);
+
+ EmitFmnmx(context, lhs, rhs, selector, op.Dest, op.AbsA, op.AbsB, op.NegA, op.NegB, op.WriteCC, isFP64: true);
+ }
+
+ /// <summary>
+ /// Handles <see cref="InstDmnmxC"/>: FP64 min/max of a register source and a constant buffer source.
+ /// </summary>
+ public static void DmnmxC(EmitterContext context)
+ {
+ InstDmnmxC op = context.GetOp<InstDmnmxC>();
+
+ Operand lhs = GetSrcReg(context, op.SrcA, isFP64: true);
+ Operand rhs = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset, isFP64: true);
+ Operand selector = GetPredicate(context, op.SrcPred, op.SrcPredInv);
+
+ EmitFmnmx(context, lhs, rhs, selector, op.Dest, op.AbsA, op.AbsB, op.NegA, op.NegB, op.WriteCC, isFP64: true);
+ }
+
+ /// <summary>
+ /// Handles <see cref="InstFmnmxR"/>: FP32 min/max with both sources read from registers.
+ /// </summary>
+ public static void FmnmxR(EmitterContext context)
+ {
+ InstFmnmxR op = context.GetOp<InstFmnmxR>();
+
+ Operand lhs = GetSrcReg(context, op.SrcA);
+ Operand rhs = GetSrcReg(context, op.SrcB);
+ Operand selector = GetPredicate(context, op.SrcPred, op.SrcPredInv);
+
+ EmitFmnmx(context, lhs, rhs, selector, op.Dest, op.AbsA, op.AbsB, op.NegA, op.NegB, op.WriteCC);
+ }
+
+ /// <summary>
+ /// Handles <see cref="InstFmnmxI"/>: FP32 min/max of a register source and a 20-bit immediate.
+ /// </summary>
+ public static void FmnmxI(EmitterContext context)
+ {
+ InstFmnmxI op = context.GetOp<InstFmnmxI>();
+
+ Operand lhs = GetSrcReg(context, op.SrcA);
+ Operand rhs = GetSrcImm(context, Imm20ToFloat(op.Imm20));
+ Operand selector = GetPredicate(context, op.SrcPred, op.SrcPredInv);
+
+ EmitFmnmx(context, lhs, rhs, selector, op.Dest, op.AbsA, op.AbsB, op.NegA, op.NegB, op.WriteCC);
+ }
+
+ /// <summary>
+ /// Handles <see cref="InstFmnmxC"/>: FP32 min/max of a register source and a constant buffer source.
+ /// </summary>
+ public static void FmnmxC(EmitterContext context)
+ {
+ InstFmnmxC op = context.GetOp<InstFmnmxC>();
+
+ Operand lhs = GetSrcReg(context, op.SrcA);
+ Operand rhs = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);
+ Operand selector = GetPredicate(context, op.SrcPred, op.SrcPredInv);
+
+ EmitFmnmx(context, lhs, rhs, selector, op.Dest, op.AbsA, op.AbsB, op.NegA, op.NegB, op.WriteCC);
+ }
+
+ /// <summary>
+ /// Shared min/max emitter: computes both the minimum and the maximum of the (optionally
+ /// abs/negated) sources and picks the minimum when <paramref name="srcPred"/> is true,
+ /// the maximum otherwise. The result is written to <paramref name="rd"/> and passed to
+ /// <c>SetFPZnFlags</c> together with <paramref name="writeCC"/>.
+ /// </summary>
+ private static void EmitFmnmx(
+ EmitterContext context,
+ Operand srcA,
+ Operand srcB,
+ Operand srcPred,
+ int rd,
+ bool absoluteA,
+ bool absoluteB,
+ bool negateA,
+ bool negateB,
+ bool writeCC,
+ bool isFP64 = false)
+ {
+ Instruction fpType = Instruction.FP32;
+
+ if (isFP64)
+ {
+ fpType = Instruction.FP64;
+ }
+
+ Operand lhs = context.FPAbsNeg(srcA, absoluteA, negateA, fpType);
+ Operand rhs = context.FPAbsNeg(srcB, absoluteB, negateB, fpType);
+
+ // Arguments are evaluated left to right, so the minimum is emitted before the maximum.
+ Operand selected = context.ConditionalSelect(
+ srcPred,
+ context.FPMinimum(lhs, rhs, fpType),
+ context.FPMaximum(lhs, rhs, fpType));
+
+ SetDest(context, selected, rd, isFP64);
+
+ SetFPZnFlags(context, selected, writeCC, fpType);
+ }
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/Instructions/InstEmitFlowControl.cs b/src/Ryujinx.Graphics.Shader/Instructions/InstEmitFlowControl.cs
new file mode 100644
index 00000000..91c23230
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/Instructions/InstEmitFlowControl.cs
@@ -0,0 +1,322 @@
+using Ryujinx.Graphics.Shader.Decoders;
+using Ryujinx.Graphics.Shader.IntermediateRepresentation;
+using Ryujinx.Graphics.Shader.Translation;
+using System.Collections.Generic;
+using System.Linq;
+
+using static Ryujinx.Graphics.Shader.Instructions.InstEmitHelper;
+using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
+
+namespace Ryujinx.Graphics.Shader.Instructions
+{
+ static partial class InstEmit
+ {
+ /// <summary>
+ /// Handles <see cref="InstBra"/> by branching to the address of the current block's last successor.
+ /// </summary>
+ public static void Bra(EmitterContext context)
+ {
+ // None of the decoded fields are read here; only the decode call itself is kept.
+ context.GetOp<InstBra>();
+
+ EmitBranch(context, context.CurrBlock.Successors[^1].Address);
+ }
+
+ /// <summary>
+ /// Handles <see cref="InstBrk"/> by branching to the sync target(s) recorded on the current block.
+ /// </summary>
+ public static void Brk(EmitterContext context)
+ {
+ // None of the decoded fields are read here; only the decode call itself is kept.
+ context.GetOp<InstBrk>();
+
+ EmitBrkContSync(context);
+ }
+
+ /// <summary>
+ /// Handles <see cref="InstBrx"/> (indirect branch): the branch address is computed from a register plus
+ /// the operation's absolute address, then compared against the addresses of the successor blocks
+ /// to select the block to jump to.
+ /// </summary>
+ /// <param name="context">Emitter context holding the current block and operation.</param>
+ public static void Brx(EmitterContext context)
+ {
+ InstBrx op = context.GetOp<InstBrx>();
+ InstOp currOp = context.CurrOp;
+ // When HasNext() is true the first successor is skipped as a BRX target
+ // (presumably it is the fall-through block -- TODO confirm against Block.HasNext).
+ int startIndex = context.CurrBlock.HasNext() ? 1 : 0;
+
+ if (context.CurrBlock.Successors.Count <= startIndex)
+ {
+ context.Config.GpuAccessor.Log($"Failed to find targets for BRX instruction at 0x{currOp.Address:X}.");
+ return;
+ }
+
+ int offset = (int)currOp.GetAbsoluteAddress();
+
+ Operand address = context.IAdd(Register(op.SrcA, RegisterType.Gpr), Const(offset));
+
+ var targets = context.CurrBlock.Successors.Skip(startIndex);
+
+ bool allTargetsSinglePred = true;
+ int total = context.CurrBlock.Successors.Count - startIndex;
+ int count = 0;
+
+ // Every target except the last (in ascending address order) must have a single predecessor
+ // and be located after the current block for the chained form below to be usable.
+ foreach (var target in targets.OrderBy(x => x.Address))
+ {
+ if (++count < total && (target.Predecessors.Count > 1 || target.Address <= context.CurrBlock.Address))
+ {
+ allTargetsSinglePred = false;
+ break;
+ }
+ }
+
+ if (allTargetsSinglePred)
+ {
+ // Chain blocks, each target block will check if the BRX target address
+ // matches its own address, if not, it jumps to the next target which will do the same check,
+ // until it reaches the last possible target, which is executed unconditionally.
+ // We can only do this if the BRX block is the only predecessor of all target blocks.
+ // Additionally, this is not supported for blocks located before the current block,
+ // since it will be too late to insert a label, but this is something that can be improved
+ // in the future if necessary.
+
+ var sortedTargets = targets.OrderBy(x => x.Address);
+
+ Block currentTarget = null;
+ ulong firstTargetAddress = 0;
+
+ foreach (Block nextTarget in sortedTargets)
+ {
+ if (currentTarget != null)
+ {
+ // Consecutive entries with the same address are registered only once.
+ if (currentTarget.Address != nextTarget.Address)
+ {
+ context.SetBrxTarget(currentTarget.Address, address, (int)currentTarget.Address, nextTarget.Address);
+ }
+ }
+ else
+ {
+ firstTargetAddress = nextTarget.Address;
+ }
+
+ currentTarget = nextTarget;
+ }
+
+ // Enter the chain at the lowest-addressed target.
+ context.Branch(context.GetLabel(firstTargetAddress));
+ }
+ else
+ {
+ // Emit the branches sequentially.
+ // This generates slightly worse code, but should work for all cases.
+
+ var sortedTargets = targets.OrderByDescending(x => x.Address);
+ ulong lastTargetAddress = ulong.MaxValue;
+
+ count = 0;
+
+ foreach (Block target in sortedTargets)
+ {
+ Operand label = context.GetLabel(target.Address);
+
+ if (++count < total)
+ {
+ // Skip the compare when the previous entry had the same address.
+ if (target.Address != lastTargetAddress)
+ {
+ context.BranchIfTrue(label, context.ICompareEqual(address, Const((int)target.Address)));
+ }
+
+ lastTargetAddress = target.Address;
+ }
+ else
+ {
+ // The final target is taken unconditionally.
+ context.Branch(label);
+ }
+ }
+ }
+ }
+
+ /// <summary>
+ /// Handles <see cref="InstCal"/>: looks up the function at the operation's absolute address and either
+ /// emits a call to it or, for compiler generated functions, the matching FSI begin/end operation.
+ /// </summary>
+ public static void Cal(EmitterContext context)
+ {
+ // None of the decoded fields are read here; only the decode call itself is kept.
+ context.GetOp<InstCal>();
+
+ DecodedFunction function = context.Program.GetFunctionByAddress(context.CurrOp.GetAbsoluteAddress());
+
+ if (!function.IsCompilerGenerated)
+ {
+ context.Call(function.Id, false);
+ return;
+ }
+
+ // Compiler generated functions of any other type emit nothing.
+ switch (function.Type)
+ {
+ case FunctionType.BuiltInFSIBegin:
+ context.FSIBegin();
+ break;
+ case FunctionType.BuiltInFSIEnd:
+ context.FSIEnd();
+ break;
+ }
+ }
+
+ /// <summary>
+ /// Handles <see cref="InstCont"/> by branching to the sync target(s) recorded on the current block.
+ /// </summary>
+ public static void Cont(EmitterContext context)
+ {
+ // None of the decoded fields are read here; only the decode call itself is kept.
+ context.GetOp<InstCont>();
+
+ EmitBrkContSync(context);
+ }
+
+ /// <summary>
+ /// Handles <see cref="InstExit"/>: emits a return from the main function, made conditional when the
+ /// operation's condition code is not <see cref="Ccc.T"/>. On non-main functions a message is logged instead.
+ /// </summary>
+ public static void Exit(EmitterContext context)
+ {
+ InstExit op = context.GetOp<InstExit>();
+
+ if (context.IsNonMain)
+ {
+ context.Config.GpuAccessor.Log("Invalid exit on non-main function.");
+ return;
+ }
+
+ if (op.Ccc == Ccc.T)
+ {
+ context.Return();
+ return;
+ }
+
+ Operand cond = GetCondition(context, op.Ccc, IrConsts.False);
+
+ // If the condition is always false, we don't need to do anything.
+ bool neverTaken = cond.Type == OperandType.Constant && cond.Value == IrConsts.False;
+
+ if (neverTaken)
+ {
+ return;
+ }
+
+ // Jump over the return when the condition does not hold.
+ Operand lblSkip = Label();
+ context.BranchIfFalse(lblSkip, cond);
+ context.Return();
+ context.MarkLabel(lblSkip);
+ }
+
+ /// <summary>
+ /// Handles <see cref="InstKil"/> by emitting a discard operation.
+ /// </summary>
+ public static void Kil(EmitterContext context)
+ {
+ // None of the decoded fields are read here; only the decode call itself is kept.
+ context.GetOp<InstKil>();
+
+ context.Discard();
+ }
+
+ /// <summary>
+ /// Handles <see cref="InstPbk"/> by recording this push operation's id for each of its consumers.
+ /// </summary>
+ public static void Pbk(EmitterContext context)
+ {
+ // None of the decoded fields are read here; only the decode call itself is kept.
+ context.GetOp<InstPbk>();
+
+ EmitPbkPcntSsy(context);
+ }
+
+ /// <summary>
+ /// Handles <see cref="InstPcnt"/> by recording this push operation's id for each of its consumers.
+ /// </summary>
+ public static void Pcnt(EmitterContext context)
+ {
+ // None of the decoded fields are read here; only the decode call itself is kept.
+ context.GetOp<InstPcnt>();
+
+ EmitPbkPcntSsy(context);
+ }
+
+ /// <summary>
+ /// Handles <see cref="InstRet"/>: emits a return on non-main functions and logs a message on the main function.
+ /// </summary>
+ public static void Ret(EmitterContext context)
+ {
+ // None of the decoded fields are read here; only the decode call itself is kept.
+ context.GetOp<InstRet>();
+
+ if (!context.IsNonMain)
+ {
+ context.Config.GpuAccessor.Log("Invalid return on main function.");
+ return;
+ }
+
+ context.Return();
+ }
+
+ /// <summary>
+ /// Handles <see cref="InstSsy"/> by recording this push operation's id for each of its consumers.
+ /// </summary>
+ public static void Ssy(EmitterContext context)
+ {
+ // None of the decoded fields are read here; only the decode call itself is kept.
+ context.GetOp<InstSsy>();
+
+ EmitPbkPcntSsy(context);
+ }
+
+ /// <summary>
+ /// Handles <see cref="InstSync"/> by branching to the sync target(s) recorded on the current block.
+ /// </summary>
+ public static void Sync(EmitterContext context)
+ {
+ // None of the decoded fields are read here; only the decode call itself is kept.
+ context.GetOp<InstSync>();
+
+ EmitBrkContSync(context);
+ }
+
+ /// <summary>
+ /// Shared emitter for the push operations (PBK, PCNT, SSY): for every consumer block of the current
+ /// push operation, copies the push operation id that block associates with it into the consumer's local.
+ /// </summary>
+ private static void EmitPbkPcntSsy(EmitterContext context)
+ {
+ var pushAddress = context.CurrOp.Address;
+
+ // Find the push entry of the current block that belongs to this operation.
+ var consumers = context.CurrBlock.PushOpCodes.First(x => x.Op.Address == pushAddress).Consumers;
+
+ foreach (KeyValuePair<Block, Operand> consumer in consumers)
+ {
+ int pushOpId = consumer.Key.SyncTargets[pushAddress].PushOpId;
+
+ context.Copy(consumer.Value, Const(pushOpId));
+ }
+ }
+
+ /// <summary>
+ /// Shared emitter for BRK, CONT and SYNC. With a single sync target a plain branch is emitted;
+ /// otherwise one conditional branch is emitted per target, taken when the local written by the
+ /// push operation equals that target's push operation id.
+ /// </summary>
+ private static void EmitBrkContSync(EmitterContext context)
+ {
+ var syncTargets = context.CurrBlock.SyncTargets;
+
+ if (syncTargets.Count == 1)
+ {
+ // If we have only one target, then the SSY/PBK is basically
+ // a branch, we can produce better codegen for this case.
+ EmitBranch(context, syncTargets.Values.First().PushOpInfo.Op.GetAbsoluteAddress());
+ return;
+ }
+
+ // TODO: Support CC here as well (condition).
+ foreach (SyncTarget syncTarget in syncTargets.Values)
+ {
+ PushOpInfo pushInfo = syncTarget.PushOpInfo;
+
+ Operand label = context.GetLabel(pushInfo.Op.GetAbsoluteAddress());
+ Operand pushedId = pushInfo.Consumers[context.CurrBlock];
+ Operand isThisTarget = context.ICompareEqual(pushedId, Const(syncTarget.PushOpId));
+
+ context.BranchIfTrue(label, isThisTarget);
+ }
+ }
+
+ /// <summary>
+ /// Emits a branch to <paramref name="address"/>, made conditional on the predicate and condition
+ /// code decoded from the current operation's raw opcode.
+ /// </summary>
+ /// <param name="context">Emitter context holding the current operation.</param>
+ /// <param name="address">Address of the branch target.</param>
+ private static void EmitBranch(EmitterContext context, ulong address)
+ {
+ InstOp op = context.CurrOp;
+ InstConditional opCond = new InstConditional(op.RawOpCode);
+
+ // If we're branching to the next instruction, then the branch
+ // is useless and we can ignore it.
+ if (address == op.Address + 8)
+ {
+ return;
+ }
+
+ Operand label = context.GetLabel(address);
+
+ Operand pred = Register(opCond.Pred, RegisterType.Predicate);
+
+ bool predIsAlwaysTrue = opCond.Pred == RegisterConsts.PredicateTrueIndex;
+
+ if (opCond.Ccc == Ccc.T)
+ {
+ // No condition code to test, only the predicate decides.
+ if (predIsAlwaysTrue)
+ {
+ context.Branch(label);
+ }
+ else if (opCond.PredInv)
+ {
+ context.BranchIfFalse(label, pred);
+ }
+ else
+ {
+ context.BranchIfTrue(label, pred);
+ }
+
+ return;
+ }
+
+ // Both the condition code and the (possibly inverted) predicate must hold.
+ Operand cond = GetCondition(context, opCond.Ccc);
+ Operand taken;
+
+ if (predIsAlwaysTrue)
+ {
+ taken = cond;
+ }
+ else if (opCond.PredInv)
+ {
+ taken = context.BitwiseAnd(context.BitwiseNot(pred), cond);
+ }
+ else
+ {
+ taken = context.BitwiseAnd(pred, cond);
+ }
+
+ context.BranchIfTrue(label, taken);
+ }
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/Instructions/InstEmitHelper.cs b/src/Ryujinx.Graphics.Shader/Instructions/InstEmitHelper.cs
new file mode 100644
index 00000000..0ba4667e
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/Instructions/InstEmitHelper.cs
@@ -0,0 +1,266 @@
+using Ryujinx.Graphics.Shader.Decoders;
+using Ryujinx.Graphics.Shader.IntermediateRepresentation;
+using Ryujinx.Graphics.Shader.Translation;
+using System;
+using System.Runtime.CompilerServices;
+
+using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
+
+namespace Ryujinx.Graphics.Shader.Instructions
+{
+ static class InstEmitHelper
+ {
+ /// <summary>
+ /// Creates the operand for flag register 0 (ZF).
+ /// </summary>
+ public static Operand GetZF() => Register(0, RegisterType.Flag);
+
+ /// <summary>
+ /// Creates the operand for flag register 1 (NF).
+ /// </summary>
+ public static Operand GetNF() => Register(1, RegisterType.Flag);
+
+ /// <summary>
+ /// Creates the operand for flag register 2 (CF).
+ /// </summary>
+ public static Operand GetCF() => Register(2, RegisterType.Flag);
+
+ /// <summary>
+ /// Creates the operand for flag register 3 (VF).
+ /// </summary>
+ public static Operand GetVF() => Register(3, RegisterType.Flag);
+
+ /// <summary>
+ /// Creates the operand for general purpose register <paramref name="rd"/>.
+ /// </summary>
+ public static Operand GetDest(int rd) => Register(rd, RegisterType.Gpr);
+
+ /// <summary>
+ /// Creates the operand for the general purpose register whose index is <paramref name="rd"/> with
+ /// bit 0 set; <see cref="SetDest"/> uses it for the high word of a 64-bit value.
+ /// </summary>
+ public static Operand GetDest2(int rd) => Register(rd | 1, RegisterType.Gpr);
+
+ /// <summary>
+ /// Gets a constant buffer source operand. For FP64, the words at <paramref name="cbufOffset"/> and
+ /// <paramref name="cbufOffset"/> + 1 are packed (in that order) into a double.
+ /// </summary>
+ public static Operand GetSrcCbuf(EmitterContext context, int cbufSlot, int cbufOffset, bool isFP64 = false)
+ {
+ if (!isFP64)
+ {
+ return Cbuf(cbufSlot, cbufOffset);
+ }
+
+ return context.PackDouble2x32(
+ Cbuf(cbufSlot, cbufOffset),
+ Cbuf(cbufSlot, cbufOffset + 1));
+ }
+
+ /// <summary>
+ /// Gets an immediate source operand. For FP64, the immediate is packed as the second word of a
+ /// double whose first word is zero.
+ /// </summary>
+ public static Operand GetSrcImm(EmitterContext context, int imm, bool isFP64 = false)
+ {
+ if (!isFP64)
+ {
+ return Const(imm);
+ }
+
+ return context.PackDouble2x32(Const(0), Const(imm));
+ }
+
+ /// <summary>
+ /// Gets a general purpose register source operand. For FP64, register <paramref name="reg"/> and
+ /// register <paramref name="reg"/> | 1 are packed (in that order) into a double.
+ /// </summary>
+ public static Operand GetSrcReg(EmitterContext context, int reg, bool isFP64 = false)
+ {
+ if (!isFP64)
+ {
+ return Register(reg, RegisterType.Gpr);
+ }
+
+ return context.PackDouble2x32(Register(reg, RegisterType.Gpr), Register(reg | 1, RegisterType.Gpr));
+ }
+
+ /// <summary>
+ /// Gets a pair of half source operands from register <paramref name="ra"/>, unpacked according to
+ /// <paramref name="swizzle"/>, with the absolute value and negation modifiers applied.
+ /// </summary>
+ public static Operand[] GetHalfSrc(
+ EmitterContext context,
+ HalfSwizzle swizzle,
+ int ra,
+ bool negate,
+ bool absolute)
+ {
+ Operand packed = GetSrcReg(context, ra);
+ Operand[] halves = GetHalfUnpacked(context, packed, swizzle);
+
+ return FPAbsNeg(context, halves, absolute, negate);
+ }
+
+ /// <summary>
+ /// Gets a pair of half source operands from a constant buffer word, unpacked according to
+ /// <paramref name="swizzle"/>, with the absolute value and negation modifiers applied.
+ /// </summary>
+ public static Operand[] GetHalfSrc(
+ EmitterContext context,
+ HalfSwizzle swizzle,
+ int cbufSlot,
+ int cbufOffset,
+ bool negate,
+ bool absolute)
+ {
+ Operand packed = GetSrcCbuf(context, cbufSlot, cbufOffset);
+ Operand[] halves = GetHalfUnpacked(context, packed, swizzle);
+
+ return FPAbsNeg(context, halves, absolute, negate);
+ }
+
+ /// <summary>
+ /// Gets a pair of half source operands from two immediates. Each immediate is shifted left by 6,
+ /// truncated to 16 bits, reinterpreted as a <see cref="Half"/> and converted to a float constant.
+ /// </summary>
+ public static Operand[] GetHalfSrc(EmitterContext context, int immH0, int immH1)
+ {
+ ushort bitsH0 = (ushort)(immH0 << 6);
+ ushort bitsH1 = (ushort)(immH1 << 6);
+
+ // Reinterpret the raw bits as half precision values, then widen them to float.
+ Operand h0 = ConstF((float)Unsafe.As<ushort, Half>(ref bitsH0));
+ Operand h1 = ConstF((float)Unsafe.As<ushort, Half>(ref bitsH1));
+
+ return new Operand[] { h0, h1 };
+ }
+
+ /// <summary>
+ /// Gets a pair of half source operands from a 32-bit immediate: the low and the high 16 bits are
+ /// each reinterpreted as a <see cref="Half"/> and converted to a float constant.
+ /// </summary>
+ public static Operand[] GetHalfSrc(EmitterContext context, int imm32)
+ {
+ ushort bitsH0 = (ushort)imm32;
+ ushort bitsH1 = (ushort)(imm32 >> 16);
+
+ // Reinterpret the raw bits as half precision values, then widen them to float.
+ Operand h0 = ConstF((float)Unsafe.As<ushort, Half>(ref bitsH0));
+ Operand h1 = ConstF((float)Unsafe.As<ushort, Half>(ref bitsH1));
+
+ return new Operand[] { h0, h1 };
+ }
+
+ /// <summary>
+ /// Applies the absolute value and negation modifiers to every element of <paramref name="operands"/>.
+ /// The array is modified in place and the same instance is returned.
+ /// </summary>
+ public static Operand[] FPAbsNeg(EmitterContext context, Operand[] operands, bool abs, bool neg)
+ {
+ for (int i = 0; i < operands.Length; i++)
+ {
+ Operand modified = context.FPAbsNeg(operands[i], abs, neg);
+
+ operands[i] = modified;
+ }
+
+ return operands;
+ }
+
+ /// <summary>
+ /// Expands <paramref name="src"/> into a pair of operands according to <paramref name="swizzle"/>.
+ /// </summary>
+ /// <returns>Two-element array holding the selected halves (or <paramref name="src"/> twice for F32).</returns>
+ /// <exception cref="ArgumentException">Thrown when <paramref name="swizzle"/> is not a known value.</exception>
+ public static Operand[] GetHalfUnpacked(EmitterContext context, Operand src, HalfSwizzle swizzle)
+ {
+ return swizzle switch
+ {
+ // Low half first, high half second.
+ HalfSwizzle.F16 => new Operand[]
+ {
+ context.UnpackHalf2x16Low(src),
+ context.UnpackHalf2x16High(src)
+ },
+
+ // The source is used as is for both elements.
+ HalfSwizzle.F32 => new Operand[] { src, src },
+
+ // Low half replicated.
+ HalfSwizzle.H0H0 => new Operand[]
+ {
+ context.UnpackHalf2x16Low(src),
+ context.UnpackHalf2x16Low(src)
+ },
+
+ // High half replicated.
+ HalfSwizzle.H1H1 => new Operand[]
+ {
+ context.UnpackHalf2x16High(src),
+ context.UnpackHalf2x16High(src)
+ },
+
+ _ => throw new ArgumentException($"Invalid swizzle \"{swizzle}\".")
+ };
+ }
+
+ /// <summary>
+ /// Combines a pair of results into a single operand according to the output format
+ /// <paramref name="swizzle"/>. The merge formats take the missing half from the current value of
+ /// destination register <paramref name="rd"/>.
+ /// </summary>
+ /// <exception cref="ArgumentException">Thrown when <paramref name="swizzle"/> is not a known value.</exception>
+ public static Operand GetHalfPacked(EmitterContext context, OFmt swizzle, Operand[] results, int rd)
+ {
+ if (swizzle == OFmt.F16)
+ {
+ return context.PackHalf2x16(results[0], results[1]);
+ }
+
+ if (swizzle == OFmt.F32)
+ {
+ return results[0];
+ }
+
+ if (swizzle == OFmt.MrgH0)
+ {
+ // Keep the destination's high half, replace the low half.
+ Operand destHigh = GetHalfDest(context, rd, isHigh: true);
+
+ return context.PackHalf2x16(results[0], destHigh);
+ }
+
+ if (swizzle == OFmt.MrgH1)
+ {
+ // Keep the destination's low half, replace the high half.
+ Operand destLow = GetHalfDest(context, rd, isHigh: false);
+
+ return context.PackHalf2x16(destLow, results[1]);
+ }
+
+ throw new ArgumentException($"Invalid swizzle \"{swizzle}\".");
+ }
+
+ /// <summary>
+ /// Unpacks the high or the low half of destination register <paramref name="rd"/>.
+ /// </summary>
+ public static Operand GetHalfDest(EmitterContext context, int rd, bool isHigh)
+ {
+ Operand dest = GetDest(rd);
+
+ return isHigh
+ ? context.UnpackHalf2x16High(dest)
+ : context.UnpackHalf2x16Low(dest);
+ }
+
+ /// <summary>
+ /// Gets predicate register <paramref name="pred"/>, bitwise inverted when <paramref name="not"/> is true.
+ /// </summary>
+ public static Operand GetPredicate(EmitterContext context, int pred, bool not)
+ {
+ Operand predicate = Register(pred, RegisterType.Predicate);
+
+ return not ? context.BitwiseNot(predicate) : predicate;
+ }
+
+ /// <summary>
+ /// Writes <paramref name="value"/> to destination register <paramref name="rd"/>. An FP64 value is
+ /// split into its low and high words, written to <see cref="GetDest"/> and <see cref="GetDest2"/> respectively.
+ /// </summary>
+ public static void SetDest(EmitterContext context, Operand value, int rd, bool isFP64)
+ {
+ if (!isFP64)
+ {
+ context.Copy(GetDest(rd), value);
+ return;
+ }
+
+ context.Copy(GetDest(rd), context.UnpackDouble2x32Low(value));
+ context.Copy(GetDest2(rd), context.UnpackDouble2x32High(value));
+ }
+
+ /// <summary>
+ /// Sign extends the low 16 bits of <paramref name="imm16"/> to a 32-bit integer.
+ /// </summary>
+ public static int Imm16ToSInt(int imm16) => (short)imm16;
+
+ /// <summary>
+ /// Moves a 20-bit immediate into the upper 20 bits of a 32-bit word; the lower 12 bits are zero.
+ /// </summary>
+ public static int Imm20ToFloat(int imm20) => imm20 << 12;
+
+ /// <summary>
+ /// Sign extends the low 20 bits of <paramref name="imm20"/> to a 32-bit integer.
+ /// </summary>
+ public static int Imm20ToSInt(int imm20) => (imm20 << 12) >> 12;
+
+ /// <summary>
+ /// Sign extends the low 24 bits of <paramref name="imm24"/> to a 32-bit integer.
+ /// </summary>
+ public static int Imm24ToSInt(int imm24) => (imm24 << 8) >> 8;
+
+ /// <summary>
+ /// Sign extends the low <paramref name="srcBits"/> bits of <paramref name="src"/> using a signed
+ /// bitfield extract that starts at bit 0.
+ /// </summary>
+ public static Operand SignExtendTo32(EmitterContext context, Operand src, int srcBits)
+ {
+ Operand firstBit = Const(0);
+ Operand bitCount = Const(srcBits);
+
+ return context.BitfieldExtractS32(src, firstBit, bitCount);
+ }
+
+ /// <summary>
+ /// Zero extends the low <paramref name="srcBits"/> bits of <paramref name="src"/> by masking off the upper bits.
+ /// </summary>
+ /// <param name="context">Emitter context that receives the masking operation.</param>
+ /// <param name="src">Value to zero extend.</param>
+ /// <param name="srcBits">Number of low bits to keep. Values of 0 or less keep no bits, values of 32 or more keep all bits.</param>
+ /// <returns>Operand holding <paramref name="src"/> with only the low <paramref name="srcBits"/> bits preserved.</returns>
+ public static Operand ZeroExtendTo32(EmitterContext context, Operand src, int srcBits)
+ {
+ int mask;
+
+ if (srcBits <= 0)
+ {
+ // C# masks the shift count to 5 bits, so "uint.MaxValue >> 32" would yield
+ // all ones rather than zero. Handle the empty width explicitly.
+ mask = 0;
+ }
+ else if (srcBits >= 32)
+ {
+ // The full width is kept; this also avoids a negative (wrapped) shift count.
+ mask = -1;
+ }
+ else
+ {
+ mask = (int)(uint.MaxValue >> (32 - srcBits));
+ }
+
+ return context.BitwiseAnd(src, Const(mask));
+ }
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/Instructions/InstEmitIntegerArithmetic.cs b/src/Ryujinx.Graphics.Shader/Instructions/InstEmitIntegerArithmetic.cs
new file mode 100644
index 00000000..374e3d61
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/Instructions/InstEmitIntegerArithmetic.cs
@@ -0,0 +1,699 @@
+using Ryujinx.Graphics.Shader.Decoders;
+using Ryujinx.Graphics.Shader.IntermediateRepresentation;
+using Ryujinx.Graphics.Shader.Translation;
+
+using static Ryujinx.Graphics.Shader.Instructions.InstEmitAluHelper;
+using static Ryujinx.Graphics.Shader.Instructions.InstEmitHelper;
+using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
+
+namespace Ryujinx.Graphics.Shader.Instructions
+{
+ static partial class InstEmit
+ {
+ /// <summary>
+ /// Handles <see cref="InstIaddR"/>: integer add of two register sources.
+ /// </summary>
+ public static void IaddR(EmitterContext context)
+ {
+ InstIaddR op = context.GetOp<InstIaddR>();
+
+ Operand lhs = GetSrcReg(context, op.SrcA);
+ Operand rhs = GetSrcReg(context, op.SrcB);
+
+ EmitIadd(context, lhs, rhs, op.Dest, op.AvgMode, op.X, op.WriteCC);
+ }
+
+ /// <summary>
+ /// Handles <see cref="InstIaddI"/>: integer add of a register source and a sign extended 20-bit immediate.
+ /// </summary>
+ public static void IaddI(EmitterContext context)
+ {
+ InstIaddI op = context.GetOp<InstIaddI>();
+
+ Operand lhs = GetSrcReg(context, op.SrcA);
+ Operand rhs = GetSrcImm(context, Imm20ToSInt(op.Imm20));
+
+ EmitIadd(context, lhs, rhs, op.Dest, op.AvgMode, op.X, op.WriteCC);
+ }
+
+ /// <summary>
+ /// Handles <see cref="InstIaddC"/>: integer add of a register source and a constant buffer source.
+ /// </summary>
+ public static void IaddC(EmitterContext context)
+ {
+ InstIaddC op = context.GetOp<InstIaddC>();
+
+ Operand lhs = GetSrcReg(context, op.SrcA);
+ Operand rhs = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);
+
+ EmitIadd(context, lhs, rhs, op.Dest, op.AvgMode, op.X, op.WriteCC);
+ }
+
+ /// <summary>
+ /// Handles <see cref="InstIadd32i"/>: integer add of a register source and a 32-bit immediate.
+ /// </summary>
+ public static void Iadd32i(EmitterContext context)
+ {
+ InstIadd32i op = context.GetOp<InstIadd32i>();
+
+ Operand lhs = GetSrcReg(context, op.SrcA);
+ Operand rhs = GetSrcImm(context, op.Imm32);
+
+ EmitIadd(context, lhs, rhs, op.Dest, op.AvgMode, op.X, op.WriteCC);
+ }
+
+ /// <summary>
+ /// Handles <see cref="InstIadd3R"/>: three-input integer add with all sources read from registers,
+ /// using the shift mode and per-source part selection encoded in the operation.
+ /// </summary>
+ public static void Iadd3R(EmitterContext context)
+ {
+ InstIadd3R op = context.GetOp<InstIadd3R>();
+
+ Operand first = GetSrcReg(context, op.SrcA);
+ Operand second = GetSrcReg(context, op.SrcB);
+ Operand third = GetSrcReg(context, op.SrcC);
+
+ EmitIadd3(context, op.Lrs, first, second, third, op.Apart, op.Bpart, op.Cpart, op.Dest, op.NegA, op.NegB, op.NegC);
+ }
+
+ /// <summary>
+ /// Handles <see cref="InstIadd3I"/>: three-input integer add whose second source is a sign extended
+ /// 20-bit immediate. No shift is applied and all sources are used as full 32-bit values.
+ /// </summary>
+ public static void Iadd3I(EmitterContext context)
+ {
+ InstIadd3I op = context.GetOp<InstIadd3I>();
+
+ Operand first = GetSrcReg(context, op.SrcA);
+ Operand second = GetSrcImm(context, Imm20ToSInt(op.Imm20));
+ Operand third = GetSrcReg(context, op.SrcC);
+
+ EmitIadd3(context, Lrs.None, first, second, third, HalfSelect.B32, HalfSelect.B32, HalfSelect.B32, op.Dest, op.NegA, op.NegB, op.NegC);
+ }
+
+ /// <summary>
+ /// Handles <see cref="InstIadd3C"/>: three-input integer add whose second source comes from a
+ /// constant buffer. No shift is applied and all sources are used as full 32-bit values.
+ /// </summary>
+ public static void Iadd3C(EmitterContext context)
+ {
+ InstIadd3C op = context.GetOp<InstIadd3C>();
+
+ Operand first = GetSrcReg(context, op.SrcA);
+ Operand second = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);
+ Operand third = GetSrcReg(context, op.SrcC);
+
+ EmitIadd3(context, Lrs.None, first, second, third, HalfSelect.B32, HalfSelect.B32, HalfSelect.B32, op.Dest, op.NegA, op.NegB, op.NegC);
+ }
+
+ /// <summary>
+ /// Handles <see cref="InstImadR"/>: integer multiply-add with all three sources read from registers.
+ /// </summary>
+ public static void ImadR(EmitterContext context)
+ {
+ InstImadR op = context.GetOp<InstImadR>();
+
+ Operand factorA = GetSrcReg(context, op.SrcA);
+ Operand factorB = GetSrcReg(context, op.SrcB);
+ Operand addend = GetSrcReg(context, op.SrcC);
+
+ EmitImad(context, factorA, factorB, addend, op.Dest, op.AvgMode, op.ASigned, op.BSigned, op.Hilo);
+ }
+
+ /// <summary>
+ /// Handles <see cref="InstImadI"/>: integer multiply-add whose second factor is a sign extended 20-bit immediate.
+ /// </summary>
+ public static void ImadI(EmitterContext context)
+ {
+ InstImadI op = context.GetOp<InstImadI>();
+
+ Operand factorA = GetSrcReg(context, op.SrcA);
+ Operand factorB = GetSrcImm(context, Imm20ToSInt(op.Imm20));
+ Operand addend = GetSrcReg(context, op.SrcC);
+
+ EmitImad(context, factorA, factorB, addend, op.Dest, op.AvgMode, op.ASigned, op.BSigned, op.Hilo);
+ }
+
+ /// <summary>
+ /// Handles <see cref="InstImadC"/>: integer multiply-add whose second factor comes from a constant buffer.
+ /// </summary>
+ public static void ImadC(EmitterContext context)
+ {
+ InstImadC op = context.GetOp<InstImadC>();
+
+ Operand factorA = GetSrcReg(context, op.SrcA);
+ Operand factorB = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);
+ Operand addend = GetSrcReg(context, op.SrcC);
+
+ EmitImad(context, factorA, factorB, addend, op.Dest, op.AvgMode, op.ASigned, op.BSigned, op.Hilo);
+ }
+
+ /// <summary>
+ /// Handles <see cref="InstImadRc"/>: integer multiply-add where the second factor is register
+ /// <c>SrcC</c> and the addend comes from a constant buffer.
+ /// </summary>
+ public static void ImadRc(EmitterContext context)
+ {
+ InstImadRc op = context.GetOp<InstImadRc>();
+
+ Operand factorA = GetSrcReg(context, op.SrcA);
+ Operand factorB = GetSrcReg(context, op.SrcC);
+ Operand addend = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);
+
+ EmitImad(context, factorA, factorB, addend, op.Dest, op.AvgMode, op.ASigned, op.BSigned, op.Hilo);
+ }
+
+ /// <summary>
+ /// Handles <see cref="InstImad32i"/>: integer multiply-add whose second factor is a 32-bit
+ /// immediate and whose addend is the current value of the destination register.
+ /// </summary>
+ public static void Imad32i(EmitterContext context)
+ {
+ InstImad32i op = context.GetOp<InstImad32i>();
+
+ Operand factorA = GetSrcReg(context, op.SrcA);
+ Operand factorB = GetSrcImm(context, op.Imm32);
+ Operand addend = GetSrcReg(context, op.Dest);
+
+ EmitImad(context, factorA, factorB, addend, op.Dest, op.AvgMode, op.ASigned, op.BSigned, op.Hilo);
+ }
+
+ /// <summary>
+ /// Handles <see cref="InstImulR"/>: integer multiply of two register sources, emitted as a
+ /// multiply-add with a constant zero addend.
+ /// </summary>
+ public static void ImulR(EmitterContext context)
+ {
+ InstImulR op = context.GetOp<InstImulR>();
+
+ Operand factorA = GetSrcReg(context, op.SrcA);
+ Operand factorB = GetSrcReg(context, op.SrcB);
+
+ EmitImad(context, factorA, factorB, Const(0), op.Dest, AvgMode.NoNeg, op.ASigned, op.BSigned, op.Hilo);
+ }
+
+ /// <summary>
+ /// Handles <see cref="InstImulI"/>: integer multiply of a register source and a sign extended
+ /// 20-bit immediate, emitted as a multiply-add with a constant zero addend.
+ /// </summary>
+ public static void ImulI(EmitterContext context)
+ {
+ InstImulI op = context.GetOp<InstImulI>();
+
+ Operand factorA = GetSrcReg(context, op.SrcA);
+ Operand factorB = GetSrcImm(context, Imm20ToSInt(op.Imm20));
+
+ EmitImad(context, factorA, factorB, Const(0), op.Dest, AvgMode.NoNeg, op.ASigned, op.BSigned, op.Hilo);
+ }
+
+ /// <summary>
+ /// Handles <see cref="InstImulC"/>: integer multiply of a register source and a constant buffer
+ /// source, emitted as a multiply-add with a constant zero addend.
+ /// </summary>
+ public static void ImulC(EmitterContext context)
+ {
+ InstImulC op = context.GetOp<InstImulC>();
+
+ Operand factorA = GetSrcReg(context, op.SrcA);
+ Operand factorB = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);
+
+ EmitImad(context, factorA, factorB, Const(0), op.Dest, AvgMode.NoNeg, op.ASigned, op.BSigned, op.Hilo);
+ }
+
+ /// <summary>
+ /// Handles <see cref="InstImul32i"/>: integer multiply of a register source and a 32-bit
+ /// immediate, emitted as a multiply-add with a constant zero addend.
+ /// </summary>
+ public static void Imul32i(EmitterContext context)
+ {
+ InstImul32i op = context.GetOp<InstImul32i>();
+
+ Operand factorA = GetSrcReg(context, op.SrcA);
+ Operand factorB = GetSrcImm(context, op.Imm32);
+
+ EmitImad(context, factorA, factorB, Const(0), op.Dest, AvgMode.NoNeg, op.ASigned, op.BSigned, op.Hilo);
+ }
+
+ /// <summary>
+ /// Handles <see cref="InstIscaddR"/>: shifts the first register source left by <c>Imm5</c> and adds
+ /// the second register source.
+ /// </summary>
+ public static void IscaddR(EmitterContext context)
+ {
+ InstIscaddR op = context.GetOp<InstIscaddR>();
+
+ Operand scaled = GetSrcReg(context, op.SrcA);
+ Operand addend = GetSrcReg(context, op.SrcB);
+
+ EmitIscadd(context, scaled, addend, op.Dest, op.Imm5, op.AvgMode, op.WriteCC);
+ }
+
+ /// <summary>
+ /// Handles <see cref="InstIscaddI"/>: shifts the register source left by <c>Imm5</c> and adds a
+ /// sign extended 20-bit immediate.
+ /// </summary>
+ public static void IscaddI(EmitterContext context)
+ {
+ InstIscaddI op = context.GetOp<InstIscaddI>();
+
+ Operand scaled = GetSrcReg(context, op.SrcA);
+ Operand addend = GetSrcImm(context, Imm20ToSInt(op.Imm20));
+
+ EmitIscadd(context, scaled, addend, op.Dest, op.Imm5, op.AvgMode, op.WriteCC);
+ }
+
+ /// <summary>
+ /// Handles <see cref="InstIscaddC"/>: shifts the register source left by <c>Imm5</c> and adds a
+ /// constant buffer source.
+ /// </summary>
+ public static void IscaddC(EmitterContext context)
+ {
+ InstIscaddC op = context.GetOp<InstIscaddC>();
+
+ Operand scaled = GetSrcReg(context, op.SrcA);
+ Operand addend = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);
+
+ EmitIscadd(context, scaled, addend, op.Dest, op.Imm5, op.AvgMode, op.WriteCC);
+ }
+
+ /// <summary>
+ /// Handles <see cref="InstIscadd32i"/>: shifts the register source left by <c>Imm5</c> and adds a
+ /// 32-bit immediate. No negation is applied to either source.
+ /// </summary>
+ public static void Iscadd32i(EmitterContext context)
+ {
+ InstIscadd32i op = context.GetOp<InstIscadd32i>();
+
+ Operand scaled = GetSrcReg(context, op.SrcA);
+ Operand addend = GetSrcImm(context, op.Imm32);
+
+ EmitIscadd(context, scaled, addend, op.Dest, op.Imm5, AvgMode.NoNeg, op.WriteCC);
+ }
+
+ /// <summary>
+ /// Handles <see cref="InstLeaR"/>: shifted first register source plus second register source.
+ /// </summary>
+ public static void LeaR(EmitterContext context)
+ {
+ InstLeaR op = context.GetOp<InstLeaR>();
+
+ Operand scaled = GetSrcReg(context, op.SrcA);
+ Operand baseValue = GetSrcReg(context, op.SrcB);
+
+ EmitLea(context, scaled, baseValue, op.Dest, op.NegA, op.ImmU5);
+ }
+
+ /// <summary>
+ /// Handles <see cref="InstLeaI"/>: shifted register source plus a sign extended 20-bit immediate.
+ /// </summary>
+ public static void LeaI(EmitterContext context)
+ {
+ InstLeaI op = context.GetOp<InstLeaI>();
+
+ Operand scaled = GetSrcReg(context, op.SrcA);
+ Operand baseValue = GetSrcImm(context, Imm20ToSInt(op.Imm20));
+
+ EmitLea(context, scaled, baseValue, op.Dest, op.NegA, op.ImmU5);
+ }
+
+ /// <summary>
+ /// Handles <see cref="InstLeaC"/>: shifted register source plus a constant buffer source.
+ /// </summary>
+ public static void LeaC(EmitterContext context)
+ {
+ InstLeaC op = context.GetOp<InstLeaC>();
+
+ Operand scaled = GetSrcReg(context, op.SrcA);
+ Operand baseValue = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);
+
+ EmitLea(context, scaled, baseValue, op.Dest, op.NegA, op.ImmU5);
+ }
+
+ /// <summary>
+ /// Handles <see cref="InstLeaHiR"/>: high-word LEA with all three sources read from registers.
+ /// </summary>
+ public static void LeaHiR(EmitterContext context)
+ {
+ InstLeaHiR op = context.GetOp<InstLeaHiR>();
+
+ Operand lowWord = GetSrcReg(context, op.SrcA);
+ Operand baseValue = GetSrcReg(context, op.SrcB);
+ Operand highWord = GetSrcReg(context, op.SrcC);
+
+ EmitLeaHi(context, lowWord, baseValue, highWord, op.Dest, op.NegA, op.ImmU5);
+ }
+
+ /// <summary>
+ /// Handles <see cref="InstLeaHiC"/>: high-word LEA whose second source comes from a constant buffer.
+ /// </summary>
+ public static void LeaHiC(EmitterContext context)
+ {
+ InstLeaHiC op = context.GetOp<InstLeaHiC>();
+
+ Operand lowWord = GetSrcReg(context, op.SrcA);
+ Operand baseValue = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);
+ Operand highWord = GetSrcReg(context, op.SrcC);
+
+ EmitLeaHi(context, lowWord, baseValue, highWord, op.Dest, op.NegA, op.ImmU5);
+ }
+
+ /// <summary>
+ /// Handles <see cref="InstXmadR"/>: 16-bit multiply-add with all three sources read from registers.
+ /// </summary>
+ public static void XmadR(EmitterContext context)
+ {
+ InstXmadR op = context.GetOp<InstXmadR>();
+
+ Operand factorA = GetSrcReg(context, op.SrcA);
+ Operand factorB = GetSrcReg(context, op.SrcB);
+ Operand addend = GetSrcReg(context, op.SrcC);
+
+ EmitXmad(context, op.XmadCop, factorA, factorB, addend, op.Dest, op.ASigned, op.BSigned, op.HiloA, op.HiloB, op.Psl, op.Mrg, op.X, op.WriteCC);
+ }
+
+ /// <summary>
+ /// Handles <see cref="InstXmadI"/>: 16-bit multiply-add whose second factor is a 16-bit immediate;
+ /// the low half of that factor is always selected.
+ /// </summary>
+ public static void XmadI(EmitterContext context)
+ {
+ InstXmadI op = context.GetOp<InstXmadI>();
+
+ Operand factorA = GetSrcReg(context, op.SrcA);
+ Operand factorB = GetSrcImm(context, op.Imm16);
+ Operand addend = GetSrcReg(context, op.SrcC);
+
+ EmitXmad(context, op.XmadCop, factorA, factorB, addend, op.Dest, op.ASigned, op.BSigned, op.HiloA, false, op.Psl, op.Mrg, op.X, op.WriteCC);
+ }
+
+ /// <summary>
+ /// Handles <see cref="InstXmadC"/>: 16-bit multiply-add whose second factor comes from a constant buffer.
+ /// </summary>
+ public static void XmadC(EmitterContext context)
+ {
+ InstXmadC op = context.GetOp<InstXmadC>();
+
+ Operand factorA = GetSrcReg(context, op.SrcA);
+ Operand factorB = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);
+ Operand addend = GetSrcReg(context, op.SrcC);
+
+ EmitXmad(context, op.XmadCop, factorA, factorB, addend, op.Dest, op.ASigned, op.BSigned, op.HiloA, op.HiloB, op.Psl, op.Mrg, op.X, op.WriteCC);
+ }
+
+ /// <summary>
+ /// Handles <see cref="InstXmadRc"/>: 16-bit multiply-add where the second factor is register
+ /// <c>SrcC</c> and the addend comes from a constant buffer. Product shift and merge are disabled.
+ /// </summary>
+ public static void XmadRc(EmitterContext context)
+ {
+ InstXmadRc op = context.GetOp<InstXmadRc>();
+
+ Operand factorA = GetSrcReg(context, op.SrcA);
+ Operand factorB = GetSrcReg(context, op.SrcC);
+ Operand addend = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);
+
+ EmitXmad(context, op.XmadCop, factorA, factorB, addend, op.Dest, op.ASigned, op.BSigned, op.HiloA, op.HiloB, false, false, op.X, op.WriteCC);
+ }
+
+ /// <summary>
+ /// Shared integer add emitter: optionally negates one of the sources according to
+ /// <paramref name="avgMode"/>, adds them (plus bit 0 of the carry flag when
+ /// <paramref name="extended"/> is set), updates the flags and writes the sum to <paramref name="rd"/>.
+ /// </summary>
+ private static void EmitIadd(
+ EmitterContext context,
+ Operand srcA,
+ Operand srcB,
+ int rd,
+ AvgMode avgMode,
+ bool extended,
+ bool writeCC)
+ {
+ Operand lhs = context.INegate(srcA, avgMode == AvgMode.NegA);
+ Operand rhs = context.INegate(srcB, avgMode == AvgMode.NegB);
+
+ Operand sum = context.IAdd(lhs, rhs);
+
+ if (extended)
+ {
+ // Carry in: only bit 0 of the carry flag is added.
+ Operand carryIn = context.BitwiseAnd(GetCF(), Const(1));
+
+ sum = context.IAdd(sum, carryIn);
+ }
+
+ SetIaddFlags(context, sum, lhs, rhs, writeCC, extended);
+
+ // TODO: SAT.
+
+ context.Copy(GetDest(rd), sum);
+ }
+
+ /// <summary>
+ /// Shared three-input add emitter. Each source is reduced to the selected part (full word, low
+ /// half or high half) and optionally negated; the sum of the first two is shifted by 16 according
+ /// to <paramref name="mode"/> before the third is added. The result is written to <paramref name="rd"/>.
+ /// </summary>
+ private static void EmitIadd3(
+ EmitterContext context,
+ Lrs mode,
+ Operand srcA,
+ Operand srcB,
+ Operand srcC,
+ HalfSelect partA,
+ HalfSelect partB,
+ HalfSelect partC,
+ int rd,
+ bool negateA,
+ bool negateB,
+ bool negateC)
+ {
+ // Selects the requested part of a source, zero extended to 32 bits.
+ Operand Extend(Operand src, HalfSelect part)
+ {
+ switch (part)
+ {
+ case HalfSelect.B32:
+ return src;
+
+ case HalfSelect.H0:
+ return context.BitwiseAnd(src, Const(0xffff));
+
+ case HalfSelect.H1:
+ return context.ShiftRightU32(src, Const(16));
+
+ default:
+ // Unknown selection: report it and fall back to the unmodified source.
+ context.Config.GpuAccessor.Log($"Iadd3 has invalid component selection {part}.");
+ return src;
+ }
+ }
+
+ srcA = context.INegate(Extend(srcA, partA), negateA);
+ srcB = context.INegate(Extend(srcB, partB), negateB);
+ srcC = context.INegate(Extend(srcC, partC), negateC);
+
+ Operand sum = context.IAdd(srcA, srcB);
+
+ switch (mode)
+ {
+ case Lrs.None:
+ break;
+
+ case Lrs.LeftShift:
+ sum = context.ShiftLeft(sum, Const(16));
+ break;
+
+ case Lrs.RightShift:
+ sum = context.ShiftRightU32(sum, Const(16));
+ break;
+
+ default:
+ // TODO: Warning.
+ break;
+ }
+
+ sum = context.IAdd(sum, srcC);
+
+ context.Copy(GetDest(rd), sum);
+
+ // TODO: CC, X, corner cases.
+ }
+
+ /// <summary>
+ /// Shared integer multiply-add emitter. Computes either the low 32 bits or, when
+ /// <paramref name="high"/> is set, the high 32 bits of the product of the first two sources, adds
+ /// the third source unless it is a constant zero, and writes the result to <paramref name="rd"/>.
+ /// </summary>
+ private static void EmitImad(
+ EmitterContext context,
+ Operand srcA,
+ Operand srcB,
+ Operand srcC,
+ int rd,
+ AvgMode avgMode,
+ bool signedA,
+ bool signedB,
+ bool high)
+ {
+ // NegA is applied to the second factor and NegB to the addend.
+ srcB = context.INegate(srcB, avgMode == AvgMode.NegA);
+ srcC = context.INegate(srcC, avgMode == AvgMode.NegB);
+
+ Operand result;
+
+ if (!high)
+ {
+ result = context.IMultiply(srcA, srcB);
+ }
+ else if (signedA && signedB)
+ {
+ result = context.MultiplyHighS32(srcA, srcB);
+ }
+ else
+ {
+ result = context.MultiplyHighU32(srcA, srcB);
+
+ // Mixed signedness: correct the unsigned high product using the
+ // sign (arithmetic shift by 31) of the signed factor.
+ if (signedA)
+ {
+ Operand signA = context.ShiftRightS32(srcA, Const(31));
+ Operand correction = context.IMultiply(srcB, signA);
+
+ result = context.IAdd(result, correction);
+ }
+ else if (signedB)
+ {
+ Operand signB = context.ShiftRightS32(srcB, Const(31));
+ Operand correction = context.IMultiply(srcA, signB);
+
+ result = context.IAdd(result, correction);
+ }
+ }
+
+ // Adding a constant zero would be a no-op, so it is skipped.
+ bool addendIsZero = srcC.Type == OperandType.Constant && srcC.Value == 0;
+
+ if (!addendIsZero)
+ {
+ result = context.IAdd(result, srcC);
+ }
+
+ // TODO: CC, X, SAT, and more?
+
+ context.Copy(GetDest(rd), result);
+ }
+
+ /// <summary>
+ /// Shared scaled add emitter: shifts the first source left by <paramref name="shift"/>, optionally
+ /// negates one of the sources according to <paramref name="avgMode"/>, adds them, updates the
+ /// flags and writes the sum to <paramref name="rd"/>.
+ /// </summary>
+ private static void EmitIscadd(
+ EmitterContext context,
+ Operand srcA,
+ Operand srcB,
+ int rd,
+ int shift,
+ AvgMode avgMode,
+ bool writeCC)
+ {
+ Operand scaled = context.ShiftLeft(srcA, Const(shift));
+
+ // The negation is applied after the shift.
+ Operand lhs = context.INegate(scaled, avgMode == AvgMode.NegA);
+ Operand rhs = context.INegate(srcB, avgMode == AvgMode.NegB);
+
+ Operand sum = context.IAdd(lhs, rhs);
+
+ SetIaddFlags(context, sum, lhs, rhs, writeCC, false);
+
+ context.Copy(GetDest(rd), sum);
+ }
+
+ /// <summary>
+ /// Shared LEA emitter: shifts the first source left by <paramref name="shift"/>, optionally
+ /// negates it, adds the second source and writes the sum to <paramref name="rd"/>.
+ /// </summary>
+ public static void EmitLea(EmitterContext context, Operand srcA, Operand srcB, int rd, bool negateA, int shift)
+ {
+ Operand scaled = context.ShiftLeft(srcA, Const(shift));
+ Operand lhs = context.INegate(scaled, negateA);
+
+ Operand sum = context.IAdd(lhs, srcB);
+
+ context.Copy(GetDest(rd), sum);
+
+ // TODO: CC, X.
+ }
+
+ /// <summary>
+ /// Shared high-word LEA emitter. Treats <paramref name="srcA"/> (low) and <paramref name="srcC"/>
+ /// (high) as a two-word value, shifts it left by <paramref name="shift"/>, optionally negates it
+ /// as a 64-bit quantity, then adds <paramref name="srcB"/> to the high word and writes that sum
+ /// to <paramref name="rd"/>.
+ /// </summary>
+ private static void EmitLeaHi(
+ EmitterContext context,
+ Operand srcA,
+ Operand srcB,
+ Operand srcC,
+ int rd,
+ bool negateA,
+ int shift)
+ {
+ Operand lowWord = context.ShiftLeft(srcA, Const(shift));
+ Operand highWord;
+
+ if (shift == 0)
+ {
+ // Nothing is shifted out of the low word.
+ highWord = Const(0);
+ }
+ else
+ {
+ // Bits shifted out of the low word move into the high word.
+ highWord = context.ShiftRightU32(srcA, Const(32 - shift));
+ }
+
+ highWord = context.BitwiseOr(highWord, context.ShiftLeft(srcC, Const(shift)));
+
+ if (negateA)
+ {
+ // Perform 64-bit negation by doing bitwise not of the value,
+ // then adding 1 and carrying over from low to high.
+ lowWord = context.BitwiseNot(lowWord);
+ highWord = context.BitwiseNot(highWord);
+
+ lowWord = AddWithCarry(context, lowWord, Const(1), out Operand lowCarryOut);
+ highWord = context.IAdd(highWord, lowCarryOut);
+ }
+
+ Operand sum = context.IAdd(highWord, srcB);
+
+ context.Copy(GetDest(rd), sum);
+
+ // TODO: CC, X.
+ }
+
+ /// <summary>
+ /// Converts a <see cref="XmadCop2"/> mode to the equivalent <see cref="XmadCop"/> mode and forwards
+ /// to the main XMAD emitter. Unknown modes are logged and nothing is emitted.
+ /// </summary>
+ public static void EmitXmad(
+ EmitterContext context,
+ XmadCop2 mode,
+ Operand srcA,
+ Operand srcB,
+ Operand srcC,
+ int rd,
+ bool signedA,
+ bool signedB,
+ bool highA,
+ bool highB,
+ bool productShiftLeft,
+ bool merge,
+ bool extended,
+ bool writeCC)
+ {
+ XmadCop? converted = mode switch
+ {
+ XmadCop2.Cfull => XmadCop.Cfull,
+ XmadCop2.Clo => XmadCop.Clo,
+ XmadCop2.Chi => XmadCop.Chi,
+ XmadCop2.Csfu => XmadCop.Csfu,
+ _ => (XmadCop?)null
+ };
+
+ if (!converted.HasValue)
+ {
+ context.Config.GpuAccessor.Log($"Invalid XMAD mode \"{mode}\".");
+ return;
+ }
+
+ EmitXmad(context, converted.Value, srcA, srcB, srcC, rd, signedA, signedB, highA, highB, productShiftLeft, merge, extended, writeCC);
+ }
+
+ /// <summary>
+ /// Main XMAD emitter: multiplies a selected 16-bit half of <paramref name="srcA"/> by a selected
+ /// 16-bit half of <paramref name="srcB"/>, combines the product with <paramref name="srcC"/>
+ /// (pre-processed according to <paramref name="mode"/>) and writes the result to <paramref name="rd"/>.
+ /// </summary>
+ /// <param name="context">Emitter context that receives the generated operations.</param>
+ /// <param name="mode">How the third source is pre-processed before the add.</param>
+ /// <param name="srcA">First factor source.</param>
+ /// <param name="srcB">Second factor source; its unmodified value is also used by Cbcc and by the merge.</param>
+ /// <param name="srcC">Addend source.</param>
+ /// <param name="rd">Destination register index.</param>
+ /// <param name="signedA">Sign extend (rather than zero extend) the selected half of the first factor.</param>
+ /// <param name="signedB">Sign extend (rather than zero extend) the selected half of the second factor.</param>
+ /// <param name="highA">Select the high (rather than low) half of the first factor.</param>
+ /// <param name="highB">Select the high (rather than low) half of the second factor.</param>
+ /// <param name="productShiftLeft">Shift the product left by 16 before the add.</param>
+ /// <param name="merge">Keep only the low 16 bits of the result and place the unmodified second source, shifted left by 16, above them.</param>
+ /// <param name="extended">Add bit 0 of the carry flag instead of the third source (see note in the body).</param>
+ /// <param name="writeCC">Update the condition flags.</param>
+ public static void EmitXmad(
+ EmitterContext context,
+ XmadCop mode,
+ Operand srcA,
+ Operand srcB,
+ Operand srcC,
+ int rd,
+ bool signedA,
+ bool signedB,
+ bool highA,
+ bool highB,
+ bool productShiftLeft,
+ bool merge,
+ bool extended,
+ bool writeCC)
+ {
+ // Kept because srcB is overwritten with the extended half below.
+ var srcBUnmodified = srcB;
+
+ // Extracts the low or high 16 bits of a source, sign or zero extended to 32 bits.
+ Operand Extend16To32(Operand src, bool high, bool signed)
+ {
+ if (signed && high)
+ {
+ return context.ShiftRightS32(src, Const(16));
+ }
+ else if (signed)
+ {
+ return context.BitfieldExtractS32(src, Const(0), Const(16));
+ }
+ else if (high)
+ {
+ return context.ShiftRightU32(src, Const(16));
+ }
+ else
+ {
+ return context.BitwiseAnd(src, Const(0xffff));
+ }
+ }
+
+ srcA = Extend16To32(srcA, highA, signedA);
+ srcB = Extend16To32(srcB, highB, signedB);
+
+ Operand res = context.IMultiply(srcA, srcB);
+
+ if (productShiftLeft)
+ {
+ res = context.ShiftLeft(res, Const(16));
+ }
+
+ switch (mode)
+ {
+ // Third source used as is.
+ case XmadCop.Cfull:
+ break;
+
+ // Low / high 16 bits of the third source, zero extended.
+ case XmadCop.Clo:
+ srcC = Extend16To32(srcC, high: false, signed: false);
+ break;
+ case XmadCop.Chi:
+ srcC = Extend16To32(srcC, high: true, signed: false);
+ break;
+
+ // Third source plus the unmodified second source shifted left by 16.
+ case XmadCop.Cbcc:
+ srcC = context.IAdd(srcC, context.ShiftLeft(srcBUnmodified, Const(16)));
+ break;
+
+ // Third source minus (sign bit << 16) of each extended factor.
+ case XmadCop.Csfu:
+ Operand signAdjustA = context.ShiftLeft(context.ShiftRightU32(srcA, Const(31)), Const(16));
+ Operand signAdjustB = context.ShiftLeft(context.ShiftRightU32(srcB, Const(31)), Const(16));
+
+ srcC = context.ISubtract(srcC, context.IAdd(signAdjustA, signAdjustB));
+ break;
+
+ default:
+ context.Config.GpuAccessor.Log($"Invalid XMAD mode \"{mode}\".");
+ return;
+ }
+
+ // The product before the add is what the flag computation uses as first addend.
+ Operand product = res;
+
+ if (extended)
+ {
+ // Add with carry.
+ // NOTE(review): this path adds only the carry bit; srcC is not added to the result even
+ // though it is still passed to SetIaddFlags below. Confirm whether that is intended.
+ res = context.IAdd(res, context.BitwiseAnd(GetCF(), Const(1)));
+ }
+ else
+ {
+ // Add (no carry in).
+ res = context.IAdd(res, srcC);
+ }
+
+ SetIaddFlags(context, res, product, srcC, writeCC, extended);
+
+ if (merge)
+ {
+ res = context.BitwiseAnd(res, Const(0xffff));
+ res = context.BitwiseOr(res, context.ShiftLeft(srcBUnmodified, Const(16)));
+ }
+
+ context.Copy(GetDest(rd), res);
+ }
+
+ /// <summary>
+ /// Updates the carry (CF) and overflow (VF) flags for an integer add, then delegates the
+ /// remaining flag updates to <c>SetZnFlags</c>. Does nothing when <paramref name="setCC"/> is false.
+ /// </summary>
+ /// <param name="context">Emitter context that receives the generated operations.</param>
+ /// <param name="res">Result of the add.</param>
+ /// <param name="srcA">First addend.</param>
+ /// <param name="srcB">Second addend.</param>
+ /// <param name="setCC">Whether the flags should be updated at all.</param>
+ /// <param name="extended">Whether the add consumed a carry in, which changes the carry out formula.</param>
+ private static void SetIaddFlags(EmitterContext context, Operand res, Operand srcA, Operand srcB, bool setCC, bool extended)
+ {
+ if (!setCC)
+ {
+ return;
+ }
+
+ if (extended)
+ {
+ // C = (d == a && CIn) || d < a
+ Operand tempC0 = context.ICompareEqual(res, srcA);
+ Operand tempC1 = context.ICompareLessUnsigned(res, srcA);
+
+ // The previous carry flag is read here, before it is overwritten below.
+ tempC0 = context.BitwiseAnd(tempC0, GetCF());
+
+ context.Copy(GetCF(), context.BitwiseOr(tempC0, tempC1));
+ }
+ else
+ {
+ // C = d < a
+ context.Copy(GetCF(), context.ICompareLessUnsigned(res, srcA));
+ }
+
+ // V = (d ^ a) & ~(a ^ b) < 0
+ Operand tempV0 = context.BitwiseExclusiveOr(res, srcA);
+ Operand tempV1 = context.BitwiseExclusiveOr(srcA, srcB);
+
+ tempV1 = context.BitwiseNot(tempV1);
+
+ Operand tempV = context.BitwiseAnd(tempV0, tempV1);
+
+ // The signed "less than zero" test extracts the sign bit of the combined value.
+ context.Copy(GetVF(), context.ICompareLess(tempV, Const(0)));
+
+ SetZnFlags(context, res, setCC: true, extended: extended);
+ }
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/Instructions/InstEmitIntegerComparison.cs b/src/Ryujinx.Graphics.Shader/Instructions/InstEmitIntegerComparison.cs
new file mode 100644
index 00000000..dcdb189f
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/Instructions/InstEmitIntegerComparison.cs
@@ -0,0 +1,310 @@
+using Ryujinx.Graphics.Shader.Decoders;
+using Ryujinx.Graphics.Shader.IntermediateRepresentation;
+using Ryujinx.Graphics.Shader.Translation;
+using System;
+
+using static Ryujinx.Graphics.Shader.Instructions.InstEmitAluHelper;
+using static Ryujinx.Graphics.Shader.Instructions.InstEmitHelper;
+using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
+
+namespace Ryujinx.Graphics.Shader.Instructions
+{
+ static partial class InstEmit
+ {
+ // Integer compare-and-select with all three operands read from registers.
+ public static void IcmpR(EmitterContext context)
+ {
+     InstIcmpR op = context.GetOp<InstIcmpR>();
+
+     Operand a = GetSrcReg(context, op.SrcA);
+     Operand b = GetSrcReg(context, op.SrcB);
+     Operand c = GetSrcReg(context, op.SrcC);
+
+     EmitIcmp(context, op.IComp, a, b, c, op.Dest, op.Signed);
+ }
+
+ // Integer compare-and-select whose second operand is the sign-converted 20-bit immediate.
+ public static void IcmpI(EmitterContext context)
+ {
+     InstIcmpI op = context.GetOp<InstIcmpI>();
+
+     Operand a = GetSrcReg(context, op.SrcA);
+     Operand b = GetSrcImm(context, Imm20ToSInt(op.Imm20));
+     Operand c = GetSrcReg(context, op.SrcC);
+
+     EmitIcmp(context, op.IComp, a, b, c, op.Dest, op.Signed);
+ }
+
+ // Integer compare-and-select whose second operand comes from a constant buffer.
+ public static void IcmpC(EmitterContext context)
+ {
+     InstIcmpC op = context.GetOp<InstIcmpC>();
+
+     Operand a = GetSrcReg(context, op.SrcA);
+     Operand b = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);
+     Operand c = GetSrcReg(context, op.SrcC);
+
+     EmitIcmp(context, op.IComp, a, b, c, op.Dest, op.Signed);
+ }
+
+ // Integer compare-and-select where the second operand is the register named by
+ // op.SrcC and the compared (third) operand comes from a constant buffer.
+ public static void IcmpRc(EmitterContext context)
+ {
+     InstIcmpRc op = context.GetOp<InstIcmpRc>();
+
+     Operand a = GetSrcReg(context, op.SrcA);
+     Operand b = GetSrcReg(context, op.SrcC);
+     Operand c = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);
+
+     EmitIcmp(context, op.IComp, a, b, c, op.Dest, op.Signed);
+ }
+
+ // Integer compare-and-set with both operands read from registers.
+ public static void IsetR(EmitterContext context)
+ {
+     InstIsetR op = context.GetOp<InstIsetR>();
+
+     Operand lhs = GetSrcReg(context, op.SrcA);
+     Operand rhs = GetSrcReg(context, op.SrcB);
+
+     EmitIset(context, op.IComp, op.Bop, lhs, rhs, op.SrcPred, op.SrcPredInv, op.Dest, op.BVal, op.Signed, op.X, op.WriteCC);
+ }
+
+ // Integer compare-and-set whose second operand is the sign-converted 20-bit immediate.
+ public static void IsetI(EmitterContext context)
+ {
+     InstIsetI op = context.GetOp<InstIsetI>();
+
+     Operand lhs = GetSrcReg(context, op.SrcA);
+     Operand rhs = GetSrcImm(context, Imm20ToSInt(op.Imm20));
+
+     EmitIset(context, op.IComp, op.Bop, lhs, rhs, op.SrcPred, op.SrcPredInv, op.Dest, op.BVal, op.Signed, op.X, op.WriteCC);
+ }
+
+ // Integer compare-and-set whose second operand comes from a constant buffer.
+ public static void IsetC(EmitterContext context)
+ {
+     InstIsetC op = context.GetOp<InstIsetC>();
+
+     Operand lhs = GetSrcReg(context, op.SrcA);
+     Operand rhs = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);
+
+     EmitIset(context, op.IComp, op.Bop, lhs, rhs, op.SrcPred, op.SrcPredInv, op.Dest, op.BVal, op.Signed, op.X, op.WriteCC);
+ }
+
+ // Integer compare writing predicate registers, with both operands read from registers.
+ public static void IsetpR(EmitterContext context)
+ {
+     InstIsetpR op = context.GetOp<InstIsetpR>();
+
+     Operand lhs = GetSrcReg(context, op.SrcA);
+     Operand rhs = GetSrcReg(context, op.SrcB);
+
+     EmitIsetp(context, op.IComp, op.Bop, lhs, rhs, op.SrcPred, op.SrcPredInv, op.DestPred, op.DestPredInv, op.Signed, op.X);
+ }
+
+ // Integer compare writing predicate registers, second operand is the sign-converted 20-bit immediate.
+ public static void IsetpI(EmitterContext context)
+ {
+     InstIsetpI op = context.GetOp<InstIsetpI>();
+
+     Operand lhs = GetSrcReg(context, op.SrcA);
+     Operand rhs = GetSrcImm(context, Imm20ToSInt(op.Imm20));
+
+     EmitIsetp(context, op.IComp, op.Bop, lhs, rhs, op.SrcPred, op.SrcPredInv, op.DestPred, op.DestPredInv, op.Signed, op.X);
+ }
+
+ // Integer compare writing predicate registers, second operand comes from a constant buffer.
+ public static void IsetpC(EmitterContext context)
+ {
+     InstIsetpC op = context.GetOp<InstIsetpC>();
+
+     Operand lhs = GetSrcReg(context, op.SrcA);
+     Operand rhs = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);
+
+     EmitIsetp(context, op.IComp, op.Bop, lhs, rhs, op.SrcPred, op.SrcPredInv, op.DestPred, op.DestPredInv, op.Signed, op.X);
+ }
+
+ // Compares srcC against zero using cmpOp (signed or unsigned per isSigned) and
+ // copies srcA to register rd when the comparison holds, srcB otherwise.
+ private static void EmitIcmp(
+     EmitterContext context,
+     IComp cmpOp,
+     Operand srcA,
+     Operand srcB,
+     Operand srcC,
+     int rd,
+     bool isSigned)
+ {
+     Operand condition = GetIntComparison(context, cmpOp, srcC, Const(0), isSigned);
+     Operand selected = context.ConditionalSelect(condition, srcA, srcB);
+
+     context.Copy(GetDest(rd), selected);
+ }
+
+ // Compares srcA with srcB, combines the outcome with the (optionally inverted)
+ // source predicate through logicOp, and writes the result to register rd.
+ // With boolFloat set, the result is first turned into ConstF(1) / Const(0) via a
+ // conditional select and the FP flag helper is used; otherwise the combined
+ // value is stored as is and the integer flag helper is used.
+ private static void EmitIset(
+     EmitterContext context,
+     IComp cmpOp,
+     BoolOp logicOp,
+     Operand srcA,
+     Operand srcB,
+     int srcPred,
+     bool srcPredInv,
+     int rd,
+     bool boolFloat,
+     bool isSigned,
+     bool extended,
+     bool writeCC)
+ {
+     Operand comparison = GetIntComparison(context, cmpOp, srcA, srcB, isSigned, extended);
+     Operand predicate = GetPredicate(context, srcPred, srcPredInv);
+     Operand combined = GetPredLogicalOp(context, logicOp, comparison, predicate);
+
+     Operand dest = GetDest(rd);
+
+     if (!boolFloat)
+     {
+         context.Copy(dest, combined);
+
+         SetZnFlags(context, combined, writeCC, extended);
+
+         return;
+     }
+
+     Operand asFloat = context.ConditionalSelect(combined, ConstF(1), Const(0));
+
+     context.Copy(dest, asFloat);
+
+     SetFPZnFlags(context, asFloat, writeCC);
+ }
+
+ // Compares srcA with srcB and writes two predicate registers: destPred receives
+ // the comparison combined with the source predicate through logicOp, and
+ // destPredInv receives the bitwise-negated comparison combined the same way.
+ private static void EmitIsetp(
+     EmitterContext context,
+     IComp cmpOp,
+     BoolOp logicOp,
+     Operand srcA,
+     Operand srcB,
+     int srcPred,
+     bool srcPredInv,
+     int destPred,
+     int destPredInv,
+     bool isSigned,
+     bool extended)
+ {
+     Operand comparison = GetIntComparison(context, cmpOp, srcA, srcB, isSigned, extended);
+     Operand negated = context.BitwiseNot(comparison);
+     Operand predicate = GetPredicate(context, srcPred, srcPredInv);
+
+     Operand first = GetPredLogicalOp(context, logicOp, comparison, predicate);
+     Operand second = GetPredLogicalOp(context, logicOp, negated, predicate);
+
+     context.Copy(Register(destPred, RegisterType.Predicate), first);
+     context.Copy(Register(destPredInv, RegisterType.Predicate), second);
+ }
+
+ // Dispatches to the extended (flag-aware) comparison when extended is set,
+ // and to the plain comparison otherwise.
+ private static Operand GetIntComparison(
+     EmitterContext context,
+     IComp cond,
+     Operand srcA,
+     Operand srcB,
+     bool isSigned,
+     bool extended)
+ {
+     if (extended)
+     {
+         return GetIntComparisonExtended(context, cond, srcA, srcB, isSigned);
+     }
+
+     return GetIntComparison(context, cond, srcA, srcB, isSigned);
+ }
+
+ // Builds a comparison of srcA and srcB that also takes the current ZF/CF flags
+ // into account, following the per-case formulas below (xh/yh are srcA/srcB; the
+ // xl/yl relation is read from the flags).
+ // NOTE(review): the flags are presumably left by an earlier instruction that
+ // processed the low words of a wider value - confirm against the callers.
+ // IComp.T and IComp.F yield constant true/false; any other unknown condition
+ // throws ArgumentException.
+ private static Operand GetIntComparisonExtended(EmitterContext context, IComp cond, Operand srcA, Operand srcB, bool isSigned)
+ {
+     Operand res;
+
+     if (cond == IComp.T)
+     {
+         res = Const(IrConsts.True);
+     }
+     else if (cond == IComp.F)
+     {
+         res = Const(IrConsts.False);
+     }
+     else
+     {
+         // A subtract/add-with-borrow result used to be computed here, but every
+         // case below assigns res from scratch, so that value was never consumed.
+         switch (cond)
+         {
+             case IComp.Eq: // r = xh == yh && xl == yl
+                 res = context.BitwiseAnd(context.ICompareEqual(srcA, srcB), GetZF());
+                 break;
+             case IComp.Lt: // r = xh < yh || (xh == yh && xl < yl)
+                 Operand notC = context.BitwiseNot(GetCF());
+                 Operand prevLt = context.BitwiseAnd(context.ICompareEqual(srcA, srcB), notC);
+                 res = isSigned
+                     ? context.BitwiseOr(context.ICompareLess(srcA, srcB), prevLt)
+                     : context.BitwiseOr(context.ICompareLessUnsigned(srcA, srcB), prevLt);
+                 break;
+             case IComp.Le: // r = xh < yh || (xh == yh && xl <= yl)
+                 Operand zOrNotC = context.BitwiseOr(GetZF(), context.BitwiseNot(GetCF()));
+                 Operand prevLe = context.BitwiseAnd(context.ICompareEqual(srcA, srcB), zOrNotC);
+                 res = isSigned
+                     ? context.BitwiseOr(context.ICompareLess(srcA, srcB), prevLe)
+                     : context.BitwiseOr(context.ICompareLessUnsigned(srcA, srcB), prevLe);
+                 break;
+             case IComp.Gt: // r = xh > yh || (xh == yh && xl > yl)
+                 Operand notZAndC = context.BitwiseAnd(context.BitwiseNot(GetZF()), GetCF());
+                 Operand prevGt = context.BitwiseAnd(context.ICompareEqual(srcA, srcB), notZAndC);
+                 res = isSigned
+                     ? context.BitwiseOr(context.ICompareGreater(srcA, srcB), prevGt)
+                     : context.BitwiseOr(context.ICompareGreaterUnsigned(srcA, srcB), prevGt);
+                 break;
+             case IComp.Ge: // r = xh > yh || (xh == yh && xl >= yl)
+                 Operand prevGe = context.BitwiseAnd(context.ICompareEqual(srcA, srcB), GetCF());
+                 res = isSigned
+                     ? context.BitwiseOr(context.ICompareGreater(srcA, srcB), prevGe)
+                     : context.BitwiseOr(context.ICompareGreaterUnsigned(srcA, srcB), prevGe);
+                 break;
+             case IComp.Ne: // r = xh != yh || xl != yl
+                 res = context.BitwiseOr(context.ICompareNotEqual(srcA, srcB), context.BitwiseNot(GetZF()));
+                 break;
+             default:
+                 throw new ArgumentException($"Unexpected condition \"{cond}\".");
+         }
+     }
+
+     return res;
+ }
+
+ // Emits a single comparison instruction for cond applied to srcA and srcB.
+ // IComp.T and IComp.F yield constant true/false without emitting a comparison.
+ // The ordering comparisons use the U32 instruction variants unless isSigned is
+ // set; equality and inequality use the same instruction either way.
+ // Throws InvalidOperationException for any other condition value.
+ private static Operand GetIntComparison(EmitterContext context, IComp cond, Operand srcA, Operand srcB, bool isSigned)
+ {
+     if (cond == IComp.T)
+     {
+         return Const(IrConsts.True);
+     }
+
+     if (cond == IComp.F)
+     {
+         return Const(IrConsts.False);
+     }
+
+     Instruction inst = cond switch
+     {
+         IComp.Eq => Instruction.CompareEqual,
+         IComp.Ne => Instruction.CompareNotEqual,
+         IComp.Lt => isSigned ? Instruction.CompareLess : Instruction.CompareLessU32,
+         IComp.Le => isSigned ? Instruction.CompareLessOrEqual : Instruction.CompareLessOrEqualU32,
+         IComp.Gt => isSigned ? Instruction.CompareGreater : Instruction.CompareGreaterU32,
+         IComp.Ge => isSigned ? Instruction.CompareGreaterOrEqual : Instruction.CompareGreaterOrEqualU32,
+         _ => throw new InvalidOperationException($"Unexpected condition \"{cond}\".")
+     };
+
+     return context.Add(inst, Local(), srcA, srcB);
+ }
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/Instructions/InstEmitIntegerLogical.cs b/src/Ryujinx.Graphics.Shader/Instructions/InstEmitIntegerLogical.cs
new file mode 100644
index 00000000..1f3f66ae
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/Instructions/InstEmitIntegerLogical.cs
@@ -0,0 +1,167 @@
+using Ryujinx.Graphics.Shader.Decoders;
+using Ryujinx.Graphics.Shader.IntermediateRepresentation;
+using Ryujinx.Graphics.Shader.Translation;
+
+using static Ryujinx.Graphics.Shader.Instructions.InstEmitAluHelper;
+using static Ryujinx.Graphics.Shader.Instructions.InstEmitHelper;
+using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
+
+namespace Ryujinx.Graphics.Shader.Instructions
+{
+ static partial class InstEmit
+ {
+ // Index of the always-true predicate register. Passed as the destination
+ // predicate by Lop32i, Lop3I and Lop3C; EmitLopPredWrite skips the predicate
+ // write when it receives this index.
+ private const int PT = RegisterConsts.PredicateTrueIndex;
+
+ // Two-input logic operation with both operands read from registers.
+ public static void LopR(EmitterContext context)
+ {
+     InstLopR op = context.GetOp<InstLopR>();
+
+     Operand lhs = GetSrcReg(context, op.SrcA);
+     Operand rhs = GetSrcReg(context, op.SrcB);
+
+     EmitLop(context, op.Lop, op.PredicateOp, lhs, rhs, op.Dest, op.DestPred, op.NegA, op.NegB, op.X, op.WriteCC);
+ }
+
+ // Two-input logic operation whose second operand is the sign-converted 20-bit immediate.
+ public static void LopI(EmitterContext context)
+ {
+     InstLopI op = context.GetOp<InstLopI>();
+
+     Operand lhs = GetSrcReg(context, op.SrcA);
+     Operand rhs = GetSrcImm(context, Imm20ToSInt(op.Imm20));
+
+     EmitLop(context, op.LogicOp, op.PredicateOp, lhs, rhs, op.Dest, op.DestPred, op.NegA, op.NegB, op.X, op.WriteCC);
+ }
+
+ // Two-input logic operation whose second operand comes from a constant buffer.
+ public static void LopC(EmitterContext context)
+ {
+     InstLopC op = context.GetOp<InstLopC>();
+
+     Operand lhs = GetSrcReg(context, op.SrcA);
+     Operand rhs = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);
+
+     EmitLop(context, op.LogicOp, op.PredicateOp, lhs, rhs, op.Dest, op.DestPred, op.NegA, op.NegB, op.X, op.WriteCC);
+ }
+
+ // Two-input logic operation with a 32-bit immediate as second operand.
+ // This form passes PT as destination predicate, so no predicate is written.
+ public static void Lop32i(EmitterContext context)
+ {
+     InstLop32i op = context.GetOp<InstLop32i>();
+
+     Operand lhs = GetSrcReg(context, op.SrcA);
+     Operand rhs = GetSrcImm(context, op.Imm32);
+
+     EmitLop(context, op.LogicOp, PredicateOp.F, lhs, rhs, op.Dest, PT, op.NegA, op.NegB, op.X, op.WriteCC);
+ }
+
+ // Three-input logic operation (truth table in op.Imm) with all operands read from registers.
+ public static void Lop3R(EmitterContext context)
+ {
+     InstLop3R op = context.GetOp<InstLop3R>();
+
+     Operand a = GetSrcReg(context, op.SrcA);
+     Operand b = GetSrcReg(context, op.SrcB);
+     Operand c = GetSrcReg(context, op.SrcC);
+
+     EmitLop3(context, op.Imm, op.PredicateOp, a, b, c, op.Dest, op.DestPred, op.X, op.WriteCC);
+ }
+
+ // Three-input logic operation whose second operand is the sign-converted 20-bit immediate.
+ // This form passes PT as destination predicate and never uses the extended flag mode.
+ public static void Lop3I(EmitterContext context)
+ {
+     InstLop3I op = context.GetOp<InstLop3I>();
+
+     Operand a = GetSrcReg(context, op.SrcA);
+     Operand b = GetSrcImm(context, Imm20ToSInt(op.Imm20));
+     Operand c = GetSrcReg(context, op.SrcC);
+
+     EmitLop3(context, op.Imm, PredicateOp.F, a, b, c, op.Dest, PT, false, op.WriteCC);
+ }
+
+ // Three-input logic operation whose second operand comes from a constant buffer.
+ // This form passes PT as destination predicate and never uses the extended flag mode.
+ public static void Lop3C(EmitterContext context)
+ {
+     InstLop3C op = context.GetOp<InstLop3C>();
+
+     Operand a = GetSrcReg(context, op.SrcA);
+     Operand b = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);
+     Operand c = GetSrcReg(context, op.SrcC);
+
+     EmitLop3(context, op.Imm, PredicateOp.F, a, b, c, op.Dest, PT, false, op.WriteCC);
+ }
+
+ // Emits a two-input logic operation.
+ // srcA and srcB are bitwise-inverted first when invertA / invertB are set, then
+ // combined with And, Or or Xor; any other logicOp value passes (the possibly
+ // inverted) srcB through unchanged. The result is used for the optional
+ // predicate write, copied to register rd, and handed to SetZnFlags.
+ private static void EmitLop(
+     EmitterContext context,
+     LogicOp logicOp,
+     PredicateOp predOp,
+     Operand srcA,
+     Operand srcB,
+     int rd,
+     int destPred,
+     bool invertA,
+     bool invertB,
+     bool extended,
+     bool writeCC)
+ {
+     srcA = context.BitwiseNot(srcA, invertA);
+     srcB = context.BitwiseNot(srcB, invertB);
+
+     // Each arm yields the value directly; assigning to res inside its own
+     // initializer was redundant.
+     Operand res = logicOp switch
+     {
+         LogicOp.And => context.BitwiseAnd(srcA, srcB),
+         LogicOp.Or => context.BitwiseOr(srcA, srcB),
+         LogicOp.Xor => context.BitwiseExclusiveOr(srcA, srcB),
+         _ => srcB
+     };
+
+     EmitLopPredWrite(context, res, predOp, destPred);
+
+     context.Copy(GetDest(rd), res);
+
+     SetZnFlags(context, res, writeCC, extended);
+ }
+
+ // Emits a three-input logic operation: the expression for truthTable is built
+ // by Lop3Expression.GetFromTruthTable, then used for the optional predicate
+ // write, copied to register rd, and handed to SetZnFlags.
+ private static void EmitLop3(
+     EmitterContext context,
+     int truthTable,
+     PredicateOp predOp,
+     Operand srcA,
+     Operand srcB,
+     Operand srcC,
+     int rd,
+     int destPred,
+     bool extended,
+     bool writeCC)
+ {
+     Operand combined = Lop3Expression.GetFromTruthTable(context, srcA, srcB, srcC, truthTable);
+
+     EmitLopPredWrite(context, combined, predOp, destPred);
+
+     Operand dest = GetDest(rd);
+
+     context.Copy(dest, combined);
+
+     SetZnFlags(context, combined, writeCC, extended);
+ }
+
+ // Writes the predicate output of a logic operation to predicate register pred.
+ // Nothing is written when pred is the always-true predicate index.
+ // predOp selects the value: constant false (F), constant true (T),
+ // "result == 0" (Z), or "result != 0" for every remaining value.
+ private static void EmitLopPredWrite(EmitterContext context, Operand result, PredicateOp predOp, int pred)
+ {
+     if (pred == RegisterConsts.PredicateTrueIndex)
+     {
+         return;
+     }
+
+     Operand predValue = predOp switch
+     {
+         PredicateOp.F => Const(IrConsts.False),
+         PredicateOp.T => Const(IrConsts.True),
+         PredicateOp.Z => context.ICompareEqual(result, Const(0)),
+         _ => context.ICompareNotEqual(result, Const(0)) // Non-zero test for all remaining values.
+     };
+
+     context.Copy(Register(pred, RegisterType.Predicate), predValue);
+ }
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/Instructions/InstEmitIntegerMinMax.cs b/src/Ryujinx.Graphics.Shader/Instructions/InstEmitIntegerMinMax.cs
new file mode 100644
index 00000000..73930ed1
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/Instructions/InstEmitIntegerMinMax.cs
@@ -0,0 +1,71 @@
+using Ryujinx.Graphics.Shader.Decoders;
+using Ryujinx.Graphics.Shader.IntermediateRepresentation;
+using Ryujinx.Graphics.Shader.Translation;
+
+using static Ryujinx.Graphics.Shader.Instructions.InstEmitAluHelper;
+using static Ryujinx.Graphics.Shader.Instructions.InstEmitHelper;
+
+namespace Ryujinx.Graphics.Shader.Instructions
+{
+ static partial class InstEmit
+ {
+ // Integer minimum/maximum with both operands read from registers.
+ public static void ImnmxR(EmitterContext context)
+ {
+     InstImnmxR op = context.GetOp<InstImnmxR>();
+
+     Operand lhs = GetSrcReg(context, op.SrcA);
+     Operand rhs = GetSrcReg(context, op.SrcB);
+     Operand selector = GetPredicate(context, op.SrcPred, op.SrcPredInv);
+
+     EmitImnmx(context, lhs, rhs, selector, op.Dest, op.Signed, op.WriteCC);
+ }
+
+ // Integer minimum/maximum whose second operand is the sign-converted 20-bit immediate.
+ public static void ImnmxI(EmitterContext context)
+ {
+     InstImnmxI op = context.GetOp<InstImnmxI>();
+
+     Operand lhs = GetSrcReg(context, op.SrcA);
+     Operand rhs = GetSrcImm(context, Imm20ToSInt(op.Imm20));
+     Operand selector = GetPredicate(context, op.SrcPred, op.SrcPredInv);
+
+     EmitImnmx(context, lhs, rhs, selector, op.Dest, op.Signed, op.WriteCC);
+ }
+
+ // Integer minimum/maximum whose second operand comes from a constant buffer.
+ public static void ImnmxC(EmitterContext context)
+ {
+     InstImnmxC op = context.GetOp<InstImnmxC>();
+
+     Operand lhs = GetSrcReg(context, op.SrcA);
+     Operand rhs = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);
+     Operand selector = GetPredicate(context, op.SrcPred, op.SrcPredInv);
+
+     EmitImnmx(context, lhs, rhs, selector, op.Dest, op.Signed, op.WriteCC);
+ }
+
+ // Computes both the minimum and the maximum of srcA and srcB (signed or
+ // unsigned per isSignedInt), then picks the minimum when srcPred holds and the
+ // maximum otherwise. The choice is copied to register rd and passed to SetZnFlags.
+ private static void EmitImnmx(
+     EmitterContext context,
+     Operand srcA,
+     Operand srcB,
+     Operand srcPred,
+     int rd,
+     bool isSignedInt,
+     bool writeCC)
+ {
+     Operand minimum;
+     Operand maximum;
+
+     if (isSignedInt)
+     {
+         minimum = context.IMinimumS32(srcA, srcB);
+         maximum = context.IMaximumS32(srcA, srcB);
+     }
+     else
+     {
+         minimum = context.IMinimumU32(srcA, srcB);
+         maximum = context.IMaximumU32(srcA, srcB);
+     }
+
+     Operand chosen = context.ConditionalSelect(srcPred, minimum, maximum);
+
+     context.Copy(GetDest(rd), chosen);
+
+     SetZnFlags(context, chosen, writeCC);
+
+     // TODO: X flags.
+ }
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/Instructions/InstEmitMemory.cs b/src/Ryujinx.Graphics.Shader/Instructions/InstEmitMemory.cs
new file mode 100644
index 00000000..c73c6b2a
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/Instructions/InstEmitMemory.cs
@@ -0,0 +1,541 @@
+using Ryujinx.Graphics.Shader.Decoders;
+using Ryujinx.Graphics.Shader.IntermediateRepresentation;
+using Ryujinx.Graphics.Shader.Translation;
+
+using static Ryujinx.Graphics.Shader.Instructions.InstEmitHelper;
+using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
+
+namespace Ryujinx.Graphics.Shader.Instructions
+{
+ static partial class InstEmit
+ {
+ // Selects which memory EmitLoad and EmitStore operate on.
+ private enum MemoryRegion
+ {
+ // Accessed through context.LoadLocal / context.StoreLocal.
+ Local,
+ // Accessed through context.LoadShared / context.StoreShared*.
+ Shared
+ }
+
+ // Atomic operation on global memory. The 20-bit immediate is sign-extended and
+ // added to the address formed from SrcA; the value returned by the atomic
+ // operation is copied to the destination register.
+ public static void Atom(EmitterContext context)
+ {
+     InstAtom op = context.GetOp<InstAtom>();
+
+     // Shift up and arithmetically back down to sign-extend the low 20 bits.
+     int signedOffset = (op.Imm20 << 12) >> 12;
+
+     Register baseReg = new Register(op.SrcA, RegisterType.Gpr);
+
+     (Operand addrLow, Operand addrHigh) = Get40BitsAddress(context, baseReg, op.E, signedOffset);
+
+     Operand operand = GetSrcReg(context, op.SrcB);
+
+     Operand previous = EmitAtomicOp(context, StorageKind.GlobalMemory, op.Op, op.Size, addrLow, addrHigh, operand);
+
+     context.Copy(GetDest(op.Dest), previous);
+ }
+
+ // Atomic operation on shared memory. The offset is the SrcA register value
+ // shifted right by two, plus the sign-extended 22-bit immediate; the value
+ // returned by the atomic operation is copied to the destination register.
+ public static void Atoms(EmitterContext context)
+ {
+     InstAtoms op = context.GetOp<InstAtoms>();
+
+     // Map the shared-memory size encoding onto AtomSize; anything unlisted is U32.
+     AtomSize atomSize = op.AtomsSize switch
+     {
+         AtomsSize.S32 => AtomSize.S32,
+         AtomsSize.U64 => AtomSize.U64,
+         AtomsSize.S64 => AtomSize.S64,
+         _ => AtomSize.U32
+     };
+
+     // Shift up and arithmetically back down to sign-extend the low 22 bits.
+     int signedOffset = (op.Imm22 << 10) >> 10;
+
+     Operand wordOffset = context.ShiftRightU32(GetSrcReg(context, op.SrcA), Const(2));
+
+     wordOffset = context.IAdd(wordOffset, Const(signedOffset));
+
+     Operand operand = GetSrcReg(context, op.SrcB);
+
+     Operand previous = EmitAtomicOp(context, StorageKind.SharedMemory, op.AtomOp, atomSize, wordOffset, Const(0), operand);
+
+     context.Copy(GetDest(op.Dest), previous);
+ }
+
+ // Loads one word (or two for B64) from a constant buffer into consecutive
+ // destination registers. Sizes above B64 are logged and ignored. For the Is and
+ // Isl address modes the upper 16 bits of SrcA are added to the slot and only
+ // the lower 16 bits contribute to the address. Sizes below B32 have the
+ // requested small integer extracted from the loaded word. Loading stops at the
+ // first destination register that is RZ.
+ public static void Ldc(EmitterContext context)
+ {
+     InstLdc op = context.GetOp<InstLdc>();
+
+     LsSize2 size = op.LsSize;
+
+     if (size > LsSize2.B64)
+     {
+         context.Config.GpuAccessor.Log($"Invalid LDC size: {op.LsSize}.");
+         return;
+     }
+
+     bool needsExtract = size < LsSize2.B32;
+     int wordCount = size == LsSize2.B64 ? 2 : 1;
+
+     Operand slot = Const(op.CbufSlot);
+     Operand baseReg = GetSrcReg(context, op.SrcA);
+
+     bool slotFromRegister = op.AddressMode == AddressMode.Is || op.AddressMode == AddressMode.Isl;
+
+     if (slotFromRegister)
+     {
+         slot = context.IAdd(slot, context.BitfieldExtractU32(baseReg, Const(16), Const(16)));
+         baseReg = context.BitwiseAnd(baseReg, Const(0xffff));
+     }
+
+     Operand byteAddress = context.IAdd(baseReg, Const(Imm16ToSInt(op.CbufOffset)));
+     Operand firstWord = context.ShiftRightU32(byteAddress, Const(2));
+     Operand bitOffset = GetBitOffset(context, byteAddress);
+
+     for (int i = 0; i < wordCount; i++)
+     {
+         Register target = new Register(op.Dest + i, RegisterType.Gpr);
+
+         if (target.IsRZ)
+         {
+             break;
+         }
+
+         Operand loaded = context.LoadConstant(slot, context.IAdd(firstWord, Const(i)));
+
+         if (needsExtract)
+         {
+             loaded = ExtractSmallInt(context, (LsSize)op.LsSize, bitOffset, loaded);
+         }
+
+         context.Copy(Register(target), loaded);
+     }
+ }
+
+ // Load from global memory; the 24-bit immediate is converted to a signed offset.
+ public static void Ldg(EmitterContext context)
+ {
+     InstLdg op = context.GetOp<InstLdg>();
+
+     int offset = Imm24ToSInt(op.Imm24);
+
+     EmitLdg(context, op.LsSize, op.SrcA, op.Dest, offset, op.E);
+ }
+
+ // Load from local memory; the 24-bit immediate is converted to a signed offset.
+ public static void Ldl(EmitterContext context)
+ {
+     InstLdl op = context.GetOp<InstLdl>();
+
+     Operand baseAddress = GetSrcReg(context, op.SrcA);
+     int offset = Imm24ToSInt(op.Imm24);
+
+     EmitLoad(context, MemoryRegion.Local, op.LsSize, baseAddress, op.Dest, offset);
+ }
+
+ // Load from shared memory; the 24-bit immediate is converted to a signed offset.
+ public static void Lds(EmitterContext context)
+ {
+     InstLds op = context.GetOp<InstLds>();
+
+     Operand baseAddress = GetSrcReg(context, op.SrcA);
+     int offset = Imm24ToSInt(op.Imm24);
+
+     EmitLoad(context, MemoryRegion.Shared, op.LsSize, baseAddress, op.Dest, offset);
+ }
+
+ // Reduction on global memory: performs the atomic operation and discards the
+ // value it returns.
+ // NOTE(review): unlike Atom, op.Imm20 is passed here without sign extension -
+ // confirm whether this encoding's offset is meant to be unsigned.
+ // NOTE(review): the value operand is obtained through GetDest(op.SrcB) rather
+ // than GetSrcReg as in Atom - confirm this is intentional.
+ public static void Red(EmitterContext context)
+ {
+     InstRed op = context.GetOp<InstRed>();
+
+     Register baseReg = new Register(op.SrcA, RegisterType.Gpr);
+
+     (Operand addrLow, Operand addrHigh) = Get40BitsAddress(context, baseReg, op.E, op.Imm20);
+
+     Operand operand = GetDest(op.SrcB);
+
+     EmitAtomicOp(context, StorageKind.GlobalMemory, (AtomOp)op.RedOp, op.RedSize, addrLow, addrHigh, operand);
+ }
+
+ // Store to global memory; the 24-bit immediate is converted to a signed offset.
+ public static void Stg(EmitterContext context)
+ {
+     InstStg op = context.GetOp<InstStg>();
+
+     int offset = Imm24ToSInt(op.Imm24);
+
+     EmitStg(context, op.LsSize, op.SrcA, op.Dest, offset, op.E);
+ }
+
+ // Store to local memory; the 24-bit immediate is converted to a signed offset.
+ public static void Stl(EmitterContext context)
+ {
+     InstStl op = context.GetOp<InstStl>();
+
+     Operand baseAddress = GetSrcReg(context, op.SrcA);
+     int offset = Imm24ToSInt(op.Imm24);
+
+     EmitStore(context, MemoryRegion.Local, op.LsSize, baseAddress, op.Dest, offset);
+ }
+
+ // Store to shared memory; the 24-bit immediate is converted to a signed offset.
+ public static void Sts(EmitterContext context)
+ {
+     InstSts op = context.GetOp<InstSts>();
+
+     Operand baseAddress = GetSrcReg(context, op.SrcA);
+     int offset = Imm24ToSInt(op.Imm24);
+
+     EmitStore(context, MemoryRegion.Shared, op.LsSize, baseAddress, op.Dest, offset);
+ }
+
+ // Emits the atomic operation op on the given storage and returns its result.
+ // Add, And, Xor and Or accept the S32 and U32 types; Max and Min pick the
+ // signed or unsigned variant from the type. A handled operation combined with
+ // any other type is logged and yields Const(0). Operations not listed here
+ // yield Const(0) without logging.
+ private static Operand EmitAtomicOp(
+     EmitterContext context,
+     StorageKind storageKind,
+     AtomOp op,
+     AtomSize type,
+     Operand addrLow,
+     Operand addrHigh,
+     Operand value)
+ {
+     bool isS32 = type == AtomSize.S32;
+     bool isU32 = type == AtomSize.U32;
+     bool is32Bit = isS32 || isU32;
+
+     switch (op)
+     {
+         case AtomOp.Add when is32Bit:
+             return context.AtomicAdd(storageKind, addrLow, addrHigh, value);
+         case AtomOp.And when is32Bit:
+             return context.AtomicAnd(storageKind, addrLow, addrHigh, value);
+         case AtomOp.Xor when is32Bit:
+             return context.AtomicXor(storageKind, addrLow, addrHigh, value);
+         case AtomOp.Or when is32Bit:
+             return context.AtomicOr(storageKind, addrLow, addrHigh, value);
+         case AtomOp.Max when isS32:
+             return context.AtomicMaxS32(storageKind, addrLow, addrHigh, value);
+         case AtomOp.Max when isU32:
+             return context.AtomicMaxU32(storageKind, addrLow, addrHigh, value);
+         case AtomOp.Min when isS32:
+             return context.AtomicMinS32(storageKind, addrLow, addrHigh, value);
+         case AtomOp.Min when isU32:
+             return context.AtomicMinU32(storageKind, addrLow, addrHigh, value);
+
+         // Reached only when a handled operation was paired with an unsupported type.
+         case AtomOp.Add:
+         case AtomOp.And:
+         case AtomOp.Xor:
+         case AtomOp.Or:
+         case AtomOp.Max:
+         case AtomOp.Min:
+             context.Config.GpuAccessor.Log($"Invalid reduction type: {type}.");
+             break;
+     }
+
+     return Const(0);
+ }
+
+ // Loads 1, 2 (B64) or 4 (B128) words from local or shared memory into
+ // consecutive registers starting at rd. Sizes above B128 are logged and
+ // ignored. Sizes below B32 have the requested small integer extracted from the
+ // loaded word. Loading stops at the first destination register that is RZ.
+ private static void EmitLoad(
+     EmitterContext context,
+     MemoryRegion region,
+     LsSize2 size,
+     Operand srcA,
+     int rd,
+     int offset)
+ {
+     if (size > LsSize2.B128)
+     {
+         context.Config.GpuAccessor.Log($"Invalid load size: {size}.");
+         return;
+     }
+
+     bool needsExtract = size < LsSize2.B32;
+
+     int wordCount = size switch
+     {
+         LsSize2.B64 => 2,
+         LsSize2.B128 => 4,
+         _ => 1
+     };
+
+     Operand byteAddress = context.IAdd(srcA, Const(offset));
+     Operand firstWord = context.ShiftRightU32(byteAddress, Const(2)); // Word offset = byte offset / 4 (one word = 4 bytes).
+     Operand bitOffset = GetBitOffset(context, byteAddress);
+
+     for (int i = 0; i < wordCount; i++)
+     {
+         Register target = new Register(rd + i, RegisterType.Gpr);
+
+         if (target.IsRZ)
+         {
+             break;
+         }
+
+         Operand wordIndex = context.IAdd(firstWord, Const(i));
+
+         Operand loaded = region switch
+         {
+             MemoryRegion.Local => context.LoadLocal(wordIndex),
+             MemoryRegion.Shared => context.LoadShared(wordIndex),
+             _ => null
+         };
+
+         if (needsExtract)
+         {
+             loaded = ExtractSmallInt(context, (LsSize)size, bitOffset, loaded);
+         }
+
+         context.Copy(Register(target), loaded);
+     }
+ }
+
+ // Loads from global memory into consecutive registers starting at rd. The
+ // number of words comes from GetVectorCount; each word is read at the low
+ // address plus index * 4 with the same high address word. Sizes below B32 have
+ // the requested small integer extracted from the loaded word. Loading stops at
+ // the first destination register that is RZ.
+ private static void EmitLdg(
+     EmitterContext context,
+     LsSize size,
+     int ra,
+     int rd,
+     int offset,
+     bool extended)
+ {
+     bool needsExtract = size < LsSize.B32;
+     int wordCount = GetVectorCount(size);
+
+     Register baseReg = new Register(ra, RegisterType.Gpr);
+
+     (Operand addrLow, Operand addrHigh) = Get40BitsAddress(context, baseReg, extended, offset);
+
+     Operand bitOffset = GetBitOffset(context, addrLow);
+
+     for (int i = 0; i < wordCount; i++)
+     {
+         Register target = new Register(rd + i, RegisterType.Gpr);
+
+         if (target.IsRZ)
+         {
+             break;
+         }
+
+         Operand wordAddrLow = context.IAdd(addrLow, Const(i * 4));
+         Operand loaded = context.LoadGlobal(wordAddrLow, addrHigh);
+
+         if (needsExtract)
+         {
+             loaded = ExtractSmallInt(context, size, bitOffset, loaded);
+         }
+
+         context.Copy(Register(target), loaded);
+     }
+ }
+
+ // Stores 1, 2 (B64) or 4 (B128) words from consecutive registers starting at
+ // rd to local or shared memory. Sizes above B128 are logged and ignored.
+ // Small (8/16-bit) local stores are done as read-modify-write of the
+ // containing word; small shared stores use the dedicated 8/16-bit store
+ // operations on the byte offset.
+ private static void EmitStore(
+     EmitterContext context,
+     MemoryRegion region,
+     LsSize2 size,
+     Operand srcA,
+     int rd,
+     int offset)
+ {
+     if (size > LsSize2.B128)
+     {
+         context.Config.GpuAccessor.Log($"Invalid store size: {size}.");
+         return;
+     }
+
+     bool isSmallInt = size < LsSize2.B32;
+
+     int count = 1;
+
+     switch (size)
+     {
+         case LsSize2.B64: count = 2; break;
+         case LsSize2.B128: count = 4; break;
+     }
+
+     Operand baseOffset = context.IAdd(srcA, Const(offset));
+     Operand wordOffset = context.ShiftRightU32(baseOffset, Const(2));
+     Operand bitOffset = GetBitOffset(context, baseOffset);
+
+     for (int index = 0; index < count; index++)
+     {
+         // Once rd + index reaches the zero register index, the element must be
+         // sourced from RZ itself. Falling back to rd is only equivalent when the
+         // base register is RZ; for any other base it would re-read the base register.
+         int srcIndex = rd + index;
+
+         if (srcIndex >= RegisterConsts.RegisterZeroIndex)
+         {
+             srcIndex = RegisterConsts.RegisterZeroIndex;
+         }
+
+         Operand value = Register(srcIndex, RegisterType.Gpr);
+         Operand elemOffset = context.IAdd(wordOffset, Const(index));
+
+         if (isSmallInt && region == MemoryRegion.Local)
+         {
+             // Merge the small value into the word currently stored at the target.
+             Operand word = context.LoadLocal(elemOffset);
+
+             value = InsertSmallInt(context, (LsSize)size, bitOffset, word, value);
+         }
+
+         if (region == MemoryRegion.Local)
+         {
+             context.StoreLocal(elemOffset, value);
+         }
+         else if (region == MemoryRegion.Shared)
+         {
+             switch (size)
+             {
+                 case LsSize2.U8:
+                 case LsSize2.S8:
+                     context.StoreShared8(baseOffset, value);
+                     break;
+                 case LsSize2.U16:
+                 case LsSize2.S16:
+                     context.StoreShared16(baseOffset, value);
+                     break;
+                 default:
+                     context.StoreShared(elemOffset, value);
+                     break;
+             }
+         }
+     }
+ }
+
+ // Stores consecutive registers starting at rd to global memory. Sizes above
+ // B128 are logged and ignored. The number of words comes from GetVectorCount;
+ // each word is written at the low address plus index * 4 with the same high
+ // address word. 8-bit and 16-bit sizes use the dedicated narrow store operations.
+ private static void EmitStg(
+     EmitterContext context,
+     LsSize2 size,
+     int ra,
+     int rd,
+     int offset,
+     bool extended)
+ {
+     if (size > LsSize2.B128)
+     {
+         context.Config.GpuAccessor.Log($"Invalid store size: {size}.");
+         return;
+     }
+
+     int count = GetVectorCount((LsSize)size);
+
+     (Operand addrLow, Operand addrHigh) = Get40BitsAddress(context, new Register(ra, RegisterType.Gpr), extended, offset);
+
+     // The bit offset that used to be computed here was never read, so it is no longer emitted.
+     for (int index = 0; index < count; index++)
+     {
+         // Once rd + index reaches the zero register index, the element must be
+         // sourced from RZ itself. Falling back to rd is only equivalent when the
+         // base register is RZ; for any other base it would re-read the base register.
+         int srcIndex = rd + index;
+
+         if (srcIndex >= RegisterConsts.RegisterZeroIndex)
+         {
+             srcIndex = RegisterConsts.RegisterZeroIndex;
+         }
+
+         Operand value = Register(srcIndex, RegisterType.Gpr);
+
+         Operand addrLowOffset = context.IAdd(addrLow, Const(index * 4));
+
+         switch (size)
+         {
+             case LsSize2.U8:
+             case LsSize2.S8:
+                 context.StoreGlobal8(addrLowOffset, addrHigh, value);
+                 break;
+             case LsSize2.U16:
+             case LsSize2.S16:
+                 context.StoreGlobal16(addrLowOffset, addrHigh, value);
+                 break;
+             default:
+                 context.StoreGlobal(addrLowOffset, addrHigh, value);
+                 break;
+         }
+     }
+ }
+
+ // Number of 32-bit words moved for a load/store size:
+ // 2 for B64, 4 for B128 and UB128, 1 for everything else.
+ private static int GetVectorCount(LsSize size)
+ {
+     return size switch
+     {
+         LsSize.B64 => 2,
+         LsSize.B128 => 4,
+         LsSize.UB128 => 4,
+         _ => 1
+     };
+ }
+
+ // Builds the (low, high) address pair for a global memory access.
+ // The low word is register ra plus offset. The high word is register ra + 1
+ // when extended is set and ra is not RZ, and zero otherwise; in extended mode
+ // the unsigned carry out of the low-word addition is added to it.
+ // NOTE(review): callers pass sign-extended offsets that may be negative, yet
+ // only an unsigned carry is propagated to the high word - confirm that the
+ // high word does not need the offset's sign extension added as well.
+ private static (Operand, Operand) Get40BitsAddress(
+     EmitterContext context,
+     Register ra,
+     bool extended,
+     int offset)
+ {
+     Operand low = Register(ra);
+
+     Operand high = extended && !ra.IsRZ
+         ? Register(ra.Index + 1, RegisterType.Gpr)
+         : Const(0);
+
+     Operand displacement = Const(offset);
+
+     low = context.IAdd(low, displacement);
+
+     if (!extended)
+     {
+         return (low, high);
+     }
+
+     // The addition wrapped when the sum is below the added displacement.
+     Operand wrapped = context.ICompareLessUnsigned(low, displacement);
+
+     high = context.IAdd(high, context.ConditionalSelect(wrapped, Const(1), Const(0)));
+
+     return (low, high);
+ }
+
+ // Returns the bit position of the addressed byte inside its 32-bit word.
+ private static Operand GetBitOffset(EmitterContext context, Operand baseOffset)
+ {
+     // Note: bit offset = (baseOffset & 0b11) * 8.
+     // Addresses should be always aligned to the integer type,
+     // so we don't need to take unaligned addresses into account.
+     Operand byteInWord = context.BitwiseAnd(baseOffset, Const(3));
+
+     return context.ShiftLeft(byteInWord, Const(3));
+ }
+
+ // Shifts value right by bitOffset and widens the low 8 or 16 bits to 32 bits:
+ // zero extension for U8/U16, sign extension for S8/S16. For any other size
+ // the shifted value is returned as is.
+ private static Operand ExtractSmallInt(
+     EmitterContext context,
+     LsSize size,
+     Operand bitOffset,
+     Operand value)
+ {
+     Operand shifted = context.ShiftRightU32(value, bitOffset);
+
+     return size switch
+     {
+         LsSize.U8 => ZeroExtendTo32(context, shifted, 8),
+         LsSize.U16 => ZeroExtendTo32(context, shifted, 16),
+         LsSize.S8 => SignExtendTo32(context, shifted, 8),
+         LsSize.S16 => SignExtendTo32(context, shifted, 16),
+         _ => shifted
+     };
+ }
+
+ // Masks value to 8 or 16 bits (per size) and inserts it into word at
+ // bitOffset, returning the merged word. For sizes other than U8/S8/U16/S16
+ // the value is returned untouched.
+ private static Operand InsertSmallInt(
+     EmitterContext context,
+     LsSize size,
+     Operand bitOffset,
+     Operand word,
+     Operand value)
+ {
+     int bits = size switch
+     {
+         LsSize.U8 => 8,
+         LsSize.S8 => 8,
+         LsSize.U16 => 16,
+         LsSize.S16 => 16,
+         _ => 0
+     };
+
+     if (bits == 0)
+     {
+         return value;
+     }
+
+     Operand masked = context.BitwiseAnd(value, Const(bits == 8 ? 0xff : 0xffff));
+
+     return context.BitfieldInsert(word, masked, bitOffset, Const(bits));
+ }
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/Instructions/InstEmitMove.cs b/src/Ryujinx.Graphics.Shader/Instructions/InstEmitMove.cs
new file mode 100644
index 00000000..9992ac37
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/Instructions/InstEmitMove.cs
@@ -0,0 +1,237 @@
+using Ryujinx.Graphics.Shader.Decoders;
+using Ryujinx.Graphics.Shader.IntermediateRepresentation;
+using Ryujinx.Graphics.Shader.Translation;
+
+using static Ryujinx.Graphics.Shader.Instructions.InstEmitHelper;
+using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
+
+namespace Ryujinx.Graphics.Shader.Instructions
+{
+ static partial class InstEmit
+ {
+        /// <summary>
+        /// Emits the register-source form of MOV: copies register SrcA into the destination register.
+        /// </summary>
+        /// <param name="context">Emitter context positioned on the instruction</param>
+        public static void MovR(EmitterContext context)
+        {
+            var inst = context.GetOp<InstMovR>();
+
+            Operand dest = GetDest(inst.Dest);
+            Operand source = GetSrcReg(context, inst.SrcA);
+
+            context.Copy(dest, source);
+        }
+
+        /// <summary>
+        /// Emits the immediate-source form of MOV: copies the Imm20 immediate into the destination register.
+        /// </summary>
+        /// <param name="context">Emitter context positioned on the instruction</param>
+        public static void MovI(EmitterContext context)
+        {
+            var inst = context.GetOp<InstMovI>();
+
+            Operand dest = GetDest(inst.Dest);
+            Operand source = GetSrcImm(context, inst.Imm20);
+
+            context.Copy(dest, source);
+        }
+
+        /// <summary>
+        /// Emits the constant-buffer-source form of MOV: copies the value read from the
+        /// (CbufSlot, CbufOffset) location into the destination register.
+        /// </summary>
+        /// <param name="context">Emitter context positioned on the instruction</param>
+        public static void MovC(EmitterContext context)
+        {
+            var inst = context.GetOp<InstMovC>();
+
+            Operand dest = GetDest(inst.Dest);
+            Operand source = GetSrcCbuf(context, inst.CbufSlot, inst.CbufOffset);
+
+            context.Copy(dest, source);
+        }
+
+        /// <summary>
+        /// Emits MOV32I: copies the Imm32 immediate into the destination register.
+        /// </summary>
+        /// <param name="context">Emitter context positioned on the instruction</param>
+        public static void Mov32i(EmitterContext context)
+        {
+            var inst = context.GetOp<InstMov32i>();
+
+            Operand dest = GetDest(inst.Dest);
+            Operand source = GetSrcImm(context, inst.Imm32);
+
+            context.Copy(dest, source);
+        }
+
+        /// <summary>
+        /// Emits the register-mask form of R2P (register to predicates), with the mask taken from register SrcB.
+        /// </summary>
+        /// <param name="context">Emitter context positioned on the instruction</param>
+        public static void R2pR(EmitterContext context)
+        {
+            var inst = context.GetOp<InstR2pR>();
+
+            // Arguments are evaluated left to right: value first, then mask.
+            EmitR2p(
+                context,
+                GetSrcReg(context, inst.SrcA),
+                GetSrcReg(context, inst.SrcB),
+                inst.ByteSel,
+                inst.Ccpr);
+        }
+
+        /// <summary>
+        /// Emits the immediate-mask form of R2P (register to predicates), with the mask taken
+        /// from the sign-converted Imm20 immediate.
+        /// </summary>
+        /// <param name="context">Emitter context positioned on the instruction</param>
+        public static void R2pI(EmitterContext context)
+        {
+            var inst = context.GetOp<InstR2pI>();
+
+            // Arguments are evaluated left to right: value first, then mask.
+            EmitR2p(
+                context,
+                GetSrcReg(context, inst.SrcA),
+                GetSrcImm(context, Imm20ToSInt(inst.Imm20)),
+                inst.ByteSel,
+                inst.Ccpr);
+        }
+
+        /// <summary>
+        /// Emits the constant-buffer-mask form of R2P (register to predicates), with the mask read
+        /// from the (CbufSlot, CbufOffset) location.
+        /// </summary>
+        /// <param name="context">Emitter context positioned on the instruction</param>
+        public static void R2pC(EmitterContext context)
+        {
+            var inst = context.GetOp<InstR2pC>();
+
+            // Arguments are evaluated left to right: value first, then mask.
+            EmitR2p(
+                context,
+                GetSrcReg(context, inst.SrcA),
+                GetSrcCbuf(context, inst.CbufSlot, inst.CbufOffset),
+                inst.ByteSel,
+                inst.Ccpr);
+        }
+
+        /// <summary>
+        /// Emits S2R: reads the special register selected by the instruction and copies
+        /// the value into the destination register. Special registers that are not
+        /// handled here read as zero.
+        /// </summary>
+        /// <param name="context">Emitter context positioned on the instruction</param>
+        public static void S2r(EmitterContext context)
+        {
+            var inst = context.GetOp<InstS2r>();
+
+            // Loads a single component of a vector input variable.
+            Operand LoadComponent(IoVariable variable, int component)
+            {
+                return context.Load(StorageKind.Input, variable, null, Const(component));
+            }
+
+            // Packs the three thread ID components into one word:
+            // X stays at bit 0, Y is placed at bit 16 and Z at bit 26.
+            Operand LoadPackedThreadId()
+            {
+                Operand x = LoadComponent(IoVariable.ThreadId, 0);
+                Operand y = LoadComponent(IoVariable.ThreadId, 1);
+                Operand z = LoadComponent(IoVariable.ThreadId, 2);
+
+                Operand yField = context.ShiftLeft(y, Const(16));
+                Operand zField = context.ShiftLeft(z, Const(26));
+
+                return context.BitwiseOr(x, context.BitwiseOr(yField, zField));
+            }
+
+            // Builds the invocation info word, which carries the vertex count on bits 16 and up.
+            Operand LoadInvocationInfo()
+            {
+                ShaderStage stage = context.Config.Stage;
+
+                if (stage == ShaderStage.Compute || stage == ShaderStage.Fragment)
+                {
+                    return Const(0);
+                }
+
+                // Note: Lowest 8-bits seems to contain some primitive index,
+                // but it seems to be NVIDIA implementation specific as it's only used
+                // to calculate ISBE offsets, so we can just keep it as zero.
+
+                if (stage == ShaderStage.TessellationControl || stage == ShaderStage.TessellationEvaluation)
+                {
+                    return context.ShiftLeft(context.Load(StorageKind.Input, IoVariable.PatchVertices), Const(16));
+                }
+
+                return Const(context.Config.GpuAccessor.QueryPrimitiveTopology().ToInputVertices() << 16);
+            }
+
+            Operand src = inst.SReg switch
+            {
+                SReg.LaneId => context.Load(StorageKind.Input, IoVariable.SubgroupLaneId),
+                SReg.InvocationId => context.Load(StorageKind.Input, IoVariable.InvocationId),
+
+                // TODO: Use value from Y direction GPU register.
+                SReg.YDirection => ConstF(1),
+
+                // The thread kill input is only loaded on fragment shaders.
+                SReg.ThreadKill => context.Config.Stage == ShaderStage.Fragment
+                    ? context.Load(StorageKind.Input, IoVariable.ThreadKill)
+                    : Const(0),
+
+                SReg.InvocationInfo => LoadInvocationInfo(),
+
+                SReg.TId => LoadPackedThreadId(),
+                SReg.TIdX => LoadComponent(IoVariable.ThreadId, 0),
+                SReg.TIdY => LoadComponent(IoVariable.ThreadId, 1),
+                SReg.TIdZ => LoadComponent(IoVariable.ThreadId, 2),
+
+                SReg.CtaIdX => LoadComponent(IoVariable.CtaId, 0),
+                SReg.CtaIdY => LoadComponent(IoVariable.CtaId, 1),
+                SReg.CtaIdZ => LoadComponent(IoVariable.CtaId, 2),
+
+                // Only component 0 of each subgroup mask is read.
+                SReg.EqMask => LoadComponent(IoVariable.SubgroupEqMask, 0),
+                SReg.LtMask => LoadComponent(IoVariable.SubgroupLtMask, 0),
+                SReg.LeMask => LoadComponent(IoVariable.SubgroupLeMask, 0),
+                SReg.GtMask => LoadComponent(IoVariable.SubgroupGtMask, 0),
+                SReg.GeMask => LoadComponent(IoVariable.SubgroupGeMask, 0),
+
+                _ => Const(0)
+            };
+
+            context.Copy(GetDest(inst.Dest), src);
+        }
+
+        /// <summary>
+        /// Emits the register form of SEL: selects between register SrcA and register SrcB
+        /// according to the source predicate.
+        /// </summary>
+        /// <param name="context">Emitter context positioned on the instruction</param>
+        public static void SelR(EmitterContext context)
+        {
+            var inst = context.GetOp<InstSelR>();
+
+            // Arguments are evaluated left to right: A, B, then the predicate.
+            EmitSel(
+                context,
+                GetSrcReg(context, inst.SrcA),
+                GetSrcReg(context, inst.SrcB),
+                GetPredicate(context, inst.SrcPred, inst.SrcPredInv),
+                inst.Dest);
+        }
+
+        /// <summary>
+        /// Emits the immediate form of SEL: selects between register SrcA and the sign-converted
+        /// Imm20 immediate according to the source predicate.
+        /// </summary>
+        /// <param name="context">Emitter context positioned on the instruction</param>
+        public static void SelI(EmitterContext context)
+        {
+            var inst = context.GetOp<InstSelI>();
+
+            // Arguments are evaluated left to right: A, B, then the predicate.
+            EmitSel(
+                context,
+                GetSrcReg(context, inst.SrcA),
+                GetSrcImm(context, Imm20ToSInt(inst.Imm20)),
+                GetPredicate(context, inst.SrcPred, inst.SrcPredInv),
+                inst.Dest);
+        }
+
+        /// <summary>
+        /// Emits the constant buffer form of SEL: selects between register SrcA and the value at
+        /// (CbufSlot, CbufOffset) according to the source predicate.
+        /// </summary>
+        /// <param name="context">Emitter context positioned on the instruction</param>
+        public static void SelC(EmitterContext context)
+        {
+            var inst = context.GetOp<InstSelC>();
+
+            // Arguments are evaluated left to right: A, B, then the predicate.
+            EmitSel(
+                context,
+                GetSrcReg(context, inst.SrcA),
+                GetSrcCbuf(context, inst.CbufSlot, inst.CbufOffset),
+                GetPredicate(context, inst.SrcPred, inst.SrcPredInv),
+                inst.Dest);
+        }
+
+        /// <summary>
+        /// Emits the copy of register bits into the predicate registers.
+        /// For each predicate index, the predicate is replaced by the matching bit of the selected
+        /// byte of <paramref name="value"/> when the same bit index is set on <paramref name="mask"/>,
+        /// and keeps its current value otherwise.
+        /// </summary>
+        /// <param name="context">Emitter context receiving the generated operations</param>
+        /// <param name="value">Register value that provides the new predicate bits</param>
+        /// <param name="mask">Mask selecting which predicates are written</param>
+        /// <param name="byteSel">Byte of <paramref name="value"/> that the bits are taken from</param>
+        /// <param name="ccpr">True for the condition code variant, which is not implemented and only logs</param>
+        private static void EmitR2p(EmitterContext context, Operand value, Operand mask, ByteSel byteSel, bool ccpr)
+        {
+            if (ccpr)
+            {
+                // TODO: Support Register to condition code flags copy.
+                context.Config.GpuAccessor.Log("R2P.CC not implemented.");
+                return;
+            }
+
+            // Produces a boolean that is true when the given bit of the source is set.
+            Operand IsBitSet(Operand source, int bitIndex)
+            {
+                return context.ICompareNotEqual(context.BitwiseAnd(source, Const(1 << bitIndex)), Const(0));
+            }
+
+            // Bit offset of the selected byte inside the value.
+            int byteShift = (int)byteSel * 8;
+
+            for (int index = 0; index < RegisterConsts.PredsCount; index++)
+            {
+                Operand predicate = Register(index, RegisterType.Predicate);
+
+                Operand updated = context.ConditionalSelect(
+                    IsBitSet(mask, index),
+                    IsBitSet(value, index + byteShift),
+                    predicate);
+
+                context.Copy(predicate, updated);
+            }
+        }
+
+        /// <summary>
+        /// Emits a conditional select and stores the result on a general purpose register.
+        /// </summary>
+        /// <param name="context">Emitter context receiving the generated operations</param>
+        /// <param name="srcA">Value passed as the first choice of the select</param>
+        /// <param name="srcB">Value passed as the second choice of the select</param>
+        /// <param name="srcPred">Condition of the select</param>
+        /// <param name="rd">Index of the destination register</param>
+        private static void EmitSel(EmitterContext context, Operand srcA, Operand srcB, Operand srcPred, int rd)
+        {
+            Operand selected = context.ConditionalSelect(srcPred, srcA, srcB);
+            Operand dest = GetDest(rd);
+
+            context.Copy(dest, selected);
+        }
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/Instructions/InstEmitMultifunction.cs b/src/Ryujinx.Graphics.Shader/Instructions/InstEmitMultifunction.cs
new file mode 100644
index 00000000..1ea7d321
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/Instructions/InstEmitMultifunction.cs
@@ -0,0 +1,97 @@
+using Ryujinx.Graphics.Shader.Decoders;
+using Ryujinx.Graphics.Shader.IntermediateRepresentation;
+using Ryujinx.Graphics.Shader.Translation;
+
+using static Ryujinx.Graphics.Shader.Instructions.InstEmitHelper;
+
+namespace Ryujinx.Graphics.Shader.Instructions
+{
+ static partial class InstEmit
+ {
+        /// <summary>
+        /// Emits the register form of RRO (range reduction operator), with the source on register SrcB.
+        /// </summary>
+        /// <param name="context">Emitter context positioned on the instruction</param>
+        public static void RroR(EmitterContext context)
+        {
+            var inst = context.GetOp<InstRroR>();
+
+            Operand source = GetSrcReg(context, inst.SrcB);
+
+            EmitRro(context, source, inst.Dest, inst.AbsB, inst.NegB);
+        }
+
+        /// <summary>
+        /// Emits the immediate form of RRO (range reduction operator), with the source being
+        /// the Imm20 immediate converted to float.
+        /// </summary>
+        /// <param name="context">Emitter context positioned on the instruction</param>
+        public static void RroI(EmitterContext context)
+        {
+            var inst = context.GetOp<InstRroI>();
+
+            Operand source = GetSrcImm(context, Imm20ToFloat(inst.Imm20));
+
+            EmitRro(context, source, inst.Dest, inst.AbsB, inst.NegB);
+        }
+
+        /// <summary>
+        /// Emits the constant buffer form of RRO (range reduction operator), with the source
+        /// read from the (CbufSlot, CbufOffset) location.
+        /// </summary>
+        /// <param name="context">Emitter context positioned on the instruction</param>
+        public static void RroC(EmitterContext context)
+        {
+            var inst = context.GetOp<InstRroC>();
+
+            Operand source = GetSrcCbuf(context, inst.CbufSlot, inst.CbufOffset);
+
+            EmitRro(context, source, inst.Dest, inst.AbsB, inst.NegB);
+        }
+
+        /// <summary>
+        /// Emits MUFU (multifunction unit operation): applies the selected function to
+        /// register SrcA (after the optional absolute value/negation), optionally saturates
+        /// the result, and stores it on the destination register.
+        /// An unknown operation is logged and the input is passed through unchanged.
+        /// </summary>
+        /// <param name="context">Emitter context positioned on the instruction</param>
+        public static void Mufu(EmitterContext context)
+        {
+            var inst = context.GetOp<InstMufu>();
+
+            Operand input = context.FPAbsNeg(GetSrcReg(context, inst.SrcA), inst.AbsA, inst.NegA);
+
+            // Reports the unknown operation and forwards the input as the result.
+            Operand LogInvalid(Operand passthrough)
+            {
+                context.Config.GpuAccessor.Log($"Invalid MUFU operation \"{inst.MufuOp}\".");
+
+                return passthrough;
+            }
+
+            // For the 64-bit variants, the input is used as the high word of a double
+            // whose low word is zero, and only the high word of the result is kept.
+            Operand result = inst.MufuOp switch
+            {
+                MufuOp.Cos => context.FPCosine(input),
+                MufuOp.Sin => context.FPSine(input),
+                MufuOp.Ex2 => context.FPExponentB2(input),
+                MufuOp.Lg2 => context.FPLogarithmB2(input),
+                MufuOp.Rcp => context.FPReciprocal(input),
+                MufuOp.Rsq => context.FPReciprocalSquareRoot(input),
+                MufuOp.Rcp64h => context.UnpackDouble2x32High(
+                    context.FPReciprocal(context.PackDouble2x32(OperandHelper.Const(0), input), Instruction.FP64)),
+                MufuOp.Rsq64h => context.UnpackDouble2x32High(
+                    context.FPReciprocalSquareRoot(context.PackDouble2x32(OperandHelper.Const(0), input), Instruction.FP64)),
+                MufuOp.Sqrt => context.FPSquareRoot(input),
+                _ => LogInvalid(input)
+            };
+
+            context.Copy(GetDest(inst.Dest), context.FPSaturate(result, inst.Sat));
+        }
+
+        /// <summary>
+        /// Emits the range reduction operator as a plain move with optional absolute value/negation.
+        /// </summary>
+        /// <param name="context">Emitter context receiving the generated operations</param>
+        /// <param name="srcB">Source value</param>
+        /// <param name="rd">Index of the destination register</param>
+        /// <param name="absB">True to take the absolute value of the source</param>
+        /// <param name="negB">True to negate the source</param>
+        private static void EmitRro(EmitterContext context, Operand srcB, int rd, bool absB, bool negB)
+        {
+            // No actual range reduction is done here: the instruction is translated
+            // as a simple move, since it should always be followed by a matching
+            // MUFU instruction.
+            Operand moved = context.FPAbsNeg(srcB, absB, negB);
+
+            context.Copy(GetDest(rd), moved);
+        }
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/Instructions/InstEmitNop.cs b/src/Ryujinx.Graphics.Shader/Instructions/InstEmitNop.cs
new file mode 100644
index 00000000..01144007
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/Instructions/InstEmitNop.cs
@@ -0,0 +1,15 @@
+using Ryujinx.Graphics.Shader.Decoders;
+using Ryujinx.Graphics.Shader.Translation;
+
+namespace Ryujinx.Graphics.Shader.Instructions
+{
+ static partial class InstEmit
+ {
+        /// <summary>
+        /// Emits NOP. No operation is generated for this instruction.
+        /// </summary>
+        /// <param name="context">Emitter context positioned on the instruction</param>
+        public static void Nop(EmitterContext context)
+        {
+            // The decoded operation carries nothing that needs to be emitted; the lookup is
+            // kept (into a discard rather than an unused local) so that the call itself
+            // still happens exactly as before.
+            _ = context.GetOp<InstNop>();
+
+            // No operation.
+        }
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/Instructions/InstEmitPredicate.cs b/src/Ryujinx.Graphics.Shader/Instructions/InstEmitPredicate.cs
new file mode 100644
index 00000000..d605661f
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/Instructions/InstEmitPredicate.cs
@@ -0,0 +1,54 @@
+using Ryujinx.Graphics.Shader.Decoders;
+using Ryujinx.Graphics.Shader.IntermediateRepresentation;
+using Ryujinx.Graphics.Shader.Translation;
+
+using static Ryujinx.Graphics.Shader.Instructions.InstEmitAluHelper;
+using static Ryujinx.Graphics.Shader.Instructions.InstEmitHelper;
+using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
+
+namespace Ryujinx.Graphics.Shader.Instructions
+{
+ static partial class InstEmit
+ {
+        /// <summary>
+        /// Emits PSET: combines three (optionally inverted) predicates with two logical
+        /// operations and stores the result on a general purpose register.
+        /// With BVal set, the stored value is the select between the float constant 1 and integer zero;
+        /// otherwise the combined predicate value is stored as is.
+        /// </summary>
+        /// <param name="context">Emitter context positioned on the instruction</param>
+        public static void Pset(EmitterContext context)
+        {
+            var inst = context.GetOp<InstPset>();
+
+            // Note that the first operand comes from Src2Pred and the second from Src1Pred.
+            Operand first = context.BitwiseNot(Register(inst.Src2Pred, RegisterType.Predicate), inst.Src2PredInv);
+            Operand second = context.BitwiseNot(Register(inst.Src1Pred, RegisterType.Predicate), inst.Src1PredInv);
+            Operand third = context.BitwiseNot(Register(inst.SrcPred, RegisterType.Predicate), inst.SrcPredInv);
+
+            Operand combined = GetPredLogicalOp(context, inst.BoolOpAB, first, second);
+            combined = GetPredLogicalOp(context, inst.BoolOpC, combined, third);
+
+            Operand dest = GetDest(inst.Dest);
+
+            Operand stored = inst.BVal
+                ? context.ConditionalSelect(combined, ConstF(1), Const(0))
+                : combined;
+
+            context.Copy(dest, stored);
+        }
+
+        /// <summary>
+        /// Emits PSETP: combines two (optionally inverted) predicates, then writes two predicates.
+        /// DestPred receives the combination further combined with the source predicate,
+        /// DestPredInv receives the negated combination combined with that same source predicate.
+        /// </summary>
+        /// <param name="context">Emitter context positioned on the instruction</param>
+        public static void Psetp(EmitterContext context)
+        {
+            var inst = context.GetOp<InstPsetp>();
+
+            // Note that the first operand comes from Src2Pred and the second from Src1Pred.
+            Operand first = context.BitwiseNot(Register(inst.Src2Pred, RegisterType.Predicate), inst.Src2PredInv);
+            Operand second = context.BitwiseNot(Register(inst.Src1Pred, RegisterType.Predicate), inst.Src1PredInv);
+
+            Operand result = GetPredLogicalOp(context, inst.BoolOpAB, first, second);
+
+            // The negation is taken before the third predicate is combined in.
+            Operand negatedResult = context.BitwiseNot(result);
+            Operand third = GetPredicate(context, inst.SrcPred, inst.SrcPredInv);
+
+            result = GetPredLogicalOp(context, inst.BoolOpC, result, third);
+            negatedResult = GetPredLogicalOp(context, inst.BoolOpC, negatedResult, third);
+
+            context.Copy(Register(inst.DestPred, RegisterType.Predicate), result);
+            context.Copy(Register(inst.DestPredInv, RegisterType.Predicate), negatedResult);
+        }
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/Instructions/InstEmitShift.cs b/src/Ryujinx.Graphics.Shader/Instructions/InstEmitShift.cs
new file mode 100644
index 00000000..2873cad8
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/Instructions/InstEmitShift.cs
@@ -0,0 +1,249 @@
+using Ryujinx.Graphics.Shader.Decoders;
+using Ryujinx.Graphics.Shader.IntermediateRepresentation;
+using Ryujinx.Graphics.Shader.Translation;
+
+using static Ryujinx.Graphics.Shader.Instructions.InstEmitHelper;
+using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
+
+namespace Ryujinx.Graphics.Shader.Instructions
+{
+ static partial class InstEmit
+ {
+        /// <summary>
+        /// Emits the left funnel shift (SHF.L) with the shift amount on register SrcB.
+        /// </summary>
+        /// <param name="context">Emitter context positioned on the instruction</param>
+        public static void ShfLR(EmitterContext context)
+        {
+            var inst = context.GetOp<InstShfLR>();
+
+            Operand srcA = GetSrcReg(context, inst.SrcA);
+            Operand shift = GetSrcReg(context, inst.SrcB);
+            Operand srcC = GetSrcReg(context, inst.SrcC);
+
+            EmitShf(context, inst.MaxShift, srcA, shift, srcC, inst.Dest, inst.M, left: true, writeCC: inst.WriteCC);
+        }
+
+        /// <summary>
+        /// Emits the right funnel shift (SHF.R) with the shift amount on register SrcB.
+        /// </summary>
+        /// <param name="context">Emitter context positioned on the instruction</param>
+        public static void ShfRR(EmitterContext context)
+        {
+            var inst = context.GetOp<InstShfRR>();
+
+            Operand srcA = GetSrcReg(context, inst.SrcA);
+            Operand shift = GetSrcReg(context, inst.SrcB);
+            Operand srcC = GetSrcReg(context, inst.SrcC);
+
+            EmitShf(context, inst.MaxShift, srcA, shift, srcC, inst.Dest, inst.M, left: false, writeCC: inst.WriteCC);
+        }
+
+        /// <summary>
+        /// Emits the left funnel shift (SHF.L) with the shift amount given by the Imm6 immediate.
+        /// </summary>
+        /// <param name="context">Emitter context positioned on the instruction</param>
+        public static void ShfLI(EmitterContext context)
+        {
+            var inst = context.GetOp<InstShfLI>();
+
+            Operand srcA = GetSrcReg(context, inst.SrcA);
+            Operand shift = Const(inst.Imm6);
+            Operand srcC = GetSrcReg(context, inst.SrcC);
+
+            EmitShf(context, inst.MaxShift, srcA, shift, srcC, inst.Dest, inst.M, left: true, writeCC: inst.WriteCC);
+        }
+
+        /// <summary>
+        /// Emits the right funnel shift (SHF.R) with the shift amount given by the Imm6 immediate.
+        /// </summary>
+        /// <param name="context">Emitter context positioned on the instruction</param>
+        public static void ShfRI(EmitterContext context)
+        {
+            var inst = context.GetOp<InstShfRI>();
+
+            Operand srcA = GetSrcReg(context, inst.SrcA);
+            Operand shift = Const(inst.Imm6);
+            Operand srcC = GetSrcReg(context, inst.SrcC);
+
+            EmitShf(context, inst.MaxShift, srcA, shift, srcC, inst.Dest, inst.M, left: false, writeCC: inst.WriteCC);
+        }
+
+        /// <summary>
+        /// Emits SHL (shift left) with the shift amount on register SrcB.
+        /// </summary>
+        /// <param name="context">Emitter context positioned on the instruction</param>
+        public static void ShlR(EmitterContext context)
+        {
+            var inst = context.GetOp<InstShlR>();
+
+            Operand value = GetSrcReg(context, inst.SrcA);
+            Operand shift = GetSrcReg(context, inst.SrcB);
+
+            EmitShl(context, value, shift, inst.Dest, inst.M);
+        }
+
+        /// <summary>
+        /// Emits SHL (shift left) with the shift amount given by the sign-converted Imm20 immediate.
+        /// </summary>
+        /// <param name="context">Emitter context positioned on the instruction</param>
+        public static void ShlI(EmitterContext context)
+        {
+            var inst = context.GetOp<InstShlI>();
+
+            Operand value = GetSrcReg(context, inst.SrcA);
+            Operand shift = GetSrcImm(context, Imm20ToSInt(inst.Imm20));
+
+            EmitShl(context, value, shift, inst.Dest, inst.M);
+        }
+
+        /// <summary>
+        /// Emits SHL (shift left) with the shift amount read from the (CbufSlot, CbufOffset) location.
+        /// </summary>
+        /// <param name="context">Emitter context positioned on the instruction</param>
+        public static void ShlC(EmitterContext context)
+        {
+            var inst = context.GetOp<InstShlC>();
+
+            Operand value = GetSrcReg(context, inst.SrcA);
+            Operand shift = GetSrcCbuf(context, inst.CbufSlot, inst.CbufOffset);
+
+            EmitShl(context, value, shift, inst.Dest, inst.M);
+        }
+
+        /// <summary>
+        /// Emits SHR (shift right) with the shift amount on register SrcB.
+        /// </summary>
+        /// <param name="context">Emitter context positioned on the instruction</param>
+        public static void ShrR(EmitterContext context)
+        {
+            var inst = context.GetOp<InstShrR>();
+
+            // Arguments are evaluated left to right: value first, then shift amount.
+            EmitShr(
+                context,
+                GetSrcReg(context, inst.SrcA),
+                GetSrcReg(context, inst.SrcB),
+                inst.Dest,
+                mask: inst.M,
+                bitReverse: inst.Brev,
+                isSigned: inst.Signed);
+        }
+
+        /// <summary>
+        /// Emits SHR (shift right) with the shift amount given by the sign-converted Imm20 immediate.
+        /// </summary>
+        /// <param name="context">Emitter context positioned on the instruction</param>
+        public static void ShrI(EmitterContext context)
+        {
+            var inst = context.GetOp<InstShrI>();
+
+            // Arguments are evaluated left to right: value first, then shift amount.
+            EmitShr(
+                context,
+                GetSrcReg(context, inst.SrcA),
+                GetSrcImm(context, Imm20ToSInt(inst.Imm20)),
+                inst.Dest,
+                mask: inst.M,
+                bitReverse: inst.Brev,
+                isSigned: inst.Signed);
+        }
+
+        /// <summary>
+        /// Emits SHR (shift right) with the shift amount read from the (CbufSlot, CbufOffset) location.
+        /// </summary>
+        /// <param name="context">Emitter context positioned on the instruction</param>
+        public static void ShrC(EmitterContext context)
+        {
+            var inst = context.GetOp<InstShrC>();
+
+            // Arguments are evaluated left to right: value first, then shift amount.
+            EmitShr(
+                context,
+                GetSrcReg(context, inst.SrcA),
+                GetSrcCbuf(context, inst.CbufSlot, inst.CbufOffset),
+                inst.Dest,
+                mask: inst.M,
+                bitReverse: inst.Brev,
+                isSigned: inst.Signed);
+        }
+
+        /// <summary>
+        /// Emits a funnel shift. The pair (A, C) is shifted by B, with A providing the bits
+        /// that enter the result from below (left shift) or being the shifted word (right shift).
+        /// </summary>
+        /// <param name="context">Emitter context receiving the generated operations</param>
+        /// <param name="maxShift">Shift mode; the 64-bit modes allow shifts of 32 and above, S64 makes the upper right shift arithmetic</param>
+        /// <param name="srcA">Low word operand</param>
+        /// <param name="srcB">Shift amount</param>
+        /// <param name="srcC">High word operand</param>
+        /// <param name="rd">Index of the destination register</param>
+        /// <param name="mask">True to wrap the shift amount (AND with limit - 1), false to produce zero for amounts at or above the limit</param>
+        /// <param name="left">True to shift left, false to shift right</param>
+        /// <param name="writeCC">True to update the zero and negative flags from the result</param>
+        private static void EmitShf(
+            EmitterContext context,
+            MaxShift maxShift,
+            Operand srcA,
+            Operand srcB,
+            Operand srcC,
+            int rd,
+            bool mask,
+            bool left,
+            bool writeCC)
+        {
+            bool isSigned64 = maxShift == MaxShift.S64;
+            bool is64 = isSigned64 || maxShift == MaxShift.U64;
+            int shiftLimit = is64 ? 64 : 32;
+
+            if (mask)
+            {
+                srcB = context.BitwiseAnd(srcB, Const(shiftLimit - 1));
+            }
+
+            Operand res;
+
+            if (left)
+            {
+                // res = (C << B) | (A >> (32 - B))
+                Operand high = context.ShiftLeft(srcC, srcB);
+                Operand complement = context.ISubtract(Const(32), srcB);
+                Operand carriedIn = context.ShiftRightU32(srcA, complement);
+
+                res = context.BitwiseOr(high, carriedIn);
+
+                if (is64)
+                {
+                    // res = B >= 32 ? A << (B - 32) : res
+                    Operand excess = context.ISubtract(srcB, Const(32));
+                    Operand fromLow = context.ShiftLeft(srcA, excess);
+                    Operand crossesWord = context.ICompareGreaterOrEqualUnsigned(srcB, Const(32));
+
+                    res = context.ConditionalSelect(crossesWord, fromLow, res);
+                }
+            }
+            else
+            {
+                // res = (A >> B) | (C << (32 - B))
+                Operand low = context.ShiftRightU32(srcA, srcB);
+                Operand complement = context.ISubtract(Const(32), srcB);
+                Operand carriedIn = context.ShiftLeft(srcC, complement);
+
+                res = context.BitwiseOr(low, carriedIn);
+
+                if (is64)
+                {
+                    // res = B >= 32 ? C >> (B - 32) : res
+                    Operand excess = context.ISubtract(srcB, Const(32));
+
+                    Operand fromHigh = isSigned64
+                        ? context.ShiftRightS32(srcC, excess)
+                        : context.ShiftRightU32(srcC, excess);
+
+                    Operand crossesWord = context.ICompareGreaterOrEqualUnsigned(srcB, Const(32));
+
+                    res = context.ConditionalSelect(crossesWord, fromHigh, res);
+                }
+            }
+
+            if (!mask)
+            {
+                // Clamped shift value: amounts at or above the limit give zero.
+                Operand inRange = context.ICompareLessUnsigned(srcB, Const(shiftLimit));
+
+                res = context.ConditionalSelect(inRange, res, Const(0));
+            }
+
+            context.Copy(GetDest(rd), res);
+
+            if (writeCC)
+            {
+                InstEmitAluHelper.SetZnFlags(context, res, writeCC);
+            }
+
+            // TODO: X.
+        }
+
+        /// <summary>
+        /// Emits a 32-bit shift left and stores the result on a general purpose register.
+        /// </summary>
+        /// <param name="context">Emitter context receiving the generated operations</param>
+        /// <param name="srcA">Value to shift</param>
+        /// <param name="srcB">Shift amount</param>
+        /// <param name="rd">Index of the destination register</param>
+        /// <param name="mask">True to wrap the shift amount to 5 bits, false to produce zero for amounts of 32 and above</param>
+        private static void EmitShl(EmitterContext context, Operand srcA, Operand srcB, int rd, bool mask)
+        {
+            Operand shifted;
+
+            if (mask)
+            {
+                // Wrapped shift value.
+                srcB = context.BitwiseAnd(srcB, Const(0x1f));
+
+                shifted = context.ShiftLeft(srcA, srcB);
+            }
+            else
+            {
+                shifted = context.ShiftLeft(srcA, srcB);
+
+                // Clamped shift value.
+                Operand inRange = context.ICompareLessUnsigned(srcB, Const(32));
+
+                shifted = context.ConditionalSelect(inRange, shifted, Const(0));
+            }
+
+            // TODO: X, CC.
+
+            context.Copy(GetDest(rd), shifted);
+        }
+
+        /// <summary>
+        /// Emits a 32-bit shift right and stores the result on a general purpose register.
+        /// </summary>
+        /// <param name="context">Emitter context receiving the generated operations</param>
+        /// <param name="srcA">Value to shift</param>
+        /// <param name="srcB">Shift amount</param>
+        /// <param name="rd">Index of the destination register</param>
+        /// <param name="mask">True to wrap the shift amount to 5 bits, false to clamp it</param>
+        /// <param name="bitReverse">True to reverse the bits of the value before shifting</param>
+        /// <param name="isSigned">True for an arithmetic shift, false for a logical shift</param>
+        private static void EmitShr(
+            EmitterContext context,
+            Operand srcA,
+            Operand srcB,
+            int rd,
+            bool mask,
+            bool bitReverse,
+            bool isSigned)
+        {
+            if (bitReverse)
+            {
+                srcA = context.BitfieldReverse(srcA);
+            }
+
+            Operand shifted;
+
+            if (mask)
+            {
+                // Wrapped shift value.
+                srcB = context.BitwiseAnd(srcB, Const(0x1f));
+
+                shifted = isSigned
+                    ? context.ShiftRightS32(srcA, srcB)
+                    : context.ShiftRightU32(srcA, srcB);
+            }
+            else
+            {
+                shifted = isSigned
+                    ? context.ShiftRightS32(srcA, srcB)
+                    : context.ShiftRightU32(srcA, srcB);
+
+                // Clamped shift value: for amounts of 32 and above, a signed shift gives
+                // the value shifted arithmetically by 31, an unsigned shift gives zero.
+                Operand saturated = isSigned
+                    ? context.ShiftRightS32(srcA, Const(31))
+                    : Const(0);
+
+                Operand inRange = context.ICompareLessUnsigned(srcB, Const(32));
+
+                shifted = context.ConditionalSelect(inRange, shifted, saturated);
+            }
+
+            // TODO: X, CC.
+
+            context.Copy(GetDest(rd), shifted);
+        }
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/Instructions/InstEmitSurface.cs b/src/Ryujinx.Graphics.Shader/Instructions/InstEmitSurface.cs
new file mode 100644
index 00000000..3d94b893
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/Instructions/InstEmitSurface.cs
@@ -0,0 +1,796 @@
+using Ryujinx.Graphics.Shader.Decoders;
+using Ryujinx.Graphics.Shader.IntermediateRepresentation;
+using Ryujinx.Graphics.Shader.Translation;
+using System;
+using System.Collections.Generic;
+using System.Numerics;
+
+using static Ryujinx.Graphics.Shader.Instructions.InstEmitHelper;
+using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
+
+namespace Ryujinx.Graphics.Shader.Instructions
+{
+ static partial class InstEmit
+ {
+        /// <summary>
+        /// Emits the bindless form of SUATOM (surface atomic), where register SrcC is
+        /// passed as the bindless handle source and the immediate handle is zero.
+        /// </summary>
+        /// <param name="context">Emitter context positioned on the instruction</param>
+        public static void SuatomB(EmitterContext context)
+        {
+            var inst = context.GetOp<InstSuatomB>();
+
+            EmitSuatom(
+                context,
+                dimensions: inst.Dim,
+                atomicOp: inst.Op,
+                size: inst.Size,
+                imm: 0,
+                srcA: inst.SrcA,
+                srcB: inst.SrcB,
+                srcC: inst.SrcC,
+                dest: inst.Dest,
+                byteAddress: inst.Ba,
+                isBindless: true,
+                compareAndSwap: false);
+        }
+
+        /// <summary>
+        /// Emits the bound form of SUATOM (surface atomic), where the image is identified
+        /// by the TidB immediate.
+        /// </summary>
+        /// <param name="context">Emitter context positioned on the instruction</param>
+        public static void Suatom(EmitterContext context)
+        {
+            var inst = context.GetOp<InstSuatom>();
+
+            EmitSuatom(
+                context,
+                dimensions: inst.Dim,
+                atomicOp: inst.Op,
+                size: inst.Size,
+                imm: inst.TidB,
+                srcA: inst.SrcA,
+                srcB: inst.SrcB,
+                srcC: 0,
+                dest: inst.Dest,
+                byteAddress: inst.Ba,
+                isBindless: false,
+                compareAndSwap: false);
+        }
+
+        /// <summary>
+        /// Emits the second bindless encoding of SUATOM (surface atomic). It is emitted with
+        /// the same arguments as the first bindless encoding: register SrcC is passed as the
+        /// bindless handle source and the immediate handle is zero.
+        /// </summary>
+        /// <param name="context">Emitter context positioned on the instruction</param>
+        public static void SuatomB2(EmitterContext context)
+        {
+            var inst = context.GetOp<InstSuatomB2>();
+
+            EmitSuatom(
+                context,
+                dimensions: inst.Dim,
+                atomicOp: inst.Op,
+                size: inst.Size,
+                imm: 0,
+                srcA: inst.SrcA,
+                srcB: inst.SrcB,
+                srcC: inst.SrcC,
+                dest: inst.Dest,
+                byteAddress: inst.Ba,
+                isBindless: true,
+                compareAndSwap: false);
+        }
+
+        /// <summary>
+        /// Emits the bindless form of the surface atomic compare and swap, where register SrcC
+        /// is passed as the bindless handle source. The atomic operation argument is not taken
+        /// from the instruction; zero is passed and compare and swap is requested instead.
+        /// </summary>
+        /// <param name="context">Emitter context positioned on the instruction</param>
+        public static void SuatomCasB(EmitterContext context)
+        {
+            var inst = context.GetOp<InstSuatomCasB>();
+
+            EmitSuatom(
+                context,
+                dimensions: inst.Dim,
+                atomicOp: 0,
+                size: inst.Size,
+                imm: 0,
+                srcA: inst.SrcA,
+                srcB: inst.SrcB,
+                srcC: inst.SrcC,
+                dest: inst.Dest,
+                byteAddress: inst.Ba,
+                isBindless: true,
+                compareAndSwap: true);
+        }
+
+        /// <summary>
+        /// Emits the bound form of the surface atomic compare and swap, where the image is
+        /// identified by the TidB immediate. The atomic operation argument is not taken from
+        /// the instruction; zero is passed and compare and swap is requested instead.
+        /// </summary>
+        /// <param name="context">Emitter context positioned on the instruction</param>
+        public static void SuatomCas(EmitterContext context)
+        {
+            var inst = context.GetOp<InstSuatomCas>();
+
+            EmitSuatom(
+                context,
+                dimensions: inst.Dim,
+                atomicOp: 0,
+                size: inst.Size,
+                imm: inst.TidB,
+                srcA: inst.SrcA,
+                srcB: inst.SrcB,
+                srcC: 0,
+                dest: inst.Dest,
+                byteAddress: inst.Ba,
+                isBindless: false,
+                compareAndSwap: true);
+        }
+
+        /// <summary>
+        /// Emits the bindless, sized form of SULD (surface load): the load uses the data size of
+        /// the instruction rather than a component mask, and register SrcC is passed as the
+        /// bindless handle source.
+        /// </summary>
+        /// <param name="context">Emitter context positioned on the instruction</param>
+        public static void SuldDB(EmitterContext context)
+        {
+            var inst = context.GetOp<InstSuldDB>();
+
+            EmitSuld(
+                context,
+                cacheOp: inst.CacheOp,
+                dimensions: inst.Dim,
+                size: inst.Size,
+                imm: 0,
+                componentMask: 0,
+                srcA: inst.SrcA,
+                srcB: inst.Dest,
+                srcC: inst.SrcC,
+                useComponents: false,
+                byteAddress: inst.Ba,
+                isBindless: true);
+        }
+
+        /// <summary>
+        /// Emits the bound, sized form of SULD (surface load): the load uses the data size of
+        /// the instruction rather than a component mask, and the image is identified by the
+        /// TidB immediate.
+        /// </summary>
+        /// <param name="context">Emitter context positioned on the instruction</param>
+        public static void SuldD(EmitterContext context)
+        {
+            var inst = context.GetOp<InstSuldD>();
+
+            EmitSuld(
+                context,
+                cacheOp: inst.CacheOp,
+                dimensions: inst.Dim,
+                size: inst.Size,
+                imm: inst.TidB,
+                componentMask: 0,
+                srcA: inst.SrcA,
+                srcB: inst.Dest,
+                srcC: 0,
+                useComponents: false,
+                byteAddress: inst.Ba,
+                isBindless: false);
+        }
+
+        /// <summary>
+        /// Emits the bindless, component mask form of SULD (surface load): the Rgba mask selects
+        /// the loaded components, and register SrcC is passed as the bindless handle source.
+        /// </summary>
+        /// <param name="context">Emitter context positioned on the instruction</param>
+        public static void SuldB(EmitterContext context)
+        {
+            var inst = context.GetOp<InstSuldB>();
+
+            EmitSuld(
+                context,
+                cacheOp: inst.CacheOp,
+                dimensions: inst.Dim,
+                size: 0,
+                imm: 0,
+                componentMask: inst.Rgba,
+                srcA: inst.SrcA,
+                srcB: inst.Dest,
+                srcC: inst.SrcC,
+                useComponents: true,
+                byteAddress: false,
+                isBindless: true);
+        }
+
+        /// <summary>
+        /// Emits the bound, component mask form of SULD (surface load): the Rgba mask selects
+        /// the loaded components, and the image is identified by the TidB immediate.
+        /// </summary>
+        /// <param name="context">Emitter context positioned on the instruction</param>
+        public static void Suld(EmitterContext context)
+        {
+            var inst = context.GetOp<InstSuld>();
+
+            EmitSuld(
+                context,
+                cacheOp: inst.CacheOp,
+                dimensions: inst.Dim,
+                size: 0,
+                imm: inst.TidB,
+                componentMask: inst.Rgba,
+                srcA: inst.SrcA,
+                srcB: inst.Dest,
+                srcC: 0,
+                useComponents: true,
+                byteAddress: false,
+                isBindless: false);
+        }
+
+        /// <summary>
+        /// Emits the bindless form of SURED (surface reduction), where register SrcC is passed
+        /// as the bindless handle source. The Dest field of the instruction is passed as the
+        /// register that holds the reduction operand.
+        /// </summary>
+        /// <param name="context">Emitter context positioned on the instruction</param>
+        public static void SuredB(EmitterContext context)
+        {
+            var inst = context.GetOp<InstSuredB>();
+
+            EmitSured(
+                context,
+                dimensions: inst.Dim,
+                atomicOp: inst.Op,
+                size: inst.Size,
+                imm: 0,
+                srcA: inst.SrcA,
+                srcB: inst.Dest,
+                srcC: inst.SrcC,
+                byteAddress: inst.Ba,
+                isBindless: true);
+        }
+
+        /// <summary>
+        /// Emits the bound form of SURED (surface reduction), where the image is identified by
+        /// the TidB immediate. The Dest field of the instruction is passed as the register that
+        /// holds the reduction operand.
+        /// </summary>
+        /// <param name="context">Emitter context positioned on the instruction</param>
+        public static void Sured(EmitterContext context)
+        {
+            var inst = context.GetOp<InstSured>();
+
+            EmitSured(
+                context,
+                dimensions: inst.Dim,
+                atomicOp: inst.Op,
+                size: inst.Size,
+                imm: inst.TidB,
+                srcA: inst.SrcA,
+                srcB: inst.Dest,
+                srcC: 0,
+                byteAddress: inst.Ba,
+                isBindless: false);
+        }
+
+        /// <summary>
+        /// Emits the bindless, sized form of SUST (surface store): the store uses the data size
+        /// of the instruction rather than a component mask, and register SrcC is passed as the
+        /// bindless handle source. The Dest field is passed as the first data register.
+        /// </summary>
+        /// <param name="context">Emitter context positioned on the instruction</param>
+        public static void SustDB(EmitterContext context)
+        {
+            var inst = context.GetOp<InstSustDB>();
+
+            EmitSust(
+                context,
+                cacheOp: inst.CacheOp,
+                dimensions: inst.Dim,
+                size: inst.Size,
+                imm: 0,
+                componentMask: 0,
+                srcA: inst.SrcA,
+                srcB: inst.Dest,
+                srcC: inst.SrcC,
+                useComponents: false,
+                byteAddress: inst.Ba,
+                isBindless: true);
+        }
+
+        /// <summary>
+        /// Emits the bound, sized form of SUST (surface store): the store uses the data size of
+        /// the instruction rather than a component mask, and the image is identified by the
+        /// TidB immediate. The Dest field is passed as the first data register.
+        /// </summary>
+        /// <param name="context">Emitter context positioned on the instruction</param>
+        public static void SustD(EmitterContext context)
+        {
+            var inst = context.GetOp<InstSustD>();
+
+            EmitSust(
+                context,
+                cacheOp: inst.CacheOp,
+                dimensions: inst.Dim,
+                size: inst.Size,
+                imm: inst.TidB,
+                componentMask: 0,
+                srcA: inst.SrcA,
+                srcB: inst.Dest,
+                srcC: 0,
+                useComponents: false,
+                byteAddress: inst.Ba,
+                isBindless: false);
+        }
+
+        /// <summary>
+        /// Emits the bindless, component mask form of SUST (surface store): the Rgba mask
+        /// selects the stored components, and register SrcC is passed as the bindless handle
+        /// source. The Dest field is passed as the first data register.
+        /// </summary>
+        /// <param name="context">Emitter context positioned on the instruction</param>
+        public static void SustB(EmitterContext context)
+        {
+            var inst = context.GetOp<InstSustB>();
+
+            EmitSust(
+                context,
+                cacheOp: inst.CacheOp,
+                dimensions: inst.Dim,
+                size: 0,
+                imm: 0,
+                componentMask: inst.Rgba,
+                srcA: inst.SrcA,
+                srcB: inst.Dest,
+                srcC: inst.SrcC,
+                useComponents: true,
+                byteAddress: false,
+                isBindless: true);
+        }
+
+        /// <summary>
+        /// Emits the bound, component mask form of SUST (surface store): the Rgba mask selects
+        /// the stored components, and the image is identified by the TidB immediate.
+        /// The Dest field is passed as the first data register.
+        /// </summary>
+        /// <param name="context">Emitter context positioned on the instruction</param>
+        public static void Sust(EmitterContext context)
+        {
+            var inst = context.GetOp<InstSust>();
+
+            EmitSust(
+                context,
+                cacheOp: inst.CacheOp,
+                dimensions: inst.Dim,
+                size: 0,
+                imm: inst.TidB,
+                componentMask: inst.Rgba,
+                srcA: inst.SrcA,
+                srcB: inst.Dest,
+                srcC: 0,
+                useComponents: true,
+                byteAddress: false,
+                isBindless: false);
+        }
+
+        /// <summary>
+        /// Emits an image atomic operation (or compare and swap) that returns a value.
+        /// The sources are, in order: the bindless handle (bindless only), the coordinates read
+        /// from consecutive registers starting at <paramref name="srcA"/>, the array index (array
+        /// types only), then one or two values read from consecutive registers starting at
+        /// <paramref name="srcB"/> (two for compare and swap).
+        /// </summary>
+        /// <param name="context">Emitter context receiving the generated operations</param>
+        /// <param name="dimensions">Surface dimensions, converted to the sampler type</param>
+        /// <param name="atomicOp">Atomic operation; ignored when <paramref name="compareAndSwap"/> is true</param>
+        /// <param name="size">Data size of the operation</param>
+        /// <param name="imm">Image handle for the bound form</param>
+        /// <param name="srcA">Index of the first coordinate register</param>
+        /// <param name="srcB">Index of the first value register</param>
+        /// <param name="srcC">Index of the register with the bindless handle</param>
+        /// <param name="dest">Index of the destination register</param>
+        /// <param name="byteAddress">True if the X coordinate is a byte address that must be converted to an element index</param>
+        /// <param name="isBindless">True for the bindless form</param>
+        /// <param name="compareAndSwap">True to emit a compare and swap</param>
+        private static void EmitSuatom(
+            EmitterContext context,
+            SuDim dimensions,
+            SuatomOp atomicOp,
+            SuatomSize size,
+            int imm,
+            int srcA,
+            int srcB,
+            int srcC,
+            int dest,
+            bool byteAddress,
+            bool isBindless,
+            bool compareAndSwap)
+        {
+            SamplerType type = ConvertSamplerType(dimensions);
+
+            if (type == SamplerType.None)
+            {
+                context.Config.GpuAccessor.Log("Invalid image atomic sampler type.");
+                return;
+            }
+
+            // Reads the register at the given index and advances the index,
+            // indices past the zero register read as constant zero and do not advance.
+            Operand NextRegister(ref int index)
+            {
+                if (index > RegisterConsts.RegisterZeroIndex)
+                {
+                    return Const(0);
+                }
+
+                return context.Copy(Register(index++, RegisterType.Gpr));
+            }
+
+            // A destination of RZ produces a null destination operand.
+            Operand destOperand = null;
+
+            if (dest != RegisterConsts.RegisterZeroIndex)
+            {
+                destOperand = Register(dest, RegisterType.Gpr);
+            }
+
+            var operands = new List<Operand>();
+
+            if (isBindless)
+            {
+                operands.Add(context.Copy(GetSrcReg(context, srcC)));
+            }
+
+            for (int remaining = type.GetDimensions(); remaining > 0; remaining--)
+            {
+                operands.Add(NextRegister(ref srcA));
+            }
+
+            if (Sample1DAs2D && (type & SamplerType.Mask) == SamplerType.Texture1D)
+            {
+                // 1D images are accessed as 2D with a Y coordinate of zero.
+                operands.Add(Const(0));
+
+                type = (type & ~SamplerType.Mask) | SamplerType.Texture2D;
+            }
+
+            if (type.HasFlag(SamplerType.Array))
+            {
+                operands.Add(NextRegister(ref srcA));
+            }
+
+            if (byteAddress)
+            {
+                // The X coordinate follows the handle on the bindless form.
+                int xIndex = isBindless ? 1 : 0;
+
+                operands[xIndex] = context.ShiftRightS32(operands[xIndex], Const(GetComponentSizeInBytesLog2(size)));
+            }
+
+            // TODO: FP and 64-bit formats.
+            TextureFormat format;
+
+            if (size == SuatomSize.Sd32 || size == SuatomSize.Sd64)
+            {
+                format = isBindless ? TextureFormat.Unknown : context.Config.GetTextureFormatAtomic(imm);
+            }
+            else
+            {
+                format = GetTextureFormat(size);
+            }
+
+            if (compareAndSwap)
+            {
+                operands.Add(NextRegister(ref srcB));
+            }
+
+            operands.Add(NextRegister(ref srcB));
+
+            Operand[] sources = operands.ToArray();
+
+            TextureFlags flags = compareAndSwap ? TextureFlags.CAS : GetAtomicOpFlags(atomicOp);
+
+            if (isBindless)
+            {
+                flags |= TextureFlags.Bindless;
+            }
+
+            context.Add(context.CreateTextureOperation(
+                Instruction.ImageAtomic,
+                type,
+                format,
+                flags,
+                imm,
+                0,
+                new[] { destOperand },
+                sources));
+        }
+
+        /// <summary>
+        /// Emits an image load.
+        /// The sources are, in order: the bindless handle (bindless only), the coordinates read
+        /// from consecutive registers starting at <paramref name="srcA"/>, and the array index
+        /// (array types only). The results are written to consecutive registers starting at
+        /// <paramref name="srcB"/>; registers at or past the zero register are not written.
+        /// </summary>
+        /// <param name="context">Emitter context receiving the generated operations</param>
+        /// <param name="cacheOp">Cache operation; Cg marks the access as coherent</param>
+        /// <param name="dimensions">Surface dimensions, converted to the sampler type</param>
+        /// <param name="size">Data size, used only when <paramref name="useComponents"/> is false</param>
+        /// <param name="imm">Image handle for the bound form</param>
+        /// <param name="componentMask">Mask of loaded components, used only when <paramref name="useComponents"/> is true</param>
+        /// <param name="srcA">Index of the first coordinate register</param>
+        /// <param name="srcB">Index of the first destination register</param>
+        /// <param name="srcC">Index of the register with the bindless handle</param>
+        /// <param name="useComponents">True to load the masked components, false to load by data size</param>
+        /// <param name="byteAddress">True if the X coordinate is a byte address that must be converted to an element index (sized loads only)</param>
+        /// <param name="isBindless">True for the bindless form</param>
+        private static void EmitSuld(
+            EmitterContext context,
+            CacheOpLd cacheOp,
+            SuDim dimensions,
+            SuSize size,
+            int imm,
+            SuRgba componentMask,
+            int srcA,
+            int srcB,
+            int srcC,
+            bool useComponents,
+            bool byteAddress,
+            bool isBindless)
+        {
+            context.Config.SetUsedFeature(FeatureFlags.IntegerSampling);
+
+            SamplerType type = ConvertSamplerType(dimensions);
+
+            if (type == SamplerType.None)
+            {
+                context.Config.GpuAccessor.Log("Invalid image load sampler type.");
+                return;
+            }
+
+            // Reads the next coordinate register, indices past the zero register read as zero.
+            Operand Ra()
+            {
+                if (srcA > RegisterConsts.RegisterZeroIndex)
+                {
+                    return Const(0);
+                }
+
+                return context.Copy(Register(srcA++, RegisterType.Gpr));
+            }
+
+            // Builds the list of up to "count" destination registers starting at srcB,
+            // stopping before the zero register. The list may be empty.
+            Operand[] GetDests(int count)
+            {
+                List<Operand> dests = new List<Operand>(count);
+
+                for (int i = 0; i < count && srcB + i < RegisterConsts.RegisterZeroIndex; i++)
+                {
+                    dests.Add(Register(srcB + i, RegisterType.Gpr));
+                }
+
+                return dests.ToArray();
+            }
+
+            List<Operand> sourcesList = new List<Operand>();
+
+            if (isBindless)
+            {
+                sourcesList.Add(context.Copy(Register(srcC, RegisterType.Gpr)));
+            }
+
+            int coordsCount = type.GetDimensions();
+
+            for (int index = 0; index < coordsCount; index++)
+            {
+                sourcesList.Add(Ra());
+            }
+
+            if (Sample1DAs2D && (type & SamplerType.Mask) == SamplerType.Texture1D)
+            {
+                // 1D images are accessed as 2D with a Y coordinate of zero.
+                sourcesList.Add(Const(0));
+
+                type &= ~SamplerType.Mask;
+                type |= SamplerType.Texture2D;
+            }
+
+            if (type.HasFlag(SamplerType.Array))
+            {
+                sourcesList.Add(Ra());
+            }
+
+            Operand[] sources = sourcesList.ToArray();
+
+            int handle = imm;
+
+            TextureFlags flags = isBindless ? TextureFlags.Bindless : TextureFlags.None;
+
+            if (cacheOp == CacheOpLd.Cg)
+            {
+                flags |= TextureFlags.Coherent;
+            }
+
+            if (useComponents)
+            {
+                Operand[] dests = GetDests(BitOperations.PopCount((uint)componentMask));
+
+                TextureOperation operation = context.CreateTextureOperation(
+                    Instruction.ImageLoad,
+                    type,
+                    flags,
+                    handle,
+                    (int)componentMask,
+                    dests,
+                    sources);
+
+                if (!isBindless)
+                {
+                    operation.Format = context.Config.GetTextureFormat(handle);
+                }
+
+                context.Add(operation);
+            }
+            else
+            {
+                if (byteAddress)
+                {
+                    // The X coordinate follows the handle on the bindless form.
+                    int xIndex = isBindless ? 1 : 0;
+
+                    sources[xIndex] = context.ShiftRightS32(sources[xIndex], Const(GetComponentSizeInBytesLog2(size)));
+                }
+
+                int components = GetComponents(size);
+                int compMask = (1 << components) - 1;
+
+                Operand[] dests = GetDests(components);
+
+                TextureOperation operation = context.CreateTextureOperation(
+                    Instruction.ImageLoad,
+                    type,
+                    GetTextureFormat(size),
+                    flags,
+                    handle,
+                    compMask,
+                    dests,
+                    sources);
+
+                context.Add(operation);
+
+                // Small integers must be extended to 32 bits after the load. When the
+                // destination is the zero register there is no destination to fix up,
+                // and indexing the empty list would throw.
+                if (dests.Length != 0)
+                {
+                    switch (size)
+                    {
+                        case SuSize.U8: context.Copy(dests[0], ZeroExtendTo32(context, dests[0], 8)); break;
+                        case SuSize.U16: context.Copy(dests[0], ZeroExtendTo32(context, dests[0], 16)); break;
+                        case SuSize.S8: context.Copy(dests[0], SignExtendTo32(context, dests[0], 8)); break;
+                        case SuSize.S16: context.Copy(dests[0], SignExtendTo32(context, dests[0], 16)); break;
+                    }
+                }
+            }
+        }
+
+        /// <summary>
+        /// Emits an image reduction, which is an image atomic operation without a destination.
+        /// The sources are, in order: the bindless handle (bindless only), the coordinates read
+        /// from consecutive registers starting at <paramref name="srcA"/>, the array index (array
+        /// types only), then the value read from register <paramref name="srcB"/>.
+        /// </summary>
+        /// <param name="context">Emitter context receiving the generated operations</param>
+        /// <param name="dimensions">Surface dimensions, converted to the sampler type</param>
+        /// <param name="atomicOp">Reduction operation, reinterpreted as the matching atomic operation</param>
+        /// <param name="size">Data size of the operation</param>
+        /// <param name="imm">Image handle for the bound form</param>
+        /// <param name="srcA">Index of the first coordinate register</param>
+        /// <param name="srcB">Index of the value register</param>
+        /// <param name="srcC">Index of the register with the bindless handle</param>
+        /// <param name="byteAddress">True if the X coordinate is a byte address that must be converted to an element index</param>
+        /// <param name="isBindless">True for the bindless form</param>
+        private static void EmitSured(
+            EmitterContext context,
+            SuDim dimensions,
+            RedOp atomicOp,
+            SuatomSize size,
+            int imm,
+            int srcA,
+            int srcB,
+            int srcC,
+            bool byteAddress,
+            bool isBindless)
+        {
+            SamplerType type = ConvertSamplerType(dimensions);
+
+            if (type == SamplerType.None)
+            {
+                context.Config.GpuAccessor.Log("Invalid image reduction sampler type.");
+                return;
+            }
+
+            // Reads the register at the given index and advances the index,
+            // indices past the zero register read as constant zero and do not advance.
+            Operand NextRegister(ref int index)
+            {
+                if (index > RegisterConsts.RegisterZeroIndex)
+                {
+                    return Const(0);
+                }
+
+                return context.Copy(Register(index++, RegisterType.Gpr));
+            }
+
+            var operands = new List<Operand>();
+
+            if (isBindless)
+            {
+                operands.Add(context.Copy(GetSrcReg(context, srcC)));
+            }
+
+            for (int remaining = type.GetDimensions(); remaining > 0; remaining--)
+            {
+                operands.Add(NextRegister(ref srcA));
+            }
+
+            if (Sample1DAs2D && (type & SamplerType.Mask) == SamplerType.Texture1D)
+            {
+                // 1D images are accessed as 2D with a Y coordinate of zero.
+                operands.Add(Const(0));
+
+                type = (type & ~SamplerType.Mask) | SamplerType.Texture2D;
+            }
+
+            if (type.HasFlag(SamplerType.Array))
+            {
+                operands.Add(NextRegister(ref srcA));
+            }
+
+            if (byteAddress)
+            {
+                // The X coordinate follows the handle on the bindless form.
+                int xIndex = isBindless ? 1 : 0;
+
+                operands[xIndex] = context.ShiftRightS32(operands[xIndex], Const(GetComponentSizeInBytesLog2(size)));
+            }
+
+            // TODO: FP and 64-bit formats.
+            TextureFormat format;
+
+            if (size == SuatomSize.Sd32 || size == SuatomSize.Sd64)
+            {
+                format = isBindless ? TextureFormat.Unknown : context.Config.GetTextureFormatAtomic(imm);
+            }
+            else
+            {
+                format = GetTextureFormat(size);
+            }
+
+            operands.Add(NextRegister(ref srcB));
+
+            Operand[] sources = operands.ToArray();
+
+            TextureFlags flags = GetAtomicOpFlags((SuatomOp)atomicOp);
+
+            if (isBindless)
+            {
+                flags |= TextureFlags.Bindless;
+            }
+
+            context.Add(context.CreateTextureOperation(
+                Instruction.ImageAtomic,
+                type,
+                format,
+                flags,
+                imm,
+                0,
+                null,
+                sources));
+        }
+
+        /// <summary>
+        /// Emits an image store.
+        /// The sources are, in order: the bindless handle (bindless only), the coordinates read
+        /// from consecutive registers starting at <paramref name="srcA"/>, the array index (array
+        /// types only), then the data read from consecutive registers starting at
+        /// <paramref name="srcB"/> (one per set mask bit, or one per component of the data size).
+        /// </summary>
+        /// <param name="context">Emitter context receiving the generated operations</param>
+        /// <param name="cacheOp">Cache operation; Cg marks the access as coherent</param>
+        /// <param name="dimensions">Surface dimensions, converted to the sampler type</param>
+        /// <param name="size">Data size, used only when <paramref name="useComponents"/> is false</param>
+        /// <param name="imm">Image handle for the bound form</param>
+        /// <param name="componentMask">Mask of stored components, used only when <paramref name="useComponents"/> is true</param>
+        /// <param name="srcA">Index of the first coordinate register</param>
+        /// <param name="srcB">Index of the first data register</param>
+        /// <param name="srcC">Index of the register with the bindless handle</param>
+        /// <param name="useComponents">True to store the masked components, false to store by data size</param>
+        /// <param name="byteAddress">True if the X coordinate is a byte address that must be converted to an element index (sized stores only)</param>
+        /// <param name="isBindless">True for the bindless form</param>
+        private static void EmitSust(
+            EmitterContext context,
+            CacheOpSt cacheOp,
+            SuDim dimensions,
+            SuSize size,
+            int imm,
+            SuRgba componentMask,
+            int srcA,
+            int srcB,
+            int srcC,
+            bool useComponents,
+            bool byteAddress,
+            bool isBindless)
+        {
+            SamplerType type = ConvertSamplerType(dimensions);
+
+            if (type == SamplerType.None)
+            {
+                context.Config.GpuAccessor.Log("Invalid image store sampler type.");
+                return;
+            }
+
+            // Reads the register at the given index and advances the index,
+            // indices past the zero register read as constant zero and do not advance.
+            Operand NextRegister(ref int index)
+            {
+                if (index > RegisterConsts.RegisterZeroIndex)
+                {
+                    return Const(0);
+                }
+
+                return context.Copy(Register(index++, RegisterType.Gpr));
+            }
+
+            var operands = new List<Operand>();
+
+            if (isBindless)
+            {
+                operands.Add(context.Copy(Register(srcC, RegisterType.Gpr)));
+            }
+
+            for (int remaining = type.GetDimensions(); remaining > 0; remaining--)
+            {
+                operands.Add(NextRegister(ref srcA));
+            }
+
+            if (Sample1DAs2D && (type & SamplerType.Mask) == SamplerType.Texture1D)
+            {
+                // 1D images are accessed as 2D with a Y coordinate of zero.
+                operands.Add(Const(0));
+
+                type = (type & ~SamplerType.Mask) | SamplerType.Texture2D;
+            }
+
+            if (type.HasFlag(SamplerType.Array))
+            {
+                operands.Add(NextRegister(ref srcA));
+            }
+
+            TextureFormat format = TextureFormat.Unknown;
+
+            if (useComponents)
+            {
+                // One data register is consumed for each bit set on the mask, lowest bit first.
+                int pendingMask = (int)componentMask;
+
+                while (pendingMask != 0)
+                {
+                    if ((pendingMask & 1) != 0)
+                    {
+                        operands.Add(NextRegister(ref srcB));
+                    }
+
+                    pendingMask >>= 1;
+                }
+
+                if (!isBindless)
+                {
+                    format = context.Config.GetTextureFormat(imm);
+                }
+            }
+            else
+            {
+                if (byteAddress)
+                {
+                    // The X coordinate follows the handle on the bindless form.
+                    int xIndex = isBindless ? 1 : 0;
+
+                    operands[xIndex] = context.ShiftRightS32(operands[xIndex], Const(GetComponentSizeInBytesLog2(size)));
+                }
+
+                for (int remaining = GetComponents(size); remaining > 0; remaining--)
+                {
+                    operands.Add(NextRegister(ref srcB));
+                }
+
+                format = GetTextureFormat(size);
+            }
+
+            Operand[] sources = operands.ToArray();
+
+            TextureFlags flags = isBindless ? TextureFlags.Bindless : TextureFlags.None;
+
+            if (cacheOp == CacheOpSt.Cg)
+            {
+                flags |= TextureFlags.Coherent;
+            }
+
+            context.Add(context.CreateTextureOperation(
+                Instruction.ImageStore,
+                type,
+                format,
+                flags,
+                imm,
+                0,
+                null,
+                sources));
+        }
+
+ /// <summary>
+ /// Gets the base 2 logarithm of the component size in bytes for a surface atomic size.
+ /// </summary>
+ /// <param name="size">Surface atomic size</param>
+ /// <returns>3 for the 64-bit sizes, 2 for every other value</returns>
+ private static int GetComponentSizeInBytesLog2(SuatomSize size)
+ {
+     switch (size)
+     {
+         case SuatomSize.U64:
+         case SuatomSize.S64:
+         case SuatomSize.Sd64:
+             return 3;
+
+         // U32, S32, F32FtzRn, F16x2FtzRn, Sd32 and anything unrecognized.
+         default:
+             return 2;
+     }
+ }
+
+ /// <summary>
+ /// Gets the texture format used to access an image with the given surface atomic size.
+ /// </summary>
+ /// <param name="size">Surface atomic size</param>
+ /// <returns>Texture format matching the size, R32Uint if the size is not recognized</returns>
+ private static TextureFormat GetTextureFormat(SuatomSize size)
+ {
+     switch (size)
+     {
+         case SuatomSize.S32:
+             return TextureFormat.R32Sint;
+
+         case SuatomSize.F32FtzRn:
+             return TextureFormat.R32Float;
+
+         case SuatomSize.F16x2FtzRn:
+             return TextureFormat.R16G16Float;
+
+         case SuatomSize.U64:
+         case SuatomSize.S64:
+         case SuatomSize.Sd64:
+             return TextureFormat.R32G32Uint;
+
+         // U32, Sd32 and anything unrecognized.
+         default:
+             return TextureFormat.R32Uint;
+     }
+ }
+
+ /// <summary>
+ /// Gets the texture flag that selects the atomic operation to perform.
+ /// </summary>
+ /// <param name="op">Surface atomic operation</param>
+ /// <returns>Flag for the operation, <see cref="TextureFlags.Add"/> if it is not recognized</returns>
+ private static TextureFlags GetAtomicOpFlags(SuatomOp op)
+ {
+     switch (op)
+     {
+         case SuatomOp.Min:
+             return TextureFlags.Minimum;
+         case SuatomOp.Max:
+             return TextureFlags.Maximum;
+         case SuatomOp.Inc:
+             return TextureFlags.Increment;
+         case SuatomOp.Dec:
+             return TextureFlags.Decrement;
+         case SuatomOp.And:
+             return TextureFlags.BitwiseAnd;
+         case SuatomOp.Or:
+             return TextureFlags.BitwiseOr;
+         case SuatomOp.Xor:
+             return TextureFlags.BitwiseXor;
+         case SuatomOp.Exch:
+             return TextureFlags.Swap;
+
+         // SuatomOp.Add and anything unrecognized.
+         default:
+             return TextureFlags.Add;
+     }
+ }
+
+ /// <summary>
+ /// Gets the number of components stored for a given surface access size.
+ /// </summary>
+ /// <param name="size">Surface access size</param>
+ /// <returns>2 for B64, 4 for B128 and UB128, 1 otherwise</returns>
+ private static int GetComponents(SuSize size)
+ {
+     switch (size)
+     {
+         case SuSize.B64:
+             return 2;
+
+         case SuSize.B128:
+         case SuSize.UB128:
+             return 4;
+
+         default:
+             return 1;
+     }
+ }
+
+ /// <summary>
+ /// Gets the base 2 logarithm of the access size in bytes for a surface access size.
+ /// </summary>
+ /// <param name="size">Surface access size</param>
+ /// <returns>Shift amount for the size, 2 if the size is not recognized</returns>
+ private static int GetComponentSizeInBytesLog2(SuSize size)
+ {
+     switch (size)
+     {
+         case SuSize.U8:
+         case SuSize.S8:
+             return 0;
+
+         case SuSize.U16:
+         case SuSize.S16:
+             return 1;
+
+         case SuSize.B64:
+             return 3;
+
+         case SuSize.B128:
+         case SuSize.UB128:
+             return 4;
+
+         // B32 and anything unrecognized.
+         default:
+             return 2;
+     }
+ }
+
+ /// <summary>
+ /// Gets the texture format used to access an image with the given surface access size.
+ /// </summary>
+ /// <param name="size">Surface access size</param>
+ /// <returns>Texture format matching the size, R32Uint if the size is not recognized</returns>
+ private static TextureFormat GetTextureFormat(SuSize size)
+ {
+     switch (size)
+     {
+         case SuSize.U8:
+             return TextureFormat.R8Uint;
+         case SuSize.S8:
+             return TextureFormat.R8Sint;
+         case SuSize.U16:
+             return TextureFormat.R16Uint;
+         case SuSize.S16:
+             return TextureFormat.R16Sint;
+         case SuSize.B64:
+             return TextureFormat.R32G32Uint;
+
+         case SuSize.B128:
+         case SuSize.UB128:
+             return TextureFormat.R32G32B32A32Uint;
+
+         // B32 and anything unrecognized.
+         default:
+             return TextureFormat.R32Uint;
+     }
+ }
+
+ /// <summary>
+ /// Converts surface dimensions into the equivalent sampler type.
+ /// </summary>
+ /// <param name="target">Surface dimensions</param>
+ /// <returns>Sampler type, or <see cref="SamplerType.None"/> if the dimensions are not recognized</returns>
+ private static SamplerType ConvertSamplerType(SuDim target)
+ {
+     switch (target)
+     {
+         case SuDim._1d:
+             return SamplerType.Texture1D;
+         case SuDim._1dBuffer:
+             return SamplerType.TextureBuffer;
+         case SuDim._1dArray:
+             return SamplerType.Texture1D | SamplerType.Array;
+         case SuDim._2d:
+             return SamplerType.Texture2D;
+         case SuDim._2dArray:
+             return SamplerType.Texture2D | SamplerType.Array;
+         case SuDim._3d:
+             return SamplerType.Texture3D;
+         default:
+             return SamplerType.None;
+     }
+ }
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/Instructions/InstEmitTexture.cs b/src/Ryujinx.Graphics.Shader/Instructions/InstEmitTexture.cs
new file mode 100644
index 00000000..caa9a775
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/Instructions/InstEmitTexture.cs
@@ -0,0 +1,1312 @@
+using Ryujinx.Graphics.Shader.Decoders;
+using Ryujinx.Graphics.Shader.IntermediateRepresentation;
+using Ryujinx.Graphics.Shader.Translation;
+using System;
+using System.Collections.Generic;
+using System.Numerics;
+
+using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
+
+namespace Ryujinx.Graphics.Shader.Instructions
+{
+ static partial class InstEmit
+ {
+ private static readonly int[,] _maskLut = new int[,] // Component masks, looked up in EmitTexs as [dest2 is the zero register ? 0 : 1, writeMask].
+ {
+ { 0b0001, 0b0010, 0b0100, 0b1000, 0b0011, 0b1001, 0b1010, 0b1100 }, // Row 0: used when dest2 is the zero register (one or two components).
+ { 0b0111, 0b1011, 0b1101, 0b1110, 0b1111, 0b0000, 0b0000, 0b0000 } // Row 1: used otherwise (three or four components; last three entries are empty masks).
+ };
+
+ public const bool Sample1DAs2D = true; // When true, the emitters in this class turn 1D sampler types into 2D ones and add a constant 0 second coordinate.
+
+ private enum TexsType // Selects which instruction layout EmitTexs decodes.
+ {
+ Texs, // Passed by Texs and TexsF16.
+ Tlds, // Passed by Tlds and TldsF16.
+ Tld4s // Passed by Tld4s and Tld4sF16.
+ }
+
+ /// <summary>
+ /// Emits a texture sample for the current <see cref="InstTex"/> operation.
+ /// </summary>
+ /// <param name="context">Emitter context holding the operation being translated</param>
+ public static void Tex(EmitterContext context)
+ {
+     var tex = context.GetOp<InstTex>();
+
+     EmitTex(
+         context,
+         TextureFlags.None,
+         tex.Dim,
+         tex.Lod,
+         tex.TidB,
+         tex.WMask,
+         tex.SrcA,
+         tex.SrcB,
+         tex.Dest,
+         isMultisample: false,
+         hasDepthCompare: tex.Dc,
+         hasOffset: tex.Aoffi);
+ }
+
+ /// <summary>
+ /// Emits a bindless texture sample for the current <see cref="InstTexB"/> operation.
+ /// </summary>
+ /// <param name="context">Emitter context holding the operation being translated</param>
+ public static void TexB(EmitterContext context)
+ {
+     var texB = context.GetOp<InstTexB>();
+
+     EmitTex(
+         context,
+         TextureFlags.Bindless,
+         texB.Dim,
+         texB.Lodb,
+         imm: 0,
+         componentMask: texB.WMask,
+         raIndex: texB.SrcA,
+         rbIndex: texB.SrcB,
+         rdIndex: texB.Dest,
+         isMultisample: false,
+         hasDepthCompare: texB.Dc,
+         hasOffset: texB.Aoffib);
+ }
+
+ /// <summary>
+ /// Emits a texture sample for the current <see cref="InstTexs"/> operation, writing 32-bit results.
+ /// </summary>
+ /// <param name="context">Emitter context holding the operation being translated</param>
+ public static void Texs(EmitterContext context)
+ {
+     var texs = context.GetOp<InstTexs>();
+
+     EmitTexs(
+         context,
+         TexsType.Texs,
+         texs.TidB,
+         texs.WMask,
+         texs.SrcA,
+         texs.SrcB,
+         texs.Dest,
+         texs.Dest2,
+         isF16: false);
+ }
+
+ /// <summary>
+ /// Emits a texture sample for the current <see cref="InstTexs"/> operation, packing results as half float pairs.
+ /// </summary>
+ /// <param name="context">Emitter context holding the operation being translated</param>
+ public static void TexsF16(EmitterContext context)
+ {
+     var texs = context.GetOp<InstTexs>();
+
+     EmitTexs(
+         context,
+         TexsType.Texs,
+         texs.TidB,
+         texs.WMask,
+         texs.SrcA,
+         texs.SrcB,
+         texs.Dest,
+         texs.Dest2,
+         isF16: true);
+ }
+
+ /// <summary>
+ /// Emits an integer coordinate texture fetch for the current <see cref="InstTld"/> operation.
+ /// </summary>
+ /// <param name="context">Emitter context holding the operation being translated</param>
+ public static void Tld(EmitterContext context)
+ {
+     var tld = context.GetOp<InstTld>();
+
+     context.Config.SetUsedFeature(FeatureFlags.IntegerSampling);
+
+     // The instruction only tells if a LOD level is supplied or not.
+     Lod lodMode = tld.Lod ? Lod.Ll : Lod.Lz;
+
+     EmitTex(
+         context,
+         TextureFlags.IntCoords,
+         tld.Dim,
+         lodMode,
+         tld.TidB,
+         tld.WMask,
+         tld.SrcA,
+         tld.SrcB,
+         tld.Dest,
+         isMultisample: tld.Ms,
+         hasDepthCompare: false,
+         hasOffset: tld.Toff);
+ }
+
+ /// <summary>
+ /// Emits a bindless integer coordinate texture fetch for the current <see cref="InstTldB"/> operation.
+ /// </summary>
+ /// <param name="context">Emitter context holding the operation being translated</param>
+ public static void TldB(EmitterContext context)
+ {
+     var tldB = context.GetOp<InstTldB>();
+
+     context.Config.SetUsedFeature(FeatureFlags.IntegerSampling);
+
+     TextureFlags fetchFlags = TextureFlags.IntCoords | TextureFlags.Bindless;
+
+     // The instruction only tells if a LOD level is supplied or not.
+     Lod lodMode = tldB.Lod ? Lod.Ll : Lod.Lz;
+
+     EmitTex(
+         context,
+         fetchFlags,
+         tldB.Dim,
+         lodMode,
+         imm: 0,
+         componentMask: tldB.WMask,
+         raIndex: tldB.SrcA,
+         rbIndex: tldB.SrcB,
+         rdIndex: tldB.Dest,
+         isMultisample: tldB.Ms,
+         hasDepthCompare: false,
+         hasOffset: tldB.Toff);
+ }
+
+ /// <summary>
+ /// Emits a texture fetch for the current <see cref="InstTlds"/> operation, writing 32-bit results.
+ /// </summary>
+ /// <param name="context">Emitter context holding the operation being translated</param>
+ public static void Tlds(EmitterContext context)
+ {
+     var tlds = context.GetOp<InstTlds>();
+
+     EmitTexs(
+         context,
+         TexsType.Tlds,
+         tlds.TidB,
+         tlds.WMask,
+         tlds.SrcA,
+         tlds.SrcB,
+         tlds.Dest,
+         tlds.Dest2,
+         isF16: false);
+ }
+
+ /// <summary>
+ /// Emits a texture fetch for the current <see cref="InstTlds"/> operation, packing results as half float pairs.
+ /// </summary>
+ /// <param name="context">Emitter context holding the operation being translated</param>
+ public static void TldsF16(EmitterContext context)
+ {
+     var tlds = context.GetOp<InstTlds>();
+
+     EmitTexs(
+         context,
+         TexsType.Tlds,
+         tlds.TidB,
+         tlds.WMask,
+         tlds.SrcA,
+         tlds.SrcB,
+         tlds.Dest,
+         tlds.Dest2,
+         isF16: true);
+ }
+
+ /// <summary>
+ /// Emits a texture gather for the current <see cref="InstTld4"/> operation.
+ /// </summary>
+ /// <param name="context">Emitter context holding the operation being translated</param>
+ public static void Tld4(EmitterContext context)
+ {
+     var tld4 = context.GetOp<InstTld4>();
+
+     EmitTld4(
+         context,
+         tld4.Dim,
+         tld4.TexComp,
+         tld4.TidB,
+         tld4.WMask,
+         tld4.SrcA,
+         tld4.SrcB,
+         tld4.Dest,
+         tld4.Toff,
+         tld4.Dc,
+         isBindless: false);
+ }
+
+ /// <summary>
+ /// Emits a bindless texture gather for the current <see cref="InstTld4B"/> operation.
+ /// </summary>
+ /// <param name="context">Emitter context holding the operation being translated</param>
+ public static void Tld4B(EmitterContext context)
+ {
+     var tld4B = context.GetOp<InstTld4B>();
+
+     EmitTld4(
+         context,
+         tld4B.Dim,
+         tld4B.TexComp,
+         imm: 0,
+         componentMask: tld4B.WMask,
+         srcA: tld4B.SrcA,
+         srcB: tld4B.SrcB,
+         dest: tld4B.Dest,
+         offset: tld4B.Toff,
+         hasDepthCompare: tld4B.Dc,
+         isBindless: true);
+ }
+
+ /// <summary>
+ /// Emits a texture gather for the current <see cref="InstTld4s"/> operation, writing 32-bit results.
+ /// </summary>
+ /// <param name="context">Emitter context holding the operation being translated</param>
+ public static void Tld4s(EmitterContext context)
+ {
+     var tld4s = context.GetOp<InstTld4s>();
+
+     // The write mask is fixed at 4 for this instruction.
+     EmitTexs(
+         context,
+         TexsType.Tld4s,
+         tld4s.TidB,
+         writeMask: 4,
+         srcA: tld4s.SrcA,
+         srcB: tld4s.SrcB,
+         dest: tld4s.Dest,
+         dest2: tld4s.Dest2,
+         isF16: false);
+ }
+
+ /// <summary>
+ /// Emits a texture gather for the current <see cref="InstTld4s"/> operation, packing results as half float pairs.
+ /// </summary>
+ /// <param name="context">Emitter context holding the operation being translated</param>
+ public static void Tld4sF16(EmitterContext context)
+ {
+     var tld4s = context.GetOp<InstTld4s>();
+
+     // The write mask is fixed at 4 for this instruction.
+     EmitTexs(
+         context,
+         TexsType.Tld4s,
+         tld4s.TidB,
+         writeMask: 4,
+         srcA: tld4s.SrcA,
+         srcB: tld4s.SrcB,
+         dest: tld4s.Dest,
+         dest2: tld4s.Dest2,
+         isF16: true);
+ }
+
+ /// <summary>
+ /// Emits a texture LOD query for the current <see cref="InstTmml"/> operation.
+ /// </summary>
+ /// <param name="context">Emitter context holding the operation being translated</param>
+ public static void Tmml(EmitterContext context)
+ {
+     var tmml = context.GetOp<InstTmml>();
+
+     EmitTmml(
+         context,
+         tmml.Dim,
+         tmml.TidB,
+         tmml.WMask,
+         tmml.SrcA,
+         tmml.SrcB,
+         tmml.Dest,
+         isBindless: false);
+ }
+
+ /// <summary>
+ /// Emits a bindless texture LOD query for the current <see cref="InstTmmlB"/> operation.
+ /// </summary>
+ /// <param name="context">Emitter context holding the operation being translated</param>
+ public static void TmmlB(EmitterContext context)
+ {
+     var tmmlB = context.GetOp<InstTmmlB>();
+
+     EmitTmml(
+         context,
+         tmmlB.Dim,
+         imm: 0,
+         componentMask: tmmlB.WMask,
+         srcA: tmmlB.SrcA,
+         srcB: tmmlB.SrcB,
+         dest: tmmlB.Dest,
+         isBindless: true);
+ }
+
+ /// <summary>
+ /// Emits a texture sample with explicit derivatives for the current <see cref="InstTxd"/> operation.
+ /// </summary>
+ /// <param name="context">Emitter context holding the operation being translated</param>
+ public static void Txd(EmitterContext context)
+ {
+     var txd = context.GetOp<InstTxd>();
+
+     EmitTxd(
+         context,
+         txd.Dim,
+         txd.TidB,
+         txd.WMask,
+         txd.SrcA,
+         txd.SrcB,
+         txd.Dest,
+         txd.Toff,
+         isBindless: false);
+ }
+
+ /// <summary>
+ /// Emits a bindless texture sample with explicit derivatives for the current <see cref="InstTxdB"/> operation.
+ /// </summary>
+ /// <param name="context">Emitter context holding the operation being translated</param>
+ public static void TxdB(EmitterContext context)
+ {
+     var txdB = context.GetOp<InstTxdB>();
+
+     EmitTxd(
+         context,
+         txdB.Dim,
+         imm: 0,
+         componentMask: txdB.WMask,
+         srcA: txdB.SrcA,
+         srcB: txdB.SrcB,
+         dest: txdB.Dest,
+         hasOffset: txdB.Toff,
+         isBindless: true);
+ }
+
+ /// <summary>
+ /// Emits a texture query for the current <see cref="InstTxq"/> operation.
+ /// </summary>
+ /// <param name="context">Emitter context holding the operation being translated</param>
+ public static void Txq(EmitterContext context)
+ {
+     var txq = context.GetOp<InstTxq>();
+
+     EmitTxq(
+         context,
+         txq.TexQuery,
+         txq.TidB,
+         txq.WMask,
+         txq.SrcA,
+         txq.Dest,
+         isBindless: false);
+ }
+
+ /// <summary>
+ /// Emits a bindless texture query for the current <see cref="InstTxqB"/> operation.
+ /// </summary>
+ /// <param name="context">Emitter context holding the operation being translated</param>
+ public static void TxqB(EmitterContext context)
+ {
+     var txqB = context.GetOp<InstTxqB>();
+
+     EmitTxq(
+         context,
+         txqB.TexQuery,
+         imm: 0,
+         componentMask: txqB.WMask,
+         srcA: txqB.SrcA,
+         dest: txqB.Dest,
+         isBindless: true);
+ }
+
+ private static void EmitTex( // Builds the source/destination lists for a texture sample or fetch and adds a TextureSample operation to the context.
+ EmitterContext context,
+ TextureFlags flags, // Initial flags; Bindless and IntCoords are inspected here, LodLevel/Offset/LodBias are added below as needed.
+ TexDim dimensions, // Texture dimensions, converted to the sampler type.
+ Lod lodMode, // LOD mode; decides whether a LOD level or LOD bias source is added.
+ int imm, // Used as the operation handle (and to query the sampler type) when not bindless.
+ int componentMask, // One bit per component; the number of set bits is the number of destination registers.
+ int raIndex, // Index of the first register of the A source sequence.
+ int rbIndex, // Index of the first register of the B source sequence.
+ int rdIndex, // Index of the first destination register.
+ bool isMultisample,
+ bool hasDepthCompare,
+ bool hasOffset)
+ {
+ if (rdIndex == RegisterConsts.RegisterZeroIndex) // Nothing is emitted when the destination is the zero register.
+ {
+ return;
+ }
+ // Ra and Rb return a copy of the next register of their sequence (advancing the index), or constant 0 once the index is past the zero register.
+ Operand Ra()
+ {
+ if (raIndex > RegisterConsts.RegisterZeroIndex)
+ {
+ return Const(0);
+ }
+
+ return context.Copy(Register(raIndex++, RegisterType.Gpr));
+ }
+
+ Operand Rb()
+ {
+ if (rbIndex > RegisterConsts.RegisterZeroIndex)
+ {
+ return Const(0);
+ }
+
+ return context.Copy(Register(rbIndex++, RegisterType.Gpr));
+ }
+
+ SamplerType type = ConvertSamplerType(dimensions);
+
+ bool isArray = type.HasFlag(SamplerType.Array);
+ bool isBindless = flags.HasFlag(TextureFlags.Bindless);
+ // The array index is read from the A sequence before the coordinates, but is added to the sources after them (see below).
+ Operand arrayIndex = isArray ? Ra() : null;
+
+ List<Operand> sourcesList = new List<Operand>();
+ // For bindless operations, the first source is read from the B sequence.
+ if (isBindless)
+ {
+ sourcesList.Add(Rb());
+ }
+
+ bool hasLod = lodMode > Lod.Lz;
+ // A plain 1D integer coordinate fetch (no LOD, depth compare, offset, array or multisample) may actually target a buffer texture.
+ if (type == SamplerType.Texture1D && (flags & ~TextureFlags.Bindless) == TextureFlags.IntCoords && !(
+ hasLod ||
+ hasDepthCompare ||
+ hasOffset ||
+ isArray ||
+ isMultisample))
+ {
+ // For bindless, we don't have any way to know the texture type,
+ // so we assume it's texture buffer when the sampler type is 1D, since that's more common.
+ bool isTypeBuffer = isBindless || context.Config.GpuAccessor.QuerySamplerType(imm) == SamplerType.TextureBuffer;
+ if (isTypeBuffer)
+ {
+ type = SamplerType.TextureBuffer;
+ }
+ }
+
+ int coordsCount = type.GetDimensions();
+ // Coordinates come from consecutive registers of the A sequence.
+ for (int index = 0; index < coordsCount; index++)
+ {
+ sourcesList.Add(Ra());
+ }
+
+ bool is1DTo2D = false;
+ // 1D textures are sampled as 2D, with a constant 0 as the second coordinate; the array flag is preserved.
+ if (Sample1DAs2D && (type & SamplerType.Mask) == SamplerType.Texture1D)
+ {
+ sourcesList.Add(ConstF(0));
+
+ type = SamplerType.Texture2D | (type & SamplerType.Array);
+ is1DTo2D = true;
+ }
+
+ if (isArray)
+ {
+ sourcesList.Add(arrayIndex);
+ }
+ // The LOD value and packed offsets are read from the B sequence here, in this order, but added to the sources later.
+ Operand lodValue = hasLod ? Rb() : ConstF(0);
+
+ Operand packedOffs = hasOffset ? Rb() : null;
+
+ if (hasDepthCompare)
+ {
+ sourcesList.Add(Rb());
+
+ type |= SamplerType.Shadow;
+ }
+ // Lz, Ll and Lla add a LOD level source (constant 0 when no LOD register was read), except for multisample and buffer textures.
+ if ((lodMode == Lod.Lz ||
+ lodMode == Lod.Ll ||
+ lodMode == Lod.Lla) && !isMultisample && type != SamplerType.TextureBuffer)
+ {
+ sourcesList.Add(lodValue);
+
+ flags |= TextureFlags.LodLevel;
+ }
+
+ if (hasOffset)
+ {
+ for (int index = 0; index < coordsCount; index++)
+ {
+ sourcesList.Add(context.BitfieldExtractS32(packedOffs, Const(index * 4), Const(4))); // One signed 4-bit offset per coordinate.
+ }
+
+ if (is1DTo2D)
+ {
+ sourcesList.Add(Const(0)); // Offset for the extra coordinate added by the 1D to 2D conversion.
+ }
+
+ flags |= TextureFlags.Offset;
+ }
+
+ if (lodMode == Lod.Lb || lodMode == Lod.Lba)
+ {
+ sourcesList.Add(lodValue);
+
+ flags |= TextureFlags.LodBias;
+ }
+
+ if (isMultisample)
+ {
+ sourcesList.Add(Rb());
+
+ type |= SamplerType.Multisample;
+ }
+
+ Operand[] sources = sourcesList.ToArray();
+ Operand[] dests = new Operand[BitOperations.PopCount((uint)componentMask)];
+
+ int outputIndex = 0;
+ // Destinations are consecutive registers starting at rdIndex; the loop stops before reaching the zero register.
+ for (int i = 0; i < dests.Length; i++)
+ {
+ if (rdIndex + i >= RegisterConsts.RegisterZeroIndex)
+ {
+ break;
+ }
+
+ dests[outputIndex++] = Register(rdIndex + i, RegisterType.Gpr);
+ }
+
+ if (outputIndex != dests.Length) // Shrink the array if the loop above stopped early.
+ {
+ Array.Resize(ref dests, outputIndex);
+ }
+
+ int handle = !isBindless ? imm : 0; // Bindless operations pass 0 as handle.
+
+ TextureOperation operation = context.CreateTextureOperation(
+ Instruction.TextureSample,
+ type,
+ flags,
+ handle,
+ componentMask,
+ dests,
+ sources);
+
+ context.Add(operation);
+ }
+
+ /// <summary>
+ /// Builds and adds a texture sample operation for the instruction variants that
+ /// take two destination register operands (<paramref name="dest"/> and <paramref name="dest2"/>).
+ /// </summary>
+ /// <param name="context">Emitter context that receives the generated operations</param>
+ /// <param name="texsType">Instruction layout to decode (sample, fetch or gather)</param>
+ /// <param name="imm">Value passed as the operation handle</param>
+ /// <param name="writeMask">Index into the component mask lookup table</param>
+ /// <param name="srcA">Index of the first register of the A source sequence</param>
+ /// <param name="srcB">Index of the first register of the B source sequence</param>
+ /// <param name="dest">Index of the register receiving the first two components</param>
+ /// <param name="dest2">Index of the register receiving the remaining components</param>
+ /// <param name="isF16">True to pack the results as half float pairs, one pair per destination register</param>
+ /// <exception cref="ArgumentException"><paramref name="texsType"/> is not a known type</exception>
+ private static void EmitTexs(
+     EmitterContext context,
+     TexsType texsType,
+     int imm,
+     int writeMask,
+     int srcA,
+     int srcB,
+     int dest,
+     int dest2,
+     bool isF16)
+ {
+     // Nothing to do if both destinations are the zero register.
+     if (dest == RegisterConsts.RegisterZeroIndex && dest2 == RegisterConsts.RegisterZeroIndex)
+     {
+         return;
+     }
+
+     List<Operand> sourcesList = new List<Operand>();
+
+     // Ra and Rb return a copy of the next register of their sequence (advancing the index),
+     // or constant 0 once the index is past the zero register.
+     Operand Ra()
+     {
+         if (srcA > RegisterConsts.RegisterZeroIndex)
+         {
+             return Const(0);
+         }
+
+         return context.Copy(Register(srcA++, RegisterType.Gpr));
+     }
+
+     Operand Rb()
+     {
+         if (srcB > RegisterConsts.RegisterZeroIndex)
+         {
+             return Const(0);
+         }
+
+         return context.Copy(Register(srcB++, RegisterType.Gpr));
+     }
+
+     // Reads the next B register and adds one signed offset of "size" bits per coordinate,
+     // with the fields placed "stride" bits apart.
+     void AddTextureOffset(int coordsCount, int stride, int size)
+     {
+         Operand packedOffs = Rb();
+
+         for (int index = 0; index < coordsCount; index++)
+         {
+             sourcesList.Add(context.BitfieldExtractS32(packedOffs, Const(index * stride), Const(size)));
+         }
+     }
+
+     SamplerType type;
+     TextureFlags flags;
+
+     if (texsType == TexsType.Texs)
+     {
+         var texsOp = context.GetOp<InstTexs>();
+
+         type = ConvertSamplerType(texsOp.Target);
+
+         if (type == SamplerType.None)
+         {
+             context.Config.GpuAccessor.Log("Invalid texture sampler type.");
+             return;
+         }
+
+         flags = ConvertTextureFlags(texsOp.Target);
+
+         // We don't need to handle 1D -> Buffer conversions here as
+         // only texture sample with integer coordinates can ever use buffer targets.
+
+         if ((type & SamplerType.Array) != 0)
+         {
+             // The array index is read first, but comes after the coordinates on the source list.
+             Operand arrayIndex = Ra();
+
+             sourcesList.Add(Ra());
+             sourcesList.Add(Rb());
+
+             sourcesList.Add(arrayIndex);
+
+             if ((type & SamplerType.Shadow) != 0)
+             {
+                 sourcesList.Add(Rb());
+             }
+
+             if ((flags & TextureFlags.LodLevel) != 0)
+             {
+                 sourcesList.Add(ConstF(0));
+             }
+         }
+         else
+         {
+             switch (texsOp.Target)
+             {
+                 case TexsTarget.Texture1DLodZero:
+                     sourcesList.Add(Ra());
+
+                     if (Sample1DAs2D)
+                     {
+                         // Extra coordinate for the 1D to 2D conversion.
+                         sourcesList.Add(ConstF(0));
+
+                         type &= ~SamplerType.Mask;
+                         type |= SamplerType.Texture2D;
+                     }
+
+                     sourcesList.Add(ConstF(0));
+                     break;
+
+                 case TexsTarget.Texture2D:
+                     sourcesList.Add(Ra());
+                     sourcesList.Add(Rb());
+                     break;
+
+                 case TexsTarget.Texture2DLodZero:
+                     sourcesList.Add(Ra());
+                     sourcesList.Add(Rb());
+                     sourcesList.Add(ConstF(0));
+                     break;
+
+                 case TexsTarget.Texture2DLodLevel:
+                 case TexsTarget.Texture2DDepthCompare:
+                 case TexsTarget.Texture3D:
+                 case TexsTarget.TextureCube:
+                     sourcesList.Add(Ra());
+                     sourcesList.Add(Ra());
+                     sourcesList.Add(Rb());
+                     break;
+
+                 case TexsTarget.Texture2DLodZeroDepthCompare:
+                 case TexsTarget.Texture3DLodZero:
+                     sourcesList.Add(Ra());
+                     sourcesList.Add(Ra());
+                     sourcesList.Add(Rb());
+                     sourcesList.Add(ConstF(0));
+                     break;
+
+                 case TexsTarget.Texture2DLodLevelDepthCompare:
+                 case TexsTarget.TextureCubeLodLevel:
+                     sourcesList.Add(Ra());
+                     sourcesList.Add(Ra());
+                     sourcesList.Add(Rb());
+                     sourcesList.Add(Rb());
+                     break;
+             }
+         }
+     }
+     else if (texsType == TexsType.Tlds)
+     {
+         var tldsOp = context.GetOp<InstTlds>();
+
+         type = ConvertSamplerType(tldsOp.Target);
+
+         if (type == SamplerType.None)
+         {
+             context.Config.GpuAccessor.Log("Invalid texel fetch sampler type.");
+             return;
+         }
+
+         context.Config.SetUsedFeature(FeatureFlags.IntegerSampling);
+
+         flags = ConvertTextureFlags(tldsOp.Target) | TextureFlags.IntCoords;
+
+         // A 1D fetch with LOD zero targets a buffer texture if that is what is bound;
+         // in that case no LOD level source is added.
+         if (tldsOp.Target == TldsTarget.Texture1DLodZero &&
+             context.Config.GpuAccessor.QuerySamplerType(tldsOp.TidB) == SamplerType.TextureBuffer)
+         {
+             type = SamplerType.TextureBuffer;
+             flags &= ~TextureFlags.LodLevel;
+         }
+
+         switch (tldsOp.Target)
+         {
+             case TldsTarget.Texture1DLodZero:
+                 sourcesList.Add(Ra());
+
+                 if (type != SamplerType.TextureBuffer)
+                 {
+                     if (Sample1DAs2D)
+                     {
+                         sourcesList.Add(ConstF(0));
+
+                         type &= ~SamplerType.Mask;
+                         type |= SamplerType.Texture2D;
+                     }
+
+                     sourcesList.Add(ConstF(0));
+                 }
+                 break;
+
+             case TldsTarget.Texture1DLodLevel:
+                 sourcesList.Add(Ra());
+
+                 if (Sample1DAs2D)
+                 {
+                     sourcesList.Add(ConstF(0));
+
+                     type &= ~SamplerType.Mask;
+                     type |= SamplerType.Texture2D;
+                 }
+
+                 sourcesList.Add(Rb());
+                 break;
+
+             case TldsTarget.Texture2DLodZero:
+                 sourcesList.Add(Ra());
+                 sourcesList.Add(Rb());
+                 sourcesList.Add(Const(0));
+                 break;
+
+             case TldsTarget.Texture2DLodZeroOffset:
+                 sourcesList.Add(Ra());
+                 sourcesList.Add(Ra());
+                 sourcesList.Add(Const(0));
+                 break;
+
+             case TldsTarget.Texture2DLodZeroMultisample:
+             case TldsTarget.Texture2DLodLevel:
+             case TldsTarget.Texture2DLodLevelOffset:
+                 sourcesList.Add(Ra());
+                 sourcesList.Add(Ra());
+                 sourcesList.Add(Rb());
+                 break;
+
+             case TldsTarget.Texture3DLodZero:
+                 sourcesList.Add(Ra());
+                 sourcesList.Add(Ra());
+                 sourcesList.Add(Rb());
+                 sourcesList.Add(Const(0));
+                 break;
+
+             case TldsTarget.Texture2DArrayLodZero:
+                 sourcesList.Add(Rb());
+                 sourcesList.Add(Rb());
+                 sourcesList.Add(Ra());
+                 sourcesList.Add(Const(0));
+                 break;
+         }
+
+         if ((flags & TextureFlags.Offset) != 0)
+         {
+             AddTextureOffset(type.GetDimensions(), 4, 4);
+         }
+     }
+     else if (texsType == TexsType.Tld4s)
+     {
+         var tld4sOp = context.GetOp<InstTld4s>();
+
+         // Without depth compare or offset, each sequence supplies one coordinate;
+         // otherwise both coordinates come from the A sequence.
+         if (!(tld4sOp.Dc || tld4sOp.Aoffi))
+         {
+             sourcesList.Add(Ra());
+             sourcesList.Add(Rb());
+         }
+         else
+         {
+             sourcesList.Add(Ra());
+             sourcesList.Add(Ra());
+         }
+
+         type = SamplerType.Texture2D;
+         flags = TextureFlags.Gather;
+
+         if (tld4sOp.Dc)
+         {
+             sourcesList.Add(Rb());
+
+             type |= SamplerType.Shadow;
+         }
+
+         if (tld4sOp.Aoffi)
+         {
+             AddTextureOffset(type.GetDimensions(), 8, 6);
+
+             flags |= TextureFlags.Offset;
+         }
+
+         sourcesList.Add(Const((int)tld4sOp.TexComp));
+     }
+     else
+     {
+         throw new ArgumentException($"Invalid TEXS type \"{texsType}\".");
+     }
+
+     Operand[] sources = sourcesList.ToArray();
+
+     // Half float pairs for each destination register (F16 only),
+     // components that are not written remain as constant 0.
+     Operand[] rd0 = new Operand[2] { ConstF(0), ConstF(0) };
+     Operand[] rd1 = new Operand[2] { ConstF(0), ConstF(0) };
+
+     // Row 0 of the table is used when there is no second destination register.
+     int componentMask = _maskLut[dest2 == RegisterConsts.RegisterZeroIndex ? 0 : 1, writeMask];
+
+     // Every enabled component gets exactly one destination, so the array is always filled completely.
+     Operand[] dests = new Operand[BitOperations.PopCount((uint)componentMask)];
+
+     for (int i = 0; i < dests.Length; i++)
+     {
+         // Components 0 and 1 go to the first destination register, 2 and 3 to the second one.
+         int high = i >> 1;
+         int low = i & 1;
+
+         if (isF16)
+         {
+             // Results go to temporaries that are packed into the destinations afterwards.
+             dests[i] = high != 0
+                 ? (rd1[low] = Local())
+                 : (rd0[low] = Local());
+         }
+         else
+         {
+             int rdIndex = high != 0 ? dest2 : dest;
+
+             // The second component of a pair goes to the next register, unless the base is the zero register.
+             if (rdIndex < RegisterConsts.RegisterZeroIndex)
+             {
+                 rdIndex += low;
+             }
+
+             dests[i] = Register(rdIndex, RegisterType.Gpr);
+         }
+     }
+
+     TextureOperation operation = context.CreateTextureOperation(
+         Instruction.TextureSample,
+         type,
+         flags,
+         imm,
+         componentMask,
+         dests,
+         sources);
+
+     context.Add(operation);
+
+     if (isF16)
+     {
+         context.Copy(Register(dest, RegisterType.Gpr), context.PackHalf2x16(rd0[0], rd0[1]));
+         context.Copy(Register(dest2, RegisterType.Gpr), context.PackHalf2x16(rd1[0], rd1[1]));
+     }
+ }
+
+ private static void EmitTld4( // Builds the source/destination lists for a texture gather and adds a TextureSample operation with the Gather flag.
+ EmitterContext context,
+ TexDim dimensions, // Texture dimensions, converted to the sampler type.
+ TexComp component, // Component to gather; added as the last source.
+ int imm, // Passed as the operation handle.
+ int componentMask, // One bit per component; the number of set bits is the number of destination registers.
+ int srcA, // Index of the first register of the A source sequence.
+ int srcB, // Index of the first register of the B source sequence.
+ int dest, // Index of the first destination register.
+ TexOffset offset, // Offset mode; Aoffi reads one packed offset register, Ptp reads two.
+ bool hasDepthCompare,
+ bool isBindless)
+ {
+ if (dest == RegisterConsts.RegisterZeroIndex) // Nothing is emitted when the destination is the zero register.
+ {
+ return;
+ }
+ // Ra and Rb return a copy of the next register of their sequence (advancing the index), or constant 0 once the index is past the zero register.
+ Operand Ra()
+ {
+ if (srcA > RegisterConsts.RegisterZeroIndex)
+ {
+ return Const(0);
+ }
+
+ return context.Copy(Register(srcA++, RegisterType.Gpr));
+ }
+
+ Operand Rb()
+ {
+ if (srcB > RegisterConsts.RegisterZeroIndex)
+ {
+ return Const(0);
+ }
+
+ return context.Copy(Register(srcB++, RegisterType.Gpr));
+ }
+
+ bool isArray =
+ dimensions == TexDim.Array1d ||
+ dimensions == TexDim.Array2d ||
+ dimensions == TexDim.Array3d ||
+ dimensions == TexDim.ArrayCube;
+ // The array index is read from the A sequence before the coordinates, but is added to the sources after them (see below).
+ Operand arrayIndex = isArray ? Ra() : null;
+
+ List<Operand> sourcesList = new List<Operand>();
+
+ SamplerType type = ConvertSamplerType(dimensions);
+ TextureFlags flags = TextureFlags.Gather;
+ // For bindless operations, the first source is read from the B sequence.
+ if (isBindless)
+ {
+ sourcesList.Add(Rb());
+
+ flags |= TextureFlags.Bindless;
+ }
+
+ int coordsCount = type.GetDimensions();
+
+ for (int index = 0; index < coordsCount; index++)
+ {
+ sourcesList.Add(Ra());
+ }
+ // 1D textures are sampled as 2D, with a constant 0 as the second coordinate; the array flag is preserved.
+ bool is1DTo2D = Sample1DAs2D && (type & SamplerType.Mask) == SamplerType.Texture1D;
+
+ if (is1DTo2D)
+ {
+ sourcesList.Add(ConstF(0));
+
+ type = SamplerType.Texture2D | (type & SamplerType.Array);
+ }
+
+ if (isArray)
+ {
+ sourcesList.Add(arrayIndex);
+ }
+
+ Operand[] packedOffs = new Operand[2];
+
+ bool hasAnyOffset = offset == TexOffset.Aoffi || offset == TexOffset.Ptp;
+ // Packed offsets are read from the B sequence here, before the depth compare value, but added to the sources after it.
+ packedOffs[0] = hasAnyOffset ? Rb() : null;
+ packedOffs[1] = offset == TexOffset.Ptp ? Rb() : null;
+
+ if (hasDepthCompare)
+ {
+ sourcesList.Add(Rb());
+
+ type |= SamplerType.Shadow;
+ }
+
+ if (hasAnyOffset)
+ {
+ int offsetTexelsCount = offset == TexOffset.Ptp ? 4 : 1; // Ptp supplies offsets for 4 texels, Aoffi for 1.
+
+ for (int index = 0; index < coordsCount * offsetTexelsCount; index++)
+ {
+ Operand packed = packedOffs[(index >> 2) & 1]; // Each packed register holds four fields; the second register is used from the fifth field onwards.
+
+ sourcesList.Add(context.BitfieldExtractS32(packed, Const((index & 3) * 8), Const(6))); // Signed 6-bit fields placed 8 bits apart.
+ }
+
+ if (is1DTo2D)
+ {
+ for (int index = 0; index < offsetTexelsCount; index++)
+ {
+ sourcesList.Add(Const(0)); // Offsets for the extra coordinate added by the 1D to 2D conversion.
+ }
+ }
+
+ flags |= offset == TexOffset.Ptp ? TextureFlags.Offsets : TextureFlags.Offset;
+ }
+
+ sourcesList.Add(Const((int)component));
+
+ Operand[] sources = sourcesList.ToArray();
+ Operand[] dests = new Operand[BitOperations.PopCount((uint)componentMask)];
+
+ int outputIndex = 0;
+ // Destinations are consecutive registers starting at dest; the loop stops before reaching the zero register.
+ for (int i = 0; i < dests.Length; i++)
+ {
+ if (dest + i >= RegisterConsts.RegisterZeroIndex)
+ {
+ break;
+ }
+
+ dests[outputIndex++] = Register(dest + i, RegisterType.Gpr);
+ }
+
+ if (outputIndex != dests.Length) // Shrink the array if the loop above stopped early.
+ {
+ Array.Resize(ref dests, outputIndex);
+ }
+
+ int handle = imm;
+
+ TextureOperation operation = context.CreateTextureOperation(
+ Instruction.TextureSample,
+ type,
+ flags,
+ handle,
+ componentMask,
+ dests,
+ sources);
+
+ context.Add(operation);
+ }
+
+ /// <summary>
+ /// Emits one <see cref="Instruction.Lod"/> texture operation per requested component,
+ /// and writes each result to the destination registers as a fixed point value (scaled by 256).
+ /// </summary>
+ /// <param name="context">Emitter context that receives the generated operations</param>
+ /// <param name="dimensions">Texture dimensions, converted to the sampler type</param>
+ /// <param name="imm">Value passed as the operation handle</param>
+ /// <param name="componentMask">One bit per requested component</param>
+ /// <param name="srcA">Index of the first register of the A source sequence (array index and coordinates)</param>
+ /// <param name="srcB">Index of the first register of the B source sequence (bindless handle)</param>
+ /// <param name="dest">Index of the first destination register</param>
+ /// <param name="isBindless">True if the first source is read from the B sequence</param>
+ private static void EmitTmml(
+     EmitterContext context,
+     TexDim dimensions,
+     int imm,
+     int componentMask,
+     int srcA,
+     int srcB,
+     int dest,
+     bool isBindless)
+ {
+     if (dest == RegisterConsts.RegisterZeroIndex)
+     {
+         return;
+     }
+
+     // Each call yields a copy of the next register of the A sequence,
+     // or constant 0 once the index has moved past the zero register.
+     Operand Ra()
+     {
+         return srcA > RegisterConsts.RegisterZeroIndex
+             ? Const(0)
+             : context.Copy(Register(srcA++, RegisterType.Gpr));
+     }
+
+     // Same as Ra, for the B sequence.
+     Operand Rb()
+     {
+         return srcB > RegisterConsts.RegisterZeroIndex
+             ? Const(0)
+             : context.Copy(Register(srcB++, RegisterType.Gpr));
+     }
+
+     List<Operand> operands = new List<Operand>();
+     TextureFlags flags = TextureFlags.None;
+
+     if (isBindless)
+     {
+         operands.Add(Rb());
+
+         flags |= TextureFlags.Bindless;
+     }
+
+     SamplerType samplerType = ConvertSamplerType(dimensions);
+
+     int coordsCount = samplerType.GetDimensions();
+
+     bool isArray;
+
+     switch (dimensions)
+     {
+         case TexDim.Array1d:
+         case TexDim.Array2d:
+         case TexDim.Array3d:
+         case TexDim.ArrayCube:
+             isArray = true;
+             break;
+
+         default:
+             isArray = false;
+             break;
+     }
+
+     // The array index is read before the coordinates, but comes after them on the source list.
+     Operand arrayIndex = isArray ? Ra() : null;
+
+     for (int remaining = coordsCount; remaining > 0; remaining--)
+     {
+         operands.Add(Ra());
+     }
+
+     if (Sample1DAs2D && (samplerType & SamplerType.Mask) == SamplerType.Texture1D)
+     {
+         // 1D textures are accessed as 2D, with a constant 0 as second coordinate.
+         operands.Add(ConstF(0));
+
+         samplerType = SamplerType.Texture2D | (samplerType & SamplerType.Array);
+     }
+
+     if (isArray)
+     {
+         operands.Add(arrayIndex);
+     }
+
+     Operand[] sources = operands.ToArray();
+
+     for (int compIndex = 0, remainingMask = componentMask; remainingMask != 0; remainingMask >>= 1, compIndex++)
+     {
+         if ((remainingMask & 1) == 0)
+         {
+             continue;
+         }
+
+         // Results go to consecutive registers, stop once the zero register is reached.
+         if (dest >= RegisterConsts.RegisterZeroIndex)
+         {
+             break;
+         }
+
+         Operand destOperand = Register(dest++, RegisterType.Gpr);
+
+         // Components z and w aren't standard, we return 0 in this case and add a comment.
+         if (compIndex >= 2)
+         {
+             context.Add(new CommentNode("Unsupported component z or w found"));
+             context.Copy(destOperand, Const(0));
+
+             continue;
+         }
+
+         Operand lodResult = Local();
+
+         TextureOperation operation = context.CreateTextureOperation(
+             Instruction.Lod,
+             samplerType,
+             flags,
+             imm,
+             compIndex ^ 1, // The instruction component order is the inverse of GLSL's.
+             new[] { lodResult },
+             sources);
+
+         context.Add(operation);
+
+         // Convert the floating point result to fixed point by scaling it by 256.
+         Operand scaled = context.FPMultiply(lodResult, ConstF(256.0f));
+
+         context.Copy(destOperand, context.FP32ConvertToS32(scaled));
+     }
+ }
+
+ private static void EmitTxd( // Builds the source/destination lists for a sample with explicit derivatives and adds a TextureSample operation with the Derivatives flag.
+ EmitterContext context,
+ TexDim dimensions, // Texture dimensions, converted to the sampler type.
+ int imm, // Passed as the operation handle.
+ int componentMask, // One bit per component; the number of set bits is the number of destination registers.
+ int srcA, // Index of the first register of the A source sequence (bindless handle, coordinates, packed parameters).
+ int srcB, // Index of the first register of the B source sequence (derivatives).
+ int dest, // Index of the first destination register.
+ bool hasOffset,
+ bool isBindless)
+ {
+ if (dest == RegisterConsts.RegisterZeroIndex) // Nothing is emitted when the destination is the zero register.
+ {
+ return;
+ }
+ // Ra and Rb return a copy of the next register of their sequence (advancing the index), or constant 0 once the index is past the zero register.
+ Operand Ra()
+ {
+ if (srcA > RegisterConsts.RegisterZeroIndex)
+ {
+ return Const(0);
+ }
+
+ return context.Copy(Register(srcA++, RegisterType.Gpr));
+ }
+
+ Operand Rb()
+ {
+ if (srcB > RegisterConsts.RegisterZeroIndex)
+ {
+ return Const(0);
+ }
+
+ return context.Copy(Register(srcB++, RegisterType.Gpr));
+ }
+
+ TextureFlags flags = TextureFlags.Derivatives;
+
+ List<Operand> sourcesList = new List<Operand>();
+ // For bindless operations, the first source is read from the A sequence here.
+ if (isBindless)
+ {
+ sourcesList.Add(Ra());
+
+ flags |= TextureFlags.Bindless;
+ }
+
+ SamplerType type = ConvertSamplerType(dimensions);
+
+ int coordsCount = type.GetDimensions();
+
+ for (int index = 0; index < coordsCount; index++)
+ {
+ sourcesList.Add(Ra());
+ }
+ // 1D textures are sampled as 2D, with a constant 0 as the second coordinate; the array flag is preserved.
+ bool is1DTo2D = Sample1DAs2D && (type & SamplerType.Mask) == SamplerType.Texture1D;
+
+ if (is1DTo2D)
+ {
+ sourcesList.Add(ConstF(0));
+
+ type = SamplerType.Texture2D | (type & SamplerType.Array);
+ }
+ // The register after the coordinates packs the array index (low 16 bits) and the offsets (from bit 16).
+ Operand packedParams = Ra();
+
+ bool isArray =
+ dimensions == TexDim.Array1d ||
+ dimensions == TexDim.Array2d ||
+ dimensions == TexDim.Array3d ||
+ dimensions == TexDim.ArrayCube;
+
+ if (isArray)
+ {
+ sourcesList.Add(context.BitwiseAnd(packedParams, Const(0xffff)));
+ }
+
+ // Derivatives (X and Y).
+ for (int dIndex = 0; dIndex < 2 * coordsCount; dIndex++)
+ {
+ sourcesList.Add(Rb());
+
+ if (is1DTo2D)
+ {
+ sourcesList.Add(ConstF(0)); // Derivative for the extra coordinate added by the 1D to 2D conversion.
+ }
+ }
+
+ if (hasOffset)
+ {
+ for (int index = 0; index < coordsCount; index++)
+ {
+ sourcesList.Add(context.BitfieldExtractS32(packedParams, Const(16 + index * 4), Const(4))); // One signed 4-bit offset per coordinate.
+ }
+
+ if (is1DTo2D)
+ {
+ sourcesList.Add(Const(0)); // Offset for the extra coordinate added by the 1D to 2D conversion.
+ }
+
+ flags |= TextureFlags.Offset;
+ }
+
+ Operand[] sources = sourcesList.ToArray();
+ Operand[] dests = new Operand[BitOperations.PopCount((uint)componentMask)];
+
+ int outputIndex = 0;
+ // Destinations are consecutive registers starting at dest; the loop stops before reaching the zero register.
+ for (int i = 0; i < dests.Length; i++)
+ {
+ if (dest + i >= RegisterConsts.RegisterZeroIndex)
+ {
+ break;
+ }
+
+ dests[outputIndex++] = Register(dest + i, RegisterType.Gpr);
+ }
+
+ if (outputIndex != dests.Length) // Shrink the array if the loop above stopped early.
+ {
+ Array.Resize(ref dests, outputIndex);
+ }
+
+ int handle = imm;
+
+ TextureOperation operation = context.CreateTextureOperation(
+ Instruction.TextureSample,
+ type,
+ flags,
+ handle,
+ componentMask,
+ dests,
+ sources);
+
+ context.Add(operation);
+ }
+
+ /// <summary>
+ /// Emits one <see cref="Instruction.TextureSize"/> texture operation per requested component.
+ /// </summary>
+ /// <param name="context">Emitter context that receives the generated operations</param>
+ /// <param name="query">Requested query (currently not used)</param>
+ /// <param name="imm">Value passed as the operation handle; also used to query the sampler type when not bindless</param>
+ /// <param name="componentMask">One bit per requested component</param>
+ /// <param name="srcA">Index of the first source register</param>
+ /// <param name="dest">Index of the first destination register</param>
+ /// <param name="isBindless">True if an extra leading source is read for the bindless handle</param>
+ private static void EmitTxq(
+     EmitterContext context,
+     TexQuery query,
+     int imm,
+     int componentMask,
+     int srcA,
+     int dest,
+     bool isBindless)
+ {
+     if (dest == RegisterConsts.RegisterZeroIndex)
+     {
+         return;
+     }
+
+     context.Config.SetUsedFeature(FeatureFlags.IntegerSampling);
+
+     // Each call yields a copy of the next source register,
+     // or constant 0 once the index has moved past the zero register.
+     Operand Ra()
+     {
+         return srcA > RegisterConsts.RegisterZeroIndex
+             ? Const(0)
+             : context.Copy(Register(srcA++, RegisterType.Gpr));
+     }
+
+     // Bindless operations take one extra leading source.
+     Operand[] sources = isBindless
+         ? new[] { Ra(), Ra() }
+         : new[] { Ra() };
+
+     TextureFlags flags = TextureFlags.None;
+     SamplerType samplerType;
+
+     if (isBindless)
+     {
+         flags = TextureFlags.Bindless;
+
+         // The type is picked from the mask: 3D if the third component is requested, 2D otherwise.
+         samplerType = (componentMask & 4) != 0 ? SamplerType.Texture3D : SamplerType.Texture2D;
+     }
+     else
+     {
+         samplerType = context.Config.GpuAccessor.QuerySamplerType(imm);
+     }
+
+     for (int compIndex = 0, remainingMask = componentMask; remainingMask != 0; remainingMask >>= 1, compIndex++)
+     {
+         if ((remainingMask & 1) == 0)
+         {
+             continue;
+         }
+
+         // Results go to consecutive registers, stop once the zero register is reached.
+         if (dest >= RegisterConsts.RegisterZeroIndex)
+         {
+             break;
+         }
+
+         Operand destOperand = Register(dest++, RegisterType.Gpr);
+
+         // TODO: Validate and use query.
+         TextureOperation operation = context.CreateTextureOperation(
+             Instruction.TextureSize,
+             samplerType,
+             flags,
+             imm,
+             compIndex,
+             new[] { destOperand },
+             sources);
+
+         context.Add(operation);
+     }
+ }
+
+ /// <summary>
+ /// Converts texture dimensions into the equivalent sampler type.
+ /// </summary>
+ /// <param name="dimensions">Texture dimensions</param>
+ /// <returns>Sampler type, with the array flag set for the array dimensions</returns>
+ /// <exception cref="ArgumentException"><paramref name="dimensions"/> is not a known value</exception>
+ private static SamplerType ConvertSamplerType(TexDim dimensions)
+ {
+     switch (dimensions)
+     {
+         case TexDim._1d:
+             return SamplerType.Texture1D;
+         case TexDim.Array1d:
+             return SamplerType.Texture1D | SamplerType.Array;
+         case TexDim._2d:
+             return SamplerType.Texture2D;
+         case TexDim.Array2d:
+             return SamplerType.Texture2D | SamplerType.Array;
+         case TexDim._3d:
+             return SamplerType.Texture3D;
+         case TexDim.Array3d:
+             return SamplerType.Texture3D | SamplerType.Array;
+         case TexDim.Cube:
+             return SamplerType.TextureCube;
+         case TexDim.ArrayCube:
+             return SamplerType.TextureCube | SamplerType.Array;
+         default:
+             throw new ArgumentException($"Invalid texture dimensions \"{dimensions}\".");
+     }
+ }
+
+ /// <summary>
+ /// Gets the sampler type for a given texture sample target.
+ /// </summary>
+ /// <param name="type">Texture sample target</param>
+ /// <returns>Sampler type, or <see cref="SamplerType.None"/> if the target is not recognized</returns>
+ private static SamplerType ConvertSamplerType(TexsTarget type)
+ {
+     return type switch
+     {
+         TexsTarget.Texture1DLodZero => SamplerType.Texture1D,
+
+         TexsTarget.Texture2D => SamplerType.Texture2D,
+         TexsTarget.Texture2DLodZero => SamplerType.Texture2D,
+         TexsTarget.Texture2DLodLevel => SamplerType.Texture2D,
+
+         TexsTarget.Texture2DDepthCompare => SamplerType.Texture2D | SamplerType.Shadow,
+         TexsTarget.Texture2DLodLevelDepthCompare => SamplerType.Texture2D | SamplerType.Shadow,
+         TexsTarget.Texture2DLodZeroDepthCompare => SamplerType.Texture2D | SamplerType.Shadow,
+
+         TexsTarget.Texture2DArray => SamplerType.Texture2D | SamplerType.Array,
+         TexsTarget.Texture2DArrayLodZero => SamplerType.Texture2D | SamplerType.Array,
+
+         TexsTarget.Texture2DArrayLodZeroDepthCompare => SamplerType.Texture2D | SamplerType.Array | SamplerType.Shadow,
+
+         TexsTarget.Texture3D => SamplerType.Texture3D,
+         TexsTarget.Texture3DLodZero => SamplerType.Texture3D,
+
+         TexsTarget.TextureCube => SamplerType.TextureCube,
+         TexsTarget.TextureCubeLodLevel => SamplerType.TextureCube,
+
+         _ => SamplerType.None
+     };
+ }
+
+ /// <summary>
+ /// Gets the sampler type for a given texel fetch target.
+ /// </summary>
+ /// <param name="type">Texel fetch target</param>
+ /// <returns>Sampler type, or <see cref="SamplerType.None"/> if the target is not recognized</returns>
+ private static SamplerType ConvertSamplerType(TldsTarget type)
+ {
+     return type switch
+     {
+         TldsTarget.Texture1DLodZero => SamplerType.Texture1D,
+         TldsTarget.Texture1DLodLevel => SamplerType.Texture1D,
+
+         TldsTarget.Texture2DLodZero => SamplerType.Texture2D,
+         TldsTarget.Texture2DLodZeroOffset => SamplerType.Texture2D,
+         TldsTarget.Texture2DLodLevel => SamplerType.Texture2D,
+         TldsTarget.Texture2DLodLevelOffset => SamplerType.Texture2D,
+
+         TldsTarget.Texture2DLodZeroMultisample => SamplerType.Texture2D | SamplerType.Multisample,
+
+         TldsTarget.Texture3DLodZero => SamplerType.Texture3D,
+
+         TldsTarget.Texture2DArrayLodZero => SamplerType.Texture2D | SamplerType.Array,
+
+         _ => SamplerType.None
+     };
+ }
+
+ /// <summary>
+ /// Gets the texture flags implied by a given texture sample target.
+ /// </summary>
+ /// <param name="type">Texture sample target</param>
+ /// <returns><see cref="TextureFlags.LodLevel"/> for the targets with LOD zero or an explicit LOD level, <see cref="TextureFlags.None"/> otherwise</returns>
+ private static TextureFlags ConvertTextureFlags(TexsTarget type)
+ {
+     return type switch
+     {
+         TexsTarget.Texture1DLodZero => TextureFlags.LodLevel,
+         TexsTarget.Texture2DLodZero => TextureFlags.LodLevel,
+         TexsTarget.Texture2DLodLevel => TextureFlags.LodLevel,
+         TexsTarget.Texture2DLodLevelDepthCompare => TextureFlags.LodLevel,
+         TexsTarget.Texture2DLodZeroDepthCompare => TextureFlags.LodLevel,
+         TexsTarget.Texture2DArrayLodZero => TextureFlags.LodLevel,
+         TexsTarget.Texture2DArrayLodZeroDepthCompare => TextureFlags.LodLevel,
+         TexsTarget.Texture3DLodZero => TextureFlags.LodLevel,
+         TexsTarget.TextureCubeLodLevel => TextureFlags.LodLevel,
+
+         // Texture2D, Texture2DDepthCompare, Texture2DArray, Texture3D, TextureCube
+         // and anything unrecognized.
+         _ => TextureFlags.None
+     };
+ }
+
+ /// <summary>
+ /// Gets the texture flags implied by a given texel fetch target.
+ /// </summary>
+ /// <param name="type">Texel fetch target</param>
+ /// <returns>LOD level flag for all known targets, plus the offset flag for the offset targets; <see cref="TextureFlags.None"/> if the target is not recognized</returns>
+ private static TextureFlags ConvertTextureFlags(TldsTarget type)
+ {
+     return type switch
+     {
+         TldsTarget.Texture1DLodZero => TextureFlags.LodLevel,
+         TldsTarget.Texture1DLodLevel => TextureFlags.LodLevel,
+         TldsTarget.Texture2DLodZero => TextureFlags.LodLevel,
+         TldsTarget.Texture2DLodLevel => TextureFlags.LodLevel,
+         TldsTarget.Texture2DLodZeroMultisample => TextureFlags.LodLevel,
+         TldsTarget.Texture3DLodZero => TextureFlags.LodLevel,
+         TldsTarget.Texture2DArrayLodZero => TextureFlags.LodLevel,
+
+         TldsTarget.Texture2DLodZeroOffset => TextureFlags.LodLevel | TextureFlags.Offset,
+         TldsTarget.Texture2DLodLevelOffset => TextureFlags.LodLevel | TextureFlags.Offset,
+
+         _ => TextureFlags.None
+     };
+ }
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/Instructions/InstEmitVideoArithmetic.cs b/src/Ryujinx.Graphics.Shader/Instructions/InstEmitVideoArithmetic.cs
new file mode 100644
index 00000000..2d84c5bd
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/Instructions/InstEmitVideoArithmetic.cs
@@ -0,0 +1,118 @@
+using Ryujinx.Graphics.Shader.Decoders;
+using Ryujinx.Graphics.Shader.IntermediateRepresentation;
+using Ryujinx.Graphics.Shader.Translation;
+
+using static Ryujinx.Graphics.Shader.Instructions.InstEmitHelper;
+using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
+
+namespace Ryujinx.Graphics.Shader.Instructions
+{
+ static partial class InstEmit
+ {
+ /// <summary>
+ /// Emits IR for the instruction decoded as <see cref="InstVmad"/>.
+ /// Computes a 64-bit product of the A and B operands, adds the C operand, optionally adds one,
+ /// optionally shifts the 64-bit value right by 7 or 15, optionally saturates it to 32 bits,
+ /// and copies the low 32 bits (or the clamp value) into the destination register.
+ /// </summary>
+ /// <param name="context">Emitter context holding the current instruction</param>
+ public static void Vmad(EmitterContext context)
+ {
+ InstVmad op = context.GetOp<InstVmad>();
+
+ // The S8B0 bit of the selector is what this method treats as the "signed" flag.
+ bool aSigned = (op.ASelect & VectorSelect.S8B0) != 0;
+ bool bSigned = (op.BSelect & VectorSelect.S8B0) != 0;
+
+ Operand srcA = InstEmitAluHelper.Extend(context, GetSrcReg(context, op.SrcA), op.ASelect);
+ // C is negated when the mode is NegB.
+ Operand srcC = context.INegate(GetSrcReg(context, op.SrcC), op.AvgMode == AvgMode.NegB);
+ Operand srcB;
+
+ if (op.BVideo)
+ {
+ srcB = InstEmitAluHelper.Extend(context, GetSrcReg(context, op.SrcB), op.BSelect);
+ }
+ else
+ {
+ int imm = op.Imm16;
+
+ if (bSigned)
+ {
+ // Sign-extend the low 16 bits of the immediate.
+ imm = (imm << 16) >> 16;
+ }
+
+ srcB = Const(imm);
+ }
+
+ Operand productLow = context.IMultiply(srcA, srcB);
+ Operand productHigh;
+
+ if (aSigned == bSigned)
+ {
+ productHigh = aSigned
+ ? context.MultiplyHighS32(srcA, srcB)
+ : context.MultiplyHighU32(srcA, srcB);
+ }
+ else
+ {
+ // Mixed signedness: start from the unsigned high word and add
+ // other * (signedOperand >> 31), which subtracts the other operand
+ // from the high word when the signed operand is negative.
+ Operand temp = aSigned
+ ? context.IMultiply(srcB, context.ShiftRightS32(srcA, Const(31)))
+ : context.IMultiply(srcA, context.ShiftRightS32(srcB, Const(31)));
+
+ productHigh = context.IAdd(temp, context.MultiplyHighU32(srcA, srcB));
+ }
+
+ if (op.AvgMode == AvgMode.NegA)
+ {
+ // NegA negates the whole 64-bit product.
+ (productLow, productHigh) = InstEmitAluHelper.NegateLong(context, productLow, productHigh);
+ }
+
+ // 64-bit accumulate: add C to the low word and propagate the carry into the high word.
+ Operand resLow = InstEmitAluHelper.AddWithCarry(context, productLow, srcC, out Operand sumCarry);
+ Operand resHigh = context.IAdd(productHigh, sumCarry);
+
+ if (op.AvgMode == AvgMode.PlusOne)
+ {
+ resLow = InstEmitAluHelper.AddWithCarry(context, resLow, Const(1), out Operand poCarry);
+ resHigh = context.IAdd(resHigh, poCarry);
+ }
+
+ // The result is handled as signed when either selector is S32 or a negate mode is used.
+ bool resSigned = op.ASelect == VectorSelect.S32 ||
+ op.BSelect == VectorSelect.S32 ||
+ op.AvgMode == AvgMode.NegB ||
+ op.AvgMode == AvgMode.NegA;
+
+ int shift = op.VideoScale switch
+ {
+ VideoScale.Shr7 => 7,
+ VideoScale.Shr15 => 15,
+ _ => 0
+ };
+
+ if (shift != 0)
+ {
+ // Low = (Low >> Shift) | (High << (32 - Shift))
+ // High >>= Shift
+ resLow = context.ShiftRightU32(resLow, Const(shift));
+ resLow = context.BitwiseOr(resLow, context.ShiftLeft(resHigh, Const(32 - shift)));
+ resHigh = resSigned
+ ? context.ShiftRightS32(resHigh, Const(shift))
+ : context.ShiftRightU32(resHigh, Const(shift));
+ }
+
+ Operand res = resLow;
+
+ if (op.Sat)
+ {
+ // All ones when the 64-bit result is negative, zero otherwise.
+ Operand sign = context.ShiftRightS32(resHigh, Const(31));
+
+ if (resSigned)
+ {
+ // The value fits in 32 signed bits only if the high word is the sign extension of the low word.
+ Operand overflow = context.ICompareNotEqual(resHigh, context.ShiftRightS32(resLow, Const(31)));
+ Operand clampValue = context.ConditionalSelect(sign, Const(int.MinValue), Const(int.MaxValue));
+ res = context.ConditionalSelect(overflow, clampValue, resLow);
+ }
+ else
+ {
+ // Unsigned: any non-zero high word is an overflow. ~sign clamps to 0 for a negative
+ // result and to all ones (0xFFFFFFFF) otherwise.
+ Operand overflow = context.ICompareNotEqual(resHigh, Const(0));
+ res = context.ConditionalSelect(overflow, context.BitwiseNot(sign), resLow);
+ }
+ }
+
+ context.Copy(GetDest(op.Dest), res);
+
+ // TODO: CC.
+ }
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/Instructions/InstEmitVideoMinMax.cs b/src/Ryujinx.Graphics.Shader/Instructions/InstEmitVideoMinMax.cs
new file mode 100644
index 00000000..67b185ab
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/Instructions/InstEmitVideoMinMax.cs
@@ -0,0 +1,183 @@
+using Ryujinx.Graphics.Shader.Decoders;
+using Ryujinx.Graphics.Shader.IntermediateRepresentation;
+using Ryujinx.Graphics.Shader.Translation;
+
+using static Ryujinx.Graphics.Shader.Instructions.InstEmitHelper;
+using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
+
+namespace Ryujinx.Graphics.Shader.Instructions
+{
+ static partial class InstEmit
+ {
+     /// <summary>
+     /// Emits IR for the instruction decoded as <see cref="InstVmnmx"/>.
+     /// Computes the minimum or maximum (selected by <c>op.Mn</c>, where true means maximum) of the
+     /// extended A and B operands, optionally saturates it to the destination format, combines it
+     /// with the C operand according to <c>op.VideoOp</c> and writes it to the destination register.
+     /// </summary>
+     /// <param name="context">Emitter context holding the current instruction</param>
+     public static void Vmnmx(EmitterContext context)
+     {
+         InstVmnmx op = context.GetOp<InstVmnmx>();
+
+         // The S8B0 bit of the selector is what this method treats as the "signed" flag.
+         bool aSigned = (op.ASelect & VectorSelect.S8B0) != 0;
+         bool bSigned = (op.BSelect & VectorSelect.S8B0) != 0;
+
+         Operand srcA = InstEmitAluHelper.Extend(context, GetSrcReg(context, op.SrcA), op.ASelect);
+         Operand srcC = GetSrcReg(context, op.SrcC);
+         Operand srcB;
+
+         if (op.BVideo)
+         {
+             srcB = InstEmitAluHelper.Extend(context, GetSrcReg(context, op.SrcB), op.BSelect);
+         }
+         else
+         {
+             int imm = op.Imm16;
+
+             if (bSigned)
+             {
+                 // Sign-extend the low 16 bits of the immediate.
+                 imm = (imm << 16) >> 16;
+             }
+
+             srcB = Const(imm);
+         }
+
+         Operand res;
+
+         bool resSigned;
+
+         if (aSigned != bSigned)
+         {
+             // Signedness is different, but for max, result will always fit a U32,
+             // since one of the inputs can't be negative, and the result is the one
+             // with highest value. For min, it will always fit on a S32, since
+             // one of the input can't be greater than INT_MAX and we want the lowest value.
+             resSigned = !op.Mn;
+
+             Operand signedSrc = aSigned ? srcA : srcB;
+             Operand unsignedSrc = aSigned ? srcB : srcA;
+
+             if (op.Mn)
+             {
+                 // The unsigned maximum is only wrong when the signed input is negative
+                 // (it looks huge when reinterpreted as unsigned). In that case the
+                 // unsigned input is the real maximum.
+                 res = context.IMaximumU32(srcA, srcB);
+
+                 Operand isSignedNegative = context.ICompareLess(signedSrc, Const(0));
+
+                 res = context.ConditionalSelect(isSignedNegative, unsignedSrc, res);
+             }
+             else
+             {
+                 // The signed minimum is only wrong when the unsigned input is above INT_MAX
+                 // (it looks negative when reinterpreted as signed). In that case the
+                 // signed input is the real minimum.
+                 res = context.IMinimumS32(srcA, srcB);
+
+                 Operand isUnsignedGtIntMax = context.ICompareLess(unsignedSrc, Const(0));
+
+                 res = context.ConditionalSelect(isUnsignedGtIntMax, signedSrc, res);
+             }
+         }
+         else
+         {
+             // Ra and Rb have the same signedness, so doesn't matter which one we test.
+             resSigned = aSigned;
+
+             if (op.Mn)
+             {
+                 res = resSigned
+                     ? context.IMaximumS32(srcA, srcB)
+                     : context.IMaximumU32(srcA, srcB);
+             }
+             else
+             {
+                 res = resSigned
+                     ? context.IMinimumS32(srcA, srcB)
+                     : context.IMinimumU32(srcA, srcB);
+             }
+         }
+
+         if (op.Sat)
+         {
+             if (op.DFormat && !resSigned)
+             {
+                 // Unsigned result, DFormat set: clamp to INT_MAX.
+                 res = context.IMinimumU32(res, Const(int.MaxValue));
+             }
+             else if (!op.DFormat && resSigned)
+             {
+                 // Signed result, DFormat clear: clamp negative values to zero.
+                 res = context.IMaximumS32(res, Const(0));
+             }
+         }
+
+         // Secondary operation combining the result with the C operand.
+         // DFormat selects the signed variant of Max/Min.
+         switch (op.VideoOp)
+         {
+             case VideoOp.Acc:
+                 res = context.IAdd(res, srcC);
+                 break;
+             case VideoOp.Max:
+                 res = op.DFormat ? context.IMaximumS32(res, srcC) : context.IMaximumU32(res, srcC);
+                 break;
+             case VideoOp.Min:
+                 res = op.DFormat ? context.IMinimumS32(res, srcC) : context.IMinimumU32(res, srcC);
+                 break;
+             case VideoOp.Mrg16h:
+                 res = context.BitfieldInsert(srcC, res, Const(16), Const(16));
+                 break;
+             case VideoOp.Mrg16l:
+                 res = context.BitfieldInsert(srcC, res, Const(0), Const(16));
+                 break;
+             case VideoOp.Mrg8b0:
+                 res = context.BitfieldInsert(srcC, res, Const(0), Const(8));
+                 break;
+             case VideoOp.Mrg8b2:
+                 res = context.BitfieldInsert(srcC, res, Const(16), Const(8));
+                 break;
+         }
+
+         context.Copy(GetDest(op.Dest), res);
+     }
+
+     /// <summary>
+     /// Emits IR for the instruction decoded as <see cref="InstVsetp"/>.
+     /// Compares the extended A and B operands with <c>op.VComp</c>, combines the comparison and its
+     /// complement with the source predicate using <c>op.BoolOp</c>, and writes both to the
+     /// destination predicate registers.
+     /// </summary>
+     /// <param name="context">Emitter context holding the current instruction</param>
+     public static void Vsetp(EmitterContext context)
+     {
+         InstVsetp op = context.GetOp<InstVsetp>();
+
+         Operand srcA = InstEmitAluHelper.Extend(context, GetSrcReg(context, op.SrcA), op.ASelect);
+         Operand srcB;
+
+         if (op.BVideo)
+         {
+             srcB = InstEmitAluHelper.Extend(context, GetSrcReg(context, op.SrcB), op.BSelect);
+         }
+         else
+         {
+             int imm = op.Imm16;
+
+             if ((op.BSelect & VectorSelect.S8B0) != 0)
+             {
+                 // Sign-extend the low 16 bits of the immediate.
+                 imm = (imm << 16) >> 16;
+             }
+
+             srcB = Const(imm);
+         }
+
+         Operand p0Res;
+
+         bool signedA = (op.ASelect & VectorSelect.S8B0) != 0;
+         bool signedB = (op.BSelect & VectorSelect.S8B0) != 0;
+
+         if (signedA != signedB)
+         {
+             // Selector with the signed bit cleared; U32 means a full 32-bit operand.
+             bool a32 = (op.ASelect & ~VectorSelect.S8B0) == VectorSelect.U32;
+             bool b32 = (op.BSelect & ~VectorSelect.S8B0) == VectorSelect.U32;
+
+             if (!a32 && !b32)
+             {
+                 // Both values are extended small integer and can always fit in a S32, just do a signed comparison.
+                 p0Res = GetIntComparison(context, op.VComp, srcA, srcB, isSigned: true, extended: false);
+             }
+             else
+             {
+                 // TODO: Mismatching sign case.
+                 // Until it is implemented, the comparison result is a constant false.
+                 p0Res = Const(0);
+             }
+         }
+         else
+         {
+             // Sign matches, just do a regular comparison.
+             p0Res = GetIntComparison(context, op.VComp, srcA, srcB, signedA, extended: false);
+         }
+
+         Operand p1Res = context.BitwiseNot(p0Res);
+
+         Operand pred = GetPredicate(context, op.SrcPred, op.SrcPredInv);
+
+         p0Res = InstEmitAluHelper.GetPredLogicalOp(context, op.BoolOp, p0Res, pred);
+         p1Res = InstEmitAluHelper.GetPredLogicalOp(context, op.BoolOp, p1Res, pred);
+
+         context.Copy(Register(op.DestPred, RegisterType.Predicate), p0Res);
+         context.Copy(Register(op.DestPredInv, RegisterType.Predicate), p1Res);
+     }
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/Instructions/InstEmitWarp.cs b/src/Ryujinx.Graphics.Shader/Instructions/InstEmitWarp.cs
new file mode 100644
index 00000000..3c833613
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/Instructions/InstEmitWarp.cs
@@ -0,0 +1,84 @@
+using Ryujinx.Graphics.Shader.Decoders;
+using Ryujinx.Graphics.Shader.IntermediateRepresentation;
+using Ryujinx.Graphics.Shader.Translation;
+
+using static Ryujinx.Graphics.Shader.Instructions.InstEmitHelper;
+using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
+
+namespace Ryujinx.Graphics.Shader.Instructions
+{
+ static partial class InstEmit
+ {
+     /// <summary>
+     /// Emits IR for the instruction decoded as <see cref="InstFswzadd"/>: the result of
+     /// <c>FPSwizzleAdd</c> on the A and B registers is copied to the destination,
+     /// then the FP zero/negative flags are updated from it when requested by the instruction.
+     /// </summary>
+     /// <param name="context">Emitter context holding the current instruction</param>
+     public static void Fswzadd(EmitterContext context)
+     {
+         InstFswzadd op = context.GetOp<InstFswzadd>();
+
+         Operand lhs = GetSrcReg(context, op.SrcA);
+         Operand rhs = GetSrcReg(context, op.SrcB);
+         Operand target = GetDest(op.Dest);
+
+         Operand swizzledSum = context.FPSwizzleAdd(lhs, rhs, op.PnWord);
+
+         context.Copy(target, swizzledSum);
+
+         InstEmitAluHelper.SetFPZnFlags(context, target, op.WriteCC);
+     }
+
+     /// <summary>
+     /// Emits IR for the instruction decoded as <see cref="InstShfl"/>: performs the shuffle selected
+     /// by <c>op.ShflMode</c> and writes the shuffled value to the destination register and the
+     /// validity result to the destination predicate.
+     /// </summary>
+     /// <param name="context">Emitter context holding the current instruction</param>
+     public static void Shfl(EmitterContext context)
+     {
+         InstShfl op = context.GetOp<InstShfl>();
+
+         Operand validPred = Register(op.DestPred, RegisterType.Predicate);
+
+         Operand value = GetSrcReg(context, op.SrcA);
+
+         // B and C come either from an immediate or from a register.
+         Operand srcB;
+         if (op.BFixShfl)
+         {
+             srcB = Const(op.SrcBImm);
+         }
+         else
+         {
+             srcB = GetSrcReg(context, op.SrcB);
+         }
+
+         Operand srcC;
+         if (op.CFixShfl)
+         {
+             srcC = Const(op.SrcCImm);
+         }
+         else
+         {
+             srcC = GetSrcReg(context, op.SrcC);
+         }
+
+         Operand shuffled;
+         Operand isValid;
+
+         switch (op.ShflMode)
+         {
+             case ShflMode.Idx:
+                 (shuffled, isValid) = context.Shuffle(value, srcB, srcC);
+                 break;
+             case ShflMode.Up:
+                 (shuffled, isValid) = context.ShuffleUp(value, srcB, srcC);
+                 break;
+             case ShflMode.Down:
+                 (shuffled, isValid) = context.ShuffleDown(value, srcB, srcC);
+                 break;
+             case ShflMode.Bfly:
+                 (shuffled, isValid) = context.ShuffleXor(value, srcB, srcC);
+                 break;
+             default:
+                 shuffled = null;
+                 isValid = null;
+                 break;
+         }
+
+         context.Copy(GetDest(op.Dest), shuffled);
+         context.Copy(validPred, isValid);
+     }
+
+     /// <summary>
+     /// Emits IR for the instruction decoded as <see cref="InstVote"/>: writes the vote result for the
+     /// source predicate to the destination predicate (or logs when the vote mode is not recognized),
+     /// and writes the ballot of the source predicate to the destination register unless that
+     /// register is the zero register.
+     /// </summary>
+     /// <param name="context">Emitter context holding the current instruction</param>
+     public static void Vote(EmitterContext context)
+     {
+         InstVote op = context.GetOp<InstVote>();
+
+         Operand pred = GetPredicate(context, op.SrcPred, op.SrcPredInv);
+
+         Operand voteResult = op.VoteMode switch
+         {
+             VoteMode.All => context.VoteAll(pred),
+             VoteMode.Any => context.VoteAny(pred),
+             VoteMode.Eq => context.VoteAllEqual(pred),
+             _ => null
+         };
+
+         if (voteResult == null)
+         {
+             context.Config.GpuAccessor.Log($"Invalid vote operation: {op.VoteMode}.");
+         }
+         else
+         {
+             context.Copy(Register(op.VpDest, RegisterType.Predicate), voteResult);
+         }
+
+         bool writesBallot = op.Dest != RegisterConsts.RegisterZeroIndex;
+
+         if (writesBallot)
+         {
+             context.Copy(GetDest(op.Dest), context.Ballot(pred));
+         }
+     }
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/Instructions/InstEmitter.cs b/src/Ryujinx.Graphics.Shader/Instructions/InstEmitter.cs
new file mode 100644
index 00000000..91c740b6
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/Instructions/InstEmitter.cs
@@ -0,0 +1,6 @@
+using Ryujinx.Graphics.Shader.Translation;
+
+namespace Ryujinx.Graphics.Shader.Instructions
+{
+ /// <summary>
+ /// Delegate type for methods taking an <see cref="EmitterContext"/> and returning nothing.
+ /// </summary>
+ /// <param name="context">Emitter context passed to the method</param>
+ delegate void InstEmitter(EmitterContext context);
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/Instructions/Lop3Expression.cs b/src/Ryujinx.Graphics.Shader/Instructions/Lop3Expression.cs
new file mode 100644
index 00000000..6217ce53
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/Instructions/Lop3Expression.cs
@@ -0,0 +1,141 @@
+using Ryujinx.Graphics.Shader.IntermediateRepresentation;
+using Ryujinx.Graphics.Shader.Translation;
+
+using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
+
+namespace Ryujinx.Graphics.Shader.Instructions
+{
+ /// <summary>
+ /// Builds an IR expression for a three-input bitwise operation described by an 8-bit truth table.
+ /// </summary>
+ static class Lop3Expression
+ {
+ // Known truth tables. As the values show (a = 0xf0, a &amp; b = 0xc0, a &amp; b &amp; c = 0x80),
+ // bit ((a << 2) | (b << 1) | c) of the table holds the output for inputs a, b and c.
+ private enum TruthTable : byte
+ {
+ False = 0x00, // false
+ True = 0xff, // true
+ In = 0xf0, // a
+ And2 = 0xc0, // a & b
+ Or2 = 0xfc, // a | b
+ Xor2 = 0x3c, // a ^ b
+ And3 = 0x80, // a & b & c
+ Or3 = 0xfe, // a | b | c
+ XorAnd = 0x60, // a & (b ^ c)
+ XorOr = 0xf6, // a | (b ^ c)
+ OrAnd = 0xe0, // a & (b | c)
+ AndOr = 0xf8, // a | (b & c)
+ Onehot = 0x16, // (a & !b & !c) | (!a & b & !c) | (!a & !b & c) - Only one value is true.
+ Majority = 0xe8, // Popcount(a, b, c) >= 2
+ Gamble = 0x81, // (a & b & c) | (!a & !b & !c) - All on or all off
+ InverseGamble = 0x7e, // Inverse of Gamble
+ Dot = 0x1a, // a ^ (c | (a & b))
+ Mux = 0xca, // a ? b : c
+ AndXor = 0x78, // a ^ (b & c)
+ OrXor = 0x1e, // a ^ (b | c)
+ Xor3 = 0x96, // a ^ b ^ c
+ }
+
+ /// <summary>
+ /// Searches for a known expression equivalent to the given truth table.
+ /// Each of the 64 iterations applies a combination of operand swaps (bits 0 to 2 of the iteration
+ /// index) and operand negations (bits 3 to 5), permuting the truth table to match, and then looks
+ /// the permuted table, or its complement, up in the list of known expressions.
+ /// </summary>
+ /// <param name="context">Emitter context used to build the expression</param>
+ /// <param name="srcA">First input</param>
+ /// <param name="srcB">Second input</param>
+ /// <param name="srcC">Third input</param>
+ /// <param name="imm">Truth table; it is cast to the byte-sized <see cref="TruthTable"/> enum</param>
+ /// <returns>The expression result, or null if no combination matches a known expression</returns>
+ public static Operand GetFromTruthTable(EmitterContext context, Operand srcA, Operand srcB, Operand srcC, int imm)
+ {
+ for (int i = 0; i < 0x40; i++)
+ {
+ TruthTable currImm = (TruthTable)imm;
+
+ Operand x = srcA;
+ Operand y = srcB;
+ Operand z = srcC;
+
+ // Bit 0: swap the first two inputs.
+ if ((i & 0x01) != 0)
+ {
+ (x, y) = (y, x);
+ currImm = PermuteTable(currImm, 7, 6, 3, 2, 5, 4, 1, 0);
+ }
+
+ // Bit 1: swap the first and third inputs.
+ if ((i & 0x02) != 0)
+ {
+ (x, z) = (z, x);
+ currImm = PermuteTable(currImm, 7, 3, 5, 1, 6, 2, 4, 0);
+ }
+
+ // Bit 2: swap the second and third inputs.
+ if ((i & 0x04) != 0)
+ {
+ (y, z) = (z, y);
+ currImm = PermuteTable(currImm, 7, 5, 6, 4, 3, 1, 2, 0);
+ }
+
+ // Bit 3: negate the first input.
+ if ((i & 0x08) != 0)
+ {
+ x = context.BitwiseNot(x);
+ currImm = PermuteTable(currImm, 3, 2, 1, 0, 7, 6, 5, 4);
+ }
+
+ // Bit 4: negate the second input.
+ if ((i & 0x10) != 0)
+ {
+ y = context.BitwiseNot(y);
+ currImm = PermuteTable(currImm, 5, 4, 7, 6, 1, 0, 3, 2);
+ }
+
+ // Bit 5: negate the third input.
+ if ((i & 0x20) != 0)
+ {
+ z = context.BitwiseNot(z);
+ currImm = PermuteTable(currImm, 6, 7, 4, 5, 2, 3, 0, 1);
+ }
+
+ Operand result = GetExpr(currImm, context, x, y, z);
+ if (result != null)
+ {
+ return result;
+ }
+
+ // Also try the complemented table, negating the resulting expression.
+ Operand notResult = GetExpr((TruthTable)((~(int)currImm) & 0xff), context, x, y, z);
+ if (notResult != null)
+ {
+ return context.BitwiseNot(notResult);
+ }
+ }
+
+ return null;
+ }
+
+ /// <summary>
+ /// Builds the expression for a known truth table.
+ /// </summary>
+ /// <param name="imm">Truth table to look up</param>
+ /// <param name="context">Emitter context used to build the expression</param>
+ /// <param name="x">First input</param>
+ /// <param name="y">Second input</param>
+ /// <param name="z">Third input</param>
+ /// <returns>The expression result, or null if the table has no entry below (Gamble has none)</returns>
+ private static Operand GetExpr(TruthTable imm, EmitterContext context, Operand x, Operand y, Operand z)
+ {
+ return imm switch
+ {
+ TruthTable.False => Const(0),
+ TruthTable.True => Const(-1),
+ TruthTable.In => x,
+ TruthTable.And2 => context.BitwiseAnd(x, y),
+ TruthTable.Or2 => context.BitwiseOr(x, y),
+ TruthTable.Xor2 => context.BitwiseExclusiveOr(x, y),
+ TruthTable.And3 => context.BitwiseAnd(x, context.BitwiseAnd(y, z)),
+ TruthTable.Or3 => context.BitwiseOr(x, context.BitwiseOr(y, z)),
+ TruthTable.XorAnd => context.BitwiseAnd(x, context.BitwiseExclusiveOr(y, z)),
+ TruthTable.XorOr => context.BitwiseOr(x, context.BitwiseExclusiveOr(y, z)),
+ TruthTable.OrAnd => context.BitwiseAnd(x, context.BitwiseOr(y, z)),
+ TruthTable.AndOr => context.BitwiseOr(x, context.BitwiseAnd(y, z)),
+ TruthTable.Onehot => context.BitwiseExclusiveOr(context.BitwiseOr(x, y), context.BitwiseOr(z, context.BitwiseAnd(x, y))),
+ TruthTable.Majority => context.BitwiseAnd(context.BitwiseOr(x, y), context.BitwiseOr(z, context.BitwiseAnd(x, y))),
+ TruthTable.InverseGamble => context.BitwiseOr(context.BitwiseExclusiveOr(x, y), context.BitwiseExclusiveOr(x, z)),
+ TruthTable.Dot => context.BitwiseAnd(context.BitwiseExclusiveOr(x, z), context.BitwiseOr(context.BitwiseNot(y), z)),
+ TruthTable.Mux => context.BitwiseOr(context.BitwiseAnd(x, y), context.BitwiseAnd(context.BitwiseNot(x), z)),
+ TruthTable.AndXor => context.BitwiseExclusiveOr(x, context.BitwiseAnd(y, z)),
+ TruthTable.OrXor => context.BitwiseExclusiveOr(x, context.BitwiseOr(y, z)),
+ TruthTable.Xor3 => context.BitwiseExclusiveOr(x, context.BitwiseExclusiveOr(y, z)),
+ _ => null
+ };
+ }
+
+ /// <summary>
+ /// Rearranges the bits of a truth table: source bit N is moved to the position given by
+ /// the <c>bitN</c> argument.
+ /// </summary>
+ /// <param name="imm">Truth table to permute</param>
+ /// <returns>The permuted truth table</returns>
+ private static TruthTable PermuteTable(TruthTable imm, int bit7, int bit6, int bit5, int bit4, int bit3, int bit2, int bit1, int bit0)
+ {
+ int result = 0;
+
+ result |= (((int)imm >> 0) & 1) << bit0;
+ result |= (((int)imm >> 1) & 1) << bit1;
+ result |= (((int)imm >> 2) & 1) << bit2;
+ result |= (((int)imm >> 3) & 1) << bit3;
+ result |= (((int)imm >> 4) & 1) << bit4;
+ result |= (((int)imm >> 5) & 1) << bit5;
+ result |= (((int)imm >> 6) & 1) << bit6;
+ result |= (((int)imm >> 7) & 1) << bit7;
+
+ return (TruthTable)result;
+ }
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/IntermediateRepresentation/BasicBlock.cs b/src/Ryujinx.Graphics.Shader/IntermediateRepresentation/BasicBlock.cs
new file mode 100644
index 00000000..2aca118b
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/IntermediateRepresentation/BasicBlock.cs
@@ -0,0 +1,91 @@
+using System.Collections.Generic;
+
+namespace Ryujinx.Graphics.Shader.IntermediateRepresentation
+{
+ /// <summary>
+ /// A block of IR operations with up to two successors (<see cref="Next"/> and <see cref="Branch"/>)
+ /// and a list of predecessors that is kept in sync when the successors are assigned.
+ /// </summary>
+ class BasicBlock
+ {
+     private BasicBlock _next;
+     private BasicBlock _branch;
+
+     public int Index { get; set; }
+
+     public LinkedList<INode> Operations { get; } = new LinkedList<INode>();
+
+     /// <summary>
+     /// First successor. Assigning it updates the predecessor lists of the old and new blocks.
+     /// </summary>
+     public BasicBlock Next
+     {
+         get { return _next; }
+         set { _next = AddSuccessor(_next, value); }
+     }
+
+     /// <summary>
+     /// Second successor. Assigning it updates the predecessor lists of the old and new blocks.
+     /// </summary>
+     public BasicBlock Branch
+     {
+         get { return _branch; }
+         set { _branch = AddSuccessor(_branch, value); }
+     }
+
+     public bool HasBranch => _branch != null;
+
+     // The block with index 0 is always considered reachable.
+     public bool Reachable => Index == 0 || Predecessors.Count > 0;
+
+     public List<BasicBlock> Predecessors { get; } = new List<BasicBlock>();
+
+     public HashSet<BasicBlock> DominanceFrontiers { get; } = new HashSet<BasicBlock>();
+
+     public BasicBlock ImmediateDominator { get; set; }
+
+     /// <summary>
+     /// Creates an empty block with index 0.
+     /// </summary>
+     public BasicBlock()
+     {
+     }
+
+     /// <summary>
+     /// Creates an empty block with the given index.
+     /// </summary>
+     /// <param name="index">Block index</param>
+     public BasicBlock(int index) : this()
+     {
+         Index = index;
+     }
+
+     /// <summary>
+     /// Unregisters this block as predecessor of the old successor and registers it on the new one.
+     /// </summary>
+     /// <returns>The new successor</returns>
+     private BasicBlock AddSuccessor(BasicBlock oldBlock, BasicBlock newBlock)
+     {
+         if (oldBlock != null)
+         {
+             oldBlock.Predecessors.Remove(this);
+         }
+
+         if (newBlock != null)
+         {
+             newBlock.Predecessors.Add(this);
+         }
+
+         return newBlock;
+     }
+
+     /// <summary>
+     /// Gets the last node of the block.
+     /// </summary>
+     /// <returns>The last node, or null if the block is empty</returns>
+     public INode GetLastOp()
+     {
+         LinkedListNode<INode> tail = Operations.Last;
+
+         return tail == null ? null : tail.Value;
+     }
+
+     /// <summary>
+     /// Adds a node at the end of the block, but before the last node if that node is a
+     /// control flow operation.
+     /// </summary>
+     /// <param name="node">Node to add</param>
+     public void Append(INode node)
+     {
+         LinkedListNode<INode> tail = Operations.Last;
+
+         bool endsWithControlFlow =
+             tail != null &&
+             tail.Value is Operation lastOperation &&
+             IsControlFlowInst(lastOperation.Inst);
+
+         if (!endsWithControlFlow)
+         {
+             Operations.AddLast(node);
+
+             return;
+         }
+
+         Operations.AddBefore(tail, node);
+     }
+
+     /// <summary>
+     /// Checks if the instruction is one of Branch, BranchIfFalse, BranchIfTrue, Discard or Return.
+     /// </summary>
+     private static bool IsControlFlowInst(Instruction inst)
+     {
+         return inst == Instruction.Branch ||
+                inst == Instruction.BranchIfFalse ||
+                inst == Instruction.BranchIfTrue ||
+                inst == Instruction.Discard ||
+                inst == Instruction.Return;
+     }
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/IntermediateRepresentation/CommentNode.cs b/src/Ryujinx.Graphics.Shader/IntermediateRepresentation/CommentNode.cs
new file mode 100644
index 00000000..d4d87b06
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/IntermediateRepresentation/CommentNode.cs
@@ -0,0 +1,12 @@
+namespace Ryujinx.Graphics.Shader.IntermediateRepresentation
+{
+ /// <summary>
+ /// Operation with the <see cref="Instruction.Comment"/> instruction that carries a text comment.
+ /// </summary>
+ class CommentNode : Operation
+ {
+ /// <summary>
+ /// Comment text.
+ /// </summary>
+ public string Comment { get; }
+
+ /// <summary>
+ /// Creates a new comment node, with no destination.
+ /// </summary>
+ /// <param name="comment">Comment text</param>
+ public CommentNode(string comment) : base(Instruction.Comment, null)
+ {
+ Comment = comment;
+ }
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/IntermediateRepresentation/Function.cs b/src/Ryujinx.Graphics.Shader/IntermediateRepresentation/Function.cs
new file mode 100644
index 00000000..e535c3fc
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/IntermediateRepresentation/Function.cs
@@ -0,0 +1,23 @@
+namespace Ryujinx.Graphics.Shader.IntermediateRepresentation
+{
+ /// <summary>
+ /// Immutable description of an IR function: its blocks, name, and argument/return information.
+ /// </summary>
+ class Function
+ {
+     public BasicBlock[] Blocks { get; }
+
+     public string Name { get; }
+
+     public bool ReturnsValue { get; }
+
+     public int InArgumentsCount { get; }
+     public int OutArgumentsCount { get; }
+
+     /// <summary>
+     /// Creates a new function description.
+     /// </summary>
+     /// <param name="blocks">Blocks of the function (the array is stored as is, not copied)</param>
+     /// <param name="name">Function name</param>
+     /// <param name="returnsValue">Value stored in <see cref="ReturnsValue"/></param>
+     /// <param name="inArgumentsCount">Value stored in <see cref="InArgumentsCount"/></param>
+     /// <param name="outArgumentsCount">Value stored in <see cref="OutArgumentsCount"/></param>
+     public Function(BasicBlock[] blocks, string name, bool returnsValue, int inArgumentsCount, int outArgumentsCount)
+     {
+         (Blocks, Name, ReturnsValue) = (blocks, name, returnsValue);
+         (InArgumentsCount, OutArgumentsCount) = (inArgumentsCount, outArgumentsCount);
+     }
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/IntermediateRepresentation/INode.cs b/src/Ryujinx.Graphics.Shader/IntermediateRepresentation/INode.cs
new file mode 100644
index 00000000..0f545e56
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/IntermediateRepresentation/INode.cs
@@ -0,0 +1,15 @@
+namespace Ryujinx.Graphics.Shader.IntermediateRepresentation
+{
+ /// <summary>
+ /// Common interface of IR nodes: a node has destination operands and source operands.
+ /// </summary>
+ interface INode
+ {
+ /// <summary>
+ /// Destination operand of the node.
+ /// </summary>
+ Operand Dest { get; set; }
+
+ /// <summary>
+ /// Number of destination operands.
+ /// </summary>
+ int DestsCount { get; }
+ /// <summary>
+ /// Number of source operands.
+ /// </summary>
+ int SourcesCount { get; }
+
+ /// <summary>
+ /// Gets the destination operand at the given index.
+ /// </summary>
+ Operand GetDest(int index);
+ /// <summary>
+ /// Gets the source operand at the given index.
+ /// </summary>
+ Operand GetSource(int index);
+
+ /// <summary>
+ /// Replaces the source operand at the given index.
+ /// </summary>
+ void SetSource(int index, Operand operand);
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/IntermediateRepresentation/Instruction.cs b/src/Ryujinx.Graphics.Shader/IntermediateRepresentation/Instruction.cs
new file mode 100644
index 00000000..d7c4a961
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/IntermediateRepresentation/Instruction.cs
@@ -0,0 +1,178 @@
+using System;
+
+namespace Ryujinx.Graphics.Shader.IntermediateRepresentation
+{
+ /// <summary>
+ /// IR instructions.
+ /// The named instructions are sequential values starting at 1 that fit in the low 16 bits
+ /// (see <see cref="Mask"/>); <see cref="FP32"/> and <see cref="FP64"/> are separate bits above
+ /// the mask. The extension methods in this file mask the value with <see cref="Mask"/> before
+ /// comparing it against a named instruction.
+ /// </summary>
+ [Flags]
+ enum Instruction
+ {
+ Absolute = 1,
+ Add,
+ AtomicAdd,
+ AtomicAnd,
+ AtomicCompareAndSwap,
+ AtomicMinS32,
+ AtomicMinU32,
+ AtomicMaxS32,
+ AtomicMaxU32,
+ AtomicOr,
+ AtomicSwap,
+ AtomicXor,
+ Ballot,
+ Barrier,
+ BitCount,
+ BitfieldExtractS32,
+ BitfieldExtractU32,
+ BitfieldInsert,
+ BitfieldReverse,
+ BitwiseAnd,
+ BitwiseExclusiveOr,
+ BitwiseNot,
+ BitwiseOr,
+ Branch,
+ BranchIfFalse,
+ BranchIfTrue,
+ Call,
+ Ceiling,
+ Clamp,
+ ClampU32,
+ Comment,
+ CompareEqual,
+ CompareGreater,
+ CompareGreaterOrEqual,
+ CompareGreaterOrEqualU32,
+ CompareGreaterU32,
+ CompareLess,
+ CompareLessOrEqual,
+ CompareLessOrEqualU32,
+ CompareLessU32,
+ CompareNotEqual,
+ ConditionalSelect,
+ ConvertFP32ToFP64,
+ ConvertFP64ToFP32,
+ ConvertFP32ToS32,
+ ConvertFP32ToU32,
+ ConvertFP64ToS32,
+ ConvertFP64ToU32,
+ ConvertS32ToFP32,
+ ConvertS32ToFP64,
+ ConvertU32ToFP32,
+ ConvertU32ToFP64,
+ Copy,
+ Cosine,
+ Ddx,
+ Ddy,
+ Discard,
+ Divide,
+ EmitVertex,
+ EndPrimitive,
+ ExponentB2,
+ FSIBegin,
+ FSIEnd,
+ FindLSB,
+ FindMSBS32,
+ FindMSBU32,
+ Floor,
+ FusedMultiplyAdd,
+ GroupMemoryBarrier,
+ ImageLoad,
+ ImageStore,
+ ImageAtomic,
+ IsNan,
+ Load,
+ LoadConstant,
+ LoadGlobal,
+ LoadLocal,
+ LoadShared,
+ LoadStorage,
+ Lod,
+ LogarithmB2,
+ LogicalAnd,
+ LogicalExclusiveOr,
+ LogicalNot,
+ LogicalOr,
+ LoopBreak,
+ LoopContinue,
+ MarkLabel,
+ Maximum,
+ MaximumU32,
+ MemoryBarrier,
+ Minimum,
+ MinimumU32,
+ Multiply,
+ MultiplyHighS32,
+ MultiplyHighU32,
+ Negate,
+ PackDouble2x32,
+ PackHalf2x16,
+ ReciprocalSquareRoot,
+ Return,
+ Round,
+ ShiftLeft,
+ ShiftRightS32,
+ ShiftRightU32,
+ Shuffle,
+ ShuffleDown,
+ ShuffleUp,
+ ShuffleXor,
+ Sine,
+ SquareRoot,
+ Store,
+ StoreGlobal,
+ StoreGlobal16,
+ StoreGlobal8,
+ StoreLocal,
+ StoreShared,
+ StoreShared16,
+ StoreShared8,
+ StoreStorage,
+ StoreStorage16,
+ StoreStorage8,
+ Subtract,
+ SwizzleAdd,
+ TextureSample,
+ TextureSize,
+ Truncate,
+ UnpackDouble2x32,
+ UnpackHalf2x16,
+ VectorExtract,
+ VoteAll,
+ VoteAllEqual,
+ VoteAny,
+
+ // One past the last named instruction.
+ Count,
+
+ // Modifier bits, located above the bits covered by Mask.
+ FP32 = 1 << 16,
+ FP64 = 1 << 17,
+
+ // Mask of the bits that hold the instruction itself, without the modifier bits.
+ Mask = 0xffff
+ }
+
+ /// <summary>
+ /// Classification helpers for <see cref="Instruction"/> values.
+ /// </summary>
+ static class InstructionExtensions
+ {
+     /// <summary>
+     /// Checks if the instruction, with the bits outside <see cref="Instruction.Mask"/> ignored,
+     /// is one of the Atomic* instructions.
+     /// </summary>
+     /// <param name="inst">Instruction to check</param>
+     /// <returns>True if the instruction is atomic, false otherwise</returns>
+     public static bool IsAtomic(this Instruction inst)
+     {
+         Instruction opcode = inst & Instruction.Mask;
+
+         return opcode == Instruction.AtomicAdd ||
+                opcode == Instruction.AtomicAnd ||
+                opcode == Instruction.AtomicCompareAndSwap ||
+                opcode == Instruction.AtomicMaxS32 ||
+                opcode == Instruction.AtomicMaxU32 ||
+                opcode == Instruction.AtomicMinS32 ||
+                opcode == Instruction.AtomicMinU32 ||
+                opcode == Instruction.AtomicOr ||
+                opcode == Instruction.AtomicSwap ||
+                opcode == Instruction.AtomicXor;
+     }
+
+     /// <summary>
+     /// Checks if the instruction, with the bits outside <see cref="Instruction.Mask"/> ignored,
+     /// is <see cref="Instruction.Lod"/> or <see cref="Instruction.TextureSize"/>.
+     /// </summary>
+     /// <param name="inst">Instruction to check</param>
+     /// <returns>True if the instruction is a texture query, false otherwise</returns>
+     public static bool IsTextureQuery(this Instruction inst)
+     {
+         switch (inst & Instruction.Mask)
+         {
+             case Instruction.Lod:
+             case Instruction.TextureSize:
+                 return true;
+
+             default:
+                 return false;
+         }
+     }
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/IntermediateRepresentation/IoVariable.cs b/src/Ryujinx.Graphics.Shader/IntermediateRepresentation/IoVariable.cs
new file mode 100644
index 00000000..a2163d14
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/IntermediateRepresentation/IoVariable.cs
@@ -0,0 +1,51 @@
+namespace Ryujinx.Graphics.Shader.IntermediateRepresentation
+{
+ /// <summary>
+ /// Identifiers of the input/output variables known to the IR.
+ /// <see cref="Invalid"/> is the default (zero) value.
+ /// </summary>
+ enum IoVariable
+ {
+ Invalid,
+
+ BackColorDiffuse,
+ BackColorSpecular,
+ BaseInstance,
+ BaseVertex,
+ ClipDistance,
+ CtaId,
+ DrawIndex,
+ FogCoord,
+ FragmentCoord,
+ FragmentOutputColor,
+ FragmentOutputDepth,
+ FragmentOutputIsBgra, // TODO: Remove and use constant buffer access.
+ FrontColorDiffuse,
+ FrontColorSpecular,
+ FrontFacing,
+ InstanceId,
+ InstanceIndex,
+ InvocationId,
+ Layer,
+ PatchVertices,
+ PointCoord,
+ PointSize,
+ Position,
+ PrimitiveId,
+ SubgroupEqMask,
+ SubgroupGeMask,
+ SubgroupGtMask,
+ SubgroupLaneId,
+ SubgroupLeMask,
+ SubgroupLtMask,
+ SupportBlockViewInverse, // TODO: Remove and use constant buffer access.
+ SupportBlockRenderScale, // TODO: Remove and use constant buffer access.
+ TessellationCoord,
+ TessellationLevelInner,
+ TessellationLevelOuter,
+ TextureCoord,
+ ThreadId,
+ ThreadKill,
+ UserDefined,
+ VertexId,
+ VertexIndex,
+ ViewportIndex,
+ ViewportMask
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/IntermediateRepresentation/IrConsts.cs b/src/Ryujinx.Graphics.Shader/IntermediateRepresentation/IrConsts.cs
new file mode 100644
index 00000000..c264e47d
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/IntermediateRepresentation/IrConsts.cs
@@ -0,0 +1,8 @@
+namespace Ryujinx.Graphics.Shader.IntermediateRepresentation
+{
+ /// <summary>
+ /// Integer values used to represent booleans in the IR.
+ /// </summary>
+ static class IrConsts
+ {
+ // False is zero.
+ public const int False = 0;
+ // True is -1, that is, all bits set.
+ public const int True = -1;
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/IntermediateRepresentation/Operand.cs b/src/Ryujinx.Graphics.Shader/IntermediateRepresentation/Operand.cs
new file mode 100644
index 00000000..1df88a3d
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/IntermediateRepresentation/Operand.cs
@@ -0,0 +1,79 @@
+using Ryujinx.Graphics.Shader.Decoders;
+using System;
+using System.Collections.Generic;
+
+namespace Ryujinx.Graphics.Shader.IntermediateRepresentation
+{
+ /// <summary>
+ /// IR operand: a type plus a 32-bit value whose meaning depends on the type.
+ /// For registers, the value packs the register type above bit 24 and the index below it.
+ /// For constant buffers, the value packs the slot in the top 5 bits and the offset below them.
+ /// </summary>
+ class Operand
+ {
+     private const int CbufSlotBits = 5;
+     private const int CbufSlotLsb = 32 - CbufSlotBits;
+     private const int CbufSlotMask = (1 << CbufSlotBits) - 1;
+
+     public OperandType Type { get; }
+
+     public int Value { get; }
+
+     /// <summary>
+     /// Node that assigns this operand.
+     /// </summary>
+     public INode AsgOp { get; set; }
+
+     /// <summary>
+     /// Nodes that use this operand as source.
+     /// </summary>
+     public HashSet<INode> UseOps { get; } = new HashSet<INode>();
+
+     /// <summary>
+     /// Creates an operand of the given type, with a value of zero.
+     /// </summary>
+     public Operand(OperandType type)
+     {
+         Type = type;
+     }
+
+     /// <summary>
+     /// Creates an operand of the given type and value.
+     /// </summary>
+     public Operand(OperandType type, int value) : this(type)
+     {
+         Value = value;
+     }
+
+     /// <summary>
+     /// Creates a register operand.
+     /// </summary>
+     public Operand(Register reg) : this(OperandType.Register, PackRegInfo(reg.Index, reg.Type))
+     {
+     }
+
+     /// <summary>
+     /// Creates a constant buffer operand.
+     /// </summary>
+     public Operand(int slot, int offset) : this(OperandType.ConstantBuffer, PackCbufInfo(slot, offset))
+     {
+     }
+
+     // Slot goes on the top CbufSlotBits bits, the offset is OR'd in as is.
+     private static int PackCbufInfo(int slot, int offset) => (slot << CbufSlotLsb) | offset;
+
+     // Register type goes above bit 24, the index is OR'd in as is.
+     private static int PackRegInfo(int index, RegisterType type) => ((int)type << 24) | index;
+
+     /// <summary>
+     /// Gets the constant buffer slot packed into the value.
+     /// </summary>
+     public int GetCbufSlot() => (Value >> CbufSlotLsb) & CbufSlotMask;
+
+     /// <summary>
+     /// Gets the constant buffer offset packed into the value (the value with the slot bits cleared).
+     /// </summary>
+     public int GetCbufOffset() => Value & ~(CbufSlotMask << CbufSlotLsb);
+
+     /// <summary>
+     /// Unpacks the register index and type from the value.
+     /// </summary>
+     public Register GetRegister()
+     {
+         int index = Value & 0xffffff;
+         RegisterType type = (RegisterType)(Value >> 24);
+
+         return new Register(index, type);
+     }
+
+     /// <summary>
+     /// Reinterprets the bits of the value as a single precision float.
+     /// </summary>
+     public float AsFloat() => BitConverter.Int32BitsToSingle(Value);
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/IntermediateRepresentation/OperandHelper.cs b/src/Ryujinx.Graphics.Shader/IntermediateRepresentation/OperandHelper.cs
new file mode 100644
index 00000000..37c349e8
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/IntermediateRepresentation/OperandHelper.cs
@@ -0,0 +1,62 @@
+using Ryujinx.Graphics.Shader.Decoders;
+using System;
+
+namespace Ryujinx.Graphics.Shader.IntermediateRepresentation
+{
+ /// <summary>
+ /// Factory methods for <see cref="Operand"/>.
+ /// </summary>
+ static class OperandHelper
+ {
+     /// <summary>Creates an argument operand with the given value.</summary>
+     public static Operand Argument(int value) => new Operand(OperandType.Argument, value);
+
+     /// <summary>Creates a constant buffer operand for the given slot and offset.</summary>
+     public static Operand Cbuf(int slot, int offset) => new Operand(slot, offset);
+
+     /// <summary>Creates an integer constant operand.</summary>
+     public static Operand Const(int value) => new Operand(OperandType.Constant, value);
+
+     /// <summary>Creates a constant operand holding the bits of a single precision float.</summary>
+     public static Operand ConstF(float value)
+     {
+         int bits = BitConverter.SingleToInt32Bits(value);
+
+         return new Operand(OperandType.Constant, bits);
+     }
+
+     /// <summary>Creates a new label operand.</summary>
+     public static Operand Label() => new Operand(OperandType.Label);
+
+     /// <summary>Creates a new local variable operand.</summary>
+     public static Operand Local() => new Operand(OperandType.LocalVariable);
+
+     /// <summary>Creates an operand for the register with the given index and type.</summary>
+     public static Operand Register(int index, RegisterType type) => Register(new Register(index, type));
+
+     /// <summary>
+     /// Creates an operand for a register. Registers reporting <c>IsRZ</c> become the constant 0 and
+     /// registers reporting <c>IsPT</c> become the constant <see cref="IrConsts.True"/>.
+     /// </summary>
+     public static Operand Register(Register reg)
+     {
+         if (reg.IsRZ)
+         {
+             return Const(0);
+         }
+
+         return reg.IsPT ? Const(IrConsts.True) : new Operand(reg);
+     }
+
+     /// <summary>Creates a new undefined operand.</summary>
+     public static Operand Undef() => new Operand(OperandType.Undefined);
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/IntermediateRepresentation/OperandType.cs b/src/Ryujinx.Graphics.Shader/IntermediateRepresentation/OperandType.cs
new file mode 100644
index 00000000..4d2da734
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/IntermediateRepresentation/OperandType.cs
@@ -0,0 +1,13 @@
+namespace Ryujinx.Graphics.Shader.IntermediateRepresentation
+{
+ /// <summary>
+ /// Kinds of IR operand.
+ /// </summary>
+ enum OperandType
+ {
+ Argument,
+ Constant,
+ ConstantBuffer,
+ Label,
+ LocalVariable,
+ Register,
+ Undefined
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/IntermediateRepresentation/Operation.cs b/src/Ryujinx.Graphics.Shader/IntermediateRepresentation/Operation.cs
new file mode 100644
index 00000000..99179f15
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/IntermediateRepresentation/Operation.cs
@@ -0,0 +1,257 @@
+using System;
+using System.Diagnostics;
+
+namespace Ryujinx.Graphics.Shader.IntermediateRepresentation
+{
+ /// <summary>
+ /// IR operation: an instruction with destination and source operands.
+ /// The operation keeps the <see cref="Operand.AsgOp"/> of its destinations and the
+ /// <see cref="Operand.UseOps"/> of its local variable sources up to date as operands are changed.
+ /// </summary>
+ class Operation : INode
+ {
+     public Instruction Inst { get; private set; }
+     public StorageKind StorageKind { get; }
+
+     private Operand[] _dests;
+
+     /// <summary>
+     /// First destination operand, or null if there is none.
+     /// Setting it replaces all destinations with the new value (or with none, if null).
+     /// </summary>
+     public Operand Dest
+     {
+         get
+         {
+             return _dests.Length != 0 ? _dests[0] : null;
+         }
+         set
+         {
+             if (value != null && value.Type == OperandType.LocalVariable)
+             {
+                 value.AsgOp = this;
+             }
+
+             if (value != null)
+             {
+                 _dests = new[] { value };
+             }
+             else
+             {
+                 _dests = Array.Empty<Operand>();
+             }
+         }
+     }
+
+     public int DestsCount => _dests.Length;
+
+     private Operand[] _sources;
+
+     public int SourcesCount => _sources.Length;
+
+     public int Index { get; }
+
+     /// <summary>
+     /// Stores the sources and registers this operation as user of the local variable ones.
+     /// </summary>
+     /// <param name="sources">Source operands, must not contain null</param>
+     private Operation(Operand[] sources)
+     {
+         // The array may be modified externally, so we store a copy.
+         _sources = (Operand[])sources.Clone();
+
+         for (int index = 0; index < _sources.Length; index++)
+         {
+             Operand source = _sources[index];
+
+             if (source.Type == OperandType.LocalVariable)
+             {
+                 source.UseOps.Add(this);
+             }
+         }
+     }
+
+     /// <summary>
+     /// Creates an operation with multiple destinations.
+     /// </summary>
+     /// <param name="inst">Instruction</param>
+     /// <param name="index">Value stored in <see cref="Index"/></param>
+     /// <param name="dests">Destination operands, can be null for none</param>
+     /// <param name="sources">Source operands</param>
+     public Operation(Instruction inst, int index, Operand[] dests, Operand[] sources) : this(sources)
+     {
+         Inst = inst;
+         Index = index;
+
+         if (dests != null)
+         {
+             // The array may be modified externally, so we store a copy.
+             _dests = (Operand[])dests.Clone();
+
+             for (int dstIndex = 0; dstIndex < dests.Length; dstIndex++)
+             {
+                 Operand dest = dests[dstIndex];
+
+                 if (dest != null && dest.Type == OperandType.LocalVariable)
+                 {
+                     dest.AsgOp = this;
+                 }
+             }
+         }
+         else
+         {
+             _dests = Array.Empty<Operand>();
+         }
+     }
+
+     /// <summary>
+     /// Creates an operation with at most one destination.
+     /// </summary>
+     /// <param name="inst">Instruction</param>
+     /// <param name="dest">Destination operand, can be null for none</param>
+     /// <param name="sources">Source operands</param>
+     public Operation(Instruction inst, Operand dest, params Operand[] sources) : this(sources)
+     {
+         Inst = inst;
+
+         if (dest != null)
+         {
+             // Unlike the other paths, this sets the assignment regardless of the operand type.
+             dest.AsgOp = this;
+
+             _dests = new[] { dest };
+         }
+         else
+         {
+             _dests = Array.Empty<Operand>();
+         }
+     }
+
+     /// <summary>
+     /// Creates an operation with a storage kind and at most one destination.
+     /// </summary>
+     /// <param name="inst">Instruction</param>
+     /// <param name="storageKind">Value stored in <see cref="StorageKind"/></param>
+     /// <param name="dest">Destination operand, can be null for none</param>
+     /// <param name="sources">Source operands</param>
+     public Operation(Instruction inst, StorageKind storageKind, Operand dest, params Operand[] sources) : this(sources)
+     {
+         Inst = inst;
+         StorageKind = storageKind;
+
+         if (dest != null)
+         {
+             // Unlike the other paths, this sets the assignment regardless of the operand type.
+             dest.AsgOp = this;
+
+             _dests = new[] { dest };
+         }
+         else
+         {
+             _dests = Array.Empty<Operand>();
+         }
+     }
+
+     /// <summary>
+     /// Creates an operation with an index and at most one destination.
+     /// </summary>
+     /// <param name="inst">Instruction</param>
+     /// <param name="index">Value stored in <see cref="Index"/></param>
+     /// <param name="dest">Destination operand, can be null for none</param>
+     /// <param name="sources">Source operands</param>
+     public Operation(Instruction inst, int index, Operand dest, params Operand[] sources) : this(inst, dest, sources)
+     {
+         Index = index;
+     }
+
+     /// <summary>
+     /// Adds destination operands after the existing ones.
+     /// </summary>
+     /// <param name="operands">Operands to add, entries can be null</param>
+     public void AppendDests(Operand[] operands)
+     {
+         int startIndex = _dests.Length;
+
+         Array.Resize(ref _dests, startIndex + operands.Length);
+
+         for (int index = 0; index < operands.Length; index++)
+         {
+             Operand dest = operands[index];
+
+             if (dest != null && dest.Type == OperandType.LocalVariable)
+             {
+                 Debug.Assert(dest.AsgOp == null);
+                 dest.AsgOp = this;
+             }
+
+             _dests[startIndex + index] = dest;
+         }
+     }
+
+     /// <summary>
+     /// Adds source operands after the existing ones.
+     /// </summary>
+     /// <param name="operands">Operands to add, must not contain null</param>
+     public void AppendSources(Operand[] operands)
+     {
+         int startIndex = _sources.Length;
+
+         Array.Resize(ref _sources, startIndex + operands.Length);
+
+         for (int index = 0; index < operands.Length; index++)
+         {
+             Operand source = operands[index];
+
+             if (source.Type == OperandType.LocalVariable)
+             {
+                 source.UseOps.Add(this);
+             }
+
+             _sources[startIndex + index] = source;
+         }
+     }
+
+     /// <summary>
+     /// Gets the destination operand at the given index.
+     /// </summary>
+     public Operand GetDest(int index)
+     {
+         return _dests[index];
+     }
+
+     /// <summary>
+     /// Gets the source operand at the given index.
+     /// </summary>
+     public Operand GetSource(int index)
+     {
+         return _sources[index];
+     }
+
+     /// <summary>
+     /// Replaces the destination operand at the given index, updating the assignment
+     /// of the old and new operands when they are local variables.
+     /// </summary>
+     /// <param name="index">Index of the destination to replace</param>
+     /// <param name="dest">New destination, can be null</param>
+     public void SetDest(int index, Operand dest)
+     {
+         Operand oldDest = _dests[index];
+
+         if (oldDest != null && oldDest.Type == OperandType.LocalVariable)
+         {
+             oldDest.AsgOp = null;
+         }
+
+         if (dest != null && dest.Type == OperandType.LocalVariable)
+         {
+             dest.AsgOp = this;
+         }
+
+         _dests[index] = dest;
+     }
+
+     /// <summary>
+     /// Replaces the source operand at the given index, updating the use list
+     /// of the old and new operands when they are local variables.
+     /// </summary>
+     /// <param name="index">Index of the source to replace</param>
+     /// <param name="source">New source, can be null</param>
+     public void SetSource(int index, Operand source)
+     {
+         Operand oldSrc = _sources[index];
+
+         if (oldSrc != null && oldSrc.Type == OperandType.LocalVariable)
+         {
+             oldSrc.UseOps.Remove(this);
+         }
+
+         if (source != null && source.Type == OperandType.LocalVariable)
+         {
+             source.UseOps.Add(this);
+         }
+
+         _sources[index] = source;
+     }
+
+     /// <summary>
+     /// Inserts a source operand at the given index, shifting the following sources by one.
+     /// </summary>
+     /// <param name="index">Index where the source is inserted</param>
+     /// <param name="source">Source to insert</param>
+     public void InsertSource(int index, Operand source)
+     {
+         Operand[] newSources = new Operand[_sources.Length + 1];
+
+         Array.Copy(_sources, 0, newSources, 0, index);
+         Array.Copy(_sources, index, newSources, index + 1, _sources.Length - index);
+
+         // Register the use, as every other method that adds a source does. Otherwise the
+         // variable would not list this operation among its users. UseOps is a set, so an
+         // operand that is already a source of this operation is not registered twice.
+         if (source != null && source.Type == OperandType.LocalVariable)
+         {
+             source.UseOps.Add(this);
+         }
+
+         newSources[index] = source;
+
+         _sources = newSources;
+     }
+
+     /// <summary>
+     /// Removes the source operand at the given index, shifting the following sources by one.
+     /// </summary>
+     /// <param name="index">Index of the source to remove</param>
+     protected void RemoveSource(int index)
+     {
+         // Clears the slot first so that the use list of the removed operand is updated.
+         SetSource(index, null);
+
+         Operand[] newSources = new Operand[_sources.Length - 1];
+
+         Array.Copy(_sources, 0, newSources, 0, index);
+         Array.Copy(_sources, index + 1, newSources, index, _sources.Length - (index + 1));
+
+         _sources = newSources;
+     }
+
+     /// <summary>
+     /// Turns the operation into a copy of the given source.
+     /// </summary>
+     /// <param name="source">Source of the copy</param>
+     public void TurnIntoCopy(Operand source)
+     {
+         TurnInto(Instruction.Copy, source);
+     }
+
+     /// <summary>
+     /// Changes the instruction and replaces all sources with a single one.
+     /// </summary>
+     /// <param name="newInst">New instruction</param>
+     /// <param name="source">New single source, must not be null</param>
+     public void TurnInto(Instruction newInst, Operand source)
+     {
+         Inst = newInst;
+
+         foreach (Operand oldSrc in _sources)
+         {
+             if (oldSrc != null && oldSrc.Type == OperandType.LocalVariable)
+             {
+                 oldSrc.UseOps.Remove(this);
+             }
+         }
+
+         if (source.Type == OperandType.LocalVariable)
+         {
+             source.UseOps.Add(this);
+         }
+
+         _sources = new Operand[] { source };
+     }
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/IntermediateRepresentation/PhiNode.cs b/src/Ryujinx.Graphics.Shader/IntermediateRepresentation/PhiNode.cs
new file mode 100644
index 00000000..8fa25ae9
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/IntermediateRepresentation/PhiNode.cs
@@ -0,0 +1,107 @@
+using System;
+using System.Collections.Generic;
+
+namespace Ryujinx.Graphics.Shader.IntermediateRepresentation
+{
+ /// <summary>
+ /// Node with a single destination operand and one source operand per contributing block.
+ /// </summary>
+ class PhiNode : INode
+ {
+     // Pairs a source operand with the block it was registered for.
+     private class PhiSource
+     {
+         public BasicBlock Block { get; }
+         public Operand Operand { get; set; }
+
+         public PhiSource(BasicBlock block, Operand operand)
+         {
+             Block = block;
+             Operand = operand;
+         }
+     }
+
+     // Blocks that already contributed a source, used by AddSource to ignore repeats.
+     private HashSet<BasicBlock> _blocks;
+
+     private List<PhiSource> _sources;
+
+     private Operand _dest;
+
+     /// <summary>
+     /// Destination operand. Assigning a local variable marks this node as its assignment.
+     /// </summary>
+     public Operand Dest
+     {
+         get
+         {
+             return _dest;
+         }
+         set
+         {
+             if (value != null && value.Type == OperandType.LocalVariable)
+             {
+                 value.AsgOp = this;
+             }
+
+             _dest = value;
+         }
+     }
+
+     public int DestsCount => _dest == null ? 0 : 1;
+
+     public int SourcesCount => _sources.Count;
+
+     /// <summary>
+     /// Creates a node with no sources that assigns <paramref name="dest"/>.
+     /// </summary>
+     /// <param name="dest">Destination operand; must not be null</param>
+     public PhiNode(Operand dest)
+     {
+         _sources = new List<PhiSource>();
+         _blocks = new HashSet<BasicBlock>();
+
+         // Done for every operand type here, while the Dest setter below
+         // only does it for local variables.
+         dest.AsgOp = this;
+
+         Dest = dest;
+     }
+
+     /// <summary>
+     /// Registers the operand coming from <paramref name="block"/>.
+     /// The call is ignored when the block already has a source.
+     /// </summary>
+     /// <param name="block">Block the operand is associated with</param>
+     /// <param name="operand">Source operand; must not be null</param>
+     public void AddSource(BasicBlock block, Operand operand)
+     {
+         if (!_blocks.Add(block))
+         {
+             return;
+         }
+
+         if (operand.Type == OperandType.LocalVariable)
+         {
+             operand.UseOps.Add(this);
+         }
+
+         _sources.Add(new PhiSource(block, operand));
+     }
+
+     /// <summary>
+     /// Gets the destination operand. Only index 0 is valid.
+     /// </summary>
+     /// <param name="index">Destination index, must be 0</param>
+     /// <returns>The destination operand, which may be null</returns>
+     public Operand GetDest(int index)
+     {
+         return index == 0 ? _dest : throw new ArgumentOutOfRangeException(nameof(index));
+     }
+
+     public Operand GetSource(int index) => _sources[index].Operand;
+
+     public BasicBlock GetBlock(int index) => _sources[index].Block;
+
+     /// <summary>
+     /// Replaces the operand of the source at <paramref name="index"/>, keeping the block it belongs to.
+     /// </summary>
+     /// <param name="index">Index of the source to update</param>
+     /// <param name="source">New operand; must not be null</param>
+     public void SetSource(int index, Operand source)
+     {
+         PhiSource entry = _sources[index];
+
+         Operand previous = entry.Operand;
+
+         if (previous != null && previous.Type == OperandType.LocalVariable)
+         {
+             previous.UseOps.Remove(this);
+         }
+
+         if (source.Type == OperandType.LocalVariable)
+         {
+             source.UseOps.Add(this);
+         }
+
+         entry.Operand = source;
+     }
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/IntermediateRepresentation/StorageKind.cs b/src/Ryujinx.Graphics.Shader/IntermediateRepresentation/StorageKind.cs
new file mode 100644
index 00000000..59357443
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/IntermediateRepresentation/StorageKind.cs
@@ -0,0 +1,39 @@
+namespace Ryujinx.Graphics.Shader.IntermediateRepresentation
+{
+ // Storage kinds used by the intermediate representation.
+ // No explicit values are assigned, so the members are numbered 0 (None) to 9 (GlobalMemory)
+ // in declaration order; inserting or reordering members changes those numbers.
+ enum StorageKind
+ {
+ None,
+ Input,
+ InputPerPatch,
+ Output,
+ OutputPerPatch,
+ ConstantBuffer,
+ StorageBuffer,
+ LocalMemory,
+ SharedMemory,
+ GlobalMemory
+ }
+
+ // Classification helpers for StorageKind values.
+ static class StorageKindExtensions
+ {
+     // True for Input, InputPerPatch, Output and OutputPerPatch.
+     public static bool IsInputOrOutput(this StorageKind storageKind) =>
+         storageKind is StorageKind.Input
+             or StorageKind.InputPerPatch
+             or StorageKind.Output
+             or StorageKind.OutputPerPatch;
+
+     // True for Output and OutputPerPatch.
+     public static bool IsOutput(this StorageKind storageKind) =>
+         storageKind is StorageKind.Output or StorageKind.OutputPerPatch;
+
+     // True for InputPerPatch and OutputPerPatch.
+     public static bool IsPerPatch(this StorageKind storageKind) =>
+         storageKind is StorageKind.InputPerPatch or StorageKind.OutputPerPatch;
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/IntermediateRepresentation/TextureFlags.cs b/src/Ryujinx.Graphics.Shader/IntermediateRepresentation/TextureFlags.cs
new file mode 100644
index 00000000..6c20e856
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/IntermediateRepresentation/TextureFlags.cs
@@ -0,0 +1,32 @@
+using System;
+
+namespace Ryujinx.Graphics.Shader.IntermediateRepresentation
+{
+ // Flags attached to texture operations.
+ // Bits 0 to 8 are independent flags that can be combined and tested one by one.
+ // Bits 16 to 19 (AtomicMask) hold a single value out of Add..CAS instead: those members
+ // are consecutive numbers shifted into the field, not separate bits, so they have to be
+ // read with (flags & AtomicMask) == TextureFlags.X rather than with a bit test.
+ [Flags]
+ enum TextureFlags
+ {
+ None = 0,
+ Bindless = 1 << 0,
+ Gather = 1 << 1,
+ Derivatives = 1 << 2,
+ IntCoords = 1 << 3,
+ LodBias = 1 << 4,
+ LodLevel = 1 << 5,
+ Offset = 1 << 6,
+ Offsets = 1 << 7,
+ Coherent = 1 << 8,
+
+ // Selects the 4-bit field that stores one of the values below.
+ AtomicMask = 15 << 16,
+
+ // Add is 0 << 16, which is numerically the same as None.
+ Add = 0 << 16,
+ Minimum = 1 << 16,
+ Maximum = 2 << 16,
+ Increment = 3 << 16,
+ Decrement = 4 << 16,
+ BitwiseAnd = 5 << 16,
+ BitwiseOr = 6 << 16,
+ BitwiseXor = 7 << 16,
+ Swap = 8 << 16,
+ CAS = 9 << 16
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/IntermediateRepresentation/TextureOperation.cs b/src/Ryujinx.Graphics.Shader/IntermediateRepresentation/TextureOperation.cs
new file mode 100644
index 00000000..6ab868cd
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/IntermediateRepresentation/TextureOperation.cs
@@ -0,0 +1,69 @@
+namespace Ryujinx.Graphics.Shader.IntermediateRepresentation
+{
+ /// <summary>
+ /// Operation that additionally carries sampler type, format, flags and the
+ /// constant buffer slot and handle of the texture it refers to.
+ /// </summary>
+ class TextureOperation : Operation
+ {
+     // Slot value used when the caller does not specify a constant buffer slot.
+     public const int DefaultCbufSlot = -1;
+
+     public SamplerType Type { get; set; }
+     public TextureFormat Format { get; set; }
+     public TextureFlags Flags { get; private set; }
+
+     public int CbufSlot { get; private set; }
+     public int Handle { get; private set; }
+
+     public TextureOperation(
+         Instruction inst,
+         SamplerType type,
+         TextureFormat format,
+         TextureFlags flags,
+         int cbufSlot,
+         int handle,
+         int compIndex,
+         Operand[] dests,
+         Operand[] sources) : base(inst, compIndex, dests, sources)
+     {
+         Handle = handle;
+         CbufSlot = cbufSlot;
+         Flags = flags;
+         Format = format;
+         Type = type;
+     }
+
+     // Same as the constructor above, with the constant buffer slot set to DefaultCbufSlot.
+     public TextureOperation(
+         Instruction inst,
+         SamplerType type,
+         TextureFormat format,
+         TextureFlags flags,
+         int handle,
+         int compIndex,
+         Operand[] dests,
+         Operand[] sources) : this(inst, type, format, flags, DefaultCbufSlot, handle, compIndex, dests, sources)
+     {
+     }
+
+     /// <summary>
+     /// Marks the sampler type as indexed, clears the bindless flag and stores the new handle.
+     /// The sources are left as they are.
+     /// </summary>
+     /// <param name="handle">New texture handle</param>
+     public void TurnIntoIndexed(int handle)
+     {
+         Handle = handle;
+         Flags &= ~TextureFlags.Bindless;
+         Type |= SamplerType.Indexed;
+     }
+
+     /// <summary>
+     /// Stores a new handle and constant buffer slot. If the operation was bindless,
+     /// the bindless flag is cleared and the first source is removed.
+     /// </summary>
+     /// <param name="handle">New texture handle</param>
+     /// <param name="cbufSlot">New constant buffer slot</param>
+     public void SetHandle(int handle, int cbufSlot = DefaultCbufSlot)
+     {
+         bool wasBindless = (Flags & TextureFlags.Bindless) != TextureFlags.None;
+
+         if (wasBindless)
+         {
+             Flags &= ~TextureFlags.Bindless;
+
+             RemoveSource(0);
+         }
+
+         CbufSlot = cbufSlot;
+         Handle = handle;
+     }
+
+     // Adds the LodLevel flag to the current flags.
+     public void SetLodLevelFlag() => Flags |= TextureFlags.LodLevel;
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/OutputTopology.cs b/src/Ryujinx.Graphics.Shader/OutputTopology.cs
new file mode 100644
index 00000000..6f977bec
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/OutputTopology.cs
@@ -0,0 +1,24 @@
+namespace Ryujinx.Graphics.Shader
+{
+ // Output primitive topologies. The numeric values are explicit and not contiguous (1, 6, 7).
+ // NOTE(review): presumably they mirror an encoding defined outside this file - confirm before renumbering.
+ enum OutputTopology
+ {
+ PointList = 1,
+ LineStrip = 6,
+ TriangleStrip = 7
+ }
+
+ static class OutputTopologyExtensions
+ {
+     // Maps a topology to the matching GLSL name. Any value that is not one of the
+     // declared members falls back to "points".
+     public static string ToGlslString(this OutputTopology topology)
+     {
+         return topology switch
+         {
+             OutputTopology.TriangleStrip => "triangle_strip",
+             OutputTopology.LineStrip => "line_strip",
+             _ => "points"
+         };
+     }
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/Ryujinx.Graphics.Shader.csproj b/src/Ryujinx.Graphics.Shader/Ryujinx.Graphics.Shader.csproj
new file mode 100644
index 00000000..3434e2a8
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/Ryujinx.Graphics.Shader.csproj
@@ -0,0 +1,33 @@
+<Project Sdk="Microsoft.NET.Sdk">
+
+ <PropertyGroup>
+ <TargetFramework>net7.0</TargetFramework>
+ </PropertyGroup>
+
+ <ItemGroup>
+ <None Remove="CodeGen\Glsl\HelperFunctions\TexelFetchScale_vp.glsl" />
+ </ItemGroup>
+
+ <ItemGroup>
+ <ProjectReference Include="..\Ryujinx.Common\Ryujinx.Common.csproj" />
+ <ProjectReference Include="..\Spv.Generator\Spv.Generator.csproj" />
+ </ItemGroup>
+
+ <ItemGroup>
+ <EmbeddedResource Include="CodeGen\Glsl\HelperFunctions\AtomicMinMaxS32Shared.glsl" />
+ <EmbeddedResource Include="CodeGen\Glsl\HelperFunctions\AtomicMinMaxS32Storage.glsl" />
+ <EmbeddedResource Include="CodeGen\Glsl\HelperFunctions\MultiplyHighS32.glsl" />
+ <EmbeddedResource Include="CodeGen\Glsl\HelperFunctions\MultiplyHighU32.glsl" />
+ <EmbeddedResource Include="CodeGen\Glsl\HelperFunctions\Shuffle.glsl" />
+ <EmbeddedResource Include="CodeGen\Glsl\HelperFunctions\ShuffleDown.glsl" />
+ <EmbeddedResource Include="CodeGen\Glsl\HelperFunctions\ShuffleUp.glsl" />
+ <EmbeddedResource Include="CodeGen\Glsl\HelperFunctions\ShuffleXor.glsl" />
+ <EmbeddedResource Include="CodeGen\Glsl\HelperFunctions\StoreSharedSmallInt.glsl" />
+ <EmbeddedResource Include="CodeGen\Glsl\HelperFunctions\StoreStorageSmallInt.glsl" />
+ <EmbeddedResource Include="CodeGen\Glsl\HelperFunctions\SwizzleAdd.glsl" />
+ <EmbeddedResource Include="CodeGen\Glsl\HelperFunctions\TexelFetchScale_vp.glsl" />
+ <EmbeddedResource Include="CodeGen\Glsl\HelperFunctions\TexelFetchScale_fp.glsl" />
+ <EmbeddedResource Include="CodeGen\Glsl\HelperFunctions\TexelFetchScale_cp.glsl" />
+ </ItemGroup>
+
+</Project>
diff --git a/src/Ryujinx.Graphics.Shader/SamplerType.cs b/src/Ryujinx.Graphics.Shader/SamplerType.cs
new file mode 100644
index 00000000..620f4ccf
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/SamplerType.cs
@@ -0,0 +1,100 @@
+using Ryujinx.Graphics.Shader.Translation;
+using System;
+
+namespace Ryujinx.Graphics.Shader
+{
+ // Sampler type: a base texture kind stored in the low byte (see Mask), optionally
+ // combined with the modifier bits declared after it.
+ // None..TextureCube are consecutive numbers (0 to 5), not independent bits, so the base
+ // kind has to be read with (type & Mask) instead of being tested bit by bit.
+ [Flags]
+ public enum SamplerType
+ {
+ None = 0,
+ Texture1D,
+ TextureBuffer,
+ Texture2D,
+ Texture3D,
+ TextureCube,
+
+ // Selects the bits that hold the base texture kind.
+ Mask = 0xff,
+
+ // Modifier bits, each one can be combined with any base kind.
+ Array = 1 << 8,
+ Indexed = 1 << 9,
+ Multisample = 1 << 10,
+ Shadow = 1 << 11
+ }
+
+ static class SamplerTypeExtensions
+ {
+     /// <summary>
+     /// Gets the dimension count (1, 2 or 3) associated with the base kind of a sampler type.
+     /// </summary>
+     /// <param name="type">Sampler type; modifier bits are ignored</param>
+     /// <returns>Dimension count of the base kind</returns>
+     /// <exception cref="ArgumentException">The base kind is not one of the texture kinds</exception>
+     public static int GetDimensions(this SamplerType type)
+     {
+         switch (type & SamplerType.Mask)
+         {
+             case SamplerType.Texture1D:
+             case SamplerType.TextureBuffer:
+                 return 1;
+             case SamplerType.Texture2D:
+                 return 2;
+             case SamplerType.Texture3D:
+             case SamplerType.TextureCube:
+                 return 3;
+             default:
+                 throw new ArgumentException($"Invalid sampler type \"{type}\".");
+         }
+     }
+
+     /// <summary>
+     /// Builds the GLSL sampler type name: base name followed by the "MS", "Array" and
+     /// "Shadow" suffixes, in that order, for each modifier that is set.
+     /// </summary>
+     /// <param name="type">Sampler type</param>
+     /// <returns>GLSL sampler type name</returns>
+     /// <exception cref="ArgumentException">The base kind is not one of the texture kinds</exception>
+     public static string ToGlslSamplerType(this SamplerType type)
+     {
+         string baseName;
+
+         switch (type & SamplerType.Mask)
+         {
+             case SamplerType.Texture1D:
+                 baseName = "sampler1D";
+                 break;
+             case SamplerType.TextureBuffer:
+                 baseName = "samplerBuffer";
+                 break;
+             case SamplerType.Texture2D:
+                 baseName = "sampler2D";
+                 break;
+             case SamplerType.Texture3D:
+                 baseName = "sampler3D";
+                 break;
+             case SamplerType.TextureCube:
+                 baseName = "samplerCube";
+                 break;
+             default:
+                 throw new ArgumentException($"Invalid sampler type \"{type}\".");
+         }
+
+         string multisample = (type & SamplerType.Multisample) != 0 ? "MS" : string.Empty;
+         string array = (type & SamplerType.Array) != 0 ? "Array" : string.Empty;
+         string shadow = (type & SamplerType.Shadow) != 0 ? "Shadow" : string.Empty;
+
+         return baseName + multisample + array + shadow;
+     }
+
+     /// <summary>
+     /// Builds the GLSL image type name: base name followed by the "MS" and "Array" suffixes
+     /// for each modifier that is set, prefixed with 'u' or 'i' for U32 and S32 components.
+     /// </summary>
+     /// <param name="type">Sampler type</param>
+     /// <param name="componentType">Component type; only U32 and S32 add a prefix</param>
+     /// <returns>GLSL image type name</returns>
+     /// <exception cref="ArgumentException">The base kind is not one of the texture kinds</exception>
+     public static string ToGlslImageType(this SamplerType type, AggregateType componentType)
+     {
+         string baseName;
+
+         switch (type & SamplerType.Mask)
+         {
+             case SamplerType.Texture1D:
+                 baseName = "image1D";
+                 break;
+             case SamplerType.TextureBuffer:
+                 baseName = "imageBuffer";
+                 break;
+             case SamplerType.Texture2D:
+                 baseName = "image2D";
+                 break;
+             case SamplerType.Texture3D:
+                 baseName = "image3D";
+                 break;
+             case SamplerType.TextureCube:
+                 baseName = "imageCube";
+                 break;
+             default:
+                 throw new ArgumentException($"Invalid sampler type \"{type}\".");
+         }
+
+         string multisample = (type & SamplerType.Multisample) != 0 ? "MS" : string.Empty;
+         string array = (type & SamplerType.Array) != 0 ? "Array" : string.Empty;
+
+         string prefix = componentType switch
+         {
+             AggregateType.U32 => "u",
+             AggregateType.S32 => "i",
+             _ => string.Empty
+         };
+
+         return prefix + baseName + multisample + array;
+     }
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/ShaderIdentification.cs b/src/Ryujinx.Graphics.Shader/ShaderIdentification.cs
new file mode 100644
index 00000000..3f015762
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/ShaderIdentification.cs
@@ -0,0 +1,8 @@
+namespace Ryujinx.Graphics.Shader
+{
+ // Identification attached to a shader program (see ShaderProgramInfo.Identification).
+ // None is the default value (0).
+ // NOTE(review): the exact conditions under which GeometryLayerPassthrough is reported
+ // are not visible in this file - check the code that produces it.
+ public enum ShaderIdentification
+ {
+ None,
+ GeometryLayerPassthrough
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/ShaderProgram.cs b/src/Ryujinx.Graphics.Shader/ShaderProgram.cs
new file mode 100644
index 00000000..29fff21e
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/ShaderProgram.cs
@@ -0,0 +1,35 @@
+using Ryujinx.Graphics.Shader.Translation;
+using System;
+
+namespace Ryujinx.Graphics.Shader
+{
+ /// <summary>
+ /// Shader program together with its information and target language.
+ /// Depending on the constructor used it holds either text code or binary code.
+ /// </summary>
+ public class ShaderProgram
+ {
+     public ShaderProgramInfo Info { get; }
+     public TargetLanguage Language { get; }
+
+     // Left null by the constructor that takes binary code.
+     public string Code { get; private set; }
+
+     // Left null by the constructor that takes text code.
+     public byte[] BinaryCode { get; }
+
+     private ShaderProgram(ShaderProgramInfo info, TargetLanguage language)
+     {
+         Language = language;
+         Info = info;
+     }
+
+     public ShaderProgram(ShaderProgramInfo info, TargetLanguage language, string code) : this(info, language)
+         => Code = code;
+
+     public ShaderProgram(ShaderProgramInfo info, TargetLanguage language, byte[] binaryCode) : this(info, language)
+         => BinaryCode = binaryCode;
+
+     /// <summary>
+     /// Puts a line, followed by a line break, in front of the text code.
+     /// </summary>
+     /// <param name="line">Line to place before the existing code</param>
+     public void Prepend(string line)
+     {
+         Code = string.Concat(line, Environment.NewLine, Code);
+     }
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/ShaderProgramInfo.cs b/src/Ryujinx.Graphics.Shader/ShaderProgramInfo.cs
new file mode 100644
index 00000000..30f0ffaa
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/ShaderProgramInfo.cs
@@ -0,0 +1,51 @@
+using System;
+using System.Collections.ObjectModel;
+
+namespace Ryujinx.Graphics.Shader
+{
+ /// <summary>
+ /// Immutable set of values describing a shader program, all supplied through the constructor.
+ /// </summary>
+ public class ShaderProgramInfo
+ {
+     // Read-only views over the descriptor arrays given to the constructor.
+     public ReadOnlyCollection<BufferDescriptor> CBuffers { get; }
+     public ReadOnlyCollection<BufferDescriptor> SBuffers { get; }
+     public ReadOnlyCollection<TextureDescriptor> Textures { get; }
+     public ReadOnlyCollection<TextureDescriptor> Images { get; }
+
+     public ShaderIdentification Identification { get; }
+     public int GpLayerInputAttribute { get; }
+     public ShaderStage Stage { get; }
+     public bool UsesInstanceId { get; }
+     public bool UsesDrawParameters { get; }
+     public bool UsesRtLayer { get; }
+     public byte ClipDistancesWritten { get; }
+     public int FragmentOutputMap { get; }
+
+     public ShaderProgramInfo(
+         BufferDescriptor[] cBuffers,
+         BufferDescriptor[] sBuffers,
+         TextureDescriptor[] textures,
+         TextureDescriptor[] images,
+         ShaderIdentification identification,
+         int gpLayerInputAttribute,
+         ShaderStage stage,
+         bool usesInstanceId,
+         bool usesDrawParameters,
+         bool usesRtLayer,
+         byte clipDistancesWritten,
+         int fragmentOutputMap)
+     {
+         Stage = stage;
+         Identification = identification;
+         GpLayerInputAttribute = gpLayerInputAttribute;
+
+         UsesInstanceId = usesInstanceId;
+         UsesDrawParameters = usesDrawParameters;
+         UsesRtLayer = usesRtLayer;
+
+         ClipDistancesWritten = clipDistancesWritten;
+         FragmentOutputMap = fragmentOutputMap;
+
+         // Array.AsReadOnly wraps the arrays without copying them, so the collections
+         // reflect any later change the caller makes to the arrays it passed in.
+         CBuffers = Array.AsReadOnly(cBuffers);
+         SBuffers = Array.AsReadOnly(sBuffers);
+         Textures = Array.AsReadOnly(textures);
+         Images = Array.AsReadOnly(images);
+     }
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/ShaderStage.cs b/src/Ryujinx.Graphics.Shader/ShaderStage.cs
new file mode 100644
index 00000000..f16fe328
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/ShaderStage.cs
@@ -0,0 +1,27 @@
+namespace Ryujinx.Graphics.Shader
+{
+ // Shader stages, stored as a byte and numbered from 0 (Compute) in declaration order.
+ public enum ShaderStage : byte
+ {
+ Compute,
+ Vertex,
+ TessellationControl,
+ TessellationEvaluation,
+ Geometry,
+ Fragment,
+
+ // Not a stage: being declared last, its value (6) equals the number of stages above.
+ Count
+ }
+
+ public static class ShaderStageExtensions
+ {
+     /// <summary>
+     /// Tells whether render scale is supported by a shader stage.
+     /// </summary>
+     /// <param name="stage">Shader stage to query</param>
+     /// <returns>True for the vertex, fragment and compute stages, false for every other stage</returns>
+     public static bool SupportsRenderScale(this ShaderStage stage)
+     {
+         return stage is ShaderStage.Vertex or ShaderStage.Fragment or ShaderStage.Compute;
+     }
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/StructuredIr/AstAssignment.cs b/src/Ryujinx.Graphics.Shader/StructuredIr/AstAssignment.cs
new file mode 100644
index 00000000..bb3fe7af
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/StructuredIr/AstAssignment.cs
@@ -0,0 +1,35 @@
+using static Ryujinx.Graphics.Shader.StructuredIr.AstHelper;
+
+namespace Ryujinx.Graphics.Shader.StructuredIr
+{
+ /// <summary>
+ /// Assignment of a source node to a destination node.
+ /// </summary>
+ class AstAssignment : AstNode
+ {
+     private IAstNode _source;
+
+     public IAstNode Destination { get; }
+
+     /// <summary>
+     /// Source of the assignment. Changing it unregisters this assignment as a use of the
+     /// previous source and registers it as a use of the new one.
+     /// </summary>
+     public IAstNode Source
+     {
+         get => _source;
+         set
+         {
+             // Remove before add, so that assigning the same node again keeps the use registered.
+             RemoveUse(_source, this);
+             AddUse(value, this);
+
+             _source = value;
+         }
+     }
+
+     /// <summary>
+     /// Creates the assignment, registering it as a use of <paramref name="source"/>
+     /// and as a definition of <paramref name="destination"/>.
+     /// </summary>
+     /// <param name="destination">Node being assigned</param>
+     /// <param name="source">Node providing the value</param>
+     public AstAssignment(IAstNode destination, IAstNode source)
+     {
+         Destination = destination;
+         Source = source;
+
+         AddDef(destination, this);
+     }
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/StructuredIr/AstBlock.cs b/src/Ryujinx.Graphics.Shader/StructuredIr/AstBlock.cs
new file mode 100644
index 00000000..2f34bee8
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/StructuredIr/AstBlock.cs
@@ -0,0 +1,117 @@
+using Ryujinx.Graphics.Shader.IntermediateRepresentation;
+using System;
+using System.Collections;
+using System.Collections.Generic;
+
+using static Ryujinx.Graphics.Shader.StructuredIr.AstHelper;
+
+namespace Ryujinx.Graphics.Shader.StructuredIr
+{
+ /// <summary>
+ /// Block of the structured IR: an ordered list of child nodes with a block type
+ /// and an optional condition.
+ /// </summary>
+ class AstBlock : AstNode, IEnumerable<IAstNode>
+ {
+     public AstBlockType Type { get; private set; }
+
+     private IAstNode _condition;
+
+     /// <summary>
+     /// Condition of the block, may be null. Changing it unregisters this block as a use
+     /// of the previous condition and registers it as a use of the new one.
+     /// </summary>
+     public IAstNode Condition
+     {
+         get
+         {
+             return _condition;
+         }
+         set
+         {
+             RemoveUse(_condition, this);
+
+             AddUse(value, this);
+
+             _condition = value;
+         }
+     }
+
+     private LinkedList<IAstNode> _nodes;
+
+     // First and last child, or null when the block is empty.
+     public IAstNode First => _nodes.First?.Value;
+     public IAstNode Last => _nodes.Last?.Value;
+
+     public int Count => _nodes.Count;
+
+     public AstBlock(AstBlockType type, IAstNode condition = null)
+     {
+         Type = type;
+         Condition = condition;
+
+         _nodes = new LinkedList<IAstNode>();
+     }
+
+     /// <summary>
+     /// Appends a node at the end of the block.
+     /// </summary>
+     /// <param name="node">Node to add, must not belong to a block</param>
+     /// <exception cref="ArgumentException">The node already belongs to a block</exception>
+     public void Add(IAstNode node)
+     {
+         EnsureDetached(node);
+
+         Attach(node, _nodes.AddLast(node));
+     }
+
+     /// <summary>
+     /// Inserts a node at the start of the block.
+     /// </summary>
+     /// <param name="node">Node to add, must not belong to a block</param>
+     /// <exception cref="ArgumentException">The node already belongs to a block</exception>
+     public void AddFirst(IAstNode node)
+     {
+         EnsureDetached(node);
+
+         Attach(node, _nodes.AddFirst(node));
+     }
+
+     /// <summary>
+     /// Inserts a node right before <paramref name="next"/>.
+     /// </summary>
+     /// <param name="next">Node of this block that will follow the new node</param>
+     /// <param name="node">Node to add, must not belong to a block</param>
+     /// <exception cref="ArgumentException">The node already belongs to a block</exception>
+     public void AddBefore(IAstNode next, IAstNode node)
+     {
+         EnsureDetached(node);
+
+         Attach(node, _nodes.AddBefore(next.LLNode, node));
+     }
+
+     /// <summary>
+     /// Inserts a node right after <paramref name="prev"/>.
+     /// </summary>
+     /// <param name="prev">Node of this block that will precede the new node</param>
+     /// <param name="node">Node to add, must not belong to a block</param>
+     /// <exception cref="ArgumentException">The node already belongs to a block</exception>
+     public void AddAfter(IAstNode prev, IAstNode node)
+     {
+         EnsureDetached(node);
+
+         Attach(node, _nodes.AddAfter(prev.LLNode, node));
+     }
+
+     // Rejects nodes that are still owned by a block. This runs BEFORE the node is put on
+     // the list: checking afterwards would leave a rejected node linked into this block
+     // while its Parent and LLNode still point at the other one.
+     private static void EnsureDetached(IAstNode node)
+     {
+         if (node.Parent != null)
+         {
+             throw new ArgumentException("Node already belongs to a block.");
+         }
+     }
+
+     // Records this block and the list node on a node that was just inserted.
+     private void Attach(IAstNode node, LinkedListNode<IAstNode> newNode)
+     {
+         node.Parent = this;
+         node.LLNode = newNode;
+     }
+
+     /// <summary>
+     /// Removes a node from the block and clears its parent and list node.
+     /// </summary>
+     /// <param name="node">Node of this block to remove</param>
+     public void Remove(IAstNode node)
+     {
+         _nodes.Remove(node.LLNode);
+
+         node.Parent = null;
+         node.LLNode = null;
+     }
+
+     // Replaces the condition with (Condition AND cond).
+     public void AndCondition(IAstNode cond)
+     {
+         Condition = new AstOperation(Instruction.LogicalAnd, Condition, cond);
+     }
+
+     // Replaces the condition with (Condition OR cond).
+     public void OrCondition(IAstNode cond)
+     {
+         Condition = new AstOperation(Instruction.LogicalOr, Condition, cond);
+     }
+
+     // Changes the block into an If block with the given condition.
+     public void TurnIntoIf(IAstNode cond)
+     {
+         Condition = cond;
+
+         Type = AstBlockType.If;
+     }
+
+     // Changes the block type to ElseIf, the condition is kept.
+     public void TurnIntoElseIf()
+     {
+         Type = AstBlockType.ElseIf;
+     }
+
+     public IEnumerator<IAstNode> GetEnumerator()
+     {
+         return _nodes.GetEnumerator();
+     }
+
+     IEnumerator IEnumerable.GetEnumerator()
+     {
+         return GetEnumerator();
+     }
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/StructuredIr/AstBlockType.cs b/src/Ryujinx.Graphics.Shader/StructuredIr/AstBlockType.cs
new file mode 100644
index 00000000..c12efda9
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/StructuredIr/AstBlockType.cs
@@ -0,0 +1,12 @@
+namespace Ryujinx.Graphics.Shader.StructuredIr
+{
+ // Types an AstBlock can have. Members are numbered from 0 in declaration order.
+ // AstBlockVisitor treats a Main block as the top of the tree: it stops moving to
+ // parent blocks once the current block has this type.
+ enum AstBlockType
+ {
+ DoWhile,
+ If,
+ Else,
+ ElseIf,
+ Main,
+ While
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/StructuredIr/AstBlockVisitor.cs b/src/Ryujinx.Graphics.Shader/StructuredIr/AstBlockVisitor.cs
new file mode 100644
index 00000000..10d5dce0
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/StructuredIr/AstBlockVisitor.cs
@@ -0,0 +1,68 @@
+using System;
+using System.Collections.Generic;
+
+using static Ryujinx.Graphics.Shader.StructuredIr.AstHelper;
+
+namespace Ryujinx.Graphics.Shader.StructuredIr
+{
+ // Walks the nodes of a block tree in order, descending into child blocks.
+ // Only non-block nodes are yielded; entering and leaving a child block is reported
+ // through the BlockEntered and BlockLeft events.
+ class AstBlockVisitor
+ {
+ // Block that contains the node currently being visited. Starts as the block given to
+ // the constructor and changes as the traversal enters and leaves child blocks.
+ public AstBlock Block { get; private set; }
+
+ // Event data carrying the block that was entered or left.
+ public class BlockVisitationEventArgs : EventArgs
+ {
+ public AstBlock Block { get; }
+
+ public BlockVisitationEventArgs(AstBlock block)
+ {
+ Block = block;
+ }
+ }
+
+ public event EventHandler<BlockVisitationEventArgs> BlockEntered;
+ public event EventHandler<BlockVisitationEventArgs> BlockLeft;
+
+ public AstBlockVisitor(AstBlock mainBlock)
+ {
+ Block = mainBlock;
+ }
+
+ // Lazily enumerates the non-block nodes. Block is updated while the enumeration runs,
+ // so it is only meaningful for the node most recently yielded.
+ public IEnumerable<IAstNode> Visit()
+ {
+ IAstNode node = Block.First;
+
+ while (node != null)
+ {
+ // We reached a child block, visit the nodes inside.
+ while (node is AstBlock childBlock)
+ {
+ Block = childBlock;
+
+ node = childBlock.First;
+
+ BlockEntered?.Invoke(this, new BlockVisitationEventArgs(Block));
+ }
+
+ // Node may be null, if the block is empty.
+ if (node != null)
+ {
+ // The successor is read before yielding, so the consumer can remove the
+ // yielded node from its block without breaking the traversal.
+ IAstNode next = Next(node);
+
+ yield return node;
+
+ node = next;
+ }
+
+ // We reached the end of the list, go up the tree to the parent blocks.
+ // Climbing stops at a block of type Main, which ends the outer loop as well.
+ while (node == null && Block.Type != AstBlockType.Main)
+ {
+ BlockLeft?.Invoke(this, new BlockVisitationEventArgs(Block));
+
+ // Continue with the sibling that follows the block being left.
+ node = Next(Block);
+
+ Block = Block.Parent;
+ }
+ }
+ }
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/StructuredIr/AstComment.cs b/src/Ryujinx.Graphics.Shader/StructuredIr/AstComment.cs
new file mode 100644
index 00000000..dabe623f
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/StructuredIr/AstComment.cs
@@ -0,0 +1,12 @@
+namespace Ryujinx.Graphics.Shader.StructuredIr
+{
+ // AST node that only carries a comment string.
+ class AstComment : AstNode
+ {
+     public string Comment { get; }
+
+     public AstComment(string comment) => Comment = comment;
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/StructuredIr/AstHelper.cs b/src/Ryujinx.Graphics.Shader/StructuredIr/AstHelper.cs
new file mode 100644
index 00000000..7aa0409b
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/StructuredIr/AstHelper.cs
@@ -0,0 +1,74 @@
+using Ryujinx.Graphics.Shader.IntermediateRepresentation;
+using Ryujinx.Graphics.Shader.Translation;
+
+namespace Ryujinx.Graphics.Shader.StructuredIr
+{
+ // Helpers for use/definition bookkeeping, node creation and sibling navigation.
+ static class AstHelper
+ {
+     // Returns the node as an operand when it is a local variable operand, null otherwise.
+     // Use and definition tracking only applies to those operands.
+     private static AstOperand AsLocal(IAstNode node)
+     {
+         return node is AstOperand operand && operand.Type == OperandType.LocalVariable ? operand : null;
+     }
+
+     // Registers parent as a use of node. Does nothing unless node is a local variable operand.
+     public static void AddUse(IAstNode node, IAstNode parent)
+     {
+         AsLocal(node)?.Uses.Add(parent);
+     }
+
+     // Registers parent as a definition of node. Does nothing unless node is a local variable operand.
+     public static void AddDef(IAstNode node, IAstNode parent)
+     {
+         AsLocal(node)?.Defs.Add(parent);
+     }
+
+     // Unregisters parent as a use of node. Does nothing unless node is a local variable operand.
+     public static void RemoveUse(IAstNode node, IAstNode parent)
+     {
+         AsLocal(node)?.Uses.Remove(parent);
+     }
+
+     // Unregisters parent as a definition of node. Does nothing unless node is a local variable operand.
+     public static void RemoveDef(IAstNode node, IAstNode parent)
+     {
+         AsLocal(node)?.Defs.Remove(parent);
+     }
+
+     public static AstAssignment Assign(IAstNode destination, IAstNode source) =>
+         new AstAssignment(destination, source);
+
+     // Creates a constant operand holding the given value.
+     public static AstOperand Const(int value) =>
+         new AstOperand(OperandType.Constant, value);
+
+     // Creates a new local variable operand of the given variable type.
+     public static AstOperand Local(AggregateType type) =>
+         new AstOperand(OperandType.LocalVariable) { VarType = type };
+
+     // Wraps the condition in a LogicalNot operation.
+     public static IAstNode InverseCond(IAstNode cond) =>
+         new AstOperation(Instruction.LogicalNot, cond);
+
+     // Sibling that follows the node in its block, or null when the node is the last one.
+     public static IAstNode Next(IAstNode node) => node.LLNode.Next?.Value;
+
+     // Sibling that precedes the node in its block, or null when the node is the first one.
+     public static IAstNode Previous(IAstNode node) => node.LLNode.Previous?.Value;
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/StructuredIr/AstNode.cs b/src/Ryujinx.Graphics.Shader/StructuredIr/AstNode.cs
new file mode 100644
index 00000000..c667aac9
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/StructuredIr/AstNode.cs
@@ -0,0 +1,11 @@
+using System.Collections.Generic;
+
+namespace Ryujinx.Graphics.Shader.StructuredIr
+{
+ // Base implementation of IAstNode holding the node's position inside a block.
+ class AstNode : IAstNode
+ {
+ // Block that contains this node. AstBlock sets it when the node is added
+ // and resets it to null when the node is removed.
+ public AstBlock Parent { get; set; }
+
+ // Linked list node that holds this node inside the parent block's list.
+ // Maintained by AstBlock together with Parent.
+ public LinkedListNode<IAstNode> LLNode { get; set; }
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/StructuredIr/AstOperand.cs b/src/Ryujinx.Graphics.Shader/StructuredIr/AstOperand.cs
new file mode 100644
index 00000000..1fc0035f
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/StructuredIr/AstOperand.cs
@@ -0,0 +1,50 @@
+using Ryujinx.Graphics.Shader.IntermediateRepresentation;
+using Ryujinx.Graphics.Shader.Translation;
+using System.Collections.Generic;
+
+namespace Ryujinx.Graphics.Shader.StructuredIr
+{
+ /// <summary>
+ /// Operand of the structured IR, with the sets of nodes that define and use it.
+ /// </summary>
+ class AstOperand : AstNode
+ {
+     // Both sets start out empty for every operand.
+     public HashSet<IAstNode> Defs { get; } = new HashSet<IAstNode>();
+     public HashSet<IAstNode> Uses { get; } = new HashSet<IAstNode>();
+
+     public OperandType Type { get; }
+
+     // S32 unless changed after construction.
+     public AggregateType VarType { get; set; } = AggregateType.S32;
+
+     public int Value { get; }
+
+     public int CbufSlot { get; }
+     public int CbufOffset { get; }
+
+     /// <summary>
+     /// Creates an operand from an IR operand, copying its type. Constant buffer operands
+     /// get their slot and offset filled in, all other operands get the value.
+     /// </summary>
+     /// <param name="operand">IR operand to copy from</param>
+     public AstOperand(Operand operand)
+     {
+         Type = operand.Type;
+
+         if (Type != OperandType.ConstantBuffer)
+         {
+             Value = operand.Value;
+
+             return;
+         }
+
+         CbufSlot = operand.GetCbufSlot();
+         CbufOffset = operand.GetCbufOffset();
+     }
+
+     /// <summary>
+     /// Creates an operand of the given type and value.
+     /// </summary>
+     /// <param name="type">Operand type</param>
+     /// <param name="value">Operand value</param>
+     public AstOperand(OperandType type, int value = 0)
+     {
+         Type = type;
+         Value = value;
+     }
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/StructuredIr/AstOperation.cs b/src/Ryujinx.Graphics.Shader/StructuredIr/AstOperation.cs
new file mode 100644
index 00000000..2393fd8d
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/StructuredIr/AstOperation.cs
@@ -0,0 +1,80 @@
+using Ryujinx.Graphics.Shader.IntermediateRepresentation;
+using Ryujinx.Graphics.Shader.Translation;
+using System.Numerics;
+
+using static Ryujinx.Graphics.Shader.StructuredIr.AstHelper;
+
+namespace Ryujinx.Graphics.Shader.StructuredIr
+{
+ /// <summary>
+ /// Operation of the structured IR: an instruction applied to a list of source nodes.
+ /// </summary>
+ class AstOperation : AstNode
+ {
+     public Instruction Inst { get; }
+     public StorageKind StorageKind { get; }
+
+     public int Index { get; }
+
+     private IAstNode[] _sources;
+
+     public int SourcesCount => _sources.Length;
+
+     /// <summary>
+     /// Creates the operation. The first <paramref name="sourcesCount"/> entries of
+     /// <paramref name="sources"/> are registered as uses, the remaining entries as definitions.
+     /// </summary>
+     /// <param name="inst">Instruction of the operation</param>
+     /// <param name="storageKind">Storage kind of the operation</param>
+     /// <param name="sources">Array stored as the sources of the operation (not copied)</param>
+     /// <param name="sourcesCount">Number of leading entries that are used rather than defined</param>
+     public AstOperation(Instruction inst, StorageKind storageKind, IAstNode[] sources, int sourcesCount)
+     {
+         Inst = inst;
+         StorageKind = storageKind;
+         _sources = sources;
+
+         int position = 0;
+
+         foreach (IAstNode entry in sources)
+         {
+             bool isUse = position < sourcesCount;
+
+             if (isUse)
+             {
+                 AddUse(entry, this);
+             }
+             else
+             {
+                 AddDef(entry, this);
+             }
+
+             position++;
+         }
+
+         Index = 0;
+     }
+
+     // Same as the constructor above, followed by setting Index to the given value.
+     public AstOperation(Instruction inst, StorageKind storageKind, int index, IAstNode[] sources, int sourcesCount) : this(inst, storageKind, sources, sourcesCount)
+     {
+         Index = index;
+     }
+
+     // Storage kind None, and every entry of sources is registered as a use.
+     public AstOperation(Instruction inst, params IAstNode[] sources) : this(inst, StorageKind.None, sources, sources.Length)
+     {
+     }
+
+     public IAstNode GetSource(int index) => _sources[index];
+
+     /// <summary>
+     /// Replaces the source at <paramref name="index"/>, moving the use registration
+     /// from the previous node to the new one.
+     /// </summary>
+     /// <param name="index">Index of the source to replace</param>
+     /// <param name="source">New source node</param>
+     public void SetSource(int index, IAstNode source)
+     {
+         IAstNode previous = _sources[index];
+
+         RemoveUse(previous, this);
+         AddUse(source, this);
+
+         _sources[index] = source;
+     }
+
+     /// <summary>
+     /// Combines a scalar type with the vector size given by the number of bits set in Index.
+     /// </summary>
+     /// <param name="scalarType">Scalar type to extend</param>
+     /// <returns>
+     /// The scalar type with the Vector2, Vector3 or Vector4 flag added for 2, 3 or 4 set bits,
+     /// and the scalar type unchanged for any other count
+     /// </returns>
+     public AggregateType GetVectorType(AggregateType scalarType)
+     {
+         return BitOperations.PopCount((uint)Index) switch
+         {
+             2 => scalarType | AggregateType.Vector2,
+             3 => scalarType | AggregateType.Vector3,
+             4 => scalarType | AggregateType.Vector4,
+             _ => scalarType
+         };
+     }
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/StructuredIr/AstOptimizer.cs b/src/Ryujinx.Graphics.Shader/StructuredIr/AstOptimizer.cs
new file mode 100644
index 00000000..b71ae2c4
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/StructuredIr/AstOptimizer.cs
@@ -0,0 +1,155 @@
+using Ryujinx.Graphics.Shader.IntermediateRepresentation;
+using Ryujinx.Graphics.Shader.Translation;
+using System.Collections.Generic;
+using System.Linq;
+
+using static Ryujinx.Graphics.Shader.StructuredIr.AstHelper;
+
+namespace Ryujinx.Graphics.Shader.StructuredIr
+{
+ // Clean-up passes run over the AST of the current function: propagation of
+ // expressions into their uses, removal of unused assignments and removal or
+ // simplification of empty blocks.
+ static class AstOptimizer
+ {
+ // Runs all passes on the main block of context.CurrentFunction.
+ // Propagation is skipped in debug mode, the empty block pass always runs.
+ public static void Optimize(StructuredProgramContext context)
+ {
+ AstBlock mainBlock = context.CurrentFunction.MainBlock;
+
+ // When debug mode is enabled, we disable expression propagation
+ // (this makes comparison with the disassembly easier).
+ if (!context.Config.Options.Flags.HasFlag(TranslationFlags.DebugMode))
+ {
+ AstBlockVisitor visitor = new AstBlockVisitor(mainBlock);
+
+ foreach (IAstNode node in visitor.Visit())
+ {
+ if (node is AstAssignment assignment && assignment.Destination is AstOperand propVar)
+ {
+ // A single use is always propagated, more uses only when the source is cheap to repeat.
+ bool isWorthPropagating = propVar.Uses.Count == 1 || IsWorthPropagating(assignment.Source);
+
+ // Only variables with exactly one definition can be replaced by their source.
+ if (propVar.Defs.Count == 1 && isWorthPropagating)
+ {
+ PropagateExpression(propVar, assignment.Source);
+ }
+
+ // Drop the assignment once nothing uses the local variable any more
+ // (either it never had uses, or the propagation above replaced all of them).
+ if (propVar.Type == OperandType.LocalVariable && propVar.Uses.Count == 0)
+ {
+ // visitor.Block is the block that contains the node just yielded.
+ visitor.Block.Remove(assignment);
+
+ context.CurrentFunction.Locals.Remove(propVar);
+ }
+ }
+ }
+ }
+
+ RemoveEmptyBlocks(mainBlock);
+ }
+
+ // True when the source is a unary operation that is either a Copy
+ // or has an operand (not a nested operation) as its only source.
+ private static bool IsWorthPropagating(IAstNode source)
+ {
+ if (!(source is AstOperation srcOp))
+ {
+ return false;
+ }
+
+ if (!InstructionInfo.IsUnary(srcOp.Inst))
+ {
+ return false;
+ }
+
+ return srcOp.GetSource(0) is AstOperand || srcOp.Inst == Instruction.Copy;
+ }
+
+ // Replaces every use of propVar with source.
+ private static void PropagateExpression(AstOperand propVar, IAstNode source)
+ {
+ // Work on a copy: the setters called below remove entries from propVar.Uses.
+ IAstNode[] uses = propVar.Uses.ToArray();
+
+ foreach (IAstNode useNode in uses)
+ {
+ if (useNode is AstBlock useBlock)
+ {
+ // A block uses the variable as its condition.
+ useBlock.Condition = source;
+ }
+ else if (useNode is AstOperation useOperation)
+ {
+ // The variable may appear in more than one source slot of the same operation.
+ for (int srcIndex = 0; srcIndex < useOperation.SourcesCount; srcIndex++)
+ {
+ if (useOperation.GetSource(srcIndex) == propVar)
+ {
+ useOperation.SetSource(srcIndex, source);
+ }
+ }
+ }
+ else if (useNode is AstAssignment useAssignment)
+ {
+ useAssignment.Source = source;
+ }
+ }
+ }
+
+ // Visits every block breadth-first, removing empty If and Else blocks and turning
+ // an Else block that only wraps an If (plus its Else) into an ElseIf chain.
+ private static void RemoveEmptyBlocks(AstBlock mainBlock)
+ {
+ Queue<AstBlock> pending = new Queue<AstBlock>();
+
+ pending.Enqueue(mainBlock);
+
+ while (pending.TryDequeue(out AstBlock block))
+ {
+ // Children are queued first, before this block is possibly moved or removed.
+ foreach (IAstNode node in block)
+ {
+ if (node is AstBlock childBlock)
+ {
+ pending.Enqueue(childBlock);
+ }
+ }
+
+ AstBlock parent = block.Parent;
+
+ // Blocks without a parent (such as the main block) are left untouched.
+ if (parent == null)
+ {
+ continue;
+ }
+
+ AstBlock nextBlock = Next(block) as AstBlock;
+
+ bool hasElse = nextBlock != null && nextBlock.Type == AstBlockType.Else;
+
+ bool isIf = block.Type == AstBlockType.If;
+
+ if (block.Count == 0)
+ {
+ if (isIf)
+ {
+ if (hasElse)
+ {
+ // The Else takes over as an If on the inverted condition.
+ nextBlock.TurnIntoIf(InverseCond(block.Condition));
+ }
+
+ parent.Remove(block);
+ }
+ else if (block.Type == AstBlockType.Else)
+ {
+ parent.Remove(block);
+ }
+ }
+ else if (isIf && parent.Type == AstBlockType.Else && parent.Count == (hasElse ? 2 : 1))
+ {
+ // The parent Else contains nothing but this If (and its Else, if any):
+ // move them right after the parent, remove the parent and make this block an ElseIf.
+ AstBlock parentOfParent = parent.Parent;
+
+ parent.Remove(block);
+
+ parentOfParent.AddAfter(parent, block);
+
+ if (hasElse)
+ {
+ parent.Remove(nextBlock);
+
+ parentOfParent.AddAfter(block, nextBlock);
+ }
+
+ parentOfParent.Remove(parent);
+
+ block.TurnIntoElseIf();
+ }
+ }
+ }
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/StructuredIr/AstTextureOperation.cs b/src/Ryujinx.Graphics.Shader/StructuredIr/AstTextureOperation.cs
new file mode 100644
index 00000000..a44f13cc
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/StructuredIr/AstTextureOperation.cs
@@ -0,0 +1,36 @@
+using Ryujinx.Graphics.Shader.IntermediateRepresentation;
+
+namespace Ryujinx.Graphics.Shader.StructuredIr
+{
+ /// <summary>
+ /// AST operation that additionally carries sampler type, format, flags and the
+ /// constant buffer slot and handle of the texture it refers to.
+ /// </summary>
+ class AstTextureOperation : AstOperation
+ {
+     public SamplerType Type { get; }
+     public TextureFormat Format { get; }
+     public TextureFlags Flags { get; }
+
+     public int CbufSlot { get; }
+     public int Handle { get; }
+
+     // Storage kind is always None, and every entry of sources is registered as a use.
+     public AstTextureOperation(
+         Instruction inst,
+         SamplerType type,
+         TextureFormat format,
+         TextureFlags flags,
+         int cbufSlot,
+         int handle,
+         int index,
+         params IAstNode[] sources) : base(inst, StorageKind.None, index, sources, sources.Length)
+     {
+         Handle = handle;
+         CbufSlot = cbufSlot;
+         Flags = flags;
+         Format = format;
+         Type = type;
+     }
+
+     /// <summary>
+     /// Creates a new operation with the same instruction, format, flags, slot, handle and
+     /// index as this one, but with a different sampler type.
+     /// </summary>
+     /// <param name="type">Sampler type of the new operation</param>
+     /// <returns>The new operation</returns>
+     // NOTE(review): no sources are passed on, so the result has zero sources.
+     // Confirm that callers only rely on the texture properties of the returned object.
+     public AstTextureOperation WithType(SamplerType type) =>
+         new AstTextureOperation(Inst, type, Format, Flags, CbufSlot, Handle, Index);
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/StructuredIr/GotoElimination.cs b/src/Ryujinx.Graphics.Shader/StructuredIr/GotoElimination.cs
new file mode 100644
index 00000000..8bcf9d9c
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/StructuredIr/GotoElimination.cs
@@ -0,0 +1,459 @@
+using Ryujinx.Graphics.Shader.IntermediateRepresentation;
+using System;
+using System.Collections.Generic;
+
+using static Ryujinx.Graphics.Shader.StructuredIr.AstHelper;
+
+namespace Ryujinx.Graphics.Shader.StructuredIr
+{
+ static class GotoElimination
+ {
+ // This is a modified version of the algorithm presented in the paper
+ // "Taming Control Flow: A Structured Approach to Eliminating Goto Statements".
+ //
+ // Gotos are processed from the last array element to the first. Each goto is moved
+ // (outward and/or inward) towards the block that holds its label, then the statements
+ // between the two are enclosed in a do-while (loop gotos) or an if (all other gotos),
+ // and finally the goto node itself is removed from its parent block.
+ public static void Eliminate(GotoStatement[] gotos)
+ {
+ for (int index = gotos.Length - 1; index >= 0; index--)
+ {
+ GotoStatement stmt = gotos[index];
+
+ AstBlock gBlock = ParentBlock(stmt.Goto);
+ AstBlock lBlock = ParentBlock(stmt.Label);
+
+ int gLevel = Level(gBlock);
+ int lLevel = Level(lBlock);
+
+ if (IndirectlyRelated(gBlock, lBlock, gLevel, lLevel))
+ {
+ // Walk up from the goto block until we find an ancestor that is
+ // directly related to the label block, then move the goto out to it.
+ AstBlock drBlock = gBlock;
+
+ int drLevel = gLevel;
+
+ do
+ {
+ drBlock = drBlock.Parent;
+
+ drLevel--;
+ }
+ while (!DirectlyRelated(drBlock, lBlock, drLevel, lLevel));
+
+ MoveOutward(stmt, gLevel, drLevel);
+
+ gBlock = drBlock;
+ gLevel = drLevel;
+
+ if (Previous(stmt.Goto) is AstBlock elseBlock && elseBlock.Type == AstBlockType.Else)
+ {
+ // It's possible that the label was enclosed inside an else block,
+ // in this case we need to update the block and level.
+ // We also need to set the IsLoop for the case when the label is
+ // now before the goto, due to the newly introduced else block.
+ lBlock = ParentBlock(stmt.Label);
+
+ lLevel = Level(lBlock);
+
+ if (!IndirectlyRelated(elseBlock, lBlock, gLevel + 1, lLevel))
+ {
+ stmt.IsLoop = true;
+ }
+ }
+ }
+
+ if (DirectlyRelated(gBlock, lBlock, gLevel, lLevel))
+ {
+ if (gLevel > lLevel)
+ {
+ // The goto is nested deeper than the label: bring it out to the label level.
+ MoveOutward(stmt, gLevel, lLevel);
+ }
+ else
+ {
+ // The label is nested deeper than the goto: push the goto into the label block.
+ // Loop gotos are lifted first (Lift clears IsLoop once the do-while is in place).
+ if (stmt.IsLoop)
+ {
+ Lift(stmt);
+ }
+
+ MoveInward(stmt);
+ }
+ }
+
+ // The goto may have been moved above, so its parent block must be queried again.
+ gBlock = ParentBlock(stmt.Goto);
+
+ if (stmt.IsLoop)
+ {
+ EncloseDoWhile(stmt, gBlock, stmt.Label);
+ }
+ else
+ {
+ Enclose(gBlock, AstBlockType.If, stmt.Condition, Next(stmt.Goto), stmt.Label);
+ }
+
+ gBlock.Remove(stmt.Goto);
+ }
+ }
+
+ /// <summary>
+ /// Checks if two blocks are indirectly related, that is, they are neither
+ /// the same block nor directly related according to <see cref="DirectlyRelated"/>.
+ /// </summary>
+ private static bool IndirectlyRelated(AstBlock lBlock, AstBlock rBlock, int lLevel, int rlevel)
+ {
+     if (lBlock == rBlock)
+     {
+         return false;
+     }
+
+     return !DirectlyRelated(lBlock, rBlock, lLevel, rlevel);
+ }
+
+ /// <summary>
+ /// Checks if one of the blocks is found on the parent chain of the other one.
+ /// The block with the higher level is the one whose parents are visited.
+ /// </summary>
+ /// <returns>True if the deeper block reaches the other block by following parents, false otherwise</returns>
+ private static bool DirectlyRelated(AstBlock lBlock, AstBlock rBlock, int lLevel, int rLevel)
+ {
+     // If the levels are equal, they can be either siblings or indirectly related.
+     if (lLevel == rLevel)
+     {
+         return false;
+     }
+
+     bool leftIsDeeper = lLevel > rLevel;
+
+     IAstNode deeper = leftIsDeeper ? lBlock : rBlock;
+     IAstNode shallower = leftIsDeeper ? rBlock : lBlock;
+
+     int deeperLevel = leftIsDeeper ? lLevel : rLevel;
+     int shallowerLevel = leftIsDeeper ? rLevel : lLevel;
+
+     // Climb from the deeper block, one parent per level, until the shallower level is passed.
+     for (int level = deeperLevel; level >= shallowerLevel; level--)
+     {
+         if (deeper == shallower)
+         {
+             return true;
+         }
+
+         deeper = deeper.Parent;
+     }
+
+     return false;
+ }
+
+ // Wraps the statements from the block that leads to the label up to the goto
+ // in a do-while (see EncloseDoWhile), moves the goto to the start of the
+ // resulting block, and clears IsLoop on the statement.
+ // Throws InvalidOperationException if the path to the label starts with an
+ // else block that is not preceded by an if block.
+ private static void Lift(GotoStatement stmt)
+ {
+ AstBlock block = ParentBlock(stmt.Goto);
+
+ AstBlock[] path = BackwardsPath(block, ParentBlock(stmt.Label));
+
+ // BackwardsPath starts at the label block and climbs, so the last element
+ // is the block on the path that is closest to the goto block.
+ AstBlock loopFirstStmt = path[path.Length - 1];
+
+ if (loopFirstStmt.Type == AstBlockType.Else)
+ {
+ // The loop must start at the matching if rather than at the else itself.
+ loopFirstStmt = Previous(loopFirstStmt) as AstBlock;
+
+ if (loopFirstStmt == null || loopFirstStmt.Type != AstBlockType.If)
+ {
+ throw new InvalidOperationException("Found an else without a matching if.");
+ }
+ }
+
+ AstBlock newBlock = EncloseDoWhile(stmt, block, loopFirstStmt);
+
+ block.Remove(stmt.Goto);
+
+ newBlock.AddFirst(stmt.Goto);
+
+ stmt.IsLoop = false;
+ }
+
+ // Moves the goto from level gLevel out to level lLevel, one parent block at a time.
+ // A conditional loop break is inserted for every do-while block that is left on the way,
+ // and the statements following the goto on each level are enclosed in an if block.
+ private static void MoveOutward(GotoStatement stmt, int gLevel, int lLevel)
+ {
+ AstBlock origin = ParentBlock(stmt.Goto);
+
+ AstBlock block = origin;
+
+ // Check if a loop is enclosing the goto, and the block that is
+ // directly related to the label is above the loop block.
+ // In that case, we need to introduce a break to get out of the loop.
+ AstBlock loopBlock = origin;
+
+ int loopLevel = gLevel;
+
+ while (loopLevel > lLevel)
+ {
+ AstBlock child = loopBlock;
+
+ loopBlock = loopBlock.Parent;
+
+ loopLevel--;
+
+ if (child.Type == AstBlockType.DoWhile)
+ {
+ // Put the conditional break right after the goto, then move the goto
+ // itself to right after the loop block in the loop's parent.
+ EncloseSingleInst(stmt, Instruction.LoopBreak);
+
+ block.Remove(stmt.Goto);
+
+ loopBlock.AddAfter(child, stmt.Goto);
+
+ block = loopBlock;
+ gLevel = loopLevel;
+ }
+ }
+
+ // Insert ifs to skip the parts that shouldn't be executed due to the goto.
+ bool tryInsertElse = stmt.IsUnconditional && origin.Type == AstBlockType.If;
+
+ while (gLevel > lLevel)
+ {
+ Enclose(block, AstBlockType.If, stmt.Condition, Next(stmt.Goto));
+
+ block.Remove(stmt.Goto);
+
+ AstBlock child = block;
+
+ // We can't move the goto between an if and an else block, in
+ // this case we need to move it after the else.
+ // IsLoop may need to be updated if the label is inside the else, as
+ // introducing a loop is the only way to ensure the else will be executed.
+ if (Next(child) is AstBlock elseBlock && elseBlock.Type == AstBlockType.Else)
+ {
+ child = elseBlock;
+ }
+
+ block = block.Parent;
+
+ block.AddAfter(child, stmt.Goto);
+
+ gLevel--;
+
+ // Only attempted for an unconditional goto that started directly inside an if block,
+ // and only on the step where the goto was placed right after that original block.
+ if (tryInsertElse && child == origin)
+ {
+ AstBlock lBlock = ParentBlock(stmt.Label);
+
+ // Stop the new else block at the label when the label is on this level and
+ // the goto is not a loop, otherwise enclose everything after the goto.
+ IAstNode last = block == lBlock && !stmt.IsLoop ? stmt.Label : null;
+
+ AstBlock newBlock = Enclose(block, AstBlockType.Else, null, Next(stmt.Goto), last);
+
+ if (newBlock != null)
+ {
+ block.Remove(stmt.Goto);
+
+ block.AddAfter(newBlock, stmt.Goto);
+ }
+ }
+ }
+ }
+
+ // Moves the goto into the block that contains the label, one nesting level at a time.
+ // The conditions of the if blocks found on the way are changed so that the goto condition
+ // leads into the block, and the statements that are jumped over are enclosed in an if block.
+ // Throws InvalidOperationException if an else block on the path is not preceded by an if block.
+ private static void MoveInward(GotoStatement stmt)
+ {
+ AstBlock block = ParentBlock(stmt.Goto);
+
+ AstBlock[] path = BackwardsPath(block, ParentBlock(stmt.Label));
+
+ // The path is stored from the label block upwards, so it is visited in reverse
+ // to go from the block closest to the goto down to the label block.
+ for (int index = path.Length - 1; index >= 0; index--)
+ {
+ AstBlock child = path[index];
+ AstBlock last = child;
+
+ if (child.Type == AstBlockType.If)
+ {
+ // Modify the if condition to allow it to be entered by the goto.
+ if (!ContainsCondComb(child.Condition, Instruction.LogicalOr, stmt.Condition))
+ {
+ child.OrCondition(stmt.Condition);
+ }
+ }
+ else if (child.Type == AstBlockType.Else)
+ {
+ // Modify the matching if condition to force the else to be entered by the goto.
+ if (!(Previous(child) is AstBlock ifBlock) || ifBlock.Type != AstBlockType.If)
+ {
+ throw new InvalidOperationException("Found an else without a matching if.");
+ }
+
+ IAstNode cond = InverseCond(stmt.Condition);
+
+ if (!ContainsCondComb(ifBlock.Condition, Instruction.LogicalAnd, cond))
+ {
+ ifBlock.AndCondition(cond);
+ }
+
+ // The enclosed range must end before the if, not before the else.
+ last = ifBlock;
+ }
+
+ // Enclose inverts the condition for if blocks, so the statements between
+ // the goto and the target block end up guarded by the inverse of the goto condition.
+ Enclose(block, AstBlockType.If, stmt.Condition, Next(stmt.Goto), last);
+
+ block.Remove(stmt.Goto);
+
+ child.AddFirst(stmt.Goto);
+
+ block = child;
+ }
+ }
+
+ /// <summary>
+ /// Follows the chain of two source operations starting at <paramref name="node"/> through
+ /// their first source, looking for an operation with instruction <paramref name="inst"/>
+ /// whose second source is the same condition as <paramref name="newCond"/>.
+ /// </summary>
+ /// <returns>True if such an operation was found, false otherwise</returns>
+ private static bool ContainsCondComb(IAstNode node, Instruction inst, IAstNode newCond)
+ {
+     IAstNode current = node;
+
+     while (true)
+     {
+         // The chain ends at the first node that is not a two source operation.
+         if (!(current is AstOperation combination) || combination.SourcesCount != 2)
+         {
+             return false;
+         }
+
+         if (combination.Inst == inst && IsSameCond(combination.GetSource(1), newCond))
+         {
+             return true;
+         }
+
+         current = combination.GetSource(0);
+     }
+ }
+
+ // Makes the statements from "first" up to the goto loop while the goto condition is true.
+ // If "block" is already a do-while block that starts at "first", that block is reused
+ // (and returned) instead of creating a new one.
+ // Otherwise, returns the result of Enclose, which can be null when "first" is the goto itself.
+ private static AstBlock EncloseDoWhile(GotoStatement stmt, AstBlock block, IAstNode first)
+ {
+ if (block.Type == AstBlockType.DoWhile && first == block.First)
+ {
+ // We only need to insert the continue if we're not at the end of the loop,
+ // or if our condition is different from the loop condition.
+ if (Next(stmt.Goto) != null || block.Condition != stmt.Condition)
+ {
+ EncloseSingleInst(stmt, Instruction.LoopContinue);
+ }
+
+ // Modify the do-while condition to allow it to continue.
+ if (!ContainsCondComb(block.Condition, Instruction.LogicalOr, stmt.Condition))
+ {
+ block.OrCondition(stmt.Condition);
+ }
+
+ return block;
+ }
+
+ return Enclose(block, AstBlockType.DoWhile, stmt.Condition, first, stmt.Goto);
+ }
+
+ /// <summary>
+ /// Inserts, right after the goto, a new if block using the goto condition
+ /// that contains a single operation with the instruction <paramref name="inst"/>.
+ /// </summary>
+ private static void EncloseSingleInst(GotoStatement stmt, Instruction inst)
+ {
+     AstOperation gotoNode = stmt.Goto;
+
+     AstBlock owner = ParentBlock(gotoNode);
+
+     AstBlock guard = new AstBlock(AstBlockType.If, stmt.Condition);
+
+     owner.AddAfter(gotoNode, guard);
+
+     AstOperation guardedInst = new AstOperation(inst);
+
+     guard.AddFirst(guardedInst);
+ }
+
+ // Moves the nodes of "block" from "first" (inclusive) up to "last" (exclusive) into a block
+ // of the given type. With the default "last" of null, every node from "first" on is moved.
+ // For if blocks, the block condition is the inverse of "cond".
+ // Returns null when the range is empty (first == last), the already existing block when the
+ // range is a single block that matches the requested type and condition, or the new block.
+ private static AstBlock Enclose(
+ AstBlock block,
+ AstBlockType type,
+ IAstNode cond,
+ IAstNode first,
+ IAstNode last = null)
+ {
+ if (first == last)
+ {
+ return null;
+ }
+
+ if (type == AstBlockType.If)
+ {
+ cond = InverseCond(cond);
+ }
+
+ // Do a quick check, if we are enclosing a single block,
+ // and the block type/condition matches the one we're going
+ // to create, then we don't need a new block, we can just
+ // return the old one.
+ bool hasSingleNode = Next(first) == last;
+
+ if (hasSingleNode && BlockMatches(first, type, cond))
+ {
+ return first as AstBlock;
+ }
+
+ AstBlock newBlock = new AstBlock(type, cond);
+
+ block.AddBefore(first, newBlock);
+
+ while (first != last)
+ {
+ // The next node must be read before the current one is removed from the block.
+ IAstNode next = Next(first);
+
+ block.Remove(first);
+
+ newBlock.Add(first);
+
+ first = next;
+ }
+
+ return newBlock;
+ }
+
+ /// <summary>
+ /// Checks if a node is a block of the specified type whose
+ /// condition is the same as <paramref name="cond"/>.
+ /// </summary>
+ private static bool BlockMatches(IAstNode node, AstBlockType type, IAstNode cond)
+ {
+     return node is AstBlock block && block.Type == type && IsSameCond(block.Condition, cond);
+ }
+
+ /// <summary>
+ /// Checks if two conditions are the same node. When the left condition is a logical not
+ /// operation, the right one must be a logical not too, and their first sources are compared.
+ /// </summary>
+ private static bool IsSameCond(IAstNode lCond, IAstNode rCond)
+ {
+     if (!(lCond is AstOperation lNot) || lNot.Inst != Instruction.LogicalNot)
+     {
+         // Not a negation on the left side, the nodes are compared as they are.
+         return lCond == rCond;
+     }
+
+     if (!(rCond is AstOperation rNot) || rNot.Inst != lNot.Inst)
+     {
+         return false;
+     }
+
+     IAstNode lInner = lNot.GetSource(0);
+     IAstNode rInner = rNot.GetSource(0);
+
+     return lInner == rInner;
+ }
+
+ /// <summary>
+ /// Gets the block that contains a node. For a block, this is its parent block.
+ /// For any other node, parents are followed until a block is found.
+ /// </summary>
+ private static AstBlock ParentBlock(IAstNode node)
+ {
+     if (node is AstBlock block)
+     {
+         return block.Parent;
+     }
+
+     // The node is not a block here, so at least one step up is always needed.
+     do
+     {
+         node = node.Parent;
+     }
+     while (node is not AstBlock);
+
+     return (AstBlock)node;
+ }
+
+ /// <summary>
+ /// Collects the blocks found while following parents from <paramref name="bottom"/>
+ /// until <paramref name="top"/> is reached. The first element is <paramref name="bottom"/>,
+ /// and <paramref name="top"/> itself is not part of the result.
+ /// </summary>
+ private static AstBlock[] BackwardsPath(AstBlock top, AstBlock bottom)
+ {
+     List<AstBlock> visited = new List<AstBlock>();
+
+     for (AstBlock current = bottom; current != top; current = current.Parent)
+     {
+         visited.Add(current);
+     }
+
+     return visited.ToArray();
+ }
+
+ /// <summary>
+ /// Counts the nodes on the parent chain of a node, including the node itself.
+ /// The result is 0 for null and 1 for a node without a parent.
+ /// </summary>
+ private static int Level(IAstNode node)
+ {
+     int depth = 0;
+
+     for (IAstNode current = node; current != null; current = current.Parent)
+     {
+         depth++;
+     }
+
+     return depth;
+ }
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/StructuredIr/GotoStatement.cs b/src/Ryujinx.Graphics.Shader/StructuredIr/GotoStatement.cs
new file mode 100644
index 00000000..25216e55
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/StructuredIr/GotoStatement.cs
@@ -0,0 +1,23 @@
+using Ryujinx.Graphics.Shader.IntermediateRepresentation;
+
+namespace Ryujinx.Graphics.Shader.StructuredIr
+{
+ /// <summary>
+ /// Pairs a branch operation with the assignment that is used as its label.
+ /// </summary>
+ class GotoStatement
+ {
+     public GotoStatement(AstOperation branch, AstAssignment label, bool isLoop)
+     {
+         (Goto, Label, IsLoop) = (branch, label, isLoop);
+     }
+
+     public AstOperation Goto { get; }
+     public AstAssignment Label { get; }
+
+     public bool IsLoop { get; set; }
+
+     /// <summary>
+     /// The branch condition, which is the destination of the label assignment.
+     /// </summary>
+     public IAstNode Condition
+     {
+         get { return Label.Destination; }
+     }
+
+     /// <summary>
+     /// True if the branch operation uses the <see cref="Instruction.Branch"/> instruction.
+     /// </summary>
+     public bool IsUnconditional
+     {
+         get { return Goto.Inst == Instruction.Branch; }
+     }
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/StructuredIr/HelperFunctionsMask.cs b/src/Ryujinx.Graphics.Shader/StructuredIr/HelperFunctionsMask.cs
new file mode 100644
index 00000000..d45f8d4e
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/StructuredIr/HelperFunctionsMask.cs
@@ -0,0 +1,21 @@
+using System;
+
+namespace Ryujinx.Graphics.Shader.StructuredIr
+{
+ /// <summary>
+ /// Bit mask with one bit for each helper function.
+ /// </summary>
+ [Flags]
+ enum HelperFunctionsMask
+ {
+     AtomicMinMaxS32Shared = 0x001,
+     AtomicMinMaxS32Storage = 0x002,
+     MultiplyHighS32 = 0x004,
+     MultiplyHighU32 = 0x008,
+     Shuffle = 0x010,
+     ShuffleDown = 0x020,
+     ShuffleUp = 0x040,
+     ShuffleXor = 0x080,
+     StoreSharedSmallInt = 0x100,
+     StoreStorageSmallInt = 0x200,
+     SwizzleAdd = 0x400,
+     FSI = 0x800
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/StructuredIr/IAstNode.cs b/src/Ryujinx.Graphics.Shader/StructuredIr/IAstNode.cs
new file mode 100644
index 00000000..5ececbb5
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/StructuredIr/IAstNode.cs
@@ -0,0 +1,11 @@
+using System.Collections.Generic;
+
+namespace Ryujinx.Graphics.Shader.StructuredIr
+{
+ // Common interface of the nodes that can be linked into an AST block.
+ interface IAstNode
+ {
+ // Block that this node belongs to.
+ AstBlock Parent { get; set; }
+
+ // Linked list node associated with this AST node.
+ // NOTE(review): presumably the entry on the parent block's node list - confirm on AstBlock.
+ LinkedListNode<IAstNode> LLNode { get; set; }
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/StructuredIr/InstructionInfo.cs b/src/Ryujinx.Graphics.Shader/StructuredIr/InstructionInfo.cs
new file mode 100644
index 00000000..8eccef23
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/StructuredIr/InstructionInfo.cs
@@ -0,0 +1,216 @@
+using Ryujinx.Graphics.Shader.IntermediateRepresentation;
+using Ryujinx.Graphics.Shader.Translation;
+using System;
+
+namespace Ryujinx.Graphics.Shader.StructuredIr
+{
+ static class InstructionInfo
+ {
+ /// <summary>
+ /// Table entry with the destination type and the source types of an instruction.
+ /// </summary>
+ private readonly struct InstInfo
+ {
+     public AggregateType DestType { get; }
+
+     public AggregateType[] SrcTypes { get; }
+
+     public InstInfo(AggregateType destType, params AggregateType[] srcTypes) =>
+         (DestType, SrcTypes) = (destType, srcTypes);
+ }
+
+ private static InstInfo[] _infoTbl;
+
+ /// <summary>
+ /// Fills the instruction information table. Each entry has the destination type followed by
+ /// one type for each source. Instructions that are not added here keep a default entry,
+ /// which has a null source types array.
+ /// </summary>
+ static InstructionInfo()
+ {
+     _infoTbl = new InstInfo[(int)Instruction.Count];
+
+     // Inst Destination type Source 1 type Source 2 type Source 3 type Source 4 type
+     Add(Instruction.AtomicAdd, AggregateType.U32, AggregateType.S32, AggregateType.S32, AggregateType.U32);
+     Add(Instruction.AtomicAnd, AggregateType.U32, AggregateType.S32, AggregateType.S32, AggregateType.U32);
+     Add(Instruction.AtomicCompareAndSwap, AggregateType.U32, AggregateType.S32, AggregateType.S32, AggregateType.U32, AggregateType.U32);
+     Add(Instruction.AtomicMaxS32, AggregateType.S32, AggregateType.S32, AggregateType.S32, AggregateType.S32);
+     Add(Instruction.AtomicMaxU32, AggregateType.U32, AggregateType.S32, AggregateType.S32, AggregateType.U32);
+     Add(Instruction.AtomicMinS32, AggregateType.S32, AggregateType.S32, AggregateType.S32, AggregateType.S32);
+     Add(Instruction.AtomicMinU32, AggregateType.U32, AggregateType.S32, AggregateType.S32, AggregateType.U32);
+     Add(Instruction.AtomicOr, AggregateType.U32, AggregateType.S32, AggregateType.S32, AggregateType.U32);
+     Add(Instruction.AtomicSwap, AggregateType.U32, AggregateType.S32, AggregateType.S32, AggregateType.U32);
+     Add(Instruction.AtomicXor, AggregateType.U32, AggregateType.S32, AggregateType.S32, AggregateType.U32);
+     Add(Instruction.Absolute, AggregateType.Scalar, AggregateType.Scalar);
+     Add(Instruction.Add, AggregateType.Scalar, AggregateType.Scalar, AggregateType.Scalar);
+     Add(Instruction.Ballot, AggregateType.U32, AggregateType.Bool);
+     Add(Instruction.BitCount, AggregateType.S32, AggregateType.S32);
+     Add(Instruction.BitfieldExtractS32, AggregateType.S32, AggregateType.S32, AggregateType.S32, AggregateType.S32);
+     Add(Instruction.BitfieldExtractU32, AggregateType.U32, AggregateType.U32, AggregateType.S32, AggregateType.S32);
+     Add(Instruction.BitfieldInsert, AggregateType.S32, AggregateType.S32, AggregateType.S32, AggregateType.S32, AggregateType.S32);
+     Add(Instruction.BitfieldReverse, AggregateType.S32, AggregateType.S32);
+     Add(Instruction.BitwiseAnd, AggregateType.S32, AggregateType.S32, AggregateType.S32);
+     Add(Instruction.BitwiseExclusiveOr, AggregateType.S32, AggregateType.S32, AggregateType.S32);
+     Add(Instruction.BitwiseNot, AggregateType.S32, AggregateType.S32);
+     Add(Instruction.BitwiseOr, AggregateType.S32, AggregateType.S32, AggregateType.S32);
+     Add(Instruction.BranchIfTrue, AggregateType.Void, AggregateType.Bool);
+     Add(Instruction.BranchIfFalse, AggregateType.Void, AggregateType.Bool);
+     Add(Instruction.Call, AggregateType.Scalar);
+     // Ceiling has a single source, like Floor, Round and Truncate. It was registered with
+     // two source types before, which made IsUnary return false for it.
+     Add(Instruction.Ceiling, AggregateType.Scalar, AggregateType.Scalar);
+     Add(Instruction.Clamp, AggregateType.Scalar, AggregateType.Scalar, AggregateType.Scalar, AggregateType.Scalar);
+     Add(Instruction.ClampU32, AggregateType.U32, AggregateType.U32, AggregateType.U32, AggregateType.U32);
+     Add(Instruction.CompareEqual, AggregateType.Bool, AggregateType.Scalar, AggregateType.Scalar);
+     Add(Instruction.CompareGreater, AggregateType.Bool, AggregateType.Scalar, AggregateType.Scalar);
+     Add(Instruction.CompareGreaterOrEqual, AggregateType.Bool, AggregateType.Scalar, AggregateType.Scalar);
+     Add(Instruction.CompareGreaterOrEqualU32, AggregateType.Bool, AggregateType.U32, AggregateType.U32);
+     Add(Instruction.CompareGreaterU32, AggregateType.Bool, AggregateType.U32, AggregateType.U32);
+     Add(Instruction.CompareLess, AggregateType.Bool, AggregateType.Scalar, AggregateType.Scalar);
+     Add(Instruction.CompareLessOrEqual, AggregateType.Bool, AggregateType.Scalar, AggregateType.Scalar);
+     Add(Instruction.CompareLessOrEqualU32, AggregateType.Bool, AggregateType.U32, AggregateType.U32);
+     Add(Instruction.CompareLessU32, AggregateType.Bool, AggregateType.U32, AggregateType.U32);
+     Add(Instruction.CompareNotEqual, AggregateType.Bool, AggregateType.Scalar, AggregateType.Scalar);
+     Add(Instruction.ConditionalSelect, AggregateType.Scalar, AggregateType.Bool, AggregateType.Scalar, AggregateType.Scalar);
+     Add(Instruction.ConvertFP32ToFP64, AggregateType.FP64, AggregateType.FP32);
+     Add(Instruction.ConvertFP64ToFP32, AggregateType.FP32, AggregateType.FP64);
+     Add(Instruction.ConvertFP32ToS32, AggregateType.S32, AggregateType.FP32);
+     Add(Instruction.ConvertFP32ToU32, AggregateType.U32, AggregateType.FP32);
+     Add(Instruction.ConvertFP64ToS32, AggregateType.S32, AggregateType.FP64);
+     Add(Instruction.ConvertFP64ToU32, AggregateType.U32, AggregateType.FP64);
+     Add(Instruction.ConvertS32ToFP32, AggregateType.FP32, AggregateType.S32);
+     Add(Instruction.ConvertS32ToFP64, AggregateType.FP64, AggregateType.S32);
+     Add(Instruction.ConvertU32ToFP32, AggregateType.FP32, AggregateType.U32);
+     Add(Instruction.ConvertU32ToFP64, AggregateType.FP64, AggregateType.U32);
+     Add(Instruction.Cosine, AggregateType.Scalar, AggregateType.Scalar);
+     Add(Instruction.Ddx, AggregateType.FP32, AggregateType.FP32);
+     Add(Instruction.Ddy, AggregateType.FP32, AggregateType.FP32);
+     Add(Instruction.Divide, AggregateType.Scalar, AggregateType.Scalar, AggregateType.Scalar);
+     Add(Instruction.ExponentB2, AggregateType.Scalar, AggregateType.Scalar);
+     Add(Instruction.FindLSB, AggregateType.S32, AggregateType.S32);
+     Add(Instruction.FindMSBS32, AggregateType.S32, AggregateType.S32);
+     Add(Instruction.FindMSBU32, AggregateType.S32, AggregateType.U32);
+     Add(Instruction.Floor, AggregateType.Scalar, AggregateType.Scalar);
+     Add(Instruction.FusedMultiplyAdd, AggregateType.Scalar, AggregateType.Scalar, AggregateType.Scalar, AggregateType.Scalar);
+     Add(Instruction.ImageLoad, AggregateType.FP32);
+     Add(Instruction.ImageStore, AggregateType.Void);
+     Add(Instruction.ImageAtomic, AggregateType.S32);
+     Add(Instruction.IsNan, AggregateType.Bool, AggregateType.Scalar);
+     Add(Instruction.Load, AggregateType.FP32);
+     Add(Instruction.LoadConstant, AggregateType.FP32, AggregateType.S32, AggregateType.S32);
+     Add(Instruction.LoadGlobal, AggregateType.U32, AggregateType.S32, AggregateType.S32);
+     Add(Instruction.LoadLocal, AggregateType.U32, AggregateType.S32);
+     Add(Instruction.LoadShared, AggregateType.U32, AggregateType.S32);
+     Add(Instruction.LoadStorage, AggregateType.U32, AggregateType.S32, AggregateType.S32);
+     Add(Instruction.Lod, AggregateType.FP32);
+     Add(Instruction.LogarithmB2, AggregateType.Scalar, AggregateType.Scalar);
+     Add(Instruction.LogicalAnd, AggregateType.Bool, AggregateType.Bool, AggregateType.Bool);
+     Add(Instruction.LogicalExclusiveOr, AggregateType.Bool, AggregateType.Bool, AggregateType.Bool);
+     Add(Instruction.LogicalNot, AggregateType.Bool, AggregateType.Bool);
+     Add(Instruction.LogicalOr, AggregateType.Bool, AggregateType.Bool, AggregateType.Bool);
+     Add(Instruction.Maximum, AggregateType.Scalar, AggregateType.Scalar, AggregateType.Scalar);
+     Add(Instruction.MaximumU32, AggregateType.U32, AggregateType.U32, AggregateType.U32);
+     Add(Instruction.Minimum, AggregateType.Scalar, AggregateType.Scalar, AggregateType.Scalar);
+     Add(Instruction.MinimumU32, AggregateType.U32, AggregateType.U32, AggregateType.U32);
+     Add(Instruction.Multiply, AggregateType.Scalar, AggregateType.Scalar, AggregateType.Scalar);
+     Add(Instruction.MultiplyHighS32, AggregateType.S32, AggregateType.S32, AggregateType.S32);
+     Add(Instruction.MultiplyHighU32, AggregateType.U32, AggregateType.U32, AggregateType.U32);
+     Add(Instruction.Negate, AggregateType.Scalar, AggregateType.Scalar);
+     Add(Instruction.PackDouble2x32, AggregateType.FP64, AggregateType.U32, AggregateType.U32);
+     Add(Instruction.PackHalf2x16, AggregateType.U32, AggregateType.FP32, AggregateType.FP32);
+     Add(Instruction.ReciprocalSquareRoot, AggregateType.Scalar, AggregateType.Scalar);
+     Add(Instruction.Round, AggregateType.Scalar, AggregateType.Scalar);
+     Add(Instruction.ShiftLeft, AggregateType.S32, AggregateType.S32, AggregateType.S32);
+     Add(Instruction.ShiftRightS32, AggregateType.S32, AggregateType.S32, AggregateType.S32);
+     Add(Instruction.ShiftRightU32, AggregateType.U32, AggregateType.U32, AggregateType.S32);
+     Add(Instruction.Shuffle, AggregateType.FP32, AggregateType.FP32, AggregateType.U32, AggregateType.U32, AggregateType.Bool);
+     Add(Instruction.ShuffleDown, AggregateType.FP32, AggregateType.FP32, AggregateType.U32, AggregateType.U32, AggregateType.Bool);
+     Add(Instruction.ShuffleUp, AggregateType.FP32, AggregateType.FP32, AggregateType.U32, AggregateType.U32, AggregateType.Bool);
+     Add(Instruction.ShuffleXor, AggregateType.FP32, AggregateType.FP32, AggregateType.U32, AggregateType.U32, AggregateType.Bool);
+     Add(Instruction.Sine, AggregateType.Scalar, AggregateType.Scalar);
+     Add(Instruction.SquareRoot, AggregateType.Scalar, AggregateType.Scalar);
+     Add(Instruction.Store, AggregateType.Void);
+     Add(Instruction.StoreGlobal, AggregateType.Void, AggregateType.S32, AggregateType.S32, AggregateType.U32);
+     Add(Instruction.StoreLocal, AggregateType.Void, AggregateType.S32, AggregateType.U32);
+     Add(Instruction.StoreShared, AggregateType.Void, AggregateType.S32, AggregateType.U32);
+     Add(Instruction.StoreShared16, AggregateType.Void, AggregateType.S32, AggregateType.U32);
+     Add(Instruction.StoreShared8, AggregateType.Void, AggregateType.S32, AggregateType.U32);
+     Add(Instruction.StoreStorage, AggregateType.Void, AggregateType.S32, AggregateType.S32, AggregateType.U32);
+     Add(Instruction.StoreStorage16, AggregateType.Void, AggregateType.S32, AggregateType.S32, AggregateType.U32);
+     Add(Instruction.StoreStorage8, AggregateType.Void, AggregateType.S32, AggregateType.S32, AggregateType.U32);
+     Add(Instruction.Subtract, AggregateType.Scalar, AggregateType.Scalar, AggregateType.Scalar);
+     Add(Instruction.SwizzleAdd, AggregateType.FP32, AggregateType.FP32, AggregateType.FP32, AggregateType.S32);
+     Add(Instruction.TextureSample, AggregateType.FP32);
+     Add(Instruction.TextureSize, AggregateType.S32, AggregateType.S32, AggregateType.S32);
+     Add(Instruction.Truncate, AggregateType.Scalar, AggregateType.Scalar);
+     Add(Instruction.UnpackDouble2x32, AggregateType.U32, AggregateType.FP64);
+     Add(Instruction.UnpackHalf2x16, AggregateType.FP32, AggregateType.U32);
+     Add(Instruction.VectorExtract, AggregateType.Scalar, AggregateType.Vector4, AggregateType.S32);
+     Add(Instruction.VoteAll, AggregateType.Bool, AggregateType.Bool);
+     Add(Instruction.VoteAllEqual, AggregateType.Bool, AggregateType.Bool);
+     Add(Instruction.VoteAny, AggregateType.Bool, AggregateType.Bool);
+ }
+
+ /// <summary>
+ /// Stores the destination and source types of an instruction on the information table,
+ /// at the index given by the instruction value.
+ /// </summary>
+ private static void Add(Instruction inst, AggregateType destType, params AggregateType[] srcTypes) =>
+     _infoTbl[(int)inst] = new InstInfo(destType, srcTypes);
+
+ /// <summary>
+ /// Gets the destination type of an instruction. The table is indexed with the instruction
+ /// bits selected by <see cref="Instruction.Mask"/>, and a scalar type is resolved using
+ /// the complete instruction value.
+ /// </summary>
+ /// <exception cref="ArgumentException">The table type for the destination is void</exception>
+ public static AggregateType GetDestVarType(Instruction inst)
+ {
+     InstInfo info = _infoTbl[(int)(inst & Instruction.Mask)];
+
+     return GetFinalVarType(info.DestType, inst);
+ }
+
+ /// <summary>
+ /// Gets the type of the source at <paramref name="index"/> for an instruction.
+ /// A few instructions always use the same type for every source and ignore the table.
+ /// </summary>
+ public static AggregateType GetSrcVarType(Instruction inst, int index)
+ {
+     // TODO: Return correct type depending on source index,
+     // that can improve the decompiler output.
+     bool alwaysFP32 = inst is Instruction.ImageLoad
+         or Instruction.ImageStore
+         or Instruction.ImageAtomic
+         or Instruction.Lod
+         or Instruction.TextureSample;
+
+     if (alwaysFP32)
+     {
+         return AggregateType.FP32;
+     }
+
+     bool alwaysS32 = inst is Instruction.Call or Instruction.Load or Instruction.Store;
+
+     if (alwaysS32)
+     {
+         return AggregateType.S32;
+     }
+
+     InstInfo info = _infoTbl[(int)(inst & Instruction.Mask)];
+
+     return GetFinalVarType(info.SrcTypes[index], inst);
+ }
+
+ /// <summary>
+ /// Resolves a table type into the type used by an instruction. Scalar becomes FP32 or FP64
+ /// when the matching instruction bit is set, and S32 otherwise. Other types are returned
+ /// unchanged, except for void.
+ /// </summary>
+ /// <exception cref="ArgumentException"><paramref name="type"/> is void</exception>
+ private static AggregateType GetFinalVarType(AggregateType type, Instruction inst)
+ {
+     if (type == AggregateType.Void)
+     {
+         throw new ArgumentException($"Invalid operand for instruction \"{inst}\".");
+     }
+
+     if (type != AggregateType.Scalar)
+     {
+         return type;
+     }
+
+     // The FP32 bit is checked before the FP64 bit.
+     if ((inst & Instruction.FP32) != 0)
+     {
+         return AggregateType.FP32;
+     }
+
+     return (inst & Instruction.FP64) != 0 ? AggregateType.FP64 : AggregateType.S32;
+ }
+
+ /// <summary>
+ /// Checks if an instruction has a single source. Copy is always unary and TextureSample
+ /// never is, for all other instructions the number of source types on the table is used.
+ /// </summary>
+ public static bool IsUnary(Instruction inst)
+ {
+     return inst switch
+     {
+         Instruction.Copy => true,
+         Instruction.TextureSample => false,
+         _ => _infoTbl[(int)(inst & Instruction.Mask)].SrcTypes.Length == 1
+     };
+ }
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/StructuredIr/IoDefinition.cs b/src/Ryujinx.Graphics.Shader/StructuredIr/IoDefinition.cs
new file mode 100644
index 00000000..21a1b3f0
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/StructuredIr/IoDefinition.cs
@@ -0,0 +1,44 @@
+using Ryujinx.Graphics.Shader.IntermediateRepresentation;
+using System;
+
+namespace Ryujinx.Graphics.Shader.StructuredIr
+{
+ /// <summary>
+ /// Immutable value made of a storage kind, an I/O variable, a location and a component.
+ /// Two definitions are equal when all four values are equal.
+ /// </summary>
+ readonly struct IoDefinition : IEquatable<IoDefinition>
+ {
+     public StorageKind StorageKind { get; }
+     public IoVariable IoVariable { get; }
+     public int Location { get; }
+     public int Component { get; }
+
+     public IoDefinition(StorageKind storageKind, IoVariable ioVariable, int location = 0, int component = 0)
+     {
+         (StorageKind, IoVariable) = (storageKind, ioVariable);
+         (Location, Component) = (location, component);
+     }
+
+     public override bool Equals(object other)
+     {
+         if (other is IoDefinition definition)
+         {
+             return Equals(definition);
+         }
+
+         return false;
+     }
+
+     public bool Equals(IoDefinition other)
+     {
+         if (StorageKind != other.StorageKind || IoVariable != other.IoVariable)
+         {
+             return false;
+         }
+
+         return Location == other.Location && Component == other.Component;
+     }
+
+     public override int GetHashCode()
+     {
+         // One byte offset per value: storage kind, then variable, location and component.
+         int hash = (int)StorageKind;
+
+         hash |= (int)IoVariable << 8;
+         hash |= Location << 16;
+         hash |= Component << 24;
+
+         return hash;
+     }
+
+     public override string ToString() => $"{StorageKind}.{IoVariable}.{Location}.{Component}";
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/StructuredIr/OperandInfo.cs b/src/Ryujinx.Graphics.Shader/StructuredIr/OperandInfo.cs
new file mode 100644
index 00000000..38ed1584
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/StructuredIr/OperandInfo.cs
@@ -0,0 +1,33 @@
+using Ryujinx.Graphics.Shader.IntermediateRepresentation;
+using Ryujinx.Graphics.Shader.Translation;
+using System;
+
+namespace Ryujinx.Graphics.Shader.StructuredIr
+{
+ /// <summary>
+ /// Helpers to get the variable type of AST operands.
+ /// </summary>
+ static class OperandInfo
+ {
+     /// <summary>
+     /// Gets the type of an operand. Local variables use their own variable type,
+     /// all other operands use the type associated with their operand type.
+     /// </summary>
+     public static AggregateType GetVarType(AstOperand operand)
+     {
+         return operand.Type == OperandType.LocalVariable
+             ? operand.VarType
+             : GetVarType(operand.Type);
+     }
+
+     /// <summary>
+     /// Gets the type associated with an operand type.
+     /// </summary>
+     /// <exception cref="ArgumentException">The operand type has no associated type</exception>
+     public static AggregateType GetVarType(OperandType type)
+     {
+         switch (type)
+         {
+             case OperandType.Argument:
+             case OperandType.Constant:
+             case OperandType.Undefined:
+                 return AggregateType.S32;
+
+             case OperandType.ConstantBuffer:
+                 return AggregateType.FP32;
+
+             default:
+                 throw new ArgumentException($"Invalid operand type \"{type}\".");
+         }
+     }
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/StructuredIr/PhiFunctions.cs b/src/Ryujinx.Graphics.Shader/StructuredIr/PhiFunctions.cs
new file mode 100644
index 00000000..541ca298
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/StructuredIr/PhiFunctions.cs
@@ -0,0 +1,45 @@
+using Ryujinx.Graphics.Shader.IntermediateRepresentation;
+using System.Collections.Generic;
+
+namespace Ryujinx.Graphics.Shader.StructuredIr
+{
+ /// <summary>
+ /// Removal of phi nodes from basic blocks.
+ /// </summary>
+ static class PhiFunctions
+ {
+     /// <summary>
+     /// Removes all phi nodes from the blocks. For each source of a phi node, a copy from the
+     /// source to the phi destination is appended to the block associated with that source.
+     /// </summary>
+     public static void Remove(BasicBlock[] blocks)
+     {
+         foreach (BasicBlock block in blocks)
+         {
+             LinkedListNode<INode> current = block.Operations.First;
+
+             while (current != null)
+             {
+                 // Taken before anything is modified, the current node may be removed below.
+                 LinkedListNode<INode> following = current.Next;
+
+                 if (current.Value is PhiNode phi)
+                 {
+                     for (int srcIndex = 0; srcIndex < phi.SourcesCount; srcIndex++)
+                     {
+                         Operand source = phi.GetSource(srcIndex);
+
+                         BasicBlock sourceBlock = phi.GetBlock(srcIndex);
+
+                         sourceBlock.Append(new Operation(Instruction.Copy, phi.Dest, source));
+                     }
+
+                     block.Operations.Remove(current);
+                 }
+
+                 current = following;
+             }
+         }
+     }
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/StructuredIr/StructuredFunction.cs b/src/Ryujinx.Graphics.Shader/StructuredIr/StructuredFunction.cs
new file mode 100644
index 00000000..61c4fed7
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/StructuredIr/StructuredFunction.cs
@@ -0,0 +1,42 @@
+using Ryujinx.Graphics.Shader.Translation;
+using System.Collections.Generic;
+
+namespace Ryujinx.Graphics.Shader.StructuredIr
+{
+ /// <summary>
+ /// Function on the structured program, with its main block, name, return type,
+ /// argument types and set of local variables.
+ /// </summary>
+ class StructuredFunction
+ {
+     public AstBlock MainBlock { get; }
+
+     public string Name { get; }
+
+     public AggregateType ReturnType { get; }
+
+     public AggregateType[] InArguments { get; }
+     public AggregateType[] OutArguments { get; }
+
+     public HashSet<AstOperand> Locals { get; } = new HashSet<AstOperand>();
+
+     public StructuredFunction(
+         AstBlock mainBlock,
+         string name,
+         AggregateType returnType,
+         AggregateType[] inArguments,
+         AggregateType[] outArguments)
+     {
+         (MainBlock, Name, ReturnType) = (mainBlock, name, returnType);
+         (InArguments, OutArguments) = (inArguments, outArguments);
+     }
+
+     /// <summary>
+     /// Gets the type of an argument. Indices below the input arguments count refer to
+     /// the input arguments, the remaining ones refer to the output arguments.
+     /// </summary>
+     public AggregateType GetArgumentType(int index)
+     {
+         if (index < InArguments.Length)
+         {
+             return InArguments[index];
+         }
+
+         return OutArguments[index - InArguments.Length];
+     }
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/StructuredIr/StructuredProgram.cs b/src/Ryujinx.Graphics.Shader/StructuredIr/StructuredProgram.cs
new file mode 100644
index 00000000..b4ca8ee5
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/StructuredIr/StructuredProgram.cs
@@ -0,0 +1,421 @@
+using Ryujinx.Graphics.Shader.IntermediateRepresentation;
+using Ryujinx.Graphics.Shader.Translation;
+using System;
+using System.Collections.Generic;
+using System.Numerics;
+
+namespace Ryujinx.Graphics.Shader.StructuredIr
+{
+ static class StructuredProgram
+ {
+ /// <summary>
+ /// Builds the structured (AST) program for a set of IR functions.
+ /// </summary>
+ /// <param name="functions">IR functions to convert</param>
+ /// <param name="config">Shader configuration used while building the program</param>
+ /// <returns>Information about the structured program, including one entry per converted function</returns>
+ public static StructuredProgramInfo MakeStructuredProgram(Function[] functions, ShaderConfig config)
+ {
+     StructuredProgramContext context = new StructuredProgramContext(config);
+
+     foreach (Function function in functions)
+     {
+         BasicBlock[] blocks = function.Blocks;
+
+         AggregateType returnType = AggregateType.Void;
+
+         if (function.ReturnsValue)
+         {
+             returnType = AggregateType.S32;
+         }
+
+         // Every argument is declared as a signed 32-bit integer.
+         AggregateType[] inArguments = new AggregateType[function.InArgumentsCount];
+         AggregateType[] outArguments = new AggregateType[function.OutArgumentsCount];
+
+         Array.Fill(inArguments, AggregateType.S32);
+         Array.Fill(outArguments, AggregateType.S32);
+
+         context.EnterFunction(blocks.Length, function.Name, returnType, inArguments, outArguments);
+
+         // Phi nodes are removed first, so every remaining node can be cast to Operation below.
+         PhiFunctions.Remove(blocks);
+
+         foreach (BasicBlock block in blocks)
+         {
+             context.EnterBlock(block);
+
+             LinkedListNode<INode> opNode = block.Operations.First;
+
+             while (opNode != null)
+             {
+                 Operation operation = (Operation)opNode.Value;
+
+                 if (IsBranchInst(operation.Inst))
+                 {
+                     context.LeaveBlock(block, operation);
+                 }
+                 else
+                 {
+                     AddOperation(context, operation);
+                 }
+
+                 opNode = opNode.Next;
+             }
+         }
+
+         GotoElimination.Eliminate(context.GetGotos());
+
+         AstOptimizer.Optimize(context);
+
+         context.LeaveFunction();
+     }
+
+     return context.Info;
+ }
+
+ /// <summary>
+ /// Converts a single IR operation into one or more AST nodes that are added to the context,
+ /// and records the IO definitions and helper functions the operation requires.
+ /// </summary>
+ /// <param name="context">Structured program context receiving the new nodes</param>
+ /// <param name="operation">IR operation to convert (must not be a branch, those are handled by the caller)</param>
+ private static void AddOperation(StructuredProgramContext context, Operation operation)
+ {
+ Instruction inst = operation.Inst;
+ StorageKind storageKind = operation.StorageKind;
+
+ // Loads and stores on shader inputs/outputs register the variable they touch as an IO definition.
+ if ((inst == Instruction.Load || inst == Instruction.Store) && storageKind.IsInputOrOutput())
+ {
+ IoVariable ioVariable = (IoVariable)operation.GetSource(0).Value;
+ bool isOutput = storageKind.IsOutput();
+ bool perPatch = storageKind.IsPerPatch();
+ int location = 0;
+ int component = 0;
+
+ if (context.Config.HasPerLocationInputOrOutput(ioVariable, isOutput))
+ {
+ location = operation.GetSource(1).Value;
+
+ // The component is only recorded when it is a constant and the config reports
+ // a per-component definition for this location.
+ if (operation.SourcesCount > 2 &&
+ operation.GetSource(2).Type == OperandType.Constant &&
+ context.Config.HasPerLocationInputOrOutputComponent(ioVariable, location, operation.GetSource(2).Value, isOutput))
+ {
+ component = operation.GetSource(2).Value;
+ }
+ }
+
+ context.Info.IoDefinitions.Add(new IoDefinition(storageKind, ioVariable, location, component));
+ }
+
+ bool vectorDest = IsVectorDestInst(inst);
+
+ // For non-vector instructions, every destination after the first one is appended
+ // to the sources array (presumably as output arguments of the operation).
+ int sourcesCount = operation.SourcesCount;
+ int outDestsCount = operation.DestsCount != 0 && !vectorDest ? operation.DestsCount - 1 : 0;
+
+ IAstNode[] sources = new IAstNode[sourcesCount + outDestsCount];
+
+ for (int index = 0; index < operation.SourcesCount; index++)
+ {
+ sources[index] = context.GetOperand(operation.GetSource(index));
+ }
+
+ for (int index = 0; index < outDestsCount; index++)
+ {
+ AstOperand oper = context.GetOperand(operation.GetDest(1 + index));
+
+ oper.VarType = InstructionInfo.GetSrcVarType(inst, sourcesCount + index);
+
+ sources[sourcesCount + index] = oper;
+ }
+
+ // Builds the AST texture operation from the IR one, using the (possibly updated) instruction and the sources above.
+ AstTextureOperation GetAstTextureOperation(TextureOperation texOp)
+ {
+ return new AstTextureOperation(
+ inst,
+ texOp.Type,
+ texOp.Format,
+ texOp.Flags,
+ texOp.CbufSlot,
+ texOp.Handle,
+ texOp.Index,
+ sources);
+ }
+
+ // The number of set bits on the operation index is used as the number of vector components.
+ int componentsCount = BitOperations.PopCount((uint)operation.Index);
+
+ if (vectorDest && componentsCount > 1)
+ {
+ // Vector result: the operation is assigned to a temporary vector, and each
+ // destination then receives one component extracted from that temporary.
+ AggregateType destType = InstructionInfo.GetDestVarType(inst);
+
+ IAstNode source;
+
+ if (operation is TextureOperation texOp)
+ {
+ if (texOp.Inst == Instruction.ImageLoad)
+ {
+ destType = texOp.Format.GetComponentType();
+ }
+
+ source = GetAstTextureOperation(texOp);
+ }
+ else
+ {
+ source = new AstOperation(inst, operation.StorageKind, operation.Index, sources, operation.SourcesCount);
+ }
+
+ // Keep the scalar element type before the vector size bits are added.
+ AggregateType destElemType = destType;
+
+ switch (componentsCount)
+ {
+ case 2: destType |= AggregateType.Vector2; break;
+ case 3: destType |= AggregateType.Vector3; break;
+ case 4: destType |= AggregateType.Vector4; break;
+ }
+
+ AstOperand destVec = context.NewTemp(destType);
+
+ context.AddNode(new AstAssignment(destVec, source));
+
+ for (int i = 0; i < operation.DestsCount; i++)
+ {
+ AstOperand dest = context.GetOperand(operation.GetDest(i));
+ AstOperand index = new AstOperand(OperandType.Constant, i);
+
+ dest.VarType = destElemType;
+
+ context.AddNode(new AstAssignment(dest, new AstOperation(Instruction.VectorExtract, StorageKind.None, new[] { destVec, index }, 2)));
+ }
+ }
+ else if (operation.Dest != null)
+ {
+ AstOperand dest = context.GetOperand(operation.Dest);
+
+ // If all the sources are bool, it's better to use short-circuiting
+ // logical operations, rather than forcing a cast to int and doing
+ // a bitwise operation with the value, as it is likely to be used as
+ // a bool in the end.
+ if (IsBitwiseInst(inst) && AreAllSourceTypesEqual(sources, AggregateType.Bool))
+ {
+ inst = GetLogicalFromBitwiseInst(inst);
+ }
+
+ bool isCondSel = inst == Instruction.ConditionalSelect;
+ bool isCopy = inst == Instruction.Copy;
+
+ if (isCondSel || isCopy)
+ {
+ // Copies and selects have no type of their own, so the destination type is taken from how the value is used.
+ AggregateType type = GetVarTypeFromUses(operation.Dest);
+
+ if (isCondSel && type == AggregateType.FP32)
+ {
+ inst |= Instruction.FP32;
+ }
+
+ dest.VarType = type;
+ }
+ else
+ {
+ dest.VarType = InstructionInfo.GetDestVarType(inst);
+ }
+
+ IAstNode source;
+
+ if (operation is TextureOperation texOp)
+ {
+ if (texOp.Inst == Instruction.ImageLoad)
+ {
+ dest.VarType = texOp.Format.GetComponentType();
+ }
+
+ source = GetAstTextureOperation(texOp);
+ }
+ else if (!isCopy)
+ {
+ source = new AstOperation(inst, operation.StorageKind, operation.Index, sources, operation.SourcesCount);
+ }
+ else
+ {
+ // A copy is emitted as a direct assignment from its first source.
+ source = sources[0];
+ }
+
+ context.AddNode(new AstAssignment(dest, source));
+ }
+ else if (operation.Inst == Instruction.Comment)
+ {
+ context.AddNode(new AstComment(((CommentNode)operation).Comment));
+ }
+ else if (operation is TextureOperation texOp)
+ {
+ // Texture operation without destination, added as a standalone node.
+ AstTextureOperation astTexOp = GetAstTextureOperation(texOp);
+
+ context.AddNode(astTexOp);
+ }
+ else
+ {
+ context.AddNode(new AstOperation(inst, operation.StorageKind, operation.Index, sources, operation.SourcesCount));
+ }
+
+ // These instructions need to be emulated using helper functions,
+ // because they are NVIDIA specific. These flags help the backend
+ // decide which helper functions are needed in the final generated code.
+ switch (operation.Inst)
+ {
+ case Instruction.AtomicMaxS32:
+ case Instruction.AtomicMinS32:
+ if (operation.StorageKind == StorageKind.SharedMemory)
+ {
+ context.Info.HelperFunctionsMask |= HelperFunctionsMask.AtomicMinMaxS32Shared;
+ }
+ else if (operation.StorageKind == StorageKind.StorageBuffer)
+ {
+ context.Info.HelperFunctionsMask |= HelperFunctionsMask.AtomicMinMaxS32Storage;
+ }
+ break;
+ case Instruction.MultiplyHighS32:
+ context.Info.HelperFunctionsMask |= HelperFunctionsMask.MultiplyHighS32;
+ break;
+ case Instruction.MultiplyHighU32:
+ context.Info.HelperFunctionsMask |= HelperFunctionsMask.MultiplyHighU32;
+ break;
+ case Instruction.Shuffle:
+ context.Info.HelperFunctionsMask |= HelperFunctionsMask.Shuffle;
+ break;
+ case Instruction.ShuffleDown:
+ context.Info.HelperFunctionsMask |= HelperFunctionsMask.ShuffleDown;
+ break;
+ case Instruction.ShuffleUp:
+ context.Info.HelperFunctionsMask |= HelperFunctionsMask.ShuffleUp;
+ break;
+ case Instruction.ShuffleXor:
+ context.Info.HelperFunctionsMask |= HelperFunctionsMask.ShuffleXor;
+ break;
+ case Instruction.StoreShared16:
+ case Instruction.StoreShared8:
+ context.Info.HelperFunctionsMask |= HelperFunctionsMask.StoreSharedSmallInt;
+ break;
+ case Instruction.StoreStorage16:
+ case Instruction.StoreStorage8:
+ context.Info.HelperFunctionsMask |= HelperFunctionsMask.StoreStorageSmallInt;
+ break;
+ case Instruction.SwizzleAdd:
+ context.Info.HelperFunctionsMask |= HelperFunctionsMask.SwizzleAdd;
+ break;
+ case Instruction.FSIBegin:
+ case Instruction.FSIEnd:
+ context.Info.HelperFunctionsMask |= HelperFunctionsMask.FSI;
+ break;
+ }
+ }
+
+ /// <summary>
+ /// Infers the type of an operand from the operations that use it, following copies
+ /// into other local variables until a use with a known source type is found.
+ /// </summary>
+ /// <param name="dest">Operand whose type should be inferred</param>
+ /// <returns>The inferred type, or <see cref="AggregateType.S32"/> if no use determines it</returns>
+ private static AggregateType GetVarTypeFromUses(Operand dest)
+ {
+ // Operands already queued, so that chains of copies forming a cycle terminate.
+ HashSet<Operand> visited = new HashSet<Operand>();
+
+ Queue<Operand> pending = new Queue<Operand>();
+
+ // Queues the operand if it was not seen before; returns true when it was queued.
+ bool Enqueue(Operand operand)
+ {
+ if (visited.Add(operand))
+ {
+ pending.Enqueue(operand);
+
+ return true;
+ }
+
+ return false;
+ }
+
+ Enqueue(dest);
+
+ while (pending.TryDequeue(out Operand operand))
+ {
+ foreach (INode useNode in operand.UseOps)
+ {
+ if (useNode is not Operation operation)
+ {
+ continue;
+ }
+
+ if (operation.Inst == Instruction.Copy)
+ {
+ if (operation.Dest.Type == OperandType.LocalVariable)
+ {
+ // Copy into another local: follow that local instead. Once a new local is
+ // queued, the remaining uses of the current operand are not inspected.
+ if (Enqueue(operation.Dest))
+ {
+ break;
+ }
+ }
+ else
+ {
+ // Copy into a non-local operand: the operand kind determines the type.
+ return OperandInfo.GetVarType(operation.Dest.Type);
+ }
+ }
+ else
+ {
+ // Regular use: the type is the one the instruction expects for the
+ // first source slot where this operand appears.
+ for (int index = 0; index < operation.SourcesCount; index++)
+ {
+ if (operation.GetSource(index) == operand)
+ {
+ return InstructionInfo.GetSrcVarType(operation.Inst, index);
+ }
+ }
+ }
+ }
+ }
+
+ // No use determined the type, default to signed integer.
+ return AggregateType.S32;
+ }
+
+ /// <summary>
+ /// Checks whether every source is an <see cref="AstOperand"/> of the given type.
+ /// </summary>
+ /// <param name="sources">Sources to check</param>
+ /// <param name="type">Type that all sources must have</param>
+ /// <returns>True if all sources are operands with variable type <paramref name="type"/>, false otherwise</returns>
+ private static bool AreAllSourceTypesEqual(IAstNode[] sources, AggregateType type)
+ {
+     for (int i = 0; i < sources.Length; i++)
+     {
+         bool matches = sources[i] is AstOperand operand && operand.VarType == type;
+
+         if (!matches)
+         {
+             return false;
+         }
+     }
+
+     return true;
+ }
+
+ /// <summary>
+ /// Checks whether the instruction is handled as producing a vector result.
+ /// </summary>
+ /// <param name="inst">Instruction to check</param>
+ /// <returns>True for image loads and texture samples, false otherwise</returns>
+ private static bool IsVectorDestInst(Instruction inst)
+ {
+     return inst is Instruction.ImageLoad or Instruction.TextureSample;
+ }
+
+ /// <summary>
+ /// Checks whether the instruction is a conditional or unconditional branch.
+ /// </summary>
+ /// <param name="inst">Instruction to check</param>
+ /// <returns>True if the instruction is a branch, false otherwise</returns>
+ private static bool IsBranchInst(Instruction inst)
+ {
+     return inst is Instruction.Branch or Instruction.BranchIfFalse or Instruction.BranchIfTrue;
+ }
+
+ /// <summary>
+ /// Checks whether the instruction is one of the bitwise AND, XOR, NOT or OR operations.
+ /// </summary>
+ /// <param name="inst">Instruction to check</param>
+ /// <returns>True if the instruction is a bitwise operation, false otherwise</returns>
+ private static bool IsBitwiseInst(Instruction inst)
+ {
+     return inst is
+         Instruction.BitwiseAnd or
+         Instruction.BitwiseExclusiveOr or
+         Instruction.BitwiseNot or
+         Instruction.BitwiseOr;
+ }
+
+ /// <summary>
+ /// Gets the logical (boolean) instruction equivalent to a bitwise instruction.
+ /// </summary>
+ /// <param name="inst">Bitwise instruction</param>
+ /// <returns>The matching logical instruction</returns>
+ /// <exception cref="ArgumentException">Thrown when <paramref name="inst"/> is not a bitwise instruction</exception>
+ private static Instruction GetLogicalFromBitwiseInst(Instruction inst)
+ {
+     switch (inst)
+     {
+         case Instruction.BitwiseAnd:
+             return Instruction.LogicalAnd;
+         case Instruction.BitwiseExclusiveOr:
+             return Instruction.LogicalExclusiveOr;
+         case Instruction.BitwiseNot:
+             return Instruction.LogicalNot;
+         case Instruction.BitwiseOr:
+             return Instruction.LogicalOr;
+         default:
+             throw new ArgumentException($"Unexpected instruction \"{inst}\".");
+     }
+ }
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/StructuredIr/StructuredProgramContext.cs b/src/Ryujinx.Graphics.Shader/StructuredIr/StructuredProgramContext.cs
new file mode 100644
index 00000000..68bbdeb1
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/StructuredIr/StructuredProgramContext.cs
@@ -0,0 +1,330 @@
+using Ryujinx.Graphics.Shader.IntermediateRepresentation;
+using Ryujinx.Graphics.Shader.Translation;
+using System.Collections.Generic;
+using System.Linq;
+using System.Numerics;
+
+using static Ryujinx.Graphics.Shader.StructuredIr.AstHelper;
+
+namespace Ryujinx.Graphics.Shader.StructuredIr
+{
+ class StructuredProgramContext
+ {
+ // Blocks that end a do-while loop created by this context; no goto is emitted for their branch.
+ private HashSet<BasicBlock> _loopTails;
+
+ // Enclosing scopes saved when a child block is opened, restored when the child block ends.
+ private Stack<(AstBlock Block, int CurrEndIndex, int LoopEndIndex)> _blockStack;
+
+ // Maps IR local variables to the AST operand created for them, so each local has a single AST operand.
+ private Dictionary<Operand, AstOperand> _localsMap;
+
+ // Goto condition variable reset assignments, keyed by the index of the branch target block.
+ private Dictionary<int, AstAssignment> _gotoTempAsgs;
+
+ // Gotos created for branches that could not be expressed as if or do-while blocks.
+ private List<GotoStatement> _gotos;
+
+ // AST block currently receiving new nodes.
+ private AstBlock _currBlock;
+
+ // Index of the basic block where the current AST block ends, and where the innermost loop ends.
+ private int _currEndIndex;
+ private int _loopEndIndex;
+
+ // Function currently being built (set by EnterFunction).
+ public StructuredFunction CurrentFunction { get; private set; }
+
+ // Information about the whole structured program, filled as functions are processed.
+ public StructuredProgramInfo Info { get; }
+
+ // Shader configuration supplied on construction.
+ public ShaderConfig Config { get; }
+
+ /// <summary>
+ /// Creates a new structured program context, and registers the input definitions
+ /// that are required regardless of the shader code.
+ /// </summary>
+ /// <param name="config">Shader configuration</param>
+ public StructuredProgramContext(ShaderConfig config)
+ {
+     Config = config;
+     Info = new StructuredProgramInfo();
+
+     if (!config.GpPassthrough)
+     {
+         if (config.Stage == ShaderStage.Fragment)
+         {
+             // Potentially used for texture coordinate scaling.
+             Info.IoDefinitions.Add(new IoDefinition(StorageKind.Input, IoVariable.FragmentCoord));
+         }
+
+         return;
+     }
+
+     // One user defined input per bit set on the passthrough attributes mask, lowest bit first.
+     for (int remaining = config.PassthroughAttributes; remaining != 0;)
+     {
+         int location = BitOperations.TrailingZeroCount(remaining);
+
+         Info.IoDefinitions.Add(new IoDefinition(StorageKind.Input, IoVariable.UserDefined, location));
+
+         remaining &= ~(1 << location);
+     }
+
+     Info.IoDefinitions.Add(new IoDefinition(StorageKind.Input, IoVariable.Position));
+     Info.IoDefinitions.Add(new IoDefinition(StorageKind.Input, IoVariable.PointSize));
+     Info.IoDefinitions.Add(new IoDefinition(StorageKind.Input, IoVariable.ClipDistance));
+ }
+
+ /// <summary>
+ /// Starts building a new function, discarding all state from the previous one.
+ /// </summary>
+ /// <param name="blocksCount">Number of basic blocks on the function</param>
+ /// <param name="name">Function name</param>
+ /// <param name="returnType">Function return type</param>
+ /// <param name="inArguments">Types of the input arguments</param>
+ /// <param name="outArguments">Types of the output arguments</param>
+ public void EnterFunction(
+     int blocksCount,
+     string name,
+     AggregateType returnType,
+     AggregateType[] inArguments,
+     AggregateType[] outArguments)
+ {
+     // Fresh per-function bookkeeping.
+     _gotos = new List<GotoStatement>();
+     _gotoTempAsgs = new Dictionary<int, AstAssignment>();
+     _localsMap = new Dictionary<Operand, AstOperand>();
+     _blockStack = new Stack<(AstBlock, int, int)>();
+     _loopTails = new HashSet<BasicBlock>();
+
+     // The main block spans all basic blocks, and there is no enclosing loop yet.
+     _loopEndIndex = blocksCount;
+     _currEndIndex = blocksCount;
+
+     AstBlock mainBlock = new AstBlock(AstBlockType.Main);
+
+     _currBlock = mainBlock;
+
+     CurrentFunction = new StructuredFunction(mainBlock, name, returnType, inArguments, outArguments);
+ }
+
+ /// <summary>
+ /// Finishes the current function, adding it to the program functions list.
+ /// </summary>
+ public void LeaveFunction() => Info.Functions.Add(CurrentFunction);
+
+ /// <summary>
+ /// Prepares the context for the nodes of a basic block: closes every AST block that ends
+ /// at this basic block, emits a pending goto variable reset, and opens do-while loops that start here.
+ /// </summary>
+ /// <param name="block">Basic block being entered</param>
+ public void EnterBlock(BasicBlock block)
+ {
+     // Several nested AST blocks may end at the same basic block.
+     while (_currEndIndex == block.Index)
+     {
+         (AstBlock parentBlock, int parentEndIndex, int parentLoopEndIndex) = _blockStack.Pop();
+
+         _currBlock = parentBlock;
+         _currEndIndex = parentEndIndex;
+         _loopEndIndex = parentLoopEndIndex;
+     }
+
+     bool isGotoTarget = _gotoTempAsgs.TryGetValue(block.Index, out AstAssignment gotoTempAsg);
+
+     if (isGotoTarget)
+     {
+         AddGotoTempReset(block, gotoTempAsg);
+     }
+
+     LookForDoWhileStatements(block);
+ }
+
+ /// <summary>
+ /// Handles the branch that ends a basic block, creating an if block or a goto for it as needed.
+ /// </summary>
+ /// <param name="block">Basic block being left</param>
+ /// <param name="branchOp">Branch operation at the end of the block</param>
+ public void LeaveBlock(BasicBlock block, Operation branchOp) => LookForIfStatements(block, branchOp);
+
+ /// <summary>
+ /// Opens a do-while block when the given block is the target of a backwards branch that ends
+ /// inside the current scope; otherwise emits the goto condition variable reset for this block.
+ /// </summary>
+ /// <param name="block">Basic block being entered (potential loop head)</param>
+ private void LookForDoWhileStatements(BasicBlock block)
+ {
+ // Check if we have any predecessor whose index is greater than the
+ // current block, this indicates a loop.
+ bool done = false;
+
+ // Predecessors are visited from the highest index down, so the outermost loop tail comes first.
+ foreach (BasicBlock predecessor in block.Predecessors.OrderByDescending(x => x.Index))
+ {
+ // If not a loop, break.
+ if (predecessor.Index < block.Index)
+ {
+ break;
+ }
+
+ // Check if we can create a do-while loop here (only possible if the loop end
+ // falls inside the current scope), if not add a goto instead.
+ if (predecessor.Index < _currEndIndex && !done)
+ {
+ // Create do-while loop block. We must avoid inserting a goto at the end
+ // of the loop later, when the tail block is processed. So we add the predecessor
+ // to a list of loop tails to prevent it from being processed later.
+ Operation branchOp = (Operation)predecessor.GetLastOp();
+
+ NewBlock(AstBlockType.DoWhile, branchOp, predecessor.Index + 1);
+
+ _loopTails.Add(predecessor);
+
+ // Only one do-while block is created per loop head; remaining back edges take the path below.
+ done = true;
+ }
+ else
+ {
+ // Failed to create loop. Since this block is the loop head, we reset the
+ // goto condition variable here. The variable is always reset on the jump
+ // target, and this block is the jump target for some loop.
+ AddGotoTempReset(block, GetGotoTempAsg(block.Index));
+
+ break;
+ }
+ }
+ }
+
+ /// <summary>
+ /// Converts the branch at the end of a block into an if block when the target falls inside
+ /// the current scope, or into a goto (condition variable assignment plus branch node) otherwise.
+ /// </summary>
+ /// <param name="block">Basic block that ends with the branch</param>
+ /// <param name="branchOp">Branch operation at the end of the block</param>
+ private void LookForIfStatements(BasicBlock block, Operation branchOp)
+ {
+ if (block.Branch == null)
+ {
+ return;
+ }
+
+ // We can only enclose the "if" when the branch lands before
+ // the end of the current block. If the current enclosing block
+ // is not a loop, then we can also do so if the branch lands
+ // right at the end of the current block. When it is a loop,
+ // this is not valid as the loop condition would be evaluated,
+ // and it could erroneously jump back to the start of the loop.
+ bool inRange =
+ block.Branch.Index < _currEndIndex ||
+ (block.Branch.Index == _currEndIndex && block.Branch.Index < _loopEndIndex);
+
+ // A branch to this block or to an earlier one is a backwards jump (loop).
+ bool isLoop = block.Branch.Index <= block.Index;
+
+ if (inRange && !isLoop)
+ {
+ NewBlock(AstBlockType.If, branchOp, block.Branch.Index);
+ }
+ else if (!_loopTails.Contains(block))
+ {
+ // Loop tails are skipped here, as their branch is already the do-while condition.
+ AstAssignment gotoTempAsg = GetGotoTempAsg(block.Branch.Index);
+
+ // We use DoWhile type here, as the condition should be true for
+ // unconditional branches, or it should jump if the condition is true otherwise.
+ IAstNode cond = GetBranchCond(AstBlockType.DoWhile, branchOp);
+
+ AddNode(Assign(gotoTempAsg.Destination, cond));
+
+ AstOperation branch = new AstOperation(branchOp.Inst);
+
+ AddNode(branch);
+
+ GotoStatement gotoStmt = new GotoStatement(branch, gotoTempAsg, isLoop);
+
+ _gotos.Add(gotoStmt);
+ }
+ }
+
+ /// <summary>
+ /// Gets the assignment that resets the goto condition variable of a branch target,
+ /// creating the variable and the assignment on first use.
+ /// </summary>
+ /// <param name="index">Index of the branch target block</param>
+ /// <returns>The assignment of false to the goto condition variable of that block</returns>
+ private AstAssignment GetGotoTempAsg(int index)
+ {
+     if (!_gotoTempAsgs.TryGetValue(index, out AstAssignment gotoTempAsg))
+     {
+         gotoTempAsg = Assign(NewTemp(AggregateType.Bool), Const(IrConsts.False));
+
+         _gotoTempAsgs.Add(index, gotoTempAsg);
+     }
+
+     return gotoTempAsg;
+ }
+
+ /// <summary>
+ /// Adds the goto condition variable reset to the current block, unless it was added before.
+ /// </summary>
+ /// <param name="block">Basic block that is the jump target</param>
+ /// <param name="gotoTempAsg">Assignment that resets the goto condition variable</param>
+ private void AddGotoTempReset(BasicBlock block, AstAssignment gotoTempAsg)
+ {
+     // A node that already has a parent was added before, nothing to do.
+     bool alreadyAdded = gotoTempAsg.Parent != null;
+
+     if (alreadyAdded)
+     {
+         return;
+     }
+
+     AddNode(gotoTempAsg);
+
+     // Block 0 is the first to be executed, so the assignment just added already
+     // works as the initial reset. For any other block, an extra reset is placed
+     // at the very beginning of the function.
+     bool isFirstBlock = block.Index == 0;
+
+     if (!isFirstBlock)
+     {
+         CurrentFunction.MainBlock.AddFirst(Assign(gotoTempAsg.Destination, Const(IrConsts.False)));
+     }
+ }
+
+ /// <summary>
+ /// Opens a new child block whose condition is derived from a branch operation.
+ /// </summary>
+ /// <param name="type">Type of the block to create</param>
+ /// <param name="branchOp">Branch operation that provides the condition</param>
+ /// <param name="endIndex">Index of the basic block where the new block ends</param>
+ private void NewBlock(AstBlockType type, Operation branchOp, int endIndex)
+ {
+     IAstNode cond = GetBranchCond(type, branchOp);
+
+     NewBlock(type, cond, endIndex);
+ }
+
+ /// <summary>
+ /// Opens a new child block inside the current block and makes it the current one.
+ /// The enclosing scope is saved so that it can be restored when the child block ends.
+ /// </summary>
+ /// <param name="type">Type of the block to create</param>
+ /// <param name="cond">Condition of the block</param>
+ /// <param name="endIndex">Index of the basic block where the new block ends</param>
+ private void NewBlock(AstBlockType type, IAstNode cond, int endIndex)
+ {
+     AstBlock child = new AstBlock(type, cond);
+
+     // The child must be added while the parent is still the current block.
+     AddNode(child);
+
+     var enclosingScope = (_currBlock, _currEndIndex, _loopEndIndex);
+
+     _blockStack.Push(enclosingScope);
+
+     _currBlock = child;
+     _currEndIndex = endIndex;
+
+     // Only loops move the loop end, if blocks inherit it from the enclosing scope.
+     _loopEndIndex = type == AstBlockType.DoWhile ? endIndex : _loopEndIndex;
+ }
+
+ /// <summary>
+ /// Gets the condition node for a block created from a branch operation.
+ /// </summary>
+ /// <param name="type">Type of the block that will use the condition</param>
+ /// <param name="branchOp">Branch operation</param>
+ /// <returns>The condition under which the block body executes (if) or repeats (loop)</returns>
+ private IAstNode GetBranchCond(AstBlockType type, Operation branchOp)
+ {
+     bool isIf = type == AstBlockType.If;
+
+     if (branchOp.Inst == Instruction.Branch)
+     {
+         // Unconditional branch, the condition is a constant.
+         // An if block is always jumped over (never executed), a loop always repeats.
+         return Const(isIf ? IrConsts.False : IrConsts.True);
+     }
+
+     IAstNode cond = GetOperand(branchOp.GetSource(0));
+
+     // The if body runs when the branch is NOT taken, while a loop repeats when
+     // it IS taken, so the branch kind that needs negation depends on the block type.
+     Instruction negatedInst = isIf ? Instruction.BranchIfTrue : Instruction.BranchIfFalse;
+
+     return branchOp.Inst == negatedInst
+         ? new AstOperation(Instruction.LogicalNot, cond)
+         : cond;
+ }
+
+ /// <summary>
+ /// Appends a node to the block that is currently being built.
+ /// </summary>
+ /// <param name="node">Node to append</param>
+ public void AddNode(IAstNode node) => _currBlock.Add(node);
+
+ /// <summary>
+ /// Gets a snapshot of all the gotos created so far for the current function.
+ /// </summary>
+ /// <returns>A new array with the goto statements</returns>
+ public GotoStatement[] GetGotos() => _gotos.ToArray();
+
+ /// <summary>
+ /// Creates a new temporary local variable and registers it on the current function.
+ /// </summary>
+ /// <param name="type">Type of the temporary</param>
+ /// <returns>The operand for the new temporary</returns>
+ public AstOperand NewTemp(AggregateType type)
+ {
+     AstOperand temp = Local(type);
+     CurrentFunction.Locals.Add(temp);
+     return temp;
+ }
+
+ /// <summary>
+ /// Gets the AST operand for an IR operand. Local variables always map to the same AST operand
+ /// within a function, any other operand kind gets a new AST operand on every call.
+ /// </summary>
+ /// <param name="operand">IR operand, may be null</param>
+ /// <returns>The AST operand, or null if <paramref name="operand"/> is null</returns>
+ public AstOperand GetOperand(Operand operand)
+ {
+     if (operand == null)
+     {
+         return null;
+     }
+
+     if (operand.Type == OperandType.LocalVariable)
+     {
+         if (_localsMap.TryGetValue(operand, out AstOperand known))
+         {
+             return known;
+         }
+
+         // First time this local is seen, create its operand and register it on the function.
+         AstOperand created = new AstOperand(operand);
+
+         _localsMap.Add(operand, created);
+
+         CurrentFunction.Locals.Add(created);
+
+         return created;
+     }
+
+     if (operand.Type == OperandType.ConstantBuffer)
+     {
+         // Let the config know which constant buffer slots the shader reads.
+         Config.SetUsedConstantBuffer(operand.GetCbufSlot());
+     }
+
+     return new AstOperand(operand);
+ }
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/StructuredIr/StructuredProgramInfo.cs b/src/Ryujinx.Graphics.Shader/StructuredIr/StructuredProgramInfo.cs
new file mode 100644
index 00000000..c5104146
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/StructuredIr/StructuredProgramInfo.cs
@@ -0,0 +1,36 @@
+using System.Collections.Generic;
+
+namespace Ryujinx.Graphics.Shader.StructuredIr
+{
+ /// <summary>
+ /// Describes where a shader output is written by transform feedback.
+ /// The default value of the struct has <see cref="Valid"/> set to false.
+ /// </summary>
+ readonly struct TransformFeedbackOutput
+ {
+     /// <summary>True when the value was created through the constructor.</summary>
+     public readonly bool Valid;
+
+     /// <summary>Buffer value supplied on construction.</summary>
+     public readonly int Buffer;
+
+     /// <summary>Offset value supplied on construction.</summary>
+     public readonly int Offset;
+
+     /// <summary>Stride value supplied on construction.</summary>
+     public readonly int Stride;
+
+     /// <summary>
+     /// Creates a new, valid transform feedback output description.
+     /// </summary>
+     /// <param name="buffer">Buffer value</param>
+     /// <param name="offset">Offset value</param>
+     /// <param name="stride">Stride value</param>
+     public TransformFeedbackOutput(int buffer, int offset, int stride)
+     {
+         (Valid, Buffer, Offset, Stride) = (true, buffer, offset, stride);
+     }
+ }
+
+ /// <summary>
+ /// Result of the structured program generation: the functions in AST form, plus the
+ /// IO definitions and helper functions that the generated code requires.
+ /// </summary>
+ class StructuredProgramInfo
+ {
+     /// <summary>Functions of the program, in the order they were added.</summary>
+     public List<StructuredFunction> Functions { get; }
+
+     /// <summary>Set of input/output definitions used by the program.</summary>
+     public HashSet<IoDefinition> IoDefinitions { get; }
+
+     /// <summary>Mask of the helper functions required by the program.</summary>
+     public HelperFunctionsMask HelperFunctionsMask { get; set; }
+
+     /// <summary>
+     /// Creates a new, empty structured program information.
+     /// </summary>
+     public StructuredProgramInfo()
+     {
+         IoDefinitions = new();
+         Functions = new();
+     }
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/SupportBuffer.cs b/src/Ryujinx.Graphics.Shader/SupportBuffer.cs
new file mode 100644
index 00000000..5fe99327
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/SupportBuffer.cs
@@ -0,0 +1,58 @@
+using Ryujinx.Common.Memory;
+using System.Runtime.CompilerServices;
+
+namespace Ryujinx.Graphics.Shader
+{
+ // Generic 4-component vector with X, Y, Z and W fields declared in that order.
+ // SupportBuffer uses the size of Vector4<float> as its field size, so the
+ // fields here should not be reordered, added or removed without reviewing it.
+ public struct Vector4<T>
+ {
+ public T X;
+ public T Y;
+ public T Z;
+ public T W;
+ }
+
+ /// <summary>
+ /// Layout of the support buffer. The static members expose the size of the buffer and the
+ /// byte offset of each field, computed once from the actual struct layout.
+ /// </summary>
+ public struct SupportBuffer
+ {
+     // All values below are computed once in the static constructor and never change,
+     // so they are read-only to prevent accidental modification by other code.
+
+     /// <summary>Size in bytes of one <see cref="Vector4{T}"/> of floats.</summary>
+     public static readonly int FieldSize;
+
+     /// <summary>Size in bytes of the whole support buffer.</summary>
+     public static readonly int RequiredSize;
+
+     /// <summary>Byte offset of <see cref="FragmentAlphaTest"/>.</summary>
+     public static readonly int FragmentAlphaTestOffset;
+
+     /// <summary>Byte offset of <see cref="FragmentIsBgra"/>.</summary>
+     public static readonly int FragmentIsBgraOffset;
+
+     /// <summary>Byte offset of <see cref="ViewportInverse"/>.</summary>
+     public static readonly int ViewportInverseOffset;
+
+     /// <summary>Byte offset of <see cref="FragmentRenderScaleCount"/>.</summary>
+     public static readonly int FragmentRenderScaleCountOffset;
+
+     /// <summary>Byte offset of the first element of <see cref="RenderScale"/>.</summary>
+     public static readonly int GraphicsRenderScaleOffset;
+
+     /// <summary>Byte offset of the second element of <see cref="RenderScale"/> (one field after the first).</summary>
+     public static readonly int ComputeRenderScaleOffset;
+
+     public const int FragmentIsBgraCount = 8;
+     // One for the render target, 64 for the textures, and 8 for the images.
+     public const int RenderScaleMaxCount = 1 + 64 + 8;
+
+     /// <summary>
+     /// Computes the byte offset of a field relative to the start of the struct.
+     /// </summary>
+     /// <param name="storage">Struct instance that contains the field</param>
+     /// <param name="target">Field of <paramref name="storage"/> to get the offset of</param>
+     /// <returns>Offset of the field in bytes</returns>
+     private static int OffsetOf<T>(ref SupportBuffer storage, ref T target)
+     {
+         return (int)Unsafe.ByteOffset(ref Unsafe.As<SupportBuffer, T>(ref storage), ref target);
+     }
+
+     static SupportBuffer()
+     {
+         FieldSize = Unsafe.SizeOf<Vector4<float>>();
+         RequiredSize = Unsafe.SizeOf<SupportBuffer>();
+
+         // Offsets are measured on a real instance rather than hard-coded.
+         SupportBuffer instance = new SupportBuffer();
+
+         FragmentAlphaTestOffset = OffsetOf(ref instance, ref instance.FragmentAlphaTest);
+         FragmentIsBgraOffset = OffsetOf(ref instance, ref instance.FragmentIsBgra);
+         ViewportInverseOffset = OffsetOf(ref instance, ref instance.ViewportInverse);
+         FragmentRenderScaleCountOffset = OffsetOf(ref instance, ref instance.FragmentRenderScaleCount);
+         GraphicsRenderScaleOffset = OffsetOf(ref instance, ref instance.RenderScale);
+         ComputeRenderScaleOffset = GraphicsRenderScaleOffset + FieldSize;
+     }
+
+     // Instance fields: their declaration order defines the buffer layout, do not reorder.
+     public Vector4<int> FragmentAlphaTest;
+     public Array8<Vector4<int>> FragmentIsBgra;
+     public Vector4<float> ViewportInverse;
+     public Vector4<int> FragmentRenderScaleCount;
+
+     // Render scale max count: 1 + 64 + 8. First scale is fragment output scale, others are textures/image inputs.
+     public Array73<Vector4<float>> RenderScale;
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/TessPatchType.cs b/src/Ryujinx.Graphics.Shader/TessPatchType.cs
new file mode 100644
index 00000000..2361b69f
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/TessPatchType.cs
@@ -0,0 +1,22 @@
+namespace Ryujinx.Graphics.Shader
+{
+ // Tessellation patch type. The numeric values are explicit, so members must keep their values.
+ public enum TessPatchType
+ {
+ Isolines = 0,
+ Triangles = 1,
+ Quads = 2
+ }
+
+ static class TessPatchTypeExtensions
+ {
+     /// <summary>
+     /// Gets the GLSL name of a tessellation patch type.
+     /// </summary>
+     /// <param name="type">Patch type</param>
+     /// <returns>The GLSL name; any value other than isolines or quads yields "triangles"</returns>
+     public static string ToGlsl(this TessPatchType type)
+     {
+         switch (type)
+         {
+             case TessPatchType.Isolines:
+                 return "isolines";
+             case TessPatchType.Quads:
+                 return "quads";
+             default:
+                 return "triangles";
+         }
+     }
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/TessSpacing.cs b/src/Ryujinx.Graphics.Shader/TessSpacing.cs
new file mode 100644
index 00000000..35c44190
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/TessSpacing.cs
@@ -0,0 +1,22 @@
+namespace Ryujinx.Graphics.Shader
+{
+ // Tessellation spacing mode. The numeric values are explicit, so members must keep their values.
+ public enum TessSpacing
+ {
+ EqualSpacing = 0,
+ // NOTE(review): "Event" looks like a typo for "Even" (this value maps to the GLSL name
+ // "fractional_even_spacing"). The member is public, so renaming it would break callers.
+ FractionalEventSpacing = 1,
+ FractionalOddSpacing = 2
+ }
+
+ static class TessSpacingExtensions
+ {
+     /// <summary>
+     /// Gets the GLSL name of a tessellation spacing mode.
+     /// </summary>
+     /// <param name="spacing">Spacing mode</param>
+     /// <returns>The GLSL name; any value other than the two fractional modes yields "equal_spacing"</returns>
+     public static string ToGlsl(this TessSpacing spacing)
+     {
+         switch (spacing)
+         {
+             case TessSpacing.FractionalEventSpacing:
+                 return "fractional_even_spacing";
+             case TessSpacing.FractionalOddSpacing:
+                 return "fractional_odd_spacing";
+             default:
+                 return "equal_spacing";
+         }
+     }
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/TextureDescriptor.cs b/src/Ryujinx.Graphics.Shader/TextureDescriptor.cs
new file mode 100644
index 00000000..85ea9adb
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/TextureDescriptor.cs
@@ -0,0 +1,34 @@
+namespace Ryujinx.Graphics.Shader
+{
+ // Describes a texture or image used by a shader: its binding, type and format,
+ // the constant buffer slot and handle index it is read from, and its usage flags.
+ public struct TextureDescriptor
+ {
+ // New fields should be added to the end of the struct to keep disk shader cache compatibility.
+
+ public readonly int Binding;
+
+ public readonly SamplerType Type;
+ public readonly TextureFormat Format;
+
+ public readonly int CbufSlot;
+ public readonly int HandleIndex;
+
+ // The only mutable field, updated through SetFlag.
+ public TextureUsageFlags Flags;
+
+ // Creates a descriptor with no usage flags set.
+ public TextureDescriptor(int binding, SamplerType type, TextureFormat format, int cbufSlot, int handleIndex)
+ {
+ Binding = binding;
+ Type = type;
+ Format = format;
+ CbufSlot = cbufSlot;
+ HandleIndex = handleIndex;
+ Flags = TextureUsageFlags.None;
+ }
+
+ // Adds the given flag(s) to this instance and returns the updated value.
+ // As this is a struct, the returned value is a copy: when called on a copy of a
+ // stored descriptor, the stored one only changes if the result is written back.
+ public TextureDescriptor SetFlag(TextureUsageFlags flag)
+ {
+ Flags |= flag;
+
+ return this;
+ }
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/TextureFormat.cs b/src/Ryujinx.Graphics.Shader/TextureFormat.cs
new file mode 100644
index 00000000..d4c8b96b
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/TextureFormat.cs
@@ -0,0 +1,128 @@
+using Ryujinx.Graphics.Shader.Translation;
+
+namespace Ryujinx.Graphics.Shader
+{
+ // Texture formats known to the shader translator. Member names list the bit size of each
+ // component followed by the component type (Unorm, Snorm, Uint, Sint or Float).
+ // Values are implicit (Unknown is 0), so the numeric value of a member depends on its position.
+ public enum TextureFormat
+ {
+ Unknown,
+ R8Unorm,
+ R8Snorm,
+ R8Uint,
+ R8Sint,
+ R16Float,
+ R16Unorm,
+ R16Snorm,
+ R16Uint,
+ R16Sint,
+ R32Float,
+ R32Uint,
+ R32Sint,
+ R8G8Unorm,
+ R8G8Snorm,
+ R8G8Uint,
+ R8G8Sint,
+ R16G16Float,
+ R16G16Unorm,
+ R16G16Snorm,
+ R16G16Uint,
+ R16G16Sint,
+ R32G32Float,
+ R32G32Uint,
+ R32G32Sint,
+ R8G8B8A8Unorm,
+ R8G8B8A8Snorm,
+ R8G8B8A8Uint,
+ R8G8B8A8Sint,
+ R16G16B16A16Float,
+ R16G16B16A16Unorm,
+ R16G16B16A16Snorm,
+ R16G16B16A16Uint,
+ R16G16B16A16Sint,
+ R32G32B32A32Float,
+ R32G32B32A32Uint,
+ R32G32B32A32Sint,
+ R10G10B10A2Unorm,
+ R10G10B10A2Uint,
+ R11G11B10Float
+ }
+
+ static class TextureFormatExtensions
+ {
+     /// <summary>
+     /// Gets the GLSL image format name for a texture format.
+     /// </summary>
+     /// <param name="format">Texture format</param>
+     /// <returns>The GLSL format name, or an empty string if the format has no name here</returns>
+     public static string ToGlslFormat(this TextureFormat format) => format switch
+     {
+         // Single component.
+         TextureFormat.R8Unorm => "r8",
+         TextureFormat.R8Snorm => "r8_snorm",
+         TextureFormat.R8Uint => "r8ui",
+         TextureFormat.R8Sint => "r8i",
+         TextureFormat.R16Float => "r16f",
+         TextureFormat.R16Unorm => "r16",
+         TextureFormat.R16Snorm => "r16_snorm",
+         TextureFormat.R16Uint => "r16ui",
+         TextureFormat.R16Sint => "r16i",
+         TextureFormat.R32Float => "r32f",
+         TextureFormat.R32Uint => "r32ui",
+         TextureFormat.R32Sint => "r32i",
+
+         // Two components.
+         TextureFormat.R8G8Unorm => "rg8",
+         TextureFormat.R8G8Snorm => "rg8_snorm",
+         TextureFormat.R8G8Uint => "rg8ui",
+         TextureFormat.R8G8Sint => "rg8i",
+         TextureFormat.R16G16Float => "rg16f",
+         TextureFormat.R16G16Unorm => "rg16",
+         TextureFormat.R16G16Snorm => "rg16_snorm",
+         TextureFormat.R16G16Uint => "rg16ui",
+         TextureFormat.R16G16Sint => "rg16i",
+         TextureFormat.R32G32Float => "rg32f",
+         TextureFormat.R32G32Uint => "rg32ui",
+         TextureFormat.R32G32Sint => "rg32i",
+
+         // Four components.
+         TextureFormat.R8G8B8A8Unorm => "rgba8",
+         TextureFormat.R8G8B8A8Snorm => "rgba8_snorm",
+         TextureFormat.R8G8B8A8Uint => "rgba8ui",
+         TextureFormat.R8G8B8A8Sint => "rgba8i",
+         TextureFormat.R16G16B16A16Float => "rgba16f",
+         TextureFormat.R16G16B16A16Unorm => "rgba16",
+         TextureFormat.R16G16B16A16Snorm => "rgba16_snorm",
+         TextureFormat.R16G16B16A16Uint => "rgba16ui",
+         TextureFormat.R16G16B16A16Sint => "rgba16i",
+         TextureFormat.R32G32B32A32Float => "rgba32f",
+         TextureFormat.R32G32B32A32Uint => "rgba32ui",
+         TextureFormat.R32G32B32A32Sint => "rgba32i",
+
+         // Packed formats.
+         TextureFormat.R10G10B10A2Unorm => "rgb10_a2",
+         TextureFormat.R10G10B10A2Uint => "rgb10_a2ui",
+         TextureFormat.R11G11B10Float => "r11f_g11f_b10f",
+
+         _ => string.Empty
+     };
+
+     /// <summary>
+     /// Gets the type of the components of a texture format.
+     /// </summary>
+     /// <param name="format">Texture format</param>
+     /// <returns>
+     /// Unsigned integer for Uint formats, signed integer for Sint formats,
+     /// and 32-bit float for everything else (including <see cref="TextureFormat.Unknown"/>)
+     /// </returns>
+     public static AggregateType GetComponentType(this TextureFormat format) => format switch
+     {
+         TextureFormat.R8Uint or
+         TextureFormat.R16Uint or
+         TextureFormat.R32Uint or
+         TextureFormat.R8G8Uint or
+         TextureFormat.R16G16Uint or
+         TextureFormat.R32G32Uint or
+         TextureFormat.R8G8B8A8Uint or
+         TextureFormat.R16G16B16A16Uint or
+         TextureFormat.R32G32B32A32Uint or
+         TextureFormat.R10G10B10A2Uint => AggregateType.U32,
+
+         TextureFormat.R8Sint or
+         TextureFormat.R16Sint or
+         TextureFormat.R32Sint or
+         TextureFormat.R8G8Sint or
+         TextureFormat.R16G16Sint or
+         TextureFormat.R32G32Sint or
+         TextureFormat.R8G8B8A8Sint or
+         TextureFormat.R16G16B16A16Sint or
+         TextureFormat.R32G32B32A32Sint => AggregateType.S32,
+
+         _ => AggregateType.FP32
+     };
+ }
+}
diff --git a/src/Ryujinx.Graphics.Shader/TextureHandle.cs b/src/Ryujinx.Graphics.Shader/TextureHandle.cs
new file mode 100644
index 00000000..39d5c1c3
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/TextureHandle.cs
@@ -0,0 +1,124 @@
+using System;
+using System.Runtime.CompilerServices;
+
+namespace Ryujinx.Graphics.Shader
+{
+ // Kind of a packed texture handle, stored on the upper bits (28 and up) of the value
+ // produced by TextureHandle.PackOffsets. It selects how TextureHandle.ReadPackedId
+ // obtains the sampler part of the handle.
+ public enum TextureHandleType
+ {
+ CombinedSampler = 0, // Must be 0.
+ // Sampler word is read from the sampler buffer and combined as is.
+ SeparateSamplerHandle = 1,
+ // Sampler word is read from the sampler buffer and shifted left by 20 bits before being combined.
+ SeparateSamplerId = 2,
+ // The sampler offset field itself is used as the value, shifted left by 20 bits before being combined.
+ SeparateConstantSamplerHandle = 3
+ }
+
+ /// <summary>
+ /// Helpers to pack and unpack the texture handles and constant buffer slots used by shaders.
+ /// </summary>
+ public static class TextureHandle
+ {
+     /// <summary>
+     /// Packs two constant buffer slots into a single value.
+     /// The second slot is stored incremented by one on the upper 16 bits.
+     /// </summary>
+     /// <param name="cbufSlot0">First constant buffer slot (lower bits)</param>
+     /// <param name="cbufSlot1">Second constant buffer slot (upper 16 bits)</param>
+     /// <returns>The packed slots</returns>
+     [MethodImpl(MethodImplOptions.AggressiveInlining)]
+     public static int PackSlots(int cbufSlot0, int cbufSlot1)
+     {
+         int upper = (cbufSlot1 + 1) << 16;
+
+         return cbufSlot0 | upper;
+     }
+
+     /// <summary>
+     /// Unpacks the texture and sampler constant buffer slots from a packed value.
+     /// </summary>
+     /// <param name="slots">Packed slots; a negative value selects the default buffer for both</param>
+     /// <param name="defaultTextureBufferIndex">Buffer index used when <paramref name="slots"/> is negative</param>
+     /// <returns>The texture buffer index and the sampler buffer index, in that order</returns>
+     [MethodImpl(MethodImplOptions.AggressiveInlining)]
+     public static (int, int) UnpackSlots(int slots, int defaultTextureBufferIndex)
+     {
+         if (slots < 0)
+         {
+             return (defaultTextureBufferIndex, defaultTextureBufferIndex);
+         }
+
+         int textureBufferIndex = (ushort)slots;
+
+         // Zero on the upper half means "no separate sampler buffer", otherwise it is the slot plus one.
+         uint upper = (uint)slots >> 16;
+
+         int samplerBufferIndex = upper == 0 ? textureBufferIndex : (int)upper - 1;
+
+         return (textureBufferIndex, samplerBufferIndex);
+     }
+
+     /// <summary>
+     /// Packs two constant buffer offsets and a handle type into a single handle.
+     /// The first offset starts at bit 0, the second at bit 14 and the type at bit 28.
+     /// </summary>
+     /// <param name="cbufOffset0">First offset</param>
+     /// <param name="cbufOffset1">Second offset</param>
+     /// <param name="type">Handle type</param>
+     /// <returns>The packed handle</returns>
+     [MethodImpl(MethodImplOptions.AggressiveInlining)]
+     public static int PackOffsets(int cbufOffset0, int cbufOffset1, TextureHandleType type)
+     {
+         int typeBits = (int)type << 28;
+         int offset1Bits = cbufOffset1 << 14;
+
+         return cbufOffset0 | offset1Bits | typeBits;
+     }
+
+     /// <summary>
+     /// Unpacks a handle created by <see cref="PackOffsets"/>.
+     /// </summary>
+     /// <param name="handle">Packed handle</param>
+     /// <returns>The first offset (14 bits), the second offset (14 bits) and the handle type, in that order</returns>
+     [MethodImpl(MethodImplOptions.AggressiveInlining)]
+     public static (int, int, TextureHandleType) UnpackOffsets(int handle)
+     {
+         int offset0 = handle & 0x3fff;
+         int offset1 = (handle >> 14) & 0x3fff;
+
+         TextureHandleType type = (TextureHandleType)((uint)handle >> 28);
+
+         return (offset0, offset1, type);
+     }
+
+     /// <summary>
+     /// Unpacks the texture ID from the real texture handle.
+     /// </summary>
+     /// <param name="packedId">The real texture handle</param>
+     /// <returns>The texture ID (lower 20 bits)</returns>
+     [MethodImpl(MethodImplOptions.AggressiveInlining)]
+     public static int UnpackTextureId(int packedId)
+     {
+         return packedId & 0xfffff;
+     }
+
+     /// <summary>
+     /// Unpacks the sampler ID from the real texture handle.
+     /// </summary>
+     /// <param name="packedId">The real texture handle</param>
+     /// <returns>The sampler ID (upper 12 bits)</returns>
+     [MethodImpl(MethodImplOptions.AggressiveInlining)]
+     public static int UnpackSamplerId(int packedId)
+     {
+         return (packedId >> 20) & 0xfff;
+     }
+
+     /// <summary>
+     /// Reads a packed texture and sampler ID (basically, the real texture handle)
+     /// from a given texture/sampler constant buffer.
+     /// </summary>
+     /// <param name="wordOffset">A word offset of the handle on the buffer (the "fake" shader handle)</param>
+     /// <param name="cachedTextureBuffer">The constant buffer to fetch texture IDs from</param>
+     /// <param name="cachedSamplerBuffer">The constant buffer to fetch sampler IDs from</param>
+     /// <returns>The packed texture and sampler ID (the real texture handle)</returns>
+     [MethodImpl(MethodImplOptions.AggressiveInlining)]
+     public static int ReadPackedId(int wordOffset, ReadOnlySpan<int> cachedTextureBuffer, ReadOnlySpan<int> cachedSamplerBuffer)
+     {
+         // The "fake" handle holds a 14-bit texture word offset on bits 0-13, a 14-bit sampler word
+         // offset (or constant sampler value) on bits 14-27, and the handle type above that.
+         // The upper fields are only used for separate textures and samplers, which the shader
+         // translator turns into regular texture accesses using those special handles.
+         (int textureWordOffset, int samplerWordOffset, TextureHandleType handleType) = UnpackOffsets(wordOffset);
+
+         // An empty buffer yields zero instead of being indexed.
+         int handle = cachedTextureBuffer.Length != 0 ? cachedTextureBuffer[textureWordOffset] : 0;
+
+         if (handleType == TextureHandleType.CombinedSampler)
+         {
+             return handle;
+         }
+
+         int samplerHandle = handleType switch
+         {
+             // The sampler field is the value itself, nothing is read from the buffer.
+             TextureHandleType.SeparateConstantSamplerHandle => samplerWordOffset,
+             _ => cachedSamplerBuffer.Length != 0 ? cachedSamplerBuffer[samplerWordOffset] : 0
+         };
+
+         // IDs must be moved to the sampler ID position of the handle (bits 20 and up),
+         // a separate sampler handle is combined as is.
+         if (handleType is TextureHandleType.SeparateSamplerId or TextureHandleType.SeparateConstantSamplerHandle)
+         {
+             samplerHandle <<= 20;
+         }
+
+         return handle | samplerHandle;
+     }
+ }
+}
diff --git a/src/Ryujinx.Graphics.Shader/TextureUsageFlags.cs b/src/Ryujinx.Graphics.Shader/TextureUsageFlags.cs
new file mode 100644
index 00000000..2419a1de
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/TextureUsageFlags.cs
@@ -0,0 +1,19 @@
+using System;
+
+namespace Ryujinx.Graphics.Shader
+{
+ /// <summary>
+ /// Flags that indicate how a texture will be used in a shader. Values are single bits and can be combined.
+ /// </summary>
+ [Flags]
+ public enum TextureUsageFlags
+ {
+ None = 0, // No usage flag set.
+
+ // Integer sampled textures must be noted for resolution scaling.
+ ResScaleUnsupported = 1 << 0,
+ NeedsScaleValue = 1 << 1, // NOTE(review): presumably marks textures whose scale factor the shader reads - confirm at the use sites.
+ ImageStore = 1 << 2, // NOTE(review): presumably set for images the shader writes to - confirm at the use sites.
+ ImageCoherent = 1 << 3 // NOTE(review): presumably set for images needing coherent access - confirm at the use sites.
+ }
+}
diff --git a/src/Ryujinx.Graphics.Shader/Translation/AggregateType.cs b/src/Ryujinx.Graphics.Shader/Translation/AggregateType.cs
new file mode 100644
index 00000000..24993e00
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/Translation/AggregateType.cs
@@ -0,0 +1,25 @@
+namespace Ryujinx.Graphics.Shader.Translation
+{
+ enum AggregateType // Bit-packed type descriptor: element type in bits 0-7, element count in bits 8-9, array flag in bit 10.
+ {
+ Invalid, // 0; the element types below take the consecutive values 1 to 6.
+ Void,
+ Bool,
+ FP32,
+ FP64,
+ S32,
+ U32,
+
+ ElementTypeMask = 0xff, // Isolates the element type stored in the low byte.
+
+ ElementCountShift = 8,
+ ElementCountMask = 3 << ElementCountShift, // Two-bit field (bits 8-9) selecting Scalar/Vector2/Vector3/Vector4.
+
+ Scalar = 0 << ElementCountShift, // Evaluates to 0 (same value as Invalid); detect it by comparing (value & ElementCountMask), not by testing a bit.
+ Vector2 = 1 << ElementCountShift,
+ Vector3 = 2 << ElementCountShift,
+ Vector4 = 3 << ElementCountShift,
+
+ Array = 1 << 10 // Independent flag bit, placed just above the element count field.
+ }
+}
diff --git a/src/Ryujinx.Graphics.Shader/Translation/AttributeConsts.cs b/src/Ryujinx.Graphics.Shader/Translation/AttributeConsts.cs
new file mode 100644
index 00000000..683b0d8a
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/Translation/AttributeConsts.cs
@@ -0,0 +1,36 @@
+namespace Ryujinx.Graphics.Shader.Translation
+{
+ static class AttributeConsts // Numeric attribute identifiers; NOTE(review): values look like byte offsets into an attribute map - confirm.
+ {
+ public const int PrimitiveId = 0x060;
+ public const int Layer = 0x064;
+ public const int PositionX = 0x070;
+ public const int PositionY = 0x074;
+ public const int FrontColorDiffuseR = 0x280;
+ public const int BackColorDiffuseR = 0x2a0;
+ public const int ClipDistance0 = 0x2c0; // The eight clip distances are spaced 4 apart (0x2c0 to 0x2dc).
+ public const int ClipDistance1 = 0x2c4;
+ public const int ClipDistance2 = 0x2c8;
+ public const int ClipDistance3 = 0x2cc;
+ public const int ClipDistance4 = 0x2d0;
+ public const int ClipDistance5 = 0x2d4;
+ public const int ClipDistance6 = 0x2d8;
+ public const int ClipDistance7 = 0x2dc;
+ public const int FogCoord = 0x2e8;
+ public const int TessCoordX = 0x2f0;
+ public const int TessCoordY = 0x2f4;
+ public const int InstanceId = 0x2f8;
+ public const int VertexId = 0x2fc;
+ public const int TexCoordCount = 10;
+ public const int TexCoordBase = 0x300;
+ public const int TexCoordEnd = TexCoordBase + TexCoordCount * 16; // 0x3a0: exclusive end, 16 units per texture coordinate.
+ public const int FrontFacing = 0x3fc;
+
+ public const int UserAttributesCount = 32;
+ public const int UserAttributeBase = 0x80;
+ public const int UserAttributeEnd = UserAttributeBase + UserAttributesCount * 16; // 0x280: exclusive end, 16 units per attribute; equals FrontColorDiffuseR.
+
+ public const int UserAttributePerPatchBase = 0x18; // Per-patch range is given as explicit base/end values rather than computed from a count.
+ public const int UserAttributePerPatchEnd = 0x200;
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/Translation/ControlFlowGraph.cs b/src/Ryujinx.Graphics.Shader/Translation/ControlFlowGraph.cs
new file mode 100644
index 00000000..65328fd7
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/Translation/ControlFlowGraph.cs
@@ -0,0 +1,176 @@
+using Ryujinx.Graphics.Shader.IntermediateRepresentation;
+using System.Collections.Generic;
+
+namespace Ryujinx.Graphics.Shader.Translation
+{
+ class ControlFlowGraph
+ {
+ public BasicBlock[] Blocks { get; } // The array handed to the constructor, stored as-is.
+ public BasicBlock[] PostOrderBlocks { get; } // Blocks reached from Blocks[0], in the order the constructor's depth-first walk finished them.
+ public int[] PostOrderMap { get; } // Indexed by BasicBlock.Index; gives that block's position in PostOrderBlocks.
+
+ public ControlFlowGraph(BasicBlock[] blocks) // Stores the blocks and computes a post-order numbering starting from blocks[0]; blocks must not be empty.
+ {
+ Blocks = blocks;
+
+ HashSet<BasicBlock> visited = new HashSet<BasicBlock>();
+
+ Stack<BasicBlock> blockStack = new Stack<BasicBlock>(); // Explicit stack: the depth-first walk is iterative, not recursive.
+
+ List<BasicBlock> postOrderBlocks = new List<BasicBlock>(blocks.Length);
+
+ PostOrderMap = new int[blocks.Length]; // Entries of blocks never reached by the walk keep the default value 0.
+
+ visited.Add(blocks[0]);
+
+ blockStack.Push(blocks[0]);
+
+ while (blockStack.TryPop(out BasicBlock block))
+ {
+ if (block.Next != null && visited.Add(block.Next)) // HashSet.Add returns false when already visited, so each block is descended into once.
+ {
+ blockStack.Push(block); // Re-push the current block so it is looked at again after the successor is done.
+ blockStack.Push(block.Next);
+ }
+ else if (block.Branch != null && visited.Add(block.Branch)) // The branch target is explored only after the fall-through path.
+ {
+ blockStack.Push(block);
+ blockStack.Push(block.Branch);
+ }
+ else
+ {
+ PostOrderMap[block.Index] = postOrderBlocks.Count; // Both successors are handled (or absent): the block is finished.
+
+ postOrderBlocks.Add(block);
+ }
+ }
+
+ PostOrderBlocks = postOrderBlocks.ToArray(); // blocks[0] sits at the bottom of the stack, so it is always the last entry.
+ }
+
+ public static ControlFlowGraph Create(Operation[] operations) // Splits a linear operation list into basic blocks at labels and branches, then drops blocks without predecessors.
+ {
+ Dictionary<Operand, BasicBlock> labels = new Dictionary<Operand, BasicBlock>(); // Label operand -> block that starts (or will start) at that label.
+
+ List<BasicBlock> blocks = new List<BasicBlock>();
+
+ BasicBlock currentBlock = null;
+
+ void NextBlock(BasicBlock nextBlock) // Makes nextBlock current, linking it as fall-through successor when control can reach it that way.
+ {
+ if (currentBlock != null && !EndsWithUnconditionalInst(currentBlock.GetLastOp()))
+ {
+ currentBlock.Next = nextBlock;
+ }
+
+ currentBlock = nextBlock;
+ }
+
+ void NewNextBlock() // Creates a block indexed by its list position, appends it and makes it current.
+ {
+ BasicBlock block = new BasicBlock(blocks.Count);
+
+ blocks.Add(block);
+
+ NextBlock(block);
+ }
+
+ bool needsNewBlock = true; // True at the start and after every branch: the next non-label operation opens a new block.
+
+ for (int index = 0; index < operations.Length; index++)
+ {
+ Operation operation = operations[index];
+
+ if (operation.Inst == Instruction.MarkLabel) // Label markers only delimit blocks; they are not stored inside any block.
+ {
+ Operand label = operation.Dest;
+
+ if (labels.TryGetValue(label, out BasicBlock nextBlock)) // An earlier branch already created the block for this label.
+ {
+ nextBlock.Index = blocks.Count; // The placeholder gets its index only now that its position is known.
+
+ blocks.Add(nextBlock);
+
+ NextBlock(nextBlock);
+ }
+ else
+ {
+ NewNextBlock();
+
+ labels.Add(label, currentBlock); // Label seen before any branch to it: register the block for later branches.
+ }
+ }
+ else
+ {
+ if (needsNewBlock)
+ {
+ NewNextBlock();
+ }
+
+ currentBlock.Operations.AddLast(operation);
+ }
+
+ needsNewBlock = operation.Inst == Instruction.Branch ||
+ operation.Inst == Instruction.BranchIfTrue ||
+ operation.Inst == Instruction.BranchIfFalse;
+
+ if (needsNewBlock)
+ {
+ Operand label = operation.Dest; // For branch operations the destination operand holds the target label.
+
+ if (!labels.TryGetValue(label, out BasicBlock branchBlock))
+ {
+ branchBlock = new BasicBlock(); // Target label not marked yet: create a placeholder that is added to the list when the label is reached.
+
+ labels.Add(label, branchBlock);
+ }
+
+ currentBlock.Branch = branchBlock;
+ }
+ }
+
+ // Remove unreachable blocks.
+ bool hasUnreachable;
+
+ do // Repeats until a full pass removes nothing.
+ {
+ hasUnreachable = false;
+
+ for (int blkIndex = 1; blkIndex < blocks.Count; blkIndex++) // Starts at 1: the entry block is never removed.
+ {
+ BasicBlock block = blocks[blkIndex];
+
+ if (block.Predecessors.Count == 0)
+ {
+ block.Next = null; // NOTE(review): presumably the setters also update the successors' Predecessors lists - confirm in BasicBlock.
+ block.Branch = null;
+ blocks.RemoveAt(blkIndex--); // Step back so the element shifted into this slot is examined too.
+ hasUnreachable = true;
+ }
+ else
+ {
+ block.Index = blkIndex; // Keep Index equal to the position in the (possibly compacted) list.
+ }
+ }
+ } while (hasUnreachable);
+
+ return new ControlFlowGraph(blocks.ToArray());
+ }
+
+ private static bool EndsWithUnconditionalInst(INode node) // True when node is a Branch, Discard or Return operation; Create uses this to skip the fall-through link.
+ {
+ if (node is Operation operation) // Any other node kind (or null) falls through to "false".
+ {
+ switch (operation.Inst)
+ {
+ case Instruction.Branch:
+ case Instruction.Discard:
+ case Instruction.Return:
+ return true;
+ }
+ }
+
+ return false;
+ }
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/Translation/Dominance.cs b/src/Ryujinx.Graphics.Shader/Translation/Dominance.cs
new file mode 100644
index 00000000..09c2eb0f
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/Translation/Dominance.cs
@@ -0,0 +1,94 @@
+using Ryujinx.Graphics.Shader.IntermediateRepresentation;
+
+namespace Ryujinx.Graphics.Shader.Translation
+{
+ static class Dominance
+ {
+ // These methods implement the algorithms described in the paper "A Simple, Fast Dominance Algorithm".
+ // https://www.cs.rice.edu/~keith/EMBED/dom.pdf
+ public static void FindDominators(ControlFlowGraph cfg) // Fills BasicBlock.ImmediateDominator for the blocks in cfg.PostOrderBlocks.
+ {
+ BasicBlock Intersect(BasicBlock block1, BasicBlock block2) // Walks both blocks up their dominator chains until they meet.
+ {
+ while (block1 != block2)
+ {
+ while (cfg.PostOrderMap[block1.Index] < cfg.PostOrderMap[block2.Index]) // The one with the lower post-order number moves up.
+ {
+ block1 = block1.ImmediateDominator;
+ }
+
+ while (cfg.PostOrderMap[block2.Index] < cfg.PostOrderMap[block1.Index])
+ {
+ block2 = block2.ImmediateDominator;
+ }
+ }
+
+ return block1;
+ }
+
+ cfg.Blocks[0].ImmediateDominator = cfg.Blocks[0]; // The entry block is recorded as its own immediate dominator.
+
+ bool modified;
+
+ do // Iterate until no immediate dominator changes.
+ {
+ modified = false;
+
+ for (int blkIndex = cfg.PostOrderBlocks.Length - 2; blkIndex >= 0; blkIndex--) // Reverse post-order; the last post-order entry is skipped (the entry block, given how ControlFlowGraph builds the array).
+ {
+ BasicBlock block = cfg.PostOrderBlocks[blkIndex];
+
+ BasicBlock newIDom = null;
+
+ foreach (BasicBlock predecessor in block.Predecessors)
+ {
+ if (predecessor.ImmediateDominator != null) // Predecessors without a dominator yet are ignored for now.
+ {
+ if (newIDom != null)
+ {
+ newIDom = Intersect(predecessor, newIDom);
+ }
+ else
+ {
+ newIDom = predecessor; // The first usable predecessor seeds the candidate.
+ }
+ }
+ }
+
+ if (block.ImmediateDominator != newIDom)
+ {
+ block.ImmediateDominator = newIDom;
+
+ modified = true;
+ }
+ }
+ }
+ while (modified);
+ }
+
+ public static void FindDominanceFrontiers(BasicBlock[] blocks) // Adds each join block to the DominanceFrontiers of the blocks on its predecessors' dominator chains; reads ImmediateDominator.
+ {
+ for (int blkIndex = 0; blkIndex < blocks.Length; blkIndex++)
+ {
+ BasicBlock block = blocks[blkIndex];
+
+ if (block.Predecessors.Count < 2) // Only blocks with two or more predecessors are processed.
+ {
+ continue;
+ }
+
+ for (int pBlkIndex = 0; pBlkIndex < block.Predecessors.Count; pBlkIndex++)
+ {
+ BasicBlock current = block.Predecessors[pBlkIndex];
+
+ while (current != block.ImmediateDominator) // Climb from the predecessor until the join block's immediate dominator is reached.
+ {
+ current.DominanceFrontiers.Add(block);
+
+ current = current.ImmediateDominator;
+ }
+ }
+ }
+ }
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/Translation/EmitterContext.cs b/src/Ryujinx.Graphics.Shader/Translation/EmitterContext.cs
new file mode 100644
index 00000000..112baccf
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/Translation/EmitterContext.cs
@@ -0,0 +1,492 @@
+using Ryujinx.Graphics.Shader.Decoders;
+using Ryujinx.Graphics.Shader.IntermediateRepresentation;
+using System.Collections.Generic;
+using System.Diagnostics;
+using System.Numerics;
+using System.Runtime.CompilerServices;
+
+using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
+
+namespace Ryujinx.Graphics.Shader.Translation
+{
+ class EmitterContext
+ {
+ public DecodedProgram Program { get; } // Set once from the constructor argument.
+ public ShaderConfig Config { get; } // Set once from the constructor argument; queried and updated throughout this class.
+
+ public bool IsNonMain { get; } // When true, PrepareForReturn returns without emitting anything.
+
+ public Block CurrBlock { get; set; }
+ public InstOp CurrOp { get; set; } // GetOp<T> reinterprets the RawOpCode of this value.
+
+ public int OperationsCount => _operations.Count; // Number of operations emitted so far.
+
+ private readonly struct BrxTarget // Data EnterBlock needs to emit the "selector == expected value" check at the start of a block.
+ {
+ public readonly Operand Selector; // Null in a default instance, which EnterBlock treats as "no check needed".
+ public readonly int ExpectedValue; // Value Selector is compared against.
+ public readonly ulong NextTargetAddress; // Address whose label is branched to when the comparison fails.
+
+ public BrxTarget(Operand selector, int expectedValue, ulong nextTargetAddress)
+ {
+ Selector = selector;
+ ExpectedValue = expectedValue;
+ NextTargetAddress = nextTargetAddress;
+ }
+ }
+
+ private class BlockLabel // Per-address record: the label operand plus an optional BrxTarget.
+ {
+ public readonly Operand Label;
+ public BrxTarget BrxTarget; // Stays default (Selector == null) unless SetBrxTarget assigns it.
+
+ public BlockLabel(Operand label)
+ {
+ Label = label;
+ }
+ }
+
+ private readonly List<Operation> _operations; // Operations in emission order; GetOperations returns a copy.
+ private readonly Dictionary<ulong, BlockLabel> _labels; // Address -> label record, filled on demand by EnsureBlockLabel.
+
+ public EmitterContext(DecodedProgram program, ShaderConfig config, bool isNonMain) // Stores the arguments, creates empty operation/label tables and emits the prologue.
+ {
+ Program = program;
+ Config = config;
+ IsNonMain = isNonMain;
+ _operations = new List<Operation>();
+ _labels = new Dictionary<ulong, BlockLabel>();
+
+ EmitStart(); // Must come after the assignments above: it reads Config and appends to _operations.
+ }
+
+ private void EmitStart() // Emits operations that must precede the translated code; currently only the Vulkan point size store.
+ {
+ if (Config.Stage == ShaderStage.Vertex &&
+ Config.Options.TargetApi == TargetApi.Vulkan &&
+ (Config.Options.Flags & TranslationFlags.VertexA) == 0) // Not done when the VertexA translation flag is set.
+ {
+ // Vulkan requires the point size to be always written on the shader if the primitive topology is points.
+ this.Store(StorageKind.Output, IoVariable.PointSize, null, ConstF(Config.GpuAccessor.QueryPointSize()));
+ }
+ }
+
+ public T GetOp<T>() where T : unmanaged // Returns CurrOp.RawOpCode reinterpreted bit-for-bit as T.
+ {
+ Debug.Assert(Unsafe.SizeOf<T>() == sizeof(ulong)); // T must be exactly 8 bytes; this is only verified in builds where Debug.Assert is active.
+ ulong op = CurrOp.RawOpCode; // Local copy, because Unsafe.As needs a ref to a variable.
+ return Unsafe.As<ulong, T>(ref op);
+ }
+
+ public Operand Add(Instruction inst, Operand dest = null, params Operand[] sources) // Appends a new operation and hands back dest (null when no destination was given).
+ {
+ Operation operation = new Operation(inst, dest, sources);
+
+ _operations.Add(operation);
+
+ return dest; // Returning the destination lets callers chain emitted values.
+ }
+
+ public Operand Add(Instruction inst, StorageKind storageKind, Operand dest = null, params Operand[] sources) // Same as the basic overload, but the operation also carries a storage kind.
+ {
+ Operation operation = new Operation(inst, storageKind, dest, sources);
+
+ _operations.Add(operation);
+
+ return dest;
+ }
+
+ public (Operand, Operand) Add(Instruction inst, (Operand, Operand) dest, params Operand[] sources) // Appends an operation with two destinations and returns the same pair.
+ {
+ Operand[] dests = new[] { dest.Item1, dest.Item2 };
+
+ Operation operation = new Operation(inst, 0, dests, sources); // NOTE(review): the meaning of the literal 0 depends on an Operation constructor not shown here - confirm.
+
+ Add(operation);
+
+ return dest;
+ }
+
+ public void Add(Operation operation) // Appends an already constructed operation to the emitted list.
+ {
+ _operations.Add(operation);
+ }
+
+ public TextureOperation CreateTextureOperation( // Convenience overload: forwards with TextureFormat.Unknown as the format.
+ Instruction inst,
+ SamplerType type,
+ TextureFlags flags,
+ int handle,
+ int compIndex,
+ Operand[] dests,
+ params Operand[] sources)
+ {
+ return CreateTextureOperation(inst, type, TextureFormat.Unknown, flags, handle, compIndex, dests, sources);
+ }
+
+ public TextureOperation CreateTextureOperation( // Builds a TextureOperation; it is returned to the caller, not appended to the operation list here.
+ Instruction inst,
+ SamplerType type,
+ TextureFormat format,
+ TextureFlags flags,
+ int handle,
+ int compIndex,
+ Operand[] dests,
+ params Operand[] sources)
+ {
+ if (!flags.HasFlag(TextureFlags.Bindless)) // Only non-bindless accesses are registered with the config.
+ {
+ Config.SetUsedTexture(inst, type, format, flags, TextureOperation.DefaultCbufSlot, handle);
+ }
+
+ return new TextureOperation(inst, type, format, flags, handle, compIndex, dests, sources);
+ }
+
+ public void FlagAttributeRead(int attribute) // Records feature usage implied by reading the given attribute in the current stage.
+ {
+ if (Config.Stage == ShaderStage.Vertex && attribute == AttributeConsts.InstanceId)
+ {
+ Config.SetUsedFeature(FeatureFlags.InstanceId);
+ }
+ else if (Config.Stage == ShaderStage.Fragment)
+ {
+ switch (attribute) // Only the X and Y position components are tracked here.
+ {
+ case AttributeConsts.PositionX:
+ case AttributeConsts.PositionY:
+ Config.SetUsedFeature(FeatureFlags.FragCoordXY);
+ break;
+ }
+ }
+ }
+
+ public void FlagAttributeWritten(int attribute) // Records clip distance and layer writes on the config.
+ {
+ if (Config.Stage == ShaderStage.Vertex) // Clip distance writes are tracked for the vertex stage only.
+ {
+ switch (attribute)
+ {
+ case AttributeConsts.ClipDistance0:
+ case AttributeConsts.ClipDistance1:
+ case AttributeConsts.ClipDistance2:
+ case AttributeConsts.ClipDistance3:
+ case AttributeConsts.ClipDistance4:
+ case AttributeConsts.ClipDistance5:
+ case AttributeConsts.ClipDistance6:
+ case AttributeConsts.ClipDistance7:
+ Config.SetClipDistanceWritten((attribute - AttributeConsts.ClipDistance0) / 4); // The constants are 4 apart, so this yields the index 0-7.
+ break;
+ }
+ }
+
+ if (Config.Stage != ShaderStage.Fragment && attribute == AttributeConsts.Layer) // Checked independently of the clip distance handling above.
+ {
+ Config.SetUsedFeature(FeatureFlags.RtLayer);
+ }
+ }
+
+ public void MarkLabel(Operand label) // Emits a MarkLabel operation whose destination operand is the label.
+ {
+ Add(Instruction.MarkLabel, label);
+ }
+
+ public Operand GetLabel(ulong address) // Returns the label operand for address, creating the record on first use.
+ {
+ return EnsureBlockLabel(address).Label;
+ }
+
+ public void SetBrxTarget(ulong address, Operand selector, int targetValue, ulong nextTargetAddress) // Attaches the selector check that EnterBlock emits when the block at address is entered.
+ {
+ BlockLabel blockLabel = EnsureBlockLabel(address);
+ Debug.Assert(blockLabel.BrxTarget.Selector == null); // A target is expected to be assigned at most once per address (debug-only check).
+ blockLabel.BrxTarget = new BrxTarget(selector, targetValue, nextTargetAddress);
+ }
+
+ public void EnterBlock(ulong address) // Marks the label for address and, if a BrxTarget was registered, emits its selector check.
+ {
+ BlockLabel blockLabel = EnsureBlockLabel(address);
+
+ MarkLabel(blockLabel.Label);
+
+ BrxTarget brxTarget = blockLabel.BrxTarget;
+
+ if (brxTarget.Selector != null) // A default BrxTarget has a null selector: nothing to emit.
+ {
+ this.BranchIfFalse(GetLabel(brxTarget.NextTargetAddress), this.ICompareEqual(brxTarget.Selector, Const(brxTarget.ExpectedValue))); // Selector mismatch: jump on to the next candidate target.
+ }
+ }
+
+ private BlockLabel EnsureBlockLabel(ulong address) // Get-or-create for the label record of an address.
+ {
+ if (!_labels.TryGetValue(address, out BlockLabel blockLabel))
+ {
+ blockLabel = new BlockLabel(Label()); // First request for this address: allocate a new label operand.
+
+ _labels.Add(address, blockLabel);
+ }
+
+ return blockLabel;
+ }
+
+ public void PrepareForVertexReturn() // Emits the position/layer output fix-ups that the config and GPU state queries ask for.
+ {
+ if (Config.GpuAccessor.QueryViewportTransformDisable())
+ {
+ Operand x = this.Load(StorageKind.Output, IoVariable.Position, null, Const(0));
+ Operand y = this.Load(StorageKind.Output, IoVariable.Position, null, Const(1));
+ Operand xScale = this.Load(StorageKind.Input, IoVariable.SupportBlockViewInverse, null, Const(0));
+ Operand yScale = this.Load(StorageKind.Input, IoVariable.SupportBlockViewInverse, null, Const(1));
+ Operand negativeOne = ConstF(-1.0f);
+
+ this.Store(StorageKind.Output, IoVariable.Position, null, Const(0), this.FPFusedMultiplyAdd(x, xScale, negativeOne)); // x' = x * xScale - 1
+ this.Store(StorageKind.Output, IoVariable.Position, null, Const(1), this.FPFusedMultiplyAdd(y, yScale, negativeOne)); // y' = y * yScale - 1
+ }
+
+ if (Config.Options.TargetApi == TargetApi.Vulkan && Config.GpuAccessor.QueryTransformDepthMinusOneToOne())
+ {
+ Operand z = this.Load(StorageKind.Output, IoVariable.Position, null, Const(2));
+ Operand w = this.Load(StorageKind.Output, IoVariable.Position, null, Const(3));
+ Operand halfW = this.FPMultiply(w, ConstF(0.5f));
+
+ this.Store(StorageKind.Output, IoVariable.Position, null, Const(2), this.FPFusedMultiplyAdd(z, ConstF(0.5f), halfW)); // z' = 0.5 * z + 0.5 * w, i.e. [-w, w] becomes [0, w].
+ }
+
+ if (Config.Stage != ShaderStage.Geometry && Config.HasLayerInputAttribute)
+ {
+ Config.SetUsedFeature(FeatureFlags.RtLayer);
+
+ int attrVecIndex = Config.GpLayerInputAttribute >> 2; // Upper bits: which user-defined output vector.
+ int attrComponentIndex = Config.GpLayerInputAttribute & 3; // Low two bits: component within that vector.
+
+ Operand layer = this.Load(StorageKind.Output, IoVariable.UserDefined, null, Const(attrVecIndex), Const(attrComponentIndex));
+
+ this.Store(StorageKind.Output, IoVariable.Layer, null, layer); // Copy the selected user output component into the layer output.
+ }
+ }
+
+ public void PrepareForVertexReturn(out Operand oldXLocal, out Operand oldYLocal, out Operand oldZLocal) // Saves the position components the parameterless overload may overwrite, then calls it.
+ {
+ if (Config.GpuAccessor.QueryViewportTransformDisable()) // Same condition under which the parameterless overload rewrites X and Y.
+ {
+ oldXLocal = Local();
+ this.Copy(oldXLocal, this.Load(StorageKind.Output, IoVariable.Position, null, Const(0)));
+ oldYLocal = Local();
+ this.Copy(oldYLocal, this.Load(StorageKind.Output, IoVariable.Position, null, Const(1)));
+ }
+ else
+ {
+ oldXLocal = null; // Null tells the caller that X/Y were not modified.
+ oldYLocal = null;
+ }
+
+ if (Config.Options.TargetApi == TargetApi.Vulkan && Config.GpuAccessor.QueryTransformDepthMinusOneToOne()) // Same condition under which Z is rewritten.
+ {
+ oldZLocal = Local();
+ this.Copy(oldZLocal, this.Load(StorageKind.Output, IoVariable.Position, null, Const(2)));
+ }
+ else
+ {
+ oldZLocal = null;
+ }
+
+ PrepareForVertexReturn();
+ }
+
+ public void PrepareForReturn() // Emits the stage-specific epilogue: vertex fix-ups, geometry passthrough emulation, or fragment output writes.
+ {
+ if (IsNonMain) // Nothing is emitted for non-main functions.
+ {
+ return;
+ }
+
+ if (Config.LastInVertexPipeline &&
+ (Config.Stage == ShaderStage.Vertex || Config.Stage == ShaderStage.TessellationEvaluation) &&
+ (Config.Options.Flags & TranslationFlags.VertexA) == 0)
+ {
+ PrepareForVertexReturn();
+ }
+ else if (Config.Stage == ShaderStage.Geometry)
+ {
+ void WritePositionOutput(int primIndex) // Copies all four position components of input vertex primIndex to the output position.
+ {
+ Operand x = this.Load(StorageKind.Input, IoVariable.Position, Const(primIndex), Const(0));
+ Operand y = this.Load(StorageKind.Input, IoVariable.Position, Const(primIndex), Const(1));
+ Operand z = this.Load(StorageKind.Input, IoVariable.Position, Const(primIndex), Const(2));
+ Operand w = this.Load(StorageKind.Input, IoVariable.Position, Const(primIndex), Const(3));
+
+ this.Store(StorageKind.Output, IoVariable.Position, null, Const(0), x);
+ this.Store(StorageKind.Output, IoVariable.Position, null, Const(1), y);
+ this.Store(StorageKind.Output, IoVariable.Position, null, Const(2), z);
+ this.Store(StorageKind.Output, IoVariable.Position, null, Const(3), w);
+ }
+
+ void WriteUserDefinedOutput(int index, int primIndex) // Copies all four components of user attribute index from input vertex primIndex to the output.
+ {
+ Operand x = this.Load(StorageKind.Input, IoVariable.UserDefined, Const(index), Const(primIndex), Const(0));
+ Operand y = this.Load(StorageKind.Input, IoVariable.UserDefined, Const(index), Const(primIndex), Const(1));
+ Operand z = this.Load(StorageKind.Input, IoVariable.UserDefined, Const(index), Const(primIndex), Const(2));
+ Operand w = this.Load(StorageKind.Input, IoVariable.UserDefined, Const(index), Const(primIndex), Const(3));
+
+ this.Store(StorageKind.Output, IoVariable.UserDefined, null, Const(index), Const(0), x);
+ this.Store(StorageKind.Output, IoVariable.UserDefined, null, Const(index), Const(1), y);
+ this.Store(StorageKind.Output, IoVariable.UserDefined, null, Const(index), Const(2), z);
+ this.Store(StorageKind.Output, IoVariable.UserDefined, null, Const(index), Const(3), w);
+ }
+
+ if (Config.GpPassthrough && !Config.GpuAccessor.QueryHostSupportsGeometryShaderPassthrough()) // Passthrough shader on a host without passthrough support: emit the copies explicitly.
+ {
+ int inputVertices = Config.GpuAccessor.QueryPrimitiveTopology().ToInputVertices();
+
+ for (int primIndex = 0; primIndex < inputVertices; primIndex++)
+ {
+ WritePositionOutput(primIndex);
+
+ int passthroughAttributes = Config.PassthroughAttributes; // Bit mask; each set bit selects one user attribute index.
+ while (passthroughAttributes != 0)
+ {
+ int index = BitOperations.TrailingZeroCount(passthroughAttributes); // Lowest set bit first.
+ WriteUserDefinedOutput(index, primIndex);
+ Config.SetOutputUserAttribute(index);
+ passthroughAttributes &= ~(1 << index); // Clear the handled bit.
+ }
+
+ this.EmitVertex();
+ }
+
+ this.EndPrimitive();
+ }
+ }
+ else if (Config.Stage == ShaderStage.Fragment)
+ {
+ GenerateAlphaToCoverageDitherDiscard();
+
+ bool supportsBgra = Config.GpuAccessor.QueryHostSupportsBgraFormat();
+
+ if (Config.OmapDepth)
+ {
+ Operand src = Register(Config.GetDepthRegister(), RegisterType.Gpr);
+
+ this.Store(StorageKind.Output, IoVariable.FragmentOutputDepth, null, src);
+ }
+
+ AlphaTestOp alphaTestOp = Config.GpuAccessor.QueryAlphaTestCompare();
+
+ if (alphaTestOp != AlphaTestOp.Always && (Config.OmapTargets & 8) != 0) // Bit 3 = component 3 (alpha) of render target 0, per the bit layout used in the loop below.
+ {
+ if (alphaTestOp == AlphaTestOp.Never)
+ {
+ this.Discard(); // The test can never pass: discard unconditionally.
+ }
+ else
+ {
+ Instruction comparator = alphaTestOp switch
+ {
+ AlphaTestOp.Equal => Instruction.CompareEqual,
+ AlphaTestOp.Greater => Instruction.CompareGreater,
+ AlphaTestOp.GreaterOrEqual => Instruction.CompareGreaterOrEqual,
+ AlphaTestOp.Less => Instruction.CompareLess,
+ AlphaTestOp.LessOrEqual => Instruction.CompareLessOrEqual,
+ AlphaTestOp.NotEqual => Instruction.CompareNotEqual,
+ _ => 0 // Unknown op; caught by the Debug.Assert below.
+ };
+
+ Debug.Assert(comparator != 0, $"Invalid alpha test operation \"{alphaTestOp}\".");
+
+ Operand alpha = Register(3, RegisterType.Gpr); // Register 3 is the one the loop below stores as component 3 of target 0.
+ Operand alphaRef = ConstF(Config.GpuAccessor.QueryAlphaTestReference());
+ Operand alphaPass = Add(Instruction.FP32 | comparator, Local(), alpha, alphaRef);
+ Operand alphaPassLabel = Label();
+
+ this.BranchIfTrue(alphaPassLabel, alphaPass); // Skip the discard when the comparison holds.
+ this.Discard();
+ this.MarkLabel(alphaPassLabel);
+ }
+ }
+
+ int regIndexBase = 0; // First register of the current render target's four-register group.
+
+ for (int rtIndex = 0; rtIndex < 8; rtIndex++)
+ {
+ for (int component = 0; component < 4; component++)
+ {
+ bool componentEnabled = (Config.OmapTargets & (1 << (rtIndex * 4 + component))) != 0; // One mask bit per (target, component) pair.
+ if (!componentEnabled)
+ {
+ continue;
+ }
+
+ Operand src = Register(regIndexBase + component, RegisterType.Gpr);
+
+ // Perform B <-> R swap if needed, for BGRA formats (not supported on OpenGL).
+ if (!supportsBgra && (component == 0 || component == 2))
+ {
+ Operand isBgra = this.Load(StorageKind.Input, IoVariable.FragmentOutputIsBgra, null, Const(rtIndex)); // Per-target flag, evaluated at shader run time.
+
+ Operand lblIsBgra = Label();
+ Operand lblEnd = Label();
+
+ this.BranchIfTrue(lblIsBgra, isBgra);
+
+ this.Store(StorageKind.Output, IoVariable.FragmentOutputColor, null, Const(rtIndex), Const(component), src);
+ this.Branch(lblEnd);
+
+ MarkLabel(lblIsBgra);
+
+ this.Store(StorageKind.Output, IoVariable.FragmentOutputColor, null, Const(rtIndex), Const(2 - component), src); // 2 - component maps 0 to 2 and 2 to 0.
+
+ MarkLabel(lblEnd);
+ }
+ else
+ {
+ this.Store(StorageKind.Output, IoVariable.FragmentOutputColor, null, Const(rtIndex), Const(component), src);
+ }
+ }
+
+ bool targetEnabled = (Config.OmapTargets & (0xf << (rtIndex * 4))) != 0; // True when any of the target's four component bits is set.
+ if (targetEnabled)
+ {
+ Config.SetOutputUserAttribute(rtIndex);
+ regIndexBase += 4; // Registers advance only for enabled targets; disabled targets consume none.
+ }
+ }
+ }
+ }
+
+ private void GenerateAlphaToCoverageDitherDiscard() // Emits a table-driven discard that depends on the fragment's alpha and its pixel position parity.
+ {
+ // If the feature is disabled, or alpha is not written, then we're done.
+ if (!Config.GpuAccessor.QueryAlphaToCoverageDitherEnable() || (Config.OmapTargets & 8) == 0)
+ {
+ return;
+ }
+
+ // 11 11 11 10 10 10 10 00
+ // 11 01 01 01 01 00 00 00
+ Operand ditherMask = Const(unchecked((int)0xfbb99110u)); // 32-bit lookup table: 8 alpha levels x 4 bits, indexed by "shift" below.
+
+ Operand fragCoordX = this.Load(StorageKind.Input, IoVariable.FragmentCoord, null, Const(0));
+ Operand fragCoordY = this.Load(StorageKind.Input, IoVariable.FragmentCoord, null, Const(1));
+
+ Operand x = this.BitwiseAnd(this.FP32ConvertToU32(fragCoordX), Const(1)); // Lowest bit of the integer pixel X coordinate.
+ Operand y = this.BitwiseAnd(this.FP32ConvertToU32(fragCoordY), Const(1)); // Lowest bit of the integer pixel Y coordinate.
+ Operand xy = this.BitwiseOr(x, this.ShiftLeft(y, Const(1))); // 0-3: position within a 2x2 pixel block.
+
+ Operand alpha = Register(3, RegisterType.Gpr);
+ Operand scaledAlpha = this.FPMultiply(this.FPSaturate(alpha), ConstF(8));
+ Operand quantizedAlpha = this.IMinimumU32(this.FP32ConvertToU32(scaledAlpha), Const(7)); // Alpha level 0-7; the minimum with 7 caps the result of scaling by 8.
+ Operand shift = this.BitwiseOr(this.ShiftLeft(quantizedAlpha, Const(2)), xy); // Table bit index = level * 4 + position.
+ Operand opaque = this.BitwiseAnd(this.ShiftRightU32(ditherMask, shift), Const(1));
+
+ Operand a2cDitherEndLabel = Label();
+
+ this.BranchIfTrue(a2cDitherEndLabel, opaque); // Table bit set: keep the fragment; otherwise discard it.
+ this.Discard();
+ this.MarkLabel(a2cDitherEndLabel);
+ }
+
+ public Operation[] GetOperations() // Returns the emitted operations as a new array, in emission order.
+ {
+ return _operations.ToArray();
+ }
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/Translation/EmitterContextInsts.cs b/src/Ryujinx.Graphics.Shader/Translation/EmitterContextInsts.cs
new file mode 100644
index 00000000..93748249
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/Translation/EmitterContextInsts.cs
@@ -0,0 +1,819 @@
+using Ryujinx.Graphics.Shader.IntermediateRepresentation;
+using System;
+
+using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
+
+namespace Ryujinx.Graphics.Shader.Translation
+{
+ static class EmitterContextInsts
+ {
+ public static Operand AtomicAdd(this EmitterContext context, StorageKind storageKind, Operand a, Operand b, Operand c)
+ {
+ return context.Add(Instruction.AtomicAdd, storageKind, Local(), a, b, c); // Appends an AtomicAdd on storageKind; returns the Local() destination.
+ }
+
+ public static Operand AtomicAnd(this EmitterContext context, StorageKind storageKind, Operand a, Operand b, Operand c)
+ {
+ return context.Add(Instruction.AtomicAnd, storageKind, Local(), a, b, c); // Appends an AtomicAnd on storageKind; returns the Local() destination.
+ }
+
+ public static Operand AtomicCompareAndSwap(this EmitterContext context, StorageKind storageKind, Operand a, Operand b, Operand c, Operand d)
+ {
+ return context.Add(Instruction.AtomicCompareAndSwap, storageKind, Local(), a, b, c, d); // Four sources (one more than the other atomics); returns the Local() destination.
+ }
+
+ public static Operand AtomicMaxS32(this EmitterContext context, StorageKind storageKind, Operand a, Operand b, Operand c)
+ {
+ return context.Add(Instruction.AtomicMaxS32, storageKind, Local(), a, b, c); // Appends an AtomicMaxS32 on storageKind; returns the Local() destination.
+ }
+
+ public static Operand AtomicMaxU32(this EmitterContext context, StorageKind storageKind, Operand a, Operand b, Operand c)
+ {
+ return context.Add(Instruction.AtomicMaxU32, storageKind, Local(), a, b, c); // Appends an AtomicMaxU32 on storageKind; returns the Local() destination.
+ }
+
+ public static Operand AtomicMinS32(this EmitterContext context, StorageKind storageKind, Operand a, Operand b, Operand c)
+ {
+ return context.Add(Instruction.AtomicMinS32, storageKind, Local(), a, b, c); // Appends an AtomicMinS32 on storageKind; returns the Local() destination.
+ }
+
+ public static Operand AtomicMinU32(this EmitterContext context, StorageKind storageKind, Operand a, Operand b, Operand c)
+ {
+ return context.Add(Instruction.AtomicMinU32, storageKind, Local(), a, b, c); // Appends an AtomicMinU32 on storageKind; returns the Local() destination.
+ }
+
+ public static Operand AtomicOr(this EmitterContext context, StorageKind storageKind, Operand a, Operand b, Operand c)
+ {
+ return context.Add(Instruction.AtomicOr, storageKind, Local(), a, b, c); // Appends an AtomicOr on storageKind; returns the Local() destination.
+ }
+
+ public static Operand AtomicSwap(this EmitterContext context, StorageKind storageKind, Operand a, Operand b, Operand c)
+ {
+ return context.Add(Instruction.AtomicSwap, storageKind, Local(), a, b, c); // Appends an AtomicSwap on storageKind; returns the Local() destination.
+ }
+
+ public static Operand AtomicXor(this EmitterContext context, StorageKind storageKind, Operand a, Operand b, Operand c)
+ {
+ return context.Add(Instruction.AtomicXor, storageKind, Local(), a, b, c); // Appends an AtomicXor on storageKind; returns the Local() destination.
+ }
+
+ public static Operand Ballot(this EmitterContext context, Operand a)
+ {
+ return context.Add(Instruction.Ballot, Local(), a); // Appends a Ballot of a; returns the Local() destination.
+ }
+
+ public static Operand Barrier(this EmitterContext context)
+ {
+ return context.Add(Instruction.Barrier); // No destination is passed, so the returned operand is null.
+ }
+
+ public static Operand BitCount(this EmitterContext context, Operand a)
+ {
+ return context.Add(Instruction.BitCount, Local(), a); // Appends a BitCount of a; returns the Local() destination.
+ }
+
+ public static Operand BitfieldExtractS32(this EmitterContext context, Operand a, Operand b, Operand c)
+ {
+ return context.Add(Instruction.BitfieldExtractS32, Local(), a, b, c); // Appends a BitfieldExtractS32 with sources a, b, c; returns the Local() destination.
+ }
+
+ public static Operand BitfieldExtractU32(this EmitterContext context, Operand a, Operand b, Operand c)
+ {
+ return context.Add(Instruction.BitfieldExtractU32, Local(), a, b, c); // Appends a BitfieldExtractU32 with sources a, b, c; returns the Local() destination.
+ }
+
+ public static Operand BitfieldInsert(this EmitterContext context, Operand a, Operand b, Operand c, Operand d)
+ {
+ return context.Add(Instruction.BitfieldInsert, Local(), a, b, c, d); // Appends a BitfieldInsert with sources a, b, c, d; returns the Local() destination.
+ }
+
+ public static Operand BitfieldReverse(this EmitterContext context, Operand a)
+ {
+ return context.Add(Instruction.BitfieldReverse, Local(), a); // Appends a BitfieldReverse of a; returns the Local() destination.
+ }
+
+ public static Operand BitwiseAnd(this EmitterContext context, Operand a, Operand b)
+ {
+ return context.Add(Instruction.BitwiseAnd, Local(), a, b); // Appends a BitwiseAnd of a and b; returns the Local() destination.
+ }
+
+ public static Operand BitwiseExclusiveOr(this EmitterContext context, Operand a, Operand b)
+ {
+ return context.Add(Instruction.BitwiseExclusiveOr, Local(), a, b); // Appends a BitwiseExclusiveOr of a and b; returns the Local() destination.
+ }
+
+ public static Operand BitwiseNot(this EmitterContext context, Operand a, bool invert) // Conditional form: emits a BitwiseNot only when invert is true.
+ {
+ if (invert)
+ {
+ a = context.BitwiseNot(a);
+ }
+
+ return a; // With invert false, a is returned untouched and nothing is emitted.
+ }
+
+ public static Operand BitwiseNot(this EmitterContext context, Operand a)
+ {
+ return context.Add(Instruction.BitwiseNot, Local(), a); // Appends a BitwiseNot of a; returns the Local() destination.
+ }
+
+ public static Operand BitwiseOr(this EmitterContext context, Operand a, Operand b)
+ {
+ return context.Add(Instruction.BitwiseOr, Local(), a, b); // Appends a BitwiseOr of a and b; returns the Local() destination.
+ }
+
+ public static Operand Branch(this EmitterContext context, Operand d)
+ {
+ return context.Add(Instruction.Branch, d); // d (the target label) is passed in the destination slot and is also the return value.
+ }
+
+ public static Operand BranchIfFalse(this EmitterContext context, Operand d, Operand a)
+ {
+ return context.Add(Instruction.BranchIfFalse, d, a); // d is the target label (destination slot), a the only source operand; returns d.
+ }
+
+ public static Operand BranchIfTrue(this EmitterContext context, Operand d, Operand a)
+ {
+ return context.Add(Instruction.BranchIfTrue, d, a); // d is the target label (destination slot), a the only source operand; returns d.
+ }
+
+ public static Operand Call(this EmitterContext context, int funcId, bool returns, params Operand[] args) // Emits a Call whose first source is the function id, followed by args.
+ {
+ Operand[] args2 = new Operand[args.Length + 1]; // One extra slot at the front for the function id.
+
+ args2[0] = Const(funcId);
+ args.CopyTo(args2, 1);
+
+ return context.Add(Instruction.Call, returns ? Local() : null, args2); // A destination local exists only when returns is true; otherwise the result is null.
+ }
+
+ public static Operand ConditionalSelect(this EmitterContext context, Operand a, Operand b, Operand c)
+ {
+ return context.Add(Instruction.ConditionalSelect, Local(), a, b, c); // Appends a ConditionalSelect with sources a, b, c; returns the Local() destination.
+ }
+
+ public static Operand Copy(this EmitterContext context, Operand a)
+ {
+ return context.Add(Instruction.Copy, Local(), a); // Copies a into a new Local() and returns that local.
+ }
+
+ public static void Copy(this EmitterContext context, Operand d, Operand a) // Copies a into the caller-supplied destination d.
+ {
+ if (d.Type == OperandType.Constant) // A constant destination makes the copy a silent no-op.
+ {
+ return;
+ }
+
+ context.Add(Instruction.Copy, d, a);
+ }
+
+ public static Operand Discard(this EmitterContext context)
+ {
+ return context.Add(Instruction.Discard); // No destination is passed, so the returned operand is null.
+ }
+
+ public static Operand EmitVertex(this EmitterContext context)
+ {
+ return context.Add(Instruction.EmitVertex); // No destination is passed, so the returned operand is null.
+ }
+
+ public static Operand EndPrimitive(this EmitterContext context)
+ {
+ return context.Add(Instruction.EndPrimitive); // No destination is passed, so the returned operand is null.
+ }
+
+ public static Operand FindLSB(this EmitterContext context, Operand a)
+ {
+ return context.Add(Instruction.FindLSB, Local(), a); // Appends a FindLSB of a; returns the Local() destination.
+ }
+
+ public static Operand FindMSBS32(this EmitterContext context, Operand a)
+ {
+ return context.Add(Instruction.FindMSBS32, Local(), a); // Appends a FindMSBS32 of a; returns the Local() destination.
+ }
+
+ public static Operand FindMSBU32(this EmitterContext context, Operand a)
+ {
+ return context.Add(Instruction.FindMSBU32, Local(), a); // Appends a FindMSBU32 of a; returns the Local() destination.
+ }
+
+ public static Operand FP32ConvertToFP64(this EmitterContext context, Operand a)
+ {
+ return context.Add(Instruction.ConvertFP32ToFP64, Local(), a); // Maps to Instruction.ConvertFP32ToFP64 (note the different word order); returns the Local() destination.
+ }
+
+ public static Operand FP64ConvertToFP32(this EmitterContext context, Operand a)
+ {
+ return context.Add(Instruction.ConvertFP64ToFP32, Local(), a); // Maps to Instruction.ConvertFP64ToFP32; returns the Local() destination.
+ }
+
+ public static Operand FPAbsNeg(this EmitterContext context, Operand a, bool abs, bool neg, Instruction fpType = Instruction.FP32)
+ {
+ return context.FPNegate(context.FPAbsolute(a, abs, fpType), neg, fpType); // Optional absolute value first, then the optional negate (FPNegate is defined outside this chunk).
+ }
+
+ public static Operand FPAbsolute(this EmitterContext context, Operand a, bool abs, Instruction fpType = Instruction.FP32) // Conditional form: emits an Absolute only when abs is true.
+ {
+ if (abs)
+ {
+ a = context.FPAbsolute(a, fpType);
+ }
+
+ return a; // With abs false, a is returned untouched and nothing is emitted.
+ }
+
+ public static Operand FPAbsolute(this EmitterContext context, Operand a, Instruction fpType = Instruction.FP32)
+ {
+ return context.Add(fpType | Instruction.Absolute, Local(), a); // fpType is OR-ed into the instruction value; returns the Local() destination.
+ }
+
+ public static Operand FPAdd(this EmitterContext context, Operand a, Operand b, Instruction fpType = Instruction.FP32)
+ {
+ return context.Add(fpType | Instruction.Add, Local(), a, b); // fpType is OR-ed into Instruction.Add; returns the Local() destination.
+ }
+
+ public static Operand FPCeiling(this EmitterContext context, Operand a, Instruction fpType = Instruction.FP32)
+ {
+ return context.Add(fpType | Instruction.Ceiling, Local(), a); // fpType is OR-ed into Instruction.Ceiling; returns the Local() destination.
+ }
+
+ public static Operand FPCompareEqual(this EmitterContext context, Operand a, Operand b, Instruction fpType = Instruction.FP32)
+ {
+ return context.Add(fpType | Instruction.CompareEqual, Local(), a, b); // fpType is OR-ed into Instruction.CompareEqual; returns the Local() destination.
+ }
+
+ /// <summary>Adds <c>fpType | Instruction.CompareLess</c> over <paramref name="a"/> and <paramref name="b"/>, with a new <c>Local()</c> as the destination argument.</summary>
+ public static Operand FPCompareLess(this EmitterContext context, Operand a, Operand b, Instruction fpType = Instruction.FP32)
+     => context.Add(fpType | Instruction.CompareLess, Local(), a, b);
+
+ /// <summary>Adds <see cref="Instruction.ConvertFP32ToS32"/> over <paramref name="a"/>, with a new <c>Local()</c> as the destination argument.</summary>
+ public static Operand FP32ConvertToS32(this EmitterContext context, Operand a)
+     => context.Add(Instruction.ConvertFP32ToS32, Local(), a);
+
+ /// <summary>Adds <see cref="Instruction.ConvertFP32ToU32"/> over <paramref name="a"/>, with a new <c>Local()</c> as the destination argument.</summary>
+ public static Operand FP32ConvertToU32(this EmitterContext context, Operand a)
+     => context.Add(Instruction.ConvertFP32ToU32, Local(), a);
+
+ /// <summary>Adds <see cref="Instruction.ConvertFP64ToS32"/> over <paramref name="a"/>, with a new <c>Local()</c> as the destination argument.</summary>
+ public static Operand FP64ConvertToS32(this EmitterContext context, Operand a)
+     => context.Add(Instruction.ConvertFP64ToS32, Local(), a);
+
+ /// <summary>Adds <see cref="Instruction.ConvertFP64ToU32"/> over <paramref name="a"/>, with a new <c>Local()</c> as the destination argument.</summary>
+ public static Operand FP64ConvertToU32(this EmitterContext context, Operand a)
+     => context.Add(Instruction.ConvertFP64ToU32, Local(), a);
+
+ /// <summary>Adds <c>Instruction.FP32 | Instruction.Cosine</c> over <paramref name="a"/>, with a new <c>Local()</c> as the destination argument.</summary>
+ public static Operand FPCosine(this EmitterContext context, Operand a)
+     => context.Add(Instruction.FP32 | Instruction.Cosine, Local(), a);
+
+ /// <summary>Adds <c>fpType | Instruction.Divide</c> over <paramref name="a"/> and <paramref name="b"/>, with a new <c>Local()</c> as the destination argument.</summary>
+ public static Operand FPDivide(this EmitterContext context, Operand a, Operand b, Instruction fpType = Instruction.FP32)
+     => context.Add(fpType | Instruction.Divide, Local(), a, b);
+
+ /// <summary>Adds <c>Instruction.FP32 | Instruction.ExponentB2</c> over <paramref name="a"/>, with a new <c>Local()</c> as the destination argument.</summary>
+ public static Operand FPExponentB2(this EmitterContext context, Operand a)
+     => context.Add(Instruction.FP32 | Instruction.ExponentB2, Local(), a);
+
+ /// <summary>Adds <c>fpType | Instruction.Floor</c> over <paramref name="a"/>, with a new <c>Local()</c> as the destination argument.</summary>
+ public static Operand FPFloor(this EmitterContext context, Operand a, Instruction fpType = Instruction.FP32)
+     => context.Add(fpType | Instruction.Floor, Local(), a);
+
+ /// <summary>Adds <c>fpType | Instruction.FusedMultiplyAdd</c> over <paramref name="a"/>, <paramref name="b"/> and <paramref name="c"/>, with a new <c>Local()</c> as the destination argument.</summary>
+ public static Operand FPFusedMultiplyAdd(this EmitterContext context, Operand a, Operand b, Operand c, Instruction fpType = Instruction.FP32)
+     => context.Add(fpType | Instruction.FusedMultiplyAdd, Local(), a, b, c);
+
+ /// <summary>Adds <c>Instruction.FP32 | Instruction.LogarithmB2</c> over <paramref name="a"/>, with a new <c>Local()</c> as the destination argument.</summary>
+ public static Operand FPLogarithmB2(this EmitterContext context, Operand a)
+     => context.Add(Instruction.FP32 | Instruction.LogarithmB2, Local(), a);
+
+ /// <summary>Adds <c>fpType | Instruction.Maximum</c> over <paramref name="a"/> and <paramref name="b"/>, with a new <c>Local()</c> as the destination argument.</summary>
+ public static Operand FPMaximum(this EmitterContext context, Operand a, Operand b, Instruction fpType = Instruction.FP32)
+     => context.Add(fpType | Instruction.Maximum, Local(), a, b);
+
+ /// <summary>Adds <c>fpType | Instruction.Minimum</c> over <paramref name="a"/> and <paramref name="b"/>, with a new <c>Local()</c> as the destination argument.</summary>
+ public static Operand FPMinimum(this EmitterContext context, Operand a, Operand b, Instruction fpType = Instruction.FP32)
+     => context.Add(fpType | Instruction.Minimum, Local(), a, b);
+
+ /// <summary>Adds <c>fpType | Instruction.Multiply</c> over <paramref name="a"/> and <paramref name="b"/>, with a new <c>Local()</c> as the destination argument.</summary>
+ public static Operand FPMultiply(this EmitterContext context, Operand a, Operand b, Instruction fpType = Instruction.FP32)
+     => context.Add(fpType | Instruction.Multiply, Local(), a, b);
+
+ /// <summary>Returns the FP negation of <paramref name="a"/> when <paramref name="neg"/> is true; otherwise returns <paramref name="a"/> unchanged.</summary>
+ public static Operand FPNegate(this EmitterContext context, Operand a, bool neg, Instruction fpType = Instruction.FP32)
+     => neg ? context.FPNegate(a, fpType) : a;
+
+ /// <summary>Adds <c>fpType | Instruction.Negate</c> over <paramref name="a"/>, with a new <c>Local()</c> as the destination argument.</summary>
+ public static Operand FPNegate(this EmitterContext context, Operand a, Instruction fpType = Instruction.FP32)
+     => context.Add(fpType | Instruction.Negate, Local(), a);
+
+ /// <summary>
+ /// Emits 1 / <paramref name="a"/> through <c>FPDivide</c>. For <see cref="Instruction.FP64"/> the numerator
+ /// is built with <c>PackDouble2x32(1.0)</c>; for any other <paramref name="fpType"/> it is <c>ConstF(1)</c>.
+ /// </summary>
+ public static Operand FPReciprocal(this EmitterContext context, Operand a, Instruction fpType = Instruction.FP32)
+ {
+     Operand one = fpType == Instruction.FP64 ? context.PackDouble2x32(1.0) : ConstF(1);
+
+     return context.FPDivide(one, a, fpType);
+ }
+
+ /// <summary>Adds <c>fpType | Instruction.ReciprocalSquareRoot</c> over <paramref name="a"/>, with a new <c>Local()</c> as the destination argument.</summary>
+ public static Operand FPReciprocalSquareRoot(this EmitterContext context, Operand a, Instruction fpType = Instruction.FP32)
+     => context.Add(fpType | Instruction.ReciprocalSquareRoot, Local(), a);
+
+ /// <summary>Adds <c>fpType | Instruction.Round</c> over <paramref name="a"/>, with a new <c>Local()</c> as the destination argument.</summary>
+ public static Operand FPRound(this EmitterContext context, Operand a, Instruction fpType = Instruction.FP32)
+     => context.Add(fpType | Instruction.Round, Local(), a);
+
+ /// <summary>Returns <paramref name="a"/> clamped by the unconditional <c>FPSaturate</c> overload when <paramref name="sat"/> is true; otherwise returns <paramref name="a"/> unchanged.</summary>
+ public static Operand FPSaturate(this EmitterContext context, Operand a, bool sat, Instruction fpType = Instruction.FP32)
+     => sat ? context.FPSaturate(a, fpType) : a;
+
+ /// <summary>
+ /// Adds <c>fpType | Instruction.Clamp</c> over <paramref name="a"/> with bounds 0 and 1.
+ /// For <see cref="Instruction.FP64"/> the bounds are built with <c>PackDouble2x32</c>; otherwise they are <c>ConstF</c> constants.
+ /// </summary>
+ public static Operand FPSaturate(this EmitterContext context, Operand a, Instruction fpType = Instruction.FP32)
+ {
+     if (fpType == Instruction.FP64)
+     {
+         // Argument order is kept as-is: each PackDouble2x32 call itself adds an operation to the context.
+         return context.Add(fpType | Instruction.Clamp, Local(), a, context.PackDouble2x32(0.0), context.PackDouble2x32(1.0));
+     }
+
+     return context.Add(fpType | Instruction.Clamp, Local(), a, ConstF(0), ConstF(1));
+ }
+
+ /// <summary>Adds <c>Instruction.FP32 | Instruction.Sine</c> over <paramref name="a"/>, with a new <c>Local()</c> as the destination argument.</summary>
+ public static Operand FPSine(this EmitterContext context, Operand a)
+     => context.Add(Instruction.FP32 | Instruction.Sine, Local(), a);
+
+ /// <summary>Adds <c>Instruction.FP32 | Instruction.SquareRoot</c> over <paramref name="a"/>, with a new <c>Local()</c> as the destination argument.</summary>
+ public static Operand FPSquareRoot(this EmitterContext context, Operand a)
+     => context.Add(Instruction.FP32 | Instruction.SquareRoot, Local(), a);
+
+ /// <summary>Adds <c>fpType | Instruction.Truncate</c> over <paramref name="a"/>, with a new <c>Local()</c> as the destination argument.</summary>
+ public static Operand FPTruncate(this EmitterContext context, Operand a, Instruction fpType = Instruction.FP32)
+     => context.Add(fpType | Instruction.Truncate, Local(), a);
+
+ /// <summary>Adds <see cref="Instruction.SwizzleAdd"/> over <paramref name="a"/>, <paramref name="b"/> and <paramref name="mask"/> (wrapped in <c>Const</c>), with a new <c>Local()</c> as the destination argument.</summary>
+ public static Operand FPSwizzleAdd(this EmitterContext context, Operand a, Operand b, int mask)
+     => context.Add(Instruction.SwizzleAdd, Local(), a, b, Const(mask));
+
+ /// <summary>Adds <see cref="Instruction.FSIBegin"/> to <paramref name="context"/>; the value returned by <c>Add</c> is discarded.</summary>
+ public static void FSIBegin(this EmitterContext context) => context.Add(Instruction.FSIBegin);
+
+ /// <summary>Adds <see cref="Instruction.FSIEnd"/> to <paramref name="context"/>; the value returned by <c>Add</c> is discarded.</summary>
+ public static void FSIEnd(this EmitterContext context) => context.Add(Instruction.FSIEnd);
+
+ /// <summary>Adds <see cref="Instruction.GroupMemoryBarrier"/> to <paramref name="context"/>, with no other arguments.</summary>
+ public static Operand GroupMemoryBarrier(this EmitterContext context) => context.Add(Instruction.GroupMemoryBarrier);
+
+ /// <summary>
+ /// Applies the optional absolute value (when <paramref name="abs"/> is set) and then the optional
+ /// negation (when <paramref name="neg"/> is set) to <paramref name="a"/>, in that order.
+ /// </summary>
+ public static Operand IAbsNeg(this EmitterContext context, Operand a, bool abs, bool neg)
+ {
+     Operand magnitude = context.IAbsolute(a, abs);
+
+     return context.INegate(magnitude, neg);
+ }
+
+ /// <summary>Returns the absolute value of <paramref name="a"/> when <paramref name="abs"/> is true; otherwise returns <paramref name="a"/> unchanged.</summary>
+ public static Operand IAbsolute(this EmitterContext context, Operand a, bool abs)
+     => abs ? context.IAbsolute(a) : a;
+
+ /// <summary>Adds <see cref="Instruction.Absolute"/> (no FP type flag) over <paramref name="a"/>, with a new <c>Local()</c> as the destination argument.</summary>
+ public static Operand IAbsolute(this EmitterContext context, Operand a) => context.Add(Instruction.Absolute, Local(), a);
+
+ /// <summary>Adds <see cref="Instruction.Add"/> (no FP type flag) over <paramref name="a"/> and <paramref name="b"/>, with a new <c>Local()</c> as the destination argument.</summary>
+ public static Operand IAdd(this EmitterContext context, Operand a, Operand b) => context.Add(Instruction.Add, Local(), a, b);
+
+ /// <summary>Adds <see cref="Instruction.Clamp"/> over <paramref name="a"/>, <paramref name="b"/> and <paramref name="c"/>, with a new <c>Local()</c> as the destination argument.</summary>
+ public static Operand IClampS32(this EmitterContext context, Operand a, Operand b, Operand c)
+     => context.Add(Instruction.Clamp, Local(), a, b, c);
+
+ /// <summary>Adds <see cref="Instruction.ClampU32"/> over <paramref name="a"/>, <paramref name="b"/> and <paramref name="c"/>, with a new <c>Local()</c> as the destination argument.</summary>
+ public static Operand IClampU32(this EmitterContext context, Operand a, Operand b, Operand c)
+     => context.Add(Instruction.ClampU32, Local(), a, b, c);
+
+ /// <summary>Adds <see cref="Instruction.CompareEqual"/> (no FP type flag) over <paramref name="a"/> and <paramref name="b"/>, with a new <c>Local()</c> as the destination argument.</summary>
+ public static Operand ICompareEqual(this EmitterContext context, Operand a, Operand b)
+     => context.Add(Instruction.CompareEqual, Local(), a, b);
+
+ /// <summary>Adds <see cref="Instruction.CompareGreater"/> over <paramref name="a"/> and <paramref name="b"/>, with a new <c>Local()</c> as the destination argument.</summary>
+ public static Operand ICompareGreater(this EmitterContext context, Operand a, Operand b)
+     => context.Add(Instruction.CompareGreater, Local(), a, b);
+
+ /// <summary>Adds <see cref="Instruction.CompareGreaterOrEqual"/> over <paramref name="a"/> and <paramref name="b"/>, with a new <c>Local()</c> as the destination argument.</summary>
+ public static Operand ICompareGreaterOrEqual(this EmitterContext context, Operand a, Operand b)
+     => context.Add(Instruction.CompareGreaterOrEqual, Local(), a, b);
+
+ /// <summary>Adds <see cref="Instruction.CompareGreaterOrEqualU32"/> over <paramref name="a"/> and <paramref name="b"/>, with a new <c>Local()</c> as the destination argument.</summary>
+ public static Operand ICompareGreaterOrEqualUnsigned(this EmitterContext context, Operand a, Operand b)
+     => context.Add(Instruction.CompareGreaterOrEqualU32, Local(), a, b);
+
+ /// <summary>Adds <see cref="Instruction.CompareGreaterU32"/> over <paramref name="a"/> and <paramref name="b"/>, with a new <c>Local()</c> as the destination argument.</summary>
+ public static Operand ICompareGreaterUnsigned(this EmitterContext context, Operand a, Operand b)
+     => context.Add(Instruction.CompareGreaterU32, Local(), a, b);
+
+ /// <summary>Adds <see cref="Instruction.CompareLess"/> (no FP type flag) over <paramref name="a"/> and <paramref name="b"/>, with a new <c>Local()</c> as the destination argument.</summary>
+ public static Operand ICompareLess(this EmitterContext context, Operand a, Operand b)
+     => context.Add(Instruction.CompareLess, Local(), a, b);
+
+ /// <summary>Adds <see cref="Instruction.CompareLessOrEqual"/> over <paramref name="a"/> and <paramref name="b"/>, with a new <c>Local()</c> as the destination argument.</summary>
+ public static Operand ICompareLessOrEqual(this EmitterContext context, Operand a, Operand b)
+     => context.Add(Instruction.CompareLessOrEqual, Local(), a, b);
+
+ /// <summary>Adds <see cref="Instruction.CompareLessOrEqualU32"/> over <paramref name="a"/> and <paramref name="b"/>, with a new <c>Local()</c> as the destination argument.</summary>
+ public static Operand ICompareLessOrEqualUnsigned(this EmitterContext context, Operand a, Operand b)
+     => context.Add(Instruction.CompareLessOrEqualU32, Local(), a, b);
+
+ /// <summary>Adds <see cref="Instruction.CompareLessU32"/> over <paramref name="a"/> and <paramref name="b"/>, with a new <c>Local()</c> as the destination argument.</summary>
+ public static Operand ICompareLessUnsigned(this EmitterContext context, Operand a, Operand b)
+     => context.Add(Instruction.CompareLessU32, Local(), a, b);
+
+ /// <summary>Adds <see cref="Instruction.CompareNotEqual"/> over <paramref name="a"/> and <paramref name="b"/>, with a new <c>Local()</c> as the destination argument.</summary>
+ public static Operand ICompareNotEqual(this EmitterContext context, Operand a, Operand b)
+     => context.Add(Instruction.CompareNotEqual, Local(), a, b);
+
+ /// <summary>Adds <see cref="Instruction.ConvertS32ToFP32"/> over <paramref name="a"/>, with a new <c>Local()</c> as the destination argument.</summary>
+ public static Operand IConvertS32ToFP32(this EmitterContext context, Operand a)
+     => context.Add(Instruction.ConvertS32ToFP32, Local(), a);
+
+ /// <summary>Adds <see cref="Instruction.ConvertS32ToFP64"/> over <paramref name="a"/>, with a new <c>Local()</c> as the destination argument.</summary>
+ public static Operand IConvertS32ToFP64(this EmitterContext context, Operand a)
+     => context.Add(Instruction.ConvertS32ToFP64, Local(), a);
+
+ /// <summary>Adds <see cref="Instruction.ConvertU32ToFP32"/> over <paramref name="a"/>, with a new <c>Local()</c> as the destination argument.</summary>
+ public static Operand IConvertU32ToFP32(this EmitterContext context, Operand a)
+     => context.Add(Instruction.ConvertU32ToFP32, Local(), a);
+
+ /// <summary>Adds <see cref="Instruction.ConvertU32ToFP64"/> over <paramref name="a"/>, with a new <c>Local()</c> as the destination argument.</summary>
+ public static Operand IConvertU32ToFP64(this EmitterContext context, Operand a)
+     => context.Add(Instruction.ConvertU32ToFP64, Local(), a);
+
+ /// <summary>Adds <see cref="Instruction.Maximum"/> (no FP type flag) over <paramref name="a"/> and <paramref name="b"/>, with a new <c>Local()</c> as the destination argument.</summary>
+ public static Operand IMaximumS32(this EmitterContext context, Operand a, Operand b)
+     => context.Add(Instruction.Maximum, Local(), a, b);
+
+ /// <summary>Adds <see cref="Instruction.MaximumU32"/> over <paramref name="a"/> and <paramref name="b"/>, with a new <c>Local()</c> as the destination argument.</summary>
+ public static Operand IMaximumU32(this EmitterContext context, Operand a, Operand b)
+     => context.Add(Instruction.MaximumU32, Local(), a, b);
+
+ /// <summary>Adds <see cref="Instruction.Minimum"/> (no FP type flag) over <paramref name="a"/> and <paramref name="b"/>, with a new <c>Local()</c> as the destination argument.</summary>
+ public static Operand IMinimumS32(this EmitterContext context, Operand a, Operand b)
+     => context.Add(Instruction.Minimum, Local(), a, b);
+
+ /// <summary>Adds <see cref="Instruction.MinimumU32"/> over <paramref name="a"/> and <paramref name="b"/>, with a new <c>Local()</c> as the destination argument.</summary>
+ public static Operand IMinimumU32(this EmitterContext context, Operand a, Operand b)
+     => context.Add(Instruction.MinimumU32, Local(), a, b);
+
+ /// <summary>Adds <see cref="Instruction.Multiply"/> (no FP type flag) over <paramref name="a"/> and <paramref name="b"/>, with a new <c>Local()</c> as the destination argument.</summary>
+ public static Operand IMultiply(this EmitterContext context, Operand a, Operand b)
+     => context.Add(Instruction.Multiply, Local(), a, b);
+
+ /// <summary>Returns the negation of <paramref name="a"/> when <paramref name="neg"/> is true; otherwise returns <paramref name="a"/> unchanged.</summary>
+ public static Operand INegate(this EmitterContext context, Operand a, bool neg)
+     => neg ? context.INegate(a) : a;
+
+ /// <summary>Adds <see cref="Instruction.Negate"/> (no FP type flag) over <paramref name="a"/>, with a new <c>Local()</c> as the destination argument.</summary>
+ public static Operand INegate(this EmitterContext context, Operand a) => context.Add(Instruction.Negate, Local(), a);
+
+ /// <summary>Adds <see cref="Instruction.Subtract"/> over <paramref name="a"/> and <paramref name="b"/>, with a new <c>Local()</c> as the destination argument.</summary>
+ public static Operand ISubtract(this EmitterContext context, Operand a, Operand b)
+     => context.Add(Instruction.Subtract, Local(), a, b);
+
+ /// <summary>Adds <c>fpType | Instruction.IsNan</c> over <paramref name="a"/>, with a new <c>Local()</c> as the destination argument.</summary>
+ public static Operand IsNan(this EmitterContext context, Operand a, Instruction fpType = Instruction.FP32)
+     => context.Add(fpType | Instruction.IsNan, Local(), a);
+
+ /// <summary>
+ /// Adds <see cref="Instruction.Load"/> for <paramref name="ioVariable"/> (passed as an integer constant) with the given storage kind.
+ /// <paramref name="primVertex"/> is appended as a source only when it is not null.
+ /// </summary>
+ public static Operand Load(this EmitterContext context, StorageKind storageKind, IoVariable ioVariable, Operand primVertex = null)
+ {
+     if (primVertex == null)
+     {
+         return context.Add(Instruction.Load, storageKind, Local(), Const((int)ioVariable));
+     }
+
+     return context.Add(Instruction.Load, storageKind, Local(), Const((int)ioVariable), primVertex);
+ }
+
+ /// <summary>
+ /// Adds <see cref="Instruction.Load"/> for <paramref name="ioVariable"/> (passed as an integer constant) followed by <paramref name="elemIndex"/>.
+ /// <paramref name="primVertex"/> is inserted before <paramref name="elemIndex"/> only when it is not null.
+ /// </summary>
+ public static Operand Load(
+     this EmitterContext context,
+     StorageKind storageKind,
+     IoVariable ioVariable,
+     Operand primVertex,
+     Operand elemIndex)
+ {
+     if (primVertex == null)
+     {
+         return context.Add(Instruction.Load, storageKind, Local(), Const((int)ioVariable), elemIndex);
+     }
+
+     return context.Add(Instruction.Load, storageKind, Local(), Const((int)ioVariable), primVertex, elemIndex);
+ }
+
+ /// <summary>
+ /// Adds <see cref="Instruction.Load"/> for <paramref name="ioVariable"/> (passed as an integer constant) followed by
+ /// <paramref name="arrayIndex"/> and <paramref name="elemIndex"/>. <paramref name="primVertex"/> is inserted before them only when it is not null.
+ /// </summary>
+ public static Operand Load(
+     this EmitterContext context,
+     StorageKind storageKind,
+     IoVariable ioVariable,
+     Operand primVertex,
+     Operand arrayIndex,
+     Operand elemIndex)
+ {
+     if (primVertex == null)
+     {
+         return context.Add(Instruction.Load, storageKind, Local(), Const((int)ioVariable), arrayIndex, elemIndex);
+     }
+
+     return context.Add(Instruction.Load, storageKind, Local(), Const((int)ioVariable), primVertex, arrayIndex, elemIndex);
+ }
+
+ /// <summary>
+ /// Adds <see cref="Instruction.LoadConstant"/> over <paramref name="a"/> and <paramref name="b"/>.
+ /// Before that, records usage on <c>context.Config</c>: the <see cref="FeatureFlags.CbIndexing"/> feature when
+ /// <paramref name="a"/> is not a constant operand, otherwise the constant buffer identified by <c>a.Value</c>.
+ /// </summary>
+ public static Operand LoadConstant(this EmitterContext context, Operand a, Operand b)
+ {
+     if (a.Type != OperandType.Constant)
+     {
+         context.Config.SetUsedFeature(FeatureFlags.CbIndexing);
+     }
+     else
+     {
+         context.Config.SetUsedConstantBuffer(a.Value);
+     }
+
+     return context.Add(Instruction.LoadConstant, Local(), a, b);
+ }
+
+ /// <summary>Adds <see cref="Instruction.LoadGlobal"/> over <paramref name="a"/> and <paramref name="b"/>, with a new <c>Local()</c> as the destination argument.</summary>
+ public static Operand LoadGlobal(this EmitterContext context, Operand a, Operand b)
+     => context.Add(Instruction.LoadGlobal, Local(), a, b);
+
+ /// <summary>Adds <see cref="Instruction.LoadLocal"/> over <paramref name="a"/>, with a new <c>Local()</c> as the destination argument.</summary>
+ public static Operand LoadLocal(this EmitterContext context, Operand a) => context.Add(Instruction.LoadLocal, Local(), a);
+
+ /// <summary>Adds <see cref="Instruction.LoadShared"/> over <paramref name="a"/>, with a new <c>Local()</c> as the destination argument.</summary>
+ public static Operand LoadShared(this EmitterContext context, Operand a) => context.Add(Instruction.LoadShared, Local(), a);
+
+ /// <summary>Adds <see cref="Instruction.MemoryBarrier"/> to <paramref name="context"/>, with no other arguments.</summary>
+ public static Operand MemoryBarrier(this EmitterContext context) => context.Add(Instruction.MemoryBarrier);
+
+ /// <summary>Adds <see cref="Instruction.MultiplyHighS32"/> over <paramref name="a"/> and <paramref name="b"/>, with a new <c>Local()</c> as the destination argument.</summary>
+ public static Operand MultiplyHighS32(this EmitterContext context, Operand a, Operand b)
+     => context.Add(Instruction.MultiplyHighS32, Local(), a, b);
+
+ /// <summary>Adds <see cref="Instruction.MultiplyHighU32"/> over <paramref name="a"/> and <paramref name="b"/>, with a new <c>Local()</c> as the destination argument.</summary>
+ public static Operand MultiplyHighU32(this EmitterContext context, Operand a, Operand b)
+     => context.Add(Instruction.MultiplyHighU32, Local(), a, b);
+
+ /// <summary>
+ /// Adds <see cref="Instruction.PackDouble2x32"/> whose two sources are integer constants holding the
+ /// low 32 bits and the high 32 bits (in that order) of the IEEE-754 bit pattern of <paramref name="value"/>.
+ /// </summary>
+ public static Operand PackDouble2x32(this EmitterContext context, double value)
+ {
+     long bits = BitConverter.DoubleToInt64Bits(value);
+
+     int lowWord = (int)bits;
+     int highWord = (int)(bits >> 32);
+
+     return context.Add(Instruction.PackDouble2x32, Local(), Const(lowWord), Const(highWord));
+ }
+
+ /// <summary>Adds <see cref="Instruction.PackDouble2x32"/> over <paramref name="a"/> and <paramref name="b"/>, with a new <c>Local()</c> as the destination argument.</summary>
+ public static Operand PackDouble2x32(this EmitterContext context, Operand a, Operand b)
+     => context.Add(Instruction.PackDouble2x32, Local(), a, b);
+
+ /// <summary>Adds <see cref="Instruction.PackHalf2x16"/> over <paramref name="a"/> and <paramref name="b"/>, with a new <c>Local()</c> as the destination argument.</summary>
+ public static Operand PackHalf2x16(this EmitterContext context, Operand a, Operand b)
+     => context.Add(Instruction.PackHalf2x16, Local(), a, b);
+
+ /// <summary>Calls <c>context.PrepareForReturn()</c> and then adds <see cref="Instruction.Return"/> with no operands.</summary>
+ public static void Return(this EmitterContext context)
+ {
+     // PrepareForReturn must run first so anything it emits precedes the return.
+     context.PrepareForReturn();
+
+     context.Add(Instruction.Return);
+ }
+
+ /// <summary>
+ /// Calls <c>context.PrepareForReturn()</c> and then adds <see cref="Instruction.Return"/> with a null
+ /// destination and <paramref name="returnValue"/> as its only source.
+ /// </summary>
+ public static void Return(this EmitterContext context, Operand returnValue)
+ {
+     // PrepareForReturn must run first so anything it emits precedes the return.
+     context.PrepareForReturn();
+
+     context.Add(Instruction.Return, null, returnValue);
+ }
+
+ /// <summary>Adds <see cref="Instruction.ShiftLeft"/> over <paramref name="a"/> and <paramref name="b"/>, with a new <c>Local()</c> as the destination argument.</summary>
+ public static Operand ShiftLeft(this EmitterContext context, Operand a, Operand b)
+     => context.Add(Instruction.ShiftLeft, Local(), a, b);
+
+ /// <summary>Adds <see cref="Instruction.ShiftRightS32"/> over <paramref name="a"/> and <paramref name="b"/>, with a new <c>Local()</c> as the destination argument.</summary>
+ public static Operand ShiftRightS32(this EmitterContext context, Operand a, Operand b)
+     => context.Add(Instruction.ShiftRightS32, Local(), a, b);
+
+ /// <summary>Adds <see cref="Instruction.ShiftRightU32"/> over <paramref name="a"/> and <paramref name="b"/>, with a new <c>Local()</c> as the destination argument.</summary>
+ public static Operand ShiftRightU32(this EmitterContext context, Operand a, Operand b)
+     => context.Add(Instruction.ShiftRightU32, Local(), a, b);
+
+ /// <summary>Adds <see cref="Instruction.Shuffle"/> over <paramref name="a"/>, <paramref name="b"/> and <paramref name="c"/>, passing a pair of new <c>Local()</c> operands as destinations.</summary>
+ public static (Operand, Operand) Shuffle(this EmitterContext context, Operand a, Operand b, Operand c)
+     => context.Add(Instruction.Shuffle, (Local(), Local()), a, b, c);
+
+ /// <summary>Adds <see cref="Instruction.ShuffleDown"/> over <paramref name="a"/>, <paramref name="b"/> and <paramref name="c"/>, passing a pair of new <c>Local()</c> operands as destinations.</summary>
+ public static (Operand, Operand) ShuffleDown(this EmitterContext context, Operand a, Operand b, Operand c)
+     => context.Add(Instruction.ShuffleDown, (Local(), Local()), a, b, c);
+
+ /// <summary>Adds <see cref="Instruction.ShuffleUp"/> over <paramref name="a"/>, <paramref name="b"/> and <paramref name="c"/>, passing a pair of new <c>Local()</c> operands as destinations.</summary>
+ public static (Operand, Operand) ShuffleUp(this EmitterContext context, Operand a, Operand b, Operand c)
+     => context.Add(Instruction.ShuffleUp, (Local(), Local()), a, b, c);
+
+ /// <summary>Adds <see cref="Instruction.ShuffleXor"/> over <paramref name="a"/>, <paramref name="b"/> and <paramref name="c"/>, passing a pair of new <c>Local()</c> operands as destinations.</summary>
+ public static (Operand, Operand) ShuffleXor(this EmitterContext context, Operand a, Operand b, Operand c)
+     => context.Add(Instruction.ShuffleXor, (Local(), Local()), a, b, c);
+
+ /// <summary>
+ /// Adds <see cref="Instruction.Store"/> (null destination) for <paramref name="ioVariable"/>, passed as an integer constant,
+ /// followed by <paramref name="value"/>. <paramref name="invocationId"/> is inserted before <paramref name="value"/> only when it is not null.
+ /// </summary>
+ public static Operand Store(
+     this EmitterContext context,
+     StorageKind storageKind,
+     IoVariable ioVariable,
+     Operand invocationId,
+     Operand value)
+ {
+     if (invocationId == null)
+     {
+         return context.Add(Instruction.Store, storageKind, null, Const((int)ioVariable), value);
+     }
+
+     return context.Add(Instruction.Store, storageKind, null, Const((int)ioVariable), invocationId, value);
+ }
+
+ /// <summary>
+ /// Adds <see cref="Instruction.Store"/> (null destination) for <paramref name="ioVariable"/>, passed as an integer constant,
+ /// followed by <paramref name="elemIndex"/> and <paramref name="value"/>. <paramref name="invocationId"/> is inserted before them only when it is not null.
+ /// </summary>
+ public static Operand Store(
+     this EmitterContext context,
+     StorageKind storageKind,
+     IoVariable ioVariable,
+     Operand invocationId,
+     Operand elemIndex,
+     Operand value)
+ {
+     if (invocationId == null)
+     {
+         return context.Add(Instruction.Store, storageKind, null, Const((int)ioVariable), elemIndex, value);
+     }
+
+     return context.Add(Instruction.Store, storageKind, null, Const((int)ioVariable), invocationId, elemIndex, value);
+ }
+
+ /// <summary>
+ /// Adds <see cref="Instruction.Store"/> (null destination) for <paramref name="ioVariable"/>, passed as an integer constant,
+ /// followed by <paramref name="arrayIndex"/>, <paramref name="elemIndex"/> and <paramref name="value"/>.
+ /// <paramref name="invocationId"/> is inserted before them only when it is not null.
+ /// </summary>
+ public static Operand Store(
+     this EmitterContext context,
+     StorageKind storageKind,
+     IoVariable ioVariable,
+     Operand invocationId,
+     Operand arrayIndex,
+     Operand elemIndex,
+     Operand value)
+ {
+     if (invocationId == null)
+     {
+         return context.Add(Instruction.Store, storageKind, null, Const((int)ioVariable), arrayIndex, elemIndex, value);
+     }
+
+     return context.Add(Instruction.Store, storageKind, null, Const((int)ioVariable), invocationId, arrayIndex, elemIndex, value);
+ }
+
+ /// <summary>Adds <see cref="Instruction.StoreGlobal"/> with a null destination and sources <paramref name="a"/>, <paramref name="b"/>, <paramref name="c"/>.</summary>
+ public static Operand StoreGlobal(this EmitterContext context, Operand a, Operand b, Operand c)
+     => context.Add(Instruction.StoreGlobal, null, a, b, c);
+
+ /// <summary>Adds <see cref="Instruction.StoreGlobal16"/> with a null destination and sources <paramref name="a"/>, <paramref name="b"/>, <paramref name="c"/>.</summary>
+ public static Operand StoreGlobal16(this EmitterContext context, Operand a, Operand b, Operand c)
+     => context.Add(Instruction.StoreGlobal16, null, a, b, c);
+
+ /// <summary>Adds <see cref="Instruction.StoreGlobal8"/> with a null destination and sources <paramref name="a"/>, <paramref name="b"/>, <paramref name="c"/>.</summary>
+ public static Operand StoreGlobal8(this EmitterContext context, Operand a, Operand b, Operand c)
+     => context.Add(Instruction.StoreGlobal8, null, a, b, c);
+
+ /// <summary>Adds <see cref="Instruction.StoreLocal"/> with a null destination and sources <paramref name="a"/>, <paramref name="b"/>.</summary>
+ public static Operand StoreLocal(this EmitterContext context, Operand a, Operand b)
+     => context.Add(Instruction.StoreLocal, null, a, b);
+
+ /// <summary>Adds <see cref="Instruction.StoreShared"/> with a null destination and sources <paramref name="a"/>, <paramref name="b"/>.</summary>
+ public static Operand StoreShared(this EmitterContext context, Operand a, Operand b)
+     => context.Add(Instruction.StoreShared, null, a, b);
+
+ /// <summary>Adds <see cref="Instruction.StoreShared16"/> with a null destination and sources <paramref name="a"/>, <paramref name="b"/>.</summary>
+ public static Operand StoreShared16(this EmitterContext context, Operand a, Operand b)
+     => context.Add(Instruction.StoreShared16, null, a, b);
+
+ /// <summary>Adds <see cref="Instruction.StoreShared8"/> with a null destination and sources <paramref name="a"/>, <paramref name="b"/>.</summary>
+ public static Operand StoreShared8(this EmitterContext context, Operand a, Operand b)
+     => context.Add(Instruction.StoreShared8, null, a, b);
+
+ /// <summary>Unpacks <paramref name="a"/> with <see cref="Instruction.UnpackDouble2x32"/>, selecting index 1.</summary>
+ public static Operand UnpackDouble2x32High(this EmitterContext context, Operand a)
+     => UnpackDouble2x32(context, a, index: 1);
+
+ /// <summary>Unpacks <paramref name="a"/> with <see cref="Instruction.UnpackDouble2x32"/>, selecting index 0.</summary>
+ public static Operand UnpackDouble2x32Low(this EmitterContext context, Operand a)
+     => UnpackDouble2x32(context, a, index: 0);
+
+ /// <summary>
+ /// Adds an <see cref="Instruction.UnpackDouble2x32"/> operation built with the given <paramref name="index"/>,
+ /// a new local destination and <paramref name="a"/> as source, and returns that destination.
+ /// </summary>
+ private static Operand UnpackDouble2x32(this EmitterContext context, Operand a, int index)
+ {
+     Operand result = Local();
+     Operation unpack = new Operation(Instruction.UnpackDouble2x32, index, result, a);
+
+     context.Add(unpack);
+
+     return result;
+ }
+
+ /// <summary>Unpacks <paramref name="a"/> with <see cref="Instruction.UnpackHalf2x16"/>, selecting index 1.</summary>
+ public static Operand UnpackHalf2x16High(this EmitterContext context, Operand a)
+     => UnpackHalf2x16(context, a, index: 1);
+
+ /// <summary>Unpacks <paramref name="a"/> with <see cref="Instruction.UnpackHalf2x16"/>, selecting index 0.</summary>
+ public static Operand UnpackHalf2x16Low(this EmitterContext context, Operand a)
+     => UnpackHalf2x16(context, a, index: 0);
+
+ /// <summary>
+ /// Adds an <see cref="Instruction.UnpackHalf2x16"/> operation built with the given <paramref name="index"/>,
+ /// a new local destination and <paramref name="a"/> as source, and returns that destination.
+ /// </summary>
+ private static Operand UnpackHalf2x16(this EmitterContext context, Operand a, int index)
+ {
+     Operand result = Local();
+     Operation unpack = new Operation(Instruction.UnpackHalf2x16, index, result, a);
+
+     context.Add(unpack);
+
+     return result;
+ }
+
+ /// <summary>Adds <see cref="Instruction.VoteAll"/> over <paramref name="a"/>, with a new <c>Local()</c> as the destination argument.</summary>
+ public static Operand VoteAll(this EmitterContext context, Operand a) => context.Add(Instruction.VoteAll, Local(), a);
+
+ /// <summary>Adds <see cref="Instruction.VoteAllEqual"/> over <paramref name="a"/>, with a new <c>Local()</c> as the destination argument.</summary>
+ public static Operand VoteAllEqual(this EmitterContext context, Operand a) => context.Add(Instruction.VoteAllEqual, Local(), a);
+
+ /// <summary>Adds <see cref="Instruction.VoteAny"/> over <paramref name="a"/>, with a new <c>Local()</c> as the destination argument.</summary>
+ public static Operand VoteAny(this EmitterContext context, Operand a) => context.Add(Instruction.VoteAny, Local(), a);
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/Translation/FeatureFlags.cs b/src/Ryujinx.Graphics.Shader/Translation/FeatureFlags.cs
new file mode 100644
index 00000000..c035f212
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/Translation/FeatureFlags.cs
@@ -0,0 +1,27 @@
+using System;
+
+namespace Ryujinx.Graphics.Shader.Translation
+{
+ /// <summary>
+ /// Features used by the shader that are important for the code generator to know in advance.
+ /// These typically change the declarations in the shader header.
+ /// </summary>
+ [Flags] // Every member below is a distinct single bit, so values can be OR-ed together.
+ public enum FeatureFlags
+ {
+ None = 0, // No bit set.
+
+ // Affected by resolution scaling.
+ IntegerSampling = 1 << 0,
+ FragCoordXY = 1 << 1,
+
+ Bindless = 1 << 2,
+ InstanceId = 1 << 3,
+ DrawParameters = 1 << 4,
+ RtLayer = 1 << 5,
+ CbIndexing = 1 << 6, // Set by the LoadConstant emitter helper when its first operand is not a constant.
+ IaIndexing = 1 << 7,
+ OaIndexing = 1 << 8,
+ FixedFuncAttr = 1 << 9
+ }
+}
diff --git a/src/Ryujinx.Graphics.Shader/Translation/FunctionMatch.cs b/src/Ryujinx.Graphics.Shader/Translation/FunctionMatch.cs
new file mode 100644
index 00000000..073e120a
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/Translation/FunctionMatch.cs
@@ -0,0 +1,866 @@
+using Ryujinx.Graphics.Shader.Decoders;
+using System;
+using System.Collections.Generic;
+using System.Runtime.CompilerServices;
+
+namespace Ryujinx.Graphics.Shader.Translation
+{
+ static class FunctionMatch
+ {
+ // Pattern trees are built once by the static initializers below. Nothing in the visible code
+ // reassigns them, so they are declared readonly to keep the references from being swapped by accident.
+ private static readonly IPatternTreeNode[] _fsiGetAddressTree = PatternTrees.GetFsiGetAddress();
+ private static readonly IPatternTreeNode[] _fsiGetAddressV2Tree = PatternTrees.GetFsiGetAddressV2();
+ private static readonly IPatternTreeNode[] _fsiIsLastWarpThreadPatternTree = PatternTrees.GetFsiIsLastWarpThread();
+ private static readonly IPatternTreeNode[] _fsiBeginPatternTree = PatternTrees.GetFsiBeginPattern();
+ private static readonly IPatternTreeNode[] _fsiEndPatternTree = PatternTrees.GetFsiEndPattern();
+
+ /// <summary>
+ /// Looks for the FSI helper functions in <paramref name="program"/> and tags them through <c>DecodedFunction.Type</c>.
+ /// </summary>
+ /// <remarks>
+ /// First pass: the first non-main function whose tree matches one of the two "get address" patterns and whose
+ /// captured registers pass the allocation check is marked <see cref="FunctionType.Unused"/>.
+ /// Second pass (only if the first one found a match): every remaining non-main, non compiler-generated
+ /// function matching the begin/end pattern is marked <see cref="FunctionType.BuiltInFSIBegin"/> or
+ /// <see cref="FunctionType.BuiltInFSIEnd"/>.
+ /// </remarks>
+ /// <param name="program">Decoded program whose functions are inspected and tagged in place.</param>
+ public static void RunPass(DecodedProgram program)
+ {
+     // Slot 0 is filled later by MatchesFsi; slots 1..3 come from the get-address function.
+     byte[] externalRegs = new byte[4];
+     bool foundGetAddress = false;
+
+     foreach (DecodedFunction candidate in program)
+     {
+         if (candidate == program.MainFunction)
+         {
+             continue;
+         }
+
+         int fourthReg = 0;
+         TreeNode[] tree = BuildTree(candidate.Blocks);
+
+         // The two variants have the same roots in a different order, so the indices differ.
+         if (Matches(_fsiGetAddressTree, tree))
+         {
+             externalRegs[1] = tree[0].GetRd();
+             externalRegs[2] = tree[2].GetRd();
+             externalRegs[3] = tree[1].GetRd();
+             fourthReg = tree[3].GetRd();
+         }
+         else if (Matches(_fsiGetAddressV2Tree, tree))
+         {
+             externalRegs[1] = tree[2].GetRd();
+             externalRegs[2] = tree[1].GetRd();
+             externalRegs[3] = tree[0].GetRd();
+             fourthReg = tree[3].GetRd();
+         }
+
+         // Ensure the register allocation is valid.
+         // If so, then we have a match.
+         int reg1 = externalRegs[1];
+         int reg2 = externalRegs[2];
+         int reg3 = externalRegs[3];
+
+         bool allDistinct = reg1 != reg2 && reg2 != reg3 && reg1 != reg3;
+         bool onlyFourthFollowsFirst = reg1 + 1 != reg2 && reg1 + 1 != reg3 && reg1 + 1 == fourthReg;
+         bool noneIsZeroRegister =
+             reg2 != RegisterConsts.RegisterZeroIndex &&
+             reg3 != RegisterConsts.RegisterZeroIndex &&
+             fourthReg != RegisterConsts.RegisterZeroIndex;
+
+         if (allDistinct && onlyFourthFollowsFirst && noneIsZeroRegister)
+         {
+             foundGetAddress = true;
+             candidate.Type = FunctionType.Unused;
+             break;
+         }
+     }
+
+     if (!foundGetAddress)
+     {
+         // Without the get-address helper there is nothing to tag.
+         return;
+     }
+
+     foreach (DecodedFunction candidate in program)
+     {
+         if (candidate.IsCompilerGenerated || candidate == program.MainFunction)
+         {
+             continue;
+         }
+
+         TreeNode[] tree = BuildTree(candidate.Blocks);
+
+         if (MatchesFsi(_fsiBeginPatternTree, program, candidate, tree, externalRegs))
+         {
+             candidate.Type = FunctionType.BuiltInFSIBegin;
+         }
+         else if (MatchesFsi(_fsiEndPatternTree, program, candidate, tree, externalRegs))
+         {
+             candidate.Type = FunctionType.BuiltInFSIEnd;
+         }
+     }
+ }
+
+ /// <summary>
+ /// A reference from one tree node to a value it consumes: the producing <see cref="Node"/> (null when there is none),
+ /// an <see cref="Index"/>, and whether the value is used <see cref="Inverted"/>.
+ /// </summary>
+ private readonly struct TreeNodeUse
+ {
+     public TreeNode Node { get; }
+     public int Index { get; }
+     public bool Inverted { get; }
+
+     /// <summary>Creates a non-inverted use.</summary>
+     public TreeNodeUse(int index, TreeNode node) : this(index, false, node)
+     {
+     }
+
+     private TreeNodeUse(int index, bool inverted, TreeNode node)
+     {
+         Node = node;
+         Index = index;
+         Inverted = inverted;
+     }
+
+     /// <summary>Returns a copy of this use with <see cref="Inverted"/> toggled.</summary>
+     public TreeNodeUse Flip() => new TreeNodeUse(Index, !Inverted, Node);
+ }
+
+ /// <summary>Kind of a tree node: a decoded instruction or a branch-target label.</summary>
+ private enum TreeNodeType : byte
+ {
+     Op = 0,
+     Label = 1
+ }
+
+ /// <summary>
+ /// Node of the tree built from a function's instructions. Label nodes carry only an order;
+ /// op nodes also carry the instruction and the list of values it uses.
+ /// </summary>
+ private class TreeNode
+ {
+     public readonly InstOp Op;
+     public readonly List<TreeNodeUse> Uses;
+     public TreeNodeType Type { get; }
+     public byte Order { get; }
+
+     /// <summary>Creates a label node. <see cref="Op"/> keeps its default value and <see cref="Uses"/> stays null.</summary>
+     public TreeNode(byte order)
+     {
+         Order = order;
+         Type = TreeNodeType.Label;
+     }
+
+     /// <summary>Creates an op node for <paramref name="op"/> with an empty use list.</summary>
+     public TreeNode(InstOp op, byte order)
+     {
+         Order = order;
+         Type = TreeNodeType.Op;
+         Op = op;
+         Uses = new List<TreeNodeUse>();
+     }
+
+     /// <summary>Returns bits 3..5 of the raw opcode.</summary>
+     public byte GetPd() => (byte)((Op.RawOpCode >> 3) & 7);
+
+     /// <summary>Returns the low 8 bits of the raw opcode.</summary>
+     public byte GetRd() => (byte)Op.RawOpCode;
+ }
+
+ /// <summary>
+ /// Turns the instructions of <paramref name="blocks"/> into a list of root <see cref="TreeNode"/>s linked by register use/def.
+ /// </summary>
+ /// <remarks>
+ /// Every instruction becomes a node. When a later instruction reads a predicate or general purpose register,
+ /// the node holding the latest definition of that register is removed from the root list and referenced from
+ /// the reader's <c>Uses</c> instead. Reads with no definition in this function are encoded as a use with a null
+ /// node and index <c>-(register + 2)</c>; reads of the true predicate or the zero register get index -1.
+ /// Blocks with more than one predecessor contribute a label node, which a later <c>Bra</c> to that address uses.
+ /// </remarks>
+ /// <param name="blocks">Blocks of the function, processed in array order.</param>
+ /// <returns>The nodes still in the root list, in insertion order.</returns>
+ private static TreeNode[] BuildTree(Block[] blocks)
+ {
+     var roots = new List<TreeNode>();
+     var labelsByAddress = new Dictionary<ulong, TreeNode>();
+
+     // Latest definition seen so far for each predicate / general purpose register.
+     var predDefs = new TreeNodeUse[RegisterConsts.PredsCount];
+     var gprDefs = new TreeNodeUse[RegisterConsts.GprsCount];
+
+     // Resolves a register read against the definition table, un-rooting the defining node if there is one.
+     TreeNodeUse Consume(TreeNodeUse[] defs, byte regIndex)
+     {
+         TreeNodeUse def = defs[regIndex];
+
+         if (def.Node == null)
+         {
+             return new TreeNodeUse(-(regIndex + 2), null);
+         }
+
+         roots.Remove(def.Node);
+
+         return def;
+     }
+
+     TreeNodeUse ReadPred(byte predIndex, bool inverted)
+     {
+         if (predIndex == RegisterConsts.PredicateTrueIndex)
+         {
+             return new TreeNodeUse(-1, null);
+         }
+
+         TreeNodeUse use = Consume(predDefs, predIndex);
+
+         return inverted ? use.Flip() : use;
+     }
+
+     TreeNodeUse ReadGpr(byte regIndex)
+     {
+         return regIndex == RegisterConsts.RegisterZeroIndex
+             ? new TreeNodeUse(-1, null)
+             : Consume(gprDefs, regIndex);
+     }
+
+     byte nextOrder = 0;
+
+     foreach (Block block in blocks)
+     {
+         if (block.Predecessors.Count > 1)
+         {
+             TreeNode labelNode = new TreeNode(nextOrder++);
+             roots.Add(labelNode);
+             labelsByAddress.Add(block.Address, labelNode);
+         }
+
+         for (int opIndex = 0; opIndex < block.OpCodes.Count; opIndex++)
+         {
+             InstOp op = block.OpCodes[opIndex];
+             InstProps props = op.Props;
+             ulong raw = op.RawOpCode;
+
+             TreeNode node = new TreeNode(op, IsOrderDependant(op.Name) ? nextOrder : (byte)0);
+
+             // Add uses.
+
+             if (!props.HasFlag(InstProps.NoPred))
+             {
+                 byte guardPred = (byte)((raw >> 16) & 7);
+                 bool guardInverted = (raw & 0x80000) != 0;
+                 node.Uses.Add(ReadPred(guardPred, guardInverted));
+             }
+
+             if (props.HasFlag(InstProps.Ps))
+             {
+                 byte srcPred = (byte)((raw >> 39) & 7);
+                 bool srcInverted = (raw & 0x40000000000) != 0;
+                 node.Uses.Add(ReadPred(srcPred, srcInverted));
+             }
+
+             if (props.HasFlag(InstProps.Ra))
+             {
+                 node.Uses.Add(ReadGpr((byte)(raw >> 8)));
+             }
+
+             if ((props & (InstProps.Rb | InstProps.Rb2)) != 0)
+             {
+                 byte rb = props.HasFlag(InstProps.Rb2) ? (byte)raw : (byte)(raw >> 20);
+                 node.Uses.Add(ReadGpr(rb));
+             }
+
+             if (props.HasFlag(InstProps.Rc))
+             {
+                 node.Uses.Add(ReadGpr((byte)(raw >> 39)));
+             }
+
+             if (op.Name == InstName.Bra && labelsByAddress.TryGetValue(op.GetAbsoluteAddress(), out TreeNode target))
+             {
+                 node.Uses.Add(new TreeNodeUse(0, target));
+             }
+
+             // Make definitions.
+
+             int defIndex = 0;
+
+             InstProps pdKind = props & InstProps.PdMask;
+
+             if (pdKind != 0)
+             {
+                 int shift;
+
+                 switch (pdKind)
+                 {
+                     case InstProps.Pd:
+                         shift = 3;
+                         break;
+                     case InstProps.LPd:
+                         shift = 48;
+                         break;
+                     case InstProps.SPd:
+                         shift = 30;
+                         break;
+                     case InstProps.TPd:
+                         shift = 51;
+                         break;
+                     case InstProps.VPd:
+                         shift = 45;
+                         break;
+                     default:
+                         throw new InvalidOperationException($"Table has unknown predicate destination {pdKind}.");
+                 }
+
+                 byte destPred = (byte)((raw >> shift) & 7);
+
+                 if (destPred != RegisterConsts.PredicateTrueIndex)
+                 {
+                     predDefs[destPred] = new TreeNodeUse(defIndex, node);
+                 }
+
+                 // The slot is consumed even when the write targets the true predicate.
+                 defIndex++;
+             }
+
+             if (props.HasFlag(InstProps.Rd))
+             {
+                 byte rd = (byte)raw;
+
+                 if (rd != RegisterConsts.RegisterZeroIndex)
+                 {
+                     gprDefs[rd] = new TreeNodeUse(defIndex, node);
+                 }
+             }
+
+             roots.Add(node);
+         }
+     }
+
+     return roots.ToArray();
+ }
+
+ /// <summary>
+ /// Tells whether <paramref name="name"/> is one of the instructions whose tree node records the current
+ /// order counter: the Atom*, Ld*, Suatom* and Suld* instructions listed below.
+ /// </summary>
+ private static bool IsOrderDependant(InstName name)
+ {
+     return name switch
+     {
+         InstName.Atom => true,
+         InstName.AtomCas => true,
+         InstName.Atoms => true,
+         InstName.AtomsCas => true,
+         InstName.Ld => true,
+         InstName.Ldg => true,
+         InstName.Ldl => true,
+         InstName.Lds => true,
+         InstName.Suatom => true,
+         InstName.SuatomB => true,
+         InstName.SuatomB2 => true,
+         InstName.SuatomCas => true,
+         InstName.SuatomCasB => true,
+         InstName.Suld => true,
+         InstName.SuldB => true,
+         InstName.SuldD => true,
+         InstName.SuldDB => true,
+         _ => false
+     };
+ }
+
+ /// <summary>
+ /// Non-generic view of a pattern node, so nodes with different opcode struct types can share one array.
+ /// </summary>
+ private interface IPatternTreeNode
+ {
+     /// <summary>Instruction name the candidate must have.</summary>
+     InstName Name { get; }
+
+     /// <summary>Node kind the candidate must have.</summary>
+     TreeNodeType Type { get; }
+
+     /// <summary>Order value the candidate must have.</summary>
+     byte Order { get; }
+
+     /// <summary>Compared against the candidate's <c>InstProps.Ib</c> flag.</summary>
+     bool IsImm { get; }
+
+     /// <summary>Expected uses, compared position by position with the candidate's uses.</summary>
+     List<PatternTreeNodeUse> Uses { get; }
+
+     /// <summary>Checks the instruction itself (name and encoded fields), not its uses.</summary>
+     bool Matches(in InstOp opInfo);
+ }
+
+ /// <summary>
+ /// Pattern-side counterpart of <see cref="TreeNodeUse"/>: the expected producer node (or null),
+ /// the expected index, and whether the use is expected to be inverted.
+ /// </summary>
+ private readonly struct PatternTreeNodeUse
+ {
+     public IPatternTreeNode Node { get; }
+     public int Index { get; }
+     public bool Inverted { get; }
+
+     /// <summary>A copy of this use with <see cref="Inverted"/> toggled.</summary>
+     public PatternTreeNodeUse Inv => new PatternTreeNodeUse(Index, !Inverted, Node);
+
+     /// <summary>Creates a non-inverted use.</summary>
+     public PatternTreeNodeUse(int index, IPatternTreeNode node) : this(index, false, node)
+     {
+     }
+
+     private PatternTreeNodeUse(int index, bool inverted, IPatternTreeNode node)
+     {
+         Node = node;
+         Index = index;
+         Inverted = inverted;
+     }
+ }
+
+ /// <summary>
+ /// Pattern node for one instruction. <typeparamref name="T"/> is the type the raw 64-bit opcode is
+ /// reinterpreted as before it is handed to the match predicate.
+ /// </summary>
+ private class PatternTreeNode<T> : IPatternTreeNode
+ {
+     private readonly Func<T, bool> _match;
+
+     public InstName Name { get; }
+     public TreeNodeType Type { get; }
+     public byte Order { get; }
+     public bool IsImm { get; }
+     public List<PatternTreeNodeUse> Uses { get; } = new List<PatternTreeNodeUse>();
+
+     /// <summary>Use of this node's output at index 0.</summary>
+     public PatternTreeNodeUse Out => OutAt(0);
+
+     public PatternTreeNode(InstName name, Func<T, bool> match, TreeNodeType type = TreeNodeType.Op, byte order = 0, bool isImm = false)
+     {
+         _match = match;
+         Name = name;
+         Type = type;
+         Order = order;
+         IsImm = isImm;
+     }
+
+     /// <summary>Appends <paramref name="use"/> to the expected uses and returns this node for chaining.</summary>
+     public PatternTreeNode<T> Use(PatternTreeNodeUse use)
+     {
+         Uses.Add(use);
+
+         return this;
+     }
+
+     /// <summary>Use of this node's output at the given <paramref name="index"/>.</summary>
+     public PatternTreeNodeUse OutAt(int index) => new PatternTreeNodeUse(index, this);
+
+     /// <summary>
+     /// True when <paramref name="opInfo"/> has this node's instruction name and the match predicate
+     /// accepts its raw opcode reinterpreted as <typeparamref name="T"/>.
+     /// </summary>
+     public bool Matches(in InstOp opInfo)
+     {
+         if (opInfo.Name != Name)
+         {
+             return false;
+         }
+
+         // Copy to a local first: Unsafe.As needs a ref to a writable ulong.
+         ulong raw = opInfo.RawOpCode;
+
+         return _match(Unsafe.As<ulong, T>(ref raw));
+     }
+ }
+
+ /// <summary>
+ /// Checks whether <paramref name="function"/> is an FSI begin/end helper described by <paramref name="pattern"/>.
+ /// </summary>
+ /// <remarks>
+ /// The function's first block must end in a <c>Cal</c> whose target matches the "is last warp thread" pattern.
+ /// The predicate destination of that target's first root is stored in <c>externalRegs[0]</c> before
+ /// <paramref name="functionTree"/> is matched. On success the function is removed from the call target's callers.
+ /// </remarks>
+ /// <returns>True when both the call target and the function itself match.</returns>
+ private static bool MatchesFsi(
+     IPatternTreeNode[] pattern,
+     DecodedProgram program,
+     DecodedFunction function,
+     TreeNode[] functionTree,
+     byte[] externalRegs)
+ {
+     if (function.Blocks.Length == 0)
+     {
+         return false;
+     }
+
+     InstOp lastOp = function.Blocks[0].GetLastOp();
+
+     if (lastOp.Name != InstName.Cal)
+     {
+         return false;
+     }
+
+     DecodedFunction callee = program.GetFunctionByAddress(lastOp.GetAbsoluteAddress());
+
+     if (callee == null)
+     {
+         return false;
+     }
+
+     TreeNode[] calleeTree = BuildTree(callee.Blocks);
+
+     if (!Matches(_fsiIsLastWarpThreadPatternTree, calleeTree))
+     {
+         return false;
+     }
+
+     // Written even if the pattern match below fails, exactly like the register slots filled by RunPass.
+     externalRegs[0] = calleeTree[0].GetPd();
+
+     if (!Matches(pattern, functionTree, externalRegs))
+     {
+         return false;
+     }
+
+     callee.RemoveCaller(function);
+
+     return true;
+ }
+
+ /// <summary>
+ /// True when <paramref name="pTree"/> and <paramref name="cTree"/> have the same number of roots and every
+ /// pattern root matches the candidate root at the same position.
+ /// </summary>
+ /// <param name="externalRegs">Register table forwarded to the per-node match; may be omitted.</param>
+ private static bool Matches(IPatternTreeNode[] pTree, TreeNode[] cTree, byte[] externalRegs = null)
+ {
+     if (pTree.Length != cTree.Length)
+     {
+         return false;
+     }
+
+     int matched = 0;
+
+     // Stops at the first root that does not match.
+     while (matched < pTree.Length && Matches(pTree[matched], cTree[matched], externalRegs))
+     {
+         matched++;
+     }
+
+     return matched == pTree.Length;
+ }
+
+ /// <summary>
+ /// Recursively compares a pattern node with a candidate node: the instruction itself, node type, order,
+ /// immediate flag, and then (for op nodes) every use in order.
+ /// </summary>
+ /// <remarks>
+ /// A pattern use with index -2 or lower refers to <c>externalRegs[-index - 2]</c>; the candidate use must
+ /// then encode that same register number as <c>-(register + 2)</c>. Other indices must be equal on both sides.
+ /// </remarks>
+ private static bool Matches(IPatternTreeNode pTreeNode, TreeNode cTreeNode, byte[] externalRegs)
+ {
+     if (!pTreeNode.Matches(in cTreeNode.Op))
+     {
+         return false;
+     }
+
+     if (pTreeNode.Type != cTreeNode.Type || pTreeNode.Order != cTreeNode.Order)
+     {
+         return false;
+     }
+
+     if (pTreeNode.IsImm != cTreeNode.Op.Props.HasFlag(InstProps.Ib))
+     {
+         return false;
+     }
+
+     if (pTreeNode.Type != TreeNodeType.Op)
+     {
+         // Only op nodes have uses to compare.
+         return true;
+     }
+
+     List<PatternTreeNodeUse> expectedUses = pTreeNode.Uses;
+     List<TreeNodeUse> actualUses = cTreeNode.Uses;
+
+     if (expectedUses.Count != actualUses.Count)
+     {
+         return false;
+     }
+
+     for (int i = 0; i < expectedUses.Count; i++)
+     {
+         PatternTreeNodeUse expected = expectedUses[i];
+         TreeNodeUse actual = actualUses[i];
+
+         bool indexMatches = expected.Index <= -2
+             ? externalRegs[-expected.Index - 2] == (-actual.Index - 2)
+             : expected.Index == actual.Index;
+
+         if (!indexMatches || expected.Inverted != actual.Inverted)
+         {
+             return false;
+         }
+
+         bool expectsLeaf = expected.Node == null;
+
+         if (expectsLeaf != (actual.Node == null))
+         {
+             return false;
+         }
+
+         if (!expectsLeaf && !Matches(expected.Node, actual.Node, externalRegs))
+         {
+             return false;
+         }
+     }
+
+     return true;
+ }
+
+ private static class PatternTrees
+ {
+ public static IPatternTreeNode[] GetFsiGetAddress() // Builds the 5-root pattern that RunPass tries first on each non-main function.
+ {
+ var affinityValue = S2r(SReg.Affinity).Use(PT).Out; // Shared leaf: output of an S2r of SReg.Affinity.
+ var orderingTicketValue = S2r(SReg.OrderingTicket).Use(PT).Out; // Shared leaf: output of an S2r of SReg.OrderingTicket, consumed by three roots below.
+
+ return new IPatternTreeNode[]
+ {
+ Iscadd(cc: true, 2, 0, 404) // Root 0: RunPass stores its Rd in externalRegs[1].
+ .Use(PT)
+ .Use(Iscadd(cc: false, 8)
+ .Use(PT)
+ .Use(Lop32i(LogicOp.And, 0xff)
+ .Use(PT)
+ .Use(affinityValue).Out)
+ .Use(Lop32i(LogicOp.And, 0xff)
+ .Use(PT)
+ .Use(orderingTicketValue).Out).Out),
+ ShrU32W(16) // Root 1: RunPass stores its Rd in externalRegs[3].
+ .Use(PT)
+ .Use(orderingTicketValue),
+ Iadd32i(0x200) // Root 2: RunPass stores its Rd in externalRegs[2].
+ .Use(PT)
+ .Use(Lop32i(LogicOp.And, 0xfe00)
+ .Use(PT)
+ .Use(orderingTicketValue).Out),
+ Iadd(x: true, 0, 405).Use(PT).Use(RZ), // Root 3: RunPass reads its Rd as the fourth register, which must equal externalRegs[1] + 1.
+ Ret().Use(PT) // Root 4: the function must end with a return.
+ };
+ }
+
+ public static IPatternTreeNode[] GetFsiGetAddressV2() // Builds the alternative 5-root pattern that RunPass tries when GetFsiGetAddress does not match.
+ {
+ var affinityValue = S2r(SReg.Affinity).Use(PT).Out; // Shared leaf: output of an S2r of SReg.Affinity.
+ var orderingTicketValue = S2r(SReg.OrderingTicket).Use(PT).Out; // Shared leaf: output of an S2r of SReg.OrderingTicket, consumed by three roots below.
+
+ return new IPatternTreeNode[]
+ {
+ ShrU32W(16) // Root 0: RunPass stores its Rd in externalRegs[3].
+ .Use(PT)
+ .Use(orderingTicketValue),
+ Iadd32i(0x200) // Root 1: RunPass stores its Rd in externalRegs[2].
+ .Use(PT)
+ .Use(Lop32i(LogicOp.And, 0xfe00)
+ .Use(PT)
+ .Use(orderingTicketValue).Out),
+ Iscadd(cc: true, 2, 0, 404) // Root 2: RunPass stores its Rd in externalRegs[1]; this variant combines the inputs with one Bfi instead of a nested Iscadd.
+ .Use(PT)
+ .Use(Bfi(0x808)
+ .Use(PT)
+ .Use(affinityValue)
+ .Use(Lop32i(LogicOp.And, 0xff)
+ .Use(PT)
+ .Use(orderingTicketValue).Out).Out),
+ Iadd(x: true, 0, 405).Use(PT).Use(RZ), // Root 3: RunPass reads its Rd as the fourth register, which must equal externalRegs[1] + 1.
+ Ret().Use(PT) // Root 4: the function must end with a return.
+ };
+ }
+
+ public static IPatternTreeNode[] GetFsiIsLastWarpThread()
+ {
+ var threadKillValue = S2r(SReg.ThreadKill).Use(PT).Out;
+ var laneIdValue = S2r(SReg.LaneId).Use(PT).Out;
+
+ return new IPatternTreeNode[]
+ {
+ IsetpU32(IComp.Eq)
+ .Use(PT)
+ .Use(PT)
+ .Use(FloU32()
+ .Use(PT)
+ .Use(Vote(VoteMode.Any)
+ .Use(PT)
+ .Use(IsetpU32(IComp.Ne)
+ .Use(PT)
+ .Use(PT)
+ .Use(Lop(negB: true, LogicOp.PassB)
+ .Use(PT)
+ .Use(RZ)
+ .Use(threadKillValue).OutAt(1))
+ .Use(RZ).Out).OutAt(1)).Out)
+ .Use(laneIdValue),
+ Ret().Use(PT)
+ };
+ }
+
+ public static IPatternTreeNode[] GetFsiBeginPattern()
+ {
+ var addressLowValue = CallArg(1);
+
+ static PatternTreeNodeUse HighU16Equals(PatternTreeNodeUse x)
+ {
+ var expectedValue = CallArg(3);
+
+ return IsetpU32(IComp.Eq)
+ .Use(PT)
+ .Use(PT)
+ .Use(ShrU32W(16).Use(PT).Use(x).Out)
+ .Use(expectedValue).Out;
+ }
+
+ PatternTreeNode<byte> label;
+
+ return new IPatternTreeNode[]
+ {
+ Cal(),
+ Ret().Use(CallArg(0).Inv),
+ Ret()
+ .Use(HighU16Equals(LdgE(CacheOpLd.Cg, LsSize.B32)
+ .Use(PT)
+ .Use(addressLowValue).Out)),
+ label = Label(),
+ Bra()
+ .Use(HighU16Equals(LdgE(CacheOpLd.Cg, LsSize.B32, 1)
+ .Use(PT)
+ .Use(addressLowValue).Out).Inv)
+ .Use(label.Out),
+ Ret().Use(PT)
+ };
+ }
+
+ public static IPatternTreeNode[] GetFsiEndPattern()
+ {
+ var voteResult = Vote(VoteMode.All).Use(PT).Use(PT).OutAt(1);
+ var popcResult = Popc().Use(PT).Use(voteResult).Out;
+ var threadKillValue = S2r(SReg.ThreadKill).Use(PT).Out;
+ var laneIdValue = S2r(SReg.LaneId).Use(PT).Out;
+
+ var addressLowValue = CallArg(1);
+ var incrementValue = CallArg(2);
+
+ return new IPatternTreeNode[]
+ {
+ Cal(),
+ Ret().Use(CallArg(0).Inv),
+ Membar(Decoders.Membar.Vc).Use(PT),
+ Ret().Use(IsetpU32(IComp.Ne)
+ .Use(PT)
+ .Use(PT)
+ .Use(threadKillValue)
+ .Use(RZ).Out),
+ RedE(RedOp.Add, AtomSize.U32)
+ .Use(IsetpU32(IComp.Eq)
+ .Use(PT)
+ .Use(PT)
+ .Use(FloU32()
+ .Use(PT)
+ .Use(voteResult).Out)
+ .Use(laneIdValue).Out)
+ .Use(addressLowValue)
+ .Use(Xmad(XmadCop.Cbcc, psl: true, hiloA: true, hiloB: true)
+ .Use(PT)
+ .Use(incrementValue)
+ .Use(Xmad(XmadCop.Cfull, mrg: true, hiloB: true)
+ .Use(PT)
+ .Use(incrementValue)
+ .Use(popcResult)
+ .Use(RZ).Out)
+ .Use(Xmad(XmadCop.Cfull)
+ .Use(PT)
+ .Use(incrementValue)
+ .Use(popcResult)
+ .Use(RZ).Out).Out),
+ Ret().Use(PT)
+ };
+ }
+
+ private static PatternTreeNode<InstBfiI> Bfi(int imm)
+ {
+ return new(InstName.Bfi, (op) => !op.WriteCC && op.Imm20 == imm, isImm: true);
+ }
+
+ private static PatternTreeNode<InstBra> Bra()
+ {
+ return new(InstName.Bra, (op) => op.Ccc == Ccc.T && !op.Ca);
+ }
+
+ private static PatternTreeNode<InstCal> Cal()
+ {
+ return new(InstName.Cal, (op) => !op.Ca && op.Inc);
+ }
+
+ private static PatternTreeNode<InstFloR> FloU32()
+ {
+ return new(InstName.Flo, (op) => !op.Signed && !op.Sh && !op.NegB && !op.WriteCC);
+ }
+
+ private static PatternTreeNode<InstIaddC> Iadd(bool x, int cbufSlot, int cbufOffset)
+ {
+ return new(InstName.Iadd, (op) =>
+ !op.Sat &&
+ !op.WriteCC &&
+ op.X == x &&
+ op.AvgMode == AvgMode.NoNeg &&
+ op.CbufSlot == cbufSlot &&
+ op.CbufOffset == cbufOffset);
+ }
+
+ private static PatternTreeNode<InstIadd32i> Iadd32i(int imm)
+ {
+ return new(InstName.Iadd32i, (op) => !op.Sat && !op.WriteCC && !op.X && op.AvgMode == AvgMode.NoNeg && op.Imm32 == imm);
+ }
+
+ private static PatternTreeNode<InstIscaddR> Iscadd(bool cc, int imm)
+ {
+ return new(InstName.Iscadd, (op) => op.WriteCC == cc && op.AvgMode == AvgMode.NoNeg && op.Imm5 == imm);
+ }
+
+ private static PatternTreeNode<InstIscaddC> Iscadd(bool cc, int imm, int cbufSlot, int cbufOffset)
+ {
+ return new(InstName.Iscadd, (op) =>
+ op.WriteCC == cc &&
+ op.AvgMode == AvgMode.NoNeg &&
+ op.Imm5 == imm &&
+ op.CbufSlot == cbufSlot &&
+ op.CbufOffset == cbufOffset);
+ }
+
+ private static PatternTreeNode<InstIsetpR> IsetpU32(IComp comp)
+ {
+ return new(InstName.Isetp, (op) => !op.Signed && op.IComp == comp && op.Bop == BoolOp.And);
+ }
+
+ private static PatternTreeNode<byte> Label()
+ {
+ return new(InstName.Invalid, (op) => true, type: TreeNodeType.Label);
+ }
+
+ private static PatternTreeNode<InstLopR> Lop(bool negB, LogicOp logicOp)
+ {
+ return new(InstName.Lop, (op) => !op.NegA && op.NegB == negB && !op.WriteCC && !op.X && op.Lop == logicOp && op.PredicateOp == PredicateOp.F);
+ }
+
+ private static PatternTreeNode<InstLop32i> Lop32i(LogicOp logicOp, int imm)
+ {
+ return new(InstName.Lop32i, (op) => !op.NegA && !op.NegB && !op.X && !op.WriteCC && op.LogicOp == logicOp && op.Imm32 == imm);
+ }
+
+ private static PatternTreeNode<InstMembar> Membar(Membar membar)
+ {
+ return new(InstName.Membar, (op) => op.Membar == membar);
+ }
+
+ private static PatternTreeNode<InstPopcR> Popc()
+ {
+ return new(InstName.Popc, (op) => !op.NegB);
+ }
+
+ private static PatternTreeNode<InstRet> Ret()
+ {
+ return new(InstName.Ret, (op) => op.Ccc == Ccc.T);
+ }
+
+ private static PatternTreeNode<InstS2r> S2r(SReg reg)
+ {
+ return new(InstName.S2r, (op) => op.SReg == reg);
+ }
+
+ private static PatternTreeNode<InstShrI> ShrU32W(int imm)
+ {
+ return new(InstName.Shr, (op) => !op.Signed && !op.Brev && op.M && op.XMode == 0 && op.Imm20 == imm, isImm: true);
+ }
+
+ private static PatternTreeNode<InstLdg> LdgE(CacheOpLd cacheOp, LsSize size, byte order = 0)
+ {
+ return new(InstName.Ldg, (op) => op.E && op.CacheOp == cacheOp && op.LsSize == size, order: order);
+ }
+
+ private static PatternTreeNode<InstRed> RedE(RedOp redOp, AtomSize size, byte order = 0)
+ {
+ return new(InstName.Red, (op) => op.E && op.RedOp == redOp && op.RedSize == size, order: order);
+ }
+
+ private static PatternTreeNode<InstVote> Vote(VoteMode mode)
+ {
+ return new(InstName.Vote, (op) => op.VoteMode == mode);
+ }
+
+ private static PatternTreeNode<InstXmadR> Xmad(XmadCop cop, bool psl = false, bool mrg = false, bool hiloA = false, bool hiloB = false)
+ {
+ return new(InstName.Xmad, (op) => op.XmadCop == cop && op.Psl == psl && op.Mrg == mrg && op.HiloA == hiloA && op.HiloB == hiloB);
+ }
+
+ private static PatternTreeNodeUse PT => PTOrRZ();
+ private static PatternTreeNodeUse RZ => PTOrRZ();
+ private static PatternTreeNodeUse Undef => new PatternTreeNodeUse(0, null);
+
+ private static PatternTreeNodeUse CallArg(int index)
+ {
+ return new PatternTreeNodeUse(-(index + 2), null);
+ }
+
+ private static PatternTreeNodeUse PTOrRZ()
+ {
+ return new PatternTreeNodeUse(-1, null);
+ }
+ }
+
+ /// <summary>
+ /// Writes a code tree node and, recursively, all of its uses to the console.
+ /// </summary>
+ /// <param name="node">Node to print</param>
+ /// <param name="indentation">Prefix written before every use line of this node</param>
+ private static void PrintTreeNode(TreeNode node, string indentation)
+ {
+     Console.WriteLine($" {node.Op.Name}");
+
+     int lastIndex = node.Uses.Count - 1;
+
+     for (int useIndex = 0; useIndex <= lastIndex; useIndex++)
+     {
+         TreeNodeUse use = node.Uses[useIndex];
+         bool isLast = useIndex == lastIndex;
+         char separator = isLast ? '`' : '|';
+
+         // Text shared by both the "has producer" and the "NULL" form of the line.
+         string label = $"{indentation} {separator}- ({(use.Inverted ? "INV " : "")}{use.Index})";
+
+         if (use.Node == null)
+         {
+             Console.WriteLine($"{label} NULL");
+             continue;
+         }
+
+         // The producer name is written on the same line by the recursive call.
+         Console.Write(label);
+         PrintTreeNode(use.Node, indentation + (isLast ? " " : " | "));
+     }
+ }
+
+ /// <summary>
+ /// Writes a pattern tree node and, recursively, all of its uses to the console.
+ /// </summary>
+ /// <param name="node">Pattern node to print</param>
+ /// <param name="indentation">Prefix written before every use line of this node</param>
+ private static void PrintTreeNode(IPatternTreeNode node, string indentation)
+ {
+     Console.WriteLine($" {node.Name}");
+
+     int lastIndex = node.Uses.Count - 1;
+
+     for (int useIndex = 0; useIndex <= lastIndex; useIndex++)
+     {
+         PatternTreeNodeUse use = node.Uses[useIndex];
+         bool isLast = useIndex == lastIndex;
+         char separator = isLast ? '`' : '|';
+
+         // Text shared by both the "has producer" and the "NULL" form of the line.
+         string label = $"{indentation} {separator}- ({(use.Inverted ? "INV " : "")}{use.Index})";
+
+         if (use.Node == null)
+         {
+             Console.WriteLine($"{label} NULL");
+             continue;
+         }
+
+         // The producer name is written on the same line by the recursive call.
+         Console.Write(label);
+         PrintTreeNode(use.Node, indentation + (isLast ? " " : " | "));
+     }
+ }
+ }
+}
diff --git a/src/Ryujinx.Graphics.Shader/Translation/GlobalMemory.cs b/src/Ryujinx.Graphics.Shader/Translation/GlobalMemory.cs
new file mode 100644
index 00000000..774a128d
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/Translation/GlobalMemory.cs
@@ -0,0 +1,52 @@
+using Ryujinx.Graphics.Shader.IntermediateRepresentation;
+
+namespace Ryujinx.Graphics.Shader.Translation
+{
+ /// <summary>
+ /// Constants and helpers describing where buffer descriptors are located, in words,
+ /// and which instructions access global memory.
+ /// </summary>
+ static class GlobalMemory
+ {
+     private const int StorageDescsBaseOffset = 0x44; // In words.
+
+     public const int StorageDescSize = 4; // In words.
+     public const int StorageMaxCount = 16;
+
+     public const int StorageDescsSize = StorageDescSize * StorageMaxCount;
+
+     public const int UbeBaseOffset = 0x98; // In words.
+     public const int UbeMaxCount = 9;
+     public const int UbeDescsSize = StorageDescSize * UbeMaxCount;
+     public const int UbeFirstCbuf = 8;
+
+     /// <summary>
+     /// Checks if an instruction accesses global memory.
+     /// </summary>
+     /// <param name="inst">Instruction to check</param>
+     /// <param name="storageKind">Storage kind of the operation; only considered for atomic instructions</param>
+     /// <returns>True for global atomics and for the global load/store instructions</returns>
+     public static bool UsesGlobalMemory(Instruction inst, StorageKind storageKind)
+     {
+         if (inst.IsAtomic() && storageKind == StorageKind.GlobalMemory)
+         {
+             return true;
+         }
+
+         switch (inst)
+         {
+             case Instruction.LoadGlobal:
+             case Instruction.StoreGlobal:
+             case Instruction.StoreGlobal16:
+             case Instruction.StoreGlobal8:
+                 return true;
+             default:
+                 return false;
+         }
+     }
+
+     /// <summary>
+     /// Gets the word offset of the storage descriptor at the given slot for a shader stage.
+     /// </summary>
+     public static int GetStorageCbOffset(ShaderStage stage, int slot)
+         => GetStorageBaseCbOffset(stage) + slot * StorageDescSize;
+
+     /// <summary>
+     /// Gets the word offset of the first storage descriptor for a shader stage.
+     /// </summary>
+     /// <param name="stage">Shader stage</param>
+     /// <returns>Base offset for the stage, or 0 for a stage not listed below</returns>
+     public static int GetStorageBaseCbOffset(ShaderStage stage)
+     {
+         switch (stage)
+         {
+             case ShaderStage.Vertex:
+                 return StorageDescsBaseOffset;
+             case ShaderStage.TessellationControl:
+                 return StorageDescsBaseOffset + 1 * StorageDescsSize;
+             // Compute uses the same descriptor region as tessellation evaluation.
+             case ShaderStage.Compute:
+             case ShaderStage.TessellationEvaluation:
+                 return StorageDescsBaseOffset + 2 * StorageDescsSize;
+             case ShaderStage.Geometry:
+                 return StorageDescsBaseOffset + 3 * StorageDescsSize;
+             case ShaderStage.Fragment:
+                 return StorageDescsBaseOffset + 4 * StorageDescsSize;
+             default:
+                 return 0;
+         }
+     }
+
+     /// <summary>
+     /// Gets the word offset of the descriptor at the given slot, starting at <see cref="UbeBaseOffset"/>.
+     /// </summary>
+     public static int GetConstantUbeOffset(int slot)
+         => UbeBaseOffset + slot * StorageDescSize;
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/Translation/Optimizations/BindlessElimination.cs b/src/Ryujinx.Graphics.Shader/Translation/Optimizations/BindlessElimination.cs
new file mode 100644
index 00000000..0c196c4d
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/Translation/Optimizations/BindlessElimination.cs
@@ -0,0 +1,263 @@
+using Ryujinx.Graphics.Shader.Instructions;
+using Ryujinx.Graphics.Shader.IntermediateRepresentation;
+using System.Collections.Generic;
+
+namespace Ryujinx.Graphics.Shader.Translation.Optimizations
+{
+ class BindlessElimination
+ {
+ /// <summary>
+ /// Scans a block for bindless texture and image operations and, when the handle can be
+ /// traced back to constant buffer values, rewrites them to use a fixed handle.
+ /// </summary>
+ /// <param name="block">Block whose operations are inspected</param>
+ /// <param name="config">Shader configuration, used for format/type queries and to record the used texture</param>
+ public static void RunPass(BasicBlock block, ShaderConfig config)
+ {
+ // We can turn a bindless into regular access by recognizing the pattern
+ // produced by the compiler for separate texture and sampler.
+ // We check for the following conditions:
+ // - The handle is a constant buffer value.
+ // - The handle is the result of a bitwise OR logical operation.
+ // - Both sources of the OR operation come from a constant buffer.
+ for (LinkedListNode<INode> node = block.Operations.First; node != null; node = node.Next)
+ {
+ if (!(node.Value is TextureOperation texOp))
+ {
+ continue;
+ }
+
+ if ((texOp.Flags & TextureFlags.Bindless) == 0)
+ {
+ continue;
+ }
+
+ if (texOp.Inst == Instruction.Lod ||
+ texOp.Inst == Instruction.TextureSample ||
+ texOp.Inst == Instruction.TextureSize)
+ {
+ Operand bindlessHandle = Utils.FindLastOperation(texOp.GetSource(0), block);
+
+ // Some instructions do not encode an accurate sampler type:
+ // - Most instructions use the same type for 1D and Buffer.
+ // - Query instructions may not have any type.
+ // For those cases, we need to try getting the type from current GPU state,
+ // as long as bindless elimination is successful and we know where the texture descriptor is located.
+ bool rewriteSamplerType =
+ texOp.Type == SamplerType.TextureBuffer ||
+ texOp.Inst == Instruction.TextureSize;
+
+ // Simplest case: the handle is read directly from a constant buffer.
+ if (bindlessHandle.Type == OperandType.ConstantBuffer)
+ {
+ SetHandle(config, texOp, bindlessHandle.GetCbufOffset(), bindlessHandle.GetCbufSlot(), rewriteSamplerType, isImage: false);
+ continue;
+ }
+
+ if (!(bindlessHandle.AsgOp is Operation handleCombineOp))
+ {
+ continue;
+ }
+
+ if (handleCombineOp.Inst != Instruction.BitwiseOr)
+ {
+ continue;
+ }
+
+ Operand src0 = Utils.FindLastOperation(handleCombineOp.GetSource(0), block);
+ Operand src1 = Utils.FindLastOperation(handleCombineOp.GetSource(1), block);
+
+ // For cases where we have a constant, ensure that the constant is always
+ // the second operand.
+ // Since this is a commutative operation, both are fine,
+ // and having a "canonical" representation simplifies some checks below.
+ if (src0.Type == OperandType.Constant && src1.Type != OperandType.Constant)
+ {
+ Operand temp = src1;
+ src1 = src0;
+ src0 = temp;
+ }
+
+ TextureHandleType handleType = TextureHandleType.SeparateSamplerHandle;
+
+ // Try to match the following patterns:
+ // Masked pattern:
+ // - samplerHandle = samplerHandle & 0xFFF00000;
+ // - textureHandle = textureHandle & 0xFFFFF;
+ // - combinedHandle = samplerHandle | textureHandle;
+ // Where samplerHandle and textureHandle come from a constant buffer.
+ // Shifted pattern:
+ // - samplerHandle = samplerId << 20;
+ // - combinedHandle = samplerHandle | textureHandle;
+ // Where samplerId and textureHandle come from a constant buffer.
+ // Constant pattern:
+ // - combinedHandle = samplerHandleConstant | textureHandle;
+ // Where samplerHandleConstant is a constant value, and textureHandle comes from a constant buffer.
+ // After a successful match, src0 holds the texture handle source and src1 the sampler source.
+ if (src0.AsgOp is Operation src0AsgOp)
+ {
+ if (src1.AsgOp is Operation src1AsgOp &&
+ src0AsgOp.Inst == Instruction.BitwiseAnd &&
+ src1AsgOp.Inst == Instruction.BitwiseAnd)
+ {
+ src0 = GetSourceForMaskedHandle(src0AsgOp, 0xFFFFF);
+ src1 = GetSourceForMaskedHandle(src1AsgOp, 0xFFF00000);
+
+ // The OR operation is commutative, so we can also try to swap the operands to get a match.
+ if (src0 == null || src1 == null)
+ {
+ src0 = GetSourceForMaskedHandle(src1AsgOp, 0xFFFFF);
+ src1 = GetSourceForMaskedHandle(src0AsgOp, 0xFFF00000);
+ }
+
+ if (src0 == null || src1 == null)
+ {
+ continue;
+ }
+ }
+ else if (src0AsgOp.Inst == Instruction.ShiftLeft)
+ {
+ Operand shift = src0AsgOp.GetSource(1);
+
+ if (shift.Type == OperandType.Constant && shift.Value == 20)
+ {
+ // The shifted value was the first OR operand: swap so that src0 is the
+ // other operand and src1 is the value before the shift.
+ src0 = src1;
+ src1 = src0AsgOp.GetSource(0);
+ handleType = TextureHandleType.SeparateSamplerId;
+ }
+ }
+ }
+ else if (src1.AsgOp is Operation src1AsgOp && src1AsgOp.Inst == Instruction.ShiftLeft)
+ {
+ Operand shift = src1AsgOp.GetSource(1);
+
+ if (shift.Type == OperandType.Constant && shift.Value == 20)
+ {
+ src1 = src1AsgOp.GetSource(0);
+ handleType = TextureHandleType.SeparateSamplerId;
+ }
+ }
+ else if (src1.Type == OperandType.Constant && (src1.Value & 0xfffff) == 0)
+ {
+ // The constant has no bits set in the low 20 bits, so it only contributes the upper part.
+ handleType = TextureHandleType.SeparateConstantSamplerHandle;
+ }
+
+ // No pattern produced a constant buffer operand for the first source: give up on this operation.
+ if (src0.Type != OperandType.ConstantBuffer)
+ {
+ continue;
+ }
+
+ if (handleType == TextureHandleType.SeparateConstantSamplerHandle)
+ {
+ // The second packed offset carries bits 20..31 of the constant instead of a cbuf offset.
+ SetHandle(
+ config,
+ texOp,
+ TextureHandle.PackOffsets(src0.GetCbufOffset(), ((src1.Value >> 20) & 0xfff), handleType),
+ TextureHandle.PackSlots(src0.GetCbufSlot(), 0),
+ rewriteSamplerType,
+ isImage: false);
+ }
+ else if (src1.Type == OperandType.ConstantBuffer)
+ {
+ SetHandle(
+ config,
+ texOp,
+ TextureHandle.PackOffsets(src0.GetCbufOffset(), src1.GetCbufOffset(), handleType),
+ TextureHandle.PackSlots(src0.GetCbufSlot(), src1.GetCbufSlot()),
+ rewriteSamplerType,
+ isImage: false);
+ }
+ }
+ else if (texOp.Inst == Instruction.ImageLoad ||
+ texOp.Inst == Instruction.ImageStore ||
+ texOp.Inst == Instruction.ImageAtomic)
+ {
+ Operand src0 = Utils.FindLastOperation(texOp.GetSource(0), block);
+
+ if (src0.Type == OperandType.ConstantBuffer)
+ {
+ int cbufOffset = src0.GetCbufOffset();
+ int cbufSlot = src0.GetCbufSlot();
+
+ // When the instruction carries no format, ask the configuration for it.
+ if (texOp.Format == TextureFormat.Unknown)
+ {
+ if (texOp.Inst == Instruction.ImageAtomic)
+ {
+ texOp.Format = config.GetTextureFormatAtomic(cbufOffset, cbufSlot);
+ }
+ else
+ {
+ texOp.Format = config.GetTextureFormat(cbufOffset, cbufSlot);
+ }
+ }
+
+ bool rewriteSamplerType = texOp.Type == SamplerType.TextureBuffer;
+
+ SetHandle(config, texOp, cbufOffset, cbufSlot, rewriteSamplerType, isImage: true);
+ }
+ }
+ }
+ }
+
+ /// <summary>
+ /// Gets the constant buffer operand of a bitwise AND that applies the expected handle mask.
+ /// </summary>
+ /// <remarks>
+ /// The caller is expected to have already checked that the operation is a bitwise AND.
+ /// </remarks>
+ /// <param name="asgOp">Bitwise AND operation</param>
+ /// <param name="mask">Mask that the constant operand must be equal to</param>
+ /// <returns>The constant buffer operand being masked, or null if the operands do not fit the pattern</returns>
+ private static Operand GetSourceForMaskedHandle(Operation asgOp, uint mask)
+ {
+     Operand lhs = asgOp.GetSource(0);
+     Operand rhs = asgOp.GetSource(1);
+
+     return (lhs.Type, rhs.Type) switch
+     {
+         // Both operands come from a constant buffer, so the mask value can't be verified here.
+         // Optimistically assume it matches, and avoid constant buffer 1 as official drivers
+         // use it to store compiler constants.
+         (OperandType.ConstantBuffer, OperandType.ConstantBuffer) => lhs.GetCbufSlot() == 1 ? rhs : lhs,
+         (OperandType.ConstantBuffer, OperandType.Constant) when (uint)rhs.Value == mask => lhs,
+         (OperandType.Constant, OperandType.ConstantBuffer) when (uint)lhs.Value == mask => rhs,
+         _ => null
+     };
+ }
+
+ /// <summary>
+ /// Sets the handle of a texture operation, optionally fixes up its sampler type
+ /// from the type reported by the GPU accessor, and records the texture as used.
+ /// </summary>
+ /// <param name="config">Shader configuration</param>
+ /// <param name="texOp">Texture operation to update</param>
+ /// <param name="cbufOffset">Constant buffer offset of the handle (may be a packed value)</param>
+ /// <param name="cbufSlot">Constant buffer slot of the handle (may be a packed value)</param>
+ /// <param name="rewriteSamplerType">True to replace the sampler type with the queried one when applicable</param>
+ /// <param name="isImage">True for image operations, which never get an explicit LOD source added</param>
+ private static void SetHandle(ShaderConfig config, TextureOperation texOp, int cbufOffset, int cbufSlot, bool rewriteSamplerType, bool isImage)
+ {
+     texOp.SetHandle(cbufOffset, cbufSlot);
+
+     if (rewriteSamplerType)
+     {
+         SamplerType queriedType = config.GpuAccessor.QuerySamplerType(cbufOffset, cbufSlot);
+
+         if (texOp.Inst.IsTextureQuery())
+         {
+             texOp.Type = queriedType;
+         }
+         else if (texOp.Type == SamplerType.TextureBuffer && queriedType == SamplerType.Texture1D)
+         {
+             // Index where the next extra source goes; the single 1D coordinate comes first.
+             int nextSourceIndex = 1;
+             SamplerType replacementType = queriedType;
+
+             if (InstEmit.Sample1DAs2D)
+             {
+                 // Sampled as 2D: add a second coordinate fixed at 0.
+                 replacementType = SamplerType.Texture2D;
+                 texOp.InsertSource(nextSourceIndex, OperandHelper.Const(0));
+                 nextSourceIndex++;
+             }
+
+             bool usesIntCoords = (texOp.Flags & TextureFlags.IntCoords) != 0;
+             bool hasLodLevel = (texOp.Flags & TextureFlags.LodLevel) != 0;
+
+             if (!isImage && usesIntCoords && !hasLodLevel)
+             {
+                 // IntCoords textures must always have explicit LOD.
+                 texOp.SetLodLevelFlag();
+                 texOp.InsertSource(nextSourceIndex, OperandHelper.Const(0));
+             }
+
+             texOp.Type = replacementType;
+         }
+     }
+
+     config.SetUsedTexture(texOp.Inst, texOp.Type, texOp.Format, texOp.Flags, cbufSlot, cbufOffset);
+ }
+ }
+}
diff --git a/src/Ryujinx.Graphics.Shader/Translation/Optimizations/BindlessToIndexed.cs b/src/Ryujinx.Graphics.Shader/Translation/Optimizations/BindlessToIndexed.cs
new file mode 100644
index 00000000..ca46a1f5
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/Translation/Optimizations/BindlessToIndexed.cs
@@ -0,0 +1,85 @@
+using Ryujinx.Graphics.Shader.IntermediateRepresentation;
+using System.Collections.Generic;
+
+using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
+
+namespace Ryujinx.Graphics.Shader.Translation.Optimizations
+{
+ /// <summary>
+ /// Pass that turns bindless texture accesses into indexed accesses when the handle
+ /// is loaded from the handles constant buffer with a constant base offset.
+ /// </summary>
+ static class BindlessToIndexed
+ {
+     /// <summary>
+     /// Rewrites every matching bindless texture operation of the block into an indexed one.
+     /// </summary>
+     /// <param name="block">Block whose operations are inspected</param>
+     /// <param name="config">Shader configuration where the used texture is recorded</param>
+     public static void RunPass(BasicBlock block, ShaderConfig config)
+     {
+         for (LinkedListNode<INode> current = block.Operations.First; current != null; current = current.Next)
+         {
+             if (!(current.Value is TextureOperation texOp) || (texOp.Flags & TextureFlags.Bindless) == 0)
+             {
+                 continue;
+             }
+
+             Operation addOp = FindHandleOffsetAdd(texOp);
+
+             if (addOp == null)
+             {
+                 continue;
+             }
+
+             // The constant added to the address is the base of the handle array;
+             // it is divided by 4 before being used as the handle.
+             TurnIntoIndexed(config, texOp, addOp.GetSource(1).Value / 4);
+
+             Operand index = Local();
+
+             Operand source = addOp.GetSource(0);
+
+             // The non-constant part of the address, shifted right by 3, becomes the array index.
+             Operation shrBy3 = new Operation(Instruction.ShiftRightU32, index, source, Const(3));
+
+             block.Operations.AddBefore(current, shrBy3);
+
+             texOp.SetSource(0, index);
+         }
+     }
+
+     /// <summary>
+     /// Looks for the pattern "LoadConstant(2, (x + constant) >> n)" on the handle of a texture operation.
+     /// </summary>
+     /// <remarks>
+     /// The conditions checked are:
+     /// - The handle is loaded using a LDC instruction.
+     /// - The handle is loaded from the constant buffer with the handles (CB2 for NVN).
+     /// - The load has a constant offset.
+     /// NOTE(review): the shift amount of the right shift is not inspected here - confirm that is intended.
+     /// </remarks>
+     /// <param name="texOp">Bindless texture operation</param>
+     /// <returns>The add operation with the constant offset as second source, or null if there is no match</returns>
+     private static Operation FindHandleOffsetAdd(TextureOperation texOp)
+     {
+         if (!(texOp.GetSource(0).AsgOp is Operation ldcOp) || ldcOp.Inst != Instruction.LoadConstant)
+         {
+             return null;
+         }
+
+         Operand cbufSlot = ldcOp.GetSource(0);
+         Operand cbufOffset = ldcOp.GetSource(1);
+
+         if (cbufSlot.Type != OperandType.Constant || cbufSlot.Value != 2)
+         {
+             return null;
+         }
+
+         if (!(cbufOffset.AsgOp is Operation shrOp) || shrOp.Inst != Instruction.ShiftRightU32)
+         {
+             return null;
+         }
+
+         if (!(shrOp.GetSource(0).AsgOp is Operation addOp) || addOp.Inst != Instruction.Add)
+         {
+             return null;
+         }
+
+         return addOp.GetSource(1).Type == OperandType.Constant ? addOp : null;
+     }
+
+     /// <summary>
+     /// Turns the texture operation into an indexed access and records the texture as used.
+     /// </summary>
+     private static void TurnIntoIndexed(ShaderConfig config, TextureOperation texOp, int handle)
+     {
+         texOp.TurnIntoIndexed(handle);
+         config.SetUsedTexture(texOp.Inst, texOp.Type, texOp.Format, texOp.Flags, texOp.CbufSlot, handle);
+     }
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/Translation/Optimizations/BranchElimination.cs b/src/Ryujinx.Graphics.Shader/Translation/Optimizations/BranchElimination.cs
new file mode 100644
index 00000000..c87d1474
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/Translation/Optimizations/BranchElimination.cs
@@ -0,0 +1,64 @@
+using Ryujinx.Graphics.Shader.IntermediateRepresentation;
+using System;
+
+namespace Ryujinx.Graphics.Shader.Translation.Optimizations
+{
+ /// <summary>
+ /// Pass that removes a branch when the block that follows branches to the same target anyway.
+ /// </summary>
+ static class BranchElimination
+ {
+     /// <summary>
+     /// Clears the branch of the block if it is redundant.
+     /// </summary>
+     /// <param name="block">Block to check</param>
+     /// <returns>True if the branch was removed, false otherwise</returns>
+     public static bool RunPass(BasicBlock block)
+     {
+         if (!block.HasBranch)
+         {
+             return false;
+         }
+
+         Operation lastOp = (Operation)block.GetLastOp();
+
+         if (!IsRedundantBranch(lastOp, Next(block)))
+         {
+             return false;
+         }
+
+         block.Branch = null;
+
+         return true;
+     }
+
+     /// <summary>
+     /// Checks if the branch ending the current block can be dropped.
+     /// </summary>
+     /// <remarks>
+     /// That is the case when the first operation of the next block is an unconditional
+     /// branch with the same destination as the current one: execution reaches the same
+     /// place either way.
+     /// </remarks>
+     /// <param name="current">Branch operation at the end of the current block</param>
+     /// <param name="nextBlock">Next non-empty block, or null if there is none</param>
+     /// <returns>True if the branch is redundant</returns>
+     private static bool IsRedundantBranch(Operation current, BasicBlock nextBlock)
+     {
+         return nextBlock != null &&
+                nextBlock.Operations.First?.Value is Operation next &&
+                next.Inst == Instruction.Branch &&
+                current.Dest == next.Dest;
+     }
+
+     /// <summary>
+     /// Gets the first block after the given one that has at least one operation.
+     /// </summary>
+     /// <param name="block">Starting block</param>
+     /// <returns>The next non-empty block, or null if the end was reached</returns>
+     /// <exception cref="InvalidOperationException">An empty block claims to end with a branch</exception>
+     private static BasicBlock Next(BasicBlock block)
+     {
+         BasicBlock candidate = block.Next;
+
+         for (; candidate != null && candidate.Operations.Count == 0; candidate = candidate.Next)
+         {
+             if (candidate.HasBranch)
+             {
+                 throw new InvalidOperationException("Found a bogus empty block that \"ends with a branch\".");
+             }
+         }
+
+         return candidate;
+     }
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/Translation/Optimizations/ConstantFolding.cs b/src/Ryujinx.Graphics.Shader/Translation/Optimizations/ConstantFolding.cs
new file mode 100644
index 00000000..6729f077
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/Translation/Optimizations/ConstantFolding.cs
@@ -0,0 +1,346 @@
+using Ryujinx.Common.Utilities;
+using Ryujinx.Graphics.Shader.IntermediateRepresentation;
+using System;
+
+using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
+
+namespace Ryujinx.Graphics.Shader.Translation.Optimizations
+{
+ /// <summary>
+ /// Pass that evaluates operations whose sources are all constants at translation time,
+ /// replacing them with a copy of the computed constant.
+ /// </summary>
+ static class ConstantFolding
+ {
+     /// <summary>
+     /// Folds the operation into a constant copy if all of its sources are constants
+     /// and the instruction is one of the supported ones. Otherwise does nothing.
+     /// </summary>
+     /// <param name="operation">Operation to fold</param>
+     public static void RunPass(Operation operation)
+     {
+         if (!AreAllSourcesConstant(operation))
+         {
+             return;
+         }
+
+         switch (operation.Inst)
+         {
+             case Instruction.Add:
+                 EvaluateBinary(operation, (x, y) => x + y);
+                 break;
+
+             case Instruction.BitCount:
+                 EvaluateUnary(operation, (x) => BitCount(x));
+                 break;
+
+             case Instruction.BitwiseAnd:
+                 EvaluateBinary(operation, (x, y) => x & y);
+                 break;
+
+             case Instruction.BitwiseExclusiveOr:
+                 EvaluateBinary(operation, (x, y) => x ^ y);
+                 break;
+
+             case Instruction.BitwiseNot:
+                 EvaluateUnary(operation, (x) => ~x);
+                 break;
+
+             case Instruction.BitwiseOr:
+                 EvaluateBinary(operation, (x, y) => x | y);
+                 break;
+
+             case Instruction.BitfieldExtractS32:
+                 BitfieldExtractS32(operation);
+                 break;
+
+             case Instruction.BitfieldExtractU32:
+                 BitfieldExtractU32(operation);
+                 break;
+
+             // NOTE(review): Math.Clamp throws ArgumentException when min > max, so a shader
+             // with such constant operands makes folding throw - confirm this is acceptable.
+             case Instruction.Clamp:
+                 EvaluateTernary(operation, (x, y, z) => Math.Clamp(x, y, z));
+                 break;
+
+             case Instruction.ClampU32:
+                 EvaluateTernary(operation, (x, y, z) => (int)Math.Clamp((uint)x, (uint)y, (uint)z));
+                 break;
+
+             case Instruction.CompareEqual:
+                 EvaluateBinary(operation, (x, y) => x == y);
+                 break;
+
+             case Instruction.CompareGreater:
+                 EvaluateBinary(operation, (x, y) => x > y);
+                 break;
+
+             case Instruction.CompareGreaterOrEqual:
+                 EvaluateBinary(operation, (x, y) => x >= y);
+                 break;
+
+             case Instruction.CompareGreaterOrEqualU32:
+                 EvaluateBinary(operation, (x, y) => (uint)x >= (uint)y);
+                 break;
+
+             case Instruction.CompareGreaterU32:
+                 EvaluateBinary(operation, (x, y) => (uint)x > (uint)y);
+                 break;
+
+             case Instruction.CompareLess:
+                 EvaluateBinary(operation, (x, y) => x < y);
+                 break;
+
+             case Instruction.CompareLessOrEqual:
+                 EvaluateBinary(operation, (x, y) => x <= y);
+                 break;
+
+             case Instruction.CompareLessOrEqualU32:
+                 EvaluateBinary(operation, (x, y) => (uint)x <= (uint)y);
+                 break;
+
+             case Instruction.CompareLessU32:
+                 EvaluateBinary(operation, (x, y) => (uint)x < (uint)y);
+                 break;
+
+             case Instruction.CompareNotEqual:
+                 EvaluateBinary(operation, (x, y) => x != y);
+                 break;
+
+             case Instruction.Divide:
+                 // Division by zero folds to 0. Division by -1 is folded as a wrapping negation,
+                 // because int.MinValue / -1 throws OverflowException in C# even in unchecked code.
+                 EvaluateBinary(operation, (x, y) => y == 0 ? 0 : (y == -1 ? unchecked(-x) : x / y));
+                 break;
+
+             case Instruction.FP32 | Instruction.Add:
+                 EvaluateFPBinary(operation, (x, y) => x + y);
+                 break;
+
+             case Instruction.FP32 | Instruction.Clamp:
+                 EvaluateFPTernary(operation, (x, y, z) => Math.Clamp(x, y, z));
+                 break;
+
+             case Instruction.FP32 | Instruction.CompareEqual:
+                 EvaluateFPBinary(operation, (x, y) => x == y);
+                 break;
+
+             case Instruction.FP32 | Instruction.CompareGreater:
+                 EvaluateFPBinary(operation, (x, y) => x > y);
+                 break;
+
+             case Instruction.FP32 | Instruction.CompareGreaterOrEqual:
+                 EvaluateFPBinary(operation, (x, y) => x >= y);
+                 break;
+
+             case Instruction.FP32 | Instruction.CompareLess:
+                 EvaluateFPBinary(operation, (x, y) => x < y);
+                 break;
+
+             case Instruction.FP32 | Instruction.CompareLessOrEqual:
+                 EvaluateFPBinary(operation, (x, y) => x <= y);
+                 break;
+
+             case Instruction.FP32 | Instruction.CompareNotEqual:
+                 EvaluateFPBinary(operation, (x, y) => x != y);
+                 break;
+
+             case Instruction.FP32 | Instruction.Divide:
+                 EvaluateFPBinary(operation, (x, y) => x / y);
+                 break;
+
+             case Instruction.FP32 | Instruction.Multiply:
+                 EvaluateFPBinary(operation, (x, y) => x * y);
+                 break;
+
+             case Instruction.FP32 | Instruction.Negate:
+                 EvaluateFPUnary(operation, (x) => -x);
+                 break;
+
+             case Instruction.FP32 | Instruction.Subtract:
+                 EvaluateFPBinary(operation, (x, y) => x - y);
+                 break;
+
+             case Instruction.IsNan:
+                 EvaluateFPUnary(operation, (x) => float.IsNaN(x));
+                 break;
+
+             case Instruction.LoadConstant:
+                 // A constant buffer load with constant slot and offset becomes a direct cbuf operand.
+                 operation.TurnIntoCopy(Cbuf(operation.GetSource(0).Value, operation.GetSource(1).Value));
+                 break;
+
+             case Instruction.Maximum:
+                 EvaluateBinary(operation, (x, y) => Math.Max(x, y));
+                 break;
+
+             case Instruction.MaximumU32:
+                 EvaluateBinary(operation, (x, y) => (int)Math.Max((uint)x, (uint)y));
+                 break;
+
+             case Instruction.Minimum:
+                 EvaluateBinary(operation, (x, y) => Math.Min(x, y));
+                 break;
+
+             case Instruction.MinimumU32:
+                 EvaluateBinary(operation, (x, y) => (int)Math.Min((uint)x, (uint)y));
+                 break;
+
+             case Instruction.Multiply:
+                 EvaluateBinary(operation, (x, y) => x * y);
+                 break;
+
+             case Instruction.Negate:
+                 EvaluateUnary(operation, (x) => -x);
+                 break;
+
+             case Instruction.ShiftLeft:
+                 EvaluateBinary(operation, (x, y) => x << y);
+                 break;
+
+             case Instruction.ShiftRightS32:
+                 EvaluateBinary(operation, (x, y) => x >> y);
+                 break;
+
+             case Instruction.ShiftRightU32:
+                 EvaluateBinary(operation, (x, y) => (int)((uint)x >> y));
+                 break;
+
+             case Instruction.Subtract:
+                 EvaluateBinary(operation, (x, y) => x - y);
+                 break;
+
+             case Instruction.UnpackHalf2x16:
+                 UnpackHalf2x16(operation);
+                 break;
+         }
+     }
+
+     /// <summary>
+     /// Checks if every source of the operation is a constant operand.
+     /// </summary>
+     /// <param name="operation">Operation to check</param>
+     /// <returns>True if there is no non-constant source (including when there are no sources)</returns>
+     private static bool AreAllSourcesConstant(Operation operation)
+     {
+         for (int index = 0; index < operation.SourcesCount; index++)
+         {
+             if (operation.GetSource(index).Type != OperandType.Constant)
+             {
+                 return false;
+             }
+         }
+
+         return true;
+     }
+
+     /// <summary>
+     /// Counts the bits set on a 32-bit value.
+     /// </summary>
+     /// <param name="value">Value to count the set bits of</param>
+     /// <returns>Number of bits set, from 0 to 32</returns>
+     private static int BitCount(int value)
+     {
+         return System.Numerics.BitOperations.PopCount((uint)value);
+     }
+
+     /// <summary>
+     /// Folds a signed bitfield extract: extracts the field and sign-extends it from its length.
+     /// </summary>
+     private static void BitfieldExtractS32(Operation operation)
+     {
+         int value = GetBitfieldExtractValue(operation);
+
+         // Move the field's top bit to bit 31, then shift back arithmetically to sign-extend.
+         int shift = 32 - operation.GetSource(2).Value;
+
+         value = (value << shift) >> shift;
+
+         operation.TurnIntoCopy(Const(value));
+     }
+
+     /// <summary>
+     /// Folds an unsigned bitfield extract.
+     /// </summary>
+     private static void BitfieldExtractU32(Operation operation)
+     {
+         operation.TurnIntoCopy(Const(GetBitfieldExtractValue(operation)));
+     }
+
+     /// <summary>
+     /// Extracts the bitfield described by the operation sources (value, lsb, length).
+     /// </summary>
+     /// <returns>The extracted field, without sign extension</returns>
+     private static int GetBitfieldExtractValue(Operation operation)
+     {
+         int value = operation.GetSource(0).Value;
+         int lsb = operation.GetSource(1).Value;
+         int length = operation.GetSource(2).Value;
+
+         return value.Extract(lsb, length);
+     }
+
+     /// <summary>
+     /// Folds a half-float unpack: the operation index selects which 16-bit half
+     /// of the source is converted to a 32-bit float constant.
+     /// </summary>
+     private static void UnpackHalf2x16(Operation operation)
+     {
+         int value = operation.GetSource(0).Value;
+
+         value = (value >> operation.Index * 16) & 0xffff;
+
+         operation.TurnIntoCopy(ConstF((float)BitConverter.UInt16BitsToHalf((ushort)value)));
+     }
+
+     /// <summary>
+     /// Replaces the operation with the integer result of a unary function of its source.
+     /// </summary>
+     private static void EvaluateUnary(Operation operation, Func<int, int> op)
+     {
+         int x = operation.GetSource(0).Value;
+
+         operation.TurnIntoCopy(Const(op(x)));
+     }
+
+     /// <summary>
+     /// Replaces the operation with the float result of a unary function of its source.
+     /// </summary>
+     private static void EvaluateFPUnary(Operation operation, Func<float, float> op)
+     {
+         float x = operation.GetSource(0).AsFloat();
+
+         operation.TurnIntoCopy(ConstF(op(x)));
+     }
+
+     /// <summary>
+     /// Replaces the operation with the IR boolean result of a unary float predicate.
+     /// </summary>
+     private static void EvaluateFPUnary(Operation operation, Func<float, bool> op)
+     {
+         float x = operation.GetSource(0).AsFloat();
+
+         operation.TurnIntoCopy(Const(op(x) ? IrConsts.True : IrConsts.False));
+     }
+
+     /// <summary>
+     /// Replaces the operation with the integer result of a binary function of its sources.
+     /// </summary>
+     private static void EvaluateBinary(Operation operation, Func<int, int, int> op)
+     {
+         int x = operation.GetSource(0).Value;
+         int y = operation.GetSource(1).Value;
+
+         operation.TurnIntoCopy(Const(op(x, y)));
+     }
+
+     /// <summary>
+     /// Replaces the operation with the IR boolean result of a binary integer predicate.
+     /// </summary>
+     private static void EvaluateBinary(Operation operation, Func<int, int, bool> op)
+     {
+         int x = operation.GetSource(0).Value;
+         int y = operation.GetSource(1).Value;
+
+         operation.TurnIntoCopy(Const(op(x, y) ? IrConsts.True : IrConsts.False));
+     }
+
+     /// <summary>
+     /// Replaces the operation with the float result of a binary function of its sources.
+     /// </summary>
+     private static void EvaluateFPBinary(Operation operation, Func<float, float, float> op)
+     {
+         float x = operation.GetSource(0).AsFloat();
+         float y = operation.GetSource(1).AsFloat();
+
+         operation.TurnIntoCopy(ConstF(op(x, y)));
+     }
+
+     /// <summary>
+     /// Replaces the operation with the IR boolean result of a binary float predicate.
+     /// </summary>
+     private static void EvaluateFPBinary(Operation operation, Func<float, float, bool> op)
+     {
+         float x = operation.GetSource(0).AsFloat();
+         float y = operation.GetSource(1).AsFloat();
+
+         operation.TurnIntoCopy(Const(op(x, y) ? IrConsts.True : IrConsts.False));
+     }
+
+     /// <summary>
+     /// Replaces the operation with the integer result of a ternary function of its sources.
+     /// </summary>
+     private static void EvaluateTernary(Operation operation, Func<int, int, int, int> op)
+     {
+         int x = operation.GetSource(0).Value;
+         int y = operation.GetSource(1).Value;
+         int z = operation.GetSource(2).Value;
+
+         operation.TurnIntoCopy(Const(op(x, y, z)));
+     }
+
+     /// <summary>
+     /// Replaces the operation with the float result of a ternary function of its sources.
+     /// </summary>
+     private static void EvaluateFPTernary(Operation operation, Func<float, float, float, float> op)
+     {
+         float x = operation.GetSource(0).AsFloat();
+         float y = operation.GetSource(1).AsFloat();
+         float z = operation.GetSource(2).AsFloat();
+
+         operation.TurnIntoCopy(ConstF(op(x, y, z)));
+     }
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/Translation/Optimizations/GlobalToStorage.cs b/src/Ryujinx.Graphics.Shader/Translation/Optimizations/GlobalToStorage.cs
new file mode 100644
index 00000000..2a4070e0
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/Translation/Optimizations/GlobalToStorage.cs
@@ -0,0 +1,433 @@
+using Ryujinx.Graphics.Shader.IntermediateRepresentation;
+using System.Collections.Generic;
+
+using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
+using static Ryujinx.Graphics.Shader.Translation.GlobalMemory;
+
+namespace Ryujinx.Graphics.Shader.Translation.Optimizations
+{
+ static class GlobalToStorage
+ {
+ // Rewrites global memory accesses in "block" into storage buffer or constant buffer accesses
+ // when the base address can be traced back to a descriptor held in constant buffer slot 0.
+ //
+ // sbUseMask and ubeUseMask accumulate one bit per storage buffer / UBE descriptor that any
+ // source operand of the block reads directly; the UBE mask is only updated for compute shaders.
+ // Both masks are forwarded to config.SetAccessibleBufferMasks before returning.
+ public static void RunPass(BasicBlock block, ShaderConfig config, ref int sbUseMask, ref int ubeUseMask)
+ {
+ // Byte range [start, end) of the storage buffer descriptors for this shader stage.
+ int sbStart = GetStorageBaseCbOffset(config.Stage);
+ int sbEnd = sbStart + StorageDescsSize;
+
+ // Byte range [start, end) of the UBE descriptors.
+ int ubeStart = UbeBaseOffset;
+ int ubeEnd = UbeBaseOffset + UbeDescsSize;
+
+ for (LinkedListNode<INode> node = block.Operations.First; node != null; node = node.Next)
+ {
+ // Record every descriptor that is read directly as a source operand.
+ for (int index = 0; index < node.Value.SourcesCount; index++)
+ {
+ Operand src = node.Value.GetSource(index);
+
+ int storageIndex = GetStorageIndex(src, sbStart, sbEnd);
+
+ if (storageIndex >= 0)
+ {
+ sbUseMask |= 1 << storageIndex;
+ }
+
+ if (config.Stage == ShaderStage.Compute)
+ {
+ int constantIndex = GetStorageIndex(src, ubeStart, ubeEnd);
+
+ if (constantIndex >= 0)
+ {
+ ubeUseMask |= 1 << constantIndex;
+ }
+ }
+ }
+
+ if (!(node.Value is Operation operation))
+ {
+ continue;
+ }
+
+ if (UsesGlobalMemory(operation.Inst, operation.StorageKind))
+ {
+ Operand source = operation.GetSource(0);
+
+ int storageIndex = SearchForStorageBase(block, source, sbStart, sbEnd);
+
+ if (storageIndex >= 0)
+ {
+ // Storage buffers are implemented using global memory access.
+ // If we know from where the base address of the access is loaded,
+ // we can guess which storage buffer it is accessing.
+ // We can then replace the global memory access with a storage
+ // buffer access.
+ // The replacement node is returned so that iteration continues after it.
+ node = ReplaceGlobalWithStorage(block, node, config, storageIndex);
+ }
+ else if (config.Stage == ShaderStage.Compute && operation.Inst == Instruction.LoadGlobal)
+ {
+ // Here we effectively try to replace a LDG instruction with LDC.
+ // The hardware only supports a limited amount of constant buffers
+ // so NVN "emulates" more constant buffers using global memory access.
+ // Here we try to replace the global access back to a constant buffer
+ // load.
+ // The range end is ubeEnd itself: it already includes UbeBaseOffset, and it is
+ // the same range used to build ubeUseMask above.
+ storageIndex = SearchForStorageBase(block, source, ubeStart, ubeEnd);
+
+ if (storageIndex >= 0)
+ {
+ node = ReplaceLdgWithLdc(node, config, storageIndex);
+ }
+ }
+ }
+ }
+
+ config.SetAccessibleBufferMasks(sbUseMask, ubeUseMask);
+ }
+
+ // Swaps the global memory operation held by "node" for the matching storage buffer
+ // operation on buffer "storageIndex" and returns the list node of the replacement.
+ // Atomics keep their instruction and get StorageKind.StorageBuffer; LoadGlobal becomes
+ // LoadStorage; the StoreGlobal variants become the StoreStorage variant of the same width.
+ private static LinkedListNode<INode> ReplaceGlobalWithStorage(BasicBlock block, LinkedListNode<INode> node, ShaderConfig config, int storageIndex)
+ {
+ Operation globalOp = (Operation)node.Value;
+ Instruction inst = globalOp.Inst;
+
+ bool isAtomic = inst.IsAtomic();
+ bool isSmallStore = inst == Instruction.StoreGlobal16 || inst == Instruction.StoreGlobal8;
+ bool writes = isAtomic || inst == Instruction.StoreGlobal || isSmallStore;
+
+ config.SetUsedStorageBuffer(storageIndex, writes);
+
+ int sourcesCount = globalOp.SourcesCount;
+ Operand[] newSources = new Operand[sourcesCount];
+
+ // Slots 0 and 1 receive the buffer index and the offset; sources from index 2 on are carried over.
+ newSources[0] = Const(storageIndex);
+ newSources[1] = GetStorageOffset(block, node, config, storageIndex, globalOp.GetSource(0), isSmallStore);
+
+ for (int i = 2; i < sourcesCount; i++)
+ {
+ newSources[i] = globalOp.GetSource(i);
+ }
+
+ Operation storageOp;
+
+ if (isAtomic)
+ {
+ storageOp = new Operation(inst, StorageKind.StorageBuffer, globalOp.Dest, newSources);
+ }
+ else if (inst == Instruction.LoadGlobal)
+ {
+ storageOp = new Operation(Instruction.LoadStorage, globalOp.Dest, newSources);
+ }
+ else
+ {
+ Instruction storeInst;
+
+ switch (inst)
+ {
+ case Instruction.StoreGlobal16:
+ storeInst = Instruction.StoreStorage16;
+ break;
+ case Instruction.StoreGlobal8:
+ storeInst = Instruction.StoreStorage8;
+ break;
+ default:
+ storeInst = Instruction.StoreStorage;
+ break;
+ }
+
+ storageOp = new Operation(storeInst, null, newSources);
+ }
+
+ // Clear every source of the old operation before it is dropped from the list.
+ for (int i = 0; i < sourcesCount; i++)
+ {
+ globalOp.SetSource(i, null);
+ }
+
+ LinkedList<INode> list = node.List;
+ LinkedListNode<INode> replacement = list.AddBefore(node, storageOp);
+
+ list.Remove(node);
+
+ return replacement;
+ }
+
+ // Emits, before "node", the code that computes the offset of the access inside storage buffer
+ // "storageIndex" and returns the operand holding it: a byte offset for 16/8-bit stores
+ // (isStg16Or8), otherwise the byte offset shifted right by 2.
+ private static Operand GetStorageOffset(
+ BasicBlock block,
+ LinkedListNode<INode> node,
+ ShaderConfig config,
+ int storageIndex,
+ Operand addrLow,
+ bool isStg16Or8)
+ {
+ int baseCbOffset = GetStorageCbOffset(config.Stage, storageIndex);
+
+ // The base address is only pattern-matched out of the address expression when no unaligned
+ // storage buffer is reported and the host alignment does not exceed Constants.StorageAlignment.
+ bool canMatchBase =
+ !config.GpuAccessor.QueryHasUnalignedStorageBuffer() &&
+ config.GpuAccessor.QueryHostStorageBufferOffsetAlignment() <= Constants.StorageAlignment;
+
+ Operand byteOffset = null;
+ int constantOffset = 0;
+
+ if (canMatchBase)
+ {
+ Operand lastAddr = Utils.FindLastOperation(addrLow, block);
+
+ (byteOffset, constantOffset) = GetStorageOffset(block, lastAddr, baseCbOffset);
+ }
+
+ LinkedList<INode> list = node.List;
+
+ if (byteOffset != null)
+ {
+ ReplaceAddressAlignment(list, addrLow, byteOffset, constantOffset);
+
+ if (constantOffset != 0)
+ {
+ Operand withConstant = Local();
+ Operation addOp = new Operation(Instruction.Add, withConstant, byteOffset, Const(constantOffset));
+
+ list.AddBefore(node, addOp);
+
+ byteOffset = withConstant;
+ }
+ }
+ else
+ {
+ // No match: compute the offset at run time as addrLow - (base & -alignment).
+ Operand baseAddrLow = Cbuf(0, baseCbOffset);
+ Operand baseAddrTrunc = Local();
+
+ Operand alignMask = Const(-config.GpuAccessor.QueryHostStorageBufferOffsetAlignment());
+
+ Operation andOp = new Operation(Instruction.BitwiseAnd, baseAddrTrunc, baseAddrLow, alignMask);
+
+ list.AddBefore(node, andOp);
+
+ Operand difference = Local();
+ Operation subOp = new Operation(Instruction.Subtract, difference, addrLow, baseAddrTrunc);
+
+ list.AddBefore(node, subOp);
+
+ byteOffset = difference;
+ }
+
+ if (!isStg16Or8)
+ {
+ Operand wordOffset = Local();
+ Operation shrOp = new Operation(Instruction.ShiftRightU32, wordOffset, byteOffset, Const(2));
+
+ list.AddBefore(node, shrOp);
+
+ return wordOffset;
+ }
+
+ return byteOffset;
+ }
+
+ // True when "operand" is a constant buffer operand that refers to slot 0 at exactly "offset".
+ private static bool IsCb0Offset(Operand operand, int offset)
+ {
+ if (operand.Type != OperandType.ConstantBuffer)
+ {
+ return false;
+ }
+
+ return operand.GetCbufSlot() == 0 && operand.GetCbufOffset() == offset;
+ }
+
+ // For every "address & 3" (or "3 & address") operation that uses "address" and lives in "list",
+ // inserts "byteOffset + constantOffset" before it and makes the AND use that sum instead.
+ private static void ReplaceAddressAlignment(LinkedList<INode> list, Operand address, Operand byteOffset, int constantOffset)
+ {
+ // When we emit 16/8-bit LDG, we add extra code to determine the address alignment.
+ // Eliminate the storage buffer base address from this too, leaving only the byte offset.
+
+ // Iterate over a snapshot of the uses: SetSource below replaces a use of "address", which may
+ // update address.UseOps while we are still walking it. The optimizer passes that rewrite
+ // sources take the same precaution.
+ List<INode> uses = new List<INode>(address.UseOps);
+
+ foreach (INode useNode in uses)
+ {
+ if (useNode is Operation op && op.Inst == Instruction.BitwiseAnd)
+ {
+ Operand src1 = op.GetSource(0);
+ Operand src2 = op.GetSource(1);
+
+ // Index of the source that holds "address", or -1 if this is not an "& 3" alignment test.
+ int addressIndex = -1;
+
+ if (src1 == address && src2.Type == OperandType.Constant && src2.Value == 3)
+ {
+ addressIndex = 0;
+ }
+ else if (src2 == address && src1.Type == OperandType.Constant && src1.Value == 3)
+ {
+ addressIndex = 1;
+ }
+
+ if (addressIndex != -1)
+ {
+ LinkedListNode<INode> node = list.Find(op);
+
+ // Add offset calculation before the use. Needs to be on the same block.
+ if (node != null)
+ {
+ Operand offset = Local();
+ Operation addOp = new Operation(Instruction.Add, offset, byteOffset, Const(constantOffset));
+ list.AddBefore(node, addOp);
+
+ op.SetSource(addressIndex, offset);
+ }
+ }
+ }
+ }
+ }
+
+ // Tries to split "address" into (variable byte offset, constant offset) relative to the base
+ // address stored in constant buffer 0 at "baseAddressCbOffset".
+ // Returns (null, 0) when the address expression does not match a recognized shape.
+ private static (Operand, int) GetStorageOffset(BasicBlock block, Operand address, int baseAddressCbOffset)
+ {
+ // The address is the base itself: offset zero.
+ if (IsCb0Offset(address, baseAddressCbOffset))
+ {
+ return (Const(0), 0);
+ }
+
+ // Peel an optional "+ constant" off the address, then resolve what is left.
+ (Operand inner, int constantOffset) = GetStorageConstantOffset(block, address);
+
+ inner = Utils.FindLastOperation(inner, block);
+
+ // The address is base + constant only.
+ if (IsCb0Offset(inner, baseAddressCbOffset))
+ {
+ return (Const(0), constantOffset);
+ }
+
+ Operation addOp = inner.AsgOp as Operation;
+
+ if (addOp == null || addOp.Inst != Instruction.Add)
+ {
+ return (null, 0);
+ }
+
+ Operand first = addOp.GetSource(0);
+ Operand second = Utils.FindLastOperation(addOp.GetSource(1), block);
+
+ // One side of the add must be the base; the other side is the variable offset.
+ if (IsCb0Offset(second, baseAddressCbOffset))
+ {
+ return (first, constantOffset);
+ }
+
+ if (IsCb0Offset(first, baseAddressCbOffset))
+ {
+ return (second, constantOffset);
+ }
+
+ return (null, 0);
+ }
+
+ // If "address" is assigned by an Add whose second source is a constant, returns the first
+ // source together with that constant value; otherwise returns (address, 0).
+ // The "block" parameter is not used by this method.
+ private static (Operand, int) GetStorageConstantOffset(BasicBlock block, Operand address)
+ {
+ Operation addOp = address.AsgOp as Operation;
+
+ if (addOp != null && addOp.Inst == Instruction.Add)
+ {
+ Operand constantSrc = addOp.GetSource(1);
+
+ if (constantSrc.Type == OperandType.Constant)
+ {
+ return (addOp.GetSource(0), constantSrc.Value);
+ }
+ }
+
+ return (address, 0);
+ }
+
+ // Replaces the global load held by "node" with a LoadConstant from constant buffer slot
+ // UbeFirstCbuf + storageIndex, and returns the list node of the replacement.
+ // The word offset is computed as (addrLow - (base & -alignment)) >> 2 by code inserted before "node".
+ private static LinkedListNode<INode> ReplaceLdgWithLdc(LinkedListNode<INode> node, ShaderConfig config, int storageIndex)
+ {
+ Operation ldgOp = (Operation)node.Value;
+ LinkedList<INode> list = node.List;
+
+ Operand[] newSources = new Operand[ldgOp.SourcesCount];
+
+ int cbSlot = UbeFirstCbuf + storageIndex;
+
+ newSources[0] = Const(cbSlot);
+
+ // Offset computation: truncate the descriptor base address to the host alignment...
+ Operand addrLow = ldgOp.GetSource(0);
+ Operand baseAddrLow = Cbuf(0, UbeBaseOffset + storageIndex * StorageDescSize);
+ Operand baseAddrTrunc = Local();
+ Operand alignMask = Const(-config.GpuAccessor.QueryHostStorageBufferOffsetAlignment());
+
+ Operation andOp = new Operation(Instruction.BitwiseAnd, baseAddrTrunc, baseAddrLow, alignMask);
+
+ list.AddBefore(node, andOp);
+
+ // ...then subtract it from the access address and convert bytes to words.
+ Operand byteOffset = Local();
+ Operand wordOffset = Local();
+
+ Operation subOp = new Operation(Instruction.Subtract, byteOffset, addrLow, baseAddrTrunc);
+ Operation shrOp = new Operation(Instruction.ShiftRightU32, wordOffset, byteOffset, Const(2));
+
+ list.AddBefore(node, subOp);
+ list.AddBefore(node, shrOp);
+
+ newSources[1] = wordOffset;
+
+ config.SetUsedConstantBuffer(cbSlot);
+
+ for (int i = 2; i < ldgOp.SourcesCount; i++)
+ {
+ newSources[i] = ldgOp.GetSource(i);
+ }
+
+ Operation ldcOp = new Operation(Instruction.LoadConstant, ldgOp.Dest, newSources);
+
+ // Clear every source of the old operation before it is dropped from the list.
+ for (int i = 0; i < ldgOp.SourcesCount; i++)
+ {
+ ldgOp.SetSource(i, null);
+ }
+
+ LinkedListNode<INode> replacement = list.AddBefore(node, ldcOp);
+
+ list.Remove(node);
+
+ return replacement;
+ }
+
+ // Looks for the descriptor that "globalAddress" is derived from and returns its index within
+ // the constant buffer 0 range [sbStart, sbEnd), or -1 when none is found.
+ // Recognized shapes: the descriptor itself, an Add with the descriptor as a source, or an
+ // Add of a constant and a local that is itself assigned by such an Add.
+ private static int SearchForStorageBase(BasicBlock block, Operand globalAddress, int sbStart, int sbEnd)
+ {
+ Operand resolved = Utils.FindLastOperation(globalAddress, block);
+
+ if (resolved.Type == OperandType.ConstantBuffer)
+ {
+ return GetStorageIndex(resolved, sbStart, sbEnd);
+ }
+
+ Operation addOp = resolved.AsgOp as Operation;
+
+ if (addOp == null || addOp.Inst != Instruction.Add)
+ {
+ return -1;
+ }
+
+ Operand first = addOp.GetSource(0);
+ Operand second = addOp.GetSource(1);
+
+ // For "local + constant" (in either order), continue the search from the local's assignment.
+ Operand localSide = null;
+
+ if (first.Type == OperandType.LocalVariable && second.Type == OperandType.Constant)
+ {
+ localSide = first;
+ }
+ else if (second.Type == OperandType.LocalVariable && first.Type == OperandType.Constant)
+ {
+ localSide = second;
+ }
+
+ if (localSide != null)
+ {
+ addOp = Utils.FindLastOperation(localSide, block).AsgOp as Operation;
+
+ if (addOp == null || addOp.Inst != Instruction.Add)
+ {
+ return -1;
+ }
+ }
+
+ for (int i = 0; i < addOp.SourcesCount; i++)
+ {
+ int found = GetStorageIndex(addOp.GetSource(i), sbStart, sbEnd);
+
+ if (found != -1)
+ {
+ return found;
+ }
+ }
+
+ return -1;
+ }
+
+ // Maps a constant buffer operand in slot 0 whose offset lies in [sbStart, sbEnd) to a
+ // descriptor index, (offset - sbStart) / StorageDescSize. Returns -1 for any other operand.
+ private static int GetStorageIndex(Operand operand, int sbStart, int sbEnd)
+ {
+ if (operand.Type != OperandType.ConstantBuffer)
+ {
+ return -1;
+ }
+
+ int slot = operand.GetCbufSlot();
+ int offset = operand.GetCbufOffset();
+
+ bool inRange = slot == 0 && offset >= sbStart && offset < sbEnd;
+
+ return inRange ? (offset - sbStart) / StorageDescSize : -1;
+ }
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/Translation/Optimizations/Optimizer.cs b/src/Ryujinx.Graphics.Shader/Translation/Optimizations/Optimizer.cs
new file mode 100644
index 00000000..bae774ee
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/Translation/Optimizations/Optimizer.cs
@@ -0,0 +1,380 @@
+using Ryujinx.Graphics.Shader.IntermediateRepresentation;
+using System.Collections.Generic;
+using System.Diagnostics;
+using System.Linq;
+
+namespace Ryujinx.Graphics.Shader.Translation.Optimizations
+{
+ static class Optimizer
+ {
+ // Entry point of the optimizer: runs the iterative passes, then the one-shot pattern
+ // passes on every block, then the iterative passes again.
+ public static void RunPass(BasicBlock[] blocks, ShaderConfig config)
+ {
+ RunOptimizationPasses(blocks);
+
+ int storageUseMask = 0;
+ int ubeUseMask = 0;
+
+ // These passes look for specific patterns and only need to run once.
+ foreach (BasicBlock block in blocks)
+ {
+ GlobalToStorage.RunPass(block, config, ref storageUseMask, ref ubeUseMask);
+ BindlessToIndexed.RunPass(block, config);
+ BindlessElimination.RunPass(block, config);
+ }
+
+ config.SetAccessibleBufferMasks(storageUseMask, ubeUseMask);
+
+ // Optimize once more: the passes above may have exposed new opportunities.
+ RunOptimizationPasses(blocks);
+ }
+
+ private static void RunOptimizationPasses(BasicBlock[] blocks) // Sweeps all blocks repeatedly until one full sweep makes no change.
+ {
+ bool modified;
+
+ do
+ {
+ modified = false;
+
+ for (int blkIndex = 0; blkIndex < blocks.Length; blkIndex++)
+ {
+ BasicBlock block = blocks[blkIndex];
+
+ LinkedListNode<INode> node = block.Operations.First;
+
+ while (node != null)
+ {
+ LinkedListNode<INode> nextNode = node.Next; // Captured up front because the current node may be removed below.
+
+ bool isUnused = IsUnused(node.Value);
+
+ if (!(node.Value is Operation operation) || isUnused) // Non-operations (e.g. phi nodes) and unused nodes are handled here.
+ {
+ if (node.Value is PhiNode phi && !isUnused)
+ {
+ isUnused = PropagatePhi(phi); // True when the phi was propagated to its uses and can be dropped.
+ }
+
+ if (isUnused)
+ {
+ RemoveNode(block, node);
+
+ modified = true;
+ }
+
+ node = nextNode;
+
+ continue;
+ }
+
+ ConstantFolding.RunPass(operation); // Neither this nor Simplification sets "modified" here; a resulting Copy is handled by the branch below.
+
+ Simplification.RunPass(operation);
+
+ if (DestIsLocalVar(operation))
+ {
+ if (operation.Inst == Instruction.Copy)
+ {
+ PropagateCopy(operation); // Uses of the copy destination now read the copy source directly.
+
+ RemoveNode(block, node);
+
+ modified = true;
+ }
+ else if ((operation.Inst == Instruction.PackHalf2x16 && PropagatePack(operation)) ||
+ (operation.Inst == Instruction.ShuffleXor && MatchDdxOrDdy(operation)))
+ {
+ if (DestHasNoUses(operation)) // Only removed once no use of the result is left.
+ {
+ RemoveNode(block, node);
+ }
+
+ modified = true;
+ }
+ }
+
+ node = nextNode;
+ }
+
+ if (BranchElimination.RunPass(block))
+ {
+ RemoveNode(block, block.Operations.Last); // A true result means the block's last operation is to be removed.
+
+ modified = true;
+ }
+ }
+ }
+ while (modified);
+ }
+
+ // Makes every node that reads the destination of "copyOp" read the copy's source instead.
+ private static void PropagateCopy(Operation copyOp)
+ {
+ Operand copied = copyOp.Dest;
+ Operand replacement = copyOp.GetSource(0);
+
+ // Work on a snapshot: SetSource is called while the uses are being walked.
+ foreach (INode user in copied.UseOps.ToArray())
+ {
+ for (int i = 0; i < user.SourcesCount; i++)
+ {
+ if (user.GetSource(i) == copied)
+ {
+ user.SetSource(i, replacement);
+ }
+ }
+ }
+ }
+
+ // If every source of "phi" is the same operand (as judged by IsSameOperand), replaces each
+ // use of the phi destination with the first source and returns true. Returns false otherwise.
+ private static bool PropagatePhi(PhiNode phi)
+ {
+ Operand candidate = phi.GetSource(0);
+
+ int srcIndex = 1;
+
+ while (srcIndex < phi.SourcesCount)
+ {
+ if (!IsSameOperand(candidate, phi.GetSource(srcIndex)))
+ {
+ return false;
+ }
+
+ srcIndex++;
+ }
+
+ // All sources agree, so the phi result can be substituted everywhere.
+ Operand phiDest = phi.Dest;
+
+ // Work on a snapshot: SetSource is called while the uses are being walked.
+ foreach (INode user in phiDest.UseOps.ToArray())
+ {
+ for (int i = 0; i < user.SourcesCount; i++)
+ {
+ if (user.GetSource(i) == phiDest)
+ {
+ user.SetSource(i, candidate);
+ }
+ }
+ }
+
+ return true;
+ }
+
+ // Two operands count as the same only when type and value match and the type is
+ // Constant or ConstantBuffer.
+ private static bool IsSameOperand(Operand x, Operand y)
+ {
+ bool sameTypeAndValue = x.Type == y.Type && x.Value == y.Value;
+
+ if (!sameTypeAndValue)
+ {
+ return false;
+ }
+
+ // TODO: Handle Load operations with the same storage and the same constant parameters.
+ switch (x.Type)
+ {
+ case OperandType.Constant:
+ case OperandType.ConstantBuffer:
+ return true;
+ default:
+ return false;
+ }
+ }
+
+ // Turns every UnpackHalf2x16 that unpacks the result of "packOp" into a copy of the matching
+ // pack source (source 1 when the unpack Index is 1, source 0 otherwise).
+ // Returns true if at least one unpack was rewritten.
+ private static bool PropagatePack(Operation packOp)
+ {
+ Operand packed = packOp.Dest;
+ Operand lowSrc = packOp.GetSource(0);
+ Operand highSrc = packOp.GetSource(1);
+
+ bool modified = false;
+
+ // Work on a snapshot of the uses, since the unpack operations are rewritten in the loop.
+ foreach (INode user in packed.UseOps.ToArray())
+ {
+ Operation unpackOp = user as Operation;
+
+ if (unpackOp == null || unpackOp.Inst != Instruction.UnpackHalf2x16)
+ {
+ continue;
+ }
+
+ if (unpackOp.GetSource(0) != packed)
+ {
+ continue;
+ }
+
+ unpackOp.TurnIntoCopy(unpackOp.Index == 1 ? highSrc : lowSrc);
+
+ modified = true;
+ }
+
+ return modified;
+ }
+
+ // Recognizes the ShuffleXor + SwizzleAdd sequence that computes a screen-space derivative
+ // and rewrites each matching SwizzleAdd into Ddx or Ddy of the shuffle's first source.
+ // "operation" must be a ShuffleXor. Returns true if at least one use was rewritten.
+ public static bool MatchDdxOrDdy(Operation operation)
+ {
+ // It's assumed that "operation.Inst" is ShuffleXor,
+ // that should be checked before calling this method.
+ Debug.Assert(operation.Inst == Instruction.ShuffleXor);
+
+ bool modified = false;
+
+ Operand src2 = operation.GetSource(1);
+ Operand src3 = operation.GetSource(2);
+
+ // The second source must be the constant 1 (DDX) or 2 (DDY).
+ if (src2.Type != OperandType.Constant || (src2.Value != 1 && src2.Value != 2))
+ {
+ return false;
+ }
+
+ // The third source must be the constant 0x1c03.
+ if (src3.Type != OperandType.Constant || src3.Value != 0x1c03)
+ {
+ return false;
+ }
+
+ bool isDdy = src2.Value == 2;
+ bool isDdx = !isDdy;
+
+ // We can replace any use by a FSWZADD with DDX/DDY, when
+ // the following conditions are true:
+ // - The mask should be 0b10100101 for DDY, or 0b10011001 for DDX.
+ // - The first source operand must be the shuffle output.
+ // - The second source operand must be the shuffle first source operand.
+ // The uses are snapshotted because matching uses are rewritten inside the loop.
+ INode[] uses = operation.Dest.UseOps.ToArray();
+
+ foreach (INode use in uses)
+ {
+ if (!(use is Operation useOp) || useOp.Inst != Instruction.SwizzleAdd)
+ {
+ continue;
+ }
+
+ Operand fswzaddSrc1 = useOp.GetSource(0);
+ Operand fswzaddSrc2 = useOp.GetSource(1);
+ Operand fswzaddSrc3 = useOp.GetSource(2);
+
+ if (fswzaddSrc1 != operation.Dest)
+ {
+ continue;
+ }
+
+ if (fswzaddSrc2 != operation.GetSource(0))
+ {
+ continue;
+ }
+
+ if (fswzaddSrc3.Type != OperandType.Constant)
+ {
+ continue;
+ }
+
+ int mask = fswzaddSrc3.Value;
+
+ if ((isDdx && mask != 0b10011001) ||
+ (isDdy && mask != 0b10100101))
+ {
+ continue;
+ }
+
+ useOp.TurnInto(isDdx ? Instruction.Ddx : Instruction.Ddy, fswzaddSrc2);
+
+ modified = true;
+ }
+
+ return modified;
+ }
+
+ // Unlinks "llNode" from the block and removes its node from the use lists of the local
+ // variables it reads. When that leaves a local without any use, the node assigning it is
+ // processed the same way (its sources are unregistered; it is not unlinked from the block here).
+ private static void RemoveNode(BasicBlock block, LinkedListNode<INode> llNode)
+ {
+ block.Operations.Remove(llNode);
+
+ Queue<INode> pending = new Queue<INode>();
+
+ pending.Enqueue(llNode.Value);
+
+ while (pending.Count != 0)
+ {
+ INode current = pending.Dequeue();
+
+ for (int i = 0; i < current.SourcesCount; i++)
+ {
+ Operand src = current.GetSource(i);
+
+ bool lostLastUse =
+ src.Type == OperandType.LocalVariable &&
+ src.UseOps.Remove(current) &&
+ src.UseOps.Count == 0;
+
+ if (lostLastUse)
+ {
+ Debug.Assert(src.AsgOp != null);
+ pending.Enqueue(src.AsgOp);
+ }
+ }
+ }
+ }
+
+ // A node is unused when it has no side effects, all of its destinations are local
+ // variables, and none of those destinations has any use.
+ private static bool IsUnused(INode node)
+ {
+ if (HasSideEffects(node))
+ {
+ return false;
+ }
+
+ return DestIsLocalVar(node) && DestHasNoUses(node);
+ }
+
+ // True for operations that must be kept even when their result is unused:
+ // the atomic instructions, Call and ImageAtomic. Non-operation nodes never qualify.
+ private static bool HasSideEffects(INode node)
+ {
+ Operation operation = node as Operation;
+
+ if (operation == null)
+ {
+ return false;
+ }
+
+ // Compare the instruction with the bits outside Instruction.Mask cleared.
+ switch (operation.Inst & Instruction.Mask)
+ {
+ case Instruction.AtomicAdd:
+ case Instruction.AtomicAnd:
+ case Instruction.AtomicCompareAndSwap:
+ case Instruction.AtomicMaxS32:
+ case Instruction.AtomicMaxU32:
+ case Instruction.AtomicMinS32:
+ case Instruction.AtomicMinU32:
+ case Instruction.AtomicOr:
+ case Instruction.AtomicSwap:
+ case Instruction.AtomicXor:
+ case Instruction.Call:
+ case Instruction.ImageAtomic:
+ return true;
+
+ default:
+ return false;
+ }
+ }
+
+ // True when the node has at least one destination slot and every non-null destination
+ // is a local variable.
+ private static bool DestIsLocalVar(INode node)
+ {
+ if (node.DestsCount == 0)
+ {
+ return false;
+ }
+
+ for (int i = 0; i < node.DestsCount; i++)
+ {
+ Operand dest = node.GetDest(i);
+
+ if (dest == null)
+ {
+ continue;
+ }
+
+ if (dest.Type != OperandType.LocalVariable)
+ {
+ return false;
+ }
+ }
+
+ return true;
+ }
+
+ // True when no non-null destination of the node is used by any node.
+ private static bool DestHasNoUses(INode node)
+ {
+ for (int i = 0; i < node.DestsCount; i++)
+ {
+ Operand dest = node.GetDest(i);
+
+ if (dest == null)
+ {
+ continue;
+ }
+
+ if (dest.UseOps.Count != 0)
+ {
+ return false;
+ }
+ }
+
+ return true;
+ }
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/Translation/Optimizations/Simplification.cs b/src/Ryujinx.Graphics.Shader/Translation/Optimizations/Simplification.cs
new file mode 100644
index 00000000..8d05f99a
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/Translation/Optimizations/Simplification.cs
@@ -0,0 +1,147 @@
+using Ryujinx.Graphics.Shader.IntermediateRepresentation;
+
+using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
+
+namespace Ryujinx.Graphics.Shader.Translation.Optimizations
+{
+ static class Simplification
+ {
+ private const int AllOnes = ~0;
+
+ // Dispatches "operation" to the algebraic simplification that applies to its instruction.
+ // Instructions that are not listed are left untouched.
+ public static void RunPass(Operation operation)
+ {
+ Instruction inst = operation.Inst;
+
+ if (inst == Instruction.Add || inst == Instruction.BitwiseExclusiveOr)
+ {
+ // x + 0, 0 + x, x ^ 0, 0 ^ x.
+ TryEliminateBinaryOpCommutative(operation, 0);
+ }
+ else if (inst == Instruction.BitwiseAnd)
+ {
+ TryEliminateBitwiseAnd(operation);
+ }
+ else if (inst == Instruction.BitwiseOr)
+ {
+ TryEliminateBitwiseOr(operation);
+ }
+ else if (inst == Instruction.ConditionalSelect)
+ {
+ TryEliminateConditionalSelect(operation);
+ }
+ else if (inst == Instruction.Divide)
+ {
+ // x / 1.
+ TryEliminateBinaryOpY(operation, 1);
+ }
+ else if (inst == Instruction.Multiply)
+ {
+ // x * 1, 1 * x.
+ TryEliminateBinaryOpCommutative(operation, 1);
+ }
+ else if (inst == Instruction.ShiftLeft ||
+ inst == Instruction.ShiftRightS32 ||
+ inst == Instruction.ShiftRightU32 ||
+ inst == Instruction.Subtract)
+ {
+ // x << 0, x >> 0, x - 0.
+ TryEliminateBinaryOpY(operation, 0);
+ }
+ }
+
+ // Simplifies a bitwise AND with a constant operand. Checked in this order:
+ // 0xFFFFFFFF & y == y, x & 0xFFFFFFFF == x,
+ // 0x00000000 & y == 0x00000000, x & 0x00000000 == 0x00000000
+ private static void TryEliminateBitwiseAnd(Operation operation)
+ {
+ Operand lhs = operation.GetSource(0);
+ Operand rhs = operation.GetSource(1);
+
+ if (IsConstEqual(lhs, AllOnes))
+ {
+ operation.TurnIntoCopy(rhs);
+ return;
+ }
+
+ if (IsConstEqual(rhs, AllOnes))
+ {
+ operation.TurnIntoCopy(lhs);
+ return;
+ }
+
+ if (IsConstEqual(lhs, 0) || IsConstEqual(rhs, 0))
+ {
+ operation.TurnIntoCopy(Const(0));
+ }
+ }
+
+ // Simplifies a bitwise OR with a constant operand. Checked in this order:
+ // 0x00000000 | y == y, x | 0x00000000 == x,
+ // 0xFFFFFFFF | y == 0xFFFFFFFF, x | 0xFFFFFFFF == 0xFFFFFFFF
+ private static void TryEliminateBitwiseOr(Operation operation)
+ {
+ Operand lhs = operation.GetSource(0);
+ Operand rhs = operation.GetSource(1);
+
+ if (IsConstEqual(lhs, 0))
+ {
+ operation.TurnIntoCopy(rhs);
+ return;
+ }
+
+ if (IsConstEqual(rhs, 0))
+ {
+ operation.TurnIntoCopy(lhs);
+ return;
+ }
+
+ if (IsConstEqual(lhs, AllOnes) || IsConstEqual(rhs, AllOnes))
+ {
+ operation.TurnIntoCopy(Const(AllOnes));
+ }
+ }
+
+ // For non-commutative operations: when the second source is the constant "comparand",
+ // the operation becomes a copy of its first source.
+ private static void TryEliminateBinaryOpY(Operation operation, int comparand)
+ {
+ Operand lhs = operation.GetSource(0);
+ Operand rhs = operation.GetSource(1);
+
+ if (!IsConstEqual(rhs, comparand))
+ {
+ return;
+ }
+
+ operation.TurnIntoCopy(lhs);
+ }
+
+ // For commutative operations: when either source is the constant "comparand", the
+ // operation becomes a copy of the other source. The first source is checked first.
+ private static void TryEliminateBinaryOpCommutative(Operation operation, int comparand)
+ {
+ Operand lhs = operation.GetSource(0);
+ Operand rhs = operation.GetSource(1);
+
+ if (IsConstEqual(lhs, comparand))
+ {
+ operation.TurnIntoCopy(rhs);
+ return;
+ }
+
+ if (IsConstEqual(rhs, comparand))
+ {
+ operation.TurnIntoCopy(lhs);
+ }
+ }
+
+ // When the condition (source 0) is a constant, the select becomes a copy of source 1
+ // for a non-zero condition or of source 2 for a zero condition.
+ private static void TryEliminateConditionalSelect(Operation operation)
+ {
+ Operand condition = operation.GetSource(0);
+
+ if (condition.Type == OperandType.Constant)
+ {
+ Operand chosen = condition.Value != 0
+ ? operation.GetSource(1)
+ : operation.GetSource(2);
+
+ operation.TurnIntoCopy(chosen);
+ }
+ }
+
+ // True when "operand" is a constant whose value equals "comparand".
+ private static bool IsConstEqual(Operand operand, int comparand)
+ {
+ return operand.Type == OperandType.Constant && operand.Value == comparand;
+ }
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/Translation/Optimizations/Utils.cs b/src/Ryujinx.Graphics.Shader/Translation/Optimizations/Utils.cs
new file mode 100644
index 00000000..4ca6d687
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/Translation/Optimizations/Utils.cs
@@ -0,0 +1,68 @@
+using Ryujinx.Graphics.Shader.IntermediateRepresentation;
+
+namespace Ryujinx.Graphics.Shader.Translation.Optimizations
+{
+ static class Utils
+ {
+ // Returns the conditional branch that ends the first predecessor of "block" whose
+ // Next block is "block", or null if no predecessor qualifies.
+ private static Operation FindBranchSource(BasicBlock block)
+ {
+ foreach (BasicBlock predecessor in block.Predecessors)
+ {
+ if (predecessor.Operations.Count <= 0)
+ {
+ continue;
+ }
+
+ Operation lastOp = predecessor.GetLastOp() as Operation;
+
+ if (lastOp != null && IsConditionalBranch(lastOp.Inst) && predecessor.Next == block)
+ {
+ return lastOp;
+ }
+ }
+
+ return null;
+ }
+
+ // True for the two conditional branch instructions, BranchIfFalse and BranchIfTrue.
+ private static bool IsConditionalBranch(Instruction inst)
+ {
+ switch (inst)
+ {
+ case Instruction.BranchIfFalse:
+ case Instruction.BranchIfTrue:
+ return true;
+ default:
+ return false;
+ }
+ }
+
+ // Checks whether the conditions guarding the query block also guard the current block.
+ // Only the nearest conditional branch of each block is compared: both must exist, use the
+ // same branch instruction, and test the very same condition operand instance.
+ private static bool BlockConditionsMatch(BasicBlock currentBlock, BasicBlock queryBlock)
+ {
+ Operation currentBranch = FindBranchSource(currentBlock);
+ Operation queryBranch = FindBranchSource(queryBlock);
+
+ if (currentBranch == null || queryBranch == null)
+ {
+ return false;
+ }
+
+ if (currentBranch.Inst != queryBranch.Inst)
+ {
+ return false;
+ }
+
+ return currentBranch.GetSource(0) == queryBranch.GetSource(0);
+ }
+
+ // Resolves "source" through a phi node: if it is assigned by a phi, returns the phi source
+ // whose originating block is guarded by the same condition as "block", scanning from the
+ // last phi source to the first. Returns "source" unchanged when it is not assigned by a
+ // phi or when no phi source matches.
+ public static Operand FindLastOperation(Operand source, BasicBlock block)
+ {
+ PhiNode phiNode = source.AsgOp as PhiNode;
+
+ if (phiNode == null)
+ {
+ return source;
+ }
+
+ // The value depends on which branch was taken earlier, so only accept a phi source
+ // whose block conditions also hold for the current block. Later sources are preferred.
+ for (int i = phiNode.SourcesCount; i-- > 0;)
+ {
+ if (BlockConditionsMatch(block, phiNode.GetBlock(i)))
+ {
+ return phiNode.GetSource(i);
+ }
+ }
+
+ return source;
+ }
+ }
+}
diff --git a/src/Ryujinx.Graphics.Shader/Translation/RegisterUsage.cs b/src/Ryujinx.Graphics.Shader/Translation/RegisterUsage.cs
new file mode 100644
index 00000000..9e31831d
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/Translation/RegisterUsage.cs
@@ -0,0 +1,486 @@
+using Ryujinx.Graphics.Shader.Decoders;
+using Ryujinx.Graphics.Shader.IntermediateRepresentation;
+using System;
+using System.Collections.Generic;
+using System.Diagnostics;
+using System.Numerics;
+
+namespace Ryujinx.Graphics.Shader.Translation
+{
+ static class RegisterUsage
+ {
+ private const int RegsCount = 256; // Number of register indices covered by RegsMask.
+ private const int RegsMask = RegsCount - 1; // 0xFF; usable as a bit mask because RegsCount is a power of two.
+
+ private const int GprMasks = 4; // Matches the four 64-bit words GprMask0..GprMask3 of RegisterMask (4 * 64 = RegsCount bits).
+ private const int PredMasks = 1; // Matches the single PredMask word of RegisterMask.
+ private const int FlagMasks = 1; // Matches the single FlagMask word of RegisterMask.
+ private const int TotalMasks = GprMasks + PredMasks + FlagMasks; // 6, the number of indices RegisterMask.GetMask accepts.
+
+ // Set of registers stored as six 64-bit words: four general purpose register words,
+ // one predicate word and one flag word. Supports bitwise AND / OR / NOT across all
+ // words and value equality.
+ private struct RegisterMask : IEquatable<RegisterMask>
+ {
+ public long GprMask0 { get; set; }
+ public long GprMask1 { get; set; }
+ public long GprMask2 { get; set; }
+ public long GprMask3 { get; set; }
+ public long PredMask { get; set; }
+ public long FlagMask { get; set; }
+
+ public RegisterMask(long gprMask0, long gprMask1, long gprMask2, long gprMask3, long predMask, long flagMask)
+ {
+ GprMask0 = gprMask0;
+ GprMask1 = gprMask1;
+ GprMask2 = gprMask2;
+ GprMask3 = gprMask3;
+ PredMask = predMask;
+ FlagMask = flagMask;
+ }
+
+ // Returns the word at "index": 0-3 are the GPR words, 4 the predicate word, 5 the flag word.
+ // Any other index throws ArgumentOutOfRangeException.
+ public long GetMask(int index)
+ {
+ switch (index)
+ {
+ case 0: return GprMask0;
+ case 1: return GprMask1;
+ case 2: return GprMask2;
+ case 3: return GprMask3;
+ case 4: return PredMask;
+ case 5: return FlagMask;
+ default: throw new ArgumentOutOfRangeException(nameof(index));
+ }
+ }
+
+ public static RegisterMask operator &(RegisterMask x, RegisterMask y) =>
+ new RegisterMask(
+ x.GprMask0 & y.GprMask0,
+ x.GprMask1 & y.GprMask1,
+ x.GprMask2 & y.GprMask2,
+ x.GprMask3 & y.GprMask3,
+ x.PredMask & y.PredMask,
+ x.FlagMask & y.FlagMask);
+
+ public static RegisterMask operator |(RegisterMask x, RegisterMask y) =>
+ new RegisterMask(
+ x.GprMask0 | y.GprMask0,
+ x.GprMask1 | y.GprMask1,
+ x.GprMask2 | y.GprMask2,
+ x.GprMask3 | y.GprMask3,
+ x.PredMask | y.PredMask,
+ x.FlagMask | y.FlagMask);
+
+ public static RegisterMask operator ~(RegisterMask x) =>
+ new RegisterMask(
+ ~x.GprMask0,
+ ~x.GprMask1,
+ ~x.GprMask2,
+ ~x.GprMask3,
+ ~x.PredMask,
+ ~x.FlagMask);
+
+ public static bool operator ==(RegisterMask x, RegisterMask y) => x.Equals(y);
+
+ public static bool operator !=(RegisterMask x, RegisterMask y) => !x.Equals(y);
+
+ public override bool Equals(object obj) => obj is RegisterMask regMask && Equals(regMask);
+
+ // Two masks are equal when all six words are equal.
+ public bool Equals(RegisterMask other)
+ {
+ if (GprMask0 != other.GprMask0 || GprMask1 != other.GprMask1)
+ {
+ return false;
+ }
+
+ if (GprMask2 != other.GprMask2 || GprMask3 != other.GprMask3)
+ {
+ return false;
+ }
+
+ return PredMask == other.PredMask && FlagMask == other.FlagMask;
+ }
+
+ public override int GetHashCode() =>
+ HashCode.Combine(GprMask0, GprMask1, GprMask2, GprMask3, PredMask, FlagMask);
+ }
+
+ // Immutable pair of register lists produced by RunPass for one function:
+ // the registers it takes as input arguments and the registers it returns as outputs.
+ public readonly struct FunctionRegisterUsage
+ {
+ public Register[] InArguments { get; }
+ public Register[] OutArguments { get; }
+
+ public FunctionRegisterUsage(Register[] inArguments, Register[] outArguments) =>
+ (InArguments, OutArguments) = (inArguments, outArguments);
+ }
+
+ public static FunctionRegisterUsage RunPass(ControlFlowGraph cfg) // Computes which registers a function reads and writes, inserts the context loads/stores, and returns the argument lists.
+ {
+ List<Register> inArguments = new List<Register>();
+ List<Register> outArguments = new List<Register>();
+
+ // Compute local register inputs and outputs used inside blocks.
+ RegisterMask[] localInputs = new RegisterMask[cfg.Blocks.Length]; // Per block: registers read before the block itself has written them.
+ RegisterMask[] localOutputs = new RegisterMask[cfg.Blocks.Length]; // Per block: registers written by the block.
+
+ foreach (BasicBlock block in cfg.Blocks)
+ {
+ for (LinkedListNode<INode> node = block.Operations.First; node != null; node = node.Next)
+ {
+ Operation operation = node.Value as Operation; // NOTE(review): dereferenced below without a null check; assumes every node here is an Operation - confirm.
+
+ for (int srcIndex = 0; srcIndex < operation.SourcesCount; srcIndex++)
+ {
+ Operand source = operation.GetSource(srcIndex);
+
+ if (source.Type != OperandType.Register)
+ {
+ continue;
+ }
+
+ Register register = source.GetRegister();
+
+ localInputs[block.Index] |= GetMask(register) & ~localOutputs[block.Index]; // Only counts as an input if not already written earlier in this block.
+ }
+
+ if (operation.Dest != null && operation.Dest.Type == OperandType.Register)
+ {
+ localOutputs[block.Index] |= GetMask(operation.Dest.GetRegister());
+ }
+ }
+ }
+
+ // Compute global register inputs and outputs used across blocks.
+ RegisterMask[] globalCmnOutputs = new RegisterMask[cfg.Blocks.Length]; // Per block: intersection over all predecessors of what each one outputs.
+
+ RegisterMask[] globalInputs = new RegisterMask[cfg.Blocks.Length];
+ RegisterMask[] globalOutputs = new RegisterMask[cfg.Blocks.Length]; // Per block: union of predecessor outputs plus the block's own outputs.
+
+ RegisterMask allOutputs = new RegisterMask();
+ RegisterMask allCmnOutputs = new RegisterMask(-1L, -1L, -1L, -1L, -1L, -1L); // Starts with every bit set and is narrowed by each returning block.
+
+ bool modified;
+
+ bool firstPass = true;
+
+ do // Iterate until no per-block mask changes any more.
+ {
+ modified = false;
+
+ // Compute register outputs.
+ for (int index = cfg.PostOrderBlocks.Length - 1; index >= 0; index--) // Walks PostOrderBlocks from last to first.
+ {
+ BasicBlock block = cfg.PostOrderBlocks[index];
+
+ if (block.Predecessors.Count != 0)
+ {
+ BasicBlock predecessor = block.Predecessors[0];
+
+ RegisterMask cmnOutputs = localOutputs[predecessor.Index] | globalCmnOutputs[predecessor.Index];
+
+ RegisterMask outputs = globalOutputs[predecessor.Index];
+
+ for (int pIndex = 1; pIndex < block.Predecessors.Count; pIndex++)
+ {
+ predecessor = block.Predecessors[pIndex];
+
+ cmnOutputs &= localOutputs[predecessor.Index] | globalCmnOutputs[predecessor.Index]; // Common outputs: intersection across predecessors.
+
+ outputs |= globalOutputs[predecessor.Index]; // All outputs: union across predecessors.
+ }
+
+ globalInputs[block.Index] |= outputs & ~cmnOutputs; // Registers written by some but not all predecessors become inputs of this block.
+
+ if (!firstPass)
+ {
+ cmnOutputs &= globalCmnOutputs[block.Index]; // After the first pass the common set may only shrink.
+ }
+
+ if (EndsWithReturn(block))
+ {
+ allCmnOutputs &= cmnOutputs | localOutputs[block.Index];
+ }
+
+ if (Exchange(globalCmnOutputs, block.Index, cmnOutputs)) // A true result from Exchange marks the sweep as modified.
+ {
+ modified = true;
+ }
+
+ outputs |= localOutputs[block.Index];
+
+ if (Exchange(globalOutputs, block.Index, globalOutputs[block.Index] | outputs))
+ {
+ allOutputs |= outputs;
+ modified = true;
+ }
+ }
+ else if (Exchange(globalOutputs, block.Index, localOutputs[block.Index])) // No predecessors: the outputs are just the block's own.
+ {
+ allOutputs |= localOutputs[block.Index];
+ modified = true;
+ }
+ }
+
+ // Compute register inputs.
+ for (int index = 0; index < cfg.PostOrderBlocks.Length; index++) // Walks PostOrderBlocks from first to last.
+ {
+ BasicBlock block = cfg.PostOrderBlocks[index];
+
+ RegisterMask inputs = localInputs[block.Index];
+
+ if (block.Next != null)
+ {
+ inputs |= globalInputs[block.Next.Index];
+ }
+
+ if (block.Branch != null)
+ {
+ inputs |= globalInputs[block.Branch.Index];
+ }
+
+ inputs &= ~globalCmnOutputs[block.Index]; // Registers in the block's common-output set are not inputs.
+
+ if (Exchange(globalInputs, block.Index, globalInputs[block.Index] | inputs))
+ {
+ modified = true;
+ }
+ }
+
+ firstPass = false;
+ }
+ while (modified);
+
+ // Insert load and store context instructions where needed.
+ foreach (BasicBlock block in cfg.Blocks)
+ {
+ // The only block without any predecessor should be the entry block.
+ // It always needs a context load as it is the first block to run.
+ if (block.Predecessors.Count == 0)
+ {
+ RegisterMask inputs = globalInputs[block.Index] | (allOutputs & ~allCmnOutputs); // Also loads registers that are outputs but not written before every return.
+
+ LoadLocals(block, inputs, inArguments);
+ }
+
+ if (EndsWithReturn(block))
+ {
+ StoreLocals(block, allOutputs, inArguments.Count, outArguments);
+ }
+ }
+
+ return new FunctionRegisterUsage(inArguments.ToArray(), outArguments.ToArray());
+ }
+
+ /// <summary>
+ /// Patches every <see cref="Instruction.Call"/> operation found in <paramref name="blocks"/>, appending
+ /// the registers that the callee reads as extra sources and the registers it writes as extra destinations.
+ /// </summary>
+ /// <param name="blocks">Blocks whose call operations are modified in place.</param>
+ /// <param name="frus">
+ /// Register usage of each function, indexed by the constant function ID stored in the first source of the call.
+ /// </param>
+ public static void FixupCalls(BasicBlock[] blocks, FunctionRegisterUsage[] frus)
+ {
+     foreach (BasicBlock block in blocks)
+     {
+         for (LinkedListNode<INode> node = block.Operations.First; node != null; node = node.Next)
+         {
+             // Only operations can be calls. The pattern match also skips nodes of any other kind,
+             // which the previous unchecked "as" cast would have dereferenced as null.
+             if (node.Value is not Operation operation || operation.Inst != Instruction.Call)
+             {
+                 continue;
+             }
+
+             Operand funcId = operation.GetSource(0);
+
+             Debug.Assert(funcId.Type == OperandType.Constant);
+
+             FunctionRegisterUsage fru = frus[funcId.Value];
+
+             // Registers read by the callee are passed as additional call sources.
+             Operand[] inRegs = new Operand[fru.InArguments.Length];
+
+             for (int i = 0; i < inRegs.Length; i++)
+             {
+                 inRegs[i] = OperandHelper.Register(fru.InArguments[i]);
+             }
+
+             operation.AppendSources(inRegs);
+
+             // Registers written by the callee are received as additional call destinations.
+             // Index 0 is intentionally left null here.
+             // NOTE(review): presumably that slot stands for the call's own return value - confirm against AppendDests.
+             Operand[] outRegs = new Operand[1 + fru.OutArguments.Length];
+
+             for (int i = 0; i < fru.OutArguments.Length; i++)
+             {
+                 outRegs[1 + i] = OperandHelper.Register(fru.OutArguments[i]);
+             }
+
+             operation.AppendDests(outRegs);
+         }
+     }
+ }
+
+ // Tells whether the first node of the block is an operation using the given instruction.
+ // An empty block never matches.
+ private static bool StartsWith(BasicBlock block, Instruction inst)
+ {
+     return block.Operations.Count != 0 &&
+            block.Operations.First.Value is Operation firstOp &&
+            firstOp.Inst == inst;
+ }
+
+ // Tells whether the last node of the block is an operation using the given instruction.
+ // An empty block never matches.
+ private static bool EndsWith(BasicBlock block, Instruction inst)
+ {
+     return block.Operations.Count != 0 &&
+            block.Operations.Last.Value is Operation lastOp &&
+            lastOp.Inst == inst;
+ }
+
+ // Builds a mask with a single bit set, identifying the given register.
+ // GPRs are spread over four 64-bit words (index / 64 selects the word, index % 64 the bit),
+ // predicates and flags each use one word of their own. Other register types give an empty mask.
+ private static RegisterMask GetMask(Register register)
+ {
+     Span<long> gprWords = stackalloc long[4];
+     long predBits = 0;
+     long flagBits = 0;
+
+     RegisterType type = register.Type;
+
+     if (type == RegisterType.Gpr)
+     {
+         int word = register.Index >> 6;
+
+         gprWords[word] = 1L << (register.Index & 0x3f);
+     }
+     else if (type == RegisterType.Predicate)
+     {
+         predBits = 1L << register.Index;
+     }
+     else if (type == RegisterType.Flag)
+     {
+         flagBits = 1L << register.Index;
+     }
+
+     return new RegisterMask(gprWords[0], gprWords[1], gprWords[2], gprWords[3], predBits, flagBits);
+ }
+
+ // Stores the value on the given slot of the array, and reports whether
+ // the stored value is different from the one that was there before.
+ private static bool Exchange(RegisterMask[] masks, int blkIndex, RegisterMask value)
+ {
+     bool changed = masks[blkIndex] != value;
+
+     masks[blkIndex] = value;
+
+     return changed;
+ }
+
+ // Inserts, at the start of the block, one copy from a function argument into a register
+ // for each bit set on the masks. Arguments are consumed in mask order, starting at argument 0.
+ // When the argument list is empty on entry, it is filled with the registers being loaded.
+ private static void LoadLocals(BasicBlock block, RegisterMask masks, List<Register> inArguments)
+ {
+     bool collectArguments = inArguments.Count == 0;
+     LinkedListNode<INode> lastInserted = null;
+     int nextArgument = 0;
+
+     for (int maskIndex = 0; maskIndex < TotalMasks; maskIndex++)
+     {
+         (RegisterType regType, int baseRegIndex) = GetRegTypeAndBaseIndex(maskIndex);
+
+         for (long remaining = masks.GetMask(maskIndex); remaining != 0;)
+         {
+             // Take the lowest set bit and clear it from the pending set.
+             int bit = BitOperations.TrailingZeroCount(remaining);
+
+             remaining &= ~(1L << bit);
+
+             Register register = new Register(baseRegIndex + bit, regType);
+
+             if (collectArguments)
+             {
+                 inArguments.Add(register);
+             }
+
+             Operand destination = OperandHelper.Register(register);
+             Operand argument = OperandHelper.Argument(nextArgument++);
+
+             Operation copyOp = new Operation(Instruction.Copy, destination, argument);
+
+             // The first copy goes to the very start of the block, the others follow it in order.
+             lastInserted = lastInserted == null
+                 ? block.Operations.AddFirst(copyOp)
+                 : block.Operations.AddAfter(lastInserted, copyOp);
+         }
+     }
+
+     Debug.Assert(nextArgument <= inArguments.Count);
+ }
+
+ // Inserts, right before the last operation of the block, one copy from a register into a
+ // function argument for each bit set on the masks. Output arguments are numbered after the
+ // input ones, so the first one used is "inArgumentsCount".
+ // When the output argument list is empty on entry, it is filled with the registers being stored.
+ private static void StoreLocals(BasicBlock block, RegisterMask masks, int inArgumentsCount, List<Register> outArguments)
+ {
+     bool collectArguments = outArguments.Count == 0;
+     LinkedListNode<INode> lastInserted = null;
+     int nextArgument = inArgumentsCount;
+
+     for (int maskIndex = 0; maskIndex < TotalMasks; maskIndex++)
+     {
+         (RegisterType regType, int baseRegIndex) = GetRegTypeAndBaseIndex(maskIndex);
+
+         for (long remaining = masks.GetMask(maskIndex); remaining != 0;)
+         {
+             // Take the lowest set bit and clear it from the pending set.
+             int bit = BitOperations.TrailingZeroCount(remaining);
+
+             remaining &= ~(1L << bit);
+
+             Register register = new Register(baseRegIndex + bit, regType);
+
+             if (collectArguments)
+             {
+                 outArguments.Add(register);
+             }
+
+             Operand argument = OperandHelper.Argument(nextArgument++);
+             Operand source = OperandHelper.Register(register);
+
+             Operation copyOp = new Operation(Instruction.Copy, argument, source);
+
+             // The first copy goes before the block's last operation, the others follow it in order.
+             lastInserted = lastInserted == null
+                 ? block.Operations.AddBefore(block.Operations.Last, copyOp)
+                 : block.Operations.AddAfter(lastInserted, copyOp);
+         }
+     }
+
+     Debug.Assert(nextArgument <= inArgumentsCount + outArguments.Count);
+ }
+
+ // Maps a mask word index to the register type it tracks, and to the index of the register
+ // represented by bit 0 of that word. The first "GprMasks" words are GPRs (64 registers each),
+ // the next word holds predicates, and any word after that holds flags.
+ private static (RegisterType RegType, int BaseRegIndex) GetRegTypeAndBaseIndex(int i)
+ {
+     if (i < GprMasks)
+     {
+         return (RegisterType.Gpr, i * sizeof(long) * 8);
+     }
+
+     RegisterType nonGprType = i == GprMasks ? RegisterType.Predicate : RegisterType.Flag;
+
+     return (nonGprType, 0);
+ }
+
+ // Tells whether the last operation of the block is a return.
+ private static bool EndsWithReturn(BasicBlock block)
+ {
+     return block.GetLastOp() is Operation { Inst: Instruction.Return };
+ }
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/Translation/Rewriter.cs b/src/Ryujinx.Graphics.Shader/Translation/Rewriter.cs
new file mode 100644
index 00000000..91e7ace1
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/Translation/Rewriter.cs
@@ -0,0 +1,768 @@
+using Ryujinx.Graphics.Shader.Decoders;
+using Ryujinx.Graphics.Shader.IntermediateRepresentation;
+using System.Collections.Generic;
+using System.Diagnostics;
+using System.Linq;
+using System.Numerics;
+
+using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
+using static Ryujinx.Graphics.Shader.Translation.GlobalMemory;
+
+namespace Ryujinx.Graphics.Shader.Translation
+{
+ static class Rewriter
+ {
+ /// <summary>
+ /// Walks all operations of the given blocks, rewriting the ones that need special handling:
+ /// draw parameter constant buffer reads (vertex shaders only), texture samples and global memory accesses.
+ /// </summary>
+ /// <param name="blocks">Blocks to process, modified in place.</param>
+ /// <param name="config">Shader configuration, used for host capability queries and usage tracking.</param>
+ public static void RunPass(BasicBlock[] blocks, ShaderConfig config)
+ {
+     bool isVertexShader = config.Stage == ShaderStage.Vertex;
+     bool hasConstantBufferDrawParameters = config.GpuAccessor.QueryHasConstantBufferDrawParameters();
+     bool supportsSnormBufferTextureFormat = config.GpuAccessor.QueryHostSupportsSnormBufferTextureFormat();
+
+     foreach (BasicBlock block in blocks)
+     {
+         LinkedListNode<INode> node = block.Operations.First;
+
+         while (node != null)
+         {
+             if (node.Value is not Operation operation)
+             {
+                 node = node.Next;
+                 continue;
+             }
+
+             if (isVertexShader)
+             {
+                 // Either replace the constant buffer reads, or just detect them.
+                 bool usesDrawParameters = hasConstantBufferDrawParameters
+                     ? ReplaceConstantBufferWithDrawParameters(node, operation)
+                     : HasConstantBufferDrawParameters(operation);
+
+                 if (usesDrawParameters)
+                 {
+                     config.SetUsedFeature(FeatureFlags.DrawParameters);
+                 }
+             }
+
+             // Must be captured before any rewrite, as the current node might be removed from the list.
+             LinkedListNode<INode> nextNode = node.Next;
+
+             if (operation is TextureOperation texOp)
+             {
+                 if (texOp.Inst == Instruction.TextureSample)
+                 {
+                     node = RewriteTextureSample(node, config);
+
+                     if (texOp.Type == SamplerType.TextureBuffer && !supportsSnormBufferTextureFormat)
+                     {
+                         node = InsertSnormNormalization(node, config);
+                     }
+                 }
+
+                 // Continue after the last node produced by the rewrite.
+                 nextNode = node.Next;
+             }
+             else if (UsesGlobalMemory(operation.Inst, operation.StorageKind))
+             {
+                 LinkedListNode<INode> rewritten = RewriteGlobalAccess(node, config);
+
+                 if (rewritten?.Next != null)
+                 {
+                     nextNode = rewritten.Next;
+                 }
+             }
+
+             node = nextNode;
+         }
+     }
+ }
+
+ // Replaces a global memory operation with an equivalent storage buffer operation.
+ // The storage buffer slot and offset are selected at shader run time, by comparing the accessed
+ // address against the address range of every accessible storage buffer.
+ // For global loads, the accessible constant buffers are also checked, and the value loaded from
+ // a constant buffer is selected when the address falls inside its range.
+ // Returns the list node of the operation that replaced the original one,
+ // or null if the original operation was removed without a replacement.
+ private static LinkedListNode<INode> RewriteGlobalAccess(LinkedListNode<INode> node, ShaderConfig config)
+ {
+ Operation operation = (Operation)node.Value;
+
+ bool isAtomic = operation.Inst.IsAtomic();
+ bool isStg16Or8 = operation.Inst == Instruction.StoreGlobal16 || operation.Inst == Instruction.StoreGlobal8;
+ bool isWrite = isAtomic || operation.Inst == Instruction.StoreGlobal || isStg16Or8;
+
+ Operation storageOp = null;
+
+ // Inserts a new operation before the one being rewritten, and returns the local holding its result.
+ Operand PrependOperation(Instruction inst, params Operand[] sources)
+ {
+ Operand local = Local();
+
+ node.List.AddBefore(node, new Operation(inst, local, sources));
+
+ return local;
+ }
+
+ // Inserts an already created operation before the one being rewritten,
+ // redirecting its result to a new local that is then returned.
+ Operand PrependExistingOperation(Operation operation)
+ {
+ Operand local = Local();
+
+ operation.Dest = local;
+ node.List.AddBefore(node, operation);
+
+ return local;
+ }
+
+ // The first two sources are the low and high halves of the accessed address.
+ Operand addrLow = operation.GetSource(0);
+ Operand addrHigh = operation.GetSource(1);
+
+ Operand sbBaseAddrLow = Const(0);
+ Operand sbSlot = Const(0);
+
+ // NOTE(review): negating the alignment only gives a mask that clears the low bits
+ // if the alignment is a power of two - confirm that the query guarantees this.
+ Operand alignMask = Const(-config.GpuAccessor.QueryHostStorageBufferOffsetAlignment());
+
+ // Emits code checking whether the accessed address is inside the buffer described by the
+ // 3 words at "cbOffset" on constant buffer 0 (base address low, base address high and size).
+ // Returns the operand holding the check result, and outputs the base address low operand.
+ Operand BindingRangeCheck(int cbOffset, out Operand baseAddrLow)
+ {
+ baseAddrLow = Cbuf(0, cbOffset);
+ Operand baseAddrHigh = Cbuf(0, cbOffset + 1);
+ Operand size = Cbuf(0, cbOffset + 2);
+
+ Operand offset = PrependOperation(Instruction.Subtract, addrLow, baseAddrLow);
+ Operand borrow = PrependOperation(Instruction.CompareLessU32, addrLow, baseAddrLow);
+
+ Operand inRangeLow = PrependOperation(Instruction.CompareLessU32, offset, size);
+
+ // NOTE(review): adding the comparison result only subtracts the borrow from the high half
+ // if "true" is encoded as -1 (all bits set) - confirm the encoding of comparison results.
+ Operand addrHighBorrowed = PrependOperation(Instruction.Add, addrHigh, borrow);
+
+ Operand inRangeHigh = PrependOperation(Instruction.CompareEqual, addrHighBorrowed, baseAddrHigh);
+
+ return PrependOperation(Instruction.BitwiseAnd, inRangeLow, inRangeHigh);
+ }
+
+ int sbUseMask = config.AccessibleStorageBuffersMask;
+
+ // For each accessible storage buffer, emit a range check and keep the base address
+ // and slot of the buffer that contains the address (0 for both if none matches).
+ while (sbUseMask != 0)
+ {
+ int slot = BitOperations.TrailingZeroCount(sbUseMask);
+
+ sbUseMask &= ~(1 << slot);
+
+ config.SetUsedStorageBuffer(slot, isWrite);
+
+ int cbOffset = GetStorageCbOffset(config.Stage, slot);
+
+ Operand inRange = BindingRangeCheck(cbOffset, out Operand baseAddrLow);
+
+ sbBaseAddrLow = PrependOperation(Instruction.ConditionalSelect, inRange, baseAddrLow, sbBaseAddrLow);
+ sbSlot = PrependOperation(Instruction.ConditionalSelect, inRange, Const(slot), sbSlot);
+ }
+
+ if (config.AccessibleStorageBuffersMask != 0)
+ {
+ // The offset is relative to the base address truncated with the alignment mask.
+ Operand baseAddrTrunc = PrependOperation(Instruction.BitwiseAnd, sbBaseAddrLow, alignMask);
+ Operand byteOffset = PrependOperation(Instruction.Subtract, addrLow, baseAddrTrunc);
+
+ Operand[] sources = new Operand[operation.SourcesCount];
+
+ // The address low/high sources are replaced by the storage buffer slot and offset.
+ sources[0] = sbSlot;
+
+ if (isStg16Or8)
+ {
+ // 8 and 16-bit stores take the offset in bytes.
+ sources[1] = byteOffset;
+ }
+ else
+ {
+ // All other operations take the offset in 32-bit words (byte offset / 4).
+ sources[1] = PrependOperation(Instruction.ShiftRightU32, byteOffset, Const(2));
+ }
+
+ for (int index = 2; index < operation.SourcesCount; index++)
+ {
+ sources[index] = operation.GetSource(index);
+ }
+
+ if (isAtomic)
+ {
+ storageOp = new Operation(operation.Inst, StorageKind.StorageBuffer, operation.Dest, sources);
+ }
+ else if (operation.Inst == Instruction.LoadGlobal)
+ {
+ storageOp = new Operation(Instruction.LoadStorage, operation.Dest, sources);
+ }
+ else
+ {
+ Instruction storeInst = operation.Inst switch
+ {
+ Instruction.StoreGlobal16 => Instruction.StoreStorage16,
+ Instruction.StoreGlobal8 => Instruction.StoreStorage8,
+ _ => Instruction.StoreStorage
+ };
+
+ storageOp = new Operation(storeInst, null, sources);
+ }
+ }
+ else if (operation.Dest != null)
+ {
+ // No storage buffer is accessible, so the result is replaced by a constant 0.
+ storageOp = new Operation(Instruction.Copy, operation.Dest, Const(0));
+ }
+
+ if (operation.Inst == Instruction.LoadGlobal)
+ {
+ int cbeUseMask = config.AccessibleConstantBuffersMask;
+
+ // For each accessible constant buffer, the result computed so far is materialized,
+ // and a new select between the constant buffer value and that previous result is chained.
+ while (cbeUseMask != 0)
+ {
+ int slot = BitOperations.TrailingZeroCount(cbeUseMask);
+ int cbSlot = UbeFirstCbuf + slot;
+
+ cbeUseMask &= ~(1 << slot);
+
+ config.SetUsedConstantBuffer(cbSlot);
+
+ // NOTE(review): "storageOp" is null here if no storage buffer is accessible
+ // and the load has no destination - presumably loads always have one, confirm.
+ Operand previousResult = PrependExistingOperation(storageOp);
+
+ int cbOffset = GetConstantUbeOffset(slot);
+
+ Operand inRange = BindingRangeCheck(cbOffset, out Operand baseAddrLow);
+
+ Operand baseAddrTruncConst = PrependOperation(Instruction.BitwiseAnd, baseAddrLow, alignMask);
+ Operand byteOffsetConst = PrependOperation(Instruction.Subtract, addrLow, baseAddrTruncConst);
+
+ Operand cbIndex = PrependOperation(Instruction.ShiftRightU32, byteOffsetConst, Const(2));
+
+ Operand[] sourcesCb = new Operand[operation.SourcesCount];
+
+ sourcesCb[0] = Const(cbSlot);
+ sourcesCb[1] = cbIndex;
+
+ for (int index = 2; index < operation.SourcesCount; index++)
+ {
+ sourcesCb[index] = operation.GetSource(index);
+ }
+
+ Operand ldcResult = PrependOperation(Instruction.LoadConstant, sourcesCb);
+
+ storageOp = new Operation(Instruction.ConditionalSelect, operation.Dest, inRange, ldcResult, previousResult);
+ }
+ }
+
+ // Detach all sources from the operation being removed.
+ // NOTE(review): presumably needed so that the operands no longer record it as a use - confirm in SetSource.
+ for (int index = 0; index < operation.SourcesCount; index++)
+ {
+ operation.SetSource(index, null);
+ }
+
+ LinkedListNode<INode> oldNode = node;
+ LinkedList<INode> oldNodeList = oldNode.List;
+
+ if (storageOp != null)
+ {
+ node = node.List.AddBefore(node, storageOp);
+ }
+ else
+ {
+ node = null;
+ }
+
+ oldNodeList.Remove(oldNode);
+
+ return node;
+ }
+
+ // Rewrites a texture sample operation that can't be used as is, which is the case when:
+ // - It has non-constant texture offsets, and the host does not support them.
+ // - It uses non-normalized coordinates.
+ // Returns the original node if nothing had to be changed, otherwise returns the node
+ // of the last texture sample operation inserted as a replacement of the original one.
+ private static LinkedListNode<INode> RewriteTextureSample(LinkedListNode<INode> node, ShaderConfig config)
+ {
+ TextureOperation texOp = (TextureOperation)node.Value;
+
+ bool hasOffset = (texOp.Flags & TextureFlags.Offset) != 0;
+ bool hasOffsets = (texOp.Flags & TextureFlags.Offsets) != 0;
+
+ // At this point this only means that the offsets might be invalid,
+ // it is refined below once the offsets are known to be constant or not.
+ bool hasInvalidOffset = (hasOffset || hasOffsets) && !config.GpuAccessor.QueryHostSupportsNonConstantTextureOffset();
+
+ bool isBindless = (texOp.Flags & TextureFlags.Bindless) != 0;
+
+ // Bindless textures are always treated as having normalized coordinates.
+ bool isCoordNormalized = isBindless || config.GpuAccessor.QueryTextureCoordNormalized(texOp.Handle, texOp.CbufSlot);
+
+ if (!hasInvalidOffset && isCoordNormalized)
+ {
+ return node;
+ }
+
+ bool isGather = (texOp.Flags & TextureFlags.Gather) != 0;
+ bool hasDerivatives = (texOp.Flags & TextureFlags.Derivatives) != 0;
+ bool intCoords = (texOp.Flags & TextureFlags.IntCoords) != 0;
+ bool hasLodBias = (texOp.Flags & TextureFlags.LodBias) != 0;
+ bool hasLodLevel = (texOp.Flags & TextureFlags.LodLevel) != 0;
+
+ bool isArray = (texOp.Type & SamplerType.Array) != 0;
+ bool isIndexed = (texOp.Type & SamplerType.Indexed) != 0;
+ bool isMultisample = (texOp.Type & SamplerType.Multisample) != 0;
+ bool isShadow = (texOp.Type & SamplerType.Shadow) != 0;
+
+ int coordsCount = texOp.Type.GetDimensions();
+
+ int offsetsCount;
+
+ // With "Offsets" there are 4 offset vectors, with "Offset" there is a single one.
+ if (hasOffsets)
+ {
+ offsetsCount = coordsCount * 4;
+ }
+ else if (hasOffset)
+ {
+ offsetsCount = coordsCount;
+ }
+ else
+ {
+ offsetsCount = 0;
+ }
+
+ // The offsets are separated from the remaining sources, as the new operation won't have them.
+ Operand[] offsets = new Operand[offsetsCount];
+ Operand[] sources = new Operand[texOp.SourcesCount - offsetsCount];
+
+ // Counts the sources that come before the offsets:
+ // handle (bindless or indexed only), coordinates, array index, shadow reference,
+ // derivatives and sample index or LOD level, each one only if present.
+ int copyCount = 0;
+
+ if (isBindless || isIndexed)
+ {
+ copyCount++;
+ }
+
+ // The LOD query only takes the handle (if any) and the coordinates.
+ Operand[] lodSources = new Operand[copyCount + coordsCount];
+
+ for (int index = 0; index < lodSources.Length; index++)
+ {
+ lodSources[index] = texOp.GetSource(index);
+ }
+
+ copyCount += coordsCount;
+
+ if (isArray)
+ {
+ copyCount++;
+ }
+
+ if (isShadow)
+ {
+ copyCount++;
+ }
+
+ if (hasDerivatives)
+ {
+ copyCount += coordsCount * 2;
+ }
+
+ if (isMultisample)
+ {
+ copyCount++;
+ }
+ else if (hasLodLevel)
+ {
+ copyCount++;
+ }
+
+ int srcIndex = 0;
+ int dstIndex = 0;
+
+ for (int index = 0; index < copyCount; index++)
+ {
+ sources[dstIndex++] = texOp.GetSource(srcIndex++);
+ }
+
+ bool areAllOffsetsConstant = true;
+
+ for (int index = 0; index < offsetsCount; index++)
+ {
+ Operand offset = texOp.GetSource(srcIndex++);
+
+ areAllOffsetsConstant &= offset.Type == OperandType.Constant;
+
+ offsets[index] = offset;
+ }
+
+ // Constant offsets are always valid, so they don't need to be replaced.
+ hasInvalidOffset &= !areAllOffsetsConstant;
+
+ if (!hasInvalidOffset && isCoordNormalized)
+ {
+ return node;
+ }
+
+ // The LOD bias and the gather component come after the offsets on the original operation.
+ if (hasLodBias)
+ {
+ sources[dstIndex++] = texOp.GetSource(srcIndex++);
+ }
+
+ if (isGather && !isShadow)
+ {
+ sources[dstIndex++] = texOp.GetSource(srcIndex++);
+ }
+
+ // The coordinates come right after the handle, if there is one.
+ int coordsIndex = isBindless || isIndexed ? 1 : 0;
+
+ int componentIndex = texOp.Index;
+
+ // Inserts a signed integer to FP32 conversion before the texture operation,
+ // and returns the local holding the converted value.
+ Operand Float(Operand value)
+ {
+ Operand res = Local();
+
+ node.List.AddBefore(node, new Operation(Instruction.ConvertS32ToFP32, res, value));
+
+ return res;
+ }
+
+ // Emulate non-normalized coordinates by normalizing the coordinates on the shader.
+ // Without normalization, the coordinates are expected to be in the [0, W or H] range,
+ // and otherwise, they are expected to be in the [0, 1] range.
+ // We normalize by dividing the coords by the texture size.
+ if (!isCoordNormalized && !intCoords)
+ {
+ config.SetUsedFeature(FeatureFlags.IntegerSampling);
+
+ // For cube textures, only the first 2 coordinates are normalized.
+ int normCoordsCount = (texOp.Type & SamplerType.Mask) == SamplerType.TextureCube ? 2 : coordsCount;
+
+ for (int index = 0; index < normCoordsCount; index++)
+ {
+ Operand coordSize = Local();
+
+ Operand[] texSizeSources;
+
+ // The size is always queried with a constant 0 as the last source.
+ // NOTE(review): presumably that source is the LOD level - confirm against the TextureSize handling.
+ if (isBindless || isIndexed)
+ {
+ texSizeSources = new Operand[] { sources[0], Const(0) };
+ }
+ else
+ {
+ texSizeSources = new Operand[] { Const(0) };
+ }
+
+ node.List.AddBefore(node, new TextureOperation(
+ Instruction.TextureSize,
+ texOp.Type,
+ texOp.Format,
+ texOp.Flags,
+ texOp.CbufSlot,
+ texOp.Handle,
+ index,
+ new[] { coordSize },
+ texSizeSources));
+
+ config.SetUsedTexture(Instruction.TextureSize, texOp.Type, texOp.Format, texOp.Flags, texOp.CbufSlot, texOp.Handle);
+
+ Operand source = sources[coordsIndex + index];
+
+ Operand coordNormalized = Local();
+
+ node.List.AddBefore(node, new Operation(Instruction.FP32 | Instruction.Divide, coordNormalized, source, Float(coordSize)));
+
+ sources[coordsIndex + index] = coordNormalized;
+ }
+ }
+
+ Operand[] dests = new Operand[texOp.DestsCount];
+
+ for (int i = 0; i < texOp.DestsCount; i++)
+ {
+ dests[i] = texOp.GetDest(i);
+ }
+
+ Operand bindlessHandle = isBindless || isIndexed ? sources[0] : null;
+
+ LinkedListNode<INode> oldNode = node;
+
+ // Technically, non-constant texture offsets are not allowed (according to the spec),
+ // however some GPUs do support that.
+ // For GPUs where it is not supported, we can replace the instruction with the following:
+ // For texture*Offset, we replace it by texture*, and add the offset to the P coords.
+ // The offset can be calculated as offset / textureSize(lod), where lod = textureQueryLod(coords).
+ // For texelFetchOffset, we replace it by texelFetch and add the offset to the P coords directly.
+ // For textureGatherOffset, we split the operation into up to 4 operations, one for each component
+ // that is accessed, where each textureGather operation has a different offset for each pixel.
+ if (hasInvalidOffset && isGather && !isShadow)
+ {
+ config.SetUsedFeature(FeatureFlags.IntegerSampling);
+
+ Operand[] newSources = new Operand[sources.Length];
+
+ sources.CopyTo(newSources, 0);
+
+ Operand[] texSizes = InsertTextureSize(node, texOp, lodSources, bindlessHandle, coordsCount);
+
+ int destIndex = 0;
+
+ for (int compIndex = 0; compIndex < 4; compIndex++)
+ {
+ // Components that are not set on the mask have no destination, and are skipped.
+ if (((texOp.Index >> compIndex) & 1) == 0)
+ {
+ continue;
+ }
+
+ for (int index = 0; index < coordsCount; index++)
+ {
+ config.SetUsedTexture(Instruction.TextureSize, texOp.Type, texOp.Format, texOp.Flags, texOp.CbufSlot, texOp.Handle);
+
+ Operand offset = Local();
+
+ // With "Offsets", each component has its own offset vector.
+ Operand intOffset = offsets[index + (hasOffsets ? compIndex * coordsCount : 0)];
+
+ node.List.AddBefore(node, new Operation(Instruction.FP32 | Instruction.Divide, offset, Float(intOffset), Float(texSizes[index])));
+
+ Operand source = sources[coordsIndex + index];
+
+ Operand coordPlusOffset = Local();
+
+ node.List.AddBefore(node, new Operation(Instruction.FP32 | Instruction.Add, coordPlusOffset, source, offset));
+
+ newSources[coordsIndex + index] = coordPlusOffset;
+ }
+
+ // Each new operation writes a single destination, and always uses 1 as the component mask.
+ TextureOperation newTexOp = new TextureOperation(
+ Instruction.TextureSample,
+ texOp.Type,
+ texOp.Format,
+ texOp.Flags & ~(TextureFlags.Offset | TextureFlags.Offsets),
+ texOp.CbufSlot,
+ texOp.Handle,
+ 1,
+ new[] { dests[destIndex++] },
+ newSources);
+
+ node = node.List.AddBefore(node, newTexOp);
+ }
+ }
+ else
+ {
+ if (hasInvalidOffset)
+ {
+ if (intCoords)
+ {
+ // Integer coordinates are in texels, so the offset can be added directly.
+ for (int index = 0; index < coordsCount; index++)
+ {
+ Operand source = sources[coordsIndex + index];
+
+ Operand coordPlusOffset = Local();
+
+ node.List.AddBefore(node, new Operation(Instruction.Add, coordPlusOffset, source, offsets[index]));
+
+ sources[coordsIndex + index] = coordPlusOffset;
+ }
+ }
+ else
+ {
+ config.SetUsedFeature(FeatureFlags.IntegerSampling);
+
+ Operand[] texSizes = InsertTextureSize(node, texOp, lodSources, bindlessHandle, coordsCount);
+
+ // The offset is divided by the texture size before being added to the coordinates.
+ for (int index = 0; index < coordsCount; index++)
+ {
+ config.SetUsedTexture(Instruction.TextureSize, texOp.Type, texOp.Format, texOp.Flags, texOp.CbufSlot, texOp.Handle);
+
+ Operand offset = Local();
+
+ Operand intOffset = offsets[index];
+
+ node.List.AddBefore(node, new Operation(Instruction.FP32 | Instruction.Divide, offset, Float(intOffset), Float(texSizes[index])));
+
+ Operand source = sources[coordsIndex + index];
+
+ Operand coordPlusOffset = Local();
+
+ node.List.AddBefore(node, new Operation(Instruction.FP32 | Instruction.Add, coordPlusOffset, source, offset));
+
+ sources[coordsIndex + index] = coordPlusOffset;
+ }
+ }
+ }
+
+ TextureOperation newTexOp = new TextureOperation(
+ Instruction.TextureSample,
+ texOp.Type,
+ texOp.Format,
+ texOp.Flags & ~(TextureFlags.Offset | TextureFlags.Offsets),
+ texOp.CbufSlot,
+ texOp.Handle,
+ componentIndex,
+ dests,
+ sources);
+
+ node = node.List.AddBefore(node, newTexOp);
+ }
+
+ node.List.Remove(oldNode);
+
+ // Detach all sources from the operation that was removed.
+ // NOTE(review): presumably needed so that the operands no longer record it as a use - confirm in SetSource.
+ for (int index = 0; index < texOp.SourcesCount; index++)
+ {
+ texOp.SetSource(index, null);
+ }
+
+ return node;
+ }
+
+ // Inserts, before the given node, a LOD query for the texture followed by one texture size
+ // query per coordinate, using the queried LOD (converted to integer) as the size query level.
+ // Returns the locals holding the size of each dimension.
+ private static Operand[] InsertTextureSize(
+     LinkedListNode<INode> node,
+     TextureOperation texOp,
+     Operand[] lodSources,
+     Operand bindlessHandle,
+     int coordsCount)
+ {
+     LinkedList<INode> list = node.List;
+
+     Operand[] sizes = new Operand[coordsCount];
+
+     Operand lod = Local();
+
+     TextureOperation lodOp = new TextureOperation(
+         Instruction.Lod,
+         texOp.Type,
+         texOp.Format,
+         texOp.Flags,
+         texOp.CbufSlot,
+         texOp.Handle,
+         0,
+         new[] { lod },
+         lodSources);
+
+     list.AddBefore(node, lodOp);
+
+     for (int coord = 0; coord < coordsCount; coord++)
+     {
+         Operand size = Local();
+
+         sizes[coord] = size;
+
+         // The LOD is converted from float to integer once for each size query.
+         Operand lodInt = Local();
+
+         list.AddBefore(node, new Operation(Instruction.ConvertFP32ToS32, lodInt, lod));
+
+         // The handle, when there is one, always comes first.
+         Operand[] sizeSources = bindlessHandle != null
+             ? new Operand[] { bindlessHandle, lodInt }
+             : new Operand[] { lodInt };
+
+         TextureOperation sizeOp = new TextureOperation(
+             Instruction.TextureSize,
+             texOp.Type,
+             texOp.Format,
+             texOp.Flags,
+             texOp.CbufSlot,
+             texOp.Handle,
+             coord,
+             new[] { size },
+             sizeSources);
+
+         list.AddBefore(node, sizeOp);
+     }
+
+     return sizes;
+ }
+
+ // Inserts code after a texture operation converting the integer values it returns into
+ // normalized floats, for textures with a SNORM format. Every existing use of the texture
+ // results is changed to use the normalized values instead.
+ // Returns the last inserted node, or the original node if nothing was inserted.
+ private static LinkedListNode<INode> InsertSnormNormalization(LinkedListNode<INode> node, ShaderConfig config)
+ {
+     TextureOperation texOp = (TextureOperation)node.Value;
+
+     // The format of a bindless texture can't be queried, since the handle
+     // is not known and the texture could have any format.
+     if (texOp.Flags.HasFlag(TextureFlags.Bindless))
+     {
+         return node;
+     }
+
+     TextureFormat format = config.GpuAccessor.QueryTextureFormat(texOp.Handle, texOp.CbufSlot);
+
+     int maxPositive;
+
+     switch (format)
+     {
+         case TextureFormat.R8Snorm:
+         case TextureFormat.R8G8Snorm:
+         case TextureFormat.R8G8B8A8Snorm:
+             maxPositive = sbyte.MaxValue;
+             break;
+         case TextureFormat.R16Snorm:
+         case TextureFormat.R16G16Snorm:
+         case TextureFormat.R16G16B16A16Snorm:
+             maxPositive = short.MaxValue;
+             break;
+         default:
+             // Not a SNORM format, there is nothing to normalize.
+             return node;
+     }
+
+     // The values are normalized under the assumption that a SINT format
+     // is being used in place of the (unsupported) SNORM format.
+     float scale = 1f / maxPositive;
+
+     for (int destIndex = 0; destIndex < texOp.DestsCount; destIndex++)
+     {
+         Operand texel = texOp.GetDest(destIndex);
+
+         // The uses must be captured before the conversion below is created,
+         // as the conversion itself also uses the texture result.
+         INode[] texelUses = texel.UseOps.ToArray();
+
+         Operation convOp = new Operation(Instruction.ConvertS32ToFP32, Local(), texel);
+         Operation normOp = new Operation(Instruction.FP32 | Instruction.Multiply, Local(), convOp.Dest, ConstF(scale));
+
+         node = node.List.AddAfter(node, convOp);
+         node = node.List.AddAfter(node, normOp);
+
+         // Make all the previous users of the texture result use the normalized value.
+         foreach (Operation useOp in texelUses.OfType<Operation>())
+         {
+             for (int srcIndex = 0; srcIndex < useOp.SourcesCount; srcIndex++)
+             {
+                 if (useOp.GetSource(srcIndex) == texel)
+                 {
+                     useOp.SetSource(srcIndex, normOp.Dest);
+                 }
+             }
+         }
+     }
+
+     return node;
+ }
+
+ // Replaces sources of the operation that read the base vertex, base instance or draw index
+ // from constant buffer 0 with loads of the matching input variable, inserted before the node.
+ // Returns true if at least one source was replaced.
+ private static bool ReplaceConstantBufferWithDrawParameters(LinkedListNode<INode> node, Operation operation)
+ {
+     bool modified = false;
+
+     // Loads the input variable into a new local, and makes the given source use it.
+     void ReplaceWithLoad(int srcIndex, IoVariable ioVariable)
+     {
+         Operand value = Local();
+
+         node.List.AddBefore(node, new Operation(Instruction.Load, StorageKind.Input, value, Const((int)ioVariable)));
+
+         operation.SetSource(srcIndex, value);
+
+         modified = true;
+     }
+
+     for (int srcIndex = 0; srcIndex < operation.SourcesCount; srcIndex++)
+     {
+         Operand src = operation.GetSource(srcIndex);
+
+         if (src.Type != OperandType.ConstantBuffer || src.GetCbufSlot() != 0)
+         {
+             continue;
+         }
+
+         // The constants are byte offsets, while constant buffer offsets are in 32-bit words.
+         switch (src.GetCbufOffset())
+         {
+             case Constants.NvnBaseVertexByteOffset / 4:
+                 ReplaceWithLoad(srcIndex, IoVariable.BaseVertex);
+                 break;
+             case Constants.NvnBaseInstanceByteOffset / 4:
+                 ReplaceWithLoad(srcIndex, IoVariable.BaseInstance);
+                 break;
+             case Constants.NvnDrawIndexByteOffset / 4:
+                 ReplaceWithLoad(srcIndex, IoVariable.DrawIndex);
+                 break;
+         }
+     }
+
+     return modified;
+ }
+
+ // Tells whether any source of the operation reads the base vertex,
+ // base instance or draw index from constant buffer 0.
+ private static bool HasConstantBufferDrawParameters(Operation operation)
+ {
+     for (int srcIndex = 0; srcIndex < operation.SourcesCount; srcIndex++)
+     {
+         Operand src = operation.GetSource(srcIndex);
+
+         if (src.Type != OperandType.ConstantBuffer || src.GetCbufSlot() != 0)
+         {
+             continue;
+         }
+
+         // The constants are byte offsets, while constant buffer offsets are in 32-bit words.
+         int wordOffset = src.GetCbufOffset();
+
+         if (wordOffset == Constants.NvnBaseVertexByteOffset / 4 ||
+             wordOffset == Constants.NvnBaseInstanceByteOffset / 4 ||
+             wordOffset == Constants.NvnDrawIndexByteOffset / 4)
+         {
+             return true;
+         }
+     }
+
+     return false;
+ }
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/Translation/ShaderConfig.cs b/src/Ryujinx.Graphics.Shader/Translation/ShaderConfig.cs
new file mode 100644
index 00000000..22f5a671
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/Translation/ShaderConfig.cs
@@ -0,0 +1,944 @@
+using Ryujinx.Graphics.Shader.IntermediateRepresentation;
+using Ryujinx.Graphics.Shader.StructuredIr;
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Numerics;
+
+namespace Ryujinx.Graphics.Shader.Translation
+{
+ class ShaderConfig
+ {
+ // TODO: Non-hardcoded array size.
+ public const int SamplerArraySize = 4;
+
+ private const int ThreadsPerWarp = 32;
+
+ public ShaderStage Stage { get; }
+
+ public bool GpPassthrough { get; }
+ public bool LastInVertexPipeline { get; private set; }
+
+ public bool HasLayerInputAttribute { get; private set; }
+ public int GpLayerInputAttribute { get; private set; }
+ public int ThreadsPerInputPrimitive { get; }
+
+ public OutputTopology OutputTopology { get; }
+
+ public int MaxOutputVertices { get; }
+
+ public int LocalMemorySize { get; }
+
+ public ImapPixelType[] ImapTypes { get; }
+
+ public int OmapTargets { get; }
+ public bool OmapSampleMask { get; }
+ public bool OmapDepth { get; }
+
+ public IGpuAccessor GpuAccessor { get; }
+
+ public TranslationOptions Options { get; }
+
+ public bool TransformFeedbackEnabled { get; }
+
+ private TransformFeedbackOutput[] _transformFeedbackOutputs;
+
+ /// <summary>
+ /// Dictionary key identifying a transform feedback variable by IO variable, location and component.
+ /// </summary>
+ readonly struct TransformFeedbackVariable : IEquatable<TransformFeedbackVariable>
+ {
+     public IoVariable IoVariable { get; }
+     public int Location { get; }
+     public int Component { get; }
+
+     public TransformFeedbackVariable(IoVariable ioVariable, int location = 0, int component = 0)
+     {
+         IoVariable = ioVariable;
+         Location = location;
+         Component = component;
+     }
+
+     public override bool Equals(object other) => other is TransformFeedbackVariable tfbVar && Equals(tfbVar);
+
+     public bool Equals(TransformFeedbackVariable other) =>
+         IoVariable == other.IoVariable &&
+         Location == other.Location &&
+         Component == other.Component;
+
+     // Packs the three members into separate bit ranges of the hash.
+     public override int GetHashCode() => (int)IoVariable | (Location << 8) | (Component << 16);
+
+     public override string ToString() => $"{IoVariable}.{Location}.{Component}";
+ }
+
+ private readonly Dictionary<TransformFeedbackVariable, TransformFeedbackOutput> _transformFeedbackDefinitions;
+
+ public int Size { get; private set; }
+
+ public byte ClipDistancesWritten { get; private set; }
+
+ public FeatureFlags UsedFeatures { get; private set; }
+
+ public int Cb1DataSize { get; private set; }
+
+ public bool LayerOutputWritten { get; private set; }
+ public int LayerOutputAttribute { get; private set; }
+
+ public bool NextUsesFixedFuncAttributes { get; private set; }
+ public int UsedInputAttributes { get; private set; }
+ public int UsedOutputAttributes { get; private set; }
+ public HashSet<int> UsedInputAttributesPerPatch { get; }
+ public HashSet<int> UsedOutputAttributesPerPatch { get; }
+ public HashSet<int> NextUsedInputAttributesPerPatch { get; private set; }
+ public int PassthroughAttributes { get; private set; }
+ private int _nextUsedInputAttributes;
+ private int _thisUsedInputAttributes;
+ private Dictionary<int, int> _perPatchAttributeLocations;
+
+ public UInt128 NextInputAttributesComponents { get; private set; }
+ public UInt128 ThisInputAttributesComponents { get; private set; }
+
+ public int AccessibleStorageBuffersMask { get; private set; }
+ public int AccessibleConstantBuffersMask { get; private set; }
+
+ private int _usedConstantBuffers;
+ private int _usedStorageBuffers;
+ private int _usedStorageBuffersWrite;
+
+ private readonly record struct TextureInfo(int CbufSlot, int Handle, bool Indexed, TextureFormat Format);
+
+ private struct TextureMeta
+ {
+ public bool AccurateType;
+ public SamplerType Type;
+ public TextureUsageFlags UsageFlags;
+ }
+
+ private readonly Dictionary<TextureInfo, TextureMeta> _usedTextures;
+ private readonly Dictionary<TextureInfo, TextureMeta> _usedImages;
+
+ private BufferDescriptor[] _cachedConstantBufferDescriptors;
+ private BufferDescriptor[] _cachedStorageBufferDescriptors;
+ private TextureDescriptor[] _cachedTextureDescriptors;
+ private TextureDescriptor[] _cachedImageDescriptors;
+
+ private int _firstConstantBufferBinding;
+ private int _firstStorageBufferBinding;
+
+ public int FirstConstantBufferBinding => _firstConstantBufferBinding;
+ public int FirstStorageBufferBinding => _firstStorageBufferBinding;
+
+ /// <summary>
+ /// Creates a configuration for the compute stage; also used as the base initializer of the other constructors.
+ /// </summary>
+ /// <param name="gpuAccessor">GPU accessor used for all state queries</param>
+ /// <param name="options">Translation options</param>
+ public ShaderConfig(IGpuAccessor gpuAccessor, TranslationOptions options)
+ {
+     Stage = ShaderStage.Compute;
+     GpuAccessor = gpuAccessor;
+     Options = options;
+
+     // By default, every storage and constant buffer slot is considered accessible.
+     AccessibleStorageBuffersMask = (1 << GlobalMemory.StorageMaxCount) - 1;
+     AccessibleConstantBuffersMask = (1 << GlobalMemory.UbeMaxCount) - 1;
+
+     _transformFeedbackDefinitions = new();
+
+     UsedInputAttributesPerPatch = new();
+     UsedOutputAttributesPerPatch = new();
+
+     _usedTextures = new();
+     _usedImages = new();
+ }
+
+ /// <summary>
+ /// Creates a configuration from explicit stage information instead of a shader header.
+ /// </summary>
+ /// <param name="stage">Shader stage</param>
+ /// <param name="outputTopology">Output topology</param>
+ /// <param name="maxOutputVertices">Maximum number of output vertices</param>
+ /// <param name="gpuAccessor">GPU accessor used for all state queries</param>
+ /// <param name="options">Translation options</param>
+ public ShaderConfig(
+     ShaderStage stage,
+     OutputTopology outputTopology,
+     int maxOutputVertices,
+     IGpuAccessor gpuAccessor,
+     TranslationOptions options) : this(gpuAccessor, options)
+ {
+     Stage = stage;
+     OutputTopology = outputTopology;
+     MaxOutputVertices = maxOutputVertices;
+     ThreadsPerInputPrimitive = 1;
+     TransformFeedbackEnabled = gpuAccessor.QueryTransformFeedbackEnabled();
+
+     // The base constructor marks all constant buffers as accessible; that only holds for compute.
+     if (stage != ShaderStage.Compute)
+     {
+         AccessibleConstantBuffersMask = 0;
+     }
+ }
+
+ /// <summary>
+ /// Creates a configuration for a graphics shader from its decoded header.
+ /// </summary>
+ /// <param name="header">Decoded shader header</param>
+ /// <param name="gpuAccessor">GPU accessor used for all state queries</param>
+ /// <param name="options">Translation options</param>
+ public ShaderConfig(ShaderHeader header, IGpuAccessor gpuAccessor, TranslationOptions options) : this(gpuAccessor, options)
+ {
+     ShaderStage stage = header.Stage;
+
+     Stage = stage;
+     GpPassthrough = stage == ShaderStage.Geometry && header.GpPassthrough;
+     LastInVertexPipeline = stage < ShaderStage.Fragment;
+
+     ThreadsPerInputPrimitive = header.ThreadsPerInputPrimitive;
+     OutputTopology = header.OutputTopology;
+     MaxOutputVertices = header.MaxOutputVertexCount;
+
+     // Low and high sizes are added as is, the CRS size is divided by the warp size first.
+     LocalMemorySize =
+         header.ShaderLocalMemoryLowSize +
+         header.ShaderLocalMemoryHighSize +
+         (header.ShaderLocalMemoryCrsSize / ThreadsPerWarp);
+
+     ImapTypes = header.ImapTypes;
+     OmapTargets = header.OmapTargets;
+     OmapSampleMask = header.OmapSampleMask;
+     OmapDepth = header.OmapDepth;
+
+     TransformFeedbackEnabled = gpuAccessor.QueryTransformFeedbackEnabled();
+ }
+
+ /// <summary>
+ /// Lazily builds the transform feedback output table and the per-variable definitions,
+ /// if transform feedback outputs are present and the table was not built yet.
+ /// </summary>
+ private void EnsureTransformFeedbackInitialized()
+ {
+     if (!HasTransformFeedbackOutputs() || _transformFeedbackOutputs != null)
+     {
+         return;
+     }
+
+     var outputs = new TransformFeedbackOutput[0xc0];
+     ulong usedVectors = 0UL;
+
+     for (int bufferIndex = 0; bufferIndex < 4; bufferIndex++)
+     {
+         var locations = GpuAccessor.QueryTransformFeedbackVaryingLocations(bufferIndex);
+         var stride = GpuAccessor.QueryTransformFeedbackStride(bufferIndex);
+
+         for (int i = 0; i < locations.Length; i++)
+         {
+             byte wordOffset = locations[i];
+
+             // Offsets outside of the table are ignored.
+             if (wordOffset >= 0xc0)
+             {
+                 continue;
+             }
+
+             outputs[wordOffset] = new TransformFeedbackOutput(bufferIndex, i * 4, stride);
+             usedVectors |= 1UL << (wordOffset / 4);
+         }
+     }
+
+     // The table must be published before the loop below, since the component
+     // query it performs reads back from _transformFeedbackOutputs.
+     _transformFeedbackOutputs = outputs;
+
+     // Visit every vector with at least one output; each step clears the lowest set bit.
+     for (; usedVectors != 0; usedVectors &= usedVectors - 1)
+     {
+         int vecIndex = BitOperations.TrailingZeroCount(usedVectors);
+
+         for (int subIndex = 0; subIndex < 4; subIndex++)
+         {
+             int wordOffset = vecIndex * 4 + subIndex;
+
+             if (!outputs[wordOffset].Valid)
+             {
+                 continue;
+             }
+
+             IoVariable ioVariable = Instructions.AttributeMap.GetIoVariable(this, wordOffset * 4, out int location);
+
+             int component = HasPerLocationInputOrOutputComponent(ioVariable, location, subIndex, isOutput: true)
+                 ? subIndex
+                 : 0;
+
+             // First definition for a given variable wins.
+             _transformFeedbackDefinitions.TryAdd(
+                 new TransformFeedbackVariable(ioVariable, location, component),
+                 outputs[wordOffset]);
+         }
+     }
+ }
+
+ /// <summary>
+ /// Gets the transform feedback output table, building it first if needed.
+ /// </summary>
+ /// <returns>The output table, or null if it was never initialized</returns>
+ public TransformFeedbackOutput[] GetTransformFeedbackOutputs()
+ {
+     EnsureTransformFeedbackInitialized();
+
+     TransformFeedbackOutput[] outputs = _transformFeedbackOutputs;
+
+     return outputs;
+ }
+
+ /// <summary>
+ /// Looks up the transform feedback output defined for a given variable.
+ /// </summary>
+ /// <param name="ioVariable">IO variable</param>
+ /// <param name="location">Variable location</param>
+ /// <param name="component">Variable component</param>
+ /// <param name="transformFeedbackOutput">Output definition, valid only when the method returns true</param>
+ /// <returns>True if a definition exists for the variable, false otherwise</returns>
+ public bool TryGetTransformFeedbackOutput(IoVariable ioVariable, int location, int component, out TransformFeedbackOutput transformFeedbackOutput)
+ {
+     EnsureTransformFeedbackInitialized();
+
+     return _transformFeedbackDefinitions.TryGetValue(
+         new TransformFeedbackVariable(ioVariable, location, component),
+         out transformFeedbackOutput);
+ }
+
+ /// <summary>
+ /// Checks if transform feedback is enabled and this stage is either
+ /// the last of the vertex pipeline or the fragment stage.
+ /// </summary>
+ private bool HasTransformFeedbackOutputs()
+ {
+     if (!TransformFeedbackEnabled)
+     {
+         return false;
+     }
+
+     return LastInVertexPipeline || Stage == ShaderStage.Fragment;
+ }
+
+ /// <summary>
+ /// Checks if transform feedback is enabled and applies to the inputs or outputs of this stage.
+ /// </summary>
+ /// <param name="isOutput">True to check outputs (last vertex pipeline stage), false to check inputs (fragment stage)</param>
+ /// <returns>True if transform feedback applies, false otherwise</returns>
+ public bool HasTransformFeedbackOutputs(bool isOutput)
+ {
+     if (!TransformFeedbackEnabled)
+     {
+         return false;
+     }
+
+     return isOutput ? LastInVertexPipeline : Stage == ShaderStage.Fragment;
+ }
+
+ /// <summary>
+ /// Checks if an IO variable is declared per location.
+ /// </summary>
+ /// <param name="ioVariable">IO variable</param>
+ /// <param name="isOutput">True for outputs, false for inputs</param>
+ /// <returns>True for fragment output colors, and for user defined variables when the matching indexing feature is not used</returns>
+ public bool HasPerLocationInputOrOutput(IoVariable ioVariable, bool isOutput)
+ {
+     if (ioVariable != IoVariable.UserDefined)
+     {
+         return ioVariable == IoVariable.FragmentOutputColor;
+     }
+
+     FeatureFlags indexingFlag = isOutput ? FeatureFlags.OaIndexing : FeatureFlags.IaIndexing;
+
+     return !UsedFeatures.HasFlag(indexingFlag);
+ }
+
+ /// <summary>
+ /// Checks if a user defined variable component is handled on its own for transform feedback.
+ /// </summary>
+ /// <param name="ioVariable">IO variable</param>
+ /// <param name="location">Variable location</param>
+ /// <param name="component">Variable component</param>
+ /// <param name="isOutput">True for outputs, false for inputs</param>
+ /// <returns>True if the transform feedback component count for the location and component is 1</returns>
+ public bool HasPerLocationInputOrOutputComponent(IoVariable ioVariable, int location, int component, bool isOutput)
+ {
+     bool eligible = ioVariable == IoVariable.UserDefined && HasTransformFeedbackOutputs(isOutput);
+
+     return eligible && GetTransformFeedbackOutputComponents(location, component) == 1;
+ }
+
+ /// <summary>
+ /// Gets the transform feedback output stored at a given word offset of the output table.
+ /// </summary>
+ /// <param name="wordOffset">Index into the output table</param>
+ /// <returns>The output table entry</returns>
+ public TransformFeedbackOutput GetTransformFeedbackOutput(int wordOffset)
+ {
+     EnsureTransformFeedbackInitialized();
+
+     TransformFeedbackOutput[] outputs = _transformFeedbackOutputs;
+
+     return outputs[wordOffset];
+ }
+
+ /// <summary>
+ /// Gets the transform feedback output of a user attribute component.
+ /// </summary>
+ /// <param name="location">User attribute location</param>
+ /// <param name="component">Component index inside the location</param>
+ /// <returns>The output table entry</returns>
+ public TransformFeedbackOutput GetTransformFeedbackOutput(int location, int component)
+ {
+     // Each location spans 4 words, starting at the user attribute base.
+     int wordOffset = (AttributeConsts.UserAttributeBase / 4) + location * 4 + component;
+
+     return GetTransformFeedbackOutput(wordOffset);
+ }
+
+ /// <summary>
+ /// Counts how many components of a user attribute location, starting from the first one,
+ /// are written to consecutive transform feedback offsets.
+ /// </summary>
+ /// <param name="location">User attribute location</param>
+ /// <param name="component">Component being queried</param>
+ /// <returns>The consecutive component count, or 1 if <paramref name="component"/> is outside of that run</returns>
+ public int GetTransformFeedbackOutputComponents(int location, int component)
+ {
+     EnsureTransformFeedbackInitialized();
+
+     int baseIndex = (AttributeConsts.UserAttributeBase / 4) + location * 4;
+     int count = 1;
+
+     while (count < 4)
+     {
+         ref var prev = ref _transformFeedbackOutputs[baseIndex + count - 1];
+         ref var curr = ref _transformFeedbackOutputs[baseIndex + count];
+
+         // The run ends at the first invalid entry or the first gap between offsets.
+         bool contiguous = prev.Valid && curr.Valid && prev.Offset + 4 == curr.Offset;
+
+         if (!contiguous)
+         {
+             break;
+         }
+
+         count++;
+     }
+
+     return baseIndex + count <= baseIndex + component ? 1 : count;
+ }
+
+ /// <summary>
+ /// Gets the type of a fragment color output: a 4-component vector of the queried output type.
+ /// </summary>
+ /// <param name="location">Fragment output location</param>
+ /// <returns>The aggregate type of the output</returns>
+ public AggregateType GetFragmentOutputColorType(int location)
+ {
+     AggregateType elementType = GpuAccessor.QueryFragmentOutputType(location).ToAggregateType();
+
+     return AggregateType.Vector4 | elementType;
+ }
+
+ /// <summary>
+ /// Gets the type of a user defined input or output.
+ /// </summary>
+ /// <param name="location">User attribute location</param>
+ /// <param name="isOutput">True for outputs, false for inputs</param>
+ /// <returns>
+ /// An array of FP32 vectors when the matching indexing feature is used;
+ /// otherwise a 4-component vector, typed from the queried attribute type for vertex inputs and FP32 for everything else
+ /// </returns>
+ public AggregateType GetUserDefinedType(int location, bool isOutput)
+ {
+     FeatureFlags indexingFlag = isOutput ? FeatureFlags.OaIndexing : FeatureFlags.IaIndexing;
+
+     if (UsedFeatures.HasFlag(indexingFlag))
+     {
+         return AggregateType.Array | AggregateType.Vector4 | AggregateType.FP32;
+     }
+
+     bool isVertexInput = Stage == ShaderStage.Vertex && !isOutput;
+
+     AggregateType elementType = isVertexInput
+         ? GpuAccessor.QueryAttributeType(location).ToAggregateType()
+         : AggregateType.FP32;
+
+     return AggregateType.Vector4 | elementType;
+ }
+
+ /// <summary>
+ /// Gets the index of the register holding the fragment depth output.
+ /// </summary>
+ /// <returns>Number of bits set on the output target mask, plus one</returns>
+ public int GetDepthRegister()
+ {
+     int colorRegisters = BitOperations.PopCount((uint)OmapTargets);
+
+     // The depth register is always two registers after the last color output.
+     return colorRegisters + 1;
+ }
+
+ /// <summary>
+ /// Reads a value from constant buffer 1 and records how much of the buffer has been read so far.
+ /// </summary>
+ /// <param name="offset">Offset of the value, passed through to the GPU accessor</param>
+ /// <returns>The value read</returns>
+ public uint ConstantBuffer1Read(int offset)
+ {
+     // Grow the tracked size so that it covers the 4 units read here.
+     Cb1DataSize = Math.Max(Cb1DataSize, offset + 4);
+
+     return GpuAccessor.ConstantBuffer1Read(offset);
+ }
+
+ /// <summary>
+ /// Gets the format to declare for an image.
+ /// </summary>
+ /// <param name="handle">Texture handle</param>
+ /// <param name="cbufSlot">Constant buffer slot of the handle, passed through to the GPU accessor</param>
+ /// <returns>
+ /// <see cref="TextureFormat.Unknown"/> if the host supports formatted loads, otherwise the queried format,
+ /// with <see cref="TextureFormat.R8G8B8A8Unorm"/> as the fallback for unknown formats
+ /// </returns>
+ public TextureFormat GetTextureFormat(int handle, int cbufSlot = -1)
+ {
+     // With the formatted load extension, the image can be declared without
+     // a format and the GPU takes care of it.
+     if (GpuAccessor.QueryHostSupportsImageLoadFormatted())
+     {
+         return TextureFormat.Unknown;
+     }
+
+     TextureFormat format = GpuAccessor.QueryTextureFormat(handle, cbufSlot);
+
+     if (format != TextureFormat.Unknown)
+     {
+         return format;
+     }
+
+     GpuAccessor.Log($"Unknown format for texture {handle}.");
+
+     return TextureFormat.R8G8B8A8Unorm;
+ }
+
+ /// <summary>
+ /// Checks if a format is one of the two accepted for atomic image operations (R32Sint or R32Uint).
+ /// </summary>
+ /// <param name="format">Format to check</param>
+ /// <returns>True if the format is accepted, false otherwise</returns>
+ private static bool FormatSupportsAtomic(TextureFormat format) =>
+     format is TextureFormat.R32Sint or TextureFormat.R32Uint;
+
+ /// <summary>
+ /// Gets the format to declare for an image used with atomic operations.
+ /// </summary>
+ /// <param name="handle">Texture handle</param>
+ /// <param name="cbufSlot">Constant buffer slot of the handle, passed through to the GPU accessor</param>
+ /// <returns>The queried format if it supports atomics, <see cref="TextureFormat.R32Sint"/> otherwise</returns>
+ public TextureFormat GetTextureFormatAtomic(int handle, int cbufSlot = -1)
+ {
+     // Atomic image instructions can't rely on GL_EXT_shader_image_load_formatted,
+     // a format must always be specified for them.
+     TextureFormat format = GpuAccessor.QueryTextureFormat(handle, cbufSlot);
+
+     if (FormatSupportsAtomic(format))
+     {
+         return format;
+     }
+
+     GpuAccessor.Log($"Unsupported format for texture {handle}: {format}.");
+
+     return TextureFormat.R32Sint;
+ }
+
+ /// <summary>
+ /// Adds to the accumulated <see cref="Size"/>.
+ /// </summary>
+ /// <param name="size">Amount to add</param>
+ public void SizeAdd(int size) => Size += size;
+
+ /// <summary>
+ /// Merges the usage information gathered on another configuration into this one.
+ /// </summary>
+ /// <param name="other">Configuration to take the usage information from</param>
+ public void InheritFrom(ShaderConfig other)
+ {
+     static void MergeUsage(Dictionary<TextureInfo, TextureMeta> target, Dictionary<TextureInfo, TextureMeta> source)
+     {
+         foreach (var kv in source)
+         {
+             // Entries present on both sides are merged, new ones are copied over.
+             target[kv.Key] = target.TryGetValue(kv.Key, out TextureMeta existing)
+                 ? MergeTextureMeta(kv.Value, existing)
+                 : kv.Value;
+         }
+     }
+
+     ClipDistancesWritten |= other.ClipDistancesWritten;
+     UsedFeatures |= other.UsedFeatures;
+
+     UsedInputAttributes |= other.UsedInputAttributes;
+     UsedOutputAttributes |= other.UsedOutputAttributes;
+
+     _usedConstantBuffers |= other._usedConstantBuffers;
+     _usedStorageBuffers |= other._usedStorageBuffers;
+     _usedStorageBuffersWrite |= other._usedStorageBuffersWrite;
+
+     MergeUsage(_usedTextures, other._usedTextures);
+     MergeUsage(_usedImages, other._usedImages);
+ }
+
+ /// <summary>
+ /// Records the attribute used as layer output, and flags the layer output as written.
+ /// </summary>
+ /// <param name="attr">Layer output attribute</param>
+ public void SetLayerOutputAttribute(int attr)
+ {
+     LayerOutputAttribute = attr;
+     LayerOutputWritten = true;
+ }
+
+ /// <summary>
+ /// Records the input attribute that holds the layer, and flags its presence.
+ /// </summary>
+ /// <param name="attr">Layer input attribute</param>
+ public void SetGeometryShaderLayerInputAttribute(int attr)
+ {
+     GpLayerInputAttribute = attr;
+     HasLayerInputAttribute = true;
+ }
+
+ /// <summary>
+ /// Flags this stage as the last one of the vertex pipeline.
+ /// </summary>
+ public void SetLastInVertexPipeline() => LastInVertexPipeline = true;
+
+ /// <summary>
+ /// Marks a user input attribute as used, without touching the per-component tracking.
+ /// </summary>
+ /// <param name="index">User attribute index</param>
+ public void SetInputUserAttributeFixedFunc(int index) => UsedInputAttributes |= 1 << index;
+
+ /// <summary>
+ /// Marks a user output attribute as used.
+ /// </summary>
+ /// <param name="index">User attribute index</param>
+ public void SetOutputUserAttributeFixedFunc(int index) => UsedOutputAttributes |= 1 << index;
+
+ /// <summary>
+ /// Marks a component of a user input attribute as used by this stage.
+ /// </summary>
+ /// <param name="index">User attribute index</param>
+ /// <param name="component">Component index inside the attribute</param>
+ public void SetInputUserAttribute(int index, int component)
+ {
+     int attributeBit = 1 << index;
+
+     // One bit per component, 4 components per attribute.
+     ThisInputAttributesComponents |= UInt128.One << (index * 4 + component);
+     _thisUsedInputAttributes |= attributeBit;
+     UsedInputAttributes |= attributeBit;
+ }
+
+ /// <summary>
+ /// Marks a per-patch user input attribute as used.
+ /// </summary>
+ /// <param name="index">Per-patch attribute index</param>
+ public void SetInputUserAttributePerPatch(int index) => UsedInputAttributesPerPatch.Add(index);
+
+ /// <summary>
+ /// Marks a user output attribute as used.
+ /// </summary>
+ /// <param name="index">User attribute index</param>
+ public void SetOutputUserAttribute(int index) => UsedOutputAttributes |= 1 << index;
+
+ /// <summary>
+ /// Marks a per-patch user output attribute as used.
+ /// </summary>
+ /// <param name="index">Per-patch attribute index</param>
+ public void SetOutputUserAttributePerPatch(int index) => UsedOutputAttributesPerPatch.Add(index);
+
+ /// <summary>
+ /// Takes the input usage of the next stage into account: records which inputs it reads,
+ /// assigns locations to per-patch attributes and updates <see cref="LastInVertexPipeline"/>.
+ /// </summary>
+ /// <param name="config">Configuration of the next stage</param>
+ public void MergeFromtNextStage(ShaderConfig config)
+ {
+     NextInputAttributesComponents = config.ThisInputAttributesComponents;
+     NextUsedInputAttributesPerPatch = config.UsedInputAttributesPerPatch;
+     NextUsesFixedFuncAttributes = config.UsedFeatures.HasFlag(FeatureFlags.FixedFuncAttr);
+     MergeOutputUserAttributes(config.UsedInputAttributes, config.UsedInputAttributesPerPatch);
+
+     if (UsedOutputAttributesPerPatch.Count != 0)
+     {
+         // Regular and per-patch input/output locations can't overlap, so the per-patch
+         // ones are placed on locations that the regular outputs leave unused.
+         var perPatchLocations = new Dictionary<int, int>();
+
+         int availableMask = ~UsedOutputAttributes;
+
+         foreach (int attr in UsedOutputAttributesPerPatch)
+         {
+             if (availableMask == 0)
+             {
+                 config.GpuAccessor.Log($"No enough free locations for patch input/output 0x{attr:X}.");
+                 break;
+             }
+
+             int location = BitOperations.TrailingZeroCount(availableMask);
+
+             perPatchLocations.Add(attr, location);
+             availableMask &= ~(1 << location);
+         }
+
+         // Both stages must agree on the locations, so they share the same map.
+         _perPatchAttributeLocations = perPatchLocations;
+         config._perPatchAttributeLocations = perPatchLocations;
+     }
+
+     // A geometry shader using the passthrough feature can't modify any of the outputs,
+     // so it does not take the "last stage" role away from the stage that comes before it.
+     bool nextIsPassthroughGeometry = config.Stage == ShaderStage.Geometry && config.GpPassthrough;
+
+     if (config.Stage != ShaderStage.Fragment && !nextIsPassthroughGeometry)
+     {
+         LastInVertexPipeline = false;
+     }
+ }
+
+ /// <summary>
+ /// Records the inputs used by the next stage, and makes sure this stage outputs them.
+ /// </summary>
+ /// <param name="mask">Mask of the user attributes read by the next stage</param>
+ /// <param name="perPatch">Per-patch attributes read by the next stage</param>
+ public void MergeOutputUserAttributes(int mask, IEnumerable<int> perPatch)
+ {
+     _nextUsedInputAttributes = mask;
+
+     if (GpPassthrough)
+     {
+         // With passthrough, only what this stage does not write itself is passed along.
+         PassthroughAttributes = mask & ~UsedOutputAttributes;
+         return;
+     }
+
+     UsedOutputAttributes |= mask;
+     UsedOutputAttributesPerPatch.UnionWith(perPatch);
+ }
+
+ /// <summary>
+ /// Gets the location assigned to a per-patch attribute.
+ /// </summary>
+ /// <param name="index">Per-patch attribute index</param>
+ /// <returns>The mapped location, or <paramref name="index"/> itself when no mapping exists</returns>
+ public int GetPerPatchAttributeLocation(int index)
+ {
+     bool mapped = _perPatchAttributeLocations != null &&
+                   _perPatchAttributeLocations.TryGetValue(index, out index);
+
+     // On a failed lookup TryGetValue zeroes its out argument, hence the separate copy below.
+     return mapped ? index : GetUnmapped();
+
+     int GetUnmapped() => unmappedIndex;
+ }
+
+ /// <summary>
+ /// Checks if an output attribute should be considered used.
+ /// </summary>
+ /// <param name="attr">Attribute offset</param>
+ /// <returns>
+ /// False only for user attributes that the next stage does not read, and only when
+ /// the next stage uses fixed function attributes; true in every other case
+ /// </returns>
+ public bool IsUsedOutputAttribute(int attr)
+ {
+     bool isUserAttribute = attr >= AttributeConsts.UserAttributeBase && attr < AttributeConsts.UserAttributeEnd;
+
+     // The check is conservative: it would also be valid to return false
+     // whenever the output is just not used by the next stage.
+     if (!NextUsesFixedFuncAttributes || !isUserAttribute)
+     {
+         return true;
+     }
+
+     // Each user attribute spans 16 units of the attribute offset.
+     int index = (attr - AttributeConsts.UserAttributeBase) >> 4;
+
+     return (_nextUsedInputAttributes & (1 << index)) != 0;
+ }
+
+ /// <summary>
+ /// Finds the n-th user attribute that is not in use.
+ /// </summary>
+ /// <param name="isOutput">True to search against the inputs used by the next stage, false for the inputs used by this stage</param>
+ /// <param name="index">Number of free attributes to skip before picking one</param>
+ /// <returns>The attribute index, or -1 if every attribute is in use from the start</returns>
+ public int GetFreeUserAttribute(bool isOutput, int index)
+ {
+     int useMask = isOutput ? _nextUsedInputAttributes : _thisUsedInputAttributes;
+     int bit = -1;
+
+     // NOTE(review): if the mask fills up while skipping entries, the loop ends and the
+     // last bit found is returned rather than -1 - confirm that this is intended.
+     for (; useMask != -1; index--)
+     {
+         bit = BitOperations.TrailingZeroCount(~useMask);
+
+         if (bit == 32)
+         {
+             bit = -1;
+             break;
+         }
+
+         if (index < 1)
+         {
+             break;
+         }
+
+         // Skip this free attribute and look for the next one.
+         useMask |= 1 << bit;
+     }
+
+     return bit;
+ }
+
+ /// <summary>
+ /// Marks every user input attribute, and every one of their components, as used.
+ /// </summary>
+ public void SetAllInputUserAttributes()
+ {
+     // 4 component bits per attribute, for all attributes.
+     UInt128 allComponents = ~UInt128.Zero >> (128 - Constants.MaxAttributes * 4);
+
+     ThisInputAttributesComponents |= allComponents;
+     UsedInputAttributes |= Constants.AllAttributesMask;
+ }
+
+ /// <summary>
+ /// Marks every user output attribute as used.
+ /// </summary>
+ public void SetAllOutputUserAttributes() => UsedOutputAttributes |= Constants.AllAttributesMask;
+
+ /// <summary>
+ /// Flags a clip distance as written.
+ /// </summary>
+ /// <param name="index">Clip distance index, used as bit position on the written mask</param>
+ public void SetClipDistanceWritten(int index) => ClipDistancesWritten |= (byte)(1 << index);
+
+ /// <summary>
+ /// Adds flags to the set of features used by the shader.
+ /// </summary>
+ /// <param name="flags">Feature flags to add</param>
+ public void SetUsedFeature(FeatureFlags flags) => UsedFeatures |= flags;
+
+ /// <summary>
+ /// Replaces the masks of accessible storage and constant buffers.
+ /// </summary>
+ /// <param name="sbMask">New storage buffers mask</param>
+ /// <param name="ubeMask">New constant buffers mask</param>
+ public void SetAccessibleBufferMasks(int sbMask, int ubeMask)
+ {
+     AccessibleConstantBuffersMask = ubeMask;
+     AccessibleStorageBuffersMask = sbMask;
+ }
+
+ /// <summary>
+ /// Marks a constant buffer slot as used.
+ /// </summary>
+ /// <param name="slot">Constant buffer slot</param>
+ public void SetUsedConstantBuffer(int slot) => _usedConstantBuffers |= 1 << slot;
+
+ /// <summary>
+ /// Marks a storage buffer slot as used, and optionally as written.
+ /// </summary>
+ /// <param name="slot">Storage buffer slot</param>
+ /// <param name="write">True if the shader writes to the buffer</param>
+ public void SetUsedStorageBuffer(int slot, bool write)
+ {
+     int slotBit = 1 << slot;
+
+     _usedStorageBuffers |= slotBit;
+     _usedStorageBuffersWrite |= write ? slotBit : 0;
+ }
+
+ /// <summary>
+ /// Records the use of a texture or image by an instruction, and registers it on the GPU accessor.
+ /// </summary>
+ /// <param name="inst">Instruction accessing the texture or image</param>
+ /// <param name="type">Sampler type</param>
+ /// <param name="format">Image format; only recorded for image instructions</param>
+ /// <param name="flags">Texture operation flags</param>
+ /// <param name="cbufSlot">Constant buffer slot of the handle</param>
+ /// <param name="handle">Texture handle</param>
+ public void SetUsedTexture(
+     Instruction inst,
+     SamplerType type,
+     TextureFormat format,
+     TextureFlags flags,
+     int cbufSlot,
+     int handle)
+ {
+     inst &= Instruction.Mask;
+
+     bool coherent = flags.HasFlag(TextureFlags.Coherent);
+     bool isWrite = inst is Instruction.ImageStore or Instruction.ImageAtomic;
+
+     if (isWrite || inst == Instruction.ImageLoad)
+     {
+         SetUsedTextureOrImage(_usedImages, cbufSlot, handle, type, format, true, isWrite, false, coherent);
+     }
+     else
+     {
+         // Lod and TextureSize are not taken as a source of accurate type information.
+         bool accurateType = inst != Instruction.Lod && inst != Instruction.TextureSize;
+         bool intCoords = inst == Instruction.TextureSize || flags.HasFlag(TextureFlags.IntCoords);
+
+         SetUsedTextureOrImage(_usedTextures, cbufSlot, handle, type, TextureFormat.Unknown, intCoords, false, accurateType, coherent);
+     }
+
+     GpuAccessor.RegisterTexture(handle, cbufSlot);
+ }
+
+ /// <summary>
+ /// Adds a texture or image use to a usage dictionary, merging with the entry already there if any.
+ /// Indexed samplers add <see cref="SamplerArraySize"/> entries, with handles 2 apart.
+ /// </summary>
+ /// <param name="dict">Usage dictionary to update</param>
+ /// <param name="cbufSlot">Constant buffer slot of the handle</param>
+ /// <param name="handle">Texture handle</param>
+ /// <param name="type">Sampler type</param>
+ /// <param name="format">Image format</param>
+ /// <param name="intCoords">True if the access needs the scale value</param>
+ /// <param name="write">True if the access writes to the image</param>
+ /// <param name="accurateType">True if <paramref name="type"/> can be trusted</param>
+ /// <param name="coherent">True if the access is coherent</param>
+ private void SetUsedTextureOrImage(
+     Dictionary<TextureInfo, TextureMeta> dict,
+     int cbufSlot,
+     int handle,
+     SamplerType type,
+     TextureFormat format,
+     bool intCoords,
+     bool write,
+     bool accurateType,
+     bool coherent)
+ {
+     var dimensions = type.GetDimensions();
+     var isIndexed = type.HasFlag(SamplerType.Indexed);
+
+     var usageFlags = TextureUsageFlags.None;
+
+     if (intCoords)
+     {
+         usageFlags |= TextureUsageFlags.NeedsScaleValue;
+
+         if (!Stage.SupportsRenderScale() || isIndexed || write || dimensions != 2)
+         {
+             // Resolution scaling cannot be applied to this texture right now.
+             // Flag so that we know to blacklist scaling on related textures when binding them.
+             usageFlags |= TextureUsageFlags.ResScaleUnsupported;
+         }
+     }
+
+     if (write)
+     {
+         usageFlags |= TextureUsageFlags.ImageStore;
+     }
+
+     if (coherent)
+     {
+         usageFlags |= TextureUsageFlags.ImageCoherent;
+     }
+
+     // The metadata is the same for every array element, only the handle changes.
+     var meta = new TextureMeta()
+     {
+         AccurateType = accurateType,
+         Type = type,
+         UsageFlags = usageFlags
+     };
+
+     int arraySize = isIndexed ? SamplerArraySize : 1;
+
+     for (int layer = 0; layer < arraySize; layer++)
+     {
+         var info = new TextureInfo(cbufSlot, handle + layer * 2, isIndexed, format);
+
+         dict[info] = dict.TryGetValue(info, out var existingMeta)
+             ? MergeTextureMeta(meta, existingMeta)
+             : meta;
+     }
+ }
+
+ /// <summary>
+ /// Merges the metadata of two uses of the same texture.
+ /// </summary>
+ /// <param name="meta">New metadata</param>
+ /// <param name="existingMeta">Metadata already recorded</param>
+ /// <returns>Metadata with the usage flags of both, and the existing type when that one is accurate</returns>
+ private static TextureMeta MergeTextureMeta(TextureMeta meta, TextureMeta existingMeta)
+ {
+     // Type information already known to be accurate takes precedence over the new one.
+     if (existingMeta.AccurateType)
+     {
+         meta.Type = existingMeta.Type;
+         meta.AccurateType = true;
+     }
+
+     meta.UsageFlags |= existingMeta.UsageFlags;
+
+     return meta;
+ }
+
+ /// <summary>
+ /// Gets the descriptors of the constant buffers used by the shader. The result is cached on first call.
+ /// </summary>
+ /// <returns>Constant buffer descriptors</returns>
+ public BufferDescriptor[] GetConstantBufferDescriptors()
+ {
+     if (_cachedConstantBufferDescriptors == null)
+     {
+         bool cbIndexing = UsedFeatures.HasFlag(FeatureFlags.CbIndexing);
+         int usedMask = _usedConstantBuffers;
+
+         if (cbIndexing)
+         {
+             // With indexing, the buffers reported by the GPU accessor are included too.
+             usedMask |= (int)GpuAccessor.QueryConstantBufferUse();
+         }
+
+         _cachedConstantBufferDescriptors = GetBufferDescriptors(
+             usedMask,
+             0,
+             cbIndexing,
+             out _firstConstantBufferBinding,
+             GpuAccessor.QueryBindingConstantBuffer);
+     }
+
+     return _cachedConstantBufferDescriptors;
+ }
+
+ /// <summary>
+ /// Gets the descriptors of the storage buffers used by the shader. The result is cached on first call.
+ /// </summary>
+ /// <returns>Storage buffer descriptors</returns>
+ public BufferDescriptor[] GetStorageBufferDescriptors() =>
+     _cachedStorageBufferDescriptors ??= GetBufferDescriptors(
+         _usedStorageBuffers,
+         _usedStorageBuffersWrite,
+         true,
+         out _firstStorageBufferBinding,
+         GpuAccessor.QueryBindingStorageBuffer);
+
+ /// <summary>
+ /// Builds one descriptor for each slot set on the used mask, in ascending slot order.
+ /// </summary>
+ /// <param name="usedMask">Mask of the used slots</param>
+ /// <param name="writtenMask">Mask of the slots that are written; those get the write flag</param>
+ /// <param name="isArray">True to also request bindings for the unused slots that come before a used one</param>
+ /// <param name="firstBinding">First binding returned by the callback, or 0 if it was never called</param>
+ /// <param name="getBindingCallback">Callback that returns the binding of a slot</param>
+ /// <returns>Buffer descriptors</returns>
+ private static BufferDescriptor[] GetBufferDescriptors(
+     int usedMask,
+     int writtenMask,
+     bool isArray,
+     out int firstBinding,
+     Func<int, int> getBindingCallback)
+ {
+     var descriptors = new BufferDescriptor[BitOperations.PopCount((uint)usedMask)];
+
+     int first = 0;
+     bool hasFirst = false;
+     int previousSlot = -1;
+
+     for (int i = 0; i < descriptors.Length; i++)
+     {
+         int slot = BitOperations.TrailingZeroCount(usedMask);
+         int slotBit = 1 << slot;
+
+         if (isArray)
+         {
+             // Array entries consume bindings even when they are unused,
+             // so bindings are requested for the gap before this slot too.
+             for (int gapSlot = previousSlot + 1; gapSlot < slot; gapSlot++)
+             {
+                 int gapBinding = getBindingCallback(gapSlot);
+
+                 if (!hasFirst)
+                 {
+                     first = gapBinding;
+                     hasFirst = true;
+                 }
+             }
+         }
+
+         previousSlot = slot;
+
+         descriptors[i] = new BufferDescriptor(getBindingCallback(slot), slot);
+
+         if (!hasFirst)
+         {
+             first = descriptors[i].Binding;
+             hasFirst = true;
+         }
+
+         if ((writtenMask & slotBit) != 0)
+         {
+             descriptors[i].SetFlag(BufferUsageFlags.Write);
+         }
+
+         usedMask &= ~slotBit;
+     }
+
+     firstBinding = first;
+
+     return descriptors;
+ }
+
+ /// <summary>
+ /// Gets the descriptors of the textures used by the shader. The result is cached on first call.
+ /// </summary>
+ /// <returns>Texture descriptors</returns>
+ public TextureDescriptor[] GetTextureDescriptors() =>
+     _cachedTextureDescriptors ??= GetTextureOrImageDescriptors(_usedTextures, GpuAccessor.QueryBindingTexture);
+
+ /// <summary>
+ /// Gets the descriptors of the images used by the shader. The result is cached on first call.
+ /// </summary>
+ /// <returns>Image descriptors</returns>
+ public TextureDescriptor[] GetImageDescriptors() =>
+     _cachedImageDescriptors ??= GetTextureOrImageDescriptors(_usedImages, GpuAccessor.QueryBindingImage);
+
+ /// <summary>
+ /// Builds the descriptors for all entries of a texture or image usage dictionary.
+ /// Entries are visited by ascending handle, with non-indexed entries before indexed ones
+ /// when the handle is the same, and bindings are requested in that order.
+ /// </summary>
+ /// <param name="dict">Usage dictionary</param>
+ /// <param name="getBindingCallback">Callback that returns the binding, given the descriptor index and if it is a buffer texture</param>
+ /// <returns>Texture or image descriptors</returns>
+ private static TextureDescriptor[] GetTextureOrImageDescriptors(Dictionary<TextureInfo, TextureMeta> dict, Func<int, bool, int> getBindingCallback)
+ {
+     var descriptors = new TextureDescriptor[dict.Count];
+
+     int i = 0;
+
+     // A primary key plus a ThenBy tie breaker yields the very same order that two chained
+     // stable OrderBy calls (Indexed first, Handle last) produce, with a single sort.
+     foreach (var kv in dict.OrderBy(x => x.Key.Handle).ThenBy(x => x.Key.Indexed))
+     {
+         var info = kv.Key;
+         var meta = kv.Value;
+
+         bool isBuffer = (meta.Type & SamplerType.Mask) == SamplerType.TextureBuffer;
+         int binding = getBindingCallback(i, isBuffer);
+
+         descriptors[i] = new TextureDescriptor(binding, meta.Type, info.Format, info.CbufSlot, info.Handle);
+         descriptors[i].SetFlag(meta.UsageFlags);
+         i++;
+     }
+
+     return descriptors;
+ }
+
+ /// <summary>
+ /// Finds the texture descriptor that matches the constant buffer slot, handle and format of a texture operation.
+ /// </summary>
+ /// <param name="texOp">Texture operation</param>
+ /// <returns>The descriptor and its index, or a default descriptor and -1 if there is no match</returns>
+ public (TextureDescriptor, int) FindTextureDescriptor(AstTextureOperation texOp)
+ {
+     TextureDescriptor[] descriptors = GetTextureDescriptors();
+
+     for (int i = 0; i < descriptors.Length; i++)
+     {
+         var candidate = descriptors[i];
+
+         bool matches =
+             candidate.CbufSlot == texOp.CbufSlot &&
+             candidate.HandleIndex == texOp.Handle &&
+             candidate.Format == texOp.Format;
+
+         if (matches)
+         {
+             return (candidate, i);
+         }
+     }
+
+     return (default, -1);
+ }
+
+ /// <summary>
+ /// Finds the index of the descriptor that matches the type, constant buffer slot,
+ /// handle and format of a texture operation.
+ /// </summary>
+ /// <param name="array">Descriptors to search</param>
+ /// <param name="texOp">Texture operation</param>
+ /// <returns>Index of the first match, or -1 if there is none</returns>
+ private static int FindDescriptorIndex(TextureDescriptor[] array, AstTextureOperation texOp)
+ {
+     for (int i = 0; i < array.Length; i++)
+     {
+         var candidate = array[i];
+
+         bool matches =
+             candidate.Type == texOp.Type &&
+             candidate.CbufSlot == texOp.CbufSlot &&
+             candidate.HandleIndex == texOp.Handle &&
+             candidate.Format == texOp.Format;
+
+         if (matches)
+         {
+             return i;
+         }
+     }
+
+     return -1;
+ }
+
+ /// <summary>
+ /// Finds the index of the texture descriptor matching a texture operation.
+ /// </summary>
+ /// <param name="texOp">Texture operation</param>
+ /// <returns>Descriptor index, or -1 if not found</returns>
+ public int FindTextureDescriptorIndex(AstTextureOperation texOp) =>
+     FindDescriptorIndex(GetTextureDescriptors(), texOp);
+
+ /// <summary>
+ /// Finds the index of the image descriptor matching a texture operation.
+ /// </summary>
+ /// <param name="texOp">Texture operation</param>
+ /// <returns>Descriptor index, or -1 if not found</returns>
+ public int FindImageDescriptorIndex(AstTextureOperation texOp) =>
+     FindDescriptorIndex(GetImageDescriptors(), texOp);
+
+ /// <summary>
+ /// Creates the program information from the resource usage and state recorded on this configuration.
+ /// </summary>
+ /// <param name="identification">Shader identification result</param>
+ /// <returns>Shader program information</returns>
+ public ShaderProgramInfo CreateProgramInfo(ShaderIdentification identification = ShaderIdentification.None)
+ {
+     // The descriptors are built in the same order as they are passed below.
+     BufferDescriptor[] constantBuffers = GetConstantBufferDescriptors();
+     BufferDescriptor[] storageBuffers = GetStorageBufferDescriptors();
+     TextureDescriptor[] textures = GetTextureDescriptors();
+     TextureDescriptor[] images = GetImageDescriptors();
+
+     bool usesInstanceId = UsedFeatures.HasFlag(FeatureFlags.InstanceId);
+     bool usesDrawParameters = UsedFeatures.HasFlag(FeatureFlags.DrawParameters);
+     bool usesRtLayer = UsedFeatures.HasFlag(FeatureFlags.RtLayer);
+
+     return new ShaderProgramInfo(
+         constantBuffers,
+         storageBuffers,
+         textures,
+         images,
+         identification,
+         GpLayerInputAttribute,
+         Stage,
+         usesInstanceId,
+         usesDrawParameters,
+         usesRtLayer,
+         ClipDistancesWritten,
+         OmapTargets);
+ }
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/Translation/ShaderHeader.cs b/src/Ryujinx.Graphics.Shader/Translation/ShaderHeader.cs
new file mode 100644
index 00000000..01f7f08a
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/Translation/ShaderHeader.cs
@@ -0,0 +1,158 @@
+using Ryujinx.Common.Utilities;
+using System;
+using System.Runtime.InteropServices;
+
+namespace Ryujinx.Graphics.Shader.Translation
+{
+ enum PixelImap // Value of one imap component; ShaderHeader decodes each one from a 2-bit field of the header imap bytes.
+ {
+ Unused = 0, // Components with this value are skipped by ImapPixelType.GetFirstUsedType.
+ Constant = 1,
+ Perspective = 2,
+ ScreenLinear = 3 // Highest value that fits on the 2-bit field.
+ }
+
+ /// <summary>
+ /// Imap values for the four components of one attribute.
+ /// </summary>
+ readonly struct ImapPixelType
+ {
+     public PixelImap X { get; }
+     public PixelImap Y { get; }
+     public PixelImap Z { get; }
+     public PixelImap W { get; }
+
+     public ImapPixelType(PixelImap x, PixelImap y, PixelImap z, PixelImap w)
+     {
+         X = x;
+         Y = y;
+         Z = z;
+         W = w;
+     }
+
+     /// <summary>
+     /// Gets the value of the first component, in X, Y, Z order, that is not unused.
+     /// </summary>
+     /// <returns>The first used value, or the value of W if X, Y and Z are all unused</returns>
+     public PixelImap GetFirstUsedType()
+     {
+         if (X != PixelImap.Unused)
+         {
+             return X;
+         }
+
+         if (Y != PixelImap.Unused)
+         {
+             return Y;
+         }
+
+         return Z != PixelImap.Unused ? Z : W;
+     }
+ }
+
+ /// <summary>
+ /// Shader program header, decoded from the first 0x50 bytes of the shader code.
+ /// </summary>
+ class ShaderHeader
+ {
+     public int SphType { get; }
+     public int Version { get; }
+
+     public ShaderStage Stage { get; }
+
+     public bool MrtEnable { get; }
+
+     public bool KillsPixels { get; }
+
+     public bool DoesGlobalStore { get; }
+
+     public int SassVersion { get; }
+
+     public bool GpPassthrough { get; }
+
+     public bool DoesLoadOrStore { get; }
+     public bool DoesFp64 { get; }
+
+     public int StreamOutMask { get; }
+
+     public int ShaderLocalMemoryLowSize { get; }
+
+     public int PerPatchAttributeCount { get; }
+
+     public int ShaderLocalMemoryHighSize { get; }
+
+     public int ThreadsPerInputPrimitive { get; }
+
+     public int ShaderLocalMemoryCrsSize { get; }
+
+     public OutputTopology OutputTopology { get; }
+
+     public int MaxOutputVertexCount { get; }
+
+     public int StoreReqStart { get; }
+     public int StoreReqEnd { get; }
+
+     public ImapPixelType[] ImapTypes { get; }
+
+     public int OmapTargets { get; }
+     public bool OmapSampleMask { get; }
+     public bool OmapDepth { get; }
+
+     /// <summary>
+     /// Reads and decodes the header of the shader located at a given address.
+     /// </summary>
+     /// <param name="gpuAccessor">GPU accessor used to read the shader code</param>
+     /// <param name="address">Address of the shader code, passed through to the GPU accessor</param>
+     public ShaderHeader(IGpuAccessor gpuAccessor, ulong address)
+     {
+         ReadOnlySpan<int> words = MemoryMarshal.Cast<ulong, int>(gpuAccessor.GetCode(address, 0x50));
+
+         // Common word 0.
+         int word0 = words[0];
+
+         SphType = word0.Extract(0, 5);
+         Version = word0.Extract(5, 5);
+
+         ShaderStage stage = (ShaderStage)word0.Extract(10, 4);
+
+         // Compute is not valid here, it is replaced by vertex.
+         Stage = stage == ShaderStage.Compute ? ShaderStage.Vertex : stage;
+
+         MrtEnable = word0.Extract(14);
+         KillsPixels = word0.Extract(15);
+         DoesGlobalStore = word0.Extract(16);
+         SassVersion = word0.Extract(17, 4);
+         GpPassthrough = word0.Extract(24);
+         DoesLoadOrStore = word0.Extract(26);
+         DoesFp64 = word0.Extract(27);
+         StreamOutMask = word0.Extract(28, 4);
+
+         // Common word 1.
+         int word1 = words[1];
+
+         ShaderLocalMemoryLowSize = word1.Extract(0, 24);
+         PerPatchAttributeCount = word1.Extract(24, 8);
+
+         // Common word 2.
+         int word2 = words[2];
+
+         ShaderLocalMemoryHighSize = word2.Extract(0, 24);
+         ThreadsPerInputPrimitive = word2.Extract(24, 8);
+
+         // Common word 3.
+         int word3 = words[3];
+
+         ShaderLocalMemoryCrsSize = word3.Extract(0, 24);
+         OutputTopology = (OutputTopology)word3.Extract(24, 4);
+
+         // Common word 4.
+         int word4 = words[4];
+
+         MaxOutputVertexCount = word4.Extract(0, 12);
+         StoreReqStart = word4.Extract(12, 8);
+         StoreReqEnd = word4.Extract(24, 8);
+
+         // Imap: one byte per entry starting at word 6, each byte holding four 2-bit values.
+         var imapTypes = new ImapPixelType[32];
+
+         for (int i = 0; i < imapTypes.Length; i++)
+         {
+             int packed = words[6 + (i >> 2)];
+             byte imap = (byte)(packed >> ((i & 3) * 8));
+
+             imapTypes[i] = new ImapPixelType(
+                 (PixelImap)((imap >> 0) & 3),
+                 (PixelImap)((imap >> 2) & 3),
+                 (PixelImap)((imap >> 4) & 3),
+                 (PixelImap)((imap >> 6) & 3));
+         }
+
+         ImapTypes = imapTypes;
+
+         // Omap: word 18 holds the targets, word 19 the sample mask and depth bits.
+         int omapFlags = words[19];
+
+         OmapTargets = words[18];
+         OmapSampleMask = omapFlags.Extract(0);
+         OmapDepth = omapFlags.Extract(1);
+     }
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/Translation/ShaderIdentifier.cs b/src/Ryujinx.Graphics.Shader/Translation/ShaderIdentifier.cs
new file mode 100644
index 00000000..53f1e847
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/Translation/ShaderIdentifier.cs
@@ -0,0 +1,185 @@
+using Ryujinx.Graphics.Shader.IntermediateRepresentation;
+using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
+
+namespace Ryujinx.Graphics.Shader.Translation
+{
+ static class ShaderIdentifier
+ {
+ /// <summary>
+ /// Checks whether the shader matches a pattern that gets special handling.
+ /// </summary>
+ /// <param name="functions">Functions of the shader to inspect</param>
+ /// <param name="config">Shader configuration, receives the layer input attribute when a match is found</param>
+ /// <returns>The identified pattern, or <see cref="ShaderIdentification.None"/> when nothing matched</returns>
+ public static ShaderIdentification Identify(Function[] functions, ShaderConfig config)
+ {
+     // Only geometry shaders consuming triangles are candidates, and only
+     // when the host reports no geometry shader support.
+     bool isCandidate =
+         config.Stage == ShaderStage.Geometry &&
+         config.GpuAccessor.QueryPrimitiveTopology() == InputTopology.Triangles &&
+         !config.GpuAccessor.QueryHostSupportsGeometryShader();
+
+     if (!isCandidate || !IsLayerPassthroughGeometryShader(functions, out int layerInputAttr))
+     {
+         return ShaderIdentification.None;
+     }
+
+     config.SetGeometryShaderLayerInputAttribute(layerInputAttr);
+
+     return ShaderIdentification.GeometryLayerPassthrough;
+ }
+
+ /// <summary>
+ /// Checks if the shader only forwards its inputs to the matching outputs (or writes the
+ /// constants 0/0/0/1), while also writing the layer output from a user defined input.
+ /// </summary>
+ /// <param name="functions">Functions of the shader, only a single function is accepted</param>
+ /// <param name="layerInputAttr">Index (location * 4 + component) of the user defined input copied into the layer, 0 if none was found</param>
+ /// <returns>True if the layer is written from a user defined input and exactly 3 vertices are emitted, false otherwise</returns>
+ private static bool IsLayerPassthroughGeometryShader(Function[] functions, out int layerInputAttr)
+ {
+     bool writesLayer = false;
+     layerInputAttr = 0;
+
+     if (functions.Length != 1)
+     {
+         return false;
+     }
+
+     int verticesCount = 0;
+     int totalVerticesCount = 0;
+
+     foreach (BasicBlock block in functions[0].Blocks)
+     {
+         // We are not expecting loops or any complex control flow here, so fail in those cases.
+         if (block.Branch != null && block.Branch.Index <= block.Index)
+         {
+             return false;
+         }
+
+         foreach (INode node in block.Operations)
+         {
+             if (!(node is Operation operation))
+             {
+                 continue;
+             }
+
+             // Any write to memory or images means this is not a pure passthrough.
+             if (IsResourceWrite(operation.Inst))
+             {
+                 return false;
+             }
+
+             if (operation.Inst == Instruction.Store && operation.StorageKind == StorageKind.Output)
+             {
+                 // The stored value is always the last source of the store.
+                 Operand src = operation.GetSource(operation.SourcesCount - 1);
+                 Operation srcAttributeAsgOp = null;
+
+                 if (src.Type == OperandType.LocalVariable &&
+                     src.AsgOp is Operation asgOp &&
+                     asgOp.Inst == Instruction.Load &&
+                     asgOp.StorageKind.IsInputOrOutput())
+                 {
+                     // Reading back anything other than a plain input is not supported.
+                     if (asgOp.StorageKind != StorageKind.Input)
+                     {
+                         return false;
+                     }
+
+                     srcAttributeAsgOp = asgOp;
+                 }
+
+                 if (srcAttributeAsgOp != null)
+                 {
+                     IoVariable dstAttribute = (IoVariable)operation.GetSource(0).Value;
+                     IoVariable srcAttribute = (IoVariable)srcAttributeAsgOp.GetSource(0).Value;
+
+                     if (dstAttribute == IoVariable.Layer && srcAttribute == IoVariable.UserDefined)
+                     {
+                         if (srcAttributeAsgOp.SourcesCount != 4)
+                         {
+                             return false;
+                         }
+
+                         writesLayer = true;
+                         layerInputAttr = srcAttributeAsgOp.GetSource(1).Value * 4 + srcAttributeAsgOp.GetSource(3).Value;
+                     }
+                     else
+                     {
+                         // Everything else must be copied to the very same attribute.
+                         if (dstAttribute != srcAttribute)
+                         {
+                             return false;
+                         }
+
+                         int inputsCount = operation.SourcesCount - 2;
+
+                         if (dstAttribute == IoVariable.UserDefined)
+                         {
+                             if (operation.GetSource(1).Value != srcAttributeAsgOp.GetSource(1).Value)
+                             {
+                                 return false;
+                             }
+
+                             inputsCount--;
+                         }
+
+                         // Compare the trailing index operands of the store (excluding the stored value)
+                         // against the trailing operands of the load, they must be equal constants.
+                         for (int i = 0; i < inputsCount; i++)
+                         {
+                             int dstIndex = operation.SourcesCount - 2 - i;
+                             int srcIndex = srcAttributeAsgOp.SourcesCount - 1 - i;
+
+                             if ((dstIndex | srcIndex) < 0)
+                             {
+                                 return false;
+                             }
+
+                             if (operation.GetSource(dstIndex).Type != OperandType.Constant ||
+                                 srcAttributeAsgOp.GetSource(srcIndex).Type != OperandType.Constant ||
+                                 operation.GetSource(dstIndex).Value != srcAttributeAsgOp.GetSource(srcIndex).Value)
+                             {
+                                 return false;
+                             }
+                         }
+                     }
+                 }
+                 else if (src.Type == OperandType.Constant)
+                 {
+                     // Constant stores are only accepted when they write 0 (or 1 on component 3).
+                     int dstComponent = operation.GetSource(operation.SourcesCount - 2).Value;
+                     float expectedValue = dstComponent == 3 ? 1f : 0f;
+
+                     if (src.AsFloat() != expectedValue)
+                     {
+                         return false;
+                     }
+                 }
+                 else
+                 {
+                     return false;
+                 }
+             }
+             else if (operation.Inst == Instruction.EmitVertex)
+             {
+                 verticesCount++;
+             }
+             else if (operation.Inst == Instruction.EndPrimitive)
+             {
+                 totalVerticesCount += verticesCount;
+                 verticesCount = 0;
+             }
+         }
+     }
+
+     // Vertices emitted after the last EndPrimitive are still counted.
+     return totalVerticesCount + verticesCount == 3 && writesLayer;
+ }
+
+ /// <summary>
+ /// Checks if the instruction is one of the atomic, image store or global/storage store instructions.
+ /// </summary>
+ /// <param name="inst">Instruction to check</param>
+ /// <returns>True for the instructions listed below, false for anything else</returns>
+ private static bool IsResourceWrite(Instruction inst)
+ {
+     return inst is
+         Instruction.AtomicAdd or
+         Instruction.AtomicAnd or
+         Instruction.AtomicCompareAndSwap or
+         Instruction.AtomicMaxS32 or
+         Instruction.AtomicMaxU32 or
+         Instruction.AtomicMinS32 or
+         Instruction.AtomicMinU32 or
+         Instruction.AtomicOr or
+         Instruction.AtomicSwap or
+         Instruction.AtomicXor or
+         Instruction.ImageAtomic or
+         Instruction.ImageStore or
+         Instruction.StoreGlobal or
+         Instruction.StoreGlobal16 or
+         Instruction.StoreGlobal8 or
+         Instruction.StoreStorage or
+         Instruction.StoreStorage16 or
+         Instruction.StoreStorage8;
+ }
+ }
+}
diff --git a/src/Ryujinx.Graphics.Shader/Translation/Ssa.cs b/src/Ryujinx.Graphics.Shader/Translation/Ssa.cs
new file mode 100644
index 00000000..16b8b924
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/Translation/Ssa.cs
@@ -0,0 +1,376 @@
+using Ryujinx.Graphics.Shader.Decoders;
+using Ryujinx.Graphics.Shader.IntermediateRepresentation;
+using System.Collections.Generic;
+
+using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
+
+namespace Ryujinx.Graphics.Shader.Translation
+{
+ static class Ssa
+ {
+ private const int GprsAndPredsCount = RegisterConsts.GprsCount + RegisterConsts.PredsCount;
+
+ /// <summary>
+ /// Per-block record of register definitions and of the registers that need a Phi node on the block.
+ /// </summary>
+ private class DefMap
+ {
+     // Operand registered as the definition of each register on the block.
+     private Dictionary<Register, Operand> _map;
+
+     // Bit set indexed by register key, a set bit means the block needs a Phi for that register.
+     private long[] _phiMasks;
+
+     public DefMap()
+     {
+         _phiMasks = new long[(RegisterConsts.TotalCount + 63) / 64];
+         _map = new Dictionary<Register, Operand>();
+     }
+
+     // Registers the definition, returns false (keeping the existing one) if the register already has one.
+     public bool TryAddOperand(Register reg, Operand operand) => _map.TryAdd(reg, operand);
+
+     public bool TryGetOperand(Register reg, out Operand operand) => _map.TryGetValue(reg, out operand);
+
+     // Flags the register as needing a Phi, returns false if it was already flagged.
+     public bool AddPhi(Register reg)
+     {
+         int key = GetKeyFromRegister(reg);
+
+         ref long word = ref _phiMasks[key / 64];
+         long bitMask = 1L << (key & 63);
+
+         bool isNew = (word & bitMask) == 0;
+
+         if (isNew)
+         {
+             word |= bitMask;
+         }
+
+         return isNew;
+     }
+
+     public bool HasPhi(Register reg)
+     {
+         int key = GetKeyFromRegister(reg);
+
+         return (_phiMasks[key / 64] & (1L << (key & 63))) != 0;
+     }
+ }
+
+ /// <summary>
+ /// Map from register key to operand, that also remembers which keys were set
+ /// so that it can be enumerated and cleared without scanning the whole array.
+ /// </summary>
+ private class LocalDefMap
+ {
+     // Operand currently associated with each register key, null when unset.
+     private Operand[] _map;
+
+     // Keys that were set since the last clear, only the first UseCount entries are valid.
+     private int[] _uses;
+
+     public int UseCount { get; private set; }
+
+     public LocalDefMap()
+     {
+         _uses = new int[RegisterConsts.TotalCount];
+         _map = new Operand[RegisterConsts.TotalCount];
+     }
+
+     public Operand Get(int key) => _map[key];
+
+     public void Add(int key, Operand operand)
+     {
+         // Only record the key the first time it gets a value.
+         bool isFirstDefinition = _map[key] == null;
+
+         if (isFirstDefinition)
+         {
+             _uses[UseCount++] = key;
+         }
+
+         _map[key] = operand;
+     }
+
+     public Operand GetUse(int index, out int key)
+     {
+         int usedKey = _uses[index];
+
+         key = usedKey;
+
+         return _map[usedKey];
+     }
+
+     public void Clear()
+     {
+         int count = UseCount;
+
+         for (int useIndex = 0; useIndex < count; useIndex++)
+         {
+             _map[_uses[useIndex]] = null;
+         }
+
+         UseCount = 0;
+     }
+ }
+
+ /// <summary>
+ /// Pairs a local variable with the block where its definition was found.
+ /// </summary>
+ private readonly struct Definition
+ {
+     public BasicBlock Block { get; }
+     public Operand Local { get; }
+
+     public Definition(BasicBlock block, Operand local) => (Block, Local) = (block, local);
+ }
+
+ /// <summary>
+ /// Replaces the register operands used by the operations of the given blocks with local variables,
+ /// inserting Phi nodes where a register is defined on more than one path.
+ /// </summary>
+ /// <param name="blocks">Blocks to process, the dominance frontiers are read from each block</param>
+ public static void Rename(BasicBlock[] blocks)
+ {
+ DefMap[] globalDefs = new DefMap[blocks.Length];
+ LocalDefMap localDefs = new LocalDefMap();
+
+ for (int blkIndex = 0; blkIndex < blocks.Length; blkIndex++)
+ {
+ globalDefs[blkIndex] = new DefMap();
+ }
+
+ Queue<BasicBlock> dfPhiBlocks = new Queue<BasicBlock>();
+
+ // First pass, get all defs and locals uses.
+ for (int blkIndex = 0; blkIndex < blocks.Length; blkIndex++)
+ {
+ // Replaces a register source with the local that was assigned to it earlier on this
+ // same block, if any. Registers not yet written on the block are left untouched.
+ Operand RenameLocal(Operand operand)
+ {
+ if (operand != null && operand.Type == OperandType.Register)
+ {
+ Operand local = localDefs.Get(GetKeyFromRegister(operand.GetRegister()));
+
+ operand = local ?? operand;
+ }
+
+ return operand;
+ }
+
+ BasicBlock block = blocks[blkIndex];
+
+ LinkedListNode<INode> node = block.Operations.First;
+
+ while (node != null)
+ {
+ if (node.Value is Operation operation)
+ {
+ // Sources are renamed before the destinations, so an operation that reads
+ // and writes the same register still reads the previous definition.
+ for (int index = 0; index < operation.SourcesCount; index++)
+ {
+ operation.SetSource(index, RenameLocal(operation.GetSource(index)));
+ }
+
+ for (int index = 0; index < operation.DestsCount; index++)
+ {
+ Operand dest = operation.GetDest(index);
+
+ if (dest != null && dest.Type == OperandType.Register)
+ {
+ // Every register write gets a brand new local.
+ Operand local = Local();
+
+ localDefs.Add(GetKeyFromRegister(dest.GetRegister()), local);
+
+ operation.SetDest(index, local);
+ }
+ }
+ }
+
+ node = node.Next;
+ }
+
+ // Publish the local currently held for each register written on this block, and flag
+ // the blocks reachable through the dominance frontiers as needing a Phi for it.
+ int localUses = localDefs.UseCount;
+ for (int index = 0; index < localUses; index++)
+ {
+ Operand local = localDefs.GetUse(index, out int key);
+
+ Register reg = GetRegisterFromKey(key);
+
+ globalDefs[block.Index].TryAddOperand(reg, local);
+
+ dfPhiBlocks.Enqueue(block);
+
+ while (dfPhiBlocks.TryDequeue(out BasicBlock dfPhiBlock))
+ {
+ foreach (BasicBlock domFrontier in dfPhiBlock.DominanceFrontiers)
+ {
+ // AddPhi returns false for blocks already flagged, which stops the propagation there.
+ if (globalDefs[domFrontier.Index].AddPhi(reg))
+ {
+ dfPhiBlocks.Enqueue(domFrontier);
+ }
+ }
+ }
+ }
+
+ localDefs.Clear();
+ }
+
+ // Second pass, rename variables with definitions on different blocks.
+ for (int blkIndex = 0; blkIndex < blocks.Length; blkIndex++)
+ {
+ BasicBlock block = blocks[blkIndex];
+
+ // Resolves a register source that was not defined on this block. The result is cached
+ // on localDefs so that each register is only looked up once per block.
+ Operand RenameGlobal(Operand operand)
+ {
+ if (operand != null && operand.Type == OperandType.Register)
+ {
+ int key = GetKeyFromRegister(operand.GetRegister());
+
+ Operand local = localDefs.Get(key);
+
+ if (local != null)
+ {
+ return local;
+ }
+
+ operand = FindDefinitionForCurr(globalDefs, block, operand.GetRegister());
+
+ localDefs.Add(key, operand);
+ }
+
+ return operand;
+ }
+
+ for (LinkedListNode<INode> node = block.Operations.First; node != null; node = node.Next)
+ {
+ if (node.Value is Operation operation)
+ {
+ for (int index = 0; index < operation.SourcesCount; index++)
+ {
+ operation.SetSource(index, RenameGlobal(operation.GetSource(index)));
+ }
+ }
+ }
+
+ // The map is not used again after the last block, so the final clear is skipped.
+ if (blkIndex < blocks.Length - 1)
+ {
+ localDefs.Clear();
+ }
+ }
+ }
+
+ /// <summary>
+ /// Finds the definition of a register that is read on a block before being written there.
+ /// </summary>
+ /// <param name="globalDefs">Definitions of all blocks, indexed by block index</param>
+ /// <param name="current">Block reading the register</param>
+ /// <param name="reg">Register being read</param>
+ /// <returns>The local holding the register value, or an undefined operand if no definition exists</returns>
+ private static Operand FindDefinitionForCurr(DefMap[] globalDefs, BasicBlock current, Register reg)
+ {
+     bool needsPhi = globalDefs[current.Index].HasPhi(reg);
+
+     if (needsPhi)
+     {
+         return InsertPhi(globalDefs, current, reg);
+     }
+
+     BasicBlock dominator = current.ImmediateDominator;
+
+     // A block that is its own immediate dominator has nowhere else to look.
+     return current == dominator
+         ? Undef()
+         : FindDefinition(globalDefs, dominator, reg).Local;
+ }
+
+ /// <summary>
+ /// Searches the block and its chain of immediate dominators for a definition of the register.
+ /// </summary>
+ /// <param name="globalDefs">Definitions of all blocks, indexed by block index</param>
+ /// <param name="current">Block where the search starts</param>
+ /// <param name="reg">Register to find</param>
+ /// <returns>The definition found, or an undefined operand attributed to <paramref name="current"/></returns>
+ private static Definition FindDefinition(DefMap[] globalDefs, BasicBlock current, Register reg)
+ {
+     foreach (BasicBlock candidate in SelfAndImmediateDominators(current))
+     {
+         DefMap candidateDefs = globalDefs[candidate.Index];
+
+         // An existing definition takes priority over a pending Phi.
+         if (candidateDefs.TryGetOperand(reg, out Operand knownDef))
+         {
+             return new Definition(candidate, knownDef);
+         }
+
+         if (!candidateDefs.HasPhi(reg))
+         {
+             continue;
+         }
+
+         return new Definition(candidate, InsertPhi(globalDefs, candidate, reg));
+     }
+
+     return new Definition(current, Undef());
+ }
+
+ /// <summary>
+ /// Enumerates the block followed by its chain of immediate dominators,
+ /// ending at the block that is its own immediate dominator.
+ /// </summary>
+ /// <param name="block">First block of the sequence</param>
+ /// <returns>The block and its dominators, nearest first</returns>
+ private static IEnumerable<BasicBlock> SelfAndImmediateDominators(BasicBlock block)
+ {
+     BasicBlock cursor = block;
+
+     while (true)
+     {
+         BasicBlock dominator = cursor.ImmediateDominator;
+
+         if (cursor == dominator)
+         {
+             yield return cursor;
+
+             yield break;
+         }
+
+         yield return cursor;
+
+         cursor = dominator;
+     }
+ }
+
+ /// <summary>
+ /// Creates the Phi node for a register on a block, and fills it with the definition
+ /// coming from each one of the block predecessors.
+ /// </summary>
+ /// <param name="globalDefs">Definitions of all blocks, indexed by block index</param>
+ /// <param name="block">Block that receives the Phi node</param>
+ /// <param name="reg">Register the Phi node stands for</param>
+ /// <returns>The local variable defined by the new Phi node</returns>
+ private static Operand InsertPhi(DefMap[] globalDefs, BasicBlock block, Register reg)
+ {
+ // This block has a Phi that has not been materialized yet, but that
+ // would define a new version of the variable we're looking for. We need
+ // to materialize the Phi, add all the block/operand pairs into the Phi, and
+ // then use the definition from that Phi.
+ Operand local = Local();
+
+ PhiNode phi = new PhiNode(local);
+
+ AddPhi(block, phi);
+
+ // The local is registered before the predecessors are visited. FindDefinition checks
+ // registered definitions before pending Phis, so a search that reaches this block again
+ // finds this local (presumably what prevents endless recursion on loops).
+ globalDefs[block.Index].TryAddOperand(reg, local);
+
+ foreach (BasicBlock predecessor in block.Predecessors)
+ {
+ Definition def = FindDefinition(globalDefs, predecessor, reg);
+
+ phi.AddSource(def.Block, def.Local);
+ }
+
+ return local;
+ }
+
+ /// <summary>
+ /// Adds a Phi node to the block, after the Phi nodes found at the start of
+ /// the block, or as first node when there is none.
+ /// </summary>
+ /// <param name="block">Block that receives the node</param>
+ /// <param name="phi">Phi node to add</param>
+ private static void AddPhi(BasicBlock block, PhiNode phi)
+ {
+     LinkedListNode<INode> lastPhi = block.Operations.First;
+
+     // Advance while the following node is still a Phi.
+     while (lastPhi?.Next?.Value is PhiNode)
+     {
+         lastPhi = lastPhi.Next;
+     }
+
+     if (lastPhi?.Value is PhiNode)
+     {
+         block.Operations.AddAfter(lastPhi, phi);
+
+         return;
+     }
+
+     block.Operations.AddFirst(phi);
+ }
+
+ /// <summary>
+ /// Maps a register to a unique integer key: general purpose registers first,
+ /// then predicates, then everything else (flags).
+ /// </summary>
+ /// <param name="reg">Register to map</param>
+ /// <returns>Key of the register</returns>
+ private static int GetKeyFromRegister(Register reg)
+ {
+     return reg.Type switch
+     {
+         RegisterType.Gpr => reg.Index,
+         RegisterType.Predicate => RegisterConsts.GprsCount + reg.Index,
+         _ => GprsAndPredsCount + reg.Index, // RegisterType.Flag
+     };
+ }
+
+ /// <summary>
+ /// Rebuilds the register that corresponds to a key produced by <see cref="GetKeyFromRegister"/>.
+ /// </summary>
+ /// <param name="key">Key of the register</param>
+ /// <returns>Register for the key</returns>
+ private static Register GetRegisterFromKey(int key)
+ {
+     // Keys at or past the predicate range belong to flags (key < RegisterConsts.TotalCount).
+     if (key >= GprsAndPredsCount)
+     {
+         return new Register(key - GprsAndPredsCount, RegisterType.Flag);
+     }
+
+     if (key >= RegisterConsts.GprsCount)
+     {
+         return new Register(key - RegisterConsts.GprsCount, RegisterType.Predicate);
+     }
+
+     return new Register(key, RegisterType.Gpr);
+ }
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/Translation/TargetApi.cs b/src/Ryujinx.Graphics.Shader/Translation/TargetApi.cs
new file mode 100644
index 00000000..6ac235a4
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/Translation/TargetApi.cs
@@ -0,0 +1,8 @@
+namespace Ryujinx.Graphics.Shader.Translation
+{
+ /// <summary>
+ /// Graphics API that the translated shader is meant for.
+ /// </summary>
+ public enum TargetApi
+ {
+ OpenGL,
+ Vulkan
+ }
+}
diff --git a/src/Ryujinx.Graphics.Shader/Translation/TargetLanguage.cs b/src/Ryujinx.Graphics.Shader/Translation/TargetLanguage.cs
new file mode 100644
index 00000000..8314b223
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/Translation/TargetLanguage.cs
@@ -0,0 +1,9 @@
+namespace Ryujinx.Graphics.Shader.Translation
+{
+ /// <summary>
+ /// Language of the code produced by the shader translation.
+ /// </summary>
+ public enum TargetLanguage
+ {
+ Glsl,
+ Spirv,
+ // NOTE(review): no code generator appears to be wired for this value at the time of writing - confirm before use.
+ Arb
+ }
+}
diff --git a/src/Ryujinx.Graphics.Shader/Translation/TranslationFlags.cs b/src/Ryujinx.Graphics.Shader/Translation/TranslationFlags.cs
new file mode 100644
index 00000000..1874dec3
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/Translation/TranslationFlags.cs
@@ -0,0 +1,14 @@
+using System;
+
+namespace Ryujinx.Graphics.Shader.Translation
+{
+ /// <summary>
+ /// Flags that change how a shader is decoded and translated.
+ /// </summary>
+ [Flags]
+ public enum TranslationFlags
+ {
+ None = 0,
+
+ // NOTE(review): presumably marks the "vertex A" shader of a vertex A/B pair - confirm against the callers.
+ VertexA = 1 << 0,
+ // The shader is a compute shader.
+ Compute = 1 << 1,
+ // Enables extra debugging output during translation.
+ DebugMode = 1 << 2
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/Translation/TranslationOptions.cs b/src/Ryujinx.Graphics.Shader/Translation/TranslationOptions.cs
new file mode 100644
index 00000000..d9829ac4
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/Translation/TranslationOptions.cs
@@ -0,0 +1,16 @@
+namespace Ryujinx.Graphics.Shader.Translation
+{
+ /// <summary>
+ /// Immutable set of options used for a shader translation.
+ /// </summary>
+ public readonly struct TranslationOptions
+ {
+     public TargetLanguage TargetLanguage { get; }
+     public TargetApi TargetApi { get; }
+     public TranslationFlags Flags { get; }
+
+     /// <summary>
+     /// Creates a new set of translation options.
+     /// </summary>
+     /// <param name="targetLanguage">Language of the generated code</param>
+     /// <param name="targetApi">Graphics API the generated code is meant for</param>
+     /// <param name="flags">Translation flags</param>
+     public TranslationOptions(TargetLanguage targetLanguage, TargetApi targetApi, TranslationFlags flags)
+         => (TargetLanguage, TargetApi, Flags) = (targetLanguage, targetApi, flags);
+ }
+}
diff --git a/src/Ryujinx.Graphics.Shader/Translation/Translator.cs b/src/Ryujinx.Graphics.Shader/Translation/Translator.cs
new file mode 100644
index 00000000..77d3b568
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/Translation/Translator.cs
@@ -0,0 +1,362 @@
+using Ryujinx.Graphics.Shader.CodeGen.Glsl;
+using Ryujinx.Graphics.Shader.CodeGen.Spirv;
+using Ryujinx.Graphics.Shader.Decoders;
+using Ryujinx.Graphics.Shader.IntermediateRepresentation;
+using Ryujinx.Graphics.Shader.StructuredIr;
+using Ryujinx.Graphics.Shader.Translation.Optimizations;
+using System;
+using System.Linq;
+using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
+
+namespace Ryujinx.Graphics.Shader.Translation
+{
+ public static class Translator
+ {
+ private const int HeaderSize = 0x50;
+
+ /// <summary>
+ /// Operations that make up a single function.
+ /// </summary>
+ internal readonly struct FunctionCode
+ {
+     public Operation[] Code { get; }
+
+     public FunctionCode(Operation[] code) => Code = code;
+ }
+
+ /// <summary>
+ /// Decodes the shader at the given address and creates a translation context for it.
+ /// </summary>
+ /// <param name="address">Address of the shader</param>
+ /// <param name="gpuAccessor">GPU accessor used during decoding</param>
+ /// <param name="options">Translation options</param>
+ /// <returns>Context of the decoded shader</returns>
+ public static TranslatorContext CreateContext(ulong address, IGpuAccessor gpuAccessor, TranslationOptions options)
+     => DecodeShader(address, gpuAccessor, options);
+
+ /// <summary>
+ /// Runs the translation passes over the emitted functions and generates the final shader program.
+ /// </summary>
+ /// <param name="functions">Code of each function, the first one being the entry point</param>
+ /// <param name="config">Shader configuration</param>
+ /// <returns>Program in the target language selected on the configuration options</returns>
+ /// <exception cref="NotImplementedException">Thrown when the target language has no generator</exception>
+ internal static ShaderProgram Translate(FunctionCode[] functions, ShaderConfig config)
+ {
+     int functionsCount = functions.Length;
+
+     var graphs = new ControlFlowGraph[functionsCount];
+     var usages = new RegisterUsage.FunctionRegisterUsage[functionsCount];
+
+     // All control flow graphs and register usages are created before the per-function passes
+     // below run, as the whole usages array is handed to the call fixup of every function.
+     for (int index = 0; index < functionsCount; index++)
+     {
+         graphs[index] = ControlFlowGraph.Create(functions[index].Code);
+
+         // Register usage is not computed for the entry point (index 0).
+         if (index > 0)
+         {
+             usages[index] = RegisterUsage.RunPass(graphs[index]);
+         }
+     }
+
+     Function[] funcs = new Function[functionsCount];
+
+     for (int index = 0; index < functionsCount; index++)
+     {
+         ControlFlowGraph graph = graphs[index];
+
+         bool isEntryPoint = index == 0;
+
+         int inArgumentsCount = isEntryPoint ? 0 : usages[index].InArguments.Length;
+         int outArgumentsCount = isEntryPoint ? 0 : usages[index].OutArguments.Length;
+
+         if (graph.Blocks.Length != 0)
+         {
+             RegisterUsage.FixupCalls(graph.Blocks, usages);
+
+             Dominance.FindDominators(graph);
+             Dominance.FindDominanceFrontiers(graph.Blocks);
+
+             Ssa.Rename(graph.Blocks);
+
+             Optimizer.RunPass(graph.Blocks, config);
+             Rewriter.RunPass(graph.Blocks, config);
+         }
+
+         funcs[index] = new Function(graph.Blocks, $"fun{index}", false, inArgumentsCount, outArgumentsCount);
+     }
+
+     var identification = ShaderIdentifier.Identify(funcs, config);
+
+     var sInfo = StructuredProgram.MakeStructuredProgram(funcs, config);
+
+     var info = config.CreateProgramInfo(identification);
+
+     TargetLanguage language = config.Options.TargetLanguage;
+
+     switch (language)
+     {
+         case TargetLanguage.Glsl:
+             return new ShaderProgram(info, TargetLanguage.Glsl, GlslGenerator.Generate(sInfo, config));
+         case TargetLanguage.Spirv:
+             return new ShaderProgram(info, TargetLanguage.Spirv, SpirvGenerator.Generate(sInfo, config));
+         default:
+             throw new NotImplementedException(language.ToString());
+     }
+ }
+
+ /// <summary>
+ /// Decodes the shader at the given address, and records its total size on the configuration.
+ /// </summary>
+ /// <param name="address">Address of the shader (of its header, for non-compute shaders)</param>
+ /// <param name="gpuAccessor">GPU accessor used during decoding</param>
+ /// <param name="options">Translation options</param>
+ /// <returns>Context holding the decoded program and its configuration</returns>
+ private static TranslatorContext DecodeShader(ulong address, IGpuAccessor gpuAccessor, TranslationOptions options)
+ {
+     bool isCompute = options.Flags.HasFlag(TranslationFlags.Compute);
+
+     // Non-compute shaders have a header before the code, compute shaders have none.
+     ShaderConfig config = isCompute
+         ? new ShaderConfig(gpuAccessor, options)
+         : new ShaderConfig(new ShaderHeader(gpuAccessor, address), gpuAccessor, options);
+
+     ulong codeAddress = isCompute ? address : address + HeaderSize;
+
+     DecodedProgram program = Decoder.Decode(config, codeAddress);
+
+     // The highest block end address gives the size of the code.
+     ulong maxEndAddress = 0;
+
+     foreach (DecodedFunction function in program)
+     {
+         foreach (Block block in function.Blocks)
+         {
+             if (block.EndAddress > maxEndAddress)
+             {
+                 maxEndAddress = block.EndAddress;
+             }
+         }
+     }
+
+     int headerSize = isCompute ? 0 : HeaderSize;
+
+     config.SizeAdd((int)maxEndAddress + headerSize);
+
+     return new TranslatorContext(address, program, config);
+ }
+
+ /// <summary>
+ /// Emits the intermediate representation operations for all functions of a decoded program.
+ /// </summary>
+ /// <param name="program">Decoded program</param>
+ /// <param name="config">Shader configuration</param>
+ /// <param name="initializeOutputs">True to emit the outputs initialization at the start of the first function</param>
+ /// <param name="initializationOperations">Number of operations emitted by the outputs initialization, 0 if disabled</param>
+ /// <returns>Code of each function, indexed by function id</returns>
+ internal static FunctionCode[] EmitShader(DecodedProgram program, ShaderConfig config, bool initializeOutputs, out int initializationOperations)
+ {
+     initializationOperations = 0;
+
+     FunctionMatch.RunPass(program);
+
+     // Ids are handed out in address order, skipping compiler generated functions.
+     var functionsNeedingId = program.OrderBy(x => x.Address).Where(x => !x.IsCompilerGenerated);
+
+     foreach (DecodedFunction function in functionsNeedingId)
+     {
+         program.AddFunctionAndSetId(function);
+     }
+
+     FunctionCode[] functions = new FunctionCode[program.FunctionsWithIdCount];
+
+     for (int id = 0; id < functions.Length; id++)
+     {
+         bool isEntryPoint = id == 0;
+
+         EmitterContext context = new EmitterContext(program, config, !isEntryPoint);
+
+         if (isEntryPoint && initializeOutputs)
+         {
+             EmitOutputsInitialization(context, config);
+             initializationOperations = context.OperationsCount;
+         }
+
+         foreach (Block block in program.GetFunctionById(id).Blocks)
+         {
+             context.CurrBlock = block;
+
+             context.EnterBlock(block.Address);
+
+             EmitOps(context, block);
+         }
+
+         functions[id] = new FunctionCode(context.GetOperations());
+     }
+
+     return functions;
+ }
+
+ /// <summary>
+ /// Emits stores that give a default value to the outputs that the next stage reads.
+ /// </summary>
+ /// <param name="context">Emitter context that receives the stores</param>
+ /// <param name="config">Shader configuration</param>
+ private static void EmitOutputsInitialization(EmitterContext context, ShaderConfig config)
+ {
+     // Compute has no output attributes, and fragment is the last stage, so we
+     // don't need to initialize outputs on those stages.
+     if (config.Stage is ShaderStage.Compute or ShaderStage.Fragment)
+     {
+         return;
+     }
+
+     if (config.Stage == ShaderStage.Vertex)
+     {
+         InitializePositionOutput(context);
+     }
+
+     // One bit per attribute component, visited from the lowest set bit up.
+     UInt128 pendingComponents = context.Config.NextInputAttributesComponents;
+
+     while (pendingComponents != UInt128.Zero)
+     {
+         int componentIndex = (int)UInt128.TrailingZeroCount(pendingComponents);
+         int location = componentIndex / 4;
+
+         pendingComponents &= ~(UInt128.One << componentIndex);
+
+         // We don't need to initialize passthrough attributes.
+         bool isPassthrough = (context.Config.PassthroughAttributes & (1 << location)) != 0;
+
+         if (!isPassthrough)
+         {
+             InitializeOutputComponent(context, location, componentIndex & 3, perPatch: false);
+         }
+     }
+
+     var perPatchAttributes = context.Config.NextUsedInputAttributesPerPatch;
+
+     if (perPatchAttributes != null)
+     {
+         foreach (int location in perPatchAttributes.Order())
+         {
+             InitializeOutput(context, location, perPatch: true);
+         }
+     }
+
+     if (!config.NextUsesFixedFuncAttributes)
+     {
+         return;
+     }
+
+     bool supportsLayerFromVertexOrTess = config.GpuAccessor.QueryHostSupportsLayerVertexTessellation();
+
+     int firstFixedAttr = supportsLayerFromVertexOrTess ? 0 : 1;
+     int endFixedAttr = firstFixedAttr + 5 + AttributeConsts.TexCoordCount;
+
+     for (int fixedAttr = firstFixedAttr; fixedAttr < endFixedAttr; fixedAttr++)
+     {
+         int freeLocation = config.GetFreeUserAttribute(isOutput: true, fixedAttr);
+
+         // Stop as soon as there is no free user attribute left.
+         if (freeLocation < 0)
+         {
+             break;
+         }
+
+         InitializeOutput(context, freeLocation, perPatch: false);
+
+         config.SetOutputUserAttributeFixedFunc(freeLocation);
+     }
+ }
+
+ /// <summary>
+ /// Emits stores that set the position output to (0, 0, 0, 1).
+ /// </summary>
+ /// <param name="context">Emitter context that receives the stores</param>
+ private static void InitializePositionOutput(EmitterContext context)
+ {
+     for (int component = 0; component < 4; component++)
+     {
+         float defaultValue = component == 3 ? 1f : 0f;
+
+         context.Store(StorageKind.Output, IoVariable.Position, null, Const(component), ConstF(defaultValue));
+     }
+ }
+
+ /// <summary>
+ /// Emits the initialization of the 4 components of a user defined output.
+ /// </summary>
+ /// <param name="context">Emitter context that receives the stores</param>
+ /// <param name="location">Location of the output</param>
+ /// <param name="perPatch">True for per-patch outputs, false otherwise</param>
+ private static void InitializeOutput(EmitterContext context, int location, bool perPatch)
+ {
+     int component = 0;
+
+     while (component < 4)
+     {
+         InitializeOutputComponent(context, location, component, perPatch);
+
+         component++;
+     }
+ }
+
+ /// <summary>
+ /// Emits a store that sets one component of a user defined output to its
+ /// default value (1 for component 3, 0 for the others).
+ /// </summary>
+ /// <param name="context">Emitter context that receives the store</param>
+ /// <param name="location">Location of the output</param>
+ /// <param name="c">Component of the output</param>
+ /// <param name="perPatch">True for per-patch outputs, false otherwise</param>
+ private static void InitializeOutputComponent(EmitterContext context, int location, int c, bool perPatch)
+ {
+     StorageKind storageKind = perPatch ? StorageKind.OutputPerPatch : StorageKind.Output;
+
+     bool usesOaIndexing = context.Config.UsedFeatures.HasFlag(FeatureFlags.OaIndexing);
+
+     // Per-vertex outputs of tessellation control shaders are also addressed with the invocation id.
+     bool needsInvocationId = context.Config.Stage == ShaderStage.TessellationControl && !perPatch;
+
+     Operand invocationId = needsInvocationId
+         ? context.Load(StorageKind.Input, IoVariable.InvocationId)
+         : null;
+
+     float defaultValue = c == 3 ? 1f : 0f;
+
+     if (usesOaIndexing)
+     {
+         // With indexing, the output is addressed by a single flat component index.
+         int index = location * 4 + c;
+
+         context.Store(storageKind, IoVariable.UserDefined, invocationId, Const(index), ConstF(defaultValue));
+     }
+     else if (needsInvocationId)
+     {
+         context.Store(storageKind, IoVariable.UserDefined, Const(location), invocationId, Const(c), ConstF(defaultValue));
+     }
+     else
+     {
+         context.Store(storageKind, IoVariable.UserDefined, null, Const(location), Const(c), ConstF(defaultValue));
+     }
+ }
+
+ /// <summary>
+ /// Emits the operations for all instructions of a block, wrapping predicated
+ /// instructions with a conditional branch that skips them.
+ /// </summary>
+ /// <param name="context">Emitter context that receives the operations</param>
+ /// <param name="block">Decoded block with the instructions to emit</param>
+ private static void EmitOps(EmitterContext context, Block block)
+ {
+ for (int opIndex = 0; opIndex < block.OpCodes.Count; opIndex++)
+ {
+ InstOp op = block.OpCodes[opIndex];
+
+ // In debug mode, a comment with address, raw encoding and name is added before every
+ // instruction, and instructions without an emitter are reported through the GPU accessor.
+ if (context.Config.Options.Flags.HasFlag(TranslationFlags.DebugMode))
+ {
+ string instName;
+
+ if (op.Emitter != null)
+ {
+ instName = op.Name.ToString();
+ }
+ else
+ {
+ instName = "???";
+
+ context.Config.GpuAccessor.Log($"Invalid instruction at 0x{op.Address:X6} (0x{op.RawOpCode:X16}).");
+ }
+
+ string dbgComment = $"0x{op.Address:X6}: 0x{op.RawOpCode:X16} {instName}";
+
+ context.Add(new CommentNode(dbgComment));
+ }
+
+ InstConditional opConditional = new InstConditional(op.RawOpCode);
+
+ // Predicated on the inverted "true" predicate, the instruction can never execute, so nothing is emitted.
+ bool noPred = op.Props.HasFlag(InstProps.NoPred);
+ if (!noPred && opConditional.Pred == RegisterConsts.PredicateTrueIndex && opConditional.PredInv)
+ {
+ continue;
+ }
+
+ // Label placed right after the instruction, only set when the skip branch needs its own target.
+ Operand predSkipLbl = null;
+
+ if (Decoder.IsPopBranch(op.Name))
+ {
+ // If the instruction is a SYNC or BRK instruction with only one
+ // possible target address, then the instruction is basically
+ // just a simple branch, we can generate code similar to branch
+ // instructions, with the condition check on the branch itself.
+ noPred = block.SyncTargets.Count <= 1;
+ }
+ else if (op.Name == InstName.Bra)
+ {
+ // No skip branch is emitted around BRA (see the comment above about branch instructions).
+ noPred = true;
+ }
+
+ if (!(opConditional.Pred == RegisterConsts.PredicateTrueIndex || noPred))
+ {
+ Operand label;
+
+ // For the last instruction of a block that has a next block, the skip branch
+ // targets the label of the first successor, so no extra label is needed.
+ if (opIndex == block.OpCodes.Count - 1 && block.HasNext())
+ {
+ label = context.GetLabel(block.Successors[0].Address);
+ }
+ else
+ {
+ label = Label();
+
+ predSkipLbl = label;
+ }
+
+ Operand pred = Register(opConditional.Pred, RegisterType.Predicate);
+
+ // The branch skips the instruction, so its condition is the opposite of the instruction predicate.
+ if (opConditional.PredInv)
+ {
+ context.BranchIfTrue(label, pred);
+ }
+ else
+ {
+ context.BranchIfFalse(label, pred);
+ }
+ }
+
+ context.CurrOp = op;
+
+ // Instructions without an emitter produce no operations.
+ op.Emitter?.Invoke(context);
+
+ if (predSkipLbl != null)
+ {
+ context.MarkLabel(predSkipLbl);
+ }
+ }
+ }
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/Translation/TranslatorContext.cs b/src/Ryujinx.Graphics.Shader/Translation/TranslatorContext.cs
new file mode 100644
index 00000000..4b4cc8d9
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/Translation/TranslatorContext.cs
@@ -0,0 +1,255 @@
+using Ryujinx.Graphics.Shader.CodeGen.Glsl;
+using Ryujinx.Graphics.Shader.CodeGen.Spirv;
+using Ryujinx.Graphics.Shader.Decoders;
+using Ryujinx.Graphics.Shader.IntermediateRepresentation;
+using Ryujinx.Graphics.Shader.StructuredIr;
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Numerics;
+
+using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
+using static Ryujinx.Graphics.Shader.Translation.Translator;
+
+namespace Ryujinx.Graphics.Shader.Translation
+{
+ public class TranslatorContext
+ {
+ private readonly DecodedProgram _program;
+ private ShaderConfig _config;
+
+ public ulong Address { get; }
+
+ public ShaderStage Stage => _config.Stage;
+ public int Size => _config.Size;
+ public int Cb1DataSize => _config.Cb1DataSize;
+ public bool LayerOutputWritten => _config.LayerOutputWritten;
+
+ public IGpuAccessor GpuAccessor => _config.GpuAccessor;
+
+ /// <summary>
+ /// Creates a new translation context for a decoded shader.
+ /// </summary>
+ /// <param name="address">Address of the shader</param>
+ /// <param name="program">Decoded program</param>
+ /// <param name="config">Shader configuration</param>
+ internal TranslatorContext(ulong address, DecodedProgram program, ShaderConfig config)
+ {
+     _config = config;
+     _program = program;
+     Address = address;
+ }
+
+ /// <summary>
+ /// Checks if the operation is a load whose first source selects the user defined I/O variable.
+ /// </summary>
+ /// <param name="operation">Operation to check</param>
+ /// <returns>True if the operation loads a user defined attribute, false otherwise</returns>
+ private static bool IsLoadUserDefined(Operation operation)
+ {
+     // TODO: Check if sources count match and all sources are constant.
+     if (operation.Inst != Instruction.Load)
+     {
+         return false;
+     }
+
+     IoVariable ioVariable = (IoVariable)operation.GetSource(0).Value;
+
+     return ioVariable == IoVariable.UserDefined;
+ }
+
+ /// <summary>
+ /// Checks if the operation is a store whose first source selects the user defined I/O variable.
+ /// </summary>
+ /// <param name="operation">Operation to check</param>
+ /// <returns>True if the operation stores a user defined attribute, false otherwise</returns>
+ private static bool IsStoreUserDefined(Operation operation)
+ {
+     // TODO: Check if sources count match and all sources are constant.
+     if (operation.Inst != Instruction.Store)
+     {
+         return false;
+     }
+
+     IoVariable ioVariable = (IoVariable)operation.GetSource(0).Value;
+
+     return ioVariable == IoVariable.UserDefined;
+ }
+
+ /// <summary>
+ /// Combines two shaders into one, with the code of shader A running before the code of shader B.
+ /// </summary>
+ /// <param name="a">Functions of shader A, the first one being its entry point</param>
+ /// <param name="b">Functions of shader B, the first one being its entry point</param>
+ /// <param name="aStart">Index of the first operation of the shader A entry point to keep</param>
+ /// <returns>Combined entry point, followed by the other functions of A and then of B</returns>
+ private static FunctionCode[] Combine(FunctionCode[] a, FunctionCode[] b, int aStart)
+ {
+     // Here we combine two shaders.
+     // For shader A:
+     // - All user attribute stores on shader A are turned into copies to a
+     // temporary variable. It's assumed that shader B will consume them.
+     // - All return instructions are turned into branch instructions, the
+     // branch target being the start of the shader B code.
+     // For shader B:
+     // - All user attribute loads on shader B are turned into copies from a
+     // temporary variable, as long that attribute is written by shader A.
+     FunctionCode[] output = new FunctionCode[a.Length + b.Length - 1];
+
+     Operation[] codeA = a[0].Code;
+     Operation[] codeB = b[0].Code;
+
+     // The list receives the kept operations of A, the label that marks the start of B, and
+     // all operations of B, so it's sized for that rather than for the number of functions.
+     List<Operation> ops = new List<Operation>(Math.Max(0, codeA.Length - aStart) + 1 + codeB.Length);
+
+     // One temporary per user attribute component, created the first time A writes it.
+     Operand[] temps = new Operand[AttributeConsts.UserAttributesCount * 4];
+
+     Operand lblB = Label();
+
+     for (int index = aStart; index < codeA.Length; index++)
+     {
+         Operation operation = codeA[index];
+
+         if (IsStoreUserDefined(operation))
+         {
+             int tIndex = operation.GetSource(1).Value * 4 + operation.GetSource(2).Value;
+
+             Operand temp = temps[tIndex];
+
+             if (temp == null)
+             {
+                 temp = Local();
+
+                 temps[tIndex] = temp;
+             }
+
+             // The stored value is the last source of the store.
+             operation.Dest = temp;
+             operation.TurnIntoCopy(operation.GetSource(operation.SourcesCount - 1));
+         }
+
+         if (operation.Inst == Instruction.Return)
+         {
+             ops.Add(new Operation(Instruction.Branch, lblB));
+         }
+         else
+         {
+             ops.Add(operation);
+         }
+     }
+
+     ops.Add(new Operation(Instruction.MarkLabel, lblB));
+
+     for (int index = 0; index < codeB.Length; index++)
+     {
+         Operation operation = codeB[index];
+
+         if (IsLoadUserDefined(operation))
+         {
+             int tIndex = operation.GetSource(1).Value * 4 + operation.GetSource(2).Value;
+
+             Operand temp = temps[tIndex];
+
+             // Attributes that A never wrote are still loaded as regular inputs.
+             if (temp != null)
+             {
+                 operation.TurnIntoCopy(temp);
+             }
+         }
+
+         ops.Add(operation);
+     }
+
+     output[0] = new FunctionCode(ops.ToArray());
+
+     // The remaining functions of both shaders are carried over unchanged, A first.
+     Array.Copy(a, 1, output, 1, a.Length - 1);
+     Array.Copy(b, 1, output, a.Length, b.Length - 1);
+
+     return output;
+ }
+
+ /// <summary>
+ /// Merges information from the configuration of the next stage into this shader's configuration.
+ /// </summary>
+ /// <param name="nextStage">Context of the next stage</param>
+ public void SetNextStage(TranslatorContext nextStage) => _config.MergeFromtNextStage(nextStage._config);
+
+ /// <summary>
+ /// Forwards the geometry shader layer input attribute to the shader configuration.
+ /// </summary>
+ /// <param name="attr">Layer input attribute</param>
+ public void SetGeometryShaderLayerInputAttribute(int attr) => _config.SetGeometryShaderLayerInputAttribute(attr);
+
+ /// <summary>
+ /// Flags the shader configuration as being the last in the vertex pipeline.
+ /// </summary>
+ public void SetLastInVertexPipeline() => _config.SetLastInVertexPipeline();
+
+ /// <summary>
+ /// Translates the shader, optionally combined with another shader whose code runs first.
+ /// </summary>
+ /// <param name="other">Optional shader to combine with this one, its code is placed before the code of this shader</param>
+ /// <returns>Translated shader program</returns>
+ public ShaderProgram Translate(TranslatorContext other = null)
+ {
+     bool isStandalone = other == null;
+
+     // When combining, the outputs initialization is emitted by the other shader instead.
+     FunctionCode[] code = EmitShader(_program, _config, initializeOutputs: isStandalone, out _);
+
+     if (!isStandalone)
+     {
+         other._config.MergeOutputUserAttributes(_config.UsedOutputAttributes, Enumerable.Empty<int>());
+
+         FunctionCode[] otherCode = EmitShader(other._program, other._config, initializeOutputs: true, out int otherStart);
+
+         code = Combine(otherCode, code, otherStart);
+
+         _config.InheritFrom(other._config);
+     }
+
+     return Translator.Translate(code, _config);
+ }
+
+ /// <summary>
+ /// Generates a geometry shader that copies the position and the used user defined
+ /// attributes of every input vertex to the output, and writes the layer output
+ /// from the attribute component selected on the configuration.
+ /// </summary>
+ /// <returns>Generated geometry shader program</returns>
+ /// <exception cref="NotImplementedException">Thrown when the target language has no generator</exception>
+ public ShaderProgram GenerateGeometryPassthrough()
+ {
+     int usedOutputsMask = _config.UsedOutputAttributes;
+     int layerOutputAttr = _config.LayerOutputAttribute;
+
+     // Output topology and vertex count follow the input topology.
+     (OutputTopology outputTopology, int maxOutputVertices) = GpuAccessor.QueryPrimitiveTopology() switch
+     {
+         InputTopology.Points => (OutputTopology.PointList, 1),
+         InputTopology.Lines or InputTopology.LinesAdjacency => (OutputTopology.LineStrip, 2),
+         _ => (OutputTopology.TriangleStrip, 3),
+     };
+
+     ShaderConfig config = new ShaderConfig(ShaderStage.Geometry, outputTopology, maxOutputVertices, GpuAccessor, _config.Options);
+
+     EmitterContext context = new EmitterContext(default, config, false);
+
+     for (int vertex = 0; vertex < maxOutputVertices; vertex++)
+     {
+         int pendingAttrsMask = usedOutputsMask;
+
+         while (pendingAttrsMask != 0)
+         {
+             int attrIndex = BitOperations.TrailingZeroCount(pendingAttrsMask);
+
+             pendingAttrsMask &= ~(1 << attrIndex);
+
+             for (int component = 0; component < 4; component++)
+             {
+                 int attr = AttributeConsts.UserAttributeBase + attrIndex * 16 + component * 4;
+
+                 Operand value = context.Load(StorageKind.Input, IoVariable.UserDefined, Const(attrIndex), Const(vertex), Const(component));
+
+                 // The component selected as layer source goes to the layer output instead of being forwarded.
+                 if (attr == layerOutputAttr)
+                 {
+                     context.Store(StorageKind.Output, IoVariable.Layer, null, value);
+                 }
+                 else
+                 {
+                     context.Store(StorageKind.Output, IoVariable.UserDefined, null, Const(attrIndex), Const(component), value);
+                     config.SetOutputUserAttribute(attrIndex);
+                 }
+
+                 config.SetInputUserAttribute(attrIndex, component);
+             }
+         }
+
+         for (int component = 0; component < 4; component++)
+         {
+             Operand position = context.Load(StorageKind.Input, IoVariable.Position, Const(vertex), Const(component));
+
+             context.Store(StorageKind.Output, IoVariable.Position, null, Const(component), position);
+         }
+
+         context.EmitVertex();
+     }
+
+     context.EndPrimitive();
+
+     var cfg = ControlFlowGraph.Create(context.GetOperations());
+     var function = new Function(cfg.Blocks, "main", false, 0, 0);
+
+     var sInfo = StructuredProgram.MakeStructuredProgram(new[] { function }, config);
+
+     var info = config.CreateProgramInfo();
+
+     TargetLanguage language = config.Options.TargetLanguage;
+
+     switch (language)
+     {
+         case TargetLanguage.Glsl:
+             return new ShaderProgram(info, TargetLanguage.Glsl, GlslGenerator.Generate(sInfo, config));
+         case TargetLanguage.Spirv:
+             return new ShaderProgram(info, TargetLanguage.Spirv, SpirvGenerator.Generate(sInfo, config));
+         default:
+             throw new NotImplementedException(language.ToString());
+     }
+ }
+ }
+}