aboutsummaryrefslogtreecommitdiff
path: root/src/Ryujinx.Graphics.Shader/Instructions
diff options
context:
space:
mode:
Diffstat (limited to 'src/Ryujinx.Graphics.Shader/Instructions')
-rw-r--r--src/Ryujinx.Graphics.Shader/Instructions/AttributeMap.cs351
-rw-r--r--src/Ryujinx.Graphics.Shader/Instructions/InstEmit.cs379
-rw-r--r--src/Ryujinx.Graphics.Shader/Instructions/InstEmitAluHelper.cs160
-rw-r--r--src/Ryujinx.Graphics.Shader/Instructions/InstEmitAttribute.cs383
-rw-r--r--src/Ryujinx.Graphics.Shader/Instructions/InstEmitBarrier.cs44
-rw-r--r--src/Ryujinx.Graphics.Shader/Instructions/InstEmitBitfield.cs194
-rw-r--r--src/Ryujinx.Graphics.Shader/Instructions/InstEmitConditionCode.cs87
-rw-r--r--src/Ryujinx.Graphics.Shader/Instructions/InstEmitConversion.cs425
-rw-r--r--src/Ryujinx.Graphics.Shader/Instructions/InstEmitFloatArithmetic.cs532
-rw-r--r--src/Ryujinx.Graphics.Shader/Instructions/InstEmitFloatComparison.cs575
-rw-r--r--src/Ryujinx.Graphics.Shader/Instructions/InstEmitFloatMinMax.cs106
-rw-r--r--src/Ryujinx.Graphics.Shader/Instructions/InstEmitFlowControl.cs322
-rw-r--r--src/Ryujinx.Graphics.Shader/Instructions/InstEmitHelper.cs266
-rw-r--r--src/Ryujinx.Graphics.Shader/Instructions/InstEmitIntegerArithmetic.cs699
-rw-r--r--src/Ryujinx.Graphics.Shader/Instructions/InstEmitIntegerComparison.cs310
-rw-r--r--src/Ryujinx.Graphics.Shader/Instructions/InstEmitIntegerLogical.cs167
-rw-r--r--src/Ryujinx.Graphics.Shader/Instructions/InstEmitIntegerMinMax.cs71
-rw-r--r--src/Ryujinx.Graphics.Shader/Instructions/InstEmitMemory.cs541
-rw-r--r--src/Ryujinx.Graphics.Shader/Instructions/InstEmitMove.cs237
-rw-r--r--src/Ryujinx.Graphics.Shader/Instructions/InstEmitMultifunction.cs97
-rw-r--r--src/Ryujinx.Graphics.Shader/Instructions/InstEmitNop.cs15
-rw-r--r--src/Ryujinx.Graphics.Shader/Instructions/InstEmitPredicate.cs54
-rw-r--r--src/Ryujinx.Graphics.Shader/Instructions/InstEmitShift.cs249
-rw-r--r--src/Ryujinx.Graphics.Shader/Instructions/InstEmitSurface.cs796
-rw-r--r--src/Ryujinx.Graphics.Shader/Instructions/InstEmitTexture.cs1312
-rw-r--r--src/Ryujinx.Graphics.Shader/Instructions/InstEmitVideoArithmetic.cs118
-rw-r--r--src/Ryujinx.Graphics.Shader/Instructions/InstEmitVideoMinMax.cs183
-rw-r--r--src/Ryujinx.Graphics.Shader/Instructions/InstEmitWarp.cs84
-rw-r--r--src/Ryujinx.Graphics.Shader/Instructions/InstEmitter.cs6
-rw-r--r--src/Ryujinx.Graphics.Shader/Instructions/Lop3Expression.cs141
30 files changed, 8904 insertions, 0 deletions
diff --git a/src/Ryujinx.Graphics.Shader/Instructions/AttributeMap.cs b/src/Ryujinx.Graphics.Shader/Instructions/AttributeMap.cs
new file mode 100644
index 00000000..562fb8d5
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/Instructions/AttributeMap.cs
@@ -0,0 +1,351 @@
+using Ryujinx.Graphics.Shader.IntermediateRepresentation;
+using Ryujinx.Graphics.Shader.Translation;
+using System.Collections.Generic;
+
+using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
+
+namespace Ryujinx.Graphics.Shader.Instructions
+{
+    /// <summary>
+    /// Lookup tables that translate shader attribute offsets into <see cref="IoVariable"/> accesses,
+    /// plus helpers that emit the matching loads and stores on an <see cref="EmitterContext"/>.
+    /// </summary>
+    static class AttributeMap
+    {
+        /// <summary>
+        /// Bit set of shader stages. Each stage uses the bit numbered by its <see cref="ShaderStage"/> value.
+        /// </summary>
+        [System.Flags]
+        private enum StagesMask : byte
+        {
+            None = 0,
+            Compute = 1 << (int)ShaderStage.Compute,
+            Vertex = 1 << (int)ShaderStage.Vertex,
+            TessellationControl = 1 << (int)ShaderStage.TessellationControl,
+            TessellationEvaluation = 1 << (int)ShaderStage.TessellationEvaluation,
+            Geometry = 1 << (int)ShaderStage.Geometry,
+            Fragment = 1 << (int)ShaderStage.Fragment,
+
+            Tessellation = TessellationControl | TessellationEvaluation,
+            VertexTessellationGeometry = Vertex | Tessellation | Geometry,
+            TessellationGeometryFragment = Tessellation | Geometry | Fragment,
+            AllGraphics = Vertex | Tessellation | Geometry | Fragment
+        }
+
+        /// <summary>
+        /// Immutable description of the attribute that a given offset belongs to.
+        /// </summary>
+        private readonly struct AttributeEntry
+        {
+            /// <summary>Offset of the first slot registered for this attribute.</summary>
+            public int BaseOffset { get; }
+
+            /// <summary>Type the attribute was registered with.</summary>
+            public AggregateType Type { get; }
+
+            /// <summary>IO variable the attribute maps to.</summary>
+            public IoVariable IoVariable { get; }
+
+            /// <summary>Stages where the attribute may be used as an input.</summary>
+            public StagesMask InputMask { get; }
+
+            /// <summary>Stages where the attribute may be used as an output.</summary>
+            public StagesMask OutputMask { get; }
+
+            public AttributeEntry(
+                int baseOffset,
+                AggregateType type,
+                IoVariable ioVariable,
+                StagesMask inputMask,
+                StagesMask outputMask)
+            {
+                BaseOffset = baseOffset;
+                Type = type;
+                IoVariable = ioVariable;
+                InputMask = inputMask;
+                OutputMask = outputMask;
+            }
+        }
+
+        private static readonly IReadOnlyDictionary<int, AttributeEntry> _attributes;
+        private static readonly IReadOnlyDictionary<int, AttributeEntry> _attributesPerPatch;
+
+        static AttributeMap()
+        {
+            _attributes = CreateMap();
+            _attributesPerPatch = CreatePerPatchMap();
+        }
+
+        /// <summary>
+        /// Builds the table used for regular (non per-patch) attribute offsets.
+        /// </summary>
+        private static IReadOnlyDictionary<int, AttributeEntry> CreateMap()
+        {
+            var map = new Dictionary<int, AttributeEntry>();
+
+            Add(map, 0x060, AggregateType.S32, IoVariable.PrimitiveId, StagesMask.TessellationGeometryFragment, StagesMask.Geometry);
+            Add(map, 0x064, AggregateType.S32, IoVariable.Layer, StagesMask.Fragment, StagesMask.VertexTessellationGeometry);
+            Add(map, 0x068, AggregateType.S32, IoVariable.ViewportIndex, StagesMask.Fragment, StagesMask.VertexTessellationGeometry);
+            Add(map, 0x06c, AggregateType.FP32, IoVariable.PointSize, StagesMask.None, StagesMask.VertexTessellationGeometry);
+            Add(map, 0x070, AggregateType.Vector4 | AggregateType.FP32, IoVariable.Position, StagesMask.TessellationGeometryFragment, StagesMask.VertexTessellationGeometry);
+            Add(map, 0x080, AggregateType.Vector4 | AggregateType.FP32, IoVariable.UserDefined, StagesMask.AllGraphics, StagesMask.VertexTessellationGeometry, 32);
+            Add(map, 0x280, AggregateType.Vector4 | AggregateType.FP32, IoVariable.FrontColorDiffuse, StagesMask.TessellationGeometryFragment, StagesMask.VertexTessellationGeometry);
+            Add(map, 0x290, AggregateType.Vector4 | AggregateType.FP32, IoVariable.FrontColorSpecular, StagesMask.TessellationGeometryFragment, StagesMask.VertexTessellationGeometry);
+            Add(map, 0x2a0, AggregateType.Vector4 | AggregateType.FP32, IoVariable.BackColorDiffuse, StagesMask.TessellationGeometryFragment, StagesMask.VertexTessellationGeometry);
+            Add(map, 0x2b0, AggregateType.Vector4 | AggregateType.FP32, IoVariable.BackColorSpecular, StagesMask.TessellationGeometryFragment, StagesMask.VertexTessellationGeometry);
+            Add(map, 0x2c0, AggregateType.Array | AggregateType.FP32, IoVariable.ClipDistance, StagesMask.TessellationGeometryFragment, StagesMask.VertexTessellationGeometry, 8);
+            Add(map, 0x2e0, AggregateType.Vector2 | AggregateType.FP32, IoVariable.PointCoord, StagesMask.Fragment, StagesMask.None);
+            Add(map, 0x2e8, AggregateType.FP32, IoVariable.FogCoord, StagesMask.TessellationGeometryFragment, StagesMask.VertexTessellationGeometry);
+            Add(map, 0x2f0, AggregateType.Vector2 | AggregateType.FP32, IoVariable.TessellationCoord, StagesMask.TessellationEvaluation, StagesMask.None);
+            Add(map, 0x2f8, AggregateType.S32, IoVariable.InstanceId, StagesMask.Vertex, StagesMask.None);
+            Add(map, 0x2fc, AggregateType.S32, IoVariable.VertexId, StagesMask.Vertex, StagesMask.None);
+            Add(map, 0x300, AggregateType.Vector4 | AggregateType.FP32, IoVariable.TextureCoord, StagesMask.TessellationGeometryFragment, StagesMask.VertexTessellationGeometry);
+            Add(map, 0x3a0, AggregateType.Array | AggregateType.S32, IoVariable.ViewportMask, StagesMask.Fragment, StagesMask.VertexTessellationGeometry);
+            Add(map, 0x3fc, AggregateType.Bool, IoVariable.FrontFacing, StagesMask.Fragment, StagesMask.None);
+
+            return map;
+        }
+
+        /// <summary>
+        /// Builds the table used for per-patch attribute offsets.
+        /// </summary>
+        private static IReadOnlyDictionary<int, AttributeEntry> CreatePerPatchMap()
+        {
+            var map = new Dictionary<int, AttributeEntry>();
+
+            Add(map, 0x000, AggregateType.Vector4 | AggregateType.FP32, IoVariable.TessellationLevelOuter, StagesMask.TessellationEvaluation, StagesMask.TessellationControl);
+            Add(map, 0x010, AggregateType.Vector2 | AggregateType.FP32, IoVariable.TessellationLevelInner, StagesMask.TessellationEvaluation, StagesMask.TessellationControl);
+            Add(map, 0x018, AggregateType.Vector4 | AggregateType.FP32, IoVariable.UserDefined, StagesMask.TessellationEvaluation, StagesMask.TessellationControl, 31, 0x200);
+
+            return map;
+        }
+
+        /// <summary>
+        /// Registers one dictionary slot per element for <paramref name="count"/> consecutive attributes.
+        /// </summary>
+        /// <remarks>
+        /// Slots are keyed by offset, advancing by 4 per element. Every slot stores the same entry, whose
+        /// base offset is the initial <paramref name="offset"/>. Registration stops as soon as the next
+        /// offset would be at or above <paramref name="upperBound"/>.
+        /// </remarks>
+        /// <param name="attributes">Table being populated</param>
+        /// <param name="offset">Offset of the first slot</param>
+        /// <param name="type">Attribute type, also used to derive the element count</param>
+        /// <param name="ioVariable">IO variable the slots map to</param>
+        /// <param name="inputMask">Stages where the attribute is a valid input</param>
+        /// <param name="outputMask">Stages where the attribute is a valid output</param>
+        /// <param name="count">Number of consecutive attributes to register</param>
+        /// <param name="upperBound">Exclusive limit for registered offsets</param>
+        private static void Add(
+            Dictionary<int, AttributeEntry> attributes,
+            int offset,
+            AggregateType type,
+            IoVariable ioVariable,
+            StagesMask inputMask,
+            StagesMask outputMask,
+            int count = 1,
+            int upperBound = 0x400)
+        {
+            int baseOffset = offset;
+
+            int elementsCount = GetElementCount(type);
+
+            for (int index = 0; index < count; index++)
+            {
+                for (int elementIndex = 0; elementIndex < elementsCount; elementIndex++)
+                {
+                    attributes.Add(offset, new AttributeEntry(baseOffset, type, ioVariable, inputMask, outputMask));
+
+                    offset += 4;
+
+                    if (offset >= upperBound)
+                    {
+                        return;
+                    }
+                }
+            }
+        }
+
+        /// <summary>
+        /// Checks whether the bit for <paramref name="stage"/> is set in <paramref name="mask"/>.
+        /// </summary>
+        private static bool IsValidForStage(ShaderStage stage, StagesMask mask)
+        {
+            return ((StagesMask)(1 << (int)stage) & mask) != StagesMask.None;
+        }
+
+        /// <summary>
+        /// Emits a load of the attribute found at <paramref name="offset"/>.
+        /// </summary>
+        /// <remarks>
+        /// A message is logged and a constant zero is returned when the offset is unknown, when the
+        /// attribute is not usable on the current stage in the requested direction, or when the host
+        /// does not support it.
+        /// </remarks>
+        /// <param name="context">Emitter context receiving the load</param>
+        /// <param name="primVertex">Primitive vertex index operand, or null when there is none</param>
+        /// <param name="offset">Attribute offset</param>
+        /// <param name="isOutput">True to read from the output storage, false for the input storage</param>
+        /// <param name="isPerPatch">True to use the per-patch table and storage kinds</param>
+        /// <returns>The loaded value, or a constant zero on failure</returns>
+        public static Operand GenerateAttributeLoad(EmitterContext context, Operand primVertex, int offset, bool isOutput, bool isPerPatch)
+        {
+            if (!(isPerPatch ? _attributesPerPatch : _attributes).TryGetValue(offset, out AttributeEntry entry))
+            {
+                context.Config.GpuAccessor.Log($"Attribute offset 0x{offset:X} is not valid.");
+                return Const(0);
+            }
+
+            StagesMask validUseMask = isOutput ? entry.OutputMask : entry.InputMask;
+
+            if (!IsValidForStage(context.Config.Stage, validUseMask))
+            {
+                context.Config.GpuAccessor.Log($"Attribute offset 0x{offset:X} ({entry.IoVariable}) is not valid for stage {context.Config.Stage}.");
+                return Const(0);
+            }
+
+            if (!IsSupportedByHost(context.Config.GpuAccessor, context.Config.Stage, entry.IoVariable))
+            {
+                context.Config.GpuAccessor.Log($"Attribute offset 0x{offset:X} ({entry.IoVariable}) is not supported by the host for stage {context.Config.Stage}.");
+                return Const(0);
+            }
+
+            // For non per-patch accesses where HasInvocationId holds, the invocation ID replaces the vertex index.
+            if (HasInvocationId(context.Config.Stage, isOutput) && !isPerPatch)
+            {
+                primVertex = context.Load(StorageKind.Input, IoVariable.InvocationId);
+            }
+
+            int innerOffset = offset - entry.BaseOffset;
+            int innerIndex = innerOffset / 4;
+
+            StorageKind storageKind = isPerPatch
+                ? (isOutput ? StorageKind.OutputPerPatch : StorageKind.InputPerPatch)
+                : (isOutput ? StorageKind.Output : StorageKind.Input);
+            IoVariable ioVariable = GetIoVariable(context.Config.Stage, in entry);
+            AggregateType type = GetType(context.Config, isOutput, innerIndex, in entry);
+            int elementCount = GetElementCount(type);
+
+            bool isArray = type.HasFlag(AggregateType.Array);
+            bool hasArrayIndex = isArray || context.Config.HasPerLocationInputOrOutput(ioVariable, isOutput);
+
+            bool hasElementIndex = elementCount > 1;
+
+            if (hasArrayIndex && hasElementIndex)
+            {
+                int arrayIndex = innerIndex / elementCount;
+                int elementIndex = innerIndex - (arrayIndex * elementCount);
+
+                return primVertex == null || isArray
+                    ? context.Load(storageKind, ioVariable, primVertex, Const(arrayIndex), Const(elementIndex))
+                    : context.Load(storageKind, ioVariable, Const(arrayIndex), primVertex, Const(elementIndex));
+            }
+            else if (hasArrayIndex || hasElementIndex)
+            {
+                return primVertex == null || isArray || !hasArrayIndex
+                    ? context.Load(storageKind, ioVariable, primVertex, Const(innerIndex))
+                    : context.Load(storageKind, ioVariable, Const(innerIndex), primVertex);
+            }
+            else
+            {
+                return context.Load(storageKind, ioVariable, primVertex);
+            }
+        }
+
+        /// <summary>
+        /// Emits a store of <paramref name="value"/> to the output attribute found at <paramref name="offset"/>.
+        /// </summary>
+        /// <remarks>
+        /// A message is logged and nothing is stored when the offset is unknown, when the attribute is
+        /// not a valid output for the current stage, or when the host does not support it.
+        /// </remarks>
+        /// <param name="context">Emitter context receiving the store</param>
+        /// <param name="offset">Attribute offset</param>
+        /// <param name="isPerPatch">True to use the per-patch table and storage kind</param>
+        /// <param name="value">Value to store</param>
+        public static void GenerateAttributeStore(EmitterContext context, int offset, bool isPerPatch, Operand value)
+        {
+            if (!(isPerPatch ? _attributesPerPatch : _attributes).TryGetValue(offset, out AttributeEntry entry))
+            {
+                context.Config.GpuAccessor.Log($"Attribute offset 0x{offset:X} is not valid.");
+                return;
+            }
+
+            if (!IsValidForStage(context.Config.Stage, entry.OutputMask))
+            {
+                context.Config.GpuAccessor.Log($"Attribute offset 0x{offset:X} ({entry.IoVariable}) is not valid for stage {context.Config.Stage}.");
+                return;
+            }
+
+            if (!IsSupportedByHost(context.Config.GpuAccessor, context.Config.Stage, entry.IoVariable))
+            {
+                context.Config.GpuAccessor.Log($"Attribute offset 0x{offset:X} ({entry.IoVariable}) is not supported by the host for stage {context.Config.Stage}.");
+                return;
+            }
+
+            Operand invocationId = null;
+
+            if (HasInvocationId(context.Config.Stage, isOutput: true) && !isPerPatch)
+            {
+                invocationId = context.Load(StorageKind.Input, IoVariable.InvocationId);
+            }
+
+            int innerOffset = offset - entry.BaseOffset;
+            int innerIndex = innerOffset / 4;
+
+            StorageKind storageKind = isPerPatch ? StorageKind.OutputPerPatch : StorageKind.Output;
+            IoVariable ioVariable = GetIoVariable(context.Config.Stage, in entry);
+            AggregateType type = GetType(context.Config, isOutput: true, innerIndex, in entry);
+            int elementCount = GetElementCount(type);
+
+            bool isArray = type.HasFlag(AggregateType.Array);
+            bool hasArrayIndex = isArray || context.Config.HasPerLocationInputOrOutput(ioVariable, isOutput: true);
+
+            bool hasElementIndex = elementCount > 1;
+
+            if (hasArrayIndex && hasElementIndex)
+            {
+                int arrayIndex = innerIndex / elementCount;
+                int elementIndex = innerIndex - (arrayIndex * elementCount);
+
+                if (invocationId == null || isArray)
+                {
+                    context.Store(storageKind, ioVariable, invocationId, Const(arrayIndex), Const(elementIndex), value);
+                }
+                else
+                {
+                    context.Store(storageKind, ioVariable, Const(arrayIndex), invocationId, Const(elementIndex), value);
+                }
+            }
+            else if (hasArrayIndex || hasElementIndex)
+            {
+                if (invocationId == null || isArray || !hasArrayIndex)
+                {
+                    context.Store(storageKind, ioVariable, invocationId, Const(innerIndex), value);
+                }
+                else
+                {
+                    context.Store(storageKind, ioVariable, Const(innerIndex), invocationId, value);
+                }
+            }
+            else
+            {
+                context.Store(storageKind, ioVariable, invocationId, value);
+            }
+        }
+
+        /// <summary>
+        /// Queries the GPU accessor for the IO variables that need explicit host support.
+        /// </summary>
+        /// <returns>True for every variable that has no host query associated with it</returns>
+        private static bool IsSupportedByHost(IGpuAccessor gpuAccessor, ShaderStage stage, IoVariable ioVariable)
+        {
+            if (ioVariable == IoVariable.ViewportIndex && stage != ShaderStage.Geometry && stage != ShaderStage.Fragment)
+            {
+                return gpuAccessor.QueryHostSupportsViewportIndexVertexTessellation();
+            }
+            else if (ioVariable == IoVariable.ViewportMask)
+            {
+                return gpuAccessor.QueryHostSupportsViewportMask();
+            }
+
+            return true;
+        }
+
+        /// <summary>
+        /// Resolves the output IO variable for a regular attribute offset.
+        /// </summary>
+        /// <param name="config">Shader configuration providing the stage and per-location information</param>
+        /// <param name="offset">Attribute offset</param>
+        /// <param name="location">
+        /// Distance from the attribute base offset divided by 16 when the variable is per-location, zero otherwise
+        /// </param>
+        /// <returns>
+        /// The IO variable, or <see cref="IoVariable.Invalid"/> when the offset is unknown or the attribute
+        /// is not a valid output for the stage
+        /// </returns>
+        public static IoVariable GetIoVariable(ShaderConfig config, int offset, out int location)
+        {
+            location = 0;
+
+            if (!_attributes.TryGetValue(offset, out AttributeEntry entry))
+            {
+                return IoVariable.Invalid;
+            }
+
+            if (!IsValidForStage(config.Stage, entry.OutputMask))
+            {
+                return IoVariable.Invalid;
+            }
+
+            if (config.HasPerLocationInputOrOutput(entry.IoVariable, isOutput: true))
+            {
+                location = (offset - entry.BaseOffset) / 16;
+            }
+
+            return GetIoVariable(config.Stage, in entry);
+        }
+
+        /// <summary>
+        /// Returns the IO variable of the entry, substituting the fragment coordinate for the position on the fragment stage.
+        /// </summary>
+        private static IoVariable GetIoVariable(ShaderStage stage, in AttributeEntry entry)
+        {
+            if (entry.IoVariable == IoVariable.Position && stage == ShaderStage.Fragment)
+            {
+                return IoVariable.FragmentCoord;
+            }
+
+            return entry.IoVariable;
+        }
+
+        /// <summary>
+        /// Returns the effective type of the attribute. User defined and fragment output color variables
+        /// take their type from the shader configuration; the rest use the registered type.
+        /// </summary>
+        private static AggregateType GetType(ShaderConfig config, bool isOutput, int innerIndex, in AttributeEntry entry)
+        {
+            AggregateType type = entry.Type;
+
+            if (entry.IoVariable == IoVariable.UserDefined)
+            {
+                type = config.GetUserDefinedType(innerIndex / 4, isOutput);
+            }
+            else if (entry.IoVariable == IoVariable.FragmentOutputColor)
+            {
+                type = config.GetFragmentOutputColorType(innerIndex / 4);
+            }
+
+            return type;
+        }
+
+        /// <summary>
+        /// Returns true for input accesses on the tessellation control, tessellation evaluation and geometry stages.
+        /// </summary>
+        public static bool HasPrimitiveVertex(ShaderStage stage, bool isOutput)
+        {
+            if (isOutput)
+            {
+                return false;
+            }
+
+            return stage == ShaderStage.TessellationControl ||
+                   stage == ShaderStage.TessellationEvaluation ||
+                   stage == ShaderStage.Geometry;
+        }
+
+        /// <summary>
+        /// Returns true for output accesses on the tessellation control stage.
+        /// </summary>
+        public static bool HasInvocationId(ShaderStage stage, bool isOutput)
+        {
+            return isOutput && stage == ShaderStage.TessellationControl;
+        }
+
+        /// <summary>
+        /// Returns the number of vector elements encoded in <paramref name="type"/>, or 1 when it is not a vector.
+        /// </summary>
+        private static int GetElementCount(AggregateType type)
+        {
+            return (type & AggregateType.ElementCountMask) switch
+            {
+                AggregateType.Vector2 => 2,
+                AggregateType.Vector3 => 3,
+                AggregateType.Vector4 => 4,
+                _ => 1
+            };
+        }
+    }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/Instructions/InstEmit.cs b/src/Ryujinx.Graphics.Shader/Instructions/InstEmit.cs
new file mode 100644
index 00000000..3a9e658a
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/Instructions/InstEmit.cs
@@ -0,0 +1,379 @@
+using Ryujinx.Graphics.Shader.Decoders;
+using Ryujinx.Graphics.Shader.Translation;
+
+namespace Ryujinx.Graphics.Shader.Instructions
+{
+    /// <summary>
+    /// Emitters for shader instructions that have no implementation yet.
+    /// Each method fetches the current operation as its decoded type and then logs that the
+    /// instruction is not implemented; nothing is added to the emitted code.
+    /// </summary>
+    static partial class InstEmit
+    {
+        public static void AtomCas(EmitterContext context)
+        {
+            context.GetOp<InstAtomCas>();
+
+            context.Config.GpuAccessor.Log("Shader instruction AtomCas is not implemented.");
+        }
+
+        public static void AtomsCas(EmitterContext context)
+        {
+            context.GetOp<InstAtomsCas>();
+
+            context.Config.GpuAccessor.Log("Shader instruction AtomsCas is not implemented.");
+        }
+
+        public static void B2r(EmitterContext context)
+        {
+            context.GetOp<InstB2r>();
+
+            context.Config.GpuAccessor.Log("Shader instruction B2r is not implemented.");
+        }
+
+        public static void Bpt(EmitterContext context)
+        {
+            context.GetOp<InstBpt>();
+
+            context.Config.GpuAccessor.Log("Shader instruction Bpt is not implemented.");
+        }
+
+        public static void Cctl(EmitterContext context)
+        {
+            context.GetOp<InstCctl>();
+
+            context.Config.GpuAccessor.Log("Shader instruction Cctl is not implemented.");
+        }
+
+        public static void Cctll(EmitterContext context)
+        {
+            context.GetOp<InstCctll>();
+
+            context.Config.GpuAccessor.Log("Shader instruction Cctll is not implemented.");
+        }
+
+        public static void Cctlt(EmitterContext context)
+        {
+            context.GetOp<InstCctlt>();
+
+            context.Config.GpuAccessor.Log("Shader instruction Cctlt is not implemented.");
+        }
+
+        public static void Cs2r(EmitterContext context)
+        {
+            context.GetOp<InstCs2r>();
+
+            context.Config.GpuAccessor.Log("Shader instruction Cs2r is not implemented.");
+        }
+
+        public static void FchkR(EmitterContext context)
+        {
+            context.GetOp<InstFchkR>();
+
+            context.Config.GpuAccessor.Log("Shader instruction FchkR is not implemented.");
+        }
+
+        public static void FchkI(EmitterContext context)
+        {
+            context.GetOp<InstFchkI>();
+
+            context.Config.GpuAccessor.Log("Shader instruction FchkI is not implemented.");
+        }
+
+        public static void FchkC(EmitterContext context)
+        {
+            context.GetOp<InstFchkC>();
+
+            context.Config.GpuAccessor.Log("Shader instruction FchkC is not implemented.");
+        }
+
+        public static void Getcrsptr(EmitterContext context)
+        {
+            context.GetOp<InstGetcrsptr>();
+
+            context.Config.GpuAccessor.Log("Shader instruction Getcrsptr is not implemented.");
+        }
+
+        public static void Getlmembase(EmitterContext context)
+        {
+            context.GetOp<InstGetlmembase>();
+
+            context.Config.GpuAccessor.Log("Shader instruction Getlmembase is not implemented.");
+        }
+
+        public static void Ide(EmitterContext context)
+        {
+            context.GetOp<InstIde>();
+
+            context.Config.GpuAccessor.Log("Shader instruction Ide is not implemented.");
+        }
+
+        public static void IdpR(EmitterContext context)
+        {
+            context.GetOp<InstIdpR>();
+
+            context.Config.GpuAccessor.Log("Shader instruction IdpR is not implemented.");
+        }
+
+        public static void IdpC(EmitterContext context)
+        {
+            context.GetOp<InstIdpC>();
+
+            context.Config.GpuAccessor.Log("Shader instruction IdpC is not implemented.");
+        }
+
+        public static void ImadspR(EmitterContext context)
+        {
+            context.GetOp<InstImadspR>();
+
+            context.Config.GpuAccessor.Log("Shader instruction ImadspR is not implemented.");
+        }
+
+        public static void ImadspI(EmitterContext context)
+        {
+            context.GetOp<InstImadspI>();
+
+            context.Config.GpuAccessor.Log("Shader instruction ImadspI is not implemented.");
+        }
+
+        public static void ImadspC(EmitterContext context)
+        {
+            context.GetOp<InstImadspC>();
+
+            context.Config.GpuAccessor.Log("Shader instruction ImadspC is not implemented.");
+        }
+
+        public static void ImadspRc(EmitterContext context)
+        {
+            context.GetOp<InstImadspRc>();
+
+            context.Config.GpuAccessor.Log("Shader instruction ImadspRc is not implemented.");
+        }
+
+        public static void Jcal(EmitterContext context)
+        {
+            context.GetOp<InstJcal>();
+
+            context.Config.GpuAccessor.Log("Shader instruction Jcal is not implemented.");
+        }
+
+        public static void Jmp(EmitterContext context)
+        {
+            context.GetOp<InstJmp>();
+
+            context.Config.GpuAccessor.Log("Shader instruction Jmp is not implemented.");
+        }
+
+        public static void Jmx(EmitterContext context)
+        {
+            context.GetOp<InstJmx>();
+
+            context.Config.GpuAccessor.Log("Shader instruction Jmx is not implemented.");
+        }
+
+        public static void Ld(EmitterContext context)
+        {
+            context.GetOp<InstLd>();
+
+            context.Config.GpuAccessor.Log("Shader instruction Ld is not implemented.");
+        }
+
+        public static void Lepc(EmitterContext context)
+        {
+            context.GetOp<InstLepc>();
+
+            context.Config.GpuAccessor.Log("Shader instruction Lepc is not implemented.");
+        }
+
+        public static void Longjmp(EmitterContext context)
+        {
+            context.GetOp<InstLongjmp>();
+
+            context.Config.GpuAccessor.Log("Shader instruction Longjmp is not implemented.");
+        }
+
+        public static void P2rR(EmitterContext context)
+        {
+            context.GetOp<InstP2rR>();
+
+            context.Config.GpuAccessor.Log("Shader instruction P2rR is not implemented.");
+        }
+
+        public static void P2rI(EmitterContext context)
+        {
+            context.GetOp<InstP2rI>();
+
+            context.Config.GpuAccessor.Log("Shader instruction P2rI is not implemented.");
+        }
+
+        public static void P2rC(EmitterContext context)
+        {
+            context.GetOp<InstP2rC>();
+
+            context.Config.GpuAccessor.Log("Shader instruction P2rC is not implemented.");
+        }
+
+        public static void Pexit(EmitterContext context)
+        {
+            context.GetOp<InstPexit>();
+
+            context.Config.GpuAccessor.Log("Shader instruction Pexit is not implemented.");
+        }
+
+        public static void Pixld(EmitterContext context)
+        {
+            context.GetOp<InstPixld>();
+
+            context.Config.GpuAccessor.Log("Shader instruction Pixld is not implemented.");
+        }
+
+        public static void Plongjmp(EmitterContext context)
+        {
+            context.GetOp<InstPlongjmp>();
+
+            context.Config.GpuAccessor.Log("Shader instruction Plongjmp is not implemented.");
+        }
+
+        public static void Pret(EmitterContext context)
+        {
+            context.GetOp<InstPret>();
+
+            context.Config.GpuAccessor.Log("Shader instruction Pret is not implemented.");
+        }
+
+        public static void PrmtR(EmitterContext context)
+        {
+            context.GetOp<InstPrmtR>();
+
+            context.Config.GpuAccessor.Log("Shader instruction PrmtR is not implemented.");
+        }
+
+        public static void PrmtI(EmitterContext context)
+        {
+            context.GetOp<InstPrmtI>();
+
+            context.Config.GpuAccessor.Log("Shader instruction PrmtI is not implemented.");
+        }
+
+        public static void PrmtC(EmitterContext context)
+        {
+            context.GetOp<InstPrmtC>();
+
+            context.Config.GpuAccessor.Log("Shader instruction PrmtC is not implemented.");
+        }
+
+        public static void PrmtRc(EmitterContext context)
+        {
+            context.GetOp<InstPrmtRc>();
+
+            context.Config.GpuAccessor.Log("Shader instruction PrmtRc is not implemented.");
+        }
+
+        public static void R2b(EmitterContext context)
+        {
+            context.GetOp<InstR2b>();
+
+            context.Config.GpuAccessor.Log("Shader instruction R2b is not implemented.");
+        }
+
+        public static void Ram(EmitterContext context)
+        {
+            context.GetOp<InstRam>();
+
+            context.Config.GpuAccessor.Log("Shader instruction Ram is not implemented.");
+        }
+
+        public static void Rtt(EmitterContext context)
+        {
+            context.GetOp<InstRtt>();
+
+            context.Config.GpuAccessor.Log("Shader instruction Rtt is not implemented.");
+        }
+
+        public static void Sam(EmitterContext context)
+        {
+            context.GetOp<InstSam>();
+
+            context.Config.GpuAccessor.Log("Shader instruction Sam is not implemented.");
+        }
+
+        public static void Setcrsptr(EmitterContext context)
+        {
+            context.GetOp<InstSetcrsptr>();
+
+            context.Config.GpuAccessor.Log("Shader instruction Setcrsptr is not implemented.");
+        }
+
+        public static void Setlmembase(EmitterContext context)
+        {
+            context.GetOp<InstSetlmembase>();
+
+            context.Config.GpuAccessor.Log("Shader instruction Setlmembase is not implemented.");
+        }
+
+        public static void St(EmitterContext context)
+        {
+            context.GetOp<InstSt>();
+
+            context.Config.GpuAccessor.Log("Shader instruction St is not implemented.");
+        }
+
+        public static void Stp(EmitterContext context)
+        {
+            context.GetOp<InstStp>();
+
+            context.Config.GpuAccessor.Log("Shader instruction Stp is not implemented.");
+        }
+
+        public static void Txa(EmitterContext context)
+        {
+            context.GetOp<InstTxa>();
+
+            context.Config.GpuAccessor.Log("Shader instruction Txa is not implemented.");
+        }
+
+        public static void Vabsdiff(EmitterContext context)
+        {
+            context.GetOp<InstVabsdiff>();
+
+            context.Config.GpuAccessor.Log("Shader instruction Vabsdiff is not implemented.");
+        }
+
+        public static void Vabsdiff4(EmitterContext context)
+        {
+            context.GetOp<InstVabsdiff4>();
+
+            context.Config.GpuAccessor.Log("Shader instruction Vabsdiff4 is not implemented.");
+        }
+
+        public static void Vadd(EmitterContext context)
+        {
+            context.GetOp<InstVadd>();
+
+            context.Config.GpuAccessor.Log("Shader instruction Vadd is not implemented.");
+        }
+
+        public static void Votevtg(EmitterContext context)
+        {
+            context.GetOp<InstVotevtg>();
+
+            context.Config.GpuAccessor.Log("Shader instruction Votevtg is not implemented.");
+        }
+
+        public static void Vset(EmitterContext context)
+        {
+            context.GetOp<InstVset>();
+
+            context.Config.GpuAccessor.Log("Shader instruction Vset is not implemented.");
+        }
+
+        public static void Vshl(EmitterContext context)
+        {
+            context.GetOp<InstVshl>();
+
+            context.Config.GpuAccessor.Log("Shader instruction Vshl is not implemented.");
+        }
+
+        public static void Vshr(EmitterContext context)
+        {
+            context.GetOp<InstVshr>();
+
+            context.Config.GpuAccessor.Log("Shader instruction Vshr is not implemented.");
+        }
+    }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/Instructions/InstEmitAluHelper.cs b/src/Ryujinx.Graphics.Shader/Instructions/InstEmitAluHelper.cs
new file mode 100644
index 00000000..879075ba
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/Instructions/InstEmitAluHelper.cs
@@ -0,0 +1,160 @@
+using Ryujinx.Graphics.Shader.Decoders;
+using Ryujinx.Graphics.Shader.IntermediateRepresentation;
+using Ryujinx.Graphics.Shader.Translation;
+using System;
+
+using static Ryujinx.Graphics.Shader.Instructions.InstEmitHelper;
+using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
+
+namespace Ryujinx.Graphics.Shader.Instructions
+{
+    /// <summary>
+    /// Helpers shared by ALU instruction emitters: integer format limits, operand extension,
+    /// zero/negative flag updates and two-word (low/high) arithmetic.
+    /// </summary>
+    static class InstEmitAluHelper
+    {
+        /// <summary>
+        /// Returns the smallest value representable by the given destination format.
+        /// </summary>
+        public static long GetIntMin(IDstFmt type)
+        {
+            switch (type)
+            {
+                case IDstFmt.U16:
+                    return ushort.MinValue;
+                case IDstFmt.S16:
+                    return short.MinValue;
+                case IDstFmt.U32:
+                    return uint.MinValue;
+                case IDstFmt.S32:
+                    return int.MinValue;
+                default:
+                    throw new ArgumentException($"The type \"{type}\" is not a supported integer type.");
+            }
+        }
+
+        /// <summary>
+        /// Returns the largest value representable by the given destination format.
+        /// </summary>
+        public static long GetIntMax(IDstFmt type)
+        {
+            switch (type)
+            {
+                case IDstFmt.U16:
+                    return ushort.MaxValue;
+                case IDstFmt.S16:
+                    return short.MaxValue;
+                case IDstFmt.U32:
+                    return uint.MaxValue;
+                case IDstFmt.S32:
+                    return int.MaxValue;
+                default:
+                    throw new ArgumentException($"The type \"{type}\" is not a supported integer type.");
+            }
+        }
+
+        /// <summary>
+        /// Returns the smallest value representable by the given source/destination format.
+        /// </summary>
+        public static long GetIntMin(ISrcDstFmt type)
+        {
+            switch (type)
+            {
+                case ISrcDstFmt.U8:
+                    return byte.MinValue;
+                case ISrcDstFmt.S8:
+                    return sbyte.MinValue;
+                case ISrcDstFmt.U16:
+                    return ushort.MinValue;
+                case ISrcDstFmt.S16:
+                    return short.MinValue;
+                case ISrcDstFmt.U32:
+                    return uint.MinValue;
+                case ISrcDstFmt.S32:
+                    return int.MinValue;
+                default:
+                    throw new ArgumentException($"The type \"{type}\" is not a supported integer type.");
+            }
+        }
+
+        /// <summary>
+        /// Returns the largest value representable by the given source/destination format.
+        /// </summary>
+        public static long GetIntMax(ISrcDstFmt type)
+        {
+            switch (type)
+            {
+                case ISrcDstFmt.U8:
+                    return byte.MaxValue;
+                case ISrcDstFmt.S8:
+                    return sbyte.MaxValue;
+                case ISrcDstFmt.U16:
+                    return ushort.MaxValue;
+                case ISrcDstFmt.S16:
+                    return short.MaxValue;
+                case ISrcDstFmt.U32:
+                    return uint.MaxValue;
+                case ISrcDstFmt.S32:
+                    return int.MaxValue;
+                default:
+                    throw new ArgumentException($"The type \"{type}\" is not a supported integer type.");
+            }
+        }
+
+        /// <summary>
+        /// Combines <paramref name="input"/> with <paramref name="pred"/> using the requested boolean
+        /// operation. Any operation other than And, Or and Xor returns <paramref name="input"/> unchanged.
+        /// </summary>
+        public static Operand GetPredLogicalOp(EmitterContext context, BoolOp logicOp, Operand input, Operand pred)
+        {
+            switch (logicOp)
+            {
+                case BoolOp.And:
+                    return context.BitwiseAnd(input, pred);
+                case BoolOp.Or:
+                    return context.BitwiseOr(input, pred);
+                case BoolOp.Xor:
+                    return context.BitwiseExclusiveOr(input, pred);
+                default:
+                    return input;
+            }
+        }
+
+        /// <summary>
+        /// Shifts the selected byte or half-word of <paramref name="src"/> down to bit 0 and extends it
+        /// to 32 bits, with zero or sign extension as named by the selector. Unlisted selectors return
+        /// <paramref name="src"/> unchanged.
+        /// </summary>
+        public static Operand Extend(EmitterContext context, Operand src, VectorSelect type)
+        {
+            // Both helpers shift right by "shift" bits first, then extend the low "bits" bits.
+            Operand Unsigned(int shift, int bits)
+            {
+                return ZeroExtendTo32(context, context.ShiftRightU32(src, Const(shift)), bits);
+            }
+
+            Operand Signed(int shift, int bits)
+            {
+                return SignExtendTo32(context, context.ShiftRightU32(src, Const(shift)), bits);
+            }
+
+            return type switch
+            {
+                VectorSelect.U8B0 => Unsigned(0, 8),
+                VectorSelect.U8B1 => Unsigned(8, 8),
+                VectorSelect.U8B2 => Unsigned(16, 8),
+                VectorSelect.U8B3 => Unsigned(24, 8),
+                VectorSelect.U16H0 => Unsigned(0, 16),
+                VectorSelect.U16H1 => Unsigned(16, 16),
+                VectorSelect.S8B0 => Signed(0, 8),
+                VectorSelect.S8B1 => Signed(8, 8),
+                VectorSelect.S8B2 => Signed(16, 8),
+                VectorSelect.S8B3 => Signed(24, 8),
+                VectorSelect.S16H0 => Signed(0, 16),
+                VectorSelect.S16H1 => Signed(16, 16),
+                _ => src
+            };
+        }
+
+        /// <summary>
+        /// Updates the zero and negative flags from an integer result when <paramref name="setCC"/> is set.
+        /// </summary>
+        public static void SetZnFlags(EmitterContext context, Operand dest, bool setCC, bool extended = false)
+        {
+            if (setCC)
+            {
+                if (!extended)
+                {
+                    context.Copy(GetZF(), context.ICompareEqual(dest, Const(0)));
+                }
+                else
+                {
+                    // An extended operation works on one word of a longer value, so the zero flag
+                    // must stay set only if the previous words were zero as well: AND the new
+                    // comparison with the existing flag instead of overwriting it.
+                    Operand previousZF = GetZF();
+
+                    Operand isZero = context.ICompareEqual(dest, Const(0));
+                    Operand combined = context.BitwiseAnd(isZero, previousZF);
+
+                    context.Copy(GetZF(), combined);
+                }
+
+                context.Copy(GetNF(), context.ICompareLess(dest, Const(0)));
+            }
+        }
+
+        /// <summary>
+        /// Updates the zero and negative flags from a floating-point result when <paramref name="setCC"/> is set.
+        /// </summary>
+        public static void SetFPZnFlags(EmitterContext context, Operand dest, bool setCC, Instruction fpType = Instruction.FP32)
+        {
+            if (!setCC)
+            {
+                return;
+            }
+
+            Operand zero = ConstF(0);
+
+            // The constant is created as FP32, so convert it when comparing as FP64.
+            if (fpType == Instruction.FP64)
+            {
+                zero = context.FP32ConvertToFP64(zero);
+            }
+
+            context.Copy(GetZF(), context.FPCompareEqual(dest, zero, fpType));
+            context.Copy(GetNF(), context.FPCompareLess(dest, zero, fpType));
+        }
+
+        /// <summary>
+        /// Negates a value split into low and high words: inverts both words, adds one to the
+        /// low word and adds the resulting carry to the high word.
+        /// </summary>
+        public static (Operand, Operand) NegateLong(EmitterContext context, Operand low, Operand high)
+        {
+            Operand invertedLow = context.BitwiseNot(low);
+            Operand invertedHigh = context.BitwiseNot(high);
+
+            Operand negatedLow = AddWithCarry(context, invertedLow, Const(1), out Operand carry);
+            Operand negatedHigh = context.IAdd(invertedHigh, carry);
+
+            return (negatedLow, negatedHigh);
+        }
+
+        /// <summary>
+        /// Adds two operands and produces a carry operand alongside the sum.
+        /// </summary>
+        /// <param name="context">Emitter context receiving the operations</param>
+        /// <param name="lhs">Left operand</param>
+        /// <param name="rhs">Right operand</param>
+        /// <param name="carryOut">Negated result of the unsigned "sum less than lhs" comparison</param>
+        /// <returns>The sum of both operands</returns>
+        public static Operand AddWithCarry(EmitterContext context, Operand lhs, Operand rhs, out Operand carryOut)
+        {
+            Operand sum = context.IAdd(lhs, rhs);
+
+            // C = Rd < Rn
+            Operand wrapped = context.ICompareLessUnsigned(sum, lhs);
+
+            carryOut = context.INegate(wrapped);
+
+            return sum;
+        }
+    }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/Instructions/InstEmitAttribute.cs b/src/Ryujinx.Graphics.Shader/Instructions/InstEmitAttribute.cs
new file mode 100644
index 00000000..1df38761
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/Instructions/InstEmitAttribute.cs
@@ -0,0 +1,383 @@
+using Ryujinx.Graphics.Shader.Decoders;
+using Ryujinx.Graphics.Shader.IntermediateRepresentation;
+using Ryujinx.Graphics.Shader.Translation;
+
+using static Ryujinx.Graphics.Shader.Instructions.InstEmitHelper;
+using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
+
+namespace Ryujinx.Graphics.Shader.Instructions
+{
+ static partial class InstEmit
+ {
+ /// <summary>
+ /// Emits code for AL2P: writes source register A plus the 11-bit immediate to the destination register.
+ /// </summary>
+ /// <param name="context">Emitter context positioned at the instruction</param>
+ public static void Al2p(EmitterContext context)
+ {
+     InstAl2p inst = context.GetOp<InstAl2p>();
+
+     Operand destination = GetDest(inst.Dest);
+     Operand address = context.IAdd(GetSrcReg(context, inst.SrcA), Const(inst.Imm11));
+
+     context.Copy(destination, address);
+ }
+
+ /// <summary>
+ /// Emits code for ALD (attribute load). One value is loaded per destination register,
+ /// starting at <c>Dest</c>, for <c>AlSize + 1</c> consecutive registers.
+ /// </summary>
+ /// <param name="context">Emitter context positioned at the instruction</param>
+ public static void Ald(EmitterContext context)
+ {
+     InstAld inst = context.GetOp<InstAld>();
+
+     // Some of those attributes are per invocation,
+     // so we should ignore any primitive vertex indexing for those.
+     bool usesVertexIndex =
+         AttributeMap.HasPrimitiveVertex(context.Config.Stage, inst.O) &&
+         !inst.P &&
+         (inst.Phys || HasPrimitiveVertex(inst.Imm11));
+
+     Operand vertexIndex = null;
+
+     if (usesVertexIndex)
+     {
+         vertexIndex = context.Copy(GetSrcReg(context, inst.SrcB));
+     }
+
+     int count = (int)inst.AlSize + 1;
+
+     for (int i = 0; i < count; i++)
+     {
+         Register destReg = new Register(inst.Dest + i, RegisterType.Gpr);
+
+         // Stop as soon as the destination runs into the zero register.
+         if (destReg.IsRZ)
+         {
+             break;
+         }
+
+         Operand loaded;
+
+         if (inst.Phys)
+         {
+             // Physical addressing: the attribute byte offset comes from register A.
+             // NOTE(review): the offset does not depend on the loop index, so every
+             // destination register receives the same element - confirm this is intended.
+             Operand byteOffset = context.ISubtract(GetSrcReg(context, inst.SrcA), Const(AttributeConsts.UserAttributeBase));
+             Operand vecIndex = context.ShiftRightU32(byteOffset, Const(4));
+             Operand elemIndex = context.BitwiseAnd(context.ShiftRightU32(byteOffset, Const(2)), Const(3));
+
+             StorageKind kind = inst.O ? StorageKind.Output : StorageKind.Input;
+
+             loaded = context.Load(kind, IoVariable.UserDefined, vertexIndex, vecIndex, elemIndex);
+         }
+         else
+         {
+             int attrOffset = FixedFuncToUserAttribute(context.Config, inst.Imm11 + i * 4, inst.O);
+
+             context.FlagAttributeRead(attrOffset);
+
+             bool fromOutput = inst.O && CanLoadOutput(attrOffset);
+
+             // The instance/vertex ID conversion is only attempted for non per-patch input loads
+             // that do not use a vertex index register.
+             bool tryIdConversion =
+                 inst.SrcB == RegisterConsts.RegisterZeroIndex &&
+                 !inst.P &&
+                 !fromOutput;
+
+             if (!tryIdConversion || !TryConvertIdToIndexForVulkan(context, attrOffset, out loaded))
+             {
+                 // When a vertex index register is in use, inst.P is always false here,
+                 // so this matches passing "false" for the per-patch argument in that case.
+                 loaded = AttributeMap.GenerateAttributeLoad(context, vertexIndex, attrOffset, fromOutput, inst.P);
+             }
+         }
+
+         context.Copy(Register(destReg), loaded);
+     }
+ }
+
+ /// <summary>
+ /// Emits code for AST (attribute store). One value is stored per source register,
+ /// starting at <c>SrcB</c>, for up to <c>AlSize + 1</c> consecutive registers.
+ /// </summary>
+ /// <param name="context">Emitter context positioned at the instruction</param>
+ public static void Ast(EmitterContext context)
+ {
+     InstAst inst = context.GetOp<InstAst>();
+
+     int count = (int)inst.AlSize + 1;
+
+     for (int i = 0; i < count; i++)
+     {
+         int srcIndex = inst.SrcB + i;
+
+         // Stop once the source register index goes past the zero register.
+         if (srcIndex > RegisterConsts.RegisterZeroIndex)
+         {
+             break;
+         }
+
+         Register srcReg = new Register(srcIndex, RegisterType.Gpr);
+
+         if (!inst.Phys)
+         {
+             // TODO: Support indirect stores using Ra.
+
+             int attrOffset = inst.Imm11 + i * 4;
+
+             // NOTE(review): this aborts the whole instruction (not only this element)
+             // at the first unused output attribute - confirm that is intended.
+             if (!context.Config.IsUsedOutputAttribute(attrOffset))
+             {
+                 return;
+             }
+
+             attrOffset = FixedFuncToUserAttribute(context.Config, attrOffset, isOutput: true);
+
+             context.FlagAttributeWritten(attrOffset);
+
+             AttributeMap.GenerateAttributeStore(context, attrOffset, inst.P, Register(srcReg));
+
+             continue;
+         }
+
+         // Physical addressing: the attribute byte offset comes from register A.
+         Operand byteOffset = context.ISubtract(GetSrcReg(context, inst.SrcA), Const(AttributeConsts.UserAttributeBase));
+         Operand vecIndex = context.ShiftRightU32(byteOffset, Const(4));
+         Operand elemIndex = context.BitwiseAnd(context.ShiftRightU32(byteOffset, Const(2)), Const(3));
+
+         Operand invocationId = null;
+
+         if (AttributeMap.HasInvocationId(context.Config.Stage, isOutput: true))
+         {
+             invocationId = context.Load(StorageKind.Input, IoVariable.InvocationId);
+         }
+
+         context.Store(StorageKind.Output, IoVariable.UserDefined, invocationId, vecIndex, elemIndex, Register(srcReg));
+     }
+ }
+
+ /// <summary>
+ /// Emits code for IPA: loads a fragment input attribute, applies the adjustments required
+ /// for the attribute being read, optionally multiplies it by source register B and
+ /// saturates it, then writes it to the destination register.
+ /// </summary>
+ /// <param name="context">Emitter context positioned at the instruction</param>
+ public static void Ipa(EmitterContext context)
+ {
+     InstIpa inst = context.GetOp<InstIpa>();
+
+     context.FlagAttributeRead(inst.Imm10);
+
+     Operand value;
+
+     bool isFixedFunc = false;
+
+     if (inst.Idx)
+     {
+         // Indexed access: the attribute byte offset comes from register A.
+         Operand byteOffset = context.ISubtract(GetSrcReg(context, inst.SrcA), Const(AttributeConsts.UserAttributeBase));
+         Operand vecIndex = context.ShiftRightU32(byteOffset, Const(4));
+         Operand elemIndex = context.BitwiseAnd(context.ShiftRightU32(byteOffset, Const(2)), Const(3));
+
+         value = context.Load(StorageKind.Input, IoVariable.UserDefined, null, vecIndex, elemIndex);
+
+         // The loaded value is scaled by component 3 of the fragment coordinate.
+         Operand fragCoordW = context.Load(StorageKind.Input, IoVariable.FragmentCoord, null, Const(3));
+
+         value = context.FPMultiply(value, fragCoordW);
+     }
+     else
+     {
+         isFixedFunc = TryFixedFuncToUserAttributeIpa(context, inst.Imm10, out value);
+
+         bool isUserAttribute =
+             inst.Imm10 >= AttributeConsts.UserAttributeBase &&
+             inst.Imm10 < AttributeConsts.UserAttributeEnd;
+
+         if (isUserAttribute)
+         {
+             int location = (inst.Imm10 - AttributeConsts.UserAttributeBase) >> 4;
+
+             // Only attributes flagged as perspective are scaled by component 3 of the fragment coordinate.
+             if (context.Config.ImapTypes[location].GetFirstUsedType() == PixelImap.Perspective)
+             {
+                 Operand fragCoordW = context.Load(StorageKind.Input, IoVariable.FragmentCoord, null, Const(3));
+
+                 value = context.FPMultiply(value, fragCoordW);
+             }
+         }
+         else if (inst.Imm10 == AttributeConsts.PositionX || inst.Imm10 == AttributeConsts.PositionY)
+         {
+             // FragCoord X/Y must be divided by the render target scale, if resolution scaling is active,
+             // because the shader code is not expecting scaled values.
+             Operand renderScale = context.Load(StorageKind.Input, IoVariable.SupportBlockRenderScale, null, Const(0));
+
+             value = context.FPDivide(value, renderScale);
+         }
+         else if (inst.Imm10 == AttributeConsts.FrontFacing && context.Config.GpuAccessor.QueryHostHasFrontFacingBug())
+         {
+             // gl_FrontFacing sometimes has incorrect (flipped) values depending how it is accessed on Intel GPUs.
+             // This weird trick makes it behave.
+             Operand negated = context.INegate(context.IConvertS32ToFP32(value));
+
+             value = context.ICompareLess(negated, Const(0));
+         }
+     }
+
+     // The extra multiplication is skipped for attributes handled as fixed function.
+     if (inst.IpaOp == IpaOp.Multiply && !isFixedFunc)
+     {
+         value = context.FPMultiply(value, GetSrcReg(context, inst.SrcB));
+     }
+
+     value = context.FPSaturate(value, inst.Sat);
+
+     context.Copy(GetDest(inst.Dest), value);
+ }
+
+ /// <summary>
+ /// Emits code for ISBERD by copying source register A to the destination register.
+ /// </summary>
+ /// <param name="context">Emitter context positioned at the instruction</param>
+ public static void Isberd(EmitterContext context)
+ {
+     InstIsberd inst = context.GetOp<InstIsberd>();
+
+     // ISBERD loads from ISBE (Internal Stage Buffer Entry) memory.
+     // Its result is usually fed to ALD to perform a vertex load on geometry or
+     // tessellation shaders, so only the offset is propagated here.
+     // The offset is calculated as (PrimitiveIndex * VerticesPerPrimitive) + VertexIndex.
+     // PrimitiveIndex is hardcoded to zero, which leaves just VertexIndex.
+     Operand destination = GetDest(inst.Dest);
+     Operand vertexIndex = GetSrcReg(context, inst.SrcA);
+
+     context.Copy(destination, vertexIndex);
+ }
+
+ /// <summary>
+ /// Emits code for the OUT instruction, register variant.
+ /// </summary>
+ /// <param name="context">Emitter context positioned at the instruction</param>
+ public static void OutR(EmitterContext context)
+ {
+     var outType = context.GetOp<InstOutR>().OutType;
+
+     bool emit = outType.HasFlag(OutType.Emit);
+     bool cut = outType.HasFlag(OutType.Cut);
+
+     EmitOut(context, emit, cut);
+ }
+
+ /// <summary>
+ /// Emits code for the OUT instruction, immediate variant.
+ /// </summary>
+ /// <param name="context">Emitter context positioned at the instruction</param>
+ public static void OutI(EmitterContext context)
+ {
+     var outType = context.GetOp<InstOutI>().OutType;
+
+     bool emit = outType.HasFlag(OutType.Emit);
+     bool cut = outType.HasFlag(OutType.Cut);
+
+     EmitOut(context, emit, cut);
+ }
+
+ /// <summary>
+ /// Emits code for the OUT instruction, constant buffer variant.
+ /// </summary>
+ /// <param name="context">Emitter context positioned at the instruction</param>
+ public static void OutC(EmitterContext context)
+ {
+     var outType = context.GetOp<InstOutC>().OutType;
+
+     bool emit = outType.HasFlag(OutType.Emit);
+     bool cut = outType.HasFlag(OutType.Cut);
+
+     EmitOut(context, emit, cut);
+ }
+
+ /// <summary>
+ /// Emits the vertex emission and/or primitive end requested by an OUT instruction.
+ /// </summary>
+ /// <param name="context">Emitter context that receives the generated operations</param>
+ /// <param name="emit">True to emit a vertex</param>
+ /// <param name="cut">True to end the current primitive</param>
+ private static void EmitOut(EmitterContext context, bool emit, bool cut)
+ {
+     if (!(emit || cut))
+     {
+         // Neither operation was requested; this is only reported, nothing is emitted below.
+         context.Config.GpuAccessor.Log("Invalid OUT encoding.");
+     }
+
+     if (emit)
+     {
+         if (context.Config.LastInVertexPipeline)
+         {
+             context.PrepareForVertexReturn(out var tempXLocal, out var tempYLocal, out var tempZLocal);
+
+             context.EmitVertex();
+
+             // Restore output position value before transformation.
+             // The saved values must be written back to the *output* position with a store.
+             // Copying into the temporary produced by a load leaves the output unchanged.
+
+             if (tempXLocal != null)
+             {
+                 context.Store(StorageKind.Output, IoVariable.Position, null, Const(0), tempXLocal);
+             }
+
+             if (tempYLocal != null)
+             {
+                 context.Store(StorageKind.Output, IoVariable.Position, null, Const(1), tempYLocal);
+             }
+
+             if (tempZLocal != null)
+             {
+                 context.Store(StorageKind.Output, IoVariable.Position, null, Const(2), tempZLocal);
+             }
+         }
+         else
+         {
+             context.EmitVertex();
+         }
+     }
+
+     if (cut)
+     {
+         context.EndPrimitive();
+     }
+ }
+
+ /// <summary>
+ /// Checks whether loads of the given attribute take a primitive vertex index.
+ /// </summary>
+ /// <param name="attr">Attribute offset</param>
+ /// <returns>False for the primitive ID and the tessellation coordinates X/Y, true otherwise</returns>
+ private static bool HasPrimitiveVertex(int attr)
+ {
+     bool isPerInvocation =
+         attr == AttributeConsts.PrimitiveId ||
+         attr == AttributeConsts.TessCoordX ||
+         attr == AttributeConsts.TessCoordY;
+
+     return !isPerInvocation;
+ }
+
+ /// <summary>
+ /// Checks whether the given attribute may be loaded from the shader outputs.
+ /// </summary>
+ /// <param name="attr">Attribute offset</param>
+ /// <returns>False for the tessellation coordinates X/Y, true otherwise</returns>
+ private static bool CanLoadOutput(int attr)
+ {
+     bool isTessCoord = attr == AttributeConsts.TessCoordX || attr == AttributeConsts.TessCoordY;
+
+     return !isTessCoord;
+ }
+
+ /// <summary>
+ /// Generates the load for an attribute read by IPA, redirecting fixed function attributes
+ /// to the user attribute that replaces them.
+ /// </summary>
+ /// <param name="context">Emitter context that receives the generated operations</param>
+ /// <param name="attr">Attribute offset</param>
+ /// <param name="selectedAttr">Value of the attribute</param>
+ /// <returns>True if the attribute was handled as fixed function, false for a regular load</returns>
+ private static bool TryFixedFuncToUserAttributeIpa(EmitterContext context, int attr, out Operand selectedAttr)
+ {
+     Operand LoadRemapped()
+     {
+         return GenerateIpaLoad(context, FixedFuncToUserAttribute(context.Config, attr, isOutput: false));
+     }
+
+     bool isFrontColor = attr >= AttributeConsts.FrontColorDiffuseR && attr < AttributeConsts.BackColorDiffuseR;
+
+     if (isFrontColor || attr == AttributeConsts.FogCoord)
+     {
+         // TODO: If two sided rendering is enabled, then the color case should return
+         // FrontColor if the fragment is front facing, and back color otherwise.
+         // TODO: We likely need to emulate the fixed-function functionality for FogCoord here.
+         selectedAttr = LoadRemapped();
+         return true;
+     }
+
+     if (attr >= AttributeConsts.BackColorDiffuseR && attr < AttributeConsts.ClipDistance0)
+     {
+         // This range is not loaded at all: a constant is produced instead,
+         // 1.0 for the fourth component of each vector and 0.0 for the others.
+         bool isLastComponent = ((attr >> 2) & 3) == 3;
+
+         selectedAttr = ConstF(isLastComponent ? 1f : 0f);
+         return true;
+     }
+
+     if (attr >= AttributeConsts.TexCoordBase && attr < AttributeConsts.TexCoordEnd)
+     {
+         selectedAttr = LoadRemapped();
+         return true;
+     }
+
+     selectedAttr = GenerateIpaLoad(context, attr);
+     return false;
+ }
+
+ /// <summary>
+ /// Generates an input attribute load with no vertex index, for use by IPA.
+ /// </summary>
+ /// <param name="context">Emitter context that receives the generated operations</param>
+ /// <param name="offset">Attribute offset</param>
+ /// <returns>The loaded attribute value</returns>
+ private static Operand GenerateIpaLoad(EmitterContext context, int offset)
+     => AttributeMap.GenerateAttributeLoad(context, null, offset, isOutput: false, isPerPatch: false);
+
+ /// <summary>
+ /// Remaps a fixed function attribute offset to the user attribute offset that replaces it.
+ /// </summary>
+ /// <param name="config">Shader configuration used to allocate the user attribute</param>
+ /// <param name="attr">Attribute offset</param>
+ /// <param name="isOutput">True for an output attribute, false for an input attribute</param>
+ /// <returns>The remapped offset, or <paramref name="attr"/> if it needs no remapping</returns>
+ private static int FixedFuncToUserAttribute(ShaderConfig config, int attr, bool isOutput)
+ {
+     bool hostSupportsLayer = config.GpuAccessor.QueryHostSupportsLayerVertexTessellation();
+
+     // When the layer itself has to be remapped, it takes index 0,
+     // so all other fixed function attributes start one index later.
+     int firstIndex = hostSupportsLayer ? 0 : 1;
+
+     if (attr == AttributeConsts.Layer && config.Stage != ShaderStage.Geometry && !hostSupportsLayer)
+     {
+         int layerAttr = FixedFuncToUserAttribute(config, attr, AttributeConsts.Layer, 0, isOutput);
+
+         config.SetLayerOutputAttribute(layerAttr);
+
+         return layerAttr;
+     }
+
+     if (attr == AttributeConsts.FogCoord)
+     {
+         return FixedFuncToUserAttribute(config, attr, AttributeConsts.FogCoord, firstIndex, isOutput);
+     }
+
+     if (attr >= AttributeConsts.FrontColorDiffuseR && attr < AttributeConsts.ClipDistance0)
+     {
+         return FixedFuncToUserAttribute(config, attr, AttributeConsts.FrontColorDiffuseR, firstIndex + 1, isOutput);
+     }
+
+     if (attr >= AttributeConsts.TexCoordBase && attr < AttributeConsts.TexCoordEnd)
+     {
+         return FixedFuncToUserAttribute(config, attr, AttributeConsts.TexCoordBase, firstIndex + 5, isOutput);
+     }
+
+     return attr;
+ }
+
+ /// <summary>
+ /// Remaps one fixed function attribute offset to a free user attribute.
+ /// </summary>
+ /// <param name="config">Shader configuration used to allocate the user attribute</param>
+ /// <param name="attr">Attribute offset to remap</param>
+ /// <param name="baseAttr">Offset of the first attribute of the fixed function group</param>
+ /// <param name="baseIndex">Index requested for the first attribute of the group</param>
+ /// <param name="isOutput">True for an output attribute, false for an input attribute</param>
+ /// <returns>The remapped offset, or <paramref name="attr"/> unchanged if no user attribute was available</returns>
+ private static int FixedFuncToUserAttribute(ShaderConfig config, int attr, int baseAttr, int baseIndex, bool isOutput)
+ {
+     // Each attribute vector is 16 bytes, so this is the vector index inside the group.
+     int index = (attr - baseAttr) >> 4;
+     int userAttrIndex = config.GetFreeUserAttribute(isOutput, baseIndex + index);
+
+     // The unsigned comparison also rejects negative indices.
+     if ((uint)userAttrIndex < Constants.MaxAttributes)
+     {
+         // Keep the byte offset inside the vector (low 4 bits) and replace the vector base.
+         attr = AttributeConsts.UserAttributeBase + userAttrIndex * 16 + (attr & 0xf);
+
+         if (isOutput)
+         {
+             config.SetOutputUserAttributeFixedFunc(userAttrIndex);
+         }
+         else
+         {
+             config.SetInputUserAttributeFixedFunc(userAttrIndex);
+         }
+     }
+     else
+     {
+         config.GpuAccessor.Log($"Not enough user attributes for fixed attribute offset 0x{attr:X}.");
+     }
+
+     return attr;
+ }
+
+ /// <summary>
+ /// When targeting Vulkan, generates the instance ID or vertex ID from the
+ /// instance index / vertex index inputs.
+ /// </summary>
+ /// <param name="context">Emitter context that receives the generated operations</param>
+ /// <param name="attr">Attribute offset</param>
+ /// <param name="value">Generated value, or null if nothing was generated</param>
+ /// <returns>True if the value was generated, false otherwise</returns>
+ private static bool TryConvertIdToIndexForVulkan(EmitterContext context, int attr, out Operand value)
+ {
+     value = null;
+
+     if (context.Config.Options.TargetApi != TargetApi.Vulkan)
+     {
+         return false;
+     }
+
+     if (attr == AttributeConsts.InstanceId)
+     {
+         // The instance ID is the instance index with the base instance subtracted.
+         Operand instanceIndex = context.Load(StorageKind.Input, IoVariable.InstanceIndex);
+         Operand baseInstance = context.Load(StorageKind.Input, IoVariable.BaseInstance);
+
+         value = context.ISubtract(instanceIndex, baseInstance);
+
+         return true;
+     }
+
+     if (attr == AttributeConsts.VertexId)
+     {
+         value = context.Load(StorageKind.Input, IoVariable.VertexIndex);
+
+         return true;
+     }
+
+     return false;
+ }
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/Instructions/InstEmitBarrier.cs b/src/Ryujinx.Graphics.Shader/Instructions/InstEmitBarrier.cs
new file mode 100644
index 00000000..f3114c6e
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/Instructions/InstEmitBarrier.cs
@@ -0,0 +1,44 @@
+using Ryujinx.Graphics.Shader.Decoders;
+using Ryujinx.Graphics.Shader.Translation;
+
+namespace Ryujinx.Graphics.Shader.Instructions
+{
+ static partial class InstEmit
+ {
+ /// <summary>
+ /// Emits code for BAR. Only the sync mode generates a barrier; other modes are logged.
+ /// </summary>
+ /// <param name="context">Emitter context positioned at the instruction</param>
+ public static void Bar(EmitterContext context)
+ {
+     InstBar inst = context.GetOp<InstBar>();
+
+     // TODO: Support other modes.
+     if (inst.BarOp != BarOp.Sync)
+     {
+         context.Config.GpuAccessor.Log($"Invalid barrier mode: {inst.BarOp}.");
+
+         return;
+     }
+
+     context.Barrier();
+ }
+
+ /// <summary>
+ /// Handles DEPBAR. The instruction is fetched from the context, but no code is generated for it.
+ /// </summary>
+ /// <param name="context">Emitter context positioned at the instruction</param>
+ public static void Depbar(EmitterContext context)
+ {
+     // The decoded instruction is not needed, as this is a no operation.
+     _ = context.GetOp<InstDepbar>();
+ }
+
+ /// <summary>
+ /// Emits code for MEMBAR: a group memory barrier for the CTA level, a memory barrier otherwise.
+ /// </summary>
+ /// <param name="context">Emitter context positioned at the instruction</param>
+ public static void Membar(EmitterContext context)
+ {
+     InstMembar inst = context.GetOp<InstMembar>();
+
+     bool isCtaLevel = inst.Membar == Decoders.Membar.Cta;
+
+     if (!isCtaLevel)
+     {
+         context.MemoryBarrier();
+
+         return;
+     }
+
+     context.GroupMemoryBarrier();
+ }
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/Instructions/InstEmitBitfield.cs b/src/Ryujinx.Graphics.Shader/Instructions/InstEmitBitfield.cs
new file mode 100644
index 00000000..71925269
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/Instructions/InstEmitBitfield.cs
@@ -0,0 +1,194 @@
+using Ryujinx.Graphics.Shader.Decoders;
+using Ryujinx.Graphics.Shader.IntermediateRepresentation;
+using Ryujinx.Graphics.Shader.Translation;
+
+using static Ryujinx.Graphics.Shader.Instructions.InstEmitHelper;
+using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
+
+namespace Ryujinx.Graphics.Shader.Instructions
+{
+ static partial class InstEmit
+ {
+ /// <summary>
+ /// Emits code for BFE (bitfield extract), with the second source coming from a register.
+ /// </summary>
+ /// <param name="context">Emitter context positioned at the instruction</param>
+ public static void BfeR(EmitterContext context)
+ {
+     InstBfeR inst = context.GetOp<InstBfeR>();
+
+     Operand value = GetSrcReg(context, inst.SrcA);
+     Operand positionAndSize = GetSrcReg(context, inst.SrcB);
+
+     EmitBfe(context, value, positionAndSize, inst.Dest, inst.Brev, inst.Signed);
+ }
+
+ /// <summary>
+ /// Emits code for BFE (bitfield extract), with the second source coming from a 20-bit immediate.
+ /// </summary>
+ /// <param name="context">Emitter context positioned at the instruction</param>
+ public static void BfeI(EmitterContext context)
+ {
+     InstBfeI inst = context.GetOp<InstBfeI>();
+
+     Operand value = GetSrcReg(context, inst.SrcA);
+     Operand positionAndSize = GetSrcImm(context, Imm20ToSInt(inst.Imm20));
+
+     EmitBfe(context, value, positionAndSize, inst.Dest, inst.Brev, inst.Signed);
+ }
+
+ /// <summary>
+ /// Emits code for BFE (bitfield extract), with the second source coming from a constant buffer.
+ /// </summary>
+ /// <param name="context">Emitter context positioned at the instruction</param>
+ public static void BfeC(EmitterContext context)
+ {
+     InstBfeC inst = context.GetOp<InstBfeC>();
+
+     Operand value = GetSrcReg(context, inst.SrcA);
+     Operand positionAndSize = GetSrcCbuf(context, inst.CbufSlot, inst.CbufOffset);
+
+     EmitBfe(context, value, positionAndSize, inst.Dest, inst.Brev, inst.Signed);
+ }
+
+ /// <summary>
+ /// Emits code for BFI (bitfield insert), with all sources coming from registers.
+ /// </summary>
+ /// <param name="context">Emitter context positioned at the instruction</param>
+ public static void BfiR(EmitterContext context)
+ {
+     InstBfiR inst = context.GetOp<InstBfiR>();
+
+     Operand insert = GetSrcReg(context, inst.SrcA);
+     Operand positionAndSize = GetSrcReg(context, inst.SrcB);
+     Operand baseValue = GetSrcReg(context, inst.SrcC);
+
+     EmitBfi(context, insert, positionAndSize, baseValue, inst.Dest);
+ }
+
+ /// <summary>
+ /// Emits code for BFI (bitfield insert), with the second source coming from a 20-bit immediate.
+ /// </summary>
+ /// <param name="context">Emitter context positioned at the instruction</param>
+ public static void BfiI(EmitterContext context)
+ {
+     InstBfiI inst = context.GetOp<InstBfiI>();
+
+     Operand insert = GetSrcReg(context, inst.SrcA);
+     Operand positionAndSize = GetSrcImm(context, Imm20ToSInt(inst.Imm20));
+     Operand baseValue = GetSrcReg(context, inst.SrcC);
+
+     EmitBfi(context, insert, positionAndSize, baseValue, inst.Dest);
+ }
+
+ /// <summary>
+ /// Emits code for BFI (bitfield insert), with the second source coming from a constant buffer.
+ /// </summary>
+ /// <param name="context">Emitter context positioned at the instruction</param>
+ public static void BfiC(EmitterContext context)
+ {
+     InstBfiC inst = context.GetOp<InstBfiC>();
+
+     Operand insert = GetSrcReg(context, inst.SrcA);
+     Operand positionAndSize = GetSrcCbuf(context, inst.CbufSlot, inst.CbufOffset);
+     Operand baseValue = GetSrcReg(context, inst.SrcC);
+
+     EmitBfi(context, insert, positionAndSize, baseValue, inst.Dest);
+ }
+
+ /// <summary>
+ /// Emits code for BFI (bitfield insert), with the second source coming from register C
+ /// and the third source coming from a constant buffer.
+ /// </summary>
+ /// <param name="context">Emitter context positioned at the instruction</param>
+ public static void BfiRc(EmitterContext context)
+ {
+     InstBfiRc inst = context.GetOp<InstBfiRc>();
+
+     Operand insert = GetSrcReg(context, inst.SrcA);
+
+     // On this encoding the register C field holds the position/size operand.
+     Operand positionAndSize = GetSrcReg(context, inst.SrcC);
+     Operand baseValue = GetSrcCbuf(context, inst.CbufSlot, inst.CbufOffset);
+
+     EmitBfi(context, insert, positionAndSize, baseValue, inst.Dest);
+ }
+
+ /// <summary>
+ /// Emits code for FLO, with the source coming from a register.
+ /// </summary>
+ /// <param name="context">Emitter context positioned at the instruction</param>
+ public static void FloR(EmitterContext context)
+ {
+     InstFloR inst = context.GetOp<InstFloR>();
+
+     Operand source = GetSrcReg(context, inst.SrcB);
+
+     EmitFlo(context, source, inst.Dest, inst.NegB, inst.Sh, inst.Signed);
+ }
+
+ /// <summary>
+ /// Emits code for FLO, with the source coming from a 20-bit immediate.
+ /// </summary>
+ /// <param name="context">Emitter context positioned at the instruction</param>
+ public static void FloI(EmitterContext context)
+ {
+     InstFloI inst = context.GetOp<InstFloI>();
+
+     Operand source = GetSrcImm(context, Imm20ToSInt(inst.Imm20));
+
+     EmitFlo(context, source, inst.Dest, inst.NegB, inst.Sh, inst.Signed);
+ }
+
+ /// <summary>
+ /// Emits code for FLO, with the source coming from a constant buffer.
+ /// </summary>
+ /// <param name="context">Emitter context positioned at the instruction</param>
+ public static void FloC(EmitterContext context)
+ {
+     InstFloC inst = context.GetOp<InstFloC>();
+
+     Operand source = GetSrcCbuf(context, inst.CbufSlot, inst.CbufOffset);
+
+     EmitFlo(context, source, inst.Dest, inst.NegB, inst.Sh, inst.Signed);
+ }
+
+ /// <summary>
+ /// Emits code for POPC (bit count), with the source coming from a register.
+ /// </summary>
+ /// <param name="context">Emitter context positioned at the instruction</param>
+ public static void PopcR(EmitterContext context)
+ {
+     InstPopcR inst = context.GetOp<InstPopcR>();
+
+     Operand source = GetSrcReg(context, inst.SrcB);
+
+     EmitPopc(context, source, inst.Dest, inst.NegB);
+ }
+
+ /// <summary>
+ /// Emits code for POPC (bit count), with the source coming from a 20-bit immediate.
+ /// </summary>
+ /// <param name="context">Emitter context positioned at the instruction</param>
+ public static void PopcI(EmitterContext context)
+ {
+     InstPopcI inst = context.GetOp<InstPopcI>();
+
+     Operand source = GetSrcImm(context, Imm20ToSInt(inst.Imm20));
+
+     EmitPopc(context, source, inst.Dest, inst.NegB);
+ }
+
+ /// <summary>
+ /// Emits code for POPC (bit count), with the source coming from a constant buffer.
+ /// </summary>
+ /// <param name="context">Emitter context positioned at the instruction</param>
+ public static void PopcC(EmitterContext context)
+ {
+     InstPopcC inst = context.GetOp<InstPopcC>();
+
+     Operand source = GetSrcCbuf(context, inst.CbufSlot, inst.CbufOffset);
+
+     EmitPopc(context, source, inst.Dest, inst.NegB);
+ }
+
+ /// <summary>
+ /// Emits a bitfield extraction and writes the result to a register.
+ /// </summary>
+ /// <param name="context">Emitter context that receives the generated operations</param>
+ /// <param name="srcA">Value the bits are extracted from</param>
+ /// <param name="srcB">Packed operand: bits 0-7 hold the position, bits 8-15 hold the size</param>
+ /// <param name="rd">Destination register index</param>
+ /// <param name="bitReverse">True to reverse the bits of <paramref name="srcA"/> before extracting</param>
+ /// <param name="isSigned">True for a signed extraction, false for an unsigned one</param>
+ private static void EmitBfe(
+     EmitterContext context,
+     Operand srcA,
+     Operand srcB,
+     int rd,
+     bool bitReverse,
+     bool isSigned)
+ {
+     Operand value = bitReverse ? context.BitfieldReverse(srcA) : srcA;
+
+     Operand position = context.BitwiseAnd(srcB, Const(0xff));
+     Operand size = context.BitfieldExtractU32(srcB, Const(8), Const(8));
+
+     Operand extracted;
+
+     if (isSigned)
+     {
+         extracted = context.BitfieldExtractS32(value, position, size);
+     }
+     else
+     {
+         extracted = context.BitfieldExtractU32(value, position, size);
+     }
+
+     context.Copy(GetDest(rd), extracted);
+
+     // TODO: CC, X, corner cases.
+ }
+
+ /// <summary>
+ /// Emits a bitfield insertion and writes the result to a register.
+ /// </summary>
+ /// <param name="context">Emitter context that receives the generated operations</param>
+ /// <param name="srcA">Value passed as the insert operand</param>
+ /// <param name="srcB">Packed operand: bits 0-7 hold the position, bits 8-15 hold the size</param>
+ /// <param name="srcC">Value passed as the base operand</param>
+ /// <param name="rd">Destination register index</param>
+ private static void EmitBfi(EmitterContext context, Operand srcA, Operand srcB, Operand srcC, int rd)
+ {
+     Operand position = context.BitwiseAnd(srcB, Const(0xff));
+     Operand size = context.BitfieldExtractU32(srcB, Const(8), Const(8));
+
+     Operand inserted = context.BitfieldInsert(srcC, srcA, position, size);
+
+     context.Copy(GetDest(rd), inserted);
+ }
+
+ /// <summary>
+ /// Emits a "find leading one" style operation and writes the result to a register.
+ /// </summary>
+ /// <param name="context">Emitter context that receives the generated operations</param>
+ /// <param name="src">Source value</param>
+ /// <param name="rd">Destination register index</param>
+ /// <param name="invert">True to apply a bitwise NOT to the source first</param>
+ /// <param name="sh">True to return the LSB position of the bit-reversed source instead of the MSB position</param>
+ /// <param name="isSigned">Selects the signed or unsigned MSB search (unused when <paramref name="sh"/> is true)</param>
+ private static void EmitFlo(EmitterContext context, Operand src, int rd, bool invert, bool sh, bool isSigned)
+ {
+     Operand value = context.BitwiseNot(src, invert);
+
+     Operand position;
+
+     if (sh)
+     {
+         position = context.FindLSB(context.BitfieldReverse(value));
+     }
+     else if (isSigned)
+     {
+         position = context.FindMSBS32(value);
+     }
+     else
+     {
+         position = context.FindMSBU32(value);
+     }
+
+     context.Copy(GetDest(rd), position);
+ }
+
+ /// <summary>
+ /// Emits a bit count and writes the result to a register.
+ /// </summary>
+ /// <param name="context">Emitter context that receives the generated operations</param>
+ /// <param name="src">Source value</param>
+ /// <param name="rd">Destination register index</param>
+ /// <param name="invert">True to apply a bitwise NOT to the source before counting</param>
+ private static void EmitPopc(EmitterContext context, Operand src, int rd, bool invert)
+ {
+     Operand value = context.BitwiseNot(src, invert);
+     Operand count = context.BitCount(value);
+
+     context.Copy(GetDest(rd), count);
+ }
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/Instructions/InstEmitConditionCode.cs b/src/Ryujinx.Graphics.Shader/Instructions/InstEmitConditionCode.cs
new file mode 100644
index 00000000..74ac7602
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/Instructions/InstEmitConditionCode.cs
@@ -0,0 +1,87 @@
+using Ryujinx.Graphics.Shader.Decoders;
+using Ryujinx.Graphics.Shader.IntermediateRepresentation;
+using Ryujinx.Graphics.Shader.Translation;
+
+using static Ryujinx.Graphics.Shader.Instructions.InstEmitAluHelper;
+using static Ryujinx.Graphics.Shader.Instructions.InstEmitHelper;
+using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
+
+namespace Ryujinx.Graphics.Shader.Instructions
+{
+ static partial class InstEmit
+ {
+ /// <summary>
+ /// Emits code for CSET: evaluates a condition code, combines it with a predicate,
+ /// and writes the result to a register.
+ /// </summary>
+ /// <param name="context">Emitter context positioned at the instruction</param>
+ public static void Cset(EmitterContext context)
+ {
+     InstCset inst = context.GetOp<InstCset>();
+
+     Operand condition = GetCondition(context, inst.Ccc);
+     Operand predicate = GetPredicate(context, inst.SrcPred, inst.SrcPredInv);
+
+     Operand combined = GetPredLogicalOp(context, inst.Bop, condition, predicate);
+
+     Operand destination = GetDest(inst.Dest);
+
+     // With BVal set, the result is written as the float 1.0 or 0,
+     // otherwise the boolean value itself is written.
+     Operand result = inst.BVal
+         ? context.ConditionalSelect(combined, ConstF(1), Const(0))
+         : combined;
+
+     context.Copy(destination, result);
+
+     // TODO: CC.
+ }
+
+ /// <summary>
+ /// Emits code for CSETP: evaluates a condition code and its inverse, combines each
+ /// with a predicate, and writes the results to two predicate registers.
+ /// </summary>
+ /// <param name="context">Emitter context positioned at the instruction</param>
+ public static void Csetp(EmitterContext context)
+ {
+     InstCsetp inst = context.GetOp<InstCsetp>();
+
+     Operand condition = GetCondition(context, inst.Ccc);
+     Operand inverseCondition = context.BitwiseNot(condition);
+     Operand predicate = GetPredicate(context, inst.SrcPred, inst.SrcPredInv);
+
+     Operand first = GetPredLogicalOp(context, inst.Bop, condition, predicate);
+     Operand second = GetPredLogicalOp(context, inst.Bop, inverseCondition, predicate);
+
+     context.Copy(Register(inst.DestPred, RegisterType.Predicate), first);
+     context.Copy(Register(inst.DestPredInv, RegisterType.Predicate), second);
+
+     // TODO: CC.
+ }
+
+ /// <summary>
+ /// Builds the boolean expression for a condition code, as a combination of the
+ /// negative (NF), zero (ZF), overflow (VF) and carry (CF) flags.
+ /// </summary>
+ /// <param name="context">Emitter context that receives the generated operations</param>
+ /// <param name="cond">Condition code to evaluate</param>
+ /// <param name="defaultCond">Constant returned for condition codes that are not listed below</param>
+ /// <returns>Operand holding the result of the condition</returns>
+ private static Operand GetCondition(EmitterContext context, Ccc cond, int defaultCond = IrConsts.True)
+ {
+ return cond switch
+ {
+ Ccc.F => Const(IrConsts.False),
+ Ccc.Lt => context.BitwiseExclusiveOr(context.BitwiseAnd(GetNF(), context.BitwiseNot(GetZF())), GetVF()),
+ Ccc.Eq => context.BitwiseAnd(context.BitwiseNot(GetNF()), GetZF()),
+ Ccc.Le => context.BitwiseExclusiveOr(GetNF(), context.BitwiseOr(GetZF(), GetVF())),
+ Ccc.Gt => context.BitwiseNot(context.BitwiseOr(context.BitwiseExclusiveOr(GetNF(), GetVF()), GetZF())),
+ Ccc.Ne => context.BitwiseNot(GetZF()),
+ Ccc.Ge => context.BitwiseNot(context.BitwiseExclusiveOr(GetNF(), GetVF())),
+ // NF and ZF both set is the combination tested by the Nan code; Num is its inverse.
+ Ccc.Num => context.BitwiseNot(context.BitwiseAnd(GetNF(), GetZF())),
+ Ccc.Nan => context.BitwiseAnd(GetNF(), GetZF()),
+ Ccc.Ltu => context.BitwiseExclusiveOr(GetNF(), GetVF()),
+ Ccc.Equ => GetZF(),
+ Ccc.Leu => context.BitwiseOr(context.BitwiseExclusiveOr(GetNF(), GetVF()), GetZF()),
+ Ccc.Gtu => context.BitwiseExclusiveOr(context.BitwiseNot(GetNF()), context.BitwiseOr(GetVF(), GetZF())),
+ Ccc.Neu => context.BitwiseOr(GetNF(), context.BitwiseNot(GetZF())),
+ Ccc.Geu => context.BitwiseExclusiveOr(context.BitwiseOr(context.BitwiseNot(GetNF()), GetZF()), GetVF()),
+ Ccc.T => Const(IrConsts.True),
+ // The codes below test a single flag, or a combination involving the carry flag.
+ Ccc.Off => context.BitwiseNot(GetVF()),
+ Ccc.Lo => context.BitwiseNot(GetCF()),
+ Ccc.Sff => context.BitwiseNot(GetNF()),
+ Ccc.Ls => context.BitwiseOr(GetZF(), context.BitwiseNot(GetCF())),
+ Ccc.Hi => context.BitwiseAnd(GetCF(), context.BitwiseNot(GetZF())),
+ Ccc.Sft => GetNF(),
+ Ccc.Hs => GetCF(),
+ Ccc.Oft => GetVF(),
+ Ccc.Rle => context.BitwiseOr(GetNF(), GetZF()),
+ Ccc.Rgt => context.BitwiseNot(context.BitwiseOr(GetNF(), GetZF())),
+ _ => Const(defaultCond)
+ };
+ }
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/Instructions/InstEmitConversion.cs b/src/Ryujinx.Graphics.Shader/Instructions/InstEmitConversion.cs
new file mode 100644
index 00000000..bebd96dd
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/Instructions/InstEmitConversion.cs
@@ -0,0 +1,425 @@
+using Ryujinx.Graphics.Shader.Decoders;
+using Ryujinx.Graphics.Shader.IntermediateRepresentation;
+using Ryujinx.Graphics.Shader.Translation;
+using System;
+
+using static Ryujinx.Graphics.Shader.Instructions.InstEmitAluHelper;
+using static Ryujinx.Graphics.Shader.Instructions.InstEmitHelper;
+using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
+
+namespace Ryujinx.Graphics.Shader.Instructions
+{
+ static partial class InstEmit
+ {
+ /// <summary>
+ /// Emits code for F2F (float to float conversion), with the source coming from a register.
+ /// </summary>
+ /// <param name="context">Emitter context positioned at the instruction</param>
+ public static void F2fR(EmitterContext context)
+ {
+     InstF2fR inst = context.GetOp<InstF2fR>();
+
+     Operand source = UnpackReg(context, inst.SrcFmt, inst.Sh, inst.SrcB);
+
+     EmitF2F(context, inst.SrcFmt, inst.DstFmt, inst.RoundMode, source, inst.Dest, inst.AbsB, inst.NegB, inst.Sat);
+ }
+
+ /// <summary>
+ /// Emits code for F2F (float to float conversion), with the source coming from a 20-bit immediate.
+ /// </summary>
+ /// <param name="context">Emitter context positioned at the instruction</param>
+ public static void F2fI(EmitterContext context)
+ {
+     InstF2fI inst = context.GetOp<InstF2fI>();
+
+     Operand source = UnpackImm(context, inst.SrcFmt, inst.Sh, Imm20ToFloat(inst.Imm20));
+
+     EmitF2F(context, inst.SrcFmt, inst.DstFmt, inst.RoundMode, source, inst.Dest, inst.AbsB, inst.NegB, inst.Sat);
+ }
+
+ /// <summary>
+ /// Emits code for F2F (float to float conversion), with the source coming from a constant buffer.
+ /// </summary>
+ /// <param name="context">Emitter context positioned at the instruction</param>
+ public static void F2fC(EmitterContext context)
+ {
+     InstF2fC inst = context.GetOp<InstF2fC>();
+
+     Operand source = UnpackCbuf(context, inst.SrcFmt, inst.Sh, inst.CbufSlot, inst.CbufOffset);
+
+     EmitF2F(context, inst.SrcFmt, inst.DstFmt, inst.RoundMode, source, inst.Dest, inst.AbsB, inst.NegB, inst.Sat);
+ }
+
+ /// <summary>
+ /// Emits code for F2I (float to integer conversion), with the source coming from a register.
+ /// </summary>
+ /// <param name="context">Emitter context positioned at the instruction</param>
+ public static void F2iR(EmitterContext context)
+ {
+     InstF2iR inst = context.GetOp<InstF2iR>();
+
+     Operand source = UnpackReg(context, inst.SrcFmt, inst.Sh, inst.SrcB);
+
+     EmitF2I(context, inst.SrcFmt, inst.IDstFmt, inst.RoundMode, source, inst.Dest, inst.AbsB, inst.NegB);
+ }
+
+ /// <summary>
+ /// Emits code for F2I (float to integer conversion), with the source coming from a 20-bit immediate.
+ /// </summary>
+ /// <param name="context">Emitter context positioned at the instruction</param>
+ public static void F2iI(EmitterContext context)
+ {
+     InstF2iI inst = context.GetOp<InstF2iI>();
+
+     Operand source = UnpackImm(context, inst.SrcFmt, inst.Sh, Imm20ToFloat(inst.Imm20));
+
+     EmitF2I(context, inst.SrcFmt, inst.IDstFmt, inst.RoundMode, source, inst.Dest, inst.AbsB, inst.NegB);
+ }
+
+ /// <summary>
+ /// Emits code for F2I (float to integer conversion), with the source coming from a constant buffer.
+ /// </summary>
+ /// <param name="context">Emitter context positioned at the instruction</param>
+ public static void F2iC(EmitterContext context)
+ {
+     InstF2iC inst = context.GetOp<InstF2iC>();
+
+     Operand source = UnpackCbuf(context, inst.SrcFmt, inst.Sh, inst.CbufSlot, inst.CbufOffset);
+
+     EmitF2I(context, inst.SrcFmt, inst.IDstFmt, inst.RoundMode, source, inst.Dest, inst.AbsB, inst.NegB);
+ }
+
+ /// <summary>
+ /// Emits code for I2F (integer to float conversion), with the source coming from a register.
+ /// </summary>
+ /// <param name="context">Emitter context positioned at the instruction</param>
+ public static void I2fR(EmitterContext context)
+ {
+     InstI2fR inst = context.GetOp<InstI2fR>();
+
+     Operand source = GetSrcReg(context, inst.SrcB);
+
+     EmitI2F(context, inst.ISrcFmt, inst.DstFmt, source, inst.ByteSel, inst.Dest, inst.AbsB, inst.NegB);
+ }
+
+ /// <summary>
+ /// Emits code for I2F (integer to float conversion), with the source coming from a 20-bit immediate.
+ /// </summary>
+ /// <param name="context">Emitter context positioned at the instruction</param>
+ public static void I2fI(EmitterContext context)
+ {
+     InstI2fI inst = context.GetOp<InstI2fI>();
+
+     Operand source = GetSrcImm(context, Imm20ToSInt(inst.Imm20));
+
+     EmitI2F(context, inst.ISrcFmt, inst.DstFmt, source, inst.ByteSel, inst.Dest, inst.AbsB, inst.NegB);
+ }
+
+ /// <summary>
+ /// Emits code for I2F (integer to float conversion), with the source coming from a constant buffer.
+ /// </summary>
+ /// <param name="context">Emitter context positioned at the instruction</param>
+ public static void I2fC(EmitterContext context)
+ {
+     InstI2fC inst = context.GetOp<InstI2fC>();
+
+     Operand source = GetSrcCbuf(context, inst.CbufSlot, inst.CbufOffset);
+
+     EmitI2F(context, inst.ISrcFmt, inst.DstFmt, source, inst.ByteSel, inst.Dest, inst.AbsB, inst.NegB);
+ }
+
+ /// <summary>
+ /// Emits code for I2I (integer to integer conversion), with the source coming from a register.
+ /// </summary>
+ /// <param name="context">Emitter context positioned at the instruction</param>
+ public static void I2iR(EmitterContext context)
+ {
+     InstI2iR inst = context.GetOp<InstI2iR>();
+
+     Operand source = GetSrcReg(context, inst.SrcB);
+
+     EmitI2I(context, inst.ISrcFmt, inst.IDstFmt, source, inst.ByteSel, inst.Dest, inst.AbsB, inst.NegB, inst.Sat, inst.WriteCC);
+ }
+
+ /// <summary>
+ /// Emits code for I2I (integer to integer conversion), with the source coming from a 20-bit immediate.
+ /// </summary>
+ /// <param name="context">Emitter context positioned at the instruction</param>
+ public static void I2iI(EmitterContext context)
+ {
+     InstI2iI inst = context.GetOp<InstI2iI>();
+
+     Operand source = GetSrcImm(context, Imm20ToSInt(inst.Imm20));
+
+     EmitI2I(context, inst.ISrcFmt, inst.IDstFmt, source, inst.ByteSel, inst.Dest, inst.AbsB, inst.NegB, inst.Sat, inst.WriteCC);
+ }
+
+ /// <summary>
+ /// Emits code for I2I (integer to integer conversion), with the source coming from a constant buffer.
+ /// </summary>
+ /// <param name="context">Emitter context positioned at the instruction</param>
+ public static void I2iC(EmitterContext context)
+ {
+     InstI2iC inst = context.GetOp<InstI2iC>();
+
+     Operand source = GetSrcCbuf(context, inst.CbufSlot, inst.CbufOffset);
+
+     EmitI2I(context, inst.ISrcFmt, inst.IDstFmt, source, inst.ByteSel, inst.Dest, inst.AbsB, inst.NegB, inst.Sat, inst.WriteCC);
+ }
+
+ /// <summary>
+ /// Emits a float to float conversion (or a rounding, when source and destination types match)
+ /// and writes the result to the destination register(s).
+ /// </summary>
+ /// <param name="context">Emitter context that receives the generated operations</param>
+ /// <param name="srcType">Floating-point format of the source</param>
+ /// <param name="dstType">Floating-point format of the destination</param>
+ /// <param name="roundingMode">Rounding applied when both formats are the same</param>
+ /// <param name="src">Source value</param>
+ /// <param name="rd">Destination register index</param>
+ /// <param name="absolute">True to take the absolute value of the source</param>
+ /// <param name="negate">True to negate the source</param>
+ /// <param name="saturate">True to saturate the result</param>
+ private static void EmitF2F(
+     EmitterContext context,
+     DstFmt srcType,
+     DstFmt dstType,
+     IntegerRound roundingMode,
+     Operand src,
+     int rd,
+     bool absolute,
+     bool negate,
+     bool saturate)
+ {
+     Instruction srcFpType = srcType.ToInstFPType();
+
+     Operand value = context.FPAbsNeg(src, absolute, negate, srcFpType);
+
+     // Rounding to an integral value is only done when the format does not change.
+     if (srcType == dstType)
+     {
+         switch (roundingMode)
+         {
+             case IntegerRound.Round:
+                 value = context.FPRound(value, srcFpType);
+                 break;
+             case IntegerRound.Floor:
+                 value = context.FPFloor(value, srcFpType);
+                 break;
+             case IntegerRound.Ceil:
+                 value = context.FPCeiling(value, srcFpType);
+                 break;
+             case IntegerRound.Trunc:
+                 value = context.FPTruncate(value, srcFpType);
+                 break;
+         }
+     }
+
+     // We don't need to handle conversions between FP16 <-> FP32
+     // since we do FP16 operations as FP32 directly.
+     // FP16 <-> FP64 conversions are invalid.
+     bool widens = srcType == DstFmt.F32 && dstType == DstFmt.F64;
+     bool narrows = srcType == DstFmt.F64 && dstType == DstFmt.F32;
+
+     if (widens)
+     {
+         value = context.FP32ConvertToFP64(value);
+     }
+     else if (narrows)
+     {
+         value = context.FP64ConvertToFP32(value);
+     }
+
+     value = context.FPSaturate(value, saturate, dstType.ToInstFPType());
+
+     WriteFP(context, dstType, value, rd);
+
+     // TODO: CC.
+ }
+
+ /// <summary>
+ /// Emits a float to integer conversion and writes the result to the destination register.
+ /// </summary>
+ /// <param name="context">Emitter context that receives the generated operations</param>
+ /// <param name="srcType">Floating-point format of the source</param>
+ /// <param name="dstType">Integer format of the destination</param>
+ /// <param name="roundingMode">Rounding applied before the conversion</param>
+ /// <param name="src">Source value</param>
+ /// <param name="rd">Destination register index</param>
+ /// <param name="absolute">True to take the absolute value of the source</param>
+ /// <param name="negate">True to negate the source</param>
+ private static void EmitF2I(
+     EmitterContext context,
+     DstFmt srcType,
+     IDstFmt dstType,
+     RoundMode2 roundingMode,
+     Operand src,
+     int rd,
+     bool absolute,
+     bool negate)
+ {
+     // Only 32-bit conversions are emitted below and a single register is written,
+     // so both 64-bit destination formats (signed and unsigned) are reported.
+     if (dstType == IDstFmt.U64 || dstType == IDstFmt.S64)
+     {
+         context.Config.GpuAccessor.Log("Unimplemented 64-bits F2I.");
+     }
+
+     Instruction fpType = srcType.ToInstFPType();
+
+     bool isSignedInt = dstType == IDstFmt.S16 || dstType == IDstFmt.S32 || dstType == IDstFmt.S64;
+     bool isSmallInt = dstType == IDstFmt.U16 || dstType == IDstFmt.S16;
+
+     Operand srcB = context.FPAbsNeg(src, absolute, negate, fpType);
+
+     srcB = roundingMode switch
+     {
+         RoundMode2.Round => context.FPRound(srcB, fpType),
+         RoundMode2.Floor => context.FPFloor(srcB, fpType),
+         RoundMode2.Ceil => context.FPCeiling(srcB, fpType),
+         RoundMode2.Trunc => context.FPTruncate(srcB, fpType),
+         _ => srcB
+     };
+
+     if (!isSignedInt)
+     {
+         // Negative float to uint cast is undefined, so we clamp the value before conversion.
+         Operand c0 = srcType == DstFmt.F64 ? context.PackDouble2x32(0.0) : ConstF(0);
+
+         srcB = context.FPMaximum(srcB, c0, fpType);
+     }
+
+     if (srcType == DstFmt.F64)
+     {
+         srcB = isSignedInt
+             ? context.FP64ConvertToS32(srcB)
+             : context.FP64ConvertToU32(srcB);
+     }
+     else
+     {
+         srcB = isSignedInt
+             ? context.FP32ConvertToS32(srcB)
+             : context.FP32ConvertToU32(srcB);
+     }
+
+     if (isSmallInt)
+     {
+         // 16-bit destinations are clamped to the range of the destination format.
+         int min = (int)GetIntMin(dstType);
+         int max = (int)GetIntMax(dstType);
+
+         srcB = isSignedInt
+             ? context.IClampS32(srcB, Const(min), Const(max))
+             : context.IClampU32(srcB, Const(min), Const(max));
+     }
+
+     Operand dest = GetDest(rd);
+
+     context.Copy(dest, srcB);
+
+     // TODO: CC.
+ }
+
+ /// <summary>
+ /// Emits an integer to float conversion and writes the result to the destination register(s).
+ /// </summary>
+ /// <param name="context">Emitter context that receives the generated operations</param>
+ /// <param name="srcType">Integer format of the source</param>
+ /// <param name="dstType">Floating-point format of the destination</param>
+ /// <param name="src">Source value</param>
+ /// <param name="byteSelection">Byte of the source where 8-bit and 16-bit values start</param>
+ /// <param name="rd">Destination register index</param>
+ /// <param name="absolute">True to take the absolute value of the source</param>
+ /// <param name="negate">True to negate the source</param>
+ private static void EmitI2F(
+     EmitterContext context,
+     ISrcFmt srcType,
+     DstFmt dstType,
+     Operand src,
+     ByteSel byteSelection,
+     int rd,
+     bool absolute,
+     bool negate)
+ {
+     bool is8Bit = srcType == ISrcFmt.U8 || srcType == ISrcFmt.S8;
+     bool is16Bit = srcType == ISrcFmt.U16 || srcType == ISrcFmt.S16;
+
+     bool isSigned =
+         srcType == ISrcFmt.S8 ||
+         srcType == ISrcFmt.S16 ||
+         srcType == ISrcFmt.S32 ||
+         srcType == ISrcFmt.S64;
+
+     // TODO: Handle S/U64.
+
+     // NOTE(review): abs/neg is applied to the full source word before the sub-word
+     // extraction below, while EmitI2I extracts first - confirm which order is intended.
+     Operand value = context.IAbsNeg(src, absolute, negate);
+
+     if (is16Bit || is8Bit)
+     {
+         Operand bitOffset = Const((int)byteSelection * 8);
+         Operand bitCount = Const(is16Bit ? 16 : 8);
+
+         if (isSigned)
+         {
+             value = context.BitfieldExtractS32(value, bitOffset, bitCount);
+         }
+         else
+         {
+             value = context.BitfieldExtractU32(value, bitOffset, bitCount);
+         }
+     }
+
+     bool toDouble = dstType == DstFmt.F64;
+
+     if (isSigned)
+     {
+         value = toDouble
+             ? context.IConvertS32ToFP64(value)
+             : context.IConvertS32ToFP32(value);
+     }
+     else
+     {
+         value = toDouble
+             ? context.IConvertU32ToFP64(value)
+             : context.IConvertU32ToFP32(value);
+     }
+
+     WriteFP(context, dstType, value, rd);
+
+     // TODO: CC.
+ }
+
+ /// <summary>
+ /// Emits an integer to integer conversion and writes the result to the destination register.
+ /// </summary>
+ /// <param name="context">Emitter context that receives the generated operations</param>
+ /// <param name="srcType">Integer format of the source</param>
+ /// <param name="dstType">Integer format of the destination</param>
+ /// <param name="src">Source value</param>
+ /// <param name="byteSelection">Byte of the source where 8-bit and 16-bit values start</param>
+ /// <param name="rd">Destination register index</param>
+ /// <param name="absolute">True to take the absolute value of the source</param>
+ /// <param name="negate">True to negate the source</param>
+ /// <param name="saturate">True to clamp the result to the range of the destination format</param>
+ /// <param name="writeCC">True to update the zero and negative flags from the result</param>
+ private static void EmitI2I(
+     EmitterContext context,
+     ISrcDstFmt srcType,
+     ISrcDstFmt dstType,
+     Operand src,
+     ByteSel byteSelection,
+     int rd,
+     bool absolute,
+     bool negate,
+     bool saturate,
+     bool writeCC)
+ {
+     bool IsSigned(ISrcDstFmt format)
+     {
+         return format == ISrcDstFmt.S8 || format == ISrcDstFmt.S16 || format == ISrcDstFmt.S32;
+     }
+
+     // Formats past U32, once the S8 bits are masked out, are rejected.
+     bool srcIsInvalid = (srcType & ~ISrcDstFmt.S8) > ISrcDstFmt.U32;
+
+     if (srcIsInvalid || (dstType & ~ISrcDstFmt.S8) > ISrcDstFmt.U32)
+     {
+         context.Config.GpuAccessor.Log("Invalid I2I encoding.");
+         return;
+     }
+
+     bool srcIs16Bit = srcType == ISrcDstFmt.U16 || srcType == ISrcDstFmt.S16;
+     bool srcIs8Bit = srcType == ISrcDstFmt.U8 || srcType == ISrcDstFmt.S8;
+
+     Operand value = src;
+
+     if (srcIs16Bit || srcIs8Bit)
+     {
+         Operand bitOffset = Const((int)byteSelection * 8);
+         Operand bitCount = Const(srcIs16Bit ? 16 : 8);
+
+         if (IsSigned(srcType))
+         {
+             value = context.BitfieldExtractS32(value, bitOffset, bitCount);
+         }
+         else
+         {
+             value = context.BitfieldExtractU32(value, bitOffset, bitCount);
+         }
+     }
+
+     value = context.IAbsNeg(value, absolute, negate);
+
+     if (saturate)
+     {
+         Operand lowest = Const((int)GetIntMin(dstType));
+         Operand highest = Const((int)GetIntMax(dstType));
+
+         if (IsSigned(dstType))
+         {
+             value = context.IClampS32(value, lowest, highest);
+         }
+         else
+         {
+             value = context.IClampU32(value, lowest, highest);
+         }
+     }
+
+     context.Copy(GetDest(rd), value);
+
+     SetZnFlags(context, value, writeCC);
+ }
+
+ /// <summary>
+ /// Gets a floating-point source operand of the given format from a register.
+ /// </summary>
+ /// <param name="context">Emitter context that receives the generated operations</param>
+ /// <param name="floatType">Floating-point format of the source</param>
+ /// <param name="h">For F16, selects the second (true) or first (false) unpacked half</param>
+ /// <param name="reg">Source register index</param>
+ /// <returns>The source operand</returns>
+ /// <exception cref="ArgumentException"><paramref name="floatType"/> is not F16, F32 or F64</exception>
+ private static Operand UnpackReg(EmitterContext context, DstFmt floatType, bool h, int reg)
+ {
+     switch (floatType)
+     {
+         case DstFmt.F32:
+             return GetSrcReg(context, reg);
+
+         case DstFmt.F16:
+             Operand[] halves = GetHalfUnpacked(context, GetSrcReg(context, reg), HalfSwizzle.F16);
+
+             return halves[h ? 1 : 0];
+
+         case DstFmt.F64:
+             return GetSrcReg(context, reg, isFP64: true);
+
+         default:
+             throw new ArgumentException($"Invalid floating point type \"{floatType}\".");
+     }
+ }
+
+ /// <summary>
+ /// Gets a floating-point source operand of the given format from a constant buffer.
+ /// </summary>
+ /// <param name="context">Emitter context that receives the generated operations</param>
+ /// <param name="floatType">Floating-point format of the source</param>
+ /// <param name="h">For F16, selects the second (true) or first (false) unpacked half</param>
+ /// <param name="cbufSlot">Constant buffer slot</param>
+ /// <param name="cbufOffset">Offset inside the constant buffer</param>
+ /// <returns>The source operand</returns>
+ /// <exception cref="ArgumentException"><paramref name="floatType"/> is not F16, F32 or F64</exception>
+ private static Operand UnpackCbuf(EmitterContext context, DstFmt floatType, bool h, int cbufSlot, int cbufOffset)
+ {
+     switch (floatType)
+     {
+         case DstFmt.F32:
+             return GetSrcCbuf(context, cbufSlot, cbufOffset);
+
+         case DstFmt.F16:
+             Operand[] halves = GetHalfUnpacked(context, GetSrcCbuf(context, cbufSlot, cbufOffset), HalfSwizzle.F16);
+
+             return halves[h ? 1 : 0];
+
+         case DstFmt.F64:
+             return GetSrcCbuf(context, cbufSlot, cbufOffset, isFP64: true);
+
+         default:
+             throw new ArgumentException($"Invalid floating point type \"{floatType}\".");
+     }
+ }
+
+ /// <summary>
+ /// Gets a floating-point source operand of the given format from an immediate.
+ /// </summary>
+ /// <param name="context">Emitter context that receives the generated operations</param>
+ /// <param name="floatType">Floating-point format of the source</param>
+ /// <param name="h">For F16, selects the second (true) or first (false) unpacked half</param>
+ /// <param name="imm">Immediate value</param>
+ /// <returns>The source operand</returns>
+ /// <exception cref="ArgumentException"><paramref name="floatType"/> is not F16, F32 or F64</exception>
+ private static Operand UnpackImm(EmitterContext context, DstFmt floatType, bool h, int imm)
+ {
+     switch (floatType)
+     {
+         case DstFmt.F32:
+             return GetSrcImm(context, imm);
+
+         case DstFmt.F16:
+             Operand[] halves = GetHalfUnpacked(context, GetSrcImm(context, imm), HalfSwizzle.F16);
+
+             return halves[h ? 1 : 0];
+
+         case DstFmt.F64:
+             return GetSrcImm(context, imm, isFP64: true);
+
+         default:
+             throw new ArgumentException($"Invalid floating point type \"{floatType}\".");
+     }
+ }
+
+ /// <summary>
+ /// Writes a floating-point value of the given format to the destination register(s).
+ /// </summary>
+ /// <param name="context">Emitter context that receives the generated operations</param>
+ /// <param name="type">Floating-point format of the value</param>
+ /// <param name="srcB">Value to write</param>
+ /// <param name="rd">Destination register index</param>
+ private static void WriteFP(EmitterContext context, DstFmt type, Operand srcB, int rd)
+ {
+     Operand destination = GetDest(rd);
+
+     switch (type)
+     {
+         case DstFmt.F32:
+             context.Copy(destination, srcB);
+             break;
+
+         case DstFmt.F16:
+             // The value is packed together with a zero constant into one register.
+             context.Copy(destination, context.PackHalf2x16(srcB, ConstF(0)));
+             break;
+
+         default:
+             // Every other format (DstFmt.F64 is the expected one) is written as
+             // two 32-bit halves to the destination register pair.
+             Operand destinationHigh = GetDest2(rd);
+
+             context.Copy(destination, context.UnpackDouble2x32Low(srcB));
+             context.Copy(destinationHigh, context.UnpackDouble2x32High(srcB));
+             break;
+     }
+ }
+
+ /// <summary>
+ /// Gets the instruction floating-point type flag used for operations on the given format.
+ /// </summary>
+ /// <param name="type">Floating-point format</param>
+ /// <returns>FP64 for F64, FP32 for every other format</returns>
+ private static Instruction ToInstFPType(this DstFmt type)
+ {
+     if (type == DstFmt.F64)
+     {
+         return Instruction.FP64;
+     }
+
+     return Instruction.FP32;
+ }
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/Instructions/InstEmitFloatArithmetic.cs b/src/Ryujinx.Graphics.Shader/Instructions/InstEmitFloatArithmetic.cs
new file mode 100644
index 00000000..29803c31
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/Instructions/InstEmitFloatArithmetic.cs
@@ -0,0 +1,532 @@
+using Ryujinx.Graphics.Shader.Decoders;
+using Ryujinx.Graphics.Shader.IntermediateRepresentation;
+using Ryujinx.Graphics.Shader.Translation;
+
+using static Ryujinx.Graphics.Shader.Instructions.InstEmitAluHelper;
+using static Ryujinx.Graphics.Shader.Instructions.InstEmitHelper;
+using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
+
+namespace Ryujinx.Graphics.Shader.Instructions
+{
+ static partial class InstEmit
+ {
+ /// <summary>Decodes the current op as <c>InstDaddR</c> and forwards two FP64 register sources to <c>EmitFadd</c> with saturation disabled.</summary>
+ public static void DaddR(EmitterContext context)
+ {
+     var op = context.GetOp<InstDaddR>();
+
+     Operand lhs = GetSrcReg(context, op.SrcA, isFP64: true);
+     Operand rhs = GetSrcReg(context, op.SrcB, isFP64: true);
+
+     EmitFadd(context, Instruction.FP64, lhs, rhs, op.Dest, op.NegA, op.NegB, op.AbsA, op.AbsB, saturate: false, op.WriteCC);
+ }
+
+ /// <summary>Decodes the current op as <c>InstDaddI</c> and forwards an FP64 register source plus the converted 20-bit immediate to <c>EmitFadd</c> with saturation disabled.</summary>
+ public static void DaddI(EmitterContext context)
+ {
+     var op = context.GetOp<InstDaddI>();
+
+     Operand lhs = GetSrcReg(context, op.SrcA, isFP64: true);
+     Operand rhs = GetSrcImm(context, Imm20ToFloat(op.Imm20), isFP64: true);
+
+     EmitFadd(context, Instruction.FP64, lhs, rhs, op.Dest, op.NegA, op.NegB, op.AbsA, op.AbsB, saturate: false, op.WriteCC);
+ }
+
+ /// <summary>Decodes the current op as <c>InstDaddC</c> and forwards an FP64 register source plus an FP64 constant buffer source to <c>EmitFadd</c> with saturation disabled.</summary>
+ public static void DaddC(EmitterContext context)
+ {
+     var op = context.GetOp<InstDaddC>();
+
+     Operand lhs = GetSrcReg(context, op.SrcA, isFP64: true);
+     Operand rhs = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset, isFP64: true);
+
+     EmitFadd(context, Instruction.FP64, lhs, rhs, op.Dest, op.NegA, op.NegB, op.AbsA, op.AbsB, saturate: false, op.WriteCC);
+ }
+
+ /// <summary>Decodes the current op as <c>InstDfmaR</c> and forwards three FP64 register sources to <c>EmitFfma</c> with saturation disabled.</summary>
+ public static void DfmaR(EmitterContext context)
+ {
+     var op = context.GetOp<InstDfmaR>();
+
+     Operand factorA = GetSrcReg(context, op.SrcA, isFP64: true);
+     Operand factorB = GetSrcReg(context, op.SrcB, isFP64: true);
+     Operand addend = GetSrcReg(context, op.SrcC, isFP64: true);
+
+     EmitFfma(context, Instruction.FP64, factorA, factorB, addend, op.Dest, op.NegA, op.NegC, saturate: false, op.WriteCC);
+ }
+
+ /// <summary>Decodes the current op as <c>InstDfmaI</c>; the second factor is the converted 20-bit immediate, the other two sources are FP64 registers.</summary>
+ public static void DfmaI(EmitterContext context)
+ {
+     var op = context.GetOp<InstDfmaI>();
+
+     Operand factorA = GetSrcReg(context, op.SrcA, isFP64: true);
+     Operand factorB = GetSrcImm(context, Imm20ToFloat(op.Imm20), isFP64: true);
+     Operand addend = GetSrcReg(context, op.SrcC, isFP64: true);
+
+     EmitFfma(context, Instruction.FP64, factorA, factorB, addend, op.Dest, op.NegA, op.NegC, saturate: false, op.WriteCC);
+ }
+
+ /// <summary>Decodes the current op as <c>InstDfmaC</c>; the second factor comes from the constant buffer, the other two sources are FP64 registers.</summary>
+ public static void DfmaC(EmitterContext context)
+ {
+     var op = context.GetOp<InstDfmaC>();
+
+     Operand factorA = GetSrcReg(context, op.SrcA, isFP64: true);
+     Operand factorB = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset, isFP64: true);
+     Operand addend = GetSrcReg(context, op.SrcC, isFP64: true);
+
+     EmitFfma(context, Instruction.FP64, factorA, factorB, addend, op.Dest, op.NegA, op.NegC, saturate: false, op.WriteCC);
+ }
+
+ /// <summary>Decodes the current op as <c>InstDfmaRc</c>; the second factor is read from register <c>SrcC</c> and the addend from the constant buffer.</summary>
+ public static void DfmaRc(EmitterContext context)
+ {
+     var op = context.GetOp<InstDfmaRc>();
+
+     Operand factorA = GetSrcReg(context, op.SrcA, isFP64: true);
+     Operand factorB = GetSrcReg(context, op.SrcC, isFP64: true);
+     Operand addend = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset, isFP64: true);
+
+     EmitFfma(context, Instruction.FP64, factorA, factorB, addend, op.Dest, op.NegA, op.NegC, saturate: false, op.WriteCC);
+ }
+
+ /// <summary>Decodes the current op as <c>InstDmulR</c> and forwards two FP64 register sources to <c>EmitFmul</c> with no scale and no saturation.</summary>
+ public static void DmulR(EmitterContext context)
+ {
+     var op = context.GetOp<InstDmulR>();
+
+     Operand lhs = GetSrcReg(context, op.SrcA, isFP64: true);
+     Operand rhs = GetSrcReg(context, op.SrcB, isFP64: true);
+
+     EmitFmul(context, Instruction.FP64, MultiplyScale.NoScale, lhs, rhs, op.Dest, op.NegA, saturate: false, op.WriteCC);
+ }
+
+ /// <summary>Decodes the current op as <c>InstDmulI</c> and forwards an FP64 register source plus the converted 20-bit immediate to <c>EmitFmul</c> with no scale and no saturation.</summary>
+ public static void DmulI(EmitterContext context)
+ {
+     var op = context.GetOp<InstDmulI>();
+
+     Operand lhs = GetSrcReg(context, op.SrcA, isFP64: true);
+     Operand rhs = GetSrcImm(context, Imm20ToFloat(op.Imm20), isFP64: true);
+
+     EmitFmul(context, Instruction.FP64, MultiplyScale.NoScale, lhs, rhs, op.Dest, op.NegA, saturate: false, op.WriteCC);
+ }
+
+ /// <summary>Decodes the current op as <c>InstDmulC</c> and forwards an FP64 register source plus an FP64 constant buffer source to <c>EmitFmul</c> with no scale and no saturation.</summary>
+ public static void DmulC(EmitterContext context)
+ {
+     var op = context.GetOp<InstDmulC>();
+
+     Operand lhs = GetSrcReg(context, op.SrcA, isFP64: true);
+     Operand rhs = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset, isFP64: true);
+
+     EmitFmul(context, Instruction.FP64, MultiplyScale.NoScale, lhs, rhs, op.Dest, op.NegA, saturate: false, op.WriteCC);
+ }
+
+ /// <summary>Decodes the current op as <c>InstFaddR</c> and forwards two register sources to <c>EmitFadd</c> as FP32.</summary>
+ public static void FaddR(EmitterContext context)
+ {
+     var op = context.GetOp<InstFaddR>();
+
+     Operand lhs = GetSrcReg(context, op.SrcA);
+     Operand rhs = GetSrcReg(context, op.SrcB);
+
+     EmitFadd(context, Instruction.FP32, lhs, rhs, op.Dest, op.NegA, op.NegB, op.AbsA, op.AbsB, op.Sat, op.WriteCC);
+ }
+
+ /// <summary>Decodes the current op as <c>InstFaddI</c> and forwards a register source plus the converted 20-bit immediate to <c>EmitFadd</c> as FP32.</summary>
+ public static void FaddI(EmitterContext context)
+ {
+     var op = context.GetOp<InstFaddI>();
+
+     Operand lhs = GetSrcReg(context, op.SrcA);
+     Operand rhs = GetSrcImm(context, Imm20ToFloat(op.Imm20));
+
+     EmitFadd(context, Instruction.FP32, lhs, rhs, op.Dest, op.NegA, op.NegB, op.AbsA, op.AbsB, op.Sat, op.WriteCC);
+ }
+
+ /// <summary>Decodes the current op as <c>InstFaddC</c> and forwards a register source plus a constant buffer source to <c>EmitFadd</c> as FP32.</summary>
+ public static void FaddC(EmitterContext context)
+ {
+     var op = context.GetOp<InstFaddC>();
+
+     Operand lhs = GetSrcReg(context, op.SrcA);
+     Operand rhs = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);
+
+     EmitFadd(context, Instruction.FP32, lhs, rhs, op.Dest, op.NegA, op.NegB, op.AbsA, op.AbsB, op.Sat, op.WriteCC);
+ }
+
+ /// <summary>Decodes the current op as <c>InstFadd32i</c> and forwards a register source plus the 32-bit immediate to <c>EmitFadd</c> as FP32; saturation is always off here.</summary>
+ public static void Fadd32i(EmitterContext context)
+ {
+     var op = context.GetOp<InstFadd32i>();
+
+     Operand lhs = GetSrcReg(context, op.SrcA);
+     Operand rhs = GetSrcImm(context, op.Imm32);
+
+     EmitFadd(context, Instruction.FP32, lhs, rhs, op.Dest, op.NegA, op.NegB, op.AbsA, op.AbsB, saturate: false, op.WriteCC);
+ }
+
+ /// <summary>Decodes the current op as <c>InstFfmaR</c> and forwards three register sources to <c>EmitFfma</c> as FP32.</summary>
+ public static void FfmaR(EmitterContext context)
+ {
+     var op = context.GetOp<InstFfmaR>();
+
+     Operand factorA = GetSrcReg(context, op.SrcA);
+     Operand factorB = GetSrcReg(context, op.SrcB);
+     Operand addend = GetSrcReg(context, op.SrcC);
+
+     EmitFfma(context, Instruction.FP32, factorA, factorB, addend, op.Dest, op.NegA, op.NegC, op.Sat, op.WriteCC);
+ }
+
+ /// <summary>Decodes the current op as <c>InstFfmaI</c>; the second factor is the converted 20-bit immediate, the other two sources are registers.</summary>
+ public static void FfmaI(EmitterContext context)
+ {
+     var op = context.GetOp<InstFfmaI>();
+
+     Operand factorA = GetSrcReg(context, op.SrcA);
+     Operand factorB = GetSrcImm(context, Imm20ToFloat(op.Imm20));
+     Operand addend = GetSrcReg(context, op.SrcC);
+
+     EmitFfma(context, Instruction.FP32, factorA, factorB, addend, op.Dest, op.NegA, op.NegC, op.Sat, op.WriteCC);
+ }
+
+ /// <summary>Decodes the current op as <c>InstFfmaC</c>; the second factor comes from the constant buffer, the other two sources are registers.</summary>
+ public static void FfmaC(EmitterContext context)
+ {
+     var op = context.GetOp<InstFfmaC>();
+
+     Operand factorA = GetSrcReg(context, op.SrcA);
+     Operand factorB = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);
+     Operand addend = GetSrcReg(context, op.SrcC);
+
+     EmitFfma(context, Instruction.FP32, factorA, factorB, addend, op.Dest, op.NegA, op.NegC, op.Sat, op.WriteCC);
+ }
+
+ /// <summary>Decodes the current op as <c>InstFfmaRc</c>; the second factor is read from register <c>SrcC</c> and the addend from the constant buffer.</summary>
+ public static void FfmaRc(EmitterContext context)
+ {
+     var op = context.GetOp<InstFfmaRc>();
+
+     Operand factorA = GetSrcReg(context, op.SrcA);
+     Operand factorB = GetSrcReg(context, op.SrcC);
+     Operand addend = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);
+
+     EmitFfma(context, Instruction.FP32, factorA, factorB, addend, op.Dest, op.NegA, op.NegC, op.Sat, op.WriteCC);
+ }
+
+ /// <summary>Decodes the current op as <c>InstFfma32i</c>; the second factor is the 32-bit immediate and the addend is read from the destination register.</summary>
+ public static void Ffma32i(EmitterContext context)
+ {
+     var op = context.GetOp<InstFfma32i>();
+
+     Operand factorA = GetSrcReg(context, op.SrcA);
+     Operand factorB = GetSrcImm(context, op.Imm32);
+     Operand addend = GetSrcReg(context, op.Dest);
+
+     EmitFfma(context, Instruction.FP32, factorA, factorB, addend, op.Dest, op.NegA, op.NegC, op.Sat, op.WriteCC);
+ }
+
+ /// <summary>Decodes the current op as <c>InstFmulR</c> and forwards two register sources and the op's scale to <c>EmitFmul</c> as FP32.</summary>
+ public static void FmulR(EmitterContext context)
+ {
+     var op = context.GetOp<InstFmulR>();
+
+     Operand lhs = GetSrcReg(context, op.SrcA);
+     Operand rhs = GetSrcReg(context, op.SrcB);
+
+     EmitFmul(context, Instruction.FP32, op.Scale, lhs, rhs, op.Dest, op.NegA, op.Sat, op.WriteCC);
+ }
+
+ /// <summary>Decodes the current op as <c>InstFmulI</c> and forwards a register source, the converted 20-bit immediate and the op's scale to <c>EmitFmul</c> as FP32.</summary>
+ public static void FmulI(EmitterContext context)
+ {
+     var op = context.GetOp<InstFmulI>();
+
+     Operand lhs = GetSrcReg(context, op.SrcA);
+     Operand rhs = GetSrcImm(context, Imm20ToFloat(op.Imm20));
+
+     EmitFmul(context, Instruction.FP32, op.Scale, lhs, rhs, op.Dest, op.NegA, op.Sat, op.WriteCC);
+ }
+
+ /// <summary>Decodes the current op as <c>InstFmulC</c> and forwards a register source, a constant buffer source and the op's scale to <c>EmitFmul</c> as FP32.</summary>
+ public static void FmulC(EmitterContext context)
+ {
+     var op = context.GetOp<InstFmulC>();
+
+     Operand lhs = GetSrcReg(context, op.SrcA);
+     Operand rhs = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);
+
+     EmitFmul(context, Instruction.FP32, op.Scale, lhs, rhs, op.Dest, op.NegA, op.Sat, op.WriteCC);
+ }
+
+ /// <summary>Decodes the current op as <c>InstFmul32i</c> and forwards a register source plus the 32-bit immediate to <c>EmitFmul</c> as FP32, with no scale and no negation.</summary>
+ public static void Fmul32i(EmitterContext context)
+ {
+     var op = context.GetOp<InstFmul32i>();
+
+     Operand lhs = GetSrcReg(context, op.SrcA);
+     Operand rhs = GetSrcImm(context, op.Imm32);
+
+     EmitFmul(context, Instruction.FP32, MultiplyScale.NoScale, lhs, rhs, op.Dest, negateB: false, op.Sat, op.WriteCC);
+ }
+
+ /// <summary>Decodes the current op as <c>InstHadd2R</c> and forwards two half-float register sources to <c>EmitHadd2Hmul2</c> in add mode.</summary>
+ public static void Hadd2R(EmitterContext context)
+ {
+     var op = context.GetOp<InstHadd2R>();
+
+     Operand[] lhs = GetHalfSrc(context, op.ASwizzle, op.SrcA, op.NegA, op.AbsA);
+     Operand[] rhs = GetHalfSrc(context, op.BSwizzle, op.SrcB, op.NegB, op.AbsB);
+
+     EmitHadd2Hmul2(context, op.OFmt, lhs, rhs, isAdd: true, op.Dest, op.Sat);
+ }
+
+ /// <summary>Decodes the current op as <c>InstHadd2I</c>; the second source is built from the two half immediates <c>BimmH0</c>/<c>BimmH1</c>.</summary>
+ public static void Hadd2I(EmitterContext context)
+ {
+     var op = context.GetOp<InstHadd2I>();
+
+     Operand[] lhs = GetHalfSrc(context, op.ASwizzle, op.SrcA, op.NegA, op.AbsA);
+     Operand[] rhs = GetHalfSrc(context, op.BimmH0, op.BimmH1);
+
+     EmitHadd2Hmul2(context, op.OFmt, lhs, rhs, isAdd: true, op.Dest, op.Sat);
+ }
+
+ /// <summary>Decodes the current op as <c>InstHadd2C</c>; the second source is read from the constant buffer with the <c>HalfSwizzle.F32</c> swizzle.</summary>
+ public static void Hadd2C(EmitterContext context)
+ {
+     var op = context.GetOp<InstHadd2C>();
+
+     Operand[] lhs = GetHalfSrc(context, op.ASwizzle, op.SrcA, op.NegA, op.AbsA);
+     Operand[] rhs = GetHalfSrc(context, HalfSwizzle.F32, op.CbufSlot, op.CbufOffset, op.NegB, op.AbsB);
+
+     EmitHadd2Hmul2(context, op.OFmt, lhs, rhs, isAdd: true, op.Dest, op.Sat);
+ }
+
+ /// <summary>Decodes the current op as <c>InstHadd232i</c>; the second source is the op's immediate and the output format is fixed to <c>OFmt.F16</c>.</summary>
+ public static void Hadd232i(EmitterContext context)
+ {
+     var op = context.GetOp<InstHadd232i>();
+
+     Operand[] lhs = GetHalfSrc(context, op.ASwizzle, op.SrcA, op.NegA, false);
+     Operand[] rhs = GetHalfSrc(context, op.Imm);
+
+     EmitHadd2Hmul2(context, OFmt.F16, lhs, rhs, isAdd: true, op.Dest, op.Sat);
+ }
+
+ /// <summary>Decodes the current op as <c>InstHfma2R</c> and forwards three half-float register sources to <c>EmitHfma2</c>; the <c>NegA</c> flag is applied to the second factor.</summary>
+ public static void Hfma2R(EmitterContext context)
+ {
+     var op = context.GetOp<InstHfma2R>();
+
+     Operand[] factorA = GetHalfSrc(context, op.ASwizzle, op.SrcA, false, false);
+     Operand[] factorB = GetHalfSrc(context, op.BSwizzle, op.SrcB, op.NegA, false);
+     Operand[] addend = GetHalfSrc(context, op.CSwizzle, op.SrcC, op.NegC, false);
+
+     EmitHfma2(context, op.OFmt, factorA, factorB, addend, op.Dest, op.Sat);
+ }
+
+ /// <summary>Decodes the current op as <c>InstHfma2I</c>; the second factor is built from the half immediates <c>BimmH0</c>/<c>BimmH1</c>.</summary>
+ public static void Hfma2I(EmitterContext context)
+ {
+     var op = context.GetOp<InstHfma2I>();
+
+     Operand[] factorA = GetHalfSrc(context, op.ASwizzle, op.SrcA, false, false);
+     Operand[] factorB = GetHalfSrc(context, op.BimmH0, op.BimmH1);
+     Operand[] addend = GetHalfSrc(context, op.CSwizzle, op.SrcC, op.NegC, false);
+
+     EmitHfma2(context, op.OFmt, factorA, factorB, addend, op.Dest, op.Sat);
+ }
+
+ /// <summary>Decodes the current op as <c>InstHfma2C</c>; the second factor is read from the constant buffer (with the <c>NegA</c> flag applied to it).</summary>
+ public static void Hfma2C(EmitterContext context)
+ {
+     var op = context.GetOp<InstHfma2C>();
+
+     Operand[] factorA = GetHalfSrc(context, op.ASwizzle, op.SrcA, false, false);
+     Operand[] factorB = GetHalfSrc(context, HalfSwizzle.F32, op.CbufSlot, op.CbufOffset, op.NegA, false);
+     Operand[] addend = GetHalfSrc(context, op.CSwizzle, op.SrcC, op.NegC, false);
+
+     EmitHfma2(context, op.OFmt, factorA, factorB, addend, op.Dest, op.Sat);
+ }
+
+ /// <summary>Decodes the current op as <c>InstHfma2Rc</c>; the second factor is read from register <c>SrcC</c> and the addend from the constant buffer.</summary>
+ public static void Hfma2Rc(EmitterContext context)
+ {
+     var op = context.GetOp<InstHfma2Rc>();
+
+     Operand[] factorA = GetHalfSrc(context, op.ASwizzle, op.SrcA, false, false);
+     Operand[] factorB = GetHalfSrc(context, op.CSwizzle, op.SrcC, op.NegA, false);
+     Operand[] addend = GetHalfSrc(context, HalfSwizzle.F32, op.CbufSlot, op.CbufOffset, op.NegC, false);
+
+     EmitHfma2(context, op.OFmt, factorA, factorB, addend, op.Dest, op.Sat);
+ }
+
+ /// <summary>Decodes the current op as <c>InstHfma232i</c>; the second factor is the op's immediate, the addend is read from the destination register, and the result is packed as <c>OFmt.F16</c> without saturation.</summary>
+ public static void Hfma232i(EmitterContext context)
+ {
+     var op = context.GetOp<InstHfma232i>();
+
+     Operand[] factorA = GetHalfSrc(context, op.ASwizzle, op.SrcA, false, false);
+     Operand[] factorB = GetHalfSrc(context, op.Imm);
+     Operand[] addend = GetHalfSrc(context, HalfSwizzle.F16, op.Dest, op.NegC, false);
+
+     EmitHfma2(context, OFmt.F16, factorA, factorB, addend, op.Dest, saturate: false);
+ }
+
+ /// <summary>Decodes the current op as <c>InstHmul2R</c> and forwards two half-float register sources to <c>EmitHadd2Hmul2</c> in multiply mode; the <c>NegA</c> flag is applied to the second source.</summary>
+ public static void Hmul2R(EmitterContext context)
+ {
+     var op = context.GetOp<InstHmul2R>();
+
+     Operand[] lhs = GetHalfSrc(context, op.ASwizzle, op.SrcA, false, op.AbsA);
+     Operand[] rhs = GetHalfSrc(context, op.BSwizzle, op.SrcB, op.NegA, op.AbsB);
+
+     EmitHadd2Hmul2(context, op.OFmt, lhs, rhs, isAdd: false, op.Dest, op.Sat);
+ }
+
+ /// <summary>Decodes the current op as <c>InstHmul2I</c>; the second source is built from the half immediates <c>BimmH0</c>/<c>BimmH1</c> and the product is emitted through <c>EmitHadd2Hmul2</c>.</summary>
+ public static void Hmul2I(EmitterContext context)
+ {
+     var op = context.GetOp<InstHmul2I>();
+
+     Operand[] lhs = GetHalfSrc(context, op.ASwizzle, op.SrcA, op.NegA, op.AbsA);
+     Operand[] rhs = GetHalfSrc(context, op.BimmH0, op.BimmH1);
+
+     EmitHadd2Hmul2(context, op.OFmt, lhs, rhs, isAdd: false, op.Dest, op.Sat);
+ }
+
+ /// <summary>Decodes the current op as <c>InstHmul2C</c>; the second source is read from the constant buffer (with the <c>NegA</c> flag applied to it) and the product is emitted through <c>EmitHadd2Hmul2</c>.</summary>
+ public static void Hmul2C(EmitterContext context)
+ {
+     var op = context.GetOp<InstHmul2C>();
+
+     Operand[] lhs = GetHalfSrc(context, op.ASwizzle, op.SrcA, false, op.AbsA);
+     Operand[] rhs = GetHalfSrc(context, HalfSwizzle.F32, op.CbufSlot, op.CbufOffset, op.NegA, op.AbsB);
+
+     EmitHadd2Hmul2(context, op.OFmt, lhs, rhs, isAdd: false, op.Dest, op.Sat);
+ }
+
+ /// <summary>Decodes the current op as <c>InstHmul232i</c>; the second source is the op's <c>Imm32</c> and the output format is fixed to <c>OFmt.F16</c>.</summary>
+ public static void Hmul232i(EmitterContext context)
+ {
+     var op = context.GetOp<InstHmul232i>();
+
+     Operand[] lhs = GetHalfSrc(context, op.ASwizzle, op.SrcA, false, false);
+     Operand[] rhs = GetHalfSrc(context, op.Imm32);
+
+     EmitHadd2Hmul2(context, OFmt.F16, lhs, rhs, isAdd: false, op.Dest, op.Sat);
+ }
+
+ /// <summary>
+ /// Applies the abs/neg modifiers to both sources, adds them, optionally saturates the sum, writes it to
+ /// register <paramref name="rd"/> and updates the FP flags according to <paramref name="writeCC"/>.
+ /// </summary>
+ private static void EmitFadd(
+     EmitterContext context,
+     Instruction fpType,
+     Operand srcA,
+     Operand srcB,
+     int rd,
+     bool negateA,
+     bool negateB,
+     bool absoluteA,
+     bool absoluteB,
+     bool saturate,
+     bool writeCC)
+ {
+     Operand lhs = context.FPAbsNeg(srcA, absoluteA, negateA, fpType);
+     Operand rhs = context.FPAbsNeg(srcB, absoluteB, negateB, fpType);
+
+     Operand sum = context.FPAdd(lhs, rhs, fpType);
+     Operand res = context.FPSaturate(sum, saturate, fpType);
+
+     // The destination is written as a 64-bit value only for the FP64 instruction type.
+     SetDest(context, res, rd, fpType == Instruction.FP64);
+
+     SetFPZnFlags(context, res, writeCC, fpType);
+ }
+
+ /// <summary>
+ /// Optionally negates the second factor and the addend, emits a fused multiply-add, optionally saturates
+ /// it, writes the result to register <paramref name="rd"/> and updates the FP flags according to
+ /// <paramref name="writeCC"/>. The first factor is used unmodified.
+ /// </summary>
+ private static void EmitFfma(
+     EmitterContext context,
+     Instruction fpType,
+     Operand srcA,
+     Operand srcB,
+     Operand srcC,
+     int rd,
+     bool negateB,
+     bool negateC,
+     bool saturate,
+     bool writeCC)
+ {
+     Operand factorB = context.FPNegate(srcB, negateB, fpType);
+     Operand addend = context.FPNegate(srcC, negateC, fpType);
+
+     Operand fused = context.FPFusedMultiplyAdd(srcA, factorB, addend, fpType);
+     Operand res = context.FPSaturate(fused, saturate, fpType);
+
+     // The destination is written as a 64-bit value only for the FP64 instruction type.
+     SetDest(context, res, rd, fpType == Instruction.FP64);
+
+     SetFPZnFlags(context, res, writeCC, fpType);
+ }
+
+ /// <summary>
+ /// Optionally negates the second source, pre-multiplies the first source by the constant selected by
+ /// <paramref name="scale"/> (unless it is <c>NoScale</c>), multiplies both sources, optionally saturates,
+ /// writes the result to register <paramref name="rd"/> and updates the FP flags.
+ /// </summary>
+ private static void EmitFmul(
+     EmitterContext context,
+     Instruction fpType,
+     MultiplyScale scale,
+     Operand srcA,
+     Operand srcB,
+     int rd,
+     bool negateB,
+     bool saturate,
+     bool writeCC)
+ {
+     bool isFP64 = fpType == Instruction.FP64;
+
+     Operand rhs = context.FPNegate(srcB, negateB, fpType);
+     Operand lhs = srcA;
+
+     if (scale != MultiplyScale.NoScale)
+     {
+         float factor = scale switch
+         {
+             MultiplyScale.D2 => 0.5f,
+             MultiplyScale.D4 => 0.25f,
+             MultiplyScale.D8 => 0.125f,
+             MultiplyScale.M2 => 2f,
+             MultiplyScale.M4 => 4f,
+             MultiplyScale.M8 => 8f,
+             _ => 1f // Invalid, behave as if it had no scale.
+         };
+
+         Operand scaleConst = ConstF(factor);
+
+         // A factor of exactly 1 is only produced by the fallback arm above, so it marks an unknown scale.
+         if (scaleConst.AsFloat() == 1f)
+         {
+             context.Config.GpuAccessor.Log($"Invalid FP multiply scale \"{scale}\".");
+         }
+
+         if (isFP64)
+         {
+             scaleConst = context.FP32ConvertToFP64(scaleConst);
+         }
+
+         lhs = context.FPMultiply(lhs, scaleConst, fpType);
+     }
+
+     Operand product = context.FPMultiply(lhs, rhs, fpType);
+     Operand res = context.FPSaturate(product, saturate, fpType);
+
+     SetDest(context, res, rd, isFP64);
+
+     SetFPZnFlags(context, res, writeCC, fpType);
+ }
+
+ /// <summary>
+ /// For each of the two elements, adds (when <paramref name="isAdd"/> is true) or multiplies the sources,
+ /// optionally saturates, then packs both results with <c>GetHalfPacked</c> and copies them to register
+ /// <paramref name="rd"/>.
+ /// </summary>
+ private static void EmitHadd2Hmul2(
+     EmitterContext context,
+     OFmt swizzle,
+     Operand[] srcA,
+     Operand[] srcB,
+     bool isAdd,
+     int rd,
+     bool saturate)
+ {
+     var res = new Operand[2];
+
+     for (int half = 0; half < res.Length; half++)
+     {
+         Operand raw = isAdd
+             ? context.FPAdd(srcA[half], srcB[half])
+             : context.FPMultiply(srcA[half], srcB[half]);
+
+         res[half] = context.FPSaturate(raw, saturate);
+     }
+
+     Operand dest = GetDest(rd);
+     Operand packed = GetHalfPacked(context, swizzle, res, rd);
+
+     context.Copy(dest, packed);
+ }
+
+ /// <summary>
+ /// For each of the two elements, emits a fused multiply-add of the three sources and optionally saturates
+ /// it, then packs both results with <c>GetHalfPacked</c> and copies them to register <paramref name="rd"/>.
+ /// </summary>
+ public static void EmitHfma2(
+     EmitterContext context,
+     OFmt swizzle,
+     Operand[] srcA,
+     Operand[] srcB,
+     Operand[] srcC,
+     int rd,
+     bool saturate)
+ {
+     var res = new Operand[2];
+
+     for (int half = 0; half < res.Length; half++)
+     {
+         Operand fused = context.FPFusedMultiplyAdd(srcA[half], srcB[half], srcC[half]);
+
+         res[half] = context.FPSaturate(fused, saturate);
+     }
+
+     Operand dest = GetDest(rd);
+     Operand packed = GetHalfPacked(context, swizzle, res, rd);
+
+     context.Copy(dest, packed);
+ }
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/Instructions/InstEmitFloatComparison.cs b/src/Ryujinx.Graphics.Shader/Instructions/InstEmitFloatComparison.cs
new file mode 100644
index 00000000..8f99ddb3
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/Instructions/InstEmitFloatComparison.cs
@@ -0,0 +1,575 @@
+using Ryujinx.Graphics.Shader.Decoders;
+using Ryujinx.Graphics.Shader.IntermediateRepresentation;
+using Ryujinx.Graphics.Shader.Translation;
+using System;
+
+using static Ryujinx.Graphics.Shader.Instructions.InstEmitAluHelper;
+using static Ryujinx.Graphics.Shader.Instructions.InstEmitHelper;
+using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
+
+namespace Ryujinx.Graphics.Shader.Instructions
+{
+ static partial class InstEmit
+ {
+ /// <summary>Decodes the current op as <c>InstDsetR</c> and forwards two FP64 register sources to <c>EmitFset</c>.</summary>
+ public static void DsetR(EmitterContext context)
+ {
+     var op = context.GetOp<InstDsetR>();
+
+     Operand lhs = GetSrcReg(context, op.SrcA, isFP64: true);
+     Operand rhs = GetSrcReg(context, op.SrcB, isFP64: true);
+
+     EmitFset(
+         context, op.FComp, op.Bop, lhs, rhs,
+         op.SrcPred, op.SrcPredInv, op.Dest,
+         op.AbsA, op.AbsB, op.NegA, op.NegB,
+         op.BVal, op.WriteCC, isFP64: true);
+ }
+
+ /// <summary>Decodes the current op as <c>InstDsetI</c> and forwards an FP64 register source plus the converted 20-bit immediate to <c>EmitFset</c>.</summary>
+ public static void DsetI(EmitterContext context)
+ {
+     var op = context.GetOp<InstDsetI>();
+
+     Operand lhs = GetSrcReg(context, op.SrcA, isFP64: true);
+     Operand rhs = GetSrcImm(context, Imm20ToFloat(op.Imm20), isFP64: true);
+
+     EmitFset(
+         context, op.FComp, op.Bop, lhs, rhs,
+         op.SrcPred, op.SrcPredInv, op.Dest,
+         op.AbsA, op.AbsB, op.NegA, op.NegB,
+         op.BVal, op.WriteCC, isFP64: true);
+ }
+
+ /// <summary>Decodes the current op as <c>InstDsetC</c> and forwards an FP64 register source plus an FP64 constant buffer source to <c>EmitFset</c>.</summary>
+ public static void DsetC(EmitterContext context)
+ {
+     var op = context.GetOp<InstDsetC>();
+
+     Operand lhs = GetSrcReg(context, op.SrcA, isFP64: true);
+     Operand rhs = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset, isFP64: true);
+
+     EmitFset(
+         context, op.FComp, op.Bop, lhs, rhs,
+         op.SrcPred, op.SrcPredInv, op.Dest,
+         op.AbsA, op.AbsB, op.NegA, op.NegB,
+         op.BVal, op.WriteCC, isFP64: true);
+ }
+
+ /// <summary>Decodes the current op as <c>InstDsetpR</c> and forwards two FP64 register sources to <c>EmitFsetp</c> (writeCC fixed to false).</summary>
+ public static void DsetpR(EmitterContext context)
+ {
+     var op = context.GetOp<InstDsetpR>();
+
+     Operand lhs = GetSrcReg(context, op.SrcA, isFP64: true);
+     Operand rhs = GetSrcReg(context, op.SrcB, isFP64: true);
+
+     EmitFsetp(
+         context, op.FComp, op.Bop, lhs, rhs,
+         op.SrcPred, op.SrcPredInv, op.DestPred, op.DestPredInv,
+         op.AbsA, op.AbsB, op.NegA, op.NegB,
+         writeCC: false, isFP64: true);
+ }
+
+ /// <summary>Decodes the current op as <c>InstDsetpI</c> and forwards an FP64 register source plus the converted 20-bit immediate to <c>EmitFsetp</c> (writeCC fixed to false).</summary>
+ public static void DsetpI(EmitterContext context)
+ {
+     var op = context.GetOp<InstDsetpI>();
+
+     Operand lhs = GetSrcReg(context, op.SrcA, isFP64: true);
+     Operand rhs = GetSrcImm(context, Imm20ToFloat(op.Imm20), isFP64: true);
+
+     EmitFsetp(
+         context, op.FComp, op.Bop, lhs, rhs,
+         op.SrcPred, op.SrcPredInv, op.DestPred, op.DestPredInv,
+         op.AbsA, op.AbsB, op.NegA, op.NegB,
+         writeCC: false, isFP64: true);
+ }
+
+ /// <summary>Decodes the current op as <c>InstDsetpC</c> and forwards an FP64 register source plus an FP64 constant buffer source to <c>EmitFsetp</c> (writeCC fixed to false).</summary>
+ public static void DsetpC(EmitterContext context)
+ {
+     var op = context.GetOp<InstDsetpC>();
+
+     Operand lhs = GetSrcReg(context, op.SrcA, isFP64: true);
+     Operand rhs = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset, isFP64: true);
+
+     EmitFsetp(
+         context, op.FComp, op.Bop, lhs, rhs,
+         op.SrcPred, op.SrcPredInv, op.DestPred, op.DestPredInv,
+         op.AbsA, op.AbsB, op.NegA, op.NegB,
+         writeCC: false, isFP64: true);
+ }
+
+ /// <summary>Decodes the current op as <c>InstFcmpR</c> and forwards three register sources to <c>EmitFcmp</c>.</summary>
+ public static void FcmpR(EmitterContext context)
+ {
+     var op = context.GetOp<InstFcmpR>();
+
+     Operand first = GetSrcReg(context, op.SrcA);
+     Operand second = GetSrcReg(context, op.SrcB);
+     Operand third = GetSrcReg(context, op.SrcC);
+
+     EmitFcmp(context, op.FComp, first, second, third, op.Dest);
+ }
+
+ /// <summary>Decodes the current op as <c>InstFcmpI</c>; the second source is the converted 20-bit immediate, the other two are registers.</summary>
+ public static void FcmpI(EmitterContext context)
+ {
+     var op = context.GetOp<InstFcmpI>();
+
+     Operand first = GetSrcReg(context, op.SrcA);
+     Operand second = GetSrcImm(context, Imm20ToFloat(op.Imm20));
+     Operand third = GetSrcReg(context, op.SrcC);
+
+     EmitFcmp(context, op.FComp, first, second, third, op.Dest);
+ }
+
+ /// <summary>Decodes the current op as <c>InstFcmpC</c>; the second source comes from the constant buffer, the other two are registers.</summary>
+ public static void FcmpC(EmitterContext context)
+ {
+     var op = context.GetOp<InstFcmpC>();
+
+     Operand first = GetSrcReg(context, op.SrcA);
+     Operand second = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);
+     Operand third = GetSrcReg(context, op.SrcC);
+
+     EmitFcmp(context, op.FComp, first, second, third, op.Dest);
+ }
+
+ /// <summary>Decodes the current op as <c>InstFcmpRc</c>; the second source is read from register <c>SrcC</c> and the third from the constant buffer.</summary>
+ public static void FcmpRc(EmitterContext context)
+ {
+     var op = context.GetOp<InstFcmpRc>();
+
+     Operand first = GetSrcReg(context, op.SrcA);
+     Operand second = GetSrcReg(context, op.SrcC);
+     Operand third = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);
+
+     EmitFcmp(context, op.FComp, first, second, third, op.Dest);
+ }
+
+ /// <summary>Decodes the current op as <c>InstFsetR</c> and forwards two register sources to <c>EmitFset</c> (FP32 path).</summary>
+ public static void FsetR(EmitterContext context)
+ {
+     var op = context.GetOp<InstFsetR>();
+
+     Operand lhs = GetSrcReg(context, op.SrcA);
+     Operand rhs = GetSrcReg(context, op.SrcB);
+
+     EmitFset(
+         context, op.FComp, op.Bop, lhs, rhs,
+         op.SrcPred, op.SrcPredInv, op.Dest,
+         op.AbsA, op.AbsB, op.NegA, op.NegB,
+         op.BVal, op.WriteCC);
+ }
+
+ /// <summary>Decodes the current op as <c>InstFsetC</c> and forwards a register source plus a constant buffer source to <c>EmitFset</c> (FP32 path).</summary>
+ public static void FsetC(EmitterContext context)
+ {
+     var op = context.GetOp<InstFsetC>();
+
+     Operand lhs = GetSrcReg(context, op.SrcA);
+     Operand rhs = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);
+
+     EmitFset(
+         context, op.FComp, op.Bop, lhs, rhs,
+         op.SrcPred, op.SrcPredInv, op.Dest,
+         op.AbsA, op.AbsB, op.NegA, op.NegB,
+         op.BVal, op.WriteCC);
+ }
+
+ /// <summary>Decodes the current op as <c>InstFsetI</c> and forwards a register source plus the converted 20-bit immediate to <c>EmitFset</c> (FP32 path).</summary>
+ public static void FsetI(EmitterContext context)
+ {
+     var op = context.GetOp<InstFsetI>();
+
+     Operand lhs = GetSrcReg(context, op.SrcA);
+     Operand rhs = GetSrcImm(context, Imm20ToFloat(op.Imm20));
+
+     EmitFset(
+         context, op.FComp, op.Bop, lhs, rhs,
+         op.SrcPred, op.SrcPredInv, op.Dest,
+         op.AbsA, op.AbsB, op.NegA, op.NegB,
+         op.BVal, op.WriteCC);
+ }
+
+ /// <summary>Decodes the current op as <c>InstFsetpR</c> and forwards two register sources to <c>EmitFsetp</c> (FP32 path).</summary>
+ public static void FsetpR(EmitterContext context)
+ {
+     var op = context.GetOp<InstFsetpR>();
+
+     Operand lhs = GetSrcReg(context, op.SrcA);
+     Operand rhs = GetSrcReg(context, op.SrcB);
+
+     EmitFsetp(
+         context, op.FComp, op.Bop, lhs, rhs,
+         op.SrcPred, op.SrcPredInv, op.DestPred, op.DestPredInv,
+         op.AbsA, op.AbsB, op.NegA, op.NegB,
+         op.WriteCC);
+ }
+
+ /// <summary>Decodes the current op as <c>InstFsetpI</c> and forwards a register source plus the converted 20-bit immediate to <c>EmitFsetp</c> (FP32 path).</summary>
+ public static void FsetpI(EmitterContext context)
+ {
+     var op = context.GetOp<InstFsetpI>();
+
+     Operand lhs = GetSrcReg(context, op.SrcA);
+     Operand rhs = GetSrcImm(context, Imm20ToFloat(op.Imm20));
+
+     EmitFsetp(
+         context, op.FComp, op.Bop, lhs, rhs,
+         op.SrcPred, op.SrcPredInv, op.DestPred, op.DestPredInv,
+         op.AbsA, op.AbsB, op.NegA, op.NegB,
+         op.WriteCC);
+ }
+
+ /// <summary>Decodes the current op as <c>InstFsetpC</c> and forwards a register source plus a constant buffer source to <c>EmitFsetp</c> (FP32 path).</summary>
+ public static void FsetpC(EmitterContext context)
+ {
+     var op = context.GetOp<InstFsetpC>();
+
+     Operand lhs = GetSrcReg(context, op.SrcA);
+     Operand rhs = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);
+
+     EmitFsetp(
+         context, op.FComp, op.Bop, lhs, rhs,
+         op.SrcPred, op.SrcPredInv, op.DestPred, op.DestPredInv,
+         op.AbsA, op.AbsB, op.NegA, op.NegB,
+         op.WriteCC);
+ }
+
+ /// <summary>Decodes the current op as <c>InstHset2R</c> and forwards two half-float register sources to <c>EmitHset2</c>.</summary>
+ public static void Hset2R(EmitterContext context)
+ {
+     var op = context.GetOp<InstHset2R>();
+
+     Operand[] lhs = GetHalfSrc(context, op.ASwizzle, op.SrcA, op.NegA, op.AbsA);
+     Operand[] rhs = GetHalfSrc(context, op.BSwizzle, op.SrcB, op.NegB, op.AbsB);
+
+     EmitHset2(context, op.Cmp, op.Bop, lhs, rhs, op.SrcPred, op.SrcPredInv, op.Dest, op.Bval);
+ }
+
+ /// <summary>Decodes the current op as <c>InstHset2I</c>; the second source is built from the half immediates <c>BimmH0</c>/<c>BimmH1</c>.</summary>
+ public static void Hset2I(EmitterContext context)
+ {
+     var op = context.GetOp<InstHset2I>();
+
+     Operand[] lhs = GetHalfSrc(context, op.ASwizzle, op.SrcA, op.NegA, op.AbsA);
+     Operand[] rhs = GetHalfSrc(context, op.BimmH0, op.BimmH1);
+
+     EmitHset2(context, op.Cmp, op.Bop, lhs, rhs, op.SrcPred, op.SrcPredInv, op.Dest, op.Bval);
+ }
+
+ /// <summary>Decodes the current op as <c>InstHset2C</c>; the second source is read from the constant buffer with negation from <c>NegB</c> and no absolute-value modifier.</summary>
+ public static void Hset2C(EmitterContext context)
+ {
+     var op = context.GetOp<InstHset2C>();
+
+     Operand[] lhs = GetHalfSrc(context, op.ASwizzle, op.SrcA, op.NegA, op.AbsA);
+     Operand[] rhs = GetHalfSrc(context, HalfSwizzle.F32, op.CbufSlot, op.CbufOffset, op.NegB, false);
+
+     EmitHset2(context, op.Cmp, op.Bop, lhs, rhs, op.SrcPred, op.SrcPredInv, op.Dest, op.Bval);
+ }
+
+ /// <summary>Decodes the current op as <c>InstHsetp2R</c> and forwards two half-float register sources to <c>EmitHsetp2</c>; the comparison is taken from the op's <c>FComp2</c> field.</summary>
+ public static void Hsetp2R(EmitterContext context)
+ {
+     var op = context.GetOp<InstHsetp2R>();
+
+     Operand[] lhs = GetHalfSrc(context, op.ASwizzle, op.SrcA, op.NegA, op.AbsA);
+     Operand[] rhs = GetHalfSrc(context, op.BSwizzle, op.SrcB, op.NegB, op.AbsB);
+
+     EmitHsetp2(
+         context, op.FComp2, op.Bop, lhs, rhs,
+         op.SrcPred, op.SrcPredInv, op.DestPred, op.DestPredInv, op.HAnd);
+ }
+
+ /// <summary>Decodes the current op as <c>InstHsetp2I</c>; the second source is built from the half immediates <c>BimmH0</c>/<c>BimmH1</c>.</summary>
+ public static void Hsetp2I(EmitterContext context)
+ {
+     var op = context.GetOp<InstHsetp2I>();
+
+     Operand[] lhs = GetHalfSrc(context, op.ASwizzle, op.SrcA, op.NegA, op.AbsA);
+     Operand[] rhs = GetHalfSrc(context, op.BimmH0, op.BimmH1);
+
+     EmitHsetp2(
+         context, op.FComp, op.Bop, lhs, rhs,
+         op.SrcPred, op.SrcPredInv, op.DestPred, op.DestPredInv, op.HAnd);
+ }
+
+ /// <summary>Decodes the current op as <c>InstHsetp2C</c>; the second source is read from the constant buffer with the <c>HalfSwizzle.F32</c> swizzle.</summary>
+ public static void Hsetp2C(EmitterContext context)
+ {
+     var op = context.GetOp<InstHsetp2C>();
+
+     Operand[] lhs = GetHalfSrc(context, op.ASwizzle, op.SrcA, op.NegA, op.AbsA);
+     Operand[] rhs = GetHalfSrc(context, HalfSwizzle.F32, op.CbufSlot, op.CbufOffset, op.NegB, op.AbsB);
+
+     EmitHsetp2(
+         context, op.FComp, op.Bop, lhs, rhs,
+         op.SrcPred, op.SrcPredInv, op.DestPred, op.DestPredInv, op.HAnd);
+ }
+
+ /// <summary>
+ /// Compares <paramref name="srcC"/> against a zero float constant using <paramref name="cmpOp"/> and copies
+ /// <paramref name="srcA"/> (comparison true) or <paramref name="srcB"/> (comparison false) to register
+ /// <paramref name="rd"/>.
+ /// </summary>
+ private static void EmitFcmp(EmitterContext context, FComp cmpOp, Operand srcA, Operand srcB, Operand srcC, int rd)
+ {
+     Operand zero = ConstF(0);
+     Operand condition = GetFPComparison(context, cmpOp, srcC, zero);
+     Operand selected = context.ConditionalSelect(condition, srcA, srcB);
+
+     context.Copy(GetDest(rd), selected);
+ }
+
+ /// <summary>
+ /// Applies abs/neg modifiers to both sources, compares them, combines the comparison with the source
+ /// predicate through <paramref name="logicOp"/> and writes the outcome to register <paramref name="rd"/>.
+ /// When <paramref name="boolFloat"/> is set the outcome is first converted to a float 1 / integer 0
+ /// selection and FP flags are updated; otherwise it is stored directly and integer flags are updated.
+ /// </summary>
+ private static void EmitFset(
+     EmitterContext context,
+     FComp cmpOp,
+     BoolOp logicOp,
+     Operand srcA,
+     Operand srcB,
+     int srcPred,
+     bool srcPredInv,
+     int rd,
+     bool absoluteA,
+     bool absoluteB,
+     bool negateA,
+     bool negateB,
+     bool boolFloat,
+     bool writeCC,
+     bool isFP64 = false)
+ {
+     Instruction fpType = Instruction.FP32;
+
+     if (isFP64)
+     {
+         fpType = Instruction.FP64;
+     }
+
+     Operand lhs = context.FPAbsNeg(srcA, absoluteA, negateA, fpType);
+     Operand rhs = context.FPAbsNeg(srcB, absoluteB, negateB, fpType);
+
+     Operand comparison = GetFPComparison(context, cmpOp, lhs, rhs, fpType);
+     Operand pred = GetPredicate(context, srcPred, srcPredInv);
+
+     Operand res = GetPredLogicalOp(context, logicOp, comparison, pred);
+
+     Operand dest = GetDest(rd);
+
+     if (!boolFloat)
+     {
+         context.Copy(dest, res);
+
+         SetZnFlags(context, res, writeCC, extended: false);
+
+         return;
+     }
+
+     res = context.ConditionalSelect(res, ConstF(1), Const(0));
+
+     context.Copy(dest, res);
+
+     // The flag update here uses SetFPZnFlags' default FP type regardless of isFP64.
+     SetFPZnFlags(context, res, writeCC);
+ }
+
+ /// <summary>
+ /// Applies abs/neg modifiers to both sources, compares them, and writes two predicate registers:
+ /// <paramref name="destPred"/> receives the comparison and <paramref name="destPredInv"/> its bitwise
+ /// negation, each combined with the source predicate through <paramref name="logicOp"/>.
+ /// </summary>
+ /// <remarks><paramref name="writeCC"/> is accepted but not read by this method.</remarks>
+ private static void EmitFsetp(
+     EmitterContext context,
+     FComp cmpOp,
+     BoolOp logicOp,
+     Operand srcA,
+     Operand srcB,
+     int srcPred,
+     bool srcPredInv,
+     int destPred,
+     int destPredInv,
+     bool absoluteA,
+     bool absoluteB,
+     bool negateA,
+     bool negateB,
+     bool writeCC,
+     bool isFP64 = false)
+ {
+     Instruction fpType = Instruction.FP32;
+
+     if (isFP64)
+     {
+         fpType = Instruction.FP64;
+     }
+
+     Operand lhs = context.FPAbsNeg(srcA, absoluteA, negateA, fpType);
+     Operand rhs = context.FPAbsNeg(srcB, absoluteB, negateB, fpType);
+
+     Operand comparison = GetFPComparison(context, cmpOp, lhs, rhs, fpType);
+     Operand inverted = context.BitwiseNot(comparison);
+     Operand pred = GetPredicate(context, srcPred, srcPredInv);
+
+     Operand p0Res = GetPredLogicalOp(context, logicOp, comparison, pred);
+     Operand p1Res = GetPredLogicalOp(context, logicOp, inverted, pred);
+
+     context.Copy(Register(destPred, RegisterType.Predicate), p0Res);
+     context.Copy(Register(destPredInv, RegisterType.Predicate), p1Res);
+ }
+
+ /// <summary>
+ /// Compares both element pairs, combines each comparison with the source predicate through
+ /// <paramref name="logicOp"/> and writes a packed result to register <paramref name="rd"/>. With
+ /// <paramref name="boolFloat"/> set, each outcome is turned into a float 1 / integer 0 selection and packed
+ /// with PackHalf2x16; otherwise the low 16 bits of the first outcome are OR-ed with the second outcome
+ /// shifted left by 16.
+ /// </summary>
+ private static void EmitHset2(
+     EmitterContext context,
+     FComp cmpOp,
+     BoolOp logicOp,
+     Operand[] srcA,
+     Operand[] srcB,
+     int srcPred,
+     bool srcPredInv,
+     int rd,
+     bool boolFloat)
+ {
+     Operand first = GetFPComparison(context, cmpOp, srcA[0], srcB[0]);
+     Operand second = GetFPComparison(context, cmpOp, srcA[1], srcB[1]);
+
+     Operand pred = GetPredicate(context, srcPred, srcPredInv);
+
+     first = GetPredLogicalOp(context, logicOp, first, pred);
+     second = GetPredLogicalOp(context, logicOp, second, pred);
+
+     if (!boolFloat)
+     {
+         Operand low = context.BitwiseAnd(first, Const(0xffff));
+         Operand high = context.ShiftLeft(second, Const(16));
+
+         Operand packed = context.BitwiseOr(low, high);
+
+         context.Copy(GetDest(rd), packed);
+
+         return;
+     }
+
+     first = context.ConditionalSelect(first, ConstF(1), Const(0));
+     second = context.ConditionalSelect(second, ConstF(1), Const(0));
+
+     context.Copy(GetDest(rd), context.PackHalf2x16(first, second));
+ }
+
+ /// <summary>
+ /// Compares both element pairs and writes two predicate registers. Without <paramref name="hAnd"/>, the
+ /// first register receives the first comparison and the second register the second comparison. With
+ /// <paramref name="hAnd"/>, the first register receives the AND of both comparisons and the second register
+ /// its bitwise negation. Each value is combined with the source predicate through
+ /// <paramref name="logicOp"/> before being stored.
+ /// </summary>
+ private static void EmitHsetp2(
+     EmitterContext context,
+     FComp cmpOp,
+     BoolOp logicOp,
+     Operand[] srcA,
+     Operand[] srcB,
+     int srcPred,
+     bool srcPredInv,
+     int destPred,
+     int destPredInv,
+     bool hAnd)
+ {
+     Operand first = GetFPComparison(context, cmpOp, srcA[0], srcB[0]);
+     Operand second = GetFPComparison(context, cmpOp, srcA[1], srcB[1]);
+
+     if (hAnd)
+     {
+         Operand both = context.BitwiseAnd(first, second);
+
+         first = both;
+         second = context.BitwiseNot(both);
+     }
+
+     Operand pred = GetPredicate(context, srcPred, srcPredInv);
+
+     Operand p0Res = GetPredLogicalOp(context, logicOp, first, pred);
+     Operand p1Res = GetPredLogicalOp(context, logicOp, second, pred);
+
+     context.Copy(Register(destPred, RegisterType.Predicate), p0Res);
+     context.Copy(Register(destPredInv, RegisterType.Predicate), p1Res);
+ }
+
+ /// <summary>
+ /// Builds the operand holding the result of comparing <paramref name="srcA"/> with <paramref name="srcB"/>.
+ /// <c>T</c>/<c>F</c> yield constant true/false; <c>Nan</c> yields "either source is NaN" and <c>Num</c> its
+ /// bitwise negation. For every other condition the <c>FComp.Nan</c> bit is masked off to pick the compare
+ /// instruction and, when that bit was set, the result is OR-ed with the NaN test of each source.
+ /// </summary>
+ /// <exception cref="ArgumentException">The masked condition is not one of Lt, Eq, Le, Gt, Ne, Ge.</exception>
+ private static Operand GetFPComparison(EmitterContext context, FComp cond, Operand srcA, Operand srcB, Instruction fpType = Instruction.FP32)
+ {
+     if (cond == FComp.T)
+     {
+         return Const(IrConsts.True);
+     }
+
+     if (cond == FComp.F)
+     {
+         return Const(IrConsts.False);
+     }
+
+     if (cond == FComp.Nan || cond == FComp.Num)
+     {
+         Operand anyNan = context.BitwiseOr(context.IsNan(srcA, fpType), context.IsNan(srcB, fpType));
+
+         return cond == FComp.Num ? context.BitwiseNot(anyNan) : anyNan;
+     }
+
+     Instruction inst = (cond & ~FComp.Nan) switch
+     {
+         FComp.Lt => Instruction.CompareLess,
+         FComp.Eq => Instruction.CompareEqual,
+         FComp.Le => Instruction.CompareLessOrEqual,
+         FComp.Gt => Instruction.CompareGreater,
+         FComp.Ne => Instruction.CompareNotEqual,
+         FComp.Ge => Instruction.CompareGreaterOrEqual,
+         _ => throw new ArgumentException($"Unexpected condition \"{cond}\".")
+     };
+
+     Operand res = context.Add(inst | fpType, Local(), srcA, srcB);
+
+     if ((cond & FComp.Nan) != 0)
+     {
+         res = context.BitwiseOr(res, context.IsNan(srcA, fpType));
+         res = context.BitwiseOr(res, context.IsNan(srcB, fpType));
+     }
+
+     return res;
+ }
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/Instructions/InstEmitFloatMinMax.cs b/src/Ryujinx.Graphics.Shader/Instructions/InstEmitFloatMinMax.cs
new file mode 100644
index 00000000..412a5305
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/Instructions/InstEmitFloatMinMax.cs
@@ -0,0 +1,106 @@
+using Ryujinx.Graphics.Shader.Decoders;
+using Ryujinx.Graphics.Shader.IntermediateRepresentation;
+using Ryujinx.Graphics.Shader.Translation;
+
+using static Ryujinx.Graphics.Shader.Instructions.InstEmitAluHelper;
+using static Ryujinx.Graphics.Shader.Instructions.InstEmitHelper;
+
+namespace Ryujinx.Graphics.Shader.Instructions
+{
+ static partial class InstEmit
+ {
+ /// <summary>
+ /// Emits the FP64 min/max operation whose second operand is read from a register pair.
+ /// </summary>
+ public static void DmnmxR(EmitterContext context)
+ {
+     InstDmnmxR op = context.GetOp<InstDmnmxR>();
+
+     Operand lhs = GetSrcReg(context, op.SrcA, isFP64: true);
+     Operand rhs = GetSrcReg(context, op.SrcB, isFP64: true);
+     Operand selector = GetPredicate(context, op.SrcPred, op.SrcPredInv);
+
+     EmitFmnmx(context, lhs, rhs, selector, op.Dest, op.AbsA, op.AbsB, op.NegA, op.NegB, op.WriteCC, isFP64: true);
+ }
+
+ /// <summary>
+ /// Emits the FP64 min/max operation whose second operand is built from the 20-bit immediate.
+ /// </summary>
+ public static void DmnmxI(EmitterContext context)
+ {
+     InstDmnmxI op = context.GetOp<InstDmnmxI>();
+
+     Operand lhs = GetSrcReg(context, op.SrcA, isFP64: true);
+     Operand rhs = GetSrcImm(context, Imm20ToFloat(op.Imm20), isFP64: true);
+     Operand selector = GetPredicate(context, op.SrcPred, op.SrcPredInv);
+
+     EmitFmnmx(context, lhs, rhs, selector, op.Dest, op.AbsA, op.AbsB, op.NegA, op.NegB, op.WriteCC, isFP64: true);
+ }
+
+ /// <summary>
+ /// Emits the FP64 min/max operation whose second operand is read from a constant buffer.
+ /// </summary>
+ public static void DmnmxC(EmitterContext context)
+ {
+     InstDmnmxC op = context.GetOp<InstDmnmxC>();
+
+     Operand lhs = GetSrcReg(context, op.SrcA, isFP64: true);
+     Operand rhs = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset, isFP64: true);
+     Operand selector = GetPredicate(context, op.SrcPred, op.SrcPredInv);
+
+     EmitFmnmx(context, lhs, rhs, selector, op.Dest, op.AbsA, op.AbsB, op.NegA, op.NegB, op.WriteCC, isFP64: true);
+ }
+
+ /// <summary>
+ /// Emits the FP32 min/max operation whose second operand is read from a register.
+ /// </summary>
+ public static void FmnmxR(EmitterContext context)
+ {
+     InstFmnmxR op = context.GetOp<InstFmnmxR>();
+
+     Operand lhs = GetSrcReg(context, op.SrcA);
+     Operand rhs = GetSrcReg(context, op.SrcB);
+     Operand selector = GetPredicate(context, op.SrcPred, op.SrcPredInv);
+
+     EmitFmnmx(context, lhs, rhs, selector, op.Dest, op.AbsA, op.AbsB, op.NegA, op.NegB, op.WriteCC);
+ }
+
+ /// <summary>
+ /// Emits the FP32 min/max operation whose second operand is built from the 20-bit immediate.
+ /// </summary>
+ public static void FmnmxI(EmitterContext context)
+ {
+     InstFmnmxI op = context.GetOp<InstFmnmxI>();
+
+     Operand lhs = GetSrcReg(context, op.SrcA);
+     Operand rhs = GetSrcImm(context, Imm20ToFloat(op.Imm20));
+     Operand selector = GetPredicate(context, op.SrcPred, op.SrcPredInv);
+
+     EmitFmnmx(context, lhs, rhs, selector, op.Dest, op.AbsA, op.AbsB, op.NegA, op.NegB, op.WriteCC);
+ }
+
+ /// <summary>
+ /// Emits the FP32 min/max operation whose second operand is read from a constant buffer.
+ /// </summary>
+ public static void FmnmxC(EmitterContext context)
+ {
+     InstFmnmxC op = context.GetOp<InstFmnmxC>();
+
+     Operand lhs = GetSrcReg(context, op.SrcA);
+     Operand rhs = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);
+     Operand selector = GetPredicate(context, op.SrcPred, op.SrcPredInv);
+
+     EmitFmnmx(context, lhs, rhs, selector, op.Dest, op.AbsA, op.AbsB, op.NegA, op.NegB, op.WriteCC);
+ }
+
+ /// <summary>
+ /// Shared implementation of the floating-point min/max emitters.
+ /// Both the minimum and the maximum are computed and <paramref name="srcPred"/> selects between them
+ /// (minimum when the predicate is true, maximum otherwise).
+ /// </summary>
+ /// <param name="context">Emitter context that receives the generated operations.</param>
+ /// <param name="srcA">First input operand.</param>
+ /// <param name="srcB">Second input operand.</param>
+ /// <param name="srcPred">Predicate operand selecting minimum or maximum.</param>
+ /// <param name="rd">Destination register index.</param>
+ /// <param name="absoluteA">Apply absolute value to the first input.</param>
+ /// <param name="absoluteB">Apply absolute value to the second input.</param>
+ /// <param name="negateA">Negate the first input.</param>
+ /// <param name="negateB">Negate the second input.</param>
+ /// <param name="writeCC">Forwarded to SetFPZnFlags to control the flag update.</param>
+ /// <param name="isFP64">True to operate on FP64 values, false for FP32.</param>
+ private static void EmitFmnmx(
+     EmitterContext context,
+     Operand srcA,
+     Operand srcB,
+     Operand srcPred,
+     int rd,
+     bool absoluteA,
+     bool absoluteB,
+     bool negateA,
+     bool negateB,
+     bool writeCC,
+     bool isFP64 = false)
+ {
+     Instruction fpType = Instruction.FP32;
+
+     if (isFP64)
+     {
+         fpType = Instruction.FP64;
+     }
+
+     Operand lhs = context.FPAbsNeg(srcA, absoluteA, negateA, fpType);
+     Operand rhs = context.FPAbsNeg(srcB, absoluteB, negateB, fpType);
+
+     Operand minimum = context.FPMinimum(lhs, rhs, fpType);
+     Operand maximum = context.FPMaximum(lhs, rhs, fpType);
+
+     Operand selected = context.ConditionalSelect(srcPred, minimum, maximum);
+
+     SetDest(context, selected, rd, isFP64);
+
+     SetFPZnFlags(context, selected, writeCC, fpType);
+ }
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/Instructions/InstEmitFlowControl.cs b/src/Ryujinx.Graphics.Shader/Instructions/InstEmitFlowControl.cs
new file mode 100644
index 00000000..91c23230
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/Instructions/InstEmitFlowControl.cs
@@ -0,0 +1,322 @@
+using Ryujinx.Graphics.Shader.Decoders;
+using Ryujinx.Graphics.Shader.IntermediateRepresentation;
+using Ryujinx.Graphics.Shader.Translation;
+using System.Collections.Generic;
+using System.Linq;
+
+using static Ryujinx.Graphics.Shader.Instructions.InstEmitHelper;
+using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
+
+namespace Ryujinx.Graphics.Shader.Instructions
+{
+ static partial class InstEmit
+ {
+ /// <summary>
+ /// Emits a branch to the address of the last successor of the current block.
+ /// </summary>
+ public static void Bra(EmitterContext context)
+ {
+     // The decoded operation is fetched but none of its fields are needed here.
+     _ = context.GetOp<InstBra>();
+
+     var branchTarget = context.CurrBlock.Successors[^1];
+
+     EmitBranch(context, branchTarget.Address);
+ }
+
+ /// <summary>
+ /// Emits BRK through the handler shared with CONT and SYNC.
+ /// </summary>
+ public static void Brk(EmitterContext context)
+ {
+     // The decoded operation is fetched but none of its fields are needed here.
+     _ = context.GetOp<InstBrk>();
+
+     EmitBrkContSync(context);
+ }
+
+ // Emits an indirect branch (BRX). The runtime target is the value of register SrcA plus the
+ // absolute address taken from the current operation; it is compared against the addresses of
+ // the successor blocks of the current block to decide where to jump.
+ public static void Brx(EmitterContext context)
+ {
+ InstBrx op = context.GetOp<InstBrx>();
+ InstOp currOp = context.CurrOp;
+ // When the block reports HasNext(), the first successor is skipped.
+ // NOTE(review): presumably that successor is the fall-through block rather than a BRX target — confirm against Block.
+ int startIndex = context.CurrBlock.HasNext() ? 1 : 0;
+
+ // Without any candidate target there is nothing to branch to; log and emit nothing.
+ if (context.CurrBlock.Successors.Count <= startIndex)
+ {
+ context.Config.GpuAccessor.Log($"Failed to find targets for BRX instruction at 0x{currOp.Address:X}.");
+ return;
+ }
+
+ int offset = (int)currOp.GetAbsoluteAddress();
+
+ // Runtime branch address: register value plus the constant offset.
+ Operand address = context.IAdd(Register(op.SrcA, RegisterType.Gpr), Const(offset));
+
+ var targets = context.CurrBlock.Successors.Skip(startIndex);
+
+ bool allTargetsSinglePred = true;
+ int total = context.CurrBlock.Successors.Count - startIndex;
+ int count = 0;
+
+ // Check every target except the one with the highest address (++count < total excludes it):
+ // chaining needs a single predecessor and an address located after the current block.
+ foreach (var target in targets.OrderBy(x => x.Address))
+ {
+ if (++count < total && (target.Predecessors.Count > 1 || target.Address <= context.CurrBlock.Address))
+ {
+ allTargetsSinglePred = false;
+ break;
+ }
+ }
+
+ if (allTargetsSinglePred)
+ {
+ // Chain blocks, each target block will check if the BRX target address
+ // matches its own address, if not, it jumps to the next target which will do the same check,
+ // until it reaches the last possible target, which is executed unconditionally.
+ // We can only do this if the BRX block is the only predecessor of all target blocks.
+ // Additionally, this is not supported for blocks located before the current block,
+ // since it will be too late to insert a label, but this is something that can be improved
+ // in the future if necessary.
+
+ var sortedTargets = targets.OrderBy(x => x.Address);
+
+ Block currentTarget = null;
+ ulong firstTargetAddress = 0;
+
+ foreach (Block nextTarget in sortedTargets)
+ {
+ if (currentTarget != null)
+ {
+ // Duplicate addresses are registered only once.
+ if (currentTarget.Address != nextTarget.Address)
+ {
+ context.SetBrxTarget(currentTarget.Address, address, (int)currentTarget.Address, nextTarget.Address);
+ }
+ }
+ else
+ {
+ firstTargetAddress = nextTarget.Address;
+ }
+
+ currentTarget = nextTarget;
+ }
+
+ // Enter the chain at the target with the lowest address.
+ context.Branch(context.GetLabel(firstTargetAddress));
+ }
+ else
+ {
+ // Emit the branches sequentially.
+ // This generates slightly worse code, but should work for all cases.
+
+ var sortedTargets = targets.OrderByDescending(x => x.Address);
+ ulong lastTargetAddress = ulong.MaxValue;
+
+ count = 0;
+
+ foreach (Block target in sortedTargets)
+ {
+ Operand label = context.GetLabel(target.Address);
+
+ // All but the last visited target get a compare-and-branch;
+ // the last one (lowest address) is branched to unconditionally.
+ if (++count < total)
+ {
+ // Skip the comparison when the previous target had the same address.
+ if (target.Address != lastTargetAddress)
+ {
+ context.BranchIfTrue(label, context.ICompareEqual(address, Const((int)target.Address)));
+ }
+
+ lastTargetAddress = target.Address;
+ }
+ else
+ {
+ context.Branch(label);
+ }
+ }
+ }
+ }
+
+ /// <summary>
+ /// Emits a call to the function located at the absolute address of the current operation.
+ /// Compiler generated FSI begin/end functions are replaced by the matching context operation.
+ /// </summary>
+ public static void Cal(EmitterContext context)
+ {
+     // The decoded operation is fetched but none of its fields are needed here.
+     _ = context.GetOp<InstCal>();
+
+     DecodedFunction callee = context.Program.GetFunctionByAddress(context.CurrOp.GetAbsoluteAddress());
+
+     if (!callee.IsCompilerGenerated)
+     {
+         context.Call(callee.Id, false);
+         return;
+     }
+
+     // Other compiler generated function types emit nothing.
+     FunctionType kind = callee.Type;
+
+     if (kind == FunctionType.BuiltInFSIBegin)
+     {
+         context.FSIBegin();
+     }
+     else if (kind == FunctionType.BuiltInFSIEnd)
+     {
+         context.FSIEnd();
+     }
+ }
+
+ /// <summary>
+ /// Emits CONT through the handler shared with BRK and SYNC.
+ /// </summary>
+ public static void Cont(EmitterContext context)
+ {
+     // The decoded operation is fetched but none of its fields are needed here.
+     _ = context.GetOp<InstCont>();
+
+     EmitBrkContSync(context);
+ }
+
+ /// <summary>
+ /// Emits EXIT as a return from the main function, optionally guarded by a condition code.
+ /// On a non-main function the instruction is logged and ignored.
+ /// </summary>
+ public static void Exit(EmitterContext context)
+ {
+     InstExit op = context.GetOp<InstExit>();
+
+     if (context.IsNonMain)
+     {
+         context.Config.GpuAccessor.Log("Invalid exit on non-main function.");
+         return;
+     }
+
+     // Unconditional exit.
+     if (op.Ccc == Ccc.T)
+     {
+         context.Return();
+         return;
+     }
+
+     Operand exitCond = GetCondition(context, op.Ccc, IrConsts.False);
+
+     // If the condition is always false, we don't need to do anything.
+     bool neverTaken = exitCond.Type == OperandType.Constant && exitCond.Value == IrConsts.False;
+
+     if (neverTaken)
+     {
+         return;
+     }
+
+     // Jump over the return when the condition does not hold.
+     Operand skipLabel = Label();
+     context.BranchIfFalse(skipLabel, exitCond);
+     context.Return();
+     context.MarkLabel(skipLabel);
+ }
+
+ /// <summary>
+ /// Emits KIL as a discard operation.
+ /// </summary>
+ public static void Kil(EmitterContext context)
+ {
+     // The decoded operation is fetched but none of its fields are needed here.
+     _ = context.GetOp<InstKil>();
+
+     context.Discard();
+ }
+
+ /// <summary>
+ /// Emits PBK through the handler shared with PCNT and SSY.
+ /// </summary>
+ public static void Pbk(EmitterContext context)
+ {
+     // The decoded operation is fetched but none of its fields are needed here.
+     _ = context.GetOp<InstPbk>();
+
+     EmitPbkPcntSsy(context);
+ }
+
+ /// <summary>
+ /// Emits PCNT through the handler shared with PBK and SSY.
+ /// </summary>
+ public static void Pcnt(EmitterContext context)
+ {
+     // The decoded operation is fetched but none of its fields are needed here.
+     _ = context.GetOp<InstPcnt>();
+
+     EmitPbkPcntSsy(context);
+ }
+
+ /// <summary>
+ /// Emits RET as a return from a non-main function.
+ /// On the main function the instruction is logged and ignored.
+ /// </summary>
+ public static void Ret(EmitterContext context)
+ {
+     // The decoded operation is fetched but none of its fields are needed here.
+     _ = context.GetOp<InstRet>();
+
+     if (!context.IsNonMain)
+     {
+         context.Config.GpuAccessor.Log("Invalid return on main function.");
+         return;
+     }
+
+     context.Return();
+ }
+
+ /// <summary>
+ /// Emits SSY through the handler shared with PBK and PCNT.
+ /// </summary>
+ public static void Ssy(EmitterContext context)
+ {
+     // The decoded operation is fetched but none of its fields are needed here.
+     _ = context.GetOp<InstSsy>();
+
+     EmitPbkPcntSsy(context);
+ }
+
+ /// <summary>
+ /// Emits SYNC through the handler shared with BRK and CONT.
+ /// </summary>
+ public static void Sync(EmitterContext context)
+ {
+     // The decoded operation is fetched but none of its fields are needed here.
+     _ = context.GetOp<InstSync>();
+
+     EmitBrkContSync(context);
+ }
+
+ /// <summary>
+ /// Shared implementation of the push instructions (PBK, PCNT and SSY).
+ /// For every block consuming this push operation, the local variable associated with that
+ /// consumer receives the push operation id recorded in the consumer's sync targets.
+ /// </summary>
+ private static void EmitPbkPcntSsy(EmitterContext context)
+ {
+     var pushAddress = context.CurrOp.Address;
+
+     // Locate the push operation of the current block that corresponds to this instruction.
+     var pushOp = context.CurrBlock.PushOpCodes.First(x => x.Op.Address == pushAddress);
+
+     foreach (KeyValuePair<Block, Operand> consumer in pushOp.Consumers)
+     {
+         int pushOpId = consumer.Key.SyncTargets[pushAddress].PushOpId;
+
+         context.Copy(consumer.Value, Const(pushOpId));
+     }
+ }
+
+ /// <summary>
+ /// Shared implementation of the pop instructions (BRK, CONT and SYNC).
+ /// With a single sync target a regular branch is emitted; otherwise each target gets a
+ /// conditional branch taken when the consumer local holds the id of its push operation.
+ /// </summary>
+ private static void EmitBrkContSync(EmitterContext context)
+ {
+     var syncTargets = context.CurrBlock.SyncTargets;
+
+     if (syncTargets.Count != 1)
+     {
+         // TODO: Support CC here as well (condition).
+         foreach (SyncTarget syncTarget in syncTargets.Values)
+         {
+             PushOpInfo pushInfo = syncTarget.PushOpInfo;
+
+             Operand targetLabel = context.GetLabel(pushInfo.Op.GetAbsoluteAddress());
+             Operand pushedId = pushInfo.Consumers[context.CurrBlock];
+
+             Operand isSelected = context.ICompareEqual(pushedId, Const(syncTarget.PushOpId));
+
+             context.BranchIfTrue(targetLabel, isSelected);
+         }
+
+         return;
+     }
+
+     // If we have only one target, then the SSY/PBK is basically
+     // a branch, we can produce better codegen for this case.
+     EmitBranch(context, syncTargets.Values.First().PushOpInfo.Op.GetAbsoluteAddress());
+ }
+
+ /// <summary>
+ /// Emits a branch to <paramref name="address"/>, honoring the predicate and condition code
+ /// decoded from the raw opcode of the current operation.
+ /// </summary>
+ /// <param name="context">Emitter context that receives the generated operations.</param>
+ /// <param name="address">Address of the branch target.</param>
+ private static void EmitBranch(EmitterContext context, ulong address)
+ {
+     InstOp currOp = context.CurrOp;
+     InstConditional conditional = new InstConditional(currOp.RawOpCode);
+
+     // If we're branching to the next instruction, then the branch
+     // is useless and we can ignore it.
+     if (address == currOp.Address + 8)
+     {
+         return;
+     }
+
+     Operand target = context.GetLabel(address);
+
+     Operand predicate = Register(conditional.Pred, RegisterType.Predicate);
+
+     bool alwaysTruePredicate = conditional.Pred == RegisterConsts.PredicateTrueIndex;
+
+     if (conditional.Ccc != Ccc.T)
+     {
+         // A condition code is present: combine it with the (possibly inverted) predicate.
+         Operand ccCond = GetCondition(context, conditional.Ccc);
+
+         if (alwaysTruePredicate)
+         {
+             predicate = ccCond;
+         }
+         else
+         {
+             Operand effectivePred = conditional.PredInv ? context.BitwiseNot(predicate) : predicate;
+
+             predicate = context.BitwiseAnd(effectivePred, ccCond);
+         }
+
+         context.BranchIfTrue(target, predicate);
+
+         return;
+     }
+
+     if (alwaysTruePredicate)
+     {
+         context.Branch(target);
+     }
+     else if (conditional.PredInv)
+     {
+         context.BranchIfFalse(target, predicate);
+     }
+     else
+     {
+         context.BranchIfTrue(target, predicate);
+     }
+ }
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/Instructions/InstEmitHelper.cs b/src/Ryujinx.Graphics.Shader/Instructions/InstEmitHelper.cs
new file mode 100644
index 00000000..0ba4667e
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/Instructions/InstEmitHelper.cs
@@ -0,0 +1,266 @@
+using Ryujinx.Graphics.Shader.Decoders;
+using Ryujinx.Graphics.Shader.IntermediateRepresentation;
+using Ryujinx.Graphics.Shader.Translation;
+using System;
+using System.Runtime.CompilerServices;
+
+using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
+
+namespace Ryujinx.Graphics.Shader.Instructions
+{
+ static class InstEmitHelper
+ {
+ /// <summary>
+ /// Gets the operand for flag register 0 (ZF).
+ /// </summary>
+ public static Operand GetZF() => Register(0, RegisterType.Flag);
+
+ /// <summary>
+ /// Gets the operand for flag register 1 (NF).
+ /// </summary>
+ public static Operand GetNF() => Register(1, RegisterType.Flag);
+
+ /// <summary>
+ /// Gets the operand for flag register 2 (CF).
+ /// </summary>
+ public static Operand GetCF() => Register(2, RegisterType.Flag);
+
+ /// <summary>
+ /// Gets the operand for flag register 3 (VF).
+ /// </summary>
+ public static Operand GetVF() => Register(3, RegisterType.Flag);
+
+ /// <summary>
+ /// Gets the general purpose register operand with index <paramref name="rd"/>.
+ /// </summary>
+ public static Operand GetDest(int rd) => Register(rd, RegisterType.Gpr);
+
+ /// <summary>
+ /// Gets the general purpose register operand with index <paramref name="rd"/> with bit 0 set,
+ /// which is the second register of a pair.
+ /// </summary>
+ public static Operand GetDest2(int rd) => Register(rd | 1, RegisterType.Gpr);
+
+ /// <summary>
+ /// Gets a source operand read from a constant buffer.
+ /// For FP64, the words at <paramref name="cbufOffset"/> and the following offset are packed into a double.
+ /// </summary>
+ /// <param name="context">Emitter context that receives the generated operations.</param>
+ /// <param name="cbufSlot">Constant buffer slot.</param>
+ /// <param name="cbufOffset">Offset of the (first) word inside the constant buffer.</param>
+ /// <param name="isFP64">True to read a packed FP64 value, false to read a single word.</param>
+ /// <returns>The source operand.</returns>
+ public static Operand GetSrcCbuf(EmitterContext context, int cbufSlot, int cbufOffset, bool isFP64 = false)
+ {
+     if (!isFP64)
+     {
+         return Cbuf(cbufSlot, cbufOffset);
+     }
+
+     Operand word0 = Cbuf(cbufSlot, cbufOffset);
+     Operand word1 = Cbuf(cbufSlot, cbufOffset + 1);
+
+     return context.PackDouble2x32(word0, word1);
+ }
+
+ /// <summary>
+ /// Gets a source operand for an immediate value.
+ /// For FP64, the immediate is packed as the second word of a double whose first word is zero.
+ /// </summary>
+ /// <param name="context">Emitter context that receives the generated operations.</param>
+ /// <param name="imm">Immediate value.</param>
+ /// <param name="isFP64">True to build a packed FP64 value, false for a plain constant.</param>
+ /// <returns>The source operand.</returns>
+ public static Operand GetSrcImm(EmitterContext context, int imm, bool isFP64 = false)
+ {
+     if (!isFP64)
+     {
+         return Const(imm);
+     }
+
+     Operand word0 = Const(0);
+     Operand word1 = Const(imm);
+
+     return context.PackDouble2x32(word0, word1);
+ }
+
+ /// <summary>
+ /// Gets a source operand read from a general purpose register.
+ /// For FP64, registers <paramref name="reg"/> and <c>reg | 1</c> are packed into a double.
+ /// </summary>
+ /// <param name="context">Emitter context that receives the generated operations.</param>
+ /// <param name="reg">Register index.</param>
+ /// <param name="isFP64">True to read a packed FP64 value, false to read a single register.</param>
+ /// <returns>The source operand.</returns>
+ public static Operand GetSrcReg(EmitterContext context, int reg, bool isFP64 = false)
+ {
+     if (!isFP64)
+     {
+         return Register(reg, RegisterType.Gpr);
+     }
+
+     Operand word0 = Register(reg, RegisterType.Gpr);
+     Operand word1 = Register(reg | 1, RegisterType.Gpr);
+
+     return context.PackDouble2x32(word0, word1);
+ }
+
+ /// <summary>
+ /// Gets the two half-precision source values from a register, applying the swizzle
+ /// and then the absolute value/negate modifiers.
+ /// </summary>
+ public static Operand[] GetHalfSrc(
+     EmitterContext context,
+     HalfSwizzle swizzle,
+     int ra,
+     bool negate,
+     bool absolute)
+ {
+     Operand packed = GetSrcReg(context, ra);
+     Operand[] halves = GetHalfUnpacked(context, packed, swizzle);
+
+     return FPAbsNeg(context, halves, absolute, negate);
+ }
+
+ /// <summary>
+ /// Gets the two half-precision source values from a constant buffer word, applying the swizzle
+ /// and then the absolute value/negate modifiers.
+ /// </summary>
+ public static Operand[] GetHalfSrc(
+     EmitterContext context,
+     HalfSwizzle swizzle,
+     int cbufSlot,
+     int cbufOffset,
+     bool negate,
+     bool absolute)
+ {
+     Operand packed = GetSrcCbuf(context, cbufSlot, cbufOffset);
+     Operand[] halves = GetHalfUnpacked(context, packed, swizzle);
+
+     return FPAbsNeg(context, halves, absolute, negate);
+ }
+
+ /// <summary>
+ /// Gets the two half-precision source values from a pair of immediates.
+ /// Each immediate is shifted left by 6 bits, reinterpreted as a 16-bit half and converted to a float constant.
+ /// </summary>
+ public static Operand[] GetHalfSrc(EmitterContext context, int immH0, int immH1)
+ {
+     ushort bitsH0 = (ushort)(immH0 << 6);
+     ushort bitsH1 = (ushort)(immH1 << 6);
+
+     float valueH0 = (float)Unsafe.As<ushort, Half>(ref bitsH0);
+     float valueH1 = (float)Unsafe.As<ushort, Half>(ref bitsH1);
+
+     return new[] { ConstF(valueH0), ConstF(valueH1) };
+ }
+
+ /// <summary>
+ /// Gets the two half-precision source values packed in a 32-bit immediate.
+ /// The low and high 16 bits are each reinterpreted as a half and converted to a float constant.
+ /// </summary>
+ public static Operand[] GetHalfSrc(EmitterContext context, int imm32)
+ {
+     ushort bitsLow = (ushort)imm32;
+     ushort bitsHigh = (ushort)(imm32 >> 16);
+
+     float valueLow = (float)Unsafe.As<ushort, Half>(ref bitsLow);
+     float valueHigh = (float)Unsafe.As<ushort, Half>(ref bitsHigh);
+
+     return new[] { ConstF(valueLow), ConstF(valueHigh) };
+ }
+
+ /// <summary>
+ /// Applies the absolute value/negate modifiers to every element of <paramref name="operands"/>.
+ /// The array is modified in place and the same instance is returned.
+ /// </summary>
+ public static Operand[] FPAbsNeg(EmitterContext context, Operand[] operands, bool abs, bool neg)
+ {
+     int position = 0;
+
+     while (position < operands.Length)
+     {
+         Operand original = operands[position];
+
+         operands[position] = context.FPAbsNeg(original, abs, neg);
+
+         position++;
+     }
+
+     return operands;
+ }
+
+ /// <summary>
+ /// Produces the two values selected by <paramref name="swizzle"/> from the packed source.
+ /// </summary>
+ /// <param name="context">Emitter context that receives the generated operations.</param>
+ /// <param name="src">Source operand holding the packed value.</param>
+ /// <param name="swizzle">Selection of the halves to return; F32 returns the source itself twice.</param>
+ /// <returns>Array with the two selected values.</returns>
+ /// <exception cref="ArgumentException">The swizzle is not one of the handled values.</exception>
+ public static Operand[] GetHalfUnpacked(EmitterContext context, Operand src, HalfSwizzle swizzle)
+ {
+     return swizzle switch
+     {
+         HalfSwizzle.F16 => new[] { context.UnpackHalf2x16Low(src), context.UnpackHalf2x16High(src) },
+         HalfSwizzle.F32 => new[] { src, src },
+         HalfSwizzle.H0H0 => new[] { context.UnpackHalf2x16Low(src), context.UnpackHalf2x16Low(src) },
+         HalfSwizzle.H1H1 => new[] { context.UnpackHalf2x16High(src), context.UnpackHalf2x16High(src) },
+         _ => throw new ArgumentException($"Invalid swizzle \"{swizzle}\"."),
+     };
+ }
+
+ /// <summary>
+ /// Packs the two half results according to the output format.
+ /// The merge formats keep one half from the current value of the destination register.
+ /// </summary>
+ /// <param name="context">Emitter context that receives the generated operations.</param>
+ /// <param name="swizzle">Output format.</param>
+ /// <param name="results">The two results to pack.</param>
+ /// <param name="rd">Destination register index, read by the merge formats.</param>
+ /// <returns>The packed value to be written to the destination.</returns>
+ /// <exception cref="ArgumentException">The format is not one of the handled values.</exception>
+ public static Operand GetHalfPacked(EmitterContext context, OFmt swizzle, Operand[] results, int rd)
+ {
+     return swizzle switch
+     {
+         OFmt.F16 => context.PackHalf2x16(results[0], results[1]),
+         OFmt.F32 => results[0],
+
+         // Replace the low half, keep the high half of the destination.
+         OFmt.MrgH0 => context.PackHalf2x16(results[0], GetHalfDest(context, rd, isHigh: true)),
+
+         // Replace the high half, keep the low half of the destination.
+         OFmt.MrgH1 => context.PackHalf2x16(GetHalfDest(context, rd, isHigh: false), results[1]),
+
+         _ => throw new ArgumentException($"Invalid swizzle \"{swizzle}\"."),
+     };
+ }
+
+ /// <summary>
+ /// Unpacks the high or the low half of the destination register <paramref name="rd"/>.
+ /// </summary>
+ public static Operand GetHalfDest(EmitterContext context, int rd, bool isHigh)
+ {
+     Operand packed = GetDest(rd);
+
+     return isHigh
+         ? context.UnpackHalf2x16High(packed)
+         : context.UnpackHalf2x16Low(packed);
+ }
+
+ /// <summary>
+ /// Gets the predicate register operand with index <paramref name="pred"/>,
+ /// bitwise inverted when <paramref name="not"/> is true.
+ /// </summary>
+ public static Operand GetPredicate(EmitterContext context, int pred, bool not)
+ {
+     Operand predicate = Register(pred, RegisterType.Predicate);
+
+     return not ? context.BitwiseNot(predicate) : predicate;
+ }
+
+ /// <summary>
+ /// Writes <paramref name="value"/> to the destination register.
+ /// For FP64, the low word goes to register <paramref name="rd"/> and the high word to its pair register.
+ /// </summary>
+ public static void SetDest(EmitterContext context, Operand value, int rd, bool isFP64)
+ {
+     if (!isFP64)
+     {
+         context.Copy(GetDest(rd), value);
+         return;
+     }
+
+     context.Copy(GetDest(rd), context.UnpackDouble2x32Low(value));
+     context.Copy(GetDest2(rd), context.UnpackDouble2x32High(value));
+ }
+
+ /// <summary>
+ /// Sign-extends the low 16 bits of <paramref name="imm16"/> to a 32-bit integer.
+ /// </summary>
+ public static int Imm16ToSInt(int imm16) => (short)imm16;
+
+ /// <summary>
+ /// Moves a 20-bit immediate to the top 20 bits of a 32-bit word, leaving the low 12 bits zero.
+ /// </summary>
+ public static int Imm20ToFloat(int imm20)
+ {
+     const int LowBitsCount = 32 - 20;
+
+     return imm20 << LowBitsCount;
+ }
+
+ /// <summary>
+ /// Sign-extends the low 20 bits of <paramref name="imm20"/> to a 32-bit integer.
+ /// </summary>
+ public static int Imm20ToSInt(int imm20)
+ {
+     const int UnusedBits = 32 - 20;
+
+     // Move bit 19 to the sign position, then shift back arithmetically.
+     return (imm20 << UnusedBits) >> UnusedBits;
+ }
+
+ /// <summary>
+ /// Sign-extends the low 24 bits of <paramref name="imm24"/> to a 32-bit integer.
+ /// </summary>
+ public static int Imm24ToSInt(int imm24)
+ {
+     const int UnusedBits = 32 - 24;
+
+     // Move bit 23 to the sign position, then shift back arithmetically.
+     return (imm24 << UnusedBits) >> UnusedBits;
+ }
+
+ /// <summary>
+ /// Emits a signed bitfield extract of the low <paramref name="srcBits"/> bits of <paramref name="src"/>.
+ /// </summary>
+ public static Operand SignExtendTo32(EmitterContext context, Operand src, int srcBits)
+     => context.BitfieldExtractS32(src, Const(0), Const(srcBits));
+
+ /// <summary>
+ /// Emits a bitwise AND that keeps only the low <paramref name="srcBits"/> bits of <paramref name="src"/>.
+ /// </summary>
+ public static Operand ZeroExtendTo32(EmitterContext context, Operand src, int srcBits)
+ {
+     uint lowBitsMask = uint.MaxValue >> (32 - srcBits);
+
+     return context.BitwiseAnd(src, Const((int)lowBitsMask));
+ }
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/Instructions/InstEmitIntegerArithmetic.cs b/src/Ryujinx.Graphics.Shader/Instructions/InstEmitIntegerArithmetic.cs
new file mode 100644
index 00000000..374e3d61
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/Instructions/InstEmitIntegerArithmetic.cs
@@ -0,0 +1,699 @@
+using Ryujinx.Graphics.Shader.Decoders;
+using Ryujinx.Graphics.Shader.IntermediateRepresentation;
+using Ryujinx.Graphics.Shader.Translation;
+
+using static Ryujinx.Graphics.Shader.Instructions.InstEmitAluHelper;
+using static Ryujinx.Graphics.Shader.Instructions.InstEmitHelper;
+using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
+
+namespace Ryujinx.Graphics.Shader.Instructions
+{
+ static partial class InstEmit
+ {
+ /// <summary>
+ /// Emits the integer add whose second operand is read from a register.
+ /// </summary>
+ public static void IaddR(EmitterContext context)
+ {
+     InstIaddR op = context.GetOp<InstIaddR>();
+
+     Operand lhs = GetSrcReg(context, op.SrcA);
+     Operand rhs = GetSrcReg(context, op.SrcB);
+
+     EmitIadd(context, lhs, rhs, op.Dest, op.AvgMode, op.X, op.WriteCC);
+ }
+
+ /// <summary>
+ /// Emits the integer add whose second operand is the sign-extended 20-bit immediate.
+ /// </summary>
+ public static void IaddI(EmitterContext context)
+ {
+     InstIaddI op = context.GetOp<InstIaddI>();
+
+     Operand lhs = GetSrcReg(context, op.SrcA);
+     Operand rhs = GetSrcImm(context, Imm20ToSInt(op.Imm20));
+
+     EmitIadd(context, lhs, rhs, op.Dest, op.AvgMode, op.X, op.WriteCC);
+ }
+
+ /// <summary>
+ /// Emits the integer add whose second operand is read from a constant buffer.
+ /// </summary>
+ public static void IaddC(EmitterContext context)
+ {
+     InstIaddC op = context.GetOp<InstIaddC>();
+
+     Operand lhs = GetSrcReg(context, op.SrcA);
+     Operand rhs = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);
+
+     EmitIadd(context, lhs, rhs, op.Dest, op.AvgMode, op.X, op.WriteCC);
+ }
+
+ /// <summary>
+ /// Emits the integer add whose second operand is the 32-bit immediate.
+ /// </summary>
+ public static void Iadd32i(EmitterContext context)
+ {
+     InstIadd32i op = context.GetOp<InstIadd32i>();
+
+     Operand lhs = GetSrcReg(context, op.SrcA);
+     Operand rhs = GetSrcImm(context, op.Imm32);
+
+     EmitIadd(context, lhs, rhs, op.Dest, op.AvgMode, op.X, op.WriteCC);
+ }
+
+ /// <summary>
+ /// Emits the three-input integer add with all operands read from registers,
+ /// including the per-operand half selection and the shift mode of the instruction.
+ /// </summary>
+ public static void Iadd3R(EmitterContext context)
+ {
+     InstIadd3R op = context.GetOp<InstIadd3R>();
+
+     Operand first = GetSrcReg(context, op.SrcA);
+     Operand second = GetSrcReg(context, op.SrcB);
+     Operand third = GetSrcReg(context, op.SrcC);
+
+     EmitIadd3(context, op.Lrs, first, second, third, op.Apart, op.Bpart, op.Cpart, op.Dest, op.NegA, op.NegB, op.NegC);
+ }
+
+ /// <summary>
+ /// Emits the three-input integer add whose second operand is the sign-extended 20-bit immediate.
+ /// No shift mode is used and all operands are taken as full 32-bit values.
+ /// </summary>
+ public static void Iadd3I(EmitterContext context)
+ {
+     InstIadd3I op = context.GetOp<InstIadd3I>();
+
+     Operand first = GetSrcReg(context, op.SrcA);
+     Operand second = GetSrcImm(context, Imm20ToSInt(op.Imm20));
+     Operand third = GetSrcReg(context, op.SrcC);
+
+     EmitIadd3(context, Lrs.None, first, second, third, HalfSelect.B32, HalfSelect.B32, HalfSelect.B32, op.Dest, op.NegA, op.NegB, op.NegC);
+ }
+
+ /// <summary>
+ /// Emits the three-input integer add whose second operand is read from a constant buffer.
+ /// No shift mode is used and all operands are taken as full 32-bit values.
+ /// </summary>
+ public static void Iadd3C(EmitterContext context)
+ {
+     InstIadd3C op = context.GetOp<InstIadd3C>();
+
+     Operand first = GetSrcReg(context, op.SrcA);
+     Operand second = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);
+     Operand third = GetSrcReg(context, op.SrcC);
+
+     EmitIadd3(context, Lrs.None, first, second, third, HalfSelect.B32, HalfSelect.B32, HalfSelect.B32, op.Dest, op.NegA, op.NegB, op.NegC);
+ }
+
+ /// <summary>
+ /// Emits the integer multiply-add with both the multiplier and the addend read from registers.
+ /// </summary>
+ public static void ImadR(EmitterContext context)
+ {
+     InstImadR op = context.GetOp<InstImadR>();
+
+     Operand multiplicand = GetSrcReg(context, op.SrcA);
+     Operand multiplier = GetSrcReg(context, op.SrcB);
+     Operand addend = GetSrcReg(context, op.SrcC);
+
+     EmitImad(context, multiplicand, multiplier, addend, op.Dest, op.AvgMode, op.ASigned, op.BSigned, op.Hilo);
+ }
+
+ /// <summary>
+ /// Emits the integer multiply-add whose multiplier is the sign-extended 20-bit immediate.
+ /// </summary>
+ public static void ImadI(EmitterContext context)
+ {
+     InstImadI op = context.GetOp<InstImadI>();
+
+     Operand multiplicand = GetSrcReg(context, op.SrcA);
+     Operand multiplier = GetSrcImm(context, Imm20ToSInt(op.Imm20));
+     Operand addend = GetSrcReg(context, op.SrcC);
+
+     EmitImad(context, multiplicand, multiplier, addend, op.Dest, op.AvgMode, op.ASigned, op.BSigned, op.Hilo);
+ }
+
+ /// <summary>
+ /// Emits the integer multiply-add whose multiplier is read from a constant buffer.
+ /// </summary>
+ public static void ImadC(EmitterContext context)
+ {
+     InstImadC op = context.GetOp<InstImadC>();
+
+     Operand multiplicand = GetSrcReg(context, op.SrcA);
+     Operand multiplier = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);
+     Operand addend = GetSrcReg(context, op.SrcC);
+
+     EmitImad(context, multiplicand, multiplier, addend, op.Dest, op.AvgMode, op.ASigned, op.BSigned, op.Hilo);
+ }
+
+ /// <summary>
+ /// Emits the integer multiply-add whose multiplier is read from the SrcC register
+ /// and whose addend is read from a constant buffer.
+ /// </summary>
+ public static void ImadRc(EmitterContext context)
+ {
+     InstImadRc op = context.GetOp<InstImadRc>();
+
+     Operand multiplicand = GetSrcReg(context, op.SrcA);
+     Operand multiplier = GetSrcReg(context, op.SrcC);
+     Operand addend = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);
+
+     EmitImad(context, multiplicand, multiplier, addend, op.Dest, op.AvgMode, op.ASigned, op.BSigned, op.Hilo);
+ }
+
+ /// <summary>
+ /// Emits the integer multiply-add whose multiplier is the 32-bit immediate.
+ /// The addend is read from the destination register.
+ /// </summary>
+ public static void Imad32i(EmitterContext context)
+ {
+     InstImad32i op = context.GetOp<InstImad32i>();
+
+     Operand multiplicand = GetSrcReg(context, op.SrcA);
+     Operand multiplier = GetSrcImm(context, op.Imm32);
+     Operand addend = GetSrcReg(context, op.Dest);
+
+     EmitImad(context, multiplicand, multiplier, addend, op.Dest, op.AvgMode, op.ASigned, op.BSigned, op.Hilo);
+ }
+
+ /// <summary>
+ /// Emits the integer multiply whose multiplier is read from a register.
+ /// Implemented as a multiply-add with a constant zero addend.
+ /// </summary>
+ public static void ImulR(EmitterContext context)
+ {
+     InstImulR op = context.GetOp<InstImulR>();
+
+     Operand multiplicand = GetSrcReg(context, op.SrcA);
+     Operand multiplier = GetSrcReg(context, op.SrcB);
+
+     EmitImad(context, multiplicand, multiplier, Const(0), op.Dest, AvgMode.NoNeg, op.ASigned, op.BSigned, op.Hilo);
+ }
+
+ /// <summary>
+ /// Emits the integer multiply whose multiplier is the sign-extended 20-bit immediate.
+ /// Implemented as a multiply-add with a constant zero addend.
+ /// </summary>
+ public static void ImulI(EmitterContext context)
+ {
+     InstImulI op = context.GetOp<InstImulI>();
+
+     Operand multiplicand = GetSrcReg(context, op.SrcA);
+     Operand multiplier = GetSrcImm(context, Imm20ToSInt(op.Imm20));
+
+     EmitImad(context, multiplicand, multiplier, Const(0), op.Dest, AvgMode.NoNeg, op.ASigned, op.BSigned, op.Hilo);
+ }
+
+ /// <summary>
+ /// Emits the integer multiply whose multiplier is read from a constant buffer.
+ /// Implemented as a multiply-add with a constant zero addend.
+ /// </summary>
+ public static void ImulC(EmitterContext context)
+ {
+     InstImulC op = context.GetOp<InstImulC>();
+
+     Operand multiplicand = GetSrcReg(context, op.SrcA);
+     Operand multiplier = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);
+
+     EmitImad(context, multiplicand, multiplier, Const(0), op.Dest, AvgMode.NoNeg, op.ASigned, op.BSigned, op.Hilo);
+ }
+
+ /// <summary>
+ /// Emits the integer multiply whose multiplier is the 32-bit immediate.
+ /// Implemented as a multiply-add with a constant zero addend.
+ /// </summary>
+ public static void Imul32i(EmitterContext context)
+ {
+     InstImul32i op = context.GetOp<InstImul32i>();
+
+     Operand multiplicand = GetSrcReg(context, op.SrcA);
+     Operand multiplier = GetSrcImm(context, op.Imm32);
+
+     EmitImad(context, multiplicand, multiplier, Const(0), op.Dest, AvgMode.NoNeg, op.ASigned, op.BSigned, op.Hilo);
+ }
+
+ /// <summary>
+ /// Emits the shift-and-add whose second operand is read from a register.
+ /// </summary>
+ public static void IscaddR(EmitterContext context)
+ {
+     InstIscaddR op = context.GetOp<InstIscaddR>();
+
+     Operand scaled = GetSrcReg(context, op.SrcA);
+     Operand addend = GetSrcReg(context, op.SrcB);
+
+     EmitIscadd(context, scaled, addend, op.Dest, op.Imm5, op.AvgMode, op.WriteCC);
+ }
+
+ /// <summary>
+ /// Emits the shift-and-add whose second operand is the sign-extended 20-bit immediate.
+ /// </summary>
+ public static void IscaddI(EmitterContext context)
+ {
+     InstIscaddI op = context.GetOp<InstIscaddI>();
+
+     Operand scaled = GetSrcReg(context, op.SrcA);
+     Operand addend = GetSrcImm(context, Imm20ToSInt(op.Imm20));
+
+     EmitIscadd(context, scaled, addend, op.Dest, op.Imm5, op.AvgMode, op.WriteCC);
+ }
+
+ /// <summary>
+ /// Emits the shift-and-add whose second operand is read from a constant buffer.
+ /// </summary>
+ public static void IscaddC(EmitterContext context)
+ {
+     InstIscaddC op = context.GetOp<InstIscaddC>();
+
+     Operand scaled = GetSrcReg(context, op.SrcA);
+     Operand addend = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);
+
+     EmitIscadd(context, scaled, addend, op.Dest, op.Imm5, op.AvgMode, op.WriteCC);
+ }
+
+ /// <summary>
+ /// Emits the shift-and-add whose second operand is the 32-bit immediate.
+ /// This form never negates its operands.
+ /// </summary>
+ public static void Iscadd32i(EmitterContext context)
+ {
+     InstIscadd32i op = context.GetOp<InstIscadd32i>();
+
+     Operand scaled = GetSrcReg(context, op.SrcA);
+     Operand addend = GetSrcImm(context, op.Imm32);
+
+     EmitIscadd(context, scaled, addend, op.Dest, op.Imm5, AvgMode.NoNeg, op.WriteCC);
+ }
+
+ /// <summary>
+ /// Emits LEA with the second operand read from a register.
+ /// </summary>
+ public static void LeaR(EmitterContext context)
+ {
+     InstLeaR op = context.GetOp<InstLeaR>();
+
+     Operand index = GetSrcReg(context, op.SrcA);
+     Operand baseValue = GetSrcReg(context, op.SrcB);
+
+     EmitLea(context, index, baseValue, op.Dest, op.NegA, op.ImmU5);
+ }
+
+ /// <summary>
+ /// Emits LEA with the second operand being the sign-extended 20-bit immediate.
+ /// </summary>
+ public static void LeaI(EmitterContext context)
+ {
+     InstLeaI op = context.GetOp<InstLeaI>();
+
+     Operand index = GetSrcReg(context, op.SrcA);
+     Operand baseValue = GetSrcImm(context, Imm20ToSInt(op.Imm20));
+
+     EmitLea(context, index, baseValue, op.Dest, op.NegA, op.ImmU5);
+ }
+
+ /// <summary>
+ /// Emits LEA with the second operand read from a constant buffer.
+ /// </summary>
+ public static void LeaC(EmitterContext context)
+ {
+     InstLeaC op = context.GetOp<InstLeaC>();
+
+     Operand index = GetSrcReg(context, op.SrcA);
+     Operand baseValue = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);
+
+     EmitLea(context, index, baseValue, op.Dest, op.NegA, op.ImmU5);
+ }
+
+ /// <summary>
+ /// Emits the high-word LEA with the second operand read from a register.
+ /// </summary>
+ public static void LeaHiR(EmitterContext context)
+ {
+     InstLeaHiR op = context.GetOp<InstLeaHiR>();
+
+     Operand indexLow = GetSrcReg(context, op.SrcA);
+     Operand baseValue = GetSrcReg(context, op.SrcB);
+     Operand indexHigh = GetSrcReg(context, op.SrcC);
+
+     EmitLeaHi(context, indexLow, baseValue, indexHigh, op.Dest, op.NegA, op.ImmU5);
+ }
+
+ /// <summary>
+ /// Emits the high-word LEA with the second operand read from a constant buffer.
+ /// </summary>
+ public static void LeaHiC(EmitterContext context)
+ {
+     InstLeaHiC op = context.GetOp<InstLeaHiC>();
+
+     Operand indexLow = GetSrcReg(context, op.SrcA);
+     Operand baseValue = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);
+     Operand indexHigh = GetSrcReg(context, op.SrcC);
+
+     EmitLeaHi(context, indexLow, baseValue, indexHigh, op.Dest, op.NegA, op.ImmU5);
+ }
+
+ /// <summary>
+ /// Emits XMAD with the second multiplication operand read from a register.
+ /// </summary>
+ public static void XmadR(EmitterContext context)
+ {
+     InstXmadR op = context.GetOp<InstXmadR>();
+
+     Operand factorA = GetSrcReg(context, op.SrcA);
+     Operand factorB = GetSrcReg(context, op.SrcB);
+     Operand addend = GetSrcReg(context, op.SrcC);
+
+     EmitXmad(context, op.XmadCop, factorA, factorB, addend, op.Dest, op.ASigned, op.BSigned, op.HiloA, op.HiloB, op.Psl, op.Mrg, op.X, op.WriteCC);
+ }
+
+ /// <summary>
+ /// Emits XMAD with the second multiplication operand being the 16-bit immediate.
+ /// The low half of the immediate operand is always used.
+ /// </summary>
+ public static void XmadI(EmitterContext context)
+ {
+     InstXmadI op = context.GetOp<InstXmadI>();
+
+     Operand factorA = GetSrcReg(context, op.SrcA);
+     Operand factorB = GetSrcImm(context, op.Imm16);
+     Operand addend = GetSrcReg(context, op.SrcC);
+
+     EmitXmad(context, op.XmadCop, factorA, factorB, addend, op.Dest, op.ASigned, op.BSigned, op.HiloA, false, op.Psl, op.Mrg, op.X, op.WriteCC);
+ }
+
+ /// <summary>
+ /// Emits XMAD with the second multiplication operand read from a constant buffer.
+ /// </summary>
+ public static void XmadC(EmitterContext context)
+ {
+     InstXmadC op = context.GetOp<InstXmadC>();
+
+     Operand factorA = GetSrcReg(context, op.SrcA);
+     Operand factorB = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);
+     Operand addend = GetSrcReg(context, op.SrcC);
+
+     EmitXmad(context, op.XmadCop, factorA, factorB, addend, op.Dest, op.ASigned, op.BSigned, op.HiloA, op.HiloB, op.Psl, op.Mrg, op.X, op.WriteCC);
+ }
+
+ /// <summary>
+ /// Emits XMAD with the second multiplication operand read from the SrcC register
+ /// and the addend read from a constant buffer. Product shift and merge are never applied in this form.
+ /// </summary>
+ public static void XmadRc(EmitterContext context)
+ {
+     InstXmadRc op = context.GetOp<InstXmadRc>();
+
+     Operand factorA = GetSrcReg(context, op.SrcA);
+     Operand factorB = GetSrcReg(context, op.SrcC);
+     Operand addend = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);
+
+     EmitXmad(context, op.XmadCop, factorA, factorB, addend, op.Dest, op.ASigned, op.BSigned, op.HiloA, op.HiloB, false, false, op.X, op.WriteCC);
+ }
+
+ /// <summary>
+ /// Shared implementation of the integer add emitters.
+ /// </summary>
+ /// <param name="context">Emitter context that receives the generated operations.</param>
+ /// <param name="srcA">First addend.</param>
+ /// <param name="srcB">Second addend.</param>
+ /// <param name="rd">Destination register index.</param>
+ /// <param name="avgMode">Selects which of the addends, if any, is negated.</param>
+ /// <param name="extended">True to also add bit 0 of the carry flag.</param>
+ /// <param name="writeCC">True to update the condition code flags.</param>
+ private static void EmitIadd(
+     EmitterContext context,
+     Operand srcA,
+     Operand srcB,
+     int rd,
+     AvgMode avgMode,
+     bool extended,
+     bool writeCC)
+ {
+     Operand lhs = context.INegate(srcA, avgMode == AvgMode.NegA);
+     Operand rhs = context.INegate(srcB, avgMode == AvgMode.NegB);
+
+     Operand sum = context.IAdd(lhs, rhs);
+
+     if (extended)
+     {
+         Operand carryIn = context.BitwiseAnd(GetCF(), Const(1));
+
+         sum = context.IAdd(sum, carryIn);
+     }
+
+     SetIaddFlags(context, sum, lhs, rhs, writeCC, extended);
+
+     // TODO: SAT.
+
+     context.Copy(GetDest(rd), sum);
+ }
+
+ /// <summary>
+ /// Shared implementation of the three-input integer add emitters.
+ /// Computes ((A + B) shifted according to <paramref name="mode"/>) + C, where each input is
+ /// first reduced to the selected part and optionally negated.
+ /// </summary>
+ /// <param name="context">Emitter context that receives the generated operations.</param>
+ /// <param name="mode">Shift applied to the sum of the first two inputs (16 bits left or right, or none).</param>
+ /// <param name="srcA">First input.</param>
+ /// <param name="srcB">Second input.</param>
+ /// <param name="srcC">Third input.</param>
+ /// <param name="partA">Part of the first input to use.</param>
+ /// <param name="partB">Part of the second input to use.</param>
+ /// <param name="partC">Part of the third input to use.</param>
+ /// <param name="rd">Destination register index.</param>
+ /// <param name="negateA">Negate the first input.</param>
+ /// <param name="negateB">Negate the second input.</param>
+ /// <param name="negateC">Negate the third input.</param>
+ private static void EmitIadd3(
+     EmitterContext context,
+     Lrs mode,
+     Operand srcA,
+     Operand srcB,
+     Operand srcC,
+     HalfSelect partA,
+     HalfSelect partB,
+     HalfSelect partC,
+     int rd,
+     bool negateA,
+     bool negateB,
+     bool negateC)
+ {
+     // Zero-extends the selected 16-bit half, or passes the full 32-bit value through.
+     Operand SelectPart(Operand src, HalfSelect part)
+     {
+         switch (part)
+         {
+             case HalfSelect.B32:
+                 return src;
+
+             case HalfSelect.H0:
+                 return context.BitwiseAnd(src, Const(0xffff));
+
+             case HalfSelect.H1:
+                 return context.ShiftRightU32(src, Const(16));
+
+             default:
+                 context.Config.GpuAccessor.Log($"Iadd3 has invalid component selection {part}.");
+                 return src;
+         }
+     }
+
+     Operand first = context.INegate(SelectPart(srcA, partA), negateA);
+     Operand second = context.INegate(SelectPart(srcB, partB), negateB);
+     Operand third = context.INegate(SelectPart(srcC, partC), negateC);
+
+     Operand sum = context.IAdd(first, second);
+
+     switch (mode)
+     {
+         case Lrs.None:
+             break;
+
+         case Lrs.LeftShift:
+             sum = context.ShiftLeft(sum, Const(16));
+             break;
+
+         case Lrs.RightShift:
+             sum = context.ShiftRightU32(sum, Const(16));
+             break;
+
+         default:
+             // TODO: Warning.
+             break;
+     }
+
+     sum = context.IAdd(sum, third);
+
+     context.Copy(GetDest(rd), sum);
+
+     // TODO: CC, X, corner cases.
+ }
+
+ /// <summary>
+ /// Shared implementation of the integer multiply and multiply-add emitters.
+ /// </summary>
+ /// <param name="context">Emitter context that receives the generated operations.</param>
+ /// <param name="srcA">First multiplication operand.</param>
+ /// <param name="srcB">Second multiplication operand; negated when <paramref name="avgMode"/> is NegA.</param>
+ /// <param name="srcC">Addend; negated when <paramref name="avgMode"/> is NegB. The add is skipped for a constant zero.</param>
+ /// <param name="rd">Destination register index.</param>
+ /// <param name="avgMode">Negation mode.</param>
+ /// <param name="signedA">True if the first operand is signed.</param>
+ /// <param name="signedB">True if the second operand is signed.</param>
+ /// <param name="high">True to use the high 32 bits of the product instead of the low ones.</param>
+ private static void EmitImad(
+     EmitterContext context,
+     Operand srcA,
+     Operand srcB,
+     Operand srcC,
+     int rd,
+     AvgMode avgMode,
+     bool signedA,
+     bool signedB,
+     bool high)
+ {
+     Operand factorB = context.INegate(srcB, avgMode == AvgMode.NegA);
+     Operand addend = context.INegate(srcC, avgMode == AvgMode.NegB);
+
+     Operand result;
+
+     if (!high)
+     {
+         result = context.IMultiply(srcA, factorB);
+     }
+     else if (signedA && signedB)
+     {
+         result = context.MultiplyHighS32(srcA, factorB);
+     }
+     else
+     {
+         result = context.MultiplyHighU32(srcA, factorB);
+
+         // Mixed signedness: adjust the unsigned high product using the sign of the signed operand.
+         if (signedA)
+         {
+             Operand signA = context.ShiftRightS32(srcA, Const(31));
+             Operand adjustment = context.IMultiply(factorB, signA);
+
+             result = context.IAdd(result, adjustment);
+         }
+         else if (signedB)
+         {
+             Operand signB = context.ShiftRightS32(factorB, Const(31));
+             Operand adjustment = context.IMultiply(srcA, signB);
+
+             result = context.IAdd(result, adjustment);
+         }
+     }
+
+     bool addendIsZero = addend.Type == OperandType.Constant && addend.Value == 0;
+
+     if (!addendIsZero)
+     {
+         result = context.IAdd(result, addend);
+     }
+
+     // TODO: CC, X, SAT, and more?
+
+     context.Copy(GetDest(rd), result);
+ }
+
+ /// <summary>
+ /// Shared implementation of the shift-and-add emitters: (A &lt;&lt; shift) + B,
+ /// with the optional negation applied after the shift.
+ /// </summary>
+ /// <param name="context">Emitter context that receives the generated operations.</param>
+ /// <param name="srcA">Operand that is shifted left.</param>
+ /// <param name="srcB">Operand added to the shifted value.</param>
+ /// <param name="rd">Destination register index.</param>
+ /// <param name="shift">Left shift amount.</param>
+ /// <param name="avgMode">Selects which of the operands, if any, is negated.</param>
+ /// <param name="writeCC">True to update the condition code flags.</param>
+ private static void EmitIscadd(
+     EmitterContext context,
+     Operand srcA,
+     Operand srcB,
+     int rd,
+     int shift,
+     AvgMode avgMode,
+     bool writeCC)
+ {
+     Operand scaled = context.ShiftLeft(srcA, Const(shift));
+
+     Operand lhs = context.INegate(scaled, avgMode == AvgMode.NegA);
+     Operand rhs = context.INegate(srcB, avgMode == AvgMode.NegB);
+
+     Operand sum = context.IAdd(lhs, rhs);
+
+     SetIaddFlags(context, sum, lhs, rhs, writeCC, false);
+
+     context.Copy(GetDest(rd), sum);
+ }
+
+ /// <summary>
+ /// Shared implementation of the LEA emitters: (A &lt;&lt; shift) + B,
+ /// with the shifted value optionally negated.
+ /// </summary>
+ public static void EmitLea(EmitterContext context, Operand srcA, Operand srcB, int rd, bool negateA, int shift)
+ {
+     Operand scaled = context.ShiftLeft(srcA, Const(shift));
+     Operand index = context.INegate(scaled, negateA);
+
+     Operand address = context.IAdd(index, srcB);
+
+     context.Copy(GetDest(rd), address);
+
+     // TODO: CC, X.
+ }
+
+ /// <summary>
+ /// Shared implementation of the high-word LEA emitters.
+ /// Treats <paramref name="srcC"/>:<paramref name="srcA"/> as a 64-bit value shifted left by
+ /// <paramref name="shift"/>, optionally negates it, and adds <paramref name="srcB"/> to its high word.
+ /// </summary>
+ /// <param name="context">Emitter context that receives the generated operations.</param>
+ /// <param name="srcA">Low word of the shifted value.</param>
+ /// <param name="srcB">Value added to the high word of the result.</param>
+ /// <param name="srcC">High word of the shifted value.</param>
+ /// <param name="rd">Destination register index.</param>
+ /// <param name="negateA">True to negate the shifted 64-bit value.</param>
+ /// <param name="shift">Left shift amount.</param>
+ private static void EmitLeaHi(
+     EmitterContext context,
+     Operand srcA,
+     Operand srcB,
+     Operand srcC,
+     int rd,
+     bool negateA,
+     int shift)
+ {
+     Operand shiftedLow = context.ShiftLeft(srcA, Const(shift));
+
+     // Bits of the low word that cross into the high word (none when there is no shift).
+     Operand crossingBits;
+
+     if (shift == 0)
+     {
+         crossingBits = Const(0);
+     }
+     else
+     {
+         crossingBits = context.ShiftRightU32(srcA, Const(32 - shift));
+     }
+
+     Operand shiftedHigh = context.BitwiseOr(crossingBits, context.ShiftLeft(srcC, Const(shift)));
+
+     if (negateA)
+     {
+         // Perform 64-bit negation by doing bitwise not of the value,
+         // then adding 1 and carrying over from low to high.
+         Operand invertedLow = context.BitwiseNot(shiftedLow);
+         Operand invertedHigh = context.BitwiseNot(shiftedHigh);
+
+         // Only the carry out of the low word is needed, since just the high word is written.
+         AddWithCarry(context, invertedLow, Const(1), out Operand lowCarryOut);
+
+         shiftedHigh = context.IAdd(invertedHigh, lowCarryOut);
+     }
+
+     Operand result = context.IAdd(shiftedHigh, srcB);
+
+     context.Copy(GetDest(rd), result);
+
+     // TODO: CC, X.
+ }
+
+ /// <summary>
+ /// Converts the <see cref="XmadCop2"/> mode into the equivalent <see cref="XmadCop"/> mode and
+ /// forwards to the main XMAD implementation. An unknown mode is logged and nothing is emitted.
+ /// </summary>
+ public static void EmitXmad(
+     EmitterContext context,
+     XmadCop2 mode,
+     Operand srcA,
+     Operand srcB,
+     Operand srcC,
+     int rd,
+     bool signedA,
+     bool signedB,
+     bool highA,
+     bool highB,
+     bool productShiftLeft,
+     bool merge,
+     bool extended,
+     bool writeCC)
+ {
+     XmadCop? converted = mode switch
+     {
+         XmadCop2.Cfull => (XmadCop?)XmadCop.Cfull,
+         XmadCop2.Clo => XmadCop.Clo,
+         XmadCop2.Chi => XmadCop.Chi,
+         XmadCop2.Csfu => XmadCop.Csfu,
+         _ => null,
+     };
+
+     if (converted is null)
+     {
+         context.Config.GpuAccessor.Log($"Invalid XMAD mode \"{mode}\".");
+         return;
+     }
+
+     EmitXmad(context, converted.Value, srcA, srcB, srcC, rd, signedA, signedB, highA, highB, productShiftLeft, merge, extended, writeCC);
+ }
+
+ /// <summary>
+ /// Main XMAD implementation: multiplies a 16-bit half of <paramref name="srcA"/> by a 16-bit half of
+ /// <paramref name="srcB"/> and adds <paramref name="srcC"/> (adjusted according to <paramref name="mode"/>).
+ /// An unknown mode is logged and nothing is emitted.
+ /// </summary>
+ /// <param name="context">Emitter context that receives the generated operations.</param>
+ /// <param name="mode">Selects how the addend is transformed before the addition.</param>
+ /// <param name="srcA">First multiplication operand.</param>
+ /// <param name="srcB">Second multiplication operand.</param>
+ /// <param name="srcC">Addend.</param>
+ /// <param name="rd">Destination register index.</param>
+ /// <param name="signedA">True to sign-extend the selected half of the first operand.</param>
+ /// <param name="signedB">True to sign-extend the selected half of the second operand.</param>
+ /// <param name="highA">True to use the high half of the first operand.</param>
+ /// <param name="highB">True to use the high half of the second operand.</param>
+ /// <param name="productShiftLeft">True to shift the product left by 16 bits.</param>
+ /// <param name="merge">True to replace the high half of the result with the low half of the unmodified second operand.</param>
+ /// <param name="extended">True to also add bit 0 of the carry flag.</param>
+ /// <param name="writeCC">True to update the condition code flags.</param>
+ public static void EmitXmad(
+     EmitterContext context,
+     XmadCop mode,
+     Operand srcA,
+     Operand srcB,
+     Operand srcC,
+     int rd,
+     bool signedA,
+     bool signedB,
+     bool highA,
+     bool highB,
+     bool productShiftLeft,
+     bool merge,
+     bool extended,
+     bool writeCC)
+ {
+     var srcBUnmodified = srcB;
+
+     // Extracts the selected 16-bit half with sign or zero extension.
+     Operand Extend16To32(Operand src, bool high, bool signed)
+     {
+         if (signed && high)
+         {
+             return context.ShiftRightS32(src, Const(16));
+         }
+         else if (signed)
+         {
+             return context.BitfieldExtractS32(src, Const(0), Const(16));
+         }
+         else if (high)
+         {
+             return context.ShiftRightU32(src, Const(16));
+         }
+         else
+         {
+             return context.BitwiseAnd(src, Const(0xffff));
+         }
+     }
+
+     srcA = Extend16To32(srcA, highA, signedA);
+     srcB = Extend16To32(srcB, highB, signedB);
+
+     Operand res = context.IMultiply(srcA, srcB);
+
+     if (productShiftLeft)
+     {
+         res = context.ShiftLeft(res, Const(16));
+     }
+
+     switch (mode)
+     {
+         case XmadCop.Cfull:
+             break;
+
+         case XmadCop.Clo:
+             srcC = Extend16To32(srcC, high: false, signed: false);
+             break;
+         case XmadCop.Chi:
+             srcC = Extend16To32(srcC, high: true, signed: false);
+             break;
+
+         case XmadCop.Cbcc:
+             srcC = context.IAdd(srcC, context.ShiftLeft(srcBUnmodified, Const(16)));
+             break;
+
+         case XmadCop.Csfu:
+             Operand signAdjustA = context.ShiftLeft(context.ShiftRightU32(srcA, Const(31)), Const(16));
+             Operand signAdjustB = context.ShiftLeft(context.ShiftRightU32(srcB, Const(31)), Const(16));
+
+             srcC = context.ISubtract(srcC, context.IAdd(signAdjustA, signAdjustB));
+             break;
+
+         default:
+             context.Config.GpuAccessor.Log($"Invalid XMAD mode \"{mode}\".");
+             return;
+     }
+
+     Operand product = res;
+
+     // The addend always takes part in the sum. Previously the extended path added only the
+     // carry and dropped srcC, which disagreed with the flags computed below (they treat
+     // srcC as the second addend) and with how EmitIadd handles the extended case.
+     res = context.IAdd(res, srcC);
+
+     if (extended)
+     {
+         // Add with carry.
+         res = context.IAdd(res, context.BitwiseAnd(GetCF(), Const(1)));
+     }
+
+     SetIaddFlags(context, res, product, srcC, writeCC, extended);
+
+     if (merge)
+     {
+         res = context.BitwiseAnd(res, Const(0xffff));
+         res = context.BitwiseOr(res, context.ShiftLeft(srcBUnmodified, Const(16)));
+     }
+
+     context.Copy(GetDest(rd), res);
+ }
+
+ /// <summary>
+ /// Updates the carry, overflow, zero and negative flags for an integer addition.
+ /// Does nothing when <paramref name="setCC"/> is false.
+ /// </summary>
+ /// <param name="context">Emitter context that receives the generated operations.</param>
+ /// <param name="res">Result of the addition.</param>
+ /// <param name="srcA">First addend.</param>
+ /// <param name="srcB">Second addend.</param>
+ /// <param name="setCC">True to update the flags.</param>
+ /// <param name="extended">True if the addition included the carry flag as carry-in.</param>
+ private static void SetIaddFlags(EmitterContext context, Operand res, Operand srcA, Operand srcB, bool setCC, bool extended)
+ {
+     if (!setCC)
+     {
+         return;
+     }
+
+     Operand carryOut;
+
+     if (extended)
+     {
+         // C = (d == a && CIn) || d < a
+         Operand unchanged = context.ICompareEqual(res, srcA);
+         Operand wrapped = context.ICompareLessUnsigned(res, srcA);
+
+         Operand unchangedWithCarryIn = context.BitwiseAnd(unchanged, GetCF());
+
+         carryOut = context.BitwiseOr(unchangedWithCarryIn, wrapped);
+     }
+     else
+     {
+         // C = d < a
+         carryOut = context.ICompareLessUnsigned(res, srcA);
+     }
+
+     context.Copy(GetCF(), carryOut);
+
+     // V = (d ^ a) & ~(a ^ b) < 0
+     Operand resultVsA = context.BitwiseExclusiveOr(res, srcA);
+     Operand aVsB = context.BitwiseExclusiveOr(srcA, srcB);
+
+     Operand inputsAgree = context.BitwiseNot(aVsB);
+
+     Operand overflowBits = context.BitwiseAnd(resultVsA, inputsAgree);
+
+     context.Copy(GetVF(), context.ICompareLess(overflowBits, Const(0)));
+
+     SetZnFlags(context, res, setCC: true, extended: extended);
+ }
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/Instructions/InstEmitIntegerComparison.cs b/src/Ryujinx.Graphics.Shader/Instructions/InstEmitIntegerComparison.cs
new file mode 100644
index 00000000..dcdb189f
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/Instructions/InstEmitIntegerComparison.cs
@@ -0,0 +1,310 @@
+using Ryujinx.Graphics.Shader.Decoders;
+using Ryujinx.Graphics.Shader.IntermediateRepresentation;
+using Ryujinx.Graphics.Shader.Translation;
+using System;
+
+using static Ryujinx.Graphics.Shader.Instructions.InstEmitAluHelper;
+using static Ryujinx.Graphics.Shader.Instructions.InstEmitHelper;
+using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
+
+namespace Ryujinx.Graphics.Shader.Instructions
+{
+ static partial class InstEmit
+ {
+        /// <summary>
+        /// Emits an <see cref="InstIcmpR"/> instruction: operands A, B and C all come from registers.
+        /// </summary>
+        /// <param name="context">Emitter context holding the current instruction</param>
+        public static void IcmpR(EmitterContext context)
+        {
+            var icmp = context.GetOp<InstIcmpR>();
+
+            Operand a = GetSrcReg(context, icmp.SrcA);
+            Operand b = GetSrcReg(context, icmp.SrcB);
+            Operand c = GetSrcReg(context, icmp.SrcC);
+
+            EmitIcmp(context, icmp.IComp, a, b, c, icmp.Dest, icmp.Signed);
+        }
+
+        /// <summary>
+        /// Emits an <see cref="InstIcmpI"/> instruction: operand B is the sign extended
+        /// 20-bit immediate, operands A and C come from registers.
+        /// </summary>
+        /// <param name="context">Emitter context holding the current instruction</param>
+        public static void IcmpI(EmitterContext context)
+        {
+            var icmp = context.GetOp<InstIcmpI>();
+
+            Operand a = GetSrcReg(context, icmp.SrcA);
+            Operand b = GetSrcImm(context, Imm20ToSInt(icmp.Imm20));
+            Operand c = GetSrcReg(context, icmp.SrcC);
+
+            EmitIcmp(context, icmp.IComp, a, b, c, icmp.Dest, icmp.Signed);
+        }
+
+        /// <summary>
+        /// Emits an <see cref="InstIcmpC"/> instruction: operand B is read from a constant buffer,
+        /// operands A and C come from registers.
+        /// </summary>
+        /// <param name="context">Emitter context holding the current instruction</param>
+        public static void IcmpC(EmitterContext context)
+        {
+            var icmp = context.GetOp<InstIcmpC>();
+
+            Operand a = GetSrcReg(context, icmp.SrcA);
+            Operand b = GetSrcCbuf(context, icmp.CbufSlot, icmp.CbufOffset);
+            Operand c = GetSrcReg(context, icmp.SrcC);
+
+            EmitIcmp(context, icmp.IComp, a, b, c, icmp.Dest, icmp.Signed);
+        }
+
+        /// <summary>
+        /// Emits an <see cref="InstIcmpRc"/> instruction. In this form operand B is read from the
+        /// register named by the SrcC field, and operand C is read from a constant buffer.
+        /// </summary>
+        /// <param name="context">Emitter context holding the current instruction</param>
+        public static void IcmpRc(EmitterContext context)
+        {
+            var icmp = context.GetOp<InstIcmpRc>();
+
+            Operand a = GetSrcReg(context, icmp.SrcA);
+            Operand b = GetSrcReg(context, icmp.SrcC);
+            Operand c = GetSrcCbuf(context, icmp.CbufSlot, icmp.CbufOffset);
+
+            EmitIcmp(context, icmp.IComp, a, b, c, icmp.Dest, icmp.Signed);
+        }
+
+        /// <summary>
+        /// Emits an <see cref="InstIsetR"/> instruction: both compared operands come from registers.
+        /// </summary>
+        /// <param name="context">Emitter context holding the current instruction</param>
+        public static void IsetR(EmitterContext context)
+        {
+            var iset = context.GetOp<InstIsetR>();
+
+            Operand a = GetSrcReg(context, iset.SrcA);
+            Operand b = GetSrcReg(context, iset.SrcB);
+
+            EmitIset(
+                context,
+                iset.IComp,
+                iset.Bop,
+                a,
+                b,
+                iset.SrcPred,
+                iset.SrcPredInv,
+                iset.Dest,
+                iset.BVal,
+                iset.Signed,
+                iset.X,
+                iset.WriteCC);
+        }
+
+        /// <summary>
+        /// Emits an <see cref="InstIsetI"/> instruction: operand B is the sign extended 20-bit immediate.
+        /// </summary>
+        /// <param name="context">Emitter context holding the current instruction</param>
+        public static void IsetI(EmitterContext context)
+        {
+            var iset = context.GetOp<InstIsetI>();
+
+            Operand a = GetSrcReg(context, iset.SrcA);
+            Operand b = GetSrcImm(context, Imm20ToSInt(iset.Imm20));
+
+            EmitIset(
+                context,
+                iset.IComp,
+                iset.Bop,
+                a,
+                b,
+                iset.SrcPred,
+                iset.SrcPredInv,
+                iset.Dest,
+                iset.BVal,
+                iset.Signed,
+                iset.X,
+                iset.WriteCC);
+        }
+
+        /// <summary>
+        /// Emits an <see cref="InstIsetC"/> instruction: operand B is read from a constant buffer.
+        /// </summary>
+        /// <param name="context">Emitter context holding the current instruction</param>
+        public static void IsetC(EmitterContext context)
+        {
+            var iset = context.GetOp<InstIsetC>();
+
+            Operand a = GetSrcReg(context, iset.SrcA);
+            Operand b = GetSrcCbuf(context, iset.CbufSlot, iset.CbufOffset);
+
+            EmitIset(
+                context,
+                iset.IComp,
+                iset.Bop,
+                a,
+                b,
+                iset.SrcPred,
+                iset.SrcPredInv,
+                iset.Dest,
+                iset.BVal,
+                iset.Signed,
+                iset.X,
+                iset.WriteCC);
+        }
+
+        /// <summary>
+        /// Emits an <see cref="InstIsetpR"/> instruction: both compared operands come from registers.
+        /// </summary>
+        /// <param name="context">Emitter context holding the current instruction</param>
+        public static void IsetpR(EmitterContext context)
+        {
+            var isetp = context.GetOp<InstIsetpR>();
+
+            Operand a = GetSrcReg(context, isetp.SrcA);
+            Operand b = GetSrcReg(context, isetp.SrcB);
+
+            EmitIsetp(
+                context,
+                isetp.IComp,
+                isetp.Bop,
+                a,
+                b,
+                isetp.SrcPred,
+                isetp.SrcPredInv,
+                isetp.DestPred,
+                isetp.DestPredInv,
+                isetp.Signed,
+                isetp.X);
+        }
+
+        /// <summary>
+        /// Emits an <see cref="InstIsetpI"/> instruction: operand B is the sign extended 20-bit immediate.
+        /// </summary>
+        /// <param name="context">Emitter context holding the current instruction</param>
+        public static void IsetpI(EmitterContext context)
+        {
+            var isetp = context.GetOp<InstIsetpI>();
+
+            Operand a = GetSrcReg(context, isetp.SrcA);
+            Operand b = GetSrcImm(context, Imm20ToSInt(isetp.Imm20));
+
+            EmitIsetp(
+                context,
+                isetp.IComp,
+                isetp.Bop,
+                a,
+                b,
+                isetp.SrcPred,
+                isetp.SrcPredInv,
+                isetp.DestPred,
+                isetp.DestPredInv,
+                isetp.Signed,
+                isetp.X);
+        }
+
+        /// <summary>
+        /// Emits an <see cref="InstIsetpC"/> instruction: operand B is read from a constant buffer.
+        /// </summary>
+        /// <param name="context">Emitter context holding the current instruction</param>
+        public static void IsetpC(EmitterContext context)
+        {
+            var isetp = context.GetOp<InstIsetpC>();
+
+            Operand a = GetSrcReg(context, isetp.SrcA);
+            Operand b = GetSrcCbuf(context, isetp.CbufSlot, isetp.CbufOffset);
+
+            EmitIsetp(
+                context,
+                isetp.IComp,
+                isetp.Bop,
+                a,
+                b,
+                isetp.SrcPred,
+                isetp.SrcPredInv,
+                isetp.DestPred,
+                isetp.DestPredInv,
+                isetp.Signed,
+                isetp.X);
+        }
+
+        /// <summary>
+        /// Emits the compare-and-select shared by all ICMP forms: operand C is compared against zero,
+        /// and the destination receives operand A when the comparison holds, operand B otherwise.
+        /// </summary>
+        /// <param name="context">Emitter context</param>
+        /// <param name="cmpOp">Comparison applied to <paramref name="srcC"/> and zero</param>
+        /// <param name="srcA">Value selected when the comparison is true</param>
+        /// <param name="srcB">Value selected when the comparison is false</param>
+        /// <param name="srcC">Value compared against zero</param>
+        /// <param name="rd">Destination register index</param>
+        /// <param name="isSigned">Whether the comparison is signed</param>
+        private static void EmitIcmp(
+            EmitterContext context,
+            IComp cmpOp,
+            Operand srcA,
+            Operand srcB,
+            Operand srcC,
+            int rd,
+            bool isSigned)
+        {
+            Operand zero = Const(0);
+            Operand condition = GetIntComparison(context, cmpOp, srcC, zero, isSigned);
+            Operand selected = context.ConditionalSelect(condition, srcA, srcB);
+
+            context.Copy(GetDest(rd), selected);
+        }
+
+        /// <summary>
+        /// Emits the code shared by all ISET forms: compares the two sources, combines the result with
+        /// a source predicate, and writes the outcome to a register, optionally updating the flags.
+        /// </summary>
+        /// <param name="context">Emitter context</param>
+        /// <param name="cmpOp">Integer comparison to perform</param>
+        /// <param name="logicOp">Logical operation combining the comparison result and the predicate</param>
+        /// <param name="srcA">Left hand side of the comparison</param>
+        /// <param name="srcB">Right hand side of the comparison</param>
+        /// <param name="srcPred">Source predicate register index</param>
+        /// <param name="srcPredInv">Whether the source predicate is inverted</param>
+        /// <param name="rd">Destination register index</param>
+        /// <param name="boolFloat">When true, the result is written as float 1 or integer 0 instead of the raw comparison result</param>
+        /// <param name="isSigned">Whether the comparison is signed</param>
+        /// <param name="extended">Whether the extended comparison is used</param>
+        /// <param name="writeCC">Whether the flags are updated</param>
+        private static void EmitIset(
+            EmitterContext context,
+            IComp cmpOp,
+            BoolOp logicOp,
+            Operand srcA,
+            Operand srcB,
+            int srcPred,
+            bool srcPredInv,
+            int rd,
+            bool boolFloat,
+            bool isSigned,
+            bool extended,
+            bool writeCC)
+        {
+            Operand comparison = GetIntComparison(context, cmpOp, srcA, srcB, isSigned, extended);
+            Operand predicate = GetPredicate(context, srcPred, srcPredInv);
+            Operand combined = GetPredLogicalOp(context, logicOp, comparison, predicate);
+
+            Operand dest = GetDest(rd);
+
+            if (!boolFloat)
+            {
+                context.Copy(dest, combined);
+
+                SetZnFlags(context, combined, writeCC, extended);
+
+                return;
+            }
+
+            Operand floatResult = context.ConditionalSelect(combined, ConstF(1), Const(0));
+
+            context.Copy(dest, floatResult);
+
+            SetFPZnFlags(context, floatResult, writeCC);
+        }
+
+        /// <summary>
+        /// Emits the code shared by all ISETP forms: compares the two sources and writes two predicates.
+        /// The first receives the comparison result combined with the source predicate, the second
+        /// receives the bitwise inverse of the comparison result combined with the same predicate.
+        /// </summary>
+        /// <param name="context">Emitter context</param>
+        /// <param name="cmpOp">Integer comparison to perform</param>
+        /// <param name="logicOp">Logical operation combining each result with the predicate</param>
+        /// <param name="srcA">Left hand side of the comparison</param>
+        /// <param name="srcB">Right hand side of the comparison</param>
+        /// <param name="srcPred">Source predicate register index</param>
+        /// <param name="srcPredInv">Whether the source predicate is inverted</param>
+        /// <param name="destPred">Predicate register index receiving the combined comparison result</param>
+        /// <param name="destPredInv">Predicate register index receiving the combined inverted result</param>
+        /// <param name="isSigned">Whether the comparison is signed</param>
+        /// <param name="extended">Whether the extended comparison is used</param>
+        private static void EmitIsetp(
+            EmitterContext context,
+            IComp cmpOp,
+            BoolOp logicOp,
+            Operand srcA,
+            Operand srcB,
+            int srcPred,
+            bool srcPredInv,
+            int destPred,
+            int destPredInv,
+            bool isSigned,
+            bool extended)
+        {
+            Operand comparison = GetIntComparison(context, cmpOp, srcA, srcB, isSigned, extended);
+            Operand inverse = context.BitwiseNot(comparison);
+            Operand predicate = GetPredicate(context, srcPred, srcPredInv);
+
+            Operand first = GetPredLogicalOp(context, logicOp, comparison, predicate);
+            Operand second = GetPredLogicalOp(context, logicOp, inverse, predicate);
+
+            Operand firstDest = Register(destPred, RegisterType.Predicate);
+            context.Copy(firstDest, first);
+
+            Operand secondDest = Register(destPredInv, RegisterType.Predicate);
+            context.Copy(secondDest, second);
+        }
+
+        /// <summary>
+        /// Builds an integer comparison, dispatching to the extended variant when requested.
+        /// </summary>
+        /// <param name="context">Emitter context</param>
+        /// <param name="cond">Comparison condition</param>
+        /// <param name="srcA">Left hand side of the comparison</param>
+        /// <param name="srcB">Right hand side of the comparison</param>
+        /// <param name="isSigned">Whether the comparison is signed</param>
+        /// <param name="extended">True to use <see cref="GetIntComparisonExtended"/>, false for the plain comparison</param>
+        /// <returns>Operand holding the comparison result</returns>
+        private static Operand GetIntComparison(
+            EmitterContext context,
+            IComp cond,
+            Operand srcA,
+            Operand srcB,
+            bool isSigned,
+            bool extended)
+        {
+            if (extended)
+            {
+                return GetIntComparisonExtended(context, cond, srcA, srcB, isSigned);
+            }
+
+            return GetIntComparison(context, cond, srcA, srcB, isSigned);
+        }
+
+        /// <summary>
+        /// Builds an extended integer comparison, which combines the comparison of
+        /// <paramref name="srcA"/> and <paramref name="srcB"/> with the current zero and carry flags.
+        /// Per the formulas on each case, the sources are the high words (xh, yh) of a wider value and
+        /// the flags stand for the outcome of the low word (xl, yl) comparison.
+        /// </summary>
+        /// <param name="context">Emitter context</param>
+        /// <param name="cond">Comparison condition</param>
+        /// <param name="srcA">Left hand side of the comparison</param>
+        /// <param name="srcB">Right hand side of the comparison</param>
+        /// <param name="isSigned">Whether the ordered comparisons of the sources are signed</param>
+        /// <returns>Operand holding the comparison result</returns>
+        /// <exception cref="ArgumentException">Thrown when <paramref name="cond"/> is not a known condition</exception>
+        private static Operand GetIntComparisonExtended(EmitterContext context, IComp cond, Operand srcA, Operand srcB, bool isSigned)
+        {
+            Operand res;
+
+            if (cond == IComp.T)
+            {
+                res = Const(IrConsts.True);
+            }
+            else if (cond == IComp.F)
+            {
+                res = Const(IrConsts.False);
+            }
+            else
+            {
+                // Every case below computes its result from scratch, so nothing is emitted up front:
+                // a subtraction emitted here would only produce dead operations.
+                switch (cond)
+                {
+                    case IComp.Eq: // r = xh == yh && xl == yl
+                        res = context.BitwiseAnd(context.ICompareEqual(srcA, srcB), GetZF());
+                        break;
+                    case IComp.Lt: // r = xh < yh || (xh == yh && xl < yl)
+                        Operand notC = context.BitwiseNot(GetCF());
+                        Operand prevLt = context.BitwiseAnd(context.ICompareEqual(srcA, srcB), notC);
+                        res = isSigned
+                            ? context.BitwiseOr(context.ICompareLess(srcA, srcB), prevLt)
+                            : context.BitwiseOr(context.ICompareLessUnsigned(srcA, srcB), prevLt);
+                        break;
+                    case IComp.Le: // r = xh < yh || (xh == yh && xl <= yl)
+                        Operand zOrNotC = context.BitwiseOr(GetZF(), context.BitwiseNot(GetCF()));
+                        Operand prevLe = context.BitwiseAnd(context.ICompareEqual(srcA, srcB), zOrNotC);
+                        res = isSigned
+                            ? context.BitwiseOr(context.ICompareLess(srcA, srcB), prevLe)
+                            : context.BitwiseOr(context.ICompareLessUnsigned(srcA, srcB), prevLe);
+                        break;
+                    case IComp.Gt: // r = xh > yh || (xh == yh && xl > yl)
+                        Operand notZAndC = context.BitwiseAnd(context.BitwiseNot(GetZF()), GetCF());
+                        Operand prevGt = context.BitwiseAnd(context.ICompareEqual(srcA, srcB), notZAndC);
+                        res = isSigned
+                            ? context.BitwiseOr(context.ICompareGreater(srcA, srcB), prevGt)
+                            : context.BitwiseOr(context.ICompareGreaterUnsigned(srcA, srcB), prevGt);
+                        break;
+                    case IComp.Ge: // r = xh > yh || (xh == yh && xl >= yl)
+                        Operand prevGe = context.BitwiseAnd(context.ICompareEqual(srcA, srcB), GetCF());
+                        res = isSigned
+                            ? context.BitwiseOr(context.ICompareGreater(srcA, srcB), prevGe)
+                            : context.BitwiseOr(context.ICompareGreaterUnsigned(srcA, srcB), prevGe);
+                        break;
+                    case IComp.Ne: // r = xh != yh || xl != yl
+                        res = context.BitwiseOr(context.ICompareNotEqual(srcA, srcB), context.BitwiseNot(GetZF()));
+                        break;
+                    default:
+                        throw new ArgumentException($"Unexpected condition \"{cond}\".");
+                }
+            }
+
+            return res;
+        }
+
+        /// <summary>
+        /// Builds a plain integer comparison of two operands.
+        /// </summary>
+        /// <param name="context">Emitter context</param>
+        /// <param name="cond">Comparison condition; T and F produce constants without comparing</param>
+        /// <param name="srcA">Left hand side of the comparison</param>
+        /// <param name="srcB">Right hand side of the comparison</param>
+        /// <param name="isSigned">Selects the signed instruction for the ordered comparisons (Lt, Le, Gt, Ge)</param>
+        /// <returns>Operand holding the comparison result</returns>
+        /// <exception cref="InvalidOperationException">Thrown when <paramref name="cond"/> is not a known condition</exception>
+        private static Operand GetIntComparison(EmitterContext context, IComp cond, Operand srcA, Operand srcB, bool isSigned)
+        {
+            if (cond == IComp.T)
+            {
+                return Const(IrConsts.True);
+            }
+
+            if (cond == IComp.F)
+            {
+                return Const(IrConsts.False);
+            }
+
+            // Equality tests do not depend on signedness, only the ordered comparisons do.
+            Instruction comparison = cond switch
+            {
+                IComp.Lt => isSigned ? Instruction.CompareLess : Instruction.CompareLessU32,
+                IComp.Eq => Instruction.CompareEqual,
+                IComp.Le => isSigned ? Instruction.CompareLessOrEqual : Instruction.CompareLessOrEqualU32,
+                IComp.Gt => isSigned ? Instruction.CompareGreater : Instruction.CompareGreaterU32,
+                IComp.Ne => Instruction.CompareNotEqual,
+                IComp.Ge => isSigned ? Instruction.CompareGreaterOrEqual : Instruction.CompareGreaterOrEqualU32,
+                _ => throw new InvalidOperationException($"Unexpected condition \"{cond}\".")
+            };
+
+            return context.Add(comparison, Local(), srcA, srcB);
+        }
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/Instructions/InstEmitIntegerLogical.cs b/src/Ryujinx.Graphics.Shader/Instructions/InstEmitIntegerLogical.cs
new file mode 100644
index 00000000..1f3f66ae
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/Instructions/InstEmitIntegerLogical.cs
@@ -0,0 +1,167 @@
+using Ryujinx.Graphics.Shader.Decoders;
+using Ryujinx.Graphics.Shader.IntermediateRepresentation;
+using Ryujinx.Graphics.Shader.Translation;
+
+using static Ryujinx.Graphics.Shader.Instructions.InstEmitAluHelper;
+using static Ryujinx.Graphics.Shader.Instructions.InstEmitHelper;
+using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
+
+namespace Ryujinx.Graphics.Shader.Instructions
+{
+ static partial class InstEmit
+ {
+ private const int PT = RegisterConsts.PredicateTrueIndex; // Predicate-true register index; passed as destPred by the forms that write no predicate, which makes EmitLopPredWrite a no-op.
+
+        /// <summary>
+        /// Emits an <see cref="InstLopR"/> instruction: both operands come from registers.
+        /// </summary>
+        /// <param name="context">Emitter context holding the current instruction</param>
+        public static void LopR(EmitterContext context)
+        {
+            var lop = context.GetOp<InstLopR>();
+
+            Operand a = GetSrcReg(context, lop.SrcA);
+            Operand b = GetSrcReg(context, lop.SrcB);
+
+            EmitLop(
+                context,
+                lop.Lop,
+                lop.PredicateOp,
+                a,
+                b,
+                lop.Dest,
+                lop.DestPred,
+                lop.NegA,
+                lop.NegB,
+                lop.X,
+                lop.WriteCC);
+        }
+
+        /// <summary>
+        /// Emits an <see cref="InstLopI"/> instruction: operand B is the sign extended 20-bit immediate.
+        /// </summary>
+        /// <param name="context">Emitter context holding the current instruction</param>
+        public static void LopI(EmitterContext context)
+        {
+            var lop = context.GetOp<InstLopI>();
+
+            Operand a = GetSrcReg(context, lop.SrcA);
+            Operand b = GetSrcImm(context, Imm20ToSInt(lop.Imm20));
+
+            EmitLop(
+                context,
+                lop.LogicOp,
+                lop.PredicateOp,
+                a,
+                b,
+                lop.Dest,
+                lop.DestPred,
+                lop.NegA,
+                lop.NegB,
+                lop.X,
+                lop.WriteCC);
+        }
+
+        /// <summary>
+        /// Emits an <see cref="InstLopC"/> instruction: operand B is read from a constant buffer.
+        /// </summary>
+        /// <param name="context">Emitter context holding the current instruction</param>
+        public static void LopC(EmitterContext context)
+        {
+            var lop = context.GetOp<InstLopC>();
+
+            Operand a = GetSrcReg(context, lop.SrcA);
+            Operand b = GetSrcCbuf(context, lop.CbufSlot, lop.CbufOffset);
+
+            EmitLop(
+                context,
+                lop.LogicOp,
+                lop.PredicateOp,
+                a,
+                b,
+                lop.Dest,
+                lop.DestPred,
+                lop.NegA,
+                lop.NegB,
+                lop.X,
+                lop.WriteCC);
+        }
+
+        /// <summary>
+        /// Emits an <see cref="InstLop32i"/> instruction: operand B is the 32-bit immediate.
+        /// This form writes no predicate, so the predicate-true index is passed as destination predicate.
+        /// </summary>
+        /// <param name="context">Emitter context holding the current instruction</param>
+        public static void Lop32i(EmitterContext context)
+        {
+            var lop = context.GetOp<InstLop32i>();
+
+            Operand a = GetSrcReg(context, lop.SrcA);
+            Operand b = GetSrcImm(context, lop.Imm32);
+
+            EmitLop(
+                context,
+                lop.LogicOp,
+                PredicateOp.F,
+                a,
+                b,
+                lop.Dest,
+                PT,
+                lop.NegA,
+                lop.NegB,
+                lop.X,
+                lop.WriteCC);
+        }
+
+        /// <summary>
+        /// Emits an <see cref="InstLop3R"/> instruction: all three operands come from registers.
+        /// </summary>
+        /// <param name="context">Emitter context holding the current instruction</param>
+        public static void Lop3R(EmitterContext context)
+        {
+            var lop3 = context.GetOp<InstLop3R>();
+
+            Operand a = GetSrcReg(context, lop3.SrcA);
+            Operand b = GetSrcReg(context, lop3.SrcB);
+            Operand c = GetSrcReg(context, lop3.SrcC);
+
+            EmitLop3(
+                context,
+                lop3.Imm,
+                lop3.PredicateOp,
+                a,
+                b,
+                c,
+                lop3.Dest,
+                lop3.DestPred,
+                lop3.X,
+                lop3.WriteCC);
+        }
+
+        /// <summary>
+        /// Emits an <see cref="InstLop3I"/> instruction: operand B is the sign extended 20-bit immediate.
+        /// This form writes no predicate and is never treated as extended.
+        /// </summary>
+        /// <param name="context">Emitter context holding the current instruction</param>
+        public static void Lop3I(EmitterContext context)
+        {
+            var lop3 = context.GetOp<InstLop3I>();
+
+            Operand a = GetSrcReg(context, lop3.SrcA);
+            Operand b = GetSrcImm(context, Imm20ToSInt(lop3.Imm20));
+            Operand c = GetSrcReg(context, lop3.SrcC);
+
+            EmitLop3(context, lop3.Imm, PredicateOp.F, a, b, c, lop3.Dest, PT, extended: false, lop3.WriteCC);
+        }
+
+        /// <summary>
+        /// Emits an <see cref="InstLop3C"/> instruction: operand B is read from a constant buffer.
+        /// This form writes no predicate and is never treated as extended.
+        /// </summary>
+        /// <param name="context">Emitter context holding the current instruction</param>
+        public static void Lop3C(EmitterContext context)
+        {
+            var lop3 = context.GetOp<InstLop3C>();
+
+            Operand a = GetSrcReg(context, lop3.SrcA);
+            Operand b = GetSrcCbuf(context, lop3.CbufSlot, lop3.CbufOffset);
+            Operand c = GetSrcReg(context, lop3.SrcC);
+
+            EmitLop3(context, lop3.Imm, PredicateOp.F, a, b, c, lop3.Dest, PT, extended: false, lop3.WriteCC);
+        }
+
+        /// <summary>
+        /// Emits the code shared by all two-operand LOP forms: optionally inverts each source, applies
+        /// the logical operation, writes the optional predicate and the destination register, and
+        /// updates the zero and negative flags when requested.
+        /// </summary>
+        /// <param name="context">Emitter context</param>
+        /// <param name="logicOp">Logical operation; anything other than And, Or and Xor passes operand B through</param>
+        /// <param name="predOp">How the predicate result is derived from the operation result</param>
+        /// <param name="srcA">First operand</param>
+        /// <param name="srcB">Second operand</param>
+        /// <param name="rd">Destination register index</param>
+        /// <param name="destPred">Destination predicate register index</param>
+        /// <param name="invertA">Whether operand A is bitwise inverted first</param>
+        /// <param name="invertB">Whether operand B is bitwise inverted first</param>
+        /// <param name="extended">Forwarded to the flag update</param>
+        /// <param name="writeCC">Whether the flags are updated</param>
+        private static void EmitLop(
+            EmitterContext context,
+            LogicOp logicOp,
+            PredicateOp predOp,
+            Operand srcA,
+            Operand srcB,
+            int rd,
+            int destPred,
+            bool invertA,
+            bool invertB,
+            bool extended,
+            bool writeCC)
+        {
+            srcA = context.BitwiseNot(srcA, invertA);
+            srcB = context.BitwiseNot(srcB, invertB);
+
+            // Each arm yields the result directly; assigning to "res" inside its own initializer is redundant.
+            Operand res = logicOp switch
+            {
+                LogicOp.And => context.BitwiseAnd(srcA, srcB),
+                LogicOp.Or => context.BitwiseOr(srcA, srcB),
+                LogicOp.Xor => context.BitwiseExclusiveOr(srcA, srcB),
+                _ => srcB
+            };
+
+            EmitLopPredWrite(context, res, predOp, destPred);
+
+            context.Copy(GetDest(rd), res);
+
+            SetZnFlags(context, res, writeCC, extended);
+        }
+
+        /// <summary>
+        /// Emits the code shared by all LOP3 forms: evaluates the three-input truth table, writes the
+        /// optional predicate and the destination register, and updates the flags when requested.
+        /// </summary>
+        /// <param name="context">Emitter context</param>
+        /// <param name="truthTable">Truth table passed to <c>Lop3Expression.GetFromTruthTable</c></param>
+        /// <param name="predOp">How the predicate result is derived from the operation result</param>
+        /// <param name="srcA">First operand</param>
+        /// <param name="srcB">Second operand</param>
+        /// <param name="srcC">Third operand</param>
+        /// <param name="rd">Destination register index</param>
+        /// <param name="destPred">Destination predicate register index</param>
+        /// <param name="extended">Forwarded to the flag update</param>
+        /// <param name="writeCC">Whether the flags are updated</param>
+        private static void EmitLop3(
+            EmitterContext context,
+            int truthTable,
+            PredicateOp predOp,
+            Operand srcA,
+            Operand srcB,
+            Operand srcC,
+            int rd,
+            int destPred,
+            bool extended,
+            bool writeCC)
+        {
+            Operand combined = Lop3Expression.GetFromTruthTable(context, srcA, srcB, srcC, truthTable);
+
+            EmitLopPredWrite(context, combined, predOp, destPred);
+
+            Operand dest = GetDest(rd);
+            context.Copy(dest, combined);
+
+            SetZnFlags(context, combined, writeCC, extended);
+        }
+
+        /// <summary>
+        /// Writes the predicate output of a logical operation. Nothing is written when the
+        /// destination is the predicate-true register.
+        /// </summary>
+        /// <param name="context">Emitter context</param>
+        /// <param name="result">Result of the logical operation</param>
+        /// <param name="predOp">F and T write constants, Z tests the result for zero, anything else tests for non-zero</param>
+        /// <param name="pred">Destination predicate register index</param>
+        private static void EmitLopPredWrite(EmitterContext context, Operand result, PredicateOp predOp, int pred)
+        {
+            if (pred == RegisterConsts.PredicateTrueIndex)
+            {
+                return;
+            }
+
+            Operand predValue = predOp switch
+            {
+                PredicateOp.F => Const(IrConsts.False),
+                PredicateOp.T => Const(IrConsts.True),
+                PredicateOp.Z => context.ICompareEqual(result, Const(0)),
+                // Remaining case: the result is tested for being non-zero.
+                _ => context.ICompareNotEqual(result, Const(0))
+            };
+
+            context.Copy(Register(pred, RegisterType.Predicate), predValue);
+        }
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/Instructions/InstEmitIntegerMinMax.cs b/src/Ryujinx.Graphics.Shader/Instructions/InstEmitIntegerMinMax.cs
new file mode 100644
index 00000000..73930ed1
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/Instructions/InstEmitIntegerMinMax.cs
@@ -0,0 +1,71 @@
+using Ryujinx.Graphics.Shader.Decoders;
+using Ryujinx.Graphics.Shader.IntermediateRepresentation;
+using Ryujinx.Graphics.Shader.Translation;
+
+using static Ryujinx.Graphics.Shader.Instructions.InstEmitAluHelper;
+using static Ryujinx.Graphics.Shader.Instructions.InstEmitHelper;
+
+namespace Ryujinx.Graphics.Shader.Instructions
+{
+ static partial class InstEmit
+ {
+        /// <summary>
+        /// Emits an <see cref="InstImnmxR"/> instruction: both operands come from registers.
+        /// </summary>
+        /// <param name="context">Emitter context holding the current instruction</param>
+        public static void ImnmxR(EmitterContext context)
+        {
+            var imnmx = context.GetOp<InstImnmxR>();
+
+            Operand a = GetSrcReg(context, imnmx.SrcA);
+            Operand b = GetSrcReg(context, imnmx.SrcB);
+            Operand selector = GetPredicate(context, imnmx.SrcPred, imnmx.SrcPredInv);
+
+            EmitImnmx(context, a, b, selector, imnmx.Dest, imnmx.Signed, imnmx.WriteCC);
+        }
+
+        /// <summary>
+        /// Emits an <see cref="InstImnmxI"/> instruction: operand B is the sign extended 20-bit immediate.
+        /// </summary>
+        /// <param name="context">Emitter context holding the current instruction</param>
+        public static void ImnmxI(EmitterContext context)
+        {
+            var imnmx = context.GetOp<InstImnmxI>();
+
+            Operand a = GetSrcReg(context, imnmx.SrcA);
+            Operand b = GetSrcImm(context, Imm20ToSInt(imnmx.Imm20));
+            Operand selector = GetPredicate(context, imnmx.SrcPred, imnmx.SrcPredInv);
+
+            EmitImnmx(context, a, b, selector, imnmx.Dest, imnmx.Signed, imnmx.WriteCC);
+        }
+
+        /// <summary>
+        /// Emits an <see cref="InstImnmxC"/> instruction: operand B is read from a constant buffer.
+        /// </summary>
+        /// <param name="context">Emitter context holding the current instruction</param>
+        public static void ImnmxC(EmitterContext context)
+        {
+            var imnmx = context.GetOp<InstImnmxC>();
+
+            Operand a = GetSrcReg(context, imnmx.SrcA);
+            Operand b = GetSrcCbuf(context, imnmx.CbufSlot, imnmx.CbufOffset);
+            Operand selector = GetPredicate(context, imnmx.SrcPred, imnmx.SrcPredInv);
+
+            EmitImnmx(context, a, b, selector, imnmx.Dest, imnmx.Signed, imnmx.WriteCC);
+        }
+
+        /// <summary>
+        /// Emits the code shared by all IMNMX forms. Both the minimum and the maximum of the two
+        /// operands are computed, and the predicate picks which one is written: the minimum when
+        /// it is true, the maximum otherwise.
+        /// </summary>
+        /// <param name="context">Emitter context</param>
+        /// <param name="srcA">First operand</param>
+        /// <param name="srcB">Second operand</param>
+        /// <param name="srcPred">Predicate selecting between minimum and maximum</param>
+        /// <param name="rd">Destination register index</param>
+        /// <param name="isSignedInt">Whether the operands are compared as signed integers</param>
+        /// <param name="writeCC">Whether the zero and negative flags are updated</param>
+        private static void EmitImnmx(
+            EmitterContext context,
+            Operand srcA,
+            Operand srcB,
+            Operand srcPred,
+            int rd,
+            bool isSignedInt,
+            bool writeCC)
+        {
+            Operand minimum;
+            Operand maximum;
+
+            if (isSignedInt)
+            {
+                minimum = context.IMinimumS32(srcA, srcB);
+                maximum = context.IMaximumS32(srcA, srcB);
+            }
+            else
+            {
+                minimum = context.IMinimumU32(srcA, srcB);
+                maximum = context.IMaximumU32(srcA, srcB);
+            }
+
+            Operand selected = context.ConditionalSelect(srcPred, minimum, maximum);
+
+            context.Copy(GetDest(rd), selected);
+
+            SetZnFlags(context, selected, writeCC);
+
+            // TODO: X flags.
+        }
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/Instructions/InstEmitMemory.cs b/src/Ryujinx.Graphics.Shader/Instructions/InstEmitMemory.cs
new file mode 100644
index 00000000..c73c6b2a
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/Instructions/InstEmitMemory.cs
@@ -0,0 +1,541 @@
+using Ryujinx.Graphics.Shader.Decoders;
+using Ryujinx.Graphics.Shader.IntermediateRepresentation;
+using Ryujinx.Graphics.Shader.Translation;
+
+using static Ryujinx.Graphics.Shader.Instructions.InstEmitHelper;
+using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
+
+namespace Ryujinx.Graphics.Shader.Instructions
+{
+ static partial class InstEmit
+ {
+ private enum MemoryRegion // Selects which memory EmitLoad and EmitStore operate on.
+ {
+ Local, // Accessed through LoadLocal and StoreLocal.
+ Shared // Accessed through LoadShared and the StoreShared family (8, 16 and 32 bits).
+ }
+
+        /// <summary>
+        /// Emits an <see cref="InstAtom"/> instruction: an atomic operation on global memory whose
+        /// result is written to the destination register.
+        /// </summary>
+        /// <param name="context">Emitter context holding the current instruction</param>
+        public static void Atom(EmitterContext context)
+        {
+            var atom = context.GetOp<InstAtom>();
+
+            // Sign extend the 20-bit immediate offset.
+            int signedOffset = (atom.Imm20 << 12) >> 12;
+
+            Register baseRegister = new Register(atom.SrcA, RegisterType.Gpr);
+
+            (Operand low, Operand high) = Get40BitsAddress(context, baseRegister, atom.E, signedOffset);
+
+            Operand operandValue = GetSrcReg(context, atom.SrcB);
+
+            Operand previous = EmitAtomicOp(context, StorageKind.GlobalMemory, atom.Op, atom.Size, low, high, operandValue);
+
+            context.Copy(GetDest(atom.Dest), previous);
+        }
+
+        /// <summary>
+        /// Emits an <see cref="InstAtoms"/> instruction: an atomic operation on shared memory whose
+        /// result is written to the destination register.
+        /// </summary>
+        /// <param name="context">Emitter context holding the current instruction</param>
+        public static void Atoms(EmitterContext context)
+        {
+            var atoms = context.GetOp<InstAtoms>();
+
+            // The register value is divided by 4 before the sign extended 22-bit immediate is added.
+            Operand wordOffset = context.ShiftRightU32(GetSrcReg(context, atoms.SrcA), Const(2));
+            int signedOffset = (atoms.Imm22 << 10) >> 10;
+
+            wordOffset = context.IAdd(wordOffset, Const(signedOffset));
+
+            Operand operandValue = GetSrcReg(context, atoms.SrcB);
+
+            // Translate the shared memory size enum into the generic one, defaulting to U32.
+            AtomSize size;
+
+            switch (atoms.AtomsSize)
+            {
+                case AtomsSize.S32:
+                    size = AtomSize.S32;
+                    break;
+                case AtomsSize.U64:
+                    size = AtomSize.U64;
+                    break;
+                case AtomsSize.S64:
+                    size = AtomSize.S64;
+                    break;
+                default:
+                    size = AtomSize.U32;
+                    break;
+            }
+
+            Operand previous = EmitAtomicOp(context, StorageKind.SharedMemory, atoms.AtomOp, size, wordOffset, Const(0), operandValue);
+
+            context.Copy(GetDest(atoms.Dest), previous);
+        }
+
+        /// <summary>
+        /// Emits an <see cref="InstLdc"/> instruction: loads one or two words from a constant buffer
+        /// into consecutive registers, extracting a narrower integer for sizes below 32 bits.
+        /// Sizes above 64 bits are logged and ignored.
+        /// </summary>
+        /// <param name="context">Emitter context holding the current instruction</param>
+        public static void Ldc(EmitterContext context)
+        {
+            var ldc = context.GetOp<InstLdc>();
+
+            if (ldc.LsSize > LsSize2.B64)
+            {
+                context.Config.GpuAccessor.Log($"Invalid LDC size: {ldc.LsSize}.");
+                return;
+            }
+
+            bool narrow = ldc.LsSize < LsSize2.B32;
+
+            int wordCount = 1;
+
+            if (ldc.LsSize == LsSize2.B64)
+            {
+                wordCount = 2;
+            }
+
+            Operand cbufSlot = Const(ldc.CbufSlot);
+            Operand baseValue = GetSrcReg(context, ldc.SrcA);
+
+            bool slotFromRegister = ldc.AddressMode == AddressMode.Is || ldc.AddressMode == AddressMode.Isl;
+
+            if (slotFromRegister)
+            {
+                // In these modes the upper 16 bits of the register are added to the slot,
+                // and only the lower 16 bits contribute to the address.
+                cbufSlot = context.IAdd(cbufSlot, context.BitfieldExtractU32(baseValue, Const(16), Const(16)));
+                baseValue = context.BitwiseAnd(baseValue, Const(0xffff));
+            }
+
+            Operand byteAddress = context.IAdd(baseValue, Const(Imm16ToSInt(ldc.CbufOffset)));
+            Operand firstWord = context.ShiftRightU32(byteAddress, Const(2));
+            Operand bitOffset = GetBitOffset(context, byteAddress);
+
+            for (int word = 0; word < wordCount; word++)
+            {
+                Register target = new Register(ldc.Dest + word, RegisterType.Gpr);
+
+                if (target.IsRZ)
+                {
+                    // Stop loading once the destination reaches the zero register.
+                    break;
+                }
+
+                Operand wordOffset = context.IAdd(firstWord, Const(word));
+                Operand loaded = context.LoadConstant(cbufSlot, wordOffset);
+
+                if (narrow)
+                {
+                    loaded = ExtractSmallInt(context, (LsSize)ldc.LsSize, bitOffset, loaded);
+                }
+
+                context.Copy(Register(target), loaded);
+            }
+        }
+
+        /// <summary>
+        /// Emits an <see cref="InstLdg"/> instruction: a load from global memory.
+        /// </summary>
+        /// <param name="context">Emitter context holding the current instruction</param>
+        public static void Ldg(EmitterContext context)
+        {
+            var ldg = context.GetOp<InstLdg>();
+
+            int byteOffset = Imm24ToSInt(ldg.Imm24);
+
+            EmitLdg(context, ldg.LsSize, ldg.SrcA, ldg.Dest, byteOffset, ldg.E);
+        }
+
+        /// <summary>
+        /// Emits an <see cref="InstLdl"/> instruction: a load from local memory.
+        /// </summary>
+        /// <param name="context">Emitter context holding the current instruction</param>
+        public static void Ldl(EmitterContext context)
+        {
+            var ldl = context.GetOp<InstLdl>();
+
+            Operand baseAddress = GetSrcReg(context, ldl.SrcA);
+            int byteOffset = Imm24ToSInt(ldl.Imm24);
+
+            EmitLoad(context, MemoryRegion.Local, ldl.LsSize, baseAddress, ldl.Dest, byteOffset);
+        }
+
+        /// <summary>
+        /// Emits an <see cref="InstLds"/> instruction: a load from shared memory.
+        /// </summary>
+        /// <param name="context">Emitter context holding the current instruction</param>
+        public static void Lds(EmitterContext context)
+        {
+            var lds = context.GetOp<InstLds>();
+
+            Operand baseAddress = GetSrcReg(context, lds.SrcA);
+            int byteOffset = Imm24ToSInt(lds.Imm24);
+
+            EmitLoad(context, MemoryRegion.Shared, lds.LsSize, baseAddress, lds.Dest, byteOffset);
+        }
+
+        /// <summary>
+        /// Emits an <see cref="InstRed"/> instruction: an atomic operation on global memory whose
+        /// result is discarded.
+        /// </summary>
+        /// <param name="context">Emitter context holding the current instruction</param>
+        public static void Red(EmitterContext context)
+        {
+            var red = context.GetOp<InstRed>();
+
+            Register baseRegister = new Register(red.SrcA, RegisterType.Gpr);
+
+            // NOTE(review): Imm20 is passed as is here, while Atom sign extends its Imm20 before use.
+            // Confirm whether the RED offset is meant to be unsigned.
+            (Operand low, Operand high) = Get40BitsAddress(context, baseRegister, red.E, red.Imm20);
+
+            // The value operand is obtained with GetDest on the SrcB register index.
+            Operand operandValue = GetDest(red.SrcB);
+
+            EmitAtomicOp(context, StorageKind.GlobalMemory, (AtomOp)red.RedOp, red.RedSize, low, high, operandValue);
+        }
+
+        /// <summary>
+        /// Emits an <see cref="InstStg"/> instruction: a store to global memory.
+        /// </summary>
+        /// <param name="context">Emitter context holding the current instruction</param>
+        public static void Stg(EmitterContext context)
+        {
+            var stg = context.GetOp<InstStg>();
+
+            int byteOffset = Imm24ToSInt(stg.Imm24);
+
+            EmitStg(context, stg.LsSize, stg.SrcA, stg.Dest, byteOffset, stg.E);
+        }
+
+        /// <summary>
+        /// Emits an <see cref="InstStl"/> instruction: a store to local memory.
+        /// </summary>
+        /// <param name="context">Emitter context holding the current instruction</param>
+        public static void Stl(EmitterContext context)
+        {
+            var stl = context.GetOp<InstStl>();
+
+            Operand baseAddress = GetSrcReg(context, stl.SrcA);
+            int byteOffset = Imm24ToSInt(stl.Imm24);
+
+            EmitStore(context, MemoryRegion.Local, stl.LsSize, baseAddress, stl.Dest, byteOffset);
+        }
+
+        /// <summary>
+        /// Emits an <see cref="InstSts"/> instruction: a store to shared memory.
+        /// </summary>
+        /// <param name="context">Emitter context holding the current instruction</param>
+        public static void Sts(EmitterContext context)
+        {
+            var sts = context.GetOp<InstSts>();
+
+            Operand baseAddress = GetSrcReg(context, sts.SrcA);
+            int byteOffset = Imm24ToSInt(sts.Imm24);
+
+            EmitStore(context, MemoryRegion.Shared, sts.LsSize, baseAddress, sts.Dest, byteOffset);
+        }
+
+        /// <summary>
+        /// Emits an atomic memory operation. Only 32-bit operand types are supported; any other
+        /// type, and any operation without an implementation here, is logged and yields constant zero.
+        /// </summary>
+        /// <param name="context">Emitter context</param>
+        /// <param name="storageKind">Storage the operation targets</param>
+        /// <param name="op">Atomic operation to perform</param>
+        /// <param name="type">Operand type of the operation</param>
+        /// <param name="addrLow">Low part of the address (or the offset, for callers passing a zero high part)</param>
+        /// <param name="addrHigh">High part of the address</param>
+        /// <param name="value">Value operand of the operation</param>
+        /// <returns>Result of the atomic operation, or constant zero when nothing was emitted</returns>
+        private static Operand EmitAtomicOp(
+            EmitterContext context,
+            StorageKind storageKind,
+            AtomOp op,
+            AtomSize type,
+            Operand addrLow,
+            Operand addrHigh,
+            Operand value)
+        {
+            Operand res = Const(0);
+
+            bool isS32 = type == AtomSize.S32;
+            bool isU32 = type == AtomSize.U32;
+            bool is32Bits = isS32 || isU32;
+
+            void LogInvalidType()
+            {
+                context.Config.GpuAccessor.Log($"Invalid reduction type: {type}.");
+            }
+
+            switch (op)
+            {
+                case AtomOp.Add:
+                    if (is32Bits)
+                    {
+                        res = context.AtomicAdd(storageKind, addrLow, addrHigh, value);
+                    }
+                    else
+                    {
+                        LogInvalidType();
+                    }
+                    break;
+                case AtomOp.And:
+                    if (is32Bits)
+                    {
+                        res = context.AtomicAnd(storageKind, addrLow, addrHigh, value);
+                    }
+                    else
+                    {
+                        LogInvalidType();
+                    }
+                    break;
+                case AtomOp.Xor:
+                    if (is32Bits)
+                    {
+                        res = context.AtomicXor(storageKind, addrLow, addrHigh, value);
+                    }
+                    else
+                    {
+                        LogInvalidType();
+                    }
+                    break;
+                case AtomOp.Or:
+                    if (is32Bits)
+                    {
+                        res = context.AtomicOr(storageKind, addrLow, addrHigh, value);
+                    }
+                    else
+                    {
+                        LogInvalidType();
+                    }
+                    break;
+                case AtomOp.Max:
+                    // Minimum and maximum depend on signedness, so each type has its own operation.
+                    if (isS32)
+                    {
+                        res = context.AtomicMaxS32(storageKind, addrLow, addrHigh, value);
+                    }
+                    else if (isU32)
+                    {
+                        res = context.AtomicMaxU32(storageKind, addrLow, addrHigh, value);
+                    }
+                    else
+                    {
+                        LogInvalidType();
+                    }
+                    break;
+                case AtomOp.Min:
+                    if (isS32)
+                    {
+                        res = context.AtomicMinS32(storageKind, addrLow, addrHigh, value);
+                    }
+                    else if (isU32)
+                    {
+                        res = context.AtomicMinU32(storageKind, addrLow, addrHigh, value);
+                    }
+                    else
+                    {
+                        LogInvalidType();
+                    }
+                    break;
+                default:
+                    // Previously these fell through silently and produced zero with no diagnostic.
+                    context.Config.GpuAccessor.Log($"Invalid atomic operation: {op}.");
+                    break;
+            }
+
+            return res;
+        }
+
+        /// <summary>
+        /// Emits a load from local or shared memory into one or more consecutive registers.
+        /// Sizes above 128 bits are logged and ignored.
+        /// </summary>
+        /// <param name="context">Emitter context</param>
+        /// <param name="region">Memory region to load from</param>
+        /// <param name="size">Size of the load</param>
+        /// <param name="srcA">Base byte offset</param>
+        /// <param name="rd">Index of the first destination register</param>
+        /// <param name="offset">Immediate byte offset added to the base</param>
+        private static void EmitLoad(
+            EmitterContext context,
+            MemoryRegion region,
+            LsSize2 size,
+            Operand srcA,
+            int rd,
+            int offset)
+        {
+            if (size > LsSize2.B128)
+            {
+                context.Config.GpuAccessor.Log($"Invalid load size: {size}.");
+                return;
+            }
+
+            bool narrow = size < LsSize2.B32;
+
+            int wordCount = size switch
+            {
+                LsSize2.B64 => 2,
+                LsSize2.B128 => 4,
+                _ => 1
+            };
+
+            Operand byteOffset = context.IAdd(srcA, Const(offset));
+
+            // A word is 4 bytes, so the word index is the byte offset divided by 4.
+            Operand firstWord = context.ShiftRightU32(byteOffset, Const(2));
+            Operand bitOffset = GetBitOffset(context, byteOffset);
+
+            for (int word = 0; word < wordCount; word++)
+            {
+                Register target = new Register(rd + word, RegisterType.Gpr);
+
+                if (target.IsRZ)
+                {
+                    // Stop loading once the destination reaches the zero register.
+                    break;
+                }
+
+                Operand wordOffset = context.IAdd(firstWord, Const(word));
+
+                Operand loaded = region switch
+                {
+                    MemoryRegion.Local => context.LoadLocal(wordOffset),
+                    MemoryRegion.Shared => context.LoadShared(wordOffset),
+                    _ => null
+                };
+
+                if (narrow)
+                {
+                    loaded = ExtractSmallInt(context, (LsSize)size, bitOffset, loaded);
+                }
+
+                context.Copy(Register(target), loaded);
+            }
+        }
+
+        /// <summary>
+        /// Emits a load from global memory into one or more consecutive registers.
+        /// </summary>
+        /// <param name="context">Emitter context</param>
+        /// <param name="size">Size of the load</param>
+        /// <param name="ra">Index of the register holding the base address</param>
+        /// <param name="rd">Index of the first destination register</param>
+        /// <param name="offset">Immediate byte offset added to the base address</param>
+        /// <param name="extended">Whether the address also uses the register following <paramref name="ra"/> as its high part</param>
+        private static void EmitLdg(
+            EmitterContext context,
+            LsSize size,
+            int ra,
+            int rd,
+            int offset,
+            bool extended)
+        {
+            bool narrow = size < LsSize.B32;
+            int wordCount = GetVectorCount(size);
+
+            Register baseRegister = new Register(ra, RegisterType.Gpr);
+
+            (Operand low, Operand high) = Get40BitsAddress(context, baseRegister, extended, offset);
+
+            Operand bitOffset = GetBitOffset(context, low);
+
+            for (int word = 0; word < wordCount; word++)
+            {
+                Register target = new Register(rd + word, RegisterType.Gpr);
+
+                if (target.IsRZ)
+                {
+                    // Stop loading once the destination reaches the zero register.
+                    break;
+                }
+
+                // Each word lives 4 bytes after the previous one; only the low part is advanced.
+                Operand wordAddress = context.IAdd(low, Const(word * 4));
+                Operand loaded = context.LoadGlobal(wordAddress, high);
+
+                if (narrow)
+                {
+                    loaded = ExtractSmallInt(context, size, bitOffset, loaded);
+                }
+
+                context.Copy(Register(target), loaded);
+            }
+        }
+
+        /// <summary>
+        /// Emits a store of one or more consecutive registers to local or shared memory.
+        /// Sizes above 128 bits are logged and ignored.
+        /// </summary>
+        /// <param name="context">Emitter context</param>
+        /// <param name="region">Memory region to store to</param>
+        /// <param name="size">Size of the store</param>
+        /// <param name="srcA">Base byte offset</param>
+        /// <param name="rd">Index of the first register holding the data</param>
+        /// <param name="offset">Immediate byte offset added to the base</param>
+        private static void EmitStore(
+            EmitterContext context,
+            MemoryRegion region,
+            LsSize2 size,
+            Operand srcA,
+            int rd,
+            int offset)
+        {
+            if (size > LsSize2.B128)
+            {
+                context.Config.GpuAccessor.Log($"Invalid store size: {size}.");
+                return;
+            }
+
+            bool narrow = size < LsSize2.B32;
+            bool is8Bits = size == LsSize2.U8 || size == LsSize2.S8;
+            bool is16Bits = size == LsSize2.U16 || size == LsSize2.S16;
+
+            int wordCount = size switch
+            {
+                LsSize2.B64 => 2,
+                LsSize2.B128 => 4,
+                _ => 1
+            };
+
+            Operand byteOffset = context.IAdd(srcA, Const(offset));
+            Operand firstWord = context.ShiftRightU32(byteOffset, Const(2));
+            Operand bitOffset = GetBitOffset(context, byteOffset);
+
+            for (int word = 0; word < wordCount; word++)
+            {
+                // When the register index would reach the zero register index, the base index is used instead.
+                int sourceIndex = rd + word >= RegisterConsts.RegisterZeroIndex ? rd : rd + word;
+
+                Operand data = Register(sourceIndex, RegisterType.Gpr);
+                Operand wordOffset = context.IAdd(firstWord, Const(word));
+
+                switch (region)
+                {
+                    case MemoryRegion.Local:
+                        if (narrow)
+                        {
+                            // Local memory is written a word at a time, so a narrow value is merged
+                            // into the word currently stored there.
+                            Operand current = context.LoadLocal(wordOffset);
+
+                            data = InsertSmallInt(context, (LsSize)size, bitOffset, current, data);
+                        }
+
+                        context.StoreLocal(wordOffset, data);
+                        break;
+
+                    case MemoryRegion.Shared:
+                        // Narrow shared stores take the byte offset, full word stores the word offset.
+                        if (is8Bits)
+                        {
+                            context.StoreShared8(byteOffset, data);
+                        }
+                        else if (is16Bits)
+                        {
+                            context.StoreShared16(byteOffset, data);
+                        }
+                        else
+                        {
+                            context.StoreShared(wordOffset, data);
+                        }
+                        break;
+                }
+            }
+        }
+
+        /// <summary>
+        /// Emits a store of one or more consecutive registers to global memory.
+        /// Sizes above 128 bits are logged and ignored.
+        /// </summary>
+        /// <param name="context">Emitter context</param>
+        /// <param name="size">Size of the store</param>
+        /// <param name="ra">Index of the register holding the base address</param>
+        /// <param name="rd">Index of the first register holding the data</param>
+        /// <param name="offset">Immediate byte offset added to the base address</param>
+        /// <param name="extended">Whether the address also uses the register following <paramref name="ra"/> as its high part</param>
+        private static void EmitStg(
+            EmitterContext context,
+            LsSize2 size,
+            int ra,
+            int rd,
+            int offset,
+            bool extended)
+        {
+            if (size > LsSize2.B128)
+            {
+                context.Config.GpuAccessor.Log($"Invalid store size: {size}.");
+                return;
+            }
+
+            int count = GetVectorCount((LsSize)size);
+
+            (Operand addrLow, Operand addrHigh) = Get40BitsAddress(context, new Register(ra, RegisterType.Gpr), extended, offset);
+
+            // No bit offset is computed here: the 8 and 16 bits stores take the address directly,
+            // so the value was never used and only produced dead operations.
+            for (int index = 0; index < count; index++)
+            {
+                // When the register index would reach the zero register index, the base index is used instead.
+                bool isRz = rd + index >= RegisterConsts.RegisterZeroIndex;
+
+                Operand value = Register(isRz ? rd : rd + index, RegisterType.Gpr);
+
+                Operand addrLowOffset = context.IAdd(addrLow, Const(index * 4));
+
+                if (size == LsSize2.U8 || size == LsSize2.S8)
+                {
+                    context.StoreGlobal8(addrLowOffset, addrHigh, value);
+                }
+                else if (size == LsSize2.U16 || size == LsSize2.S16)
+                {
+                    context.StoreGlobal16(addrLowOffset, addrHigh, value);
+                }
+                else
+                {
+                    context.StoreGlobal(addrLowOffset, addrHigh, value);
+                }
+            }
+        }
+
+        /// <summary>
+        /// Gets the number of 32-bit words moved by a load or store of the given size.
+        /// </summary>
+        /// <param name="size">Size of the memory access</param>
+        /// <returns>2 for 64 bits, 4 for both 128 bits variants, 1 for everything else</returns>
+        private static int GetVectorCount(LsSize size)
+        {
+            return size switch
+            {
+                LsSize.B64 => 2,
+                LsSize.B128 => 4,
+                LsSize.UB128 => 4,
+                _ => 1
+            };
+        }
+
+        /// <summary>
+        /// Builds the low and high parts of a global memory address from a base register and an
+        /// immediate offset.
+        /// </summary>
+        /// <param name="context">Emitter context</param>
+        /// <param name="ra">Register holding the low part of the base address</param>
+        /// <param name="extended">
+        /// When true, the high part is read from the register following <paramref name="ra"/>
+        /// (unless it is the zero register) and receives the carry of the offset addition
+        /// </param>
+        /// <param name="offset">Immediate byte offset added to the low part</param>
+        /// <returns>Tuple with the low and high parts of the address</returns>
+        private static (Operand, Operand) Get40BitsAddress(
+            EmitterContext context,
+            Register ra,
+            bool extended,
+            int offset)
+        {
+            Operand high = extended && !ra.IsRZ
+                ? Register(ra.Index + 1, RegisterType.Gpr)
+                : Const(0);
+
+            Operand displacement = Const(offset);
+            Operand low = context.IAdd(Register(ra), displacement);
+
+            if (!extended)
+            {
+                return (low, high);
+            }
+
+            // The addition wrapped around when the sum is below the offset (unsigned),
+            // in which case one is carried into the high part.
+            Operand wrapped = context.ICompareLessUnsigned(low, displacement);
+            Operand carry = context.ConditionalSelect(wrapped, Const(1), Const(0));
+
+            high = context.IAdd(high, carry);
+
+            return (low, high);
+        }
+
+        /// <summary>
+        /// Gets the bit position, inside its 32-bit word, of the byte that an offset points to.
+        /// </summary>
+        /// <param name="context">Emitter context</param>
+        /// <param name="baseOffset">Byte offset</param>
+        /// <returns>Operand holding (baseOffset &amp; 3) * 8</returns>
+        private static Operand GetBitOffset(EmitterContext context, Operand baseOffset)
+        {
+            // Addresses should be always aligned to the integer type,
+            // so unaligned addresses don't need to be taken into account.
+            Operand byteInWord = context.BitwiseAnd(baseOffset, Const(3));
+
+            // Multiply by 8 to turn the byte index into a bit index.
+            return context.ShiftLeft(byteInWord, Const(3));
+        }
+
+        /// <summary>
+        /// Extracts an 8 or 16 bits integer from a 32-bit word and extends it to 32 bits.
+        /// </summary>
+        /// <param name="context">Emitter context</param>
+        /// <param name="size">Size and signedness of the integer; other sizes only apply the shift</param>
+        /// <param name="bitOffset">Bit position of the integer inside the word</param>
+        /// <param name="value">Word containing the integer</param>
+        /// <returns>Operand holding the extracted value</returns>
+        private static Operand ExtractSmallInt(
+            EmitterContext context,
+            LsSize size,
+            Operand bitOffset,
+            Operand value)
+        {
+            Operand shifted = context.ShiftRightU32(value, bitOffset);
+
+            return size switch
+            {
+                LsSize.U8 => ZeroExtendTo32(context, shifted, 8),
+                LsSize.U16 => ZeroExtendTo32(context, shifted, 16),
+                LsSize.S8 => SignExtendTo32(context, shifted, 8),
+                LsSize.S16 => SignExtendTo32(context, shifted, 16),
+                _ => shifted
+            };
+        }
+
+        /// <summary>
+        /// Inserts an 8 or 16 bits integer into a 32-bit word.
+        /// </summary>
+        /// <param name="context">Emitter context</param>
+        /// <param name="size">Size of the integer; for other sizes the value is returned unchanged</param>
+        /// <param name="bitOffset">Bit position inside the word where the integer is inserted</param>
+        /// <param name="word">Word receiving the integer</param>
+        /// <param name="value">Value to insert; it is masked to the integer size first</param>
+        /// <returns>Operand holding the word with the integer inserted</returns>
+        private static Operand InsertSmallInt(
+            EmitterContext context,
+            LsSize size,
+            Operand bitOffset,
+            Operand word,
+            Operand value)
+        {
+            bool is8Bits = size == LsSize.U8 || size == LsSize.S8;
+            bool is16Bits = size == LsSize.U16 || size == LsSize.S16;
+
+            if (!is8Bits && !is16Bits)
+            {
+                return value;
+            }
+
+            int bits = is8Bits ? 8 : 16;
+            int mask = is8Bits ? 0xff : 0xffff;
+
+            Operand masked = context.BitwiseAnd(value, Const(mask));
+
+            return context.BitfieldInsert(word, masked, bitOffset, Const(bits));
+        }
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/Instructions/InstEmitMove.cs b/src/Ryujinx.Graphics.Shader/Instructions/InstEmitMove.cs
new file mode 100644
index 00000000..9992ac37
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/Instructions/InstEmitMove.cs
@@ -0,0 +1,237 @@
+using Ryujinx.Graphics.Shader.Decoders;
+using Ryujinx.Graphics.Shader.IntermediateRepresentation;
+using Ryujinx.Graphics.Shader.Translation;
+
+using static Ryujinx.Graphics.Shader.Instructions.InstEmitHelper;
+using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
+
+namespace Ryujinx.Graphics.Shader.Instructions
+{
+ static partial class InstEmit
+ {
+ // MOV (register form): copies source register A into the destination register.
+ public static void MovR(EmitterContext context)
+ {
+     InstMovR movOp = context.GetOp<InstMovR>();
+
+     Operand destination = GetDest(movOp.Dest);
+     Operand source = GetSrcReg(context, movOp.SrcA);
+
+     context.Copy(destination, source);
+ }
+
+ // MOV (immediate form): copies the Imm20 immediate into the destination register.
+ public static void MovI(EmitterContext context)
+ {
+     InstMovI movOp = context.GetOp<InstMovI>();
+
+     Operand destination = GetDest(movOp.Dest);
+
+     // NOTE(review): unlike SelI/R2pI/ShlI/ShrI, the immediate is not passed through
+     // Imm20ToSInt here - confirm this is intended.
+     Operand source = GetSrcImm(context, movOp.Imm20);
+
+     context.Copy(destination, source);
+ }
+
+ // MOV (constant buffer form): copies the value at (CbufSlot, CbufOffset) into the destination register.
+ public static void MovC(EmitterContext context)
+ {
+     InstMovC movOp = context.GetOp<InstMovC>();
+
+     Operand destination = GetDest(movOp.Dest);
+     Operand source = GetSrcCbuf(context, movOp.CbufSlot, movOp.CbufOffset);
+
+     context.Copy(destination, source);
+ }
+
+ // MOV32I: copies the Imm32 immediate into the destination register.
+ public static void Mov32i(EmitterContext context)
+ {
+     InstMov32i movOp = context.GetOp<InstMov32i>();
+
+     Operand destination = GetDest(movOp.Dest);
+     Operand source = GetSrcImm(context, movOp.Imm32);
+
+     context.Copy(destination, source);
+ }
+
+ // R2P (register mask form): source register A supplies the bits, source register B the mask.
+ public static void R2pR(EmitterContext context)
+ {
+     InstR2pR r2pOp = context.GetOp<InstR2pR>();
+
+     EmitR2p(
+         context,
+         GetSrcReg(context, r2pOp.SrcA),
+         GetSrcReg(context, r2pOp.SrcB),
+         r2pOp.ByteSel,
+         r2pOp.Ccpr);
+ }
+
+ // R2P (immediate mask form): source register A supplies the bits, the Imm20 immediate
+ // (converted with Imm20ToSInt) the mask.
+ public static void R2pI(EmitterContext context)
+ {
+     InstR2pI r2pOp = context.GetOp<InstR2pI>();
+
+     EmitR2p(
+         context,
+         GetSrcReg(context, r2pOp.SrcA),
+         GetSrcImm(context, Imm20ToSInt(r2pOp.Imm20)),
+         r2pOp.ByteSel,
+         r2pOp.Ccpr);
+ }
+
+ // R2P (constant buffer mask form): source register A supplies the bits, the constant
+ // buffer value at (CbufSlot, CbufOffset) the mask.
+ public static void R2pC(EmitterContext context)
+ {
+     InstR2pC r2pOp = context.GetOp<InstR2pC>();
+
+     EmitR2p(
+         context,
+         GetSrcReg(context, r2pOp.SrcA),
+         GetSrcCbuf(context, r2pOp.CbufSlot, r2pOp.CbufOffset),
+         r2pOp.ByteSel,
+         r2pOp.Ccpr);
+ }
+
+ // S2R: reads the special register selected by op.SReg and copies it into the destination register.
+ // Special registers without a case below read as zero.
+ public static void S2r(EmitterContext context)
+ {
+     InstS2r op = context.GetOp<InstS2r>();
+
+     // Loads a shader input variable.
+     Operand LoadInput(IoVariable ioVariable)
+     {
+         return context.Load(StorageKind.Input, ioVariable);
+     }
+
+     // Loads one element of an indexed shader input variable.
+     Operand LoadInputElement(IoVariable ioVariable, int element)
+     {
+         return context.Load(StorageKind.Input, ioVariable, null, Const(element));
+     }
+
+     Operand src;
+
+     switch (op.SReg)
+     {
+         case SReg.LaneId:
+             src = LoadInput(IoVariable.SubgroupLaneId);
+             break;
+
+         case SReg.InvocationId:
+             src = LoadInput(IoVariable.InvocationId);
+             break;
+
+         case SReg.YDirection:
+             src = ConstF(1); // TODO: Use value from Y direction GPU register.
+             break;
+
+         case SReg.ThreadKill:
+             // Only loaded on the fragment stage, other stages read zero.
+             if (context.Config.Stage == ShaderStage.Fragment)
+             {
+                 src = LoadInput(IoVariable.ThreadKill);
+             }
+             else
+             {
+                 src = Const(0);
+             }
+             break;
+
+         case SReg.InvocationInfo:
+         {
+             ShaderStage stage = context.Config.Stage;
+
+             if (stage == ShaderStage.Compute || stage == ShaderStage.Fragment)
+             {
+                 src = Const(0);
+             }
+             else if (stage == ShaderStage.TessellationControl || stage == ShaderStage.TessellationEvaluation)
+             {
+                 // Note: Lowest 8-bits seems to contain some primitive index,
+                 // but it seems to be NVIDIA implementation specific as it's only used
+                 // to calculate ISBE offsets, so we can just keep it as zero.
+                 src = context.ShiftLeft(LoadInput(IoVariable.PatchVertices), Const(16));
+             }
+             else
+             {
+                 // Same layout as above, with the vertex count taken from the primitive topology.
+                 src = Const(context.Config.GpuAccessor.QueryPrimitiveTopology().ToInputVertices() << 16);
+             }
+             break;
+         }
+
+         case SReg.TId:
+         {
+             // Packed thread ID: X from bit 0, Y from bit 16, Z from bit 26.
+             Operand tidX = LoadInputElement(IoVariable.ThreadId, 0);
+             Operand tidY = LoadInputElement(IoVariable.ThreadId, 1);
+             Operand tidZ = LoadInputElement(IoVariable.ThreadId, 2);
+
+             Operand shiftedY = context.ShiftLeft(tidY, Const(16));
+             Operand shiftedZ = context.ShiftLeft(tidZ, Const(26));
+             Operand upperBits = context.BitwiseOr(shiftedY, shiftedZ);
+
+             src = context.BitwiseOr(tidX, upperBits);
+             break;
+         }
+
+         case SReg.TIdX:
+             src = LoadInputElement(IoVariable.ThreadId, 0);
+             break;
+         case SReg.TIdY:
+             src = LoadInputElement(IoVariable.ThreadId, 1);
+             break;
+         case SReg.TIdZ:
+             src = LoadInputElement(IoVariable.ThreadId, 2);
+             break;
+
+         case SReg.CtaIdX:
+             src = LoadInputElement(IoVariable.CtaId, 0);
+             break;
+         case SReg.CtaIdY:
+             src = LoadInputElement(IoVariable.CtaId, 1);
+             break;
+         case SReg.CtaIdZ:
+             src = LoadInputElement(IoVariable.CtaId, 2);
+             break;
+
+         // Only element 0 of each subgroup mask is read.
+         case SReg.EqMask:
+             src = LoadInputElement(IoVariable.SubgroupEqMask, 0);
+             break;
+         case SReg.LtMask:
+             src = LoadInputElement(IoVariable.SubgroupLtMask, 0);
+             break;
+         case SReg.LeMask:
+             src = LoadInputElement(IoVariable.SubgroupLeMask, 0);
+             break;
+         case SReg.GtMask:
+             src = LoadInputElement(IoVariable.SubgroupGtMask, 0);
+             break;
+         case SReg.GeMask:
+             src = LoadInputElement(IoVariable.SubgroupGeMask, 0);
+             break;
+
+         default:
+             src = Const(0);
+             break;
+     }
+
+     context.Copy(GetDest(op.Dest), src);
+ }
+
+ // SEL (register form): selects between source registers A and B based on the source predicate.
+ public static void SelR(EmitterContext context)
+ {
+     InstSelR selOp = context.GetOp<InstSelR>();
+
+     Operand whenTrue = GetSrcReg(context, selOp.SrcA);
+     Operand whenFalse = GetSrcReg(context, selOp.SrcB);
+     Operand condition = GetPredicate(context, selOp.SrcPred, selOp.SrcPredInv);
+
+     EmitSel(context, whenTrue, whenFalse, condition, selOp.Dest);
+ }
+
+ // SEL (immediate form): selects between source register A and the Imm20 immediate
+ // (converted with Imm20ToSInt) based on the source predicate.
+ public static void SelI(EmitterContext context)
+ {
+     InstSelI selOp = context.GetOp<InstSelI>();
+
+     Operand whenTrue = GetSrcReg(context, selOp.SrcA);
+     Operand whenFalse = GetSrcImm(context, Imm20ToSInt(selOp.Imm20));
+     Operand condition = GetPredicate(context, selOp.SrcPred, selOp.SrcPredInv);
+
+     EmitSel(context, whenTrue, whenFalse, condition, selOp.Dest);
+ }
+
+ // SEL (constant buffer form): selects between source register A and the constant buffer
+ // value at (CbufSlot, CbufOffset) based on the source predicate.
+ public static void SelC(EmitterContext context)
+ {
+     InstSelC selOp = context.GetOp<InstSelC>();
+
+     Operand whenTrue = GetSrcReg(context, selOp.SrcA);
+     Operand whenFalse = GetSrcCbuf(context, selOp.CbufSlot, selOp.CbufOffset);
+     Operand condition = GetPredicate(context, selOp.SrcPred, selOp.SrcPredInv);
+
+     EmitSel(context, whenTrue, whenFalse, condition, selOp.Dest);
+ }
+
+ // Shared R2P implementation. For every predicate register index below RegisterConsts.PredsCount,
+ // when the matching bit of mask is set, the predicate receives the matching bit of the byte of
+ // value selected by byteSel; otherwise the predicate keeps its current value.
+ // The condition code variant (ccpr) is not implemented and only logs a message.
+ private static void EmitR2p(EmitterContext context, Operand value, Operand mask, ByteSel byteSel, bool ccpr)
+ {
+     if (ccpr)
+     {
+         // TODO: Support Register to condition code flags copy.
+         context.Config.GpuAccessor.Log("R2P.CC not implemented.");
+         return;
+     }
+
+     // Produces a boolean telling whether bit bitIndex of source is set.
+     Operand IsBitSet(Operand source, int bitIndex)
+     {
+         Operand isolated = context.BitwiseAnd(source, Const(1 << bitIndex));
+
+         return context.ICompareNotEqual(isolated, Const(0));
+     }
+
+     // byteSel picks which byte of value holds the predicate bits.
+     int byteShift = (int)byteSel * 8;
+
+     for (int predIndex = 0; predIndex < RegisterConsts.PredsCount; predIndex++)
+     {
+         Operand pred = Register(predIndex, RegisterType.Predicate);
+         Operand enabled = IsBitSet(mask, predIndex);
+         Operand newValue = IsBitSet(value, predIndex + byteShift);
+
+         context.Copy(pred, context.ConditionalSelect(enabled, newValue, pred));
+     }
+ }
+
+ // Shared SEL implementation: writes srcA to register rd when srcPred is true, srcB otherwise.
+ private static void EmitSel(EmitterContext context, Operand srcA, Operand srcB, Operand srcPred, int rd)
+ {
+     Operand selected = context.ConditionalSelect(srcPred, srcA, srcB);
+     Operand destination = GetDest(rd);
+
+     context.Copy(destination, selected);
+ }
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/Instructions/InstEmitMultifunction.cs b/src/Ryujinx.Graphics.Shader/Instructions/InstEmitMultifunction.cs
new file mode 100644
index 00000000..1ea7d321
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/Instructions/InstEmitMultifunction.cs
@@ -0,0 +1,97 @@
+using Ryujinx.Graphics.Shader.Decoders;
+using Ryujinx.Graphics.Shader.IntermediateRepresentation;
+using Ryujinx.Graphics.Shader.Translation;
+
+using static Ryujinx.Graphics.Shader.Instructions.InstEmitHelper;
+
+namespace Ryujinx.Graphics.Shader.Instructions
+{
+ static partial class InstEmit
+ {
+ // RRO (register form): range reduction with source register B as input.
+ public static void RroR(EmitterContext context)
+ {
+     InstRroR rroOp = context.GetOp<InstRroR>();
+
+     Operand input = GetSrcReg(context, rroOp.SrcB);
+
+     EmitRro(context, input, rroOp.Dest, rroOp.AbsB, rroOp.NegB);
+ }
+
+ // RRO (immediate form): range reduction with the Imm20 immediate (converted with
+ // Imm20ToFloat) as input.
+ public static void RroI(EmitterContext context)
+ {
+     InstRroI rroOp = context.GetOp<InstRroI>();
+
+     Operand input = GetSrcImm(context, Imm20ToFloat(rroOp.Imm20));
+
+     EmitRro(context, input, rroOp.Dest, rroOp.AbsB, rroOp.NegB);
+ }
+
+ // RRO (constant buffer form): range reduction with the constant buffer value at
+ // (CbufSlot, CbufOffset) as input.
+ public static void RroC(EmitterContext context)
+ {
+     InstRroC rroOp = context.GetOp<InstRroC>();
+
+     Operand input = GetSrcCbuf(context, rroOp.CbufSlot, rroOp.CbufOffset);
+
+     EmitRro(context, input, rroOp.Dest, rroOp.AbsB, rroOp.NegB);
+ }
+
+ // MUFU: applies the function selected by op.MufuOp to source register A (after the optional
+ // abs/neg modifiers), saturates the result according to op.Sat and writes it to the destination.
+ // An unknown function is logged and the modified source is written through unchanged.
+ public static void Mufu(EmitterContext context)
+ {
+     InstMufu op = context.GetOp<InstMufu>();
+
+     Operand input = GetSrcReg(context, op.SrcA);
+     Operand result = context.FPAbsNeg(input, op.AbsA, op.NegA);
+
+     MufuOp function = op.MufuOp;
+
+     if (function == MufuOp.Cos)
+     {
+         result = context.FPCosine(result);
+     }
+     else if (function == MufuOp.Sin)
+     {
+         result = context.FPSine(result);
+     }
+     else if (function == MufuOp.Ex2)
+     {
+         result = context.FPExponentB2(result);
+     }
+     else if (function == MufuOp.Lg2)
+     {
+         result = context.FPLogarithmB2(result);
+     }
+     else if (function == MufuOp.Rcp)
+     {
+         result = context.FPReciprocal(result);
+     }
+     else if (function == MufuOp.Rsq)
+     {
+         result = context.FPReciprocalSquareRoot(result);
+     }
+     else if (function == MufuOp.Rcp64h)
+     {
+         // The source is packed into a double together with a zero word, the FP64 operation
+         // is applied, and only the high word of the result is kept.
+         Operand packed = context.PackDouble2x32(OperandHelper.Const(0), result);
+
+         result = context.UnpackDouble2x32High(context.FPReciprocal(packed, Instruction.FP64));
+     }
+     else if (function == MufuOp.Rsq64h)
+     {
+         // Same packing as Rcp64h, using the reciprocal square root.
+         Operand packed = context.PackDouble2x32(OperandHelper.Const(0), result);
+
+         result = context.UnpackDouble2x32High(context.FPReciprocalSquareRoot(packed, Instruction.FP64));
+     }
+     else if (function == MufuOp.Sqrt)
+     {
+         result = context.FPSquareRoot(result);
+     }
+     else
+     {
+         context.Config.GpuAccessor.Log($"Invalid MUFU operation \"{op.MufuOp}\".");
+     }
+
+     Operand destination = GetDest(op.Dest);
+
+     context.Copy(destination, context.FPSaturate(result, op.Sat));
+ }
+
+ // Shared RRO implementation: writes srcB, after the optional abs/neg modifiers, to register rd.
+ private static void EmitRro(EmitterContext context, Operand srcB, int rd, bool absB, bool negB)
+ {
+     // RRO is the range reduction operator. It is translated as a plain move,
+     // since it should always be followed by a matching MUFU instruction.
+     Operand modified = context.FPAbsNeg(srcB, absB, negB);
+     Operand destination = GetDest(rd);
+
+     context.Copy(destination, modified);
+ }
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/Instructions/InstEmitNop.cs b/src/Ryujinx.Graphics.Shader/Instructions/InstEmitNop.cs
new file mode 100644
index 00000000..01144007
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/Instructions/InstEmitNop.cs
@@ -0,0 +1,15 @@
+using Ryujinx.Graphics.Shader.Decoders;
+using Ryujinx.Graphics.Shader.Translation;
+
+namespace Ryujinx.Graphics.Shader.Instructions
+{
+ static partial class InstEmit
+ {
+ // NOP: emits nothing.
+ public static void Nop(EmitterContext context)
+ {
+     // The decoded operation is still fetched, as before, but it is no longer
+     // stored in a local that was never read.
+     context.GetOp<InstNop>();
+
+     // No operation.
+ }
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/Instructions/InstEmitPredicate.cs b/src/Ryujinx.Graphics.Shader/Instructions/InstEmitPredicate.cs
new file mode 100644
index 00000000..d605661f
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/Instructions/InstEmitPredicate.cs
@@ -0,0 +1,54 @@
+using Ryujinx.Graphics.Shader.Decoders;
+using Ryujinx.Graphics.Shader.IntermediateRepresentation;
+using Ryujinx.Graphics.Shader.Translation;
+
+using static Ryujinx.Graphics.Shader.Instructions.InstEmitAluHelper;
+using static Ryujinx.Graphics.Shader.Instructions.InstEmitHelper;
+using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
+
+namespace Ryujinx.Graphics.Shader.Instructions
+{
+ static partial class InstEmit
+ {
+ // PSET: combines three (optionally inverted) predicates with two logical operations,
+ // (Src2Pred BoolOpAB Src1Pred) BoolOpC SrcPred, and writes the result to a general purpose register.
+ // With BVal set, the destination receives the float constant 1 when the result is true and
+ // integer 0 otherwise; without it, the combined value is copied as is.
+ public static void Pset(EmitterContext context)
+ {
+     InstPset op = context.GetOp<InstPset>();
+
+     Operand pred2 = Register(op.Src2Pred, RegisterType.Predicate);
+     Operand srcA = context.BitwiseNot(pred2, op.Src2PredInv);
+
+     Operand pred1 = Register(op.Src1Pred, RegisterType.Predicate);
+     Operand srcB = context.BitwiseNot(pred1, op.Src1PredInv);
+
+     Operand pred0 = Register(op.SrcPred, RegisterType.Predicate);
+     Operand srcC = context.BitwiseNot(pred0, op.SrcPredInv);
+
+     Operand combinedAB = GetPredLogicalOp(context, op.BoolOpAB, srcA, srcB);
+     Operand combined = GetPredLogicalOp(context, op.BoolOpC, combinedAB, srcC);
+
+     Operand dest = GetDest(op.Dest);
+
+     if (!op.BVal)
+     {
+         context.Copy(dest, combined);
+         return;
+     }
+
+     context.Copy(dest, context.ConditionalSelect(combined, ConstF(1), Const(0)));
+ }
+
+ // PSETP: computes (Src2Pred BoolOpAB Src1Pred) and its bitwise negation, combines each of them
+ // with the source predicate using BoolOpC, and writes the two results to DestPred and DestPredInv.
+ public static void Psetp(EmitterContext context)
+ {
+     InstPsetp op = context.GetOp<InstPsetp>();
+
+     Operand pred2 = Register(op.Src2Pred, RegisterType.Predicate);
+     Operand srcA = context.BitwiseNot(pred2, op.Src2PredInv);
+
+     Operand pred1 = Register(op.Src1Pred, RegisterType.Predicate);
+     Operand srcB = context.BitwiseNot(pred1, op.Src1PredInv);
+
+     Operand direct = GetPredLogicalOp(context, op.BoolOpAB, srcA, srcB);
+     Operand negated = context.BitwiseNot(direct);
+     Operand srcPred = GetPredicate(context, op.SrcPred, op.SrcPredInv);
+
+     Operand directResult = GetPredLogicalOp(context, op.BoolOpC, direct, srcPred);
+     Operand negatedResult = GetPredLogicalOp(context, op.BoolOpC, negated, srcPred);
+
+     context.Copy(Register(op.DestPred, RegisterType.Predicate), directResult);
+     context.Copy(Register(op.DestPredInv, RegisterType.Predicate), negatedResult);
+ }
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/Instructions/InstEmitShift.cs b/src/Ryujinx.Graphics.Shader/Instructions/InstEmitShift.cs
new file mode 100644
index 00000000..2873cad8
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/Instructions/InstEmitShift.cs
@@ -0,0 +1,249 @@
+using Ryujinx.Graphics.Shader.Decoders;
+using Ryujinx.Graphics.Shader.IntermediateRepresentation;
+using Ryujinx.Graphics.Shader.Translation;
+
+using static Ryujinx.Graphics.Shader.Instructions.InstEmitHelper;
+using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
+
+namespace Ryujinx.Graphics.Shader.Instructions
+{
+ static partial class InstEmit
+ {
+ // SHF.L (register form): funnel shift left, with the shift amount taken from source register B.
+ public static void ShfLR(EmitterContext context)
+ {
+     InstShfLR op = context.GetOp<InstShfLR>();
+
+     Operand low = GetSrcReg(context, op.SrcA);
+     Operand amount = GetSrcReg(context, op.SrcB);
+     Operand high = GetSrcReg(context, op.SrcC);
+
+     EmitShf(context, op.MaxShift, low, amount, high, op.Dest, op.M, left: true, writeCC: op.WriteCC);
+ }
+
+ // SHF.R (register form): funnel shift right, with the shift amount taken from source register B.
+ public static void ShfRR(EmitterContext context)
+ {
+     InstShfRR op = context.GetOp<InstShfRR>();
+
+     Operand low = GetSrcReg(context, op.SrcA);
+     Operand amount = GetSrcReg(context, op.SrcB);
+     Operand high = GetSrcReg(context, op.SrcC);
+
+     EmitShf(context, op.MaxShift, low, amount, high, op.Dest, op.M, left: false, writeCC: op.WriteCC);
+ }
+
+ // SHF.L (immediate form): funnel shift left, with the shift amount taken from the Imm6 immediate.
+ public static void ShfLI(EmitterContext context)
+ {
+     InstShfLI op = context.GetOp<InstShfLI>();
+
+     Operand low = GetSrcReg(context, op.SrcA);
+     Operand amount = Const(op.Imm6);
+     Operand high = GetSrcReg(context, op.SrcC);
+
+     EmitShf(context, op.MaxShift, low, amount, high, op.Dest, op.M, left: true, writeCC: op.WriteCC);
+ }
+
+ // SHF.R (immediate form): funnel shift right, with the shift amount taken from the Imm6 immediate.
+ public static void ShfRI(EmitterContext context)
+ {
+     InstShfRI op = context.GetOp<InstShfRI>();
+
+     Operand low = GetSrcReg(context, op.SrcA);
+     Operand amount = Const(op.Imm6);
+     Operand high = GetSrcReg(context, op.SrcC);
+
+     EmitShf(context, op.MaxShift, low, amount, high, op.Dest, op.M, left: false, writeCC: op.WriteCC);
+ }
+
+ // SHL (register form): shifts source register A left by source register B.
+ public static void ShlR(EmitterContext context)
+ {
+     InstShlR op = context.GetOp<InstShlR>();
+
+     Operand operand = GetSrcReg(context, op.SrcA);
+     Operand amount = GetSrcReg(context, op.SrcB);
+
+     EmitShl(context, operand, amount, op.Dest, op.M);
+ }
+
+ // SHL (immediate form): shifts source register A left by the Imm20 immediate
+ // (converted with Imm20ToSInt).
+ public static void ShlI(EmitterContext context)
+ {
+     InstShlI op = context.GetOp<InstShlI>();
+
+     Operand operand = GetSrcReg(context, op.SrcA);
+     Operand amount = GetSrcImm(context, Imm20ToSInt(op.Imm20));
+
+     EmitShl(context, operand, amount, op.Dest, op.M);
+ }
+
+ // SHL (constant buffer form): shifts source register A left by the constant buffer value
+ // at (CbufSlot, CbufOffset).
+ public static void ShlC(EmitterContext context)
+ {
+     InstShlC op = context.GetOp<InstShlC>();
+
+     Operand operand = GetSrcReg(context, op.SrcA);
+     Operand amount = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);
+
+     EmitShl(context, operand, amount, op.Dest, op.M);
+ }
+
+ // SHR (register form): shifts source register A right by source register B.
+ public static void ShrR(EmitterContext context)
+ {
+     InstShrR op = context.GetOp<InstShrR>();
+
+     Operand operand = GetSrcReg(context, op.SrcA);
+     Operand amount = GetSrcReg(context, op.SrcB);
+
+     EmitShr(context, operand, amount, op.Dest, op.M, op.Brev, op.Signed);
+ }
+
+ // SHR (immediate form): shifts source register A right by the Imm20 immediate
+ // (converted with Imm20ToSInt).
+ public static void ShrI(EmitterContext context)
+ {
+     InstShrI op = context.GetOp<InstShrI>();
+
+     Operand operand = GetSrcReg(context, op.SrcA);
+     Operand amount = GetSrcImm(context, Imm20ToSInt(op.Imm20));
+
+     EmitShr(context, operand, amount, op.Dest, op.M, op.Brev, op.Signed);
+ }
+
+ // SHR (constant buffer form): shifts source register A right by the constant buffer value
+ // at (CbufSlot, CbufOffset).
+ public static void ShrC(EmitterContext context)
+ {
+     InstShrC op = context.GetOp<InstShrC>();
+
+     Operand operand = GetSrcReg(context, op.SrcA);
+     Operand amount = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);
+
+     EmitShr(context, operand, amount, op.Dest, op.M, op.Brev, op.Signed);
+ }
+
+ // Shared SHF (funnel shift) implementation.
+ // srcA and srcC are the two 32-bit halves being shifted and srcB is the shift amount.
+ // maxShift selects a 32-bit or a 64-bit (U64/S64) shift range; with mask set the shift amount
+ // wraps to that range, otherwise amounts at or above the range produce zero.
+ // The result goes to register rd, and the Z/N flags are updated when writeCC is set.
+ private static void EmitShf(
+     EmitterContext context,
+     MaxShift maxShift,
+     Operand srcA,
+     Operand srcB,
+     Operand srcC,
+     int rd,
+     bool mask,
+     bool left,
+     bool writeCC)
+ {
+     bool signedShift = maxShift == MaxShift.S64;
+     bool isLongShift = signedShift || maxShift == MaxShift.U64;
+     int shiftLimit = isLongShift ? 64 : 32;
+
+     if (mask)
+     {
+         srcB = context.BitwiseAnd(srcB, Const(shiftLimit - 1));
+     }
+
+     Operand res;
+
+     // NOTE(review): when B is zero the complementary shift amount below is 32;
+     // confirm how the backends evaluate a 32-bit shift by 32.
+     if (left)
+     {
+         // res = (C << B) | (A >> (32 - B))
+         Operand shiftedHigh = context.ShiftLeft(srcC, srcB);
+         Operand carriedIn = context.ShiftRightU32(srcA, context.ISubtract(Const(32), srcB));
+
+         res = context.BitwiseOr(shiftedHigh, carriedIn);
+
+         if (isLongShift)
+         {
+             // res = B >= 32 ? A << (B - 32) : res
+             Operand lowerShift = context.ShiftLeft(srcA, context.ISubtract(srcB, Const(32)));
+             Operand shiftAbove31 = context.ICompareGreaterOrEqualUnsigned(srcB, Const(32));
+
+             res = context.ConditionalSelect(shiftAbove31, lowerShift, res);
+         }
+     }
+     else
+     {
+         // res = (A >> B) | (C << (32 - B))
+         Operand shiftedLow = context.ShiftRightU32(srcA, srcB);
+         Operand carriedIn = context.ShiftLeft(srcC, context.ISubtract(Const(32), srcB));
+
+         res = context.BitwiseOr(shiftedLow, carriedIn);
+
+         if (isLongShift)
+         {
+             // res = B >= 32 ? C >> (B - 32) : res
+             Operand excess = context.ISubtract(srcB, Const(32));
+
+             Operand upperShift = signedShift
+                 ? context.ShiftRightS32(srcC, excess)
+                 : context.ShiftRightU32(srcC, excess);
+
+             Operand shiftAbove31 = context.ICompareGreaterOrEqualUnsigned(srcB, Const(32));
+
+             res = context.ConditionalSelect(shiftAbove31, upperShift, res);
+         }
+     }
+
+     if (!mask)
+     {
+         // Clamped shift value.
+         Operand inRange = context.ICompareLessUnsigned(srcB, Const(shiftLimit));
+
+         res = context.ConditionalSelect(inRange, res, Const(0));
+     }
+
+     context.Copy(GetDest(rd), res);
+
+     if (writeCC)
+     {
+         InstEmitAluHelper.SetZnFlags(context, res, true);
+     }
+
+     // TODO: X.
+ }
+
+ // Shared SHL implementation: writes srcA << srcB to register rd.
+ // With mask set the shift amount wraps to 0-31; otherwise amounts of 32 or more produce zero.
+ private static void EmitShl(EmitterContext context, Operand srcA, Operand srcB, int rd, bool mask)
+ {
+     Operand amount = mask ? context.BitwiseAnd(srcB, Const(0x1f)) : srcB;
+
+     Operand result = context.ShiftLeft(srcA, amount);
+
+     if (!mask)
+     {
+         // Clamped shift value.
+         Operand inRange = context.ICompareLessUnsigned(amount, Const(32));
+
+         result = context.ConditionalSelect(inRange, result, Const(0));
+     }
+
+     // TODO: X, CC.
+
+     context.Copy(GetDest(rd), result);
+ }
+
+ // Shared SHR implementation: writes srcA >> srcB to register rd.
+ // bitReverse reverses the bits of srcA before shifting, and isSigned selects an arithmetic shift.
+ // With mask set the shift amount wraps to 0-31; otherwise amounts of 32 or more produce
+ // zero (unsigned) or srcA >> 31 (signed).
+ private static void EmitShr(
+     EmitterContext context,
+     Operand srcA,
+     Operand srcB,
+     int rd,
+     bool mask,
+     bool bitReverse,
+     bool isSigned)
+ {
+     Operand operand = bitReverse ? context.BitfieldReverse(srcA) : srcA;
+     Operand amount = mask ? context.BitwiseAnd(srcB, Const(0x1f)) : srcB;
+
+     Operand result;
+
+     if (isSigned)
+     {
+         result = context.ShiftRightS32(operand, amount);
+     }
+     else
+     {
+         result = context.ShiftRightU32(operand, amount);
+     }
+
+     if (!mask)
+     {
+         // Clamped shift value.
+         Operand overflowResult = isSigned
+             ? context.ShiftRightS32(operand, Const(31))
+             : Const(0);
+
+         Operand inRange = context.ICompareLessUnsigned(amount, Const(32));
+
+         result = context.ConditionalSelect(inRange, result, overflowResult);
+     }
+
+     // TODO: X, CC.
+
+     context.Copy(GetDest(rd), result);
+ }
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/Instructions/InstEmitSurface.cs b/src/Ryujinx.Graphics.Shader/Instructions/InstEmitSurface.cs
new file mode 100644
index 00000000..3d94b893
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/Instructions/InstEmitSurface.cs
@@ -0,0 +1,796 @@
+using Ryujinx.Graphics.Shader.Decoders;
+using Ryujinx.Graphics.Shader.IntermediateRepresentation;
+using Ryujinx.Graphics.Shader.Translation;
+using System;
+using System.Collections.Generic;
+using System.Numerics;
+
+using static Ryujinx.Graphics.Shader.Instructions.InstEmitHelper;
+using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
+
+namespace Ryujinx.Graphics.Shader.Instructions
+{
+ static partial class InstEmit
+ {
+ // SUATOM (bindless form): image atomic operation, with SrcC passed as the bindless source register.
+ public static void SuatomB(EmitterContext context)
+ {
+     InstSuatomB op = context.GetOp<InstSuatomB>();
+
+     EmitSuatom(
+         context,
+         dimensions: op.Dim,
+         atomicOp: op.Op,
+         size: op.Size,
+         imm: 0,
+         srcA: op.SrcA,
+         srcB: op.SrcB,
+         srcC: op.SrcC,
+         dest: op.Dest,
+         byteAddress: op.Ba,
+         isBindless: true,
+         compareAndSwap: false);
+ }
+
+ // SUATOM (bound form): image atomic operation, with the TidB field passed as the immediate.
+ public static void Suatom(EmitterContext context)
+ {
+     InstSuatom op = context.GetOp<InstSuatom>();
+
+     EmitSuatom(
+         context,
+         dimensions: op.Dim,
+         atomicOp: op.Op,
+         size: op.Size,
+         imm: op.TidB,
+         srcA: op.SrcA,
+         srcB: op.SrcB,
+         srcC: 0,
+         dest: op.Dest,
+         byteAddress: op.Ba,
+         isBindless: false,
+         compareAndSwap: false);
+ }
+
+ // SUATOM (second bindless encoding): emitted the same way as SuatomB.
+ public static void SuatomB2(EmitterContext context)
+ {
+     InstSuatomB2 op = context.GetOp<InstSuatomB2>();
+
+     EmitSuatom(
+         context,
+         dimensions: op.Dim,
+         atomicOp: op.Op,
+         size: op.Size,
+         imm: 0,
+         srcA: op.SrcA,
+         srcB: op.SrcB,
+         srcC: op.SrcC,
+         dest: op.Dest,
+         byteAddress: op.Ba,
+         isBindless: true,
+         compareAndSwap: false);
+ }
+
+ // SUATOM.CAS (bindless form): image compare-and-swap, with SrcC passed as the bindless source register.
+ public static void SuatomCasB(EmitterContext context)
+ {
+     InstSuatomCasB op = context.GetOp<InstSuatomCasB>();
+
+     // The atomic operation argument is passed as 0; the emitter selects the CAS flag instead.
+     EmitSuatom(
+         context,
+         dimensions: op.Dim,
+         atomicOp: 0,
+         size: op.Size,
+         imm: 0,
+         srcA: op.SrcA,
+         srcB: op.SrcB,
+         srcC: op.SrcC,
+         dest: op.Dest,
+         byteAddress: op.Ba,
+         isBindless: true,
+         compareAndSwap: true);
+ }
+
+ // SUATOM.CAS (bound form): image compare-and-swap, with the TidB field passed as the immediate.
+ public static void SuatomCas(EmitterContext context)
+ {
+     InstSuatomCas op = context.GetOp<InstSuatomCas>();
+
+     // The atomic operation argument is passed as 0; the emitter selects the CAS flag instead.
+     EmitSuatom(
+         context,
+         dimensions: op.Dim,
+         atomicOp: 0,
+         size: op.Size,
+         imm: op.TidB,
+         srcA: op.SrcA,
+         srcB: op.SrcB,
+         srcC: 0,
+         dest: op.Dest,
+         byteAddress: op.Ba,
+         isBindless: false,
+         compareAndSwap: true);
+ }
+
+ // SULD.D (bindless form): sized image load, with SrcC passed as the bindless source register.
+ // The instruction's Dest field is passed as the emitter's srcB (first destination register).
+ public static void SuldDB(EmitterContext context)
+ {
+     InstSuldDB op = context.GetOp<InstSuldDB>();
+
+     EmitSuld(
+         context,
+         cacheOp: op.CacheOp,
+         dimensions: op.Dim,
+         size: op.Size,
+         imm: 0,
+         componentMask: 0,
+         srcA: op.SrcA,
+         srcB: op.Dest,
+         srcC: op.SrcC,
+         useComponents: false,
+         byteAddress: op.Ba,
+         isBindless: true);
+ }
+
+ // SULD.D (bound form): sized image load, with the TidB field passed as the immediate.
+ // The instruction's Dest field is passed as the emitter's srcB (first destination register).
+ public static void SuldD(EmitterContext context)
+ {
+     InstSuldD op = context.GetOp<InstSuldD>();
+
+     EmitSuld(
+         context,
+         cacheOp: op.CacheOp,
+         dimensions: op.Dim,
+         size: op.Size,
+         imm: op.TidB,
+         componentMask: 0,
+         srcA: op.SrcA,
+         srcB: op.Dest,
+         srcC: 0,
+         useComponents: false,
+         byteAddress: op.Ba,
+         isBindless: false);
+ }
+
+ // SULD (bindless, component mask form): image load of the components selected by Rgba,
+ // with SrcC passed as the bindless source register.
+ // The instruction's Dest field is passed as the emitter's srcB (first destination register).
+ public static void SuldB(EmitterContext context)
+ {
+     InstSuldB op = context.GetOp<InstSuldB>();
+
+     EmitSuld(
+         context,
+         cacheOp: op.CacheOp,
+         dimensions: op.Dim,
+         size: 0,
+         imm: 0,
+         componentMask: op.Rgba,
+         srcA: op.SrcA,
+         srcB: op.Dest,
+         srcC: op.SrcC,
+         useComponents: true,
+         byteAddress: false,
+         isBindless: true);
+ }
+
+ // SULD (bound, component mask form): image load of the components selected by Rgba,
+ // with the TidB field passed as the immediate.
+ // The instruction's Dest field is passed as the emitter's srcB (first destination register).
+ public static void Suld(EmitterContext context)
+ {
+     InstSuld op = context.GetOp<InstSuld>();
+
+     EmitSuld(
+         context,
+         cacheOp: op.CacheOp,
+         dimensions: op.Dim,
+         size: 0,
+         imm: op.TidB,
+         componentMask: op.Rgba,
+         srcA: op.SrcA,
+         srcB: op.Dest,
+         srcC: 0,
+         useComponents: true,
+         byteAddress: false,
+         isBindless: false);
+ }
+
+ // SURED (bindless form): image reduction, with SrcC passed as the bindless source register.
+ // The instruction's Dest field is passed as the emitter's srcB (the data register).
+ public static void SuredB(EmitterContext context)
+ {
+     InstSuredB op = context.GetOp<InstSuredB>();
+
+     EmitSured(
+         context,
+         dimensions: op.Dim,
+         atomicOp: op.Op,
+         size: op.Size,
+         imm: 0,
+         srcA: op.SrcA,
+         srcB: op.Dest,
+         srcC: op.SrcC,
+         byteAddress: op.Ba,
+         isBindless: true);
+ }
+
+ // SURED (bound form): image reduction, with the TidB field passed as the immediate.
+ // The instruction's Dest field is passed as the emitter's srcB (the data register).
+ public static void Sured(EmitterContext context)
+ {
+     InstSured op = context.GetOp<InstSured>();
+
+     EmitSured(
+         context,
+         dimensions: op.Dim,
+         atomicOp: op.Op,
+         size: op.Size,
+         imm: op.TidB,
+         srcA: op.SrcA,
+         srcB: op.Dest,
+         srcC: 0,
+         byteAddress: op.Ba,
+         isBindless: false);
+ }
+
+ // SUST.D (bindless form): sized image store, with SrcC passed as the bindless source register.
+ // The instruction's Dest field is passed as the emitter's srcB (first data register).
+ public static void SustDB(EmitterContext context)
+ {
+     InstSustDB op = context.GetOp<InstSustDB>();
+
+     EmitSust(
+         context,
+         cacheOp: op.CacheOp,
+         dimensions: op.Dim,
+         size: op.Size,
+         imm: 0,
+         componentMask: 0,
+         srcA: op.SrcA,
+         srcB: op.Dest,
+         srcC: op.SrcC,
+         useComponents: false,
+         byteAddress: op.Ba,
+         isBindless: true);
+ }
+
+ // SUST.D (bound form): sized image store, with the TidB field passed as the immediate.
+ // The instruction's Dest field is passed as the emitter's srcB (first data register).
+ public static void SustD(EmitterContext context)
+ {
+     InstSustD op = context.GetOp<InstSustD>();
+
+     EmitSust(
+         context,
+         cacheOp: op.CacheOp,
+         dimensions: op.Dim,
+         size: op.Size,
+         imm: op.TidB,
+         componentMask: 0,
+         srcA: op.SrcA,
+         srcB: op.Dest,
+         srcC: 0,
+         useComponents: false,
+         byteAddress: op.Ba,
+         isBindless: false);
+ }
+
+ // SUST (bindless, component mask form): image store of the components selected by Rgba,
+ // with SrcC passed as the bindless source register.
+ // The instruction's Dest field is passed as the emitter's srcB (first data register).
+ public static void SustB(EmitterContext context)
+ {
+     InstSustB op = context.GetOp<InstSustB>();
+
+     EmitSust(
+         context,
+         cacheOp: op.CacheOp,
+         dimensions: op.Dim,
+         size: 0,
+         imm: 0,
+         componentMask: op.Rgba,
+         srcA: op.SrcA,
+         srcB: op.Dest,
+         srcC: op.SrcC,
+         useComponents: true,
+         byteAddress: false,
+         isBindless: true);
+ }
+
+ // SUST (bound, component mask form): image store of the components selected by Rgba,
+ // with the TidB field passed as the immediate.
+ // The instruction's Dest field is passed as the emitter's srcB (first data register).
+ public static void Sust(EmitterContext context)
+ {
+     InstSust op = context.GetOp<InstSust>();
+
+     EmitSust(
+         context,
+         cacheOp: op.CacheOp,
+         dimensions: op.Dim,
+         size: 0,
+         imm: op.TidB,
+         componentMask: op.Rgba,
+         srcA: op.SrcA,
+         srcB: op.Dest,
+         srcC: 0,
+         useComponents: true,
+         byteAddress: false,
+         isBindless: false);
+ }
+
+ // Shared SUATOM implementation: emits an ImageAtomic texture operation.
+ //
+ // Sources are added in this order: the bindless register srcC (bindless only), the
+ // coordinates read from consecutive registers starting at srcA, an extra zero coordinate
+ // when a 1D image is accessed as 2D, the array index (array types only), the extra
+ // compare-and-swap operand (CAS only), and finally the data operand. The last two are
+ // read from consecutive registers starting at srcB.
+ //
+ // dimensions: image dimensions; an unsupported value is logged and nothing is emitted.
+ // atomicOp: atomic operation, ignored when compareAndSwap is set.
+ // size: data size, used to pick the texture format and the byte address shift.
+ // imm: immediate passed to the texture operation, and used for the format query on Sd32/Sd64.
+ // dest: destination register; RZ means that no destination register is written.
+ // byteAddress: when set, the X coordinate is shifted right by log2 of the component size.
+ private static void EmitSuatom(
+     EmitterContext context,
+     SuDim dimensions,
+     SuatomOp atomicOp,
+     SuatomSize size,
+     int imm,
+     int srcA,
+     int srcB,
+     int srcC,
+     int dest,
+     bool byteAddress,
+     bool isBindless,
+     bool compareAndSwap)
+ {
+     SamplerType type = ConvertSamplerType(dimensions);
+
+     if (type == SamplerType.None)
+     {
+         context.Config.GpuAccessor.Log("Invalid image atomic sampler type.");
+         return;
+     }
+
+     // Reads the next coordinate register, zero is used once the index goes past RZ.
+     Operand Ra()
+     {
+         if (srcA > RegisterConsts.RegisterZeroIndex)
+         {
+             return Const(0);
+         }
+
+         return context.Copy(Register(srcA++, RegisterType.Gpr));
+     }
+
+     // Reads the next data register, zero is used once the index goes past RZ.
+     Operand Rb()
+     {
+         if (srcB > RegisterConsts.RegisterZeroIndex)
+         {
+             return Const(0);
+         }
+
+         return context.Copy(Register(srcB++, RegisterType.Gpr));
+     }
+
+     Operand destOperand = dest != RegisterConsts.RegisterZeroIndex ? Register(dest, RegisterType.Gpr) : null;
+
+     List<Operand> sourcesList = new List<Operand>();
+
+     if (isBindless)
+     {
+         sourcesList.Add(context.Copy(GetSrcReg(context, srcC)));
+     }
+
+     int coordsCount = type.GetDimensions();
+
+     for (int index = 0; index < coordsCount; index++)
+     {
+         sourcesList.Add(Ra());
+     }
+
+     if (Sample1DAs2D && (type & SamplerType.Mask) == SamplerType.Texture1D)
+     {
+         // Add the missing second coordinate and change the type to 2D.
+         sourcesList.Add(Const(0));
+
+         type &= ~SamplerType.Mask;
+         type |= SamplerType.Texture2D;
+     }
+
+     if (type.HasFlag(SamplerType.Array))
+     {
+         // The array flag is already set on the type here, so it doesn't need to be set again.
+         sourcesList.Add(Ra());
+     }
+
+     if (byteAddress)
+     {
+         // With a bindless operation, index 0 holds the bindless source and X comes right after it.
+         int xIndex = isBindless ? 1 : 0;
+
+         sourcesList[xIndex] = context.ShiftRightS32(sourcesList[xIndex], Const(GetComponentSizeInBytesLog2(size)));
+     }
+
+     // TODO: FP and 64-bit formats.
+     TextureFormat format = size == SuatomSize.Sd32 || size == SuatomSize.Sd64
+         ? (isBindless ? TextureFormat.Unknown : context.Config.GetTextureFormatAtomic(imm))
+         : GetTextureFormat(size);
+
+     if (compareAndSwap)
+     {
+         sourcesList.Add(Rb());
+     }
+
+     sourcesList.Add(Rb());
+
+     Operand[] sources = sourcesList.ToArray();
+
+     TextureFlags flags = compareAndSwap ? TextureFlags.CAS : GetAtomicOpFlags(atomicOp);
+
+     if (isBindless)
+     {
+         flags |= TextureFlags.Bindless;
+     }
+
+     TextureOperation operation = context.CreateTextureOperation(
+         Instruction.ImageAtomic,
+         type,
+         format,
+         flags,
+         imm,
+         0,
+         new[] { destOperand },
+         sources);
+
+     context.Add(operation);
+ }
+
+ // Shared SULD implementation: emits an ImageLoad texture operation.
+ //
+ // Sources are added in this order: the bindless register srcC (bindless only), the
+ // coordinates read from consecutive registers starting at srcA, an extra zero coordinate
+ // when a 1D image is accessed as 2D, and the array index (array types only).
+ // Destinations are consecutive registers starting at srcB, stopping before RZ.
+ //
+ // dimensions: image dimensions; an unsupported value is logged and nothing is emitted.
+ // size: data size, only used when useComponents is not set.
+ // imm: immediate used as the texture operation handle.
+ // componentMask: components to load, only used when useComponents is set.
+ // byteAddress: when set (sized form only), the X coordinate is shifted right by log2 of the component size.
+ private static void EmitSuld(
+     EmitterContext context,
+     CacheOpLd cacheOp,
+     SuDim dimensions,
+     SuSize size,
+     int imm,
+     SuRgba componentMask,
+     int srcA,
+     int srcB,
+     int srcC,
+     bool useComponents,
+     bool byteAddress,
+     bool isBindless)
+ {
+     context.Config.SetUsedFeature(FeatureFlags.IntegerSampling);
+
+     SamplerType type = ConvertSamplerType(dimensions);
+
+     if (type == SamplerType.None)
+     {
+         context.Config.GpuAccessor.Log("Invalid image load sampler type.");
+         return;
+     }
+
+     // Reads the next coordinate register, zero is used once the index goes past RZ.
+     Operand Ra()
+     {
+         if (srcA > RegisterConsts.RegisterZeroIndex)
+         {
+             return Const(0);
+         }
+
+         return context.Copy(Register(srcA++, RegisterType.Gpr));
+     }
+
+     // Builds the list of up to count destination registers starting at srcB.
+     // The list is cut short at RZ, so it may come back shorter than count, or even empty.
+     Operand[] GetDestRegisters(int count)
+     {
+         Operand[] registers = new Operand[count];
+
+         int outputIndex = 0;
+
+         for (int i = 0; i < registers.Length; i++)
+         {
+             if (srcB + i >= RegisterConsts.RegisterZeroIndex)
+             {
+                 break;
+             }
+
+             registers[outputIndex++] = Register(srcB + i, RegisterType.Gpr);
+         }
+
+         if (outputIndex != registers.Length)
+         {
+             Array.Resize(ref registers, outputIndex);
+         }
+
+         return registers;
+     }
+
+     List<Operand> sourcesList = new List<Operand>();
+
+     if (isBindless)
+     {
+         sourcesList.Add(context.Copy(Register(srcC, RegisterType.Gpr)));
+     }
+
+     int coordsCount = type.GetDimensions();
+
+     for (int index = 0; index < coordsCount; index++)
+     {
+         sourcesList.Add(Ra());
+     }
+
+     if (Sample1DAs2D && (type & SamplerType.Mask) == SamplerType.Texture1D)
+     {
+         // Add the missing second coordinate and change the type to 2D.
+         sourcesList.Add(Const(0));
+
+         type &= ~SamplerType.Mask;
+         type |= SamplerType.Texture2D;
+     }
+
+     if (type.HasFlag(SamplerType.Array))
+     {
+         sourcesList.Add(Ra());
+     }
+
+     Operand[] sources = sourcesList.ToArray();
+
+     int handle = imm;
+
+     TextureFlags flags = isBindless ? TextureFlags.Bindless : TextureFlags.None;
+
+     if (cacheOp == CacheOpLd.Cg)
+     {
+         flags |= TextureFlags.Coherent;
+     }
+
+     if (useComponents)
+     {
+         Operand[] dests = GetDestRegisters(BitOperations.PopCount((uint)componentMask));
+
+         TextureOperation operation = context.CreateTextureOperation(
+             Instruction.ImageLoad,
+             type,
+             flags,
+             handle,
+             (int)componentMask,
+             dests,
+             sources);
+
+         if (!isBindless)
+         {
+             operation.Format = context.Config.GetTextureFormat(handle);
+         }
+
+         context.Add(operation);
+     }
+     else
+     {
+         if (byteAddress)
+         {
+             // With a bindless operation, index 0 holds the bindless source and X comes right after it.
+             int xIndex = isBindless ? 1 : 0;
+
+             sources[xIndex] = context.ShiftRightS32(sources[xIndex], Const(GetComponentSizeInBytesLog2(size)));
+         }
+
+         int components = GetComponents(size);
+         int compMask = (1 << components) - 1;
+
+         Operand[] dests = GetDestRegisters(components);
+
+         TextureOperation operation = context.CreateTextureOperation(
+             Instruction.ImageLoad,
+             type,
+             GetTextureFormat(size),
+             flags,
+             handle,
+             compMask,
+             dests,
+             sources);
+
+         context.Add(operation);
+
+         // Values narrower than 32 bits are extended in place. This is skipped when there is
+         // no destination register, as the destination list is empty in that case.
+         if (dests.Length != 0)
+         {
+             switch (size)
+             {
+                 case SuSize.U8: context.Copy(dests[0], ZeroExtendTo32(context, dests[0], 8)); break;
+                 case SuSize.U16: context.Copy(dests[0], ZeroExtendTo32(context, dests[0], 16)); break;
+                 case SuSize.S8: context.Copy(dests[0], SignExtendTo32(context, dests[0], 8)); break;
+                 case SuSize.S16: context.Copy(dests[0], SignExtendTo32(context, dests[0], 16)); break;
+             }
+         }
+     }
+ }
+
+ // Shared SURED implementation: emits an ImageAtomic texture operation without destination.
+ //
+ // Sources are added in this order: the bindless register srcC (bindless only), the
+ // coordinates read from consecutive registers starting at srcA, an extra zero coordinate
+ // when a 1D image is accessed as 2D, the array index (array types only), and the data
+ // operand read from register srcB.
+ //
+ // dimensions: image dimensions; an unsupported value is logged and nothing is emitted.
+ // atomicOp: reduction operation, cast to SuatomOp to get the atomic operation flags.
+ // size: data size, used to pick the texture format and the byte address shift.
+ // imm: immediate passed to the texture operation, and used for the format query on Sd32/Sd64.
+ // byteAddress: when set, the X coordinate is shifted right by log2 of the component size.
+ private static void EmitSured(
+     EmitterContext context,
+     SuDim dimensions,
+     RedOp atomicOp,
+     SuatomSize size,
+     int imm,
+     int srcA,
+     int srcB,
+     int srcC,
+     bool byteAddress,
+     bool isBindless)
+ {
+     SamplerType type = ConvertSamplerType(dimensions);
+
+     if (type == SamplerType.None)
+     {
+         context.Config.GpuAccessor.Log("Invalid image reduction sampler type.");
+         return;
+     }
+
+     // Reads the next coordinate register, zero is used once the index goes past RZ.
+     Operand Ra()
+     {
+         if (srcA > RegisterConsts.RegisterZeroIndex)
+         {
+             return Const(0);
+         }
+
+         return context.Copy(Register(srcA++, RegisterType.Gpr));
+     }
+
+     // Reads the next data register, zero is used once the index goes past RZ.
+     Operand Rb()
+     {
+         if (srcB > RegisterConsts.RegisterZeroIndex)
+         {
+             return Const(0);
+         }
+
+         return context.Copy(Register(srcB++, RegisterType.Gpr));
+     }
+
+     List<Operand> sourcesList = new List<Operand>();
+
+     if (isBindless)
+     {
+         sourcesList.Add(context.Copy(GetSrcReg(context, srcC)));
+     }
+
+     int coordsCount = type.GetDimensions();
+
+     for (int index = 0; index < coordsCount; index++)
+     {
+         sourcesList.Add(Ra());
+     }
+
+     if (Sample1DAs2D && (type & SamplerType.Mask) == SamplerType.Texture1D)
+     {
+         // Add the missing second coordinate and change the type to 2D.
+         sourcesList.Add(Const(0));
+
+         type &= ~SamplerType.Mask;
+         type |= SamplerType.Texture2D;
+     }
+
+     if (type.HasFlag(SamplerType.Array))
+     {
+         // The array flag is already set on the type here, so it doesn't need to be set again.
+         sourcesList.Add(Ra());
+     }
+
+     if (byteAddress)
+     {
+         // With a bindless operation, index 0 holds the bindless source and X comes right after it.
+         int xIndex = isBindless ? 1 : 0;
+
+         sourcesList[xIndex] = context.ShiftRightS32(sourcesList[xIndex], Const(GetComponentSizeInBytesLog2(size)));
+     }
+
+     // TODO: FP and 64-bit formats.
+     TextureFormat format = size == SuatomSize.Sd32 || size == SuatomSize.Sd64
+         ? (isBindless ? TextureFormat.Unknown : context.Config.GetTextureFormatAtomic(imm))
+         : GetTextureFormat(size);
+
+     sourcesList.Add(Rb());
+
+     Operand[] sources = sourcesList.ToArray();
+
+     TextureFlags flags = GetAtomicOpFlags((SuatomOp)atomicOp);
+
+     if (isBindless)
+     {
+         flags |= TextureFlags.Bindless;
+     }
+
+     TextureOperation operation = context.CreateTextureOperation(
+         Instruction.ImageAtomic,
+         type,
+         format,
+         flags,
+         imm,
+         0,
+         null,
+         sources);
+
+     context.Add(operation);
+ }
+
+ // Shared SUST implementation: emits an ImageStore texture operation.
+ //
+ // Sources are added in this order: the bindless register srcC (bindless only), the
+ // coordinates read from consecutive registers starting at srcA, an extra zero coordinate
+ // when a 1D image is accessed as 2D, the array index (array types only), and the values
+ // to store, read from consecutive registers starting at srcB.
+ //
+ // dimensions: image dimensions; an unsupported value is logged and nothing is emitted.
+ // size: data size, only used when useComponents is not set.
+ // imm: immediate used as the texture operation handle.
+ // componentMask: components to store, only used when useComponents is set.
+ // byteAddress: when set (sized form only), the X coordinate is shifted right by log2 of the component size.
+ private static void EmitSust(
+     EmitterContext context,
+     CacheOpSt cacheOp,
+     SuDim dimensions,
+     SuSize size,
+     int imm,
+     SuRgba componentMask,
+     int srcA,
+     int srcB,
+     int srcC,
+     bool useComponents,
+     bool byteAddress,
+     bool isBindless)
+ {
+     SamplerType type = ConvertSamplerType(dimensions);
+
+     if (type == SamplerType.None)
+     {
+         context.Config.GpuAccessor.Log("Invalid image store sampler type.");
+         return;
+     }
+
+     // Reads the next coordinate register, zero is used once the index goes past RZ.
+     Operand Ra()
+     {
+         if (srcA > RegisterConsts.RegisterZeroIndex)
+         {
+             return Const(0);
+         }
+
+         return context.Copy(Register(srcA++, RegisterType.Gpr));
+     }
+
+     // Reads the next data register, zero is used once the index goes past RZ.
+     Operand Rb()
+     {
+         if (srcB > RegisterConsts.RegisterZeroIndex)
+         {
+             return Const(0);
+         }
+
+         return context.Copy(Register(srcB++, RegisterType.Gpr));
+     }
+
+     List<Operand> sourcesList = new List<Operand>();
+
+     if (isBindless)
+     {
+         sourcesList.Add(context.Copy(Register(srcC, RegisterType.Gpr)));
+     }
+
+     int coordsCount = type.GetDimensions();
+
+     for (int index = 0; index < coordsCount; index++)
+     {
+         sourcesList.Add(Ra());
+     }
+
+     if (Sample1DAs2D && (type & SamplerType.Mask) == SamplerType.Texture1D)
+     {
+         // Add the missing second coordinate and change the type to 2D.
+         sourcesList.Add(Const(0));
+
+         type &= ~SamplerType.Mask;
+         type |= SamplerType.Texture2D;
+     }
+
+     if (type.HasFlag(SamplerType.Array))
+     {
+         sourcesList.Add(Ra());
+     }
+
+     TextureFormat format = TextureFormat.Unknown;
+
+     if (useComponents)
+     {
+         // One data register is read for each bit set on the component mask.
+         for (int compMask = (int)componentMask; compMask != 0; compMask >>= 1)
+         {
+             if ((compMask & 1) != 0)
+             {
+                 sourcesList.Add(Rb());
+             }
+         }
+
+         if (!isBindless)
+         {
+             format = context.Config.GetTextureFormat(imm);
+         }
+     }
+     else
+     {
+         if (byteAddress)
+         {
+             // With a bindless operation, index 0 holds the bindless source and X comes right after it.
+             int xIndex = isBindless ? 1 : 0;
+
+             sourcesList[xIndex] = context.ShiftRightS32(sourcesList[xIndex], Const(GetComponentSizeInBytesLog2(size)));
+         }
+
+         int components = GetComponents(size);
+
+         for (int compIndex = 0; compIndex < components; compIndex++)
+         {
+             sourcesList.Add(Rb());
+         }
+
+         format = GetTextureFormat(size);
+     }
+
+     Operand[] sources = sourcesList.ToArray();
+
+     int handle = imm;
+
+     TextureFlags flags = isBindless ? TextureFlags.Bindless : TextureFlags.None;
+
+     if (cacheOp == CacheOpSt.Cg)
+     {
+         flags |= TextureFlags.Coherent;
+     }
+
+     TextureOperation operation = context.CreateTextureOperation(
+         Instruction.ImageStore,
+         type,
+         format,
+         flags,
+         handle,
+         0,
+         null,
+         sources);
+
+     context.Add(operation);
+ }
+
+        /// <summary>
+        /// Returns the shift amount (log2 of the size in bytes) used for the given atomic size: 3 for the 64-bit sizes, 2 otherwise.
+        /// </summary>
+        private static int GetComponentSizeInBytesLog2(SuatomSize size)
+        {
+            switch (size)
+            {
+                case SuatomSize.U64:
+                case SuatomSize.S64:
+                case SuatomSize.Sd64:
+                    return 3;
+
+                // U32, S32, F32FtzRn, F16x2FtzRn, Sd32 and any other value.
+                default:
+                    return 2;
+            }
+        }
+
+        /// <summary>
+        /// Maps an atomic size to the texture format used for the image operation; unlisted values map to <see cref="TextureFormat.R32Uint"/>.
+        /// </summary>
+        private static TextureFormat GetTextureFormat(SuatomSize size)
+        {
+            switch (size)
+            {
+                case SuatomSize.S32:
+                    return TextureFormat.R32Sint;
+
+                case SuatomSize.F32FtzRn:
+                    return TextureFormat.R32Float;
+
+                case SuatomSize.F16x2FtzRn:
+                    return TextureFormat.R16G16Float;
+
+                case SuatomSize.U64:
+                case SuatomSize.S64:
+                case SuatomSize.Sd64:
+                    return TextureFormat.R32G32Uint;
+
+                // U32, Sd32 and any other value.
+                default:
+                    return TextureFormat.R32Uint;
+            }
+        }
+
+        /// <summary>
+        /// Maps an atomic operation to its texture flag; unlisted values map to <see cref="TextureFlags.Add"/>.
+        /// </summary>
+        private static TextureFlags GetAtomicOpFlags(SuatomOp op)
+        {
+            switch (op)
+            {
+                case SuatomOp.Min:
+                    return TextureFlags.Minimum;
+                case SuatomOp.Max:
+                    return TextureFlags.Maximum;
+                case SuatomOp.Inc:
+                    return TextureFlags.Increment;
+                case SuatomOp.Dec:
+                    return TextureFlags.Decrement;
+                case SuatomOp.And:
+                    return TextureFlags.BitwiseAnd;
+                case SuatomOp.Or:
+                    return TextureFlags.BitwiseOr;
+                case SuatomOp.Xor:
+                    return TextureFlags.BitwiseXor;
+                case SuatomOp.Exch:
+                    return TextureFlags.Swap;
+
+                // SuatomOp.Add and any other value.
+                default:
+                    return TextureFlags.Add;
+            }
+        }
+
+        /// <summary>
+        /// Returns how many value registers a store of the given size consumes: 2 for B64, 4 for B128/UB128, 1 otherwise.
+        /// </summary>
+        private static int GetComponents(SuSize size)
+        {
+            switch (size)
+            {
+                case SuSize.B64:
+                    return 2;
+
+                case SuSize.B128:
+                case SuSize.UB128:
+                    return 4;
+
+                default:
+                    return 1;
+            }
+        }
+
+        /// <summary>
+        /// Returns the shift amount (log2 of the size in bytes) used for the given store size; unlisted values yield 2.
+        /// </summary>
+        private static int GetComponentSizeInBytesLog2(SuSize size)
+        {
+            switch (size)
+            {
+                case SuSize.U8:
+                case SuSize.S8:
+                    return 0;
+
+                case SuSize.U16:
+                case SuSize.S16:
+                    return 1;
+
+                case SuSize.B64:
+                    return 3;
+
+                case SuSize.B128:
+                case SuSize.UB128:
+                    return 4;
+
+                // B32 and any other value.
+                default:
+                    return 2;
+            }
+        }
+
+        /// <summary>
+        /// Maps a store size to the texture format used for the image operation; unlisted values map to <see cref="TextureFormat.R32Uint"/>.
+        /// </summary>
+        private static TextureFormat GetTextureFormat(SuSize size)
+        {
+            switch (size)
+            {
+                case SuSize.U8:
+                    return TextureFormat.R8Uint;
+                case SuSize.S8:
+                    return TextureFormat.R8Sint;
+                case SuSize.U16:
+                    return TextureFormat.R16Uint;
+                case SuSize.S16:
+                    return TextureFormat.R16Sint;
+                case SuSize.B64:
+                    return TextureFormat.R32G32Uint;
+
+                case SuSize.B128:
+                case SuSize.UB128:
+                    return TextureFormat.R32G32B32A32Uint;
+
+                // B32 and any other value.
+                default:
+                    return TextureFormat.R32Uint;
+            }
+        }
+
+        /// <summary>
+        /// Maps a surface dimension to a sampler type; returns <see cref="SamplerType.None"/> for values not listed here.
+        /// </summary>
+        private static SamplerType ConvertSamplerType(SuDim target)
+        {
+            switch (target)
+            {
+                case SuDim._1d:
+                    return SamplerType.Texture1D;
+                case SuDim._1dBuffer:
+                    return SamplerType.TextureBuffer;
+                case SuDim._1dArray:
+                    return SamplerType.Texture1D | SamplerType.Array;
+                case SuDim._2d:
+                    return SamplerType.Texture2D;
+                case SuDim._2dArray:
+                    return SamplerType.Texture2D | SamplerType.Array;
+                case SuDim._3d:
+                    return SamplerType.Texture3D;
+                default:
+                    return SamplerType.None;
+            }
+        }
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/Instructions/InstEmitTexture.cs b/src/Ryujinx.Graphics.Shader/Instructions/InstEmitTexture.cs
new file mode 100644
index 00000000..caa9a775
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/Instructions/InstEmitTexture.cs
@@ -0,0 +1,1312 @@
+using Ryujinx.Graphics.Shader.Decoders;
+using Ryujinx.Graphics.Shader.IntermediateRepresentation;
+using Ryujinx.Graphics.Shader.Translation;
+using System;
+using System.Collections.Generic;
+using System.Numerics;
+
+using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
+
+namespace Ryujinx.Graphics.Shader.Instructions
+{
+ static partial class InstEmit
+ {
+        /// <summary>
+        /// Component masks used by EmitTexs. The row is 0 when the second destination is RZ and 1 otherwise;
+        /// the column is the write mask value passed by the caller.
+        /// </summary>
+        private static readonly int[,] _maskLut =
+        {
+            { 0b0001, 0b0010, 0b0100, 0b1000, 0b0011, 0b1001, 0b1010, 0b1100 },
+            { 0b0111, 0b1011, 0b1101, 0b1110, 0b1111, 0b0000, 0b0000, 0b0000 },
+        };
+
+        /// <summary>
+        /// When true, the emitters in this class append a zero second coordinate to 1D accesses and retype them as 2D.
+        /// </summary>
+        public const bool Sample1DAs2D = true;
+
+        /// <summary>
+        /// Selects which instruction form EmitTexs decodes and emits.
+        /// </summary>
+        private enum TexsType
+        {
+            Texs,
+            Tlds,
+            Tld4s,
+        }
+
+        /// <summary>
+        /// Decodes the current operation as <c>InstTex</c> and forwards its fields to EmitTex with no extra texture flags.
+        /// </summary>
+        public static void Tex(EmitterContext context)
+        {
+            var op = context.GetOp<InstTex>();
+
+            EmitTex(
+                context,
+                TextureFlags.None,
+                op.Dim,
+                op.Lod,
+                op.TidB,
+                op.WMask,
+                op.SrcA,
+                op.SrcB,
+                op.Dest,
+                isMultisample: false,
+                hasDepthCompare: op.Dc,
+                hasOffset: op.Aoffi);
+        }
+
+        /// <summary>
+        /// Decodes the current operation as <c>InstTexB</c> and forwards its fields to EmitTex with the bindless flag and a zero immediate.
+        /// </summary>
+        public static void TexB(EmitterContext context)
+        {
+            var op = context.GetOp<InstTexB>();
+
+            EmitTex(
+                context,
+                TextureFlags.Bindless,
+                op.Dim,
+                op.Lodb,
+                0,
+                op.WMask,
+                op.SrcA,
+                op.SrcB,
+                op.Dest,
+                isMultisample: false,
+                hasDepthCompare: op.Dc,
+                hasOffset: op.Aoffib);
+        }
+
+        /// <summary>
+        /// Decodes the current operation as <c>InstTexs</c> and emits it through EmitTexs with 32-bit destinations.
+        /// </summary>
+        public static void Texs(EmitterContext context)
+        {
+            var op = context.GetOp<InstTexs>();
+
+            EmitTexs(
+                context,
+                TexsType.Texs,
+                op.TidB,
+                op.WMask,
+                op.SrcA,
+                op.SrcB,
+                op.Dest,
+                op.Dest2,
+                isF16: false);
+        }
+
+        /// <summary>
+        /// Decodes the current operation as <c>InstTexs</c> and emits it through EmitTexs with the half-float (packed) destination path.
+        /// </summary>
+        public static void TexsF16(EmitterContext context)
+        {
+            var op = context.GetOp<InstTexs>();
+
+            EmitTexs(
+                context,
+                TexsType.Texs,
+                op.TidB,
+                op.WMask,
+                op.SrcA,
+                op.SrcB,
+                op.Dest,
+                op.Dest2,
+                isF16: true);
+        }
+
+        /// <summary>
+        /// Decodes the current operation as <c>InstTld</c>, marks integer sampling as used, and forwards to EmitTex
+        /// with integer coordinates. The LOD mode is <c>Lod.Ll</c> when the operation's Lod bit is set and <c>Lod.Lz</c> otherwise.
+        /// </summary>
+        public static void Tld(EmitterContext context)
+        {
+            var op = context.GetOp<InstTld>();
+
+            context.Config.SetUsedFeature(FeatureFlags.IntegerSampling);
+
+            Lod lodMode = op.Lod ? Lod.Ll : Lod.Lz;
+
+            EmitTex(
+                context,
+                TextureFlags.IntCoords,
+                op.Dim,
+                lodMode,
+                op.TidB,
+                op.WMask,
+                op.SrcA,
+                op.SrcB,
+                op.Dest,
+                isMultisample: op.Ms,
+                hasDepthCompare: false,
+                hasOffset: op.Toff);
+        }
+
+        /// <summary>
+        /// Decodes the current operation as <c>InstTldB</c>, marks integer sampling as used, and forwards to EmitTex
+        /// with the integer-coordinates and bindless flags and a zero immediate.
+        /// </summary>
+        public static void TldB(EmitterContext context)
+        {
+            var op = context.GetOp<InstTldB>();
+
+            context.Config.SetUsedFeature(FeatureFlags.IntegerSampling);
+
+            TextureFlags flags = TextureFlags.IntCoords | TextureFlags.Bindless;
+            Lod lodMode = op.Lod ? Lod.Ll : Lod.Lz;
+
+            EmitTex(
+                context,
+                flags,
+                op.Dim,
+                lodMode,
+                0,
+                op.WMask,
+                op.SrcA,
+                op.SrcB,
+                op.Dest,
+                isMultisample: op.Ms,
+                hasDepthCompare: false,
+                hasOffset: op.Toff);
+        }
+
+        /// <summary>
+        /// Decodes the current operation as <c>InstTlds</c> and emits it through EmitTexs with 32-bit destinations.
+        /// </summary>
+        public static void Tlds(EmitterContext context)
+        {
+            var op = context.GetOp<InstTlds>();
+
+            EmitTexs(
+                context,
+                TexsType.Tlds,
+                op.TidB,
+                op.WMask,
+                op.SrcA,
+                op.SrcB,
+                op.Dest,
+                op.Dest2,
+                isF16: false);
+        }
+
+        /// <summary>
+        /// Decodes the current operation as <c>InstTlds</c> and emits it through EmitTexs with the half-float (packed) destination path.
+        /// </summary>
+        public static void TldsF16(EmitterContext context)
+        {
+            var op = context.GetOp<InstTlds>();
+
+            EmitTexs(
+                context,
+                TexsType.Tlds,
+                op.TidB,
+                op.WMask,
+                op.SrcA,
+                op.SrcB,
+                op.Dest,
+                op.Dest2,
+                isF16: true);
+        }
+
+        /// <summary>
+        /// Decodes the current operation as <c>InstTld4</c> and forwards its fields to EmitTld4 as a non-bindless gather.
+        /// </summary>
+        public static void Tld4(EmitterContext context)
+        {
+            var op = context.GetOp<InstTld4>();
+
+            EmitTld4(
+                context,
+                op.Dim,
+                op.TexComp,
+                op.TidB,
+                op.WMask,
+                op.SrcA,
+                op.SrcB,
+                op.Dest,
+                op.Toff,
+                op.Dc,
+                isBindless: false);
+        }
+
+        /// <summary>
+        /// Decodes the current operation as <c>InstTld4B</c> and forwards its fields to EmitTld4 as a bindless gather with a zero immediate.
+        /// </summary>
+        public static void Tld4B(EmitterContext context)
+        {
+            var op = context.GetOp<InstTld4B>();
+
+            EmitTld4(
+                context,
+                op.Dim,
+                op.TexComp,
+                0,
+                op.WMask,
+                op.SrcA,
+                op.SrcB,
+                op.Dest,
+                op.Toff,
+                op.Dc,
+                isBindless: true);
+        }
+
+        /// <summary>
+        /// Decodes the current operation as <c>InstTld4s</c> and emits it through EmitTexs with a fixed write mask value of 4
+        /// and 32-bit destinations.
+        /// </summary>
+        public static void Tld4s(EmitterContext context)
+        {
+            var op = context.GetOp<InstTld4s>();
+
+            EmitTexs(
+                context,
+                TexsType.Tld4s,
+                op.TidB,
+                4,
+                op.SrcA,
+                op.SrcB,
+                op.Dest,
+                op.Dest2,
+                isF16: false);
+        }
+
+        /// <summary>
+        /// Decodes the current operation as <c>InstTld4s</c> and emits it through EmitTexs with a fixed write mask value of 4
+        /// and the half-float (packed) destination path.
+        /// </summary>
+        public static void Tld4sF16(EmitterContext context)
+        {
+            var op = context.GetOp<InstTld4s>();
+
+            EmitTexs(
+                context,
+                TexsType.Tld4s,
+                op.TidB,
+                4,
+                op.SrcA,
+                op.SrcB,
+                op.Dest,
+                op.Dest2,
+                isF16: true);
+        }
+
+        /// <summary>
+        /// Decodes the current operation as <c>InstTmml</c> and forwards its fields to EmitTmml (non-bindless).
+        /// </summary>
+        public static void Tmml(EmitterContext context)
+        {
+            var op = context.GetOp<InstTmml>();
+
+            EmitTmml(
+                context,
+                op.Dim,
+                op.TidB,
+                op.WMask,
+                op.SrcA,
+                op.SrcB,
+                op.Dest,
+                isBindless: false);
+        }
+
+        /// <summary>
+        /// Decodes the current operation as <c>InstTmmlB</c> and forwards its fields to EmitTmml (bindless, zero immediate).
+        /// </summary>
+        public static void TmmlB(EmitterContext context)
+        {
+            var op = context.GetOp<InstTmmlB>();
+
+            EmitTmml(
+                context,
+                op.Dim,
+                0,
+                op.WMask,
+                op.SrcA,
+                op.SrcB,
+                op.Dest,
+                isBindless: true);
+        }
+
+        /// <summary>
+        /// Decodes the current operation as <c>InstTxd</c> and forwards its fields to EmitTxd (non-bindless).
+        /// </summary>
+        public static void Txd(EmitterContext context)
+        {
+            var op = context.GetOp<InstTxd>();
+
+            EmitTxd(
+                context,
+                op.Dim,
+                op.TidB,
+                op.WMask,
+                op.SrcA,
+                op.SrcB,
+                op.Dest,
+                op.Toff,
+                isBindless: false);
+        }
+
+        /// <summary>
+        /// Decodes the current operation as <c>InstTxdB</c> and forwards its fields to EmitTxd (bindless, zero immediate).
+        /// </summary>
+        public static void TxdB(EmitterContext context)
+        {
+            var op = context.GetOp<InstTxdB>();
+
+            EmitTxd(
+                context,
+                op.Dim,
+                0,
+                op.WMask,
+                op.SrcA,
+                op.SrcB,
+                op.Dest,
+                op.Toff,
+                isBindless: true);
+        }
+
+        /// <summary>
+        /// Decodes the current operation as <c>InstTxq</c> and forwards its fields to EmitTxq (non-bindless).
+        /// </summary>
+        public static void Txq(EmitterContext context)
+        {
+            var op = context.GetOp<InstTxq>();
+
+            EmitTxq(
+                context,
+                op.TexQuery,
+                op.TidB,
+                op.WMask,
+                op.SrcA,
+                op.Dest,
+                isBindless: false);
+        }
+
+        /// <summary>
+        /// Decodes the current operation as <c>InstTxqB</c> and forwards its fields to EmitTxq (bindless, zero immediate).
+        /// </summary>
+        public static void TxqB(EmitterContext context)
+        {
+            var op = context.GetOp<InstTxqB>();
+
+            EmitTxq(
+                context,
+                op.TexQuery,
+                0,
+                op.WMask,
+                op.SrcA,
+                op.Dest,
+                isBindless: true);
+        }
+
+        /// <summary>
+        /// Builds a <see cref="Instruction.TextureSample"/> operation from the decoded fields and adds it to <paramref name="context"/>.
+        /// </summary>
+        /// <remarks>
+        /// Nothing is emitted when <paramref name="rdIndex"/> is RZ.
+        /// Registers are consumed from two sequences starting at <paramref name="raIndex"/> and <paramref name="rbIndex"/>.
+        /// The A sequence supplies the array index (read first, appended after the coordinates) and the coordinates.
+        /// The B sequence supplies, in read order: the bindless handle, the LOD value, the packed offsets,
+        /// the depth-compare reference and the sample index, each only when the corresponding option is active.
+        /// Destinations are consecutive registers starting at <paramref name="rdIndex"/>, one per set bit of
+        /// <paramref name="componentMask"/>, stopping before RZ.
+        /// </remarks>
+        private static void EmitTex(
+            EmitterContext context,
+            TextureFlags flags,
+            TexDim dimensions,
+            Lod lodMode,
+            int imm,
+            int componentMask,
+            int raIndex,
+            int rbIndex,
+            int rdIndex,
+            bool isMultisample,
+            bool hasDepthCompare,
+            bool hasOffset)
+        {
+            if (rdIndex == RegisterConsts.RegisterZeroIndex)
+            {
+                return;
+            }
+
+            // Copies the next register of the A sequence; yields constant zero once the index has moved past RZ.
+            Operand NextA()
+            {
+                return raIndex > RegisterConsts.RegisterZeroIndex
+                    ? Const(0)
+                    : context.Copy(Register(raIndex++, RegisterType.Gpr));
+            }
+
+            // Same as NextA, but for the B sequence.
+            Operand NextB()
+            {
+                return rbIndex > RegisterConsts.RegisterZeroIndex
+                    ? Const(0)
+                    : context.Copy(Register(rbIndex++, RegisterType.Gpr));
+            }
+
+            SamplerType type = ConvertSamplerType(dimensions);
+
+            bool isArray = type.HasFlag(SamplerType.Array);
+            bool isBindless = flags.HasFlag(TextureFlags.Bindless);
+
+            // Read before the coordinates, but appended to the sources after them.
+            Operand arrayIndex = isArray ? NextA() : null;
+
+            var operands = new List<Operand>();
+
+            if (isBindless)
+            {
+                operands.Add(NextB());
+            }
+
+            bool hasLod = lodMode > Lod.Lz;
+
+            // True for an integer-coordinate access that uses none of the optional operands.
+            bool isPlainIntAccess =
+                (flags & ~TextureFlags.Bindless) == TextureFlags.IntCoords &&
+                !hasLod &&
+                !hasDepthCompare &&
+                !hasOffset &&
+                !isArray &&
+                !isMultisample;
+
+            if (type == SamplerType.Texture1D && isPlainIntAccess)
+            {
+                // For bindless, we don't have any way to know the texture type,
+                // so we assume it's texture buffer when the sampler type is 1D, since that's more common.
+                if (isBindless || context.Config.GpuAccessor.QuerySamplerType(imm) == SamplerType.TextureBuffer)
+                {
+                    type = SamplerType.TextureBuffer;
+                }
+            }
+
+            int coordsCount = type.GetDimensions();
+
+            for (int coord = 0; coord < coordsCount; coord++)
+            {
+                operands.Add(NextA());
+            }
+
+            bool is1DTo2D = Sample1DAs2D && (type & SamplerType.Mask) == SamplerType.Texture1D;
+
+            if (is1DTo2D)
+            {
+                operands.Add(ConstF(0));
+
+                type = SamplerType.Texture2D | (type & SamplerType.Array);
+            }
+
+            if (isArray)
+            {
+                operands.Add(arrayIndex);
+            }
+
+            // Both values are read here, ahead of the depth-compare reference, and appended further down.
+            Operand lodValue = hasLod ? NextB() : ConstF(0);
+            Operand packedOffs = hasOffset ? NextB() : null;
+
+            if (hasDepthCompare)
+            {
+                operands.Add(NextB());
+
+                type |= SamplerType.Shadow;
+            }
+
+            bool usesLodLevel = lodMode == Lod.Lz || lodMode == Lod.Ll || lodMode == Lod.Lla;
+
+            if (usesLodLevel && !isMultisample && type != SamplerType.TextureBuffer)
+            {
+                operands.Add(lodValue);
+
+                flags |= TextureFlags.LodLevel;
+            }
+
+            if (hasOffset)
+            {
+                // One signed 4-bit field per coordinate.
+                for (int coord = 0; coord < coordsCount; coord++)
+                {
+                    operands.Add(context.BitfieldExtractS32(packedOffs, Const(coord * 4), Const(4)));
+                }
+
+                if (is1DTo2D)
+                {
+                    operands.Add(Const(0));
+                }
+
+                flags |= TextureFlags.Offset;
+            }
+
+            if (lodMode == Lod.Lb || lodMode == Lod.Lba)
+            {
+                operands.Add(lodValue);
+
+                flags |= TextureFlags.LodBias;
+            }
+
+            if (isMultisample)
+            {
+                operands.Add(NextB());
+
+                type |= SamplerType.Multisample;
+            }
+
+            // Consecutive destination registers, truncated so that RZ is never included.
+            int wantedDests = BitOperations.PopCount((uint)componentMask);
+            var destList = new List<Operand>(wantedDests);
+
+            for (int reg = rdIndex; destList.Count < wantedDests && reg < RegisterConsts.RegisterZeroIndex; reg++)
+            {
+                destList.Add(Register(reg, RegisterType.Gpr));
+            }
+
+            TextureOperation operation = context.CreateTextureOperation(
+                Instruction.TextureSample,
+                type,
+                flags,
+                isBindless ? 0 : imm,
+                componentMask,
+                destList.ToArray(),
+                operands.ToArray());
+
+            context.Add(operation);
+        }
+
+        /// <summary>
+        /// Builds a <see cref="Instruction.TextureSample"/> operation for the form selected by <paramref name="texsType"/>
+        /// and adds it to <paramref name="context"/>.
+        /// </summary>
+        /// <remarks>
+        /// Nothing is emitted when both <paramref name="dest"/> and <paramref name="dest2"/> are RZ, or when the decoded
+        /// target maps to <see cref="SamplerType.None"/> (a message is logged in that case).
+        /// Sources are read from two register sequences starting at <paramref name="srcA"/> and <paramref name="srcB"/>;
+        /// the read order per target is significant.
+        /// With <paramref name="isF16"/> the results go to temporaries that are packed in pairs into
+        /// <paramref name="dest"/> and <paramref name="dest2"/>; otherwise they are written to up to two registers
+        /// starting at each destination.
+        /// </remarks>
+        /// <exception cref="ArgumentException"><paramref name="texsType"/> is not a known value.</exception>
+        private static void EmitTexs(
+            EmitterContext context,
+            TexsType texsType,
+            int imm,
+            int writeMask,
+            int srcA,
+            int srcB,
+            int dest,
+            int dest2,
+            bool isF16)
+        {
+            if (dest == RegisterConsts.RegisterZeroIndex && dest2 == RegisterConsts.RegisterZeroIndex)
+            {
+                return;
+            }
+
+            var operands = new List<Operand>();
+
+            // Copies the next register of the A sequence; yields constant zero once the index has moved past RZ.
+            Operand NextA()
+            {
+                return srcA > RegisterConsts.RegisterZeroIndex
+                    ? Const(0)
+                    : context.Copy(Register(srcA++, RegisterType.Gpr));
+            }
+
+            // Same as NextA, but for the B sequence.
+            Operand NextB()
+            {
+                return srcB > RegisterConsts.RegisterZeroIndex
+                    ? Const(0)
+                    : context.Copy(Register(srcB++, RegisterType.Gpr));
+            }
+
+            // Reads one packed register from the B sequence and appends a signed bitfield per coordinate.
+            void AppendPackedOffsets(int coordsCount, int stride, int size)
+            {
+                Operand packed = NextB();
+
+                for (int coord = 0; coord < coordsCount; coord++)
+                {
+                    operands.Add(context.BitfieldExtractS32(packed, Const(coord * stride), Const(size)));
+                }
+            }
+
+            SamplerType type;
+            TextureFlags flags;
+
+            switch (texsType)
+            {
+                case TexsType.Texs:
+                {
+                    var texsOp = context.GetOp<InstTexs>();
+
+                    type = ConvertSamplerType(texsOp.Target);
+
+                    if (type == SamplerType.None)
+                    {
+                        context.Config.GpuAccessor.Log("Invalid texture sampler type.");
+                        return;
+                    }
+
+                    flags = ConvertTextureFlags(texsOp.Target);
+
+                    // We don't need to handle 1D -> Buffer conversions here as
+                    // only texture sample with integer coordinates can ever use buffer targets.
+
+                    if ((type & SamplerType.Array) != 0)
+                    {
+                        // The array index is read first but appended after the coordinates.
+                        Operand arrayIndex = NextA();
+
+                        operands.Add(NextA());
+                        operands.Add(NextB());
+
+                        operands.Add(arrayIndex);
+
+                        if ((type & SamplerType.Shadow) != 0)
+                        {
+                            operands.Add(NextB());
+                        }
+
+                        if ((flags & TextureFlags.LodLevel) != 0)
+                        {
+                            operands.Add(ConstF(0));
+                        }
+
+                        break;
+                    }
+
+                    switch (texsOp.Target)
+                    {
+                        case TexsTarget.Texture1DLodZero:
+                            operands.Add(NextA());
+
+                            if (Sample1DAs2D)
+                            {
+                                operands.Add(ConstF(0));
+
+                                type = (type & ~SamplerType.Mask) | SamplerType.Texture2D;
+                            }
+
+                            operands.Add(ConstF(0));
+                            break;
+
+                        case TexsTarget.Texture2D:
+                            operands.Add(NextA());
+                            operands.Add(NextB());
+                            break;
+
+                        case TexsTarget.Texture2DLodZero:
+                            operands.Add(NextA());
+                            operands.Add(NextB());
+                            operands.Add(ConstF(0));
+                            break;
+
+                        case TexsTarget.Texture2DLodLevel:
+                        case TexsTarget.Texture2DDepthCompare:
+                        case TexsTarget.Texture3D:
+                        case TexsTarget.TextureCube:
+                            operands.Add(NextA());
+                            operands.Add(NextA());
+                            operands.Add(NextB());
+                            break;
+
+                        case TexsTarget.Texture2DLodZeroDepthCompare:
+                        case TexsTarget.Texture3DLodZero:
+                            operands.Add(NextA());
+                            operands.Add(NextA());
+                            operands.Add(NextB());
+                            operands.Add(ConstF(0));
+                            break;
+
+                        case TexsTarget.Texture2DLodLevelDepthCompare:
+                        case TexsTarget.TextureCubeLodLevel:
+                            operands.Add(NextA());
+                            operands.Add(NextA());
+                            operands.Add(NextB());
+                            operands.Add(NextB());
+                            break;
+                    }
+
+                    break;
+                }
+
+                case TexsType.Tlds:
+                {
+                    var tldsOp = context.GetOp<InstTlds>();
+
+                    type = ConvertSamplerType(tldsOp.Target);
+
+                    if (type == SamplerType.None)
+                    {
+                        context.Config.GpuAccessor.Log("Invalid texel fetch sampler type.");
+                        return;
+                    }
+
+                    context.Config.SetUsedFeature(FeatureFlags.IntegerSampling);
+
+                    flags = ConvertTextureFlags(tldsOp.Target) | TextureFlags.IntCoords;
+
+                    // A 1D LOD-zero fetch whose sampler is reported as a buffer is emitted as a buffer access without LOD.
+                    if (tldsOp.Target == TldsTarget.Texture1DLodZero &&
+                        context.Config.GpuAccessor.QuerySamplerType(tldsOp.TidB) == SamplerType.TextureBuffer)
+                    {
+                        type = SamplerType.TextureBuffer;
+                        flags &= ~TextureFlags.LodLevel;
+                    }
+
+                    switch (tldsOp.Target)
+                    {
+                        case TldsTarget.Texture1DLodZero:
+                            operands.Add(NextA());
+
+                            if (type != SamplerType.TextureBuffer)
+                            {
+                                if (Sample1DAs2D)
+                                {
+                                    operands.Add(ConstF(0));
+
+                                    type = (type & ~SamplerType.Mask) | SamplerType.Texture2D;
+                                }
+
+                                operands.Add(ConstF(0));
+                            }
+                            break;
+
+                        case TldsTarget.Texture1DLodLevel:
+                            operands.Add(NextA());
+
+                            if (Sample1DAs2D)
+                            {
+                                operands.Add(ConstF(0));
+
+                                type = (type & ~SamplerType.Mask) | SamplerType.Texture2D;
+                            }
+
+                            operands.Add(NextB());
+                            break;
+
+                        case TldsTarget.Texture2DLodZero:
+                            operands.Add(NextA());
+                            operands.Add(NextB());
+                            operands.Add(Const(0));
+                            break;
+
+                        case TldsTarget.Texture2DLodZeroOffset:
+                            operands.Add(NextA());
+                            operands.Add(NextA());
+                            operands.Add(Const(0));
+                            break;
+
+                        case TldsTarget.Texture2DLodZeroMultisample:
+                        case TldsTarget.Texture2DLodLevel:
+                        case TldsTarget.Texture2DLodLevelOffset:
+                            operands.Add(NextA());
+                            operands.Add(NextA());
+                            operands.Add(NextB());
+                            break;
+
+                        case TldsTarget.Texture3DLodZero:
+                            operands.Add(NextA());
+                            operands.Add(NextA());
+                            operands.Add(NextB());
+                            operands.Add(Const(0));
+                            break;
+
+                        case TldsTarget.Texture2DArrayLodZero:
+                            operands.Add(NextB());
+                            operands.Add(NextB());
+                            operands.Add(NextA());
+                            operands.Add(Const(0));
+                            break;
+                    }
+
+                    if ((flags & TextureFlags.Offset) != 0)
+                    {
+                        AppendPackedOffsets(type.GetDimensions(), 4, 4);
+                    }
+
+                    break;
+                }
+
+                case TexsType.Tld4s:
+                {
+                    var tld4sOp = context.GetOp<InstTld4s>();
+
+                    // The second coordinate comes from the A sequence when an extra operand is present, from B otherwise.
+                    operands.Add(NextA());
+                    operands.Add(tld4sOp.Dc || tld4sOp.Aoffi ? NextA() : NextB());
+
+                    type = SamplerType.Texture2D;
+                    flags = TextureFlags.Gather;
+
+                    if (tld4sOp.Dc)
+                    {
+                        operands.Add(NextB());
+
+                        type |= SamplerType.Shadow;
+                    }
+
+                    if (tld4sOp.Aoffi)
+                    {
+                        AppendPackedOffsets(type.GetDimensions(), 8, 6);
+
+                        flags |= TextureFlags.Offset;
+                    }
+
+                    operands.Add(Const((int)tld4sOp.TexComp));
+
+                    break;
+                }
+
+                default:
+                    throw new ArgumentException($"Invalid TEXS type \"{texsType}\".");
+            }
+
+            Operand[] sources = operands.ToArray();
+
+            // Halves to pack into dest (rd0) and dest2 (rd1); slots that receive no result stay at zero.
+            Operand[] rd0 = new Operand[2] { ConstF(0), ConstF(0) };
+            Operand[] rd1 = new Operand[2] { ConstF(0), ConstF(0) };
+
+            int maskRow = dest2 == RegisterConsts.RegisterZeroIndex ? 0 : 1;
+            int componentMask = _maskLut[maskRow, writeMask];
+
+            int componentsCount = BitOperations.PopCount((uint)componentMask);
+
+            Operand[] dests = new Operand[componentsCount];
+
+            for (int comp = 0; comp < componentsCount; comp++)
+            {
+                // Components 0-1 belong to the first destination, 2-3 to the second one.
+                bool useSecond = (comp >> 1) != 0;
+                int lane = comp & 1;
+
+                if (isF16)
+                {
+                    Operand temp = Local();
+
+                    if (useSecond)
+                    {
+                        rd1[lane] = temp;
+                    }
+                    else
+                    {
+                        rd0[lane] = temp;
+                    }
+
+                    dests[comp] = temp;
+                }
+                else
+                {
+                    int rdIndex = useSecond ? dest2 : dest;
+
+                    if (rdIndex < RegisterConsts.RegisterZeroIndex)
+                    {
+                        rdIndex += lane;
+                    }
+
+                    dests[comp] = Register(rdIndex, RegisterType.Gpr);
+                }
+            }
+
+            TextureOperation operation = context.CreateTextureOperation(
+                Instruction.TextureSample,
+                type,
+                flags,
+                imm,
+                componentMask,
+                dests,
+                sources);
+
+            context.Add(operation);
+
+            if (isF16)
+            {
+                context.Copy(Register(dest, RegisterType.Gpr), context.PackHalf2x16(rd0[0], rd0[1]));
+                context.Copy(Register(dest2, RegisterType.Gpr), context.PackHalf2x16(rd1[0], rd1[1]));
+            }
+        }
+
+        /// <summary>
+        /// Builds a gather <see cref="Instruction.TextureSample"/> operation and adds it to <paramref name="context"/>.
+        /// </summary>
+        /// <remarks>
+        /// Nothing is emitted when <paramref name="dest"/> is RZ.
+        /// The A sequence (from <paramref name="srcA"/>) supplies the array index (read first, appended after the
+        /// coordinates) and the coordinates. The B sequence (from <paramref name="srcB"/>) supplies, in read order:
+        /// the bindless handle, one or two packed offset registers, and the depth-compare reference.
+        /// The component selector is always appended last.
+        /// </remarks>
+        private static void EmitTld4(
+            EmitterContext context,
+            TexDim dimensions,
+            TexComp component,
+            int imm,
+            int componentMask,
+            int srcA,
+            int srcB,
+            int dest,
+            TexOffset offset,
+            bool hasDepthCompare,
+            bool isBindless)
+        {
+            if (dest == RegisterConsts.RegisterZeroIndex)
+            {
+                return;
+            }
+
+            // Copies the next register of the A sequence; yields constant zero once the index has moved past RZ.
+            Operand NextA()
+            {
+                return srcA > RegisterConsts.RegisterZeroIndex
+                    ? Const(0)
+                    : context.Copy(Register(srcA++, RegisterType.Gpr));
+            }
+
+            // Same as NextA, but for the B sequence.
+            Operand NextB()
+            {
+                return srcB > RegisterConsts.RegisterZeroIndex
+                    ? Const(0)
+                    : context.Copy(Register(srcB++, RegisterType.Gpr));
+            }
+
+            bool isArray =
+                dimensions == TexDim.Array1d ||
+                dimensions == TexDim.Array2d ||
+                dimensions == TexDim.Array3d ||
+                dimensions == TexDim.ArrayCube;
+
+            Operand arrayIndex = isArray ? NextA() : null;
+
+            var operands = new List<Operand>();
+
+            SamplerType type = ConvertSamplerType(dimensions);
+            TextureFlags flags = TextureFlags.Gather;
+
+            if (isBindless)
+            {
+                operands.Add(NextB());
+
+                flags |= TextureFlags.Bindless;
+            }
+
+            int coordsCount = type.GetDimensions();
+
+            for (int coord = 0; coord < coordsCount; coord++)
+            {
+                operands.Add(NextA());
+            }
+
+            bool is1DTo2D = Sample1DAs2D && (type & SamplerType.Mask) == SamplerType.Texture1D;
+
+            if (is1DTo2D)
+            {
+                operands.Add(ConstF(0));
+
+                type = SamplerType.Texture2D | (type & SamplerType.Array);
+            }
+
+            if (isArray)
+            {
+                operands.Add(arrayIndex);
+            }
+
+            bool isPtp = offset == TexOffset.Ptp;
+            bool hasAnyOffset = offset == TexOffset.Aoffi || isPtp;
+
+            // Packed offsets are read before the depth-compare reference; the second register is only used by Ptp.
+            Operand packedLow = hasAnyOffset ? NextB() : null;
+            Operand packedHigh = isPtp ? NextB() : null;
+
+            if (hasDepthCompare)
+            {
+                operands.Add(NextB());
+
+                type |= SamplerType.Shadow;
+            }
+
+            if (hasAnyOffset)
+            {
+                int offsetTexelsCount = isPtp ? 4 : 1;
+                int totalOffsets = coordsCount * offsetTexelsCount;
+
+                for (int field = 0; field < totalOffsets; field++)
+                {
+                    // Four 8-bit slots per packed register, 6 signed bits taken from each slot.
+                    Operand packed = ((field >> 2) & 1) == 0 ? packedLow : packedHigh;
+
+                    operands.Add(context.BitfieldExtractS32(packed, Const((field & 3) * 8), Const(6)));
+                }
+
+                if (is1DTo2D)
+                {
+                    for (int texel = 0; texel < offsetTexelsCount; texel++)
+                    {
+                        operands.Add(Const(0));
+                    }
+                }
+
+                flags |= isPtp ? TextureFlags.Offsets : TextureFlags.Offset;
+            }
+
+            operands.Add(Const((int)component));
+
+            // Consecutive destination registers, truncated so that RZ is never included.
+            int wantedDests = BitOperations.PopCount((uint)componentMask);
+            var destList = new List<Operand>(wantedDests);
+
+            for (int reg = dest; destList.Count < wantedDests && reg < RegisterConsts.RegisterZeroIndex; reg++)
+            {
+                destList.Add(Register(reg, RegisterType.Gpr));
+            }
+
+            TextureOperation operation = context.CreateTextureOperation(
+                Instruction.TextureSample,
+                type,
+                flags,
+                imm,
+                componentMask,
+                destList.ToArray(),
+                operands.ToArray());
+
+            context.Add(operation);
+        }
+
+        /// <summary>
+        /// Emits one <see cref="Instruction.Lod"/> operation per requested x/y component and stores each result,
+        /// multiplied by 256 and converted to a signed integer, into consecutive registers starting at <paramref name="dest"/>.
+        /// </summary>
+        /// <remarks>
+        /// Nothing is emitted when <paramref name="dest"/> is RZ. Requested z/w components receive zero and a comment node.
+        /// Emission stops as soon as the next destination register would be RZ.
+        /// </remarks>
+        private static void EmitTmml(
+            EmitterContext context,
+            TexDim dimensions,
+            int imm,
+            int componentMask,
+            int srcA,
+            int srcB,
+            int dest,
+            bool isBindless)
+        {
+            if (dest == RegisterConsts.RegisterZeroIndex)
+            {
+                return;
+            }
+
+            // Copies the next register of the A sequence; yields constant zero once the index has moved past RZ.
+            Operand NextA()
+            {
+                return srcA > RegisterConsts.RegisterZeroIndex
+                    ? Const(0)
+                    : context.Copy(Register(srcA++, RegisterType.Gpr));
+            }
+
+            // Same as NextA, but for the B sequence.
+            Operand NextB()
+            {
+                return srcB > RegisterConsts.RegisterZeroIndex
+                    ? Const(0)
+                    : context.Copy(Register(srcB++, RegisterType.Gpr));
+            }
+
+            // Yields the next destination register, or null once RZ has been reached.
+            Operand NextDest()
+            {
+                return dest >= RegisterConsts.RegisterZeroIndex
+                    ? null
+                    : Register(dest++, RegisterType.Gpr);
+            }
+
+            TextureFlags flags = TextureFlags.None;
+
+            var operands = new List<Operand>();
+
+            if (isBindless)
+            {
+                operands.Add(NextB());
+
+                flags |= TextureFlags.Bindless;
+            }
+
+            SamplerType type = ConvertSamplerType(dimensions);
+
+            int coordsCount = type.GetDimensions();
+
+            bool isArray =
+                dimensions == TexDim.Array1d ||
+                dimensions == TexDim.Array2d ||
+                dimensions == TexDim.Array3d ||
+                dimensions == TexDim.ArrayCube;
+
+            // Read before the coordinates, appended after them.
+            Operand arrayIndex = isArray ? NextA() : null;
+
+            for (int coord = 0; coord < coordsCount; coord++)
+            {
+                operands.Add(NextA());
+            }
+
+            if (Sample1DAs2D && (type & SamplerType.Mask) == SamplerType.Texture1D)
+            {
+                operands.Add(ConstF(0));
+
+                type = SamplerType.Texture2D | (type & SamplerType.Array);
+            }
+
+            if (isArray)
+            {
+                operands.Add(arrayIndex);
+            }
+
+            Operand[] sources = operands.ToArray();
+
+            int compIndex = 0;
+
+            for (int pendingBits = componentMask; pendingBits != 0; pendingBits >>= 1, compIndex++)
+            {
+                if ((pendingBits & 1) == 0)
+                {
+                    continue;
+                }
+
+                Operand destOperand = NextDest();
+
+                if (destOperand == null)
+                {
+                    break;
+                }
+
+                // Components z and w aren't standard, we return 0 in this case and add a comment.
+                if (compIndex >= 2)
+                {
+                    context.Add(new CommentNode("Unsupported component z or w found"));
+                    context.Copy(destOperand, Const(0));
+
+                    continue;
+                }
+
+                Operand lodResult = Local();
+
+                TextureOperation operation = context.CreateTextureOperation(
+                    Instruction.Lod,
+                    type,
+                    flags,
+                    imm,
+                    compIndex ^ 1, // The instruction component order is the inverse of GLSL's.
+                    new[] { lodResult },
+                    sources);
+
+                context.Add(operation);
+
+                // Scale by 256 before the integer conversion.
+                Operand scaled = context.FPMultiply(lodResult, ConstF(256.0f));
+
+                context.Copy(destOperand, context.FP32ConvertToS32(scaled));
+            }
+        }
+
+        /// <summary>
+        /// Builds a <see cref="Instruction.TextureSample"/> operation with explicit derivatives and adds it to <paramref name="context"/>.
+        /// </summary>
+        /// <remarks>
+        /// Nothing is emitted when <paramref name="dest"/> is RZ.
+        /// The A sequence (from <paramref name="srcA"/>) supplies the bindless handle, the coordinates and one packed
+        /// parameter register: its low 16 bits are used as the array index and 4-bit fields starting at bit 16 as offsets.
+        /// The B sequence (from <paramref name="srcB"/>) supplies the derivative values.
+        /// </remarks>
+        private static void EmitTxd(
+            EmitterContext context,
+            TexDim dimensions,
+            int imm,
+            int componentMask,
+            int srcA,
+            int srcB,
+            int dest,
+            bool hasOffset,
+            bool isBindless)
+        {
+            if (dest == RegisterConsts.RegisterZeroIndex)
+            {
+                return;
+            }
+
+            // Copies the next register of the A sequence; yields constant zero once the index has moved past RZ.
+            Operand NextA()
+            {
+                return srcA > RegisterConsts.RegisterZeroIndex
+                    ? Const(0)
+                    : context.Copy(Register(srcA++, RegisterType.Gpr));
+            }
+
+            // Same as NextA, but for the B sequence.
+            Operand NextB()
+            {
+                return srcB > RegisterConsts.RegisterZeroIndex
+                    ? Const(0)
+                    : context.Copy(Register(srcB++, RegisterType.Gpr));
+            }
+
+            TextureFlags flags = TextureFlags.Derivatives;
+
+            var operands = new List<Operand>();
+
+            if (isBindless)
+            {
+                operands.Add(NextA());
+
+                flags |= TextureFlags.Bindless;
+            }
+
+            SamplerType type = ConvertSamplerType(dimensions);
+
+            int coordsCount = type.GetDimensions();
+
+            for (int coord = 0; coord < coordsCount; coord++)
+            {
+                operands.Add(NextA());
+            }
+
+            bool is1DTo2D = Sample1DAs2D && (type & SamplerType.Mask) == SamplerType.Texture1D;
+
+            if (is1DTo2D)
+            {
+                operands.Add(ConstF(0));
+
+                type = SamplerType.Texture2D | (type & SamplerType.Array);
+            }
+
+            // Always read, even when neither the array index nor the offsets end up being used.
+            Operand packedParams = NextA();
+
+            bool isArray =
+                dimensions == TexDim.Array1d ||
+                dimensions == TexDim.Array2d ||
+                dimensions == TexDim.Array3d ||
+                dimensions == TexDim.ArrayCube;
+
+            if (isArray)
+            {
+                operands.Add(context.BitwiseAnd(packedParams, Const(0xffff)));
+            }
+
+            // Derivatives (X and Y).
+            int derivativeCount = 2 * coordsCount;
+
+            for (int derivative = 0; derivative < derivativeCount; derivative++)
+            {
+                operands.Add(NextB());
+
+                if (is1DTo2D)
+                {
+                    operands.Add(ConstF(0));
+                }
+            }
+
+            if (hasOffset)
+            {
+                for (int coord = 0; coord < coordsCount; coord++)
+                {
+                    operands.Add(context.BitfieldExtractS32(packedParams, Const(16 + coord * 4), Const(4)));
+                }
+
+                if (is1DTo2D)
+                {
+                    operands.Add(Const(0));
+                }
+
+                flags |= TextureFlags.Offset;
+            }
+
+            // Consecutive destination registers, truncated so that RZ is never included.
+            int wantedDests = BitOperations.PopCount((uint)componentMask);
+            var destList = new List<Operand>(wantedDests);
+
+            for (int reg = dest; destList.Count < wantedDests && reg < RegisterConsts.RegisterZeroIndex; reg++)
+            {
+                destList.Add(Register(reg, RegisterType.Gpr));
+            }
+
+            TextureOperation operation = context.CreateTextureOperation(
+                Instruction.TextureSample,
+                type,
+                flags,
+                imm,
+                componentMask,
+                destList.ToArray(),
+                operands.ToArray());
+
+            context.Add(operation);
+        }
+
+        /// <summary>
+        /// Emits one <see cref="Instruction.TextureSize"/> operation per set bit of <paramref name="componentMask"/>,
+        /// writing to consecutive registers starting at <paramref name="dest"/>.
+        /// </summary>
+        /// <remarks>
+        /// Nothing is emitted when <paramref name="dest"/> is RZ, and emission stops once the next destination would be RZ.
+        /// <paramref name="query"/> is currently not used. For bindless queries the sampler type is guessed from the mask:
+        /// 3D when bit 2 is set, 2D otherwise; otherwise it is obtained from the GPU accessor.
+        /// </remarks>
+        private static void EmitTxq(
+            EmitterContext context,
+            TexQuery query,
+            int imm,
+            int componentMask,
+            int srcA,
+            int dest,
+            bool isBindless)
+        {
+            if (dest == RegisterConsts.RegisterZeroIndex)
+            {
+                return;
+            }
+
+            context.Config.SetUsedFeature(FeatureFlags.IntegerSampling);
+
+            // TODO: Validate and use query.
+            TextureFlags flags = isBindless ? TextureFlags.Bindless : TextureFlags.None;
+
+            // Copies the next register of the A sequence; yields constant zero once the index has moved past RZ.
+            Operand NextA()
+            {
+                return srcA > RegisterConsts.RegisterZeroIndex
+                    ? Const(0)
+                    : context.Copy(Register(srcA++, RegisterType.Gpr));
+            }
+
+            // Yields the next destination register, or null once RZ has been reached.
+            Operand NextDest()
+            {
+                return dest >= RegisterConsts.RegisterZeroIndex
+                    ? null
+                    : Register(dest++, RegisterType.Gpr);
+            }
+
+            var operands = new List<Operand>();
+
+            if (isBindless)
+            {
+                operands.Add(NextA());
+            }
+
+            operands.Add(NextA());
+
+            Operand[] sources = operands.ToArray();
+
+            SamplerType type = isBindless
+                ? ((componentMask & 4) != 0 ? SamplerType.Texture3D : SamplerType.Texture2D)
+                : context.Config.GpuAccessor.QuerySamplerType(imm);
+
+            int compIndex = 0;
+
+            for (int pendingBits = componentMask; pendingBits != 0; pendingBits >>= 1, compIndex++)
+            {
+                if ((pendingBits & 1) == 0)
+                {
+                    continue;
+                }
+
+                Operand destOperand = NextDest();
+
+                if (destOperand == null)
+                {
+                    break;
+                }
+
+                TextureOperation operation = context.CreateTextureOperation(
+                    Instruction.TextureSize,
+                    type,
+                    flags,
+                    imm,
+                    compIndex,
+                    new[] { destOperand },
+                    sources);
+
+                context.Add(operation);
+            }
+        }
+
+        /// <summary>
+        /// Maps a texture dimension to the matching sampler type, adding the array flag for the array variants.
+        /// </summary>
+        /// <exception cref="ArgumentException"><paramref name="dimensions"/> is not one of the listed values.</exception>
+        private static SamplerType ConvertSamplerType(TexDim dimensions)
+        {
+            switch (dimensions)
+            {
+                case TexDim._1d:
+                    return SamplerType.Texture1D;
+                case TexDim.Array1d:
+                    return SamplerType.Texture1D | SamplerType.Array;
+                case TexDim._2d:
+                    return SamplerType.Texture2D;
+                case TexDim.Array2d:
+                    return SamplerType.Texture2D | SamplerType.Array;
+                case TexDim._3d:
+                    return SamplerType.Texture3D;
+                case TexDim.Array3d:
+                    return SamplerType.Texture3D | SamplerType.Array;
+                case TexDim.Cube:
+                    return SamplerType.TextureCube;
+                case TexDim.ArrayCube:
+                    return SamplerType.TextureCube | SamplerType.Array;
+                default:
+                    throw new ArgumentException($"Invalid texture dimensions \"{dimensions}\".");
+            }
+        }
+
+        /// <summary>
+        /// Maps a TEXS target to its sampler type; returns <see cref="SamplerType.None"/> for values not listed here.
+        /// </summary>
+        private static SamplerType ConvertSamplerType(TexsTarget type)
+        {
+            return type switch
+            {
+                TexsTarget.Texture1DLodZero => SamplerType.Texture1D,
+
+                TexsTarget.Texture2D => SamplerType.Texture2D,
+                TexsTarget.Texture2DLodZero => SamplerType.Texture2D,
+                TexsTarget.Texture2DLodLevel => SamplerType.Texture2D,
+
+                TexsTarget.Texture2DDepthCompare => SamplerType.Texture2D | SamplerType.Shadow,
+                TexsTarget.Texture2DLodLevelDepthCompare => SamplerType.Texture2D | SamplerType.Shadow,
+                TexsTarget.Texture2DLodZeroDepthCompare => SamplerType.Texture2D | SamplerType.Shadow,
+
+                TexsTarget.Texture2DArray => SamplerType.Texture2D | SamplerType.Array,
+                TexsTarget.Texture2DArrayLodZero => SamplerType.Texture2D | SamplerType.Array,
+
+                TexsTarget.Texture2DArrayLodZeroDepthCompare => SamplerType.Texture2D | SamplerType.Array | SamplerType.Shadow,
+
+                TexsTarget.Texture3D => SamplerType.Texture3D,
+                TexsTarget.Texture3DLodZero => SamplerType.Texture3D,
+
+                TexsTarget.TextureCube => SamplerType.TextureCube,
+                TexsTarget.TextureCubeLodLevel => SamplerType.TextureCube,
+
+                _ => SamplerType.None
+            };
+        }
+
+        /// <summary>
+        /// Maps a TLDS target to its sampler type; returns <see cref="SamplerType.None"/> for values not listed here.
+        /// </summary>
+        private static SamplerType ConvertSamplerType(TldsTarget type)
+        {
+            return type switch
+            {
+                TldsTarget.Texture1DLodZero => SamplerType.Texture1D,
+                TldsTarget.Texture1DLodLevel => SamplerType.Texture1D,
+
+                TldsTarget.Texture2DLodZero => SamplerType.Texture2D,
+                TldsTarget.Texture2DLodZeroOffset => SamplerType.Texture2D,
+                TldsTarget.Texture2DLodLevel => SamplerType.Texture2D,
+                TldsTarget.Texture2DLodLevelOffset => SamplerType.Texture2D,
+
+                TldsTarget.Texture2DLodZeroMultisample => SamplerType.Texture2D | SamplerType.Multisample,
+
+                TldsTarget.Texture3DLodZero => SamplerType.Texture3D,
+
+                TldsTarget.Texture2DArrayLodZero => SamplerType.Texture2D | SamplerType.Array,
+
+                _ => SamplerType.None
+            };
+        }
+
+        /// <summary>
+        /// Returns <see cref="TextureFlags.LodLevel"/> for the TEXS targets listed below and
+        /// <see cref="TextureFlags.None"/> for every other value.
+        /// </summary>
+        private static TextureFlags ConvertTextureFlags(TexsTarget type)
+        {
+            return type switch
+            {
+                TexsTarget.Texture1DLodZero => TextureFlags.LodLevel,
+                TexsTarget.Texture2DLodZero => TextureFlags.LodLevel,
+                TexsTarget.Texture2DLodLevel => TextureFlags.LodLevel,
+                TexsTarget.Texture2DLodLevelDepthCompare => TextureFlags.LodLevel,
+                TexsTarget.Texture2DLodZeroDepthCompare => TextureFlags.LodLevel,
+                TexsTarget.Texture2DArrayLodZero => TextureFlags.LodLevel,
+                TexsTarget.Texture2DArrayLodZeroDepthCompare => TextureFlags.LodLevel,
+                TexsTarget.Texture3DLodZero => TextureFlags.LodLevel,
+                TexsTarget.TextureCubeLodLevel => TextureFlags.LodLevel,
+
+                // Texture2D, Texture2DDepthCompare, Texture2DArray, Texture3D, TextureCube and any other value.
+                _ => TextureFlags.None
+            };
+        }
+
+        /// <summary>
+        /// Returns the texture flags for a TLDS target: <see cref="TextureFlags.LodLevel"/> for every listed target,
+        /// plus <see cref="TextureFlags.Offset"/> for the two offset variants; <see cref="TextureFlags.None"/> otherwise.
+        /// </summary>
+        private static TextureFlags ConvertTextureFlags(TldsTarget type)
+        {
+            return type switch
+            {
+                TldsTarget.Texture1DLodZero => TextureFlags.LodLevel,
+                TldsTarget.Texture1DLodLevel => TextureFlags.LodLevel,
+                TldsTarget.Texture2DLodZero => TextureFlags.LodLevel,
+                TldsTarget.Texture2DLodLevel => TextureFlags.LodLevel,
+                TldsTarget.Texture2DLodZeroMultisample => TextureFlags.LodLevel,
+                TldsTarget.Texture3DLodZero => TextureFlags.LodLevel,
+                TldsTarget.Texture2DArrayLodZero => TextureFlags.LodLevel,
+
+                TldsTarget.Texture2DLodZeroOffset => TextureFlags.LodLevel | TextureFlags.Offset,
+                TldsTarget.Texture2DLodLevelOffset => TextureFlags.LodLevel | TextureFlags.Offset,
+
+                _ => TextureFlags.None
+            };
+        }
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/Instructions/InstEmitVideoArithmetic.cs b/src/Ryujinx.Graphics.Shader/Instructions/InstEmitVideoArithmetic.cs
new file mode 100644
index 00000000..2d84c5bd
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/Instructions/InstEmitVideoArithmetic.cs
@@ -0,0 +1,118 @@
+using Ryujinx.Graphics.Shader.Decoders;
+using Ryujinx.Graphics.Shader.IntermediateRepresentation;
+using Ryujinx.Graphics.Shader.Translation;
+
+using static Ryujinx.Graphics.Shader.Instructions.InstEmitHelper;
+using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
+
+namespace Ryujinx.Graphics.Shader.Instructions
+{
+ static partial class InstEmit
+ {
+ /// <summary>
+ /// Emits code for the VMAD instruction.
+ /// The product of the extended A and B sources is tracked as a 64-bit value split into
+ /// low and high 32-bit halves, source C is added to it (carrying into the high half), and the
+ /// sum is optionally shifted right by 7 or 15 bits and saturated before being copied to the destination.
+ /// Condition codes are not written (see the TODO at the end).
+ /// </summary>
+ /// <param name="context">Emitter context positioned on the instruction being translated</param>
+ public static void Vmad(EmitterContext context)
+ {
+     InstVmad op = context.GetOp<InstVmad>();
+
+     bool isASigned = (op.ASelect & VectorSelect.S8B0) != 0;
+     bool isBSigned = (op.BSelect & VectorSelect.S8B0) != 0;
+
+     Operand a = InstEmitAluHelper.Extend(context, GetSrcReg(context, op.SrcA), op.ASelect);
+
+     // C is negated up front when the mode is NegB.
+     Operand c = context.INegate(GetSrcReg(context, op.SrcC), op.AvgMode == AvgMode.NegB);
+     Operand b;
+
+     if (!op.BVideo)
+     {
+         // Immediate form: the 16-bit immediate is only sign extended for a signed B selector.
+         int immediate = op.Imm16;
+
+         b = Const(isBSigned ? (immediate << 16) >> 16 : immediate);
+     }
+     else
+     {
+         b = InstEmitAluHelper.Extend(context, GetSrcReg(context, op.SrcB), op.BSelect);
+     }
+
+     Operand lowProduct = context.IMultiply(a, b);
+     Operand highProduct;
+
+     if (isASigned != isBSigned)
+     {
+         // Mixed signedness: take the unsigned high half and add
+         // (unsigned operand * sign mask of the signed operand), which subtracts
+         // the unsigned operand whenever the signed one is negative.
+         Operand correction;
+
+         if (isASigned)
+         {
+             Operand signMaskA = context.ShiftRightS32(a, Const(31));
+
+             correction = context.IMultiply(b, signMaskA);
+         }
+         else
+         {
+             Operand signMaskB = context.ShiftRightS32(b, Const(31));
+
+             correction = context.IMultiply(a, signMaskB);
+         }
+
+         Operand unsignedHigh = context.MultiplyHighU32(a, b);
+
+         highProduct = context.IAdd(correction, unsignedHigh);
+     }
+     else if (isASigned)
+     {
+         highProduct = context.MultiplyHighS32(a, b);
+     }
+     else
+     {
+         highProduct = context.MultiplyHighU32(a, b);
+     }
+
+     if (op.AvgMode == AvgMode.NegA)
+     {
+         (lowProduct, highProduct) = InstEmitAluHelper.NegateLong(context, lowProduct, highProduct);
+     }
+
+     // 64-bit accumulate: low halves are added first, the carry goes into the high half.
+     Operand low = InstEmitAluHelper.AddWithCarry(context, lowProduct, c, out Operand accumulateCarry);
+     Operand high = context.IAdd(highProduct, accumulateCarry);
+
+     if (op.AvgMode == AvgMode.PlusOne)
+     {
+         low = InstEmitAluHelper.AddWithCarry(context, low, Const(1), out Operand incrementCarry);
+         high = context.IAdd(high, incrementCarry);
+     }
+
+     bool isResultSigned =
+         op.AvgMode == AvgMode.NegA ||
+         op.AvgMode == AvgMode.NegB ||
+         op.ASelect == VectorSelect.S32 ||
+         op.BSelect == VectorSelect.S32;
+
+     int shift = 0;
+
+     if (op.VideoScale == VideoScale.Shr7)
+     {
+         shift = 7;
+     }
+     else if (op.VideoScale == VideoScale.Shr15)
+     {
+         shift = 15;
+     }
+
+     if (shift != 0)
+     {
+         // 64-bit right shift done on the two halves:
+         // the bits leaving the high half are ORed into the top of the low half.
+         Operand shiftedLow = context.ShiftRightU32(low, Const(shift));
+         Operand bitsFromHigh = context.ShiftLeft(high, Const(32 - shift));
+
+         low = context.BitwiseOr(shiftedLow, bitsFromHigh);
+
+         if (isResultSigned)
+         {
+             high = context.ShiftRightS32(high, Const(shift));
+         }
+         else
+         {
+             high = context.ShiftRightU32(high, Const(shift));
+         }
+     }
+
+     Operand res = low;
+
+     if (op.Sat)
+     {
+         // All ones when the high half is negative, zero otherwise.
+         Operand highSign = context.ShiftRightS32(high, Const(31));
+
+         if (!isResultSigned)
+         {
+             // Unsigned: any non-zero high half is out of range.
+             // Clamp to 0 for a negative value, to all ones otherwise.
+             Operand outOfRange = context.ICompareNotEqual(high, Const(0));
+
+             res = context.ConditionalSelect(outOfRange, context.BitwiseNot(highSign), low);
+         }
+         else
+         {
+             // Signed: in range only when the high half is the sign extension of the low half.
+             Operand lowSign = context.ShiftRightS32(low, Const(31));
+             Operand outOfRange = context.ICompareNotEqual(high, lowSign);
+             Operand limit = context.ConditionalSelect(highSign, Const(int.MinValue), Const(int.MaxValue));
+
+             res = context.ConditionalSelect(outOfRange, limit, low);
+         }
+     }
+
+     context.Copy(GetDest(op.Dest), res);
+
+     // TODO: CC.
+ }
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/Instructions/InstEmitVideoMinMax.cs b/src/Ryujinx.Graphics.Shader/Instructions/InstEmitVideoMinMax.cs
new file mode 100644
index 00000000..67b185ab
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/Instructions/InstEmitVideoMinMax.cs
@@ -0,0 +1,183 @@
+using Ryujinx.Graphics.Shader.Decoders;
+using Ryujinx.Graphics.Shader.IntermediateRepresentation;
+using Ryujinx.Graphics.Shader.Translation;
+
+using static Ryujinx.Graphics.Shader.Instructions.InstEmitHelper;
+using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
+
+namespace Ryujinx.Graphics.Shader.Instructions
+{
+ static partial class InstEmit
+ {
+ /// <summary>
+ /// Emits code for the VMNMX instruction.
+ /// Computes the minimum or maximum (selected by <c>op.Mn</c>, true meaning maximum) of the
+ /// extended A and B sources, optionally saturates it to the destination format, and then
+ /// combines it with source C according to <c>op.VideoOp</c>.
+ /// </summary>
+ /// <param name="context">Emitter context positioned on the instruction being translated</param>
+ public static void Vmnmx(EmitterContext context)
+ {
+     InstVmnmx op = context.GetOp<InstVmnmx>();
+
+     bool aSigned = (op.ASelect & VectorSelect.S8B0) != 0;
+     bool bSigned = (op.BSelect & VectorSelect.S8B0) != 0;
+
+     Operand srcA = InstEmitAluHelper.Extend(context, GetSrcReg(context, op.SrcA), op.ASelect);
+     Operand srcC = GetSrcReg(context, op.SrcC);
+     Operand srcB;
+
+     if (op.BVideo)
+     {
+         srcB = InstEmitAluHelper.Extend(context, GetSrcReg(context, op.SrcB), op.BSelect);
+     }
+     else
+     {
+         int imm = op.Imm16;
+
+         if (bSigned)
+         {
+             // Sign extend the 16-bit immediate.
+             imm = (imm << 16) >> 16;
+         }
+
+         srcB = Const(imm);
+     }
+
+     Operand res;
+
+     bool resSigned;
+
+     if (aSigned != bSigned)
+     {
+         // Signedness is different, but for max, result will always fit a U32,
+         // since one of the inputs can't be negative, and the result is the one
+         // with highest value. For min, it will always fit on a S32, since
+         // one of the input can't be greater than INT_MAX and we want the lowest value.
+         resSigned = !op.Mn;
+
+         Operand signedSrc = aSigned ? srcA : srcB;
+         Operand unsignedSrc = aSigned ? srcB : srcA;
+
+         if (op.Mn)
+         {
+             // An unsigned comparison is only wrong when the signed input is negative
+             // (it would look like a huge unsigned value). In that case the unsigned
+             // input is always the maximum.
+             Operand unsignedMax = context.IMaximumU32(srcA, srcB);
+             Operand isSignedNegative = context.ICompareLess(signedSrc, Const(0));
+
+             res = context.ConditionalSelect(isSignedNegative, unsignedSrc, unsignedMax);
+         }
+         else
+         {
+             // A signed comparison is only wrong when the unsigned input is above INT_MAX
+             // (it would look negative). In that case the signed input is always the minimum.
+             Operand signedMin = context.IMinimumS32(srcA, srcB);
+             Operand isUnsignedGtIntMax = context.ICompareLess(unsignedSrc, Const(0));
+
+             res = context.ConditionalSelect(isUnsignedGtIntMax, signedSrc, signedMin);
+         }
+     }
+     else
+     {
+         // Ra and Rb have the same signedness, so doesn't matter which one we test.
+         resSigned = aSigned;
+
+         if (op.Mn)
+         {
+             res = resSigned
+                 ? context.IMaximumS32(srcA, srcB)
+                 : context.IMaximumU32(srcA, srcB);
+         }
+         else
+         {
+             res = resSigned
+                 ? context.IMinimumS32(srcA, srcB)
+                 : context.IMinimumU32(srcA, srcB);
+         }
+     }
+
+     if (op.Sat)
+     {
+         if (op.DFormat && !resSigned)
+         {
+             // Unsigned result going to a signed destination: clamp to INT_MAX.
+             res = context.IMinimumU32(res, Const(int.MaxValue));
+         }
+         else if (!op.DFormat && resSigned)
+         {
+             // Signed result going to an unsigned destination: clamp negatives to 0.
+             res = context.IMaximumS32(res, Const(0));
+         }
+     }
+
+     // Secondary operation combining the min/max result with source C.
+     switch (op.VideoOp)
+     {
+         case VideoOp.Acc:
+             res = context.IAdd(res, srcC);
+             break;
+         case VideoOp.Max:
+             res = op.DFormat ? context.IMaximumS32(res, srcC) : context.IMaximumU32(res, srcC);
+             break;
+         case VideoOp.Min:
+             res = op.DFormat ? context.IMinimumS32(res, srcC) : context.IMinimumU32(res, srcC);
+             break;
+         case VideoOp.Mrg16h:
+             res = context.BitfieldInsert(srcC, res, Const(16), Const(16));
+             break;
+         case VideoOp.Mrg16l:
+             res = context.BitfieldInsert(srcC, res, Const(0), Const(16));
+             break;
+         case VideoOp.Mrg8b0:
+             res = context.BitfieldInsert(srcC, res, Const(0), Const(8));
+             break;
+         case VideoOp.Mrg8b2:
+             res = context.BitfieldInsert(srcC, res, Const(16), Const(8));
+             break;
+     }
+
+     context.Copy(GetDest(op.Dest), res);
+ }
+
+ /// <summary>
+ /// Emits code for the VSETP instruction.
+ /// Compares the extended A and B sources with <c>op.VComp</c>, combines the comparison result
+ /// and its bitwise complement with the source predicate through <c>op.BoolOp</c>, and writes
+ /// the two values to the destination predicate pair.
+ /// </summary>
+ /// <param name="context">Emitter context positioned on the instruction being translated</param>
+ public static void Vsetp(EmitterContext context)
+ {
+     InstVsetp op = context.GetOp<InstVsetp>();
+
+     bool isASigned = (op.ASelect & VectorSelect.S8B0) != 0;
+     bool isBSigned = (op.BSelect & VectorSelect.S8B0) != 0;
+
+     Operand a = InstEmitAluHelper.Extend(context, GetSrcReg(context, op.SrcA), op.ASelect);
+     Operand b;
+
+     if (!op.BVideo)
+     {
+         // Immediate form: sign extend the 16-bit immediate only for a signed B selector.
+         int immediate = op.Imm16;
+
+         b = Const(isBSigned ? (immediate << 16) >> 16 : immediate);
+     }
+     else
+     {
+         b = InstEmitAluHelper.Extend(context, GetSrcReg(context, op.SrcB), op.BSelect);
+     }
+
+     Operand p0Res;
+
+     if (isASigned == isBSigned)
+     {
+         // Sign matches, just do a regular comparison.
+         p0Res = GetIntComparison(context, op.VComp, a, b, isASigned, extended: false);
+     }
+     else
+     {
+         bool isA32 = (op.ASelect & ~VectorSelect.S8B0) == VectorSelect.U32;
+         bool isB32 = (op.BSelect & ~VectorSelect.S8B0) == VectorSelect.U32;
+
+         if (isA32 || isB32)
+         {
+             // TODO: Mismatching sign case.
+             p0Res = Const(0);
+         }
+         else
+         {
+             // Both values are extended small integer and can always fit in a S32, just do a signed comparison.
+             p0Res = GetIntComparison(context, op.VComp, a, b, isSigned: true, extended: false);
+         }
+     }
+
+     // The second predicate receives the complement of the raw comparison result.
+     Operand p1Res = context.BitwiseNot(p0Res);
+
+     Operand srcPred = GetPredicate(context, op.SrcPred, op.SrcPredInv);
+
+     Operand combined0 = InstEmitAluHelper.GetPredLogicalOp(context, op.BoolOp, p0Res, srcPred);
+     Operand combined1 = InstEmitAluHelper.GetPredLogicalOp(context, op.BoolOp, p1Res, srcPred);
+
+     context.Copy(Register(op.DestPred, RegisterType.Predicate), combined0);
+     context.Copy(Register(op.DestPredInv, RegisterType.Predicate), combined1);
+ }
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/Instructions/InstEmitWarp.cs b/src/Ryujinx.Graphics.Shader/Instructions/InstEmitWarp.cs
new file mode 100644
index 00000000..3c833613
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/Instructions/InstEmitWarp.cs
@@ -0,0 +1,84 @@
+using Ryujinx.Graphics.Shader.Decoders;
+using Ryujinx.Graphics.Shader.IntermediateRepresentation;
+using Ryujinx.Graphics.Shader.Translation;
+
+using static Ryujinx.Graphics.Shader.Instructions.InstEmitHelper;
+using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
+
+namespace Ryujinx.Graphics.Shader.Instructions
+{
+ static partial class InstEmit
+ {
+ /// <summary>
+ /// Emits code for the FSWZADD instruction: the swizzled floating-point add of sources A and B
+ /// (controlled by <c>op.PnWord</c>) is written to the destination register, and the
+ /// floating-point Z/N flags are then set from that register as requested by <c>op.WriteCC</c>.
+ /// </summary>
+ /// <param name="context">Emitter context positioned on the instruction being translated</param>
+ public static void Fswzadd(EmitterContext context)
+ {
+     InstFswzadd op = context.GetOp<InstFswzadd>();
+
+     Operand lhs = GetSrcReg(context, op.SrcA);
+     Operand rhs = GetSrcReg(context, op.SrcB);
+     Operand destination = GetDest(op.Dest);
+
+     Operand swizzledSum = context.FPSwizzleAdd(lhs, rhs, op.PnWord);
+
+     context.Copy(destination, swizzledSum);
+
+     InstEmitAluHelper.SetFPZnFlags(context, destination, op.WriteCC);
+ }
+
+ /// <summary>
+ /// Emits code for the SHFL instruction.
+ /// Source A is shuffled using the operation selected by <c>op.ShflMode</c>; the shuffled value
+ /// goes to the destination register and the accompanying validity value to the destination predicate.
+ /// Sources B and C come from an immediate when the matching "fix" bit is set, otherwise from a register.
+ /// </summary>
+ /// <param name="context">Emitter context positioned on the instruction being translated</param>
+ public static void Shfl(EmitterContext context)
+ {
+     InstShfl op = context.GetOp<InstShfl>();
+
+     Operand destPred = Register(op.DestPred, RegisterType.Predicate);
+
+     Operand srcA = GetSrcReg(context, op.SrcA);
+     Operand srcB;
+     Operand srcC;
+
+     if (op.BFixShfl)
+     {
+         srcB = Const(op.SrcBImm);
+     }
+     else
+     {
+         srcB = GetSrcReg(context, op.SrcB);
+     }
+
+     if (op.CFixShfl)
+     {
+         srcC = Const(op.SrcCImm);
+     }
+     else
+     {
+         srcC = GetSrcReg(context, op.SrcC);
+     }
+
+     Operand shuffled;
+     Operand isValid;
+
+     switch (op.ShflMode)
+     {
+         case ShflMode.Idx:
+             (shuffled, isValid) = context.Shuffle(srcA, srcB, srcC);
+             break;
+         case ShflMode.Up:
+             (shuffled, isValid) = context.ShuffleUp(srcA, srcB, srcC);
+             break;
+         case ShflMode.Down:
+             (shuffled, isValid) = context.ShuffleDown(srcA, srcB, srcC);
+             break;
+         case ShflMode.Bfly:
+             (shuffled, isValid) = context.ShuffleXor(srcA, srcB, srcC);
+             break;
+         default:
+             // Any other mode leaves both results null, which are still passed to Copy below.
+             shuffled = null;
+             isValid = null;
+             break;
+     }
+
+     context.Copy(GetDest(op.Dest), shuffled);
+     context.Copy(destPred, isValid);
+ }
+
+ /// <summary>
+ /// Emits code for the VOTE instruction.
+ /// The vote selected by <c>op.VoteMode</c> over the source predicate is written to the
+ /// destination predicate (an unrecognized mode is only logged), and, unless the destination
+ /// register is the zero register, the ballot of the source predicate is written to it.
+ /// </summary>
+ /// <param name="context">Emitter context positioned on the instruction being translated</param>
+ public static void Vote(EmitterContext context)
+ {
+     InstVote op = context.GetOp<InstVote>();
+
+     Operand srcPred = GetPredicate(context, op.SrcPred, op.SrcPredInv);
+
+     Operand voteResult = op.VoteMode switch
+     {
+         VoteMode.All => context.VoteAll(srcPred),
+         VoteMode.Any => context.VoteAny(srcPred),
+         VoteMode.Eq => context.VoteAllEqual(srcPred),
+         _ => null
+     };
+
+     if (voteResult == null)
+     {
+         context.Config.GpuAccessor.Log($"Invalid vote operation: {op.VoteMode}.");
+     }
+     else
+     {
+         context.Copy(Register(op.VpDest, RegisterType.Predicate), voteResult);
+     }
+
+     bool writesBallot = op.Dest != RegisterConsts.RegisterZeroIndex;
+
+     if (writesBallot)
+     {
+         context.Copy(GetDest(op.Dest), context.Ballot(srcPred));
+     }
+ }
+ }
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/Instructions/InstEmitter.cs b/src/Ryujinx.Graphics.Shader/Instructions/InstEmitter.cs
new file mode 100644
index 00000000..91c740b6
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/Instructions/InstEmitter.cs
@@ -0,0 +1,6 @@
+using Ryujinx.Graphics.Shader.Translation;
+
+namespace Ryujinx.Graphics.Shader.Instructions
+{
+ delegate void InstEmitter(EmitterContext context); // Method that receives an EmitterContext and returns nothing; the static InstEmit methods (e.g. Vote, Shfl) have this shape.
+} \ No newline at end of file
diff --git a/src/Ryujinx.Graphics.Shader/Instructions/Lop3Expression.cs b/src/Ryujinx.Graphics.Shader/Instructions/Lop3Expression.cs
new file mode 100644
index 00000000..6217ce53
--- /dev/null
+++ b/src/Ryujinx.Graphics.Shader/Instructions/Lop3Expression.cs
@@ -0,0 +1,141 @@
+using Ryujinx.Graphics.Shader.IntermediateRepresentation;
+using Ryujinx.Graphics.Shader.Translation;
+
+using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;
+
+namespace Ryujinx.Graphics.Shader.Instructions
+{
+ static class Lop3Expression
+ {
+ private enum TruthTable : byte // 8-entry truth tables of (a, b, c); bit number (a << 2) | (b << 1) | c holds the output for that input combination.
+ {
+ False = 0x00, // Constant false (no bit set)
+ True = 0xff, // Constant true (all bits set)
+ In = 0xf0, // a (first input passed through)
+ And2 = 0xc0, // a & b
+ Or2 = 0xfc, // a | b
+ Xor2 = 0x3c, // a ^ b
+ And3 = 0x80, // a & b & c
+ Or3 = 0xfe, // a | b | c
+ XorAnd = 0x60, // a & (b ^ c)
+ XorOr = 0xf6, // a | (b ^ c)
+ OrAnd = 0xe0, // a & (b | c)
+ AndOr = 0xf8, // a | (b & c)
+ Onehot = 0x16, // (a & !b & !c) | (!a & b & !c) | (!a & !b & c) - Exactly one input is true.
+ Majority = 0xe8, // Popcount(a, b, c) >= 2 - At least two inputs are true.
+ Gamble = 0x81, // (a & b & c) | (!a & !b & !c) - All on or all off. No direct GetExpr case; matched as the complement of InverseGamble.
+ InverseGamble = 0x7e, // Inverse of Gamble - The inputs are not all equal.
+ Dot = 0x1a, // a ^ (c | (a & b))
+ Mux = 0xca, // a ? b : c
+ AndXor = 0x78, // a ^ (b & c)
+ OrXor = 0x1e, // a ^ (b | c)
+ Xor3 = 0x96, // a ^ b ^ c
+ }
+
+ /// <summary>
+ /// Tries to build an expression equivalent to the 3-input logic operation described by a truth table.
+ /// All 64 combinations of swapping input pairs and inverting individual inputs are tried in order;
+ /// for each one the truth table is permuted to match and looked up, first directly and then as
+ /// the complement of a known expression.
+ /// </summary>
+ /// <param name="context">Emitter context used to emit the operations</param>
+ /// <param name="srcA">First input</param>
+ /// <param name="srcB">Second input</param>
+ /// <param name="srcC">Third input</param>
+ /// <param name="imm">Truth table of the operation (converted to the byte-sized table type)</param>
+ /// <returns>Operand holding the result, or null if no combination matched a known expression</returns>
+ public static Operand GetFromTruthTable(EmitterContext context, Operand srcA, Operand srcB, Operand srcC, int imm)
+ {
+     const int SwapXY = 0x01;
+     const int SwapXZ = 0x02;
+     const int SwapYZ = 0x04;
+     const int InvertX = 0x08;
+     const int InvertY = 0x10;
+     const int InvertZ = 0x20;
+     const int VariantCount = 0x40;
+
+     for (int variant = 0; variant < VariantCount; variant++)
+     {
+         TruthTable table = (TruthTable)imm;
+
+         Operand x = srcA;
+         Operand y = srcB;
+         Operand z = srcC;
+
+         // Each transformation is applied to the operands and mirrored on the table.
+         if ((variant & SwapXY) != 0)
+         {
+             Operand oldX = x;
+             x = y;
+             y = oldX;
+             table = PermuteTable(table, 7, 6, 3, 2, 5, 4, 1, 0);
+         }
+
+         if ((variant & SwapXZ) != 0)
+         {
+             Operand oldX = x;
+             x = z;
+             z = oldX;
+             table = PermuteTable(table, 7, 3, 5, 1, 6, 2, 4, 0);
+         }
+
+         if ((variant & SwapYZ) != 0)
+         {
+             Operand oldY = y;
+             y = z;
+             z = oldY;
+             table = PermuteTable(table, 7, 5, 6, 4, 3, 1, 2, 0);
+         }
+
+         if ((variant & InvertX) != 0)
+         {
+             x = context.BitwiseNot(x);
+             table = PermuteTable(table, 3, 2, 1, 0, 7, 6, 5, 4);
+         }
+
+         if ((variant & InvertY) != 0)
+         {
+             y = context.BitwiseNot(y);
+             table = PermuteTable(table, 5, 4, 7, 6, 1, 0, 3, 2);
+         }
+
+         if ((variant & InvertZ) != 0)
+         {
+             z = context.BitwiseNot(z);
+             table = PermuteTable(table, 6, 7, 4, 5, 2, 3, 0, 1);
+         }
+
+         Operand direct = GetExpr(table, context, x, y, z);
+
+         if (direct != null)
+         {
+             return direct;
+         }
+
+         // Not a known table: try its complement and invert the resulting expression.
+         TruthTable complement = (TruthTable)((int)table ^ 0xff);
+         Operand inverted = GetExpr(complement, context, x, y, z);
+
+         if (inverted != null)
+         {
+             return context.BitwiseNot(inverted);
+         }
+     }
+
+     return null;
+ }
+
+ /// <summary>
+ /// Emits the expression for a truth table that has a known closed form.
+ /// </summary>
+ /// <param name="imm">Truth table to look up</param>
+ /// <param name="context">Emitter context used to emit the operations</param>
+ /// <param name="x">Operand playing the role of "a" in the table</param>
+ /// <param name="y">Operand playing the role of "b" in the table</param>
+ /// <param name="z">Operand playing the role of "c" in the table</param>
+ /// <returns>Operand holding the result, or null (with nothing emitted) if the table is not handled here</returns>
+ private static Operand GetExpr(TruthTable imm, EmitterContext context, Operand x, Operand y, Operand z)
+ {
+     switch (imm)
+     {
+         case TruthTable.False:
+             return Const(0);
+
+         case TruthTable.True:
+             return Const(-1);
+
+         case TruthTable.In:
+             return x;
+
+         case TruthTable.And2:
+             return context.BitwiseAnd(x, y);
+
+         case TruthTable.Or2:
+             return context.BitwiseOr(x, y);
+
+         case TruthTable.Xor2:
+             return context.BitwiseExclusiveOr(x, y);
+
+         case TruthTable.And3:
+         {
+             Operand yAndZ = context.BitwiseAnd(y, z);
+             return context.BitwiseAnd(x, yAndZ);
+         }
+
+         case TruthTable.Or3:
+         {
+             Operand yOrZ = context.BitwiseOr(y, z);
+             return context.BitwiseOr(x, yOrZ);
+         }
+
+         case TruthTable.XorAnd:
+         {
+             Operand yXorZ = context.BitwiseExclusiveOr(y, z);
+             return context.BitwiseAnd(x, yXorZ);
+         }
+
+         case TruthTable.XorOr:
+         {
+             Operand yXorZ = context.BitwiseExclusiveOr(y, z);
+             return context.BitwiseOr(x, yXorZ);
+         }
+
+         case TruthTable.OrAnd:
+         {
+             Operand yOrZ = context.BitwiseOr(y, z);
+             return context.BitwiseAnd(x, yOrZ);
+         }
+
+         case TruthTable.AndOr:
+         {
+             Operand yAndZ = context.BitwiseAnd(y, z);
+             return context.BitwiseOr(x, yAndZ);
+         }
+
+         case TruthTable.Onehot:
+         {
+             // (x | y) ^ (z | (x & y))
+             Operand xOrY = context.BitwiseOr(x, y);
+             Operand xAndY = context.BitwiseAnd(x, y);
+             Operand zOrBoth = context.BitwiseOr(z, xAndY);
+             return context.BitwiseExclusiveOr(xOrY, zOrBoth);
+         }
+
+         case TruthTable.Majority:
+         {
+             // (x | y) & (z | (x & y))
+             Operand xOrY = context.BitwiseOr(x, y);
+             Operand xAndY = context.BitwiseAnd(x, y);
+             Operand zOrBoth = context.BitwiseOr(z, xAndY);
+             return context.BitwiseAnd(xOrY, zOrBoth);
+         }
+
+         case TruthTable.InverseGamble:
+         {
+             // (x ^ y) | (x ^ z)
+             Operand xXorY = context.BitwiseExclusiveOr(x, y);
+             Operand xXorZ = context.BitwiseExclusiveOr(x, z);
+             return context.BitwiseOr(xXorY, xXorZ);
+         }
+
+         case TruthTable.Dot:
+         {
+             // (x ^ z) & (~y | z)
+             Operand xXorZ = context.BitwiseExclusiveOr(x, z);
+             Operand notY = context.BitwiseNot(y);
+             Operand notYOrZ = context.BitwiseOr(notY, z);
+             return context.BitwiseAnd(xXorZ, notYOrZ);
+         }
+
+         case TruthTable.Mux:
+         {
+             // (x & y) | (~x & z)
+             Operand xAndY = context.BitwiseAnd(x, y);
+             Operand notX = context.BitwiseNot(x);
+             Operand notXAndZ = context.BitwiseAnd(notX, z);
+             return context.BitwiseOr(xAndY, notXAndZ);
+         }
+
+         case TruthTable.AndXor:
+         {
+             Operand yAndZ = context.BitwiseAnd(y, z);
+             return context.BitwiseExclusiveOr(x, yAndZ);
+         }
+
+         case TruthTable.OrXor:
+         {
+             Operand yOrZ = context.BitwiseOr(y, z);
+             return context.BitwiseExclusiveOr(x, yOrZ);
+         }
+
+         case TruthTable.Xor3:
+         {
+             Operand yXorZ = context.BitwiseExclusiveOr(y, z);
+             return context.BitwiseExclusiveOr(x, yXorZ);
+         }
+
+         default:
+             return null;
+     }
+ }
+
+ /// <summary>
+ /// Rearranges the bits of a truth table.
+ /// Each <c>bitN</c> argument is the position that bit N of <paramref name="imm"/> is moved to;
+ /// bits that land on the same position are ORed together.
+ /// </summary>
+ /// <param name="imm">Truth table to permute</param>
+ /// <param name="bit7">Destination position of source bit 7</param>
+ /// <param name="bit6">Destination position of source bit 6</param>
+ /// <param name="bit5">Destination position of source bit 5</param>
+ /// <param name="bit4">Destination position of source bit 4</param>
+ /// <param name="bit3">Destination position of source bit 3</param>
+ /// <param name="bit2">Destination position of source bit 2</param>
+ /// <param name="bit1">Destination position of source bit 1</param>
+ /// <param name="bit0">Destination position of source bit 0</param>
+ /// <returns>The permuted truth table</returns>
+ private static TruthTable PermuteTable(TruthTable imm, int bit7, int bit6, int bit5, int bit4, int bit3, int bit2, int bit1, int bit0)
+ {
+     // Indexed by source bit number.
+     int[] destinations = { bit0, bit1, bit2, bit3, bit4, bit5, bit6, bit7 };
+
+     int source = (int)imm;
+     int permuted = 0;
+
+     for (int sourceBit = 0; sourceBit < destinations.Length; sourceBit++)
+     {
+         int bitValue = (source >> sourceBit) & 1;
+
+         permuted |= bitValue << destinations[sourceBit];
+     }
+
+     return (TruthTable)permuted;
+ }
+ }
+} \ No newline at end of file