diff options
Diffstat (limited to 'src/Ryujinx.Graphics.Shader/Instructions')
30 files changed, 8904 insertions, 0 deletions
// Origin: src/Ryujinx.Graphics.Shader/Instructions/AttributeMap.cs
// (added as a new file by the patch this text was extracted from, blob 562fb8d5, 351 lines).
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using Ryujinx.Graphics.Shader.Translation;
using System.Collections.Generic;

using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;

namespace Ryujinx.Graphics.Shader.Instructions
{
    /// <summary>
    /// Maps guest attribute byte offsets to the <see cref="IoVariable"/> they designate and
    /// generates the loads and stores that access those variables.
    /// </summary>
    static class AttributeMap
    {
        /// <summary>
        /// Bit set of shader stages, one bit per <see cref="ShaderStage"/> value.
        /// </summary>
        private enum StagesMask : byte
        {
            None = 0,
            Compute = 1 << (int)ShaderStage.Compute,
            Vertex = 1 << (int)ShaderStage.Vertex,
            TessellationControl = 1 << (int)ShaderStage.TessellationControl,
            TessellationEvaluation = 1 << (int)ShaderStage.TessellationEvaluation,
            Geometry = 1 << (int)ShaderStage.Geometry,
            Fragment = 1 << (int)ShaderStage.Fragment,

            Tessellation = TessellationControl | TessellationEvaluation,
            VertexTessellationGeometry = Vertex | Tessellation | Geometry,
            TessellationGeometryFragment = Tessellation | Geometry | Fragment,
            AllGraphics = Vertex | Tessellation | Geometry | Fragment
        }

        /// <summary>
        /// Description of the variable that a 4-byte attribute slot belongs to.
        /// </summary>
        /// <remarks>
        /// Declared <c>readonly</c> because instances are immutable and are passed around with <c>in</c>.
        /// </remarks>
        private readonly struct AttributeEntry
        {
            /// <summary>Offset of the first slot registered by the <c>Add</c> call that created this entry.</summary>
            public int BaseOffset { get; }

            /// <summary>Type the variable was registered with.</summary>
            public AggregateType Type { get; }

            /// <summary>Variable designated by the slot.</summary>
            public IoVariable IoVariable { get; }

            /// <summary>Stages where the variable may be accessed as an input.</summary>
            public StagesMask InputMask { get; }

            /// <summary>Stages where the variable may be accessed as an output.</summary>
            public StagesMask OutputMask { get; }

            public AttributeEntry(
                int baseOffset,
                AggregateType type,
                IoVariable ioVariable,
                StagesMask inputMask,
                StagesMask outputMask)
            {
                BaseOffset = baseOffset;
                Type = type;
                IoVariable = ioVariable;
                InputMask = inputMask;
                OutputMask = outputMask;
            }
        }

        private static readonly IReadOnlyDictionary<int, AttributeEntry> _attributes;
        private static readonly IReadOnlyDictionary<int, AttributeEntry> _attributesPerPatch;

        static AttributeMap()
        {
            _attributes = CreateMap();
            _attributesPerPatch = CreatePerPatchMap();
        }

        /// <summary>
        /// Builds the offset table used for regular (non per-patch) attributes.
        /// </summary>
        /// <returns>Table keyed by attribute byte offset</returns>
        private static IReadOnlyDictionary<int, AttributeEntry> CreateMap()
        {
            var map = new Dictionary<int, AttributeEntry>();

            Add(map, 0x060, AggregateType.S32, IoVariable.PrimitiveId, StagesMask.TessellationGeometryFragment, StagesMask.Geometry);
            Add(map, 0x064, AggregateType.S32, IoVariable.Layer, StagesMask.Fragment, StagesMask.VertexTessellationGeometry);
            Add(map, 0x068, AggregateType.S32, IoVariable.ViewportIndex, StagesMask.Fragment, StagesMask.VertexTessellationGeometry);
            Add(map, 0x06c, AggregateType.FP32, IoVariable.PointSize, StagesMask.None, StagesMask.VertexTessellationGeometry);
            Add(map, 0x070, AggregateType.Vector4 | AggregateType.FP32, IoVariable.Position, StagesMask.TessellationGeometryFragment, StagesMask.VertexTessellationGeometry);
            Add(map, 0x080, AggregateType.Vector4 | AggregateType.FP32, IoVariable.UserDefined, StagesMask.AllGraphics, StagesMask.VertexTessellationGeometry, 32);
            Add(map, 0x280, AggregateType.Vector4 | AggregateType.FP32, IoVariable.FrontColorDiffuse, StagesMask.TessellationGeometryFragment, StagesMask.VertexTessellationGeometry);
            Add(map, 0x290, AggregateType.Vector4 | AggregateType.FP32, IoVariable.FrontColorSpecular, StagesMask.TessellationGeometryFragment, StagesMask.VertexTessellationGeometry);
            Add(map, 0x2a0, AggregateType.Vector4 | AggregateType.FP32, IoVariable.BackColorDiffuse, StagesMask.TessellationGeometryFragment, StagesMask.VertexTessellationGeometry);
            Add(map, 0x2b0, AggregateType.Vector4 | AggregateType.FP32, IoVariable.BackColorSpecular, StagesMask.TessellationGeometryFragment, StagesMask.VertexTessellationGeometry);
            Add(map, 0x2c0, AggregateType.Array | AggregateType.FP32, IoVariable.ClipDistance, StagesMask.TessellationGeometryFragment, StagesMask.VertexTessellationGeometry, 8);
            Add(map, 0x2e0, AggregateType.Vector2 | AggregateType.FP32, IoVariable.PointCoord, StagesMask.Fragment, StagesMask.None);
            Add(map, 0x2e8, AggregateType.FP32, IoVariable.FogCoord, StagesMask.TessellationGeometryFragment, StagesMask.VertexTessellationGeometry);
            Add(map, 0x2f0, AggregateType.Vector2 | AggregateType.FP32, IoVariable.TessellationCoord, StagesMask.TessellationEvaluation, StagesMask.None);
            Add(map, 0x2f8, AggregateType.S32, IoVariable.InstanceId, StagesMask.Vertex, StagesMask.None);
            Add(map, 0x2fc, AggregateType.S32, IoVariable.VertexId, StagesMask.Vertex, StagesMask.None);
            Add(map, 0x300, AggregateType.Vector4 | AggregateType.FP32, IoVariable.TextureCoord, StagesMask.TessellationGeometryFragment, StagesMask.VertexTessellationGeometry);
            Add(map, 0x3a0, AggregateType.Array | AggregateType.S32, IoVariable.ViewportMask, StagesMask.Fragment, StagesMask.VertexTessellationGeometry);
            Add(map, 0x3fc, AggregateType.Bool, IoVariable.FrontFacing, StagesMask.Fragment, StagesMask.None);

            return map;
        }

        /// <summary>
        /// Builds the offset table used for per-patch attributes.
        /// </summary>
        /// <returns>Table keyed by per-patch attribute byte offset</returns>
        private static IReadOnlyDictionary<int, AttributeEntry> CreatePerPatchMap()
        {
            var map = new Dictionary<int, AttributeEntry>();

            Add(map, 0x000, AggregateType.Vector4 | AggregateType.FP32, IoVariable.TessellationLevelOuter, StagesMask.TessellationEvaluation, StagesMask.TessellationControl);
            Add(map, 0x010, AggregateType.Vector2 | AggregateType.FP32, IoVariable.TessellationLevelInner, StagesMask.TessellationEvaluation, StagesMask.TessellationControl);
            Add(map, 0x018, AggregateType.Vector4 | AggregateType.FP32, IoVariable.UserDefined, StagesMask.TessellationEvaluation, StagesMask.TessellationControl, 31, 0x200);

            return map;
        }

        /// <summary>
        /// Registers one table entry per 4-byte slot covered by a variable.
        /// </summary>
        /// <remarks>
        /// <paramref name="count"/> times the element count of <paramref name="type"/> slots are registered,
        /// all sharing <paramref name="offset"/> as their base offset. Registration stops as soon as the
        /// next slot offset reaches <paramref name="upperBound"/>.
        /// </remarks>
        /// <param name="attributes">Table receiving the entries</param>
        /// <param name="offset">Byte offset of the first slot</param>
        /// <param name="type">Type of the variable</param>
        /// <param name="ioVariable">Variable the slots belong to</param>
        /// <param name="inputMask">Stages where the variable is a valid input</param>
        /// <param name="outputMask">Stages where the variable is a valid output</param>
        /// <param name="count">Number of consecutive instances of <paramref name="type"/> to register</param>
        /// <param name="upperBound">Exclusive upper limit for the registered offsets</param>
        private static void Add(
            Dictionary<int, AttributeEntry> attributes,
            int offset,
            AggregateType type,
            IoVariable ioVariable,
            StagesMask inputMask,
            StagesMask outputMask,
            int count = 1,
            int upperBound = 0x400)
        {
            int baseOffset = offset;

            int elementsCount = GetElementCount(type);

            for (int index = 0; index < count; index++)
            {
                for (int elementIndex = 0; elementIndex < elementsCount; elementIndex++)
                {
                    attributes.Add(offset, new AttributeEntry(baseOffset, type, ioVariable, inputMask, outputMask));

                    offset += 4;

                    if (offset >= upperBound)
                    {
                        return;
                    }
                }
            }
        }

        /// <summary>
        /// Generates the load of the attribute located at a given offset.
        /// </summary>
        /// <param name="context">Emitter context used to generate the load</param>
        /// <param name="primVertex">Primitive vertex index operand, or null when there is none</param>
        /// <param name="offset">Byte offset of the attribute</param>
        /// <param name="isOutput">True to read an output, false to read an input</param>
        /// <param name="isPerPatch">True to look the offset up in the per-patch table</param>
        /// <returns>
        /// The loaded value, or a constant zero when the offset is unknown, is not usable on the current
        /// stage, or designates a variable the host does not support (a message is logged in those cases)
        /// </returns>
        public static Operand GenerateAttributeLoad(EmitterContext context, Operand primVertex, int offset, bool isOutput, bool isPerPatch)
        {
            if (!TryGetUsableEntry(context, offset, isOutput, isPerPatch, out AttributeEntry entry))
            {
                return Const(0);
            }

            if (HasInvocationId(context.Config.Stage, isOutput) && !isPerPatch)
            {
                // On stages with an invocation ID, outputs are indexed by it rather than by the supplied operand.
                primVertex = context.Load(StorageKind.Input, IoVariable.InvocationId);
            }

            int innerOffset = offset - entry.BaseOffset;
            int innerIndex = innerOffset / 4;

            StorageKind storageKind = isPerPatch
                ? (isOutput ? StorageKind.OutputPerPatch : StorageKind.InputPerPatch)
                : (isOutput ? StorageKind.Output : StorageKind.Input);
            IoVariable ioVariable = GetIoVariable(context.Config.Stage, in entry);
            AggregateType type = GetType(context.Config, isOutput, innerIndex, in entry);
            int elementCount = GetElementCount(type);

            bool isArray = type.HasFlag(AggregateType.Array);
            bool hasArrayIndex = isArray || context.Config.HasPerLocationInputOrOutput(ioVariable, isOutput);

            bool hasElementIndex = elementCount > 1;

            if (hasArrayIndex && hasElementIndex)
            {
                int arrayIndex = innerIndex / elementCount;
                int elementIndex = innerIndex - (arrayIndex * elementCount);

                return primVertex == null || isArray
                    ? context.Load(storageKind, ioVariable, primVertex, Const(arrayIndex), Const(elementIndex))
                    : context.Load(storageKind, ioVariable, Const(arrayIndex), primVertex, Const(elementIndex));
            }
            else if (hasArrayIndex || hasElementIndex)
            {
                return primVertex == null || isArray || !hasArrayIndex
                    ? context.Load(storageKind, ioVariable, primVertex, Const(innerIndex))
                    : context.Load(storageKind, ioVariable, Const(innerIndex), primVertex);
            }
            else
            {
                return context.Load(storageKind, ioVariable, primVertex);
            }
        }

        /// <summary>
        /// Generates the store of a value to the output attribute located at a given offset.
        /// </summary>
        /// <remarks>
        /// Nothing is generated, and a message is logged, when the offset is unknown, is not a valid output
        /// for the current stage, or designates a variable the host does not support.
        /// </remarks>
        /// <param name="context">Emitter context used to generate the store</param>
        /// <param name="offset">Byte offset of the attribute</param>
        /// <param name="isPerPatch">True to look the offset up in the per-patch table</param>
        /// <param name="value">Value to be stored</param>
        public static void GenerateAttributeStore(EmitterContext context, int offset, bool isPerPatch, Operand value)
        {
            if (!TryGetUsableEntry(context, offset, isOutput: true, isPerPatch, out AttributeEntry entry))
            {
                return;
            }

            Operand invocationId = null;

            if (HasInvocationId(context.Config.Stage, isOutput: true) && !isPerPatch)
            {
                invocationId = context.Load(StorageKind.Input, IoVariable.InvocationId);
            }

            int innerOffset = offset - entry.BaseOffset;
            int innerIndex = innerOffset / 4;

            StorageKind storageKind = isPerPatch ? StorageKind.OutputPerPatch : StorageKind.Output;
            IoVariable ioVariable = GetIoVariable(context.Config.Stage, in entry);
            AggregateType type = GetType(context.Config, isOutput: true, innerIndex, in entry);
            int elementCount = GetElementCount(type);

            bool isArray = type.HasFlag(AggregateType.Array);
            bool hasArrayIndex = isArray || context.Config.HasPerLocationInputOrOutput(ioVariable, isOutput: true);

            bool hasElementIndex = elementCount > 1;

            if (hasArrayIndex && hasElementIndex)
            {
                int arrayIndex = innerIndex / elementCount;
                int elementIndex = innerIndex - (arrayIndex * elementCount);

                if (invocationId == null || isArray)
                {
                    context.Store(storageKind, ioVariable, invocationId, Const(arrayIndex), Const(elementIndex), value);
                }
                else
                {
                    context.Store(storageKind, ioVariable, Const(arrayIndex), invocationId, Const(elementIndex), value);
                }
            }
            else if (hasArrayIndex || hasElementIndex)
            {
                if (invocationId == null || isArray || !hasArrayIndex)
                {
                    context.Store(storageKind, ioVariable, invocationId, Const(innerIndex), value);
                }
                else
                {
                    context.Store(storageKind, ioVariable, Const(innerIndex), invocationId, value);
                }
            }
            else
            {
                context.Store(storageKind, ioVariable, invocationId, value);
            }
        }

        /// <summary>
        /// Looks an attribute offset up and checks that it can be accessed on the current stage and host.
        /// </summary>
        /// <remarks>
        /// Shared by the load and store generators so both perform the same checks, in the same order,
        /// and log the same messages.
        /// </remarks>
        /// <param name="context">Emitter context providing the stage and the GPU accessor</param>
        /// <param name="offset">Byte offset of the attribute</param>
        /// <param name="isOutput">True to validate against the output mask, false for the input mask</param>
        /// <param name="isPerPatch">True to look the offset up in the per-patch table</param>
        /// <param name="entry">Entry found for the offset; only meaningful when the method returns true</param>
        /// <returns>True if the attribute can be accessed, false (after logging the reason) otherwise</returns>
        private static bool TryGetUsableEntry(EmitterContext context, int offset, bool isOutput, bool isPerPatch, out AttributeEntry entry)
        {
            IReadOnlyDictionary<int, AttributeEntry> map = isPerPatch ? _attributesPerPatch : _attributes;

            if (!map.TryGetValue(offset, out entry))
            {
                context.Config.GpuAccessor.Log($"Attribute offset 0x{offset:X} is not valid.");
                return false;
            }

            StagesMask validUseMask = isOutput ? entry.OutputMask : entry.InputMask;

            if (!IsStageInMask(context.Config.Stage, validUseMask))
            {
                context.Config.GpuAccessor.Log($"Attribute offset 0x{offset:X} ({entry.IoVariable}) is not valid for stage {context.Config.Stage}.");
                return false;
            }

            if (!IsSupportedByHost(context.Config.GpuAccessor, context.Config.Stage, entry.IoVariable))
            {
                context.Config.GpuAccessor.Log($"Attribute offset 0x{offset:X} ({entry.IoVariable}) is not supported by the host for stage {context.Config.Stage}.");
                return false;
            }

            return true;
        }

        /// <summary>
        /// Checks whether the bit of a shader stage is set on a stages mask.
        /// </summary>
        /// <param name="stage">Stage to test</param>
        /// <param name="mask">Mask to test against</param>
        /// <returns>True if the stage bit is set on the mask</returns>
        private static bool IsStageInMask(ShaderStage stage, StagesMask mask)
        {
            return ((StagesMask)(1 << (int)stage) & mask) != StagesMask.None;
        }

        /// <summary>
        /// Checks whether the host supports a variable on a given stage.
        /// </summary>
        /// <remarks>
        /// Only viewport index (outside of the geometry and fragment stages) and viewport mask are
        /// queried from the GPU accessor; every other variable is reported as supported.
        /// </remarks>
        /// <param name="gpuAccessor">GPU accessor used for the host capability queries</param>
        /// <param name="stage">Current shader stage</param>
        /// <param name="ioVariable">Variable to check</param>
        /// <returns>True if the variable can be used, false otherwise</returns>
        private static bool IsSupportedByHost(IGpuAccessor gpuAccessor, ShaderStage stage, IoVariable ioVariable)
        {
            if (ioVariable == IoVariable.ViewportIndex && stage != ShaderStage.Geometry && stage != ShaderStage.Fragment)
            {
                return gpuAccessor.QueryHostSupportsViewportIndexVertexTessellation();
            }
            else if (ioVariable == IoVariable.ViewportMask)
            {
                return gpuAccessor.QueryHostSupportsViewportMask();
            }

            return true;
        }

        /// <summary>
        /// Gets the output variable designated by an attribute offset on the current stage.
        /// </summary>
        /// <param name="config">Shader configuration providing the current stage</param>
        /// <param name="offset">Byte offset of the attribute (regular table only)</param>
        /// <param name="location">
        /// Offset from the entry base in 16-byte units when the variable has per-location outputs, zero otherwise
        /// </param>
        /// <returns>
        /// The variable, or <see cref="IoVariable.Invalid"/> when the offset is unknown or is not a valid
        /// output for the current stage
        /// </returns>
        public static IoVariable GetIoVariable(ShaderConfig config, int offset, out int location)
        {
            location = 0;

            if (!_attributes.TryGetValue(offset, out AttributeEntry entry))
            {
                return IoVariable.Invalid;
            }

            if (!IsStageInMask(config.Stage, entry.OutputMask))
            {
                return IoVariable.Invalid;
            }

            if (config.HasPerLocationInputOrOutput(entry.IoVariable, isOutput: true))
            {
                location = (offset - entry.BaseOffset) / 16;
            }

            return GetIoVariable(config.Stage, in entry);
        }

        /// <summary>
        /// Gets the variable to access for an entry, replacing position with fragment coordinate on the fragment stage.
        /// </summary>
        /// <param name="stage">Current shader stage</param>
        /// <param name="entry">Entry to get the variable from</param>
        /// <returns>Variable to access</returns>
        private static IoVariable GetIoVariable(ShaderStage stage, in AttributeEntry entry)
        {
            if (entry.IoVariable == IoVariable.Position && stage == ShaderStage.Fragment)
            {
                return IoVariable.FragmentCoord;
            }

            return entry.IoVariable;
        }

        /// <summary>
        /// Gets the type to use for an entry, querying the shader configuration for user defined
        /// variables and fragment output colors, whose type is not fixed by the table.
        /// </summary>
        /// <param name="config">Shader configuration</param>
        /// <param name="isOutput">True if the variable is accessed as an output</param>
        /// <param name="innerIndex">Index of the 4-byte slot relative to the entry base offset</param>
        /// <param name="entry">Entry to get the type for</param>
        /// <returns>Type of the variable</returns>
        private static AggregateType GetType(ShaderConfig config, bool isOutput, int innerIndex, in AttributeEntry entry)
        {
            AggregateType type = entry.Type;

            if (entry.IoVariable == IoVariable.UserDefined)
            {
                type = config.GetUserDefinedType(innerIndex / 4, isOutput);
            }
            else if (entry.IoVariable == IoVariable.FragmentOutputColor)
            {
                type = config.GetFragmentOutputColorType(innerIndex / 4);
            }

            return type;
        }

        /// <summary>
        /// Checks whether attribute accesses on a stage take a primitive vertex index.
        /// </summary>
        /// <param name="stage">Shader stage</param>
        /// <param name="isOutput">True for output accesses, which never take one</param>
        /// <returns>True for inputs of the tessellation control, tessellation evaluation and geometry stages</returns>
        public static bool HasPrimitiveVertex(ShaderStage stage, bool isOutput)
        {
            if (isOutput)
            {
                return false;
            }

            return stage == ShaderStage.TessellationControl ||
                   stage == ShaderStage.TessellationEvaluation ||
                   stage == ShaderStage.Geometry;
        }

        /// <summary>
        /// Checks whether attribute accesses on a stage are indexed by the invocation ID.
        /// </summary>
        /// <param name="stage">Shader stage</param>
        /// <param name="isOutput">True for output accesses</param>
        /// <returns>True only for outputs of the tessellation control stage</returns>
        public static bool HasInvocationId(ShaderStage stage, bool isOutput)
        {
            return isOutput && stage == ShaderStage.TessellationControl;
        }

        /// <summary>
        /// Gets the number of vector elements encoded on a type.
        /// </summary>
        /// <param name="type">Type to get the element count from</param>
        /// <returns>2, 3 or 4 for vector types, 1 for anything else</returns>
        private static int GetElementCount(AggregateType type)
        {
            return (type & AggregateType.ElementCountMask) switch
            {
                AggregateType.Vector2 => 2,
                AggregateType.Vector3 => 3,
                AggregateType.Vector4 => 4,
                _ => 1
            };
        }
    }
}
// Origin: src/Ryujinx.Graphics.Shader/Instructions/InstEmit.cs
// (added as a new file by the patch this text was extracted from, blob 3a9e658a, 379 lines).
using Ryujinx.Graphics.Shader.Decoders;
using Ryujinx.Graphics.Shader.Translation;

namespace Ryujinx.Graphics.Shader.Instructions
{
    /// <summary>
    /// Placeholder emitters for shader instructions that are not implemented.
    /// </summary>
    /// <remarks>
    /// Every method here fetches the current operation with the expected type and then logs that
    /// the instruction is not implemented; no code is generated. The operation is still fetched,
    /// with its result unused, so that whatever GetOp does when called is preserved.
    /// </remarks>
    static partial class InstEmit
    {
        /// <summary>Placeholder for AtomCas: only logs that the instruction is not implemented.</summary>
        public static void AtomCas(EmitterContext context)
        {
            context.GetOp<InstAtomCas>();

            context.Config.GpuAccessor.Log("Shader instruction AtomCas is not implemented.");
        }

        /// <summary>Placeholder for AtomsCas: only logs that the instruction is not implemented.</summary>
        public static void AtomsCas(EmitterContext context)
        {
            context.GetOp<InstAtomsCas>();

            context.Config.GpuAccessor.Log("Shader instruction AtomsCas is not implemented.");
        }

        /// <summary>Placeholder for B2r: only logs that the instruction is not implemented.</summary>
        public static void B2r(EmitterContext context)
        {
            context.GetOp<InstB2r>();

            context.Config.GpuAccessor.Log("Shader instruction B2r is not implemented.");
        }

        /// <summary>Placeholder for Bpt: only logs that the instruction is not implemented.</summary>
        public static void Bpt(EmitterContext context)
        {
            context.GetOp<InstBpt>();

            context.Config.GpuAccessor.Log("Shader instruction Bpt is not implemented.");
        }

        /// <summary>Placeholder for Cctl: only logs that the instruction is not implemented.</summary>
        public static void Cctl(EmitterContext context)
        {
            context.GetOp<InstCctl>();

            context.Config.GpuAccessor.Log("Shader instruction Cctl is not implemented.");
        }

        /// <summary>Placeholder for Cctll: only logs that the instruction is not implemented.</summary>
        public static void Cctll(EmitterContext context)
        {
            context.GetOp<InstCctll>();

            context.Config.GpuAccessor.Log("Shader instruction Cctll is not implemented.");
        }

        /// <summary>Placeholder for Cctlt: only logs that the instruction is not implemented.</summary>
        public static void Cctlt(EmitterContext context)
        {
            context.GetOp<InstCctlt>();

            context.Config.GpuAccessor.Log("Shader instruction Cctlt is not implemented.");
        }

        /// <summary>Placeholder for Cs2r: only logs that the instruction is not implemented.</summary>
        public static void Cs2r(EmitterContext context)
        {
            context.GetOp<InstCs2r>();

            context.Config.GpuAccessor.Log("Shader instruction Cs2r is not implemented.");
        }

        /// <summary>Placeholder for FchkR: only logs that the instruction is not implemented.</summary>
        public static void FchkR(EmitterContext context)
        {
            context.GetOp<InstFchkR>();

            context.Config.GpuAccessor.Log("Shader instruction FchkR is not implemented.");
        }

        /// <summary>Placeholder for FchkI: only logs that the instruction is not implemented.</summary>
        public static void FchkI(EmitterContext context)
        {
            context.GetOp<InstFchkI>();

            context.Config.GpuAccessor.Log("Shader instruction FchkI is not implemented.");
        }

        /// <summary>Placeholder for FchkC: only logs that the instruction is not implemented.</summary>
        public static void FchkC(EmitterContext context)
        {
            context.GetOp<InstFchkC>();

            context.Config.GpuAccessor.Log("Shader instruction FchkC is not implemented.");
        }

        /// <summary>Placeholder for Getcrsptr: only logs that the instruction is not implemented.</summary>
        public static void Getcrsptr(EmitterContext context)
        {
            context.GetOp<InstGetcrsptr>();

            context.Config.GpuAccessor.Log("Shader instruction Getcrsptr is not implemented.");
        }

        /// <summary>Placeholder for Getlmembase: only logs that the instruction is not implemented.</summary>
        public static void Getlmembase(EmitterContext context)
        {
            context.GetOp<InstGetlmembase>();

            context.Config.GpuAccessor.Log("Shader instruction Getlmembase is not implemented.");
        }

        /// <summary>Placeholder for Ide: only logs that the instruction is not implemented.</summary>
        public static void Ide(EmitterContext context)
        {
            context.GetOp<InstIde>();

            context.Config.GpuAccessor.Log("Shader instruction Ide is not implemented.");
        }

        /// <summary>Placeholder for IdpR: only logs that the instruction is not implemented.</summary>
        public static void IdpR(EmitterContext context)
        {
            context.GetOp<InstIdpR>();

            context.Config.GpuAccessor.Log("Shader instruction IdpR is not implemented.");
        }

        /// <summary>Placeholder for IdpC: only logs that the instruction is not implemented.</summary>
        public static void IdpC(EmitterContext context)
        {
            context.GetOp<InstIdpC>();

            context.Config.GpuAccessor.Log("Shader instruction IdpC is not implemented.");
        }

        /// <summary>Placeholder for ImadspR: only logs that the instruction is not implemented.</summary>
        public static void ImadspR(EmitterContext context)
        {
            context.GetOp<InstImadspR>();

            context.Config.GpuAccessor.Log("Shader instruction ImadspR is not implemented.");
        }

        /// <summary>Placeholder for ImadspI: only logs that the instruction is not implemented.</summary>
        public static void ImadspI(EmitterContext context)
        {
            context.GetOp<InstImadspI>();

            context.Config.GpuAccessor.Log("Shader instruction ImadspI is not implemented.");
        }

        /// <summary>Placeholder for ImadspC: only logs that the instruction is not implemented.</summary>
        public static void ImadspC(EmitterContext context)
        {
            context.GetOp<InstImadspC>();

            context.Config.GpuAccessor.Log("Shader instruction ImadspC is not implemented.");
        }

        /// <summary>Placeholder for ImadspRc: only logs that the instruction is not implemented.</summary>
        public static void ImadspRc(EmitterContext context)
        {
            context.GetOp<InstImadspRc>();

            context.Config.GpuAccessor.Log("Shader instruction ImadspRc is not implemented.");
        }

        /// <summary>Placeholder for Jcal: only logs that the instruction is not implemented.</summary>
        public static void Jcal(EmitterContext context)
        {
            context.GetOp<InstJcal>();

            context.Config.GpuAccessor.Log("Shader instruction Jcal is not implemented.");
        }

        /// <summary>Placeholder for Jmp: only logs that the instruction is not implemented.</summary>
        public static void Jmp(EmitterContext context)
        {
            context.GetOp<InstJmp>();

            context.Config.GpuAccessor.Log("Shader instruction Jmp is not implemented.");
        }

        /// <summary>Placeholder for Jmx: only logs that the instruction is not implemented.</summary>
        public static void Jmx(EmitterContext context)
        {
            context.GetOp<InstJmx>();

            context.Config.GpuAccessor.Log("Shader instruction Jmx is not implemented.");
        }

        /// <summary>Placeholder for Ld: only logs that the instruction is not implemented.</summary>
        public static void Ld(EmitterContext context)
        {
            context.GetOp<InstLd>();

            context.Config.GpuAccessor.Log("Shader instruction Ld is not implemented.");
        }

        /// <summary>Placeholder for Lepc: only logs that the instruction is not implemented.</summary>
        public static void Lepc(EmitterContext context)
        {
            context.GetOp<InstLepc>();

            context.Config.GpuAccessor.Log("Shader instruction Lepc is not implemented.");
        }

        /// <summary>Placeholder for Longjmp: only logs that the instruction is not implemented.</summary>
        public static void Longjmp(EmitterContext context)
        {
            context.GetOp<InstLongjmp>();

            context.Config.GpuAccessor.Log("Shader instruction Longjmp is not implemented.");
        }

        /// <summary>Placeholder for P2rR: only logs that the instruction is not implemented.</summary>
        public static void P2rR(EmitterContext context)
        {
            context.GetOp<InstP2rR>();

            context.Config.GpuAccessor.Log("Shader instruction P2rR is not implemented.");
        }

        /// <summary>Placeholder for P2rI: only logs that the instruction is not implemented.</summary>
        public static void P2rI(EmitterContext context)
        {
            context.GetOp<InstP2rI>();

            context.Config.GpuAccessor.Log("Shader instruction P2rI is not implemented.");
        }

        /// <summary>Placeholder for P2rC: only logs that the instruction is not implemented.</summary>
        public static void P2rC(EmitterContext context)
        {
            context.GetOp<InstP2rC>();

            context.Config.GpuAccessor.Log("Shader instruction P2rC is not implemented.");
        }

        /// <summary>Placeholder for Pexit: only logs that the instruction is not implemented.</summary>
        public static void Pexit(EmitterContext context)
        {
            context.GetOp<InstPexit>();

            context.Config.GpuAccessor.Log("Shader instruction Pexit is not implemented.");
        }

        /// <summary>Placeholder for Pixld: only logs that the instruction is not implemented.</summary>
        public static void Pixld(EmitterContext context)
        {
            context.GetOp<InstPixld>();

            context.Config.GpuAccessor.Log("Shader instruction Pixld is not implemented.");
        }

        /// <summary>Placeholder for Plongjmp: only logs that the instruction is not implemented.</summary>
        public static void Plongjmp(EmitterContext context)
        {
            context.GetOp<InstPlongjmp>();

            context.Config.GpuAccessor.Log("Shader instruction Plongjmp is not implemented.");
        }

        /// <summary>Placeholder for Pret: only logs that the instruction is not implemented.</summary>
        public static void Pret(EmitterContext context)
        {
            context.GetOp<InstPret>();

            context.Config.GpuAccessor.Log("Shader instruction Pret is not implemented.");
        }

        /// <summary>Placeholder for PrmtR: only logs that the instruction is not implemented.</summary>
        public static void PrmtR(EmitterContext context)
        {
            context.GetOp<InstPrmtR>();

            context.Config.GpuAccessor.Log("Shader instruction PrmtR is not implemented.");
        }

        /// <summary>Placeholder for PrmtI: only logs that the instruction is not implemented.</summary>
        public static void PrmtI(EmitterContext context)
        {
            context.GetOp<InstPrmtI>();

            context.Config.GpuAccessor.Log("Shader instruction PrmtI is not implemented.");
        }

        /// <summary>Placeholder for PrmtC: only logs that the instruction is not implemented.</summary>
        public static void PrmtC(EmitterContext context)
        {
            context.GetOp<InstPrmtC>();

            context.Config.GpuAccessor.Log("Shader instruction PrmtC is not implemented.");
        }

        /// <summary>Placeholder for PrmtRc: only logs that the instruction is not implemented.</summary>
        public static void PrmtRc(EmitterContext context)
        {
            context.GetOp<InstPrmtRc>();

            context.Config.GpuAccessor.Log("Shader instruction PrmtRc is not implemented.");
        }

        /// <summary>Placeholder for R2b: only logs that the instruction is not implemented.</summary>
        public static void R2b(EmitterContext context)
        {
            context.GetOp<InstR2b>();

            context.Config.GpuAccessor.Log("Shader instruction R2b is not implemented.");
        }

        /// <summary>Placeholder for Ram: only logs that the instruction is not implemented.</summary>
        public static void Ram(EmitterContext context)
        {
            context.GetOp<InstRam>();

            context.Config.GpuAccessor.Log("Shader instruction Ram is not implemented.");
        }

        /// <summary>Placeholder for Rtt: only logs that the instruction is not implemented.</summary>
        public static void Rtt(EmitterContext context)
        {
            context.GetOp<InstRtt>();

            context.Config.GpuAccessor.Log("Shader instruction Rtt is not implemented.");
        }

        /// <summary>Placeholder for Sam: only logs that the instruction is not implemented.</summary>
        public static void Sam(EmitterContext context)
        {
            context.GetOp<InstSam>();

            context.Config.GpuAccessor.Log("Shader instruction Sam is not implemented.");
        }

        /// <summary>Placeholder for Setcrsptr: only logs that the instruction is not implemented.</summary>
        public static void Setcrsptr(EmitterContext context)
        {
            context.GetOp<InstSetcrsptr>();

            context.Config.GpuAccessor.Log("Shader instruction Setcrsptr is not implemented.");
        }

        /// <summary>Placeholder for Setlmembase: only logs that the instruction is not implemented.</summary>
        public static void Setlmembase(EmitterContext context)
        {
            context.GetOp<InstSetlmembase>();

            context.Config.GpuAccessor.Log("Shader instruction Setlmembase is not implemented.");
        }

        /// <summary>Placeholder for St: only logs that the instruction is not implemented.</summary>
        public static void St(EmitterContext context)
        {
            context.GetOp<InstSt>();

            context.Config.GpuAccessor.Log("Shader instruction St is not implemented.");
        }

        /// <summary>Placeholder for Stp: only logs that the instruction is not implemented.</summary>
        public static void Stp(EmitterContext context)
        {
            context.GetOp<InstStp>();

            context.Config.GpuAccessor.Log("Shader instruction Stp is not implemented.");
        }

        /// <summary>Placeholder for Txa: only logs that the instruction is not implemented.</summary>
        public static void Txa(EmitterContext context)
        {
            context.GetOp<InstTxa>();

            context.Config.GpuAccessor.Log("Shader instruction Txa is not implemented.");
        }

        /// <summary>Placeholder for Vabsdiff: only logs that the instruction is not implemented.</summary>
        public static void Vabsdiff(EmitterContext context)
        {
            context.GetOp<InstVabsdiff>();

            context.Config.GpuAccessor.Log("Shader instruction Vabsdiff is not implemented.");
        }

        /// <summary>Placeholder for Vabsdiff4: only logs that the instruction is not implemented.</summary>
        public static void Vabsdiff4(EmitterContext context)
        {
            context.GetOp<InstVabsdiff4>();

            context.Config.GpuAccessor.Log("Shader instruction Vabsdiff4 is not implemented.");
        }

        /// <summary>Placeholder for Vadd: only logs that the instruction is not implemented.</summary>
        public static void Vadd(EmitterContext context)
        {
            context.GetOp<InstVadd>();

            context.Config.GpuAccessor.Log("Shader instruction Vadd is not implemented.");
        }

        /// <summary>Placeholder for Votevtg: only logs that the instruction is not implemented.</summary>
        public static void Votevtg(EmitterContext context)
        {
            context.GetOp<InstVotevtg>();

            context.Config.GpuAccessor.Log("Shader instruction Votevtg is not implemented.");
        }

        /// <summary>Placeholder for Vset: only logs that the instruction is not implemented.</summary>
        public static void Vset(EmitterContext context)
        {
            context.GetOp<InstVset>();

            context.Config.GpuAccessor.Log("Shader instruction Vset is not implemented.");
        }

        /// <summary>Placeholder for Vshl: only logs that the instruction is not implemented.</summary>
        public static void Vshl(EmitterContext context)
        {
            context.GetOp<InstVshl>();

            context.Config.GpuAccessor.Log("Shader instruction Vshl is not implemented.");
        }

        /// <summary>Placeholder for Vshr: only logs that the instruction is not implemented.</summary>
        public static void Vshr(EmitterContext context)
        {
            context.GetOp<InstVshr>();

            context.Config.GpuAccessor.Log("Shader instruction Vshr is not implemented.");
        }
    }
}
// Origin: src/Ryujinx.Graphics.Shader/Instructions/InstEmitAluHelper.cs
// (added as a new file by the patch this text was extracted from, blob 879075ba, 160 lines).
using Ryujinx.Graphics.Shader.Decoders;
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using Ryujinx.Graphics.Shader.Translation;
using System;

using static Ryujinx.Graphics.Shader.Instructions.InstEmitHelper;
using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;

namespace Ryujinx.Graphics.Shader.Instructions
{
    /// <summary>
    /// Helpers shared by the arithmetic and logic instruction emitters.
    /// </summary>
    static class InstEmitAluHelper
    {
        /// <summary>
        /// Gets the smallest value representable by an integer destination format.
        /// </summary>
        /// <param name="type">Integer format</param>
        /// <returns>Minimum value of the format</returns>
        /// <exception cref="ArgumentException">The format is not one of the handled integer formats</exception>
        public static long GetIntMin(IDstFmt type)
        {
            switch (type)
            {
                case IDstFmt.U16:
                    return ushort.MinValue;
                case IDstFmt.S16:
                    return short.MinValue;
                case IDstFmt.U32:
                    return uint.MinValue;
                case IDstFmt.S32:
                    return int.MinValue;
                default:
                    throw new ArgumentException($"The type \"{type}\" is not a supported integer type.");
            }
        }

        /// <summary>
        /// Gets the largest value representable by an integer destination format.
        /// </summary>
        /// <param name="type">Integer format</param>
        /// <returns>Maximum value of the format</returns>
        /// <exception cref="ArgumentException">The format is not one of the handled integer formats</exception>
        public static long GetIntMax(IDstFmt type)
        {
            switch (type)
            {
                case IDstFmt.U16:
                    return ushort.MaxValue;
                case IDstFmt.S16:
                    return short.MaxValue;
                case IDstFmt.U32:
                    return uint.MaxValue;
                case IDstFmt.S32:
                    return int.MaxValue;
                default:
                    throw new ArgumentException($"The type \"{type}\" is not a supported integer type.");
            }
        }

        /// <summary>
        /// Gets the smallest value representable by an integer source/destination format.
        /// </summary>
        /// <param name="type">Integer format</param>
        /// <returns>Minimum value of the format</returns>
        /// <exception cref="ArgumentException">The format is not one of the handled integer formats</exception>
        public static long GetIntMin(ISrcDstFmt type)
        {
            switch (type)
            {
                case ISrcDstFmt.U8:
                    return byte.MinValue;
                case ISrcDstFmt.S8:
                    return sbyte.MinValue;
                case ISrcDstFmt.U16:
                    return ushort.MinValue;
                case ISrcDstFmt.S16:
                    return short.MinValue;
                case ISrcDstFmt.U32:
                    return uint.MinValue;
                case ISrcDstFmt.S32:
                    return int.MinValue;
                default:
                    throw new ArgumentException($"The type \"{type}\" is not a supported integer type.");
            }
        }

        /// <summary>
        /// Gets the largest value representable by an integer source/destination format.
        /// </summary>
        /// <param name="type">Integer format</param>
        /// <returns>Maximum value of the format</returns>
        /// <exception cref="ArgumentException">The format is not one of the handled integer formats</exception>
        public static long GetIntMax(ISrcDstFmt type)
        {
            switch (type)
            {
                case ISrcDstFmt.U8:
                    return byte.MaxValue;
                case ISrcDstFmt.S8:
                    return sbyte.MaxValue;
                case ISrcDstFmt.U16:
                    return ushort.MaxValue;
                case ISrcDstFmt.S16:
                    return short.MaxValue;
                case ISrcDstFmt.U32:
                    return uint.MaxValue;
                case ISrcDstFmt.S32:
                    return int.MaxValue;
                default:
                    throw new ArgumentException($"The type \"{type}\" is not a supported integer type.");
            }
        }

        /// <summary>
        /// Combines a value with a predicate using the requested boolean operation.
        /// </summary>
        /// <param name="context">Emitter context</param>
        /// <param name="logicOp">Operation to apply</param>
        /// <param name="input">Left hand side value</param>
        /// <param name="pred">Predicate value</param>
        /// <returns>Result of the operation, or <paramref name="input"/> unchanged for any other operation</returns>
        public static Operand GetPredLogicalOp(EmitterContext context, BoolOp logicOp, Operand input, Operand pred)
        {
            switch (logicOp)
            {
                case BoolOp.And:
                    return context.BitwiseAnd(input, pred);
                case BoolOp.Or:
                    return context.BitwiseOr(input, pred);
                case BoolOp.Xor:
                    return context.BitwiseExclusiveOr(input, pred);
                default:
                    return input;
            }
        }

        /// <summary>
        /// Extracts the byte or half-word selected by <paramref name="type"/> and extends it to 32 bits.
        /// </summary>
        /// <param name="context">Emitter context</param>
        /// <param name="src">Value to extract from</param>
        /// <param name="type">Selects the part to extract and whether it is zero or sign extended</param>
        /// <returns>The extended value, or <paramref name="src"/> unchanged for any other selection</returns>
        public static Operand Extend(EmitterContext context, Operand src, VectorSelect type)
        {
            int shift;
            int bits;
            bool isSigned;

            switch (type)
            {
                case VectorSelect.U8B0:
                    (shift, bits, isSigned) = (0, 8, false);
                    break;
                case VectorSelect.U8B1:
                    (shift, bits, isSigned) = (8, 8, false);
                    break;
                case VectorSelect.U8B2:
                    (shift, bits, isSigned) = (16, 8, false);
                    break;
                case VectorSelect.U8B3:
                    (shift, bits, isSigned) = (24, 8, false);
                    break;
                case VectorSelect.U16H0:
                    (shift, bits, isSigned) = (0, 16, false);
                    break;
                case VectorSelect.U16H1:
                    (shift, bits, isSigned) = (16, 16, false);
                    break;
                case VectorSelect.S8B0:
                    (shift, bits, isSigned) = (0, 8, true);
                    break;
                case VectorSelect.S8B1:
                    (shift, bits, isSigned) = (8, 8, true);
                    break;
                case VectorSelect.S8B2:
                    (shift, bits, isSigned) = (16, 8, true);
                    break;
                case VectorSelect.S8B3:
                    (shift, bits, isSigned) = (24, 8, true);
                    break;
                case VectorSelect.S16H0:
                    (shift, bits, isSigned) = (0, 16, true);
                    break;
                case VectorSelect.S16H1:
                    (shift, bits, isSigned) = (16, 16, true);
                    break;
                default:
                    return src;
            }

            // The shift is emitted even when the amount is zero, so the generated code
            // is the same for every selection of a given width.
            Operand shifted = context.ShiftRightU32(src, Const(shift));

            return isSigned
                ? SignExtendTo32(context, shifted, bits)
                : ZeroExtendTo32(context, shifted, bits);
        }

        /// <summary>
        /// Updates the zero and negative flags from an integer result.
        /// </summary>
        /// <param name="context">Emitter context</param>
        /// <param name="dest">Result to derive the flags from</param>
        /// <param name="setCC">False to leave the flags untouched</param>
        /// <param name="extended">True when the result is one word of a longer, multi-word operation</param>
        public static void SetZnFlags(EmitterContext context, Operand dest, bool setCC, bool extended = false)
        {
            if (setCC)
            {
                Operand zeroFlag;

                if (extended)
                {
                    // For a multi-word operation the result is only zero if every word is zero,
                    // so the flag produced by the previous words is ANDed with the one for this word.
                    Operand previousZF = GetZF();

                    zeroFlag = context.BitwiseAnd(context.ICompareEqual(dest, Const(0)), previousZF);
                }
                else
                {
                    zeroFlag = context.ICompareEqual(dest, Const(0));
                }

                context.Copy(GetZF(), zeroFlag);
                context.Copy(GetNF(), context.ICompareLess(dest, Const(0)));
            }
        }

        /// <summary>
        /// Updates the zero and negative flags from a floating-point result.
        /// </summary>
        /// <param name="context">Emitter context</param>
        /// <param name="dest">Result to derive the flags from</param>
        /// <param name="setCC">False to leave the flags untouched</param>
        /// <param name="fpType">Floating-point precision of <paramref name="dest"/></param>
        public static void SetFPZnFlags(EmitterContext context, Operand dest, bool setCC, Instruction fpType = Instruction.FP32)
        {
            if (!setCC)
            {
                return;
            }

            Operand zero = ConstF(0);

            if (fpType == Instruction.FP64)
            {
                // The constant is created as FP32, so it is converted to match a FP64 result.
                zero = context.FP32ConvertToFP64(zero);
            }

            Operand isZero = context.FPCompareEqual(dest, zero, fpType);
            context.Copy(GetZF(), isZero);

            Operand isNegative = context.FPCompareLess(dest, zero, fpType);
            context.Copy(GetNF(), isNegative);
        }

        /// <summary>
        /// Negates a 64-bit value held as two 32-bit words, by inverting both words and adding one.
        /// </summary>
        /// <param name="context">Emitter context</param>
        /// <param name="low">Low word</param>
        /// <param name="high">High word</param>
        /// <returns>Low and high words of the negated value</returns>
        public static (Operand, Operand) NegateLong(EmitterContext context, Operand low, Operand high)
        {
            Operand invertedLow = context.BitwiseNot(low);
            Operand invertedHigh = context.BitwiseNot(high);

            Operand negatedLow = AddWithCarry(context, invertedLow, Const(1), out Operand carry);
            Operand negatedHigh = context.IAdd(invertedHigh, carry);

            return (negatedLow, negatedHigh);
        }

        /// <summary>
        /// Adds two values and produces the carry out of the addition.
        /// </summary>
        /// <param name="context">Emitter context</param>
        /// <param name="lhs">Left hand side value</param>
        /// <param name="rhs">Right hand side value</param>
        /// <param name="carryOut">Negated result of the unsigned "sum is less than lhs" comparison</param>
        /// <returns>Sum of both values</returns>
        public static Operand AddWithCarry(EmitterContext context, Operand lhs, Operand rhs, out Operand carryOut)
        {
            Operand sum = context.IAdd(lhs, rhs);

            // C = Rd < Rn: the addition wrapped around if the sum is below the first addend.
            // NOTE(review): the negation presumably turns an all-ones "true" into 1 - confirm.
            Operand wrapped = context.ICompareLessUnsigned(sum, lhs);

            carryOut = context.INegate(wrapped);

            return sum;
        }
    }
}
// src/Ryujinx.Graphics.Shader/Instructions/InstEmitAttribute.cs
using Ryujinx.Graphics.Shader.Decoders;
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using Ryujinx.Graphics.Shader.Translation;

using static Ryujinx.Graphics.Shader.Instructions.InstEmitHelper;
using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;

namespace Ryujinx.Graphics.Shader.Instructions
{
    static partial class InstEmit
    {
        /// <summary>
        /// AL2P: writes (SrcA + Imm11) to the destination register.
        /// </summary>
        public static void Al2p(EmitterContext context)
        {
            InstAl2p inst = context.GetOp<InstAl2p>();

            Operand dest = GetDest(inst.Dest);
            Operand sum = context.IAdd(GetSrcReg(context, inst.SrcA), Const(inst.Imm11));

            context.Copy(dest, sum);
        }

        /// <summary>
        /// ALD: loads AlSize + 1 consecutive attribute components into consecutive
        /// destination registers, stopping early when the destination reaches RZ.
        /// </summary>
        public static void Ald(EmitterContext context)
        {
            InstAld inst = context.GetOp<InstAld>();

            // Some of those attributes are per invocation,
            // so we should ignore any primitive vertex indexing for those.
            bool usesVertexIndex = AttributeMap.HasPrimitiveVertex(context.Config.Stage, inst.O) && !inst.P;

            if (!inst.Phys)
            {
                usesVertexIndex &= HasPrimitiveVertex(inst.Imm11);
            }

            Operand primVertex = null;

            if (usesVertexIndex)
            {
                primVertex = context.Copy(GetSrcReg(context, inst.SrcB));
            }

            int count = (int)inst.AlSize + 1;

            for (int i = 0; i < count; i++)
            {
                Register rd = new Register(inst.Dest + i, RegisterType.Gpr);

                if (rd.IsRZ)
                {
                    break;
                }

                if (inst.Phys)
                {
                    // Physical addressing: the attribute location comes from a register.
                    EmitPhysicalAttributeIndices(context, inst.SrcA, out Operand vecIndex, out Operand elemIndex);

                    StorageKind storageKind = inst.O ? StorageKind.Output : StorageKind.Input;

                    context.Copy(Register(rd), context.Load(storageKind, IoVariable.UserDefined, primVertex, vecIndex, elemIndex));

                    continue;
                }

                int offset = FixedFuncToUserAttribute(context.Config, inst.Imm11 + i * 4, inst.O);

                context.FlagAttributeRead(offset);

                bool isOutput = inst.O && CanLoadOutput(offset);

                // The Vulkan ID -> index conversion is only attempted for non per-patch input loads
                // where SrcB is RZ.
                bool tryVulkanConversion = inst.SrcB == RegisterConsts.RegisterZeroIndex && !inst.P && !isOutput;

                if (tryVulkanConversion && TryConvertIdToIndexForVulkan(context, offset, out Operand converted))
                {
                    context.Copy(Register(rd), converted);
                }
                else
                {
                    // When P is set the load is per-patch; otherwise it is a regular attribute load.
                    context.Copy(Register(rd), AttributeMap.GenerateAttributeLoad(context, primVertex, offset, isOutput, inst.P));
                }
            }
        }

        /// <summary>
        /// AST: stores AlSize + 1 consecutive registers (starting at SrcB) to output attributes.
        /// </summary>
        public static void Ast(EmitterContext context)
        {
            InstAst inst = context.GetOp<InstAst>();

            int count = (int)inst.AlSize + 1;

            for (int i = 0; i < count; i++)
            {
                int regIndex = inst.SrcB + i;

                if (regIndex > RegisterConsts.RegisterZeroIndex)
                {
                    break;
                }

                Register rd = new Register(regIndex, RegisterType.Gpr);

                if (inst.Phys)
                {
                    EmitPhysicalAttributeIndices(context, inst.SrcA, out Operand vecIndex, out Operand elemIndex);

                    Operand invocationId = null;

                    if (AttributeMap.HasInvocationId(context.Config.Stage, isOutput: true))
                    {
                        invocationId = context.Load(StorageKind.Input, IoVariable.InvocationId);
                    }

                    context.Store(StorageKind.Output, IoVariable.UserDefined, invocationId, vecIndex, elemIndex, Register(rd));

                    continue;
                }

                // TODO: Support indirect stores using Ra.

                int offset = inst.Imm11 + i * 4;

                if (!context.Config.IsUsedOutputAttribute(offset))
                {
                    // Note: this ends the whole instruction, not just the current component.
                    return;
                }

                offset = FixedFuncToUserAttribute(context.Config, offset, isOutput: true);

                context.FlagAttributeWritten(offset);

                AttributeMap.GenerateAttributeStore(context, offset, inst.P, Register(rd));
            }
        }

        /// <summary>
        /// IPA: loads an input attribute (indexed through SrcA when Idx is set, otherwise at Imm10),
        /// applies the per-attribute adjustments below, and writes the result to the destination.
        /// </summary>
        public static void Ipa(EmitterContext context)
        {
            InstIpa inst = context.GetOp<InstIpa>();

            context.FlagAttributeRead(inst.Imm10);

            Operand result;
            bool isFixedFunc = false;

            if (inst.Idx)
            {
                EmitPhysicalAttributeIndices(context, inst.SrcA, out Operand vecIndex, out Operand elemIndex);

                result = context.Load(StorageKind.Input, IoVariable.UserDefined, null, vecIndex, elemIndex);

                Operand fragCoordW = context.Load(StorageKind.Input, IoVariable.FragmentCoord, null, Const(3));

                result = context.FPMultiply(result, fragCoordW);
            }
            else
            {
                int attr = inst.Imm10;

                isFixedFunc = TryFixedFuncToUserAttributeIpa(context, attr, out result);

                bool isUserAttribute = attr >= AttributeConsts.UserAttributeBase && attr < AttributeConsts.UserAttributeEnd;

                if (isUserAttribute)
                {
                    int location = (attr - AttributeConsts.UserAttributeBase) >> 4;

                    if (context.Config.ImapTypes[location].GetFirstUsedType() == PixelImap.Perspective)
                    {
                        Operand fragCoordW = context.Load(StorageKind.Input, IoVariable.FragmentCoord, null, Const(3));

                        result = context.FPMultiply(result, fragCoordW);
                    }
                }
                else if (attr == AttributeConsts.PositionX || attr == AttributeConsts.PositionY)
                {
                    // FragCoord X/Y must be divided by the render target scale, if resolution scaling is active,
                    // because the shader code is not expecting scaled values.
                    Operand renderScale = context.Load(StorageKind.Input, IoVariable.SupportBlockRenderScale, null, Const(0));

                    result = context.FPDivide(result, renderScale);
                }
                else if (attr == AttributeConsts.FrontFacing && context.Config.GpuAccessor.QueryHostHasFrontFacingBug())
                {
                    // gl_FrontFacing sometimes has incorrect (flipped) values depending how it is accessed on Intel GPUs.
                    // This weird trick makes it behave.
                    Operand negated = context.INegate(context.IConvertS32ToFP32(result));

                    result = context.ICompareLess(negated, Const(0));
                }
            }

            if (inst.IpaOp == IpaOp.Multiply && !isFixedFunc)
            {
                result = context.FPMultiply(result, GetSrcReg(context, inst.SrcB));
            }

            result = context.FPSaturate(result, inst.Sat);

            context.Copy(GetDest(inst.Dest), result);
        }

        /// <summary>
        /// ISBERD: copies SrcA to the destination register.
        /// </summary>
        public static void Isberd(EmitterContext context)
        {
            InstIsberd inst = context.GetOp<InstIsberd>();

            // This instruction performs a load from ISBE (Internal Stage Buffer Entry) memory.
            // Here, we just propagate the offset, as the result from this instruction is usually
            // used with ALD to perform vertex load on geometry or tessellation shaders.
            // The offset is calculated as (PrimitiveIndex * VerticesPerPrimitive) + VertexIndex.
            // Since we hardcode PrimitiveIndex to zero, then the offset will be just VertexIndex.
            Operand dest = GetDest(inst.Dest);
            Operand vertexIndex = GetSrcReg(context, inst.SrcA);

            context.Copy(dest, vertexIndex);
        }

        /// <summary>OUT (register form): emits a vertex and/or ends the primitive per OutType.</summary>
        public static void OutR(EmitterContext context)
        {
            InstOutR inst = context.GetOp<InstOutR>();

            bool emit = inst.OutType.HasFlag(OutType.Emit);
            bool cut = inst.OutType.HasFlag(OutType.Cut);

            EmitOut(context, emit, cut);
        }

        /// <summary>OUT (immediate form): emits a vertex and/or ends the primitive per OutType.</summary>
        public static void OutI(EmitterContext context)
        {
            InstOutI inst = context.GetOp<InstOutI>();

            bool emit = inst.OutType.HasFlag(OutType.Emit);
            bool cut = inst.OutType.HasFlag(OutType.Cut);

            EmitOut(context, emit, cut);
        }

        /// <summary>OUT (constant buffer form): emits a vertex and/or ends the primitive per OutType.</summary>
        public static void OutC(EmitterContext context)
        {
            InstOutC inst = context.GetOp<InstOutC>();

            bool emit = inst.OutType.HasFlag(OutType.Emit);
            bool cut = inst.OutType.HasFlag(OutType.Cut);

            EmitOut(context, emit, cut);
        }

        /// <summary>
        /// Shared OUT implementation. Logs when neither flag is set, emits a vertex when
        /// <paramref name="emit"/> is set, then ends the primitive when <paramref name="cut"/> is set.
        /// </summary>
        private static void EmitOut(EmitterContext context, bool emit, bool cut)
        {
            if (!emit && !cut)
            {
                context.Config.GpuAccessor.Log("Invalid OUT encoding.");
            }

            if (emit)
            {
                if (!context.Config.LastInVertexPipeline)
                {
                    context.EmitVertex();
                }
                else
                {
                    context.PrepareForVertexReturn(out var savedX, out var savedY, out var savedZ);

                    context.EmitVertex();

                    // Restore output position value before transformation.
                    // NOTE(review): the copy destination below is the result of a Load rather than an
                    // output store -- confirm that this really restores the output position.
                    void RestoreComponent(int component, Operand saved)
                    {
                        if (saved != null)
                        {
                            context.Copy(context.Load(StorageKind.Input, IoVariable.Position, null, Const(component)), saved);
                        }
                    }

                    RestoreComponent(0, savedX);
                    RestoreComponent(1, savedY);
                    RestoreComponent(2, savedZ);
                }
            }

            if (cut)
            {
                context.EndPrimitive();
            }
        }

        /// <summary>
        /// Returns false for PrimitiveId and TessCoord X/Y, true for any other attribute offset.
        /// </summary>
        private static bool HasPrimitiveVertex(int attr)
        {
            bool isPerInvocation =
                attr == AttributeConsts.PrimitiveId ||
                attr == AttributeConsts.TessCoordX ||
                attr == AttributeConsts.TessCoordY;

            return !isPerInvocation;
        }

        /// <summary>
        /// Returns false for TessCoord X/Y, true for any other attribute offset.
        /// </summary>
        private static bool CanLoadOutput(int attr)
        {
            bool isTessCoord = attr == AttributeConsts.TessCoordX || attr == AttributeConsts.TessCoordY;

            return !isTessCoord;
        }

        /// <summary>
        /// Produces the value for an IPA load of <paramref name="attr"/>.
        /// Returns true when the attribute fell in one of the fixed-function ranges handled here,
        /// false when it was loaded directly at the given offset.
        /// </summary>
        private static bool TryFixedFuncToUserAttributeIpa(EmitterContext context, int attr, out Operand selectedAttr)
        {
            if (attr >= AttributeConsts.FrontColorDiffuseR && attr < AttributeConsts.BackColorDiffuseR)
            {
                // TODO: If two sided rendering is enabled, then this should return
                // FrontColor if the fragment is front facing, and back color otherwise.
                selectedAttr = GenerateIpaLoad(context, FixedFuncToUserAttribute(context.Config, attr, isOutput: false));
                return true;
            }

            if (attr == AttributeConsts.FogCoord)
            {
                // TODO: We likely need to emulate the fixed-function functionality for FogCoord here.
                selectedAttr = GenerateIpaLoad(context, FixedFuncToUserAttribute(context.Config, attr, isOutput: false));
                return true;
            }

            if (attr >= AttributeConsts.BackColorDiffuseR && attr < AttributeConsts.ClipDistance0)
            {
                // Constant result: 1 for the fourth component of each vector, 0 for the others.
                bool isFourthComponent = ((attr >> 2) & 3) == 3;

                selectedAttr = ConstF(isFourthComponent ? 1f : 0f);
                return true;
            }

            if (attr >= AttributeConsts.TexCoordBase && attr < AttributeConsts.TexCoordEnd)
            {
                selectedAttr = GenerateIpaLoad(context, FixedFuncToUserAttribute(context.Config, attr, isOutput: false));
                return true;
            }

            selectedAttr = GenerateIpaLoad(context, attr);
            return false;
        }

        /// <summary>
        /// Generates a non-output, non-per-patch attribute load without a primitive vertex index.
        /// </summary>
        private static Operand GenerateIpaLoad(EmitterContext context, int offset)
        {
            return AttributeMap.GenerateAttributeLoad(context, null, offset, isOutput: false, isPerPatch: false);
        }

        /// <summary>
        /// Remaps a fixed-function attribute offset (layer, fog coordinate, colors, texture coordinates)
        /// to a user attribute offset. Offsets outside the handled ranges are returned unchanged.
        /// </summary>
        private static int FixedFuncToUserAttribute(ShaderConfig config, int attr, bool isOutput)
        {
            bool hostSupportsLayer = config.GpuAccessor.QueryHostSupportsLayerVertexTessellation();

            // When the layer has to be passed through a user attribute, slot 0 is reserved for it.
            int firstSlot = hostSupportsLayer ? 0 : 1;

            if (attr == AttributeConsts.Layer && config.Stage != ShaderStage.Geometry && !hostSupportsLayer)
            {
                attr = FixedFuncToUserAttribute(config, attr, AttributeConsts.Layer, 0, isOutput);
                config.SetLayerOutputAttribute(attr);
            }
            else if (attr == AttributeConsts.FogCoord)
            {
                attr = FixedFuncToUserAttribute(config, attr, AttributeConsts.FogCoord, firstSlot, isOutput);
            }
            else if (attr >= AttributeConsts.FrontColorDiffuseR && attr < AttributeConsts.ClipDistance0)
            {
                attr = FixedFuncToUserAttribute(config, attr, AttributeConsts.FrontColorDiffuseR, firstSlot + 1, isOutput);
            }
            else if (attr >= AttributeConsts.TexCoordBase && attr < AttributeConsts.TexCoordEnd)
            {
                attr = FixedFuncToUserAttribute(config, attr, AttributeConsts.TexCoordBase, firstSlot + 5, isOutput);
            }

            return attr;
        }

        /// <summary>
        /// Maps <paramref name="attr"/> (relative to <paramref name="baseAttr"/>) onto a free user attribute.
        /// Logs and returns the offset unchanged when no valid user attribute index is available.
        /// </summary>
        private static int FixedFuncToUserAttribute(ShaderConfig config, int attr, int baseAttr, int baseIndex, bool isOutput)
        {
            int vectorIndex = (attr - baseAttr) >> 4;
            int userAttrIndex = config.GetFreeUserAttribute(isOutput, baseIndex + vectorIndex);

            // The unsigned comparison also rejects negative indices.
            if ((uint)userAttrIndex >= Constants.MaxAttributes)
            {
                config.GpuAccessor.Log($"No enough user attributes for fixed attribute offset 0x{attr:X}.");

                return attr;
            }

            // Keep the byte offset inside the 16-byte vector.
            attr = AttributeConsts.UserAttributeBase + userAttrIndex * 16 + (attr & 0xf);

            if (isOutput)
            {
                config.SetOutputUserAttributeFixedFunc(userAttrIndex);
            }
            else
            {
                config.SetInputUserAttributeFixedFunc(userAttrIndex);
            }

            return attr;
        }

        /// <summary>
        /// On Vulkan, replaces InstanceId with (InstanceIndex - BaseInstance) and VertexId with VertexIndex.
        /// Returns false (and a null value) for any other attribute or target API.
        /// </summary>
        private static bool TryConvertIdToIndexForVulkan(EmitterContext context, int attr, out Operand value)
        {
            value = null;

            if (context.Config.Options.TargetApi != TargetApi.Vulkan)
            {
                return false;
            }

            if (attr == AttributeConsts.InstanceId)
            {
                value = context.ISubtract(
                    context.Load(StorageKind.Input, IoVariable.InstanceIndex),
                    context.Load(StorageKind.Input, IoVariable.BaseInstance));

                return true;
            }

            if (attr == AttributeConsts.VertexId)
            {
                value = context.Load(StorageKind.Input, IoVariable.VertexIndex);

                return true;
            }

            return false;
        }

        /// <summary>
        /// Splits the register-held attribute address in <paramref name="srcA"/> into a vector index
        /// (offset / 16) and a component index ((offset / 4) &amp; 3), relative to the user attribute base.
        /// </summary>
        private static void EmitPhysicalAttributeIndices(EmitterContext context, int srcA, out Operand vecIndex, out Operand elemIndex)
        {
            Operand offset = context.ISubtract(GetSrcReg(context, srcA), Const(AttributeConsts.UserAttributeBase));

            vecIndex = context.ShiftRightU32(offset, Const(4));
            elemIndex = context.BitwiseAnd(context.ShiftRightU32(offset, Const(2)), Const(3));
        }
    }
}
// src/Ryujinx.Graphics.Shader/Instructions/InstEmitBarrier.cs
using Ryujinx.Graphics.Shader.Decoders;
using Ryujinx.Graphics.Shader.Translation;

namespace Ryujinx.Graphics.Shader.Instructions
{
    static partial class InstEmit
    {
        /// <summary>
        /// BAR: emits a barrier for the Sync mode; any other mode is only logged.
        /// </summary>
        public static void Bar(EmitterContext context)
        {
            InstBar inst = context.GetOp<InstBar>();

            // TODO: Support other modes.
            if (inst.BarOp != BarOp.Sync)
            {
                context.Config.GpuAccessor.Log($"Invalid barrier mode: {inst.BarOp}.");

                return;
            }

            context.Barrier();
        }

        /// <summary>
        /// DEPBAR: decoded but otherwise ignored.
        /// </summary>
        public static void Depbar(EmitterContext context)
        {
            _ = context.GetOp<InstDepbar>();

            // No operation.
        }

        /// <summary>
        /// MEMBAR: emits a group memory barrier for the Cta level, and a full memory barrier otherwise.
        /// </summary>
        public static void Membar(EmitterContext context)
        {
            InstMembar inst = context.GetOp<InstMembar>();

            switch (inst.Membar)
            {
                case Decoders.Membar.Cta:
                    context.GroupMemoryBarrier();
                    break;

                default:
                    context.MemoryBarrier();
                    break;
            }
        }
    }
}
// src/Ryujinx.Graphics.Shader/Instructions/InstEmitBitfield.cs
using Ryujinx.Graphics.Shader.Decoders;
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using Ryujinx.Graphics.Shader.Translation;

using static Ryujinx.Graphics.Shader.Instructions.InstEmitHelper;
using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;

namespace Ryujinx.Graphics.Shader.Instructions
{
    static partial class InstEmit
    {
        /// <summary>BFE with a register second operand.</summary>
        public static void BfeR(EmitterContext context)
        {
            InstBfeR inst = context.GetOp<InstBfeR>();

            Operand value = GetSrcReg(context, inst.SrcA);
            Operand posAndSize = GetSrcReg(context, inst.SrcB);

            EmitBfe(context, value, posAndSize, inst.Dest, inst.Brev, inst.Signed);
        }

        /// <summary>BFE with an immediate second operand.</summary>
        public static void BfeI(EmitterContext context)
        {
            InstBfeI inst = context.GetOp<InstBfeI>();

            Operand value = GetSrcReg(context, inst.SrcA);
            Operand posAndSize = GetSrcImm(context, Imm20ToSInt(inst.Imm20));

            EmitBfe(context, value, posAndSize, inst.Dest, inst.Brev, inst.Signed);
        }

        /// <summary>BFE with a constant buffer second operand.</summary>
        public static void BfeC(EmitterContext context)
        {
            InstBfeC inst = context.GetOp<InstBfeC>();

            Operand value = GetSrcReg(context, inst.SrcA);
            Operand posAndSize = GetSrcCbuf(context, inst.CbufSlot, inst.CbufOffset);

            EmitBfe(context, value, posAndSize, inst.Dest, inst.Brev, inst.Signed);
        }

        /// <summary>BFI with register operands.</summary>
        public static void BfiR(EmitterContext context)
        {
            InstBfiR inst = context.GetOp<InstBfiR>();

            Operand insert = GetSrcReg(context, inst.SrcA);
            Operand posAndSize = GetSrcReg(context, inst.SrcB);
            Operand baseValue = GetSrcReg(context, inst.SrcC);

            EmitBfi(context, insert, posAndSize, baseValue, inst.Dest);
        }

        /// <summary>BFI with an immediate second operand.</summary>
        public static void BfiI(EmitterContext context)
        {
            InstBfiI inst = context.GetOp<InstBfiI>();

            Operand insert = GetSrcReg(context, inst.SrcA);
            Operand posAndSize = GetSrcImm(context, Imm20ToSInt(inst.Imm20));
            Operand baseValue = GetSrcReg(context, inst.SrcC);

            EmitBfi(context, insert, posAndSize, baseValue, inst.Dest);
        }

        /// <summary>BFI with a constant buffer second operand.</summary>
        public static void BfiC(EmitterContext context)
        {
            InstBfiC inst = context.GetOp<InstBfiC>();

            Operand insert = GetSrcReg(context, inst.SrcA);
            Operand posAndSize = GetSrcCbuf(context, inst.CbufSlot, inst.CbufOffset);
            Operand baseValue = GetSrcReg(context, inst.SrcC);

            EmitBfi(context, insert, posAndSize, baseValue, inst.Dest);
        }

        /// <summary>
        /// BFI where the second operand is the SrcC register and the third comes from a constant buffer.
        /// </summary>
        public static void BfiRc(EmitterContext context)
        {
            InstBfiRc inst = context.GetOp<InstBfiRc>();

            Operand insert = GetSrcReg(context, inst.SrcA);
            Operand posAndSize = GetSrcReg(context, inst.SrcC);
            Operand baseValue = GetSrcCbuf(context, inst.CbufSlot, inst.CbufOffset);

            EmitBfi(context, insert, posAndSize, baseValue, inst.Dest);
        }

        /// <summary>FLO with a register operand.</summary>
        public static void FloR(EmitterContext context)
        {
            InstFloR inst = context.GetOp<InstFloR>();

            Operand source = GetSrcReg(context, inst.SrcB);

            EmitFlo(context, source, inst.Dest, inst.NegB, inst.Sh, inst.Signed);
        }

        /// <summary>FLO with an immediate operand.</summary>
        public static void FloI(EmitterContext context)
        {
            InstFloI inst = context.GetOp<InstFloI>();

            Operand source = GetSrcImm(context, Imm20ToSInt(inst.Imm20));

            EmitFlo(context, source, inst.Dest, inst.NegB, inst.Sh, inst.Signed);
        }

        /// <summary>FLO with a constant buffer operand.</summary>
        public static void FloC(EmitterContext context)
        {
            InstFloC inst = context.GetOp<InstFloC>();

            Operand source = GetSrcCbuf(context, inst.CbufSlot, inst.CbufOffset);

            EmitFlo(context, source, inst.Dest, inst.NegB, inst.Sh, inst.Signed);
        }

        /// <summary>POPC with a register operand.</summary>
        public static void PopcR(EmitterContext context)
        {
            InstPopcR inst = context.GetOp<InstPopcR>();

            Operand source = GetSrcReg(context, inst.SrcB);

            EmitPopc(context, source, inst.Dest, inst.NegB);
        }

        /// <summary>POPC with an immediate operand.</summary>
        public static void PopcI(EmitterContext context)
        {
            InstPopcI inst = context.GetOp<InstPopcI>();

            Operand source = GetSrcImm(context, Imm20ToSInt(inst.Imm20));

            EmitPopc(context, source, inst.Dest, inst.NegB);
        }

        /// <summary>POPC with a constant buffer operand.</summary>
        public static void PopcC(EmitterContext context)
        {
            InstPopcC inst = context.GetOp<InstPopcC>();

            Operand source = GetSrcCbuf(context, inst.CbufSlot, inst.CbufOffset);

            EmitPopc(context, source, inst.Dest, inst.NegB);
        }

        /// <summary>
        /// Extracts a bitfield from <paramref name="srcA"/>. The position is the low byte of
        /// <paramref name="srcB"/> and the size is its second byte. The source is bit-reversed
        /// first when <paramref name="bitReverse"/> is set.
        /// </summary>
        private static void EmitBfe(
            EmitterContext context,
            Operand srcA,
            Operand srcB,
            int rd,
            bool bitReverse,
            bool isSigned)
        {
            Operand source = bitReverse ? context.BitfieldReverse(srcA) : srcA;

            Operand position = context.BitwiseAnd(srcB, Const(0xff));
            Operand size = context.BitfieldExtractU32(srcB, Const(8), Const(8));

            Operand extracted;

            if (isSigned)
            {
                extracted = context.BitfieldExtractS32(source, position, size);
            }
            else
            {
                extracted = context.BitfieldExtractU32(source, position, size);
            }

            context.Copy(GetDest(rd), extracted);

            // TODO: CC, X, corner cases.
        }

        /// <summary>
        /// Inserts <paramref name="srcA"/> into <paramref name="srcC"/>. The position is the low byte of
        /// <paramref name="srcB"/> and the size is its second byte.
        /// </summary>
        private static void EmitBfi(EmitterContext context, Operand srcA, Operand srcB, Operand srcC, int rd)
        {
            Operand position = context.BitwiseAnd(srcB, Const(0xff));
            Operand size = context.BitfieldExtractU32(srcB, Const(8), Const(8));

            Operand inserted = context.BitfieldInsert(srcC, srcA, position, size);

            context.Copy(GetDest(rd), inserted);
        }

        /// <summary>
        /// Finds the leading one of the (optionally inverted) source. With <paramref name="sh"/> set the
        /// result is FindLSB of the bit-reversed value; otherwise it is the signed or unsigned FindMSB.
        /// </summary>
        private static void EmitFlo(EmitterContext context, Operand src, int rd, bool invert, bool sh, bool isSigned)
        {
            Operand value = context.BitwiseNot(src, invert);

            Operand result;

            if (sh)
            {
                result = context.FindLSB(context.BitfieldReverse(value));
            }
            else if (isSigned)
            {
                result = context.FindMSBS32(value);
            }
            else
            {
                result = context.FindMSBU32(value);
            }

            context.Copy(GetDest(rd), result);
        }

        /// <summary>
        /// Counts the set bits of the (optionally inverted) source.
        /// </summary>
        private static void EmitPopc(EmitterContext context, Operand src, int rd, bool invert)
        {
            Operand value = context.BitwiseNot(src, invert);
            Operand bitCount = context.BitCount(value);

            context.Copy(GetDest(rd), bitCount);
        }
    }
}
// src/Ryujinx.Graphics.Shader/Instructions/InstEmitConditionCode.cs
using Ryujinx.Graphics.Shader.Decoders;
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using Ryujinx.Graphics.Shader.Translation;

using static Ryujinx.Graphics.Shader.Instructions.InstEmitAluHelper;
using static Ryujinx.Graphics.Shader.Instructions.InstEmitHelper;
using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;

namespace Ryujinx.Graphics.Shader.Instructions
{
    static partial class InstEmit
    {
        /// <summary>
        /// CSET: combines the selected condition code test with a source predicate and writes the
        /// result to a register (as 1.0f / 0 when BVal is set, as the raw result otherwise).
        /// </summary>
        public static void Cset(EmitterContext context)
        {
            InstCset inst = context.GetOp<InstCset>();

            Operand condition = GetCondition(context, inst.Ccc);
            Operand predicate = GetPredicate(context, inst.SrcPred, inst.SrcPredInv);

            Operand combined = GetPredLogicalOp(context, inst.Bop, condition, predicate);

            Operand dest = GetDest(inst.Dest);

            Operand value = inst.BVal
                ? context.ConditionalSelect(combined, ConstF(1), Const(0))
                : combined;

            context.Copy(dest, value);

            // TODO: CC.
        }

        /// <summary>
        /// CSETP: writes the condition code test (and its complement), each combined with a
        /// source predicate, to the two destination predicate registers.
        /// </summary>
        public static void Csetp(EmitterContext context)
        {
            InstCsetp inst = context.GetOp<InstCsetp>();

            Operand condition = GetCondition(context, inst.Ccc);
            Operand inverse = context.BitwiseNot(condition);
            Operand predicate = GetPredicate(context, inst.SrcPred, inst.SrcPredInv);

            Operand first = GetPredLogicalOp(context, inst.Bop, condition, predicate);
            Operand second = GetPredLogicalOp(context, inst.Bop, inverse, predicate);

            context.Copy(Register(inst.DestPred, RegisterType.Predicate), first);
            context.Copy(Register(inst.DestPredInv, RegisterType.Predicate), second);

            // TODO: CC.
        }

        /// <summary>
        /// Builds the boolean expression for a condition code test from the N, Z, V and C flags.
        /// Unlisted tests evaluate to the constant <paramref name="defaultCond"/>.
        /// </summary>
        private static Operand GetCondition(EmitterContext context, Ccc cond, int defaultCond = IrConsts.True)
        {
            switch (cond)
            {
                case Ccc.F:
                    return Const(IrConsts.False);
                case Ccc.Lt:
                    return context.BitwiseExclusiveOr(context.BitwiseAnd(GetNF(), context.BitwiseNot(GetZF())), GetVF());
                case Ccc.Eq:
                    return context.BitwiseAnd(context.BitwiseNot(GetNF()), GetZF());
                case Ccc.Le:
                    return context.BitwiseExclusiveOr(GetNF(), context.BitwiseOr(GetZF(), GetVF()));
                case Ccc.Gt:
                    return context.BitwiseNot(context.BitwiseOr(context.BitwiseExclusiveOr(GetNF(), GetVF()), GetZF()));
                case Ccc.Ne:
                    return context.BitwiseNot(GetZF());
                case Ccc.Ge:
                    return context.BitwiseNot(context.BitwiseExclusiveOr(GetNF(), GetVF()));
                case Ccc.Num:
                    return context.BitwiseNot(context.BitwiseAnd(GetNF(), GetZF()));
                case Ccc.Nan:
                    return context.BitwiseAnd(GetNF(), GetZF());
                case Ccc.Ltu:
                    return context.BitwiseExclusiveOr(GetNF(), GetVF());
                case Ccc.Equ:
                    return GetZF();
                case Ccc.Leu:
                    return context.BitwiseOr(context.BitwiseExclusiveOr(GetNF(), GetVF()), GetZF());
                case Ccc.Gtu:
                    return context.BitwiseExclusiveOr(context.BitwiseNot(GetNF()), context.BitwiseOr(GetVF(), GetZF()));
                case Ccc.Neu:
                    return context.BitwiseOr(GetNF(), context.BitwiseNot(GetZF()));
                case Ccc.Geu:
                    return context.BitwiseExclusiveOr(context.BitwiseOr(context.BitwiseNot(GetNF()), GetZF()), GetVF());
                case Ccc.T:
                    return Const(IrConsts.True);
                case Ccc.Off:
                    return context.BitwiseNot(GetVF());
                case Ccc.Lo:
                    return context.BitwiseNot(GetCF());
                case Ccc.Sff:
                    return context.BitwiseNot(GetNF());
                case Ccc.Ls:
                    return context.BitwiseOr(GetZF(), context.BitwiseNot(GetCF()));
                case Ccc.Hi:
                    return context.BitwiseAnd(GetCF(), context.BitwiseNot(GetZF()));
                case Ccc.Sft:
                    return GetNF();
                case Ccc.Hs:
                    return GetCF();
                case Ccc.Oft:
                    return GetVF();
                case Ccc.Rle:
                    return context.BitwiseOr(GetNF(), GetZF());
                case Ccc.Rgt:
                    return context.BitwiseNot(context.BitwiseOr(GetNF(), GetZF()));
                default:
                    return Const(defaultCond);
            }
        }
    }
}
// src/Ryujinx.Graphics.Shader/Instructions/InstEmitConversion.cs
using Ryujinx.Graphics.Shader.Decoders;
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using Ryujinx.Graphics.Shader.Translation;
using System;

using static Ryujinx.Graphics.Shader.Instructions.InstEmitAluHelper;
using static Ryujinx.Graphics.Shader.Instructions.InstEmitHelper;
using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;

namespace Ryujinx.Graphics.Shader.Instructions
{
    static partial class InstEmit
    {
        /// <summary>F2F (float to float conversion) with a register source.</summary>
        public static void F2fR(EmitterContext context)
        {
            InstF2fR op = context.GetOp<InstF2fR>();

            var src = UnpackReg(context, op.SrcFmt, op.Sh, op.SrcB);

            EmitF2F(context, op.SrcFmt, op.DstFmt, op.RoundMode, src, op.Dest, op.AbsB, op.NegB, op.Sat);
        }

        /// <summary>F2F (float to float conversion) with an immediate source.</summary>
        public static void F2fI(EmitterContext context)
        {
            InstF2fI op = context.GetOp<InstF2fI>();

            var src = UnpackImm(context, op.SrcFmt, op.Sh, Imm20ToFloat(op.Imm20));

            EmitF2F(context, op.SrcFmt, op.DstFmt, op.RoundMode, src, op.Dest, op.AbsB, op.NegB, op.Sat);
        }

        /// <summary>F2F (float to float conversion) with a constant buffer source.</summary>
        public static void F2fC(EmitterContext context)
        {
            InstF2fC op = context.GetOp<InstF2fC>();

            var src = UnpackCbuf(context, op.SrcFmt, op.Sh, op.CbufSlot, op.CbufOffset);

            EmitF2F(context, op.SrcFmt, op.DstFmt, op.RoundMode, src, op.Dest, op.AbsB, op.NegB, op.Sat);
        }

        /// <summary>F2I (float to integer conversion) with a register source.</summary>
        public static void F2iR(EmitterContext context)
        {
            InstF2iR op = context.GetOp<InstF2iR>();

            var src = UnpackReg(context, op.SrcFmt, op.Sh, op.SrcB);

            EmitF2I(context, op.SrcFmt, op.IDstFmt, op.RoundMode, src, op.Dest, op.AbsB, op.NegB);
        }

        /// <summary>F2I (float to integer conversion) with an immediate source.</summary>
        public static void F2iI(EmitterContext context)
        {
            InstF2iI op = context.GetOp<InstF2iI>();

            var src = UnpackImm(context, op.SrcFmt, op.Sh, Imm20ToFloat(op.Imm20));

            EmitF2I(context, op.SrcFmt, op.IDstFmt, op.RoundMode, src, op.Dest, op.AbsB, op.NegB);
        }

        /// <summary>F2I (float to integer conversion) with a constant buffer source.</summary>
        public static void F2iC(EmitterContext context)
        {
            InstF2iC op = context.GetOp<InstF2iC>();

            var src = UnpackCbuf(context, op.SrcFmt, op.Sh, op.CbufSlot, op.CbufOffset);

            EmitF2I(context, op.SrcFmt, op.IDstFmt, op.RoundMode, src, op.Dest, op.AbsB, op.NegB);
        }

        /// <summary>I2F (integer to float conversion) with a register source.</summary>
        public static void I2fR(EmitterContext context)
        {
            InstI2fR op = context.GetOp<InstI2fR>();

            var src = GetSrcReg(context, op.SrcB);

            EmitI2F(context, op.ISrcFmt, op.DstFmt, src, op.ByteSel, op.Dest, op.AbsB, op.NegB);
        }

        /// <summary>I2F (integer to float conversion) with an immediate source.</summary>
        public static void I2fI(EmitterContext context)
        {
            InstI2fI op = context.GetOp<InstI2fI>();

            var src = GetSrcImm(context, Imm20ToSInt(op.Imm20));

            EmitI2F(context, op.ISrcFmt, op.DstFmt, src, op.ByteSel, op.Dest, op.AbsB, op.NegB);
        }

        /// <summary>I2F (integer to float conversion) with a constant buffer source.</summary>
        public static void I2fC(EmitterContext context)
        {
            InstI2fC op = context.GetOp<InstI2fC>();

            var src = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);

            EmitI2F(context, op.ISrcFmt, op.DstFmt, src, op.ByteSel, op.Dest, op.AbsB, op.NegB);
        }

        /// <summary>I2I (integer to integer conversion) with a register source.</summary>
        public static void I2iR(EmitterContext context)
        {
            InstI2iR op = context.GetOp<InstI2iR>();

            var src = GetSrcReg(context, op.SrcB);

            EmitI2I(context, op.ISrcFmt, op.IDstFmt, src, op.ByteSel, op.Dest, op.AbsB, op.NegB, op.Sat, op.WriteCC);
        }

        /// <summary>I2I (integer to integer conversion) with an immediate source.</summary>
        public static void I2iI(EmitterContext context)
        {
            InstI2iI op = context.GetOp<InstI2iI>();

            var src = GetSrcImm(context, Imm20ToSInt(op.Imm20));

            EmitI2I(context, op.ISrcFmt, op.IDstFmt, src, op.ByteSel, op.Dest, op.AbsB, op.NegB, op.Sat, op.WriteCC);
        }

        /// <summary>I2I (integer to integer conversion) with a constant buffer source.</summary>
        public static void I2iC(EmitterContext context)
        {
            InstI2iC op = context.GetOp<InstI2iC>();

            var src = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);

            EmitI2I(context, op.ISrcFmt, op.IDstFmt, src, op.ByteSel, op.Dest, op.AbsB, op.NegB, op.Sat, op.WriteCC);
        }

        /// <summary>
        /// Shared F2F implementation: applies abs/neg, rounds to an integral value when source and
        /// destination formats match, converts between FP32 and FP64 when they differ, optionally
        /// saturates, and writes the result in the destination format.
        /// </summary>
        private static void EmitF2F(
            EmitterContext context,
            DstFmt srcType,
            DstFmt dstType,
            IntegerRound roundingMode,
            Operand src,
            int rd,
            bool absolute,
            bool negate,
            bool saturate)
        {
            Operand srcB = context.FPAbsNeg(src, absolute, negate, srcType.ToInstFPType());

            // Rounding to integer is only applied when no format conversion takes place.
            if (srcType == dstType)
            {
                srcB = roundingMode switch
                {
                    IntegerRound.Round => context.FPRound(srcB, srcType.ToInstFPType()),
                    IntegerRound.Floor => context.FPFloor(srcB, srcType.ToInstFPType()),
                    IntegerRound.Ceil => context.FPCeiling(srcB, srcType.ToInstFPType()),
                    IntegerRound.Trunc => context.FPTruncate(srcB, srcType.ToInstFPType()),
                    _ => srcB
                };
            }

            // We don't need to handle conversions between FP16 <-> FP32
            // since we do FP16 operations as FP32 directly.
            // FP16 <-> FP64 conversions are invalid.
            if (srcType == DstFmt.F32 && dstType == DstFmt.F64)
            {
                srcB = context.FP32ConvertToFP64(srcB);
            }
            else if (srcType == DstFmt.F64 && dstType == DstFmt.F32)
            {
                srcB = context.FP64ConvertToFP32(srcB);
            }

            srcB = context.FPSaturate(srcB, saturate, dstType.ToInstFPType());

            WriteFP(context, dstType, srcB, rd);

            // TODO: CC.
        }

        /// <summary>
        /// Shared F2I implementation: applies abs/neg and the rounding mode, clamps negative values
        /// for unsigned destinations, converts to a 32-bit integer, and clamps to the 16-bit range
        /// for 16-bit destinations. Only one destination register is written.
        /// </summary>
        private static void EmitF2I(
            EmitterContext context,
            DstFmt srcType,
            IDstFmt dstType,
            RoundMode2 roundingMode,
            Operand src,
            int rd,
            bool absolute,
            bool negate)
        {
            // Both 64-bit destination formats go through the 32-bit conversion below and write a
            // single register, so report either of them as unimplemented.
            if (dstType == IDstFmt.U64 || dstType == IDstFmt.S64)
            {
                context.Config.GpuAccessor.Log("Unimplemented 64-bits F2I.");
            }

            Instruction fpType = srcType.ToInstFPType();

            bool isSignedInt = dstType == IDstFmt.S16 || dstType == IDstFmt.S32 || dstType == IDstFmt.S64;
            bool isSmallInt = dstType == IDstFmt.U16 || dstType == IDstFmt.S16;

            Operand srcB = context.FPAbsNeg(src, absolute, negate, fpType);

            srcB = roundingMode switch
            {
                RoundMode2.Round => context.FPRound(srcB, fpType),
                RoundMode2.Floor => context.FPFloor(srcB, fpType),
                RoundMode2.Ceil => context.FPCeiling(srcB, fpType),
                RoundMode2.Trunc => context.FPTruncate(srcB, fpType),
                _ => srcB
            };

            if (!isSignedInt)
            {
                // Negative float to uint cast is undefined, so we clamp the value before conversion.
                Operand c0 = srcType == DstFmt.F64 ? context.PackDouble2x32(0.0) : ConstF(0);

                srcB = context.FPMaximum(srcB, c0, fpType);
            }

            if (srcType == DstFmt.F64)
            {
                srcB = isSignedInt
                    ? context.FP64ConvertToS32(srcB)
                    : context.FP64ConvertToU32(srcB);
            }
            else
            {
                srcB = isSignedInt
                    ? context.FP32ConvertToS32(srcB)
                    : context.FP32ConvertToU32(srcB);
            }

            if (isSmallInt)
            {
                int min = (int)GetIntMin(dstType);
                int max = (int)GetIntMax(dstType);

                srcB = isSignedInt
                    ? context.IClampS32(srcB, Const(min), Const(max))
                    : context.IClampU32(srcB, Const(min), Const(max));
            }

            Operand dest = GetDest(rd);

            context.Copy(dest, srcB);

            // TODO: CC.
        }

        /// <summary>
        /// Shared I2F implementation: applies abs/neg, extracts the selected 8 or 16-bit field for
        /// small source formats, converts from a 32-bit integer to FP32 or FP64, and writes the
        /// result in the destination format.
        /// </summary>
        private static void EmitI2F(
            EmitterContext context,
            ISrcFmt srcType,
            DstFmt dstType,
            Operand src,
            ByteSel byteSelection,
            int rd,
            bool absolute,
            bool negate)
        {
            bool isSignedInt =
                srcType == ISrcFmt.S8 ||
                srcType == ISrcFmt.S16 ||
                srcType == ISrcFmt.S32 ||
                srcType == ISrcFmt.S64;
            bool isSmallInt =
                srcType == ISrcFmt.U16 ||
                srcType == ISrcFmt.S16 ||
                srcType == ISrcFmt.U8 ||
                srcType == ISrcFmt.S8;

            // TODO: Handle S/U64.

            Operand srcB = context.IAbsNeg(src, absolute, negate);

            if (isSmallInt)
            {
                int size = srcType == ISrcFmt.U16 || srcType == ISrcFmt.S16 ? 16 : 8;

                // The byte selector picks where the field starts, in units of 8 bits.
                srcB = isSignedInt
                    ? context.BitfieldExtractS32(srcB, Const((int)byteSelection * 8), Const(size))
                    : context.BitfieldExtractU32(srcB, Const((int)byteSelection * 8), Const(size));
            }

            if (dstType == DstFmt.F64)
            {
                srcB = isSignedInt
                    ? context.IConvertS32ToFP64(srcB)
                    : context.IConvertU32ToFP64(srcB);
            }
            else
            {
                srcB = isSignedInt
                    ? context.IConvertS32ToFP32(srcB)
                    : context.IConvertU32ToFP32(srcB);
            }

            WriteFP(context, dstType, srcB, rd);

            // TODO: CC.
        }

        /// <summary>
        /// Shared I2I implementation: extracts the selected field for small source formats, applies
        /// abs/neg, optionally clamps to the destination range, writes the destination register and
        /// updates the Z/N flags when <paramref name="writeCC"/> is set. Encodings that fail the
        /// format check at the top are logged and produce no output.
        /// </summary>
        private static void EmitI2I(
            EmitterContext context,
            ISrcDstFmt srcType,
            ISrcDstFmt dstType,
            Operand src,
            ByteSel byteSelection,
            int rd,
            bool absolute,
            bool negate,
            bool saturate,
            bool writeCC)
        {
            if ((srcType & ~ISrcDstFmt.S8) > ISrcDstFmt.U32 || (dstType & ~ISrcDstFmt.S8) > ISrcDstFmt.U32)
            {
                context.Config.GpuAccessor.Log("Invalid I2I encoding.");
                return;
            }

            bool srcIsSignedInt =
                srcType == ISrcDstFmt.S8 ||
                srcType == ISrcDstFmt.S16 ||
                srcType == ISrcDstFmt.S32;
            bool dstIsSignedInt =
                dstType == ISrcDstFmt.S8 ||
                dstType == ISrcDstFmt.S16 ||
                dstType == ISrcDstFmt.S32;
            bool srcIsSmallInt =
                srcType == ISrcDstFmt.U16 ||
                srcType == ISrcDstFmt.S16 ||
                srcType == ISrcDstFmt.U8 ||
                srcType == ISrcDstFmt.S8;

            if (srcIsSmallInt)
            {
                int size = srcType == ISrcDstFmt.U16 || srcType == ISrcDstFmt.S16 ? 16 : 8;

                src = srcIsSignedInt
                    ? context.BitfieldExtractS32(src, Const((int)byteSelection * 8), Const(size))
                    : context.BitfieldExtractU32(src, Const((int)byteSelection * 8), Const(size));
            }

            src = context.IAbsNeg(src, absolute, negate);

            if (saturate)
            {
                int min = (int)GetIntMin(dstType);
                int max = (int)GetIntMax(dstType);

                src = dstIsSignedInt
                    ? context.IClampS32(src, Const(min), Const(max))
                    : context.IClampU32(src, Const(min), Const(max));
            }

            context.Copy(GetDest(rd), src);

            SetZnFlags(context, src, writeCC);
        }

        /// <summary>
        /// Reads a floating point source from a register. For F16, <paramref name="h"/> selects the
        /// second unpacked half instead of the first.
        /// </summary>
        /// <exception cref="ArgumentException">The format is not F16, F32 or F64.</exception>
        private static Operand UnpackReg(EmitterContext context, DstFmt floatType, bool h, int reg)
        {
            if (floatType == DstFmt.F32)
            {
                return GetSrcReg(context, reg);
            }
            else if (floatType == DstFmt.F16)
            {
                return GetHalfUnpacked(context, GetSrcReg(context, reg), HalfSwizzle.F16)[h ? 1 : 0];
            }
            else if (floatType == DstFmt.F64)
            {
                return GetSrcReg(context, reg, isFP64: true);
            }

            throw new ArgumentException($"Invalid floating point type \"{floatType}\".");
        }

        /// <summary>
        /// Reads a floating point source from a constant buffer. For F16, <paramref name="h"/> selects
        /// the second unpacked half instead of the first.
        /// </summary>
        /// <exception cref="ArgumentException">The format is not F16, F32 or F64.</exception>
        private static Operand UnpackCbuf(EmitterContext context, DstFmt floatType, bool h, int cbufSlot, int cbufOffset)
        {
            if (floatType == DstFmt.F32)
            {
                return GetSrcCbuf(context, cbufSlot, cbufOffset);
            }
            else if (floatType == DstFmt.F16)
            {
                return GetHalfUnpacked(context, GetSrcCbuf(context, cbufSlot, cbufOffset), HalfSwizzle.F16)[h ? 1 : 0];
            }
            else if (floatType == DstFmt.F64)
            {
                return GetSrcCbuf(context, cbufSlot, cbufOffset, isFP64: true);
            }

            throw new ArgumentException($"Invalid floating point type \"{floatType}\".");
        }

        /// <summary>
        /// Reads a floating point source from an immediate. For F16, <paramref name="h"/> selects the
        /// second unpacked half instead of the first.
        /// </summary>
        /// <exception cref="ArgumentException">The format is not F16, F32 or F64.</exception>
        private static Operand UnpackImm(EmitterContext context, DstFmt floatType, bool h, int imm)
        {
            if (floatType == DstFmt.F32)
            {
                return GetSrcImm(context, imm);
            }
            else if (floatType == DstFmt.F16)
            {
                return GetHalfUnpacked(context, GetSrcImm(context, imm), HalfSwizzle.F16)[h ? 1 : 0];
            }
            else if (floatType == DstFmt.F64)
            {
                return GetSrcImm(context, imm, isFP64: true);
            }

            throw new ArgumentException($"Invalid floating point type \"{floatType}\".");
        }

        /// <summary>
        /// Writes a floating point result: directly for F32, packed as the first half of a half2 for
        /// F16, and split into low/high words over two destination registers otherwise.
        /// </summary>
        private static void WriteFP(EmitterContext context, DstFmt type, Operand srcB, int rd)
        {
            Operand dest = GetDest(rd);

            if (type == DstFmt.F32)
            {
                context.Copy(dest, srcB);
            }
            else if (type == DstFmt.F16)
            {
                context.Copy(dest, context.PackHalf2x16(srcB, ConstF(0)));
            }
            else /* if (type == FPType.FP64) */
            {
                Operand dest2 = GetDest2(rd);

                context.Copy(dest, context.UnpackDouble2x32Low(srcB));
                context.Copy(dest2, context.UnpackDouble2x32High(srcB));
            }
        }

        /// <summary>
        /// Maps a float format to the IR precision flag: FP64 for F64, FP32 for everything else
        /// (F16 is handled as FP32).
        /// </summary>
        private static Instruction ToInstFPType(this DstFmt type)
        {
            return type == DstFmt.F64 ? Instruction.FP64 : Instruction.FP32;
        }
    }
}
// src/Ryujinx.Graphics.Shader/Instructions/InstEmitFloatArithmetic.cs
using Ryujinx.Graphics.Shader.Decoders;
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using Ryujinx.Graphics.Shader.Translation;

using static Ryujinx.Graphics.Shader.Instructions.InstEmitAluHelper;
using static Ryujinx.Graphics.Shader.Instructions.InstEmitHelper;
using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;

namespace Ryujinx.Graphics.Shader.Instructions
{
    // Floating-point add, multiply and fused multiply-add emitters.
    // Each public method decodes the current instruction, fetches its source operands
    // (register, immediate or cbuf form) and forwards them to one of the shared
    // Emit* helpers at the bottom of this file.
    static partial class InstEmit
    {
        // ---- FP64 add ----

        // FP64 add; second operand from a register.
        public static void DaddR(EmitterContext context)
        {
            InstDaddR op = context.GetOp<InstDaddR>();

            var a = GetSrcReg(context, op.SrcA, isFP64: true);
            var b = GetSrcReg(context, op.SrcB, isFP64: true);

            EmitFadd(context, Instruction.FP64, a, b, op.Dest, op.NegA, op.NegB, op.AbsA, op.AbsB, false, op.WriteCC);
        }

        // FP64 add; second operand from the 20-bit immediate.
        public static void DaddI(EmitterContext context)
        {
            InstDaddI op = context.GetOp<InstDaddI>();

            var a = GetSrcReg(context, op.SrcA, isFP64: true);
            var b = GetSrcImm(context, Imm20ToFloat(op.Imm20), isFP64: true);

            EmitFadd(context, Instruction.FP64, a, b, op.Dest, op.NegA, op.NegB, op.AbsA, op.AbsB, false, op.WriteCC);
        }

        // FP64 add; second operand from a cbuf slot/offset.
        public static void DaddC(EmitterContext context)
        {
            InstDaddC op = context.GetOp<InstDaddC>();

            var a = GetSrcReg(context, op.SrcA, isFP64: true);
            var b = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset, isFP64: true);

            EmitFadd(context, Instruction.FP64, a, b, op.Dest, op.NegA, op.NegB, op.AbsA, op.AbsB, false, op.WriteCC);
        }

        // ---- FP64 fused multiply-add ----

        // FP64 FMA; B and C from registers.
        public static void DfmaR(EmitterContext context)
        {
            InstDfmaR op = context.GetOp<InstDfmaR>();

            var a = GetSrcReg(context, op.SrcA, isFP64: true);
            var b = GetSrcReg(context, op.SrcB, isFP64: true);
            var c = GetSrcReg(context, op.SrcC, isFP64: true);

            EmitFfma(context, Instruction.FP64, a, b, c, op.Dest, op.NegA, op.NegC, false, op.WriteCC);
        }

        // FP64 FMA; B from the 20-bit immediate, C from a register.
        public static void DfmaI(EmitterContext context)
        {
            InstDfmaI op = context.GetOp<InstDfmaI>();

            var a = GetSrcReg(context, op.SrcA, isFP64: true);
            var b = GetSrcImm(context, Imm20ToFloat(op.Imm20), isFP64: true);
            var c = GetSrcReg(context, op.SrcC, isFP64: true);

            EmitFfma(context, Instruction.FP64, a, b, c, op.Dest, op.NegA, op.NegC, false, op.WriteCC);
        }

        // FP64 FMA; B from a cbuf slot/offset, C from a register.
        public static void DfmaC(EmitterContext context)
        {
            InstDfmaC op = context.GetOp<InstDfmaC>();

            var a = GetSrcReg(context, op.SrcA, isFP64: true);
            var b = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset, isFP64: true);
            var c = GetSrcReg(context, op.SrcC, isFP64: true);

            EmitFfma(context, Instruction.FP64, a, b, c, op.Dest, op.NegA, op.NegC, false, op.WriteCC);
        }

        // FP64 FMA; the SrcC register is used as the multiplier and the cbuf value as the addend.
        public static void DfmaRc(EmitterContext context)
        {
            InstDfmaRc op = context.GetOp<InstDfmaRc>();

            var a = GetSrcReg(context, op.SrcA, isFP64: true);
            var b = GetSrcReg(context, op.SrcC, isFP64: true);
            var c = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset, isFP64: true);

            EmitFfma(context, Instruction.FP64, a, b, c, op.Dest, op.NegA, op.NegC, false, op.WriteCC);
        }

        // ---- FP64 multiply ----

        // FP64 multiply; second operand from a register.
        public static void DmulR(EmitterContext context)
        {
            InstDmulR op = context.GetOp<InstDmulR>();

            var a = GetSrcReg(context, op.SrcA, isFP64: true);
            var b = GetSrcReg(context, op.SrcB, isFP64: true);

            EmitFmul(context, Instruction.FP64, MultiplyScale.NoScale, a, b, op.Dest, op.NegA, false, op.WriteCC);
        }

        // FP64 multiply; second operand from the 20-bit immediate.
        public static void DmulI(EmitterContext context)
        {
            InstDmulI op = context.GetOp<InstDmulI>();

            var a = GetSrcReg(context, op.SrcA, isFP64: true);
            var b = GetSrcImm(context, Imm20ToFloat(op.Imm20), isFP64: true);

            EmitFmul(context, Instruction.FP64, MultiplyScale.NoScale, a, b, op.Dest, op.NegA, false, op.WriteCC);
        }

        // FP64 multiply; second operand from a cbuf slot/offset.
        public static void DmulC(EmitterContext context)
        {
            InstDmulC op = context.GetOp<InstDmulC>();

            var a = GetSrcReg(context, op.SrcA, isFP64: true);
            var b = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset, isFP64: true);

            EmitFmul(context, Instruction.FP64, MultiplyScale.NoScale, a, b, op.Dest, op.NegA, false, op.WriteCC);
        }

        // ---- FP32 add ----

        // FP32 add; second operand from a register.
        public static void FaddR(EmitterContext context)
        {
            InstFaddR op = context.GetOp<InstFaddR>();

            var a = GetSrcReg(context, op.SrcA);
            var b = GetSrcReg(context, op.SrcB);

            EmitFadd(context, Instruction.FP32, a, b, op.Dest, op.NegA, op.NegB, op.AbsA, op.AbsB, op.Sat, op.WriteCC);
        }

        // FP32 add; second operand from the 20-bit immediate.
        public static void FaddI(EmitterContext context)
        {
            InstFaddI op = context.GetOp<InstFaddI>();

            var a = GetSrcReg(context, op.SrcA);
            var b = GetSrcImm(context, Imm20ToFloat(op.Imm20));

            EmitFadd(context, Instruction.FP32, a, b, op.Dest, op.NegA, op.NegB, op.AbsA, op.AbsB, op.Sat, op.WriteCC);
        }

        // FP32 add; second operand from a cbuf slot/offset.
        public static void FaddC(EmitterContext context)
        {
            InstFaddC op = context.GetOp<InstFaddC>();

            var a = GetSrcReg(context, op.SrcA);
            var b = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);

            EmitFadd(context, Instruction.FP32, a, b, op.Dest, op.NegA, op.NegB, op.AbsA, op.AbsB, op.Sat, op.WriteCC);
        }

        // FP32 add; second operand from the 32-bit immediate. Saturation is never requested here.
        public static void Fadd32i(EmitterContext context)
        {
            InstFadd32i op = context.GetOp<InstFadd32i>();

            var a = GetSrcReg(context, op.SrcA);
            var b = GetSrcImm(context, op.Imm32);

            EmitFadd(context, Instruction.FP32, a, b, op.Dest, op.NegA, op.NegB, op.AbsA, op.AbsB, false, op.WriteCC);
        }

        // ---- FP32 fused multiply-add ----

        // FP32 FMA; B and C from registers.
        public static void FfmaR(EmitterContext context)
        {
            InstFfmaR op = context.GetOp<InstFfmaR>();

            var a = GetSrcReg(context, op.SrcA);
            var b = GetSrcReg(context, op.SrcB);
            var c = GetSrcReg(context, op.SrcC);

            EmitFfma(context, Instruction.FP32, a, b, c, op.Dest, op.NegA, op.NegC, op.Sat, op.WriteCC);
        }

        // FP32 FMA; B from the 20-bit immediate, C from a register.
        public static void FfmaI(EmitterContext context)
        {
            InstFfmaI op = context.GetOp<InstFfmaI>();

            var a = GetSrcReg(context, op.SrcA);
            var b = GetSrcImm(context, Imm20ToFloat(op.Imm20));
            var c = GetSrcReg(context, op.SrcC);

            EmitFfma(context, Instruction.FP32, a, b, c, op.Dest, op.NegA, op.NegC, op.Sat, op.WriteCC);
        }

        // FP32 FMA; B from a cbuf slot/offset, C from a register.
        public static void FfmaC(EmitterContext context)
        {
            InstFfmaC op = context.GetOp<InstFfmaC>();

            var a = GetSrcReg(context, op.SrcA);
            var b = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);
            var c = GetSrcReg(context, op.SrcC);

            EmitFfma(context, Instruction.FP32, a, b, c, op.Dest, op.NegA, op.NegC, op.Sat, op.WriteCC);
        }

        // FP32 FMA; the SrcC register is used as the multiplier and the cbuf value as the addend.
        public static void FfmaRc(EmitterContext context)
        {
            InstFfmaRc op = context.GetOp<InstFfmaRc>();

            var a = GetSrcReg(context, op.SrcA);
            var b = GetSrcReg(context, op.SrcC);
            var c = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);

            EmitFfma(context, Instruction.FP32, a, b, c, op.Dest, op.NegA, op.NegC, op.Sat, op.WriteCC);
        }

        // FP32 FMA; B from the 32-bit immediate, addend read from the destination register.
        public static void Ffma32i(EmitterContext context)
        {
            InstFfma32i op = context.GetOp<InstFfma32i>();

            var a = GetSrcReg(context, op.SrcA);
            var b = GetSrcImm(context, op.Imm32);
            var c = GetSrcReg(context, op.Dest);

            EmitFfma(context, Instruction.FP32, a, b, c, op.Dest, op.NegA, op.NegC, op.Sat, op.WriteCC);
        }

        // ---- FP32 multiply ----

        // FP32 multiply with the instruction's scale; second operand from a register.
        public static void FmulR(EmitterContext context)
        {
            InstFmulR op = context.GetOp<InstFmulR>();

            var a = GetSrcReg(context, op.SrcA);
            var b = GetSrcReg(context, op.SrcB);

            EmitFmul(context, Instruction.FP32, op.Scale, a, b, op.Dest, op.NegA, op.Sat, op.WriteCC);
        }

        // FP32 multiply with the instruction's scale; second operand from the 20-bit immediate.
        public static void FmulI(EmitterContext context)
        {
            InstFmulI op = context.GetOp<InstFmulI>();

            var a = GetSrcReg(context, op.SrcA);
            var b = GetSrcImm(context, Imm20ToFloat(op.Imm20));

            EmitFmul(context, Instruction.FP32, op.Scale, a, b, op.Dest, op.NegA, op.Sat, op.WriteCC);
        }

        // FP32 multiply with the instruction's scale; second operand from a cbuf slot/offset.
        public static void FmulC(EmitterContext context)
        {
            InstFmulC op = context.GetOp<InstFmulC>();

            var a = GetSrcReg(context, op.SrcA);
            var b = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);

            EmitFmul(context, Instruction.FP32, op.Scale, a, b, op.Dest, op.NegA, op.Sat, op.WriteCC);
        }

        // FP32 multiply; second operand from the 32-bit immediate. No scale and no negation.
        public static void Fmul32i(EmitterContext context)
        {
            InstFmul32i op = context.GetOp<InstFmul32i>();

            var a = GetSrcReg(context, op.SrcA);
            var b = GetSrcImm(context, op.Imm32);

            EmitFmul(context, Instruction.FP32, MultiplyScale.NoScale, a, b, op.Dest, false, op.Sat, op.WriteCC);
        }

        // ---- Packed-half add ----

        // Half-pair add; second operand from a register.
        public static void Hadd2R(EmitterContext context)
        {
            InstHadd2R op = context.GetOp<InstHadd2R>();

            var a = GetHalfSrc(context, op.ASwizzle, op.SrcA, op.NegA, op.AbsA);
            var b = GetHalfSrc(context, op.BSwizzle, op.SrcB, op.NegB, op.AbsB);

            EmitHadd2Hmul2(context, op.OFmt, a, b, isAdd: true, op.Dest, op.Sat);
        }

        // Half-pair add; second operand from the two half immediates.
        public static void Hadd2I(EmitterContext context)
        {
            InstHadd2I op = context.GetOp<InstHadd2I>();

            var a = GetHalfSrc(context, op.ASwizzle, op.SrcA, op.NegA, op.AbsA);
            var b = GetHalfSrc(context, op.BimmH0, op.BimmH1);

            EmitHadd2Hmul2(context, op.OFmt, a, b, isAdd: true, op.Dest, op.Sat);
        }

        // Half-pair add; second operand from a cbuf slot/offset, read with the F32 swizzle.
        public static void Hadd2C(EmitterContext context)
        {
            InstHadd2C op = context.GetOp<InstHadd2C>();

            var a = GetHalfSrc(context, op.ASwizzle, op.SrcA, op.NegA, op.AbsA);
            var b = GetHalfSrc(context, HalfSwizzle.F32, op.CbufSlot, op.CbufOffset, op.NegB, op.AbsB);

            EmitHadd2Hmul2(context, op.OFmt, a, b, isAdd: true, op.Dest, op.Sat);
        }

        // Half-pair add; second operand from the packed immediate, output always F16.
        public static void Hadd232i(EmitterContext context)
        {
            InstHadd232i op = context.GetOp<InstHadd232i>();

            var a = GetHalfSrc(context, op.ASwizzle, op.SrcA, op.NegA, false);
            var b = GetHalfSrc(context, op.Imm);

            EmitHadd2Hmul2(context, OFmt.F16, a, b, isAdd: true, op.Dest, op.Sat);
        }

        // ---- Packed-half fused multiply-add ----
        // In every Hfma2 form the NegA flag is forwarded with the multiplier (second) operand.

        // Half-pair FMA; B and C from registers.
        public static void Hfma2R(EmitterContext context)
        {
            InstHfma2R op = context.GetOp<InstHfma2R>();

            var a = GetHalfSrc(context, op.ASwizzle, op.SrcA, false, false);
            var b = GetHalfSrc(context, op.BSwizzle, op.SrcB, op.NegA, false);
            var c = GetHalfSrc(context, op.CSwizzle, op.SrcC, op.NegC, false);

            EmitHfma2(context, op.OFmt, a, b, c, op.Dest, op.Sat);
        }

        // Half-pair FMA; B from the two half immediates, C from a register.
        public static void Hfma2I(EmitterContext context)
        {
            InstHfma2I op = context.GetOp<InstHfma2I>();

            var a = GetHalfSrc(context, op.ASwizzle, op.SrcA, false, false);
            var b = GetHalfSrc(context, op.BimmH0, op.BimmH1);
            var c = GetHalfSrc(context, op.CSwizzle, op.SrcC, op.NegC, false);

            EmitHfma2(context, op.OFmt, a, b, c, op.Dest, op.Sat);
        }

        // Half-pair FMA; B from a cbuf slot/offset (F32 swizzle), C from a register.
        public static void Hfma2C(EmitterContext context)
        {
            InstHfma2C op = context.GetOp<InstHfma2C>();

            var a = GetHalfSrc(context, op.ASwizzle, op.SrcA, false, false);
            var b = GetHalfSrc(context, HalfSwizzle.F32, op.CbufSlot, op.CbufOffset, op.NegA, false);
            var c = GetHalfSrc(context, op.CSwizzle, op.SrcC, op.NegC, false);

            EmitHfma2(context, op.OFmt, a, b, c, op.Dest, op.Sat);
        }

        // Half-pair FMA; the SrcC register is the multiplier and the cbuf value (F32 swizzle) the addend.
        public static void Hfma2Rc(EmitterContext context)
        {
            InstHfma2Rc op = context.GetOp<InstHfma2Rc>();

            var a = GetHalfSrc(context, op.ASwizzle, op.SrcA, false, false);
            var b = GetHalfSrc(context, op.CSwizzle, op.SrcC, op.NegA, false);
            var c = GetHalfSrc(context, HalfSwizzle.F32, op.CbufSlot, op.CbufOffset, op.NegC, false);

            EmitHfma2(context, op.OFmt, a, b, c, op.Dest, op.Sat);
        }

        // Half-pair FMA; B from the packed immediate, addend read from the destination register
        // as F16. Output is always F16 and never saturated.
        public static void Hfma232i(EmitterContext context)
        {
            InstHfma232i op = context.GetOp<InstHfma232i>();

            var a = GetHalfSrc(context, op.ASwizzle, op.SrcA, false, false);
            var b = GetHalfSrc(context, op.Imm);
            var c = GetHalfSrc(context, HalfSwizzle.F16, op.Dest, op.NegC, false);

            EmitHfma2(context, OFmt.F16, a, b, c, op.Dest, saturate: false);
        }

        // ---- Packed-half multiply ----

        // Half-pair multiply; second operand from a register (NegA is forwarded with operand B).
        public static void Hmul2R(EmitterContext context)
        {
            InstHmul2R op = context.GetOp<InstHmul2R>();

            var a = GetHalfSrc(context, op.ASwizzle, op.SrcA, false, op.AbsA);
            var b = GetHalfSrc(context, op.BSwizzle, op.SrcB, op.NegA, op.AbsB);

            EmitHadd2Hmul2(context, op.OFmt, a, b, isAdd: false, op.Dest, op.Sat);
        }

        // Half-pair multiply; second operand from the two half immediates.
        public static void Hmul2I(EmitterContext context)
        {
            InstHmul2I op = context.GetOp<InstHmul2I>();

            var a = GetHalfSrc(context, op.ASwizzle, op.SrcA, op.NegA, op.AbsA);
            var b = GetHalfSrc(context, op.BimmH0, op.BimmH1);

            EmitHadd2Hmul2(context, op.OFmt, a, b, isAdd: false, op.Dest, op.Sat);
        }

        // Half-pair multiply; second operand from a cbuf slot/offset (NegA is forwarded with operand B).
        public static void Hmul2C(EmitterContext context)
        {
            InstHmul2C op = context.GetOp<InstHmul2C>();

            var a = GetHalfSrc(context, op.ASwizzle, op.SrcA, false, op.AbsA);
            var b = GetHalfSrc(context, HalfSwizzle.F32, op.CbufSlot, op.CbufOffset, op.NegA, op.AbsB);

            EmitHadd2Hmul2(context, op.OFmt, a, b, isAdd: false, op.Dest, op.Sat);
        }

        // Half-pair multiply; second operand from the 32-bit immediate, output always F16.
        public static void Hmul232i(EmitterContext context)
        {
            InstHmul232i op = context.GetOp<InstHmul232i>();

            var a = GetHalfSrc(context, op.ASwizzle, op.SrcA, false, false);
            var b = GetHalfSrc(context, op.Imm32);

            EmitHadd2Hmul2(context, OFmt.F16, a, b, isAdd: false, op.Dest, op.Sat);
        }

        // ---- Shared helpers ----

        /// <summary>
        /// Applies abs/negate to both sources, adds them, optionally saturates the sum,
        /// writes it to <paramref name="rd"/> and updates the FP Z/N flags through
        /// <c>SetFPZnFlags</c> (which receives <paramref name="writeCC"/>).
        /// </summary>
        private static void EmitFadd(
            EmitterContext context,
            Instruction fpType,
            Operand srcA,
            Operand srcB,
            int rd,
            bool negateA,
            bool negateB,
            bool absoluteA,
            bool absoluteB,
            bool saturate,
            bool writeCC)
        {
            Operand lhs = context.FPAbsNeg(srcA, absoluteA, negateA, fpType);
            Operand rhs = context.FPAbsNeg(srcB, absoluteB, negateB, fpType);

            Operand sum = context.FPAdd(lhs, rhs, fpType);
            Operand result = context.FPSaturate(sum, saturate, fpType);

            SetDest(context, result, rd, fpType == Instruction.FP64);

            SetFPZnFlags(context, result, writeCC, fpType);
        }

        /// <summary>
        /// Optionally negates the multiplier and the addend, emits a fused multiply-add,
        /// optionally saturates it, writes it to <paramref name="rd"/> and updates the FP Z/N flags.
        /// </summary>
        private static void EmitFfma(
            EmitterContext context,
            Instruction fpType,
            Operand srcA,
            Operand srcB,
            Operand srcC,
            int rd,
            bool negateB,
            bool negateC,
            bool saturate,
            bool writeCC)
        {
            Operand multiplier = context.FPNegate(srcB, negateB, fpType);
            Operand addend = context.FPNegate(srcC, negateC, fpType);

            Operand fused = context.FPFusedMultiplyAdd(srcA, multiplier, addend, fpType);
            Operand result = context.FPSaturate(fused, saturate, fpType);

            SetDest(context, result, rd, fpType == Instruction.FP64);

            SetFPZnFlags(context, result, writeCC, fpType);
        }

        /// <summary>
        /// Optionally negates the second source, pre-multiplies the first source by the
        /// constant selected by <paramref name="scale"/> (when a scale is requested),
        /// multiplies, optionally saturates, writes the result to <paramref name="rd"/>
        /// and updates the FP Z/N flags.
        /// </summary>
        private static void EmitFmul(
            EmitterContext context,
            Instruction fpType,
            MultiplyScale scale,
            Operand srcA,
            Operand srcB,
            int rd,
            bool negateB,
            bool saturate,
            bool writeCC)
        {
            bool isFP64 = fpType == Instruction.FP64;

            Operand lhs = srcA;
            Operand rhs = context.FPNegate(srcB, negateB, fpType);

            if (scale != MultiplyScale.NoScale)
            {
                Operand scaleConst;

                switch (scale)
                {
                    case MultiplyScale.D2:
                        scaleConst = ConstF(0.5f);
                        break;
                    case MultiplyScale.D4:
                        scaleConst = ConstF(0.25f);
                        break;
                    case MultiplyScale.D8:
                        scaleConst = ConstF(0.125f);
                        break;
                    case MultiplyScale.M2:
                        scaleConst = ConstF(2f);
                        break;
                    case MultiplyScale.M4:
                        scaleConst = ConstF(4f);
                        break;
                    case MultiplyScale.M8:
                        scaleConst = ConstF(8f);
                        break;
                    default:
                        // Invalid, behave as if it had no scale.
                        scaleConst = ConstF(1f);
                        break;
                }

                // A factor of 1 can only come from the fallback case above.
                if (scaleConst.AsFloat() == 1f)
                {
                    context.Config.GpuAccessor.Log($"Invalid FP multiply scale \"{scale}\".");
                }

                if (isFP64)
                {
                    scaleConst = context.FP32ConvertToFP64(scaleConst);
                }

                lhs = context.FPMultiply(lhs, scaleConst, fpType);
            }

            Operand product = context.FPMultiply(lhs, rhs, fpType);
            Operand result = context.FPSaturate(product, saturate, fpType);

            SetDest(context, result, rd, isFP64);

            SetFPZnFlags(context, result, writeCC, fpType);
        }

        /// <summary>
        /// Adds or multiplies the two elements of each half-pair source (chosen by
        /// <paramref name="isAdd"/>), optionally saturates each result, then packs both
        /// with <c>GetHalfPacked</c> and copies the packed value to <paramref name="rd"/>.
        /// </summary>
        private static void EmitHadd2Hmul2(
            EmitterContext context,
            OFmt swizzle,
            Operand[] srcA,
            Operand[] srcB,
            bool isAdd,
            int rd,
            bool saturate)
        {
            Operand[] results = new Operand[2];

            for (int i = 0; i < results.Length; i++)
            {
                Operand value = isAdd
                    ? context.FPAdd(srcA[i], srcB[i])
                    : context.FPMultiply(srcA[i], srcB[i]);

                results[i] = context.FPSaturate(value, saturate);
            }

            context.Copy(GetDest(rd), GetHalfPacked(context, swizzle, results, rd));
        }

        /// <summary>
        /// Emits a fused multiply-add for each element of the half-pair sources, optionally
        /// saturates each result, then packs both with <c>GetHalfPacked</c> and copies the
        /// packed value to <paramref name="rd"/>.
        /// </summary>
        public static void EmitHfma2(
            EmitterContext context,
            OFmt swizzle,
            Operand[] srcA,
            Operand[] srcB,
            Operand[] srcC,
            int rd,
            bool saturate)
        {
            Operand[] results = new Operand[2];

            for (int i = 0; i < results.Length; i++)
            {
                Operand fused = context.FPFusedMultiplyAdd(srcA[i], srcB[i], srcC[i]);

                results[i] = context.FPSaturate(fused, saturate);
            }

            context.Copy(GetDest(rd), GetHalfPacked(context, swizzle, results, rd));
        }
    }
}
// src/Ryujinx.Graphics.Shader/Instructions/InstEmitFloatComparison.cs
using Ryujinx.Graphics.Shader.Decoders;
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using Ryujinx.Graphics.Shader.Translation;
using System;

using static Ryujinx.Graphics.Shader.Instructions.InstEmitAluHelper;
using static Ryujinx.Graphics.Shader.Instructions.InstEmitHelper;
using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;

namespace Ryujinx.Graphics.Shader.Instructions
{
    // Floating-point comparison emitters (set-to-register, set-to-predicate and
    // compare-and-select) for FP64, FP32 and packed-half operands.
    // Each public method decodes the current instruction, fetches its sources and
    // forwards them to one of the shared Emit* helpers at the bottom of this file.
    static partial class InstEmit
    {
        // ---- FP64 compare, result to register ----

        // FP64 set; second operand from a register.
        public static void DsetR(EmitterContext context)
        {
            InstDsetR op = context.GetOp<InstDsetR>();

            var a = GetSrcReg(context, op.SrcA, isFP64: true);
            var b = GetSrcReg(context, op.SrcB, isFP64: true);

            EmitFset(context, op.FComp, op.Bop, a, b, op.SrcPred, op.SrcPredInv, op.Dest, op.AbsA, op.AbsB, op.NegA, op.NegB, op.BVal, op.WriteCC, isFP64: true);
        }

        // FP64 set; second operand from the 20-bit immediate.
        public static void DsetI(EmitterContext context)
        {
            InstDsetI op = context.GetOp<InstDsetI>();

            var a = GetSrcReg(context, op.SrcA, isFP64: true);
            var b = GetSrcImm(context, Imm20ToFloat(op.Imm20), isFP64: true);

            EmitFset(context, op.FComp, op.Bop, a, b, op.SrcPred, op.SrcPredInv, op.Dest, op.AbsA, op.AbsB, op.NegA, op.NegB, op.BVal, op.WriteCC, isFP64: true);
        }

        // FP64 set; second operand from a cbuf slot/offset.
        public static void DsetC(EmitterContext context)
        {
            InstDsetC op = context.GetOp<InstDsetC>();

            var a = GetSrcReg(context, op.SrcA, isFP64: true);
            var b = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset, isFP64: true);

            EmitFset(context, op.FComp, op.Bop, a, b, op.SrcPred, op.SrcPredInv, op.Dest, op.AbsA, op.AbsB, op.NegA, op.NegB, op.BVal, op.WriteCC, isFP64: true);
        }

        // ---- FP64 compare, result to predicates ----

        // FP64 set-predicate; second operand from a register.
        public static void DsetpR(EmitterContext context)
        {
            InstDsetpR op = context.GetOp<InstDsetpR>();

            var a = GetSrcReg(context, op.SrcA, isFP64: true);
            var b = GetSrcReg(context, op.SrcB, isFP64: true);

            EmitFsetp(context, op.FComp, op.Bop, a, b, op.SrcPred, op.SrcPredInv, op.DestPred, op.DestPredInv, op.AbsA, op.AbsB, op.NegA, op.NegB, writeCC: false, isFP64: true);
        }

        // FP64 set-predicate; second operand from the 20-bit immediate.
        public static void DsetpI(EmitterContext context)
        {
            InstDsetpI op = context.GetOp<InstDsetpI>();

            var a = GetSrcReg(context, op.SrcA, isFP64: true);
            var b = GetSrcImm(context, Imm20ToFloat(op.Imm20), isFP64: true);

            EmitFsetp(context, op.FComp, op.Bop, a, b, op.SrcPred, op.SrcPredInv, op.DestPred, op.DestPredInv, op.AbsA, op.AbsB, op.NegA, op.NegB, writeCC: false, isFP64: true);
        }

        // FP64 set-predicate; second operand from a cbuf slot/offset.
        public static void DsetpC(EmitterContext context)
        {
            InstDsetpC op = context.GetOp<InstDsetpC>();

            var a = GetSrcReg(context, op.SrcA, isFP64: true);
            var b = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset, isFP64: true);

            EmitFsetp(context, op.FComp, op.Bop, a, b, op.SrcPred, op.SrcPredInv, op.DestPred, op.DestPredInv, op.AbsA, op.AbsB, op.NegA, op.NegB, writeCC: false, isFP64: true);
        }

        // ---- FP32 compare-and-select ----

        // FP32 compare-select; B and C from registers.
        public static void FcmpR(EmitterContext context)
        {
            InstFcmpR op = context.GetOp<InstFcmpR>();

            var a = GetSrcReg(context, op.SrcA);
            var b = GetSrcReg(context, op.SrcB);
            var c = GetSrcReg(context, op.SrcC);

            EmitFcmp(context, op.FComp, a, b, c, op.Dest);
        }

        // FP32 compare-select; B from the 20-bit immediate, C from a register.
        public static void FcmpI(EmitterContext context)
        {
            InstFcmpI op = context.GetOp<InstFcmpI>();

            var a = GetSrcReg(context, op.SrcA);
            var b = GetSrcImm(context, Imm20ToFloat(op.Imm20));
            var c = GetSrcReg(context, op.SrcC);

            EmitFcmp(context, op.FComp, a, b, c, op.Dest);
        }

        // FP32 compare-select; B from a cbuf slot/offset, C from a register.
        public static void FcmpC(EmitterContext context)
        {
            InstFcmpC op = context.GetOp<InstFcmpC>();

            var a = GetSrcReg(context, op.SrcA);
            var b = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);
            var c = GetSrcReg(context, op.SrcC);

            EmitFcmp(context, op.FComp, a, b, c, op.Dest);
        }

        // FP32 compare-select; the SrcC register is passed as B and the cbuf value as C.
        public static void FcmpRc(EmitterContext context)
        {
            InstFcmpRc op = context.GetOp<InstFcmpRc>();

            var a = GetSrcReg(context, op.SrcA);
            var b = GetSrcReg(context, op.SrcC);
            var c = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);

            EmitFcmp(context, op.FComp, a, b, c, op.Dest);
        }

        // ---- FP32 compare, result to register ----

        // FP32 set; second operand from a register.
        public static void FsetR(EmitterContext context)
        {
            InstFsetR op = context.GetOp<InstFsetR>();

            var a = GetSrcReg(context, op.SrcA);
            var b = GetSrcReg(context, op.SrcB);

            EmitFset(context, op.FComp, op.Bop, a, b, op.SrcPred, op.SrcPredInv, op.Dest, op.AbsA, op.AbsB, op.NegA, op.NegB, op.BVal, op.WriteCC);
        }

        // FP32 set; second operand from a cbuf slot/offset.
        public static void FsetC(EmitterContext context)
        {
            InstFsetC op = context.GetOp<InstFsetC>();

            var a = GetSrcReg(context, op.SrcA);
            var b = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);

            EmitFset(context, op.FComp, op.Bop, a, b, op.SrcPred, op.SrcPredInv, op.Dest, op.AbsA, op.AbsB, op.NegA, op.NegB, op.BVal, op.WriteCC);
        }

        // FP32 set; second operand from the 20-bit immediate.
        public static void FsetI(EmitterContext context)
        {
            InstFsetI op = context.GetOp<InstFsetI>();

            var a = GetSrcReg(context, op.SrcA);
            var b = GetSrcImm(context, Imm20ToFloat(op.Imm20));

            EmitFset(context, op.FComp, op.Bop, a, b, op.SrcPred, op.SrcPredInv, op.Dest, op.AbsA, op.AbsB, op.NegA, op.NegB, op.BVal, op.WriteCC);
        }

        // ---- FP32 compare, result to predicates ----

        // FP32 set-predicate; second operand from a register.
        public static void FsetpR(EmitterContext context)
        {
            InstFsetpR op = context.GetOp<InstFsetpR>();

            var a = GetSrcReg(context, op.SrcA);
            var b = GetSrcReg(context, op.SrcB);

            EmitFsetp(context, op.FComp, op.Bop, a, b, op.SrcPred, op.SrcPredInv, op.DestPred, op.DestPredInv, op.AbsA, op.AbsB, op.NegA, op.NegB, op.WriteCC);
        }

        // FP32 set-predicate; second operand from the 20-bit immediate.
        public static void FsetpI(EmitterContext context)
        {
            InstFsetpI op = context.GetOp<InstFsetpI>();

            var a = GetSrcReg(context, op.SrcA);
            var b = GetSrcImm(context, Imm20ToFloat(op.Imm20));

            EmitFsetp(context, op.FComp, op.Bop, a, b, op.SrcPred, op.SrcPredInv, op.DestPred, op.DestPredInv, op.AbsA, op.AbsB, op.NegA, op.NegB, op.WriteCC);
        }

        // FP32 set-predicate; second operand from a cbuf slot/offset.
        public static void FsetpC(EmitterContext context)
        {
            InstFsetpC op = context.GetOp<InstFsetpC>();

            var a = GetSrcReg(context, op.SrcA);
            var b = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);

            EmitFsetp(context, op.FComp, op.Bop, a, b, op.SrcPred, op.SrcPredInv, op.DestPred, op.DestPredInv, op.AbsA, op.AbsB, op.NegA, op.NegB, op.WriteCC);
        }

        // ---- Packed-half compare, result to register ----

        // Half-pair set; second operand from a register.
        public static void Hset2R(EmitterContext context)
        {
            InstHset2R op = context.GetOp<InstHset2R>();

            var a = GetHalfSrc(context, op.ASwizzle, op.SrcA, op.NegA, op.AbsA);
            var b = GetHalfSrc(context, op.BSwizzle, op.SrcB, op.NegB, op.AbsB);

            EmitHset2(context, op.Cmp, op.Bop, a, b, op.SrcPred, op.SrcPredInv, op.Dest, op.Bval);
        }

        // Half-pair set; second operand from the two half immediates.
        public static void Hset2I(EmitterContext context)
        {
            InstHset2I op = context.GetOp<InstHset2I>();

            var a = GetHalfSrc(context, op.ASwizzle, op.SrcA, op.NegA, op.AbsA);
            var b = GetHalfSrc(context, op.BimmH0, op.BimmH1);

            EmitHset2(context, op.Cmp, op.Bop, a, b, op.SrcPred, op.SrcPredInv, op.Dest, op.Bval);
        }

        // Half-pair set; second operand from a cbuf slot/offset (F32 swizzle, no absolute value).
        public static void Hset2C(EmitterContext context)
        {
            InstHset2C op = context.GetOp<InstHset2C>();

            var a = GetHalfSrc(context, op.ASwizzle, op.SrcA, op.NegA, op.AbsA);
            var b = GetHalfSrc(context, HalfSwizzle.F32, op.CbufSlot, op.CbufOffset, op.NegB, false);

            EmitHset2(context, op.Cmp, op.Bop, a, b, op.SrcPred, op.SrcPredInv, op.Dest, op.Bval);
        }

        // ---- Packed-half compare, result to predicates ----

        // Half-pair set-predicate; second operand from a register (condition read from FComp2).
        public static void Hsetp2R(EmitterContext context)
        {
            InstHsetp2R op = context.GetOp<InstHsetp2R>();

            var a = GetHalfSrc(context, op.ASwizzle, op.SrcA, op.NegA, op.AbsA);
            var b = GetHalfSrc(context, op.BSwizzle, op.SrcB, op.NegB, op.AbsB);

            EmitHsetp2(context, op.FComp2, op.Bop, a, b, op.SrcPred, op.SrcPredInv, op.DestPred, op.DestPredInv, op.HAnd);
        }

        // Half-pair set-predicate; second operand from the two half immediates.
        public static void Hsetp2I(EmitterContext context)
        {
            InstHsetp2I op = context.GetOp<InstHsetp2I>();

            var a = GetHalfSrc(context, op.ASwizzle, op.SrcA, op.NegA, op.AbsA);
            var b = GetHalfSrc(context, op.BimmH0, op.BimmH1);

            EmitHsetp2(context, op.FComp, op.Bop, a, b, op.SrcPred, op.SrcPredInv, op.DestPred, op.DestPredInv, op.HAnd);
        }

        // Half-pair set-predicate; second operand from a cbuf slot/offset (F32 swizzle).
        public static void Hsetp2C(EmitterContext context)
        {
            InstHsetp2C op = context.GetOp<InstHsetp2C>();

            var a = GetHalfSrc(context, op.ASwizzle, op.SrcA, op.NegA, op.AbsA);
            var b = GetHalfSrc(context, HalfSwizzle.F32, op.CbufSlot, op.CbufOffset, op.NegB, op.AbsB);

            EmitHsetp2(context, op.FComp, op.Bop, a, b, op.SrcPred, op.SrcPredInv, op.DestPred, op.DestPredInv, op.HAnd);
        }

        // ---- Shared helpers ----

        /// <summary>
        /// Compares <paramref name="srcC"/> against the float constant 0 using
        /// <paramref name="cmpOp"/> and copies <paramref name="srcA"/> (comparison true)
        /// or <paramref name="srcB"/> (comparison false) to <paramref name="rd"/>.
        /// </summary>
        private static void EmitFcmp(EmitterContext context, FComp cmpOp, Operand srcA, Operand srcB, Operand srcC, int rd)
        {
            Operand condition = GetFPComparison(context, cmpOp, srcC, ConstF(0));
            Operand selected = context.ConditionalSelect(condition, srcA, srcB);

            context.Copy(GetDest(rd), selected);
        }

        /// <summary>
        /// Compares the (abs/negate adjusted) sources, combines the outcome with the source
        /// predicate through <paramref name="logicOp"/> and copies the result to
        /// <paramref name="rd"/>. When <paramref name="boolFloat"/> is set, the value written
        /// is selected between <c>ConstF(1)</c> and <c>Const(0)</c> and the FP flag helper is
        /// used; otherwise the raw comparison result is written and the integer flag helper is used.
        /// </summary>
        private static void EmitFset(
            EmitterContext context,
            FComp cmpOp,
            BoolOp logicOp,
            Operand srcA,
            Operand srcB,
            int srcPred,
            bool srcPredInv,
            int rd,
            bool absoluteA,
            bool absoluteB,
            bool negateA,
            bool negateB,
            bool boolFloat,
            bool writeCC,
            bool isFP64 = false)
        {
            Instruction fpType = Instruction.FP32;

            if (isFP64)
            {
                fpType = Instruction.FP64;
            }

            Operand lhs = context.FPAbsNeg(srcA, absoluteA, negateA, fpType);
            Operand rhs = context.FPAbsNeg(srcB, absoluteB, negateB, fpType);

            Operand compared = GetFPComparison(context, cmpOp, lhs, rhs, fpType);
            Operand predicate = GetPredicate(context, srcPred, srcPredInv);

            Operand result = GetPredLogicalOp(context, logicOp, compared, predicate);

            Operand dest = GetDest(rd);

            if (boolFloat)
            {
                result = context.ConditionalSelect(result, ConstF(1), Const(0));
            }

            context.Copy(dest, result);

            if (boolFloat)
            {
                SetFPZnFlags(context, result, writeCC);
            }
            else
            {
                SetZnFlags(context, result, writeCC, extended: false);
            }
        }

        /// <summary>
        /// Compares the (abs/negate adjusted) sources and writes two predicates: the comparison
        /// result to <paramref name="destPred"/> and its bitwise NOT to
        /// <paramref name="destPredInv"/>, each first combined with the source predicate through
        /// <paramref name="logicOp"/>. <paramref name="writeCC"/> is accepted but not used here.
        /// </summary>
        private static void EmitFsetp(
            EmitterContext context,
            FComp cmpOp,
            BoolOp logicOp,
            Operand srcA,
            Operand srcB,
            int srcPred,
            bool srcPredInv,
            int destPred,
            int destPredInv,
            bool absoluteA,
            bool absoluteB,
            bool negateA,
            bool negateB,
            bool writeCC,
            bool isFP64 = false)
        {
            Instruction fpType = Instruction.FP32;

            if (isFP64)
            {
                fpType = Instruction.FP64;
            }

            Operand lhs = context.FPAbsNeg(srcA, absoluteA, negateA, fpType);
            Operand rhs = context.FPAbsNeg(srcB, absoluteB, negateB, fpType);

            Operand direct = GetFPComparison(context, cmpOp, lhs, rhs, fpType);
            Operand inverse = context.BitwiseNot(direct);
            Operand predicate = GetPredicate(context, srcPred, srcPredInv);

            direct = GetPredLogicalOp(context, logicOp, direct, predicate);
            inverse = GetPredLogicalOp(context, logicOp, inverse, predicate);

            context.Copy(Register(destPred, RegisterType.Predicate), direct);
            context.Copy(Register(destPredInv, RegisterType.Predicate), inverse);
        }

        /// <summary>
        /// Compares both elements of the half-pair sources, combines each outcome with the
        /// source predicate through <paramref name="logicOp"/> and writes one packed value to
        /// <paramref name="rd"/>: a <c>PackHalf2x16</c> of the per-element selects when
        /// <paramref name="boolFloat"/> is set, otherwise the low 16 bits of the first result
        /// OR-ed with the second result shifted left by 16.
        /// </summary>
        private static void EmitHset2(
            EmitterContext context,
            FComp cmpOp,
            BoolOp logicOp,
            Operand[] srcA,
            Operand[] srcB,
            int srcPred,
            bool srcPredInv,
            int rd,
            bool boolFloat)
        {
            Operand first = GetFPComparison(context, cmpOp, srcA[0], srcB[0]);
            Operand second = GetFPComparison(context, cmpOp, srcA[1], srcB[1]);

            Operand predicate = GetPredicate(context, srcPred, srcPredInv);

            first = GetPredLogicalOp(context, logicOp, first, predicate);
            second = GetPredLogicalOp(context, logicOp, second, predicate);

            if (!boolFloat)
            {
                Operand lowBits = context.BitwiseAnd(first, Const(0xffff));
                Operand highBits = context.ShiftLeft(second, Const(16));

                Operand combined = context.BitwiseOr(lowBits, highBits);

                context.Copy(GetDest(rd), combined);

                return;
            }

            first = context.ConditionalSelect(first, ConstF(1), Const(0));
            second = context.ConditionalSelect(second, ConstF(1), Const(0));

            context.Copy(GetDest(rd), context.PackHalf2x16(first, second));
        }

        /// <summary>
        /// Compares both elements of the half-pair sources and writes two predicates.
        /// With <paramref name="hAnd"/> set, the first predicate is the AND of both element
        /// results and the second is its bitwise NOT; otherwise each element result goes to
        /// its own predicate. Both are combined with the source predicate through
        /// <paramref name="logicOp"/> before being written.
        /// </summary>
        private static void EmitHsetp2(
            EmitterContext context,
            FComp cmpOp,
            BoolOp logicOp,
            Operand[] srcA,
            Operand[] srcB,
            int srcPred,
            bool srcPredInv,
            int destPred,
            int destPredInv,
            bool hAnd)
        {
            Operand first = GetFPComparison(context, cmpOp, srcA[0], srcB[0]);
            Operand second = GetFPComparison(context, cmpOp, srcA[1], srcB[1]);

            if (hAnd)
            {
                first = context.BitwiseAnd(first, second);
                second = context.BitwiseNot(first);
            }

            Operand predicate = GetPredicate(context, srcPred, srcPredInv);

            first = GetPredLogicalOp(context, logicOp, first, predicate);
            second = GetPredLogicalOp(context, logicOp, second, predicate);

            context.Copy(Register(destPred, RegisterType.Predicate), first);
            context.Copy(Register(destPredInv, RegisterType.Predicate), second);
        }

        /// <summary>
        /// Builds the operand holding the outcome of comparing <paramref name="srcA"/> with
        /// <paramref name="srcB"/> under <paramref name="cond"/>.
        /// </summary>
        /// <exception cref="ArgumentException">
        /// <paramref name="cond"/>, with its <c>FComp.Nan</c> bit cleared, is not one of
        /// Lt, Eq, Le, Gt, Ne or Ge.
        /// </exception>
        private static Operand GetFPComparison(EmitterContext context, FComp cond, Operand srcA, Operand srcB, Instruction fpType = Instruction.FP32)
        {
            // Constant conditions need no comparison at all.
            if (cond == FComp.T)
            {
                return Const(IrConsts.True);
            }

            if (cond == FComp.F)
            {
                return Const(IrConsts.False);
            }

            // Pure NaN tests: "either source is NaN", or the negation of that.
            if (cond == FComp.Nan || cond == FComp.Num)
            {
                Operand anyNan = context.BitwiseOr(context.IsNan(srcA, fpType), context.IsNan(srcB, fpType));

                return cond == FComp.Num ? context.BitwiseNot(anyNan) : anyNan;
            }

            Instruction inst = (cond & ~FComp.Nan) switch
            {
                FComp.Lt => Instruction.CompareLess,
                FComp.Eq => Instruction.CompareEqual,
                FComp.Le => Instruction.CompareLessOrEqual,
                FComp.Gt => Instruction.CompareGreater,
                FComp.Ne => Instruction.CompareNotEqual,
                FComp.Ge => Instruction.CompareGreaterOrEqual,
                _ => throw new ArgumentException($"Unexpected condition \"{cond}\".")
            };

            Operand res = context.Add(inst | fpType, Local(), srcA, srcB);

            // When the Nan bit is set, the result is also true if either source is NaN.
            if ((cond & FComp.Nan) != 0)
            {
                res = context.BitwiseOr(res, context.IsNan(srcA, fpType));
                res = context.BitwiseOr(res, context.IsNan(srcB, fpType));
            }

            return res;
        }
    }
}
// src/Ryujinx.Graphics.Shader/Instructions/InstEmitFloatMinMax.cs
using Ryujinx.Graphics.Shader.Decoders;
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using Ryujinx.Graphics.Shader.Translation;

using static Ryujinx.Graphics.Shader.Instructions.InstEmitAluHelper;
using static Ryujinx.Graphics.Shader.Instructions.InstEmitHelper;

namespace Ryujinx.Graphics.Shader.Instructions
{
    // Floating-point min/max emitters for FP64 (Dmnmx*) and FP32 (Fmnmx*).
    // Each public method decodes the current instruction, fetches both sources and the
    // selecting predicate, and forwards them to EmitFmnmx.
    static partial class InstEmit
    {
        // FP64 min/max; second operand from a register.
        public static void DmnmxR(EmitterContext context)
        {
            InstDmnmxR op = context.GetOp<InstDmnmxR>();

            var a = GetSrcReg(context, op.SrcA, isFP64: true);
            var b = GetSrcReg(context, op.SrcB, isFP64: true);
            var selector = GetPredicate(context, op.SrcPred, op.SrcPredInv);

            EmitFmnmx(context, a, b, selector, op.Dest, op.AbsA, op.AbsB, op.NegA, op.NegB, op.WriteCC, isFP64: true);
        }

        // FP64 min/max; second operand from the 20-bit immediate.
        public static void DmnmxI(EmitterContext context)
        {
            InstDmnmxI op = context.GetOp<InstDmnmxI>();

            var a = GetSrcReg(context, op.SrcA, isFP64: true);
            var b = GetSrcImm(context, Imm20ToFloat(op.Imm20), isFP64: true);
            var selector = GetPredicate(context, op.SrcPred, op.SrcPredInv);

            EmitFmnmx(context, a, b, selector, op.Dest, op.AbsA, op.AbsB, op.NegA, op.NegB, op.WriteCC, isFP64: true);
        }

        // FP64 min/max; second operand from a cbuf slot/offset.
        public static void DmnmxC(EmitterContext context)
        {
            InstDmnmxC op = context.GetOp<InstDmnmxC>();

            var a = GetSrcReg(context, op.SrcA, isFP64: true);
            var b = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset, isFP64: true);
            var selector = GetPredicate(context, op.SrcPred, op.SrcPredInv);

            EmitFmnmx(context, a, b, selector, op.Dest, op.AbsA, op.AbsB, op.NegA, op.NegB, op.WriteCC, isFP64: true);
        }

        // FP32 min/max; second operand from a register.
        public static void FmnmxR(EmitterContext context)
        {
            InstFmnmxR op = context.GetOp<InstFmnmxR>();

            var a = GetSrcReg(context, op.SrcA);
            var b = GetSrcReg(context, op.SrcB);
            var selector = GetPredicate(context, op.SrcPred, op.SrcPredInv);

            EmitFmnmx(context, a, b, selector, op.Dest, op.AbsA, op.AbsB, op.NegA, op.NegB, op.WriteCC);
        }

        // FP32 min/max; second operand from the 20-bit immediate.
        public static void FmnmxI(EmitterContext context)
        {
            InstFmnmxI op = context.GetOp<InstFmnmxI>();

            var a = GetSrcReg(context, op.SrcA);
            var b = GetSrcImm(context, Imm20ToFloat(op.Imm20));
            var selector = GetPredicate(context, op.SrcPred, op.SrcPredInv);

            EmitFmnmx(context, a, b, selector, op.Dest, op.AbsA, op.AbsB, op.NegA, op.NegB, op.WriteCC);
        }

        // FP32 min/max; second operand from a cbuf slot/offset.
        public static void FmnmxC(EmitterContext context)
        {
            InstFmnmxC op = context.GetOp<InstFmnmxC>();

            var a = GetSrcReg(context, op.SrcA);
            var b = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);
            var selector = GetPredicate(context, op.SrcPred, op.SrcPredInv);

            EmitFmnmx(context, a, b, selector, op.Dest, op.AbsA, op.AbsB, op.NegA, op.NegB, op.WriteCC);
        }

        /// <summary>
        /// Applies abs/negate to both sources, emits both the minimum and the maximum, and
        /// writes to <paramref name="rd"/> the minimum when <paramref name="srcPred"/> is true
        /// or the maximum otherwise. The FP Z/N flags are then updated through
        /// <c>SetFPZnFlags</c> (which receives <paramref name="writeCC"/>).
        /// </summary>
        private static void EmitFmnmx(
            EmitterContext context,
            Operand srcA,
            Operand srcB,
            Operand srcPred,
            int rd,
            bool absoluteA,
            bool absoluteB,
            bool negateA,
            bool negateB,
            bool writeCC,
            bool isFP64 = false)
        {
            Instruction fpType = Instruction.FP32;

            if (isFP64)
            {
                fpType = Instruction.FP64;
            }

            Operand lhs = context.FPAbsNeg(srcA, absoluteA, negateA, fpType);
            Operand rhs = context.FPAbsNeg(srcB, absoluteB, negateB, fpType);

            // Both candidates are emitted; the predicate picks one at run time.
            Operand smaller = context.FPMinimum(lhs, rhs, fpType);
            Operand larger = context.FPMaximum(lhs, rhs, fpType);

            Operand chosen = context.ConditionalSelect(srcPred, smaller, larger);

            SetDest(context, chosen, rd, isFP64);

            SetFPZnFlags(context, chosen, writeCC, fpType);
        }
    }
}
// \ No newline at end of file diff --git a/src/Ryujinx.Graphics.Shader/Instructions/InstEmitFlowControl.cs b/src/Ryujinx.Graphics.Shader/Instructions/InstEmitFlowControl.cs new file mode 100644 index 00000000..91c23230 --- /dev/null +++ b/src/Ryujinx.Graphics.Shader/Instructions/InstEmitFlowControl.cs @@ -0,0 +1,322 @@
using Ryujinx.Graphics.Shader.Decoders;
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using Ryujinx.Graphics.Shader.Translation;
using System.Collections.Generic;
using System.Linq;

using static Ryujinx.Graphics.Shader.Instructions.InstEmitHelper;
using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;

namespace Ryujinx.Graphics.Shader.Instructions
{
    // Flow control emitters (branches, calls, returns, and the push/pop style
    // SSY/PBK/PCNT + SYNC/BRK/CONT instructions).
    static partial class InstEmit
    {
        /// <summary>Branches to the address of the last successor of the current block.</summary>
        public static void Bra(EmitterContext context)
        {
            // The decoded op is not needed here. The call is kept because GetOp is
            // defined elsewhere and may validate the encoding.
            context.GetOp<InstBra>();

            EmitBranch(context, context.CurrBlock.Successors[^1].Address);
        }

        /// <summary>Emits the shared BRK/CONT/SYNC sequence.</summary>
        public static void Brk(EmitterContext context)
        {
            context.GetOp<InstBrk>();

            EmitBrkContSync(context);
        }

        /// <summary>
        /// Emits an indirect branch. The target address is the value of the source register
        /// plus the op's absolute address, and it is matched against the addresses of the
        /// possible target blocks (the block successors, minus the fall-through one if any).
        /// </summary>
        public static void Brx(EmitterContext context)
        {
            InstBrx op = context.GetOp<InstBrx>();
            InstOp currOp = context.CurrOp;
            int startIndex = context.CurrBlock.HasNext() ? 1 : 0;

            if (context.CurrBlock.Successors.Count <= startIndex)
            {
                context.Config.GpuAccessor.Log($"Failed to find targets for BRX instruction at 0x{currOp.Address:X}.");
                return;
            }

            int offset = (int)currOp.GetAbsoluteAddress();

            Operand address = context.IAdd(Register(op.SrcA, RegisterType.Gpr), Const(offset));

            // Sort the candidate targets once (ascending by address) and reuse the result
            // for both the eligibility check and the emission loops below.
            Block[] sortedTargets = context.CurrBlock.Successors
                .Skip(startIndex)
                .OrderBy(x => x.Address)
                .ToArray();

            int total = sortedTargets.Length;

            // Every target except the last one (highest address) must have a single
            // predecessor and be located after the current block.
            bool allTargetsSinglePred = true;

            for (int index = 0; index < total - 1; index++)
            {
                Block target = sortedTargets[index];

                if (target.Predecessors.Count > 1 || target.Address <= context.CurrBlock.Address)
                {
                    allTargetsSinglePred = false;
                    break;
                }
            }

            if (allTargetsSinglePred)
            {
                // Chain blocks, each target block will check if the BRX target address
                // matches its own address, if not, it jumps to the next target which will do the same check,
                // until it reaches the last possible target, which executed unconditionally.
                // We can only do this if the BRX block is the only predecessor of all target blocks.
                // Additionally, this is not supported for blocks located before the current block,
                // since it will be too late to insert a label, but this is something that can be improved
                // in the future if necessary.

                Block currentTarget = null;
                ulong firstTargetAddress = 0;

                foreach (Block nextTarget in sortedTargets)
                {
                    if (currentTarget != null)
                    {
                        // Targets sharing an address only need a single check.
                        if (currentTarget.Address != nextTarget.Address)
                        {
                            context.SetBrxTarget(currentTarget.Address, address, (int)currentTarget.Address, nextTarget.Address);
                        }
                    }
                    else
                    {
                        firstTargetAddress = nextTarget.Address;
                    }

                    currentTarget = nextTarget;
                }

                context.Branch(context.GetLabel(firstTargetAddress));
            }
            else
            {
                // Emit the branches sequentially.
                // This generates slightly worse code, but should work for all cases.
                // Targets are visited from the highest to the lowest address; only the
                // address of each target is used, so walking the ascending array backwards
                // gives the same sequence as a descending sort.

                ulong lastTargetAddress = ulong.MaxValue;
                int count = 0;

                for (int index = total - 1; index >= 0; index--)
                {
                    Block target = sortedTargets[index];

                    Operand label = context.GetLabel(target.Address);

                    if (++count < total)
                    {
                        if (target.Address != lastTargetAddress)
                        {
                            context.BranchIfTrue(label, context.ICompareEqual(address, Const((int)target.Address)));
                        }

                        lastTargetAddress = target.Address;
                    }
                    else
                    {
                        // Last candidate: taken unconditionally.
                        context.Branch(label);
                    }
                }
            }
        }

        /// <summary>
        /// Emits a call to the function located at the op's absolute address. Compiler
        /// generated functions of the FSI begin/end kinds are replaced by the matching
        /// context operation instead of a call.
        /// </summary>
        public static void Cal(EmitterContext context)
        {
            context.GetOp<InstCal>();

            DecodedFunction function = context.Program.GetFunctionByAddress(context.CurrOp.GetAbsoluteAddress());

            if (function.IsCompilerGenerated)
            {
                switch (function.Type)
                {
                    case FunctionType.BuiltInFSIBegin:
                        context.FSIBegin();
                        break;
                    case FunctionType.BuiltInFSIEnd:
                        context.FSIEnd();
                        break;
                }
            }
            else
            {
                context.Call(function.Id, false);
            }
        }

        /// <summary>Emits the shared BRK/CONT/SYNC sequence.</summary>
        public static void Cont(EmitterContext context)
        {
            context.GetOp<InstCont>();

            EmitBrkContSync(context);
        }

        /// <summary>
        /// Emits a return from the main function, optionally guarded by the op's condition
        /// code. On a non-main function this only logs a message.
        /// </summary>
        public static void Exit(EmitterContext context)
        {
            InstExit op = context.GetOp<InstExit>();

            if (context.IsNonMain)
            {
                context.Config.GpuAccessor.Log("Invalid exit on non-main function.");
                return;
            }

            if (op.Ccc == Ccc.T)
            {
                context.Return();
            }
            else
            {
                Operand cond = GetCondition(context, op.Ccc, IrConsts.False);

                // If the condition is always false, we don't need to do anything.
                if (cond.Type != OperandType.Constant || cond.Value != IrConsts.False)
                {
                    Operand lblSkip = Label();
                    context.BranchIfFalse(lblSkip, cond);
                    context.Return();
                    context.MarkLabel(lblSkip);
                }
            }
        }

        /// <summary>Emits a discard.</summary>
        public static void Kil(EmitterContext context)
        {
            context.GetOp<InstKil>();

            context.Discard();
        }

        /// <summary>Emits the shared PBK/PCNT/SSY sequence.</summary>
        public static void Pbk(EmitterContext context)
        {
            context.GetOp<InstPbk>();

            EmitPbkPcntSsy(context);
        }

        /// <summary>Emits the shared PBK/PCNT/SSY sequence.</summary>
        public static void Pcnt(EmitterContext context)
        {
            context.GetOp<InstPcnt>();

            EmitPbkPcntSsy(context);
        }

        /// <summary>
        /// Emits a return from a non-main function. On the main function this only logs a message.
        /// </summary>
        public static void Ret(EmitterContext context)
        {
            context.GetOp<InstRet>();

            if (context.IsNonMain)
            {
                context.Return();
            }
            else
            {
                context.Config.GpuAccessor.Log("Invalid return on main function.");
            }
        }

        /// <summary>Emits the shared PBK/PCNT/SSY sequence.</summary>
        public static void Ssy(EmitterContext context)
        {
            context.GetOp<InstSsy>();

            EmitPbkPcntSsy(context);
        }

        /// <summary>Emits the shared BRK/CONT/SYNC sequence.</summary>
        public static void Sync(EmitterContext context)
        {
            context.GetOp<InstSync>();

            EmitBrkContSync(context);
        }

        /// <summary>
        /// For the push op at the current address, stores the push op id into the local
        /// variable of every block that consumes it.
        /// </summary>
        private static void EmitPbkPcntSsy(EmitterContext context)
        {
            // First() throws if the current op was not registered as a push op on this block.
            var consumers = context.CurrBlock.PushOpCodes.First(x => x.Op.Address == context.CurrOp.Address).Consumers;

            foreach (KeyValuePair<Block, Operand> kv in consumers)
            {
                Block consumerBlock = kv.Key;
                Operand local = kv.Value;

                int id = consumerBlock.SyncTargets[context.CurrOp.Address].PushOpId;

                context.Copy(local, Const(id));
            }
        }

        /// <summary>
        /// Emits the branch (or branches) to the sync targets of the current block.
        /// </summary>
        private static void EmitBrkContSync(EmitterContext context)
        {
            var targets = context.CurrBlock.SyncTargets;

            if (targets.Count == 1)
            {
                // If we have only one target, then the SSY/PBK is basically
                // a branch, we can produce better codegen for this case.
                EmitBranch(context, targets.Values.First().PushOpInfo.Op.GetAbsoluteAddress());
            }
            else
            {
                // TODO: Support CC here as well (condition).
                // One conditional branch per target, taken when the local variable written
                // by the push op holds that target's id.
                foreach (SyncTarget target in targets.Values)
                {
                    PushOpInfo pushOpInfo = target.PushOpInfo;

                    Operand label = context.GetLabel(pushOpInfo.Op.GetAbsoluteAddress());
                    Operand local = pushOpInfo.Consumers[context.CurrBlock];

                    context.BranchIfTrue(label, context.ICompareEqual(local, Const(target.PushOpId)));
                }
            }
        }

        /// <summary>
        /// Emits a branch to <paramref name="address"/>, conditional on the predicate and
        /// condition code encoded in the current op.
        /// </summary>
        /// <param name="context">Emitter the IR operations are appended to.</param>
        /// <param name="address">Address of the branch target.</param>
        private static void EmitBranch(EmitterContext context, ulong address)
        {
            InstOp op = context.CurrOp;
            InstConditional opCond = new InstConditional(op.RawOpCode);

            // If we're branching to the next instruction, then the branch
            // is useless and we can ignore it.
            if (address == op.Address + 8)
            {
                return;
            }

            Operand label = context.GetLabel(address);

            Operand pred = Register(opCond.Pred, RegisterType.Predicate);

            if (opCond.Ccc != Ccc.T)
            {
                Operand cond = GetCondition(context, opCond.Ccc);

                // Combine the (possibly inverted) predicate with the condition code.
                if (opCond.Pred == RegisterConsts.PredicateTrueIndex)
                {
                    pred = cond;
                }
                else if (opCond.PredInv)
                {
                    pred = context.BitwiseAnd(context.BitwiseNot(pred), cond);
                }
                else
                {
                    pred = context.BitwiseAnd(pred, cond);
                }

                context.BranchIfTrue(label, pred);
            }
            else if (opCond.Pred == RegisterConsts.PredicateTrueIndex)
            {
                context.Branch(label);
            }
            else if (opCond.PredInv)
            {
                context.BranchIfFalse(label, pred);
            }
            else
            {
                context.BranchIfTrue(label, pred);
            }
        }
    }
}
// \ No newline at end of file diff --git a/src/Ryujinx.Graphics.Shader/Instructions/InstEmitHelper.cs b/src/Ryujinx.Graphics.Shader/Instructions/InstEmitHelper.cs new file mode 100644 index 00000000..0ba4667e --- /dev/null +++ b/src/Ryujinx.Graphics.Shader/Instructions/InstEmitHelper.cs @@ -0,0 +1,266 @@
using Ryujinx.Graphics.Shader.Decoders;
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using Ryujinx.Graphics.Shader.Translation;
using System;
using System.Runtime.CompilerServices;

using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;

namespace Ryujinx.Graphics.Shader.Instructions
{
    /// <summary>
    /// Operand helpers shared by the instruction emitters: flag/register accessors,
    /// source operand fetch, half-float pack/unpack, and immediate decoding.
    /// </summary>
    static class InstEmitHelper
    {
        /// <summary>Returns flag register 0 (named ZF here).</summary>
        public static Operand GetZF()
        {
            return Register(0, RegisterType.Flag);
        }

        /// <summary>Returns flag register 1 (named NF here).</summary>
        public static Operand GetNF()
        {
            return Register(1, RegisterType.Flag);
        }

        /// <summary>Returns flag register 2 (named CF here).</summary>
        public static Operand GetCF()
        {
            return Register(2, RegisterType.Flag);
        }

        /// <summary>Returns flag register 3 (named VF here).</summary>
        public static Operand GetVF()
        {
            return Register(3, RegisterType.Flag);
        }

        /// <summary>Returns the general purpose register with index <paramref name="rd"/>.</summary>
        public static Operand GetDest(int rd)
        {
            return Register(rd, RegisterType.Gpr);
        }

        /// <summary>
        /// Returns the general purpose register with index <c>rd | 1</c>, which SetDest
        /// uses for the high word of a 64-bit destination.
        /// </summary>
        public static Operand GetDest2(int rd)
        {
            return Register(rd | 1, RegisterType.Gpr);
        }

        /// <summary>
        /// Returns a constant buffer source operand. For FP64, two consecutive words
        /// (<paramref name="cbufOffset"/> and <paramref name="cbufOffset"/> + 1) are packed into a double.
        /// </summary>
        public static Operand GetSrcCbuf(EmitterContext context, int cbufSlot, int cbufOffset, bool isFP64 = false)
        {
            if (isFP64)
            {
                return context.PackDouble2x32(
                    Cbuf(cbufSlot, cbufOffset),
                    Cbuf(cbufSlot, cbufOffset + 1));
            }
            else
            {
                return Cbuf(cbufSlot, cbufOffset);
            }
        }

        /// <summary>
        /// Returns an immediate source operand. For FP64, the immediate is packed as the
        /// second word of a double whose first word is zero.
        /// </summary>
        public static Operand GetSrcImm(EmitterContext context, int imm, bool isFP64 = false)
        {
            if (isFP64)
            {
                return context.PackDouble2x32(Const(0), Const(imm));
            }
            else
            {
                return Const(imm);
            }
        }

        /// <summary>
        /// Returns a register source operand. For FP64, registers <paramref name="reg"/> and
        /// <c>reg | 1</c> are packed into a double.
        /// </summary>
        public static Operand GetSrcReg(EmitterContext context, int reg, bool isFP64 = false)
        {
            if (isFP64)
            {
                return context.PackDouble2x32(Register(reg, RegisterType.Gpr), Register(reg | 1, RegisterType.Gpr));
            }
            else
            {
                return Register(reg, RegisterType.Gpr);
            }
        }

        /// <summary>
        /// Unpacks register <paramref name="ra"/> according to <paramref name="swizzle"/> and
        /// applies the abs/neg modifiers to both resulting operands.
        /// </summary>
        public static Operand[] GetHalfSrc(
            EmitterContext context,
            HalfSwizzle swizzle,
            int ra,
            bool negate,
            bool absolute)
        {
            Operand[] operands = GetHalfUnpacked(context, GetSrcReg(context, ra), swizzle);

            return FPAbsNeg(context, operands, absolute, negate);
        }

        /// <summary>
        /// Unpacks a constant buffer word according to <paramref name="swizzle"/> and
        /// applies the abs/neg modifiers to both resulting operands.
        /// </summary>
        public static Operand[] GetHalfSrc(
            EmitterContext context,
            HalfSwizzle swizzle,
            int cbufSlot,
            int cbufOffset,
            bool negate,
            bool absolute)
        {
            Operand[] operands = GetHalfUnpacked(context, GetSrcCbuf(context, cbufSlot, cbufOffset), swizzle);

            return FPAbsNeg(context, operands, absolute, negate);
        }

        /// <summary>
        /// Builds two float constants from two immediates, each shifted left by 6 bits and
        /// reinterpreted as a 16-bit half float.
        /// </summary>
        public static Operand[] GetHalfSrc(EmitterContext context, int immH0, int immH1)
        {
            ushort low = (ushort)(immH0 << 6);
            ushort high = (ushort)(immH1 << 6);

            return new Operand[]
            {
                ConstF((float)Unsafe.As<ushort, Half>(ref low)),
                ConstF((float)Unsafe.As<ushort, Half>(ref high))
            };
        }

        /// <summary>
        /// Builds two float constants from the low and high 16 bits of <paramref name="imm32"/>,
        /// each reinterpreted as a 16-bit half float.
        /// </summary>
        public static Operand[] GetHalfSrc(EmitterContext context, int imm32)
        {
            ushort low = (ushort)imm32;
            ushort high = (ushort)(imm32 >> 16);

            return new Operand[]
            {
                ConstF((float)Unsafe.As<ushort, Half>(ref low)),
                ConstF((float)Unsafe.As<ushort, Half>(ref high))
            };
        }

        /// <summary>
        /// Applies the abs/neg modifiers to every element of <paramref name="operands"/>.
        /// The array is modified in place and also returned.
        /// </summary>
        public static Operand[] FPAbsNeg(EmitterContext context, Operand[] operands, bool abs, bool neg)
        {
            for (int index = 0; index < operands.Length; index++)
            {
                operands[index] = context.FPAbsNeg(operands[index], abs, neg);
            }

            return operands;
        }

        /// <summary>
        /// Produces the two operands selected by <paramref name="swizzle"/> from a packed source.
        /// </summary>
        /// <exception cref="ArgumentException">The swizzle is not one of the handled values.</exception>
        public static Operand[] GetHalfUnpacked(EmitterContext context, Operand src, HalfSwizzle swizzle)
        {
            switch (swizzle)
            {
                case HalfSwizzle.F16:
                    return new Operand[]
                    {
                        context.UnpackHalf2x16Low(src),
                        context.UnpackHalf2x16High(src)
                    };

                case HalfSwizzle.F32:
                    // The source is used as-is for both operands.
                    return new Operand[] { src, src };

                case HalfSwizzle.H0H0:
                    return new Operand[]
                    {
                        context.UnpackHalf2x16Low(src),
                        context.UnpackHalf2x16Low(src)
                    };

                case HalfSwizzle.H1H1:
                    return new Operand[]
                    {
                        context.UnpackHalf2x16High(src),
                        context.UnpackHalf2x16High(src)
                    };
            }

            throw new ArgumentException($"Invalid swizzle \"{swizzle}\".");
        }

        /// <summary>
        /// Packs <paramref name="results"/> into a single operand according to the output format.
        /// The merge formats keep one half of the current value of register <paramref name="rd"/>.
        /// </summary>
        /// <exception cref="ArgumentException">The format is not one of the handled values.</exception>
        public static Operand GetHalfPacked(EmitterContext context, OFmt swizzle, Operand[] results, int rd)
        {
            switch (swizzle)
            {
                case OFmt.F16:
                    return context.PackHalf2x16(results[0], results[1]);

                case OFmt.F32:
                    return results[0];

                case OFmt.MrgH0:
                    {
                        // New low half, existing high half.
                        Operand h1 = GetHalfDest(context, rd, isHigh: true);

                        return context.PackHalf2x16(results[0], h1);
                    }

                case OFmt.MrgH1:
                    {
                        // Existing low half, new high half.
                        Operand h0 = GetHalfDest(context, rd, isHigh: false);

                        return context.PackHalf2x16(h0, results[1]);
                    }
            }

            throw new ArgumentException($"Invalid swizzle \"{swizzle}\".");
        }

        /// <summary>Unpacks the low or high half of register <paramref name="rd"/>.</summary>
        public static Operand GetHalfDest(EmitterContext context, int rd, bool isHigh)
        {
            if (isHigh)
            {
                return context.UnpackHalf2x16High(GetDest(rd));
            }
            else
            {
                return context.UnpackHalf2x16Low(GetDest(rd));
            }
        }

        /// <summary>
        /// Returns predicate register <paramref name="pred"/>, bitwise inverted when
        /// <paramref name="not"/> is set.
        /// </summary>
        public static Operand GetPredicate(EmitterContext context, int pred, bool not)
        {
            Operand local = Register(pred, RegisterType.Predicate);

            if (not)
            {
                local = context.BitwiseNot(local);
            }

            return local;
        }

        /// <summary>
        /// Writes <paramref name="value"/> to register <paramref name="rd"/>. For FP64 the value
        /// is split, the low word going to <paramref name="rd"/> and the high word to <c>rd | 1</c>.
        /// </summary>
        public static void SetDest(EmitterContext context, Operand value, int rd, bool isFP64)
        {
            if (isFP64)
            {
                context.Copy(GetDest(rd), context.UnpackDouble2x32Low(value));
                context.Copy(GetDest2(rd), context.UnpackDouble2x32High(value));
            }
            else
            {
                context.Copy(GetDest(rd), value);
            }
        }

        /// <summary>Sign extends the low 16 bits of <paramref name="imm16"/>.</summary>
        public static int Imm16ToSInt(int imm16)
        {
            return (short)imm16;
        }

        /// <summary>Moves a 20-bit immediate into the top 20 bits of a 32-bit word.</summary>
        public static int Imm20ToFloat(int imm20)
        {
            return imm20 << 12;
        }

        /// <summary>Sign extends the low 20 bits of <paramref name="imm20"/>.</summary>
        public static int Imm20ToSInt(int imm20)
        {
            return (imm20 << 12) >> 12;
        }

        /// <summary>Sign extends the low 24 bits of <paramref name="imm24"/>.</summary>
        public static int Imm24ToSInt(int imm24)
        {
            return (imm24 << 8) >> 8;
        }

        /// <summary>
        /// Emits a signed bitfield extract of the low <paramref name="srcBits"/> bits of <paramref name="src"/>.
        /// </summary>
        public static Operand SignExtendTo32(EmitterContext context, Operand src, int srcBits)
        {
            return context.BitfieldExtractS32(src, Const(0), Const(srcBits));
        }

        /// <summary>
        /// Emits a mask that keeps only the low <paramref name="srcBits"/> bits of <paramref name="src"/>.
        /// </summary>
        /// <param name="context">Emitter the IR operations are appended to.</param>
        /// <param name="src">Value to mask.</param>
        /// <param name="srcBits">
        /// Number of low bits to keep. Values of 0 or less keep nothing, values of 32 or more keep everything.
        /// </param>
        public static Operand ZeroExtendTo32(EmitterContext context, Operand src, int srcBits)
        {
            int mask;

            if (srcBits <= 0)
            {
                // C# only uses the low 5 bits of a 32-bit shift count, so shifting by
                // (32 - 0) would be a shift by 0 and give an all-ones mask. Handle it explicitly.
                mask = 0;
            }
            else if (srcBits >= 32)
            {
                mask = -1;
            }
            else
            {
                mask = (int)(uint.MaxValue >> (32 - srcBits));
            }

            return context.BitwiseAnd(src, Const(mask));
        }
    }
}
// \ No newline at end of file diff --git a/src/Ryujinx.Graphics.Shader/Instructions/InstEmitIntegerArithmetic.cs b/src/Ryujinx.Graphics.Shader/Instructions/InstEmitIntegerArithmetic.cs new file mode 100644 index 00000000..374e3d61 --- /dev/null +++ b/src/Ryujinx.Graphics.Shader/Instructions/InstEmitIntegerArithmetic.cs @@ -0,0 +1,699 @@
using Ryujinx.Graphics.Shader.Decoders;
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using Ryujinx.Graphics.Shader.Translation;

using static Ryujinx.Graphics.Shader.Instructions.InstEmitAluHelper;
using static Ryujinx.Graphics.Shader.Instructions.InstEmitHelper;
using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;

namespace Ryujinx.Graphics.Shader.Instructions
{
    // Integer arithmetic emitters. Each public method decodes one encoding of an
    // instruction (R = register, I = 20-bit immediate, C = constant buffer,
    // Rc = register + constant buffer, 32i = 32-bit immediate) and forwards the
    // operands to the matching private Emit* implementation.
    static partial class InstEmit
    {
        /// <summary>Integer add, register second operand.</summary>
        public static void IaddR(EmitterContext context)
        {
            InstIaddR op = context.GetOp<InstIaddR>();

            var srcA = GetSrcReg(context, op.SrcA);
            var srcB = GetSrcReg(context, op.SrcB);

            EmitIadd(context, srcA, srcB, op.Dest, op.AvgMode, op.X, op.WriteCC);
        }

        /// <summary>Integer add, sign extended 20-bit immediate second operand.</summary>
        public static void IaddI(EmitterContext context)
        {
            InstIaddI op = context.GetOp<InstIaddI>();

            var srcA = GetSrcReg(context, op.SrcA);
            var srcB = GetSrcImm(context, Imm20ToSInt(op.Imm20));

            EmitIadd(context, srcA, srcB, op.Dest, op.AvgMode, op.X, op.WriteCC);
        }

        /// <summary>Integer add, constant buffer second operand.</summary>
        public static void IaddC(EmitterContext context)
        {
            InstIaddC op = context.GetOp<InstIaddC>();

            var srcA = GetSrcReg(context, op.SrcA);
            var srcB = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);

            EmitIadd(context, srcA, srcB, op.Dest, op.AvgMode, op.X, op.WriteCC);
        }

        /// <summary>Integer add, 32-bit immediate second operand.</summary>
        public static void Iadd32i(EmitterContext context)
        {
            InstIadd32i op = context.GetOp<InstIadd32i>();

            var srcA = GetSrcReg(context, op.SrcA);
            var srcB = GetSrcImm(context, op.Imm32);

            EmitIadd(context, srcA, srcB, op.Dest, op.AvgMode, op.X, op.WriteCC);
        }

        /// <summary>Three input integer add, all register operands with per-operand half selection.</summary>
        public static void Iadd3R(EmitterContext context)
        {
            InstIadd3R op = context.GetOp<InstIadd3R>();

            var srcA = GetSrcReg(context, op.SrcA);
            var srcB = GetSrcReg(context, op.SrcB);
            var srcC = GetSrcReg(context, op.SrcC);

            EmitIadd3(context, op.Lrs, srcA, srcB, srcC, op.Apart, op.Bpart, op.Cpart, op.Dest, op.NegA, op.NegB, op.NegC);
        }

        /// <summary>Three input integer add, sign extended 20-bit immediate second operand.</summary>
        public static void Iadd3I(EmitterContext context)
        {
            InstIadd3I op = context.GetOp<InstIadd3I>();

            var srcA = GetSrcReg(context, op.SrcA);
            var srcB = GetSrcImm(context, Imm20ToSInt(op.Imm20));
            var srcC = GetSrcReg(context, op.SrcC);

            EmitIadd3(context, Lrs.None, srcA, srcB, srcC, HalfSelect.B32, HalfSelect.B32, HalfSelect.B32, op.Dest, op.NegA, op.NegB, op.NegC);
        }

        /// <summary>Three input integer add, constant buffer second operand.</summary>
        public static void Iadd3C(EmitterContext context)
        {
            InstIadd3C op = context.GetOp<InstIadd3C>();

            var srcA = GetSrcReg(context, op.SrcA);
            var srcB = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);
            var srcC = GetSrcReg(context, op.SrcC);

            EmitIadd3(context, Lrs.None, srcA, srcB, srcC, HalfSelect.B32, HalfSelect.B32, HalfSelect.B32, op.Dest, op.NegA, op.NegB, op.NegC);
        }

        /// <summary>Integer multiply-add, all register operands.</summary>
        public static void ImadR(EmitterContext context)
        {
            InstImadR op = context.GetOp<InstImadR>();

            var srcA = GetSrcReg(context, op.SrcA);
            var srcB = GetSrcReg(context, op.SrcB);
            var srcC = GetSrcReg(context, op.SrcC);

            EmitImad(context, srcA, srcB, srcC, op.Dest, op.AvgMode, op.ASigned, op.BSigned, op.Hilo);
        }

        /// <summary>Integer multiply-add, sign extended 20-bit immediate multiplier.</summary>
        public static void ImadI(EmitterContext context)
        {
            InstImadI op = context.GetOp<InstImadI>();

            var srcA = GetSrcReg(context, op.SrcA);
            var srcB = GetSrcImm(context, Imm20ToSInt(op.Imm20));
            var srcC = GetSrcReg(context, op.SrcC);

            EmitImad(context, srcA, srcB, srcC, op.Dest, op.AvgMode, op.ASigned, op.BSigned, op.Hilo);
        }

        /// <summary>Integer multiply-add, constant buffer multiplier.</summary>
        public static void ImadC(EmitterContext context)
        {
            InstImadC op = context.GetOp<InstImadC>();

            var srcA = GetSrcReg(context, op.SrcA);
            var srcB = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);
            var srcC = GetSrcReg(context, op.SrcC);

            EmitImad(context, srcA, srcB, srcC, op.Dest, op.AvgMode, op.ASigned, op.BSigned, op.Hilo);
        }

        /// <summary>
        /// Integer multiply-add where the multiplier comes from the SrcC register field and
        /// the addend from a constant buffer.
        /// </summary>
        public static void ImadRc(EmitterContext context)
        {
            InstImadRc op = context.GetOp<InstImadRc>();

            var srcA = GetSrcReg(context, op.SrcA);
            var srcB = GetSrcReg(context, op.SrcC);
            var srcC = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);

            EmitImad(context, srcA, srcB, srcC, op.Dest, op.AvgMode, op.ASigned, op.BSigned, op.Hilo);
        }

        /// <summary>
        /// Integer multiply-add with a 32-bit immediate multiplier; the addend is the
        /// current value of the destination register.
        /// </summary>
        public static void Imad32i(EmitterContext context)
        {
            InstImad32i op = context.GetOp<InstImad32i>();

            var srcA = GetSrcReg(context, op.SrcA);
            var srcB = GetSrcImm(context, op.Imm32);
            var srcC = GetSrcReg(context, op.Dest);

            EmitImad(context, srcA, srcB, srcC, op.Dest, op.AvgMode, op.ASigned, op.BSigned, op.Hilo);
        }

        /// <summary>Integer multiply, register operands (multiply-add with a zero addend).</summary>
        public static void ImulR(EmitterContext context)
        {
            InstImulR op = context.GetOp<InstImulR>();

            var srcA = GetSrcReg(context, op.SrcA);
            var srcB = GetSrcReg(context, op.SrcB);

            EmitImad(context, srcA, srcB, Const(0), op.Dest, AvgMode.NoNeg, op.ASigned, op.BSigned, op.Hilo);
        }

        /// <summary>Integer multiply, sign extended 20-bit immediate second operand.</summary>
        public static void ImulI(EmitterContext context)
        {
            InstImulI op = context.GetOp<InstImulI>();

            var srcA = GetSrcReg(context, op.SrcA);
            var srcB = GetSrcImm(context, Imm20ToSInt(op.Imm20));

            EmitImad(context, srcA, srcB, Const(0), op.Dest, AvgMode.NoNeg, op.ASigned, op.BSigned, op.Hilo);
        }

        /// <summary>Integer multiply, constant buffer second operand.</summary>
        public static void ImulC(EmitterContext context)
        {
            InstImulC op = context.GetOp<InstImulC>();

            var srcA = GetSrcReg(context, op.SrcA);
            var srcB = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);

            EmitImad(context, srcA, srcB, Const(0), op.Dest, AvgMode.NoNeg, op.ASigned, op.BSigned, op.Hilo);
        }

        /// <summary>Integer multiply, 32-bit immediate second operand.</summary>
        public static void Imul32i(EmitterContext context)
        {
            InstImul32i op = context.GetOp<InstImul32i>();

            var srcA = GetSrcReg(context, op.SrcA);
            var srcB = GetSrcImm(context, op.Imm32);

            EmitImad(context, srcA, srcB, Const(0), op.Dest, AvgMode.NoNeg, op.ASigned, op.BSigned, op.Hilo);
        }

        /// <summary>Scaled integer add, register second operand.</summary>
        public static void IscaddR(EmitterContext context)
        {
            InstIscaddR op = context.GetOp<InstIscaddR>();

            var srcA = GetSrcReg(context, op.SrcA);
            var srcB = GetSrcReg(context, op.SrcB);

            EmitIscadd(context, srcA, srcB, op.Dest, op.Imm5, op.AvgMode, op.WriteCC);
        }

        /// <summary>Scaled integer add, sign extended 20-bit immediate second operand.</summary>
        public static void IscaddI(EmitterContext context)
        {
            InstIscaddI op = context.GetOp<InstIscaddI>();

            var srcA = GetSrcReg(context, op.SrcA);
            var srcB = GetSrcImm(context, Imm20ToSInt(op.Imm20));

            EmitIscadd(context, srcA, srcB, op.Dest, op.Imm5, op.AvgMode, op.WriteCC);
        }

        /// <summary>Scaled integer add, constant buffer second operand.</summary>
        public static void IscaddC(EmitterContext context)
        {
            InstIscaddC op = context.GetOp<InstIscaddC>();

            var srcA = GetSrcReg(context, op.SrcA);
            var srcB = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);

            EmitIscadd(context, srcA, srcB, op.Dest, op.Imm5, op.AvgMode, op.WriteCC);
        }

        /// <summary>Scaled integer add, 32-bit immediate second operand (no operand negation).</summary>
        public static void Iscadd32i(EmitterContext context)
        {
            InstIscadd32i op = context.GetOp<InstIscadd32i>();

            var srcA = GetSrcReg(context, op.SrcA);
            var srcB = GetSrcImm(context, op.Imm32);

            EmitIscadd(context, srcA, srcB, op.Dest, op.Imm5, AvgMode.NoNeg, op.WriteCC);
        }

        /// <summary>Shift-and-add address computation, register second operand.</summary>
        public static void LeaR(EmitterContext context)
        {
            InstLeaR op = context.GetOp<InstLeaR>();

            var srcA = GetSrcReg(context, op.SrcA);
            var srcB = GetSrcReg(context, op.SrcB);

            EmitLea(context, srcA, srcB, op.Dest, op.NegA, op.ImmU5);
        }

        /// <summary>Shift-and-add address computation, sign extended 20-bit immediate second operand.</summary>
        public static void LeaI(EmitterContext context)
        {
            InstLeaI op = context.GetOp<InstLeaI>();

            var srcA = GetSrcReg(context, op.SrcA);
            var srcB = GetSrcImm(context, Imm20ToSInt(op.Imm20));

            EmitLea(context, srcA, srcB, op.Dest, op.NegA, op.ImmU5);
        }

        /// <summary>Shift-and-add address computation, constant buffer second operand.</summary>
        public static void LeaC(EmitterContext context)
        {
            InstLeaC op = context.GetOp<InstLeaC>();

            var srcA = GetSrcReg(context, op.SrcA);
            var srcB = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);

            EmitLea(context, srcA, srcB, op.Dest, op.NegA, op.ImmU5);
        }

        /// <summary>High word shift-and-add address computation, register operands.</summary>
        public static void LeaHiR(EmitterContext context)
        {
            InstLeaHiR op = context.GetOp<InstLeaHiR>();

            var srcA = GetSrcReg(context, op.SrcA);
            var srcB = GetSrcReg(context, op.SrcB);
            var srcC = GetSrcReg(context, op.SrcC);

            EmitLeaHi(context, srcA, srcB, srcC, op.Dest, op.NegA, op.ImmU5);
        }

        /// <summary>High word shift-and-add address computation, constant buffer second operand.</summary>
        public static void LeaHiC(EmitterContext context)
        {
            InstLeaHiC op = context.GetOp<InstLeaHiC>();

            var srcA = GetSrcReg(context, op.SrcA);
            var srcB = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);
            var srcC = GetSrcReg(context, op.SrcC);

            EmitLeaHi(context, srcA, srcB, srcC, op.Dest, op.NegA, op.ImmU5);
        }

        /// <summary>16-bit multiply-add, all register operands.</summary>
        public static void XmadR(EmitterContext context)
        {
            InstXmadR op = context.GetOp<InstXmadR>();

            var srcA = GetSrcReg(context, op.SrcA);
            var srcB = GetSrcReg(context, op.SrcB);
            var srcC = GetSrcReg(context, op.SrcC);

            EmitXmad(context, op.XmadCop, srcA, srcB, srcC, op.Dest, op.ASigned, op.BSigned, op.HiloA, op.HiloB, op.Psl, op.Mrg, op.X, op.WriteCC);
        }

        /// <summary>16-bit multiply-add, 16-bit immediate multiplier (always the low half).</summary>
        public static void XmadI(EmitterContext context)
        {
            InstXmadI op = context.GetOp<InstXmadI>();

            var srcA = GetSrcReg(context, op.SrcA);
            var srcB = GetSrcImm(context, op.Imm16);
            var srcC = GetSrcReg(context, op.SrcC);

            EmitXmad(context, op.XmadCop, srcA, srcB, srcC, op.Dest, op.ASigned, op.BSigned, op.HiloA, false, op.Psl, op.Mrg, op.X, op.WriteCC);
        }

        /// <summary>16-bit multiply-add, constant buffer multiplier.</summary>
        public static void XmadC(EmitterContext context)
        {
            InstXmadC op = context.GetOp<InstXmadC>();

            var srcA = GetSrcReg(context, op.SrcA);
            var srcB = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);
            var srcC = GetSrcReg(context, op.SrcC);

            EmitXmad(context, op.XmadCop, srcA, srcB, srcC, op.Dest, op.ASigned, op.BSigned, op.HiloA, op.HiloB, op.Psl, op.Mrg, op.X, op.WriteCC);
        }

        /// <summary>
        /// 16-bit multiply-add where the multiplier comes from the SrcC register field and the
        /// addend from a constant buffer. Product shift and merge are always disabled.
        /// </summary>
        public static void XmadRc(EmitterContext context)
        {
            InstXmadRc op = context.GetOp<InstXmadRc>();

            var srcA = GetSrcReg(context, op.SrcA);
            var srcB = GetSrcReg(context, op.SrcC);
            var srcC = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);

            EmitXmad(context, op.XmadCop, srcA, srcB, srcC, op.Dest, op.ASigned, op.BSigned, op.HiloA, op.HiloB, false, false, op.X, op.WriteCC);
        }

        /// <summary>
        /// Emits <c>rd = (+/-)srcA + (+/-)srcB</c>, plus the carry flag when
        /// <paramref name="extended"/> is set, and updates the flags when requested.
        /// </summary>
        /// <param name="context">Emitter the IR operations are appended to.</param>
        /// <param name="srcA">First addend.</param>
        /// <param name="srcB">Second addend.</param>
        /// <param name="rd">Destination register index.</param>
        /// <param name="avgMode">Selects which operand, if any, is negated.</param>
        /// <param name="extended">Adds the carry flag to the result.</param>
        /// <param name="writeCC">Updates the condition flags from the result.</param>
        private static void EmitIadd(
            EmitterContext context,
            Operand srcA,
            Operand srcB,
            int rd,
            AvgMode avgMode,
            bool extended,
            bool writeCC)
        {
            srcA = context.INegate(srcA, avgMode == AvgMode.NegA);
            srcB = context.INegate(srcB, avgMode == AvgMode.NegB);

            Operand res = context.IAdd(srcA, srcB);

            if (extended)
            {
                res = context.IAdd(res, context.BitwiseAnd(GetCF(), Const(1)));
            }

            SetIaddFlags(context, res, srcA, srcB, writeCC, extended);

            // TODO: SAT.

            context.Copy(GetDest(rd), res);
        }

        /// <summary>
        /// Emits a three input add: <c>rd = shift(a + b) + c</c>, where each input may be a
        /// selected 16-bit half and may be negated, and the optional shift is by 16 bits.
        /// </summary>
        /// <param name="context">Emitter the IR operations are appended to.</param>
        /// <param name="mode">Shift applied to the intermediate sum of the first two inputs.</param>
        /// <param name="srcA">First input.</param>
        /// <param name="srcB">Second input.</param>
        /// <param name="srcC">Third input.</param>
        /// <param name="partA">Half selection for <paramref name="srcA"/>.</param>
        /// <param name="partB">Half selection for <paramref name="srcB"/>.</param>
        /// <param name="partC">Half selection for <paramref name="srcC"/>.</param>
        /// <param name="rd">Destination register index.</param>
        /// <param name="negateA">Negates the first input after half selection.</param>
        /// <param name="negateB">Negates the second input after half selection.</param>
        /// <param name="negateC">Negates the third input after half selection.</param>
        private static void EmitIadd3(
            EmitterContext context,
            Lrs mode,
            Operand srcA,
            Operand srcB,
            Operand srcC,
            HalfSelect partA,
            HalfSelect partB,
            HalfSelect partC,
            int rd,
            bool negateA,
            bool negateB,
            bool negateC)
        {
            // Zero extends the selected 16-bit half. An unknown selection is logged and
            // the source is used unmodified.
            Operand Extend(Operand src, HalfSelect part)
            {
                if (part == HalfSelect.B32)
                {
                    return src;
                }

                if (part == HalfSelect.H0)
                {
                    return context.BitwiseAnd(src, Const(0xffff));
                }
                else if (part == HalfSelect.H1)
                {
                    return context.ShiftRightU32(src, Const(16));
                }
                else
                {
                    context.Config.GpuAccessor.Log($"Iadd3 has invalid component selection {part}.");
                }

                return src;
            }

            srcA = context.INegate(Extend(srcA, partA), negateA);
            srcB = context.INegate(Extend(srcB, partB), negateB);
            srcC = context.INegate(Extend(srcC, partC), negateC);

            Operand res = context.IAdd(srcA, srcB);

            if (mode != Lrs.None)
            {
                if (mode == Lrs.LeftShift)
                {
                    res = context.ShiftLeft(res, Const(16));
                }
                else if (mode == Lrs.RightShift)
                {
                    res = context.ShiftRightU32(res, Const(16));
                }
                else
                {
                    // TODO: Warning.
                }
            }

            res = context.IAdd(res, srcC);

            context.Copy(GetDest(rd), res);

            // TODO: CC, X, corner cases.
        }

        /// <summary>
        /// Emits <c>rd = srcA * srcB + srcC</c>, using either the low or the high 32 bits of
        /// the product.
        /// </summary>
        /// <param name="context">Emitter the IR operations are appended to.</param>
        /// <param name="srcA">Multiplicand.</param>
        /// <param name="srcB">Multiplier.</param>
        /// <param name="srcC">Addend. A constant zero skips the add.</param>
        /// <param name="rd">Destination register index.</param>
        /// <param name="avgMode">NegA negates the multiplier, NegB negates the addend.</param>
        /// <param name="signedA">Treats <paramref name="srcA"/> as signed for the high product.</param>
        /// <param name="signedB">Treats <paramref name="srcB"/> as signed for the high product.</param>
        /// <param name="high">Uses the high 32 bits of the product instead of the low 32 bits.</param>
        private static void EmitImad(
            EmitterContext context,
            Operand srcA,
            Operand srcB,
            Operand srcC,
            int rd,
            AvgMode avgMode,
            bool signedA,
            bool signedB,
            bool high)
        {
            // Negating the multiplier negates the product.
            srcB = context.INegate(srcB, avgMode == AvgMode.NegA);
            srcC = context.INegate(srcC, avgMode == AvgMode.NegB);

            Operand res;

            if (high)
            {
                if (signedA && signedB)
                {
                    res = context.MultiplyHighS32(srcA, srcB);
                }
                else
                {
                    res = context.MultiplyHighU32(srcA, srcB);

                    // Mixed signedness: correct the unsigned high product by subtracting the
                    // other operand when the signed one is negative (x >> 31 is then -1).
                    if (signedA)
                    {
                        res = context.IAdd(res, context.IMultiply(srcB, context.ShiftRightS32(srcA, Const(31))));
                    }
                    else if (signedB)
                    {
                        res = context.IAdd(res, context.IMultiply(srcA, context.ShiftRightS32(srcB, Const(31))));
                    }
                }
            }
            else
            {
                res = context.IMultiply(srcA, srcB);
            }

            if (srcC.Type != OperandType.Constant || srcC.Value != 0)
            {
                res = context.IAdd(res, srcC);
            }

            // TODO: CC, X, SAT, and more?

            context.Copy(GetDest(rd), res);
        }

        /// <summary>
        /// Emits <c>rd = (+/-)(srcA &lt;&lt; shift) + (+/-)srcB</c> and updates the flags when requested.
        /// </summary>
        /// <param name="context">Emitter the IR operations are appended to.</param>
        /// <param name="srcA">Operand that is shifted left before the add.</param>
        /// <param name="srcB">Second addend.</param>
        /// <param name="rd">Destination register index.</param>
        /// <param name="shift">Left shift applied to <paramref name="srcA"/>.</param>
        /// <param name="avgMode">Selects which operand, if any, is negated (after the shift).</param>
        /// <param name="writeCC">Updates the condition flags from the result.</param>
        private static void EmitIscadd(
            EmitterContext context,
            Operand srcA,
            Operand srcB,
            int rd,
            int shift,
            AvgMode avgMode,
            bool writeCC)
        {
            srcA = context.ShiftLeft(srcA, Const(shift));

            srcA = context.INegate(srcA, avgMode == AvgMode.NegA);
            srcB = context.INegate(srcB, avgMode == AvgMode.NegB);

            Operand res = context.IAdd(srcA, srcB);

            SetIaddFlags(context, res, srcA, srcB, writeCC, false);

            context.Copy(GetDest(rd), res);
        }

        /// <summary>
        /// Emits <c>rd = (+/-)(srcA &lt;&lt; shift) + srcB</c>.
        /// </summary>
        /// <param name="context">Emitter the IR operations are appended to.</param>
        /// <param name="srcA">Operand that is shifted left before the add.</param>
        /// <param name="srcB">Second addend.</param>
        /// <param name="rd">Destination register index.</param>
        /// <param name="negateA">Negates the shifted <paramref name="srcA"/>.</param>
        /// <param name="shift">Left shift applied to <paramref name="srcA"/>.</param>
        public static void EmitLea(EmitterContext context, Operand srcA, Operand srcB, int rd, bool negateA, int shift)
        {
            srcA = context.ShiftLeft(srcA, Const(shift));
            srcA = context.INegate(srcA, negateA);

            Operand res = context.IAdd(srcA, srcB);

            context.Copy(GetDest(rd), res);

            // TODO: CC, X.
        }

        /// <summary>
        /// Treats <paramref name="srcC"/>:<paramref name="srcA"/> as a 64-bit value, shifts it
        /// left by <paramref name="shift"/>, optionally negates it, and writes the high word
        /// plus <paramref name="srcB"/> to the destination.
        /// </summary>
        /// <param name="context">Emitter the IR operations are appended to.</param>
        /// <param name="srcA">Low word of the 64-bit value.</param>
        /// <param name="srcB">Value added to the high word of the result.</param>
        /// <param name="srcC">High word of the 64-bit value.</param>
        /// <param name="rd">Destination register index.</param>
        /// <param name="negateA">Negates the shifted 64-bit value.</param>
        /// <param name="shift">Left shift applied to the 64-bit value.</param>
        private static void EmitLeaHi(
            EmitterContext context,
            Operand srcA,
            Operand srcB,
            Operand srcC,
            int rd,
            bool negateA,
            int shift)
        {
            Operand aLow = context.ShiftLeft(srcA, Const(shift));

            // Bits shifted out of the low word; a shift by (32 - 0) is avoided explicitly.
            Operand aHigh = shift == 0 ? Const(0) : context.ShiftRightU32(srcA, Const(32 - shift));
            aHigh = context.BitwiseOr(aHigh, context.ShiftLeft(srcC, Const(shift)));

            if (negateA)
            {
                // Perform 64-bit negation by doing bitwise not of the value,
                // then adding 1 and carrying over from low to high.
                aLow = context.BitwiseNot(aLow);
                aHigh = context.BitwiseNot(aHigh);

                aLow = AddWithCarry(context, aLow, Const(1), out Operand aLowCOut);
                aHigh = context.IAdd(aHigh, aLowCOut);
            }

            Operand res = context.IAdd(aHigh, srcB);

            context.Copy(GetDest(rd), res);

            // TODO: CC, X.
        }

        /// <summary>
        /// Converts the <see cref="XmadCop2"/> mode to the equivalent <see cref="XmadCop"/>
        /// mode and forwards to the main implementation. An unknown mode is logged and
        /// nothing is emitted.
        /// </summary>
        public static void EmitXmad(
            EmitterContext context,
            XmadCop2 mode,
            Operand srcA,
            Operand srcB,
            Operand srcC,
            int rd,
            bool signedA,
            bool signedB,
            bool highA,
            bool highB,
            bool productShiftLeft,
            bool merge,
            bool extended,
            bool writeCC)
        {
            XmadCop modeConv;
            switch (mode)
            {
                case XmadCop2.Cfull:
                    modeConv = XmadCop.Cfull;
                    break;
                case XmadCop2.Clo:
                    modeConv = XmadCop.Clo;
                    break;
                case XmadCop2.Chi:
                    modeConv = XmadCop.Chi;
                    break;
                case XmadCop2.Csfu:
                    modeConv = XmadCop.Csfu;
                    break;
                default:
                    context.Config.GpuAccessor.Log($"Invalid XMAD mode \"{mode}\".");
                    return;
            }

            EmitXmad(context, modeConv, srcA, srcB, srcC, rd, signedA, signedB, highA, highB, productShiftLeft, merge, extended, writeCC);
        }

        /// <summary>
        /// Emits a 16-bit by 16-bit multiply followed by an add:
        /// <c>rd = half(srcA) * half(srcB) + f(srcC)</c>, plus the carry flag when
        /// <paramref name="extended"/> is set.
        /// </summary>
        /// <param name="context">Emitter the IR operations are appended to.</param>
        /// <param name="mode">Selects how the addend is derived from <paramref name="srcC"/>.</param>
        /// <param name="srcA">Multiplicand source.</param>
        /// <param name="srcB">Multiplier source.</param>
        /// <param name="srcC">Addend source.</param>
        /// <param name="rd">Destination register index.</param>
        /// <param name="signedA">Sign extends (rather than zero extends) the selected half of <paramref name="srcA"/>.</param>
        /// <param name="signedB">Sign extends (rather than zero extends) the selected half of <paramref name="srcB"/>.</param>
        /// <param name="highA">Selects the high half of <paramref name="srcA"/>.</param>
        /// <param name="highB">Selects the high half of <paramref name="srcB"/>.</param>
        /// <param name="productShiftLeft">Shifts the product left by 16 bits before the add.</param>
        /// <param name="merge">Replaces the high half of the result with the low half of the unmodified <paramref name="srcB"/>.</param>
        /// <param name="extended">Adds the carry flag to the result.</param>
        /// <param name="writeCC">Updates the condition flags from the result.</param>
        public static void EmitXmad(
            EmitterContext context,
            XmadCop mode,
            Operand srcA,
            Operand srcB,
            Operand srcC,
            int rd,
            bool signedA,
            bool signedB,
            bool highA,
            bool highB,
            bool productShiftLeft,
            bool merge,
            bool extended,
            bool writeCC)
        {
            var srcBUnmodified = srcB;

            // Extracts one 16-bit half of a 32-bit value, sign or zero extended to 32 bits.
            Operand Extend16To32(Operand src, bool high, bool signed)
            {
                if (signed && high)
                {
                    return context.ShiftRightS32(src, Const(16));
                }
                else if (signed)
                {
                    return context.BitfieldExtractS32(src, Const(0), Const(16));
                }
                else if (high)
                {
                    return context.ShiftRightU32(src, Const(16));
                }
                else
                {
                    return context.BitwiseAnd(src, Const(0xffff));
                }
            }

            srcA = Extend16To32(srcA, highA, signedA);
            srcB = Extend16To32(srcB, highB, signedB);

            Operand res = context.IMultiply(srcA, srcB);

            if (productShiftLeft)
            {
                res = context.ShiftLeft(res, Const(16));
            }

            switch (mode)
            {
                case XmadCop.Cfull:
                    break;

                case XmadCop.Clo:
                    srcC = Extend16To32(srcC, high: false, signed: false);
                    break;
                case XmadCop.Chi:
                    srcC = Extend16To32(srcC, high: true, signed: false);
                    break;

                case XmadCop.Cbcc:
                    srcC = context.IAdd(srcC, context.ShiftLeft(srcBUnmodified, Const(16)));
                    break;

                case XmadCop.Csfu:
                    Operand signAdjustA = context.ShiftLeft(context.ShiftRightU32(srcA, Const(31)), Const(16));
                    Operand signAdjustB = context.ShiftLeft(context.ShiftRightU32(srcB, Const(31)), Const(16));

                    srcC = context.ISubtract(srcC, context.IAdd(signAdjustA, signAdjustB));
                    break;

                default:
                    context.Config.GpuAccessor.Log($"Invalid XMAD mode \"{mode}\".");
                    return;
            }

            Operand product = res;

            // The addend is part of the result whether or not there is a carry in. This also
            // matches SetIaddFlags below, which derives the flags assuming
            // res = product + srcC (+ carry).
            res = context.IAdd(res, srcC);

            if (extended)
            {
                // Add with carry.
                res = context.IAdd(res, context.BitwiseAnd(GetCF(), Const(1)));
            }

            SetIaddFlags(context, res, product, srcC, writeCC, extended);

            if (merge)
            {
                res = context.BitwiseAnd(res, Const(0xffff));
                res = context.BitwiseOr(res, context.ShiftLeft(srcBUnmodified, Const(16)));
            }

            context.Copy(GetDest(rd), res);
        }

        /// <summary>
        /// Updates the carry, overflow, zero and negative flags for an addition
        /// <c>res = srcA + srcB (+ carry in)</c>. Does nothing when <paramref name="setCC"/> is false.
        /// </summary>
        /// <param name="context">Emitter the IR operations are appended to.</param>
        /// <param name="res">Result of the addition.</param>
        /// <param name="srcA">First addend.</param>
        /// <param name="srcB">Second addend.</param>
        /// <param name="setCC">Whether the flags should be written at all.</param>
        /// <param name="extended">Whether the addition included the carry flag as carry in.</param>
        private static void SetIaddFlags(EmitterContext context, Operand res, Operand srcA, Operand srcB, bool setCC, bool extended)
        {
            if (!setCC)
            {
                return;
            }

            if (extended)
            {
                // C = (d == a && CIn) || d < a
                Operand tempC0 = context.ICompareEqual(res, srcA);
                Operand tempC1 = context.ICompareLessUnsigned(res, srcA);

                tempC0 = context.BitwiseAnd(tempC0, GetCF());

                context.Copy(GetCF(), context.BitwiseOr(tempC0, tempC1));
            }
            else
            {
                // C = d < a
                context.Copy(GetCF(), context.ICompareLessUnsigned(res, srcA));
            }

            // V = (d ^ a) & ~(a ^ b) < 0
            Operand tempV0 = context.BitwiseExclusiveOr(res, srcA);
            Operand tempV1 = context.BitwiseExclusiveOr(srcA, srcB);

            tempV1 = context.BitwiseNot(tempV1);

            Operand tempV = context.BitwiseAnd(tempV0, tempV1);

            context.Copy(GetVF(), context.ICompareLess(tempV, Const(0)));

            SetZnFlags(context, res, setCC: true, extended: extended);
        }
    }
}
// \ No newline at end of file
// diff --git a/src/Ryujinx.Graphics.Shader/Instructions/InstEmitIntegerComparison.cs b/src/Ryujinx.Graphics.Shader/Instructions/InstEmitIntegerComparison.cs new file mode 100644 index 00000000..dcdb189f --- /dev/null +++ b/src/Ryujinx.Graphics.Shader/Instructions/InstEmitIntegerComparison.cs @@ -0,0 +1,310 @@
using Ryujinx.Graphics.Shader.Decoders;
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using Ryujinx.Graphics.Shader.Translation;
using System;

using static Ryujinx.Graphics.Shader.Instructions.InstEmitAluHelper;
using static Ryujinx.Graphics.Shader.Instructions.InstEmitHelper;
using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;

namespace Ryujinx.Graphics.Shader.Instructions
{
    // Emitters for the integer compare instructions (ICMP, ISET, ISETP).
    // Every public entry point decodes its operands and forwards them to one of
    // the private Emit* helpers at the bottom of the class.
    static partial class InstEmit
    {
        /// <summary>ICMP with register operands A, B and C.</summary>
        public static void IcmpR(EmitterContext context)
        {
            InstIcmpR op = context.GetOp<InstIcmpR>();

            var srcA = GetSrcReg(context, op.SrcA);
            var srcB = GetSrcReg(context, op.SrcB);
            var srcC = GetSrcReg(context, op.SrcC);

            EmitIcmp(context, op.IComp, srcA, srcB, srcC, op.Dest, op.Signed);
        }

        /// <summary>ICMP where operand B is a sign-extended 20-bit immediate.</summary>
        public static void IcmpI(EmitterContext context)
        {
            InstIcmpI op = context.GetOp<InstIcmpI>();

            var srcA = GetSrcReg(context, op.SrcA);
            var srcB = GetSrcImm(context, Imm20ToSInt(op.Imm20));
            var srcC = GetSrcReg(context, op.SrcC);

            EmitIcmp(context, op.IComp, srcA, srcB, srcC, op.Dest, op.Signed);
        }

        /// <summary>ICMP where operand B comes from a constant buffer.</summary>
        public static void IcmpC(EmitterContext context)
        {
            InstIcmpC op = context.GetOp<InstIcmpC>();

            var srcA = GetSrcReg(context, op.SrcA);
            var srcB = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);
            var srcC = GetSrcReg(context, op.SrcC);

            EmitIcmp(context, op.IComp, srcA, srcB, srcC, op.Dest, op.Signed);
        }

        /// <summary>
        /// ICMP where the constant buffer supplies operand C; operand B is read
        /// from the register named by the <c>SrcC</c> field of the decoded op.
        /// </summary>
        public static void IcmpRc(EmitterContext context)
        {
            InstIcmpRc op = context.GetOp<InstIcmpRc>();

            var srcA = GetSrcReg(context, op.SrcA);
            var srcB = GetSrcReg(context, op.SrcC);
            var srcC = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);

            EmitIcmp(context, op.IComp, srcA, srcB, srcC, op.Dest, op.Signed);
        }

        /// <summary>ISET with register operands.</summary>
        public static void IsetR(EmitterContext context)
        {
            InstIsetR op = context.GetOp<InstIsetR>();

            var srcA = GetSrcReg(context, op.SrcA);
            var srcB = GetSrcReg(context, op.SrcB);

            EmitIset(context, op.IComp, op.Bop, srcA, srcB, op.SrcPred, op.SrcPredInv, op.Dest, op.BVal, op.Signed, op.X, op.WriteCC);
        }

        /// <summary>ISET where operand B is a sign-extended 20-bit immediate.</summary>
        public static void IsetI(EmitterContext context)
        {
            InstIsetI op = context.GetOp<InstIsetI>();

            var srcA = GetSrcReg(context, op.SrcA);
            var srcB = GetSrcImm(context, Imm20ToSInt(op.Imm20));

            EmitIset(context, op.IComp, op.Bop, srcA, srcB, op.SrcPred, op.SrcPredInv, op.Dest, op.BVal, op.Signed, op.X, op.WriteCC);
        }

        /// <summary>ISET where operand B comes from a constant buffer.</summary>
        public static void IsetC(EmitterContext context)
        {
            InstIsetC op = context.GetOp<InstIsetC>();

            var srcA = GetSrcReg(context, op.SrcA);
            var srcB = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);

            EmitIset(context, op.IComp, op.Bop, srcA, srcB, op.SrcPred, op.SrcPredInv, op.Dest, op.BVal, op.Signed, op.X, op.WriteCC);
        }

        /// <summary>ISETP with register operands.</summary>
        public static void IsetpR(EmitterContext context)
        {
            InstIsetpR op = context.GetOp<InstIsetpR>();

            var srcA = GetSrcReg(context, op.SrcA);
            var srcB = GetSrcReg(context, op.SrcB);

            EmitIsetp(context, op.IComp, op.Bop, srcA, srcB, op.SrcPred, op.SrcPredInv, op.DestPred, op.DestPredInv, op.Signed, op.X);
        }

        /// <summary>ISETP where operand B is a sign-extended 20-bit immediate.</summary>
        public static void IsetpI(EmitterContext context)
        {
            InstIsetpI op = context.GetOp<InstIsetpI>();

            var srcA = GetSrcReg(context, op.SrcA);
            var srcB = GetSrcImm(context, Imm20ToSInt(op.Imm20));

            EmitIsetp(context, op.IComp, op.Bop, srcA, srcB, op.SrcPred, op.SrcPredInv, op.DestPred, op.DestPredInv, op.Signed, op.X);
        }

        /// <summary>ISETP where operand B comes from a constant buffer.</summary>
        public static void IsetpC(EmitterContext context)
        {
            InstIsetpC op = context.GetOp<InstIsetpC>();

            var srcA = GetSrcReg(context, op.SrcA);
            var srcB = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);

            EmitIsetp(context, op.IComp, op.Bop, srcA, srcB, op.SrcPred, op.SrcPredInv, op.DestPred, op.DestPredInv, op.Signed, op.X);
        }

        /// <summary>
        /// Compares <paramref name="srcC"/> against zero using <paramref name="cmpOp"/> and
        /// writes <paramref name="srcA"/> to register <paramref name="rd"/> when the comparison
        /// holds, <paramref name="srcB"/> otherwise.
        /// </summary>
        private static void EmitIcmp(
            EmitterContext context,
            IComp cmpOp,
            Operand srcA,
            Operand srcB,
            Operand srcC,
            int rd,
            bool isSigned)
        {
            Operand cmpRes = GetIntComparison(context, cmpOp, srcC, Const(0), isSigned);

            Operand res = context.ConditionalSelect(cmpRes, srcA, srcB);

            context.Copy(GetDest(rd), res);
        }

        /// <summary>
        /// Compares the sources, combines the outcome with the source predicate through
        /// <paramref name="logicOp"/>, and writes the result to register <paramref name="rd"/>.
        /// When <paramref name="boolFloat"/> is set the written value is 1.0f or 0 instead of
        /// the raw comparison result, and the floating-point flag helper is used.
        /// </summary>
        private static void EmitIset(
            EmitterContext context,
            IComp cmpOp,
            BoolOp logicOp,
            Operand srcA,
            Operand srcB,
            int srcPred,
            bool srcPredInv,
            int rd,
            bool boolFloat,
            bool isSigned,
            bool extended,
            bool writeCC)
        {
            Operand res = GetIntComparison(context, cmpOp, srcA, srcB, isSigned, extended);
            Operand pred = GetPredicate(context, srcPred, srcPredInv);

            res = GetPredLogicalOp(context, logicOp, res, pred);

            Operand dest = GetDest(rd);

            if (boolFloat)
            {
                res = context.ConditionalSelect(res, ConstF(1), Const(0));

                context.Copy(dest, res);

                SetFPZnFlags(context, res, writeCC);
            }
            else
            {
                context.Copy(dest, res);

                SetZnFlags(context, res, writeCC, extended);
            }
        }

        /// <summary>
        /// Compares the sources and writes two predicates: the comparison result and its
        /// bitwise complement, each combined with the source predicate through
        /// <paramref name="logicOp"/>.
        /// </summary>
        private static void EmitIsetp(
            EmitterContext context,
            IComp cmpOp,
            BoolOp logicOp,
            Operand srcA,
            Operand srcB,
            int srcPred,
            bool srcPredInv,
            int destPred,
            int destPredInv,
            bool isSigned,
            bool extended)
        {
            Operand p0Res = GetIntComparison(context, cmpOp, srcA, srcB, isSigned, extended);

            // The complement is taken before the predicate is folded in, so both outputs
            // are combined with the same source predicate independently.
            Operand p1Res = context.BitwiseNot(p0Res);
            Operand pred = GetPredicate(context, srcPred, srcPredInv);

            p0Res = GetPredLogicalOp(context, logicOp, p0Res, pred);
            p1Res = GetPredLogicalOp(context, logicOp, p1Res, pred);

            context.Copy(Register(destPred, RegisterType.Predicate), p0Res);
            context.Copy(Register(destPredInv, RegisterType.Predicate), p1Res);
        }

        /// <summary>
        /// Dispatches to the flag-aware comparison when <paramref name="extended"/> is set,
        /// and to the plain comparison otherwise.
        /// </summary>
        private static Operand GetIntComparison(
            EmitterContext context,
            IComp cond,
            Operand srcA,
            Operand srcB,
            bool isSigned,
            bool extended)
        {
            return extended
                ? GetIntComparisonExtended(context, cond, srcA, srcB, isSigned)
                : GetIntComparison(context, cond, srcA, srcB, isSigned);
        }

        /// <summary>
        /// Comparison that also consults the current Z and C flags. In the per-case notes
        /// below, xh/yh are <paramref name="srcA"/>/<paramref name="srcB"/> and the xl/yl terms
        /// are what the flags stand for.
        /// </summary>
        /// <exception cref="ArgumentException">The condition is not a known comparison.</exception>
        private static Operand GetIntComparisonExtended(EmitterContext context, IComp cond, Operand srcA, Operand srcB, bool isSigned)
        {
            if (cond == IComp.T)
            {
                return Const(IrConsts.True);
            }

            if (cond == IComp.F)
            {
                return Const(IrConsts.False);
            }

            // No subtract/add is emitted up front any more: its result was overwritten by
            // every case below (or discarded by the throw), so it only produced dead IR.

            Operand HighLess() => isSigned
                ? context.ICompareLess(srcA, srcB)
                : context.ICompareLessUnsigned(srcA, srcB);

            Operand HighGreater() => isSigned
                ? context.ICompareGreater(srcA, srcB)
                : context.ICompareGreaterUnsigned(srcA, srcB);

            switch (cond)
            {
                case IComp.Eq: // r = xh == yh && xl == yl
                    return context.BitwiseAnd(context.ICompareEqual(srcA, srcB), GetZF());

                case IComp.Lt: // r = xh < yh || (xh == yh && xl < yl)
                {
                    Operand notC = context.BitwiseNot(GetCF());
                    Operand prevLt = context.BitwiseAnd(context.ICompareEqual(srcA, srcB), notC);

                    return context.BitwiseOr(HighLess(), prevLt);
                }

                case IComp.Le: // r = xh < yh || (xh == yh && xl <= yl)
                {
                    Operand zOrNotC = context.BitwiseOr(GetZF(), context.BitwiseNot(GetCF()));
                    Operand prevLe = context.BitwiseAnd(context.ICompareEqual(srcA, srcB), zOrNotC);

                    return context.BitwiseOr(HighLess(), prevLe);
                }

                case IComp.Gt: // r = xh > yh || (xh == yh && xl > yl)
                {
                    Operand notZAndC = context.BitwiseAnd(context.BitwiseNot(GetZF()), GetCF());
                    Operand prevGt = context.BitwiseAnd(context.ICompareEqual(srcA, srcB), notZAndC);

                    return context.BitwiseOr(HighGreater(), prevGt);
                }

                case IComp.Ge: // r = xh > yh || (xh == yh && xl >= yl)
                {
                    Operand prevGe = context.BitwiseAnd(context.ICompareEqual(srcA, srcB), GetCF());

                    return context.BitwiseOr(HighGreater(), prevGe);
                }

                case IComp.Ne: // r = xh != yh || xl != yl
                    return context.BitwiseOr(context.ICompareNotEqual(srcA, srcB), context.BitwiseNot(GetZF()));

                default:
                    throw new ArgumentException($"Unexpected condition \"{cond}\".");
            }
        }

        /// <summary>
        /// Emits a single compare instruction for <paramref name="cond"/>, choosing the signed
        /// or unsigned opcode for the ordered comparisons. Equality tests ignore signedness.
        /// </summary>
        /// <exception cref="InvalidOperationException">The condition is not a known comparison.</exception>
        private static Operand GetIntComparison(EmitterContext context, IComp cond, Operand srcA, Operand srcB, bool isSigned)
        {
            if (cond == IComp.T)
            {
                return Const(IrConsts.True);
            }

            if (cond == IComp.F)
            {
                return Const(IrConsts.False);
            }

            Instruction inst = (cond, isSigned) switch
            {
                (IComp.Eq, _) => Instruction.CompareEqual,
                (IComp.Ne, _) => Instruction.CompareNotEqual,
                (IComp.Lt, true) => Instruction.CompareLess,
                (IComp.Lt, false) => Instruction.CompareLessU32,
                (IComp.Le, true) => Instruction.CompareLessOrEqual,
                (IComp.Le, false) => Instruction.CompareLessOrEqualU32,
                (IComp.Gt, true) => Instruction.CompareGreater,
                (IComp.Gt, false) => Instruction.CompareGreaterU32,
                (IComp.Ge, true) => Instruction.CompareGreaterOrEqual,
                (IComp.Ge, false) => Instruction.CompareGreaterOrEqualU32,
                _ => throw new InvalidOperationException($"Unexpected condition \"{cond}\".")
            };

            return context.Add(inst, Local(), srcA, srcB);
        }
    }
}
// \ No newline at end of file
// diff --git a/src/Ryujinx.Graphics.Shader/Instructions/InstEmitIntegerLogical.cs b/src/Ryujinx.Graphics.Shader/Instructions/InstEmitIntegerLogical.cs new file mode 100644 index 00000000..1f3f66ae --- /dev/null +++ b/src/Ryujinx.Graphics.Shader/Instructions/InstEmitIntegerLogical.cs @@ -0,0 +1,167 @@
using Ryujinx.Graphics.Shader.Decoders;
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using Ryujinx.Graphics.Shader.Translation;

using static Ryujinx.Graphics.Shader.Instructions.InstEmitAluHelper;
using static Ryujinx.Graphics.Shader.Instructions.InstEmitHelper;
using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;

namespace Ryujinx.Graphics.Shader.Instructions
{
    // Emitters for the bitwise logic instructions (LOP, LOP32I, LOP3).
    static partial class InstEmit
    {
        // Index of the always-true predicate. Passing it as a destination predicate
        // disables the predicate write in EmitLopPredWrite.
        private const int PT = RegisterConsts.PredicateTrueIndex;

        /// <summary>LOP with register operands.</summary>
        public static void LopR(EmitterContext context)
        {
            InstLopR op = context.GetOp<InstLopR>();

            var srcA = GetSrcReg(context, op.SrcA);
            var srcB = GetSrcReg(context, op.SrcB);

            EmitLop(context, op.Lop, op.PredicateOp, srcA, srcB, op.Dest, op.DestPred, op.NegA, op.NegB, op.X, op.WriteCC);
        }

        /// <summary>LOP where operand B is a sign-extended 20-bit immediate.</summary>
        public static void LopI(EmitterContext context)
        {
            InstLopI op = context.GetOp<InstLopI>();

            var srcA = GetSrcReg(context, op.SrcA);
            var srcB = GetSrcImm(context, Imm20ToSInt(op.Imm20));

            EmitLop(context, op.LogicOp, op.PredicateOp, srcA, srcB, op.Dest, op.DestPred, op.NegA, op.NegB, op.X, op.WriteCC);
        }

        /// <summary>LOP where operand B comes from a constant buffer.</summary>
        public static void LopC(EmitterContext context)
        {
            InstLopC op = context.GetOp<InstLopC>();

            var srcA = GetSrcReg(context, op.SrcA);
            var srcB = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);

            EmitLop(context, op.LogicOp, op.PredicateOp, srcA, srcB, op.Dest, op.DestPred, op.NegA, op.NegB, op.X, op.WriteCC);
        }

        /// <summary>LOP with a 32-bit immediate operand B; never writes a predicate.</summary>
        public static void Lop32i(EmitterContext context)
        {
            InstLop32i op = context.GetOp<InstLop32i>();

            var srcA = GetSrcReg(context, op.SrcA);
            var srcB = GetSrcImm(context, op.Imm32);

            EmitLop(context, op.LogicOp, PredicateOp.F, srcA, srcB, op.Dest, PT, op.NegA, op.NegB, op.X, op.WriteCC);
        }

        /// <summary>LOP3 with register operands.</summary>
        public static void Lop3R(EmitterContext context)
        {
            InstLop3R op = context.GetOp<InstLop3R>();

            var srcA = GetSrcReg(context, op.SrcA);
            var srcB = GetSrcReg(context, op.SrcB);
            var srcC = GetSrcReg(context, op.SrcC);

            EmitLop3(context, op.Imm, op.PredicateOp, srcA, srcB, srcC, op.Dest, op.DestPred, op.X, op.WriteCC);
        }

        /// <summary>
        /// LOP3 where operand B is a sign-extended 20-bit immediate; never writes a predicate
        /// and is always emitted as non-extended.
        /// </summary>
        public static void Lop3I(EmitterContext context)
        {
            InstLop3I op = context.GetOp<InstLop3I>();

            var srcA = GetSrcReg(context, op.SrcA);
            var srcB = GetSrcImm(context, Imm20ToSInt(op.Imm20));
            var srcC = GetSrcReg(context, op.SrcC);

            EmitLop3(context, op.Imm, PredicateOp.F, srcA, srcB, srcC, op.Dest, PT, false, op.WriteCC);
        }

        /// <summary>
        /// LOP3 where operand B comes from a constant buffer; never writes a predicate and is
        /// always emitted as non-extended.
        /// </summary>
        public static void Lop3C(EmitterContext context)
        {
            InstLop3C op = context.GetOp<InstLop3C>();

            var srcA = GetSrcReg(context, op.SrcA);
            var srcB = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);
            var srcC = GetSrcReg(context, op.SrcC);

            EmitLop3(context, op.Imm, PredicateOp.F, srcA, srcB, srcC, op.Dest, PT, false, op.WriteCC);
        }

        /// <summary>
        /// Applies the optional bitwise inversions to both sources, combines them with
        /// <paramref name="logicOp"/>, then writes the destination predicate, the destination
        /// register and the Z/N flags. Any operation other than And/Or/Xor passes the
        /// (possibly inverted) second source through unchanged.
        /// </summary>
        private static void EmitLop(
            EmitterContext context,
            LogicOp logicOp,
            PredicateOp predOp,
            Operand srcA,
            Operand srcB,
            int rd,
            int destPred,
            bool invertA,
            bool invertB,
            bool extended,
            bool writeCC)
        {
            srcA = context.BitwiseNot(srcA, invertA);
            srcB = context.BitwiseNot(srcB, invertB);

            // The arms yield the value directly; the previous form also assigned to `res`
            // inside its own initializer, which was redundant.
            Operand res = logicOp switch
            {
                LogicOp.And => context.BitwiseAnd(srcA, srcB),
                LogicOp.Or => context.BitwiseOr(srcA, srcB),
                LogicOp.Xor => context.BitwiseExclusiveOr(srcA, srcB),
                _ => srcB
            };

            EmitLopPredWrite(context, res, predOp, destPred);

            context.Copy(GetDest(rd), res);

            SetZnFlags(context, res, writeCC, extended);
        }

        /// <summary>
        /// Builds the three-input expression selected by <paramref name="truthTable"/>, then
        /// writes the destination predicate, the destination register and the Z/N flags.
        /// </summary>
        private static void EmitLop3(
            EmitterContext context,
            int truthTable,
            PredicateOp predOp,
            Operand srcA,
            Operand srcB,
            Operand srcC,
            int rd,
            int destPred,
            bool extended,
            bool writeCC)
        {
            Operand res = Lop3Expression.GetFromTruthTable(context, srcA, srcB, srcC, truthTable);

            EmitLopPredWrite(context, res, predOp, destPred);

            context.Copy(GetDest(rd), res);

            SetZnFlags(context, res, writeCC, extended);
        }

        /// <summary>
        /// Writes predicate <paramref name="pred"/> from <paramref name="result"/> according to
        /// <paramref name="predOp"/>. Nothing is emitted when the target is the always-true
        /// predicate.
        /// </summary>
        private static void EmitLopPredWrite(EmitterContext context, Operand result, PredicateOp predOp, int pred)
        {
            if (pred == PT)
            {
                return;
            }

            Operand pRes = predOp switch
            {
                PredicateOp.F => Const(IrConsts.False),
                PredicateOp.T => Const(IrConsts.True),
                PredicateOp.Z => context.ICompareEqual(result, Const(0)),

                // Every remaining value is treated as the "result is non-zero" test.
                _ => context.ICompareNotEqual(result, Const(0))
            };

            context.Copy(Register(pred, RegisterType.Predicate), pRes);
        }
    }
}
// \ No newline at end of file
// diff --git a/src/Ryujinx.Graphics.Shader/Instructions/InstEmitIntegerMinMax.cs b/src/Ryujinx.Graphics.Shader/Instructions/InstEmitIntegerMinMax.cs new file mode 100644 index 00000000..73930ed1 --- /dev/null +++ b/src/Ryujinx.Graphics.Shader/Instructions/InstEmitIntegerMinMax.cs @@ -0,0 +1,71 @@
using Ryujinx.Graphics.Shader.Decoders;
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using Ryujinx.Graphics.Shader.Translation;

using static Ryujinx.Graphics.Shader.Instructions.InstEmitAluHelper;
using static Ryujinx.Graphics.Shader.Instructions.InstEmitHelper;

namespace Ryujinx.Graphics.Shader.Instructions
{
    // Emitters for the integer minimum/maximum instruction (IMNMX).
    static partial class InstEmit
    {
        /// <summary>IMNMX with register operands.</summary>
        public static void ImnmxR(EmitterContext context)
        {
            InstImnmxR op = context.GetOp<InstImnmxR>();

            Operand lhs = GetSrcReg(context, op.SrcA);
            Operand rhs = GetSrcReg(context, op.SrcB);
            Operand selector = GetPredicate(context, op.SrcPred, op.SrcPredInv);

            EmitImnmx(context, lhs, rhs, selector, op.Dest, op.Signed, op.WriteCC);
        }

        /// <summary>IMNMX where the second operand is a sign-extended 20-bit immediate.</summary>
        public static void ImnmxI(EmitterContext context)
        {
            InstImnmxI op = context.GetOp<InstImnmxI>();

            Operand lhs = GetSrcReg(context, op.SrcA);
            Operand rhs = GetSrcImm(context, Imm20ToSInt(op.Imm20));
            Operand selector = GetPredicate(context, op.SrcPred, op.SrcPredInv);

            EmitImnmx(context, lhs, rhs, selector, op.Dest, op.Signed, op.WriteCC);
        }

        /// <summary>IMNMX where the second operand comes from a constant buffer.</summary>
        public static void ImnmxC(EmitterContext context)
        {
            InstImnmxC op = context.GetOp<InstImnmxC>();

            Operand lhs = GetSrcReg(context, op.SrcA);
            Operand rhs = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);
            Operand selector = GetPredicate(context, op.SrcPred, op.SrcPredInv);

            EmitImnmx(context, lhs, rhs, selector, op.Dest, op.Signed, op.WriteCC);
        }

        /// <summary>
        /// Emits both the minimum and the maximum of the two sources, picks the minimum when
        /// <paramref name="srcPred"/> holds and the maximum otherwise, then writes the result
        /// to register <paramref name="rd"/> and updates the Z/N flags.
        /// </summary>
        private static void EmitImnmx(
            EmitterContext context,
            Operand srcA,
            Operand srcB,
            Operand srcPred,
            int rd,
            bool isSignedInt,
            bool writeCC)
        {
            Operand minimum;
            Operand maximum;

            // Both candidates are always emitted, minimum first, so the selection below can
            // be a plain conditional select on the predicate.
            if (isSignedInt)
            {
                minimum = context.IMinimumS32(srcA, srcB);
                maximum = context.IMaximumS32(srcA, srcB);
            }
            else
            {
                minimum = context.IMinimumU32(srcA, srcB);
                maximum = context.IMaximumU32(srcA, srcB);
            }

            Operand selected = context.ConditionalSelect(srcPred, minimum, maximum);

            context.Copy(GetDest(rd), selected);

            SetZnFlags(context, selected, writeCC);

            // TODO: X flags.
        }
    }
}
// \ No newline at end of file
// diff --git a/src/Ryujinx.Graphics.Shader/Instructions/InstEmitMemory.cs b/src/Ryujinx.Graphics.Shader/Instructions/InstEmitMemory.cs new file mode 100644 index 00000000..c73c6b2a --- /dev/null +++ b/src/Ryujinx.Graphics.Shader/Instructions/InstEmitMemory.cs @@ -0,0 +1,541 @@
using Ryujinx.Graphics.Shader.Decoders;
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using Ryujinx.Graphics.Shader.Translation;

using static Ryujinx.Graphics.Shader.Instructions.InstEmitHelper;
using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;

namespace Ryujinx.Graphics.Shader.Instructions
{
    // Emitters for the memory instructions: atomics (ATOM, ATOMS, RED), constant buffer
    // loads (LDC) and global/local/shared loads and stores (LDG, LDL, LDS, STG, STL, STS).
    static partial class InstEmit
    {
        // Selects which non-global memory space EmitLoad/EmitStore operate on.
        private enum MemoryRegion
        {
            Local,
            Shared
        }

        /// <summary>Atomic operation on global memory; the previous value is written to the destination.</summary>
        public static void Atom(EmitterContext context)
        {
            InstAtom op = context.GetOp<InstAtom>();

            // Sign-extend the 20-bit immediate offset.
            int sOffset = (op.Imm20 << 12) >> 12;

            (Operand addrLow, Operand addrHigh) = Get40BitsAddress(context, new Register(op.SrcA, RegisterType.Gpr), op.E, sOffset);

            Operand value = GetSrcReg(context, op.SrcB);

            Operand res = EmitAtomicOp(context, StorageKind.GlobalMemory, op.Op, op.Size, addrLow, addrHigh, value);

            context.Copy(GetDest(op.Dest), res);
        }

        /// <summary>Atomic operation on shared memory; the result is written to the destination.</summary>
        public static void Atoms(EmitterContext context)
        {
            InstAtoms op = context.GetOp<InstAtoms>();

            Operand offset = context.ShiftRightU32(GetSrcReg(context, op.SrcA), Const(2));

            // Sign-extend the 22-bit immediate offset.
            int sOffset = (op.Imm22 << 10) >> 10;

            offset = context.IAdd(offset, Const(sOffset));

            Operand value = GetSrcReg(context, op.SrcB);

            // Any size not listed explicitly is treated as U32.
            AtomSize size = op.AtomsSize switch
            {
                AtomsSize.S32 => AtomSize.S32,
                AtomsSize.U64 => AtomSize.U64,
                AtomsSize.S64 => AtomSize.S64,
                _ => AtomSize.U32
            };

            Operand res = EmitAtomicOp(context, StorageKind.SharedMemory, op.AtomOp, size, offset, Const(0), value);

            context.Copy(GetDest(op.Dest), res);
        }

        /// <summary>
        /// Constant buffer load. Sizes above B64 are logged and ignored. In the Is/Isl address
        /// modes the upper 16 bits of the source register are added to the slot and only the
        /// lower 16 bits are used as the offset.
        /// </summary>
        public static void Ldc(EmitterContext context)
        {
            InstLdc op = context.GetOp<InstLdc>();

            if (op.LsSize > LsSize2.B64)
            {
                context.Config.GpuAccessor.Log($"Invalid LDC size: {op.LsSize}.");
                return;
            }

            bool isSmallInt = op.LsSize < LsSize2.B32;

            int count = op.LsSize == LsSize2.B64 ? 2 : 1;

            Operand slot = Const(op.CbufSlot);
            Operand srcA = GetSrcReg(context, op.SrcA);

            if (op.AddressMode == AddressMode.Is || op.AddressMode == AddressMode.Isl)
            {
                slot = context.IAdd(slot, context.BitfieldExtractU32(srcA, Const(16), Const(16)));
                srcA = context.BitwiseAnd(srcA, Const(0xffff));
            }

            Operand addr = context.IAdd(srcA, Const(Imm16ToSInt(op.CbufOffset)));
            Operand wordOffset = context.ShiftRightU32(addr, Const(2));
            Operand bitOffset = GetBitOffset(context, addr);

            for (int index = 0; index < count; index++)
            {
                Register dest = new Register(op.Dest + index, RegisterType.Gpr);

                // Stop as soon as the destination reaches the zero register.
                if (dest.IsRZ)
                {
                    break;
                }

                Operand offset = context.IAdd(wordOffset, Const(index));
                Operand value = context.LoadConstant(slot, offset);

                if (isSmallInt)
                {
                    value = ExtractSmallInt(context, (LsSize)op.LsSize, bitOffset, value);
                }

                context.Copy(Register(dest), value);
            }
        }

        /// <summary>Global memory load.</summary>
        public static void Ldg(EmitterContext context)
        {
            InstLdg op = context.GetOp<InstLdg>();

            EmitLdg(context, op.LsSize, op.SrcA, op.Dest, Imm24ToSInt(op.Imm24), op.E);
        }

        /// <summary>Local memory load.</summary>
        public static void Ldl(EmitterContext context)
        {
            InstLdl op = context.GetOp<InstLdl>();

            EmitLoad(context, MemoryRegion.Local, op.LsSize, GetSrcReg(context, op.SrcA), op.Dest, Imm24ToSInt(op.Imm24));
        }

        /// <summary>Shared memory load.</summary>
        public static void Lds(EmitterContext context)
        {
            InstLds op = context.GetOp<InstLds>();

            EmitLoad(context, MemoryRegion.Shared, op.LsSize, GetSrcReg(context, op.SrcA), op.Dest, Imm24ToSInt(op.Imm24));
        }

        /// <summary>Atomic reduction on global memory; the result of the operation is discarded.</summary>
        public static void Red(EmitterContext context)
        {
            InstRed op = context.GetOp<InstRed>();

            // Sign-extend the 20-bit immediate exactly as Atom does, so both global atomics
            // treat the offset field the same way. This is a no-op if the decoder already
            // provides a signed value.
            int sOffset = (op.Imm20 << 12) >> 12;

            (Operand addrLow, Operand addrHigh) = Get40BitsAddress(context, new Register(op.SrcA, RegisterType.Gpr), op.E, sOffset);

            EmitAtomicOp(context, StorageKind.GlobalMemory, (AtomOp)op.RedOp, op.RedSize, addrLow, addrHigh, GetDest(op.SrcB));
        }

        /// <summary>Global memory store.</summary>
        public static void Stg(EmitterContext context)
        {
            InstStg op = context.GetOp<InstStg>();

            EmitStg(context, op.LsSize, op.SrcA, op.Dest, Imm24ToSInt(op.Imm24), op.E);
        }

        /// <summary>Local memory store.</summary>
        public static void Stl(EmitterContext context)
        {
            InstStl op = context.GetOp<InstStl>();

            EmitStore(context, MemoryRegion.Local, op.LsSize, GetSrcReg(context, op.SrcA), op.Dest, Imm24ToSInt(op.Imm24));
        }

        /// <summary>Shared memory store.</summary>
        public static void Sts(EmitterContext context)
        {
            InstSts op = context.GetOp<InstSts>();

            EmitStore(context, MemoryRegion.Shared, op.LsSize, GetSrcReg(context, op.SrcA), op.Dest, Imm24ToSInt(op.Imm24));
        }

        /// <summary>
        /// Emits the atomic operation <paramref name="op"/> on the given storage and returns
        /// its result. Only 32-bit types are supported: an unsupported type for one of the
        /// handled operations is logged, and in every unsupported case (including operations
        /// not handled here, which are not logged) the constant 0 is returned.
        /// </summary>
        private static Operand EmitAtomicOp(
            EmitterContext context,
            StorageKind storageKind,
            AtomOp op,
            AtomSize type,
            Operand addrLow,
            Operand addrHigh,
            Operand value)
        {
            bool isS32 = type == AtomSize.S32;
            bool isU32 = type == AtomSize.U32;
            bool is32Bits = isS32 || isU32;

            switch (op)
            {
                case AtomOp.Add when is32Bits:
                    return context.AtomicAdd(storageKind, addrLow, addrHigh, value);
                case AtomOp.And when is32Bits:
                    return context.AtomicAnd(storageKind, addrLow, addrHigh, value);
                case AtomOp.Xor when is32Bits:
                    return context.AtomicXor(storageKind, addrLow, addrHigh, value);
                case AtomOp.Or when is32Bits:
                    return context.AtomicOr(storageKind, addrLow, addrHigh, value);
                case AtomOp.Max when isS32:
                    return context.AtomicMaxS32(storageKind, addrLow, addrHigh, value);
                case AtomOp.Max when isU32:
                    return context.AtomicMaxU32(storageKind, addrLow, addrHigh, value);
                case AtomOp.Min when isS32:
                    return context.AtomicMinS32(storageKind, addrLow, addrHigh, value);
                case AtomOp.Min when isU32:
                    return context.AtomicMinU32(storageKind, addrLow, addrHigh, value);

                // A handled operation reached with a type it does not support.
                case AtomOp.Add:
                case AtomOp.And:
                case AtomOp.Xor:
                case AtomOp.Or:
                case AtomOp.Max:
                case AtomOp.Min:
                    context.Config.GpuAccessor.Log($"Invalid reduction type: {type}.");
                    break;
            }

            return Const(0);
        }

        /// <summary>
        /// Loads 1, 2 or 4 words from local or shared memory into consecutive registers
        /// starting at <paramref name="rd"/>. Sizes below B32 are extracted from the loaded
        /// word; sizes above B128 are logged and ignored.
        /// </summary>
        private static void EmitLoad(
            EmitterContext context,
            MemoryRegion region,
            LsSize2 size,
            Operand srcA,
            int rd,
            int offset)
        {
            if (size > LsSize2.B128)
            {
                context.Config.GpuAccessor.Log($"Invalid load size: {size}.");
                return;
            }

            bool isSmallInt = size < LsSize2.B32;

            int count = size switch
            {
                LsSize2.B64 => 2,
                LsSize2.B128 => 4,
                _ => 1
            };

            Operand baseOffset = context.IAdd(srcA, Const(offset));
            Operand wordOffset = context.ShiftRightU32(baseOffset, Const(2)); // Word offset = byte offset / 4 (one word = 4 bytes).
            Operand bitOffset = GetBitOffset(context, baseOffset);

            for (int index = 0; index < count; index++)
            {
                Register dest = new Register(rd + index, RegisterType.Gpr);

                // Stop as soon as the destination reaches the zero register.
                if (dest.IsRZ)
                {
                    break;
                }

                Operand elemOffset = context.IAdd(wordOffset, Const(index));
                Operand value = null;

                switch (region)
                {
                    case MemoryRegion.Local: value = context.LoadLocal(elemOffset); break;
                    case MemoryRegion.Shared: value = context.LoadShared(elemOffset); break;
                }

                if (isSmallInt)
                {
                    value = ExtractSmallInt(context, (LsSize)size, bitOffset, value);
                }

                context.Copy(Register(dest), value);
            }
        }

        /// <summary>
        /// Loads 1, 2 or 4 words from global memory into consecutive registers starting at
        /// <paramref name="rd"/>. Sizes below B32 are extracted from the loaded word.
        /// </summary>
        private static void EmitLdg(
            EmitterContext context,
            LsSize size,
            int ra,
            int rd,
            int offset,
            bool extended)
        {
            bool isSmallInt = size < LsSize.B32;

            int count = GetVectorCount(size);

            (Operand addrLow, Operand addrHigh) = Get40BitsAddress(context, new Register(ra, RegisterType.Gpr), extended, offset);

            Operand bitOffset = GetBitOffset(context, addrLow);

            for (int index = 0; index < count; index++)
            {
                Register dest = new Register(rd + index, RegisterType.Gpr);

                // Stop as soon as the destination reaches the zero register.
                if (dest.IsRZ)
                {
                    break;
                }

                Operand value = context.LoadGlobal(context.IAdd(addrLow, Const(index * 4)), addrHigh);

                if (isSmallInt)
                {
                    value = ExtractSmallInt(context, size, bitOffset, value);
                }

                context.Copy(Register(dest), value);
            }
        }

        /// <summary>
        /// Stores 1, 2 or 4 registers starting at <paramref name="rd"/> to local or shared
        /// memory. Small local stores are done as a read-modify-write of the containing word;
        /// small shared stores use the dedicated 8/16-bit store operations. Sizes above B128
        /// are logged and ignored.
        /// </summary>
        private static void EmitStore(
            EmitterContext context,
            MemoryRegion region,
            LsSize2 size,
            Operand srcA,
            int rd,
            int offset)
        {
            if (size > LsSize2.B128)
            {
                context.Config.GpuAccessor.Log($"Invalid store size: {size}.");
                return;
            }

            bool isSmallInt = size < LsSize2.B32;

            int count = size switch
            {
                LsSize2.B64 => 2,
                LsSize2.B128 => 4,
                _ => 1
            };

            Operand baseOffset = context.IAdd(srcA, Const(offset));
            Operand wordOffset = context.ShiftRightU32(baseOffset, Const(2));
            Operand bitOffset = GetBitOffset(context, baseOffset);

            for (int index = 0; index < count; index++)
            {
                // Once the register index would reach the zero register, the base register
                // is used as the source instead.
                bool isRz = rd + index >= RegisterConsts.RegisterZeroIndex;

                Operand value = Register(isRz ? rd : rd + index, RegisterType.Gpr);
                Operand elemOffset = context.IAdd(wordOffset, Const(index));

                if (isSmallInt && region == MemoryRegion.Local)
                {
                    Operand word = context.LoadLocal(elemOffset);

                    value = InsertSmallInt(context, (LsSize)size, bitOffset, word, value);
                }

                if (region == MemoryRegion.Local)
                {
                    context.StoreLocal(elemOffset, value);
                }
                else if (region == MemoryRegion.Shared)
                {
                    switch (size)
                    {
                        case LsSize2.U8:
                        case LsSize2.S8:
                            context.StoreShared8(baseOffset, value);
                            break;
                        case LsSize2.U16:
                        case LsSize2.S16:
                            context.StoreShared16(baseOffset, value);
                            break;
                        default:
                            context.StoreShared(elemOffset, value);
                            break;
                    }
                }
            }
        }

        /// <summary>
        /// Stores 1, 2 or 4 registers starting at <paramref name="rd"/> to global memory,
        /// using the 8/16-bit store operations for the small sizes. Sizes above B128 are
        /// logged and ignored.
        /// </summary>
        private static void EmitStg(
            EmitterContext context,
            LsSize2 size,
            int ra,
            int rd,
            int offset,
            bool extended)
        {
            if (size > LsSize2.B128)
            {
                context.Config.GpuAccessor.Log($"Invalid store size: {size}.");
                return;
            }

            int count = GetVectorCount((LsSize)size);

            (Operand addrLow, Operand addrHigh) = Get40BitsAddress(context, new Register(ra, RegisterType.Gpr), extended, offset);

            // No bit offset is computed here: the sized global stores below take the address
            // directly, and the value that used to be computed was never read.

            for (int index = 0; index < count; index++)
            {
                // Once the register index would reach the zero register, the base register
                // is used as the source instead.
                bool isRz = rd + index >= RegisterConsts.RegisterZeroIndex;

                Operand value = Register(isRz ? rd : rd + index, RegisterType.Gpr);

                Operand addrLowOffset = context.IAdd(addrLow, Const(index * 4));

                if (size == LsSize2.U8 || size == LsSize2.S8)
                {
                    context.StoreGlobal8(addrLowOffset, addrHigh, value);
                }
                else if (size == LsSize2.U16 || size == LsSize2.S16)
                {
                    context.StoreGlobal16(addrLowOffset, addrHigh, value);
                }
                else
                {
                    context.StoreGlobal(addrLowOffset, addrHigh, value);
                }
            }
        }

        /// <summary>Number of 32-bit words moved for <paramref name="size"/>: 2 for B64, 4 for B128/UB128, otherwise 1.</summary>
        private static int GetVectorCount(LsSize size)
        {
            switch (size)
            {
                case LsSize.B64:
                    return 2;
                case LsSize.B128:
                case LsSize.UB128:
                    return 4;
            }

            return 1;
        }

        /// <summary>
        /// Builds the (low, high) address pair from register <paramref name="ra"/> plus
        /// <paramref name="offset"/>. When <paramref name="extended"/> is set the high part
        /// comes from the next register (or 0 if <paramref name="ra"/> is the zero register)
        /// and receives the carry of the low addition; otherwise the high part is 0.
        /// </summary>
        private static (Operand, Operand) Get40BitsAddress(
            EmitterContext context,
            Register ra,
            bool extended,
            int offset)
        {
            Operand addrLow = Register(ra);
            Operand addrHigh;

            if (extended && !ra.IsRZ)
            {
                addrHigh = Register(ra.Index + 1, RegisterType.Gpr);
            }
            else
            {
                addrHigh = Const(0);
            }

            Operand offs = Const(offset);

            addrLow = context.IAdd(addrLow, offs);

            if (extended)
            {
                // Unsigned wrap-around of the low word means the sum is below the addend.
                Operand carry = context.ICompareLessUnsigned(addrLow, offs);

                addrHigh = context.IAdd(addrHigh, context.ConditionalSelect(carry, Const(1), Const(0)));
            }

            return (addrLow, addrHigh);
        }

        /// <summary>Bit position, inside its 32-bit word, of the byte addressed by <paramref name="baseOffset"/>.</summary>
        private static Operand GetBitOffset(EmitterContext context, Operand baseOffset)
        {
            // Note: bit offset = (baseOffset & 0b11) * 8.
            // Addresses should be always aligned to the integer type,
            // so we don't need to take unaligned addresses into account.
            return context.ShiftLeft(context.BitwiseAnd(baseOffset, Const(3)), Const(3));
        }

        /// <summary>
        /// Shifts <paramref name="value"/> right by <paramref name="bitOffset"/> and zero- or
        /// sign-extends the low 8 or 16 bits according to <paramref name="size"/>. Other sizes
        /// return the shifted value unchanged.
        /// </summary>
        private static Operand ExtractSmallInt(
            EmitterContext context,
            LsSize size,
            Operand bitOffset,
            Operand value)
        {
            value = context.ShiftRightU32(value, bitOffset);

            switch (size)
            {
                case LsSize.U8: value = ZeroExtendTo32(context, value, 8); break;
                case LsSize.U16: value = ZeroExtendTo32(context, value, 16); break;
                case LsSize.S8: value = SignExtendTo32(context, value, 8); break;
                case LsSize.S16: value = SignExtendTo32(context, value, 16); break;
            }

            return value;
        }

        /// <summary>
        /// Inserts the low 8 or 16 bits of <paramref name="value"/> into <paramref name="word"/>
        /// at <paramref name="bitOffset"/> and returns the merged word. Other sizes return
        /// <paramref name="value"/> unchanged.
        /// </summary>
        private static Operand InsertSmallInt(
            EmitterContext context,
            LsSize size,
            Operand bitOffset,
            Operand word,
            Operand value)
        {
            switch (size)
            {
                case LsSize.U8:
                case LsSize.S8:
                    value = context.BitwiseAnd(value, Const(0xff));
                    value = context.BitfieldInsert(word, value, bitOffset, Const(8));
                    break;

                case LsSize.U16:
                case LsSize.S16:
                    value = context.BitwiseAnd(value, Const(0xffff));
                    value = context.BitfieldInsert(word, value, bitOffset, Const(16));
                    break;
            }

            return value;
        }
    }
}
// src/Ryujinx.Graphics.Shader/Instructions/InstEmitMove.cs
using Ryujinx.Graphics.Shader.Decoders;
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using Ryujinx.Graphics.Shader.Translation;

using static Ryujinx.Graphics.Shader.Instructions.InstEmitHelper;
using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;

namespace Ryujinx.Graphics.Shader.Instructions
{
    // Emitters for MOV, R2P, S2R and SEL.
    static partial class InstEmit
    {
        // MOV with a register source.
        public static void MovR(EmitterContext context)
        {
            InstMovR op = context.GetOp<InstMovR>();

            Operand destination = GetDest(op.Dest);
            Operand source = GetSrcReg(context, op.SrcA);

            context.Copy(destination, source);
        }

        // MOV with a 20-bit immediate source.
        public static void MovI(EmitterContext context)
        {
            InstMovI op = context.GetOp<InstMovI>();

            Operand destination = GetDest(op.Dest);
            Operand source = GetSrcImm(context, op.Imm20);

            context.Copy(destination, source);
        }

        // MOV with a constant buffer source.
        public static void MovC(EmitterContext context)
        {
            InstMovC op = context.GetOp<InstMovC>();

            Operand destination = GetDest(op.Dest);
            Operand source = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);

            context.Copy(destination, source);
        }

        // MOV with a 32-bit immediate source.
        public static void Mov32i(EmitterContext context)
        {
            InstMov32i op = context.GetOp<InstMov32i>();

            Operand destination = GetDest(op.Dest);
            Operand source = GetSrcImm(context, op.Imm32);

            context.Copy(destination, source);
        }

        // R2P with the mask taken from a register.
        public static void R2pR(EmitterContext context)
        {
            InstR2pR op = context.GetOp<InstR2pR>();

            EmitR2p(context, GetSrcReg(context, op.SrcA), GetSrcReg(context, op.SrcB), op.ByteSel, op.Ccpr);
        }

        // R2P with the mask taken from a 20-bit immediate.
        public static void R2pI(EmitterContext context)
        {
            InstR2pI op = context.GetOp<InstR2pI>();

            EmitR2p(context, GetSrcReg(context, op.SrcA), GetSrcImm(context, Imm20ToSInt(op.Imm20)), op.ByteSel, op.Ccpr);
        }

        // R2P with the mask taken from a constant buffer.
        public static void R2pC(EmitterContext context)
        {
            InstR2pC op = context.GetOp<InstR2pC>();

            EmitR2p(context, GetSrcReg(context, op.SrcA), GetSrcCbuf(context, op.CbufSlot, op.CbufOffset), op.ByteSel, op.Ccpr);
        }

        // S2R: reads the special register selected by op.SReg into Dest.
        // Special registers without a mapping below read as integer zero.
        public static void S2r(EmitterContext context)
        {
            InstS2r op = context.GetOp<InstS2r>();

            Operand LoadInput(IoVariable variable)
            {
                return context.Load(StorageKind.Input, variable);
            }

            Operand LoadInputElement(IoVariable variable, int element)
            {
                return context.Load(StorageKind.Input, variable, null, Const(element));
            }

            Operand ReadThreadKill()
            {
                // Only loaded from the input variable on the fragment stage; zero everywhere else.
                return context.Config.Stage == ShaderStage.Fragment
                    ? LoadInput(IoVariable.ThreadKill)
                    : Const(0);
            }

            Operand ReadInvocationInfo()
            {
                ShaderStage stage = context.Config.Stage;

                if (stage == ShaderStage.Compute || stage == ShaderStage.Fragment)
                {
                    return Const(0);
                }

                // Note: Lowest 8-bits seems to contain some primitive index,
                // but it seems to be NVIDIA implementation specific as it's only used
                // to calculate ISBE offsets, so we can just keep it as zero.

                bool isTessellation =
                    stage == ShaderStage.TessellationControl ||
                    stage == ShaderStage.TessellationEvaluation;

                if (isTessellation)
                {
                    return context.ShiftLeft(LoadInput(IoVariable.PatchVertices), Const(16));
                }

                return Const(context.Config.GpuAccessor.QueryPrimitiveTopology().ToInputVertices() << 16);
            }

            Operand ReadPackedThreadId()
            {
                // X is left in place, Y is shifted left by 16 and Z by 26, then all three are ORed together.
                Operand x = LoadInputElement(IoVariable.ThreadId, 0);
                Operand y = LoadInputElement(IoVariable.ThreadId, 1);
                Operand z = LoadInputElement(IoVariable.ThreadId, 2);

                Operand shiftedY = context.ShiftLeft(y, Const(16));
                Operand shiftedZ = context.ShiftLeft(z, Const(26));

                return context.BitwiseOr(x, context.BitwiseOr(shiftedY, shiftedZ));
            }

            Operand value = op.SReg switch
            {
                SReg.LaneId => LoadInput(IoVariable.SubgroupLaneId),
                SReg.InvocationId => LoadInput(IoVariable.InvocationId),
                SReg.YDirection => ConstF(1), // TODO: Use value from Y direction GPU register.
                SReg.ThreadKill => ReadThreadKill(),
                SReg.InvocationInfo => ReadInvocationInfo(),
                SReg.TId => ReadPackedThreadId(),
                SReg.TIdX => LoadInputElement(IoVariable.ThreadId, 0),
                SReg.TIdY => LoadInputElement(IoVariable.ThreadId, 1),
                SReg.TIdZ => LoadInputElement(IoVariable.ThreadId, 2),
                SReg.CtaIdX => LoadInputElement(IoVariable.CtaId, 0),
                SReg.CtaIdY => LoadInputElement(IoVariable.CtaId, 1),
                SReg.CtaIdZ => LoadInputElement(IoVariable.CtaId, 2),
                SReg.EqMask => LoadInputElement(IoVariable.SubgroupEqMask, 0),
                SReg.LtMask => LoadInputElement(IoVariable.SubgroupLtMask, 0),
                SReg.LeMask => LoadInputElement(IoVariable.SubgroupLeMask, 0),
                SReg.GtMask => LoadInputElement(IoVariable.SubgroupGtMask, 0),
                SReg.GeMask => LoadInputElement(IoVariable.SubgroupGeMask, 0),
                _ => Const(0)
            };

            context.Copy(GetDest(op.Dest), value);
        }

        // SEL with a register as second source.
        public static void SelR(EmitterContext context)
        {
            InstSelR op = context.GetOp<InstSelR>();

            EmitSel(
                context,
                GetSrcReg(context, op.SrcA),
                GetSrcReg(context, op.SrcB),
                GetPredicate(context, op.SrcPred, op.SrcPredInv),
                op.Dest);
        }

        // SEL with a 20-bit immediate as second source.
        public static void SelI(EmitterContext context)
        {
            InstSelI op = context.GetOp<InstSelI>();

            EmitSel(
                context,
                GetSrcReg(context, op.SrcA),
                GetSrcImm(context, Imm20ToSInt(op.Imm20)),
                GetPredicate(context, op.SrcPred, op.SrcPredInv),
                op.Dest);
        }

        // SEL with a constant buffer value as second source.
        public static void SelC(EmitterContext context)
        {
            InstSelC op = context.GetOp<InstSelC>();

            EmitSel(
                context,
                GetSrcReg(context, op.SrcA),
                GetSrcCbuf(context, op.CbufSlot, op.CbufOffset),
                GetPredicate(context, op.SrcPred, op.SrcPredInv),
                op.Dest);
        }

        // For every predicate register whose bit is set in "mask", replaces the predicate with the
        // matching bit of the byte of "value" selected by "byteSel"; other predicates keep their value.
        // The condition code variant (ccpr) is only logged as unimplemented.
        private static void EmitR2p(EmitterContext context, Operand value, Operand mask, ByteSel byteSel, bool ccpr)
        {
            if (ccpr)
            {
                // TODO: Support Register to condition code flags copy.
                context.Config.GpuAccessor.Log("R2P.CC not implemented.");
                return;
            }

            Operand IsBitSet(Operand word, int bitIndex)
            {
                Operand isolated = context.BitwiseAnd(word, Const(1 << bitIndex));

                return context.ICompareNotEqual(isolated, Const(0));
            }

            int byteShift = (int)byteSel * 8;

            for (int index = 0; index < RegisterConsts.PredsCount; index++)
            {
                Operand predicate = Register(index, RegisterType.Predicate);

                Operand selected = IsBitSet(mask, index);
                Operand incoming = IsBitSet(value, index + byteShift);

                context.Copy(predicate, context.ConditionalSelect(selected, incoming, predicate));
            }
        }

        // Writes a conditional select on srcPred to register rd: srcA as the first choice, srcB as the second.
        private static void EmitSel(EmitterContext context, Operand srcA, Operand srcB, Operand srcPred, int rd)
        {
            Operand selected = context.ConditionalSelect(srcPred, srcA, srcB);
            Operand destination = GetDest(rd);

            context.Copy(destination, selected);
        }
    }
}
// src/Ryujinx.Graphics.Shader/Instructions/InstEmitMultifunction.cs
using Ryujinx.Graphics.Shader.Decoders;
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using Ryujinx.Graphics.Shader.Translation;

using static Ryujinx.Graphics.Shader.Instructions.InstEmitHelper;

namespace Ryujinx.Graphics.Shader.Instructions
{
    // Emitters for RRO and MUFU.
    static partial class InstEmit
    {
        // RRO with a register source.
        public static void RroR(EmitterContext context)
        {
            InstRroR op = context.GetOp<InstRroR>();

            Operand source = GetSrcReg(context, op.SrcB);

            EmitRro(context, source, op.Dest, op.AbsB, op.NegB);
        }

        // RRO with a 20-bit immediate source, converted with Imm20ToFloat.
        public static void RroI(EmitterContext context)
        {
            InstRroI op = context.GetOp<InstRroI>();

            Operand source = GetSrcImm(context, Imm20ToFloat(op.Imm20));

            EmitRro(context, source, op.Dest, op.AbsB, op.NegB);
        }

        // RRO with a constant buffer source.
        public static void RroC(EmitterContext context)
        {
            InstRroC op = context.GetOp<InstRroC>();

            Operand source = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);

            EmitRro(context, source, op.Dest, op.AbsB, op.NegB);
        }

        // MUFU: applies the function selected by op.MufuOp to SrcA (after abs/neg modifiers)
        // and writes the saturated result to Dest.
        public static void Mufu(EmitterContext context)
        {
            InstMufu op = context.GetOp<InstMufu>();

            Operand input = context.FPAbsNeg(GetSrcReg(context, op.SrcA), op.AbsA, op.NegA);

            // The 64-bit variants receive only the high word; the low word is packed as zero.
            Operand PackAsHighWord(Operand highWord)
            {
                return context.PackDouble2x32(OperandHelper.Const(0), highWord);
            }

            // An unknown function is logged and the (modified) source is passed through unchanged.
            Operand ReportInvalid()
            {
                context.Config.GpuAccessor.Log($"Invalid MUFU operation \"{op.MufuOp}\".");

                return input;
            }

            Operand result = op.MufuOp switch
            {
                MufuOp.Cos => context.FPCosine(input),
                MufuOp.Sin => context.FPSine(input),
                MufuOp.Ex2 => context.FPExponentB2(input),
                MufuOp.Lg2 => context.FPLogarithmB2(input),
                MufuOp.Rcp => context.FPReciprocal(input),
                MufuOp.Rsq => context.FPReciprocalSquareRoot(input),
                MufuOp.Rcp64h => context.UnpackDouble2x32High(
                    context.FPReciprocal(PackAsHighWord(input), Instruction.FP64)),
                MufuOp.Rsq64h => context.UnpackDouble2x32High(
                    context.FPReciprocalSquareRoot(PackAsHighWord(input), Instruction.FP64)),
                MufuOp.Sqrt => context.FPSquareRoot(input),
                _ => ReportInvalid()
            };

            context.Copy(GetDest(op.Dest), context.FPSaturate(result, op.Sat));
        }

        // Emits RRO as a plain move of srcB (with abs/neg modifiers applied) into register rd.
        private static void EmitRro(EmitterContext context, Operand srcB, int rd, bool absB, bool negB)
        {
            // This is the range reduction operator,
            // we translate it as a simple move, as it
            // should be always followed by a matching
            // MUFU instruction.
            Operand modified = context.FPAbsNeg(srcB, absB, negB);

            context.Copy(GetDest(rd), modified);
        }
    }
}
// src/Ryujinx.Graphics.Shader/Instructions/InstEmitNop.cs
using Ryujinx.Graphics.Shader.Decoders;
using Ryujinx.Graphics.Shader.Translation;

namespace Ryujinx.Graphics.Shader.Instructions
{
    // Emitter for NOP.
    static partial class InstEmit
    {
        // NOP: the operation is fetched from the context but nothing is emitted for it.
        public static void Nop(EmitterContext context)
        {
            // The decoded operation is not needed; the result is discarded.
            _ = context.GetOp<InstNop>();

            // No operation.
        }
    }
}
// src/Ryujinx.Graphics.Shader/Instructions/InstEmitPredicate.cs
using Ryujinx.Graphics.Shader.Decoders;
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using Ryujinx.Graphics.Shader.Translation;

using static Ryujinx.Graphics.Shader.Instructions.InstEmitAluHelper;
using static Ryujinx.Graphics.Shader.Instructions.InstEmitHelper;
using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;

namespace Ryujinx.Graphics.Shader.Instructions
{
    // Emitters for PSET and PSETP.
    static partial class InstEmit
    {
        // PSET: combines three predicates with two logical operations and writes the result to a
        // general purpose register; with BVal set, true is written as the float constant 1 and false as 0.
        public static void Pset(EmitterContext context)
        {
            InstPset op = context.GetOp<InstPset>();

            Operand pred2 = Register(op.Src2Pred, RegisterType.Predicate);
            Operand first = context.BitwiseNot(pred2, op.Src2PredInv);

            Operand pred1 = Register(op.Src1Pred, RegisterType.Predicate);
            Operand second = context.BitwiseNot(pred1, op.Src1PredInv);

            Operand pred0 = Register(op.SrcPred, RegisterType.Predicate);
            Operand third = context.BitwiseNot(pred0, op.SrcPredInv);

            Operand combined = GetPredLogicalOp(context, op.BoolOpAB, first, second);
            combined = GetPredLogicalOp(context, op.BoolOpC, combined, third);

            Operand destination = GetDest(op.Dest);

            Operand output = op.BVal
                ? context.ConditionalSelect(combined, ConstF(1), Const(0))
                : combined;

            context.Copy(destination, output);
        }

        // PSETP: combines two predicates, then merges both the result and its complement with a third
        // predicate, writing them to DestPred and DestPredInv respectively.
        public static void Psetp(EmitterContext context)
        {
            InstPsetp op = context.GetOp<InstPsetp>();

            Operand pred2 = Register(op.Src2Pred, RegisterType.Predicate);
            Operand first = context.BitwiseNot(pred2, op.Src2PredInv);

            Operand pred1 = Register(op.Src1Pred, RegisterType.Predicate);
            Operand second = context.BitwiseNot(pred1, op.Src1PredInv);

            Operand direct = GetPredLogicalOp(context, op.BoolOpAB, first, second);
            Operand inverted = context.BitwiseNot(direct);
            Operand third = GetPredicate(context, op.SrcPred, op.SrcPredInv);

            Operand directResult = GetPredLogicalOp(context, op.BoolOpC, direct, third);
            Operand invertedResult = GetPredLogicalOp(context, op.BoolOpC, inverted, third);

            context.Copy(Register(op.DestPred, RegisterType.Predicate), directResult);
            context.Copy(Register(op.DestPredInv, RegisterType.Predicate), invertedResult);
        }
    }
}
// src/Ryujinx.Graphics.Shader/Instructions/InstEmitShift.cs
using Ryujinx.Graphics.Shader.Decoders;
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using Ryujinx.Graphics.Shader.Translation;

using static Ryujinx.Graphics.Shader.Instructions.InstEmitHelper;
using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;

namespace Ryujinx.Graphics.Shader.Instructions
{
    // Emitters for SHF (funnel shift), SHL and SHR.
    static partial class InstEmit
    {
        // SHF.L with the shift amount in a register.
        public static void ShfLR(EmitterContext context)
        {
            InstShfLR op = context.GetOp<InstShfLR>();

            Operand low = GetSrcReg(context, op.SrcA);
            Operand amount = GetSrcReg(context, op.SrcB);
            Operand high = GetSrcReg(context, op.SrcC);

            EmitShf(context, op.MaxShift, low, amount, high, op.Dest, op.M, left: true, writeCC: op.WriteCC);
        }

        // SHF.R with the shift amount in a register.
        public static void ShfRR(EmitterContext context)
        {
            InstShfRR op = context.GetOp<InstShfRR>();

            Operand low = GetSrcReg(context, op.SrcA);
            Operand amount = GetSrcReg(context, op.SrcB);
            Operand high = GetSrcReg(context, op.SrcC);

            EmitShf(context, op.MaxShift, low, amount, high, op.Dest, op.M, left: false, writeCC: op.WriteCC);
        }

        // SHF.L with a 6-bit immediate shift amount.
        public static void ShfLI(EmitterContext context)
        {
            InstShfLI op = context.GetOp<InstShfLI>();

            Operand low = GetSrcReg(context, op.SrcA);
            Operand amount = Const(op.Imm6);
            Operand high = GetSrcReg(context, op.SrcC);

            EmitShf(context, op.MaxShift, low, amount, high, op.Dest, op.M, left: true, writeCC: op.WriteCC);
        }

        // SHF.R with a 6-bit immediate shift amount.
        public static void ShfRI(EmitterContext context)
        {
            InstShfRI op = context.GetOp<InstShfRI>();

            Operand low = GetSrcReg(context, op.SrcA);
            Operand amount = Const(op.Imm6);
            Operand high = GetSrcReg(context, op.SrcC);

            EmitShf(context, op.MaxShift, low, amount, high, op.Dest, op.M, left: false, writeCC: op.WriteCC);
        }

        // SHL with the shift amount in a register.
        public static void ShlR(EmitterContext context)
        {
            InstShlR op = context.GetOp<InstShlR>();

            Operand value = GetSrcReg(context, op.SrcA);
            Operand amount = GetSrcReg(context, op.SrcB);

            EmitShl(context, value, amount, op.Dest, op.M);
        }

        // SHL with a 20-bit immediate shift amount.
        public static void ShlI(EmitterContext context)
        {
            InstShlI op = context.GetOp<InstShlI>();

            Operand value = GetSrcReg(context, op.SrcA);
            Operand amount = GetSrcImm(context, Imm20ToSInt(op.Imm20));

            EmitShl(context, value, amount, op.Dest, op.M);
        }

        // SHL with the shift amount in a constant buffer.
        public static void ShlC(EmitterContext context)
        {
            InstShlC op = context.GetOp<InstShlC>();

            Operand value = GetSrcReg(context, op.SrcA);
            Operand amount = GetSrcCbuf(context, op.CbufSlot, op.CbufOffset);

            EmitShl(context, value, amount, op.Dest, op.M);
        }

        // SHR with the shift amount in a register.
        public static void ShrR(EmitterContext context)
        {
            InstShrR op = context.GetOp<InstShrR>();

            EmitShr(
                context,
                GetSrcReg(context, op.SrcA),
                GetSrcReg(context, op.SrcB),
                op.Dest,
                op.M,
                op.Brev,
                op.Signed);
        }

        // SHR with a 20-bit immediate shift amount.
        public static void ShrI(EmitterContext context)
        {
            InstShrI op = context.GetOp<InstShrI>();

            EmitShr(
                context,
                GetSrcReg(context, op.SrcA),
                GetSrcImm(context, Imm20ToSInt(op.Imm20)),
                op.Dest,
                op.M,
                op.Brev,
                op.Signed);
        }

        // SHR with the shift amount in a constant buffer.
        public static void ShrC(EmitterContext context)
        {
            InstShrC op = context.GetOp<InstShrC>();

            EmitShr(
                context,
                GetSrcReg(context, op.SrcA),
                GetSrcCbuf(context, op.CbufSlot, op.CbufOffset),
                op.Dest,
                op.M,
                op.Brev,
                op.Signed);
        }

        // Funnel shift of the pair formed by srcA and srcC by srcB, writing one 32-bit result to rd.
        // With "mask" set the shift amount is masked to the limit (32 or 64, per maxShift) minus one;
        // otherwise a shift amount at or above the limit produces zero.
        private static void EmitShf(
            EmitterContext context,
            MaxShift maxShift,
            Operand srcA,
            Operand srcB,
            Operand srcC,
            int rd,
            bool mask,
            bool left,
            bool writeCC)
        {
            bool signedShift = maxShift == MaxShift.S64;
            bool isLongShift = maxShift == MaxShift.U64 || signedShift;
            int shiftLimit = isLongShift ? 64 : 32;

            if (mask)
            {
                srcB = context.BitwiseAnd(srcB, Const(shiftLimit - 1));
            }

            Operand FunnelLeft()
            {
                // (C << B) | (A >> (32 - B))
                Operand upper = context.ShiftLeft(srcC, srcB);
                Operand carriedIn = context.ShiftRightU32(srcA, context.ISubtract(Const(32), srcB));
                Operand combined = context.BitwiseOr(upper, carriedIn);

                if (!isLongShift)
                {
                    return combined;
                }

                // B >= 32 ? A << (B - 32) : combined
                Operand beyondWord = context.ShiftLeft(srcA, context.ISubtract(srcB, Const(32)));
                Operand isBeyondWord = context.ICompareGreaterOrEqualUnsigned(srcB, Const(32));

                return context.ConditionalSelect(isBeyondWord, beyondWord, combined);
            }

            Operand FunnelRight()
            {
                // (A >> B) | (C << (32 - B))
                Operand lower = context.ShiftRightU32(srcA, srcB);
                Operand carriedIn = context.ShiftLeft(srcC, context.ISubtract(Const(32), srcB));
                Operand combined = context.BitwiseOr(lower, carriedIn);

                if (!isLongShift)
                {
                    return combined;
                }

                // B >= 32 ? C >> (B - 32) : combined
                Operand excess = context.ISubtract(srcB, Const(32));

                Operand beyondWord = signedShift
                    ? context.ShiftRightS32(srcC, excess)
                    : context.ShiftRightU32(srcC, excess);

                Operand isBeyondWord = context.ICompareGreaterOrEqualUnsigned(srcB, Const(32));

                return context.ConditionalSelect(isBeyondWord, beyondWord, combined);
            }

            Operand res = left ? FunnelLeft() : FunnelRight();

            if (!mask)
            {
                // Clamped shift value.
                Operand isBelowLimit = context.ICompareLessUnsigned(srcB, Const(shiftLimit));

                res = context.ConditionalSelect(isBelowLimit, res, Const(0));
            }

            context.Copy(GetDest(rd), res);

            if (writeCC)
            {
                InstEmitAluHelper.SetZnFlags(context, res, writeCC);
            }

            // TODO: X.
        }

        // Logical left shift of srcA by srcB into rd. With "mask" set the amount is masked to 5 bits;
        // otherwise an amount of 32 or more produces zero.
        private static void EmitShl(EmitterContext context, Operand srcA, Operand srcB, int rd, bool mask)
        {
            Operand amount = mask ? context.BitwiseAnd(srcB, Const(0x1f)) : srcB;

            Operand shifted = context.ShiftLeft(srcA, amount);

            if (!mask)
            {
                // Clamped shift value.
                Operand isBelow32 = context.ICompareLessUnsigned(amount, Const(32));

                shifted = context.ConditionalSelect(isBelow32, shifted, Const(0));
            }

            // TODO: X, CC.

            context.Copy(GetDest(rd), shifted);
        }

        // Right shift of srcA (optionally bit-reversed first) by srcB into rd, arithmetic when isSigned.
        // With "mask" set the amount is masked to 5 bits; otherwise an amount of 32 or more produces
        // zero for unsigned shifts and the value shifted right by 31 for signed shifts.
        private static void EmitShr(
            EmitterContext context,
            Operand srcA,
            Operand srcB,
            int rd,
            bool mask,
            bool bitReverse,
            bool isSigned)
        {
            Operand value = bitReverse ? context.BitfieldReverse(srcA) : srcA;
            Operand amount = mask ? context.BitwiseAnd(srcB, Const(0x1f)) : srcB;

            Operand shifted;

            if (isSigned)
            {
                shifted = context.ShiftRightS32(value, amount);
            }
            else
            {
                shifted = context.ShiftRightU32(value, amount);
            }

            if (!mask)
            {
                // Clamped shift value.
                Operand saturated = isSigned
                    ? context.ShiftRightS32(value, Const(31))
                    : Const(0);

                Operand isBelow32 = context.ICompareLessUnsigned(amount, Const(32));

                shifted = context.ConditionalSelect(isBelow32, shifted, saturated);
            }

            // TODO: X, CC.

            context.Copy(GetDest(rd), shifted);
        }
    }
}
// src/Ryujinx.Graphics.Shader/Instructions/InstEmitSurface.cs
using Ryujinx.Graphics.Shader.Decoders;
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using Ryujinx.Graphics.Shader.Translation;
using System;
using System.Collections.Generic;
using System.Numerics;

using static Ryujinx.Graphics.Shader.Instructions.InstEmitHelper;
using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;

namespace Ryujinx.Graphics.Shader.Instructions
{
    // Emitters for the surface (image) instructions: SUATOM, SULD, SURED and SUST.
    static partial class InstEmit
    {
        // SUATOM, bindless form: the image handle is read from SrcC.
        public static void SuatomB(EmitterContext context)
        {
            InstSuatomB op = context.GetOp<InstSuatomB>();

            EmitSuatom(
                context,
                op.Dim,
                op.Op,
                op.Size,
                0,
                op.SrcA,
                op.SrcB,
                op.SrcC,
                op.Dest,
                op.Ba,
                isBindless: true,
                compareAndSwap: false);
        }

        // SUATOM, bound form: the image handle is the TidB immediate.
        public static void Suatom(EmitterContext context)
        {
            InstSuatom op = context.GetOp<InstSuatom>();

            EmitSuatom(
                context,
                op.Dim,
                op.Op,
                op.Size,
                op.TidB,
                op.SrcA,
                op.SrcB,
                0,
                op.Dest,
                op.Ba,
                isBindless: false,
                compareAndSwap: false);
        }

        // SUATOM, second bindless encoding; emitted exactly like SuatomB.
        public static void SuatomB2(EmitterContext context)
        {
            InstSuatomB2 op = context.GetOp<InstSuatomB2>();

            EmitSuatom(
                context,
                op.Dim,
                op.Op,
                op.Size,
                0,
                op.SrcA,
                op.SrcB,
                op.SrcC,
                op.Dest,
                op.Ba,
                isBindless: true,
                compareAndSwap: false);
        }

        // SUATOM compare-and-swap, bindless form.
        public static void SuatomCasB(EmitterContext context)
        {
            InstSuatomCasB op = context.GetOp<InstSuatomCasB>();

            EmitSuatom(
                context,
                op.Dim,
                0,
                op.Size,
                0,
                op.SrcA,
                op.SrcB,
                op.SrcC,
                op.Dest,
                op.Ba,
                isBindless: true,
                compareAndSwap: true);
        }

        // SUATOM compare-and-swap, bound form.
        public static void SuatomCas(EmitterContext context)
        {
            InstSuatomCas op = context.GetOp<InstSuatomCas>();

            EmitSuatom(
                context,
                op.Dim,
                0,
                op.Size,
                op.TidB,
                op.SrcA,
                op.SrcB,
                0,
                op.Dest,
                op.Ba,
                isBindless: false,
                compareAndSwap: true);
        }

        // SULD with an explicit data size, bindless form.
        public static void SuldDB(EmitterContext context)
        {
            InstSuldDB op = context.GetOp<InstSuldDB>();

            EmitSuld(context, op.CacheOp, op.Dim, op.Size, 0, 0, op.SrcA, op.Dest, op.SrcC, useComponents: false, op.Ba, isBindless: true);
        }

        // SULD with an explicit data size, bound form.
        public static void SuldD(EmitterContext context)
        {
            InstSuldD op = context.GetOp<InstSuldD>();

            EmitSuld(context, op.CacheOp, op.Dim, op.Size, op.TidB, 0, op.SrcA, op.Dest, 0, useComponents: false, op.Ba, isBindless: false);
        }

        // SULD with a component mask, bindless form.
        public static void SuldB(EmitterContext context)
        {
            InstSuldB op = context.GetOp<InstSuldB>();

            EmitSuld(context, op.CacheOp, op.Dim, 0, 0, op.Rgba, op.SrcA, op.Dest, op.SrcC, useComponents: true, false, isBindless: true);
        }

        // SULD with a component mask, bound form.
        public static void Suld(EmitterContext context)
        {
            InstSuld op = context.GetOp<InstSuld>();

            EmitSuld(context, op.CacheOp, op.Dim, 0, op.TidB, op.Rgba, op.SrcA, op.Dest, 0, useComponents: true, false, isBindless: false);
        }

        // SURED, bindless form.
        public static void SuredB(EmitterContext context)
        {
            InstSuredB op = context.GetOp<InstSuredB>();

            EmitSured(context, op.Dim, op.Op, op.Size, 0, op.SrcA, op.Dest, op.SrcC, op.Ba, isBindless: true);
        }

        // SURED, bound form.
        public static void Sured(EmitterContext context)
        {
            InstSured op = context.GetOp<InstSured>();

            EmitSured(context, op.Dim, op.Op, op.Size, op.TidB, op.SrcA, op.Dest, 0, op.Ba, isBindless: false);
        }

        // SUST with an explicit data size, bindless form.
        public static void SustDB(EmitterContext context)
        {
            InstSustDB op = context.GetOp<InstSustDB>();

            EmitSust(context, op.CacheOp, op.Dim, op.Size, 0, 0, op.SrcA, op.Dest, op.SrcC, useComponents: false, op.Ba, isBindless: true);
        }

        // SUST with an explicit data size, bound form.
        public static void SustD(EmitterContext context)
        {
            InstSustD op = context.GetOp<InstSustD>();

            EmitSust(context, op.CacheOp, op.Dim, op.Size, op.TidB, 0, op.SrcA, op.Dest, 0, useComponents: false, op.Ba, isBindless: false);
        }

        // SUST with a component mask, bindless form.
        public static void SustB(EmitterContext context)
        {
            InstSustB op = context.GetOp<InstSustB>();

            EmitSust(context, op.CacheOp, op.Dim, 0, 0, op.Rgba, op.SrcA, op.Dest, op.SrcC, useComponents: true, false, isBindless: true);
        }

        // SUST with a component mask, bound form.
        public static void Sust(EmitterContext context)
        {
            InstSust op = context.GetOp<InstSust>();

            EmitSust(context, op.CacheOp, op.Dim, 0, op.TidB, op.Rgba, op.SrcA, op.Dest, 0, useComponents: true, false, isBindless: false);
        }

        // Emits an ImageAtomic texture operation that returns a value in register "dest".
        //
        // Sources are laid out as: [bindless handle from srcC], coordinates read from consecutive
        // registers starting at srcA, [array index], [compare value when compareAndSwap], value,
        // where the last two are read from consecutive registers starting at srcB.
        // When byteAddress is set, the X coordinate is shifted right by log2 of the component size.
        // "imm" is the image handle for the bound form. No destination operand is used when "dest" is RZ.
        private static void EmitSuatom(
            EmitterContext context,
            SuDim dimensions,
            SuatomOp atomicOp,
            SuatomSize size,
            int imm,
            int srcA,
            int srcB,
            int srcC,
            int dest,
            bool byteAddress,
            bool isBindless,
            bool compareAndSwap)
        {
            SamplerType type = ConvertSamplerType(dimensions);

            if (type == SamplerType.None)
            {
                context.Config.GpuAccessor.Log("Invalid image atomic sampler type.");
                return;
            }

            // Reads the next coordinate register; indices past the zero register read as constant 0.
            Operand Ra()
            {
                if (srcA > RegisterConsts.RegisterZeroIndex)
                {
                    return Const(0);
                }

                return context.Copy(Register(srcA++, RegisterType.Gpr));
            }

            // Reads the next data register; indices past the zero register read as constant 0.
            Operand Rb()
            {
                if (srcB > RegisterConsts.RegisterZeroIndex)
                {
                    return Const(0);
                }

                return context.Copy(Register(srcB++, RegisterType.Gpr));
            }

            Operand destOperand = dest != RegisterConsts.RegisterZeroIndex ? Register(dest, RegisterType.Gpr) : null;

            List<Operand> sourcesList = new List<Operand>();

            if (isBindless)
            {
                sourcesList.Add(context.Copy(GetSrcReg(context, srcC)));
            }

            int coordsCount = type.GetDimensions();

            for (int index = 0; index < coordsCount; index++)
            {
                sourcesList.Add(Ra());
            }

            if (Sample1DAs2D && (type & SamplerType.Mask) == SamplerType.Texture1D)
            {
                // 1D images are accessed as 2D with a constant zero Y coordinate.
                sourcesList.Add(Const(0));

                type &= ~SamplerType.Mask;
                type |= SamplerType.Texture2D;
            }

            if (type.HasFlag(SamplerType.Array))
            {
                sourcesList.Add(Ra());
            }

            if (byteAddress)
            {
                // The X coordinate follows the handle when the handle is part of the sources.
                int xIndex = isBindless ? 1 : 0;

                sourcesList[xIndex] = context.ShiftRightS32(sourcesList[xIndex], Const(GetComponentSizeInBytesLog2(size)));
            }

            // TODO: FP and 64-bit formats.
            TextureFormat format = size == SuatomSize.Sd32 || size == SuatomSize.Sd64
                ? (isBindless ? TextureFormat.Unknown : context.Config.GetTextureFormatAtomic(imm))
                : GetTextureFormat(size);

            if (compareAndSwap)
            {
                sourcesList.Add(Rb());
            }

            sourcesList.Add(Rb());

            Operand[] sources = sourcesList.ToArray();

            TextureFlags flags = compareAndSwap ? TextureFlags.CAS : GetAtomicOpFlags(atomicOp);

            if (isBindless)
            {
                flags |= TextureFlags.Bindless;
            }

            TextureOperation operation = context.CreateTextureOperation(
                Instruction.ImageAtomic,
                type,
                format,
                flags,
                imm,
                0,
                new[] { destOperand },
                sources);

            context.Add(operation);
        }

        // Emits an ImageLoad texture operation.
        //
        // Note that "srcB" holds the index of the first DESTINATION register here (callers pass op.Dest).
        // Destinations are consecutive registers starting at srcB and stop before the zero register.
        // With useComponents, one destination is used per bit set in componentMask; otherwise the number
        // of components and the format come from "size", and 8/16-bit results are zero or sign extended
        // to 32 bits after the load.
        private static void EmitSuld(
            EmitterContext context,
            CacheOpLd cacheOp,
            SuDim dimensions,
            SuSize size,
            int imm,
            SuRgba componentMask,
            int srcA,
            int srcB,
            int srcC,
            bool useComponents,
            bool byteAddress,
            bool isBindless)
        {
            context.Config.SetUsedFeature(FeatureFlags.IntegerSampling);

            SamplerType type = ConvertSamplerType(dimensions);

            if (type == SamplerType.None)
            {
                context.Config.GpuAccessor.Log("Invalid image load sampler type.");
                return;
            }

            // Reads the next coordinate register; indices past the zero register read as constant 0.
            Operand Ra()
            {
                if (srcA > RegisterConsts.RegisterZeroIndex)
                {
                    return Const(0);
                }

                return context.Copy(Register(srcA++, RegisterType.Gpr));
            }

            List<Operand> sourcesList = new List<Operand>();

            if (isBindless)
            {
                sourcesList.Add(context.Copy(Register(srcC, RegisterType.Gpr)));
            }

            int coordsCount = type.GetDimensions();

            for (int index = 0; index < coordsCount; index++)
            {
                sourcesList.Add(Ra());
            }

            if (Sample1DAs2D && (type & SamplerType.Mask) == SamplerType.Texture1D)
            {
                // 1D images are accessed as 2D with a constant zero Y coordinate.
                sourcesList.Add(Const(0));

                type &= ~SamplerType.Mask;
                type |= SamplerType.Texture2D;
            }

            if (type.HasFlag(SamplerType.Array))
            {
                sourcesList.Add(Ra());
            }

            Operand[] sources = sourcesList.ToArray();

            int handle = imm;

            TextureFlags flags = isBindless ? TextureFlags.Bindless : TextureFlags.None;

            if (cacheOp == CacheOpLd.Cg)
            {
                flags |= TextureFlags.Coherent;
            }

            if (useComponents)
            {
                Operand[] dests = new Operand[BitOperations.PopCount((uint)componentMask)];

                int outputIndex = 0;

                for (int i = 0; i < dests.Length; i++)
                {
                    if (srcB + i >= RegisterConsts.RegisterZeroIndex)
                    {
                        break;
                    }

                    dests[outputIndex++] = Register(srcB + i, RegisterType.Gpr);
                }

                if (outputIndex != dests.Length)
                {
                    Array.Resize(ref dests, outputIndex);
                }

                TextureOperation operation = context.CreateTextureOperation(
                    Instruction.ImageLoad,
                    type,
                    flags,
                    handle,
                    (int)componentMask,
                    dests,
                    sources);

                if (!isBindless)
                {
                    operation.Format = context.Config.GetTextureFormat(handle);
                }

                context.Add(operation);
            }
            else
            {
                if (byteAddress)
                {
                    // The X coordinate follows the handle when the handle is part of the sources.
                    int xIndex = isBindless ? 1 : 0;

                    sources[xIndex] = context.ShiftRightS32(sources[xIndex], Const(GetComponentSizeInBytesLog2(size)));
                }

                int components = GetComponents(size);
                int compMask = (1 << components) - 1;

                Operand[] dests = new Operand[components];

                int outputIndex = 0;

                for (int i = 0; i < dests.Length; i++)
                {
                    if (srcB + i >= RegisterConsts.RegisterZeroIndex)
                    {
                        break;
                    }

                    dests[outputIndex++] = Register(srcB + i, RegisterType.Gpr);
                }

                if (outputIndex != dests.Length)
                {
                    Array.Resize(ref dests, outputIndex);
                }

                TextureOperation operation = context.CreateTextureOperation(
                    Instruction.ImageLoad,
                    type,
                    GetTextureFormat(size),
                    flags,
                    handle,
                    compMask,
                    dests,
                    sources);

                context.Add(operation);

                // When the destination is the zero register no destination operand exists,
                // so there is nothing to extend (and indexing dests[0] would throw).
                if (dests.Length != 0)
                {
                    switch (size)
                    {
                        case SuSize.U8:
                            context.Copy(dests[0], ZeroExtendTo32(context, dests[0], 8));
                            break;
                        case SuSize.U16:
                            context.Copy(dests[0], ZeroExtendTo32(context, dests[0], 16));
                            break;
                        case SuSize.S8:
                            context.Copy(dests[0], SignExtendTo32(context, dests[0], 8));
                            break;
                        case SuSize.S16:
                            context.Copy(dests[0], SignExtendTo32(context, dests[0], 16));
                            break;
                    }
                }
            }
        }

        // Emits an ImageAtomic texture operation with no destination (reduction).
        //
        // Sources are laid out as: [bindless handle from srcC], coordinates read from consecutive
        // registers starting at srcA, [array index], then the value read from register srcB.
        // When byteAddress is set, the X coordinate is shifted right by log2 of the component size.
        private static void EmitSured(
            EmitterContext context,
            SuDim dimensions,
            RedOp atomicOp,
            SuatomSize size,
            int imm,
            int srcA,
            int srcB,
            int srcC,
            bool byteAddress,
            bool isBindless)
        {
            SamplerType type = ConvertSamplerType(dimensions);

            if (type == SamplerType.None)
            {
                context.Config.GpuAccessor.Log("Invalid image reduction sampler type.");
                return;
            }

            // Reads the next coordinate register; indices past the zero register read as constant 0.
            Operand Ra()
            {
                if (srcA > RegisterConsts.RegisterZeroIndex)
                {
                    return Const(0);
                }

                return context.Copy(Register(srcA++, RegisterType.Gpr));
            }

            // Reads the next data register; indices past the zero register read as constant 0.
            Operand Rb()
            {
                if (srcB > RegisterConsts.RegisterZeroIndex)
                {
                    return Const(0);
                }

                return context.Copy(Register(srcB++, RegisterType.Gpr));
            }

            List<Operand> sourcesList = new List<Operand>();

            if (isBindless)
            {
                sourcesList.Add(context.Copy(GetSrcReg(context, srcC)));
            }

            int coordsCount = type.GetDimensions();

            for (int index = 0; index < coordsCount; index++)
            {
                sourcesList.Add(Ra());
            }

            if (Sample1DAs2D && (type & SamplerType.Mask) == SamplerType.Texture1D)
            {
                // 1D images are accessed as 2D with a constant zero Y coordinate.
                sourcesList.Add(Const(0));

                type &= ~SamplerType.Mask;
                type |= SamplerType.Texture2D;
            }

            if (type.HasFlag(SamplerType.Array))
            {
                sourcesList.Add(Ra());
            }

            if (byteAddress)
            {
                // The X coordinate follows the handle when the handle is part of the sources.
                int xIndex = isBindless ? 1 : 0;

                sourcesList[xIndex] = context.ShiftRightS32(sourcesList[xIndex], Const(GetComponentSizeInBytesLog2(size)));
            }

            // TODO: FP and 64-bit formats.
            TextureFormat format = size == SuatomSize.Sd32 || size == SuatomSize.Sd64
                ? (isBindless ? TextureFormat.Unknown : context.Config.GetTextureFormatAtomic(imm))
                : GetTextureFormat(size);

            sourcesList.Add(Rb());

            Operand[] sources = sourcesList.ToArray();

            TextureFlags flags = GetAtomicOpFlags((SuatomOp)atomicOp);

            if (isBindless)
            {
                flags |= TextureFlags.Bindless;
            }

            TextureOperation operation = context.CreateTextureOperation(
                Instruction.ImageAtomic,
                type,
                format,
                flags,
                imm,
                0,
                null,
                sources);

            context.Add(operation);
        }

        // Emits an ImageStore texture operation.
        //
        // Sources are laid out as: [bindless handle from srcC], coordinates read from consecutive
        // registers starting at srcA, [array index], then the values to store read from consecutive
        // registers starting at srcB. With useComponents, one value is read per bit set in
        // componentMask; otherwise the number of values and the format come from "size".
        private static void EmitSust(
            EmitterContext context,
            CacheOpSt cacheOp,
            SuDim dimensions,
            SuSize size,
            int imm,
            SuRgba componentMask,
            int srcA,
            int srcB,
            int srcC,
            bool useComponents,
            bool byteAddress,
            bool isBindless)
        {
            SamplerType type = ConvertSamplerType(dimensions);

            if (type == SamplerType.None)
            {
                context.Config.GpuAccessor.Log("Invalid image store sampler type.");
                return;
            }

            // Reads the next coordinate register; indices past the zero register read as constant 0.
            Operand Ra()
            {
                if (srcA > RegisterConsts.RegisterZeroIndex)
                {
                    return Const(0);
                }

                return context.Copy(Register(srcA++, RegisterType.Gpr));
            }

            // Reads the next data register; indices past the zero register read as constant 0.
            Operand Rb()
            {
                if (srcB > RegisterConsts.RegisterZeroIndex)
                {
                    return Const(0);
                }

                return context.Copy(Register(srcB++, RegisterType.Gpr));
            }

            List<Operand> sourcesList = new List<Operand>();

            if (isBindless)
            {
                sourcesList.Add(context.Copy(Register(srcC, RegisterType.Gpr)));
            }

            int coordsCount = type.GetDimensions();

            for (int index = 0; index < coordsCount; index++)
            {
                sourcesList.Add(Ra());
            }

            if (Sample1DAs2D && (type & SamplerType.Mask) == SamplerType.Texture1D)
            {
                // 1D images are accessed as 2D with a constant zero Y coordinate.
                sourcesList.Add(Const(0));

                type &= ~SamplerType.Mask;
                type |= SamplerType.Texture2D;
            }

            if (type.HasFlag(SamplerType.Array))
            {
                sourcesList.Add(Ra());
            }

            TextureFormat format = TextureFormat.Unknown;

            if (useComponents)
            {
                // One source register per enabled component, lowest mask bit first.
                for (int compMask = (int)componentMask; compMask != 0; compMask >>= 1)
                {
                    if ((compMask & 1) != 0)
                    {
                        sourcesList.Add(Rb());
                    }
                }

                if (!isBindless)
                {
                    format = context.Config.GetTextureFormat(imm);
                }
            }
            else
            {
                if (byteAddress)
                {
                    // The X coordinate follows the handle when the handle is part of the sources.
                    int xIndex = isBindless ? 1 : 0;

                    sourcesList[xIndex] = context.ShiftRightS32(sourcesList[xIndex], Const(GetComponentSizeInBytesLog2(size)));
                }

                int components = GetComponents(size);

                for (int compIndex = 0; compIndex < components; compIndex++)
                {
                    sourcesList.Add(Rb());
                }

                format = GetTextureFormat(size);
            }

            Operand[] sources = sourcesList.ToArray();

            int handle = imm;

            TextureFlags flags = isBindless ? TextureFlags.Bindless : TextureFlags.None;

            if (cacheOp == CacheOpSt.Cg)
            {
                flags |= TextureFlags.Coherent;
            }

            TextureOperation operation = context.CreateTextureOperation(
                Instruction.ImageStore,
                type,
                format,
                flags,
                handle,
                0,
                null,
                sources);

            context.Add(operation);
        }

        // Log2 of the element size in bytes for an atomic size; unlisted sizes are treated as 4 bytes.
        private static int GetComponentSizeInBytesLog2(SuatomSize size)
        {
            return size switch
            {
                SuatomSize.U32 => 2,
                SuatomSize.S32 => 2,
                SuatomSize.U64 => 3,
                SuatomSize.F32FtzRn => 2,
                SuatomSize.F16x2FtzRn => 2,
                SuatomSize.S64 => 3,
                SuatomSize.Sd32 => 2,
                SuatomSize.Sd64 => 3,
                _ => 2
            };
        }

        // Texture format used for an atomic size; unlisted sizes map to R32Uint.
        private static TextureFormat GetTextureFormat(SuatomSize size)
        {
            return size switch
            {
                SuatomSize.U32 => TextureFormat.R32Uint,
                SuatomSize.S32 => TextureFormat.R32Sint,
                SuatomSize.U64 => TextureFormat.R32G32Uint,
                SuatomSize.F32FtzRn => TextureFormat.R32Float,
                SuatomSize.F16x2FtzRn => TextureFormat.R16G16Float,
                SuatomSize.S64 => TextureFormat.R32G32Uint,
                SuatomSize.Sd32 => TextureFormat.R32Uint,
                SuatomSize.Sd64 => TextureFormat.R32G32Uint,
                _ => TextureFormat.R32Uint
            };
        }

        // Texture flag selecting the atomic operation; unlisted operations map to Add.
        private static TextureFlags GetAtomicOpFlags(SuatomOp op)
        {
            return op switch
            {
                SuatomOp.Add => TextureFlags.Add,
                SuatomOp.Min => TextureFlags.Minimum,
                SuatomOp.Max => TextureFlags.Maximum,
                SuatomOp.Inc => TextureFlags.Increment,
                SuatomOp.Dec => TextureFlags.Decrement,
                SuatomOp.And => TextureFlags.BitwiseAnd,
                SuatomOp.Or => TextureFlags.BitwiseOr,
                SuatomOp.Xor => TextureFlags.BitwiseXor,
                SuatomOp.Exch => TextureFlags.Swap,
                _ => TextureFlags.Add
            };
        }

        // Number of 32-bit components transferred for a load/store size.
        private static int GetComponents(SuSize size)
        {
            return size switch
            {
                SuSize.B64 => 2,
                SuSize.B128 => 4,
                SuSize.UB128 => 4,
                _ => 1
            };
        }

        // Log2 of the element size in bytes for a load/store size; unlisted sizes are treated as 4 bytes.
        private static int GetComponentSizeInBytesLog2(SuSize size)
        {
            return size switch
            {
                SuSize.U8 => 0,
                SuSize.S8 => 0,
                SuSize.U16 => 1,
                SuSize.S16 => 1,
                SuSize.B32 => 2,
                SuSize.B64 => 3,
                SuSize.B128 => 4,
                SuSize.UB128 => 4,
                _ => 2
            };
        }

        // Texture format used for a load/store size; unlisted sizes map to R32Uint.
        private static TextureFormat GetTextureFormat(SuSize size)
        {
            return size switch
            {
                SuSize.U8 => TextureFormat.R8Uint,
                SuSize.S8 => TextureFormat.R8Sint,
                SuSize.U16 => TextureFormat.R16Uint,
                SuSize.S16 => TextureFormat.R16Sint,
                SuSize.B32 => TextureFormat.R32Uint,
                SuSize.B64 => TextureFormat.R32G32Uint,
                SuSize.B128 => TextureFormat.R32G32B32A32Uint,
                SuSize.UB128 => TextureFormat.R32G32B32A32Uint,
                _ => TextureFormat.R32Uint
            };
        }

        // Maps the surface dimension field to a sampler type; unknown values map to SamplerType.None,
        // which the emitters treat as invalid.
        private static SamplerType ConvertSamplerType(SuDim target)
        {
            return target switch
            {
                SuDim._1d => SamplerType.Texture1D,
                SuDim._1dBuffer => SamplerType.TextureBuffer,
                SuDim._1dArray => SamplerType.Texture1D | SamplerType.Array,
                SuDim._2d => SamplerType.Texture2D,
                SuDim._2dArray => SamplerType.Texture2D | SamplerType.Array,
                SuDim._3d => SamplerType.Texture3D,
                _ => SamplerType.None
            };
        }
    }
}
// File: src/Ryujinx.Graphics.Shader/Instructions/InstEmitTexture.cs
using Ryujinx.Graphics.Shader.Decoders;
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using Ryujinx.Graphics.Shader.Translation;
using System;
using System.Collections.Generic;
using System.Numerics;

using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;

namespace Ryujinx.Graphics.Shader.Instructions
{
    // Texture instruction emitters. Each public method decodes one instruction form and forwards
    // its fields to a shared private Emit* routine that builds a TextureOperation.
    static partial class InstEmit
    {
        // Component masks for the TEXS-style instructions, indexed as
        // [second destination is the zero register ? 0 : 1, write mask field].
        private static readonly int[,] _maskLut = new int[,]
        {
            { 0b0001, 0b0010, 0b0100, 0b1000, 0b0011, 0b1001, 0b1010, 0b1100 },
            { 0b0111, 0b1011, 0b1101, 0b1110, 0b1111, 0b0000, 0b0000, 0b0000 }
        };

        // When true, 1D samplers are emitted as 2D samplers with an extra constant zero coordinate.
        public const bool Sample1DAs2D = true;

        // Selects which instruction form EmitTexs is handling.
        private enum TexsType
        {
            Texs,
            Tlds,
            Tld4s
        }

        /// <summary>Emits a texture sample for the decoded <c>InstTex</c> operation.</summary>
        public static void Tex(EmitterContext context)
        {
            InstTex op = context.GetOp<InstTex>();

            EmitTex(context, TextureFlags.None, op.Dim, op.Lod, op.TidB, op.WMask, op.SrcA, op.SrcB, op.Dest, false, op.Dc, op.Aoffi);
        }

        /// <summary>Bindless variant of <see cref="Tex"/>; the immediate handle is passed as 0.</summary>
        public static void TexB(EmitterContext context)
        {
            InstTexB op = context.GetOp<InstTexB>();

            EmitTex(context, TextureFlags.Bindless, op.Dim, op.Lodb, 0, op.WMask, op.SrcA, op.SrcB, op.Dest, false, op.Dc, op.Aoffib);
        }

        /// <summary>Emits the <c>InstTexs</c> form with results written to full registers.</summary>
        public static void Texs(EmitterContext context)
        {
            InstTexs op = context.GetOp<InstTexs>();

            EmitTexs(context, TexsType.Texs, op.TidB, op.WMask, op.SrcA, op.SrcB, op.Dest, op.Dest2, isF16: false);
        }

        /// <summary>Emits the <c>InstTexs</c> form with results packed as half-precision pairs.</summary>
        public static void TexsF16(EmitterContext context)
        {
            InstTexs op = context.GetOp<InstTexs>();

            EmitTexs(context, TexsType.Texs, op.TidB, op.WMask, op.SrcA, op.SrcB, op.Dest, op.Dest2, isF16: true);
        }

        /// <summary>
        /// Emits a texture sample with integer coordinates for the decoded <c>InstTld</c> operation
        /// and marks integer sampling as a used feature.
        /// </summary>
        public static void Tld(EmitterContext context)
        {
            InstTld op = context.GetOp<InstTld>();

            context.Config.SetUsedFeature(FeatureFlags.IntegerSampling);

            // The single Lod bit selects between an explicit level (Ll) and level zero (Lz).
            var lod = op.Lod ? Lod.Ll : Lod.Lz;

            EmitTex(context, TextureFlags.IntCoords, op.Dim, lod, op.TidB, op.WMask, op.SrcA, op.SrcB, op.Dest, op.Ms, false, op.Toff);
        }

        /// <summary>Bindless variant of <see cref="Tld"/>; the immediate handle is passed as 0.</summary>
        public static void TldB(EmitterContext context)
        {
            InstTldB op = context.GetOp<InstTldB>();

            context.Config.SetUsedFeature(FeatureFlags.IntegerSampling);

            var flags = TextureFlags.IntCoords | TextureFlags.Bindless;
            var lod = op.Lod ? Lod.Ll : Lod.Lz;

            EmitTex(context, flags, op.Dim, lod, 0, op.WMask, op.SrcA, op.SrcB, op.Dest, op.Ms, false, op.Toff);
        }

        /// <summary>Emits the <c>InstTlds</c> form with results written to full registers.</summary>
        public static void Tlds(EmitterContext context)
        {
            InstTlds op = context.GetOp<InstTlds>();

            EmitTexs(context, TexsType.Tlds, op.TidB, op.WMask, op.SrcA, op.SrcB, op.Dest, op.Dest2, isF16: false);
        }

        /// <summary>Emits the <c>InstTlds</c> form with results packed as half-precision pairs.</summary>
        public static void TldsF16(EmitterContext context)
        {
            InstTlds op = context.GetOp<InstTlds>();

            EmitTexs(context, TexsType.Tlds, op.TidB, op.WMask, op.SrcA, op.SrcB, op.Dest, op.Dest2, isF16: true);
        }

        /// <summary>Emits a texture gather for the decoded <c>InstTld4</c> operation.</summary>
        public static void Tld4(EmitterContext context)
        {
            InstTld4 op = context.GetOp<InstTld4>();

            EmitTld4(context, op.Dim, op.TexComp, op.TidB, op.WMask, op.SrcA, op.SrcB, op.Dest, op.Toff, op.Dc, isBindless: false);
        }

        /// <summary>Bindless variant of <see cref="Tld4"/>; the immediate handle is passed as 0.</summary>
        public static void Tld4B(EmitterContext context)
        {
            InstTld4B op = context.GetOp<InstTld4B>();

            EmitTld4(context, op.Dim, op.TexComp, 0, op.WMask, op.SrcA, op.SrcB, op.Dest, op.Toff, op.Dc, isBindless: true);
        }

        /// <summary>
        /// Emits the <c>InstTld4s</c> gather form with results written to full registers.
        /// The write mask index is fixed to 4 rather than read from the instruction.
        /// </summary>
        public static void Tld4s(EmitterContext context)
        {
            InstTld4s op = context.GetOp<InstTld4s>();

            EmitTexs(context, TexsType.Tld4s, op.TidB, 4, op.SrcA, op.SrcB, op.Dest, op.Dest2, isF16: false);
        }

        /// <summary>Same as <see cref="Tld4s"/>, but results are packed as half-precision pairs.</summary>
        public static void Tld4sF16(EmitterContext context)
        {
            InstTld4s op = context.GetOp<InstTld4s>();

            EmitTexs(context, TexsType.Tld4s, op.TidB, 4, op.SrcA, op.SrcB, op.Dest, op.Dest2, isF16: true);
        }

        /// <summary>Emits a LOD query for the decoded <c>InstTmml</c> operation.</summary>
        public static void Tmml(EmitterContext context)
        {
            InstTmml op = context.GetOp<InstTmml>();

            EmitTmml(context, op.Dim, op.TidB, op.WMask, op.SrcA, op.SrcB, op.Dest, isBindless: false);
        }

        /// <summary>Bindless variant of <see cref="Tmml"/>; the immediate handle is passed as 0.</summary>
        public static void TmmlB(EmitterContext context)
        {
            InstTmmlB op = context.GetOp<InstTmmlB>();

            EmitTmml(context, op.Dim, 0, op.WMask, op.SrcA, op.SrcB, op.Dest, isBindless: true);
        }

        /// <summary>Emits a texture sample with explicit derivatives for the decoded <c>InstTxd</c> operation.</summary>
        public static void Txd(EmitterContext context)
        {
            InstTxd op = context.GetOp<InstTxd>();

            EmitTxd(context, op.Dim, op.TidB, op.WMask, op.SrcA, op.SrcB, op.Dest, op.Toff, isBindless: false);
        }

        /// <summary>Bindless variant of <see cref="Txd"/>; the immediate handle is passed as 0.</summary>
        public static void TxdB(EmitterContext context)
        {
            InstTxdB op = context.GetOp<InstTxdB>();

            EmitTxd(context, op.Dim, 0, op.WMask, op.SrcA, op.SrcB, op.Dest, op.Toff, isBindless: true);
        }

        /// <summary>Emits a texture query for the decoded <c>InstTxq</c> operation.</summary>
        public static void Txq(EmitterContext context)
        {
            InstTxq op = context.GetOp<InstTxq>();

            EmitTxq(context, op.TexQuery, op.TidB, op.WMask, op.SrcA, op.Dest, isBindless: false);
        }

        /// <summary>Bindless variant of <see cref="Txq"/>; the immediate handle is passed as 0.</summary>
        public static void TxqB(EmitterContext context)
        {
            InstTxqB op = context.GetOp<InstTxqB>();

            EmitTxq(context, op.TexQuery, 0, op.WMask, op.SrcA, op.Dest, isBindless: true);
        }

        /// <summary>
        /// Builds and adds a <c>TextureSample</c> operation for the TEX/TLD instruction forms.
        /// </summary>
        /// <param name="context">Emitter context that receives the operation</param>
        /// <param name="flags">Initial texture flags; LOD, offset and related flags are OR-ed in here</param>
        /// <param name="dimensions">Texture dimensions from the instruction</param>
        /// <param name="lodMode">LOD mode from the instruction</param>
        /// <param name="imm">Texture handle; used as the operation handle only when not bindless</param>
        /// <param name="componentMask">Component mask; its population count sizes the destination array</param>
        /// <param name="raIndex">First register index of the A source sequence</param>
        /// <param name="rbIndex">First register index of the B source sequence</param>
        /// <param name="rdIndex">First destination register index; nothing is emitted if it is the zero register</param>
        /// <param name="isMultisample">True if an extra multisample source is read from the B sequence</param>
        /// <param name="hasDepthCompare">True if a depth compare value is read from the B sequence</param>
        /// <param name="hasOffset">True if packed texel offsets are read from the B sequence</param>
        private static void EmitTex(
            EmitterContext context,
            TextureFlags flags,
            TexDim dimensions,
            Lod lodMode,
            int imm,
            int componentMask,
            int raIndex,
            int rbIndex,
            int rdIndex,
            bool isMultisample,
            bool hasDepthCompare,
            bool hasOffset)
        {
            if (rdIndex == RegisterConsts.RegisterZeroIndex)
            {
                return;
            }

            // Reads the next register of the A sequence (post-incrementing the index).
            // Once the index has moved past the zero register index, constant 0 is returned instead.
            Operand Ra()
            {
                if (raIndex > RegisterConsts.RegisterZeroIndex)
                {
                    return Const(0);
                }

                return context.Copy(Register(raIndex++, RegisterType.Gpr));
            }

            // Same as Ra(), for the B sequence.
            Operand Rb()
            {
                if (rbIndex > RegisterConsts.RegisterZeroIndex)
                {
                    return Const(0);
                }

                return context.Copy(Register(rbIndex++, RegisterType.Gpr));
            }

            SamplerType type = ConvertSamplerType(dimensions);

            bool isArray = type.HasFlag(SamplerType.Array);
            bool isBindless = flags.HasFlag(TextureFlags.Bindless);

            // The array index is read from the A sequence before the coordinates,
            // but appended to the sources after them (see below).
            Operand arrayIndex = isArray ? Ra() : null;

            List<Operand> sourcesList = new List<Operand>();

            if (isBindless)
            {
                sourcesList.Add(Rb());
            }

            bool hasLod = lodMode > Lod.Lz;

            if (type == SamplerType.Texture1D && (flags & ~TextureFlags.Bindless) == TextureFlags.IntCoords && !(
                hasLod ||
                hasDepthCompare ||
                hasOffset ||
                isArray ||
                isMultisample))
            {
                // For bindless, we don't have any way to know the texture type,
                // so we assume it's texture buffer when the sampler type is 1D, since that's more common.
                bool isTypeBuffer = isBindless || context.Config.GpuAccessor.QuerySamplerType(imm) == SamplerType.TextureBuffer;
                if (isTypeBuffer)
                {
                    type = SamplerType.TextureBuffer;
                }
            }

            int coordsCount = type.GetDimensions();

            for (int index = 0; index < coordsCount; index++)
            {
                sourcesList.Add(Ra());
            }

            bool is1DTo2D = false;

            if (Sample1DAs2D && (type & SamplerType.Mask) == SamplerType.Texture1D)
            {
                // Pad the missing Y coordinate with zero and keep only the Array modifier.
                sourcesList.Add(ConstF(0));

                type = SamplerType.Texture2D | (type & SamplerType.Array);
                is1DTo2D = true;
            }

            if (isArray)
            {
                sourcesList.Add(arrayIndex);
            }

            // B sequence read order: LOD value first, then packed offsets, then the depth compare value.
            Operand lodValue = hasLod ? Rb() : ConstF(0);

            Operand packedOffs = hasOffset ? Rb() : null;

            if (hasDepthCompare)
            {
                sourcesList.Add(Rb());

                type |= SamplerType.Shadow;
            }

            if ((lodMode == Lod.Lz ||
                 lodMode == Lod.Ll ||
                 lodMode == Lod.Lla) && !isMultisample && type != SamplerType.TextureBuffer)
            {
                sourcesList.Add(lodValue);

                flags |= TextureFlags.LodLevel;
            }

            if (hasOffset)
            {
                // Each coordinate offset is a signed 4-bit field, packed at 4-bit intervals.
                for (int index = 0; index < coordsCount; index++)
                {
                    sourcesList.Add(context.BitfieldExtractS32(packedOffs, Const(index * 4), Const(4)));
                }

                if (is1DTo2D)
                {
                    // Zero offset for the padded Y coordinate.
                    sourcesList.Add(Const(0));
                }

                flags |= TextureFlags.Offset;
            }

            if (lodMode == Lod.Lb || lodMode == Lod.Lba)
            {
                sourcesList.Add(lodValue);

                flags |= TextureFlags.LodBias;
            }

            if (isMultisample)
            {
                // NOTE(review): presumably the sample index - confirm against the instruction encoding.
                sourcesList.Add(Rb());

                type |= SamplerType.Multisample;
            }

            Operand[] sources = sourcesList.ToArray();
            Operand[] dests = new Operand[BitOperations.PopCount((uint)componentMask)];

            int outputIndex = 0;

            // Destinations are consecutive registers starting at rdIndex,
            // stopping once the zero register index is reached.
            for (int i = 0; i < dests.Length; i++)
            {
                if (rdIndex + i >= RegisterConsts.RegisterZeroIndex)
                {
                    break;
                }

                dests[outputIndex++] = Register(rdIndex + i, RegisterType.Gpr);
            }

            if (outputIndex != dests.Length)
            {
                Array.Resize(ref dests, outputIndex);
            }

            int handle = !isBindless ? imm : 0;

            TextureOperation operation = context.CreateTextureOperation(
                Instruction.TextureSample,
                type,
                flags,
                handle,
                componentMask,
                dests,
                sources);

            context.Add(operation);
        }

        /// <summary>
        /// Builds and adds a <c>TextureSample</c> operation for the TEXS, TLDS and TLD4S instruction forms,
        /// which write up to two destination registers (or register pairs).
        /// </summary>
        /// <param name="context">Emitter context that receives the operation</param>
        /// <param name="texsType">Which instruction form is being emitted</param>
        /// <param name="imm">Texture handle</param>
        /// <param name="writeMask">Column index into <c>_maskLut</c></param>
        /// <param name="srcA">First register index of the A source sequence</param>
        /// <param name="srcB">First register index of the B source sequence</param>
        /// <param name="dest">First destination register index</param>
        /// <param name="dest2">Second destination register index</param>
        /// <param name="isF16">True to pack results as half-precision pairs into <paramref name="dest"/> and <paramref name="dest2"/></param>
        /// <exception cref="ArgumentException">Thrown when <paramref name="texsType"/> is not a known value</exception>
        private static void EmitTexs(
            EmitterContext context,
            TexsType texsType,
            int imm,
            int writeMask,
            int srcA,
            int srcB,
            int dest,
            int dest2,
            bool isF16)
        {
            if (dest == RegisterConsts.RegisterZeroIndex && dest2 == RegisterConsts.RegisterZeroIndex)
            {
                return;
            }

            List<Operand> sourcesList = new List<Operand>();

            // Reads the next register of the A sequence (post-incrementing the index).
            // Once the index has moved past the zero register index, constant 0 is returned instead.
            Operand Ra()
            {
                if (srcA > RegisterConsts.RegisterZeroIndex)
                {
                    return Const(0);
                }

                return context.Copy(Register(srcA++, RegisterType.Gpr));
            }

            // Same as Ra(), for the B sequence.
            Operand Rb()
            {
                if (srcB > RegisterConsts.RegisterZeroIndex)
                {
                    return Const(0);
                }

                return context.Copy(Register(srcB++, RegisterType.Gpr));
            }

            // Reads one packed register from the B sequence and appends one signed field per coordinate,
            // where field N starts at bit N * stride and is size bits wide.
            void AddTextureOffset(int coordsCount, int stride, int size)
            {
                Operand packedOffs = Rb();

                for (int index = 0; index < coordsCount; index++)
                {
                    sourcesList.Add(context.BitfieldExtractS32(packedOffs, Const(index * stride), Const(size)));
                }
            }

            SamplerType type;
            TextureFlags flags;

            if (texsType == TexsType.Texs)
            {
                var texsOp = context.GetOp<InstTexs>();

                type = ConvertSamplerType(texsOp.Target);

                if (type == SamplerType.None)
                {
                    context.Config.GpuAccessor.Log("Invalid texture sampler type.");
                    return;
                }

                flags = ConvertTextureFlags(texsOp.Target);

                // We don't need to handle 1D -> Buffer conversions here as
                // only texture sample with integer coordinates can ever use buffer targets.

                if ((type & SamplerType.Array) != 0)
                {
                    // The array index is read first but appended after the two coordinates.
                    Operand arrayIndex = Ra();

                    sourcesList.Add(Ra());
                    sourcesList.Add(Rb());

                    sourcesList.Add(arrayIndex);

                    if ((type & SamplerType.Shadow) != 0)
                    {
                        sourcesList.Add(Rb());
                    }

                    if ((flags & TextureFlags.LodLevel) != 0)
                    {
                        sourcesList.Add(ConstF(0));
                    }
                }
                else
                {
                    // Each target fixes which operands come from the A and B sequences;
                    // the "LodZero" targets append a constant zero LOD.
                    switch (texsOp.Target)
                    {
                        case TexsTarget.Texture1DLodZero:
                            sourcesList.Add(Ra());

                            if (Sample1DAs2D)
                            {
                                sourcesList.Add(ConstF(0));

                                type &= ~SamplerType.Mask;
                                type |= SamplerType.Texture2D;
                            }

                            sourcesList.Add(ConstF(0));
                            break;

                        case TexsTarget.Texture2D:
                            sourcesList.Add(Ra());
                            sourcesList.Add(Rb());
                            break;

                        case TexsTarget.Texture2DLodZero:
                            sourcesList.Add(Ra());
                            sourcesList.Add(Rb());
                            sourcesList.Add(ConstF(0));
                            break;

                        case TexsTarget.Texture2DLodLevel:
                        case TexsTarget.Texture2DDepthCompare:
                        case TexsTarget.Texture3D:
                        case TexsTarget.TextureCube:
                            sourcesList.Add(Ra());
                            sourcesList.Add(Ra());
                            sourcesList.Add(Rb());
                            break;

                        case TexsTarget.Texture2DLodZeroDepthCompare:
                        case TexsTarget.Texture3DLodZero:
                            sourcesList.Add(Ra());
                            sourcesList.Add(Ra());
                            sourcesList.Add(Rb());
                            sourcesList.Add(ConstF(0));
                            break;

                        case TexsTarget.Texture2DLodLevelDepthCompare:
                        case TexsTarget.TextureCubeLodLevel:
                            sourcesList.Add(Ra());
                            sourcesList.Add(Ra());
                            sourcesList.Add(Rb());
                            sourcesList.Add(Rb());
                            break;
                    }
                }
            }
            else if (texsType == TexsType.Tlds)
            {
                var tldsOp = context.GetOp<InstTlds>();

                type = ConvertSamplerType(tldsOp.Target);

                if (type == SamplerType.None)
                {
                    context.Config.GpuAccessor.Log("Invalid texel fetch sampler type.");
                    return;
                }

                context.Config.SetUsedFeature(FeatureFlags.IntegerSampling);

                flags = ConvertTextureFlags(tldsOp.Target) | TextureFlags.IntCoords;

                // A 1D fetch at level zero is turned into a buffer fetch (without a LOD operand)
                // when the GPU accessor reports the bound sampler as a texture buffer.
                if (tldsOp.Target == TldsTarget.Texture1DLodZero &&
                    context.Config.GpuAccessor.QuerySamplerType(tldsOp.TidB) == SamplerType.TextureBuffer)
                {
                    type = SamplerType.TextureBuffer;
                    flags &= ~TextureFlags.LodLevel;
                }

                switch (tldsOp.Target)
                {
                    case TldsTarget.Texture1DLodZero:
                        sourcesList.Add(Ra());

                        if (type != SamplerType.TextureBuffer)
                        {
                            if (Sample1DAs2D)
                            {
                                sourcesList.Add(ConstF(0));

                                type &= ~SamplerType.Mask;
                                type |= SamplerType.Texture2D;
                            }

                            sourcesList.Add(ConstF(0));
                        }
                        break;

                    case TldsTarget.Texture1DLodLevel:
                        sourcesList.Add(Ra());

                        if (Sample1DAs2D)
                        {
                            sourcesList.Add(ConstF(0));

                            type &= ~SamplerType.Mask;
                            type |= SamplerType.Texture2D;
                        }

                        sourcesList.Add(Rb());
                        break;

                    case TldsTarget.Texture2DLodZero:
                        sourcesList.Add(Ra());
                        sourcesList.Add(Rb());
                        sourcesList.Add(Const(0));
                        break;

                    case TldsTarget.Texture2DLodZeroOffset:
                        sourcesList.Add(Ra());
                        sourcesList.Add(Ra());
                        sourcesList.Add(Const(0));
                        break;

                    case TldsTarget.Texture2DLodZeroMultisample:
                    case TldsTarget.Texture2DLodLevel:
                    case TldsTarget.Texture2DLodLevelOffset:
                        sourcesList.Add(Ra());
                        sourcesList.Add(Ra());
                        sourcesList.Add(Rb());
                        break;

                    case TldsTarget.Texture3DLodZero:
                        sourcesList.Add(Ra());
                        sourcesList.Add(Ra());
                        sourcesList.Add(Rb());
                        sourcesList.Add(Const(0));
                        break;

                    case TldsTarget.Texture2DArrayLodZero:
                        // Here the coordinates come from the B sequence and the array index from A.
                        sourcesList.Add(Rb());
                        sourcesList.Add(Rb());
                        sourcesList.Add(Ra());
                        sourcesList.Add(Const(0));
                        break;
                }

                if ((flags & TextureFlags.Offset) != 0)
                {
                    AddTextureOffset(type.GetDimensions(), 4, 4);
                }
            }
            else if (texsType == TexsType.Tld4s)
            {
                var tld4sOp = context.GetOp<InstTld4s>();

                // With depth compare or offset present, both coordinates come from the A sequence,
                // leaving the B sequence for the extra operands.
                if (!(tld4sOp.Dc || tld4sOp.Aoffi))
                {
                    sourcesList.Add(Ra());
                    sourcesList.Add(Rb());
                }
                else
                {
                    sourcesList.Add(Ra());
                    sourcesList.Add(Ra());
                }

                type = SamplerType.Texture2D;
                flags = TextureFlags.Gather;

                if (tld4sOp.Dc)
                {
                    sourcesList.Add(Rb());

                    type |= SamplerType.Shadow;
                }

                if (tld4sOp.Aoffi)
                {
                    AddTextureOffset(type.GetDimensions(), 8, 6);

                    flags |= TextureFlags.Offset;
                }

                // The component to gather is passed as the last source.
                sourcesList.Add(Const((int)tld4sOp.TexComp));
            }
            else
            {
                throw new ArgumentException($"Invalid TEXS type \"{texsType}\".");
            }

            Operand[] sources = sourcesList.ToArray();

            // Half-precision staging values; entries not replaced by a result stay at constant zero.
            Operand[] rd0 = new Operand[2] { ConstF(0), ConstF(0) };
            Operand[] rd1 = new Operand[2] { ConstF(0), ConstF(0) };

            int handle = imm;
            int componentMask = _maskLut[dest2 == RegisterConsts.RegisterZeroIndex ? 0 : 1, writeMask];

            int componentsCount = BitOperations.PopCount((uint)componentMask);

            Operand[] dests = new Operand[componentsCount];

            int outputIndex = 0;

            for (int i = 0; i < componentsCount; i++)
            {
                // Results 0-1 belong to the first destination, results 2-3 to the second.
                int high = i >> 1;
                int low = i & 1;

                if (isF16)
                {
                    // Results go to temporaries that are packed into the destinations after the sample.
                    dests[outputIndex++] = high != 0
                        ? (rd1[low] = Local())
                        : (rd0[low] = Local());
                }
                else
                {
                    int rdIndex = high != 0 ? dest2 : dest;

                    // The second result of a pair goes to the following register,
                    // unless the base is the zero register.
                    if (rdIndex < RegisterConsts.RegisterZeroIndex)
                    {
                        rdIndex += low;
                    }

                    dests[outputIndex++] = Register(rdIndex, RegisterType.Gpr);
                }
            }

            if (outputIndex != dests.Length)
            {
                Array.Resize(ref dests, outputIndex);
            }

            TextureOperation operation = context.CreateTextureOperation(
                Instruction.TextureSample,
                type,
                flags,
                handle,
                componentMask,
                dests,
                sources);

            context.Add(operation);

            if (isF16)
            {
                context.Copy(Register(dest, RegisterType.Gpr), context.PackHalf2x16(rd0[0], rd0[1]));
                context.Copy(Register(dest2, RegisterType.Gpr), context.PackHalf2x16(rd1[0], rd1[1]));
            }
        }

        /// <summary>
        /// Builds and adds a gather (<c>TextureFlags.Gather</c>) <c>TextureSample</c> operation for the TLD4 instruction forms.
        /// </summary>
        /// <param name="context">Emitter context that receives the operation</param>
        /// <param name="dimensions">Texture dimensions from the instruction</param>
        /// <param name="component">Component to gather; passed as the last source operand</param>
        /// <param name="imm">Texture handle</param>
        /// <param name="componentMask">Component mask; its population count sizes the destination array</param>
        /// <param name="srcA">First register index of the A source sequence</param>
        /// <param name="srcB">First register index of the B source sequence</param>
        /// <param name="dest">First destination register index; nothing is emitted if it is the zero register</param>
        /// <param name="offset">Offset mode (none, a single offset, or per-texel offsets)</param>
        /// <param name="hasDepthCompare">True if a depth compare value is read from the B sequence</param>
        /// <param name="isBindless">True if the handle is read from the B sequence</param>
        private static void EmitTld4(
            EmitterContext context,
            TexDim dimensions,
            TexComp component,
            int imm,
            int componentMask,
            int srcA,
            int srcB,
            int dest,
            TexOffset offset,
            bool hasDepthCompare,
            bool isBindless)
        {
            if (dest == RegisterConsts.RegisterZeroIndex)
            {
                return;
            }

            // Reads the next register of the A sequence (post-incrementing the index).
            // Once the index has moved past the zero register index, constant 0 is returned instead.
            Operand Ra()
            {
                if (srcA > RegisterConsts.RegisterZeroIndex)
                {
                    return Const(0);
                }

                return context.Copy(Register(srcA++, RegisterType.Gpr));
            }

            // Same as Ra(), for the B sequence.
            Operand Rb()
            {
                if (srcB > RegisterConsts.RegisterZeroIndex)
                {
                    return Const(0);
                }

                return context.Copy(Register(srcB++, RegisterType.Gpr));
            }

            bool isArray =
                dimensions == TexDim.Array1d ||
                dimensions == TexDim.Array2d ||
                dimensions == TexDim.Array3d ||
                dimensions == TexDim.ArrayCube;

            // Read before the coordinates, appended after them.
            Operand arrayIndex = isArray ? Ra() : null;

            List<Operand> sourcesList = new List<Operand>();

            SamplerType type = ConvertSamplerType(dimensions);
            TextureFlags flags = TextureFlags.Gather;

            if (isBindless)
            {
                sourcesList.Add(Rb());

                flags |= TextureFlags.Bindless;
            }

            int coordsCount = type.GetDimensions();

            for (int index = 0; index < coordsCount; index++)
            {
                sourcesList.Add(Ra());
            }

            bool is1DTo2D = Sample1DAs2D && (type & SamplerType.Mask) == SamplerType.Texture1D;

            if (is1DTo2D)
            {
                // Pad the missing Y coordinate with zero and keep only the Array modifier.
                sourcesList.Add(ConstF(0));

                type = SamplerType.Texture2D | (type & SamplerType.Array);
            }

            if (isArray)
            {
                sourcesList.Add(arrayIndex);
            }

            Operand[] packedOffs = new Operand[2];

            bool hasAnyOffset = offset == TexOffset.Aoffi || offset == TexOffset.Ptp;

            // Aoffi reads one packed register from the B sequence, Ptp reads two.
            packedOffs[0] = hasAnyOffset ? Rb() : null;
            packedOffs[1] = offset == TexOffset.Ptp ? Rb() : null;

            if (hasDepthCompare)
            {
                sourcesList.Add(Rb());

                type |= SamplerType.Shadow;
            }

            if (hasAnyOffset)
            {
                int offsetTexelsCount = offset == TexOffset.Ptp ? 4 : 1;

                for (int index = 0; index < coordsCount * offsetTexelsCount; index++)
                {
                    // Four 6-bit signed fields per packed register, one every 8 bits;
                    // fields 4-7 come from the second register.
                    Operand packed = packedOffs[(index >> 2) & 1];

                    sourcesList.Add(context.BitfieldExtractS32(packed, Const((index & 3) * 8), Const(6)));
                }

                if (is1DTo2D)
                {
                    // Zero offsets for the padded Y coordinate, one per offset texel.
                    for (int index = 0; index < offsetTexelsCount; index++)
                    {
                        sourcesList.Add(Const(0));
                    }
                }

                flags |= offset == TexOffset.Ptp ? TextureFlags.Offsets : TextureFlags.Offset;
            }

            sourcesList.Add(Const((int)component));

            Operand[] sources = sourcesList.ToArray();
            Operand[] dests = new Operand[BitOperations.PopCount((uint)componentMask)];

            int outputIndex = 0;

            // Destinations are consecutive registers starting at dest,
            // stopping once the zero register index is reached.
            for (int i = 0; i < dests.Length; i++)
            {
                if (dest + i >= RegisterConsts.RegisterZeroIndex)
                {
                    break;
                }

                dests[outputIndex++] = Register(dest + i, RegisterType.Gpr);
            }

            if (outputIndex != dests.Length)
            {
                Array.Resize(ref dests, outputIndex);
            }

            int handle = imm;

            TextureOperation operation = context.CreateTextureOperation(
                Instruction.TextureSample,
                type,
                flags,
                handle,
                componentMask,
                dests,
                sources);

            context.Add(operation);
        }

        /// <summary>
        /// Emits one <c>Lod</c> operation per requested x/y component for the TMML instruction forms
        /// and stores each result multiplied by 256 and converted to a signed integer.
        /// Requested z and w components are written as 0.
        /// </summary>
        /// <param name="context">Emitter context that receives the operations</param>
        /// <param name="dimensions">Texture dimensions from the instruction</param>
        /// <param name="imm">Texture handle</param>
        /// <param name="componentMask">Mask of requested components, scanned from bit 0 upwards</param>
        /// <param name="srcA">First register index of the A source sequence</param>
        /// <param name="srcB">First register index of the B source sequence</param>
        /// <param name="dest">First destination register index; nothing is emitted if it is the zero register</param>
        /// <param name="isBindless">True if the handle is read from the B sequence</param>
        private static void EmitTmml(
            EmitterContext context,
            TexDim dimensions,
            int imm,
            int componentMask,
            int srcA,
            int srcB,
            int dest,
            bool isBindless)
        {
            if (dest == RegisterConsts.RegisterZeroIndex)
            {
                return;
            }

            // Reads the next register of the A sequence (post-incrementing the index).
            // Once the index has moved past the zero register index, constant 0 is returned instead.
            Operand Ra()
            {
                if (srcA > RegisterConsts.RegisterZeroIndex)
                {
                    return Const(0);
                }

                return context.Copy(Register(srcA++, RegisterType.Gpr));
            }

            // Same as Ra(), for the B sequence.
            Operand Rb()
            {
                if (srcB > RegisterConsts.RegisterZeroIndex)
                {
                    return Const(0);
                }

                return context.Copy(Register(srcB++, RegisterType.Gpr));
            }

            TextureFlags flags = TextureFlags.None;

            List<Operand> sourcesList = new List<Operand>();

            if (isBindless)
            {
                sourcesList.Add(Rb());

                flags |= TextureFlags.Bindless;
            }

            SamplerType type = ConvertSamplerType(dimensions);

            int coordsCount = type.GetDimensions();

            bool isArray =
                dimensions == TexDim.Array1d ||
                dimensions == TexDim.Array2d ||
                dimensions == TexDim.Array3d ||
                dimensions == TexDim.ArrayCube;

            // Read before the coordinates, appended after them.
            Operand arrayIndex = isArray ? Ra() : null;

            for (int index = 0; index < coordsCount; index++)
            {
                sourcesList.Add(Ra());
            }

            if (Sample1DAs2D && (type & SamplerType.Mask) == SamplerType.Texture1D)
            {
                // Pad the missing Y coordinate with zero and keep only the Array modifier.
                sourcesList.Add(ConstF(0));

                type = SamplerType.Texture2D | (type & SamplerType.Array);
            }

            if (isArray)
            {
                sourcesList.Add(arrayIndex);
            }

            Operand[] sources = sourcesList.ToArray();

            // Returns the next destination register (post-incrementing the index),
            // or null once the zero register index has been reached.
            Operand GetDest()
            {
                if (dest >= RegisterConsts.RegisterZeroIndex)
                {
                    return null;
                }

                return Register(dest++, RegisterType.Gpr);
            }

            int handle = imm;

            for (int compMask = componentMask, compIndex = 0; compMask != 0; compMask >>= 1, compIndex++)
            {
                if ((compMask & 1) != 0)
                {
                    Operand destOperand = GetDest();

                    if (destOperand == null)
                    {
                        break;
                    }

                    // Components z and w aren't standard, we return 0 in this case and add a comment.
                    if (compIndex >= 2)
                    {
                        context.Add(new CommentNode("Unsupported component z or w found"));
                        context.Copy(destOperand, Const(0));
                    }
                    else
                    {
                        Operand tempDest = Local();

                        TextureOperation operation = context.CreateTextureOperation(
                            Instruction.Lod,
                            type,
                            flags,
                            handle,
                            compIndex ^ 1, // The instruction component order is the inverse of GLSL's.
                            new[] { tempDest },
                            sources);

                        context.Add(operation);

                        // Scale by 256 before converting to integer (8 fractional bits).
                        tempDest = context.FPMultiply(tempDest, ConstF(256.0f));

                        Operand fixedPointValue = context.FP32ConvertToS32(tempDest);

                        context.Copy(destOperand, fixedPointValue);
                    }
                }
            }
        }

        /// <summary>
        /// Builds and adds a <c>TextureSample</c> operation with explicit derivatives
        /// (<c>TextureFlags.Derivatives</c>) for the TXD instruction forms.
        /// </summary>
        /// <param name="context">Emitter context that receives the operation</param>
        /// <param name="dimensions">Texture dimensions from the instruction</param>
        /// <param name="imm">Texture handle</param>
        /// <param name="componentMask">Component mask; its population count sizes the destination array</param>
        /// <param name="srcA">First register index of the A source sequence</param>
        /// <param name="srcB">First register index of the B source sequence (derivatives)</param>
        /// <param name="dest">First destination register index; nothing is emitted if it is the zero register</param>
        /// <param name="hasOffset">True if texel offsets are extracted from the packed parameters register</param>
        /// <param name="isBindless">True if the handle is read from the A sequence</param>
        private static void EmitTxd(
            EmitterContext context,
            TexDim dimensions,
            int imm,
            int componentMask,
            int srcA,
            int srcB,
            int dest,
            bool hasOffset,
            bool isBindless)
        {
            if (dest == RegisterConsts.RegisterZeroIndex)
            {
                return;
            }

            // Reads the next register of the A sequence (post-incrementing the index).
            // Once the index has moved past the zero register index, constant 0 is returned instead.
            Operand Ra()
            {
                if (srcA > RegisterConsts.RegisterZeroIndex)
                {
                    return Const(0);
                }

                return context.Copy(Register(srcA++, RegisterType.Gpr));
            }

            // Same as Ra(), for the B sequence.
            Operand Rb()
            {
                if (srcB > RegisterConsts.RegisterZeroIndex)
                {
                    return Const(0);
                }

                return context.Copy(Register(srcB++, RegisterType.Gpr));
            }

            TextureFlags flags = TextureFlags.Derivatives;

            List<Operand> sourcesList = new List<Operand>();

            if (isBindless)
            {
                // Unlike the other bindless emitters in this file, the handle is taken from the A sequence here.
                sourcesList.Add(Ra());

                flags |= TextureFlags.Bindless;
            }

            SamplerType type = ConvertSamplerType(dimensions);

            int coordsCount = type.GetDimensions();

            for (int index = 0; index < coordsCount; index++)
            {
                sourcesList.Add(Ra());
            }

            bool is1DTo2D = Sample1DAs2D && (type & SamplerType.Mask) == SamplerType.Texture1D;

            if (is1DTo2D)
            {
                // Pad the missing Y coordinate with zero and keep only the Array modifier.
                sourcesList.Add(ConstF(0));

                type = SamplerType.Texture2D | (type & SamplerType.Array);
            }

            // Register following the coordinates: low 16 bits hold the array index,
            // and 4-bit offset fields start at bit 16 (both uses are shown below).
            Operand packedParams = Ra();

            bool isArray =
                dimensions == TexDim.Array1d ||
                dimensions == TexDim.Array2d ||
                dimensions == TexDim.Array3d ||
                dimensions == TexDim.ArrayCube;

            if (isArray)
            {
                sourcesList.Add(context.BitwiseAnd(packedParams, Const(0xffff)));
            }

            // Derivatives (X and Y).
            for (int dIndex = 0; dIndex < 2 * coordsCount; dIndex++)
            {
                sourcesList.Add(Rb());

                if (is1DTo2D)
                {
                    // Zero derivative for the padded Y coordinate.
                    sourcesList.Add(ConstF(0));
                }
            }

            if (hasOffset)
            {
                for (int index = 0; index < coordsCount; index++)
                {
                    sourcesList.Add(context.BitfieldExtractS32(packedParams, Const(16 + index * 4), Const(4)));
                }

                if (is1DTo2D)
                {
                    // Zero offset for the padded Y coordinate.
                    sourcesList.Add(Const(0));
                }

                flags |= TextureFlags.Offset;
            }

            Operand[] sources = sourcesList.ToArray();
            Operand[] dests = new Operand[BitOperations.PopCount((uint)componentMask)];

            int outputIndex = 0;

            // Destinations are consecutive registers starting at dest,
            // stopping once the zero register index is reached.
            for (int i = 0; i < dests.Length; i++)
            {
                if (dest + i >= RegisterConsts.RegisterZeroIndex)
                {
                    break;
                }

                dests[outputIndex++] = Register(dest + i, RegisterType.Gpr);
            }

            if (outputIndex != dests.Length)
            {
                Array.Resize(ref dests, outputIndex);
            }

            int handle = imm;

            TextureOperation operation = context.CreateTextureOperation(
                Instruction.TextureSample,
                type,
                flags,
                handle,
                componentMask,
                dests,
                sources);

            context.Add(operation);
        }

        /// <summary>
        /// Emits one <c>TextureSize</c> operation per requested component for the TXQ instruction forms
        /// and marks integer sampling as a used feature.
        /// </summary>
        /// <param name="context">Emitter context that receives the operations</param>
        /// <param name="query">Query kind from the instruction; currently not used (see the TODO below)</param>
        /// <param name="imm">Texture handle</param>
        /// <param name="componentMask">Mask of requested components, scanned from bit 0 upwards</param>
        /// <param name="srcA">First register index of the A source sequence</param>
        /// <param name="dest">First destination register index; nothing is emitted if it is the zero register</param>
        /// <param name="isBindless">True if the handle is read from the A sequence</param>
        private static void EmitTxq(
            EmitterContext context,
            TexQuery query,
            int imm,
            int componentMask,
            int srcA,
            int dest,
            bool isBindless)
        {
            if (dest == RegisterConsts.RegisterZeroIndex)
            {
                return;
            }

            context.Config.SetUsedFeature(FeatureFlags.IntegerSampling);

            // TODO: Validate and use query.
            Instruction inst = Instruction.TextureSize;
            TextureFlags flags = isBindless ? TextureFlags.Bindless : TextureFlags.None;

            // Reads the next register of the A sequence (post-incrementing the index).
            // Once the index has moved past the zero register index, constant 0 is returned instead.
            Operand Ra()
            {
                if (srcA > RegisterConsts.RegisterZeroIndex)
                {
                    return Const(0);
                }

                return context.Copy(Register(srcA++, RegisterType.Gpr));
            }

            List<Operand> sourcesList = new List<Operand>();

            if (isBindless)
            {
                sourcesList.Add(Ra());
            }

            // NOTE(review): presumably the LOD level to query - confirm against the instruction encoding.
            sourcesList.Add(Ra());

            Operand[] sources = sourcesList.ToArray();

            // Returns the next destination register (post-incrementing the index),
            // or null once the zero register index has been reached.
            Operand GetDest()
            {
                if (dest >= RegisterConsts.RegisterZeroIndex)
                {
                    return null;
                }

                return Register(dest++, RegisterType.Gpr);
            }

            SamplerType type;

            if (isBindless)
            {
                // The sampler type can't be queried for bindless, so it is chosen from the mask:
                // 3D when the third component is requested, 2D otherwise.
                type = (componentMask & 4) != 0 ? SamplerType.Texture3D : SamplerType.Texture2D;
            }
            else
            {
                type = context.Config.GpuAccessor.QuerySamplerType(imm);
            }

            for (int compMask = componentMask, compIndex = 0; compMask != 0; compMask >>= 1, compIndex++)
            {
                if ((compMask & 1) != 0)
                {
                    Operand destOperand = GetDest();

                    if (destOperand == null)
                    {
                        break;
                    }

                    TextureOperation operation = context.CreateTextureOperation(
                        inst,
                        type,
                        flags,
                        imm,
                        compIndex,
                        new[] { destOperand },
                        sources);

                    context.Add(operation);
                }
            }
        }

        /// <summary>
        /// Converts instruction texture dimensions into the corresponding <see cref="SamplerType"/>.
        /// </summary>
        /// <exception cref="ArgumentException">Thrown when <paramref name="dimensions"/> is not a listed value</exception>
        private static SamplerType ConvertSamplerType(TexDim dimensions)
        {
            return dimensions switch
            {
                TexDim._1d => SamplerType.Texture1D,
                TexDim.Array1d => SamplerType.Texture1D | SamplerType.Array,
                TexDim._2d => SamplerType.Texture2D,
                TexDim.Array2d => SamplerType.Texture2D | SamplerType.Array,
                TexDim._3d => SamplerType.Texture3D,
                TexDim.Array3d => SamplerType.Texture3D | SamplerType.Array,
                TexDim.Cube => SamplerType.TextureCube,
                TexDim.ArrayCube => SamplerType.TextureCube | SamplerType.Array,
                _ => throw new ArgumentException($"Invalid texture dimensions \"{dimensions}\".")
            };
        }

        /// <summary>
        /// Converts a TEXS target into the corresponding <see cref="SamplerType"/>,
        /// or <c>SamplerType.None</c> if the target is not listed.
        /// </summary>
        private static SamplerType ConvertSamplerType(TexsTarget type)
        {
            switch (type)
            {
                case TexsTarget.Texture1DLodZero:
                    return SamplerType.Texture1D;

                case TexsTarget.Texture2D:
                case TexsTarget.Texture2DLodZero:
                case TexsTarget.Texture2DLodLevel:
                    return SamplerType.Texture2D;

                case TexsTarget.Texture2DDepthCompare:
                case TexsTarget.Texture2DLodLevelDepthCompare:
                case TexsTarget.Texture2DLodZeroDepthCompare:
                    return SamplerType.Texture2D | SamplerType.Shadow;

                case TexsTarget.Texture2DArray:
                case TexsTarget.Texture2DArrayLodZero:
                    return SamplerType.Texture2D | SamplerType.Array;

                case TexsTarget.Texture2DArrayLodZeroDepthCompare:
                    return SamplerType.Texture2D | SamplerType.Array | SamplerType.Shadow;

                case TexsTarget.Texture3D:
                case TexsTarget.Texture3DLodZero:
                    return SamplerType.Texture3D;

                case TexsTarget.TextureCube:
                case TexsTarget.TextureCubeLodLevel:
                    return SamplerType.TextureCube;
            }

            return SamplerType.None;
        }

        /// <summary>
        /// Converts a TLDS target into the corresponding <see cref="SamplerType"/>,
        /// or <c>SamplerType.None</c> if the target is not listed.
        /// </summary>
        private static SamplerType ConvertSamplerType(TldsTarget type)
        {
            switch (type)
            {
                case TldsTarget.Texture1DLodZero:
                case TldsTarget.Texture1DLodLevel:
                    return SamplerType.Texture1D;

                case TldsTarget.Texture2DLodZero:
                case TldsTarget.Texture2DLodZeroOffset:
                case TldsTarget.Texture2DLodLevel:
                case TldsTarget.Texture2DLodLevelOffset:
                    return SamplerType.Texture2D;

                case TldsTarget.Texture2DLodZeroMultisample:
                    return SamplerType.Texture2D | SamplerType.Multisample;

                case TldsTarget.Texture3DLodZero:
                    return SamplerType.Texture3D;

                case TldsTarget.Texture2DArrayLodZero:
                    return SamplerType.Texture2D | SamplerType.Array;
            }

            return SamplerType.None;
        }

        /// <summary>
        /// Returns <c>TextureFlags.LodLevel</c> for the TEXS targets that carry a LOD operand
        /// (explicit level or zero), and <c>TextureFlags.None</c> for all others.
        /// </summary>
        private static TextureFlags ConvertTextureFlags(TexsTarget type)
        {
            switch (type)
            {
                case TexsTarget.Texture1DLodZero:
                case TexsTarget.Texture2DLodZero:
                case TexsTarget.Texture2DLodLevel:
                case TexsTarget.Texture2DLodLevelDepthCompare:
                case TexsTarget.Texture2DLodZeroDepthCompare:
                case TexsTarget.Texture2DArrayLodZero:
                case TexsTarget.Texture2DArrayLodZeroDepthCompare:
                case TexsTarget.Texture3DLodZero:
                case TexsTarget.TextureCubeLodLevel:
                    return TextureFlags.LodLevel;

                case TexsTarget.Texture2D:
                case TexsTarget.Texture2DDepthCompare:
                case TexsTarget.Texture2DArray:
                case TexsTarget.Texture3D:
                case TexsTarget.TextureCube:
                    return TextureFlags.None;
            }

            return TextureFlags.None;
        }

        /// <summary>
        /// Returns the texture flags implied by a TLDS target: <c>LodLevel</c> for every listed target,
        /// plus <c>Offset</c> for the offset targets. Unlisted targets yield <c>TextureFlags.None</c>.
        /// </summary>
        private static TextureFlags ConvertTextureFlags(TldsTarget type)
        {
            switch (type)
            {
                case TldsTarget.Texture1DLodZero:
                case TldsTarget.Texture1DLodLevel:
                case TldsTarget.Texture2DLodZero:
                case TldsTarget.Texture2DLodLevel:
                case TldsTarget.Texture2DLodZeroMultisample:
                case TldsTarget.Texture3DLodZero:
                case TldsTarget.Texture2DArrayLodZero:
                    return TextureFlags.LodLevel;

                case TldsTarget.Texture2DLodZeroOffset:
                case TldsTarget.Texture2DLodLevelOffset:
                    return TextureFlags.LodLevel | TextureFlags.Offset;
            }

            return TextureFlags.None;
        }
    }
}
// File: src/Ryujinx.Graphics.Shader/Instructions/InstEmitVideoArithmetic.cs
using Ryujinx.Graphics.Shader.Decoders;
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using Ryujinx.Graphics.Shader.Translation;

using static Ryujinx.Graphics.Shader.Instructions.InstEmitHelper;
using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;

namespace Ryujinx.Graphics.Shader.Instructions
{
    static partial class InstEmit
    {
        /// <summary>
        /// Emits the video multiply-add instruction: computes A * B + C as a 64-bit value held in a
        /// low/high pair of 32-bit operands, optionally negates, adds one, scales down by a right shift
        /// and saturates, then writes the low 32 bits (or the saturated value) to the destination.
        /// </summary>
        /// <param name="context">Emitter context that supplies the decoded operation and receives the IR</param>
        public static void Vmad(EmitterContext context)
        {
            InstVmad inst = context.GetOp<InstVmad>();

            bool isASigned = (inst.ASelect & VectorSelect.S8B0) != 0;
            bool isBSigned = (inst.BSelect & VectorSelect.S8B0) != 0;

            Operand a = InstEmitAluHelper.Extend(context, GetSrcReg(context, inst.SrcA), inst.ASelect);
            Operand c = context.INegate(GetSrcReg(context, inst.SrcC), inst.AvgMode == AvgMode.NegB);
            Operand b;

            if (!inst.BVideo)
            {
                int immediate = inst.Imm16;

                if (isBSigned)
                {
                    // Sign-extend the 16-bit immediate.
                    immediate = (immediate << 16) >> 16;
                }

                b = Const(immediate);
            }
            else
            {
                b = InstEmitAluHelper.Extend(context, GetSrcReg(context, inst.SrcB), inst.BSelect);
            }

            Operand lo = context.IMultiply(a, b);
            Operand hi;

            if (isASigned != isBSigned)
            {
                // Mixed signedness: take the unsigned high product and add a correction term built
                // from the sign mask of the signed operand multiplied by the other operand.
                Operand correction;

                if (isASigned)
                {
                    correction = context.IMultiply(b, context.ShiftRightS32(a, Const(31)));
                }
                else
                {
                    correction = context.IMultiply(a, context.ShiftRightS32(b, Const(31)));
                }

                hi = context.IAdd(correction, context.MultiplyHighU32(a, b));
            }
            else if (isASigned)
            {
                hi = context.MultiplyHighS32(a, b);
            }
            else
            {
                hi = context.MultiplyHighU32(a, b);
            }

            if (inst.AvgMode == AvgMode.NegA)
            {
                (lo, hi) = InstEmitAluHelper.NegateLong(context, lo, hi);
            }

            // 64-bit accumulate: add C to the low half and propagate the carry into the high half.
            Operand sumLo = InstEmitAluHelper.AddWithCarry(context, lo, c, out Operand carryOut);
            Operand sumHi = context.IAdd(hi, carryOut);

            if (inst.AvgMode == AvgMode.PlusOne)
            {
                sumLo = InstEmitAluHelper.AddWithCarry(context, sumLo, Const(1), out Operand plusOneCarry);
                sumHi = context.IAdd(sumHi, plusOneCarry);
            }

            bool isResultSigned =
                inst.ASelect == VectorSelect.S32 ||
                inst.BSelect == VectorSelect.S32 ||
                inst.AvgMode == AvgMode.NegB ||
                inst.AvgMode == AvgMode.NegA;

            int scaleShift;

            switch (inst.VideoScale)
            {
                case VideoScale.Shr7:
                    scaleShift = 7;
                    break;
                case VideoScale.Shr15:
                    scaleShift = 15;
                    break;
                default:
                    scaleShift = 0;
                    break;
            }

            if (scaleShift != 0)
            {
                // 64-bit right shift over the pair:
                // low = (low >> shift) | (high << (32 - shift)), then high >>= shift.
                sumLo = context.BitwiseOr(
                    context.ShiftRightU32(sumLo, Const(scaleShift)),
                    context.ShiftLeft(sumHi, Const(32 - scaleShift)));

                if (isResultSigned)
                {
                    sumHi = context.ShiftRightS32(sumHi, Const(scaleShift));
                }
                else
                {
                    sumHi = context.ShiftRightU32(sumHi, Const(scaleShift));
                }
            }

            Operand result = sumLo;

            if (inst.Sat)
            {
                Operand signMask = context.ShiftRightS32(sumHi, Const(31));

                if (!isResultSigned)
                {
                    // Unsigned: any non-zero high half means the value does not fit in 32 bits.
                    Operand overflowed = context.ICompareNotEqual(sumHi, Const(0));
                    result = context.ConditionalSelect(overflowed, context.BitwiseNot(signMask), sumLo);
                }
                else
                {
                    // Signed: the value fits only if the high half equals the sign extension of the low half.
                    Operand overflowed = context.ICompareNotEqual(sumHi, context.ShiftRightS32(sumLo, Const(31)));
                    Operand saturated = context.ConditionalSelect(signMask, Const(int.MinValue), Const(int.MaxValue));
                    result = context.ConditionalSelect(overflowed, saturated, sumLo);
                }
            }

            context.Copy(GetDest(inst.Dest), result);

            // TODO: CC.
        }
    }
}
// src/Ryujinx.Graphics.Shader/Instructions/InstEmitVideoMinMax.cs
using Ryujinx.Graphics.Shader.Decoders;
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using Ryujinx.Graphics.Shader.Translation;

using static Ryujinx.Graphics.Shader.Instructions.InstEmitHelper;
using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;

namespace Ryujinx.Graphics.Shader.Instructions
{
    static partial class InstEmit
    {
        /// <summary>
        /// Emits the IR for the VMNMX instruction: takes the minimum or maximum of operands A and B,
        /// optionally saturates it to the destination format, combines it with operand C according to
        /// the video operation and writes the result to the destination register.
        /// </summary>
        /// <param name="context">Emitter context that provides the current instruction and receives the IR</param>
        public static void Vmnmx(EmitterContext context)
        {
            InstVmnmx op = context.GetOp<InstVmnmx>();

            bool aSigned = (op.ASelect & VectorSelect.S8B0) != 0;
            bool bSigned = (op.BSelect & VectorSelect.S8B0) != 0;

            Operand srcA = InstEmitAluHelper.Extend(context, GetSrcReg(context, op.SrcA), op.ASelect);
            Operand srcC = GetSrcReg(context, op.SrcC);
            Operand srcB;

            if (op.BVideo)
            {
                srcB = InstEmitAluHelper.Extend(context, GetSrcReg(context, op.SrcB), op.BSelect);
            }
            else
            {
                int imm = op.Imm16;

                if (bSigned)
                {
                    // Sign-extend the 16-bit immediate.
                    imm = (imm << 16) >> 16;
                }

                srcB = Const(imm);
            }

            Operand res;

            bool resSigned;

            if (aSigned != bSigned)
            {
                // Signedness is different, but for max, result will always fit a U32,
                // since one of the inputs can't be negative, and the result is the one
                // with highest value. For min, it will always fit on a S32, since
                // one of the input can't be greater than INT_MAX and we want the lowest value.
                resSigned = !op.Mn;

                Operand signedSrc = aSigned ? srcA : srcB;
                Operand unsignedSrc = aSigned ? srcB : srcA;

                if (op.Mn)
                {
                    // The unsigned comparison is only valid when the signed input is not negative.
                    // A negative signed input is always below the unsigned one, so the maximum
                    // is the unsigned input in that case.
                    res = context.IMaximumU32(srcA, srcB);

                    Operand isSignedNegative = context.ICompareLess(signedSrc, Const(0));

                    res = context.ConditionalSelect(isSignedNegative, unsignedSrc, res);
                }
                else
                {
                    // The signed comparison is only valid when the unsigned input fits in a S32.
                    // An unsigned input above INT_MAX is always above the signed one, so the minimum
                    // is the signed input in that case.
                    res = context.IMinimumS32(srcA, srcB);

                    Operand isUnsignedGtIntMax = context.ICompareLess(unsignedSrc, Const(0));

                    res = context.ConditionalSelect(isUnsignedGtIntMax, signedSrc, res);
                }
            }
            else
            {
                // Ra and Rb have the same signedness, so doesn't matter which one we test.
                resSigned = aSigned;

                if (op.Mn)
                {
                    res = resSigned
                        ? context.IMaximumS32(srcA, srcB)
                        : context.IMaximumU32(srcA, srcB);
                }
                else
                {
                    res = resSigned
                        ? context.IMinimumS32(srcA, srcB)
                        : context.IMinimumU32(srcA, srcB);
                }
            }

            if (op.Sat)
            {
                if (op.DFormat && !resSigned)
                {
                    // Unsigned result going to a signed destination: clamp to INT_MAX.
                    res = context.IMinimumU32(res, Const(int.MaxValue));
                }
                else if (!op.DFormat && resSigned)
                {
                    // Signed result going to an unsigned destination: clamp negative values to 0.
                    res = context.IMaximumS32(res, Const(0));
                }
            }

            // Combine with operand C. Video operations not listed here leave the result untouched.
            switch (op.VideoOp)
            {
                case VideoOp.Acc:
                    res = context.IAdd(res, srcC);
                    break;
                case VideoOp.Max:
                    res = op.DFormat ? context.IMaximumS32(res, srcC) : context.IMaximumU32(res, srcC);
                    break;
                case VideoOp.Min:
                    res = op.DFormat ? context.IMinimumS32(res, srcC) : context.IMinimumU32(res, srcC);
                    break;
                case VideoOp.Mrg16h:
                    res = context.BitfieldInsert(srcC, res, Const(16), Const(16));
                    break;
                case VideoOp.Mrg16l:
                    res = context.BitfieldInsert(srcC, res, Const(0), Const(16));
                    break;
                case VideoOp.Mrg8b0:
                    res = context.BitfieldInsert(srcC, res, Const(0), Const(8));
                    break;
                case VideoOp.Mrg8b2:
                    res = context.BitfieldInsert(srcC, res, Const(16), Const(8));
                    break;
            }

            context.Copy(GetDest(op.Dest), res);
        }

        /// <summary>
        /// Emits the IR for the VSETP instruction: compares operands A and B, combines the comparison
        /// result and its complement with a source predicate, and writes both to predicate registers.
        /// </summary>
        /// <remarks>
        /// Comparisons with mismatching signedness where at least one operand is a full 32-bit value
        /// are not implemented; the comparison result is a constant 0 in that case.
        /// </remarks>
        /// <param name="context">Emitter context that provides the current instruction and receives the IR</param>
        public static void Vsetp(EmitterContext context)
        {
            InstVsetp op = context.GetOp<InstVsetp>();

            bool signedA = (op.ASelect & VectorSelect.S8B0) != 0;
            bool signedB = (op.BSelect & VectorSelect.S8B0) != 0;

            Operand srcA = InstEmitAluHelper.Extend(context, GetSrcReg(context, op.SrcA), op.ASelect);
            Operand srcB;

            if (op.BVideo)
            {
                srcB = InstEmitAluHelper.Extend(context, GetSrcReg(context, op.SrcB), op.BSelect);
            }
            else
            {
                int imm = op.Imm16;

                if (signedB)
                {
                    // Sign-extend the 16-bit immediate.
                    imm = (imm << 16) >> 16;
                }

                srcB = Const(imm);
            }

            Operand p0Res;

            if (signedA != signedB)
            {
                // Clearing the sign bit of the selection leaves the size part, which is compared against U32.
                bool a32 = (op.ASelect & ~VectorSelect.S8B0) == VectorSelect.U32;
                bool b32 = (op.BSelect & ~VectorSelect.S8B0) == VectorSelect.U32;

                if (!a32 && !b32)
                {
                    // Both values are extended small integer and can always fit in a S32, just do a signed comparison.
                    p0Res = GetIntComparison(context, op.VComp, srcA, srcB, isSigned: true, extended: false);
                }
                else
                {
                    // TODO: Mismatching sign case.
                    p0Res = Const(0);
                }
            }
            else
            {
                // Sign matches, just do a regular comparison.
                p0Res = GetIntComparison(context, op.VComp, srcA, srcB, signedA, extended: false);
            }

            Operand p1Res = context.BitwiseNot(p0Res);

            Operand pred = GetPredicate(context, op.SrcPred, op.SrcPredInv);

            p0Res = InstEmitAluHelper.GetPredLogicalOp(context, op.BoolOp, p0Res, pred);
            p1Res = InstEmitAluHelper.GetPredLogicalOp(context, op.BoolOp, p1Res, pred);

            context.Copy(Register(op.DestPred, RegisterType.Predicate), p0Res);
            context.Copy(Register(op.DestPredInv, RegisterType.Predicate), p1Res);
        }
    }
}
// src/Ryujinx.Graphics.Shader/Instructions/InstEmitWarp.cs
using Ryujinx.Graphics.Shader.Decoders;
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using Ryujinx.Graphics.Shader.Translation;

using static Ryujinx.Graphics.Shader.Instructions.InstEmitHelper;
using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;

namespace Ryujinx.Graphics.Shader.Instructions
{
    static partial class InstEmit
    {
        /// <summary>
        /// Emits the IR for the FSWZADD instruction: writes the swizzle-add of operands A and B
        /// to the destination register and updates the floating-point Z/N flags when requested.
        /// </summary>
        /// <param name="context">Emitter context that provides the current instruction and receives the IR</param>
        public static void Fswzadd(EmitterContext context)
        {
            InstFswzadd op = context.GetOp<InstFswzadd>();

            Operand srcA = GetSrcReg(context, op.SrcA);
            Operand srcB = GetSrcReg(context, op.SrcB);
            Operand dest = GetDest(op.Dest);

            context.Copy(dest, context.FPSwizzleAdd(srcA, srcB, op.PnWord));

            InstEmitAluHelper.SetFPZnFlags(context, dest, op.WriteCC);
        }

        /// <summary>
        /// Emits the IR for the SHFL instruction: performs the shuffle selected by the shuffle mode,
        /// writes the shuffled value to the destination register and the validity flag to the
        /// destination predicate.
        /// </summary>
        /// <remarks>
        /// An unrecognized shuffle mode is logged and no IR is emitted for the instruction.
        /// </remarks>
        /// <param name="context">Emitter context that provides the current instruction and receives the IR</param>
        public static void Shfl(EmitterContext context)
        {
            InstShfl op = context.GetOp<InstShfl>();

            Operand pred = Register(op.DestPred, RegisterType.Predicate);

            Operand srcA = GetSrcReg(context, op.SrcA);

            // B and C come either from an immediate or from a register.
            Operand srcB = op.BFixShfl ? Const(op.SrcBImm) : GetSrcReg(context, op.SrcB);
            Operand srcC = op.CFixShfl ? Const(op.SrcCImm) : GetSrcReg(context, op.SrcC);

            (Operand res, Operand valid) = op.ShflMode switch
            {
                ShflMode.Idx => context.Shuffle(srcA, srcB, srcC),
                ShflMode.Up => context.ShuffleUp(srcA, srcB, srcC),
                ShflMode.Down => context.ShuffleDown(srcA, srcB, srcC),
                ShflMode.Bfly => context.ShuffleXor(srcA, srcB, srcC),
                _ => (null, null)
            };

            if (res == null)
            {
                // Never hand null operands to Copy; report the problem the same way Vote does.
                context.Config.GpuAccessor.Log($"Invalid shuffle operation: {op.ShflMode}.");

                return;
            }

            context.Copy(GetDest(op.Dest), res);
            context.Copy(pred, valid);
        }

        /// <summary>
        /// Emits the IR for the VOTE instruction: writes the vote result for the source predicate
        /// to the destination predicate and, when the destination register is not the zero register,
        /// writes the ballot of the source predicate to it.
        /// </summary>
        /// <remarks>
        /// An unrecognized vote mode is logged and the destination predicate is left unwritten;
        /// the ballot is still emitted.
        /// </remarks>
        /// <param name="context">Emitter context that provides the current instruction and receives the IR</param>
        public static void Vote(EmitterContext context)
        {
            InstVote op = context.GetOp<InstVote>();

            Operand pred = GetPredicate(context, op.SrcPred, op.SrcPredInv);
            Operand res = null;

            switch (op.VoteMode)
            {
                case VoteMode.All:
                    res = context.VoteAll(pred);
                    break;
                case VoteMode.Any:
                    res = context.VoteAny(pred);
                    break;
                case VoteMode.Eq:
                    res = context.VoteAllEqual(pred);
                    break;
            }

            if (res != null)
            {
                context.Copy(Register(op.VpDest, RegisterType.Predicate), res);
            }
            else
            {
                context.Config.GpuAccessor.Log($"Invalid vote operation: {op.VoteMode}.");
            }

            if (op.Dest != RegisterConsts.RegisterZeroIndex)
            {
                context.Copy(GetDest(op.Dest), context.Ballot(pred));
            }
        }
    }
}
// src/Ryujinx.Graphics.Shader/Instructions/InstEmitter.cs
using Ryujinx.Graphics.Shader.Translation;

namespace Ryujinx.Graphics.Shader.Instructions
{
    /// <summary>
    /// Signature of an instruction emitter: a method that receives the emitter context and returns nothing.
    /// </summary>
    /// <param name="context">Emitter context passed to the instruction emitter</param>
    delegate void InstEmitter(EmitterContext context);
}
// src/Ryujinx.Graphics.Shader/Instructions/Lop3Expression.cs
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using Ryujinx.Graphics.Shader.Translation;

using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper;

namespace Ryujinx.Graphics.Shader.Instructions
{
    /// <summary>
    /// Builds bitwise expressions out of 8-bit, 3-input truth tables.
    /// </summary>
    static class Lop3Expression
    {
        /// <summary>
        /// Truth tables with a known expression. Bit (a * 4 + b * 2 + c) of the value is the
        /// output for inputs a, b and c.
        /// </summary>
        private enum TruthTable : byte
        {
            False = 0x00, // false
            True = 0xff, // true
            In = 0xf0, // a
            And2 = 0xc0, // a & b
            Or2 = 0xfc, // a | b
            Xor2 = 0x3c, // a ^ b
            And3 = 0x80, // a & b & c
            Or3 = 0xfe, // a | b | c
            XorAnd = 0x60, // a & (b ^ c)
            XorOr = 0xf6, // a | (b ^ c)
            OrAnd = 0xe0, // a & (b | c)
            AndOr = 0xf8, // a | (b & c)
            Onehot = 0x16, // (a & !b & !c) | (!a & b & !c) | (!a & !b & c) - Only one value is true.
            Majority = 0xe8, // Popcount(a, b, c) >= 2
            Gamble = 0x81, // (a & b & c) | (!a & !b & !c) - All on or all off
            InverseGamble = 0x7e, // Inverse of Gamble
            Dot = 0x1a, // a ^ (c | (a & b))
            Mux = 0xca, // a ? b : c
            AndXor = 0x78, // a ^ (b & c)
            OrXor = 0x1e, // a ^ (b | c)
            Xor3 = 0x96, // a ^ b ^ c
        }

        // Six independent transformations (three operand swaps and three operand negations).
        private const int TransformCombinations = 0x40;

        /// <summary>
        /// Tries to emit an expression equivalent to the given truth table by testing every combination
        /// of operand swaps and negations against the known tables, both directly and inverted.
        /// </summary>
        /// <param name="context">Emitter context that receives the IR</param>
        /// <param name="srcA">First input</param>
        /// <param name="srcB">Second input</param>
        /// <param name="srcC">Third input</param>
        /// <param name="imm">Truth table immediate</param>
        /// <returns>The operand holding the result, or null if no combination matches a known table</returns>
        public static Operand GetFromTruthTable(EmitterContext context, Operand srcA, Operand srcB, Operand srcC, int imm)
        {
            for (int combination = 0; combination < TransformCombinations; combination++)
            {
                TruthTable table = (TruthTable)imm;

                Operand x = srcA;
                Operand y = srcB;
                Operand z = srcC;

                bool swapXY = (combination & 0x01) != 0;
                bool swapXZ = (combination & 0x02) != 0;
                bool swapYZ = (combination & 0x04) != 0;
                bool negateX = (combination & 0x08) != 0;
                bool negateY = (combination & 0x10) != 0;
                bool negateZ = (combination & 0x20) != 0;

                // Each operand change is mirrored on the table so that it keeps describing
                // the same function in terms of the current x, y and z.
                if (swapXY)
                {
                    Operand previousX = x;
                    x = y;
                    y = previousX;
                    table = PermuteTable(table, 7, 6, 3, 2, 5, 4, 1, 0);
                }

                if (swapXZ)
                {
                    Operand previousX = x;
                    x = z;
                    z = previousX;
                    table = PermuteTable(table, 7, 3, 5, 1, 6, 2, 4, 0);
                }

                if (swapYZ)
                {
                    Operand previousY = y;
                    y = z;
                    z = previousY;
                    table = PermuteTable(table, 7, 5, 6, 4, 3, 1, 2, 0);
                }

                if (negateX)
                {
                    x = context.BitwiseNot(x);
                    table = PermuteTable(table, 3, 2, 1, 0, 7, 6, 5, 4);
                }

                if (negateY)
                {
                    y = context.BitwiseNot(y);
                    table = PermuteTable(table, 5, 4, 7, 6, 1, 0, 3, 2);
                }

                if (negateZ)
                {
                    z = context.BitwiseNot(z);
                    table = PermuteTable(table, 6, 7, 4, 5, 2, 3, 0, 1);
                }

                Operand direct = GetExpr(table, context, x, y, z);

                if (direct != null)
                {
                    return direct;
                }

                // Not a known table as is, try the complemented table and invert its result.
                TruthTable complement = (TruthTable)((int)table ^ 0xff);

                Operand inverted = GetExpr(complement, context, x, y, z);

                if (inverted != null)
                {
                    return context.BitwiseNot(inverted);
                }
            }

            return null;
        }

        /// <summary>
        /// Emits the expression of a known truth table.
        /// </summary>
        /// <param name="imm">Truth table to emit</param>
        /// <param name="context">Emitter context that receives the IR</param>
        /// <param name="x">First input</param>
        /// <param name="y">Second input</param>
        /// <param name="z">Third input</param>
        /// <returns>The operand holding the result, or null if the table has no expression here</returns>
        private static Operand GetExpr(TruthTable imm, EmitterContext context, Operand x, Operand y, Operand z)
        {
            switch (imm)
            {
                case TruthTable.False:
                    return Const(0);

                case TruthTable.True:
                    return Const(-1);

                case TruthTable.In:
                    return x;

                case TruthTable.And2:
                    return context.BitwiseAnd(x, y);

                case TruthTable.Or2:
                    return context.BitwiseOr(x, y);

                case TruthTable.Xor2:
                    return context.BitwiseExclusiveOr(x, y);

                case TruthTable.And3:
                {
                    Operand yAndZ = context.BitwiseAnd(y, z);

                    return context.BitwiseAnd(x, yAndZ);
                }

                case TruthTable.Or3:
                {
                    Operand yOrZ = context.BitwiseOr(y, z);

                    return context.BitwiseOr(x, yOrZ);
                }

                case TruthTable.XorAnd:
                {
                    Operand yXorZ = context.BitwiseExclusiveOr(y, z);

                    return context.BitwiseAnd(x, yXorZ);
                }

                case TruthTable.XorOr:
                {
                    Operand yXorZ = context.BitwiseExclusiveOr(y, z);

                    return context.BitwiseOr(x, yXorZ);
                }

                case TruthTable.OrAnd:
                {
                    Operand yOrZ = context.BitwiseOr(y, z);

                    return context.BitwiseAnd(x, yOrZ);
                }

                case TruthTable.AndOr:
                {
                    Operand yAndZ = context.BitwiseAnd(y, z);

                    return context.BitwiseOr(x, yAndZ);
                }

                case TruthTable.Onehot:
                {
                    Operand xOrY = context.BitwiseOr(x, y);
                    Operand xAndY = context.BitwiseAnd(x, y);
                    Operand zOrBoth = context.BitwiseOr(z, xAndY);

                    return context.BitwiseExclusiveOr(xOrY, zOrBoth);
                }

                case TruthTable.Majority:
                {
                    Operand xOrY = context.BitwiseOr(x, y);
                    Operand xAndY = context.BitwiseAnd(x, y);
                    Operand zOrBoth = context.BitwiseOr(z, xAndY);

                    return context.BitwiseAnd(xOrY, zOrBoth);
                }

                case TruthTable.InverseGamble:
                {
                    Operand xXorY = context.BitwiseExclusiveOr(x, y);
                    Operand xXorZ = context.BitwiseExclusiveOr(x, z);

                    return context.BitwiseOr(xXorY, xXorZ);
                }

                case TruthTable.Dot:
                {
                    Operand xXorZ = context.BitwiseExclusiveOr(x, z);
                    Operand notY = context.BitwiseNot(y);
                    Operand notYOrZ = context.BitwiseOr(notY, z);

                    return context.BitwiseAnd(xXorZ, notYOrZ);
                }

                case TruthTable.Mux:
                {
                    Operand whenSet = context.BitwiseAnd(x, y);
                    Operand notX = context.BitwiseNot(x);
                    Operand whenClear = context.BitwiseAnd(notX, z);

                    return context.BitwiseOr(whenSet, whenClear);
                }

                case TruthTable.AndXor:
                {
                    Operand yAndZ = context.BitwiseAnd(y, z);

                    return context.BitwiseExclusiveOr(x, yAndZ);
                }

                case TruthTable.OrXor:
                {
                    Operand yOrZ = context.BitwiseOr(y, z);

                    return context.BitwiseExclusiveOr(x, yOrZ);
                }

                case TruthTable.Xor3:
                {
                    Operand yXorZ = context.BitwiseExclusiveOr(y, z);

                    return context.BitwiseExclusiveOr(x, yXorZ);
                }

                default:
                    return null;
            }
        }

        /// <summary>
        /// Rearranges the bits of a truth table: bit N of the input is moved to the position given by bitN.
        /// </summary>
        /// <param name="imm">Truth table to permute</param>
        /// <param name="bit7">New position of bit 7</param>
        /// <param name="bit6">New position of bit 6</param>
        /// <param name="bit5">New position of bit 5</param>
        /// <param name="bit4">New position of bit 4</param>
        /// <param name="bit3">New position of bit 3</param>
        /// <param name="bit2">New position of bit 2</param>
        /// <param name="bit1">New position of bit 1</param>
        /// <param name="bit0">New position of bit 0</param>
        /// <returns>The permuted truth table</returns>
        private static TruthTable PermuteTable(TruthTable imm, int bit7, int bit6, int bit5, int bit4, int bit3, int bit2, int bit1, int bit0)
        {
            int source = (int)imm;

            int Move(int from, int to)
            {
                return ((source >> from) & 1) << to;
            }

            int permuted =
                Move(0, bit0) |
                Move(1, bit1) |
                Move(2, bit2) |
                Move(3, bit3) |
                Move(4, bit4) |
                Move(5, bit5) |
                Move(6, bit6) |
                Move(7, bit7);

            return (TruthTable)permuted;
        }
    }
}
\ No newline at end of file |
