aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author gdk <gab.dark.100@gmail.com> 2019-10-31 00:29:22 -0300
committer Thog <thog@protonmail.com> 2020-01-09 02:13:00 +0100
commit 278a4c317c0b87add67cc9ebc904afe1db23a031 (patch)
tree 452b59bf4aebf45b9086cf1f59e006c089a2cba7
parent d786d8d2b924da7cd116a2eb97d738a9f07b4e43 (diff)
Implement BFI, BRK, FLO, FSWZADD, PBK, SHFL and TXD shader instructions, misc. fixes
-rw-r--r--Ryujinx.Common/Utilities/EmbeddedResources.cs139
-rw-r--r--Ryujinx.Graphics.Shader/CodeGen/Glsl/CodeGenContext.cs7
-rw-r--r--Ryujinx.Graphics.Shader/CodeGen/Glsl/Declarations.cs35
-rw-r--r--Ryujinx.Graphics.Shader/CodeGen/Glsl/GlslGenerator.cs9
-rw-r--r--Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/HelperFunctionNames.cs11
-rw-r--r--Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/Shuffle.glsl9
-rw-r--r--Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/ShuffleDown.glsl9
-rw-r--r--Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/ShuffleUp.glsl8
-rw-r--r--Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/ShuffleXor.glsl9
-rw-r--r--Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/SwizzleAdd.glsl7
-rw-r--r--Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGenHelper.cs10
-rw-r--r--Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGenMemory.cs44
-rw-r--r--Ryujinx.Graphics.Shader/Decoders/Decoder.cs2
-rw-r--r--Ryujinx.Graphics.Shader/Decoders/OpCodeBranch.cs4
-rw-r--r--Ryujinx.Graphics.Shader/Decoders/OpCodeShuffle.cs40
-rw-r--r--Ryujinx.Graphics.Shader/Decoders/OpCodeSsy.cs2
-rw-r--r--Ryujinx.Graphics.Shader/Decoders/OpCodeTable.cs272
-rw-r--r--Ryujinx.Graphics.Shader/Decoders/OpCodeTextureScalar.cs2
-rw-r--r--Ryujinx.Graphics.Shader/Decoders/OpCodeTld4s.cs2
-rw-r--r--Ryujinx.Graphics.Shader/Decoders/OpCodeTxd.cs18
-rw-r--r--Ryujinx.Graphics.Shader/Decoders/ShuffleType.cs10
-rw-r--r--Ryujinx.Graphics.Shader/Decoders/SystemRegister.cs15
-rw-r--r--Ryujinx.Graphics.Shader/Instructions/InstEmitAlu.cs39
-rw-r--r--Ryujinx.Graphics.Shader/Instructions/InstEmitFArith.cs16
-rw-r--r--Ryujinx.Graphics.Shader/Instructions/InstEmitFlow.cs22
-rw-r--r--Ryujinx.Graphics.Shader/Instructions/InstEmitMove.cs35
-rw-r--r--Ryujinx.Graphics.Shader/Instructions/InstEmitTexture.cs127
-rw-r--r--Ryujinx.Graphics.Shader/IntermediateRepresentation/Instruction.cs10
-rw-r--r--Ryujinx.Graphics.Shader/IntermediateRepresentation/Operation.cs7
-rw-r--r--Ryujinx.Graphics.Shader/IntermediateRepresentation/TextureFlags.cs17
-rw-r--r--Ryujinx.Graphics.Shader/Ryujinx.Graphics.Shader.csproj12
-rw-r--r--Ryujinx.Graphics.Shader/StructuredIr/HelperFunctionsMask.cs14
-rw-r--r--Ryujinx.Graphics.Shader/StructuredIr/InstructionInfo.cs10
-rw-r--r--Ryujinx.Graphics.Shader/StructuredIr/StructuredProgram.cs22
-rw-r--r--Ryujinx.Graphics.Shader/StructuredIr/StructuredProgramInfo.cs2
-rw-r--r--Ryujinx.Graphics.Shader/Translation/EmitterContextInsts.cs40
-rw-r--r--Ryujinx.Graphics.Shader/Translation/Optimizations/ConstantFolding.cs19
-rw-r--r--Ryujinx.Graphics.Shader/Translation/Optimizations/Optimizer.cs82
38 files changed, 972 insertions, 166 deletions
diff --git a/Ryujinx.Common/Utilities/EmbeddedResources.cs b/Ryujinx.Common/Utilities/EmbeddedResources.cs
new file mode 100644
index 00000000..93ff70ea
--- /dev/null
+++ b/Ryujinx.Common/Utilities/EmbeddedResources.cs
@@ -0,0 +1,139 @@
+using System;
+using System.IO;
+using System.Reflection;
+using System.Threading.Tasks;
+
+namespace Ryujinx.Common
+{
+    /// <summary>
+    /// Helpers for reading manifest resources embedded in loaded assemblies.
+    /// </summary>
+    /// <remarks>
+    /// The overloads that take only a file name accept either a path relative to the assembly
+    /// that declares this class, or a path of the form "AssemblyName/relative/path" whose first
+    /// segment is the name of an assembly loaded in the current application domain.
+    /// All methods yield null when the manifest resource cannot be found.
+    /// </remarks>
+    public static class EmbeddedResources
+    {
+        private static readonly Assembly ResourceAssembly;
+
+        static EmbeddedResources()
+        {
+            ResourceAssembly = Assembly.GetAssembly(typeof(EmbeddedResources));
+        }
+
+        /// <summary>
+        /// Reads a whole embedded resource as bytes.
+        /// </summary>
+        /// <param name="filename">Resource path, optionally prefixed with "AssemblyName/"</param>
+        /// <returns>The resource contents, or null if the resource does not exist</returns>
+        public static byte[] Read(string filename)
+        {
+            var (assembly, path) = ResolveManifestPath(filename);
+
+            return Read(assembly, path);
+        }
+
+        /// <summary>
+        /// Asynchronously reads a whole embedded resource as bytes.
+        /// </summary>
+        /// <param name="filename">Resource path, optionally prefixed with "AssemblyName/"</param>
+        /// <returns>A task producing the resource contents, or null if the resource does not exist</returns>
+        public static Task<byte[]> ReadAsync(string filename)
+        {
+            var (assembly, path) = ResolveManifestPath(filename);
+
+            return ReadAsync(assembly, path);
+        }
+
+        /// <summary>
+        /// Reads a whole embedded resource of the given assembly as bytes.
+        /// </summary>
+        /// <param name="assembly">Assembly that contains the resource</param>
+        /// <param name="filename">Resource path relative to the assembly, using '/' as separator</param>
+        /// <returns>The resource contents, or null if the resource does not exist</returns>
+        public static byte[] Read(Assembly assembly, string filename)
+        {
+            using (var stream = GetStream(assembly, filename))
+            {
+                if (stream == null)
+                {
+                    return null;
+                }
+
+                using (var mem = new MemoryStream())
+                {
+                    stream.CopyTo(mem);
+
+                    return mem.ToArray();
+                }
+            }
+        }
+
+        /// <summary>
+        /// Asynchronously reads a whole embedded resource of the given assembly as bytes.
+        /// </summary>
+        /// <param name="assembly">Assembly that contains the resource</param>
+        /// <param name="filename">Resource path relative to the assembly, using '/' as separator</param>
+        /// <returns>A task producing the resource contents, or null if the resource does not exist</returns>
+        public static async Task<byte[]> ReadAsync(Assembly assembly, string filename)
+        {
+            using (var stream = GetStream(assembly, filename))
+            {
+                if (stream == null)
+                {
+                    return null;
+                }
+
+                using (var mem = new MemoryStream())
+                {
+                    await stream.CopyToAsync(mem);
+
+                    return mem.ToArray();
+                }
+            }
+        }
+
+        /// <summary>
+        /// Reads a whole embedded resource as text.
+        /// </summary>
+        /// <param name="filename">Resource path, optionally prefixed with "AssemblyName/"</param>
+        /// <returns>The resource text, or null if the resource does not exist</returns>
+        public static string ReadAllText(string filename)
+        {
+            var (assembly, path) = ResolveManifestPath(filename);
+
+            return ReadAllText(assembly, path);
+        }
+
+        /// <summary>
+        /// Asynchronously reads a whole embedded resource as text.
+        /// </summary>
+        /// <param name="filename">Resource path, optionally prefixed with "AssemblyName/"</param>
+        /// <returns>A task producing the resource text, or null if the resource does not exist</returns>
+        public static Task<string> ReadAllTextAsync(string filename)
+        {
+            var (assembly, path) = ResolveManifestPath(filename);
+
+            return ReadAllTextAsync(assembly, path);
+        }
+
+        /// <summary>
+        /// Reads a whole embedded resource of the given assembly as text.
+        /// </summary>
+        /// <param name="assembly">Assembly that contains the resource</param>
+        /// <param name="filename">Resource path relative to the assembly, using '/' as separator</param>
+        /// <returns>The resource text, or null if the resource does not exist</returns>
+        public static string ReadAllText(Assembly assembly, string filename)
+        {
+            using (var stream = GetStream(assembly, filename))
+            {
+                if (stream == null)
+                {
+                    return null;
+                }
+
+                using (var reader = new StreamReader(stream))
+                {
+                    return reader.ReadToEnd();
+                }
+            }
+        }
+
+        /// <summary>
+        /// Asynchronously reads a whole embedded resource of the given assembly as text.
+        /// </summary>
+        /// <param name="assembly">Assembly that contains the resource</param>
+        /// <param name="filename">Resource path relative to the assembly, using '/' as separator</param>
+        /// <returns>A task producing the resource text, or null if the resource does not exist</returns>
+        public static async Task<string> ReadAllTextAsync(Assembly assembly, string filename)
+        {
+            using (var stream = GetStream(assembly, filename))
+            {
+                if (stream == null)
+                {
+                    return null;
+                }
+
+                using (var reader = new StreamReader(stream))
+                {
+                    return await reader.ReadToEndAsync();
+                }
+            }
+        }
+
+        /// <summary>
+        /// Opens an embedded resource as a stream. The caller owns the returned stream.
+        /// </summary>
+        /// <param name="filename">Resource path, optionally prefixed with "AssemblyName/"</param>
+        /// <returns>The resource stream, or null if the resource does not exist</returns>
+        public static Stream GetStream(string filename)
+        {
+            var (assembly, path) = ResolveManifestPath(filename);
+
+            // Use the resolved path: the assembly-name prefix (if any) has already been
+            // consumed by ResolveManifestPath and must not be part of the manifest name.
+            return GetStream(assembly, path);
+        }
+
+        /// <summary>
+        /// Opens an embedded resource of the given assembly as a stream. The caller owns the returned stream.
+        /// </summary>
+        /// <param name="assembly">Assembly that contains the resource</param>
+        /// <param name="filename">Resource path relative to the assembly, using '/' as separator</param>
+        /// <returns>The resource stream, or null if the resource does not exist</returns>
+        public static Stream GetStream(Assembly assembly, string filename)
+        {
+            // The manifest name is built as "<assembly name>.<path with '/' replaced by '.'>".
+            string assemblyName = assembly.GetName().Name;
+            string manifestUri = assemblyName + "." + filename.Replace('/', '.');
+
+            // GetManifestResourceStream already returns null for an unknown resource.
+            return assembly.GetManifestResourceStream(manifestUri);
+        }
+
+        /// <summary>
+        /// Splits a resource path into the assembly that should contain it and the path inside that assembly.
+        /// </summary>
+        /// <param name="filename">Resource path, optionally prefixed with "AssemblyName/"</param>
+        /// <returns>
+        /// The matching loaded assembly and the remainder of the path when the first segment names
+        /// a loaded assembly; otherwise the assembly declaring this class and the unmodified path.
+        /// </returns>
+        private static (Assembly, string) ResolveManifestPath(string filename)
+        {
+            var segments = filename.Split(new[] { '/' }, 2, StringSplitOptions.RemoveEmptyEntries);
+
+            if (segments.Length >= 2)
+            {
+                foreach (var assembly in AppDomain.CurrentDomain.GetAssemblies())
+                {
+                    if (assembly.GetName().Name == segments[0])
+                    {
+                        return (assembly, segments[1]);
+                    }
+                }
+            }
+
+            return (ResourceAssembly, filename);
+        }
+    }
+} \ No newline at end of file
diff --git a/Ryujinx.Graphics.Shader/CodeGen/Glsl/CodeGenContext.cs b/Ryujinx.Graphics.Shader/CodeGen/Glsl/CodeGenContext.cs
index abfe55a5..5222fc7d 100644
--- a/Ryujinx.Graphics.Shader/CodeGen/Glsl/CodeGenContext.cs
+++ b/Ryujinx.Graphics.Shader/CodeGen/Glsl/CodeGenContext.cs
@@ -5,7 +5,7 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl
{
class CodeGenContext
{
- private const string Tab = " ";
+ public const string Tab = " ";
public ShaderConfig Config { get; }
@@ -90,5 +90,10 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl
return indentation;
}
+
+        /// <summary>
+        /// Gets the string used for one level of indentation.
+        /// </summary>
+        /// <returns>The value of the <see cref="Tab"/> constant</returns>
+        public string GetTabString() => Tab;
}
} \ No newline at end of file
diff --git a/Ryujinx.Graphics.Shader/CodeGen/Glsl/Declarations.cs b/Ryujinx.Graphics.Shader/CodeGen/Glsl/Declarations.cs
index ab10d91a..7c67bc13 100644
--- a/Ryujinx.Graphics.Shader/CodeGen/Glsl/Declarations.cs
+++ b/Ryujinx.Graphics.Shader/CodeGen/Glsl/Declarations.cs
@@ -1,3 +1,4 @@
+using Ryujinx.Common;
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using Ryujinx.Graphics.Shader.StructuredIr;
using Ryujinx.Graphics.Shader.Translation;
@@ -15,6 +16,7 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl
public static void Declare(CodeGenContext context, StructuredProgramInfo info)
{
context.AppendLine("#version 420 core");
+ context.AppendLine("#extension GL_ARB_shader_ballot : enable");
context.AppendLine("#extension GL_ARB_shader_storage_buffer_object : enable");
if (context.Config.Stage == ShaderStage.Compute)
@@ -131,6 +133,31 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl
$"local_size_z = {localSizeZ}) in;");
context.AppendLine();
}
+
+ if ((info.HelperFunctionsMask & HelperFunctionsMask.Shuffle) != 0)
+ {
+ AppendHelperFunction(context, "Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/Shuffle.glsl");
+ }
+
+ if ((info.HelperFunctionsMask & HelperFunctionsMask.ShuffleDown) != 0)
+ {
+ AppendHelperFunction(context, "Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/ShuffleDown.glsl");
+ }
+
+ if ((info.HelperFunctionsMask & HelperFunctionsMask.ShuffleUp) != 0)
+ {
+ AppendHelperFunction(context, "Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/ShuffleUp.glsl");
+ }
+
+ if ((info.HelperFunctionsMask & HelperFunctionsMask.ShuffleXor) != 0)
+ {
+ AppendHelperFunction(context, "Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/ShuffleXor.glsl");
+ }
+
+ if ((info.HelperFunctionsMask & HelperFunctionsMask.SwizzleAdd) != 0)
+ {
+ AppendHelperFunction(context, "Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/SwizzleAdd.glsl");
+ }
}
public static void DeclareLocals(CodeGenContext context, StructuredProgramInfo info)
@@ -321,6 +348,14 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl
}
}
+        /// <summary>
+        /// Appends the source of an embedded GLSL helper function to the generated code,
+        /// then appends an empty line.
+        /// </summary>
+        /// <param name="context">Code generation context that receives the helper source</param>
+        /// <param name="filename">Embedded resource path of the helper function source</param>
+        /// <exception cref="System.InvalidOperationException">The embedded resource could not be found</exception>
+        private static void AppendHelperFunction(CodeGenContext context, string filename)
+        {
+            string code = EmbeddedResources.ReadAllText(filename);
+
+            // ReadAllText returns null for a missing resource; report which resource is
+            // missing instead of failing with a NullReferenceException on Replace below.
+            if (code == null)
+            {
+                throw new System.InvalidOperationException($"Embedded shader helper \"{filename}\" was not found.");
+            }
+
+            // Replace any tab characters with the generator's own indentation string.
+            context.AppendLine(code.Replace("\t", CodeGenContext.Tab));
+            context.AppendLine();
+        }
+
private static string GetSamplerTypeName(SamplerType type)
{
string typeName;
diff --git a/Ryujinx.Graphics.Shader/CodeGen/Glsl/GlslGenerator.cs b/Ryujinx.Graphics.Shader/CodeGen/Glsl/GlslGenerator.cs
index b5407eb8..b1b9afad 100644
--- a/Ryujinx.Graphics.Shader/CodeGen/Glsl/GlslGenerator.cs
+++ b/Ryujinx.Graphics.Shader/CodeGen/Glsl/GlslGenerator.cs
@@ -33,6 +33,15 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl
Declarations.DeclareLocals(context, info);
+ // Some games will leave some elements of gl_Position uninitialized.
+ // In those cases, the elements will contain undefined values according
+ // to the spec, but on NVIDIA they seem to always be initialized to (0, 0, 0, 1),
+ // so we do explicit initialization to avoid UB on non-NVIDIA GPUs.
+ if (context.Config.Stage == ShaderStage.Vertex)
+ {
+ context.AppendLine("gl_Position = vec4(0.0, 0.0, 0.0, 1.0);");
+ }
+
// Ensure that unused attributes are set, otherwise the downstream
// compiler may eliminate them.
// (Not needed for fragment shader as it is the last stage).
diff --git a/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/HelperFunctionNames.cs b/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/HelperFunctionNames.cs
new file mode 100644
index 00000000..f1540fbf
--- /dev/null
+++ b/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/HelperFunctionNames.cs
@@ -0,0 +1,11 @@
+namespace Ryujinx.Graphics.Shader.CodeGen.Glsl
+{
+    /// <summary>
+    /// Names of the GLSL helper functions that the code generator can call.
+    /// Each name matches the function defined in the helper source of the same name
+    /// (for example, "Helper_Shuffle" is defined in Shuffle.glsl).
+    /// </summary>
+    static class HelperFunctionNames
+    {
+        // Constants rather than mutable static fields: these names are fixed and
+        // must never be reassigned at runtime.
+        public const string Shuffle     = "Helper_Shuffle";
+        public const string ShuffleDown = "Helper_ShuffleDown";
+        public const string ShuffleUp   = "Helper_ShuffleUp";
+        public const string ShuffleXor  = "Helper_ShuffleXor";
+        public const string SwizzleAdd  = "Helper_SwizzleAdd";
+    }
+} \ No newline at end of file
diff --git a/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/Shuffle.glsl b/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/Shuffle.glsl
new file mode 100644
index 00000000..380bc581
--- /dev/null
+++ b/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/Shuffle.glsl
@@ -0,0 +1,9 @@
+// Reads x from another invocation of the subgroup, selected by index within the
+// current segment. Bits 0-4 of mask are the clamp value and bits 8-12 the segment
+// mask. Returns this invocation's own x when the computed source lane is above the
+// segment's upper bound.
+float Helper_Shuffle(float x, uint index, uint mask)
+{
+    uint laneClamp = mask & 0x1fu;
+    uint segmentMask = (mask >> 8) & 0x1fu;
+    uint firstLane = gl_SubGroupInvocationARB & segmentMask;
+    uint lastLane = firstLane | (laneClamp & ~segmentMask);
+    uint sourceLane = firstLane | (index & ~segmentMask);
+
+    if (sourceLane > lastLane)
+    {
+        return x;
+    }
+
+    return readInvocationARB(x, sourceLane);
+} \ No newline at end of file
diff --git a/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/ShuffleDown.glsl b/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/ShuffleDown.glsl
new file mode 100644
index 00000000..46750f20
--- /dev/null
+++ b/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/ShuffleDown.glsl
@@ -0,0 +1,9 @@
+// Reads x from the invocation that is index lanes above the current one.
+// Bits 0-4 of mask are the clamp value and bits 8-12 the segment mask. Returns this
+// invocation's own x when the source lane is above the segment's upper bound.
+float Helper_ShuffleDown(float x, uint index, uint mask)
+{
+    uint laneClamp = mask & 0x1fu;
+    uint segmentMask = (mask >> 8) & 0x1fu;
+    uint firstLane = gl_SubGroupInvocationARB & segmentMask;
+    uint lastLane = firstLane | (laneClamp & ~segmentMask);
+    uint sourceLane = gl_SubGroupInvocationARB + index;
+
+    if (sourceLane > lastLane)
+    {
+        return x;
+    }
+
+    return readInvocationARB(x, sourceLane);
+} \ No newline at end of file
diff --git a/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/ShuffleUp.glsl b/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/ShuffleUp.glsl
new file mode 100644
index 00000000..2bc83469
--- /dev/null
+++ b/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/ShuffleUp.glsl
@@ -0,0 +1,8 @@
+// Reads x from the invocation that is index lanes below the current one.
+// Bits 8-12 of mask are the segment mask; the lowest lane of the current segment is
+// the lower bound. Returns this invocation's own x when the source lane would fall
+// below that bound.
+float Helper_ShuffleUp(float x, uint index, uint mask)
+{
+    uint segMask = (mask >> 8) & 0x1fu;
+    uint minThreadId = gl_SubGroupInvocationARB & segMask;
+    uint srcThreadId = gl_SubGroupInvocationARB - index;
+    // Compare as signed: when index is larger than the invocation id, the unsigned
+    // subtraction wraps around to a huge value that would pass an unsigned >= check
+    // and read from an invocation that does not exist.
+    return (int(srcThreadId) >= int(minThreadId)) ? readInvocationARB(x, srcThreadId) : x;
+} \ No newline at end of file
diff --git a/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/ShuffleXor.glsl b/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/ShuffleXor.glsl
new file mode 100644
index 00000000..1049e181
--- /dev/null
+++ b/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/ShuffleXor.glsl
@@ -0,0 +1,9 @@
+// Reads x from the invocation whose id is the current id XOR index.
+// Bits 0-4 of mask are the clamp value and bits 8-12 the segment mask. Returns this
+// invocation's own x when the source lane is above the segment's upper bound.
+float Helper_ShuffleXor(float x, uint index, uint mask)
+{
+    uint laneClamp = mask & 0x1fu;
+    uint segmentMask = (mask >> 8) & 0x1fu;
+    uint firstLane = gl_SubGroupInvocationARB & segmentMask;
+    uint lastLane = firstLane | (laneClamp & ~segmentMask);
+    uint sourceLane = gl_SubGroupInvocationARB ^ index;
+
+    if (sourceLane > lastLane)
+    {
+        return x;
+    }
+
+    return readInvocationARB(x, sourceLane);
+} \ No newline at end of file
diff --git a/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/SwizzleAdd.glsl b/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/SwizzleAdd.glsl
new file mode 100644
index 00000000..7df3e57f
--- /dev/null
+++ b/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/SwizzleAdd.glsl
@@ -0,0 +1,7 @@
+// Computes x * xLut[i] + y * yLut[i], where i is a 2-bit selector taken from mask.
+// mask holds one 2-bit selector per lane of a group of four; the lane is the
+// invocation id modulo 4.
+float Helper_SwizzleAdd(float x, float y, int mask)
+{
+    vec4 xLut = vec4(1.0, -1.0, 1.0, 0.0);
+    vec4 yLut = vec4(1.0, 1.0, -1.0, 1.0);
+    // Shift this lane's selector down to bit 0, then keep only its 2 bits; without
+    // the final "& 3" the selectors of higher lanes stay in the value and the index
+    // goes out of range for the 4-element tables.
+    int lutIdx = (mask >> (int(gl_SubGroupInvocationARB & 3u) * 2)) & 3;
+    return x * xLut[lutIdx] + y * yLut[lutIdx];
+} \ No newline at end of file
diff --git a/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGenHelper.cs b/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGenHelper.cs
index 24b93afb..2aaae71c 100644
--- a/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGenHelper.cs
+++ b/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGenHelper.cs
@@ -15,6 +15,7 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions
Add(Instruction.Absolute, InstType.CallUnary, "abs");
Add(Instruction.Add, InstType.OpBinaryCom, "+", 2);
+ Add(Instruction.BitCount, InstType.CallUnary, "bitCount");
Add(Instruction.BitfieldExtractS32, InstType.CallTernary, "bitfieldExtract");
Add(Instruction.BitfieldExtractU32, InstType.CallTernary, "bitfieldExtract");
Add(Instruction.BitfieldInsert, InstType.CallQuaternary, "bitfieldInsert");
@@ -41,11 +42,15 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions
Add(Instruction.ConvertS32ToFP, InstType.CallUnary, "float");
Add(Instruction.ConvertU32ToFP, InstType.CallUnary, "float");
Add(Instruction.Cosine, InstType.CallUnary, "cos");
+ Add(Instruction.Ddx, InstType.CallUnary, "dFdx");
+ Add(Instruction.Ddy, InstType.CallUnary, "dFdy");
Add(Instruction.Discard, InstType.OpNullary, "discard");
Add(Instruction.Divide, InstType.OpBinary, "/", 1);
Add(Instruction.EmitVertex, InstType.CallNullary, "EmitVertex");
Add(Instruction.EndPrimitive, InstType.CallNullary, "EndPrimitive");
Add(Instruction.ExponentB2, InstType.CallUnary, "exp2");
+ Add(Instruction.FindFirstSetS32, InstType.CallUnary, "findMSB");
+ Add(Instruction.FindFirstSetU32, InstType.CallUnary, "findMSB");
Add(Instruction.Floor, InstType.CallUnary, "floor");
Add(Instruction.FusedMultiplyAdd, InstType.CallTernary, "fma");
Add(Instruction.ImageLoad, InstType.Special);
@@ -66,6 +71,10 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions
Add(Instruction.ShiftLeft, InstType.OpBinary, "<<", 3);
Add(Instruction.ShiftRightS32, InstType.OpBinary, ">>", 3);
Add(Instruction.ShiftRightU32, InstType.OpBinary, ">>", 3);
+ Add(Instruction.Shuffle, InstType.CallTernary, HelperFunctionNames.Shuffle);
+ Add(Instruction.ShuffleDown, InstType.CallTernary, HelperFunctionNames.ShuffleDown);
+ Add(Instruction.ShuffleUp, InstType.CallTernary, HelperFunctionNames.ShuffleUp);
+ Add(Instruction.ShuffleXor, InstType.CallTernary, HelperFunctionNames.ShuffleXor);
Add(Instruction.Maximum, InstType.CallBinary, "max");
Add(Instruction.MaximumU32, InstType.CallBinary, "max");
Add(Instruction.Minimum, InstType.CallBinary, "min");
@@ -80,6 +89,7 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions
Add(Instruction.StoreLocal, InstType.Special);
Add(Instruction.StoreStorage, InstType.Special);
Add(Instruction.Subtract, InstType.OpBinary, "-", 2);
+ Add(Instruction.SwizzleAdd, InstType.CallTernary, HelperFunctionNames.SwizzleAdd);
Add(Instruction.TextureSample, InstType.Special);
Add(Instruction.TextureSize, InstType.Special);
Add(Instruction.Truncate, InstType.CallUnary, "trunc");
diff --git a/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGenMemory.cs b/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGenMemory.cs
index f2f6ae0c..913cace1 100644
--- a/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGenMemory.cs
+++ b/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGenMemory.cs
@@ -164,13 +164,14 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions
{
AstTextureOperation texOp = (AstTextureOperation)operation;
- bool isBindless = (texOp.Flags & TextureFlags.Bindless) != 0;
- bool isGather = (texOp.Flags & TextureFlags.Gather) != 0;
- bool intCoords = (texOp.Flags & TextureFlags.IntCoords) != 0;
- bool hasLodBias = (texOp.Flags & TextureFlags.LodBias) != 0;
- bool hasLodLevel = (texOp.Flags & TextureFlags.LodLevel) != 0;
- bool hasOffset = (texOp.Flags & TextureFlags.Offset) != 0;
- bool hasOffsets = (texOp.Flags & TextureFlags.Offsets) != 0;
+ bool isBindless = (texOp.Flags & TextureFlags.Bindless) != 0;
+ bool isGather = (texOp.Flags & TextureFlags.Gather) != 0;
+ bool hasDerivatives = (texOp.Flags & TextureFlags.Derivatives) != 0;
+ bool intCoords = (texOp.Flags & TextureFlags.IntCoords) != 0;
+ bool hasLodBias = (texOp.Flags & TextureFlags.LodBias) != 0;
+ bool hasLodLevel = (texOp.Flags & TextureFlags.LodLevel) != 0;
+ bool hasOffset = (texOp.Flags & TextureFlags.Offset) != 0;
+ bool hasOffsets = (texOp.Flags & TextureFlags.Offsets) != 0;
bool isArray = (texOp.Type & SamplerType.Array) != 0;
bool isMultisample = (texOp.Type & SamplerType.Multisample) != 0;
@@ -190,6 +191,10 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions
{
texCall += "Gather";
}
+ else if (hasDerivatives)
+ {
+ texCall += "Grad";
+ }
else if (hasLodLevel && !intCoords)
{
texCall += "Lod";
@@ -297,6 +302,31 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions
Append(AssemblePVector(pCount));
+            // Builds the GLSL expression for one derivative argument: a single F32 source
+            // when count is 1 or less, otherwise a "vecN(...)" constructor over count
+            // consecutive F32 sources.
+            string AssembleDerivativesVector(int count)
+            {
+                if (count <= 1)
+                {
+                    return Src(VariableType.F32);
+                }
+
+                string[] components = new string[count];
+
+                // Sources are consumed in order, one per vector component.
+                for (int i = 0; i < components.Length; i++)
+                {
+                    components[i] = Src(VariableType.F32);
+                }
+
+                return $"vec{count}({string.Join(", ", components)})";
+            }
+
+ if (hasDerivatives)
+ {
+ Append(AssembleDerivativesVector(coordsCount)); // dPdx
+ Append(AssembleDerivativesVector(coordsCount)); // dPdy
+ }
+
if (hasExtraCompareArg)
{
Append(Src(VariableType.F32));
diff --git a/Ryujinx.Graphics.Shader/Decoders/Decoder.cs b/Ryujinx.Graphics.Shader/Decoders/Decoder.cs
index dd5347d9..4078440b 100644
--- a/Ryujinx.Graphics.Shader/Decoders/Decoder.cs
+++ b/Ryujinx.Graphics.Shader/Decoders/Decoder.cs
@@ -241,7 +241,7 @@ namespace Ryujinx.Graphics.Shader.Decoders
private static bool IsBranch(OpCode opCode)
{
- return (opCode is OpCodeBranch && opCode.Emitter != InstEmit.Ssy) ||
+ return (opCode is OpCodeBranch opBranch && !opBranch.PushTarget) ||
opCode is OpCodeSync ||
opCode is OpCodeExit;
}
diff --git a/Ryujinx.Graphics.Shader/Decoders/OpCodeBranch.cs b/Ryujinx.Graphics.Shader/Decoders/OpCodeBranch.cs
index 25941b39..f51c3996 100644
--- a/Ryujinx.Graphics.Shader/Decoders/OpCodeBranch.cs
+++ b/Ryujinx.Graphics.Shader/Decoders/OpCodeBranch.cs
@@ -6,9 +6,13 @@ namespace Ryujinx.Graphics.Shader.Decoders
{
public int Offset { get; }
+ public bool PushTarget { get; protected set; }
+
public OpCodeBranch(InstEmitter emitter, ulong address, long opCode) : base(emitter, address, opCode)
{
Offset = ((int)(opCode >> 20) << 8) >> 8;
+
+ PushTarget = false;
}
public ulong GetAbsoluteAddress()
diff --git a/Ryujinx.Graphics.Shader/Decoders/OpCodeShuffle.cs b/Ryujinx.Graphics.Shader/Decoders/OpCodeShuffle.cs
new file mode 100644
index 00000000..43693cf4
--- /dev/null
+++ b/Ryujinx.Graphics.Shader/Decoders/OpCodeShuffle.cs
@@ -0,0 +1,40 @@
+using Ryujinx.Graphics.Shader.Instructions;
+
+namespace Ryujinx.Graphics.Shader.Decoders
+{
+    /// <summary>
+    /// Decoded form of a shuffle instruction: destination and source registers, the
+    /// B and C operands in both register and immediate form, the shuffle type and a
+    /// predicate register.
+    /// </summary>
+    /// <remarks>
+    /// NOTE(review): the B and C operands are decoded both as a register and as an
+    /// immediate; presumably <see cref="IsBImmediate"/> and <see cref="IsCImmediate"/>
+    /// tell the emitter which form to use. Confirm against the emitter.
+    /// </remarks>
+    class OpCodeShuffle : OpCode, IOpCodeRd, IOpCodeRa
+    {
+        public Register Rd { get; }
+        public Register Ra { get; }
+        public Register Rb { get; }
+        public Register Rc { get; }
+
+        public int ImmediateB { get; }
+        public int ImmediateC { get; }
+
+        public bool IsBImmediate { get; }
+        public bool IsCImmediate { get; }
+
+        public ShuffleType ShuffleType { get; }
+
+        public Register Predicate48 { get; }
+
+        public OpCodeShuffle(InstEmitter emitter, ulong address, long opCode) : base(emitter, address, opCode)
+        {
+            // Destination and first source.
+            Rd = new Register(opCode.Extract(0, 8), RegisterType.Gpr);
+            Ra = new Register(opCode.Extract(8, 8), RegisterType.Gpr);
+
+            // Operand B: register form, immediate form and the selector flag.
+            Rb           = new Register(opCode.Extract(20, 8), RegisterType.Gpr);
+            ImmediateB   = opCode.Extract(20, 5);
+            IsBImmediate = opCode.Extract(28);
+
+            // Operand C: register form, immediate form and the selector flag.
+            Rc           = new Register(opCode.Extract(39, 8), RegisterType.Gpr);
+            ImmediateC   = opCode.Extract(34, 13);
+            IsCImmediate = opCode.Extract(29);
+
+            ShuffleType = (ShuffleType)opCode.Extract(30, 2);
+
+            Predicate48 = new Register(opCode.Extract(48, 3), RegisterType.Predicate);
+        }
+    }
+} \ No newline at end of file
diff --git a/Ryujinx.Graphics.Shader/Decoders/OpCodeSsy.cs b/Ryujinx.Graphics.Shader/Decoders/OpCodeSsy.cs
index 499c0706..d3831e22 100644
--- a/Ryujinx.Graphics.Shader/Decoders/OpCodeSsy.cs
+++ b/Ryujinx.Graphics.Shader/Decoders/OpCodeSsy.cs
@@ -15,6 +15,8 @@ namespace Ryujinx.Graphics.Shader.Decoders
Predicate = new Register(RegisterConsts.PredicateTrueIndex, RegisterType.Predicate);
InvertPredicate = false;
+
+ PushTarget = true;
}
}
} \ No newline at end of file
diff --git a/Ryujinx.Graphics.Shader/Decoders/OpCodeTable.cs b/Ryujinx.Graphics.Shader/Decoders/OpCodeTable.cs
index 5128dae3..7adaff61 100644
--- a/Ryujinx.Graphics.Shader/Decoders/OpCodeTable.cs
+++ b/Ryujinx.Graphics.Shader/Decoders/OpCodeTable.cs
@@ -30,136 +30,148 @@ namespace Ryujinx.Graphics.Shader.Decoders
_opCodes = new TableEntry[1 << EncodingBits];
#region Instructions
- Set("1110111111011x", InstEmit.Ald, typeof(OpCodeAttribute));
- Set("1110111111110x", InstEmit.Ast, typeof(OpCodeAttribute));
- Set("0100110000000x", InstEmit.Bfe, typeof(OpCodeAluCbuf));
- Set("0011100x00000x", InstEmit.Bfe, typeof(OpCodeAluImm));
- Set("0101110000000x", InstEmit.Bfe, typeof(OpCodeAluReg));
- Set("111000100100xx", InstEmit.Bra, typeof(OpCodeBranch));
- Set("0101000010100x", InstEmit.Csetp, typeof(OpCodePsetp));
- Set("111000110000xx", InstEmit.Exit, typeof(OpCodeExit));
- Set("0100110010101x", InstEmit.F2F, typeof(OpCodeFArithCbuf));
- Set("0011100x10101x", InstEmit.F2F, typeof(OpCodeFArithImm));
- Set("0101110010101x", InstEmit.F2F, typeof(OpCodeFArithReg));
- Set("0100110010110x", InstEmit.F2I, typeof(OpCodeFArithCbuf));
- Set("0011100x10110x", InstEmit.F2I, typeof(OpCodeFArithImm));
- Set("0101110010110x", InstEmit.F2I, typeof(OpCodeFArithReg));
- Set("0100110001011x", InstEmit.Fadd, typeof(OpCodeFArithCbuf));
- Set("0011100x01011x", InstEmit.Fadd, typeof(OpCodeFArithImm));
- Set("000010xxxxxxxx", InstEmit.Fadd, typeof(OpCodeFArithImm32));
- Set("0101110001011x", InstEmit.Fadd, typeof(OpCodeFArithReg));
- Set("010010011xxxxx", InstEmit.Ffma, typeof(OpCodeFArithCbuf));
- Set("0011001x1xxxxx", InstEmit.Ffma, typeof(OpCodeFArithImm));
- Set("010100011xxxxx", InstEmit.Ffma, typeof(OpCodeFArithRegCbuf));
- Set("010110011xxxxx", InstEmit.Ffma, typeof(OpCodeFArithReg));
- Set("0100110001100x", InstEmit.Fmnmx, typeof(OpCodeFArithCbuf));
- Set("0011100x01100x", InstEmit.Fmnmx, typeof(OpCodeFArithImm));
- Set("0101110001100x", InstEmit.Fmnmx, typeof(OpCodeFArithReg));
- Set("0100110001101x", InstEmit.Fmul, typeof(OpCodeFArithCbuf));
- Set("0011100x01101x", InstEmit.Fmul, typeof(OpCodeFArithImm));
- Set("00011110xxxxxx", InstEmit.Fmul, typeof(OpCodeFArithImm32));
- Set("0101110001101x", InstEmit.Fmul, typeof(OpCodeFArithReg));
- Set("0100100xxxxxxx", InstEmit.Fset, typeof(OpCodeSetCbuf));
- Set("0011000xxxxxxx", InstEmit.Fset, typeof(OpCodeFsetImm));
- Set("01011000xxxxxx", InstEmit.Fset, typeof(OpCodeSetReg));
- Set("010010111011xx", InstEmit.Fsetp, typeof(OpCodeSetCbuf));
- Set("0011011x1011xx", InstEmit.Fsetp, typeof(OpCodeFsetImm));
- Set("010110111011xx", InstEmit.Fsetp, typeof(OpCodeSetReg));
- Set("0111101x1xxxxx", InstEmit.Hadd2, typeof(OpCodeAluCbuf));
- Set("0111101x0xxxxx", InstEmit.Hadd2, typeof(OpCodeAluImm2x10));
- Set("0010110xxxxxxx", InstEmit.Hadd2, typeof(OpCodeAluImm32));
- Set("0101110100010x", InstEmit.Hadd2, typeof(OpCodeAluReg));
- Set("01110xxx1xxxxx", InstEmit.Hfma2, typeof(OpCodeHfmaCbuf));
- Set("01110xxx0xxxxx", InstEmit.Hfma2, typeof(OpCodeHfmaImm2x10));
- Set("0010100xxxxxxx", InstEmit.Hfma2, typeof(OpCodeHfmaImm32));
- Set("0101110100000x", InstEmit.Hfma2, typeof(OpCodeHfmaReg));
- Set("01100xxx1xxxxx", InstEmit.Hfma2, typeof(OpCodeHfmaRegCbuf));
- Set("0111100x1xxxxx", InstEmit.Hmul2, typeof(OpCodeAluCbuf));
- Set("0111100x0xxxxx", InstEmit.Hmul2, typeof(OpCodeAluImm2x10));
- Set("0010101xxxxxxx", InstEmit.Hmul2, typeof(OpCodeAluImm32));
- Set("0101110100001x", InstEmit.Hmul2, typeof(OpCodeAluReg));
- Set("0111111x1xxxxx", InstEmit.Hsetp2, typeof(OpCodeSetCbuf));
- Set("0111111x0xxxxx", InstEmit.Hsetp2, typeof(OpCodeHsetImm2x10));
- Set("0101110100100x", InstEmit.Hsetp2, typeof(OpCodeSetReg));
- Set("0100110010111x", InstEmit.I2F, typeof(OpCodeAluCbuf));
- Set("0011100x10111x", InstEmit.I2F, typeof(OpCodeAluImm));
- Set("0101110010111x", InstEmit.I2F, typeof(OpCodeAluReg));
- Set("0100110011100x", InstEmit.I2I, typeof(OpCodeAluCbuf));
- Set("0011100x11100x", InstEmit.I2I, typeof(OpCodeAluImm));
- Set("0101110011100x", InstEmit.I2I, typeof(OpCodeAluReg));
- Set("0100110000010x", InstEmit.Iadd, typeof(OpCodeAluCbuf));
- Set("0011100000010x", InstEmit.Iadd, typeof(OpCodeAluImm));
- Set("0001110x0xxxxx", InstEmit.Iadd, typeof(OpCodeAluImm32));
- Set("0101110000010x", InstEmit.Iadd, typeof(OpCodeAluReg));
- Set("010011001100xx", InstEmit.Iadd3, typeof(OpCodeAluCbuf));
- Set("001110001100xx", InstEmit.Iadd3, typeof(OpCodeAluImm));
- Set("010111001100xx", InstEmit.Iadd3, typeof(OpCodeAluReg));
- Set("0100110000100x", InstEmit.Imnmx, typeof(OpCodeAluCbuf));
- Set("0011100x00100x", InstEmit.Imnmx, typeof(OpCodeAluImm));
- Set("0101110000100x", InstEmit.Imnmx, typeof(OpCodeAluReg));
- Set("11100000xxxxxx", InstEmit.Ipa, typeof(OpCodeIpa));
- Set("1110111111010x", InstEmit.Isberd, typeof(OpCodeAlu));
- Set("0100110000011x", InstEmit.Iscadd, typeof(OpCodeAluCbuf));
- Set("0011100x00011x", InstEmit.Iscadd, typeof(OpCodeAluImm));
- Set("000101xxxxxxxx", InstEmit.Iscadd, typeof(OpCodeAluImm32));
- Set("0101110000011x", InstEmit.Iscadd, typeof(OpCodeAluReg));
- Set("010010110101xx", InstEmit.Iset, typeof(OpCodeSetCbuf));
- Set("001101100101xx", InstEmit.Iset, typeof(OpCodeSetImm));
- Set("010110110101xx", InstEmit.Iset, typeof(OpCodeSetReg));
- Set("010010110110xx", InstEmit.Isetp, typeof(OpCodeSetCbuf));
- Set("0011011x0110xx", InstEmit.Isetp, typeof(OpCodeSetImm));
- Set("010110110110xx", InstEmit.Isetp, typeof(OpCodeSetReg));
- Set("111000110011xx", InstEmit.Kil, typeof(OpCodeExit));
- Set("1110111101000x", InstEmit.Ld, typeof(OpCodeMemory));
- Set("1110111110010x", InstEmit.Ldc, typeof(OpCodeLdc));
- Set("1110111011010x", InstEmit.Ldg, typeof(OpCodeMemory));
- Set("0100110001000x", InstEmit.Lop, typeof(OpCodeLopCbuf));
- Set("0011100001000x", InstEmit.Lop, typeof(OpCodeLopImm));
- Set("000001xxxxxxxx", InstEmit.Lop, typeof(OpCodeLopImm32));
- Set("0101110001000x", InstEmit.Lop, typeof(OpCodeLopReg));
- Set("0010000xxxxxxx", InstEmit.Lop3, typeof(OpCodeLopCbuf));
- Set("001111xxxxxxxx", InstEmit.Lop3, typeof(OpCodeLopImm));
- Set("0101101111100x", InstEmit.Lop3, typeof(OpCodeLopReg));
- Set("0100110010011x", InstEmit.Mov, typeof(OpCodeAluCbuf));
- Set("0011100x10011x", InstEmit.Mov, typeof(OpCodeAluImm));
- Set("000000010000xx", InstEmit.Mov, typeof(OpCodeAluImm32));
- Set("0101110010011x", InstEmit.Mov, typeof(OpCodeAluReg));
- Set("0101000010000x", InstEmit.Mufu, typeof(OpCodeFArith));
- Set("1111101111100x", InstEmit.Out, typeof(OpCode));
- Set("0101000010010x", InstEmit.Psetp, typeof(OpCodePsetp));
- Set("0100110010010x", InstEmit.Rro, typeof(OpCodeFArithCbuf));
- Set("0011100x10010x", InstEmit.Rro, typeof(OpCodeFArithImm));
- Set("0101110010010x", InstEmit.Rro, typeof(OpCodeFArithReg));
- Set("1111000011001x", InstEmit.S2r, typeof(OpCodeAlu));
- Set("0100110010100x", InstEmit.Sel, typeof(OpCodeAluCbuf));
- Set("0011100x10100x", InstEmit.Sel, typeof(OpCodeAluImm));
- Set("0101110010100x", InstEmit.Sel, typeof(OpCodeAluReg));
- Set("0100110001001x", InstEmit.Shl, typeof(OpCodeAluCbuf));
- Set("0011100x01001x", InstEmit.Shl, typeof(OpCodeAluImm));
- Set("0101110001001x", InstEmit.Shl, typeof(OpCodeAluReg));
- Set("0100110000101x", InstEmit.Shr, typeof(OpCodeAluCbuf));
- Set("0011100x00101x", InstEmit.Shr, typeof(OpCodeAluImm));
- Set("0101110000101x", InstEmit.Shr, typeof(OpCodeAluReg));
- Set("111000101001xx", InstEmit.Ssy, typeof(OpCodeSsy));
- Set("1110111101010x", InstEmit.St, typeof(OpCodeMemory));
- Set("1110111011011x", InstEmit.Stg, typeof(OpCodeMemory));
- Set("11101011001xxx", InstEmit.Sust, typeof(OpCodeImage));
- Set("1111000011111x", InstEmit.Sync, typeof(OpCodeSync));
- Set("110000xxxx111x", InstEmit.Tex, typeof(OpCodeTex));
- Set("1101111010111x", InstEmit.TexB, typeof(OpCodeTexB));
- Set("1101x00xxxxxxx", InstEmit.Texs, typeof(OpCodeTexs));
- Set("1101x01xxxxxxx", InstEmit.Texs, typeof(OpCodeTlds));
- Set("1101x11100xxxx", InstEmit.Texs, typeof(OpCodeTld4s));
- Set("11011100xx111x", InstEmit.Tld, typeof(OpCodeTld));
- Set("11011101xx111x", InstEmit.TldB, typeof(OpCodeTld));
- Set("110010xxxx111x", InstEmit.Tld4, typeof(OpCodeTld4));
- Set("1101111101001x", InstEmit.Txq, typeof(OpCodeTex));
- Set("1101111101010x", InstEmit.TxqB, typeof(OpCodeTex));
- Set("01011111xxxxxx", InstEmit.Vmad, typeof(OpCodeVideo));
- Set("0100111xxxxxxx", InstEmit.Xmad, typeof(OpCodeAluCbuf));
- Set("0011011x00xxxx", InstEmit.Xmad, typeof(OpCodeAluImm));
- Set("010100010xxxxx", InstEmit.Xmad, typeof(OpCodeAluRegCbuf));
- Set("0101101100xxxx", InstEmit.Xmad, typeof(OpCodeAluReg));
+ Set("1110111111011x", InstEmit.Ald, typeof(OpCodeAttribute));
+ Set("1110111111110x", InstEmit.Ast, typeof(OpCodeAttribute));
+ Set("0100110000000x", InstEmit.Bfe, typeof(OpCodeAluCbuf));
+ Set("0011100x00000x", InstEmit.Bfe, typeof(OpCodeAluImm));
+ Set("0101110000000x", InstEmit.Bfe, typeof(OpCodeAluReg));
+ Set("0100101111110x", InstEmit.Bfi, typeof(OpCodeAluCbuf));
+ Set("0011011x11110x", InstEmit.Bfi, typeof(OpCodeAluImm));
+ Set("0101001111110x", InstEmit.Bfi, typeof(OpCodeAluRegCbuf));
+ Set("0101101111110x", InstEmit.Bfi, typeof(OpCodeAluReg));
+ Set("111000100100xx", InstEmit.Bra, typeof(OpCodeBranch));
+ Set("111000110100xx", InstEmit.Brk, typeof(OpCodeSync));
+ Set("0101000010100x", InstEmit.Csetp, typeof(OpCodePsetp));
+ Set("111000110000xx", InstEmit.Exit, typeof(OpCodeExit));
+ Set("0100110010101x", InstEmit.F2F, typeof(OpCodeFArithCbuf));
+ Set("0011100x10101x", InstEmit.F2F, typeof(OpCodeFArithImm));
+ Set("0101110010101x", InstEmit.F2F, typeof(OpCodeFArithReg));
+ Set("0100110010110x", InstEmit.F2I, typeof(OpCodeFArithCbuf));
+ Set("0011100x10110x", InstEmit.F2I, typeof(OpCodeFArithImm));
+ Set("0101110010110x", InstEmit.F2I, typeof(OpCodeFArithReg));
+ Set("0100110001011x", InstEmit.Fadd, typeof(OpCodeFArithCbuf));
+ Set("0011100x01011x", InstEmit.Fadd, typeof(OpCodeFArithImm));
+ Set("000010xxxxxxxx", InstEmit.Fadd, typeof(OpCodeFArithImm32));
+ Set("0101110001011x", InstEmit.Fadd, typeof(OpCodeFArithReg));
+ Set("010010011xxxxx", InstEmit.Ffma, typeof(OpCodeFArithCbuf));
+ Set("0011001x1xxxxx", InstEmit.Ffma, typeof(OpCodeFArithImm));
+ Set("010100011xxxxx", InstEmit.Ffma, typeof(OpCodeFArithRegCbuf));
+ Set("010110011xxxxx", InstEmit.Ffma, typeof(OpCodeFArithReg));
+ Set("0100110000110x", InstEmit.Flo, typeof(OpCodeAluCbuf));
+ Set("0011100x00110x", InstEmit.Flo, typeof(OpCodeAluImm));
+ Set("0101110000110x", InstEmit.Flo, typeof(OpCodeAluReg));
+ Set("0100110001100x", InstEmit.Fmnmx, typeof(OpCodeFArithCbuf));
+ Set("0011100x01100x", InstEmit.Fmnmx, typeof(OpCodeFArithImm));
+ Set("0101110001100x", InstEmit.Fmnmx, typeof(OpCodeFArithReg));
+ Set("0100110001101x", InstEmit.Fmul, typeof(OpCodeFArithCbuf));
+ Set("0011100x01101x", InstEmit.Fmul, typeof(OpCodeFArithImm));
+ Set("00011110xxxxxx", InstEmit.Fmul, typeof(OpCodeFArithImm32));
+ Set("0101110001101x", InstEmit.Fmul, typeof(OpCodeFArithReg));
+ Set("0100100xxxxxxx", InstEmit.Fset, typeof(OpCodeSetCbuf));
+ Set("0011000xxxxxxx", InstEmit.Fset, typeof(OpCodeFsetImm));
+ Set("01011000xxxxxx", InstEmit.Fset, typeof(OpCodeSetReg));
+ Set("010010111011xx", InstEmit.Fsetp, typeof(OpCodeSetCbuf));
+ Set("0011011x1011xx", InstEmit.Fsetp, typeof(OpCodeFsetImm));
+ Set("010110111011xx", InstEmit.Fsetp, typeof(OpCodeSetReg));
+ Set("0101000011111x", InstEmit.Fswzadd, typeof(OpCodeAluReg));
+ Set("0111101x1xxxxx", InstEmit.Hadd2, typeof(OpCodeAluCbuf));
+ Set("0111101x0xxxxx", InstEmit.Hadd2, typeof(OpCodeAluImm2x10));
+ Set("0010110xxxxxxx", InstEmit.Hadd2, typeof(OpCodeAluImm32));
+ Set("0101110100010x", InstEmit.Hadd2, typeof(OpCodeAluReg));
+ Set("01110xxx1xxxxx", InstEmit.Hfma2, typeof(OpCodeHfmaCbuf));
+ Set("01110xxx0xxxxx", InstEmit.Hfma2, typeof(OpCodeHfmaImm2x10));
+ Set("0010100xxxxxxx", InstEmit.Hfma2, typeof(OpCodeHfmaImm32));
+ Set("0101110100000x", InstEmit.Hfma2, typeof(OpCodeHfmaReg));
+ Set("01100xxx1xxxxx", InstEmit.Hfma2, typeof(OpCodeHfmaRegCbuf));
+ Set("0111100x1xxxxx", InstEmit.Hmul2, typeof(OpCodeAluCbuf));
+ Set("0111100x0xxxxx", InstEmit.Hmul2, typeof(OpCodeAluImm2x10));
+ Set("0010101xxxxxxx", InstEmit.Hmul2, typeof(OpCodeAluImm32));
+ Set("0101110100001x", InstEmit.Hmul2, typeof(OpCodeAluReg));
+ Set("0111111x1xxxxx", InstEmit.Hsetp2, typeof(OpCodeSetCbuf));
+ Set("0111111x0xxxxx", InstEmit.Hsetp2, typeof(OpCodeHsetImm2x10));
+ Set("0101110100100x", InstEmit.Hsetp2, typeof(OpCodeSetReg));
+ Set("0100110010111x", InstEmit.I2F, typeof(OpCodeAluCbuf));
+ Set("0011100x10111x", InstEmit.I2F, typeof(OpCodeAluImm));
+ Set("0101110010111x", InstEmit.I2F, typeof(OpCodeAluReg));
+ Set("0100110011100x", InstEmit.I2I, typeof(OpCodeAluCbuf));
+ Set("0011100x11100x", InstEmit.I2I, typeof(OpCodeAluImm));
+ Set("0101110011100x", InstEmit.I2I, typeof(OpCodeAluReg));
+ Set("0100110000010x", InstEmit.Iadd, typeof(OpCodeAluCbuf));
+ Set("0011100000010x", InstEmit.Iadd, typeof(OpCodeAluImm));
+ Set("0001110x0xxxxx", InstEmit.Iadd, typeof(OpCodeAluImm32));
+ Set("0101110000010x", InstEmit.Iadd, typeof(OpCodeAluReg));
+ Set("010011001100xx", InstEmit.Iadd3, typeof(OpCodeAluCbuf));
+ Set("001110001100xx", InstEmit.Iadd3, typeof(OpCodeAluImm));
+ Set("010111001100xx", InstEmit.Iadd3, typeof(OpCodeAluReg));
+ Set("0100110000100x", InstEmit.Imnmx, typeof(OpCodeAluCbuf));
+ Set("0011100x00100x", InstEmit.Imnmx, typeof(OpCodeAluImm));
+ Set("0101110000100x", InstEmit.Imnmx, typeof(OpCodeAluReg));
+ Set("11100000xxxxxx", InstEmit.Ipa, typeof(OpCodeIpa));
+ Set("1110111111010x", InstEmit.Isberd, typeof(OpCodeAlu));
+ Set("0100110000011x", InstEmit.Iscadd, typeof(OpCodeAluCbuf));
+ Set("0011100x00011x", InstEmit.Iscadd, typeof(OpCodeAluImm));
+ Set("000101xxxxxxxx", InstEmit.Iscadd, typeof(OpCodeAluImm32));
+ Set("0101110000011x", InstEmit.Iscadd, typeof(OpCodeAluReg));
+ Set("010010110101xx", InstEmit.Iset, typeof(OpCodeSetCbuf));
+ Set("001101100101xx", InstEmit.Iset, typeof(OpCodeSetImm));
+ Set("010110110101xx", InstEmit.Iset, typeof(OpCodeSetReg));
+ Set("010010110110xx", InstEmit.Isetp, typeof(OpCodeSetCbuf));
+ Set("0011011x0110xx", InstEmit.Isetp, typeof(OpCodeSetImm));
+ Set("010110110110xx", InstEmit.Isetp, typeof(OpCodeSetReg));
+ Set("111000110011xx", InstEmit.Kil, typeof(OpCodeExit));
+ Set("1110111101000x", InstEmit.Ld, typeof(OpCodeMemory));
+ Set("1110111110010x", InstEmit.Ldc, typeof(OpCodeLdc));
+ Set("1110111011010x", InstEmit.Ldg, typeof(OpCodeMemory));
+ Set("0100110001000x", InstEmit.Lop, typeof(OpCodeLopCbuf));
+ Set("0011100001000x", InstEmit.Lop, typeof(OpCodeLopImm));
+ Set("000001xxxxxxxx", InstEmit.Lop, typeof(OpCodeLopImm32));
+ Set("0101110001000x", InstEmit.Lop, typeof(OpCodeLopReg));
+ Set("0010000xxxxxxx", InstEmit.Lop3, typeof(OpCodeLopCbuf));
+ Set("001111xxxxxxxx", InstEmit.Lop3, typeof(OpCodeLopImm));
+ Set("0101101111100x", InstEmit.Lop3, typeof(OpCodeLopReg));
+ Set("0100110010011x", InstEmit.Mov, typeof(OpCodeAluCbuf));
+ Set("0011100x10011x", InstEmit.Mov, typeof(OpCodeAluImm));
+ Set("000000010000xx", InstEmit.Mov, typeof(OpCodeAluImm32));
+ Set("0101110010011x", InstEmit.Mov, typeof(OpCodeAluReg));
+ Set("0101000010000x", InstEmit.Mufu, typeof(OpCodeFArith));
+ Set("1111101111100x", InstEmit.Out, typeof(OpCode));
+ Set("111000101010xx", InstEmit.Pbk, typeof(OpCodeSsy));
+ Set("0101000010010x", InstEmit.Psetp, typeof(OpCodePsetp));
+ Set("0100110010010x", InstEmit.Rro, typeof(OpCodeFArithCbuf));
+ Set("0011100x10010x", InstEmit.Rro, typeof(OpCodeFArithImm));
+ Set("0101110010010x", InstEmit.Rro, typeof(OpCodeFArithReg));
+ Set("1111000011001x", InstEmit.S2r, typeof(OpCodeAlu));
+ Set("0100110010100x", InstEmit.Sel, typeof(OpCodeAluCbuf));
+ Set("0011100x10100x", InstEmit.Sel, typeof(OpCodeAluImm));
+ Set("0101110010100x", InstEmit.Sel, typeof(OpCodeAluReg));
+ Set("1110111100010x", InstEmit.Shfl, typeof(OpCodeShuffle));
+ Set("0100110001001x", InstEmit.Shl, typeof(OpCodeAluCbuf));
+ Set("0011100x01001x", InstEmit.Shl, typeof(OpCodeAluImm));
+ Set("0101110001001x", InstEmit.Shl, typeof(OpCodeAluReg));
+ Set("0100110000101x", InstEmit.Shr, typeof(OpCodeAluCbuf));
+ Set("0011100x00101x", InstEmit.Shr, typeof(OpCodeAluImm));
+ Set("0101110000101x", InstEmit.Shr, typeof(OpCodeAluReg));
+ Set("111000101001xx", InstEmit.Ssy, typeof(OpCodeSsy));
+ Set("1110111101010x", InstEmit.St, typeof(OpCodeMemory));
+ Set("1110111011011x", InstEmit.Stg, typeof(OpCodeMemory));
+ Set("11101011001xxx", InstEmit.Sust, typeof(OpCodeImage));
+ Set("1111000011111x", InstEmit.Sync, typeof(OpCodeSync));
+ Set("110000xxxx111x", InstEmit.Tex, typeof(OpCodeTex));
+ Set("1101111010111x", InstEmit.TexB, typeof(OpCodeTexB));
+ Set("1101x00xxxxxxx", InstEmit.Texs, typeof(OpCodeTexs));
+ Set("1101x01xxxxxxx", InstEmit.Texs, typeof(OpCodeTlds));
+ Set("11011111x0xxxx", InstEmit.Texs, typeof(OpCodeTld4s));
+ Set("11011100xx111x", InstEmit.Tld, typeof(OpCodeTld));
+ Set("11011101xx111x", InstEmit.TldB, typeof(OpCodeTld));
+ Set("110010xxxx111x", InstEmit.Tld4, typeof(OpCodeTld4));
+ Set("110111100x1110", InstEmit.Txd, typeof(OpCodeTxd));
+ Set("1101111101001x", InstEmit.Txq, typeof(OpCodeTex));
+ Set("1101111101010x", InstEmit.TxqB, typeof(OpCodeTex));
+ Set("01011111xxxxxx", InstEmit.Vmad, typeof(OpCodeVideo));
+ Set("0100111xxxxxxx", InstEmit.Xmad, typeof(OpCodeAluCbuf));
+ Set("0011011x00xxxx", InstEmit.Xmad, typeof(OpCodeAluImm));
+ Set("010100010xxxxx", InstEmit.Xmad, typeof(OpCodeAluRegCbuf));
+ Set("0101101100xxxx", InstEmit.Xmad, typeof(OpCodeAluReg));
#endregion
}
diff --git a/Ryujinx.Graphics.Shader/Decoders/OpCodeTextureScalar.cs b/Ryujinx.Graphics.Shader/Decoders/OpCodeTextureScalar.cs
index 1c175e30..543f8d13 100644
--- a/Ryujinx.Graphics.Shader/Decoders/OpCodeTextureScalar.cs
+++ b/Ryujinx.Graphics.Shader/Decoders/OpCodeTextureScalar.cs
@@ -39,7 +39,7 @@ namespace Ryujinx.Graphics.Shader.Decoders
protected int RawType;
- public bool IsFp16 { get; }
+ public bool IsFp16 { get; protected set; }
public OpCodeTextureScalar(InstEmitter emitter, ulong address, long opCode) : base(emitter, address, opCode)
{
diff --git a/Ryujinx.Graphics.Shader/Decoders/OpCodeTld4s.cs b/Ryujinx.Graphics.Shader/Decoders/OpCodeTld4s.cs
index 7e51a9e5..fd3240a0 100644
--- a/Ryujinx.Graphics.Shader/Decoders/OpCodeTld4s.cs
+++ b/Ryujinx.Graphics.Shader/Decoders/OpCodeTld4s.cs
@@ -16,6 +16,8 @@ namespace Ryujinx.Graphics.Shader.Decoders
GatherCompIndex = opCode.Extract(52, 2);
+ IsFp16 = opCode.Extract(55);
+
ComponentMask = Rd1.IsRZ ? 3 : 0xf;
}
}
diff --git a/Ryujinx.Graphics.Shader/Decoders/OpCodeTxd.cs b/Ryujinx.Graphics.Shader/Decoders/OpCodeTxd.cs
new file mode 100644
index 00000000..25df1f81
--- /dev/null
+++ b/Ryujinx.Graphics.Shader/Decoders/OpCodeTxd.cs
@@ -0,0 +1,18 @@
+using Ryujinx.Graphics.Shader.Instructions;
+
+namespace Ryujinx.Graphics.Shader.Decoders
+{
+ // Decoded form of the TXD instruction. Adds the TXD-specific fields on top of
+ // whatever OpCodeTexture already decodes from the same raw opcode.
+ class OpCodeTxd : OpCodeTexture
+ {
+ // Taken from bit 54 of the raw opcode.
+ public bool IsBindless { get; }
+
+ public OpCodeTxd(InstEmitter emitter, ulong address, long opCode) : base(emitter, address, opCode)
+ {
+ // Bit 35 of the raw opcode tells whether texel offsets are present.
+ HasOffset = opCode.Extract(35);
+
+ IsBindless = opCode.Extract(54);
+
+ // This opcode always reports no LOD mode, regardless of the raw bits.
+ LodMode = TextureLodMode.None;
+ }
+ }
+} \ No newline at end of file
diff --git a/Ryujinx.Graphics.Shader/Decoders/ShuffleType.cs b/Ryujinx.Graphics.Shader/Decoders/ShuffleType.cs
new file mode 100644
index 00000000..2892c8dd
--- /dev/null
+++ b/Ryujinx.Graphics.Shader/Decoders/ShuffleType.cs
@@ -0,0 +1,10 @@
+namespace Ryujinx.Graphics.Shader.Decoders
+{
+ // Shuffle variant selected by a decoded shuffle opcode. The Shfl emitter
+ // dispatches on this value to pick which shuffle operation it emits.
+ enum ShuffleType
+ {
+ // Emitted as the Shuffle operation.
+ Indexed = 0,
+ // Emitted as the ShuffleUp operation.
+ Up = 1,
+ // Emitted as the ShuffleDown operation.
+ Down = 2,
+ // Emitted as the ShuffleXor operation.
+ Butterfly = 3
+ }
+} \ No newline at end of file
diff --git a/Ryujinx.Graphics.Shader/Decoders/SystemRegister.cs b/Ryujinx.Graphics.Shader/Decoders/SystemRegister.cs
index 1f51d93c..2f3f4492 100644
--- a/Ryujinx.Graphics.Shader/Decoders/SystemRegister.cs
+++ b/Ryujinx.Graphics.Shader/Decoders/SystemRegister.cs
@@ -2,12 +2,13 @@ namespace Ryujinx.Graphics.Shader.Decoders
{
enum SystemRegister
{
- ThreadId = 0x20,
- ThreadIdX = 0x21,
- ThreadIdY = 0x22,
- ThreadIdZ = 0x23,
- CtaIdX = 0x25,
- CtaIdY = 0x26,
- CtaIdZ = 0x27
+ YDirection = 0x12,
+ ThreadId = 0x20,
+ ThreadIdX = 0x21,
+ ThreadIdY = 0x22,
+ ThreadIdZ = 0x23,
+ CtaIdX = 0x25,
+ CtaIdY = 0x26,
+ CtaIdZ = 0x27
}
} \ No newline at end of file
diff --git a/Ryujinx.Graphics.Shader/Instructions/InstEmitAlu.cs b/Ryujinx.Graphics.Shader/Instructions/InstEmitAlu.cs
index 5cbb3b73..8d14b0cf 100644
--- a/Ryujinx.Graphics.Shader/Instructions/InstEmitAlu.cs
+++ b/Ryujinx.Graphics.Shader/Instructions/InstEmitAlu.cs
@@ -39,6 +39,23 @@ namespace Ryujinx.Graphics.Shader.Instructions
// TODO: CC, X, corner cases
}
+ // Emits the IR for the BFI instruction.
+ // Source B packs the insert parameters: its low 8 bits are used as the bit
+ // position and bits 8-15 as the field size. Source C and source A are passed,
+ // in that order, as the first two operands of the BitfieldInsert operation,
+ // and the result is copied into the destination register.
+ public static void Bfi(EmitterContext context)
+ {
+     Operand srcA = GetSrcA(context);
+     Operand srcB = GetSrcB(context);
+     Operand srcC = GetSrcC(context);
+
+     Operand position = context.BitwiseAnd(srcB, Const(0xff));
+
+     Operand size = context.BitfieldExtractU32(srcB, Const(8), Const(8));
+
+     Operand res = context.BitfieldInsert(srcC, srcA, position, size);
+
+     context.Copy(GetDest(context), res);
+ }
+
+
public static void Csetp(EmitterContext context)
{
OpCodePsetp op = (OpCodePsetp)context.CurrOp;
@@ -58,6 +75,28 @@ namespace Ryujinx.Graphics.Shader.Instructions
context.Copy(Register(op.Predicate0), p1Res);
}
+ // Emits the IR for the FLO instruction.
+ // Three modifier bits are read from the raw opcode: bit 40 inverts source B
+ // before the search, bit 41 XORs the result with 31, and bit 48 selects the
+ // signed variant of the find-first-set operation instead of the unsigned one.
+ public static void Flo(EmitterContext context)
+ {
+     OpCodeAlu op = (OpCodeAlu)context.CurrOp;
+
+     var rawOpCode = op.RawOpCode;
+
+     bool isSigned   = rawOpCode.Extract(48);
+     bool countZeros = rawOpCode.Extract(41);
+     bool invert     = rawOpCode.Extract(40);
+
+     Operand source = context.BitwiseNot(GetSrcB(context), invert);
+
+     Operand result;
+
+     if (isSigned)
+     {
+         result = context.FindFirstSetS32(source);
+     }
+     else
+     {
+         result = context.FindFirstSetU32(source);
+     }
+
+     if (countZeros)
+     {
+         // NOTE(review): presumably turns the bit index into a count from the
+         // most significant bit - confirm against the hardware behaviour.
+         result = context.BitwiseExclusiveOr(result, Const(31));
+     }
+
+     context.Copy(GetDest(context), result);
+ }
+
+
public static void Iadd(EmitterContext context)
{
OpCodeAlu op = (OpCodeAlu)context.CurrOp;
diff --git a/Ryujinx.Graphics.Shader/Instructions/InstEmitFArith.cs b/Ryujinx.Graphics.Shader/Instructions/InstEmitFArith.cs
index 1a7d4251..4f7072eb 100644
--- a/Ryujinx.Graphics.Shader/Instructions/InstEmitFArith.cs
+++ b/Ryujinx.Graphics.Shader/Instructions/InstEmitFArith.cs
@@ -180,6 +180,22 @@ namespace Ryujinx.Graphics.Shader.Instructions
context.Copy(Register(op.Predicate0), p1Res);
}
+ // Emits the IR for the FSWZADD instruction.
+ // The 8-bit swizzle mask comes from bits 28-35 of the raw opcode and is
+ // forwarded, together with sources A and B, to the SwizzleAdd operation.
+ // The Z/N flags are then updated from the destination when the opcode asks for it.
+ public static void Fswzadd(EmitterContext context)
+ {
+     OpCodeAlu op = (OpCodeAlu)context.CurrOp;
+
+     int swizzleMask = op.RawOpCode.Extract(28, 8);
+
+     Operand lhs = GetSrcA(context);
+     Operand rhs = GetSrcB(context);
+
+     Operand destination = GetDest(context);
+
+     Operand sum = context.FPSwizzleAdd(lhs, rhs, swizzleMask);
+
+     context.Copy(destination, sum);
+
+     SetFPZnFlags(context, destination, op.SetCondCode);
+ }
+
+
public static void Hadd2(EmitterContext context)
{
Hadd2Hmul2Impl(context, isAdd: true);
diff --git a/Ryujinx.Graphics.Shader/Instructions/InstEmitFlow.cs b/Ryujinx.Graphics.Shader/Instructions/InstEmitFlow.cs
index fb76e06a..e17c9d6c 100644
--- a/Ryujinx.Graphics.Shader/Instructions/InstEmitFlow.cs
+++ b/Ryujinx.Graphics.Shader/Instructions/InstEmitFlow.cs
@@ -15,6 +15,11 @@ namespace Ryujinx.Graphics.Shader.Instructions
EmitBranch(context, context.CurrBlock.Branch.Address);
}
+ // BRK is emitted through the same helper as SYNC.
+ public static void Brk(EmitterContext context) => EmitBrkOrSync(context);
+
public static void Exit(EmitterContext context)
{
OpCodeExit op = (OpCodeExit)context.CurrOp;
@@ -32,8 +37,23 @@ namespace Ryujinx.Graphics.Shader.Instructions
context.Discard();
}
+ // PBK is emitted through the same helper as SSY.
+ public static void Pbk(EmitterContext context) => EmitPbkOrSsy(context);
+
public static void Ssy(EmitterContext context)
{
+ // SSY and PBK share one implementation.
+ EmitPbkOrSsy(context);
+ }
+
+ // SYNC is emitted through the same helper as BRK.
+ public static void Sync(EmitterContext context) => EmitBrkOrSync(context);
+
+ private static void EmitPbkOrSsy(EmitterContext context)
+ {
OpCodeSsy op = (OpCodeSsy)context.CurrOp;
foreach (KeyValuePair<OpCodeSync, Operand> kv in op.Syncs)
@@ -48,7 +68,7 @@ namespace Ryujinx.Graphics.Shader.Instructions
}
}
- public static void Sync(EmitterContext context)
+ private static void EmitBrkOrSync(EmitterContext context)
{
OpCodeSync op = (OpCodeSync)context.CurrOp;
diff --git a/Ryujinx.Graphics.Shader/Instructions/InstEmitMove.cs b/Ryujinx.Graphics.Shader/Instructions/InstEmitMove.cs
index b9bb18d9..f0792245 100644
--- a/Ryujinx.Graphics.Shader/Instructions/InstEmitMove.cs
+++ b/Ryujinx.Graphics.Shader/Instructions/InstEmitMove.cs
@@ -27,6 +27,9 @@ namespace Ryujinx.Graphics.Shader.Instructions
switch (sysReg)
{
+ // TODO: Use value from Y direction GPU register.
+ case SystemRegister.YDirection: src = ConstF(1); break;
+
case SystemRegister.ThreadId:
{
Operand tidX = Attribute(AttributeConsts.ThreadIdX);
@@ -67,5 +70,37 @@ namespace Ryujinx.Graphics.Shader.Instructions
context.Copy(GetDest(context), res);
}
+
+ // Emits the IR for the SHFL instruction.
+ // Source A is the value being shuffled. Sources B and C each come either from
+ // an immediate encoded in the opcode or from a register, depending on the
+ // opcode's IsBImmediate / IsCImmediate flags. The shuffle type picks which of
+ // the four shuffle operations is emitted; its result goes to the destination.
+ public static void Shfl(EmitterContext context)
+ {
+     OpCodeShuffle op = (OpCodeShuffle)context.CurrOp;
+
+     // NOTE(review): op.Predicate48 is decoded but nothing here reads or writes
+     // it; presumably the instruction's predicate output is not emulated yet - confirm.
+
+     Operand srcA = GetSrcA(context);
+
+     Operand srcB = op.IsBImmediate ? Const(op.ImmediateB) : Register(op.Rb);
+     Operand srcC = op.IsCImmediate ? Const(op.ImmediateC) : Register(op.Rc);
+
+     // Stays null if the shuffle type matches none of the cases below.
+     Operand res = null;
+
+     switch (op.ShuffleType)
+     {
+         case ShuffleType.Indexed:
+             res = context.Shuffle(srcA, srcB, srcC);
+             break;
+         case ShuffleType.Up:
+             res = context.ShuffleUp(srcA, srcB, srcC);
+             break;
+         case ShuffleType.Down:
+             res = context.ShuffleDown(srcA, srcB, srcC);
+             break;
+         case ShuffleType.Butterfly:
+             res = context.ShuffleXor(srcA, srcB, srcC);
+             break;
+     }
+
+     context.Copy(GetDest(context), res);
+ }
}
} \ No newline at end of file
diff --git a/Ryujinx.Graphics.Shader/Instructions/InstEmitTexture.cs b/Ryujinx.Graphics.Shader/Instructions/InstEmitTexture.cs
index 39672789..2654a05b 100644
--- a/Ryujinx.Graphics.Shader/Instructions/InstEmitTexture.cs
+++ b/Ryujinx.Graphics.Shader/Instructions/InstEmitTexture.cs
@@ -102,22 +102,22 @@ namespace Ryujinx.Graphics.Shader.Instructions
public static void Tex(EmitterContext context)
{
- Tex(context, TextureFlags.None);
+ EmitTextureSample(context, TextureFlags.None);
}
public static void TexB(EmitterContext context)
{
- Tex(context, TextureFlags.Bindless);
+ EmitTextureSample(context, TextureFlags.Bindless);
}
public static void Tld(EmitterContext context)
{
- Tex(context, TextureFlags.IntCoords);
+ EmitTextureSample(context, TextureFlags.IntCoords);
}
public static void TldB(EmitterContext context)
{
- Tex(context, TextureFlags.IntCoords | TextureFlags.Bindless);
+ EmitTextureSample(context, TextureFlags.IntCoords | TextureFlags.Bindless);
}
public static void Texs(EmitterContext context)
@@ -512,17 +512,128 @@ namespace Ryujinx.Graphics.Shader.Instructions
}
}
+ // Emits the IR for the TXD instruction (texture sample flagged with
+ // TextureFlags.Derivatives). One TextureSample operation is emitted per
+ // component enabled in the opcode's component mask; all of them share the
+ // same source list, built below in a fixed order.
+ public static void Txd(EmitterContext context)
+ {
+ OpCodeTxd op = (OpCodeTxd)context.CurrOp;
+
+ // Nothing is emitted when the destination is the zero register.
+ if (op.Rd.IsRZ)
+ {
+ return;
+ }
+
+ int raIndex = op.Ra.Index;
+ int rbIndex = op.Rb.Index;
+
+ // Returns a copy of the next register in the sequence starting at op.Ra and
+ // advances the sequence; yields constant 0 once the index is past the zero register.
+ Operand Ra()
+ {
+ if (raIndex > RegisterConsts.RegisterZeroIndex)
+ {
+ return Const(0);
+ }
+
+ return context.Copy(Register(raIndex++, RegisterType.Gpr));
+ }
+
+ // Same as Ra(), but for the register sequence starting at op.Rb.
+ Operand Rb()
+ {
+ if (rbIndex > RegisterConsts.RegisterZeroIndex)
+ {
+ return Const(0);
+ }
+
+ return context.Copy(Register(rbIndex++, RegisterType.Gpr));
+ }
+
+ TextureFlags flags = TextureFlags.Derivatives;
+
+ List<Operand> sourcesList = new List<Operand>();
+
+ // For bindless opcodes the first Ra register is added ahead of the
+ // coordinates (presumably the texture handle - confirm).
+ if (op.IsBindless)
+ {
+ sourcesList.Add(Ra());
+ }
+
+ SamplerType type = GetSamplerType(op.Dimensions);
+
+ int coordsCount = type.GetDimensions();
+
+ // One Ra register per coordinate.
+ for (int index = 0; index < coordsCount; index++)
+ {
+ sourcesList.Add(Ra());
+ }
+
+ // The register following the coordinates is always consumed; its bit
+ // fields are only used by the array and offset paths below.
+ Operand packedParams = Ra();
+
+ if (op.IsArray)
+ {
+ // Low 16 bits of the packed register (presumably the array index - confirm).
+ sourcesList.Add(context.BitwiseAnd(packedParams, Const(0xffff)));
+
+ type |= SamplerType.Array;
+ }
+
+ // Derivatives (X and Y).
+ for (int dIndex = 0; dIndex < 2 * coordsCount; dIndex++)
+ {
+ sourcesList.Add(Rb());
+ }
+
+ if (op.HasOffset)
+ {
+ // One signed 4-bit field per coordinate, starting at bit 16 of the packed register.
+ for (int index = 0; index < coordsCount; index++)
+ {
+ sourcesList.Add(context.BitfieldExtractS32(packedParams, Const(16 + index * 4), Const(4)));
+ }
+
+ flags |= TextureFlags.Offset;
+ }
+
+ Operand[] sources = sourcesList.ToArray();
+
+ int rdIndex = op.Rd.Index;
+
+ // Returns the next register in the sequence starting at op.Rd and advances
+ // the sequence; yields constant 0 once the index is past the zero register.
+ Operand GetDest()
+ {
+ if (rdIndex > RegisterConsts.RegisterZeroIndex)
+ {
+ return Const(0);
+ }
+
+ return Register(rdIndex++, RegisterType.Gpr);
+ }
+
+ // The immediate is only used as the handle for non-bindless opcodes.
+ int handle = !op.IsBindless ? op.Immediate : 0;
+
+ // Walk the component mask bit by bit; only enabled components consume a
+ // destination register, so destinations stay contiguous.
+ for (int compMask = op.ComponentMask, compIndex = 0; compMask != 0; compMask >>= 1, compIndex++)
+ {
+ if ((compMask & 1) != 0)
+ {
+ Operand dest = GetDest();
+
+ TextureOperation operation = new TextureOperation(
+ Instruction.TextureSample,
+ type,
+ flags,
+ handle,
+ compIndex,
+ dest,
+ sources);
+
+ context.Add(operation);
+ }
+ }
+ }
+
public static void Txq(EmitterContext context)
{
- Txq(context, bindless: false);
+ EmitTextureQuery(context, bindless: false);
}
public static void TxqB(EmitterContext context)
{
- Txq(context, bindless: true);
+ EmitTextureQuery(context, bindless: true);
}
- private static void Txq(EmitterContext context, bool bindless)
+ private static void EmitTextureQuery(EmitterContext context, bool bindless)
{
OpCodeTex op = (OpCodeTex)context.CurrOp;
@@ -597,7 +708,7 @@ namespace Ryujinx.Graphics.Shader.Instructions
}
}
- private static void Tex(EmitterContext context, TextureFlags flags)
+ private static void EmitTextureSample(EmitterContext context, TextureFlags flags)
{
OpCodeTexture op = (OpCodeTexture)context.CurrOp;
diff --git a/Ryujinx.Graphics.Shader/IntermediateRepresentation/Instruction.cs b/Ryujinx.Graphics.Shader/IntermediateRepresentation/Instruction.cs
index 88918f3f..46c6b57f 100644
--- a/Ryujinx.Graphics.Shader/IntermediateRepresentation/Instruction.cs
+++ b/Ryujinx.Graphics.Shader/IntermediateRepresentation/Instruction.cs
@@ -7,6 +7,7 @@ namespace Ryujinx.Graphics.Shader.IntermediateRepresentation
{
Absolute = 1,
Add,
+ BitCount,
BitfieldExtractS32,
BitfieldExtractU32,
BitfieldInsert,
@@ -38,11 +39,15 @@ namespace Ryujinx.Graphics.Shader.IntermediateRepresentation
ConvertU32ToFP,
Copy,
Cosine,
+ Ddx,
+ Ddy,
Discard,
Divide,
EmitVertex,
EndPrimitive,
ExponentB2,
+ FindFirstSetS32,
+ FindFirstSetU32,
Floor,
FusedMultiplyAdd,
ImageLoad,
@@ -75,12 +80,17 @@ namespace Ryujinx.Graphics.Shader.IntermediateRepresentation
ShiftLeft,
ShiftRightS32,
ShiftRightU32,
+ Shuffle,
+ ShuffleDown,
+ ShuffleUp,
+ ShuffleXor,
Sine,
SquareRoot,
StoreGlobal,
StoreLocal,
StoreStorage,
Subtract,
+ SwizzleAdd,
TextureSample,
TextureSize,
Truncate,
diff --git a/Ryujinx.Graphics.Shader/IntermediateRepresentation/Operation.cs b/Ryujinx.Graphics.Shader/IntermediateRepresentation/Operation.cs
index fc01d47e..0d7379a8 100644
--- a/Ryujinx.Graphics.Shader/IntermediateRepresentation/Operation.cs
+++ b/Ryujinx.Graphics.Shader/IntermediateRepresentation/Operation.cs
@@ -80,7 +80,12 @@ namespace Ryujinx.Graphics.Shader.IntermediateRepresentation
public void TurnIntoCopy(Operand source)
{
- Inst = Instruction.Copy;
+ TurnInto(Instruction.Copy, source);
+ }
+
+ public void TurnInto(Instruction newInst, Operand source)
+ {
+ Inst = newInst;
foreach (Operand oldSrc in _sources)
{
diff --git a/Ryujinx.Graphics.Shader/IntermediateRepresentation/TextureFlags.cs b/Ryujinx.Graphics.Shader/IntermediateRepresentation/TextureFlags.cs
index 5f0a8427..5334afac 100644
--- a/Ryujinx.Graphics.Shader/IntermediateRepresentation/TextureFlags.cs
+++ b/Ryujinx.Graphics.Shader/IntermediateRepresentation/TextureFlags.cs
@@ -5,13 +5,14 @@ namespace Ryujinx.Graphics.Shader.IntermediateRepresentation
[Flags]
enum TextureFlags
{
- None = 0,
- Bindless = 1 << 0,
- Gather = 1 << 1,
- IntCoords = 1 << 2,
- LodBias = 1 << 3,
- LodLevel = 1 << 4,
- Offset = 1 << 5,
- Offsets = 1 << 6
+ None = 0,
+ Bindless = 1 << 0,
+ Gather = 1 << 1,
+ Derivatives = 1 << 2,
+ IntCoords = 1 << 3,
+ LodBias = 1 << 4,
+ LodLevel = 1 << 5,
+ Offset = 1 << 6,
+ Offsets = 1 << 7
}
} \ No newline at end of file
diff --git a/Ryujinx.Graphics.Shader/Ryujinx.Graphics.Shader.csproj b/Ryujinx.Graphics.Shader/Ryujinx.Graphics.Shader.csproj
index ea83d296..e10d1eda 100644
--- a/Ryujinx.Graphics.Shader/Ryujinx.Graphics.Shader.csproj
+++ b/Ryujinx.Graphics.Shader/Ryujinx.Graphics.Shader.csproj
@@ -1,5 +1,17 @@
<Project Sdk="Microsoft.NET.Sdk">
+ <ItemGroup>
+ <EmbeddedResource Include="CodeGen\Glsl\HelperFunctions\Shuffle.glsl" />
+ <EmbeddedResource Include="CodeGen\Glsl\HelperFunctions\ShuffleDown.glsl" />
+ <EmbeddedResource Include="CodeGen\Glsl\HelperFunctions\ShuffleUp.glsl" />
+ <EmbeddedResource Include="CodeGen\Glsl\HelperFunctions\ShuffleXor.glsl" />
+ <EmbeddedResource Include="CodeGen\Glsl\HelperFunctions\SwizzleAdd.glsl" />
+ </ItemGroup>
+
+ <ItemGroup>
+ <ProjectReference Include="..\Ryujinx.Common\Ryujinx.Common.csproj" />
+ </ItemGroup>
+
<PropertyGroup>
<TargetFramework>netcoreapp3.0</TargetFramework>
</PropertyGroup>
diff --git a/Ryujinx.Graphics.Shader/StructuredIr/HelperFunctionsMask.cs b/Ryujinx.Graphics.Shader/StructuredIr/HelperFunctionsMask.cs
new file mode 100644
index 00000000..e2eee78d
--- /dev/null
+++ b/Ryujinx.Graphics.Shader/StructuredIr/HelperFunctionsMask.cs
@@ -0,0 +1,14 @@
+using System;
+
+namespace Ryujinx.Graphics.Shader.StructuredIr
+{
+    // Bit set recording which helper functions a translated program needs.
+    // Each flag is raised when the matching instruction (Shuffle, ShuffleDown,
+    // ShuffleUp, ShuffleXor, SwizzleAdd) is encountered while building the
+    // structured program.
+    [Flags]
+    enum HelperFunctionsMask
+    {
+        // No helper function required; this is the default value of the enum.
+        None        = 0,
+        Shuffle     = 1 << 0,
+        ShuffleDown = 1 << 1,
+        ShuffleUp   = 1 << 2,
+        ShuffleXor  = 1 << 3,
+        SwizzleAdd  = 1 << 4
+    }
+} \ No newline at end of file
diff --git a/Ryujinx.Graphics.Shader/StructuredIr/InstructionInfo.cs b/Ryujinx.Graphics.Shader/StructuredIr/InstructionInfo.cs
index 675a9678..381cf292 100644
--- a/Ryujinx.Graphics.Shader/StructuredIr/InstructionInfo.cs
+++ b/Ryujinx.Graphics.Shader/StructuredIr/InstructionInfo.cs
@@ -27,6 +27,7 @@ namespace Ryujinx.Graphics.Shader.StructuredIr
// Inst Destination type Source 1 type Source 2 type Source 3 type Source 4 type
Add(Instruction.Absolute, VariableType.Scalar, VariableType.Scalar);
Add(Instruction.Add, VariableType.Scalar, VariableType.Scalar, VariableType.Scalar);
+ Add(Instruction.BitCount, VariableType.Int, VariableType.Int);
Add(Instruction.BitfieldExtractS32, VariableType.S32, VariableType.S32, VariableType.S32, VariableType.S32);
Add(Instruction.BitfieldExtractU32, VariableType.U32, VariableType.U32, VariableType.S32, VariableType.S32);
Add(Instruction.BitfieldInsert, VariableType.Int, VariableType.Int, VariableType.Int, VariableType.S32, VariableType.S32);
@@ -55,8 +56,12 @@ namespace Ryujinx.Graphics.Shader.StructuredIr
Add(Instruction.ConvertS32ToFP, VariableType.F32, VariableType.S32);
Add(Instruction.ConvertU32ToFP, VariableType.F32, VariableType.U32);
Add(Instruction.Cosine, VariableType.Scalar, VariableType.Scalar);
+ Add(Instruction.Ddx, VariableType.F32, VariableType.F32);
+ Add(Instruction.Ddy, VariableType.F32, VariableType.F32);
Add(Instruction.Divide, VariableType.Scalar, VariableType.Scalar, VariableType.Scalar);
Add(Instruction.ExponentB2, VariableType.Scalar, VariableType.Scalar);
+ Add(Instruction.FindFirstSetS32, VariableType.S32, VariableType.S32);
+ Add(Instruction.FindFirstSetU32, VariableType.S32, VariableType.U32);
Add(Instruction.Floor, VariableType.F32, VariableType.F32);
Add(Instruction.FusedMultiplyAdd, VariableType.F32, VariableType.F32, VariableType.F32, VariableType.F32);
Add(Instruction.ImageLoad, VariableType.F32);
@@ -75,6 +80,10 @@ namespace Ryujinx.Graphics.Shader.StructuredIr
Add(Instruction.ShiftLeft, VariableType.Int, VariableType.Int, VariableType.Int);
Add(Instruction.ShiftRightS32, VariableType.S32, VariableType.S32, VariableType.Int);
Add(Instruction.ShiftRightU32, VariableType.U32, VariableType.U32, VariableType.Int);
+ Add(Instruction.Shuffle, VariableType.F32, VariableType.F32, VariableType.U32, VariableType.U32);
+ Add(Instruction.ShuffleDown, VariableType.F32, VariableType.F32, VariableType.U32, VariableType.U32);
+ Add(Instruction.ShuffleUp, VariableType.F32, VariableType.F32, VariableType.U32, VariableType.U32);
+ Add(Instruction.ShuffleXor, VariableType.F32, VariableType.F32, VariableType.U32, VariableType.U32);
Add(Instruction.Maximum, VariableType.Scalar, VariableType.Scalar, VariableType.Scalar);
Add(Instruction.MaximumU32, VariableType.U32, VariableType.U32, VariableType.U32);
Add(Instruction.Minimum, VariableType.Scalar, VariableType.Scalar, VariableType.Scalar);
@@ -90,6 +99,7 @@ namespace Ryujinx.Graphics.Shader.StructuredIr
Add(Instruction.StoreLocal, VariableType.None, VariableType.S32, VariableType.F32);
Add(Instruction.StoreStorage, VariableType.None, VariableType.S32, VariableType.S32, VariableType.F32);
Add(Instruction.Subtract, VariableType.Scalar, VariableType.Scalar, VariableType.Scalar);
+ Add(Instruction.SwizzleAdd, VariableType.F32, VariableType.F32, VariableType.F32, VariableType.S32);
Add(Instruction.TextureSample, VariableType.F32);
Add(Instruction.TextureSize, VariableType.S32, VariableType.S32, VariableType.S32);
Add(Instruction.Truncate, VariableType.F32, VariableType.F32);
diff --git a/Ryujinx.Graphics.Shader/StructuredIr/StructuredProgram.cs b/Ryujinx.Graphics.Shader/StructuredIr/StructuredProgram.cs
index 53ca6700..c4ffbe1a 100644
--- a/Ryujinx.Graphics.Shader/StructuredIr/StructuredProgram.cs
+++ b/Ryujinx.Graphics.Shader/StructuredIr/StructuredProgram.cs
@@ -179,6 +179,28 @@ namespace Ryujinx.Graphics.Shader.StructuredIr
context.AddNode(new AstOperation(inst, sources));
}
+
+ // These instructions need to be emulated using helper functions
+ // because they are NVIDIA specific. These flags help the backend
+ // decide which helper functions are needed in the final generated code.
+ switch (operation.Inst)
+ {
+ case Instruction.Shuffle:
+ context.Info.HelperFunctionsMask |= HelperFunctionsMask.Shuffle;
+ break;
+ case Instruction.ShuffleDown:
+ context.Info.HelperFunctionsMask |= HelperFunctionsMask.ShuffleDown;
+ break;
+ case Instruction.ShuffleUp:
+ context.Info.HelperFunctionsMask |= HelperFunctionsMask.ShuffleUp;
+ break;
+ case Instruction.ShuffleXor:
+ context.Info.HelperFunctionsMask |= HelperFunctionsMask.ShuffleXor;
+ break;
+ case Instruction.SwizzleAdd:
+ context.Info.HelperFunctionsMask |= HelperFunctionsMask.SwizzleAdd;
+ break;
+ }
}
private static VariableType GetVarTypeFromUses(Operand dest)
diff --git a/Ryujinx.Graphics.Shader/StructuredIr/StructuredProgramInfo.cs b/Ryujinx.Graphics.Shader/StructuredIr/StructuredProgramInfo.cs
index 1094fba2..0ef4bde3 100644
--- a/Ryujinx.Graphics.Shader/StructuredIr/StructuredProgramInfo.cs
+++ b/Ryujinx.Graphics.Shader/StructuredIr/StructuredProgramInfo.cs
@@ -18,6 +18,8 @@ namespace Ryujinx.Graphics.Shader.StructuredIr
public bool UsesInstanceId { get; set; }
+ public HelperFunctionsMask HelperFunctionsMask { get; set; }
+
public HashSet<AstTextureOperation> Samplers { get; }
public HashSet<AstTextureOperation> Images { get; }
diff --git a/Ryujinx.Graphics.Shader/Translation/EmitterContextInsts.cs b/Ryujinx.Graphics.Shader/Translation/EmitterContextInsts.cs
index 7d64e7ca..58a37b52 100644
--- a/Ryujinx.Graphics.Shader/Translation/EmitterContextInsts.cs
+++ b/Ryujinx.Graphics.Shader/Translation/EmitterContextInsts.cs
@@ -6,6 +6,11 @@ namespace Ryujinx.Graphics.Shader.Translation
{
static class EmitterContextInsts
{
+ // Adds a BitCount operation on 'a' with a fresh local as its destination.
+ public static Operand BitCount(this EmitterContext context, Operand a)
+     => context.Add(Instruction.BitCount, Local(), a);
+
public static Operand BitfieldExtractS32(this EmitterContext context, Operand a, Operand b, Operand c)
{
return context.Add(Instruction.BitfieldExtractS32, Local(), a, b, c);
@@ -106,6 +111,16 @@ namespace Ryujinx.Graphics.Shader.Translation
return context.Add(Instruction.EndPrimitive);
}
+ // Adds a FindFirstSetS32 operation on 'a' with a fresh local as its destination.
+ public static Operand FindFirstSetS32(this EmitterContext context, Operand a)
+     => context.Add(Instruction.FindFirstSetS32, Local(), a);
+
+ // Adds a FindFirstSetU32 operation on 'a' with a fresh local as its destination.
+ public static Operand FindFirstSetU32(this EmitterContext context, Operand a)
+     => context.Add(Instruction.FindFirstSetU32, Local(), a);
+
public static Operand FPAbsNeg(this EmitterContext context, Operand a, bool abs, bool neg)
{
return context.FPNegate(context.FPAbsolute(a, abs), neg);
@@ -256,6 +271,11 @@ namespace Ryujinx.Graphics.Shader.Translation
return context.Add(Instruction.Truncate, Local(), a);
}
+ // Adds a SwizzleAdd operation on 'a' and 'b'; the integer mask is passed
+ // along as a constant third source.
+ public static Operand FPSwizzleAdd(this EmitterContext context, Operand a, Operand b, int mask)
+     => context.Add(Instruction.SwizzleAdd, Local(), a, b, Const(mask));
+
public static Operand IAbsNeg(this EmitterContext context, Operand a, bool abs, bool neg)
{
return context.INegate(context.IAbsolute(a, abs), neg);
@@ -418,6 +438,26 @@ namespace Ryujinx.Graphics.Shader.Translation
return context.Add(Instruction.ShiftRightU32, Local(), a, b);
}
+ // Adds a Shuffle operation with sources 'a', 'b' and 'c' and a fresh local destination.
+ public static Operand Shuffle(this EmitterContext context, Operand a, Operand b, Operand c)
+     => context.Add(Instruction.Shuffle, Local(), a, b, c);
+
+ // Adds a ShuffleDown operation with sources 'a', 'b' and 'c' and a fresh local destination.
+ public static Operand ShuffleDown(this EmitterContext context, Operand a, Operand b, Operand c)
+     => context.Add(Instruction.ShuffleDown, Local(), a, b, c);
+
+ // Adds a ShuffleUp operation with sources 'a', 'b' and 'c' and a fresh local destination.
+ public static Operand ShuffleUp(this EmitterContext context, Operand a, Operand b, Operand c)
+     => context.Add(Instruction.ShuffleUp, Local(), a, b, c);
+
+ // Adds a ShuffleXor operation with sources 'a', 'b' and 'c' and a fresh local destination.
+ public static Operand ShuffleXor(this EmitterContext context, Operand a, Operand b, Operand c)
+     => context.Add(Instruction.ShuffleXor, Local(), a, b, c);
+
public static Operand StoreGlobal(this EmitterContext context, Operand a, Operand b)
{
return context.Add(Instruction.StoreGlobal, null, a, b);
diff --git a/Ryujinx.Graphics.Shader/Translation/Optimizations/ConstantFolding.cs b/Ryujinx.Graphics.Shader/Translation/Optimizations/ConstantFolding.cs
index d64579b7..97852ac1 100644
--- a/Ryujinx.Graphics.Shader/Translation/Optimizations/ConstantFolding.cs
+++ b/Ryujinx.Graphics.Shader/Translation/Optimizations/ConstantFolding.cs
@@ -21,6 +21,10 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
EvaluateBinary(operation, (x, y) => x + y);
break;
+ case Instruction.BitCount:
+ EvaluateUnary(operation, (x) => BitCount(x));
+ break;
+
case Instruction.BitwiseAnd:
EvaluateBinary(operation, (x, y) => x & y);
break;
@@ -208,6 +212,21 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
return true;
}
+ /// <summary>
+ /// Counts how many of the bit positions 0 to 31 of <paramref name="value"/>
+ /// are reported as set by the Extract helper.
+ /// </summary>
+ /// <param name="value">Value whose bits are tested</param>
+ /// <returns>Number of positions for which Extract returned true</returns>
+ private static int BitCount(int value)
+ {
+     const int BitsPerInt = 32;
+
+     int setBits = 0;
+
+     for (int position = 0; position < BitsPerInt; position++)
+     {
+         setBits += value.Extract(position) ? 1 : 0;
+     }
+
+     return setBits;
+ }
+
private static void BitfieldExtractS32(Operation operation)
{
int value = GetBitfieldExtractValue(operation);
diff --git a/Ryujinx.Graphics.Shader/Translation/Optimizations/Optimizer.cs b/Ryujinx.Graphics.Shader/Translation/Optimizations/Optimizer.cs
index d5e57546..22d794a4 100644
--- a/Ryujinx.Graphics.Shader/Translation/Optimizations/Optimizer.cs
+++ b/Ryujinx.Graphics.Shader/Translation/Optimizations/Optimizer.cs
@@ -1,5 +1,6 @@
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using System.Collections.Generic;
+using System.Diagnostics;
using System.Linq;
namespace Ryujinx.Graphics.Shader.Translation.Optimizations
@@ -59,7 +60,8 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
modified = true;
}
- else if (operation.Inst == Instruction.PackHalf2x16 && PropagatePack(operation))
+ else if ((operation.Inst == Instruction.PackHalf2x16 && PropagatePack(operation)) ||
+ (operation.Inst == Instruction.ShuffleXor && MatchDdxOrDdy(operation)))
{
if (operation.Dest.UseOps.Count == 0)
{
@@ -135,6 +137,84 @@ namespace Ryujinx.Graphics.Shader.Translation.Optimizations
return modified;
}
+ /// <summary>
+ /// Looks for SwizzleAdd operations that consume the result of a ShuffleXor
+ /// in the specific form handled below, and turns each match into a Ddx or Ddy
+ /// operation on the shuffle's first source.
+ /// </summary>
+ /// <param name="operation">The ShuffleXor operation to inspect</param>
+ /// <returns>True if at least one use was turned into Ddx/Ddy, false otherwise</returns>
+ public static bool MatchDdxOrDdy(Operation operation)
+ {
+     // This method assumes that "operation.Inst" is ShuffleXor;
+     // the caller should check that before calling it.
+     Debug.Assert(operation.Inst == Instruction.ShuffleXor);
+
+     // SwizzleAdd masks that identify the DDX and DDY patterns.
+     const int DdxMask = 0b10011001;
+     const int DdyMask = 0b10100101;
+
+     bool modified = false;
+
+     Operand src2 = operation.GetSource(1);
+     Operand src3 = operation.GetSource(2);
+
+     // The second shuffle source must be the constant 1 (DDX) or 2 (DDY).
+     if (src2.Type != OperandType.Constant || (src2.Value != 1 && src2.Value != 2))
+     {
+         return false;
+     }
+
+     // The third shuffle source must be exactly this constant for the pattern to match.
+     if (src3.Type != OperandType.Constant || src3.Value != 0x1c03)
+     {
+         return false;
+     }
+
+     bool isDdy = src2.Value == 2;
+     bool isDdx = !isDdy;
+
+     // We can replace any use by a FSWZADD with DDX/DDY, when
+     // the following conditions are true:
+     // - The mask should be 0b10100101 for DDY, or 0b10011001 for DDX.
+     // - The first source operand must be the shuffle output.
+     // - The second source operand must be the shuffle first source operand.
+     // The uses are copied to an array first, since TurnInto is called on them while iterating.
+     INode[] uses = operation.Dest.UseOps.ToArray();
+
+     foreach (INode use in uses)
+     {
+         if (!(use is Operation useOp) || useOp.Inst != Instruction.SwizzleAdd)
+         {
+             continue;
+         }
+
+         Operand fswzaddSrc1 = useOp.GetSource(0);
+         Operand fswzaddSrc2 = useOp.GetSource(1);
+         Operand fswzaddSrc3 = useOp.GetSource(2);
+
+         if (fswzaddSrc1 != operation.Dest)
+         {
+             continue;
+         }
+
+         if (fswzaddSrc2 != operation.GetSource(0))
+         {
+             continue;
+         }
+
+         if (fswzaddSrc3.Type != OperandType.Constant)
+         {
+             continue;
+         }
+
+         int mask = fswzaddSrc3.Value;
+
+         if ((isDdx && mask != DdxMask) ||
+             (isDdy && mask != DdyMask))
+         {
+             continue;
+         }
+
+         useOp.TurnInto(isDdx ? Instruction.Ddx : Instruction.Ddy, fswzaddSrc2);
+
+         modified = true;
+     }
+
+     return modified;
+ }
+
private static void RemoveNode(BasicBlock block, LinkedListNode<INode> llNode)
{
// Remove a node from the nodes list, and also remove itself