From 6ed613a6e6a66d57d2fdb045d926e42dfcdd3206 Mon Sep 17 00:00:00 2001 From: gdkchan Date: Wed, 16 Aug 2023 21:31:07 -0300 Subject: Fix vote and shuffle shader instructions on AMD GPUs (#5540) * Move shuffle handling out of the backend to a transform pass * Handle subgroup sizes higher than 32 * Stop using the subgroup size control extension * Make GenerateShuffleFunction static * Shader cache version bump --- .../CodeGen/Spirv/Instructions.cs | 95 ++-------------------- .../CodeGen/Spirv/SpirvGenerator.cs | 7 +- 2 files changed, 10 insertions(+), 92 deletions(-) (limited to 'src/Ryujinx.Graphics.Shader/CodeGen/Spirv') diff --git a/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/Instructions.cs b/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/Instructions.cs index 98c1b9d2..719ccf0c 100644 --- a/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/Instructions.cs +++ b/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/Instructions.cs @@ -231,7 +231,7 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Spirv var execution = context.Constant(context.TypeU32(), Scope.Subgroup); var maskVector = context.GroupNonUniformBallot(uvec4Type, execution, context.Get(AggregateType.Bool, source)); - var mask = context.CompositeExtract(context.TypeU32(), maskVector, (SpvLiteralInteger)0); + var mask = context.CompositeExtract(context.TypeU32(), maskVector, (SpvLiteralInteger)operation.Index); return new OperationResult(AggregateType.U32, mask); } @@ -1100,117 +1100,40 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Spirv private static OperationResult GenerateShuffle(CodeGenContext context, AstOperation operation) { - var x = context.GetFP32(operation.GetSource(0)); + var value = context.GetFP32(operation.GetSource(0)); var index = context.GetU32(operation.GetSource(1)); - var mask = context.GetU32(operation.GetSource(2)); - var const31 = context.Constant(context.TypeU32(), 31); - var const8 = context.Constant(context.TypeU32(), 8); - - var clamp = context.BitwiseAnd(context.TypeU32(), mask, const31); - var segMask = context.BitwiseAnd(context.TypeU32(), context.ShiftRightLogical(context.TypeU32(), mask, const8), const31); - var notSegMask = context.Not(context.TypeU32(), segMask); - var clampNotSegMask = context.BitwiseAnd(context.TypeU32(), clamp, notSegMask); - var indexNotSegMask = context.BitwiseAnd(context.TypeU32(), index, notSegMask); - - var threadId = GetScalarInput(context, IoVariable.SubgroupLaneId); - - var minThreadId = context.BitwiseAnd(context.TypeU32(), threadId, segMask); - var maxThreadId = context.BitwiseOr(context.TypeU32(), minThreadId, clampNotSegMask); - var srcThreadId = context.BitwiseOr(context.TypeU32(), indexNotSegMask, minThreadId); - var valid = context.ULessThanEqual(context.TypeBool(), srcThreadId, maxThreadId); - var value = context.GroupNonUniformShuffle(context.TypeFP32(), context.Constant(context.TypeU32(), (int)Scope.Subgroup), x, srcThreadId); - var result = context.Select(context.TypeFP32(), valid, value, x); - - var validLocal = (AstOperand)operation.GetSource(3); - - context.Store(context.GetLocalPointer(validLocal), context.BitcastIfNeeded(validLocal.VarType, AggregateType.Bool, valid)); + var result = context.GroupNonUniformShuffle(context.TypeFP32(), context.Constant(context.TypeU32(), (int)Scope.Subgroup), value, index); return new OperationResult(AggregateType.FP32, result); } private static OperationResult GenerateShuffleDown(CodeGenContext context, AstOperation operation) { - var x = context.GetFP32(operation.GetSource(0)); + var value = context.GetFP32(operation.GetSource(0)); var index = context.GetU32(operation.GetSource(1)); - var mask = context.GetU32(operation.GetSource(2)); - - var const31 = context.Constant(context.TypeU32(), 31); - var const8 = context.Constant(context.TypeU32(), 8); - - var clamp = context.BitwiseAnd(context.TypeU32(), mask, const31); - var segMask = context.BitwiseAnd(context.TypeU32(), context.ShiftRightLogical(context.TypeU32(), mask, const8), const31); - var notSegMask = context.Not(context.TypeU32(), segMask); - var clampNotSegMask = context.BitwiseAnd(context.TypeU32(), clamp, notSegMask); - - var threadId = GetScalarInput(context, IoVariable.SubgroupLaneId); - - var minThreadId = context.BitwiseAnd(context.TypeU32(), threadId, segMask); - var maxThreadId = context.BitwiseOr(context.TypeU32(), minThreadId, clampNotSegMask); - var srcThreadId = context.IAdd(context.TypeU32(), threadId, index); - var valid = context.ULessThanEqual(context.TypeBool(), srcThreadId, maxThreadId); - var value = context.GroupNonUniformShuffle(context.TypeFP32(), context.Constant(context.TypeU32(), (int)Scope.Subgroup), x, srcThreadId); - var result = context.Select(context.TypeFP32(), valid, value, x); - - var validLocal = (AstOperand)operation.GetSource(3); - context.Store(context.GetLocalPointer(validLocal), context.BitcastIfNeeded(validLocal.VarType, AggregateType.Bool, valid)); + var result = context.GroupNonUniformShuffleDown(context.TypeFP32(), context.Constant(context.TypeU32(), (int)Scope.Subgroup), value, index); return new OperationResult(AggregateType.FP32, result); } private static OperationResult GenerateShuffleUp(CodeGenContext context, AstOperation operation) { - var x = context.GetFP32(operation.GetSource(0)); + var value = context.GetFP32(operation.GetSource(0)); var index = context.GetU32(operation.GetSource(1)); - var mask = context.GetU32(operation.GetSource(2)); - var const31 = context.Constant(context.TypeU32(), 31); - var const8 = context.Constant(context.TypeU32(), 8); - - var segMask = context.BitwiseAnd(context.TypeU32(), context.ShiftRightLogical(context.TypeU32(), mask, const8), const31); - - var threadId = GetScalarInput(context, IoVariable.SubgroupLaneId); - - var minThreadId = context.BitwiseAnd(context.TypeU32(), threadId, segMask); - var srcThreadId = context.ISub(context.TypeU32(), threadId, index); - var valid = context.SGreaterThanEqual(context.TypeBool(), srcThreadId, minThreadId); - var value = context.GroupNonUniformShuffle(context.TypeFP32(), context.Constant(context.TypeU32(), (int)Scope.Subgroup), x, srcThreadId); - var result = context.Select(context.TypeFP32(), valid, value, x); - - var validLocal = (AstOperand)operation.GetSource(3); - - context.Store(context.GetLocalPointer(validLocal), context.BitcastIfNeeded(validLocal.VarType, AggregateType.Bool, valid)); + var result = context.GroupNonUniformShuffleUp(context.TypeFP32(), context.Constant(context.TypeU32(), (int)Scope.Subgroup), value, index); return new OperationResult(AggregateType.FP32, result); } private static OperationResult GenerateShuffleXor(CodeGenContext context, AstOperation operation) { - var x = context.GetFP32(operation.GetSource(0)); + var value = context.GetFP32(operation.GetSource(0)); var index = context.GetU32(operation.GetSource(1)); - var mask = context.GetU32(operation.GetSource(2)); - - var const31 = context.Constant(context.TypeU32(), 31); - var const8 = context.Constant(context.TypeU32(), 8); - - var clamp = context.BitwiseAnd(context.TypeU32(), mask, const31); - var segMask = context.BitwiseAnd(context.TypeU32(), context.ShiftRightLogical(context.TypeU32(), mask, const8), const31); - var notSegMask = context.Not(context.TypeU32(), segMask); - var clampNotSegMask = context.BitwiseAnd(context.TypeU32(), clamp, notSegMask); - - var threadId = GetScalarInput(context, IoVariable.SubgroupLaneId); - - var minThreadId = context.BitwiseAnd(context.TypeU32(), threadId, segMask); - var maxThreadId = context.BitwiseOr(context.TypeU32(), minThreadId, clampNotSegMask); - var srcThreadId = context.BitwiseXor(context.TypeU32(), threadId, index); - var valid = context.ULessThanEqual(context.TypeBool(), srcThreadId, maxThreadId); - var value = context.GroupNonUniformShuffle(context.TypeFP32(), context.Constant(context.TypeU32(), (int)Scope.Subgroup), x, srcThreadId); - var result = context.Select(context.TypeFP32(), valid, value, x); - - var validLocal = (AstOperand)operation.GetSource(3); - context.Store(context.GetLocalPointer(validLocal), context.BitcastIfNeeded(validLocal.VarType, AggregateType.Bool, valid)); + var result = context.GroupNonUniformShuffleXor(context.TypeFP32(), context.Constant(context.TypeU32(), (int)Scope.Subgroup), value, index); return new OperationResult(AggregateType.FP32, result); } diff --git a/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/SpirvGenerator.cs b/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/SpirvGenerator.cs index 5eee888e..70f1dd3c 100644 --- a/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/SpirvGenerator.cs +++ b/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/SpirvGenerator.cs @@ -28,12 +28,7 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Spirv _poolLock = new object(); } - private const HelperFunctionsMask NeedsInvocationIdMask = - HelperFunctionsMask.Shuffle | - HelperFunctionsMask.ShuffleDown | - HelperFunctionsMask.ShuffleUp | - HelperFunctionsMask.ShuffleXor | - HelperFunctionsMask.SwizzleAdd; + private const HelperFunctionsMask NeedsInvocationIdMask = HelperFunctionsMask.SwizzleAdd; public static byte[] Generate(StructuredProgramInfo info, CodeGenParameters parameters) { -- cgit v1.2.3