diff options
Diffstat (limited to 'src/Ryujinx.Graphics.Shader')
34 files changed, 1677 insertions, 362 deletions
diff --git a/src/Ryujinx.Graphics.Shader/AttributeType.cs b/src/Ryujinx.Graphics.Shader/AttributeType.cs index 1d950773..d2d146ec 100644 --- a/src/Ryujinx.Graphics.Shader/AttributeType.cs +++ b/src/Ryujinx.Graphics.Shader/AttributeType.cs @@ -11,13 +11,17 @@ namespace Ryujinx.Graphics.Shader Uint, Sscaled, Uscaled, + + Packed = 1 << 6, + PackedRgb10A2Signed = 1 << 7, + AnyPacked = Packed | PackedRgb10A2Signed, } static class AttributeTypeExtensions { public static AggregateType ToAggregateType(this AttributeType type) { - return type switch + return (type & ~AttributeType.AnyPacked) switch { AttributeType.Float => AggregateType.FP32, AttributeType.Sint => AggregateType.S32, @@ -28,7 +32,7 @@ namespace Ryujinx.Graphics.Shader public static AggregateType ToAggregateType(this AttributeType type, bool supportsScaledFormats) { - return type switch + return (type & ~AttributeType.AnyPacked) switch { AttributeType.Float => AggregateType.FP32, AttributeType.Sint => AggregateType.S32, diff --git a/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/Declarations.cs b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/Declarations.cs index 607ff431..500de71f 100644 --- a/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/Declarations.cs +++ b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/Declarations.cs @@ -100,10 +100,7 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl else { string outPrimitive = context.Definitions.OutputTopology.ToGlslString(); - - int maxOutputVertices = context.Definitions.GpPassthrough - ? 
context.Definitions.InputTopology.ToInputVertices() - : context.Definitions.MaxOutputVertices; + int maxOutputVertices = context.Definitions.MaxOutputVertices; context.AppendLine($"layout ({outPrimitive}, max_vertices = {maxOutputVertices}) out;"); } @@ -320,15 +317,22 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl { string typeName = GetVarTypeName(context, memory.Type & ~AggregateType.Array); - if (memory.ArrayLength > 0) + if (memory.Type.HasFlag(AggregateType.Array)) { - string arraySize = memory.ArrayLength.ToString(CultureInfo.InvariantCulture); + if (memory.ArrayLength > 0) + { + string arraySize = memory.ArrayLength.ToString(CultureInfo.InvariantCulture); - context.AppendLine($"{prefix}{typeName} {memory.Name}[{arraySize}];"); + context.AppendLine($"{prefix}{typeName} {memory.Name}[{arraySize}];"); + } + else + { + context.AppendLine($"{prefix}{typeName} {memory.Name}[];"); + } } else { - context.AppendLine($"{prefix}{typeName} {memory.Name}[];"); + context.AppendLine($"{prefix}{typeName} {memory.Name};"); } } } diff --git a/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/IoMap.cs b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/IoMap.cs index b5f453ae..caa6ef64 100644 --- a/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/IoMap.cs +++ b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/IoMap.cs @@ -31,6 +31,7 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions IoVariable.FrontColorDiffuse => ("gl_FrontColor", AggregateType.Vector4 | AggregateType.FP32), // Deprecated. IoVariable.FrontColorSpecular => ("gl_FrontSecondaryColor", AggregateType.Vector4 | AggregateType.FP32), // Deprecated. 
IoVariable.FrontFacing => ("gl_FrontFacing", AggregateType.Bool), + IoVariable.GlobalId => ("gl_GlobalInvocationID", AggregateType.Vector3 | AggregateType.U32), IoVariable.InstanceId => ("gl_InstanceID", AggregateType.S32), IoVariable.InstanceIndex => ("gl_InstanceIndex", AggregateType.S32), IoVariable.InvocationId => ("gl_InvocationID", AggregateType.S32), diff --git a/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/CodeGenContext.cs b/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/CodeGenContext.cs index d385782a..9f9411a9 100644 --- a/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/CodeGenContext.cs +++ b/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/CodeGenContext.cs @@ -27,8 +27,6 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Spirv public ILogger Logger { get; } public TargetApi TargetApi { get; } - public int InputVertices { get; } - public Dictionary<int, Instruction> ConstantBuffers { get; } = new(); public Dictionary<int, Instruction> StorageBuffers { get; } = new(); @@ -101,19 +99,6 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Spirv Logger = parameters.Logger; TargetApi = parameters.TargetApi; - if (parameters.Definitions.Stage == ShaderStage.Geometry) - { - InputVertices = parameters.Definitions.InputTopology switch - { - InputTopology.Points => 1, - InputTopology.Lines => 2, - InputTopology.LinesAdjacency => 2, - InputTopology.Triangles => 3, - InputTopology.TrianglesAdjacency => 3, - _ => throw new InvalidOperationException($"Invalid input topology \"{parameters.Definitions.InputTopology}\"."), - }; - } - AddCapability(Capability.Shader); AddCapability(Capability.Float64); diff --git a/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/Declarations.cs b/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/Declarations.cs index b0659ba4..54767c2f 100644 --- a/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/Declarations.cs +++ b/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/Declarations.cs @@ -369,7 +369,7 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Spirv if (context.Definitions.Stage != 
ShaderStage.Vertex) { var perVertexInputStructType = CreatePerVertexStructType(context); - int arraySize = context.Definitions.Stage == ShaderStage.Geometry ? context.InputVertices : 32; + int arraySize = context.Definitions.Stage == ShaderStage.Geometry ? context.Definitions.InputTopology.ToInputVertices() : 32; var perVertexInputArrayType = context.TypeArray(perVertexInputStructType, context.Constant(context.TypeU32(), arraySize)); var perVertexInputPointerType = context.TypePointer(StorageClass.Input, perVertexInputArrayType); var perVertexInputVariable = context.Variable(perVertexInputPointerType, StorageClass.Input); @@ -506,7 +506,7 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Spirv if (!isPerPatch && IoMap.IsPerVertex(ioVariable, context.Definitions.Stage, isOutput)) { - int arraySize = context.Definitions.Stage == ShaderStage.Geometry ? context.InputVertices : 32; + int arraySize = context.Definitions.Stage == ShaderStage.Geometry ? context.Definitions.InputTopology.ToInputVertices() : 32; spvType = context.TypeArray(spvType, context.Constant(context.TypeU32(), arraySize)); if (context.Definitions.GpPassthrough && context.HostCapabilities.SupportsGeometryShaderPassthrough) diff --git a/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/IoMap.cs b/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/IoMap.cs index 08d403e2..7b4e14ff 100644 --- a/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/IoMap.cs +++ b/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/IoMap.cs @@ -22,6 +22,7 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Spirv IoVariable.FragmentCoord => (BuiltIn.FragCoord, AggregateType.Vector4 | AggregateType.FP32), IoVariable.FragmentOutputDepth => (BuiltIn.FragDepth, AggregateType.FP32), IoVariable.FrontFacing => (BuiltIn.FrontFacing, AggregateType.Bool), + IoVariable.GlobalId => (BuiltIn.GlobalInvocationId, AggregateType.Vector3 | AggregateType.U32), IoVariable.InstanceId => (BuiltIn.InstanceId, AggregateType.S32), IoVariable.InstanceIndex => (BuiltIn.InstanceIndex, 
AggregateType.S32), IoVariable.InvocationId => (BuiltIn.InvocationId, AggregateType.S32), diff --git a/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/SpirvGenerator.cs b/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/SpirvGenerator.cs index 70f1dd3c..44d3e985 100644 --- a/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/SpirvGenerator.cs +++ b/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/SpirvGenerator.cs @@ -239,9 +239,7 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Spirv _ => throw new InvalidOperationException($"Invalid output topology \"{context.Definitions.OutputTopology}\"."), }); - int maxOutputVertices = context.Definitions.GpPassthrough ? context.InputVertices : context.Definitions.MaxOutputVertices; - - context.AddExecutionMode(spvFunc, ExecutionMode.OutputVertices, (SpvLiteralInteger)maxOutputVertices); + context.AddExecutionMode(spvFunc, ExecutionMode.OutputVertices, (SpvLiteralInteger)context.Definitions.MaxOutputVertices); } else if (context.Definitions.Stage == ShaderStage.Fragment) { @@ -279,6 +277,14 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Spirv localSizeZ); } + if (context.Definitions.Stage != ShaderStage.Fragment && + context.Definitions.Stage != ShaderStage.Geometry && + context.Definitions.Stage != ShaderStage.Compute && + context.Info.IoDefinitions.Contains(new IoDefinition(StorageKind.Output, IoVariable.Layer))) + { + context.AddCapability(Capability.ShaderLayer); + } + if (context.Definitions.TransformFeedbackEnabled && context.Definitions.LastInVertexPipeline) { context.AddExecutionMode(spvFunc, ExecutionMode.Xfb); diff --git a/src/Ryujinx.Graphics.Shader/Constants.cs b/src/Ryujinx.Graphics.Shader/Constants.cs index cff2c37a..6317369f 100644 --- a/src/Ryujinx.Graphics.Shader/Constants.cs +++ b/src/Ryujinx.Graphics.Shader/Constants.cs @@ -10,11 +10,5 @@ namespace Ryujinx.Graphics.Shader public const int NvnBaseVertexByteOffset = 0x640; public const int NvnBaseInstanceByteOffset = 0x644; public const int NvnDrawIndexByteOffset = 0x648; - - // Transform 
Feedback emulation. - - public const int TfeInfoBinding = 0; - public const int TfeBufferBaseBinding = 1; - public const int TfeBuffersCount = 4; } } diff --git a/src/Ryujinx.Graphics.Shader/Decoders/DecodedProgram.cs b/src/Ryujinx.Graphics.Shader/Decoders/DecodedProgram.cs index 67304d02..fdf3eacc 100644 --- a/src/Ryujinx.Graphics.Shader/Decoders/DecodedProgram.cs +++ b/src/Ryujinx.Graphics.Shader/Decoders/DecodedProgram.cs @@ -60,6 +60,11 @@ namespace Ryujinx.Graphics.Shader.Decoders _functionsWithId.Add(function); } + public IoUsage GetIoUsage() + { + return new IoUsage(UsedFeatures, ClipDistancesWritten, AttributeUsage.UsedOutputAttributes); + } + public IEnumerator<DecodedFunction> GetEnumerator() { return _functions.Values.GetEnumerator(); diff --git a/src/Ryujinx.Graphics.Shader/Decoders/Decoder.cs b/src/Ryujinx.Graphics.Shader/Decoders/Decoder.cs index 4266dedc..1211e561 100644 --- a/src/Ryujinx.Graphics.Shader/Decoders/Decoder.cs +++ b/src/Ryujinx.Graphics.Shader/Decoders/Decoder.cs @@ -297,6 +297,9 @@ namespace Ryujinx.Graphics.Shader.Decoders case InstName.Ssy: block.AddPushOp(op); break; + case InstName.Shfl: + context.SetUsedFeature(FeatureFlags.Shuffle); + break; case InstName.Ldl: case InstName.Stl: context.SetUsedFeature(FeatureFlags.LocalMemory); @@ -307,8 +310,22 @@ namespace Ryujinx.Graphics.Shader.Decoders case InstName.Sts: context.SetUsedFeature(FeatureFlags.SharedMemory); break; - case InstName.Shfl: - context.SetUsedFeature(FeatureFlags.Shuffle); + case InstName.Atom: + case InstName.AtomCas: + case InstName.Red: + case InstName.Stg: + case InstName.Suatom: + case InstName.SuatomB: + case InstName.SuatomB2: + case InstName.SuatomCas: + case InstName.SuatomCasB: + case InstName.Sured: + case InstName.SuredB: + case InstName.Sust: + case InstName.SustB: + case InstName.SustD: + case InstName.SustDB: + context.SetUsedFeature(FeatureFlags.Store); break; } @@ -424,6 +441,12 @@ namespace Ryujinx.Graphics.Shader.Decoders 
context.SetUsedFeature(FeatureFlags.RtLayer); } break; + case AttributeConsts.ViewportIndex: + if (definitions.Stage != ShaderStage.Fragment) + { + context.SetUsedFeature(FeatureFlags.ViewportIndex); + } + break; case AttributeConsts.ClipDistance0: case AttributeConsts.ClipDistance1: case AttributeConsts.ClipDistance2: @@ -432,11 +455,17 @@ namespace Ryujinx.Graphics.Shader.Decoders case AttributeConsts.ClipDistance5: case AttributeConsts.ClipDistance6: case AttributeConsts.ClipDistance7: - if (definitions.Stage == ShaderStage.Vertex) + if (definitions.Stage.IsVtg()) { context.SetClipDistanceWritten((attr - AttributeConsts.ClipDistance0) / 4); } break; + case AttributeConsts.ViewportMask: + if (definitions.Stage != ShaderStage.Fragment) + { + context.SetUsedFeature(FeatureFlags.ViewportMask); + } + break; } } else diff --git a/src/Ryujinx.Graphics.Shader/InputTopology.cs b/src/Ryujinx.Graphics.Shader/InputTopology.cs index ebd2930e..9438263d 100644 --- a/src/Ryujinx.Graphics.Shader/InputTopology.cs +++ b/src/Ryujinx.Graphics.Shader/InputTopology.cs @@ -29,6 +29,19 @@ namespace Ryujinx.Graphics.Shader return topology switch { InputTopology.Points => 1, + InputTopology.Lines => 2, + InputTopology.LinesAdjacency => 4, + InputTopology.Triangles => 3, + InputTopology.TrianglesAdjacency => 6, + _ => 1, + }; + } + + public static int ToInputVerticesNoAdjacency(this InputTopology topology) + { + return topology switch + { + InputTopology.Points => 1, InputTopology.Lines or InputTopology.LinesAdjacency => 2, InputTopology.Triangles or diff --git a/src/Ryujinx.Graphics.Shader/Instructions/InstEmitAttribute.cs b/src/Ryujinx.Graphics.Shader/Instructions/InstEmitAttribute.cs index 53d774d6..63ce38e2 100644 --- a/src/Ryujinx.Graphics.Shader/Instructions/InstEmitAttribute.cs +++ b/src/Ryujinx.Graphics.Shader/Instructions/InstEmitAttribute.cs @@ -63,7 +63,7 @@ namespace Ryujinx.Graphics.Shader.Instructions { value = AttributeMap.GenerateAttributeLoad(context, primVertex, offset, 
isOutput, op.P); - if (!context.TranslatorContext.Definitions.SupportsScaledVertexFormats && + if ((!context.TranslatorContext.Definitions.SupportsScaledVertexFormats || context.VertexAsCompute) && context.TranslatorContext.Stage == ShaderStage.Vertex && !op.O && offset >= 0x80 && diff --git a/src/Ryujinx.Graphics.Shader/IntermediateRepresentation/IoVariable.cs b/src/Ryujinx.Graphics.Shader/IntermediateRepresentation/IoVariable.cs index fdee8345..21e20863 100644 --- a/src/Ryujinx.Graphics.Shader/IntermediateRepresentation/IoVariable.cs +++ b/src/Ryujinx.Graphics.Shader/IntermediateRepresentation/IoVariable.cs @@ -18,6 +18,7 @@ namespace Ryujinx.Graphics.Shader.IntermediateRepresentation FrontColorDiffuse, FrontColorSpecular, FrontFacing, + GlobalId, InstanceId, InstanceIndex, InvocationId, diff --git a/src/Ryujinx.Graphics.Shader/ResourceReservationCounts.cs b/src/Ryujinx.Graphics.Shader/ResourceReservationCounts.cs new file mode 100644 index 00000000..c0bae8ea --- /dev/null +++ b/src/Ryujinx.Graphics.Shader/ResourceReservationCounts.cs @@ -0,0 +1,22 @@ +using Ryujinx.Graphics.Shader.Translation; + +namespace Ryujinx.Graphics.Shader +{ + public readonly struct ResourceReservationCounts + { + public readonly int ReservedConstantBuffers { get; } + public readonly int ReservedStorageBuffers { get; } + public readonly int ReservedTextures { get; } + public readonly int ReservedImages { get; } + + public ResourceReservationCounts(bool isTransformFeedbackEmulated, bool vertexAsCompute) + { + ResourceReservations reservations = new(isTransformFeedbackEmulated, vertexAsCompute); + + ReservedConstantBuffers = reservations.ReservedConstantBuffers; + ReservedStorageBuffers = reservations.ReservedStorageBuffers; + ReservedTextures = reservations.ReservedTextures; + ReservedImages = reservations.ReservedImages; + } + } +} diff --git a/src/Ryujinx.Graphics.Shader/ShaderIdentification.cs b/src/Ryujinx.Graphics.Shader/ShaderIdentification.cs deleted file mode 100644 index 
551e318c..00000000 --- a/src/Ryujinx.Graphics.Shader/ShaderIdentification.cs +++ /dev/null @@ -1,8 +0,0 @@ -namespace Ryujinx.Graphics.Shader -{ - public enum ShaderIdentification - { - None, - GeometryLayerPassthrough, - } -} diff --git a/src/Ryujinx.Graphics.Shader/ShaderProgramInfo.cs b/src/Ryujinx.Graphics.Shader/ShaderProgramInfo.cs index f9776afc..22823ac3 100644 --- a/src/Ryujinx.Graphics.Shader/ShaderProgramInfo.cs +++ b/src/Ryujinx.Graphics.Shader/ShaderProgramInfo.cs @@ -10,9 +10,10 @@ namespace Ryujinx.Graphics.Shader public ReadOnlyCollection<TextureDescriptor> Textures { get; } public ReadOnlyCollection<TextureDescriptor> Images { get; } - public ShaderIdentification Identification { get; } - public int GpLayerInputAttribute { get; } public ShaderStage Stage { get; } + public int GeometryVerticesPerPrimitive { get; } + public int GeometryMaxOutputVertices { get; } + public int ThreadsPerInputPrimitive { get; } public bool UsesFragCoord { get; } public bool UsesInstanceId { get; } public bool UsesDrawParameters { get; } @@ -25,9 +26,10 @@ namespace Ryujinx.Graphics.Shader BufferDescriptor[] sBuffers, TextureDescriptor[] textures, TextureDescriptor[] images, - ShaderIdentification identification, - int gpLayerInputAttribute, ShaderStage stage, + int geometryVerticesPerPrimitive, + int geometryMaxOutputVertices, + int threadsPerInputPrimitive, bool usesFragCoord, bool usesInstanceId, bool usesDrawParameters, @@ -40,9 +42,10 @@ namespace Ryujinx.Graphics.Shader Textures = Array.AsReadOnly(textures); Images = Array.AsReadOnly(images); - Identification = identification; - GpLayerInputAttribute = gpLayerInputAttribute; Stage = stage; + GeometryVerticesPerPrimitive = geometryVerticesPerPrimitive; + GeometryMaxOutputVertices = geometryMaxOutputVertices; + ThreadsPerInputPrimitive = threadsPerInputPrimitive; UsesFragCoord = usesFragCoord; UsesInstanceId = usesInstanceId; UsesDrawParameters = usesDrawParameters; diff --git 
a/src/Ryujinx.Graphics.Shader/SupportBuffer.cs b/src/Ryujinx.Graphics.Shader/SupportBuffer.cs index 0b7a2edd..d4d3cbf8 100644 --- a/src/Ryujinx.Graphics.Shader/SupportBuffer.cs +++ b/src/Ryujinx.Graphics.Shader/SupportBuffer.cs @@ -22,11 +22,13 @@ namespace Ryujinx.Graphics.Shader ViewportSize, FragmentRenderScaleCount, RenderScale, + TfeOffset, + TfeVertexCount, } public struct SupportBuffer { - internal const int Binding = 0; + public const int Binding = 0; public static readonly int FieldSize; public static readonly int RequiredSize; @@ -38,6 +40,8 @@ namespace Ryujinx.Graphics.Shader public static readonly int FragmentRenderScaleCountOffset; public static readonly int GraphicsRenderScaleOffset; public static readonly int ComputeRenderScaleOffset; + public static readonly int TfeOffsetOffset; + public static readonly int TfeVertexCountOffset; public const int FragmentIsBgraCount = 8; // One for the render target, 64 for the textures, and 8 for the images. @@ -62,18 +66,22 @@ namespace Ryujinx.Graphics.Shader FragmentRenderScaleCountOffset = OffsetOf(ref instance, ref instance.FragmentRenderScaleCount); GraphicsRenderScaleOffset = OffsetOf(ref instance, ref instance.RenderScale); ComputeRenderScaleOffset = GraphicsRenderScaleOffset + FieldSize; + TfeOffsetOffset = OffsetOf(ref instance, ref instance.TfeOffset); + TfeVertexCountOffset = OffsetOf(ref instance, ref instance.TfeVertexCount); } internal static StructureType GetStructureType() { return new StructureType(new[] { - new StructureField(AggregateType.U32, "s_alpha_test"), - new StructureField(AggregateType.Array | AggregateType.U32, "s_is_bgra", FragmentIsBgraCount), - new StructureField(AggregateType.Vector4 | AggregateType.FP32, "s_viewport_inverse"), - new StructureField(AggregateType.Vector4 | AggregateType.FP32, "s_viewport_size"), - new StructureField(AggregateType.S32, "s_frag_scale_count"), - new StructureField(AggregateType.Array | AggregateType.FP32, "s_render_scale", RenderScaleMaxCount), + new 
StructureField(AggregateType.U32, "alpha_test"), + new StructureField(AggregateType.Array | AggregateType.U32, "is_bgra", FragmentIsBgraCount), + new StructureField(AggregateType.Vector4 | AggregateType.FP32, "viewport_inverse"), + new StructureField(AggregateType.Vector4 | AggregateType.FP32, "viewport_size"), + new StructureField(AggregateType.S32, "frag_scale_count"), + new StructureField(AggregateType.Array | AggregateType.FP32, "render_scale", RenderScaleMaxCount), + new StructureField(AggregateType.Vector4 | AggregateType.S32, "tfe_offset"), + new StructureField(AggregateType.S32, "tfe_vertex_count"), }); } @@ -85,5 +93,8 @@ namespace Ryujinx.Graphics.Shader // Render scale max count: 1 + 64 + 8. First scale is fragment output scale, others are textures/image inputs. public Array73<Vector4<float>> RenderScale; + + public Vector4<int> TfeOffset; + public Vector4<int> TfeVertexCount; } } diff --git a/src/Ryujinx.Graphics.Shader/Translation/AttributeConsts.cs b/src/Ryujinx.Graphics.Shader/Translation/AttributeConsts.cs index f749cecb..c4bd2cbf 100644 --- a/src/Ryujinx.Graphics.Shader/Translation/AttributeConsts.cs +++ b/src/Ryujinx.Graphics.Shader/Translation/AttributeConsts.cs @@ -4,6 +4,7 @@ namespace Ryujinx.Graphics.Shader.Translation { public const int PrimitiveId = 0x060; public const int Layer = 0x064; + public const int ViewportIndex = 0x068; public const int PositionX = 0x070; public const int PositionY = 0x074; public const int FrontColorDiffuseR = 0x280; @@ -24,6 +25,7 @@ namespace Ryujinx.Graphics.Shader.Translation public const int TexCoordCount = 10; public const int TexCoordBase = 0x300; public const int TexCoordEnd = TexCoordBase + TexCoordCount * 16; + public const int ViewportMask = 0x3a0; public const int FrontFacing = 0x3fc; public const int UserAttributesCount = 32; diff --git a/src/Ryujinx.Graphics.Shader/Translation/EmitterContext.cs b/src/Ryujinx.Graphics.Shader/Translation/EmitterContext.cs index 43263dd4..f1dffb35 100644 --- 
a/src/Ryujinx.Graphics.Shader/Translation/EmitterContext.cs +++ b/src/Ryujinx.Graphics.Shader/Translation/EmitterContext.cs @@ -14,6 +14,8 @@ namespace Ryujinx.Graphics.Shader.Translation public TranslatorContext TranslatorContext { get; } public ResourceManager ResourceManager { get; } + public bool VertexAsCompute { get; } + public bool IsNonMain { get; } public Block CurrBlock { get; set; } @@ -59,11 +61,13 @@ namespace Ryujinx.Graphics.Shader.Translation TranslatorContext translatorContext, ResourceManager resourceManager, DecodedProgram program, + bool vertexAsCompute, bool isNonMain) : this() { TranslatorContext = translatorContext; ResourceManager = resourceManager; Program = program; + VertexAsCompute = vertexAsCompute; IsNonMain = isNonMain; EmitStart(); @@ -71,13 +75,87 @@ namespace Ryujinx.Graphics.Shader.Translation private void EmitStart() { - if (TranslatorContext.Definitions.Stage == ShaderStage.Vertex && - TranslatorContext.Options.TargetApi == TargetApi.Vulkan && - (TranslatorContext.Options.Flags & TranslationFlags.VertexA) == 0) + if (TranslatorContext.Options.Flags.HasFlag(TranslationFlags.VertexA)) + { + return; + } + + if (TranslatorContext.Definitions.Stage == ShaderStage.Vertex && TranslatorContext.Options.TargetApi == TargetApi.Vulkan) { // Vulkan requires the point size to be always written on the shader if the primitive topology is points. this.Store(StorageKind.Output, IoVariable.PointSize, null, ConstF(TranslatorContext.Definitions.PointSize)); } + + if (VertexAsCompute) + { + int vertexInfoCbBinding = ResourceManager.Reservations.VertexInfoConstantBufferBinding; + int countFieldIndex = TranslatorContext.Stage == ShaderStage.Vertex + ? 
(int)VertexInfoBufferField.VertexCounts + : (int)VertexInfoBufferField.GeometryCounts; + + Operand outputVertexOffset = this.Load(StorageKind.Input, IoVariable.GlobalId, Const(0)); + Operand vertexCount = this.Load(StorageKind.ConstantBuffer, vertexInfoCbBinding, Const(countFieldIndex), Const(0)); + Operand isVertexOob = this.ICompareGreaterOrEqualUnsigned(outputVertexOffset, vertexCount); + + Operand lblVertexInBounds = Label(); + + this.BranchIfFalse(lblVertexInBounds, isVertexOob); + this.Return(); + this.MarkLabel(lblVertexInBounds); + + Operand outputInstanceOffset = this.Load(StorageKind.Input, IoVariable.GlobalId, Const(1)); + Operand instanceCount = this.Load(StorageKind.ConstantBuffer, vertexInfoCbBinding, Const((int)VertexInfoBufferField.VertexCounts), Const(1)); + Operand firstVertex = this.Load(StorageKind.ConstantBuffer, vertexInfoCbBinding, Const((int)VertexInfoBufferField.VertexCounts), Const(2)); + Operand firstInstance = this.Load(StorageKind.ConstantBuffer, vertexInfoCbBinding, Const((int)VertexInfoBufferField.VertexCounts), Const(3)); + Operand ibBaseOffset = this.Load(StorageKind.ConstantBuffer, vertexInfoCbBinding, Const((int)VertexInfoBufferField.GeometryCounts), Const(3)); + Operand isInstanceOob = this.ICompareGreaterOrEqualUnsigned(outputInstanceOffset, instanceCount); + + Operand lblInstanceInBounds = Label(); + + this.BranchIfFalse(lblInstanceInBounds, isInstanceOob); + this.Return(); + this.MarkLabel(lblInstanceInBounds); + + if (TranslatorContext.Stage == ShaderStage.Vertex) + { + Operand vertexIndexVr = Local(); + + this.TextureSample( + SamplerType.TextureBuffer, + TextureFlags.IntCoords, + ResourceManager.Reservations.IndexBufferTextureBinding, + 1, + new[] { vertexIndexVr }, + new[] { this.IAdd(ibBaseOffset, outputVertexOffset) }); + + this.Store(StorageKind.LocalMemory, ResourceManager.LocalVertexIndexVertexRateMemoryId, this.IAdd(firstVertex, vertexIndexVr)); + this.Store(StorageKind.LocalMemory, 
ResourceManager.LocalVertexIndexInstanceRateMemoryId, this.IAdd(firstInstance, outputInstanceOffset)); + } + else if (TranslatorContext.Stage == ShaderStage.Geometry) + { + int inputVertices = TranslatorContext.Definitions.InputTopology.ToInputVertices(); + + Operand baseVertex = this.IMultiply(outputVertexOffset, Const(inputVertices)); + + for (int index = 0; index < inputVertices; index++) + { + Operand vertexIndex = Local(); + + this.TextureSample( + SamplerType.TextureBuffer, + TextureFlags.IntCoords, + ResourceManager.Reservations.TopologyRemapBufferTextureBinding, + 1, + new[] { vertexIndex }, + new[] { this.IAdd(baseVertex, Const(index)) }); + + this.Store(StorageKind.LocalMemory, ResourceManager.LocalTopologyRemapMemoryId, Const(index), vertexIndex); + } + + this.Store(StorageKind.LocalMemory, ResourceManager.LocalGeometryOutputVertexCountMemoryId, Const(0)); + this.Store(StorageKind.LocalMemory, ResourceManager.LocalGeometryOutputIndexCountMemoryId, Const(0)); + } + } } public T GetOp<T>() where T : unmanaged @@ -166,16 +244,21 @@ namespace Ryujinx.Graphics.Shader.Translation public void PrepareForVertexReturn() { - if (!TranslatorContext.GpuAccessor.QueryHostSupportsTransformFeedback() && TranslatorContext.GpuAccessor.QueryTransformFeedbackEnabled()) + // TODO: Support transform feedback emulation on stages other than vertex. + // Those stages might produce more primitives, so it needs a way to "compact" the output after it is written. 
+ + if (!TranslatorContext.GpuAccessor.QueryHostSupportsTransformFeedback() && + TranslatorContext.GpuAccessor.QueryTransformFeedbackEnabled() && + TranslatorContext.Stage == ShaderStage.Vertex) { - Operand vertexCount = this.Load(StorageKind.StorageBuffer, Constants.TfeInfoBinding, Const(1)); + Operand vertexCount = this.Load(StorageKind.ConstantBuffer, SupportBuffer.Binding, Const((int)SupportBufferField.TfeVertexCount)); - for (int tfbIndex = 0; tfbIndex < Constants.TfeBuffersCount; tfbIndex++) + for (int tfbIndex = 0; tfbIndex < ResourceReservations.TfeBuffersCount; tfbIndex++) { var locations = TranslatorContext.GpuAccessor.QueryTransformFeedbackVaryingLocations(tfbIndex); var stride = TranslatorContext.GpuAccessor.QueryTransformFeedbackStride(tfbIndex); - Operand baseOffset = this.Load(StorageKind.StorageBuffer, Constants.TfeInfoBinding, Const(0), Const(tfbIndex)); + Operand baseOffset = this.Load(StorageKind.ConstantBuffer, SupportBuffer.Binding, Const((int)SupportBufferField.TfeOffset), Const(tfbIndex)); Operand baseVertex = this.Load(StorageKind.Input, IoVariable.BaseVertex); Operand baseInstance = this.Load(StorageKind.Input, IoVariable.BaseInstance); Operand vertexIndex = this.Load(StorageKind.Input, IoVariable.VertexIndex); @@ -200,7 +283,9 @@ namespace Ryujinx.Graphics.Shader.Translation Operand offset = this.IAdd(baseOffset, Const(j)); Operand value = Instructions.AttributeMap.GenerateAttributeLoad(this, null, location * 4, isOutput: true, isPerPatch: false); - this.Store(StorageKind.StorageBuffer, Constants.TfeBufferBaseBinding + tfbIndex, Const(0), offset, value); + int binding = ResourceManager.Reservations.GetTfeBufferStorageBufferBinding(tfbIndex); + + this.Store(StorageKind.StorageBuffer, binding, Const(0), offset, value); } } } @@ -225,16 +310,6 @@ namespace Ryujinx.Graphics.Shader.Translation this.Store(StorageKind.Output, IoVariable.Position, null, Const(2), this.FPFusedMultiplyAdd(z, ConstF(0.5f), halfW)); } - - if 
(TranslatorContext.Definitions.Stage != ShaderStage.Geometry && TranslatorContext.HasLayerInputAttribute) - { - int attrVecIndex = TranslatorContext.GpLayerInputAttribute >> 2; - int attrComponentIndex = TranslatorContext.GpLayerInputAttribute & 3; - - Operand layer = this.Load(StorageKind.Output, IoVariable.UserDefined, null, Const(attrVecIndex), Const(attrComponentIndex)); - - this.Store(StorageKind.Output, IoVariable.Layer, null, layer); - } } public void PrepareForVertexReturn(out Operand oldXLocal, out Operand oldYLocal, out Operand oldZLocal) @@ -308,9 +383,30 @@ namespace Ryujinx.Graphics.Shader.Translation if (TranslatorContext.Definitions.GpPassthrough && !TranslatorContext.GpuAccessor.QueryHostSupportsGeometryShaderPassthrough()) { - int inputVertices = TranslatorContext.Definitions.InputTopology.ToInputVertices(); + int inputStart, inputEnd, inputStep; + + InputTopology topology = TranslatorContext.Definitions.InputTopology; + + if (topology == InputTopology.LinesAdjacency) + { + inputStart = 1; + inputEnd = 3; + inputStep = 1; + } + else if (topology == InputTopology.TrianglesAdjacency) + { + inputStart = 0; + inputEnd = 6; + inputStep = 2; + } + else + { + inputStart = 0; + inputEnd = topology.ToInputVerticesNoAdjacency(); + inputStep = 1; + } - for (int primIndex = 0; primIndex < inputVertices; primIndex++) + for (int primIndex = inputStart; primIndex < inputEnd; primIndex += inputStep) { WritePositionOutput(primIndex); @@ -428,6 +524,65 @@ namespace Ryujinx.Graphics.Shader.Translation } } + if (VertexAsCompute) + { + if (TranslatorContext.Stage == ShaderStage.Vertex) + { + int vertexInfoCbBinding = ResourceManager.Reservations.VertexInfoConstantBufferBinding; + int vertexOutputSbBinding = ResourceManager.Reservations.VertexOutputStorageBufferBinding; + int stride = ResourceManager.Reservations.OutputSizePerInvocation; + + Operand vertexCount = this.Load(StorageKind.ConstantBuffer, vertexInfoCbBinding, Const((int)VertexInfoBufferField.VertexCounts), 
Const(0)); + + Operand outputVertexOffset = this.Load(StorageKind.Input, IoVariable.GlobalId, Const(0)); + Operand outputInstanceOffset = this.Load(StorageKind.Input, IoVariable.GlobalId, Const(1)); + + Operand outputBaseVertex = this.IMultiply(outputInstanceOffset, vertexCount); + + Operand baseOffset = this.IMultiply(this.IAdd(outputBaseVertex, outputVertexOffset), Const(stride)); + + for (int offset = 0; offset < stride; offset++) + { + Operand vertexOffset = this.IAdd(baseOffset, Const(offset)); + Operand value = this.Load(StorageKind.LocalMemory, ResourceManager.LocalVertexDataMemoryId, Const(offset)); + + this.Store(StorageKind.StorageBuffer, vertexOutputSbBinding, Const(0), vertexOffset, value); + } + } + else if (TranslatorContext.Stage == ShaderStage.Geometry) + { + Operand lblLoopHead = Label(); + Operand lblExit = Label(); + + this.MarkLabel(lblLoopHead); + + Operand writtenIndices = this.Load(StorageKind.LocalMemory, ResourceManager.LocalGeometryOutputIndexCountMemoryId); + + int maxIndicesPerPrimitiveInvocation = TranslatorContext.Definitions.GetGeometryOutputIndexBufferStridePerInstance(); + int maxIndicesPerPrimitive = maxIndicesPerPrimitiveInvocation * TranslatorContext.Definitions.ThreadsPerInputPrimitive; + + this.BranchIfTrue(lblExit, this.ICompareGreaterOrEqualUnsigned(writtenIndices, Const(maxIndicesPerPrimitiveInvocation))); + + int vertexInfoCbBinding = ResourceManager.Reservations.VertexInfoConstantBufferBinding; + + Operand primitiveIndex = this.Load(StorageKind.Input, IoVariable.GlobalId, Const(0)); + Operand instanceIndex = this.Load(StorageKind.Input, IoVariable.GlobalId, Const(1)); + Operand invocationId = this.Load(StorageKind.Input, IoVariable.GlobalId, Const(2)); + Operand vertexCount = this.Load(StorageKind.ConstantBuffer, vertexInfoCbBinding, Const((int)VertexInfoBufferField.VertexCounts), Const(0)); + Operand primitiveId = this.IAdd(this.IMultiply(instanceIndex, vertexCount), primitiveIndex); + Operand ibOffset = 
this.IMultiply(primitiveId, Const(maxIndicesPerPrimitive)); + ibOffset = this.IAdd(ibOffset, this.IMultiply(invocationId, Const(maxIndicesPerPrimitiveInvocation))); + ibOffset = this.IAdd(ibOffset, writtenIndices); + + this.Store(StorageKind.StorageBuffer, ResourceManager.Reservations.GeometryIndexOutputStorageBufferBinding, Const(0), ibOffset, Const(-1)); + this.Store(StorageKind.LocalMemory, ResourceManager.LocalGeometryOutputIndexCountMemoryId, this.IAdd(writtenIndices, Const(1))); + + this.Branch(lblLoopHead); + + this.MarkLabel(lblExit); + } + } + return true; } diff --git a/src/Ryujinx.Graphics.Shader/Translation/EmitterContextInsts.cs b/src/Ryujinx.Graphics.Shader/Translation/EmitterContextInsts.cs index a08c8ea9..afa830de 100644 --- a/src/Ryujinx.Graphics.Shader/Translation/EmitterContextInsts.cs +++ b/src/Ryujinx.Graphics.Shader/Translation/EmitterContextInsts.cs @@ -831,6 +831,11 @@ namespace Ryujinx.Graphics.Shader.Translation return context.Add(Instruction.Store, storageKind, null, e0, e1, value); } + public static Operand Store(this EmitterContext context, StorageKind storageKind, int binding, Operand value) + { + return context.Add(Instruction.Store, storageKind, null, Const(binding), value); + } + public static Operand Store(this EmitterContext context, StorageKind storageKind, int binding, Operand e0, Operand value) { return context.Add(Instruction.Store, storageKind, null, Const(binding), e0, value); diff --git a/src/Ryujinx.Graphics.Shader/Translation/FeatureFlags.cs b/src/Ryujinx.Graphics.Shader/Translation/FeatureFlags.cs index 552a3f31..88525462 100644 --- a/src/Ryujinx.Graphics.Shader/Translation/FeatureFlags.cs +++ b/src/Ryujinx.Graphics.Shader/Translation/FeatureFlags.cs @@ -19,8 +19,12 @@ namespace Ryujinx.Graphics.Shader.Translation DrawParameters = 1 << 4, RtLayer = 1 << 5, Shuffle = 1 << 6, + ViewportIndex = 1 << 7, + ViewportMask = 1 << 8, FixedFuncAttr = 1 << 9, LocalMemory = 1 << 10, SharedMemory = 1 << 11, + Store = 1 << 12, + 
VtgAsCompute = 1 << 13, } } diff --git a/src/Ryujinx.Graphics.Shader/Translation/IoUsage.cs b/src/Ryujinx.Graphics.Shader/Translation/IoUsage.cs new file mode 100644 index 00000000..8ce2da4a --- /dev/null +++ b/src/Ryujinx.Graphics.Shader/Translation/IoUsage.cs @@ -0,0 +1,28 @@ +namespace Ryujinx.Graphics.Shader.Translation +{ + readonly struct IoUsage + { + private readonly FeatureFlags _usedFeatures; + + public readonly bool UsesRtLayer => _usedFeatures.HasFlag(FeatureFlags.RtLayer); + public readonly bool UsesViewportIndex => _usedFeatures.HasFlag(FeatureFlags.ViewportIndex); + public readonly bool UsesViewportMask => _usedFeatures.HasFlag(FeatureFlags.ViewportMask); + public readonly byte ClipDistancesWritten { get; } + public readonly int UserDefinedMap { get; } + + public IoUsage(FeatureFlags usedFeatures, byte clipDistancesWritten, int userDefinedMap) + { + _usedFeatures = usedFeatures; + ClipDistancesWritten = clipDistancesWritten; + UserDefinedMap = userDefinedMap; + } + + public readonly IoUsage Combine(IoUsage other) + { + return new IoUsage( + _usedFeatures | other._usedFeatures, + (byte)(ClipDistancesWritten | other.ClipDistancesWritten), + UserDefinedMap | other.UserDefinedMap); + } + } +} diff --git a/src/Ryujinx.Graphics.Shader/Translation/ResourceManager.cs b/src/Ryujinx.Graphics.Shader/Translation/ResourceManager.cs index d07d8dce..9c487c46 100644 --- a/src/Ryujinx.Graphics.Shader/Translation/ResourceManager.cs +++ b/src/Ryujinx.Graphics.Shader/Translation/ResourceManager.cs @@ -48,12 +48,22 @@ namespace Ryujinx.Graphics.Shader.Translation public int LocalMemoryId { get; private set; } public int SharedMemoryId { get; private set; } + public int LocalVertexDataMemoryId { get; private set; } + public int LocalTopologyRemapMemoryId { get; private set; } + public int LocalVertexIndexVertexRateMemoryId { get; private set; } + public int LocalVertexIndexInstanceRateMemoryId { get; private set; } + public int LocalGeometryOutputVertexCountMemoryId { 
get; private set; } + public int LocalGeometryOutputIndexCountMemoryId { get; private set; } + public ShaderProperties Properties { get; } - public ResourceManager(ShaderStage stage, IGpuAccessor gpuAccessor) + public ResourceReservations Reservations { get; } + + public ResourceManager(ShaderStage stage, IGpuAccessor gpuAccessor, ResourceReservations reservations = null) { _gpuAccessor = gpuAccessor; Properties = new(); + Reservations = reservations; _stage = stage; _stagePrefix = GetShaderStagePrefix(stage); @@ -114,6 +124,29 @@ namespace Ryujinx.Graphics.Shader.Translation } } + public void SetVertexAsComputeLocalMemories(ShaderStage stage, InputTopology inputTopology) + { + LocalVertexDataMemoryId = AddMemoryDefinition("local_vertex_data", AggregateType.Array | AggregateType.FP32, Reservations.OutputSizePerInvocation); + + if (stage == ShaderStage.Vertex) + { + LocalVertexIndexVertexRateMemoryId = AddMemoryDefinition("local_vertex_index_vr", AggregateType.U32); + LocalVertexIndexInstanceRateMemoryId = AddMemoryDefinition("local_vertex_index_ir", AggregateType.U32); + } + else if (stage == ShaderStage.Geometry) + { + LocalTopologyRemapMemoryId = AddMemoryDefinition("local_topology_remap", AggregateType.Array | AggregateType.U32, inputTopology.ToInputVertices()); + + LocalGeometryOutputVertexCountMemoryId = AddMemoryDefinition("local_geometry_output_vertex", AggregateType.U32); + LocalGeometryOutputIndexCountMemoryId = AddMemoryDefinition("local_geometry_output_index", AggregateType.U32); + } + } + + private int AddMemoryDefinition(string name, AggregateType type, int arrayLength = 1) + { + return Properties.AddLocalMemory(new MemoryDefinition(name, type, arrayLength)); + } + public int GetConstantBufferBinding(int slot) { int binding = _cbSlotToBindingMap[slot]; @@ -465,17 +498,22 @@ namespace Ryujinx.Graphics.Shader.Translation return descriptors; } - public (int, int) GetCbufSlotAndHandleForTexture(int binding) + public bool 
TryGetCbufSlotAndHandleForTexture(int binding, out int cbufSlot, out int handle) { foreach ((TextureInfo info, TextureMeta meta) in _usedTextures) { if (meta.Binding == binding) { - return (info.CbufSlot, info.Handle); + cbufSlot = info.CbufSlot; + handle = info.Handle; + + return true; } } - throw new ArgumentException($"Binding {binding} is invalid."); + cbufSlot = 0; + handle = 0; + return false; } private static int FindDescriptorIndex(TextureDescriptor[] array, int binding) diff --git a/src/Ryujinx.Graphics.Shader/Translation/ResourceReservations.cs b/src/Ryujinx.Graphics.Shader/Translation/ResourceReservations.cs new file mode 100644 index 00000000..d559f669 --- /dev/null +++ b/src/Ryujinx.Graphics.Shader/Translation/ResourceReservations.cs @@ -0,0 +1,186 @@ +using Ryujinx.Graphics.Shader.IntermediateRepresentation; +using Ryujinx.Graphics.Shader.StructuredIr; +using System.Collections.Generic; +using System.Numerics; + +namespace Ryujinx.Graphics.Shader.Translation +{ + public class ResourceReservations + { + public const int TfeBuffersCount = 4; + + public const int MaxVertexBufferTextures = 32; + + public int VertexInfoConstantBufferBinding { get; } + public int VertexOutputStorageBufferBinding { get; } + public int GeometryVertexOutputStorageBufferBinding { get; } + public int GeometryIndexOutputStorageBufferBinding { get; } + public int IndexBufferTextureBinding { get; } + public int TopologyRemapBufferTextureBinding { get; } + + public int ReservedConstantBuffers { get; } + public int ReservedStorageBuffers { get; } + public int ReservedTextures { get; } + public int ReservedImages { get; } + public int InputSizePerInvocation { get; } + public int OutputSizePerInvocation { get; } + public int OutputSizeInBytesPerInvocation => OutputSizePerInvocation * sizeof(uint); + + private readonly int _tfeBufferSbBaseBinding; + private readonly int _vertexBufferTextureBaseBinding; + + private readonly Dictionary<IoDefinition, int> _offsets; + internal 
IReadOnlyDictionary<IoDefinition, int> Offsets => _offsets; + + internal ResourceReservations(bool isTransformFeedbackEmulated, bool vertexAsCompute) + { + // All stages reserves the first constant buffer binding for the support buffer. + ReservedConstantBuffers = 1; + ReservedStorageBuffers = 0; + ReservedTextures = 0; + ReservedImages = 0; + + if (isTransformFeedbackEmulated) + { + // Transform feedback emulation currently always uses 4 storage buffers. + _tfeBufferSbBaseBinding = ReservedStorageBuffers; + ReservedStorageBuffers = TfeBuffersCount; + } + + if (vertexAsCompute) + { + // One constant buffer reserved for vertex related state. + VertexInfoConstantBufferBinding = ReservedConstantBuffers++; + + // One storage buffer for the output vertex data. + VertexOutputStorageBufferBinding = ReservedStorageBuffers++; + + // One storage buffer for the output geometry vertex data. + GeometryVertexOutputStorageBufferBinding = ReservedStorageBuffers++; + + // One storage buffer for the output geometry index data. + GeometryIndexOutputStorageBufferBinding = ReservedStorageBuffers++; + + // Enough textures reserved for all vertex attributes, plus the index buffer. + IndexBufferTextureBinding = ReservedTextures; + TopologyRemapBufferTextureBinding = ReservedTextures + 1; + _vertexBufferTextureBaseBinding = ReservedTextures + 2; + ReservedTextures += 2 + MaxVertexBufferTextures; + } + } + + internal ResourceReservations( + IGpuAccessor gpuAccessor, + bool isTransformFeedbackEmulated, + bool vertexAsCompute, + IoUsage? 
vacInput, + IoUsage vacOutput) : this(isTransformFeedbackEmulated, vertexAsCompute) + { + if (vertexAsCompute) + { + _offsets = new(); + + if (vacInput.HasValue) + { + InputSizePerInvocation = FillIoOffsetMap(gpuAccessor, StorageKind.Input, vacInput.Value); + } + + OutputSizePerInvocation = FillIoOffsetMap(gpuAccessor, StorageKind.Output, vacOutput); + } + } + + private int FillIoOffsetMap(IGpuAccessor gpuAccessor, StorageKind storageKind, IoUsage vacUsage) + { + int offset = 0; + + for (int c = 0; c < 4; c++) + { + _offsets.Add(new IoDefinition(storageKind, IoVariable.Position, 0, c), offset++); + } + + _offsets.Add(new IoDefinition(storageKind, IoVariable.PointSize), offset++); + + int clipDistancesWrittenMap = vacUsage.ClipDistancesWritten; + + while (clipDistancesWrittenMap != 0) + { + int index = BitOperations.TrailingZeroCount(clipDistancesWrittenMap); + + _offsets.Add(new IoDefinition(storageKind, IoVariable.ClipDistance, 0, index), offset++); + + clipDistancesWrittenMap &= ~(1 << index); + } + + if (vacUsage.UsesRtLayer) + { + _offsets.Add(new IoDefinition(storageKind, IoVariable.Layer), offset++); + } + + if (vacUsage.UsesViewportIndex && gpuAccessor.QueryHostSupportsViewportIndexVertexTessellation()) + { + _offsets.Add(new IoDefinition(storageKind, IoVariable.VertexIndex), offset++); + } + + if (vacUsage.UsesViewportMask && gpuAccessor.QueryHostSupportsViewportMask()) + { + _offsets.Add(new IoDefinition(storageKind, IoVariable.ViewportMask), offset++); + } + + int usedDefinedMap = vacUsage.UserDefinedMap; + + while (usedDefinedMap != 0) + { + int location = BitOperations.TrailingZeroCount(usedDefinedMap); + + for (int c = 0; c < 4; c++) + { + _offsets.Add(new IoDefinition(storageKind, IoVariable.UserDefined, location, c), offset++); + } + + usedDefinedMap &= ~(1 << location); + } + + return offset; + } + + internal static bool IsVectorOrArrayVariable(IoVariable variable) + { + return variable switch + { + IoVariable.ClipDistance or + IoVariable.Position 
=> true, + _ => false, + }; + } + + public int GetTfeBufferStorageBufferBinding(int bufferIndex) + { + return _tfeBufferSbBaseBinding + bufferIndex; + } + + public int GetVertexBufferTextureBinding(int vaLocation) + { + return _vertexBufferTextureBaseBinding + vaLocation; + } + + internal bool TryGetOffset(StorageKind storageKind, int location, int component, out int offset) + { + return _offsets.TryGetValue(new IoDefinition(storageKind, IoVariable.UserDefined, location, component), out offset); + } + + internal bool TryGetOffset(StorageKind storageKind, IoVariable ioVariable, int location, int component, out int offset) + { + return _offsets.TryGetValue(new IoDefinition(storageKind, ioVariable, location, component), out offset); + } + + internal bool TryGetOffset(StorageKind storageKind, IoVariable ioVariable, int component, out int offset) + { + return _offsets.TryGetValue(new IoDefinition(storageKind, ioVariable, 0, component), out offset); + } + + internal bool TryGetOffset(StorageKind storageKind, IoVariable ioVariable, out int offset) + { + return _offsets.TryGetValue(new IoDefinition(storageKind, ioVariable, 0, 0), out offset); + } + } +} diff --git a/src/Ryujinx.Graphics.Shader/Translation/ShaderDefinitions.cs b/src/Ryujinx.Graphics.Shader/Translation/ShaderDefinitions.cs index 204f4278..3246e259 100644 --- a/src/Ryujinx.Graphics.Shader/Translation/ShaderDefinitions.cs +++ b/src/Ryujinx.Graphics.Shader/Translation/ShaderDefinitions.cs @@ -32,7 +32,7 @@ namespace Ryujinx.Graphics.Shader.Translation public bool GpPassthrough { get; } public bool LastInVertexPipeline { get; set; } - public int ThreadsPerInputPrimitive { get; } + public int ThreadsPerInputPrimitive { get; private set; } public InputTopology InputTopology => _graphicsState.Topology; public OutputTopology OutputTopology { get; } @@ -97,9 +97,14 @@ namespace Ryujinx.Graphics.Shader.Translation private readonly Dictionary<TransformFeedbackVariable, TransformFeedbackOutput> 
_transformFeedbackDefinitions; - public ShaderDefinitions(ShaderStage stage) + public ShaderDefinitions(ShaderStage stage, ulong transformFeedbackVecMap, TransformFeedbackOutput[] transformFeedbackOutputs) { Stage = stage; + TransformFeedbackEnabled = transformFeedbackOutputs != null; + _transformFeedbackOutputs = transformFeedbackOutputs; + _transformFeedbackDefinitions = new(); + + PopulateTransformFeedbackDefinitions(transformFeedbackVecMap, transformFeedbackOutputs); } public ShaderDefinitions( @@ -142,7 +147,6 @@ namespace Ryujinx.Graphics.Shader.Translation bool omapSampleMask, bool omapDepth, bool supportsScaledVertexFormats, - bool transformFeedbackEnabled, ulong transformFeedbackVecMap, TransformFeedbackOutput[] transformFeedbackOutputs) { @@ -151,17 +155,22 @@ namespace Ryujinx.Graphics.Shader.Translation GpPassthrough = gpPassthrough; ThreadsPerInputPrimitive = threadsPerInputPrimitive; OutputTopology = outputTopology; - MaxOutputVertices = maxOutputVertices; + MaxOutputVertices = gpPassthrough ? 
graphicsState.Topology.ToInputVerticesNoAdjacency() : maxOutputVertices; ImapTypes = imapTypes; OmapTargets = omapTargets; OmapSampleMask = omapSampleMask; OmapDepth = omapDepth; LastInVertexPipeline = stage < ShaderStage.Fragment; SupportsScaledVertexFormats = supportsScaledVertexFormats; - TransformFeedbackEnabled = transformFeedbackEnabled; + TransformFeedbackEnabled = transformFeedbackOutputs != null; _transformFeedbackOutputs = transformFeedbackOutputs; _transformFeedbackDefinitions = new(); + PopulateTransformFeedbackDefinitions(transformFeedbackVecMap, transformFeedbackOutputs); + } + + private void PopulateTransformFeedbackDefinitions(ulong transformFeedbackVecMap, TransformFeedbackOutput[] transformFeedbackOutputs) + { while (transformFeedbackVecMap != 0) { int vecIndex = BitOperations.TrailingZeroCount(transformFeedbackVecMap); @@ -200,16 +209,6 @@ namespace Ryujinx.Graphics.Shader.Translation OaIndexing = true; } - public TransformFeedbackOutput[] GetTransformFeedbackOutputs() - { - if (!HasTransformFeedbackOutputs()) - { - return null; - } - - return _transformFeedbackOutputs; - } - public bool TryGetTransformFeedbackOutput(IoVariable ioVariable, int location, int component, out TransformFeedbackOutput transformFeedbackOutput) { if (!HasTransformFeedbackOutputs()) @@ -320,5 +319,35 @@ namespace Ryujinx.Graphics.Shader.Translation { return _graphicsState.AttributeTypes[location]; } + + public bool IsAttributeSint(int location) + { + return (_graphicsState.AttributeTypes[location] & ~AttributeType.AnyPacked) == AttributeType.Sint; + } + + public bool IsAttributePacked(int location) + { + return _graphicsState.AttributeTypes[location].HasFlag(AttributeType.Packed); + } + + public bool IsAttributePackedRgb10A2Signed(int location) + { + return _graphicsState.AttributeTypes[location].HasFlag(AttributeType.PackedRgb10A2Signed); + } + + public int GetGeometryOutputIndexBufferStridePerInstance() + { + return MaxOutputVertices + OutputTopology switch + { + 
OutputTopology.LineStrip => MaxOutputVertices / 2, + OutputTopology.TriangleStrip => MaxOutputVertices / 3, + _ => MaxOutputVertices, + }; + } + + public int GetGeometryOutputIndexBufferStride() + { + return GetGeometryOutputIndexBufferStridePerInstance() * ThreadsPerInputPrimitive; + } } } diff --git a/src/Ryujinx.Graphics.Shader/Translation/ShaderIdentifier.cs b/src/Ryujinx.Graphics.Shader/Translation/ShaderIdentifier.cs deleted file mode 100644 index c077e1cd..00000000 --- a/src/Ryujinx.Graphics.Shader/Translation/ShaderIdentifier.cs +++ /dev/null @@ -1,187 +0,0 @@ -using Ryujinx.Graphics.Shader.IntermediateRepresentation; -using System.Collections.Generic; - -namespace Ryujinx.Graphics.Shader.Translation -{ - static class ShaderIdentifier - { - public static ShaderIdentification Identify( - IReadOnlyList<Function> functions, - IGpuAccessor gpuAccessor, - ShaderStage stage, - InputTopology inputTopology, - out int layerInputAttr) - { - if (stage == ShaderStage.Geometry && - inputTopology == InputTopology.Triangles && - !gpuAccessor.QueryHostSupportsGeometryShader() && - IsLayerPassthroughGeometryShader(functions, out layerInputAttr)) - { - return ShaderIdentification.GeometryLayerPassthrough; - } - - layerInputAttr = 0; - return ShaderIdentification.None; - } - - private static bool IsLayerPassthroughGeometryShader(IReadOnlyList<Function> functions, out int layerInputAttr) - { - bool writesLayer = false; - layerInputAttr = 0; - - if (functions.Count != 1) - { - return false; - } - - int verticesCount = 0; - int totalVerticesCount = 0; - - foreach (BasicBlock block in functions[0].Blocks) - { - // We are not expecting loops or any complex control flow here, so fail in those cases. 
- if (block.Branch != null && block.Branch.Index <= block.Index) - { - return false; - } - - foreach (INode node in block.Operations) - { - if (node is not Operation operation) - { - continue; - } - - if (IsResourceWrite(operation.Inst, operation.StorageKind)) - { - return false; - } - - if (operation.Inst == Instruction.Store && operation.StorageKind == StorageKind.Output) - { - Operand src = operation.GetSource(operation.SourcesCount - 1); - Operation srcAttributeAsgOp = null; - - if (src.Type == OperandType.LocalVariable && - src.AsgOp is Operation asgOp && - asgOp.Inst == Instruction.Load && - asgOp.StorageKind.IsInputOrOutput()) - { - if (asgOp.StorageKind != StorageKind.Input) - { - return false; - } - - srcAttributeAsgOp = asgOp; - } - - if (srcAttributeAsgOp != null) - { - IoVariable dstAttribute = (IoVariable)operation.GetSource(0).Value; - IoVariable srcAttribute = (IoVariable)srcAttributeAsgOp.GetSource(0).Value; - - if (dstAttribute == IoVariable.Layer && srcAttribute == IoVariable.UserDefined) - { - if (srcAttributeAsgOp.SourcesCount != 4) - { - return false; - } - - writesLayer = true; - layerInputAttr = srcAttributeAsgOp.GetSource(1).Value * 4 + srcAttributeAsgOp.GetSource(3).Value; - } - else - { - if (dstAttribute != srcAttribute) - { - return false; - } - - int inputsCount = operation.SourcesCount - 2; - - if (dstAttribute == IoVariable.UserDefined) - { - if (operation.GetSource(1).Value != srcAttributeAsgOp.GetSource(1).Value) - { - return false; - } - - inputsCount--; - } - - for (int i = 0; i < inputsCount; i++) - { - int dstIndex = operation.SourcesCount - 2 - i; - int srcIndex = srcAttributeAsgOp.SourcesCount - 1 - i; - - if ((dstIndex | srcIndex) < 0) - { - return false; - } - - if (operation.GetSource(dstIndex).Type != OperandType.Constant || - srcAttributeAsgOp.GetSource(srcIndex).Type != OperandType.Constant || - operation.GetSource(dstIndex).Value != srcAttributeAsgOp.GetSource(srcIndex).Value) - { - return false; - } - } - } - } - else 
if (src.Type == OperandType.Constant) - { - int dstComponent = operation.GetSource(operation.SourcesCount - 2).Value; - float expectedValue = dstComponent == 3 ? 1f : 0f; - - if (src.AsFloat() != expectedValue) - { - return false; - } - } - else - { - return false; - } - } - else if (operation.Inst == Instruction.EmitVertex) - { - verticesCount++; - } - else if (operation.Inst == Instruction.EndPrimitive) - { - totalVerticesCount += verticesCount; - verticesCount = 0; - } - } - } - - return totalVerticesCount + verticesCount == 3 && writesLayer; - } - - private static bool IsResourceWrite(Instruction inst, StorageKind storageKind) - { - switch (inst) - { - case Instruction.AtomicAdd: - case Instruction.AtomicAnd: - case Instruction.AtomicCompareAndSwap: - case Instruction.AtomicMaxS32: - case Instruction.AtomicMaxU32: - case Instruction.AtomicMinS32: - case Instruction.AtomicMinU32: - case Instruction.AtomicOr: - case Instruction.AtomicSwap: - case Instruction.AtomicXor: - case Instruction.ImageAtomic: - case Instruction.ImageStore: - return true; - case Instruction.Store: - return storageKind == StorageKind.StorageBuffer || - storageKind == StorageKind.SharedMemory || - storageKind == StorageKind.LocalMemory; - } - - return false; - } - } -} diff --git a/src/Ryujinx.Graphics.Shader/Translation/TransformContext.cs b/src/Ryujinx.Graphics.Shader/Translation/TransformContext.cs index fa687eca..87ebb8e7 100644 --- a/src/Ryujinx.Graphics.Shader/Translation/TransformContext.cs +++ b/src/Ryujinx.Graphics.Shader/Translation/TransformContext.cs @@ -6,6 +6,7 @@ namespace Ryujinx.Graphics.Shader.Translation { public readonly HelperFunctionManager Hfm; public readonly BasicBlock[] Blocks; + public readonly ShaderDefinitions Definitions; public readonly ResourceManager ResourceManager; public readonly IGpuAccessor GpuAccessor; public readonly TargetLanguage TargetLanguage; @@ -15,6 +16,7 @@ namespace Ryujinx.Graphics.Shader.Translation public TransformContext( 
HelperFunctionManager hfm, BasicBlock[] blocks, + ShaderDefinitions definitions, ResourceManager resourceManager, IGpuAccessor gpuAccessor, TargetLanguage targetLanguage, @@ -23,6 +25,7 @@ namespace Ryujinx.Graphics.Shader.Translation { Hfm = hfm; Blocks = blocks; + Definitions = definitions; ResourceManager = resourceManager; GpuAccessor = gpuAccessor; TargetLanguage = targetLanguage; diff --git a/src/Ryujinx.Graphics.Shader/Translation/Transforms/GeometryToCompute.cs b/src/Ryujinx.Graphics.Shader/Translation/Transforms/GeometryToCompute.cs new file mode 100644 index 00000000..0013cf0e --- /dev/null +++ b/src/Ryujinx.Graphics.Shader/Translation/Transforms/GeometryToCompute.cs @@ -0,0 +1,378 @@ +using Ryujinx.Graphics.Shader.IntermediateRepresentation; +using Ryujinx.Graphics.Shader.Translation.Optimizations; +using System.Collections.Generic; + +using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper; + +namespace Ryujinx.Graphics.Shader.Translation.Transforms +{ + class GeometryToCompute : ITransformPass + { + public static bool IsEnabled(IGpuAccessor gpuAccessor, ShaderStage stage, TargetLanguage targetLanguage, FeatureFlags usedFeatures) + { + return usedFeatures.HasFlag(FeatureFlags.VtgAsCompute); + } + + public static LinkedListNode<INode> RunPass(TransformContext context, LinkedListNode<INode> node) + { + if (context.Definitions.Stage != ShaderStage.Geometry) + { + return node; + } + + Operation operation = (Operation)node.Value; + + LinkedListNode<INode> newNode = node; + + switch (operation.Inst) + { + case Instruction.EmitVertex: + newNode = GenerateEmitVertex(context.Definitions, context.ResourceManager, node); + break; + case Instruction.EndPrimitive: + newNode = GenerateEndPrimitive(context.Definitions, context.ResourceManager, node); + break; + case Instruction.Load: + if (operation.StorageKind == StorageKind.Input) + { + IoVariable ioVariable = (IoVariable)operation.GetSource(0).Value; + + if 
(TryGetOffset(context.ResourceManager, operation, StorageKind.Input, out int inputOffset)) + { + Operand primVertex = ioVariable == IoVariable.UserDefined + ? operation.GetSource(2) + : operation.GetSource(1); + + Operand vertexElemOffset = GenerateVertexOffset(context.ResourceManager, node, inputOffset, primVertex); + + newNode = node.List.AddBefore(node, new Operation( + Instruction.Load, + StorageKind.StorageBuffer, + operation.Dest, + new[] { Const(context.ResourceManager.Reservations.VertexOutputStorageBufferBinding), Const(0), vertexElemOffset })); + } + else + { + switch (ioVariable) + { + case IoVariable.InvocationId: + newNode = GenerateInvocationId(node, operation.Dest); + break; + case IoVariable.PrimitiveId: + newNode = GeneratePrimitiveId(context.ResourceManager, node, operation.Dest); + break; + case IoVariable.GlobalId: + case IoVariable.SubgroupEqMask: + case IoVariable.SubgroupGeMask: + case IoVariable.SubgroupGtMask: + case IoVariable.SubgroupLaneId: + case IoVariable.SubgroupLeMask: + case IoVariable.SubgroupLtMask: + // Those are valid or expected for geometry shaders. 
+ break; + default: + context.GpuAccessor.Log($"Invalid input \"{ioVariable}\"."); + break; + } + } + } + else if (operation.StorageKind == StorageKind.Output) + { + if (TryGetOffset(context.ResourceManager, operation, StorageKind.Output, out int outputOffset)) + { + newNode = node.List.AddBefore(node, new Operation( + Instruction.Load, + StorageKind.LocalMemory, + operation.Dest, + new[] { Const(context.ResourceManager.LocalVertexDataMemoryId), Const(outputOffset) })); + } + else + { + context.GpuAccessor.Log($"Invalid output \"{(IoVariable)operation.GetSource(0).Value}\"."); + } + } + break; + case Instruction.Store: + if (operation.StorageKind == StorageKind.Output) + { + if (TryGetOffset(context.ResourceManager, operation, StorageKind.Output, out int outputOffset)) + { + Operand value = operation.GetSource(operation.SourcesCount - 1); + + newNode = node.List.AddBefore(node, new Operation( + Instruction.Store, + StorageKind.LocalMemory, + (Operand)null, + new[] { Const(context.ResourceManager.LocalVertexDataMemoryId), Const(outputOffset), value })); + } + else + { + context.GpuAccessor.Log($"Invalid output \"{(IoVariable)operation.GetSource(0).Value}\"."); + } + } + break; + } + + if (newNode != node) + { + Utils.DeleteNode(node, operation); + } + + return newNode; + } + + private static LinkedListNode<INode> GenerateEmitVertex(ShaderDefinitions definitions, ResourceManager resourceManager, LinkedListNode<INode> node) + { + int vbOutputBinding = resourceManager.Reservations.GeometryVertexOutputStorageBufferBinding; + int ibOutputBinding = resourceManager.Reservations.GeometryIndexOutputStorageBufferBinding; + int stride = resourceManager.Reservations.OutputSizePerInvocation; + + Operand outputPrimVertex = IncrementLocalMemory(node, resourceManager.LocalGeometryOutputVertexCountMemoryId); + Operand baseVertexOffset = GenerateBaseOffset( + resourceManager, + node, + definitions.MaxOutputVertices * definitions.ThreadsPerInputPrimitive, + 
definitions.ThreadsPerInputPrimitive); + Operand outputBaseVertex = Local(); + node.List.AddBefore(node, new Operation(Instruction.Add, outputBaseVertex, new[] { baseVertexOffset, outputPrimVertex })); + + Operand outputPrimIndex = IncrementLocalMemory(node, resourceManager.LocalGeometryOutputIndexCountMemoryId); + Operand baseIndexOffset = GenerateBaseOffset( + resourceManager, + node, + definitions.GetGeometryOutputIndexBufferStride(), + definitions.ThreadsPerInputPrimitive); + Operand outputBaseIndex = Local(); + node.List.AddBefore(node, new Operation(Instruction.Add, outputBaseIndex, new[] { baseIndexOffset, outputPrimIndex })); + + node.List.AddBefore(node, new Operation( + Instruction.Store, + StorageKind.StorageBuffer, + null, + new[] { Const(ibOutputBinding), Const(0), outputBaseIndex, outputBaseVertex })); + + Operand baseOffset = Local(); + node.List.AddBefore(node, new Operation(Instruction.Multiply, baseOffset, new[] { outputBaseVertex, Const(stride) })); + + LinkedListNode<INode> newNode = node; + + for (int offset = 0; offset < stride; offset++) + { + Operand vertexOffset; + + if (offset > 0) + { + vertexOffset = Local(); + node.List.AddBefore(node, new Operation(Instruction.Add, vertexOffset, new[] { baseOffset, Const(offset) })); + } + else + { + vertexOffset = baseOffset; + } + + Operand value = Local(); + node.List.AddBefore(node, new Operation( + Instruction.Load, + StorageKind.LocalMemory, + value, + new[] { Const(resourceManager.LocalVertexDataMemoryId), Const(offset) })); + + newNode = node.List.AddBefore(node, new Operation( + Instruction.Store, + StorageKind.StorageBuffer, + null, + new[] { Const(vbOutputBinding), Const(0), vertexOffset, value })); + } + + return newNode; + } + + private static LinkedListNode<INode> GenerateEndPrimitive(ShaderDefinitions definitions, ResourceManager resourceManager, LinkedListNode<INode> node) + { + int ibOutputBinding = resourceManager.Reservations.GeometryIndexOutputStorageBufferBinding; + + Operand 
outputPrimIndex = IncrementLocalMemory(node, resourceManager.LocalGeometryOutputIndexCountMemoryId); + Operand baseIndexOffset = GenerateBaseOffset( + resourceManager, + node, + definitions.GetGeometryOutputIndexBufferStride(), + definitions.ThreadsPerInputPrimitive); + Operand outputBaseIndex = Local(); + node.List.AddBefore(node, new Operation(Instruction.Add, outputBaseIndex, new[] { baseIndexOffset, outputPrimIndex })); + + return node.List.AddBefore(node, new Operation( + Instruction.Store, + StorageKind.StorageBuffer, + null, + new[] { Const(ibOutputBinding), Const(0), outputBaseIndex, Const(-1) })); + } + + private static Operand GenerateBaseOffset(ResourceManager resourceManager, LinkedListNode<INode> node, int stride, int threadsPerInputPrimitive) + { + Operand primitiveId = Local(); + GeneratePrimitiveId(resourceManager, node, primitiveId); + + Operand baseOffset = Local(); + node.List.AddBefore(node, new Operation(Instruction.Multiply, baseOffset, new[] { primitiveId, Const(stride) })); + + Operand invocationId = Local(); + GenerateInvocationId(node, invocationId); + + Operand invocationOffset = Local(); + node.List.AddBefore(node, new Operation(Instruction.Multiply, invocationOffset, new[] { invocationId, Const(stride / threadsPerInputPrimitive) })); + + Operand combinedOffset = Local(); + node.List.AddBefore(node, new Operation(Instruction.Add, combinedOffset, new[] { baseOffset, invocationOffset })); + + return combinedOffset; + } + + private static Operand IncrementLocalMemory(LinkedListNode<INode> node, int memoryId) + { + Operand oldValue = Local(); + node.List.AddBefore(node, new Operation( + Instruction.Load, + StorageKind.LocalMemory, + oldValue, + new[] { Const(memoryId) })); + + Operand newValue = Local(); + node.List.AddBefore(node, new Operation(Instruction.Add, newValue, new[] { oldValue, Const(1) })); + + node.List.AddBefore(node, new Operation(Instruction.Store, StorageKind.LocalMemory, null, new[] { Const(memoryId), newValue })); + + 
return oldValue; + } + + private static Operand GenerateVertexOffset( + ResourceManager resourceManager, + LinkedListNode<INode> node, + int elementOffset, + Operand primVertex) + { + int vertexInfoCbBinding = resourceManager.Reservations.VertexInfoConstantBufferBinding; + + Operand vertexCount = Local(); + node.List.AddBefore(node, new Operation( + Instruction.Load, + StorageKind.ConstantBuffer, + vertexCount, + new[] { Const(vertexInfoCbBinding), Const((int)VertexInfoBufferField.VertexCounts), Const(0) })); + + Operand primInputVertex = Local(); + node.List.AddBefore(node, new Operation( + Instruction.Load, + StorageKind.LocalMemory, + primInputVertex, + new[] { Const(resourceManager.LocalTopologyRemapMemoryId), primVertex })); + + Operand instanceIndex = Local(); + node.List.AddBefore(node, new Operation( + Instruction.Load, + StorageKind.Input, + instanceIndex, + new[] { Const((int)IoVariable.GlobalId), Const(1) })); + + Operand baseVertex = Local(); + node.List.AddBefore(node, new Operation(Instruction.Multiply, baseVertex, new[] { instanceIndex, vertexCount })); + + Operand vertexIndex = Local(); + node.List.AddBefore(node, new Operation(Instruction.Add, vertexIndex, new[] { baseVertex, primInputVertex })); + + Operand vertexBaseOffset = Local(); + node.List.AddBefore(node, new Operation( + Instruction.Multiply, + vertexBaseOffset, + new[] { vertexIndex, Const(resourceManager.Reservations.InputSizePerInvocation) })); + + Operand vertexElemOffset; + + if (elementOffset != 0) + { + vertexElemOffset = Local(); + + node.List.AddBefore(node, new Operation(Instruction.Add, vertexElemOffset, new[] { vertexBaseOffset, Const(elementOffset) })); + } + else + { + vertexElemOffset = vertexBaseOffset; + } + + return vertexElemOffset; + } + + private static LinkedListNode<INode> GeneratePrimitiveId(ResourceManager resourceManager, LinkedListNode<INode> node, Operand dest) + { + int vertexInfoCbBinding = resourceManager.Reservations.VertexInfoConstantBufferBinding; + + 
Operand vertexCount = Local(); + node.List.AddBefore(node, new Operation( + Instruction.Load, + StorageKind.ConstantBuffer, + vertexCount, + new[] { Const(vertexInfoCbBinding), Const((int)VertexInfoBufferField.VertexCounts), Const(0) })); + + Operand vertexIndex = Local(); + node.List.AddBefore(node, new Operation( + Instruction.Load, + StorageKind.Input, + vertexIndex, + new[] { Const((int)IoVariable.GlobalId), Const(0) })); + + Operand instanceIndex = Local(); + node.List.AddBefore(node, new Operation( + Instruction.Load, + StorageKind.Input, + instanceIndex, + new[] { Const((int)IoVariable.GlobalId), Const(1) })); + + Operand baseVertex = Local(); + node.List.AddBefore(node, new Operation(Instruction.Multiply, baseVertex, new[] { instanceIndex, vertexCount })); + + return node.List.AddBefore(node, new Operation(Instruction.Add, dest, new[] { baseVertex, vertexIndex })); + } + + private static LinkedListNode<INode> GenerateInvocationId(LinkedListNode<INode> node, Operand dest) + { + return node.List.AddBefore(node, new Operation( + Instruction.Load, + StorageKind.Input, + dest, + new[] { Const((int)IoVariable.GlobalId), Const(2) })); + } + + private static bool TryGetOffset(ResourceManager resourceManager, Operation operation, StorageKind storageKind, out int outputOffset) + { + bool isStore = operation.Inst == Instruction.Store; + + IoVariable ioVariable = (IoVariable)operation.GetSource(0).Value; + + bool isValidOutput; + + if (ioVariable == IoVariable.UserDefined) + { + int lastIndex = operation.SourcesCount - (isStore ? 2 : 1); + + int location = operation.GetSource(1).Value; + int component = operation.GetSource(lastIndex).Value; + + isValidOutput = resourceManager.Reservations.TryGetOffset(storageKind, location, component, out outputOffset); + } + else + { + if (ResourceReservations.IsVectorOrArrayVariable(ioVariable)) + { + int component = operation.GetSource(operation.SourcesCount - (isStore ? 
2 : 1)).Value; + + isValidOutput = resourceManager.Reservations.TryGetOffset(storageKind, ioVariable, component, out outputOffset); + } + else + { + isValidOutput = resourceManager.Reservations.TryGetOffset(storageKind, ioVariable, out outputOffset); + } + } + + return isValidOutput; + } + } +} diff --git a/src/Ryujinx.Graphics.Shader/Translation/Transforms/TexturePass.cs b/src/Ryujinx.Graphics.Shader/Translation/Transforms/TexturePass.cs index 5ceed4b7..2479d85f 100644 --- a/src/Ryujinx.Graphics.Shader/Translation/Transforms/TexturePass.cs +++ b/src/Ryujinx.Graphics.Shader/Translation/Transforms/TexturePass.cs @@ -153,15 +153,13 @@ namespace Ryujinx.Graphics.Shader.Translation.Transforms bool isBindless = (texOp.Flags & TextureFlags.Bindless) != 0; - if (isBindless) + if (isBindless || !resourceManager.TryGetCbufSlotAndHandleForTexture(texOp.Binding, out int cbufSlot, out int handle)) { return node; } bool intCoords = (texOp.Flags & TextureFlags.IntCoords) != 0; - (int cbufSlot, int handle) = resourceManager.GetCbufSlotAndHandleForTexture(texOp.Binding); - bool isCoordNormalized = gpuAccessor.QueryTextureCoordNormalized(handle, cbufSlot); if (isCoordNormalized || intCoords) @@ -607,13 +605,11 @@ namespace Ryujinx.Graphics.Shader.Translation.Transforms // We can't query the format of a bindless texture, // because the handle is unknown, it can have any format. 
- if (texOp.Flags.HasFlag(TextureFlags.Bindless)) + if (texOp.Flags.HasFlag(TextureFlags.Bindless) || !resourceManager.TryGetCbufSlotAndHandleForTexture(texOp.Binding, out int cbufSlot, out int handle)) { return node; } - (int cbufSlot, int handle) = resourceManager.GetCbufSlotAndHandleForTexture(texOp.Binding); - TextureFormat format = gpuAccessor.QueryTextureFormat(handle, cbufSlot); int maxPositive = format switch diff --git a/src/Ryujinx.Graphics.Shader/Translation/Transforms/TransformPasses.cs b/src/Ryujinx.Graphics.Shader/Translation/Transforms/TransformPasses.cs index 29393880..7ff3b8bf 100644 --- a/src/Ryujinx.Graphics.Shader/Translation/Transforms/TransformPasses.cs +++ b/src/Ryujinx.Graphics.Shader/Translation/Transforms/TransformPasses.cs @@ -14,6 +14,8 @@ namespace Ryujinx.Graphics.Shader.Translation.Transforms RunPass<SharedStoreSmallIntCas>(context); RunPass<SharedAtomicSignedCas>(context); RunPass<ShufflePass>(context); + RunPass<VertexToCompute>(context); + RunPass<GeometryToCompute>(context); } private static void RunPass<T>(TransformContext context) where T : ITransformPass diff --git a/src/Ryujinx.Graphics.Shader/Translation/Transforms/VertexToCompute.cs b/src/Ryujinx.Graphics.Shader/Translation/Transforms/VertexToCompute.cs new file mode 100644 index 00000000..d71ada86 --- /dev/null +++ b/src/Ryujinx.Graphics.Shader/Translation/Transforms/VertexToCompute.cs @@ -0,0 +1,364 @@ +using Ryujinx.Graphics.Shader.IntermediateRepresentation; +using Ryujinx.Graphics.Shader.Translation.Optimizations; +using System.Collections.Generic; + +using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper; + +namespace Ryujinx.Graphics.Shader.Translation.Transforms +{ + class VertexToCompute : ITransformPass + { + public static bool IsEnabled(IGpuAccessor gpuAccessor, ShaderStage stage, TargetLanguage targetLanguage, FeatureFlags usedFeatures) + { + return usedFeatures.HasFlag(FeatureFlags.VtgAsCompute); + } + + public static LinkedListNode<INode> 
RunPass(TransformContext context, LinkedListNode<INode> node) + { + if (context.Definitions.Stage != ShaderStage.Vertex) + { + return node; + } + + Operation operation = (Operation)node.Value; + + LinkedListNode<INode> newNode = node; + + if (operation.Inst == Instruction.Load && operation.StorageKind == StorageKind.Input) + { + Operand dest = operation.Dest; + + switch ((IoVariable)operation.GetSource(0).Value) + { + case IoVariable.BaseInstance: + newNode = GenerateBaseInstanceLoad(context.ResourceManager, node, dest); + break; + case IoVariable.BaseVertex: + newNode = GenerateBaseVertexLoad(context.ResourceManager, node, dest); + break; + case IoVariable.InstanceId: + newNode = GenerateInstanceIdLoad(node, dest); + break; + case IoVariable.InstanceIndex: + newNode = GenerateInstanceIndexLoad(context.ResourceManager, node, dest); + break; + case IoVariable.VertexId: + case IoVariable.VertexIndex: + newNode = GenerateVertexIndexLoad(context.ResourceManager, node, dest); + break; + case IoVariable.UserDefined: + int location = operation.GetSource(1).Value; + int component = operation.GetSource(2).Value; + + if (context.Definitions.IsAttributePacked(location)) + { + bool needsSextNorm = context.Definitions.IsAttributePackedRgb10A2Signed(location); + + Operand temp = needsSextNorm ? Local() : dest; + Operand vertexElemOffset = GenerateVertexOffset(context.ResourceManager, node, location, 0); + + newNode = node.List.AddBefore(node, new TextureOperation( + Instruction.TextureSample, + SamplerType.TextureBuffer, + TextureFormat.Unknown, + TextureFlags.IntCoords, + context.ResourceManager.Reservations.GetVertexBufferTextureBinding(location), + 1 << component, + new[] { temp }, + new[] { vertexElemOffset })); + + if (needsSextNorm) + { + bool sint = context.Definitions.IsAttributeSint(location); + CopySignExtendedNormalized(node, component == 3 ? 2 : 10, !sint, dest, temp); + } + } + else + { + Operand temp = component > 0 ? 
Local() : dest; + Operand vertexElemOffset = GenerateVertexOffset(context.ResourceManager, node, location, component); + + newNode = node.List.AddBefore(node, new TextureOperation( + Instruction.TextureSample, + SamplerType.TextureBuffer, + TextureFormat.Unknown, + TextureFlags.IntCoords, + context.ResourceManager.Reservations.GetVertexBufferTextureBinding(location), + 1, + new[] { temp }, + new[] { vertexElemOffset })); + + if (component > 0) + { + newNode = CopyMasked(context.ResourceManager, newNode, location, component, dest, temp); + } + } + break; + case IoVariable.GlobalId: + case IoVariable.SubgroupEqMask: + case IoVariable.SubgroupGeMask: + case IoVariable.SubgroupGtMask: + case IoVariable.SubgroupLaneId: + case IoVariable.SubgroupLeMask: + case IoVariable.SubgroupLtMask: + // Those are valid or expected for vertex shaders. + break; + default: + context.GpuAccessor.Log($"Invalid input \"{(IoVariable)operation.GetSource(0).Value}\"."); + break; + } + } + else if (operation.Inst == Instruction.Load && operation.StorageKind == StorageKind.Output) + { + if (TryGetOutputOffset(context.ResourceManager, operation, out int outputOffset)) + { + newNode = node.List.AddBefore(node, new Operation( + Instruction.Load, + StorageKind.LocalMemory, + operation.Dest, + new[] { Const(context.ResourceManager.LocalVertexDataMemoryId), Const(outputOffset) })); + } + else + { + context.GpuAccessor.Log($"Invalid output \"{(IoVariable)operation.GetSource(0).Value}\"."); + } + } + else if (operation.Inst == Instruction.Store && operation.StorageKind == StorageKind.Output) + { + if (TryGetOutputOffset(context.ResourceManager, operation, out int outputOffset)) + { + Operand value = operation.GetSource(operation.SourcesCount - 1); + + newNode = node.List.AddBefore(node, new Operation( + Instruction.Store, + StorageKind.LocalMemory, + (Operand)null, + new[] { Const(context.ResourceManager.LocalVertexDataMemoryId), Const(outputOffset), value })); + } + else + { + 
context.GpuAccessor.Log($"Invalid output \"{(IoVariable)operation.GetSource(0).Value}\"."); + } + } + + if (newNode != node) + { + Utils.DeleteNode(node, operation); + } + + return newNode; + } + + private static Operand GenerateVertexOffset(ResourceManager resourceManager, LinkedListNode<INode> node, int location, int component) + { + int vertexInfoCbBinding = resourceManager.Reservations.VertexInfoConstantBufferBinding; + + Operand vertexIdVr = Local(); + GenerateVertexIdVertexRateLoad(resourceManager, node, vertexIdVr); + + Operand vertexIdIr = Local(); + GenerateVertexIdInstanceRateLoad(resourceManager, node, vertexIdIr); + + Operand attributeOffset = Local(); + node.List.AddBefore(node, new Operation( + Instruction.Load, + StorageKind.ConstantBuffer, + attributeOffset, + new[] { Const(vertexInfoCbBinding), Const((int)VertexInfoBufferField.VertexOffsets), Const(location), Const(0) })); + + Operand isInstanceRate = Local(); + node.List.AddBefore(node, new Operation( + Instruction.Load, + StorageKind.ConstantBuffer, + isInstanceRate, + new[] { Const(vertexInfoCbBinding), Const((int)VertexInfoBufferField.VertexOffsets), Const(location), Const(1) })); + + Operand vertexId = Local(); + node.List.AddBefore(node, new Operation( + Instruction.ConditionalSelect, + vertexId, + new[] { isInstanceRate, vertexIdIr, vertexIdVr })); + + Operand vertexStride = Local(); + node.List.AddBefore(node, new Operation( + Instruction.Load, + StorageKind.ConstantBuffer, + vertexStride, + new[] { Const(vertexInfoCbBinding), Const((int)VertexInfoBufferField.VertexStrides), Const(location), Const(0) })); + + Operand vertexBaseOffset = Local(); + node.List.AddBefore(node, new Operation(Instruction.Multiply, vertexBaseOffset, new[] { vertexId, vertexStride })); + + Operand vertexOffset = Local(); + node.List.AddBefore(node, new Operation(Instruction.Add, vertexOffset, new[] { attributeOffset, vertexBaseOffset })); + + Operand vertexElemOffset; + + if (component != 0) + { + vertexElemOffset = 
Local(); + + node.List.AddBefore(node, new Operation(Instruction.Add, vertexElemOffset, new[] { vertexOffset, Const(component) })); + } + else + { + vertexElemOffset = vertexOffset; + } + + return vertexElemOffset; + } + + private static LinkedListNode<INode> CopySignExtendedNormalized(LinkedListNode<INode> node, int bits, bool normalize, Operand dest, Operand src) + { + Operand leftShifted = Local(); + node = node.List.AddAfter(node, new Operation( + Instruction.ShiftLeft, + leftShifted, + new[] { src, Const(32 - bits) })); + + Operand rightShifted = normalize ? Local() : dest; + node = node.List.AddAfter(node, new Operation( + Instruction.ShiftRightS32, + rightShifted, + new[] { leftShifted, Const(32 - bits) })); + + if (normalize) + { + Operand asFloat = Local(); + node = node.List.AddAfter(node, new Operation(Instruction.ConvertS32ToFP32, asFloat, new[] { rightShifted })); + node = node.List.AddAfter(node, new Operation( + Instruction.FP32 | Instruction.Multiply, + dest, + new[] { asFloat, ConstF(1f / (1 << (bits - 1))) })); + } + + return node; + } + + private static LinkedListNode<INode> CopyMasked( + ResourceManager resourceManager, + LinkedListNode<INode> node, + int location, + int component, + Operand dest, + Operand src) + { + Operand componentExists = Local(); + int vertexInfoCbBinding = resourceManager.Reservations.VertexInfoConstantBufferBinding; + node = node.List.AddAfter(node, new Operation( + Instruction.Load, + StorageKind.ConstantBuffer, + componentExists, + new[] { Const(vertexInfoCbBinding), Const((int)VertexInfoBufferField.VertexStrides), Const(location), Const(component) })); + + return node.List.AddAfter(node, new Operation( + Instruction.ConditionalSelect, + dest, + new[] { componentExists, src, ConstF(component == 3 ? 
1f : 0f) })); + } + + private static LinkedListNode<INode> GenerateBaseVertexLoad(ResourceManager resourceManager, LinkedListNode<INode> node, Operand dest) + { + int vertexInfoCbBinding = resourceManager.Reservations.VertexInfoConstantBufferBinding; + + return node.List.AddBefore(node, new Operation( + Instruction.Load, + StorageKind.ConstantBuffer, + dest, + new[] { Const(vertexInfoCbBinding), Const((int)VertexInfoBufferField.VertexCounts), Const(2) })); + } + + private static LinkedListNode<INode> GenerateBaseInstanceLoad(ResourceManager resourceManager, LinkedListNode<INode> node, Operand dest) + { + int vertexInfoCbBinding = resourceManager.Reservations.VertexInfoConstantBufferBinding; + + return node.List.AddBefore(node, new Operation( + Instruction.Load, + StorageKind.ConstantBuffer, + dest, + new[] { Const(vertexInfoCbBinding), Const((int)VertexInfoBufferField.VertexCounts), Const(3) })); + } + + private static LinkedListNode<INode> GenerateVertexIndexLoad(ResourceManager resourceManager, LinkedListNode<INode> node, Operand dest) + { + Operand baseVertex = Local(); + Operand vertexId = Local(); + + GenerateBaseVertexLoad(resourceManager, node, baseVertex); + GenerateVertexIdVertexRateLoad(resourceManager, node, vertexId); + + return node.List.AddBefore(node, new Operation(Instruction.Add, dest, new[] { baseVertex, vertexId })); + } + + private static LinkedListNode<INode> GenerateInstanceIndexLoad(ResourceManager resourceManager, LinkedListNode<INode> node, Operand dest) + { + Operand baseInstance = Local(); + Operand instanceId = Local(); + + GenerateBaseInstanceLoad(resourceManager, node, baseInstance); + + node.List.AddBefore(node, new Operation( + Instruction.Load, + StorageKind.Input, + instanceId, + new[] { Const((int)IoVariable.GlobalId), Const(1) })); + + return node.List.AddBefore(node, new Operation(Instruction.Add, dest, new[] { baseInstance, instanceId })); + } + + private static LinkedListNode<INode> 
GenerateVertexIdVertexRateLoad(ResourceManager resourceManager, LinkedListNode<INode> node, Operand dest) + { + Operand[] sources = new Operand[] { Const(resourceManager.LocalVertexIndexVertexRateMemoryId) }; + + return node.List.AddBefore(node, new Operation(Instruction.Load, StorageKind.LocalMemory, dest, sources)); + } + + private static LinkedListNode<INode> GenerateVertexIdInstanceRateLoad(ResourceManager resourceManager, LinkedListNode<INode> node, Operand dest) + { + Operand[] sources = new Operand[] { Const(resourceManager.LocalVertexIndexInstanceRateMemoryId) }; + + return node.List.AddBefore(node, new Operation(Instruction.Load, StorageKind.LocalMemory, dest, sources)); + } + + private static LinkedListNode<INode> GenerateInstanceIdLoad(LinkedListNode<INode> node, Operand dest) + { + Operand[] sources = new Operand[] { Const((int)IoVariable.GlobalId), Const(1) }; + + return node.List.AddBefore(node, new Operation(Instruction.Load, StorageKind.Input, dest, sources)); + } + + private static bool TryGetOutputOffset(ResourceManager resourceManager, Operation operation, out int outputOffset) + { + bool isStore = operation.Inst == Instruction.Store; + + IoVariable ioVariable = (IoVariable)operation.GetSource(0).Value; + + bool isValidOutput; + + if (ioVariable == IoVariable.UserDefined) + { + int lastIndex = operation.SourcesCount - (isStore ? 2 : 1); + + int location = operation.GetSource(1).Value; + int component = operation.GetSource(lastIndex).Value; + + isValidOutput = resourceManager.Reservations.TryGetOffset(StorageKind.Output, location, component, out outputOffset); + } + else + { + if (ResourceReservations.IsVectorOrArrayVariable(ioVariable)) + { + int component = operation.GetSource(operation.SourcesCount - (isStore ? 
2 : 1)).Value; + + isValidOutput = resourceManager.Reservations.TryGetOffset(StorageKind.Output, ioVariable, component, out outputOffset); + } + else + { + isValidOutput = resourceManager.Reservations.TryGetOffset(StorageKind.Output, ioVariable, out outputOffset); + } + } + + return isValidOutput; + } + } +} diff --git a/src/Ryujinx.Graphics.Shader/Translation/Translator.cs b/src/Ryujinx.Graphics.Shader/Translation/Translator.cs index 93a70ace..6a31ea2e 100644 --- a/src/Ryujinx.Graphics.Shader/Translation/Translator.cs +++ b/src/Ryujinx.Graphics.Shader/Translation/Translator.cs @@ -78,11 +78,31 @@ namespace Ryujinx.Graphics.Shader.Translation private static ShaderDefinitions CreateGraphicsDefinitions(IGpuAccessor gpuAccessor, ShaderHeader header) { + TransformFeedbackOutput[] transformFeedbackOutputs = GetTransformFeedbackOutputs(gpuAccessor, out ulong transformFeedbackVecMap); + + return new ShaderDefinitions( + header.Stage, + gpuAccessor.QueryGraphicsState(), + header.Stage == ShaderStage.Geometry && header.GpPassthrough, + header.ThreadsPerInputPrimitive, + header.OutputTopology, + header.MaxOutputVertexCount, + header.ImapTypes, + header.OmapTargets, + header.OmapSampleMask, + header.OmapDepth, + gpuAccessor.QueryHostSupportsScaledVertexFormats(), + transformFeedbackVecMap, + transformFeedbackOutputs); + } + + internal static TransformFeedbackOutput[] GetTransformFeedbackOutputs(IGpuAccessor gpuAccessor, out ulong transformFeedbackVecMap) + { bool transformFeedbackEnabled = gpuAccessor.QueryTransformFeedbackEnabled() && gpuAccessor.QueryHostSupportsTransformFeedback(); TransformFeedbackOutput[] transformFeedbackOutputs = null; - ulong transformFeedbackVecMap = 0UL; + transformFeedbackVecMap = 0UL; if (transformFeedbackEnabled) { @@ -105,21 +125,7 @@ namespace Ryujinx.Graphics.Shader.Translation } } - return new ShaderDefinitions( - header.Stage, - gpuAccessor.QueryGraphicsState(), - header.Stage == ShaderStage.Geometry && header.GpPassthrough, - 
header.ThreadsPerInputPrimitive, - header.OutputTopology, - header.MaxOutputVertexCount, - header.ImapTypes, - header.OmapTargets, - header.OmapSampleMask, - header.OmapDepth, - gpuAccessor.QueryHostSupportsScaledVertexFormats(), - transformFeedbackEnabled, - transformFeedbackVecMap, - transformFeedbackOutputs); + return transformFeedbackOutputs; } private static int GetLocalMemorySize(ShaderHeader header) @@ -131,6 +137,7 @@ namespace Ryujinx.Graphics.Shader.Translation TranslatorContext translatorContext, ResourceManager resourceManager, DecodedProgram program, + bool vertexAsCompute, bool initializeOutputs, out int initializationOperations) { @@ -147,7 +154,7 @@ namespace Ryujinx.Graphics.Shader.Translation for (int index = 0; index < functions.Length; index++) { - EmitterContext context = new(translatorContext, resourceManager, program, index != 0); + EmitterContext context = new(translatorContext, resourceManager, program, vertexAsCompute, index != 0); if (initializeOutputs && index == 0) { diff --git a/src/Ryujinx.Graphics.Shader/Translation/TranslatorContext.cs b/src/Ryujinx.Graphics.Shader/Translation/TranslatorContext.cs index 39ce92c9..f1226ae6 100644 --- a/src/Ryujinx.Graphics.Shader/Translation/TranslatorContext.cs +++ b/src/Ryujinx.Graphics.Shader/Translation/TranslatorContext.cs @@ -8,7 +8,6 @@ using Ryujinx.Graphics.Shader.Translation.Optimizations; using Ryujinx.Graphics.Shader.Translation.Transforms; using System; using System.Collections.Generic; -using System.Linq; using System.Numerics; using static Ryujinx.Graphics.Shader.IntermediateRepresentation.OperandHelper; using static Ryujinx.Graphics.Shader.Translation.Translator; @@ -19,14 +18,12 @@ namespace Ryujinx.Graphics.Shader.Translation { private readonly DecodedProgram _program; private readonly int _localMemorySize; + private IoUsage _vertexOutput; public ulong Address { get; } public int Size { get; } public int Cb1DataSize => _program.Cb1DataSize; - internal bool HasLayerInputAttribute { 
get; private set; } - internal int GpLayerInputAttribute { get; private set; } - internal AttributeUsage AttributeUsage => _program.AttributeUsage; internal ShaderDefinitions Definitions { get; } @@ -37,7 +34,8 @@ namespace Ryujinx.Graphics.Shader.Translation internal TranslationOptions Options { get; } - internal FeatureFlags UsedFeatures { get; private set; } + private bool IsTransformFeedbackEmulated => !GpuAccessor.QueryHostSupportsTransformFeedback() && GpuAccessor.QueryTransformFeedbackEnabled(); + public bool HasStore => _program.UsedFeatures.HasFlag(FeatureFlags.Store) || (IsTransformFeedbackEmulated && Definitions.LastInVertexPipeline); public bool LayerOutputWritten { get; private set; } public int LayerOutputAttribute { get; private set; } @@ -55,10 +53,10 @@ namespace Ryujinx.Graphics.Shader.Translation Size = size; _program = program; _localMemorySize = localMemorySize; + _vertexOutput = new IoUsage(FeatureFlags.None, 0, -1); Definitions = definitions; GpuAccessor = gpuAccessor; Options = options; - UsedFeatures = program.UsedFeatures; } private static bool IsLoadUserDefined(Operation operation) @@ -171,13 +169,6 @@ namespace Ryujinx.Graphics.Shader.Translation LayerOutputAttribute = attr; } - public void SetGeometryShaderLayerInputAttribute(int attr) - { - UsedFeatures |= FeatureFlags.RtLayer; - HasLayerInputAttribute = true; - GpLayerInputAttribute = attr; - } - public void SetLastInVertexPipeline() { Definitions.LastInVertexPipeline = true; @@ -187,7 +178,7 @@ namespace Ryujinx.Graphics.Shader.Translation { AttributeUsage.MergeFromtNextStage( Definitions.GpPassthrough, - nextStage.UsedFeatures.HasFlag(FeatureFlags.FixedFuncAttr), + nextStage._program.UsedFeatures.HasFlag(FeatureFlags.FixedFuncAttr), nextStage.AttributeUsage); // We don't consider geometry shaders using the geometry shader passthrough feature @@ -200,9 +191,9 @@ namespace Ryujinx.Graphics.Shader.Translation } } - public ShaderProgram Translate() + public ShaderProgram Translate(bool 
asCompute = false) { - ResourceManager resourceManager = CreateResourceManager(); + ResourceManager resourceManager = CreateResourceManager(asCompute); bool usesLocalMemory = _program.UsedFeatures.HasFlag(FeatureFlags.LocalMemory); @@ -215,36 +206,42 @@ namespace Ryujinx.Graphics.Shader.Translation resourceManager.SetCurrentSharedMemory(GpuAccessor.QueryComputeSharedMemorySize(), usesSharedMemory); } - FunctionCode[] code = EmitShader(this, resourceManager, _program, initializeOutputs: true, out _); + FunctionCode[] code = EmitShader(this, resourceManager, _program, asCompute, initializeOutputs: true, out _); - return Translate(code, resourceManager, UsedFeatures, _program.ClipDistancesWritten); + return Translate(code, resourceManager, _program.UsedFeatures, _program.ClipDistancesWritten, asCompute); } - public ShaderProgram Translate(TranslatorContext other) + public ShaderProgram Translate(TranslatorContext other, bool asCompute = false) { - ResourceManager resourceManager = CreateResourceManager(); + ResourceManager resourceManager = CreateResourceManager(asCompute); bool usesLocalMemory = _program.UsedFeatures.HasFlag(FeatureFlags.LocalMemory); resourceManager.SetCurrentLocalMemory(_localMemorySize, usesLocalMemory); - FunctionCode[] code = EmitShader(this, resourceManager, _program, initializeOutputs: false, out _); + FunctionCode[] code = EmitShader(this, resourceManager, _program, asCompute, initializeOutputs: false, out _); bool otherUsesLocalMemory = other._program.UsedFeatures.HasFlag(FeatureFlags.LocalMemory); resourceManager.SetCurrentLocalMemory(other._localMemorySize, otherUsesLocalMemory); - FunctionCode[] otherCode = EmitShader(other, resourceManager, other._program, initializeOutputs: true, out int aStart); + FunctionCode[] otherCode = EmitShader(other, resourceManager, other._program, asCompute, initializeOutputs: true, out int aStart); code = Combine(otherCode, code, aStart); return Translate( code, resourceManager, - UsedFeatures | 
other.UsedFeatures, - (byte)(_program.ClipDistancesWritten | other._program.ClipDistancesWritten)); + _program.UsedFeatures | other._program.UsedFeatures, + (byte)(_program.ClipDistancesWritten | other._program.ClipDistancesWritten), + asCompute); } - private ShaderProgram Translate(FunctionCode[] functions, ResourceManager resourceManager, FeatureFlags usedFeatures, byte clipDistancesWritten) + private ShaderProgram Translate(FunctionCode[] functions, ResourceManager resourceManager, FeatureFlags usedFeatures, byte clipDistancesWritten, bool asCompute) { + if (asCompute) + { + usedFeatures |= FeatureFlags.VtgAsCompute; + } + var cfgs = new ControlFlowGraph[functions.Length]; var frus = new RegisterUsage.FunctionRegisterUsage[functions.Length]; @@ -294,6 +291,7 @@ namespace Ryujinx.Graphics.Shader.Translation TransformContext context = new( hfm, cfg.Blocks, + Definitions, resourceManager, GpuAccessor, Options.TargetLanguage, @@ -307,28 +305,24 @@ namespace Ryujinx.Graphics.Shader.Translation funcs[i] = new Function(cfg.Blocks, $"fun{i}", false, inArgumentsCount, outArgumentsCount); } - var identification = ShaderIdentifier.Identify(funcs, GpuAccessor, Definitions.Stage, Definitions.InputTopology, out int layerInputAttr); - return Generate( funcs, AttributeUsage, + GetDefinitions(asCompute), Definitions, resourceManager, usedFeatures, - clipDistancesWritten, - identification, - layerInputAttr); + clipDistancesWritten); } private ShaderProgram Generate( IReadOnlyList<Function> funcs, AttributeUsage attributeUsage, ShaderDefinitions definitions, + ShaderDefinitions originalDefinitions, ResourceManager resourceManager, FeatureFlags usedFeatures, - byte clipDistancesWritten, - ShaderIdentification identification = ShaderIdentification.None, - int layerInputAttr = 0) + byte clipDistancesWritten) { var sInfo = StructuredProgram.MakeStructuredProgram( funcs, @@ -337,20 +331,28 @@ namespace Ryujinx.Graphics.Shader.Translation resourceManager, 
Options.Flags.HasFlag(TranslationFlags.DebugMode)); + int geometryVerticesPerPrimitive = Definitions.OutputTopology switch + { + OutputTopology.LineStrip => 2, + OutputTopology.TriangleStrip => 3, + _ => 1 + }; + var info = new ShaderProgramInfo( resourceManager.GetConstantBufferDescriptors(), resourceManager.GetStorageBufferDescriptors(), resourceManager.GetTextureDescriptors(), resourceManager.GetImageDescriptors(), - identification, - layerInputAttr, - definitions.Stage, + originalDefinitions.Stage, + geometryVerticesPerPrimitive, + originalDefinitions.MaxOutputVertices, + originalDefinitions.ThreadsPerInputPrimitive, usedFeatures.HasFlag(FeatureFlags.FragCoordXY), usedFeatures.HasFlag(FeatureFlags.InstanceId), usedFeatures.HasFlag(FeatureFlags.DrawParameters), usedFeatures.HasFlag(FeatureFlags.RtLayer), clipDistancesWritten, - definitions.OmapTargets); + originalDefinitions.OmapTargets); var hostCapabilities = new HostCapabilities( GpuAccessor.QueryHostReducedPrecision(), @@ -372,37 +374,203 @@ namespace Ryujinx.Graphics.Shader.Translation }; } - private ResourceManager CreateResourceManager() + private ResourceManager CreateResourceManager(bool vertexAsCompute) { - ResourceManager resourceManager = new(Definitions.Stage, GpuAccessor); + ResourceManager resourceManager = new(Definitions.Stage, GpuAccessor, GetResourceReservations()); - if (!GpuAccessor.QueryHostSupportsTransformFeedback() && GpuAccessor.QueryTransformFeedbackEnabled()) + if (IsTransformFeedbackEmulated) { - StructureType tfeInfoStruct = new(new StructureField[] - { - new StructureField(AggregateType.Array | AggregateType.U32, "base_offset", 4), - new StructureField(AggregateType.U32, "vertex_count") - }); - - BufferDefinition tfeInfoBuffer = new(BufferLayout.Std430, 1, Constants.TfeInfoBinding, "tfe_info", tfeInfoStruct); - resourceManager.Properties.AddOrUpdateStorageBuffer(tfeInfoBuffer); - StructureType tfeDataStruct = new(new StructureField[] { new StructureField(AggregateType.Array | 
AggregateType.U32, "data", 0) }); - for (int i = 0; i < Constants.TfeBuffersCount; i++) + for (int i = 0; i < ResourceReservations.TfeBuffersCount; i++) { - int binding = Constants.TfeBufferBaseBinding + i; + int binding = resourceManager.Reservations.GetTfeBufferStorageBufferBinding(i); BufferDefinition tfeDataBuffer = new(BufferLayout.Std430, 1, binding, $"tfe_data{i}", tfeDataStruct); resourceManager.Properties.AddOrUpdateStorageBuffer(tfeDataBuffer); } } + if (vertexAsCompute) + { + int vertexInfoCbBinding = resourceManager.Reservations.VertexInfoConstantBufferBinding; + BufferDefinition vertexInfoBuffer = new(BufferLayout.Std140, 0, vertexInfoCbBinding, "vb_info", VertexInfoBuffer.GetStructureType()); + resourceManager.Properties.AddOrUpdateConstantBuffer(vertexInfoBuffer); + + StructureType vertexOutputStruct = new(new StructureField[] + { + new StructureField(AggregateType.Array | AggregateType.FP32, "data", 0) + }); + + int vertexOutputSbBinding = resourceManager.Reservations.VertexOutputStorageBufferBinding; + BufferDefinition vertexOutputBuffer = new(BufferLayout.Std430, 1, vertexOutputSbBinding, "vertex_output", vertexOutputStruct); + resourceManager.Properties.AddOrUpdateStorageBuffer(vertexOutputBuffer); + + if (Stage == ShaderStage.Vertex) + { + int ibBinding = resourceManager.Reservations.IndexBufferTextureBinding; + TextureDefinition indexBuffer = new(2, ibBinding, "ib_data", SamplerType.TextureBuffer, TextureFormat.Unknown, TextureUsageFlags.None); + resourceManager.Properties.AddOrUpdateTexture(indexBuffer); + + int inputMap = _program.AttributeUsage.UsedInputAttributes; + + while (inputMap != 0) + { + int location = BitOperations.TrailingZeroCount(inputMap); + int binding = resourceManager.Reservations.GetVertexBufferTextureBinding(location); + TextureDefinition vaBuffer = new(2, binding, $"vb_data{location}", SamplerType.TextureBuffer, TextureFormat.Unknown, TextureUsageFlags.None); + resourceManager.Properties.AddOrUpdateTexture(vaBuffer); + + 
inputMap &= ~(1 << location); + } + } + else if (Stage == ShaderStage.Geometry) + { + int trbBinding = resourceManager.Reservations.TopologyRemapBufferTextureBinding; + TextureDefinition remapBuffer = new(2, trbBinding, "trb_data", SamplerType.TextureBuffer, TextureFormat.Unknown, TextureUsageFlags.None); + resourceManager.Properties.AddOrUpdateTexture(remapBuffer); + + int geometryVbOutputSbBinding = resourceManager.Reservations.GeometryVertexOutputStorageBufferBinding; + BufferDefinition geometryVbOutputBuffer = new(BufferLayout.Std430, 1, geometryVbOutputSbBinding, "geometry_vb_output", vertexOutputStruct); + resourceManager.Properties.AddOrUpdateStorageBuffer(geometryVbOutputBuffer); + + StructureType geometryIbOutputStruct = new(new StructureField[] + { + new StructureField(AggregateType.Array | AggregateType.U32, "data", 0) + }); + + int geometryIbOutputSbBinding = resourceManager.Reservations.GeometryIndexOutputStorageBufferBinding; + BufferDefinition geometryIbOutputBuffer = new(BufferLayout.Std430, 1, geometryIbOutputSbBinding, "geometry_ib_output", geometryIbOutputStruct); + resourceManager.Properties.AddOrUpdateStorageBuffer(geometryIbOutputBuffer); + } + + resourceManager.SetVertexAsComputeLocalMemories(Definitions.Stage, Definitions.InputTopology); + } + return resourceManager; } + private ShaderDefinitions GetDefinitions(bool vertexAsCompute) + { + if (vertexAsCompute) + { + return new ShaderDefinitions(ShaderStage.Compute, 32, 32, 1); + } + else + { + return Definitions; + } + } + + public ResourceReservations GetResourceReservations() + { + IoUsage ioUsage = _program.GetIoUsage(); + + if (Definitions.GpPassthrough) + { + ioUsage = ioUsage.Combine(_vertexOutput); + } + + return new ResourceReservations(GpuAccessor, IsTransformFeedbackEmulated, vertexAsCompute: true, _vertexOutput, ioUsage); + } + + public void SetVertexOutputMapForGeometryAsCompute(TranslatorContext vertexContext) + { + _vertexOutput = vertexContext._program.GetIoUsage(); + } + + 
public ShaderProgram GenerateVertexPassthroughForCompute()
+        {
+            // Builds a vertex stage program that, for every output entry listed in the resource reservations,
+            // loads one value from the "vb_input" storage buffer and stores it to the matching shader output.
+            AttributeUsage passthroughAttributes = new(GpuAccessor);
+            ResourceManager passthroughResources = new(ShaderStage.Vertex, GpuAccessor);
+            ResourceReservations reservations = GetResourceReservations();
+
+            bool isVertexStage = Stage == ShaderStage.Vertex;
+            bool isOpenGl = Options.TargetApi == TargetApi.OpenGL;
+
+            int infoCbBinding = reservations.VertexInfoConstantBufferBinding;
+
+            // The "vb_info" constant buffer is only declared (and later read) when this context is a vertex stage.
+            if (isVertexStage)
+            {
+                passthroughResources.Properties.AddOrUpdateConstantBuffer(
+                    new BufferDefinition(BufferLayout.Std140, 0, infoCbBinding, "vb_info", VertexInfoBuffer.GetStructureType()));
+            }
+
+            StructureField[] inputFields =
+            {
+                new StructureField(AggregateType.Array | AggregateType.FP32, "data", 0)
+            };
+
+            int inputSbBinding = reservations.VertexOutputStorageBufferBinding;
+
+            passthroughResources.Properties.AddOrUpdateStorageBuffer(
+                new BufferDefinition(BufferLayout.Std430, 1, inputSbBinding, "vb_input", new StructureType(inputFields)));
+
+            EmitterContext emitter = new();
+
+            // The name of the built-in holding the vertex index depends on the target API.
+            IoVariable indexVariable = isOpenGl ? IoVariable.VertexId : IoVariable.VertexIndex;
+            Operand vertexIndex = emitter.Load(StorageKind.Input, indexVariable);
+
+            if (isVertexStage)
+            {
+                Operand vertexCount = emitter.Load(StorageKind.ConstantBuffer, infoCbBinding, Const((int)VertexInfoBufferField.VertexCounts), Const(0));
+
+                // Whenever this shader is used the base instance is zero, so it makes no real difference
+                // which of the two instance built-ins is read here.
+                IoVariable instanceVariable = isOpenGl ? IoVariable.InstanceId : IoVariable.InstanceIndex;
+                Operand instanceId = emitter.Load(StorageKind.Input, instanceVariable);
+                Operand instanceBase = emitter.IMultiply(instanceId, vertexCount);
+
+                vertexIndex = emitter.IAdd(instanceBase, vertexIndex);
+            }
+
+            Operand baseOffset = emitter.IMultiply(vertexIndex, Const(reservations.OutputSizePerInvocation));
+
+            foreach ((IoDefinition io, int inputOffset) in reservations.Offsets)
+            {
+                // Only output entries are copied, everything else is ignored.
+                if (io.StorageKind == StorageKind.Output)
+                {
+                    Operand address = baseOffset;
+
+                    if (inputOffset != 0)
+                    {
+                        address = emitter.IAdd(baseOffset, Const(inputOffset));
+                    }
+
+                    Operand loaded = emitter.Load(StorageKind.StorageBuffer, inputSbBinding, Const(0), address);
+
+                    if (io.IoVariable == IoVariable.UserDefined)
+                    {
+                        // User defined outputs are addressed by location and component, and must be flagged as used.
+                        emitter.Store(StorageKind.Output, io.IoVariable, null, Const(io.Location), Const(io.Component), loaded);
+                        passthroughAttributes.SetOutputUserAttribute(io.Location);
+                    }
+                    else if (ResourceReservations.IsVectorOrArrayVariable(io.IoVariable))
+                    {
+                        emitter.Store(StorageKind.Output, io.IoVariable, null, Const(io.Component), loaded);
+                    }
+                    else
+                    {
+                        emitter.Store(StorageKind.Output, io.IoVariable, null, loaded);
+                    }
+                }
+            }
+
+            var cfg = ControlFlowGraph.Create(emitter.GetOperations());
+            Function mainFunction = new(cfg.Blocks, "main", false, 0, 0);
+
+            var tfOutputs = GetTransformFeedbackOutputs(GpuAccessor, out ulong tfVecMap);
+
+            ShaderDefinitions passthroughDefinitions = new(ShaderStage.Vertex, tfVecMap, tfOutputs)
+            {
+                LastInVertexPipeline = true,
+            };
+
+            // The same definitions instance is deliberately passed for both definitions arguments.
+            return Generate(
+                new[] { mainFunction },
+                passthroughAttributes,
+                passthroughDefinitions,
+                passthroughDefinitions,
+                passthroughResources,
+                FeatureFlags.None,
+                0);
+        }
+
public ShaderProgram GenerateGeometryPassthrough() { int outputAttributesMask = AttributeUsage.UsedOutputAttributes; @@ -484,7 +652,14 @@ namespace Ryujinx.Graphics.Shader.Translation
outputTopology, maxOutputVertices); - return Generate(new[] { function }, attributeUsage, definitions, resourceManager, FeatureFlags.RtLayer, 0); + return Generate( + new[] { function }, + attributeUsage, + definitions, + definitions, + resourceManager, + FeatureFlags.RtLayer, + 0); } } } diff --git a/src/Ryujinx.Graphics.Shader/VertexInfoBuffer.cs b/src/Ryujinx.Graphics.Shader/VertexInfoBuffer.cs new file mode 100644 index 00000000..845135f8 --- /dev/null +++ b/src/Ryujinx.Graphics.Shader/VertexInfoBuffer.cs @@ -0,0 +1,59 @@ +using Ryujinx.Common.Memory; +using Ryujinx.Graphics.Shader.StructuredIr; +using Ryujinx.Graphics.Shader.Translation; +using System.Runtime.CompilerServices; +
+namespace Ryujinx.Graphics.Shader
+{
+    /// <summary>
+    /// Identifies the fields of <see cref="VertexInfoBuffer"/>.
+    /// </summary>
+    enum VertexInfoBufferField
+    {
+        // The members below have to stay in the same order as the fields of the struct.
+        VertexCounts,
+        GeometryCounts,
+        VertexStrides,
+        VertexOffsets,
+    }
+
+    /// <summary>
+    /// Data layout whose shader side description is returned by <see cref="GetStructureType"/>.
+    /// </summary>
+    public struct VertexInfoBuffer
+    {
+        // Instance fields, their declaration order defines the byte offsets computed by the static constructor.
+        public Vector4<int> VertexCounts;
+        public Vector4<int> GeometryCounts;
+        public Array32<Vector4<int>> VertexStrides;
+        public Array32<Vector4<int>> VertexOffsets;
+
+        /// <summary>Size of the whole structure, in bytes.</summary>
+        public static readonly int RequiredSize;
+
+        // Byte offset of each field, measured from the start of the structure.
+        public static readonly int VertexCountsOffset;
+        public static readonly int GeometryCountsOffset;
+        public static readonly int VertexStridesOffset;
+        public static readonly int VertexOffsetsOffset;
+
+        /// <summary>
+        /// Measures the offsets on a throwaway instance, and the total size of the structure.
+        /// </summary>
+        static VertexInfoBuffer()
+        {
+            VertexInfoBuffer layoutProbe = new();
+
+            VertexCountsOffset = OffsetOf(ref layoutProbe, ref layoutProbe.VertexCounts);
+            GeometryCountsOffset = OffsetOf(ref layoutProbe, ref layoutProbe.GeometryCounts);
+            VertexStridesOffset = OffsetOf(ref layoutProbe, ref layoutProbe.VertexStrides);
+            VertexOffsetsOffset = OffsetOf(ref layoutProbe, ref layoutProbe.VertexOffsets);
+
+            RequiredSize = Unsafe.SizeOf<VertexInfoBuffer>();
+        }
+
+        /// <summary>
+        /// Gets the distance in bytes between the start of <paramref name="storage"/> and <paramref name="target"/>.
+        /// </summary>
+        /// <typeparam name="T">Type of the field being located</typeparam>
+        /// <param name="storage">Structure that contains the field</param>
+        /// <param name="target">Field to locate, expected to be a field of <paramref name="storage"/></param>
+        /// <returns>Byte offset of the field</returns>
+        private static int OffsetOf<T>(ref VertexInfoBuffer storage, ref T target) =>
+            (int)Unsafe.ByteOffset(ref Unsafe.As<VertexInfoBuffer, T>(ref storage), ref target);
+
+        /// <summary>
+        /// Creates the structure type describing this buffer: two unsigned 4-component vectors followed by two
+        /// arrays of unsigned 4-component vectors, both with <c>ResourceReservations.MaxVertexBufferTextures</c> elements.
+        /// </summary>
+        /// <returns>The structure type, with fields listed in the same order as <see cref="VertexInfoBufferField"/></returns>
+        internal static StructureType GetStructureType()
+        {
+            const AggregateType UintVector4 = AggregateType.Vector4 | AggregateType.U32;
+            const AggregateType UintVector4Array = AggregateType.Array | UintVector4;
+
+            StructureField[] fields =
+            {
+                new StructureField(UintVector4, "vertex_counts"),
+                new StructureField(UintVector4, "geometry_counts"),
+                new StructureField(UintVector4Array, "vertex_strides", ResourceReservations.MaxVertexBufferTextures),
+                new StructureField(UintVector4Array, "vertex_offsets", ResourceReservations.MaxVertexBufferTextures),
+            };
+
+            return new StructureType(fields);
+        }
+    }
+}
|
