diff options
Diffstat (limited to 'src/Ryujinx.Graphics.Shader/CodeGen')
43 files changed, 8067 insertions, 0 deletions
diff --git a/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/CodeGenContext.cs b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/CodeGenContext.cs new file mode 100644 index 00000000..9eb20f6f --- /dev/null +++ b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/CodeGenContext.cs @@ -0,0 +1,95 @@ +using Ryujinx.Graphics.Shader.StructuredIr; +using Ryujinx.Graphics.Shader.Translation; +using System.Text; + +namespace Ryujinx.Graphics.Shader.CodeGen.Glsl +{ + class CodeGenContext + { + public const string Tab = " "; + + public StructuredFunction CurrentFunction { get; set; } + + public StructuredProgramInfo Info { get; } + + public ShaderConfig Config { get; } + + public OperandManager OperandManager { get; } + + private readonly StringBuilder _sb; + + private int _level; + + private string _indentation; + + public CodeGenContext(StructuredProgramInfo info, ShaderConfig config) + { + Info = info; + Config = config; + + OperandManager = new OperandManager(); + + _sb = new StringBuilder(); + } + + public void AppendLine() + { + _sb.AppendLine(); + } + + public void AppendLine(string str) + { + _sb.AppendLine(_indentation + str); + } + + public string GetCode() + { + return _sb.ToString(); + } + + public void EnterScope() + { + AppendLine("{"); + + _level++; + + UpdateIndentation(); + } + + public void LeaveScope(string suffix = "") + { + if (_level == 0) + { + return; + } + + _level--; + + UpdateIndentation(); + + AppendLine("}" + suffix); + } + + public StructuredFunction GetFunction(int id) + { + return Info.Functions[id]; + } + + private void UpdateIndentation() + { + _indentation = GetIndentation(_level); + } + + private static string GetIndentation(int level) + { + string indentation = string.Empty; + + for (int index = 0; index < level; index++) + { + indentation += Tab; + } + + return indentation; + } + } +}
\ No newline at end of file diff --git a/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/Declarations.cs b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/Declarations.cs new file mode 100644 index 00000000..81b79ec4 --- /dev/null +++ b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/Declarations.cs @@ -0,0 +1,818 @@ +using Ryujinx.Common; +using Ryujinx.Graphics.Shader.StructuredIr; +using Ryujinx.Graphics.Shader.Translation; +using System; +using System.Collections.Generic; +using System.Linq; +using System.Numerics; + +namespace Ryujinx.Graphics.Shader.CodeGen.Glsl +{ + static class Declarations + { + public static void Declare(CodeGenContext context, StructuredProgramInfo info) + { + context.AppendLine(context.Config.Options.TargetApi == TargetApi.Vulkan ? "#version 460 core" : "#version 450 core"); + context.AppendLine("#extension GL_ARB_gpu_shader_int64 : enable"); + + if (context.Config.GpuAccessor.QueryHostSupportsShaderBallot()) + { + context.AppendLine("#extension GL_ARB_shader_ballot : enable"); + } + else + { + context.AppendLine("#extension GL_KHR_shader_subgroup_basic : enable"); + context.AppendLine("#extension GL_KHR_shader_subgroup_ballot : enable"); + } + + context.AppendLine("#extension GL_ARB_shader_group_vote : enable"); + context.AppendLine("#extension GL_EXT_shader_image_load_formatted : enable"); + context.AppendLine("#extension GL_EXT_texture_shadow_lod : enable"); + + if (context.Config.Stage == ShaderStage.Compute) + { + context.AppendLine("#extension GL_ARB_compute_shader : enable"); + } + else if (context.Config.Stage == ShaderStage.Fragment) + { + if (context.Config.GpuAccessor.QueryHostSupportsFragmentShaderInterlock()) + { + context.AppendLine("#extension GL_ARB_fragment_shader_interlock : enable"); + } + else if (context.Config.GpuAccessor.QueryHostSupportsFragmentShaderOrderingIntel()) + { + context.AppendLine("#extension GL_INTEL_fragment_shader_ordering : enable"); + } + } + else + { + if (context.Config.Stage == ShaderStage.Vertex) + { + context.AppendLine("#extension GL_ARB_shader_draw_parameters : enable"); + } + + context.AppendLine("#extension GL_ARB_shader_viewport_layer_array : enable"); + } + + if (context.Config.GpPassthrough && context.Config.GpuAccessor.QueryHostSupportsGeometryShaderPassthrough()) + { + context.AppendLine("#extension GL_NV_geometry_shader_passthrough : enable"); + } + + if (context.Config.GpuAccessor.QueryHostSupportsViewportMask()) + { + context.AppendLine("#extension GL_NV_viewport_array2 : enable"); + } + + context.AppendLine("#pragma optionNV(fastmath off)"); + context.AppendLine(); + + context.AppendLine($"const int {DefaultNames.UndefinedName} = 0;"); + context.AppendLine(); + + if (context.Config.Stage == ShaderStage.Compute) + { + int localMemorySize = BitUtils.DivRoundUp(context.Config.GpuAccessor.QueryComputeLocalMemorySize(), 4); + + if (localMemorySize != 0) + { + string localMemorySizeStr = NumberFormatter.FormatInt(localMemorySize); + + context.AppendLine($"uint {DefaultNames.LocalMemoryName}[{localMemorySizeStr}];"); + context.AppendLine(); + } + + int sharedMemorySize = BitUtils.DivRoundUp(context.Config.GpuAccessor.QueryComputeSharedMemorySize(), 4); + + if (sharedMemorySize != 0) + { + string sharedMemorySizeStr = NumberFormatter.FormatInt(sharedMemorySize); + + context.AppendLine($"shared uint {DefaultNames.SharedMemoryName}[{sharedMemorySizeStr}];"); + context.AppendLine(); + } + } + else if (context.Config.LocalMemorySize != 0) + { + int localMemorySize = BitUtils.DivRoundUp(context.Config.LocalMemorySize, 4); + + string localMemorySizeStr = NumberFormatter.FormatInt(localMemorySize); + + context.AppendLine($"uint {DefaultNames.LocalMemoryName}[{localMemorySizeStr}];"); + context.AppendLine(); + } + + var cBufferDescriptors = context.Config.GetConstantBufferDescriptors(); + if (cBufferDescriptors.Length != 0) + { + DeclareUniforms(context, cBufferDescriptors); + + context.AppendLine(); + } + + var sBufferDescriptors = context.Config.GetStorageBufferDescriptors(); + if (sBufferDescriptors.Length != 0) + { + DeclareStorages(context, sBufferDescriptors); + + context.AppendLine(); + } + + var textureDescriptors = context.Config.GetTextureDescriptors(); + if (textureDescriptors.Length != 0) + { + DeclareSamplers(context, textureDescriptors); + + context.AppendLine(); + } + + var imageDescriptors = context.Config.GetImageDescriptors(); + if (imageDescriptors.Length != 0) + { + DeclareImages(context, imageDescriptors); + + context.AppendLine(); + } + + if (context.Config.Stage != ShaderStage.Compute) + { + if (context.Config.Stage == ShaderStage.Geometry) + { + InputTopology inputTopology = context.Config.GpuAccessor.QueryPrimitiveTopology(); + string inPrimitive = inputTopology.ToGlslString(); + + context.AppendLine($"layout (invocations = {context.Config.ThreadsPerInputPrimitive}, {inPrimitive}) in;"); + + if (context.Config.GpPassthrough && context.Config.GpuAccessor.QueryHostSupportsGeometryShaderPassthrough()) + { + context.AppendLine($"layout (passthrough) in gl_PerVertex"); + context.EnterScope(); + context.AppendLine("vec4 gl_Position;"); + context.AppendLine("float gl_PointSize;"); + context.AppendLine("float gl_ClipDistance[];"); + context.LeaveScope(";"); + } + else + { + string outPrimitive = context.Config.OutputTopology.ToGlslString(); + + int maxOutputVertices = context.Config.GpPassthrough + ? inputTopology.ToInputVertices() + : context.Config.MaxOutputVertices; + + context.AppendLine($"layout ({outPrimitive}, max_vertices = {maxOutputVertices}) out;"); + } + + context.AppendLine(); + } + else if (context.Config.Stage == ShaderStage.TessellationControl) + { + int threadsPerInputPrimitive = context.Config.ThreadsPerInputPrimitive; + + context.AppendLine($"layout (vertices = {threadsPerInputPrimitive}) out;"); + context.AppendLine(); + } + else if (context.Config.Stage == ShaderStage.TessellationEvaluation) + { + bool tessCw = context.Config.GpuAccessor.QueryTessCw(); + + if (context.Config.Options.TargetApi == TargetApi.Vulkan) + { + // We invert the front face on Vulkan backend, so we need to do that here aswell. + tessCw = !tessCw; + } + + string patchType = context.Config.GpuAccessor.QueryTessPatchType().ToGlsl(); + string spacing = context.Config.GpuAccessor.QueryTessSpacing().ToGlsl(); + string windingOrder = tessCw ? "cw" : "ccw"; + + context.AppendLine($"layout ({patchType}, {spacing}, {windingOrder}) in;"); + context.AppendLine(); + } + + if (context.Config.UsedInputAttributes != 0 || context.Config.GpPassthrough) + { + DeclareInputAttributes(context, info); + + context.AppendLine(); + } + + if (context.Config.UsedOutputAttributes != 0 || context.Config.Stage != ShaderStage.Fragment) + { + DeclareOutputAttributes(context, info); + + context.AppendLine(); + } + + if (context.Config.UsedInputAttributesPerPatch.Count != 0) + { + DeclareInputAttributesPerPatch(context, context.Config.UsedInputAttributesPerPatch); + + context.AppendLine(); + } + + if (context.Config.UsedOutputAttributesPerPatch.Count != 0) + { + DeclareUsedOutputAttributesPerPatch(context, context.Config.UsedOutputAttributesPerPatch); + + context.AppendLine(); + } + + if (context.Config.TransformFeedbackEnabled && context.Config.LastInVertexPipeline) + { + var tfOutput = context.Config.GetTransformFeedbackOutput(AttributeConsts.PositionX); + if (tfOutput.Valid) + { + context.AppendLine($"layout (xfb_buffer = {tfOutput.Buffer}, xfb_offset = {tfOutput.Offset}, xfb_stride = {tfOutput.Stride}) out gl_PerVertex"); + context.EnterScope(); + context.AppendLine("vec4 gl_Position;"); + context.LeaveScope(context.Config.Stage == ShaderStage.TessellationControl ? " gl_out[];" : ";"); + } + } + } + else + { + string localSizeX = NumberFormatter.FormatInt(context.Config.GpuAccessor.QueryComputeLocalSizeX()); + string localSizeY = NumberFormatter.FormatInt(context.Config.GpuAccessor.QueryComputeLocalSizeY()); + string localSizeZ = NumberFormatter.FormatInt(context.Config.GpuAccessor.QueryComputeLocalSizeZ()); + + context.AppendLine( + "layout (" + + $"local_size_x = {localSizeX}, " + + $"local_size_y = {localSizeY}, " + + $"local_size_z = {localSizeZ}) in;"); + context.AppendLine(); + } + + bool isFragment = context.Config.Stage == ShaderStage.Fragment; + + if (isFragment || context.Config.Stage == ShaderStage.Compute || context.Config.Stage == ShaderStage.Vertex) + { + if (isFragment && context.Config.GpuAccessor.QueryEarlyZForce()) + { + context.AppendLine("layout(early_fragment_tests) in;"); + context.AppendLine(); + } + + if ((context.Config.UsedFeatures & (FeatureFlags.FragCoordXY | FeatureFlags.IntegerSampling)) != 0) + { + string stage = OperandManager.GetShaderStagePrefix(context.Config.Stage); + + int scaleElements = context.Config.GetTextureDescriptors().Length + context.Config.GetImageDescriptors().Length; + + if (isFragment) + { + scaleElements++; // Also includes render target scale, for gl_FragCoord. + } + + DeclareSupportUniformBlock(context, context.Config.Stage, scaleElements); + + if (context.Config.UsedFeatures.HasFlag(FeatureFlags.IntegerSampling) && scaleElements != 0) + { + AppendHelperFunction(context, $"Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/TexelFetchScale_{stage}.glsl"); + context.AppendLine(); + } + } + else if (isFragment || context.Config.Stage == ShaderStage.Vertex) + { + DeclareSupportUniformBlock(context, context.Config.Stage, 0); + } + } + + if ((info.HelperFunctionsMask & HelperFunctionsMask.AtomicMinMaxS32Shared) != 0) + { + AppendHelperFunction(context, "Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/AtomicMinMaxS32Shared.glsl"); + } + + if ((info.HelperFunctionsMask & HelperFunctionsMask.AtomicMinMaxS32Storage) != 0) + { + AppendHelperFunction(context, "Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/AtomicMinMaxS32Storage.glsl"); + } + + if ((info.HelperFunctionsMask & HelperFunctionsMask.MultiplyHighS32) != 0) + { + AppendHelperFunction(context, "Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/MultiplyHighS32.glsl"); + } + + if ((info.HelperFunctionsMask & HelperFunctionsMask.MultiplyHighU32) != 0) + { + AppendHelperFunction(context, "Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/MultiplyHighU32.glsl"); + } + + if ((info.HelperFunctionsMask & HelperFunctionsMask.Shuffle) != 0) + { + AppendHelperFunction(context, "Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/Shuffle.glsl"); + } + + if ((info.HelperFunctionsMask & HelperFunctionsMask.ShuffleDown) != 0) + { + AppendHelperFunction(context, "Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/ShuffleDown.glsl"); + } + + if ((info.HelperFunctionsMask & HelperFunctionsMask.ShuffleUp) != 0) + { + AppendHelperFunction(context, "Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/ShuffleUp.glsl"); + } + + if ((info.HelperFunctionsMask & HelperFunctionsMask.ShuffleXor) != 0) + { + AppendHelperFunction(context, "Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/ShuffleXor.glsl"); + } + + if ((info.HelperFunctionsMask & HelperFunctionsMask.StoreSharedSmallInt) != 0) + { + AppendHelperFunction(context, "Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/StoreSharedSmallInt.glsl"); + } + + if ((info.HelperFunctionsMask & HelperFunctionsMask.StoreStorageSmallInt) != 0) + { + AppendHelperFunction(context, "Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/StoreStorageSmallInt.glsl"); + } + + if ((info.HelperFunctionsMask & HelperFunctionsMask.SwizzleAdd) != 0) + { + AppendHelperFunction(context, "Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/SwizzleAdd.glsl"); + } + } + + private static string GetTfLayout(TransformFeedbackOutput tfOutput) + { + if (tfOutput.Valid) + { + return $"layout (xfb_buffer = {tfOutput.Buffer}, xfb_offset = {tfOutput.Offset}, xfb_stride = {tfOutput.Stride}) "; + } + + return string.Empty; + } + + public static void DeclareLocals(CodeGenContext context, StructuredFunction function) + { + foreach (AstOperand decl in function.Locals) + { + string name = context.OperandManager.DeclareLocal(decl); + + context.AppendLine(GetVarTypeName(context, decl.VarType) + " " + name + ";"); + } + } + + public static string GetVarTypeName(CodeGenContext context, AggregateType type, bool precise = true) + { + if (context.Config.GpuAccessor.QueryHostReducedPrecision()) + { + precise = false; + } + + return type switch + { + AggregateType.Void => "void", + AggregateType.Bool => "bool", + AggregateType.FP32 => precise ? "precise float" : "float", + AggregateType.FP64 => "double", + AggregateType.S32 => "int", + AggregateType.U32 => "uint", + AggregateType.Vector2 | AggregateType.Bool => "bvec2", + AggregateType.Vector2 | AggregateType.FP32 => precise ? "precise vec2" : "vec2", + AggregateType.Vector2 | AggregateType.FP64 => "dvec2", + AggregateType.Vector2 | AggregateType.S32 => "ivec2", + AggregateType.Vector2 | AggregateType.U32 => "uvec2", + AggregateType.Vector3 | AggregateType.Bool => "bvec3", + AggregateType.Vector3 | AggregateType.FP32 => precise ? "precise vec3" : "vec3", + AggregateType.Vector3 | AggregateType.FP64 => "dvec3", + AggregateType.Vector3 | AggregateType.S32 => "ivec3", + AggregateType.Vector3 | AggregateType.U32 => "uvec3", + AggregateType.Vector4 | AggregateType.Bool => "bvec4", + AggregateType.Vector4 | AggregateType.FP32 => precise ? "precise vec4" : "vec4", + AggregateType.Vector4 | AggregateType.FP64 => "dvec4", + AggregateType.Vector4 | AggregateType.S32 => "ivec4", + AggregateType.Vector4 | AggregateType.U32 => "uvec4", + _ => throw new ArgumentException($"Invalid variable type \"{type}\".") + }; + } + + private static void DeclareUniforms(CodeGenContext context, BufferDescriptor[] descriptors) + { + string ubSize = "[" + NumberFormatter.FormatInt(Constants.ConstantBufferSize / 16) + "]"; + + if (context.Config.UsedFeatures.HasFlag(FeatureFlags.CbIndexing)) + { + string ubName = OperandManager.GetShaderStagePrefix(context.Config.Stage); + + ubName += "_" + DefaultNames.UniformNamePrefix; + + string blockName = $"{ubName}_{DefaultNames.BlockSuffix}"; + + context.AppendLine($"layout (binding = {context.Config.FirstConstantBufferBinding}, std140) uniform {blockName}"); + context.EnterScope(); + context.AppendLine("vec4 " + DefaultNames.DataName + ubSize + ";"); + context.LeaveScope($" {ubName}[{NumberFormatter.FormatInt(descriptors.Max(x => x.Slot) + 1)}];"); + } + else + { + foreach (var descriptor in descriptors) + { + string ubName = OperandManager.GetShaderStagePrefix(context.Config.Stage); + + ubName += "_" + DefaultNames.UniformNamePrefix + descriptor.Slot; + + context.AppendLine($"layout (binding = {descriptor.Binding}, std140) uniform {ubName}"); + context.EnterScope(); + context.AppendLine("vec4 " + OperandManager.GetUbName(context.Config.Stage, descriptor.Slot, false) + ubSize + ";"); + context.LeaveScope(";"); + } + } + } + + private static void DeclareStorages(CodeGenContext context, BufferDescriptor[] descriptors) + { + string sbName = OperandManager.GetShaderStagePrefix(context.Config.Stage); + + sbName += "_" + DefaultNames.StorageNamePrefix; + + string blockName = $"{sbName}_{DefaultNames.BlockSuffix}"; + + string layout = context.Config.Options.TargetApi == TargetApi.Vulkan ? ", set = 1" : string.Empty; + + context.AppendLine($"layout (binding = {context.Config.FirstStorageBufferBinding}{layout}, std430) buffer {blockName}"); + context.EnterScope(); + context.AppendLine("uint " + DefaultNames.DataName + "[];"); + context.LeaveScope($" {sbName}[{NumberFormatter.FormatInt(descriptors.Max(x => x.Slot) + 1)}];"); + } + + private static void DeclareSamplers(CodeGenContext context, TextureDescriptor[] descriptors) + { + int arraySize = 0; + foreach (var descriptor in descriptors) + { + if (descriptor.Type.HasFlag(SamplerType.Indexed)) + { + if (arraySize == 0) + { + arraySize = ShaderConfig.SamplerArraySize; + } + else if (--arraySize != 0) + { + continue; + } + } + + string indexExpr = NumberFormatter.FormatInt(arraySize); + + string samplerName = OperandManager.GetSamplerName( + context.Config.Stage, + descriptor.CbufSlot, + descriptor.HandleIndex, + descriptor.Type.HasFlag(SamplerType.Indexed), + indexExpr); + + string samplerTypeName = descriptor.Type.ToGlslSamplerType(); + + string layout = string.Empty; + + if (context.Config.Options.TargetApi == TargetApi.Vulkan) + { + layout = ", set = 2"; + } + + context.AppendLine($"layout (binding = {descriptor.Binding}{layout}) uniform {samplerTypeName} {samplerName};"); + } + } + + private static void DeclareImages(CodeGenContext context, TextureDescriptor[] descriptors) + { + int arraySize = 0; + foreach (var descriptor in descriptors) + { + if (descriptor.Type.HasFlag(SamplerType.Indexed)) + { + if (arraySize == 0) + { + arraySize = ShaderConfig.SamplerArraySize; + } + else if (--arraySize != 0) + { + continue; + } + } + + string indexExpr = NumberFormatter.FormatInt(arraySize); + + string imageName = OperandManager.GetImageName( + context.Config.Stage, + descriptor.CbufSlot, + descriptor.HandleIndex, + descriptor.Format, + descriptor.Type.HasFlag(SamplerType.Indexed), + indexExpr); + + string imageTypeName = descriptor.Type.ToGlslImageType(descriptor.Format.GetComponentType()); + + if (descriptor.Flags.HasFlag(TextureUsageFlags.ImageCoherent)) + { + imageTypeName = "coherent " + imageTypeName; + } + + string layout = descriptor.Format.ToGlslFormat(); + + if (!string.IsNullOrEmpty(layout)) + { + layout = ", " + layout; + } + + if (context.Config.Options.TargetApi == TargetApi.Vulkan) + { + layout = $", set = 3{layout}"; + } + + context.AppendLine($"layout (binding = {descriptor.Binding}{layout}) uniform {imageTypeName} {imageName};"); + } + } + + private static void DeclareInputAttributes(CodeGenContext context, StructuredProgramInfo info) + { + if (context.Config.UsedFeatures.HasFlag(FeatureFlags.IaIndexing)) + { + string suffix = context.Config.Stage == ShaderStage.Geometry ? "[]" : string.Empty; + + context.AppendLine($"layout (location = 0) in vec4 {DefaultNames.IAttributePrefix}{suffix}[{Constants.MaxAttributes}];"); + } + else + { + int usedAttributes = context.Config.UsedInputAttributes | context.Config.PassthroughAttributes; + while (usedAttributes != 0) + { + int index = BitOperations.TrailingZeroCount(usedAttributes); + DeclareInputAttribute(context, info, index); + usedAttributes &= ~(1 << index); + } + } + } + + private static void DeclareInputAttributesPerPatch(CodeGenContext context, HashSet<int> attrs) + { + foreach (int attr in attrs.Order()) + { + DeclareInputAttributePerPatch(context, attr); + } + } + + private static void DeclareInputAttribute(CodeGenContext context, StructuredProgramInfo info, int attr) + { + string suffix = IsArrayAttributeGlsl(context.Config.Stage, isOutAttr: false) ? "[]" : string.Empty; + string iq = string.Empty; + + if (context.Config.Stage == ShaderStage.Fragment) + { + iq = context.Config.ImapTypes[attr].GetFirstUsedType() switch + { + PixelImap.Constant => "flat ", + PixelImap.ScreenLinear => "noperspective ", + _ => string.Empty + }; + } + + string name = $"{DefaultNames.IAttributePrefix}{attr}"; + + if (context.Config.TransformFeedbackEnabled && context.Config.Stage == ShaderStage.Fragment) + { + int components = context.Config.GetTransformFeedbackOutputComponents(attr, 0); + + if (components > 1) + { + string type = components switch + { + 2 => "vec2", + 3 => "vec3", + 4 => "vec4", + _ => "float" + }; + + context.AppendLine($"layout (location = {attr}) in {type} {name};"); + } + + for (int c = components > 1 ? components : 0; c < 4; c++) + { + char swzMask = "xyzw"[c]; + + context.AppendLine($"layout (location = {attr}, component = {c}) {iq}in float {name}_{swzMask}{suffix};"); + } + } + else + { + bool passthrough = (context.Config.PassthroughAttributes & (1 << attr)) != 0; + string pass = passthrough && context.Config.GpuAccessor.QueryHostSupportsGeometryShaderPassthrough() ? "passthrough, " : string.Empty; + string type; + + if (context.Config.Stage == ShaderStage.Vertex) + { + type = context.Config.GpuAccessor.QueryAttributeType(attr).ToVec4Type(); + } + else + { + type = AttributeType.Float.ToVec4Type(); + } + + context.AppendLine($"layout ({pass}location = {attr}) {iq}in {type} {name}{suffix};"); + } + } + + private static void DeclareInputAttributePerPatch(CodeGenContext context, int attr) + { + int location = context.Config.GetPerPatchAttributeLocation(attr); + string name = $"{DefaultNames.PerPatchAttributePrefix}{attr}"; + + context.AppendLine($"layout (location = {location}) patch in vec4 {name};"); + } + + private static void DeclareOutputAttributes(CodeGenContext context, StructuredProgramInfo info) + { + if (context.Config.UsedFeatures.HasFlag(FeatureFlags.OaIndexing)) + { + context.AppendLine($"layout (location = 0) out vec4 {DefaultNames.OAttributePrefix}[{Constants.MaxAttributes}];"); + } + else + { + int usedAttributes = context.Config.UsedOutputAttributes; + + if (context.Config.Stage == ShaderStage.Fragment && context.Config.GpuAccessor.QueryDualSourceBlendEnable()) + { + int firstOutput = BitOperations.TrailingZeroCount(usedAttributes); + int mask = 3 << firstOutput; + + if ((usedAttributes & mask) == mask) + { + usedAttributes &= ~mask; + DeclareOutputDualSourceBlendAttribute(context, firstOutput); + } + } + + while (usedAttributes != 0) + { + int index = BitOperations.TrailingZeroCount(usedAttributes); + DeclareOutputAttribute(context, index); + usedAttributes &= ~(1 << index); + } + } + } + + private static void DeclareOutputAttribute(CodeGenContext context, int attr) + { + string suffix = IsArrayAttributeGlsl(context.Config.Stage, isOutAttr: true) ? "[]" : string.Empty; + string name = $"{DefaultNames.OAttributePrefix}{attr}{suffix}"; + + if (context.Config.TransformFeedbackEnabled && context.Config.LastInVertexPipeline) + { + int components = context.Config.GetTransformFeedbackOutputComponents(attr, 0); + + if (components > 1) + { + string type = components switch + { + 2 => "vec2", + 3 => "vec3", + 4 => "vec4", + _ => "float" + }; + + string xfb = string.Empty; + + var tfOutput = context.Config.GetTransformFeedbackOutput(attr, 0); + if (tfOutput.Valid) + { + xfb = $", xfb_buffer = {tfOutput.Buffer}, xfb_offset = {tfOutput.Offset}, xfb_stride = {tfOutput.Stride}"; + } + + context.AppendLine($"layout (location = {attr}{xfb}) out {type} {name};"); + } + + for (int c = components > 1 ? components : 0; c < 4; c++) + { + char swzMask = "xyzw"[c]; + + string xfb = string.Empty; + + var tfOutput = context.Config.GetTransformFeedbackOutput(attr, c); + if (tfOutput.Valid) + { + xfb = $", xfb_buffer = {tfOutput.Buffer}, xfb_offset = {tfOutput.Offset}, xfb_stride = {tfOutput.Stride}"; + } + + context.AppendLine($"layout (location = {attr}, component = {c}{xfb}) out float {name}_{swzMask};"); + } + } + else + { + string type = context.Config.Stage != ShaderStage.Fragment ? "vec4" : + context.Config.GpuAccessor.QueryFragmentOutputType(attr) switch + { + AttributeType.Sint => "ivec4", + AttributeType.Uint => "uvec4", + _ => "vec4" + }; + + if (context.Config.GpuAccessor.QueryHostReducedPrecision() && context.Config.Stage == ShaderStage.Vertex && attr == 0) + { + context.AppendLine($"layout (location = {attr}) invariant out {type} {name};"); + } + else + { + context.AppendLine($"layout (location = {attr}) out {type} {name};"); + } + } + } + + private static void DeclareOutputDualSourceBlendAttribute(CodeGenContext context, int attr) + { + string name = $"{DefaultNames.OAttributePrefix}{attr}"; + string name2 = $"{DefaultNames.OAttributePrefix}{(attr + 1)}"; + + context.AppendLine($"layout (location = {attr}, index = 0) out vec4 {name};"); + context.AppendLine($"layout (location = {attr}, index = 1) out vec4 {name2};"); + } + + private static bool IsArrayAttributeGlsl(ShaderStage stage, bool isOutAttr) + { + if (isOutAttr) + { + return stage == ShaderStage.TessellationControl; + } + else + { + return stage == ShaderStage.TessellationControl || + stage == ShaderStage.TessellationEvaluation || + stage == ShaderStage.Geometry; + } + } + + private static void DeclareUsedOutputAttributesPerPatch(CodeGenContext context, HashSet<int> attrs) + { + foreach (int attr in attrs.Order()) + { + DeclareOutputAttributePerPatch(context, attr); + } + } + + private static void DeclareOutputAttributePerPatch(CodeGenContext context, int attr) + { + int location = context.Config.GetPerPatchAttributeLocation(attr); + string name = $"{DefaultNames.PerPatchAttributePrefix}{attr}"; + + context.AppendLine($"layout (location = {location}) patch out vec4 {name};"); + } + + private static void DeclareSupportUniformBlock(CodeGenContext context, ShaderStage stage, int scaleElements) + { + bool needsSupportBlock = stage == ShaderStage.Fragment || + (context.Config.LastInVertexPipeline && context.Config.GpuAccessor.QueryViewportTransformDisable()); + + if (!needsSupportBlock && scaleElements == 0) + { + return; + } + + context.AppendLine($"layout (binding = 0, std140) uniform {DefaultNames.SupportBlockName}"); + context.EnterScope(); + + switch (stage) + { + case ShaderStage.Fragment: + case ShaderStage.Vertex: + context.AppendLine($"uint {DefaultNames.SupportBlockAlphaTestName};"); + context.AppendLine($"bool {DefaultNames.SupportBlockIsBgraName}[{SupportBuffer.FragmentIsBgraCount}];"); + context.AppendLine($"vec4 {DefaultNames.SupportBlockViewportInverse};"); + context.AppendLine($"int {DefaultNames.SupportBlockFragmentScaleCount};"); + break; + case ShaderStage.Compute: + context.AppendLine($"uint s_reserved[{SupportBuffer.ComputeRenderScaleOffset / SupportBuffer.FieldSize}];"); + break; + } + + context.AppendLine($"float {DefaultNames.SupportBlockRenderScaleName}[{SupportBuffer.RenderScaleMaxCount}];"); + + context.LeaveScope(";"); + context.AppendLine(); + } + + private static void AppendHelperFunction(CodeGenContext context, string filename) + { + string code = EmbeddedResources.ReadAllText(filename); + + code = code.Replace("\t", CodeGenContext.Tab); + code = code.Replace("$SHARED_MEM$", DefaultNames.SharedMemoryName); + code = code.Replace("$STORAGE_MEM$", OperandManager.GetShaderStagePrefix(context.Config.Stage) + "_" + DefaultNames.StorageNamePrefix); + + if (context.Config.GpuAccessor.QueryHostSupportsShaderBallot()) + { + code = code.Replace("$SUBGROUP_INVOCATION$", "gl_SubGroupInvocationARB"); + code = code.Replace("$SUBGROUP_BROADCAST$", "readInvocationARB"); + } + else + { + code = code.Replace("$SUBGROUP_INVOCATION$", "gl_SubgroupInvocationID"); + code = code.Replace("$SUBGROUP_BROADCAST$", "subgroupBroadcast"); + } + + context.AppendLine(code); + context.AppendLine(); + } + } +}
\ No newline at end of file diff --git a/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/DefaultNames.cs b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/DefaultNames.cs new file mode 100644 index 00000000..3ab4814c --- /dev/null +++ b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/DefaultNames.cs @@ -0,0 +1,37 @@ +namespace Ryujinx.Graphics.Shader.CodeGen.Glsl +{ + static class DefaultNames + { + public const string LocalNamePrefix = "temp"; + + public const string SamplerNamePrefix = "tex"; + public const string ImageNamePrefix = "img"; + + public const string PerPatchAttributePrefix = "patch_attr_"; + public const string IAttributePrefix = "in_attr"; + public const string OAttributePrefix = "out_attr"; + + public const string StorageNamePrefix = "s"; + + public const string DataName = "data"; + + public const string SupportBlockName = "support_block"; + public const string SupportBlockAlphaTestName = "s_alpha_test"; + public const string SupportBlockIsBgraName = "s_is_bgra"; + public const string SupportBlockViewportInverse = "s_viewport_inverse"; + public const string SupportBlockFragmentScaleCount = "s_frag_scale_count"; + public const string SupportBlockRenderScaleName = "s_render_scale"; + + public const string BlockSuffix = "block"; + + public const string UniformNamePrefix = "c"; + public const string UniformNameSuffix = "data"; + + public const string LocalMemoryName = "local_mem"; + public const string SharedMemoryName = "shared_mem"; + + public const string ArgumentNamePrefix = "a"; + + public const string UndefinedName = "undef"; + } +}
\ No newline at end of file diff --git a/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/GlslGenerator.cs b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/GlslGenerator.cs new file mode 100644 index 00000000..751d0350 --- /dev/null +++ b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/GlslGenerator.cs @@ -0,0 +1,154 @@ +using Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions; +using Ryujinx.Graphics.Shader.StructuredIr; +using Ryujinx.Graphics.Shader.Translation; +using System; + +using static Ryujinx.Graphics.Shader.CodeGen.Glsl.TypeConversion; + +namespace Ryujinx.Graphics.Shader.CodeGen.Glsl +{ + static class GlslGenerator + { + private const string MainFunctionName = "main"; + + public static string Generate(StructuredProgramInfo info, ShaderConfig config) + { + CodeGenContext context = new CodeGenContext(info, config); + + Declarations.Declare(context, info); + + if (info.Functions.Count != 0) + { + for (int i = 1; i < info.Functions.Count; i++) + { + context.AppendLine($"{GetFunctionSignature(context, info.Functions[i])};"); + } + + context.AppendLine(); + + for (int i = 1; i < info.Functions.Count; i++) + { + PrintFunction(context, info, info.Functions[i]); + + context.AppendLine(); + } + } + + PrintFunction(context, info, info.Functions[0], MainFunctionName); + + return context.GetCode(); + } + + private static void PrintFunction(CodeGenContext context, StructuredProgramInfo info, StructuredFunction function, string funcName = null) + { + context.CurrentFunction = function; + + context.AppendLine(GetFunctionSignature(context, function, funcName)); + context.EnterScope(); + + Declarations.DeclareLocals(context, function); + + PrintBlock(context, function.MainBlock); + + context.LeaveScope(); + } + + private static string GetFunctionSignature(CodeGenContext context, StructuredFunction function, string funcName = null) + { + string[] args = new string[function.InArguments.Length + function.OutArguments.Length]; + + for (int i = 0; i < function.InArguments.Length; i++) + { + args[i] = $"{Declarations.GetVarTypeName(context, function.InArguments[i])} {OperandManager.GetArgumentName(i)}"; + } + + for (int i = 0; i < function.OutArguments.Length; i++) + { + int j = i + function.InArguments.Length; + + args[j] = $"out {Declarations.GetVarTypeName(context, function.OutArguments[i])} {OperandManager.GetArgumentName(j)}"; + } + + return $"{Declarations.GetVarTypeName(context, function.ReturnType)} {funcName ?? function.Name}({string.Join(", ", args)})"; + } + + private static void PrintBlock(CodeGenContext context, AstBlock block) + { + AstBlockVisitor visitor = new AstBlockVisitor(block); + + visitor.BlockEntered += (sender, e) => + { + switch (e.Block.Type) + { + case AstBlockType.DoWhile: + context.AppendLine("do"); + break; + + case AstBlockType.Else: + context.AppendLine("else"); + break; + + case AstBlockType.ElseIf: + context.AppendLine($"else if ({GetCondExpr(context, e.Block.Condition)})"); + break; + + case AstBlockType.If: + context.AppendLine($"if ({GetCondExpr(context, e.Block.Condition)})"); + break; + + default: throw new InvalidOperationException($"Found unexpected block type \"{e.Block.Type}\"."); + } + + context.EnterScope(); + }; + + visitor.BlockLeft += (sender, e) => + { + context.LeaveScope(); + + if (e.Block.Type == AstBlockType.DoWhile) + { + context.AppendLine($"while ({GetCondExpr(context, e.Block.Condition)});"); + } + }; + + foreach (IAstNode node in visitor.Visit()) + { + if (node is AstOperation operation) + { + string expr = InstGen.GetExpression(context, operation); + + if (expr != null) + { + context.AppendLine(expr + ";"); + } + } + else if (node is AstAssignment assignment) + { + AggregateType dstType = OperandManager.GetNodeDestType(context, assignment.Destination); + AggregateType srcType = OperandManager.GetNodeDestType(context, assignment.Source); + + string dest = InstGen.GetExpression(context, assignment.Destination); + string src = ReinterpretCast(context, assignment.Source, srcType, dstType); + + context.AppendLine(dest + " = " + src + ";"); + } + else if (node is AstComment comment) + { + context.AppendLine("// " + comment.Comment); + } + else + { + throw new InvalidOperationException($"Found unexpected node type \"{node?.GetType().Name ?? "null"}\"."); + } + } + } + + private static string GetCondExpr(CodeGenContext context, IAstNode cond) + { + AggregateType srcType = OperandManager.GetNodeDestType(context, cond); + + return ReinterpretCast(context, cond, srcType, AggregateType.Bool); + } + } +}
\ No newline at end of file diff --git a/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/AtomicMinMaxS32Shared.glsl b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/AtomicMinMaxS32Shared.glsl new file mode 100644 index 00000000..82b76bcc --- /dev/null +++ b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/AtomicMinMaxS32Shared.glsl @@ -0,0 +1,21 @@ +int Helper_AtomicMaxS32(int offset, int value) +{ + uint oldValue, newValue; + do + { + oldValue = $SHARED_MEM$[offset]; + newValue = uint(max(int(oldValue), value)); + } while (atomicCompSwap($SHARED_MEM$[offset], oldValue, newValue) != oldValue); + return int(oldValue); +} + +int Helper_AtomicMinS32(int offset, int value) +{ + uint oldValue, newValue; + do + { + oldValue = $SHARED_MEM$[offset]; + newValue = uint(min(int(oldValue), value)); + } while (atomicCompSwap($SHARED_MEM$[offset], oldValue, newValue) != oldValue); + return int(oldValue); +}
\ No newline at end of file diff --git a/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/AtomicMinMaxS32Storage.glsl b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/AtomicMinMaxS32Storage.glsl new file mode 100644 index 00000000..0862a71b --- /dev/null +++ b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/AtomicMinMaxS32Storage.glsl @@ -0,0 +1,21 @@ +int Helper_AtomicMaxS32(int index, int offset, int value) +{ + uint oldValue, newValue; + do + { + oldValue = $STORAGE_MEM$[index].data[offset]; + newValue = uint(max(int(oldValue), value)); + } while (atomicCompSwap($STORAGE_MEM$[index].data[offset], oldValue, newValue) != oldValue); + return int(oldValue); +} + +int Helper_AtomicMinS32(int index, int offset, int value) +{ + uint oldValue, newValue; + do + { + oldValue = $STORAGE_MEM$[index].data[offset]; + newValue = uint(min(int(oldValue), value)); + } while (atomicCompSwap($STORAGE_MEM$[index].data[offset], oldValue, newValue) != oldValue); + return int(oldValue); +}
\ No newline at end of file diff --git a/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/HelperFunctionNames.cs b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/HelperFunctionNames.cs new file mode 100644 index 00000000..54f35b15 --- /dev/null +++ b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/HelperFunctionNames.cs @@ -0,0 +1,22 @@ +namespace Ryujinx.Graphics.Shader.CodeGen.Glsl +{ + static class HelperFunctionNames + { + public static string AtomicMaxS32 = "Helper_AtomicMaxS32"; + public static string AtomicMinS32 = "Helper_AtomicMinS32"; + + public static string MultiplyHighS32 = "Helper_MultiplyHighS32"; + public static string MultiplyHighU32 = "Helper_MultiplyHighU32"; + + public static string Shuffle = "Helper_Shuffle"; + public static string ShuffleDown = "Helper_ShuffleDown"; + public static string ShuffleUp = "Helper_ShuffleUp"; + public static string ShuffleXor = "Helper_ShuffleXor"; + public static string SwizzleAdd = "Helper_SwizzleAdd"; + + public static string StoreShared16 = "Helper_StoreShared16"; + public static string StoreShared8 = "Helper_StoreShared8"; + public static string StoreStorage16 = "Helper_StoreStorage16"; + public static string StoreStorage8 = "Helper_StoreStorage8"; + } +}
\ No newline at end of file diff --git a/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/MultiplyHighS32.glsl b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/MultiplyHighS32.glsl new file mode 100644 index 00000000..caad6f56 --- /dev/null +++ b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/MultiplyHighS32.glsl @@ -0,0 +1,7 @@ +int Helper_MultiplyHighS32(int x, int y) +{ + int msb; + int lsb; + imulExtended(x, y, msb, lsb); + return msb; +}
\ No newline at end of file diff --git a/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/MultiplyHighU32.glsl b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/MultiplyHighU32.glsl new file mode 100644 index 00000000..617a925f --- /dev/null +++ b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/MultiplyHighU32.glsl @@ -0,0 +1,7 @@ +uint Helper_MultiplyHighU32(uint x, uint y) +{ + uint msb; + uint lsb; + umulExtended(x, y, msb, lsb); + return msb; +}
\ No newline at end of file diff --git a/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/Shuffle.glsl b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/Shuffle.glsl new file mode 100644 index 00000000..7cb4764d --- /dev/null +++ b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/Shuffle.glsl @@ -0,0 +1,11 @@ +float Helper_Shuffle(float x, uint index, uint mask, out bool valid) +{ + uint clamp = mask & 0x1fu; + uint segMask = (mask >> 8) & 0x1fu; + uint minThreadId = $SUBGROUP_INVOCATION$ & segMask; + uint maxThreadId = minThreadId | (clamp & ~segMask); + uint srcThreadId = (index & ~segMask) | minThreadId; + valid = srcThreadId <= maxThreadId; + float v = $SUBGROUP_BROADCAST$(x, srcThreadId); + return valid ? v : x; +}
\ No newline at end of file diff --git a/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/ShuffleDown.glsl b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/ShuffleDown.glsl new file mode 100644 index 00000000..71d901d5 --- /dev/null +++ b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/ShuffleDown.glsl @@ -0,0 +1,11 @@ +float Helper_ShuffleDown(float x, uint index, uint mask, out bool valid) +{ + uint clamp = mask & 0x1fu; + uint segMask = (mask >> 8) & 0x1fu; + uint minThreadId = $SUBGROUP_INVOCATION$ & segMask; + uint maxThreadId = minThreadId | (clamp & ~segMask); + uint srcThreadId = $SUBGROUP_INVOCATION$ + index; + valid = srcThreadId <= maxThreadId; + float v = $SUBGROUP_BROADCAST$(x, srcThreadId); + return valid ? v : x; +}
\ No newline at end of file diff --git a/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/ShuffleUp.glsl b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/ShuffleUp.glsl new file mode 100644 index 00000000..ae264d87 --- /dev/null +++ b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/ShuffleUp.glsl @@ -0,0 +1,9 @@ +float Helper_ShuffleUp(float x, uint index, uint mask, out bool valid) +{ + uint segMask = (mask >> 8) & 0x1fu; + uint minThreadId = $SUBGROUP_INVOCATION$ & segMask; + uint srcThreadId = $SUBGROUP_INVOCATION$ - index; + valid = int(srcThreadId) >= int(minThreadId); + float v = $SUBGROUP_BROADCAST$(x, srcThreadId); + return valid ? v : x; +}
\ No newline at end of file diff --git a/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/ShuffleXor.glsl b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/ShuffleXor.glsl new file mode 100644 index 00000000..789089d6 --- /dev/null +++ b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/ShuffleXor.glsl @@ -0,0 +1,11 @@ +float Helper_ShuffleXor(float x, uint index, uint mask, out bool valid) +{ + uint clamp = mask & 0x1fu; + uint segMask = (mask >> 8) & 0x1fu; + uint minThreadId = $SUBGROUP_INVOCATION$ & segMask; + uint maxThreadId = minThreadId | (clamp & ~segMask); + uint srcThreadId = $SUBGROUP_INVOCATION$ ^ index; + valid = srcThreadId <= maxThreadId; + float v = $SUBGROUP_BROADCAST$(x, srcThreadId); + return valid ? v : x; +}
\ No newline at end of file diff --git a/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/StoreSharedSmallInt.glsl b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/StoreSharedSmallInt.glsl new file mode 100644 index 00000000..2f57b5ff --- /dev/null +++ b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/StoreSharedSmallInt.glsl @@ -0,0 +1,23 @@ +void Helper_StoreShared16(int offset, uint value) +{ + int wordOffset = offset >> 2; + int bitOffset = (offset & 3) * 8; + uint oldValue, newValue; + do + { + oldValue = $SHARED_MEM$[wordOffset]; + newValue = bitfieldInsert(oldValue, value, bitOffset, 16); + } while (atomicCompSwap($SHARED_MEM$[wordOffset], oldValue, newValue) != oldValue); +} + +void Helper_StoreShared8(int offset, uint value) +{ + int wordOffset = offset >> 2; + int bitOffset = (offset & 3) * 8; + uint oldValue, newValue; + do + { + oldValue = $SHARED_MEM$[wordOffset]; + newValue = bitfieldInsert(oldValue, value, bitOffset, 8); + } while (atomicCompSwap($SHARED_MEM$[wordOffset], oldValue, newValue) != oldValue); +}
\ No newline at end of file diff --git a/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/StoreStorageSmallInt.glsl b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/StoreStorageSmallInt.glsl new file mode 100644 index 00000000..f2253a79 --- /dev/null +++ b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/StoreStorageSmallInt.glsl @@ -0,0 +1,23 @@ +void Helper_StoreStorage16(int index, int offset, uint value) +{ + int wordOffset = offset >> 2; + int bitOffset = (offset & 3) * 8; + uint oldValue, newValue; + do + { + oldValue = $STORAGE_MEM$[index].data[wordOffset]; + newValue = bitfieldInsert(oldValue, value, bitOffset, 16); + } while (atomicCompSwap($STORAGE_MEM$[index].data[wordOffset], oldValue, newValue) != oldValue); +} + +void Helper_StoreStorage8(int index, int offset, uint value) +{ + int wordOffset = offset >> 2; + int bitOffset = (offset & 3) * 8; + uint oldValue, newValue; + do + { + oldValue = $STORAGE_MEM$[index].data[wordOffset]; + newValue = bitfieldInsert(oldValue, value, bitOffset, 8); + } while (atomicCompSwap($STORAGE_MEM$[index].data[wordOffset], oldValue, newValue) != oldValue); +}
\ No newline at end of file diff --git a/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/SwizzleAdd.glsl b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/SwizzleAdd.glsl new file mode 100644 index 00000000..057cb6ca --- /dev/null +++ b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/SwizzleAdd.glsl @@ -0,0 +1,7 @@ +float Helper_SwizzleAdd(float x, float y, int mask) +{ + vec4 xLut = vec4(1.0, -1.0, 1.0, 0.0); + vec4 yLut = vec4(1.0, 1.0, -1.0, 1.0); + int lutIdx = (mask >> (int($SUBGROUP_INVOCATION$ & 3u) * 2)) & 3; + return x * xLut[lutIdx] + y * yLut[lutIdx]; +}
\ No newline at end of file diff --git a/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/TexelFetchScale_cp.glsl b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/TexelFetchScale_cp.glsl new file mode 100644 index 00000000..4ebade5e --- /dev/null +++ b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/TexelFetchScale_cp.glsl @@ -0,0 +1,19 @@ +ivec2 Helper_TexelFetchScale(ivec2 inputVec, int samplerIndex) +{ + float scale = s_render_scale[samplerIndex]; + if (scale == 1.0) + { + return inputVec; + } + return ivec2(vec2(inputVec) * scale); +} + +int Helper_TextureSizeUnscale(int size, int samplerIndex) +{ + float scale = s_render_scale[samplerIndex]; + if (scale == 1.0) + { + return size; + } + return int(float(size) / scale); +}
\ No newline at end of file diff --git a/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/TexelFetchScale_fp.glsl b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/TexelFetchScale_fp.glsl new file mode 100644 index 00000000..6c670f91 --- /dev/null +++ b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/TexelFetchScale_fp.glsl @@ -0,0 +1,26 @@ +ivec2 Helper_TexelFetchScale(ivec2 inputVec, int samplerIndex) +{ + float scale = s_render_scale[1 + samplerIndex]; + if (scale == 1.0) + { + return inputVec; + } + if (scale < 0.0) // If less than 0, try interpolate between texels by using the screen position. + { + return ivec2(vec2(inputVec) * (-scale) + mod(gl_FragCoord.xy, 0.0 - scale)); + } + else + { + return ivec2(vec2(inputVec) * scale); + } +} + +int Helper_TextureSizeUnscale(int size, int samplerIndex) +{ + float scale = abs(s_render_scale[1 + samplerIndex]); + if (scale == 1.0) + { + return size; + } + return int(float(size) / scale); +}
\ No newline at end of file diff --git a/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/TexelFetchScale_vp.glsl b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/TexelFetchScale_vp.glsl new file mode 100644 index 00000000..19eb119d --- /dev/null +++ b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/HelperFunctions/TexelFetchScale_vp.glsl @@ -0,0 +1,20 @@ +ivec2 Helper_TexelFetchScale(ivec2 inputVec, int samplerIndex) +{ + float scale = abs(s_render_scale[1 + samplerIndex + s_frag_scale_count]); + if (scale == 1.0) + { + return inputVec; + } + + return ivec2(vec2(inputVec) * scale); +} + +int Helper_TextureSizeUnscale(int size, int samplerIndex) +{ + float scale = abs(s_render_scale[1 + samplerIndex + s_frag_scale_count]); + if (scale == 1.0) + { + return size; + } + return int(float(size) / scale); +}
\ No newline at end of file diff --git a/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGen.cs b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGen.cs new file mode 100644 index 00000000..01bd11e5 --- /dev/null +++ b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGen.cs @@ -0,0 +1,238 @@ +using Ryujinx.Graphics.Shader.IntermediateRepresentation; +using Ryujinx.Graphics.Shader.StructuredIr; +using Ryujinx.Graphics.Shader.Translation; +using System; + +using static Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions.InstGenBallot; +using static Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions.InstGenCall; +using static Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions.InstGenFSI; +using static Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions.InstGenHelper; +using static Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions.InstGenMemory; +using static Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions.InstGenPacking; +using static Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions.InstGenVector; +using static Ryujinx.Graphics.Shader.StructuredIr.InstructionInfo; + +namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions +{ + static class InstGen + { + public static string GetExpression(CodeGenContext context, IAstNode node) + { + if (node is AstOperation operation) + { + return GetExpression(context, operation); + } + else if (node is AstOperand operand) + { + return context.OperandManager.GetExpression(context, operand); + } + + throw new ArgumentException($"Invalid node type \"{node?.GetType().Name ?? "null"}\"."); + } + + public static string Negate(CodeGenContext context, AstOperation operation, InstInfo info) + { + IAstNode src = operation.GetSource(0); + + AggregateType type = GetSrcVarType(operation.Inst, 0); + + string srcExpr = GetSoureExpr(context, src, type); + string zero; + + if (type == AggregateType.FP64) + { + zero = "0.0"; + } + else + { + NumberFormatter.TryFormat(0, type, out zero); + } + + // Starting in the 496.13 NVIDIA driver, there's an issue with assigning variables to negated expressions. + // (-expr) does not work, but (0.0 - expr) does. This should be removed once the issue is resolved. + + return $"{zero} - {Enclose(srcExpr, src, operation.Inst, info, false)}"; + } + + private static string GetExpression(CodeGenContext context, AstOperation operation) + { + Instruction inst = operation.Inst; + + InstInfo info = GetInstructionInfo(inst); + + if ((info.Type & InstType.Call) != 0) + { + bool atomic = (info.Type & InstType.Atomic) != 0; + + int arity = (int)(info.Type & InstType.ArityMask); + + string args = string.Empty; + + for (int argIndex = 0; argIndex < arity; argIndex++) + { + // For shared memory access, the second argument is unused and should be ignored. + // It is there to make both storage and shared access have the same number of arguments. + // For storage, both inputs are consumed when the argument index is 0, so we should skip it here. + if (argIndex == 1 && (atomic || operation.StorageKind == StorageKind.SharedMemory)) + { + continue; + } + + if (argIndex != 0) + { + args += ", "; + } + + if (argIndex == 0 && atomic) + { + switch (operation.StorageKind) + { + case StorageKind.SharedMemory: args += LoadShared(context, operation); break; + case StorageKind.StorageBuffer: args += LoadStorage(context, operation); break; + + default: throw new InvalidOperationException($"Invalid storage kind \"{operation.StorageKind}\"."); + } + } + else + { + AggregateType dstType = GetSrcVarType(inst, argIndex); + + args += GetSoureExpr(context, operation.GetSource(argIndex), dstType); + } + } + + return info.OpName + '(' + args + ')'; + } + else if ((info.Type & InstType.Op) != 0) + { + string op = info.OpName; + + // Return may optionally have a return value (and in this case it is unary). + if (inst == Instruction.Return && operation.SourcesCount != 0) + { + return $"{op} {GetSoureExpr(context, operation.GetSource(0), context.CurrentFunction.ReturnType)}"; + } + + int arity = (int)(info.Type & InstType.ArityMask); + + string[] expr = new string[arity]; + + for (int index = 0; index < arity; index++) + { + IAstNode src = operation.GetSource(index); + + string srcExpr = GetSoureExpr(context, src, GetSrcVarType(inst, index)); + + bool isLhs = arity == 2 && index == 0; + + expr[index] = Enclose(srcExpr, src, inst, info, isLhs); + } + + switch (arity) + { + case 0: + return op; + + case 1: + return op + expr[0]; + + case 2: + return $"{expr[0]} {op} {expr[1]}"; + + case 3: + return $"{expr[0]} {op[0]} {expr[1]} {op[1]} {expr[2]}"; + } + } + else if ((info.Type & InstType.Special) != 0) + { + switch (inst & Instruction.Mask) + { + case Instruction.Ballot: + return Ballot(context, operation); + + case Instruction.Call: + return Call(context, operation); + + case Instruction.FSIBegin: + return FSIBegin(context); + + case Instruction.FSIEnd: + return FSIEnd(context); + + case Instruction.ImageLoad: + case Instruction.ImageStore: + case Instruction.ImageAtomic: + return ImageLoadOrStore(context, operation); + + case Instruction.Load: + return Load(context, operation); + + case Instruction.LoadConstant: + return LoadConstant(context, operation); + + case Instruction.LoadLocal: + return LoadLocal(context, operation); + + case Instruction.LoadShared: + return LoadShared(context, operation); + + case Instruction.LoadStorage: + return LoadStorage(context, operation); + + case Instruction.Lod: + return Lod(context, operation); + + case Instruction.Negate: + return Negate(context, operation, info); + + case Instruction.PackDouble2x32: + return PackDouble2x32(context, operation); + + case Instruction.PackHalf2x16: + return PackHalf2x16(context, operation); + + case Instruction.Store: + return Store(context, operation); + + case Instruction.StoreLocal: + return StoreLocal(context, operation); + + case Instruction.StoreShared: + return StoreShared(context, operation); + + case Instruction.StoreShared16: + return StoreShared16(context, operation); + + case Instruction.StoreShared8: + return StoreShared8(context, operation); + + case Instruction.StoreStorage: + return StoreStorage(context, operation); + + case Instruction.StoreStorage16: + return StoreStorage16(context, operation); + + case Instruction.StoreStorage8: + return StoreStorage8(context, operation); + + case Instruction.TextureSample: + return TextureSample(context, operation); + + case Instruction.TextureSize: + return TextureSize(context, operation); + + case Instruction.UnpackDouble2x32: + return UnpackDouble2x32(context, operation); + + case Instruction.UnpackHalf2x16: + return UnpackHalf2x16(context, operation); + + case Instruction.VectorExtract: + return VectorExtract(context, operation); + } + } + + throw new InvalidOperationException($"Unexpected instruction type \"{info.Type}\"."); + } + } +}
\ No newline at end of file diff --git a/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGenBallot.cs b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGenBallot.cs new file mode 100644 index 00000000..68793c5d --- /dev/null +++ b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGenBallot.cs @@ -0,0 +1,27 @@ +using Ryujinx.Graphics.Shader.StructuredIr; +using Ryujinx.Graphics.Shader.Translation; + +using static Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions.InstGenHelper; +using static Ryujinx.Graphics.Shader.StructuredIr.InstructionInfo; + +namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions +{ + static class InstGenBallot + { + public static string Ballot(CodeGenContext context, AstOperation operation) + { + AggregateType dstType = GetSrcVarType(operation.Inst, 0); + + string arg = GetSoureExpr(context, operation.GetSource(0), dstType); + + if (context.Config.GpuAccessor.QueryHostSupportsShaderBallot()) + { + return $"unpackUint2x32(ballotARB({arg})).x"; + } + else + { + return $"subgroupBallot({arg}).x"; + } + } + } +}
\ No newline at end of file diff --git a/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGenCall.cs b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGenCall.cs new file mode 100644 index 00000000..2df6960d --- /dev/null +++ b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGenCall.cs @@ -0,0 +1,29 @@ +using Ryujinx.Graphics.Shader.IntermediateRepresentation; +using Ryujinx.Graphics.Shader.StructuredIr; +using System.Diagnostics; + +using static Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions.InstGenHelper; + +namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions +{ + static class InstGenCall + { + public static string Call(CodeGenContext context, AstOperation operation) + { + AstOperand funcId = (AstOperand)operation.GetSource(0); + + Debug.Assert(funcId.Type == OperandType.Constant); + + var function = context.GetFunction(funcId.Value); + + string[] args = new string[operation.SourcesCount - 1]; + + for (int i = 0; i < args.Length; i++) + { + args[i] = GetSoureExpr(context, operation.GetSource(i + 1), function.GetArgumentType(i)); + } + + return $"{function.Name}({string.Join(", ", args)})"; + } + } +}
\ No newline at end of file diff --git a/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGenFSI.cs b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGenFSI.cs new file mode 100644 index 00000000..f61a53cb --- /dev/null +++ b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGenFSI.cs @@ -0,0 +1,29 @@ +namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions +{ + static class InstGenFSI + { + public static string FSIBegin(CodeGenContext context) + { + if (context.Config.GpuAccessor.QueryHostSupportsFragmentShaderInterlock()) + { + return "beginInvocationInterlockARB()"; + } + else if (context.Config.GpuAccessor.QueryHostSupportsFragmentShaderOrderingIntel()) + { + return "beginFragmentShaderOrderingINTEL()"; + } + + return null; + } + + public static string FSIEnd(CodeGenContext context) + { + if (context.Config.GpuAccessor.QueryHostSupportsFragmentShaderInterlock()) + { + return "endInvocationInterlockARB()"; + } + + return null; + } + } +}
\ No newline at end of file diff --git a/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGenHelper.cs b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGenHelper.cs new file mode 100644 index 00000000..00478f6a --- /dev/null +++ b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGenHelper.cs @@ -0,0 +1,231 @@ +using Ryujinx.Graphics.Shader.IntermediateRepresentation; +using Ryujinx.Graphics.Shader.StructuredIr; +using Ryujinx.Graphics.Shader.Translation; + +using static Ryujinx.Graphics.Shader.CodeGen.Glsl.TypeConversion; + +namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions +{ + static class InstGenHelper + { + private static readonly InstInfo[] _infoTable; + + static InstGenHelper() + { + _infoTable = new InstInfo[(int)Instruction.Count]; + + Add(Instruction.AtomicAdd, InstType.AtomicBinary, "atomicAdd"); + Add(Instruction.AtomicAnd, InstType.AtomicBinary, "atomicAnd"); + Add(Instruction.AtomicCompareAndSwap, InstType.AtomicTernary, "atomicCompSwap"); + Add(Instruction.AtomicMaxS32, InstType.CallTernary, HelperFunctionNames.AtomicMaxS32); + Add(Instruction.AtomicMaxU32, InstType.AtomicBinary, "atomicMax"); + Add(Instruction.AtomicMinS32, InstType.CallTernary, HelperFunctionNames.AtomicMinS32); + Add(Instruction.AtomicMinU32, InstType.AtomicBinary, "atomicMin"); + Add(Instruction.AtomicOr, InstType.AtomicBinary, "atomicOr"); + Add(Instruction.AtomicSwap, InstType.AtomicBinary, "atomicExchange"); + Add(Instruction.AtomicXor, InstType.AtomicBinary, "atomicXor"); + Add(Instruction.Absolute, InstType.CallUnary, "abs"); + Add(Instruction.Add, InstType.OpBinaryCom, "+", 2); + Add(Instruction.Ballot, InstType.Special); + Add(Instruction.Barrier, InstType.CallNullary, "barrier"); + Add(Instruction.BitCount, InstType.CallUnary, "bitCount"); + Add(Instruction.BitfieldExtractS32, InstType.CallTernary, "bitfieldExtract"); + Add(Instruction.BitfieldExtractU32, InstType.CallTernary, "bitfieldExtract"); + Add(Instruction.BitfieldInsert, InstType.CallQuaternary, "bitfieldInsert"); + Add(Instruction.BitfieldReverse, InstType.CallUnary, "bitfieldReverse"); + Add(Instruction.BitwiseAnd, InstType.OpBinaryCom, "&", 6); + Add(Instruction.BitwiseExclusiveOr, InstType.OpBinaryCom, "^", 7); + Add(Instruction.BitwiseNot, InstType.OpUnary, "~", 0); + Add(Instruction.BitwiseOr, InstType.OpBinaryCom, "|", 8); + Add(Instruction.Call, InstType.Special); + Add(Instruction.Ceiling, InstType.CallUnary, "ceil"); + Add(Instruction.Clamp, InstType.CallTernary, "clamp"); + Add(Instruction.ClampU32, InstType.CallTernary, "clamp"); + Add(Instruction.CompareEqual, InstType.OpBinaryCom, "==", 5); + Add(Instruction.CompareGreater, InstType.OpBinary, ">", 4); + Add(Instruction.CompareGreaterOrEqual, InstType.OpBinary, ">=", 4); + Add(Instruction.CompareGreaterOrEqualU32, InstType.OpBinary, ">=", 4); + Add(Instruction.CompareGreaterU32, InstType.OpBinary, ">", 4); + Add(Instruction.CompareLess, InstType.OpBinary, "<", 4); + Add(Instruction.CompareLessOrEqual, InstType.OpBinary, "<=", 4); + Add(Instruction.CompareLessOrEqualU32, InstType.OpBinary, "<=", 4); + Add(Instruction.CompareLessU32, InstType.OpBinary, "<", 4); + Add(Instruction.CompareNotEqual, InstType.OpBinaryCom, "!=", 5); + Add(Instruction.ConditionalSelect, InstType.OpTernary, "?:", 12); + Add(Instruction.ConvertFP32ToFP64, InstType.CallUnary, "double"); + Add(Instruction.ConvertFP64ToFP32, InstType.CallUnary, "float"); + Add(Instruction.ConvertFP32ToS32, InstType.CallUnary, "int"); + Add(Instruction.ConvertFP32ToU32, InstType.CallUnary, "uint"); + Add(Instruction.ConvertFP64ToS32, InstType.CallUnary, "int"); + Add(Instruction.ConvertFP64ToU32, InstType.CallUnary, "uint"); + Add(Instruction.ConvertS32ToFP32, InstType.CallUnary, "float"); + Add(Instruction.ConvertS32ToFP64, InstType.CallUnary, "double"); + Add(Instruction.ConvertU32ToFP32, InstType.CallUnary, "float"); + Add(Instruction.ConvertU32ToFP64, InstType.CallUnary, "double"); + Add(Instruction.Cosine, InstType.CallUnary, "cos"); + Add(Instruction.Ddx, InstType.CallUnary, "dFdx"); + Add(Instruction.Ddy, InstType.CallUnary, "dFdy"); + Add(Instruction.Discard, InstType.OpNullary, "discard"); + Add(Instruction.Divide, InstType.OpBinary, "/", 1); + Add(Instruction.EmitVertex, InstType.CallNullary, "EmitVertex"); + Add(Instruction.EndPrimitive, InstType.CallNullary, "EndPrimitive"); + Add(Instruction.ExponentB2, InstType.CallUnary, "exp2"); + Add(Instruction.FSIBegin, InstType.Special); + Add(Instruction.FSIEnd, InstType.Special); + Add(Instruction.FindLSB, InstType.CallUnary, "findLSB"); + Add(Instruction.FindMSBS32, InstType.CallUnary, "findMSB"); + Add(Instruction.FindMSBU32, InstType.CallUnary, "findMSB"); + Add(Instruction.Floor, InstType.CallUnary, "floor"); + Add(Instruction.FusedMultiplyAdd, InstType.CallTernary, "fma"); + Add(Instruction.GroupMemoryBarrier, InstType.CallNullary, "groupMemoryBarrier"); + Add(Instruction.ImageLoad, InstType.Special); + Add(Instruction.ImageStore, InstType.Special); + Add(Instruction.ImageAtomic, InstType.Special); + Add(Instruction.IsNan, InstType.CallUnary, "isnan"); + Add(Instruction.Load, InstType.Special); + Add(Instruction.LoadConstant, InstType.Special); + Add(Instruction.LoadLocal, InstType.Special); + Add(Instruction.LoadShared, InstType.Special); + Add(Instruction.LoadStorage, InstType.Special); + Add(Instruction.Lod, InstType.Special); + Add(Instruction.LogarithmB2, InstType.CallUnary, "log2"); + Add(Instruction.LogicalAnd, InstType.OpBinaryCom, "&&", 9); + Add(Instruction.LogicalExclusiveOr, InstType.OpBinaryCom, "^^", 10); + Add(Instruction.LogicalNot, InstType.OpUnary, "!", 0); + Add(Instruction.LogicalOr, InstType.OpBinaryCom, "||", 11); + Add(Instruction.LoopBreak, InstType.OpNullary, "break"); + Add(Instruction.LoopContinue, InstType.OpNullary, "continue"); + Add(Instruction.PackDouble2x32, InstType.Special); + Add(Instruction.PackHalf2x16, InstType.Special); + Add(Instruction.Maximum, InstType.CallBinary, "max"); + Add(Instruction.MaximumU32, InstType.CallBinary, "max"); + Add(Instruction.MemoryBarrier, InstType.CallNullary, "memoryBarrier"); + Add(Instruction.Minimum, InstType.CallBinary, "min"); + Add(Instruction.MinimumU32, InstType.CallBinary, "min"); + Add(Instruction.Multiply, InstType.OpBinaryCom, "*", 1); + Add(Instruction.MultiplyHighS32, InstType.CallBinary, HelperFunctionNames.MultiplyHighS32); + Add(Instruction.MultiplyHighU32, InstType.CallBinary, HelperFunctionNames.MultiplyHighU32); + Add(Instruction.Negate, InstType.Special); + Add(Instruction.ReciprocalSquareRoot, InstType.CallUnary, "inversesqrt"); + Add(Instruction.Return, InstType.OpNullary, "return"); + Add(Instruction.Round, InstType.CallUnary, "roundEven"); + Add(Instruction.ShiftLeft, InstType.OpBinary, "<<", 3); + Add(Instruction.ShiftRightS32, InstType.OpBinary, ">>", 3); + Add(Instruction.ShiftRightU32, InstType.OpBinary, ">>", 3); + Add(Instruction.Shuffle, InstType.CallQuaternary, HelperFunctionNames.Shuffle); + Add(Instruction.ShuffleDown, InstType.CallQuaternary, HelperFunctionNames.ShuffleDown); + Add(Instruction.ShuffleUp, InstType.CallQuaternary, HelperFunctionNames.ShuffleUp); + Add(Instruction.ShuffleXor, InstType.CallQuaternary, HelperFunctionNames.ShuffleXor); + Add(Instruction.Sine, InstType.CallUnary, "sin"); + Add(Instruction.SquareRoot, InstType.CallUnary, "sqrt"); + Add(Instruction.Store, InstType.Special); + Add(Instruction.StoreLocal, InstType.Special); + Add(Instruction.StoreShared, InstType.Special); + Add(Instruction.StoreShared16, InstType.Special); + Add(Instruction.StoreShared8, InstType.Special); + Add(Instruction.StoreStorage, InstType.Special); + Add(Instruction.StoreStorage16, InstType.Special); + Add(Instruction.StoreStorage8, InstType.Special); + Add(Instruction.Subtract, InstType.OpBinary, "-", 2); + Add(Instruction.SwizzleAdd, InstType.CallTernary, HelperFunctionNames.SwizzleAdd); + Add(Instruction.TextureSample, InstType.Special); + Add(Instruction.TextureSize, InstType.Special); + Add(Instruction.Truncate, InstType.CallUnary, "trunc"); + Add(Instruction.UnpackDouble2x32, InstType.Special); + Add(Instruction.UnpackHalf2x16, InstType.Special); + Add(Instruction.VectorExtract, InstType.Special); + Add(Instruction.VoteAll, InstType.CallUnary, "allInvocationsARB"); + Add(Instruction.VoteAllEqual, InstType.CallUnary, "allInvocationsEqualARB"); + Add(Instruction.VoteAny, InstType.CallUnary, "anyInvocationARB"); + } + + private static void Add(Instruction inst, InstType flags, string opName = null, int precedence = 0) + { + _infoTable[(int)inst] = new InstInfo(flags, opName, precedence); + } + + public static InstInfo GetInstructionInfo(Instruction inst) + { + return _infoTable[(int)(inst & Instruction.Mask)]; + } + + public static string GetSoureExpr(CodeGenContext context, IAstNode node, AggregateType dstType) + { + return ReinterpretCast(context, node, OperandManager.GetNodeDestType(context, node), dstType); + } + + public static string Enclose(string expr, IAstNode node, Instruction pInst, bool isLhs) + { + InstInfo pInfo = GetInstructionInfo(pInst); + + return Enclose(expr, node, pInst, pInfo, isLhs); + } + + public static string Enclose(string expr, IAstNode node, Instruction pInst, InstInfo pInfo, bool isLhs = false) + { + if (NeedsParenthesis(node, pInst, pInfo, isLhs)) + { + expr = "(" + expr + ")"; + } + + return expr; + } + + public static bool NeedsParenthesis(IAstNode node, Instruction pInst, InstInfo pInfo, bool isLhs) + { + // If the node isn't a operation, then it can only be a operand, + // and those never needs to be surrounded in parenthesis. + if (!(node is AstOperation operation)) + { + // This is sort of a special case, if this is a negative constant, + // and it is consumed by a unary operation, we need to put on the parenthesis, + // as in GLSL a sequence like --2 or ~-1 is not valid. + if (IsNegativeConst(node) && pInfo.Type == InstType.OpUnary) + { + return true; + } + + return false; + } + + if ((pInfo.Type & (InstType.Call | InstType.Special)) != 0) + { + return false; + } + + InstInfo info = _infoTable[(int)(operation.Inst & Instruction.Mask)]; + + if ((info.Type & (InstType.Call | InstType.Special)) != 0) + { + return false; + } + + if (info.Precedence < pInfo.Precedence) + { + return false; + } + + if (info.Precedence == pInfo.Precedence && isLhs) + { + return false; + } + + if (pInst == operation.Inst && info.Type == InstType.OpBinaryCom) + { + return false; + } + + return true; + } + + private static bool IsNegativeConst(IAstNode node) + { + if (!(node is AstOperand operand)) + { + return false; + } + + return operand.Type == OperandType.Constant && operand.Value < 0; + } + } +}
\ No newline at end of file diff --git a/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGenMemory.cs b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGenMemory.cs new file mode 100644 index 00000000..99519837 --- /dev/null +++ b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGenMemory.cs @@ -0,0 +1,939 @@ +using Ryujinx.Graphics.Shader.IntermediateRepresentation; +using Ryujinx.Graphics.Shader.StructuredIr; +using Ryujinx.Graphics.Shader.Translation; +using System; +using System.Text; + +using static Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions.InstGenHelper; +using static Ryujinx.Graphics.Shader.StructuredIr.InstructionInfo; + +namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions +{ + static class InstGenMemory + { + public static string ImageLoadOrStore(CodeGenContext context, AstOperation operation) + { + AstTextureOperation texOp = (AstTextureOperation)operation; + + bool isBindless = (texOp.Flags & TextureFlags.Bindless) != 0; + + // TODO: Bindless texture support. For now we just return 0/do nothing. + if (isBindless) + { + switch (texOp.Inst) + { + case Instruction.ImageStore: + return "// imageStore(bindless)"; + case Instruction.ImageLoad: + AggregateType componentType = texOp.Format.GetComponentType(); + + NumberFormatter.TryFormat(0, componentType, out string imageConst); + + AggregateType outputType = texOp.GetVectorType(componentType); + + if ((outputType & AggregateType.ElementCountMask) != 0) + { + return $"{Declarations.GetVarTypeName(context, outputType, precise: false)}({imageConst})"; + } + + return imageConst; + default: + return NumberFormatter.FormatInt(0); + } + } + + bool isArray = (texOp.Type & SamplerType.Array) != 0; + bool isIndexed = (texOp.Type & SamplerType.Indexed) != 0; + + var texCallBuilder = new StringBuilder(); + + if (texOp.Inst == Instruction.ImageAtomic) + { + texCallBuilder.Append((texOp.Flags & TextureFlags.AtomicMask) switch { + TextureFlags.Add => "imageAtomicAdd", + TextureFlags.Minimum => "imageAtomicMin", + TextureFlags.Maximum => "imageAtomicMax", + TextureFlags.Increment => "imageAtomicAdd", // TODO: Clamp value. + TextureFlags.Decrement => "imageAtomicAdd", // TODO: Clamp value. + TextureFlags.BitwiseAnd => "imageAtomicAnd", + TextureFlags.BitwiseOr => "imageAtomicOr", + TextureFlags.BitwiseXor => "imageAtomicXor", + TextureFlags.Swap => "imageAtomicExchange", + TextureFlags.CAS => "imageAtomicCompSwap", + _ => "imageAtomicAdd", + }); + } + else + { + texCallBuilder.Append(texOp.Inst == Instruction.ImageLoad ? "imageLoad" : "imageStore"); + } + + int srcIndex = isBindless ? 1 : 0; + + string Src(AggregateType type) + { + return GetSoureExpr(context, texOp.GetSource(srcIndex++), type); + } + + string indexExpr = null; + + if (isIndexed) + { + indexExpr = Src(AggregateType.S32); + } + + string imageName = OperandManager.GetImageName(context.Config.Stage, texOp, indexExpr); + + texCallBuilder.Append('('); + texCallBuilder.Append(imageName); + + int coordsCount = texOp.Type.GetDimensions(); + + int pCount = coordsCount + (isArray ? 1 : 0); + + void Append(string str) + { + texCallBuilder.Append(", "); + texCallBuilder.Append(str); + } + + string ApplyScaling(string vector) + { + if (context.Config.Stage.SupportsRenderScale() && + texOp.Inst == Instruction.ImageLoad && + !isBindless && + !isIndexed) + { + // Image scales start after texture ones. + int scaleIndex = context.Config.GetTextureDescriptors().Length + context.Config.FindImageDescriptorIndex(texOp); + + if (pCount == 3 && isArray) + { + // The array index is not scaled, just x and y. + vector = $"ivec3(Helper_TexelFetchScale(({vector}).xy, {scaleIndex}), ({vector}).z)"; + } + else if (pCount == 2 && !isArray) + { + vector = $"Helper_TexelFetchScale({vector}, {scaleIndex})"; + } + } + + return vector; + } + + if (pCount > 1) + { + string[] elems = new string[pCount]; + + for (int index = 0; index < pCount; index++) + { + elems[index] = Src(AggregateType.S32); + } + + Append(ApplyScaling($"ivec{pCount}({string.Join(", ", elems)})")); + } + else + { + Append(Src(AggregateType.S32)); + } + + if (texOp.Inst == Instruction.ImageStore) + { + AggregateType type = texOp.Format.GetComponentType(); + + string[] cElems = new string[4]; + + for (int index = 0; index < 4; index++) + { + if (srcIndex < texOp.SourcesCount) + { + cElems[index] = Src(type); + } + else + { + cElems[index] = type switch + { + AggregateType.S32 => NumberFormatter.FormatInt(0), + AggregateType.U32 => NumberFormatter.FormatUint(0), + _ => NumberFormatter.FormatFloat(0) + }; + } + } + + string prefix = type switch + { + AggregateType.S32 => "i", + AggregateType.U32 => "u", + _ => string.Empty + }; + + Append($"{prefix}vec4({string.Join(", ", cElems)})"); + } + + if (texOp.Inst == Instruction.ImageAtomic) + { + AggregateType type = texOp.Format.GetComponentType(); + + if ((texOp.Flags & TextureFlags.AtomicMask) == TextureFlags.CAS) + { + Append(Src(type)); // Compare value. + } + + string value = (texOp.Flags & TextureFlags.AtomicMask) switch + { + TextureFlags.Increment => NumberFormatter.FormatInt(1, type), // TODO: Clamp value + TextureFlags.Decrement => NumberFormatter.FormatInt(-1, type), // TODO: Clamp value + _ => Src(type) + }; + + Append(value); + + texCallBuilder.Append(')'); + + if (type != AggregateType.S32) + { + texCallBuilder + .Insert(0, "int(") + .Append(')'); + } + } + else + { + texCallBuilder.Append(')'); + + if (texOp.Inst == Instruction.ImageLoad) + { + texCallBuilder.Append(GetMaskMultiDest(texOp.Index)); + } + } + + return texCallBuilder.ToString(); + } + + public static string Load(CodeGenContext context, AstOperation operation) + { + return GenerateLoadOrStore(context, operation, isStore: false); + } + + public static string LoadConstant(CodeGenContext context, AstOperation operation) + { + IAstNode src1 = operation.GetSource(0); + IAstNode src2 = operation.GetSource(1); + + string offsetExpr = GetSoureExpr(context, src2, GetSrcVarType(operation.Inst, 1)); + offsetExpr = Enclose(offsetExpr, src2, Instruction.ShiftRightS32, isLhs: true); + + var config = context.Config; + bool indexElement = !config.GpuAccessor.QueryHostHasVectorIndexingBug(); + + if (src1 is AstOperand operand && operand.Type == OperandType.Constant) + { + bool cbIndexable = config.UsedFeatures.HasFlag(Translation.FeatureFlags.CbIndexing); + return OperandManager.GetConstantBufferName(operand.Value, offsetExpr, config.Stage, cbIndexable, indexElement); + } + else + { + string slotExpr = GetSoureExpr(context, src1, GetSrcVarType(operation.Inst, 0)); + return OperandManager.GetConstantBufferName(slotExpr, offsetExpr, config.Stage, indexElement); + } + } + + public static string LoadLocal(CodeGenContext context, AstOperation operation) + { + return LoadLocalOrShared(context, operation, DefaultNames.LocalMemoryName); + } + + public static string LoadShared(CodeGenContext context, AstOperation operation) + { + return LoadLocalOrShared(context, operation, DefaultNames.SharedMemoryName); + } + + private static string LoadLocalOrShared(CodeGenContext context, AstOperation operation, string arrayName) + { + IAstNode src1 = operation.GetSource(0); + + string offsetExpr = GetSoureExpr(context, src1, GetSrcVarType(operation.Inst, 0)); + + return $"{arrayName}[{offsetExpr}]"; + } + + public static string LoadStorage(CodeGenContext context, AstOperation operation) + { + IAstNode src1 = operation.GetSource(0); + IAstNode src2 = operation.GetSource(1); + + string indexExpr = GetSoureExpr(context, src1, GetSrcVarType(operation.Inst, 0)); + string offsetExpr = GetSoureExpr(context, src2, GetSrcVarType(operation.Inst, 1)); + + return GetStorageBufferAccessor(indexExpr, offsetExpr, context.Config.Stage); + } + + public static string Lod(CodeGenContext context, AstOperation operation) + { + AstTextureOperation texOp = (AstTextureOperation)operation; + + int coordsCount = texOp.Type.GetDimensions(); + + bool isBindless = (texOp.Flags & TextureFlags.Bindless) != 0; + + // TODO: Bindless texture support. For now we just return 0. + if (isBindless) + { + return NumberFormatter.FormatFloat(0); + } + + bool isIndexed = (texOp.Type & SamplerType.Indexed) != 0; + + string indexExpr = null; + + if (isIndexed) + { + indexExpr = GetSoureExpr(context, texOp.GetSource(0), AggregateType.S32); + } + + string samplerName = OperandManager.GetSamplerName(context.Config.Stage, texOp, indexExpr); + + int coordsIndex = isBindless || isIndexed ? 1 : 0; + + string coordsExpr; + + if (coordsCount > 1) + { + string[] elems = new string[coordsCount]; + + for (int index = 0; index < coordsCount; index++) + { + elems[index] = GetSoureExpr(context, texOp.GetSource(coordsIndex + index), AggregateType.FP32); + } + + coordsExpr = "vec" + coordsCount + "(" + string.Join(", ", elems) + ")"; + } + else + { + coordsExpr = GetSoureExpr(context, texOp.GetSource(coordsIndex), AggregateType.FP32); + } + + return $"textureQueryLod({samplerName}, {coordsExpr}){GetMask(texOp.Index)}"; + } + + public static string Store(CodeGenContext context, AstOperation operation) + { + return GenerateLoadOrStore(context, operation, isStore: true); + } + + public static string StoreLocal(CodeGenContext context, AstOperation operation) + { + return StoreLocalOrShared(context, operation, DefaultNames.LocalMemoryName); + } + + public static string StoreShared(CodeGenContext context, AstOperation operation) + { + return StoreLocalOrShared(context, operation, DefaultNames.SharedMemoryName); + } + + private static string StoreLocalOrShared(CodeGenContext context, AstOperation operation, string arrayName) + { + IAstNode src1 = operation.GetSource(0); + IAstNode src2 = operation.GetSource(1); + + string offsetExpr = GetSoureExpr(context, src1, GetSrcVarType(operation.Inst, 0)); + + AggregateType srcType = OperandManager.GetNodeDestType(context, src2); + + string src = TypeConversion.ReinterpretCast(context, src2, srcType, AggregateType.U32); + + return $"{arrayName}[{offsetExpr}] = {src}"; + } + + public static string StoreShared16(CodeGenContext context, AstOperation operation) + { + IAstNode src1 = operation.GetSource(0); + IAstNode src2 = operation.GetSource(1); + + string offsetExpr = GetSoureExpr(context, src1, GetSrcVarType(operation.Inst, 0)); + + AggregateType srcType = OperandManager.GetNodeDestType(context, src2); + + string src = TypeConversion.ReinterpretCast(context, src2, srcType, AggregateType.U32); + + return $"{HelperFunctionNames.StoreShared16}({offsetExpr}, {src})"; + } + + public static string StoreShared8(CodeGenContext context, AstOperation operation) + { + IAstNode src1 = operation.GetSource(0); + IAstNode src2 = operation.GetSource(1); + + string offsetExpr = GetSoureExpr(context, src1, GetSrcVarType(operation.Inst, 0)); + + AggregateType srcType = OperandManager.GetNodeDestType(context, src2); + + string src = TypeConversion.ReinterpretCast(context, src2, srcType, AggregateType.U32); + + return $"{HelperFunctionNames.StoreShared8}({offsetExpr}, {src})"; + } + + public static string StoreStorage(CodeGenContext context, AstOperation operation) + { + IAstNode src1 = operation.GetSource(0); + IAstNode src2 = operation.GetSource(1); + IAstNode src3 = operation.GetSource(2); + + string indexExpr = GetSoureExpr(context, src1, GetSrcVarType(operation.Inst, 0)); + string offsetExpr = GetSoureExpr(context, src2, GetSrcVarType(operation.Inst, 1)); + + AggregateType srcType = OperandManager.GetNodeDestType(context, src3); + + string src = TypeConversion.ReinterpretCast(context, src3, srcType, AggregateType.U32); + + string sb = GetStorageBufferAccessor(indexExpr, offsetExpr, context.Config.Stage); + + return $"{sb} = {src}"; + } + + public static string StoreStorage16(CodeGenContext context, AstOperation operation) + { + IAstNode src1 = operation.GetSource(0); + IAstNode src2 = operation.GetSource(1); + IAstNode src3 = operation.GetSource(2); + + string indexExpr = GetSoureExpr(context, src1, GetSrcVarType(operation.Inst, 0)); + string offsetExpr = GetSoureExpr(context, src2, GetSrcVarType(operation.Inst, 1)); + + AggregateType srcType = OperandManager.GetNodeDestType(context, src3); + + string src = TypeConversion.ReinterpretCast(context, src3, srcType, AggregateType.U32); + + string sb = GetStorageBufferAccessor(indexExpr, offsetExpr, context.Config.Stage); + + return $"{HelperFunctionNames.StoreStorage16}({indexExpr}, {offsetExpr}, {src})"; + } + + public static string StoreStorage8(CodeGenContext context, AstOperation operation) + { + IAstNode src1 = operation.GetSource(0); + IAstNode src2 = operation.GetSource(1); + IAstNode src3 = operation.GetSource(2); + + string indexExpr = GetSoureExpr(context, src1, GetSrcVarType(operation.Inst, 0)); + string offsetExpr = GetSoureExpr(context, src2, GetSrcVarType(operation.Inst, 1)); + + AggregateType srcType = OperandManager.GetNodeDestType(context, src3); + + string src = TypeConversion.ReinterpretCast(context, src3, srcType, AggregateType.U32); + + string sb = GetStorageBufferAccessor(indexExpr, offsetExpr, context.Config.Stage); + + return $"{HelperFunctionNames.StoreStorage8}({indexExpr}, {offsetExpr}, {src})"; + } + + public static string TextureSample(CodeGenContext context, AstOperation operation) + { + AstTextureOperation texOp = (AstTextureOperation)operation; + + bool isBindless = (texOp.Flags & TextureFlags.Bindless) != 0; + bool isGather = (texOp.Flags & TextureFlags.Gather) != 0; + bool hasDerivatives = (texOp.Flags & TextureFlags.Derivatives) != 0; + bool intCoords = (texOp.Flags & TextureFlags.IntCoords) != 0; + bool hasLodBias = (texOp.Flags & TextureFlags.LodBias) != 0; + bool hasLodLevel = (texOp.Flags & TextureFlags.LodLevel) != 0; + bool hasOffset = (texOp.Flags & TextureFlags.Offset) != 0; + bool hasOffsets = (texOp.Flags & TextureFlags.Offsets) != 0; + + bool isArray = (texOp.Type & SamplerType.Array) != 0; + bool isIndexed = (texOp.Type & SamplerType.Indexed) != 0; + bool isMultisample = (texOp.Type & SamplerType.Multisample) != 0; + bool isShadow = (texOp.Type & SamplerType.Shadow) != 0; + + bool colorIsVector = isGather || !isShadow; + + SamplerType type = texOp.Type & SamplerType.Mask; + + bool is2D = type == SamplerType.Texture2D; + bool isCube = type == SamplerType.TextureCube; + + // 2D Array and Cube shadow samplers with LOD level or bias requires an extension. + // If the extension is not supported, just remove the LOD parameter. + if (isArray && isShadow && (is2D || isCube) && !context.Config.GpuAccessor.QueryHostSupportsTextureShadowLod()) + { + hasLodBias = false; + hasLodLevel = false; + } + + // Cube shadow samplers with LOD level requires an extension. + // If the extension is not supported, just remove the LOD level parameter. + if (isShadow && isCube && !context.Config.GpuAccessor.QueryHostSupportsTextureShadowLod()) + { + hasLodLevel = false; + } + + // TODO: Bindless texture support. For now we just return 0. + if (isBindless) + { + string scalarValue = NumberFormatter.FormatFloat(0); + + if (colorIsVector) + { + AggregateType outputType = texOp.GetVectorType(AggregateType.FP32); + + if ((outputType & AggregateType.ElementCountMask) != 0) + { + return $"{Declarations.GetVarTypeName(context, outputType, precise: false)}({scalarValue})"; + } + } + + return scalarValue; + } + + string texCall = intCoords ? "texelFetch" : "texture"; + + if (isGather) + { + texCall += "Gather"; + } + else if (hasDerivatives) + { + texCall += "Grad"; + } + else if (hasLodLevel && !intCoords) + { + texCall += "Lod"; + } + + if (hasOffset) + { + texCall += "Offset"; + } + else if (hasOffsets) + { + texCall += "Offsets"; + } + + int srcIndex = isBindless ? 1 : 0; + + string Src(AggregateType type) + { + return GetSoureExpr(context, texOp.GetSource(srcIndex++), type); + } + + string indexExpr = null; + + if (isIndexed) + { + indexExpr = Src(AggregateType.S32); + } + + string samplerName = OperandManager.GetSamplerName(context.Config.Stage, texOp, indexExpr); + + texCall += "(" + samplerName; + + int coordsCount = texOp.Type.GetDimensions(); + + int pCount = coordsCount; + + int arrayIndexElem = -1; + + if (isArray) + { + arrayIndexElem = pCount++; + } + + // The sampler 1D shadow overload expects a + // dummy value on the middle of the vector, who knows why... + bool hasDummy1DShadowElem = texOp.Type == (SamplerType.Texture1D | SamplerType.Shadow); + + if (hasDummy1DShadowElem) + { + pCount++; + } + + if (isShadow && !isGather) + { + pCount++; + } + + // On textureGather*, the comparison value is + // always specified as an extra argument. + bool hasExtraCompareArg = isShadow && isGather; + + if (pCount == 5) + { + pCount = 4; + + hasExtraCompareArg = true; + } + + void Append(string str) + { + texCall += ", " + str; + } + + AggregateType coordType = intCoords ? AggregateType.S32 : AggregateType.FP32; + + string AssemblePVector(int count) + { + if (count > 1) + { + string[] elems = new string[count]; + + for (int index = 0; index < count; index++) + { + if (arrayIndexElem == index) + { + elems[index] = Src(AggregateType.S32); + + if (!intCoords) + { + elems[index] = "float(" + elems[index] + ")"; + } + } + else if (index == 1 && hasDummy1DShadowElem) + { + elems[index] = NumberFormatter.FormatFloat(0); + } + else + { + elems[index] = Src(coordType); + } + } + + string prefix = intCoords ? "i" : string.Empty; + + return prefix + "vec" + count + "(" + string.Join(", ", elems) + ")"; + } + else + { + return Src(coordType); + } + } + + string ApplyScaling(string vector) + { + if (intCoords) + { + if (context.Config.Stage.SupportsRenderScale() && + !isBindless && + !isIndexed) + { + int index = context.Config.FindTextureDescriptorIndex(texOp); + + if (pCount == 3 && isArray) + { + // The array index is not scaled, just x and y. + vector = "ivec3(Helper_TexelFetchScale((" + vector + ").xy, " + index + "), (" + vector + ").z)"; + } + else if (pCount == 2 && !isArray) + { + vector = "Helper_TexelFetchScale(" + vector + ", " + index + ")"; + } + } + } + + return vector; + } + + string ApplyBias(string vector) + { + int gatherBiasPrecision = context.Config.GpuAccessor.QueryHostGatherBiasPrecision(); + if (isGather && gatherBiasPrecision != 0) + { + // GPU requires texture gather to be slightly offset to match NVIDIA behaviour when point is exactly between two texels. + // Offset by the gather precision divided by 2 to correct for rounding. + + if (pCount == 1) + { + vector = $"{vector} + (1.0 / (float(textureSize({samplerName}, 0)) * float({1 << (gatherBiasPrecision + 1)})))"; + } + else + { + vector = $"{vector} + (1.0 / (vec{pCount}(textureSize({samplerName}, 0).{"xyz".Substring(0, pCount)}) * float({1 << (gatherBiasPrecision + 1)})))"; + } + } + + return vector; + } + + Append(ApplyBias(ApplyScaling(AssemblePVector(pCount)))); + + string AssembleDerivativesVector(int count) + { + if (count > 1) + { + string[] elems = new string[count]; + + for (int index = 0; index < count; index++) + { + elems[index] = Src(AggregateType.FP32); + } + + return "vec" + count + "(" + string.Join(", ", elems) + ")"; + } + else + { + return Src(AggregateType.FP32); + } + } + + if (hasExtraCompareArg) + { + Append(Src(AggregateType.FP32)); + } + + if (hasDerivatives) + { + Append(AssembleDerivativesVector(coordsCount)); // dPdx + Append(AssembleDerivativesVector(coordsCount)); // dPdy + } + + if (isMultisample) + { + Append(Src(AggregateType.S32)); + } + else if (hasLodLevel) + { + Append(Src(coordType)); + } + + string AssembleOffsetVector(int count) + { + if (count > 1) + { + string[] elems = new string[count]; + + for (int index = 0; index < count; index++) + { + elems[index] = Src(AggregateType.S32); + } + + return "ivec" + count + "(" + string.Join(", ", elems) + ")"; + } + else + { + return Src(AggregateType.S32); + } + } + + if (hasOffset) + { + Append(AssembleOffsetVector(coordsCount)); + } + else if (hasOffsets) + { + texCall += $", ivec{coordsCount}[4]("; + + texCall += AssembleOffsetVector(coordsCount) + ", "; + texCall += AssembleOffsetVector(coordsCount) + ", "; + texCall += AssembleOffsetVector(coordsCount) + ", "; + texCall += AssembleOffsetVector(coordsCount) + ")"; + } + + if (hasLodBias) + { + Append(Src(AggregateType.FP32)); + } + + // textureGather* optional extra component index, + // not needed for shadow samplers. + if (isGather && !isShadow) + { + Append(Src(AggregateType.S32)); + } + + texCall += ")" + (colorIsVector ? GetMaskMultiDest(texOp.Index) : ""); + + return texCall; + } + + public static string TextureSize(CodeGenContext context, AstOperation operation) + { + AstTextureOperation texOp = (AstTextureOperation)operation; + + bool isBindless = (texOp.Flags & TextureFlags.Bindless) != 0; + + // TODO: Bindless texture support. For now we just return 0. + if (isBindless) + { + return NumberFormatter.FormatInt(0); + } + + bool isIndexed = (texOp.Type & SamplerType.Indexed) != 0; + + string indexExpr = null; + + if (isIndexed) + { + indexExpr = GetSoureExpr(context, texOp.GetSource(0), AggregateType.S32); + } + + string samplerName = OperandManager.GetSamplerName(context.Config.Stage, texOp, indexExpr); + + if (texOp.Index == 3) + { + return $"textureQueryLevels({samplerName})"; + } + else + { + (TextureDescriptor descriptor, int descriptorIndex) = context.Config.FindTextureDescriptor(texOp); + bool hasLod = !descriptor.Type.HasFlag(SamplerType.Multisample) && descriptor.Type != SamplerType.TextureBuffer; + string texCall; + + if (hasLod) + { + int lodSrcIndex = isBindless || isIndexed ? 1 : 0; + IAstNode lod = operation.GetSource(lodSrcIndex); + string lodExpr = GetSoureExpr(context, lod, GetSrcVarType(operation.Inst, lodSrcIndex)); + + texCall = $"textureSize({samplerName}, {lodExpr}){GetMask(texOp.Index)}"; + } + else + { + texCall = $"textureSize({samplerName}){GetMask(texOp.Index)}"; + } + + if (context.Config.Stage.SupportsRenderScale() && + (texOp.Index < 2 || (texOp.Type & SamplerType.Mask) == SamplerType.Texture3D) && + !isBindless && + !isIndexed) + { + texCall = $"Helper_TextureSizeUnscale({texCall}, {descriptorIndex})"; + } + + return texCall; + } + } + + private static string GenerateLoadOrStore(CodeGenContext context, AstOperation operation, bool isStore) + { + StorageKind storageKind = operation.StorageKind; + + string varName; + AggregateType varType; + int srcIndex = 0; + + switch (storageKind) + { + case StorageKind.Input: + case StorageKind.InputPerPatch: + case StorageKind.Output: + case StorageKind.OutputPerPatch: + if (!(operation.GetSource(srcIndex++) is AstOperand varId) || varId.Type != OperandType.Constant) + { + throw new InvalidOperationException($"First input of {operation.Inst} with {storageKind} storage must be a constant operand."); + } + + IoVariable ioVariable = (IoVariable)varId.Value; + bool isOutput = storageKind.IsOutput(); + bool isPerPatch = storageKind.IsPerPatch(); + int location = -1; + int component = 0; + + if (context.Config.HasPerLocationInputOrOutput(ioVariable, isOutput)) + { + if (!(operation.GetSource(srcIndex++) is AstOperand vecIndex) || vecIndex.Type != OperandType.Constant) + { + throw new InvalidOperationException($"Second input of {operation.Inst} with {storageKind} storage must be a constant operand."); + } + + location = vecIndex.Value; + + if (operation.SourcesCount > srcIndex && + operation.GetSource(srcIndex) is AstOperand elemIndex && + elemIndex.Type == OperandType.Constant && + context.Config.HasPerLocationInputOrOutputComponent(ioVariable, location, elemIndex.Value, isOutput)) + { + component = elemIndex.Value; + srcIndex++; + } + } + + (varName, varType) = IoMap.GetGlslVariable(context.Config, ioVariable, location, component, isOutput, isPerPatch); + + if (IoMap.IsPerVertexBuiltIn(context.Config.Stage, ioVariable, isOutput)) + { + // Since those exist both as input and output on geometry and tessellation shaders, + // we need the gl_in and gl_out prefixes to disambiguate. + + if (storageKind == StorageKind.Input) + { + string expr = GetSoureExpr(context, operation.GetSource(srcIndex++), AggregateType.S32); + varName = $"gl_in[{expr}].{varName}"; + } + else if (storageKind == StorageKind.Output) + { + string expr = GetSoureExpr(context, operation.GetSource(srcIndex++), AggregateType.S32); + varName = $"gl_out[{expr}].{varName}"; + } + } + + int firstSrcIndex = srcIndex; + int inputsCount = isStore ? operation.SourcesCount - 1 : operation.SourcesCount; + + for (; srcIndex < inputsCount; srcIndex++) + { + IAstNode src = operation.GetSource(srcIndex); + + if ((varType & AggregateType.ElementCountMask) != 0 && + srcIndex == inputsCount - 1 && + src is AstOperand elementIndex && + elementIndex.Type == OperandType.Constant) + { + varName += "." + "xyzw"[elementIndex.Value & 3]; + } + else if (srcIndex == firstSrcIndex && context.Config.Stage == ShaderStage.TessellationControl && storageKind == StorageKind.Output) + { + // GLSL requires that for tessellation control shader outputs, + // that the index expression must be *exactly* "gl_InvocationID", + // otherwise the compilation fails. + // TODO: Get rid of this and use expression propagation to make sure we generate the correct code from IR. + varName += "[gl_InvocationID]"; + } + else + { + varName += $"[{GetSoureExpr(context, src, AggregateType.S32)}]"; + } + } + break; + + default: + throw new InvalidOperationException($"Invalid storage kind {storageKind}."); + } + + if (isStore) + { + varType &= AggregateType.ElementTypeMask; + varName = $"{varName} = {GetSoureExpr(context, operation.GetSource(srcIndex), varType)}"; + } + + return varName; + } + + private static string GetStorageBufferAccessor(string slotExpr, string offsetExpr, ShaderStage stage) + { + string sbName = OperandManager.GetShaderStagePrefix(stage); + + sbName += "_" + DefaultNames.StorageNamePrefix; + + return $"{sbName}[{slotExpr}].{DefaultNames.DataName}[{offsetExpr}]"; + } + + private static string GetMask(int index) + { + return $".{"rgba".AsSpan(index, 1)}"; + } + + private static string GetMaskMultiDest(int mask) + { + string swizzle = "."; + + for (int i = 0; i < 4; i++) + { + if ((mask & (1 << i)) != 0) + { + swizzle += "xyzw"[i]; + } + } + + return swizzle; + } + } +}
\ No newline at end of file diff --git a/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGenPacking.cs b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGenPacking.cs new file mode 100644 index 00000000..5a888e9c --- /dev/null +++ b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGenPacking.cs @@ -0,0 +1,56 @@ +using Ryujinx.Graphics.Shader.StructuredIr; +using System; + +using static Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions.InstGenHelper; +using static Ryujinx.Graphics.Shader.StructuredIr.InstructionInfo; + +namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions +{ + static class InstGenPacking + { + public static string PackDouble2x32(CodeGenContext context, AstOperation operation) + { + IAstNode src0 = operation.GetSource(0); + IAstNode src1 = operation.GetSource(1); + + string src0Expr = GetSoureExpr(context, src0, GetSrcVarType(operation.Inst, 0)); + string src1Expr = GetSoureExpr(context, src1, GetSrcVarType(operation.Inst, 1)); + + return $"packDouble2x32(uvec2({src0Expr}, {src1Expr}))"; + } + + public static string PackHalf2x16(CodeGenContext context, AstOperation operation) + { + IAstNode src0 = operation.GetSource(0); + IAstNode src1 = operation.GetSource(1); + + string src0Expr = GetSoureExpr(context, src0, GetSrcVarType(operation.Inst, 0)); + string src1Expr = GetSoureExpr(context, src1, GetSrcVarType(operation.Inst, 1)); + + return $"packHalf2x16(vec2({src0Expr}, {src1Expr}))"; + } + + public static string UnpackDouble2x32(CodeGenContext context, AstOperation operation) + { + IAstNode src = operation.GetSource(0); + + string srcExpr = GetSoureExpr(context, src, GetSrcVarType(operation.Inst, 0)); + + return $"unpackDouble2x32({srcExpr}){GetMask(operation.Index)}"; + } + + public static string UnpackHalf2x16(CodeGenContext context, AstOperation operation) + { + IAstNode src = operation.GetSource(0); + + string srcExpr = GetSoureExpr(context, src, GetSrcVarType(operation.Inst, 0)); + + return $"unpackHalf2x16({srcExpr}){GetMask(operation.Index)}"; + } + + private static string GetMask(int index) + { + return $".{"xy".AsSpan(index, 1)}"; + } + } +}
\ No newline at end of file diff --git a/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGenVector.cs b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGenVector.cs new file mode 100644 index 00000000..f09ea2e8 --- /dev/null +++ b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstGenVector.cs @@ -0,0 +1,32 @@ +using Ryujinx.Graphics.Shader.IntermediateRepresentation; +using Ryujinx.Graphics.Shader.StructuredIr; + +using static Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions.InstGenHelper; +using static Ryujinx.Graphics.Shader.StructuredIr.InstructionInfo; + +namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions +{ + static class InstGenVector + { + public static string VectorExtract(CodeGenContext context, AstOperation operation) + { + IAstNode vector = operation.GetSource(0); + IAstNode index = operation.GetSource(1); + + string vectorExpr = GetSoureExpr(context, vector, OperandManager.GetNodeDestType(context, vector)); + + if (index is AstOperand indexOperand && indexOperand.Type == OperandType.Constant) + { + char elem = "xyzw"[indexOperand.Value]; + + return $"{vectorExpr}.{elem}"; + } + else + { + string indexExpr = GetSoureExpr(context, index, GetSrcVarType(operation.Inst, 1)); + + return $"{vectorExpr}[{indexExpr}]"; + } + } + } +}
\ No newline at end of file diff --git a/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstInfo.cs b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstInfo.cs new file mode 100644 index 00000000..7b2a6b46 --- /dev/null +++ b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstInfo.cs @@ -0,0 +1,18 @@ +namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions +{ + readonly struct InstInfo + { + public InstType Type { get; } + + public string OpName { get; } + + public int Precedence { get; } + + public InstInfo(InstType type, string opName, int precedence) + { + Type = type; + OpName = opName; + Precedence = precedence; + } + } +}
\ No newline at end of file diff --git a/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstType.cs b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstType.cs new file mode 100644 index 00000000..84e36cdd --- /dev/null +++ b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/InstType.cs @@ -0,0 +1,33 @@ +using System; + +namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions +{ + [Flags] + enum InstType + { + OpNullary = Op | 0, + OpUnary = Op | 1, + OpBinary = Op | 2, + OpBinaryCom = Op | 2 | Commutative, + OpTernary = Op | 3, + + CallNullary = Call | 0, + CallUnary = Call | 1, + CallBinary = Call | 2, + CallTernary = Call | 3, + CallQuaternary = Call | 4, + + // The atomic instructions have one extra operand, + // for the storage slot and offset pair. + AtomicBinary = Call | Atomic | 3, + AtomicTernary = Call | Atomic | 4, + + Commutative = 1 << 8, + Op = 1 << 9, + Call = 1 << 10, + Atomic = 1 << 11, + Special = 1 << 12, + + ArityMask = 0xff + } +}
\ No newline at end of file diff --git a/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/IoMap.cs b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/IoMap.cs new file mode 100644 index 00000000..093ee232 --- /dev/null +++ b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/Instructions/IoMap.cs @@ -0,0 +1,145 @@ +using Ryujinx.Graphics.Shader.IntermediateRepresentation; +using Ryujinx.Graphics.Shader.Translation; +using System.Globalization; + +namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions +{ + static class IoMap + { + public static (string, AggregateType) GetGlslVariable( + ShaderConfig config, + IoVariable ioVariable, + int location, + int component, + bool isOutput, + bool isPerPatch) + { + return ioVariable switch + { + IoVariable.BackColorDiffuse => ("gl_BackColor", AggregateType.Vector4 | AggregateType.FP32), // Deprecated. + IoVariable.BackColorSpecular => ("gl_BackSecondaryColor", AggregateType.Vector4 | AggregateType.FP32), // Deprecated. + IoVariable.BaseInstance => ("gl_BaseInstanceARB", AggregateType.S32), + IoVariable.BaseVertex => ("gl_BaseVertexARB", AggregateType.S32), + IoVariable.ClipDistance => ("gl_ClipDistance", AggregateType.Array | AggregateType.FP32), + IoVariable.CtaId => ("gl_WorkGroupID", AggregateType.Vector3 | AggregateType.U32), + IoVariable.DrawIndex => ("gl_DrawIDARB", AggregateType.S32), + IoVariable.FogCoord => ("gl_FogFragCoord", AggregateType.FP32), // Deprecated. + IoVariable.FragmentCoord => ("gl_FragCoord", AggregateType.Vector4 | AggregateType.FP32), + IoVariable.FragmentOutputColor => GetFragmentOutputColorVariableName(config, location), + IoVariable.FragmentOutputDepth => ("gl_FragDepth", AggregateType.FP32), + IoVariable.FragmentOutputIsBgra => (DefaultNames.SupportBlockIsBgraName, AggregateType.Array | AggregateType.Bool), + IoVariable.FrontColorDiffuse => ("gl_FrontColor", AggregateType.Vector4 | AggregateType.FP32), // Deprecated. + IoVariable.FrontColorSpecular => ("gl_FrontSecondaryColor", AggregateType.Vector4 | AggregateType.FP32), // Deprecated. + IoVariable.FrontFacing => ("gl_FrontFacing", AggregateType.Bool), + IoVariable.InstanceId => ("gl_InstanceID", AggregateType.S32), + IoVariable.InstanceIndex => ("gl_InstanceIndex", AggregateType.S32), + IoVariable.InvocationId => ("gl_InvocationID", AggregateType.S32), + IoVariable.Layer => ("gl_Layer", AggregateType.S32), + IoVariable.PatchVertices => ("gl_PatchVerticesIn", AggregateType.S32), + IoVariable.PointCoord => ("gl_PointCoord", AggregateType.Vector2 | AggregateType.FP32), + IoVariable.PointSize => ("gl_PointSize", AggregateType.FP32), + IoVariable.Position => ("gl_Position", AggregateType.Vector4 | AggregateType.FP32), + IoVariable.PrimitiveId => GetPrimitiveIdVariableName(config.Stage, isOutput), + IoVariable.SubgroupEqMask => GetSubgroupMaskVariableName(config, "Eq"), + IoVariable.SubgroupGeMask => GetSubgroupMaskVariableName(config, "Ge"), + IoVariable.SubgroupGtMask => GetSubgroupMaskVariableName(config, "Gt"), + IoVariable.SubgroupLaneId => GetSubgroupInvocationIdVariableName(config), + IoVariable.SubgroupLeMask => GetSubgroupMaskVariableName(config, "Le"), + IoVariable.SubgroupLtMask => GetSubgroupMaskVariableName(config, "Lt"), + IoVariable.SupportBlockRenderScale => (DefaultNames.SupportBlockRenderScaleName, AggregateType.Array | AggregateType.FP32), + IoVariable.SupportBlockViewInverse => (DefaultNames.SupportBlockViewportInverse, AggregateType.Vector2 | AggregateType.FP32), + IoVariable.TessellationCoord => ("gl_TessCoord", AggregateType.Vector3 | AggregateType.FP32), + IoVariable.TessellationLevelInner => ("gl_TessLevelInner", AggregateType.Array | AggregateType.FP32), + IoVariable.TessellationLevelOuter => ("gl_TessLevelOuter", AggregateType.Array | AggregateType.FP32), + IoVariable.TextureCoord => ("gl_TexCoord", AggregateType.Array | AggregateType.Vector4 | AggregateType.FP32), // Deprecated. + IoVariable.ThreadId => ("gl_LocalInvocationID", AggregateType.Vector3 | AggregateType.U32), + IoVariable.ThreadKill => ("gl_HelperInvocation", AggregateType.Bool), + IoVariable.UserDefined => GetUserDefinedVariableName(config, location, component, isOutput, isPerPatch), + IoVariable.VertexId => ("gl_VertexID", AggregateType.S32), + IoVariable.VertexIndex => ("gl_VertexIndex", AggregateType.S32), + IoVariable.ViewportIndex => ("gl_ViewportIndex", AggregateType.S32), + IoVariable.ViewportMask => ("gl_ViewportMask", AggregateType.Array | AggregateType.S32), + _ => (null, AggregateType.Invalid) + }; + } + + public static bool IsPerVertexBuiltIn(ShaderStage stage, IoVariable ioVariable, bool isOutput) + { + switch (ioVariable) + { + case IoVariable.Layer: + case IoVariable.ViewportIndex: + case IoVariable.PointSize: + case IoVariable.Position: + case IoVariable.ClipDistance: + case IoVariable.PointCoord: + case IoVariable.ViewportMask: + if (isOutput) + { + return stage == ShaderStage.TessellationControl; + } + else + { + return stage == ShaderStage.TessellationControl || + stage == ShaderStage.TessellationEvaluation || + stage == ShaderStage.Geometry; + } + } + + return false; + } + + private static (string, AggregateType) GetFragmentOutputColorVariableName(ShaderConfig config, int location) + { + if (location < 0) + { + return (DefaultNames.OAttributePrefix, config.GetFragmentOutputColorType(0)); + } + + string name = DefaultNames.OAttributePrefix + location.ToString(CultureInfo.InvariantCulture); + + return (name, config.GetFragmentOutputColorType(location)); + } + + private static (string, AggregateType) GetPrimitiveIdVariableName(ShaderStage stage, bool isOutput) + { + // The geometry stage has an additional gl_PrimitiveIDIn variable. + return (isOutput || stage != ShaderStage.Geometry ? "gl_PrimitiveID" : "gl_PrimitiveIDIn", AggregateType.S32); + } + + private static (string, AggregateType) GetSubgroupMaskVariableName(ShaderConfig config, string cc) + { + return config.GpuAccessor.QueryHostSupportsShaderBallot() + ? ($"unpackUint2x32(gl_SubGroup{cc}MaskARB)", AggregateType.Vector2 | AggregateType.U32) + : ($"gl_Subgroup{cc}Mask", AggregateType.Vector4 | AggregateType.U32); + } + + private static (string, AggregateType) GetSubgroupInvocationIdVariableName(ShaderConfig config) + { + return config.GpuAccessor.QueryHostSupportsShaderBallot() + ? ("gl_SubGroupInvocationARB", AggregateType.U32) + : ("gl_SubgroupInvocationID", AggregateType.U32); + } + + private static (string, AggregateType) GetUserDefinedVariableName(ShaderConfig config, int location, int component, bool isOutput, bool isPerPatch) + { + string name = isPerPatch + ? DefaultNames.PerPatchAttributePrefix + : (isOutput ? DefaultNames.OAttributePrefix : DefaultNames.IAttributePrefix); + + if (location < 0) + { + return (name, config.GetUserDefinedType(0, isOutput)); + } + + name += location.ToString(CultureInfo.InvariantCulture); + + if (config.HasPerLocationInputOrOutputComponent(IoVariable.UserDefined, location, component, isOutput)) + { + name += "_" + "xyzw"[component & 3]; + } + + return (name, config.GetUserDefinedType(location, isOutput)); + } + } +}
\ No newline at end of file diff --git a/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/NumberFormatter.cs b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/NumberFormatter.cs new file mode 100644 index 00000000..eb27e9bf --- /dev/null +++ b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/NumberFormatter.cs @@ -0,0 +1,104 @@ +using Ryujinx.Graphics.Shader.Translation; +using System; +using System.Globalization; + +namespace Ryujinx.Graphics.Shader.CodeGen.Glsl +{ + static class NumberFormatter + { + private const int MaxDecimal = 256; + + public static bool TryFormat(int value, AggregateType dstType, out string formatted) + { + if (dstType == AggregateType.FP32) + { + return TryFormatFloat(BitConverter.Int32BitsToSingle(value), out formatted); + } + else if (dstType == AggregateType.S32) + { + formatted = FormatInt(value); + } + else if (dstType == AggregateType.U32) + { + formatted = FormatUint((uint)value); + } + else if (dstType == AggregateType.Bool) + { + formatted = value != 0 ? "true" : "false"; + } + else + { + throw new ArgumentException($"Invalid variable type \"{dstType}\"."); + } + + return true; + } + + public static string FormatFloat(float value) + { + if (!TryFormatFloat(value, out string formatted)) + { + throw new ArgumentException("Failed to convert float value to string."); + } + + return formatted; + } + + public static bool TryFormatFloat(float value, out string formatted) + { + if (float.IsNaN(value) || float.IsInfinity(value)) + { + formatted = null; + + return false; + } + + formatted = value.ToString("G9", CultureInfo.InvariantCulture); + + if (!(formatted.Contains('.') || + formatted.Contains('e') || + formatted.Contains('E'))) + { + formatted += ".0"; + } + + return true; + } + + public static string FormatInt(int value, AggregateType dstType) + { + if (dstType == AggregateType.S32) + { + return FormatInt(value); + } + else if (dstType == AggregateType.U32) + { + return FormatUint((uint)value); + } + else + { + throw new ArgumentException($"Invalid variable type \"{dstType}\"."); + } + } + + public static string FormatInt(int value) + { + if (value <= MaxDecimal && value >= -MaxDecimal) + { + return value.ToString(CultureInfo.InvariantCulture); + } + + return "0x" + value.ToString("X", CultureInfo.InvariantCulture); + } + + public static string FormatUint(uint value) + { + if (value <= MaxDecimal && value >= 0) + { + return value.ToString(CultureInfo.InvariantCulture) + "u"; + } + + return "0x" + value.ToString("X", CultureInfo.InvariantCulture) + "u"; + } + } +}
\ No newline at end of file diff --git a/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/OperandManager.cs b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/OperandManager.cs new file mode 100644 index 00000000..92e83358 --- /dev/null +++ b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/OperandManager.cs @@ -0,0 +1,254 @@ +using Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions; +using Ryujinx.Graphics.Shader.IntermediateRepresentation; +using Ryujinx.Graphics.Shader.StructuredIr; +using Ryujinx.Graphics.Shader.Translation; +using System; +using System.Collections.Generic; +using System.Diagnostics; + +using static Ryujinx.Graphics.Shader.StructuredIr.InstructionInfo; + +namespace Ryujinx.Graphics.Shader.CodeGen.Glsl +{ + class OperandManager + { + private static readonly string[] _stagePrefixes = new string[] { "cp", "vp", "tcp", "tep", "gp", "fp" }; + + private Dictionary<AstOperand, string> _locals; + + public OperandManager() + { + _locals = new Dictionary<AstOperand, string>(); + } + + public string DeclareLocal(AstOperand operand) + { + string name = $"{DefaultNames.LocalNamePrefix}_{_locals.Count}"; + + _locals.Add(operand, name); + + return name; + } + + public string GetExpression(CodeGenContext context, AstOperand operand) + { + return operand.Type switch + { + OperandType.Argument => GetArgumentName(operand.Value), + OperandType.Constant => NumberFormatter.FormatInt(operand.Value), + OperandType.ConstantBuffer => GetConstantBufferName(operand, context.Config), + OperandType.LocalVariable => _locals[operand], + OperandType.Undefined => DefaultNames.UndefinedName, + _ => throw new ArgumentException($"Invalid operand type \"{operand.Type}\".") + }; + } + + private static string GetConstantBufferName(AstOperand operand, ShaderConfig config) + { + return GetConstantBufferName(operand.CbufSlot, operand.CbufOffset, config.Stage, config.UsedFeatures.HasFlag(FeatureFlags.CbIndexing)); + } + + public static string GetConstantBufferName(int slot, int offset, ShaderStage stage, bool cbIndexable) + { + return $"{GetUbName(stage, slot, cbIndexable)}[{offset >> 2}].{GetSwizzleMask(offset & 3)}"; + } + + private static string GetVec4Indexed(string vectorName, string indexExpr, bool indexElement) + { + if (indexElement) + { + return $"{vectorName}[{indexExpr}]"; + } + + string result = $"{vectorName}.x"; + for (int i = 1; i < 4; i++) + { + result = $"(({indexExpr}) == {i}) ? ({vectorName}.{GetSwizzleMask(i)}) : ({result})"; + } + return $"({result})"; + } + + public static string GetConstantBufferName(int slot, string offsetExpr, ShaderStage stage, bool cbIndexable, bool indexElement) + { + return GetVec4Indexed(GetUbName(stage, slot, cbIndexable) + $"[{offsetExpr} >> 2]", offsetExpr + " & 3", indexElement); + } + + public static string GetConstantBufferName(string slotExpr, string offsetExpr, ShaderStage stage, bool indexElement) + { + return GetVec4Indexed(GetUbName(stage, slotExpr) + $"[{offsetExpr} >> 2]", offsetExpr + " & 3", indexElement); + } + + public static string GetUbName(ShaderStage stage, int slot, bool cbIndexable) + { + if (cbIndexable) + { + return GetUbName(stage, NumberFormatter.FormatInt(slot, AggregateType.S32)); + } + + return $"{GetShaderStagePrefix(stage)}_{DefaultNames.UniformNamePrefix}{slot}_{DefaultNames.UniformNameSuffix}"; + } + + private static string GetUbName(ShaderStage stage, string slotExpr) + { + return $"{GetShaderStagePrefix(stage)}_{DefaultNames.UniformNamePrefix}[{slotExpr}].{DefaultNames.DataName}"; + } + + public static string GetSamplerName(ShaderStage stage, AstTextureOperation texOp, string indexExpr) + { + return GetSamplerName(stage, texOp.CbufSlot, texOp.Handle, texOp.Type.HasFlag(SamplerType.Indexed), indexExpr); + } + + public static string GetSamplerName(ShaderStage stage, int cbufSlot, int handle, bool indexed, string indexExpr) + { + string suffix = cbufSlot < 0 ? $"_tcb_{handle:X}" : $"_cb{cbufSlot}_{handle:X}"; + + if (indexed) + { + suffix += $"a[{indexExpr}]"; + } + + return GetShaderStagePrefix(stage) + "_" + DefaultNames.SamplerNamePrefix + suffix; + } + + public static string GetImageName(ShaderStage stage, AstTextureOperation texOp, string indexExpr) + { + return GetImageName(stage, texOp.CbufSlot, texOp.Handle, texOp.Format, texOp.Type.HasFlag(SamplerType.Indexed), indexExpr); + } + + public static string GetImageName( + ShaderStage stage, + int cbufSlot, + int handle, + TextureFormat format, + bool indexed, + string indexExpr) + { + string suffix = cbufSlot < 0 + ? $"_tcb_{handle:X}_{format.ToGlslFormat()}" + : $"_cb{cbufSlot}_{handle:X}_{format.ToGlslFormat()}"; + + if (indexed) + { + suffix += $"a[{indexExpr}]"; + } + + return GetShaderStagePrefix(stage) + "_" + DefaultNames.ImageNamePrefix + suffix; + } + + public static string GetShaderStagePrefix(ShaderStage stage) + { + int index = (int)stage; + + if ((uint)index >= _stagePrefixes.Length) + { + return "invalid"; + } + + return _stagePrefixes[index]; + } + + private static char GetSwizzleMask(int value) + { + return "xyzw"[value]; + } + + public static string GetArgumentName(int argIndex) + { + return $"{DefaultNames.ArgumentNamePrefix}{argIndex}"; + } + + public static AggregateType GetNodeDestType(CodeGenContext context, IAstNode node) + { + // TODO: Get rid of that function entirely and return the type from the operation generation + // functions directly, like SPIR-V does. + + if (node is AstOperation operation) + { + if (operation.Inst == Instruction.Load) + { + switch (operation.StorageKind) + { + case StorageKind.Input: + case StorageKind.InputPerPatch: + case StorageKind.Output: + case StorageKind.OutputPerPatch: + if (!(operation.GetSource(0) is AstOperand varId) || varId.Type != OperandType.Constant) + { + throw new InvalidOperationException($"First input of {operation.Inst} with {operation.StorageKind} storage must be a constant operand."); + } + + IoVariable ioVariable = (IoVariable)varId.Value; + bool isOutput = operation.StorageKind == StorageKind.Output || operation.StorageKind == StorageKind.OutputPerPatch; + bool isPerPatch = operation.StorageKind == StorageKind.InputPerPatch || operation.StorageKind == StorageKind.OutputPerPatch; + int location = 0; + int component = 0; + + if (context.Config.HasPerLocationInputOrOutput(ioVariable, isOutput)) + { + if (!(operation.GetSource(1) is AstOperand vecIndex) || vecIndex.Type != OperandType.Constant) + { + throw new InvalidOperationException($"Second input of {operation.Inst} with {operation.StorageKind} storage must be a constant operand."); + } + + location = vecIndex.Value; + + if (operation.SourcesCount > 2 && + operation.GetSource(2) is AstOperand elemIndex && + elemIndex.Type == OperandType.Constant && + context.Config.HasPerLocationInputOrOutputComponent(ioVariable, location, elemIndex.Value, isOutput)) + { + component = elemIndex.Value; + } + } + + (_, AggregateType varType) = IoMap.GetGlslVariable(context.Config, ioVariable, location, component, isOutput, isPerPatch); + + return varType & AggregateType.ElementTypeMask; + } + } + else if (operation.Inst == Instruction.Call) + { + AstOperand funcId = (AstOperand)operation.GetSource(0); + + Debug.Assert(funcId.Type == OperandType.Constant); + + return context.GetFunction(funcId.Value).ReturnType; + } + else if (operation.Inst == Instruction.VectorExtract) + { + return GetNodeDestType(context, operation.GetSource(0)) & ~AggregateType.ElementCountMask; + } + else if (operation is AstTextureOperation texOp) + { + if (texOp.Inst == Instruction.ImageLoad || + texOp.Inst == Instruction.ImageStore || + texOp.Inst == Instruction.ImageAtomic) + { + return texOp.GetVectorType(texOp.Format.GetComponentType()); + } + else if (texOp.Inst == Instruction.TextureSample) + { + return texOp.GetVectorType(GetDestVarType(operation.Inst)); + } + } + + return GetDestVarType(operation.Inst); + } + else if (node is AstOperand operand) + { + if (operand.Type == OperandType.Argument) + { + int argIndex = operand.Value; + + return context.CurrentFunction.GetArgumentType(argIndex); + } + + return OperandInfo.GetVarType(operand); + } + else + { + throw new ArgumentException($"Invalid node type \"{node?.GetType().Name ?? "null"}\"."); + } + } + } +}
\ No newline at end of file diff --git a/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/TypeConversion.cs b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/TypeConversion.cs new file mode 100644 index 00000000..22c8623c --- /dev/null +++ b/src/Ryujinx.Graphics.Shader/CodeGen/Glsl/TypeConversion.cs @@ -0,0 +1,87 @@ +using Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions; +using Ryujinx.Graphics.Shader.IntermediateRepresentation; +using Ryujinx.Graphics.Shader.StructuredIr; +using Ryujinx.Graphics.Shader.Translation; +using System; + +namespace Ryujinx.Graphics.Shader.CodeGen.Glsl +{ + static class TypeConversion + { + public static string ReinterpretCast( + CodeGenContext context, + IAstNode node, + AggregateType srcType, + AggregateType dstType) + { + if (node is AstOperand operand && operand.Type == OperandType.Constant) + { + if (NumberFormatter.TryFormat(operand.Value, dstType, out string formatted)) + { + return formatted; + } + } + + string expr = InstGen.GetExpression(context, node); + + return ReinterpretCast(expr, node, srcType, dstType); + } + + private static string ReinterpretCast(string expr, IAstNode node, AggregateType srcType, AggregateType dstType) + { + if (srcType == dstType) + { + return expr; + } + + if (srcType == AggregateType.FP32) + { + switch (dstType) + { + case AggregateType.Bool: return $"(floatBitsToInt({expr}) != 0)"; + case AggregateType.S32: return $"floatBitsToInt({expr})"; + case AggregateType.U32: return $"floatBitsToUint({expr})"; + } + } + else if (dstType == AggregateType.FP32) + { + switch (srcType) + { + case AggregateType.Bool: return $"intBitsToFloat({ReinterpretBoolToInt(expr, node, AggregateType.S32)})"; + case AggregateType.S32: return $"intBitsToFloat({expr})"; + case AggregateType.U32: return $"uintBitsToFloat({expr})"; + } + } + else if (srcType == AggregateType.Bool) + { + return ReinterpretBoolToInt(expr, node, dstType); + } + else if (dstType == AggregateType.Bool) + { + expr = InstGenHelper.Enclose(expr, node, Instruction.CompareNotEqual, isLhs: true); + + return $"({expr} != 0)"; + } + else if (dstType == AggregateType.S32) + { + return $"int({expr})"; + } + else if (dstType == AggregateType.U32) + { + return $"uint({expr})"; + } + + throw new ArgumentException($"Invalid reinterpret cast from \"{srcType}\" to \"{dstType}\"."); + } + + private static string ReinterpretBoolToInt(string expr, IAstNode node, AggregateType dstType) + { + string trueExpr = NumberFormatter.FormatInt(IrConsts.True, dstType); + string falseExpr = NumberFormatter.FormatInt(IrConsts.False, dstType); + + expr = InstGenHelper.Enclose(expr, node, Instruction.ConditionalSelect, isLhs: false); + + return $"({expr} ? {trueExpr} : {falseExpr})"; + } + } +}
\ No newline at end of file diff --git a/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/CodeGenContext.cs b/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/CodeGenContext.cs new file mode 100644 index 00000000..ed292ef1 --- /dev/null +++ b/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/CodeGenContext.cs @@ -0,0 +1,409 @@ +using Ryujinx.Graphics.Shader.StructuredIr; +using Ryujinx.Graphics.Shader.Translation; +using Spv.Generator; +using System; +using System.Collections.Generic; +using static Spv.Specification; + +namespace Ryujinx.Graphics.Shader.CodeGen.Spirv +{ + using IrConsts = IntermediateRepresentation.IrConsts; + using IrOperandType = IntermediateRepresentation.OperandType; + + partial class CodeGenContext : Module + { + private const uint SpirvVersionMajor = 1; + private const uint SpirvVersionMinor = 3; + private const uint SpirvVersionRevision = 0; + private const uint SpirvVersionPacked = (SpirvVersionMajor << 16) | (SpirvVersionMinor << 8) | SpirvVersionRevision; + + public StructuredProgramInfo Info { get; } + + public ShaderConfig Config { get; } + + public int InputVertices { get; } + + public Dictionary<int, Instruction> UniformBuffers { get; } = new Dictionary<int, Instruction>(); + public Instruction SupportBuffer { get; set; } + public Instruction UniformBuffersArray { get; set; } + public Instruction StorageBuffersArray { get; set; } + public Instruction LocalMemory { get; set; } + public Instruction SharedMemory { get; set; } + public Dictionary<TextureMeta, SamplerType> SamplersTypes { get; } = new Dictionary<TextureMeta, SamplerType>(); + public Dictionary<TextureMeta, (Instruction, Instruction, Instruction)> Samplers { get; } = new Dictionary<TextureMeta, (Instruction, Instruction, Instruction)>(); + public Dictionary<TextureMeta, (Instruction, Instruction)> Images { get; } = new Dictionary<TextureMeta, (Instruction, Instruction)>(); + public Dictionary<IoDefinition, Instruction> Inputs { get; } = new Dictionary<IoDefinition, Instruction>(); + public Dictionary<IoDefinition, Instruction> Outputs { get; } = new Dictionary<IoDefinition, Instruction>(); + public Dictionary<IoDefinition, Instruction> InputsPerPatch { get; } = new Dictionary<IoDefinition, Instruction>(); + public Dictionary<IoDefinition, Instruction> OutputsPerPatch { get; } = new Dictionary<IoDefinition, Instruction>(); + + public Instruction CoordTemp { get; set; } + private readonly Dictionary<AstOperand, Instruction> _locals = new Dictionary<AstOperand, Instruction>(); + private readonly Dictionary<int, Instruction[]> _localForArgs = new Dictionary<int, Instruction[]>(); + private readonly Dictionary<int, Instruction> _funcArgs = new Dictionary<int, Instruction>(); + private readonly Dictionary<int, (StructuredFunction, Instruction)> _functions = new Dictionary<int, (StructuredFunction, Instruction)>(); + + private class BlockState + { + private int _entryCount; + private readonly List<Instruction> _labels = new List<Instruction>(); + + public Instruction GetNextLabel(CodeGenContext context) + { + return GetLabel(context, _entryCount); + } + + public Instruction GetNextLabelAutoIncrement(CodeGenContext context) + { + return GetLabel(context, _entryCount++); + } + + public Instruction GetLabel(CodeGenContext context, int index) + { + while (index >= _labels.Count) + { + _labels.Add(context.Label()); + } + + return _labels[index]; + } + } + + private readonly Dictionary<AstBlock, BlockState> _labels = new Dictionary<AstBlock, BlockState>(); + + public Dictionary<AstBlock, (Instruction, Instruction)> LoopTargets { get; set; } + + public AstBlock CurrentBlock { get; private set; } + + public SpirvDelegates Delegates { get; } + + public CodeGenContext( + StructuredProgramInfo info, + ShaderConfig config, + GeneratorPool<Instruction> instPool, + GeneratorPool<LiteralInteger> integerPool) : base(SpirvVersionPacked, instPool, integerPool) + { + Info = info; + Config = config; + + if (config.Stage == ShaderStage.Geometry) + { + InputTopology inPrimitive = config.GpuAccessor.QueryPrimitiveTopology(); + + InputVertices = inPrimitive switch + { + InputTopology.Points => 1, + InputTopology.Lines => 2, + InputTopology.LinesAdjacency => 2, + InputTopology.Triangles => 3, + InputTopology.TrianglesAdjacency => 3, + _ => throw new InvalidOperationException($"Invalid input topology \"{inPrimitive}\".") + }; + } + + AddCapability(Capability.Shader); + AddCapability(Capability.Float64); + + SetMemoryModel(AddressingModel.Logical, MemoryModel.GLSL450); + + Delegates = new SpirvDelegates(this); + } + + public void StartFunction() + { + _locals.Clear(); + _localForArgs.Clear(); + _funcArgs.Clear(); + } + + public void EnterBlock(AstBlock block) + { + CurrentBlock = block; + AddLabel(GetBlockStateLazy(block).GetNextLabelAutoIncrement(this)); + } + + public Instruction GetFirstLabel(AstBlock block) + { + return GetBlockStateLazy(block).GetLabel(this, 0); + } + + public Instruction GetNextLabel(AstBlock block) + { + return GetBlockStateLazy(block).GetNextLabel(this); + } + + private BlockState GetBlockStateLazy(AstBlock block) + { + if (!_labels.TryGetValue(block, out var blockState)) + { + blockState = new BlockState(); + + _labels.Add(block, blockState); + } + + return blockState; + } + + public Instruction NewBlock() + { + var label = Label(); + Branch(label); + AddLabel(label); + return label; + } + + public Instruction[] GetMainInterface() + { + var mainInterface = new List<Instruction>(); + + mainInterface.AddRange(Inputs.Values); + mainInterface.AddRange(Outputs.Values); + mainInterface.AddRange(InputsPerPatch.Values); + mainInterface.AddRange(OutputsPerPatch.Values); + + return mainInterface.ToArray(); + } + + public void DeclareLocal(AstOperand local, Instruction spvLocal) + { + _locals.Add(local, spvLocal); + } + + public void DeclareLocalForArgs(int funcIndex, Instruction[] spvLocals) + { + _localForArgs.Add(funcIndex, spvLocals); + } + + public void DeclareArgument(int argIndex, Instruction spvLocal) + { + _funcArgs.Add(argIndex, spvLocal); + } + + public void DeclareFunction(int funcIndex, StructuredFunction function, Instruction spvFunc) + { + _functions.Add(funcIndex, (function, spvFunc)); + } + + public Instruction GetFP32(IAstNode node) + { + return Get(AggregateType.FP32, node); + } + + public Instruction GetFP64(IAstNode node) + { + return Get(AggregateType.FP64, node); + } + + public Instruction GetS32(IAstNode node) + { + return Get(AggregateType.S32, node); + } + + public Instruction GetU32(IAstNode node) + { + return Get(AggregateType.U32, node); + } + + public Instruction Get(AggregateType type, IAstNode node) + { + if (node is AstOperation operation) + { + var opResult = Instructions.Generate(this, operation); + return BitcastIfNeeded(type, opResult.Type, opResult.Value); + } + else if (node is AstOperand operand) + { + return operand.Type switch + { + IrOperandType.Argument => GetArgument(type, operand), + IrOperandType.Constant => GetConstant(type, operand), + IrOperandType.ConstantBuffer => GetConstantBuffer(type, operand), + IrOperandType.LocalVariable => GetLocal(type, operand), + IrOperandType.Undefined => GetUndefined(type), + _ => throw new ArgumentException($"Invalid operand type \"{operand.Type}\".") + }; + } + + throw new NotImplementedException(node.GetType().Name); + } + + public Instruction GetWithType(IAstNode node, out AggregateType type) + { + if (node is AstOperation operation) + { + var opResult = Instructions.Generate(this, operation); + type = opResult.Type; + return opResult.Value; + } + else if (node is AstOperand operand) + { + switch (operand.Type) + { + case IrOperandType.LocalVariable: + type = operand.VarType; + return GetLocal(type, operand); + default: + throw new ArgumentException($"Invalid operand type \"{operand.Type}\"."); + } + } + + throw new NotImplementedException(node.GetType().Name); + } + + private Instruction GetUndefined(AggregateType type) + { + return type switch + { + AggregateType.Bool => ConstantFalse(TypeBool()), + AggregateType.FP32 => Constant(TypeFP32(), 0f), + AggregateType.FP64 => Constant(TypeFP64(), 0d), + _ => Constant(GetType(type), 0) + }; + } + + public Instruction GetConstant(AggregateType type, AstOperand operand) + { + return type switch + { + AggregateType.Bool => operand.Value != 0 ? ConstantTrue(TypeBool()) : ConstantFalse(TypeBool()), + AggregateType.FP32 => Constant(TypeFP32(), BitConverter.Int32BitsToSingle(operand.Value)), + AggregateType.FP64 => Constant(TypeFP64(), (double)BitConverter.Int32BitsToSingle(operand.Value)), + AggregateType.S32 => Constant(TypeS32(), operand.Value), + AggregateType.U32 => Constant(TypeU32(), (uint)operand.Value), + _ => throw new ArgumentException($"Invalid type \"{type}\".") + }; + } + + public Instruction GetConstantBuffer(AggregateType type, AstOperand operand) + { + var i1 = Constant(TypeS32(), 0); + var i2 = Constant(TypeS32(), operand.CbufOffset >> 2); + var i3 = Constant(TypeU32(), operand.CbufOffset & 3); + + Instruction elemPointer; + + if (UniformBuffersArray != null) + { + var ubVariable = UniformBuffersArray; + var i0 = Constant(TypeS32(), operand.CbufSlot); + + elemPointer = AccessChain(TypePointer(StorageClass.Uniform, TypeFP32()), ubVariable, i0, i1, i2, i3); + } + else + { + var ubVariable = UniformBuffers[operand.CbufSlot]; + + elemPointer = AccessChain(TypePointer(StorageClass.Uniform, TypeFP32()), ubVariable, i1, i2, i3); + } + + return BitcastIfNeeded(type, AggregateType.FP32, Load(TypeFP32(), elemPointer)); + } + + public Instruction GetLocalPointer(AstOperand local) + { + return _locals[local]; + } + + public Instruction[] GetLocalForArgsPointers(int funcIndex) + { + return _localForArgs[funcIndex]; + } + + public Instruction GetArgumentPointer(AstOperand funcArg) + { + return _funcArgs[funcArg.Value]; + } + + public Instruction GetLocal(AggregateType dstType, AstOperand local) + { + var srcType = local.VarType; + return BitcastIfNeeded(dstType, srcType, Load(GetType(srcType), GetLocalPointer(local))); + } + + public Instruction GetArgument(AggregateType dstType, AstOperand funcArg) + { + var srcType = funcArg.VarType; + return BitcastIfNeeded(dstType, srcType, Load(GetType(srcType), GetArgumentPointer(funcArg))); + } + + public (StructuredFunction, Instruction) GetFunction(int funcIndex) + { + return _functions[funcIndex]; + } + + public Instruction GetType(AggregateType type, int length = 1) + { + if ((type & AggregateType.Array) != 0) + { + return TypeArray(GetType(type & ~AggregateType.Array), Constant(TypeU32(), length)); + } + else if ((type & AggregateType.ElementCountMask) != 0) + { + int vectorLength = (type & AggregateType.ElementCountMask) switch + { + AggregateType.Vector2 => 2, + AggregateType.Vector3 => 3, + AggregateType.Vector4 => 4, + _ => 1 + }; + + return TypeVector(GetType(type & ~AggregateType.ElementCountMask), vectorLength); + } + + return type switch + { + AggregateType.Void => TypeVoid(), + AggregateType.Bool => TypeBool(), + AggregateType.FP32 => TypeFP32(), + AggregateType.FP64 => TypeFP64(), + AggregateType.S32 => TypeS32(), + AggregateType.U32 => TypeU32(), + _ => throw new ArgumentException($"Invalid attribute type \"{type}\".") + }; + } + + public Instruction BitcastIfNeeded(AggregateType dstType, AggregateType srcType, Instruction value) + { + if (dstType == srcType) + { + return value; + } + + if (dstType == AggregateType.Bool) + { + return INotEqual(TypeBool(), BitcastIfNeeded(AggregateType.S32, srcType, value), Constant(TypeS32(), 0)); + } + else if (srcType == AggregateType.Bool) + { + var intTrue = Constant(TypeS32(), IrConsts.True); + var intFalse = Constant(TypeS32(), IrConsts.False); + + return BitcastIfNeeded(dstType, AggregateType.S32, Select(TypeS32(), value, intTrue, intFalse)); + } + else + { + return Bitcast(GetType(dstType, 1), value); + } + } + + public Instruction TypeS32() + { + return TypeInt(32, true); + } + + public Instruction TypeU32() + { + return TypeInt(32, false); + } + + public Instruction TypeFP32() + { + return TypeFloat(32); + } + + public Instruction TypeFP64() + { + return TypeFloat(64); + } + } +} diff --git a/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/Declarations.cs b/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/Declarations.cs new file mode 100644 index 00000000..821da477 --- /dev/null +++ b/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/Declarations.cs @@ -0,0 +1,615 @@ +using Ryujinx.Common; +using Ryujinx.Graphics.Shader.IntermediateRepresentation; +using Ryujinx.Graphics.Shader.StructuredIr; +using Ryujinx.Graphics.Shader.Translation; +using Spv.Generator; +using System; +using System.Collections.Generic; +using System.Linq; +using System.Numerics; +using static Spv.Specification; +using SpvInstruction = Spv.Generator.Instruction; + +namespace Ryujinx.Graphics.Shader.CodeGen.Spirv +{ + static class Declarations + { + private static readonly string[] StagePrefixes = new string[] { "cp", "vp", "tcp", "tep", "gp", "fp" }; + + public static void DeclareParameters(CodeGenContext context, StructuredFunction function) + { + DeclareParameters(context, function.InArguments, 0); + DeclareParameters(context, function.OutArguments, function.InArguments.Length); + } + + private static void DeclareParameters(CodeGenContext context, IEnumerable<AggregateType> argTypes, int argIndex) + { + foreach (var argType in argTypes) + { + var argPointerType = context.TypePointer(StorageClass.Function, context.GetType(argType)); + var spvArg = context.FunctionParameter(argPointerType); + + context.DeclareArgument(argIndex++, spvArg); + } + } + + public static void DeclareLocals(CodeGenContext context, StructuredFunction function) + { + foreach (AstOperand local in function.Locals) + { + var localPointerType = context.TypePointer(StorageClass.Function, context.GetType(local.VarType)); + var spvLocal = context.Variable(localPointerType, StorageClass.Function); + + context.AddLocalVariable(spvLocal); + context.DeclareLocal(local, spvLocal); + } + + var ivector2Type = context.TypeVector(context.TypeS32(), 2); + var coordTempPointerType = context.TypePointer(StorageClass.Function, ivector2Type); + var coordTemp = context.Variable(coordTempPointerType, StorageClass.Function); + + context.AddLocalVariable(coordTemp); + context.CoordTemp = coordTemp; + } + + public static void DeclareLocalForArgs(CodeGenContext context, List<StructuredFunction> functions) + { + for (int funcIndex = 0; funcIndex < functions.Count; funcIndex++) + { + StructuredFunction function = functions[funcIndex]; + SpvInstruction[] locals = new SpvInstruction[function.InArguments.Length]; + + for (int i = 0; i < function.InArguments.Length; i++) + { + var type = function.GetArgumentType(i); + var localPointerType = context.TypePointer(StorageClass.Function, context.GetType(type)); + var spvLocal = context.Variable(localPointerType, StorageClass.Function); + + context.AddLocalVariable(spvLocal); + + locals[i] = spvLocal; + } + + context.DeclareLocalForArgs(funcIndex, locals); + } + } + + public static void DeclareAll(CodeGenContext context, StructuredProgramInfo info) + { + if (context.Config.Stage == ShaderStage.Compute) + { + int localMemorySize = BitUtils.DivRoundUp(context.Config.GpuAccessor.QueryComputeLocalMemorySize(), 4); + + if (localMemorySize != 0) + { + DeclareLocalMemory(context, localMemorySize); + } + + int sharedMemorySize = BitUtils.DivRoundUp(context.Config.GpuAccessor.QueryComputeSharedMemorySize(), 4); + + if (sharedMemorySize != 0) + { + DeclareSharedMemory(context, sharedMemorySize); + } + } + else if (context.Config.LocalMemorySize != 0) + { + int localMemorySize = BitUtils.DivRoundUp(context.Config.LocalMemorySize, 4); + DeclareLocalMemory(context, localMemorySize); + } + + DeclareSupportBuffer(context); + DeclareUniformBuffers(context, context.Config.GetConstantBufferDescriptors()); + DeclareStorageBuffers(context, context.Config.GetStorageBufferDescriptors()); + DeclareSamplers(context, context.Config.GetTextureDescriptors()); + DeclareImages(context, context.Config.GetImageDescriptors()); + DeclareInputsAndOutputs(context, info); + } + + private static void DeclareLocalMemory(CodeGenContext context, int size) + { + context.LocalMemory = DeclareMemory(context, StorageClass.Private, size); + } + + private static void DeclareSharedMemory(CodeGenContext context, int size) + { + context.SharedMemory = DeclareMemory(context, StorageClass.Workgroup, size); + } + + private static SpvInstruction DeclareMemory(CodeGenContext context, StorageClass storage, int size) + { + var arrayType = context.TypeArray(context.TypeU32(), context.Constant(context.TypeU32(), size)); + var pointerType = context.TypePointer(storage, arrayType); + var variable = context.Variable(pointerType, storage); + + context.AddGlobalVariable(variable); + + return variable; + } + + private static void DeclareSupportBuffer(CodeGenContext context) + { + if (!context.Config.Stage.SupportsRenderScale() && !(context.Config.LastInVertexPipeline && context.Config.GpuAccessor.QueryViewportTransformDisable())) + { + return; + } + + var isBgraArrayType = context.TypeArray(context.TypeU32(), context.Constant(context.TypeU32(), SupportBuffer.FragmentIsBgraCount)); + var viewportInverseVectorType = context.TypeVector(context.TypeFP32(), 4); + var renderScaleArrayType = context.TypeArray(context.TypeFP32(), context.Constant(context.TypeU32(), SupportBuffer.RenderScaleMaxCount)); + + context.Decorate(isBgraArrayType, Decoration.ArrayStride, (LiteralInteger)SupportBuffer.FieldSize); + context.Decorate(renderScaleArrayType, Decoration.ArrayStride, (LiteralInteger)SupportBuffer.FieldSize); + + var supportBufferStructType = context.TypeStruct(false, context.TypeU32(), isBgraArrayType, viewportInverseVectorType, context.TypeS32(), renderScaleArrayType); + + context.MemberDecorate(supportBufferStructType, 0, Decoration.Offset, (LiteralInteger)SupportBuffer.FragmentAlphaTestOffset); + context.MemberDecorate(supportBufferStructType, 1, Decoration.Offset, (LiteralInteger)SupportBuffer.FragmentIsBgraOffset); + context.MemberDecorate(supportBufferStructType, 2, Decoration.Offset, (LiteralInteger)SupportBuffer.ViewportInverseOffset); + context.MemberDecorate(supportBufferStructType, 3, Decoration.Offset, (LiteralInteger)SupportBuffer.FragmentRenderScaleCountOffset); + context.MemberDecorate(supportBufferStructType, 4, Decoration.Offset, (LiteralInteger)SupportBuffer.GraphicsRenderScaleOffset); + context.Decorate(supportBufferStructType, Decoration.Block); + + var supportBufferPointerType = context.TypePointer(StorageClass.Uniform, supportBufferStructType); + var supportBufferVariable = context.Variable(supportBufferPointerType, StorageClass.Uniform); + + context.Decorate(supportBufferVariable, Decoration.DescriptorSet, (LiteralInteger)0); + context.Decorate(supportBufferVariable, Decoration.Binding, (LiteralInteger)0); + + context.AddGlobalVariable(supportBufferVariable); + + context.SupportBuffer = supportBufferVariable; + } + + private static void DeclareUniformBuffers(CodeGenContext context, BufferDescriptor[] descriptors) + { + if (descriptors.Length == 0) + { + return; + } + + uint ubSize = Constants.ConstantBufferSize / 16; + + var ubArrayType = context.TypeArray(context.TypeVector(context.TypeFP32(), 4), context.Constant(context.TypeU32(), ubSize), true); + context.Decorate(ubArrayType, Decoration.ArrayStride, (LiteralInteger)16); + var ubStructType = context.TypeStruct(true, ubArrayType); + context.Decorate(ubStructType, Decoration.Block); + context.MemberDecorate(ubStructType, 0, Decoration.Offset, (LiteralInteger)0); + + if (context.Config.UsedFeatures.HasFlag(FeatureFlags.CbIndexing)) + { + int count = descriptors.Max(x => x.Slot) + 1; + + var ubStructArrayType = context.TypeArray(ubStructType, context.Constant(context.TypeU32(), count)); + var ubPointerType = context.TypePointer(StorageClass.Uniform, ubStructArrayType); + var ubVariable = context.Variable(ubPointerType, StorageClass.Uniform); + + context.Name(ubVariable, $"{GetStagePrefix(context.Config.Stage)}_u"); + context.Decorate(ubVariable, Decoration.DescriptorSet, (LiteralInteger)0); + context.Decorate(ubVariable, Decoration.Binding, (LiteralInteger)context.Config.FirstConstantBufferBinding); + context.AddGlobalVariable(ubVariable); + + context.UniformBuffersArray = ubVariable; + } + else + { + var ubPointerType = context.TypePointer(StorageClass.Uniform, ubStructType); + + foreach (var descriptor in descriptors) + { + var ubVariable = context.Variable(ubPointerType, StorageClass.Uniform); + + context.Name(ubVariable, $"{GetStagePrefix(context.Config.Stage)}_c{descriptor.Slot}"); + context.Decorate(ubVariable, Decoration.DescriptorSet, (LiteralInteger)0); + context.Decorate(ubVariable, Decoration.Binding, (LiteralInteger)descriptor.Binding); + context.AddGlobalVariable(ubVariable); + context.UniformBuffers.Add(descriptor.Slot, ubVariable); + } + } + } + + private static void DeclareStorageBuffers(CodeGenContext context, BufferDescriptor[] descriptors) + { + if (descriptors.Length == 0) + { + return; + } + + int setIndex = context.Config.Options.TargetApi == TargetApi.Vulkan ? 1 : 0; + int count = descriptors.Max(x => x.Slot) + 1; + + var sbArrayType = context.TypeRuntimeArray(context.TypeU32()); + context.Decorate(sbArrayType, Decoration.ArrayStride, (LiteralInteger)4); + var sbStructType = context.TypeStruct(true, sbArrayType); + context.Decorate(sbStructType, Decoration.BufferBlock); + context.MemberDecorate(sbStructType, 0, Decoration.Offset, (LiteralInteger)0); + var sbStructArrayType = context.TypeArray(sbStructType, context.Constant(context.TypeU32(), count)); + var sbPointerType = context.TypePointer(StorageClass.Uniform, sbStructArrayType); + var sbVariable = context.Variable(sbPointerType, StorageClass.Uniform); + + context.Name(sbVariable, $"{GetStagePrefix(context.Config.Stage)}_s"); + context.Decorate(sbVariable, Decoration.DescriptorSet, (LiteralInteger)setIndex); + context.Decorate(sbVariable, Decoration.Binding, (LiteralInteger)context.Config.FirstStorageBufferBinding); + context.AddGlobalVariable(sbVariable); + + context.StorageBuffersArray = sbVariable; + } + + private static void DeclareSamplers(CodeGenContext context, TextureDescriptor[] descriptors) + { + foreach (var descriptor in descriptors) + { + var meta = new TextureMeta(descriptor.CbufSlot, descriptor.HandleIndex, descriptor.Format); + + if (context.Samplers.ContainsKey(meta)) + { + continue; + } + + int setIndex = context.Config.Options.TargetApi == TargetApi.Vulkan ? 2 : 0; + + var dim = (descriptor.Type & SamplerType.Mask) switch + { + SamplerType.Texture1D => Dim.Dim1D, + SamplerType.Texture2D => Dim.Dim2D, + SamplerType.Texture3D => Dim.Dim3D, + SamplerType.TextureCube => Dim.Cube, + SamplerType.TextureBuffer => Dim.Buffer, + _ => throw new InvalidOperationException($"Invalid sampler type \"{descriptor.Type & SamplerType.Mask}\".") + }; + + var imageType = context.TypeImage( + context.TypeFP32(), + dim, + descriptor.Type.HasFlag(SamplerType.Shadow), + descriptor.Type.HasFlag(SamplerType.Array), + descriptor.Type.HasFlag(SamplerType.Multisample), + 1, + ImageFormat.Unknown); + + var nameSuffix = meta.CbufSlot < 0 ? $"_tcb_{meta.Handle:X}" : $"_cb{meta.CbufSlot}_{meta.Handle:X}"; + + var sampledImageType = context.TypeSampledImage(imageType); + var sampledImagePointerType = context.TypePointer(StorageClass.UniformConstant, sampledImageType); + var sampledImageVariable = context.Variable(sampledImagePointerType, StorageClass.UniformConstant); + + context.Samplers.Add(meta, (imageType, sampledImageType, sampledImageVariable)); + context.SamplersTypes.Add(meta, descriptor.Type); + + context.Name(sampledImageVariable, $"{GetStagePrefix(context.Config.Stage)}_tex{nameSuffix}"); + context.Decorate(sampledImageVariable, Decoration.DescriptorSet, (LiteralInteger)setIndex); + context.Decorate(sampledImageVariable, Decoration.Binding, (LiteralInteger)descriptor.Binding); + context.AddGlobalVariable(sampledImageVariable); + } + } + + private static void DeclareImages(CodeGenContext context, TextureDescriptor[] descriptors) + { + foreach (var descriptor in descriptors) + { + var meta = new TextureMeta(descriptor.CbufSlot, descriptor.HandleIndex, descriptor.Format); + + if (context.Images.ContainsKey(meta)) + { + continue; + } + + int setIndex = context.Config.Options.TargetApi == TargetApi.Vulkan ? 3 : 0; + + var dim = GetDim(descriptor.Type); + + var imageType = context.TypeImage( + context.GetType(meta.Format.GetComponentType()), + dim, + descriptor.Type.HasFlag(SamplerType.Shadow), + descriptor.Type.HasFlag(SamplerType.Array), + descriptor.Type.HasFlag(SamplerType.Multisample), + AccessQualifier.ReadWrite, + GetImageFormat(meta.Format)); + + var nameSuffix = meta.CbufSlot < 0 ? + $"_tcb_{meta.Handle:X}_{meta.Format.ToGlslFormat()}" : + $"_cb{meta.CbufSlot}_{meta.Handle:X}_{meta.Format.ToGlslFormat()}"; + + var imagePointerType = context.TypePointer(StorageClass.UniformConstant, imageType); + var imageVariable = context.Variable(imagePointerType, StorageClass.UniformConstant); + + context.Images.Add(meta, (imageType, imageVariable)); + + context.Name(imageVariable, $"{GetStagePrefix(context.Config.Stage)}_img{nameSuffix}"); + context.Decorate(imageVariable, Decoration.DescriptorSet, (LiteralInteger)setIndex); + context.Decorate(imageVariable, Decoration.Binding, (LiteralInteger)descriptor.Binding); + + if (descriptor.Flags.HasFlag(TextureUsageFlags.ImageCoherent)) + { + context.Decorate(imageVariable, Decoration.Coherent); + } + + context.AddGlobalVariable(imageVariable); + } + } + + private static Dim GetDim(SamplerType type) + { + return (type & SamplerType.Mask) switch + { + SamplerType.Texture1D => Dim.Dim1D, + SamplerType.Texture2D => Dim.Dim2D, + SamplerType.Texture3D => Dim.Dim3D, + SamplerType.TextureCube => Dim.Cube, + SamplerType.TextureBuffer => Dim.Buffer, + _ => throw new ArgumentException($"Invalid sampler type \"{type & SamplerType.Mask}\".") + }; + } + + private static ImageFormat GetImageFormat(TextureFormat format) + { + return format switch + { + TextureFormat.Unknown => ImageFormat.Unknown, + TextureFormat.R8Unorm => ImageFormat.R8, + TextureFormat.R8Snorm => ImageFormat.R8Snorm, + TextureFormat.R8Uint => ImageFormat.R8ui, + TextureFormat.R8Sint => ImageFormat.R8i, + TextureFormat.R16Float => ImageFormat.R16f, + TextureFormat.R16Unorm => ImageFormat.R16, + TextureFormat.R16Snorm => ImageFormat.R16Snorm, + TextureFormat.R16Uint => ImageFormat.R16ui, + TextureFormat.R16Sint => ImageFormat.R16i, + TextureFormat.R32Float => ImageFormat.R32f, + TextureFormat.R32Uint => ImageFormat.R32ui, + TextureFormat.R32Sint => ImageFormat.R32i, + TextureFormat.R8G8Unorm => ImageFormat.Rg8, + TextureFormat.R8G8Snorm => ImageFormat.Rg8Snorm, + TextureFormat.R8G8Uint => ImageFormat.Rg8ui, + TextureFormat.R8G8Sint => ImageFormat.Rg8i, + TextureFormat.R16G16Float => ImageFormat.Rg16f, + TextureFormat.R16G16Unorm => ImageFormat.Rg16, + TextureFormat.R16G16Snorm => ImageFormat.Rg16Snorm, + TextureFormat.R16G16Uint => ImageFormat.Rg16ui, + TextureFormat.R16G16Sint => ImageFormat.Rg16i, + TextureFormat.R32G32Float => ImageFormat.Rg32f, + TextureFormat.R32G32Uint => ImageFormat.Rg32ui, + TextureFormat.R32G32Sint => ImageFormat.Rg32i, + TextureFormat.R8G8B8A8Unorm => ImageFormat.Rgba8, + TextureFormat.R8G8B8A8Snorm => ImageFormat.Rgba8Snorm, + TextureFormat.R8G8B8A8Uint => ImageFormat.Rgba8ui, + TextureFormat.R8G8B8A8Sint => ImageFormat.Rgba8i, + TextureFormat.R16G16B16A16Float => ImageFormat.Rgba16f, + TextureFormat.R16G16B16A16Unorm => ImageFormat.Rgba16, + TextureFormat.R16G16B16A16Snorm => ImageFormat.Rgba16Snorm, + TextureFormat.R16G16B16A16Uint => ImageFormat.Rgba16ui, + TextureFormat.R16G16B16A16Sint => ImageFormat.Rgba16i, + TextureFormat.R32G32B32A32Float => ImageFormat.Rgba32f, + TextureFormat.R32G32B32A32Uint => ImageFormat.Rgba32ui, + TextureFormat.R32G32B32A32Sint => ImageFormat.Rgba32i, + TextureFormat.R10G10B10A2Unorm => ImageFormat.Rgb10A2, + TextureFormat.R10G10B10A2Uint => ImageFormat.Rgb10a2ui, + TextureFormat.R11G11B10Float => ImageFormat.R11fG11fB10f, + _ => throw new ArgumentException($"Invalid texture format \"{format}\".") + }; + } + + private static void DeclareInputsAndOutputs(CodeGenContext context, StructuredProgramInfo info) + { + foreach (var ioDefinition in info.IoDefinitions) + { + var ioVariable = ioDefinition.IoVariable; + + // Those are actually from constant buffer, rather than being actual inputs or outputs, + // so we must ignore them here as they are declared as part of the support buffer. + // TODO: Delete this after we represent this properly on the IR (as a constant buffer rather than "input"). + if (ioVariable == IoVariable.FragmentOutputIsBgra || + ioVariable == IoVariable.SupportBlockRenderScale || + ioVariable == IoVariable.SupportBlockViewInverse) + { + continue; + } + + bool isOutput = ioDefinition.StorageKind.IsOutput(); + bool isPerPatch = ioDefinition.StorageKind.IsPerPatch(); + + PixelImap iq = PixelImap.Unused; + + if (context.Config.Stage == ShaderStage.Fragment) + { + if (ioVariable == IoVariable.UserDefined) + { + iq = context.Config.ImapTypes[ioDefinition.Location].GetFirstUsedType(); + } + else + { + (_, AggregateType varType) = IoMap.GetSpirvBuiltIn(ioVariable); + AggregateType elemType = varType & AggregateType.ElementTypeMask; + + if (elemType == AggregateType.S32 || elemType == AggregateType.U32) + { + iq = PixelImap.Constant; + } + } + } + + DeclareInputOrOutput(context, ioDefinition, isOutput, isPerPatch, iq); + } + } + + private static void DeclareInputOrOutput(CodeGenContext context, IoDefinition ioDefinition, bool isOutput, bool isPerPatch, PixelImap iq = PixelImap.Unused) + { + IoVariable ioVariable = ioDefinition.IoVariable; + var storageClass = isOutput ? StorageClass.Output : StorageClass.Input; + + bool isBuiltIn; + BuiltIn builtIn = default; + AggregateType varType; + + if (ioVariable == IoVariable.UserDefined) + { + varType = context.Config.GetUserDefinedType(ioDefinition.Location, isOutput); + isBuiltIn = false; + } + else if (ioVariable == IoVariable.FragmentOutputColor) + { + varType = context.Config.GetFragmentOutputColorType(ioDefinition.Location); + isBuiltIn = false; + } + else + { + (builtIn, varType) = IoMap.GetSpirvBuiltIn(ioVariable); + isBuiltIn = true; + + if (varType == AggregateType.Invalid) + { + throw new InvalidOperationException($"Unknown variable {ioVariable}."); + } + } + + bool hasComponent = context.Config.HasPerLocationInputOrOutputComponent(ioVariable, ioDefinition.Location, ioDefinition.Component, isOutput); + + if (hasComponent) + { + varType &= AggregateType.ElementTypeMask; + } + else if (ioVariable == IoVariable.UserDefined && context.Config.HasTransformFeedbackOutputs(isOutput)) + { + varType &= AggregateType.ElementTypeMask; + varType |= context.Config.GetTransformFeedbackOutputComponents(ioDefinition.Location, ioDefinition.Component) switch + { + 2 => AggregateType.Vector2, + 3 => AggregateType.Vector3, + 4 => AggregateType.Vector4, + _ => AggregateType.Invalid + }; + } + + var spvType = context.GetType(varType, IoMap.GetSpirvBuiltInArrayLength(ioVariable)); + bool builtInPassthrough = false; + + if (!isPerPatch && IoMap.IsPerVertex(ioVariable, context.Config.Stage, isOutput)) + { + int arraySize = context.Config.Stage == ShaderStage.Geometry ? context.InputVertices : 32; + spvType = context.TypeArray(spvType, context.Constant(context.TypeU32(), (LiteralInteger)arraySize)); + + if (context.Config.GpPassthrough && context.Config.GpuAccessor.QueryHostSupportsGeometryShaderPassthrough()) + { + builtInPassthrough = true; + } + } + + if (context.Config.Stage == ShaderStage.TessellationControl && isOutput && !isPerPatch) + { + spvType = context.TypeArray(spvType, context.Constant(context.TypeU32(), context.Config.ThreadsPerInputPrimitive)); + } + + var spvPointerType = context.TypePointer(storageClass, spvType); + var spvVar = context.Variable(spvPointerType, storageClass); + + if (builtInPassthrough) + { + context.Decorate(spvVar, Decoration.PassthroughNV); + } + + if (isBuiltIn) + { + if (isPerPatch) + { + context.Decorate(spvVar, Decoration.Patch); + } + + if (context.Config.GpuAccessor.QueryHostReducedPrecision() && ioVariable == IoVariable.Position) + { + context.Decorate(spvVar, Decoration.Invariant); + } + + context.Decorate(spvVar, Decoration.BuiltIn, (LiteralInteger)builtIn); + } + else if (isPerPatch) + { + context.Decorate(spvVar, Decoration.Patch); + + if (ioVariable == IoVariable.UserDefined) + { + int location = context.Config.GetPerPatchAttributeLocation(ioDefinition.Location); + + context.Decorate(spvVar, Decoration.Location, (LiteralInteger)location); + } + } + else if (ioVariable == IoVariable.UserDefined) + { + context.Decorate(spvVar, Decoration.Location, (LiteralInteger)ioDefinition.Location); + + if (hasComponent) + { + context.Decorate(spvVar, Decoration.Component, (LiteralInteger)ioDefinition.Component); + } + + if (!isOutput && + !isPerPatch && + (context.Config.PassthroughAttributes & (1 << ioDefinition.Location)) != 0 && + context.Config.GpuAccessor.QueryHostSupportsGeometryShaderPassthrough()) + { + context.Decorate(spvVar, Decoration.PassthroughNV); + } + } + else if (ioVariable == IoVariable.FragmentOutputColor) + { + int location = ioDefinition.Location; + + if (context.Config.Stage == ShaderStage.Fragment && context.Config.GpuAccessor.QueryDualSourceBlendEnable()) + { + int firstLocation = BitOperations.TrailingZeroCount(context.Config.UsedOutputAttributes); + int index = location - firstLocation; + int mask = 3 << firstLocation; + + if ((uint)index < 2 && (context.Config.UsedOutputAttributes & mask) == mask) + { + context.Decorate(spvVar, Decoration.Location, (LiteralInteger)firstLocation); + context.Decorate(spvVar, Decoration.Index, (LiteralInteger)index); + } + else + { + context.Decorate(spvVar, Decoration.Location, (LiteralInteger)location); + } + } + else + { + context.Decorate(spvVar, Decoration.Location, (LiteralInteger)location); + } + } + + if (!isOutput) + { + switch (iq) + { + case PixelImap.Constant: + context.Decorate(spvVar, Decoration.Flat); + break; + case PixelImap.ScreenLinear: + context.Decorate(spvVar, Decoration.NoPerspective); + break; + } + } + else if (context.Config.TryGetTransformFeedbackOutput( + ioVariable, + ioDefinition.Location, + ioDefinition.Component, + out var transformFeedbackOutput)) + { + context.Decorate(spvVar, Decoration.XfbBuffer, (LiteralInteger)transformFeedbackOutput.Buffer); + context.Decorate(spvVar, Decoration.XfbStride, (LiteralInteger)transformFeedbackOutput.Stride); + context.Decorate(spvVar, Decoration.Offset, (LiteralInteger)transformFeedbackOutput.Offset); + } + + context.AddGlobalVariable(spvVar); + + var dict = isPerPatch + ? (isOutput ? context.OutputsPerPatch : context.InputsPerPatch) + : (isOutput ? context.Outputs : context.Inputs); + dict.Add(ioDefinition, spvVar); + } + + private static string GetStagePrefix(ShaderStage stage) + { + return StagePrefixes[(int)stage]; + } + } +} diff --git a/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/EnumConversion.cs b/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/EnumConversion.cs new file mode 100644 index 00000000..72541774 --- /dev/null +++ b/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/EnumConversion.cs @@ -0,0 +1,22 @@ +using System; +using static Spv.Specification; + +namespace Ryujinx.Graphics.Shader.CodeGen.Spirv +{ + static class EnumConversion + { + public static ExecutionModel Convert(this ShaderStage stage) + { + return stage switch + { + ShaderStage.Compute => ExecutionModel.GLCompute, + ShaderStage.Vertex => ExecutionModel.Vertex, + ShaderStage.TessellationControl => ExecutionModel.TessellationControl, + ShaderStage.TessellationEvaluation => ExecutionModel.TessellationEvaluation, + ShaderStage.Geometry => ExecutionModel.Geometry, + ShaderStage.Fragment => ExecutionModel.Fragment, + _ => throw new ArgumentException($"Invalid shader stage \"{stage}\".") + }; + } + } +} diff --git a/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/Instructions.cs b/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/Instructions.cs new file mode 100644 index 00000000..b6ffdb7a --- /dev/null +++ b/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/Instructions.cs @@ -0,0 +1,2480 @@ +using Ryujinx.Graphics.Shader.IntermediateRepresentation; +using Ryujinx.Graphics.Shader.StructuredIr; +using Ryujinx.Graphics.Shader.Translation; +using System; +using System.Collections.Generic; +using System.Diagnostics; +using System.Linq; +using System.Numerics; +using static Spv.Specification; + +namespace Ryujinx.Graphics.Shader.CodeGen.Spirv +{ + using SpvInstruction = Spv.Generator.Instruction; + using SpvLiteralInteger = Spv.Generator.LiteralInteger; + + static class Instructions + { + private const MemorySemanticsMask DefaultMemorySemantics = + MemorySemanticsMask.ImageMemory | + MemorySemanticsMask.AtomicCounterMemory | + MemorySemanticsMask.WorkgroupMemory | + MemorySemanticsMask.UniformMemory | + MemorySemanticsMask.AcquireRelease; + + private static readonly Func<CodeGenContext, AstOperation, OperationResult>[] InstTable; + + static Instructions() + { + InstTable = new Func<CodeGenContext, AstOperation, OperationResult>[(int)Instruction.Count]; + + Add(Instruction.Absolute, GenerateAbsolute); + Add(Instruction.Add, GenerateAdd); + Add(Instruction.AtomicAdd, GenerateAtomicAdd); + Add(Instruction.AtomicAnd, GenerateAtomicAnd); + Add(Instruction.AtomicCompareAndSwap, GenerateAtomicCompareAndSwap); + Add(Instruction.AtomicMinS32, GenerateAtomicMinS32); + Add(Instruction.AtomicMinU32, GenerateAtomicMinU32); + Add(Instruction.AtomicMaxS32, GenerateAtomicMaxS32); + Add(Instruction.AtomicMaxU32, GenerateAtomicMaxU32); + Add(Instruction.AtomicOr, GenerateAtomicOr); + Add(Instruction.AtomicSwap, GenerateAtomicSwap); + Add(Instruction.AtomicXor, GenerateAtomicXor); + Add(Instruction.Ballot, GenerateBallot); + Add(Instruction.Barrier, GenerateBarrier); + Add(Instruction.BitCount, GenerateBitCount); + Add(Instruction.BitfieldExtractS32, GenerateBitfieldExtractS32); + Add(Instruction.BitfieldExtractU32, GenerateBitfieldExtractU32); + Add(Instruction.BitfieldInsert, GenerateBitfieldInsert); + Add(Instruction.BitfieldReverse, GenerateBitfieldReverse); + Add(Instruction.BitwiseAnd, GenerateBitwiseAnd); + Add(Instruction.BitwiseExclusiveOr, GenerateBitwiseExclusiveOr); + Add(Instruction.BitwiseNot, GenerateBitwiseNot); + Add(Instruction.BitwiseOr, GenerateBitwiseOr); + Add(Instruction.Call, GenerateCall); + Add(Instruction.Ceiling, GenerateCeiling); + Add(Instruction.Clamp, GenerateClamp); + Add(Instruction.ClampU32, GenerateClampU32); + Add(Instruction.Comment, GenerateComment); + Add(Instruction.CompareEqual, GenerateCompareEqual); + Add(Instruction.CompareGreater, GenerateCompareGreater); + Add(Instruction.CompareGreaterOrEqual, GenerateCompareGreaterOrEqual); + Add(Instruction.CompareGreaterOrEqualU32, GenerateCompareGreaterOrEqualU32); + Add(Instruction.CompareGreaterU32, GenerateCompareGreaterU32); + Add(Instruction.CompareLess, GenerateCompareLess); + Add(Instruction.CompareLessOrEqual, GenerateCompareLessOrEqual); + Add(Instruction.CompareLessOrEqualU32, GenerateCompareLessOrEqualU32); + Add(Instruction.CompareLessU32, GenerateCompareLessU32); + Add(Instruction.CompareNotEqual, GenerateCompareNotEqual); + Add(Instruction.ConditionalSelect, GenerateConditionalSelect); + Add(Instruction.ConvertFP32ToFP64, GenerateConvertFP32ToFP64); + Add(Instruction.ConvertFP32ToS32, GenerateConvertFP32ToS32); + Add(Instruction.ConvertFP32ToU32, GenerateConvertFP32ToU32); + Add(Instruction.ConvertFP64ToFP32, GenerateConvertFP64ToFP32); + Add(Instruction.ConvertFP64ToS32, GenerateConvertFP64ToS32); + Add(Instruction.ConvertFP64ToU32, GenerateConvertFP64ToU32); + Add(Instruction.ConvertS32ToFP32, GenerateConvertS32ToFP32); + Add(Instruction.ConvertS32ToFP64, GenerateConvertS32ToFP64); + Add(Instruction.ConvertU32ToFP32, GenerateConvertU32ToFP32); + Add(Instruction.ConvertU32ToFP64, GenerateConvertU32ToFP64); + Add(Instruction.Cosine, GenerateCosine); + Add(Instruction.Ddx, GenerateDdx); + Add(Instruction.Ddy, GenerateDdy); + Add(Instruction.Discard, GenerateDiscard); + Add(Instruction.Divide, GenerateDivide); + Add(Instruction.EmitVertex, GenerateEmitVertex); + Add(Instruction.EndPrimitive, GenerateEndPrimitive); + Add(Instruction.ExponentB2, GenerateExponentB2); + Add(Instruction.FSIBegin, GenerateFSIBegin); + Add(Instruction.FSIEnd, GenerateFSIEnd); + Add(Instruction.FindLSB, GenerateFindLSB); + Add(Instruction.FindMSBS32, GenerateFindMSBS32); + Add(Instruction.FindMSBU32, GenerateFindMSBU32); + Add(Instruction.Floor, GenerateFloor); + Add(Instruction.FusedMultiplyAdd, GenerateFusedMultiplyAdd); + Add(Instruction.GroupMemoryBarrier, GenerateGroupMemoryBarrier); + Add(Instruction.ImageAtomic, GenerateImageAtomic); + Add(Instruction.ImageLoad, GenerateImageLoad); + Add(Instruction.ImageStore, GenerateImageStore); + Add(Instruction.IsNan, GenerateIsNan); + Add(Instruction.Load, GenerateLoad); + Add(Instruction.LoadConstant, GenerateLoadConstant); + Add(Instruction.LoadLocal, GenerateLoadLocal); + Add(Instruction.LoadShared, GenerateLoadShared); + Add(Instruction.LoadStorage, GenerateLoadStorage); + Add(Instruction.Lod, GenerateLod); + Add(Instruction.LogarithmB2, GenerateLogarithmB2); + Add(Instruction.LogicalAnd, GenerateLogicalAnd); + Add(Instruction.LogicalExclusiveOr, GenerateLogicalExclusiveOr); + Add(Instruction.LogicalNot, GenerateLogicalNot); + Add(Instruction.LogicalOr, GenerateLogicalOr); + Add(Instruction.LoopBreak, GenerateLoopBreak); + Add(Instruction.LoopContinue, GenerateLoopContinue); + Add(Instruction.Maximum, GenerateMaximum); + Add(Instruction.MaximumU32, GenerateMaximumU32); + Add(Instruction.MemoryBarrier, GenerateMemoryBarrier); + Add(Instruction.Minimum, GenerateMinimum); + Add(Instruction.MinimumU32, GenerateMinimumU32); + Add(Instruction.Multiply, GenerateMultiply); + Add(Instruction.MultiplyHighS32, GenerateMultiplyHighS32); + Add(Instruction.MultiplyHighU32, GenerateMultiplyHighU32); + Add(Instruction.Negate, GenerateNegate); + Add(Instruction.PackDouble2x32, GeneratePackDouble2x32); + Add(Instruction.PackHalf2x16, GeneratePackHalf2x16); + Add(Instruction.ReciprocalSquareRoot, GenerateReciprocalSquareRoot); + Add(Instruction.Return, GenerateReturn); + Add(Instruction.Round, GenerateRound); + Add(Instruction.ShiftLeft, GenerateShiftLeft); + Add(Instruction.ShiftRightS32, GenerateShiftRightS32); + Add(Instruction.ShiftRightU32, GenerateShiftRightU32); + Add(Instruction.Shuffle, GenerateShuffle); + Add(Instruction.ShuffleDown, GenerateShuffleDown); + Add(Instruction.ShuffleUp, GenerateShuffleUp); + Add(Instruction.ShuffleXor, GenerateShuffleXor); + Add(Instruction.Sine, GenerateSine); + Add(Instruction.SquareRoot, GenerateSquareRoot); + Add(Instruction.Store, GenerateStore); + Add(Instruction.StoreLocal, GenerateStoreLocal); + Add(Instruction.StoreShared, GenerateStoreShared); + Add(Instruction.StoreShared16, GenerateStoreShared16); + Add(Instruction.StoreShared8, GenerateStoreShared8); + Add(Instruction.StoreStorage, GenerateStoreStorage); + Add(Instruction.StoreStorage16, GenerateStoreStorage16); + Add(Instruction.StoreStorage8, GenerateStoreStorage8); + Add(Instruction.Subtract, GenerateSubtract); + Add(Instruction.SwizzleAdd, GenerateSwizzleAdd); + Add(Instruction.TextureSample, GenerateTextureSample); + Add(Instruction.TextureSize, GenerateTextureSize); + Add(Instruction.Truncate, GenerateTruncate); + Add(Instruction.UnpackDouble2x32, GenerateUnpackDouble2x32); + Add(Instruction.UnpackHalf2x16, GenerateUnpackHalf2x16); + Add(Instruction.VectorExtract, GenerateVectorExtract); + Add(Instruction.VoteAll, GenerateVoteAll); + Add(Instruction.VoteAllEqual, GenerateVoteAllEqual); + Add(Instruction.VoteAny, GenerateVoteAny); + } + + private static void Add(Instruction inst, Func<CodeGenContext, AstOperation, OperationResult> handler) + { + InstTable[(int)(inst & Instruction.Mask)] = handler; + } + + public static OperationResult Generate(CodeGenContext context, AstOperation operation) + { + var handler = InstTable[(int)(operation.Inst & Instruction.Mask)]; + if (handler != null) + { + return handler(context, operation); + } + else + { + throw new NotImplementedException(operation.Inst.ToString()); + } + } + + private static OperationResult GenerateAbsolute(CodeGenContext context, AstOperation operation) + { + return GenerateUnary(context, operation, context.Delegates.GlslFAbs, context.Delegates.GlslSAbs); + } + + private static OperationResult GenerateAdd(CodeGenContext context, AstOperation operation) + { + return GenerateBinary(context, operation, context.Delegates.FAdd, context.Delegates.IAdd); + } + + private static OperationResult GenerateAtomicAdd(CodeGenContext context, AstOperation operation) + { + return GenerateAtomicMemoryBinary(context, operation, context.Delegates.AtomicIAdd); + } + + private static OperationResult GenerateAtomicAnd(CodeGenContext context, AstOperation operation) + { + return GenerateAtomicMemoryBinary(context, operation, context.Delegates.AtomicAnd); + } + + private static OperationResult GenerateAtomicCompareAndSwap(CodeGenContext context, AstOperation operation) + { + return GenerateAtomicMemoryCas(context, operation); + } + + private static OperationResult GenerateAtomicMinS32(CodeGenContext context, AstOperation operation) + { + return GenerateAtomicMemoryBinary(context, operation, context.Delegates.AtomicSMin); + } + + private static OperationResult GenerateAtomicMinU32(CodeGenContext context, AstOperation operation) + { + return GenerateAtomicMemoryBinary(context, operation, context.Delegates.AtomicUMin); + } + + private static OperationResult GenerateAtomicMaxS32(CodeGenContext context, AstOperation operation) + { + return GenerateAtomicMemoryBinary(context, operation, context.Delegates.AtomicSMax); + } + + private static OperationResult GenerateAtomicMaxU32(CodeGenContext context, AstOperation operation) + { + return GenerateAtomicMemoryBinary(context, operation, context.Delegates.AtomicUMax); + } + + private static OperationResult GenerateAtomicOr(CodeGenContext context, AstOperation operation) + { + return GenerateAtomicMemoryBinary(context, operation, context.Delegates.AtomicOr); + } + + private static OperationResult GenerateAtomicSwap(CodeGenContext context, AstOperation operation) + { + return GenerateAtomicMemoryBinary(context, operation, context.Delegates.AtomicExchange); + } + + private static OperationResult GenerateAtomicXor(CodeGenContext context, AstOperation operation) + { + return GenerateAtomicMemoryBinary(context, operation, context.Delegates.AtomicXor); + } + + private static OperationResult GenerateBallot(CodeGenContext context, AstOperation operation) + { + var source = operation.GetSource(0); + + var uvec4Type = context.TypeVector(context.TypeU32(), 4); + var execution = context.Constant(context.TypeU32(), Scope.Subgroup); + + var maskVector = context.GroupNonUniformBallot(uvec4Type, execution, context.Get(AggregateType.Bool, source)); + var mask = context.CompositeExtract(context.TypeU32(), maskVector, (SpvLiteralInteger)0); + + return new OperationResult(AggregateType.U32, mask); + } + + private static OperationResult GenerateBarrier(CodeGenContext context, AstOperation operation) + { + context.ControlBarrier( + context.Constant(context.TypeU32(), Scope.Workgroup), + context.Constant(context.TypeU32(), Scope.Workgroup), + context.Constant(context.TypeU32(), MemorySemanticsMask.WorkgroupMemory | MemorySemanticsMask.AcquireRelease)); + + return OperationResult.Invalid; + } + + private static OperationResult GenerateBitCount(CodeGenContext context, AstOperation operation) + { + return GenerateUnaryS32(context, operation, context.Delegates.BitCount); + } + + private static OperationResult GenerateBitfieldExtractS32(CodeGenContext context, AstOperation operation) + { + return GenerateBitfieldExtractS32(context, operation, context.Delegates.BitFieldSExtract); + } + + private static OperationResult GenerateBitfieldExtractU32(CodeGenContext context, AstOperation operation) + { + return GenerateTernaryU32(context, operation, context.Delegates.BitFieldUExtract); + } + + private static OperationResult GenerateBitfieldInsert(CodeGenContext context, AstOperation operation) + { + return GenerateBitfieldInsert(context, operation, context.Delegates.BitFieldInsert); + } + + private static OperationResult GenerateBitfieldReverse(CodeGenContext context, AstOperation operation) + { + return GenerateUnaryS32(context, operation, context.Delegates.BitReverse); + } + + private static OperationResult GenerateBitwiseAnd(CodeGenContext context, AstOperation operation) + { + return GenerateBinaryS32(context, operation, context.Delegates.BitwiseAnd); + } + + private static OperationResult GenerateBitwiseExclusiveOr(CodeGenContext context, AstOperation operation) + { + return GenerateBinaryS32(context, operation, context.Delegates.BitwiseXor); + } + + private static OperationResult GenerateBitwiseNot(CodeGenContext context, AstOperation operation) + { + return GenerateUnaryS32(context, operation, context.Delegates.Not); + } + + private static OperationResult GenerateBitwiseOr(CodeGenContext context, AstOperation operation) + { + return GenerateBinaryS32(context, operation, context.Delegates.BitwiseOr); + } + + private static OperationResult GenerateCall(CodeGenContext context, AstOperation operation) + { + AstOperand funcId = (AstOperand)operation.GetSource(0); + + Debug.Assert(funcId.Type == OperandType.Constant); + + (var function, var spvFunc) = context.GetFunction(funcId.Value); + + var args = new SpvInstruction[operation.SourcesCount - 1]; + var spvLocals = context.GetLocalForArgsPointers(funcId.Value); + + for (int i = 0; i < args.Length; i++) + { + var operand = (AstOperand)operation.GetSource(i + 1); + if (i >= function.InArguments.Length) + { + args[i] = context.GetLocalPointer(operand); + } + else + { + var type = function.GetArgumentType(i); + var value = context.Get(type, operand); + var spvLocal = spvLocals[i]; + + context.Store(spvLocal, value); + + args[i] = spvLocal; + } + } + + var retType = function.ReturnType; + var result = context.FunctionCall(context.GetType(retType), spvFunc, args); + return new OperationResult(retType, result); + } + + private static OperationResult GenerateCeiling(CodeGenContext context, AstOperation operation) + { + return GenerateUnary(context, operation, context.Delegates.GlslCeil, null); + } + + private static OperationResult GenerateClamp(CodeGenContext context, AstOperation operation) + { + return GenerateTernary(context, operation, context.Delegates.GlslFClamp, context.Delegates.GlslSClamp); + } + + private static OperationResult GenerateClampU32(CodeGenContext context, AstOperation operation) + { + return GenerateTernaryU32(context, operation, context.Delegates.GlslUClamp); + } + + private static OperationResult GenerateComment(CodeGenContext context, AstOperation operation) + { + return OperationResult.Invalid; + } + + private static OperationResult GenerateCompareEqual(CodeGenContext context, AstOperation operation) + { + return GenerateCompare(context, operation, context.Delegates.FOrdEqual, context.Delegates.IEqual); + } + + private static OperationResult GenerateCompareGreater(CodeGenContext context, AstOperation operation) + { + return GenerateCompare(context, operation, context.Delegates.FOrdGreaterThan, context.Delegates.SGreaterThan); + } + + private static OperationResult GenerateCompareGreaterOrEqual(CodeGenContext context, AstOperation operation) + { + return GenerateCompare(context, operation, context.Delegates.FOrdGreaterThanEqual, context.Delegates.SGreaterThanEqual); + } + + private static OperationResult GenerateCompareGreaterOrEqualU32(CodeGenContext context, AstOperation operation) + { + return GenerateCompareU32(context, operation, context.Delegates.UGreaterThanEqual); + } + + private static OperationResult GenerateCompareGreaterU32(CodeGenContext context, AstOperation operation) + { + return GenerateCompareU32(context, operation, context.Delegates.UGreaterThan); + } + + private static OperationResult GenerateCompareLess(CodeGenContext context, AstOperation operation) + { + return GenerateCompare(context, operation, context.Delegates.FOrdLessThan, context.Delegates.SLessThan); + } + + private static OperationResult GenerateCompareLessOrEqual(CodeGenContext context, AstOperation operation) + { + return GenerateCompare(context, operation, context.Delegates.FOrdLessThanEqual, context.Delegates.SLessThanEqual); + } + + private static OperationResult GenerateCompareLessOrEqualU32(CodeGenContext context, AstOperation operation) + { + return GenerateCompareU32(context, operation, context.Delegates.ULessThanEqual); + } + + private static OperationResult GenerateCompareLessU32(CodeGenContext context, AstOperation operation) + { + return GenerateCompareU32(context, operation, context.Delegates.ULessThan); + } + + private static OperationResult GenerateCompareNotEqual(CodeGenContext context, AstOperation operation) + { + return GenerateCompare(context, operation, context.Delegates.FOrdNotEqual, context.Delegates.INotEqual); + } + + private static OperationResult GenerateConditionalSelect(CodeGenContext context, AstOperation operation) + { + var src1 = operation.GetSource(0); + var src2 = operation.GetSource(1); + var src3 = operation.GetSource(2); + + var cond = context.Get(AggregateType.Bool, src1); + + if (operation.Inst.HasFlag(Instruction.FP64)) + { + return new OperationResult(AggregateType.FP64, context.Select(context.TypeFP64(), cond, context.GetFP64(src2), context.GetFP64(src3))); + } + else if (operation.Inst.HasFlag(Instruction.FP32)) + { + return new OperationResult(AggregateType.FP32, context.Select(context.TypeFP32(), cond, context.GetFP32(src2), context.GetFP32(src3))); + } + else + { + return new OperationResult(AggregateType.S32, context.Select(context.TypeS32(), cond, context.GetS32(src2), context.GetS32(src3))); + } + } + + private static OperationResult GenerateConvertFP32ToFP64(CodeGenContext context, AstOperation operation) + { + var source = operation.GetSource(0); + + return new OperationResult(AggregateType.FP64, context.FConvert(context.TypeFP64(), context.GetFP32(source))); + } + + private static OperationResult GenerateConvertFP32ToS32(CodeGenContext context, AstOperation operation) + { + var source = operation.GetSource(0); + + return new OperationResult(AggregateType.S32, context.ConvertFToS(context.TypeS32(), context.GetFP32(source))); + } + + private static OperationResult GenerateConvertFP32ToU32(CodeGenContext context, AstOperation operation) + { + var source = operation.GetSource(0); + + return new OperationResult(AggregateType.U32, context.ConvertFToU(context.TypeU32(), context.GetFP32(source))); + } + + private static OperationResult GenerateConvertFP64ToFP32(CodeGenContext context, AstOperation operation) + { + var source = operation.GetSource(0); + + return new OperationResult(AggregateType.FP32, context.FConvert(context.TypeFP32(), context.GetFP64(source))); + } + + private static OperationResult GenerateConvertFP64ToS32(CodeGenContext context, AstOperation operation) + { + var source = operation.GetSource(0); + + return new OperationResult(AggregateType.S32, context.ConvertFToS(context.TypeS32(), context.GetFP64(source))); + } + + private static OperationResult GenerateConvertFP64ToU32(CodeGenContext context, AstOperation operation) + { + var source = operation.GetSource(0); + + return new OperationResult(AggregateType.U32, context.ConvertFToU(context.TypeU32(), context.GetFP64(source))); + } + + private static OperationResult GenerateConvertS32ToFP32(CodeGenContext context, AstOperation operation) + { + var source = operation.GetSource(0); + + return new OperationResult(AggregateType.FP32, context.ConvertSToF(context.TypeFP32(), context.GetS32(source))); + } + + private static OperationResult GenerateConvertS32ToFP64(CodeGenContext context, AstOperation operation) + { + var source = operation.GetSource(0); + + return new OperationResult(AggregateType.FP64, context.ConvertSToF(context.TypeFP64(), context.GetS32(source))); + } + + private static OperationResult GenerateConvertU32ToFP32(CodeGenContext context, AstOperation operation) + { + var source = operation.GetSource(0); + + return new OperationResult(AggregateType.FP32, context.ConvertUToF(context.TypeFP32(), context.GetU32(source))); + } + + private static OperationResult GenerateConvertU32ToFP64(CodeGenContext context, AstOperation operation) + { + var source = operation.GetSource(0); + + return new OperationResult(AggregateType.FP64, context.ConvertUToF(context.TypeFP64(), context.GetU32(source))); + } + + private static OperationResult GenerateCosine(CodeGenContext context, AstOperation operation) + { + return GenerateUnary(context, operation, context.Delegates.GlslCos, null); + } + + private static OperationResult GenerateDdx(CodeGenContext context, AstOperation operation) + { + return GenerateUnaryFP32(context, operation, context.Delegates.DPdx); + } + + private static OperationResult GenerateDdy(CodeGenContext context, AstOperation operation) + { + return GenerateUnaryFP32(context, operation, context.Delegates.DPdy); + } + + private static OperationResult GenerateDiscard(CodeGenContext context, AstOperation operation) + { + context.Kill(); + return OperationResult.Invalid; + } + + private static OperationResult GenerateDivide(CodeGenContext context, AstOperation operation) + { + return GenerateBinary(context, operation, context.Delegates.FDiv, context.Delegates.SDiv); + } + + private static OperationResult GenerateEmitVertex(CodeGenContext context, AstOperation operation) + { + context.EmitVertex(); + + return OperationResult.Invalid; + } + + private static OperationResult GenerateEndPrimitive(CodeGenContext context, AstOperation operation) + { + context.EndPrimitive(); + + return OperationResult.Invalid; + } + + private static OperationResult GenerateExponentB2(CodeGenContext context, AstOperation operation) + { + return GenerateUnary(context, operation, context.Delegates.GlslExp2, null); + } + + private static OperationResult GenerateFSIBegin(CodeGenContext context, AstOperation operation) + { + if (context.Config.GpuAccessor.QueryHostSupportsFragmentShaderInterlock()) + { + context.BeginInvocationInterlockEXT(); + } + + return OperationResult.Invalid; + } + + private static OperationResult GenerateFSIEnd(CodeGenContext context, AstOperation operation) + { + if (context.Config.GpuAccessor.QueryHostSupportsFragmentShaderInterlock()) + { + context.EndInvocationInterlockEXT(); + } + + return OperationResult.Invalid; + } + + private static OperationResult GenerateFindLSB(CodeGenContext context, AstOperation operation) + { + var source = context.GetU32(operation.GetSource(0)); + return new OperationResult(AggregateType.U32, context.GlslFindILsb(context.TypeU32(), source)); + } + + private static OperationResult GenerateFindMSBS32(CodeGenContext context, AstOperation operation) + { + var source = context.GetS32(operation.GetSource(0)); + return new OperationResult(AggregateType.U32, context.GlslFindSMsb(context.TypeU32(), source)); + } + + private static OperationResult GenerateFindMSBU32(CodeGenContext context, AstOperation operation) + { + var source = context.GetU32(operation.GetSource(0)); + return new OperationResult(AggregateType.U32, context.GlslFindUMsb(context.TypeU32(), source)); + } + + private static OperationResult GenerateFloor(CodeGenContext context, AstOperation operation) + { + return GenerateUnary(context, operation, context.Delegates.GlslFloor, null); + } + + private static OperationResult GenerateFusedMultiplyAdd(CodeGenContext context, AstOperation operation) + { + return GenerateTernary(context, operation, context.Delegates.GlslFma, null); + } + + private static OperationResult GenerateGroupMemoryBarrier(CodeGenContext context, AstOperation operation) + { + context.MemoryBarrier(context.Constant(context.TypeU32(), Scope.Workgroup), context.Constant(context.TypeU32(), DefaultMemorySemantics)); + return OperationResult.Invalid; + } + + private static OperationResult GenerateImageAtomic(CodeGenContext context, AstOperation operation) + { + AstTextureOperation texOp = (AstTextureOperation)operation; + + bool isBindless = (texOp.Flags & TextureFlags.Bindless) != 0; + + var componentType = texOp.Format.GetComponentType(); + + // TODO: Bindless texture support. For now we just return 0/do nothing. + if (isBindless) + { + return new OperationResult(componentType, componentType switch + { + AggregateType.S32 => context.Constant(context.TypeS32(), 0), + AggregateType.U32 => context.Constant(context.TypeU32(), 0u), + _ => context.Constant(context.TypeFP32(), 0f), + }); + } + + bool isArray = (texOp.Type & SamplerType.Array) != 0; + bool isIndexed = (texOp.Type & SamplerType.Indexed) != 0; + + int srcIndex = isBindless ? 1 : 0; + + SpvInstruction Src(AggregateType type) + { + return context.Get(type, texOp.GetSource(srcIndex++)); + } + + SpvInstruction index = null; + + if (isIndexed) + { + index = Src(AggregateType.S32); + } + + int coordsCount = texOp.Type.GetDimensions(); + + int pCount = coordsCount + (isArray ? 1 : 0); + + SpvInstruction pCoords; + + if (pCount > 1) + { + SpvInstruction[] elems = new SpvInstruction[pCount]; + + for (int i = 0; i < pCount; i++) + { + elems[i] = Src(AggregateType.S32); + } + + var vectorType = context.TypeVector(context.TypeS32(), pCount); + pCoords = context.CompositeConstruct(vectorType, elems); + } + else + { + pCoords = Src(AggregateType.S32); + } + + SpvInstruction value = Src(componentType); + + (var imageType, var imageVariable) = context.Images[new TextureMeta(texOp.CbufSlot, texOp.Handle, texOp.Format)]; + + var image = context.Load(imageType, imageVariable); + + SpvInstruction resultType = context.GetType(componentType); + SpvInstruction imagePointerType = context.TypePointer(StorageClass.Image, resultType); + + var pointer = context.ImageTexelPointer(imagePointerType, imageVariable, pCoords, context.Constant(context.TypeU32(), 0)); + var one = context.Constant(context.TypeU32(), 1); + var zero = context.Constant(context.TypeU32(), 0); + + var result = (texOp.Flags & TextureFlags.AtomicMask) switch + { + TextureFlags.Add => context.AtomicIAdd(resultType, pointer, one, zero, value), + TextureFlags.Minimum => componentType == AggregateType.S32 + ? context.AtomicSMin(resultType, pointer, one, zero, value) + : context.AtomicUMin(resultType, pointer, one, zero, value), + TextureFlags.Maximum => componentType == AggregateType.S32 + ? context.AtomicSMax(resultType, pointer, one, zero, value) + : context.AtomicUMax(resultType, pointer, one, zero, value), + TextureFlags.Increment => context.AtomicIIncrement(resultType, pointer, one, zero), + TextureFlags.Decrement => context.AtomicIDecrement(resultType, pointer, one, zero), + TextureFlags.BitwiseAnd => context.AtomicAnd(resultType, pointer, one, zero, value), + TextureFlags.BitwiseOr => context.AtomicOr(resultType, pointer, one, zero, value), + TextureFlags.BitwiseXor => context.AtomicXor(resultType, pointer, one, zero, value), + TextureFlags.Swap => context.AtomicExchange(resultType, pointer, one, zero, value), + TextureFlags.CAS => context.AtomicCompareExchange(resultType, pointer, one, zero, zero, Src(componentType), value), + _ => context.AtomicIAdd(resultType, pointer, one, zero, value), + }; + + return new OperationResult(componentType, result); + } + + private static OperationResult GenerateImageLoad(CodeGenContext context, AstOperation operation) + { + AstTextureOperation texOp = (AstTextureOperation)operation; + + bool isBindless = (texOp.Flags & TextureFlags.Bindless) != 0; + + var componentType = texOp.Format.GetComponentType(); + + // TODO: Bindless texture support. For now we just return 0/do nothing. + if (isBindless) + { + return GetZeroOperationResult(context, texOp, componentType, isVector: true); + } + + bool isArray = (texOp.Type & SamplerType.Array) != 0; + bool isIndexed = (texOp.Type & SamplerType.Indexed) != 0; + + int srcIndex = isBindless ? 1 : 0; + + SpvInstruction Src(AggregateType type) + { + return context.Get(type, texOp.GetSource(srcIndex++)); + } + + SpvInstruction index = null; + + if (isIndexed) + { + index = Src(AggregateType.S32); + } + + int coordsCount = texOp.Type.GetDimensions(); + + int pCount = coordsCount + (isArray ? 1 : 0); + + SpvInstruction pCoords; + + if (pCount > 1) + { + SpvInstruction[] elems = new SpvInstruction[pCount]; + + for (int i = 0; i < pCount; i++) + { + elems[i] = Src(AggregateType.S32); + } + + var vectorType = context.TypeVector(context.TypeS32(), pCount); + pCoords = context.CompositeConstruct(vectorType, elems); + } + else + { + pCoords = Src(AggregateType.S32); + } + + pCoords = ScalingHelpers.ApplyScaling(context, texOp, pCoords, intCoords: true, isBindless, isIndexed, isArray, pCount); + + (var imageType, var imageVariable) = context.Images[new TextureMeta(texOp.CbufSlot, texOp.Handle, texOp.Format)]; + + var image = context.Load(imageType, imageVariable); + var imageComponentType = context.GetType(componentType); + var swizzledResultType = texOp.GetVectorType(componentType); + + var texel = context.ImageRead(context.TypeVector(imageComponentType, 4), image, pCoords, ImageOperandsMask.MaskNone); + var result = GetSwizzledResult(context, texel, swizzledResultType, texOp.Index); + + return new OperationResult(componentType, result); + } + + private static OperationResult GenerateImageStore(CodeGenContext context, AstOperation operation) + { + AstTextureOperation texOp = (AstTextureOperation)operation; + + bool isBindless = (texOp.Flags & TextureFlags.Bindless) != 0; + + // TODO: Bindless texture support. For now we just return 0/do nothing. + if (isBindless) + { + return OperationResult.Invalid; + } + + bool isArray = (texOp.Type & SamplerType.Array) != 0; + bool isIndexed = (texOp.Type & SamplerType.Indexed) != 0; + + int srcIndex = isBindless ? 1 : 0; + + SpvInstruction Src(AggregateType type) + { + return context.Get(type, texOp.GetSource(srcIndex++)); + } + + SpvInstruction index = null; + + if (isIndexed) + { + index = Src(AggregateType.S32); + } + + int coordsCount = texOp.Type.GetDimensions(); + + int pCount = coordsCount + (isArray ? 1 : 0); + + SpvInstruction pCoords; + + if (pCount > 1) + { + SpvInstruction[] elems = new SpvInstruction[pCount]; + + for (int i = 0; i < pCount; i++) + { + elems[i] = Src(AggregateType.S32); + } + + var vectorType = context.TypeVector(context.TypeS32(), pCount); + pCoords = context.CompositeConstruct(vectorType, elems); + } + else + { + pCoords = Src(AggregateType.S32); + } + + var componentType = texOp.Format.GetComponentType(); + + const int ComponentsCount = 4; + + SpvInstruction[] cElems = new SpvInstruction[ComponentsCount]; + + for (int i = 0; i < ComponentsCount; i++) + { + if (srcIndex < texOp.SourcesCount) + { + cElems[i] = Src(componentType); + } + else + { + cElems[i] = componentType switch + { + AggregateType.S32 => context.Constant(context.TypeS32(), 0), + AggregateType.U32 => context.Constant(context.TypeU32(), 0u), + _ => context.Constant(context.TypeFP32(), 0f), + }; + } + } + + var texel = context.CompositeConstruct(context.TypeVector(context.GetType(componentType), ComponentsCount), cElems); + + (var imageType, var imageVariable) = context.Images[new TextureMeta(texOp.CbufSlot, texOp.Handle, texOp.Format)]; + + var image = context.Load(imageType, imageVariable); + + context.ImageWrite(image, pCoords, texel, ImageOperandsMask.MaskNone); + + return OperationResult.Invalid; + } + + private static OperationResult GenerateIsNan(CodeGenContext context, AstOperation operation) + { + var source = operation.GetSource(0); + + SpvInstruction result; + + if (operation.Inst.HasFlag(Instruction.FP64)) + { + result = context.IsNan(context.TypeBool(), context.GetFP64(source)); + } + else + { + result = context.IsNan(context.TypeBool(), context.GetFP32(source)); + } + + return new OperationResult(AggregateType.Bool, result); + } + + private static OperationResult GenerateLoad(CodeGenContext context, AstOperation operation) + { + return GenerateLoadOrStore(context, operation, isStore: false); + } + + private static OperationResult GenerateLoadConstant(CodeGenContext context, AstOperation operation) + { + var src1 = operation.GetSource(0); + var src2 = context.Get(AggregateType.S32, operation.GetSource(1)); + + var i1 = context.Constant(context.TypeS32(), 0); + var i2 = context.ShiftRightArithmetic(context.TypeS32(), src2, context.Constant(context.TypeS32(), 2)); + var i3 = context.BitwiseAnd(context.TypeS32(), src2, context.Constant(context.TypeS32(), 3)); + + SpvInstruction value = null; + + if (context.Config.GpuAccessor.QueryHostHasVectorIndexingBug()) + { + // Test for each component individually. + for (int i = 0; i < 4; i++) + { + var component = context.Constant(context.TypeS32(), i); + + SpvInstruction elemPointer; + if (context.UniformBuffersArray != null) + { + var ubVariable = context.UniformBuffersArray; + var i0 = context.Get(AggregateType.S32, src1); + + elemPointer = context.AccessChain(context.TypePointer(StorageClass.Uniform, context.TypeFP32()), ubVariable, i0, i1, i2, component); + } + else + { + var ubVariable = context.UniformBuffers[((AstOperand)src1).Value]; + + elemPointer = context.AccessChain(context.TypePointer(StorageClass.Uniform, context.TypeFP32()), ubVariable, i1, i2, component); + } + + SpvInstruction newValue = context.Load(context.TypeFP32(), elemPointer); + + value = value != null ? context.Select(context.TypeFP32(), context.IEqual(context.TypeBool(), i3, component), newValue, value) : newValue; + } + } + else + { + SpvInstruction elemPointer; + + if (context.UniformBuffersArray != null) + { + var ubVariable = context.UniformBuffersArray; + var i0 = context.Get(AggregateType.S32, src1); + + elemPointer = context.AccessChain(context.TypePointer(StorageClass.Uniform, context.TypeFP32()), ubVariable, i0, i1, i2, i3); + } + else + { + var ubVariable = context.UniformBuffers[((AstOperand)src1).Value]; + + elemPointer = context.AccessChain(context.TypePointer(StorageClass.Uniform, context.TypeFP32()), ubVariable, i1, i2, i3); + } + + value = context.Load(context.TypeFP32(), elemPointer); + } + + return new OperationResult(AggregateType.FP32, value); + } + + private static OperationResult GenerateLoadLocal(CodeGenContext context, AstOperation operation) + { + return GenerateLoadLocalOrShared(context, operation, StorageClass.Private, context.LocalMemory); + } + + private static OperationResult GenerateLoadShared(CodeGenContext context, AstOperation operation) + { + return GenerateLoadLocalOrShared(context, operation, StorageClass.Workgroup, context.SharedMemory); + } + + private static OperationResult GenerateLoadLocalOrShared( + CodeGenContext context, + AstOperation operation, + StorageClass storageClass, + SpvInstruction memory) + { + var offset = context.Get(AggregateType.S32, operation.GetSource(0)); + + var elemPointer = context.AccessChain(context.TypePointer(storageClass, context.TypeU32()), memory, offset); + var value = context.Load(context.TypeU32(), elemPointer); + + return new OperationResult(AggregateType.U32, value); + } + + private static OperationResult GenerateLoadStorage(CodeGenContext context, AstOperation operation) + { + var elemPointer = GetStorageElemPointer(context, operation); + var value = context.Load(context.TypeU32(), elemPointer); + + return new OperationResult(AggregateType.U32, value); + } + + private static OperationResult GenerateLod(CodeGenContext context, AstOperation operation) + { + AstTextureOperation texOp = (AstTextureOperation)operation; + + bool isBindless = (texOp.Flags & TextureFlags.Bindless) != 0; + + bool isIndexed = (texOp.Type & SamplerType.Indexed) != 0; + + // TODO: Bindless texture support. For now we just return 0. + if (isBindless) + { + return new OperationResult(AggregateType.S32, context.Constant(context.TypeS32(), 0)); + } + + int srcIndex = 0; + + SpvInstruction Src(AggregateType type) + { + return context.Get(type, texOp.GetSource(srcIndex++)); + } + + SpvInstruction index = null; + + if (isIndexed) + { + index = Src(AggregateType.S32); + } + + int pCount = texOp.Type.GetDimensions(); + + SpvInstruction pCoords; + + if (pCount > 1) + { + SpvInstruction[] elems = new SpvInstruction[pCount]; + + for (int i = 0; i < pCount; i++) + { + elems[i] = Src(AggregateType.FP32); + } + + var vectorType = context.TypeVector(context.TypeFP32(), pCount); + pCoords = context.CompositeConstruct(vectorType, elems); + } + else + { + pCoords = Src(AggregateType.FP32); + } + + var meta = new TextureMeta(texOp.CbufSlot, texOp.Handle, texOp.Format); + + (_, var sampledImageType, var sampledImageVariable) = context.Samplers[meta]; + + var image = context.Load(sampledImageType, sampledImageVariable); + + var resultType = context.TypeVector(context.TypeFP32(), 2); + var packed = context.ImageQueryLod(resultType, image, pCoords); + var result = context.CompositeExtract(context.TypeFP32(), packed, (SpvLiteralInteger)texOp.Index); + + return new OperationResult(AggregateType.FP32, result); + } + + private static OperationResult GenerateLogarithmB2(CodeGenContext context, AstOperation operation) + { + return GenerateUnary(context, operation, context.Delegates.GlslLog2, null); + } + + private static OperationResult GenerateLogicalAnd(CodeGenContext context, AstOperation operation) + { + return GenerateBinaryBool(context, operation, context.Delegates.LogicalAnd); + } + + private static OperationResult GenerateLogicalExclusiveOr(CodeGenContext context, AstOperation operation) + { + return GenerateBinaryBool(context, operation, context.Delegates.LogicalNotEqual); + } + + private static OperationResult GenerateLogicalNot(CodeGenContext context, AstOperation operation) + { + return GenerateUnaryBool(context, operation, context.Delegates.LogicalNot); + } + + private static OperationResult GenerateLogicalOr(CodeGenContext context, AstOperation operation) + { + return GenerateBinaryBool(context, operation, context.Delegates.LogicalOr); + } + + private static OperationResult GenerateLoopBreak(CodeGenContext context, AstOperation operation) + { + AstBlock loopBlock = context.CurrentBlock; + while (loopBlock.Type != AstBlockType.DoWhile) + { + loopBlock = loopBlock.Parent; + } + + context.Branch(context.GetNextLabel(loopBlock.Parent)); + + return OperationResult.Invalid; + } + + private static OperationResult GenerateLoopContinue(CodeGenContext context, AstOperation operation) + { + AstBlock loopBlock = context.CurrentBlock; + while (loopBlock.Type != AstBlockType.DoWhile) + { + loopBlock = loopBlock.Parent; + } + + (var loopTarget, var continueTarget) = context.LoopTargets[loopBlock]; + + context.Branch(continueTarget); + + return OperationResult.Invalid; + } + + private static OperationResult GenerateMaximum(CodeGenContext context, AstOperation operation) + { + return GenerateBinary(context, operation, context.Delegates.GlslFMax, context.Delegates.GlslSMax); + } + + private static OperationResult GenerateMaximumU32(CodeGenContext context, AstOperation operation) + { + return GenerateBinaryU32(context, operation, context.Delegates.GlslUMax); + } + + private static OperationResult GenerateMemoryBarrier(CodeGenContext context, AstOperation operation) + { + context.MemoryBarrier(context.Constant(context.TypeU32(), Scope.Device), context.Constant(context.TypeU32(), DefaultMemorySemantics)); + return OperationResult.Invalid; + } + + private static OperationResult GenerateMinimum(CodeGenContext context, AstOperation operation) + { + return GenerateBinary(context, operation, context.Delegates.GlslFMin, context.Delegates.GlslSMin); + } + + private static OperationResult GenerateMinimumU32(CodeGenContext context, AstOperation operation) + { + return GenerateBinaryU32(context, operation, context.Delegates.GlslUMin); + } + + private static OperationResult GenerateMultiply(CodeGenContext context, AstOperation operation) + { + return GenerateBinary(context, operation, context.Delegates.FMul, context.Delegates.IMul); + } + + private static OperationResult GenerateMultiplyHighS32(CodeGenContext context, AstOperation operation) + { + var src1 = operation.GetSource(0); + var src2 = operation.GetSource(1); + + var resultType = context.TypeStruct(false, context.TypeS32(), context.TypeS32()); + var result = context.SMulExtended(resultType, context.GetS32(src1), context.GetS32(src2)); + result = context.CompositeExtract(context.TypeS32(), result, 1); + + return new OperationResult(AggregateType.S32, result); + } + + private static OperationResult GenerateMultiplyHighU32(CodeGenContext context, AstOperation operation) + { + var src1 = operation.GetSource(0); + var src2 = operation.GetSource(1); + + var resultType = context.TypeStruct(false, context.TypeU32(), context.TypeU32()); + var result = context.UMulExtended(resultType, context.GetU32(src1), context.GetU32(src2)); + result = context.CompositeExtract(context.TypeU32(), result, 1); + + return new OperationResult(AggregateType.U32, result); + } + + private static OperationResult GenerateNegate(CodeGenContext context, AstOperation operation) + { + return GenerateUnary(context, operation, context.Delegates.FNegate, context.Delegates.SNegate); + } + + private static OperationResult GeneratePackDouble2x32(CodeGenContext context, AstOperation operation) + { + var value0 = context.GetU32(operation.GetSource(0)); + var value1 = context.GetU32(operation.GetSource(1)); + var vector = context.CompositeConstruct(context.TypeVector(context.TypeU32(), 2), value0, value1); + var result = context.GlslPackDouble2x32(context.TypeFP64(), vector); + + return new OperationResult(AggregateType.FP64, result); + } + + private static OperationResult GeneratePackHalf2x16(CodeGenContext context, AstOperation operation) + { + var value0 = context.GetFP32(operation.GetSource(0)); + var value1 = context.GetFP32(operation.GetSource(1)); + var vector = context.CompositeConstruct(context.TypeVector(context.TypeFP32(), 2), value0, value1); + var result = context.GlslPackHalf2x16(context.TypeU32(), vector); + + return new OperationResult(AggregateType.U32, result); + } + + private static OperationResult GenerateReciprocalSquareRoot(CodeGenContext context, AstOperation operation) + { + return GenerateUnary(context, operation, context.Delegates.GlslInverseSqrt, null); + } + + private static OperationResult GenerateReturn(CodeGenContext context, AstOperation operation) + { + context.Return(); + return OperationResult.Invalid; + } + + private static OperationResult GenerateRound(CodeGenContext context, AstOperation operation) + { + return GenerateUnary(context, operation, context.Delegates.GlslRoundEven, null); + } + + private static OperationResult GenerateShiftLeft(CodeGenContext context, AstOperation operation) + { + return GenerateBinaryS32(context, operation, context.Delegates.ShiftLeftLogical); + } + + private static OperationResult GenerateShiftRightS32(CodeGenContext context, AstOperation operation) + { + return GenerateBinaryS32(context, operation, context.Delegates.ShiftRightArithmetic); + } + + private static OperationResult GenerateShiftRightU32(CodeGenContext context, AstOperation operation) + { + return GenerateBinaryS32(context, operation, context.Delegates.ShiftRightLogical); + } + + private static OperationResult GenerateShuffle(CodeGenContext context, AstOperation operation) + { + var x = context.GetFP32(operation.GetSource(0)); + var index = context.GetU32(operation.GetSource(1)); + var mask = context.GetU32(operation.GetSource(2)); + + var const31 = context.Constant(context.TypeU32(), 31); + var const8 = context.Constant(context.TypeU32(), 8); + + var clamp = context.BitwiseAnd(context.TypeU32(), mask, const31); + var segMask = context.BitwiseAnd(context.TypeU32(), context.ShiftRightLogical(context.TypeU32(), mask, const8), const31); + var notSegMask = context.Not(context.TypeU32(), segMask); + var clampNotSegMask = context.BitwiseAnd(context.TypeU32(), clamp, notSegMask); + var indexNotSegMask = context.BitwiseAnd(context.TypeU32(), index, notSegMask); + + var threadId = GetScalarInput(context, IoVariable.SubgroupLaneId); + + var minThreadId = context.BitwiseAnd(context.TypeU32(), threadId, segMask); + var maxThreadId = context.BitwiseOr(context.TypeU32(), minThreadId, clampNotSegMask); + var srcThreadId = context.BitwiseOr(context.TypeU32(), indexNotSegMask, minThreadId); + var valid = context.ULessThanEqual(context.TypeBool(), srcThreadId, maxThreadId); + var value = context.GroupNonUniformShuffle(context.TypeFP32(), context.Constant(context.TypeU32(), (int)Scope.Subgroup), x, srcThreadId); + var result = context.Select(context.TypeFP32(), valid, value, x); + + var validLocal = (AstOperand)operation.GetSource(3); + + context.Store(context.GetLocalPointer(validLocal), context.BitcastIfNeeded(validLocal.VarType, AggregateType.Bool, valid)); + + return new OperationResult(AggregateType.FP32, result); + } + + private static OperationResult GenerateShuffleDown(CodeGenContext context, AstOperation operation) + { + var x = context.GetFP32(operation.GetSource(0)); + var index = context.GetU32(operation.GetSource(1)); + var mask = context.GetU32(operation.GetSource(2)); + + var const31 = context.Constant(context.TypeU32(), 31); + var const8 = context.Constant(context.TypeU32(), 8); + + var clamp = context.BitwiseAnd(context.TypeU32(), mask, const31); + var segMask = context.BitwiseAnd(context.TypeU32(), context.ShiftRightLogical(context.TypeU32(), mask, const8), const31); + var notSegMask = context.Not(context.TypeU32(), segMask); + var clampNotSegMask = context.BitwiseAnd(context.TypeU32(), clamp, notSegMask); + + var threadId = GetScalarInput(context, IoVariable.SubgroupLaneId); + + var minThreadId = context.BitwiseAnd(context.TypeU32(), threadId, segMask); + var maxThreadId = context.BitwiseOr(context.TypeU32(), minThreadId, clampNotSegMask); + var srcThreadId = context.IAdd(context.TypeU32(), threadId, index); + var valid = context.ULessThanEqual(context.TypeBool(), srcThreadId, maxThreadId); + var value = context.GroupNonUniformShuffle(context.TypeFP32(), context.Constant(context.TypeU32(), (int)Scope.Subgroup), x, srcThreadId); + var result = context.Select(context.TypeFP32(), valid, value, x); + + var validLocal = (AstOperand)operation.GetSource(3); + + context.Store(context.GetLocalPointer(validLocal), context.BitcastIfNeeded(validLocal.VarType, AggregateType.Bool, valid)); + + return new OperationResult(AggregateType.FP32, result); + } + + private static OperationResult GenerateShuffleUp(CodeGenContext context, AstOperation operation) + { + var x = context.GetFP32(operation.GetSource(0)); + var index = context.GetU32(operation.GetSource(1)); + var mask = context.GetU32(operation.GetSource(2)); + + var const31 = context.Constant(context.TypeU32(), 31); + var const8 = context.Constant(context.TypeU32(), 8); + + var segMask = context.BitwiseAnd(context.TypeU32(), context.ShiftRightLogical(context.TypeU32(), mask, const8), const31); + + var threadId = GetScalarInput(context, IoVariable.SubgroupLaneId); + + var minThreadId = context.BitwiseAnd(context.TypeU32(), threadId, segMask); + var srcThreadId = context.ISub(context.TypeU32(), threadId, index); + var valid = context.SGreaterThanEqual(context.TypeBool(), srcThreadId, minThreadId); + var value = context.GroupNonUniformShuffle(context.TypeFP32(), context.Constant(context.TypeU32(), (int)Scope.Subgroup), x, srcThreadId); + var result = context.Select(context.TypeFP32(), valid, value, x); + + var validLocal = (AstOperand)operation.GetSource(3); + + context.Store(context.GetLocalPointer(validLocal), context.BitcastIfNeeded(validLocal.VarType, AggregateType.Bool, valid)); + + return new OperationResult(AggregateType.FP32, result); + } + + private static OperationResult GenerateShuffleXor(CodeGenContext context, AstOperation operation) + { + var x = context.GetFP32(operation.GetSource(0)); + var index = context.GetU32(operation.GetSource(1)); + var mask = context.GetU32(operation.GetSource(2)); + + var const31 = context.Constant(context.TypeU32(), 31); + var const8 = context.Constant(context.TypeU32(), 8); + + var clamp = context.BitwiseAnd(context.TypeU32(), mask, const31); + var segMask = context.BitwiseAnd(context.TypeU32(), context.ShiftRightLogical(context.TypeU32(), mask, const8), const31); + var notSegMask = context.Not(context.TypeU32(), segMask); + var clampNotSegMask = context.BitwiseAnd(context.TypeU32(), clamp, notSegMask); + + var threadId = GetScalarInput(context, IoVariable.SubgroupLaneId); + + var minThreadId = context.BitwiseAnd(context.TypeU32(), threadId, segMask); + var maxThreadId = context.BitwiseOr(context.TypeU32(), minThreadId, clampNotSegMask); + var srcThreadId = context.BitwiseXor(context.TypeU32(), threadId, index); + var valid = context.ULessThanEqual(context.TypeBool(), srcThreadId, maxThreadId); + var value = context.GroupNonUniformShuffle(context.TypeFP32(), context.Constant(context.TypeU32(), (int)Scope.Subgroup), x, srcThreadId); + var result = context.Select(context.TypeFP32(), valid, value, x); + + var validLocal = (AstOperand)operation.GetSource(3); + + context.Store(context.GetLocalPointer(validLocal), context.BitcastIfNeeded(validLocal.VarType, AggregateType.Bool, valid)); + + return new OperationResult(AggregateType.FP32, result); + } + + private static OperationResult GenerateSine(CodeGenContext context, AstOperation operation) + { + return GenerateUnary(context, operation, context.Delegates.GlslSin, null); + } + + private static OperationResult GenerateSquareRoot(CodeGenContext context, AstOperation operation) + { + return GenerateUnary(context, operation, context.Delegates.GlslSqrt, null); + } + + private static OperationResult GenerateStore(CodeGenContext context, AstOperation operation) + { + return GenerateLoadOrStore(context, operation, isStore: true); + } + + private static OperationResult GenerateStoreLocal(CodeGenContext context, AstOperation operation) + { + return GenerateStoreLocalOrShared(context, operation, StorageClass.Private, context.LocalMemory); + } + + private static OperationResult GenerateStoreShared(CodeGenContext context, AstOperation operation) + { + return GenerateStoreLocalOrShared(context, operation, StorageClass.Workgroup, context.SharedMemory); + } + + private static OperationResult GenerateStoreLocalOrShared( + CodeGenContext context, + AstOperation operation, + StorageClass storageClass, + SpvInstruction memory) + { + var offset = context.Get(AggregateType.S32, operation.GetSource(0)); + var value = context.Get(AggregateType.U32, operation.GetSource(1)); + + var elemPointer = context.AccessChain(context.TypePointer(storageClass, context.TypeU32()), memory, offset); + context.Store(elemPointer, value); + + return OperationResult.Invalid; + } + + private static OperationResult GenerateStoreShared16(CodeGenContext context, AstOperation operation) + { + GenerateStoreSharedSmallInt(context, operation, 16); + + return OperationResult.Invalid; + } + + private static OperationResult GenerateStoreShared8(CodeGenContext context, AstOperation operation) + { + GenerateStoreSharedSmallInt(context, operation, 8); + + return OperationResult.Invalid; + } + + private static OperationResult GenerateStoreStorage(CodeGenContext context, AstOperation operation) + { + var elemPointer = GetStorageElemPointer(context, operation); + context.Store(elemPointer, context.Get(AggregateType.U32, operation.GetSource(2))); + + return OperationResult.Invalid; + } + + private static OperationResult GenerateStoreStorage16(CodeGenContext context, AstOperation operation) + { + GenerateStoreStorageSmallInt(context, operation, 16); + + return OperationResult.Invalid; + } + + private static OperationResult GenerateStoreStorage8(CodeGenContext context, AstOperation operation) + { + GenerateStoreStorageSmallInt(context, operation, 8); + + return OperationResult.Invalid; + } + + private static OperationResult GenerateSubtract(CodeGenContext context, AstOperation operation) + { + return GenerateBinary(context, operation, context.Delegates.FSub, context.Delegates.ISub); + } + + private static OperationResult GenerateSwizzleAdd(CodeGenContext context, AstOperation operation) + { + var x = context.Get(AggregateType.FP32, operation.GetSource(0)); + var y = context.Get(AggregateType.FP32, operation.GetSource(1)); + var mask = context.Get(AggregateType.U32, operation.GetSource(2)); + + var v4float = context.TypeVector(context.TypeFP32(), 4); + var one = context.Constant(context.TypeFP32(), 1.0f); + var minusOne = context.Constant(context.TypeFP32(), -1.0f); + var zero = context.Constant(context.TypeFP32(), 0.0f); + var xLut = context.ConstantComposite(v4float, one, minusOne, one, zero); + var yLut = context.ConstantComposite(v4float, one, one, minusOne, one); + + var three = context.Constant(context.TypeU32(), 3); + + var threadId = GetScalarInput(context, IoVariable.SubgroupLaneId); + var shift = context.BitwiseAnd(context.TypeU32(), threadId, three); + shift = context.ShiftLeftLogical(context.TypeU32(), shift, context.Constant(context.TypeU32(), 1)); + var lutIdx = context.ShiftRightLogical(context.TypeU32(), mask, shift); + lutIdx = context.BitwiseAnd(context.TypeU32(), lutIdx, three); + + var xLutValue = context.VectorExtractDynamic(context.TypeFP32(), xLut, lutIdx); + var yLutValue = context.VectorExtractDynamic(context.TypeFP32(), yLut, lutIdx); + + var xResult = context.FMul(context.TypeFP32(), x, xLutValue); + var yResult = context.FMul(context.TypeFP32(), y, yLutValue); + var result = context.FAdd(context.TypeFP32(), xResult, yResult); + + return new OperationResult(AggregateType.FP32, result); + } + + private static OperationResult GenerateTextureSample(CodeGenContext context, AstOperation operation) + { + AstTextureOperation texOp = (AstTextureOperation)operation; + + bool isBindless = (texOp.Flags & TextureFlags.Bindless) != 0; + bool isGather = (texOp.Flags & TextureFlags.Gather) != 0; + bool hasDerivatives = (texOp.Flags & TextureFlags.Derivatives) != 0; + bool intCoords = (texOp.Flags & TextureFlags.IntCoords) != 0; + bool hasLodBias = (texOp.Flags & TextureFlags.LodBias) != 0; + bool hasLodLevel = (texOp.Flags & TextureFlags.LodLevel) != 0; + bool hasOffset = (texOp.Flags & TextureFlags.Offset) != 0; + bool hasOffsets = (texOp.Flags & TextureFlags.Offsets) != 0; + + bool isArray = (texOp.Type & SamplerType.Array) != 0; + bool isIndexed = (texOp.Type & SamplerType.Indexed) != 0; + bool isMultisample = (texOp.Type & SamplerType.Multisample) != 0; + bool isShadow = (texOp.Type & SamplerType.Shadow) != 0; + + bool colorIsVector = isGather || !isShadow; + + // TODO: Bindless texture support. For now we just return 0. + if (isBindless) + { + return GetZeroOperationResult(context, texOp, AggregateType.FP32, colorIsVector); + } + + // This combination is valid, but not available on GLSL. + // For now, ignore the LOD level and do a normal sample. + // TODO: How to implement it properly? + if (hasLodLevel && isArray && isShadow) + { + hasLodLevel = false; + } + + int srcIndex = isBindless ? 1 : 0; + + SpvInstruction Src(AggregateType type) + { + return context.Get(type, texOp.GetSource(srcIndex++)); + } + + SpvInstruction index = null; + + if (isIndexed) + { + index = Src(AggregateType.S32); + } + + int coordsCount = texOp.Type.GetDimensions(); + + int pCount = coordsCount; + + int arrayIndexElem = -1; + + if (isArray) + { + arrayIndexElem = pCount++; + } + + AggregateType coordType = intCoords ? AggregateType.S32 : AggregateType.FP32; + + SpvInstruction AssemblePVector(int count) + { + if (count > 1) + { + SpvInstruction[] elems = new SpvInstruction[count]; + + for (int index = 0; index < count; index++) + { + if (arrayIndexElem == index) + { + elems[index] = Src(AggregateType.S32); + + if (!intCoords) + { + elems[index] = context.ConvertSToF(context.TypeFP32(), elems[index]); + } + } + else + { + elems[index] = Src(coordType); + } + } + + var vectorType = context.TypeVector(intCoords ? context.TypeS32() : context.TypeFP32(), count); + return context.CompositeConstruct(vectorType, elems); + } + else + { + return Src(coordType); + } + } + + SpvInstruction ApplyBias(SpvInstruction vector, SpvInstruction image) + { + int gatherBiasPrecision = context.Config.GpuAccessor.QueryHostGatherBiasPrecision(); + if (isGather && gatherBiasPrecision != 0) + { + // GPU requires texture gather to be slightly offset to match NVIDIA behaviour when point is exactly between two texels. + // Offset by the gather precision divided by 2 to correct for rounding. + var sizeType = pCount == 1 ? context.TypeS32() : context.TypeVector(context.TypeS32(), pCount); + var pVectorType = pCount == 1 ? context.TypeFP32() : context.TypeVector(context.TypeFP32(), pCount); + + var bias = context.Constant(context.TypeFP32(), (float)(1 << (gatherBiasPrecision + 1))); + var biasVector = context.CompositeConstruct(pVectorType, Enumerable.Repeat(bias, pCount).ToArray()); + + var one = context.Constant(context.TypeFP32(), 1f); + var oneVector = context.CompositeConstruct(pVectorType, Enumerable.Repeat(one, pCount).ToArray()); + + var divisor = context.FMul( + pVectorType, + context.ConvertSToF(pVectorType, context.ImageQuerySize(sizeType, image)), + biasVector); + + vector = context.FAdd(pVectorType, vector, context.FDiv(pVectorType, oneVector, divisor)); + } + + return vector; + } + + SpvInstruction pCoords = AssemblePVector(pCount); + pCoords = ScalingHelpers.ApplyScaling(context, texOp, pCoords, intCoords, isBindless, isIndexed, isArray, pCount); + + SpvInstruction AssembleDerivativesVector(int count) + { + if (count > 1) + { + SpvInstruction[] elems = new SpvInstruction[count]; + + for (int index = 0; index < count; index++) + { + elems[index] = Src(AggregateType.FP32); + } + + var vectorType = context.TypeVector(context.TypeFP32(), count); + return context.CompositeConstruct(vectorType, elems); + } + else + { + return Src(AggregateType.FP32); + } + } + + SpvInstruction dRef = null; + + if (isShadow) + { + dRef = Src(AggregateType.FP32); + } + + SpvInstruction[] derivatives = null; + + if (hasDerivatives) + { + derivatives = new[] + { + AssembleDerivativesVector(coordsCount), // dPdx + AssembleDerivativesVector(coordsCount) // dPdy + }; + } + + SpvInstruction sample = null; + SpvInstruction lod = null; + + if (isMultisample) + { + sample = Src(AggregateType.S32); + } + else if (hasLodLevel) + { + lod = Src(coordType); + } + + SpvInstruction AssembleOffsetVector(int count) + { + if (count > 1) + { + SpvInstruction[] elems = new SpvInstruction[count]; + + for (int index = 0; index < count; index++) + { + elems[index] = Src(AggregateType.S32); + } + + var vectorType = context.TypeVector(context.TypeS32(), count); + + return context.ConstantComposite(vectorType, elems); + } + else + { + return Src(AggregateType.S32); + } + } + + SpvInstruction[] offsets = null; + + if (hasOffset) + { + offsets = new[] { AssembleOffsetVector(coordsCount) }; + } + else if (hasOffsets) + { + offsets = new[] + { + AssembleOffsetVector(coordsCount), + AssembleOffsetVector(coordsCount), + AssembleOffsetVector(coordsCount), + AssembleOffsetVector(coordsCount) + }; + } + + SpvInstruction lodBias = null; + + if (hasLodBias) + { + lodBias = Src(AggregateType.FP32); + } + + SpvInstruction compIdx = null; + + // textureGather* optional extra component index, + // not needed for shadow samplers. + if (isGather && !isShadow) + { + compIdx = Src(AggregateType.S32); + } + + var operandsList = new List<SpvInstruction>(); + var operandsMask = ImageOperandsMask.MaskNone; + + if (hasLodBias) + { + operandsMask |= ImageOperandsMask.Bias; + operandsList.Add(lodBias); + } + + if (!isMultisample && hasLodLevel) + { + operandsMask |= ImageOperandsMask.Lod; + operandsList.Add(lod); + } + + if (hasDerivatives) + { + operandsMask |= ImageOperandsMask.Grad; + operandsList.Add(derivatives[0]); + operandsList.Add(derivatives[1]); + } + + if (hasOffset) + { + operandsMask |= ImageOperandsMask.ConstOffset; + operandsList.Add(offsets[0]); + } + else if (hasOffsets) + { + operandsMask |= ImageOperandsMask.ConstOffsets; + SpvInstruction arrayv2 = context.TypeArray(context.TypeVector(context.TypeS32(), 2), context.Constant(context.TypeU32(), 4)); + operandsList.Add(context.ConstantComposite(arrayv2, offsets[0], offsets[1], offsets[2], offsets[3])); + } + + if (isMultisample) + { + operandsMask |= ImageOperandsMask.Sample; + operandsList.Add(sample); + } + + var resultType = colorIsVector ? context.TypeVector(context.TypeFP32(), 4) : context.TypeFP32(); + + var meta = new TextureMeta(texOp.CbufSlot, texOp.Handle, texOp.Format); + + (var imageType, var sampledImageType, var sampledImageVariable) = context.Samplers[meta]; + + var image = context.Load(sampledImageType, sampledImageVariable); + + if (intCoords) + { + image = context.Image(imageType, image); + } + + pCoords = ApplyBias(pCoords, image); + + var operands = operandsList.ToArray(); + + SpvInstruction result; + + if (intCoords) + { + result = context.ImageFetch(resultType, image, pCoords, operandsMask, operands); + } + else if (isGather) + { + if (isShadow) + { + result = context.ImageDrefGather(resultType, image, pCoords, dRef, operandsMask, operands); + } + else + { + result = context.ImageGather(resultType, image, pCoords, compIdx, operandsMask, operands); + } + } + else if (isShadow) + { + if (hasLodLevel) + { + result = context.ImageSampleDrefExplicitLod(resultType, image, pCoords, dRef, operandsMask, operands); + } + else + { + result = context.ImageSampleDrefImplicitLod(resultType, image, pCoords, dRef, operandsMask, operands); + } + } + else if (hasDerivatives || hasLodLevel) + { + result = context.ImageSampleExplicitLod(resultType, image, pCoords, operandsMask, operands); + } + else + { + result = context.ImageSampleImplicitLod(resultType, image, pCoords, operandsMask, operands); + } + + var swizzledResultType = AggregateType.FP32; + + if (colorIsVector) + { + swizzledResultType = texOp.GetVectorType(swizzledResultType); + + result = GetSwizzledResult(context, result, swizzledResultType, texOp.Index); + } + + return new OperationResult(swizzledResultType, result); + } + + private static OperationResult GenerateTextureSize(CodeGenContext context, AstOperation operation) + { + AstTextureOperation texOp = (AstTextureOperation)operation; + + bool isBindless = (texOp.Flags & TextureFlags.Bindless) != 0; + + // TODO: Bindless texture support. For now we just return 0. + if (isBindless) + { + return new OperationResult(AggregateType.S32, context.Constant(context.TypeS32(), 0)); + } + + bool isIndexed = (texOp.Type & SamplerType.Indexed) != 0; + + SpvInstruction index = null; + + if (isIndexed) + { + index = context.GetS32(texOp.GetSource(0)); + } + + var meta = new TextureMeta(texOp.CbufSlot, texOp.Handle, texOp.Format); + + (var imageType, var sampledImageType, var sampledImageVariable) = context.Samplers[meta]; + + var image = context.Load(sampledImageType, sampledImageVariable); + image = context.Image(imageType, image); + + if (texOp.Index == 3) + { + return new OperationResult(AggregateType.S32, context.ImageQueryLevels(context.TypeS32(), image)); + } + else + { + var type = context.SamplersTypes[meta]; + bool hasLod = !type.HasFlag(SamplerType.Multisample) && type != SamplerType.TextureBuffer; + + int dimensions = (type & SamplerType.Mask) == SamplerType.TextureCube ? 2 : type.GetDimensions(); + + if (type.HasFlag(SamplerType.Array)) + { + dimensions++; + } + + var resultType = dimensions == 1 ? context.TypeS32() : context.TypeVector(context.TypeS32(), dimensions); + + SpvInstruction result; + + if (hasLod) + { + int lodSrcIndex = isBindless || isIndexed ? 1 : 0; + var lod = context.GetS32(operation.GetSource(lodSrcIndex)); + result = context.ImageQuerySizeLod(resultType, image, lod); + } + else + { + result = context.ImageQuerySize(resultType, image); + } + + if (dimensions != 1) + { + result = context.CompositeExtract(context.TypeS32(), result, (SpvLiteralInteger)texOp.Index); + } + + if (texOp.Index < 2 || (type & SamplerType.Mask) == SamplerType.Texture3D) + { + result = ScalingHelpers.ApplyUnscaling(context, texOp.WithType(type), result, isBindless, isIndexed); + } + + return new OperationResult(AggregateType.S32, result); + } + } + + private static OperationResult GenerateTruncate(CodeGenContext context, AstOperation operation) + { + return GenerateUnary(context, operation, context.Delegates.GlslTrunc, null); + } + + private static OperationResult GenerateUnpackDouble2x32(CodeGenContext context, AstOperation operation) + { + var value = context.GetFP64(operation.GetSource(0)); + var vector = context.GlslUnpackDouble2x32(context.TypeVector(context.TypeU32(), 2), value); + var result = context.CompositeExtract(context.TypeU32(), vector, operation.Index); + + return new OperationResult(AggregateType.U32, result); + } + + private static OperationResult GenerateUnpackHalf2x16(CodeGenContext context, AstOperation operation) + { + var value = context.GetU32(operation.GetSource(0)); + var vector = context.GlslUnpackHalf2x16(context.TypeVector(context.TypeFP32(), 2), value); + var result = context.CompositeExtract(context.TypeFP32(), vector, operation.Index); + + return new OperationResult(AggregateType.FP32, result); + } + + private static OperationResult GenerateVectorExtract(CodeGenContext context, AstOperation operation) + { + var vector = context.GetWithType(operation.GetSource(0), out AggregateType vectorType); + var scalarType = vectorType & ~AggregateType.ElementCountMask; + var resultType = context.GetType(scalarType); + SpvInstruction result; + + if (operation.GetSource(1) is AstOperand indexOperand && indexOperand.Type == OperandType.Constant) + { + result = context.CompositeExtract(resultType, vector, (SpvLiteralInteger)indexOperand.Value); + } + else + { + var index = context.Get(AggregateType.S32, operation.GetSource(1)); + result = context.VectorExtractDynamic(resultType, vector, index); + } + + return new OperationResult(scalarType, result); + } + + private static OperationResult GenerateVoteAll(CodeGenContext context, AstOperation operation) + { + var execution = context.Constant(context.TypeU32(), Scope.Subgroup); + var result = context.GroupNonUniformAll(context.TypeBool(), execution, context.Get(AggregateType.Bool, operation.GetSource(0))); + return new OperationResult(AggregateType.Bool, result); + } + + private static OperationResult GenerateVoteAllEqual(CodeGenContext context, AstOperation operation) + { + var execution = context.Constant(context.TypeU32(), Scope.Subgroup); + var result = context.GroupNonUniformAllEqual(context.TypeBool(), execution, context.Get(AggregateType.Bool, operation.GetSource(0))); + return new OperationResult(AggregateType.Bool, result); + } + + private static OperationResult GenerateVoteAny(CodeGenContext context, AstOperation operation) + { + var execution = context.Constant(context.TypeU32(), Scope.Subgroup); + var result = context.GroupNonUniformAny(context.TypeBool(), execution, context.Get(AggregateType.Bool, operation.GetSource(0))); + return new OperationResult(AggregateType.Bool, result); + } + + private static OperationResult GenerateCompare( + CodeGenContext context, + AstOperation operation, + Func<SpvInstruction, SpvInstruction, SpvInstruction, SpvInstruction> emitF, + Func<SpvInstruction, SpvInstruction, SpvInstruction, SpvInstruction> emitI) + { + var src1 = operation.GetSource(0); + var src2 = operation.GetSource(1); + + SpvInstruction result; + + if (operation.Inst.HasFlag(Instruction.FP64)) + { + result = emitF(context.TypeBool(), context.GetFP64(src1), context.GetFP64(src2)); + } + else if (operation.Inst.HasFlag(Instruction.FP32)) + { + result = emitF(context.TypeBool(), context.GetFP32(src1), context.GetFP32(src2)); + } + else + { + result = emitI(context.TypeBool(), context.GetS32(src1), context.GetS32(src2)); + } + + return new OperationResult(AggregateType.Bool, result); + } + + private static OperationResult GenerateCompareU32( + CodeGenContext context, + AstOperation operation, + Func<SpvInstruction, SpvInstruction, SpvInstruction, SpvInstruction> emitU) + { + var src1 = operation.GetSource(0); + var src2 = operation.GetSource(1); + + var result = emitU(context.TypeBool(), context.GetU32(src1), context.GetU32(src2)); + + return new OperationResult(AggregateType.Bool, result); + } + + private static OperationResult GenerateAtomicMemoryBinary( + CodeGenContext context, + AstOperation operation, + Func<SpvInstruction, SpvInstruction, SpvInstruction, SpvInstruction, SpvInstruction, SpvInstruction> emitU) + { + var value = context.GetU32(operation.GetSource(2)); + + SpvInstruction elemPointer; + + if (operation.StorageKind == StorageKind.StorageBuffer) + { + elemPointer = GetStorageElemPointer(context, operation); + } + else if (operation.StorageKind == StorageKind.SharedMemory) + { + var offset = context.GetU32(operation.GetSource(0)); + elemPointer = context.AccessChain(context.TypePointer(StorageClass.Workgroup, context.TypeU32()), context.SharedMemory, offset); + } + else + { + throw new InvalidOperationException($"Invalid storage kind \"{operation.StorageKind}\"."); + } + + var one = context.Constant(context.TypeU32(), 1); + var zero = context.Constant(context.TypeU32(), 0); + + return new OperationResult(AggregateType.U32, emitU(context.TypeU32(), elemPointer, one, zero, value)); + } + + private static OperationResult GenerateAtomicMemoryCas(CodeGenContext context, AstOperation operation) + { + var value0 = context.GetU32(operation.GetSource(2)); + var value1 = context.GetU32(operation.GetSource(3)); + + SpvInstruction elemPointer; + + if (operation.StorageKind == StorageKind.StorageBuffer) + { + elemPointer = GetStorageElemPointer(context, operation); + } + else if (operation.StorageKind == StorageKind.SharedMemory) + { + var offset = context.GetU32(operation.GetSource(0)); + elemPointer = context.AccessChain(context.TypePointer(StorageClass.Workgroup, context.TypeU32()), context.SharedMemory, offset); + } + else + { + throw new InvalidOperationException($"Invalid storage kind \"{operation.StorageKind}\"."); + } + + var one = context.Constant(context.TypeU32(), 1); + var zero = context.Constant(context.TypeU32(), 0); + + return new OperationResult(AggregateType.U32, context.AtomicCompareExchange(context.TypeU32(), elemPointer, one, zero, zero, value1, value0)); + } + + private static OperationResult GenerateLoadOrStore(CodeGenContext context, AstOperation operation, bool isStore) + { + StorageKind storageKind = operation.StorageKind; + + SpvInstruction pointer; + AggregateType varType; + int srcIndex = 0; + + switch (storageKind) + { + case StorageKind.Input: + case StorageKind.InputPerPatch: + case StorageKind.Output: + case StorageKind.OutputPerPatch: + if (!(operation.GetSource(srcIndex++) is AstOperand varId) || varId.Type != OperandType.Constant) + { + throw new InvalidOperationException($"First input of {operation.Inst} with {storageKind} storage must be a constant operand."); + } + + IoVariable ioVariable = (IoVariable)varId.Value; + bool isOutput = storageKind.IsOutput(); + bool isPerPatch = storageKind.IsPerPatch(); + int location = 0; + int component = 0; + + if (context.Config.HasPerLocationInputOrOutput(ioVariable, isOutput)) + { + if (!(operation.GetSource(srcIndex++) is AstOperand vecIndex) || vecIndex.Type != OperandType.Constant) + { + throw new InvalidOperationException($"Second input of {operation.Inst} with {storageKind} storage must be a constant operand."); + } + + location = vecIndex.Value; + + if (operation.SourcesCount > srcIndex && + operation.GetSource(srcIndex) is AstOperand elemIndex && + elemIndex.Type == OperandType.Constant && + context.Config.HasPerLocationInputOrOutputComponent(ioVariable, location, elemIndex.Value, isOutput)) + { + component = elemIndex.Value; + srcIndex++; + } + } + + if (ioVariable == IoVariable.UserDefined) + { + varType = context.Config.GetUserDefinedType(location, isOutput); + } + else if (ioVariable == IoVariable.FragmentOutputColor) + { + varType = context.Config.GetFragmentOutputColorType(location); + } + else if (ioVariable == IoVariable.FragmentOutputIsBgra) + { + var pointerType = context.TypePointer(StorageClass.Uniform, context.TypeU32()); + var elemIndex = context.Get(AggregateType.S32, operation.GetSource(srcIndex++)); + pointer = context.AccessChain(pointerType, context.SupportBuffer, context.Constant(context.TypeU32(), 1), elemIndex); + varType = AggregateType.U32; + + break; + } + else if (ioVariable == IoVariable.SupportBlockRenderScale) + { + var pointerType = context.TypePointer(StorageClass.Uniform, context.TypeFP32()); + var elemIndex = context.Get(AggregateType.S32, operation.GetSource(srcIndex++)); + pointer = context.AccessChain(pointerType, context.SupportBuffer, context.Constant(context.TypeU32(), 4), elemIndex); + varType = AggregateType.FP32; + + break; + } + else if (ioVariable == IoVariable.SupportBlockViewInverse) + { + var pointerType = context.TypePointer(StorageClass.Uniform, context.TypeFP32()); + var elemIndex = context.Get(AggregateType.S32, operation.GetSource(srcIndex++)); + pointer = context.AccessChain(pointerType, context.SupportBuffer, context.Constant(context.TypeU32(), 2), elemIndex); + varType = AggregateType.FP32; + + break; + } + else + { + (_, varType) = IoMap.GetSpirvBuiltIn(ioVariable); + } + + varType &= AggregateType.ElementTypeMask; + + int inputsCount = (isStore ? operation.SourcesCount - 1 : operation.SourcesCount) - srcIndex; + var storageClass = isOutput ? StorageClass.Output : StorageClass.Input; + + var ioDefinition = new IoDefinition(storageKind, ioVariable, location, component); + var dict = isPerPatch + ? (isOutput ? context.OutputsPerPatch : context.InputsPerPatch) + : (isOutput ? context.Outputs : context.Inputs); + + SpvInstruction baseObj = dict[ioDefinition]; + SpvInstruction e0, e1, e2; + + switch (inputsCount) + { + case 0: + pointer = baseObj; + break; + case 1: + e0 = context.Get(AggregateType.S32, operation.GetSource(srcIndex++)); + pointer = context.AccessChain(context.TypePointer(storageClass, context.GetType(varType)), baseObj, e0); + break; + case 2: + e0 = context.Get(AggregateType.S32, operation.GetSource(srcIndex++)); + e1 = context.Get(AggregateType.S32, operation.GetSource(srcIndex++)); + pointer = context.AccessChain(context.TypePointer(storageClass, context.GetType(varType)), baseObj, e0, e1); + break; + case 3: + e0 = context.Get(AggregateType.S32, operation.GetSource(srcIndex++)); + e1 = context.Get(AggregateType.S32, operation.GetSource(srcIndex++)); + e2 = context.Get(AggregateType.S32, operation.GetSource(srcIndex++)); + pointer = context.AccessChain(context.TypePointer(storageClass, context.GetType(varType)), baseObj, e0, e1, e2); + break; + default: + var indexes = new SpvInstruction[inputsCount]; + int index = 0; + + for (; index < inputsCount; srcIndex++, index++) + { + indexes[index] = context.Get(AggregateType.S32, operation.GetSource(srcIndex)); + } + + pointer = context.AccessChain(context.TypePointer(storageClass, context.GetType(varType)), baseObj, indexes); + break; + } + break; + + default: + throw new InvalidOperationException($"Invalid storage kind {storageKind}."); + } + + if (isStore) + { + context.Store(pointer, context.Get(varType, operation.GetSource(srcIndex))); + return OperationResult.Invalid; + } + else + { + var result = context.Load(context.GetType(varType), pointer); + return new OperationResult(varType, result); + } + } + + private static SpvInstruction GetScalarInput(CodeGenContext context, IoVariable ioVariable) + { + (_, var varType) = IoMap.GetSpirvBuiltIn(ioVariable); + varType &= AggregateType.ElementTypeMask; + + var ioDefinition = new IoDefinition(StorageKind.Input, ioVariable); + + return context.Load(context.GetType(varType), context.Inputs[ioDefinition]); + } + + private static void GenerateStoreSharedSmallInt(CodeGenContext context, AstOperation operation, int bitSize) + { + var offset = context.Get(AggregateType.U32, operation.GetSource(0)); + var value = context.Get(AggregateType.U32, operation.GetSource(1)); + + var wordOffset = context.ShiftRightLogical(context.TypeU32(), offset, context.Constant(context.TypeU32(), 2)); + var bitOffset = context.BitwiseAnd(context.TypeU32(), offset, context.Constant(context.TypeU32(), 3)); + bitOffset = context.ShiftLeftLogical(context.TypeU32(), bitOffset, context.Constant(context.TypeU32(), 3)); + + var memory = context.SharedMemory; + + var elemPointer = context.AccessChain(context.TypePointer(StorageClass.Workgroup, context.TypeU32()), memory, wordOffset); + + GenerateStoreSmallInt(context, elemPointer, bitOffset, value, bitSize); + } + + private static void GenerateStoreStorageSmallInt(CodeGenContext context, AstOperation operation, int bitSize) + { + var i0 = context.Get(AggregateType.S32, operation.GetSource(0)); + var offset = context.Get(AggregateType.U32, operation.GetSource(1)); + var value = context.Get(AggregateType.U32, operation.GetSource(2)); + + var wordOffset = context.ShiftRightLogical(context.TypeU32(), offset, context.Constant(context.TypeU32(), 2)); + var bitOffset = context.BitwiseAnd(context.TypeU32(), offset, context.Constant(context.TypeU32(), 3)); + bitOffset = context.ShiftLeftLogical(context.TypeU32(), bitOffset, context.Constant(context.TypeU32(), 3)); + + var sbVariable = context.StorageBuffersArray; + + var i1 = context.Constant(context.TypeS32(), 0); + + var elemPointer = context.AccessChain(context.TypePointer(StorageClass.Uniform, context.TypeU32()), sbVariable, i0, i1, wordOffset); + + GenerateStoreSmallInt(context, elemPointer, bitOffset, value, bitSize); + } + + private static void GenerateStoreSmallInt( + CodeGenContext context, + SpvInstruction elemPointer, + SpvInstruction bitOffset, + SpvInstruction value, + int bitSize) + { + var loopStart = context.Label(); + var loopEnd = context.Label(); + + context.Branch(loopStart); + context.AddLabel(loopStart); + + var oldValue = context.Load(context.TypeU32(), elemPointer); + var newValue = context.BitFieldInsert(context.TypeU32(), oldValue, value, bitOffset, context.Constant(context.TypeU32(), bitSize)); + + var one = context.Constant(context.TypeU32(), 1); + var zero = context.Constant(context.TypeU32(), 0); + + var result = context.AtomicCompareExchange(context.TypeU32(), elemPointer, one, zero, zero, newValue, oldValue); + var failed = context.INotEqual(context.TypeBool(), result, oldValue); + + context.LoopMerge(loopEnd, loopStart, LoopControlMask.MaskNone); + context.BranchConditional(failed, loopStart, loopEnd); + + context.AddLabel(loopEnd); + } + + private static OperationResult GetZeroOperationResult( + CodeGenContext context, + AstTextureOperation texOp, + AggregateType scalarType, + bool isVector) + { + var zero = scalarType switch + { + AggregateType.S32 => context.Constant(context.TypeS32(), 0), + AggregateType.U32 => context.Constant(context.TypeU32(), 0u), + _ => context.Constant(context.TypeFP32(), 0f), + }; + + if (isVector) + { + AggregateType outputType = texOp.GetVectorType(scalarType); + + if ((outputType & AggregateType.ElementCountMask) != 0) + { + int componentsCount = BitOperations.PopCount((uint)texOp.Index); + + SpvInstruction[] values = new SpvInstruction[componentsCount]; + + values.AsSpan().Fill(zero); + + return new OperationResult(outputType, context.ConstantComposite(context.GetType(outputType), values)); + } + } + + return new OperationResult(scalarType, zero); + } + + private static SpvInstruction GetSwizzledResult(CodeGenContext context, SpvInstruction vector, AggregateType swizzledResultType, int mask) + { + if ((swizzledResultType & AggregateType.ElementCountMask) != 0) + { + SpvLiteralInteger[] components = new SpvLiteralInteger[BitOperations.PopCount((uint)mask)]; + + int componentIndex = 0; + + for (int i = 0; i < 4; i++) + { + if ((mask & (1 << i)) != 0) + { + components[componentIndex++] = i; + } + } + + return context.VectorShuffle(context.GetType(swizzledResultType), vector, vector, components); + } + else + { + int componentIndex = (int)BitOperations.TrailingZeroCount(mask); + + return context.CompositeExtract(context.GetType(swizzledResultType), vector, (SpvLiteralInteger)componentIndex); + } + } + + private static SpvInstruction GetStorageElemPointer(CodeGenContext context, AstOperation operation) + { + var sbVariable = context.StorageBuffersArray; + var i0 = context.Get(AggregateType.S32, operation.GetSource(0)); + var i1 = context.Constant(context.TypeS32(), 0); + var i2 = context.Get(AggregateType.S32, operation.GetSource(1)); + + return context.AccessChain(context.TypePointer(StorageClass.Uniform, context.TypeU32()), sbVariable, i0, i1, i2); + } + + private static OperationResult GenerateUnary( + CodeGenContext context, + AstOperation operation, + Func<SpvInstruction, SpvInstruction, SpvInstruction> emitF, + Func<SpvInstruction, SpvInstruction, SpvInstruction> emitI) + { + var source = operation.GetSource(0); + + if (operation.Inst.HasFlag(Instruction.FP64)) + { + return new OperationResult(AggregateType.FP64, emitF(context.TypeFP64(), context.GetFP64(source))); + } + else if (operation.Inst.HasFlag(Instruction.FP32)) + { + return new OperationResult(AggregateType.FP32, emitF(context.TypeFP32(), context.GetFP32(source))); + } + else + { + return new OperationResult(AggregateType.S32, emitI(context.TypeS32(), context.GetS32(source))); + } + } + + private static OperationResult GenerateUnaryBool( + CodeGenContext context, + AstOperation operation, + Func<SpvInstruction, SpvInstruction, SpvInstruction> emitB) + { + var source = operation.GetSource(0); + return new OperationResult(AggregateType.Bool, emitB(context.TypeBool(), context.Get(AggregateType.Bool, source))); + } + + private static OperationResult GenerateUnaryFP32( + CodeGenContext context, + AstOperation operation, + Func<SpvInstruction, SpvInstruction, SpvInstruction> emit) + { + var source = operation.GetSource(0); + return new OperationResult(AggregateType.FP32, emit(context.TypeFP32(), context.GetFP32(source))); + } + + private static OperationResult GenerateUnaryS32( + CodeGenContext context, + AstOperation operation, + Func<SpvInstruction, SpvInstruction, SpvInstruction> emitS) + { + var source = operation.GetSource(0); + return new OperationResult(AggregateType.S32, emitS(context.TypeS32(), context.GetS32(source))); + } + + private static OperationResult GenerateBinary( + CodeGenContext context, + AstOperation operation, + Func<SpvInstruction, SpvInstruction, SpvInstruction, SpvInstruction> emitF, + Func<SpvInstruction, SpvInstruction, SpvInstruction, SpvInstruction> emitI) + { + var src1 = operation.GetSource(0); + var src2 = operation.GetSource(1); + + if (operation.Inst.HasFlag(Instruction.FP64)) + { + var result = emitF(context.TypeFP64(), context.GetFP64(src1), context.GetFP64(src2)); + + if (!context.Config.GpuAccessor.QueryHostReducedPrecision()) + { + context.Decorate(result, Decoration.NoContraction); + } + + return new OperationResult(AggregateType.FP64, result); + } + else if (operation.Inst.HasFlag(Instruction.FP32)) + { + var result = emitF(context.TypeFP32(), context.GetFP32(src1), context.GetFP32(src2)); + + if (!context.Config.GpuAccessor.QueryHostReducedPrecision()) + { + context.Decorate(result, Decoration.NoContraction); + } + + return new OperationResult(AggregateType.FP32, result); + } + else + { + return new OperationResult(AggregateType.S32, emitI(context.TypeS32(), context.GetS32(src1), context.GetS32(src2))); + } + } + + private static OperationResult GenerateBinaryBool( + CodeGenContext context, + AstOperation operation, + Func<SpvInstruction, SpvInstruction, SpvInstruction, SpvInstruction> emitB) + { + var src1 = operation.GetSource(0); + var src2 = operation.GetSource(1); + + return new OperationResult(AggregateType.Bool, emitB(context.TypeBool(), context.Get(AggregateType.Bool, src1), context.Get(AggregateType.Bool, src2))); + } + + private static OperationResult GenerateBinaryS32( + CodeGenContext context, + AstOperation operation, + Func<SpvInstruction, SpvInstruction, SpvInstruction, SpvInstruction> emitS) + { + var src1 = operation.GetSource(0); + var src2 = operation.GetSource(1); + + return new OperationResult(AggregateType.S32, emitS(context.TypeS32(), context.GetS32(src1), context.GetS32(src2))); + } + + private static OperationResult GenerateBinaryU32( + CodeGenContext context, + AstOperation operation, + Func<SpvInstruction, SpvInstruction, SpvInstruction, SpvInstruction> emitU) + { + var src1 = operation.GetSource(0); + var src2 = operation.GetSource(1); + + return new OperationResult(AggregateType.U32, emitU(context.TypeU32(), context.GetU32(src1), context.GetU32(src2))); + } + + private static OperationResult GenerateTernary( + CodeGenContext context, + AstOperation operation, + Func<SpvInstruction, SpvInstruction, SpvInstruction, SpvInstruction, SpvInstruction> emitF, + Func<SpvInstruction, SpvInstruction, SpvInstruction, SpvInstruction, SpvInstruction> emitI) + { + var src1 = operation.GetSource(0); + var src2 = operation.GetSource(1); + var src3 = operation.GetSource(2); + + if (operation.Inst.HasFlag(Instruction.FP64)) + { + var result = emitF(context.TypeFP64(), context.GetFP64(src1), context.GetFP64(src2), context.GetFP64(src3)); + + if (!context.Config.GpuAccessor.QueryHostReducedPrecision()) + { + context.Decorate(result, Decoration.NoContraction); + } + + return new OperationResult(AggregateType.FP64, result); + } + else if (operation.Inst.HasFlag(Instruction.FP32)) + { + var result = emitF(context.TypeFP32(), context.GetFP32(src1), context.GetFP32(src2), context.GetFP32(src3)); + + if (!context.Config.GpuAccessor.QueryHostReducedPrecision()) + { + context.Decorate(result, Decoration.NoContraction); + } + + return new OperationResult(AggregateType.FP32, result); + } + else + { + return new OperationResult(AggregateType.S32, emitI(context.TypeS32(), context.GetS32(src1), context.GetS32(src2), context.GetS32(src3))); + } + } + + private static OperationResult GenerateTernaryU32( + CodeGenContext context, + AstOperation operation, + Func<SpvInstruction, SpvInstruction, SpvInstruction, SpvInstruction, SpvInstruction> emitU) + { + var src1 = operation.GetSource(0); + var src2 = operation.GetSource(1); + var src3 = operation.GetSource(2); + + return new OperationResult(AggregateType.U32, emitU( + context.TypeU32(), + context.GetU32(src1), + context.GetU32(src2), + context.GetU32(src3))); + } + + private static OperationResult GenerateBitfieldExtractS32( + CodeGenContext context, + AstOperation operation, + Func<SpvInstruction, SpvInstruction, SpvInstruction, SpvInstruction, SpvInstruction> emitS) + { + var src1 = operation.GetSource(0); + var src2 = operation.GetSource(1); + var src3 = operation.GetSource(2); + + return new OperationResult(AggregateType.S32, emitS( + context.TypeS32(), + context.GetS32(src1), + context.GetU32(src2), + context.GetU32(src3))); + } + + private static OperationResult GenerateBitfieldInsert( + CodeGenContext context, + AstOperation operation, + Func<SpvInstruction, SpvInstruction, SpvInstruction, SpvInstruction, SpvInstruction, SpvInstruction> emitS) + { + var src1 = operation.GetSource(0); + var src2 = operation.GetSource(1); + var src3 = operation.GetSource(2); + var src4 = operation.GetSource(3); + + return new OperationResult(AggregateType.U32, emitS( + context.TypeU32(), + context.GetU32(src1), + context.GetU32(src2), + context.GetU32(src3), + context.GetU32(src4))); + } + } +} diff --git a/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/IoMap.cs b/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/IoMap.cs new file mode 100644 index 00000000..d2ff0085 --- /dev/null +++ b/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/IoMap.cs @@ -0,0 +1,86 @@ +using Ryujinx.Graphics.Shader.IntermediateRepresentation; +using Ryujinx.Graphics.Shader.Translation; +using static Spv.Specification; + +namespace Ryujinx.Graphics.Shader.CodeGen.Spirv +{ + static class IoMap + { + // At least 16 attributes are guaranteed by the spec. + private const int MaxAttributes = 16; + + public static (BuiltIn, AggregateType) GetSpirvBuiltIn(IoVariable ioVariable) + { + return ioVariable switch + { + IoVariable.BaseInstance => (BuiltIn.BaseInstance, AggregateType.S32), + IoVariable.BaseVertex => (BuiltIn.BaseVertex, AggregateType.S32), + IoVariable.ClipDistance => (BuiltIn.ClipDistance, AggregateType.Array | AggregateType.FP32), + IoVariable.CtaId => (BuiltIn.WorkgroupId, AggregateType.Vector3 | AggregateType.U32), + IoVariable.DrawIndex => (BuiltIn.DrawIndex, AggregateType.S32), + IoVariable.FragmentCoord => (BuiltIn.FragCoord, AggregateType.Vector4 | AggregateType.FP32), + IoVariable.FragmentOutputDepth => (BuiltIn.FragDepth, AggregateType.FP32), + IoVariable.FrontFacing => (BuiltIn.FrontFacing, AggregateType.Bool), + IoVariable.InstanceId => (BuiltIn.InstanceId, AggregateType.S32), + IoVariable.InstanceIndex => (BuiltIn.InstanceIndex, AggregateType.S32), + IoVariable.InvocationId => (BuiltIn.InvocationId, AggregateType.S32), + IoVariable.Layer => (BuiltIn.Layer, AggregateType.S32), + IoVariable.PatchVertices => (BuiltIn.PatchVertices, AggregateType.S32), + IoVariable.PointCoord => (BuiltIn.PointCoord, AggregateType.Vector2 | AggregateType.FP32), + IoVariable.PointSize => (BuiltIn.PointSize, AggregateType.FP32), + IoVariable.Position => (BuiltIn.Position, AggregateType.Vector4 | AggregateType.FP32), + IoVariable.PrimitiveId => (BuiltIn.PrimitiveId, AggregateType.S32), + IoVariable.SubgroupEqMask => (BuiltIn.SubgroupEqMask, AggregateType.Vector4 | AggregateType.U32), + IoVariable.SubgroupGeMask => (BuiltIn.SubgroupGeMask, AggregateType.Vector4 | AggregateType.U32), + IoVariable.SubgroupGtMask => (BuiltIn.SubgroupGtMask, AggregateType.Vector4 | AggregateType.U32), + IoVariable.SubgroupLaneId => (BuiltIn.SubgroupLocalInvocationId, AggregateType.U32), + IoVariable.SubgroupLeMask => (BuiltIn.SubgroupLeMask, AggregateType.Vector4 | AggregateType.U32), + IoVariable.SubgroupLtMask => (BuiltIn.SubgroupLtMask, AggregateType.Vector4 | AggregateType.U32), + IoVariable.TessellationCoord => (BuiltIn.TessCoord, AggregateType.Vector3 | AggregateType.FP32), + IoVariable.TessellationLevelInner => (BuiltIn.TessLevelInner, AggregateType.Array | AggregateType.FP32), + IoVariable.TessellationLevelOuter => (BuiltIn.TessLevelOuter, AggregateType.Array | AggregateType.FP32), + IoVariable.ThreadId => (BuiltIn.LocalInvocationId, AggregateType.Vector3 | AggregateType.U32), + IoVariable.ThreadKill => (BuiltIn.HelperInvocation, AggregateType.Bool), + IoVariable.VertexId => (BuiltIn.VertexId, AggregateType.S32), + IoVariable.VertexIndex => (BuiltIn.VertexIndex, AggregateType.S32), + IoVariable.ViewportIndex => (BuiltIn.ViewportIndex, AggregateType.S32), + IoVariable.ViewportMask => (BuiltIn.ViewportMaskNV, AggregateType.Array | AggregateType.S32), + _ => (default, AggregateType.Invalid) + }; + } + + public static int GetSpirvBuiltInArrayLength(IoVariable ioVariable) + { + return ioVariable switch + { + IoVariable.ClipDistance => 8, + IoVariable.TessellationLevelInner => 2, + IoVariable.TessellationLevelOuter => 4, + IoVariable.ViewportMask => 1, + IoVariable.UserDefined => MaxAttributes, + _ => 1 + }; + } + + public static bool IsPerVertex(IoVariable ioVariable, ShaderStage stage, bool isOutput) + { + switch (ioVariable) + { + case IoVariable.Layer: + case IoVariable.ViewportIndex: + case IoVariable.PointSize: + case IoVariable.Position: + case IoVariable.UserDefined: + case IoVariable.ClipDistance: + case IoVariable.PointCoord: + case IoVariable.ViewportMask: + return !isOutput && + (stage == ShaderStage.TessellationControl || + stage == ShaderStage.TessellationEvaluation || + stage == ShaderStage.Geometry); + } + + return false; + } + } +}
\ No newline at end of file diff --git a/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/OperationResult.cs b/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/OperationResult.cs new file mode 100644 index 00000000..f80c8110 --- /dev/null +++ b/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/OperationResult.cs @@ -0,0 +1,19 @@ +using Ryujinx.Graphics.Shader.Translation; +using Spv.Generator; + +namespace Ryujinx.Graphics.Shader.CodeGen.Spirv +{ + readonly struct OperationResult + { + public static OperationResult Invalid => new OperationResult(AggregateType.Invalid, null); + + public AggregateType Type { get; } + public Instruction Value { get; } + + public OperationResult(AggregateType type, Instruction value) + { + Type = type; + Value = value; + } + } +} diff --git a/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/ScalingHelpers.cs b/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/ScalingHelpers.cs new file mode 100644 index 00000000..f6c218c6 --- /dev/null +++ b/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/ScalingHelpers.cs @@ -0,0 +1,227 @@ +using Ryujinx.Graphics.Shader.IntermediateRepresentation; +using Ryujinx.Graphics.Shader.StructuredIr; +using Ryujinx.Graphics.Shader.Translation; +using static Spv.Specification; + +namespace Ryujinx.Graphics.Shader.CodeGen.Spirv +{ + using SpvInstruction = Spv.Generator.Instruction; + + static class ScalingHelpers + { + public static SpvInstruction ApplyScaling( + CodeGenContext context, + AstTextureOperation texOp, + SpvInstruction vector, + bool intCoords, + bool isBindless, + bool isIndexed, + bool isArray, + int pCount) + { + if (intCoords) + { + if (context.Config.Stage.SupportsRenderScale() && + !isBindless && + !isIndexed) + { + int index = texOp.Inst == Instruction.ImageLoad + ? context.Config.GetTextureDescriptors().Length + context.Config.FindImageDescriptorIndex(texOp) + : context.Config.FindTextureDescriptorIndex(texOp); + + if (pCount == 3 && isArray) + { + return ApplyScaling2DArray(context, vector, index); + } + else if (pCount == 2 && !isArray) + { + return ApplyScaling2D(context, vector, index); + } + } + } + + return vector; + } + + private static SpvInstruction ApplyScaling2DArray(CodeGenContext context, SpvInstruction vector, int index) + { + // The array index is not scaled, just x and y. + var vectorXY = context.VectorShuffle(context.TypeVector(context.TypeS32(), 2), vector, vector, 0, 1); + var vectorZ = context.CompositeExtract(context.TypeS32(), vector, 2); + var vectorXYScaled = ApplyScaling2D(context, vectorXY, index); + var vectorScaled = context.CompositeConstruct(context.TypeVector(context.TypeS32(), 3), vectorXYScaled, vectorZ); + + return vectorScaled; + } + + private static SpvInstruction ApplyScaling2D(CodeGenContext context, SpvInstruction vector, int index) + { + var pointerType = context.TypePointer(StorageClass.Uniform, context.TypeFP32()); + var fieldIndex = context.Constant(context.TypeU32(), 4); + var scaleIndex = context.Constant(context.TypeU32(), index); + + if (context.Config.Stage == ShaderStage.Vertex) + { + var scaleCountPointerType = context.TypePointer(StorageClass.Uniform, context.TypeS32()); + var scaleCountElemPointer = context.AccessChain(scaleCountPointerType, context.SupportBuffer, context.Constant(context.TypeU32(), 3)); + var scaleCount = context.Load(context.TypeS32(), scaleCountElemPointer); + + scaleIndex = context.IAdd(context.TypeU32(), scaleIndex, scaleCount); + } + + scaleIndex = context.IAdd(context.TypeU32(), scaleIndex, context.Constant(context.TypeU32(), 1)); + + var scaleElemPointer = context.AccessChain(pointerType, context.SupportBuffer, fieldIndex, scaleIndex); + var scale = context.Load(context.TypeFP32(), scaleElemPointer); + + var ivector2Type = context.TypeVector(context.TypeS32(), 2); + var localVector = context.CoordTemp; + + var passthrough = context.FOrdEqual(context.TypeBool(), scale, context.Constant(context.TypeFP32(), 1f)); + + var mergeLabel = context.Label(); + + if (context.Config.Stage == ShaderStage.Fragment) + { + var scaledInterpolatedLabel = context.Label(); + var scaledNoInterpolationLabel = context.Label(); + + var needsInterpolation = context.FOrdLessThan(context.TypeBool(), scale, context.Constant(context.TypeFP32(), 0f)); + + context.SelectionMerge(mergeLabel, SelectionControlMask.MaskNone); + context.BranchConditional(needsInterpolation, scaledInterpolatedLabel, scaledNoInterpolationLabel); + + // scale < 0.0 + context.AddLabel(scaledInterpolatedLabel); + + ApplyScalingInterpolated(context, localVector, vector, scale); + context.Branch(mergeLabel); + + // scale >= 0.0 + context.AddLabel(scaledNoInterpolationLabel); + + ApplyScalingNoInterpolation(context, localVector, vector, scale); + context.Branch(mergeLabel); + + context.AddLabel(mergeLabel); + + var passthroughLabel = context.Label(); + var finalMergeLabel = context.Label(); + + context.SelectionMerge(finalMergeLabel, SelectionControlMask.MaskNone); + context.BranchConditional(passthrough, passthroughLabel, finalMergeLabel); + + context.AddLabel(passthroughLabel); + + context.Store(localVector, vector); + context.Branch(finalMergeLabel); + + context.AddLabel(finalMergeLabel); + + return context.Load(ivector2Type, localVector); + } + else + { + var passthroughLabel = context.Label(); + var scaledLabel = context.Label(); + + context.SelectionMerge(mergeLabel, SelectionControlMask.MaskNone); + context.BranchConditional(passthrough, passthroughLabel, scaledLabel); + + // scale == 1.0 + context.AddLabel(passthroughLabel); + + context.Store(localVector, vector); + context.Branch(mergeLabel); + + // scale != 1.0 + context.AddLabel(scaledLabel); + + ApplyScalingNoInterpolation(context, localVector, vector, scale); + context.Branch(mergeLabel); + + context.AddLabel(mergeLabel); + + return context.Load(ivector2Type, localVector); + } + } + + private static void ApplyScalingInterpolated(CodeGenContext context, SpvInstruction output, SpvInstruction vector, SpvInstruction scale) + { + var vector2Type = context.TypeVector(context.TypeFP32(), 2); + + var scaleNegated = context.FNegate(context.TypeFP32(), scale); + var scaleVector = context.CompositeConstruct(vector2Type, scaleNegated, scaleNegated); + + var vectorFloat = context.ConvertSToF(vector2Type, vector); + var vectorScaled = context.VectorTimesScalar(vector2Type, vectorFloat, scaleNegated); + + var fragCoordPointer = context.Inputs[new IoDefinition(StorageKind.Input, IoVariable.FragmentCoord)]; + var fragCoord = context.Load(context.TypeVector(context.TypeFP32(), 4), fragCoordPointer); + var fragCoordXY = context.VectorShuffle(vector2Type, fragCoord, fragCoord, 0, 1); + + var scaleMod = context.FMod(vector2Type, fragCoordXY, scaleVector); + var vectorInterpolated = context.FAdd(vector2Type, vectorScaled, scaleMod); + + context.Store(output, context.ConvertFToS(context.TypeVector(context.TypeS32(), 2), vectorInterpolated)); + } + + private static void ApplyScalingNoInterpolation(CodeGenContext context, SpvInstruction output, SpvInstruction vector, SpvInstruction scale) + { + if (context.Config.Stage == ShaderStage.Vertex) + { + scale = context.GlslFAbs(context.TypeFP32(), scale); + } + + var vector2Type = context.TypeVector(context.TypeFP32(), 2); + + var vectorFloat = context.ConvertSToF(vector2Type, vector); + var vectorScaled = context.VectorTimesScalar(vector2Type, vectorFloat, scale); + + context.Store(output, context.ConvertFToS(context.TypeVector(context.TypeS32(), 2), vectorScaled)); + } + + public static SpvInstruction ApplyUnscaling( + CodeGenContext context, + AstTextureOperation texOp, + SpvInstruction size, + bool isBindless, + bool isIndexed) + { + if (context.Config.Stage.SupportsRenderScale() && + !isBindless && + !isIndexed) + { + int index = context.Config.FindTextureDescriptorIndex(texOp); + + var pointerType = context.TypePointer(StorageClass.Uniform, context.TypeFP32()); + var fieldIndex = context.Constant(context.TypeU32(), 4); + var scaleIndex = context.Constant(context.TypeU32(), index); + + if (context.Config.Stage == ShaderStage.Vertex) + { + var scaleCountPointerType = context.TypePointer(StorageClass.Uniform, context.TypeS32()); + var scaleCountElemPointer = context.AccessChain(scaleCountPointerType, context.SupportBuffer, context.Constant(context.TypeU32(), 3)); + var scaleCount = context.Load(context.TypeS32(), scaleCountElemPointer); + + scaleIndex = context.IAdd(context.TypeU32(), scaleIndex, scaleCount); + } + + scaleIndex = context.IAdd(context.TypeU32(), scaleIndex, context.Constant(context.TypeU32(), 1)); + + var scaleElemPointer = context.AccessChain(pointerType, context.SupportBuffer, fieldIndex, scaleIndex); + var scale = context.GlslFAbs(context.TypeFP32(), context.Load(context.TypeFP32(), scaleElemPointer)); + + var passthrough = context.FOrdEqual(context.TypeBool(), scale, context.Constant(context.TypeFP32(), 1f)); + + var sizeFloat = context.ConvertSToF(context.TypeFP32(), size); + var sizeUnscaled = context.FDiv(context.TypeFP32(), sizeFloat, scale); + var sizeUnscaledInt = context.ConvertFToS(context.TypeS32(), sizeUnscaled); + + return context.Select(context.TypeS32(), passthrough, size, sizeUnscaledInt); + } + + return size; + } + } +}
\ No newline at end of file diff --git a/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/SpirvDelegates.cs b/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/SpirvDelegates.cs new file mode 100644 index 00000000..3ccfd7f5 --- /dev/null +++ b/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/SpirvDelegates.cs @@ -0,0 +1,226 @@ +using FuncBinaryInstruction = System.Func<Spv.Generator.Instruction, Spv.Generator.Instruction, Spv.Generator.Instruction, Spv.Generator.Instruction>; +using FuncQuaternaryInstruction = System.Func<Spv.Generator.Instruction, Spv.Generator.Instruction, Spv.Generator.Instruction, Spv.Generator.Instruction, Spv.Generator.Instruction, Spv.Generator.Instruction>; +using FuncTernaryInstruction = System.Func<Spv.Generator.Instruction, Spv.Generator.Instruction, Spv.Generator.Instruction, Spv.Generator.Instruction, Spv.Generator.Instruction>; +using FuncUnaryInstruction = System.Func<Spv.Generator.Instruction, Spv.Generator.Instruction, Spv.Generator.Instruction>; + +namespace Ryujinx.Graphics.Shader.CodeGen.Spirv +{ + /// <summary> + /// Delegate cache for SPIR-V instruction generators. Avoids delegate allocation when passing generators as arguments. + /// </summary> + internal readonly struct SpirvDelegates + { + // Unary + public readonly FuncUnaryInstruction GlslFAbs; + public readonly FuncUnaryInstruction GlslSAbs; + public readonly FuncUnaryInstruction GlslCeil; + public readonly FuncUnaryInstruction GlslCos; + public readonly FuncUnaryInstruction GlslExp2; + public readonly FuncUnaryInstruction GlslFloor; + public readonly FuncUnaryInstruction GlslLog2; + public readonly FuncUnaryInstruction FNegate; + public readonly FuncUnaryInstruction SNegate; + public readonly FuncUnaryInstruction GlslInverseSqrt; + public readonly FuncUnaryInstruction GlslRoundEven; + public readonly FuncUnaryInstruction GlslSin; + public readonly FuncUnaryInstruction GlslSqrt; + public readonly FuncUnaryInstruction GlslTrunc; + + // UnaryBool + public readonly FuncUnaryInstruction LogicalNot; + + // UnaryFP32 + public readonly FuncUnaryInstruction DPdx; + public readonly FuncUnaryInstruction DPdy; + + // UnaryS32 + public readonly FuncUnaryInstruction BitCount; + public readonly FuncUnaryInstruction BitReverse; + public readonly FuncUnaryInstruction Not; + + // Compare + public readonly FuncBinaryInstruction FOrdEqual; + public readonly FuncBinaryInstruction IEqual; + public readonly FuncBinaryInstruction FOrdGreaterThan; + public readonly FuncBinaryInstruction SGreaterThan; + public readonly FuncBinaryInstruction FOrdGreaterThanEqual; + public readonly FuncBinaryInstruction SGreaterThanEqual; + public readonly FuncBinaryInstruction FOrdLessThan; + public readonly FuncBinaryInstruction SLessThan; + public readonly FuncBinaryInstruction FOrdLessThanEqual; + public readonly FuncBinaryInstruction SLessThanEqual; + public readonly FuncBinaryInstruction FOrdNotEqual; + public readonly FuncBinaryInstruction INotEqual; + + // CompareU32 + public readonly FuncBinaryInstruction UGreaterThanEqual; + public readonly FuncBinaryInstruction UGreaterThan; + public readonly FuncBinaryInstruction ULessThanEqual; + public readonly FuncBinaryInstruction ULessThan; + + // Binary + public readonly FuncBinaryInstruction FAdd; + public readonly FuncBinaryInstruction IAdd; + public readonly FuncBinaryInstruction FDiv; + public readonly FuncBinaryInstruction SDiv; + public readonly FuncBinaryInstruction GlslFMax; + public readonly FuncBinaryInstruction GlslSMax; + public readonly FuncBinaryInstruction GlslFMin; + public readonly FuncBinaryInstruction GlslSMin; + public readonly FuncBinaryInstruction FMul; + public readonly FuncBinaryInstruction IMul; + public readonly FuncBinaryInstruction FSub; + public readonly FuncBinaryInstruction ISub; + + // BinaryBool + public readonly FuncBinaryInstruction LogicalAnd; + public readonly FuncBinaryInstruction LogicalNotEqual; + public readonly FuncBinaryInstruction LogicalOr; + + // BinaryS32 + public readonly FuncBinaryInstruction BitwiseAnd; + public readonly FuncBinaryInstruction BitwiseXor; + public readonly FuncBinaryInstruction BitwiseOr; + public readonly FuncBinaryInstruction ShiftLeftLogical; + public readonly FuncBinaryInstruction ShiftRightArithmetic; + public readonly FuncBinaryInstruction ShiftRightLogical; + + // BinaryU32 + public readonly FuncBinaryInstruction GlslUMax; + public readonly FuncBinaryInstruction GlslUMin; + + // AtomicMemoryBinary + public readonly FuncQuaternaryInstruction AtomicIAdd; + public readonly FuncQuaternaryInstruction AtomicAnd; + public readonly FuncQuaternaryInstruction AtomicSMin; + public readonly FuncQuaternaryInstruction AtomicUMin; + public readonly FuncQuaternaryInstruction AtomicSMax; + public readonly FuncQuaternaryInstruction AtomicUMax; + public readonly FuncQuaternaryInstruction AtomicOr; + public readonly FuncQuaternaryInstruction AtomicExchange; + public readonly FuncQuaternaryInstruction AtomicXor; + + // Ternary + public readonly FuncTernaryInstruction GlslFClamp; + public readonly FuncTernaryInstruction GlslSClamp; + public readonly FuncTernaryInstruction GlslFma; + + // TernaryS32 + public readonly FuncTernaryInstruction BitFieldSExtract; + public readonly FuncTernaryInstruction BitFieldUExtract; + + // TernaryU32 + public readonly FuncTernaryInstruction GlslUClamp; + + // QuaternaryS32 + public readonly FuncQuaternaryInstruction BitFieldInsert; + + public SpirvDelegates(CodeGenContext context) + { + // Unary + GlslFAbs = context.GlslFAbs; + GlslSAbs = context.GlslSAbs; + GlslCeil = context.GlslCeil; + GlslCos = context.GlslCos; + GlslExp2 = context.GlslExp2; + GlslFloor = context.GlslFloor; + GlslLog2 = context.GlslLog2; + FNegate = context.FNegate; + SNegate = context.SNegate; + GlslInverseSqrt = context.GlslInverseSqrt; + GlslRoundEven = context.GlslRoundEven; + GlslSin = context.GlslSin; + GlslSqrt = context.GlslSqrt; + GlslTrunc = context.GlslTrunc; + + // UnaryBool + LogicalNot = context.LogicalNot; + + // UnaryFP32 + DPdx = context.DPdx; + DPdy = context.DPdy; + + // UnaryS32 + BitCount = context.BitCount; + BitReverse = context.BitReverse; + Not = context.Not; + + // Compare + FOrdEqual = context.FOrdEqual; + IEqual = context.IEqual; + FOrdGreaterThan = context.FOrdGreaterThan; + SGreaterThan = context.SGreaterThan; + FOrdGreaterThanEqual = context.FOrdGreaterThanEqual; + SGreaterThanEqual = context.SGreaterThanEqual; + FOrdLessThan = context.FOrdLessThan; + SLessThan = context.SLessThan; + FOrdLessThanEqual = context.FOrdLessThanEqual; + SLessThanEqual = context.SLessThanEqual; + FOrdNotEqual = context.FOrdNotEqual; + INotEqual = context.INotEqual; + + // CompareU32 + UGreaterThanEqual = context.UGreaterThanEqual; + UGreaterThan = context.UGreaterThan; + ULessThanEqual = context.ULessThanEqual; + ULessThan = context.ULessThan; + + // Binary + FAdd = context.FAdd; + IAdd = context.IAdd; + FDiv = context.FDiv; + SDiv = context.SDiv; + GlslFMax = context.GlslFMax; + GlslSMax = context.GlslSMax; + GlslFMin = context.GlslFMin; + GlslSMin = context.GlslSMin; + FMul = context.FMul; + IMul = context.IMul; + FSub = context.FSub; + ISub = context.ISub; + + // BinaryBool + LogicalAnd = context.LogicalAnd; + LogicalNotEqual = context.LogicalNotEqual; + LogicalOr = context.LogicalOr; + + // BinaryS32 + BitwiseAnd = context.BitwiseAnd; + BitwiseXor = context.BitwiseXor; + BitwiseOr = context.BitwiseOr; + ShiftLeftLogical = context.ShiftLeftLogical; + ShiftRightArithmetic = context.ShiftRightArithmetic; + ShiftRightLogical = context.ShiftRightLogical; + + // BinaryU32 + GlslUMax = context.GlslUMax; + GlslUMin = context.GlslUMin; + + // AtomicMemoryBinary + AtomicIAdd = context.AtomicIAdd; + AtomicAnd = context.AtomicAnd; + AtomicSMin = context.AtomicSMin; + AtomicUMin = context.AtomicUMin; + AtomicSMax = context.AtomicSMax; + AtomicUMax = context.AtomicUMax; + AtomicOr = context.AtomicOr; + AtomicExchange = context.AtomicExchange; + AtomicXor = context.AtomicXor; + + // Ternary + GlslFClamp = context.GlslFClamp; + GlslSClamp = context.GlslSClamp; + GlslFma = context.GlslFma; + + // TernaryS32 + BitFieldSExtract = context.BitFieldSExtract; + BitFieldUExtract = context.BitFieldUExtract; + + // TernaryU32 + GlslUClamp = context.GlslUClamp; + + // QuaternaryS32 + BitFieldInsert = context.BitFieldInsert; + } + } +} diff --git a/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/SpirvGenerator.cs b/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/SpirvGenerator.cs new file mode 100644 index 00000000..3e11a974 --- /dev/null +++ b/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/SpirvGenerator.cs @@ -0,0 +1,415 @@ +using Ryujinx.Common; +using Ryujinx.Graphics.Shader.IntermediateRepresentation; +using Ryujinx.Graphics.Shader.StructuredIr; +using Ryujinx.Graphics.Shader.Translation; +using System; +using System.Collections.Generic; +using static Spv.Specification; + +namespace Ryujinx.Graphics.Shader.CodeGen.Spirv +{ + using SpvInstruction = Spv.Generator.Instruction; + using SpvInstructionPool = Spv.Generator.GeneratorPool<Spv.Generator.Instruction>; + using SpvLiteralInteger = Spv.Generator.LiteralInteger; + using SpvLiteralIntegerPool = Spv.Generator.GeneratorPool<Spv.Generator.LiteralInteger>; + + static class SpirvGenerator + { + // Resource pools for Spirv generation. Note: Increase count when more threads are being used. + private const int GeneratorPoolCount = 1; + private static ObjectPool<SpvInstructionPool> InstructionPool; + private static ObjectPool<SpvLiteralIntegerPool> IntegerPool; + private static object PoolLock; + + static SpirvGenerator() + { + InstructionPool = new (() => new SpvInstructionPool(), GeneratorPoolCount); + IntegerPool = new (() => new SpvLiteralIntegerPool(), GeneratorPoolCount); + PoolLock = new object(); + } + + private const HelperFunctionsMask NeedsInvocationIdMask = + HelperFunctionsMask.Shuffle | + HelperFunctionsMask.ShuffleDown | + HelperFunctionsMask.ShuffleUp | + HelperFunctionsMask.ShuffleXor | + HelperFunctionsMask.SwizzleAdd; + + public static byte[] Generate(StructuredProgramInfo info, ShaderConfig config) + { + SpvInstructionPool instPool; + SpvLiteralIntegerPool integerPool; + + lock (PoolLock) + { + instPool = InstructionPool.Allocate(); + integerPool = IntegerPool.Allocate(); + } + + CodeGenContext context = new CodeGenContext(info, config, instPool, integerPool); + + context.AddCapability(Capability.GroupNonUniformBallot); + context.AddCapability(Capability.GroupNonUniformShuffle); + context.AddCapability(Capability.GroupNonUniformVote); + context.AddCapability(Capability.ImageBuffer); + context.AddCapability(Capability.ImageGatherExtended); + context.AddCapability(Capability.ImageQuery); + context.AddCapability(Capability.SampledBuffer); + + if (config.TransformFeedbackEnabled && config.LastInVertexPipeline) + { + context.AddCapability(Capability.TransformFeedback); + } + + if (config.Stage == ShaderStage.Fragment) + { + if (context.Info.IoDefinitions.Contains(new IoDefinition(StorageKind.Input, IoVariable.Layer))) + { + context.AddCapability(Capability.Geometry); + } + + if (context.Config.GpuAccessor.QueryHostSupportsFragmentShaderInterlock()) + { + context.AddCapability(Capability.FragmentShaderPixelInterlockEXT); + context.AddExtension("SPV_EXT_fragment_shader_interlock"); + } + } + else if (config.Stage == ShaderStage.Geometry) + { + context.AddCapability(Capability.Geometry); + + if (config.GpPassthrough && context.Config.GpuAccessor.QueryHostSupportsGeometryShaderPassthrough()) + { + context.AddExtension("SPV_NV_geometry_shader_passthrough"); + context.AddCapability(Capability.GeometryShaderPassthroughNV); + } + } + else if (config.Stage == ShaderStage.TessellationControl || config.Stage == ShaderStage.TessellationEvaluation) + { + context.AddCapability(Capability.Tessellation); + } + else if (config.Stage == ShaderStage.Vertex) + { + context.AddCapability(Capability.DrawParameters); + } + + if (context.Info.IoDefinitions.Contains(new IoDefinition(StorageKind.Output, IoVariable.ViewportMask))) + { + context.AddExtension("SPV_NV_viewport_array2"); + context.AddCapability(Capability.ShaderViewportMaskNV); + } + + if ((info.HelperFunctionsMask & NeedsInvocationIdMask) != 0) + { + info.IoDefinitions.Add(new IoDefinition(StorageKind.Input, IoVariable.SubgroupLaneId)); + } + + Declarations.DeclareAll(context, info); + + for (int funcIndex = 0; funcIndex < info.Functions.Count; funcIndex++) + { + var function = info.Functions[funcIndex]; + var retType = context.GetType(function.ReturnType); + + var funcArgs = new SpvInstruction[function.InArguments.Length + function.OutArguments.Length]; + + for (int argIndex = 0; argIndex < funcArgs.Length; argIndex++) + { + var argType = context.GetType(function.GetArgumentType(argIndex)); + var argPointerType = context.TypePointer(StorageClass.Function, argType); + funcArgs[argIndex] = argPointerType; + } + + var funcType = context.TypeFunction(retType, false, funcArgs); + var spvFunc = context.Function(retType, FunctionControlMask.MaskNone, funcType); + + context.DeclareFunction(funcIndex, function, spvFunc); + } + + for (int funcIndex = 0; funcIndex < info.Functions.Count; funcIndex++) + { + Generate(context, info, funcIndex); + } + + byte[] result = context.Generate(); + + lock (PoolLock) + { + InstructionPool.Release(instPool); + IntegerPool.Release(integerPool); + } + + return result; + } + + private static void Generate(CodeGenContext context, StructuredProgramInfo info, int funcIndex) + { + var function = info.Functions[funcIndex]; + + (_, var spvFunc) = context.GetFunction(funcIndex); + + context.AddFunction(spvFunc); + context.StartFunction(); + + Declarations.DeclareParameters(context, function); + + context.EnterBlock(function.MainBlock); + + Declarations.DeclareLocals(context, function); + Declarations.DeclareLocalForArgs(context, info.Functions); + + Generate(context, function.MainBlock); + + // Functions must always end with a return. + if (!(function.MainBlock.Last is AstOperation operation) || + (operation.Inst != Instruction.Return && operation.Inst != Instruction.Discard)) + { + context.Return(); + } + + context.FunctionEnd(); + + if (funcIndex == 0) + { + context.AddEntryPoint(context.Config.Stage.Convert(), spvFunc, "main", context.GetMainInterface()); + + if (context.Config.Stage == ShaderStage.TessellationControl) + { + context.AddExecutionMode(spvFunc, ExecutionMode.OutputVertices, (SpvLiteralInteger)context.Config.ThreadsPerInputPrimitive); + } + else if (context.Config.Stage == ShaderStage.TessellationEvaluation) + { + switch (context.Config.GpuAccessor.QueryTessPatchType()) + { + case TessPatchType.Isolines: + context.AddExecutionMode(spvFunc, ExecutionMode.Isolines); + break; + case TessPatchType.Triangles: + context.AddExecutionMode(spvFunc, ExecutionMode.Triangles); + break; + case TessPatchType.Quads: + context.AddExecutionMode(spvFunc, ExecutionMode.Quads); + break; + } + + switch (context.Config.GpuAccessor.QueryTessSpacing()) + { + case TessSpacing.EqualSpacing: + context.AddExecutionMode(spvFunc, ExecutionMode.SpacingEqual); + break; + case TessSpacing.FractionalEventSpacing: + context.AddExecutionMode(spvFunc, ExecutionMode.SpacingFractionalEven); + break; + case TessSpacing.FractionalOddSpacing: + context.AddExecutionMode(spvFunc, ExecutionMode.SpacingFractionalOdd); + break; + } + + bool tessCw = context.Config.GpuAccessor.QueryTessCw(); + + if (context.Config.Options.TargetApi == TargetApi.Vulkan) + { + // We invert the front face on Vulkan backend, so we need to do that here as well. + tessCw = !tessCw; + } + + if (tessCw) + { + context.AddExecutionMode(spvFunc, ExecutionMode.VertexOrderCw); + } + else + { + context.AddExecutionMode(spvFunc, ExecutionMode.VertexOrderCcw); + } + } + else if (context.Config.Stage == ShaderStage.Geometry) + { + InputTopology inputTopology = context.Config.GpuAccessor.QueryPrimitiveTopology(); + + context.AddExecutionMode(spvFunc, inputTopology switch + { + InputTopology.Points => ExecutionMode.InputPoints, + InputTopology.Lines => ExecutionMode.InputLines, + InputTopology.LinesAdjacency => ExecutionMode.InputLinesAdjacency, + InputTopology.Triangles => ExecutionMode.Triangles, + InputTopology.TrianglesAdjacency => ExecutionMode.InputTrianglesAdjacency, + _ => throw new InvalidOperationException($"Invalid input topology \"{inputTopology}\".") + }); + + context.AddExecutionMode(spvFunc, ExecutionMode.Invocations, (SpvLiteralInteger)context.Config.ThreadsPerInputPrimitive); + + context.AddExecutionMode(spvFunc, context.Config.OutputTopology switch + { + OutputTopology.PointList => ExecutionMode.OutputPoints, + OutputTopology.LineStrip => ExecutionMode.OutputLineStrip, + OutputTopology.TriangleStrip => ExecutionMode.OutputTriangleStrip, + _ => throw new InvalidOperationException($"Invalid output topology \"{context.Config.OutputTopology}\".") + }); + + int maxOutputVertices = context.Config.GpPassthrough ? context.InputVertices : context.Config.MaxOutputVertices; + + context.AddExecutionMode(spvFunc, ExecutionMode.OutputVertices, (SpvLiteralInteger)maxOutputVertices); + } + else if (context.Config.Stage == ShaderStage.Fragment) + { + context.AddExecutionMode(spvFunc, context.Config.Options.TargetApi == TargetApi.Vulkan + ? ExecutionMode.OriginUpperLeft + : ExecutionMode.OriginLowerLeft); + + if (context.Info.IoDefinitions.Contains(new IoDefinition(StorageKind.Output, IoVariable.FragmentOutputDepth))) + { + context.AddExecutionMode(spvFunc, ExecutionMode.DepthReplacing); + } + + if (context.Config.GpuAccessor.QueryEarlyZForce()) + { + context.AddExecutionMode(spvFunc, ExecutionMode.EarlyFragmentTests); + } + + if ((info.HelperFunctionsMask & HelperFunctionsMask.FSI) != 0 && + context.Config.GpuAccessor.QueryHostSupportsFragmentShaderInterlock()) + { + context.AddExecutionMode(spvFunc, ExecutionMode.PixelInterlockOrderedEXT); + } + } + else if (context.Config.Stage == ShaderStage.Compute) + { + var localSizeX = (SpvLiteralInteger)context.Config.GpuAccessor.QueryComputeLocalSizeX(); + var localSizeY = (SpvLiteralInteger)context.Config.GpuAccessor.QueryComputeLocalSizeY(); + var localSizeZ = (SpvLiteralInteger)context.Config.GpuAccessor.QueryComputeLocalSizeZ(); + + context.AddExecutionMode( + spvFunc, + ExecutionMode.LocalSize, + localSizeX, + localSizeY, + localSizeZ); + } + + if (context.Config.TransformFeedbackEnabled && context.Config.LastInVertexPipeline) + { + context.AddExecutionMode(spvFunc, ExecutionMode.Xfb); + } + } + } + + private static void Generate(CodeGenContext context, AstBlock block) + { + AstBlockVisitor visitor = new AstBlockVisitor(block); + + var loopTargets = new Dictionary<AstBlock, (SpvInstruction, SpvInstruction)>(); + + context.LoopTargets = loopTargets; + + visitor.BlockEntered += (sender, e) => + { + AstBlock mergeBlock = e.Block.Parent; + + if (e.Block.Type == AstBlockType.If) + { + AstBlock ifTrueBlock = e.Block; + AstBlock ifFalseBlock; + + if (AstHelper.Next(e.Block) is AstBlock nextBlock && nextBlock.Type == AstBlockType.Else) + { + ifFalseBlock = nextBlock; + } + else + { + ifFalseBlock = mergeBlock; + } + + var condition = context.Get(AggregateType.Bool, e.Block.Condition); + + context.SelectionMerge(context.GetNextLabel(mergeBlock), SelectionControlMask.MaskNone); + context.BranchConditional(condition, context.GetNextLabel(ifTrueBlock), context.GetNextLabel(ifFalseBlock)); + } + else if (e.Block.Type == AstBlockType.DoWhile) + { + var continueTarget = context.Label(); + + loopTargets.Add(e.Block, (context.NewBlock(), continueTarget)); + + context.LoopMerge(context.GetNextLabel(mergeBlock), continueTarget, LoopControlMask.MaskNone); + context.Branch(context.GetFirstLabel(e.Block)); + } + + context.EnterBlock(e.Block); + }; + + visitor.BlockLeft += (sender, e) => + { + if (e.Block.Parent != null) + { + if (e.Block.Type == AstBlockType.DoWhile) + { + // This is a loop, we need to jump back to the loop header + // if the condition is true. + AstBlock mergeBlock = e.Block.Parent; + + (var loopTarget, var continueTarget) = loopTargets[e.Block]; + + context.Branch(continueTarget); + context.AddLabel(continueTarget); + + var condition = context.Get(AggregateType.Bool, e.Block.Condition); + + context.BranchConditional(condition, loopTarget, context.GetNextLabel(mergeBlock)); + } + else + { + // We only need a branch if the last instruction didn't + // already cause the program to exit or jump elsewhere. + bool lastIsCf = e.Block.Last is AstOperation lastOp && + (lastOp.Inst == Instruction.Discard || + lastOp.Inst == Instruction.LoopBreak || + lastOp.Inst == Instruction.LoopContinue || + lastOp.Inst == Instruction.Return); + + if (!lastIsCf) + { + context.Branch(context.GetNextLabel(e.Block.Parent)); + } + } + + bool hasElse = AstHelper.Next(e.Block) is AstBlock nextBlock && + (nextBlock.Type == AstBlockType.Else || + nextBlock.Type == AstBlockType.ElseIf); + + // Re-enter the parent block. + if (e.Block.Parent != null && !hasElse) + { + context.EnterBlock(e.Block.Parent); + } + } + }; + + foreach (IAstNode node in visitor.Visit()) + { + if (node is AstAssignment assignment) + { + var dest = (AstOperand)assignment.Destination; + + if (dest.Type == OperandType.LocalVariable) + { + var source = context.Get(dest.VarType, assignment.Source); + context.Store(context.GetLocalPointer(dest), source); + } + else if (dest.Type == OperandType.Argument) + { + var source = context.Get(dest.VarType, assignment.Source); + context.Store(context.GetArgumentPointer(dest), source); + } + else + { + throw new NotImplementedException(dest.Type.ToString()); + } + } + else if (node is AstOperation operation) + { + Instructions.Generate(context, operation); + } + } + } + } +} diff --git a/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/TextureMeta.cs b/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/TextureMeta.cs new file mode 100644 index 00000000..4de05603 --- /dev/null +++ b/src/Ryujinx.Graphics.Shader/CodeGen/Spirv/TextureMeta.cs @@ -0,0 +1,4 @@ +namespace Ryujinx.Graphics.Shader.CodeGen.Spirv +{ + readonly record struct TextureMeta(int CbufSlot, int Handle, TextureFormat Format); +}
\ No newline at end of file |
